LibreOffice Module filter (master) 1
xmlfilterdetect/filterdetect.cxx
Go to the documentation of this file.
1/* -*- Mode: C++; tab-width: 4; indent-tabs-mode: nil; c-basic-offset: 4 -*- */
2/*
3 * This file is part of the LibreOffice project.
4 *
5 * This Source Code Form is subject to the terms of the Mozilla Public
6 * License, v. 2.0. If a copy of the MPL was not distributed with this
7 * file, You can obtain one at http://mozilla.org/MPL/2.0/.
8 *
9 * This file incorporates work covered by the following license notice:
10 *
11 * Licensed to the Apache Software Foundation (ASF) under one or more
12 * contributor license agreements. See the NOTICE file distributed
13 * with this work for additional information regarding copyright
14 * ownership. The ASF licenses this file to you under the Apache
15 * License, Version 2.0 (the "License"); you may not use this file
16 * except in compliance with the License. You may obtain a copy of
17 * the License at http://www.apache.org/licenses/LICENSE-2.0 .
18 */
19
20#include "filterdetect.hxx"
21#include <com/sun/star/io/XInputStream.hpp>
22#include <com/sun/star/uno/XComponentContext.hpp>
23#include <com/sun/star/container/XNameAccess.hpp>
24#include <com/sun/star/beans/PropertyState.hpp>
27#include <ucbhelper/content.hxx>
29#include <svl/inettype.hxx>
30#include <memory>
31#include <o3tl/string_view.hxx>
32
33using namespace com::sun::star::container;
34using namespace com::sun::star::uno;
35using namespace com::sun::star::beans;
36
37namespace {
38
39OUString supportedByType( std::u16string_view clipBoardFormat, std::u16string_view resultString, const OUString& checkType)
40{
41 OUString sTypeName;
42 if ( o3tl::starts_with(clipBoardFormat, u"doctype:") )
43 {
44 std::u16string_view tryStr = clipBoardFormat.substr(8);
45 if (resultString.find(tryStr) != std::u16string_view::npos)
46 {
47 sTypeName = checkType;
48 }
49 }
50 return sTypeName;
51}
52
53bool IsMediaTypeXML( const OUString& mediaType )
54{
55 if (!mediaType.isEmpty())
56 {
57 OUString sType, sSubType;
58 if (INetContentTypes::parse(mediaType, sType, sSubType)
59 && sType == "application")
60 {
61 // RFC 3023: application/xml; don't detect text/xml
62 if (sSubType == "xml")
63 return true;
64 // Registered media types: application/XXXX+xml
65 if (sSubType.endsWith("+xml"))
66 return true;
67 }
68 }
69 return false;
70}
71
72}
73
74OUString SAL_CALL FilterDetect::detect( css::uno::Sequence< css::beans::PropertyValue >& aArguments )
75{
76 OUString sTypeName;
77 OUString sUrl;
79
80 css::uno::Reference< css::io::XInputStream > xInStream;
81 const PropertyValue * pValue = aArguments.getConstArray();
82 sal_Int32 nLength;
83 OUString resultString;
84
85 nLength = aArguments.getLength();
86 sal_Int32 location=nLength;
87 for (sal_Int32 i = 0 ; i < nLength; i++)
88 {
89 if ( pValue[i].Name == "TypeName" )
90 {
91 location=i;
92 }
93 else if ( pValue[i].Name == "URL" )
94 {
95 pValue[i].Value >>= sUrl;
96 }
97 else if ( pValue[i].Name == "InputStream" )
98 {
99 pValue[i].Value >>= xInStream ;
100 }
101 }
102 try
103 {
104 if (!xInStream.is())
105 {
106 ::ucbhelper::Content aContent(
108 mxCtx);
109 xInStream = aContent.openStream();
110 if (!xInStream.is())
111 {
112 return sTypeName;
113 }
114 }
115
116 std::unique_ptr< SvStream > pInStream( ::utl::UcbStreamHelper::CreateStream( xInStream ) );
117 pInStream->StartReadingUnicodeText( RTL_TEXTENCODING_DONTKNOW );
118 sal_uInt64 const nUniPos = pInStream->Tell();
119
120 const sal_uInt16 nSize = 4000;
121 bool bTryUtf16 = false;
122
123 if ( nUniPos == 0 ) // No BOM detected, try to guess UTF-16 endianness
124 {
125 sal_uInt16 nHeader = 0;
126 pInStream->ReadUInt16( nHeader );
127 if ( nHeader == 0x003C )
128 bTryUtf16 = true;
129 else if ( nHeader == 0x3C00 )
130 {
131 bTryUtf16 = true;
132 pInStream->SetEndian( pInStream->GetEndian() == SvStreamEndian::LITTLE ? SvStreamEndian::BIG : SvStreamEndian::LITTLE );
133 }
134 pInStream->Seek( STREAM_SEEK_TO_BEGIN );
135 }
136
137 if ( nUniPos == 3 || ( nUniPos == 0 && !bTryUtf16 ) ) // UTF-8 or non-Unicode
138 {
139 OString const str(read_uInt8s_ToOString(*pInStream, nSize));
140 resultString = OUString(str.getStr(), str.getLength(),
141 RTL_TEXTENCODING_ASCII_US,
142 RTL_TEXTTOUNICODE_FLAGS_UNDEFINED_DEFAULT|RTL_TEXTTOUNICODE_FLAGS_MBUNDEFINED_DEFAULT|RTL_TEXTTOUNICODE_FLAGS_INVALID_DEFAULT);
143 }
144 else if ( nUniPos == 2 || bTryUtf16 ) // UTF-16
145 resultString = read_uInt16s_ToOUString( *pInStream, nSize );
146
147 if ( !resultString.startsWith( "<?xml" ) )
148 {
149 // Check the content type; XML declaration is optional in XML files according to XML 1.0 ch.2.8
150 // (see https://www.w3.org/TR/2008/REC-xml-20081126/#sec-prolog-dtd)
151 OUString sMediaType;
152 try
153 {
154 ::ucbhelper::Content aContent(
156 mxCtx);
157 aContent.getPropertyValue("MediaType") >>= sMediaType;
158 if (sMediaType.isEmpty())
159 {
160 aContent.getPropertyValue("Content-Type") >>= sMediaType;
161 }
162 }
163 catch (...) {}
164
165 if (!IsMediaTypeXML(sMediaType))
166 {
167 // This is not an XML stream. It makes no sense to try to detect
168 // a non-XML file type here.
169 return OUString();
170 }
171 }
172
173 // test typedetect code
174 Reference <XNameAccess> xTypeCont(mxCtx->getServiceManager()->createInstanceWithContext("com.sun.star.document.TypeDetection", mxCtx), UNO_QUERY);
175 Sequence < OUString > myTypes= xTypeCont->getElementNames();
176 nLength = myTypes.getLength();
177
178 sal_Int32 new_nlength=0;
179 sal_Int32 i = 0 ;
180 while ((i < nLength) && (sTypeName.isEmpty()))
181 {
182 Any elem = xTypeCont->getByName(myTypes[i]);
183 elem >>=lProps;
184 new_nlength = lProps.getLength();
185 sal_Int32 j =0;
186 while (j < new_nlength && (sTypeName.isEmpty()))
187 {
188 OUString tmpStr;
189 lProps[j].Value >>=tmpStr;
190 if ( lProps[j].Name == "ClipboardFormat" && !tmpStr.isEmpty() )
191 {
192 sTypeName = supportedByType(tmpStr,resultString, myTypes[i]);
193 }
194 j++;
195 }
196 i++;
197 }
198 }
199 catch (const Exception &)
200 {
201 TOOLS_WARN_EXCEPTION("filter.xmlfd", "An Exception occurred while opening File stream");
202 }
203
204 if (!sTypeName.isEmpty())
205 {
206 if (location == aArguments.getLength())
207 {
208 aArguments.realloc(nLength+1);
209 aArguments.getArray()[location].Name = "TypeName";
210 }
211 aArguments.getArray()[location].Value <<=sTypeName;
212 }
213
214 return sTypeName;
215}
216
217// XInitialization
218void SAL_CALL FilterDetect::initialize( const Sequence< Any >& /*aArguments*/ )
219{
220}
221
222// XServiceInfo
224{
225 return "com.sun.star.comp.filters.XMLFilterDetect";
226}
227
228sal_Bool SAL_CALL FilterDetect::supportsService( const OUString& rServiceName )
229{
230 return cppu::supportsService( this, rServiceName );
231}
232
234{
235 return { "com.sun.star.document.ExtendedTypeDetection" };
236}
237
238extern "C" SAL_DLLPUBLIC_EXPORT css::uno::XInterface*
240 css::uno::XComponentContext* context, css::uno::Sequence<css::uno::Any> const&)
241{
242 return cppu::acquire(new FilterDetect(context));
243}
244
245
246/* vim:set shiftwidth=4 softtabstop=4 expandtab: */
OptionalString sType
constexpr OUStringLiteral sMediaType
virtual css::uno::Sequence< OUString > SAL_CALL getSupportedServiceNames() override
css::uno::Reference< css::uno::XComponentContext > mxCtx
virtual OUString SAL_CALL getImplementationName() override
virtual OUString SAL_CALL detect(css::uno::Sequence< css::beans::PropertyValue > &lDescriptor) override
virtual sal_Bool SAL_CALL supportsService(const OUString &ServiceName) override
virtual void SAL_CALL initialize(const css::uno::Sequence< css::uno::Any > &aArguments) override
static bool parse(OUString const &rMediaType, OUString &rType, OUString &rSubType, INetContentTypeParameterList *pParameters=nullptr)
css::uno::Any getPropertyValue(const OUString &rPropertyName)
css::uno::Reference< css::io::XInputStream > openStream()
static std::unique_ptr< SvStream > CreateStream(const OUString &rFileName, StreamMode eOpenMode, css::uno::Reference< css::awt::XWindow > xParentWin=nullptr)
#define TOOLS_WARN_EXCEPTION(area, stream)
Sequence< PropertyValue > aArguments
@ Exception
bool CPPUHELPER_DLLPUBLIC supportsService(css::lang::XServiceInfo *implementation, rtl::OUString const &name)
int i
Definition: gentoken.py:48
constexpr bool starts_with(std::basic_string_view< charT, traits > sv, std::basic_string_view< charT, traits > x) noexcept
void checkType(rtl::Reference< TypeManager > const &manager, OUString const &name, std::set< OUString > &interfaceTypes, std::set< OUString > &serviceTypes, AttributeInfo &properties)
TOOLS_DLLPUBLIC OString read_uInt8s_ToOString(SvStream &rStrm, std::size_t nUnits)
TOOLS_DLLPUBLIC OUString read_uInt16s_ToOUString(SvStream &rStrm, std::size_t nUnits)
#define STREAM_SEEK_TO_BEGIN
OUString Name
unsigned char sal_Bool
SAL_DLLPUBLIC_EXPORT css::uno::XInterface * filter_XMLFilterDetect_get_implementation(css::uno::XComponentContext *context, css::uno::Sequence< css::uno::Any > const &)
sal_Int32 nLength