LibreOffice Module oox (master)  1
filterdetect.cxx
Go to the documentation of this file.
1 /* -*- Mode: C++; tab-width: 4; indent-tabs-mode: nil; c-basic-offset: 4 -*- */
2 /*
3  * This file is part of the LibreOffice project.
4  *
5  * This Source Code Form is subject to the terms of the Mozilla Public
6  * License, v. 2.0. If a copy of the MPL was not distributed with this
7  * file, You can obtain one at http://mozilla.org/MPL/2.0/.
8  *
9  * This file incorporates work covered by the following license notice:
10  *
11  * Licensed to the Apache Software Foundation (ASF) under one or more
12  * contributor license agreements. See the NOTICE file distributed
13  * with this work for additional information regarding copyright
14  * ownership. The ASF licenses this file to you under the Apache
15  * License, Version 2.0 (the "License"); you may not use this file
16  * except in compliance with the License. You may obtain a copy of
17  * the License at http://www.apache.org/licenses/LICENSE-2.0 .
18  */
19 
21 
22 #include <com/sun/star/io/XStream.hpp>
26 
27 #include <oox/core/fastparser.hxx>
30 #include <oox/ole/olestorage.hxx>
31 #include <oox/token/namespaces.hxx>
32 #include <oox/token/tokens.hxx>
33 
35 
36 #include <com/sun/star/uri/UriReferenceFactory.hpp>
37 #include <com/sun/star/beans/NamedValue.hpp>
38 
39 using namespace ::com::sun::star;
40 
41 namespace oox::core {
42 
43 using namespace ::com::sun::star::beans;
44 using namespace ::com::sun::star::io;
45 using namespace ::com::sun::star::lang;
46 using namespace ::com::sun::star::uno;
47 using namespace ::com::sun::star::xml::sax;
48 using namespace ::com::sun::star::uri;
49 
53 
54 FilterDetectDocHandler::FilterDetectDocHandler( const Reference< XComponentContext >& rxContext, OUString& rFilterName, const OUString& rFileName ) :
55  mrFilterName( rFilterName ),
56  maFileName(rFileName),
57  maOOXMLVariant( OOXMLVariant::ECMA_Transitional ),
58  mxContext( rxContext )
59 {
60  maContextStack.reserve( 2 );
61 }
62 
// NOTE(review): the signature line of this empty definition (listing line 63) is
// missing from this listing; presumably ~FilterDetectDocHandler() - confirm.
64 {
65 }
66 
// NOTE(review): the signature line of this empty definition (listing line 67) is
// missing from this listing; presumably startDocument() - confirm.
68 {
69 }
70 
// NOTE(review): the signature line of this empty definition (listing line 71) is
// missing from this listing; presumably endDocument() - confirm.
72 {
73 }
74 
75 void SAL_CALL FilterDetectDocHandler::processingInstruction( const OUString& /*rTarget*/, const OUString& /*rData*/ )
76 {
77 }
78 
79 void SAL_CALL FilterDetectDocHandler::setDocumentLocator( const Reference<XLocator>& /*xLocator*/ )
80 {
81 }
82 
/* Handles the start of a tokenised element.

   NOTE(review): the first line of this signature (listing line 83) is missing
   from this listing; presumably
   "void SAL_CALL FilterDetectDocHandler::startFastElement(" - confirm.

   A Relationship element whose parent is Relationships is passed to
   parseRelationship(); Default and Override elements whose parent is Types are
   passed to parseContentTypesDefault() / parseContentTypesOverride(). The
   token of every element is pushed onto maContextStack afterwards. */
84  sal_Int32 nElement, const Reference< XFastAttributeList >& rAttribs )
85 {
86  AttributeList aAttribs( rAttribs );
87  switch ( nElement )
88  {
89  // cases for _rels/.rels
90  case PR_TOKEN( Relationships ):
91  break;
92  case PR_TOKEN( Relationship ):
// only relationships that are direct children of the root element count
93  if( !maContextStack.empty() && (maContextStack.back() == PR_TOKEN( Relationships )) )
94  parseRelationship( aAttribs );
95  break;
96 
97  // cases for [Content_Types].xml
98  case PC_TOKEN( Types ):
99  break;
100  case PC_TOKEN( Default ):
101  if( !maContextStack.empty() && (maContextStack.back() == PC_TOKEN( Types )) )
102  parseContentTypesDefault( aAttribs );
103  break;
104  case PC_TOKEN( Override ):
105  if( !maContextStack.empty() && (maContextStack.back() == PC_TOKEN( Types )) )
106  parseContentTypesOverride( aAttribs );
107  break;
108  }
// remember this element so that child elements can test their parent token
109  maContextStack.push_back( nElement );
110 }
111 
// Elements without a known token are ignored (empty body).
// NOTE(review): the first line of this signature (listing line 112) is missing
// from this listing; presumably startUnknownElement( - confirm.
113  const OUString& /*Namespace*/, const OUString& /*Name*/, const Reference<XFastAttributeList>& /*Attribs*/ )
114 {
115 }
116 
117 void SAL_CALL FilterDetectDocHandler::endFastElement( sal_Int32 /*nElement*/ )
118 {
119  maContextStack.pop_back();
120 }
121 
// The end of an element without a known token is ignored (empty body).
// NOTE(review): the first line of this signature (listing line 122) is missing
// from this listing; presumably endUnknownElement( - confirm.
123  const OUString& /*Namespace*/, const OUString& /*Name*/ )
124 {
125 }
126 
127 Reference<XFastContextHandler> SAL_CALL FilterDetectDocHandler::createFastChildContext(
128  sal_Int32 /*Element*/, const Reference<XFastAttributeList>& /*Attribs*/ )
129 {
130  return this;
131 }
132 
133 Reference<XFastContextHandler> SAL_CALL FilterDetectDocHandler::createUnknownChildContext(
134  const OUString& /*Namespace*/, const OUString& /*Name*/, const Reference<XFastAttributeList>& /*Attribs*/)
135 {
136  return this;
137 }
138 
139 void SAL_CALL FilterDetectDocHandler::characters( const OUString& /*aChars*/ )
140 {
141 }
142 
// Evaluates one Relationship element: for the officeDocument relationship
// (Transitional or Strict type URI) the Target attribute is resolved against
// the package root and its path is stored in maTargetPath.
// NOTE(review): the signature line (listing line 143) is missing from this
// listing; presumably parseRelationship( const AttributeList& rAttribs ) - confirm.
144 {
145  OUString aType = rAttribs.getString( XML_Type, OUString() );
146 
147  // tdf#131936 Remember filter when opening file as 'Office Open XML Text'
// NOTE(review): the statements controlled by the three conditions below (listing
// lines 149, 151 and 153) are missing from this listing; presumably each one
// assigns maOOXMLVariant - confirm against the real source.
148  if (aType.startsWithIgnoreAsciiCase("http://schemas.openxmlformats.org/officedocument/2006/relationships/metadata/core-properties"))
150  else if (aType.startsWithIgnoreAsciiCase("http://schemas.openxmlformats.org/package/2006/relationships/metadata/core-properties"))
152  else if (aType.startsWithIgnoreAsciiCase("http://purl.oclc.org/ooxml/officeDocument"))
154 
// every relationship other than the main office document is of no further interest
155  if ( aType != "http://schemas.openxmlformats.org/officeDocument/2006/relationships/officeDocument" // OOXML Transitional
156  && aType != "http://purl.oclc.org/ooxml/officeDocument/relationships/officeDocument" ) //OOXML strict
157  return;
158 
159  Reference<XUriReferenceFactory> xFactory = UriReferenceFactory::create( mxContext );
160  try
161  {
162  // use '/' to represent the root of the zip package ( and provide a 'file' scheme to
163  // keep the XUriReference implementation happy )
164  Reference< XUriReference > xBase = xFactory->parse( "file:///" );
165 
166  Reference< XUriReference > xPart = xFactory->parse( rAttribs.getString( XML_Target, OUString() ) );
167  Reference< XUriReference > xAbs = xFactory->makeAbsolute( xBase, xPart, true, RelativeUriExcessParentSegments_RETAIN );
168 
169  if ( xAbs.is() )
170  maTargetPath = xAbs->getPath();
171  }
172  catch( const Exception& )
173  {
// best effort: on failure maTargetPath simply keeps its previous value
174  }
175 }
176 
// Maps the content type of the main document part to a filter name.
//
// rContentType  content type string to look up (compared exactly)
// rFileName     file name; a name ending in ".docm" (ASCII case-insensitive)
//               selects the Word VBA filter regardless of the content type
// returns       the filter name, or an empty string for an unrecognised type
//
// NOTE(review): four "case" labels (listing lines 185, 188, 201 and 204) are
// missing from this listing, so the two switch statements below are shown
// incomplete - confirm the label/return pairing against the real source.
177 OUString FilterDetectDocHandler::getFilterNameFromContentType( std::u16string_view rContentType, const OUString& rFileName )
178 {
179  bool bDocm = rFileName.endsWithIgnoreAsciiCase(".docm");
180 
181  if( rContentType == u"application/vnd.openxmlformats-officedocument.wordprocessingml.document.main+xml" && !bDocm )
182  {
// for text documents the filter depends on the OOXML variant stored in maOOXMLVariant
183  switch (maOOXMLVariant)
184  {
186  case OOXMLVariant::ISO_Strict: // Not supported, map to ISO transitional
187  return "writer_OOXML";
189  return "writer_MS_Word_2007";
190  }
191  }
192 
193  if( rContentType == u"application/vnd.ms-word.document.macroEnabled.main+xml" || bDocm )
194  return "writer_MS_Word_2007_VBA";
195 
196  if( rContentType == u"application/vnd.openxmlformats-officedocument.wordprocessingml.template.main+xml" ||
197  rContentType == u"application/vnd.ms-word.template.macroEnabledTemplate.main+xml" )
198  {
199  switch (maOOXMLVariant)
200  {
202  case OOXMLVariant::ISO_Strict: // Not supported, map to ISO transitional
203  return "writer_OOXML_Text_Template";
205  return "writer_MS_Word_2007_Template";
206  }
207  }
208 
209  if( rContentType == u"application/vnd.openxmlformats-officedocument.spreadsheetml.sheet.main+xml")
210  return "MS Excel 2007 XML";
211 
212  if (rContentType == u"application/vnd.ms-excel.sheet.macroEnabled.main+xml")
213  return "MS Excel 2007 VBA XML";
214 
215  if( rContentType == u"application/vnd.openxmlformats-officedocument.spreadsheetml.template.main+xml" ||
216  rContentType == u"application/vnd.ms-excel.template.macroEnabled.main+xml" )
217  return "MS Excel 2007 XML Template";
218 
219  if ( rContentType == u"application/vnd.ms-excel.sheet.binary.macroEnabled.main" )
220  return "MS Excel 2007 Binary";
221 
222  if (rContentType == u"application/vnd.openxmlformats-officedocument.presentationml.presentation.main+xml")
223  return "MS PowerPoint 2007 XML";
224 
225  if (rContentType == u"application/vnd.ms-powerpoint.presentation.macroEnabled.main+xml")
226  return "MS PowerPoint 2007 XML VBA";
227 
228  if( rContentType == u"application/vnd.openxmlformats-officedocument.presentationml.slideshow.main+xml" ||
229  rContentType == u"application/vnd.ms-powerpoint.slideshow.macroEnabled.main+xml" )
230  return "MS PowerPoint 2007 XML AutoPlay";
231 
232  if( rContentType == u"application/vnd.openxmlformats-officedocument.presentationml.template.main+xml" ||
233  rContentType == u"application/vnd.ms-powerpoint.template.macroEnabled.main+xml" )
234  return "MS PowerPoint 2007 XML Template";
235 
// no known content type matched
236  return OUString();
237 }
238 
// Evaluates a Default element of [Content_Types].xml: if no filter name has
// been set yet and maTargetPath ends with "." followed by the element's
// Extension attribute, the filter name is derived from its ContentType.
// NOTE(review): the signature line (listing line 239) is missing from this
// listing; presumably parseContentTypesDefault( const AttributeList& rAttribs ) - confirm.
240 {
241  // only if no overridden part name found
242  if( mrFilterName.isEmpty() )
243  {
244  // check if target path ends with extension
245  OUString aExtension = rAttribs.getString( XML_Extension, OUString() );
246  sal_Int32 nExtPos = maTargetPath.getLength() - aExtension.getLength();
// nExtPos > 0 guarantees that the index nExtPos - 1 of the expected '.' is valid
247  if( (nExtPos > 0) && (maTargetPath[ nExtPos - 1 ] == '.') && maTargetPath.match( aExtension, nExtPos ) )
248  mrFilterName = getFilterNameFromContentType( rAttribs.getString( XML_ContentType, OUString() ), maFileName );
249  }
250 }
251 
// Evaluates an Override element of [Content_Types].xml: if its PartName equals
// maTargetPath, the filter name is derived from its ContentType (replacing any
// value set before).
// NOTE(review): the signature line (listing line 252) is missing from this
// listing; presumably parseContentTypesOverride( const AttributeList& rAttribs ) - confirm.
253 {
254  if( rAttribs.getString( XML_PartName, OUString() ) == maTargetPath )
255  mrFilterName = getFilterNameFromContentType( rAttribs.getString( XML_ContentType, OUString() ), maFileName );
256 }
257 
// Constructor body of FilterDetect: stores the component context in mxContext.
// NOTE(review): the signature line (listing line 258) is missing from this
// listing. UNO_SET_THROW presumably makes the initialisation throw when
// rxContext is empty - confirm.
259  mxContext( rxContext, UNO_SET_THROW )
260 {
261 }
262 
// NOTE(review): the signature line of this empty definition (listing line 263) is
// missing from this listing; presumably FilterDetect::~FilterDetect() - confirm.
264 {
265 }
266 
267 namespace
268 {
269 
270 bool lclIsZipPackage( const Reference< XComponentContext >& rxContext, const Reference< XInputStream >& rxInStrm )
271 {
272  ZipStorage aZipStorage( rxContext, rxInStrm );
273  return aZipStorage.isStorage();
274 }
275 
276 class PasswordVerifier : public IDocPasswordVerifier
277 {
278 public:
279  explicit PasswordVerifier( crypto::DocumentDecryption& aDecryptor );
280 
281  virtual DocPasswordVerifierResult verifyPassword( const OUString& rPassword, Sequence<NamedValue>& rEncryptionData ) override;
282 
283  virtual DocPasswordVerifierResult verifyEncryptionData( const Sequence<NamedValue>& rEncryptionData ) override;
284 private:
285  crypto::DocumentDecryption& mDecryptor;
286 };
287 
288 PasswordVerifier::PasswordVerifier( crypto::DocumentDecryption& aDecryptor ) :
289  mDecryptor(aDecryptor)
290 {}
291 
// Checks rPassword via the decryptor; if an encryption key can be generated
// from it, rEncryptionData receives the encryption data created for that password.
// NOTE(review): the return statements (listing lines 302 and 305) are missing
// from this listing, so the values returned for the success, wrong-password and
// exception cases cannot be read here - confirm against the real source.
292 comphelper::DocPasswordVerifierResult PasswordVerifier::verifyPassword( const OUString& rPassword, Sequence<NamedValue>& rEncryptionData )
293 {
294  try
295  {
296  if (mDecryptor.generateEncryptionKey(rPassword))
297  rEncryptionData = mDecryptor.createEncryptionData(rPassword);
298  }
299  catch (...)
300  {
301  // Any exception is a reason to abort
303  }
304 
306 }
307 
// Verification of previously created encryption data; the parameter is unused.
// NOTE(review): the only body line (listing line 310, presumably a return
// statement) is missing from this listing - confirm against the real source.
308 comphelper::DocPasswordVerifierResult PasswordVerifier::verifyEncryptionData( const Sequence<NamedValue>& )
309 {
311 }
312 
313 } // namespace
314 
// Returns an input stream for the unencrypted ZIP package of the document.
//
// Tried in this order:
//  1. the plain input stream of the media descriptor (also returned as-is if it
//     is not set at all),
//  2. a stream stored under 'DecryptedPackage' in the component data,
//  3. decryption of an encrypted OLE package into a memory stream, which is then
//     stored under 'DecryptedPackage'.
// Sets the 'Aborted' property if no encryption data was obtained or decryption
// failed. Returns an empty reference if none of the attempts yields a ZIP package.
315 Reference< XInputStream > FilterDetect::extractUnencryptedPackage( MediaDescriptor& rMediaDescriptor ) const
316 {
317  // try the plain input stream
318  Reference<XInputStream> xInputStream( rMediaDescriptor[ MediaDescriptor::PROP_INPUTSTREAM() ], UNO_QUERY );
319  if( !xInputStream.is() || lclIsZipPackage( mxContext, xInputStream ) )
320  return xInputStream;
321 
322  // check if an already decrypted package stream is passed in the 'ComponentData' property
323  Reference<XStream> xDecrypted( rMediaDescriptor.getComponentDataEntry( "DecryptedPackage" ), UNO_QUERY );
324  if( xDecrypted.is() )
325  {
326  Reference<XInputStream> xDecryptedInputStream = xDecrypted->getInputStream();
327  if( lclIsZipPackage( mxContext, xDecryptedInputStream ) )
328  return xDecryptedInputStream;
329  }
330 
331  // try to decrypt an encrypted OLE package
332  oox::ole::OleStorage aOleStorage( mxContext, xInputStream, false );
333  if( aOleStorage.isStorage() )
334  {
335  try
336  {
337  crypto::DocumentDecryption aDecryptor(mxContext, aOleStorage);
338 
339  if( aDecryptor.readEncryptionInfo() )
340  {
341  /* "VelvetSweatshop" is the built-in default encryption
342  password used by MS Excel for the "workbook protection"
343  feature with password. Try this first before prompting the
344  user for a password. */
345  std::vector<OUString> aDefaultPasswords;
346  aDefaultPasswords.emplace_back("VelvetSweatshop");
347 
348  /* Use the comphelper password helper to request a password.
349  This helper returns either with the correct password
350  (according to the verifier), or with an empty string if
351  user has cancelled the password input dialog. */
352  PasswordVerifier aVerifier( aDecryptor );
// NOTE(review): the second argument of this call (listing line 355, the
// password request type) is missing from this listing - confirm.
353  Sequence<NamedValue> aEncryptionData = rMediaDescriptor.requestAndVerifyDocPassword(
354  aVerifier,
356  &aDefaultPasswords );
357 
// no encryption data: flag the detection as aborted
358  if( !aEncryptionData.hasElements() )
359  {
360  rMediaDescriptor[ MediaDescriptor::PROP_ABORTED() ] <<= true;
361  }
362  else
363  {
364  // create MemoryStream for unencrypted package - rather not put this in a tempfile
365  Reference<XStream> const xTempStream(
366  mxContext->getServiceManager()->createInstanceWithContext(
367  "com.sun.star.comp.MemoryStream", mxContext),
368  UNO_QUERY_THROW);
369 
370  // if decryption was unsuccessful (corrupted file or any other reason)
371  if (!aDecryptor.decrypt(xTempStream))
372  {
373  rMediaDescriptor[ MediaDescriptor::PROP_ABORTED() ] <<= true;
374  }
375  else
376  {
377  // store the decrypted stream in the media descriptor to keep it alive
378  rMediaDescriptor.setComponentDataEntry( "DecryptedPackage", Any( xTempStream ) );
379 
380  Reference<XInputStream> xDecryptedInputStream = xTempStream->getInputStream();
381  if( lclIsZipPackage( mxContext, xDecryptedInputStream ) )
382  return xDecryptedInputStream;
383  }
384  }
385  }
386  }
387  catch( const Exception& )
388  {
// best effort: a failure here falls through to the empty result below
389  }
390  }
391  return Reference<XInputStream>();
392 }
393 
394 // com.sun.star.lang.XServiceInfo interface -----------------------------------
395 
// Returns the implementation name of this service.
// NOTE(review): the signature line (listing line 396) is missing from this
// listing; presumably OUString SAL_CALL FilterDetect::getImplementationName() - confirm.
397 {
398  return "com.sun.star.comp.oox.FormatDetector";
399 }
400 
401 sal_Bool SAL_CALL FilterDetect::supportsService( const OUString& rServiceName )
402 {
403  return cppu::supportsService(this, rServiceName);
404 }
405 
// Returns the list of supported service names (a single entry).
// NOTE(review): the signature line (listing line 406) is missing from this
// listing; presumably FilterDetect::getSupportedServiceNames() - confirm.
407 {
408  return { "com.sun.star.frame.ExtendedTypeDetection" };
409 }
410 
411 // com.sun.star.document.XExtendedFilterDetection interface -------------------
412 
413 OUString SAL_CALL FilterDetect::detect( Sequence< PropertyValue >& rMediaDescSeq )
414 {
415  OUString aFilterName;
416  MediaDescriptor aMediaDescriptor( rMediaDescSeq );
417 
418  try
419  {
420  aMediaDescriptor.addInputStream();
421 
422  /* Get the unencrypted input stream. This may include creation of a
423  temporary file that contains the decrypted package. This temporary
424  file will be stored in the 'ComponentData' property of the media
425  descriptor. */
426  Reference< XInputStream > xInputStream( extractUnencryptedPackage( aMediaDescriptor ), UNO_SET_THROW );
427 
428  // stream must be a ZIP package
429  ZipStorage aZipStorage( mxContext, xInputStream );
430  if( aZipStorage.isStorage() )
431  {
432  // create the fast parser, register the XML namespaces, set document handler
433  FastParser aParser;
434  aParser.registerNamespace( NMSP_packageRel );
435  aParser.registerNamespace( NMSP_officeRel );
436  aParser.registerNamespace( NMSP_packageContentTypes );
437 
438  OUString aFileName;
439  aMediaDescriptor[utl::MediaDescriptor::PROP_URL()] >>= aFileName;
440 
441  aParser.setDocumentHandler( new FilterDetectDocHandler( mxContext, aFilterName, aFileName ) );
442 
443  /* Parse '_rels/.rels' to get the target path and '[Content_Types].xml'
444  to determine the content type of the part at the target path. */
445  aParser.parseStream( aZipStorage, "_rels/.rels" );
446  aParser.parseStream( aZipStorage, "[Content_Types].xml" );
447  }
448  }
449  catch( const Exception& )
450  {
451  if ( aMediaDescriptor.getUnpackedValueOrDefault( MediaDescriptor::PROP_ABORTED(), false ) )
452  /* The user chose to abort detection, e.g. by hitting 'Cancel' in the password input dialog,
453  so we have to return non-empty type name to abort the detection loop. The loading code is
454  supposed to check whether the "Aborted" flag is present in the descriptor, and to not attempt
455  to actually load the file then.
456 
457  The returned type name is the one we got as an input, which typically was detected by the flat
458  detection (i.e. by file extension), so normally that's the correct one. Also at this point we
459  already know that the file is OLE encrypted package, so trying with other type detectors doesn't
460  make much sense anyway.
461  */
462  aFilterName = aMediaDescriptor.getUnpackedValueOrDefault( MediaDescriptor::PROP_TYPENAME(), OUString() );
463  }
464 
465  // write back changed media descriptor members
466  aMediaDescriptor >> rMediaDescSeq;
467  return aFilterName;
468 }
469 
470 } // namespace oox::core
471 
// Component factory function: creates a new oox::core::FilterDetect for the
// passed context and returns it via cppu::acquire().
// NOTE(review): the line with the function name and first parameter (listing
// line 473) is missing from this listing; presumably
// com_sun_star_comp_oox_FormatDetector_get_implementation(uno::XComponentContext* pCtx, - confirm.
472 extern "C" SAL_DLLPUBLIC_EXPORT uno::XInterface*
474  uno::Sequence<uno::Any> const& /*rSeq*/)
475 {
476  return cppu::acquire(new oox::core::FilterDetect(pCtx));
477 }
478 
479 /* vim:set shiftwidth=4 softtabstop=4 expandtab: */
SAL_DLLPUBLIC_EXPORT uno::XInterface * com_sun_star_comp_oox_FormatDetector_get_implementation(uno::XComponentContext *pCtx, uno::Sequence< uno::Any > const &)
virtual sal_Bool SAL_CALL supportsService(const OUString &ServiceName) override
void registerNamespace(sal_Int32 nNamespaceId)
Registers an OOXML namespace at the parser.
Definition: fastparser.cxx:81
virtual css::uno::Reference< XFastContextHandler > SAL_CALL createUnknownChildContext(const OUString &Namespace, const OUString &Name, const css::uno::Reference< css::xml::sax::XFastAttributeList > &Attribs) override
Document handler specifically designed for detecting OOXML file formats.
FilterDetect(const css::uno::Reference< css::uno::XComponentContext > &rxContext)
bool isStorage() const
Returns true, if the object represents a valid storage.
Definition: storagebase.cxx:89
uno::Reference< uno::XComponentContext > mxContext
virtual OUString SAL_CALL detect(css::uno::Sequence< css::beans::PropertyValue > &lDescriptor) override
virtual css::uno::Sequence< OUString > SAL_CALL getSupportedServiceNames() override
void parseStream(const css::xml::sax::InputSource &rInputSource, bool bCloseStream=false)
Parses the passed SAX input source.
virtual OUString SAL_CALL getImplementationName() override
OptValue< OUString > getString(sal_Int32 nAttrToken) const
Returns the string value of the specified attribute.
Implements stream access for binary OLE storages.
Definition: olestorage.hxx:43
css::uno::Any getComponentDataEntry(const OUString &rName) const
virtual void SAL_CALL startFastElement(sal_Int32 nElement, const css::uno::Reference< css::xml::sax::XFastAttributeList > &Attribs) override
bool CPPUHELPER_DLLPUBLIC supportsService(css::lang::XServiceInfo *implementation, rtl::OUString const &name)
void setDocumentHandler(const css::uno::Reference< css::xml::sax::XFastDocumentHandler > &rxDocHandler)
Sets the passed document handler that will receive the SAX parser events.
Definition: fastparser.cxx:101
virtual void SAL_CALL setDocumentLocator(const css::uno::Reference< css::xml::sax::XLocator > &xLocator) override
virtual void SAL_CALL endDocument() override
void parseContentTypesOverride(const AttributeList &rAttribs)
DocPasswordVerifierResult
static const OUString & PROP_URL()
void parseContentTypesDefault(const AttributeList &rAttribs)
void parseRelationship(const AttributeList &rAttribs)
virtual void SAL_CALL processingInstruction(const OUString &rTarget, const OUString &rData) override
float u
unsigned char sal_Bool
bool decrypt(const css::uno::Reference< css::io::XStream > &xDocumentStream)
void setComponentDataEntry(const OUString &rName, const css::uno::Any &rValue)
Provides access to attribute values of an element.
virtual void SAL_CALL startUnknownElement(const OUString &Namespace, const OUString &Name, const css::uno::Reference< css::xml::sax::XFastAttributeList > &Attribs) override
css::uno::Sequence< css::beans::NamedValue > requestAndVerifyDocPassword(comphelper::IDocPasswordVerifier &rVerifier, comphelper::DocPasswordRequestType eRequestType, const ::std::vector< OUString > *pDefaultPasswords)
virtual void SAL_CALL endUnknownElement(const OUString &Namespace, const OUString &Name) override
css::uno::Reference< css::uno::XComponentContext > mxContext
FilterDetectDocHandler(const css::uno::Reference< css::uno::XComponentContext > &rxContext, OUString &rFilter, const OUString &rFileName)
virtual css::uno::Reference< XFastContextHandler > SAL_CALL createFastChildContext(sal_Int32 Element, const css::uno::Reference< css::xml::sax::XFastAttributeList > &Attribs) override
Relationship
Implements stream access for ZIP storages containing XML streams.
Definition: zipstorage.hxx:41
Wrapper for a fast SAX parser that works on automatically generated OOXML token and namespace identif...
Definition: fastparser.hxx:53
crypto::DocumentDecryption & mDecryptor
virtual void SAL_CALL characters(const OUString &aChars) override
virtual void SAL_CALL endFastElement(sal_Int32 Element) override
virtual void SAL_CALL startDocument() override
Reference< XSingleServiceFactory > xFactory
OUString getFilterNameFromContentType(std::u16string_view rContentType, const OUString &rFileName)
virtual ~FilterDetectDocHandler() override
virtual ~FilterDetect() override