LibreOffice Module comphelper (master)  1
ofopxmlhelper.cxx
Go to the documentation of this file.
1 /* -*- Mode: C++; tab-width: 4; indent-tabs-mode: nil; c-basic-offset: 4 -*- */
2 /*
3  * This file is part of the LibreOffice project.
4  *
5  * This Source Code Form is subject to the terms of the Mozilla Public
6  * License, v. 2.0. If a copy of the MPL was not distributed with this
7  * file, You can obtain one at http://mozilla.org/MPL/2.0/.
8  *
9  * This file incorporates work covered by the following license notice:
10  *
11  * Licensed to the Apache Software Foundation (ASF) under one or more
12  * contributor license agreements. See the NOTICE file distributed
13  * with this work for additional information regarding copyright
14  * ownership. The ASF licenses this file to you under the Apache
15  * License, Version 2.0 (the "License"); you may not use this file
16  * except in compliance with the License. You may obtain a copy of
17  * the License at http://www.apache.org/licenses/LICENSE-2.0 .
18  */
19 
20 
23 
24 #include <cppuhelper/implbase.hxx>
25 
26 #include <com/sun/star/beans/StringPair.hpp>
27 #include <com/sun/star/xml/sax/Parser.hpp>
28 #include <com/sun/star/xml/sax/XDocumentHandler.hpp>
29 #include <com/sun/star/xml/sax/SAXException.hpp>
30 #include <com/sun/star/xml/sax/Writer.hpp>
31 #include <com/sun/star/lang/IllegalArgumentException.hpp>
32 #include <vector>
33 
// Identifiers for the kinds of package stream the SAX handler in this file can parse.
// They are passed around as the sal_uInt16 nFormat argument.

// Relationship streams (read via ReadRelationsInfoSequence).
#define RELATIONINFO_FORMAT 0
// The "[Content_Types].xml" stream (read via ReadContentTypeSequence).
#define CONTENTTYPE_FORMAT 1
// Highest valid format id; ReadSequence_Impl rejects anything larger.
#define FORMAT_MAX_ID CONTENTTYPE_FORMAT
37 
38 using namespace ::com::sun::star;
39 
40 namespace comphelper {
41 
42 // this helper class is designed to allow to parse ContentType- and Relationship-related information from OfficeOpenXML format
44  : public cppu::WeakImplHelper< css::xml::sax::XDocumentHandler >
45 {
46  sal_uInt16 const m_nFormat; // which format to parse
47 
48  css::uno::Sequence< css::uno::Sequence< css::beans::StringPair > > m_aResultSeq;
49  std::vector< OUString > m_aElementsSeq; // stack of elements being parsed
50 
51 
52 public:
53  css::uno::Sequence< css::uno::Sequence< css::beans::StringPair > > const & GetParsingResult() const;
54 
55  explicit OFOPXMLHelper_Impl( sal_uInt16 nFormat ); // must not be created directly
56 
57  // XDocumentHandler
58  virtual void SAL_CALL startDocument() override;
59  virtual void SAL_CALL endDocument() override;
60  virtual void SAL_CALL startElement( const OUString& aName, const css::uno::Reference< css::xml::sax::XAttributeList >& xAttribs ) override;
61  virtual void SAL_CALL endElement( const OUString& aName ) override;
62  virtual void SAL_CALL characters( const OUString& aChars ) override;
63  virtual void SAL_CALL ignorableWhitespace( const OUString& aWhitespaces ) override;
64  virtual void SAL_CALL processingInstruction( const OUString& aTarget, const OUString& aData ) override;
65  virtual void SAL_CALL setDocumentLocator( const css::uno::Reference< css::xml::sax::XLocator >& xLocator ) override;
66 };
67 
68 
69 namespace OFOPXMLHelper {
70 
// Forward declaration of the shared reader used by ReadRelationsInfoSequence and
// ReadContentTypeSequence; the definition follows later in this namespace.
static uno::Sequence<uno::Sequence< beans::StringPair>> ReadSequence_Impl(
    const uno::Reference<io::XInputStream>& xInStream,
    const OUString& aStringID, sal_uInt16 nFormat,
    const uno::Reference<uno::XComponentContext>& xContext);
76 
77 uno::Sequence< uno::Sequence< beans::StringPair > > ReadRelationsInfoSequence(
78  const uno::Reference< io::XInputStream >& xInStream,
79  const OUString & aStreamName,
80  const uno::Reference< uno::XComponentContext >& rContext )
81 {
82  OUString aStringID = "_rels/" + aStreamName;
83  return ReadSequence_Impl( xInStream, aStringID, RELATIONINFO_FORMAT, rContext );
84 }
85 
86 
87 uno::Sequence< uno::Sequence< beans::StringPair > > ReadContentTypeSequence(
88  const uno::Reference< io::XInputStream >& xInStream,
89  const uno::Reference< uno::XComponentContext >& rContext )
90 {
91  OUString aStringID = "[Content_Types].xml";
92  return ReadSequence_Impl( xInStream, aStringID, CONTENTTYPE_FORMAT, rContext );
93 }
94 
96  const css::uno::Sequence<css::uno::Sequence<css::beans::StringPair>>& rContentTypes,
97  const OUString& rFilename)
98 {
99  if (rContentTypes.getLength() < 2)
100  {
101  return OUString();
102  }
103 
104  const uno::Sequence<beans::StringPair>& rDefaults = rContentTypes[0];
105  const uno::Sequence<beans::StringPair>& rOverrides = rContentTypes[1];
106 
107  // Find the extension and use it to get the type.
108  const sal_Int32 nDotOffset = rFilename.lastIndexOf('.');
109  const OUString aExt = (nDotOffset >= 0 ? rFilename.copy(nDotOffset + 1) : rFilename); // Skip the dot.
110 
111  const std::vector<OUString> aNames = { aExt, "/" + rFilename };
112  for (const OUString& aName : aNames)
113  {
114  const auto it1 = std::find_if(rOverrides.begin(), rOverrides.end(), [&aName](const beans::StringPair& rPair)
115  { return rPair.First == aName; });
116  if (it1 != rOverrides.end())
117  return it1->Second;
118 
119  const auto it2 = std::find_if(rDefaults.begin(), rDefaults.end(), [&aName](const beans::StringPair& rPair)
120  { return rPair.First == aName; });
121  if (it2 != rDefaults.end())
122  return it2->Second;
123  }
124 
125  return OUString();
126 }
127 
129  const uno::Reference< io::XOutputStream >& xOutStream,
130  const uno::Sequence< uno::Sequence< beans::StringPair > >& aSequence,
131  const uno::Reference< uno::XComponentContext >& rContext )
132 {
133  if ( !xOutStream.is() )
134  throw uno::RuntimeException();
135 
136  uno::Reference< css::xml::sax::XWriter > xWriter = css::xml::sax::Writer::create(rContext);
137 
138  xWriter->setOutputStream( xOutStream );
139 
140  OUString aRelListElement( "Relationships" );
141  OUString aRelElement( "Relationship" );
142  OUString aCDATAString( "CDATA" );
143  OUString aWhiteSpace( " " );
144 
145  // write the namespace
146  AttributeList* pRootAttrList = new AttributeList;
147  uno::Reference< css::xml::sax::XAttributeList > xRootAttrList( pRootAttrList );
148  pRootAttrList->AddAttribute(
149  "xmlns",
150  aCDATAString,
151  "http://schemas.openxmlformats.org/package/2006/relationships" );
152 
153  xWriter->startDocument();
154  xWriter->startElement( aRelListElement, xRootAttrList );
155 
156  for ( sal_Int32 nInd = 0; nInd < aSequence.getLength(); nInd++ )
157  {
158  AttributeList *pAttrList = new AttributeList;
159  uno::Reference< css::xml::sax::XAttributeList > xAttrList( pAttrList );
160  for( sal_Int32 nSecInd = 0; nSecInd < aSequence[nInd].getLength(); nSecInd++ )
161  {
162  if ( !(aSequence[nInd][nSecInd].First == "Id"
163  || aSequence[nInd][nSecInd].First == "Type"
164  || aSequence[nInd][nSecInd].First == "TargetMode"
165  || aSequence[nInd][nSecInd].First == "Target") )
166  {
167  // TODO/LATER: should the extensions be allowed?
168  throw lang::IllegalArgumentException();
169  }
170  pAttrList->AddAttribute( aSequence[nInd][nSecInd].First, aCDATAString, aSequence[nInd][nSecInd].Second );
171  }
172 
173  xWriter->startElement( aRelElement, xAttrList );
174  xWriter->ignorableWhitespace( aWhiteSpace );
175  xWriter->endElement( aRelElement );
176  }
177 
178  xWriter->ignorableWhitespace( aWhiteSpace );
179  xWriter->endElement( aRelListElement );
180  xWriter->endDocument();
181 }
182 
183 
185  const uno::Reference< io::XOutputStream >& xOutStream,
186  const uno::Sequence< beans::StringPair >& aDefaultsSequence,
187  const uno::Sequence< beans::StringPair >& aOverridesSequence,
188  const uno::Reference< uno::XComponentContext >& rContext )
189 {
190  if ( !xOutStream.is() )
191  throw uno::RuntimeException();
192 
193  uno::Reference< css::xml::sax::XWriter > xWriter = css::xml::sax::Writer::create(rContext);
194 
195  xWriter->setOutputStream( xOutStream );
196 
197  static const OUString aTypesElement("Types");
198  static const OUString aDefaultElement("Default");
199  static const OUString aOverrideElement("Override");
200  static const OUString aContentTypeAttr("ContentType");
201  static const OUString aCDATAString("CDATA");
202  static const OUString aWhiteSpace(" ");
203 
204  // write the namespace
205  AttributeList* pRootAttrList = new AttributeList;
206  uno::Reference< css::xml::sax::XAttributeList > xRootAttrList( pRootAttrList );
207  pRootAttrList->AddAttribute(
208  "xmlns",
209  aCDATAString,
210  "http://schemas.openxmlformats.org/package/2006/content-types" );
211 
212  xWriter->startDocument();
213  xWriter->startElement( aTypesElement, xRootAttrList );
214 
215  for ( sal_Int32 nInd = 0; nInd < aDefaultsSequence.getLength(); nInd++ )
216  {
217  AttributeList *pAttrList = new AttributeList;
218  uno::Reference< css::xml::sax::XAttributeList > xAttrList( pAttrList );
219  pAttrList->AddAttribute( "Extension", aCDATAString, aDefaultsSequence[nInd].First );
220  pAttrList->AddAttribute( aContentTypeAttr, aCDATAString, aDefaultsSequence[nInd].Second );
221 
222  xWriter->startElement( aDefaultElement, xAttrList );
223  xWriter->ignorableWhitespace( aWhiteSpace );
224  xWriter->endElement( aDefaultElement );
225  }
226 
227  for ( sal_Int32 nInd = 0; nInd < aOverridesSequence.getLength(); nInd++ )
228  {
229  AttributeList *pAttrList = new AttributeList;
230  uno::Reference< css::xml::sax::XAttributeList > xAttrList( pAttrList );
231  pAttrList->AddAttribute( "PartName", aCDATAString, aOverridesSequence[nInd].First );
232  pAttrList->AddAttribute( aContentTypeAttr, aCDATAString, aOverridesSequence[nInd].Second );
233 
234  xWriter->startElement( aOverrideElement, xAttrList );
235  xWriter->ignorableWhitespace( aWhiteSpace );
236  xWriter->endElement( aOverrideElement );
237  }
238 
239  xWriter->ignorableWhitespace( aWhiteSpace );
240  xWriter->endElement( aTypesElement );
241  xWriter->endDocument();
242 
243 }
244 
245 uno::Sequence< uno::Sequence< beans::StringPair > > ReadSequence_Impl(
246  const uno::Reference< io::XInputStream >& xInStream,
247  const OUString& aStringID, sal_uInt16 nFormat,
248  const uno::Reference< uno::XComponentContext >& rContext )
249 {
250  if ( !rContext.is() || !xInStream.is() || nFormat > FORMAT_MAX_ID )
251  throw uno::RuntimeException();
252 
253  uno::Reference< css::xml::sax::XParser > xParser = css::xml::sax::Parser::create( rContext );
254 
255  OFOPXMLHelper_Impl *const pHelper = new OFOPXMLHelper_Impl( nFormat );
256  uno::Reference< css::xml::sax::XDocumentHandler > xHelper( static_cast< css::xml::sax::XDocumentHandler* >( pHelper ) );
257  css::xml::sax::InputSource aParserInput;
258  aParserInput.aInputStream = xInStream;
259  aParserInput.sSystemId = aStringID;
260  xParser->setDocumentHandler( xHelper );
261  xParser->parseStream( aParserInput );
262  xParser->setDocumentHandler( uno::Reference < css::xml::sax::XDocumentHandler > () );
263 
264  return pHelper->GetParsingResult();
265 }
266 
267 } // namespace OFOPXMLHelper
268 
269 // Relations info related strings
270 static OUString const g_aRelListElement("Relationships");
271 static OUString const g_aRelElement( "Relationship" );
272 static OUString const g_aIDAttr( "Id" );
273 static OUString const g_aTypeAttr( "Type" );
274 static OUString const g_aTargetModeAttr( "TargetMode" );
275 static OUString const g_aTargetAttr( "Target" );
276 
277 // ContentType related strings
278 static OUString const g_aTypesElement( "Types" );
279 static OUString const g_aDefaultElement( "Default" );
280 static OUString const g_aOverrideElement( "Override" );
281 static OUString const g_aExtensionAttr( "Extension" );
282 static OUString const g_aPartNameAttr( "PartName" );
283 static OUString const g_aContentTypeAttr( "ContentType" );
284 
286 : m_nFormat( nFormat )
287 {
288 }
289 
290 uno::Sequence< uno::Sequence< beans::StringPair > > const & OFOPXMLHelper_Impl::GetParsingResult() const
291 {
292  if ( !m_aElementsSeq.empty() )
293  throw uno::RuntimeException(); // the parsing has still not finished!
294 
295  return m_aResultSeq;
296 }
297 
298 
300 {
301 }
302 
303 
305 {
306 }
307 
308 
309 void SAL_CALL OFOPXMLHelper_Impl::startElement( const OUString& aName, const uno::Reference< css::xml::sax::XAttributeList >& xAttribs )
310 {
312  {
313  if ( aName == g_aRelListElement )
314  {
315  sal_Int32 nNewLength = m_aElementsSeq.size() + 1;
316 
317  if ( nNewLength != 1 )
318  throw css::xml::sax::SAXException(); // TODO: this element must be the first level element
319 
320  m_aElementsSeq.push_back( aName );
321 
322  return; // nothing to do
323  }
324  else if ( aName == g_aRelElement )
325  {
326  sal_Int32 nNewLength = m_aElementsSeq.size() + 1;
327  if ( nNewLength != 2 )
328  throw css::xml::sax::SAXException(); // TODO: this element must be the second level element
329 
330  m_aElementsSeq.push_back( aName );
331 
332  sal_Int32 nNewEntryNum = m_aResultSeq.getLength() + 1;
333  m_aResultSeq.realloc( nNewEntryNum );
334  sal_Int32 nAttrNum = 0;
335  m_aResultSeq[nNewEntryNum-1].realloc( 4 ); // the maximal expected number of arguments is 4
336 
337  OUString aIDValue = xAttribs->getValueByName( g_aIDAttr );
338  if ( aIDValue.isEmpty() )
339  throw css::xml::sax::SAXException(); // TODO: the ID value must present
340 
341  OUString aTypeValue = xAttribs->getValueByName( g_aTypeAttr );
342  OUString aTargetValue = xAttribs->getValueByName( g_aTargetAttr );
343  OUString aTargetModeValue = xAttribs->getValueByName( g_aTargetModeAttr );
344 
345  m_aResultSeq[nNewEntryNum-1][++nAttrNum - 1].First = g_aIDAttr;
346  m_aResultSeq[nNewEntryNum-1][nAttrNum - 1].Second = aIDValue;
347 
348  if ( !aTypeValue.isEmpty() )
349  {
350  m_aResultSeq[nNewEntryNum-1][++nAttrNum - 1].First = g_aTypeAttr;
351  m_aResultSeq[nNewEntryNum-1][nAttrNum - 1].Second = aTypeValue;
352  }
353 
354  if ( !aTargetValue.isEmpty() )
355  {
356  m_aResultSeq[nNewEntryNum-1][++nAttrNum - 1].First = g_aTargetAttr;
357  m_aResultSeq[nNewEntryNum-1][nAttrNum - 1].Second = aTargetValue;
358  }
359 
360  if ( !aTargetModeValue.isEmpty() )
361  {
362  m_aResultSeq[nNewEntryNum-1][++nAttrNum - 1].First = g_aTargetModeAttr;
363  m_aResultSeq[nNewEntryNum-1][nAttrNum - 1].Second = aTargetModeValue;
364  }
365 
366  m_aResultSeq[nNewEntryNum-1].realloc( nAttrNum );
367  }
368  else
369  throw css::xml::sax::SAXException(); // TODO: no other elements expected!
370  }
371  else if ( m_nFormat == CONTENTTYPE_FORMAT )
372  {
373  if ( aName == g_aTypesElement )
374  {
375  sal_Int32 nNewLength = m_aElementsSeq.size() + 1;
376 
377  if ( nNewLength != 1 )
378  throw css::xml::sax::SAXException(); // TODO: this element must be the first level element
379 
380  m_aElementsSeq.push_back( aName );
381 
382  if ( !m_aResultSeq.hasElements() )
383  m_aResultSeq.realloc( 2 );
384 
385  return; // nothing to do
386  }
387  else if ( aName == g_aDefaultElement )
388  {
389  sal_Int32 nNewLength = m_aElementsSeq.size() + 1;
390  if ( nNewLength != 2 )
391  throw css::xml::sax::SAXException(); // TODO: this element must be the second level element
392 
393  m_aElementsSeq.push_back( aName );
394 
395  if ( !m_aResultSeq.hasElements() )
396  m_aResultSeq.realloc( 2 );
397 
398  if ( m_aResultSeq.getLength() != 2 )
399  throw uno::RuntimeException();
400 
401  const OUString aExtensionValue = xAttribs->getValueByName( g_aExtensionAttr );
402  if ( aExtensionValue.isEmpty() )
403  throw css::xml::sax::SAXException(); // TODO: the Extension value must present
404 
405  const OUString aContentTypeValue = xAttribs->getValueByName( g_aContentTypeAttr );
406  if ( aContentTypeValue.isEmpty() )
407  throw css::xml::sax::SAXException(); // TODO: the ContentType value must present
408 
409  const sal_Int32 nNewResultLen = m_aResultSeq[0].getLength() + 1;
410  m_aResultSeq[0].realloc( nNewResultLen );
411 
412  m_aResultSeq[0][nNewResultLen-1].First = aExtensionValue;
413  m_aResultSeq[0][nNewResultLen-1].Second = aContentTypeValue;
414  }
415  else if ( aName == g_aOverrideElement )
416  {
417  sal_Int32 nNewLength = m_aElementsSeq.size() + 1;
418  if ( nNewLength != 2 )
419  throw css::xml::sax::SAXException(); // TODO: this element must be the second level element
420 
421  m_aElementsSeq.push_back( aName );
422 
423  if ( !m_aResultSeq.hasElements() )
424  m_aResultSeq.realloc( 2 );
425 
426  if ( m_aResultSeq.getLength() != 2 )
427  throw uno::RuntimeException();
428 
429  OUString aPartNameValue = xAttribs->getValueByName( g_aPartNameAttr );
430  if ( aPartNameValue.isEmpty() )
431  throw css::xml::sax::SAXException(); // TODO: the PartName value must present
432 
433  OUString aContentTypeValue = xAttribs->getValueByName( g_aContentTypeAttr );
434  if ( aContentTypeValue.isEmpty() )
435  throw css::xml::sax::SAXException(); // TODO: the ContentType value must present
436 
437  sal_Int32 nNewResultLen = m_aResultSeq[1].getLength() + 1;
438  m_aResultSeq[1].realloc( nNewResultLen );
439 
440  m_aResultSeq[1][nNewResultLen-1].First = aPartNameValue;
441  m_aResultSeq[1][nNewResultLen-1].Second = aContentTypeValue;
442  }
443  else
444  throw css::xml::sax::SAXException(); // TODO: no other elements expected!
445  }
446  else
447  throw css::xml::sax::SAXException(); // TODO: no other elements expected!
448 }
449 
450 
// XDocumentHandler: removes the element being closed from the stack of open
// elements. Throws css::xml::sax::SAXException when no element is open or when
// aName differs from the most recently opened element.
void SAL_CALL OFOPXMLHelper_Impl::endElement( const OUString& aName )
{
    // NOTE(review): the listing's own line numbering skips one line right here
    // (452 -> 454), so a statement guarding the block below -- presumably a
    // check of m_nFormat -- may have been lost. Confirm against the original file.
    {
        sal_Int32 nLength = m_aElementsSeq.size();
        if ( nLength <= 0 )
            throw css::xml::sax::SAXException(); // TODO: no other end elements expected!

        // the closing tag has to match the top of the stack
        if ( m_aElementsSeq[nLength-1] != aName )
            throw css::xml::sax::SAXException(); // TODO: unexpected element ended

        // pop the top element
        m_aElementsSeq.resize( nLength - 1 );
    }
}
465 
466 
467 void SAL_CALL OFOPXMLHelper_Impl::characters( const OUString& /*aChars*/ )
468 {
469 }
470 
471 
472 void SAL_CALL OFOPXMLHelper_Impl::ignorableWhitespace( const OUString& /*aWhitespaces*/ )
473 {
474 }
475 
476 
477 void SAL_CALL OFOPXMLHelper_Impl::processingInstruction( const OUString& /*aTarget*/, const OUString& /*aData*/ )
478 {
479 }
480 
481 
482 void SAL_CALL OFOPXMLHelper_Impl::setDocumentLocator( const uno::Reference< css::xml::sax::XLocator >& /*xLocator*/ )
483 {
484 }
485 
486 } // namespace comphelper
487 
488 /* vim:set shiftwidth=4 softtabstop=4 expandtab: */
virtual void SAL_CALL ignorableWhitespace(const OUString &aWhitespaces) override
static OUString const g_aExtensionAttr("Extension")
css::uno::Sequence< css::uno::Sequence< css::beans::StringPair > > m_aResultSeq
virtual void SAL_CALL characters(const OUString &aChars) override
std::vector< OUString > m_aElementsSeq
static OUString const g_aTypesElement("Types")
#define CONTENTTYPE_FORMAT
void WriteContentSequence(const uno::Reference< io::XOutputStream > &xOutStream, const uno::Sequence< beans::StringPair > &aDefaultsSequence, const uno::Sequence< beans::StringPair > &aOverridesSequence, const uno::Reference< uno::XComponentContext > &rContext)
virtual void SAL_CALL endDocument() override
static OUString const g_aOverrideElement("Override")
static OUString const g_aIDAttr("Id")
void AddAttribute(const OUString &sName, const OUString &sType, const OUString &sValue)
static OUString const g_aTypeAttr("Type")
OFOPXMLHelper_Impl(sal_uInt16 nFormat)
virtual void SAL_CALL processingInstruction(const OUString &aTarget, const OUString &aData) override
OUString GetContentTypeByName(const css::uno::Sequence< css::uno::Sequence< css::beans::StringPair >> &rContentTypes, const OUString &rFilename)
virtual void SAL_CALL startDocument() override
static OUString const g_aPartNameAttr("PartName")
static OUString const g_aRelListElement("Relationships")
static OUString const g_aRelElement("Relationship")
static OUString const g_aTargetModeAttr("TargetMode")
static OUString const g_aContentTypeAttr("ContentType")
void WriteRelationsInfoSequence(const uno::Reference< io::XOutputStream > &xOutStream, const uno::Sequence< uno::Sequence< beans::StringPair > > &aSequence, const uno::Reference< uno::XComponentContext > &rContext)
css::uno::Sequence< css::uno::Sequence< css::beans::StringPair > > const & GetParsingResult() const
uno::Sequence< uno::Sequence< beans::StringPair > > ReadRelationsInfoSequence(const uno::Reference< io::XInputStream > &xInStream, const OUString &aStreamName, const uno::Reference< uno::XComponentContext > &rContext)
static OUString const g_aDefaultElement("Default")
virtual void SAL_CALL startElement(const OUString &aName, const css::uno::Reference< css::xml::sax::XAttributeList > &xAttribs) override
static OUString const g_aTargetAttr("Target")
#define RELATIONINFO_FORMAT
OString const aName
virtual void SAL_CALL setDocumentLocator(const css::uno::Reference< css::xml::sax::XLocator > &xLocator) override
virtual void SAL_CALL endElement(const OUString &aName) override
sal_Int32 const nLength
uno::Sequence< uno::Sequence< beans::StringPair > > ReadContentTypeSequence(const uno::Reference< io::XInputStream > &xInStream, const uno::Reference< uno::XComponentContext > &rContext)
#define FORMAT_MAX_ID
static uno::Sequence< uno::Sequence< beans::StringPair > > ReadSequence_Impl(const uno::Reference< io::XInputStream > &xInStream, const OUString &aStringID, sal_uInt16 nFormat, const uno::Reference< uno::XComponentContext > &xContext)