LibreOffice Module oox (master)  1
recordparser.cxx
Go to the documentation of this file.
1 /* -*- Mode: C++; tab-width: 4; indent-tabs-mode: nil; c-basic-offset: 4 -*- */
2 /*
3  * This file is part of the LibreOffice project.
4  *
5  * This Source Code Form is subject to the terms of the Mozilla Public
6  * License, v. 2.0. If a copy of the MPL was not distributed with this
7  * file, You can obtain one at http://mozilla.org/MPL/2.0/.
8  *
9  * This file incorporates work covered by the following license notice:
10  *
11  * Licensed to the Apache Software Foundation (ASF) under one or more
12  * contributor license agreements. See the NOTICE file distributed
13  * with this work for additional information regarding copyright
14  * ownership. The ASF licenses this file to you under the Apache
15  * License, Version 2.0 (the "License"); you may not use this file
16  * except in compliance with the License. You may obtain a copy of
17  * the License at http://www.apache.org/licenses/LICENSE-2.0 .
18  */
19 
21 
22 #include <vector>
23 #include <com/sun/star/io/IOException.hpp>
24 #include <com/sun/star/lang/DisposedException.hpp>
25 #include <com/sun/star/xml/sax/SAXException.hpp>
26 #include <com/sun/star/xml/sax/XLocator.hpp>
27 #include <cppuhelper/implbase.hxx>
28 #include <osl/diagnose.h>
30 
31 namespace oox::core {
32 
33 using namespace ::com::sun::star::io;
34 using namespace ::com::sun::star::lang;
35 using namespace ::com::sun::star::uno;
36 using namespace ::com::sun::star::xml::sax;
37 
38 namespace prv {
39 
40 class Locator : public ::cppu::WeakImplHelper< XLocator >
41 {
42 public:
43  explicit Locator( RecordParser* pParser ) : mpParser( pParser ) {}
44 
45  void dispose();
47  void checkDispose();
48 
49  // com.sun.star.sax.XLocator interface
50 
51  virtual sal_Int32 SAL_CALL getColumnNumber() override;
52  virtual sal_Int32 SAL_CALL getLineNumber() override;
53  virtual OUString SAL_CALL getPublicId() override;
54  virtual OUString SAL_CALL getSystemId() override;
55 
56 private:
58 };
59 
61 {
62  mpParser = nullptr;
63 }
64 
66 {
67  if( !mpParser )
68  throw DisposedException();
69 }
70 
71 sal_Int32 SAL_CALL Locator::getColumnNumber()
72 {
73  return -1;
74 }
75 
76 sal_Int32 SAL_CALL Locator::getLineNumber()
77 {
78  return -1;
79 }
80 
81 OUString SAL_CALL Locator::getPublicId()
82 {
83  checkDispose();
84  return OUString();
85 }
86 
87 OUString SAL_CALL Locator::getSystemId()
88 {
89  checkDispose();
91 }
92 
94 {
95 public:
96  explicit ContextStack( FragmentHandlerRef const & xHandler );
97 
98  bool empty() const { return maStack.empty(); }
99 
100  sal_Int32 getCurrentRecId() const;
101  bool hasCurrentEndRecId() const;
103 
104  void pushContext( const RecordInfo& rRec, const ContextHandlerRef& rxContext );
105  void popContext();
106 
107 private:
108  typedef ::std::pair< RecordInfo, ContextHandlerRef > ContextInfo;
109  typedef ::std::vector< ContextInfo > ContextInfoVec;
110 
112  ContextInfoVec maStack;
113 };
114 
116  mxHandler( xHandler )
117 {
118 }
119 
121 {
122  return maStack.empty() ? -1 : maStack.back().first.mnStartRecId;
123 }
124 
126 {
127  return !maStack.empty() && (maStack.back().first.mnEndRecId >= 0);
128 }
129 
131 {
132  if( !maStack.empty() )
133  return maStack.back().second;
134  return mxHandler;
135 }
136 
137 void ContextStack::pushContext( const RecordInfo& rRecInfo, const ContextHandlerRef& rxContext )
138 {
139  OSL_ENSURE( (rRecInfo.mnEndRecId >= 0) || maStack.empty() || hasCurrentEndRecId(),
140  "ContextStack::pushContext - nested incomplete context record identifiers" );
141  maStack.emplace_back( rRecInfo, rxContext );
142 }
143 
145 {
146  OSL_ENSURE( !maStack.empty(), "ContextStack::popContext - no context on stack" );
147  if( !maStack.empty() )
148  {
149  ContextInfo& rContextInfo = maStack.back();
150  if( rContextInfo.second.is() )
151  rContextInfo.second->endRecord( rContextInfo.first.mnStartRecId );
152  maStack.pop_back();
153  }
154 }
155 
156 } // namespace oox::core::prv
157 
158 namespace {
159 
161 bool lclReadByte( sal_uInt8& ornByte, BinaryInputStream& rStrm )
162 {
163  return rStrm.readMemory( &ornByte, 1 ) == 1;
164 }
165 
167 bool lclReadCompressedInt( sal_Int32& ornValue, BinaryInputStream& rStrm )
168 {
169  ornValue = 0;
170  sal_uInt8 nByte;
171  if( !lclReadByte( nByte, rStrm ) ) return false;
172  ornValue = nByte & 0x7F;
173  if( (nByte & 0x80) == 0 ) return true;
174  if( !lclReadByte( nByte, rStrm ) ) return false;
175  ornValue |= sal_Int32( nByte & 0x7F ) << 7;
176  if( (nByte & 0x80) == 0 ) return true;
177  if( !lclReadByte( nByte, rStrm ) ) return false;
178  ornValue |= sal_Int32( nByte & 0x7F ) << 14;
179  if( (nByte & 0x80) == 0 ) return true;
180  if( !lclReadByte( nByte, rStrm ) ) return false;
181  ornValue |= sal_Int32( nByte & 0x7F ) << 21;
182  return true;
183 }
184 
185 bool lclReadRecordHeader( sal_Int32& ornRecId, sal_Int32& ornRecSize, BinaryInputStream& rStrm )
186 {
187  return
188  lclReadCompressedInt( ornRecId, rStrm ) && (ornRecId >= 0) &&
189  lclReadCompressedInt( ornRecSize, rStrm ) && (ornRecSize >= 0);
190 }
191 
192 bool lclReadNextRecord( sal_Int32& ornRecId, StreamDataSequence& orData, BinaryInputStream& rStrm )
193 {
194  sal_Int32 nRecSize = 0;
195  bool bValid = lclReadRecordHeader( ornRecId, nRecSize, rStrm );
196  if( bValid )
197  {
198  orData.realloc( nRecSize );
199  bValid = (nRecSize == 0) || (rStrm.readData( orData, nRecSize ) == nRecSize);
200  }
201  return bValid;
202 }
203 
204 } // namespace
205 
207 {
208  mxLocator.set( new prv::Locator( this ) );
209 }
210 
212 {
213  if( mxLocator.is() )
214  mxLocator->dispose();
215 }
216 
217 void RecordParser::setFragmentHandler( const ::rtl::Reference< FragmentHandler >& rxHandler )
218 {
219  mxHandler = rxHandler;
220 
221  // build record infos
222  maStartMap.clear();
223  maEndMap.clear();
224  const RecordInfo* pRecs = mxHandler.is() ? mxHandler->getRecordInfos() : nullptr;
225  OSL_ENSURE( pRecs, "RecordInfoProvider::RecordInfoProvider - missing record list" );
226  for( ; pRecs && pRecs->mnStartRecId >= 0; ++pRecs )
227  {
228  maStartMap[ pRecs->mnStartRecId ] = *pRecs;
229  if( pRecs->mnEndRecId >= 0 )
230  maEndMap[ pRecs->mnEndRecId ] = *pRecs;
231  }
232 }
233 
/** Parses the record stream of the passed input source and forwards every
    record to the fragment handler or to the context handlers it creates.

    The input source is copied to maSource. The handler receives the locator
    and startDocument() before the first record and endDocument() after all
    contexts have been closed. Records are read with lclReadNextRecord()
    until it fails (stream end or invalid record).

    @throws IOException   if the source has no input stream or the stream is
                          already at its end.
    @throws SAXException  if no fragment handler has been set.
 */
234 void RecordParser::parseStream( const RecordInputSource& rInputSource )
235 {
236  maSource = rInputSource;
237 
238  if( !maSource.mxInStream || maSource.mxInStream->isEof() )
239  throw IOException();
240  if( !mxHandler.is() )
241  throw SAXException();
242 
243  // start the document
244  mxHandler->setDocumentLocator( mxLocator );
245  mxHandler->startDocument();
246 
247  // parse the stream
248  mxStack.reset( new prv::ContextStack( mxHandler ) );
249  sal_Int32 nRecId = 0;
250  StreamDataSequence aRecData;
251  while( lclReadNextRecord( nRecId, aRecData, *maSource.mxInStream ) )
252  {
253  // create record stream object from imported record data
254  SequenceInputStream aRecStrm( aRecData );
255  // try to leave a context, there may be other incomplete contexts on the stack
256  if( const RecordInfo* pEndRecInfo = getEndRecordInfo( nRecId ) )
257  {
258  // finalize contexts without record identifier for context end
259  while( !mxStack->empty() && !mxStack->hasCurrentEndRecId() )
260  mxStack->popContext();
261  // finalize the current context and pop context info from stack
262  OSL_ENSURE( mxStack->getCurrentRecId() == pEndRecInfo->mnStartRecId, "RecordParser::parseStream - context records mismatch" );
263  ContextHandlerRef xCurrContext = mxStack->getCurrentContext();
264  if( xCurrContext.is() )
265  {
266  // context end record may contain some data, handle it as simple record
267  aRecStrm.seekToStart();
268  xCurrContext->startRecord( nRecId, aRecStrm );
269  xCurrContext->endRecord( nRecId );
270  }
271  mxStack->popContext();
272  }
273  else
274  {
275  // end context with incomplete record id, if the same id comes again
276  if( (mxStack->getCurrentRecId() == nRecId) && !mxStack->hasCurrentEndRecId() )
277  mxStack->popContext();
278  // try to start a new context
279  ContextHandlerRef xCurrContext = mxStack->getCurrentContext();
280  if( xCurrContext.is() )
281  {
// rewind first: the stream is handed to several callbacks for the same record
282  aRecStrm.seekToStart();
// from here on xCurrContext is whatever the parent context returned for this record
283  xCurrContext = xCurrContext->createRecordContext( nRecId, aRecStrm );
284  }
285  // track all context identifiers on the stack (do not push simple records)
286  const RecordInfo* pStartRecInfo = getStartRecordInfo( nRecId );
287  if( pStartRecInfo )
288  mxStack->pushContext( *pStartRecInfo, xCurrContext );
289  // import the record
290  if( xCurrContext.is() )
291  {
292  // import the record
293  aRecStrm.seekToStart();
294  xCurrContext->startRecord( nRecId, aRecStrm );
295  // end simple records (context records are finished in ContextStack::popContext)
296  if( !pStartRecInfo )
297  xCurrContext->endRecord( nRecId );
298  }
299  }
300  }
301  // close remaining contexts (missing context end records or stream error)
302  while( !mxStack->empty() )
303  mxStack->popContext();
304  mxStack.reset();
305 
306  // finish document
307  mxHandler->endDocument();
308 
// NOTE(review): the embedded listing numbers jump from 308 to 310 here, so one
// source line appears to be missing from this extract - verify against the
// original file before relying on this function's final state.
310 }
311 
312 const RecordInfo* RecordParser::getStartRecordInfo( sal_Int32 nRecId ) const
313 {
314  RecordInfoMap::const_iterator aIt = maStartMap.find( nRecId );
315  return (aIt == maStartMap.end()) ? nullptr : &aIt->second;
316 }
317 
318 const RecordInfo* RecordParser::getEndRecordInfo( sal_Int32 nRecId ) const
319 {
320  RecordInfoMap::const_iterator aIt = maEndMap.find( nRecId );
321  return (aIt == maEndMap.end()) ? nullptr : &aIt->second;
322 }
323 
324 } // namespace oox::core
325 
326 /* vim:set shiftwidth=4 softtabstop=4 expandtab: */
Describes record identifiers used to create contexts in a binary stream.
void parseStream(const RecordInputSource &rInputSource)
::rtl::Reference< prv::Locator > mxLocator
virtual sal_Int32 SAL_CALL getColumnNumber() override
void setFragmentHandler(const ::rtl::Reference< FragmentHandler > &rxHandler)
sal_Int32 mnEndRecId
Record identifier for context end, -1 = no record.
RecordParser * mpParser
sal_Int32 getCurrentRecId() const
virtual sal_Int32 readMemory(void *opMem, sal_Int32 nBytes, size_t nAtomSize=1)=0
Derived classes implement reading nBytes bytes to the (preallocated!) memory buffer opMem...
const RecordInfo * getEndRecordInfo(sal_Int32 nRecId) const
Returns a RecordInfo struct that contains the passed record identifier as context end identifier...
sal_Int32 mnStartRecId
Record identifier for context start.
void seekToStart()
Seeks the stream to the beginning, if stream is seekable.
ContextStack(FragmentHandlerRef const &xHandler)
Locator(RecordParser *pParser)
Interface for binary input stream classes.
const RecordInfo * getStartRecordInfo(sal_Int32 nRecId) const
Returns a RecordInfo struct that contains the passed record identifier as context start identifier...
css::uno::Sequence< sal_Int8 > StreamDataSequence
::std::vector< ContextInfo > ContextInfoVec
::std::unique_ptr< prv::ContextStack > mxStack
RecordInfoMap maStartMap
const RecordInputSource & getInputSource() const
BinaryInputStreamRef mxInStream
FragmentHandlerRef mxHandler
rtl::Reference< FragmentHandler > mxHandler
virtual OUString SAL_CALL getSystemId() override
RecordInputSource maSource
unsigned char sal_uInt8
ContextHandlerRef getCurrentContext() const
virtual OUString SAL_CALL getPublicId() override
Wraps a StreamDataSequence and provides convenient access functions.
void pushContext(const RecordInfo &rRec, const ContextHandlerRef &rxContext)
virtual sal_Int32 SAL_CALL getLineNumber() override
::rtl::Reference< FragmentHandler > mxHandler
::std::pair< RecordInfo, ContextHandlerRef > ContextInfo