LibreOffice Module sdext (master)  1
pdfparse.cxx
Go to the documentation of this file.
1 /* -*- Mode: C++; tab-width: 4; indent-tabs-mode: nil; c-basic-offset: 4 -*- */
2 /*
3  * This file is part of the LibreOffice project.
4  *
5  * This Source Code Form is subject to the terms of the Mozilla Public
6  * License, v. 2.0. If a copy of the MPL was not distributed with this
7  * file, You can obtain one at http://mozilla.org/MPL/2.0/.
8  *
9  * This file incorporates work covered by the following license notice:
10  *
11  * Licensed to the Apache Software Foundation (ASF) under one or more
12  * contributor license agreements. See the NOTICE file distributed
13  * with this work for additional information regarding copyright
14  * ownership. The ASF licenses this file to you under the Apache
15  * License, Version 2.0 (the "License"); you may not use this file
16  * except in compliance with the License. You may obtain a copy of
17  * the License at http://www.apache.org/licenses/LICENSE-2.0 .
18  */
19 
20 
21 #include <pdfparse.hxx>
22 
23 // boost using obsolete stuff
24 #if defined(_MSC_VER)
25 #pragma warning(push)
26 #pragma warning(disable:4996)
27 #pragma warning(disable:4503)
28 #endif
29 
30 // workaround windows compiler: do not include multi_pass.hpp
31 #include <boost/spirit/include/classic_core.hpp>
32 #include <boost/spirit/include/classic_utility.hpp>
33 #include <boost/spirit/include/classic_error_handling.hpp>
34 #include <boost/spirit/include/classic_file_iterator.hpp>
35 #include <boost/bind/bind.hpp>
36 
37 #include <string.h>
38 
39 #include <o3tl/safeint.hxx>
40 #include <rtl/strbuf.hxx>
41 #include <rtl/ustrbuf.hxx>
42 #include <sal/log.hxx>
43 
44 // disable warnings again because someone along the line has enabled them
45 // (we have included boost headers, what did you expect?)
46 #if defined(_MSC_VER)
47 #pragma warning(push)
48 #pragma warning(disable:4996)
49 #pragma warning(disable:4503)
50 #endif
51 
52 
53 using namespace boost::spirit::classic;
54 using namespace pdfparse;
55 
56 namespace {
57 
58 class StringEmitContext : public EmitContext
59 {
60  OStringBuffer m_aBuf;
61  public:
62  StringEmitContext() : EmitContext(), m_aBuf(256) {}
63 
64  virtual bool write( const void* pBuf, unsigned int nLen ) throw() override
65  {
66  m_aBuf.append( static_cast<const char*>(pBuf), nLen );
67  return true;
68  }
69  virtual unsigned int getCurPos() throw() override { return m_aBuf.getLength(); }
70  virtual bool copyOrigBytes( unsigned int nOrigOffset, unsigned int nLen ) throw() override
71  { return (nOrigOffset+nLen < o3tl::make_unsigned(m_aBuf.getLength()) ) &&
72  write( m_aBuf.getStr() + nOrigOffset, nLen ); }
73  virtual unsigned int readOrigBytes( unsigned int nOrigOffset, unsigned int nLen, void* pBuf ) throw() override
74  {
75  if( nOrigOffset+nLen < o3tl::make_unsigned(m_aBuf.getLength()) )
76  {
77  memcpy( pBuf, m_aBuf.getStr()+nOrigOffset, nLen );
78  return nLen;
79  }
80  return 0;
81  }
82 
83  OString getString() { return m_aBuf.makeStringAndClear(); }
84 };
85 
86 template< class iteratorT >
87 class PDFGrammar : public grammar< PDFGrammar<iteratorT> >
88 {
89 public:
90 
91  explicit PDFGrammar( const iteratorT& first )
92  : m_fDouble( 0.0 ), m_aGlobalBegin( first ) {}
93  ~PDFGrammar()
94  {
95  if( !m_aObjectStack.empty() )
96  delete m_aObjectStack.front();
97  }
98 
99  double m_fDouble;
100  std::vector< unsigned int > m_aUIntStack;
101  std::vector< PDFEntry* > m_aObjectStack;
102  OString m_aErrorString;
103  iteratorT m_aGlobalBegin;
104 
105 public:
106  struct pdf_string_parser
107  {
108  typedef nil_t result_t;
109  template <typename ScannerT>
110  std::ptrdiff_t
111  operator()(ScannerT const& scan, result_t&) const
112  {
113  std::ptrdiff_t len = 0;
114 
115  int nBraceLevel = 0;
116  while( ! scan.at_end() )
117  {
118  char c = *scan;
119  if( c == ')' )
120  {
121  nBraceLevel--;
122  if( nBraceLevel < 0 )
123  break;
124  }
125  else if( c == '(' )
126  nBraceLevel++;
127  else if( c == '\\' ) // ignore escaped braces
128  {
129  ++len;
130  ++scan.first; // tdf#63054: avoid skipping spaces
131  if( scan.first == scan.last ) // tdf#63054: avoid skipping spaces
132  break;
133  }
134  ++len;
135  ++scan;
136  }
137  return scan.at_end() ? -1 : len;
138  }
139  };
140 
141  template< typename ScannerT >
142  struct definition
143  {
144  explicit definition( const PDFGrammar<iteratorT>& rSelf )
145  {
146  using namespace boost::placeholders;
147 
148  PDFGrammar<iteratorT>* pSelf = const_cast< PDFGrammar<iteratorT>* >( &rSelf );
149 
150  // workaround workshop compiler: comment_p doesn't work
151  // comment = comment_p("%")[boost::bind(&PDFGrammar::pushComment, pSelf, _1, _2 )];
152  comment = lexeme_d[ (ch_p('%') >> *(~ch_p('\r') & ~ch_p('\n')) >> eol_p)[boost::bind(&PDFGrammar::pushComment, pSelf, _1, _2 )] ];
153 
154  boolean = (str_p("true") | str_p("false"))[boost::bind(&PDFGrammar::pushBool, pSelf, _1, _2)];
155 
156  // workaround workshop compiler: confix_p doesn't work
157  //stream = confix_p( "stream", *anychar_p, "endstream" )[boost::bind(&PDFGrammar::emitStream, pSelf, _1, _2 )];
158  stream = (str_p("stream") >> *(anychar_p - str_p("endstream")) >> str_p("endstream"))[boost::bind(&PDFGrammar::emitStream, pSelf, _1, _2 )];
159 
160  name = lexeme_d[
161  ch_p('/')
162  >> (*(anychar_p-chset_p("\t\n\f\r ()<>[]{}/%")-ch_p('\0')))
163  [boost::bind(&PDFGrammar::pushName, pSelf, _1, _2)] ];
164 
165  // workaround workshop compiler: confix_p doesn't work
166  //stringtype = ( confix_p("(",*anychar_p, ")") |
167  // confix_p("<",*xdigit_p, ">") )
168  // [boost::bind(&PDFGrammar::pushString,pSelf, _1, _2)];
169 
170  stringtype = ( ( ch_p('(') >> functor_parser<pdf_string_parser>() >> ch_p(')') ) |
171  ( ch_p('<') >> *xdigit_p >> ch_p('>') ) )
172  [boost::bind(&PDFGrammar::pushString,pSelf, _1, _2)];
173 
174  null_object = str_p( "null" )[boost::bind(&PDFGrammar::pushNull, pSelf, _1, _2)];
175 
176  #ifdef USE_ASSIGN_ACTOR
177  objectref = ( uint_p[push_back_a(pSelf->m_aUIntStack)]
178  >> uint_p[push_back_a(pSelf->m_aUIntStack)]
179  >> ch_p('R')
180  >> eps_p
181  )[boost::bind(&PDFGrammar::pushObjectRef, pSelf, _1, _2)];
182  #else
183  objectref = ( uint_p[boost::bind(&PDFGrammar::push_back_action_uint, pSelf, _1)]
184  >> uint_p[boost::bind(&PDFGrammar::push_back_action_uint, pSelf, _1)]
185  >> ch_p('R')
186  >> eps_p
187  )[boost::bind(&PDFGrammar::pushObjectRef, pSelf, _1, _2)];
188  #endif
189 
190  #ifdef USE_ASSIGN_ACTOR
191  simple_type = objectref | name |
192  ( real_p[assign_a(pSelf->m_fDouble)] >> eps_p )
193  [boost::bind(&PDFGrammar::pushDouble, pSelf, _1, _2)]
194  | stringtype | boolean | null_object;
195  #else
196  simple_type = objectref | name |
197  ( real_p[boost::bind(&PDFGrammar::assign_action_double, pSelf, _1)] >> eps_p )
198  [boost::bind(&PDFGrammar::pushDouble, pSelf, _1, _2)]
199  | stringtype | boolean | null_object;
200  #endif
201 
202  dict_begin = str_p( "<<" )[boost::bind(&PDFGrammar::beginDict, pSelf, _1, _2)];
203  dict_end = str_p( ">>" )[boost::bind(&PDFGrammar::endDict, pSelf, _1, _2)];
204 
205  array_begin = str_p("[")[boost::bind(&PDFGrammar::beginArray,pSelf, _1, _2)];
206  array_end = str_p("]")[boost::bind(&PDFGrammar::endArray,pSelf, _1, _2)];
207 
208  #ifdef USE_ASSIGN_ACTOR
209  object_begin= uint_p[push_back_a(pSelf->m_aUIntStack)]
210  >> uint_p[push_back_a(pSelf->m_aUIntStack)]
211  >> str_p("obj" )[boost::bind(&PDFGrammar::beginObject, pSelf, _1, _2)];
212  #else
213  object_begin= uint_p[boost::bind(&PDFGrammar::push_back_action_uint, pSelf, _1)]
214  >> uint_p[boost::bind(&PDFGrammar::push_back_action_uint, pSelf, _1)]
215  >> str_p("obj" )[boost::bind(&PDFGrammar::beginObject, pSelf, _1, _2)];
216  #endif
217  object_end = str_p( "endobj" )[boost::bind(&PDFGrammar::endObject, pSelf, _1, _2)];
218 
219  xref = str_p( "xref" ) >> uint_p >> uint_p
220  >> lexeme_d[
221  +( repeat_p(10)[digit_p]
222  >> blank_p
223  >> repeat_p(5)[digit_p]
224  >> blank_p
225  >> ( ch_p('n') | ch_p('f') )
226  >> repeat_p(2)[space_p]
227  ) ];
228 
229  dict_element= dict_begin | comment | simple_type
230  | array_begin | array_end | dict_end;
231 
232  object = object_begin
233  >> *dict_element
234  >> !stream
235  >> object_end;
236 
237  trailer = str_p( "trailer" )[boost::bind(&PDFGrammar::beginTrailer,pSelf,_1,_2)]
238  >> *dict_element
239  >> str_p("startxref")
240  >> uint_p
241  >> str_p("%%EOF")[boost::bind(&PDFGrammar::endTrailer,pSelf,_1,_2)];
242 
243  #ifdef USE_ASSIGN_ACTOR
244  pdfrule = ! (lexeme_d[
245  str_p( "%PDF-" )
246  >> uint_p[push_back_a(pSelf->m_aUIntStack)]
247  >> ch_p('.')
248  >> uint_p[push_back_a(pSelf->m_aUIntStack)]
249  >> *((~ch_p('\r') & ~ch_p('\n')))
250  >> eol_p
251  ])[boost::bind(&PDFGrammar::haveFile,pSelf, _1, _2)]
252  >> *( comment | object | ( xref >> trailer ) );
253  #else
254  pdfrule = ! (lexeme_d[
255  str_p( "%PDF-" )
256  >> uint_p[boost::bind(&PDFGrammar::push_back_action_uint, pSelf, _1)]
257  >> ch_p('.')
258  >> uint_p[boost::bind(&PDFGrammar::push_back_action_uint, pSelf, _1)]
259  >> *(~ch_p('\r') & ~ch_p('\n'))
260  >> eol_p
261  ])[boost::bind(&PDFGrammar::haveFile,pSelf, _1, _2)]
262  >> *( comment | object | ( xref >> trailer ) );
263  #endif
264  }
265  rule< ScannerT > comment, stream, boolean, name, stringtype, null_object, simple_type,
266  objectref, array, value, dict_element, dict_begin, dict_end,
267  array_begin, array_end, object, object_begin, object_end,
268  xref, trailer, pdfrule;
269 
270  const rule< ScannerT >& start() const { return pdfrule; }
271  };
272 
273  #ifndef USE_ASSIGN_ACTOR
274  void push_back_action_uint( unsigned int i )
275  {
276  m_aUIntStack.push_back( i );
277  }
278  void assign_action_double( double d )
279  {
280  m_fDouble = d;
281  }
282  #endif
283 
284  static void parseError( const char* pMessage, iteratorT pLocation )
285  {
286  throw_( pLocation, pMessage );
287  }
288 
289  OString iteratorToString( iteratorT first, iteratorT last ) const
290  {
291  OStringBuffer aStr( 32 );
292  while( first != last )
293  {
294  aStr.append( *first );
295  ++first;
296  }
297  return aStr.makeStringAndClear();
298  }
299 
300  void haveFile( iteratorT pBegin, SAL_UNUSED_PARAMETER iteratorT /*pEnd*/ )
301  {
302  if( m_aObjectStack.empty() )
303  {
304  PDFFile* pFile = new PDFFile();
305  pFile->m_nMinor = m_aUIntStack.back();
306  m_aUIntStack.pop_back();
307  pFile->m_nMajor = m_aUIntStack.back();
308  m_aUIntStack.pop_back();
309  m_aObjectStack.push_back( pFile );
310  }
311  else
312  parseError( "found file header in unusual place", pBegin );
313  }
314 
315  void pushComment( iteratorT first, iteratorT last )
316  {
317  // add a comment to the current stack element
318  PDFComment* pComment =
319  new PDFComment(iteratorToString(first,last));
320  if( m_aObjectStack.empty() )
321  m_aObjectStack.push_back( new PDFPart() );
322  PDFContainer* pContainer = dynamic_cast<PDFContainer*>(m_aObjectStack.back());
323  if( pContainer == nullptr )
324  parseError( "comment without container", first );
325  pContainer->m_aSubElements.emplace_back( pComment );
326  }
327 
328  void insertNewValue( std::unique_ptr<PDFEntry> pNewValue, iteratorT pPos )
329  {
330  PDFContainer* pContainer = nullptr;
331  const char* pMsg = nullptr;
332  if( ! m_aObjectStack.empty() )
333  {
334  pContainer = dynamic_cast<PDFContainer*>(m_aObjectStack.back());
335  if (pContainer)
336  {
337  if( dynamic_cast<PDFDict*>(pContainer) == nullptr &&
338  dynamic_cast<PDFArray*>(pContainer) == nullptr )
339  {
340  PDFObject* pObj = dynamic_cast<PDFObject*>(pContainer);
341  if( pObj )
342  {
343  if( pObj->m_pObject == nullptr )
344  pObj->m_pObject = pNewValue.get();
345  else
346  {
347  pMsg = "second value for object";
348  pContainer = nullptr;
349  }
350  }
351  else if( dynamic_cast<PDFDict*>(pNewValue.get()) )
352  {
353  PDFTrailer* pTrailer = dynamic_cast<PDFTrailer*>(pContainer);
354  if( pTrailer )
355  {
356  if( pTrailer->m_pDict == nullptr )
357  pTrailer->m_pDict = dynamic_cast<PDFDict*>(pNewValue.get());
358  else
359  pContainer = nullptr;
360  }
361  else
362  pContainer = nullptr;
363  }
364  else
365  pContainer = nullptr;
366  }
367  }
368  }
369  if( pContainer )
370  pContainer->m_aSubElements.emplace_back( std::move(pNewValue) );
371  else
372  {
373  if( ! pMsg )
374  {
375  if( dynamic_cast<PDFContainer*>(pNewValue.get()) )
376  pMsg = "array without container";
377  else
378  pMsg = "value without container";
379  }
380  parseError( pMsg, pPos );
381  }
382  }
383 
384  void pushName( iteratorT first, iteratorT last )
385  {
386  insertNewValue( std::make_unique<PDFName>(iteratorToString(first,last)), first );
387  }
388 
389  void pushDouble( iteratorT first, SAL_UNUSED_PARAMETER iteratorT /*last*/ )
390  {
391  insertNewValue( std::make_unique<PDFNumber>(m_fDouble), first );
392  }
393 
394  void pushString( iteratorT first, iteratorT last )
395  {
396  insertNewValue( std::make_unique<PDFString>(iteratorToString(first,last)), first );
397  }
398 
399  void pushBool( iteratorT first, iteratorT last )
400  {
401  insertNewValue( std::make_unique<PDFBool>( last-first == 4 ), first );
402  }
403 
404  void pushNull( iteratorT first, SAL_UNUSED_PARAMETER iteratorT )
405  {
406  insertNewValue( std::make_unique<PDFNull>(), first );
407  }
408 
409 
410  void beginObject( iteratorT first, SAL_UNUSED_PARAMETER iteratorT /*last*/ )
411  {
412  if( m_aObjectStack.empty() )
413  m_aObjectStack.push_back( new PDFPart() );
414 
415  unsigned int nGeneration = m_aUIntStack.back();
416  m_aUIntStack.pop_back();
417  unsigned int nObject = m_aUIntStack.back();
418  m_aUIntStack.pop_back();
419 
420  PDFObject* pObj = new PDFObject( nObject, nGeneration );
421  pObj->m_nOffset = first - m_aGlobalBegin;
422 
423  PDFContainer* pContainer = dynamic_cast<PDFContainer*>(m_aObjectStack.back());
424  if( pContainer &&
425  ( dynamic_cast<PDFFile*>(pContainer) ||
426  dynamic_cast<PDFPart*>(pContainer) ) )
427  {
428  pContainer->m_aSubElements.emplace_back( pObj );
429  m_aObjectStack.push_back( pObj );
430  }
431  else
432  parseError( "object in wrong place", first );
433  }
434 
435  void endObject( iteratorT first, SAL_UNUSED_PARAMETER iteratorT )
436  {
437  if( m_aObjectStack.empty() )
438  parseError( "endobj without obj", first );
439  else if( dynamic_cast<PDFObject*>(m_aObjectStack.back()) == nullptr )
440  parseError( "spurious endobj", first );
441  else
442  m_aObjectStack.pop_back();
443  }
444 
445  void pushObjectRef( iteratorT first, SAL_UNUSED_PARAMETER iteratorT )
446  {
447  unsigned int nGeneration = m_aUIntStack.back();
448  m_aUIntStack.pop_back();
449  unsigned int nObject = m_aUIntStack.back();
450  m_aUIntStack.pop_back();
451  insertNewValue( std::make_unique<PDFObjectRef>(nObject,nGeneration), first );
452  }
453 
454  void beginDict( iteratorT first, SAL_UNUSED_PARAMETER iteratorT )
455  {
456  PDFDict* pDict = new PDFDict();
457  pDict->m_nOffset = first - m_aGlobalBegin;
458 
459  insertNewValue( std::unique_ptr<PDFEntry>(pDict), first );
460  // will not come here if insertion fails (exception)
461  m_aObjectStack.push_back( pDict );
462  }
463  void endDict( iteratorT first, SAL_UNUSED_PARAMETER iteratorT )
464  {
465  PDFDict* pDict = nullptr;
466  if( m_aObjectStack.empty() )
467  parseError( "dictionary end without begin", first );
468  else if( (pDict = dynamic_cast<PDFDict*>(m_aObjectStack.back())) == nullptr )
469  parseError( "spurious dictionary end", first );
470  else
471  m_aObjectStack.pop_back();
472 
473  PDFEntry* pOffender = pDict->buildMap();
474  if( pOffender )
475  {
476  StringEmitContext aCtx;
477  aCtx.write( "offending dictionary element: ", 30 );
478  pOffender->emit( aCtx );
479  m_aErrorString = aCtx.getString();
480  parseError( m_aErrorString.getStr(), first );
481  }
482  }
483 
484  void beginArray( iteratorT first, SAL_UNUSED_PARAMETER iteratorT )
485  {
486  PDFArray* pArray = new PDFArray();
487  pArray->m_nOffset = first - m_aGlobalBegin;
488 
489  insertNewValue( std::unique_ptr<PDFEntry>(pArray), first );
490  // will not come here if insertion fails (exception)
491  m_aObjectStack.push_back( pArray );
492  }
493 
494  void endArray( iteratorT first, SAL_UNUSED_PARAMETER iteratorT )
495  {
496  if( m_aObjectStack.empty() )
497  parseError( "array end without begin", first );
498  else if( dynamic_cast<PDFArray*>(m_aObjectStack.back()) == nullptr )
499  parseError( "spurious array end", first );
500  else
501  m_aObjectStack.pop_back();
502  }
503 
504  void emitStream( iteratorT first, iteratorT last )
505  {
506  if( m_aObjectStack.empty() )
507  parseError( "stream without object", first );
508  PDFObject* pObj = dynamic_cast<PDFObject*>(m_aObjectStack.back());
509  if( pObj && pObj->m_pObject )
510  {
511  if( pObj->m_pStream )
512  parseError( "multiple streams in object", first );
513 
514  PDFDict* pDict = dynamic_cast<PDFDict*>(pObj->m_pObject);
515  if( pDict )
516  {
517  PDFStream* pStream = new PDFStream( first - m_aGlobalBegin, last - m_aGlobalBegin, pDict );
518 
519  pObj->m_pStream = pStream;
520  pObj->m_aSubElements.emplace_back( pStream );
521  }
522  }
523  else
524  parseError( "stream without object", first );
525  }
526 
527  void beginTrailer( iteratorT first, SAL_UNUSED_PARAMETER iteratorT )
528  {
529  if( m_aObjectStack.empty() )
530  m_aObjectStack.push_back( new PDFPart() );
531 
532  PDFTrailer* pTrailer = new PDFTrailer();
533  pTrailer->m_nOffset = first - m_aGlobalBegin;
534 
535  PDFContainer* pContainer = dynamic_cast<PDFContainer*>(m_aObjectStack.back());
536  if( pContainer &&
537  ( dynamic_cast<PDFFile*>(pContainer) ||
538  dynamic_cast<PDFPart*>(pContainer) ) )
539  {
540  pContainer->m_aSubElements.emplace_back( pTrailer );
541  m_aObjectStack.push_back( pTrailer );
542  }
543  else
544  parseError( "trailer in wrong place", first );
545  }
546 
547  void endTrailer( iteratorT first, SAL_UNUSED_PARAMETER iteratorT )
548  {
549  if( m_aObjectStack.empty() )
550  parseError( "%%EOF without trailer", first );
551  else if( dynamic_cast<PDFTrailer*>(m_aObjectStack.back()) == nullptr )
552  parseError( "spurious %%EOF", first );
553  else
554  m_aObjectStack.pop_back();
555  }
556 };
557 
558 }
559 
560 #ifdef _WIN32
561 std::unique_ptr<PDFEntry> PDFReader::read( const char* pBuffer, unsigned int nLen )
562 {
563  PDFGrammar<const char*> aGrammar( pBuffer );
564 
565  try
566  {
567 #if OSL_DEBUG_LEVEL > 0
568  boost::spirit::classic::parse_info<const char*> aInfo =
569 #endif
570  boost::spirit::classic::parse( pBuffer,
571  pBuffer+nLen,
572  aGrammar,
573  boost::spirit::classic::space_p );
574 #if OSL_DEBUG_LEVEL > 0
575  SAL_INFO("sdext.pdfimport.pdfparse", "parseinfo: stop = " << aInfo.stop << " (buff=" << pBuffer << ", offset = " << aInfo.stop - pBuffer << "), hit = " << (aInfo.hit ? OUString("true") : OUString("false")) << ", full = " << (aInfo.full ? OUString("true") : OUString("false")) << ", length = " << static_cast<int>(aInfo.length) );
576 #endif
577  }
578  catch( const parser_error<const char*, const char*>& rError )
579  {
580 #if OSL_DEBUG_LEVEL > 0
581  OString aTmp;
582  unsigned int nElem = aGrammar.m_aObjectStack.size();
583  for( unsigned int i = 0; i < nElem; i++ )
584  aTmp += OStringLiteral(" ") + typeid( *(aGrammar.m_aObjectStack[i]) ).name();
585 
586  SAL_WARN("sdext.pdfimport.pdfparse", "parse error: " << rError.descriptor << " at buffer pos " << rError.where - pBuffer << ", object stack: " << aTmp);
587 #else
588  (void)rError;
589 #endif
590  }
591 
592  std::unique_ptr<PDFEntry> pRet;
593  unsigned int nEntries = aGrammar.m_aObjectStack.size();
594  if( nEntries == 1 )
595  {
596  pRet.reset(aGrammar.m_aObjectStack.back());
597  aGrammar.m_aObjectStack.pop_back();
598  }
599 #if OSL_DEBUG_LEVEL > 0
600  else if( nEntries > 1 )
601  SAL_WARN("sdext.pdfimport.pdfparse", "error got " << nEntries << " stack objects in parse" );
602 #endif
603 
604  return pRet;
605 }
606 #endif
607 
608 std::unique_ptr<PDFEntry> PDFReader::read( const char* pFileName )
609 {
610 #ifdef _WIN32
611  /* #i106583#
612  since converting to boost 1.39 file_iterator does not work anymore on all Windows systems
613  C++ stdlib istream_iterator does not allow "-" apparently
614  using spirit 2.0 doesn't work in our environment with the MSC
615 
616  So for the time being bite the bullet and read the whole file.
617  FIXME: give Spirit 2.x another try when we upgrade boost again.
618  */
619  std::unique_ptr<PDFEntry> pRet;
620  FILE* fp = fopen( pFileName, "rb" );
621  if( fp )
622  {
623  fseek( fp, 0, SEEK_END );
624  unsigned int nLen = static_cast<unsigned int>(ftell( fp ));
625  fseek( fp, 0, SEEK_SET );
626  char* pBuf = static_cast<char*>(std::malloc( nLen ));
627  if( pBuf )
628  {
629  fread( pBuf, 1, nLen, fp );
630  pRet = read( pBuf, nLen );
631  std::free( pBuf );
632  }
633  fclose( fp );
634  }
635  return pRet;
636 #else
637  file_iterator<> file_start( pFileName );
638  if( ! file_start )
639  return nullptr;
640  file_iterator<> file_end = file_start.make_end();
641  PDFGrammar< file_iterator<> > aGrammar( file_start );
642 
643  try
644  {
645 #if OSL_DEBUG_LEVEL > 0
646  boost::spirit::classic::parse_info< file_iterator<> > aInfo =
647 #endif
648  boost::spirit::classic::parse( file_start,
649  file_end,
650  aGrammar,
651  boost::spirit::classic::space_p );
652 #if OSL_DEBUG_LEVEL > 0
653  SAL_INFO("sdext.pdfimport.pdfparse", "parseinfo: stop at offset = " << aInfo.stop - file_start << ", hit = " << (aInfo.hit ? "true" : "false") << ", full = " << (aInfo.full ? "true" : "false") << ", length = " << aInfo.length);
654 #endif
655  }
656  catch( const parser_error< const char*, file_iterator<> >& rError )
657  {
658  SAL_WARN("sdext.pdfimport.pdfparse", "parse error: " << rError.descriptor << " at buffer pos " << rError.where - file_start);
659 #if OSL_DEBUG_LEVEL > 0
660  OUStringBuffer aTmp;
661  unsigned int nElem = aGrammar.m_aObjectStack.size();
662  for( unsigned int i = 0; i < nElem; i++ )
663  {
664  aTmp.append(" ");
665  aTmp.appendAscii(typeid( *(aGrammar.m_aObjectStack[i]) ).name());
666  }
667  SAL_WARN("sdext.pdfimport.pdfparse", "parse error object stack: " << aTmp.makeStringAndClear());
668 #endif
669  }
670 
671  std::unique_ptr<PDFEntry> pRet;
672  unsigned int nEntries = aGrammar.m_aObjectStack.size();
673  if( nEntries == 1 )
674  {
675  pRet.reset(aGrammar.m_aObjectStack.back());
676  aGrammar.m_aObjectStack.pop_back();
677  }
678 #if OSL_DEBUG_LEVEL > 0
679  else if( nEntries > 1 )
680  {
681  SAL_WARN("sdext.pdfimport.pdfparse", "error got " << nEntries << " stack objects in parse");
682  for( unsigned int i = 0; i < nEntries; i++ )
683  {
684  SAL_WARN("sdext.pdfimport.pdfparse", typeid(*aGrammar.m_aObjectStack[i]).name());
685  PDFObject* pObj = dynamic_cast<PDFObject*>(aGrammar.m_aObjectStack[i]);
686  if( pObj )
687  SAL_WARN("sdext.pdfimport.pdfparse", " -> object " << pObj->m_nNumber << " generation " << pObj->m_nGeneration);
688  else
689  SAL_WARN("sdext.pdfimport.pdfparse", "(type " << typeid(*aGrammar.m_aObjectStack[i]).name() << ")");
690  }
691  }
692 #endif
693  return pRet;
694 #endif // WIN32
695 }
696 
697 #if defined(_MSC_VER)
698 #pragma warning(pop)
699 #endif
700 
701 /* vim:set shiftwidth=4 softtabstop=4 expandtab: */
std::vector< std::unique_ptr< PDFEntry > > m_aSubElements
Definition: pdfparse.hxx:161
uno::Sequence< sal_Int8 > m_aBuf
Definition: odfemitter.cxx:41
unsigned int m_nMajor
Definition: pdfparse.hxx:235
PDFStream * m_pStream
Definition: pdfparse.hxx:261
constexpr std::enable_if_t< std::is_signed_v< T >, std::make_unsigned_t< T > > make_unsigned(T value)
PDFEntry * buildMap()
Definition: pdfentries.cxx:576
Reference< XOutputStream > stream
PDFEntry * m_pObject
Definition: pdfparse.hxx:260
#define SAL_INFO(area, stream)
const char * name
Any value
#define SAL_WARN(area, stream)
OUString getString(const Any &_rAny)
aStr
unsigned int m_nMinor
Definition: pdfparse.hxx:236
static osl::File * pStream
Definition: emitcontext.cxx:32
virtual bool emit(EmitContext &rWriteContext) const =0
typedef void(CALLTYPE *GetFuncDataPtr)(sal_uInt16 &nNo