LibreOffice Module sdext (master)  1
pdfparse.cxx
Go to the documentation of this file.
1 /* -*- Mode: C++; tab-width: 4; indent-tabs-mode: nil; c-basic-offset: 4 -*- */
2 /*
3  * This file is part of the LibreOffice project.
4  *
5  * This Source Code Form is subject to the terms of the Mozilla Public
6  * License, v. 2.0. If a copy of the MPL was not distributed with this
7  * file, You can obtain one at http://mozilla.org/MPL/2.0/.
8  *
9  * This file incorporates work covered by the following license notice:
10  *
11  * Licensed to the Apache Software Foundation (ASF) under one or more
12  * contributor license agreements. See the NOTICE file distributed
13  * with this work for additional information regarding copyright
14  * ownership. The ASF licenses this file to you under the Apache
15  * License, Version 2.0 (the "License"); you may not use this file
16  * except in compliance with the License. You may obtain a copy of
17  * the License at http://www.apache.org/licenses/LICENSE-2.0 .
18  */
19 
20 
21 #include <pdfparse.hxx>
22 
23 // boost using obsolete stuff
24 #if defined(_MSC_VER)
25 #pragma warning(push)
26 #pragma warning(disable:4996)
27 #pragma warning(disable:4503)
28 #endif
29 
30 // workaround windows compiler: do not include multi_pass.hpp
31 #include <boost/spirit/include/classic_core.hpp>
32 #include <boost/spirit/include/classic_utility.hpp>
33 #include <boost/spirit/include/classic_error_handling.hpp>
34 #include <boost/spirit/include/classic_file_iterator.hpp>
35 #include <boost/bind.hpp>
36 
37 #include <string.h>
38 
39 #include <o3tl/safeint.hxx>
40 #include <rtl/strbuf.hxx>
41 #include <rtl/ustrbuf.hxx>
42 #include <sal/log.hxx>
43 
44 // disable warnings again because someone along the line has enabled them
45 // (we have included boost headers, what did you expect?)
46 #if defined(_MSC_VER)
47 #pragma warning(push)
48 #pragma warning(disable:4996)
49 #pragma warning(disable:4503)
50 #endif
51 
52 
53 using namespace boost::spirit;
54 using namespace pdfparse;
55 
56 namespace {
57 
58 class StringEmitContext : public EmitContext
59 {
60  OStringBuffer m_aBuf;
61  public:
62  StringEmitContext() : EmitContext(), m_aBuf(256) {}
63 
64  virtual bool write( const void* pBuf, unsigned int nLen ) throw() override
65  {
66  m_aBuf.append( static_cast<const char*>(pBuf), nLen );
67  return true;
68  }
69  virtual unsigned int getCurPos() throw() override { return m_aBuf.getLength(); }
70  virtual bool copyOrigBytes( unsigned int nOrigOffset, unsigned int nLen ) throw() override
71  { return (nOrigOffset+nLen < o3tl::make_unsigned(m_aBuf.getLength()) ) &&
72  write( m_aBuf.getStr() + nOrigOffset, nLen ); }
73  virtual unsigned int readOrigBytes( unsigned int nOrigOffset, unsigned int nLen, void* pBuf ) throw() override
74  {
75  if( nOrigOffset+nLen < o3tl::make_unsigned(m_aBuf.getLength()) )
76  {
77  memcpy( pBuf, m_aBuf.getStr()+nOrigOffset, nLen );
78  return nLen;
79  }
80  return 0;
81  }
82 
83  OString getString() { return m_aBuf.makeStringAndClear(); }
84 };
85 
86 template< class iteratorT >
87 class PDFGrammar : public grammar< PDFGrammar<iteratorT> >
88 {
89 public:
90 
91  explicit PDFGrammar( const iteratorT& first )
92  : m_fDouble( 0.0 ), m_aGlobalBegin( first ) {}
93  ~PDFGrammar()
94  {
95  if( !m_aObjectStack.empty() )
96  delete m_aObjectStack.front();
97  }
98 
99  double m_fDouble;
100  std::vector< unsigned int > m_aUIntStack;
101  std::vector< PDFEntry* > m_aObjectStack;
102  OString m_aErrorString;
103  iteratorT m_aGlobalBegin;
104 
105 public:
106  struct pdf_string_parser
107  {
108  typedef nil_t result_t;
109  template <typename ScannerT>
110  std::ptrdiff_t
111  operator()(ScannerT const& scan, result_t&) const
112  {
113  std::ptrdiff_t len = 0;
114 
115  int nBraceLevel = 0;
116  while( ! scan.at_end() )
117  {
118  char c = *scan;
119  if( c == ')' )
120  {
121  nBraceLevel--;
122  if( nBraceLevel < 0 )
123  break;
124  }
125  else if( c == '(' )
126  nBraceLevel++;
127  else if( c == '\\' ) // ignore escaped braces
128  {
129  ++len;
130  ++scan.first; // tdf#63054: avoid skipping spaces
131  if( scan.first == scan.last ) // tdf#63054: avoid skipping spaces
132  break;
133  }
134  ++len;
135  ++scan;
136  }
137  return scan.at_end() ? -1 : len;
138  }
139  };
140 
141  template< typename ScannerT >
142  struct definition
143  {
144  explicit definition( const PDFGrammar<iteratorT>& rSelf )
145  {
146  PDFGrammar<iteratorT>* pSelf = const_cast< PDFGrammar<iteratorT>* >( &rSelf );
147 
148  // workaround workshop compiler: comment_p doesn't work
149  // comment = comment_p("%")[boost::bind(&PDFGrammar::pushComment, pSelf, _1, _2 )];
150  comment = lexeme_d[ (ch_p('%') >> *(~ch_p('\r') & ~ch_p('\n')) >> eol_p)[boost::bind(&PDFGrammar::pushComment, pSelf, _1, _2 )] ];
151 
152  boolean = (str_p("true") | str_p("false"))[boost::bind(&PDFGrammar::pushBool, pSelf, _1, _2)];
153 
154  // workaround workshop compiler: confix_p doesn't work
155  //stream = confix_p( "stream", *anychar_p, "endstream" )[boost::bind(&PDFGrammar::emitStream, pSelf, _1, _2 )];
156  stream = (str_p("stream") >> *(anychar_p - str_p("endstream")) >> str_p("endstream"))[boost::bind(&PDFGrammar::emitStream, pSelf, _1, _2 )];
157 
158  name = lexeme_d[
159  ch_p('/')
160  >> (*(anychar_p-chset_p("\t\n\f\r ()<>[]{}/%")-ch_p('\0')))
161  [boost::bind(&PDFGrammar::pushName, pSelf, _1, _2)] ];
162 
163  // workaround workshop compiler: confix_p doesn't work
164  //stringtype = ( confix_p("(",*anychar_p, ")") |
165  // confix_p("<",*xdigit_p, ">") )
166  // [boost::bind(&PDFGrammar::pushString,pSelf, _1, _2)];
167 
168  stringtype = ( ( ch_p('(') >> functor_parser<pdf_string_parser>() >> ch_p(')') ) |
169  ( ch_p('<') >> *xdigit_p >> ch_p('>') ) )
170  [boost::bind(&PDFGrammar::pushString,pSelf, _1, _2)];
171 
172  null_object = str_p( "null" )[boost::bind(&PDFGrammar::pushNull, pSelf, _1, _2)];
173 
174  #ifdef USE_ASSIGN_ACTOR
175  objectref = ( uint_p[push_back_a(pSelf->m_aUIntStack)]
176  >> uint_p[push_back_a(pSelf->m_aUIntStack)]
177  >> ch_p('R')
178  >> eps_p
179  )[boost::bind(&PDFGrammar::pushObjectRef, pSelf, _1, _2)];
180  #else
181  objectref = ( uint_p[boost::bind(&PDFGrammar::push_back_action_uint, pSelf, _1)]
182  >> uint_p[boost::bind(&PDFGrammar::push_back_action_uint, pSelf, _1)]
183  >> ch_p('R')
184  >> eps_p
185  )[boost::bind(&PDFGrammar::pushObjectRef, pSelf, _1, _2)];
186  #endif
187 
188  #ifdef USE_ASSIGN_ACTOR
189  simple_type = objectref | name |
190  ( real_p[assign_a(pSelf->m_fDouble)] >> eps_p )
191  [boost::bind(&PDFGrammar::pushDouble, pSelf, _1, _2)]
192  | stringtype | boolean | null_object;
193  #else
194  simple_type = objectref | name |
195  ( real_p[boost::bind(&PDFGrammar::assign_action_double, pSelf, _1)] >> eps_p )
196  [boost::bind(&PDFGrammar::pushDouble, pSelf, _1, _2)]
197  | stringtype | boolean | null_object;
198  #endif
199 
200  dict_begin = str_p( "<<" )[boost::bind(&PDFGrammar::beginDict, pSelf, _1, _2)];
201  dict_end = str_p( ">>" )[boost::bind(&PDFGrammar::endDict, pSelf, _1, _2)];
202 
203  array_begin = str_p("[")[boost::bind(&PDFGrammar::beginArray,pSelf, _1, _2)];
204  array_end = str_p("]")[boost::bind(&PDFGrammar::endArray,pSelf, _1, _2)];
205 
206  #ifdef USE_ASSIGN_ACTOR
207  object_begin= uint_p[push_back_a(pSelf->m_aUIntStack)]
208  >> uint_p[push_back_a(pSelf->m_aUIntStack)]
209  >> str_p("obj" )[boost::bind(&PDFGrammar::beginObject, pSelf, _1, _2)];
210  #else
211  object_begin= uint_p[boost::bind(&PDFGrammar::push_back_action_uint, pSelf, _1)]
212  >> uint_p[boost::bind(&PDFGrammar::push_back_action_uint, pSelf, _1)]
213  >> str_p("obj" )[boost::bind(&PDFGrammar::beginObject, pSelf, _1, _2)];
214  #endif
215  object_end = str_p( "endobj" )[boost::bind(&PDFGrammar::endObject, pSelf, _1, _2)];
216 
217  xref = str_p( "xref" ) >> uint_p >> uint_p
218  >> lexeme_d[
219  +( repeat_p(10)[digit_p]
220  >> blank_p
221  >> repeat_p(5)[digit_p]
222  >> blank_p
223  >> ( ch_p('n') | ch_p('f') )
224  >> repeat_p(2)[space_p]
225  ) ];
226 
227  dict_element= dict_begin | comment | simple_type
228  | array_begin | array_end | dict_end;
229 
230  object = object_begin
231  >> *dict_element
232  >> !stream
233  >> object_end;
234 
235  trailer = str_p( "trailer" )[boost::bind(&PDFGrammar::beginTrailer,pSelf,_1,_2)]
236  >> *dict_element
237  >> str_p("startxref")
238  >> uint_p
239  >> str_p("%%EOF")[boost::bind(&PDFGrammar::endTrailer,pSelf,_1,_2)];
240 
241  #ifdef USE_ASSIGN_ACTOR
242  pdfrule = ! (lexeme_d[
243  str_p( "%PDF-" )
244  >> uint_p[push_back_a(pSelf->m_aUIntStack)]
245  >> ch_p('.')
246  >> uint_p[push_back_a(pSelf->m_aUIntStack)]
247  >> *((~ch_p('\r') & ~ch_p('\n')))
248  >> eol_p
249  ])[boost::bind(&PDFGrammar::haveFile,pSelf, _1, _2)]
250  >> *( comment | object | ( xref >> trailer ) );
251  #else
252  pdfrule = ! (lexeme_d[
253  str_p( "%PDF-" )
254  >> uint_p[boost::bind(&PDFGrammar::push_back_action_uint, pSelf, _1)]
255  >> ch_p('.')
256  >> uint_p[boost::bind(&PDFGrammar::push_back_action_uint, pSelf, _1)]
257  >> *(~ch_p('\r') & ~ch_p('\n'))
258  >> eol_p
259  ])[boost::bind(&PDFGrammar::haveFile,pSelf, _1, _2)]
260  >> *( comment | object | ( xref >> trailer ) );
261  #endif
262  }
263  rule< ScannerT > comment, stream, boolean, name, stringtype, null_object, simple_type,
264  objectref, array, value, dict_element, dict_begin, dict_end,
265  array_begin, array_end, object, object_begin, object_end,
266  xref, trailer, pdfrule;
267 
268  const rule< ScannerT >& start() const { return pdfrule; }
269  };
270 
271  #ifndef USE_ASSIGN_ACTOR
272  void push_back_action_uint( unsigned int i )
273  {
274  m_aUIntStack.push_back( i );
275  }
276  void assign_action_double( double d )
277  {
278  m_fDouble = d;
279  }
280  #endif
281 
282  static void parseError( const char* pMessage, iteratorT pLocation )
283  {
284  throw_( pLocation, pMessage );
285  }
286 
287  OString iteratorToString( iteratorT first, iteratorT last ) const
288  {
289  OStringBuffer aStr( 32 );
290  while( first != last )
291  {
292  aStr.append( *first );
293  ++first;
294  }
295  return aStr.makeStringAndClear();
296  }
297 
298  void haveFile( iteratorT pBegin, SAL_UNUSED_PARAMETER iteratorT /*pEnd*/ )
299  {
300  if( m_aObjectStack.empty() )
301  {
302  PDFFile* pFile = new PDFFile();
303  pFile->m_nMinor = m_aUIntStack.back();
304  m_aUIntStack.pop_back();
305  pFile->m_nMajor = m_aUIntStack.back();
306  m_aUIntStack.pop_back();
307  m_aObjectStack.push_back( pFile );
308  }
309  else
310  parseError( "found file header in unusual place", pBegin );
311  }
312 
313  void pushComment( iteratorT first, iteratorT last )
314  {
315  // add a comment to the current stack element
316  PDFComment* pComment =
317  new PDFComment(iteratorToString(first,last));
318  if( m_aObjectStack.empty() )
319  m_aObjectStack.push_back( new PDFPart() );
320  PDFContainer* pContainer = dynamic_cast<PDFContainer*>(m_aObjectStack.back());
321  if( pContainer == nullptr )
322  parseError( "comment without container", first );
323  pContainer->m_aSubElements.emplace_back( pComment );
324  }
325 
326  void insertNewValue( std::unique_ptr<PDFEntry> pNewValue, iteratorT pPos )
327  {
328  PDFContainer* pContainer = nullptr;
329  const char* pMsg = nullptr;
330  if( ! m_aObjectStack.empty() &&
331  (pContainer = dynamic_cast<PDFContainer*>(m_aObjectStack.back())) != nullptr )
332  {
333  if( dynamic_cast<PDFDict*>(pContainer) == nullptr &&
334  dynamic_cast<PDFArray*>(pContainer) == nullptr )
335  {
336  PDFObject* pObj = dynamic_cast<PDFObject*>(pContainer);
337  if( pObj )
338  {
339  if( pObj->m_pObject == nullptr )
340  pObj->m_pObject = pNewValue.get();
341  else
342  {
343  pMsg = "second value for object";
344  pContainer = nullptr;
345  }
346  }
347  else if( dynamic_cast<PDFDict*>(pNewValue.get()) )
348  {
349  PDFTrailer* pTrailer = dynamic_cast<PDFTrailer*>(pContainer);
350  if( pTrailer )
351  {
352  if( pTrailer->m_pDict == nullptr )
353  pTrailer->m_pDict = dynamic_cast<PDFDict*>(pNewValue.get());
354  else
355  pContainer = nullptr;
356  }
357  else
358  pContainer = nullptr;
359  }
360  else
361  pContainer = nullptr;
362  }
363  }
364  if( pContainer )
365  pContainer->m_aSubElements.emplace_back( std::move(pNewValue) );
366  else
367  {
368  if( ! pMsg )
369  {
370  if( dynamic_cast<PDFContainer*>(pNewValue.get()) )
371  pMsg = "array without container";
372  else
373  pMsg = "value without container";
374  }
375  parseError( pMsg, pPos );
376  }
377  }
378 
379  void pushName( iteratorT first, iteratorT last )
380  {
381  insertNewValue( std::make_unique<PDFName>(iteratorToString(first,last)), first );
382  }
383 
384  void pushDouble( iteratorT first, SAL_UNUSED_PARAMETER iteratorT /*last*/ )
385  {
386  insertNewValue( std::make_unique<PDFNumber>(m_fDouble), first );
387  }
388 
389  void pushString( iteratorT first, iteratorT last )
390  {
391  insertNewValue( std::make_unique<PDFString>(iteratorToString(first,last)), first );
392  }
393 
394  void pushBool( iteratorT first, iteratorT last )
395  {
396  insertNewValue( std::make_unique<PDFBool>( last-first == 4 ), first );
397  }
398 
399  void pushNull( iteratorT first, SAL_UNUSED_PARAMETER iteratorT )
400  {
401  insertNewValue( std::make_unique<PDFNull>(), first );
402  }
403 
404 
405  void beginObject( iteratorT first, SAL_UNUSED_PARAMETER iteratorT /*last*/ )
406  {
407  if( m_aObjectStack.empty() )
408  m_aObjectStack.push_back( new PDFPart() );
409 
410  unsigned int nGeneration = m_aUIntStack.back();
411  m_aUIntStack.pop_back();
412  unsigned int nObject = m_aUIntStack.back();
413  m_aUIntStack.pop_back();
414 
415  PDFObject* pObj = new PDFObject( nObject, nGeneration );
416  pObj->m_nOffset = first - m_aGlobalBegin;
417 
418  PDFContainer* pContainer = dynamic_cast<PDFContainer*>(m_aObjectStack.back());
419  if( pContainer &&
420  ( dynamic_cast<PDFFile*>(pContainer) ||
421  dynamic_cast<PDFPart*>(pContainer) ) )
422  {
423  pContainer->m_aSubElements.emplace_back( pObj );
424  m_aObjectStack.push_back( pObj );
425  }
426  else
427  parseError( "object in wrong place", first );
428  }
429 
430  void endObject( iteratorT first, SAL_UNUSED_PARAMETER iteratorT )
431  {
432  if( m_aObjectStack.empty() )
433  parseError( "endobj without obj", first );
434  else if( dynamic_cast<PDFObject*>(m_aObjectStack.back()) == nullptr )
435  parseError( "spurious endobj", first );
436  else
437  m_aObjectStack.pop_back();
438  }
439 
440  void pushObjectRef( iteratorT first, SAL_UNUSED_PARAMETER iteratorT )
441  {
442  unsigned int nGeneration = m_aUIntStack.back();
443  m_aUIntStack.pop_back();
444  unsigned int nObject = m_aUIntStack.back();
445  m_aUIntStack.pop_back();
446  insertNewValue( std::make_unique<PDFObjectRef>(nObject,nGeneration), first );
447  }
448 
449  void beginDict( iteratorT first, SAL_UNUSED_PARAMETER iteratorT )
450  {
451  PDFDict* pDict = new PDFDict();
452  pDict->m_nOffset = first - m_aGlobalBegin;
453 
454  insertNewValue( std::unique_ptr<PDFEntry>(pDict), first );
455  // will not come here if insertion fails (exception)
456  m_aObjectStack.push_back( pDict );
457  }
458  void endDict( iteratorT first, SAL_UNUSED_PARAMETER iteratorT )
459  {
460  PDFDict* pDict = nullptr;
461  if( m_aObjectStack.empty() )
462  parseError( "dictionary end without begin", first );
463  else if( (pDict = dynamic_cast<PDFDict*>(m_aObjectStack.back())) == nullptr )
464  parseError( "spurious dictionary end", first );
465  else
466  m_aObjectStack.pop_back();
467 
468  PDFEntry* pOffender = pDict->buildMap();
469  if( pOffender )
470  {
471  StringEmitContext aCtx;
472  aCtx.write( "offending dictionary element: ", 30 );
473  pOffender->emit( aCtx );
474  m_aErrorString = aCtx.getString();
475  parseError( m_aErrorString.getStr(), first );
476  }
477  }
478 
479  void beginArray( iteratorT first, SAL_UNUSED_PARAMETER iteratorT )
480  {
481  PDFArray* pArray = new PDFArray();
482  pArray->m_nOffset = first - m_aGlobalBegin;
483 
484  insertNewValue( std::unique_ptr<PDFEntry>(pArray), first );
485  // will not come here if insertion fails (exception)
486  m_aObjectStack.push_back( pArray );
487  }
488 
489  void endArray( iteratorT first, SAL_UNUSED_PARAMETER iteratorT )
490  {
491  if( m_aObjectStack.empty() )
492  parseError( "array end without begin", first );
493  else if( dynamic_cast<PDFArray*>(m_aObjectStack.back()) == nullptr )
494  parseError( "spurious array end", first );
495  else
496  m_aObjectStack.pop_back();
497  }
498 
499  void emitStream( iteratorT first, iteratorT last )
500  {
501  if( m_aObjectStack.empty() )
502  parseError( "stream without object", first );
503  PDFObject* pObj = dynamic_cast<PDFObject*>(m_aObjectStack.back());
504  if( pObj && pObj->m_pObject )
505  {
506  if( pObj->m_pStream )
507  parseError( "multiple streams in object", first );
508 
509  PDFDict* pDict = dynamic_cast<PDFDict*>(pObj->m_pObject);
510  if( pDict )
511  {
512  PDFStream* pStream = new PDFStream( first - m_aGlobalBegin, last - m_aGlobalBegin, pDict );
513 
514  pObj->m_pStream = pStream;
515  pObj->m_aSubElements.emplace_back( pStream );
516  }
517  }
518  else
519  parseError( "stream without object", first );
520  }
521 
522  void beginTrailer( iteratorT first, SAL_UNUSED_PARAMETER iteratorT )
523  {
524  if( m_aObjectStack.empty() )
525  m_aObjectStack.push_back( new PDFPart() );
526 
527  PDFTrailer* pTrailer = new PDFTrailer();
528  pTrailer->m_nOffset = first - m_aGlobalBegin;
529 
530  PDFContainer* pContainer = dynamic_cast<PDFContainer*>(m_aObjectStack.back());
531  if( pContainer &&
532  ( dynamic_cast<PDFFile*>(pContainer) ||
533  dynamic_cast<PDFPart*>(pContainer) ) )
534  {
535  pContainer->m_aSubElements.emplace_back( pTrailer );
536  m_aObjectStack.push_back( pTrailer );
537  }
538  else
539  parseError( "trailer in wrong place", first );
540  }
541 
542  void endTrailer( iteratorT first, SAL_UNUSED_PARAMETER iteratorT )
543  {
544  if( m_aObjectStack.empty() )
545  parseError( "%%EOF without trailer", first );
546  else if( dynamic_cast<PDFTrailer*>(m_aObjectStack.back()) == nullptr )
547  parseError( "spurious %%EOF", first );
548  else
549  m_aObjectStack.pop_back();
550  }
551 };
552 
553 }
554 
555 #ifdef _WIN32
556 std::unique_ptr<PDFEntry> PDFReader::read( const char* pBuffer, unsigned int nLen )
557 {
558  PDFGrammar<const char*> aGrammar( pBuffer );
559 
560  try
561  {
562 #if OSL_DEBUG_LEVEL > 0
563  boost::spirit::parse_info<const char*> aInfo =
564 #endif
565  boost::spirit::parse( pBuffer,
566  pBuffer+nLen,
567  aGrammar,
568  boost::spirit::space_p );
569 #if OSL_DEBUG_LEVEL > 0
570  SAL_INFO("sdext.pdfimport.pdfparse", "parseinfo: stop = " << aInfo.stop << " (buff=" << pBuffer << ", offset = " << aInfo.stop - pBuffer << "), hit = " << (aInfo.hit ? OUString("true") : OUString("false")) << ", full = " << (aInfo.full ? OUString("true") : OUString("false")) << ", length = " << static_cast<int>(aInfo.length) );
571 #endif
572  }
573  catch( const parser_error<const char*, const char*>& rError )
574  {
575 #if OSL_DEBUG_LEVEL > 0
576  OString aTmp;
577  unsigned int nElem = aGrammar.m_aObjectStack.size();
578  for( unsigned int i = 0; i < nElem; i++ )
579  aTmp += OStringLiteral(" ") + typeid( *(aGrammar.m_aObjectStack[i]) ).name();
580 
581  SAL_WARN("sdext.pdfimport.pdfparse", "parse error: " << rError.descriptor << " at buffer pos " << rError.where - pBuffer << ", object stack: " << aTmp);
582 #else
583  (void)rError;
584 #endif
585  }
586 
587  std::unique_ptr<PDFEntry> pRet;
588  unsigned int nEntries = aGrammar.m_aObjectStack.size();
589  if( nEntries == 1 )
590  {
591  pRet.reset(aGrammar.m_aObjectStack.back());
592  aGrammar.m_aObjectStack.pop_back();
593  }
594 #if OSL_DEBUG_LEVEL > 0
595  else if( nEntries > 1 )
596  SAL_WARN("sdext.pdfimport.pdfparse", "error got " << nEntries << " stack objects in parse" );
597 #endif
598 
599  return pRet;
600 }
601 #endif
602 
603 std::unique_ptr<PDFEntry> PDFReader::read( const char* pFileName )
604 {
605 #ifdef _WIN32
606  /* #i106583#
607  since converting to boost 1.39 file_iterator does not work anymore on all Windows systems
608  C++ stdlib istream_iterator does not allow "-" apparently
609  using spirit 2.0 doesn't work in our environment with the MSC
610 
611  So for the time being bite the bullet and read the whole file.
612  FIXME: give Spirit 2.x another try when we upgrade boost again.
613  */
614  std::unique_ptr<PDFEntry> pRet;
615  FILE* fp = fopen( pFileName, "rb" );
616  if( fp )
617  {
618  fseek( fp, 0, SEEK_END );
619  unsigned int nLen = static_cast<unsigned int>(ftell( fp ));
620  fseek( fp, 0, SEEK_SET );
621  char* pBuf = static_cast<char*>(std::malloc( nLen ));
622  if( pBuf )
623  {
624  fread( pBuf, 1, nLen, fp );
625  pRet = read( pBuf, nLen );
626  std::free( pBuf );
627  }
628  fclose( fp );
629  }
630  return pRet;
631 #else
632  file_iterator<> file_start( pFileName );
633  if( ! file_start )
634  return nullptr;
635  file_iterator<> file_end = file_start.make_end();
636  PDFGrammar< file_iterator<> > aGrammar( file_start );
637 
638  try
639  {
640 #if OSL_DEBUG_LEVEL > 0
641  boost::spirit::parse_info< file_iterator<> > aInfo =
642 #endif
643  boost::spirit::parse( file_start,
644  file_end,
645  aGrammar,
646  boost::spirit::space_p );
647 #if OSL_DEBUG_LEVEL > 0
648  SAL_INFO("sdext.pdfimport.pdfparse", "parseinfo: stop at offset = " << aInfo.stop - file_start << ", hit = " << (aInfo.hit ? "true" : "false") << ", full = " << (aInfo.full ? "true" : "false") << ", length = " << aInfo.length);
649 #endif
650  }
651  catch( const parser_error< const char*, file_iterator<> >& rError )
652  {
653  SAL_WARN("sdext.pdfimport.pdfparse", "parse error: " << rError.descriptor << " at buffer pos " << rError.where - file_start);
654 #if OSL_DEBUG_LEVEL > 0
655  OUStringBuffer aTmp;
656  unsigned int nElem = aGrammar.m_aObjectStack.size();
657  for( unsigned int i = 0; i < nElem; i++ )
658  {
659  aTmp.append(" ");
660  aTmp.appendAscii(typeid( *(aGrammar.m_aObjectStack[i]) ).name());
661  }
662  SAL_WARN("sdext.pdfimport.pdfparse", "parse error object stack: " << aTmp.makeStringAndClear());
663 #endif
664  }
665 
666  std::unique_ptr<PDFEntry> pRet;
667  unsigned int nEntries = aGrammar.m_aObjectStack.size();
668  if( nEntries == 1 )
669  {
670  pRet.reset(aGrammar.m_aObjectStack.back());
671  aGrammar.m_aObjectStack.pop_back();
672  }
673 #if OSL_DEBUG_LEVEL > 0
674  else if( nEntries > 1 )
675  {
676  SAL_WARN("sdext.pdfimport.pdfparse", "error got " << nEntries << " stack objects in parse");
677  for( unsigned int i = 0; i < nEntries; i++ )
678  {
679  SAL_WARN("sdext.pdfimport.pdfparse", typeid(*aGrammar.m_aObjectStack[i]).name());
680  PDFObject* pObj = dynamic_cast<PDFObject*>(aGrammar.m_aObjectStack[i]);
681  if( pObj )
682  SAL_WARN("sdext.pdfimport.pdfparse", " -> object " << pObj->m_nNumber << " generation " << pObj->m_nGeneration);
683  else
684  SAL_WARN("sdext.pdfimport.pdfparse", "(type " << typeid(*aGrammar.m_aObjectStack[i]).name() << ")");
685  }
686  }
687 #endif
688  return pRet;
689 #endif // WIN32
690 }
691 
692 #if defined(_MSC_VER)
693 #pragma warning(pop)
694 #endif
695 
696 /* vim:set shiftwidth=4 softtabstop=4 expandtab: */
std::vector< std::unique_ptr< PDFEntry > > m_aSubElements
Definition: pdfparse.hxx:160
uno::Sequence< sal_Int8 > m_aBuf
Definition: odfemitter.cxx:41
unsigned int m_nMajor
Definition: pdfparse.hxx:234
PDFStream * m_pStream
Definition: pdfparse.hxx:260
constexpr std::enable_if_t< std::is_signed_v< T >, std::make_unsigned_t< T > > make_unsigned(T value)
PDFEntry * buildMap()
Definition: pdfentries.cxx:576
Reference< XOutputStream > stream
PDFEntry * m_pObject
Definition: pdfparse.hxx:259
#define SAL_INFO(area, stream)
const char * name
Any value
#define SAL_WARN(area, stream)
OUString getString(const Any &_rAny)
aStr
unsigned int m_nMinor
Definition: pdfparse.hxx:235
static osl::File * pStream
Definition: emitcontext.cxx:32
virtual bool emit(EmitContext &rWriteContext) const =0
typedef void(CALLTYPE *GetFuncDataPtr)(sal_uInt16 &nNo