LibreOffice Module sdext (master)  1
pdfparse.cxx
Go to the documentation of this file.
1 /* -*- Mode: C++; tab-width: 4; indent-tabs-mode: nil; c-basic-offset: 4 -*- */
2 /*
3  * This file is part of the LibreOffice project.
4  *
5  * This Source Code Form is subject to the terms of the Mozilla Public
6  * License, v. 2.0. If a copy of the MPL was not distributed with this
7  * file, You can obtain one at http://mozilla.org/MPL/2.0/.
8  *
9  * This file incorporates work covered by the following license notice:
10  *
11  * Licensed to the Apache Software Foundation (ASF) under one or more
12  * contributor license agreements. See the NOTICE file distributed
13  * with this work for additional information regarding copyright
14  * ownership. The ASF licenses this file to you under the Apache
15  * License, Version 2.0 (the "License"); you may not use this file
16  * except in compliance with the License. You may obtain a copy of
17  * the License at http://www.apache.org/licenses/LICENSE-2.0 .
18  */
19 
20 
21 #include <pdfparse.hxx>
22 
23 // boost using obsolete stuff
24 #if defined(_MSC_VER)
25 #pragma warning(push)
26 #pragma warning(disable:4996)
27 #pragma warning(disable:4503)
28 #endif
29 
30 // workaround windows compiler: do not include multi_pass.hpp
31 #include <boost/spirit/include/classic_core.hpp>
32 #include <boost/spirit/include/classic_utility.hpp>
33 #include <boost/spirit/include/classic_error_handling.hpp>
34 #include <boost/spirit/include/classic_file_iterator.hpp>
35 #include <boost/bind.hpp>
36 
37 #include <string.h>
38 
39 #include <o3tl/safeint.hxx>
40 #include <rtl/strbuf.hxx>
41 #include <rtl/ustrbuf.hxx>
42 #include <sal/log.hxx>
43 
44 // disable warnings again because someone along the line has enabled them
45 // (we have included boost headers, what did you expect?)
46 #if defined(_MSC_VER)
47 #pragma warning(push)
48 #pragma warning(disable:4996)
49 #pragma warning(disable:4503)
50 #endif
51 
52 
53 using namespace boost::spirit::classic;
54 using namespace pdfparse;
55 
56 namespace {
57 
58 class StringEmitContext : public EmitContext
59 {
60  OStringBuffer m_aBuf;
61  public:
62  StringEmitContext() : EmitContext(), m_aBuf(256) {}
63 
64  virtual bool write( const void* pBuf, unsigned int nLen ) throw() override
65  {
66  m_aBuf.append( static_cast<const char*>(pBuf), nLen );
67  return true;
68  }
69  virtual unsigned int getCurPos() throw() override { return m_aBuf.getLength(); }
70  virtual bool copyOrigBytes( unsigned int nOrigOffset, unsigned int nLen ) throw() override
71  { return (nOrigOffset+nLen < o3tl::make_unsigned(m_aBuf.getLength()) ) &&
72  write( m_aBuf.getStr() + nOrigOffset, nLen ); }
73  virtual unsigned int readOrigBytes( unsigned int nOrigOffset, unsigned int nLen, void* pBuf ) throw() override
74  {
75  if( nOrigOffset+nLen < o3tl::make_unsigned(m_aBuf.getLength()) )
76  {
77  memcpy( pBuf, m_aBuf.getStr()+nOrigOffset, nLen );
78  return nLen;
79  }
80  return 0;
81  }
82 
83  OString getString() { return m_aBuf.makeStringAndClear(); }
84 };
85 
86 template< class iteratorT >
87 class PDFGrammar : public grammar< PDFGrammar<iteratorT> >
88 {
89 public:
90 
91  explicit PDFGrammar( const iteratorT& first )
92  : m_fDouble( 0.0 ), m_aGlobalBegin( first ) {}
93  ~PDFGrammar()
94  {
95  if( !m_aObjectStack.empty() )
96  delete m_aObjectStack.front();
97  }
98 
99  double m_fDouble;
100  std::vector< unsigned int > m_aUIntStack;
101  std::vector< PDFEntry* > m_aObjectStack;
102  OString m_aErrorString;
103  iteratorT m_aGlobalBegin;
104 
105 public:
106  struct pdf_string_parser
107  {
108  typedef nil_t result_t;
109  template <typename ScannerT>
110  std::ptrdiff_t
111  operator()(ScannerT const& scan, result_t&) const
112  {
113  std::ptrdiff_t len = 0;
114 
115  int nBraceLevel = 0;
116  while( ! scan.at_end() )
117  {
118  char c = *scan;
119  if( c == ')' )
120  {
121  nBraceLevel--;
122  if( nBraceLevel < 0 )
123  break;
124  }
125  else if( c == '(' )
126  nBraceLevel++;
127  else if( c == '\\' ) // ignore escaped braces
128  {
129  ++len;
130  ++scan.first; // tdf#63054: avoid skipping spaces
131  if( scan.first == scan.last ) // tdf#63054: avoid skipping spaces
132  break;
133  }
134  ++len;
135  ++scan;
136  }
137  return scan.at_end() ? -1 : len;
138  }
139  };
140 
// Rule set of the grammar. The constructor assigns every rule and attaches,
// via boost::bind, the PDFGrammar member function that acts on the matched
// input; start() names pdfrule as the entry rule.
141  template< typename ScannerT >
142  struct definition
143  {
144  explicit definition( const PDFGrammar<iteratorT>& rSelf )
145  {
// The bound actions are non-const members of PDFGrammar, so the constness
// of rSelf is cast away for them.
146  PDFGrammar<iteratorT>* pSelf = const_cast< PDFGrammar<iteratorT>* >( &rSelf );
147 
148  // workaround workshop compiler: comment_p doesn't work
149  // comment = comment_p("%")[boost::bind(&PDFGrammar::pushComment, pSelf, _1, _2 )];
150  comment = lexeme_d[ (ch_p('%') >> *(~ch_p('\r') & ~ch_p('\n')) >> eol_p)[boost::bind(&PDFGrammar::pushComment, pSelf, _1, _2 )] ];
151 
152  boolean = (str_p("true") | str_p("false"))[boost::bind(&PDFGrammar::pushBool, pSelf, _1, _2)];
153 
154  // workaround workshop compiler: confix_p doesn't work
155  //stream = confix_p( "stream", *anychar_p, "endstream" )[boost::bind(&PDFGrammar::emitStream, pSelf, _1, _2 )];
156  stream = (str_p("stream") >> *(anychar_p - str_p("endstream")) >> str_p("endstream"))[boost::bind(&PDFGrammar::emitStream, pSelf, _1, _2 )];
157 
// name: '/' followed by any run of characters other than the listed
// whitespace/delimiter characters and NUL; only that run (without the '/')
// is handed to pushName.
158  name = lexeme_d[
159  ch_p('/')
160  >> (*(anychar_p-chset_p("\t\n\f\r ()<>[]{}/%")-ch_p('\0')))
161  [boost::bind(&PDFGrammar::pushName, pSelf, _1, _2)] ];
162 
163  // workaround workshop compiler: confix_p doesn't work
164  //stringtype = ( confix_p("(",*anychar_p, ")") |
165  // confix_p("<",*xdigit_p, ">") )
166  // [boost::bind(&PDFGrammar::pushString,pSelf, _1, _2)];
167 
// stringtype: either a parenthesised string whose body is consumed by
// pdf_string_parser, or hex digits enclosed in '<' and '>'.
168  stringtype = ( ( ch_p('(') >> functor_parser<pdf_string_parser>() >> ch_p(')') ) |
169  ( ch_p('<') >> *xdigit_p >> ch_p('>') ) )
170  [boost::bind(&PDFGrammar::pushString,pSelf, _1, _2)];
171 
172  null_object = str_p( "null" )[boost::bind(&PDFGrammar::pushNull, pSelf, _1, _2)];
173 
// objectref: two unsigned integers followed by 'R'. Both integers are pushed
// onto m_aUIntStack; pushObjectRef pops them again.
174  #ifdef USE_ASSIGN_ACTOR
175  objectref = ( uint_p[push_back_a(pSelf->m_aUIntStack)]
176  >> uint_p[push_back_a(pSelf->m_aUIntStack)]
177  >> ch_p('R')
178  >> eps_p
179  )[boost::bind(&PDFGrammar::pushObjectRef, pSelf, _1, _2)];
180  #else
181  objectref = ( uint_p[boost::bind(&PDFGrammar::push_back_action_uint, pSelf, _1)]
182  >> uint_p[boost::bind(&PDFGrammar::push_back_action_uint, pSelf, _1)]
183  >> ch_p('R')
184  >> eps_p
185  )[boost::bind(&PDFGrammar::pushObjectRef, pSelf, _1, _2)];
186  #endif
187 
// simple_type: objectref is listed ahead of the real number alternative.
// A parsed real is stored in m_fDouble first and then emitted by pushDouble.
188  #ifdef USE_ASSIGN_ACTOR
189  simple_type = objectref | name |
190  ( real_p[assign_a(pSelf->m_fDouble)] >> eps_p )
191  [boost::bind(&PDFGrammar::pushDouble, pSelf, _1, _2)]
192  | stringtype | boolean | null_object;
193  #else
194  simple_type = objectref | name |
195  ( real_p[boost::bind(&PDFGrammar::assign_action_double, pSelf, _1)] >> eps_p )
196  [boost::bind(&PDFGrammar::pushDouble, pSelf, _1, _2)]
197  | stringtype | boolean | null_object;
198  #endif
199 
200  dict_begin = str_p( "<<" )[boost::bind(&PDFGrammar::beginDict, pSelf, _1, _2)];
201  dict_end = str_p( ">>" )[boost::bind(&PDFGrammar::endDict, pSelf, _1, _2)];
202 
203  array_begin = str_p("[")[boost::bind(&PDFGrammar::beginArray,pSelf, _1, _2)];
204  array_end = str_p("]")[boost::bind(&PDFGrammar::endArray,pSelf, _1, _2)];
205 
// object_begin: "<number> <generation> obj"; both integers go onto
// m_aUIntStack and are popped by beginObject.
206  #ifdef USE_ASSIGN_ACTOR
207  object_begin= uint_p[push_back_a(pSelf->m_aUIntStack)]
208  >> uint_p[push_back_a(pSelf->m_aUIntStack)]
209  >> str_p("obj" )[boost::bind(&PDFGrammar::beginObject, pSelf, _1, _2)];
210  #else
211  object_begin= uint_p[boost::bind(&PDFGrammar::push_back_action_uint, pSelf, _1)]
212  >> uint_p[boost::bind(&PDFGrammar::push_back_action_uint, pSelf, _1)]
213  >> str_p("obj" )[boost::bind(&PDFGrammar::beginObject, pSelf, _1, _2)];
214  #endif
215  object_end = str_p( "endobj" )[boost::bind(&PDFGrammar::endObject, pSelf, _1, _2)];
216 
// xref: the cross reference table is only matched; no action is attached,
// so its contents are not recorded by this grammar.
217  xref = str_p( "xref" ) >> uint_p >> uint_p
218  >> lexeme_d[
219  +( repeat_p(10)[digit_p]
220  >> blank_p
221  >> repeat_p(5)[digit_p]
222  >> blank_p
223  >> ( ch_p('n') | ch_p('f') )
224  >> repeat_p(2)[space_p]
225  ) ];
226 
// dict_element is a flat token list: dictionary/array begin and end markers
// are alternatives of their own, nesting is tracked by the actions through
// m_aObjectStack rather than by recursive rules.
227  dict_element= dict_begin | comment | simple_type
228  | array_begin | array_end | dict_end;
229 
230  object = object_begin
231  >> *dict_element
232  >> !stream
233  >> object_end;
234 
235  trailer = str_p( "trailer" )[boost::bind(&PDFGrammar::beginTrailer,pSelf,_1,_2)]
236  >> *dict_element
237  >> str_p("startxref")
238  >> uint_p
239  >> str_p("%%EOF")[boost::bind(&PDFGrammar::endTrailer,pSelf,_1,_2)];
240 
// pdfrule (entry rule): an optional "%PDF-<major>.<minor>" header line,
// followed by any number of comments, objects and xref/trailer sections.
241  #ifdef USE_ASSIGN_ACTOR
242  pdfrule = ! (lexeme_d[
243  str_p( "%PDF-" )
244  >> uint_p[push_back_a(pSelf->m_aUIntStack)]
245  >> ch_p('.')
246  >> uint_p[push_back_a(pSelf->m_aUIntStack)]
247  >> *((~ch_p('\r') & ~ch_p('\n')))
248  >> eol_p
249  ])[boost::bind(&PDFGrammar::haveFile,pSelf, _1, _2)]
250  >> *( comment | object | ( xref >> trailer ) );
251  #else
252  pdfrule = ! (lexeme_d[
253  str_p( "%PDF-" )
254  >> uint_p[boost::bind(&PDFGrammar::push_back_action_uint, pSelf, _1)]
255  >> ch_p('.')
256  >> uint_p[boost::bind(&PDFGrammar::push_back_action_uint, pSelf, _1)]
257  >> *(~ch_p('\r') & ~ch_p('\n'))
258  >> eol_p
259  ])[boost::bind(&PDFGrammar::haveFile,pSelf, _1, _2)]
260  >> *( comment | object | ( xref >> trailer ) );
261  #endif
262  }
// NOTE(review): 'array' and 'value' are declared here but never assigned or
// referenced in the constructor above.
263  rule< ScannerT > comment, stream, boolean, name, stringtype, null_object, simple_type,
264  objectref, array, value, dict_element, dict_begin, dict_end,
265  array_begin, array_end, object, object_begin, object_end,
266  xref, trailer, pdfrule;
267 
// Entry rule of the grammar.
268  const rule< ScannerT >& start() const { return pdfrule; }
269  };
270 
271  #ifndef USE_ASSIGN_ACTOR
272  void push_back_action_uint( unsigned int i )
273  {
274  m_aUIntStack.push_back( i );
275  }
276  void assign_action_double( double d )
277  {
278  m_fDouble = d;
279  }
280  #endif
281 
282  static void parseError( const char* pMessage, iteratorT pLocation )
283  {
284  throw_( pLocation, pMessage );
285  }
286 
287  OString iteratorToString( iteratorT first, iteratorT last ) const
288  {
289  OStringBuffer aStr( 32 );
290  while( first != last )
291  {
292  aStr.append( *first );
293  ++first;
294  }
295  return aStr.makeStringAndClear();
296  }
297 
298  void haveFile( iteratorT pBegin, SAL_UNUSED_PARAMETER iteratorT /*pEnd*/ )
299  {
300  if( m_aObjectStack.empty() )
301  {
302  PDFFile* pFile = new PDFFile();
303  pFile->m_nMinor = m_aUIntStack.back();
304  m_aUIntStack.pop_back();
305  pFile->m_nMajor = m_aUIntStack.back();
306  m_aUIntStack.pop_back();
307  m_aObjectStack.push_back( pFile );
308  }
309  else
310  parseError( "found file header in unusual place", pBegin );
311  }
312 
313  void pushComment( iteratorT first, iteratorT last )
314  {
315  // add a comment to the current stack element
316  PDFComment* pComment =
317  new PDFComment(iteratorToString(first,last));
318  if( m_aObjectStack.empty() )
319  m_aObjectStack.push_back( new PDFPart() );
320  PDFContainer* pContainer = dynamic_cast<PDFContainer*>(m_aObjectStack.back());
321  if( pContainer == nullptr )
322  parseError( "comment without container", first );
323  pContainer->m_aSubElements.emplace_back( pComment );
324  }
325 
326  void insertNewValue( std::unique_ptr<PDFEntry> pNewValue, iteratorT pPos )
327  {
328  PDFContainer* pContainer = nullptr;
329  const char* pMsg = nullptr;
330  if( ! m_aObjectStack.empty() )
331  {
332  pContainer = dynamic_cast<PDFContainer*>(m_aObjectStack.back());
333  if (pContainer)
334  {
335  if( dynamic_cast<PDFDict*>(pContainer) == nullptr &&
336  dynamic_cast<PDFArray*>(pContainer) == nullptr )
337  {
338  PDFObject* pObj = dynamic_cast<PDFObject*>(pContainer);
339  if( pObj )
340  {
341  if( pObj->m_pObject == nullptr )
342  pObj->m_pObject = pNewValue.get();
343  else
344  {
345  pMsg = "second value for object";
346  pContainer = nullptr;
347  }
348  }
349  else if( dynamic_cast<PDFDict*>(pNewValue.get()) )
350  {
351  PDFTrailer* pTrailer = dynamic_cast<PDFTrailer*>(pContainer);
352  if( pTrailer )
353  {
354  if( pTrailer->m_pDict == nullptr )
355  pTrailer->m_pDict = dynamic_cast<PDFDict*>(pNewValue.get());
356  else
357  pContainer = nullptr;
358  }
359  else
360  pContainer = nullptr;
361  }
362  else
363  pContainer = nullptr;
364  }
365  }
366  }
367  if( pContainer )
368  pContainer->m_aSubElements.emplace_back( std::move(pNewValue) );
369  else
370  {
371  if( ! pMsg )
372  {
373  if( dynamic_cast<PDFContainer*>(pNewValue.get()) )
374  pMsg = "array without container";
375  else
376  pMsg = "value without container";
377  }
378  parseError( pMsg, pPos );
379  }
380  }
381 
382  void pushName( iteratorT first, iteratorT last )
383  {
384  insertNewValue( std::make_unique<PDFName>(iteratorToString(first,last)), first );
385  }
386 
387  void pushDouble( iteratorT first, SAL_UNUSED_PARAMETER iteratorT /*last*/ )
388  {
389  insertNewValue( std::make_unique<PDFNumber>(m_fDouble), first );
390  }
391 
392  void pushString( iteratorT first, iteratorT last )
393  {
394  insertNewValue( std::make_unique<PDFString>(iteratorToString(first,last)), first );
395  }
396 
397  void pushBool( iteratorT first, iteratorT last )
398  {
399  insertNewValue( std::make_unique<PDFBool>( last-first == 4 ), first );
400  }
401 
402  void pushNull( iteratorT first, SAL_UNUSED_PARAMETER iteratorT )
403  {
404  insertNewValue( std::make_unique<PDFNull>(), first );
405  }
406 
407 
408  void beginObject( iteratorT first, SAL_UNUSED_PARAMETER iteratorT /*last*/ )
409  {
410  if( m_aObjectStack.empty() )
411  m_aObjectStack.push_back( new PDFPart() );
412 
413  unsigned int nGeneration = m_aUIntStack.back();
414  m_aUIntStack.pop_back();
415  unsigned int nObject = m_aUIntStack.back();
416  m_aUIntStack.pop_back();
417 
418  PDFObject* pObj = new PDFObject( nObject, nGeneration );
419  pObj->m_nOffset = first - m_aGlobalBegin;
420 
421  PDFContainer* pContainer = dynamic_cast<PDFContainer*>(m_aObjectStack.back());
422  if( pContainer &&
423  ( dynamic_cast<PDFFile*>(pContainer) ||
424  dynamic_cast<PDFPart*>(pContainer) ) )
425  {
426  pContainer->m_aSubElements.emplace_back( pObj );
427  m_aObjectStack.push_back( pObj );
428  }
429  else
430  parseError( "object in wrong place", first );
431  }
432 
433  void endObject( iteratorT first, SAL_UNUSED_PARAMETER iteratorT )
434  {
435  if( m_aObjectStack.empty() )
436  parseError( "endobj without obj", first );
437  else if( dynamic_cast<PDFObject*>(m_aObjectStack.back()) == nullptr )
438  parseError( "spurious endobj", first );
439  else
440  m_aObjectStack.pop_back();
441  }
442 
443  void pushObjectRef( iteratorT first, SAL_UNUSED_PARAMETER iteratorT )
444  {
445  unsigned int nGeneration = m_aUIntStack.back();
446  m_aUIntStack.pop_back();
447  unsigned int nObject = m_aUIntStack.back();
448  m_aUIntStack.pop_back();
449  insertNewValue( std::make_unique<PDFObjectRef>(nObject,nGeneration), first );
450  }
451 
452  void beginDict( iteratorT first, SAL_UNUSED_PARAMETER iteratorT )
453  {
454  PDFDict* pDict = new PDFDict();
455  pDict->m_nOffset = first - m_aGlobalBegin;
456 
457  insertNewValue( std::unique_ptr<PDFEntry>(pDict), first );
458  // will not come here if insertion fails (exception)
459  m_aObjectStack.push_back( pDict );
460  }
461  void endDict( iteratorT first, SAL_UNUSED_PARAMETER iteratorT )
462  {
463  PDFDict* pDict = nullptr;
464  if( m_aObjectStack.empty() )
465  parseError( "dictionary end without begin", first );
466  else if( (pDict = dynamic_cast<PDFDict*>(m_aObjectStack.back())) == nullptr )
467  parseError( "spurious dictionary end", first );
468  else
469  m_aObjectStack.pop_back();
470 
471  PDFEntry* pOffender = pDict->buildMap();
472  if( pOffender )
473  {
474  StringEmitContext aCtx;
475  aCtx.write( "offending dictionary element: ", 30 );
476  pOffender->emit( aCtx );
477  m_aErrorString = aCtx.getString();
478  parseError( m_aErrorString.getStr(), first );
479  }
480  }
481 
482  void beginArray( iteratorT first, SAL_UNUSED_PARAMETER iteratorT )
483  {
484  PDFArray* pArray = new PDFArray();
485  pArray->m_nOffset = first - m_aGlobalBegin;
486 
487  insertNewValue( std::unique_ptr<PDFEntry>(pArray), first );
488  // will not come here if insertion fails (exception)
489  m_aObjectStack.push_back( pArray );
490  }
491 
492  void endArray( iteratorT first, SAL_UNUSED_PARAMETER iteratorT )
493  {
494  if( m_aObjectStack.empty() )
495  parseError( "array end without begin", first );
496  else if( dynamic_cast<PDFArray*>(m_aObjectStack.back()) == nullptr )
497  parseError( "spurious array end", first );
498  else
499  m_aObjectStack.pop_back();
500  }
501 
502  void emitStream( iteratorT first, iteratorT last )
503  {
504  if( m_aObjectStack.empty() )
505  parseError( "stream without object", first );
506  PDFObject* pObj = dynamic_cast<PDFObject*>(m_aObjectStack.back());
507  if( pObj && pObj->m_pObject )
508  {
509  if( pObj->m_pStream )
510  parseError( "multiple streams in object", first );
511 
512  PDFDict* pDict = dynamic_cast<PDFDict*>(pObj->m_pObject);
513  if( pDict )
514  {
515  PDFStream* pStream = new PDFStream( first - m_aGlobalBegin, last - m_aGlobalBegin, pDict );
516 
517  pObj->m_pStream = pStream;
518  pObj->m_aSubElements.emplace_back( pStream );
519  }
520  }
521  else
522  parseError( "stream without object", first );
523  }
524 
525  void beginTrailer( iteratorT first, SAL_UNUSED_PARAMETER iteratorT )
526  {
527  if( m_aObjectStack.empty() )
528  m_aObjectStack.push_back( new PDFPart() );
529 
530  PDFTrailer* pTrailer = new PDFTrailer();
531  pTrailer->m_nOffset = first - m_aGlobalBegin;
532 
533  PDFContainer* pContainer = dynamic_cast<PDFContainer*>(m_aObjectStack.back());
534  if( pContainer &&
535  ( dynamic_cast<PDFFile*>(pContainer) ||
536  dynamic_cast<PDFPart*>(pContainer) ) )
537  {
538  pContainer->m_aSubElements.emplace_back( pTrailer );
539  m_aObjectStack.push_back( pTrailer );
540  }
541  else
542  parseError( "trailer in wrong place", first );
543  }
544 
545  void endTrailer( iteratorT first, SAL_UNUSED_PARAMETER iteratorT )
546  {
547  if( m_aObjectStack.empty() )
548  parseError( "%%EOF without trailer", first );
549  else if( dynamic_cast<PDFTrailer*>(m_aObjectStack.back()) == nullptr )
550  parseError( "spurious %%EOF", first );
551  else
552  m_aObjectStack.pop_back();
553  }
554 };
555 
556 }
557 
558 #ifdef _WIN32
559 std::unique_ptr<PDFEntry> PDFReader::read( const char* pBuffer, unsigned int nLen )
560 {
561  PDFGrammar<const char*> aGrammar( pBuffer );
562 
563  try
564  {
565 #if OSL_DEBUG_LEVEL > 0
566  boost::spirit::classic::parse_info<const char*> aInfo =
567 #endif
568  boost::spirit::classic::parse( pBuffer,
569  pBuffer+nLen,
570  aGrammar,
571  boost::spirit::classic::space_p );
572 #if OSL_DEBUG_LEVEL > 0
573  SAL_INFO("sdext.pdfimport.pdfparse", "parseinfo: stop = " << aInfo.stop << " (buff=" << pBuffer << ", offset = " << aInfo.stop - pBuffer << "), hit = " << (aInfo.hit ? OUString("true") : OUString("false")) << ", full = " << (aInfo.full ? OUString("true") : OUString("false")) << ", length = " << static_cast<int>(aInfo.length) );
574 #endif
575  }
576  catch( const parser_error<const char*, const char*>& rError )
577  {
578 #if OSL_DEBUG_LEVEL > 0
579  OString aTmp;
580  unsigned int nElem = aGrammar.m_aObjectStack.size();
581  for( unsigned int i = 0; i < nElem; i++ )
582  aTmp += OStringLiteral(" ") + typeid( *(aGrammar.m_aObjectStack[i]) ).name();
583 
584  SAL_WARN("sdext.pdfimport.pdfparse", "parse error: " << rError.descriptor << " at buffer pos " << rError.where - pBuffer << ", object stack: " << aTmp);
585 #else
586  (void)rError;
587 #endif
588  }
589 
590  std::unique_ptr<PDFEntry> pRet;
591  unsigned int nEntries = aGrammar.m_aObjectStack.size();
592  if( nEntries == 1 )
593  {
594  pRet.reset(aGrammar.m_aObjectStack.back());
595  aGrammar.m_aObjectStack.pop_back();
596  }
597 #if OSL_DEBUG_LEVEL > 0
598  else if( nEntries > 1 )
599  SAL_WARN("sdext.pdfimport.pdfparse", "error got " << nEntries << " stack objects in parse" );
600 #endif
601 
602  return pRet;
603 }
604 #endif
605 
606 std::unique_ptr<PDFEntry> PDFReader::read( const char* pFileName )
607 {
608 #ifdef _WIN32
609  /* #i106583#
610  since converting to boost 1.39 file_iterator does not work anymore on all Windows systems
611  C++ stdlib istream_iterator does not allow "-" apparently
612  using spirit 2.0 doesn't work in our environment with the MSC
613 
614  So for the time being bite the bullet and read the whole file.
615  FIXME: give Spirit 2.x another try when we upgrade boost again.
616  */
617  std::unique_ptr<PDFEntry> pRet;
618  FILE* fp = fopen( pFileName, "rb" );
619  if( fp )
620  {
621  fseek( fp, 0, SEEK_END );
622  unsigned int nLen = static_cast<unsigned int>(ftell( fp ));
623  fseek( fp, 0, SEEK_SET );
624  char* pBuf = static_cast<char*>(std::malloc( nLen ));
625  if( pBuf )
626  {
627  fread( pBuf, 1, nLen, fp );
628  pRet = read( pBuf, nLen );
629  std::free( pBuf );
630  }
631  fclose( fp );
632  }
633  return pRet;
634 #else
635  file_iterator<> file_start( pFileName );
636  if( ! file_start )
637  return nullptr;
638  file_iterator<> file_end = file_start.make_end();
639  PDFGrammar< file_iterator<> > aGrammar( file_start );
640 
641  try
642  {
643 #if OSL_DEBUG_LEVEL > 0
644  boost::spirit::classic::parse_info< file_iterator<> > aInfo =
645 #endif
646  boost::spirit::classic::parse( file_start,
647  file_end,
648  aGrammar,
649  boost::spirit::classic::space_p );
650 #if OSL_DEBUG_LEVEL > 0
651  SAL_INFO("sdext.pdfimport.pdfparse", "parseinfo: stop at offset = " << aInfo.stop - file_start << ", hit = " << (aInfo.hit ? "true" : "false") << ", full = " << (aInfo.full ? "true" : "false") << ", length = " << aInfo.length);
652 #endif
653  }
654  catch( const parser_error< const char*, file_iterator<> >& rError )
655  {
656  SAL_WARN("sdext.pdfimport.pdfparse", "parse error: " << rError.descriptor << " at buffer pos " << rError.where - file_start);
657 #if OSL_DEBUG_LEVEL > 0
658  OUStringBuffer aTmp;
659  unsigned int nElem = aGrammar.m_aObjectStack.size();
660  for( unsigned int i = 0; i < nElem; i++ )
661  {
662  aTmp.append(" ");
663  aTmp.appendAscii(typeid( *(aGrammar.m_aObjectStack[i]) ).name());
664  }
665  SAL_WARN("sdext.pdfimport.pdfparse", "parse error object stack: " << aTmp.makeStringAndClear());
666 #endif
667  }
668 
669  std::unique_ptr<PDFEntry> pRet;
670  unsigned int nEntries = aGrammar.m_aObjectStack.size();
671  if( nEntries == 1 )
672  {
673  pRet.reset(aGrammar.m_aObjectStack.back());
674  aGrammar.m_aObjectStack.pop_back();
675  }
676 #if OSL_DEBUG_LEVEL > 0
677  else if( nEntries > 1 )
678  {
679  SAL_WARN("sdext.pdfimport.pdfparse", "error got " << nEntries << " stack objects in parse");
680  for( unsigned int i = 0; i < nEntries; i++ )
681  {
682  SAL_WARN("sdext.pdfimport.pdfparse", typeid(*aGrammar.m_aObjectStack[i]).name());
683  PDFObject* pObj = dynamic_cast<PDFObject*>(aGrammar.m_aObjectStack[i]);
684  if( pObj )
685  SAL_WARN("sdext.pdfimport.pdfparse", " -> object " << pObj->m_nNumber << " generation " << pObj->m_nGeneration);
686  else
687  SAL_WARN("sdext.pdfimport.pdfparse", "(type " << typeid(*aGrammar.m_aObjectStack[i]).name() << ")");
688  }
689  }
690 #endif
691  return pRet;
692 #endif // WIN32
693 }
694 
695 #if defined(_MSC_VER)
696 #pragma warning(pop)
697 #endif
698 
699 /* vim:set shiftwidth=4 softtabstop=4 expandtab: */
std::vector< std::unique_ptr< PDFEntry > > m_aSubElements
Definition: pdfparse.hxx:160
uno::Sequence< sal_Int8 > m_aBuf
Definition: odfemitter.cxx:41
unsigned int m_nMajor
Definition: pdfparse.hxx:234
PDFStream * m_pStream
Definition: pdfparse.hxx:260
constexpr std::enable_if_t< std::is_signed_v< T >, std::make_unsigned_t< T > > make_unsigned(T value)
PDFEntry * buildMap()
Definition: pdfentries.cxx:576
Reference< XOutputStream > stream
PDFEntry * m_pObject
Definition: pdfparse.hxx:259
#define SAL_INFO(area, stream)
const char * name
Any value
#define SAL_WARN(area, stream)
OUString getString(const Any &_rAny)
aStr
unsigned int m_nMinor
Definition: pdfparse.hxx:235
static osl::File * pStream
Definition: emitcontext.cxx:32
virtual bool emit(EmitContext &rWriteContext) const =0
typedef void(CALLTYPE *GetFuncDataPtr)(sal_uInt16 &nNo