LibreOffice Module sdext (master) 1
pdfparse.cxx
Go to the documentation of this file.
1/* -*- Mode: C++; tab-width: 4; indent-tabs-mode: nil; c-basic-offset: 4 -*- */
2/*
3 * This file is part of the LibreOffice project.
4 *
5 * This Source Code Form is subject to the terms of the Mozilla Public
6 * License, v. 2.0. If a copy of the MPL was not distributed with this
7 * file, You can obtain one at http://mozilla.org/MPL/2.0/.
8 *
9 * This file incorporates work covered by the following license notice:
10 *
11 * Licensed to the Apache Software Foundation (ASF) under one or more
12 * contributor license agreements. See the NOTICE file distributed
13 * with this work for additional information regarding copyright
14 * ownership. The ASF licenses this file to you under the Apache
15 * License, Version 2.0 (the "License"); you may not use this file
16 * except in compliance with the License. You may obtain a copy of
17 * the License at http://www.apache.org/licenses/LICENSE-2.0 .
18 */
19
20
21#include <pdfparse.hxx>
22
23// boost using obsolete stuff
24#if defined(_MSC_VER)
25#pragma warning(push)
26#pragma warning(disable:4996)
27#pragma warning(disable:4503)
28#endif
29
30// workaround windows compiler: do not include multi_pass.hpp
31#include <boost/spirit/include/classic_core.hpp>
32#include <boost/spirit/include/classic_utility.hpp>
33#include <boost/spirit/include/classic_error_handling.hpp>
34#include <boost/spirit/include/classic_file_iterator.hpp>
35#include <boost/bind/bind.hpp>
36
37#include <string.h>
38
39#include <o3tl/safeint.hxx>
40#include <rtl/strbuf.hxx>
41#include <rtl/ustrbuf.hxx>
42#include <sal/log.hxx>
43#include <utility>
44
45// disable warnings again because someone along the line has enabled them
46// (we have included boost headers, what did you expect?)
47#if defined(_MSC_VER)
48#pragma warning(push)
49#pragma warning(disable:4996)
50#pragma warning(disable:4503)
51#endif
52
53
54using namespace boost::spirit::classic;
55using namespace pdfparse;
56
57namespace {
58
59class StringEmitContext : public EmitContext
60{
61 OStringBuffer m_aBuf;
62 public:
63 StringEmitContext() : m_aBuf(256) {}
64
65 virtual bool write( const void* pBuf, unsigned int nLen ) noexcept override
66 {
67 m_aBuf.append( static_cast<const char*>(pBuf), nLen );
68 return true;
69 }
70 virtual unsigned int getCurPos() noexcept override { return m_aBuf.getLength(); }
71 virtual bool copyOrigBytes( unsigned int nOrigOffset, unsigned int nLen ) noexcept override
72 { return (nOrigOffset+nLen < o3tl::make_unsigned(m_aBuf.getLength()) ) &&
73 write( m_aBuf.getStr() + nOrigOffset, nLen ); }
74 virtual unsigned int readOrigBytes( unsigned int nOrigOffset, unsigned int nLen, void* pBuf ) noexcept override
75 {
76 if( nOrigOffset+nLen < o3tl::make_unsigned(m_aBuf.getLength()) )
77 {
78 memcpy( pBuf, m_aBuf.getStr()+nOrigOffset, nLen );
79 return nLen;
80 }
81 return 0;
82 }
83
84 OString getString() { return m_aBuf.makeStringAndClear(); }
85};
86
87template< class iteratorT >
88class PDFGrammar : public grammar< PDFGrammar<iteratorT> >
89{
90public:
91
/// Creates the grammar; @p first is stored as m_aGlobalBegin, the position
/// that the m_nOffset values computed by the semantic actions are relative to.
explicit PDFGrammar( iteratorT first )
    : m_fDouble( 0.0 ), m_aGlobalBegin(std::move( first )) {}
94 ~PDFGrammar()
95 {
96 if( !m_aObjectStack.empty() )
97 delete m_aObjectStack.front();
98 }
99
100 double m_fDouble;
101 std::vector< unsigned int > m_aUIntStack;
102 std::vector< PDFEntry* > m_aObjectStack;
103 OString m_aErrorString;
104 iteratorT m_aGlobalBegin;
105
106public:
/** Functor used with functor_parser to consume the body of a
    parenthesised PDF string, i.e. everything between '(' and the
    matching ')'.

    Nested, unescaped parentheses are tracked via a nesting counter; a
    backslash makes the following character be consumed without being
    inspected.
 */
struct pdf_string_parser
{
    typedef nil_t result_t;
    /** @return the number of characters consumed, or -1 if the end of
                input was reached (which includes never finding the
                closing ')')
     */
    template <typename ScannerT>
    std::ptrdiff_t
    operator()(ScannerT const& scan, result_t&) const
    {
        std::ptrdiff_t len = 0;

        int nBraceLevel = 0;
        while( ! scan.at_end() )
        {
            char c = *scan;
            if( c == ')' )
            {
                nBraceLevel--;
                // the ')' closing the string itself is not consumed
                if( nBraceLevel < 0 )
                    break;
            }
            else if( c == '(' )
                nBraceLevel++;
            else if( c == '\\' ) // ignore escaped braces
            {
                // step over the backslash here; the common increment
                // below then steps over the escaped character
                ++len;
                ++scan.first; // tdf#63054: avoid skipping spaces
                if( scan.first == scan.last ) // tdf#63054: avoid skipping spaces
                    break;
            }
            ++len;
            ++scan;
        }
        return scan.at_end() ? -1 : len;
    }
};
141
/** Spirit Classic grammar definition.

    The constructor wires each rule to a semantic action, i.e. a member
    function of the enclosing PDFGrammar bound via boost::bind. pdfrule is
    the start rule.
 */
template< typename ScannerT >
struct definition
{
    explicit definition( const PDFGrammar<iteratorT>& rSelf )
    {
        using namespace boost::placeholders;

        // the semantic actions modify the grammar object, so constness is
        // cast away from the reference handed in
        PDFGrammar<iteratorT>* pSelf = const_cast< PDFGrammar<iteratorT>* >( &rSelf );

        // workaround workshop compiler: comment_p doesn't work
        // comment = comment_p("%")[boost::bind(&PDFGrammar::pushComment, pSelf, _1, _2 )];
        comment     = lexeme_d[ (ch_p('%') >> *(~ch_p('\r') & ~ch_p('\n')) >> eol_p)[boost::bind(&PDFGrammar::pushComment, pSelf, _1, _2 )] ];

        boolean     = (str_p("true") | str_p("false"))[boost::bind(&PDFGrammar::pushBool, pSelf, _1, _2)];

        // workaround workshop compiler: confix_p doesn't work
        //stream      = confix_p( "stream", *anychar_p, "endstream" )[boost::bind(&PDFGrammar::emitStream, pSelf, _1, _2 )];
        stream      = (str_p("stream") >> *(anychar_p - str_p("endstream")) >> str_p("endstream"))[boost::bind(&PDFGrammar::emitStream, pSelf, _1, _2 )];

        // a name is '/' followed by any run of characters that are neither
        // listed in the chset below nor NUL; only that run (without the
        // '/') is passed to pushName
        name        = lexeme_d[
                        ch_p('/')
                        >> (*(anychar_p-chset_p("\t\n\f\r ()<>[]{}/%")-ch_p('\0')))
                           [boost::bind(&PDFGrammar::pushName, pSelf, _1, _2)] ];

        // workaround workshop compiler: confix_p doesn't work
        //stringtype  = ( confix_p("(",*anychar_p, ")") |
        //                confix_p("<",*xdigit_p,  ">") )
        //              [boost::bind(&PDFGrammar::pushString,pSelf, _1, _2)];

        // either a parenthesised string (body handled by pdf_string_parser)
        // or a '<'...'>' run of hex digits
        stringtype  = ( ( ch_p('(') >> functor_parser<pdf_string_parser>() >> ch_p(')') ) |
                        ( ch_p('<') >> *xdigit_p >> ch_p('>') ) )
                      [boost::bind(&PDFGrammar::pushString,pSelf, _1, _2)];

        null_object = str_p( "null" )[boost::bind(&PDFGrammar::pushNull, pSelf, _1, _2)];

        // "<number> <number> R"; both numbers go onto m_aUIntStack and are
        // popped again by pushObjectRef
        #ifdef USE_ASSIGN_ACTOR
        objectref   = ( uint_p[push_back_a(pSelf->m_aUIntStack)]
                        >> uint_p[push_back_a(pSelf->m_aUIntStack)]
                        >> ch_p('R')
                        >> eps_p
                      )[boost::bind(&PDFGrammar::pushObjectRef, pSelf, _1, _2)];
        #else
        objectref   = ( uint_p[boost::bind(&PDFGrammar::push_back_action_uint, pSelf, _1)]
                        >> uint_p[boost::bind(&PDFGrammar::push_back_action_uint, pSelf, _1)]
                        >> ch_p('R')
                        >> eps_p
                      )[boost::bind(&PDFGrammar::pushObjectRef, pSelf, _1, _2)];
        #endif

        // objectref is tried before the plain real number alternative
        #ifdef USE_ASSIGN_ACTOR
        simple_type = objectref | name |
                      ( real_p[assign_a(pSelf->m_fDouble)] >> eps_p )
                      [boost::bind(&PDFGrammar::pushDouble, pSelf, _1, _2)]
                      | stringtype | boolean | null_object;
        #else
        simple_type = objectref | name |
                      ( real_p[boost::bind(&PDFGrammar::assign_action_double, pSelf, _1)] >> eps_p )
                      [boost::bind(&PDFGrammar::pushDouble, pSelf, _1, _2)]
                      | stringtype | boolean | null_object;
        #endif

        dict_begin  = str_p( "<<" )[boost::bind(&PDFGrammar::beginDict, pSelf, _1, _2)];
        dict_end    = str_p( ">>" )[boost::bind(&PDFGrammar::endDict, pSelf, _1, _2)];

        array_begin = str_p("[")[boost::bind(&PDFGrammar::beginArray,pSelf, _1, _2)];
        array_end   = str_p("]")[boost::bind(&PDFGrammar::endArray,pSelf, _1, _2)];

        // "<number> <number> obj"; the numbers are popped by beginObject
        #ifdef USE_ASSIGN_ACTOR
        object_begin= uint_p[push_back_a(pSelf->m_aUIntStack)]
                      >> uint_p[push_back_a(pSelf->m_aUIntStack)]
                      >> str_p("obj" )[boost::bind(&PDFGrammar::beginObject, pSelf, _1, _2)];
        #else
        object_begin= uint_p[boost::bind(&PDFGrammar::push_back_action_uint, pSelf, _1)]
                      >> uint_p[boost::bind(&PDFGrammar::push_back_action_uint, pSelf, _1)]
                      >> str_p("obj" )[boost::bind(&PDFGrammar::beginObject, pSelf, _1, _2)];
        #endif
        object_end  = str_p( "endobj" )[boost::bind(&PDFGrammar::endObject, pSelf, _1, _2)];

        // the xref table is only recognised; no semantic action is
        // attached to it
        xref        = str_p( "xref" ) >> uint_p >> uint_p
                      >> lexeme_d[
                            +( repeat_p(10)[digit_p]
                               >> blank_p
                               >> repeat_p(5)[digit_p]
                               >> blank_p
                               >> ( ch_p('n') | ch_p('f') )
                               >> repeat_p(2)[space_p]
                             ) ];

        // dictionaries and arrays are not parsed recursively: their begin
        // and end tokens are individual elements and nesting is tracked by
        // the semantic actions on m_aObjectStack
        dict_element= dict_begin | comment | simple_type
                      | array_begin | array_end | dict_end;

        object      = object_begin
                      >> *dict_element
                      >> !stream
                      >> object_end;

        trailer     = str_p( "trailer" )[boost::bind(&PDFGrammar::beginTrailer,pSelf,_1,_2)]
                      >> *dict_element
                      >> str_p("startxref")
                      >> uint_p
                      >> str_p("%%EOF")[boost::bind(&PDFGrammar::endTrailer,pSelf,_1,_2)];

        // optional "%PDF-<major>.<minor>" header line, followed by any
        // number of comments, objects and xref/trailer pairs
        #ifdef USE_ASSIGN_ACTOR
        pdfrule     = ! (lexeme_d[
                            str_p( "%PDF-" )
                            >> uint_p[push_back_a(pSelf->m_aUIntStack)]
                            >> ch_p('.')
                            >> uint_p[push_back_a(pSelf->m_aUIntStack)]
                            >> *((~ch_p('\r') & ~ch_p('\n')))
                            >> eol_p
                         ])[boost::bind(&PDFGrammar::haveFile,pSelf, _1, _2)]
                      >> *( comment | object | ( xref >> trailer ) );
        #else
        pdfrule     = ! (lexeme_d[
                            str_p( "%PDF-" )
                            >> uint_p[boost::bind(&PDFGrammar::push_back_action_uint, pSelf, _1)]
                            >> ch_p('.')
                            >> uint_p[boost::bind(&PDFGrammar::push_back_action_uint, pSelf, _1)]
                            >> *(~ch_p('\r') & ~ch_p('\n'))
                            >> eol_p
                         ])[boost::bind(&PDFGrammar::haveFile,pSelf, _1, _2)]
                      >> *( comment | object | ( xref >> trailer ) );
        #endif
    }
    rule< ScannerT > comment, stream, boolean, name, stringtype, null_object, simple_type,
                     objectref, array, value, dict_element, dict_begin, dict_end,
                     array_begin, array_end, object, object_begin, object_end,
                     xref, trailer, pdfrule;

    /// Returns the start rule of the grammar.
    const rule< ScannerT >& start() const { return pdfrule; }
};
273
274 #ifndef USE_ASSIGN_ACTOR
/// Semantic action for uint_p: pushes the parsed number onto m_aUIntStack.
void push_back_action_uint( unsigned int i )
{
    m_aUIntStack.push_back( i );
}
/// Semantic action for real_p: stores the parsed number in m_fDouble,
/// from where pushDouble picks it up.
void assign_action_double( double d )
{
    m_fDouble = d;
}
283 #endif
284
/// Reports a parse error through Spirit's throw_(), passing the location
/// and the message on. The callers in this class rely on this call not
/// returning normally.
static void parseError( const char* pMessage, iteratorT pLocation )
{
    throw_( pLocation, pMessage );
}
289
290 OString iteratorToString( iteratorT first, iteratorT last ) const
291 {
292 OStringBuffer aStr( 32 );
293 while( first != last )
294 {
295 aStr.append( *first );
296 ++first;
297 }
298 return aStr.makeStringAndClear();
299 }
300
301 void haveFile( iteratorT pBegin, SAL_UNUSED_PARAMETER iteratorT /*pEnd*/ )
302 {
303 if( m_aObjectStack.empty() )
304 {
305 PDFFile* pFile = new PDFFile();
306 pFile->m_nMinor = m_aUIntStack.back();
307 m_aUIntStack.pop_back();
308 pFile->m_nMajor = m_aUIntStack.back();
309 m_aUIntStack.pop_back();
310 m_aObjectStack.push_back( pFile );
311 }
312 else
313 parseError( "found file header in unusual place", pBegin );
314 }
315
316 void pushComment( iteratorT first, iteratorT last )
317 {
318 // add a comment to the current stack element
319 PDFComment* pComment =
320 new PDFComment(iteratorToString(first,last));
321 if( m_aObjectStack.empty() )
322 m_aObjectStack.push_back( new PDFPart() );
323 PDFContainer* pContainer = dynamic_cast<PDFContainer*>(m_aObjectStack.back());
324 if( pContainer == nullptr )
325 parseError( "comment without container", first );
326 pContainer->m_aSubElements.emplace_back( pComment );
327 }
328
329 void insertNewValue( std::unique_ptr<PDFEntry> pNewValue, iteratorT pPos )
330 {
331 PDFContainer* pContainer = nullptr;
332 const char* pMsg = nullptr;
333 if( ! m_aObjectStack.empty() )
334 {
335 pContainer = dynamic_cast<PDFContainer*>(m_aObjectStack.back());
336 if (pContainer)
337 {
338 if( dynamic_cast<PDFDict*>(pContainer) == nullptr &&
339 dynamic_cast<PDFArray*>(pContainer) == nullptr )
340 {
341 PDFObject* pObj = dynamic_cast<PDFObject*>(pContainer);
342 if( pObj )
343 {
344 if( pObj->m_pObject == nullptr )
345 pObj->m_pObject = pNewValue.get();
346 else
347 {
348 pMsg = "second value for object";
349 pContainer = nullptr;
350 }
351 }
352 else if( dynamic_cast<PDFDict*>(pNewValue.get()) )
353 {
354 PDFTrailer* pTrailer = dynamic_cast<PDFTrailer*>(pContainer);
355 if( pTrailer )
356 {
357 if( pTrailer->m_pDict == nullptr )
358 pTrailer->m_pDict = dynamic_cast<PDFDict*>(pNewValue.get());
359 else
360 pContainer = nullptr;
361 }
362 else
363 pContainer = nullptr;
364 }
365 else
366 pContainer = nullptr;
367 }
368 }
369 }
370 if( pContainer )
371 pContainer->m_aSubElements.emplace_back( std::move(pNewValue) );
372 else
373 {
374 if( ! pMsg )
375 {
376 if( dynamic_cast<PDFContainer*>(pNewValue.get()) )
377 pMsg = "array without container";
378 else
379 pMsg = "value without container";
380 }
381 parseError( pMsg, pPos );
382 }
383 }
384
385 void pushName( iteratorT first, iteratorT last )
386 {
387 insertNewValue( std::make_unique<PDFName>(iteratorToString(first,last)), first );
388 }
389
390 void pushDouble( iteratorT first, SAL_UNUSED_PARAMETER iteratorT /*last*/ )
391 {
392 insertNewValue( std::make_unique<PDFNumber>(m_fDouble), first );
393 }
394
395 void pushString( iteratorT first, iteratorT last )
396 {
397 insertNewValue( std::make_unique<PDFString>(iteratorToString(first,last)), first );
398 }
399
400 void pushBool( iteratorT first, iteratorT last )
401 {
402 insertNewValue( std::make_unique<PDFBool>( last-first == 4 ), first );
403 }
404
405 void pushNull( iteratorT first, SAL_UNUSED_PARAMETER iteratorT )
406 {
407 insertNewValue( std::make_unique<PDFNull>(), first );
408 }
409
410
411 void beginObject( iteratorT first, SAL_UNUSED_PARAMETER iteratorT /*last*/ )
412 {
413 if( m_aObjectStack.empty() )
414 m_aObjectStack.push_back( new PDFPart() );
415
416 unsigned int nGeneration = m_aUIntStack.back();
417 m_aUIntStack.pop_back();
418 unsigned int nObject = m_aUIntStack.back();
419 m_aUIntStack.pop_back();
420
421 PDFObject* pObj = new PDFObject( nObject, nGeneration );
422 pObj->m_nOffset = first - m_aGlobalBegin;
423
424 PDFContainer* pContainer = dynamic_cast<PDFContainer*>(m_aObjectStack.back());
425 if( pContainer &&
426 ( dynamic_cast<PDFFile*>(pContainer) ||
427 dynamic_cast<PDFPart*>(pContainer) ) )
428 {
429 pContainer->m_aSubElements.emplace_back( pObj );
430 m_aObjectStack.push_back( pObj );
431 }
432 else
433 parseError( "object in wrong place", first );
434 }
435
436 void endObject( iteratorT first, SAL_UNUSED_PARAMETER iteratorT )
437 {
438 if( m_aObjectStack.empty() )
439 parseError( "endobj without obj", first );
440 else if( dynamic_cast<PDFObject*>(m_aObjectStack.back()) == nullptr )
441 parseError( "spurious endobj", first );
442 else
443 m_aObjectStack.pop_back();
444 }
445
446 void pushObjectRef( iteratorT first, SAL_UNUSED_PARAMETER iteratorT )
447 {
448 unsigned int nGeneration = m_aUIntStack.back();
449 m_aUIntStack.pop_back();
450 unsigned int nObject = m_aUIntStack.back();
451 m_aUIntStack.pop_back();
452 insertNewValue( std::make_unique<PDFObjectRef>(nObject,nGeneration), first );
453 }
454
455 void beginDict( iteratorT first, SAL_UNUSED_PARAMETER iteratorT )
456 {
457 PDFDict* pDict = new PDFDict();
458 pDict->m_nOffset = first - m_aGlobalBegin;
459
460 insertNewValue( std::unique_ptr<PDFEntry>(pDict), first );
461 // will not come here if insertion fails (exception)
462 m_aObjectStack.push_back( pDict );
463 }
464 void endDict( iteratorT first, SAL_UNUSED_PARAMETER iteratorT )
465 {
466 PDFDict* pDict = nullptr;
467 if( m_aObjectStack.empty() )
468 parseError( "dictionary end without begin", first );
469 else if( (pDict = dynamic_cast<PDFDict*>(m_aObjectStack.back())) == nullptr )
470 parseError( "spurious dictionary end", first );
471 else
472 m_aObjectStack.pop_back();
473
474 PDFEntry* pOffender = pDict->buildMap();
475 if( pOffender )
476 {
477 StringEmitContext aCtx;
478 aCtx.write( "offending dictionary element: ", 30 );
479 pOffender->emit( aCtx );
480 m_aErrorString = aCtx.getString();
481 parseError( m_aErrorString.getStr(), first );
482 }
483 }
484
485 void beginArray( iteratorT first, SAL_UNUSED_PARAMETER iteratorT )
486 {
487 PDFArray* pArray = new PDFArray();
488 pArray->m_nOffset = first - m_aGlobalBegin;
489
490 insertNewValue( std::unique_ptr<PDFEntry>(pArray), first );
491 // will not come here if insertion fails (exception)
492 m_aObjectStack.push_back( pArray );
493 }
494
495 void endArray( iteratorT first, SAL_UNUSED_PARAMETER iteratorT )
496 {
497 if( m_aObjectStack.empty() )
498 parseError( "array end without begin", first );
499 else if( dynamic_cast<PDFArray*>(m_aObjectStack.back()) == nullptr )
500 parseError( "spurious array end", first );
501 else
502 m_aObjectStack.pop_back();
503 }
504
505 void emitStream( iteratorT first, iteratorT last )
506 {
507 if( m_aObjectStack.empty() )
508 parseError( "stream without object", first );
509 PDFObject* pObj = dynamic_cast<PDFObject*>(m_aObjectStack.back());
510 if( pObj && pObj->m_pObject )
511 {
512 if( pObj->m_pStream )
513 parseError( "multiple streams in object", first );
514
515 PDFDict* pDict = dynamic_cast<PDFDict*>(pObj->m_pObject);
516 if( pDict )
517 {
518 PDFStream* pStream = new PDFStream( first - m_aGlobalBegin, last - m_aGlobalBegin, pDict );
519
520 pObj->m_pStream = pStream;
521 pObj->m_aSubElements.emplace_back( pStream );
522 }
523 }
524 else
525 parseError( "stream without object", first );
526 }
527
528 void beginTrailer( iteratorT first, SAL_UNUSED_PARAMETER iteratorT )
529 {
530 if( m_aObjectStack.empty() )
531 m_aObjectStack.push_back( new PDFPart() );
532
533 PDFTrailer* pTrailer = new PDFTrailer();
534 pTrailer->m_nOffset = first - m_aGlobalBegin;
535
536 PDFContainer* pContainer = dynamic_cast<PDFContainer*>(m_aObjectStack.back());
537 if( pContainer &&
538 ( dynamic_cast<PDFFile*>(pContainer) ||
539 dynamic_cast<PDFPart*>(pContainer) ) )
540 {
541 pContainer->m_aSubElements.emplace_back( pTrailer );
542 m_aObjectStack.push_back( pTrailer );
543 }
544 else
545 parseError( "trailer in wrong place", first );
546 }
547
548 void endTrailer( iteratorT first, SAL_UNUSED_PARAMETER iteratorT )
549 {
550 if( m_aObjectStack.empty() )
551 parseError( "%%EOF without trailer", first );
552 else if( dynamic_cast<PDFTrailer*>(m_aObjectStack.back()) == nullptr )
553 parseError( "spurious %%EOF", first );
554 else
555 m_aObjectStack.pop_back();
556 }
557};
558
559}
560
561#ifdef _WIN32
562std::unique_ptr<PDFEntry> PDFReader::read( const char* pBuffer, unsigned int nLen )
563{
564 PDFGrammar<const char*> aGrammar( pBuffer );
565
566 try
567 {
568#if OSL_DEBUG_LEVEL > 0
569 boost::spirit::classic::parse_info<const char*> aInfo =
570#endif
571 boost::spirit::classic::parse( pBuffer,
572 pBuffer+nLen,
573 aGrammar,
574 boost::spirit::classic::space_p );
575#if OSL_DEBUG_LEVEL > 0
576 SAL_INFO("sdext.pdfimport.pdfparse", "parseinfo: stop = " << aInfo.stop << " (buff=" << pBuffer << ", offset = " << aInfo.stop - pBuffer << "), hit = " << (aInfo.hit ? OUString("true") : OUString("false")) << ", full = " << (aInfo.full ? OUString("true") : OUString("false")) << ", length = " << static_cast<int>(aInfo.length) );
577#endif
578 }
579 catch( const parser_error<const char*, const char*>& rError )
580 {
581#if OSL_DEBUG_LEVEL > 0
582 OString aTmp;
583 unsigned int nElem = aGrammar.m_aObjectStack.size();
584 for( unsigned int i = 0; i < nElem; i++ )
585 aTmp += OString::Concat(" ") + typeid( *(aGrammar.m_aObjectStack[i]) ).name();
586
587 SAL_WARN("sdext.pdfimport.pdfparse", "parse error: " << rError.descriptor << " at buffer pos " << rError.where - pBuffer << ", object stack: " << aTmp);
588#else
589 (void)rError;
590#endif
591 }
592
593 std::unique_ptr<PDFEntry> pRet;
594 unsigned int nEntries = aGrammar.m_aObjectStack.size();
595 if( nEntries == 1 )
596 {
597 pRet.reset(aGrammar.m_aObjectStack.back());
598 aGrammar.m_aObjectStack.pop_back();
599 }
600#if OSL_DEBUG_LEVEL > 0
601 else if( nEntries > 1 )
602 SAL_WARN("sdext.pdfimport.pdfparse", "error got " << nEntries << " stack objects in parse" );
603#endif
604
605 return pRet;
606}
607#endif
608
609std::unique_ptr<PDFEntry> PDFReader::read( const char* pFileName )
610{
611#ifdef _WIN32
612 /* #i106583#
613 since converting to boost 1.39 file_iterator does not work anymore on all Windows systems
614 C++ stdlib istream_iterator does not allow "-" apparently
615 using spirit 2.0 doesn't work in our environment with the MSC
616
617 So for the time being bite the bullet and read the whole file.
618 FIXME: give Spirit 2.x another try when we upgrade boost again.
619 */
620 std::unique_ptr<PDFEntry> pRet;
621 FILE* fp = fopen( pFileName, "rb" );
622 if( fp )
623 {
624 fseek( fp, 0, SEEK_END );
625 unsigned int nLen = static_cast<unsigned int>(ftell( fp ));
626 fseek( fp, 0, SEEK_SET );
627 char* pBuf = static_cast<char*>(std::malloc( nLen ));
628 if( pBuf )
629 {
630 fread( pBuf, 1, nLen, fp );
631 pRet = read( pBuf, nLen );
632 std::free( pBuf );
633 }
634 fclose( fp );
635 }
636 return pRet;
637#else
638 file_iterator<> file_start( pFileName );
639 if( ! file_start )
640 return nullptr;
641 file_iterator<> file_end = file_start.make_end();
642 PDFGrammar< file_iterator<> > aGrammar( file_start );
643
644 try
645 {
646#if OSL_DEBUG_LEVEL > 0
647 boost::spirit::classic::parse_info< file_iterator<> > aInfo =
648#endif
649 boost::spirit::classic::parse( file_start,
650 file_end,
651 aGrammar,
652 boost::spirit::classic::space_p );
653#if OSL_DEBUG_LEVEL > 0
654 SAL_INFO("sdext.pdfimport.pdfparse", "parseinfo: stop at offset = " << aInfo.stop - file_start << ", hit = " << (aInfo.hit ? "true" : "false") << ", full = " << (aInfo.full ? "true" : "false") << ", length = " << aInfo.length);
655#endif
656 }
657 catch( const parser_error< const char*, file_iterator<> >& rError )
658 {
659 SAL_WARN("sdext.pdfimport.pdfparse", "parse error: " << rError.descriptor << " at buffer pos " << rError.where - file_start);
660#if OSL_DEBUG_LEVEL > 0
661 OUStringBuffer aTmp;
662 unsigned int nElem = aGrammar.m_aObjectStack.size();
663 for( unsigned int i = 0; i < nElem; i++ )
664 {
665 aTmp.append(" ");
666 aTmp.appendAscii(typeid( *(aGrammar.m_aObjectStack[i]) ).name());
667 }
668 SAL_WARN("sdext.pdfimport.pdfparse", "parse error object stack: " << aTmp.makeStringAndClear());
669#endif
670 }
671
672 std::unique_ptr<PDFEntry> pRet;
673 unsigned int nEntries = aGrammar.m_aObjectStack.size();
674 if( nEntries == 1 )
675 {
676 pRet.reset(aGrammar.m_aObjectStack.back());
677 aGrammar.m_aObjectStack.pop_back();
678 }
679#if OSL_DEBUG_LEVEL > 0
680 else if( nEntries > 1 )
681 {
682 SAL_WARN("sdext.pdfimport.pdfparse", "error got " << nEntries << " stack objects in parse");
683 for( unsigned int i = 0; i < nEntries; i++ )
684 {
685 SAL_WARN("sdext.pdfimport.pdfparse", typeid(*aGrammar.m_aObjectStack[i]).name());
686 PDFObject* pObj = dynamic_cast<PDFObject*>(aGrammar.m_aObjectStack[i]);
687 if( pObj )
688 SAL_WARN("sdext.pdfimport.pdfparse", " -> object " << pObj->m_nNumber << " generation " << pObj->m_nGeneration);
689 else
690 SAL_WARN("sdext.pdfimport.pdfparse", "(type " << typeid(*aGrammar.m_aObjectStack[i]).name() << ")");
691 }
692 }
693#endif
694 return pRet;
695#endif // WIN32
696}
697
698#if defined(_MSC_VER)
699#pragma warning(pop)
700#endif
701
702/* vim:set shiftwidth=4 softtabstop=4 expandtab: */
double d
virtual bool copyOrigBytes(unsigned int nOrigOffset, unsigned int nLen)=0
virtual unsigned int readOrigBytes(unsigned int nOrigOffset, unsigned int nLen, void *pBuf)=0
virtual bool write(const void *pBuf, unsigned int nLen)=0
virtual unsigned int getCurPos()=0
Any value
Reference< XOutputStream > stream
static osl::File * pStream
Definition: emitcontext.cxx:32
const char * name
#define SAL_WARN(area, stream)
#define SAL_INFO(area, stream)
aStr
OUString getString(const Any &_rAny)
int i
constexpr OUStringLiteral first
constexpr std::enable_if_t< std::is_signed_v< T >, std::make_unsigned_t< T > > make_unsigned(T value)
uno::Sequence< sal_Int8 > m_aBuf
Definition: odfemitter.cxx:42
std::vector< std::unique_ptr< PDFEntry > > m_aSubElements
Definition: pdfparse.hxx:162
PDFEntry * buildMap()
Definition: pdfentries.cxx:576
virtual bool emit(EmitContext &rWriteContext) const =0
unsigned int m_nMinor
Definition: pdfparse.hxx:237
unsigned int m_nMajor
Definition: pdfparse.hxx:236
unsigned int m_nGeneration
Definition: pdfparse.hxx:264
PDFStream * m_pStream
Definition: pdfparse.hxx:262
PDFEntry * m_pObject
Definition: pdfparse.hxx:261
unsigned int m_nNumber
Definition: pdfparse.hxx:263
static std::unique_ptr< PDFEntry > read(const char *pFileName)
Definition: pdfparse.cxx:609