LibreOffice Module sdext (master) 1
pdfentries.cxx
Go to the documentation of this file.
1/* -*- Mode: C++; tab-width: 4; indent-tabs-mode: nil; c-basic-offset: 4 -*- */
2/*
3 * This file is part of the LibreOffice project.
4 *
5 * This Source Code Form is subject to the terms of the Mozilla Public
6 * License, v. 2.0. If a copy of the MPL was not distributed with this
7 * file, You can obtain one at http://mozilla.org/MPL/2.0/.
8 *
9 * This file incorporates work covered by the following license notice:
10 *
11 * Licensed to the Apache Software Foundation (ASF) under one or more
12 * contributor license agreements. See the NOTICE file distributed
13 * with this work for additional information regarding copyright
14 * ownership. The ASF licenses this file to you under the Apache
15 * License, Version 2.0 (the "License"); you may not use this file
16 * except in compliance with the License. You may obtain a copy of
17 * the License at http://www.apache.org/licenses/LICENSE-2.0 .
18 */
19
20
21#include <pdfparse.hxx>
22
23#include <comphelper/hash.hxx>
24
25#include <rtl/strbuf.hxx>
26#include <rtl/ustring.hxx>
27#include <rtl/ustrbuf.hxx>
28#include <rtl/digest.h>
29#include <rtl/cipher.h>
30#include <sal/log.hxx>
31
32#include <zlib.h>
33
34#include <math.h>
35#include <map>
36
37#include <string.h>
38
39
40namespace pdfparse
41{
42
44{
45 // xref table: maps object number to a pair of (generation, buffer offset)
46 typedef std::map< unsigned int, std::pair< unsigned int, unsigned int > > XRefTable;
48 // container of all indirect objects (usually a PDFFile*)
50 unsigned int m_nDecryptObject;
52
53 // returns true if the xref table was updated
54 bool insertXref( unsigned int nObject, unsigned int nGeneration, unsigned int nOffset )
55 {
56 XRefTable::iterator it = m_aXRefTable.find( nObject );
57 if( it == m_aXRefTable.end() )
58 {
59 // new entry
60 m_aXRefTable[ nObject ] = std::pair<unsigned int, unsigned int>(nGeneration,nOffset);
61 return true;
62 }
63 // update old entry, if generation number is higher
64 if( it->second.first < nGeneration )
65 {
66 it->second = std::pair<unsigned int, unsigned int>(nGeneration,nOffset);
67 return true;
68 }
69 return false;
70 }
71
72 explicit EmitImplData( const PDFContainer* pTopContainer ) :
73 m_pObjectContainer( pTopContainer ),
76 {}
77 void decrypt( const sal_uInt8* pInBuffer, sal_uInt32 nLen, sal_uInt8* pOutBuffer,
78 unsigned int nObject, unsigned int nGeneration ) const
79 {
80 const PDFFile* pFile = dynamic_cast<const PDFFile*>(m_pObjectContainer);
81 pFile && pFile->decrypt( pInBuffer, nLen, pOutBuffer, nObject, nGeneration );
82 }
83
84 void setDecryptObject( unsigned int nObject, unsigned int nGeneration )
85 {
86 m_nDecryptObject = nObject;
87 m_nDecryptGeneration = nGeneration;
88 }
89};
90
91}
92
93using namespace pdfparse;
94
95EmitContext::EmitContext( const PDFContainer* pTop ) :
96 m_bDeflate( false ),
97 m_bDecrypt( false )
98{
99 if( pTop )
100 m_pImplData.reset( new EmitImplData( pTop ) );
101}
102
104{
105}
106
108{
109}
110
112{
113 return rContext.m_pImplData.get();
114}
115
116void PDFEntry::setEmitData( EmitContext& rContext, EmitImplData* pNewEmitData )
117{
118 if( rContext.m_pImplData && rContext.m_pImplData.get() != pNewEmitData )
119 rContext.m_pImplData.reset();
120 rContext.m_pImplData.reset( pNewEmitData );
121}
122
124{
125}
126
128{
129}
130
131bool PDFComment::emit( EmitContext& rWriteContext ) const
132{
133 return rWriteContext.write( m_aComment.getStr(), m_aComment.getLength() );
134}
135
137{
138 return new PDFComment( m_aComment );
139}
140
142{
143}
144
145bool PDFName::emit( EmitContext& rWriteContext ) const
146{
147 if( ! rWriteContext.write( " /", 2 ) )
148 return false;
149 return rWriteContext.write( m_aName.getStr(), m_aName.getLength() );
150}
151
153{
154 return new PDFName( m_aName );
155}
156
158{
159 OStringBuffer aFilter( m_aName.getLength() );
160 const char* pStr = m_aName.getStr();
161 unsigned int nLen = m_aName.getLength();
162 for( unsigned int i = 0; i < nLen; i++ )
163 {
164 if( (i < nLen - 3) && pStr[i] == '#' )
165 {
166 char rResult = 0;
167 i++;
168 if( pStr[i] >= '0' && pStr[i] <= '9' )
169 rResult = char( pStr[i]-'0' ) << 4;
170 else if( pStr[i] >= 'a' && pStr[i] <= 'f' )
171 rResult = char( pStr[i]-'a' + 10 ) << 4;
172 else if( pStr[i] >= 'A' && pStr[i] <= 'F' )
173 rResult = char( pStr[i]-'A' + 10 ) << 4;
174 i++;
175 if( pStr[i] >= '0' && pStr[i] <= '9' )
176 rResult |= char( pStr[i]-'0' );
177 else if( pStr[i] >= 'a' && pStr[i] <= 'f' )
178 rResult |= char( pStr[i]-'a' + 10 );
179 else if( pStr[i] >= 'A' && pStr[i] <= 'F' )
180 rResult |= char( pStr[i]-'A' + 10 );
181 aFilter.append( rResult );
182 }
183 else
184 aFilter.append( pStr[i] );
185 }
186 return OStringToOUString( aFilter, RTL_TEXTENCODING_UTF8 );
187}
188
190{
191}
192
193bool PDFString::emit( EmitContext& rWriteContext ) const
194{
195 if( ! rWriteContext.write( " ", 1 ) )
196 return false;
197 EmitImplData* pEData = getEmitData( rWriteContext );
198 if( rWriteContext.m_bDecrypt && pEData && pEData->m_nDecryptObject )
199 {
200 OString aFiltered( getFilteredString() );
201 // decrypt inplace (evil since OString is supposed to be const
202 // however in this case we know that getFilteredString returned a singular string instance
203 pEData->decrypt( reinterpret_cast<sal_uInt8 const *>(aFiltered.getStr()), aFiltered.getLength(),
204 reinterpret_cast<sal_uInt8 *>(const_cast<char *>(aFiltered.getStr())),
205 pEData->m_nDecryptObject, pEData->m_nDecryptGeneration );
206 // check for string or hex string
207 const char* pStr = aFiltered.getStr();
208 if( aFiltered.getLength() > 1 &&
209 ( (static_cast<unsigned char>(pStr[0]) == 0xff && static_cast<unsigned char>(pStr[1]) == 0xfe) ||
210 (static_cast<unsigned char>(pStr[0]) == 0xfe && static_cast<unsigned char>(pStr[1]) == 0xff) ) )
211 {
212 static const char pHexTab[16] = { '0', '1', '2', '3', '4', '5', '6', '7',
213 '8', '9', 'A', 'B', 'C', 'D', 'E', 'F' };
214 if( ! rWriteContext.write( "<", 1 ) )
215 return false;
216 for( sal_Int32 i = 0; i < aFiltered.getLength(); i++ )
217 {
218 if( ! rWriteContext.write( pHexTab + ((sal_uInt32(pStr[i]) >> 4) & 0x0f), 1 ) )
219 return false;
220 if( ! rWriteContext.write( pHexTab + (sal_uInt32(pStr[i]) & 0x0f), 1 ) )
221 return false;
222 }
223 if( ! rWriteContext.write( ">", 1 ) )
224 return false;
225 }
226 else
227 {
228 if( ! rWriteContext.write( "(", 1 ) )
229 return false;
230 if( ! rWriteContext.write( aFiltered.getStr(), aFiltered.getLength() ) )
231 return false;
232 if( ! rWriteContext.write( ")", 1 ) )
233 return false;
234 }
235 return true;
236 }
237 return rWriteContext.write( m_aString.getStr(), m_aString.getLength() );
238}
239
241{
242 return new PDFString( m_aString );
243}
244
246{
247 int nLen = m_aString.getLength();
248 OStringBuffer aBuf( nLen );
249
250 const char* pStr = m_aString.getStr();
251 if( *pStr == '(' )
252 {
253 const char* pRun = pStr+1;
254 while( pRun - pStr < nLen-1 )
255 {
256 if( *pRun == '\\' )
257 {
258 pRun++;
259 if( pRun - pStr < nLen )
260 {
261 char aEsc = 0;
262 if( *pRun == 'n' )
263 aEsc = '\n';
264 else if( *pRun == 'r' )
265 aEsc = '\r';
266 else if( *pRun == 't' )
267 aEsc = '\t';
268 else if( *pRun == 'b' )
269 aEsc = '\b';
270 else if( *pRun == 'f' )
271 aEsc = '\f';
272 else if( *pRun == '(' )
273 aEsc = '(';
274 else if( *pRun == ')' )
275 aEsc = ')';
276 else if( *pRun == '\\' )
277 aEsc = '\\';
278 else if( *pRun == '\n' )
279 {
280 pRun++;
281 continue;
282 }
283 else if( *pRun == '\r' )
284 {
285 pRun++;
286 if( *pRun == '\n' )
287 pRun++;
288 continue;
289 }
290 else
291 {
292 int i = 0;
293 while( i++ < 3 && *pRun >= '0' && *pRun <= '7' )
294 aEsc = 8*aEsc + (*pRun++ - '0');
295 // move pointer back to last character of octal sequence
296 pRun--;
297 }
298 aBuf.append( aEsc );
299 }
300 }
301 else
302 aBuf.append( *pRun );
303 // move pointer to next character
304 pRun++;
305 }
306 }
307 else if( *pStr == '<' )
308 {
309 const char* pRun = pStr+1;
310 while( *pRun != '>' && pRun - pStr < nLen )
311 {
312 char rResult = 0;
313 if( *pRun >= '0' && *pRun <= '9' )
314 rResult = char( ( *pRun-'0' ) << 4 );
315 else if( *pRun >= 'a' && *pRun <= 'f' )
316 rResult = char( ( *pRun-'a' + 10 ) << 4 );
317 else if( *pRun >= 'A' && *pRun <= 'F' )
318 rResult = char( ( *pRun-'A' + 10 ) << 4 );
319 pRun++;
320 if( *pRun != '>' && pRun - pStr < nLen )
321 {
322 if( *pRun >= '0' && *pRun <= '9' )
323 rResult |= char( *pRun-'0' );
324 else if( *pRun >= 'a' && *pRun <= 'f' )
325 rResult |= char( *pRun-'a' + 10 );
326 else if( *pRun >= 'A' && *pRun <= 'F' )
327 rResult |= char( *pRun-'A' + 10 );
328 }
329 pRun++;
330 aBuf.append( rResult );
331 }
332 }
333
334 return aBuf.makeStringAndClear();
335}
336
338{
339}
340
341bool PDFNumber::emit( EmitContext& rWriteContext ) const
342{
343 OStringBuffer aBuf( 32 );
344 aBuf.append( ' ' );
345
346 double fValue = m_fValue;
347 bool bNeg = false;
348 int nPrecision = 5;
349 if( fValue < 0.0 )
350 {
351 bNeg = true;
352 fValue=-fValue;
353 }
354
355 sal_Int64 nInt = static_cast<sal_Int64>(fValue);
356 fValue -= static_cast<double>(nInt);
357 // optimizing hardware may lead to a value of 1.0 after the subtraction
358 if( fValue == 1.0 || log10( 1.0-fValue ) <= -nPrecision )
359 {
360 nInt++;
361 fValue = 0.0;
362 }
363 sal_Int64 nFrac = 0;
364 if( fValue )
365 {
366 fValue *= pow( 10.0, static_cast<double>(nPrecision) );
367 nFrac = static_cast<sal_Int64>(fValue);
368 }
369 if( bNeg && ( nInt || nFrac ) )
370 aBuf.append( '-' );
371 aBuf.append( nInt );
372 if( nFrac )
373 {
374 int i;
375 aBuf.append( '.' );
376 sal_Int64 nBound = static_cast<sal_Int64>(pow( 10.0, nPrecision - 1.0 )+0.5);
377 for ( i = 0; ( i < nPrecision ) && nFrac; i++ )
378 {
379 sal_Int64 nNumb = nFrac / nBound;
380 nFrac -= nNumb * nBound;
381 aBuf.append( nNumb );
382 nBound /= 10;
383 }
384 }
385
386 return rWriteContext.write( aBuf.getStr(), aBuf.getLength() );
387}
388
390{
391 return new PDFNumber( m_fValue );
392}
393
394
396{
397}
398
399bool PDFBool::emit( EmitContext& rWriteContext ) const
400{
401 return m_bValue ? rWriteContext.write( " true", 5 ) : rWriteContext.write( " false", 6 );
402}
403
405{
406 return new PDFBool( m_bValue );
407}
408
410{
411}
412
413bool PDFNull::emit( EmitContext& rWriteContext ) const
414{
415 return rWriteContext.write( " null", 5 );
416}
417
419{
420 return new PDFNull();
421}
422
423
425{
426}
427
428bool PDFObjectRef::emit( EmitContext& rWriteContext ) const
429{
430 OString aBuf =
431 " " +
432 OString::number( sal_Int32( m_nNumber ) ) +
433 " " +
434 OString::number( sal_Int32( m_nGeneration ) ) +
435 " R";
436 return rWriteContext.write( aBuf.getStr(), aBuf.getLength() );
437}
438
440{
441 return new PDFObjectRef( m_nNumber, m_nGeneration );
442}
443
445{
446}
447
448bool PDFContainer::emitSubElements( EmitContext& rWriteContext ) const
449{
450 int nEle = m_aSubElements.size();
451 for( int i = 0; i < nEle; i++ )
452 {
453 if( rWriteContext.m_bDecrypt )
454 {
455 const PDFName* pName = dynamic_cast<PDFName*>(m_aSubElements[i].get());
456 if (pName && pName->m_aName == "Encrypt")
457 {
458 i++;
459 continue;
460 }
461 }
462 if( ! m_aSubElements[i]->emit( rWriteContext ) )
463 return false;
464 }
465 return true;
466}
467
468void PDFContainer::cloneSubElements( std::vector<std::unique_ptr<PDFEntry>>& rNewSubElements ) const
469{
470 int nEle = m_aSubElements.size();
471 for( int i = 0; i < nEle; i++ )
472 rNewSubElements.emplace_back( m_aSubElements[i]->clone() );
473}
474
475PDFObject* PDFContainer::findObject( unsigned int nNumber, unsigned int nGeneration ) const
476{
477 unsigned int nEle = m_aSubElements.size();
478 for( unsigned int i = 0; i < nEle; i++ )
479 {
480 PDFObject* pObject = dynamic_cast<PDFObject*>(m_aSubElements[i].get());
481 if( pObject &&
482 pObject->m_nNumber == nNumber &&
483 pObject->m_nGeneration == nGeneration )
484 {
485 return pObject;
486 }
487 }
488 return nullptr;
489}
490
492{
493}
494
495bool PDFArray::emit( EmitContext& rWriteContext ) const
496{
497 if( ! rWriteContext.write( "[", 1 ) )
498 return false;
499 if( ! emitSubElements( rWriteContext ) )
500 return false;
501 return rWriteContext.write( "]", 1 );
502}
503
505{
506 PDFArray* pNewAr = new PDFArray();
508 return pNewAr;
509}
510
512{
513}
514
515bool PDFDict::emit( EmitContext& rWriteContext ) const
516{
517 if( ! rWriteContext.write( "<<\n", 3 ) )
518 return false;
519 if( ! emitSubElements( rWriteContext ) )
520 return false;
521 return rWriteContext.write( "\n>>\n", 4 );
522}
523
524void PDFDict::insertValue( const OString& rName, std::unique_ptr<PDFEntry> pValue )
525{
526 if( ! pValue )
527 eraseValue( rName );
528
529 PDFEntry* pValueTmp = nullptr;
530 std::unordered_map<OString,PDFEntry*>::iterator it = m_aMap.find( rName );
531 if( it == m_aMap.end() )
532 {
533 // new name/value, pair, append it
534 m_aSubElements.emplace_back(std::make_unique<PDFName>(rName));
535 m_aSubElements.emplace_back( std::move(pValue) );
536 pValueTmp = m_aSubElements.back().get();
537 }
538 else
539 {
540 unsigned int nSub = m_aSubElements.size();
541 for( unsigned int i = 0; i < nSub; i++ )
542 if( m_aSubElements[i].get() == it->second )
543 {
544 m_aSubElements[i] = std::move(pValue);
545 pValueTmp = m_aSubElements[i].get();
546 break;
547 }
548 }
549 assert(pValueTmp);
550 m_aMap[ rName ] = pValueTmp;
551}
552
553void PDFDict::eraseValue( std::string_view rName )
554{
555 unsigned int nEle = m_aSubElements.size();
556 for( unsigned int i = 0; i < nEle; i++ )
557 {
558 PDFName* pName = dynamic_cast<PDFName*>(m_aSubElements[i].get());
559 if( pName && pName->m_aName == rName )
560 {
561 for( unsigned int j = i+1; j < nEle; j++ )
562 {
563 if( dynamic_cast<PDFComment*>(m_aSubElements[j].get()) == nullptr )
564 {
565 // remove and free subelements from vector
566 m_aSubElements.erase( m_aSubElements.begin()+j );
567 m_aSubElements.erase( m_aSubElements.begin()+i );
568 buildMap();
569 return;
570 }
571 }
572 }
573 }
574}
575
577{
578 // clear map
579 m_aMap.clear();
580 // build map
581 unsigned int nEle = m_aSubElements.size();
582 PDFName* pName = nullptr;
583 for( unsigned int i = 0; i < nEle; i++ )
584 {
585 if( dynamic_cast<PDFComment*>(m_aSubElements[i].get()) == nullptr )
586 {
587 if( pName )
588 {
589 m_aMap[ pName->m_aName ] = m_aSubElements[i].get();
590 pName = nullptr;
591 }
592 else if( (pName = dynamic_cast<PDFName*>(m_aSubElements[i].get())) == nullptr )
593 return m_aSubElements[i].get();
594 }
595 }
596 return pName;
597}
598
600{
601 PDFDict* pNewDict = new PDFDict();
602 cloneSubElements( pNewDict->m_aSubElements );
603 pNewDict->buildMap();
604 return pNewDict;
605}
606
608{
609}
610
611bool PDFStream::emit( EmitContext& rWriteContext ) const
612{
614}
615
617{
618 return new PDFStream( m_nBeginOffset, m_nEndOffset, nullptr );
619}
620
621unsigned int PDFStream::getDictLength( const PDFContainer* pContainer ) const
622{
623 if( ! m_pDict )
624 return 0;
625 // find /Length entry, can either be a direct or indirect number object
626 std::unordered_map<OString,PDFEntry*>::const_iterator it =
627 m_pDict->m_aMap.find( "Length" );
628 if( it == m_pDict->m_aMap.end() )
629 return 0;
630 PDFNumber* pNum = dynamic_cast<PDFNumber*>(it->second);
631 if( ! pNum && pContainer )
632 {
633 PDFObjectRef* pRef = dynamic_cast<PDFObjectRef*>(it->second);
634 if( pRef )
635 {
636 int nEle = pContainer->m_aSubElements.size();
637 for (int i = 0; i < nEle; i++)
638 {
639 PDFObject* pObj = dynamic_cast<PDFObject*>(pContainer->m_aSubElements[i].get());
640 if( pObj &&
641 pObj->m_nNumber == pRef->m_nNumber &&
642 pObj->m_nGeneration == pRef->m_nGeneration )
643 {
644 if( pObj->m_pObject )
645 pNum = dynamic_cast<PDFNumber*>(pObj->m_pObject);
646 break;
647 }
648 }
649 }
650 }
651 return pNum ? static_cast<unsigned int>(pNum->m_fValue) : 0;
652}
653
655{
656}
657
658bool PDFObject::getDeflatedStream( std::unique_ptr<char[]>& rpStream, unsigned int* pBytes, const PDFContainer* pObjectContainer, EmitContext& rContext ) const
659{
660 bool bIsDeflated = false;
661 if( m_pStream && m_pStream->m_pDict &&
663 )
664 {
665 unsigned int nOuterStreamLen = m_pStream->m_nEndOffset - m_pStream->m_nBeginOffset;
666 rpStream.reset(new char[ nOuterStreamLen ]);
667 unsigned int nRead = rContext.readOrigBytes( m_pStream->m_nBeginOffset, nOuterStreamLen, rpStream.get() );
668 if( nRead != nOuterStreamLen )
669 {
670 rpStream.reset();
671 *pBytes = 0;
672 return false;
673 }
674 // is there a filter entry ?
675 std::unordered_map<OString,PDFEntry*>::const_iterator it =
676 m_pStream->m_pDict->m_aMap.find( "Filter" );
677 if( it != m_pStream->m_pDict->m_aMap.end() )
678 {
679 PDFName* pFilter = dynamic_cast<PDFName*>(it->second);
680 if( ! pFilter )
681 {
682 PDFArray* pArray = dynamic_cast<PDFArray*>(it->second);
683 if( pArray && ! pArray->m_aSubElements.empty() )
684 {
685 pFilter = dynamic_cast<PDFName*>(pArray->m_aSubElements.front().get());
686 }
687 }
688
689 // is the (first) filter FlateDecode ?
690 if (pFilter && pFilter->m_aName == "FlateDecode")
691 {
692 bIsDeflated = true;
693 }
694 }
695 // prepare compressed data section
696 char* pStream = rpStream.get();
697 if( pStream[0] == 's' )
698 pStream += 6; // skip "stream"
699 // skip line end after "stream"
700 while( *pStream == '\r' || *pStream == '\n' )
701 pStream++;
702 // get the compressed length
703 *pBytes = m_pStream->getDictLength( pObjectContainer );
704 if( pStream != rpStream.get() )
705 memmove( rpStream.get(), pStream, *pBytes );
706 if( rContext.m_bDecrypt )
707 {
708 EmitImplData* pEData = getEmitData( rContext );
709 pEData->decrypt( reinterpret_cast<const sal_uInt8*>(rpStream.get()),
710 *pBytes,
711 reinterpret_cast<sal_uInt8*>(rpStream.get()),
712 m_nNumber,
714 ); // decrypt inplace
715 }
716 }
717 else
718 {
719 *pBytes = 0;
720 }
721 return bIsDeflated;
722}
723
724static void unzipToBuffer( char* pBegin, unsigned int nLen,
725 sal_uInt8** pOutBuf, sal_uInt32* pOutLen )
726{
727 z_stream aZStr;
728 aZStr.next_in = reinterpret_cast<Bytef *>(pBegin);
729 aZStr.avail_in = nLen;
730 aZStr.total_out = aZStr.total_in = 0;
731 aZStr.zalloc = nullptr;
732 aZStr.zfree = nullptr;
733 aZStr.opaque = nullptr;
734
735 int err = inflateInit(&aZStr);
736
737 const unsigned int buf_increment_size = 16384;
738
739 if (auto p = static_cast<sal_uInt8*>(std::realloc(*pOutBuf, buf_increment_size)))
740 {
741 *pOutBuf = p;
742 aZStr.next_out = reinterpret_cast<Bytef*>(*pOutBuf);
743 aZStr.avail_out = buf_increment_size;
744 *pOutLen = buf_increment_size;
745 }
746 else
747 err = Z_MEM_ERROR;
748 while( err != Z_STREAM_END && err >= Z_OK && aZStr.avail_in )
749 {
750 err = inflate( &aZStr, Z_NO_FLUSH );
751 if( aZStr.avail_out == 0 )
752 {
753 if( err != Z_STREAM_END )
754 {
755 const int nNewAlloc = *pOutLen + buf_increment_size;
756 if (auto p = static_cast<sal_uInt8*>(std::realloc(*pOutBuf, nNewAlloc)))
757 {
758 *pOutBuf = p;
759 aZStr.next_out = reinterpret_cast<Bytef*>(*pOutBuf + *pOutLen);
760 aZStr.avail_out = buf_increment_size;
761 *pOutLen = nNewAlloc;
762 }
763 else
764 err = Z_MEM_ERROR;
765 }
766 }
767 }
768 if( err == Z_STREAM_END )
769 {
770 if( aZStr.avail_out > 0 )
771 *pOutLen -= aZStr.avail_out;
772 }
773 inflateEnd(&aZStr);
774 if( err < Z_OK )
775 {
776 std::free( *pOutBuf );
777 *pOutBuf = nullptr;
778 *pOutLen = 0;
779 }
780}
781
782void PDFObject::writeStream( EmitContext& rWriteContext, const PDFFile* pParsedFile ) const
783{
784 if( !m_pStream )
785 return;
786
787 std::unique_ptr<char[]> pStream;
788 unsigned int nBytes = 0;
789 if( getDeflatedStream( pStream, &nBytes, pParsedFile, rWriteContext ) && nBytes && rWriteContext.m_bDeflate )
790 {
791 sal_uInt8* pOutBytes = nullptr;
792 sal_uInt32 nOutBytes = 0;
793 unzipToBuffer( pStream.get(), nBytes, &pOutBytes, &nOutBytes );
794 rWriteContext.write( pOutBytes, nOutBytes );
795 std::free( pOutBytes );
796 }
797 else if( pStream && nBytes )
798 rWriteContext.write( pStream.get(), nBytes );
799}
800
801bool PDFObject::emit( EmitContext& rWriteContext ) const
802{
803 if( ! rWriteContext.write( "\n", 1 ) )
804 return false;
805
806 EmitImplData* pEData = getEmitData( rWriteContext );
807 if( pEData )
808 pEData->insertXref( m_nNumber, m_nGeneration, rWriteContext.getCurPos() );
809
810 OString aBuf =
811 OString::number( sal_Int32( m_nNumber ) ) +
812 " " +
813 OString::number( sal_Int32( m_nGeneration ) ) +
814 " obj\n";
815 if( ! rWriteContext.write( aBuf.getStr(), aBuf.getLength() ) )
816 return false;
817
818 if( pEData )
820 if( (rWriteContext.m_bDeflate || rWriteContext.m_bDecrypt) && pEData )
821 {
822 std::unique_ptr<char[]> pStream;
823 unsigned int nBytes = 0;
824 bool bDeflate = getDeflatedStream( pStream, &nBytes, pEData->m_pObjectContainer, rWriteContext );
825 if( pStream && nBytes )
826 {
827 // unzip the stream
828 sal_uInt8* pOutBytes = nullptr;
829 sal_uInt32 nOutBytes = 0;
830 if( bDeflate && rWriteContext.m_bDeflate )
831 unzipToBuffer( pStream.get(), nBytes, &pOutBytes, &nOutBytes );
832 else
833 {
834 // nothing to deflate, but decryption has happened
835 pOutBytes = reinterpret_cast<sal_uInt8*>(pStream.get());
836 nOutBytes = static_cast<sal_uInt32>(nBytes);
837 }
838
839 if( nOutBytes )
840 {
841 // clone this object
842 std::unique_ptr<PDFObject> pClone(static_cast<PDFObject*>(clone()));
843 // set length in the dictionary to new stream length
844 std::unique_ptr<PDFNumber> pNewLen(new PDFNumber( double(nOutBytes) ));
845 pClone->m_pStream->m_pDict->insertValue( "Length", std::move(pNewLen) );
846
847 if( bDeflate && rWriteContext.m_bDeflate )
848 {
849 // delete flatedecode filter
850 std::unordered_map<OString,PDFEntry*>::const_iterator it =
851 pClone->m_pStream->m_pDict->m_aMap.find( "Filter" );
852 if( it != pClone->m_pStream->m_pDict->m_aMap.end() )
853 {
854 PDFName* pFilter = dynamic_cast<PDFName*>(it->second);
855 if (pFilter && pFilter->m_aName == "FlateDecode")
856 pClone->m_pStream->m_pDict->eraseValue( "Filter" );
857 else
858 {
859 PDFArray* pArray = dynamic_cast<PDFArray*>(it->second);
860 if( pArray && ! pArray->m_aSubElements.empty() )
861 {
862 pFilter = dynamic_cast<PDFName*>(pArray->m_aSubElements.front().get());
863 if (pFilter && pFilter->m_aName == "FlateDecode")
864 {
865 pArray->m_aSubElements.erase( pArray->m_aSubElements.begin() );
866 }
867 }
868 }
869 }
870 }
871
872 // write sub elements except stream
873 bool bRet = true;
874 unsigned int nEle = pClone->m_aSubElements.size();
875 for( unsigned int i = 0; i < nEle && bRet; i++ )
876 {
877 if( pClone->m_aSubElements[i].get() != pClone->m_pStream )
878 bRet = pClone->m_aSubElements[i]->emit( rWriteContext );
879 }
880 pClone.reset();
881 // write stream
882 if( bRet )
883 bRet = rWriteContext.write("stream\n", 7)
884 && rWriteContext.write(pOutBytes, nOutBytes)
885 && rWriteContext.write("\nendstream\nendobj\n", 18);
886 if( pOutBytes != reinterpret_cast<sal_uInt8*>(pStream.get()) )
887 std::free( pOutBytes );
888 pEData->setDecryptObject( 0, 0 );
889 return bRet;
890 }
891 if( pOutBytes != reinterpret_cast<sal_uInt8*>(pStream.get()) )
892 std::free( pOutBytes );
893 }
894 }
895
896 bool bRet = emitSubElements( rWriteContext ) &&
897 rWriteContext.write( "\nendobj\n", 8 );
898 if( pEData )
899 pEData->setDecryptObject( 0, 0 );
900 return bRet;
901}
902
904{
905 PDFObject* pNewOb = new PDFObject( m_nNumber, m_nGeneration );
907 unsigned int nEle = m_aSubElements.size();
908 for( unsigned int i = 0; i < nEle; i++ )
909 {
910 if( m_aSubElements[i].get() == m_pObject )
911 pNewOb->m_pObject = pNewOb->m_aSubElements[i].get();
912 else if( m_aSubElements[i].get() == m_pStream && pNewOb->m_pObject )
913 {
914 pNewOb->m_pStream = dynamic_cast<PDFStream*>(pNewOb->m_aSubElements[i].get());
915 PDFDict* pNewDict = dynamic_cast<PDFDict*>(pNewOb->m_pObject);
916 if (pNewDict && pNewOb->m_pStream)
917 pNewOb->m_pStream->m_pDict = pNewDict;
918 }
919 }
920 return pNewOb;
921}
922
924{
925}
926
927bool PDFTrailer::emit( EmitContext& rWriteContext ) const
928{
929 // get xref offset
930 unsigned int nXRefPos = rWriteContext.getCurPos();
931 // begin xref section, object 0 is always free
932 if( ! rWriteContext.write( "xref\r\n"
933 "0 1\r\n"
934 "0000000000 65535 f\r\n", 31 ) )
935 return false;
936 // check if we are emitting a complete PDF file
937 EmitImplData* pEData = getEmitData( rWriteContext );
938 if( pEData )
939 {
940 // emit object xrefs
941 const EmitImplData::XRefTable& rXRefs = pEData->m_aXRefTable;
942 EmitImplData::XRefTable::const_iterator section_begin, section_end;
943 section_begin = rXRefs.begin();
944 while( section_begin != rXRefs.end() )
945 {
946 // find end of continuous object numbers
947 section_end = section_begin;
948 unsigned int nLast = section_begin->first;
949 while( (++section_end) != rXRefs.end() &&
950 section_end->first == nLast+1 )
951 nLast = section_end->first;
952 // write first object number and number of following entries
953 OStringBuffer aBuf( 21 );
954 aBuf.append( sal_Int32( section_begin->first ) );
955 aBuf.append( ' ' );
956 aBuf.append( sal_Int32(nLast - section_begin->first + 1) );
957 aBuf.append( "\r\n" );
958 if( ! rWriteContext.write( aBuf.getStr(), aBuf.getLength() ) )
959 return false;
960 while( section_begin != section_end )
961 {
962 // write 20 char entry of form
963 // 0000offset 00gen n\r\n
964 aBuf.setLength( 0 );
965 OString aOffset( OString::number( section_begin->second.second ) );
966 int nPad = 10 - aOffset.getLength();
967 for( int i = 0; i < nPad; i++ )
968 aBuf.append( '0' );
969 aBuf.append( aOffset );
970 aBuf.append( ' ' );
971 OString aGeneration( OString::number( section_begin->second.first ) );
972 nPad = 5 - aGeneration.getLength();
973 for( int i = 0; i < nPad; i++ )
974 aBuf.append( '0' );
975 aBuf.append( aGeneration );
976 aBuf.append( " n\r\n" );
977 if( ! rWriteContext.write( aBuf.getStr(), 20 ) )
978 return false;
979 ++section_begin;
980 }
981 }
982 }
983 if( ! rWriteContext.write( "trailer\n", 8 ) )
984 return false;
985 if( ! emitSubElements( rWriteContext ) )
986 return false;
987 if( ! rWriteContext.write( "startxref\n", 10 ) )
988 return false;
989 OString aOffset( OString::number( nXRefPos ) );
990 if( ! rWriteContext.write( aOffset.getStr(), aOffset.getLength() ) )
991 return false;
992 return rWriteContext.write( "\n%%EOF\n", 7 );
993}
994
996{
997 PDFTrailer* pNewTr = new PDFTrailer();
999 unsigned int nEle = m_aSubElements.size();
1000 for( unsigned int i = 0; i < nEle; i++ )
1001 {
1002 if( m_aSubElements[i].get() == m_pDict )
1003 {
1004 pNewTr->m_pDict = dynamic_cast<PDFDict*>(pNewTr->m_aSubElements[i].get());
1005 break;
1006 }
1007 }
1008 return pNewTr;
1009}
1010
1011#define ENCRYPTION_KEY_LEN 16
1012#define ENCRYPTION_BUF_LEN 32
1013
1014namespace pdfparse {
1016{
1019 sal_uInt32 m_nAlgoVersion;
1021 sal_uInt32 m_nKeyLength;
1024 sal_uInt32 m_nPEntry;
1025 OString m_aDocID;
1026 rtlCipher m_aCipher;
1027
1028 sal_uInt8 m_aDecryptionKey[ENCRYPTION_KEY_LEN+5] = {}; // maximum handled key length
1029
1031 m_bIsEncrypted( false ),
1032 m_bStandardHandler( false ),
1033 m_nAlgoVersion( 0 ),
1035 m_nKeyLength( 0 ),
1036 m_nPEntry( 0 ),
1037 m_aCipher( nullptr )
1038 {
1039 }
1040
1042 {
1043 if( m_aCipher )
1044 rtl_cipher_destroyARCFOUR( m_aCipher );
1045 }
1046};
1047}
1048
1050 : m_nMajor( 0 ), m_nMinor( 0 )
1051{
1052}
1053
1055{
1056}
1057
1059{
1060 return impl_getData()->m_bIsEncrypted;
1061}
1062
1063bool PDFFile::decrypt( const sal_uInt8* pInBuffer, sal_uInt32 nLen, sal_uInt8* pOutBuffer,
1064 unsigned int nObject, unsigned int nGeneration ) const
1065{
1066 if( ! isEncrypted() )
1067 return false;
1068
1069 if( ! m_pData->m_aCipher )
1070 m_pData->m_aCipher = rtl_cipher_createARCFOUR( rtl_Cipher_ModeStream );
1071
1072 // modify encryption key
1073 sal_uInt32 i = m_pData->m_nKeyLength;
1074 m_pData->m_aDecryptionKey[i++] = sal_uInt8(nObject&0xff);
1075 m_pData->m_aDecryptionKey[i++] = sal_uInt8((nObject>>8)&0xff);
1076 m_pData->m_aDecryptionKey[i++] = sal_uInt8((nObject>>16)&0xff);
1077 m_pData->m_aDecryptionKey[i++] = sal_uInt8(nGeneration&0xff);
1078 m_pData->m_aDecryptionKey[i++] = sal_uInt8((nGeneration>>8)&0xff);
1079
1080 ::std::vector<unsigned char> const aSum(::comphelper::Hash::calculateHash(
1081 m_pData->m_aDecryptionKey, i, ::comphelper::HashType::MD5));
1082
1083 if( i > 16 )
1084 i = 16;
1085
1086 rtlCipherError aErr = rtl_cipher_initARCFOUR( m_pData->m_aCipher,
1087 rtl_Cipher_DirectionDecode,
1088 aSum.data(), i,
1089 nullptr, 0 );
1090 if( aErr == rtl_Cipher_E_None )
1091 aErr = rtl_cipher_decodeARCFOUR( m_pData->m_aCipher,
1092 pInBuffer, nLen,
1093 pOutBuffer, nLen );
1094 return aErr == rtl_Cipher_E_None;
1095}
1096
1098{
1099 0x28, 0xBF, 0x4E, 0x5E, 0x4E, 0x75, 0x8A, 0x41, 0x64, 0x00, 0x4E, 0x56, 0xFF, 0xFA, 0x01, 0x08,
1100 0x2E, 0x2E, 0x00, 0xB6, 0xD0, 0x68, 0x3E, 0x80, 0x2F, 0x0C, 0xA9, 0xFE, 0x64, 0x53, 0x69, 0x7A
1101};
1102
1103static void pad_or_truncate_to_32( const OString& rStr, char* pBuffer )
1104{
1105 int nLen = rStr.getLength();
1106 if( nLen > 32 )
1107 nLen = 32;
1108 const char* pStr = rStr.getStr();
1109 memcpy( pBuffer, pStr, nLen );
1110 int i = 0;
1111 while( nLen < 32 )
1112 pBuffer[nLen++] = nPadString[i++];
1113}
1114
1115// pass at least pData->m_nKeyLength bytes in
1116static sal_uInt32 password_to_key( const OString& rPwd, sal_uInt8* pOutKey, PDFFileImplData const * pData, bool bComputeO )
1117{
1118 // see PDF reference 1.4 Algorithm 3.2
1119 // encrypt pad string
1120 char aPadPwd[ENCRYPTION_BUF_LEN];
1121 pad_or_truncate_to_32( rPwd, aPadPwd );
1122 ::comphelper::Hash aDigest(::comphelper::HashType::MD5);
1123 aDigest.update(reinterpret_cast<unsigned char const*>(aPadPwd), sizeof(aPadPwd));
1124 if( ! bComputeO )
1125 {
1126 aDigest.update(pData->m_aOEntry, 32);
1127 sal_uInt8 aPEntry[4];
1128 aPEntry[0] = static_cast<sal_uInt8>(pData->m_nPEntry & 0xff);
1129 aPEntry[1] = static_cast<sal_uInt8>((pData->m_nPEntry >> 8 ) & 0xff);
1130 aPEntry[2] = static_cast<sal_uInt8>((pData->m_nPEntry >> 16) & 0xff);
1131 aPEntry[3] = static_cast<sal_uInt8>((pData->m_nPEntry >> 24) & 0xff);
1132 aDigest.update(aPEntry, sizeof(aPEntry));
1133 aDigest.update(reinterpret_cast<unsigned char const*>(pData->m_aDocID.getStr()), pData->m_aDocID.getLength());
1134 }
1135 ::std::vector<unsigned char> nSum(aDigest.finalize());
1136 if( pData->m_nStandardRevision == 3 )
1137 {
1138 for( int i = 0; i < 50; i++ )
1139 {
1140 nSum = ::comphelper::Hash::calculateHash(nSum.data(), nSum.size(),
1141 ::comphelper::HashType::MD5);
1142 }
1143 }
1144 sal_uInt32 nLen = pData->m_nKeyLength;
1145 if( nLen > RTL_DIGEST_LENGTH_MD5 )
1146 nLen = RTL_DIGEST_LENGTH_MD5;
1147 memcpy( pOutKey, nSum.data(), nLen );
1148 return nLen;
1149}
1150
1151static bool check_user_password( const OString& rPwd, PDFFileImplData* pData )
1152{
1153 // see PDF reference 1.4 Algorithm 3.6
1154 bool bValid = false;
1156 sal_uInt32 nKeyLen = password_to_key( rPwd, aKey, pData, false );
1157 // save (at this time potential) decryption key for later use
1158 memcpy( pData->m_aDecryptionKey, aKey, nKeyLen );
1159 if( pData->m_nStandardRevision == 2 )
1160 {
1161 sal_uInt8 nEncryptedEntry[ENCRYPTION_BUF_LEN] = {};
1162 // see PDF reference 1.4 Algorithm 3.4
1163 // encrypt pad string
1164 if (rtl_cipher_initARCFOUR( pData->m_aCipher, rtl_Cipher_DirectionEncode,
1165 aKey, nKeyLen,
1166 nullptr, 0 )
1167 != rtl_Cipher_E_None)
1168 {
1169 return false; //TODO: differentiate "failed to decrypt" from "wrong password"
1170 }
1171 rtl_cipher_encodeARCFOUR( pData->m_aCipher, nPadString, sizeof( nPadString ),
1172 nEncryptedEntry, sizeof( nEncryptedEntry ) );
1173 bValid = (memcmp( nEncryptedEntry, pData->m_aUEntry, 32 ) == 0);
1174 }
1175 else if( pData->m_nStandardRevision == 3 )
1176 {
1177 // see PDF reference 1.4 Algorithm 3.5
1178 ::comphelper::Hash aDigest(::comphelper::HashType::MD5);
1179 aDigest.update(nPadString, sizeof(nPadString));
1180 aDigest.update(reinterpret_cast<unsigned char const*>(pData->m_aDocID.getStr()), pData->m_aDocID.getLength());
1181 ::std::vector<unsigned char> nEncryptedEntry(aDigest.finalize());
1182 if (rtl_cipher_initARCFOUR( pData->m_aCipher, rtl_Cipher_DirectionEncode,
1183 aKey, sizeof(aKey), nullptr, 0 )
1184 != rtl_Cipher_E_None)
1185 {
1186 return false; //TODO: differentiate "failed to decrypt" from "wrong password"
1187 }
1188 rtl_cipher_encodeARCFOUR( pData->m_aCipher,
1189 nEncryptedEntry.data(), 16,
1190 nEncryptedEntry.data(), 16 ); // encrypt in place
1191 for( int i = 1; i <= 19; i++ ) // do it 19 times, start with 1
1192 {
1193 sal_uInt8 aTempKey[ENCRYPTION_KEY_LEN];
1194 for( size_t j = 0; j < sizeof(aTempKey); j++ )
1195 aTempKey[j] = static_cast<sal_uInt8>( aKey[j] ^ i );
1196
1197 if (rtl_cipher_initARCFOUR( pData->m_aCipher, rtl_Cipher_DirectionEncode,
1198 aTempKey, sizeof(aTempKey), nullptr, 0 )
1199 != rtl_Cipher_E_None)
1200 {
1201 return false; //TODO: differentiate "failed to decrypt" from "wrong password"
1202 }
1203 rtl_cipher_encodeARCFOUR( pData->m_aCipher,
1204 nEncryptedEntry.data(), 16,
1205 nEncryptedEntry.data(), 16 ); // encrypt in place
1206 }
1207 bValid = (memcmp( nEncryptedEntry.data(), pData->m_aUEntry, 16 ) == 0);
1208 }
1209 return bValid;
1210}
1211
1213{
1214 return m_pData->m_bStandardHandler &&
1215 m_pData->m_nAlgoVersion >= 1 &&
1216 m_pData->m_nAlgoVersion <= 2 &&
1217 m_pData->m_nStandardRevision >= 2 &&
1218 m_pData->m_nStandardRevision <= 3;
1219}
1220
1221bool PDFFile::setupDecryptionData( const OString& rPwd ) const
1222{
1223 if( !impl_getData()->m_bIsEncrypted )
1224 return rPwd.isEmpty();
1225
1226 // check if we can handle this encryption at all
1228 return false;
1229
1230 if( ! m_pData->m_aCipher )
1231 m_pData->m_aCipher = rtl_cipher_createARCFOUR(rtl_Cipher_ModeStream);
1232
1233 // first try user password
1234 bool bValid = check_user_password( rPwd, m_pData.get() );
1235
1236 if( ! bValid )
1237 {
1238 // try owner password
1239 // see PDF reference 1.4 Algorithm 3.7
1241 sal_uInt8 nPwd[ENCRYPTION_BUF_LEN] = {};
1242 sal_uInt32 nKeyLen = password_to_key( rPwd, aKey, m_pData.get(), true );
1243 if( m_pData->m_nStandardRevision == 2 )
1244 {
1245 if (rtl_cipher_initARCFOUR( m_pData->m_aCipher, rtl_Cipher_DirectionDecode,
1246 aKey, nKeyLen, nullptr, 0 )
1247 != rtl_Cipher_E_None)
1248 {
1249 return false; //TODO: differentiate "failed to decrypt" from "wrong password"
1250 }
1251 rtl_cipher_decodeARCFOUR( m_pData->m_aCipher,
1252 m_pData->m_aOEntry, 32,
1253 nPwd, 32 );
1254 }
1255 else if( m_pData->m_nStandardRevision == 3 )
1256 {
1257 memcpy( nPwd, m_pData->m_aOEntry, 32 );
1258 for( int i = 19; i >= 0; i-- )
1259 {
1260 sal_uInt8 nTempKey[ENCRYPTION_KEY_LEN];
1261 for( size_t j = 0; j < sizeof(nTempKey); j++ )
1262 nTempKey[j] = sal_uInt8(aKey[j] ^ i);
1263 if (rtl_cipher_initARCFOUR( m_pData->m_aCipher, rtl_Cipher_DirectionDecode,
1264 nTempKey, nKeyLen, nullptr, 0 )
1265 != rtl_Cipher_E_None)
1266 {
1267 return false; //TODO: differentiate "failed to decrypt" from "wrong password"
1268 }
1269 rtl_cipher_decodeARCFOUR( m_pData->m_aCipher,
1270 nPwd, 32,
1271 nPwd, 32 ); // decrypt inplace
1272 }
1273 }
1274 bValid = check_user_password( OString( reinterpret_cast<char*>(nPwd), 32 ), m_pData.get() );
1275 }
1276
1277 return bValid;
1278}
1279
1281{
1282 if( m_pData )
1283 return m_pData.get();
1284 m_pData.reset( new PDFFileImplData );
1285 // check for encryption dict in a trailer
1286 unsigned int nElements = m_aSubElements.size();
1287 while( nElements-- > 0 )
1288 {
1289 PDFTrailer* pTrailer = dynamic_cast<PDFTrailer*>(m_aSubElements[nElements].get());
1290 if( pTrailer && pTrailer->m_pDict )
1291 {
1292 // search doc id
1293 PDFDict::Map::iterator doc_id = pTrailer->m_pDict->m_aMap.find( "ID" );
1294 if( doc_id != pTrailer->m_pDict->m_aMap.end() )
1295 {
1296 PDFArray* pArr = dynamic_cast<PDFArray*>(doc_id->second);
1297 if( pArr && !pArr->m_aSubElements.empty() )
1298 {
1299 PDFString* pStr = dynamic_cast<PDFString*>(pArr->m_aSubElements[0].get());
1300 if( pStr )
1301 m_pData->m_aDocID = pStr->getFilteredString();
1302#if OSL_DEBUG_LEVEL > 0
1303 OUStringBuffer aTmp;
1304 for( int i = 0; i < m_pData->m_aDocID.getLength(); i++ )
1305 aTmp.append(static_cast<sal_Int32>(sal_uInt8(m_pData->m_aDocID[i])), 16);
1306 SAL_INFO("sdext.pdfimport.pdfparse", "DocId is <" << aTmp.makeStringAndClear() << ">");
1307#endif
1308 }
1309 }
1310 // search Encrypt entry
1311 PDFDict::Map::iterator enc =
1312 pTrailer->m_pDict->m_aMap.find( "Encrypt" );
1313 if( enc != pTrailer->m_pDict->m_aMap.end() )
1314 {
1315 PDFDict* pDict = dynamic_cast<PDFDict*>(enc->second);
1316 if( ! pDict )
1317 {
1318 PDFObjectRef* pRef = dynamic_cast<PDFObjectRef*>(enc->second);
1319 if( pRef )
1320 {
1321 PDFObject* pObj = findObject( pRef );
1322 if( pObj && pObj->m_pObject )
1323 pDict = dynamic_cast<PDFDict*>(pObj->m_pObject);
1324 }
1325 }
1326 if( pDict )
1327 {
1328 PDFDict::Map::iterator filter = pDict->m_aMap.find( "Filter" );
1329 PDFDict::Map::iterator version = pDict->m_aMap.find( "V" );
1330 PDFDict::Map::iterator len = pDict->m_aMap.find( "Length" );
1331 PDFDict::Map::iterator o_ent = pDict->m_aMap.find( "O" );
1332 PDFDict::Map::iterator u_ent = pDict->m_aMap.find( "U" );
1333 PDFDict::Map::iterator r_ent = pDict->m_aMap.find( "R" );
1334 PDFDict::Map::iterator p_ent = pDict->m_aMap.find( "P" );
1335 if( filter != pDict->m_aMap.end() )
1336 {
1337 m_pData->m_bIsEncrypted = true;
1338 m_pData->m_nKeyLength = 5;
1339 if( version != pDict->m_aMap.end() )
1340 {
1341 PDFNumber* pNum = dynamic_cast<PDFNumber*>(version->second);
1342 if( pNum )
1343 m_pData->m_nAlgoVersion = static_cast<sal_uInt32>(pNum->m_fValue);
1344 }
1345 if( m_pData->m_nAlgoVersion >= 3 )
1346 m_pData->m_nKeyLength = 16;
1347 if( len != pDict->m_aMap.end() )
1348 {
1349 PDFNumber* pNum = dynamic_cast<PDFNumber*>(len->second);
1350 if( pNum )
1351 m_pData->m_nKeyLength = static_cast<sal_uInt32>(pNum->m_fValue) / 8;
1352 }
1353 PDFName* pFilter = dynamic_cast<PDFName*>(filter->second);
1354 if( pFilter && pFilter->getFilteredName() == "Standard" )
1355 m_pData->m_bStandardHandler = true;
1356 if( o_ent != pDict->m_aMap.end() )
1357 {
1358 PDFString* pString = dynamic_cast<PDFString*>(o_ent->second);
1359 if( pString )
1360 {
1361 OString aEnt = pString->getFilteredString();
1362 if( aEnt.getLength() == 32 )
1363 memcpy( m_pData->m_aOEntry, aEnt.getStr(), 32 );
1364#if OSL_DEBUG_LEVEL > 0
1365 else
1366 {
1367 OUStringBuffer aTmp;
1368 for( int i = 0; i < aEnt.getLength(); i++ )
1369 aTmp.append(" " + OUString::number(sal_uInt8(aEnt[i]), 16));
1370 SAL_WARN("sdext.pdfimport.pdfparse",
1371 "O entry has length " << static_cast<int>(aEnt.getLength()) << ", should be 32 <" << aTmp.makeStringAndClear() << ">" );
1372 }
1373#endif
1374 }
1375 }
1376 if( u_ent != pDict->m_aMap.end() )
1377 {
1378 PDFString* pString = dynamic_cast<PDFString*>(u_ent->second);
1379 if( pString )
1380 {
1381 OString aEnt = pString->getFilteredString();
1382 if( aEnt.getLength() == 32 )
1383 memcpy( m_pData->m_aUEntry, aEnt.getStr(), 32 );
1384#if OSL_DEBUG_LEVEL > 0
1385 else
1386 {
1387 OUStringBuffer aTmp;
1388 for( int i = 0; i < aEnt.getLength(); i++ )
1389 aTmp.append(" " + OUString::number(sal_uInt8(aEnt[i]), 16));
1390 SAL_WARN("sdext.pdfimport.pdfparse",
1391 "U entry has length " << static_cast<int>(aEnt.getLength()) << ", should be 32 <" << aTmp.makeStringAndClear() << ">" );
1392 }
1393#endif
1394 }
1395 }
1396 if( r_ent != pDict->m_aMap.end() )
1397 {
1398 PDFNumber* pNum = dynamic_cast<PDFNumber*>(r_ent->second);
1399 if( pNum )
1400 m_pData->m_nStandardRevision = static_cast<sal_uInt32>(pNum->m_fValue);
1401 }
1402 if( p_ent != pDict->m_aMap.end() )
1403 {
1404 PDFNumber* pNum = dynamic_cast<PDFNumber*>(p_ent->second);
1405 if( pNum )
1406 m_pData->m_nPEntry = static_cast<sal_uInt32>(static_cast<sal_Int32>(pNum->m_fValue));
1407 SAL_INFO("sdext.pdfimport.pdfparse", "p entry is " << m_pData->m_nPEntry );
1408 }
1409
1410 SAL_INFO("sdext.pdfimport.pdfparse", "Encryption dict: sec handler: " << (pFilter ? pFilter->getFilteredName() : OUString("<unknown>")) << ", version = " << static_cast<int>(m_pData->m_nAlgoVersion) << ", revision = " << static_cast<int>(m_pData->m_nStandardRevision) << ", key length = " << m_pData->m_nKeyLength );
1411 break;
1412 }
1413 }
1414 }
1415 }
1416 }
1417
1418 return m_pData.get();
1419}
1420
1421bool PDFFile::emit( EmitContext& rWriteContext ) const
1422{
1423 setEmitData( rWriteContext, new EmitImplData( this ) );
1424
1425 OString aBuf =
1426 "%PDF-" +
1427 OString::number( sal_Int32( m_nMajor ) ) +
1428 "." +
1429 OString::number( sal_Int32( m_nMinor ) ) +
1430 "\n";
1431 if( ! rWriteContext.write( aBuf.getStr(), aBuf.getLength() ) )
1432 return false;
1433 return emitSubElements( rWriteContext );
1434}
1435
1437{
1438 PDFFile* pNewFl = new PDFFile();
1439 pNewFl->m_nMajor = m_nMajor;
1440 pNewFl->m_nMinor = m_nMinor;
1442 return pNewFl;
1443}
1444
1446{
1447}
1448
1449bool PDFPart::emit( EmitContext& rWriteContext ) const
1450{
1451 return emitSubElements( rWriteContext );
1452}
1453
1455{
1456 PDFPart* pNewPt = new PDFPart();
1458 return pNewPt;
1459}
1460
1461/* vim:set shiftwidth=4 softtabstop=4 expandtab: */
const char * pName
std::vector< unsigned char > finalize()
static std::vector< unsigned char > calculateHash(const unsigned char *pInput, size_t length, HashType eType)
void update(const unsigned char *pInput, size_t length)
virtual bool copyOrigBytes(unsigned int nOrigOffset, unsigned int nLen)=0
virtual unsigned int readOrigBytes(unsigned int nOrigOffset, unsigned int nLen, void *pBuf)=0
virtual bool write(const void *pBuf, unsigned int nLen)=0
std::unique_ptr< EmitImplData > m_pImplData
Definition: pdfparse.hxx:56
virtual unsigned int getCurPos()=0
sal_Int32 nElements
EmbeddedObjectRef * pObject
static osl::File * pStream
Definition: emitcontext.cxx:32
void * p
#define SAL_WARN(area, stream)
#define SAL_INFO(area, stream)
aBuf
std::unique_ptr< sal_Int32[]> pData
err
int i
css::uno::Reference< css::linguistic2::XProofreadingIterator > get(css::uno::Reference< css::uno::XComponentContext > const &context)
#define ENCRYPTION_BUF_LEN
static sal_uInt32 password_to_key(const OString &rPwd, sal_uInt8 *pOutKey, PDFFileImplData const *pData, bool bComputeO)
const sal_uInt8 nPadString[32]
#define ENCRYPTION_KEY_LEN
static bool check_user_password(const OString &rPwd, PDFFileImplData *pData)
static void pad_or_truncate_to_32(const OString &rStr, char *pBuffer)
static void unzipToBuffer(char *pBegin, unsigned int nLen, sal_uInt8 **pOutBuf, sal_uInt32 *pOutLen)
Definition: pdfentries.cxx:724
EmitImplData(const PDFContainer *pTopContainer)
Definition: pdfentries.cxx:72
void setDecryptObject(unsigned int nObject, unsigned int nGeneration)
Definition: pdfentries.cxx:84
const PDFContainer * m_pObjectContainer
Definition: pdfentries.cxx:49
unsigned int m_nDecryptGeneration
Definition: pdfentries.cxx:51
void decrypt(const sal_uInt8 *pInBuffer, sal_uInt32 nLen, sal_uInt8 *pOutBuffer, unsigned int nObject, unsigned int nGeneration) const
Definition: pdfentries.cxx:77
bool insertXref(unsigned int nObject, unsigned int nGeneration, unsigned int nOffset)
Definition: pdfentries.cxx:54
unsigned int m_nDecryptObject
Definition: pdfentries.cxx:50
std::map< unsigned int, std::pair< unsigned int, unsigned int > > XRefTable
Definition: pdfentries.cxx:46
virtual bool emit(EmitContext &rWriteContext) const override
Definition: pdfentries.cxx:495
virtual ~PDFArray() override
Definition: pdfentries.cxx:491
virtual PDFEntry * clone() const override
Definition: pdfentries.cxx:504
virtual PDFEntry * clone() const override
Definition: pdfentries.cxx:404
virtual ~PDFBool() override
Definition: pdfentries.cxx:395
virtual bool emit(EmitContext &rWriteContext) const override
Definition: pdfentries.cxx:399
PDFBool(bool bVal)
Definition: pdfparse.hxx:131
PDFComment(OString aComment)
Definition: pdfparse.hxx:76
virtual bool emit(EmitContext &rWriteContext) const override
Definition: pdfentries.cxx:131
virtual PDFEntry * clone() const override
Definition: pdfentries.cxx:136
virtual ~PDFComment() override
Definition: pdfentries.cxx:127
bool emitSubElements(EmitContext &rWriteContext) const
Definition: pdfentries.cxx:448
PDFObject * findObject(unsigned int nNumber, unsigned int nGeneration) const
Definition: pdfentries.cxx:475
void cloneSubElements(std::vector< std::unique_ptr< PDFEntry > > &rNewSubElements) const
Definition: pdfentries.cxx:468
std::vector< std::unique_ptr< PDFEntry > > m_aSubElements
Definition: pdfparse.hxx:162
virtual ~PDFContainer() override
Definition: pdfentries.cxx:444
void eraseValue(std::string_view rName)
Definition: pdfentries.cxx:553
void insertValue(const OString &rName, std::unique_ptr< PDFEntry > pValue)
Definition: pdfentries.cxx:524
PDFEntry * buildMap()
Definition: pdfentries.cxx:576
virtual bool emit(EmitContext &rWriteContext) const override
Definition: pdfentries.cxx:515
virtual PDFEntry * clone() const override
Definition: pdfentries.cxx:599
virtual ~PDFDict() override
Definition: pdfentries.cxx:511
virtual bool emit(EmitContext &rWriteContext) const =0
static void setEmitData(EmitContext &rContext, EmitImplData *pNewEmitData)
Definition: pdfentries.cxx:116
static EmitImplData * getEmitData(EmitContext const &rContext)
Definition: pdfentries.cxx:111
sal_uInt8 m_aDecryptionKey[ENCRYPTION_KEY_LEN+5]
bool usesSupportedEncryptionFormat() const
virtual bool emit(EmitContext &rWriteContext) const override
bool setupDecryptionData(const OString &rPwd) const
unsigned int m_nMinor
Definition: pdfparse.hxx:237
bool decrypt(const sal_uInt8 *pInBuffer, sal_uInt32 nLen, sal_uInt8 *pOutBuffer, unsigned int nObject, unsigned int nGeneration) const
virtual ~PDFFile() override
std::unique_ptr< PDFFileImplData > m_pData
Definition: pdfparse.hxx:233
bool isEncrypted() const
unsigned int m_nMajor
Definition: pdfparse.hxx:236
virtual PDFEntry * clone() const override
PDFFileImplData * impl_getData() const
virtual PDFEntry * clone() const override
Definition: pdfentries.cxx:152
PDFName(OString aName)
Definition: pdfparse.hxx:94
virtual bool emit(EmitContext &rWriteContext) const override
Definition: pdfentries.cxx:145
OUString getFilteredName() const
Definition: pdfentries.cxx:157
virtual ~PDFName() override
Definition: pdfentries.cxx:141
virtual PDFEntry * clone() const override
Definition: pdfentries.cxx:418
virtual ~PDFNull() override
Definition: pdfentries.cxx:409
virtual bool emit(EmitContext &rWriteContext) const override
Definition: pdfentries.cxx:413
PDFNumber(double fVal)
Definition: pdfparse.hxx:120
virtual PDFEntry * clone() const override
Definition: pdfentries.cxx:389
virtual ~PDFNumber() override
Definition: pdfentries.cxx:337
virtual bool emit(EmitContext &rWriteContext) const override
Definition: pdfentries.cxx:341
PDFObjectRef(unsigned int nNr, unsigned int nGen)
Definition: pdfparse.hxx:143
virtual PDFEntry * clone() const override
Definition: pdfentries.cxx:439
unsigned int m_nNumber
Definition: pdfparse.hxx:140
unsigned int m_nGeneration
Definition: pdfparse.hxx:141
virtual ~PDFObjectRef() override
Definition: pdfentries.cxx:424
virtual bool emit(EmitContext &rWriteContext) const override
Definition: pdfentries.cxx:428
unsigned int m_nGeneration
Definition: pdfparse.hxx:264
PDFObject(unsigned int nNr, unsigned int nGen)
Definition: pdfparse.hxx:266
virtual bool emit(EmitContext &rWriteContext) const override
Definition: pdfentries.cxx:801
virtual ~PDFObject() override
Definition: pdfentries.cxx:654
PDFStream * m_pStream
Definition: pdfparse.hxx:262
bool getDeflatedStream(std::unique_ptr< char[]> &rpStream, unsigned int *pBytes, const PDFContainer *pObjectContainer, EmitContext &rContext) const
Definition: pdfentries.cxx:658
void writeStream(EmitContext &rContext, const PDFFile *pPDFFile) const
Definition: pdfentries.cxx:782
PDFEntry * m_pObject
Definition: pdfparse.hxx:261
virtual PDFEntry * clone() const override
Definition: pdfentries.cxx:903
unsigned int m_nNumber
Definition: pdfparse.hxx:263
virtual ~PDFPart() override
virtual bool emit(EmitContext &rWriteContext) const override
virtual PDFEntry * clone() const override
virtual bool emit(EmitContext &rWriteContext) const override
Definition: pdfentries.cxx:611
unsigned int getDictLength(const PDFContainer *pObjectContainer) const
Definition: pdfentries.cxx:621
PDFStream(unsigned int nBegin, unsigned int nEnd, PDFDict *pStreamDict)
Definition: pdfparse.hxx:210
virtual ~PDFStream() override
Definition: pdfentries.cxx:607
virtual PDFEntry * clone() const override
Definition: pdfentries.cxx:616
unsigned int m_nBeginOffset
Definition: pdfparse.hxx:206
unsigned int m_nEndOffset
Definition: pdfparse.hxx:207
virtual bool emit(EmitContext &rWriteContext) const override
Definition: pdfentries.cxx:193
virtual ~PDFString() override
Definition: pdfentries.cxx:189
PDFString(OString aString)
Definition: pdfparse.hxx:107
OString getFilteredString() const
Definition: pdfentries.cxx:245
virtual PDFEntry * clone() const override
Definition: pdfentries.cxx:240
virtual bool emit(EmitContext &rWriteContext) const override
Definition: pdfentries.cxx:927
virtual PDFEntry * clone() const override
Definition: pdfentries.cxx:995
virtual ~PDFTrailer() override
Definition: pdfentries.cxx:923
virtual ~PDFValue() override
Definition: pdfentries.cxx:123
unsigned char sal_uInt8