LibreOffice Module sdext (master)  1
pdfentries.cxx
Go to the documentation of this file.
1 /* -*- Mode: C++; tab-width: 4; indent-tabs-mode: nil; c-basic-offset: 4 -*- */
2 /*
3  * This file is part of the LibreOffice project.
4  *
5  * This Source Code Form is subject to the terms of the Mozilla Public
6  * License, v. 2.0. If a copy of the MPL was not distributed with this
7  * file, You can obtain one at http://mozilla.org/MPL/2.0/.
8  *
9  * This file incorporates work covered by the following license notice:
10  *
11  * Licensed to the Apache Software Foundation (ASF) under one or more
12  * contributor license agreements. See the NOTICE file distributed
13  * with this work for additional information regarding copyright
14  * ownership. The ASF licenses this file to you under the Apache
15  * License, Version 2.0 (the "License"); you may not use this file
16  * except in compliance with the License. You may obtain a copy of
17  * the License at http://www.apache.org/licenses/LICENSE-2.0 .
18  */
19 
20 
21 #include <pdfparse.hxx>
22 
23 #include <comphelper/hash.hxx>
24 
25 #include <rtl/strbuf.hxx>
26 #include <rtl/ustring.hxx>
27 #include <rtl/ustrbuf.hxx>
28 #include <rtl/digest.h>
29 #include <rtl/cipher.h>
30 #include <sal/log.hxx>
31 
32 #include <zlib.h>
33 
34 #include <math.h>
35 #include <map>
36 
37 #include <string.h>
38 
39 
40 namespace pdfparse
41 {
42 
44 {
45  // xref table: maps object number to a pair of (generation, buffer offset)
46  typedef std::map< unsigned int, std::pair< unsigned int, unsigned int > > XRefTable;
47  XRefTable m_aXRefTable;
48  // container of all indirect objects (usually a PDFFile*)
50  unsigned int m_nDecryptObject;
51  unsigned int m_nDecryptGeneration;
52 
53  // returns true if the xref table was updated
54  bool insertXref( unsigned int nObject, unsigned int nGeneration, unsigned int nOffset )
55  {
56  XRefTable::iterator it = m_aXRefTable.find( nObject );
57  if( it == m_aXRefTable.end() )
58  {
59  // new entry
60  m_aXRefTable[ nObject ] = std::pair<unsigned int, unsigned int>(nGeneration,nOffset);
61  return true;
62  }
63  // update old entry, if generation number is higher
64  if( it->second.first < nGeneration )
65  {
66  it->second = std::pair<unsigned int, unsigned int>(nGeneration,nOffset);
67  return true;
68  }
69  return false;
70  }
71 
72  explicit EmitImplData( const PDFContainer* pTopContainer ) :
73  m_pObjectContainer( pTopContainer ),
74  m_nDecryptObject( 0 ),
75  m_nDecryptGeneration( 0 )
76  {}
77  void decrypt( const sal_uInt8* pInBuffer, sal_uInt32 nLen, sal_uInt8* pOutBuffer,
78  unsigned int nObject, unsigned int nGeneration ) const
79  {
80  const PDFFile* pFile = dynamic_cast<const PDFFile*>(m_pObjectContainer);
81  pFile && pFile->decrypt( pInBuffer, nLen, pOutBuffer, nObject, nGeneration );
82  }
83 
84  void setDecryptObject( unsigned int nObject, unsigned int nGeneration )
85  {
86  m_nDecryptObject = nObject;
87  m_nDecryptGeneration = nGeneration;
88  }
89 };
90 
91 }
92 
93 using namespace pdfparse;
94 
96  m_bDeflate( false ),
97  m_bDecrypt( false )
98 {
99  if( pTop )
100  m_pImplData.reset( new EmitImplData( pTop ) );
101 }
102 
104 {
105 }
106 
108 {
109 }
110 
112 {
113  return rContext.m_pImplData.get();
114 }
115 
116 void PDFEntry::setEmitData( EmitContext& rContext, EmitImplData* pNewEmitData )
117 {
118  if( rContext.m_pImplData && rContext.m_pImplData.get() != pNewEmitData )
119  rContext.m_pImplData.reset();
120  rContext.m_pImplData.reset( pNewEmitData );
121 }
122 
124 {
125 }
126 
128 {
129 }
130 
131 bool PDFComment::emit( EmitContext& rWriteContext ) const
132 {
133  return rWriteContext.write( m_aComment.getStr(), m_aComment.getLength() );
134 }
135 
137 {
138  return new PDFComment( m_aComment );
139 }
140 
142 {
143 }
144 
145 bool PDFName::emit( EmitContext& rWriteContext ) const
146 {
147  if( ! rWriteContext.write( " /", 2 ) )
148  return false;
149  return rWriteContext.write( m_aName.getStr(), m_aName.getLength() );
150 }
151 
153 {
154  return new PDFName( m_aName );
155 }
156 
157 OUString PDFName::getFilteredName() const
158 {
159  OStringBuffer aFilter( m_aName.getLength() );
160  const char* pStr = m_aName.getStr();
161  unsigned int nLen = m_aName.getLength();
162  for( unsigned int i = 0; i < nLen; i++ )
163  {
164  if( (i < nLen - 3) && pStr[i] == '#' )
165  {
166  char rResult = 0;
167  i++;
168  if( pStr[i] >= '0' && pStr[i] <= '9' )
169  rResult = char( pStr[i]-'0' ) << 4;
170  else if( pStr[i] >= 'a' && pStr[i] <= 'f' )
171  rResult = char( pStr[i]-'a' + 10 ) << 4;
172  else if( pStr[i] >= 'A' && pStr[i] <= 'F' )
173  rResult = char( pStr[i]-'A' + 10 ) << 4;
174  i++;
175  if( pStr[i] >= '0' && pStr[i] <= '9' )
176  rResult |= char( pStr[i]-'0' );
177  else if( pStr[i] >= 'a' && pStr[i] <= 'f' )
178  rResult |= char( pStr[i]-'a' + 10 );
179  else if( pStr[i] >= 'A' && pStr[i] <= 'F' )
180  rResult |= char( pStr[i]-'A' + 10 );
181  aFilter.append( rResult );
182  }
183  else
184  aFilter.append( pStr[i] );
185  }
186  return OStringToOUString( aFilter.makeStringAndClear(), RTL_TEXTENCODING_UTF8 );
187 }
188 
190 {
191 }
192 
193 bool PDFString::emit( EmitContext& rWriteContext ) const
194 {
195  if( ! rWriteContext.write( " ", 1 ) )
196  return false;
197  EmitImplData* pEData = getEmitData( rWriteContext );
198  if( rWriteContext.m_bDecrypt && pEData && pEData->m_nDecryptObject )
199  {
200  OString aFiltered( getFilteredString() );
201  // decrypt inplace (evil since OString is supposed to be const
202  // however in this case we know that getFilteredString returned a singular string instance
203  pEData->decrypt( reinterpret_cast<sal_uInt8 const *>(aFiltered.getStr()), aFiltered.getLength(),
204  reinterpret_cast<sal_uInt8 *>(const_cast<char *>(aFiltered.getStr())),
205  pEData->m_nDecryptObject, pEData->m_nDecryptGeneration );
206  // check for string or hex string
207  const char* pStr = aFiltered.getStr();
208  if( aFiltered.getLength() > 1 &&
209  ( (static_cast<unsigned char>(pStr[0]) == 0xff && static_cast<unsigned char>(pStr[1]) == 0xfe) ||
210  (static_cast<unsigned char>(pStr[0]) == 0xfe && static_cast<unsigned char>(pStr[1]) == 0xff) ) )
211  {
212  static const char pHexTab[16] = { '0', '1', '2', '3', '4', '5', '6', '7',
213  '8', '9', 'A', 'B', 'C', 'D', 'E', 'F' };
214  if( ! rWriteContext.write( "<", 1 ) )
215  return false;
216  for( sal_Int32 i = 0; i < aFiltered.getLength(); i++ )
217  {
218  if( ! rWriteContext.write( pHexTab + ((sal_uInt32(pStr[i]) >> 4) & 0x0f), 1 ) )
219  return false;
220  if( ! rWriteContext.write( pHexTab + (sal_uInt32(pStr[i]) & 0x0f), 1 ) )
221  return false;
222  }
223  if( ! rWriteContext.write( ">", 1 ) )
224  return false;
225  }
226  else
227  {
228  if( ! rWriteContext.write( "(", 1 ) )
229  return false;
230  if( ! rWriteContext.write( aFiltered.getStr(), aFiltered.getLength() ) )
231  return false;
232  if( ! rWriteContext.write( ")", 1 ) )
233  return false;
234  }
235  return true;
236  }
237  return rWriteContext.write( m_aString.getStr(), m_aString.getLength() );
238 }
239 
241 {
242  return new PDFString( m_aString );
243 }
244 
246 {
247  int nLen = m_aString.getLength();
248  OStringBuffer aBuf( nLen );
249 
250  const char* pStr = m_aString.getStr();
251  if( *pStr == '(' )
252  {
253  const char* pRun = pStr+1;
254  while( pRun - pStr < nLen-1 )
255  {
256  if( *pRun == '\\' )
257  {
258  pRun++;
259  if( pRun - pStr < nLen )
260  {
261  char aEsc = 0;
262  if( *pRun == 'n' )
263  aEsc = '\n';
264  else if( *pRun == 'r' )
265  aEsc = '\r';
266  else if( *pRun == 't' )
267  aEsc = '\t';
268  else if( *pRun == 'b' )
269  aEsc = '\b';
270  else if( *pRun == 'f' )
271  aEsc = '\f';
272  else if( *pRun == '(' )
273  aEsc = '(';
274  else if( *pRun == ')' )
275  aEsc = ')';
276  else if( *pRun == '\\' )
277  aEsc = '\\';
278  else if( *pRun == '\n' )
279  {
280  pRun++;
281  continue;
282  }
283  else if( *pRun == '\r' )
284  {
285  pRun++;
286  if( *pRun == '\n' )
287  pRun++;
288  continue;
289  }
290  else
291  {
292  int i = 0;
293  while( i++ < 3 && *pRun >= '0' && *pRun <= '7' )
294  aEsc = 8*aEsc + (*pRun++ - '0');
295  // move pointer back to last character of octal sequence
296  pRun--;
297  }
298  aBuf.append( aEsc );
299  }
300  }
301  else
302  aBuf.append( *pRun );
303  // move pointer to next character
304  pRun++;
305  }
306  }
307  else if( *pStr == '<' )
308  {
309  const char* pRun = pStr+1;
310  while( *pRun != '>' && pRun - pStr < nLen )
311  {
312  char rResult = 0;
313  if( *pRun >= '0' && *pRun <= '9' )
314  rResult = char( ( *pRun-'0' ) << 4 );
315  else if( *pRun >= 'a' && *pRun <= 'f' )
316  rResult = char( ( *pRun-'a' + 10 ) << 4 );
317  else if( *pRun >= 'A' && *pRun <= 'F' )
318  rResult = char( ( *pRun-'A' + 10 ) << 4 );
319  pRun++;
320  if( *pRun != '>' && pRun - pStr < nLen )
321  {
322  if( *pRun >= '0' && *pRun <= '9' )
323  rResult |= char( *pRun-'0' );
324  else if( *pRun >= 'a' && *pRun <= 'f' )
325  rResult |= char( *pRun-'a' + 10 );
326  else if( *pRun >= 'A' && *pRun <= 'F' )
327  rResult |= char( *pRun-'A' + 10 );
328  }
329  pRun++;
330  aBuf.append( rResult );
331  }
332  }
333 
334  return aBuf.makeStringAndClear();
335 }
336 
338 {
339 }
340 
341 bool PDFNumber::emit( EmitContext& rWriteContext ) const
342 {
343  OStringBuffer aBuf( 32 );
344  aBuf.append( ' ' );
345 
346  double fValue = m_fValue;
347  bool bNeg = false;
348  int nPrecision = 5;
349  if( fValue < 0.0 )
350  {
351  bNeg = true;
352  fValue=-fValue;
353  }
354 
355  sal_Int64 nInt = static_cast<sal_Int64>(fValue);
356  fValue -= static_cast<double>(nInt);
357  // optimizing hardware may lead to a value of 1.0 after the subtraction
358  if( fValue == 1.0 || log10( 1.0-fValue ) <= -nPrecision )
359  {
360  nInt++;
361  fValue = 0.0;
362  }
363  sal_Int64 nFrac = 0;
364  if( fValue )
365  {
366  fValue *= pow( 10.0, static_cast<double>(nPrecision) );
367  nFrac = static_cast<sal_Int64>(fValue);
368  }
369  if( bNeg && ( nInt || nFrac ) )
370  aBuf.append( '-' );
371  aBuf.append( nInt );
372  if( nFrac )
373  {
374  int i;
375  aBuf.append( '.' );
376  sal_Int64 nBound = static_cast<sal_Int64>(pow( 10.0, nPrecision - 1.0 )+0.5);
377  for ( i = 0; ( i < nPrecision ) && nFrac; i++ )
378  {
379  sal_Int64 nNumb = nFrac / nBound;
380  nFrac -= nNumb * nBound;
381  aBuf.append( nNumb );
382  nBound /= 10;
383  }
384  }
385 
386  return rWriteContext.write( aBuf.getStr(), aBuf.getLength() );
387 }
388 
390 {
391  return new PDFNumber( m_fValue );
392 }
393 
394 
396 {
397 }
398 
399 bool PDFBool::emit( EmitContext& rWriteContext ) const
400 {
401  return m_bValue ? rWriteContext.write( " true", 5 ) : rWriteContext.write( " false", 6 );
402 }
403 
405 {
406  return new PDFBool( m_bValue );
407 }
408 
410 {
411 }
412 
413 bool PDFNull::emit( EmitContext& rWriteContext ) const
414 {
415  return rWriteContext.write( " null", 5 );
416 }
417 
419 {
420  return new PDFNull();
421 }
422 
423 
425 {
426 }
427 
428 bool PDFObjectRef::emit( EmitContext& rWriteContext ) const
429 {
430  OString aBuf =
431  " " +
432  OString::number( sal_Int32( m_nNumber ) ) +
433  " " +
434  OString::number( sal_Int32( m_nGeneration ) ) +
435  " R";
436  return rWriteContext.write( aBuf.getStr(), aBuf.getLength() );
437 }
438 
440 {
441  return new PDFObjectRef( m_nNumber, m_nGeneration );
442 }
443 
445 {
446 }
447 
448 bool PDFContainer::emitSubElements( EmitContext& rWriteContext ) const
449 {
450  int nEle = m_aSubElements.size();
451  for( int i = 0; i < nEle; i++ )
452  {
453  if( rWriteContext.m_bDecrypt )
454  {
455  const PDFName* pName = dynamic_cast<PDFName*>(m_aSubElements[i].get());
456  if (pName && pName->m_aName == "Encrypt")
457  {
458  i++;
459  continue;
460  }
461  }
462  if( ! m_aSubElements[i]->emit( rWriteContext ) )
463  return false;
464  }
465  return true;
466 }
467 
468 void PDFContainer::cloneSubElements( std::vector<std::unique_ptr<PDFEntry>>& rNewSubElements ) const
469 {
470  int nEle = m_aSubElements.size();
471  for( int i = 0; i < nEle; i++ )
472  rNewSubElements.emplace_back( m_aSubElements[i]->clone() );
473 }
474 
475 PDFObject* PDFContainer::findObject( unsigned int nNumber, unsigned int nGeneration ) const
476 {
477  unsigned int nEle = m_aSubElements.size();
478  for( unsigned int i = 0; i < nEle; i++ )
479  {
480  PDFObject* pObject = dynamic_cast<PDFObject*>(m_aSubElements[i].get());
481  if( pObject &&
482  pObject->m_nNumber == nNumber &&
483  pObject->m_nGeneration == nGeneration )
484  {
485  return pObject;
486  }
487  }
488  return nullptr;
489 }
490 
492 {
493 }
494 
495 bool PDFArray::emit( EmitContext& rWriteContext ) const
496 {
497  if( ! rWriteContext.write( "[", 1 ) )
498  return false;
499  if( ! emitSubElements( rWriteContext ) )
500  return false;
501  return rWriteContext.write( "]", 1 );
502 }
503 
505 {
506  PDFArray* pNewAr = new PDFArray();
507  cloneSubElements( pNewAr->m_aSubElements );
508  return pNewAr;
509 }
510 
512 {
513 }
514 
515 bool PDFDict::emit( EmitContext& rWriteContext ) const
516 {
517  if( ! rWriteContext.write( "<<\n", 3 ) )
518  return false;
519  if( ! emitSubElements( rWriteContext ) )
520  return false;
521  return rWriteContext.write( "\n>>\n", 4 );
522 }
523 
524 void PDFDict::insertValue( const OString& rName, std::unique_ptr<PDFEntry> pValue )
525 {
526  if( ! pValue )
527  eraseValue( rName );
528 
529  PDFEntry* pValueTmp = nullptr;
530  std::unordered_map<OString,PDFEntry*>::iterator it = m_aMap.find( rName );
531  if( it == m_aMap.end() )
532  {
533  // new name/value, pair, append it
534  m_aSubElements.emplace_back(std::make_unique<PDFName>(rName));
535  m_aSubElements.emplace_back( std::move(pValue) );
536  pValueTmp = m_aSubElements.back().get();
537  }
538  else
539  {
540  unsigned int nSub = m_aSubElements.size();
541  for( unsigned int i = 0; i < nSub; i++ )
542  if( m_aSubElements[i].get() == it->second )
543  {
544  m_aSubElements[i] = std::move(pValue);
545  pValueTmp = m_aSubElements[i].get();
546  break;
547  }
548  }
549  assert(pValueTmp);
550  m_aMap[ rName ] = pValueTmp;
551 }
552 
553 void PDFDict::eraseValue( std::string_view rName )
554 {
555  unsigned int nEle = m_aSubElements.size();
556  for( unsigned int i = 0; i < nEle; i++ )
557  {
558  PDFName* pName = dynamic_cast<PDFName*>(m_aSubElements[i].get());
559  if( pName && pName->m_aName == rName )
560  {
561  for( unsigned int j = i+1; j < nEle; j++ )
562  {
563  if( dynamic_cast<PDFComment*>(m_aSubElements[j].get()) == nullptr )
564  {
565  // remove and free subelements from vector
566  m_aSubElements.erase( m_aSubElements.begin()+j );
567  m_aSubElements.erase( m_aSubElements.begin()+i );
568  buildMap();
569  return;
570  }
571  }
572  }
573  }
574 }
575 
577 {
578  // clear map
579  m_aMap.clear();
580  // build map
581  unsigned int nEle = m_aSubElements.size();
582  PDFName* pName = nullptr;
583  for( unsigned int i = 0; i < nEle; i++ )
584  {
585  if( dynamic_cast<PDFComment*>(m_aSubElements[i].get()) == nullptr )
586  {
587  if( pName )
588  {
589  m_aMap[ pName->m_aName ] = m_aSubElements[i].get();
590  pName = nullptr;
591  }
592  else if( (pName = dynamic_cast<PDFName*>(m_aSubElements[i].get())) == nullptr )
593  return m_aSubElements[i].get();
594  }
595  }
596  return pName;
597 }
598 
600 {
601  PDFDict* pNewDict = new PDFDict();
602  cloneSubElements( pNewDict->m_aSubElements );
603  pNewDict->buildMap();
604  return pNewDict;
605 }
606 
608 {
609 }
610 
611 bool PDFStream::emit( EmitContext& rWriteContext ) const
612 {
613  return rWriteContext.copyOrigBytes( m_nBeginOffset, m_nEndOffset-m_nBeginOffset );
614 }
615 
617 {
618  return new PDFStream( m_nBeginOffset, m_nEndOffset, nullptr );
619 }
620 
621 unsigned int PDFStream::getDictLength( const PDFContainer* pContainer ) const
622 {
623  if( ! m_pDict )
624  return 0;
625  // find /Length entry, can either be a direct or indirect number object
626  std::unordered_map<OString,PDFEntry*>::const_iterator it =
627  m_pDict->m_aMap.find( "Length" );
628  if( it == m_pDict->m_aMap.end() )
629  return 0;
630  PDFNumber* pNum = dynamic_cast<PDFNumber*>(it->second);
631  if( ! pNum && pContainer )
632  {
633  PDFObjectRef* pRef = dynamic_cast<PDFObjectRef*>(it->second);
634  if( pRef )
635  {
636  int nEle = pContainer->m_aSubElements.size();
637  for (int i = 0; i < nEle; i++)
638  {
639  PDFObject* pObj = dynamic_cast<PDFObject*>(pContainer->m_aSubElements[i].get());
640  if( pObj &&
641  pObj->m_nNumber == pRef->m_nNumber &&
642  pObj->m_nGeneration == pRef->m_nGeneration )
643  {
644  if( pObj->m_pObject )
645  pNum = dynamic_cast<PDFNumber*>(pObj->m_pObject);
646  break;
647  }
648  }
649  }
650  }
651  return pNum ? static_cast<unsigned int>(pNum->m_fValue) : 0;
652 }
653 
655 {
656 }
657 
// Loads the raw bytes of this object's stream and prepares them for emitting.
//
// The bytes between m_pStream->m_nBeginOffset and m_nEndOffset are read from
// the original data through rContext.readOrigBytes(), the leading "stream"
// keyword and the line end after it are skipped, and the payload is moved to
// the start of the buffer. If rContext.m_bDecrypt is set, the payload is
// decrypted in place.
//
// rpStream          receives the buffer; reset to empty if reading fails
// pBytes            receives the payload length as given by the dictionary's
//                   Length entry; 0 if there is no usable stream
// pObjectContainer  used to resolve an indirect Length entry
// rContext          source of the original bytes and of the decrypt flag
//
// Returns true if the dictionary's Filter entry is the name FlateDecode, or
// an array whose first element is that name.
658 bool PDFObject::getDeflatedStream( std::unique_ptr<char[]>& rpStream, unsigned int* pBytes, const PDFContainer* pObjectContainer, EmitContext& rContext ) const
659 {
660  bool bIsDeflated = false;
// NOTE(review): the listing numbers jump from 661 to 663, so one line of this
// condition appears to be missing from this copy of the file.
661  if( m_pStream && m_pStream->m_pDict &&
663  )
664  {
// read everything between the stream's begin and end offset
665  unsigned int nOuterStreamLen = m_pStream->m_nEndOffset - m_pStream->m_nBeginOffset;
666  rpStream.reset(new char[ nOuterStreamLen ]);
667  unsigned int nRead = rContext.readOrigBytes( m_pStream->m_nBeginOffset, nOuterStreamLen, rpStream.get() );
668  if( nRead != nOuterStreamLen )
669  {
// short read: hand back nothing
670  rpStream.reset();
671  *pBytes = 0;
672  return false;
673  }
674  // is there a filter entry ?
675  std::unordered_map<OString,PDFEntry*>::const_iterator it =
676  m_pStream->m_pDict->m_aMap.find( "Filter" );
677  if( it != m_pStream->m_pDict->m_aMap.end() )
678  {
// the entry is either a single name or an array; of an array only the first
// element is looked at
679  PDFName* pFilter = dynamic_cast<PDFName*>(it->second);
680  if( ! pFilter )
681  {
682  PDFArray* pArray = dynamic_cast<PDFArray*>(it->second);
683  if( pArray && ! pArray->m_aSubElements.empty() )
684  {
685  pFilter = dynamic_cast<PDFName*>(pArray->m_aSubElements.front().get());
686  }
687  }
688 
689  // is the (first) filter FlateDecode ?
690  if (pFilter && pFilter->m_aName == "FlateDecode")
691  {
692  bIsDeflated = true;
693  }
694  }
695  // prepare compressed data section
696  char* pStream = rpStream.get();
697  if( pStream[0] == 's' )
698  pStream += 6; // skip "stream"
699  // skip line end after "stream"
700  while( *pStream == '\r' || *pStream == '\n' )
701  pStream++;
702  // get the compressed length
// NOTE(review): no check is visible here that the Length value fits into the
// nOuterStreamLen bytes that were read before it is used by memmove and
// decrypt below - confirm against the line missing from the condition above.
703  *pBytes = m_pStream->getDictLength( pObjectContainer );
704  if( pStream != rpStream.get() )
705  memmove( rpStream.get(), pStream, *pBytes );
706  if( rContext.m_bDecrypt )
707  {
// NOTE(review): pEData is used without a null check; presumably m_bDecrypt is
// only set on contexts that carry emit data - verify against callers.
708  EmitImplData* pEData = getEmitData( rContext );
709  pEData->decrypt( reinterpret_cast<const sal_uInt8*>(rpStream.get()),
710  *pBytes,
711  reinterpret_cast<sal_uInt8*>(rpStream.get()),
712  m_nNumber,
// NOTE(review): listing line 713 (the last argument of this call) is missing
// from this copy of the file.
714  ); // decrypt inplace
715  }
716  }
717  else
718  {
// no stream (or no stream dictionary): report an empty payload
719  *pBytes = 0;
720  }
721  return bIsDeflated;
722 }
723 
724 static void unzipToBuffer( char* pBegin, unsigned int nLen,
725  sal_uInt8** pOutBuf, sal_uInt32* pOutLen )
726 {
727  z_stream aZStr;
728  aZStr.next_in = reinterpret_cast<Bytef *>(pBegin);
729  aZStr.avail_in = nLen;
730  aZStr.total_out = aZStr.total_in = 0;
731  aZStr.zalloc = nullptr;
732  aZStr.zfree = nullptr;
733  aZStr.opaque = nullptr;
734 
735  int err = inflateInit(&aZStr);
736 
737  const unsigned int buf_increment_size = 16384;
738 
739  if (auto p = static_cast<sal_uInt8*>(std::realloc(*pOutBuf, buf_increment_size)))
740  {
741  *pOutBuf = p;
742  aZStr.next_out = reinterpret_cast<Bytef*>(*pOutBuf);
743  aZStr.avail_out = buf_increment_size;
744  *pOutLen = buf_increment_size;
745  }
746  else
747  err = Z_MEM_ERROR;
748  while( err != Z_STREAM_END && err >= Z_OK && aZStr.avail_in )
749  {
750  err = inflate( &aZStr, Z_NO_FLUSH );
751  if( aZStr.avail_out == 0 )
752  {
753  if( err != Z_STREAM_END )
754  {
755  const int nNewAlloc = *pOutLen + buf_increment_size;
756  if (auto p = static_cast<sal_uInt8*>(std::realloc(*pOutBuf, nNewAlloc)))
757  {
758  *pOutBuf = p;
759  aZStr.next_out = reinterpret_cast<Bytef*>(*pOutBuf + *pOutLen);
760  aZStr.avail_out = buf_increment_size;
761  *pOutLen = nNewAlloc;
762  }
763  else
764  err = Z_MEM_ERROR;
765  }
766  }
767  }
768  if( err == Z_STREAM_END )
769  {
770  if( aZStr.avail_out > 0 )
771  *pOutLen -= aZStr.avail_out;
772  }
773  inflateEnd(&aZStr);
774  if( err < Z_OK )
775  {
776  std::free( *pOutBuf );
777  *pOutBuf = nullptr;
778  *pOutLen = 0;
779  }
780 }
781 
782 void PDFObject::writeStream( EmitContext& rWriteContext, const PDFFile* pParsedFile ) const
783 {
784  if( !m_pStream )
785  return;
786 
787  std::unique_ptr<char[]> pStream;
788  unsigned int nBytes = 0;
789  if( getDeflatedStream( pStream, &nBytes, pParsedFile, rWriteContext ) && nBytes && rWriteContext.m_bDeflate )
790  {
791  sal_uInt8* pOutBytes = nullptr;
792  sal_uInt32 nOutBytes = 0;
793  unzipToBuffer( pStream.get(), nBytes, &pOutBytes, &nOutBytes );
794  rWriteContext.write( pOutBytes, nOutBytes );
795  std::free( pOutBytes );
796  }
797  else if( pStream && nBytes )
798  rWriteContext.write( pStream.get(), nBytes );
799 }
800 
// Writes this indirect object as "<number> <generation> obj ... endobj".
//
// If the context carries emit data, the object's output position is entered
// into its xref table first. When deflating or decrypting is requested and
// the object has stream data, a clone with an adjusted Length entry (and,
// after inflating, without the FlateDecode filter) is written together with
// the converted stream; otherwise the sub elements are written unchanged.
//
// Returns false as soon as a write fails.
801 bool PDFObject::emit( EmitContext& rWriteContext ) const
802 {
803  if( ! rWriteContext.write( "\n", 1 ) )
804  return false;
805 
// remember where this object starts in the output
806  EmitImplData* pEData = getEmitData( rWriteContext );
807  if( pEData )
808  pEData->insertXref( m_nNumber, m_nGeneration, rWriteContext.getCurPos() );
809 
// object header: "<number> <generation> obj\n"
810  OString aBuf =
811  OString::number( sal_Int32( m_nNumber ) ) +
812  " " +
813  OString::number( sal_Int32( m_nGeneration ) ) +
814  " obj\n";
815  if( ! rWriteContext.write( aBuf.getStr(), aBuf.getLength() ) )
816  return false;
817 
// NOTE(review): listing line 819 - the statement controlled by the following
// "if" - is missing from this copy of the file. Given the
// setDecryptObject( 0, 0 ) calls further down, it presumably sets the decrypt
// object for this object - confirm against the real source.
818  if( pEData )
820  if( (rWriteContext.m_bDeflate || rWriteContext.m_bDecrypt) && pEData )
821  {
822  std::unique_ptr<char[]> pStream;
823  unsigned int nBytes = 0;
824  bool bDeflate = getDeflatedStream( pStream, &nBytes, pEData->m_pObjectContainer, rWriteContext );
825  if( pStream && nBytes )
826  {
827  // unzip the stream
828  sal_uInt8* pOutBytes = nullptr;
829  sal_uInt32 nOutBytes = 0;
830  if( bDeflate && rWriteContext.m_bDeflate )
831  unzipToBuffer( pStream.get(), nBytes, &pOutBytes, &nOutBytes );
832  else
833  {
834  // nothing to deflate, but decryption has happened
// pOutBytes aliases pStream's buffer here, so it must not be freed separately
835  pOutBytes = reinterpret_cast<sal_uInt8*>(pStream.get());
836  nOutBytes = static_cast<sal_uInt32>(nBytes);
837  }
838 
839  if( nOutBytes )
840  {
841  // clone this object
842  std::unique_ptr<PDFObject> pClone(static_cast<PDFObject*>(clone()));
843  // set length in the dictionary to new stream length
844  std::unique_ptr<PDFNumber> pNewLen(new PDFNumber( double(nOutBytes) ));
845  pClone->m_pStream->m_pDict->insertValue( "Length", std::move(pNewLen) );
846 
847  if( bDeflate && rWriteContext.m_bDeflate )
848  {
849  // delete flatedecode filter
850  std::unordered_map<OString,PDFEntry*>::const_iterator it =
851  pClone->m_pStream->m_pDict->m_aMap.find( "Filter" );
852  if( it != pClone->m_pStream->m_pDict->m_aMap.end() )
853  {
// a single FlateDecode name removes the whole Filter entry; in a filter array
// only a leading FlateDecode element is removed
854  PDFName* pFilter = dynamic_cast<PDFName*>(it->second);
855  if (pFilter && pFilter->m_aName == "FlateDecode")
856  pClone->m_pStream->m_pDict->eraseValue( "Filter" );
857  else
858  {
859  PDFArray* pArray = dynamic_cast<PDFArray*>(it->second);
860  if( pArray && ! pArray->m_aSubElements.empty() )
861  {
862  pFilter = dynamic_cast<PDFName*>(pArray->m_aSubElements.front().get());
863  if (pFilter && pFilter->m_aName == "FlateDecode")
864  {
865  pArray->m_aSubElements.erase( pArray->m_aSubElements.begin() );
866  }
867  }
868  }
869  }
870  }
871 
872  // write sub elements except stream
873  bool bRet = true;
874  unsigned int nEle = pClone->m_aSubElements.size();
875  for( unsigned int i = 0; i < nEle && bRet; i++ )
876  {
877  if( pClone->m_aSubElements[i].get() != pClone->m_pStream )
878  bRet = pClone->m_aSubElements[i]->emit( rWriteContext );
879  }
880  pClone.reset();
881  // write stream
882  if( bRet )
883  bRet = rWriteContext.write("stream\n", 7)
884  && rWriteContext.write(pOutBytes, nOutBytes)
885  && rWriteContext.write("\nendstream\nendobj\n", 18);
// free only a buffer that unzipToBuffer allocated
886  if( pOutBytes != reinterpret_cast<sal_uInt8*>(pStream.get()) )
887  std::free( pOutBytes );
888  pEData->setDecryptObject( 0, 0 );
889  return bRet;
890  }
// nothing to write from the converted stream: release an inflate buffer, if
// any, and fall through to the plain path below
891  if( pOutBytes != reinterpret_cast<sal_uInt8*>(pStream.get()) )
892  std::free( pOutBytes );
893  }
894  }
895 
// plain path: write the sub elements as they are
896  bool bRet = emitSubElements( rWriteContext ) &&
897  rWriteContext.write( "\nendobj\n", 8 );
898  if( pEData )
899  pEData->setDecryptObject( 0, 0 );
900  return bRet;
901 }
902 
904 {
905  PDFObject* pNewOb = new PDFObject( m_nNumber, m_nGeneration );
906  cloneSubElements( pNewOb->m_aSubElements );
907  unsigned int nEle = m_aSubElements.size();
908  for( unsigned int i = 0; i < nEle; i++ )
909  {
910  if( m_aSubElements[i].get() == m_pObject )
911  pNewOb->m_pObject = pNewOb->m_aSubElements[i].get();
912  else if( m_aSubElements[i].get() == m_pStream && pNewOb->m_pObject )
913  {
914  pNewOb->m_pStream = dynamic_cast<PDFStream*>(pNewOb->m_aSubElements[i].get());
915  PDFDict* pNewDict = dynamic_cast<PDFDict*>(pNewOb->m_pObject);
916  if (pNewDict && pNewOb->m_pStream)
917  pNewOb->m_pStream->m_pDict = pNewDict;
918  }
919  }
920  return pNewOb;
921 }
922 
924 {
925 }
926 
927 bool PDFTrailer::emit( EmitContext& rWriteContext ) const
928 {
929  // get xref offset
930  unsigned int nXRefPos = rWriteContext.getCurPos();
931  // begin xref section, object 0 is always free
932  if( ! rWriteContext.write( "xref\r\n"
933  "0 1\r\n"
934  "0000000000 65535 f\r\n", 31 ) )
935  return false;
936  // check if we are emitting a complete PDF file
937  EmitImplData* pEData = getEmitData( rWriteContext );
938  if( pEData )
939  {
940  // emit object xrefs
941  const EmitImplData::XRefTable& rXRefs = pEData->m_aXRefTable;
942  EmitImplData::XRefTable::const_iterator section_begin, section_end;
943  section_begin = rXRefs.begin();
944  while( section_begin != rXRefs.end() )
945  {
946  // find end of continuous object numbers
947  section_end = section_begin;
948  unsigned int nLast = section_begin->first;
949  while( (++section_end) != rXRefs.end() &&
950  section_end->first == nLast+1 )
951  nLast = section_end->first;
952  // write first object number and number of following entries
953  OStringBuffer aBuf( 21 );
954  aBuf.append( sal_Int32( section_begin->first ) );
955  aBuf.append( ' ' );
956  aBuf.append( sal_Int32(nLast - section_begin->first + 1) );
957  aBuf.append( "\r\n" );
958  if( ! rWriteContext.write( aBuf.getStr(), aBuf.getLength() ) )
959  return false;
960  while( section_begin != section_end )
961  {
962  // write 20 char entry of form
963  // 0000offset 00gen n\r\n
964  aBuf.setLength( 0 );
965  OString aOffset( OString::number( section_begin->second.second ) );
966  int nPad = 10 - aOffset.getLength();
967  for( int i = 0; i < nPad; i++ )
968  aBuf.append( '0' );
969  aBuf.append( aOffset );
970  aBuf.append( ' ' );
971  OString aGeneration( OString::number( section_begin->second.first ) );
972  nPad = 5 - aGeneration.getLength();
973  for( int i = 0; i < nPad; i++ )
974  aBuf.append( '0' );
975  aBuf.append( aGeneration );
976  aBuf.append( " n\r\n" );
977  if( ! rWriteContext.write( aBuf.getStr(), 20 ) )
978  return false;
979  ++section_begin;
980  }
981  }
982  }
983  if( ! rWriteContext.write( "trailer\n", 8 ) )
984  return false;
985  if( ! emitSubElements( rWriteContext ) )
986  return false;
987  if( ! rWriteContext.write( "startxref\n", 10 ) )
988  return false;
989  OString aOffset( OString::number( nXRefPos ) );
990  if( ! rWriteContext.write( aOffset.getStr(), aOffset.getLength() ) )
991  return false;
992  return rWriteContext.write( "\n%%EOF\n", 7 );
993 }
994 
996 {
997  PDFTrailer* pNewTr = new PDFTrailer();
998  cloneSubElements( pNewTr->m_aSubElements );
999  unsigned int nEle = m_aSubElements.size();
1000  for( unsigned int i = 0; i < nEle; i++ )
1001  {
1002  if( m_aSubElements[i].get() == m_pDict )
1003  {
1004  pNewTr->m_pDict = dynamic_cast<PDFDict*>(pNewTr->m_aSubElements[i].get());
1005  break;
1006  }
1007  }
1008  return pNewTr;
1009 }
1010 
1011 #define ENCRYPTION_KEY_LEN 16
1012 #define ENCRYPTION_BUF_LEN 32
1013 
1014 namespace pdfparse {
1016 {
1019  sal_uInt32 m_nAlgoVersion;
1021  sal_uInt32 m_nKeyLength;
1024  sal_uInt32 m_nPEntry;
1025  OString m_aDocID;
1026  rtlCipher m_aCipher;
1027 
1028  sal_uInt8 m_aDecryptionKey[ENCRYPTION_KEY_LEN+5] = {}; // maximum handled key length
1029 
1031  m_bIsEncrypted( false ),
1032  m_bStandardHandler( false ),
1033  m_nAlgoVersion( 0 ),
1034  m_nStandardRevision( 0 ),
1035  m_nKeyLength( 0 ),
1036  m_nPEntry( 0 ),
1037  m_aCipher( nullptr )
1038  {
1039  }
1040 
1042  {
1043  if( m_aCipher )
1044  rtl_cipher_destroyARCFOUR( m_aCipher );
1045  }
1046 };
1047 }
1048 
1050  : m_nMajor( 0 ), m_nMinor( 0 )
1051 {
1052 }
1053 
1055 {
1056 }
1057 
1059 {
1060  return impl_getData()->m_bIsEncrypted;
1061 }
1062 
1063 bool PDFFile::decrypt( const sal_uInt8* pInBuffer, sal_uInt32 nLen, sal_uInt8* pOutBuffer,
1064  unsigned int nObject, unsigned int nGeneration ) const
1065 {
1066  if( ! isEncrypted() )
1067  return false;
1068 
1069  if( ! m_pData->m_aCipher )
1070  m_pData->m_aCipher = rtl_cipher_createARCFOUR( rtl_Cipher_ModeStream );
1071 
1072  // modify encryption key
1073  sal_uInt32 i = m_pData->m_nKeyLength;
1074  m_pData->m_aDecryptionKey[i++] = sal_uInt8(nObject&0xff);
1075  m_pData->m_aDecryptionKey[i++] = sal_uInt8((nObject>>8)&0xff);
1076  m_pData->m_aDecryptionKey[i++] = sal_uInt8((nObject>>16)&0xff);
1077  m_pData->m_aDecryptionKey[i++] = sal_uInt8(nGeneration&0xff);
1078  m_pData->m_aDecryptionKey[i++] = sal_uInt8((nGeneration>>8)&0xff);
1079 
1080  ::std::vector<unsigned char> const aSum(::comphelper::Hash::calculateHash(
1081  m_pData->m_aDecryptionKey, i, ::comphelper::HashType::MD5));
1082 
1083  if( i > 16 )
1084  i = 16;
1085 
1086  rtlCipherError aErr = rtl_cipher_initARCFOUR( m_pData->m_aCipher,
1087  rtl_Cipher_DirectionDecode,
1088  aSum.data(), i,
1089  nullptr, 0 );
1090  if( aErr == rtl_Cipher_E_None )
1091  aErr = rtl_cipher_decodeARCFOUR( m_pData->m_aCipher,
1092  pInBuffer, nLen,
1093  pOutBuffer, nLen );
1094  return aErr == rtl_Cipher_E_None;
1095 }
1096 
1098 {
1099  0x28, 0xBF, 0x4E, 0x5E, 0x4E, 0x75, 0x8A, 0x41, 0x64, 0x00, 0x4E, 0x56, 0xFF, 0xFA, 0x01, 0x08,
1100  0x2E, 0x2E, 0x00, 0xB6, 0xD0, 0x68, 0x3E, 0x80, 0x2F, 0x0C, 0xA9, 0xFE, 0x64, 0x53, 0x69, 0x7A
1101 };
1102 
1103 static void pad_or_truncate_to_32( const OString& rStr, char* pBuffer )
1104 {
1105  int nLen = rStr.getLength();
1106  if( nLen > 32 )
1107  nLen = 32;
1108  const char* pStr = rStr.getStr();
1109  memcpy( pBuffer, pStr, nLen );
1110  int i = 0;
1111  while( nLen < 32 )
1112  pBuffer[nLen++] = nPadString[i++];
1113 }
1114 
1115 // pass at least pData->m_nKeyLength bytes in
// Derives key bytes from a password (PDF reference 1.4, Algorithm 3.2).
//
// rPwd       password; padded or truncated to 32 bytes before hashing
// pOutKey    receives the key bytes
// pData      supplies the O entry, P entry, document ID, revision and key length
// bComputeO  if true, only the padded password is hashed; if false, the O
//            entry, the P entry (low byte first) and the document ID are
//            hashed as well
//
// Returns the number of bytes written to pOutKey: pData->m_nKeyLength, capped
// at RTL_DIGEST_LENGTH_MD5.
1115 // pass at least pData->m_nKeyLength bytes in
1116 static sal_uInt32 password_to_key( const OString& rPwd, sal_uInt8* pOutKey, PDFFileImplData const * pData, bool bComputeO )
1117 {
1118  // see PDF reference 1.4 Algorithm 3.2
1119  // encrypt pad string
1120  char aPadPwd[ENCRYPTION_BUF_LEN];
1121  pad_or_truncate_to_32( rPwd, aPadPwd );
// NOTE(review): listing line 1122 is missing from this copy of the file;
// presumably it declares aDigest (MD5 is what the re-hashing below uses) -
// confirm against the real source.
1123  aDigest.update(reinterpret_cast<unsigned char const*>(aPadPwd), sizeof(aPadPwd));
1124  if( ! bComputeO )
1125  {
1126  aDigest.update(pData->m_aOEntry, 32);
// the P entry goes into the hash as four bytes, least significant first
1127  sal_uInt8 aPEntry[4];
1128  aPEntry[0] = static_cast<sal_uInt8>(pData->m_nPEntry & 0xff);
1129  aPEntry[1] = static_cast<sal_uInt8>((pData->m_nPEntry >> 8 ) & 0xff);
1130  aPEntry[2] = static_cast<sal_uInt8>((pData->m_nPEntry >> 16) & 0xff);
1131  aPEntry[3] = static_cast<sal_uInt8>((pData->m_nPEntry >> 24) & 0xff);
1132  aDigest.update(aPEntry, sizeof(aPEntry));
1133  aDigest.update(reinterpret_cast<unsigned char const*>(pData->m_aDocID.getStr()), pData->m_aDocID.getLength());
1134  }
1135  ::std::vector<unsigned char> nSum(aDigest.finalize());
1136  if( pData->m_nStandardRevision == 3 )
1137  {
// revision 3: hash the digest again 50 times
1138  for( int i = 0; i < 50; i++ )
1139  {
1140  nSum = ::comphelper::Hash::calculateHash(nSum.data(), nSum.size(),
1141  ::comphelper::HashType::MD5);
1142  }
1143  }
// hand out at most one MD5 digest worth of key bytes
1144  sal_uInt32 nLen = pData->m_nKeyLength;
1145  if( nLen > RTL_DIGEST_LENGTH_MD5 )
1146  nLen = RTL_DIGEST_LENGTH_MD5;
1147  memcpy( pOutKey, nSum.data(), nLen );
1148  return nLen;
1149 }
1150 
1151 static bool check_user_password( const OString& rPwd, PDFFileImplData* pData )
1152 {
1153  // see PDF reference 1.4 Algorithm 3.6
1154  bool bValid = false;
1156  sal_uInt32 nKeyLen = password_to_key( rPwd, aKey, pData, false );
1157  // save (at this time potential) decryption key for later use
1158  memcpy( pData->m_aDecryptionKey, aKey, nKeyLen );
1159  if( pData->m_nStandardRevision == 2 )
1160  {
1161  sal_uInt8 nEncryptedEntry[ENCRYPTION_BUF_LEN] = {};
1162  // see PDF reference 1.4 Algorithm 3.4
1163  // encrypt pad string
1164  if (rtl_cipher_initARCFOUR( pData->m_aCipher, rtl_Cipher_DirectionEncode,
1165  aKey, nKeyLen,
1166  nullptr, 0 )
1167  != rtl_Cipher_E_None)
1168  {
1169  return false; //TODO: differentiate "failed to decrypt" from "wrong password"
1170  }
1171  rtl_cipher_encodeARCFOUR( pData->m_aCipher, nPadString, sizeof( nPadString ),
1172  nEncryptedEntry, sizeof( nEncryptedEntry ) );
1173  bValid = (memcmp( nEncryptedEntry, pData->m_aUEntry, 32 ) == 0);
1174  }
1175  else if( pData->m_nStandardRevision == 3 )
1176  {
1177  // see PDF reference 1.4 Algorithm 3.5
1179  aDigest.update(nPadString, sizeof(nPadString));
1180  aDigest.update(reinterpret_cast<unsigned char const*>(pData->m_aDocID.getStr()), pData->m_aDocID.getLength());
1181  ::std::vector<unsigned char> nEncryptedEntry(aDigest.finalize());
1182  if (rtl_cipher_initARCFOUR( pData->m_aCipher, rtl_Cipher_DirectionEncode,
1183  aKey, sizeof(aKey), nullptr, 0 )
1184  != rtl_Cipher_E_None)
1185  {
1186  return false; //TODO: differentiate "failed to decrypt" from "wrong password"
1187  }
1188  rtl_cipher_encodeARCFOUR( pData->m_aCipher,
1189  nEncryptedEntry.data(), 16,
1190  nEncryptedEntry.data(), 16 ); // encrypt in place
1191  for( int i = 1; i <= 19; i++ ) // do it 19 times, start with 1
1192  {
1193  sal_uInt8 aTempKey[ENCRYPTION_KEY_LEN];
1194  for( size_t j = 0; j < sizeof(aTempKey); j++ )
1195  aTempKey[j] = static_cast<sal_uInt8>( aKey[j] ^ i );
1196 
1197  if (rtl_cipher_initARCFOUR( pData->m_aCipher, rtl_Cipher_DirectionEncode,
1198  aTempKey, sizeof(aTempKey), nullptr, 0 )
1199  != rtl_Cipher_E_None)
1200  {
1201  return false; //TODO: differentiate "failed to decrypt" from "wrong password"
1202  }
1203  rtl_cipher_encodeARCFOUR( pData->m_aCipher,
1204  nEncryptedEntry.data(), 16,
1205  nEncryptedEntry.data(), 16 ); // encrypt in place
1206  }
1207  bValid = (memcmp( nEncryptedEntry.data(), pData->m_aUEntry, 16 ) == 0);
1208  }
1209  return bValid;
1210 }
1211 
1213 {
1214  return m_pData->m_bStandardHandler &&
1215  m_pData->m_nAlgoVersion >= 1 &&
1216  m_pData->m_nAlgoVersion <= 2 &&
1217  m_pData->m_nStandardRevision >= 2 &&
1218  m_pData->m_nStandardRevision <= 3;
1219 }
1220 
1221 bool PDFFile::setupDecryptionData( const OString& rPwd ) const
1222 {
1223  if( !impl_getData()->m_bIsEncrypted )
1224  return rPwd.isEmpty();
1225 
1226  // check if we can handle this encryption at all
1228  return false;
1229 
1230  if( ! m_pData->m_aCipher )
1231  m_pData->m_aCipher = rtl_cipher_createARCFOUR(rtl_Cipher_ModeStream);
1232 
1233  // first try user password
1234  bool bValid = check_user_password( rPwd, m_pData.get() );
1235 
1236  if( ! bValid )
1237  {
1238  // try owner password
1239  // see PDF reference 1.4 Algorithm 3.7
1241  sal_uInt8 nPwd[ENCRYPTION_BUF_LEN] = {};
1242  sal_uInt32 nKeyLen = password_to_key( rPwd, aKey, m_pData.get(), true );
1243  if( m_pData->m_nStandardRevision == 2 )
1244  {
1245  if (rtl_cipher_initARCFOUR( m_pData->m_aCipher, rtl_Cipher_DirectionDecode,
1246  aKey, nKeyLen, nullptr, 0 )
1247  != rtl_Cipher_E_None)
1248  {
1249  return false; //TODO: differentiate "failed to decrypt" from "wrong password"
1250  }
1251  rtl_cipher_decodeARCFOUR( m_pData->m_aCipher,
1252  m_pData->m_aOEntry, 32,
1253  nPwd, 32 );
1254  }
1255  else if( m_pData->m_nStandardRevision == 3 )
1256  {
1257  memcpy( nPwd, m_pData->m_aOEntry, 32 );
1258  for( int i = 19; i >= 0; i-- )
1259  {
1260  sal_uInt8 nTempKey[ENCRYPTION_KEY_LEN];
1261  for( size_t j = 0; j < sizeof(nTempKey); j++ )
1262  nTempKey[j] = sal_uInt8(aKey[j] ^ i);
1263  if (rtl_cipher_initARCFOUR( m_pData->m_aCipher, rtl_Cipher_DirectionDecode,
1264  nTempKey, nKeyLen, nullptr, 0 )
1265  != rtl_Cipher_E_None)
1266  {
1267  return false; //TODO: differentiate "failed to decrypt" from "wrong password"
1268  }
1269  rtl_cipher_decodeARCFOUR( m_pData->m_aCipher,
1270  nPwd, 32,
1271  nPwd, 32 ); // decrypt inplace
1272  }
1273  }
1274  bValid = check_user_password( OString( reinterpret_cast<char*>(nPwd), 32 ), m_pData.get() );
1275  }
1276 
1277  return bValid;
1278 }
1279 
1281 {
1282  if( m_pData )
1283  return m_pData.get();
1284  m_pData.reset( new PDFFileImplData );
1285  // check for encryption dict in a trailer
1286  unsigned int nElements = m_aSubElements.size();
1287  while( nElements-- > 0 )
1288  {
1289  PDFTrailer* pTrailer = dynamic_cast<PDFTrailer*>(m_aSubElements[nElements].get());
1290  if( pTrailer && pTrailer->m_pDict )
1291  {
1292  // search doc id
1293  PDFDict::Map::iterator doc_id = pTrailer->m_pDict->m_aMap.find( "ID" );
1294  if( doc_id != pTrailer->m_pDict->m_aMap.end() )
1295  {
1296  PDFArray* pArr = dynamic_cast<PDFArray*>(doc_id->second);
1297  if( pArr && !pArr->m_aSubElements.empty() )
1298  {
1299  PDFString* pStr = dynamic_cast<PDFString*>(pArr->m_aSubElements[0].get());
1300  if( pStr )
1301  m_pData->m_aDocID = pStr->getFilteredString();
1302 #if OSL_DEBUG_LEVEL > 0
1303  OUStringBuffer aTmp;
1304  for( int i = 0; i < m_pData->m_aDocID.getLength(); i++ )
1305  aTmp.append(static_cast<sal_Int32>(sal_uInt8(m_pData->m_aDocID[i])), 16);
1306  SAL_INFO("sdext.pdfimport.pdfparse", "DocId is <" << aTmp.makeStringAndClear() << ">");
1307 #endif
1308  }
1309  }
1310  // search Encrypt entry
1311  PDFDict::Map::iterator enc =
1312  pTrailer->m_pDict->m_aMap.find( "Encrypt" );
1313  if( enc != pTrailer->m_pDict->m_aMap.end() )
1314  {
1315  PDFDict* pDict = dynamic_cast<PDFDict*>(enc->second);
1316  if( ! pDict )
1317  {
1318  PDFObjectRef* pRef = dynamic_cast<PDFObjectRef*>(enc->second);
1319  if( pRef )
1320  {
1321  PDFObject* pObj = findObject( pRef );
1322  if( pObj && pObj->m_pObject )
1323  pDict = dynamic_cast<PDFDict*>(pObj->m_pObject);
1324  }
1325  }
1326  if( pDict )
1327  {
1328  PDFDict::Map::iterator filter = pDict->m_aMap.find( "Filter" );
1329  PDFDict::Map::iterator version = pDict->m_aMap.find( "V" );
1330  PDFDict::Map::iterator len = pDict->m_aMap.find( "Length" );
1331  PDFDict::Map::iterator o_ent = pDict->m_aMap.find( "O" );
1332  PDFDict::Map::iterator u_ent = pDict->m_aMap.find( "U" );
1333  PDFDict::Map::iterator r_ent = pDict->m_aMap.find( "R" );
1334  PDFDict::Map::iterator p_ent = pDict->m_aMap.find( "P" );
1335  if( filter != pDict->m_aMap.end() )
1336  {
1337  m_pData->m_bIsEncrypted = true;
1338  m_pData->m_nKeyLength = 5;
1339  if( version != pDict->m_aMap.end() )
1340  {
1341  PDFNumber* pNum = dynamic_cast<PDFNumber*>(version->second);
1342  if( pNum )
1343  m_pData->m_nAlgoVersion = static_cast<sal_uInt32>(pNum->m_fValue);
1344  }
1345  if( m_pData->m_nAlgoVersion >= 3 )
1346  m_pData->m_nKeyLength = 16;
1347  if( len != pDict->m_aMap.end() )
1348  {
1349  PDFNumber* pNum = dynamic_cast<PDFNumber*>(len->second);
1350  if( pNum )
1351  m_pData->m_nKeyLength = static_cast<sal_uInt32>(pNum->m_fValue) / 8;
1352  }
1353  PDFName* pFilter = dynamic_cast<PDFName*>(filter->second);
1354  if( pFilter && pFilter->getFilteredName() == "Standard" )
1355  m_pData->m_bStandardHandler = true;
1356  if( o_ent != pDict->m_aMap.end() )
1357  {
1358  PDFString* pString = dynamic_cast<PDFString*>(o_ent->second);
1359  if( pString )
1360  {
1361  OString aEnt = pString->getFilteredString();
1362  if( aEnt.getLength() == 32 )
1363  memcpy( m_pData->m_aOEntry, aEnt.getStr(), 32 );
1364 #if OSL_DEBUG_LEVEL > 0
1365  else
1366  {
1367  OUStringBuffer aTmp;
1368  for( int i = 0; i < aEnt.getLength(); i++ )
1369  aTmp.append(" " + OUString::number(sal_uInt8(aEnt[i]), 16));
1370  SAL_WARN("sdext.pdfimport.pdfparse",
1371  "O entry has length " << static_cast<int>(aEnt.getLength()) << ", should be 32 <" << aTmp.makeStringAndClear() << ">" );
1372  }
1373 #endif
1374  }
1375  }
1376  if( u_ent != pDict->m_aMap.end() )
1377  {
1378  PDFString* pString = dynamic_cast<PDFString*>(u_ent->second);
1379  if( pString )
1380  {
1381  OString aEnt = pString->getFilteredString();
1382  if( aEnt.getLength() == 32 )
1383  memcpy( m_pData->m_aUEntry, aEnt.getStr(), 32 );
1384 #if OSL_DEBUG_LEVEL > 0
1385  else
1386  {
1387  OUStringBuffer aTmp;
1388  for( int i = 0; i < aEnt.getLength(); i++ )
1389  aTmp.append(" " + OUString::number(sal_uInt8(aEnt[i]), 16));
1390  SAL_WARN("sdext.pdfimport.pdfparse",
1391  "U entry has length " << static_cast<int>(aEnt.getLength()) << ", should be 32 <" << aTmp.makeStringAndClear() << ">" );
1392  }
1393 #endif
1394  }
1395  }
1396  if( r_ent != pDict->m_aMap.end() )
1397  {
1398  PDFNumber* pNum = dynamic_cast<PDFNumber*>(r_ent->second);
1399  if( pNum )
1400  m_pData->m_nStandardRevision = static_cast<sal_uInt32>(pNum->m_fValue);
1401  }
1402  if( p_ent != pDict->m_aMap.end() )
1403  {
1404  PDFNumber* pNum = dynamic_cast<PDFNumber*>(p_ent->second);
1405  if( pNum )
1406  m_pData->m_nPEntry = static_cast<sal_uInt32>(static_cast<sal_Int32>(pNum->m_fValue));
1407  SAL_INFO("sdext.pdfimport.pdfparse", "p entry is " << m_pData->m_nPEntry );
1408  }
1409 
1410  SAL_INFO("sdext.pdfimport.pdfparse", "Encryption dict: sec handler: " << (pFilter ? pFilter->getFilteredName() : OUString("<unknown>")) << ", version = " << static_cast<int>(m_pData->m_nAlgoVersion) << ", revision = " << static_cast<int>(m_pData->m_nStandardRevision) << ", key length = " << m_pData->m_nKeyLength );
1411  break;
1412  }
1413  }
1414  }
1415  }
1416  }
1417 
1418  return m_pData.get();
1419 }
1420 
1421 bool PDFFile::emit( EmitContext& rWriteContext ) const
1422 {
1423  setEmitData( rWriteContext, new EmitImplData( this ) );
1424 
1425  OString aBuf =
1426  "%PDF-" +
1427  OString::number( sal_Int32( m_nMajor ) ) +
1428  "." +
1429  OString::number( sal_Int32( m_nMinor ) ) +
1430  "\n";
1431  if( ! rWriteContext.write( aBuf.getStr(), aBuf.getLength() ) )
1432  return false;
1433  return emitSubElements( rWriteContext );
1434 }
1435 
1437 {
1438  PDFFile* pNewFl = new PDFFile();
1439  pNewFl->m_nMajor = m_nMajor;
1440  pNewFl->m_nMinor = m_nMinor;
1441  cloneSubElements( pNewFl->m_aSubElements );
1442  return pNewFl;
1443 }
1444 
1446 {
1447 }
1448 
1449 bool PDFPart::emit( EmitContext& rWriteContext ) const
1450 {
1451  return emitSubElements( rWriteContext );
1452 }
1453 
1455 {
1456  PDFPart* pNewPt = new PDFPart();
1457  cloneSubElements( pNewPt->m_aSubElements );
1458  return pNewPt;
1459 }
1460 
1461 /* vim:set shiftwidth=4 softtabstop=4 expandtab: */
virtual bool emit(EmitContext &rWriteContext) const override
Definition: pdfentries.cxx:927
virtual PDFEntry * clone() const override
Definition: pdfentries.cxx:240
void eraseValue(std::string_view rName)
Definition: pdfentries.cxx:553
void setDecryptObject(unsigned int nObject, unsigned int nGeneration)
Definition: pdfentries.cxx:84
virtual ~PDFNumber() override
Definition: pdfentries.cxx:337
virtual ~PDFComment() override
Definition: pdfentries.cxx:127
#define ENCRYPTION_BUF_LEN
const sal_uInt8 nPadString[32]
virtual bool emit(EmitContext &rWriteContext) const override
Definition: pdfentries.cxx:399
virtual ~PDFFile() override
std::map< unsigned int, std::pair< unsigned int, unsigned int > > XRefTable
Definition: pdfentries.cxx:46
OString getFilteredString() const
Definition: pdfentries.cxx:245
virtual bool emit(EmitContext &rWriteContext) const override
Definition: pdfentries.cxx:341
PDFComment(const OString &rComment)
Definition: pdfparse.hxx:75
virtual bool write(const void *pBuf, unsigned int nLen)=0
std::unique_ptr< PDFFileImplData > m_pData
Definition: pdfparse.hxx:232
unsigned int m_nBeginOffset
Definition: pdfparse.hxx:205
virtual PDFEntry * clone() const override
Definition: pdfentries.cxx:389
virtual ~PDFPart() override
static void pad_or_truncate_to_32(const OString &rStr, char *pBuffer)
virtual bool emit(EmitContext &rWriteContext) const override
Definition: pdfentries.cxx:131
PDFObject * findObject(unsigned int nNumber, unsigned int nGeneration) const
Definition: pdfentries.cxx:475
unsigned int m_nNumber
Definition: pdfparse.hxx:139
aBuf
bool decrypt(const sal_uInt8 *pInBuffer, sal_uInt32 nLen, sal_uInt8 *pOutBuffer, unsigned int nObject, unsigned int nGeneration) const
virtual bool emit(EmitContext &rWriteContext) const override
Definition: pdfentries.cxx:413
unsigned int m_nEndOffset
Definition: pdfparse.hxx:206
PDFString(const OString &rString)
Definition: pdfparse.hxx:106
unsigned int m_nGeneration
Definition: pdfparse.hxx:140
PDFObjectRef(unsigned int nNr, unsigned int nGen)
Definition: pdfparse.hxx:142
virtual bool emit(EmitContext &rWriteContext) const override
EmbeddedObjectRef * pObject
PDFBool(bool bVal)
Definition: pdfparse.hxx:130
virtual bool emit(EmitContext &rWriteContext) const override
Definition: pdfentries.cxx:145
virtual ~PDFValue() override
Definition: pdfentries.cxx:123
virtual ~PDFDict() override
Definition: pdfentries.cxx:511
virtual bool emit(EmitContext &rWriteContext) const override
Definition: pdfentries.cxx:515
std::vector< std::unique_ptr< PDFEntry > > m_aSubElements
Definition: pdfparse.hxx:161
PDFNumber(double fVal)
Definition: pdfparse.hxx:119
unsigned int m_nDecryptObject
Definition: pdfentries.cxx:50
bool emitSubElements(EmitContext &rWriteContext) const
Definition: pdfentries.cxx:448
virtual bool emit(EmitContext &rWriteContext) const override
Definition: pdfentries.cxx:611
virtual PDFEntry * clone() const override
Definition: pdfentries.cxx:152
virtual ~PDFBool() override
Definition: pdfentries.cxx:395
virtual ~PDFNull() override
Definition: pdfentries.cxx:409
sal_Int32 nElements
virtual ~PDFName() override
Definition: pdfentries.cxx:141
OUString getFilteredName() const
Definition: pdfentries.cxx:157
bool usesSupportedEncryptionFormat() const
unsigned int m_nMajor
Definition: pdfparse.hxx:235
virtual ~PDFObject() override
Definition: pdfentries.cxx:654
virtual PDFEntry * clone() const override
Definition: pdfentries.cxx:418
virtual PDFEntry * clone() const override
Definition: pdfentries.cxx:616
static EmitImplData * getEmitData(EmitContext const &rContext)
Definition: pdfentries.cxx:111
err
sal_uInt16 char * pName
PDFStream * m_pStream
Definition: pdfparse.hxx:261
virtual PDFEntry * clone() const override
Definition: pdfentries.cxx:599
void insertValue(const OString &rName, std::unique_ptr< PDFEntry > pValue)
Definition: pdfentries.cxx:524
int i
virtual ~PDFStream() override
Definition: pdfentries.cxx:607
bool getDeflatedStream(std::unique_ptr< char[]> &rpStream, unsigned int *pBytes, const PDFContainer *pObjectContainer, EmitContext &rContext) const
Definition: pdfentries.cxx:658
virtual unsigned int getCurPos()=0
virtual unsigned int readOrigBytes(unsigned int nOrigOffset, unsigned int nLen, void *pBuf)=0
bool isEncrypted() const
virtual PDFEntry * clone() const override
virtual PDFEntry * clone() const override
Definition: pdfentries.cxx:136
virtual bool copyOrigBytes(unsigned int nOrigOffset, unsigned int nLen)=0
virtual PDFEntry * clone() const override
Definition: pdfentries.cxx:439
virtual ~PDFArray() override
Definition: pdfentries.cxx:491
#define ENCRYPTION_KEY_LEN
PDFEntry * buildMap()
Definition: pdfentries.cxx:576
virtual PDFEntry * clone() const override
Definition: pdfentries.cxx:995
static bool check_user_password(const OString &rPwd, PDFFileImplData *pData)
virtual bool emit(EmitContext &rWriteContext) const override
bool insertXref(unsigned int nObject, unsigned int nGeneration, unsigned int nOffset)
Definition: pdfentries.cxx:54
virtual bool emit(EmitContext &rWriteContext) const override
Definition: pdfentries.cxx:495
PDFFileImplData * impl_getData() const
virtual PDFEntry * clone() const override
Definition: pdfentries.cxx:903
static void unzipToBuffer(char *pBegin, unsigned int nLen, sal_uInt8 **pOutBuf, sal_uInt32 *pOutLen)
Definition: pdfentries.cxx:724
virtual ~PDFString() override
Definition: pdfentries.cxx:189
virtual PDFEntry * clone() const override
Definition: pdfentries.cxx:504
static void setEmitData(EmitContext &rContext, EmitImplData *pNewEmitData)
Definition: pdfentries.cxx:116
PDFEntry * m_pObject
Definition: pdfparse.hxx:260
unsigned int m_nNumber
Definition: pdfparse.hxx:262
EmitImplData(const PDFContainer *pTopContainer)
Definition: pdfentries.cxx:72
unsigned char sal_uInt8
void update(const unsigned char *pInput, size_t length)
#define SAL_INFO(area, stream)
void cloneSubElements(std::vector< std::unique_ptr< PDFEntry >> &rNewSubElements) const
Definition: pdfentries.cxx:468
EmitContext(const PDFContainer *pTop=nullptr)
Definition: pdfentries.cxx:95
void decrypt(const sal_uInt8 *pInBuffer, sal_uInt32 nLen, sal_uInt8 *pOutBuffer, unsigned int nObject, unsigned int nGeneration) const
Definition: pdfentries.cxx:77
void * p
PDFName(const OString &rName)
Definition: pdfparse.hxx:93
virtual ~PDFContainer() override
Definition: pdfentries.cxx:444
static std::vector< unsigned char > calculateHash(const unsigned char *pInput, size_t length, HashType eType)
PDFObject(unsigned int nNr, unsigned int nGen)
Definition: pdfparse.hxx:265
virtual bool emit(EmitContext &rWriteContext) const override
Definition: pdfentries.cxx:193
static sal_uInt32 password_to_key(const OString &rPwd, sal_uInt8 *pOutKey, PDFFileImplData const *pData, bool bComputeO)
unsigned int getDictLength(const PDFContainer *pObjectContainer) const
Definition: pdfentries.cxx:621
#define SAL_WARN(area, stream)
virtual bool emit(EmitContext &rWriteContext) const override
Definition: pdfentries.cxx:801
virtual PDFEntry * clone() const override
Definition: pdfentries.cxx:404
virtual ~PDFObjectRef() override
Definition: pdfentries.cxx:424
std::unique_ptr< EmitImplData > m_pImplData
Definition: pdfparse.hxx:55
unsigned int m_nGeneration
Definition: pdfparse.hxx:263
bool setupDecryptionData(const OString &rPwd) const
std::vector< unsigned char > finalize()
virtual ~PDFTrailer() override
Definition: pdfentries.cxx:923
virtual PDFEntry * clone() const override
PDFStream(unsigned int nBegin, unsigned int nEnd, PDFDict *pStreamDict)
Definition: pdfparse.hxx:209
virtual bool emit(EmitContext &rWriteContext) const override
Definition: pdfentries.cxx:428
const PDFContainer * m_pObjectContainer
Definition: pdfentries.cxx:49
sal_uInt8 m_aDecryptionKey[ENCRYPTION_KEY_LEN+5]
unsigned int m_nMinor
Definition: pdfparse.hxx:236
static osl::File * pStream
Definition: emitcontext.cxx:32
unsigned int m_nDecryptGeneration
Definition: pdfentries.cxx:51
void writeStream(EmitContext &rContext, const PDFFile *pPDFFile) const
Definition: pdfentries.cxx:782
virtual bool emit(EmitContext &rWriteContext) const =0