LibreOffice Module sdext (master)  1
pdfentries.cxx
Go to the documentation of this file.
1 /* -*- Mode: C++; tab-width: 4; indent-tabs-mode: nil; c-basic-offset: 4 -*- */
2 /*
3  * This file is part of the LibreOffice project.
4  *
5  * This Source Code Form is subject to the terms of the Mozilla Public
6  * License, v. 2.0. If a copy of the MPL was not distributed with this
7  * file, You can obtain one at http://mozilla.org/MPL/2.0/.
8  *
9  * This file incorporates work covered by the following license notice:
10  *
11  * Licensed to the Apache Software Foundation (ASF) under one or more
12  * contributor license agreements. See the NOTICE file distributed
13  * with this work for additional information regarding copyright
14  * ownership. The ASF licenses this file to you under the Apache
15  * License, Version 2.0 (the "License"); you may not use this file
16  * except in compliance with the License. You may obtain a copy of
17  * the License at http://www.apache.org/licenses/LICENSE-2.0 .
18  */
19 
20 
21 #include <pdfparse.hxx>
22 
23 #include <comphelper/hash.hxx>
24 
25 #include <rtl/strbuf.hxx>
26 #include <rtl/ustring.hxx>
27 #include <rtl/ustrbuf.hxx>
28 #include <rtl/digest.h>
29 #include <rtl/cipher.h>
30 #include <sal/log.hxx>
31 
32 #include <zlib.h>
33 
34 #include <math.h>
35 #include <map>
36 
37 #include <string.h>
38 
39 
40 namespace pdfparse
41 {
42 
44 {
45  // xref table: maps object number to a pair of (generation, buffer offset)
46  typedef std::map< unsigned int, std::pair< unsigned int, unsigned int > > XRefTable;
47  XRefTable m_aXRefTable;
48  // container of all indirect objects (usually a PDFFile*)
50  unsigned int m_nDecryptObject;
51  unsigned int m_nDecryptGeneration;
52 
53  // returns true if the xref table was updated
54  bool insertXref( unsigned int nObject, unsigned int nGeneration, unsigned int nOffset )
55  {
56  XRefTable::iterator it = m_aXRefTable.find( nObject );
57  if( it == m_aXRefTable.end() )
58  {
59  // new entry
60  m_aXRefTable[ nObject ] = std::pair<unsigned int, unsigned int>(nGeneration,nOffset);
61  return true;
62  }
63  // update old entry, if generation number is higher
64  if( it->second.first < nGeneration )
65  {
66  it->second = std::pair<unsigned int, unsigned int>(nGeneration,nOffset);
67  return true;
68  }
69  return false;
70  }
71 
72  explicit EmitImplData( const PDFContainer* pTopContainer ) :
73  m_pObjectContainer( pTopContainer ),
74  m_nDecryptObject( 0 ),
75  m_nDecryptGeneration( 0 )
76  {}
77  void decrypt( const sal_uInt8* pInBuffer, sal_uInt32 nLen, sal_uInt8* pOutBuffer,
78  unsigned int nObject, unsigned int nGeneration ) const
79  {
80  const PDFFile* pFile = dynamic_cast<const PDFFile*>(m_pObjectContainer);
81  pFile && pFile->decrypt( pInBuffer, nLen, pOutBuffer, nObject, nGeneration );
82  }
83 
84  void setDecryptObject( unsigned int nObject, unsigned int nGeneration )
85  {
86  m_nDecryptObject = nObject;
87  m_nDecryptGeneration = nGeneration;
88  }
89 };
90 
91 }
92 
93 using namespace pdfparse;
94 
96  m_bDeflate( false ),
97  m_bDecrypt( false )
98 {
99  if( pTop )
100  m_pImplData.reset( new EmitImplData( pTop ) );
101 }
102 
104 {
105 }
106 
108 {
109 }
110 
112 {
113  return rContext.m_pImplData.get();
114 }
115 
116 void PDFEntry::setEmitData( EmitContext& rContext, EmitImplData* pNewEmitData )
117 {
118  if( rContext.m_pImplData && rContext.m_pImplData.get() != pNewEmitData )
119  rContext.m_pImplData.reset();
120  rContext.m_pImplData.reset( pNewEmitData );
121 }
122 
124 {
125 }
126 
128 {
129 }
130 
131 bool PDFComment::emit( EmitContext& rWriteContext ) const
132 {
133  return rWriteContext.write( m_aComment.getStr(), m_aComment.getLength() );
134 }
135 
137 {
138  return new PDFComment( m_aComment );
139 }
140 
142 {
143 }
144 
145 bool PDFName::emit( EmitContext& rWriteContext ) const
146 {
147  if( ! rWriteContext.write( " /", 2 ) )
148  return false;
149  return rWriteContext.write( m_aName.getStr(), m_aName.getLength() );
150 }
151 
153 {
154  return new PDFName( m_aName );
155 }
156 
157 OUString PDFName::getFilteredName() const
158 {
159  OStringBuffer aFilter( m_aName.getLength() );
160  const char* pStr = m_aName.getStr();
161  unsigned int nLen = m_aName.getLength();
162  for( unsigned int i = 0; i < nLen; i++ )
163  {
164  if( (i < nLen - 3) && pStr[i] == '#' )
165  {
166  char rResult = 0;
167  i++;
168  if( pStr[i] >= '0' && pStr[i] <= '9' )
169  rResult = char( pStr[i]-'0' ) << 4;
170  else if( pStr[i] >= 'a' && pStr[i] <= 'f' )
171  rResult = char( pStr[i]-'a' + 10 ) << 4;
172  else if( pStr[i] >= 'A' && pStr[i] <= 'F' )
173  rResult = char( pStr[i]-'A' + 10 ) << 4;
174  i++;
175  if( pStr[i] >= '0' && pStr[i] <= '9' )
176  rResult |= char( pStr[i]-'0' );
177  else if( pStr[i] >= 'a' && pStr[i] <= 'f' )
178  rResult |= char( pStr[i]-'a' + 10 );
179  else if( pStr[i] >= 'A' && pStr[i] <= 'F' )
180  rResult |= char( pStr[i]-'A' + 10 );
181  aFilter.append( rResult );
182  }
183  else
184  aFilter.append( pStr[i] );
185  }
186  return OStringToOUString( aFilter.makeStringAndClear(), RTL_TEXTENCODING_UTF8 );
187 }
188 
190 {
191 }
192 
193 bool PDFString::emit( EmitContext& rWriteContext ) const
194 {
195  if( ! rWriteContext.write( " ", 1 ) )
196  return false;
197  EmitImplData* pEData = getEmitData( rWriteContext );
198  if( rWriteContext.m_bDecrypt && pEData && pEData->m_nDecryptObject )
199  {
200  OString aFiltered( getFilteredString() );
201  // decrypt inplace (evil since OString is supposed to be const
202  // however in this case we know that getFilteredString returned a singular string instance
203  pEData->decrypt( reinterpret_cast<sal_uInt8 const *>(aFiltered.getStr()), aFiltered.getLength(),
204  reinterpret_cast<sal_uInt8 *>(const_cast<char *>(aFiltered.getStr())),
205  pEData->m_nDecryptObject, pEData->m_nDecryptGeneration );
206  // check for string or hex string
207  const char* pStr = aFiltered.getStr();
208  if( aFiltered.getLength() > 1 &&
209  ( (static_cast<unsigned char>(pStr[0]) == 0xff && static_cast<unsigned char>(pStr[1]) == 0xfe) ||
210  (static_cast<unsigned char>(pStr[0]) == 0xfe && static_cast<unsigned char>(pStr[1]) == 0xff) ) )
211  {
212  static const char pHexTab[16] = { '0', '1', '2', '3', '4', '5', '6', '7',
213  '8', '9', 'A', 'B', 'C', 'D', 'E', 'F' };
214  if( ! rWriteContext.write( "<", 1 ) )
215  return false;
216  for( sal_Int32 i = 0; i < aFiltered.getLength(); i++ )
217  {
218  if( ! rWriteContext.write( pHexTab + ((sal_uInt32(pStr[i]) >> 4) & 0x0f), 1 ) )
219  return false;
220  if( ! rWriteContext.write( pHexTab + (sal_uInt32(pStr[i]) & 0x0f), 1 ) )
221  return false;
222  }
223  if( ! rWriteContext.write( ">", 1 ) )
224  return false;
225  }
226  else
227  {
228  if( ! rWriteContext.write( "(", 1 ) )
229  return false;
230  if( ! rWriteContext.write( aFiltered.getStr(), aFiltered.getLength() ) )
231  return false;
232  if( ! rWriteContext.write( ")", 1 ) )
233  return false;
234  }
235  return true;
236  }
237  return rWriteContext.write( m_aString.getStr(), m_aString.getLength() );
238 }
239 
241 {
242  return new PDFString( m_aString );
243 }
244 
246 {
247  int nLen = m_aString.getLength();
248  OStringBuffer aBuf( nLen );
249 
250  const char* pStr = m_aString.getStr();
251  if( *pStr == '(' )
252  {
253  const char* pRun = pStr+1;
254  while( pRun - pStr < nLen-1 )
255  {
256  if( *pRun == '\\' )
257  {
258  pRun++;
259  if( pRun - pStr < nLen )
260  {
261  char aEsc = 0;
262  if( *pRun == 'n' )
263  aEsc = '\n';
264  else if( *pRun == 'r' )
265  aEsc = '\r';
266  else if( *pRun == 't' )
267  aEsc = '\t';
268  else if( *pRun == 'b' )
269  aEsc = '\b';
270  else if( *pRun == 'f' )
271  aEsc = '\f';
272  else if( *pRun == '(' )
273  aEsc = '(';
274  else if( *pRun == ')' )
275  aEsc = ')';
276  else if( *pRun == '\\' )
277  aEsc = '\\';
278  else if( *pRun == '\n' )
279  {
280  pRun++;
281  continue;
282  }
283  else if( *pRun == '\r' )
284  {
285  pRun++;
286  if( *pRun == '\n' )
287  pRun++;
288  continue;
289  }
290  else
291  {
292  int i = 0;
293  while( i++ < 3 && *pRun >= '0' && *pRun <= '7' )
294  aEsc = 8*aEsc + (*pRun++ - '0');
295  // move pointer back to last character of octal sequence
296  pRun--;
297  }
298  aBuf.append( aEsc );
299  }
300  }
301  else
302  aBuf.append( *pRun );
303  // move pointer to next character
304  pRun++;
305  }
306  }
307  else if( *pStr == '<' )
308  {
309  const char* pRun = pStr+1;
310  while( *pRun != '>' && pRun - pStr < nLen )
311  {
312  char rResult = 0;
313  if( *pRun >= '0' && *pRun <= '9' )
314  rResult = char( ( *pRun-'0' ) << 4 );
315  else if( *pRun >= 'a' && *pRun <= 'f' )
316  rResult = char( ( *pRun-'a' + 10 ) << 4 );
317  else if( *pRun >= 'A' && *pRun <= 'F' )
318  rResult = char( ( *pRun-'A' + 10 ) << 4 );
319  pRun++;
320  if( *pRun != '>' && pRun - pStr < nLen )
321  {
322  if( *pRun >= '0' && *pRun <= '9' )
323  rResult |= char( *pRun-'0' );
324  else if( *pRun >= 'a' && *pRun <= 'f' )
325  rResult |= char( *pRun-'a' + 10 );
326  else if( *pRun >= 'A' && *pRun <= 'F' )
327  rResult |= char( *pRun-'A' + 10 );
328  }
329  pRun++;
330  aBuf.append( rResult );
331  }
332  }
333 
334  return aBuf.makeStringAndClear();
335 }
336 
338 {
339 }
340 
341 bool PDFNumber::emit( EmitContext& rWriteContext ) const
342 {
343  OStringBuffer aBuf( 32 );
344  aBuf.append( ' ' );
345 
346  double fValue = m_fValue;
347  bool bNeg = false;
348  int nPrecision = 5;
349  if( fValue < 0.0 )
350  {
351  bNeg = true;
352  fValue=-fValue;
353  }
354 
355  sal_Int64 nInt = static_cast<sal_Int64>(fValue);
356  fValue -= static_cast<double>(nInt);
357  // optimizing hardware may lead to a value of 1.0 after the subtraction
358  if( fValue == 1.0 || log10( 1.0-fValue ) <= -nPrecision )
359  {
360  nInt++;
361  fValue = 0.0;
362  }
363  sal_Int64 nFrac = 0;
364  if( fValue )
365  {
366  fValue *= pow( 10.0, static_cast<double>(nPrecision) );
367  nFrac = static_cast<sal_Int64>(fValue);
368  }
369  if( bNeg && ( nInt || nFrac ) )
370  aBuf.append( '-' );
371  aBuf.append( nInt );
372  if( nFrac )
373  {
374  int i;
375  aBuf.append( '.' );
376  sal_Int64 nBound = static_cast<sal_Int64>(pow( 10.0, nPrecision - 1.0 )+0.5);
377  for ( i = 0; ( i < nPrecision ) && nFrac; i++ )
378  {
379  sal_Int64 nNumb = nFrac / nBound;
380  nFrac -= nNumb * nBound;
381  aBuf.append( nNumb );
382  nBound /= 10;
383  }
384  }
385 
386  return rWriteContext.write( aBuf.getStr(), aBuf.getLength() );
387 }
388 
390 {
391  return new PDFNumber( m_fValue );
392 }
393 
394 
396 {
397 }
398 
399 bool PDFBool::emit( EmitContext& rWriteContext ) const
400 {
401  return m_bValue ? rWriteContext.write( " true", 5 ) : rWriteContext.write( " false", 6 );
402 }
403 
405 {
406  return new PDFBool( m_bValue );
407 }
408 
410 {
411 }
412 
413 bool PDFNull::emit( EmitContext& rWriteContext ) const
414 {
415  return rWriteContext.write( " null", 5 );
416 }
417 
419 {
420  return new PDFNull();
421 }
422 
423 
425 {
426 }
427 
428 bool PDFObjectRef::emit( EmitContext& rWriteContext ) const
429 {
430  OString aBuf =
431  " " +
432  OString::number( sal_Int32( m_nNumber ) ) +
433  " " +
434  OString::number( sal_Int32( m_nGeneration ) ) +
435  " R";
436  return rWriteContext.write( aBuf.getStr(), aBuf.getLength() );
437 }
438 
440 {
441  return new PDFObjectRef( m_nNumber, m_nGeneration );
442 }
443 
445 {
446 }
447 
448 bool PDFContainer::emitSubElements( EmitContext& rWriteContext ) const
449 {
450  int nEle = m_aSubElements.size();
451  for( int i = 0; i < nEle; i++ )
452  {
453  if( rWriteContext.m_bDecrypt )
454  {
455  const PDFName* pName = dynamic_cast<PDFName*>(m_aSubElements[i].get());
456  if (pName && pName->m_aName == "Encrypt")
457  {
458  i++;
459  continue;
460  }
461  }
462  if( ! m_aSubElements[i]->emit( rWriteContext ) )
463  return false;
464  }
465  return true;
466 }
467 
468 void PDFContainer::cloneSubElements( std::vector<std::unique_ptr<PDFEntry>>& rNewSubElements ) const
469 {
470  int nEle = m_aSubElements.size();
471  for( int i = 0; i < nEle; i++ )
472  rNewSubElements.emplace_back( m_aSubElements[i]->clone() );
473 }
474 
475 PDFObject* PDFContainer::findObject( unsigned int nNumber, unsigned int nGeneration ) const
476 {
477  unsigned int nEle = m_aSubElements.size();
478  for( unsigned int i = 0; i < nEle; i++ )
479  {
480  PDFObject* pObject = dynamic_cast<PDFObject*>(m_aSubElements[i].get());
481  if( pObject &&
482  pObject->m_nNumber == nNumber &&
483  pObject->m_nGeneration == nGeneration )
484  {
485  return pObject;
486  }
487  }
488  return nullptr;
489 }
490 
492 {
493 }
494 
495 bool PDFArray::emit( EmitContext& rWriteContext ) const
496 {
497  if( ! rWriteContext.write( "[", 1 ) )
498  return false;
499  if( ! emitSubElements( rWriteContext ) )
500  return false;
501  return rWriteContext.write( "]", 1 );
502 }
503 
505 {
506  PDFArray* pNewAr = new PDFArray();
507  cloneSubElements( pNewAr->m_aSubElements );
508  return pNewAr;
509 }
510 
512 {
513 }
514 
515 bool PDFDict::emit( EmitContext& rWriteContext ) const
516 {
517  if( ! rWriteContext.write( "<<\n", 3 ) )
518  return false;
519  if( ! emitSubElements( rWriteContext ) )
520  return false;
521  return rWriteContext.write( "\n>>\n", 4 );
522 }
523 
524 void PDFDict::insertValue( const OString& rName, std::unique_ptr<PDFEntry> pValue )
525 {
526  if( ! pValue )
527  eraseValue( rName );
528 
529  PDFEntry* pValueTmp = nullptr;
530  std::unordered_map<OString,PDFEntry*>::iterator it = m_aMap.find( rName );
531  if( it == m_aMap.end() )
532  {
533  // new name/value, pair, append it
534  m_aSubElements.emplace_back(std::make_unique<PDFName>(rName));
535  m_aSubElements.emplace_back( std::move(pValue) );
536  pValueTmp = m_aSubElements.back().get();
537  }
538  else
539  {
540  unsigned int nSub = m_aSubElements.size();
541  bool bFound = false;
542  for( unsigned int i = 0; i < nSub && !bFound; i++ )
543  if( m_aSubElements[i].get() == it->second )
544  {
545  m_aSubElements[i] = std::move(pValue);
546  pValueTmp = m_aSubElements[i].get();
547  bFound = true;
548  break;
549  }
550  }
551  assert(pValueTmp);
552  m_aMap[ rName ] = pValueTmp;
553 }
554 
555 void PDFDict::eraseValue( const OString& rName )
556 {
557  unsigned int nEle = m_aSubElements.size();
558  for( unsigned int i = 0; i < nEle; i++ )
559  {
560  PDFName* pName = dynamic_cast<PDFName*>(m_aSubElements[i].get());
561  if( pName && pName->m_aName == rName )
562  {
563  for( unsigned int j = i+1; j < nEle; j++ )
564  {
565  if( dynamic_cast<PDFComment*>(m_aSubElements[j].get()) == nullptr )
566  {
567  // remove and free subelements from vector
568  m_aSubElements.erase( m_aSubElements.begin()+j );
569  m_aSubElements.erase( m_aSubElements.begin()+i );
570  buildMap();
571  return;
572  }
573  }
574  }
575  }
576 }
577 
579 {
580  // clear map
581  m_aMap.clear();
582  // build map
583  unsigned int nEle = m_aSubElements.size();
584  PDFName* pName = nullptr;
585  for( unsigned int i = 0; i < nEle; i++ )
586  {
587  if( dynamic_cast<PDFComment*>(m_aSubElements[i].get()) == nullptr )
588  {
589  if( pName )
590  {
591  m_aMap[ pName->m_aName ] = m_aSubElements[i].get();
592  pName = nullptr;
593  }
594  else if( (pName = dynamic_cast<PDFName*>(m_aSubElements[i].get())) == nullptr )
595  return m_aSubElements[i].get();
596  }
597  }
598  return pName;
599 }
600 
602 {
603  PDFDict* pNewDict = new PDFDict();
604  cloneSubElements( pNewDict->m_aSubElements );
605  pNewDict->buildMap();
606  return pNewDict;
607 }
608 
610 {
611 }
612 
613 bool PDFStream::emit( EmitContext& rWriteContext ) const
614 {
615  return rWriteContext.copyOrigBytes( m_nBeginOffset, m_nEndOffset-m_nBeginOffset );
616 }
617 
619 {
620  return new PDFStream( m_nBeginOffset, m_nEndOffset, nullptr );
621 }
622 
623 unsigned int PDFStream::getDictLength( const PDFContainer* pContainer ) const
624 {
625  if( ! m_pDict )
626  return 0;
627  // find /Length entry, can either be a direct or indirect number object
628  std::unordered_map<OString,PDFEntry*>::const_iterator it =
629  m_pDict->m_aMap.find( "Length" );
630  if( it == m_pDict->m_aMap.end() )
631  return 0;
632  PDFNumber* pNum = dynamic_cast<PDFNumber*>(it->second);
633  if( ! pNum && pContainer )
634  {
635  PDFObjectRef* pRef = dynamic_cast<PDFObjectRef*>(it->second);
636  if( pRef )
637  {
638  int nEle = pContainer->m_aSubElements.size();
639  for (int i = 0; i < nEle; i++)
640  {
641  PDFObject* pObj = dynamic_cast<PDFObject*>(pContainer->m_aSubElements[i].get());
642  if( pObj &&
643  pObj->m_nNumber == pRef->m_nNumber &&
644  pObj->m_nGeneration == pRef->m_nGeneration )
645  {
646  if( pObj->m_pObject )
647  pNum = dynamic_cast<PDFNumber*>(pObj->m_pObject);
648  break;
649  }
650  }
651  }
652  }
653  return pNum ? static_cast<unsigned int>(pNum->m_fValue) : 0;
654 }
655 
657 {
658 }
659 
660 bool PDFObject::getDeflatedStream( std::unique_ptr<char[]>& rpStream, unsigned int* pBytes, const PDFContainer* pObjectContainer, EmitContext& rContext ) const
661 {
662  bool bIsDeflated = false;
663  if( m_pStream && m_pStream->m_pDict &&
665  )
666  {
667  unsigned int nOuterStreamLen = m_pStream->m_nEndOffset - m_pStream->m_nBeginOffset;
668  rpStream.reset(new char[ nOuterStreamLen ]);
669  unsigned int nRead = rContext.readOrigBytes( m_pStream->m_nBeginOffset, nOuterStreamLen, rpStream.get() );
670  if( nRead != nOuterStreamLen )
671  {
672  rpStream.reset();
673  *pBytes = 0;
674  return false;
675  }
676  // is there a filter entry ?
677  std::unordered_map<OString,PDFEntry*>::const_iterator it =
678  m_pStream->m_pDict->m_aMap.find( "Filter" );
679  if( it != m_pStream->m_pDict->m_aMap.end() )
680  {
681  PDFName* pFilter = dynamic_cast<PDFName*>(it->second);
682  if( ! pFilter )
683  {
684  PDFArray* pArray = dynamic_cast<PDFArray*>(it->second);
685  if( pArray && ! pArray->m_aSubElements.empty() )
686  {
687  pFilter = dynamic_cast<PDFName*>(pArray->m_aSubElements.front().get());
688  }
689  }
690 
691  // is the (first) filter FlateDecode ?
692  if (pFilter && pFilter->m_aName == "FlateDecode")
693  {
694  bIsDeflated = true;
695  }
696  }
697  // prepare compressed data section
698  char* pStream = rpStream.get();
699  if( pStream[0] == 's' )
700  pStream += 6; // skip "stream"
701  // skip line end after "stream"
702  while( *pStream == '\r' || *pStream == '\n' )
703  pStream++;
704  // get the compressed length
705  *pBytes = m_pStream->getDictLength( pObjectContainer );
706  if( pStream != rpStream.get() )
707  memmove( rpStream.get(), pStream, *pBytes );
708  if( rContext.m_bDecrypt )
709  {
710  EmitImplData* pEData = getEmitData( rContext );
711  pEData->decrypt( reinterpret_cast<const sal_uInt8*>(rpStream.get()),
712  *pBytes,
713  reinterpret_cast<sal_uInt8*>(rpStream.get()),
714  m_nNumber,
716  ); // decrypt inplace
717  }
718  }
719  else
720  {
721  *pBytes = 0;
722  }
723  return bIsDeflated;
724 }
725 
726 static void unzipToBuffer( char* pBegin, unsigned int nLen,
727  sal_uInt8** pOutBuf, sal_uInt32* pOutLen )
728 {
729  z_stream aZStr;
730  aZStr.next_in = reinterpret_cast<Bytef *>(pBegin);
731  aZStr.avail_in = nLen;
732  aZStr.zalloc = nullptr;
733  aZStr.zfree = nullptr;
734  aZStr.opaque = nullptr;
735 
736  int err = inflateInit(&aZStr);
737 
738  const unsigned int buf_increment_size = 16384;
739 
740  if (auto p = static_cast<sal_uInt8*>(std::realloc(*pOutBuf, buf_increment_size)))
741  {
742  *pOutBuf = p;
743  aZStr.next_out = reinterpret_cast<Bytef*>(*pOutBuf);
744  aZStr.avail_out = buf_increment_size;
745  *pOutLen = buf_increment_size;
746  }
747  else
748  err = Z_MEM_ERROR;
749  while( err != Z_STREAM_END && err >= Z_OK && aZStr.avail_in )
750  {
751  err = inflate( &aZStr, Z_NO_FLUSH );
752  if( aZStr.avail_out == 0 )
753  {
754  if( err != Z_STREAM_END )
755  {
756  const int nNewAlloc = *pOutLen + buf_increment_size;
757  if (auto p = static_cast<sal_uInt8*>(std::realloc(*pOutBuf, nNewAlloc)))
758  {
759  *pOutBuf = p;
760  aZStr.next_out = reinterpret_cast<Bytef*>(*pOutBuf + *pOutLen);
761  aZStr.avail_out = buf_increment_size;
762  *pOutLen = nNewAlloc;
763  }
764  else
765  err = Z_MEM_ERROR;
766  }
767  }
768  }
769  if( err == Z_STREAM_END )
770  {
771  if( aZStr.avail_out > 0 )
772  *pOutLen -= aZStr.avail_out;
773  }
774  inflateEnd(&aZStr);
775  if( err < Z_OK )
776  {
777  std::free( *pOutBuf );
778  *pOutBuf = nullptr;
779  *pOutLen = 0;
780  }
781 }
782 
783 void PDFObject::writeStream( EmitContext& rWriteContext, const PDFFile* pParsedFile ) const
784 {
785  if( !m_pStream )
786  return;
787 
788  std::unique_ptr<char[]> pStream;
789  unsigned int nBytes = 0;
790  if( getDeflatedStream( pStream, &nBytes, pParsedFile, rWriteContext ) && nBytes && rWriteContext.m_bDeflate )
791  {
792  sal_uInt8* pOutBytes = nullptr;
793  sal_uInt32 nOutBytes = 0;
794  unzipToBuffer( pStream.get(), nBytes, &pOutBytes, &nOutBytes );
795  rWriteContext.write( pOutBytes, nOutBytes );
796  std::free( pOutBytes );
797  }
798  else if( pStream && nBytes )
799  rWriteContext.write( pStream.get(), nBytes );
800 }
801 
802 bool PDFObject::emit( EmitContext& rWriteContext ) const
803 {
804  if( ! rWriteContext.write( "\n", 1 ) )
805  return false;
806 
807  EmitImplData* pEData = getEmitData( rWriteContext );
808  if( pEData )
809  pEData->insertXref( m_nNumber, m_nGeneration, rWriteContext.getCurPos() );
810 
811  OString aBuf =
812  OString::number( sal_Int32( m_nNumber ) ) +
813  " " +
814  OString::number( sal_Int32( m_nGeneration ) ) +
815  " obj\n";
816  if( ! rWriteContext.write( aBuf.getStr(), aBuf.getLength() ) )
817  return false;
818 
819  if( pEData )
821  if( (rWriteContext.m_bDeflate || rWriteContext.m_bDecrypt) && pEData )
822  {
823  std::unique_ptr<char[]> pStream;
824  unsigned int nBytes = 0;
825  bool bDeflate = getDeflatedStream( pStream, &nBytes, pEData->m_pObjectContainer, rWriteContext );
826  if( pStream && nBytes )
827  {
828  // unzip the stream
829  sal_uInt8* pOutBytes = nullptr;
830  sal_uInt32 nOutBytes = 0;
831  if( bDeflate && rWriteContext.m_bDeflate )
832  unzipToBuffer( pStream.get(), nBytes, &pOutBytes, &nOutBytes );
833  else
834  {
835  // nothing to deflate, but decryption has happened
836  pOutBytes = reinterpret_cast<sal_uInt8*>(pStream.get());
837  nOutBytes = static_cast<sal_uInt32>(nBytes);
838  }
839 
840  if( nOutBytes )
841  {
842  // clone this object
843  std::unique_ptr<PDFObject> pClone(static_cast<PDFObject*>(clone()));
844  // set length in the dictionary to new stream length
845  std::unique_ptr<PDFNumber> pNewLen(new PDFNumber( double(nOutBytes) ));
846  pClone->m_pStream->m_pDict->insertValue( "Length", std::move(pNewLen) );
847 
848  if( bDeflate && rWriteContext.m_bDeflate )
849  {
850  // delete flatedecode filter
851  std::unordered_map<OString,PDFEntry*>::const_iterator it =
852  pClone->m_pStream->m_pDict->m_aMap.find( "Filter" );
853  if( it != pClone->m_pStream->m_pDict->m_aMap.end() )
854  {
855  PDFName* pFilter = dynamic_cast<PDFName*>(it->second);
856  if (pFilter && pFilter->m_aName == "FlateDecode")
857  pClone->m_pStream->m_pDict->eraseValue( "Filter" );
858  else
859  {
860  PDFArray* pArray = dynamic_cast<PDFArray*>(it->second);
861  if( pArray && ! pArray->m_aSubElements.empty() )
862  {
863  pFilter = dynamic_cast<PDFName*>(pArray->m_aSubElements.front().get());
864  if (pFilter && pFilter->m_aName == "FlateDecode")
865  {
866  pArray->m_aSubElements.erase( pArray->m_aSubElements.begin() );
867  }
868  }
869  }
870  }
871  }
872 
873  // write sub elements except stream
874  bool bRet = true;
875  unsigned int nEle = pClone->m_aSubElements.size();
876  for( unsigned int i = 0; i < nEle && bRet; i++ )
877  {
878  if( pClone->m_aSubElements[i].get() != pClone->m_pStream )
879  bRet = pClone->m_aSubElements[i]->emit( rWriteContext );
880  }
881  pClone.reset();
882  // write stream
883  if( bRet )
884  bRet = rWriteContext.write("stream\n", 7)
885  && rWriteContext.write(pOutBytes, nOutBytes)
886  && rWriteContext.write("\nendstream\nendobj\n", 18);
887  if( pOutBytes != reinterpret_cast<sal_uInt8*>(pStream.get()) )
888  std::free( pOutBytes );
889  pEData->setDecryptObject( 0, 0 );
890  return bRet;
891  }
892  if( pOutBytes != reinterpret_cast<sal_uInt8*>(pStream.get()) )
893  std::free( pOutBytes );
894  }
895  }
896 
897  bool bRet = emitSubElements( rWriteContext ) &&
898  rWriteContext.write( "\nendobj\n", 8 );
899  if( pEData )
900  pEData->setDecryptObject( 0, 0 );
901  return bRet;
902 }
903 
905 {
906  PDFObject* pNewOb = new PDFObject( m_nNumber, m_nGeneration );
907  cloneSubElements( pNewOb->m_aSubElements );
908  unsigned int nEle = m_aSubElements.size();
909  for( unsigned int i = 0; i < nEle; i++ )
910  {
911  if( m_aSubElements[i].get() == m_pObject )
912  pNewOb->m_pObject = pNewOb->m_aSubElements[i].get();
913  else if( m_aSubElements[i].get() == m_pStream && pNewOb->m_pObject )
914  {
915  pNewOb->m_pStream = dynamic_cast<PDFStream*>(pNewOb->m_aSubElements[i].get());
916  PDFDict* pNewDict = dynamic_cast<PDFDict*>(pNewOb->m_pObject);
917  if (pNewDict && pNewOb->m_pStream)
918  pNewOb->m_pStream->m_pDict = pNewDict;
919  }
920  }
921  return pNewOb;
922 }
923 
925 {
926 }
927 
928 bool PDFTrailer::emit( EmitContext& rWriteContext ) const
929 {
930  // get xref offset
931  unsigned int nXRefPos = rWriteContext.getCurPos();
932  // begin xref section, object 0 is always free
933  if( ! rWriteContext.write( "xref\r\n"
934  "0 1\r\n"
935  "0000000000 65535 f\r\n", 31 ) )
936  return false;
937  // check if we are emitting a complete PDF file
938  EmitImplData* pEData = getEmitData( rWriteContext );
939  if( pEData )
940  {
941  // emit object xrefs
942  const EmitImplData::XRefTable& rXRefs = pEData->m_aXRefTable;
943  EmitImplData::XRefTable::const_iterator section_begin, section_end;
944  section_begin = rXRefs.begin();
945  while( section_begin != rXRefs.end() )
946  {
947  // find end of continuous object numbers
948  section_end = section_begin;
949  unsigned int nLast = section_begin->first;
950  while( (++section_end) != rXRefs.end() &&
951  section_end->first == nLast+1 )
952  nLast = section_end->first;
953  // write first object number and number of following entries
954  OStringBuffer aBuf( 21 );
955  aBuf.append( sal_Int32( section_begin->first ) );
956  aBuf.append( ' ' );
957  aBuf.append( sal_Int32(nLast - section_begin->first + 1) );
958  aBuf.append( "\r\n" );
959  if( ! rWriteContext.write( aBuf.getStr(), aBuf.getLength() ) )
960  return false;
961  while( section_begin != section_end )
962  {
963  // write 20 char entry of form
964  // 0000offset 00gen n\r\n
965  aBuf.setLength( 0 );
966  OString aOffset( OString::number( section_begin->second.second ) );
967  int nPad = 10 - aOffset.getLength();
968  for( int i = 0; i < nPad; i++ )
969  aBuf.append( '0' );
970  aBuf.append( aOffset );
971  aBuf.append( ' ' );
972  OString aGeneration( OString::number( section_begin->second.first ) );
973  nPad = 5 - aGeneration.getLength();
974  for( int i = 0; i < nPad; i++ )
975  aBuf.append( '0' );
976  aBuf.append( aGeneration );
977  aBuf.append( " n\r\n" );
978  if( ! rWriteContext.write( aBuf.getStr(), 20 ) )
979  return false;
980  ++section_begin;
981  }
982  }
983  }
984  if( ! rWriteContext.write( "trailer\n", 8 ) )
985  return false;
986  if( ! emitSubElements( rWriteContext ) )
987  return false;
988  if( ! rWriteContext.write( "startxref\n", 10 ) )
989  return false;
990  OString aOffset( OString::number( nXRefPos ) );
991  if( ! rWriteContext.write( aOffset.getStr(), aOffset.getLength() ) )
992  return false;
993  return rWriteContext.write( "\n%%EOF\n", 7 );
994 }
995 
997 {
998  PDFTrailer* pNewTr = new PDFTrailer();
999  cloneSubElements( pNewTr->m_aSubElements );
1000  unsigned int nEle = m_aSubElements.size();
1001  for( unsigned int i = 0; i < nEle; i++ )
1002  {
1003  if( m_aSubElements[i].get() == m_pDict )
1004  {
1005  pNewTr->m_pDict = dynamic_cast<PDFDict*>(pNewTr->m_aSubElements[i].get());
1006  break;
1007  }
1008  }
1009  return pNewTr;
1010 }
1011 
1012 #define ENCRYPTION_KEY_LEN 16
1013 #define ENCRYPTION_BUF_LEN 32
1014 
1015 namespace pdfparse {
1017 {
1020  sal_uInt32 m_nAlgoVersion;
1022  sal_uInt32 m_nKeyLength;
1025  sal_uInt32 m_nPEntry;
1026  OString m_aDocID;
1027  rtlCipher m_aCipher;
1028 
1029  sal_uInt8 m_aDecryptionKey[ENCRYPTION_KEY_LEN+5] = {}; // maximum handled key length
1030 
1032  m_bIsEncrypted( false ),
1033  m_bStandardHandler( false ),
1034  m_nAlgoVersion( 0 ),
1035  m_nStandardRevision( 0 ),
1036  m_nKeyLength( 0 ),
1037  m_nPEntry( 0 ),
1038  m_aCipher( nullptr )
1039  {
1040  }
1041 
1043  {
1044  if( m_aCipher )
1045  rtl_cipher_destroyARCFOUR( m_aCipher );
1046  }
1047 };
1048 }
1049 
1051  : PDFContainer(), m_nMajor( 0 ), m_nMinor( 0 )
1052 {
1053 }
1054 
1056 {
1057 }
1058 
1060 {
1061  return impl_getData()->m_bIsEncrypted;
1062 }
1063 
1064 bool PDFFile::decrypt( const sal_uInt8* pInBuffer, sal_uInt32 nLen, sal_uInt8* pOutBuffer,
1065  unsigned int nObject, unsigned int nGeneration ) const
1066 {
1067  if( ! isEncrypted() )
1068  return false;
1069 
1070  if( ! m_pData->m_aCipher )
1071  m_pData->m_aCipher = rtl_cipher_createARCFOUR( rtl_Cipher_ModeStream );
1072 
1073  // modify encryption key
1074  sal_uInt32 i = m_pData->m_nKeyLength;
1075  m_pData->m_aDecryptionKey[i++] = sal_uInt8(nObject&0xff);
1076  m_pData->m_aDecryptionKey[i++] = sal_uInt8((nObject>>8)&0xff);
1077  m_pData->m_aDecryptionKey[i++] = sal_uInt8((nObject>>16)&0xff);
1078  m_pData->m_aDecryptionKey[i++] = sal_uInt8(nGeneration&0xff);
1079  m_pData->m_aDecryptionKey[i++] = sal_uInt8((nGeneration>>8)&0xff);
1080 
1081  ::std::vector<unsigned char> const aSum(::comphelper::Hash::calculateHash(
1082  m_pData->m_aDecryptionKey, i, ::comphelper::HashType::MD5));
1083 
1084  if( i > 16 )
1085  i = 16;
1086 
1087  rtlCipherError aErr = rtl_cipher_initARCFOUR( m_pData->m_aCipher,
1088  rtl_Cipher_DirectionDecode,
1089  aSum.data(), i,
1090  nullptr, 0 );
1091  if( aErr == rtl_Cipher_E_None )
1092  aErr = rtl_cipher_decodeARCFOUR( m_pData->m_aCipher,
1093  pInBuffer, nLen,
1094  pOutBuffer, nLen );
1095  return aErr == rtl_Cipher_E_None;
1096 }
1097 
1098 static const sal_uInt8 nPadString[32] =
1099 {
1100  0x28, 0xBF, 0x4E, 0x5E, 0x4E, 0x75, 0x8A, 0x41, 0x64, 0x00, 0x4E, 0x56, 0xFF, 0xFA, 0x01, 0x08,
1101  0x2E, 0x2E, 0x00, 0xB6, 0xD0, 0x68, 0x3E, 0x80, 0x2F, 0x0C, 0xA9, 0xFE, 0x64, 0x53, 0x69, 0x7A
1102 };
1103 
1104 static void pad_or_truncate_to_32( const OString& rStr, char* pBuffer )
1105 {
1106  int nLen = rStr.getLength();
1107  if( nLen > 32 )
1108  nLen = 32;
1109  const char* pStr = rStr.getStr();
1110  memcpy( pBuffer, pStr, nLen );
1111  int i = 0;
1112  while( nLen < 32 )
1113  pBuffer[nLen++] = nPadString[i++];
1114 }
1115 
1116 // pass at least pData->m_nKeyLength bytes in
1117 static sal_uInt32 password_to_key( const OString& rPwd, sal_uInt8* pOutKey, PDFFileImplData const * pData, bool bComputeO )
1118 {
1119  // see PDF reference 1.4 Algorithm 3.2
1120  // encrypt pad string
1121  char aPadPwd[ENCRYPTION_BUF_LEN];
1122  pad_or_truncate_to_32( rPwd, aPadPwd );
1124  aDigest.update(reinterpret_cast<unsigned char const*>(aPadPwd), sizeof(aPadPwd));
1125  if( ! bComputeO )
1126  {
1127  aDigest.update(pData->m_aOEntry, 32);
1128  sal_uInt8 aPEntry[4];
1129  aPEntry[0] = static_cast<sal_uInt8>(pData->m_nPEntry & 0xff);
1130  aPEntry[1] = static_cast<sal_uInt8>((pData->m_nPEntry >> 8 ) & 0xff);
1131  aPEntry[2] = static_cast<sal_uInt8>((pData->m_nPEntry >> 16) & 0xff);
1132  aPEntry[3] = static_cast<sal_uInt8>((pData->m_nPEntry >> 24) & 0xff);
1133  aDigest.update(aPEntry, sizeof(aPEntry));
1134  aDigest.update(reinterpret_cast<unsigned char const*>(pData->m_aDocID.getStr()), pData->m_aDocID.getLength());
1135  }
1136  ::std::vector<unsigned char> nSum(aDigest.finalize());
1137  if( pData->m_nStandardRevision == 3 )
1138  {
1139  for( int i = 0; i < 50; i++ )
1140  {
1141  nSum = ::comphelper::Hash::calculateHash(nSum.data(), nSum.size(),
1142  ::comphelper::HashType::MD5);
1143  }
1144  }
1145  sal_uInt32 nLen = pData->m_nKeyLength;
1146  if( nLen > RTL_DIGEST_LENGTH_MD5 )
1147  nLen = RTL_DIGEST_LENGTH_MD5;
1148  memcpy( pOutKey, nSum.data(), nLen );
1149  return nLen;
1150 }
1151 
1152 static bool check_user_password( const OString& rPwd, PDFFileImplData* pData )
1153 {
1154  // see PDF reference 1.4 Algorithm 3.6
1155  bool bValid = false;
1157  sal_uInt32 nKeyLen = password_to_key( rPwd, aKey, pData, false );
1158  // save (at this time potential) decryption key for later use
1159  memcpy( pData->m_aDecryptionKey, aKey, nKeyLen );
1160  if( pData->m_nStandardRevision == 2 )
1161  {
1162  sal_uInt8 nEncryptedEntry[ENCRYPTION_BUF_LEN] = {};
1163  // see PDF reference 1.4 Algorithm 3.4
1164  // encrypt pad string
1165  if (rtl_cipher_initARCFOUR( pData->m_aCipher, rtl_Cipher_DirectionEncode,
1166  aKey, nKeyLen,
1167  nullptr, 0 )
1168  != rtl_Cipher_E_None)
1169  {
1170  return false; //TODO: differentiate "failed to decrypt" from "wrong password"
1171  }
1172  rtl_cipher_encodeARCFOUR( pData->m_aCipher, nPadString, sizeof( nPadString ),
1173  nEncryptedEntry, sizeof( nEncryptedEntry ) );
1174  bValid = (memcmp( nEncryptedEntry, pData->m_aUEntry, 32 ) == 0);
1175  }
1176  else if( pData->m_nStandardRevision == 3 )
1177  {
1178  // see PDF reference 1.4 Algorithm 3.5
1180  aDigest.update(nPadString, sizeof(nPadString));
1181  aDigest.update(reinterpret_cast<unsigned char const*>(pData->m_aDocID.getStr()), pData->m_aDocID.getLength());
1182  ::std::vector<unsigned char> nEncryptedEntry(aDigest.finalize());
1183  if (rtl_cipher_initARCFOUR( pData->m_aCipher, rtl_Cipher_DirectionEncode,
1184  aKey, sizeof(aKey), nullptr, 0 )
1185  != rtl_Cipher_E_None)
1186  {
1187  return false; //TODO: differentiate "failed to decrypt" from "wrong password"
1188  }
1189  rtl_cipher_encodeARCFOUR( pData->m_aCipher,
1190  nEncryptedEntry.data(), 16,
1191  nEncryptedEntry.data(), 16 ); // encrypt in place
1192  for( int i = 1; i <= 19; i++ ) // do it 19 times, start with 1
1193  {
1194  sal_uInt8 aTempKey[ENCRYPTION_KEY_LEN];
1195  for( size_t j = 0; j < sizeof(aTempKey); j++ )
1196  aTempKey[j] = static_cast<sal_uInt8>( aKey[j] ^ i );
1197 
1198  if (rtl_cipher_initARCFOUR( pData->m_aCipher, rtl_Cipher_DirectionEncode,
1199  aTempKey, sizeof(aTempKey), nullptr, 0 )
1200  != rtl_Cipher_E_None)
1201  {
1202  return false; //TODO: differentiate "failed to decrypt" from "wrong password"
1203  }
1204  rtl_cipher_encodeARCFOUR( pData->m_aCipher,
1205  nEncryptedEntry.data(), 16,
1206  nEncryptedEntry.data(), 16 ); // encrypt in place
1207  }
1208  bValid = (memcmp( nEncryptedEntry.data(), pData->m_aUEntry, 16 ) == 0);
1209  }
1210  return bValid;
1211 }
1212 
1214 {
1215  return m_pData->m_bStandardHandler &&
1216  m_pData->m_nAlgoVersion >= 1 &&
1217  m_pData->m_nAlgoVersion <= 2 &&
1218  m_pData->m_nStandardRevision >= 2 &&
1219  m_pData->m_nStandardRevision <= 3;
1220 }
1221 
1222 bool PDFFile::setupDecryptionData( const OString& rPwd ) const
1223 {
1224  if( !impl_getData()->m_bIsEncrypted )
1225  return rPwd.isEmpty();
1226 
1227  // check if we can handle this encryption at all
1229  return false;
1230 
1231  if( ! m_pData->m_aCipher )
1232  m_pData->m_aCipher = rtl_cipher_createARCFOUR(rtl_Cipher_ModeStream);
1233 
1234  // first try user password
1235  bool bValid = check_user_password( rPwd, m_pData.get() );
1236 
1237  if( ! bValid )
1238  {
1239  // try owner password
1240  // see PDF reference 1.4 Algorithm 3.7
1242  sal_uInt8 nPwd[ENCRYPTION_BUF_LEN] = {};
1243  sal_uInt32 nKeyLen = password_to_key( rPwd, aKey, m_pData.get(), true );
1244  if( m_pData->m_nStandardRevision == 2 )
1245  {
1246  if (rtl_cipher_initARCFOUR( m_pData->m_aCipher, rtl_Cipher_DirectionDecode,
1247  aKey, nKeyLen, nullptr, 0 )
1248  != rtl_Cipher_E_None)
1249  {
1250  return false; //TODO: differentiate "failed to decrypt" from "wrong password"
1251  }
1252  rtl_cipher_decodeARCFOUR( m_pData->m_aCipher,
1253  m_pData->m_aOEntry, 32,
1254  nPwd, 32 );
1255  }
1256  else if( m_pData->m_nStandardRevision == 3 )
1257  {
1258  memcpy( nPwd, m_pData->m_aOEntry, 32 );
1259  for( int i = 19; i >= 0; i-- )
1260  {
1261  sal_uInt8 nTempKey[ENCRYPTION_KEY_LEN];
1262  for( size_t j = 0; j < sizeof(nTempKey); j++ )
1263  nTempKey[j] = sal_uInt8(aKey[j] ^ i);
1264  if (rtl_cipher_initARCFOUR( m_pData->m_aCipher, rtl_Cipher_DirectionDecode,
1265  nTempKey, nKeyLen, nullptr, 0 )
1266  != rtl_Cipher_E_None)
1267  {
1268  return false; //TODO: differentiate "failed to decrypt" from "wrong password"
1269  }
1270  rtl_cipher_decodeARCFOUR( m_pData->m_aCipher,
1271  nPwd, 32,
1272  nPwd, 32 ); // decrypt inplace
1273  }
1274  }
1275  bValid = check_user_password( OString( reinterpret_cast<char*>(nPwd), 32 ), m_pData.get() );
1276  }
1277 
1278  return bValid;
1279 }
1280 
1282 {
1283  if( m_pData )
1284  return m_pData.get();
1285  m_pData.reset( new PDFFileImplData );
1286  // check for encryption dict in a trailer
1287  unsigned int nElements = m_aSubElements.size();
1288  while( nElements-- > 0 )
1289  {
1290  PDFTrailer* pTrailer = dynamic_cast<PDFTrailer*>(m_aSubElements[nElements].get());
1291  if( pTrailer && pTrailer->m_pDict )
1292  {
1293  // search doc id
1294  PDFDict::Map::iterator doc_id = pTrailer->m_pDict->m_aMap.find( "ID" );
1295  if( doc_id != pTrailer->m_pDict->m_aMap.end() )
1296  {
1297  PDFArray* pArr = dynamic_cast<PDFArray*>(doc_id->second);
1298  if( pArr && !pArr->m_aSubElements.empty() )
1299  {
1300  PDFString* pStr = dynamic_cast<PDFString*>(pArr->m_aSubElements[0].get());
1301  if( pStr )
1302  m_pData->m_aDocID = pStr->getFilteredString();
1303 #if OSL_DEBUG_LEVEL > 0
1304  OUStringBuffer aTmp;
1305  for( int i = 0; i < m_pData->m_aDocID.getLength(); i++ )
1306  aTmp.append(OUString::number(static_cast<unsigned int>(sal_uInt8(m_pData->m_aDocID[i])), 16));
1307  SAL_INFO("sdext.pdfimport.pdfparse", "DocId is <" << aTmp.makeStringAndClear() << ">");
1308 #endif
1309  }
1310  }
1311  // search Encrypt entry
1312  PDFDict::Map::iterator enc =
1313  pTrailer->m_pDict->m_aMap.find( "Encrypt" );
1314  if( enc != pTrailer->m_pDict->m_aMap.end() )
1315  {
1316  PDFDict* pDict = dynamic_cast<PDFDict*>(enc->second);
1317  if( ! pDict )
1318  {
1319  PDFObjectRef* pRef = dynamic_cast<PDFObjectRef*>(enc->second);
1320  if( pRef )
1321  {
1322  PDFObject* pObj = findObject( pRef );
1323  if( pObj && pObj->m_pObject )
1324  pDict = dynamic_cast<PDFDict*>(pObj->m_pObject);
1325  }
1326  }
1327  if( pDict )
1328  {
1329  PDFDict::Map::iterator filter = pDict->m_aMap.find( "Filter" );
1330  PDFDict::Map::iterator version = pDict->m_aMap.find( "V" );
1331  PDFDict::Map::iterator len = pDict->m_aMap.find( "Length" );
1332  PDFDict::Map::iterator o_ent = pDict->m_aMap.find( "O" );
1333  PDFDict::Map::iterator u_ent = pDict->m_aMap.find( "U" );
1334  PDFDict::Map::iterator r_ent = pDict->m_aMap.find( "R" );
1335  PDFDict::Map::iterator p_ent = pDict->m_aMap.find( "P" );
1336  if( filter != pDict->m_aMap.end() )
1337  {
1338  m_pData->m_bIsEncrypted = true;
1339  m_pData->m_nKeyLength = 5;
1340  if( version != pDict->m_aMap.end() )
1341  {
1342  PDFNumber* pNum = dynamic_cast<PDFNumber*>(version->second);
1343  if( pNum )
1344  m_pData->m_nAlgoVersion = static_cast<sal_uInt32>(pNum->m_fValue);
1345  }
1346  if( m_pData->m_nAlgoVersion >= 3 )
1347  m_pData->m_nKeyLength = 16;
1348  if( len != pDict->m_aMap.end() )
1349  {
1350  PDFNumber* pNum = dynamic_cast<PDFNumber*>(len->second);
1351  if( pNum )
1352  m_pData->m_nKeyLength = static_cast<sal_uInt32>(pNum->m_fValue) / 8;
1353  }
1354  PDFName* pFilter = dynamic_cast<PDFName*>(filter->second);
1355  if( pFilter && pFilter->getFilteredName() == "Standard" )
1356  m_pData->m_bStandardHandler = true;
1357  if( o_ent != pDict->m_aMap.end() )
1358  {
1359  PDFString* pString = dynamic_cast<PDFString*>(o_ent->second);
1360  if( pString )
1361  {
1362  OString aEnt = pString->getFilteredString();
1363  if( aEnt.getLength() == 32 )
1364  memcpy( m_pData->m_aOEntry, aEnt.getStr(), 32 );
1365 #if OSL_DEBUG_LEVEL > 0
1366  else
1367  {
1368  OUStringBuffer aTmp;
1369  for( int i = 0; i < aEnt.getLength(); i++ )
1370  aTmp.append(" ").append(OUString::number(static_cast<unsigned int>(sal_uInt8(aEnt[i])), 16));
1371  SAL_WARN("sdext.pdfimport.pdfparse",
1372  "O entry has length " << static_cast<int>(aEnt.getLength()) << ", should be 32 <" << aTmp.makeStringAndClear() << ">" );
1373  }
1374 #endif
1375  }
1376  }
1377  if( u_ent != pDict->m_aMap.end() )
1378  {
1379  PDFString* pString = dynamic_cast<PDFString*>(u_ent->second);
1380  if( pString )
1381  {
1382  OString aEnt = pString->getFilteredString();
1383  if( aEnt.getLength() == 32 )
1384  memcpy( m_pData->m_aUEntry, aEnt.getStr(), 32 );
1385 #if OSL_DEBUG_LEVEL > 0
1386  else
1387  {
1388  OUStringBuffer aTmp;
1389  for( int i = 0; i < aEnt.getLength(); i++ )
1390  aTmp.append(" ").append(OUString::number(static_cast<unsigned int>(sal_uInt8(aEnt[i])), 16));
1391  SAL_WARN("sdext.pdfimport.pdfparse",
1392  "U entry has length " << static_cast<int>(aEnt.getLength()) << ", should be 32 <" << aTmp.makeStringAndClear() << ">" );
1393  }
1394 #endif
1395  }
1396  }
1397  if( r_ent != pDict->m_aMap.end() )
1398  {
1399  PDFNumber* pNum = dynamic_cast<PDFNumber*>(r_ent->second);
1400  if( pNum )
1401  m_pData->m_nStandardRevision = static_cast<sal_uInt32>(pNum->m_fValue);
1402  }
1403  if( p_ent != pDict->m_aMap.end() )
1404  {
1405  PDFNumber* pNum = dynamic_cast<PDFNumber*>(p_ent->second);
1406  if( pNum )
1407  m_pData->m_nPEntry = static_cast<sal_uInt32>(static_cast<sal_Int32>(pNum->m_fValue));
1408  SAL_INFO("sdext.pdfimport.pdfparse", "p entry is " << m_pData->m_nPEntry );
1409  }
1410 
1411  SAL_INFO("sdext.pdfimport.pdfparse", "Encryption dict: sec handler: " << (pFilter ? pFilter->getFilteredName() : OUString("<unknown>")) << ", version = " << static_cast<int>(m_pData->m_nAlgoVersion) << ", revision = " << static_cast<int>(m_pData->m_nStandardRevision) << ", key length = " << m_pData->m_nKeyLength );
1412  break;
1413  }
1414  }
1415  }
1416  }
1417  }
1418 
1419  return m_pData.get();
1420 }
1421 
1422 bool PDFFile::emit( EmitContext& rWriteContext ) const
1423 {
1424  setEmitData( rWriteContext, new EmitImplData( this ) );
1425 
1426  OString aBuf =
1427  "%PDF-" +
1428  OString::number( sal_Int32( m_nMajor ) ) +
1429  "." +
1430  OString::number( sal_Int32( m_nMinor ) ) +
1431  "\n";
1432  if( ! rWriteContext.write( aBuf.getStr(), aBuf.getLength() ) )
1433  return false;
1434  return emitSubElements( rWriteContext );
1435 }
1436 
1438 {
1439  PDFFile* pNewFl = new PDFFile();
1440  pNewFl->m_nMajor = m_nMajor;
1441  pNewFl->m_nMinor = m_nMinor;
1442  cloneSubElements( pNewFl->m_aSubElements );
1443  return pNewFl;
1444 }
1445 
1447 {
1448 }
1449 
1450 bool PDFPart::emit( EmitContext& rWriteContext ) const
1451 {
1452  return emitSubElements( rWriteContext );
1453 }
1454 
1456 {
1457  PDFPart* pNewPt = new PDFPart();
1458  cloneSubElements( pNewPt->m_aSubElements );
1459  return pNewPt;
1460 }
1461 
1462 /* vim:set shiftwidth=4 softtabstop=4 expandtab: */
virtual bool emit(EmitContext &rWriteContext) const override
Definition: pdfentries.cxx:928
virtual PDFEntry * clone() const override
Definition: pdfentries.cxx:240
void setDecryptObject(unsigned int nObject, unsigned int nGeneration)
Definition: pdfentries.cxx:84
virtual ~PDFNumber() override
Definition: pdfentries.cxx:337
virtual ~PDFComment() override
Definition: pdfentries.cxx:127
#define ENCRYPTION_BUF_LEN
static const sal_uInt8 nPadString[32]
virtual bool emit(EmitContext &rWriteContext) const override
Definition: pdfentries.cxx:399
virtual ~PDFFile() override
std::map< unsigned int, std::pair< unsigned int, unsigned int > > XRefTable
Definition: pdfentries.cxx:46
OString getFilteredString() const
Definition: pdfentries.cxx:245
virtual bool emit(EmitContext &rWriteContext) const override
Definition: pdfentries.cxx:341
PDFComment(const OString &rComment)
Definition: pdfparse.hxx:74
virtual bool write(const void *pBuf, unsigned int nLen)=0
std::unique_ptr< PDFFileImplData > m_pData
Definition: pdfparse.hxx:231
virtual PDFEntry * clone() const override
Definition: pdfentries.cxx:389
virtual ~PDFPart() override
static void pad_or_truncate_to_32(const OString &rStr, char *pBuffer)
virtual bool emit(EmitContext &rWriteContext) const override
Definition: pdfentries.cxx:131
PDFObject * findObject(unsigned int nNumber, unsigned int nGeneration) const
Definition: pdfentries.cxx:475
aBuf
bool decrypt(const sal_uInt8 *pInBuffer, sal_uInt32 nLen, sal_uInt8 *pOutBuffer, unsigned int nObject, unsigned int nGeneration) const
virtual bool emit(EmitContext &rWriteContext) const override
Definition: pdfentries.cxx:413
PDFString(const OString &rString)
Definition: pdfparse.hxx:105
PDFObjectRef(unsigned int nNr, unsigned int nGen)
Definition: pdfparse.hxx:141
virtual bool emit(EmitContext &rWriteContext) const override
EmbeddedObjectRef * pObject
PDFBool(bool bVal)
Definition: pdfparse.hxx:129
unsigned int const m_nGeneration
Definition: pdfparse.hxx:262
double const m_fValue
Definition: pdfparse.hxx:116
virtual bool emit(EmitContext &rWriteContext) const override
Definition: pdfentries.cxx:145
unsigned int const m_nGeneration
Definition: pdfparse.hxx:139
virtual ~PDFValue() override
Definition: pdfentries.cxx:123
virtual ~PDFDict() override
Definition: pdfentries.cxx:511
virtual bool emit(EmitContext &rWriteContext) const override
Definition: pdfentries.cxx:515
std::vector< std::unique_ptr< PDFEntry > > m_aSubElements
Definition: pdfparse.hxx:160
PDFNumber(double fVal)
Definition: pdfparse.hxx:118
unsigned int m_nDecryptObject
Definition: pdfentries.cxx:50
bool emitSubElements(EmitContext &rWriteContext) const
Definition: pdfentries.cxx:448
virtual bool emit(EmitContext &rWriteContext) const override
Definition: pdfentries.cxx:613
virtual PDFEntry * clone() const override
Definition: pdfentries.cxx:152
void eraseValue(const OString &rName)
Definition: pdfentries.cxx:555
virtual ~PDFBool() override
Definition: pdfentries.cxx:395
virtual ~PDFNull() override
Definition: pdfentries.cxx:409
sal_Int32 nElements
virtual ~PDFName() override
Definition: pdfentries.cxx:141
unsigned int const m_nEndOffset
Definition: pdfparse.hxx:205
OUString getFilteredName() const
Definition: pdfentries.cxx:157
bool usesSupportedEncryptionFormat() const
unsigned int m_nMajor
Definition: pdfparse.hxx:234
virtual ~PDFObject() override
Definition: pdfentries.cxx:656
virtual PDFEntry * clone() const override
Definition: pdfentries.cxx:418
virtual PDFEntry * clone() const override
Definition: pdfentries.cxx:618
static EmitImplData * getEmitData(EmitContext const &rContext)
Definition: pdfentries.cxx:111
err
sal_uInt16 char * pName
PDFStream * m_pStream
Definition: pdfparse.hxx:260
virtual PDFEntry * clone() const override
Definition: pdfentries.cxx:601
void insertValue(const OString &rName, std::unique_ptr< PDFEntry > pValue)
Definition: pdfentries.cxx:524
unsigned int const m_nBeginOffset
Definition: pdfparse.hxx:204
OString const m_aComment
Definition: pdfparse.hxx:72
virtual ~PDFStream() override
Definition: pdfentries.cxx:609
int i
bool const m_bValue
Definition: pdfparse.hxx:127
unsigned int const m_nNumber
Definition: pdfparse.hxx:138
bool getDeflatedStream(std::unique_ptr< char[]> &rpStream, unsigned int *pBytes, const PDFContainer *pObjectContainer, EmitContext &rContext) const
Definition: pdfentries.cxx:660
virtual unsigned int getCurPos()=0
virtual unsigned int readOrigBytes(unsigned int nOrigOffset, unsigned int nLen, void *pBuf)=0
bool isEncrypted() const
virtual PDFEntry * clone() const override
virtual PDFEntry * clone() const override
Definition: pdfentries.cxx:136
virtual bool copyOrigBytes(unsigned int nOrigOffset, unsigned int nLen)=0
virtual PDFEntry * clone() const override
Definition: pdfentries.cxx:439
virtual ~PDFArray() override
Definition: pdfentries.cxx:491
#define ENCRYPTION_KEY_LEN
PDFEntry * buildMap()
Definition: pdfentries.cxx:578
virtual PDFEntry * clone() const override
Definition: pdfentries.cxx:996
static bool check_user_password(const OString &rPwd, PDFFileImplData *pData)
virtual bool emit(EmitContext &rWriteContext) const override
bool insertXref(unsigned int nObject, unsigned int nGeneration, unsigned int nOffset)
Definition: pdfentries.cxx:54
virtual bool emit(EmitContext &rWriteContext) const override
Definition: pdfentries.cxx:495
PDFFileImplData * impl_getData() const
virtual PDFEntry * clone() const override
Definition: pdfentries.cxx:904
static void unzipToBuffer(char *pBegin, unsigned int nLen, sal_uInt8 **pOutBuf, sal_uInt32 *pOutLen)
Definition: pdfentries.cxx:726
virtual ~PDFString() override
Definition: pdfentries.cxx:189
virtual PDFEntry * clone() const override
Definition: pdfentries.cxx:504
OString const m_aString
Definition: pdfparse.hxx:103
static void setEmitData(EmitContext &rContext, EmitImplData *pNewEmitData)
Definition: pdfentries.cxx:116
PDFEntry * m_pObject
Definition: pdfparse.hxx:259
unsigned int m_nNumber
Definition: pdfparse.hxx:261
EmitImplData(const PDFContainer *pTopContainer)
Definition: pdfentries.cxx:72
unsigned char sal_uInt8
void update(const unsigned char *pInput, size_t length)
#define SAL_INFO(area, stream)
void cloneSubElements(std::vector< std::unique_ptr< PDFEntry >> &rNewSubElements) const
Definition: pdfentries.cxx:468
EmitContext(const PDFContainer *pTop=nullptr)
Definition: pdfentries.cxx:95
void decrypt(const sal_uInt8 *pInBuffer, sal_uInt32 nLen, sal_uInt8 *pOutBuffer, unsigned int nObject, unsigned int nGeneration) const
Definition: pdfentries.cxx:77
void * p
PDFName(const OString &rName)
Definition: pdfparse.hxx:92
virtual ~PDFContainer() override
Definition: pdfentries.cxx:444
static std::vector< unsigned char > calculateHash(const unsigned char *pInput, size_t length, HashType eType)
PDFObject(unsigned int nNr, unsigned int nGen)
Definition: pdfparse.hxx:264
virtual bool emit(EmitContext &rWriteContext) const override
Definition: pdfentries.cxx:193
static sal_uInt32 password_to_key(const OString &rPwd, sal_uInt8 *pOutKey, PDFFileImplData const *pData, bool bComputeO)
unsigned int getDictLength(const PDFContainer *pObjectContainer) const
Definition: pdfentries.cxx:623
#define SAL_WARN(area, stream)
virtual bool emit(EmitContext &rWriteContext) const override
Definition: pdfentries.cxx:802
virtual PDFEntry * clone() const override
Definition: pdfentries.cxx:404
virtual ~PDFObjectRef() override
Definition: pdfentries.cxx:424
std::unique_ptr< EmitImplData > m_pImplData
Definition: pdfparse.hxx:54
bool setupDecryptionData(const OString &rPwd) const
std::vector< unsigned char > finalize()
virtual ~PDFTrailer() override
Definition: pdfentries.cxx:924
virtual PDFEntry * clone() const override
PDFStream(unsigned int nBegin, unsigned int nEnd, PDFDict *pStreamDict)
Definition: pdfparse.hxx:208
virtual bool emit(EmitContext &rWriteContext) const override
Definition: pdfentries.cxx:428
const PDFContainer * m_pObjectContainer
Definition: pdfentries.cxx:49
sal_uInt8 m_aDecryptionKey[ENCRYPTION_KEY_LEN+5]
unsigned int m_nMinor
Definition: pdfparse.hxx:235
static osl::File * pStream
Definition: emitcontext.cxx:32
unsigned int m_nDecryptGeneration
Definition: pdfentries.cxx:51
void writeStream(EmitContext &rContext, const PDFFile *pPDFFile) const
Definition: pdfentries.cxx:783
virtual bool emit(EmitContext &rWriteContext) const =0