LibreOffice Module sdext (master)  1
pdfentries.cxx
Go to the documentation of this file.
1 /* -*- Mode: C++; tab-width: 4; indent-tabs-mode: nil; c-basic-offset: 4 -*- */
2 /*
3  * This file is part of the LibreOffice project.
4  *
5  * This Source Code Form is subject to the terms of the Mozilla Public
6  * License, v. 2.0. If a copy of the MPL was not distributed with this
7  * file, You can obtain one at http://mozilla.org/MPL/2.0/.
8  *
9  * This file incorporates work covered by the following license notice:
10  *
11  * Licensed to the Apache Software Foundation (ASF) under one or more
12  * contributor license agreements. See the NOTICE file distributed
13  * with this work for additional information regarding copyright
14  * ownership. The ASF licenses this file to you under the Apache
15  * License, Version 2.0 (the "License"); you may not use this file
16  * except in compliance with the License. You may obtain a copy of
17  * the License at http://www.apache.org/licenses/LICENSE-2.0 .
18  */
19 
20 
21 #include <pdfparse.hxx>
22 
23 #include <comphelper/hash.hxx>
24 
25 #include <rtl/strbuf.hxx>
26 #include <rtl/ustring.hxx>
27 #include <rtl/ustrbuf.hxx>
28 #include <rtl/digest.h>
29 #include <rtl/cipher.h>
30 #include <sal/log.hxx>
31 
32 #include <zlib.h>
33 
34 #include <math.h>
35 #include <map>
36 
37 #include <string.h>
38 
39 
40 namespace pdfparse
41 {
42 
44 {
45  // xref table: maps object number to a pair of (generation, buffer offset)
46  typedef std::map< unsigned int, std::pair< unsigned int, unsigned int > > XRefTable;
47  XRefTable m_aXRefTable;
48  // container of all indirect objects (usually a PDFFile*)
50  unsigned int m_nDecryptObject;
51  unsigned int m_nDecryptGeneration;
52 
53  // returns true if the xref table was updated
54  bool insertXref( unsigned int nObject, unsigned int nGeneration, unsigned int nOffset )
55  {
56  XRefTable::iterator it = m_aXRefTable.find( nObject );
57  if( it == m_aXRefTable.end() )
58  {
59  // new entry
60  m_aXRefTable[ nObject ] = std::pair<unsigned int, unsigned int>(nGeneration,nOffset);
61  return true;
62  }
63  // update old entry, if generation number is higher
64  if( it->second.first < nGeneration )
65  {
66  it->second = std::pair<unsigned int, unsigned int>(nGeneration,nOffset);
67  return true;
68  }
69  return false;
70  }
71 
72  explicit EmitImplData( const PDFContainer* pTopContainer ) :
73  m_pObjectContainer( pTopContainer ),
74  m_nDecryptObject( 0 ),
75  m_nDecryptGeneration( 0 )
76  {}
77  void decrypt( const sal_uInt8* pInBuffer, sal_uInt32 nLen, sal_uInt8* pOutBuffer,
78  unsigned int nObject, unsigned int nGeneration ) const
79  {
80  const PDFFile* pFile = dynamic_cast<const PDFFile*>(m_pObjectContainer);
81  pFile && pFile->decrypt( pInBuffer, nLen, pOutBuffer, nObject, nGeneration );
82  }
83 
84  void setDecryptObject( unsigned int nObject, unsigned int nGeneration )
85  {
86  m_nDecryptObject = nObject;
87  m_nDecryptGeneration = nGeneration;
88  }
89 };
90 
91 }
92 
93 using namespace pdfparse;
94 
96  m_bDeflate( false ),
97  m_bDecrypt( false )
98 {
99  if( pTop )
100  m_pImplData.reset( new EmitImplData( pTop ) );
101 }
102 
104 {
105 }
106 
108 {
109 }
110 
112 {
113  return rContext.m_pImplData.get();
114 }
115 
116 void PDFEntry::setEmitData( EmitContext& rContext, EmitImplData* pNewEmitData )
117 {
118  if( rContext.m_pImplData && rContext.m_pImplData.get() != pNewEmitData )
119  rContext.m_pImplData.reset();
120  rContext.m_pImplData.reset( pNewEmitData );
121 }
122 
124 {
125 }
126 
128 {
129 }
130 
131 bool PDFComment::emit( EmitContext& rWriteContext ) const
132 {
133  return rWriteContext.write( m_aComment.getStr(), m_aComment.getLength() );
134 }
135 
137 {
138  return new PDFComment( m_aComment );
139 }
140 
142 {
143 }
144 
145 bool PDFName::emit( EmitContext& rWriteContext ) const
146 {
147  if( ! rWriteContext.write( " /", 2 ) )
148  return false;
149  return rWriteContext.write( m_aName.getStr(), m_aName.getLength() );
150 }
151 
153 {
154  return new PDFName( m_aName );
155 }
156 
157 OUString PDFName::getFilteredName() const
158 {
159  OStringBuffer aFilter( m_aName.getLength() );
160  const char* pStr = m_aName.getStr();
161  unsigned int nLen = m_aName.getLength();
162  for( unsigned int i = 0; i < nLen; i++ )
163  {
164  if( (i < nLen - 3) && pStr[i] == '#' )
165  {
166  char rResult = 0;
167  i++;
168  if( pStr[i] >= '0' && pStr[i] <= '9' )
169  rResult = char( pStr[i]-'0' ) << 4;
170  else if( pStr[i] >= 'a' && pStr[i] <= 'f' )
171  rResult = char( pStr[i]-'a' + 10 ) << 4;
172  else if( pStr[i] >= 'A' && pStr[i] <= 'F' )
173  rResult = char( pStr[i]-'A' + 10 ) << 4;
174  i++;
175  if( pStr[i] >= '0' && pStr[i] <= '9' )
176  rResult |= char( pStr[i]-'0' );
177  else if( pStr[i] >= 'a' && pStr[i] <= 'f' )
178  rResult |= char( pStr[i]-'a' + 10 );
179  else if( pStr[i] >= 'A' && pStr[i] <= 'F' )
180  rResult |= char( pStr[i]-'A' + 10 );
181  aFilter.append( rResult );
182  }
183  else
184  aFilter.append( pStr[i] );
185  }
186  return OStringToOUString( aFilter.makeStringAndClear(), RTL_TEXTENCODING_UTF8 );
187 }
188 
190 {
191 }
192 
193 bool PDFString::emit( EmitContext& rWriteContext ) const
194 {
195  if( ! rWriteContext.write( " ", 1 ) )
196  return false;
197  EmitImplData* pEData = getEmitData( rWriteContext );
198  if( rWriteContext.m_bDecrypt && pEData && pEData->m_nDecryptObject )
199  {
200  OString aFiltered( getFilteredString() );
201  // decrypt inplace (evil since OString is supposed to be const
202  // however in this case we know that getFilteredString returned a singular string instance
203  pEData->decrypt( reinterpret_cast<sal_uInt8 const *>(aFiltered.getStr()), aFiltered.getLength(),
204  reinterpret_cast<sal_uInt8 *>(const_cast<char *>(aFiltered.getStr())),
205  pEData->m_nDecryptObject, pEData->m_nDecryptGeneration );
206  // check for string or hex string
207  const char* pStr = aFiltered.getStr();
208  if( aFiltered.getLength() > 1 &&
209  ( (static_cast<unsigned char>(pStr[0]) == 0xff && static_cast<unsigned char>(pStr[1]) == 0xfe) ||
210  (static_cast<unsigned char>(pStr[0]) == 0xfe && static_cast<unsigned char>(pStr[1]) == 0xff) ) )
211  {
212  static const char pHexTab[16] = { '0', '1', '2', '3', '4', '5', '6', '7',
213  '8', '9', 'A', 'B', 'C', 'D', 'E', 'F' };
214  if( ! rWriteContext.write( "<", 1 ) )
215  return false;
216  for( sal_Int32 i = 0; i < aFiltered.getLength(); i++ )
217  {
218  if( ! rWriteContext.write( pHexTab + ((sal_uInt32(pStr[i]) >> 4) & 0x0f), 1 ) )
219  return false;
220  if( ! rWriteContext.write( pHexTab + (sal_uInt32(pStr[i]) & 0x0f), 1 ) )
221  return false;
222  }
223  if( ! rWriteContext.write( ">", 1 ) )
224  return false;
225  }
226  else
227  {
228  if( ! rWriteContext.write( "(", 1 ) )
229  return false;
230  if( ! rWriteContext.write( aFiltered.getStr(), aFiltered.getLength() ) )
231  return false;
232  if( ! rWriteContext.write( ")", 1 ) )
233  return false;
234  }
235  return true;
236  }
237  return rWriteContext.write( m_aString.getStr(), m_aString.getLength() );
238 }
239 
241 {
242  return new PDFString( m_aString );
243 }
244 
246 {
247  int nLen = m_aString.getLength();
248  OStringBuffer aBuf( nLen );
249 
250  const char* pStr = m_aString.getStr();
251  if( *pStr == '(' )
252  {
253  const char* pRun = pStr+1;
254  while( pRun - pStr < nLen-1 )
255  {
256  if( *pRun == '\\' )
257  {
258  pRun++;
259  if( pRun - pStr < nLen )
260  {
261  char aEsc = 0;
262  if( *pRun == 'n' )
263  aEsc = '\n';
264  else if( *pRun == 'r' )
265  aEsc = '\r';
266  else if( *pRun == 't' )
267  aEsc = '\t';
268  else if( *pRun == 'b' )
269  aEsc = '\b';
270  else if( *pRun == 'f' )
271  aEsc = '\f';
272  else if( *pRun == '(' )
273  aEsc = '(';
274  else if( *pRun == ')' )
275  aEsc = ')';
276  else if( *pRun == '\\' )
277  aEsc = '\\';
278  else if( *pRun == '\n' )
279  {
280  pRun++;
281  continue;
282  }
283  else if( *pRun == '\r' )
284  {
285  pRun++;
286  if( *pRun == '\n' )
287  pRun++;
288  continue;
289  }
290  else
291  {
292  int i = 0;
293  while( i++ < 3 && *pRun >= '0' && *pRun <= '7' )
294  aEsc = 8*aEsc + (*pRun++ - '0');
295  // move pointer back to last character of octal sequence
296  pRun--;
297  }
298  aBuf.append( aEsc );
299  }
300  }
301  else
302  aBuf.append( *pRun );
303  // move pointer to next character
304  pRun++;
305  }
306  }
307  else if( *pStr == '<' )
308  {
309  const char* pRun = pStr+1;
310  while( *pRun != '>' && pRun - pStr < nLen )
311  {
312  char rResult = 0;
313  if( *pRun >= '0' && *pRun <= '9' )
314  rResult = char( ( *pRun-'0' ) << 4 );
315  else if( *pRun >= 'a' && *pRun <= 'f' )
316  rResult = char( ( *pRun-'a' + 10 ) << 4 );
317  else if( *pRun >= 'A' && *pRun <= 'F' )
318  rResult = char( ( *pRun-'A' + 10 ) << 4 );
319  pRun++;
320  if( *pRun != '>' && pRun - pStr < nLen )
321  {
322  if( *pRun >= '0' && *pRun <= '9' )
323  rResult |= char( *pRun-'0' );
324  else if( *pRun >= 'a' && *pRun <= 'f' )
325  rResult |= char( *pRun-'a' + 10 );
326  else if( *pRun >= 'A' && *pRun <= 'F' )
327  rResult |= char( *pRun-'A' + 10 );
328  }
329  pRun++;
330  aBuf.append( rResult );
331  }
332  }
333 
334  return aBuf.makeStringAndClear();
335 }
336 
338 {
339 }
340 
341 bool PDFNumber::emit( EmitContext& rWriteContext ) const
342 {
343  OStringBuffer aBuf( 32 );
344  aBuf.append( ' ' );
345 
346  double fValue = m_fValue;
347  bool bNeg = false;
348  int nPrecision = 5;
349  if( fValue < 0.0 )
350  {
351  bNeg = true;
352  fValue=-fValue;
353  }
354 
355  sal_Int64 nInt = static_cast<sal_Int64>(fValue);
356  fValue -= static_cast<double>(nInt);
357  // optimizing hardware may lead to a value of 1.0 after the subtraction
358  if( fValue == 1.0 || log10( 1.0-fValue ) <= -nPrecision )
359  {
360  nInt++;
361  fValue = 0.0;
362  }
363  sal_Int64 nFrac = 0;
364  if( fValue )
365  {
366  fValue *= pow( 10.0, static_cast<double>(nPrecision) );
367  nFrac = static_cast<sal_Int64>(fValue);
368  }
369  if( bNeg && ( nInt || nFrac ) )
370  aBuf.append( '-' );
371  aBuf.append( nInt );
372  if( nFrac )
373  {
374  int i;
375  aBuf.append( '.' );
376  sal_Int64 nBound = static_cast<sal_Int64>(pow( 10.0, nPrecision - 1.0 )+0.5);
377  for ( i = 0; ( i < nPrecision ) && nFrac; i++ )
378  {
379  sal_Int64 nNumb = nFrac / nBound;
380  nFrac -= nNumb * nBound;
381  aBuf.append( nNumb );
382  nBound /= 10;
383  }
384  }
385 
386  return rWriteContext.write( aBuf.getStr(), aBuf.getLength() );
387 }
388 
390 {
391  return new PDFNumber( m_fValue );
392 }
393 
394 
396 {
397 }
398 
399 bool PDFBool::emit( EmitContext& rWriteContext ) const
400 {
401  return m_bValue ? rWriteContext.write( " true", 5 ) : rWriteContext.write( " false", 6 );
402 }
403 
405 {
406  return new PDFBool( m_bValue );
407 }
408 
410 {
411 }
412 
413 bool PDFNull::emit( EmitContext& rWriteContext ) const
414 {
415  return rWriteContext.write( " null", 5 );
416 }
417 
419 {
420  return new PDFNull();
421 }
422 
423 
425 {
426 }
427 
428 bool PDFObjectRef::emit( EmitContext& rWriteContext ) const
429 {
430  OString aBuf =
431  " " +
432  OString::number( sal_Int32( m_nNumber ) ) +
433  " " +
434  OString::number( sal_Int32( m_nGeneration ) ) +
435  " R";
436  return rWriteContext.write( aBuf.getStr(), aBuf.getLength() );
437 }
438 
440 {
441  return new PDFObjectRef( m_nNumber, m_nGeneration );
442 }
443 
445 {
446 }
447 
448 bool PDFContainer::emitSubElements( EmitContext& rWriteContext ) const
449 {
450  int nEle = m_aSubElements.size();
451  for( int i = 0; i < nEle; i++ )
452  {
453  if( rWriteContext.m_bDecrypt )
454  {
455  const PDFName* pName = dynamic_cast<PDFName*>(m_aSubElements[i].get());
456  if (pName && pName->m_aName == "Encrypt")
457  {
458  i++;
459  continue;
460  }
461  }
462  if( ! m_aSubElements[i]->emit( rWriteContext ) )
463  return false;
464  }
465  return true;
466 }
467 
468 void PDFContainer::cloneSubElements( std::vector<std::unique_ptr<PDFEntry>>& rNewSubElements ) const
469 {
470  int nEle = m_aSubElements.size();
471  for( int i = 0; i < nEle; i++ )
472  rNewSubElements.emplace_back( m_aSubElements[i]->clone() );
473 }
474 
475 PDFObject* PDFContainer::findObject( unsigned int nNumber, unsigned int nGeneration ) const
476 {
477  unsigned int nEle = m_aSubElements.size();
478  for( unsigned int i = 0; i < nEle; i++ )
479  {
480  PDFObject* pObject = dynamic_cast<PDFObject*>(m_aSubElements[i].get());
481  if( pObject &&
482  pObject->m_nNumber == nNumber &&
483  pObject->m_nGeneration == nGeneration )
484  {
485  return pObject;
486  }
487  }
488  return nullptr;
489 }
490 
492 {
493 }
494 
495 bool PDFArray::emit( EmitContext& rWriteContext ) const
496 {
497  if( ! rWriteContext.write( "[", 1 ) )
498  return false;
499  if( ! emitSubElements( rWriteContext ) )
500  return false;
501  return rWriteContext.write( "]", 1 );
502 }
503 
505 {
506  PDFArray* pNewAr = new PDFArray();
507  cloneSubElements( pNewAr->m_aSubElements );
508  return pNewAr;
509 }
510 
512 {
513 }
514 
515 bool PDFDict::emit( EmitContext& rWriteContext ) const
516 {
517  if( ! rWriteContext.write( "<<\n", 3 ) )
518  return false;
519  if( ! emitSubElements( rWriteContext ) )
520  return false;
521  return rWriteContext.write( "\n>>\n", 4 );
522 }
523 
524 void PDFDict::insertValue( const OString& rName, std::unique_ptr<PDFEntry> pValue )
525 {
526  if( ! pValue )
527  eraseValue( rName );
528 
529  PDFEntry* pValueTmp = nullptr;
530  std::unordered_map<OString,PDFEntry*>::iterator it = m_aMap.find( rName );
531  if( it == m_aMap.end() )
532  {
533  // new name/value, pair, append it
534  m_aSubElements.emplace_back(std::make_unique<PDFName>(rName));
535  m_aSubElements.emplace_back( std::move(pValue) );
536  pValueTmp = m_aSubElements.back().get();
537  }
538  else
539  {
540  unsigned int nSub = m_aSubElements.size();
541  for( unsigned int i = 0; i < nSub; i++ )
542  if( m_aSubElements[i].get() == it->second )
543  {
544  m_aSubElements[i] = std::move(pValue);
545  pValueTmp = m_aSubElements[i].get();
546  break;
547  }
548  }
549  assert(pValueTmp);
550  m_aMap[ rName ] = pValueTmp;
551 }
552 
553 void PDFDict::eraseValue( const OString& rName )
554 {
555  unsigned int nEle = m_aSubElements.size();
556  for( unsigned int i = 0; i < nEle; i++ )
557  {
558  PDFName* pName = dynamic_cast<PDFName*>(m_aSubElements[i].get());
559  if( pName && pName->m_aName == rName )
560  {
561  for( unsigned int j = i+1; j < nEle; j++ )
562  {
563  if( dynamic_cast<PDFComment*>(m_aSubElements[j].get()) == nullptr )
564  {
565  // remove and free subelements from vector
566  m_aSubElements.erase( m_aSubElements.begin()+j );
567  m_aSubElements.erase( m_aSubElements.begin()+i );
568  buildMap();
569  return;
570  }
571  }
572  }
573  }
574 }
575 
577 {
578  // clear map
579  m_aMap.clear();
580  // build map
581  unsigned int nEle = m_aSubElements.size();
582  PDFName* pName = nullptr;
583  for( unsigned int i = 0; i < nEle; i++ )
584  {
585  if( dynamic_cast<PDFComment*>(m_aSubElements[i].get()) == nullptr )
586  {
587  if( pName )
588  {
589  m_aMap[ pName->m_aName ] = m_aSubElements[i].get();
590  pName = nullptr;
591  }
592  else if( (pName = dynamic_cast<PDFName*>(m_aSubElements[i].get())) == nullptr )
593  return m_aSubElements[i].get();
594  }
595  }
596  return pName;
597 }
598 
600 {
601  PDFDict* pNewDict = new PDFDict();
602  cloneSubElements( pNewDict->m_aSubElements );
603  pNewDict->buildMap();
604  return pNewDict;
605 }
606 
608 {
609 }
610 
611 bool PDFStream::emit( EmitContext& rWriteContext ) const
612 {
613  return rWriteContext.copyOrigBytes( m_nBeginOffset, m_nEndOffset-m_nBeginOffset );
614 }
615 
617 {
618  return new PDFStream( m_nBeginOffset, m_nEndOffset, nullptr );
619 }
620 
621 unsigned int PDFStream::getDictLength( const PDFContainer* pContainer ) const
622 {
623  if( ! m_pDict )
624  return 0;
625  // find /Length entry, can either be a direct or indirect number object
626  std::unordered_map<OString,PDFEntry*>::const_iterator it =
627  m_pDict->m_aMap.find( "Length" );
628  if( it == m_pDict->m_aMap.end() )
629  return 0;
630  PDFNumber* pNum = dynamic_cast<PDFNumber*>(it->second);
631  if( ! pNum && pContainer )
632  {
633  PDFObjectRef* pRef = dynamic_cast<PDFObjectRef*>(it->second);
634  if( pRef )
635  {
636  int nEle = pContainer->m_aSubElements.size();
637  for (int i = 0; i < nEle; i++)
638  {
639  PDFObject* pObj = dynamic_cast<PDFObject*>(pContainer->m_aSubElements[i].get());
640  if( pObj &&
641  pObj->m_nNumber == pRef->m_nNumber &&
642  pObj->m_nGeneration == pRef->m_nGeneration )
643  {
644  if( pObj->m_pObject )
645  pNum = dynamic_cast<PDFNumber*>(pObj->m_pObject);
646  break;
647  }
648  }
649  }
650  }
651  return pNum ? static_cast<unsigned int>(pNum->m_fValue) : 0;
652 }
653 
655 {
656 }
657 
658 bool PDFObject::getDeflatedStream( std::unique_ptr<char[]>& rpStream, unsigned int* pBytes, const PDFContainer* pObjectContainer, EmitContext& rContext ) const
659 {
660  bool bIsDeflated = false;
661  if( m_pStream && m_pStream->m_pDict &&
663  )
664  {
665  unsigned int nOuterStreamLen = m_pStream->m_nEndOffset - m_pStream->m_nBeginOffset;
666  rpStream.reset(new char[ nOuterStreamLen ]);
667  unsigned int nRead = rContext.readOrigBytes( m_pStream->m_nBeginOffset, nOuterStreamLen, rpStream.get() );
668  if( nRead != nOuterStreamLen )
669  {
670  rpStream.reset();
671  *pBytes = 0;
672  return false;
673  }
674  // is there a filter entry ?
675  std::unordered_map<OString,PDFEntry*>::const_iterator it =
676  m_pStream->m_pDict->m_aMap.find( "Filter" );
677  if( it != m_pStream->m_pDict->m_aMap.end() )
678  {
679  PDFName* pFilter = dynamic_cast<PDFName*>(it->second);
680  if( ! pFilter )
681  {
682  PDFArray* pArray = dynamic_cast<PDFArray*>(it->second);
683  if( pArray && ! pArray->m_aSubElements.empty() )
684  {
685  pFilter = dynamic_cast<PDFName*>(pArray->m_aSubElements.front().get());
686  }
687  }
688 
689  // is the (first) filter FlateDecode ?
690  if (pFilter && pFilter->m_aName == "FlateDecode")
691  {
692  bIsDeflated = true;
693  }
694  }
695  // prepare compressed data section
696  char* pStream = rpStream.get();
697  if( pStream[0] == 's' )
698  pStream += 6; // skip "stream"
699  // skip line end after "stream"
700  while( *pStream == '\r' || *pStream == '\n' )
701  pStream++;
702  // get the compressed length
703  *pBytes = m_pStream->getDictLength( pObjectContainer );
704  if( pStream != rpStream.get() )
705  memmove( rpStream.get(), pStream, *pBytes );
706  if( rContext.m_bDecrypt )
707  {
708  EmitImplData* pEData = getEmitData( rContext );
709  pEData->decrypt( reinterpret_cast<const sal_uInt8*>(rpStream.get()),
710  *pBytes,
711  reinterpret_cast<sal_uInt8*>(rpStream.get()),
712  m_nNumber,
714  ); // decrypt inplace
715  }
716  }
717  else
718  {
719  *pBytes = 0;
720  }
721  return bIsDeflated;
722 }
723 
724 static void unzipToBuffer( char* pBegin, unsigned int nLen,
725  sal_uInt8** pOutBuf, sal_uInt32* pOutLen )
726 {
727  z_stream aZStr;
728  aZStr.next_in = reinterpret_cast<Bytef *>(pBegin);
729  aZStr.avail_in = nLen;
730  aZStr.zalloc = nullptr;
731  aZStr.zfree = nullptr;
732  aZStr.opaque = nullptr;
733 
734  int err = inflateInit(&aZStr);
735 
736  const unsigned int buf_increment_size = 16384;
737 
738  if (auto p = static_cast<sal_uInt8*>(std::realloc(*pOutBuf, buf_increment_size)))
739  {
740  *pOutBuf = p;
741  aZStr.next_out = reinterpret_cast<Bytef*>(*pOutBuf);
742  aZStr.avail_out = buf_increment_size;
743  *pOutLen = buf_increment_size;
744  }
745  else
746  err = Z_MEM_ERROR;
747  while( err != Z_STREAM_END && err >= Z_OK && aZStr.avail_in )
748  {
749  err = inflate( &aZStr, Z_NO_FLUSH );
750  if( aZStr.avail_out == 0 )
751  {
752  if( err != Z_STREAM_END )
753  {
754  const int nNewAlloc = *pOutLen + buf_increment_size;
755  if (auto p = static_cast<sal_uInt8*>(std::realloc(*pOutBuf, nNewAlloc)))
756  {
757  *pOutBuf = p;
758  aZStr.next_out = reinterpret_cast<Bytef*>(*pOutBuf + *pOutLen);
759  aZStr.avail_out = buf_increment_size;
760  *pOutLen = nNewAlloc;
761  }
762  else
763  err = Z_MEM_ERROR;
764  }
765  }
766  }
767  if( err == Z_STREAM_END )
768  {
769  if( aZStr.avail_out > 0 )
770  *pOutLen -= aZStr.avail_out;
771  }
772  inflateEnd(&aZStr);
773  if( err < Z_OK )
774  {
775  std::free( *pOutBuf );
776  *pOutBuf = nullptr;
777  *pOutLen = 0;
778  }
779 }
780 
781 void PDFObject::writeStream( EmitContext& rWriteContext, const PDFFile* pParsedFile ) const
782 {
783  if( !m_pStream )
784  return;
785 
786  std::unique_ptr<char[]> pStream;
787  unsigned int nBytes = 0;
788  if( getDeflatedStream( pStream, &nBytes, pParsedFile, rWriteContext ) && nBytes && rWriteContext.m_bDeflate )
789  {
790  sal_uInt8* pOutBytes = nullptr;
791  sal_uInt32 nOutBytes = 0;
792  unzipToBuffer( pStream.get(), nBytes, &pOutBytes, &nOutBytes );
793  rWriteContext.write( pOutBytes, nOutBytes );
794  std::free( pOutBytes );
795  }
796  else if( pStream && nBytes )
797  rWriteContext.write( pStream.get(), nBytes );
798 }
799 
800 bool PDFObject::emit( EmitContext& rWriteContext ) const
801 {
802  if( ! rWriteContext.write( "\n", 1 ) )
803  return false;
804 
805  EmitImplData* pEData = getEmitData( rWriteContext );
806  if( pEData )
807  pEData->insertXref( m_nNumber, m_nGeneration, rWriteContext.getCurPos() );
808 
809  OString aBuf =
810  OString::number( sal_Int32( m_nNumber ) ) +
811  " " +
812  OString::number( sal_Int32( m_nGeneration ) ) +
813  " obj\n";
814  if( ! rWriteContext.write( aBuf.getStr(), aBuf.getLength() ) )
815  return false;
816 
817  if( pEData )
819  if( (rWriteContext.m_bDeflate || rWriteContext.m_bDecrypt) && pEData )
820  {
821  std::unique_ptr<char[]> pStream;
822  unsigned int nBytes = 0;
823  bool bDeflate = getDeflatedStream( pStream, &nBytes, pEData->m_pObjectContainer, rWriteContext );
824  if( pStream && nBytes )
825  {
826  // unzip the stream
827  sal_uInt8* pOutBytes = nullptr;
828  sal_uInt32 nOutBytes = 0;
829  if( bDeflate && rWriteContext.m_bDeflate )
830  unzipToBuffer( pStream.get(), nBytes, &pOutBytes, &nOutBytes );
831  else
832  {
833  // nothing to deflate, but decryption has happened
834  pOutBytes = reinterpret_cast<sal_uInt8*>(pStream.get());
835  nOutBytes = static_cast<sal_uInt32>(nBytes);
836  }
837 
838  if( nOutBytes )
839  {
840  // clone this object
841  std::unique_ptr<PDFObject> pClone(static_cast<PDFObject*>(clone()));
842  // set length in the dictionary to new stream length
843  std::unique_ptr<PDFNumber> pNewLen(new PDFNumber( double(nOutBytes) ));
844  pClone->m_pStream->m_pDict->insertValue( "Length", std::move(pNewLen) );
845 
846  if( bDeflate && rWriteContext.m_bDeflate )
847  {
848  // delete flatedecode filter
849  std::unordered_map<OString,PDFEntry*>::const_iterator it =
850  pClone->m_pStream->m_pDict->m_aMap.find( "Filter" );
851  if( it != pClone->m_pStream->m_pDict->m_aMap.end() )
852  {
853  PDFName* pFilter = dynamic_cast<PDFName*>(it->second);
854  if (pFilter && pFilter->m_aName == "FlateDecode")
855  pClone->m_pStream->m_pDict->eraseValue( "Filter" );
856  else
857  {
858  PDFArray* pArray = dynamic_cast<PDFArray*>(it->second);
859  if( pArray && ! pArray->m_aSubElements.empty() )
860  {
861  pFilter = dynamic_cast<PDFName*>(pArray->m_aSubElements.front().get());
862  if (pFilter && pFilter->m_aName == "FlateDecode")
863  {
864  pArray->m_aSubElements.erase( pArray->m_aSubElements.begin() );
865  }
866  }
867  }
868  }
869  }
870 
871  // write sub elements except stream
872  bool bRet = true;
873  unsigned int nEle = pClone->m_aSubElements.size();
874  for( unsigned int i = 0; i < nEle && bRet; i++ )
875  {
876  if( pClone->m_aSubElements[i].get() != pClone->m_pStream )
877  bRet = pClone->m_aSubElements[i]->emit( rWriteContext );
878  }
879  pClone.reset();
880  // write stream
881  if( bRet )
882  bRet = rWriteContext.write("stream\n", 7)
883  && rWriteContext.write(pOutBytes, nOutBytes)
884  && rWriteContext.write("\nendstream\nendobj\n", 18);
885  if( pOutBytes != reinterpret_cast<sal_uInt8*>(pStream.get()) )
886  std::free( pOutBytes );
887  pEData->setDecryptObject( 0, 0 );
888  return bRet;
889  }
890  if( pOutBytes != reinterpret_cast<sal_uInt8*>(pStream.get()) )
891  std::free( pOutBytes );
892  }
893  }
894 
895  bool bRet = emitSubElements( rWriteContext ) &&
896  rWriteContext.write( "\nendobj\n", 8 );
897  if( pEData )
898  pEData->setDecryptObject( 0, 0 );
899  return bRet;
900 }
901 
903 {
904  PDFObject* pNewOb = new PDFObject( m_nNumber, m_nGeneration );
905  cloneSubElements( pNewOb->m_aSubElements );
906  unsigned int nEle = m_aSubElements.size();
907  for( unsigned int i = 0; i < nEle; i++ )
908  {
909  if( m_aSubElements[i].get() == m_pObject )
910  pNewOb->m_pObject = pNewOb->m_aSubElements[i].get();
911  else if( m_aSubElements[i].get() == m_pStream && pNewOb->m_pObject )
912  {
913  pNewOb->m_pStream = dynamic_cast<PDFStream*>(pNewOb->m_aSubElements[i].get());
914  PDFDict* pNewDict = dynamic_cast<PDFDict*>(pNewOb->m_pObject);
915  if (pNewDict && pNewOb->m_pStream)
916  pNewOb->m_pStream->m_pDict = pNewDict;
917  }
918  }
919  return pNewOb;
920 }
921 
923 {
924 }
925 
926 bool PDFTrailer::emit( EmitContext& rWriteContext ) const
927 {
928  // get xref offset
929  unsigned int nXRefPos = rWriteContext.getCurPos();
930  // begin xref section, object 0 is always free
931  if( ! rWriteContext.write( "xref\r\n"
932  "0 1\r\n"
933  "0000000000 65535 f\r\n", 31 ) )
934  return false;
935  // check if we are emitting a complete PDF file
936  EmitImplData* pEData = getEmitData( rWriteContext );
937  if( pEData )
938  {
939  // emit object xrefs
940  const EmitImplData::XRefTable& rXRefs = pEData->m_aXRefTable;
941  EmitImplData::XRefTable::const_iterator section_begin, section_end;
942  section_begin = rXRefs.begin();
943  while( section_begin != rXRefs.end() )
944  {
945  // find end of continuous object numbers
946  section_end = section_begin;
947  unsigned int nLast = section_begin->first;
948  while( (++section_end) != rXRefs.end() &&
949  section_end->first == nLast+1 )
950  nLast = section_end->first;
951  // write first object number and number of following entries
952  OStringBuffer aBuf( 21 );
953  aBuf.append( sal_Int32( section_begin->first ) );
954  aBuf.append( ' ' );
955  aBuf.append( sal_Int32(nLast - section_begin->first + 1) );
956  aBuf.append( "\r\n" );
957  if( ! rWriteContext.write( aBuf.getStr(), aBuf.getLength() ) )
958  return false;
959  while( section_begin != section_end )
960  {
961  // write 20 char entry of form
962  // 0000offset 00gen n\r\n
963  aBuf.setLength( 0 );
964  OString aOffset( OString::number( section_begin->second.second ) );
965  int nPad = 10 - aOffset.getLength();
966  for( int i = 0; i < nPad; i++ )
967  aBuf.append( '0' );
968  aBuf.append( aOffset );
969  aBuf.append( ' ' );
970  OString aGeneration( OString::number( section_begin->second.first ) );
971  nPad = 5 - aGeneration.getLength();
972  for( int i = 0; i < nPad; i++ )
973  aBuf.append( '0' );
974  aBuf.append( aGeneration );
975  aBuf.append( " n\r\n" );
976  if( ! rWriteContext.write( aBuf.getStr(), 20 ) )
977  return false;
978  ++section_begin;
979  }
980  }
981  }
982  if( ! rWriteContext.write( "trailer\n", 8 ) )
983  return false;
984  if( ! emitSubElements( rWriteContext ) )
985  return false;
986  if( ! rWriteContext.write( "startxref\n", 10 ) )
987  return false;
988  OString aOffset( OString::number( nXRefPos ) );
989  if( ! rWriteContext.write( aOffset.getStr(), aOffset.getLength() ) )
990  return false;
991  return rWriteContext.write( "\n%%EOF\n", 7 );
992 }
993 
995 {
996  PDFTrailer* pNewTr = new PDFTrailer();
997  cloneSubElements( pNewTr->m_aSubElements );
998  unsigned int nEle = m_aSubElements.size();
999  for( unsigned int i = 0; i < nEle; i++ )
1000  {
1001  if( m_aSubElements[i].get() == m_pDict )
1002  {
1003  pNewTr->m_pDict = dynamic_cast<PDFDict*>(pNewTr->m_aSubElements[i].get());
1004  break;
1005  }
1006  }
1007  return pNewTr;
1008 }
1009 
1010 #define ENCRYPTION_KEY_LEN 16
1011 #define ENCRYPTION_BUF_LEN 32
1012 
1013 namespace pdfparse {
1015 {
1018  sal_uInt32 m_nAlgoVersion;
1020  sal_uInt32 m_nKeyLength;
1023  sal_uInt32 m_nPEntry;
1024  OString m_aDocID;
1025  rtlCipher m_aCipher;
1026 
1027  sal_uInt8 m_aDecryptionKey[ENCRYPTION_KEY_LEN+5] = {}; // maximum handled key length
1028 
1030  m_bIsEncrypted( false ),
1031  m_bStandardHandler( false ),
1032  m_nAlgoVersion( 0 ),
1033  m_nStandardRevision( 0 ),
1034  m_nKeyLength( 0 ),
1035  m_nPEntry( 0 ),
1036  m_aCipher( nullptr )
1037  {
1038  }
1039 
1041  {
1042  if( m_aCipher )
1043  rtl_cipher_destroyARCFOUR( m_aCipher );
1044  }
1045 };
1046 }
1047 
1049  : PDFContainer(), m_nMajor( 0 ), m_nMinor( 0 )
1050 {
1051 }
1052 
1054 {
1055 }
1056 
1058 {
1059  return impl_getData()->m_bIsEncrypted;
1060 }
1061 
1062 bool PDFFile::decrypt( const sal_uInt8* pInBuffer, sal_uInt32 nLen, sal_uInt8* pOutBuffer,
1063  unsigned int nObject, unsigned int nGeneration ) const
1064 {
1065  if( ! isEncrypted() )
1066  return false;
1067 
1068  if( ! m_pData->m_aCipher )
1069  m_pData->m_aCipher = rtl_cipher_createARCFOUR( rtl_Cipher_ModeStream );
1070 
1071  // modify encryption key
1072  sal_uInt32 i = m_pData->m_nKeyLength;
1073  m_pData->m_aDecryptionKey[i++] = sal_uInt8(nObject&0xff);
1074  m_pData->m_aDecryptionKey[i++] = sal_uInt8((nObject>>8)&0xff);
1075  m_pData->m_aDecryptionKey[i++] = sal_uInt8((nObject>>16)&0xff);
1076  m_pData->m_aDecryptionKey[i++] = sal_uInt8(nGeneration&0xff);
1077  m_pData->m_aDecryptionKey[i++] = sal_uInt8((nGeneration>>8)&0xff);
1078 
1079  ::std::vector<unsigned char> const aSum(::comphelper::Hash::calculateHash(
1080  m_pData->m_aDecryptionKey, i, ::comphelper::HashType::MD5));
1081 
1082  if( i > 16 )
1083  i = 16;
1084 
1085  rtlCipherError aErr = rtl_cipher_initARCFOUR( m_pData->m_aCipher,
1086  rtl_Cipher_DirectionDecode,
1087  aSum.data(), i,
1088  nullptr, 0 );
1089  if( aErr == rtl_Cipher_E_None )
1090  aErr = rtl_cipher_decodeARCFOUR( m_pData->m_aCipher,
1091  pInBuffer, nLen,
1092  pOutBuffer, nLen );
1093  return aErr == rtl_Cipher_E_None;
1094 }
1095 
1097 {
1098  0x28, 0xBF, 0x4E, 0x5E, 0x4E, 0x75, 0x8A, 0x41, 0x64, 0x00, 0x4E, 0x56, 0xFF, 0xFA, 0x01, 0x08,
1099  0x2E, 0x2E, 0x00, 0xB6, 0xD0, 0x68, 0x3E, 0x80, 0x2F, 0x0C, 0xA9, 0xFE, 0x64, 0x53, 0x69, 0x7A
1100 };
1101 
1102 static void pad_or_truncate_to_32( const OString& rStr, char* pBuffer )
1103 {
1104  int nLen = rStr.getLength();
1105  if( nLen > 32 )
1106  nLen = 32;
1107  const char* pStr = rStr.getStr();
1108  memcpy( pBuffer, pStr, nLen );
1109  int i = 0;
1110  while( nLen < 32 )
1111  pBuffer[nLen++] = nPadString[i++];
1112 }
1113 
1114 // pass at least pData->m_nKeyLength bytes in
1115 static sal_uInt32 password_to_key( const OString& rPwd, sal_uInt8* pOutKey, PDFFileImplData const * pData, bool bComputeO )
1116 {
1117  // see PDF reference 1.4 Algorithm 3.2
1118  // encrypt pad string
1119  char aPadPwd[ENCRYPTION_BUF_LEN];
1120  pad_or_truncate_to_32( rPwd, aPadPwd );
1122  aDigest.update(reinterpret_cast<unsigned char const*>(aPadPwd), sizeof(aPadPwd));
1123  if( ! bComputeO )
1124  {
1125  aDigest.update(pData->m_aOEntry, 32);
1126  sal_uInt8 aPEntry[4];
1127  aPEntry[0] = static_cast<sal_uInt8>(pData->m_nPEntry & 0xff);
1128  aPEntry[1] = static_cast<sal_uInt8>((pData->m_nPEntry >> 8 ) & 0xff);
1129  aPEntry[2] = static_cast<sal_uInt8>((pData->m_nPEntry >> 16) & 0xff);
1130  aPEntry[3] = static_cast<sal_uInt8>((pData->m_nPEntry >> 24) & 0xff);
1131  aDigest.update(aPEntry, sizeof(aPEntry));
1132  aDigest.update(reinterpret_cast<unsigned char const*>(pData->m_aDocID.getStr()), pData->m_aDocID.getLength());
1133  }
1134  ::std::vector<unsigned char> nSum(aDigest.finalize());
1135  if( pData->m_nStandardRevision == 3 )
1136  {
1137  for( int i = 0; i < 50; i++ )
1138  {
1139  nSum = ::comphelper::Hash::calculateHash(nSum.data(), nSum.size(),
1140  ::comphelper::HashType::MD5);
1141  }
1142  }
1143  sal_uInt32 nLen = pData->m_nKeyLength;
1144  if( nLen > RTL_DIGEST_LENGTH_MD5 )
1145  nLen = RTL_DIGEST_LENGTH_MD5;
1146  memcpy( pOutKey, nSum.data(), nLen );
1147  return nLen;
1148 }
1149 
1150 static bool check_user_password( const OString& rPwd, PDFFileImplData* pData )
1151 {
1152  // see PDF reference 1.4 Algorithm 3.6
1153  bool bValid = false;
1155  sal_uInt32 nKeyLen = password_to_key( rPwd, aKey, pData, false );
1156  // save (at this time potential) decryption key for later use
1157  memcpy( pData->m_aDecryptionKey, aKey, nKeyLen );
1158  if( pData->m_nStandardRevision == 2 )
1159  {
1160  sal_uInt8 nEncryptedEntry[ENCRYPTION_BUF_LEN] = {};
1161  // see PDF reference 1.4 Algorithm 3.4
1162  // encrypt pad string
1163  if (rtl_cipher_initARCFOUR( pData->m_aCipher, rtl_Cipher_DirectionEncode,
1164  aKey, nKeyLen,
1165  nullptr, 0 )
1166  != rtl_Cipher_E_None)
1167  {
1168  return false; //TODO: differentiate "failed to decrypt" from "wrong password"
1169  }
1170  rtl_cipher_encodeARCFOUR( pData->m_aCipher, nPadString, sizeof( nPadString ),
1171  nEncryptedEntry, sizeof( nEncryptedEntry ) );
1172  bValid = (memcmp( nEncryptedEntry, pData->m_aUEntry, 32 ) == 0);
1173  }
1174  else if( pData->m_nStandardRevision == 3 )
1175  {
1176  // see PDF reference 1.4 Algorithm 3.5
1178  aDigest.update(nPadString, sizeof(nPadString));
1179  aDigest.update(reinterpret_cast<unsigned char const*>(pData->m_aDocID.getStr()), pData->m_aDocID.getLength());
1180  ::std::vector<unsigned char> nEncryptedEntry(aDigest.finalize());
1181  if (rtl_cipher_initARCFOUR( pData->m_aCipher, rtl_Cipher_DirectionEncode,
1182  aKey, sizeof(aKey), nullptr, 0 )
1183  != rtl_Cipher_E_None)
1184  {
1185  return false; //TODO: differentiate "failed to decrypt" from "wrong password"
1186  }
1187  rtl_cipher_encodeARCFOUR( pData->m_aCipher,
1188  nEncryptedEntry.data(), 16,
1189  nEncryptedEntry.data(), 16 ); // encrypt in place
1190  for( int i = 1; i <= 19; i++ ) // do it 19 times, start with 1
1191  {
1192  sal_uInt8 aTempKey[ENCRYPTION_KEY_LEN];
1193  for( size_t j = 0; j < sizeof(aTempKey); j++ )
1194  aTempKey[j] = static_cast<sal_uInt8>( aKey[j] ^ i );
1195 
1196  if (rtl_cipher_initARCFOUR( pData->m_aCipher, rtl_Cipher_DirectionEncode,
1197  aTempKey, sizeof(aTempKey), nullptr, 0 )
1198  != rtl_Cipher_E_None)
1199  {
1200  return false; //TODO: differentiate "failed to decrypt" from "wrong password"
1201  }
1202  rtl_cipher_encodeARCFOUR( pData->m_aCipher,
1203  nEncryptedEntry.data(), 16,
1204  nEncryptedEntry.data(), 16 ); // encrypt in place
1205  }
1206  bValid = (memcmp( nEncryptedEntry.data(), pData->m_aUEntry, 16 ) == 0);
1207  }
1208  return bValid;
1209 }
1210 
1212 {
1213  return m_pData->m_bStandardHandler &&
1214  m_pData->m_nAlgoVersion >= 1 &&
1215  m_pData->m_nAlgoVersion <= 2 &&
1216  m_pData->m_nStandardRevision >= 2 &&
1217  m_pData->m_nStandardRevision <= 3;
1218 }
1219 
1220 bool PDFFile::setupDecryptionData( const OString& rPwd ) const
1221 {
1222  if( !impl_getData()->m_bIsEncrypted )
1223  return rPwd.isEmpty();
1224 
1225  // check if we can handle this encryption at all
1227  return false;
1228 
1229  if( ! m_pData->m_aCipher )
1230  m_pData->m_aCipher = rtl_cipher_createARCFOUR(rtl_Cipher_ModeStream);
1231 
1232  // first try user password
1233  bool bValid = check_user_password( rPwd, m_pData.get() );
1234 
1235  if( ! bValid )
1236  {
1237  // try owner password
1238  // see PDF reference 1.4 Algorithm 3.7
1240  sal_uInt8 nPwd[ENCRYPTION_BUF_LEN] = {};
1241  sal_uInt32 nKeyLen = password_to_key( rPwd, aKey, m_pData.get(), true );
1242  if( m_pData->m_nStandardRevision == 2 )
1243  {
1244  if (rtl_cipher_initARCFOUR( m_pData->m_aCipher, rtl_Cipher_DirectionDecode,
1245  aKey, nKeyLen, nullptr, 0 )
1246  != rtl_Cipher_E_None)
1247  {
1248  return false; //TODO: differentiate "failed to decrypt" from "wrong password"
1249  }
1250  rtl_cipher_decodeARCFOUR( m_pData->m_aCipher,
1251  m_pData->m_aOEntry, 32,
1252  nPwd, 32 );
1253  }
1254  else if( m_pData->m_nStandardRevision == 3 )
1255  {
1256  memcpy( nPwd, m_pData->m_aOEntry, 32 );
1257  for( int i = 19; i >= 0; i-- )
1258  {
1259  sal_uInt8 nTempKey[ENCRYPTION_KEY_LEN];
1260  for( size_t j = 0; j < sizeof(nTempKey); j++ )
1261  nTempKey[j] = sal_uInt8(aKey[j] ^ i);
1262  if (rtl_cipher_initARCFOUR( m_pData->m_aCipher, rtl_Cipher_DirectionDecode,
1263  nTempKey, nKeyLen, nullptr, 0 )
1264  != rtl_Cipher_E_None)
1265  {
1266  return false; //TODO: differentiate "failed to decrypt" from "wrong password"
1267  }
1268  rtl_cipher_decodeARCFOUR( m_pData->m_aCipher,
1269  nPwd, 32,
1270  nPwd, 32 ); // decrypt inplace
1271  }
1272  }
1273  bValid = check_user_password( OString( reinterpret_cast<char*>(nPwd), 32 ), m_pData.get() );
1274  }
1275 
1276  return bValid;
1277 }
1278 
1280 {
1281  if( m_pData )
1282  return m_pData.get();
1283  m_pData.reset( new PDFFileImplData );
1284  // check for encryption dict in a trailer
1285  unsigned int nElements = m_aSubElements.size();
1286  while( nElements-- > 0 )
1287  {
1288  PDFTrailer* pTrailer = dynamic_cast<PDFTrailer*>(m_aSubElements[nElements].get());
1289  if( pTrailer && pTrailer->m_pDict )
1290  {
1291  // search doc id
1292  PDFDict::Map::iterator doc_id = pTrailer->m_pDict->m_aMap.find( "ID" );
1293  if( doc_id != pTrailer->m_pDict->m_aMap.end() )
1294  {
1295  PDFArray* pArr = dynamic_cast<PDFArray*>(doc_id->second);
1296  if( pArr && !pArr->m_aSubElements.empty() )
1297  {
1298  PDFString* pStr = dynamic_cast<PDFString*>(pArr->m_aSubElements[0].get());
1299  if( pStr )
1300  m_pData->m_aDocID = pStr->getFilteredString();
1301 #if OSL_DEBUG_LEVEL > 0
1302  OUStringBuffer aTmp;
1303  for( int i = 0; i < m_pData->m_aDocID.getLength(); i++ )
1304  aTmp.append(OUString::number(static_cast<unsigned int>(sal_uInt8(m_pData->m_aDocID[i])), 16));
1305  SAL_INFO("sdext.pdfimport.pdfparse", "DocId is <" << aTmp.makeStringAndClear() << ">");
1306 #endif
1307  }
1308  }
1309  // search Encrypt entry
1310  PDFDict::Map::iterator enc =
1311  pTrailer->m_pDict->m_aMap.find( "Encrypt" );
1312  if( enc != pTrailer->m_pDict->m_aMap.end() )
1313  {
1314  PDFDict* pDict = dynamic_cast<PDFDict*>(enc->second);
1315  if( ! pDict )
1316  {
1317  PDFObjectRef* pRef = dynamic_cast<PDFObjectRef*>(enc->second);
1318  if( pRef )
1319  {
1320  PDFObject* pObj = findObject( pRef );
1321  if( pObj && pObj->m_pObject )
1322  pDict = dynamic_cast<PDFDict*>(pObj->m_pObject);
1323  }
1324  }
1325  if( pDict )
1326  {
1327  PDFDict::Map::iterator filter = pDict->m_aMap.find( "Filter" );
1328  PDFDict::Map::iterator version = pDict->m_aMap.find( "V" );
1329  PDFDict::Map::iterator len = pDict->m_aMap.find( "Length" );
1330  PDFDict::Map::iterator o_ent = pDict->m_aMap.find( "O" );
1331  PDFDict::Map::iterator u_ent = pDict->m_aMap.find( "U" );
1332  PDFDict::Map::iterator r_ent = pDict->m_aMap.find( "R" );
1333  PDFDict::Map::iterator p_ent = pDict->m_aMap.find( "P" );
1334  if( filter != pDict->m_aMap.end() )
1335  {
1336  m_pData->m_bIsEncrypted = true;
1337  m_pData->m_nKeyLength = 5;
1338  if( version != pDict->m_aMap.end() )
1339  {
1340  PDFNumber* pNum = dynamic_cast<PDFNumber*>(version->second);
1341  if( pNum )
1342  m_pData->m_nAlgoVersion = static_cast<sal_uInt32>(pNum->m_fValue);
1343  }
1344  if( m_pData->m_nAlgoVersion >= 3 )
1345  m_pData->m_nKeyLength = 16;
1346  if( len != pDict->m_aMap.end() )
1347  {
1348  PDFNumber* pNum = dynamic_cast<PDFNumber*>(len->second);
1349  if( pNum )
1350  m_pData->m_nKeyLength = static_cast<sal_uInt32>(pNum->m_fValue) / 8;
1351  }
1352  PDFName* pFilter = dynamic_cast<PDFName*>(filter->second);
1353  if( pFilter && pFilter->getFilteredName() == "Standard" )
1354  m_pData->m_bStandardHandler = true;
1355  if( o_ent != pDict->m_aMap.end() )
1356  {
1357  PDFString* pString = dynamic_cast<PDFString*>(o_ent->second);
1358  if( pString )
1359  {
1360  OString aEnt = pString->getFilteredString();
1361  if( aEnt.getLength() == 32 )
1362  memcpy( m_pData->m_aOEntry, aEnt.getStr(), 32 );
1363 #if OSL_DEBUG_LEVEL > 0
1364  else
1365  {
1366  OUStringBuffer aTmp;
1367  for( int i = 0; i < aEnt.getLength(); i++ )
1368  aTmp.append(" ").append(OUString::number(static_cast<unsigned int>(sal_uInt8(aEnt[i])), 16));
1369  SAL_WARN("sdext.pdfimport.pdfparse",
1370  "O entry has length " << static_cast<int>(aEnt.getLength()) << ", should be 32 <" << aTmp.makeStringAndClear() << ">" );
1371  }
1372 #endif
1373  }
1374  }
1375  if( u_ent != pDict->m_aMap.end() )
1376  {
1377  PDFString* pString = dynamic_cast<PDFString*>(u_ent->second);
1378  if( pString )
1379  {
1380  OString aEnt = pString->getFilteredString();
1381  if( aEnt.getLength() == 32 )
1382  memcpy( m_pData->m_aUEntry, aEnt.getStr(), 32 );
1383 #if OSL_DEBUG_LEVEL > 0
1384  else
1385  {
1386  OUStringBuffer aTmp;
1387  for( int i = 0; i < aEnt.getLength(); i++ )
1388  aTmp.append(" ").append(OUString::number(static_cast<unsigned int>(sal_uInt8(aEnt[i])), 16));
1389  SAL_WARN("sdext.pdfimport.pdfparse",
1390  "U entry has length " << static_cast<int>(aEnt.getLength()) << ", should be 32 <" << aTmp.makeStringAndClear() << ">" );
1391  }
1392 #endif
1393  }
1394  }
1395  if( r_ent != pDict->m_aMap.end() )
1396  {
1397  PDFNumber* pNum = dynamic_cast<PDFNumber*>(r_ent->second);
1398  if( pNum )
1399  m_pData->m_nStandardRevision = static_cast<sal_uInt32>(pNum->m_fValue);
1400  }
1401  if( p_ent != pDict->m_aMap.end() )
1402  {
1403  PDFNumber* pNum = dynamic_cast<PDFNumber*>(p_ent->second);
1404  if( pNum )
1405  m_pData->m_nPEntry = static_cast<sal_uInt32>(static_cast<sal_Int32>(pNum->m_fValue));
1406  SAL_INFO("sdext.pdfimport.pdfparse", "p entry is " << m_pData->m_nPEntry );
1407  }
1408 
1409  SAL_INFO("sdext.pdfimport.pdfparse", "Encryption dict: sec handler: " << (pFilter ? pFilter->getFilteredName() : OUString("<unknown>")) << ", version = " << static_cast<int>(m_pData->m_nAlgoVersion) << ", revision = " << static_cast<int>(m_pData->m_nStandardRevision) << ", key length = " << m_pData->m_nKeyLength );
1410  break;
1411  }
1412  }
1413  }
1414  }
1415  }
1416 
1417  return m_pData.get();
1418 }
1419 
1420 bool PDFFile::emit( EmitContext& rWriteContext ) const
1421 {
1422  setEmitData( rWriteContext, new EmitImplData( this ) );
1423 
1424  OString aBuf =
1425  "%PDF-" +
1426  OString::number( sal_Int32( m_nMajor ) ) +
1427  "." +
1428  OString::number( sal_Int32( m_nMinor ) ) +
1429  "\n";
1430  if( ! rWriteContext.write( aBuf.getStr(), aBuf.getLength() ) )
1431  return false;
1432  return emitSubElements( rWriteContext );
1433 }
1434 
1436 {
1437  PDFFile* pNewFl = new PDFFile();
1438  pNewFl->m_nMajor = m_nMajor;
1439  pNewFl->m_nMinor = m_nMinor;
1440  cloneSubElements( pNewFl->m_aSubElements );
1441  return pNewFl;
1442 }
1443 
1445 {
1446 }
1447 
1448 bool PDFPart::emit( EmitContext& rWriteContext ) const
1449 {
1450  return emitSubElements( rWriteContext );
1451 }
1452 
1454 {
1455  PDFPart* pNewPt = new PDFPart();
1456  cloneSubElements( pNewPt->m_aSubElements );
1457  return pNewPt;
1458 }
1459 
1460 /* vim:set shiftwidth=4 softtabstop=4 expandtab: */
virtual bool emit(EmitContext &rWriteContext) const override
Definition: pdfentries.cxx:926
virtual PDFEntry * clone() const override
Definition: pdfentries.cxx:240
void setDecryptObject(unsigned int nObject, unsigned int nGeneration)
Definition: pdfentries.cxx:84
virtual ~PDFNumber() override
Definition: pdfentries.cxx:337
virtual ~PDFComment() override
Definition: pdfentries.cxx:127
#define ENCRYPTION_BUF_LEN
const sal_uInt8 nPadString[32]
virtual bool emit(EmitContext &rWriteContext) const override
Definition: pdfentries.cxx:399
virtual ~PDFFile() override
std::map< unsigned int, std::pair< unsigned int, unsigned int > > XRefTable
Definition: pdfentries.cxx:46
OString getFilteredString() const
Definition: pdfentries.cxx:245
virtual bool emit(EmitContext &rWriteContext) const override
Definition: pdfentries.cxx:341
PDFComment(const OString &rComment)
Definition: pdfparse.hxx:74
virtual bool write(const void *pBuf, unsigned int nLen)=0
std::unique_ptr< PDFFileImplData > m_pData
Definition: pdfparse.hxx:231
unsigned int m_nBeginOffset
Definition: pdfparse.hxx:204
virtual PDFEntry * clone() const override
Definition: pdfentries.cxx:389
virtual ~PDFPart() override
static void pad_or_truncate_to_32(const OString &rStr, char *pBuffer)
virtual bool emit(EmitContext &rWriteContext) const override
Definition: pdfentries.cxx:131
PDFObject * findObject(unsigned int nNumber, unsigned int nGeneration) const
Definition: pdfentries.cxx:475
unsigned int m_nNumber
Definition: pdfparse.hxx:138
aBuf
bool decrypt(const sal_uInt8 *pInBuffer, sal_uInt32 nLen, sal_uInt8 *pOutBuffer, unsigned int nObject, unsigned int nGeneration) const
virtual bool emit(EmitContext &rWriteContext) const override
Definition: pdfentries.cxx:413
unsigned int m_nEndOffset
Definition: pdfparse.hxx:205
PDFString(const OString &rString)
Definition: pdfparse.hxx:105
unsigned int m_nGeneration
Definition: pdfparse.hxx:139
PDFObjectRef(unsigned int nNr, unsigned int nGen)
Definition: pdfparse.hxx:141
virtual bool emit(EmitContext &rWriteContext) const override
EmbeddedObjectRef * pObject
PDFBool(bool bVal)
Definition: pdfparse.hxx:129
virtual bool emit(EmitContext &rWriteContext) const override
Definition: pdfentries.cxx:145
virtual ~PDFValue() override
Definition: pdfentries.cxx:123
virtual ~PDFDict() override
Definition: pdfentries.cxx:511
virtual bool emit(EmitContext &rWriteContext) const override
Definition: pdfentries.cxx:515
std::vector< std::unique_ptr< PDFEntry > > m_aSubElements
Definition: pdfparse.hxx:160
PDFNumber(double fVal)
Definition: pdfparse.hxx:118
unsigned int m_nDecryptObject
Definition: pdfentries.cxx:50
bool emitSubElements(EmitContext &rWriteContext) const
Definition: pdfentries.cxx:448
virtual bool emit(EmitContext &rWriteContext) const override
Definition: pdfentries.cxx:611
virtual PDFEntry * clone() const override
Definition: pdfentries.cxx:152
const BorderLinePrimitive2D *pCandidateB assert(pCandidateA)
void eraseValue(const OString &rName)
Definition: pdfentries.cxx:553
virtual ~PDFBool() override
Definition: pdfentries.cxx:395
virtual ~PDFNull() override
Definition: pdfentries.cxx:409
sal_Int32 nElements
virtual ~PDFName() override
Definition: pdfentries.cxx:141
OUString getFilteredName() const
Definition: pdfentries.cxx:157
bool usesSupportedEncryptionFormat() const
unsigned int m_nMajor
Definition: pdfparse.hxx:234
virtual ~PDFObject() override
Definition: pdfentries.cxx:654
virtual PDFEntry * clone() const override
Definition: pdfentries.cxx:418
virtual PDFEntry * clone() const override
Definition: pdfentries.cxx:616
static EmitImplData * getEmitData(EmitContext const &rContext)
Definition: pdfentries.cxx:111
err
sal_uInt16 char * pName
PDFStream * m_pStream
Definition: pdfparse.hxx:260
virtual PDFEntry * clone() const override
Definition: pdfentries.cxx:599
void insertValue(const OString &rName, std::unique_ptr< PDFEntry > pValue)
Definition: pdfentries.cxx:524
int i
virtual ~PDFStream() override
Definition: pdfentries.cxx:607
bool getDeflatedStream(std::unique_ptr< char[]> &rpStream, unsigned int *pBytes, const PDFContainer *pObjectContainer, EmitContext &rContext) const
Definition: pdfentries.cxx:658
virtual unsigned int getCurPos()=0
virtual unsigned int readOrigBytes(unsigned int nOrigOffset, unsigned int nLen, void *pBuf)=0
bool isEncrypted() const
virtual PDFEntry * clone() const override
virtual PDFEntry * clone() const override
Definition: pdfentries.cxx:136
virtual bool copyOrigBytes(unsigned int nOrigOffset, unsigned int nLen)=0
virtual PDFEntry * clone() const override
Definition: pdfentries.cxx:439
virtual ~PDFArray() override
Definition: pdfentries.cxx:491
#define ENCRYPTION_KEY_LEN
PDFEntry * buildMap()
Definition: pdfentries.cxx:576
virtual PDFEntry * clone() const override
Definition: pdfentries.cxx:994
static bool check_user_password(const OString &rPwd, PDFFileImplData *pData)
virtual bool emit(EmitContext &rWriteContext) const override
bool insertXref(unsigned int nObject, unsigned int nGeneration, unsigned int nOffset)
Definition: pdfentries.cxx:54
virtual bool emit(EmitContext &rWriteContext) const override
Definition: pdfentries.cxx:495
PDFFileImplData * impl_getData() const
virtual PDFEntry * clone() const override
Definition: pdfentries.cxx:902
static void unzipToBuffer(char *pBegin, unsigned int nLen, sal_uInt8 **pOutBuf, sal_uInt32 *pOutLen)
Definition: pdfentries.cxx:724
virtual ~PDFString() override
Definition: pdfentries.cxx:189
virtual PDFEntry * clone() const override
Definition: pdfentries.cxx:504
static void setEmitData(EmitContext &rContext, EmitImplData *pNewEmitData)
Definition: pdfentries.cxx:116
PDFEntry * m_pObject
Definition: pdfparse.hxx:259
unsigned int m_nNumber
Definition: pdfparse.hxx:261
EmitImplData(const PDFContainer *pTopContainer)
Definition: pdfentries.cxx:72
unsigned char sal_uInt8
void update(const unsigned char *pInput, size_t length)
#define SAL_INFO(area, stream)
void cloneSubElements(std::vector< std::unique_ptr< PDFEntry >> &rNewSubElements) const
Definition: pdfentries.cxx:468
EmitContext(const PDFContainer *pTop=nullptr)
Definition: pdfentries.cxx:95
void decrypt(const sal_uInt8 *pInBuffer, sal_uInt32 nLen, sal_uInt8 *pOutBuffer, unsigned int nObject, unsigned int nGeneration) const
Definition: pdfentries.cxx:77
void * p
PDFName(const OString &rName)
Definition: pdfparse.hxx:92
virtual ~PDFContainer() override
Definition: pdfentries.cxx:444
static std::vector< unsigned char > calculateHash(const unsigned char *pInput, size_t length, HashType eType)
PDFObject(unsigned int nNr, unsigned int nGen)
Definition: pdfparse.hxx:264
virtual bool emit(EmitContext &rWriteContext) const override
Definition: pdfentries.cxx:193
static sal_uInt32 password_to_key(const OString &rPwd, sal_uInt8 *pOutKey, PDFFileImplData const *pData, bool bComputeO)
unsigned int getDictLength(const PDFContainer *pObjectContainer) const
Definition: pdfentries.cxx:621
#define SAL_WARN(area, stream)
virtual bool emit(EmitContext &rWriteContext) const override
Definition: pdfentries.cxx:800
virtual PDFEntry * clone() const override
Definition: pdfentries.cxx:404
virtual ~PDFObjectRef() override
Definition: pdfentries.cxx:424
std::unique_ptr< EmitImplData > m_pImplData
Definition: pdfparse.hxx:54
unsigned int m_nGeneration
Definition: pdfparse.hxx:262
bool setupDecryptionData(const OString &rPwd) const
std::vector< unsigned char > finalize()
virtual ~PDFTrailer() override
Definition: pdfentries.cxx:922
virtual PDFEntry * clone() const override
PDFStream(unsigned int nBegin, unsigned int nEnd, PDFDict *pStreamDict)
Definition: pdfparse.hxx:208
virtual bool emit(EmitContext &rWriteContext) const override
Definition: pdfentries.cxx:428
const PDFContainer * m_pObjectContainer
Definition: pdfentries.cxx:49
sal_uInt8 m_aDecryptionKey[ENCRYPTION_KEY_LEN+5]
unsigned int m_nMinor
Definition: pdfparse.hxx:235
static osl::File * pStream
Definition: emitcontext.cxx:32
unsigned int m_nDecryptGeneration
Definition: pdfentries.cxx:51
void writeStream(EmitContext &rContext, const PDFFile *pPDFFile) const
Definition: pdfentries.cxx:781
virtual bool emit(EmitContext &rWriteContext) const =0