LibreOffice Module vcl (master)  1
pdfdocument.cxx
Go to the documentation of this file.
1 /* -*- Mode: C++; tab-width: 4; indent-tabs-mode: nil; c-basic-offset: 4 -*- */
2 /*
3  * This file is part of the LibreOffice project.
4  *
5  * This Source Code Form is subject to the terms of the Mozilla Public
6  * License, v. 2.0. If a copy of the MPL was not distributed with this
7  * file, You can obtain one at http://mozilla.org/MPL/2.0/.
8  */
9 
11 
12 #include <map>
13 #include <memory>
14 #include <vector>
15 
16 #include <com/sun/star/uno/Sequence.hxx>
17 #include <com/sun/star/security/XCertificate.hpp>
18 
20 #include <comphelper/string.hxx>
21 #include <rtl/character.hxx>
22 #include <rtl/strbuf.hxx>
23 #include <rtl/string.hxx>
24 #include <sal/log.hxx>
25 #include <sal/types.h>
26 #include <svl/cryptosign.hxx>
27 #include <tools/zcodec.hxx>
28 #include <vcl/pdfwriter.hxx>
29 
30 using namespace com::sun::star;
31 
32 namespace vcl
33 {
34 namespace filter
35 {
36 const int MAX_SIGNATURE_CONTENT_LENGTH = 50000;
37 
38 class PDFTrailerElement;
39 
42 {
44  OString m_aComment;
45 
46 public:
47  explicit PDFCommentElement(PDFDocument& rDoc);
48  bool Read(SvStream& rStream) override;
49 };
50 
52 
55 {
57  sal_uInt64 m_nLocation = 0;
58 
59 public:
61  bool Read(SvStream& rStream) override;
62  sal_uInt64 GetLocation() const;
63 };
64 
67 {
68 public:
69  bool Read(SvStream& rStream) override;
70 };
71 
74 {
75 public:
76  bool Read(SvStream& rStream) override;
77 };
78 
81 {
83  sal_uInt64 m_nOffset = 0;
84 
85 public:
87  bool Read(SvStream& rStream) override;
88  sal_uInt64 GetOffset() const;
89 };
90 
93 {
94 public:
95  explicit PDFBooleanElement(bool bValue);
96  bool Read(SvStream& rStream) override;
97 };
98 
101 {
102 public:
103  bool Read(SvStream& rStream) override;
104 };
105 
108 {
110  std::map<OString, PDFElement*> m_aDictionary;
112  sal_uInt64 m_nOffset = 0;
113 
114 public:
115  explicit PDFTrailerElement(PDFDocument& rDoc);
116  bool Read(SvStream& rStream) override;
117  PDFElement* Lookup(const OString& rDictionaryKey);
118  sal_uInt64 GetLocation() const;
119 };
120 
// Out-of-line definition of the compiler-generated default constructor of XRefEntry.
121 XRefEntry::XRefEntry() = default;
122 
// Out-of-line definition of the compiler-generated default constructor of PDFDocument.
123 PDFDocument::PDFDocument() = default;
124 
125 bool PDFDocument::RemoveSignature(size_t nPosition)
126 {
127  std::vector<PDFObjectElement*> aSignatures = GetSignatureWidgets();
128  if (nPosition >= aSignatures.size())
129  {
130  SAL_WARN("vcl.filter", "PDFDocument::RemoveSignature: invalid nPosition");
131  return false;
132  }
133 
134  if (aSignatures.size() != m_aEOFs.size() - 1)
135  {
136  SAL_WARN("vcl.filter", "PDFDocument::RemoveSignature: no 1:1 mapping between signatures "
137  "and incremental updates");
138  return false;
139  }
140 
141  // The EOF offset is the end of the original file, without the signature at
142  // nPosition.
143  m_aEditBuffer.Seek(m_aEOFs[nPosition]);
144  // Drop all bytes after the current position.
145  m_aEditBuffer.SetStreamSize(m_aEditBuffer.Tell() + 1);
146 
147  return m_aEditBuffer.good();
148 }
149 
150 sal_uInt32 PDFDocument::GetNextSignature()
151 {
152  sal_uInt32 nRet = 0;
153  for (const auto& pSignature : GetSignatureWidgets())
154  {
155  auto pT = dynamic_cast<PDFLiteralStringElement*>(pSignature->Lookup("T"));
156  if (!pT)
157  continue;
158 
159  const OString& rValue = pT->GetValue();
160  const OString aPrefix = "Signature";
161  if (!rValue.startsWith(aPrefix))
162  continue;
163 
164  nRet = std::max(nRet, rValue.copy(aPrefix.getLength()).toUInt32());
165  }
166 
167  return nRet + 1;
168 }
169 
170 sal_Int32 PDFDocument::WriteSignatureObject(const OUString& rDescription, bool bAdES,
171  sal_uInt64& rLastByteRangeOffset,
172  sal_Int64& rContentOffset)
173 {
174  // Write signature object.
175  sal_Int32 nSignatureId = m_aXRef.size();
176  XRefEntry aSignatureEntry;
177  aSignatureEntry.SetOffset(m_aEditBuffer.Tell());
178  aSignatureEntry.SetDirty(true);
179  m_aXRef[nSignatureId] = aSignatureEntry;
180  OStringBuffer aSigBuffer;
181  aSigBuffer.append(nSignatureId);
182  aSigBuffer.append(" 0 obj\n");
183  aSigBuffer.append("<</Contents <");
184  rContentOffset = aSignatureEntry.GetOffset() + aSigBuffer.getLength();
185  // Reserve space for the PKCS#7 object.
186  OStringBuffer aContentFiller(MAX_SIGNATURE_CONTENT_LENGTH);
187  comphelper::string::padToLength(aContentFiller, MAX_SIGNATURE_CONTENT_LENGTH, '0');
188  aSigBuffer.append(aContentFiller.makeStringAndClear());
189  aSigBuffer.append(">\n/Type/Sig/SubFilter");
190  if (bAdES)
191  aSigBuffer.append("/ETSI.CAdES.detached");
192  else
193  aSigBuffer.append("/adbe.pkcs7.detached");
194 
195  // Time of signing.
196  aSigBuffer.append(" /M (");
197  aSigBuffer.append(vcl::PDFWriter::GetDateTime());
198  aSigBuffer.append(")");
199 
200  // Byte range: we can write offset1-length1 and offset2 right now, will
201  // write length2 later.
202  aSigBuffer.append(" /ByteRange [ 0 ");
203  // -1 and +1 is the leading "<" and the trailing ">" around the hex string.
204  aSigBuffer.append(rContentOffset - 1);
205  aSigBuffer.append(" ");
206  aSigBuffer.append(rContentOffset + MAX_SIGNATURE_CONTENT_LENGTH + 1);
207  aSigBuffer.append(" ");
208  rLastByteRangeOffset = aSignatureEntry.GetOffset() + aSigBuffer.getLength();
209  // We don't know how many bytes we need for the last ByteRange value, this
210  // should be enough.
211  OStringBuffer aByteRangeFiller;
212  comphelper::string::padToLength(aByteRangeFiller, 100, ' ');
213  aSigBuffer.append(aByteRangeFiller.makeStringAndClear());
214  // Finish the Sig obj.
215  aSigBuffer.append(" /Filter/Adobe.PPKMS");
216 
217  if (!rDescription.isEmpty())
218  {
219  aSigBuffer.append("/Reason<");
220  vcl::PDFWriter::AppendUnicodeTextString(rDescription, aSigBuffer);
221  aSigBuffer.append(">");
222  }
223 
224  aSigBuffer.append(" >>\nendobj\n\n");
225  m_aEditBuffer.WriteOString(aSigBuffer.toString());
226 
227  return nSignatureId;
228 }
229 
230 sal_Int32 PDFDocument::WriteAppearanceObject()
231 {
232  // Write appearance object.
233  sal_Int32 nAppearanceId = m_aXRef.size();
234  XRefEntry aAppearanceEntry;
235  aAppearanceEntry.SetOffset(m_aEditBuffer.Tell());
236  aAppearanceEntry.SetDirty(true);
237  m_aXRef[nAppearanceId] = aAppearanceEntry;
238  m_aEditBuffer.WriteUInt32AsString(nAppearanceId);
239  m_aEditBuffer.WriteCharPtr(" 0 obj\n");
240  m_aEditBuffer.WriteCharPtr("<</Type/XObject\n/Subtype/Form\n");
241  m_aEditBuffer.WriteCharPtr("/BBox[0 0 0 0]\n/Length 0\n>>\n");
242  m_aEditBuffer.WriteCharPtr("stream\n\nendstream\nendobj\n\n");
243 
244  return nAppearanceId;
245 }
246 
247 sal_Int32 PDFDocument::WriteAnnotObject(PDFObjectElement const& rFirstPage, sal_Int32 nSignatureId,
248  sal_Int32 nAppearanceId)
249 {
250  // Decide what identifier to use for the new signature.
251  sal_uInt32 nNextSignature = GetNextSignature();
252 
253  // Write the Annot object, references nSignatureId and nAppearanceId.
254  sal_Int32 nAnnotId = m_aXRef.size();
255  XRefEntry aAnnotEntry;
256  aAnnotEntry.SetOffset(m_aEditBuffer.Tell());
257  aAnnotEntry.SetDirty(true);
258  m_aXRef[nAnnotId] = aAnnotEntry;
259  m_aEditBuffer.WriteUInt32AsString(nAnnotId);
260  m_aEditBuffer.WriteCharPtr(" 0 obj\n");
261  m_aEditBuffer.WriteCharPtr("<</Type/Annot/Subtype/Widget/F 132\n");
262  m_aEditBuffer.WriteCharPtr("/Rect[0 0 0 0]\n");
263  m_aEditBuffer.WriteCharPtr("/FT/Sig\n");
264  m_aEditBuffer.WriteCharPtr("/P ");
265  m_aEditBuffer.WriteUInt32AsString(rFirstPage.GetObjectValue());
266  m_aEditBuffer.WriteCharPtr(" 0 R\n");
267  m_aEditBuffer.WriteCharPtr("/T(Signature");
268  m_aEditBuffer.WriteUInt32AsString(nNextSignature);
269  m_aEditBuffer.WriteCharPtr(")\n");
270  m_aEditBuffer.WriteCharPtr("/V ");
271  m_aEditBuffer.WriteUInt32AsString(nSignatureId);
272  m_aEditBuffer.WriteCharPtr(" 0 R\n");
273  m_aEditBuffer.WriteCharPtr("/DV ");
274  m_aEditBuffer.WriteUInt32AsString(nSignatureId);
275  m_aEditBuffer.WriteCharPtr(" 0 R\n");
276  m_aEditBuffer.WriteCharPtr("/AP<<\n/N ");
277  m_aEditBuffer.WriteUInt32AsString(nAppearanceId);
278  m_aEditBuffer.WriteCharPtr(" 0 R\n>>\n");
279  m_aEditBuffer.WriteCharPtr(">>\nendobj\n\n");
280 
281  return nAnnotId;
282 }
283 
284 bool PDFDocument::WritePageObject(PDFObjectElement& rFirstPage, sal_Int32 nAnnotId)
285 {
286  PDFElement* pAnnots = rFirstPage.Lookup("Annots");
287  auto pAnnotsReference = dynamic_cast<PDFReferenceElement*>(pAnnots);
288  if (pAnnotsReference)
289  {
290  // Write the updated Annots key of the Page object.
291  PDFObjectElement* pAnnotsObject = pAnnotsReference->LookupObject();
292  if (!pAnnotsObject)
293  {
294  SAL_WARN("vcl.filter", "PDFDocument::Sign: invalid Annots reference");
295  return false;
296  }
297 
298  sal_uInt32 nAnnotsId = pAnnotsObject->GetObjectValue();
299  m_aXRef[nAnnotsId].SetType(XRefEntryType::NOT_COMPRESSED);
300  m_aXRef[nAnnotsId].SetOffset(m_aEditBuffer.Tell());
301  m_aXRef[nAnnotsId].SetDirty(true);
302  m_aEditBuffer.WriteUInt32AsString(nAnnotsId);
303  m_aEditBuffer.WriteCharPtr(" 0 obj\n[");
304 
305  // Write existing references.
306  PDFArrayElement* pArray = pAnnotsObject->GetArray();
307  if (!pArray)
308  {
309  SAL_WARN("vcl.filter", "PDFDocument::Sign: Page Annots is a reference to a non-array");
310  return false;
311  }
312 
313  for (size_t i = 0; i < pArray->GetElements().size(); ++i)
314  {
315  auto pReference = dynamic_cast<PDFReferenceElement*>(pArray->GetElements()[i]);
316  if (!pReference)
317  continue;
318 
319  if (i)
320  m_aEditBuffer.WriteCharPtr(" ");
321  m_aEditBuffer.WriteUInt32AsString(pReference->GetObjectValue());
322  m_aEditBuffer.WriteCharPtr(" 0 R");
323  }
324  // Write our reference.
325  m_aEditBuffer.WriteCharPtr(" ");
326  m_aEditBuffer.WriteUInt32AsString(nAnnotId);
327  m_aEditBuffer.WriteCharPtr(" 0 R");
328 
329  m_aEditBuffer.WriteCharPtr("]\nendobj\n\n");
330  }
331  else
332  {
333  // Write the updated first page object, references nAnnotId.
334  sal_uInt32 nFirstPageId = rFirstPage.GetObjectValue();
335  if (nFirstPageId >= m_aXRef.size())
336  {
337  SAL_WARN("vcl.filter", "PDFDocument::Sign: invalid first page obj id");
338  return false;
339  }
340  m_aXRef[nFirstPageId].SetOffset(m_aEditBuffer.Tell());
341  m_aXRef[nFirstPageId].SetDirty(true);
342  m_aEditBuffer.WriteUInt32AsString(nFirstPageId);
343  m_aEditBuffer.WriteCharPtr(" 0 obj\n");
344  m_aEditBuffer.WriteCharPtr("<<");
345  auto pAnnotsArray = dynamic_cast<PDFArrayElement*>(pAnnots);
346  if (!pAnnotsArray)
347  {
348  // No Annots key, just write the key with a single reference.
349  m_aEditBuffer.WriteBytes(static_cast<const char*>(m_aEditBuffer.GetData())
350  + rFirstPage.GetDictionaryOffset(),
351  rFirstPage.GetDictionaryLength());
352  m_aEditBuffer.WriteCharPtr("/Annots[");
353  m_aEditBuffer.WriteUInt32AsString(nAnnotId);
354  m_aEditBuffer.WriteCharPtr(" 0 R]");
355  }
356  else
357  {
358  // Annots key is already there, insert our reference at the end.
359  PDFDictionaryElement* pDictionary = rFirstPage.GetDictionary();
360 
361  // Offset right before the end of the Annots array.
362  sal_uInt64 nAnnotsEndOffset = pDictionary->GetKeyOffset("Annots")
363  + pDictionary->GetKeyValueLength("Annots") - 1;
364  // Length of beginning of the dictionary -> Annots end.
365  sal_uInt64 nAnnotsBeforeEndLength = nAnnotsEndOffset - rFirstPage.GetDictionaryOffset();
366  m_aEditBuffer.WriteBytes(static_cast<const char*>(m_aEditBuffer.GetData())
367  + rFirstPage.GetDictionaryOffset(),
368  nAnnotsBeforeEndLength);
369  m_aEditBuffer.WriteCharPtr(" ");
370  m_aEditBuffer.WriteUInt32AsString(nAnnotId);
371  m_aEditBuffer.WriteCharPtr(" 0 R");
372  // Length of Annots end -> end of the dictionary.
373  sal_uInt64 nAnnotsAfterEndLength = rFirstPage.GetDictionaryOffset()
374  + rFirstPage.GetDictionaryLength()
375  - nAnnotsEndOffset;
376  m_aEditBuffer.WriteBytes(static_cast<const char*>(m_aEditBuffer.GetData())
377  + nAnnotsEndOffset,
378  nAnnotsAfterEndLength);
379  }
380  m_aEditBuffer.WriteCharPtr(">>");
381  m_aEditBuffer.WriteCharPtr("\nendobj\n\n");
382  }
383 
384  return true;
385 }
386 
387 bool PDFDocument::WriteCatalogObject(sal_Int32 nAnnotId, PDFReferenceElement*& pRoot)
388 {
389  if (m_pXRefStream)
390  pRoot = dynamic_cast<PDFReferenceElement*>(m_pXRefStream->Lookup("Root"));
391  else
392  {
393  if (!m_pTrailer)
394  {
395  SAL_WARN("vcl.filter", "PDFDocument::Sign: found no trailer");
396  return false;
397  }
398  pRoot = dynamic_cast<PDFReferenceElement*>(m_pTrailer->Lookup("Root"));
399  }
400  if (!pRoot)
401  {
402  SAL_WARN("vcl.filter", "PDFDocument::Sign: trailer has no root reference");
403  return false;
404  }
405  PDFObjectElement* pCatalog = pRoot->LookupObject();
406  if (!pCatalog)
407  {
408  SAL_WARN("vcl.filter", "PDFDocument::Sign: invalid catalog reference");
409  return false;
410  }
411  sal_uInt32 nCatalogId = pCatalog->GetObjectValue();
412  if (nCatalogId >= m_aXRef.size())
413  {
414  SAL_WARN("vcl.filter", "PDFDocument::Sign: invalid catalog obj id");
415  return false;
416  }
417  PDFElement* pAcroForm = pCatalog->Lookup("AcroForm");
418  auto pAcroFormReference = dynamic_cast<PDFReferenceElement*>(pAcroForm);
419  if (pAcroFormReference)
420  {
421  // Write the updated AcroForm key of the Catalog object.
422  PDFObjectElement* pAcroFormObject = pAcroFormReference->LookupObject();
423  if (!pAcroFormObject)
424  {
425  SAL_WARN("vcl.filter", "PDFDocument::Sign: invalid AcroForm reference");
426  return false;
427  }
428 
429  sal_uInt32 nAcroFormId = pAcroFormObject->GetObjectValue();
430  m_aXRef[nAcroFormId].SetType(XRefEntryType::NOT_COMPRESSED);
431  m_aXRef[nAcroFormId].SetOffset(m_aEditBuffer.Tell());
432  m_aXRef[nAcroFormId].SetDirty(true);
433  m_aEditBuffer.WriteUInt32AsString(nAcroFormId);
434  m_aEditBuffer.WriteCharPtr(" 0 obj\n");
435 
436  // If this is nullptr, then the AcroForm object is not in an object stream.
437  SvMemoryStream* pStreamBuffer = pAcroFormObject->GetStreamBuffer();
438 
439  if (!pAcroFormObject->Lookup("Fields"))
440  {
441  SAL_WARN("vcl.filter",
442  "PDFDocument::Sign: AcroForm object without required Fields key");
443  return false;
444  }
445 
446  PDFDictionaryElement* pAcroFormDictionary = pAcroFormObject->GetDictionary();
447  if (!pAcroFormDictionary)
448  {
449  SAL_WARN("vcl.filter", "PDFDocument::Sign: AcroForm object has no dictionary");
450  return false;
451  }
452 
453  // Offset right before the end of the Fields array.
454  sal_uInt64 nFieldsEndOffset = pAcroFormDictionary->GetKeyOffset("Fields")
455  + pAcroFormDictionary->GetKeyValueLength("Fields")
456  - strlen("]");
457  // Length of beginning of the object dictionary -> Fields end.
458  sal_uInt64 nFieldsBeforeEndLength = nFieldsEndOffset;
459  if (pStreamBuffer)
460  m_aEditBuffer.WriteBytes(pStreamBuffer->GetData(), nFieldsBeforeEndLength);
461  else
462  {
463  nFieldsBeforeEndLength -= pAcroFormObject->GetDictionaryOffset();
464  m_aEditBuffer.WriteCharPtr("<<");
465  m_aEditBuffer.WriteBytes(static_cast<const char*>(m_aEditBuffer.GetData())
466  + pAcroFormObject->GetDictionaryOffset(),
467  nFieldsBeforeEndLength);
468  }
469 
470  // Append our reference at the end of the Fields array.
471  m_aEditBuffer.WriteCharPtr(" ");
472  m_aEditBuffer.WriteUInt32AsString(nAnnotId);
473  m_aEditBuffer.WriteCharPtr(" 0 R");
474 
475  // Length of Fields end -> end of the object dictionary.
476  if (pStreamBuffer)
477  {
478  sal_uInt64 nFieldsAfterEndLength = pStreamBuffer->GetSize() - nFieldsEndOffset;
479  m_aEditBuffer.WriteBytes(static_cast<const char*>(pStreamBuffer->GetData())
480  + nFieldsEndOffset,
481  nFieldsAfterEndLength);
482  }
483  else
484  {
485  sal_uInt64 nFieldsAfterEndLength = pAcroFormObject->GetDictionaryOffset()
486  + pAcroFormObject->GetDictionaryLength()
487  - nFieldsEndOffset;
488  m_aEditBuffer.WriteBytes(static_cast<const char*>(m_aEditBuffer.GetData())
489  + nFieldsEndOffset,
490  nFieldsAfterEndLength);
491  m_aEditBuffer.WriteCharPtr(">>");
492  }
493 
494  m_aEditBuffer.WriteCharPtr("\nendobj\n\n");
495  }
496  else
497  {
498  // Write the updated Catalog object, references nAnnotId.
499  auto pAcroFormDictionary = dynamic_cast<PDFDictionaryElement*>(pAcroForm);
500  m_aXRef[nCatalogId].SetOffset(m_aEditBuffer.Tell());
501  m_aXRef[nCatalogId].SetDirty(true);
502  m_aEditBuffer.WriteUInt32AsString(nCatalogId);
503  m_aEditBuffer.WriteCharPtr(" 0 obj\n");
504  m_aEditBuffer.WriteCharPtr("<<");
505  if (!pAcroFormDictionary)
506  {
507  // No AcroForm key, assume no signatures.
508  m_aEditBuffer.WriteBytes(static_cast<const char*>(m_aEditBuffer.GetData())
509  + pCatalog->GetDictionaryOffset(),
510  pCatalog->GetDictionaryLength());
511  m_aEditBuffer.WriteCharPtr("/AcroForm<</Fields[\n");
512  m_aEditBuffer.WriteUInt32AsString(nAnnotId);
513  m_aEditBuffer.WriteCharPtr(" 0 R\n]/SigFlags 3>>\n");
514  }
515  else
516  {
517  // AcroForm key is already there, insert our reference at the Fields end.
518  auto it = pAcroFormDictionary->GetItems().find("Fields");
519  if (it == pAcroFormDictionary->GetItems().end())
520  {
521  SAL_WARN("vcl.filter", "PDFDocument::Sign: AcroForm without required Fields key");
522  return false;
523  }
524 
525  auto pFields = dynamic_cast<PDFArrayElement*>(it->second);
526  if (!pFields)
527  {
528  SAL_WARN("vcl.filter", "PDFDocument::Sign: AcroForm Fields is not an array");
529  return false;
530  }
531 
532  // Offset right before the end of the Fields array.
533  sal_uInt64 nFieldsEndOffset = pAcroFormDictionary->GetKeyOffset("Fields")
534  + pAcroFormDictionary->GetKeyValueLength("Fields") - 1;
535  // Length of beginning of the Catalog dictionary -> Fields end.
536  sal_uInt64 nFieldsBeforeEndLength = nFieldsEndOffset - pCatalog->GetDictionaryOffset();
537  m_aEditBuffer.WriteBytes(static_cast<const char*>(m_aEditBuffer.GetData())
538  + pCatalog->GetDictionaryOffset(),
539  nFieldsBeforeEndLength);
540  m_aEditBuffer.WriteCharPtr(" ");
541  m_aEditBuffer.WriteUInt32AsString(nAnnotId);
542  m_aEditBuffer.WriteCharPtr(" 0 R");
543  // Length of Fields end -> end of the Catalog dictionary.
544  sal_uInt64 nFieldsAfterEndLength = pCatalog->GetDictionaryOffset()
545  + pCatalog->GetDictionaryLength() - nFieldsEndOffset;
546  m_aEditBuffer.WriteBytes(static_cast<const char*>(m_aEditBuffer.GetData())
547  + nFieldsEndOffset,
548  nFieldsAfterEndLength);
549  }
550  m_aEditBuffer.WriteCharPtr(">>\nendobj\n\n");
551  }
552 
553  return true;
554 }
555 
556 void PDFDocument::WriteXRef(sal_uInt64 nXRefOffset, PDFReferenceElement const* pRoot)
557 {
558  if (m_pXRefStream)
559  {
560  // Write the xref stream.
561  // This is a bit meta: the xref stream stores its own offset.
562  sal_Int32 nXRefStreamId = m_aXRef.size();
563  XRefEntry aXRefStreamEntry;
564  aXRefStreamEntry.SetOffset(nXRefOffset);
565  aXRefStreamEntry.SetDirty(true);
566  m_aXRef[nXRefStreamId] = aXRefStreamEntry;
567 
568  // Write stream data.
569  SvMemoryStream aXRefStream;
570  const size_t nOffsetLen = 3;
571  // 3 additional bytes: predictor, the first and the third field.
572  const size_t nLineLength = nOffsetLen + 3;
573  // This is the line as it appears before tweaking according to the predictor.
574  std::vector<unsigned char> aOrigLine(nLineLength);
575  // This is the previous line.
576  std::vector<unsigned char> aPrevLine(nLineLength);
577  // This is the line as written to the stream.
578  std::vector<unsigned char> aFilteredLine(nLineLength);
579  for (const auto& rXRef : m_aXRef)
580  {
581  const XRefEntry& rEntry = rXRef.second;
582 
583  if (!rEntry.GetDirty())
584  continue;
585 
586  // Predictor.
587  size_t nPos = 0;
588  // PNG prediction: up (on all rows).
589  aOrigLine[nPos++] = 2;
590 
591  // First field.
592  unsigned char nType = 0;
593  switch (rEntry.GetType())
594  {
595  case XRefEntryType::FREE:
596  nType = 0;
597  break;
598  case XRefEntryType::NOT_COMPRESSED:
599  nType = 1;
600  break;
601  case XRefEntryType::COMPRESSED:
602  nType = 2;
603  break;
604  }
605  aOrigLine[nPos++] = nType;
606 
607  // Second field.
608  for (size_t i = 0; i < nOffsetLen; ++i)
609  {
610  size_t nByte = nOffsetLen - i - 1;
611  // Fields requiring more than one byte are stored with the
612  // high-order byte first.
613  unsigned char nCh = (rEntry.GetOffset() & (0xff << (nByte * 8))) >> (nByte * 8);
614  aOrigLine[nPos++] = nCh;
615  }
616 
617  // Third field.
618  aOrigLine[nPos++] = 0;
619 
620  // Now apply the predictor.
621  aFilteredLine[0] = aOrigLine[0];
622  for (size_t i = 1; i < nLineLength; ++i)
623  {
624  // Count the delta vs the previous line.
625  aFilteredLine[i] = aOrigLine[i] - aPrevLine[i];
626  // Remember the new reference.
627  aPrevLine[i] = aOrigLine[i];
628  }
629 
630  aXRefStream.WriteBytes(aFilteredLine.data(), aFilteredLine.size());
631  }
632 
633  m_aEditBuffer.WriteUInt32AsString(nXRefStreamId);
634  m_aEditBuffer.WriteCharPtr(
635  " 0 obj\n<</DecodeParms<</Columns 5/Predictor 12>>/Filter/FlateDecode");
636 
637  // ID.
638  auto pID = dynamic_cast<PDFArrayElement*>(m_pXRefStream->Lookup("ID"));
639  if (pID)
640  {
641  const std::vector<PDFElement*>& rElements = pID->GetElements();
642  m_aEditBuffer.WriteCharPtr("/ID [ <");
643  for (size_t i = 0; i < rElements.size(); ++i)
644  {
645  auto pIDString = dynamic_cast<PDFHexStringElement*>(rElements[i]);
646  if (!pIDString)
647  continue;
648 
649  m_aEditBuffer.WriteOString(pIDString->GetValue());
650  if ((i + 1) < rElements.size())
651  m_aEditBuffer.WriteCharPtr("> <");
652  }
653  m_aEditBuffer.WriteCharPtr("> ] ");
654  }
655 
656  // Index.
657  m_aEditBuffer.WriteCharPtr("/Index [ ");
658  for (const auto& rXRef : m_aXRef)
659  {
660  if (!rXRef.second.GetDirty())
661  continue;
662 
663  m_aEditBuffer.WriteUInt32AsString(rXRef.first);
664  m_aEditBuffer.WriteCharPtr(" 1 ");
665  }
666  m_aEditBuffer.WriteCharPtr("] ");
667 
668  // Info.
669  auto pInfo = dynamic_cast<PDFReferenceElement*>(m_pXRefStream->Lookup("Info"));
670  if (pInfo)
671  {
672  m_aEditBuffer.WriteCharPtr("/Info ");
673  m_aEditBuffer.WriteUInt32AsString(pInfo->GetObjectValue());
674  m_aEditBuffer.WriteCharPtr(" ");
675  m_aEditBuffer.WriteUInt32AsString(pInfo->GetGenerationValue());
676  m_aEditBuffer.WriteCharPtr(" R ");
677  }
678 
679  // Length.
680  m_aEditBuffer.WriteCharPtr("/Length ");
681  {
682  ZCodec aZCodec;
683  aZCodec.BeginCompression();
684  aXRefStream.Seek(0);
685  SvMemoryStream aStream;
686  aZCodec.Compress(aXRefStream, aStream);
687  aZCodec.EndCompression();
688  aXRefStream.Seek(0);
689  aXRefStream.SetStreamSize(0);
690  aStream.Seek(0);
691  aXRefStream.WriteStream(aStream);
692  }
693  m_aEditBuffer.WriteUInt32AsString(aXRefStream.GetSize());
694 
695  if (!m_aStartXRefs.empty())
696  {
697  // Write location of the previous cross-reference section.
698  m_aEditBuffer.WriteCharPtr("/Prev ");
699  m_aEditBuffer.WriteUInt32AsString(m_aStartXRefs.back());
700  }
701 
702  // Root.
703  m_aEditBuffer.WriteCharPtr("/Root ");
704  m_aEditBuffer.WriteUInt32AsString(pRoot->GetObjectValue());
705  m_aEditBuffer.WriteCharPtr(" ");
706  m_aEditBuffer.WriteUInt32AsString(pRoot->GetGenerationValue());
707  m_aEditBuffer.WriteCharPtr(" R ");
708 
709  // Size.
710  m_aEditBuffer.WriteCharPtr("/Size ");
711  m_aEditBuffer.WriteUInt32AsString(m_aXRef.size());
712 
713  m_aEditBuffer.WriteCharPtr("/Type/XRef/W[1 3 1]>>\nstream\n");
714  aXRefStream.Seek(0);
715  m_aEditBuffer.WriteStream(aXRefStream);
716  m_aEditBuffer.WriteCharPtr("\nendstream\nendobj\n\n");
717  }
718  else
719  {
720  // Write the xref table.
721  m_aEditBuffer.WriteCharPtr("xref\n");
722  for (const auto& rXRef : m_aXRef)
723  {
724  size_t nObject = rXRef.first;
725  size_t nOffset = rXRef.second.GetOffset();
726  if (!rXRef.second.GetDirty())
727  continue;
728 
729  m_aEditBuffer.WriteUInt32AsString(nObject);
730  m_aEditBuffer.WriteCharPtr(" 1\n");
731  OStringBuffer aBuffer;
732  aBuffer.append(static_cast<sal_Int32>(nOffset));
733  while (aBuffer.getLength() < 10)
734  aBuffer.insert(0, "0");
735  if (nObject == 0)
736  aBuffer.append(" 65535 f \n");
737  else
738  aBuffer.append(" 00000 n \n");
739  m_aEditBuffer.WriteOString(aBuffer.toString());
740  }
741 
742  // Write the trailer.
743  m_aEditBuffer.WriteCharPtr("trailer\n<</Size ");
744  m_aEditBuffer.WriteUInt32AsString(m_aXRef.size());
745  m_aEditBuffer.WriteCharPtr("/Root ");
746  m_aEditBuffer.WriteUInt32AsString(pRoot->GetObjectValue());
747  m_aEditBuffer.WriteCharPtr(" ");
748  m_aEditBuffer.WriteUInt32AsString(pRoot->GetGenerationValue());
749  m_aEditBuffer.WriteCharPtr(" R\n");
750  auto pInfo = dynamic_cast<PDFReferenceElement*>(m_pTrailer->Lookup("Info"));
751  if (pInfo)
752  {
753  m_aEditBuffer.WriteCharPtr("/Info ");
754  m_aEditBuffer.WriteUInt32AsString(pInfo->GetObjectValue());
755  m_aEditBuffer.WriteCharPtr(" ");
756  m_aEditBuffer.WriteUInt32AsString(pInfo->GetGenerationValue());
757  m_aEditBuffer.WriteCharPtr(" R\n");
758  }
759  auto pID = dynamic_cast<PDFArrayElement*>(m_pTrailer->Lookup("ID"));
760  if (pID)
761  {
762  const std::vector<PDFElement*>& rElements = pID->GetElements();
763  m_aEditBuffer.WriteCharPtr("/ID [ <");
764  for (size_t i = 0; i < rElements.size(); ++i)
765  {
766  auto pIDString = dynamic_cast<PDFHexStringElement*>(rElements[i]);
767  if (!pIDString)
768  continue;
769 
770  m_aEditBuffer.WriteOString(pIDString->GetValue());
771  if ((i + 1) < rElements.size())
772  m_aEditBuffer.WriteCharPtr(">\n<");
773  }
774  m_aEditBuffer.WriteCharPtr("> ]\n");
775  }
776 
777  if (!m_aStartXRefs.empty())
778  {
779  // Write location of the previous cross-reference section.
780  m_aEditBuffer.WriteCharPtr("/Prev ");
781  m_aEditBuffer.WriteUInt32AsString(m_aStartXRefs.back());
782  }
783 
784  m_aEditBuffer.WriteCharPtr(">>\n");
785  }
786 }
787 
788 bool PDFDocument::Sign(const uno::Reference<security::XCertificate>& xCertificate,
789  const OUString& rDescription, bool bAdES)
790 {
791  m_aEditBuffer.Seek(STREAM_SEEK_TO_END);
792  m_aEditBuffer.WriteCharPtr("\n");
793 
794  sal_uInt64 nSignatureLastByteRangeOffset = 0;
795  sal_Int64 nSignatureContentOffset = 0;
796  sal_Int32 nSignatureId = WriteSignatureObject(
797  rDescription, bAdES, nSignatureLastByteRangeOffset, nSignatureContentOffset);
798 
799  sal_Int32 nAppearanceId = WriteAppearanceObject();
800 
801  std::vector<PDFObjectElement*> aPages = GetPages();
802  if (aPages.empty() || !aPages[0])
803  {
804  SAL_WARN("vcl.filter", "PDFDocument::Sign: found no pages");
805  return false;
806  }
807 
808  PDFObjectElement& rFirstPage = *aPages[0];
809  sal_Int32 nAnnotId = WriteAnnotObject(rFirstPage, nSignatureId, nAppearanceId);
810 
811  if (!WritePageObject(rFirstPage, nAnnotId))
812  {
813  SAL_WARN("vcl.filter", "PDFDocument::Sign: failed to write the updated Page object");
814  return false;
815  }
816 
817  PDFReferenceElement* pRoot = nullptr;
818  if (!WriteCatalogObject(nAnnotId, pRoot))
819  {
820  SAL_WARN("vcl.filter", "PDFDocument::Sign: failed to write the updated Catalog object");
821  return false;
822  }
823 
824  sal_uInt64 nXRefOffset = m_aEditBuffer.Tell();
825  WriteXRef(nXRefOffset, pRoot);
826 
827  // Write startxref.
828  m_aEditBuffer.WriteCharPtr("startxref\n");
829  m_aEditBuffer.WriteUInt32AsString(nXRefOffset);
830  m_aEditBuffer.WriteCharPtr("\n%%EOF\n");
831 
832  // Finalize the signature, now that we know the total file size.
833  // Calculate the length of the last byte range.
834  sal_uInt64 nFileEnd = m_aEditBuffer.Tell();
835  sal_Int64 nLastByteRangeLength
836  = nFileEnd - (nSignatureContentOffset + MAX_SIGNATURE_CONTENT_LENGTH + 1);
837  // Write the length to the buffer.
838  m_aEditBuffer.Seek(nSignatureLastByteRangeOffset);
839  OStringBuffer aByteRangeBuffer;
840  aByteRangeBuffer.append(nLastByteRangeLength);
841  aByteRangeBuffer.append(" ]");
842  m_aEditBuffer.WriteOString(aByteRangeBuffer.toString());
843 
844  // Create the PKCS#7 object.
845  css::uno::Sequence<sal_Int8> aDerEncoded = xCertificate->getEncoded();
846  if (!aDerEncoded.hasElements())
847  {
848  SAL_WARN("vcl.filter", "PDFDocument::Sign: empty certificate");
849  return false;
850  }
851 
852  m_aEditBuffer.Seek(0);
853  sal_uInt64 nBufferSize1 = nSignatureContentOffset - 1;
854  std::unique_ptr<char[]> aBuffer1(new char[nBufferSize1]);
855  m_aEditBuffer.ReadBytes(aBuffer1.get(), nBufferSize1);
856 
857  m_aEditBuffer.Seek(nSignatureContentOffset + MAX_SIGNATURE_CONTENT_LENGTH + 1);
858  sal_uInt64 nBufferSize2 = nLastByteRangeLength;
859  std::unique_ptr<char[]> aBuffer2(new char[nBufferSize2]);
860  m_aEditBuffer.ReadBytes(aBuffer2.get(), nBufferSize2);
861 
862  OStringBuffer aCMSHexBuffer;
863  svl::crypto::Signing aSigning(xCertificate);
864  aSigning.AddDataRange(aBuffer1.get(), nBufferSize1);
865  aSigning.AddDataRange(aBuffer2.get(), nBufferSize2);
866  if (!aSigning.Sign(aCMSHexBuffer))
867  {
868  SAL_WARN("vcl.filter", "PDFDocument::Sign: PDFWriter::Sign() failed");
869  return false;
870  }
871 
872  assert(aCMSHexBuffer.getLength() <= MAX_SIGNATURE_CONTENT_LENGTH);
873 
874  m_aEditBuffer.Seek(nSignatureContentOffset);
875  m_aEditBuffer.WriteOString(aCMSHexBuffer.toString());
876 
877  return true;
878 }
879 
880 bool PDFDocument::Write(SvStream& rStream)
881 {
882  m_aEditBuffer.Seek(0);
883  rStream.WriteStream(m_aEditBuffer);
884  return rStream.good();
885 }
886 
887 bool PDFDocument::Tokenize(SvStream& rStream, TokenizeMode eMode,
888  std::vector<std::unique_ptr<PDFElement>>& rElements,
889  PDFObjectElement* pObjectElement)
890 {
891  // Last seen object token.
892  PDFObjectElement* pObject = pObjectElement;
893  PDFNameElement* pObjectKey = nullptr;
894  PDFObjectElement* pObjectStream = nullptr;
895  bool bInXRef = false;
896  // The next number will be an xref offset.
897  bool bInStartXRef = false;
898  // Dictionary depth, so we know when we're outside any dictionaries.
899  int nDictionaryDepth = 0;
900  // Array depth, only the offset/length of the toplevel array is tracked.
901  int nArrayDepth = 0;
902  // Last seen array token that's outside any dictionaries.
903  PDFArrayElement* pArray = nullptr;
904  // If we're inside an obj/endobj pair.
905  bool bInObject = false;
906  while (true)
907  {
908  char ch;
909  rStream.ReadChar(ch);
910  if (rStream.eof())
911  break;
912 
913  switch (ch)
914  {
915  case '%':
916  {
917  auto pComment = new PDFCommentElement(*this);
918  rElements.push_back(std::unique_ptr<PDFElement>(pComment));
919  rStream.SeekRel(-1);
920  if (!rElements.back()->Read(rStream))
921  {
922  SAL_WARN("vcl.filter",
923  "PDFDocument::Tokenize: PDFCommentElement::Read() failed");
924  return false;
925  }
926  if (eMode == TokenizeMode::EOF_TOKEN && !m_aEOFs.empty()
927  && m_aEOFs.back() == rStream.Tell())
928  {
929  // Found EOF and partial parsing requested, we're done.
930  return true;
931  }
932  break;
933  }
934  case '<':
935  {
936  // Dictionary or hex string.
937  rStream.ReadChar(ch);
938  rStream.SeekRel(-2);
939  if (ch == '<')
940  {
941  rElements.push_back(std::unique_ptr<PDFElement>(new PDFDictionaryElement()));
942  ++nDictionaryDepth;
943  }
944  else
945  rElements.push_back(std::unique_ptr<PDFElement>(new PDFHexStringElement));
946  if (!rElements.back()->Read(rStream))
947  {
948  SAL_WARN("vcl.filter",
949  "PDFDocument::Tokenize: PDFDictionaryElement::Read() failed");
950  return false;
951  }
952  break;
953  }
954  case '>':
955  {
956  rElements.push_back(std::unique_ptr<PDFElement>(new PDFEndDictionaryElement()));
957  --nDictionaryDepth;
958  rStream.SeekRel(-1);
959  if (!rElements.back()->Read(rStream))
960  {
961  SAL_WARN("vcl.filter",
962  "PDFDocument::Tokenize: PDFEndDictionaryElement::Read() failed");
963  return false;
964  }
965  break;
966  }
967  case '[':
968  {
969  auto pArr = new PDFArrayElement(pObject);
970  rElements.push_back(std::unique_ptr<PDFElement>(pArr));
971  if (nDictionaryDepth == 0 && nArrayDepth == 0)
972  {
973  // The array is attached directly, inform the object.
974  pArray = pArr;
975  if (pObject)
976  {
977  pObject->SetArray(pArray);
978  pObject->SetArrayOffset(rStream.Tell());
979  }
980  }
981  ++nArrayDepth;
982  rStream.SeekRel(-1);
983  if (!rElements.back()->Read(rStream))
984  {
985  SAL_WARN("vcl.filter", "PDFDocument::Tokenize: PDFArrayElement::Read() failed");
986  return false;
987  }
988  break;
989  }
990  case ']':
991  {
992  rElements.push_back(std::unique_ptr<PDFElement>(new PDFEndArrayElement()));
993  --nArrayDepth;
994  if (nArrayDepth == 0)
995  pArray = nullptr;
996  rStream.SeekRel(-1);
997  if (nDictionaryDepth == 0 && nArrayDepth == 0)
998  {
999  if (pObject)
1000  {
1001  pObject->SetArrayLength(rStream.Tell() - pObject->GetArrayOffset());
1002  }
1003  }
1004  if (!rElements.back()->Read(rStream))
1005  {
1006  SAL_WARN("vcl.filter",
1007  "PDFDocument::Tokenize: PDFEndArrayElement::Read() failed");
1008  return false;
1009  }
1010  break;
1011  }
1012  case '/':
1013  {
1014  auto pNameElement = new PDFNameElement();
1015  rElements.push_back(std::unique_ptr<PDFElement>(pNameElement));
1016  rStream.SeekRel(-1);
1017  if (!pNameElement->Read(rStream))
1018  {
1019  SAL_WARN("vcl.filter", "PDFDocument::Tokenize: PDFNameElement::Read() failed");
1020  return false;
1021  }
1022  if (pObject && pObjectKey && pObjectKey->GetValue() == "Type"
1023  && pNameElement->GetValue() == "ObjStm")
1024  pObjectStream = pObject;
1025  else
1026  pObjectKey = pNameElement;
1027  break;
1028  }
1029  case '(':
1030  {
1031  rElements.push_back(std::unique_ptr<PDFElement>(new PDFLiteralStringElement));
1032  rStream.SeekRel(-1);
1033  if (!rElements.back()->Read(rStream))
1034  {
1035  SAL_WARN("vcl.filter",
1036  "PDFDocument::Tokenize: PDFLiteralStringElement::Read() failed");
1037  return false;
1038  }
1039  break;
1040  }
1041  default:
1042  {
1043  if (rtl::isAsciiDigit(static_cast<unsigned char>(ch)) || ch == '-')
1044  {
1045  // Numbering object: an integer or a real.
1046  auto pNumberElement = new PDFNumberElement();
1047  rElements.push_back(std::unique_ptr<PDFElement>(pNumberElement));
1048  rStream.SeekRel(-1);
1049  if (!pNumberElement->Read(rStream))
1050  {
1051  SAL_WARN("vcl.filter",
1052  "PDFDocument::Tokenize: PDFNumberElement::Read() failed");
1053  return false;
1054  }
1055  if (bInStartXRef)
1056  {
1057  bInStartXRef = false;
1058  m_aStartXRefs.push_back(pNumberElement->GetValue());
1059 
1060  auto it = m_aOffsetObjects.find(pNumberElement->GetValue());
1061  if (it != m_aOffsetObjects.end())
1062  m_pXRefStream = it->second;
1063  }
1064  else if (bInObject && !nDictionaryDepth && !nArrayDepth && pObject)
1065  // Number element inside an object, but outside a
1066  // dictionary / array: remember it.
1067  pObject->SetNumberElement(pNumberElement);
1068  }
1069  else if (rtl::isAsciiAlpha(static_cast<unsigned char>(ch)))
1070  {
1071  // Possible keyword, like "obj".
1072  rStream.SeekRel(-1);
1073  OString aKeyword = ReadKeyword(rStream);
1074 
1075  bool bObj = aKeyword == "obj";
1076  if (bObj || aKeyword == "R")
1077  {
1078  size_t nElements = rElements.size();
1079  if (nElements < 2)
1080  {
1081  SAL_WARN("vcl.filter", "PDFDocument::Tokenize: expected at least two "
1082  "tokens before 'obj' or 'R' keyword");
1083  return false;
1084  }
1085 
1086  auto pObjectNumber
1087  = dynamic_cast<PDFNumberElement*>(rElements[nElements - 2].get());
1088  auto pGenerationNumber
1089  = dynamic_cast<PDFNumberElement*>(rElements[nElements - 1].get());
1090  if (!pObjectNumber || !pGenerationNumber)
1091  {
1092  SAL_WARN("vcl.filter", "PDFDocument::Tokenize: missing object or "
1093  "generation number before 'obj' or 'R' keyword");
1094  return false;
1095  }
1096 
1097  if (bObj)
1098  {
1099  pObject = new PDFObjectElement(*this, pObjectNumber->GetValue(),
1100  pGenerationNumber->GetValue());
1101  rElements.push_back(std::unique_ptr<PDFElement>(pObject));
1102  m_aOffsetObjects[pObjectNumber->GetLocation()] = pObject;
1103  m_aIDObjects[pObjectNumber->GetValue()] = pObject;
1104  bInObject = true;
1105  }
1106  else
1107  {
1108  auto pReference = new PDFReferenceElement(*this, *pObjectNumber,
1109  *pGenerationNumber);
1110  rElements.push_back(std::unique_ptr<PDFElement>(pReference));
1111  if (pArray)
1112  // Reference is part of a direct (non-dictionary) array, inform the array.
1113  pArray->PushBack(rElements.back().get());
1114  if (bInObject && nDictionaryDepth > 0 && pObject)
1115  // Inform the object about a new in-dictionary reference.
1116  pObject->AddDictionaryReference(pReference);
1117  }
1118  if (!rElements.back()->Read(rStream))
1119  {
1120  SAL_WARN("vcl.filter",
1121  "PDFDocument::Tokenize: PDFElement::Read() failed");
1122  return false;
1123  }
1124  }
1125  else if (aKeyword == "stream")
1126  {
1127  // Look up the length of the stream from the parent object's dictionary.
1128  size_t nLength = 0;
1129  for (size_t nElement = 0; nElement < rElements.size(); ++nElement)
1130  {
1131  // Iterate in reverse order.
1132  size_t nIndex = rElements.size() - nElement - 1;
1133  PDFElement* pElement = rElements[nIndex].get();
1134  auto pObj = dynamic_cast<PDFObjectElement*>(pElement);
1135  if (!pObj)
1136  continue;
1137 
1138  PDFElement* pLookup = pObj->Lookup("Length");
1139  auto pReference = dynamic_cast<PDFReferenceElement*>(pLookup);
1140  if (pReference)
1141  {
1142  // Length is provided as a reference.
1143  nLength = pReference->LookupNumber(rStream);
1144  break;
1145  }
1146 
1147  auto pNumber = dynamic_cast<PDFNumberElement*>(pLookup);
1148  if (pNumber)
1149  {
1150  // Length is provided directly.
1151  nLength = pNumber->GetValue();
1152  break;
1153  }
1154 
1155  SAL_WARN(
1156  "vcl.filter",
1157  "PDFDocument::Tokenize: found no Length key for stream keyword");
1158  return false;
1159  }
1160 
1161  PDFDocument::SkipLineBreaks(rStream);
1162  auto pStreamElement = new PDFStreamElement(nLength);
1163  if (pObject)
1164  pObject->SetStream(pStreamElement);
1165  rElements.push_back(std::unique_ptr<PDFElement>(pStreamElement));
1166  if (!rElements.back()->Read(rStream))
1167  {
1168  SAL_WARN("vcl.filter",
1169  "PDFDocument::Tokenize: PDFStreamElement::Read() failed");
1170  return false;
1171  }
1172  }
1173  else if (aKeyword == "endstream")
1174  {
1175  rElements.push_back(std::unique_ptr<PDFElement>(new PDFEndStreamElement));
1176  if (!rElements.back()->Read(rStream))
1177  {
1178  SAL_WARN("vcl.filter",
1179  "PDFDocument::Tokenize: PDFEndStreamElement::Read() failed");
1180  return false;
1181  }
1182  }
1183  else if (aKeyword == "endobj")
1184  {
1185  rElements.push_back(std::unique_ptr<PDFElement>(new PDFEndObjectElement));
1186  if (!rElements.back()->Read(rStream))
1187  {
1188  SAL_WARN("vcl.filter",
1189  "PDFDocument::Tokenize: PDFEndObjectElement::Read() failed");
1190  return false;
1191  }
1192  if (eMode == TokenizeMode::END_OF_OBJECT)
1193  {
1194  // Found endobj and only object parsing was requested, we're done.
1195  return true;
1196  }
1197 
1198  if (pObjectStream)
1199  {
1200  // We're at the end of an object stream, parse the stored objects.
1201  pObjectStream->ParseStoredObjects();
1202  pObjectStream = nullptr;
1203  pObjectKey = nullptr;
1204  }
1205  bInObject = false;
1206  }
1207  else if (aKeyword == "true" || aKeyword == "false")
1208  rElements.push_back(std::unique_ptr<PDFElement>(
1209  new PDFBooleanElement(aKeyword.toBoolean())));
1210  else if (aKeyword == "null")
1211  rElements.push_back(std::unique_ptr<PDFElement>(new PDFNullElement));
1212  else if (aKeyword == "xref")
1213  // Allow 'f' and 'n' keywords.
1214  bInXRef = true;
1215  else if (bInXRef && (aKeyword == "f" || aKeyword == "n"))
1216  {
1217  }
1218  else if (aKeyword == "trailer")
1219  {
1220  auto pTrailer = new PDFTrailerElement(*this);
1221 
1222  // Make it possible to find this trailer later by offset.
1223  pTrailer->Read(rStream);
1224  m_aOffsetTrailers[pTrailer->GetLocation()] = pTrailer;
1225 
1226  // When reading till the first EOF token only, remember
1227  // just the first trailer token.
1228  if (eMode != TokenizeMode::EOF_TOKEN || !m_pTrailer)
1229  m_pTrailer = pTrailer;
1230  rElements.push_back(std::unique_ptr<PDFElement>(pTrailer));
1231  }
1232  else if (aKeyword == "startxref")
1233  {
1234  bInStartXRef = true;
1235  }
1236  else
1237  {
1238  SAL_WARN("vcl.filter", "PDFDocument::Tokenize: unexpected '"
1239  << aKeyword << "' keyword at byte position "
1240  << rStream.Tell());
1241  return false;
1242  }
1243  }
1244  else
1245  {
1246  if (!rtl::isAsciiWhiteSpace(static_cast<unsigned char>(ch)))
1247  {
1248  SAL_WARN("vcl.filter", "PDFDocument::Tokenize: unexpected character: "
1249  << ch << " at byte position " << rStream.Tell());
1250  return false;
1251  }
1252  }
1253  break;
1254  }
1255  }
1256  }
1257 
1258  return true;
1259 }
1260 
1261 void PDFDocument::SetIDObject(size_t nID, PDFObjectElement* pObject)
1262 {
1263  m_aIDObjects[nID] = pObject;
1264 }
1265 
1266 bool PDFDocument::Read(SvStream& rStream)
1267 {
1268  // Check file magic.
1269  std::vector<sal_Int8> aHeader(5);
1270  rStream.Seek(0);
1271  rStream.ReadBytes(aHeader.data(), aHeader.size());
1272  if (aHeader[0] != '%' || aHeader[1] != 'P' || aHeader[2] != 'D' || aHeader[3] != 'F'
1273  || aHeader[4] != '-')
1274  {
1275  SAL_WARN("vcl.filter", "PDFDocument::Read: header mismatch");
1276  return false;
1277  }
1278 
1279  // Allow later editing of the contents in-memory.
1280  rStream.Seek(0);
1281  m_aEditBuffer.WriteStream(rStream);
1282 
1283  // Look up the offset of the xref table.
1284  size_t nStartXRef = FindStartXRef(rStream);
1285  SAL_INFO("vcl.filter", "PDFDocument::Read: nStartXRef is " << nStartXRef);
1286  if (nStartXRef == 0)
1287  {
1288  SAL_WARN("vcl.filter", "PDFDocument::Read: found no xref start offset");
1289  return false;
1290  }
1291  while (true)
1292  {
1293  rStream.Seek(nStartXRef);
1294  OString aKeyword = ReadKeyword(rStream);
1295  if (aKeyword.isEmpty())
1296  ReadXRefStream(rStream);
1297 
1298  else
1299  {
1300  if (aKeyword != "xref")
1301  {
1302  SAL_WARN("vcl.filter", "PDFDocument::Read: xref is not the first keyword");
1303  return false;
1304  }
1305  ReadXRef(rStream);
1306  if (!Tokenize(rStream, TokenizeMode::EOF_TOKEN, m_aElements, nullptr))
1307  {
1308  SAL_WARN("vcl.filter", "PDFDocument::Read: failed to tokenizer trailer after xref");
1309  return false;
1310  }
1311  }
1312 
1313  PDFNumberElement* pPrev = nullptr;
1314  if (m_pTrailer)
1315  {
1316  pPrev = dynamic_cast<PDFNumberElement*>(m_pTrailer->Lookup("Prev"));
1317 
1318  // Remember the offset of this trailer in the correct order. It's
1319  // possible that newer trailers don't have a larger offset.
1320  m_aTrailerOffsets.push_back(m_pTrailer->GetLocation());
1321  }
1322  else if (m_pXRefStream)
1323  pPrev = dynamic_cast<PDFNumberElement*>(m_pXRefStream->Lookup("Prev"));
1324  if (pPrev)
1325  nStartXRef = pPrev->GetValue();
1326 
1327  // Reset state, except the edit buffer.
1328  m_aElements.clear();
1329  m_aOffsetObjects.clear();
1330  m_aIDObjects.clear();
1331  m_aStartXRefs.clear();
1332  m_aEOFs.clear();
1333  m_pTrailer = nullptr;
1334  m_pXRefStream = nullptr;
1335  if (!pPrev)
1336  break;
1337  }
1338 
1339  // Then we can tokenize the stream.
1340  rStream.Seek(0);
1341  return Tokenize(rStream, TokenizeMode::END_OF_STREAM, m_aElements, nullptr);
1342 }
1343 
1344 OString PDFDocument::ReadKeyword(SvStream& rStream)
1345 {
1346  OStringBuffer aBuf;
1347  char ch;
1348  rStream.ReadChar(ch);
1349  if (rStream.eof())
1350  return OString();
1351  while (rtl::isAsciiAlpha(static_cast<unsigned char>(ch)))
1352  {
1353  aBuf.append(ch);
1354  rStream.ReadChar(ch);
1355  if (rStream.eof())
1356  return aBuf.toString();
1357  }
1358  rStream.SeekRel(-1);
1359  return aBuf.toString();
1360 }
1361 
1362 size_t PDFDocument::FindStartXRef(SvStream& rStream)
1363 {
1364  // Find the "startxref" token, somewhere near the end of the document.
1365  std::vector<char> aBuf(1024);
1366  rStream.Seek(STREAM_SEEK_TO_END);
1367  if (rStream.Tell() > aBuf.size())
1368  rStream.SeekRel(static_cast<sal_Int64>(-1) * aBuf.size());
1369  else
1370  // The document is really short, then just read it from the start.
1371  rStream.Seek(0);
1372  size_t nBeforePeek = rStream.Tell();
1373  size_t nSize = rStream.ReadBytes(aBuf.data(), aBuf.size());
1374  rStream.Seek(nBeforePeek);
1375  if (nSize != aBuf.size())
1376  aBuf.resize(nSize);
1377  OString aPrefix("startxref");
1378  // Find the last startxref at the end of the document.
1379  auto itLastValid = aBuf.end();
1380  auto it = aBuf.begin();
1381  while (true)
1382  {
1383  it = std::search(it, aBuf.end(), aPrefix.getStr(), aPrefix.getStr() + aPrefix.getLength());
1384  if (it == aBuf.end())
1385  break;
1386 
1387  itLastValid = it;
1388  ++it;
1389  }
1390  if (itLastValid == aBuf.end())
1391  {
1392  SAL_WARN("vcl.filter", "PDFDocument::FindStartXRef: found no startxref");
1393  return 0;
1394  }
1395 
1396  rStream.SeekRel(itLastValid - aBuf.begin() + aPrefix.getLength());
1397  if (rStream.eof())
1398  {
1399  SAL_WARN("vcl.filter",
1400  "PDFDocument::FindStartXRef: unexpected end of stream after startxref");
1401  return 0;
1402  }
1403 
1404  PDFDocument::SkipWhitespace(rStream);
1405  PDFNumberElement aNumber;
1406  if (!aNumber.Read(rStream))
1407  return 0;
1408  return aNumber.GetValue();
1409 }
1410 
1411 void PDFDocument::ReadXRefStream(SvStream& rStream)
1412 {
1413  // Look up the stream length in the object dictionary.
1414  if (!Tokenize(rStream, TokenizeMode::END_OF_OBJECT, m_aElements, nullptr))
1415  {
1416  SAL_WARN("vcl.filter", "PDFDocument::ReadXRefStream: failed to read object");
1417  return;
1418  }
1419 
1420  if (m_aElements.empty())
1421  {
1422  SAL_WARN("vcl.filter", "PDFDocument::ReadXRefStream: no tokens found");
1423  return;
1424  }
1425 
1426  PDFObjectElement* pObject = nullptr;
1427  for (const auto& pElement : m_aElements)
1428  {
1429  if (auto pObj = dynamic_cast<PDFObjectElement*>(pElement.get()))
1430  {
1431  pObject = pObj;
1432  break;
1433  }
1434  }
1435  if (!pObject)
1436  {
1437  SAL_WARN("vcl.filter", "PDFDocument::ReadXRefStream: no object token found");
1438  return;
1439  }
1440 
1441  // So that the Prev key can be looked up later.
1442  m_pXRefStream = pObject;
1443 
1444  PDFElement* pLookup = pObject->Lookup("Length");
1445  auto pNumber = dynamic_cast<PDFNumberElement*>(pLookup);
1446  if (!pNumber)
1447  {
1448  SAL_WARN("vcl.filter", "PDFDocument::ReadXRefStream: stream length is not provided");
1449  return;
1450  }
1451  sal_uInt64 nLength = pNumber->GetValue();
1452 
1453  // Look up the stream offset.
1454  PDFStreamElement* pStream = nullptr;
1455  for (const auto& pElement : m_aElements)
1456  {
1457  if (auto pS = dynamic_cast<PDFStreamElement*>(pElement.get()))
1458  {
1459  pStream = pS;
1460  break;
1461  }
1462  }
1463  if (!pStream)
1464  {
1465  SAL_WARN("vcl.filter", "PDFDocument::ReadXRefStream: no stream token found");
1466  return;
1467  }
1468 
1469  // Read and decompress it.
1470  rStream.Seek(pStream->GetOffset());
1471  std::vector<char> aBuf(nLength);
1472  rStream.ReadBytes(aBuf.data(), aBuf.size());
1473 
1474  auto pFilter = dynamic_cast<PDFNameElement*>(pObject->Lookup("Filter"));
1475  if (!pFilter)
1476  {
1477  SAL_WARN("vcl.filter", "PDFDocument::ReadXRefStream: no Filter found");
1478  return;
1479  }
1480 
1481  if (pFilter->GetValue() != "FlateDecode")
1482  {
1483  SAL_WARN("vcl.filter",
1484  "PDFDocument::ReadXRefStream: unexpected filter: " << pFilter->GetValue());
1485  return;
1486  }
1487 
1488  int nColumns = 1;
1489  int nPredictor = 1;
1490  if (auto pDecodeParams = dynamic_cast<PDFDictionaryElement*>(pObject->Lookup("DecodeParms")))
1491  {
1492  const std::map<OString, PDFElement*>& rItems = pDecodeParams->GetItems();
1493  auto it = rItems.find("Columns");
1494  if (it != rItems.end())
1495  if (auto pColumns = dynamic_cast<PDFNumberElement*>(it->second))
1496  nColumns = pColumns->GetValue();
1497  it = rItems.find("Predictor");
1498  if (it != rItems.end())
1499  if (auto pPredictor = dynamic_cast<PDFNumberElement*>(it->second))
1500  nPredictor = pPredictor->GetValue();
1501  }
1502 
1503  SvMemoryStream aSource(aBuf.data(), aBuf.size(), StreamMode::READ);
1504  SvMemoryStream aStream;
1505  ZCodec aZCodec;
1506  aZCodec.BeginCompression();
1507  aZCodec.Decompress(aSource, aStream);
1508  if (!aZCodec.EndCompression())
1509  {
1510  SAL_WARN("vcl.filter", "PDFDocument::ReadXRefStream: decompression failed");
1511  return;
1512  }
1513 
1514  // Look up the first and the last entry we need to read.
1515  auto pIndex = dynamic_cast<PDFArrayElement*>(pObject->Lookup("Index"));
1516  std::vector<size_t> aFirstObjects;
1517  std::vector<size_t> aNumberOfObjects;
1518  if (!pIndex)
1519  {
1520  auto pSize = dynamic_cast<PDFNumberElement*>(pObject->Lookup("Size"));
1521  if (pSize)
1522  {
1523  aFirstObjects.push_back(0);
1524  aNumberOfObjects.push_back(pSize->GetValue());
1525  }
1526  else
1527  {
1528  SAL_WARN("vcl.filter", "PDFDocument::ReadXRefStream: Index and Size not found");
1529  return;
1530  }
1531  }
1532  else
1533  {
1534  const std::vector<PDFElement*>& rIndexElements = pIndex->GetElements();
1535  size_t nFirstObject = 0;
1536  for (size_t i = 0; i < rIndexElements.size(); ++i)
1537  {
1538  if (i % 2 == 0)
1539  {
1540  auto pFirstObject = dynamic_cast<PDFNumberElement*>(rIndexElements[i]);
1541  if (!pFirstObject)
1542  {
1543  SAL_WARN("vcl.filter",
1544  "PDFDocument::ReadXRefStream: Index has no first object");
1545  return;
1546  }
1547  nFirstObject = pFirstObject->GetValue();
1548  continue;
1549  }
1550 
1551  auto pNumberOfObjects = dynamic_cast<PDFNumberElement*>(rIndexElements[i]);
1552  if (!pNumberOfObjects)
1553  {
1554  SAL_WARN("vcl.filter",
1555  "PDFDocument::ReadXRefStream: Index has no number of objects");
1556  return;
1557  }
1558  aFirstObjects.push_back(nFirstObject);
1559  aNumberOfObjects.push_back(pNumberOfObjects->GetValue());
1560  }
1561  }
1562 
1563  // Look up the format of a single entry.
1564  const int nWSize = 3;
1565  auto pW = dynamic_cast<PDFArrayElement*>(pObject->Lookup("W"));
1566  if (!pW || pW->GetElements().size() < nWSize)
1567  {
1568  SAL_WARN("vcl.filter", "PDFDocument::ReadXRefStream: W not found or has < 3 elements");
1569  return;
1570  }
1571  int aW[nWSize];
1572  // First character is the (kind of) repeated predictor.
1573  int nLineLength = 1;
1574  for (size_t i = 0; i < nWSize; ++i)
1575  {
1576  auto pI = dynamic_cast<PDFNumberElement*>(pW->GetElements()[i]);
1577  if (!pI)
1578  {
1579  SAL_WARN("vcl.filter", "PDFDocument::ReadXRefStream: W contains non-number");
1580  return;
1581  }
1582  aW[i] = pI->GetValue();
1583  nLineLength += aW[i];
1584  }
1585 
1586  if (nPredictor > 1 && nLineLength - 1 != nColumns)
1587  {
1588  SAL_WARN("vcl.filter",
1589  "PDFDocument::ReadXRefStream: /DecodeParms/Columns is inconsistent with /W");
1590  return;
1591  }
1592 
1593  aStream.Seek(0);
1594  for (size_t nSubSection = 0; nSubSection < aFirstObjects.size(); ++nSubSection)
1595  {
1596  size_t nFirstObject = aFirstObjects[nSubSection];
1597  size_t nNumberOfObjects = aNumberOfObjects[nSubSection];
1598 
1599  // This is the line as read from the stream.
1600  std::vector<unsigned char> aOrigLine(nLineLength);
1601  // This is the line as it appears after tweaking according to nPredictor.
1602  std::vector<unsigned char> aFilteredLine(nLineLength);
1603  for (size_t nEntry = 0; nEntry < nNumberOfObjects; ++nEntry)
1604  {
1605  size_t nIndex = nFirstObject + nEntry;
1606 
1607  aStream.ReadBytes(aOrigLine.data(), aOrigLine.size());
1608  if (nPredictor > 1 && aOrigLine[0] + 10 != nPredictor)
1609  {
1610  SAL_WARN("vcl.filter", "PDFDocument::ReadXRefStream: in-stream predictor is "
1611  "inconsistent with /DecodeParms/Predictor for object #"
1612  << nIndex);
1613  return;
1614  }
1615 
1616  for (int i = 0; i < nLineLength; ++i)
1617  {
1618  switch (nPredictor)
1619  {
1620  case 1:
1621  // No prediction.
1622  break;
1623  case 12:
1624  // PNG prediction: up (on all rows).
1625  aFilteredLine[i] = aFilteredLine[i] + aOrigLine[i];
1626  break;
1627  default:
1628  SAL_WARN("vcl.filter", "PDFDocument::ReadXRefStream: unexpected predictor: "
1629  << nPredictor);
1630  return;
1631  break;
1632  }
1633  }
1634 
1635  // First character is already handled above.
1636  int nPos = 1;
1637  size_t nType = 0;
1638  // Start of the current field in the stream data.
1639  int nOffset = nPos;
1640  for (; nPos < nOffset + aW[0]; ++nPos)
1641  {
1642  unsigned char nCh = aFilteredLine[nPos];
1643  nType = (nType << 8) + nCh;
1644  }
1645 
1646  // Start of the object in the file stream.
1647  size_t nStreamOffset = 0;
1648  nOffset = nPos;
1649  for (; nPos < nOffset + aW[1]; ++nPos)
1650  {
1651  unsigned char nCh = aFilteredLine[nPos];
1652  nStreamOffset = (nStreamOffset << 8) + nCh;
1653  }
1654 
1655  // Generation number of the object.
1656  size_t nGenerationNumber = 0;
1657  nOffset = nPos;
1658  for (; nPos < nOffset + aW[2]; ++nPos)
1659  {
1660  unsigned char nCh = aFilteredLine[nPos];
1661  nGenerationNumber = (nGenerationNumber << 8) + nCh;
1662  }
1663 
1664  // Ignore invalid nType.
1665  if (nType <= 2)
1666  {
1667  if (m_aXRef.find(nIndex) == m_aXRef.end())
1668  {
1669  XRefEntry aEntry;
1670  switch (nType)
1671  {
1672  case 0:
1673  aEntry.SetType(XRefEntryType::FREE);
1674  break;
1675  case 1:
1676  aEntry.SetType(XRefEntryType::NOT_COMPRESSED);
1677  break;
1678  case 2:
1679  aEntry.SetType(XRefEntryType::COMPRESSED);
1680  break;
1681  }
1682  aEntry.SetOffset(nStreamOffset);
1683  m_aXRef[nIndex] = aEntry;
1684  }
1685  }
1686  }
1687  }
1688 }
1689 
1690 void PDFDocument::ReadXRef(SvStream& rStream)
1691 {
1692  PDFDocument::SkipWhitespace(rStream);
1693 
1694  while (true)
1695  {
1696  PDFNumberElement aFirstObject;
1697  if (!aFirstObject.Read(rStream))
1698  {
1699  // Next token is not a number, it'll be the trailer.
1700  return;
1701  }
1702 
1703  if (aFirstObject.GetValue() < 0)
1704  {
1705  SAL_WARN("vcl.filter", "PDFDocument::ReadXRef: expected first object number >= 0");
1706  return;
1707  }
1708 
1709  PDFDocument::SkipWhitespace(rStream);
1710  PDFNumberElement aNumberOfEntries;
1711  if (!aNumberOfEntries.Read(rStream))
1712  {
1713  SAL_WARN("vcl.filter", "PDFDocument::ReadXRef: failed to read number of entries");
1714  return;
1715  }
1716 
1717  if (aNumberOfEntries.GetValue() < 0)
1718  {
1719  SAL_WARN("vcl.filter", "PDFDocument::ReadXRef: expected zero or more entries");
1720  return;
1721  }
1722 
1723  size_t nSize = aNumberOfEntries.GetValue();
1724  for (size_t nEntry = 0; nEntry < nSize; ++nEntry)
1725  {
1726  size_t nIndex = aFirstObject.GetValue() + nEntry;
1727  PDFDocument::SkipWhitespace(rStream);
1728  PDFNumberElement aOffset;
1729  if (!aOffset.Read(rStream))
1730  {
1731  SAL_WARN("vcl.filter", "PDFDocument::ReadXRef: failed to read offset");
1732  return;
1733  }
1734 
1735  PDFDocument::SkipWhitespace(rStream);
1736  PDFNumberElement aGenerationNumber;
1737  if (!aGenerationNumber.Read(rStream))
1738  {
1739  SAL_WARN("vcl.filter", "PDFDocument::ReadXRef: failed to read generation number");
1740  return;
1741  }
1742 
1743  PDFDocument::SkipWhitespace(rStream);
1744  OString aKeyword = ReadKeyword(rStream);
1745  if (aKeyword != "f" && aKeyword != "n")
1746  {
1747  SAL_WARN("vcl.filter", "PDFDocument::ReadXRef: unexpected keyword");
1748  return;
1749  }
1750  // xrefs are read in reverse order, so never update an existing
1751  // offset with an older one.
1752  if (m_aXRef.find(nIndex) == m_aXRef.end())
1753  {
1754  XRefEntry aEntry;
1755  aEntry.SetOffset(aOffset.GetValue());
1756  // Initially only the first entry is dirty.
1757  if (nIndex == 0)
1758  aEntry.SetDirty(true);
1759  m_aXRef[nIndex] = aEntry;
1760  }
1761  PDFDocument::SkipWhitespace(rStream);
1762  }
1763  }
1764 }
1765 
1766 void PDFDocument::SkipWhitespace(SvStream& rStream)
1767 {
1768  char ch = 0;
1769 
1770  while (true)
1771  {
1772  rStream.ReadChar(ch);
1773  if (rStream.eof())
1774  break;
1775 
1776  if (!rtl::isAsciiWhiteSpace(static_cast<unsigned char>(ch)))
1777  {
1778  rStream.SeekRel(-1);
1779  return;
1780  }
1781  }
1782 }
1783 
1784 void PDFDocument::SkipLineBreaks(SvStream& rStream)
1785 {
1786  char ch = 0;
1787 
1788  while (true)
1789  {
1790  rStream.ReadChar(ch);
1791  if (rStream.eof())
1792  break;
1793 
1794  if (ch != '\n' && ch != '\r')
1795  {
1796  rStream.SeekRel(-1);
1797  return;
1798  }
1799  }
1800 }
1801 
1802 size_t PDFDocument::GetObjectOffset(size_t nIndex) const
1803 {
1804  auto it = m_aXRef.find(nIndex);
1805  if (it == m_aXRef.end() || it->second.GetType() == XRefEntryType::COMPRESSED)
1806  {
1807  SAL_WARN("vcl.filter", "PDFDocument::GetObjectOffset: wanted to look up index #"
1808  << nIndex << ", but failed");
1809  return 0;
1810  }
1811 
1812  return it->second.GetOffset();
1813 }
1814 
1815 const std::vector<std::unique_ptr<PDFElement>>& PDFDocument::GetElements() const
1816 {
1817  return m_aElements;
1818 }
1819 
1821 static void visitPages(PDFObjectElement* pPages, std::vector<PDFObjectElement*>& rRet)
1822 {
1823  auto pKids = dynamic_cast<PDFArrayElement*>(pPages->Lookup("Kids"));
1824  if (!pKids)
1825  {
1826  SAL_WARN("vcl.filter", "visitPages: pages has no kids");
1827  return;
1828  }
1829 
1830  pPages->setVisiting(true);
1831 
1832  for (const auto& pKid : pKids->GetElements())
1833  {
1834  auto pReference = dynamic_cast<PDFReferenceElement*>(pKid);
1835  if (!pReference)
1836  continue;
1837 
1838  PDFObjectElement* pKidObject = pReference->LookupObject();
1839  if (!pKidObject)
1840  continue;
1841 
1842  // detect if visiting reenters itself
1843  if (pKidObject->alreadyVisiting())
1844  {
1845  SAL_WARN("vcl.filter", "visitPages: loop in hierarchy");
1846  continue;
1847  }
1848 
1849  auto pName = dynamic_cast<PDFNameElement*>(pKidObject->Lookup("Type"));
1850  if (pName && pName->GetValue() == "Pages")
1851  // Pages inside pages: recurse.
1852  visitPages(pKidObject, rRet);
1853  else
1854  // Found an actual page.
1855  rRet.push_back(pKidObject);
1856  }
1857 
1858  pPages->setVisiting(false);
1859 }
1860 
1861 std::vector<PDFObjectElement*> PDFDocument::GetPages()
1862 {
1863  std::vector<PDFObjectElement*> aRet;
1864 
1865  PDFReferenceElement* pRoot = nullptr;
1866 
1867  PDFTrailerElement* pTrailer = nullptr;
1868  if (!m_aTrailerOffsets.empty())
1869  {
1870  // Get access to the latest trailer, and work with the keys of that
1871  // one.
1872  auto it = m_aOffsetTrailers.find(m_aTrailerOffsets[0]);
1873  if (it != m_aOffsetTrailers.end())
1874  pTrailer = it->second;
1875  }
1876 
1877  if (pTrailer)
1878  pRoot = dynamic_cast<PDFReferenceElement*>(pTrailer->Lookup("Root"));
1879  else if (m_pXRefStream)
1880  pRoot = dynamic_cast<PDFReferenceElement*>(m_pXRefStream->Lookup("Root"));
1881 
1882  if (!pRoot)
1883  {
1884  SAL_WARN("vcl.filter", "PDFDocument::GetPages: trailer has no Root key");
1885  return aRet;
1886  }
1887 
1888  PDFObjectElement* pCatalog = pRoot->LookupObject();
1889  if (!pCatalog)
1890  {
1891  SAL_WARN("vcl.filter", "PDFDocument::GetPages: trailer has no catalog");
1892  return aRet;
1893  }
1894 
1895  PDFObjectElement* pPages = pCatalog->LookupObject("Pages");
1896  if (!pPages)
1897  {
1898  SAL_WARN("vcl.filter", "PDFDocument::GetPages: catalog (obj " << pCatalog->GetObjectValue()
1899  << ") has no pages");
1900  return aRet;
1901  }
1902 
1903  visitPages(pPages, aRet);
1904 
1905  return aRet;
1906 }
1907 
1908 void PDFDocument::PushBackEOF(size_t nOffset) { m_aEOFs.push_back(nOffset); }
1909 
1910 std::vector<PDFObjectElement*> PDFDocument::GetSignatureWidgets()
1911 {
1912  std::vector<PDFObjectElement*> aRet;
1913 
1914  std::vector<PDFObjectElement*> aPages = GetPages();
1915 
1916  for (const auto& pPage : aPages)
1917  {
1918  if (!pPage)
1919  continue;
1920 
1921  PDFElement* pAnnotsElement = pPage->Lookup("Annots");
1922  auto pAnnots = dynamic_cast<PDFArrayElement*>(pAnnotsElement);
1923  if (!pAnnots)
1924  {
1925  // Annots is not an array, see if it's a reference to an object
1926  // with a direct array.
1927  auto pAnnotsRef = dynamic_cast<PDFReferenceElement*>(pAnnotsElement);
1928  if (pAnnotsRef)
1929  {
1930  if (PDFObjectElement* pAnnotsObject = pAnnotsRef->LookupObject())
1931  {
1932  pAnnots = pAnnotsObject->GetArray();
1933  }
1934  }
1935  }
1936 
1937  if (!pAnnots)
1938  continue;
1939 
1940  for (const auto& pAnnot : pAnnots->GetElements())
1941  {
1942  auto pReference = dynamic_cast<PDFReferenceElement*>(pAnnot);
1943  if (!pReference)
1944  continue;
1945 
1946  PDFObjectElement* pAnnotObject = pReference->LookupObject();
1947  if (!pAnnotObject)
1948  continue;
1949 
1950  auto pFT = dynamic_cast<PDFNameElement*>(pAnnotObject->Lookup("FT"));
1951  if (!pFT || pFT->GetValue() != "Sig")
1952  continue;
1953 
1954  aRet.push_back(pAnnotObject);
1955  }
1956  }
1957 
1958  return aRet;
1959 }
1960 
1961 std::vector<unsigned char> PDFDocument::DecodeHexString(PDFHexStringElement const* pElement)
1962 {
1963  return svl::crypto::DecodeHexString(pElement->GetValue());
1964 }
1965 
1966 PDFCommentElement::PDFCommentElement(PDFDocument& rDoc)
1967  : m_rDoc(rDoc)
1968 {
1969 }
1970 
1972 {
1973  // Read from (including) the % char till (excluding) the end of the line/stream.
1974  OStringBuffer aBuf;
1975  char ch;
1976  rStream.ReadChar(ch);
1977  while (true)
1978  {
1979  if (ch == '\n' || ch == '\r' || rStream.eof())
1980  {
1981  m_aComment = aBuf.makeStringAndClear();
1982 
1983  if (m_aComment.startsWith("%%EOF"))
1984  m_rDoc.PushBackEOF(rStream.Tell());
1985 
1986  SAL_INFO("vcl.filter", "PDFCommentElement::Read: m_aComment is '" << m_aComment << "'");
1987  return true;
1988  }
1989  aBuf.append(ch);
1990  rStream.ReadChar(ch);
1991  }
1992 
1993  return false;
1994 }
1995 
1997 
1999 {
2000  OStringBuffer aBuf;
2001  m_nOffset = rStream.Tell();
2002  char ch;
2003  rStream.ReadChar(ch);
2004  if (rStream.eof())
2005  {
2006  return false;
2007  }
2008  if (!rtl::isAsciiDigit(static_cast<unsigned char>(ch)) && ch != '-' && ch != '.')
2009  {
2010  rStream.SeekRel(-1);
2011  return false;
2012  }
2013  while (!rStream.eof())
2014  {
2015  if (!rtl::isAsciiDigit(static_cast<unsigned char>(ch)) && ch != '-' && ch != '.')
2016  {
2017  rStream.SeekRel(-1);
2018  m_nLength = rStream.Tell() - m_nOffset;
2019  m_fValue = aBuf.makeStringAndClear().toDouble();
2020  SAL_INFO("vcl.filter", "PDFNumberElement::Read: m_fValue is '" << m_fValue << "'");
2021  return true;
2022  }
2023  aBuf.append(ch);
2024  rStream.ReadChar(ch);
2025  }
2026 
2027  return false;
2028 }
2029 
2030 sal_uInt64 PDFNumberElement::GetLocation() const { return m_nOffset; }
2031 
2032 sal_uInt64 PDFNumberElement::GetLength() const { return m_nLength; }
2033 
2035 
2036 bool PDFBooleanElement::Read(SvStream& /*rStream*/) { return true; }
2037 
2038 bool PDFNullElement::Read(SvStream& /*rStream*/) { return true; }
2039 
2041 {
2042  char ch;
2043  rStream.ReadChar(ch);
2044  if (ch != '<')
2045  {
2046  SAL_INFO("vcl.filter", "PDFHexStringElement::Read: expected '<' as first character");
2047  return false;
2048  }
2049  rStream.ReadChar(ch);
2050 
2051  OStringBuffer aBuf;
2052  while (!rStream.eof())
2053  {
2054  if (ch == '>')
2055  {
2056  m_aValue = aBuf.makeStringAndClear();
2057  SAL_INFO("vcl.filter",
2058  "PDFHexStringElement::Read: m_aValue length is " << m_aValue.getLength());
2059  return true;
2060  }
2061  aBuf.append(ch);
2062  rStream.ReadChar(ch);
2063  }
2064 
2065  return false;
2066 }
2067 
2068 const OString& PDFHexStringElement::GetValue() const { return m_aValue; }
2069 
2071 {
2072  char nPrevCh = 0;
2073  char ch = 0;
2074  rStream.ReadChar(ch);
2075  if (ch != '(')
2076  {
2077  SAL_INFO("vcl.filter", "PDFHexStringElement::Read: expected '(' as first character");
2078  return false;
2079  }
2080  nPrevCh = ch;
2081  rStream.ReadChar(ch);
2082 
2083  // Start with 1 nesting level as we read a '(' above already.
2084  int nDepth = 1;
2085  OStringBuffer aBuf;
2086  while (!rStream.eof())
2087  {
2088  if (ch == '(' && nPrevCh != '\\')
2089  ++nDepth;
2090 
2091  if (ch == ')' && nPrevCh != '\\')
2092  --nDepth;
2093 
2094  if (nDepth == 0)
2095  {
2096  // ')' of the outermost '(' is reached.
2097  m_aValue = aBuf.makeStringAndClear();
2098  SAL_INFO("vcl.filter",
2099  "PDFLiteralStringElement::Read: m_aValue is '" << m_aValue << "'");
2100  return true;
2101  }
2102  aBuf.append(ch);
2103  nPrevCh = ch;
2104  rStream.ReadChar(ch);
2105  }
2106 
2107  return false;
2108 }
2109 
2110 const OString& PDFLiteralStringElement::GetValue() const { return m_aValue; }
2111 
2113  : m_rDoc(rDoc)
2114 {
2115 }
2116 
2118 {
2119  m_nOffset = rStream.Tell();
2120  return true;
2121 }
2122 
2123 PDFElement* PDFTrailerElement::Lookup(const OString& rDictionaryKey)
2124 {
2125  if (m_aDictionary.empty())
2127 
2128  return PDFDictionaryElement::Lookup(m_aDictionary, rDictionaryKey);
2129 }
2130 
2131 sal_uInt64 PDFTrailerElement::GetLocation() const { return m_nOffset; }
2132 
2133 double PDFNumberElement::GetValue() const { return m_fValue; }
2134 
2135 PDFObjectElement::PDFObjectElement(PDFDocument& rDoc, double fObjectValue, double fGenerationValue)
2136  : m_rDoc(rDoc)
2137  , m_fObjectValue(fObjectValue)
2138  , m_fGenerationValue(fGenerationValue)
2139  , m_pNumberElement(nullptr)
2140  , m_nDictionaryOffset(0)
2141  , m_nDictionaryLength(0)
2142  , m_pDictionaryElement(nullptr)
2143  , m_nArrayOffset(0)
2144  , m_nArrayLength(0)
2145  , m_pArrayElement(nullptr)
2146  , m_pStreamElement(nullptr)
2147 {
2148 }
2149 
2151 {
2152  SAL_INFO("vcl.filter",
2153  "PDFObjectElement::Read: " << m_fObjectValue << " " << m_fGenerationValue << " obj");
2154  return true;
2155 }
2156 
2158 
/// Fills rDictionary with the key -> value pairs of the dictionary that starts
/// at pThis inside rElements (scanning starts at index 0 if pThis is not found).
///
/// Returns immediately with 0 when rDictionary already has entries. Nested
/// dictionaries are parsed recursively into their own m_aItems. When pThis is an
/// object, the offset and length of its toplevel dictionary are recorded on it,
/// and - unless an array was seen first - the dictionary element itself.
/// Scanning stops at the '>>' that closes the toplevel dictionary or at an
/// end-of-object element, whichever comes first.
///
/// @return index in rElements of the '>>' that closed the toplevel dictionary,
///         or 0 if no such token was reached.
2159 size_t PDFDictionaryElement::Parse(const std::vector<std::unique_ptr<PDFElement>>& rElements,
2160  PDFElement* pThis, std::map<OString, PDFElement*>& rDictionary)
2161 {
2162  // The index of last parsed element, in case of nested dictionaries.
2163  size_t nRet = 0;
2164 
2165  if (!rDictionary.empty())
2166  return nRet;
2167 
      // Marks pThis as being parsed; the nested-dictionary branch below consults
      // alreadyParsing() before recursing.
2168  pThis->setParsing(true);
2169 
2170  auto pThisObject = dynamic_cast<PDFObjectElement*>(pThis);
2171  // This is set to non-nullptr here for nested dictionaries only.
2172  auto pThisDictionary = dynamic_cast<PDFDictionaryElement*>(pThis);
2173 
2174  // Find out where the dictionary for this object starts.
2175  size_t nIndex = 0;
2176  for (size_t i = 0; i < rElements.size(); ++i)
2177  {
2178  if (rElements[i].get() == pThis)
2179  {
2180  nIndex = i;
2181  break;
2182  }
2183  }
2184 
      // Key currently waiting for its value, and where that key starts.
2185  OString aName;
2186  sal_uInt64 nNameOffset = 0;
      // Numbers seen since the last consumed value; see the comment at the end of the loop.
2187  std::vector<PDFNumberElement*> aNumbers;
2188  // The array value we're in -- if any.
2189  PDFArrayElement* pArray = nullptr;
2190  sal_uInt64 nDictionaryOffset = 0;
2191  int nDictionaryDepth = 0;
2192  // Toplevel dictionary found (not inside an array).
2193  bool bDictionaryFound = false;
2194  // Toplevel array found (not inside a dictionary).
2195  bool bArrayFound = false;
2196  for (size_t i = nIndex; i < rElements.size(); ++i)
2197  {
2198  // Dictionary tokens can be nested, track enter/leave.
2199  if (auto pDictionary = dynamic_cast<PDFDictionaryElement*>(rElements[i].get()))
2200  {
2201  bDictionaryFound = true;
2202  if (++nDictionaryDepth == 1)
2203  {
2204  // First dictionary start, track start offset.
2205  nDictionaryOffset = pDictionary->m_nLocation;
2206  if (pThisObject)
2207  {
2208  if (!bArrayFound)
2209  // Then the toplevel dictionary of the object.
2210  pThisObject->SetDictionary(pDictionary);
2211  pThisDictionary = pDictionary;
2212  pThisObject->SetDictionaryOffset(nDictionaryOffset);
2213  }
2214  }
2215  else if (!pDictionary->alreadyParsing())
2216  {
2217  // Nested dictionary.
      // The recursive call returns the index of the nested dictionary's '>>'.
      // Continuing from there means the end-dictionary check right below sees
      // that '>>' and undoes the ++nDictionaryDepth done above.
2218  const size_t nexti
2219  = PDFDictionaryElement::Parse(rElements, pDictionary, pDictionary->m_aItems);
2220  if (nexti >= i) // ensure we go forwards and not endlessly loop
2221  {
2222  i = nexti;
2223  rDictionary[aName] = pDictionary;
2224  aName.clear();
2225  }
2226  }
2227  }
2228 
2229  if (auto pEndDictionary = dynamic_cast<PDFEndDictionaryElement*>(rElements[i].get()))
2230  {
2231  if (--nDictionaryDepth == 0)
2232  {
2233  // Last dictionary end, track length and stop parsing.
2234  if (pThisObject)
2235  pThisObject->SetDictionaryLength(pEndDictionary->GetLocation()
2236  - nDictionaryOffset);
2237  nRet = i;
2238  break;
2239  }
2240  }
2241 
2242  auto pName = dynamic_cast<PDFNameElement*>(rElements[i].get());
2243  if (pName)
2244  {
      // A number was still pending when this name showed up: store the most
      // recent one as the value of the pending key.
2245  if (!aNumbers.empty())
2246  {
2247  PDFNumberElement* pNumber = aNumbers.back();
2248  rDictionary[aName] = pNumber;
2249  if (pThisDictionary)
2250  {
2251  pThisDictionary->SetKeyOffset(aName, nNameOffset);
2252  pThisDictionary->SetKeyValueLength(
2253  aName, pNumber->GetLocation() + pNumber->GetLength() - nNameOffset);
2254  }
2255  aName.clear();
2256  aNumbers.clear();
2257  }
2258 
2259  if (aName.isEmpty())
2260  {
2261  // Remember key.
2262  aName = pName->GetValue();
2263  nNameOffset = pName->GetLocation();
2264  }
2265  else
2266  {
2267  if (pArray)
2268  {
2269  if (bDictionaryFound)
2270  // Array inside dictionary.
2271  pArray->PushBack(pName);
2272  }
2273  else
2274  {
2275  // Name-name key-value.
2276  rDictionary[aName] = pName;
2277  if (pThisDictionary)
2278  {
2279  pThisDictionary->SetKeyOffset(aName, nNameOffset);
2280  pThisDictionary->SetKeyValueLength(aName, pName->GetLocation()
2282  - nNameOffset);
2283  }
2284  aName.clear();
2285  }
2286  }
2287  continue;
2288  }
2289 
2290  auto pArr = dynamic_cast<PDFArrayElement*>(rElements[i].get());
2291  if (pArr)
2292  {
2293  bArrayFound = true;
2294  pArray = pArr;
2295  continue;
2296  }
2297 
2298  auto pEndArr = dynamic_cast<PDFEndArrayElement*>(rElements[i].get());
2299  if (pArray && pEndArr)
2300  {
      // Numbers collected while inside the array are its remaining items.
2301  for (auto& pNumber : aNumbers)
2302  pArray->PushBack(pNumber);
2303  aNumbers.clear();
2304  rDictionary[aName] = pArray;
2305  if (pThisDictionary)
2306  {
2307  pThisDictionary->SetKeyOffset(aName, nNameOffset);
2308  // Include the ending ']' in the length of the key - (array)value pair length.
2309  pThisDictionary->SetKeyValueLength(aName, pEndArr->GetOffset() - nNameOffset + 1);
2310  }
2311  aName.clear();
2312  pArray = nullptr;
2313  continue;
2314  }
2315 
2316  auto pReference = dynamic_cast<PDFReferenceElement*>(rElements[i].get());
2317  if (pReference)
2318  {
2319  if (!pArray)
2320  {
2321  rDictionary[aName] = pReference;
2322  if (pThisDictionary)
2323  {
2324  pThisDictionary->SetKeyOffset(aName, nNameOffset);
2325  pThisDictionary->SetKeyValueLength(aName,
2326  pReference->GetOffset() - nNameOffset);
2327  }
2328  aName.clear();
2329  }
2330  else
2331  {
2332  if (bDictionaryFound)
2333  // Array inside dictionary.
2334  pArray->PushBack(pReference);
2335  }
      // Pending numbers are dropped once a reference is seen; they are not
      // stored as values of their own.
2336  aNumbers.clear();
2337  continue;
2338  }
2339 
2340  auto pLiteralString = dynamic_cast<PDFLiteralStringElement*>(rElements[i].get());
2341  if (pLiteralString)
2342  {
2343  rDictionary[aName] = pLiteralString;
2344  if (pThisDictionary)
2345  pThisDictionary->SetKeyOffset(aName, nNameOffset);
2346  aName.clear();
2347  continue;
2348  }
2349 
2350  auto pBoolean = dynamic_cast<PDFBooleanElement*>(rElements[i].get());
2351  if (pBoolean)
2352  {
2353  rDictionary[aName] = pBoolean;
2354  if (pThisDictionary)
2355  pThisDictionary->SetKeyOffset(aName, nNameOffset);
2356  aName.clear();
2357  continue;
2358  }
2359 
2360  auto pHexString = dynamic_cast<PDFHexStringElement*>(rElements[i].get());
2361  if (pHexString)
2362  {
2363  if (!pArray)
2364  {
2365  rDictionary[aName] = pHexString;
2366  if (pThisDictionary)
2367  pThisDictionary->SetKeyOffset(aName, nNameOffset);
2368  aName.clear();
2369  }
2370  else
2371  {
2372  pArray->PushBack(pHexString);
2373  }
2374  continue;
2375  }
2376 
      // Never scan past the end of the current object.
2377  if (dynamic_cast<PDFEndObjectElement*>(rElements[i].get()))
2378  break;
2379 
2380  // Just remember this, so that in case it's not a reference parameter,
2381  // we can handle it later.
2382  auto pNumber = dynamic_cast<PDFNumberElement*>(rElements[i].get());
2383  if (pNumber)
2384  aNumbers.push_back(pNumber);
2385  }
2386 
      // The loop ended with a number still pending: store it under the last key.
2387  if (!aNumbers.empty())
2388  {
2389  rDictionary[aName] = aNumbers.back();
2390  if (pThisDictionary)
2391  pThisDictionary->SetKeyOffset(aName, nNameOffset);
2392  aName.clear();
2393  aNumbers.clear();
2394  }
2395 
2396  pThis->setParsing(false);
2397 
2398  return nRet;
2399 }
2400 
2401 PDFElement* PDFDictionaryElement::Lookup(const std::map<OString, PDFElement*>& rDictionary,
2402  const OString& rKey)
2403 {
2404  auto it = rDictionary.find(rKey);
2405  if (it == rDictionary.end())
2406  return nullptr;
2407 
2408  return it->second;
2409 }
2410 
2412 {
2413  auto pKey = dynamic_cast<PDFReferenceElement*>(
2414  PDFDictionaryElement::Lookup(m_aItems, rDictionaryKey));
2415  if (!pKey)
2416  {
2417  SAL_WARN("vcl.filter",
2418  "PDFDictionaryElement::LookupObject: no such key with reference value: "
2419  << rDictionaryKey);
2420  return nullptr;
2421  }
2422 
2423  return pKey->LookupObject();
2424 }
2425 
2426 PDFElement* PDFDictionaryElement::LookupElement(const OString& rDictionaryKey)
2427 {
2428  return PDFDictionaryElement::Lookup(m_aItems, rDictionaryKey);
2429 }
2430 
2431 PDFElement* PDFObjectElement::Lookup(const OString& rDictionaryKey)
2432 {
2433  if (m_aDictionary.empty())
2434  {
2435  if (!m_aElements.empty())
2436  // This is a stored object in an object stream.
2438  else
2439  // Normal object: elements are stored as members of the document itself.
2441  }
2442 
2443  return PDFDictionaryElement::Lookup(m_aDictionary, rDictionaryKey);
2444 }
2445 
2446 PDFObjectElement* PDFObjectElement::LookupObject(const OString& rDictionaryKey)
2447 {
2448  auto pKey = dynamic_cast<PDFReferenceElement*>(Lookup(rDictionaryKey));
2449  if (!pKey)
2450  {
2451  SAL_WARN("vcl.filter", "PDFObjectElement::LookupObject: no such key with reference value: "
2452  << rDictionaryKey);
2453  return nullptr;
2454  }
2455 
2456  return pKey->LookupObject();
2457 }
2458 
2460 
2461 void PDFObjectElement::SetDictionaryOffset(sal_uInt64 nDictionaryOffset)
2462 {
2463  m_nDictionaryOffset = nDictionaryOffset;
2464 }
2465 
2467 {
2468  if (m_aDictionary.empty())
2470 
2471  return m_nDictionaryOffset;
2472 }
2473 
2474 void PDFObjectElement::SetArrayOffset(sal_uInt64 nArrayOffset) { m_nArrayOffset = nArrayOffset; }
2475 
2476 sal_uInt64 PDFObjectElement::GetArrayOffset() const { return m_nArrayOffset; }
2477 
2478 void PDFDictionaryElement::SetKeyOffset(const OString& rKey, sal_uInt64 nOffset)
2479 {
2480  m_aDictionaryKeyOffset[rKey] = nOffset;
2481 }
2482 
2483 void PDFDictionaryElement::SetKeyValueLength(const OString& rKey, sal_uInt64 nLength)
2484 {
2485  m_aDictionaryKeyValueLength[rKey] = nLength;
2486 }
2487 
2488 sal_uInt64 PDFDictionaryElement::GetKeyOffset(const OString& rKey) const
2489 {
2490  auto it = m_aDictionaryKeyOffset.find(rKey);
2491  if (it == m_aDictionaryKeyOffset.end())
2492  return 0;
2493 
2494  return it->second;
2495 }
2496 
2497 sal_uInt64 PDFDictionaryElement::GetKeyValueLength(const OString& rKey) const
2498 {
2499  auto it = m_aDictionaryKeyValueLength.find(rKey);
2500  if (it == m_aDictionaryKeyValueLength.end())
2501  return 0;
2502 
2503  return it->second;
2504 }
2505 
2506 const std::map<OString, PDFElement*>& PDFDictionaryElement::GetItems() const { return m_aItems; }
2507 
2508 void PDFObjectElement::SetDictionaryLength(sal_uInt64 nDictionaryLength)
2509 {
2510  m_nDictionaryLength = nDictionaryLength;
2511 }
2512 
2514 {
2515  if (m_aDictionary.empty())
2517 
2518  return m_nDictionaryLength;
2519 }
2520 
2521 void PDFObjectElement::SetArrayLength(sal_uInt64 nArrayLength) { m_nArrayLength = nArrayLength; }
2522 
2523 sal_uInt64 PDFObjectElement::GetArrayLength() const { return m_nArrayLength; }
2524 
2526 {
2527  if (m_aDictionary.empty())
2529  return m_pDictionaryElement;
2530 }
2531 
2533 {
2534  m_pDictionaryElement = pDictionaryElement;
2535 }
2536 
2538 {
2539  m_pNumberElement = pNumberElement;
2540 }
2541 
2543 
2544 const std::vector<PDFReferenceElement*>& PDFObjectElement::GetDictionaryReferences() const
2545 {
2546  return m_aDictionaryReferences;
2547 }
2548 
2550 {
2551  m_aDictionaryReferences.push_back(pReference);
2552 }
2553 
2554 const std::map<OString, PDFElement*>& PDFObjectElement::GetDictionaryItems()
2555 {
2556  if (m_aDictionary.empty())
2558 
2559  return m_aDictionary;
2560 }
2561 
2562 void PDFObjectElement::SetArray(PDFArrayElement* pArrayElement) { m_pArrayElement = pArrayElement; }
2563 
2565 {
2566  m_pStreamElement = pStreamElement;
2567 }
2568 
2570 
2572 
2574 {
2575  if (!m_pStreamElement)
2576  {
2577  SAL_WARN("vcl.filter", "PDFObjectElement::ParseStoredObjects: no stream");
2578  return;
2579  }
2580 
2581  auto pType = dynamic_cast<PDFNameElement*>(Lookup("Type"));
2582  if (!pType || pType->GetValue() != "ObjStm")
2583  {
2584  if (!pType)
2585  SAL_WARN("vcl.filter", "PDFDocument::ReadXRefStream: missing unexpected type");
2586  else
2587  SAL_WARN("vcl.filter",
2588  "PDFDocument::ReadXRefStream: unexpected type: " << pType->GetValue());
2589  return;
2590  }
2591 
2592  auto pFilter = dynamic_cast<PDFNameElement*>(Lookup("Filter"));
2593  if (!pFilter || pFilter->GetValue() != "FlateDecode")
2594  {
2595  if (!pFilter)
2596  SAL_WARN("vcl.filter", "PDFDocument::ReadXRefStream: missing filter");
2597  else
2598  SAL_WARN("vcl.filter",
2599  "PDFDocument::ReadXRefStream: unexpected filter: " << pFilter->GetValue());
2600  return;
2601  }
2602 
2603  auto pFirst = dynamic_cast<PDFNumberElement*>(Lookup("First"));
2604  if (!pFirst)
2605  {
2606  SAL_WARN("vcl.filter", "PDFObjectElement::ParseStoredObjects: no First");
2607  return;
2608  }
2609 
2610  auto pN = dynamic_cast<PDFNumberElement*>(Lookup("N"));
2611  if (!pN)
2612  {
2613  SAL_WARN("vcl.filter", "PDFObjectElement::ParseStoredObjects: no N");
2614  return;
2615  }
2616  size_t nN = pN->GetValue();
2617 
2618  auto pLength = dynamic_cast<PDFNumberElement*>(Lookup("Length"));
2619  if (!pLength)
2620  {
2621  SAL_WARN("vcl.filter", "PDFObjectElement::ParseStoredObjects: no length");
2622  return;
2623  }
2624  size_t nLength = pLength->GetValue();
2625 
2626  // Read and decompress it.
2627  SvMemoryStream& rEditBuffer = m_rDoc.GetEditBuffer();
2628  rEditBuffer.Seek(m_pStreamElement->GetOffset());
2629  std::vector<char> aBuf(nLength);
2630  rEditBuffer.ReadBytes(aBuf.data(), aBuf.size());
2631  SvMemoryStream aSource(aBuf.data(), aBuf.size(), StreamMode::READ);
2632  SvMemoryStream aStream;
2633  ZCodec aZCodec;
2634  aZCodec.BeginCompression();
2635  aZCodec.Decompress(aSource, aStream);
2636  if (!aZCodec.EndCompression())
2637  {
2638  SAL_WARN("vcl.filter", "PDFObjectElement::ParseStoredObjects: decompression failed");
2639  return;
2640  }
2641 
2642  nLength = aStream.TellEnd();
2643  aStream.Seek(0);
2644  std::vector<size_t> aObjNums;
2645  std::vector<size_t> aOffsets;
2646  std::vector<size_t> aLengths;
2647  // First iterate over and find out the lengths.
2648  for (size_t nObject = 0; nObject < nN; ++nObject)
2649  {
2650  PDFNumberElement aObjNum;
2651  if (!aObjNum.Read(aStream))
2652  {
2653  SAL_WARN("vcl.filter",
2654  "PDFObjectElement::ParseStoredObjects: failed to read object number");
2655  return;
2656  }
2657  aObjNums.push_back(aObjNum.GetValue());
2658 
2659  PDFDocument::SkipWhitespace(aStream);
2660 
2661  PDFNumberElement aByteOffset;
2662  if (!aByteOffset.Read(aStream))
2663  {
2664  SAL_WARN("vcl.filter",
2665  "PDFObjectElement::ParseStoredObjects: failed to read byte offset");
2666  return;
2667  }
2668  aOffsets.push_back(pFirst->GetValue() + aByteOffset.GetValue());
2669 
2670  if (aOffsets.size() > 1)
2671  aLengths.push_back(aOffsets.back() - aOffsets[aOffsets.size() - 2]);
2672  if (nObject + 1 == nN)
2673  aLengths.push_back(nLength - aOffsets.back());
2674 
2675  PDFDocument::SkipWhitespace(aStream);
2676  }
2677 
2678  // Now create streams with the proper length and tokenize the data.
2679  for (size_t nObject = 0; nObject < nN; ++nObject)
2680  {
2681  size_t nObjNum = aObjNums[nObject];
2682  size_t nOffset = aOffsets[nObject];
2683  size_t nLen = aLengths[nObject];
2684 
2685  aStream.Seek(nOffset);
2686  m_aStoredElements.push_back(std::make_unique<PDFObjectElement>(m_rDoc, nObjNum, 0));
2687  PDFObjectElement* pStored = m_aStoredElements.back().get();
2688 
2689  aBuf.clear();
2690  aBuf.resize(nLen);
2691  aStream.ReadBytes(aBuf.data(), aBuf.size());
2692  SvMemoryStream aStoredStream(aBuf.data(), aBuf.size(), StreamMode::READ);
2693 
2694  m_rDoc.Tokenize(aStoredStream, TokenizeMode::STORED_OBJECT, pStored->GetStoredElements(),
2695  pStored);
2696  // This is how references know the object is stored inside this object stream.
2697  m_rDoc.SetIDObject(nObjNum, pStored);
2698 
2699  // Store the stream of the object in the object stream for later use.
2700  std::unique_ptr<SvMemoryStream> pStreamBuffer(new SvMemoryStream());
2701  aStoredStream.Seek(0);
2702  pStreamBuffer->WriteStream(aStoredStream);
2703  pStored->SetStreamBuffer(pStreamBuffer);
2704  }
2705 }
2706 
2707 std::vector<std::unique_ptr<PDFElement>>& PDFObjectElement::GetStoredElements()
2708 {
2709  return m_aElements;
2710 }
2711 
2713 
2714 void PDFObjectElement::SetStreamBuffer(std::unique_ptr<SvMemoryStream>& pStreamBuffer)
2715 {
2716  m_pStreamBuffer = std::move(pStreamBuffer);
2717 }
2718 
2720 
2722  PDFNumberElement const& rGeneration)
2723  : m_rDoc(rDoc)
2724  , m_fObjectValue(rObject.GetValue())
2725  , m_fGenerationValue(rGeneration.GetValue())
2726  , m_rObject(rObject)
2727 {
2728 }
2729 
2731 
2733 {
2734  SAL_INFO("vcl.filter",
2735  "PDFReferenceElement::Read: " << m_fObjectValue << " " << m_fGenerationValue << " R");
2736  m_nOffset = rStream.Tell();
2737  return true;
2738 }
2739 
2740 sal_uInt64 PDFReferenceElement::GetOffset() const { return m_nOffset; }
2741 
2743 {
2744  size_t nOffset = m_rDoc.GetObjectOffset(m_fObjectValue);
2745  if (nOffset == 0)
2746  {
2747  SAL_WARN("vcl.filter", "PDFReferenceElement::LookupNumber: found no offset for object #"
2748  << m_fObjectValue);
2749  return 0;
2750  }
2751 
2752  sal_uInt64 nOrigPos = rStream.Tell();
2753  comphelper::ScopeGuard g([&]() { rStream.Seek(nOrigPos); });
2754 
2755  rStream.Seek(nOffset);
2756  {
2757  PDFDocument::SkipWhitespace(rStream);
2758  PDFNumberElement aNumber;
2759  bool bRet = aNumber.Read(rStream);
2760  if (!bRet || aNumber.GetValue() != m_fObjectValue)
2761  {
2762  SAL_WARN("vcl.filter",
2763  "PDFReferenceElement::LookupNumber: offset points to not matching object");
2764  return 0;
2765  }
2766  }
2767 
2768  {
2769  PDFDocument::SkipWhitespace(rStream);
2770  PDFNumberElement aNumber;
2771  bool bRet = aNumber.Read(rStream);
2772  if (!bRet || aNumber.GetValue() != m_fGenerationValue)
2773  {
2774  SAL_WARN("vcl.filter",
2775  "PDFReferenceElement::LookupNumber: offset points to not matching generation");
2776  return 0;
2777  }
2778  }
2779 
2780  {
2781  PDFDocument::SkipWhitespace(rStream);
2782  OString aKeyword = PDFDocument::ReadKeyword(rStream);
2783  if (aKeyword != "obj")
2784  {
2785  SAL_WARN("vcl.filter",
2786  "PDFReferenceElement::LookupNumber: offset doesn't point to an obj keyword");
2787  return 0;
2788  }
2789  }
2790 
2791  PDFDocument::SkipWhitespace(rStream);
2792  PDFNumberElement aNumber;
2793  if (!aNumber.Read(rStream))
2794  {
2795  SAL_WARN("vcl.filter",
2796  "PDFReferenceElement::LookupNumber: failed to read referenced number");
2797  return 0;
2798  }
2799 
2800  return aNumber.GetValue();
2801 }
2802 
2804 {
2806 }
2807 
2809 {
2810  auto itIDObjects = m_aIDObjects.find(nObjectNumber);
2811 
2812  if (itIDObjects != m_aIDObjects.end())
2813  return itIDObjects->second;
2814 
2815  SAL_WARN("vcl.filter", "PDFDocument::LookupObject: can't find obj " << nObjectNumber);
2816  return nullptr;
2817 }
2818 
2820 
2822 
2824 
2826 {
2827  char ch;
2828  rStream.ReadChar(ch);
2829  if (ch != '<')
2830  {
2831  SAL_WARN("vcl.filter", "PDFDictionaryElement::Read: unexpected character: " << ch);
2832  return false;
2833  }
2834 
2835  if (rStream.eof())
2836  {
2837  SAL_WARN("vcl.filter", "PDFDictionaryElement::Read: unexpected end of file");
2838  return false;
2839  }
2840 
2841  rStream.ReadChar(ch);
2842  if (ch != '<')
2843  {
2844  SAL_WARN("vcl.filter", "PDFDictionaryElement::Read: unexpected character: " << ch);
2845  return false;
2846  }
2847 
2848  m_nLocation = rStream.Tell();
2849 
2850  SAL_INFO("vcl.filter", "PDFDictionaryElement::Read: '<<'");
2851 
2852  return true;
2853 }
2854 
2856 
2858 
2860 {
2861  m_nLocation = rStream.Tell();
2862  char ch;
2863  rStream.ReadChar(ch);
2864  if (ch != '>')
2865  {
2866  SAL_WARN("vcl.filter", "PDFEndDictionaryElement::Read: unexpected character: " << ch);
2867  return false;
2868  }
2869 
2870  if (rStream.eof())
2871  {
2872  SAL_WARN("vcl.filter", "PDFEndDictionaryElement::Read: unexpected end of file");
2873  return false;
2874  }
2875 
2876  rStream.ReadChar(ch);
2877  if (ch != '>')
2878  {
2879  SAL_WARN("vcl.filter", "PDFEndDictionaryElement::Read: unexpected character: " << ch);
2880  return false;
2881  }
2882 
2883  SAL_INFO("vcl.filter", "PDFEndDictionaryElement::Read: '>>'");
2884 
2885  return true;
2886 }
2887 
/// Defaulted constructor, defined out of line.
2888 PDFNameElement::PDFNameElement() = default;
2889 
2891 {
2892  char ch;
2893  rStream.ReadChar(ch);
2894  if (ch != '/')
2895  {
2896  SAL_WARN("vcl.filter", "PDFNameElement::Read: unexpected character: " << ch);
2897  return false;
2898  }
2899  m_nLocation = rStream.Tell();
2900 
2901  if (rStream.eof())
2902  {
2903  SAL_WARN("vcl.filter", "PDFNameElement::Read: unexpected end of file");
2904  return false;
2905  }
2906 
2907  // Read till the first white-space.
2908  OStringBuffer aBuf;
2909  rStream.ReadChar(ch);
2910  while (!rStream.eof())
2911  {
2912  if (rtl::isAsciiWhiteSpace(static_cast<unsigned char>(ch)) || ch == '/' || ch == '['
2913  || ch == ']' || ch == '<' || ch == '>' || ch == '(')
2914  {
2915  rStream.SeekRel(-1);
2916  m_aValue = aBuf.makeStringAndClear();
2917  SAL_INFO("vcl.filter", "PDFNameElement::Read: m_aValue is '" << m_aValue << "'");
2918  return true;
2919  }
2920  aBuf.append(ch);
2921  rStream.ReadChar(ch);
2922  }
2923 
2924  return false;
2925 }
2926 
2927 const OString& PDFNameElement::GetValue() const { return m_aValue; }
2928 
2929 sal_uInt64 PDFNameElement::GetLocation() const { return m_nLocation; }
2930 
2932  : m_nLength(nLength)
2933  , m_nOffset(0)
2934 {
2935 }
2936 
2938 {
2939  SAL_INFO("vcl.filter", "PDFStreamElement::Read: length is " << m_nLength);
2940  m_nOffset = rStream.Tell();
2941  std::vector<unsigned char> aBytes(m_nLength);
2942  rStream.ReadBytes(aBytes.data(), aBytes.size());
2943  m_aMemory.WriteBytes(aBytes.data(), aBytes.size());
2944 
2945  return rStream.good();
2946 }
2947 
2949 
2950 sal_uInt64 PDFStreamElement::GetOffset() const { return m_nOffset; }
2951 
2952 bool PDFEndStreamElement::Read(SvStream& /*rStream*/) { return true; }
2953 
2954 bool PDFEndObjectElement::Read(SvStream& /*rStream*/) { return true; }
2955 
2957  : m_pObject(pObject)
2958 {
2959 }
2960 
2962 {
2963  char ch;
2964  rStream.ReadChar(ch);
2965  if (ch != '[')
2966  {
2967  SAL_WARN("vcl.filter", "PDFArrayElement::Read: unexpected character: " << ch);
2968  return false;
2969  }
2970 
2971  SAL_INFO("vcl.filter", "PDFArrayElement::Read: '['");
2972 
2973  return true;
2974 }
2975 
2977 {
2978  if (m_pObject)
2979  SAL_INFO("vcl.filter",
2980  "PDFArrayElement::PushBack: object is " << m_pObject->GetObjectValue());
2981  m_aElements.push_back(pElement);
2982 }
2983 
2984 const std::vector<PDFElement*>& PDFArrayElement::GetElements() const { return m_aElements; }
2985 
2987 
2989 {
2990  m_nOffset = rStream.Tell();
2991  char ch;
2992  rStream.ReadChar(ch);
2993  if (ch != ']')
2994  {
2995  SAL_WARN("vcl.filter", "PDFEndArrayElement::Read: unexpected character: " << ch);
2996  return false;
2997  }
2998 
2999  SAL_INFO("vcl.filter", "PDFEndArrayElement::Read: ']'");
3000 
3001  return true;
3002 }
3003 
3004 sal_uInt64 PDFEndArrayElement::GetOffset() const { return m_nOffset; }
3005 
3006 } // namespace filter
3007 } // namespace vcl
3008 
3009 /* vim:set shiftwidth=4 softtabstop=4 expandtab: */
std::vector< std::unique_ptr< PDFElement > > m_aElements
Elements of an object in an object stream.
sal_uInt64 m_nOffset
Location of the end of the trailer token.
Array object: a list.
bool Read(SvStream &rStream) override
sal_uInt64 m_nDictionaryOffset
Position after the '<<' token.
Definition: pdfdocument.hxx:89
bool SetStreamSize(sal_uInt64 nSize)
PDFObjectElement(PDFDocument &rDoc, double fObjectValue, double fGenerationValue)
Numbering object: an integer or a real.
Boolean object: a 'true' or a 'false'.
Definition: pdfdocument.cxx:92
PDFElement * Lookup(const OString &rDictionaryKey)
PDFDictionaryElement * m_pDictionaryElement
Definition: pdfdocument.hxx:92
size_t GetObjectOffset(size_t nIndex) const
sal_uInt64 m_nLocation
Offset after the '/' token.
End of an array: ']'.
Definition: pdfdocument.cxx:80
void SetStream(PDFStreamElement *pStreamElement)
SvMemoryStream m_aMemory
The byte array itself.
std::string GetValue
const int MAX_SIGNATURE_CONTENT_LENGTH
Definition: pdfdocument.cxx:36
PDFArrayElement * m_pArrayElement
The contained direct array, if any.
Definition: pdfdocument.hxx:98
std::vector< std::unique_ptr< PDFObjectElement > > m_aStoredElements
Objects of an object stream.
double LookupNumber(SvStream &rStream) const
Assuming the reference points to a number object, return its value.
void SetDictionaryLength(sal_uInt64 nDictionaryLength)
PDFTrailerElement(PDFDocument &rDoc)
aBuf
#define STREAM_SEEK_TO_END
sal_uInt64 m_nLocation
Offset before the '>>' token.
Definition: pdfdocument.cxx:57
static size_t Parse(const std::vector< std::unique_ptr< PDFElement >> &rElements, PDFElement *pThis, std::map< OString, PDFElement * > &rDictionary)
sal_uInt64 Seek(sal_uInt64 nPos)
std::vector< unsigned char > DecodeHexString(const OString &rHex)
void SetDictionaryOffset(sal_uInt64 nDictionaryOffset)
PDFDocument & m_rDoc
The document owning this element.
Definition: pdfdocument.hxx:82
void PushBack(PDFElement *pElement)
sal_uInt64 GetLocation() const
static sal_uInt64 GetLength()
PDFObjectElement * LookupObject(const OString &rDictionaryKey)
Looks up an object which is only referenced in this dictionary.
sal_uInt64 m_nArrayOffset
Position after the '[' token, if m_pArrayElement is set.
Definition: pdfdocument.hxx:94
std::map< OString, PDFElement * > m_aItems
Key-value pairs when the dictionary is a nested value.
void SetArrayOffset(sal_uInt64 nArrayOffset)
SvStream & WriteCharPtr(const char *pBuf)
void SetArrayLength(sal_uInt64 nArrayLength)
sal_uInt64 SeekRel(sal_Int64 nPos)
void setVisiting(bool bVisiting)
Definition: pdfdocument.hxx:72
SvMemoryStream * GetStreamBuffer() const
void SetStreamBuffer(std::unique_ptr< SvMemoryStream > &pStreamBuffer)
static OString GetDateTime()
Get current date/time in PDF D:YYYYMMDDHHMMSS form.
SvStream & WriteUInt32AsString(sal_uInt32 nUInt32)
std::unique_ptr< SvMemoryStream > m_pStreamBuffer
Uncompressed buffer of an object in an object stream.
bool Read(SvStream &rStream) override
A byte range in a PDF file.
Definition: pdfdocument.hxx:63
bool Read(SvStream &rStream) override
long EndCompression()
FUNC_TYPE const nType
bool eof() const
PDFObjectElement * LookupObject(size_t nObjectNumber)
Look up object based on object number, possibly by parsing object streams.
bool Read(SvStream &rStream) override
In-memory representation of an on-disk PDF document.
sal_uInt64 m_nArrayLength
Length of the array buffer till (before) the ']' token.
Definition: pdfdocument.hxx:96
An entry in a cross-reference stream.
PDFReferenceElement(PDFDocument &rDoc, PDFNumberElement &rObject, PDFNumberElement const &rGeneration)
XRefEntryType GetType() const
const OString & GetValue() const
static void visitPages(PDFObjectElement *pPages, std::vector< PDFObjectElement * > &rRet)
Visits the page tree recursively, looking for page objects.
void PushBackEOF(size_t nOffset)
Remember the end location of an EOF token.
PDFObjectElement *const m_pObject
The object that contains this array.
bool Read(SvStream &rStream) override
sal_uInt64 GetArrayLength() const
sal_Int32 nElements
bool Read(SvStream &rStream) override
PDFNumberElement * m_pNumberElement
If set, the object contains this number element (outside any dictionary/array).
Definition: pdfdocument.hxx:87
void Compress(SvStream &rIStm, SvStream &rOStm)
void SetType(XRefEntryType eType)
Same as END_OF_OBJECT, but for object streams (no endobj keyword).
void SetKeyValueLength(const OString &rKey, sal_uInt64 nLength)
sal_uInt64 m_nOffset
Input file start location.
void ParseStoredObjects()
Parse objects stored in this object stream.
bool Read(SvStream &rStream) override
sal_uInt64 GetOffset() const
std::map< OString, sal_uInt64 > m_aDictionaryKeyOffset
Position after the '/' token.
PDFStreamElement * m_pStreamElement
The stream of this object, used when this is an object stream.
bool GetDirty() const
long Decompress(SvStream &rIStm, SvStream &rOStm)
Indirect object: something with a unique ID.
Definition: pdfdocument.hxx:79
bool Read(SvStream &rStream) override
PDFNumberElement & m_rObject
The element providing the object number.
sal_uInt64 m_nLength
Input file token length.
static OString ReadKeyword(SvStream &rStream)
int i
sal_uInt64 GetSize()
bool Read(SvStream &rStream) override
OStringBuffer & padToLength(OStringBuffer &rBuffer, sal_Int32 nLength, sal_Char cFill= '\0')
std::size_t WriteBytes(const void *pData, std::size_t nSize)
End of a dictionary: '>>'.
Definition: pdfdocument.cxx:54
void AddDictionaryReference(PDFReferenceElement *pReference)
void BeginCompression(int nCompressLevel=ZCODEC_DEFAULT_COMPRESSION, bool gzLib=false)
SvMemoryStream m_aEditBuffer
All editing takes place in this buffer, if it happens.
A one-liner comment.
Definition: pdfdocument.cxx:41
sal_uInt64 GetLocation() const
Dictionary object: a set key-value pairs.
sal_uInt16 sal_Char * pName
sal_uInt64 GetOffset() const
bool Read(SvStream &rStream) override
std::vector< PDFElement * > m_aElements
bool Tokenize(SvStream &rStream, TokenizeMode eMode, std::vector< std::unique_ptr< PDFElement >> &rElements, PDFObjectElement *pObjectElement)
Tokenize elements from current offset.
bool Read(SvStream &rStream) override
std::vector< PDFReferenceElement * > m_aDictionaryReferences
List of all reference elements inside this object's dictionary and nested dictionaries.
PDFStreamElement * GetStream() const
Access to the stream of the object, if it has any.
SvStream & WriteStream(SvStream &rStream)
const std::map< OString, PDFElement * > & GetItems() const
std::map< OString, sal_uInt64 > m_aDictionaryKeyValueLength
Length of the dictionary key and value, till (before) the next token.
sal_uInt64 GetKeyOffset(const OString &rKey) const
void SetIDObject(size_t nID, PDFObjectElement *pObject)
Register an object (owned directly or indirectly by m_aElements) as a provider for a given ID...
void setParsing(bool bParsing)
Definition: pdfdocument.hxx:74
bool Read(SvStream &rStream) override
sal_uInt64 GetKeyValueLength(const OString &rKey) const
PDFObjectElement * LookupObject(const OString &rDictionaryKey)
std::size_t ReadBytes(void *pData, std::size_t nSize)
bool Read(SvStream &rStream) override
SvMemoryStream & GetEditBuffer()
Access to the input document, even after the input stream is gone.
const OString & GetValue() const
std::vector< std::unique_ptr< PDFElement > > & GetStoredElements()
void SetNumberElement(PDFNumberElement *pNumberElement)
sal_uInt64 m_nOffset
Location before the ']' token.
Definition: pdfdocument.cxx:83
sal_uInt64 GetLength() const
PDFDictionaryElement * GetDictionary()
const std::vector< std::unique_ptr< PDFElement > > & GetElements() const
bool Read(SvStream &rStream) override
void SetDirty(bool bDirty)
PDFObjectElement * LookupObject()
Lookup referenced object, without assuming anything about its contents.
SvStream & ReadChar(char &rChar)
End of an object: 'endobj' keyword.
Definition: pdfdocument.cxx:73
PDFElement * LookupElement(const OString &rDictionaryKey)
Looks up an element which is contained in this dictionary.
std::map< OString, PDFElement * > m_aDictionary
PDFArrayElement * GetArray() const
SvMemoryStream & GetMemory()
Null object: the 'null' singleton.
const std::vector< PDFReferenceElement * > & GetDictionaryReferences() const
#define SAL_INFO(area, stream)
bool Read(SvStream &rStream) override
std::map< size_t, PDFObjectElement * > m_aIDObjects
Object ID <-> Object pointer map.
static void SkipWhitespace(SvStream &rStream)
PDFNumberElement * GetNumberElement() const
sal_uInt64 Tell() const
const OString & GetValue() const
bool Sign(OStringBuffer &rCMSHexBuffer)
Reference object: something with a unique ID.
static void AppendUnicodeTextString(const OUString &rString, OStringBuffer &rBuffer)
Write rString as a PDF hex string into rBuffer.
const std::vector< PDFElement * > & GetElements() const
End of a stream: 'endstream' keyword.
Definition: pdfdocument.cxx:66
sal_uInt64 GetLocation() const
bool good() const
OString const aName
#define SAL_WARN(area, stream)
bool alreadyVisiting() const
Definition: pdfdocument.hxx:73
Literal string: in (asdf) form.
PDFArrayElement(PDFObjectElement *pObject)
bool Read(SvStream &rStream) override
Name object: a key string.
void SetOffset(sal_uInt64 nOffset)
The trailer singleton is at the end of the doc.
const std::map< OString, PDFElement * > & GetDictionaryItems()
Get access to the parsed key-value items from the object dictionary.
sal_Int32 const nLength
void SetDictionary(PDFDictionaryElement *pDictionaryElement)
PDFNumberElement & GetObjectElement() const
void AddDataRange(const void *pData, sal_Int32 size)
PDFElement * Lookup(const OString &rDictionaryKey)
sal_Int32 nPos
sal_uInt64 m_nDictionaryLength
Length of the dictionary buffer up to (but not including) the '>>' token.
Definition: pdfdocument.hxx:91
sal_uInt64 GetArrayOffset() const
Stream object: a byte array with a known length.
sal_uInt64 m_nLocation
Offset after the '<<' token.
static PDFElement * Lookup(const std::map< OString, PDFElement * > &rDictionary, const OString &rKey)
const void * GetData()
void SetArray(PDFArrayElement *pArrayElement)
bool Read(SvStream &rStream) override
std::map< OString, PDFElement * > m_aDictionary
Definition: pdfdocument.hxx:85
sal_uInt64 m_nOffset
Location after the 'R' token.
void SetKeyOffset(const OString &rKey, sal_uInt64 nOffset)