LibreOffice Module vcl (master)  1
pdfdocument.cxx
Go to the documentation of this file.
1 /* -*- Mode: C++; tab-width: 4; indent-tabs-mode: nil; c-basic-offset: 4 -*- */
2 /*
3  * This file is part of the LibreOffice project.
4  *
5  * This Source Code Form is subject to the terms of the Mozilla Public
6  * License, v. 2.0. If a copy of the MPL was not distributed with this
7  * file, You can obtain one at http://mozilla.org/MPL/2.0/.
8  */
9 
11 
12 #include <map>
13 #include <memory>
14 #include <vector>
15 
16 #include <com/sun/star/uno/Sequence.hxx>
17 #include <com/sun/star/security/XCertificate.hpp>
18 
20 #include <comphelper/string.hxx>
21 #include <rtl/character.hxx>
22 #include <rtl/strbuf.hxx>
23 #include <rtl/string.hxx>
24 #include <sal/log.hxx>
25 #include <sal/types.h>
26 #include <svl/cryptosign.hxx>
27 #include <tools/zcodec.hxx>
28 #include <vcl/pdfwriter.hxx>
29 
30 using namespace com::sun::star;
31 
32 namespace vcl::filter
33 {
/// Number of characters reserved for the hex-encoded signature in the /Contents placeholder.
constexpr int MAX_SIGNATURE_CONTENT_LENGTH = 50000;
35 
36 class PDFTrailerElement;
37 
38 namespace
39 {
41 class PDFCommentElement : public PDFElement
42 {
43  PDFDocument& m_rDoc;
44  OString m_aComment;
45 
46 public:
47  explicit PDFCommentElement(PDFDocument& rDoc);
48  bool Read(SvStream& rStream) override;
49 };
50 }
51 
52 class PDFReferenceElement;
53 
54 namespace
55 {
57 class PDFEndDictionaryElement : public PDFElement
58 {
60  sal_uInt64 m_nLocation = 0;
61 
62 public:
63  PDFEndDictionaryElement();
64  bool Read(SvStream& rStream) override;
65  sal_uInt64 GetLocation() const;
66 };
67 
/// Element type for the end of a stream (presumably the 'endstream' keyword -- confirm in Read()).
class PDFEndStreamElement : public PDFElement
{
public:
    bool Read(SvStream& rStream) override;
};
74 
/// Element type for the end of an object (presumably the 'endobj' keyword -- confirm in Read()).
class PDFEndObjectElement : public PDFElement
{
public:
    bool Read(SvStream& rStream) override;
};
81 
83 class PDFEndArrayElement : public PDFElement
84 {
86  sal_uInt64 m_nOffset = 0;
87 
88 public:
89  PDFEndArrayElement();
90  bool Read(SvStream& rStream) override;
91  sal_uInt64 GetOffset() const;
92 };
93 
95 class PDFBooleanElement : public PDFElement
96 {
97 public:
98  explicit PDFBooleanElement(bool bValue);
99  bool Read(SvStream& rStream) override;
100 };
101 
/// Element type for a null value (presumably the 'null' keyword -- confirm in Read()).
class PDFNullElement : public PDFElement
{
public:
    bool Read(SvStream& rStream) override;
};
108 }
109 
112 {
114  std::map<OString, PDFElement*> m_aDictionary;
116  sal_uInt64 m_nOffset = 0;
117 
118 public:
119  explicit PDFTrailerElement(PDFDocument& rDoc);
120  bool Read(SvStream& rStream) override;
121  PDFElement* Lookup(const OString& rDictionaryKey);
122  sal_uInt64 GetLocation() const;
123 };
124 
// Out-of-line defaulted constructor: members keep their in-class initializers.
XRefEntry::XRefEntry() = default;
126 
// Out-of-line defaulted constructor: members keep their in-class initializers.
PDFDocument::PDFDocument() = default;
128 
129 bool PDFDocument::RemoveSignature(size_t nPosition)
130 {
131  std::vector<PDFObjectElement*> aSignatures = GetSignatureWidgets();
132  if (nPosition >= aSignatures.size())
133  {
134  SAL_WARN("vcl.filter", "PDFDocument::RemoveSignature: invalid nPosition");
135  return false;
136  }
137 
138  if (aSignatures.size() != m_aEOFs.size() - 1)
139  {
140  SAL_WARN("vcl.filter", "PDFDocument::RemoveSignature: no 1:1 mapping between signatures "
141  "and incremental updates");
142  return false;
143  }
144 
145  // The EOF offset is the end of the original file, without the signature at
146  // nPosition.
147  m_aEditBuffer.Seek(m_aEOFs[nPosition]);
148  // Drop all bytes after the current position.
149  m_aEditBuffer.SetStreamSize(m_aEditBuffer.Tell() + 1);
150 
151  return m_aEditBuffer.good();
152 }
153 
154 sal_uInt32 PDFDocument::GetNextSignature()
155 {
156  sal_uInt32 nRet = 0;
157  for (const auto& pSignature : GetSignatureWidgets())
158  {
159  auto pT = dynamic_cast<PDFLiteralStringElement*>(pSignature->Lookup("T"));
160  if (!pT)
161  continue;
162 
163  const OString& rValue = pT->GetValue();
164  const OString aPrefix = "Signature";
165  if (!rValue.startsWith(aPrefix))
166  continue;
167 
168  nRet = std::max(nRet, rValue.copy(aPrefix.getLength()).toUInt32());
169  }
170 
171  return nRet + 1;
172 }
173 
174 sal_Int32 PDFDocument::WriteSignatureObject(const OUString& rDescription, bool bAdES,
175  sal_uInt64& rLastByteRangeOffset,
176  sal_Int64& rContentOffset)
177 {
178  // Write signature object.
179  sal_Int32 nSignatureId = m_aXRef.size();
180  XRefEntry aSignatureEntry;
181  aSignatureEntry.SetOffset(m_aEditBuffer.Tell());
182  aSignatureEntry.SetDirty(true);
183  m_aXRef[nSignatureId] = aSignatureEntry;
184  OStringBuffer aSigBuffer;
185  aSigBuffer.append(nSignatureId);
186  aSigBuffer.append(" 0 obj\n");
187  aSigBuffer.append("<</Contents <");
188  rContentOffset = aSignatureEntry.GetOffset() + aSigBuffer.getLength();
189  // Reserve space for the PKCS#7 object.
190  OStringBuffer aContentFiller(MAX_SIGNATURE_CONTENT_LENGTH);
191  comphelper::string::padToLength(aContentFiller, MAX_SIGNATURE_CONTENT_LENGTH, '0');
192  aSigBuffer.append(aContentFiller.makeStringAndClear());
193  aSigBuffer.append(">\n/Type/Sig/SubFilter");
194  if (bAdES)
195  aSigBuffer.append("/ETSI.CAdES.detached");
196  else
197  aSigBuffer.append("/adbe.pkcs7.detached");
198 
199  // Time of signing.
200  aSigBuffer.append(" /M (");
201  aSigBuffer.append(vcl::PDFWriter::GetDateTime());
202  aSigBuffer.append(")");
203 
204  // Byte range: we can write offset1-length1 and offset2 right now, will
205  // write length2 later.
206  aSigBuffer.append(" /ByteRange [ 0 ");
207  // -1 and +1 is the leading "<" and the trailing ">" around the hex string.
208  aSigBuffer.append(rContentOffset - 1);
209  aSigBuffer.append(" ");
210  aSigBuffer.append(rContentOffset + MAX_SIGNATURE_CONTENT_LENGTH + 1);
211  aSigBuffer.append(" ");
212  rLastByteRangeOffset = aSignatureEntry.GetOffset() + aSigBuffer.getLength();
213  // We don't know how many bytes we need for the last ByteRange value, this
214  // should be enough.
215  OStringBuffer aByteRangeFiller;
216  comphelper::string::padToLength(aByteRangeFiller, 100, ' ');
217  aSigBuffer.append(aByteRangeFiller.makeStringAndClear());
218  // Finish the Sig obj.
219  aSigBuffer.append(" /Filter/Adobe.PPKMS");
220 
221  if (!rDescription.isEmpty())
222  {
223  aSigBuffer.append("/Reason<");
224  vcl::PDFWriter::AppendUnicodeTextString(rDescription, aSigBuffer);
225  aSigBuffer.append(">");
226  }
227 
228  aSigBuffer.append(" >>\nendobj\n\n");
229  m_aEditBuffer.WriteOString(aSigBuffer.toString());
230 
231  return nSignatureId;
232 }
233 
234 sal_Int32 PDFDocument::WriteAppearanceObject()
235 {
236  // Write appearance object.
237  sal_Int32 nAppearanceId = m_aXRef.size();
238  XRefEntry aAppearanceEntry;
239  aAppearanceEntry.SetOffset(m_aEditBuffer.Tell());
240  aAppearanceEntry.SetDirty(true);
241  m_aXRef[nAppearanceId] = aAppearanceEntry;
242  m_aEditBuffer.WriteUInt32AsString(nAppearanceId);
243  m_aEditBuffer.WriteCharPtr(" 0 obj\n");
244  m_aEditBuffer.WriteCharPtr("<</Type/XObject\n/Subtype/Form\n");
245  m_aEditBuffer.WriteCharPtr("/BBox[0 0 0 0]\n/Length 0\n>>\n");
246  m_aEditBuffer.WriteCharPtr("stream\n\nendstream\nendobj\n\n");
247 
248  return nAppearanceId;
249 }
250 
251 sal_Int32 PDFDocument::WriteAnnotObject(PDFObjectElement const& rFirstPage, sal_Int32 nSignatureId,
252  sal_Int32 nAppearanceId)
253 {
254  // Decide what identifier to use for the new signature.
255  sal_uInt32 nNextSignature = GetNextSignature();
256 
257  // Write the Annot object, references nSignatureId and nAppearanceId.
258  sal_Int32 nAnnotId = m_aXRef.size();
259  XRefEntry aAnnotEntry;
260  aAnnotEntry.SetOffset(m_aEditBuffer.Tell());
261  aAnnotEntry.SetDirty(true);
262  m_aXRef[nAnnotId] = aAnnotEntry;
263  m_aEditBuffer.WriteUInt32AsString(nAnnotId);
264  m_aEditBuffer.WriteCharPtr(" 0 obj\n");
265  m_aEditBuffer.WriteCharPtr("<</Type/Annot/Subtype/Widget/F 132\n");
266  m_aEditBuffer.WriteCharPtr("/Rect[0 0 0 0]\n");
267  m_aEditBuffer.WriteCharPtr("/FT/Sig\n");
268  m_aEditBuffer.WriteCharPtr("/P ");
269  m_aEditBuffer.WriteUInt32AsString(rFirstPage.GetObjectValue());
270  m_aEditBuffer.WriteCharPtr(" 0 R\n");
271  m_aEditBuffer.WriteCharPtr("/T(Signature");
272  m_aEditBuffer.WriteUInt32AsString(nNextSignature);
273  m_aEditBuffer.WriteCharPtr(")\n");
274  m_aEditBuffer.WriteCharPtr("/V ");
275  m_aEditBuffer.WriteUInt32AsString(nSignatureId);
276  m_aEditBuffer.WriteCharPtr(" 0 R\n");
277  m_aEditBuffer.WriteCharPtr("/DV ");
278  m_aEditBuffer.WriteUInt32AsString(nSignatureId);
279  m_aEditBuffer.WriteCharPtr(" 0 R\n");
280  m_aEditBuffer.WriteCharPtr("/AP<<\n/N ");
281  m_aEditBuffer.WriteUInt32AsString(nAppearanceId);
282  m_aEditBuffer.WriteCharPtr(" 0 R\n>>\n");
283  m_aEditBuffer.WriteCharPtr(">>\nendobj\n\n");
284 
285  return nAnnotId;
286 }
287 
288 bool PDFDocument::WritePageObject(PDFObjectElement& rFirstPage, sal_Int32 nAnnotId)
289 {
290  PDFElement* pAnnots = rFirstPage.Lookup("Annots");
291  auto pAnnotsReference = dynamic_cast<PDFReferenceElement*>(pAnnots);
292  if (pAnnotsReference)
293  {
294  // Write the updated Annots key of the Page object.
295  PDFObjectElement* pAnnotsObject = pAnnotsReference->LookupObject();
296  if (!pAnnotsObject)
297  {
298  SAL_WARN("vcl.filter", "PDFDocument::Sign: invalid Annots reference");
299  return false;
300  }
301 
302  sal_uInt32 nAnnotsId = pAnnotsObject->GetObjectValue();
303  m_aXRef[nAnnotsId].SetType(XRefEntryType::NOT_COMPRESSED);
304  m_aXRef[nAnnotsId].SetOffset(m_aEditBuffer.Tell());
305  m_aXRef[nAnnotsId].SetDirty(true);
306  m_aEditBuffer.WriteUInt32AsString(nAnnotsId);
307  m_aEditBuffer.WriteCharPtr(" 0 obj\n[");
308 
309  // Write existing references.
310  PDFArrayElement* pArray = pAnnotsObject->GetArray();
311  if (!pArray)
312  {
313  SAL_WARN("vcl.filter", "PDFDocument::Sign: Page Annots is a reference to a non-array");
314  return false;
315  }
316 
317  for (size_t i = 0; i < pArray->GetElements().size(); ++i)
318  {
319  auto pReference = dynamic_cast<PDFReferenceElement*>(pArray->GetElements()[i]);
320  if (!pReference)
321  continue;
322 
323  if (i)
324  m_aEditBuffer.WriteCharPtr(" ");
325  m_aEditBuffer.WriteUInt32AsString(pReference->GetObjectValue());
326  m_aEditBuffer.WriteCharPtr(" 0 R");
327  }
328  // Write our reference.
329  m_aEditBuffer.WriteCharPtr(" ");
330  m_aEditBuffer.WriteUInt32AsString(nAnnotId);
331  m_aEditBuffer.WriteCharPtr(" 0 R");
332 
333  m_aEditBuffer.WriteCharPtr("]\nendobj\n\n");
334  }
335  else
336  {
337  // Write the updated first page object, references nAnnotId.
338  sal_uInt32 nFirstPageId = rFirstPage.GetObjectValue();
339  if (nFirstPageId >= m_aXRef.size())
340  {
341  SAL_WARN("vcl.filter", "PDFDocument::Sign: invalid first page obj id");
342  return false;
343  }
344  m_aXRef[nFirstPageId].SetOffset(m_aEditBuffer.Tell());
345  m_aXRef[nFirstPageId].SetDirty(true);
346  m_aEditBuffer.WriteUInt32AsString(nFirstPageId);
347  m_aEditBuffer.WriteCharPtr(" 0 obj\n");
348  m_aEditBuffer.WriteCharPtr("<<");
349  auto pAnnotsArray = dynamic_cast<PDFArrayElement*>(pAnnots);
350  if (!pAnnotsArray)
351  {
352  // No Annots key, just write the key with a single reference.
353  m_aEditBuffer.WriteBytes(static_cast<const char*>(m_aEditBuffer.GetData())
354  + rFirstPage.GetDictionaryOffset(),
355  rFirstPage.GetDictionaryLength());
356  m_aEditBuffer.WriteCharPtr("/Annots[");
357  m_aEditBuffer.WriteUInt32AsString(nAnnotId);
358  m_aEditBuffer.WriteCharPtr(" 0 R]");
359  }
360  else
361  {
362  // Annots key is already there, insert our reference at the end.
363  PDFDictionaryElement* pDictionary = rFirstPage.GetDictionary();
364 
365  // Offset right before the end of the Annots array.
366  sal_uInt64 nAnnotsEndOffset = pDictionary->GetKeyOffset("Annots")
367  + pDictionary->GetKeyValueLength("Annots") - 1;
368  // Length of beginning of the dictionary -> Annots end.
369  sal_uInt64 nAnnotsBeforeEndLength = nAnnotsEndOffset - rFirstPage.GetDictionaryOffset();
370  m_aEditBuffer.WriteBytes(static_cast<const char*>(m_aEditBuffer.GetData())
371  + rFirstPage.GetDictionaryOffset(),
372  nAnnotsBeforeEndLength);
373  m_aEditBuffer.WriteCharPtr(" ");
374  m_aEditBuffer.WriteUInt32AsString(nAnnotId);
375  m_aEditBuffer.WriteCharPtr(" 0 R");
376  // Length of Annots end -> end of the dictionary.
377  sal_uInt64 nAnnotsAfterEndLength = rFirstPage.GetDictionaryOffset()
378  + rFirstPage.GetDictionaryLength()
379  - nAnnotsEndOffset;
380  m_aEditBuffer.WriteBytes(static_cast<const char*>(m_aEditBuffer.GetData())
381  + nAnnotsEndOffset,
382  nAnnotsAfterEndLength);
383  }
384  m_aEditBuffer.WriteCharPtr(">>");
385  m_aEditBuffer.WriteCharPtr("\nendobj\n\n");
386  }
387 
388  return true;
389 }
390 
391 bool PDFDocument::WriteCatalogObject(sal_Int32 nAnnotId, PDFReferenceElement*& pRoot)
392 {
393  if (m_pXRefStream)
394  pRoot = dynamic_cast<PDFReferenceElement*>(m_pXRefStream->Lookup("Root"));
395  else
396  {
397  if (!m_pTrailer)
398  {
399  SAL_WARN("vcl.filter", "PDFDocument::Sign: found no trailer");
400  return false;
401  }
402  pRoot = dynamic_cast<PDFReferenceElement*>(m_pTrailer->Lookup("Root"));
403  }
404  if (!pRoot)
405  {
406  SAL_WARN("vcl.filter", "PDFDocument::Sign: trailer has no root reference");
407  return false;
408  }
409  PDFObjectElement* pCatalog = pRoot->LookupObject();
410  if (!pCatalog)
411  {
412  SAL_WARN("vcl.filter", "PDFDocument::Sign: invalid catalog reference");
413  return false;
414  }
415  sal_uInt32 nCatalogId = pCatalog->GetObjectValue();
416  if (nCatalogId >= m_aXRef.size())
417  {
418  SAL_WARN("vcl.filter", "PDFDocument::Sign: invalid catalog obj id");
419  return false;
420  }
421  PDFElement* pAcroForm = pCatalog->Lookup("AcroForm");
422  auto pAcroFormReference = dynamic_cast<PDFReferenceElement*>(pAcroForm);
423  if (pAcroFormReference)
424  {
425  // Write the updated AcroForm key of the Catalog object.
426  PDFObjectElement* pAcroFormObject = pAcroFormReference->LookupObject();
427  if (!pAcroFormObject)
428  {
429  SAL_WARN("vcl.filter", "PDFDocument::Sign: invalid AcroForm reference");
430  return false;
431  }
432 
433  sal_uInt32 nAcroFormId = pAcroFormObject->GetObjectValue();
434  m_aXRef[nAcroFormId].SetType(XRefEntryType::NOT_COMPRESSED);
435  m_aXRef[nAcroFormId].SetOffset(m_aEditBuffer.Tell());
436  m_aXRef[nAcroFormId].SetDirty(true);
437  m_aEditBuffer.WriteUInt32AsString(nAcroFormId);
438  m_aEditBuffer.WriteCharPtr(" 0 obj\n");
439 
440  // If this is nullptr, then the AcroForm object is not in an object stream.
441  SvMemoryStream* pStreamBuffer = pAcroFormObject->GetStreamBuffer();
442 
443  if (!pAcroFormObject->Lookup("Fields"))
444  {
445  SAL_WARN("vcl.filter",
446  "PDFDocument::Sign: AcroForm object without required Fields key");
447  return false;
448  }
449 
450  PDFDictionaryElement* pAcroFormDictionary = pAcroFormObject->GetDictionary();
451  if (!pAcroFormDictionary)
452  {
453  SAL_WARN("vcl.filter", "PDFDocument::Sign: AcroForm object has no dictionary");
454  return false;
455  }
456 
457  // Offset right before the end of the Fields array.
458  sal_uInt64 nFieldsEndOffset = pAcroFormDictionary->GetKeyOffset("Fields")
459  + pAcroFormDictionary->GetKeyValueLength("Fields")
460  - strlen("]");
461  // Length of beginning of the object dictionary -> Fields end.
462  sal_uInt64 nFieldsBeforeEndLength = nFieldsEndOffset;
463  if (pStreamBuffer)
464  m_aEditBuffer.WriteBytes(pStreamBuffer->GetData(), nFieldsBeforeEndLength);
465  else
466  {
467  nFieldsBeforeEndLength -= pAcroFormObject->GetDictionaryOffset();
468  m_aEditBuffer.WriteCharPtr("<<");
469  m_aEditBuffer.WriteBytes(static_cast<const char*>(m_aEditBuffer.GetData())
470  + pAcroFormObject->GetDictionaryOffset(),
471  nFieldsBeforeEndLength);
472  }
473 
474  // Append our reference at the end of the Fields array.
475  m_aEditBuffer.WriteCharPtr(" ");
476  m_aEditBuffer.WriteUInt32AsString(nAnnotId);
477  m_aEditBuffer.WriteCharPtr(" 0 R");
478 
479  // Length of Fields end -> end of the object dictionary.
480  if (pStreamBuffer)
481  {
482  sal_uInt64 nFieldsAfterEndLength = pStreamBuffer->GetSize() - nFieldsEndOffset;
483  m_aEditBuffer.WriteBytes(static_cast<const char*>(pStreamBuffer->GetData())
484  + nFieldsEndOffset,
485  nFieldsAfterEndLength);
486  }
487  else
488  {
489  sal_uInt64 nFieldsAfterEndLength = pAcroFormObject->GetDictionaryOffset()
490  + pAcroFormObject->GetDictionaryLength()
491  - nFieldsEndOffset;
492  m_aEditBuffer.WriteBytes(static_cast<const char*>(m_aEditBuffer.GetData())
493  + nFieldsEndOffset,
494  nFieldsAfterEndLength);
495  m_aEditBuffer.WriteCharPtr(">>");
496  }
497 
498  m_aEditBuffer.WriteCharPtr("\nendobj\n\n");
499  }
500  else
501  {
502  // Write the updated Catalog object, references nAnnotId.
503  auto pAcroFormDictionary = dynamic_cast<PDFDictionaryElement*>(pAcroForm);
504  m_aXRef[nCatalogId].SetOffset(m_aEditBuffer.Tell());
505  m_aXRef[nCatalogId].SetDirty(true);
506  m_aEditBuffer.WriteUInt32AsString(nCatalogId);
507  m_aEditBuffer.WriteCharPtr(" 0 obj\n");
508  m_aEditBuffer.WriteCharPtr("<<");
509  if (!pAcroFormDictionary)
510  {
511  // No AcroForm key, assume no signatures.
512  m_aEditBuffer.WriteBytes(static_cast<const char*>(m_aEditBuffer.GetData())
513  + pCatalog->GetDictionaryOffset(),
514  pCatalog->GetDictionaryLength());
515  m_aEditBuffer.WriteCharPtr("/AcroForm<</Fields[\n");
516  m_aEditBuffer.WriteUInt32AsString(nAnnotId);
517  m_aEditBuffer.WriteCharPtr(" 0 R\n]/SigFlags 3>>\n");
518  }
519  else
520  {
521  // AcroForm key is already there, insert our reference at the Fields end.
522  auto it = pAcroFormDictionary->GetItems().find("Fields");
523  if (it == pAcroFormDictionary->GetItems().end())
524  {
525  SAL_WARN("vcl.filter", "PDFDocument::Sign: AcroForm without required Fields key");
526  return false;
527  }
528 
529  auto pFields = dynamic_cast<PDFArrayElement*>(it->second);
530  if (!pFields)
531  {
532  SAL_WARN("vcl.filter", "PDFDocument::Sign: AcroForm Fields is not an array");
533  return false;
534  }
535 
536  // Offset right before the end of the Fields array.
537  sal_uInt64 nFieldsEndOffset = pAcroFormDictionary->GetKeyOffset("Fields")
538  + pAcroFormDictionary->GetKeyValueLength("Fields") - 1;
539  // Length of beginning of the Catalog dictionary -> Fields end.
540  sal_uInt64 nFieldsBeforeEndLength = nFieldsEndOffset - pCatalog->GetDictionaryOffset();
541  m_aEditBuffer.WriteBytes(static_cast<const char*>(m_aEditBuffer.GetData())
542  + pCatalog->GetDictionaryOffset(),
543  nFieldsBeforeEndLength);
544  m_aEditBuffer.WriteCharPtr(" ");
545  m_aEditBuffer.WriteUInt32AsString(nAnnotId);
546  m_aEditBuffer.WriteCharPtr(" 0 R");
547  // Length of Fields end -> end of the Catalog dictionary.
548  sal_uInt64 nFieldsAfterEndLength = pCatalog->GetDictionaryOffset()
549  + pCatalog->GetDictionaryLength() - nFieldsEndOffset;
550  m_aEditBuffer.WriteBytes(static_cast<const char*>(m_aEditBuffer.GetData())
551  + nFieldsEndOffset,
552  nFieldsAfterEndLength);
553  }
554  m_aEditBuffer.WriteCharPtr(">>\nendobj\n\n");
555  }
556 
557  return true;
558 }
559 
560 void PDFDocument::WriteXRef(sal_uInt64 nXRefOffset, PDFReferenceElement const* pRoot)
561 {
562  if (m_pXRefStream)
563  {
564  // Write the xref stream.
565  // This is a bit meta: the xref stream stores its own offset.
566  sal_Int32 nXRefStreamId = m_aXRef.size();
567  XRefEntry aXRefStreamEntry;
568  aXRefStreamEntry.SetOffset(nXRefOffset);
569  aXRefStreamEntry.SetDirty(true);
570  m_aXRef[nXRefStreamId] = aXRefStreamEntry;
571 
572  // Write stream data.
573  SvMemoryStream aXRefStream;
574  const size_t nOffsetLen = 3;
575  // 3 additional bytes: predictor, the first and the third field.
576  const size_t nLineLength = nOffsetLen + 3;
577  // This is the line as it appears before tweaking according to the predictor.
578  std::vector<unsigned char> aOrigLine(nLineLength);
579  // This is the previous line.
580  std::vector<unsigned char> aPrevLine(nLineLength);
581  // This is the line as written to the stream.
582  std::vector<unsigned char> aFilteredLine(nLineLength);
583  for (const auto& rXRef : m_aXRef)
584  {
585  const XRefEntry& rEntry = rXRef.second;
586 
587  if (!rEntry.GetDirty())
588  continue;
589 
590  // Predictor.
591  size_t nPos = 0;
592  // PNG prediction: up (on all rows).
593  aOrigLine[nPos++] = 2;
594 
595  // First field.
596  unsigned char nType = 0;
597  switch (rEntry.GetType())
598  {
599  case XRefEntryType::FREE:
600  nType = 0;
601  break;
602  case XRefEntryType::NOT_COMPRESSED:
603  nType = 1;
604  break;
605  case XRefEntryType::COMPRESSED:
606  nType = 2;
607  break;
608  }
609  aOrigLine[nPos++] = nType;
610 
611  // Second field.
612  for (size_t i = 0; i < nOffsetLen; ++i)
613  {
614  size_t nByte = nOffsetLen - i - 1;
615  // Fields requiring more than one byte are stored with the
616  // high-order byte first.
617  unsigned char nCh = (rEntry.GetOffset() & (0xff << (nByte * 8))) >> (nByte * 8);
618  aOrigLine[nPos++] = nCh;
619  }
620 
621  // Third field.
622  aOrigLine[nPos++] = 0;
623 
624  // Now apply the predictor.
625  aFilteredLine[0] = aOrigLine[0];
626  for (size_t i = 1; i < nLineLength; ++i)
627  {
628  // Count the delta vs the previous line.
629  aFilteredLine[i] = aOrigLine[i] - aPrevLine[i];
630  // Remember the new reference.
631  aPrevLine[i] = aOrigLine[i];
632  }
633 
634  aXRefStream.WriteBytes(aFilteredLine.data(), aFilteredLine.size());
635  }
636 
637  m_aEditBuffer.WriteUInt32AsString(nXRefStreamId);
638  m_aEditBuffer.WriteCharPtr(
639  " 0 obj\n<</DecodeParms<</Columns 5/Predictor 12>>/Filter/FlateDecode");
640 
641  // ID.
642  auto pID = dynamic_cast<PDFArrayElement*>(m_pXRefStream->Lookup("ID"));
643  if (pID)
644  {
645  const std::vector<PDFElement*>& rElements = pID->GetElements();
646  m_aEditBuffer.WriteCharPtr("/ID [ <");
647  for (size_t i = 0; i < rElements.size(); ++i)
648  {
649  auto pIDString = dynamic_cast<PDFHexStringElement*>(rElements[i]);
650  if (!pIDString)
651  continue;
652 
653  m_aEditBuffer.WriteOString(pIDString->GetValue());
654  if ((i + 1) < rElements.size())
655  m_aEditBuffer.WriteCharPtr("> <");
656  }
657  m_aEditBuffer.WriteCharPtr("> ] ");
658  }
659 
660  // Index.
661  m_aEditBuffer.WriteCharPtr("/Index [ ");
662  for (const auto& rXRef : m_aXRef)
663  {
664  if (!rXRef.second.GetDirty())
665  continue;
666 
667  m_aEditBuffer.WriteUInt32AsString(rXRef.first);
668  m_aEditBuffer.WriteCharPtr(" 1 ");
669  }
670  m_aEditBuffer.WriteCharPtr("] ");
671 
672  // Info.
673  auto pInfo = dynamic_cast<PDFReferenceElement*>(m_pXRefStream->Lookup("Info"));
674  if (pInfo)
675  {
676  m_aEditBuffer.WriteCharPtr("/Info ");
677  m_aEditBuffer.WriteUInt32AsString(pInfo->GetObjectValue());
678  m_aEditBuffer.WriteCharPtr(" ");
679  m_aEditBuffer.WriteUInt32AsString(pInfo->GetGenerationValue());
680  m_aEditBuffer.WriteCharPtr(" R ");
681  }
682 
683  // Length.
684  m_aEditBuffer.WriteCharPtr("/Length ");
685  {
686  ZCodec aZCodec;
687  aZCodec.BeginCompression();
688  aXRefStream.Seek(0);
689  SvMemoryStream aStream;
690  aZCodec.Compress(aXRefStream, aStream);
691  aZCodec.EndCompression();
692  aXRefStream.Seek(0);
693  aXRefStream.SetStreamSize(0);
694  aStream.Seek(0);
695  aXRefStream.WriteStream(aStream);
696  }
697  m_aEditBuffer.WriteUInt32AsString(aXRefStream.GetSize());
698 
699  if (!m_aStartXRefs.empty())
700  {
701  // Write location of the previous cross-reference section.
702  m_aEditBuffer.WriteCharPtr("/Prev ");
703  m_aEditBuffer.WriteUInt32AsString(m_aStartXRefs.back());
704  }
705 
706  // Root.
707  m_aEditBuffer.WriteCharPtr("/Root ");
708  m_aEditBuffer.WriteUInt32AsString(pRoot->GetObjectValue());
709  m_aEditBuffer.WriteCharPtr(" ");
710  m_aEditBuffer.WriteUInt32AsString(pRoot->GetGenerationValue());
711  m_aEditBuffer.WriteCharPtr(" R ");
712 
713  // Size.
714  m_aEditBuffer.WriteCharPtr("/Size ");
715  m_aEditBuffer.WriteUInt32AsString(m_aXRef.size());
716 
717  m_aEditBuffer.WriteCharPtr("/Type/XRef/W[1 3 1]>>\nstream\n");
718  aXRefStream.Seek(0);
719  m_aEditBuffer.WriteStream(aXRefStream);
720  m_aEditBuffer.WriteCharPtr("\nendstream\nendobj\n\n");
721  }
722  else
723  {
724  // Write the xref table.
725  m_aEditBuffer.WriteCharPtr("xref\n");
726  for (const auto& rXRef : m_aXRef)
727  {
728  size_t nObject = rXRef.first;
729  size_t nOffset = rXRef.second.GetOffset();
730  if (!rXRef.second.GetDirty())
731  continue;
732 
733  m_aEditBuffer.WriteUInt32AsString(nObject);
734  m_aEditBuffer.WriteCharPtr(" 1\n");
735  OStringBuffer aBuffer;
736  aBuffer.append(static_cast<sal_Int32>(nOffset));
737  while (aBuffer.getLength() < 10)
738  aBuffer.insert(0, "0");
739  if (nObject == 0)
740  aBuffer.append(" 65535 f \n");
741  else
742  aBuffer.append(" 00000 n \n");
743  m_aEditBuffer.WriteOString(aBuffer.toString());
744  }
745 
746  // Write the trailer.
747  m_aEditBuffer.WriteCharPtr("trailer\n<</Size ");
748  m_aEditBuffer.WriteUInt32AsString(m_aXRef.size());
749  m_aEditBuffer.WriteCharPtr("/Root ");
750  m_aEditBuffer.WriteUInt32AsString(pRoot->GetObjectValue());
751  m_aEditBuffer.WriteCharPtr(" ");
752  m_aEditBuffer.WriteUInt32AsString(pRoot->GetGenerationValue());
753  m_aEditBuffer.WriteCharPtr(" R\n");
754  auto pInfo = dynamic_cast<PDFReferenceElement*>(m_pTrailer->Lookup("Info"));
755  if (pInfo)
756  {
757  m_aEditBuffer.WriteCharPtr("/Info ");
758  m_aEditBuffer.WriteUInt32AsString(pInfo->GetObjectValue());
759  m_aEditBuffer.WriteCharPtr(" ");
760  m_aEditBuffer.WriteUInt32AsString(pInfo->GetGenerationValue());
761  m_aEditBuffer.WriteCharPtr(" R\n");
762  }
763  auto pID = dynamic_cast<PDFArrayElement*>(m_pTrailer->Lookup("ID"));
764  if (pID)
765  {
766  const std::vector<PDFElement*>& rElements = pID->GetElements();
767  m_aEditBuffer.WriteCharPtr("/ID [ <");
768  for (size_t i = 0; i < rElements.size(); ++i)
769  {
770  auto pIDString = dynamic_cast<PDFHexStringElement*>(rElements[i]);
771  if (!pIDString)
772  continue;
773 
774  m_aEditBuffer.WriteOString(pIDString->GetValue());
775  if ((i + 1) < rElements.size())
776  m_aEditBuffer.WriteCharPtr(">\n<");
777  }
778  m_aEditBuffer.WriteCharPtr("> ]\n");
779  }
780 
781  if (!m_aStartXRefs.empty())
782  {
783  // Write location of the previous cross-reference section.
784  m_aEditBuffer.WriteCharPtr("/Prev ");
785  m_aEditBuffer.WriteUInt32AsString(m_aStartXRefs.back());
786  }
787 
788  m_aEditBuffer.WriteCharPtr(">>\n");
789  }
790 }
791 
792 bool PDFDocument::Sign(const uno::Reference<security::XCertificate>& xCertificate,
793  const OUString& rDescription, bool bAdES)
794 {
795  m_aEditBuffer.Seek(STREAM_SEEK_TO_END);
796  m_aEditBuffer.WriteCharPtr("\n");
797 
798  sal_uInt64 nSignatureLastByteRangeOffset = 0;
799  sal_Int64 nSignatureContentOffset = 0;
800  sal_Int32 nSignatureId = WriteSignatureObject(
801  rDescription, bAdES, nSignatureLastByteRangeOffset, nSignatureContentOffset);
802 
803  sal_Int32 nAppearanceId = WriteAppearanceObject();
804 
805  std::vector<PDFObjectElement*> aPages = GetPages();
806  if (aPages.empty() || !aPages[0])
807  {
808  SAL_WARN("vcl.filter", "PDFDocument::Sign: found no pages");
809  return false;
810  }
811 
812  PDFObjectElement& rFirstPage = *aPages[0];
813  sal_Int32 nAnnotId = WriteAnnotObject(rFirstPage, nSignatureId, nAppearanceId);
814 
815  if (!WritePageObject(rFirstPage, nAnnotId))
816  {
817  SAL_WARN("vcl.filter", "PDFDocument::Sign: failed to write the updated Page object");
818  return false;
819  }
820 
821  PDFReferenceElement* pRoot = nullptr;
822  if (!WriteCatalogObject(nAnnotId, pRoot))
823  {
824  SAL_WARN("vcl.filter", "PDFDocument::Sign: failed to write the updated Catalog object");
825  return false;
826  }
827 
828  sal_uInt64 nXRefOffset = m_aEditBuffer.Tell();
829  WriteXRef(nXRefOffset, pRoot);
830 
831  // Write startxref.
832  m_aEditBuffer.WriteCharPtr("startxref\n");
833  m_aEditBuffer.WriteUInt32AsString(nXRefOffset);
834  m_aEditBuffer.WriteCharPtr("\n%%EOF\n");
835 
836  // Finalize the signature, now that we know the total file size.
837  // Calculate the length of the last byte range.
838  sal_uInt64 nFileEnd = m_aEditBuffer.Tell();
839  sal_Int64 nLastByteRangeLength
840  = nFileEnd - (nSignatureContentOffset + MAX_SIGNATURE_CONTENT_LENGTH + 1);
841  // Write the length to the buffer.
842  m_aEditBuffer.Seek(nSignatureLastByteRangeOffset);
843  OString aByteRangeBuffer = OString::number(nLastByteRangeLength) + " ]";
844  m_aEditBuffer.WriteOString(aByteRangeBuffer);
845 
846  // Create the PKCS#7 object.
847  css::uno::Sequence<sal_Int8> aDerEncoded = xCertificate->getEncoded();
848  if (!aDerEncoded.hasElements())
849  {
850  SAL_WARN("vcl.filter", "PDFDocument::Sign: empty certificate");
851  return false;
852  }
853 
854  m_aEditBuffer.Seek(0);
855  sal_uInt64 nBufferSize1 = nSignatureContentOffset - 1;
856  std::unique_ptr<char[]> aBuffer1(new char[nBufferSize1]);
857  m_aEditBuffer.ReadBytes(aBuffer1.get(), nBufferSize1);
858 
859  m_aEditBuffer.Seek(nSignatureContentOffset + MAX_SIGNATURE_CONTENT_LENGTH + 1);
860  sal_uInt64 nBufferSize2 = nLastByteRangeLength;
861  std::unique_ptr<char[]> aBuffer2(new char[nBufferSize2]);
862  m_aEditBuffer.ReadBytes(aBuffer2.get(), nBufferSize2);
863 
864  OStringBuffer aCMSHexBuffer;
865  svl::crypto::Signing aSigning(xCertificate);
866  aSigning.AddDataRange(aBuffer1.get(), nBufferSize1);
867  aSigning.AddDataRange(aBuffer2.get(), nBufferSize2);
868  if (!aSigning.Sign(aCMSHexBuffer))
869  {
870  SAL_WARN("vcl.filter", "PDFDocument::Sign: PDFWriter::Sign() failed");
871  return false;
872  }
873 
874  assert(aCMSHexBuffer.getLength() <= MAX_SIGNATURE_CONTENT_LENGTH);
875 
876  m_aEditBuffer.Seek(nSignatureContentOffset);
877  m_aEditBuffer.WriteOString(aCMSHexBuffer.toString());
878 
879  return true;
880 }
881 
/// Writes the edit buffer, starting from its beginning, to rStream.
/// Returns whether rStream reports a good state afterwards.
bool PDFDocument::Write(SvStream& rStream)
{
    // Rewind, so that WriteStream() starts at the beginning of the buffer.
    m_aEditBuffer.Seek(0);
    rStream.WriteStream(m_aEditBuffer);
    return rStream.good();
}
888 
889 bool PDFDocument::Tokenize(SvStream& rStream, TokenizeMode eMode,
890  std::vector<std::unique_ptr<PDFElement>>& rElements,
891  PDFObjectElement* pObjectElement)
892 {
893  // Last seen object token.
894  PDFObjectElement* pObject = pObjectElement;
895  PDFNameElement* pObjectKey = nullptr;
896  PDFObjectElement* pObjectStream = nullptr;
897  bool bInXRef = false;
898  // The next number will be an xref offset.
899  bool bInStartXRef = false;
900  // Dictionary depth, so we know when we're outside any dictionaries.
901  int nDictionaryDepth = 0;
902  // Array depth, only the offset/length of the toplevel array is tracked.
903  int nArrayDepth = 0;
904  // Last seen array token that's outside any dictionaries.
905  PDFArrayElement* pArray = nullptr;
906  // If we're inside an obj/endobj pair.
907  bool bInObject = false;
908  while (true)
909  {
910  char ch;
911  rStream.ReadChar(ch);
912  if (rStream.eof())
913  break;
914 
915  switch (ch)
916  {
917  case '%':
918  {
919  auto pComment = new PDFCommentElement(*this);
920  rElements.push_back(std::unique_ptr<PDFElement>(pComment));
921  rStream.SeekRel(-1);
922  if (!rElements.back()->Read(rStream))
923  {
924  SAL_WARN("vcl.filter",
925  "PDFDocument::Tokenize: PDFCommentElement::Read() failed");
926  return false;
927  }
928  if (eMode == TokenizeMode::EOF_TOKEN && !m_aEOFs.empty()
929  && m_aEOFs.back() == rStream.Tell())
930  {
931  // Found EOF and partial parsing requested, we're done.
932  return true;
933  }
934  break;
935  }
936  case '<':
937  {
938  // Dictionary or hex string.
939  rStream.ReadChar(ch);
940  rStream.SeekRel(-2);
941  if (ch == '<')
942  {
943  rElements.push_back(std::unique_ptr<PDFElement>(new PDFDictionaryElement()));
944  ++nDictionaryDepth;
945  }
946  else
947  rElements.push_back(std::unique_ptr<PDFElement>(new PDFHexStringElement));
948  if (!rElements.back()->Read(rStream))
949  {
950  SAL_WARN("vcl.filter",
951  "PDFDocument::Tokenize: PDFDictionaryElement::Read() failed");
952  return false;
953  }
954  break;
955  }
956  case '>':
957  {
958  rElements.push_back(std::unique_ptr<PDFElement>(new PDFEndDictionaryElement()));
959  --nDictionaryDepth;
960  rStream.SeekRel(-1);
961  if (!rElements.back()->Read(rStream))
962  {
963  SAL_WARN("vcl.filter",
964  "PDFDocument::Tokenize: PDFEndDictionaryElement::Read() failed");
965  return false;
966  }
967  break;
968  }
969  case '[':
970  {
971  auto pArr = new PDFArrayElement(pObject);
972  rElements.push_back(std::unique_ptr<PDFElement>(pArr));
973  if (nDictionaryDepth == 0 && nArrayDepth == 0)
974  {
975  // The array is attached directly, inform the object.
976  pArray = pArr;
977  if (pObject)
978  {
979  pObject->SetArray(pArray);
980  pObject->SetArrayOffset(rStream.Tell());
981  }
982  }
983  ++nArrayDepth;
984  rStream.SeekRel(-1);
985  if (!rElements.back()->Read(rStream))
986  {
987  SAL_WARN("vcl.filter", "PDFDocument::Tokenize: PDFArrayElement::Read() failed");
988  return false;
989  }
990  break;
991  }
992  case ']':
993  {
994  rElements.push_back(std::unique_ptr<PDFElement>(new PDFEndArrayElement()));
995  --nArrayDepth;
996  if (nArrayDepth == 0)
997  pArray = nullptr;
998  rStream.SeekRel(-1);
999  if (nDictionaryDepth == 0 && nArrayDepth == 0)
1000  {
1001  if (pObject)
1002  {
1003  pObject->SetArrayLength(rStream.Tell() - pObject->GetArrayOffset());
1004  }
1005  }
1006  if (!rElements.back()->Read(rStream))
1007  {
1008  SAL_WARN("vcl.filter",
1009  "PDFDocument::Tokenize: PDFEndArrayElement::Read() failed");
1010  return false;
1011  }
1012  break;
1013  }
1014  case '/':
1015  {
1016  auto pNameElement = new PDFNameElement();
1017  rElements.push_back(std::unique_ptr<PDFElement>(pNameElement));
1018  rStream.SeekRel(-1);
1019  if (!pNameElement->Read(rStream))
1020  {
1021  SAL_WARN("vcl.filter", "PDFDocument::Tokenize: PDFNameElement::Read() failed");
1022  return false;
1023  }
1024  if (pObject && pObjectKey && pObjectKey->GetValue() == "Type"
1025  && pNameElement->GetValue() == "ObjStm")
1026  pObjectStream = pObject;
1027  else
1028  pObjectKey = pNameElement;
1029  break;
1030  }
1031  case '(':
1032  {
1033  rElements.push_back(std::unique_ptr<PDFElement>(new PDFLiteralStringElement));
1034  rStream.SeekRel(-1);
1035  if (!rElements.back()->Read(rStream))
1036  {
1037  SAL_WARN("vcl.filter",
1038  "PDFDocument::Tokenize: PDFLiteralStringElement::Read() failed");
1039  return false;
1040  }
1041  break;
1042  }
1043  default:
1044  {
1045  if (rtl::isAsciiDigit(static_cast<unsigned char>(ch)) || ch == '-')
1046  {
1047  // Numbering object: an integer or a real.
1048  auto pNumberElement = new PDFNumberElement();
1049  rElements.push_back(std::unique_ptr<PDFElement>(pNumberElement));
1050  rStream.SeekRel(-1);
1051  if (!pNumberElement->Read(rStream))
1052  {
1053  SAL_WARN("vcl.filter",
1054  "PDFDocument::Tokenize: PDFNumberElement::Read() failed");
1055  return false;
1056  }
1057  if (bInStartXRef)
1058  {
1059  bInStartXRef = false;
1060  m_aStartXRefs.push_back(pNumberElement->GetValue());
1061 
1062  auto it = m_aOffsetObjects.find(pNumberElement->GetValue());
1063  if (it != m_aOffsetObjects.end())
1064  m_pXRefStream = it->second;
1065  }
1066  else if (bInObject && !nDictionaryDepth && !nArrayDepth && pObject)
1067  // Number element inside an object, but outside a
1068  // dictionary / array: remember it.
1069  pObject->SetNumberElement(pNumberElement);
1070  }
1071  else if (rtl::isAsciiAlpha(static_cast<unsigned char>(ch)))
1072  {
1073  // Possible keyword, like "obj".
1074  rStream.SeekRel(-1);
1075  OString aKeyword = ReadKeyword(rStream);
1076 
1077  bool bObj = aKeyword == "obj";
1078  if (bObj || aKeyword == "R")
1079  {
1080  size_t nElements = rElements.size();
1081  if (nElements < 2)
1082  {
1083  SAL_WARN("vcl.filter", "PDFDocument::Tokenize: expected at least two "
1084  "tokens before 'obj' or 'R' keyword");
1085  return false;
1086  }
1087 
1088  auto pObjectNumber
1089  = dynamic_cast<PDFNumberElement*>(rElements[nElements - 2].get());
1090  auto pGenerationNumber
1091  = dynamic_cast<PDFNumberElement*>(rElements[nElements - 1].get());
1092  if (!pObjectNumber || !pGenerationNumber)
1093  {
1094  SAL_WARN("vcl.filter", "PDFDocument::Tokenize: missing object or "
1095  "generation number before 'obj' or 'R' keyword");
1096  return false;
1097  }
1098 
1099  if (bObj)
1100  {
1101  pObject = new PDFObjectElement(*this, pObjectNumber->GetValue(),
1102  pGenerationNumber->GetValue());
1103  rElements.push_back(std::unique_ptr<PDFElement>(pObject));
1104  m_aOffsetObjects[pObjectNumber->GetLocation()] = pObject;
1105  m_aIDObjects[pObjectNumber->GetValue()] = pObject;
1106  bInObject = true;
1107  }
1108  else
1109  {
1110  auto pReference = new PDFReferenceElement(*this, *pObjectNumber,
1111  *pGenerationNumber);
1112  rElements.push_back(std::unique_ptr<PDFElement>(pReference));
1113  if (pArray)
1114  // Reference is part of a direct (non-dictionary) array, inform the array.
1115  pArray->PushBack(rElements.back().get());
1116  if (bInObject && nDictionaryDepth > 0 && pObject)
1117  // Inform the object about a new in-dictionary reference.
1118  pObject->AddDictionaryReference(pReference);
1119  }
1120  if (!rElements.back()->Read(rStream))
1121  {
1122  SAL_WARN("vcl.filter",
1123  "PDFDocument::Tokenize: PDFElement::Read() failed");
1124  return false;
1125  }
1126  }
1127  else if (aKeyword == "stream")
1128  {
1129  // Look up the length of the stream from the parent object's dictionary.
1130  size_t nLength = 0;
1131  for (size_t nElement = 0; nElement < rElements.size(); ++nElement)
1132  {
1133  // Iterate in reverse order.
1134  size_t nIndex = rElements.size() - nElement - 1;
1135  PDFElement* pElement = rElements[nIndex].get();
1136  auto pObj = dynamic_cast<PDFObjectElement*>(pElement);
1137  if (!pObj)
1138  continue;
1139 
1140  PDFElement* pLookup = pObj->Lookup("Length");
1141  auto pReference = dynamic_cast<PDFReferenceElement*>(pLookup);
1142  if (pReference)
1143  {
1144  // Length is provided as a reference.
1145  nLength = pReference->LookupNumber(rStream);
1146  break;
1147  }
1148 
1149  auto pNumber = dynamic_cast<PDFNumberElement*>(pLookup);
1150  if (pNumber)
1151  {
1152  // Length is provided directly.
1153  nLength = pNumber->GetValue();
1154  break;
1155  }
1156 
1157  SAL_WARN(
1158  "vcl.filter",
1159  "PDFDocument::Tokenize: found no Length key for stream keyword");
1160  return false;
1161  }
1162 
1163  PDFDocument::SkipLineBreaks(rStream);
1164  auto pStreamElement = new PDFStreamElement(nLength);
1165  if (pObject)
1166  pObject->SetStream(pStreamElement);
1167  rElements.push_back(std::unique_ptr<PDFElement>(pStreamElement));
1168  if (!rElements.back()->Read(rStream))
1169  {
1170  SAL_WARN("vcl.filter",
1171  "PDFDocument::Tokenize: PDFStreamElement::Read() failed");
1172  return false;
1173  }
1174  }
1175  else if (aKeyword == "endstream")
1176  {
1177  rElements.push_back(std::unique_ptr<PDFElement>(new PDFEndStreamElement));
1178  if (!rElements.back()->Read(rStream))
1179  {
1180  SAL_WARN("vcl.filter",
1181  "PDFDocument::Tokenize: PDFEndStreamElement::Read() failed");
1182  return false;
1183  }
1184  }
1185  else if (aKeyword == "endobj")
1186  {
1187  rElements.push_back(std::unique_ptr<PDFElement>(new PDFEndObjectElement));
1188  if (!rElements.back()->Read(rStream))
1189  {
1190  SAL_WARN("vcl.filter",
1191  "PDFDocument::Tokenize: PDFEndObjectElement::Read() failed");
1192  return false;
1193  }
1194  if (eMode == TokenizeMode::END_OF_OBJECT)
1195  {
1196  // Found endobj and only object parsing was requested, we're done.
1197  return true;
1198  }
1199 
1200  if (pObjectStream)
1201  {
1202  // We're at the end of an object stream, parse the stored objects.
1203  pObjectStream->ParseStoredObjects();
1204  pObjectStream = nullptr;
1205  pObjectKey = nullptr;
1206  }
1207  bInObject = false;
1208  }
1209  else if (aKeyword == "true" || aKeyword == "false")
1210  rElements.push_back(std::unique_ptr<PDFElement>(
1211  new PDFBooleanElement(aKeyword.toBoolean())));
1212  else if (aKeyword == "null")
1213  rElements.push_back(std::unique_ptr<PDFElement>(new PDFNullElement));
1214  else if (aKeyword == "xref")
1215  // Allow 'f' and 'n' keywords.
1216  bInXRef = true;
1217  else if (bInXRef && (aKeyword == "f" || aKeyword == "n"))
1218  {
1219  }
1220  else if (aKeyword == "trailer")
1221  {
1222  auto pTrailer = new PDFTrailerElement(*this);
1223 
1224  // Make it possible to find this trailer later by offset.
1225  pTrailer->Read(rStream);
1226  m_aOffsetTrailers[pTrailer->GetLocation()] = pTrailer;
1227 
1228  // When reading till the first EOF token only, remember
1229  // just the first trailer token.
1230  if (eMode != TokenizeMode::EOF_TOKEN || !m_pTrailer)
1231  m_pTrailer = pTrailer;
1232  rElements.push_back(std::unique_ptr<PDFElement>(pTrailer));
1233  }
1234  else if (aKeyword == "startxref")
1235  {
1236  bInStartXRef = true;
1237  }
1238  else
1239  {
1240  SAL_WARN("vcl.filter", "PDFDocument::Tokenize: unexpected '"
1241  << aKeyword << "' keyword at byte position "
1242  << rStream.Tell());
1243  return false;
1244  }
1245  }
1246  else
1247  {
1248  if (!rtl::isAsciiWhiteSpace(static_cast<unsigned char>(ch)))
1249  {
1250  SAL_WARN("vcl.filter", "PDFDocument::Tokenize: unexpected character: "
1251  << ch << " at byte position " << rStream.Tell());
1252  return false;
1253  }
1254  }
1255  break;
1256  }
1257  }
1258  }
1259 
1260  return true;
1261 }
1262 
1263 void PDFDocument::SetIDObject(size_t nID, PDFObjectElement* pObject)
1264 {
1265  m_aIDObjects[nID] = pObject;
1266 }
1267 
1268 bool PDFDocument::Read(SvStream& rStream)
1269 {
1270  // Check file magic.
1271  std::vector<sal_Int8> aHeader(5);
1272  rStream.Seek(0);
1273  rStream.ReadBytes(aHeader.data(), aHeader.size());
1274  if (aHeader[0] != '%' || aHeader[1] != 'P' || aHeader[2] != 'D' || aHeader[3] != 'F'
1275  || aHeader[4] != '-')
1276  {
1277  SAL_WARN("vcl.filter", "PDFDocument::Read: header mismatch");
1278  return false;
1279  }
1280 
1281  // Allow later editing of the contents in-memory.
1282  rStream.Seek(0);
1283  m_aEditBuffer.WriteStream(rStream);
1284 
1285  // Look up the offset of the xref table.
1286  size_t nStartXRef = FindStartXRef(rStream);
1287  SAL_INFO("vcl.filter", "PDFDocument::Read: nStartXRef is " << nStartXRef);
1288  if (nStartXRef == 0)
1289  {
1290  SAL_WARN("vcl.filter", "PDFDocument::Read: found no xref start offset");
1291  return false;
1292  }
1293  while (true)
1294  {
1295  rStream.Seek(nStartXRef);
1296  OString aKeyword = ReadKeyword(rStream);
1297  if (aKeyword.isEmpty())
1298  ReadXRefStream(rStream);
1299 
1300  else
1301  {
1302  if (aKeyword != "xref")
1303  {
1304  SAL_WARN("vcl.filter", "PDFDocument::Read: xref is not the first keyword");
1305  return false;
1306  }
1307  ReadXRef(rStream);
1308  if (!Tokenize(rStream, TokenizeMode::EOF_TOKEN, m_aElements, nullptr))
1309  {
1310  SAL_WARN("vcl.filter", "PDFDocument::Read: failed to tokenizer trailer after xref");
1311  return false;
1312  }
1313  }
1314 
1315  PDFNumberElement* pPrev = nullptr;
1316  if (m_pTrailer)
1317  {
1318  pPrev = dynamic_cast<PDFNumberElement*>(m_pTrailer->Lookup("Prev"));
1319 
1320  // Remember the offset of this trailer in the correct order. It's
1321  // possible that newer trailers don't have a larger offset.
1322  m_aTrailerOffsets.push_back(m_pTrailer->GetLocation());
1323  }
1324  else if (m_pXRefStream)
1325  pPrev = dynamic_cast<PDFNumberElement*>(m_pXRefStream->Lookup("Prev"));
1326  if (pPrev)
1327  nStartXRef = pPrev->GetValue();
1328 
1329  // Reset state, except the edit buffer.
1330  m_aElements.clear();
1331  m_aOffsetObjects.clear();
1332  m_aIDObjects.clear();
1333  m_aStartXRefs.clear();
1334  m_aEOFs.clear();
1335  m_pTrailer = nullptr;
1336  m_pXRefStream = nullptr;
1337  if (!pPrev)
1338  break;
1339  }
1340 
1341  // Then we can tokenize the stream.
1342  rStream.Seek(0);
1343  return Tokenize(rStream, TokenizeMode::END_OF_STREAM, m_aElements, nullptr);
1344 }
1345 
1346 OString PDFDocument::ReadKeyword(SvStream& rStream)
1347 {
1348  OStringBuffer aBuf;
1349  char ch;
1350  rStream.ReadChar(ch);
1351  if (rStream.eof())
1352  return OString();
1353  while (rtl::isAsciiAlpha(static_cast<unsigned char>(ch)))
1354  {
1355  aBuf.append(ch);
1356  rStream.ReadChar(ch);
1357  if (rStream.eof())
1358  return aBuf.toString();
1359  }
1360  rStream.SeekRel(-1);
1361  return aBuf.toString();
1362 }
1363 
1364 size_t PDFDocument::FindStartXRef(SvStream& rStream)
1365 {
1366  // Find the "startxref" token, somewhere near the end of the document.
1367  std::vector<char> aBuf(1024);
1368  rStream.Seek(STREAM_SEEK_TO_END);
1369  if (rStream.Tell() > aBuf.size())
1370  rStream.SeekRel(static_cast<sal_Int64>(-1) * aBuf.size());
1371  else
1372  // The document is really short, then just read it from the start.
1373  rStream.Seek(0);
1374  size_t nBeforePeek = rStream.Tell();
1375  size_t nSize = rStream.ReadBytes(aBuf.data(), aBuf.size());
1376  rStream.Seek(nBeforePeek);
1377  if (nSize != aBuf.size())
1378  aBuf.resize(nSize);
1379  OString aPrefix("startxref");
1380  // Find the last startxref at the end of the document.
1381  auto itLastValid = aBuf.end();
1382  auto it = aBuf.begin();
1383  while (true)
1384  {
1385  it = std::search(it, aBuf.end(), aPrefix.getStr(), aPrefix.getStr() + aPrefix.getLength());
1386  if (it == aBuf.end())
1387  break;
1388 
1389  itLastValid = it;
1390  ++it;
1391  }
1392  if (itLastValid == aBuf.end())
1393  {
1394  SAL_WARN("vcl.filter", "PDFDocument::FindStartXRef: found no startxref");
1395  return 0;
1396  }
1397 
1398  rStream.SeekRel(itLastValid - aBuf.begin() + aPrefix.getLength());
1399  if (rStream.eof())
1400  {
1401  SAL_WARN("vcl.filter",
1402  "PDFDocument::FindStartXRef: unexpected end of stream after startxref");
1403  return 0;
1404  }
1405 
1406  PDFDocument::SkipWhitespace(rStream);
1407  PDFNumberElement aNumber;
1408  if (!aNumber.Read(rStream))
1409  return 0;
1410  return aNumber.GetValue();
1411 }
1412 
1413 void PDFDocument::ReadXRefStream(SvStream& rStream)
1414 {
1415  // Look up the stream length in the object dictionary.
1416  if (!Tokenize(rStream, TokenizeMode::END_OF_OBJECT, m_aElements, nullptr))
1417  {
1418  SAL_WARN("vcl.filter", "PDFDocument::ReadXRefStream: failed to read object");
1419  return;
1420  }
1421 
1422  if (m_aElements.empty())
1423  {
1424  SAL_WARN("vcl.filter", "PDFDocument::ReadXRefStream: no tokens found");
1425  return;
1426  }
1427 
1428  PDFObjectElement* pObject = nullptr;
1429  for (const auto& pElement : m_aElements)
1430  {
1431  if (auto pObj = dynamic_cast<PDFObjectElement*>(pElement.get()))
1432  {
1433  pObject = pObj;
1434  break;
1435  }
1436  }
1437  if (!pObject)
1438  {
1439  SAL_WARN("vcl.filter", "PDFDocument::ReadXRefStream: no object token found");
1440  return;
1441  }
1442 
1443  // So that the Prev key can be looked up later.
1444  m_pXRefStream = pObject;
1445 
1446  PDFElement* pLookup = pObject->Lookup("Length");
1447  auto pNumber = dynamic_cast<PDFNumberElement*>(pLookup);
1448  if (!pNumber)
1449  {
1450  SAL_WARN("vcl.filter", "PDFDocument::ReadXRefStream: stream length is not provided");
1451  return;
1452  }
1453  sal_uInt64 nLength = pNumber->GetValue();
1454 
1455  // Look up the stream offset.
1456  PDFStreamElement* pStream = nullptr;
1457  for (const auto& pElement : m_aElements)
1458  {
1459  if (auto pS = dynamic_cast<PDFStreamElement*>(pElement.get()))
1460  {
1461  pStream = pS;
1462  break;
1463  }
1464  }
1465  if (!pStream)
1466  {
1467  SAL_WARN("vcl.filter", "PDFDocument::ReadXRefStream: no stream token found");
1468  return;
1469  }
1470 
1471  // Read and decompress it.
1472  rStream.Seek(pStream->GetOffset());
1473  std::vector<char> aBuf(nLength);
1474  rStream.ReadBytes(aBuf.data(), aBuf.size());
1475 
1476  auto pFilter = dynamic_cast<PDFNameElement*>(pObject->Lookup("Filter"));
1477  if (!pFilter)
1478  {
1479  SAL_WARN("vcl.filter", "PDFDocument::ReadXRefStream: no Filter found");
1480  return;
1481  }
1482 
1483  if (pFilter->GetValue() != "FlateDecode")
1484  {
1485  SAL_WARN("vcl.filter",
1486  "PDFDocument::ReadXRefStream: unexpected filter: " << pFilter->GetValue());
1487  return;
1488  }
1489 
1490  int nColumns = 1;
1491  int nPredictor = 1;
1492  if (auto pDecodeParams = dynamic_cast<PDFDictionaryElement*>(pObject->Lookup("DecodeParms")))
1493  {
1494  const std::map<OString, PDFElement*>& rItems = pDecodeParams->GetItems();
1495  auto it = rItems.find("Columns");
1496  if (it != rItems.end())
1497  if (auto pColumns = dynamic_cast<PDFNumberElement*>(it->second))
1498  nColumns = pColumns->GetValue();
1499  it = rItems.find("Predictor");
1500  if (it != rItems.end())
1501  if (auto pPredictor = dynamic_cast<PDFNumberElement*>(it->second))
1502  nPredictor = pPredictor->GetValue();
1503  }
1504 
1505  SvMemoryStream aSource(aBuf.data(), aBuf.size(), StreamMode::READ);
1506  SvMemoryStream aStream;
1507  ZCodec aZCodec;
1508  aZCodec.BeginCompression();
1509  aZCodec.Decompress(aSource, aStream);
1510  if (!aZCodec.EndCompression())
1511  {
1512  SAL_WARN("vcl.filter", "PDFDocument::ReadXRefStream: decompression failed");
1513  return;
1514  }
1515 
1516  // Look up the first and the last entry we need to read.
1517  auto pIndex = dynamic_cast<PDFArrayElement*>(pObject->Lookup("Index"));
1518  std::vector<size_t> aFirstObjects;
1519  std::vector<size_t> aNumberOfObjects;
1520  if (!pIndex)
1521  {
1522  auto pSize = dynamic_cast<PDFNumberElement*>(pObject->Lookup("Size"));
1523  if (pSize)
1524  {
1525  aFirstObjects.push_back(0);
1526  aNumberOfObjects.push_back(pSize->GetValue());
1527  }
1528  else
1529  {
1530  SAL_WARN("vcl.filter", "PDFDocument::ReadXRefStream: Index and Size not found");
1531  return;
1532  }
1533  }
1534  else
1535  {
1536  const std::vector<PDFElement*>& rIndexElements = pIndex->GetElements();
1537  size_t nFirstObject = 0;
1538  for (size_t i = 0; i < rIndexElements.size(); ++i)
1539  {
1540  if (i % 2 == 0)
1541  {
1542  auto pFirstObject = dynamic_cast<PDFNumberElement*>(rIndexElements[i]);
1543  if (!pFirstObject)
1544  {
1545  SAL_WARN("vcl.filter",
1546  "PDFDocument::ReadXRefStream: Index has no first object");
1547  return;
1548  }
1549  nFirstObject = pFirstObject->GetValue();
1550  continue;
1551  }
1552 
1553  auto pNumberOfObjects = dynamic_cast<PDFNumberElement*>(rIndexElements[i]);
1554  if (!pNumberOfObjects)
1555  {
1556  SAL_WARN("vcl.filter",
1557  "PDFDocument::ReadXRefStream: Index has no number of objects");
1558  return;
1559  }
1560  aFirstObjects.push_back(nFirstObject);
1561  aNumberOfObjects.push_back(pNumberOfObjects->GetValue());
1562  }
1563  }
1564 
1565  // Look up the format of a single entry.
1566  const int nWSize = 3;
1567  auto pW = dynamic_cast<PDFArrayElement*>(pObject->Lookup("W"));
1568  if (!pW || pW->GetElements().size() < nWSize)
1569  {
1570  SAL_WARN("vcl.filter", "PDFDocument::ReadXRefStream: W not found or has < 3 elements");
1571  return;
1572  }
1573  int aW[nWSize];
1574  // First character is the (kind of) repeated predictor.
1575  int nLineLength = 1;
1576  for (size_t i = 0; i < nWSize; ++i)
1577  {
1578  auto pI = dynamic_cast<PDFNumberElement*>(pW->GetElements()[i]);
1579  if (!pI)
1580  {
1581  SAL_WARN("vcl.filter", "PDFDocument::ReadXRefStream: W contains non-number");
1582  return;
1583  }
1584  aW[i] = pI->GetValue();
1585  nLineLength += aW[i];
1586  }
1587 
1588  if (nPredictor > 1 && nLineLength - 1 != nColumns)
1589  {
1590  SAL_WARN("vcl.filter",
1591  "PDFDocument::ReadXRefStream: /DecodeParms/Columns is inconsistent with /W");
1592  return;
1593  }
1594 
1595  aStream.Seek(0);
1596  for (size_t nSubSection = 0; nSubSection < aFirstObjects.size(); ++nSubSection)
1597  {
1598  size_t nFirstObject = aFirstObjects[nSubSection];
1599  size_t nNumberOfObjects = aNumberOfObjects[nSubSection];
1600 
1601  // This is the line as read from the stream.
1602  std::vector<unsigned char> aOrigLine(nLineLength);
1603  // This is the line as it appears after tweaking according to nPredictor.
1604  std::vector<unsigned char> aFilteredLine(nLineLength);
1605  for (size_t nEntry = 0; nEntry < nNumberOfObjects; ++nEntry)
1606  {
1607  size_t nIndex = nFirstObject + nEntry;
1608 
1609  aStream.ReadBytes(aOrigLine.data(), aOrigLine.size());
1610  if (nPredictor > 1 && aOrigLine[0] + 10 != nPredictor)
1611  {
1612  SAL_WARN("vcl.filter", "PDFDocument::ReadXRefStream: in-stream predictor is "
1613  "inconsistent with /DecodeParms/Predictor for object #"
1614  << nIndex);
1615  return;
1616  }
1617 
1618  for (int i = 0; i < nLineLength; ++i)
1619  {
1620  switch (nPredictor)
1621  {
1622  case 1:
1623  // No prediction.
1624  break;
1625  case 12:
1626  // PNG prediction: up (on all rows).
1627  aFilteredLine[i] = aFilteredLine[i] + aOrigLine[i];
1628  break;
1629  default:
1630  SAL_WARN("vcl.filter", "PDFDocument::ReadXRefStream: unexpected predictor: "
1631  << nPredictor);
1632  return;
1633  break;
1634  }
1635  }
1636 
1637  // First character is already handled above.
1638  int nPos = 1;
1639  size_t nType = 0;
1640  // Start of the current field in the stream data.
1641  int nOffset = nPos;
1642  for (; nPos < nOffset + aW[0]; ++nPos)
1643  {
1644  unsigned char nCh = aFilteredLine[nPos];
1645  nType = (nType << 8) + nCh;
1646  }
1647 
1648  // Start of the object in the file stream.
1649  size_t nStreamOffset = 0;
1650  nOffset = nPos;
1651  for (; nPos < nOffset + aW[1]; ++nPos)
1652  {
1653  unsigned char nCh = aFilteredLine[nPos];
1654  nStreamOffset = (nStreamOffset << 8) + nCh;
1655  }
1656 
1657  // Generation number of the object.
1658  size_t nGenerationNumber = 0;
1659  nOffset = nPos;
1660  for (; nPos < nOffset + aW[2]; ++nPos)
1661  {
1662  unsigned char nCh = aFilteredLine[nPos];
1663  nGenerationNumber = (nGenerationNumber << 8) + nCh;
1664  }
1665 
1666  // Ignore invalid nType.
1667  if (nType <= 2)
1668  {
1669  if (m_aXRef.find(nIndex) == m_aXRef.end())
1670  {
1671  XRefEntry aEntry;
1672  switch (nType)
1673  {
1674  case 0:
1675  aEntry.SetType(XRefEntryType::FREE);
1676  break;
1677  case 1:
1678  aEntry.SetType(XRefEntryType::NOT_COMPRESSED);
1679  break;
1680  case 2:
1681  aEntry.SetType(XRefEntryType::COMPRESSED);
1682  break;
1683  }
1684  aEntry.SetOffset(nStreamOffset);
1685  m_aXRef[nIndex] = aEntry;
1686  }
1687  }
1688  }
1689  }
1690 }
1691 
1692 void PDFDocument::ReadXRef(SvStream& rStream)
1693 {
1694  PDFDocument::SkipWhitespace(rStream);
1695 
1696  while (true)
1697  {
1698  PDFNumberElement aFirstObject;
1699  if (!aFirstObject.Read(rStream))
1700  {
1701  // Next token is not a number, it'll be the trailer.
1702  return;
1703  }
1704 
1705  if (aFirstObject.GetValue() < 0)
1706  {
1707  SAL_WARN("vcl.filter", "PDFDocument::ReadXRef: expected first object number >= 0");
1708  return;
1709  }
1710 
1711  PDFDocument::SkipWhitespace(rStream);
1712  PDFNumberElement aNumberOfEntries;
1713  if (!aNumberOfEntries.Read(rStream))
1714  {
1715  SAL_WARN("vcl.filter", "PDFDocument::ReadXRef: failed to read number of entries");
1716  return;
1717  }
1718 
1719  if (aNumberOfEntries.GetValue() < 0)
1720  {
1721  SAL_WARN("vcl.filter", "PDFDocument::ReadXRef: expected zero or more entries");
1722  return;
1723  }
1724 
1725  size_t nSize = aNumberOfEntries.GetValue();
1726  for (size_t nEntry = 0; nEntry < nSize; ++nEntry)
1727  {
1728  size_t nIndex = aFirstObject.GetValue() + nEntry;
1729  PDFDocument::SkipWhitespace(rStream);
1730  PDFNumberElement aOffset;
1731  if (!aOffset.Read(rStream))
1732  {
1733  SAL_WARN("vcl.filter", "PDFDocument::ReadXRef: failed to read offset");
1734  return;
1735  }
1736 
1737  PDFDocument::SkipWhitespace(rStream);
1738  PDFNumberElement aGenerationNumber;
1739  if (!aGenerationNumber.Read(rStream))
1740  {
1741  SAL_WARN("vcl.filter", "PDFDocument::ReadXRef: failed to read generation number");
1742  return;
1743  }
1744 
1745  PDFDocument::SkipWhitespace(rStream);
1746  OString aKeyword = ReadKeyword(rStream);
1747  if (aKeyword != "f" && aKeyword != "n")
1748  {
1749  SAL_WARN("vcl.filter", "PDFDocument::ReadXRef: unexpected keyword");
1750  return;
1751  }
1752  // xrefs are read in reverse order, so never update an existing
1753  // offset with an older one.
1754  if (m_aXRef.find(nIndex) == m_aXRef.end())
1755  {
1756  XRefEntry aEntry;
1757  aEntry.SetOffset(aOffset.GetValue());
1758  // Initially only the first entry is dirty.
1759  if (nIndex == 0)
1760  aEntry.SetDirty(true);
1761  m_aXRef[nIndex] = aEntry;
1762  }
1763  PDFDocument::SkipWhitespace(rStream);
1764  }
1765  }
1766 }
1767 
1768 void PDFDocument::SkipWhitespace(SvStream& rStream)
1769 {
1770  char ch = 0;
1771 
1772  while (true)
1773  {
1774  rStream.ReadChar(ch);
1775  if (rStream.eof())
1776  break;
1777 
1778  if (!rtl::isAsciiWhiteSpace(static_cast<unsigned char>(ch)))
1779  {
1780  rStream.SeekRel(-1);
1781  return;
1782  }
1783  }
1784 }
1785 
1786 void PDFDocument::SkipLineBreaks(SvStream& rStream)
1787 {
1788  char ch = 0;
1789 
1790  while (true)
1791  {
1792  rStream.ReadChar(ch);
1793  if (rStream.eof())
1794  break;
1795 
1796  if (ch != '\n' && ch != '\r')
1797  {
1798  rStream.SeekRel(-1);
1799  return;
1800  }
1801  }
1802 }
1803 
1804 size_t PDFDocument::GetObjectOffset(size_t nIndex) const
1805 {
1806  auto it = m_aXRef.find(nIndex);
1807  if (it == m_aXRef.end() || it->second.GetType() == XRefEntryType::COMPRESSED)
1808  {
1809  SAL_WARN("vcl.filter", "PDFDocument::GetObjectOffset: wanted to look up index #"
1810  << nIndex << ", but failed");
1811  return 0;
1812  }
1813 
1814  return it->second.GetOffset();
1815 }
1816 
1817 const std::vector<std::unique_ptr<PDFElement>>& PDFDocument::GetElements() const
1818 {
1819  return m_aElements;
1820 }
1821 
1823 static void visitPages(PDFObjectElement* pPages, std::vector<PDFObjectElement*>& rRet)
1824 {
1825  auto pKids = dynamic_cast<PDFArrayElement*>(pPages->Lookup("Kids"));
1826  if (!pKids)
1827  {
1828  SAL_WARN("vcl.filter", "visitPages: pages has no kids");
1829  return;
1830  }
1831 
1832  pPages->setVisiting(true);
1833 
1834  for (const auto& pKid : pKids->GetElements())
1835  {
1836  auto pReference = dynamic_cast<PDFReferenceElement*>(pKid);
1837  if (!pReference)
1838  continue;
1839 
1840  PDFObjectElement* pKidObject = pReference->LookupObject();
1841  if (!pKidObject)
1842  continue;
1843 
1844  // detect if visiting reenters itself
1845  if (pKidObject->alreadyVisiting())
1846  {
1847  SAL_WARN("vcl.filter", "visitPages: loop in hierarchy");
1848  continue;
1849  }
1850 
1851  auto pName = dynamic_cast<PDFNameElement*>(pKidObject->Lookup("Type"));
1852  if (pName && pName->GetValue() == "Pages")
1853  // Pages inside pages: recurse.
1854  visitPages(pKidObject, rRet);
1855  else
1856  // Found an actual page.
1857  rRet.push_back(pKidObject);
1858  }
1859 
1860  pPages->setVisiting(false);
1861 }
1862 
1863 std::vector<PDFObjectElement*> PDFDocument::GetPages()
1864 {
1865  std::vector<PDFObjectElement*> aRet;
1866 
1867  PDFReferenceElement* pRoot = nullptr;
1868 
1869  PDFTrailerElement* pTrailer = nullptr;
1870  if (!m_aTrailerOffsets.empty())
1871  {
1872  // Get access to the latest trailer, and work with the keys of that
1873  // one.
1874  auto it = m_aOffsetTrailers.find(m_aTrailerOffsets[0]);
1875  if (it != m_aOffsetTrailers.end())
1876  pTrailer = it->second;
1877  }
1878 
1879  if (pTrailer)
1880  pRoot = dynamic_cast<PDFReferenceElement*>(pTrailer->Lookup("Root"));
1881  else if (m_pXRefStream)
1882  pRoot = dynamic_cast<PDFReferenceElement*>(m_pXRefStream->Lookup("Root"));
1883 
1884  if (!pRoot)
1885  {
1886  SAL_WARN("vcl.filter", "PDFDocument::GetPages: trailer has no Root key");
1887  return aRet;
1888  }
1889 
1890  PDFObjectElement* pCatalog = pRoot->LookupObject();
1891  if (!pCatalog)
1892  {
1893  SAL_WARN("vcl.filter", "PDFDocument::GetPages: trailer has no catalog");
1894  return aRet;
1895  }
1896 
1897  PDFObjectElement* pPages = pCatalog->LookupObject("Pages");
1898  if (!pPages)
1899  {
1900  SAL_WARN("vcl.filter", "PDFDocument::GetPages: catalog (obj " << pCatalog->GetObjectValue()
1901  << ") has no pages");
1902  return aRet;
1903  }
1904 
1905  visitPages(pPages, aRet);
1906 
1907  return aRet;
1908 }
1909 
1910 void PDFDocument::PushBackEOF(size_t nOffset) { m_aEOFs.push_back(nOffset); }
1911 
1912 std::vector<PDFObjectElement*> PDFDocument::GetSignatureWidgets()
1913 {
1914  std::vector<PDFObjectElement*> aRet;
1915 
1916  std::vector<PDFObjectElement*> aPages = GetPages();
1917 
1918  for (const auto& pPage : aPages)
1919  {
1920  if (!pPage)
1921  continue;
1922 
1923  PDFElement* pAnnotsElement = pPage->Lookup("Annots");
1924  auto pAnnots = dynamic_cast<PDFArrayElement*>(pAnnotsElement);
1925  if (!pAnnots)
1926  {
1927  // Annots is not an array, see if it's a reference to an object
1928  // with a direct array.
1929  auto pAnnotsRef = dynamic_cast<PDFReferenceElement*>(pAnnotsElement);
1930  if (pAnnotsRef)
1931  {
1932  if (PDFObjectElement* pAnnotsObject = pAnnotsRef->LookupObject())
1933  {
1934  pAnnots = pAnnotsObject->GetArray();
1935  }
1936  }
1937  }
1938 
1939  if (!pAnnots)
1940  continue;
1941 
1942  for (const auto& pAnnot : pAnnots->GetElements())
1943  {
1944  auto pReference = dynamic_cast<PDFReferenceElement*>(pAnnot);
1945  if (!pReference)
1946  continue;
1947 
1948  PDFObjectElement* pAnnotObject = pReference->LookupObject();
1949  if (!pAnnotObject)
1950  continue;
1951 
1952  auto pFT = dynamic_cast<PDFNameElement*>(pAnnotObject->Lookup("FT"));
1953  if (!pFT || pFT->GetValue() != "Sig")
1954  continue;
1955 
1956  aRet.push_back(pAnnotObject);
1957  }
1958  }
1959 
1960  return aRet;
1961 }
1962 
1963 std::vector<unsigned char> PDFDocument::DecodeHexString(PDFHexStringElement const* pElement)
1964 {
1965  return svl::crypto::DecodeHexString(pElement->GetValue());
1966 }
1967 
1968 PDFCommentElement::PDFCommentElement(PDFDocument& rDoc)
1969  : m_rDoc(rDoc)
1970 {
1971 }
1972 
1973 bool PDFCommentElement::Read(SvStream& rStream)
1974 {
1975  // Read from (including) the % char till (excluding) the end of the line/stream.
1976  OStringBuffer aBuf;
1977  char ch;
1978  rStream.ReadChar(ch);
1979  while (true)
1980  {
1981  if (ch == '\n' || ch == '\r' || rStream.eof())
1982  {
1983  m_aComment = aBuf.makeStringAndClear();
1984 
1985  if (m_aComment.startsWith("%%EOF"))
1986  m_rDoc.PushBackEOF(rStream.Tell());
1987 
1988  SAL_INFO("vcl.filter", "PDFCommentElement::Read: m_aComment is '" << m_aComment << "'");
1989  return true;
1990  }
1991  aBuf.append(ch);
1992  rStream.ReadChar(ch);
1993  }
1994 
1995  return false;
1996 }
1997 
1999 
2001 {
2002  OStringBuffer aBuf;
2003  m_nOffset = rStream.Tell();
2004  char ch;
2005  rStream.ReadChar(ch);
2006  if (rStream.eof())
2007  {
2008  return false;
2009  }
2010  if (!rtl::isAsciiDigit(static_cast<unsigned char>(ch)) && ch != '-' && ch != '.')
2011  {
2012  rStream.SeekRel(-1);
2013  return false;
2014  }
2015  while (!rStream.eof())
2016  {
2017  if (!rtl::isAsciiDigit(static_cast<unsigned char>(ch)) && ch != '-' && ch != '.')
2018  {
2019  rStream.SeekRel(-1);
2020  m_nLength = rStream.Tell() - m_nOffset;
2021  m_fValue = aBuf.makeStringAndClear().toDouble();
2022  SAL_INFO("vcl.filter", "PDFNumberElement::Read: m_fValue is '" << m_fValue << "'");
2023  return true;
2024  }
2025  aBuf.append(ch);
2026  rStream.ReadChar(ch);
2027  }
2028 
2029  return false;
2030 }
2031 
2032 sal_uInt64 PDFNumberElement::GetLocation() const { return m_nOffset; }
2033 
2034 sal_uInt64 PDFNumberElement::GetLength() const { return m_nLength; }
2035 
2036 PDFBooleanElement::PDFBooleanElement(bool /*bValue*/) {}
2037 
2038 bool PDFBooleanElement::Read(SvStream& /*rStream*/) { return true; }
2039 
2040 bool PDFNullElement::Read(SvStream& /*rStream*/) { return true; }
2041 
2043 {
2044  char ch;
2045  rStream.ReadChar(ch);
2046  if (ch != '<')
2047  {
2048  SAL_INFO("vcl.filter", "PDFHexStringElement::Read: expected '<' as first character");
2049  return false;
2050  }
2051  rStream.ReadChar(ch);
2052 
2053  OStringBuffer aBuf;
2054  while (!rStream.eof())
2055  {
2056  if (ch == '>')
2057  {
2058  m_aValue = aBuf.makeStringAndClear();
2059  SAL_INFO("vcl.filter",
2060  "PDFHexStringElement::Read: m_aValue length is " << m_aValue.getLength());
2061  return true;
2062  }
2063  aBuf.append(ch);
2064  rStream.ReadChar(ch);
2065  }
2066 
2067  return false;
2068 }
2069 
2070 const OString& PDFHexStringElement::GetValue() const { return m_aValue; }
2071 
2073 {
2074  char nPrevCh = 0;
2075  char ch = 0;
2076  rStream.ReadChar(ch);
2077  if (ch != '(')
2078  {
2079  SAL_INFO("vcl.filter", "PDFHexStringElement::Read: expected '(' as first character");
2080  return false;
2081  }
2082  nPrevCh = ch;
2083  rStream.ReadChar(ch);
2084 
2085  // Start with 1 nesting level as we read a '(' above already.
2086  int nDepth = 1;
2087  OStringBuffer aBuf;
2088  while (!rStream.eof())
2089  {
2090  if (ch == '(' && nPrevCh != '\\')
2091  ++nDepth;
2092 
2093  if (ch == ')' && nPrevCh != '\\')
2094  --nDepth;
2095 
2096  if (nDepth == 0)
2097  {
2098  // ')' of the outermost '(' is reached.
2099  m_aValue = aBuf.makeStringAndClear();
2100  SAL_INFO("vcl.filter",
2101  "PDFLiteralStringElement::Read: m_aValue is '" << m_aValue << "'");
2102  return true;
2103  }
2104  aBuf.append(ch);
2105  nPrevCh = ch;
2106  rStream.ReadChar(ch);
2107  }
2108 
2109  return false;
2110 }
2111 
2112 const OString& PDFLiteralStringElement::GetValue() const { return m_aValue; }
2113 
2115  : m_rDoc(rDoc)
2116 {
2117 }
2118 
2120 {
2121  m_nOffset = rStream.Tell();
2122  return true;
2123 }
2124 
2125 PDFElement* PDFTrailerElement::Lookup(const OString& rDictionaryKey)
2126 {
2127  if (m_aDictionary.empty())
2129 
2130  return PDFDictionaryElement::Lookup(m_aDictionary, rDictionaryKey);
2131 }
2132 
2133 sal_uInt64 PDFTrailerElement::GetLocation() const { return m_nOffset; }
2134 
2135 double PDFNumberElement::GetValue() const { return m_fValue; }
2136 
2137 PDFObjectElement::PDFObjectElement(PDFDocument& rDoc, double fObjectValue, double fGenerationValue)
2138  : m_rDoc(rDoc)
2139  , m_fObjectValue(fObjectValue)
2140  , m_fGenerationValue(fGenerationValue)
2141  , m_pNumberElement(nullptr)
2142  , m_nDictionaryOffset(0)
2143  , m_nDictionaryLength(0)
2144  , m_pDictionaryElement(nullptr)
2145  , m_nArrayOffset(0)
2146  , m_nArrayLength(0)
2147  , m_pArrayElement(nullptr)
2148  , m_pStreamElement(nullptr)
2149 {
2150 }
2151 
2153 {
2154  SAL_INFO("vcl.filter",
2155  "PDFObjectElement::Read: " << m_fObjectValue << " " << m_fGenerationValue << " obj");
2156  return true;
2157 }
2158 
2160 
// Fills rDictionary with the key -> value pairs of the dictionary that belongs
// to pThis, by walking the flat token list rElements.
//
// rElements:   all tokens, in input order.
// pThis:       the element whose dictionary is wanted; the walk starts at its
//              position in rElements (or at index 0 when it is not found there).
//              It is flagged as "parsing" for the duration of the walk.
// rDictionary: output map. If it is already non-empty nothing is done.
//
// Returns the index of the token that closed the outermost dictionary, or 0 if
// no such token was reached (including the "already parsed" early return).
2161 size_t PDFDictionaryElement::Parse(const std::vector<std::unique_ptr<PDFElement>>& rElements,
2162  PDFElement* pThis, std::map<OString, PDFElement*>& rDictionary)
2163 {
2164  // The index of last parsed element, in case of nested dictionaries.
2165  size_t nRet = 0;
2166 
2167  if (!rDictionary.empty())
2168  return nRet;
2169 
2170  pThis->setParsing(true);
2171 
2172  auto pThisObject = dynamic_cast<PDFObjectElement*>(pThis);
2173  // This is set to non-nullptr here for nested dictionaries only.
2174  auto pThisDictionary = dynamic_cast<PDFDictionaryElement*>(pThis);
2175 
2176  // Find out where the dictionary for this object starts.
2177  size_t nIndex = 0;
2178  for (size_t i = 0; i < rElements.size(); ++i)
2179  {
2180  if (rElements[i].get() == pThis)
2181  {
2182  nIndex = i;
2183  break;
2184  }
2185  }
2186 
// Parser state: aName/nNameOffset hold the key that still waits for its value;
// aNumbers buffers number tokens until it is known what they belong to.
2187  OString aName;
2188  sal_uInt64 nNameOffset = 0;
2189  std::vector<PDFNumberElement*> aNumbers;
2190  // The array value we're in -- if any.
2191  PDFArrayElement* pArray = nullptr;
2192  sal_uInt64 nDictionaryOffset = 0;
2193  int nDictionaryDepth = 0;
2194  // Toplevel dictionary found (not inside an array).
2195  bool bDictionaryFound = false;
2196  // Toplevel array found (not inside a dictionary).
2197  bool bArrayFound = false;
2198  for (size_t i = nIndex; i < rElements.size(); ++i)
2199  {
2200  // Dictionary tokens can be nested, track enter/leave.
2201  if (auto pDictionary = dynamic_cast<PDFDictionaryElement*>(rElements[i].get()))
2202  {
2203  bDictionaryFound = true;
2204  if (++nDictionaryDepth == 1)
2205  {
2206  // First dictionary start, track start offset.
2207  nDictionaryOffset = pDictionary->m_nLocation;
2208  if (pThisObject)
2209  {
2210  if (!bArrayFound)
2211  // Then the toplevel dictionary of the object.
2212  pThisObject->SetDictionary(pDictionary);
2213  pThisDictionary = pDictionary;
2214  pThisObject->SetDictionaryOffset(nDictionaryOffset);
2215  }
2216  }
2217  else if (!pDictionary->alreadyParsing())
2218  {
2219  // Nested dictionary.
// The recursive call fills the nested dictionary's own m_aItems and reports
// where it stopped; this loop then continues after that point.
2220  const size_t nexti
2221  = PDFDictionaryElement::Parse(rElements, pDictionary, pDictionary->m_aItems);
2222  if (nexti >= i) // ensure we go forwards and not endlessly loop
2223  {
2224  i = nexti;
2225  rDictionary[aName] = pDictionary;
2226  aName.clear();
2227  }
2228  }
2229  }
2230 
2231  if (auto pEndDictionary = dynamic_cast<PDFEndDictionaryElement*>(rElements[i].get()))
2232  {
2233  if (--nDictionaryDepth == 0)
2234  {
2235  // Last dictionary end, track length and stop parsing.
2236  if (pThisObject)
2237  pThisObject->SetDictionaryLength(pEndDictionary->GetLocation()
2238  - nDictionaryOffset);
2239  nRet = i;
2240  break;
2241  }
2242  }
2243 
2244  auto pName = dynamic_cast<PDFNameElement*>(rElements[i].get());
2245  if (pName)
2246  {
// A name following buffered numbers means the last number was the value of the
// pending key; flush it before treating this name as the next key.
2247  if (!aNumbers.empty())
2248  {
2249  PDFNumberElement* pNumber = aNumbers.back();
2250  rDictionary[aName] = pNumber;
2251  if (pThisDictionary)
2252  {
2253  pThisDictionary->SetKeyOffset(aName, nNameOffset);
2254  pThisDictionary->SetKeyValueLength(
2255  aName, pNumber->GetLocation() + pNumber->GetLength() - nNameOffset);
2256  }
2257  aName.clear();
2258  aNumbers.clear();
2259  }
2260 
2261  if (aName.isEmpty())
2262  {
2263  // Remember key.
2264  aName = pName->GetValue();
2265  nNameOffset = pName->GetLocation();
2266  }
2267  else
2268  {
2269  if (pArray)
2270  {
2271  if (bDictionaryFound)
2272  // Array inside dictionary.
2273  pArray->PushBack(pName);
2274  }
2275  else
2276  {
2277  // Name-name key-value.
2278  rDictionary[aName] = pName;
2279  if (pThisDictionary)
2280  {
2281  pThisDictionary->SetKeyOffset(aName, nNameOffset);
// NOTE(review): the embedded line numbering skips one line inside the next
// expression, so a term may be missing here -- compare with the upstream file.
2282  pThisDictionary->SetKeyValueLength(aName, pName->GetLocation()
2284  - nNameOffset);
2285  }
2286  aName.clear();
2287  }
2288  }
2289  continue;
2290  }
2291 
2292  auto pArr = dynamic_cast<PDFArrayElement*>(rElements[i].get());
2293  if (pArr)
2294  {
2295  bArrayFound = true;
2296  pArray = pArr;
2297  continue;
2298  }
2299 
2300  auto pEndArr = dynamic_cast<PDFEndArrayElement*>(rElements[i].get());
2301  if (pArray && pEndArr)
2302  {
// Numbers seen since the array started were not part of a reference, so they
// are plain array members.
2303  for (auto& pNumber : aNumbers)
2304  pArray->PushBack(pNumber);
2305  aNumbers.clear();
2306  rDictionary[aName] = pArray;
2307  if (pThisDictionary)
2308  {
2309  pThisDictionary->SetKeyOffset(aName, nNameOffset);
2310  // Include the ending ']' in the length of the key - (array)value pair length.
2311  pThisDictionary->SetKeyValueLength(aName, pEndArr->GetOffset() - nNameOffset + 1);
2312  }
2313  aName.clear();
2314  pArray = nullptr;
2315  continue;
2316  }
2317 
2318  auto pReference = dynamic_cast<PDFReferenceElement*>(rElements[i].get());
2319  if (pReference)
2320  {
2321  if (!pArray)
2322  {
2323  rDictionary[aName] = pReference;
2324  if (pThisDictionary)
2325  {
2326  pThisDictionary->SetKeyOffset(aName, nNameOffset);
2327  pThisDictionary->SetKeyValueLength(aName,
2328  pReference->GetOffset() - nNameOffset);
2329  }
2330  aName.clear();
2331  }
2332  else
2333  {
2334  if (bDictionaryFound)
2335  // Array inside dictionary.
2336  pArray->PushBack(pReference);
2337  }
// The buffered numbers were the object/generation numbers of this reference.
2338  aNumbers.clear();
2339  continue;
2340  }
2341 
// NOTE(review): literal strings and booleans are stored under the pending key
// and clear it even while pArray is set (hex strings below are appended to the
// array instead) -- confirm this difference is intended.
2342  auto pLiteralString = dynamic_cast<PDFLiteralStringElement*>(rElements[i].get());
2343  if (pLiteralString)
2344  {
2345  rDictionary[aName] = pLiteralString;
2346  if (pThisDictionary)
2347  pThisDictionary->SetKeyOffset(aName, nNameOffset);
2348  aName.clear();
2349  continue;
2350  }
2351 
2352  auto pBoolean = dynamic_cast<PDFBooleanElement*>(rElements[i].get());
2353  if (pBoolean)
2354  {
2355  rDictionary[aName] = pBoolean;
2356  if (pThisDictionary)
2357  pThisDictionary->SetKeyOffset(aName, nNameOffset);
2358  aName.clear();
2359  continue;
2360  }
2361 
2362  auto pHexString = dynamic_cast<PDFHexStringElement*>(rElements[i].get());
2363  if (pHexString)
2364  {
2365  if (!pArray)
2366  {
2367  rDictionary[aName] = pHexString;
2368  if (pThisDictionary)
2369  pThisDictionary->SetKeyOffset(aName, nNameOffset);
2370  aName.clear();
2371  }
2372  else
2373  {
2374  pArray->PushBack(pHexString);
2375  }
2376  continue;
2377  }
2378 
// Never read past the end of the object that is being parsed.
2379  if (dynamic_cast<PDFEndObjectElement*>(rElements[i].get()))
2380  break;
2381 
2382  // Just remember this, so that in case it's not a reference parameter,
2383  // we can handle it later.
2384  auto pNumber = dynamic_cast<PDFNumberElement*>(rElements[i].get());
2385  if (pNumber)
2386  aNumbers.push_back(pNumber);
2387  }
2388 
// A number still buffered when the walk ends is the value of the pending key.
2389  if (!aNumbers.empty())
2390  {
2391  rDictionary[aName] = aNumbers.back();
2392  if (pThisDictionary)
2393  pThisDictionary->SetKeyOffset(aName, nNameOffset);
2394  aName.clear();
2395  aNumbers.clear();
2396  }
2397 
2398  pThis->setParsing(false);
2399 
2400  return nRet;
2401 }
2402 
2403 PDFElement* PDFDictionaryElement::Lookup(const std::map<OString, PDFElement*>& rDictionary,
2404  const OString& rKey)
2405 {
2406  auto it = rDictionary.find(rKey);
2407  if (it == rDictionary.end())
2408  return nullptr;
2409 
2410  return it->second;
2411 }
2412 
2414 {
2415  auto pKey = dynamic_cast<PDFReferenceElement*>(
2416  PDFDictionaryElement::Lookup(m_aItems, rDictionaryKey));
2417  if (!pKey)
2418  {
2419  SAL_WARN("vcl.filter",
2420  "PDFDictionaryElement::LookupObject: no such key with reference value: "
2421  << rDictionaryKey);
2422  return nullptr;
2423  }
2424 
2425  return pKey->LookupObject();
2426 }
2427 
2428 PDFElement* PDFDictionaryElement::LookupElement(const OString& rDictionaryKey)
2429 {
2430  return PDFDictionaryElement::Lookup(m_aItems, rDictionaryKey);
2431 }
2432 
2433 PDFElement* PDFObjectElement::Lookup(const OString& rDictionaryKey)
2434 {
2435  if (m_aDictionary.empty())
2436  {
2437  if (!m_aElements.empty())
2438  // This is a stored object in an object stream.
2440  else
2441  // Normal object: elements are stored as members of the document itself.
2443  }
2444 
2445  return PDFDictionaryElement::Lookup(m_aDictionary, rDictionaryKey);
2446 }
2447 
2448 PDFObjectElement* PDFObjectElement::LookupObject(const OString& rDictionaryKey)
2449 {
2450  auto pKey = dynamic_cast<PDFReferenceElement*>(Lookup(rDictionaryKey));
2451  if (!pKey)
2452  {
2453  SAL_WARN("vcl.filter", "PDFObjectElement::LookupObject: no such key with reference value: "
2454  << rDictionaryKey);
2455  return nullptr;
2456  }
2457 
2458  return pKey->LookupObject();
2459 }
2460 
2462 
2463 void PDFObjectElement::SetDictionaryOffset(sal_uInt64 nDictionaryOffset)
2464 {
2465  m_nDictionaryOffset = nDictionaryOffset;
2466 }
2467 
2469 {
2470  if (m_aDictionary.empty())
2472 
2473  return m_nDictionaryOffset;
2474 }
2475 
2476 void PDFObjectElement::SetArrayOffset(sal_uInt64 nArrayOffset) { m_nArrayOffset = nArrayOffset; }
2477 
2478 sal_uInt64 PDFObjectElement::GetArrayOffset() const { return m_nArrayOffset; }
2479 
2480 void PDFDictionaryElement::SetKeyOffset(const OString& rKey, sal_uInt64 nOffset)
2481 {
2482  m_aDictionaryKeyOffset[rKey] = nOffset;
2483 }
2484 
2485 void PDFDictionaryElement::SetKeyValueLength(const OString& rKey, sal_uInt64 nLength)
2486 {
2487  m_aDictionaryKeyValueLength[rKey] = nLength;
2488 }
2489 
2490 sal_uInt64 PDFDictionaryElement::GetKeyOffset(const OString& rKey) const
2491 {
2492  auto it = m_aDictionaryKeyOffset.find(rKey);
2493  if (it == m_aDictionaryKeyOffset.end())
2494  return 0;
2495 
2496  return it->second;
2497 }
2498 
2499 sal_uInt64 PDFDictionaryElement::GetKeyValueLength(const OString& rKey) const
2500 {
2501  auto it = m_aDictionaryKeyValueLength.find(rKey);
2502  if (it == m_aDictionaryKeyValueLength.end())
2503  return 0;
2504 
2505  return it->second;
2506 }
2507 
2508 const std::map<OString, PDFElement*>& PDFDictionaryElement::GetItems() const { return m_aItems; }
2509 
2510 void PDFObjectElement::SetDictionaryLength(sal_uInt64 nDictionaryLength)
2511 {
2512  m_nDictionaryLength = nDictionaryLength;
2513 }
2514 
2516 {
2517  if (m_aDictionary.empty())
2519 
2520  return m_nDictionaryLength;
2521 }
2522 
2523 void PDFObjectElement::SetArrayLength(sal_uInt64 nArrayLength) { m_nArrayLength = nArrayLength; }
2524 
2525 sal_uInt64 PDFObjectElement::GetArrayLength() const { return m_nArrayLength; }
2526 
2528 {
2529  if (m_aDictionary.empty())
2531  return m_pDictionaryElement;
2532 }
2533 
2535 {
2536  m_pDictionaryElement = pDictionaryElement;
2537 }
2538 
2540 {
2541  m_pNumberElement = pNumberElement;
2542 }
2543 
2545 
2546 const std::vector<PDFReferenceElement*>& PDFObjectElement::GetDictionaryReferences() const
2547 {
2548  return m_aDictionaryReferences;
2549 }
2550 
2552 {
2553  m_aDictionaryReferences.push_back(pReference);
2554 }
2555 
2556 const std::map<OString, PDFElement*>& PDFObjectElement::GetDictionaryItems()
2557 {
2558  if (m_aDictionary.empty())
2560 
2561  return m_aDictionary;
2562 }
2563 
2564 void PDFObjectElement::SetArray(PDFArrayElement* pArrayElement) { m_pArrayElement = pArrayElement; }
2565 
2567 {
2568  m_pStreamElement = pStreamElement;
2569 }
2570 
2572 
2574 
2576 {
2577  if (!m_pStreamElement)
2578  {
2579  SAL_WARN("vcl.filter", "PDFObjectElement::ParseStoredObjects: no stream");
2580  return;
2581  }
2582 
2583  auto pType = dynamic_cast<PDFNameElement*>(Lookup("Type"));
2584  if (!pType || pType->GetValue() != "ObjStm")
2585  {
2586  if (!pType)
2587  SAL_WARN("vcl.filter", "PDFDocument::ReadXRefStream: missing unexpected type");
2588  else
2589  SAL_WARN("vcl.filter",
2590  "PDFDocument::ReadXRefStream: unexpected type: " << pType->GetValue());
2591  return;
2592  }
2593 
2594  auto pFilter = dynamic_cast<PDFNameElement*>(Lookup("Filter"));
2595  if (!pFilter || pFilter->GetValue() != "FlateDecode")
2596  {
2597  if (!pFilter)
2598  SAL_WARN("vcl.filter", "PDFDocument::ReadXRefStream: missing filter");
2599  else
2600  SAL_WARN("vcl.filter",
2601  "PDFDocument::ReadXRefStream: unexpected filter: " << pFilter->GetValue());
2602  return;
2603  }
2604 
2605  auto pFirst = dynamic_cast<PDFNumberElement*>(Lookup("First"));
2606  if (!pFirst)
2607  {
2608  SAL_WARN("vcl.filter", "PDFObjectElement::ParseStoredObjects: no First");
2609  return;
2610  }
2611 
2612  auto pN = dynamic_cast<PDFNumberElement*>(Lookup("N"));
2613  if (!pN)
2614  {
2615  SAL_WARN("vcl.filter", "PDFObjectElement::ParseStoredObjects: no N");
2616  return;
2617  }
2618  size_t nN = pN->GetValue();
2619 
2620  auto pLength = dynamic_cast<PDFNumberElement*>(Lookup("Length"));
2621  if (!pLength)
2622  {
2623  SAL_WARN("vcl.filter", "PDFObjectElement::ParseStoredObjects: no length");
2624  return;
2625  }
2626  size_t nLength = pLength->GetValue();
2627 
2628  // Read and decompress it.
2629  SvMemoryStream& rEditBuffer = m_rDoc.GetEditBuffer();
2630  rEditBuffer.Seek(m_pStreamElement->GetOffset());
2631  std::vector<char> aBuf(nLength);
2632  rEditBuffer.ReadBytes(aBuf.data(), aBuf.size());
2633  SvMemoryStream aSource(aBuf.data(), aBuf.size(), StreamMode::READ);
2634  SvMemoryStream aStream;
2635  ZCodec aZCodec;
2636  aZCodec.BeginCompression();
2637  aZCodec.Decompress(aSource, aStream);
2638  if (!aZCodec.EndCompression())
2639  {
2640  SAL_WARN("vcl.filter", "PDFObjectElement::ParseStoredObjects: decompression failed");
2641  return;
2642  }
2643 
2644  nLength = aStream.TellEnd();
2645  aStream.Seek(0);
2646  std::vector<size_t> aObjNums;
2647  std::vector<size_t> aOffsets;
2648  std::vector<size_t> aLengths;
2649  // First iterate over and find out the lengths.
2650  for (size_t nObject = 0; nObject < nN; ++nObject)
2651  {
2652  PDFNumberElement aObjNum;
2653  if (!aObjNum.Read(aStream))
2654  {
2655  SAL_WARN("vcl.filter",
2656  "PDFObjectElement::ParseStoredObjects: failed to read object number");
2657  return;
2658  }
2659  aObjNums.push_back(aObjNum.GetValue());
2660 
2661  PDFDocument::SkipWhitespace(aStream);
2662 
2663  PDFNumberElement aByteOffset;
2664  if (!aByteOffset.Read(aStream))
2665  {
2666  SAL_WARN("vcl.filter",
2667  "PDFObjectElement::ParseStoredObjects: failed to read byte offset");
2668  return;
2669  }
2670  aOffsets.push_back(pFirst->GetValue() + aByteOffset.GetValue());
2671 
2672  if (aOffsets.size() > 1)
2673  aLengths.push_back(aOffsets.back() - aOffsets[aOffsets.size() - 2]);
2674  if (nObject + 1 == nN)
2675  aLengths.push_back(nLength - aOffsets.back());
2676 
2677  PDFDocument::SkipWhitespace(aStream);
2678  }
2679 
2680  // Now create streams with the proper length and tokenize the data.
2681  for (size_t nObject = 0; nObject < nN; ++nObject)
2682  {
2683  size_t nObjNum = aObjNums[nObject];
2684  size_t nOffset = aOffsets[nObject];
2685  size_t nLen = aLengths[nObject];
2686 
2687  aStream.Seek(nOffset);
2688  m_aStoredElements.push_back(std::make_unique<PDFObjectElement>(m_rDoc, nObjNum, 0));
2689  PDFObjectElement* pStored = m_aStoredElements.back().get();
2690 
2691  aBuf.clear();
2692  aBuf.resize(nLen);
2693  aStream.ReadBytes(aBuf.data(), aBuf.size());
2694  SvMemoryStream aStoredStream(aBuf.data(), aBuf.size(), StreamMode::READ);
2695 
2696  m_rDoc.Tokenize(aStoredStream, TokenizeMode::STORED_OBJECT, pStored->GetStoredElements(),
2697  pStored);
2698  // This is how references know the object is stored inside this object stream.
2699  m_rDoc.SetIDObject(nObjNum, pStored);
2700 
2701  // Store the stream of the object in the object stream for later use.
2702  std::unique_ptr<SvMemoryStream> pStreamBuffer(new SvMemoryStream());
2703  aStoredStream.Seek(0);
2704  pStreamBuffer->WriteStream(aStoredStream);
2705  pStored->SetStreamBuffer(pStreamBuffer);
2706  }
2707 }
2708 
2709 std::vector<std::unique_ptr<PDFElement>>& PDFObjectElement::GetStoredElements()
2710 {
2711  return m_aElements;
2712 }
2713 
2715 
2716 void PDFObjectElement::SetStreamBuffer(std::unique_ptr<SvMemoryStream>& pStreamBuffer)
2717 {
2718  m_pStreamBuffer = std::move(pStreamBuffer);
2719 }
2720 
2722 
2724  PDFNumberElement const& rGeneration)
2725  : m_rDoc(rDoc)
2726  , m_fObjectValue(rObject.GetValue())
2727  , m_fGenerationValue(rGeneration.GetValue())
2728  , m_rObject(rObject)
2729 {
2730 }
2731 
2733 
2735 {
2736  SAL_INFO("vcl.filter",
2737  "PDFReferenceElement::Read: " << m_fObjectValue << " " << m_fGenerationValue << " R");
2738  m_nOffset = rStream.Tell();
2739  return true;
2740 }
2741 
2742 sal_uInt64 PDFReferenceElement::GetOffset() const { return m_nOffset; }
2743 
2745 {
2746  size_t nOffset = m_rDoc.GetObjectOffset(m_fObjectValue);
2747  if (nOffset == 0)
2748  {
2749  SAL_WARN("vcl.filter", "PDFReferenceElement::LookupNumber: found no offset for object #"
2750  << m_fObjectValue);
2751  return 0;
2752  }
2753 
2754  sal_uInt64 nOrigPos = rStream.Tell();
2755  comphelper::ScopeGuard g([&]() { rStream.Seek(nOrigPos); });
2756 
2757  rStream.Seek(nOffset);
2758  {
2759  PDFDocument::SkipWhitespace(rStream);
2760  PDFNumberElement aNumber;
2761  bool bRet = aNumber.Read(rStream);
2762  if (!bRet || aNumber.GetValue() != m_fObjectValue)
2763  {
2764  SAL_WARN("vcl.filter",
2765  "PDFReferenceElement::LookupNumber: offset points to not matching object");
2766  return 0;
2767  }
2768  }
2769 
2770  {
2771  PDFDocument::SkipWhitespace(rStream);
2772  PDFNumberElement aNumber;
2773  bool bRet = aNumber.Read(rStream);
2774  if (!bRet || aNumber.GetValue() != m_fGenerationValue)
2775  {
2776  SAL_WARN("vcl.filter",
2777  "PDFReferenceElement::LookupNumber: offset points to not matching generation");
2778  return 0;
2779  }
2780  }
2781 
2782  {
2783  PDFDocument::SkipWhitespace(rStream);
2784  OString aKeyword = PDFDocument::ReadKeyword(rStream);
2785  if (aKeyword != "obj")
2786  {
2787  SAL_WARN("vcl.filter",
2788  "PDFReferenceElement::LookupNumber: offset doesn't point to an obj keyword");
2789  return 0;
2790  }
2791  }
2792 
2793  PDFDocument::SkipWhitespace(rStream);
2794  PDFNumberElement aNumber;
2795  if (!aNumber.Read(rStream))
2796  {
2797  SAL_WARN("vcl.filter",
2798  "PDFReferenceElement::LookupNumber: failed to read referenced number");
2799  return 0;
2800  }
2801 
2802  return aNumber.GetValue();
2803 }
2804 
2806 {
2808 }
2809 
2811 {
2812  auto itIDObjects = m_aIDObjects.find(nObjectNumber);
2813 
2814  if (itIDObjects != m_aIDObjects.end())
2815  return itIDObjects->second;
2816 
2817  SAL_WARN("vcl.filter", "PDFDocument::LookupObject: can't find obj " << nObjectNumber);
2818  return nullptr;
2819 }
2820 
2822 
2824 
2826 
2828 {
2829  char ch;
2830  rStream.ReadChar(ch);
2831  if (ch != '<')
2832  {
2833  SAL_WARN("vcl.filter", "PDFDictionaryElement::Read: unexpected character: " << ch);
2834  return false;
2835  }
2836 
2837  if (rStream.eof())
2838  {
2839  SAL_WARN("vcl.filter", "PDFDictionaryElement::Read: unexpected end of file");
2840  return false;
2841  }
2842 
2843  rStream.ReadChar(ch);
2844  if (ch != '<')
2845  {
2846  SAL_WARN("vcl.filter", "PDFDictionaryElement::Read: unexpected character: " << ch);
2847  return false;
2848  }
2849 
2850  m_nLocation = rStream.Tell();
2851 
2852  SAL_INFO("vcl.filter", "PDFDictionaryElement::Read: '<<'");
2853 
2854  return true;
2855 }
2856 
2857 PDFEndDictionaryElement::PDFEndDictionaryElement() = default;
2858 
2859 sal_uInt64 PDFEndDictionaryElement::GetLocation() const { return m_nLocation; }
2860 
2861 bool PDFEndDictionaryElement::Read(SvStream& rStream)
2862 {
2863  m_nLocation = rStream.Tell();
2864  char ch;
2865  rStream.ReadChar(ch);
2866  if (ch != '>')
2867  {
2868  SAL_WARN("vcl.filter", "PDFEndDictionaryElement::Read: unexpected character: " << ch);
2869  return false;
2870  }
2871 
2872  if (rStream.eof())
2873  {
2874  SAL_WARN("vcl.filter", "PDFEndDictionaryElement::Read: unexpected end of file");
2875  return false;
2876  }
2877 
2878  rStream.ReadChar(ch);
2879  if (ch != '>')
2880  {
2881  SAL_WARN("vcl.filter", "PDFEndDictionaryElement::Read: unexpected character: " << ch);
2882  return false;
2883  }
2884 
2885  SAL_INFO("vcl.filter", "PDFEndDictionaryElement::Read: '>>'");
2886 
2887  return true;
2888 }
2889 
// Compiler-generated default constructor; no custom initialization happens here.
2890 PDFNameElement::PDFNameElement() = default;
2891 
2893 {
2894  char ch;
2895  rStream.ReadChar(ch);
2896  if (ch != '/')
2897  {
2898  SAL_WARN("vcl.filter", "PDFNameElement::Read: unexpected character: " << ch);
2899  return false;
2900  }
2901  m_nLocation = rStream.Tell();
2902 
2903  if (rStream.eof())
2904  {
2905  SAL_WARN("vcl.filter", "PDFNameElement::Read: unexpected end of file");
2906  return false;
2907  }
2908 
2909  // Read till the first white-space.
2910  OStringBuffer aBuf;
2911  rStream.ReadChar(ch);
2912  while (!rStream.eof())
2913  {
2914  if (rtl::isAsciiWhiteSpace(static_cast<unsigned char>(ch)) || ch == '/' || ch == '['
2915  || ch == ']' || ch == '<' || ch == '>' || ch == '(')
2916  {
2917  rStream.SeekRel(-1);
2918  m_aValue = aBuf.makeStringAndClear();
2919  SAL_INFO("vcl.filter", "PDFNameElement::Read: m_aValue is '" << m_aValue << "'");
2920  return true;
2921  }
2922  aBuf.append(ch);
2923  rStream.ReadChar(ch);
2924  }
2925 
2926  return false;
2927 }
2928 
2929 const OString& PDFNameElement::GetValue() const { return m_aValue; }
2930 
2931 sal_uInt64 PDFNameElement::GetLocation() const { return m_nLocation; }
2932 
2934  : m_nLength(nLength)
2935  , m_nOffset(0)
2936 {
2937 }
2938 
2940 {
2941  SAL_INFO("vcl.filter", "PDFStreamElement::Read: length is " << m_nLength);
2942  m_nOffset = rStream.Tell();
2943  std::vector<unsigned char> aBytes(m_nLength);
2944  rStream.ReadBytes(aBytes.data(), aBytes.size());
2945  m_aMemory.WriteBytes(aBytes.data(), aBytes.size());
2946 
2947  return rStream.good();
2948 }
2949 
2951 
2952 sal_uInt64 PDFStreamElement::GetOffset() const { return m_nOffset; }
2953 
2954 bool PDFEndStreamElement::Read(SvStream& /*rStream*/) { return true; }
2955 
2956 bool PDFEndObjectElement::Read(SvStream& /*rStream*/) { return true; }
2957 
2959  : m_pObject(pObject)
2960 {
2961 }
2962 
2964 {
2965  char ch;
2966  rStream.ReadChar(ch);
2967  if (ch != '[')
2968  {
2969  SAL_WARN("vcl.filter", "PDFArrayElement::Read: unexpected character: " << ch);
2970  return false;
2971  }
2972 
2973  SAL_INFO("vcl.filter", "PDFArrayElement::Read: '['");
2974 
2975  return true;
2976 }
2977 
2979 {
2980  if (m_pObject)
2981  SAL_INFO("vcl.filter",
2982  "PDFArrayElement::PushBack: object is " << m_pObject->GetObjectValue());
2983  m_aElements.push_back(pElement);
2984 }
2985 
2986 const std::vector<PDFElement*>& PDFArrayElement::GetElements() const { return m_aElements; }
2987 
2988 PDFEndArrayElement::PDFEndArrayElement() = default;
2989 
2990 bool PDFEndArrayElement::Read(SvStream& rStream)
2991 {
2992  m_nOffset = rStream.Tell();
2993  char ch;
2994  rStream.ReadChar(ch);
2995  if (ch != ']')
2996  {
2997  SAL_WARN("vcl.filter", "PDFEndArrayElement::Read: unexpected character: " << ch);
2998  return false;
2999  }
3000 
3001  SAL_INFO("vcl.filter", "PDFEndArrayElement::Read: ']'");
3002 
3003  return true;
3004 }
3005 
3006 sal_uInt64 PDFEndArrayElement::GetOffset() const { return m_nOffset; }
3007 
3008 } // namespace vcl::filter
3009 
3010 /* vim:set shiftwidth=4 softtabstop=4 expandtab: */
std::vector< std::unique_ptr< PDFElement > > m_aElements
Elements of an object in an object stream.
sal_uInt64 m_nOffset
Location of the end of the trailer token.
Array object: a list.
const sal_Int32 m_nLength
sal_uInt64 m_nDictionaryOffset
Position after the '<<' token.
Definition: pdfdocument.hxx:89
bool SetStreamSize(sal_uInt64 nSize)
sal_Int32 nIndex
PDFObjectElement(PDFDocument &rDoc, double fObjectValue, double fGenerationValue)
Numbering object: an integer or a real.
PDFElement * Lookup(const OString &rDictionaryKey)
PDFDictionaryElement * m_pDictionaryElement
Definition: pdfdocument.hxx:92
size_t GetObjectOffset(size_t nIndex) const
sal_uInt64 m_nLocation
Offset after the '/' token.
void SetStream(PDFStreamElement *pStreamElement)
SvMemoryStream m_aMemory
The byte array itself.
std::string GetValue
const int MAX_SIGNATURE_CONTENT_LENGTH
Definition: pdfdocument.cxx:34
PDFArrayElement * m_pArrayElement
The contained direct array, if any.
Definition: pdfdocument.hxx:98
PDFObjectElement * m_pObject
The object that contains this array.
std::vector< std::unique_ptr< PDFObjectElement > > m_aStoredElements
Objects of an object stream.
double LookupNumber(SvStream &rStream) const
Assuming the reference points to a number object, return its value.
void SetDictionaryLength(sal_uInt64 nDictionaryLength)
PDFTrailerElement(PDFDocument &rDoc)
aBuf
#define STREAM_SEEK_TO_END
static size_t Parse(const std::vector< std::unique_ptr< PDFElement >> &rElements, PDFElement *pThis, std::map< OString, PDFElement * > &rDictionary)
sal_uInt64 Seek(sal_uInt64 nPos)
std::vector< unsigned char > DecodeHexString(const OString &rHex)
void SetDictionaryOffset(sal_uInt64 nDictionaryOffset)
PDFDocument & m_rDoc
The document owning this element.
Definition: pdfdocument.hxx:82
void PushBack(PDFElement *pElement)
sal_uInt64 GetLocation() const
static sal_uInt64 GetLength()
PDFObjectElement * LookupObject(const OString &rDictionaryKey)
Looks up an object which is only referenced in this dictionary.
EmbeddedObjectRef * pObject
sal_uInt64 m_nArrayOffset
Position after the '[' token, if m_pArrayElement is set.
Definition: pdfdocument.hxx:94
std::map< OString, PDFElement * > m_aItems
Key-value pairs when the dictionary is a nested value.
void SetArrayOffset(sal_uInt64 nArrayOffset)
SvStream & WriteCharPtr(const char *pBuf)
void SetArrayLength(sal_uInt64 nArrayLength)
sal_uInt64 SeekRel(sal_Int64 nPos)
void setVisiting(bool bVisiting)
Definition: pdfdocument.hxx:72
SvMemoryStream * GetStreamBuffer() const
void SetStreamBuffer(std::unique_ptr< SvMemoryStream > &pStreamBuffer)
static OString GetDateTime()
Get current date/time in PDF D:YYYYMMDDHHMMSS form.
SvStream & WriteUInt32AsString(sal_uInt32 nUInt32)
css::uno::Any const & rValue
std::unique_ptr< SvMemoryStream > m_pStreamBuffer
Uncompressed buffer of an object in an object stream.
A byte range in a PDF file.
Definition: pdfdocument.hxx:63
bool Read(SvStream &rStream) override
long EndCompression()
bool eof() const
PDFObjectElement * LookupObject(size_t nObjectNumber)
Look up object based on object number, possibly by parsing object streams.
bool Read(SvStream &rStream) override
In-memory representation of an on-disk PDF document.
TStyleElements m_aElements
sal_uInt64 m_nArrayLength
Length of the array buffer till (before) the ']' token.
Definition: pdfdocument.hxx:96
An entry in a cross-reference stream.
PDFReferenceElement(PDFDocument &rDoc, PDFNumberElement &rObject, PDFNumberElement const &rGeneration)
XRefEntryType GetType() const
const OString & GetValue() const
static void visitPages(PDFObjectElement *pPages, std::vector< PDFObjectElement * > &rRet)
Visits the page tree recursively, looking for page objects.
void PushBackEOF(size_t nOffset)
Remember the end location of an EOF token.
sal_uInt64 GetArrayLength() const
sal_Int32 nElements
bool Read(SvStream &rStream) override
PDFNumberElement * m_pNumberElement
If set, the object contains this number element (outside any dictionary/array).
Definition: pdfdocument.hxx:87
void Compress(SvStream &rIStm, SvStream &rOStm)
void SetType(XRefEntryType eType)
Same as END_OF_OBJECT, but for object streams (no endobj keyword).
void SetKeyValueLength(const OString &rKey, sal_uInt64 nLength)
sal_uInt64 m_nOffset
Input file start location.
void ParseStoredObjects()
Parse objects stored in this object stream.
bool Read(SvStream &rStream) override
sal_uInt64 GetOffset() const
std::map< OString, sal_uInt64 > m_aDictionaryKeyOffset
Position after the '/' token.
sal_uInt16 char * pName
PDFStreamElement * m_pStreamElement
The stream of this object, used when this is an object stream.
bool GetDirty() const
int i
long Decompress(SvStream &rIStm, SvStream &rOStm)
Indirect object: something with a unique ID.
Definition: pdfdocument.hxx:79
OString m_aComment
Definition: pdfdocument.cxx:44
PDFNumberElement & m_rObject
The element providing the object number.
sal_uInt64 m_nLength
Input file token length.
static OString ReadKeyword(SvStream &rStream)
sal_uInt64 GetSize()
bool Read(SvStream &rStream) override
std::size_t WriteBytes(const void *pData, std::size_t nSize)
void AddDictionaryReference(PDFReferenceElement *pReference)
void BeginCompression(int nCompressLevel=ZCODEC_DEFAULT_COMPRESSION, bool gzLib=false)
SvMemoryStream m_aEditBuffer
All editing takes place in this buffer, if it happens.
sal_uInt64 GetLocation() const
Dictionary object: a set key-value pairs.
sal_uInt64 GetOffset() const
bool Read(SvStream &rStream) override
std::vector< PDFElement * > m_aElements
bool Tokenize(SvStream &rStream, TokenizeMode eMode, std::vector< std::unique_ptr< PDFElement >> &rElements, PDFObjectElement *pObjectElement)
Tokenize elements from current offset.
bool Read(SvStream &rStream) override
std::vector< PDFReferenceElement * > m_aDictionaryReferences
List of all reference elements inside this object's dictionary and nested dictionaries.
PDFStreamElement * GetStream() const
Access to the stream of the object, if it has any.
PDFDocument & m_rDoc
Definition: pdfdocument.cxx:43
sal_uInt64 m_nOffset
Location before the ']' token.
Definition: pdfdocument.cxx:86
SvStream & WriteStream(SvStream &rStream)
const std::map< OString, PDFElement * > & GetItems() const
std::map< OString, sal_uInt64 > m_aDictionaryKeyValueLength
Length of the dictionary key and value, till (before) the next token.
const char * pS
sal_uInt64 GetKeyOffset(const OString &rKey) const
void SetIDObject(size_t nID, PDFObjectElement *pObject)
Register an object (owned directly or indirectly by m_aElements) as a provider for a given ID...
void setParsing(bool bParsing)
Definition: pdfdocument.hxx:74
bool Read(SvStream &rStream) override
sal_uInt64 GetKeyValueLength(const OString &rKey) const
PDFObjectElement * LookupObject(const OString &rDictionaryKey)
std::size_t ReadBytes(void *pData, std::size_t nSize)
SvMemoryStream & GetEditBuffer()
Access to the input document, even after the input stream is gone.
const OString & GetValue() const
std::vector< std::unique_ptr< PDFElement > > & GetStoredElements()
void SetNumberElement(PDFNumberElement *pNumberElement)
sal_uInt64 GetLength() const
PDFDictionaryElement * GetDictionary()
const std::vector< std::unique_ptr< PDFElement > > & GetElements() const
bool Read(SvStream &rStream) override
void SetDirty(bool bDirty)
PDFObjectElement * LookupObject()
Lookup referenced object, without assuming anything about its contents.
std::unique_ptr< char[]> aBuffer
SvStream & ReadChar(char &rChar)
PDFElement * LookupElement(const OString &rDictionaryKey)
Looks up an element which is contained in this dictionary.
std::map< OString, PDFElement * > m_aDictionary
PDFArrayElement * GetArray() const
SvMemoryStream & GetMemory()
const std::vector< PDFReferenceElement * > & GetDictionaryReferences() const
#define SAL_INFO(area, stream)
OUString aName
std::map< size_t, PDFObjectElement * > m_aIDObjects
Object ID <-> Object pointer map.
static void SkipWhitespace(SvStream &rStream)
PDFNumberElement * GetNumberElement() const
sal_uInt64 Tell() const
QPRO_FUNC_TYPE nType
const OString & GetValue() const
bool Sign(OStringBuffer &rCMSHexBuffer)
Reference object: something with a unique ID.
static void AppendUnicodeTextString(const OUString &rString, OStringBuffer &rBuffer)
Write rString as a PDF hex string into rBuffer.
const std::vector< PDFElement * > & GetElements() const
sal_uInt64 GetLocation() const
bool good() const
#define SAL_WARN(area, stream)
bool alreadyVisiting() const
Definition: pdfdocument.hxx:73
Literal string: in (asdf) form.
PDFArrayElement(PDFObjectElement *pObject)
sal_Int32 nLength
Name object: a key string.
void SetOffset(sal_uInt64 nOffset)
The trailer singleton is at the end of the doc.
const std::map< OString, PDFElement * > & GetDictionaryItems()
Get access to the parsed key-value items from the object dictionary.
void SetDictionary(PDFDictionaryElement *pDictionaryElement)
PDFNumberElement & GetObjectElement() const
void AddDataRange(const void *pData, sal_Int32 size)
PDFElement * Lookup(const OString &rDictionaryKey)
sal_uInt64 m_nDictionaryLength
Length of the dictionary buffer till (before) the '>>' token.
Definition: pdfdocument.hxx:91
sal_uInt64 GetArrayOffset() const
Stream object: a byte array with a known length.
sal_uInt64 m_nLocation
Offset after the '<<' token.
sal_uInt16 nPos
static PDFElement * Lookup(const std::map< OString, PDFElement * > &rDictionary, const OString &rKey)
const void * GetData()
void SetArray(PDFArrayElement *pArrayElement)
sal_uInt64 m_nLocation
Offset before the '>>' token.
Definition: pdfdocument.cxx:60
bool Read(SvStream &rStream) override
std::map< OString, PDFElement * > m_aDictionary
Definition: pdfdocument.hxx:85
sal_uInt64 m_nOffset
Location after the 'R' token.
void SetKeyOffset(const OString &rKey, sal_uInt64 nOffset)
OStringBuffer & padToLength(OStringBuffer &rBuffer, sal_Int32 nLength, char cFill= '\0')