LibreOffice Module vcl (master)  1
pdfdocument.cxx
Go to the documentation of this file.
1 /* -*- Mode: C++; tab-width: 4; indent-tabs-mode: nil; c-basic-offset: 4 -*- */
2 /*
3  * This file is part of the LibreOffice project.
4  *
5  * This Source Code Form is subject to the terms of the Mozilla Public
6  * License, v. 2.0. If a copy of the MPL was not distributed with this
7  * file, You can obtain one at http://mozilla.org/MPL/2.0/.
8  */
9 
11 
12 #include <map>
13 #include <memory>
14 #include <vector>
15 
16 #include <com/sun/star/uno/Sequence.hxx>
17 #include <com/sun/star/security/XCertificate.hpp>
18 
20 #include <comphelper/string.hxx>
21 #include <rtl/character.hxx>
22 #include <rtl/strbuf.hxx>
23 #include <rtl/string.hxx>
24 #include <sal/log.hxx>
25 #include <sal/types.h>
26 #include <svl/cryptosign.hxx>
27 #include <tools/zcodec.hxx>
28 #include <vcl/pdfwriter.hxx>
29 
30 using namespace com::sun::star;
31 
32 namespace vcl
33 {
34 namespace filter
35 {
/// Number of characters reserved between '<' and '>' for the /Contents value
/// of a freshly written signature object. The placeholder is filled with '0'
/// characters first and later overwritten with the hex-encoded signature, so
/// the encoded signature must not be longer than this.
constexpr int MAX_SIGNATURE_CONTENT_LENGTH = 50000;
37 
38 class PDFTrailerElement;
39 
40 namespace
41 {
43 class PDFCommentElement : public PDFElement
44 {
45  PDFDocument& m_rDoc;
46  OString m_aComment;
47 
48 public:
49  explicit PDFCommentElement(PDFDocument& rDoc);
50  bool Read(SvStream& rStream) override;
51 };
52 }
53 
54 class PDFReferenceElement;
55 
56 namespace
57 {
59 class PDFEndDictionaryElement : public PDFElement
60 {
62  sal_uInt64 m_nLocation = 0;
63 
64 public:
65  PDFEndDictionaryElement();
66  bool Read(SvStream& rStream) override;
67  sal_uInt64 GetLocation() const;
68 };
69 
71 class PDFEndStreamElement : public PDFElement
72 {
73 public:
74  bool Read(SvStream& rStream) override;
75 };
76 
78 class PDFEndObjectElement : public PDFElement
79 {
80 public:
81  bool Read(SvStream& rStream) override;
82 };
83 
85 class PDFEndArrayElement : public PDFElement
86 {
88  sal_uInt64 m_nOffset = 0;
89 
90 public:
91  PDFEndArrayElement();
92  bool Read(SvStream& rStream) override;
93  sal_uInt64 GetOffset() const;
94 };
95 
97 class PDFBooleanElement : public PDFElement
98 {
99 public:
100  explicit PDFBooleanElement(bool bValue);
101  bool Read(SvStream& rStream) override;
102 };
103 
105 class PDFNullElement : public PDFElement
106 {
107 public:
108  bool Read(SvStream& rStream) override;
109 };
110 }
111 
114 {
116  std::map<OString, PDFElement*> m_aDictionary;
118  sal_uInt64 m_nOffset = 0;
119 
120 public:
121  explicit PDFTrailerElement(PDFDocument& rDoc);
122  bool Read(SvStream& rStream) override;
123  PDFElement* Lookup(const OString& rDictionaryKey);
124  sal_uInt64 GetLocation() const;
125 };
126 
127 XRefEntry::XRefEntry() = default;
128 
129 PDFDocument::PDFDocument() = default;
130 
131 bool PDFDocument::RemoveSignature(size_t nPosition)
132 {
133  std::vector<PDFObjectElement*> aSignatures = GetSignatureWidgets();
134  if (nPosition >= aSignatures.size())
135  {
136  SAL_WARN("vcl.filter", "PDFDocument::RemoveSignature: invalid nPosition");
137  return false;
138  }
139 
140  if (aSignatures.size() != m_aEOFs.size() - 1)
141  {
142  SAL_WARN("vcl.filter", "PDFDocument::RemoveSignature: no 1:1 mapping between signatures "
143  "and incremental updates");
144  return false;
145  }
146 
147  // The EOF offset is the end of the original file, without the signature at
148  // nPosition.
149  m_aEditBuffer.Seek(m_aEOFs[nPosition]);
150  // Drop all bytes after the current position.
151  m_aEditBuffer.SetStreamSize(m_aEditBuffer.Tell() + 1);
152 
153  return m_aEditBuffer.good();
154 }
155 
156 sal_uInt32 PDFDocument::GetNextSignature()
157 {
158  sal_uInt32 nRet = 0;
159  for (const auto& pSignature : GetSignatureWidgets())
160  {
161  auto pT = dynamic_cast<PDFLiteralStringElement*>(pSignature->Lookup("T"));
162  if (!pT)
163  continue;
164 
165  const OString& rValue = pT->GetValue();
166  const OString aPrefix = "Signature";
167  if (!rValue.startsWith(aPrefix))
168  continue;
169 
170  nRet = std::max(nRet, rValue.copy(aPrefix.getLength()).toUInt32());
171  }
172 
173  return nRet + 1;
174 }
175 
176 sal_Int32 PDFDocument::WriteSignatureObject(const OUString& rDescription, bool bAdES,
177  sal_uInt64& rLastByteRangeOffset,
178  sal_Int64& rContentOffset)
179 {
180  // Write signature object.
181  sal_Int32 nSignatureId = m_aXRef.size();
182  XRefEntry aSignatureEntry;
183  aSignatureEntry.SetOffset(m_aEditBuffer.Tell());
184  aSignatureEntry.SetDirty(true);
185  m_aXRef[nSignatureId] = aSignatureEntry;
186  OStringBuffer aSigBuffer;
187  aSigBuffer.append(nSignatureId);
188  aSigBuffer.append(" 0 obj\n");
189  aSigBuffer.append("<</Contents <");
190  rContentOffset = aSignatureEntry.GetOffset() + aSigBuffer.getLength();
191  // Reserve space for the PKCS#7 object.
192  OStringBuffer aContentFiller(MAX_SIGNATURE_CONTENT_LENGTH);
193  comphelper::string::padToLength(aContentFiller, MAX_SIGNATURE_CONTENT_LENGTH, '0');
194  aSigBuffer.append(aContentFiller.makeStringAndClear());
195  aSigBuffer.append(">\n/Type/Sig/SubFilter");
196  if (bAdES)
197  aSigBuffer.append("/ETSI.CAdES.detached");
198  else
199  aSigBuffer.append("/adbe.pkcs7.detached");
200 
201  // Time of signing.
202  aSigBuffer.append(" /M (");
203  aSigBuffer.append(vcl::PDFWriter::GetDateTime());
204  aSigBuffer.append(")");
205 
206  // Byte range: we can write offset1-length1 and offset2 right now, will
207  // write length2 later.
208  aSigBuffer.append(" /ByteRange [ 0 ");
209  // -1 and +1 is the leading "<" and the trailing ">" around the hex string.
210  aSigBuffer.append(rContentOffset - 1);
211  aSigBuffer.append(" ");
212  aSigBuffer.append(rContentOffset + MAX_SIGNATURE_CONTENT_LENGTH + 1);
213  aSigBuffer.append(" ");
214  rLastByteRangeOffset = aSignatureEntry.GetOffset() + aSigBuffer.getLength();
215  // We don't know how many bytes we need for the last ByteRange value, this
216  // should be enough.
217  OStringBuffer aByteRangeFiller;
218  comphelper::string::padToLength(aByteRangeFiller, 100, ' ');
219  aSigBuffer.append(aByteRangeFiller.makeStringAndClear());
220  // Finish the Sig obj.
221  aSigBuffer.append(" /Filter/Adobe.PPKMS");
222 
223  if (!rDescription.isEmpty())
224  {
225  aSigBuffer.append("/Reason<");
226  vcl::PDFWriter::AppendUnicodeTextString(rDescription, aSigBuffer);
227  aSigBuffer.append(">");
228  }
229 
230  aSigBuffer.append(" >>\nendobj\n\n");
231  m_aEditBuffer.WriteOString(aSigBuffer.toString());
232 
233  return nSignatureId;
234 }
235 
236 sal_Int32 PDFDocument::WriteAppearanceObject()
237 {
238  // Write appearance object.
239  sal_Int32 nAppearanceId = m_aXRef.size();
240  XRefEntry aAppearanceEntry;
241  aAppearanceEntry.SetOffset(m_aEditBuffer.Tell());
242  aAppearanceEntry.SetDirty(true);
243  m_aXRef[nAppearanceId] = aAppearanceEntry;
244  m_aEditBuffer.WriteUInt32AsString(nAppearanceId);
245  m_aEditBuffer.WriteCharPtr(" 0 obj\n");
246  m_aEditBuffer.WriteCharPtr("<</Type/XObject\n/Subtype/Form\n");
247  m_aEditBuffer.WriteCharPtr("/BBox[0 0 0 0]\n/Length 0\n>>\n");
248  m_aEditBuffer.WriteCharPtr("stream\n\nendstream\nendobj\n\n");
249 
250  return nAppearanceId;
251 }
252 
253 sal_Int32 PDFDocument::WriteAnnotObject(PDFObjectElement const& rFirstPage, sal_Int32 nSignatureId,
254  sal_Int32 nAppearanceId)
255 {
256  // Decide what identifier to use for the new signature.
257  sal_uInt32 nNextSignature = GetNextSignature();
258 
259  // Write the Annot object, references nSignatureId and nAppearanceId.
260  sal_Int32 nAnnotId = m_aXRef.size();
261  XRefEntry aAnnotEntry;
262  aAnnotEntry.SetOffset(m_aEditBuffer.Tell());
263  aAnnotEntry.SetDirty(true);
264  m_aXRef[nAnnotId] = aAnnotEntry;
265  m_aEditBuffer.WriteUInt32AsString(nAnnotId);
266  m_aEditBuffer.WriteCharPtr(" 0 obj\n");
267  m_aEditBuffer.WriteCharPtr("<</Type/Annot/Subtype/Widget/F 132\n");
268  m_aEditBuffer.WriteCharPtr("/Rect[0 0 0 0]\n");
269  m_aEditBuffer.WriteCharPtr("/FT/Sig\n");
270  m_aEditBuffer.WriteCharPtr("/P ");
271  m_aEditBuffer.WriteUInt32AsString(rFirstPage.GetObjectValue());
272  m_aEditBuffer.WriteCharPtr(" 0 R\n");
273  m_aEditBuffer.WriteCharPtr("/T(Signature");
274  m_aEditBuffer.WriteUInt32AsString(nNextSignature);
275  m_aEditBuffer.WriteCharPtr(")\n");
276  m_aEditBuffer.WriteCharPtr("/V ");
277  m_aEditBuffer.WriteUInt32AsString(nSignatureId);
278  m_aEditBuffer.WriteCharPtr(" 0 R\n");
279  m_aEditBuffer.WriteCharPtr("/DV ");
280  m_aEditBuffer.WriteUInt32AsString(nSignatureId);
281  m_aEditBuffer.WriteCharPtr(" 0 R\n");
282  m_aEditBuffer.WriteCharPtr("/AP<<\n/N ");
283  m_aEditBuffer.WriteUInt32AsString(nAppearanceId);
284  m_aEditBuffer.WriteCharPtr(" 0 R\n>>\n");
285  m_aEditBuffer.WriteCharPtr(">>\nendobj\n\n");
286 
287  return nAnnotId;
288 }
289 
290 bool PDFDocument::WritePageObject(PDFObjectElement& rFirstPage, sal_Int32 nAnnotId)
291 {
292  PDFElement* pAnnots = rFirstPage.Lookup("Annots");
293  auto pAnnotsReference = dynamic_cast<PDFReferenceElement*>(pAnnots);
294  if (pAnnotsReference)
295  {
296  // Write the updated Annots key of the Page object.
297  PDFObjectElement* pAnnotsObject = pAnnotsReference->LookupObject();
298  if (!pAnnotsObject)
299  {
300  SAL_WARN("vcl.filter", "PDFDocument::Sign: invalid Annots reference");
301  return false;
302  }
303 
304  sal_uInt32 nAnnotsId = pAnnotsObject->GetObjectValue();
305  m_aXRef[nAnnotsId].SetType(XRefEntryType::NOT_COMPRESSED);
306  m_aXRef[nAnnotsId].SetOffset(m_aEditBuffer.Tell());
307  m_aXRef[nAnnotsId].SetDirty(true);
308  m_aEditBuffer.WriteUInt32AsString(nAnnotsId);
309  m_aEditBuffer.WriteCharPtr(" 0 obj\n[");
310 
311  // Write existing references.
312  PDFArrayElement* pArray = pAnnotsObject->GetArray();
313  if (!pArray)
314  {
315  SAL_WARN("vcl.filter", "PDFDocument::Sign: Page Annots is a reference to a non-array");
316  return false;
317  }
318 
319  for (size_t i = 0; i < pArray->GetElements().size(); ++i)
320  {
321  auto pReference = dynamic_cast<PDFReferenceElement*>(pArray->GetElements()[i]);
322  if (!pReference)
323  continue;
324 
325  if (i)
326  m_aEditBuffer.WriteCharPtr(" ");
327  m_aEditBuffer.WriteUInt32AsString(pReference->GetObjectValue());
328  m_aEditBuffer.WriteCharPtr(" 0 R");
329  }
330  // Write our reference.
331  m_aEditBuffer.WriteCharPtr(" ");
332  m_aEditBuffer.WriteUInt32AsString(nAnnotId);
333  m_aEditBuffer.WriteCharPtr(" 0 R");
334 
335  m_aEditBuffer.WriteCharPtr("]\nendobj\n\n");
336  }
337  else
338  {
339  // Write the updated first page object, references nAnnotId.
340  sal_uInt32 nFirstPageId = rFirstPage.GetObjectValue();
341  if (nFirstPageId >= m_aXRef.size())
342  {
343  SAL_WARN("vcl.filter", "PDFDocument::Sign: invalid first page obj id");
344  return false;
345  }
346  m_aXRef[nFirstPageId].SetOffset(m_aEditBuffer.Tell());
347  m_aXRef[nFirstPageId].SetDirty(true);
348  m_aEditBuffer.WriteUInt32AsString(nFirstPageId);
349  m_aEditBuffer.WriteCharPtr(" 0 obj\n");
350  m_aEditBuffer.WriteCharPtr("<<");
351  auto pAnnotsArray = dynamic_cast<PDFArrayElement*>(pAnnots);
352  if (!pAnnotsArray)
353  {
354  // No Annots key, just write the key with a single reference.
355  m_aEditBuffer.WriteBytes(static_cast<const char*>(m_aEditBuffer.GetData())
356  + rFirstPage.GetDictionaryOffset(),
357  rFirstPage.GetDictionaryLength());
358  m_aEditBuffer.WriteCharPtr("/Annots[");
359  m_aEditBuffer.WriteUInt32AsString(nAnnotId);
360  m_aEditBuffer.WriteCharPtr(" 0 R]");
361  }
362  else
363  {
364  // Annots key is already there, insert our reference at the end.
365  PDFDictionaryElement* pDictionary = rFirstPage.GetDictionary();
366 
367  // Offset right before the end of the Annots array.
368  sal_uInt64 nAnnotsEndOffset = pDictionary->GetKeyOffset("Annots")
369  + pDictionary->GetKeyValueLength("Annots") - 1;
370  // Length of beginning of the dictionary -> Annots end.
371  sal_uInt64 nAnnotsBeforeEndLength = nAnnotsEndOffset - rFirstPage.GetDictionaryOffset();
372  m_aEditBuffer.WriteBytes(static_cast<const char*>(m_aEditBuffer.GetData())
373  + rFirstPage.GetDictionaryOffset(),
374  nAnnotsBeforeEndLength);
375  m_aEditBuffer.WriteCharPtr(" ");
376  m_aEditBuffer.WriteUInt32AsString(nAnnotId);
377  m_aEditBuffer.WriteCharPtr(" 0 R");
378  // Length of Annots end -> end of the dictionary.
379  sal_uInt64 nAnnotsAfterEndLength = rFirstPage.GetDictionaryOffset()
380  + rFirstPage.GetDictionaryLength()
381  - nAnnotsEndOffset;
382  m_aEditBuffer.WriteBytes(static_cast<const char*>(m_aEditBuffer.GetData())
383  + nAnnotsEndOffset,
384  nAnnotsAfterEndLength);
385  }
386  m_aEditBuffer.WriteCharPtr(">>");
387  m_aEditBuffer.WriteCharPtr("\nendobj\n\n");
388  }
389 
390  return true;
391 }
392 
393 bool PDFDocument::WriteCatalogObject(sal_Int32 nAnnotId, PDFReferenceElement*& pRoot)
394 {
395  if (m_pXRefStream)
396  pRoot = dynamic_cast<PDFReferenceElement*>(m_pXRefStream->Lookup("Root"));
397  else
398  {
399  if (!m_pTrailer)
400  {
401  SAL_WARN("vcl.filter", "PDFDocument::Sign: found no trailer");
402  return false;
403  }
404  pRoot = dynamic_cast<PDFReferenceElement*>(m_pTrailer->Lookup("Root"));
405  }
406  if (!pRoot)
407  {
408  SAL_WARN("vcl.filter", "PDFDocument::Sign: trailer has no root reference");
409  return false;
410  }
411  PDFObjectElement* pCatalog = pRoot->LookupObject();
412  if (!pCatalog)
413  {
414  SAL_WARN("vcl.filter", "PDFDocument::Sign: invalid catalog reference");
415  return false;
416  }
417  sal_uInt32 nCatalogId = pCatalog->GetObjectValue();
418  if (nCatalogId >= m_aXRef.size())
419  {
420  SAL_WARN("vcl.filter", "PDFDocument::Sign: invalid catalog obj id");
421  return false;
422  }
423  PDFElement* pAcroForm = pCatalog->Lookup("AcroForm");
424  auto pAcroFormReference = dynamic_cast<PDFReferenceElement*>(pAcroForm);
425  if (pAcroFormReference)
426  {
427  // Write the updated AcroForm key of the Catalog object.
428  PDFObjectElement* pAcroFormObject = pAcroFormReference->LookupObject();
429  if (!pAcroFormObject)
430  {
431  SAL_WARN("vcl.filter", "PDFDocument::Sign: invalid AcroForm reference");
432  return false;
433  }
434 
435  sal_uInt32 nAcroFormId = pAcroFormObject->GetObjectValue();
436  m_aXRef[nAcroFormId].SetType(XRefEntryType::NOT_COMPRESSED);
437  m_aXRef[nAcroFormId].SetOffset(m_aEditBuffer.Tell());
438  m_aXRef[nAcroFormId].SetDirty(true);
439  m_aEditBuffer.WriteUInt32AsString(nAcroFormId);
440  m_aEditBuffer.WriteCharPtr(" 0 obj\n");
441 
442  // If this is nullptr, then the AcroForm object is not in an object stream.
443  SvMemoryStream* pStreamBuffer = pAcroFormObject->GetStreamBuffer();
444 
445  if (!pAcroFormObject->Lookup("Fields"))
446  {
447  SAL_WARN("vcl.filter",
448  "PDFDocument::Sign: AcroForm object without required Fields key");
449  return false;
450  }
451 
452  PDFDictionaryElement* pAcroFormDictionary = pAcroFormObject->GetDictionary();
453  if (!pAcroFormDictionary)
454  {
455  SAL_WARN("vcl.filter", "PDFDocument::Sign: AcroForm object has no dictionary");
456  return false;
457  }
458 
459  // Offset right before the end of the Fields array.
460  sal_uInt64 nFieldsEndOffset = pAcroFormDictionary->GetKeyOffset("Fields")
461  + pAcroFormDictionary->GetKeyValueLength("Fields")
462  - strlen("]");
463  // Length of beginning of the object dictionary -> Fields end.
464  sal_uInt64 nFieldsBeforeEndLength = nFieldsEndOffset;
465  if (pStreamBuffer)
466  m_aEditBuffer.WriteBytes(pStreamBuffer->GetData(), nFieldsBeforeEndLength);
467  else
468  {
469  nFieldsBeforeEndLength -= pAcroFormObject->GetDictionaryOffset();
470  m_aEditBuffer.WriteCharPtr("<<");
471  m_aEditBuffer.WriteBytes(static_cast<const char*>(m_aEditBuffer.GetData())
472  + pAcroFormObject->GetDictionaryOffset(),
473  nFieldsBeforeEndLength);
474  }
475 
476  // Append our reference at the end of the Fields array.
477  m_aEditBuffer.WriteCharPtr(" ");
478  m_aEditBuffer.WriteUInt32AsString(nAnnotId);
479  m_aEditBuffer.WriteCharPtr(" 0 R");
480 
481  // Length of Fields end -> end of the object dictionary.
482  if (pStreamBuffer)
483  {
484  sal_uInt64 nFieldsAfterEndLength = pStreamBuffer->GetSize() - nFieldsEndOffset;
485  m_aEditBuffer.WriteBytes(static_cast<const char*>(pStreamBuffer->GetData())
486  + nFieldsEndOffset,
487  nFieldsAfterEndLength);
488  }
489  else
490  {
491  sal_uInt64 nFieldsAfterEndLength = pAcroFormObject->GetDictionaryOffset()
492  + pAcroFormObject->GetDictionaryLength()
493  - nFieldsEndOffset;
494  m_aEditBuffer.WriteBytes(static_cast<const char*>(m_aEditBuffer.GetData())
495  + nFieldsEndOffset,
496  nFieldsAfterEndLength);
497  m_aEditBuffer.WriteCharPtr(">>");
498  }
499 
500  m_aEditBuffer.WriteCharPtr("\nendobj\n\n");
501  }
502  else
503  {
504  // Write the updated Catalog object, references nAnnotId.
505  auto pAcroFormDictionary = dynamic_cast<PDFDictionaryElement*>(pAcroForm);
506  m_aXRef[nCatalogId].SetOffset(m_aEditBuffer.Tell());
507  m_aXRef[nCatalogId].SetDirty(true);
508  m_aEditBuffer.WriteUInt32AsString(nCatalogId);
509  m_aEditBuffer.WriteCharPtr(" 0 obj\n");
510  m_aEditBuffer.WriteCharPtr("<<");
511  if (!pAcroFormDictionary)
512  {
513  // No AcroForm key, assume no signatures.
514  m_aEditBuffer.WriteBytes(static_cast<const char*>(m_aEditBuffer.GetData())
515  + pCatalog->GetDictionaryOffset(),
516  pCatalog->GetDictionaryLength());
517  m_aEditBuffer.WriteCharPtr("/AcroForm<</Fields[\n");
518  m_aEditBuffer.WriteUInt32AsString(nAnnotId);
519  m_aEditBuffer.WriteCharPtr(" 0 R\n]/SigFlags 3>>\n");
520  }
521  else
522  {
523  // AcroForm key is already there, insert our reference at the Fields end.
524  auto it = pAcroFormDictionary->GetItems().find("Fields");
525  if (it == pAcroFormDictionary->GetItems().end())
526  {
527  SAL_WARN("vcl.filter", "PDFDocument::Sign: AcroForm without required Fields key");
528  return false;
529  }
530 
531  auto pFields = dynamic_cast<PDFArrayElement*>(it->second);
532  if (!pFields)
533  {
534  SAL_WARN("vcl.filter", "PDFDocument::Sign: AcroForm Fields is not an array");
535  return false;
536  }
537 
538  // Offset right before the end of the Fields array.
539  sal_uInt64 nFieldsEndOffset = pAcroFormDictionary->GetKeyOffset("Fields")
540  + pAcroFormDictionary->GetKeyValueLength("Fields") - 1;
541  // Length of beginning of the Catalog dictionary -> Fields end.
542  sal_uInt64 nFieldsBeforeEndLength = nFieldsEndOffset - pCatalog->GetDictionaryOffset();
543  m_aEditBuffer.WriteBytes(static_cast<const char*>(m_aEditBuffer.GetData())
544  + pCatalog->GetDictionaryOffset(),
545  nFieldsBeforeEndLength);
546  m_aEditBuffer.WriteCharPtr(" ");
547  m_aEditBuffer.WriteUInt32AsString(nAnnotId);
548  m_aEditBuffer.WriteCharPtr(" 0 R");
549  // Length of Fields end -> end of the Catalog dictionary.
550  sal_uInt64 nFieldsAfterEndLength = pCatalog->GetDictionaryOffset()
551  + pCatalog->GetDictionaryLength() - nFieldsEndOffset;
552  m_aEditBuffer.WriteBytes(static_cast<const char*>(m_aEditBuffer.GetData())
553  + nFieldsEndOffset,
554  nFieldsAfterEndLength);
555  }
556  m_aEditBuffer.WriteCharPtr(">>\nendobj\n\n");
557  }
558 
559  return true;
560 }
561 
562 void PDFDocument::WriteXRef(sal_uInt64 nXRefOffset, PDFReferenceElement const* pRoot)
563 {
564  if (m_pXRefStream)
565  {
566  // Write the xref stream.
567  // This is a bit meta: the xref stream stores its own offset.
568  sal_Int32 nXRefStreamId = m_aXRef.size();
569  XRefEntry aXRefStreamEntry;
570  aXRefStreamEntry.SetOffset(nXRefOffset);
571  aXRefStreamEntry.SetDirty(true);
572  m_aXRef[nXRefStreamId] = aXRefStreamEntry;
573 
574  // Write stream data.
575  SvMemoryStream aXRefStream;
576  const size_t nOffsetLen = 3;
577  // 3 additional bytes: predictor, the first and the third field.
578  const size_t nLineLength = nOffsetLen + 3;
579  // This is the line as it appears before tweaking according to the predictor.
580  std::vector<unsigned char> aOrigLine(nLineLength);
581  // This is the previous line.
582  std::vector<unsigned char> aPrevLine(nLineLength);
583  // This is the line as written to the stream.
584  std::vector<unsigned char> aFilteredLine(nLineLength);
585  for (const auto& rXRef : m_aXRef)
586  {
587  const XRefEntry& rEntry = rXRef.second;
588 
589  if (!rEntry.GetDirty())
590  continue;
591 
592  // Predictor.
593  size_t nPos = 0;
594  // PNG prediction: up (on all rows).
595  aOrigLine[nPos++] = 2;
596 
597  // First field.
598  unsigned char nType = 0;
599  switch (rEntry.GetType())
600  {
601  case XRefEntryType::FREE:
602  nType = 0;
603  break;
604  case XRefEntryType::NOT_COMPRESSED:
605  nType = 1;
606  break;
607  case XRefEntryType::COMPRESSED:
608  nType = 2;
609  break;
610  }
611  aOrigLine[nPos++] = nType;
612 
613  // Second field.
614  for (size_t i = 0; i < nOffsetLen; ++i)
615  {
616  size_t nByte = nOffsetLen - i - 1;
617  // Fields requiring more than one byte are stored with the
618  // high-order byte first.
619  unsigned char nCh = (rEntry.GetOffset() & (0xff << (nByte * 8))) >> (nByte * 8);
620  aOrigLine[nPos++] = nCh;
621  }
622 
623  // Third field.
624  aOrigLine[nPos++] = 0;
625 
626  // Now apply the predictor.
627  aFilteredLine[0] = aOrigLine[0];
628  for (size_t i = 1; i < nLineLength; ++i)
629  {
630  // Count the delta vs the previous line.
631  aFilteredLine[i] = aOrigLine[i] - aPrevLine[i];
632  // Remember the new reference.
633  aPrevLine[i] = aOrigLine[i];
634  }
635 
636  aXRefStream.WriteBytes(aFilteredLine.data(), aFilteredLine.size());
637  }
638 
639  m_aEditBuffer.WriteUInt32AsString(nXRefStreamId);
640  m_aEditBuffer.WriteCharPtr(
641  " 0 obj\n<</DecodeParms<</Columns 5/Predictor 12>>/Filter/FlateDecode");
642 
643  // ID.
644  auto pID = dynamic_cast<PDFArrayElement*>(m_pXRefStream->Lookup("ID"));
645  if (pID)
646  {
647  const std::vector<PDFElement*>& rElements = pID->GetElements();
648  m_aEditBuffer.WriteCharPtr("/ID [ <");
649  for (size_t i = 0; i < rElements.size(); ++i)
650  {
651  auto pIDString = dynamic_cast<PDFHexStringElement*>(rElements[i]);
652  if (!pIDString)
653  continue;
654 
655  m_aEditBuffer.WriteOString(pIDString->GetValue());
656  if ((i + 1) < rElements.size())
657  m_aEditBuffer.WriteCharPtr("> <");
658  }
659  m_aEditBuffer.WriteCharPtr("> ] ");
660  }
661 
662  // Index.
663  m_aEditBuffer.WriteCharPtr("/Index [ ");
664  for (const auto& rXRef : m_aXRef)
665  {
666  if (!rXRef.second.GetDirty())
667  continue;
668 
669  m_aEditBuffer.WriteUInt32AsString(rXRef.first);
670  m_aEditBuffer.WriteCharPtr(" 1 ");
671  }
672  m_aEditBuffer.WriteCharPtr("] ");
673 
674  // Info.
675  auto pInfo = dynamic_cast<PDFReferenceElement*>(m_pXRefStream->Lookup("Info"));
676  if (pInfo)
677  {
678  m_aEditBuffer.WriteCharPtr("/Info ");
679  m_aEditBuffer.WriteUInt32AsString(pInfo->GetObjectValue());
680  m_aEditBuffer.WriteCharPtr(" ");
681  m_aEditBuffer.WriteUInt32AsString(pInfo->GetGenerationValue());
682  m_aEditBuffer.WriteCharPtr(" R ");
683  }
684 
685  // Length.
686  m_aEditBuffer.WriteCharPtr("/Length ");
687  {
688  ZCodec aZCodec;
689  aZCodec.BeginCompression();
690  aXRefStream.Seek(0);
691  SvMemoryStream aStream;
692  aZCodec.Compress(aXRefStream, aStream);
693  aZCodec.EndCompression();
694  aXRefStream.Seek(0);
695  aXRefStream.SetStreamSize(0);
696  aStream.Seek(0);
697  aXRefStream.WriteStream(aStream);
698  }
699  m_aEditBuffer.WriteUInt32AsString(aXRefStream.GetSize());
700 
701  if (!m_aStartXRefs.empty())
702  {
703  // Write location of the previous cross-reference section.
704  m_aEditBuffer.WriteCharPtr("/Prev ");
705  m_aEditBuffer.WriteUInt32AsString(m_aStartXRefs.back());
706  }
707 
708  // Root.
709  m_aEditBuffer.WriteCharPtr("/Root ");
710  m_aEditBuffer.WriteUInt32AsString(pRoot->GetObjectValue());
711  m_aEditBuffer.WriteCharPtr(" ");
712  m_aEditBuffer.WriteUInt32AsString(pRoot->GetGenerationValue());
713  m_aEditBuffer.WriteCharPtr(" R ");
714 
715  // Size.
716  m_aEditBuffer.WriteCharPtr("/Size ");
717  m_aEditBuffer.WriteUInt32AsString(m_aXRef.size());
718 
719  m_aEditBuffer.WriteCharPtr("/Type/XRef/W[1 3 1]>>\nstream\n");
720  aXRefStream.Seek(0);
721  m_aEditBuffer.WriteStream(aXRefStream);
722  m_aEditBuffer.WriteCharPtr("\nendstream\nendobj\n\n");
723  }
724  else
725  {
726  // Write the xref table.
727  m_aEditBuffer.WriteCharPtr("xref\n");
728  for (const auto& rXRef : m_aXRef)
729  {
730  size_t nObject = rXRef.first;
731  size_t nOffset = rXRef.second.GetOffset();
732  if (!rXRef.second.GetDirty())
733  continue;
734 
735  m_aEditBuffer.WriteUInt32AsString(nObject);
736  m_aEditBuffer.WriteCharPtr(" 1\n");
737  OStringBuffer aBuffer;
738  aBuffer.append(static_cast<sal_Int32>(nOffset));
739  while (aBuffer.getLength() < 10)
740  aBuffer.insert(0, "0");
741  if (nObject == 0)
742  aBuffer.append(" 65535 f \n");
743  else
744  aBuffer.append(" 00000 n \n");
745  m_aEditBuffer.WriteOString(aBuffer.toString());
746  }
747 
748  // Write the trailer.
749  m_aEditBuffer.WriteCharPtr("trailer\n<</Size ");
750  m_aEditBuffer.WriteUInt32AsString(m_aXRef.size());
751  m_aEditBuffer.WriteCharPtr("/Root ");
752  m_aEditBuffer.WriteUInt32AsString(pRoot->GetObjectValue());
753  m_aEditBuffer.WriteCharPtr(" ");
754  m_aEditBuffer.WriteUInt32AsString(pRoot->GetGenerationValue());
755  m_aEditBuffer.WriteCharPtr(" R\n");
756  auto pInfo = dynamic_cast<PDFReferenceElement*>(m_pTrailer->Lookup("Info"));
757  if (pInfo)
758  {
759  m_aEditBuffer.WriteCharPtr("/Info ");
760  m_aEditBuffer.WriteUInt32AsString(pInfo->GetObjectValue());
761  m_aEditBuffer.WriteCharPtr(" ");
762  m_aEditBuffer.WriteUInt32AsString(pInfo->GetGenerationValue());
763  m_aEditBuffer.WriteCharPtr(" R\n");
764  }
765  auto pID = dynamic_cast<PDFArrayElement*>(m_pTrailer->Lookup("ID"));
766  if (pID)
767  {
768  const std::vector<PDFElement*>& rElements = pID->GetElements();
769  m_aEditBuffer.WriteCharPtr("/ID [ <");
770  for (size_t i = 0; i < rElements.size(); ++i)
771  {
772  auto pIDString = dynamic_cast<PDFHexStringElement*>(rElements[i]);
773  if (!pIDString)
774  continue;
775 
776  m_aEditBuffer.WriteOString(pIDString->GetValue());
777  if ((i + 1) < rElements.size())
778  m_aEditBuffer.WriteCharPtr(">\n<");
779  }
780  m_aEditBuffer.WriteCharPtr("> ]\n");
781  }
782 
783  if (!m_aStartXRefs.empty())
784  {
785  // Write location of the previous cross-reference section.
786  m_aEditBuffer.WriteCharPtr("/Prev ");
787  m_aEditBuffer.WriteUInt32AsString(m_aStartXRefs.back());
788  }
789 
790  m_aEditBuffer.WriteCharPtr(">>\n");
791  }
792 }
793 
794 bool PDFDocument::Sign(const uno::Reference<security::XCertificate>& xCertificate,
795  const OUString& rDescription, bool bAdES)
796 {
797  m_aEditBuffer.Seek(STREAM_SEEK_TO_END);
798  m_aEditBuffer.WriteCharPtr("\n");
799 
800  sal_uInt64 nSignatureLastByteRangeOffset = 0;
801  sal_Int64 nSignatureContentOffset = 0;
802  sal_Int32 nSignatureId = WriteSignatureObject(
803  rDescription, bAdES, nSignatureLastByteRangeOffset, nSignatureContentOffset);
804 
805  sal_Int32 nAppearanceId = WriteAppearanceObject();
806 
807  std::vector<PDFObjectElement*> aPages = GetPages();
808  if (aPages.empty() || !aPages[0])
809  {
810  SAL_WARN("vcl.filter", "PDFDocument::Sign: found no pages");
811  return false;
812  }
813 
814  PDFObjectElement& rFirstPage = *aPages[0];
815  sal_Int32 nAnnotId = WriteAnnotObject(rFirstPage, nSignatureId, nAppearanceId);
816 
817  if (!WritePageObject(rFirstPage, nAnnotId))
818  {
819  SAL_WARN("vcl.filter", "PDFDocument::Sign: failed to write the updated Page object");
820  return false;
821  }
822 
823  PDFReferenceElement* pRoot = nullptr;
824  if (!WriteCatalogObject(nAnnotId, pRoot))
825  {
826  SAL_WARN("vcl.filter", "PDFDocument::Sign: failed to write the updated Catalog object");
827  return false;
828  }
829 
830  sal_uInt64 nXRefOffset = m_aEditBuffer.Tell();
831  WriteXRef(nXRefOffset, pRoot);
832 
833  // Write startxref.
834  m_aEditBuffer.WriteCharPtr("startxref\n");
835  m_aEditBuffer.WriteUInt32AsString(nXRefOffset);
836  m_aEditBuffer.WriteCharPtr("\n%%EOF\n");
837 
838  // Finalize the signature, now that we know the total file size.
839  // Calculate the length of the last byte range.
840  sal_uInt64 nFileEnd = m_aEditBuffer.Tell();
841  sal_Int64 nLastByteRangeLength
842  = nFileEnd - (nSignatureContentOffset + MAX_SIGNATURE_CONTENT_LENGTH + 1);
843  // Write the length to the buffer.
844  m_aEditBuffer.Seek(nSignatureLastByteRangeOffset);
845  OString aByteRangeBuffer = OString::number(nLastByteRangeLength) + " ]";
846  m_aEditBuffer.WriteOString(aByteRangeBuffer);
847 
848  // Create the PKCS#7 object.
849  css::uno::Sequence<sal_Int8> aDerEncoded = xCertificate->getEncoded();
850  if (!aDerEncoded.hasElements())
851  {
852  SAL_WARN("vcl.filter", "PDFDocument::Sign: empty certificate");
853  return false;
854  }
855 
856  m_aEditBuffer.Seek(0);
857  sal_uInt64 nBufferSize1 = nSignatureContentOffset - 1;
858  std::unique_ptr<char[]> aBuffer1(new char[nBufferSize1]);
859  m_aEditBuffer.ReadBytes(aBuffer1.get(), nBufferSize1);
860 
861  m_aEditBuffer.Seek(nSignatureContentOffset + MAX_SIGNATURE_CONTENT_LENGTH + 1);
862  sal_uInt64 nBufferSize2 = nLastByteRangeLength;
863  std::unique_ptr<char[]> aBuffer2(new char[nBufferSize2]);
864  m_aEditBuffer.ReadBytes(aBuffer2.get(), nBufferSize2);
865 
866  OStringBuffer aCMSHexBuffer;
867  svl::crypto::Signing aSigning(xCertificate);
868  aSigning.AddDataRange(aBuffer1.get(), nBufferSize1);
869  aSigning.AddDataRange(aBuffer2.get(), nBufferSize2);
870  if (!aSigning.Sign(aCMSHexBuffer))
871  {
872  SAL_WARN("vcl.filter", "PDFDocument::Sign: PDFWriter::Sign() failed");
873  return false;
874  }
875 
876  assert(aCMSHexBuffer.getLength() <= MAX_SIGNATURE_CONTENT_LENGTH);
877 
878  m_aEditBuffer.Seek(nSignatureContentOffset);
879  m_aEditBuffer.WriteOString(aCMSHexBuffer.toString());
880 
881  return true;
882 }
883 
884 bool PDFDocument::Write(SvStream& rStream)
885 {
886  m_aEditBuffer.Seek(0);
887  rStream.WriteStream(m_aEditBuffer);
888  return rStream.good();
889 }
890 
891 bool PDFDocument::Tokenize(SvStream& rStream, TokenizeMode eMode,
892  std::vector<std::unique_ptr<PDFElement>>& rElements,
893  PDFObjectElement* pObjectElement)
894 {
895  // Last seen object token.
896  PDFObjectElement* pObject = pObjectElement;
897  PDFNameElement* pObjectKey = nullptr;
898  PDFObjectElement* pObjectStream = nullptr;
899  bool bInXRef = false;
900  // The next number will be an xref offset.
901  bool bInStartXRef = false;
902  // Dictionary depth, so we know when we're outside any dictionaries.
903  int nDictionaryDepth = 0;
904  // Array depth, only the offset/length of the toplevel array is tracked.
905  int nArrayDepth = 0;
906  // Last seen array token that's outside any dictionaries.
907  PDFArrayElement* pArray = nullptr;
908  // If we're inside an obj/endobj pair.
909  bool bInObject = false;
910  while (true)
911  {
912  char ch;
913  rStream.ReadChar(ch);
914  if (rStream.eof())
915  break;
916 
917  switch (ch)
918  {
919  case '%':
920  {
921  auto pComment = new PDFCommentElement(*this);
922  rElements.push_back(std::unique_ptr<PDFElement>(pComment));
923  rStream.SeekRel(-1);
924  if (!rElements.back()->Read(rStream))
925  {
926  SAL_WARN("vcl.filter",
927  "PDFDocument::Tokenize: PDFCommentElement::Read() failed");
928  return false;
929  }
930  if (eMode == TokenizeMode::EOF_TOKEN && !m_aEOFs.empty()
931  && m_aEOFs.back() == rStream.Tell())
932  {
933  // Found EOF and partial parsing requested, we're done.
934  return true;
935  }
936  break;
937  }
938  case '<':
939  {
940  // Dictionary or hex string.
941  rStream.ReadChar(ch);
942  rStream.SeekRel(-2);
943  if (ch == '<')
944  {
945  rElements.push_back(std::unique_ptr<PDFElement>(new PDFDictionaryElement()));
946  ++nDictionaryDepth;
947  }
948  else
949  rElements.push_back(std::unique_ptr<PDFElement>(new PDFHexStringElement));
950  if (!rElements.back()->Read(rStream))
951  {
952  SAL_WARN("vcl.filter",
953  "PDFDocument::Tokenize: PDFDictionaryElement::Read() failed");
954  return false;
955  }
956  break;
957  }
958  case '>':
959  {
960  rElements.push_back(std::unique_ptr<PDFElement>(new PDFEndDictionaryElement()));
961  --nDictionaryDepth;
962  rStream.SeekRel(-1);
963  if (!rElements.back()->Read(rStream))
964  {
965  SAL_WARN("vcl.filter",
966  "PDFDocument::Tokenize: PDFEndDictionaryElement::Read() failed");
967  return false;
968  }
969  break;
970  }
971  case '[':
972  {
973  auto pArr = new PDFArrayElement(pObject);
974  rElements.push_back(std::unique_ptr<PDFElement>(pArr));
975  if (nDictionaryDepth == 0 && nArrayDepth == 0)
976  {
977  // The array is attached directly, inform the object.
978  pArray = pArr;
979  if (pObject)
980  {
981  pObject->SetArray(pArray);
982  pObject->SetArrayOffset(rStream.Tell());
983  }
984  }
985  ++nArrayDepth;
986  rStream.SeekRel(-1);
987  if (!rElements.back()->Read(rStream))
988  {
989  SAL_WARN("vcl.filter", "PDFDocument::Tokenize: PDFArrayElement::Read() failed");
990  return false;
991  }
992  break;
993  }
994  case ']':
995  {
996  rElements.push_back(std::unique_ptr<PDFElement>(new PDFEndArrayElement()));
997  --nArrayDepth;
998  if (nArrayDepth == 0)
999  pArray = nullptr;
1000  rStream.SeekRel(-1);
1001  if (nDictionaryDepth == 0 && nArrayDepth == 0)
1002  {
1003  if (pObject)
1004  {
1005  pObject->SetArrayLength(rStream.Tell() - pObject->GetArrayOffset());
1006  }
1007  }
1008  if (!rElements.back()->Read(rStream))
1009  {
1010  SAL_WARN("vcl.filter",
1011  "PDFDocument::Tokenize: PDFEndArrayElement::Read() failed");
1012  return false;
1013  }
1014  break;
1015  }
1016  case '/':
1017  {
1018  auto pNameElement = new PDFNameElement();
1019  rElements.push_back(std::unique_ptr<PDFElement>(pNameElement));
1020  rStream.SeekRel(-1);
1021  if (!pNameElement->Read(rStream))
1022  {
1023  SAL_WARN("vcl.filter", "PDFDocument::Tokenize: PDFNameElement::Read() failed");
1024  return false;
1025  }
1026  if (pObject && pObjectKey && pObjectKey->GetValue() == "Type"
1027  && pNameElement->GetValue() == "ObjStm")
1028  pObjectStream = pObject;
1029  else
1030  pObjectKey = pNameElement;
1031  break;
1032  }
1033  case '(':
1034  {
1035  rElements.push_back(std::unique_ptr<PDFElement>(new PDFLiteralStringElement));
1036  rStream.SeekRel(-1);
1037  if (!rElements.back()->Read(rStream))
1038  {
1039  SAL_WARN("vcl.filter",
1040  "PDFDocument::Tokenize: PDFLiteralStringElement::Read() failed");
1041  return false;
1042  }
1043  break;
1044  }
1045  default:
1046  {
1047  if (rtl::isAsciiDigit(static_cast<unsigned char>(ch)) || ch == '-')
1048  {
1049  // Numbering object: an integer or a real.
1050  auto pNumberElement = new PDFNumberElement();
1051  rElements.push_back(std::unique_ptr<PDFElement>(pNumberElement));
1052  rStream.SeekRel(-1);
1053  if (!pNumberElement->Read(rStream))
1054  {
1055  SAL_WARN("vcl.filter",
1056  "PDFDocument::Tokenize: PDFNumberElement::Read() failed");
1057  return false;
1058  }
1059  if (bInStartXRef)
1060  {
1061  bInStartXRef = false;
1062  m_aStartXRefs.push_back(pNumberElement->GetValue());
1063 
1064  auto it = m_aOffsetObjects.find(pNumberElement->GetValue());
1065  if (it != m_aOffsetObjects.end())
1066  m_pXRefStream = it->second;
1067  }
1068  else if (bInObject && !nDictionaryDepth && !nArrayDepth && pObject)
1069  // Number element inside an object, but outside a
1070  // dictionary / array: remember it.
1071  pObject->SetNumberElement(pNumberElement);
1072  }
1073  else if (rtl::isAsciiAlpha(static_cast<unsigned char>(ch)))
1074  {
1075  // Possible keyword, like "obj".
1076  rStream.SeekRel(-1);
1077  OString aKeyword = ReadKeyword(rStream);
1078 
1079  bool bObj = aKeyword == "obj";
1080  if (bObj || aKeyword == "R")
1081  {
1082  size_t nElements = rElements.size();
1083  if (nElements < 2)
1084  {
1085  SAL_WARN("vcl.filter", "PDFDocument::Tokenize: expected at least two "
1086  "tokens before 'obj' or 'R' keyword");
1087  return false;
1088  }
1089 
1090  auto pObjectNumber
1091  = dynamic_cast<PDFNumberElement*>(rElements[nElements - 2].get());
1092  auto pGenerationNumber
1093  = dynamic_cast<PDFNumberElement*>(rElements[nElements - 1].get());
1094  if (!pObjectNumber || !pGenerationNumber)
1095  {
1096  SAL_WARN("vcl.filter", "PDFDocument::Tokenize: missing object or "
1097  "generation number before 'obj' or 'R' keyword");
1098  return false;
1099  }
1100 
1101  if (bObj)
1102  {
1103  pObject = new PDFObjectElement(*this, pObjectNumber->GetValue(),
1104  pGenerationNumber->GetValue());
1105  rElements.push_back(std::unique_ptr<PDFElement>(pObject));
1106  m_aOffsetObjects[pObjectNumber->GetLocation()] = pObject;
1107  m_aIDObjects[pObjectNumber->GetValue()] = pObject;
1108  bInObject = true;
1109  }
1110  else
1111  {
1112  auto pReference = new PDFReferenceElement(*this, *pObjectNumber,
1113  *pGenerationNumber);
1114  rElements.push_back(std::unique_ptr<PDFElement>(pReference));
1115  if (pArray)
1116  // Reference is part of a direct (non-dictionary) array, inform the array.
1117  pArray->PushBack(rElements.back().get());
1118  if (bInObject && nDictionaryDepth > 0 && pObject)
1119  // Inform the object about a new in-dictionary reference.
1120  pObject->AddDictionaryReference(pReference);
1121  }
1122  if (!rElements.back()->Read(rStream))
1123  {
1124  SAL_WARN("vcl.filter",
1125  "PDFDocument::Tokenize: PDFElement::Read() failed");
1126  return false;
1127  }
1128  }
1129  else if (aKeyword == "stream")
1130  {
1131  // Look up the length of the stream from the parent object's dictionary.
1132  size_t nLength = 0;
1133  for (size_t nElement = 0; nElement < rElements.size(); ++nElement)
1134  {
1135  // Iterate in reverse order.
1136  size_t nIndex = rElements.size() - nElement - 1;
1137  PDFElement* pElement = rElements[nIndex].get();
1138  auto pObj = dynamic_cast<PDFObjectElement*>(pElement);
1139  if (!pObj)
1140  continue;
1141 
1142  PDFElement* pLookup = pObj->Lookup("Length");
1143  auto pReference = dynamic_cast<PDFReferenceElement*>(pLookup);
1144  if (pReference)
1145  {
1146  // Length is provided as a reference.
1147  nLength = pReference->LookupNumber(rStream);
1148  break;
1149  }
1150 
1151  auto pNumber = dynamic_cast<PDFNumberElement*>(pLookup);
1152  if (pNumber)
1153  {
1154  // Length is provided directly.
1155  nLength = pNumber->GetValue();
1156  break;
1157  }
1158 
1159  SAL_WARN(
1160  "vcl.filter",
1161  "PDFDocument::Tokenize: found no Length key for stream keyword");
1162  return false;
1163  }
1164 
1165  PDFDocument::SkipLineBreaks(rStream);
1166  auto pStreamElement = new PDFStreamElement(nLength);
1167  if (pObject)
1168  pObject->SetStream(pStreamElement);
1169  rElements.push_back(std::unique_ptr<PDFElement>(pStreamElement));
1170  if (!rElements.back()->Read(rStream))
1171  {
1172  SAL_WARN("vcl.filter",
1173  "PDFDocument::Tokenize: PDFStreamElement::Read() failed");
1174  return false;
1175  }
1176  }
1177  else if (aKeyword == "endstream")
1178  {
1179  rElements.push_back(std::unique_ptr<PDFElement>(new PDFEndStreamElement));
1180  if (!rElements.back()->Read(rStream))
1181  {
1182  SAL_WARN("vcl.filter",
1183  "PDFDocument::Tokenize: PDFEndStreamElement::Read() failed");
1184  return false;
1185  }
1186  }
1187  else if (aKeyword == "endobj")
1188  {
1189  rElements.push_back(std::unique_ptr<PDFElement>(new PDFEndObjectElement));
1190  if (!rElements.back()->Read(rStream))
1191  {
1192  SAL_WARN("vcl.filter",
1193  "PDFDocument::Tokenize: PDFEndObjectElement::Read() failed");
1194  return false;
1195  }
1196  if (eMode == TokenizeMode::END_OF_OBJECT)
1197  {
1198  // Found endobj and only object parsing was requested, we're done.
1199  return true;
1200  }
1201 
1202  if (pObjectStream)
1203  {
1204  // We're at the end of an object stream, parse the stored objects.
1205  pObjectStream->ParseStoredObjects();
1206  pObjectStream = nullptr;
1207  pObjectKey = nullptr;
1208  }
1209  bInObject = false;
1210  }
1211  else if (aKeyword == "true" || aKeyword == "false")
1212  rElements.push_back(std::unique_ptr<PDFElement>(
1213  new PDFBooleanElement(aKeyword.toBoolean())));
1214  else if (aKeyword == "null")
1215  rElements.push_back(std::unique_ptr<PDFElement>(new PDFNullElement));
1216  else if (aKeyword == "xref")
1217  // Allow 'f' and 'n' keywords.
1218  bInXRef = true;
1219  else if (bInXRef && (aKeyword == "f" || aKeyword == "n"))
1220  {
1221  }
1222  else if (aKeyword == "trailer")
1223  {
1224  auto pTrailer = new PDFTrailerElement(*this);
1225 
1226  // Make it possible to find this trailer later by offset.
1227  pTrailer->Read(rStream);
1228  m_aOffsetTrailers[pTrailer->GetLocation()] = pTrailer;
1229 
1230  // When reading till the first EOF token only, remember
1231  // just the first trailer token.
1232  if (eMode != TokenizeMode::EOF_TOKEN || !m_pTrailer)
1233  m_pTrailer = pTrailer;
1234  rElements.push_back(std::unique_ptr<PDFElement>(pTrailer));
1235  }
1236  else if (aKeyword == "startxref")
1237  {
1238  bInStartXRef = true;
1239  }
1240  else
1241  {
1242  SAL_WARN("vcl.filter", "PDFDocument::Tokenize: unexpected '"
1243  << aKeyword << "' keyword at byte position "
1244  << rStream.Tell());
1245  return false;
1246  }
1247  }
1248  else
1249  {
1250  if (!rtl::isAsciiWhiteSpace(static_cast<unsigned char>(ch)))
1251  {
1252  SAL_WARN("vcl.filter", "PDFDocument::Tokenize: unexpected character: "
1253  << ch << " at byte position " << rStream.Tell());
1254  return false;
1255  }
1256  }
1257  break;
1258  }
1259  }
1260  }
1261 
1262  return true;
1263 }
1264 
1265 void PDFDocument::SetIDObject(size_t nID, PDFObjectElement* pObject)
1266 {
1267  m_aIDObjects[nID] = pObject;
1268 }
1269 
1270 bool PDFDocument::Read(SvStream& rStream)
1271 {
1272  // Check file magic.
1273  std::vector<sal_Int8> aHeader(5);
1274  rStream.Seek(0);
1275  rStream.ReadBytes(aHeader.data(), aHeader.size());
1276  if (aHeader[0] != '%' || aHeader[1] != 'P' || aHeader[2] != 'D' || aHeader[3] != 'F'
1277  || aHeader[4] != '-')
1278  {
1279  SAL_WARN("vcl.filter", "PDFDocument::Read: header mismatch");
1280  return false;
1281  }
1282 
1283  // Allow later editing of the contents in-memory.
1284  rStream.Seek(0);
1285  m_aEditBuffer.WriteStream(rStream);
1286 
1287  // Look up the offset of the xref table.
1288  size_t nStartXRef = FindStartXRef(rStream);
1289  SAL_INFO("vcl.filter", "PDFDocument::Read: nStartXRef is " << nStartXRef);
1290  if (nStartXRef == 0)
1291  {
1292  SAL_WARN("vcl.filter", "PDFDocument::Read: found no xref start offset");
1293  return false;
1294  }
1295  while (true)
1296  {
1297  rStream.Seek(nStartXRef);
1298  OString aKeyword = ReadKeyword(rStream);
1299  if (aKeyword.isEmpty())
1300  ReadXRefStream(rStream);
1301 
1302  else
1303  {
1304  if (aKeyword != "xref")
1305  {
1306  SAL_WARN("vcl.filter", "PDFDocument::Read: xref is not the first keyword");
1307  return false;
1308  }
1309  ReadXRef(rStream);
1310  if (!Tokenize(rStream, TokenizeMode::EOF_TOKEN, m_aElements, nullptr))
1311  {
1312  SAL_WARN("vcl.filter", "PDFDocument::Read: failed to tokenizer trailer after xref");
1313  return false;
1314  }
1315  }
1316 
1317  PDFNumberElement* pPrev = nullptr;
1318  if (m_pTrailer)
1319  {
1320  pPrev = dynamic_cast<PDFNumberElement*>(m_pTrailer->Lookup("Prev"));
1321 
1322  // Remember the offset of this trailer in the correct order. It's
1323  // possible that newer trailers don't have a larger offset.
1324  m_aTrailerOffsets.push_back(m_pTrailer->GetLocation());
1325  }
1326  else if (m_pXRefStream)
1327  pPrev = dynamic_cast<PDFNumberElement*>(m_pXRefStream->Lookup("Prev"));
1328  if (pPrev)
1329  nStartXRef = pPrev->GetValue();
1330 
1331  // Reset state, except the edit buffer.
1332  m_aElements.clear();
1333  m_aOffsetObjects.clear();
1334  m_aIDObjects.clear();
1335  m_aStartXRefs.clear();
1336  m_aEOFs.clear();
1337  m_pTrailer = nullptr;
1338  m_pXRefStream = nullptr;
1339  if (!pPrev)
1340  break;
1341  }
1342 
1343  // Then we can tokenize the stream.
1344  rStream.Seek(0);
1345  return Tokenize(rStream, TokenizeMode::END_OF_STREAM, m_aElements, nullptr);
1346 }
1347 
1348 OString PDFDocument::ReadKeyword(SvStream& rStream)
1349 {
1350  OStringBuffer aBuf;
1351  char ch;
1352  rStream.ReadChar(ch);
1353  if (rStream.eof())
1354  return OString();
1355  while (rtl::isAsciiAlpha(static_cast<unsigned char>(ch)))
1356  {
1357  aBuf.append(ch);
1358  rStream.ReadChar(ch);
1359  if (rStream.eof())
1360  return aBuf.toString();
1361  }
1362  rStream.SeekRel(-1);
1363  return aBuf.toString();
1364 }
1365 
1366 size_t PDFDocument::FindStartXRef(SvStream& rStream)
1367 {
1368  // Find the "startxref" token, somewhere near the end of the document.
1369  std::vector<char> aBuf(1024);
1370  rStream.Seek(STREAM_SEEK_TO_END);
1371  if (rStream.Tell() > aBuf.size())
1372  rStream.SeekRel(static_cast<sal_Int64>(-1) * aBuf.size());
1373  else
1374  // The document is really short, then just read it from the start.
1375  rStream.Seek(0);
1376  size_t nBeforePeek = rStream.Tell();
1377  size_t nSize = rStream.ReadBytes(aBuf.data(), aBuf.size());
1378  rStream.Seek(nBeforePeek);
1379  if (nSize != aBuf.size())
1380  aBuf.resize(nSize);
1381  OString aPrefix("startxref");
1382  // Find the last startxref at the end of the document.
1383  auto itLastValid = aBuf.end();
1384  auto it = aBuf.begin();
1385  while (true)
1386  {
1387  it = std::search(it, aBuf.end(), aPrefix.getStr(), aPrefix.getStr() + aPrefix.getLength());
1388  if (it == aBuf.end())
1389  break;
1390 
1391  itLastValid = it;
1392  ++it;
1393  }
1394  if (itLastValid == aBuf.end())
1395  {
1396  SAL_WARN("vcl.filter", "PDFDocument::FindStartXRef: found no startxref");
1397  return 0;
1398  }
1399 
1400  rStream.SeekRel(itLastValid - aBuf.begin() + aPrefix.getLength());
1401  if (rStream.eof())
1402  {
1403  SAL_WARN("vcl.filter",
1404  "PDFDocument::FindStartXRef: unexpected end of stream after startxref");
1405  return 0;
1406  }
1407 
1408  PDFDocument::SkipWhitespace(rStream);
1409  PDFNumberElement aNumber;
1410  if (!aNumber.Read(rStream))
1411  return 0;
1412  return aNumber.GetValue();
1413 }
1414 
1415 void PDFDocument::ReadXRefStream(SvStream& rStream)
1416 {
1417  // Look up the stream length in the object dictionary.
1418  if (!Tokenize(rStream, TokenizeMode::END_OF_OBJECT, m_aElements, nullptr))
1419  {
1420  SAL_WARN("vcl.filter", "PDFDocument::ReadXRefStream: failed to read object");
1421  return;
1422  }
1423 
1424  if (m_aElements.empty())
1425  {
1426  SAL_WARN("vcl.filter", "PDFDocument::ReadXRefStream: no tokens found");
1427  return;
1428  }
1429 
1430  PDFObjectElement* pObject = nullptr;
1431  for (const auto& pElement : m_aElements)
1432  {
1433  if (auto pObj = dynamic_cast<PDFObjectElement*>(pElement.get()))
1434  {
1435  pObject = pObj;
1436  break;
1437  }
1438  }
1439  if (!pObject)
1440  {
1441  SAL_WARN("vcl.filter", "PDFDocument::ReadXRefStream: no object token found");
1442  return;
1443  }
1444 
1445  // So that the Prev key can be looked up later.
1446  m_pXRefStream = pObject;
1447 
1448  PDFElement* pLookup = pObject->Lookup("Length");
1449  auto pNumber = dynamic_cast<PDFNumberElement*>(pLookup);
1450  if (!pNumber)
1451  {
1452  SAL_WARN("vcl.filter", "PDFDocument::ReadXRefStream: stream length is not provided");
1453  return;
1454  }
1455  sal_uInt64 nLength = pNumber->GetValue();
1456 
1457  // Look up the stream offset.
1458  PDFStreamElement* pStream = nullptr;
1459  for (const auto& pElement : m_aElements)
1460  {
1461  if (auto pS = dynamic_cast<PDFStreamElement*>(pElement.get()))
1462  {
1463  pStream = pS;
1464  break;
1465  }
1466  }
1467  if (!pStream)
1468  {
1469  SAL_WARN("vcl.filter", "PDFDocument::ReadXRefStream: no stream token found");
1470  return;
1471  }
1472 
1473  // Read and decompress it.
1474  rStream.Seek(pStream->GetOffset());
1475  std::vector<char> aBuf(nLength);
1476  rStream.ReadBytes(aBuf.data(), aBuf.size());
1477 
1478  auto pFilter = dynamic_cast<PDFNameElement*>(pObject->Lookup("Filter"));
1479  if (!pFilter)
1480  {
1481  SAL_WARN("vcl.filter", "PDFDocument::ReadXRefStream: no Filter found");
1482  return;
1483  }
1484 
1485  if (pFilter->GetValue() != "FlateDecode")
1486  {
1487  SAL_WARN("vcl.filter",
1488  "PDFDocument::ReadXRefStream: unexpected filter: " << pFilter->GetValue());
1489  return;
1490  }
1491 
1492  int nColumns = 1;
1493  int nPredictor = 1;
1494  if (auto pDecodeParams = dynamic_cast<PDFDictionaryElement*>(pObject->Lookup("DecodeParms")))
1495  {
1496  const std::map<OString, PDFElement*>& rItems = pDecodeParams->GetItems();
1497  auto it = rItems.find("Columns");
1498  if (it != rItems.end())
1499  if (auto pColumns = dynamic_cast<PDFNumberElement*>(it->second))
1500  nColumns = pColumns->GetValue();
1501  it = rItems.find("Predictor");
1502  if (it != rItems.end())
1503  if (auto pPredictor = dynamic_cast<PDFNumberElement*>(it->second))
1504  nPredictor = pPredictor->GetValue();
1505  }
1506 
1507  SvMemoryStream aSource(aBuf.data(), aBuf.size(), StreamMode::READ);
1508  SvMemoryStream aStream;
1509  ZCodec aZCodec;
1510  aZCodec.BeginCompression();
1511  aZCodec.Decompress(aSource, aStream);
1512  if (!aZCodec.EndCompression())
1513  {
1514  SAL_WARN("vcl.filter", "PDFDocument::ReadXRefStream: decompression failed");
1515  return;
1516  }
1517 
1518  // Look up the first and the last entry we need to read.
1519  auto pIndex = dynamic_cast<PDFArrayElement*>(pObject->Lookup("Index"));
1520  std::vector<size_t> aFirstObjects;
1521  std::vector<size_t> aNumberOfObjects;
1522  if (!pIndex)
1523  {
1524  auto pSize = dynamic_cast<PDFNumberElement*>(pObject->Lookup("Size"));
1525  if (pSize)
1526  {
1527  aFirstObjects.push_back(0);
1528  aNumberOfObjects.push_back(pSize->GetValue());
1529  }
1530  else
1531  {
1532  SAL_WARN("vcl.filter", "PDFDocument::ReadXRefStream: Index and Size not found");
1533  return;
1534  }
1535  }
1536  else
1537  {
1538  const std::vector<PDFElement*>& rIndexElements = pIndex->GetElements();
1539  size_t nFirstObject = 0;
1540  for (size_t i = 0; i < rIndexElements.size(); ++i)
1541  {
1542  if (i % 2 == 0)
1543  {
1544  auto pFirstObject = dynamic_cast<PDFNumberElement*>(rIndexElements[i]);
1545  if (!pFirstObject)
1546  {
1547  SAL_WARN("vcl.filter",
1548  "PDFDocument::ReadXRefStream: Index has no first object");
1549  return;
1550  }
1551  nFirstObject = pFirstObject->GetValue();
1552  continue;
1553  }
1554 
1555  auto pNumberOfObjects = dynamic_cast<PDFNumberElement*>(rIndexElements[i]);
1556  if (!pNumberOfObjects)
1557  {
1558  SAL_WARN("vcl.filter",
1559  "PDFDocument::ReadXRefStream: Index has no number of objects");
1560  return;
1561  }
1562  aFirstObjects.push_back(nFirstObject);
1563  aNumberOfObjects.push_back(pNumberOfObjects->GetValue());
1564  }
1565  }
1566 
1567  // Look up the format of a single entry.
1568  const int nWSize = 3;
1569  auto pW = dynamic_cast<PDFArrayElement*>(pObject->Lookup("W"));
1570  if (!pW || pW->GetElements().size() < nWSize)
1571  {
1572  SAL_WARN("vcl.filter", "PDFDocument::ReadXRefStream: W not found or has < 3 elements");
1573  return;
1574  }
1575  int aW[nWSize];
1576  // First character is the (kind of) repeated predictor.
1577  int nLineLength = 1;
1578  for (size_t i = 0; i < nWSize; ++i)
1579  {
1580  auto pI = dynamic_cast<PDFNumberElement*>(pW->GetElements()[i]);
1581  if (!pI)
1582  {
1583  SAL_WARN("vcl.filter", "PDFDocument::ReadXRefStream: W contains non-number");
1584  return;
1585  }
1586  aW[i] = pI->GetValue();
1587  nLineLength += aW[i];
1588  }
1589 
1590  if (nPredictor > 1 && nLineLength - 1 != nColumns)
1591  {
1592  SAL_WARN("vcl.filter",
1593  "PDFDocument::ReadXRefStream: /DecodeParms/Columns is inconsistent with /W");
1594  return;
1595  }
1596 
1597  aStream.Seek(0);
1598  for (size_t nSubSection = 0; nSubSection < aFirstObjects.size(); ++nSubSection)
1599  {
1600  size_t nFirstObject = aFirstObjects[nSubSection];
1601  size_t nNumberOfObjects = aNumberOfObjects[nSubSection];
1602 
1603  // This is the line as read from the stream.
1604  std::vector<unsigned char> aOrigLine(nLineLength);
1605  // This is the line as it appears after tweaking according to nPredictor.
1606  std::vector<unsigned char> aFilteredLine(nLineLength);
1607  for (size_t nEntry = 0; nEntry < nNumberOfObjects; ++nEntry)
1608  {
1609  size_t nIndex = nFirstObject + nEntry;
1610 
1611  aStream.ReadBytes(aOrigLine.data(), aOrigLine.size());
1612  if (nPredictor > 1 && aOrigLine[0] + 10 != nPredictor)
1613  {
1614  SAL_WARN("vcl.filter", "PDFDocument::ReadXRefStream: in-stream predictor is "
1615  "inconsistent with /DecodeParms/Predictor for object #"
1616  << nIndex);
1617  return;
1618  }
1619 
1620  for (int i = 0; i < nLineLength; ++i)
1621  {
1622  switch (nPredictor)
1623  {
1624  case 1:
1625  // No prediction.
1626  break;
1627  case 12:
1628  // PNG prediction: up (on all rows).
1629  aFilteredLine[i] = aFilteredLine[i] + aOrigLine[i];
1630  break;
1631  default:
1632  SAL_WARN("vcl.filter", "PDFDocument::ReadXRefStream: unexpected predictor: "
1633  << nPredictor);
1634  return;
1635  break;
1636  }
1637  }
1638 
1639  // First character is already handled above.
1640  int nPos = 1;
1641  size_t nType = 0;
1642  // Start of the current field in the stream data.
1643  int nOffset = nPos;
1644  for (; nPos < nOffset + aW[0]; ++nPos)
1645  {
1646  unsigned char nCh = aFilteredLine[nPos];
1647  nType = (nType << 8) + nCh;
1648  }
1649 
1650  // Start of the object in the file stream.
1651  size_t nStreamOffset = 0;
1652  nOffset = nPos;
1653  for (; nPos < nOffset + aW[1]; ++nPos)
1654  {
1655  unsigned char nCh = aFilteredLine[nPos];
1656  nStreamOffset = (nStreamOffset << 8) + nCh;
1657  }
1658 
1659  // Generation number of the object.
1660  size_t nGenerationNumber = 0;
1661  nOffset = nPos;
1662  for (; nPos < nOffset + aW[2]; ++nPos)
1663  {
1664  unsigned char nCh = aFilteredLine[nPos];
1665  nGenerationNumber = (nGenerationNumber << 8) + nCh;
1666  }
1667 
1668  // Ignore invalid nType.
1669  if (nType <= 2)
1670  {
1671  if (m_aXRef.find(nIndex) == m_aXRef.end())
1672  {
1673  XRefEntry aEntry;
1674  switch (nType)
1675  {
1676  case 0:
1677  aEntry.SetType(XRefEntryType::FREE);
1678  break;
1679  case 1:
1680  aEntry.SetType(XRefEntryType::NOT_COMPRESSED);
1681  break;
1682  case 2:
1683  aEntry.SetType(XRefEntryType::COMPRESSED);
1684  break;
1685  }
1686  aEntry.SetOffset(nStreamOffset);
1687  m_aXRef[nIndex] = aEntry;
1688  }
1689  }
1690  }
1691  }
1692 }
1693 
1694 void PDFDocument::ReadXRef(SvStream& rStream)
1695 {
1696  PDFDocument::SkipWhitespace(rStream);
1697 
1698  while (true)
1699  {
1700  PDFNumberElement aFirstObject;
1701  if (!aFirstObject.Read(rStream))
1702  {
1703  // Next token is not a number, it'll be the trailer.
1704  return;
1705  }
1706 
1707  if (aFirstObject.GetValue() < 0)
1708  {
1709  SAL_WARN("vcl.filter", "PDFDocument::ReadXRef: expected first object number >= 0");
1710  return;
1711  }
1712 
1713  PDFDocument::SkipWhitespace(rStream);
1714  PDFNumberElement aNumberOfEntries;
1715  if (!aNumberOfEntries.Read(rStream))
1716  {
1717  SAL_WARN("vcl.filter", "PDFDocument::ReadXRef: failed to read number of entries");
1718  return;
1719  }
1720 
1721  if (aNumberOfEntries.GetValue() < 0)
1722  {
1723  SAL_WARN("vcl.filter", "PDFDocument::ReadXRef: expected zero or more entries");
1724  return;
1725  }
1726 
1727  size_t nSize = aNumberOfEntries.GetValue();
1728  for (size_t nEntry = 0; nEntry < nSize; ++nEntry)
1729  {
1730  size_t nIndex = aFirstObject.GetValue() + nEntry;
1731  PDFDocument::SkipWhitespace(rStream);
1732  PDFNumberElement aOffset;
1733  if (!aOffset.Read(rStream))
1734  {
1735  SAL_WARN("vcl.filter", "PDFDocument::ReadXRef: failed to read offset");
1736  return;
1737  }
1738 
1739  PDFDocument::SkipWhitespace(rStream);
1740  PDFNumberElement aGenerationNumber;
1741  if (!aGenerationNumber.Read(rStream))
1742  {
1743  SAL_WARN("vcl.filter", "PDFDocument::ReadXRef: failed to read generation number");
1744  return;
1745  }
1746 
1747  PDFDocument::SkipWhitespace(rStream);
1748  OString aKeyword = ReadKeyword(rStream);
1749  if (aKeyword != "f" && aKeyword != "n")
1750  {
1751  SAL_WARN("vcl.filter", "PDFDocument::ReadXRef: unexpected keyword");
1752  return;
1753  }
1754  // xrefs are read in reverse order, so never update an existing
1755  // offset with an older one.
1756  if (m_aXRef.find(nIndex) == m_aXRef.end())
1757  {
1758  XRefEntry aEntry;
1759  aEntry.SetOffset(aOffset.GetValue());
1760  // Initially only the first entry is dirty.
1761  if (nIndex == 0)
1762  aEntry.SetDirty(true);
1763  m_aXRef[nIndex] = aEntry;
1764  }
1765  PDFDocument::SkipWhitespace(rStream);
1766  }
1767  }
1768 }
1769 
1770 void PDFDocument::SkipWhitespace(SvStream& rStream)
1771 {
1772  char ch = 0;
1773 
1774  while (true)
1775  {
1776  rStream.ReadChar(ch);
1777  if (rStream.eof())
1778  break;
1779 
1780  if (!rtl::isAsciiWhiteSpace(static_cast<unsigned char>(ch)))
1781  {
1782  rStream.SeekRel(-1);
1783  return;
1784  }
1785  }
1786 }
1787 
1788 void PDFDocument::SkipLineBreaks(SvStream& rStream)
1789 {
1790  char ch = 0;
1791 
1792  while (true)
1793  {
1794  rStream.ReadChar(ch);
1795  if (rStream.eof())
1796  break;
1797 
1798  if (ch != '\n' && ch != '\r')
1799  {
1800  rStream.SeekRel(-1);
1801  return;
1802  }
1803  }
1804 }
1805 
1806 size_t PDFDocument::GetObjectOffset(size_t nIndex) const
1807 {
1808  auto it = m_aXRef.find(nIndex);
1809  if (it == m_aXRef.end() || it->second.GetType() == XRefEntryType::COMPRESSED)
1810  {
1811  SAL_WARN("vcl.filter", "PDFDocument::GetObjectOffset: wanted to look up index #"
1812  << nIndex << ", but failed");
1813  return 0;
1814  }
1815 
1816  return it->second.GetOffset();
1817 }
1818 
1819 const std::vector<std::unique_ptr<PDFElement>>& PDFDocument::GetElements() const
1820 {
1821  return m_aElements;
1822 }
1823 
1825 static void visitPages(PDFObjectElement* pPages, std::vector<PDFObjectElement*>& rRet)
1826 {
1827  auto pKids = dynamic_cast<PDFArrayElement*>(pPages->Lookup("Kids"));
1828  if (!pKids)
1829  {
1830  SAL_WARN("vcl.filter", "visitPages: pages has no kids");
1831  return;
1832  }
1833 
1834  pPages->setVisiting(true);
1835 
1836  for (const auto& pKid : pKids->GetElements())
1837  {
1838  auto pReference = dynamic_cast<PDFReferenceElement*>(pKid);
1839  if (!pReference)
1840  continue;
1841 
1842  PDFObjectElement* pKidObject = pReference->LookupObject();
1843  if (!pKidObject)
1844  continue;
1845 
1846  // detect if visiting reenters itself
1847  if (pKidObject->alreadyVisiting())
1848  {
1849  SAL_WARN("vcl.filter", "visitPages: loop in hierarchy");
1850  continue;
1851  }
1852 
1853  auto pName = dynamic_cast<PDFNameElement*>(pKidObject->Lookup("Type"));
1854  if (pName && pName->GetValue() == "Pages")
1855  // Pages inside pages: recurse.
1856  visitPages(pKidObject, rRet);
1857  else
1858  // Found an actual page.
1859  rRet.push_back(pKidObject);
1860  }
1861 
1862  pPages->setVisiting(false);
1863 }
1864 
1865 std::vector<PDFObjectElement*> PDFDocument::GetPages()
1866 {
1867  std::vector<PDFObjectElement*> aRet;
1868 
1869  PDFReferenceElement* pRoot = nullptr;
1870 
1871  PDFTrailerElement* pTrailer = nullptr;
1872  if (!m_aTrailerOffsets.empty())
1873  {
1874  // Get access to the latest trailer, and work with the keys of that
1875  // one.
1876  auto it = m_aOffsetTrailers.find(m_aTrailerOffsets[0]);
1877  if (it != m_aOffsetTrailers.end())
1878  pTrailer = it->second;
1879  }
1880 
1881  if (pTrailer)
1882  pRoot = dynamic_cast<PDFReferenceElement*>(pTrailer->Lookup("Root"));
1883  else if (m_pXRefStream)
1884  pRoot = dynamic_cast<PDFReferenceElement*>(m_pXRefStream->Lookup("Root"));
1885 
1886  if (!pRoot)
1887  {
1888  SAL_WARN("vcl.filter", "PDFDocument::GetPages: trailer has no Root key");
1889  return aRet;
1890  }
1891 
1892  PDFObjectElement* pCatalog = pRoot->LookupObject();
1893  if (!pCatalog)
1894  {
1895  SAL_WARN("vcl.filter", "PDFDocument::GetPages: trailer has no catalog");
1896  return aRet;
1897  }
1898 
1899  PDFObjectElement* pPages = pCatalog->LookupObject("Pages");
1900  if (!pPages)
1901  {
1902  SAL_WARN("vcl.filter", "PDFDocument::GetPages: catalog (obj " << pCatalog->GetObjectValue()
1903  << ") has no pages");
1904  return aRet;
1905  }
1906 
1907  visitPages(pPages, aRet);
1908 
1909  return aRet;
1910 }
1911 
1912 void PDFDocument::PushBackEOF(size_t nOffset) { m_aEOFs.push_back(nOffset); }
1913 
1914 std::vector<PDFObjectElement*> PDFDocument::GetSignatureWidgets()
1915 {
1916  std::vector<PDFObjectElement*> aRet;
1917 
1918  std::vector<PDFObjectElement*> aPages = GetPages();
1919 
1920  for (const auto& pPage : aPages)
1921  {
1922  if (!pPage)
1923  continue;
1924 
1925  PDFElement* pAnnotsElement = pPage->Lookup("Annots");
1926  auto pAnnots = dynamic_cast<PDFArrayElement*>(pAnnotsElement);
1927  if (!pAnnots)
1928  {
1929  // Annots is not an array, see if it's a reference to an object
1930  // with a direct array.
1931  auto pAnnotsRef = dynamic_cast<PDFReferenceElement*>(pAnnotsElement);
1932  if (pAnnotsRef)
1933  {
1934  if (PDFObjectElement* pAnnotsObject = pAnnotsRef->LookupObject())
1935  {
1936  pAnnots = pAnnotsObject->GetArray();
1937  }
1938  }
1939  }
1940 
1941  if (!pAnnots)
1942  continue;
1943 
1944  for (const auto& pAnnot : pAnnots->GetElements())
1945  {
1946  auto pReference = dynamic_cast<PDFReferenceElement*>(pAnnot);
1947  if (!pReference)
1948  continue;
1949 
1950  PDFObjectElement* pAnnotObject = pReference->LookupObject();
1951  if (!pAnnotObject)
1952  continue;
1953 
1954  auto pFT = dynamic_cast<PDFNameElement*>(pAnnotObject->Lookup("FT"));
1955  if (!pFT || pFT->GetValue() != "Sig")
1956  continue;
1957 
1958  aRet.push_back(pAnnotObject);
1959  }
1960  }
1961 
1962  return aRet;
1963 }
1964 
1965 std::vector<unsigned char> PDFDocument::DecodeHexString(PDFHexStringElement const* pElement)
1966 {
1967  return svl::crypto::DecodeHexString(pElement->GetValue());
1968 }
1969 
1970 PDFCommentElement::PDFCommentElement(PDFDocument& rDoc)
1971  : m_rDoc(rDoc)
1972 {
1973 }
1974 
1975 bool PDFCommentElement::Read(SvStream& rStream)
1976 {
1977  // Read from (including) the % char till (excluding) the end of the line/stream.
1978  OStringBuffer aBuf;
1979  char ch;
1980  rStream.ReadChar(ch);
1981  while (true)
1982  {
1983  if (ch == '\n' || ch == '\r' || rStream.eof())
1984  {
1985  m_aComment = aBuf.makeStringAndClear();
1986 
1987  if (m_aComment.startsWith("%%EOF"))
1988  m_rDoc.PushBackEOF(rStream.Tell());
1989 
1990  SAL_INFO("vcl.filter", "PDFCommentElement::Read: m_aComment is '" << m_aComment << "'");
1991  return true;
1992  }
1993  aBuf.append(ch);
1994  rStream.ReadChar(ch);
1995  }
1996 
1997  return false;
1998 }
1999 
2001 
2003 {
2004  OStringBuffer aBuf;
2005  m_nOffset = rStream.Tell();
2006  char ch;
2007  rStream.ReadChar(ch);
2008  if (rStream.eof())
2009  {
2010  return false;
2011  }
2012  if (!rtl::isAsciiDigit(static_cast<unsigned char>(ch)) && ch != '-' && ch != '.')
2013  {
2014  rStream.SeekRel(-1);
2015  return false;
2016  }
2017  while (!rStream.eof())
2018  {
2019  if (!rtl::isAsciiDigit(static_cast<unsigned char>(ch)) && ch != '-' && ch != '.')
2020  {
2021  rStream.SeekRel(-1);
2022  m_nLength = rStream.Tell() - m_nOffset;
2023  m_fValue = aBuf.makeStringAndClear().toDouble();
2024  SAL_INFO("vcl.filter", "PDFNumberElement::Read: m_fValue is '" << m_fValue << "'");
2025  return true;
2026  }
2027  aBuf.append(ch);
2028  rStream.ReadChar(ch);
2029  }
2030 
2031  return false;
2032 }
2033 
2034 sal_uInt64 PDFNumberElement::GetLocation() const { return m_nOffset; }
2035 
2036 sal_uInt64 PDFNumberElement::GetLength() const { return m_nLength; }
2037 
2038 PDFBooleanElement::PDFBooleanElement(bool /*bValue*/) {}
2039 
2040 bool PDFBooleanElement::Read(SvStream& /*rStream*/) { return true; }
2041 
2042 bool PDFNullElement::Read(SvStream& /*rStream*/) { return true; }
2043 
2045 {
2046  char ch;
2047  rStream.ReadChar(ch);
2048  if (ch != '<')
2049  {
2050  SAL_INFO("vcl.filter", "PDFHexStringElement::Read: expected '<' as first character");
2051  return false;
2052  }
2053  rStream.ReadChar(ch);
2054 
2055  OStringBuffer aBuf;
2056  while (!rStream.eof())
2057  {
2058  if (ch == '>')
2059  {
2060  m_aValue = aBuf.makeStringAndClear();
2061  SAL_INFO("vcl.filter",
2062  "PDFHexStringElement::Read: m_aValue length is " << m_aValue.getLength());
2063  return true;
2064  }
2065  aBuf.append(ch);
2066  rStream.ReadChar(ch);
2067  }
2068 
2069  return false;
2070 }
2071 
2072 const OString& PDFHexStringElement::GetValue() const { return m_aValue; }
2073 
2075 {
2076  char nPrevCh = 0;
2077  char ch = 0;
2078  rStream.ReadChar(ch);
2079  if (ch != '(')
2080  {
2081  SAL_INFO("vcl.filter", "PDFHexStringElement::Read: expected '(' as first character");
2082  return false;
2083  }
2084  nPrevCh = ch;
2085  rStream.ReadChar(ch);
2086 
2087  // Start with 1 nesting level as we read a '(' above already.
2088  int nDepth = 1;
2089  OStringBuffer aBuf;
2090  while (!rStream.eof())
2091  {
2092  if (ch == '(' && nPrevCh != '\\')
2093  ++nDepth;
2094 
2095  if (ch == ')' && nPrevCh != '\\')
2096  --nDepth;
2097 
2098  if (nDepth == 0)
2099  {
2100  // ')' of the outermost '(' is reached.
2101  m_aValue = aBuf.makeStringAndClear();
2102  SAL_INFO("vcl.filter",
2103  "PDFLiteralStringElement::Read: m_aValue is '" << m_aValue << "'");
2104  return true;
2105  }
2106  aBuf.append(ch);
2107  nPrevCh = ch;
2108  rStream.ReadChar(ch);
2109  }
2110 
2111  return false;
2112 }
2113 
2114 const OString& PDFLiteralStringElement::GetValue() const { return m_aValue; }
2115 
2117  : m_rDoc(rDoc)
2118 {
2119 }
2120 
2122 {
2123  m_nOffset = rStream.Tell();
2124  return true;
2125 }
2126 
2127 PDFElement* PDFTrailerElement::Lookup(const OString& rDictionaryKey)
2128 {
2129  if (m_aDictionary.empty())
2131 
2132  return PDFDictionaryElement::Lookup(m_aDictionary, rDictionaryKey);
2133 }
2134 
2135 sal_uInt64 PDFTrailerElement::GetLocation() const { return m_nOffset; }
2136 
2137 double PDFNumberElement::GetValue() const { return m_fValue; }
2138 
/// Creates an indirect object with the given object and generation numbers, owned by rDoc.
///
/// All offsets/lengths start at 0 and all contained-element pointers (number, dictionary,
/// array, stream) start as nullptr; they are filled in later via the Set*() methods.
2139 PDFObjectElement::PDFObjectElement(PDFDocument& rDoc, double fObjectValue, double fGenerationValue)
2140  : m_rDoc(rDoc)
2141  , m_fObjectValue(fObjectValue)
2142  , m_fGenerationValue(fGenerationValue)
2143  , m_pNumberElement(nullptr)
2144  , m_nDictionaryOffset(0)
2145  , m_nDictionaryLength(0)
2146  , m_pDictionaryElement(nullptr)
2147  , m_nArrayOffset(0)
2148  , m_nArrayLength(0)
2149  , m_pArrayElement(nullptr)
2150  , m_pStreamElement(nullptr)
2151 {
2152 }
2153 
2155 {
2156  SAL_INFO("vcl.filter",
2157  "PDFObjectElement::Read: " << m_fObjectValue << " " << m_fGenerationValue << " obj");
2158  return true;
2159 }
2160 
2162 
/// Fills rDictionary (key name -> value element) from the flat token list rElements.
///
/// Scanning starts at the position of pThis inside rElements (or at index 0 if pThis is not
/// found there) and stops at the end-dictionary token that closes the first dictionary, at an
/// end-object token, or at the end of rElements. Nested dictionaries are parsed recursively
/// into their own m_aItems. When pThis is an object, its dictionary pointer, offset and length
/// are recorded on it as a side effect.
///
/// @param rElements   tokens to scan; not modified.
/// @param pThis       the object, trailer or nested dictionary being parsed.
/// @param rDictionary output map; if it is already non-empty nothing is done.
/// @return index in rElements of the end-dictionary token that closed the outermost
/// dictionary, or 0 if rDictionary was already populated or no such token was reached.
2163 size_t PDFDictionaryElement::Parse(const std::vector<std::unique_ptr<PDFElement>>& rElements,
2164  PDFElement* pThis, std::map<OString, PDFElement*>& rDictionary)
2165 {
2166  // The index of last parsed element, in case of nested dictionaries.
2167  size_t nRet = 0;
2168 
2169  if (!rDictionary.empty())
2170  return nRet;
2171 
// Flag pThis as being parsed; nested dictionaries reporting alreadyParsing() are not
// recursed into below.
2172  pThis->setParsing(true);
2173 
2174  auto pThisObject = dynamic_cast<PDFObjectElement*>(pThis);
2175  // This is set to non-nullptr here for nested dictionaries only.
2176  auto pThisDictionary = dynamic_cast<PDFDictionaryElement*>(pThis);
2177 
2178  // Find out where the dictionary for this object starts.
2179  size_t nIndex = 0;
2180  for (size_t i = 0; i < rElements.size(); ++i)
2181  {
2182  if (rElements[i].get() == pThis)
2183  {
2184  nIndex = i;
2185  break;
2186  }
2187  }
2188 
// Parser state: aName is the pending key (empty when a key is expected next), aNumbers
// holds number tokens whose role (plain value, array item or reference part) is not yet
// known.
2189  OString aName;
2190  sal_uInt64 nNameOffset = 0;
2191  std::vector<PDFNumberElement*> aNumbers;
2192  // The array value we're in -- if any.
2193  PDFArrayElement* pArray = nullptr;
2194  sal_uInt64 nDictionaryOffset = 0;
2195  int nDictionaryDepth = 0;
2196  // Toplevel dictionary found (not inside an array).
2197  bool bDictionaryFound = false;
2198  // Toplevel array found (not inside a dictionary).
2199  bool bArrayFound = false;
2200  for (size_t i = nIndex; i < rElements.size(); ++i)
2201  {
2202  // Dictionary tokens can be nested, track enter/leave.
2203  if (auto pDictionary = dynamic_cast<PDFDictionaryElement*>(rElements[i].get()))
2204  {
2205  bDictionaryFound = true;
2206  if (++nDictionaryDepth == 1)
2207  {
2208  // First dictionary start, track start offset.
2209  nDictionaryOffset = pDictionary->m_nLocation;
2210  if (pThisObject)
2211  {
2212  if (!bArrayFound)
2213  // Then the toplevel dictionary of the object.
2214  pThisObject->SetDictionary(pDictionary);
2215  pThisDictionary = pDictionary;
2216  pThisObject->SetDictionaryOffset(nDictionaryOffset);
2217  }
2218  }
2219  else if (!pDictionary->alreadyParsing())
2220  {
2221  // Nested dictionary.
2222  const size_t nexti
2223  = PDFDictionaryElement::Parse(rElements, pDictionary, pDictionary->m_aItems);
2224  if (nexti >= i) // ensure we go forwards and not endlessly loop
2225  {
2226  i = nexti;
2227  rDictionary[aName] = pDictionary;
2228  aName.clear();
2229  }
2230  }
2231  }
2232 
2233  if (auto pEndDictionary = dynamic_cast<PDFEndDictionaryElement*>(rElements[i].get()))
2234  {
2235  if (--nDictionaryDepth == 0)
2236  {
2237  // Last dictionary end, track length and stop parsing.
2238  if (pThisObject)
2239  pThisObject->SetDictionaryLength(pEndDictionary->GetLocation()
2240  - nDictionaryOffset);
2241  nRet = i;
2242  break;
2243  }
2244  }
2245 
2246  auto pName = dynamic_cast<PDFNameElement*>(rElements[i].get());
2247  if (pName)
2248  {
// A new name while numbers are pending: the last pending number was the value of the
// previous key.
2249  if (!aNumbers.empty())
2250  {
2251  PDFNumberElement* pNumber = aNumbers.back();
2252  rDictionary[aName] = pNumber;
2253  if (pThisDictionary)
2254  {
2255  pThisDictionary->SetKeyOffset(aName, nNameOffset);
2256  pThisDictionary->SetKeyValueLength(
2257  aName, pNumber->GetLocation() + pNumber->GetLength() - nNameOffset);
2258  }
2259  aName.clear();
2260  aNumbers.clear();
2261  }
2262 
2263  if (aName.isEmpty())
2264  {
2265  // Remember key.
2266  aName = pName->GetValue();
2267  nNameOffset = pName->GetLocation();
2268  }
2269  else
2270  {
2271  if (pArray)
2272  {
2273  if (bDictionaryFound)
2274  // Array inside dictionary.
2275  pArray->PushBack(pName);
2276  }
2277  else
2278  {
2279  // Name-name key-value.
2280  rDictionary[aName] = pName;
2281  if (pThisDictionary)
2282  {
2283  pThisDictionary->SetKeyOffset(aName, nNameOffset);
// NOTE(review): the embedded listing numbers jump from 2284 to 2286 in the expression
// below, so one continuation line appears to be missing from this copy -- confirm against
// the original file.
2284  pThisDictionary->SetKeyValueLength(aName, pName->GetLocation()
2286  - nNameOffset);
2287  }
2288  aName.clear();
2289  }
2290  }
2291  continue;
2292  }
2293 
2294  auto pArr = dynamic_cast<PDFArrayElement*>(rElements[i].get());
2295  if (pArr)
2296  {
2297  bArrayFound = true;
2298  pArray = pArr;
2299  continue;
2300  }
2301 
2302  auto pEndArr = dynamic_cast<PDFEndArrayElement*>(rElements[i].get());
2303  if (pArray && pEndArr)
2304  {
// Array closed: pending numbers become its items and the array becomes the value of the
// pending key.
2305  for (auto& pNumber : aNumbers)
2306  pArray->PushBack(pNumber);
2307  aNumbers.clear();
2308  rDictionary[aName] = pArray;
2309  if (pThisDictionary)
2310  {
2311  pThisDictionary->SetKeyOffset(aName, nNameOffset);
2312  // Include the ending ']' in the length of the key - (array)value pair length.
2313  pThisDictionary->SetKeyValueLength(aName, pEndArr->GetOffset() - nNameOffset + 1);
2314  }
2315  aName.clear();
2316  pArray = nullptr;
2317  continue;
2318  }
2319 
2320  auto pReference = dynamic_cast<PDFReferenceElement*>(rElements[i].get());
2321  if (pReference)
2322  {
2323  if (!pArray)
2324  {
2325  rDictionary[aName] = pReference;
2326  if (pThisDictionary)
2327  {
2328  pThisDictionary->SetKeyOffset(aName, nNameOffset);
2329  pThisDictionary->SetKeyValueLength(aName,
2330  pReference->GetOffset() - nNameOffset);
2331  }
2332  aName.clear();
2333  }
2334  else
2335  {
2336  if (bDictionaryFound)
2337  // Array inside dictionary.
2338  pArray->PushBack(pReference);
2339  }
// The pending numbers were the object/generation parts of this reference.
2340  aNumbers.clear();
2341  continue;
2342  }
2343 
// Literal strings and booleans are always stored under the pending key (even while an
// array is open); only the key offset is recorded for them, no key-value length.
2344  auto pLiteralString = dynamic_cast<PDFLiteralStringElement*>(rElements[i].get());
2345  if (pLiteralString)
2346  {
2347  rDictionary[aName] = pLiteralString;
2348  if (pThisDictionary)
2349  pThisDictionary->SetKeyOffset(aName, nNameOffset);
2350  aName.clear();
2351  continue;
2352  }
2353 
2354  auto pBoolean = dynamic_cast<PDFBooleanElement*>(rElements[i].get());
2355  if (pBoolean)
2356  {
2357  rDictionary[aName] = pBoolean;
2358  if (pThisDictionary)
2359  pThisDictionary->SetKeyOffset(aName, nNameOffset);
2360  aName.clear();
2361  continue;
2362  }
2363 
2364  auto pHexString = dynamic_cast<PDFHexStringElement*>(rElements[i].get());
2365  if (pHexString)
2366  {
2367  if (!pArray)
2368  {
2369  rDictionary[aName] = pHexString;
2370  if (pThisDictionary)
2371  pThisDictionary->SetKeyOffset(aName, nNameOffset);
2372  aName.clear();
2373  }
2374  else
2375  {
2376  pArray->PushBack(pHexString);
2377  }
2378  continue;
2379  }
2380 
2381  if (dynamic_cast<PDFEndObjectElement*>(rElements[i].get()))
2382  break;
2383 
2384  // Just remember this, so that in case it's not a reference parameter,
2385  // we can handle it later.
2386  auto pNumber = dynamic_cast<PDFNumberElement*>(rElements[i].get());
2387  if (pNumber)
2388  aNumbers.push_back(pNumber);
2389  }
2390 
// Numbers still pending after the loop: the last one is stored under the pending key.
2391  if (!aNumbers.empty())
2392  {
2393  rDictionary[aName] = aNumbers.back();
2394  if (pThisDictionary)
2395  pThisDictionary->SetKeyOffset(aName, nNameOffset);
2396  aName.clear();
2397  aNumbers.clear();
2398  }
2399 
2400  pThis->setParsing(false);
2401 
2402  return nRet;
2403 }
2404 
2405 PDFElement* PDFDictionaryElement::Lookup(const std::map<OString, PDFElement*>& rDictionary,
2406  const OString& rKey)
2407 {
2408  auto it = rDictionary.find(rKey);
2409  if (it == rDictionary.end())
2410  return nullptr;
2411 
2412  return it->second;
2413 }
2414 
2416 {
2417  auto pKey = dynamic_cast<PDFReferenceElement*>(
2418  PDFDictionaryElement::Lookup(m_aItems, rDictionaryKey));
2419  if (!pKey)
2420  {
2421  SAL_WARN("vcl.filter",
2422  "PDFDictionaryElement::LookupObject: no such key with reference value: "
2423  << rDictionaryKey);
2424  return nullptr;
2425  }
2426 
2427  return pKey->LookupObject();
2428 }
2429 
2430 PDFElement* PDFDictionaryElement::LookupElement(const OString& rDictionaryKey)
2431 {
2432  return PDFDictionaryElement::Lookup(m_aItems, rDictionaryKey);
2433 }
2434 
2435 PDFElement* PDFObjectElement::Lookup(const OString& rDictionaryKey)
2436 {
2437  if (m_aDictionary.empty())
2438  {
2439  if (!m_aElements.empty())
2440  // This is a stored object in an object stream.
2442  else
2443  // Normal object: elements are stored as members of the document itself.
2445  }
2446 
2447  return PDFDictionaryElement::Lookup(m_aDictionary, rDictionaryKey);
2448 }
2449 
2450 PDFObjectElement* PDFObjectElement::LookupObject(const OString& rDictionaryKey)
2451 {
2452  auto pKey = dynamic_cast<PDFReferenceElement*>(Lookup(rDictionaryKey));
2453  if (!pKey)
2454  {
2455  SAL_WARN("vcl.filter", "PDFObjectElement::LookupObject: no such key with reference value: "
2456  << rDictionaryKey);
2457  return nullptr;
2458  }
2459 
2460  return pKey->LookupObject();
2461 }
2462 
2464 
2465 void PDFObjectElement::SetDictionaryOffset(sal_uInt64 nDictionaryOffset)
2466 {
2467  m_nDictionaryOffset = nDictionaryOffset;
2468 }
2469 
2471 {
2472  if (m_aDictionary.empty())
2474 
2475  return m_nDictionaryOffset;
2476 }
2477 
2478 void PDFObjectElement::SetArrayOffset(sal_uInt64 nArrayOffset) { m_nArrayOffset = nArrayOffset; }
2479 
2480 sal_uInt64 PDFObjectElement::GetArrayOffset() const { return m_nArrayOffset; }
2481 
2482 void PDFDictionaryElement::SetKeyOffset(const OString& rKey, sal_uInt64 nOffset)
2483 {
2484  m_aDictionaryKeyOffset[rKey] = nOffset;
2485 }
2486 
2487 void PDFDictionaryElement::SetKeyValueLength(const OString& rKey, sal_uInt64 nLength)
2488 {
2489  m_aDictionaryKeyValueLength[rKey] = nLength;
2490 }
2491 
2492 sal_uInt64 PDFDictionaryElement::GetKeyOffset(const OString& rKey) const
2493 {
2494  auto it = m_aDictionaryKeyOffset.find(rKey);
2495  if (it == m_aDictionaryKeyOffset.end())
2496  return 0;
2497 
2498  return it->second;
2499 }
2500 
2501 sal_uInt64 PDFDictionaryElement::GetKeyValueLength(const OString& rKey) const
2502 {
2503  auto it = m_aDictionaryKeyValueLength.find(rKey);
2504  if (it == m_aDictionaryKeyValueLength.end())
2505  return 0;
2506 
2507  return it->second;
2508 }
2509 
2510 const std::map<OString, PDFElement*>& PDFDictionaryElement::GetItems() const { return m_aItems; }
2511 
2512 void PDFObjectElement::SetDictionaryLength(sal_uInt64 nDictionaryLength)
2513 {
2514  m_nDictionaryLength = nDictionaryLength;
2515 }
2516 
2518 {
2519  if (m_aDictionary.empty())
2521 
2522  return m_nDictionaryLength;
2523 }
2524 
2525 void PDFObjectElement::SetArrayLength(sal_uInt64 nArrayLength) { m_nArrayLength = nArrayLength; }
2526 
2527 sal_uInt64 PDFObjectElement::GetArrayLength() const { return m_nArrayLength; }
2528 
2530 {
2531  if (m_aDictionary.empty())
2533  return m_pDictionaryElement;
2534 }
2535 
2537 {
2538  m_pDictionaryElement = pDictionaryElement;
2539 }
2540 
2542 {
2543  m_pNumberElement = pNumberElement;
2544 }
2545 
2547 
2548 const std::vector<PDFReferenceElement*>& PDFObjectElement::GetDictionaryReferences() const
2549 {
2550  return m_aDictionaryReferences;
2551 }
2552 
2554 {
2555  m_aDictionaryReferences.push_back(pReference);
2556 }
2557 
2558 const std::map<OString, PDFElement*>& PDFObjectElement::GetDictionaryItems()
2559 {
2560  if (m_aDictionary.empty())
2562 
2563  return m_aDictionary;
2564 }
2565 
2566 void PDFObjectElement::SetArray(PDFArrayElement* pArrayElement) { m_pArrayElement = pArrayElement; }
2567 
2569 {
2570  m_pStreamElement = pStreamElement;
2571 }
2572 
2574 
2576 
2578 {
2579  if (!m_pStreamElement)
2580  {
2581  SAL_WARN("vcl.filter", "PDFObjectElement::ParseStoredObjects: no stream");
2582  return;
2583  }
2584 
2585  auto pType = dynamic_cast<PDFNameElement*>(Lookup("Type"));
2586  if (!pType || pType->GetValue() != "ObjStm")
2587  {
2588  if (!pType)
2589  SAL_WARN("vcl.filter", "PDFDocument::ReadXRefStream: missing unexpected type");
2590  else
2591  SAL_WARN("vcl.filter",
2592  "PDFDocument::ReadXRefStream: unexpected type: " << pType->GetValue());
2593  return;
2594  }
2595 
2596  auto pFilter = dynamic_cast<PDFNameElement*>(Lookup("Filter"));
2597  if (!pFilter || pFilter->GetValue() != "FlateDecode")
2598  {
2599  if (!pFilter)
2600  SAL_WARN("vcl.filter", "PDFDocument::ReadXRefStream: missing filter");
2601  else
2602  SAL_WARN("vcl.filter",
2603  "PDFDocument::ReadXRefStream: unexpected filter: " << pFilter->GetValue());
2604  return;
2605  }
2606 
2607  auto pFirst = dynamic_cast<PDFNumberElement*>(Lookup("First"));
2608  if (!pFirst)
2609  {
2610  SAL_WARN("vcl.filter", "PDFObjectElement::ParseStoredObjects: no First");
2611  return;
2612  }
2613 
2614  auto pN = dynamic_cast<PDFNumberElement*>(Lookup("N"));
2615  if (!pN)
2616  {
2617  SAL_WARN("vcl.filter", "PDFObjectElement::ParseStoredObjects: no N");
2618  return;
2619  }
2620  size_t nN = pN->GetValue();
2621 
2622  auto pLength = dynamic_cast<PDFNumberElement*>(Lookup("Length"));
2623  if (!pLength)
2624  {
2625  SAL_WARN("vcl.filter", "PDFObjectElement::ParseStoredObjects: no length");
2626  return;
2627  }
2628  size_t nLength = pLength->GetValue();
2629 
2630  // Read and decompress it.
2631  SvMemoryStream& rEditBuffer = m_rDoc.GetEditBuffer();
2632  rEditBuffer.Seek(m_pStreamElement->GetOffset());
2633  std::vector<char> aBuf(nLength);
2634  rEditBuffer.ReadBytes(aBuf.data(), aBuf.size());
2635  SvMemoryStream aSource(aBuf.data(), aBuf.size(), StreamMode::READ);
2636  SvMemoryStream aStream;
2637  ZCodec aZCodec;
2638  aZCodec.BeginCompression();
2639  aZCodec.Decompress(aSource, aStream);
2640  if (!aZCodec.EndCompression())
2641  {
2642  SAL_WARN("vcl.filter", "PDFObjectElement::ParseStoredObjects: decompression failed");
2643  return;
2644  }
2645 
2646  nLength = aStream.TellEnd();
2647  aStream.Seek(0);
2648  std::vector<size_t> aObjNums;
2649  std::vector<size_t> aOffsets;
2650  std::vector<size_t> aLengths;
2651  // First iterate over and find out the lengths.
2652  for (size_t nObject = 0; nObject < nN; ++nObject)
2653  {
2654  PDFNumberElement aObjNum;
2655  if (!aObjNum.Read(aStream))
2656  {
2657  SAL_WARN("vcl.filter",
2658  "PDFObjectElement::ParseStoredObjects: failed to read object number");
2659  return;
2660  }
2661  aObjNums.push_back(aObjNum.GetValue());
2662 
2663  PDFDocument::SkipWhitespace(aStream);
2664 
2665  PDFNumberElement aByteOffset;
2666  if (!aByteOffset.Read(aStream))
2667  {
2668  SAL_WARN("vcl.filter",
2669  "PDFObjectElement::ParseStoredObjects: failed to read byte offset");
2670  return;
2671  }
2672  aOffsets.push_back(pFirst->GetValue() + aByteOffset.GetValue());
2673 
2674  if (aOffsets.size() > 1)
2675  aLengths.push_back(aOffsets.back() - aOffsets[aOffsets.size() - 2]);
2676  if (nObject + 1 == nN)
2677  aLengths.push_back(nLength - aOffsets.back());
2678 
2679  PDFDocument::SkipWhitespace(aStream);
2680  }
2681 
2682  // Now create streams with the proper length and tokenize the data.
2683  for (size_t nObject = 0; nObject < nN; ++nObject)
2684  {
2685  size_t nObjNum = aObjNums[nObject];
2686  size_t nOffset = aOffsets[nObject];
2687  size_t nLen = aLengths[nObject];
2688 
2689  aStream.Seek(nOffset);
2690  m_aStoredElements.push_back(std::make_unique<PDFObjectElement>(m_rDoc, nObjNum, 0));
2691  PDFObjectElement* pStored = m_aStoredElements.back().get();
2692 
2693  aBuf.clear();
2694  aBuf.resize(nLen);
2695  aStream.ReadBytes(aBuf.data(), aBuf.size());
2696  SvMemoryStream aStoredStream(aBuf.data(), aBuf.size(), StreamMode::READ);
2697 
2698  m_rDoc.Tokenize(aStoredStream, TokenizeMode::STORED_OBJECT, pStored->GetStoredElements(),
2699  pStored);
2700  // This is how references know the object is stored inside this object stream.
2701  m_rDoc.SetIDObject(nObjNum, pStored);
2702 
2703  // Store the stream of the object in the object stream for later use.
2704  std::unique_ptr<SvMemoryStream> pStreamBuffer(new SvMemoryStream());
2705  aStoredStream.Seek(0);
2706  pStreamBuffer->WriteStream(aStoredStream);
2707  pStored->SetStreamBuffer(pStreamBuffer);
2708  }
2709 }
2710 
2711 std::vector<std::unique_ptr<PDFElement>>& PDFObjectElement::GetStoredElements()
2712 {
2713  return m_aElements;
2714 }
2715 
2717 
2718 void PDFObjectElement::SetStreamBuffer(std::unique_ptr<SvMemoryStream>& pStreamBuffer)
2719 {
2720  m_pStreamBuffer = std::move(pStreamBuffer);
2721 }
2722 
2724 
2726  PDFNumberElement const& rGeneration)
2727  : m_rDoc(rDoc)
2728  , m_fObjectValue(rObject.GetValue())
2729  , m_fGenerationValue(rGeneration.GetValue())
2730  , m_rObject(rObject)
2731 {
2732 }
2733 
2735 
2737 {
2738  SAL_INFO("vcl.filter",
2739  "PDFReferenceElement::Read: " << m_fObjectValue << " " << m_fGenerationValue << " R");
2740  m_nOffset = rStream.Tell();
2741  return true;
2742 }
2743 
2744 sal_uInt64 PDFReferenceElement::GetOffset() const { return m_nOffset; }
2745 
2747 {
2748  size_t nOffset = m_rDoc.GetObjectOffset(m_fObjectValue);
2749  if (nOffset == 0)
2750  {
2751  SAL_WARN("vcl.filter", "PDFReferenceElement::LookupNumber: found no offset for object #"
2752  << m_fObjectValue);
2753  return 0;
2754  }
2755 
2756  sal_uInt64 nOrigPos = rStream.Tell();
2757  comphelper::ScopeGuard g([&]() { rStream.Seek(nOrigPos); });
2758 
2759  rStream.Seek(nOffset);
2760  {
2761  PDFDocument::SkipWhitespace(rStream);
2762  PDFNumberElement aNumber;
2763  bool bRet = aNumber.Read(rStream);
2764  if (!bRet || aNumber.GetValue() != m_fObjectValue)
2765  {
2766  SAL_WARN("vcl.filter",
2767  "PDFReferenceElement::LookupNumber: offset points to not matching object");
2768  return 0;
2769  }
2770  }
2771 
2772  {
2773  PDFDocument::SkipWhitespace(rStream);
2774  PDFNumberElement aNumber;
2775  bool bRet = aNumber.Read(rStream);
2776  if (!bRet || aNumber.GetValue() != m_fGenerationValue)
2777  {
2778  SAL_WARN("vcl.filter",
2779  "PDFReferenceElement::LookupNumber: offset points to not matching generation");
2780  return 0;
2781  }
2782  }
2783 
2784  {
2785  PDFDocument::SkipWhitespace(rStream);
2786  OString aKeyword = PDFDocument::ReadKeyword(rStream);
2787  if (aKeyword != "obj")
2788  {
2789  SAL_WARN("vcl.filter",
2790  "PDFReferenceElement::LookupNumber: offset doesn't point to an obj keyword");
2791  return 0;
2792  }
2793  }
2794 
2795  PDFDocument::SkipWhitespace(rStream);
2796  PDFNumberElement aNumber;
2797  if (!aNumber.Read(rStream))
2798  {
2799  SAL_WARN("vcl.filter",
2800  "PDFReferenceElement::LookupNumber: failed to read referenced number");
2801  return 0;
2802  }
2803 
2804  return aNumber.GetValue();
2805 }
2806 
2808 {
2810 }
2811 
2813 {
2814  auto itIDObjects = m_aIDObjects.find(nObjectNumber);
2815 
2816  if (itIDObjects != m_aIDObjects.end())
2817  return itIDObjects->second;
2818 
2819  SAL_WARN("vcl.filter", "PDFDocument::LookupObject: can't find obj " << nObjectNumber);
2820  return nullptr;
2821 }
2822 
2824 
2826 
2828 
2830 {
2831  char ch;
2832  rStream.ReadChar(ch);
2833  if (ch != '<')
2834  {
2835  SAL_WARN("vcl.filter", "PDFDictionaryElement::Read: unexpected character: " << ch);
2836  return false;
2837  }
2838 
2839  if (rStream.eof())
2840  {
2841  SAL_WARN("vcl.filter", "PDFDictionaryElement::Read: unexpected end of file");
2842  return false;
2843  }
2844 
2845  rStream.ReadChar(ch);
2846  if (ch != '<')
2847  {
2848  SAL_WARN("vcl.filter", "PDFDictionaryElement::Read: unexpected character: " << ch);
2849  return false;
2850  }
2851 
2852  m_nLocation = rStream.Tell();
2853 
2854  SAL_INFO("vcl.filter", "PDFDictionaryElement::Read: '<<'");
2855 
2856  return true;
2857 }
2858 
/// Default construction; the location stays at its in-class initial value until Read() runs.
2859 PDFEndDictionaryElement::PDFEndDictionaryElement() = default;
2860 
2861 sal_uInt64 PDFEndDictionaryElement::GetLocation() const { return m_nLocation; }
2862 
2863 bool PDFEndDictionaryElement::Read(SvStream& rStream)
2864 {
2865  m_nLocation = rStream.Tell();
2866  char ch;
2867  rStream.ReadChar(ch);
2868  if (ch != '>')
2869  {
2870  SAL_WARN("vcl.filter", "PDFEndDictionaryElement::Read: unexpected character: " << ch);
2871  return false;
2872  }
2873 
2874  if (rStream.eof())
2875  {
2876  SAL_WARN("vcl.filter", "PDFEndDictionaryElement::Read: unexpected end of file");
2877  return false;
2878  }
2879 
2880  rStream.ReadChar(ch);
2881  if (ch != '>')
2882  {
2883  SAL_WARN("vcl.filter", "PDFEndDictionaryElement::Read: unexpected character: " << ch);
2884  return false;
2885  }
2886 
2887  SAL_INFO("vcl.filter", "PDFEndDictionaryElement::Read: '>>'");
2888 
2889  return true;
2890 }
2891 
/// Default construction; value and location are filled in by Read().
2892 PDFNameElement::PDFNameElement() = default;
2893 
2895 {
2896  char ch;
2897  rStream.ReadChar(ch);
2898  if (ch != '/')
2899  {
2900  SAL_WARN("vcl.filter", "PDFNameElement::Read: unexpected character: " << ch);
2901  return false;
2902  }
2903  m_nLocation = rStream.Tell();
2904 
2905  if (rStream.eof())
2906  {
2907  SAL_WARN("vcl.filter", "PDFNameElement::Read: unexpected end of file");
2908  return false;
2909  }
2910 
2911  // Read till the first white-space.
2912  OStringBuffer aBuf;
2913  rStream.ReadChar(ch);
2914  while (!rStream.eof())
2915  {
2916  if (rtl::isAsciiWhiteSpace(static_cast<unsigned char>(ch)) || ch == '/' || ch == '['
2917  || ch == ']' || ch == '<' || ch == '>' || ch == '(')
2918  {
2919  rStream.SeekRel(-1);
2920  m_aValue = aBuf.makeStringAndClear();
2921  SAL_INFO("vcl.filter", "PDFNameElement::Read: m_aValue is '" << m_aValue << "'");
2922  return true;
2923  }
2924  aBuf.append(ch);
2925  rStream.ReadChar(ch);
2926  }
2927 
2928  return false;
2929 }
2930 
2931 const OString& PDFNameElement::GetValue() const { return m_aValue; }
2932 
2933 sal_uInt64 PDFNameElement::GetLocation() const { return m_nLocation; }
2934 
2936  : m_nLength(nLength)
2937  , m_nOffset(0)
2938 {
2939 }
2940 
2942 {
2943  SAL_INFO("vcl.filter", "PDFStreamElement::Read: length is " << m_nLength);
2944  m_nOffset = rStream.Tell();
2945  std::vector<unsigned char> aBytes(m_nLength);
2946  rStream.ReadBytes(aBytes.data(), aBytes.size());
2947  m_aMemory.WriteBytes(aBytes.data(), aBytes.size());
2948 
2949  return rStream.good();
2950 }
2951 
2953 
2954 sal_uInt64 PDFStreamElement::GetOffset() const { return m_nOffset; }
2955 
2956 bool PDFEndStreamElement::Read(SvStream& /*rStream*/) { return true; }
2957 
2958 bool PDFEndObjectElement::Read(SvStream& /*rStream*/) { return true; }
2959 
2961  : m_pObject(pObject)
2962 {
2963 }
2964 
2966 {
2967  char ch;
2968  rStream.ReadChar(ch);
2969  if (ch != '[')
2970  {
2971  SAL_WARN("vcl.filter", "PDFArrayElement::Read: unexpected character: " << ch);
2972  return false;
2973  }
2974 
2975  SAL_INFO("vcl.filter", "PDFArrayElement::Read: '['");
2976 
2977  return true;
2978 }
2979 
2981 {
2982  if (m_pObject)
2983  SAL_INFO("vcl.filter",
2984  "PDFArrayElement::PushBack: object is " << m_pObject->GetObjectValue());
2985  m_aElements.push_back(pElement);
2986 }
2987 
2988 const std::vector<PDFElement*>& PDFArrayElement::GetElements() const { return m_aElements; }
2989 
/// Default construction; the offset stays at its in-class initial value until Read() runs.
2990 PDFEndArrayElement::PDFEndArrayElement() = default;
2991 
2992 bool PDFEndArrayElement::Read(SvStream& rStream)
2993 {
2994  m_nOffset = rStream.Tell();
2995  char ch;
2996  rStream.ReadChar(ch);
2997  if (ch != ']')
2998  {
2999  SAL_WARN("vcl.filter", "PDFEndArrayElement::Read: unexpected character: " << ch);
3000  return false;
3001  }
3002 
3003  SAL_INFO("vcl.filter", "PDFEndArrayElement::Read: ']'");
3004 
3005  return true;
3006 }
3007 
3008 sal_uInt64 PDFEndArrayElement::GetOffset() const { return m_nOffset; }
3009 
3010 } // namespace filter
3011 } // namespace vcl
3012 
3013 /* vim:set shiftwidth=4 softtabstop=4 expandtab: */
std::vector< std::unique_ptr< PDFElement > > m_aElements
Elements of an object in an object stream.
sal_uInt64 m_nOffset
Location of the end of the trailer token.
Array object: a list.
const sal_Int32 m_nLength
sal_uInt64 m_nDictionaryOffset
Position after the '<<' token.
Definition: pdfdocument.hxx:89
bool SetStreamSize(sal_uInt64 nSize)
sal_Int32 nIndex
PDFObjectElement(PDFDocument &rDoc, double fObjectValue, double fGenerationValue)
Numbering object: an integer or a real.
PDFElement * Lookup(const OString &rDictionaryKey)
PDFDictionaryElement * m_pDictionaryElement
Definition: pdfdocument.hxx:92
size_t GetObjectOffset(size_t nIndex) const
sal_uInt64 m_nLocation
Offset after the '/' token.
void SetStream(PDFStreamElement *pStreamElement)
SvMemoryStream m_aMemory
The byte array itself.
std::string GetValue
const int MAX_SIGNATURE_CONTENT_LENGTH
Definition: pdfdocument.cxx:36
PDFArrayElement * m_pArrayElement
The contained direct array, if any.
Definition: pdfdocument.hxx:98
std::vector< std::unique_ptr< PDFObjectElement > > m_aStoredElements
Objects of an object stream.
double LookupNumber(SvStream &rStream) const
Assuming the reference points to a number object, return its value.
void SetDictionaryLength(sal_uInt64 nDictionaryLength)
PDFTrailerElement(PDFDocument &rDoc)
aBuf
#define STREAM_SEEK_TO_END
static size_t Parse(const std::vector< std::unique_ptr< PDFElement >> &rElements, PDFElement *pThis, std::map< OString, PDFElement * > &rDictionary)
sal_uInt64 Seek(sal_uInt64 nPos)
std::vector< unsigned char > DecodeHexString(const OString &rHex)
void SetDictionaryOffset(sal_uInt64 nDictionaryOffset)
PDFDocument & m_rDoc
The document owning this element.
Definition: pdfdocument.hxx:82
void PushBack(PDFElement *pElement)
sal_uInt64 GetLocation() const
static sal_uInt64 GetLength()
PDFObjectElement * LookupObject(const OString &rDictionaryKey)
Looks up an object which is only referenced in this dictionary.
EmbeddedObjectRef * pObject
sal_uInt64 m_nArrayOffset
Position after the '[' token, if m_pArrayElement is set.
Definition: pdfdocument.hxx:94
std::map< OString, PDFElement * > m_aItems
Key-value pairs when the dictionary is a nested value.
void SetArrayOffset(sal_uInt64 nArrayOffset)
SvStream & WriteCharPtr(const char *pBuf)
void SetArrayLength(sal_uInt64 nArrayLength)
sal_uInt64 SeekRel(sal_Int64 nPos)
void setVisiting(bool bVisiting)
Definition: pdfdocument.hxx:72
SvMemoryStream * GetStreamBuffer() const
void SetStreamBuffer(std::unique_ptr< SvMemoryStream > &pStreamBuffer)
static OString GetDateTime()
Get current date/time in PDF D:YYYYMMDDHHMMSS form.
SvStream & WriteUInt32AsString(sal_uInt32 nUInt32)
std::unique_ptr< SvMemoryStream > m_pStreamBuffer
Uncompressed buffer of an object in an object stream.
A byte range in a PDF file.
Definition: pdfdocument.hxx:63
bool Read(SvStream &rStream) override
long EndCompression()
bool eof() const
PDFObjectElement * LookupObject(size_t nObjectNumber)
Look up object based on object number, possibly by parsing object streams.
bool Read(SvStream &rStream) override
In-memory representation of an on-disk PDF document.
TStyleElements m_aElements
sal_uInt64 m_nArrayLength
Length of the array buffer till (before) the ']' token.
Definition: pdfdocument.hxx:96
An entry in a cross-reference stream.
PDFReferenceElement(PDFDocument &rDoc, PDFNumberElement &rObject, PDFNumberElement const &rGeneration)
XRefEntryType GetType() const
const OString & GetValue() const
static void visitPages(PDFObjectElement *pPages, std::vector< PDFObjectElement * > &rRet)
Visits the page tree recursively, looking for page objects.
void PushBackEOF(size_t nOffset)
Remember the end location of an EOF token.
PDFObjectElement *const m_pObject
The object that contains this array.
sal_uInt64 GetArrayLength() const
sal_Int32 nElements
bool Read(SvStream &rStream) override
PDFNumberElement * m_pNumberElement
If set, the object contains this number element (outside any dictionary/array).
Definition: pdfdocument.hxx:87
void Compress(SvStream &rIStm, SvStream &rOStm)
void SetType(XRefEntryType eType)
Same as END_OF_OBJECT, but for object streams (no endobj keyword).
void SetKeyValueLength(const OString &rKey, sal_uInt64 nLength)
sal_uInt64 m_nOffset
Input file start location.
void ParseStoredObjects()
Parse objects stored in this object stream.
bool Read(SvStream &rStream) override
sal_uInt64 GetOffset() const
std::map< OString, sal_uInt64 > m_aDictionaryKeyOffset
Position after the '/' token.
sal_uInt16 char * pName
PDFStreamElement * m_pStreamElement
The stream of this object, used when this is an object stream.
bool GetDirty() const
long Decompress(SvStream &rIStm, SvStream &rOStm)
Indirect object: something with a unique ID.
Definition: pdfdocument.hxx:79
OString m_aComment
Definition: pdfdocument.cxx:46
PDFNumberElement & m_rObject
The element providing the object number.
sal_uInt64 m_nLength
Input file token length.
static OString ReadKeyword(SvStream &rStream)
int i
QPRO_FUNC_TYPE const nType
sal_uInt64 GetSize()
bool Read(SvStream &rStream) override
std::size_t WriteBytes(const void *pData, std::size_t nSize)
void AddDictionaryReference(PDFReferenceElement *pReference)
void BeginCompression(int nCompressLevel=ZCODEC_DEFAULT_COMPRESSION, bool gzLib=false)
SvMemoryStream m_aEditBuffer
All editing takes place in this buffer, if it happens.
sal_uInt64 GetLocation() const
Dictionary object: a set key-value pairs.
sal_uInt64 GetOffset() const
bool Read(SvStream &rStream) override
std::vector< PDFElement * > m_aElements
bool Tokenize(SvStream &rStream, TokenizeMode eMode, std::vector< std::unique_ptr< PDFElement >> &rElements, PDFObjectElement *pObjectElement)
Tokenize elements from current offset.
bool Read(SvStream &rStream) override
std::vector< PDFReferenceElement * > m_aDictionaryReferences
List of all reference elements inside this object's dictionary and nested dictionaries.
PDFStreamElement * GetStream() const
Access to the stream of the object, if it has any.
PDFDocument & m_rDoc
Definition: pdfdocument.cxx:45
sal_uInt64 m_nOffset
Location before the ']' token.
Definition: pdfdocument.cxx:88
SvStream & WriteStream(SvStream &rStream)
const std::map< OString, PDFElement * > & GetItems() const
std::map< OString, sal_uInt64 > m_aDictionaryKeyValueLength
Length of the dictionary key and value, till (before) the next token.
const char * pS
sal_uInt64 GetKeyOffset(const OString &rKey) const
void SetIDObject(size_t nID, PDFObjectElement *pObject)
Register an object (owned directly or indirectly by m_aElements) as a provider for a given ID...
void setParsing(bool bParsing)
Definition: pdfdocument.hxx:74
bool Read(SvStream &rStream) override
sal_uInt64 GetKeyValueLength(const OString &rKey) const
PDFObjectElement * LookupObject(const OString &rDictionaryKey)
std::size_t ReadBytes(void *pData, std::size_t nSize)
SvMemoryStream & GetEditBuffer()
Access to the input document, even after the input stream is gone.
const OString & GetValue() const
std::vector< std::unique_ptr< PDFElement > > & GetStoredElements()
void SetNumberElement(PDFNumberElement *pNumberElement)
sal_uInt64 GetLength() const
PDFDictionaryElement * GetDictionary()
const std::vector< std::unique_ptr< PDFElement > > & GetElements() const
bool Read(SvStream &rStream) override
void SetDirty(bool bDirty)
PDFObjectElement * LookupObject()
Lookup referenced object, without assuming anything about its contents.
std::unique_ptr< char[]> aBuffer
SvStream & ReadChar(char &rChar)
PDFElement * LookupElement(const OString &rDictionaryKey)
Looks up an element which is contained in this dictionary.
std::map< OString, PDFElement * > m_aDictionary
PDFArrayElement * GetArray() const
SvMemoryStream & GetMemory()
const std::vector< PDFReferenceElement * > & GetDictionaryReferences() const
#define SAL_INFO(area, stream)
OUString aName
std::map< size_t, PDFObjectElement * > m_aIDObjects
Object ID <-> Object pointer map.
static void SkipWhitespace(SvStream &rStream)
PDFNumberElement * GetNumberElement() const
sal_uInt64 Tell() const
const OString & GetValue() const
bool Sign(OStringBuffer &rCMSHexBuffer)
Reference object: something with a unique ID.
static void AppendUnicodeTextString(const OUString &rString, OStringBuffer &rBuffer)
Write rString as a PDF hex string into rBuffer.
const std::vector< PDFElement * > & GetElements() const
sal_uInt64 GetLocation() const
bool good() const
#define SAL_WARN(area, stream)
bool alreadyVisiting() const
Definition: pdfdocument.hxx:73
Literal string: in (asdf) form.
PDFArrayElement(PDFObjectElement *pObject)
Name object: a key string.
void SetOffset(sal_uInt64 nOffset)
The trailer singleton is at the end of the doc.
const std::map< OString, PDFElement * > & GetDictionaryItems()
Get access to the parsed key-value items from the object dictionary.
sal_Int32 const nLength
void SetDictionary(PDFDictionaryElement *pDictionaryElement)
PDFNumberElement & GetObjectElement() const
void AddDataRange(const void *pData, sal_Int32 size)
PDFElement * Lookup(const OString &rDictionaryKey)
sal_uInt64 m_nDictionaryLength
Length of the dictionary buffer till (before) the '>>' token.
Definition: pdfdocument.hxx:91
sal_uInt64 GetArrayOffset() const
Stream object: a byte array with a known length.
sal_uInt64 m_nLocation
Offset after the '<<' token.
sal_uInt16 nPos
static PDFElement * Lookup(const std::map< OString, PDFElement * > &rDictionary, const OString &rKey)
const void * GetData()
void SetArray(PDFArrayElement *pArrayElement)
sal_uInt64 m_nLocation
Offset before the '>>' token.
Definition: pdfdocument.cxx:62
bool Read(SvStream &rStream) override
std::map< OString, PDFElement * > m_aDictionary
Definition: pdfdocument.hxx:85
sal_uInt64 m_nOffset
Location after the 'R' token.
void SetKeyOffset(const OString &rKey, sal_uInt64 nOffset)
OStringBuffer & padToLength(OStringBuffer &rBuffer, sal_Int32 nLength, char cFill= '\0')