LibreOffice Module vcl (master)  1
pdfdocument.cxx
Go to the documentation of this file.
1 /* -*- Mode: C++; tab-width: 4; indent-tabs-mode: nil; c-basic-offset: 4 -*- */
2 /*
3  * This file is part of the LibreOffice project.
4  *
5  * This Source Code Form is subject to the terms of the Mozilla Public
6  * License, v. 2.0. If a copy of the MPL was not distributed with this
7  * file, You can obtain one at http://mozilla.org/MPL/2.0/.
8  */
9 
11 
12 #include <map>
13 #include <memory>
14 #include <vector>
15 
16 #include <com/sun/star/uno/Sequence.hxx>
17 #include <com/sun/star/security/XCertificate.hpp>
18 
20 #include <comphelper/string.hxx>
21 #include <rtl/character.hxx>
22 #include <rtl/strbuf.hxx>
23 #include <rtl/string.hxx>
24 #include <sal/log.hxx>
25 #include <sal/types.h>
26 #include <svl/cryptosign.hxx>
27 #include <tools/zcodec.hxx>
28 #include <vcl/pdfwriter.hxx>
29 #include <o3tl/safeint.hxx>
30 
31 #include <pdf/objectcopier.hxx>
32 
33 using namespace com::sun::star;
34 
35 namespace vcl::filter
36 {
/// Number of '0' characters reserved between '<' and '>' of a signature object's /Contents hex
/// string; the placeholder is filled in once the signature itself is known.
constexpr int MAX_SIGNATURE_CONTENT_LENGTH = 50000;
38 
39 class PDFTrailerElement;
40 
41 namespace
42 {
44 class PDFCommentElement : public PDFElement
45 {
46  PDFDocument& m_rDoc;
47  OString m_aComment;
48 
49 public:
50  explicit PDFCommentElement(PDFDocument& rDoc);
51  bool Read(SvStream& rStream) override;
52 };
53 }
54 
55 class PDFReferenceElement;
56 
57 namespace
58 {
60 class PDFEndDictionaryElement : public PDFElement
61 {
63  sal_uInt64 m_nLocation = 0;
64 
65 public:
66  PDFEndDictionaryElement();
67  bool Read(SvStream& rStream) override;
68  sal_uInt64 GetLocation() const;
69 };
70 
72 class PDFEndStreamElement : public PDFElement
73 {
74 public:
75  bool Read(SvStream& rStream) override;
76 };
77 
79 class PDFEndObjectElement : public PDFElement
80 {
81 public:
82  bool Read(SvStream& rStream) override;
83 };
84 
86 class PDFEndArrayElement : public PDFElement
87 {
89  sal_uInt64 m_nOffset = 0;
90 
91 public:
92  PDFEndArrayElement();
93  bool Read(SvStream& rStream) override;
94  sal_uInt64 GetOffset() const;
95 };
96 
98 class PDFBooleanElement : public PDFElement
99 {
100 public:
101  explicit PDFBooleanElement(bool bValue);
102  bool Read(SvStream& rStream) override;
103 };
104 
106 class PDFNullElement : public PDFElement
107 {
108 public:
109  bool Read(SvStream& rStream) override;
110 };
111 }
112 
115 {
117  std::map<OString, PDFElement*> m_aDictionary;
119  sal_uInt64 m_nOffset = 0;
120 
121 public:
122  explicit PDFTrailerElement(PDFDocument& rDoc);
123  bool Read(SvStream& rStream) override;
124  PDFElement* Lookup(const OString& rDictionaryKey);
125  sal_uInt64 GetLocation() const;
126 };
127 
128 XRefEntry::XRefEntry() = default;
129 
130 PDFDocument::PDFDocument() = default;
131 
132 PDFDocument::~PDFDocument() = default;
133 
134 bool PDFDocument::RemoveSignature(size_t nPosition)
135 {
136  std::vector<PDFObjectElement*> aSignatures = GetSignatureWidgets();
137  if (nPosition >= aSignatures.size())
138  {
139  SAL_WARN("vcl.filter", "PDFDocument::RemoveSignature: invalid nPosition");
140  return false;
141  }
142 
143  if (aSignatures.size() != m_aEOFs.size() - 1)
144  {
145  SAL_WARN("vcl.filter", "PDFDocument::RemoveSignature: no 1:1 mapping between signatures "
146  "and incremental updates");
147  return false;
148  }
149 
150  // The EOF offset is the end of the original file, without the signature at
151  // nPosition.
152  m_aEditBuffer.Seek(m_aEOFs[nPosition]);
153  // Drop all bytes after the current position.
154  m_aEditBuffer.SetStreamSize(m_aEditBuffer.Tell() + 1);
155 
156  return m_aEditBuffer.good();
157 }
158 
159 sal_Int32 PDFDocument::createObject()
160 {
161  sal_Int32 nObject = m_aXRef.size();
162  m_aXRef[nObject] = XRefEntry();
163  return nObject;
164 }
165 
166 bool PDFDocument::updateObject(sal_Int32 nObject)
167 {
168  if (o3tl::make_unsigned(nObject) >= m_aXRef.size())
169  {
170  SAL_WARN("vcl.filter", "PDFDocument::updateObject: invalid nObject");
171  return false;
172  }
173 
174  XRefEntry aEntry;
175  aEntry.SetOffset(m_aEditBuffer.Tell());
176  aEntry.SetDirty(true);
177  m_aXRef[nObject] = aEntry;
178  return true;
179 }
180 
181 bool PDFDocument::writeBuffer(const void* pBuffer, sal_uInt64 nBytes)
182 {
183  std::size_t nWritten = m_aEditBuffer.WriteBytes(pBuffer, nBytes);
184  return nWritten == nBytes;
185 }
186 
187 void PDFDocument::SetSignatureLine(const std::vector<sal_Int8>& rSignatureLine)
188 {
189  m_aSignatureLine = rSignatureLine;
190 }
191 
192 void PDFDocument::SetSignaturePage(size_t nPage) { m_nSignaturePage = nPage; }
193 
194 sal_uInt32 PDFDocument::GetNextSignature()
195 {
196  sal_uInt32 nRet = 0;
197  for (const auto& pSignature : GetSignatureWidgets())
198  {
199  auto pT = dynamic_cast<PDFLiteralStringElement*>(pSignature->Lookup("T"));
200  if (!pT)
201  continue;
202 
203  const OString& rValue = pT->GetValue();
204  const OString aPrefix = "Signature";
205  if (!rValue.startsWith(aPrefix))
206  continue;
207 
208  nRet = std::max(nRet, rValue.copy(aPrefix.getLength()).toUInt32());
209  }
210 
211  return nRet + 1;
212 }
213 
214 sal_Int32 PDFDocument::WriteSignatureObject(const OUString& rDescription, bool bAdES,
215  sal_uInt64& rLastByteRangeOffset,
216  sal_Int64& rContentOffset)
217 {
218  // Write signature object.
219  sal_Int32 nSignatureId = m_aXRef.size();
220  XRefEntry aSignatureEntry;
221  aSignatureEntry.SetOffset(m_aEditBuffer.Tell());
222  aSignatureEntry.SetDirty(true);
223  m_aXRef[nSignatureId] = aSignatureEntry;
224  OStringBuffer aSigBuffer;
225  aSigBuffer.append(nSignatureId);
226  aSigBuffer.append(" 0 obj\n");
227  aSigBuffer.append("<</Contents <");
228  rContentOffset = aSignatureEntry.GetOffset() + aSigBuffer.getLength();
229  // Reserve space for the PKCS#7 object.
230  OStringBuffer aContentFiller(MAX_SIGNATURE_CONTENT_LENGTH);
231  comphelper::string::padToLength(aContentFiller, MAX_SIGNATURE_CONTENT_LENGTH, '0');
232  aSigBuffer.append(aContentFiller.makeStringAndClear());
233  aSigBuffer.append(">\n/Type/Sig/SubFilter");
234  if (bAdES)
235  aSigBuffer.append("/ETSI.CAdES.detached");
236  else
237  aSigBuffer.append("/adbe.pkcs7.detached");
238 
239  // Time of signing.
240  aSigBuffer.append(" /M (");
241  aSigBuffer.append(vcl::PDFWriter::GetDateTime());
242  aSigBuffer.append(")");
243 
244  // Byte range: we can write offset1-length1 and offset2 right now, will
245  // write length2 later.
246  aSigBuffer.append(" /ByteRange [ 0 ");
247  // -1 and +1 is the leading "<" and the trailing ">" around the hex string.
248  aSigBuffer.append(rContentOffset - 1);
249  aSigBuffer.append(" ");
250  aSigBuffer.append(rContentOffset + MAX_SIGNATURE_CONTENT_LENGTH + 1);
251  aSigBuffer.append(" ");
252  rLastByteRangeOffset = aSignatureEntry.GetOffset() + aSigBuffer.getLength();
253  // We don't know how many bytes we need for the last ByteRange value, this
254  // should be enough.
255  OStringBuffer aByteRangeFiller;
256  comphelper::string::padToLength(aByteRangeFiller, 100, ' ');
257  aSigBuffer.append(aByteRangeFiller.makeStringAndClear());
258  // Finish the Sig obj.
259  aSigBuffer.append(" /Filter/Adobe.PPKMS");
260 
261  if (!rDescription.isEmpty())
262  {
263  aSigBuffer.append("/Reason<");
264  vcl::PDFWriter::AppendUnicodeTextString(rDescription, aSigBuffer);
265  aSigBuffer.append(">");
266  }
267 
268  aSigBuffer.append(" >>\nendobj\n\n");
269  m_aEditBuffer.WriteOString(aSigBuffer.toString());
270 
271  return nSignatureId;
272 }
273 
274 sal_Int32 PDFDocument::WriteAppearanceObject(tools::Rectangle& rSignatureRectangle)
275 {
276  PDFDocument aPDFDocument;
277  filter::PDFObjectElement* pPage = nullptr;
278  std::vector<filter::PDFObjectElement*> aContentStreams;
279 
280  if (!m_aSignatureLine.empty())
281  {
282  // Parse the PDF data of signature line: we can set the signature rectangle to non-empty
283  // based on it.
284  SvMemoryStream aPDFStream;
285  aPDFStream.WriteBytes(m_aSignatureLine.data(), m_aSignatureLine.size());
286  aPDFStream.Seek(0);
287  if (!aPDFDocument.Read(aPDFStream))
288  {
289  SAL_WARN("vcl.filter",
290  "PDFDocument::WriteAppearanceObject: failed to read the PDF document");
291  return -1;
292  }
293 
294  std::vector<filter::PDFObjectElement*> aPages = aPDFDocument.GetPages();
295  if (aPages.empty())
296  {
297  SAL_WARN("vcl.filter", "PDFDocument::WriteAppearanceObject: no pages");
298  return -1;
299  }
300 
301  pPage = aPages[0];
302  if (!pPage)
303  {
304  SAL_WARN("vcl.filter", "PDFDocument::WriteAppearanceObject: no page");
305  return -1;
306  }
307 
308  // Calculate the bounding box.
309  PDFElement* pMediaBox = pPage->Lookup("MediaBox");
310  auto pMediaBoxArray = dynamic_cast<PDFArrayElement*>(pMediaBox);
311  if (!pMediaBoxArray || pMediaBoxArray->GetElements().size() < 4)
312  {
313  SAL_WARN("vcl.filter",
314  "PDFDocument::WriteAppearanceObject: MediaBox is not an array of 4");
315  return -1;
316  }
317  const std::vector<PDFElement*>& rMediaBoxElements = pMediaBoxArray->GetElements();
318  auto pWidth = dynamic_cast<PDFNumberElement*>(rMediaBoxElements[2]);
319  if (!pWidth)
320  {
321  SAL_WARN("vcl.filter", "PDFDocument::WriteAppearanceObject: MediaBox has no width");
322  return -1;
323  }
324  rSignatureRectangle.setWidth(pWidth->GetValue());
325  auto pHeight = dynamic_cast<PDFNumberElement*>(rMediaBoxElements[3]);
326  if (!pHeight)
327  {
328  SAL_WARN("vcl.filter", "PDFDocument::WriteAppearanceObject: MediaBox has no height");
329  return -1;
330  }
331  rSignatureRectangle.setHeight(pHeight->GetValue());
332 
333  if (PDFObjectElement* pContentStream = pPage->LookupObject("Contents"))
334  {
335  aContentStreams.push_back(pContentStream);
336  }
337 
338  if (aContentStreams.empty())
339  {
340  SAL_WARN("vcl.filter", "PDFDocument::WriteAppearanceObject: no content stream");
341  return -1;
342  }
343  }
344  m_aSignatureLine.clear();
345 
346  // Write appearance object: allocate an ID.
347  sal_Int32 nAppearanceId = m_aXRef.size();
348  m_aXRef[nAppearanceId] = XRefEntry();
349 
350  // Write the object content.
351  SvMemoryStream aEditBuffer;
352  aEditBuffer.WriteUInt32AsString(nAppearanceId);
353  aEditBuffer.WriteCharPtr(" 0 obj\n");
354  aEditBuffer.WriteCharPtr("<</Type/XObject\n/Subtype/Form\n");
355 
356  PDFObjectCopier aCopier(*this);
357  if (!aContentStreams.empty())
358  {
359  OStringBuffer aBuffer;
360  aCopier.copyPageResources(pPage, aBuffer);
361  aEditBuffer.WriteOString(aBuffer.makeStringAndClear());
362  }
363 
364  aEditBuffer.WriteCharPtr("/BBox[0 0 ");
365  aEditBuffer.WriteOString(OString::number(rSignatureRectangle.getWidth()));
366  aEditBuffer.WriteCharPtr(" ");
367  aEditBuffer.WriteOString(OString::number(rSignatureRectangle.getHeight()));
368  aEditBuffer.WriteCharPtr("]\n/Length ");
369 
370  // Add the object to the doc-level edit buffer and update the offset.
371  SvMemoryStream aStream;
372  bool bCompressed = false;
373  sal_Int32 nLength = 0;
374  if (!aContentStreams.empty())
375  {
376  nLength = PDFObjectCopier::copyPageStreams(aContentStreams, aStream, bCompressed);
377  }
378  aEditBuffer.WriteOString(OString::number(nLength));
379  if (bCompressed)
380  {
381  aEditBuffer.WriteOString(" /Filter/FlateDecode");
382  }
383 
384  aEditBuffer.WriteCharPtr("\n>>\n");
385 
386  aEditBuffer.WriteCharPtr("stream\n");
387 
388  // Copy the original page streams to the form XObject stream.
389  aStream.Seek(0);
390  aEditBuffer.WriteStream(aStream);
391 
392  aEditBuffer.WriteCharPtr("\nendstream\nendobj\n\n");
393 
394  aEditBuffer.Seek(0);
395  XRefEntry aAppearanceEntry;
396  aAppearanceEntry.SetOffset(m_aEditBuffer.Tell());
397  aAppearanceEntry.SetDirty(true);
398  m_aXRef[nAppearanceId] = aAppearanceEntry;
399  m_aEditBuffer.WriteStream(aEditBuffer);
400 
401  return nAppearanceId;
402 }
403 
404 sal_Int32 PDFDocument::WriteAnnotObject(PDFObjectElement const& rFirstPage, sal_Int32 nSignatureId,
405  sal_Int32 nAppearanceId,
406  const tools::Rectangle& rSignatureRectangle)
407 {
408  // Decide what identifier to use for the new signature.
409  sal_uInt32 nNextSignature = GetNextSignature();
410 
411  // Write the Annot object, references nSignatureId and nAppearanceId.
412  sal_Int32 nAnnotId = m_aXRef.size();
413  XRefEntry aAnnotEntry;
414  aAnnotEntry.SetOffset(m_aEditBuffer.Tell());
415  aAnnotEntry.SetDirty(true);
416  m_aXRef[nAnnotId] = aAnnotEntry;
417  m_aEditBuffer.WriteUInt32AsString(nAnnotId);
418  m_aEditBuffer.WriteCharPtr(" 0 obj\n");
419  m_aEditBuffer.WriteCharPtr("<</Type/Annot/Subtype/Widget/F 132\n");
420  m_aEditBuffer.WriteCharPtr("/Rect[0 0 ");
421  m_aEditBuffer.WriteOString(OString::number(rSignatureRectangle.getWidth()));
422  m_aEditBuffer.WriteCharPtr(" ");
423  m_aEditBuffer.WriteOString(OString::number(rSignatureRectangle.getHeight()));
424  m_aEditBuffer.WriteCharPtr("]\n");
425  m_aEditBuffer.WriteCharPtr("/FT/Sig\n");
426  m_aEditBuffer.WriteCharPtr("/P ");
427  m_aEditBuffer.WriteUInt32AsString(rFirstPage.GetObjectValue());
428  m_aEditBuffer.WriteCharPtr(" 0 R\n");
429  m_aEditBuffer.WriteCharPtr("/T(Signature");
430  m_aEditBuffer.WriteUInt32AsString(nNextSignature);
431  m_aEditBuffer.WriteCharPtr(")\n");
432  m_aEditBuffer.WriteCharPtr("/V ");
433  m_aEditBuffer.WriteUInt32AsString(nSignatureId);
434  m_aEditBuffer.WriteCharPtr(" 0 R\n");
435  m_aEditBuffer.WriteCharPtr("/DV ");
436  m_aEditBuffer.WriteUInt32AsString(nSignatureId);
437  m_aEditBuffer.WriteCharPtr(" 0 R\n");
438  m_aEditBuffer.WriteCharPtr("/AP<<\n/N ");
439  m_aEditBuffer.WriteUInt32AsString(nAppearanceId);
440  m_aEditBuffer.WriteCharPtr(" 0 R\n>>\n");
441  m_aEditBuffer.WriteCharPtr(">>\nendobj\n\n");
442 
443  return nAnnotId;
444 }
445 
446 bool PDFDocument::WritePageObject(PDFObjectElement& rFirstPage, sal_Int32 nAnnotId)
447 {
448  PDFElement* pAnnots = rFirstPage.Lookup("Annots");
449  auto pAnnotsReference = dynamic_cast<PDFReferenceElement*>(pAnnots);
450  if (pAnnotsReference)
451  {
452  // Write the updated Annots key of the Page object.
453  PDFObjectElement* pAnnotsObject = pAnnotsReference->LookupObject();
454  if (!pAnnotsObject)
455  {
456  SAL_WARN("vcl.filter", "PDFDocument::Sign: invalid Annots reference");
457  return false;
458  }
459 
460  sal_uInt32 nAnnotsId = pAnnotsObject->GetObjectValue();
461  m_aXRef[nAnnotsId].SetType(XRefEntryType::NOT_COMPRESSED);
462  m_aXRef[nAnnotsId].SetOffset(m_aEditBuffer.Tell());
463  m_aXRef[nAnnotsId].SetDirty(true);
464  m_aEditBuffer.WriteUInt32AsString(nAnnotsId);
465  m_aEditBuffer.WriteCharPtr(" 0 obj\n[");
466 
467  // Write existing references.
468  PDFArrayElement* pArray = pAnnotsObject->GetArray();
469  if (!pArray)
470  {
471  SAL_WARN("vcl.filter", "PDFDocument::Sign: Page Annots is a reference to a non-array");
472  return false;
473  }
474 
475  for (size_t i = 0; i < pArray->GetElements().size(); ++i)
476  {
477  auto pReference = dynamic_cast<PDFReferenceElement*>(pArray->GetElements()[i]);
478  if (!pReference)
479  continue;
480 
481  if (i)
482  m_aEditBuffer.WriteCharPtr(" ");
483  m_aEditBuffer.WriteUInt32AsString(pReference->GetObjectValue());
484  m_aEditBuffer.WriteCharPtr(" 0 R");
485  }
486  // Write our reference.
487  m_aEditBuffer.WriteCharPtr(" ");
488  m_aEditBuffer.WriteUInt32AsString(nAnnotId);
489  m_aEditBuffer.WriteCharPtr(" 0 R");
490 
491  m_aEditBuffer.WriteCharPtr("]\nendobj\n\n");
492  }
493  else
494  {
495  // Write the updated first page object, references nAnnotId.
496  sal_uInt32 nFirstPageId = rFirstPage.GetObjectValue();
497  if (nFirstPageId >= m_aXRef.size())
498  {
499  SAL_WARN("vcl.filter", "PDFDocument::Sign: invalid first page obj id");
500  return false;
501  }
502  m_aXRef[nFirstPageId].SetOffset(m_aEditBuffer.Tell());
503  m_aXRef[nFirstPageId].SetDirty(true);
504  m_aEditBuffer.WriteUInt32AsString(nFirstPageId);
505  m_aEditBuffer.WriteCharPtr(" 0 obj\n");
506  m_aEditBuffer.WriteCharPtr("<<");
507  auto pAnnotsArray = dynamic_cast<PDFArrayElement*>(pAnnots);
508  if (!pAnnotsArray)
509  {
510  // No Annots key, just write the key with a single reference.
511  m_aEditBuffer.WriteBytes(static_cast<const char*>(m_aEditBuffer.GetData())
512  + rFirstPage.GetDictionaryOffset(),
513  rFirstPage.GetDictionaryLength());
514  m_aEditBuffer.WriteCharPtr("/Annots[");
515  m_aEditBuffer.WriteUInt32AsString(nAnnotId);
516  m_aEditBuffer.WriteCharPtr(" 0 R]");
517  }
518  else
519  {
520  // Annots key is already there, insert our reference at the end.
521  PDFDictionaryElement* pDictionary = rFirstPage.GetDictionary();
522 
523  // Offset right before the end of the Annots array.
524  sal_uInt64 nAnnotsEndOffset = pDictionary->GetKeyOffset("Annots")
525  + pDictionary->GetKeyValueLength("Annots") - 1;
526  // Length of beginning of the dictionary -> Annots end.
527  sal_uInt64 nAnnotsBeforeEndLength = nAnnotsEndOffset - rFirstPage.GetDictionaryOffset();
528  m_aEditBuffer.WriteBytes(static_cast<const char*>(m_aEditBuffer.GetData())
529  + rFirstPage.GetDictionaryOffset(),
530  nAnnotsBeforeEndLength);
531  m_aEditBuffer.WriteCharPtr(" ");
532  m_aEditBuffer.WriteUInt32AsString(nAnnotId);
533  m_aEditBuffer.WriteCharPtr(" 0 R");
534  // Length of Annots end -> end of the dictionary.
535  sal_uInt64 nAnnotsAfterEndLength = rFirstPage.GetDictionaryOffset()
536  + rFirstPage.GetDictionaryLength()
537  - nAnnotsEndOffset;
538  m_aEditBuffer.WriteBytes(static_cast<const char*>(m_aEditBuffer.GetData())
539  + nAnnotsEndOffset,
540  nAnnotsAfterEndLength);
541  }
542  m_aEditBuffer.WriteCharPtr(">>");
543  m_aEditBuffer.WriteCharPtr("\nendobj\n\n");
544  }
545 
546  return true;
547 }
548 
549 bool PDFDocument::WriteCatalogObject(sal_Int32 nAnnotId, PDFReferenceElement*& pRoot)
550 {
551  if (m_pXRefStream)
552  pRoot = dynamic_cast<PDFReferenceElement*>(m_pXRefStream->Lookup("Root"));
553  else
554  {
555  if (!m_pTrailer)
556  {
557  SAL_WARN("vcl.filter", "PDFDocument::Sign: found no trailer");
558  return false;
559  }
560  pRoot = dynamic_cast<PDFReferenceElement*>(m_pTrailer->Lookup("Root"));
561  }
562  if (!pRoot)
563  {
564  SAL_WARN("vcl.filter", "PDFDocument::Sign: trailer has no root reference");
565  return false;
566  }
567  PDFObjectElement* pCatalog = pRoot->LookupObject();
568  if (!pCatalog)
569  {
570  SAL_WARN("vcl.filter", "PDFDocument::Sign: invalid catalog reference");
571  return false;
572  }
573  sal_uInt32 nCatalogId = pCatalog->GetObjectValue();
574  if (nCatalogId >= m_aXRef.size())
575  {
576  SAL_WARN("vcl.filter", "PDFDocument::Sign: invalid catalog obj id");
577  return false;
578  }
579  PDFElement* pAcroForm = pCatalog->Lookup("AcroForm");
580  auto pAcroFormReference = dynamic_cast<PDFReferenceElement*>(pAcroForm);
581  if (pAcroFormReference)
582  {
583  // Write the updated AcroForm key of the Catalog object.
584  PDFObjectElement* pAcroFormObject = pAcroFormReference->LookupObject();
585  if (!pAcroFormObject)
586  {
587  SAL_WARN("vcl.filter", "PDFDocument::Sign: invalid AcroForm reference");
588  return false;
589  }
590 
591  sal_uInt32 nAcroFormId = pAcroFormObject->GetObjectValue();
592  m_aXRef[nAcroFormId].SetType(XRefEntryType::NOT_COMPRESSED);
593  m_aXRef[nAcroFormId].SetOffset(m_aEditBuffer.Tell());
594  m_aXRef[nAcroFormId].SetDirty(true);
595  m_aEditBuffer.WriteUInt32AsString(nAcroFormId);
596  m_aEditBuffer.WriteCharPtr(" 0 obj\n");
597 
598  // If this is nullptr, then the AcroForm object is not in an object stream.
599  SvMemoryStream* pStreamBuffer = pAcroFormObject->GetStreamBuffer();
600 
601  if (!pAcroFormObject->Lookup("Fields"))
602  {
603  SAL_WARN("vcl.filter",
604  "PDFDocument::Sign: AcroForm object without required Fields key");
605  return false;
606  }
607 
608  PDFDictionaryElement* pAcroFormDictionary = pAcroFormObject->GetDictionary();
609  if (!pAcroFormDictionary)
610  {
611  SAL_WARN("vcl.filter", "PDFDocument::Sign: AcroForm object has no dictionary");
612  return false;
613  }
614 
615  // Offset right before the end of the Fields array.
616  sal_uInt64 nFieldsEndOffset = pAcroFormDictionary->GetKeyOffset("Fields")
617  + pAcroFormDictionary->GetKeyValueLength("Fields")
618  - strlen("]");
619  // Length of beginning of the object dictionary -> Fields end.
620  sal_uInt64 nFieldsBeforeEndLength = nFieldsEndOffset;
621  if (pStreamBuffer)
622  m_aEditBuffer.WriteBytes(pStreamBuffer->GetData(), nFieldsBeforeEndLength);
623  else
624  {
625  nFieldsBeforeEndLength -= pAcroFormObject->GetDictionaryOffset();
626  m_aEditBuffer.WriteCharPtr("<<");
627  m_aEditBuffer.WriteBytes(static_cast<const char*>(m_aEditBuffer.GetData())
628  + pAcroFormObject->GetDictionaryOffset(),
629  nFieldsBeforeEndLength);
630  }
631 
632  // Append our reference at the end of the Fields array.
633  m_aEditBuffer.WriteCharPtr(" ");
634  m_aEditBuffer.WriteUInt32AsString(nAnnotId);
635  m_aEditBuffer.WriteCharPtr(" 0 R");
636 
637  // Length of Fields end -> end of the object dictionary.
638  if (pStreamBuffer)
639  {
640  sal_uInt64 nFieldsAfterEndLength = pStreamBuffer->GetSize() - nFieldsEndOffset;
641  m_aEditBuffer.WriteBytes(static_cast<const char*>(pStreamBuffer->GetData())
642  + nFieldsEndOffset,
643  nFieldsAfterEndLength);
644  }
645  else
646  {
647  sal_uInt64 nFieldsAfterEndLength = pAcroFormObject->GetDictionaryOffset()
648  + pAcroFormObject->GetDictionaryLength()
649  - nFieldsEndOffset;
650  m_aEditBuffer.WriteBytes(static_cast<const char*>(m_aEditBuffer.GetData())
651  + nFieldsEndOffset,
652  nFieldsAfterEndLength);
653  m_aEditBuffer.WriteCharPtr(">>");
654  }
655 
656  m_aEditBuffer.WriteCharPtr("\nendobj\n\n");
657  }
658  else
659  {
660  // Write the updated Catalog object, references nAnnotId.
661  auto pAcroFormDictionary = dynamic_cast<PDFDictionaryElement*>(pAcroForm);
662  m_aXRef[nCatalogId].SetOffset(m_aEditBuffer.Tell());
663  m_aXRef[nCatalogId].SetDirty(true);
664  m_aEditBuffer.WriteUInt32AsString(nCatalogId);
665  m_aEditBuffer.WriteCharPtr(" 0 obj\n");
666  m_aEditBuffer.WriteCharPtr("<<");
667  if (!pAcroFormDictionary)
668  {
669  // No AcroForm key, assume no signatures.
670  m_aEditBuffer.WriteBytes(static_cast<const char*>(m_aEditBuffer.GetData())
671  + pCatalog->GetDictionaryOffset(),
672  pCatalog->GetDictionaryLength());
673  m_aEditBuffer.WriteCharPtr("/AcroForm<</Fields[\n");
674  m_aEditBuffer.WriteUInt32AsString(nAnnotId);
675  m_aEditBuffer.WriteCharPtr(" 0 R\n]/SigFlags 3>>\n");
676  }
677  else
678  {
679  // AcroForm key is already there, insert our reference at the Fields end.
680  auto it = pAcroFormDictionary->GetItems().find("Fields");
681  if (it == pAcroFormDictionary->GetItems().end())
682  {
683  SAL_WARN("vcl.filter", "PDFDocument::Sign: AcroForm without required Fields key");
684  return false;
685  }
686 
687  auto pFields = dynamic_cast<PDFArrayElement*>(it->second);
688  if (!pFields)
689  {
690  SAL_WARN("vcl.filter", "PDFDocument::Sign: AcroForm Fields is not an array");
691  return false;
692  }
693 
694  // Offset right before the end of the Fields array.
695  sal_uInt64 nFieldsEndOffset = pAcroFormDictionary->GetKeyOffset("Fields")
696  + pAcroFormDictionary->GetKeyValueLength("Fields") - 1;
697  // Length of beginning of the Catalog dictionary -> Fields end.
698  sal_uInt64 nFieldsBeforeEndLength = nFieldsEndOffset - pCatalog->GetDictionaryOffset();
699  m_aEditBuffer.WriteBytes(static_cast<const char*>(m_aEditBuffer.GetData())
700  + pCatalog->GetDictionaryOffset(),
701  nFieldsBeforeEndLength);
702  m_aEditBuffer.WriteCharPtr(" ");
703  m_aEditBuffer.WriteUInt32AsString(nAnnotId);
704  m_aEditBuffer.WriteCharPtr(" 0 R");
705  // Length of Fields end -> end of the Catalog dictionary.
706  sal_uInt64 nFieldsAfterEndLength = pCatalog->GetDictionaryOffset()
707  + pCatalog->GetDictionaryLength() - nFieldsEndOffset;
708  m_aEditBuffer.WriteBytes(static_cast<const char*>(m_aEditBuffer.GetData())
709  + nFieldsEndOffset,
710  nFieldsAfterEndLength);
711  }
712  m_aEditBuffer.WriteCharPtr(">>\nendobj\n\n");
713  }
714 
715  return true;
716 }
717 
718 void PDFDocument::WriteXRef(sal_uInt64 nXRefOffset, PDFReferenceElement const* pRoot)
719 {
720  if (m_pXRefStream)
721  {
722  // Write the xref stream.
723  // This is a bit meta: the xref stream stores its own offset.
724  sal_Int32 nXRefStreamId = m_aXRef.size();
725  XRefEntry aXRefStreamEntry;
726  aXRefStreamEntry.SetOffset(nXRefOffset);
727  aXRefStreamEntry.SetDirty(true);
728  m_aXRef[nXRefStreamId] = aXRefStreamEntry;
729 
730  // Write stream data.
731  SvMemoryStream aXRefStream;
732  const size_t nOffsetLen = 3;
733  // 3 additional bytes: predictor, the first and the third field.
734  const size_t nLineLength = nOffsetLen + 3;
735  // This is the line as it appears before tweaking according to the predictor.
736  std::vector<unsigned char> aOrigLine(nLineLength);
737  // This is the previous line.
738  std::vector<unsigned char> aPrevLine(nLineLength);
739  // This is the line as written to the stream.
740  std::vector<unsigned char> aFilteredLine(nLineLength);
741  for (const auto& rXRef : m_aXRef)
742  {
743  const XRefEntry& rEntry = rXRef.second;
744 
745  if (!rEntry.GetDirty())
746  continue;
747 
748  // Predictor.
749  size_t nPos = 0;
750  // PNG prediction: up (on all rows).
751  aOrigLine[nPos++] = 2;
752 
753  // First field.
754  unsigned char nType = 0;
755  switch (rEntry.GetType())
756  {
757  case XRefEntryType::FREE:
758  nType = 0;
759  break;
760  case XRefEntryType::NOT_COMPRESSED:
761  nType = 1;
762  break;
763  case XRefEntryType::COMPRESSED:
764  nType = 2;
765  break;
766  }
767  aOrigLine[nPos++] = nType;
768 
769  // Second field.
770  for (size_t i = 0; i < nOffsetLen; ++i)
771  {
772  size_t nByte = nOffsetLen - i - 1;
773  // Fields requiring more than one byte are stored with the
774  // high-order byte first.
775  unsigned char nCh = (rEntry.GetOffset() & (0xff << (nByte * 8))) >> (nByte * 8);
776  aOrigLine[nPos++] = nCh;
777  }
778 
779  // Third field.
780  aOrigLine[nPos++] = 0;
781 
782  // Now apply the predictor.
783  aFilteredLine[0] = aOrigLine[0];
784  for (size_t i = 1; i < nLineLength; ++i)
785  {
786  // Count the delta vs the previous line.
787  aFilteredLine[i] = aOrigLine[i] - aPrevLine[i];
788  // Remember the new reference.
789  aPrevLine[i] = aOrigLine[i];
790  }
791 
792  aXRefStream.WriteBytes(aFilteredLine.data(), aFilteredLine.size());
793  }
794 
795  m_aEditBuffer.WriteUInt32AsString(nXRefStreamId);
796  m_aEditBuffer.WriteCharPtr(
797  " 0 obj\n<</DecodeParms<</Columns 5/Predictor 12>>/Filter/FlateDecode");
798 
799  // ID.
800  auto pID = dynamic_cast<PDFArrayElement*>(m_pXRefStream->Lookup("ID"));
801  if (pID)
802  {
803  const std::vector<PDFElement*>& rElements = pID->GetElements();
804  m_aEditBuffer.WriteCharPtr("/ID [ <");
805  for (size_t i = 0; i < rElements.size(); ++i)
806  {
807  auto pIDString = dynamic_cast<PDFHexStringElement*>(rElements[i]);
808  if (!pIDString)
809  continue;
810 
811  m_aEditBuffer.WriteOString(pIDString->GetValue());
812  if ((i + 1) < rElements.size())
813  m_aEditBuffer.WriteCharPtr("> <");
814  }
815  m_aEditBuffer.WriteCharPtr("> ] ");
816  }
817 
818  // Index.
819  m_aEditBuffer.WriteCharPtr("/Index [ ");
820  for (const auto& rXRef : m_aXRef)
821  {
822  if (!rXRef.second.GetDirty())
823  continue;
824 
825  m_aEditBuffer.WriteUInt32AsString(rXRef.first);
826  m_aEditBuffer.WriteCharPtr(" 1 ");
827  }
828  m_aEditBuffer.WriteCharPtr("] ");
829 
830  // Info.
831  auto pInfo = dynamic_cast<PDFReferenceElement*>(m_pXRefStream->Lookup("Info"));
832  if (pInfo)
833  {
834  m_aEditBuffer.WriteCharPtr("/Info ");
835  m_aEditBuffer.WriteUInt32AsString(pInfo->GetObjectValue());
836  m_aEditBuffer.WriteCharPtr(" ");
837  m_aEditBuffer.WriteUInt32AsString(pInfo->GetGenerationValue());
838  m_aEditBuffer.WriteCharPtr(" R ");
839  }
840 
841  // Length.
842  m_aEditBuffer.WriteCharPtr("/Length ");
843  {
844  ZCodec aZCodec;
845  aZCodec.BeginCompression();
846  aXRefStream.Seek(0);
847  SvMemoryStream aStream;
848  aZCodec.Compress(aXRefStream, aStream);
849  aZCodec.EndCompression();
850  aXRefStream.Seek(0);
851  aXRefStream.SetStreamSize(0);
852  aStream.Seek(0);
853  aXRefStream.WriteStream(aStream);
854  }
855  m_aEditBuffer.WriteUInt32AsString(aXRefStream.GetSize());
856 
857  if (!m_aStartXRefs.empty())
858  {
859  // Write location of the previous cross-reference section.
860  m_aEditBuffer.WriteCharPtr("/Prev ");
861  m_aEditBuffer.WriteUInt32AsString(m_aStartXRefs.back());
862  }
863 
864  // Root.
865  m_aEditBuffer.WriteCharPtr("/Root ");
866  m_aEditBuffer.WriteUInt32AsString(pRoot->GetObjectValue());
867  m_aEditBuffer.WriteCharPtr(" ");
868  m_aEditBuffer.WriteUInt32AsString(pRoot->GetGenerationValue());
869  m_aEditBuffer.WriteCharPtr(" R ");
870 
871  // Size.
872  m_aEditBuffer.WriteCharPtr("/Size ");
873  m_aEditBuffer.WriteUInt32AsString(m_aXRef.size());
874 
875  m_aEditBuffer.WriteCharPtr("/Type/XRef/W[1 3 1]>>\nstream\n");
876  aXRefStream.Seek(0);
877  m_aEditBuffer.WriteStream(aXRefStream);
878  m_aEditBuffer.WriteCharPtr("\nendstream\nendobj\n\n");
879  }
880  else
881  {
882  // Write the xref table.
883  m_aEditBuffer.WriteCharPtr("xref\n");
884  for (const auto& rXRef : m_aXRef)
885  {
886  size_t nObject = rXRef.first;
887  size_t nOffset = rXRef.second.GetOffset();
888  if (!rXRef.second.GetDirty())
889  continue;
890 
891  m_aEditBuffer.WriteUInt32AsString(nObject);
892  m_aEditBuffer.WriteCharPtr(" 1\n");
893  OStringBuffer aBuffer;
894  aBuffer.append(static_cast<sal_Int32>(nOffset));
895  while (aBuffer.getLength() < 10)
896  aBuffer.insert(0, "0");
897  if (nObject == 0)
898  aBuffer.append(" 65535 f \n");
899  else
900  aBuffer.append(" 00000 n \n");
901  m_aEditBuffer.WriteOString(aBuffer.toString());
902  }
903 
904  // Write the trailer.
905  m_aEditBuffer.WriteCharPtr("trailer\n<</Size ");
906  m_aEditBuffer.WriteUInt32AsString(m_aXRef.size());
907  m_aEditBuffer.WriteCharPtr("/Root ");
908  m_aEditBuffer.WriteUInt32AsString(pRoot->GetObjectValue());
909  m_aEditBuffer.WriteCharPtr(" ");
910  m_aEditBuffer.WriteUInt32AsString(pRoot->GetGenerationValue());
911  m_aEditBuffer.WriteCharPtr(" R\n");
912  auto pInfo = dynamic_cast<PDFReferenceElement*>(m_pTrailer->Lookup("Info"));
913  if (pInfo)
914  {
915  m_aEditBuffer.WriteCharPtr("/Info ");
916  m_aEditBuffer.WriteUInt32AsString(pInfo->GetObjectValue());
917  m_aEditBuffer.WriteCharPtr(" ");
918  m_aEditBuffer.WriteUInt32AsString(pInfo->GetGenerationValue());
919  m_aEditBuffer.WriteCharPtr(" R\n");
920  }
921  auto pID = dynamic_cast<PDFArrayElement*>(m_pTrailer->Lookup("ID"));
922  if (pID)
923  {
924  const std::vector<PDFElement*>& rElements = pID->GetElements();
925  m_aEditBuffer.WriteCharPtr("/ID [ <");
926  for (size_t i = 0; i < rElements.size(); ++i)
927  {
928  auto pIDString = dynamic_cast<PDFHexStringElement*>(rElements[i]);
929  if (!pIDString)
930  continue;
931 
932  m_aEditBuffer.WriteOString(pIDString->GetValue());
933  if ((i + 1) < rElements.size())
934  m_aEditBuffer.WriteCharPtr(">\n<");
935  }
936  m_aEditBuffer.WriteCharPtr("> ]\n");
937  }
938 
939  if (!m_aStartXRefs.empty())
940  {
941  // Write location of the previous cross-reference section.
942  m_aEditBuffer.WriteCharPtr("/Prev ");
943  m_aEditBuffer.WriteUInt32AsString(m_aStartXRefs.back());
944  }
945 
946  m_aEditBuffer.WriteCharPtr(">>\n");
947  }
948 }
949 
950 bool PDFDocument::Sign(const uno::Reference<security::XCertificate>& xCertificate,
951  const OUString& rDescription, bool bAdES)
952 {
953  m_aEditBuffer.Seek(STREAM_SEEK_TO_END);
954  m_aEditBuffer.WriteCharPtr("\n");
955 
956  sal_uInt64 nSignatureLastByteRangeOffset = 0;
957  sal_Int64 nSignatureContentOffset = 0;
958  sal_Int32 nSignatureId = WriteSignatureObject(
959  rDescription, bAdES, nSignatureLastByteRangeOffset, nSignatureContentOffset);
960 
961  tools::Rectangle aSignatureRectangle;
962  sal_Int32 nAppearanceId = WriteAppearanceObject(aSignatureRectangle);
963 
964  std::vector<PDFObjectElement*> aPages = GetPages();
965  if (aPages.empty())
966  {
967  SAL_WARN("vcl.filter", "PDFDocument::Sign: found no pages");
968  return false;
969  }
970 
971  size_t nPage = 0;
972  if (m_nSignaturePage < aPages.size())
973  {
974  nPage = m_nSignaturePage;
975  }
976  if (!aPages[nPage])
977  {
978  SAL_WARN("vcl.filter", "PDFDocument::Sign: failed to find page #" << nPage);
979  return false;
980  }
981 
982  PDFObjectElement& rPage = *aPages[nPage];
983  sal_Int32 nAnnotId = WriteAnnotObject(rPage, nSignatureId, nAppearanceId, aSignatureRectangle);
984 
985  if (!WritePageObject(rPage, nAnnotId))
986  {
987  SAL_WARN("vcl.filter", "PDFDocument::Sign: failed to write the updated Page object");
988  return false;
989  }
990 
991  PDFReferenceElement* pRoot = nullptr;
992  if (!WriteCatalogObject(nAnnotId, pRoot))
993  {
994  SAL_WARN("vcl.filter", "PDFDocument::Sign: failed to write the updated Catalog object");
995  return false;
996  }
997 
998  sal_uInt64 nXRefOffset = m_aEditBuffer.Tell();
999  WriteXRef(nXRefOffset, pRoot);
1000 
1001  // Write startxref.
1002  m_aEditBuffer.WriteCharPtr("startxref\n");
1003  m_aEditBuffer.WriteUInt32AsString(nXRefOffset);
1004  m_aEditBuffer.WriteCharPtr("\n%%EOF\n");
1005 
1006  // Finalize the signature, now that we know the total file size.
1007  // Calculate the length of the last byte range.
1008  sal_uInt64 nFileEnd = m_aEditBuffer.Tell();
1009  sal_Int64 nLastByteRangeLength
1010  = nFileEnd - (nSignatureContentOffset + MAX_SIGNATURE_CONTENT_LENGTH + 1);
1011  // Write the length to the buffer.
1012  m_aEditBuffer.Seek(nSignatureLastByteRangeOffset);
1013  OString aByteRangeBuffer = OString::number(nLastByteRangeLength) + " ]";
1014  m_aEditBuffer.WriteOString(aByteRangeBuffer);
1015 
1016  // Create the PKCS#7 object.
1017  css::uno::Sequence<sal_Int8> aDerEncoded = xCertificate->getEncoded();
1018  if (!aDerEncoded.hasElements())
1019  {
1020  SAL_WARN("vcl.filter", "PDFDocument::Sign: empty certificate");
1021  return false;
1022  }
1023 
1024  m_aEditBuffer.Seek(0);
1025  sal_uInt64 nBufferSize1 = nSignatureContentOffset - 1;
1026  std::unique_ptr<char[]> aBuffer1(new char[nBufferSize1]);
1027  m_aEditBuffer.ReadBytes(aBuffer1.get(), nBufferSize1);
1028 
1029  m_aEditBuffer.Seek(nSignatureContentOffset + MAX_SIGNATURE_CONTENT_LENGTH + 1);
1030  sal_uInt64 nBufferSize2 = nLastByteRangeLength;
1031  std::unique_ptr<char[]> aBuffer2(new char[nBufferSize2]);
1032  m_aEditBuffer.ReadBytes(aBuffer2.get(), nBufferSize2);
1033 
1034  OStringBuffer aCMSHexBuffer;
1035  svl::crypto::Signing aSigning(xCertificate);
1036  aSigning.AddDataRange(aBuffer1.get(), nBufferSize1);
1037  aSigning.AddDataRange(aBuffer2.get(), nBufferSize2);
1038  if (!aSigning.Sign(aCMSHexBuffer))
1039  {
1040  SAL_WARN("vcl.filter", "PDFDocument::Sign: PDFWriter::Sign() failed");
1041  return false;
1042  }
1043 
1044  assert(aCMSHexBuffer.getLength() <= MAX_SIGNATURE_CONTENT_LENGTH);
1045 
1046  m_aEditBuffer.Seek(nSignatureContentOffset);
1047  m_aEditBuffer.WriteOString(aCMSHexBuffer.toString());
1048 
1049  return true;
1050 }
1051 
1052 bool PDFDocument::Write(SvStream& rStream)
1053 {
1054  m_aEditBuffer.Seek(0);
1055  rStream.WriteStream(m_aEditBuffer);
1056  return rStream.good();
1057 }
1058 
// Splits the bytes of rStream into PDF elements and appends them to rElements.
//
// The stream is consumed one character at a time. The first character of a
// token selects the element type; in most cases the stream is then rewound to
// the start of the token and the element's own Read() consumes it.
//
// Besides filling rElements, this updates document state: m_aStartXRefs,
// m_pXRefStream, m_aOffsetObjects, m_aIDObjects, m_aOffsetTrailers and
// m_pTrailer.
//
// rStream: read from its current position.
// eMode: EOF_TOKEN stops after a comment that ends at the last offset stored
//   in m_aEOFs, END_OF_OBJECT stops after the first "endobj"; otherwise
//   reading continues until the end of the stream.
// rElements: receives ownership of every created element.
// pObjectElement: initial value of the "last seen object", may be nullptr.
// Returns false when an element fails to read or when an unexpected keyword
// or character is found, true otherwise.
1059 bool PDFDocument::Tokenize(SvStream& rStream, TokenizeMode eMode,
1060  std::vector<std::unique_ptr<PDFElement>>& rElements,
1061  PDFObjectElement* pObjectElement)
1062 {
1063  // Last seen object token.
1064  PDFObjectElement* pObject = pObjectElement;
// Last seen name token that did not complete a "/Type /ObjStm" pair.
1065  PDFNameElement* pObjectKey = nullptr;
// Set once the current object turned out to be an object stream.
1066  PDFObjectElement* pObjectStream = nullptr;
// True once the "xref" keyword was seen; enables the 'f' and 'n' keywords.
1067  bool bInXRef = false;
1068  // The next number will be an xref offset.
1069  bool bInStartXRef = false;
1070  // Dictionary depth, so we know when we're outside any dictionaries.
1071  int nDictionaryDepth = 0;
1072  // Array depth, only the offset/length of the toplevel array is tracked.
1073  int nArrayDepth = 0;
1074  // Last seen array token that's outside any dictionaries.
1075  PDFArrayElement* pArray = nullptr;
1076  // If we're inside an obj/endobj pair.
1077  bool bInObject = false;
1078  while (true)
1079  {
1080  char ch;
1081  rStream.ReadChar(ch);
1082  if (rStream.eof())
1083  break;
1084 
1085  switch (ch)
1086  {
1087  case '%':
1088  {
// Comment. The element gets a reference to this document; presumably its
// Read() reports "%%EOF" markers via PushBackEOF(), which is what the
// m_aEOFs check below relies on.
1089  auto pComment = new PDFCommentElement(*this);
1090  rElements.push_back(std::unique_ptr<PDFElement>(pComment));
1091  rStream.SeekRel(-1);
1092  if (!rElements.back()->Read(rStream))
1093  {
1094  SAL_WARN("vcl.filter",
1095  "PDFDocument::Tokenize: PDFCommentElement::Read() failed");
1096  return false;
1097  }
1098  if (eMode == TokenizeMode::EOF_TOKEN && !m_aEOFs.empty()
1099  && m_aEOFs.back() == rStream.Tell())
1100  {
1101  // Found EOF and partial parsing requested, we're done.
1102  return true;
1103  }
1104  break;
1105  }
1106  case '<':
1107  {
1108  // Dictionary or hex string.
// Peek at the following character, then step back over both characters so
// that the element's Read() starts at the first '<'.
1109  rStream.ReadChar(ch);
1110  rStream.SeekRel(-2);
1111  if (ch == '<')
1112  {
1113  rElements.push_back(std::unique_ptr<PDFElement>(new PDFDictionaryElement()));
1114  ++nDictionaryDepth;
1115  }
1116  else
1117  rElements.push_back(std::unique_ptr<PDFElement>(new PDFHexStringElement));
1118  if (!rElements.back()->Read(rStream))
1119  {
1120  SAL_WARN("vcl.filter",
1121  "PDFDocument::Tokenize: PDFDictionaryElement::Read() failed");
1122  return false;
1123  }
1124  break;
1125  }
1126  case '>':
1127  {
// A '>' seen here is treated as the end of a dictionary.
1128  rElements.push_back(std::unique_ptr<PDFElement>(new PDFEndDictionaryElement()));
1129  --nDictionaryDepth;
1130  rStream.SeekRel(-1);
1131  if (!rElements.back()->Read(rStream))
1132  {
1133  SAL_WARN("vcl.filter",
1134  "PDFDocument::Tokenize: PDFEndDictionaryElement::Read() failed");
1135  return false;
1136  }
1137  break;
1138  }
1139  case '[':
1140  {
1141  auto pArr = new PDFArrayElement(pObject);
1142  rElements.push_back(std::unique_ptr<PDFElement>(pArr));
1143  if (nDictionaryDepth == 0 && nArrayDepth == 0)
1144  {
1145  // The array is attached directly, inform the object.
1146  pArray = pArr;
1147  if (pObject)
1148  {
1149  pObject->SetArray(pArray);
// The '[' was already consumed, so this is the position right after it.
1150  pObject->SetArrayOffset(rStream.Tell());
1151  }
1152  }
1153  ++nArrayDepth;
1154  rStream.SeekRel(-1);
1155  if (!rElements.back()->Read(rStream))
1156  {
1157  SAL_WARN("vcl.filter", "PDFDocument::Tokenize: PDFArrayElement::Read() failed");
1158  return false;
1159  }
1160  break;
1161  }
1162  case ']':
1163  {
1164  rElements.push_back(std::unique_ptr<PDFElement>(new PDFEndArrayElement()));
1165  --nArrayDepth;
1166  if (nArrayDepth == 0)
1167  pArray = nullptr;
1168  rStream.SeekRel(-1);
1169  if (nDictionaryDepth == 0 && nArrayDepth == 0)
1170  {
1171  if (pObject)
1172  {
// After the rewind the stream is at the ']' itself, so the length covers the
// bytes between the brackets.
1173  pObject->SetArrayLength(rStream.Tell() - pObject->GetArrayOffset());
1174  }
1175  }
1176  if (!rElements.back()->Read(rStream))
1177  {
1178  SAL_WARN("vcl.filter",
1179  "PDFDocument::Tokenize: PDFEndArrayElement::Read() failed");
1180  return false;
1181  }
1182  break;
1183  }
1184  case '/':
1185  {
1186  auto pNameElement = new PDFNameElement();
1187  rElements.push_back(std::unique_ptr<PDFElement>(pNameElement));
1188  rStream.SeekRel(-1);
1189  if (!pNameElement->Read(rStream))
1190  {
1191  SAL_WARN("vcl.filter", "PDFDocument::Tokenize: PDFNameElement::Read() failed");
1192  return false;
1193  }
// "/Type /ObjStm" marks the current object as an object stream; any other
// name becomes the candidate key for the next name.
1194  if (pObject && pObjectKey && pObjectKey->GetValue() == "Type"
1195  && pNameElement->GetValue() == "ObjStm")
1196  pObjectStream = pObject;
1197  else
1198  pObjectKey = pNameElement;
1199  break;
1200  }
1201  case '(':
1202  {
1203  rElements.push_back(std::unique_ptr<PDFElement>(new PDFLiteralStringElement));
1204  rStream.SeekRel(-1);
1205  if (!rElements.back()->Read(rStream))
1206  {
1207  SAL_WARN("vcl.filter",
1208  "PDFDocument::Tokenize: PDFLiteralStringElement::Read() failed");
1209  return false;
1210  }
1211  break;
1212  }
1213  default:
1214  {
1215  if (rtl::isAsciiDigit(static_cast<unsigned char>(ch)) || ch == '-')
1216  {
1217  // Numeric object: an integer or a real.
1218  auto pNumberElement = new PDFNumberElement();
1219  rElements.push_back(std::unique_ptr<PDFElement>(pNumberElement));
1220  rStream.SeekRel(-1);
1221  if (!pNumberElement->Read(rStream))
1222  {
1223  SAL_WARN("vcl.filter",
1224  "PDFDocument::Tokenize: PDFNumberElement::Read() failed");
1225  return false;
1226  }
1227  if (bInStartXRef)
1228  {
// This number follows "startxref": remember it, and if an object starts at
// that offset, treat that object as the cross-reference stream.
1229  bInStartXRef = false;
1230  m_aStartXRefs.push_back(pNumberElement->GetValue());
1231 
1232  auto it = m_aOffsetObjects.find(pNumberElement->GetValue());
1233  if (it != m_aOffsetObjects.end())
1234  m_pXRefStream = it->second;
1235  }
1236  else if (bInObject && !nDictionaryDepth && !nArrayDepth && pObject)
1237  // Number element inside an object, but outside a
1238  // dictionary / array: remember it.
1239  pObject->SetNumberElement(pNumberElement);
1240  }
1241  else if (rtl::isAsciiAlpha(static_cast<unsigned char>(ch)))
1242  {
1243  // Possible keyword, like "obj".
1244  rStream.SeekRel(-1);
1245  OString aKeyword = ReadKeyword(rStream);
1246 
1247  bool bObj = aKeyword == "obj";
1248  if (bObj || aKeyword == "R")
1249  {
// Both keywords must be preceded by two number tokens: the object number and
// the generation number.
1250  size_t nElements = rElements.size();
1251  if (nElements < 2)
1252  {
1253  SAL_WARN("vcl.filter", "PDFDocument::Tokenize: expected at least two "
1254  "tokens before 'obj' or 'R' keyword");
1255  return false;
1256  }
1257 
1258  auto pObjectNumber
1259  = dynamic_cast<PDFNumberElement*>(rElements[nElements - 2].get());
1260  auto pGenerationNumber
1261  = dynamic_cast<PDFNumberElement*>(rElements[nElements - 1].get());
1262  if (!pObjectNumber || !pGenerationNumber)
1263  {
1264  SAL_WARN("vcl.filter", "PDFDocument::Tokenize: missing object or "
1265  "generation number before 'obj' or 'R' keyword");
1266  return false;
1267  }
1268 
1269  if (bObj)
1270  {
// New object: make it findable both by the location of its object number
// token and by its object number.
1271  pObject = new PDFObjectElement(*this, pObjectNumber->GetValue(),
1272  pGenerationNumber->GetValue());
1273  rElements.push_back(std::unique_ptr<PDFElement>(pObject));
1274  m_aOffsetObjects[pObjectNumber->GetLocation()] = pObject;
1275  m_aIDObjects[pObjectNumber->GetValue()] = pObject;
1276  bInObject = true;
1277  }
1278  else
1279  {
1280  auto pReference = new PDFReferenceElement(*this, *pObjectNumber,
1281  *pGenerationNumber);
1282  rElements.push_back(std::unique_ptr<PDFElement>(pReference));
1283  if (pArray)
1284  // Reference is part of a direct (non-dictionary) array, inform the array.
1285  pArray->PushBack(rElements.back().get());
1286  if (bInObject && nDictionaryDepth > 0 && pObject)
1287  // Inform the object about a new in-dictionary reference.
1288  pObject->AddDictionaryReference(pReference);
1289  }
1290  if (!rElements.back()->Read(rStream))
1291  {
1292  SAL_WARN("vcl.filter",
1293  "PDFDocument::Tokenize: PDFElement::Read() failed");
1294  return false;
1295  }
1296  }
1297  else if (aKeyword == "stream")
1298  {
1299  // Look up the length of the stream from the parent object's dictionary.
1300  size_t nLength = 0;
1301  for (size_t nElement = 0; nElement < rElements.size(); ++nElement)
1302  {
1303  // Iterate in reverse order.
1304  size_t nIndex = rElements.size() - nElement - 1;
1305  PDFElement* pElement = rElements[nIndex].get();
1306  auto pObj = dynamic_cast<PDFObjectElement*>(pElement);
1307  if (!pObj)
1308  continue;
1309 
// Only the closest preceding object is considered: every path below leaves
// the loop.
1310  PDFElement* pLookup = pObj->Lookup("Length");
1311  auto pReference = dynamic_cast<PDFReferenceElement*>(pLookup);
1312  if (pReference)
1313  {
1314  // Length is provided as a reference.
1315  nLength = pReference->LookupNumber(rStream);
1316  break;
1317  }
1318 
1319  auto pNumber = dynamic_cast<PDFNumberElement*>(pLookup);
1320  if (pNumber)
1321  {
1322  // Length is provided directly.
1323  nLength = pNumber->GetValue();
1324  break;
1325  }
1326 
1327  SAL_WARN(
1328  "vcl.filter",
1329  "PDFDocument::Tokenize: found no Length key for stream keyword");
1330  return false;
1331  }
1332 
1333  PDFDocument::SkipLineBreaks(rStream);
1334  auto pStreamElement = new PDFStreamElement(nLength);
1335  if (pObject)
1336  pObject->SetStream(pStreamElement);
1337  rElements.push_back(std::unique_ptr<PDFElement>(pStreamElement));
1338  if (!rElements.back()->Read(rStream))
1339  {
1340  SAL_WARN("vcl.filter",
1341  "PDFDocument::Tokenize: PDFStreamElement::Read() failed");
1342  return false;
1343  }
1344  }
1345  else if (aKeyword == "endstream")
1346  {
1347  rElements.push_back(std::unique_ptr<PDFElement>(new PDFEndStreamElement));
1348  if (!rElements.back()->Read(rStream))
1349  {
1350  SAL_WARN("vcl.filter",
1351  "PDFDocument::Tokenize: PDFEndStreamElement::Read() failed");
1352  return false;
1353  }
1354  }
1355  else if (aKeyword == "endobj")
1356  {
1357  rElements.push_back(std::unique_ptr<PDFElement>(new PDFEndObjectElement));
1358  if (!rElements.back()->Read(rStream))
1359  {
1360  SAL_WARN("vcl.filter",
1361  "PDFDocument::Tokenize: PDFEndObjectElement::Read() failed");
1362  return false;
1363  }
1364  if (eMode == TokenizeMode::END_OF_OBJECT)
1365  {
1366  // Found endobj and only object parsing was requested, we're done.
1367  return true;
1368  }
1369 
1370  if (pObjectStream)
1371  {
1372  // We're at the end of an object stream, parse the stored objects.
1373  pObjectStream->ParseStoredObjects();
1374  pObjectStream = nullptr;
1375  pObjectKey = nullptr;
1376  }
1377  bInObject = false;
1378  }
1379  else if (aKeyword == "true" || aKeyword == "false")
1380  rElements.push_back(std::unique_ptr<PDFElement>(
1381  new PDFBooleanElement(aKeyword.toBoolean())));
1382  else if (aKeyword == "null")
1383  rElements.push_back(std::unique_ptr<PDFElement>(new PDFNullElement));
1384  else if (aKeyword == "xref")
1385  // Allow 'f' and 'n' keywords.
1386  bInXRef = true;
1387  else if (bInXRef && (aKeyword == "f" || aKeyword == "n"))
1388  {
// Entry markers of a cross-reference table: accepted, but no token is
// created for them.
1389  }
1390  else if (aKeyword == "trailer")
1391  {
1392  auto pTrailer = new PDFTrailerElement(*this);
1393 
1394  // Make it possible to find this trailer later by offset.
// Note that the result of Read() is not checked here.
1395  pTrailer->Read(rStream);
1396  m_aOffsetTrailers[pTrailer->GetLocation()] = pTrailer;
1397 
1398  // When reading till the first EOF token only, remember
1399  // just the first trailer token.
1400  if (eMode != TokenizeMode::EOF_TOKEN || !m_pTrailer)
1401  m_pTrailer = pTrailer;
1402  rElements.push_back(std::unique_ptr<PDFElement>(pTrailer));
1403  }
1404  else if (aKeyword == "startxref")
1405  {
1406  bInStartXRef = true;
1407  }
1408  else
1409  {
1410  SAL_WARN("vcl.filter", "PDFDocument::Tokenize: unexpected '"
1411  << aKeyword << "' keyword at byte position "
1412  << rStream.Tell());
1413  return false;
1414  }
1415  }
1416  else
1417  {
// Anything else must be whitespace, which is skipped.
1418  if (!rtl::isAsciiWhiteSpace(static_cast<unsigned char>(ch)))
1419  {
1420  SAL_WARN("vcl.filter", "PDFDocument::Tokenize: unexpected character: "
1421  << ch << " at byte position " << rStream.Tell());
1422  return false;
1423  }
1424  }
1425  break;
1426  }
1427  }
1428  }
1429 
1430  return true;
1431 }
1432 
1433 void PDFDocument::SetIDObject(size_t nID, PDFObjectElement* pObject)
1434 {
1435  m_aIDObjects[nID] = pObject;
1436 }
1437 
1438 bool PDFDocument::Read(SvStream& rStream)
1439 {
1440  // Check file magic.
1441  std::vector<sal_Int8> aHeader(5);
1442  rStream.Seek(0);
1443  rStream.ReadBytes(aHeader.data(), aHeader.size());
1444  if (aHeader[0] != '%' || aHeader[1] != 'P' || aHeader[2] != 'D' || aHeader[3] != 'F'
1445  || aHeader[4] != '-')
1446  {
1447  SAL_WARN("vcl.filter", "PDFDocument::Read: header mismatch");
1448  return false;
1449  }
1450 
1451  // Allow later editing of the contents in-memory.
1452  rStream.Seek(0);
1453  m_aEditBuffer.WriteStream(rStream);
1454 
1455  // Look up the offset of the xref table.
1456  size_t nStartXRef = FindStartXRef(rStream);
1457  SAL_INFO("vcl.filter", "PDFDocument::Read: nStartXRef is " << nStartXRef);
1458  if (nStartXRef == 0)
1459  {
1460  SAL_WARN("vcl.filter", "PDFDocument::Read: found no xref start offset");
1461  return false;
1462  }
1463  while (true)
1464  {
1465  rStream.Seek(nStartXRef);
1466  OString aKeyword = ReadKeyword(rStream);
1467  if (aKeyword.isEmpty())
1468  ReadXRefStream(rStream);
1469 
1470  else
1471  {
1472  if (aKeyword != "xref")
1473  {
1474  SAL_WARN("vcl.filter", "PDFDocument::Read: xref is not the first keyword");
1475  return false;
1476  }
1477  ReadXRef(rStream);
1478  if (!Tokenize(rStream, TokenizeMode::EOF_TOKEN, m_aElements, nullptr))
1479  {
1480  SAL_WARN("vcl.filter", "PDFDocument::Read: failed to tokenizer trailer after xref");
1481  return false;
1482  }
1483  }
1484 
1485  PDFNumberElement* pPrev = nullptr;
1486  if (m_pTrailer)
1487  {
1488  pPrev = dynamic_cast<PDFNumberElement*>(m_pTrailer->Lookup("Prev"));
1489 
1490  // Remember the offset of this trailer in the correct order. It's
1491  // possible that newer trailers don't have a larger offset.
1492  m_aTrailerOffsets.push_back(m_pTrailer->GetLocation());
1493  }
1494  else if (m_pXRefStream)
1495  pPrev = dynamic_cast<PDFNumberElement*>(m_pXRefStream->Lookup("Prev"));
1496  if (pPrev)
1497  nStartXRef = pPrev->GetValue();
1498 
1499  // Reset state, except the edit buffer.
1500  m_aElements.clear();
1501  m_aOffsetObjects.clear();
1502  m_aIDObjects.clear();
1503  m_aStartXRefs.clear();
1504  m_aEOFs.clear();
1505  m_pTrailer = nullptr;
1506  m_pXRefStream = nullptr;
1507  if (!pPrev)
1508  break;
1509  }
1510 
1511  // Then we can tokenize the stream.
1512  rStream.Seek(0);
1513  return Tokenize(rStream, TokenizeMode::END_OF_STREAM, m_aElements, nullptr);
1514 }
1515 
1516 OString PDFDocument::ReadKeyword(SvStream& rStream)
1517 {
1518  OStringBuffer aBuf;
1519  char ch;
1520  rStream.ReadChar(ch);
1521  if (rStream.eof())
1522  return OString();
1523  while (rtl::isAsciiAlpha(static_cast<unsigned char>(ch)))
1524  {
1525  aBuf.append(ch);
1526  rStream.ReadChar(ch);
1527  if (rStream.eof())
1528  return aBuf.toString();
1529  }
1530  rStream.SeekRel(-1);
1531  return aBuf.toString();
1532 }
1533 
1534 size_t PDFDocument::FindStartXRef(SvStream& rStream)
1535 {
1536  // Find the "startxref" token, somewhere near the end of the document.
1537  std::vector<char> aBuf(1024);
1538  rStream.Seek(STREAM_SEEK_TO_END);
1539  if (rStream.Tell() > aBuf.size())
1540  rStream.SeekRel(static_cast<sal_Int64>(-1) * aBuf.size());
1541  else
1542  // The document is really short, then just read it from the start.
1543  rStream.Seek(0);
1544  size_t nBeforePeek = rStream.Tell();
1545  size_t nSize = rStream.ReadBytes(aBuf.data(), aBuf.size());
1546  rStream.Seek(nBeforePeek);
1547  if (nSize != aBuf.size())
1548  aBuf.resize(nSize);
1549  OString aPrefix("startxref");
1550  // Find the last startxref at the end of the document.
1551  auto itLastValid = aBuf.end();
1552  auto it = aBuf.begin();
1553  while (true)
1554  {
1555  it = std::search(it, aBuf.end(), aPrefix.getStr(), aPrefix.getStr() + aPrefix.getLength());
1556  if (it == aBuf.end())
1557  break;
1558 
1559  itLastValid = it;
1560  ++it;
1561  }
1562  if (itLastValid == aBuf.end())
1563  {
1564  SAL_WARN("vcl.filter", "PDFDocument::FindStartXRef: found no startxref");
1565  return 0;
1566  }
1567 
1568  rStream.SeekRel(itLastValid - aBuf.begin() + aPrefix.getLength());
1569  if (rStream.eof())
1570  {
1571  SAL_WARN("vcl.filter",
1572  "PDFDocument::FindStartXRef: unexpected end of stream after startxref");
1573  return 0;
1574  }
1575 
1576  PDFDocument::SkipWhitespace(rStream);
1577  PDFNumberElement aNumber;
1578  if (!aNumber.Read(rStream))
1579  return 0;
1580  return aNumber.GetValue();
1581 }
1582 
1583 void PDFDocument::ReadXRefStream(SvStream& rStream)
1584 {
1585  // Look up the stream length in the object dictionary.
1586  if (!Tokenize(rStream, TokenizeMode::END_OF_OBJECT, m_aElements, nullptr))
1587  {
1588  SAL_WARN("vcl.filter", "PDFDocument::ReadXRefStream: failed to read object");
1589  return;
1590  }
1591 
1592  if (m_aElements.empty())
1593  {
1594  SAL_WARN("vcl.filter", "PDFDocument::ReadXRefStream: no tokens found");
1595  return;
1596  }
1597 
1598  PDFObjectElement* pObject = nullptr;
1599  for (const auto& pElement : m_aElements)
1600  {
1601  if (auto pObj = dynamic_cast<PDFObjectElement*>(pElement.get()))
1602  {
1603  pObject = pObj;
1604  break;
1605  }
1606  }
1607  if (!pObject)
1608  {
1609  SAL_WARN("vcl.filter", "PDFDocument::ReadXRefStream: no object token found");
1610  return;
1611  }
1612 
1613  // So that the Prev key can be looked up later.
1614  m_pXRefStream = pObject;
1615 
1616  PDFElement* pLookup = pObject->Lookup("Length");
1617  auto pNumber = dynamic_cast<PDFNumberElement*>(pLookup);
1618  if (!pNumber)
1619  {
1620  SAL_WARN("vcl.filter", "PDFDocument::ReadXRefStream: stream length is not provided");
1621  return;
1622  }
1623  sal_uInt64 nLength = pNumber->GetValue();
1624 
1625  // Look up the stream offset.
1626  PDFStreamElement* pStream = nullptr;
1627  for (const auto& pElement : m_aElements)
1628  {
1629  if (auto pS = dynamic_cast<PDFStreamElement*>(pElement.get()))
1630  {
1631  pStream = pS;
1632  break;
1633  }
1634  }
1635  if (!pStream)
1636  {
1637  SAL_WARN("vcl.filter", "PDFDocument::ReadXRefStream: no stream token found");
1638  return;
1639  }
1640 
1641  // Read and decompress it.
1642  rStream.Seek(pStream->GetOffset());
1643  std::vector<char> aBuf(nLength);
1644  rStream.ReadBytes(aBuf.data(), aBuf.size());
1645 
1646  auto pFilter = dynamic_cast<PDFNameElement*>(pObject->Lookup("Filter"));
1647  if (!pFilter)
1648  {
1649  SAL_WARN("vcl.filter", "PDFDocument::ReadXRefStream: no Filter found");
1650  return;
1651  }
1652 
1653  if (pFilter->GetValue() != "FlateDecode")
1654  {
1655  SAL_WARN("vcl.filter",
1656  "PDFDocument::ReadXRefStream: unexpected filter: " << pFilter->GetValue());
1657  return;
1658  }
1659 
1660  int nColumns = 1;
1661  int nPredictor = 1;
1662  if (auto pDecodeParams = dynamic_cast<PDFDictionaryElement*>(pObject->Lookup("DecodeParms")))
1663  {
1664  const std::map<OString, PDFElement*>& rItems = pDecodeParams->GetItems();
1665  auto it = rItems.find("Columns");
1666  if (it != rItems.end())
1667  if (auto pColumns = dynamic_cast<PDFNumberElement*>(it->second))
1668  nColumns = pColumns->GetValue();
1669  it = rItems.find("Predictor");
1670  if (it != rItems.end())
1671  if (auto pPredictor = dynamic_cast<PDFNumberElement*>(it->second))
1672  nPredictor = pPredictor->GetValue();
1673  }
1674 
1675  SvMemoryStream aSource(aBuf.data(), aBuf.size(), StreamMode::READ);
1676  SvMemoryStream aStream;
1677  ZCodec aZCodec;
1678  aZCodec.BeginCompression();
1679  aZCodec.Decompress(aSource, aStream);
1680  if (!aZCodec.EndCompression())
1681  {
1682  SAL_WARN("vcl.filter", "PDFDocument::ReadXRefStream: decompression failed");
1683  return;
1684  }
1685 
1686  // Look up the first and the last entry we need to read.
1687  auto pIndex = dynamic_cast<PDFArrayElement*>(pObject->Lookup("Index"));
1688  std::vector<size_t> aFirstObjects;
1689  std::vector<size_t> aNumberOfObjects;
1690  if (!pIndex)
1691  {
1692  auto pSize = dynamic_cast<PDFNumberElement*>(pObject->Lookup("Size"));
1693  if (pSize)
1694  {
1695  aFirstObjects.push_back(0);
1696  aNumberOfObjects.push_back(pSize->GetValue());
1697  }
1698  else
1699  {
1700  SAL_WARN("vcl.filter", "PDFDocument::ReadXRefStream: Index and Size not found");
1701  return;
1702  }
1703  }
1704  else
1705  {
1706  const std::vector<PDFElement*>& rIndexElements = pIndex->GetElements();
1707  size_t nFirstObject = 0;
1708  for (size_t i = 0; i < rIndexElements.size(); ++i)
1709  {
1710  if (i % 2 == 0)
1711  {
1712  auto pFirstObject = dynamic_cast<PDFNumberElement*>(rIndexElements[i]);
1713  if (!pFirstObject)
1714  {
1715  SAL_WARN("vcl.filter",
1716  "PDFDocument::ReadXRefStream: Index has no first object");
1717  return;
1718  }
1719  nFirstObject = pFirstObject->GetValue();
1720  continue;
1721  }
1722 
1723  auto pNumberOfObjects = dynamic_cast<PDFNumberElement*>(rIndexElements[i]);
1724  if (!pNumberOfObjects)
1725  {
1726  SAL_WARN("vcl.filter",
1727  "PDFDocument::ReadXRefStream: Index has no number of objects");
1728  return;
1729  }
1730  aFirstObjects.push_back(nFirstObject);
1731  aNumberOfObjects.push_back(pNumberOfObjects->GetValue());
1732  }
1733  }
1734 
1735  // Look up the format of a single entry.
1736  const int nWSize = 3;
1737  auto pW = dynamic_cast<PDFArrayElement*>(pObject->Lookup("W"));
1738  if (!pW || pW->GetElements().size() < nWSize)
1739  {
1740  SAL_WARN("vcl.filter", "PDFDocument::ReadXRefStream: W not found or has < 3 elements");
1741  return;
1742  }
1743  int aW[nWSize];
1744  // First character is the (kind of) repeated predictor.
1745  int nLineLength = 1;
1746  for (size_t i = 0; i < nWSize; ++i)
1747  {
1748  auto pI = dynamic_cast<PDFNumberElement*>(pW->GetElements()[i]);
1749  if (!pI)
1750  {
1751  SAL_WARN("vcl.filter", "PDFDocument::ReadXRefStream: W contains non-number");
1752  return;
1753  }
1754  aW[i] = pI->GetValue();
1755  nLineLength += aW[i];
1756  }
1757 
1758  if (nPredictor > 1 && nLineLength - 1 != nColumns)
1759  {
1760  SAL_WARN("vcl.filter",
1761  "PDFDocument::ReadXRefStream: /DecodeParms/Columns is inconsistent with /W");
1762  return;
1763  }
1764 
1765  aStream.Seek(0);
1766  for (size_t nSubSection = 0; nSubSection < aFirstObjects.size(); ++nSubSection)
1767  {
1768  size_t nFirstObject = aFirstObjects[nSubSection];
1769  size_t nNumberOfObjects = aNumberOfObjects[nSubSection];
1770 
1771  // This is the line as read from the stream.
1772  std::vector<unsigned char> aOrigLine(nLineLength);
1773  // This is the line as it appears after tweaking according to nPredictor.
1774  std::vector<unsigned char> aFilteredLine(nLineLength);
1775  for (size_t nEntry = 0; nEntry < nNumberOfObjects; ++nEntry)
1776  {
1777  size_t nIndex = nFirstObject + nEntry;
1778 
1779  aStream.ReadBytes(aOrigLine.data(), aOrigLine.size());
1780  if (nPredictor > 1 && aOrigLine[0] + 10 != nPredictor)
1781  {
1782  SAL_WARN("vcl.filter", "PDFDocument::ReadXRefStream: in-stream predictor is "
1783  "inconsistent with /DecodeParms/Predictor for object #"
1784  << nIndex);
1785  return;
1786  }
1787 
1788  for (int i = 0; i < nLineLength; ++i)
1789  {
1790  switch (nPredictor)
1791  {
1792  case 1:
1793  // No prediction.
1794  break;
1795  case 12:
1796  // PNG prediction: up (on all rows).
1797  aFilteredLine[i] = aFilteredLine[i] + aOrigLine[i];
1798  break;
1799  default:
1800  SAL_WARN("vcl.filter", "PDFDocument::ReadXRefStream: unexpected predictor: "
1801  << nPredictor);
1802  return;
1803  break;
1804  }
1805  }
1806 
1807  // First character is already handled above.
1808  int nPos = 1;
1809  size_t nType = 0;
1810  // Start of the current field in the stream data.
1811  int nOffset = nPos;
1812  for (; nPos < nOffset + aW[0]; ++nPos)
1813  {
1814  unsigned char nCh = aFilteredLine[nPos];
1815  nType = (nType << 8) + nCh;
1816  }
1817 
1818  // Start of the object in the file stream.
1819  size_t nStreamOffset = 0;
1820  nOffset = nPos;
1821  for (; nPos < nOffset + aW[1]; ++nPos)
1822  {
1823  unsigned char nCh = aFilteredLine[nPos];
1824  nStreamOffset = (nStreamOffset << 8) + nCh;
1825  }
1826 
1827  // Generation number of the object.
1828  size_t nGenerationNumber = 0;
1829  nOffset = nPos;
1830  for (; nPos < nOffset + aW[2]; ++nPos)
1831  {
1832  unsigned char nCh = aFilteredLine[nPos];
1833  nGenerationNumber = (nGenerationNumber << 8) + nCh;
1834  }
1835 
1836  // Ignore invalid nType.
1837  if (nType <= 2)
1838  {
1839  if (m_aXRef.find(nIndex) == m_aXRef.end())
1840  {
1841  XRefEntry aEntry;
1842  switch (nType)
1843  {
1844  case 0:
1845  aEntry.SetType(XRefEntryType::FREE);
1846  break;
1847  case 1:
1848  aEntry.SetType(XRefEntryType::NOT_COMPRESSED);
1849  break;
1850  case 2:
1851  aEntry.SetType(XRefEntryType::COMPRESSED);
1852  break;
1853  }
1854  aEntry.SetOffset(nStreamOffset);
1855  m_aXRef[nIndex] = aEntry;
1856  }
1857  }
1858  }
1859  }
1860 }
1861 
1862 void PDFDocument::ReadXRef(SvStream& rStream)
1863 {
1864  PDFDocument::SkipWhitespace(rStream);
1865 
1866  while (true)
1867  {
1868  PDFNumberElement aFirstObject;
1869  if (!aFirstObject.Read(rStream))
1870  {
1871  // Next token is not a number, it'll be the trailer.
1872  return;
1873  }
1874 
1875  if (aFirstObject.GetValue() < 0)
1876  {
1877  SAL_WARN("vcl.filter", "PDFDocument::ReadXRef: expected first object number >= 0");
1878  return;
1879  }
1880 
1881  PDFDocument::SkipWhitespace(rStream);
1882  PDFNumberElement aNumberOfEntries;
1883  if (!aNumberOfEntries.Read(rStream))
1884  {
1885  SAL_WARN("vcl.filter", "PDFDocument::ReadXRef: failed to read number of entries");
1886  return;
1887  }
1888 
1889  if (aNumberOfEntries.GetValue() < 0)
1890  {
1891  SAL_WARN("vcl.filter", "PDFDocument::ReadXRef: expected zero or more entries");
1892  return;
1893  }
1894 
1895  size_t nSize = aNumberOfEntries.GetValue();
1896  for (size_t nEntry = 0; nEntry < nSize; ++nEntry)
1897  {
1898  size_t nIndex = aFirstObject.GetValue() + nEntry;
1899  PDFDocument::SkipWhitespace(rStream);
1900  PDFNumberElement aOffset;
1901  if (!aOffset.Read(rStream))
1902  {
1903  SAL_WARN("vcl.filter", "PDFDocument::ReadXRef: failed to read offset");
1904  return;
1905  }
1906 
1907  PDFDocument::SkipWhitespace(rStream);
1908  PDFNumberElement aGenerationNumber;
1909  if (!aGenerationNumber.Read(rStream))
1910  {
1911  SAL_WARN("vcl.filter", "PDFDocument::ReadXRef: failed to read generation number");
1912  return;
1913  }
1914 
1915  PDFDocument::SkipWhitespace(rStream);
1916  OString aKeyword = ReadKeyword(rStream);
1917  if (aKeyword != "f" && aKeyword != "n")
1918  {
1919  SAL_WARN("vcl.filter", "PDFDocument::ReadXRef: unexpected keyword");
1920  return;
1921  }
1922  // xrefs are read in reverse order, so never update an existing
1923  // offset with an older one.
1924  if (m_aXRef.find(nIndex) == m_aXRef.end())
1925  {
1926  XRefEntry aEntry;
1927  aEntry.SetOffset(aOffset.GetValue());
1928  // Initially only the first entry is dirty.
1929  if (nIndex == 0)
1930  aEntry.SetDirty(true);
1931  m_aXRef[nIndex] = aEntry;
1932  }
1933  PDFDocument::SkipWhitespace(rStream);
1934  }
1935  }
1936 }
1937 
1938 void PDFDocument::SkipWhitespace(SvStream& rStream)
1939 {
1940  char ch = 0;
1941 
1942  while (true)
1943  {
1944  rStream.ReadChar(ch);
1945  if (rStream.eof())
1946  break;
1947 
1948  if (!rtl::isAsciiWhiteSpace(static_cast<unsigned char>(ch)))
1949  {
1950  rStream.SeekRel(-1);
1951  return;
1952  }
1953  }
1954 }
1955 
1956 void PDFDocument::SkipLineBreaks(SvStream& rStream)
1957 {
1958  char ch = 0;
1959 
1960  while (true)
1961  {
1962  rStream.ReadChar(ch);
1963  if (rStream.eof())
1964  break;
1965 
1966  if (ch != '\n' && ch != '\r')
1967  {
1968  rStream.SeekRel(-1);
1969  return;
1970  }
1971  }
1972 }
1973 
1974 size_t PDFDocument::GetObjectOffset(size_t nIndex) const
1975 {
1976  auto it = m_aXRef.find(nIndex);
1977  if (it == m_aXRef.end() || it->second.GetType() == XRefEntryType::COMPRESSED)
1978  {
1979  SAL_WARN("vcl.filter", "PDFDocument::GetObjectOffset: wanted to look up index #"
1980  << nIndex << ", but failed");
1981  return 0;
1982  }
1983 
1984  return it->second.GetOffset();
1985 }
1986 
1987 const std::vector<std::unique_ptr<PDFElement>>& PDFDocument::GetElements() const
1988 {
1989  return m_aElements;
1990 }
1991 
1993 static void visitPages(PDFObjectElement* pPages, std::vector<PDFObjectElement*>& rRet)
1994 {
1995  auto pKids = dynamic_cast<PDFArrayElement*>(pPages->Lookup("Kids"));
1996  if (!pKids)
1997  {
1998  SAL_WARN("vcl.filter", "visitPages: pages has no kids");
1999  return;
2000  }
2001 
2002  pPages->setVisiting(true);
2003 
2004  for (const auto& pKid : pKids->GetElements())
2005  {
2006  auto pReference = dynamic_cast<PDFReferenceElement*>(pKid);
2007  if (!pReference)
2008  continue;
2009 
2010  PDFObjectElement* pKidObject = pReference->LookupObject();
2011  if (!pKidObject)
2012  continue;
2013 
2014  // detect if visiting reenters itself
2015  if (pKidObject->alreadyVisiting())
2016  {
2017  SAL_WARN("vcl.filter", "visitPages: loop in hierarchy");
2018  continue;
2019  }
2020 
2021  auto pName = dynamic_cast<PDFNameElement*>(pKidObject->Lookup("Type"));
2022  if (pName && pName->GetValue() == "Pages")
2023  // Pages inside pages: recurse.
2024  visitPages(pKidObject, rRet);
2025  else
2026  // Found an actual page.
2027  rRet.push_back(pKidObject);
2028  }
2029 
2030  pPages->setVisiting(false);
2031 }
2032 
2033 std::vector<PDFObjectElement*> PDFDocument::GetPages()
2034 {
2035  std::vector<PDFObjectElement*> aRet;
2036 
2037  PDFReferenceElement* pRoot = nullptr;
2038 
2039  PDFTrailerElement* pTrailer = nullptr;
2040  if (!m_aTrailerOffsets.empty())
2041  {
2042  // Get access to the latest trailer, and work with the keys of that
2043  // one.
2044  auto it = m_aOffsetTrailers.find(m_aTrailerOffsets[0]);
2045  if (it != m_aOffsetTrailers.end())
2046  pTrailer = it->second;
2047  }
2048 
2049  if (pTrailer)
2050  pRoot = dynamic_cast<PDFReferenceElement*>(pTrailer->Lookup("Root"));
2051  else if (m_pXRefStream)
2052  pRoot = dynamic_cast<PDFReferenceElement*>(m_pXRefStream->Lookup("Root"));
2053 
2054  if (!pRoot)
2055  {
2056  SAL_WARN("vcl.filter", "PDFDocument::GetPages: trailer has no Root key");
2057  return aRet;
2058  }
2059 
2060  PDFObjectElement* pCatalog = pRoot->LookupObject();
2061  if (!pCatalog)
2062  {
2063  SAL_WARN("vcl.filter", "PDFDocument::GetPages: trailer has no catalog");
2064  return aRet;
2065  }
2066 
2067  PDFObjectElement* pPages = pCatalog->LookupObject("Pages");
2068  if (!pPages)
2069  {
2070  SAL_WARN("vcl.filter", "PDFDocument::GetPages: catalog (obj " << pCatalog->GetObjectValue()
2071  << ") has no pages");
2072  return aRet;
2073  }
2074 
2075  visitPages(pPages, aRet);
2076 
2077  return aRet;
2078 }
2079 
2080 void PDFDocument::PushBackEOF(size_t nOffset) { m_aEOFs.push_back(nOffset); }
2081 
2082 std::vector<PDFObjectElement*> PDFDocument::GetSignatureWidgets()
2083 {
2084  std::vector<PDFObjectElement*> aRet;
2085 
2086  std::vector<PDFObjectElement*> aPages = GetPages();
2087 
2088  for (const auto& pPage : aPages)
2089  {
2090  if (!pPage)
2091  continue;
2092 
2093  PDFElement* pAnnotsElement = pPage->Lookup("Annots");
2094  auto pAnnots = dynamic_cast<PDFArrayElement*>(pAnnotsElement);
2095  if (!pAnnots)
2096  {
2097  // Annots is not an array, see if it's a reference to an object
2098  // with a direct array.
2099  auto pAnnotsRef = dynamic_cast<PDFReferenceElement*>(pAnnotsElement);
2100  if (pAnnotsRef)
2101  {
2102  if (PDFObjectElement* pAnnotsObject = pAnnotsRef->LookupObject())
2103  {
2104  pAnnots = pAnnotsObject->GetArray();
2105  }
2106  }
2107  }
2108 
2109  if (!pAnnots)
2110  continue;
2111 
2112  for (const auto& pAnnot : pAnnots->GetElements())
2113  {
2114  auto pReference = dynamic_cast<PDFReferenceElement*>(pAnnot);
2115  if (!pReference)
2116  continue;
2117 
2118  PDFObjectElement* pAnnotObject = pReference->LookupObject();
2119  if (!pAnnotObject)
2120  continue;
2121 
2122  auto pFT = dynamic_cast<PDFNameElement*>(pAnnotObject->Lookup("FT"));
2123  if (!pFT || pFT->GetValue() != "Sig")
2124  continue;
2125 
2126  aRet.push_back(pAnnotObject);
2127  }
2128  }
2129 
2130  return aRet;
2131 }
2132 
2133 std::vector<unsigned char> PDFDocument::DecodeHexString(PDFHexStringElement const* pElement)
2134 {
2135  return svl::crypto::DecodeHexString(pElement->GetValue());
2136 }
2137 
/// Creates a comment element bound to rDoc; Read() reports "%%EOF" markers
/// to that document.
PDFCommentElement::PDFCommentElement(PDFDocument& rDoc)
    : m_rDoc(rDoc)
{
}
2142 
2143 bool PDFCommentElement::Read(SvStream& rStream)
2144 {
2145  // Read from (including) the % char till (excluding) the end of the line/stream.
2146  OStringBuffer aBuf;
2147  char ch;
2148  rStream.ReadChar(ch);
2149  while (true)
2150  {
2151  if (ch == '\n' || ch == '\r' || rStream.eof())
2152  {
2153  m_aComment = aBuf.makeStringAndClear();
2154 
2155  if (m_aComment.startsWith("%%EOF"))
2156  m_rDoc.PushBackEOF(rStream.Tell());
2157 
2158  SAL_INFO("vcl.filter", "PDFCommentElement::Read: m_aComment is '" << m_aComment << "'");
2159  return true;
2160  }
2161  aBuf.append(ch);
2162  rStream.ReadChar(ch);
2163  }
2164 
2165  return false;
2166 }
2167 
2169 
2171 {
2172  OStringBuffer aBuf;
2173  m_nOffset = rStream.Tell();
2174  char ch;
2175  rStream.ReadChar(ch);
2176  if (rStream.eof())
2177  {
2178  return false;
2179  }
2180  if (!rtl::isAsciiDigit(static_cast<unsigned char>(ch)) && ch != '-' && ch != '.')
2181  {
2182  rStream.SeekRel(-1);
2183  return false;
2184  }
2185  while (!rStream.eof())
2186  {
2187  if (!rtl::isAsciiDigit(static_cast<unsigned char>(ch)) && ch != '-' && ch != '.')
2188  {
2189  rStream.SeekRel(-1);
2190  m_nLength = rStream.Tell() - m_nOffset;
2191  m_fValue = aBuf.makeStringAndClear().toDouble();
2192  SAL_INFO("vcl.filter", "PDFNumberElement::Read: m_fValue is '" << m_fValue << "'");
2193  return true;
2194  }
2195  aBuf.append(ch);
2196  rStream.ReadChar(ch);
2197  }
2198 
2199  return false;
2200 }
2201 
2202 sal_uInt64 PDFNumberElement::GetLocation() const { return m_nOffset; }
2203 
2204 sal_uInt64 PDFNumberElement::GetLength() const { return m_nLength; }
2205 
2206 PDFBooleanElement::PDFBooleanElement(bool /*bValue*/) {}
2207 
2208 bool PDFBooleanElement::Read(SvStream& /*rStream*/) { return true; }
2209 
2210 bool PDFNullElement::Read(SvStream& /*rStream*/) { return true; }
2211 
2213 {
2214  char ch;
2215  rStream.ReadChar(ch);
2216  if (ch != '<')
2217  {
2218  SAL_INFO("vcl.filter", "PDFHexStringElement::Read: expected '<' as first character");
2219  return false;
2220  }
2221  rStream.ReadChar(ch);
2222 
2223  OStringBuffer aBuf;
2224  while (!rStream.eof())
2225  {
2226  if (ch == '>')
2227  {
2228  m_aValue = aBuf.makeStringAndClear();
2229  SAL_INFO("vcl.filter",
2230  "PDFHexStringElement::Read: m_aValue length is " << m_aValue.getLength());
2231  return true;
2232  }
2233  aBuf.append(ch);
2234  rStream.ReadChar(ch);
2235  }
2236 
2237  return false;
2238 }
2239 
2240 const OString& PDFHexStringElement::GetValue() const { return m_aValue; }
2241 
2243 {
2244  char nPrevCh = 0;
2245  char ch = 0;
2246  rStream.ReadChar(ch);
2247  if (ch != '(')
2248  {
2249  SAL_INFO("vcl.filter", "PDFHexStringElement::Read: expected '(' as first character");
2250  return false;
2251  }
2252  nPrevCh = ch;
2253  rStream.ReadChar(ch);
2254 
2255  // Start with 1 nesting level as we read a '(' above already.
2256  int nDepth = 1;
2257  OStringBuffer aBuf;
2258  while (!rStream.eof())
2259  {
2260  if (ch == '(' && nPrevCh != '\\')
2261  ++nDepth;
2262 
2263  if (ch == ')' && nPrevCh != '\\')
2264  --nDepth;
2265 
2266  if (nDepth == 0)
2267  {
2268  // ')' of the outermost '(' is reached.
2269  m_aValue = aBuf.makeStringAndClear();
2270  SAL_INFO("vcl.filter",
2271  "PDFLiteralStringElement::Read: m_aValue is '" << m_aValue << "'");
2272  return true;
2273  }
2274  aBuf.append(ch);
2275  nPrevCh = ch;
2276  rStream.ReadChar(ch);
2277  }
2278 
2279  return false;
2280 }
2281 
2282 const OString& PDFLiteralStringElement::GetValue() const { return m_aValue; }
2283 
2285  : m_rDoc(rDoc)
2286 {
2287 }
2288 
2290 {
2291  m_nOffset = rStream.Tell();
2292  return true;
2293 }
2294 
2295 PDFElement* PDFTrailerElement::Lookup(const OString& rDictionaryKey)
2296 {
2297  if (m_aDictionary.empty())
2299 
2300  return PDFDictionaryElement::Lookup(m_aDictionary, rDictionaryKey);
2301 }
2302 
2303 sal_uInt64 PDFTrailerElement::GetLocation() const { return m_nOffset; }
2304 
2305 double PDFNumberElement::GetValue() const { return m_fValue; }
2306 
/// Creates an indirect object element owned by rDoc.
///
/// @param rDoc the document this object belongs to.
/// @param fObjectValue the object number.
/// @param fGenerationValue the generation number.
///
/// All offsets and lengths start at 0 and all sub-element pointers at
/// nullptr.
PDFObjectElement::PDFObjectElement(PDFDocument& rDoc, double fObjectValue, double fGenerationValue)
    : m_rDoc(rDoc)
    , m_fObjectValue(fObjectValue)
    , m_fGenerationValue(fGenerationValue)
    , m_pNumberElement(nullptr)
    , m_nDictionaryOffset(0)
    , m_nDictionaryLength(0)
    , m_pDictionaryElement(nullptr)
    , m_nArrayOffset(0)
    , m_nArrayLength(0)
    , m_pArrayElement(nullptr)
    , m_pStreamElement(nullptr)
{
}
2321 
2323 {
2324  SAL_INFO("vcl.filter",
2325  "PDFObjectElement::Read: " << m_fObjectValue << " " << m_fGenerationValue << " obj");
2326  return true;
2327 }
2328 
2330 
2331 size_t PDFDictionaryElement::Parse(const std::vector<std::unique_ptr<PDFElement>>& rElements,
2332  PDFElement* pThis, std::map<OString, PDFElement*>& rDictionary)
2333 {
2334  // The index of last parsed element, in case of nested dictionaries.
2335  size_t nRet = 0;
2336 
2337  if (!rDictionary.empty())
2338  return nRet;
2339 
2340  pThis->setParsing(true);
2341 
2342  auto pThisObject = dynamic_cast<PDFObjectElement*>(pThis);
2343  // This is set to non-nullptr here for nested dictionaries only.
2344  auto pThisDictionary = dynamic_cast<PDFDictionaryElement*>(pThis);
2345 
2346  // Find out where the dictionary for this object starts.
2347  size_t nIndex = 0;
2348  for (size_t i = 0; i < rElements.size(); ++i)
2349  {
2350  if (rElements[i].get() == pThis)
2351  {
2352  nIndex = i;
2353  break;
2354  }
2355  }
2356 
2357  OString aName;
2358  sal_uInt64 nNameOffset = 0;
2359  std::vector<PDFNumberElement*> aNumbers;
2360  // The array value we're in -- if any.
2361  PDFArrayElement* pArray = nullptr;
2362  sal_uInt64 nDictionaryOffset = 0;
2363  int nDictionaryDepth = 0;
2364  // Toplevel dictionary found (not inside an array).
2365  bool bDictionaryFound = false;
2366  // Toplevel array found (not inside a dictionary).
2367  bool bArrayFound = false;
2368  for (size_t i = nIndex; i < rElements.size(); ++i)
2369  {
2370  // Dictionary tokens can be nested, track enter/leave.
2371  if (auto pDictionary = dynamic_cast<PDFDictionaryElement*>(rElements[i].get()))
2372  {
2373  bDictionaryFound = true;
2374  if (++nDictionaryDepth == 1)
2375  {
2376  // First dictionary start, track start offset.
2377  nDictionaryOffset = pDictionary->m_nLocation;
2378  if (pThisObject)
2379  {
2380  if (!bArrayFound)
2381  // Then the toplevel dictionary of the object.
2382  pThisObject->SetDictionary(pDictionary);
2383  pThisDictionary = pDictionary;
2384  pThisObject->SetDictionaryOffset(nDictionaryOffset);
2385  }
2386  }
2387  else if (!pDictionary->alreadyParsing())
2388  {
2389  // Nested dictionary.
2390  const size_t nexti
2391  = PDFDictionaryElement::Parse(rElements, pDictionary, pDictionary->m_aItems);
2392  if (nexti >= i) // ensure we go forwards and not endlessly loop
2393  {
2394  i = nexti;
2395  rDictionary[aName] = pDictionary;
2396  aName.clear();
2397  }
2398  }
2399  }
2400 
2401  if (auto pEndDictionary = dynamic_cast<PDFEndDictionaryElement*>(rElements[i].get()))
2402  {
2403  if (--nDictionaryDepth == 0)
2404  {
2405  // Last dictionary end, track length and stop parsing.
2406  if (pThisObject)
2407  pThisObject->SetDictionaryLength(pEndDictionary->GetLocation()
2408  - nDictionaryOffset);
2409  nRet = i;
2410  break;
2411  }
2412  }
2413 
2414  auto pName = dynamic_cast<PDFNameElement*>(rElements[i].get());
2415  if (pName)
2416  {
2417  if (!aNumbers.empty())
2418  {
2419  PDFNumberElement* pNumber = aNumbers.back();
2420  rDictionary[aName] = pNumber;
2421  if (pThisDictionary)
2422  {
2423  pThisDictionary->SetKeyOffset(aName, nNameOffset);
2424  pThisDictionary->SetKeyValueLength(
2425  aName, pNumber->GetLocation() + pNumber->GetLength() - nNameOffset);
2426  }
2427  aName.clear();
2428  aNumbers.clear();
2429  }
2430 
2431  if (aName.isEmpty())
2432  {
2433  // Remember key.
2434  aName = pName->GetValue();
2435  nNameOffset = pName->GetLocation();
2436  }
2437  else
2438  {
2439  if (pArray)
2440  {
2441  if (bDictionaryFound)
2442  // Array inside dictionary.
2443  pArray->PushBack(pName);
2444  }
2445  else
2446  {
2447  // Name-name key-value.
2448  rDictionary[aName] = pName;
2449  if (pThisDictionary)
2450  {
2451  pThisDictionary->SetKeyOffset(aName, nNameOffset);
2452  pThisDictionary->SetKeyValueLength(aName, pName->GetLocation()
2454  - nNameOffset);
2455  }
2456  aName.clear();
2457  }
2458  }
2459  continue;
2460  }
2461 
2462  auto pArr = dynamic_cast<PDFArrayElement*>(rElements[i].get());
2463  if (pArr)
2464  {
2465  bArrayFound = true;
2466  pArray = pArr;
2467  continue;
2468  }
2469 
2470  auto pEndArr = dynamic_cast<PDFEndArrayElement*>(rElements[i].get());
2471  if (pArray && pEndArr)
2472  {
2473  for (auto& pNumber : aNumbers)
2474  pArray->PushBack(pNumber);
2475  aNumbers.clear();
2476  rDictionary[aName] = pArray;
2477  if (pThisDictionary)
2478  {
2479  pThisDictionary->SetKeyOffset(aName, nNameOffset);
2480  // Include the ending ']' in the length of the key - (array)value pair length.
2481  pThisDictionary->SetKeyValueLength(aName, pEndArr->GetOffset() - nNameOffset + 1);
2482  }
2483  aName.clear();
2484  pArray = nullptr;
2485  continue;
2486  }
2487 
2488  auto pReference = dynamic_cast<PDFReferenceElement*>(rElements[i].get());
2489  if (pReference)
2490  {
2491  if (!pArray)
2492  {
2493  rDictionary[aName] = pReference;
2494  if (pThisDictionary)
2495  {
2496  pThisDictionary->SetKeyOffset(aName, nNameOffset);
2497  pThisDictionary->SetKeyValueLength(aName,
2498  pReference->GetOffset() - nNameOffset);
2499  }
2500  aName.clear();
2501  }
2502  else
2503  {
2504  if (bDictionaryFound)
2505  // Array inside dictionary.
2506  pArray->PushBack(pReference);
2507  }
2508  aNumbers.clear();
2509  continue;
2510  }
2511 
2512  auto pLiteralString = dynamic_cast<PDFLiteralStringElement*>(rElements[i].get());
2513  if (pLiteralString)
2514  {
2515  rDictionary[aName] = pLiteralString;
2516  if (pThisDictionary)
2517  pThisDictionary->SetKeyOffset(aName, nNameOffset);
2518  aName.clear();
2519  continue;
2520  }
2521 
2522  auto pBoolean = dynamic_cast<PDFBooleanElement*>(rElements[i].get());
2523  if (pBoolean)
2524  {
2525  rDictionary[aName] = pBoolean;
2526  if (pThisDictionary)
2527  pThisDictionary->SetKeyOffset(aName, nNameOffset);
2528  aName.clear();
2529  continue;
2530  }
2531 
2532  auto pHexString = dynamic_cast<PDFHexStringElement*>(rElements[i].get());
2533  if (pHexString)
2534  {
2535  if (!pArray)
2536  {
2537  rDictionary[aName] = pHexString;
2538  if (pThisDictionary)
2539  pThisDictionary->SetKeyOffset(aName, nNameOffset);
2540  aName.clear();
2541  }
2542  else
2543  {
2544  pArray->PushBack(pHexString);
2545  }
2546  continue;
2547  }
2548 
2549  if (dynamic_cast<PDFEndObjectElement*>(rElements[i].get()))
2550  break;
2551 
2552  // Just remember this, so that in case it's not a reference parameter,
2553  // we can handle it later.
2554  auto pNumber = dynamic_cast<PDFNumberElement*>(rElements[i].get());
2555  if (pNumber)
2556  aNumbers.push_back(pNumber);
2557  }
2558 
2559  if (!aNumbers.empty())
2560  {
2561  rDictionary[aName] = aNumbers.back();
2562  if (pThisDictionary)
2563  pThisDictionary->SetKeyOffset(aName, nNameOffset);
2564  aName.clear();
2565  aNumbers.clear();
2566  }
2567 
2568  pThis->setParsing(false);
2569 
2570  return nRet;
2571 }
2572 
2573 PDFElement* PDFDictionaryElement::Lookup(const std::map<OString, PDFElement*>& rDictionary,
2574  const OString& rKey)
2575 {
2576  auto it = rDictionary.find(rKey);
2577  if (it == rDictionary.end())
2578  return nullptr;
2579 
2580  return it->second;
2581 }
2582 
2584 {
2585  auto pKey = dynamic_cast<PDFReferenceElement*>(
2586  PDFDictionaryElement::Lookup(m_aItems, rDictionaryKey));
2587  if (!pKey)
2588  {
2589  SAL_WARN("vcl.filter",
2590  "PDFDictionaryElement::LookupObject: no such key with reference value: "
2591  << rDictionaryKey);
2592  return nullptr;
2593  }
2594 
2595  return pKey->LookupObject();
2596 }
2597 
2598 PDFElement* PDFDictionaryElement::LookupElement(const OString& rDictionaryKey)
2599 {
2600  return PDFDictionaryElement::Lookup(m_aItems, rDictionaryKey);
2601 }
2602 
2603 PDFElement* PDFObjectElement::Lookup(const OString& rDictionaryKey)
2604 {
2605  if (m_aDictionary.empty())
2606  {
2607  if (!m_aElements.empty())
2608  // This is a stored object in an object stream.
2610  else
2611  // Normal object: elements are stored as members of the document itself.
2613  }
2614 
2615  return PDFDictionaryElement::Lookup(m_aDictionary, rDictionaryKey);
2616 }
2617 
2618 PDFObjectElement* PDFObjectElement::LookupObject(const OString& rDictionaryKey)
2619 {
2620  auto pKey = dynamic_cast<PDFReferenceElement*>(Lookup(rDictionaryKey));
2621  if (!pKey)
2622  {
2623  SAL_WARN("vcl.filter", "PDFObjectElement::LookupObject: no such key with reference value: "
2624  << rDictionaryKey);
2625  return nullptr;
2626  }
2627 
2628  return pKey->LookupObject();
2629 }
2630 
2632 
2633 void PDFObjectElement::SetDictionaryOffset(sal_uInt64 nDictionaryOffset)
2634 {
2635  m_nDictionaryOffset = nDictionaryOffset;
2636 }
2637 
2639 {
2640  if (m_aDictionary.empty())
2642 
2643  return m_nDictionaryOffset;
2644 }
2645 
2646 void PDFObjectElement::SetArrayOffset(sal_uInt64 nArrayOffset) { m_nArrayOffset = nArrayOffset; }
2647 
2648 sal_uInt64 PDFObjectElement::GetArrayOffset() const { return m_nArrayOffset; }
2649 
2650 void PDFDictionaryElement::SetKeyOffset(const OString& rKey, sal_uInt64 nOffset)
2651 {
2652  m_aDictionaryKeyOffset[rKey] = nOffset;
2653 }
2654 
2655 void PDFDictionaryElement::SetKeyValueLength(const OString& rKey, sal_uInt64 nLength)
2656 {
2657  m_aDictionaryKeyValueLength[rKey] = nLength;
2658 }
2659 
2660 sal_uInt64 PDFDictionaryElement::GetKeyOffset(const OString& rKey) const
2661 {
2662  auto it = m_aDictionaryKeyOffset.find(rKey);
2663  if (it == m_aDictionaryKeyOffset.end())
2664  return 0;
2665 
2666  return it->second;
2667 }
2668 
2669 sal_uInt64 PDFDictionaryElement::GetKeyValueLength(const OString& rKey) const
2670 {
2671  auto it = m_aDictionaryKeyValueLength.find(rKey);
2672  if (it == m_aDictionaryKeyValueLength.end())
2673  return 0;
2674 
2675  return it->second;
2676 }
2677 
2678 const std::map<OString, PDFElement*>& PDFDictionaryElement::GetItems() const { return m_aItems; }
2679 
2680 void PDFObjectElement::SetDictionaryLength(sal_uInt64 nDictionaryLength)
2681 {
2682  m_nDictionaryLength = nDictionaryLength;
2683 }
2684 
2686 {
2687  if (m_aDictionary.empty())
2689 
2690  return m_nDictionaryLength;
2691 }
2692 
2693 void PDFObjectElement::SetArrayLength(sal_uInt64 nArrayLength) { m_nArrayLength = nArrayLength; }
2694 
2695 sal_uInt64 PDFObjectElement::GetArrayLength() const { return m_nArrayLength; }
2696 
2698 {
2699  if (m_aDictionary.empty())
2701  return m_pDictionaryElement;
2702 }
2703 
2705 {
2706  m_pDictionaryElement = pDictionaryElement;
2707 }
2708 
2710 {
2711  m_pNumberElement = pNumberElement;
2712 }
2713 
2715 
2716 const std::vector<PDFReferenceElement*>& PDFObjectElement::GetDictionaryReferences() const
2717 {
2718  return m_aDictionaryReferences;
2719 }
2720 
2722 {
2723  m_aDictionaryReferences.push_back(pReference);
2724 }
2725 
2726 const std::map<OString, PDFElement*>& PDFObjectElement::GetDictionaryItems()
2727 {
2728  if (m_aDictionary.empty())
2730 
2731  return m_aDictionary;
2732 }
2733 
2734 void PDFObjectElement::SetArray(PDFArrayElement* pArrayElement) { m_pArrayElement = pArrayElement; }
2735 
2737 {
2738  m_pStreamElement = pStreamElement;
2739 }
2740 
2742 
2744 
2746 {
2747  if (!m_pStreamElement)
2748  {
2749  SAL_WARN("vcl.filter", "PDFObjectElement::ParseStoredObjects: no stream");
2750  return;
2751  }
2752 
2753  auto pType = dynamic_cast<PDFNameElement*>(Lookup("Type"));
2754  if (!pType || pType->GetValue() != "ObjStm")
2755  {
2756  if (!pType)
2757  SAL_WARN("vcl.filter", "PDFDocument::ReadXRefStream: missing unexpected type");
2758  else
2759  SAL_WARN("vcl.filter",
2760  "PDFDocument::ReadXRefStream: unexpected type: " << pType->GetValue());
2761  return;
2762  }
2763 
2764  auto pFilter = dynamic_cast<PDFNameElement*>(Lookup("Filter"));
2765  if (!pFilter || pFilter->GetValue() != "FlateDecode")
2766  {
2767  if (!pFilter)
2768  SAL_WARN("vcl.filter", "PDFDocument::ReadXRefStream: missing filter");
2769  else
2770  SAL_WARN("vcl.filter",
2771  "PDFDocument::ReadXRefStream: unexpected filter: " << pFilter->GetValue());
2772  return;
2773  }
2774 
2775  auto pFirst = dynamic_cast<PDFNumberElement*>(Lookup("First"));
2776  if (!pFirst)
2777  {
2778  SAL_WARN("vcl.filter", "PDFObjectElement::ParseStoredObjects: no First");
2779  return;
2780  }
2781 
2782  auto pN = dynamic_cast<PDFNumberElement*>(Lookup("N"));
2783  if (!pN)
2784  {
2785  SAL_WARN("vcl.filter", "PDFObjectElement::ParseStoredObjects: no N");
2786  return;
2787  }
2788  size_t nN = pN->GetValue();
2789 
2790  auto pLength = dynamic_cast<PDFNumberElement*>(Lookup("Length"));
2791  if (!pLength)
2792  {
2793  SAL_WARN("vcl.filter", "PDFObjectElement::ParseStoredObjects: no length");
2794  return;
2795  }
2796  size_t nLength = pLength->GetValue();
2797 
2798  // Read and decompress it.
2799  SvMemoryStream& rEditBuffer = m_rDoc.GetEditBuffer();
2800  rEditBuffer.Seek(m_pStreamElement->GetOffset());
2801  std::vector<char> aBuf(nLength);
2802  rEditBuffer.ReadBytes(aBuf.data(), aBuf.size());
2803  SvMemoryStream aSource(aBuf.data(), aBuf.size(), StreamMode::READ);
2804  SvMemoryStream aStream;
2805  ZCodec aZCodec;
2806  aZCodec.BeginCompression();
2807  aZCodec.Decompress(aSource, aStream);
2808  if (!aZCodec.EndCompression())
2809  {
2810  SAL_WARN("vcl.filter", "PDFObjectElement::ParseStoredObjects: decompression failed");
2811  return;
2812  }
2813 
2814  nLength = aStream.TellEnd();
2815  aStream.Seek(0);
2816  std::vector<size_t> aObjNums;
2817  std::vector<size_t> aOffsets;
2818  std::vector<size_t> aLengths;
2819  // First iterate over and find out the lengths.
2820  for (size_t nObject = 0; nObject < nN; ++nObject)
2821  {
2822  PDFNumberElement aObjNum;
2823  if (!aObjNum.Read(aStream))
2824  {
2825  SAL_WARN("vcl.filter",
2826  "PDFObjectElement::ParseStoredObjects: failed to read object number");
2827  return;
2828  }
2829  aObjNums.push_back(aObjNum.GetValue());
2830 
2831  PDFDocument::SkipWhitespace(aStream);
2832 
2833  PDFNumberElement aByteOffset;
2834  if (!aByteOffset.Read(aStream))
2835  {
2836  SAL_WARN("vcl.filter",
2837  "PDFObjectElement::ParseStoredObjects: failed to read byte offset");
2838  return;
2839  }
2840  aOffsets.push_back(pFirst->GetValue() + aByteOffset.GetValue());
2841 
2842  if (aOffsets.size() > 1)
2843  aLengths.push_back(aOffsets.back() - aOffsets[aOffsets.size() - 2]);
2844  if (nObject + 1 == nN)
2845  aLengths.push_back(nLength - aOffsets.back());
2846 
2847  PDFDocument::SkipWhitespace(aStream);
2848  }
2849 
2850  // Now create streams with the proper length and tokenize the data.
2851  for (size_t nObject = 0; nObject < nN; ++nObject)
2852  {
2853  size_t nObjNum = aObjNums[nObject];
2854  size_t nOffset = aOffsets[nObject];
2855  size_t nLen = aLengths[nObject];
2856 
2857  aStream.Seek(nOffset);
2858  m_aStoredElements.push_back(std::make_unique<PDFObjectElement>(m_rDoc, nObjNum, 0));
2859  PDFObjectElement* pStored = m_aStoredElements.back().get();
2860 
2861  aBuf.clear();
2862  aBuf.resize(nLen);
2863  aStream.ReadBytes(aBuf.data(), aBuf.size());
2864  SvMemoryStream aStoredStream(aBuf.data(), aBuf.size(), StreamMode::READ);
2865 
2866  m_rDoc.Tokenize(aStoredStream, TokenizeMode::STORED_OBJECT, pStored->GetStoredElements(),
2867  pStored);
2868  // This is how references know the object is stored inside this object stream.
2869  m_rDoc.SetIDObject(nObjNum, pStored);
2870 
2871  // Store the stream of the object in the object stream for later use.
2872  std::unique_ptr<SvMemoryStream> pStreamBuffer(new SvMemoryStream());
2873  aStoredStream.Seek(0);
2874  pStreamBuffer->WriteStream(aStoredStream);
2875  pStored->SetStreamBuffer(pStreamBuffer);
2876  }
2877 }
2878 
2879 std::vector<std::unique_ptr<PDFElement>>& PDFObjectElement::GetStoredElements()
2880 {
2881  return m_aElements;
2882 }
2883 
2885 
2886 void PDFObjectElement::SetStreamBuffer(std::unique_ptr<SvMemoryStream>& pStreamBuffer)
2887 {
2888  m_pStreamBuffer = std::move(pStreamBuffer);
2889 }
2890 
2892 
2894  PDFNumberElement const& rGeneration)
2895  : m_rDoc(rDoc)
2896  , m_fObjectValue(rObject.GetValue())
2897  , m_fGenerationValue(rGeneration.GetValue())
2898  , m_rObject(rObject)
2899 {
2900 }
2901 
2903 
2905 {
2906  SAL_INFO("vcl.filter",
2907  "PDFReferenceElement::Read: " << m_fObjectValue << " " << m_fGenerationValue << " R");
2908  m_nOffset = rStream.Tell();
2909  return true;
2910 }
2911 
2912 sal_uInt64 PDFReferenceElement::GetOffset() const { return m_nOffset; }
2913 
2915 {
2916  size_t nOffset = m_rDoc.GetObjectOffset(m_fObjectValue);
2917  if (nOffset == 0)
2918  {
2919  SAL_WARN("vcl.filter", "PDFReferenceElement::LookupNumber: found no offset for object #"
2920  << m_fObjectValue);
2921  return 0;
2922  }
2923 
2924  sal_uInt64 nOrigPos = rStream.Tell();
2925  comphelper::ScopeGuard g([&]() { rStream.Seek(nOrigPos); });
2926 
2927  rStream.Seek(nOffset);
2928  {
2929  PDFDocument::SkipWhitespace(rStream);
2930  PDFNumberElement aNumber;
2931  bool bRet = aNumber.Read(rStream);
2932  if (!bRet || aNumber.GetValue() != m_fObjectValue)
2933  {
2934  SAL_WARN("vcl.filter",
2935  "PDFReferenceElement::LookupNumber: offset points to not matching object");
2936  return 0;
2937  }
2938  }
2939 
2940  {
2941  PDFDocument::SkipWhitespace(rStream);
2942  PDFNumberElement aNumber;
2943  bool bRet = aNumber.Read(rStream);
2944  if (!bRet || aNumber.GetValue() != m_fGenerationValue)
2945  {
2946  SAL_WARN("vcl.filter",
2947  "PDFReferenceElement::LookupNumber: offset points to not matching generation");
2948  return 0;
2949  }
2950  }
2951 
2952  {
2953  PDFDocument::SkipWhitespace(rStream);
2954  OString aKeyword = PDFDocument::ReadKeyword(rStream);
2955  if (aKeyword != "obj")
2956  {
2957  SAL_WARN("vcl.filter",
2958  "PDFReferenceElement::LookupNumber: offset doesn't point to an obj keyword");
2959  return 0;
2960  }
2961  }
2962 
2963  PDFDocument::SkipWhitespace(rStream);
2964  PDFNumberElement aNumber;
2965  if (!aNumber.Read(rStream))
2966  {
2967  SAL_WARN("vcl.filter",
2968  "PDFReferenceElement::LookupNumber: failed to read referenced number");
2969  return 0;
2970  }
2971 
2972  return aNumber.GetValue();
2973 }
2974 
2976 {
2978 }
2979 
2981 {
2982  auto itIDObjects = m_aIDObjects.find(nObjectNumber);
2983 
2984  if (itIDObjects != m_aIDObjects.end())
2985  return itIDObjects->second;
2986 
2987  SAL_WARN("vcl.filter", "PDFDocument::LookupObject: can't find obj " << nObjectNumber);
2988  return nullptr;
2989 }
2990 
2992 
2994 
2996 
2998 {
2999  char ch;
3000  rStream.ReadChar(ch);
3001  if (ch != '<')
3002  {
3003  SAL_WARN("vcl.filter", "PDFDictionaryElement::Read: unexpected character: " << ch);
3004  return false;
3005  }
3006 
3007  if (rStream.eof())
3008  {
3009  SAL_WARN("vcl.filter", "PDFDictionaryElement::Read: unexpected end of file");
3010  return false;
3011  }
3012 
3013  rStream.ReadChar(ch);
3014  if (ch != '<')
3015  {
3016  SAL_WARN("vcl.filter", "PDFDictionaryElement::Read: unexpected character: " << ch);
3017  return false;
3018  }
3019 
3020  m_nLocation = rStream.Tell();
3021 
3022  SAL_INFO("vcl.filter", "PDFDictionaryElement::Read: '<<'");
3023 
3024  return true;
3025 }
3026 
/// Default construction; m_nLocation keeps its in-class initial value of 0.
PDFEndDictionaryElement::PDFEndDictionaryElement() = default;
3028 
3029 sal_uInt64 PDFEndDictionaryElement::GetLocation() const { return m_nLocation; }
3030 
3031 bool PDFEndDictionaryElement::Read(SvStream& rStream)
3032 {
3033  m_nLocation = rStream.Tell();
3034  char ch;
3035  rStream.ReadChar(ch);
3036  if (ch != '>')
3037  {
3038  SAL_WARN("vcl.filter", "PDFEndDictionaryElement::Read: unexpected character: " << ch);
3039  return false;
3040  }
3041 
3042  if (rStream.eof())
3043  {
3044  SAL_WARN("vcl.filter", "PDFEndDictionaryElement::Read: unexpected end of file");
3045  return false;
3046  }
3047 
3048  rStream.ReadChar(ch);
3049  if (ch != '>')
3050  {
3051  SAL_WARN("vcl.filter", "PDFEndDictionaryElement::Read: unexpected character: " << ch);
3052  return false;
3053  }
3054 
3055  SAL_INFO("vcl.filter", "PDFEndDictionaryElement::Read: '>>'");
3056 
3057  return true;
3058 }
3059 
/// Default construction; members keep their in-class initial values.
PDFNameElement::PDFNameElement() = default;
3061 
3063 {
3064  char ch;
3065  rStream.ReadChar(ch);
3066  if (ch != '/')
3067  {
3068  SAL_WARN("vcl.filter", "PDFNameElement::Read: unexpected character: " << ch);
3069  return false;
3070  }
3071  m_nLocation = rStream.Tell();
3072 
3073  if (rStream.eof())
3074  {
3075  SAL_WARN("vcl.filter", "PDFNameElement::Read: unexpected end of file");
3076  return false;
3077  }
3078 
3079  // Read till the first white-space.
3080  OStringBuffer aBuf;
3081  rStream.ReadChar(ch);
3082  while (!rStream.eof())
3083  {
3084  if (rtl::isAsciiWhiteSpace(static_cast<unsigned char>(ch)) || ch == '/' || ch == '['
3085  || ch == ']' || ch == '<' || ch == '>' || ch == '(')
3086  {
3087  rStream.SeekRel(-1);
3088  m_aValue = aBuf.makeStringAndClear();
3089  SAL_INFO("vcl.filter", "PDFNameElement::Read: m_aValue is '" << m_aValue << "'");
3090  return true;
3091  }
3092  aBuf.append(ch);
3093  rStream.ReadChar(ch);
3094  }
3095 
3096  return false;
3097 }
3098 
3099 const OString& PDFNameElement::GetValue() const { return m_aValue; }
3100 
3101 sal_uInt64 PDFNameElement::GetLocation() const { return m_nLocation; }
3102 
3104  : m_nLength(nLength)
3105  , m_nOffset(0)
3106 {
3107 }
3108 
3110 {
3111  SAL_INFO("vcl.filter", "PDFStreamElement::Read: length is " << m_nLength);
3112  m_nOffset = rStream.Tell();
3113  std::vector<unsigned char> aBytes(m_nLength);
3114  rStream.ReadBytes(aBytes.data(), aBytes.size());
3115  m_aMemory.WriteBytes(aBytes.data(), aBytes.size());
3116 
3117  return rStream.good();
3118 }
3119 
3121 
3122 sal_uInt64 PDFStreamElement::GetOffset() const { return m_nOffset; }
3123 
3124 bool PDFEndStreamElement::Read(SvStream& /*rStream*/) { return true; }
3125 
3126 bool PDFEndObjectElement::Read(SvStream& /*rStream*/) { return true; }
3127 
3129  : m_pObject(pObject)
3130 {
3131 }
3132 
3134 {
3135  char ch;
3136  rStream.ReadChar(ch);
3137  if (ch != '[')
3138  {
3139  SAL_WARN("vcl.filter", "PDFArrayElement::Read: unexpected character: " << ch);
3140  return false;
3141  }
3142 
3143  SAL_INFO("vcl.filter", "PDFArrayElement::Read: '['");
3144 
3145  return true;
3146 }
3147 
3149 {
3150  if (m_pObject)
3151  SAL_INFO("vcl.filter",
3152  "PDFArrayElement::PushBack: object is " << m_pObject->GetObjectValue());
3153  m_aElements.push_back(pElement);
3154 }
3155 
3156 const std::vector<PDFElement*>& PDFArrayElement::GetElements() const { return m_aElements; }
3157 
/// Default construction; m_nOffset keeps its in-class initial value of 0.
PDFEndArrayElement::PDFEndArrayElement() = default;
3159 
3160 bool PDFEndArrayElement::Read(SvStream& rStream)
3161 {
3162  m_nOffset = rStream.Tell();
3163  char ch;
3164  rStream.ReadChar(ch);
3165  if (ch != ']')
3166  {
3167  SAL_WARN("vcl.filter", "PDFEndArrayElement::Read: unexpected character: " << ch);
3168  return false;
3169  }
3170 
3171  SAL_INFO("vcl.filter", "PDFEndArrayElement::Read: ']'");
3172 
3173  return true;
3174 }
3175 
3176 sal_uInt64 PDFEndArrayElement::GetOffset() const { return m_nOffset; }
3177 
3178 } // namespace vcl
3179 
3180 /* vim:set shiftwidth=4 softtabstop=4 expandtab: */
std::vector< std::unique_ptr< PDFElement > > m_aElements
Elements of an object in an object stream.
Definition: pdfdocument.hxx:90
long getHeight() const
sal_uInt64 m_nOffset
Location of the end of the trailer token.
Array object: a list.
const sal_Int32 m_nLength
sal_uInt64 m_nDictionaryOffset
Position after the '<<' token.
Definition: pdfdocument.hxx:75
bool SetStreamSize(sal_uInt64 nSize)
sal_Int32 nIndex
PDFObjectElement(PDFDocument &rDoc, double fObjectValue, double fGenerationValue)
Numbering object: an integer or a real.
void setHeight(long n)
PDFElement * Lookup(const OString &rDictionaryKey)
PDFDictionaryElement * m_pDictionaryElement
Definition: pdfdocument.hxx:78
size_t GetObjectOffset(size_t nIndex) const
sal_uInt64 m_nLocation
Offset after the '/' token.
static void AppendUnicodeTextString(const OUString &rString, OStringBuffer &rBuffer)
Write rString as a PDF hex string into rBuffer.
void SetStream(PDFStreamElement *pStreamElement)
SvMemoryStream m_aMemory
The byte array itself.
std::string GetValue
const int MAX_SIGNATURE_CONTENT_LENGTH
Definition: pdfdocument.cxx:37
PDFArrayElement * m_pArrayElement
The contained direct array, if any.
Definition: pdfdocument.hxx:84
long getWidth() const
PDFObjectElement * m_pObject
The object that contains this array.
std::vector< std::unique_ptr< PDFObjectElement > > m_aStoredElements
Objects of an object stream.
Definition: pdfdocument.hxx:88
double LookupNumber(SvStream &rStream) const
Assuming the reference points to a number object, return its value.
void SetDictionaryLength(sal_uInt64 nDictionaryLength)
PDFTrailerElement(PDFDocument &rDoc)
aBuf
#define STREAM_SEEK_TO_END
static size_t Parse(const std::vector< std::unique_ptr< PDFElement >> &rElements, PDFElement *pThis, std::map< OString, PDFElement * > &rDictionary)
sal_uInt64 Seek(sal_uInt64 nPos)
std::vector< unsigned char > DecodeHexString(const OString &rHex)
void SetDictionaryOffset(sal_uInt64 nDictionaryOffset)
PDFDocument & m_rDoc
The document owning this element.
Definition: pdfdocument.hxx:68
void PushBack(PDFElement *pElement)
SvStream & WriteOString(const OString &rStr)
sal_uInt64 GetLocation() const
static sal_uInt64 GetLength()
PDFObjectElement * LookupObject(const OString &rDictionaryKey)
Looks up an object which is only referenced in this dictionary.
EmbeddedObjectRef * pObject
sal_uInt64 m_nArrayOffset
Position after the '[' token, if m_pArrayElement is set.
Definition: pdfdocument.hxx:80
std::map< OString, PDFElement * > m_aItems
Key-value pairs when the dictionary is a nested value.
void SetArrayOffset(sal_uInt64 nArrayOffset)
SvStream & WriteCharPtr(const char *pBuf)
void SetArrayLength(sal_uInt64 nArrayLength)
sal_uInt64 SeekRel(sal_Int64 nPos)
void setVisiting(bool bVisiting)
Definition: pdfdocument.hxx:58
SvMemoryStream * GetStreamBuffer() const
void SetStreamBuffer(std::unique_ptr< SvMemoryStream > &pStreamBuffer)
SvStream & WriteUInt32AsString(sal_uInt32 nUInt32)
css::uno::Any const & rValue
std::unique_ptr< SvMemoryStream > m_pStreamBuffer
Uncompressed buffer of an object in an object stream.
Definition: pdfdocument.hxx:92
A byte range in a PDF file.
Definition: pdfdocument.hxx:49
bool Read(SvStream &rStream) override
long EndCompression()
bool eof() const
PDFObjectElement * LookupObject(size_t nObjectNumber)
Look up object based on object number, possibly by parsing object streams.
const BorderLinePrimitive2D *pCandidateB assert(pCandidateA)
bool Read(SvStream &rStream) override
In-memory representation of an on-disk PDF document.
TStyleElements m_aElements
sal_uInt64 m_nArrayLength
Length of the array buffer till (before) the ']' token.
Definition: pdfdocument.hxx:82
An entry in a cross-reference stream.
PDFReferenceElement(PDFDocument &rDoc, PDFNumberElement &rObject, PDFNumberElement const &rGeneration)
XRefEntryType GetType() const
const OString & GetValue() const
static void visitPages(PDFObjectElement *pPages, std::vector< PDFObjectElement * > &rRet)
Visits the page tree recursively, looking for page objects.
void PushBackEOF(size_t nOffset)
Remember the end location of an EOF token.
sal_uInt64 GetArrayLength() const
void copyPageResources(filter::PDFObjectElement *pPage, OStringBuffer &rLine)
Copies resources of pPage into rLine.
sal_Int32 nElements
bool Read(SvStream &rStream) override
PDFNumberElement * m_pNumberElement
If set, the object contains this number element (outside any dictionary/array).
Definition: pdfdocument.hxx:73
void Compress(SvStream &rIStm, SvStream &rOStm)
Copies objects from one PDF file into another one.
void setWidth(long n)
void SetType(XRefEntryType eType)
Same as END_OF_OBJECT, but for object streams (no endobj keyword).
void SetKeyValueLength(const OString &rKey, sal_uInt64 nLength)
sal_uInt64 m_nOffset
Input file start location.
void ParseStoredObjects()
Parse objects stored in this object stream.
static OString GetDateTime()
Get current date/time in PDF D:YYYYMMDDHHMMSS form.
bool Read(SvStream &rStream) override
sal_uInt64 GetOffset() const
std::map< OString, sal_uInt64 > m_aDictionaryKeyOffset
Position after the '/' token.
sal_uInt16 char * pName
PDFStreamElement * m_pStreamElement
The stream of this object, used when this is an object stream.
Definition: pdfdocument.hxx:86
bool GetDirty() const
int i
long Decompress(SvStream &rIStm, SvStream &rOStm)
Indirect object: something with a unique ID.
Definition: pdfdocument.hxx:65
OString m_aComment
Definition: pdfdocument.cxx:47
PDFNumberElement & m_rObject
The element providing the object number.
sal_uInt64 m_nLength
Input file token length.
static OString ReadKeyword(SvStream &rStream)
sal_uInt64 GetSize()
bool Read(SvStream &rStream) override
constexpr std::enable_if_t< std::is_signed_v< T >, std::make_unsigned_t< T > > make_unsigned(T value)
std::size_t WriteBytes(const void *pData, std::size_t nSize)
void AddDictionaryReference(PDFReferenceElement *pReference)
void BeginCompression(int nCompressLevel=ZCODEC_DEFAULT_COMPRESSION, bool gzLib=false)
std::vector< PDFObjectElement * > GetPages()
SvMemoryStream m_aEditBuffer
All editing takes place in this buffer, if it happens.
sal_uInt64 GetLocation() const
Dictionary object: a set of key-value pairs.
bool Read(SvStream &rStream)
Read elements from the start of the stream till its end.
sal_uInt64 GetOffset() const
bool Read(SvStream &rStream) override
std::vector< PDFElement * > m_aElements
bool Tokenize(SvStream &rStream, TokenizeMode eMode, std::vector< std::unique_ptr< PDFElement >> &rElements, PDFObjectElement *pObjectElement)
Tokenize elements from current offset.
bool Read(SvStream &rStream) override
std::vector< PDFReferenceElement * > m_aDictionaryReferences
List of all reference elements inside this object's dictionary and nested dictionaries.
Definition: pdfdocument.hxx:95
PDFStreamElement * GetStream() const
Access to the stream of the object, if it has any.
PDFDocument & m_rDoc
Definition: pdfdocument.cxx:46
sal_uInt64 m_nOffset
Location before the ']' token.
Definition: pdfdocument.cxx:89
SvStream & WriteStream(SvStream &rStream)
const std::map< OString, PDFElement * > & GetItems() const
std::map< OString, sal_uInt64 > m_aDictionaryKeyValueLength
Length of the dictionary key and value, till (before) the next token.
const char * pS
sal_uInt64 GetKeyOffset(const OString &rKey) const
void SetIDObject(size_t nID, PDFObjectElement *pObject)
Register an object (owned directly or indirectly by m_aElements) as a provider for a given ID...
void setParsing(bool bParsing)
Definition: pdfdocument.hxx:60
bool Read(SvStream &rStream) override
sal_uInt64 GetKeyValueLength(const OString &rKey) const
PDFObjectElement * LookupObject(const OString &rDictionaryKey)
std::size_t ReadBytes(void *pData, std::size_t nSize)
SvMemoryStream & GetEditBuffer()
Access to the input document, even after the input stream is gone.
const OString & GetValue() const
std::vector< std::unique_ptr< PDFElement > > & GetStoredElements()
void SetNumberElement(PDFNumberElement *pNumberElement)
sal_uInt64 GetLength() const
PDFDictionaryElement * GetDictionary()
const std::vector< std::unique_ptr< PDFElement > > & GetElements() const
bool Read(SvStream &rStream) override
void SetDirty(bool bDirty)
PDFObjectElement * LookupObject()
Lookup referenced object, without assuming anything about its contents.
std::unique_ptr< char[]> aBuffer
SvStream & ReadChar(char &rChar)
PDFElement * LookupElement(const OString &rDictionaryKey)
Looks up an element which is contained in this dictionary.
std::map< OString, PDFElement * > m_aDictionary
PDFArrayElement * GetArray() const
SvMemoryStream & GetMemory()
const std::vector< PDFReferenceElement * > & GetDictionaryReferences() const
#define SAL_INFO(area, stream)
OUString aName
std::map< size_t, PDFObjectElement * > m_aIDObjects
Object ID <-> Object pointer map.
static void SkipWhitespace(SvStream &rStream)
PDFNumberElement * GetNumberElement() const
sal_uInt64 Tell() const
QPRO_FUNC_TYPE nType
const OString & GetValue() const
bool Sign(OStringBuffer &rCMSHexBuffer)
Reference object: something with a unique ID.
const std::vector< PDFElement * > & GetElements() const
sal_uInt64 GetLocation() const
bool good() const
#define SAL_WARN(area, stream)
bool alreadyVisiting() const
Definition: pdfdocument.hxx:59
Literal string: in (asdf) form.
PDFArrayElement(PDFObjectElement *pObject)
sal_Int32 nLength
Name object: a key string.
void SetOffset(sal_uInt64 nOffset)
The trailer singleton is at the end of the doc.
const std::map< OString, PDFElement * > & GetDictionaryItems()
Get access to the parsed key-value items from the object dictionary.
void SetDictionary(PDFDictionaryElement *pDictionaryElement)
PDFNumberElement & GetObjectElement() const
void AddDataRange(const void *pData, sal_Int32 size)
PDFElement * Lookup(const OString &rDictionaryKey)
sal_uInt64 m_nDictionaryLength
Length of the dictionary buffer till (before) the '>>' token.
Definition: pdfdocument.hxx:77
sal_uInt64 GetArrayOffset() const
Stream object: a byte array with a known length.
sal_uInt64 m_nLocation
Offset after the '<<' token.
sal_uInt16 nPos
static PDFElement * Lookup(const std::map< OString, PDFElement * > &rDictionary, const OString &rKey)
const void * GetData()
void SetArray(PDFArrayElement *pArrayElement)
sal_uInt64 m_nLocation
Offset before the '>>' token.
Definition: pdfdocument.cxx:63
bool Read(SvStream &rStream) override
std::map< OString, PDFElement * > m_aDictionary
Definition: pdfdocument.hxx:71
sal_uInt64 m_nOffset
Location after the 'R' token.
void SetKeyOffset(const OString &rKey, sal_uInt64 nOffset)
OStringBuffer & padToLength(OStringBuffer &rBuffer, sal_Int32 nLength, char cFill= '\0')