LibreOffice Module vcl (master)  1
pdfdocument.cxx
Go to the documentation of this file.
1 /* -*- Mode: C++; tab-width: 4; indent-tabs-mode: nil; c-basic-offset: 4 -*- */
2 /*
3  * This file is part of the LibreOffice project.
4  *
5  * This Source Code Form is subject to the terms of the Mozilla Public
6  * License, v. 2.0. If a copy of the MPL was not distributed with this
7  * file, You can obtain one at http://mozilla.org/MPL/2.0/.
8  */
9 
11 
12 #include <map>
13 #include <memory>
14 #include <vector>
15 
16 #include <com/sun/star/uno/Sequence.hxx>
17 #include <com/sun/star/security/XCertificate.hpp>
18 
20 #include <comphelper/string.hxx>
21 #include <rtl/character.hxx>
22 #include <rtl/strbuf.hxx>
23 #include <rtl/string.hxx>
24 #include <sal/log.hxx>
25 #include <sal/types.h>
26 #include <svl/cryptosign.hxx>
27 #include <tools/zcodec.hxx>
28 #include <vcl/pdfwriter.hxx>
29 #include <o3tl/safeint.hxx>
30 
31 #include <pdf/objectcopier.hxx>
32 
33 using namespace com::sun::star;
34 
35 namespace vcl::filter
36 {
37 const int MAX_SIGNATURE_CONTENT_LENGTH = 50000;
38 
39 XRefEntry::XRefEntry() = default;
40 
41 PDFDocument::PDFDocument() = default;
42 
43 PDFDocument::~PDFDocument() = default;
44 
45 bool PDFDocument::RemoveSignature(size_t nPosition)
46 {
47  std::vector<PDFObjectElement*> aSignatures = GetSignatureWidgets();
48  if (nPosition >= aSignatures.size())
49  {
50  SAL_WARN("vcl.filter", "PDFDocument::RemoveSignature: invalid nPosition");
51  return false;
52  }
53 
54  if (aSignatures.size() != m_aEOFs.size() - 1)
55  {
56  SAL_WARN("vcl.filter", "PDFDocument::RemoveSignature: no 1:1 mapping between signatures "
57  "and incremental updates");
58  return false;
59  }
60 
61  // The EOF offset is the end of the original file, without the signature at
62  // nPosition.
63  m_aEditBuffer.Seek(m_aEOFs[nPosition]);
64  // Drop all bytes after the current position.
65  m_aEditBuffer.SetStreamSize(m_aEditBuffer.Tell() + 1);
66 
67  return m_aEditBuffer.good();
68 }
69 
70 sal_Int32 PDFDocument::createObject()
71 {
72  sal_Int32 nObject = m_aXRef.size();
73  m_aXRef[nObject] = XRefEntry();
74  return nObject;
75 }
76 
77 bool PDFDocument::updateObject(sal_Int32 nObject)
78 {
79  if (o3tl::make_unsigned(nObject) >= m_aXRef.size())
80  {
81  SAL_WARN("vcl.filter", "PDFDocument::updateObject: invalid nObject");
82  return false;
83  }
84 
85  XRefEntry aEntry;
86  aEntry.SetOffset(m_aEditBuffer.Tell());
87  aEntry.SetDirty(true);
88  m_aXRef[nObject] = aEntry;
89  return true;
90 }
91 
92 bool PDFDocument::writeBuffer(const void* pBuffer, sal_uInt64 nBytes)
93 {
94  std::size_t nWritten = m_aEditBuffer.WriteBytes(pBuffer, nBytes);
95  return nWritten == nBytes;
96 }
97 
98 void PDFDocument::SetSignatureLine(const std::vector<sal_Int8>& rSignatureLine)
99 {
100  m_aSignatureLine = rSignatureLine;
101 }
102 
103 void PDFDocument::SetSignaturePage(size_t nPage) { m_nSignaturePage = nPage; }
104 
105 sal_uInt32 PDFDocument::GetNextSignature()
106 {
107  sal_uInt32 nRet = 0;
108  for (const auto& pSignature : GetSignatureWidgets())
109  {
110  auto pT = dynamic_cast<PDFLiteralStringElement*>(pSignature->Lookup("T"));
111  if (!pT)
112  continue;
113 
114  const OString& rValue = pT->GetValue();
115  const OString aPrefix = "Signature";
116  if (!rValue.startsWith(aPrefix))
117  continue;
118 
119  nRet = std::max(nRet, rValue.copy(aPrefix.getLength()).toUInt32());
120  }
121 
122  return nRet + 1;
123 }
124 
125 sal_Int32 PDFDocument::WriteSignatureObject(const OUString& rDescription, bool bAdES,
126  sal_uInt64& rLastByteRangeOffset,
127  sal_Int64& rContentOffset)
128 {
129  // Write signature object.
130  sal_Int32 nSignatureId = m_aXRef.size();
131  XRefEntry aSignatureEntry;
132  aSignatureEntry.SetOffset(m_aEditBuffer.Tell());
133  aSignatureEntry.SetDirty(true);
134  m_aXRef[nSignatureId] = aSignatureEntry;
135  OStringBuffer aSigBuffer;
136  aSigBuffer.append(nSignatureId);
137  aSigBuffer.append(" 0 obj\n");
138  aSigBuffer.append("<</Contents <");
139  rContentOffset = aSignatureEntry.GetOffset() + aSigBuffer.getLength();
140  // Reserve space for the PKCS#7 object.
141  OStringBuffer aContentFiller(MAX_SIGNATURE_CONTENT_LENGTH);
142  comphelper::string::padToLength(aContentFiller, MAX_SIGNATURE_CONTENT_LENGTH, '0');
143  aSigBuffer.append(aContentFiller.makeStringAndClear());
144  aSigBuffer.append(">\n/Type/Sig/SubFilter");
145  if (bAdES)
146  aSigBuffer.append("/ETSI.CAdES.detached");
147  else
148  aSigBuffer.append("/adbe.pkcs7.detached");
149 
150  // Time of signing.
151  aSigBuffer.append(" /M (");
152  aSigBuffer.append(vcl::PDFWriter::GetDateTime());
153  aSigBuffer.append(")");
154 
155  // Byte range: we can write offset1-length1 and offset2 right now, will
156  // write length2 later.
157  aSigBuffer.append(" /ByteRange [ 0 ");
158  // -1 and +1 is the leading "<" and the trailing ">" around the hex string.
159  aSigBuffer.append(rContentOffset - 1);
160  aSigBuffer.append(" ");
161  aSigBuffer.append(rContentOffset + MAX_SIGNATURE_CONTENT_LENGTH + 1);
162  aSigBuffer.append(" ");
163  rLastByteRangeOffset = aSignatureEntry.GetOffset() + aSigBuffer.getLength();
164  // We don't know how many bytes we need for the last ByteRange value, this
165  // should be enough.
166  OStringBuffer aByteRangeFiller;
167  comphelper::string::padToLength(aByteRangeFiller, 100, ' ');
168  aSigBuffer.append(aByteRangeFiller.makeStringAndClear());
169  // Finish the Sig obj.
170  aSigBuffer.append(" /Filter/Adobe.PPKMS");
171 
172  if (!rDescription.isEmpty())
173  {
174  aSigBuffer.append("/Reason<");
175  vcl::PDFWriter::AppendUnicodeTextString(rDescription, aSigBuffer);
176  aSigBuffer.append(">");
177  }
178 
179  aSigBuffer.append(" >>\nendobj\n\n");
180  m_aEditBuffer.WriteOString(aSigBuffer.toString());
181 
182  return nSignatureId;
183 }
184 
185 sal_Int32 PDFDocument::WriteAppearanceObject(tools::Rectangle& rSignatureRectangle)
186 {
187  PDFDocument aPDFDocument;
188  filter::PDFObjectElement* pPage = nullptr;
189  std::vector<filter::PDFObjectElement*> aContentStreams;
190 
191  if (!m_aSignatureLine.empty())
192  {
193  // Parse the PDF data of signature line: we can set the signature rectangle to non-empty
194  // based on it.
195  SvMemoryStream aPDFStream;
196  aPDFStream.WriteBytes(m_aSignatureLine.data(), m_aSignatureLine.size());
197  aPDFStream.Seek(0);
198  if (!aPDFDocument.Read(aPDFStream))
199  {
200  SAL_WARN("vcl.filter",
201  "PDFDocument::WriteAppearanceObject: failed to read the PDF document");
202  return -1;
203  }
204 
205  std::vector<filter::PDFObjectElement*> aPages = aPDFDocument.GetPages();
206  if (aPages.empty())
207  {
208  SAL_WARN("vcl.filter", "PDFDocument::WriteAppearanceObject: no pages");
209  return -1;
210  }
211 
212  pPage = aPages[0];
213  if (!pPage)
214  {
215  SAL_WARN("vcl.filter", "PDFDocument::WriteAppearanceObject: no page");
216  return -1;
217  }
218 
219  // Calculate the bounding box.
220  PDFElement* pMediaBox = pPage->Lookup("MediaBox");
221  auto pMediaBoxArray = dynamic_cast<PDFArrayElement*>(pMediaBox);
222  if (!pMediaBoxArray || pMediaBoxArray->GetElements().size() < 4)
223  {
224  SAL_WARN("vcl.filter",
225  "PDFDocument::WriteAppearanceObject: MediaBox is not an array of 4");
226  return -1;
227  }
228  const std::vector<PDFElement*>& rMediaBoxElements = pMediaBoxArray->GetElements();
229  auto pWidth = dynamic_cast<PDFNumberElement*>(rMediaBoxElements[2]);
230  if (!pWidth)
231  {
232  SAL_WARN("vcl.filter", "PDFDocument::WriteAppearanceObject: MediaBox has no width");
233  return -1;
234  }
235  rSignatureRectangle.setWidth(pWidth->GetValue());
236  auto pHeight = dynamic_cast<PDFNumberElement*>(rMediaBoxElements[3]);
237  if (!pHeight)
238  {
239  SAL_WARN("vcl.filter", "PDFDocument::WriteAppearanceObject: MediaBox has no height");
240  return -1;
241  }
242  rSignatureRectangle.setHeight(pHeight->GetValue());
243 
244  if (PDFObjectElement* pContentStream = pPage->LookupObject("Contents"))
245  {
246  aContentStreams.push_back(pContentStream);
247  }
248 
249  if (aContentStreams.empty())
250  {
251  SAL_WARN("vcl.filter", "PDFDocument::WriteAppearanceObject: no content stream");
252  return -1;
253  }
254  }
255  m_aSignatureLine.clear();
256 
257  // Write appearance object: allocate an ID.
258  sal_Int32 nAppearanceId = m_aXRef.size();
259  m_aXRef[nAppearanceId] = XRefEntry();
260 
261  // Write the object content.
262  SvMemoryStream aEditBuffer;
263  aEditBuffer.WriteUInt32AsString(nAppearanceId);
264  aEditBuffer.WriteCharPtr(" 0 obj\n");
265  aEditBuffer.WriteCharPtr("<</Type/XObject\n/Subtype/Form\n");
266 
267  PDFObjectCopier aCopier(*this);
268  if (!aContentStreams.empty())
269  {
270  assert(pPage && "aContentStreams is only filled if there was a pPage");
271  OStringBuffer aBuffer;
272  aCopier.copyPageResources(pPage, aBuffer);
273  aEditBuffer.WriteOString(aBuffer.makeStringAndClear());
274  }
275 
276  aEditBuffer.WriteCharPtr("/BBox[0 0 ");
277  aEditBuffer.WriteOString(OString::number(rSignatureRectangle.getWidth()));
278  aEditBuffer.WriteCharPtr(" ");
279  aEditBuffer.WriteOString(OString::number(rSignatureRectangle.getHeight()));
280  aEditBuffer.WriteCharPtr("]\n/Length ");
281 
282  // Add the object to the doc-level edit buffer and update the offset.
283  SvMemoryStream aStream;
284  bool bCompressed = false;
285  sal_Int32 nLength = 0;
286  if (!aContentStreams.empty())
287  {
288  nLength = PDFObjectCopier::copyPageStreams(aContentStreams, aStream, bCompressed);
289  }
290  aEditBuffer.WriteOString(OString::number(nLength));
291  if (bCompressed)
292  {
293  aEditBuffer.WriteOString(" /Filter/FlateDecode");
294  }
295 
296  aEditBuffer.WriteCharPtr("\n>>\n");
297 
298  aEditBuffer.WriteCharPtr("stream\n");
299 
300  // Copy the original page streams to the form XObject stream.
301  aStream.Seek(0);
302  aEditBuffer.WriteStream(aStream);
303 
304  aEditBuffer.WriteCharPtr("\nendstream\nendobj\n\n");
305 
306  aEditBuffer.Seek(0);
307  XRefEntry aAppearanceEntry;
308  aAppearanceEntry.SetOffset(m_aEditBuffer.Tell());
309  aAppearanceEntry.SetDirty(true);
310  m_aXRef[nAppearanceId] = aAppearanceEntry;
311  m_aEditBuffer.WriteStream(aEditBuffer);
312 
313  return nAppearanceId;
314 }
315 
316 sal_Int32 PDFDocument::WriteAnnotObject(PDFObjectElement const& rFirstPage, sal_Int32 nSignatureId,
317  sal_Int32 nAppearanceId,
318  const tools::Rectangle& rSignatureRectangle)
319 {
320  // Decide what identifier to use for the new signature.
321  sal_uInt32 nNextSignature = GetNextSignature();
322 
323  // Write the Annot object, references nSignatureId and nAppearanceId.
324  sal_Int32 nAnnotId = m_aXRef.size();
325  XRefEntry aAnnotEntry;
326  aAnnotEntry.SetOffset(m_aEditBuffer.Tell());
327  aAnnotEntry.SetDirty(true);
328  m_aXRef[nAnnotId] = aAnnotEntry;
329  m_aEditBuffer.WriteUInt32AsString(nAnnotId);
330  m_aEditBuffer.WriteCharPtr(" 0 obj\n");
331  m_aEditBuffer.WriteCharPtr("<</Type/Annot/Subtype/Widget/F 132\n");
332  m_aEditBuffer.WriteCharPtr("/Rect[0 0 ");
333  m_aEditBuffer.WriteOString(OString::number(rSignatureRectangle.getWidth()));
334  m_aEditBuffer.WriteCharPtr(" ");
335  m_aEditBuffer.WriteOString(OString::number(rSignatureRectangle.getHeight()));
336  m_aEditBuffer.WriteCharPtr("]\n");
337  m_aEditBuffer.WriteCharPtr("/FT/Sig\n");
338  m_aEditBuffer.WriteCharPtr("/P ");
339  m_aEditBuffer.WriteUInt32AsString(rFirstPage.GetObjectValue());
340  m_aEditBuffer.WriteCharPtr(" 0 R\n");
341  m_aEditBuffer.WriteCharPtr("/T(Signature");
342  m_aEditBuffer.WriteUInt32AsString(nNextSignature);
343  m_aEditBuffer.WriteCharPtr(")\n");
344  m_aEditBuffer.WriteCharPtr("/V ");
345  m_aEditBuffer.WriteUInt32AsString(nSignatureId);
346  m_aEditBuffer.WriteCharPtr(" 0 R\n");
347  m_aEditBuffer.WriteCharPtr("/DV ");
348  m_aEditBuffer.WriteUInt32AsString(nSignatureId);
349  m_aEditBuffer.WriteCharPtr(" 0 R\n");
350  m_aEditBuffer.WriteCharPtr("/AP<<\n/N ");
351  m_aEditBuffer.WriteUInt32AsString(nAppearanceId);
352  m_aEditBuffer.WriteCharPtr(" 0 R\n>>\n");
353  m_aEditBuffer.WriteCharPtr(">>\nendobj\n\n");
354 
355  return nAnnotId;
356 }
357 
358 bool PDFDocument::WritePageObject(PDFObjectElement& rFirstPage, sal_Int32 nAnnotId)
359 {
360  PDFElement* pAnnots = rFirstPage.Lookup("Annots");
361  auto pAnnotsReference = dynamic_cast<PDFReferenceElement*>(pAnnots);
362  if (pAnnotsReference)
363  {
364  // Write the updated Annots key of the Page object.
365  PDFObjectElement* pAnnotsObject = pAnnotsReference->LookupObject();
366  if (!pAnnotsObject)
367  {
368  SAL_WARN("vcl.filter", "PDFDocument::Sign: invalid Annots reference");
369  return false;
370  }
371 
372  sal_uInt32 nAnnotsId = pAnnotsObject->GetObjectValue();
373  m_aXRef[nAnnotsId].SetType(XRefEntryType::NOT_COMPRESSED);
374  m_aXRef[nAnnotsId].SetOffset(m_aEditBuffer.Tell());
375  m_aXRef[nAnnotsId].SetDirty(true);
376  m_aEditBuffer.WriteUInt32AsString(nAnnotsId);
377  m_aEditBuffer.WriteCharPtr(" 0 obj\n[");
378 
379  // Write existing references.
380  PDFArrayElement* pArray = pAnnotsObject->GetArray();
381  if (!pArray)
382  {
383  SAL_WARN("vcl.filter", "PDFDocument::Sign: Page Annots is a reference to a non-array");
384  return false;
385  }
386 
387  for (size_t i = 0; i < pArray->GetElements().size(); ++i)
388  {
389  auto pReference = dynamic_cast<PDFReferenceElement*>(pArray->GetElements()[i]);
390  if (!pReference)
391  continue;
392 
393  if (i)
394  m_aEditBuffer.WriteCharPtr(" ");
395  m_aEditBuffer.WriteUInt32AsString(pReference->GetObjectValue());
396  m_aEditBuffer.WriteCharPtr(" 0 R");
397  }
398  // Write our reference.
399  m_aEditBuffer.WriteCharPtr(" ");
400  m_aEditBuffer.WriteUInt32AsString(nAnnotId);
401  m_aEditBuffer.WriteCharPtr(" 0 R");
402 
403  m_aEditBuffer.WriteCharPtr("]\nendobj\n\n");
404  }
405  else
406  {
407  // Write the updated first page object, references nAnnotId.
408  sal_uInt32 nFirstPageId = rFirstPage.GetObjectValue();
409  if (nFirstPageId >= m_aXRef.size())
410  {
411  SAL_WARN("vcl.filter", "PDFDocument::Sign: invalid first page obj id");
412  return false;
413  }
414  m_aXRef[nFirstPageId].SetOffset(m_aEditBuffer.Tell());
415  m_aXRef[nFirstPageId].SetDirty(true);
416  m_aEditBuffer.WriteUInt32AsString(nFirstPageId);
417  m_aEditBuffer.WriteCharPtr(" 0 obj\n");
418  m_aEditBuffer.WriteCharPtr("<<");
419  auto pAnnotsArray = dynamic_cast<PDFArrayElement*>(pAnnots);
420  if (!pAnnotsArray)
421  {
422  // No Annots key, just write the key with a single reference.
423  m_aEditBuffer.WriteBytes(static_cast<const char*>(m_aEditBuffer.GetData())
424  + rFirstPage.GetDictionaryOffset(),
425  rFirstPage.GetDictionaryLength());
426  m_aEditBuffer.WriteCharPtr("/Annots[");
427  m_aEditBuffer.WriteUInt32AsString(nAnnotId);
428  m_aEditBuffer.WriteCharPtr(" 0 R]");
429  }
430  else
431  {
432  // Annots key is already there, insert our reference at the end.
433  PDFDictionaryElement* pDictionary = rFirstPage.GetDictionary();
434 
435  // Offset right before the end of the Annots array.
436  sal_uInt64 nAnnotsEndOffset = pDictionary->GetKeyOffset("Annots")
437  + pDictionary->GetKeyValueLength("Annots") - 1;
438  // Length of beginning of the dictionary -> Annots end.
439  sal_uInt64 nAnnotsBeforeEndLength = nAnnotsEndOffset - rFirstPage.GetDictionaryOffset();
440  m_aEditBuffer.WriteBytes(static_cast<const char*>(m_aEditBuffer.GetData())
441  + rFirstPage.GetDictionaryOffset(),
442  nAnnotsBeforeEndLength);
443  m_aEditBuffer.WriteCharPtr(" ");
444  m_aEditBuffer.WriteUInt32AsString(nAnnotId);
445  m_aEditBuffer.WriteCharPtr(" 0 R");
446  // Length of Annots end -> end of the dictionary.
447  sal_uInt64 nAnnotsAfterEndLength = rFirstPage.GetDictionaryOffset()
448  + rFirstPage.GetDictionaryLength()
449  - nAnnotsEndOffset;
450  m_aEditBuffer.WriteBytes(static_cast<const char*>(m_aEditBuffer.GetData())
451  + nAnnotsEndOffset,
452  nAnnotsAfterEndLength);
453  }
454  m_aEditBuffer.WriteCharPtr(">>");
455  m_aEditBuffer.WriteCharPtr("\nendobj\n\n");
456  }
457 
458  return true;
459 }
460 
461 bool PDFDocument::WriteCatalogObject(sal_Int32 nAnnotId, PDFReferenceElement*& pRoot)
462 {
463  if (m_pXRefStream)
464  pRoot = dynamic_cast<PDFReferenceElement*>(m_pXRefStream->Lookup("Root"));
465  else
466  {
467  if (!m_pTrailer)
468  {
469  SAL_WARN("vcl.filter", "PDFDocument::Sign: found no trailer");
470  return false;
471  }
472  pRoot = dynamic_cast<PDFReferenceElement*>(m_pTrailer->Lookup("Root"));
473  }
474  if (!pRoot)
475  {
476  SAL_WARN("vcl.filter", "PDFDocument::Sign: trailer has no root reference");
477  return false;
478  }
479  PDFObjectElement* pCatalog = pRoot->LookupObject();
480  if (!pCatalog)
481  {
482  SAL_WARN("vcl.filter", "PDFDocument::Sign: invalid catalog reference");
483  return false;
484  }
485  sal_uInt32 nCatalogId = pCatalog->GetObjectValue();
486  if (nCatalogId >= m_aXRef.size())
487  {
488  SAL_WARN("vcl.filter", "PDFDocument::Sign: invalid catalog obj id");
489  return false;
490  }
491  PDFElement* pAcroForm = pCatalog->Lookup("AcroForm");
492  auto pAcroFormReference = dynamic_cast<PDFReferenceElement*>(pAcroForm);
493  if (pAcroFormReference)
494  {
495  // Write the updated AcroForm key of the Catalog object.
496  PDFObjectElement* pAcroFormObject = pAcroFormReference->LookupObject();
497  if (!pAcroFormObject)
498  {
499  SAL_WARN("vcl.filter", "PDFDocument::Sign: invalid AcroForm reference");
500  return false;
501  }
502 
503  sal_uInt32 nAcroFormId = pAcroFormObject->GetObjectValue();
504  m_aXRef[nAcroFormId].SetType(XRefEntryType::NOT_COMPRESSED);
505  m_aXRef[nAcroFormId].SetOffset(m_aEditBuffer.Tell());
506  m_aXRef[nAcroFormId].SetDirty(true);
507  m_aEditBuffer.WriteUInt32AsString(nAcroFormId);
508  m_aEditBuffer.WriteCharPtr(" 0 obj\n");
509 
510  // If this is nullptr, then the AcroForm object is not in an object stream.
511  SvMemoryStream* pStreamBuffer = pAcroFormObject->GetStreamBuffer();
512 
513  if (!pAcroFormObject->Lookup("Fields"))
514  {
515  SAL_WARN("vcl.filter",
516  "PDFDocument::Sign: AcroForm object without required Fields key");
517  return false;
518  }
519 
520  PDFDictionaryElement* pAcroFormDictionary = pAcroFormObject->GetDictionary();
521  if (!pAcroFormDictionary)
522  {
523  SAL_WARN("vcl.filter", "PDFDocument::Sign: AcroForm object has no dictionary");
524  return false;
525  }
526 
527  // Offset right before the end of the Fields array.
528  sal_uInt64 nFieldsEndOffset = pAcroFormDictionary->GetKeyOffset("Fields")
529  + pAcroFormDictionary->GetKeyValueLength("Fields")
530  - strlen("]");
531 
532  // Length of beginning of the object dictionary -> Fields end.
533  sal_uInt64 nFieldsBeforeEndLength = nFieldsEndOffset;
534  if (pStreamBuffer)
535  m_aEditBuffer.WriteBytes(pStreamBuffer->GetData(), nFieldsBeforeEndLength);
536  else
537  {
538  nFieldsBeforeEndLength -= pAcroFormObject->GetDictionaryOffset();
539  m_aEditBuffer.WriteCharPtr("<<");
540  m_aEditBuffer.WriteBytes(static_cast<const char*>(m_aEditBuffer.GetData())
541  + pAcroFormObject->GetDictionaryOffset(),
542  nFieldsBeforeEndLength);
543  }
544 
545  // Append our reference at the end of the Fields array.
546  m_aEditBuffer.WriteCharPtr(" ");
547  m_aEditBuffer.WriteUInt32AsString(nAnnotId);
548  m_aEditBuffer.WriteCharPtr(" 0 R");
549 
550  // Length of Fields end -> end of the object dictionary.
551  if (pStreamBuffer)
552  {
553  sal_uInt64 nFieldsAfterEndLength = pStreamBuffer->GetSize() - nFieldsEndOffset;
554  m_aEditBuffer.WriteBytes(static_cast<const char*>(pStreamBuffer->GetData())
555  + nFieldsEndOffset,
556  nFieldsAfterEndLength);
557  }
558  else
559  {
560  sal_uInt64 nFieldsAfterEndLength = pAcroFormObject->GetDictionaryOffset()
561  + pAcroFormObject->GetDictionaryLength()
562  - nFieldsEndOffset;
563  m_aEditBuffer.WriteBytes(static_cast<const char*>(m_aEditBuffer.GetData())
564  + nFieldsEndOffset,
565  nFieldsAfterEndLength);
566  m_aEditBuffer.WriteCharPtr(">>");
567  }
568 
569  m_aEditBuffer.WriteCharPtr("\nendobj\n\n");
570  }
571  else
572  {
573  // Write the updated Catalog object, references nAnnotId.
574  auto pAcroFormDictionary = dynamic_cast<PDFDictionaryElement*>(pAcroForm);
575  m_aXRef[nCatalogId].SetOffset(m_aEditBuffer.Tell());
576  m_aXRef[nCatalogId].SetDirty(true);
577  m_aEditBuffer.WriteUInt32AsString(nCatalogId);
578  m_aEditBuffer.WriteCharPtr(" 0 obj\n");
579  m_aEditBuffer.WriteCharPtr("<<");
580  if (!pAcroFormDictionary)
581  {
582  // No AcroForm key, assume no signatures.
583  m_aEditBuffer.WriteBytes(static_cast<const char*>(m_aEditBuffer.GetData())
584  + pCatalog->GetDictionaryOffset(),
585  pCatalog->GetDictionaryLength());
586  m_aEditBuffer.WriteCharPtr("/AcroForm<</Fields[\n");
587  m_aEditBuffer.WriteUInt32AsString(nAnnotId);
588  m_aEditBuffer.WriteCharPtr(" 0 R\n]/SigFlags 3>>\n");
589  }
590  else
591  {
592  // AcroForm key is already there, insert our reference at the Fields end.
593  auto it = pAcroFormDictionary->GetItems().find("Fields");
594  if (it == pAcroFormDictionary->GetItems().end())
595  {
596  SAL_WARN("vcl.filter", "PDFDocument::Sign: AcroForm without required Fields key");
597  return false;
598  }
599 
600  auto pFields = dynamic_cast<PDFArrayElement*>(it->second);
601  if (!pFields)
602  {
603  SAL_WARN("vcl.filter", "PDFDocument::Sign: AcroForm Fields is not an array");
604  return false;
605  }
606 
607  // Offset right before the end of the Fields array.
608  sal_uInt64 nFieldsEndOffset = pAcroFormDictionary->GetKeyOffset("Fields")
609  + pAcroFormDictionary->GetKeyValueLength("Fields") - 1;
610  // Length of beginning of the Catalog dictionary -> Fields end.
611  sal_uInt64 nFieldsBeforeEndLength = nFieldsEndOffset - pCatalog->GetDictionaryOffset();
612  m_aEditBuffer.WriteBytes(static_cast<const char*>(m_aEditBuffer.GetData())
613  + pCatalog->GetDictionaryOffset(),
614  nFieldsBeforeEndLength);
615  m_aEditBuffer.WriteCharPtr(" ");
616  m_aEditBuffer.WriteUInt32AsString(nAnnotId);
617  m_aEditBuffer.WriteCharPtr(" 0 R");
618  // Length of Fields end -> end of the Catalog dictionary.
619  sal_uInt64 nFieldsAfterEndLength = pCatalog->GetDictionaryOffset()
620  + pCatalog->GetDictionaryLength() - nFieldsEndOffset;
621  m_aEditBuffer.WriteBytes(static_cast<const char*>(m_aEditBuffer.GetData())
622  + nFieldsEndOffset,
623  nFieldsAfterEndLength);
624  }
625  m_aEditBuffer.WriteCharPtr(">>\nendobj\n\n");
626  }
627 
628  return true;
629 }
630 
631 void PDFDocument::WriteXRef(sal_uInt64 nXRefOffset, PDFReferenceElement const* pRoot)
632 {
633  if (m_pXRefStream)
634  {
635  // Write the xref stream.
636  // This is a bit meta: the xref stream stores its own offset.
637  sal_Int32 nXRefStreamId = m_aXRef.size();
638  XRefEntry aXRefStreamEntry;
639  aXRefStreamEntry.SetOffset(nXRefOffset);
640  aXRefStreamEntry.SetDirty(true);
641  m_aXRef[nXRefStreamId] = aXRefStreamEntry;
642 
643  // Write stream data.
644  SvMemoryStream aXRefStream;
645  const size_t nOffsetLen = 3;
646  // 3 additional bytes: predictor, the first and the third field.
647  const size_t nLineLength = nOffsetLen + 3;
648  // This is the line as it appears before tweaking according to the predictor.
649  std::vector<unsigned char> aOrigLine(nLineLength);
650  // This is the previous line.
651  std::vector<unsigned char> aPrevLine(nLineLength);
652  // This is the line as written to the stream.
653  std::vector<unsigned char> aFilteredLine(nLineLength);
654  for (const auto& rXRef : m_aXRef)
655  {
656  const XRefEntry& rEntry = rXRef.second;
657 
658  if (!rEntry.GetDirty())
659  continue;
660 
661  // Predictor.
662  size_t nPos = 0;
663  // PNG prediction: up (on all rows).
664  aOrigLine[nPos++] = 2;
665 
666  // First field.
667  unsigned char nType = 0;
668  switch (rEntry.GetType())
669  {
670  case XRefEntryType::FREE:
671  nType = 0;
672  break;
673  case XRefEntryType::NOT_COMPRESSED:
674  nType = 1;
675  break;
676  case XRefEntryType::COMPRESSED:
677  nType = 2;
678  break;
679  }
680  aOrigLine[nPos++] = nType;
681 
682  // Second field.
683  for (size_t i = 0; i < nOffsetLen; ++i)
684  {
685  size_t nByte = nOffsetLen - i - 1;
686  // Fields requiring more than one byte are stored with the
687  // high-order byte first.
688  unsigned char nCh = (rEntry.GetOffset() & (0xff << (nByte * 8))) >> (nByte * 8);
689  aOrigLine[nPos++] = nCh;
690  }
691 
692  // Third field.
693  aOrigLine[nPos++] = 0;
694 
695  // Now apply the predictor.
696  aFilteredLine[0] = aOrigLine[0];
697  for (size_t i = 1; i < nLineLength; ++i)
698  {
699  // Count the delta vs the previous line.
700  aFilteredLine[i] = aOrigLine[i] - aPrevLine[i];
701  // Remember the new reference.
702  aPrevLine[i] = aOrigLine[i];
703  }
704 
705  aXRefStream.WriteBytes(aFilteredLine.data(), aFilteredLine.size());
706  }
707 
708  m_aEditBuffer.WriteUInt32AsString(nXRefStreamId);
709  m_aEditBuffer.WriteCharPtr(
710  " 0 obj\n<</DecodeParms<</Columns 5/Predictor 12>>/Filter/FlateDecode");
711 
712  // ID.
713  auto pID = dynamic_cast<PDFArrayElement*>(m_pXRefStream->Lookup("ID"));
714  if (pID)
715  {
716  const std::vector<PDFElement*>& rElements = pID->GetElements();
717  m_aEditBuffer.WriteCharPtr("/ID [ <");
718  for (size_t i = 0; i < rElements.size(); ++i)
719  {
720  auto pIDString = dynamic_cast<PDFHexStringElement*>(rElements[i]);
721  if (!pIDString)
722  continue;
723 
724  m_aEditBuffer.WriteOString(pIDString->GetValue());
725  if ((i + 1) < rElements.size())
726  m_aEditBuffer.WriteCharPtr("> <");
727  }
728  m_aEditBuffer.WriteCharPtr("> ] ");
729  }
730 
731  // Index.
732  m_aEditBuffer.WriteCharPtr("/Index [ ");
733  for (const auto& rXRef : m_aXRef)
734  {
735  if (!rXRef.second.GetDirty())
736  continue;
737 
738  m_aEditBuffer.WriteUInt32AsString(rXRef.first);
739  m_aEditBuffer.WriteCharPtr(" 1 ");
740  }
741  m_aEditBuffer.WriteCharPtr("] ");
742 
743  // Info.
744  auto pInfo = dynamic_cast<PDFReferenceElement*>(m_pXRefStream->Lookup("Info"));
745  if (pInfo)
746  {
747  m_aEditBuffer.WriteCharPtr("/Info ");
748  m_aEditBuffer.WriteUInt32AsString(pInfo->GetObjectValue());
749  m_aEditBuffer.WriteCharPtr(" ");
750  m_aEditBuffer.WriteUInt32AsString(pInfo->GetGenerationValue());
751  m_aEditBuffer.WriteCharPtr(" R ");
752  }
753 
754  // Length.
755  m_aEditBuffer.WriteCharPtr("/Length ");
756  {
757  ZCodec aZCodec;
758  aZCodec.BeginCompression();
759  aXRefStream.Seek(0);
760  SvMemoryStream aStream;
761  aZCodec.Compress(aXRefStream, aStream);
762  aZCodec.EndCompression();
763  aXRefStream.Seek(0);
764  aXRefStream.SetStreamSize(0);
765  aStream.Seek(0);
766  aXRefStream.WriteStream(aStream);
767  }
768  m_aEditBuffer.WriteUInt32AsString(aXRefStream.GetSize());
769 
770  if (!m_aStartXRefs.empty())
771  {
772  // Write location of the previous cross-reference section.
773  m_aEditBuffer.WriteCharPtr("/Prev ");
774  m_aEditBuffer.WriteUInt32AsString(m_aStartXRefs.back());
775  }
776 
777  // Root.
778  m_aEditBuffer.WriteCharPtr("/Root ");
779  m_aEditBuffer.WriteUInt32AsString(pRoot->GetObjectValue());
780  m_aEditBuffer.WriteCharPtr(" ");
781  m_aEditBuffer.WriteUInt32AsString(pRoot->GetGenerationValue());
782  m_aEditBuffer.WriteCharPtr(" R ");
783 
784  // Size.
785  m_aEditBuffer.WriteCharPtr("/Size ");
786  m_aEditBuffer.WriteUInt32AsString(m_aXRef.size());
787 
788  m_aEditBuffer.WriteCharPtr("/Type/XRef/W[1 3 1]>>\nstream\n");
789  aXRefStream.Seek(0);
790  m_aEditBuffer.WriteStream(aXRefStream);
791  m_aEditBuffer.WriteCharPtr("\nendstream\nendobj\n\n");
792  }
793  else
794  {
795  // Write the xref table.
796  m_aEditBuffer.WriteCharPtr("xref\n");
797  for (const auto& rXRef : m_aXRef)
798  {
799  size_t nObject = rXRef.first;
800  size_t nOffset = rXRef.second.GetOffset();
801  if (!rXRef.second.GetDirty())
802  continue;
803 
804  m_aEditBuffer.WriteUInt32AsString(nObject);
805  m_aEditBuffer.WriteCharPtr(" 1\n");
806  OStringBuffer aBuffer;
807  aBuffer.append(static_cast<sal_Int32>(nOffset));
808  while (aBuffer.getLength() < 10)
809  aBuffer.insert(0, "0");
810  if (nObject == 0)
811  aBuffer.append(" 65535 f \n");
812  else
813  aBuffer.append(" 00000 n \n");
814  m_aEditBuffer.WriteOString(aBuffer.toString());
815  }
816 
817  // Write the trailer.
818  m_aEditBuffer.WriteCharPtr("trailer\n<</Size ");
819  m_aEditBuffer.WriteUInt32AsString(m_aXRef.size());
820  m_aEditBuffer.WriteCharPtr("/Root ");
821  m_aEditBuffer.WriteUInt32AsString(pRoot->GetObjectValue());
822  m_aEditBuffer.WriteCharPtr(" ");
823  m_aEditBuffer.WriteUInt32AsString(pRoot->GetGenerationValue());
824  m_aEditBuffer.WriteCharPtr(" R\n");
825  auto pInfo = dynamic_cast<PDFReferenceElement*>(m_pTrailer->Lookup("Info"));
826  if (pInfo)
827  {
828  m_aEditBuffer.WriteCharPtr("/Info ");
829  m_aEditBuffer.WriteUInt32AsString(pInfo->GetObjectValue());
830  m_aEditBuffer.WriteCharPtr(" ");
831  m_aEditBuffer.WriteUInt32AsString(pInfo->GetGenerationValue());
832  m_aEditBuffer.WriteCharPtr(" R\n");
833  }
834  auto pID = dynamic_cast<PDFArrayElement*>(m_pTrailer->Lookup("ID"));
835  if (pID)
836  {
837  const std::vector<PDFElement*>& rElements = pID->GetElements();
838  m_aEditBuffer.WriteCharPtr("/ID [ <");
839  for (size_t i = 0; i < rElements.size(); ++i)
840  {
841  auto pIDString = dynamic_cast<PDFHexStringElement*>(rElements[i]);
842  if (!pIDString)
843  continue;
844 
845  m_aEditBuffer.WriteOString(pIDString->GetValue());
846  if ((i + 1) < rElements.size())
847  m_aEditBuffer.WriteCharPtr(">\n<");
848  }
849  m_aEditBuffer.WriteCharPtr("> ]\n");
850  }
851 
852  if (!m_aStartXRefs.empty())
853  {
854  // Write location of the previous cross-reference section.
855  m_aEditBuffer.WriteCharPtr("/Prev ");
856  m_aEditBuffer.WriteUInt32AsString(m_aStartXRefs.back());
857  }
858 
859  m_aEditBuffer.WriteCharPtr(">>\n");
860  }
861 }
862 
863 bool PDFDocument::Sign(const uno::Reference<security::XCertificate>& xCertificate,
864  const OUString& rDescription, bool bAdES)
865 {
866  m_aEditBuffer.Seek(STREAM_SEEK_TO_END);
867  m_aEditBuffer.WriteCharPtr("\n");
868 
869  sal_uInt64 nSignatureLastByteRangeOffset = 0;
870  sal_Int64 nSignatureContentOffset = 0;
871  sal_Int32 nSignatureId = WriteSignatureObject(
872  rDescription, bAdES, nSignatureLastByteRangeOffset, nSignatureContentOffset);
873 
874  tools::Rectangle aSignatureRectangle;
875  sal_Int32 nAppearanceId = WriteAppearanceObject(aSignatureRectangle);
876 
877  std::vector<PDFObjectElement*> aPages = GetPages();
878  if (aPages.empty())
879  {
880  SAL_WARN("vcl.filter", "PDFDocument::Sign: found no pages");
881  return false;
882  }
883 
884  size_t nPage = 0;
885  if (m_nSignaturePage < aPages.size())
886  {
887  nPage = m_nSignaturePage;
888  }
889  if (!aPages[nPage])
890  {
891  SAL_WARN("vcl.filter", "PDFDocument::Sign: failed to find page #" << nPage);
892  return false;
893  }
894 
895  PDFObjectElement& rPage = *aPages[nPage];
896  sal_Int32 nAnnotId = WriteAnnotObject(rPage, nSignatureId, nAppearanceId, aSignatureRectangle);
897 
898  if (!WritePageObject(rPage, nAnnotId))
899  {
900  SAL_WARN("vcl.filter", "PDFDocument::Sign: failed to write the updated Page object");
901  return false;
902  }
903 
904  PDFReferenceElement* pRoot = nullptr;
905  if (!WriteCatalogObject(nAnnotId, pRoot))
906  {
907  SAL_WARN("vcl.filter", "PDFDocument::Sign: failed to write the updated Catalog object");
908  return false;
909  }
910 
911  sal_uInt64 nXRefOffset = m_aEditBuffer.Tell();
912  WriteXRef(nXRefOffset, pRoot);
913 
914  // Write startxref.
915  m_aEditBuffer.WriteCharPtr("startxref\n");
916  m_aEditBuffer.WriteUInt32AsString(nXRefOffset);
917  m_aEditBuffer.WriteCharPtr("\n%%EOF\n");
918 
919  // Finalize the signature, now that we know the total file size.
920  // Calculate the length of the last byte range.
921  sal_uInt64 nFileEnd = m_aEditBuffer.Tell();
922  sal_Int64 nLastByteRangeLength
923  = nFileEnd - (nSignatureContentOffset + MAX_SIGNATURE_CONTENT_LENGTH + 1);
924  // Write the length to the buffer.
925  m_aEditBuffer.Seek(nSignatureLastByteRangeOffset);
926  OString aByteRangeBuffer = OString::number(nLastByteRangeLength) + " ]";
927  m_aEditBuffer.WriteOString(aByteRangeBuffer);
928 
929  // Create the PKCS#7 object.
930  css::uno::Sequence<sal_Int8> aDerEncoded = xCertificate->getEncoded();
931  if (!aDerEncoded.hasElements())
932  {
933  SAL_WARN("vcl.filter", "PDFDocument::Sign: empty certificate");
934  return false;
935  }
936 
937  m_aEditBuffer.Seek(0);
938  sal_uInt64 nBufferSize1 = nSignatureContentOffset - 1;
939  std::unique_ptr<char[]> aBuffer1(new char[nBufferSize1]);
940  m_aEditBuffer.ReadBytes(aBuffer1.get(), nBufferSize1);
941 
942  m_aEditBuffer.Seek(nSignatureContentOffset + MAX_SIGNATURE_CONTENT_LENGTH + 1);
943  sal_uInt64 nBufferSize2 = nLastByteRangeLength;
944  std::unique_ptr<char[]> aBuffer2(new char[nBufferSize2]);
945  m_aEditBuffer.ReadBytes(aBuffer2.get(), nBufferSize2);
946 
947  OStringBuffer aCMSHexBuffer;
948  svl::crypto::Signing aSigning(xCertificate);
949  aSigning.AddDataRange(aBuffer1.get(), nBufferSize1);
950  aSigning.AddDataRange(aBuffer2.get(), nBufferSize2);
951  if (!aSigning.Sign(aCMSHexBuffer))
952  {
953  SAL_WARN("vcl.filter", "PDFDocument::Sign: PDFWriter::Sign() failed");
954  return false;
955  }
956 
957  assert(aCMSHexBuffer.getLength() <= MAX_SIGNATURE_CONTENT_LENGTH);
958 
959  m_aEditBuffer.Seek(nSignatureContentOffset);
960  m_aEditBuffer.WriteOString(aCMSHexBuffer.toString());
961 
962  return true;
963 }
964 
965 bool PDFDocument::Write(SvStream& rStream)
966 {
967  m_aEditBuffer.Seek(0);
968  rStream.WriteStream(m_aEditBuffer);
969  return rStream.good();
970 }
971 
972 bool PDFDocument::Tokenize(SvStream& rStream, TokenizeMode eMode,
973  std::vector<std::unique_ptr<PDFElement>>& rElements,
974  PDFObjectElement* pObjectElement)
975 {
976  // Last seen object token.
977  PDFObjectElement* pObject = pObjectElement;
978  PDFNameElement* pObjectKey = nullptr;
979  PDFObjectElement* pObjectStream = nullptr;
980  bool bInXRef = false;
981  // The next number will be an xref offset.
982  bool bInStartXRef = false;
983  // Dictionary depth, so we know when we're outside any dictionaries.
984  int nDepth = 0;
985  // Last seen array token that's outside any dictionaries.
986  PDFArrayElement* pArray = nullptr;
987  // If we're inside an obj/endobj pair.
988  bool bInObject = false;
989 
990  while (true)
991  {
992  char ch;
993  rStream.ReadChar(ch);
994  if (rStream.eof())
995  break;
996 
997  switch (ch)
998  {
999  case '%':
1000  {
1001  auto pComment = new PDFCommentElement(*this);
1002  rElements.push_back(std::unique_ptr<PDFElement>(pComment));
1003  rStream.SeekRel(-1);
1004  if (!rElements.back()->Read(rStream))
1005  {
1006  SAL_WARN("vcl.filter",
1007  "PDFDocument::Tokenize: PDFCommentElement::Read() failed");
1008  return false;
1009  }
1010  if (eMode == TokenizeMode::EOF_TOKEN && !m_aEOFs.empty()
1011  && m_aEOFs.back() == rStream.Tell())
1012  {
1013  // Found EOF and partial parsing requested, we're done.
1014  return true;
1015  }
1016  break;
1017  }
1018  case '<':
1019  {
1020  // Dictionary or hex string.
1021  rStream.ReadChar(ch);
1022  rStream.SeekRel(-2);
1023  if (ch == '<')
1024  {
1025  rElements.push_back(std::unique_ptr<PDFElement>(new PDFDictionaryElement()));
1026  ++nDepth;
1027  }
1028  else
1029  rElements.push_back(std::unique_ptr<PDFElement>(new PDFHexStringElement));
1030  if (!rElements.back()->Read(rStream))
1031  {
1032  SAL_WARN("vcl.filter",
1033  "PDFDocument::Tokenize: PDFDictionaryElement::Read() failed");
1034  return false;
1035  }
1036  break;
1037  }
1038  case '>':
1039  {
1040  rElements.push_back(std::unique_ptr<PDFElement>(new PDFEndDictionaryElement()));
1041  --nDepth;
1042  rStream.SeekRel(-1);
1043  if (!rElements.back()->Read(rStream))
1044  {
1045  SAL_WARN("vcl.filter",
1046  "PDFDocument::Tokenize: PDFEndDictionaryElement::Read() failed");
1047  return false;
1048  }
1049  break;
1050  }
1051  case '[':
1052  {
1053  auto pArr = new PDFArrayElement(pObject);
1054  rElements.push_back(std::unique_ptr<PDFElement>(pArr));
1055  if (nDepth == 0)
1056  {
1057  // The array is attached directly, inform the object.
1058  pArray = pArr;
1059  if (pObject)
1060  {
1061  pObject->SetArray(pArray);
1062  pObject->SetArrayOffset(rStream.Tell());
1063  }
1064  }
1065  ++nDepth;
1066  rStream.SeekRel(-1);
1067  if (!rElements.back()->Read(rStream))
1068  {
1069  SAL_WARN("vcl.filter", "PDFDocument::Tokenize: PDFArrayElement::Read() failed");
1070  return false;
1071  }
1072  break;
1073  }
1074  case ']':
1075  {
1076  rElements.push_back(std::unique_ptr<PDFElement>(new PDFEndArrayElement()));
1077  --nDepth;
1078  rStream.SeekRel(-1);
1079  if (nDepth == 0)
1080  {
1081  if (pObject)
1082  {
1083  pObject->SetArrayLength(rStream.Tell() - pObject->GetArrayOffset());
1084  }
1085  }
1086  if (!rElements.back()->Read(rStream))
1087  {
1088  SAL_WARN("vcl.filter",
1089  "PDFDocument::Tokenize: PDFEndArrayElement::Read() failed");
1090  return false;
1091  }
1092  break;
1093  }
1094  case '/':
1095  {
1096  auto pNameElement = new PDFNameElement();
1097  rElements.push_back(std::unique_ptr<PDFElement>(pNameElement));
1098  rStream.SeekRel(-1);
1099  if (!pNameElement->Read(rStream))
1100  {
1101  SAL_WARN("vcl.filter", "PDFDocument::Tokenize: PDFNameElement::Read() failed");
1102  return false;
1103  }
1104 
1105  if (pObject && pObjectKey && pObjectKey->GetValue() == "Type"
1106  && pNameElement->GetValue() == "ObjStm")
1107  pObjectStream = pObject;
1108  else
1109  pObjectKey = pNameElement;
1110  break;
1111  }
1112  case '(':
1113  {
1114  rElements.push_back(std::unique_ptr<PDFElement>(new PDFLiteralStringElement));
1115  rStream.SeekRel(-1);
1116  if (!rElements.back()->Read(rStream))
1117  {
1118  SAL_WARN("vcl.filter",
1119  "PDFDocument::Tokenize: PDFLiteralStringElement::Read() failed");
1120  return false;
1121  }
1122  break;
1123  }
1124  default:
1125  {
1126  if (rtl::isAsciiDigit(static_cast<unsigned char>(ch)) || ch == '-' || ch == '+'
1127  || ch == '.')
1128  {
1129  // Numbering object: an integer or a real.
1130  auto pNumberElement = new PDFNumberElement();
1131  rElements.push_back(std::unique_ptr<PDFElement>(pNumberElement));
1132  rStream.SeekRel(-1);
1133  if (!pNumberElement->Read(rStream))
1134  {
1135  SAL_WARN("vcl.filter",
1136  "PDFDocument::Tokenize: PDFNumberElement::Read() failed");
1137  return false;
1138  }
1139  if (bInStartXRef)
1140  {
1141  bInStartXRef = false;
1142  m_aStartXRefs.push_back(pNumberElement->GetValue());
1143 
1144  auto it = m_aOffsetObjects.find(pNumberElement->GetValue());
1145  if (it != m_aOffsetObjects.end())
1146  m_pXRefStream = it->second;
1147  }
1148  else if (bInObject && !nDepth && pObject)
1149  // Number element inside an object, but outside a
1150  // dictionary / array: remember it.
1151  pObject->SetNumberElement(pNumberElement);
1152  }
1153  else if (rtl::isAsciiAlpha(static_cast<unsigned char>(ch)))
1154  {
1155  // Possible keyword, like "obj".
1156  rStream.SeekRel(-1);
1157  OString aKeyword = ReadKeyword(rStream);
1158 
1159  bool bObj = aKeyword == "obj";
1160  if (bObj || aKeyword == "R")
1161  {
1162  size_t nElements = rElements.size();
1163  if (nElements < 2)
1164  {
1165  SAL_WARN("vcl.filter", "PDFDocument::Tokenize: expected at least two "
1166  "tokens before 'obj' or 'R' keyword");
1167  return false;
1168  }
1169 
1170  auto pObjectNumber
1171  = dynamic_cast<PDFNumberElement*>(rElements[nElements - 2].get());
1172  auto pGenerationNumber
1173  = dynamic_cast<PDFNumberElement*>(rElements[nElements - 1].get());
1174  if (!pObjectNumber || !pGenerationNumber)
1175  {
1176  SAL_WARN("vcl.filter", "PDFDocument::Tokenize: missing object or "
1177  "generation number before 'obj' or 'R' keyword");
1178  return false;
1179  }
1180 
1181  if (bObj)
1182  {
1183  pObject = new PDFObjectElement(*this, pObjectNumber->GetValue(),
1184  pGenerationNumber->GetValue());
1185  rElements.push_back(std::unique_ptr<PDFElement>(pObject));
1186  m_aOffsetObjects[pObjectNumber->GetLocation()] = pObject;
1187  m_aIDObjects[pObjectNumber->GetValue()] = pObject;
1188  bInObject = true;
1189  }
1190  else
1191  {
1192  auto pReference = new PDFReferenceElement(*this, *pObjectNumber,
1193  *pGenerationNumber);
1194  rElements.push_back(std::unique_ptr<PDFElement>(pReference));
1195  if (bInObject && nDepth > 0 && pObject)
1196  // Inform the object about a new in-dictionary reference.
1197  pObject->AddDictionaryReference(pReference);
1198  }
1199  if (!rElements.back()->Read(rStream))
1200  {
1201  SAL_WARN("vcl.filter",
1202  "PDFDocument::Tokenize: PDFElement::Read() failed");
1203  return false;
1204  }
1205  }
1206  else if (aKeyword == "stream")
1207  {
1208  // Look up the length of the stream from the parent object's dictionary.
1209  size_t nLength = 0;
1210  for (size_t nElement = 0; nElement < rElements.size(); ++nElement)
1211  {
1212  // Iterate in reverse order.
1213  size_t nIndex = rElements.size() - nElement - 1;
1214  PDFElement* pElement = rElements[nIndex].get();
1215  auto pObj = dynamic_cast<PDFObjectElement*>(pElement);
1216  if (!pObj)
1217  continue;
1218 
1219  PDFElement* pLookup = pObj->Lookup("Length");
1220  auto pReference = dynamic_cast<PDFReferenceElement*>(pLookup);
1221  if (pReference)
1222  {
1223  // Length is provided as a reference.
1224  nLength = pReference->LookupNumber(rStream);
1225  break;
1226  }
1227 
1228  auto pNumber = dynamic_cast<PDFNumberElement*>(pLookup);
1229  if (pNumber)
1230  {
1231  // Length is provided directly.
1232  nLength = pNumber->GetValue();
1233  break;
1234  }
1235 
1236  SAL_WARN(
1237  "vcl.filter",
1238  "PDFDocument::Tokenize: found no Length key for stream keyword");
1239  return false;
1240  }
1241 
1242  PDFDocument::SkipLineBreaks(rStream);
1243  auto pStreamElement = new PDFStreamElement(nLength);
1244  if (pObject)
1245  pObject->SetStream(pStreamElement);
1246  rElements.push_back(std::unique_ptr<PDFElement>(pStreamElement));
1247  if (!rElements.back()->Read(rStream))
1248  {
1249  SAL_WARN("vcl.filter",
1250  "PDFDocument::Tokenize: PDFStreamElement::Read() failed");
1251  return false;
1252  }
1253  }
1254  else if (aKeyword == "endstream")
1255  {
1256  rElements.push_back(std::unique_ptr<PDFElement>(new PDFEndStreamElement));
1257  if (!rElements.back()->Read(rStream))
1258  {
1259  SAL_WARN("vcl.filter",
1260  "PDFDocument::Tokenize: PDFEndStreamElement::Read() failed");
1261  return false;
1262  }
1263  }
1264  else if (aKeyword == "endobj")
1265  {
1266  rElements.push_back(std::unique_ptr<PDFElement>(new PDFEndObjectElement));
1267  if (!rElements.back()->Read(rStream))
1268  {
1269  SAL_WARN("vcl.filter",
1270  "PDFDocument::Tokenize: PDFEndObjectElement::Read() failed");
1271  return false;
1272  }
1273  if (eMode == TokenizeMode::END_OF_OBJECT)
1274  {
1275  // Found endobj and only object parsing was requested, we're done.
1276  return true;
1277  }
1278 
1279  if (pObjectStream)
1280  {
1281  // We're at the end of an object stream, parse the stored objects.
1282  pObjectStream->ParseStoredObjects();
1283  pObjectStream = nullptr;
1284  pObjectKey = nullptr;
1285  }
1286  bInObject = false;
1287  }
1288  else if (aKeyword == "true" || aKeyword == "false")
1289  rElements.push_back(std::unique_ptr<PDFElement>(
1290  new PDFBooleanElement(aKeyword.toBoolean())));
1291  else if (aKeyword == "null")
1292  rElements.push_back(std::unique_ptr<PDFElement>(new PDFNullElement));
1293  else if (aKeyword == "xref")
1294  // Allow 'f' and 'n' keywords.
1295  bInXRef = true;
1296  else if (bInXRef && (aKeyword == "f" || aKeyword == "n"))
1297  {
1298  }
1299  else if (aKeyword == "trailer")
1300  {
1301  auto pTrailer = new PDFTrailerElement(*this);
1302 
1303  // Make it possible to find this trailer later by offset.
1304  pTrailer->Read(rStream);
1305  m_aOffsetTrailers[pTrailer->GetLocation()] = pTrailer;
1306 
1307  // When reading till the first EOF token only, remember
1308  // just the first trailer token.
1309  if (eMode != TokenizeMode::EOF_TOKEN || !m_pTrailer)
1310  m_pTrailer = pTrailer;
1311  rElements.push_back(std::unique_ptr<PDFElement>(pTrailer));
1312  }
1313  else if (aKeyword == "startxref")
1314  {
1315  bInStartXRef = true;
1316  }
1317  else
1318  {
1319  SAL_WARN("vcl.filter", "PDFDocument::Tokenize: unexpected '"
1320  << aKeyword << "' keyword at byte position "
1321  << rStream.Tell());
1322  return false;
1323  }
1324  }
1325  else
1326  {
1327  auto uChar = static_cast<unsigned char>(ch);
1328  // Be more lenient and allow unexpected null char
1329  if (!rtl::isAsciiWhiteSpace(uChar) && uChar != 0)
1330  {
1331  SAL_WARN("vcl.filter",
1332  "PDFDocument::Tokenize: unexpected character with code "
1333  << sal_Int32(ch) << " at byte position " << rStream.Tell());
1334  return false;
1335  }
1336  SAL_WARN_IF(uChar == 0, "vcl.filter",
1337  "PDFDocument::Tokenize: unexpected null character at "
1338  << rStream.Tell() << " - ignoring");
1339  }
1340  break;
1341  }
1342  }
1343  }
1344 
1345  return true;
1346 }
1347 
1348 void PDFDocument::SetIDObject(size_t nID, PDFObjectElement* pObject)
1349 {
1350  m_aIDObjects[nID] = pObject;
1351 }
1352 
1353 bool PDFDocument::Read(SvStream& rStream)
1354 {
1355  // Check file magic.
1356  std::vector<sal_Int8> aHeader(5);
1357  rStream.Seek(0);
1358  rStream.ReadBytes(aHeader.data(), aHeader.size());
1359  if (aHeader[0] != '%' || aHeader[1] != 'P' || aHeader[2] != 'D' || aHeader[3] != 'F'
1360  || aHeader[4] != '-')
1361  {
1362  SAL_WARN("vcl.filter", "PDFDocument::Read: header mismatch");
1363  return false;
1364  }
1365 
1366  // Allow later editing of the contents in-memory.
1367  rStream.Seek(0);
1368  m_aEditBuffer.WriteStream(rStream);
1369 
1370  // Look up the offset of the xref table.
1371  size_t nStartXRef = FindStartXRef(rStream);
1372  SAL_INFO("vcl.filter", "PDFDocument::Read: nStartXRef is " << nStartXRef);
1373  if (nStartXRef == 0)
1374  {
1375  SAL_WARN("vcl.filter", "PDFDocument::Read: found no xref start offset");
1376  return false;
1377  }
1378  while (true)
1379  {
1380  rStream.Seek(nStartXRef);
1381  OString aKeyword = ReadKeyword(rStream);
1382  if (aKeyword.isEmpty())
1383  ReadXRefStream(rStream);
1384 
1385  else
1386  {
1387  if (aKeyword != "xref")
1388  {
1389  SAL_WARN("vcl.filter", "PDFDocument::Read: xref is not the first keyword");
1390  return false;
1391  }
1392  ReadXRef(rStream);
1393  if (!Tokenize(rStream, TokenizeMode::EOF_TOKEN, m_aElements, nullptr))
1394  {
1395  SAL_WARN("vcl.filter", "PDFDocument::Read: failed to tokenizer trailer after xref");
1396  return false;
1397  }
1398  }
1399 
1400  PDFNumberElement* pPrev = nullptr;
1401  if (m_pTrailer)
1402  {
1403  pPrev = dynamic_cast<PDFNumberElement*>(m_pTrailer->Lookup("Prev"));
1404 
1405  // Remember the offset of this trailer in the correct order. It's
1406  // possible that newer trailers don't have a larger offset.
1407  m_aTrailerOffsets.push_back(m_pTrailer->GetLocation());
1408  }
1409  else if (m_pXRefStream)
1410  pPrev = dynamic_cast<PDFNumberElement*>(m_pXRefStream->Lookup("Prev"));
1411  if (pPrev)
1412  nStartXRef = pPrev->GetValue();
1413 
1414  // Reset state, except the edit buffer.
1415  m_aElements.clear();
1416  m_aOffsetObjects.clear();
1417  m_aIDObjects.clear();
1418  m_aStartXRefs.clear();
1419  m_aEOFs.clear();
1420  m_pTrailer = nullptr;
1421  m_pXRefStream = nullptr;
1422  if (!pPrev)
1423  break;
1424  }
1425 
1426  // Then we can tokenize the stream.
1427  rStream.Seek(0);
1428  return Tokenize(rStream, TokenizeMode::END_OF_STREAM, m_aElements, nullptr);
1429 }
1430 
1431 OString PDFDocument::ReadKeyword(SvStream& rStream)
1432 {
1433  OStringBuffer aBuf;
1434  char ch;
1435  rStream.ReadChar(ch);
1436  if (rStream.eof())
1437  return OString();
1438  while (rtl::isAsciiAlpha(static_cast<unsigned char>(ch)))
1439  {
1440  aBuf.append(ch);
1441  rStream.ReadChar(ch);
1442  if (rStream.eof())
1443  return aBuf.toString();
1444  }
1445  rStream.SeekRel(-1);
1446  return aBuf.toString();
1447 }
1448 
1449 size_t PDFDocument::FindStartXRef(SvStream& rStream)
1450 {
1451  // Find the "startxref" token, somewhere near the end of the document.
1452  std::vector<char> aBuf(1024);
1453  rStream.Seek(STREAM_SEEK_TO_END);
1454  if (rStream.Tell() > aBuf.size())
1455  rStream.SeekRel(static_cast<sal_Int64>(-1) * aBuf.size());
1456  else
1457  // The document is really short, then just read it from the start.
1458  rStream.Seek(0);
1459  size_t nBeforePeek = rStream.Tell();
1460  size_t nSize = rStream.ReadBytes(aBuf.data(), aBuf.size());
1461  rStream.Seek(nBeforePeek);
1462  if (nSize != aBuf.size())
1463  aBuf.resize(nSize);
1464  OString aPrefix("startxref");
1465  // Find the last startxref at the end of the document.
1466  auto itLastValid = aBuf.end();
1467  auto it = aBuf.begin();
1468  while (true)
1469  {
1470  it = std::search(it, aBuf.end(), aPrefix.getStr(), aPrefix.getStr() + aPrefix.getLength());
1471  if (it == aBuf.end())
1472  break;
1473 
1474  itLastValid = it;
1475  ++it;
1476  }
1477  if (itLastValid == aBuf.end())
1478  {
1479  SAL_WARN("vcl.filter", "PDFDocument::FindStartXRef: found no startxref");
1480  return 0;
1481  }
1482 
1483  rStream.SeekRel(itLastValid - aBuf.begin() + aPrefix.getLength());
1484  if (rStream.eof())
1485  {
1486  SAL_WARN("vcl.filter",
1487  "PDFDocument::FindStartXRef: unexpected end of stream after startxref");
1488  return 0;
1489  }
1490 
1491  PDFDocument::SkipWhitespace(rStream);
1492  PDFNumberElement aNumber;
1493  if (!aNumber.Read(rStream))
1494  return 0;
1495  return aNumber.GetValue();
1496 }
1497 
1498 void PDFDocument::ReadXRefStream(SvStream& rStream)
1499 {
1500  // Look up the stream length in the object dictionary.
1501  if (!Tokenize(rStream, TokenizeMode::END_OF_OBJECT, m_aElements, nullptr))
1502  {
1503  SAL_WARN("vcl.filter", "PDFDocument::ReadXRefStream: failed to read object");
1504  return;
1505  }
1506 
1507  if (m_aElements.empty())
1508  {
1509  SAL_WARN("vcl.filter", "PDFDocument::ReadXRefStream: no tokens found");
1510  return;
1511  }
1512 
1513  PDFObjectElement* pObject = nullptr;
1514  for (const auto& pElement : m_aElements)
1515  {
1516  if (auto pObj = dynamic_cast<PDFObjectElement*>(pElement.get()))
1517  {
1518  pObject = pObj;
1519  break;
1520  }
1521  }
1522  if (!pObject)
1523  {
1524  SAL_WARN("vcl.filter", "PDFDocument::ReadXRefStream: no object token found");
1525  return;
1526  }
1527 
1528  // So that the Prev key can be looked up later.
1529  m_pXRefStream = pObject;
1530 
1531  PDFElement* pLookup = pObject->Lookup("Length");
1532  auto pNumber = dynamic_cast<PDFNumberElement*>(pLookup);
1533  if (!pNumber)
1534  {
1535  SAL_WARN("vcl.filter", "PDFDocument::ReadXRefStream: stream length is not provided");
1536  return;
1537  }
1538  sal_uInt64 nLength = pNumber->GetValue();
1539 
1540  // Look up the stream offset.
1541  PDFStreamElement* pStream = nullptr;
1542  for (const auto& pElement : m_aElements)
1543  {
1544  if (auto pS = dynamic_cast<PDFStreamElement*>(pElement.get()))
1545  {
1546  pStream = pS;
1547  break;
1548  }
1549  }
1550  if (!pStream)
1551  {
1552  SAL_WARN("vcl.filter", "PDFDocument::ReadXRefStream: no stream token found");
1553  return;
1554  }
1555 
1556  // Read and decompress it.
1557  rStream.Seek(pStream->GetOffset());
1558  std::vector<char> aBuf(nLength);
1559  rStream.ReadBytes(aBuf.data(), aBuf.size());
1560 
1561  auto pFilter = dynamic_cast<PDFNameElement*>(pObject->Lookup("Filter"));
1562  if (!pFilter)
1563  {
1564  SAL_WARN("vcl.filter", "PDFDocument::ReadXRefStream: no Filter found");
1565  return;
1566  }
1567 
1568  if (pFilter->GetValue() != "FlateDecode")
1569  {
1570  SAL_WARN("vcl.filter",
1571  "PDFDocument::ReadXRefStream: unexpected filter: " << pFilter->GetValue());
1572  return;
1573  }
1574 
1575  int nColumns = 1;
1576  int nPredictor = 1;
1577  if (auto pDecodeParams = dynamic_cast<PDFDictionaryElement*>(pObject->Lookup("DecodeParms")))
1578  {
1579  const std::map<OString, PDFElement*>& rItems = pDecodeParams->GetItems();
1580  auto it = rItems.find("Columns");
1581  if (it != rItems.end())
1582  if (auto pColumns = dynamic_cast<PDFNumberElement*>(it->second))
1583  nColumns = pColumns->GetValue();
1584  it = rItems.find("Predictor");
1585  if (it != rItems.end())
1586  if (auto pPredictor = dynamic_cast<PDFNumberElement*>(it->second))
1587  nPredictor = pPredictor->GetValue();
1588  }
1589 
1590  SvMemoryStream aSource(aBuf.data(), aBuf.size(), StreamMode::READ);
1591  SvMemoryStream aStream;
1592  ZCodec aZCodec;
1593  aZCodec.BeginCompression();
1594  aZCodec.Decompress(aSource, aStream);
1595  if (!aZCodec.EndCompression())
1596  {
1597  SAL_WARN("vcl.filter", "PDFDocument::ReadXRefStream: decompression failed");
1598  return;
1599  }
1600 
1601  // Look up the first and the last entry we need to read.
1602  auto pIndex = dynamic_cast<PDFArrayElement*>(pObject->Lookup("Index"));
1603  std::vector<size_t> aFirstObjects;
1604  std::vector<size_t> aNumberOfObjects;
1605  if (!pIndex)
1606  {
1607  auto pSize = dynamic_cast<PDFNumberElement*>(pObject->Lookup("Size"));
1608  if (pSize)
1609  {
1610  aFirstObjects.push_back(0);
1611  aNumberOfObjects.push_back(pSize->GetValue());
1612  }
1613  else
1614  {
1615  SAL_WARN("vcl.filter", "PDFDocument::ReadXRefStream: Index and Size not found");
1616  return;
1617  }
1618  }
1619  else
1620  {
1621  const std::vector<PDFElement*>& rIndexElements = pIndex->GetElements();
1622  size_t nFirstObject = 0;
1623  for (size_t i = 0; i < rIndexElements.size(); ++i)
1624  {
1625  if (i % 2 == 0)
1626  {
1627  auto pFirstObject = dynamic_cast<PDFNumberElement*>(rIndexElements[i]);
1628  if (!pFirstObject)
1629  {
1630  SAL_WARN("vcl.filter",
1631  "PDFDocument::ReadXRefStream: Index has no first object");
1632  return;
1633  }
1634  nFirstObject = pFirstObject->GetValue();
1635  continue;
1636  }
1637 
1638  auto pNumberOfObjects = dynamic_cast<PDFNumberElement*>(rIndexElements[i]);
1639  if (!pNumberOfObjects)
1640  {
1641  SAL_WARN("vcl.filter",
1642  "PDFDocument::ReadXRefStream: Index has no number of objects");
1643  return;
1644  }
1645  aFirstObjects.push_back(nFirstObject);
1646  aNumberOfObjects.push_back(pNumberOfObjects->GetValue());
1647  }
1648  }
1649 
1650  // Look up the format of a single entry.
1651  const int nWSize = 3;
1652  auto pW = dynamic_cast<PDFArrayElement*>(pObject->Lookup("W"));
1653  if (!pW || pW->GetElements().size() < nWSize)
1654  {
1655  SAL_WARN("vcl.filter", "PDFDocument::ReadXRefStream: W not found or has < 3 elements");
1656  return;
1657  }
1658  int aW[nWSize];
1659  // First character is the (kind of) repeated predictor.
1660  int nLineLength = 1;
1661  for (size_t i = 0; i < nWSize; ++i)
1662  {
1663  auto pI = dynamic_cast<PDFNumberElement*>(pW->GetElements()[i]);
1664  if (!pI)
1665  {
1666  SAL_WARN("vcl.filter", "PDFDocument::ReadXRefStream: W contains non-number");
1667  return;
1668  }
1669  aW[i] = pI->GetValue();
1670  nLineLength += aW[i];
1671  }
1672 
1673  if (nPredictor > 1 && nLineLength - 1 != nColumns)
1674  {
1675  SAL_WARN("vcl.filter",
1676  "PDFDocument::ReadXRefStream: /DecodeParms/Columns is inconsistent with /W");
1677  return;
1678  }
1679 
1680  aStream.Seek(0);
1681  for (size_t nSubSection = 0; nSubSection < aFirstObjects.size(); ++nSubSection)
1682  {
1683  size_t nFirstObject = aFirstObjects[nSubSection];
1684  size_t nNumberOfObjects = aNumberOfObjects[nSubSection];
1685 
1686  // This is the line as read from the stream.
1687  std::vector<unsigned char> aOrigLine(nLineLength);
1688  // This is the line as it appears after tweaking according to nPredictor.
1689  std::vector<unsigned char> aFilteredLine(nLineLength);
1690  for (size_t nEntry = 0; nEntry < nNumberOfObjects; ++nEntry)
1691  {
1692  size_t nIndex = nFirstObject + nEntry;
1693 
1694  aStream.ReadBytes(aOrigLine.data(), aOrigLine.size());
1695  if (nPredictor > 1 && aOrigLine[0] + 10 != nPredictor)
1696  {
1697  SAL_WARN("vcl.filter", "PDFDocument::ReadXRefStream: in-stream predictor is "
1698  "inconsistent with /DecodeParms/Predictor for object #"
1699  << nIndex);
1700  return;
1701  }
1702 
1703  for (int i = 0; i < nLineLength; ++i)
1704  {
1705  switch (nPredictor)
1706  {
1707  case 1:
1708  // No prediction.
1709  break;
1710  case 12:
1711  // PNG prediction: up (on all rows).
1712  aFilteredLine[i] = aFilteredLine[i] + aOrigLine[i];
1713  break;
1714  default:
1715  SAL_WARN("vcl.filter", "PDFDocument::ReadXRefStream: unexpected predictor: "
1716  << nPredictor);
1717  return;
1718  }
1719  }
1720 
1721  // First character is already handled above.
1722  int nPos = 1;
1723  size_t nType = 0;
1724  // Start of the current field in the stream data.
1725  int nOffset = nPos;
1726  for (; nPos < nOffset + aW[0]; ++nPos)
1727  {
1728  unsigned char nCh = aFilteredLine[nPos];
1729  nType = (nType << 8) + nCh;
1730  }
1731 
1732  // Start of the object in the file stream.
1733  size_t nStreamOffset = 0;
1734  nOffset = nPos;
1735  for (; nPos < nOffset + aW[1]; ++nPos)
1736  {
1737  unsigned char nCh = aFilteredLine[nPos];
1738  nStreamOffset = (nStreamOffset << 8) + nCh;
1739  }
1740 
1741  // Generation number of the object.
1742  size_t nGenerationNumber = 0;
1743  nOffset = nPos;
1744  for (; nPos < nOffset + aW[2]; ++nPos)
1745  {
1746  unsigned char nCh = aFilteredLine[nPos];
1747  nGenerationNumber = (nGenerationNumber << 8) + nCh;
1748  }
1749 
1750  // Ignore invalid nType.
1751  if (nType <= 2)
1752  {
1753  if (m_aXRef.find(nIndex) == m_aXRef.end())
1754  {
1755  XRefEntry aEntry;
1756  switch (nType)
1757  {
1758  case 0:
1759  aEntry.SetType(XRefEntryType::FREE);
1760  break;
1761  case 1:
1762  aEntry.SetType(XRefEntryType::NOT_COMPRESSED);
1763  break;
1764  case 2:
1765  aEntry.SetType(XRefEntryType::COMPRESSED);
1766  break;
1767  }
1768  aEntry.SetOffset(nStreamOffset);
1769  m_aXRef[nIndex] = aEntry;
1770  }
1771  }
1772  }
1773  }
1774 }
1775 
1776 void PDFDocument::ReadXRef(SvStream& rStream)
1777 {
1778  PDFDocument::SkipWhitespace(rStream);
1779 
1780  while (true)
1781  {
1782  PDFNumberElement aFirstObject;
1783  if (!aFirstObject.Read(rStream))
1784  {
1785  // Next token is not a number, it'll be the trailer.
1786  return;
1787  }
1788 
1789  if (aFirstObject.GetValue() < 0)
1790  {
1791  SAL_WARN("vcl.filter", "PDFDocument::ReadXRef: expected first object number >= 0");
1792  return;
1793  }
1794 
1795  PDFDocument::SkipWhitespace(rStream);
1796  PDFNumberElement aNumberOfEntries;
1797  if (!aNumberOfEntries.Read(rStream))
1798  {
1799  SAL_WARN("vcl.filter", "PDFDocument::ReadXRef: failed to read number of entries");
1800  return;
1801  }
1802 
1803  if (aNumberOfEntries.GetValue() < 0)
1804  {
1805  SAL_WARN("vcl.filter", "PDFDocument::ReadXRef: expected zero or more entries");
1806  return;
1807  }
1808 
1809  size_t nSize = aNumberOfEntries.GetValue();
1810  for (size_t nEntry = 0; nEntry < nSize; ++nEntry)
1811  {
1812  size_t nIndex = aFirstObject.GetValue() + nEntry;
1813  PDFDocument::SkipWhitespace(rStream);
1814  PDFNumberElement aOffset;
1815  if (!aOffset.Read(rStream))
1816  {
1817  SAL_WARN("vcl.filter", "PDFDocument::ReadXRef: failed to read offset");
1818  return;
1819  }
1820 
1821  PDFDocument::SkipWhitespace(rStream);
1822  PDFNumberElement aGenerationNumber;
1823  if (!aGenerationNumber.Read(rStream))
1824  {
1825  SAL_WARN("vcl.filter", "PDFDocument::ReadXRef: failed to read generation number");
1826  return;
1827  }
1828 
1829  PDFDocument::SkipWhitespace(rStream);
1830  OString aKeyword = ReadKeyword(rStream);
1831  if (aKeyword != "f" && aKeyword != "n")
1832  {
1833  SAL_WARN("vcl.filter", "PDFDocument::ReadXRef: unexpected keyword");
1834  return;
1835  }
1836  // xrefs are read in reverse order, so never update an existing
1837  // offset with an older one.
1838  if (m_aXRef.find(nIndex) == m_aXRef.end())
1839  {
1840  XRefEntry aEntry;
1841  aEntry.SetOffset(aOffset.GetValue());
1842  // Initially only the first entry is dirty.
1843  if (nIndex == 0)
1844  aEntry.SetDirty(true);
1845  m_aXRef[nIndex] = aEntry;
1846  }
1847  PDFDocument::SkipWhitespace(rStream);
1848  }
1849  }
1850 }
1851 
1852 void PDFDocument::SkipWhitespace(SvStream& rStream)
1853 {
1854  char ch = 0;
1855 
1856  while (true)
1857  {
1858  rStream.ReadChar(ch);
1859  if (rStream.eof())
1860  break;
1861 
1862  if (!rtl::isAsciiWhiteSpace(static_cast<unsigned char>(ch)))
1863  {
1864  rStream.SeekRel(-1);
1865  return;
1866  }
1867  }
1868 }
1869 
1870 void PDFDocument::SkipLineBreaks(SvStream& rStream)
1871 {
1872  char ch = 0;
1873 
1874  while (true)
1875  {
1876  rStream.ReadChar(ch);
1877  if (rStream.eof())
1878  break;
1879 
1880  if (ch != '\n' && ch != '\r')
1881  {
1882  rStream.SeekRel(-1);
1883  return;
1884  }
1885  }
1886 }
1887 
1888 size_t PDFDocument::GetObjectOffset(size_t nIndex) const
1889 {
1890  auto it = m_aXRef.find(nIndex);
1891  if (it == m_aXRef.end() || it->second.GetType() == XRefEntryType::COMPRESSED)
1892  {
1893  SAL_WARN("vcl.filter", "PDFDocument::GetObjectOffset: wanted to look up index #"
1894  << nIndex << ", but failed");
1895  return 0;
1896  }
1897 
1898  return it->second.GetOffset();
1899 }
1900 
1901 const std::vector<std::unique_ptr<PDFElement>>& PDFDocument::GetElements() const
1902 {
1903  return m_aElements;
1904 }
1905 
1907 static void visitPages(PDFObjectElement* pPages, std::vector<PDFObjectElement*>& rRet)
1908 {
1909  auto pKids = dynamic_cast<PDFArrayElement*>(pPages->Lookup("Kids"));
1910  if (!pKids)
1911  {
1912  SAL_WARN("vcl.filter", "visitPages: pages has no kids");
1913  return;
1914  }
1915 
1916  pPages->setVisiting(true);
1917 
1918  for (const auto& pKid : pKids->GetElements())
1919  {
1920  auto pReference = dynamic_cast<PDFReferenceElement*>(pKid);
1921  if (!pReference)
1922  continue;
1923 
1924  PDFObjectElement* pKidObject = pReference->LookupObject();
1925  if (!pKidObject)
1926  continue;
1927 
1928  // detect if visiting reenters itself
1929  if (pKidObject->alreadyVisiting())
1930  {
1931  SAL_WARN("vcl.filter", "visitPages: loop in hierarchy");
1932  continue;
1933  }
1934 
1935  auto pName = dynamic_cast<PDFNameElement*>(pKidObject->Lookup("Type"));
1936  if (pName && pName->GetValue() == "Pages")
1937  // Pages inside pages: recurse.
1938  visitPages(pKidObject, rRet);
1939  else
1940  // Found an actual page.
1941  rRet.push_back(pKidObject);
1942  }
1943 
1944  pPages->setVisiting(false);
1945 }
1946 
1947 PDFObjectElement* PDFDocument::GetCatalog()
1948 {
1949  PDFReferenceElement* pRoot = nullptr;
1950 
1951  PDFTrailerElement* pTrailer = nullptr;
1952  if (!m_aTrailerOffsets.empty())
1953  {
1954  // Get access to the latest trailer, and work with the keys of that
1955  // one.
1956  auto it = m_aOffsetTrailers.find(m_aTrailerOffsets[0]);
1957  if (it != m_aOffsetTrailers.end())
1958  pTrailer = it->second;
1959  }
1960 
1961  if (pTrailer)
1962  pRoot = dynamic_cast<PDFReferenceElement*>(pTrailer->Lookup("Root"));
1963  else if (m_pXRefStream)
1964  pRoot = dynamic_cast<PDFReferenceElement*>(m_pXRefStream->Lookup("Root"));
1965 
1966  if (!pRoot)
1967  {
1968  SAL_WARN("vcl.filter", "PDFDocument::GetCatalog: trailer has no Root key");
1969  return nullptr;
1970  }
1971 
1972  return pRoot->LookupObject();
1973 }
1974 
1975 std::vector<PDFObjectElement*> PDFDocument::GetPages()
1976 {
1977  std::vector<PDFObjectElement*> aRet;
1978 
1979  PDFObjectElement* pCatalog = GetCatalog();
1980  if (!pCatalog)
1981  {
1982  SAL_WARN("vcl.filter", "PDFDocument::GetPages: trailer has no catalog");
1983  return aRet;
1984  }
1985 
1986  PDFObjectElement* pPages = pCatalog->LookupObject("Pages");
1987  if (!pPages)
1988  {
1989  SAL_WARN("vcl.filter", "PDFDocument::GetPages: catalog (obj " << pCatalog->GetObjectValue()
1990  << ") has no pages");
1991  return aRet;
1992  }
1993 
1994  visitPages(pPages, aRet);
1995 
1996  return aRet;
1997 }
1998 
1999 void PDFDocument::PushBackEOF(size_t nOffset) { m_aEOFs.push_back(nOffset); }
2000 
2001 std::vector<PDFObjectElement*> PDFDocument::GetSignatureWidgets()
2002 {
2003  std::vector<PDFObjectElement*> aRet;
2004 
2005  std::vector<PDFObjectElement*> aPages = GetPages();
2006 
2007  for (const auto& pPage : aPages)
2008  {
2009  if (!pPage)
2010  continue;
2011 
2012  PDFElement* pAnnotsElement = pPage->Lookup("Annots");
2013  auto pAnnots = dynamic_cast<PDFArrayElement*>(pAnnotsElement);
2014  if (!pAnnots)
2015  {
2016  // Annots is not an array, see if it's a reference to an object
2017  // with a direct array.
2018  auto pAnnotsRef = dynamic_cast<PDFReferenceElement*>(pAnnotsElement);
2019  if (pAnnotsRef)
2020  {
2021  if (PDFObjectElement* pAnnotsObject = pAnnotsRef->LookupObject())
2022  {
2023  pAnnots = pAnnotsObject->GetArray();
2024  }
2025  }
2026  }
2027 
2028  if (!pAnnots)
2029  continue;
2030 
2031  for (const auto& pAnnot : pAnnots->GetElements())
2032  {
2033  auto pReference = dynamic_cast<PDFReferenceElement*>(pAnnot);
2034  if (!pReference)
2035  continue;
2036 
2037  PDFObjectElement* pAnnotObject = pReference->LookupObject();
2038  if (!pAnnotObject)
2039  continue;
2040 
2041  auto pFT = dynamic_cast<PDFNameElement*>(pAnnotObject->Lookup("FT"));
2042  if (!pFT || pFT->GetValue() != "Sig")
2043  continue;
2044 
2045  aRet.push_back(pAnnotObject);
2046  }
2047  }
2048 
2049  return aRet;
2050 }
2051 
2052 std::vector<unsigned char> PDFDocument::DecodeHexString(PDFHexStringElement const* pElement)
2053 {
2054  return svl::crypto::DecodeHexString(pElement->GetValue());
2055 }
2056 
2057 PDFCommentElement::PDFCommentElement(PDFDocument& rDoc)
2058  : m_rDoc(rDoc)
2059 {
2060 }
2061 
2063 {
2064  // Read from (including) the % char till (excluding) the end of the line/stream.
2065  OStringBuffer aBuf;
2066  char ch;
2067  rStream.ReadChar(ch);
2068  while (true)
2069  {
2070  if (ch == '\n' || ch == '\r' || rStream.eof())
2071  {
2072  m_aComment = aBuf.makeStringAndClear();
2073 
2074  if (m_aComment.startsWith("%%EOF"))
2075  {
2076  sal_uInt64 nPos = rStream.Tell();
2077  if (ch == '\r')
2078  {
2079  // If the comment ends with a \r\n, count the \n as well to match Adobe Acrobat
2080  // behavior.
2081  nPos += 1;
2082  }
2083  m_rDoc.PushBackEOF(nPos);
2084  }
2085 
2086  SAL_INFO("vcl.filter", "PDFCommentElement::Read: m_aComment is '" << m_aComment << "'");
2087  return true;
2088  }
2089  aBuf.append(ch);
2090  rStream.ReadChar(ch);
2091  }
2092 
2093  return false;
2094 }
2095 
2097 
2099 {
2100  OStringBuffer aBuf;
2101  m_nOffset = rStream.Tell();
2102  char ch;
2103  rStream.ReadChar(ch);
2104  if (rStream.eof())
2105  {
2106  return false;
2107  }
2108  if (!rtl::isAsciiDigit(static_cast<unsigned char>(ch)) && ch != '-' && ch != '+' && ch != '.')
2109  {
2110  rStream.SeekRel(-1);
2111  return false;
2112  }
2113  while (!rStream.eof())
2114  {
2115  if (!rtl::isAsciiDigit(static_cast<unsigned char>(ch)) && ch != '-' && ch != '+'
2116  && ch != '.')
2117  {
2118  rStream.SeekRel(-1);
2119  m_nLength = rStream.Tell() - m_nOffset;
2120  m_fValue = aBuf.makeStringAndClear().toDouble();
2121  SAL_INFO("vcl.filter", "PDFNumberElement::Read: m_fValue is '" << m_fValue << "'");
2122  return true;
2123  }
2124  aBuf.append(ch);
2125  rStream.ReadChar(ch);
2126  }
2127 
2128  return false;
2129 }
2130 
2131 sal_uInt64 PDFNumberElement::GetLocation() const { return m_nOffset; }
2132 
2133 sal_uInt64 PDFNumberElement::GetLength() const { return m_nLength; }
2134 
2135 bool PDFBooleanElement::Read(SvStream& /*rStream*/) { return true; }
2136 
2137 bool PDFNullElement::Read(SvStream& /*rStream*/) { return true; }
2138 
2140 {
2141  char ch;
2142  rStream.ReadChar(ch);
2143  if (ch != '<')
2144  {
2145  SAL_INFO("vcl.filter", "PDFHexStringElement::Read: expected '<' as first character");
2146  return false;
2147  }
2148  rStream.ReadChar(ch);
2149 
2150  OStringBuffer aBuf;
2151  while (!rStream.eof())
2152  {
2153  if (ch == '>')
2154  {
2155  m_aValue = aBuf.makeStringAndClear();
2156  SAL_INFO("vcl.filter",
2157  "PDFHexStringElement::Read: m_aValue length is " << m_aValue.getLength());
2158  return true;
2159  }
2160  aBuf.append(ch);
2161  rStream.ReadChar(ch);
2162  }
2163 
2164  return false;
2165 }
2166 
2167 const OString& PDFHexStringElement::GetValue() const { return m_aValue; }
2168 
2170 {
2171  char nPrevCh = 0;
2172  char ch = 0;
2173  rStream.ReadChar(ch);
2174  if (ch != '(')
2175  {
2176  SAL_INFO("vcl.filter", "PDFHexStringElement::Read: expected '(' as first character");
2177  return false;
2178  }
2179  nPrevCh = ch;
2180  rStream.ReadChar(ch);
2181 
2182  // Start with 1 nesting level as we read a '(' above already.
2183  int nDepth = 1;
2184  OStringBuffer aBuf;
2185  while (!rStream.eof())
2186  {
2187  if (ch == '(' && nPrevCh != '\\')
2188  ++nDepth;
2189 
2190  if (ch == ')' && nPrevCh != '\\')
2191  --nDepth;
2192 
2193  if (nDepth == 0)
2194  {
2195  // ')' of the outermost '(' is reached.
2196  m_aValue = aBuf.makeStringAndClear();
2197  SAL_INFO("vcl.filter",
2198  "PDFLiteralStringElement::Read: m_aValue is '" << m_aValue << "'");
2199  return true;
2200  }
2201  aBuf.append(ch);
2202  nPrevCh = ch;
2203  rStream.ReadChar(ch);
2204  }
2205 
2206  return false;
2207 }
2208 
2209 const OString& PDFLiteralStringElement::GetValue() const { return m_aValue; }
2210 
2212  : m_rDoc(rDoc)
2213  , m_pDictionaryElement(nullptr)
2214 {
2215 }
2216 
2218 {
2219  m_nOffset = rStream.Tell();
2220  return true;
2221 }
2222 
2223 PDFElement* PDFTrailerElement::Lookup(const OString& rDictionaryKey)
2224 {
2225  if (!m_pDictionaryElement)
2226  {
2227  PDFObjectParser aParser(m_rDoc.GetElements());
2228  aParser.parse(this);
2229  }
2230  if (!m_pDictionaryElement)
2231  return nullptr;
2232  return m_pDictionaryElement->LookupElement(rDictionaryKey);
2233 }
2234 
2235 sal_uInt64 PDFTrailerElement::GetLocation() const { return m_nOffset; }
2236 
2237 double PDFNumberElement::GetValue() const { return m_fValue; }
2238 
2239 PDFObjectElement::PDFObjectElement(PDFDocument& rDoc, double fObjectValue, double fGenerationValue)
2240  : m_rDoc(rDoc)
2241  , m_fObjectValue(fObjectValue)
2242  , m_fGenerationValue(fGenerationValue)
2243  , m_pNumberElement(nullptr)
2244  , m_nDictionaryOffset(0)
2245  , m_nDictionaryLength(0)
2246  , m_pDictionaryElement(nullptr)
2247  , m_nArrayOffset(0)
2248  , m_nArrayLength(0)
2249  , m_pArrayElement(nullptr)
2250  , m_pStreamElement(nullptr)
2251  , m_bParsed(false)
2252 {
2253 }
2254 
2256 {
2257  SAL_INFO("vcl.filter",
2258  "PDFObjectElement::Read: " << m_fObjectValue << " " << m_fGenerationValue << " obj");
2259  return true;
2260 }
2261 
2263 
2264 PDFElement* PDFDictionaryElement::Lookup(const std::map<OString, PDFElement*>& rDictionary,
2265  const OString& rKey)
2266 {
2267  auto it = rDictionary.find(rKey);
2268  if (it == rDictionary.end())
2269  return nullptr;
2270 
2271  return it->second;
2272 }
2273 
2275 {
2276  auto pKey = dynamic_cast<PDFReferenceElement*>(
2277  PDFDictionaryElement::Lookup(m_aItems, rDictionaryKey));
2278  if (!pKey)
2279  {
2280  SAL_WARN("vcl.filter",
2281  "PDFDictionaryElement::LookupObject: no such key with reference value: "
2282  << rDictionaryKey);
2283  return nullptr;
2284  }
2285 
2286  return pKey->LookupObject();
2287 }
2288 
2289 PDFElement* PDFDictionaryElement::LookupElement(const OString& rDictionaryKey)
2290 {
2291  return PDFDictionaryElement::Lookup(m_aItems, rDictionaryKey);
2292 }
2293 
2295 {
2296  if (!m_bParsed)
2297  {
2298  if (!m_aElements.empty())
2299  {
2300  // This is a stored object in an object stream.
2301  PDFObjectParser aParser(m_aElements);
2302  aParser.parse(this);
2303  }
2304  else
2305  {
2306  // Normal object: elements are stored as members of the document itself.
2307  PDFObjectParser aParser(m_rDoc.GetElements());
2308  aParser.parse(this);
2309  }
2310  m_bParsed = true;
2311  }
2312 }
2313 
2314 PDFElement* PDFObjectElement::Lookup(const OString& rDictionaryKey)
2315 {
2316  parseIfNecessary();
2317  if (!m_pDictionaryElement)
2318  return nullptr;
2319  return PDFDictionaryElement::Lookup(GetDictionaryItems(), rDictionaryKey);
2320 }
2321 
2322 PDFObjectElement* PDFObjectElement::LookupObject(const OString& rDictionaryKey)
2323 {
2324  auto pKey = dynamic_cast<PDFReferenceElement*>(Lookup(rDictionaryKey));
2325  if (!pKey)
2326  {
2327  SAL_WARN("vcl.filter", "PDFObjectElement::LookupObject: no such key with reference value: "
2328  << rDictionaryKey);
2329  return nullptr;
2330  }
2331 
2332  return pKey->LookupObject();
2333 }
2334 
2336 
2337 void PDFObjectElement::SetDictionaryOffset(sal_uInt64 nDictionaryOffset)
2338 {
2339  m_nDictionaryOffset = nDictionaryOffset;
2340 }
2341 
2343 {
2344  parseIfNecessary();
2345  return m_nDictionaryOffset;
2346 }
2347 
2348 void PDFObjectElement::SetArrayOffset(sal_uInt64 nArrayOffset) { m_nArrayOffset = nArrayOffset; }
2349 
2350 sal_uInt64 PDFObjectElement::GetArrayOffset() const { return m_nArrayOffset; }
2351 
2352 void PDFDictionaryElement::SetKeyOffset(const OString& rKey, sal_uInt64 nOffset)
2353 {
2354  m_aDictionaryKeyOffset[rKey] = nOffset;
2355 }
2356 
2357 void PDFDictionaryElement::SetKeyValueLength(const OString& rKey, sal_uInt64 nLength)
2358 {
2359  m_aDictionaryKeyValueLength[rKey] = nLength;
2360 }
2361 
2362 sal_uInt64 PDFDictionaryElement::GetKeyOffset(const OString& rKey) const
2363 {
2364  auto it = m_aDictionaryKeyOffset.find(rKey);
2365  if (it == m_aDictionaryKeyOffset.end())
2366  return 0;
2367 
2368  return it->second;
2369 }
2370 
2371 sal_uInt64 PDFDictionaryElement::GetKeyValueLength(const OString& rKey) const
2372 {
2373  auto it = m_aDictionaryKeyValueLength.find(rKey);
2374  if (it == m_aDictionaryKeyValueLength.end())
2375  return 0;
2376 
2377  return it->second;
2378 }
2379 
2380 const std::map<OString, PDFElement*>& PDFDictionaryElement::GetItems() const { return m_aItems; }
2381 
2382 void PDFObjectElement::SetDictionaryLength(sal_uInt64 nDictionaryLength)
2383 {
2384  m_nDictionaryLength = nDictionaryLength;
2385 }
2386 
2388 {
2389  parseIfNecessary();
2390  return m_nDictionaryLength;
2391 }
2392 
2393 void PDFObjectElement::SetArrayLength(sal_uInt64 nArrayLength) { m_nArrayLength = nArrayLength; }
2394 
2395 sal_uInt64 PDFObjectElement::GetArrayLength() const { return m_nArrayLength; }
2396 
2398 {
2399  parseIfNecessary();
2400  return m_pDictionaryElement;
2401 }
2402 
2404 {
2405  m_pDictionaryElement = pDictionaryElement;
2406 }
2407 
2409 {
2410  m_pNumberElement = pNumberElement;
2411 }
2412 
2414 
2415 const std::vector<PDFReferenceElement*>& PDFObjectElement::GetDictionaryReferences() const
2416 {
2417  return m_aDictionaryReferences;
2418 }
2419 
2421 {
2422  m_aDictionaryReferences.push_back(pReference);
2423 }
2424 
2425 const std::map<OString, PDFElement*>& PDFObjectElement::GetDictionaryItems()
2426 {
2427  parseIfNecessary();
2428  return m_pDictionaryElement->GetItems();
2429 }
2430 
2431 void PDFObjectElement::SetArray(PDFArrayElement* pArrayElement) { m_pArrayElement = pArrayElement; }
2432 
2434 {
2435  m_pStreamElement = pStreamElement;
2436 }
2437 
2439 
2441 {
2442  parseIfNecessary();
2443  return m_pArrayElement;
2444 }
2445 
2447 {
2448  if (!m_pStreamElement)
2449  {
2450  SAL_WARN("vcl.filter", "PDFObjectElement::ParseStoredObjects: no stream");
2451  return;
2452  }
2453 
2454  auto pType = dynamic_cast<PDFNameElement*>(Lookup("Type"));
2455  if (!pType || pType->GetValue() != "ObjStm")
2456  {
2457  if (!pType)
2458  SAL_WARN("vcl.filter", "PDFDocument::ReadXRefStream: missing unexpected type");
2459  else
2460  SAL_WARN("vcl.filter",
2461  "PDFDocument::ReadXRefStream: unexpected type: " << pType->GetValue());
2462  return;
2463  }
2464 
2465  auto pFilter = dynamic_cast<PDFNameElement*>(Lookup("Filter"));
2466  if (!pFilter || pFilter->GetValue() != "FlateDecode")
2467  {
2468  if (!pFilter)
2469  SAL_WARN("vcl.filter", "PDFDocument::ReadXRefStream: missing filter");
2470  else
2471  SAL_WARN("vcl.filter",
2472  "PDFDocument::ReadXRefStream: unexpected filter: " << pFilter->GetValue());
2473  return;
2474  }
2475 
2476  auto pFirst = dynamic_cast<PDFNumberElement*>(Lookup("First"));
2477  if (!pFirst)
2478  {
2479  SAL_WARN("vcl.filter", "PDFObjectElement::ParseStoredObjects: no First");
2480  return;
2481  }
2482 
2483  auto pN = dynamic_cast<PDFNumberElement*>(Lookup("N"));
2484  if (!pN)
2485  {
2486  SAL_WARN("vcl.filter", "PDFObjectElement::ParseStoredObjects: no N");
2487  return;
2488  }
2489  size_t nN = pN->GetValue();
2490 
2491  auto pLength = dynamic_cast<PDFNumberElement*>(Lookup("Length"));
2492  if (!pLength)
2493  {
2494  SAL_WARN("vcl.filter", "PDFObjectElement::ParseStoredObjects: no length");
2495  return;
2496  }
2497  size_t nLength = pLength->GetValue();
2498 
2499  // Read and decompress it.
2500  SvMemoryStream& rEditBuffer = m_rDoc.GetEditBuffer();
2501  rEditBuffer.Seek(m_pStreamElement->GetOffset());
2502  std::vector<char> aBuf(nLength);
2503  rEditBuffer.ReadBytes(aBuf.data(), aBuf.size());
2504  SvMemoryStream aSource(aBuf.data(), aBuf.size(), StreamMode::READ);
2505  SvMemoryStream aStream;
2506  ZCodec aZCodec;
2507  aZCodec.BeginCompression();
2508  aZCodec.Decompress(aSource, aStream);
2509  if (!aZCodec.EndCompression())
2510  {
2511  SAL_WARN("vcl.filter", "PDFObjectElement::ParseStoredObjects: decompression failed");
2512  return;
2513  }
2514 
2515  nLength = aStream.TellEnd();
2516  aStream.Seek(0);
2517  std::vector<size_t> aObjNums;
2518  std::vector<size_t> aOffsets;
2519  std::vector<size_t> aLengths;
2520  // First iterate over and find out the lengths.
2521  for (size_t nObject = 0; nObject < nN; ++nObject)
2522  {
2523  PDFNumberElement aObjNum;
2524  if (!aObjNum.Read(aStream))
2525  {
2526  SAL_WARN("vcl.filter",
2527  "PDFObjectElement::ParseStoredObjects: failed to read object number");
2528  return;
2529  }
2530  aObjNums.push_back(aObjNum.GetValue());
2531 
2532  PDFDocument::SkipWhitespace(aStream);
2533 
2534  PDFNumberElement aByteOffset;
2535  if (!aByteOffset.Read(aStream))
2536  {
2537  SAL_WARN("vcl.filter",
2538  "PDFObjectElement::ParseStoredObjects: failed to read byte offset");
2539  return;
2540  }
2541  aOffsets.push_back(pFirst->GetValue() + aByteOffset.GetValue());
2542 
2543  if (aOffsets.size() > 1)
2544  aLengths.push_back(aOffsets.back() - aOffsets[aOffsets.size() - 2]);
2545  if (nObject + 1 == nN)
2546  aLengths.push_back(nLength - aOffsets.back());
2547 
2548  PDFDocument::SkipWhitespace(aStream);
2549  }
2550 
2551  // Now create streams with the proper length and tokenize the data.
2552  for (size_t nObject = 0; nObject < nN; ++nObject)
2553  {
2554  size_t nObjNum = aObjNums[nObject];
2555  size_t nOffset = aOffsets[nObject];
2556  size_t nLen = aLengths[nObject];
2557 
2558  aStream.Seek(nOffset);
2559  m_aStoredElements.push_back(std::make_unique<PDFObjectElement>(m_rDoc, nObjNum, 0));
2560  PDFObjectElement* pStored = m_aStoredElements.back().get();
2561 
2562  aBuf.clear();
2563  aBuf.resize(nLen);
2564  aStream.ReadBytes(aBuf.data(), aBuf.size());
2565  SvMemoryStream aStoredStream(aBuf.data(), aBuf.size(), StreamMode::READ);
2566 
2567  m_rDoc.Tokenize(aStoredStream, TokenizeMode::STORED_OBJECT, pStored->GetStoredElements(),
2568  pStored);
2569  // This is how references know the object is stored inside this object stream.
2570  m_rDoc.SetIDObject(nObjNum, pStored);
2571 
2572  // Store the stream of the object in the object stream for later use.
2573  std::unique_ptr<SvMemoryStream> pStreamBuffer(new SvMemoryStream());
2574  aStoredStream.Seek(0);
2575  pStreamBuffer->WriteStream(aStoredStream);
2576  pStored->SetStreamBuffer(pStreamBuffer);
2577  }
2578 }
2579 
2580 std::vector<std::unique_ptr<PDFElement>>& PDFObjectElement::GetStoredElements()
2581 {
2582  return m_aElements;
2583 }
2584 
2586 
2587 void PDFObjectElement::SetStreamBuffer(std::unique_ptr<SvMemoryStream>& pStreamBuffer)
2588 {
2589  m_pStreamBuffer = std::move(pStreamBuffer);
2590 }
2591 
2593 
2595  PDFNumberElement const& rGeneration)
2596  : m_rDoc(rDoc)
2597  , m_fObjectValue(rObject.GetValue())
2598  , m_fGenerationValue(rGeneration.GetValue())
2599  , m_rObject(rObject)
2600 {
2601 }
2602 
2604 
2606 {
2607  SAL_INFO("vcl.filter",
2608  "PDFReferenceElement::Read: " << m_fObjectValue << " " << m_fGenerationValue << " R");
2609  m_nOffset = rStream.Tell();
2610  return true;
2611 }
2612 
2613 sal_uInt64 PDFReferenceElement::GetOffset() const { return m_nOffset; }
2614 
2616 {
2617  size_t nOffset = m_rDoc.GetObjectOffset(m_fObjectValue);
2618  if (nOffset == 0)
2619  {
2620  SAL_WARN("vcl.filter", "PDFReferenceElement::LookupNumber: found no offset for object #"
2621  << m_fObjectValue);
2622  return 0;
2623  }
2624 
2625  sal_uInt64 nOrigPos = rStream.Tell();
2626  comphelper::ScopeGuard g([&]() { rStream.Seek(nOrigPos); });
2627 
2628  rStream.Seek(nOffset);
2629  {
2630  PDFDocument::SkipWhitespace(rStream);
2631  PDFNumberElement aNumber;
2632  bool bRet = aNumber.Read(rStream);
2633  if (!bRet || aNumber.GetValue() != m_fObjectValue)
2634  {
2635  SAL_WARN("vcl.filter",
2636  "PDFReferenceElement::LookupNumber: offset points to not matching object");
2637  return 0;
2638  }
2639  }
2640 
2641  {
2642  PDFDocument::SkipWhitespace(rStream);
2643  PDFNumberElement aNumber;
2644  bool bRet = aNumber.Read(rStream);
2645  if (!bRet || aNumber.GetValue() != m_fGenerationValue)
2646  {
2647  SAL_WARN("vcl.filter",
2648  "PDFReferenceElement::LookupNumber: offset points to not matching generation");
2649  return 0;
2650  }
2651  }
2652 
2653  {
2654  PDFDocument::SkipWhitespace(rStream);
2655  OString aKeyword = PDFDocument::ReadKeyword(rStream);
2656  if (aKeyword != "obj")
2657  {
2658  SAL_WARN("vcl.filter",
2659  "PDFReferenceElement::LookupNumber: offset doesn't point to an obj keyword");
2660  return 0;
2661  }
2662  }
2663 
2664  PDFDocument::SkipWhitespace(rStream);
2665  PDFNumberElement aNumber;
2666  if (!aNumber.Read(rStream))
2667  {
2668  SAL_WARN("vcl.filter",
2669  "PDFReferenceElement::LookupNumber: failed to read referenced number");
2670  return 0;
2671  }
2672 
2673  return aNumber.GetValue();
2674 }
2675 
2677 {
2679 }
2680 
2682 {
2683  auto itIDObjects = m_aIDObjects.find(nObjectNumber);
2684 
2685  if (itIDObjects != m_aIDObjects.end())
2686  return itIDObjects->second;
2687 
2688  SAL_WARN("vcl.filter", "PDFDocument::LookupObject: can't find obj " << nObjectNumber);
2689  return nullptr;
2690 }
2691 
2693 
2695 
2697 
2699 {
2700  char ch;
2701  rStream.ReadChar(ch);
2702  if (ch != '<')
2703  {
2704  SAL_WARN("vcl.filter", "PDFDictionaryElement::Read: unexpected character: " << ch);
2705  return false;
2706  }
2707 
2708  if (rStream.eof())
2709  {
2710  SAL_WARN("vcl.filter", "PDFDictionaryElement::Read: unexpected end of file");
2711  return false;
2712  }
2713 
2714  rStream.ReadChar(ch);
2715  if (ch != '<')
2716  {
2717  SAL_WARN("vcl.filter", "PDFDictionaryElement::Read: unexpected character: " << ch);
2718  return false;
2719  }
2720 
2721  m_nLocation = rStream.Tell();
2722 
2723  SAL_INFO("vcl.filter", "PDFDictionaryElement::Read: '<<'");
2724 
2725  return true;
2726 }
2727 
2729 
2731 
2733 {
2734  m_nLocation = rStream.Tell();
2735  char ch;
2736  rStream.ReadChar(ch);
2737  if (ch != '>')
2738  {
2739  SAL_WARN("vcl.filter", "PDFEndDictionaryElement::Read: unexpected character: " << ch);
2740  return false;
2741  }
2742 
2743  if (rStream.eof())
2744  {
2745  SAL_WARN("vcl.filter", "PDFEndDictionaryElement::Read: unexpected end of file");
2746  return false;
2747  }
2748 
2749  rStream.ReadChar(ch);
2750  if (ch != '>')
2751  {
2752  SAL_WARN("vcl.filter", "PDFEndDictionaryElement::Read: unexpected character: " << ch);
2753  return false;
2754  }
2755 
2756  SAL_INFO("vcl.filter", "PDFEndDictionaryElement::Read: '>>'");
2757 
2758  return true;
2759 }
2760 
// Default constructor, defaulted out of line: nothing is read here, the name
// value and its location are filled in later by Read().
2761 PDFNameElement::PDFNameElement() = default;
2762 
2764 {
2765  char ch;
2766  rStream.ReadChar(ch);
2767  if (ch != '/')
2768  {
2769  SAL_WARN("vcl.filter", "PDFNameElement::Read: unexpected character: " << ch);
2770  return false;
2771  }
2772  m_nLocation = rStream.Tell();
2773 
2774  if (rStream.eof())
2775  {
2776  SAL_WARN("vcl.filter", "PDFNameElement::Read: unexpected end of file");
2777  return false;
2778  }
2779 
2780  // Read till the first white-space.
2781  OStringBuffer aBuf;
2782  rStream.ReadChar(ch);
2783  while (!rStream.eof())
2784  {
2785  if (rtl::isAsciiWhiteSpace(static_cast<unsigned char>(ch)) || ch == '/' || ch == '['
2786  || ch == ']' || ch == '<' || ch == '>' || ch == '(')
2787  {
2788  rStream.SeekRel(-1);
2789  m_aValue = aBuf.makeStringAndClear();
2790  SAL_INFO("vcl.filter", "PDFNameElement::Read: m_aValue is '" << m_aValue << "'");
2791  return true;
2792  }
2793  aBuf.append(ch);
2794  rStream.ReadChar(ch);
2795  }
2796 
2797  return false;
2798 }
2799 
2800 const OString& PDFNameElement::GetValue() const { return m_aValue; }
2801 
2802 sal_uInt64 PDFNameElement::GetLocation() const { return m_nLocation; }
2803 
2805  : m_nLength(nLength)
2806  , m_nOffset(0)
2807 {
2808 }
2809 
2811 {
2812  SAL_INFO("vcl.filter", "PDFStreamElement::Read: length is " << m_nLength);
2813  m_nOffset = rStream.Tell();
2814  std::vector<unsigned char> aBytes(m_nLength);
2815  rStream.ReadBytes(aBytes.data(), aBytes.size());
2816  m_aMemory.WriteBytes(aBytes.data(), aBytes.size());
2817 
2818  return rStream.good();
2819 }
2820 
2822 
2823 sal_uInt64 PDFStreamElement::GetOffset() const { return m_nOffset; }
2824 
2825 bool PDFEndStreamElement::Read(SvStream& /*rStream*/) { return true; }
2826 
2827 bool PDFEndObjectElement::Read(SvStream& /*rStream*/) { return true; }
2828 
2830  : m_pObject(pObject)
2831 {
2832 }
2833 
2835 {
2836  char ch;
2837  rStream.ReadChar(ch);
2838  if (ch != '[')
2839  {
2840  SAL_WARN("vcl.filter", "PDFArrayElement::Read: unexpected character: " << ch);
2841  return false;
2842  }
2843 
2844  SAL_INFO("vcl.filter", "PDFArrayElement::Read: '['");
2845 
2846  return true;
2847 }
2848 
2850 {
2851  if (m_pObject)
2852  SAL_INFO("vcl.filter",
2853  "PDFArrayElement::PushBack: object is " << m_pObject->GetObjectValue());
2854  m_aElements.push_back(pElement);
2855 }
2856 
2857 const std::vector<PDFElement*>& PDFArrayElement::GetElements() const { return m_aElements; }
2858 
2860 
2862 {
2863  m_nOffset = rStream.Tell();
2864  char ch;
2865  rStream.ReadChar(ch);
2866  if (ch != ']')
2867  {
2868  SAL_WARN("vcl.filter", "PDFEndArrayElement::Read: unexpected character: " << ch);
2869  return false;
2870  }
2871 
2872  SAL_INFO("vcl.filter", "PDFEndArrayElement::Read: ']'");
2873 
2874  return true;
2875 }
2876 
2877 sal_uInt64 PDFEndArrayElement::GetOffset() const { return m_nOffset; }
2878 
2879 // PDFObjectParser
2880 
2881 size_t PDFObjectParser::parse(PDFElement* pParsingElement, size_t nStartIndex, int nCurrentDepth)
2882 {
2883  // The index of last parsed element
2884  size_t nReturnIndex = 0;
2885 
2886  pParsingElement->setParsing(true);
2887 
2888  comphelper::ScopeGuard aGuard([pParsingElement]() { pParsingElement->setParsing(false); });
2889 
2890  // Current object, if root is an object, else nullptr
2891  auto pParsingObject = dynamic_cast<PDFObjectElement*>(pParsingElement);
2892  auto pParsingTrailer = dynamic_cast<PDFTrailerElement*>(pParsingElement);
2893 
2894  // Current dictionary, if root is an dictionary, else nullptr
2895  auto pParsingDictionary = dynamic_cast<PDFDictionaryElement*>(pParsingElement);
2896 
2897  // Current parsing array, if root is an array, else nullptr
2898  auto pParsingArray = dynamic_cast<PDFArrayElement*>(pParsingElement);
2899 
2900  // Find out where the dictionary for this object starts.
2901  size_t nIndex = nStartIndex;
2902  for (size_t i = nStartIndex; i < mrElements.size(); ++i)
2903  {
2904  if (mrElements[i].get() == pParsingElement)
2905  {
2906  nIndex = i;
2907  break;
2908  }
2909  }
2910 
2911  OString aName;
2912  sal_uInt64 nNameOffset = 0;
2913  std::vector<PDFNumberElement*> aNumbers;
2914 
2915  sal_uInt64 nDictionaryOffset = 0;
2916 
2917  // Current depth; 1 is current
2918  int nDepth = 0;
2919 
2920  for (size_t i = nIndex; i < mrElements.size(); ++i)
2921  {
2922  auto* pCurrentElement = mrElements[i].get();
2923 
2924  // Dictionary tokens can be nested, track enter/leave.
2925  if (auto pCurrentDictionary = dynamic_cast<PDFDictionaryElement*>(pCurrentElement))
2926  {
2927  // Handle previously stored number
2928  if (!aNumbers.empty())
2929  {
2930  if (pParsingDictionary)
2931  {
2932  PDFNumberElement* pNumber = aNumbers.back();
2933  sal_uInt64 nLength
2934  = pNumber->GetLocation() + pNumber->GetLength() - nNameOffset;
2935 
2936  pParsingDictionary->insert(aName, pNumber);
2937  pParsingDictionary->SetKeyOffset(aName, nNameOffset);
2938  pParsingDictionary->SetKeyValueLength(aName, nLength);
2939  }
2940  else if (pParsingArray)
2941  {
2942  for (auto& pNumber : aNumbers)
2943  pParsingArray->PushBack(pNumber);
2944  }
2945  else
2946  {
2947  SAL_INFO("vcl.filter", "neither Dictionary nor Array available");
2948  }
2949  aName.clear();
2950  aNumbers.clear();
2951  }
2952 
2953  nDepth++;
2954 
2955  if (nDepth == 1) // pParsingDictionary is the current one
2956  {
2957  // First dictionary start, track start offset.
2958  nDictionaryOffset = pCurrentDictionary->GetLocation();
2959 
2960  if (pParsingObject)
2961  {
2962  // Then the toplevel dictionary of the object.
2963  pParsingObject->SetDictionary(pCurrentDictionary);
2964  pParsingObject->SetDictionaryOffset(nDictionaryOffset);
2965  pParsingDictionary = pCurrentDictionary;
2966  }
2967  else if (pParsingTrailer)
2968  {
2969  pParsingTrailer->SetDictionary(pCurrentDictionary);
2970  pParsingDictionary = pCurrentDictionary;
2971  }
2972  }
2973  else if (!pCurrentDictionary->alreadyParsing())
2974  {
2975  if (pParsingArray)
2976  {
2977  pParsingArray->PushBack(pCurrentDictionary);
2978  }
2979  else if (pParsingDictionary)
2980  {
2981  // Dictionary toplevel value.
2982  pParsingDictionary->insert(aName, pCurrentDictionary);
2983  }
2984  else
2985  {
2986  SAL_INFO("vcl.filter", "neither Dictionary nor Array available");
2987  }
2988  // Nested dictionary.
2989  const size_t nNextElementIndex = parse(pCurrentDictionary, i, nCurrentDepth + 1);
2990  i = std::max(i, nNextElementIndex - 1);
2991  }
2992  }
2993  else if (auto pCurrentEndDictionary
2994  = dynamic_cast<PDFEndDictionaryElement*>(pCurrentElement))
2995  {
2996  // Handle previously stored number
2997  if (!aNumbers.empty())
2998  {
2999  if (pParsingDictionary)
3000  {
3001  PDFNumberElement* pNumber = aNumbers.back();
3002  sal_uInt64 nLength
3003  = pNumber->GetLocation() + pNumber->GetLength() - nNameOffset;
3004 
3005  pParsingDictionary->insert(aName, pNumber);
3006  pParsingDictionary->SetKeyOffset(aName, nNameOffset);
3007  pParsingDictionary->SetKeyValueLength(aName, nLength);
3008  }
3009  else if (pParsingArray)
3010  {
3011  for (auto& pNumber : aNumbers)
3012  pParsingArray->PushBack(pNumber);
3013  }
3014  else
3015  {
3016  SAL_INFO("vcl.filter", "neither Dictionary nor Array available");
3017  }
3018  aName.clear();
3019  aNumbers.clear();
3020  }
3021 
3022  if (pParsingDictionary)
3023  {
3024  pParsingDictionary->SetKeyOffset(aName, nNameOffset);
3025  sal_uInt64 nLength = pCurrentEndDictionary->GetLocation() - nNameOffset + 2;
3026  pParsingDictionary->SetKeyValueLength(aName, nLength);
3027  aName.clear();
3028  }
3029 
3030  if (nDepth == 1) // did the parsing ended
3031  {
3032  // Last dictionary end, track length and stop parsing.
3033  if (pParsingObject)
3034  {
3035  sal_uInt64 nDictionaryLength
3036  = pCurrentEndDictionary->GetLocation() - nDictionaryOffset;
3037  pParsingObject->SetDictionaryLength(nDictionaryLength);
3038  }
3039  nReturnIndex = i;
3040  break;
3041  }
3042 
3043  nDepth--;
3044  }
3045  else if (auto pCurrentArray = dynamic_cast<PDFArrayElement*>(pCurrentElement))
3046  {
3047  // Handle previously stored number
3048  if (!aNumbers.empty())
3049  {
3050  if (pParsingDictionary)
3051  {
3052  PDFNumberElement* pNumber = aNumbers.back();
3053 
3054  sal_uInt64 nLength
3055  = pNumber->GetLocation() + pNumber->GetLength() - nNameOffset;
3056  pParsingDictionary->insert(aName, pNumber);
3057  pParsingDictionary->SetKeyOffset(aName, nNameOffset);
3058  pParsingDictionary->SetKeyValueLength(aName, nLength);
3059  }
3060  else if (pParsingArray)
3061  {
3062  for (auto& pNumber : aNumbers)
3063  pParsingArray->PushBack(pNumber);
3064  }
3065  else
3066  {
3067  SAL_INFO("vcl.filter", "neither Dictionary nor Array available");
3068  }
3069  aName.clear();
3070  aNumbers.clear();
3071  }
3072 
3073  nDepth++;
3074  if (nDepth == 1) // pParsingDictionary is the current one
3075  {
3076  if (pParsingObject)
3077  {
3078  pParsingObject->SetArray(pCurrentArray);
3079  pParsingArray = pCurrentArray;
3080  }
3081  }
3082  else if (!pCurrentArray->alreadyParsing())
3083  {
3084  if (pParsingArray)
3085  {
3086  // Array is toplevel
3087  pParsingArray->PushBack(pCurrentArray);
3088  }
3089  else if (pParsingDictionary)
3090  {
3091  // Dictionary toplevel value.
3092  pParsingDictionary->insert(aName, pCurrentArray);
3093  }
3094 
3095  const size_t nNextElementIndex = parse(pCurrentArray, i, nCurrentDepth + 1);
3096 
3097  // ensure we go forwards and not endlessly loop
3098  i = std::max(i, nNextElementIndex - 1);
3099  }
3100  }
3101  else if (auto pCurrentEndArray = dynamic_cast<PDFEndArrayElement*>(pCurrentElement))
3102  {
3103  // Handle previously stored number
3104  if (!aNumbers.empty())
3105  {
3106  if (pParsingDictionary)
3107  {
3108  PDFNumberElement* pNumber = aNumbers.back();
3109 
3110  sal_uInt64 nLength
3111  = pNumber->GetLocation() + pNumber->GetLength() - nNameOffset;
3112  pParsingDictionary->insert(aName, pNumber);
3113  pParsingDictionary->SetKeyOffset(aName, nNameOffset);
3114  pParsingDictionary->SetKeyValueLength(aName, nLength);
3115  }
3116  else if (pParsingArray)
3117  {
3118  for (auto& pNumber : aNumbers)
3119  pParsingArray->PushBack(pNumber);
3120  }
3121  else
3122  {
3123  SAL_INFO("vcl.filter", "neither Dictionary nor Array available");
3124  }
3125  aName.clear();
3126  aNumbers.clear();
3127  }
3128 
3129  if (nDepth == 1) // did the pParsing ended
3130  {
3131  // Last array end, track length and stop parsing.
3132  nReturnIndex = i;
3133  break;
3134  }
3135  else
3136  {
3137  if (pParsingDictionary)
3138  {
3139  pParsingDictionary->SetKeyOffset(aName, nNameOffset);
3140  // Include the ending ']' in the length of the key - (array)value pair length.
3141  sal_uInt64 nLength = pCurrentEndArray->GetOffset() - nNameOffset + 1;
3142  pParsingDictionary->SetKeyValueLength(aName, nLength);
3143  aName.clear();
3144  }
3145  }
3146  nDepth--;
3147  }
3148  else if (auto pCurrentName = dynamic_cast<PDFNameElement*>(pCurrentElement))
3149  {
3150  // Handle previously stored number
3151  if (!aNumbers.empty())
3152  {
3153  if (pParsingDictionary)
3154  {
3155  PDFNumberElement* pNumber = aNumbers.back();
3156 
3157  sal_uInt64 nLength
3158  = pNumber->GetLocation() + pNumber->GetLength() - nNameOffset;
3159  pParsingDictionary->insert(aName, pNumber);
3160  pParsingDictionary->SetKeyOffset(aName, nNameOffset);
3161  pParsingDictionary->SetKeyValueLength(aName, nLength);
3162  }
3163  else if (pParsingArray)
3164  {
3165  for (auto& pNumber : aNumbers)
3166  pParsingArray->PushBack(pNumber);
3167  }
3168  aName.clear();
3169  aNumbers.clear();
3170  }
3171 
3172  // Now handle name
3173  if (pParsingArray)
3174  {
3175  // if we are in an array, just push the name to array
3176  pParsingArray->PushBack(pCurrentName);
3177  }
3178  else if (pParsingDictionary)
3179  {
3180  // if we are in a dictionary, we need to store the name as a possible key
3181  if (aName.isEmpty())
3182  {
3183  aName = pCurrentName->GetValue();
3184  nNameOffset = pCurrentName->GetLocation();
3185  }
3186  else
3187  {
3188  sal_uInt64 nKeyLength
3189  = pCurrentName->GetLocation() + pCurrentName->GetLength() - nNameOffset;
3190  pParsingDictionary->insert(aName, pCurrentName);
3191  pParsingDictionary->SetKeyOffset(aName, nNameOffset);
3192  pParsingDictionary->SetKeyValueLength(aName, nKeyLength);
3193  aName.clear();
3194  }
3195  }
3196  }
3197  else if (auto pReference = dynamic_cast<PDFReferenceElement*>(pCurrentElement))
3198  {
3199  if (pParsingArray)
3200  {
3201  pParsingArray->PushBack(pReference);
3202  }
3203  else if (pParsingDictionary)
3204  {
3205  sal_uInt64 nLength = pReference->GetOffset() - nNameOffset;
3206  pParsingDictionary->insert(aName, pReference);
3207  pParsingDictionary->SetKeyOffset(aName, nNameOffset);
3208  pParsingDictionary->SetKeyValueLength(aName, nLength);
3209  aName.clear();
3210  }
3211  else
3212  {
3213  SAL_INFO("vcl.filter", "neither Dictionary nor Array available");
3214  }
3215  aNumbers.clear();
3216  }
3217  else if (auto pLiteralString = dynamic_cast<PDFLiteralStringElement*>(pCurrentElement))
3218  {
3219  if (pParsingArray)
3220  {
3221  pParsingArray->PushBack(pLiteralString);
3222  }
3223  else if (pParsingDictionary)
3224  {
3225  pParsingDictionary->insert(aName, pLiteralString);
3226  pParsingDictionary->SetKeyOffset(aName, nNameOffset);
3227  aName.clear();
3228  }
3229  else
3230  {
3231  SAL_INFO("vcl.filter", "neither Dictionary nor Array available");
3232  }
3233  }
3234  else if (auto pBoolean = dynamic_cast<PDFBooleanElement*>(pCurrentElement))
3235  {
3236  if (pParsingArray)
3237  {
3238  pParsingArray->PushBack(pBoolean);
3239  }
3240  else if (pParsingDictionary)
3241  {
3242  pParsingDictionary->insert(aName, pBoolean);
3243  pParsingDictionary->SetKeyOffset(aName, nNameOffset);
3244  aName.clear();
3245  }
3246  else
3247  {
3248  SAL_INFO("vcl.filter", "neither Dictionary nor Array available");
3249  }
3250  }
3251  else if (auto pHexString = dynamic_cast<PDFHexStringElement*>(pCurrentElement))
3252  {
3253  if (pParsingArray)
3254  {
3255  pParsingArray->PushBack(pHexString);
3256  }
3257  else if (pParsingDictionary)
3258  {
3259  pParsingDictionary->insert(aName, pHexString);
3260  pParsingDictionary->SetKeyOffset(aName, nNameOffset);
3261  aName.clear();
3262  }
3263  }
3264  else if (auto pNumberElement = dynamic_cast<PDFNumberElement*>(pCurrentElement))
3265  {
3266  // Just remember this, so that in case it's not a reference parameter,
3267  // we can handle it later.
3268  aNumbers.push_back(pNumberElement);
3269  }
3270  else if (dynamic_cast<PDFEndObjectElement*>(pCurrentElement))
3271  {
3272  // parsing of the object is finished
3273  break;
3274  }
3275  else if (dynamic_cast<PDFObjectElement*>(pCurrentElement)
3276  || dynamic_cast<PDFTrailerElement*>(pCurrentElement))
3277  {
3278  continue;
3279  }
3280  else
3281  {
3282  SAL_INFO("vcl.filter", "Unhandled element while parsing.");
3283  }
3284  }
3285 
3286  return nReturnIndex;
3287 }
3288 
3289 } // namespace vcl::filter
3290 
3291 /* vim:set shiftwidth=4 softtabstop=4 expandtab: */
std::vector< std::unique_ptr< PDFElement > > m_aElements
Elements of an object in an object stream.
Definition: pdfdocument.hxx:92
sal_uInt64 m_nOffset
Location of the end of the trailer token.
Array object: a list.
bool Read(SvStream &rStream) override
const sal_Int32 m_nLength
sal_uInt64 m_nDictionaryOffset
Position after the '<<' token.
Definition: pdfdocument.hxx:77
bool SetStreamSize(sal_uInt64 nSize)
sal_Int32 nIndex
PDFObjectElement(PDFDocument &rDoc, double fObjectValue, double fGenerationValue)
Numbering object: an integer or a real.
Boolean object: a 'true' or a 'false'.
PDFElement * Lookup(const OString &rDictionaryKey)
PDFDictionaryElement * m_pDictionaryElement
Definition: pdfdocument.hxx:80
size_t GetObjectOffset(size_t nIndex) const
sal_uInt64 m_nLocation
Offset after the '/' token.
End of an array: ']'.
static void AppendUnicodeTextString(const OUString &rString, OStringBuffer &rBuffer)
Write rString as a PDF hex string into rBuffer.
void SetStream(PDFStreamElement *pStreamElement)
SvMemoryStream m_aMemory
The byte array itself.
std::string GetValue
const int MAX_SIGNATURE_CONTENT_LENGTH
Definition: pdfdocument.cxx:37
tools::Long getWidth() const
PDFArrayElement * m_pArrayElement
The contained direct array, if any.
Definition: pdfdocument.hxx:86
PDFObjectElement * m_pObject
The object that contains this array.
std::vector< std::unique_ptr< PDFObjectElement > > m_aStoredElements
Objects of an object stream.
Definition: pdfdocument.hxx:90
double LookupNumber(SvStream &rStream) const
Assuming the reference points to a number object, return its value.
void SetDictionaryLength(sal_uInt64 nDictionaryLength)
PDFTrailerElement(PDFDocument &rDoc)
aBuf
#define STREAM_SEEK_TO_END
sal_uInt64 m_nLocation
Offset before the '>>' token.
sal_uInt64 Seek(sal_uInt64 nPos)
std::vector< unsigned char > DecodeHexString(const OString &rHex)
void SetDictionaryOffset(sal_uInt64 nDictionaryOffset)
tools::Long Decompress(SvStream &rIStm, SvStream &rOStm)
PDFDocument & m_rDoc
The document owning this element.
Definition: pdfdocument.hxx:71
void PushBack(PDFElement *pElement)
sal_uInt64 GetLocation() const
PDFObjectElement * LookupObject(const OString &rDictionaryKey)
Looks up an object which is only referenced in this dictionary.
EmbeddedObjectRef * pObject
sal_uInt64 m_nArrayOffset
Position after the '[' token, if m_pArrayElement is set.
Definition: pdfdocument.hxx:82
std::map< OString, PDFElement * > m_aItems
Key-value pairs when the dictionary is a nested value.
void SetArrayOffset(sal_uInt64 nArrayOffset)
SvStream & WriteCharPtr(const char *pBuf)
void SetArrayLength(sal_uInt64 nArrayLength)
size_t parse(PDFElement *pParsingElement, size_t nStartIndex=0, int nCurrentDepth=0)
sal_uInt64 SeekRel(sal_Int64 nPos)
const std::vector< std::unique_ptr< PDFElement > > & mrElements
void setVisiting(bool bVisiting)
Definition: pdfdocument.hxx:59
SvMemoryStream * GetStreamBuffer() const
void SetStreamBuffer(std::unique_ptr< SvMemoryStream > &pStreamBuffer)
tools::Long getHeight() const
SwDoc & m_rDoc
SvStream & WriteUInt32AsString(sal_uInt32 nUInt32)
css::uno::Any const & rValue
std::unique_ptr< SvMemoryStream > m_pStreamBuffer
Uncompressed buffer of an object in an object stream.
Definition: pdfdocument.hxx:94
bool Read(SvStream &rStream) override
A byte range in a PDF file.
Definition: pdfdocument.hxx:50
bool Read(SvStream &rStream) override
bool eof() const
PDFObjectElement * LookupObject(size_t nObjectNumber)
Look up object based on object number, possibly by parsing object streams.
const BorderLinePrimitive2D *pCandidateB assert(pCandidateA)
bool Read(SvStream &rStream) override
In-memory representation of an on-disk PDF document.
TStyleElements m_aElements
sal_uInt64 m_nArrayLength
Length of the array buffer till (before) the ']' token.
Definition: pdfdocument.hxx:84
An entry in a cross-reference stream.
PDFReferenceElement(PDFDocument &rDoc, PDFNumberElement &rObject, PDFNumberElement const &rGeneration)
XRefEntryType GetType() const
const OString & GetValue() const
static void visitPages(PDFObjectElement *pPages, std::vector< PDFObjectElement * > &rRet)
Visits the page tree recursively, looking for page objects.
void PushBackEOF(size_t nOffset)
Remember the end location of an EOF token.
bool Read(SvStream &rStream) override
sal_uInt64 GetArrayLength() const
void copyPageResources(filter::PDFObjectElement *pPage, OStringBuffer &rLine)
Copies resources of pPage into rLine.
sal_Int32 nElements
bool Read(SvStream &rStream) override
PDFNumberElement * m_pNumberElement
If set, the object contains this number element (outside any dictionary/array).
Definition: pdfdocument.hxx:75
void Compress(SvStream &rIStm, SvStream &rOStm)
Copies objects from one PDF file into another one.
void SetType(XRefEntryType eType)
Same as END_OF_OBJECT, but for object streams (no endobj keyword).
void SetKeyValueLength(const OString &rKey, sal_uInt64 nLength)
sal_uInt64 m_nOffset
Input file start location.
void ParseStoredObjects()
Parse objects stored in this object stream.
static OString GetDateTime()
Get current date/time in PDF D:YYYYMMDDHHMMSS form.
bool Read(SvStream &rStream) override
sal_uInt64 GetOffset() const
std::map< OString, sal_uInt64 > m_aDictionaryKeyOffset
Position after the '/' token.
sal_uInt16 char * pName
PDFStreamElement * m_pStreamElement
The stream of this object, used when this is an object stream.
Definition: pdfdocument.hxx:88
bool GetDirty() const
int i
Indirect object: something with a unique ID.
Definition: pdfdocument.hxx:68
bool Read(SvStream &rStream) override
PDFNumberElement & m_rObject
The element providing the object number.
sal_uInt64 m_nLength
Input file token length.
static OString ReadKeyword(SvStream &rStream)
sal_uInt64 GetSize()
bool Read(SvStream &rStream) override
constexpr std::enable_if_t< std::is_signed_v< T >, std::make_unsigned_t< T > > make_unsigned(T value)
End of a dictionary: '>>'.
std::size_t WriteBytes(const void *pData, std::size_t nSize)
void AddDictionaryReference(PDFReferenceElement *pReference)
void BeginCompression(int nCompressLevel=ZCODEC_DEFAULT_COMPRESSION, bool gzLib=false)
tools::Long EndCompression()
std::vector< PDFObjectElement * > GetPages()
SvMemoryStream m_aEditBuffer
All editing takes place in this buffer, if it happens.
A one-liner comment.
sal_uInt64 GetLocation() const
Dictionary object: a set of key-value pairs.
bool Read(SvStream &rStream)
Read elements from the start of the stream till its end.
sal_uInt64 GetOffset() const
bool Read(SvStream &rStream) override
std::vector< PDFElement * > m_aElements
bool Tokenize(SvStream &rStream, TokenizeMode eMode, std::vector< std::unique_ptr< PDFElement >> &rElements, PDFObjectElement *pObjectElement)
Tokenize elements from current offset.
bool Read(SvStream &rStream) override
std::vector< PDFReferenceElement * > m_aDictionaryReferences
List of all reference elements inside this object's dictionary and nested dictionaries.
Definition: pdfdocument.hxx:97
PDFStreamElement * GetStream() const
Access to the stream of the object, if it has any.
SvStream & WriteStream(SvStream &rStream)
const std::map< OString, PDFElement * > & GetItems() const
std::map< OString, sal_uInt64 > m_aDictionaryKeyValueLength
Length of the dictionary key and value, till (before) the next token.
const char * pS
sal_uInt64 GetKeyOffset(const OString &rKey) const
void SetIDObject(size_t nID, PDFObjectElement *pObject)
Register an object (owned directly or indirectly by m_aElements) as a provider for a given ID...
void setParsing(bool bParsing)
Definition: pdfdocument.hxx:61
bool Read(SvStream &rStream) override
sal_uInt64 GetKeyValueLength(const OString &rKey) const
PDFObjectElement * LookupObject(const OString &rDictionaryKey)
std::size_t ReadBytes(void *pData, std::size_t nSize)
bool Read(SvStream &rStream) override
SvMemoryStream & GetEditBuffer()
Access to the input document, even after the input stream is gone.
const OString & GetValue() const
std::vector< std::unique_ptr< PDFElement > > & GetStoredElements()
void SetNumberElement(PDFNumberElement *pNumberElement)
sal_uInt64 m_nOffset
Location before the ']' token.
sal_uInt64 GetLength() const
PDFDictionaryElement * GetDictionary()
const std::vector< std::unique_ptr< PDFElement > > & GetElements() const
bool Read(SvStream &rStream) override
void SetDirty(bool bDirty)
PDFObjectElement * LookupObject()
Lookup referenced object, without assuming anything about its contents.
std::unique_ptr< char[]> aBuffer
SvStream & ReadChar(char &rChar)
End of an object: 'endobj' keyword.
PDFElement * LookupElement(const OString &rDictionaryKey)
Looks up an element which is contained in this dictionary.
#define SAL_WARN_IF(condition, area, stream)
SvStream & WriteOString(std::string_view rStr)
void setWidth(tools::Long n)
SvMemoryStream & GetMemory()
Null object: the 'null' singleton.
const std::vector< PDFReferenceElement * > & GetDictionaryReferences() const
#define SAL_INFO(area, stream)
OUString aName
bool Read(SvStream &rStream) override
std::map< size_t, PDFObjectElement * > m_aIDObjects
Object ID <-> Object pointer map.
static void SkipWhitespace(SvStream &rStream)
PDFNumberElement * GetNumberElement() const
sal_uInt64 Tell() const
QPRO_FUNC_TYPE nType
const OString & GetValue() const
bool Sign(OStringBuffer &rCMSHexBuffer)
PDFArrayElement * GetArray()
Reference object: something with a unique ID.
const std::vector< PDFElement * > & GetElements() const
End of a stream: 'endstream' keyword.
sal_uInt64 GetLocation() const
bool good() const
PDFDictionaryElement * m_pDictionaryElement
#define SAL_WARN(area, stream)
bool alreadyVisiting() const
Definition: pdfdocument.hxx:60
Literal string: in (asdf) form.
PDFArrayElement(PDFObjectElement *pObject)
bool Read(SvStream &rStream) override
sal_Int32 nLength
Name object: a key string.
void SetOffset(sal_uInt64 nOffset)
The trailer singleton is at the end of the doc.
const std::map< OString, PDFElement * > & GetDictionaryItems()
Get access to the parsed key-value items from the object dictionary.
void SetDictionary(PDFDictionaryElement *pDictionaryElement)
PDFNumberElement & GetObjectElement() const
void AddDataRange(const void *pData, sal_Int32 size)
PDFElement * Lookup(const OString &rDictionaryKey)
sal_uInt64 m_nDictionaryLength
Length of the dictionary buffer till (before) the '>>' token.
Definition: pdfdocument.hxx:79
sal_uInt64 GetArrayOffset() const
Stream object: a byte array with a known length.
sal_uInt64 m_nLocation
Offset after the '<<' token.
void setHeight(tools::Long n)
sal_uInt16 nPos
static PDFElement * Lookup(const std::map< OString, PDFElement * > &rDictionary, const OString &rKey)
const void * GetData()
void SetArray(PDFArrayElement *pArrayElement)
bool Read(SvStream &rStream) override
sal_uInt64 m_nOffset
Location after the 'R' token.
void SetKeyOffset(const OString &rKey, sal_uInt64 nOffset)
OStringBuffer & padToLength(OStringBuffer &rBuffer, sal_Int32 nLength, char cFill= '\0')