LibreOffice Module vcl (master)  1
pdfdocument.cxx
Go to the documentation of this file.
1 /* -*- Mode: C++; tab-width: 4; indent-tabs-mode: nil; c-basic-offset: 4 -*- */
2 /*
3  * This file is part of the LibreOffice project.
4  *
5  * This Source Code Form is subject to the terms of the Mozilla Public
6  * License, v. 2.0. If a copy of the MPL was not distributed with this
7  * file, You can obtain one at http://mozilla.org/MPL/2.0/.
8  */
9 
11 
12 #include <map>
13 #include <memory>
14 #include <vector>
15 
16 #include <com/sun/star/uno/Sequence.hxx>
17 #include <com/sun/star/security/XCertificate.hpp>
18 
20 #include <comphelper/string.hxx>
21 #include <rtl/character.hxx>
22 #include <rtl/strbuf.hxx>
23 #include <rtl/string.hxx>
24 #include <sal/log.hxx>
25 #include <sal/types.h>
26 #include <svl/cryptosign.hxx>
27 #include <tools/zcodec.hxx>
28 #include <vcl/pdfwriter.hxx>
29 #include <o3tl/safeint.hxx>
30 
31 #include <pdf/objectcopier.hxx>
32 
33 using namespace com::sun::star;
34 
35 namespace vcl::filter
36 {
// The special members below are defined out of line but defaulted, so the
// compiler-generated implementations are emitted in this translation unit.
// NOTE(review): presumably done so the header does not need complete member
// types -- confirm against the class declarations.

/// Default-constructs a cross-reference entry.
37 XRefEntry::XRefEntry() = default;
38 
/// Default-constructs an empty PDF document.
39 PDFDocument::PDFDocument() = default;
40 
/// Destroys the document and its members.
41 PDFDocument::~PDFDocument() = default;
42 
43 bool PDFDocument::RemoveSignature(size_t nPosition)
44 {
45  std::vector<PDFObjectElement*> aSignatures = GetSignatureWidgets();
46  if (nPosition >= aSignatures.size())
47  {
48  SAL_WARN("vcl.filter", "PDFDocument::RemoveSignature: invalid nPosition");
49  return false;
50  }
51 
52  if (aSignatures.size() != m_aEOFs.size() - 1)
53  {
54  SAL_WARN("vcl.filter", "PDFDocument::RemoveSignature: no 1:1 mapping between signatures "
55  "and incremental updates");
56  return false;
57  }
58 
59  // The EOF offset is the end of the original file, without the signature at
60  // nPosition.
61  m_aEditBuffer.Seek(m_aEOFs[nPosition]);
62  // Drop all bytes after the current position.
63  m_aEditBuffer.SetStreamSize(m_aEditBuffer.Tell() + 1);
64 
65  return m_aEditBuffer.good();
66 }
67 
68 sal_Int32 PDFDocument::createObject()
69 {
70  sal_Int32 nObject = m_aXRef.size();
71  m_aXRef[nObject] = XRefEntry();
72  return nObject;
73 }
74 
75 bool PDFDocument::updateObject(sal_Int32 nObject)
76 {
77  if (o3tl::make_unsigned(nObject) >= m_aXRef.size())
78  {
79  SAL_WARN("vcl.filter", "PDFDocument::updateObject: invalid nObject");
80  return false;
81  }
82 
83  XRefEntry aEntry;
84  aEntry.SetOffset(m_aEditBuffer.Tell());
85  aEntry.SetDirty(true);
86  m_aXRef[nObject] = aEntry;
87  return true;
88 }
89 
90 bool PDFDocument::writeBuffer(const void* pBuffer, sal_uInt64 nBytes)
91 {
92  std::size_t nWritten = m_aEditBuffer.WriteBytes(pBuffer, nBytes);
93  return nWritten == nBytes;
94 }
95 
/// Stores a copy of the signature line bytes. WriteAppearanceObject() parses
/// them as a PDF document to build the signature appearance and clears them
/// afterwards.
96 void PDFDocument::SetSignatureLine(const std::vector<sal_Int8>& rSignatureLine)
97 {
98  m_aSignatureLine = rSignatureLine;
99 }
100 
/// Selects the page index the signature widget is attached to. Sign() falls
/// back to page 0 when the index is not smaller than the number of pages.
101 void PDFDocument::SetSignaturePage(size_t nPage) { m_nSignaturePage = nPage; }
102 
103 sal_uInt32 PDFDocument::GetNextSignature()
104 {
105  sal_uInt32 nRet = 0;
106  for (const auto& pSignature : GetSignatureWidgets())
107  {
108  auto pT = dynamic_cast<PDFLiteralStringElement*>(pSignature->Lookup("T"));
109  if (!pT)
110  continue;
111 
112  const OString& rValue = pT->GetValue();
113  const OString aPrefix = "Signature";
114  if (!rValue.startsWith(aPrefix))
115  continue;
116 
117  nRet = std::max(nRet, rValue.copy(aPrefix.getLength()).toUInt32());
118  }
119 
120  return nRet + 1;
121 }
122 
123 sal_Int32 PDFDocument::WriteSignatureObject(const OUString& rDescription, bool bAdES,
124  sal_uInt64& rLastByteRangeOffset,
125  sal_Int64& rContentOffset)
126 {
127  // Write signature object.
128  sal_Int32 nSignatureId = m_aXRef.size();
129  XRefEntry aSignatureEntry;
130  aSignatureEntry.SetOffset(m_aEditBuffer.Tell());
131  aSignatureEntry.SetDirty(true);
132  m_aXRef[nSignatureId] = aSignatureEntry;
133  OStringBuffer aSigBuffer;
134  aSigBuffer.append(nSignatureId);
135  aSigBuffer.append(" 0 obj\n");
136  aSigBuffer.append("<</Contents <");
137  rContentOffset = aSignatureEntry.GetOffset() + aSigBuffer.getLength();
138  // Reserve space for the PKCS#7 object.
139  OStringBuffer aContentFiller(MAX_SIGNATURE_CONTENT_LENGTH);
141  aSigBuffer.append(aContentFiller.makeStringAndClear());
142  aSigBuffer.append(">\n/Type/Sig/SubFilter");
143  if (bAdES)
144  aSigBuffer.append("/ETSI.CAdES.detached");
145  else
146  aSigBuffer.append("/adbe.pkcs7.detached");
147 
148  // Time of signing.
149  aSigBuffer.append(" /M (");
150  aSigBuffer.append(vcl::PDFWriter::GetDateTime());
151  aSigBuffer.append(")");
152 
153  // Byte range: we can write offset1-length1 and offset2 right now, will
154  // write length2 later.
155  aSigBuffer.append(" /ByteRange [ 0 ");
156  // -1 and +1 is the leading "<" and the trailing ">" around the hex string.
157  aSigBuffer.append(rContentOffset - 1);
158  aSigBuffer.append(" ");
159  aSigBuffer.append(rContentOffset + MAX_SIGNATURE_CONTENT_LENGTH + 1);
160  aSigBuffer.append(" ");
161  rLastByteRangeOffset = aSignatureEntry.GetOffset() + aSigBuffer.getLength();
162  // We don't know how many bytes we need for the last ByteRange value, this
163  // should be enough.
164  OStringBuffer aByteRangeFiller;
165  comphelper::string::padToLength(aByteRangeFiller, 100, ' ');
166  aSigBuffer.append(aByteRangeFiller.makeStringAndClear());
167  // Finish the Sig obj.
168  aSigBuffer.append(" /Filter/Adobe.PPKMS");
169 
170  if (!rDescription.isEmpty())
171  {
172  aSigBuffer.append("/Reason<");
173  vcl::PDFWriter::AppendUnicodeTextString(rDescription, aSigBuffer);
174  aSigBuffer.append(">");
175  }
176 
177  aSigBuffer.append(" >>\nendobj\n\n");
178  m_aEditBuffer.WriteOString(aSigBuffer.toString());
179 
180  return nSignatureId;
181 }
182 
183 sal_Int32 PDFDocument::WriteAppearanceObject(tools::Rectangle& rSignatureRectangle)
184 {
185  PDFDocument aPDFDocument;
186  filter::PDFObjectElement* pPage = nullptr;
187  std::vector<filter::PDFObjectElement*> aContentStreams;
188 
189  if (!m_aSignatureLine.empty())
190  {
191  // Parse the PDF data of signature line: we can set the signature rectangle to non-empty
192  // based on it.
193  SvMemoryStream aPDFStream;
194  aPDFStream.WriteBytes(m_aSignatureLine.data(), m_aSignatureLine.size());
195  aPDFStream.Seek(0);
196  if (!aPDFDocument.Read(aPDFStream))
197  {
198  SAL_WARN("vcl.filter",
199  "PDFDocument::WriteAppearanceObject: failed to read the PDF document");
200  return -1;
201  }
202 
203  std::vector<filter::PDFObjectElement*> aPages = aPDFDocument.GetPages();
204  if (aPages.empty())
205  {
206  SAL_WARN("vcl.filter", "PDFDocument::WriteAppearanceObject: no pages");
207  return -1;
208  }
209 
210  pPage = aPages[0];
211  if (!pPage)
212  {
213  SAL_WARN("vcl.filter", "PDFDocument::WriteAppearanceObject: no page");
214  return -1;
215  }
216 
217  // Calculate the bounding box.
218  PDFElement* pMediaBox = pPage->Lookup("MediaBox");
219  auto pMediaBoxArray = dynamic_cast<PDFArrayElement*>(pMediaBox);
220  if (!pMediaBoxArray || pMediaBoxArray->GetElements().size() < 4)
221  {
222  SAL_WARN("vcl.filter",
223  "PDFDocument::WriteAppearanceObject: MediaBox is not an array of 4");
224  return -1;
225  }
226  const std::vector<PDFElement*>& rMediaBoxElements = pMediaBoxArray->GetElements();
227  auto pWidth = dynamic_cast<PDFNumberElement*>(rMediaBoxElements[2]);
228  if (!pWidth)
229  {
230  SAL_WARN("vcl.filter", "PDFDocument::WriteAppearanceObject: MediaBox has no width");
231  return -1;
232  }
233  rSignatureRectangle.setWidth(pWidth->GetValue());
234  auto pHeight = dynamic_cast<PDFNumberElement*>(rMediaBoxElements[3]);
235  if (!pHeight)
236  {
237  SAL_WARN("vcl.filter", "PDFDocument::WriteAppearanceObject: MediaBox has no height");
238  return -1;
239  }
240  rSignatureRectangle.setHeight(pHeight->GetValue());
241 
242  if (PDFObjectElement* pContentStream = pPage->LookupObject("Contents"))
243  {
244  aContentStreams.push_back(pContentStream);
245  }
246 
247  if (aContentStreams.empty())
248  {
249  SAL_WARN("vcl.filter", "PDFDocument::WriteAppearanceObject: no content stream");
250  return -1;
251  }
252  }
253  m_aSignatureLine.clear();
254 
255  // Write appearance object: allocate an ID.
256  sal_Int32 nAppearanceId = m_aXRef.size();
257  m_aXRef[nAppearanceId] = XRefEntry();
258 
259  // Write the object content.
260  SvMemoryStream aEditBuffer;
261  aEditBuffer.WriteUInt32AsString(nAppearanceId);
262  aEditBuffer.WriteCharPtr(" 0 obj\n");
263  aEditBuffer.WriteCharPtr("<</Type/XObject\n/Subtype/Form\n");
264 
265  PDFObjectCopier aCopier(*this);
266  if (!aContentStreams.empty())
267  {
268  assert(pPage && "aContentStreams is only filled if there was a pPage");
269  OStringBuffer aBuffer;
270  aCopier.copyPageResources(pPage, aBuffer);
271  aEditBuffer.WriteOString(aBuffer.makeStringAndClear());
272  }
273 
274  aEditBuffer.WriteCharPtr("/BBox[0 0 ");
275  aEditBuffer.WriteOString(OString::number(rSignatureRectangle.getWidth()));
276  aEditBuffer.WriteCharPtr(" ");
277  aEditBuffer.WriteOString(OString::number(rSignatureRectangle.getHeight()));
278  aEditBuffer.WriteCharPtr("]\n/Length ");
279 
280  // Add the object to the doc-level edit buffer and update the offset.
281  SvMemoryStream aStream;
282  bool bCompressed = false;
283  sal_Int32 nLength = 0;
284  if (!aContentStreams.empty())
285  {
286  nLength = PDFObjectCopier::copyPageStreams(aContentStreams, aStream, bCompressed);
287  }
288  aEditBuffer.WriteOString(OString::number(nLength));
289  if (bCompressed)
290  {
291  aEditBuffer.WriteOString(" /Filter/FlateDecode");
292  }
293 
294  aEditBuffer.WriteCharPtr("\n>>\n");
295 
296  aEditBuffer.WriteCharPtr("stream\n");
297 
298  // Copy the original page streams to the form XObject stream.
299  aStream.Seek(0);
300  aEditBuffer.WriteStream(aStream);
301 
302  aEditBuffer.WriteCharPtr("\nendstream\nendobj\n\n");
303 
304  aEditBuffer.Seek(0);
305  XRefEntry aAppearanceEntry;
306  aAppearanceEntry.SetOffset(m_aEditBuffer.Tell());
307  aAppearanceEntry.SetDirty(true);
308  m_aXRef[nAppearanceId] = aAppearanceEntry;
309  m_aEditBuffer.WriteStream(aEditBuffer);
310 
311  return nAppearanceId;
312 }
313 
314 sal_Int32 PDFDocument::WriteAnnotObject(PDFObjectElement const& rFirstPage, sal_Int32 nSignatureId,
315  sal_Int32 nAppearanceId,
316  const tools::Rectangle& rSignatureRectangle)
317 {
318  // Decide what identifier to use for the new signature.
319  sal_uInt32 nNextSignature = GetNextSignature();
320 
321  // Write the Annot object, references nSignatureId and nAppearanceId.
322  sal_Int32 nAnnotId = m_aXRef.size();
323  XRefEntry aAnnotEntry;
324  aAnnotEntry.SetOffset(m_aEditBuffer.Tell());
325  aAnnotEntry.SetDirty(true);
326  m_aXRef[nAnnotId] = aAnnotEntry;
327  m_aEditBuffer.WriteUInt32AsString(nAnnotId);
328  m_aEditBuffer.WriteCharPtr(" 0 obj\n");
329  m_aEditBuffer.WriteCharPtr("<</Type/Annot/Subtype/Widget/F 132\n");
330  m_aEditBuffer.WriteCharPtr("/Rect[0 0 ");
331  m_aEditBuffer.WriteOString(OString::number(rSignatureRectangle.getWidth()));
332  m_aEditBuffer.WriteCharPtr(" ");
333  m_aEditBuffer.WriteOString(OString::number(rSignatureRectangle.getHeight()));
334  m_aEditBuffer.WriteCharPtr("]\n");
335  m_aEditBuffer.WriteCharPtr("/FT/Sig\n");
336  m_aEditBuffer.WriteCharPtr("/P ");
337  m_aEditBuffer.WriteUInt32AsString(rFirstPage.GetObjectValue());
338  m_aEditBuffer.WriteCharPtr(" 0 R\n");
339  m_aEditBuffer.WriteCharPtr("/T(Signature");
340  m_aEditBuffer.WriteUInt32AsString(nNextSignature);
341  m_aEditBuffer.WriteCharPtr(")\n");
342  m_aEditBuffer.WriteCharPtr("/V ");
343  m_aEditBuffer.WriteUInt32AsString(nSignatureId);
344  m_aEditBuffer.WriteCharPtr(" 0 R\n");
345  m_aEditBuffer.WriteCharPtr("/DV ");
346  m_aEditBuffer.WriteUInt32AsString(nSignatureId);
347  m_aEditBuffer.WriteCharPtr(" 0 R\n");
348  m_aEditBuffer.WriteCharPtr("/AP<<\n/N ");
349  m_aEditBuffer.WriteUInt32AsString(nAppearanceId);
350  m_aEditBuffer.WriteCharPtr(" 0 R\n>>\n");
351  m_aEditBuffer.WriteCharPtr(">>\nendobj\n\n");
352 
353  return nAnnotId;
354 }
355 
356 bool PDFDocument::WritePageObject(PDFObjectElement& rFirstPage, sal_Int32 nAnnotId)
357 {
358  PDFElement* pAnnots = rFirstPage.Lookup("Annots");
359  auto pAnnotsReference = dynamic_cast<PDFReferenceElement*>(pAnnots);
360  if (pAnnotsReference)
361  {
362  // Write the updated Annots key of the Page object.
363  PDFObjectElement* pAnnotsObject = pAnnotsReference->LookupObject();
364  if (!pAnnotsObject)
365  {
366  SAL_WARN("vcl.filter", "PDFDocument::Sign: invalid Annots reference");
367  return false;
368  }
369 
370  sal_uInt32 nAnnotsId = pAnnotsObject->GetObjectValue();
371  m_aXRef[nAnnotsId].SetType(XRefEntryType::NOT_COMPRESSED);
372  m_aXRef[nAnnotsId].SetOffset(m_aEditBuffer.Tell());
373  m_aXRef[nAnnotsId].SetDirty(true);
374  m_aEditBuffer.WriteUInt32AsString(nAnnotsId);
375  m_aEditBuffer.WriteCharPtr(" 0 obj\n[");
376 
377  // Write existing references.
378  PDFArrayElement* pArray = pAnnotsObject->GetArray();
379  if (!pArray)
380  {
381  SAL_WARN("vcl.filter", "PDFDocument::Sign: Page Annots is a reference to a non-array");
382  return false;
383  }
384 
385  for (size_t i = 0; i < pArray->GetElements().size(); ++i)
386  {
387  auto pReference = dynamic_cast<PDFReferenceElement*>(pArray->GetElements()[i]);
388  if (!pReference)
389  continue;
390 
391  if (i)
392  m_aEditBuffer.WriteCharPtr(" ");
393  m_aEditBuffer.WriteUInt32AsString(pReference->GetObjectValue());
394  m_aEditBuffer.WriteCharPtr(" 0 R");
395  }
396  // Write our reference.
397  m_aEditBuffer.WriteCharPtr(" ");
398  m_aEditBuffer.WriteUInt32AsString(nAnnotId);
399  m_aEditBuffer.WriteCharPtr(" 0 R");
400 
401  m_aEditBuffer.WriteCharPtr("]\nendobj\n\n");
402  }
403  else
404  {
405  // Write the updated first page object, references nAnnotId.
406  sal_uInt32 nFirstPageId = rFirstPage.GetObjectValue();
407  if (nFirstPageId >= m_aXRef.size())
408  {
409  SAL_WARN("vcl.filter", "PDFDocument::Sign: invalid first page obj id");
410  return false;
411  }
412  m_aXRef[nFirstPageId].SetOffset(m_aEditBuffer.Tell());
413  m_aXRef[nFirstPageId].SetDirty(true);
414  m_aEditBuffer.WriteUInt32AsString(nFirstPageId);
415  m_aEditBuffer.WriteCharPtr(" 0 obj\n");
416  m_aEditBuffer.WriteCharPtr("<<");
417  auto pAnnotsArray = dynamic_cast<PDFArrayElement*>(pAnnots);
418  if (!pAnnotsArray)
419  {
420  // No Annots key, just write the key with a single reference.
421  m_aEditBuffer.WriteBytes(static_cast<const char*>(m_aEditBuffer.GetData())
422  + rFirstPage.GetDictionaryOffset(),
423  rFirstPage.GetDictionaryLength());
424  m_aEditBuffer.WriteCharPtr("/Annots[");
425  m_aEditBuffer.WriteUInt32AsString(nAnnotId);
426  m_aEditBuffer.WriteCharPtr(" 0 R]");
427  }
428  else
429  {
430  // Annots key is already there, insert our reference at the end.
431  PDFDictionaryElement* pDictionary = rFirstPage.GetDictionary();
432 
433  // Offset right before the end of the Annots array.
434  sal_uInt64 nAnnotsEndOffset = pDictionary->GetKeyOffset("Annots")
435  + pDictionary->GetKeyValueLength("Annots") - 1;
436  // Length of beginning of the dictionary -> Annots end.
437  sal_uInt64 nAnnotsBeforeEndLength = nAnnotsEndOffset - rFirstPage.GetDictionaryOffset();
438  m_aEditBuffer.WriteBytes(static_cast<const char*>(m_aEditBuffer.GetData())
439  + rFirstPage.GetDictionaryOffset(),
440  nAnnotsBeforeEndLength);
441  m_aEditBuffer.WriteCharPtr(" ");
442  m_aEditBuffer.WriteUInt32AsString(nAnnotId);
443  m_aEditBuffer.WriteCharPtr(" 0 R");
444  // Length of Annots end -> end of the dictionary.
445  sal_uInt64 nAnnotsAfterEndLength = rFirstPage.GetDictionaryOffset()
446  + rFirstPage.GetDictionaryLength()
447  - nAnnotsEndOffset;
448  m_aEditBuffer.WriteBytes(static_cast<const char*>(m_aEditBuffer.GetData())
449  + nAnnotsEndOffset,
450  nAnnotsAfterEndLength);
451  }
452  m_aEditBuffer.WriteCharPtr(">>");
453  m_aEditBuffer.WriteCharPtr("\nendobj\n\n");
454  }
455 
456  return true;
457 }
458 
459 bool PDFDocument::WriteCatalogObject(sal_Int32 nAnnotId, PDFReferenceElement*& pRoot)
460 {
461  if (m_pXRefStream)
462  pRoot = dynamic_cast<PDFReferenceElement*>(m_pXRefStream->Lookup("Root"));
463  else
464  {
465  if (!m_pTrailer)
466  {
467  SAL_WARN("vcl.filter", "PDFDocument::Sign: found no trailer");
468  return false;
469  }
470  pRoot = dynamic_cast<PDFReferenceElement*>(m_pTrailer->Lookup("Root"));
471  }
472  if (!pRoot)
473  {
474  SAL_WARN("vcl.filter", "PDFDocument::Sign: trailer has no root reference");
475  return false;
476  }
477  PDFObjectElement* pCatalog = pRoot->LookupObject();
478  if (!pCatalog)
479  {
480  SAL_WARN("vcl.filter", "PDFDocument::Sign: invalid catalog reference");
481  return false;
482  }
483  sal_uInt32 nCatalogId = pCatalog->GetObjectValue();
484  if (nCatalogId >= m_aXRef.size())
485  {
486  SAL_WARN("vcl.filter", "PDFDocument::Sign: invalid catalog obj id");
487  return false;
488  }
489  PDFElement* pAcroForm = pCatalog->Lookup("AcroForm");
490  auto pAcroFormReference = dynamic_cast<PDFReferenceElement*>(pAcroForm);
491  if (pAcroFormReference)
492  {
493  // Write the updated AcroForm key of the Catalog object.
494  PDFObjectElement* pAcroFormObject = pAcroFormReference->LookupObject();
495  if (!pAcroFormObject)
496  {
497  SAL_WARN("vcl.filter", "PDFDocument::Sign: invalid AcroForm reference");
498  return false;
499  }
500 
501  sal_uInt32 nAcroFormId = pAcroFormObject->GetObjectValue();
502  m_aXRef[nAcroFormId].SetType(XRefEntryType::NOT_COMPRESSED);
503  m_aXRef[nAcroFormId].SetOffset(m_aEditBuffer.Tell());
504  m_aXRef[nAcroFormId].SetDirty(true);
505  m_aEditBuffer.WriteUInt32AsString(nAcroFormId);
506  m_aEditBuffer.WriteCharPtr(" 0 obj\n");
507 
508  // If this is nullptr, then the AcroForm object is not in an object stream.
509  SvMemoryStream* pStreamBuffer = pAcroFormObject->GetStreamBuffer();
510 
511  if (!pAcroFormObject->Lookup("Fields"))
512  {
513  SAL_WARN("vcl.filter",
514  "PDFDocument::Sign: AcroForm object without required Fields key");
515  return false;
516  }
517 
518  PDFDictionaryElement* pAcroFormDictionary = pAcroFormObject->GetDictionary();
519  if (!pAcroFormDictionary)
520  {
521  SAL_WARN("vcl.filter", "PDFDocument::Sign: AcroForm object has no dictionary");
522  return false;
523  }
524 
525  // Offset right before the end of the Fields array.
526  sal_uInt64 nFieldsEndOffset = pAcroFormDictionary->GetKeyOffset("Fields")
527  + pAcroFormDictionary->GetKeyValueLength("Fields")
528  - strlen("]");
529 
530  // Length of beginning of the object dictionary -> Fields end.
531  sal_uInt64 nFieldsBeforeEndLength = nFieldsEndOffset;
532  if (pStreamBuffer)
533  m_aEditBuffer.WriteBytes(pStreamBuffer->GetData(), nFieldsBeforeEndLength);
534  else
535  {
536  nFieldsBeforeEndLength -= pAcroFormObject->GetDictionaryOffset();
537  m_aEditBuffer.WriteCharPtr("<<");
538  m_aEditBuffer.WriteBytes(static_cast<const char*>(m_aEditBuffer.GetData())
539  + pAcroFormObject->GetDictionaryOffset(),
540  nFieldsBeforeEndLength);
541  }
542 
543  // Append our reference at the end of the Fields array.
544  m_aEditBuffer.WriteCharPtr(" ");
545  m_aEditBuffer.WriteUInt32AsString(nAnnotId);
546  m_aEditBuffer.WriteCharPtr(" 0 R");
547 
548  // Length of Fields end -> end of the object dictionary.
549  if (pStreamBuffer)
550  {
551  sal_uInt64 nFieldsAfterEndLength = pStreamBuffer->GetSize() - nFieldsEndOffset;
552  m_aEditBuffer.WriteBytes(static_cast<const char*>(pStreamBuffer->GetData())
553  + nFieldsEndOffset,
554  nFieldsAfterEndLength);
555  }
556  else
557  {
558  sal_uInt64 nFieldsAfterEndLength = pAcroFormObject->GetDictionaryOffset()
559  + pAcroFormObject->GetDictionaryLength()
560  - nFieldsEndOffset;
561  m_aEditBuffer.WriteBytes(static_cast<const char*>(m_aEditBuffer.GetData())
562  + nFieldsEndOffset,
563  nFieldsAfterEndLength);
564  m_aEditBuffer.WriteCharPtr(">>");
565  }
566 
567  m_aEditBuffer.WriteCharPtr("\nendobj\n\n");
568  }
569  else
570  {
571  // Write the updated Catalog object, references nAnnotId.
572  auto pAcroFormDictionary = dynamic_cast<PDFDictionaryElement*>(pAcroForm);
573  m_aXRef[nCatalogId].SetOffset(m_aEditBuffer.Tell());
574  m_aXRef[nCatalogId].SetDirty(true);
575  m_aEditBuffer.WriteUInt32AsString(nCatalogId);
576  m_aEditBuffer.WriteCharPtr(" 0 obj\n");
577  m_aEditBuffer.WriteCharPtr("<<");
578  if (!pAcroFormDictionary)
579  {
580  // No AcroForm key, assume no signatures.
581  m_aEditBuffer.WriteBytes(static_cast<const char*>(m_aEditBuffer.GetData())
582  + pCatalog->GetDictionaryOffset(),
583  pCatalog->GetDictionaryLength());
584  m_aEditBuffer.WriteCharPtr("/AcroForm<</Fields[\n");
585  m_aEditBuffer.WriteUInt32AsString(nAnnotId);
586  m_aEditBuffer.WriteCharPtr(" 0 R\n]/SigFlags 3>>\n");
587  }
588  else
589  {
590  // AcroForm key is already there, insert our reference at the Fields end.
591  auto it = pAcroFormDictionary->GetItems().find("Fields");
592  if (it == pAcroFormDictionary->GetItems().end())
593  {
594  SAL_WARN("vcl.filter", "PDFDocument::Sign: AcroForm without required Fields key");
595  return false;
596  }
597 
598  auto pFields = dynamic_cast<PDFArrayElement*>(it->second);
599  if (!pFields)
600  {
601  SAL_WARN("vcl.filter", "PDFDocument::Sign: AcroForm Fields is not an array");
602  return false;
603  }
604 
605  // Offset right before the end of the Fields array.
606  sal_uInt64 nFieldsEndOffset = pAcroFormDictionary->GetKeyOffset("Fields")
607  + pAcroFormDictionary->GetKeyValueLength("Fields") - 1;
608  // Length of beginning of the Catalog dictionary -> Fields end.
609  sal_uInt64 nFieldsBeforeEndLength = nFieldsEndOffset - pCatalog->GetDictionaryOffset();
610  m_aEditBuffer.WriteBytes(static_cast<const char*>(m_aEditBuffer.GetData())
611  + pCatalog->GetDictionaryOffset(),
612  nFieldsBeforeEndLength);
613  m_aEditBuffer.WriteCharPtr(" ");
614  m_aEditBuffer.WriteUInt32AsString(nAnnotId);
615  m_aEditBuffer.WriteCharPtr(" 0 R");
616  // Length of Fields end -> end of the Catalog dictionary.
617  sal_uInt64 nFieldsAfterEndLength = pCatalog->GetDictionaryOffset()
618  + pCatalog->GetDictionaryLength() - nFieldsEndOffset;
619  m_aEditBuffer.WriteBytes(static_cast<const char*>(m_aEditBuffer.GetData())
620  + nFieldsEndOffset,
621  nFieldsAfterEndLength);
622  }
623  m_aEditBuffer.WriteCharPtr(">>\nendobj\n\n");
624  }
625 
626  return true;
627 }
628 
629 void PDFDocument::WriteXRef(sal_uInt64 nXRefOffset, PDFReferenceElement const* pRoot)
630 {
631  if (m_pXRefStream)
632  {
633  // Write the xref stream.
634  // This is a bit meta: the xref stream stores its own offset.
635  sal_Int32 nXRefStreamId = m_aXRef.size();
636  XRefEntry aXRefStreamEntry;
637  aXRefStreamEntry.SetOffset(nXRefOffset);
638  aXRefStreamEntry.SetDirty(true);
639  m_aXRef[nXRefStreamId] = aXRefStreamEntry;
640 
641  // Write stream data.
642  SvMemoryStream aXRefStream;
643  const size_t nOffsetLen = 3;
644  // 3 additional bytes: predictor, the first and the third field.
645  const size_t nLineLength = nOffsetLen + 3;
646  // This is the line as it appears before tweaking according to the predictor.
647  std::vector<unsigned char> aOrigLine(nLineLength);
648  // This is the previous line.
649  std::vector<unsigned char> aPrevLine(nLineLength);
650  // This is the line as written to the stream.
651  std::vector<unsigned char> aFilteredLine(nLineLength);
652  for (const auto& rXRef : m_aXRef)
653  {
654  const XRefEntry& rEntry = rXRef.second;
655 
656  if (!rEntry.GetDirty())
657  continue;
658 
659  // Predictor.
660  size_t nPos = 0;
661  // PNG prediction: up (on all rows).
662  aOrigLine[nPos++] = 2;
663 
664  // First field.
665  unsigned char nType = 0;
666  switch (rEntry.GetType())
667  {
668  case XRefEntryType::FREE:
669  nType = 0;
670  break;
671  case XRefEntryType::NOT_COMPRESSED:
672  nType = 1;
673  break;
674  case XRefEntryType::COMPRESSED:
675  nType = 2;
676  break;
677  }
678  aOrigLine[nPos++] = nType;
679 
680  // Second field.
681  for (size_t i = 0; i < nOffsetLen; ++i)
682  {
683  size_t nByte = nOffsetLen - i - 1;
684  // Fields requiring more than one byte are stored with the
685  // high-order byte first.
686  unsigned char nCh = (rEntry.GetOffset() & (0xff << (nByte * 8))) >> (nByte * 8);
687  aOrigLine[nPos++] = nCh;
688  }
689 
690  // Third field.
691  aOrigLine[nPos++] = 0;
692 
693  // Now apply the predictor.
694  aFilteredLine[0] = aOrigLine[0];
695  for (size_t i = 1; i < nLineLength; ++i)
696  {
697  // Count the delta vs the previous line.
698  aFilteredLine[i] = aOrigLine[i] - aPrevLine[i];
699  // Remember the new reference.
700  aPrevLine[i] = aOrigLine[i];
701  }
702 
703  aXRefStream.WriteBytes(aFilteredLine.data(), aFilteredLine.size());
704  }
705 
706  m_aEditBuffer.WriteUInt32AsString(nXRefStreamId);
707  m_aEditBuffer.WriteCharPtr(
708  " 0 obj\n<</DecodeParms<</Columns 5/Predictor 12>>/Filter/FlateDecode");
709 
710  // ID.
711  auto pID = dynamic_cast<PDFArrayElement*>(m_pXRefStream->Lookup("ID"));
712  if (pID)
713  {
714  const std::vector<PDFElement*>& rElements = pID->GetElements();
715  m_aEditBuffer.WriteCharPtr("/ID [ <");
716  for (size_t i = 0; i < rElements.size(); ++i)
717  {
718  auto pIDString = dynamic_cast<PDFHexStringElement*>(rElements[i]);
719  if (!pIDString)
720  continue;
721 
722  m_aEditBuffer.WriteOString(pIDString->GetValue());
723  if ((i + 1) < rElements.size())
724  m_aEditBuffer.WriteCharPtr("> <");
725  }
726  m_aEditBuffer.WriteCharPtr("> ] ");
727  }
728 
729  // Index.
730  m_aEditBuffer.WriteCharPtr("/Index [ ");
731  for (const auto& rXRef : m_aXRef)
732  {
733  if (!rXRef.second.GetDirty())
734  continue;
735 
736  m_aEditBuffer.WriteUInt32AsString(rXRef.first);
737  m_aEditBuffer.WriteCharPtr(" 1 ");
738  }
739  m_aEditBuffer.WriteCharPtr("] ");
740 
741  // Info.
742  auto pInfo = dynamic_cast<PDFReferenceElement*>(m_pXRefStream->Lookup("Info"));
743  if (pInfo)
744  {
745  m_aEditBuffer.WriteCharPtr("/Info ");
746  m_aEditBuffer.WriteUInt32AsString(pInfo->GetObjectValue());
747  m_aEditBuffer.WriteCharPtr(" ");
748  m_aEditBuffer.WriteUInt32AsString(pInfo->GetGenerationValue());
749  m_aEditBuffer.WriteCharPtr(" R ");
750  }
751 
752  // Length.
753  m_aEditBuffer.WriteCharPtr("/Length ");
754  {
755  ZCodec aZCodec;
756  aZCodec.BeginCompression();
757  aXRefStream.Seek(0);
758  SvMemoryStream aStream;
759  aZCodec.Compress(aXRefStream, aStream);
760  aZCodec.EndCompression();
761  aXRefStream.Seek(0);
762  aXRefStream.SetStreamSize(0);
763  aStream.Seek(0);
764  aXRefStream.WriteStream(aStream);
765  }
766  m_aEditBuffer.WriteUInt32AsString(aXRefStream.GetSize());
767 
768  if (!m_aStartXRefs.empty())
769  {
770  // Write location of the previous cross-reference section.
771  m_aEditBuffer.WriteCharPtr("/Prev ");
772  m_aEditBuffer.WriteUInt32AsString(m_aStartXRefs.back());
773  }
774 
775  // Root.
776  m_aEditBuffer.WriteCharPtr("/Root ");
777  m_aEditBuffer.WriteUInt32AsString(pRoot->GetObjectValue());
778  m_aEditBuffer.WriteCharPtr(" ");
779  m_aEditBuffer.WriteUInt32AsString(pRoot->GetGenerationValue());
780  m_aEditBuffer.WriteCharPtr(" R ");
781 
782  // Size.
783  m_aEditBuffer.WriteCharPtr("/Size ");
784  m_aEditBuffer.WriteUInt32AsString(m_aXRef.size());
785 
786  m_aEditBuffer.WriteCharPtr("/Type/XRef/W[1 3 1]>>\nstream\n");
787  aXRefStream.Seek(0);
788  m_aEditBuffer.WriteStream(aXRefStream);
789  m_aEditBuffer.WriteCharPtr("\nendstream\nendobj\n\n");
790  }
791  else
792  {
793  // Write the xref table.
794  m_aEditBuffer.WriteCharPtr("xref\n");
795  for (const auto& rXRef : m_aXRef)
796  {
797  size_t nObject = rXRef.first;
798  size_t nOffset = rXRef.second.GetOffset();
799  if (!rXRef.second.GetDirty())
800  continue;
801 
802  m_aEditBuffer.WriteUInt32AsString(nObject);
803  m_aEditBuffer.WriteCharPtr(" 1\n");
804  OStringBuffer aBuffer;
805  aBuffer.append(static_cast<sal_Int32>(nOffset));
806  while (aBuffer.getLength() < 10)
807  aBuffer.insert(0, "0");
808  if (nObject == 0)
809  aBuffer.append(" 65535 f \n");
810  else
811  aBuffer.append(" 00000 n \n");
812  m_aEditBuffer.WriteOString(aBuffer.toString());
813  }
814 
815  // Write the trailer.
816  m_aEditBuffer.WriteCharPtr("trailer\n<</Size ");
817  m_aEditBuffer.WriteUInt32AsString(m_aXRef.size());
818  m_aEditBuffer.WriteCharPtr("/Root ");
819  m_aEditBuffer.WriteUInt32AsString(pRoot->GetObjectValue());
820  m_aEditBuffer.WriteCharPtr(" ");
821  m_aEditBuffer.WriteUInt32AsString(pRoot->GetGenerationValue());
822  m_aEditBuffer.WriteCharPtr(" R\n");
823  auto pInfo = dynamic_cast<PDFReferenceElement*>(m_pTrailer->Lookup("Info"));
824  if (pInfo)
825  {
826  m_aEditBuffer.WriteCharPtr("/Info ");
827  m_aEditBuffer.WriteUInt32AsString(pInfo->GetObjectValue());
828  m_aEditBuffer.WriteCharPtr(" ");
829  m_aEditBuffer.WriteUInt32AsString(pInfo->GetGenerationValue());
830  m_aEditBuffer.WriteCharPtr(" R\n");
831  }
832  auto pID = dynamic_cast<PDFArrayElement*>(m_pTrailer->Lookup("ID"));
833  if (pID)
834  {
835  const std::vector<PDFElement*>& rElements = pID->GetElements();
836  m_aEditBuffer.WriteCharPtr("/ID [ <");
837  for (size_t i = 0; i < rElements.size(); ++i)
838  {
839  auto pIDString = dynamic_cast<PDFHexStringElement*>(rElements[i]);
840  if (!pIDString)
841  continue;
842 
843  m_aEditBuffer.WriteOString(pIDString->GetValue());
844  if ((i + 1) < rElements.size())
845  m_aEditBuffer.WriteCharPtr(">\n<");
846  }
847  m_aEditBuffer.WriteCharPtr("> ]\n");
848  }
849 
850  if (!m_aStartXRefs.empty())
851  {
852  // Write location of the previous cross-reference section.
853  m_aEditBuffer.WriteCharPtr("/Prev ");
854  m_aEditBuffer.WriteUInt32AsString(m_aStartXRefs.back());
855  }
856 
857  m_aEditBuffer.WriteCharPtr(">>\n");
858  }
859 }
860 
861 bool PDFDocument::Sign(const uno::Reference<security::XCertificate>& xCertificate,
862  const OUString& rDescription, bool bAdES)
863 {
864  m_aEditBuffer.Seek(STREAM_SEEK_TO_END);
865  m_aEditBuffer.WriteCharPtr("\n");
866 
867  sal_uInt64 nSignatureLastByteRangeOffset = 0;
868  sal_Int64 nSignatureContentOffset = 0;
869  sal_Int32 nSignatureId = WriteSignatureObject(
870  rDescription, bAdES, nSignatureLastByteRangeOffset, nSignatureContentOffset);
871 
872  tools::Rectangle aSignatureRectangle;
873  sal_Int32 nAppearanceId = WriteAppearanceObject(aSignatureRectangle);
874 
875  std::vector<PDFObjectElement*> aPages = GetPages();
876  if (aPages.empty())
877  {
878  SAL_WARN("vcl.filter", "PDFDocument::Sign: found no pages");
879  return false;
880  }
881 
882  size_t nPage = 0;
883  if (m_nSignaturePage < aPages.size())
884  {
885  nPage = m_nSignaturePage;
886  }
887  if (!aPages[nPage])
888  {
889  SAL_WARN("vcl.filter", "PDFDocument::Sign: failed to find page #" << nPage);
890  return false;
891  }
892 
893  PDFObjectElement& rPage = *aPages[nPage];
894  sal_Int32 nAnnotId = WriteAnnotObject(rPage, nSignatureId, nAppearanceId, aSignatureRectangle);
895 
896  if (!WritePageObject(rPage, nAnnotId))
897  {
898  SAL_WARN("vcl.filter", "PDFDocument::Sign: failed to write the updated Page object");
899  return false;
900  }
901 
902  PDFReferenceElement* pRoot = nullptr;
903  if (!WriteCatalogObject(nAnnotId, pRoot))
904  {
905  SAL_WARN("vcl.filter", "PDFDocument::Sign: failed to write the updated Catalog object");
906  return false;
907  }
908 
909  sal_uInt64 nXRefOffset = m_aEditBuffer.Tell();
910  WriteXRef(nXRefOffset, pRoot);
911 
912  // Write startxref.
913  m_aEditBuffer.WriteCharPtr("startxref\n");
914  m_aEditBuffer.WriteUInt32AsString(nXRefOffset);
915  m_aEditBuffer.WriteCharPtr("\n%%EOF\n");
916 
917  // Finalize the signature, now that we know the total file size.
918  // Calculate the length of the last byte range.
919  sal_uInt64 nFileEnd = m_aEditBuffer.Tell();
920  sal_Int64 nLastByteRangeLength
921  = nFileEnd - (nSignatureContentOffset + MAX_SIGNATURE_CONTENT_LENGTH + 1);
922  // Write the length to the buffer.
923  m_aEditBuffer.Seek(nSignatureLastByteRangeOffset);
924  OString aByteRangeBuffer = OString::number(nLastByteRangeLength) + " ]";
925  m_aEditBuffer.WriteOString(aByteRangeBuffer);
926 
927  // Create the PKCS#7 object.
928  css::uno::Sequence<sal_Int8> aDerEncoded = xCertificate->getEncoded();
929  if (!aDerEncoded.hasElements())
930  {
931  SAL_WARN("vcl.filter", "PDFDocument::Sign: empty certificate");
932  return false;
933  }
934 
935  m_aEditBuffer.Seek(0);
936  sal_uInt64 nBufferSize1 = nSignatureContentOffset - 1;
937  std::unique_ptr<char[]> aBuffer1(new char[nBufferSize1]);
938  m_aEditBuffer.ReadBytes(aBuffer1.get(), nBufferSize1);
939 
940  m_aEditBuffer.Seek(nSignatureContentOffset + MAX_SIGNATURE_CONTENT_LENGTH + 1);
941  sal_uInt64 nBufferSize2 = nLastByteRangeLength;
942  std::unique_ptr<char[]> aBuffer2(new char[nBufferSize2]);
943  m_aEditBuffer.ReadBytes(aBuffer2.get(), nBufferSize2);
944 
945  OStringBuffer aCMSHexBuffer;
946  svl::crypto::Signing aSigning(xCertificate);
947  aSigning.AddDataRange(aBuffer1.get(), nBufferSize1);
948  aSigning.AddDataRange(aBuffer2.get(), nBufferSize2);
949  if (!aSigning.Sign(aCMSHexBuffer))
950  {
951  SAL_WARN("vcl.filter", "PDFDocument::Sign: PDFWriter::Sign() failed");
952  return false;
953  }
954 
955  assert(aCMSHexBuffer.getLength() <= MAX_SIGNATURE_CONTENT_LENGTH);
956 
957  m_aEditBuffer.Seek(nSignatureContentOffset);
958  m_aEditBuffer.WriteOString(aCMSHexBuffer.toString());
959 
960  return true;
961 }
962 
963 bool PDFDocument::Write(SvStream& rStream)
964 {
965  m_aEditBuffer.Seek(0);
966  rStream.WriteStream(m_aEditBuffer);
967  return rStream.good();
968 }
969 
970 bool PDFDocument::Tokenize(SvStream& rStream, TokenizeMode eMode,
971  std::vector<std::unique_ptr<PDFElement>>& rElements,
972  PDFObjectElement* pObjectElement)
973 {
974  // Last seen object token.
975  PDFObjectElement* pObject = pObjectElement;
976  PDFNameElement* pObjectKey = nullptr;
977  PDFObjectElement* pObjectStream = nullptr;
978  bool bInXRef = false;
979  // The next number will be an xref offset.
980  bool bInStartXRef = false;
981  // Dictionary depth, so we know when we're outside any dictionaries.
982  int nDepth = 0;
983  // Last seen array token that's outside any dictionaries.
984  PDFArrayElement* pArray = nullptr;
985  // If we're inside an obj/endobj pair.
986  bool bInObject = false;
987 
988  while (true)
989  {
990  char ch;
991  rStream.ReadChar(ch);
992  if (rStream.eof())
993  break;
994 
995  switch (ch)
996  {
997  case '%':
998  {
999  auto pComment = new PDFCommentElement(*this);
1000  rElements.push_back(std::unique_ptr<PDFElement>(pComment));
1001  rStream.SeekRel(-1);
1002  if (!rElements.back()->Read(rStream))
1003  {
1004  SAL_WARN("vcl.filter",
1005  "PDFDocument::Tokenize: PDFCommentElement::Read() failed");
1006  return false;
1007  }
1008  if (eMode == TokenizeMode::EOF_TOKEN && !m_aEOFs.empty()
1009  && m_aEOFs.back() == rStream.Tell())
1010  {
1011  // Found EOF and partial parsing requested, we're done.
1012  return true;
1013  }
1014  break;
1015  }
1016  case '<':
1017  {
1018  // Dictionary or hex string.
1019  rStream.ReadChar(ch);
1020  rStream.SeekRel(-2);
1021  if (ch == '<')
1022  {
1023  rElements.push_back(std::unique_ptr<PDFElement>(new PDFDictionaryElement()));
1024  ++nDepth;
1025  }
1026  else
1027  rElements.push_back(std::unique_ptr<PDFElement>(new PDFHexStringElement));
1028  if (!rElements.back()->Read(rStream))
1029  {
1030  SAL_WARN("vcl.filter",
1031  "PDFDocument::Tokenize: PDFDictionaryElement::Read() failed");
1032  return false;
1033  }
1034  break;
1035  }
1036  case '>':
1037  {
1038  rElements.push_back(std::unique_ptr<PDFElement>(new PDFEndDictionaryElement()));
1039  --nDepth;
1040  rStream.SeekRel(-1);
1041  if (!rElements.back()->Read(rStream))
1042  {
1043  SAL_WARN("vcl.filter",
1044  "PDFDocument::Tokenize: PDFEndDictionaryElement::Read() failed");
1045  return false;
1046  }
1047  break;
1048  }
1049  case '[':
1050  {
1051  auto pArr = new PDFArrayElement(pObject);
1052  rElements.push_back(std::unique_ptr<PDFElement>(pArr));
1053  if (nDepth == 0)
1054  {
1055  // The array is attached directly, inform the object.
1056  pArray = pArr;
1057  if (pObject)
1058  {
1059  pObject->SetArray(pArray);
1060  pObject->SetArrayOffset(rStream.Tell());
1061  }
1062  }
1063  ++nDepth;
1064  rStream.SeekRel(-1);
1065  if (!rElements.back()->Read(rStream))
1066  {
1067  SAL_WARN("vcl.filter", "PDFDocument::Tokenize: PDFArrayElement::Read() failed");
1068  return false;
1069  }
1070  break;
1071  }
1072  case ']':
1073  {
1074  rElements.push_back(std::unique_ptr<PDFElement>(new PDFEndArrayElement()));
1075  --nDepth;
1076  rStream.SeekRel(-1);
1077  if (nDepth == 0)
1078  {
1079  if (pObject)
1080  {
1081  pObject->SetArrayLength(rStream.Tell() - pObject->GetArrayOffset());
1082  }
1083  }
1084  if (!rElements.back()->Read(rStream))
1085  {
1086  SAL_WARN("vcl.filter",
1087  "PDFDocument::Tokenize: PDFEndArrayElement::Read() failed");
1088  return false;
1089  }
1090  break;
1091  }
1092  case '/':
1093  {
1094  auto pNameElement = new PDFNameElement();
1095  rElements.push_back(std::unique_ptr<PDFElement>(pNameElement));
1096  rStream.SeekRel(-1);
1097  if (!pNameElement->Read(rStream))
1098  {
1099  SAL_WARN("vcl.filter", "PDFDocument::Tokenize: PDFNameElement::Read() failed");
1100  return false;
1101  }
1102 
1103  if (pObject && pObjectKey && pObjectKey->GetValue() == "Type"
1104  && pNameElement->GetValue() == "ObjStm")
1105  pObjectStream = pObject;
1106  else
1107  pObjectKey = pNameElement;
1108  break;
1109  }
1110  case '(':
1111  {
1112  rElements.push_back(std::unique_ptr<PDFElement>(new PDFLiteralStringElement));
1113  rStream.SeekRel(-1);
1114  if (!rElements.back()->Read(rStream))
1115  {
1116  SAL_WARN("vcl.filter",
1117  "PDFDocument::Tokenize: PDFLiteralStringElement::Read() failed");
1118  return false;
1119  }
1120  break;
1121  }
1122  default:
1123  {
1124  if (rtl::isAsciiDigit(static_cast<unsigned char>(ch)) || ch == '-' || ch == '+'
1125  || ch == '.')
1126  {
1127  // Numbering object: an integer or a real.
1128  auto pNumberElement = new PDFNumberElement();
1129  rElements.push_back(std::unique_ptr<PDFElement>(pNumberElement));
1130  rStream.SeekRel(-1);
1131  if (!pNumberElement->Read(rStream))
1132  {
1133  SAL_WARN("vcl.filter",
1134  "PDFDocument::Tokenize: PDFNumberElement::Read() failed");
1135  return false;
1136  }
1137  if (bInStartXRef)
1138  {
1139  bInStartXRef = false;
1140  m_aStartXRefs.push_back(pNumberElement->GetValue());
1141 
1142  auto it = m_aOffsetObjects.find(pNumberElement->GetValue());
1143  if (it != m_aOffsetObjects.end())
1144  m_pXRefStream = it->second;
1145  }
1146  else if (bInObject && !nDepth && pObject)
1147  // Number element inside an object, but outside a
1148  // dictionary / array: remember it.
1149  pObject->SetNumberElement(pNumberElement);
1150  }
1151  else if (rtl::isAsciiAlpha(static_cast<unsigned char>(ch)))
1152  {
1153  // Possible keyword, like "obj".
1154  rStream.SeekRel(-1);
1155  OString aKeyword = ReadKeyword(rStream);
1156 
1157  bool bObj = aKeyword == "obj";
1158  if (bObj || aKeyword == "R")
1159  {
1160  size_t nElements = rElements.size();
1161  if (nElements < 2)
1162  {
1163  SAL_WARN("vcl.filter", "PDFDocument::Tokenize: expected at least two "
1164  "tokens before 'obj' or 'R' keyword");
1165  return false;
1166  }
1167 
1168  auto pObjectNumber
1169  = dynamic_cast<PDFNumberElement*>(rElements[nElements - 2].get());
1170  auto pGenerationNumber
1171  = dynamic_cast<PDFNumberElement*>(rElements[nElements - 1].get());
1172  if (!pObjectNumber || !pGenerationNumber)
1173  {
1174  SAL_WARN("vcl.filter", "PDFDocument::Tokenize: missing object or "
1175  "generation number before 'obj' or 'R' keyword");
1176  return false;
1177  }
1178 
1179  if (bObj)
1180  {
1181  pObject = new PDFObjectElement(*this, pObjectNumber->GetValue(),
1182  pGenerationNumber->GetValue());
1183  rElements.push_back(std::unique_ptr<PDFElement>(pObject));
1184  m_aOffsetObjects[pObjectNumber->GetLocation()] = pObject;
1185  m_aIDObjects[pObjectNumber->GetValue()] = pObject;
1186  bInObject = true;
1187  }
1188  else
1189  {
1190  auto pReference = new PDFReferenceElement(*this, *pObjectNumber,
1191  *pGenerationNumber);
1192  rElements.push_back(std::unique_ptr<PDFElement>(pReference));
1193  if (bInObject && nDepth > 0 && pObject)
1194  // Inform the object about a new in-dictionary reference.
1195  pObject->AddDictionaryReference(pReference);
1196  }
1197  if (!rElements.back()->Read(rStream))
1198  {
1199  SAL_WARN("vcl.filter",
1200  "PDFDocument::Tokenize: PDFElement::Read() failed");
1201  return false;
1202  }
1203  }
1204  else if (aKeyword == "stream")
1205  {
1206  // Look up the length of the stream from the parent object's dictionary.
1207  size_t nLength = 0;
1208  for (size_t nElement = 0; nElement < rElements.size(); ++nElement)
1209  {
1210  // Iterate in reverse order.
1211  size_t nIndex = rElements.size() - nElement - 1;
1212  PDFElement* pElement = rElements[nIndex].get();
1213  auto pObj = dynamic_cast<PDFObjectElement*>(pElement);
1214  if (!pObj)
1215  continue;
1216 
1217  PDFElement* pLookup = pObj->Lookup("Length");
1218  auto pReference = dynamic_cast<PDFReferenceElement*>(pLookup);
1219  if (pReference)
1220  {
1221  // Length is provided as a reference.
1222  nLength = pReference->LookupNumber(rStream);
1223  break;
1224  }
1225 
1226  auto pNumber = dynamic_cast<PDFNumberElement*>(pLookup);
1227  if (pNumber)
1228  {
1229  // Length is provided directly.
1230  nLength = pNumber->GetValue();
1231  break;
1232  }
1233 
1234  SAL_WARN(
1235  "vcl.filter",
1236  "PDFDocument::Tokenize: found no Length key for stream keyword");
1237  return false;
1238  }
1239 
1240  PDFDocument::SkipLineBreaks(rStream);
1241  auto pStreamElement = new PDFStreamElement(nLength);
1242  if (pObject)
1243  pObject->SetStream(pStreamElement);
1244  rElements.push_back(std::unique_ptr<PDFElement>(pStreamElement));
1245  if (!rElements.back()->Read(rStream))
1246  {
1247  SAL_WARN("vcl.filter",
1248  "PDFDocument::Tokenize: PDFStreamElement::Read() failed");
1249  return false;
1250  }
1251  }
1252  else if (aKeyword == "endstream")
1253  {
1254  rElements.push_back(std::unique_ptr<PDFElement>(new PDFEndStreamElement));
1255  if (!rElements.back()->Read(rStream))
1256  {
1257  SAL_WARN("vcl.filter",
1258  "PDFDocument::Tokenize: PDFEndStreamElement::Read() failed");
1259  return false;
1260  }
1261  }
1262  else if (aKeyword == "endobj")
1263  {
1264  rElements.push_back(std::unique_ptr<PDFElement>(new PDFEndObjectElement));
1265  if (!rElements.back()->Read(rStream))
1266  {
1267  SAL_WARN("vcl.filter",
1268  "PDFDocument::Tokenize: PDFEndObjectElement::Read() failed");
1269  return false;
1270  }
1271  if (eMode == TokenizeMode::END_OF_OBJECT)
1272  {
1273  // Found endobj and only object parsing was requested, we're done.
1274  return true;
1275  }
1276 
1277  if (pObjectStream)
1278  {
1279  // We're at the end of an object stream, parse the stored objects.
1280  pObjectStream->ParseStoredObjects();
1281  pObjectStream = nullptr;
1282  pObjectKey = nullptr;
1283  }
1284  bInObject = false;
1285  }
1286  else if (aKeyword == "true" || aKeyword == "false")
1287  rElements.push_back(std::unique_ptr<PDFElement>(
1288  new PDFBooleanElement(aKeyword.toBoolean())));
1289  else if (aKeyword == "null")
1290  rElements.push_back(std::unique_ptr<PDFElement>(new PDFNullElement));
1291  else if (aKeyword == "xref")
1292  // Allow 'f' and 'n' keywords.
1293  bInXRef = true;
1294  else if (bInXRef && (aKeyword == "f" || aKeyword == "n"))
1295  {
1296  }
1297  else if (aKeyword == "trailer")
1298  {
1299  auto pTrailer = new PDFTrailerElement(*this);
1300 
1301  // Make it possible to find this trailer later by offset.
1302  pTrailer->Read(rStream);
1303  m_aOffsetTrailers[pTrailer->GetLocation()] = pTrailer;
1304 
1305  // When reading till the first EOF token only, remember
1306  // just the first trailer token.
1307  if (eMode != TokenizeMode::EOF_TOKEN || !m_pTrailer)
1308  m_pTrailer = pTrailer;
1309  rElements.push_back(std::unique_ptr<PDFElement>(pTrailer));
1310  }
1311  else if (aKeyword == "startxref")
1312  {
1313  bInStartXRef = true;
1314  }
1315  else
1316  {
1317  SAL_WARN("vcl.filter", "PDFDocument::Tokenize: unexpected '"
1318  << aKeyword << "' keyword at byte position "
1319  << rStream.Tell());
1320  return false;
1321  }
1322  }
1323  else
1324  {
1325  auto uChar = static_cast<unsigned char>(ch);
1326  // Be more lenient and allow unexpected null char
1327  if (!rtl::isAsciiWhiteSpace(uChar) && uChar != 0)
1328  {
1329  SAL_WARN("vcl.filter",
1330  "PDFDocument::Tokenize: unexpected character with code "
1331  << sal_Int32(ch) << " at byte position " << rStream.Tell());
1332  return false;
1333  }
1334  SAL_WARN_IF(uChar == 0, "vcl.filter",
1335  "PDFDocument::Tokenize: unexpected null character at "
1336  << rStream.Tell() << " - ignoring");
1337  }
1338  break;
1339  }
1340  }
1341  }
1342 
1343  return true;
1344 }
1345 
1346 void PDFDocument::SetIDObject(size_t nID, PDFObjectElement* pObject)
1347 {
1348  m_aIDObjects[nID] = pObject;
1349 }
1350 
1351 bool PDFDocument::Read(SvStream& rStream)
1352 {
1353  // Check file magic.
1354  std::vector<sal_Int8> aHeader(5);
1355  rStream.Seek(0);
1356  rStream.ReadBytes(aHeader.data(), aHeader.size());
1357  if (aHeader[0] != '%' || aHeader[1] != 'P' || aHeader[2] != 'D' || aHeader[3] != 'F'
1358  || aHeader[4] != '-')
1359  {
1360  SAL_WARN("vcl.filter", "PDFDocument::Read: header mismatch");
1361  return false;
1362  }
1363 
1364  // Allow later editing of the contents in-memory.
1365  rStream.Seek(0);
1366  m_aEditBuffer.WriteStream(rStream);
1367 
1368  // Look up the offset of the xref table.
1369  size_t nStartXRef = FindStartXRef(rStream);
1370  SAL_INFO("vcl.filter", "PDFDocument::Read: nStartXRef is " << nStartXRef);
1371  if (nStartXRef == 0)
1372  {
1373  SAL_WARN("vcl.filter", "PDFDocument::Read: found no xref start offset");
1374  return false;
1375  }
1376  while (true)
1377  {
1378  rStream.Seek(nStartXRef);
1379  OString aKeyword = ReadKeyword(rStream);
1380  if (aKeyword.isEmpty())
1381  ReadXRefStream(rStream);
1382 
1383  else
1384  {
1385  if (aKeyword != "xref")
1386  {
1387  SAL_WARN("vcl.filter", "PDFDocument::Read: xref is not the first keyword");
1388  return false;
1389  }
1390  ReadXRef(rStream);
1391  if (!Tokenize(rStream, TokenizeMode::EOF_TOKEN, m_aElements, nullptr))
1392  {
1393  SAL_WARN("vcl.filter", "PDFDocument::Read: failed to tokenizer trailer after xref");
1394  return false;
1395  }
1396  }
1397 
1398  PDFNumberElement* pPrev = nullptr;
1399  if (m_pTrailer)
1400  {
1401  pPrev = dynamic_cast<PDFNumberElement*>(m_pTrailer->Lookup("Prev"));
1402 
1403  // Remember the offset of this trailer in the correct order. It's
1404  // possible that newer trailers don't have a larger offset.
1405  m_aTrailerOffsets.push_back(m_pTrailer->GetLocation());
1406  }
1407  else if (m_pXRefStream)
1408  pPrev = dynamic_cast<PDFNumberElement*>(m_pXRefStream->Lookup("Prev"));
1409  if (pPrev)
1410  nStartXRef = pPrev->GetValue();
1411 
1412  // Reset state, except the edit buffer.
1413  m_aElements.clear();
1414  m_aOffsetObjects.clear();
1415  m_aIDObjects.clear();
1416  m_aStartXRefs.clear();
1417  m_aEOFs.clear();
1418  m_pTrailer = nullptr;
1419  m_pXRefStream = nullptr;
1420  if (!pPrev)
1421  break;
1422  }
1423 
1424  // Then we can tokenize the stream.
1425  rStream.Seek(0);
1426  return Tokenize(rStream, TokenizeMode::END_OF_STREAM, m_aElements, nullptr);
1427 }
1428 
1429 OString PDFDocument::ReadKeyword(SvStream& rStream)
1430 {
1431  OStringBuffer aBuf;
1432  char ch;
1433  rStream.ReadChar(ch);
1434  if (rStream.eof())
1435  return OString();
1436  while (rtl::isAsciiAlpha(static_cast<unsigned char>(ch)))
1437  {
1438  aBuf.append(ch);
1439  rStream.ReadChar(ch);
1440  if (rStream.eof())
1441  return aBuf.toString();
1442  }
1443  rStream.SeekRel(-1);
1444  return aBuf.toString();
1445 }
1446 
1447 size_t PDFDocument::FindStartXRef(SvStream& rStream)
1448 {
1449  // Find the "startxref" token, somewhere near the end of the document.
1450  std::vector<char> aBuf(1024);
1451  rStream.Seek(STREAM_SEEK_TO_END);
1452  if (rStream.Tell() > aBuf.size())
1453  rStream.SeekRel(static_cast<sal_Int64>(-1) * aBuf.size());
1454  else
1455  // The document is really short, then just read it from the start.
1456  rStream.Seek(0);
1457  size_t nBeforePeek = rStream.Tell();
1458  size_t nSize = rStream.ReadBytes(aBuf.data(), aBuf.size());
1459  rStream.Seek(nBeforePeek);
1460  if (nSize != aBuf.size())
1461  aBuf.resize(nSize);
1462  OString aPrefix("startxref");
1463  // Find the last startxref at the end of the document.
1464  auto itLastValid = aBuf.end();
1465  auto it = aBuf.begin();
1466  while (true)
1467  {
1468  it = std::search(it, aBuf.end(), aPrefix.getStr(), aPrefix.getStr() + aPrefix.getLength());
1469  if (it == aBuf.end())
1470  break;
1471 
1472  itLastValid = it;
1473  ++it;
1474  }
1475  if (itLastValid == aBuf.end())
1476  {
1477  SAL_WARN("vcl.filter", "PDFDocument::FindStartXRef: found no startxref");
1478  return 0;
1479  }
1480 
1481  rStream.SeekRel(itLastValid - aBuf.begin() + aPrefix.getLength());
1482  if (rStream.eof())
1483  {
1484  SAL_WARN("vcl.filter",
1485  "PDFDocument::FindStartXRef: unexpected end of stream after startxref");
1486  return 0;
1487  }
1488 
1489  PDFDocument::SkipWhitespace(rStream);
1490  PDFNumberElement aNumber;
1491  if (!aNumber.Read(rStream))
1492  return 0;
1493  return aNumber.GetValue();
1494 }
1495 
1496 void PDFDocument::ReadXRefStream(SvStream& rStream)
1497 {
1498  // Look up the stream length in the object dictionary.
1499  if (!Tokenize(rStream, TokenizeMode::END_OF_OBJECT, m_aElements, nullptr))
1500  {
1501  SAL_WARN("vcl.filter", "PDFDocument::ReadXRefStream: failed to read object");
1502  return;
1503  }
1504 
1505  if (m_aElements.empty())
1506  {
1507  SAL_WARN("vcl.filter", "PDFDocument::ReadXRefStream: no tokens found");
1508  return;
1509  }
1510 
1511  PDFObjectElement* pObject = nullptr;
1512  for (const auto& pElement : m_aElements)
1513  {
1514  if (auto pObj = dynamic_cast<PDFObjectElement*>(pElement.get()))
1515  {
1516  pObject = pObj;
1517  break;
1518  }
1519  }
1520  if (!pObject)
1521  {
1522  SAL_WARN("vcl.filter", "PDFDocument::ReadXRefStream: no object token found");
1523  return;
1524  }
1525 
1526  // So that the Prev key can be looked up later.
1527  m_pXRefStream = pObject;
1528 
1529  PDFElement* pLookup = pObject->Lookup("Length");
1530  auto pNumber = dynamic_cast<PDFNumberElement*>(pLookup);
1531  if (!pNumber)
1532  {
1533  SAL_WARN("vcl.filter", "PDFDocument::ReadXRefStream: stream length is not provided");
1534  return;
1535  }
1536  sal_uInt64 nLength = pNumber->GetValue();
1537 
1538  // Look up the stream offset.
1539  PDFStreamElement* pStream = nullptr;
1540  for (const auto& pElement : m_aElements)
1541  {
1542  if (auto pS = dynamic_cast<PDFStreamElement*>(pElement.get()))
1543  {
1544  pStream = pS;
1545  break;
1546  }
1547  }
1548  if (!pStream)
1549  {
1550  SAL_WARN("vcl.filter", "PDFDocument::ReadXRefStream: no stream token found");
1551  return;
1552  }
1553 
1554  // Read and decompress it.
1555  rStream.Seek(pStream->GetOffset());
1556  std::vector<char> aBuf(nLength);
1557  rStream.ReadBytes(aBuf.data(), aBuf.size());
1558 
1559  auto pFilter = dynamic_cast<PDFNameElement*>(pObject->Lookup("Filter"));
1560  if (!pFilter)
1561  {
1562  SAL_WARN("vcl.filter", "PDFDocument::ReadXRefStream: no Filter found");
1563  return;
1564  }
1565 
1566  if (pFilter->GetValue() != "FlateDecode")
1567  {
1568  SAL_WARN("vcl.filter",
1569  "PDFDocument::ReadXRefStream: unexpected filter: " << pFilter->GetValue());
1570  return;
1571  }
1572 
1573  int nColumns = 1;
1574  int nPredictor = 1;
1575  if (auto pDecodeParams = dynamic_cast<PDFDictionaryElement*>(pObject->Lookup("DecodeParms")))
1576  {
1577  const std::map<OString, PDFElement*>& rItems = pDecodeParams->GetItems();
1578  auto it = rItems.find("Columns");
1579  if (it != rItems.end())
1580  if (auto pColumns = dynamic_cast<PDFNumberElement*>(it->second))
1581  nColumns = pColumns->GetValue();
1582  it = rItems.find("Predictor");
1583  if (it != rItems.end())
1584  if (auto pPredictor = dynamic_cast<PDFNumberElement*>(it->second))
1585  nPredictor = pPredictor->GetValue();
1586  }
1587 
1588  SvMemoryStream aSource(aBuf.data(), aBuf.size(), StreamMode::READ);
1589  SvMemoryStream aStream;
1590  ZCodec aZCodec;
1591  aZCodec.BeginCompression();
1592  aZCodec.Decompress(aSource, aStream);
1593  if (!aZCodec.EndCompression())
1594  {
1595  SAL_WARN("vcl.filter", "PDFDocument::ReadXRefStream: decompression failed");
1596  return;
1597  }
1598 
1599  // Look up the first and the last entry we need to read.
1600  auto pIndex = dynamic_cast<PDFArrayElement*>(pObject->Lookup("Index"));
1601  std::vector<size_t> aFirstObjects;
1602  std::vector<size_t> aNumberOfObjects;
1603  if (!pIndex)
1604  {
1605  auto pSize = dynamic_cast<PDFNumberElement*>(pObject->Lookup("Size"));
1606  if (pSize)
1607  {
1608  aFirstObjects.push_back(0);
1609  aNumberOfObjects.push_back(pSize->GetValue());
1610  }
1611  else
1612  {
1613  SAL_WARN("vcl.filter", "PDFDocument::ReadXRefStream: Index and Size not found");
1614  return;
1615  }
1616  }
1617  else
1618  {
1619  const std::vector<PDFElement*>& rIndexElements = pIndex->GetElements();
1620  size_t nFirstObject = 0;
1621  for (size_t i = 0; i < rIndexElements.size(); ++i)
1622  {
1623  if (i % 2 == 0)
1624  {
1625  auto pFirstObject = dynamic_cast<PDFNumberElement*>(rIndexElements[i]);
1626  if (!pFirstObject)
1627  {
1628  SAL_WARN("vcl.filter",
1629  "PDFDocument::ReadXRefStream: Index has no first object");
1630  return;
1631  }
1632  nFirstObject = pFirstObject->GetValue();
1633  continue;
1634  }
1635 
1636  auto pNumberOfObjects = dynamic_cast<PDFNumberElement*>(rIndexElements[i]);
1637  if (!pNumberOfObjects)
1638  {
1639  SAL_WARN("vcl.filter",
1640  "PDFDocument::ReadXRefStream: Index has no number of objects");
1641  return;
1642  }
1643  aFirstObjects.push_back(nFirstObject);
1644  aNumberOfObjects.push_back(pNumberOfObjects->GetValue());
1645  }
1646  }
1647 
1648  // Look up the format of a single entry.
1649  const int nWSize = 3;
1650  auto pW = dynamic_cast<PDFArrayElement*>(pObject->Lookup("W"));
1651  if (!pW || pW->GetElements().size() < nWSize)
1652  {
1653  SAL_WARN("vcl.filter", "PDFDocument::ReadXRefStream: W not found or has < 3 elements");
1654  return;
1655  }
1656  int aW[nWSize];
1657  // First character is the (kind of) repeated predictor.
1658  int nLineLength = 1;
1659  for (size_t i = 0; i < nWSize; ++i)
1660  {
1661  auto pI = dynamic_cast<PDFNumberElement*>(pW->GetElements()[i]);
1662  if (!pI)
1663  {
1664  SAL_WARN("vcl.filter", "PDFDocument::ReadXRefStream: W contains non-number");
1665  return;
1666  }
1667  aW[i] = pI->GetValue();
1668  nLineLength += aW[i];
1669  }
1670 
1671  if (nPredictor > 1 && nLineLength - 1 != nColumns)
1672  {
1673  SAL_WARN("vcl.filter",
1674  "PDFDocument::ReadXRefStream: /DecodeParms/Columns is inconsistent with /W");
1675  return;
1676  }
1677 
1678  aStream.Seek(0);
1679  for (size_t nSubSection = 0; nSubSection < aFirstObjects.size(); ++nSubSection)
1680  {
1681  size_t nFirstObject = aFirstObjects[nSubSection];
1682  size_t nNumberOfObjects = aNumberOfObjects[nSubSection];
1683 
1684  // This is the line as read from the stream.
1685  std::vector<unsigned char> aOrigLine(nLineLength);
1686  // This is the line as it appears after tweaking according to nPredictor.
1687  std::vector<unsigned char> aFilteredLine(nLineLength);
1688  for (size_t nEntry = 0; nEntry < nNumberOfObjects; ++nEntry)
1689  {
1690  size_t nIndex = nFirstObject + nEntry;
1691 
1692  aStream.ReadBytes(aOrigLine.data(), aOrigLine.size());
1693  if (nPredictor > 1 && aOrigLine[0] + 10 != nPredictor)
1694  {
1695  SAL_WARN("vcl.filter", "PDFDocument::ReadXRefStream: in-stream predictor is "
1696  "inconsistent with /DecodeParms/Predictor for object #"
1697  << nIndex);
1698  return;
1699  }
1700 
1701  for (int i = 0; i < nLineLength; ++i)
1702  {
1703  switch (nPredictor)
1704  {
1705  case 1:
1706  // No prediction.
1707  break;
1708  case 12:
1709  // PNG prediction: up (on all rows).
1710  aFilteredLine[i] = aFilteredLine[i] + aOrigLine[i];
1711  break;
1712  default:
1713  SAL_WARN("vcl.filter", "PDFDocument::ReadXRefStream: unexpected predictor: "
1714  << nPredictor);
1715  return;
1716  }
1717  }
1718 
1719  // First character is already handled above.
1720  int nPos = 1;
1721  size_t nType = 0;
1722  // Start of the current field in the stream data.
1723  int nOffset = nPos;
1724  for (; nPos < nOffset + aW[0]; ++nPos)
1725  {
1726  unsigned char nCh = aFilteredLine[nPos];
1727  nType = (nType << 8) + nCh;
1728  }
1729 
1730  // Start of the object in the file stream.
1731  size_t nStreamOffset = 0;
1732  nOffset = nPos;
1733  for (; nPos < nOffset + aW[1]; ++nPos)
1734  {
1735  unsigned char nCh = aFilteredLine[nPos];
1736  nStreamOffset = (nStreamOffset << 8) + nCh;
1737  }
1738 
1739  // Generation number of the object.
1740  size_t nGenerationNumber = 0;
1741  nOffset = nPos;
1742  for (; nPos < nOffset + aW[2]; ++nPos)
1743  {
1744  unsigned char nCh = aFilteredLine[nPos];
1745  nGenerationNumber = (nGenerationNumber << 8) + nCh;
1746  }
1747 
1748  // Ignore invalid nType.
1749  if (nType <= 2)
1750  {
1751  if (m_aXRef.find(nIndex) == m_aXRef.end())
1752  {
1753  XRefEntry aEntry;
1754  switch (nType)
1755  {
1756  case 0:
1757  aEntry.SetType(XRefEntryType::FREE);
1758  break;
1759  case 1:
1760  aEntry.SetType(XRefEntryType::NOT_COMPRESSED);
1761  break;
1762  case 2:
1763  aEntry.SetType(XRefEntryType::COMPRESSED);
1764  break;
1765  }
1766  aEntry.SetOffset(nStreamOffset);
1767  m_aXRef[nIndex] = aEntry;
1768  }
1769  }
1770  }
1771  }
1772 }
1773 
1774 void PDFDocument::ReadXRef(SvStream& rStream)
1775 {
1776  PDFDocument::SkipWhitespace(rStream);
1777 
1778  while (true)
1779  {
1780  PDFNumberElement aFirstObject;
1781  if (!aFirstObject.Read(rStream))
1782  {
1783  // Next token is not a number, it'll be the trailer.
1784  return;
1785  }
1786 
1787  if (aFirstObject.GetValue() < 0)
1788  {
1789  SAL_WARN("vcl.filter", "PDFDocument::ReadXRef: expected first object number >= 0");
1790  return;
1791  }
1792 
1793  PDFDocument::SkipWhitespace(rStream);
1794  PDFNumberElement aNumberOfEntries;
1795  if (!aNumberOfEntries.Read(rStream))
1796  {
1797  SAL_WARN("vcl.filter", "PDFDocument::ReadXRef: failed to read number of entries");
1798  return;
1799  }
1800 
1801  if (aNumberOfEntries.GetValue() < 0)
1802  {
1803  SAL_WARN("vcl.filter", "PDFDocument::ReadXRef: expected zero or more entries");
1804  return;
1805  }
1806 
1807  size_t nSize = aNumberOfEntries.GetValue();
1808  for (size_t nEntry = 0; nEntry < nSize; ++nEntry)
1809  {
1810  size_t nIndex = aFirstObject.GetValue() + nEntry;
1811  PDFDocument::SkipWhitespace(rStream);
1812  PDFNumberElement aOffset;
1813  if (!aOffset.Read(rStream))
1814  {
1815  SAL_WARN("vcl.filter", "PDFDocument::ReadXRef: failed to read offset");
1816  return;
1817  }
1818 
1819  PDFDocument::SkipWhitespace(rStream);
1820  PDFNumberElement aGenerationNumber;
1821  if (!aGenerationNumber.Read(rStream))
1822  {
1823  SAL_WARN("vcl.filter", "PDFDocument::ReadXRef: failed to read generation number");
1824  return;
1825  }
1826 
1827  PDFDocument::SkipWhitespace(rStream);
1828  OString aKeyword = ReadKeyword(rStream);
1829  if (aKeyword != "f" && aKeyword != "n")
1830  {
1831  SAL_WARN("vcl.filter", "PDFDocument::ReadXRef: unexpected keyword");
1832  return;
1833  }
1834  // xrefs are read in reverse order, so never update an existing
1835  // offset with an older one.
1836  if (m_aXRef.find(nIndex) == m_aXRef.end())
1837  {
1838  XRefEntry aEntry;
1839  aEntry.SetOffset(aOffset.GetValue());
1840  // Initially only the first entry is dirty.
1841  if (nIndex == 0)
1842  aEntry.SetDirty(true);
1843  m_aXRef[nIndex] = aEntry;
1844  }
1845  PDFDocument::SkipWhitespace(rStream);
1846  }
1847  }
1848 }
1849 
1850 void PDFDocument::SkipWhitespace(SvStream& rStream)
1851 {
1852  char ch = 0;
1853 
1854  while (true)
1855  {
1856  rStream.ReadChar(ch);
1857  if (rStream.eof())
1858  break;
1859 
1860  if (!rtl::isAsciiWhiteSpace(static_cast<unsigned char>(ch)))
1861  {
1862  rStream.SeekRel(-1);
1863  return;
1864  }
1865  }
1866 }
1867 
1868 void PDFDocument::SkipLineBreaks(SvStream& rStream)
1869 {
1870  char ch = 0;
1871 
1872  while (true)
1873  {
1874  rStream.ReadChar(ch);
1875  if (rStream.eof())
1876  break;
1877 
1878  if (ch != '\n' && ch != '\r')
1879  {
1880  rStream.SeekRel(-1);
1881  return;
1882  }
1883  }
1884 }
1885 
1886 size_t PDFDocument::GetObjectOffset(size_t nIndex) const
1887 {
1888  auto it = m_aXRef.find(nIndex);
1889  if (it == m_aXRef.end() || it->second.GetType() == XRefEntryType::COMPRESSED)
1890  {
1891  SAL_WARN("vcl.filter", "PDFDocument::GetObjectOffset: wanted to look up index #"
1892  << nIndex << ", but failed");
1893  return 0;
1894  }
1895 
1896  return it->second.GetOffset();
1897 }
1898 
1899 const std::vector<std::unique_ptr<PDFElement>>& PDFDocument::GetElements() const
1900 {
1901  return m_aElements;
1902 }
1903 
1905 static void visitPages(PDFObjectElement* pPages, std::vector<PDFObjectElement*>& rRet)
1906 {
1907  auto pKids = dynamic_cast<PDFArrayElement*>(pPages->Lookup("Kids"));
1908  if (!pKids)
1909  {
1910  SAL_WARN("vcl.filter", "visitPages: pages has no kids");
1911  return;
1912  }
1913 
1914  pPages->setVisiting(true);
1915 
1916  for (const auto& pKid : pKids->GetElements())
1917  {
1918  auto pReference = dynamic_cast<PDFReferenceElement*>(pKid);
1919  if (!pReference)
1920  continue;
1921 
1922  PDFObjectElement* pKidObject = pReference->LookupObject();
1923  if (!pKidObject)
1924  continue;
1925 
1926  // detect if visiting reenters itself
1927  if (pKidObject->alreadyVisiting())
1928  {
1929  SAL_WARN("vcl.filter", "visitPages: loop in hierarchy");
1930  continue;
1931  }
1932 
1933  auto pName = dynamic_cast<PDFNameElement*>(pKidObject->Lookup("Type"));
1934  if (pName && pName->GetValue() == "Pages")
1935  // Pages inside pages: recurse.
1936  visitPages(pKidObject, rRet);
1937  else
1938  // Found an actual page.
1939  rRet.push_back(pKidObject);
1940  }
1941 
1942  pPages->setVisiting(false);
1943 }
1944 
1945 PDFObjectElement* PDFDocument::GetCatalog()
1946 {
1947  PDFReferenceElement* pRoot = nullptr;
1948 
1949  PDFTrailerElement* pTrailer = nullptr;
1950  if (!m_aTrailerOffsets.empty())
1951  {
1952  // Get access to the latest trailer, and work with the keys of that
1953  // one.
1954  auto it = m_aOffsetTrailers.find(m_aTrailerOffsets[0]);
1955  if (it != m_aOffsetTrailers.end())
1956  pTrailer = it->second;
1957  }
1958 
1959  if (pTrailer)
1960  pRoot = dynamic_cast<PDFReferenceElement*>(pTrailer->Lookup("Root"));
1961  else if (m_pXRefStream)
1962  pRoot = dynamic_cast<PDFReferenceElement*>(m_pXRefStream->Lookup("Root"));
1963 
1964  if (!pRoot)
1965  {
1966  SAL_WARN("vcl.filter", "PDFDocument::GetCatalog: trailer has no Root key");
1967  return nullptr;
1968  }
1969 
1970  return pRoot->LookupObject();
1971 }
1972 
1973 std::vector<PDFObjectElement*> PDFDocument::GetPages()
1974 {
1975  std::vector<PDFObjectElement*> aRet;
1976 
1977  PDFObjectElement* pCatalog = GetCatalog();
1978  if (!pCatalog)
1979  {
1980  SAL_WARN("vcl.filter", "PDFDocument::GetPages: trailer has no catalog");
1981  return aRet;
1982  }
1983 
1984  PDFObjectElement* pPages = pCatalog->LookupObject("Pages");
1985  if (!pPages)
1986  {
1987  SAL_WARN("vcl.filter", "PDFDocument::GetPages: catalog (obj " << pCatalog->GetObjectValue()
1988  << ") has no pages");
1989  return aRet;
1990  }
1991 
1992  visitPages(pPages, aRet);
1993 
1994  return aRet;
1995 }
1996 
1997 void PDFDocument::PushBackEOF(size_t nOffset) { m_aEOFs.push_back(nOffset); }
1998 
1999 std::vector<PDFObjectElement*> PDFDocument::GetSignatureWidgets()
2000 {
2001  std::vector<PDFObjectElement*> aRet;
2002 
2003  std::vector<PDFObjectElement*> aPages = GetPages();
2004 
2005  for (const auto& pPage : aPages)
2006  {
2007  if (!pPage)
2008  continue;
2009 
2010  PDFElement* pAnnotsElement = pPage->Lookup("Annots");
2011  auto pAnnots = dynamic_cast<PDFArrayElement*>(pAnnotsElement);
2012  if (!pAnnots)
2013  {
2014  // Annots is not an array, see if it's a reference to an object
2015  // with a direct array.
2016  auto pAnnotsRef = dynamic_cast<PDFReferenceElement*>(pAnnotsElement);
2017  if (pAnnotsRef)
2018  {
2019  if (PDFObjectElement* pAnnotsObject = pAnnotsRef->LookupObject())
2020  {
2021  pAnnots = pAnnotsObject->GetArray();
2022  }
2023  }
2024  }
2025 
2026  if (!pAnnots)
2027  continue;
2028 
2029  for (const auto& pAnnot : pAnnots->GetElements())
2030  {
2031  auto pReference = dynamic_cast<PDFReferenceElement*>(pAnnot);
2032  if (!pReference)
2033  continue;
2034 
2035  PDFObjectElement* pAnnotObject = pReference->LookupObject();
2036  if (!pAnnotObject)
2037  continue;
2038 
2039  auto pFT = dynamic_cast<PDFNameElement*>(pAnnotObject->Lookup("FT"));
2040  if (!pFT || pFT->GetValue() != "Sig")
2041  continue;
2042 
2043  aRet.push_back(pAnnotObject);
2044  }
2045  }
2046 
2047  return aRet;
2048 }
2049 
2050 std::vector<unsigned char> PDFDocument::DecodeHexString(PDFHexStringElement const* pElement)
2051 {
2052  return svl::crypto::DecodeHexString(pElement->GetValue());
2053 }
2054 
2055 PDFCommentElement::PDFCommentElement(PDFDocument& rDoc)
2056  : m_rDoc(rDoc)
2057 {
2058 }
2059 
2061 {
    // Collects characters into m_aComment until a '\n', a '\r' or the end of the stream is
    // reached; the terminator itself is not stored. Every path through the loop that leaves
    // it does so by returning true.
    // If the collected text starts with "%%EOF", the current stream position is handed to
    // PDFDocument::PushBackEOF(), incremented by one when a '\r' is directly followed by '\n'.
2062  // Read from (including) the % char till (excluding) the end of the line/stream.
2063  OStringBuffer aBuf;
2064  char ch;
2065  rStream.ReadChar(ch);
2066  while (true)
2067  {
2068  if (ch == '\n' || ch == '\r' || rStream.eof())
2069  {
2070  m_aComment = aBuf.makeStringAndClear();
2071
2072  if (m_aComment.startsWith("%%EOF"))
2073  {
2074  sal_uInt64 nPos = rStream.Tell();
2075  if (ch == '\r')
2076  {
    // Peek at the next character, then step back so it stays unread.
2077  rStream.ReadChar(ch);
2078  rStream.SeekRel(-1);
2079  // If the comment ends with a \r\n, count the \n as well to match Adobe Acrobat
2080  // behavior.
2081  if (ch == '\n')
2082  {
2083  nPos += 1;
2084  }
2085  }
2086  m_rDoc.PushBackEOF(nPos);
2087  }
2088
2089  SAL_INFO("vcl.filter", "PDFCommentElement::Read: m_aComment is '" << m_aComment << "'");
2090  return true;
2091  }
2092  aBuf.append(ch);
2093  rStream.ReadChar(ch);
2094  }
2095
    // Not reached: the loop above has no exit other than its return statement.
2096  return false;
2097 }
2098 
2100 
2102 {
    // Reads a run of characters made of ASCII digits, '-', '+' and '.' and stores its value.
    // m_nOffset is the stream position before the first character, m_nLength the number of
    // bytes consumed, m_fValue the text converted with OString::toDouble().
    // Returns false when the stream is already at its end, when the first character cannot
    // start a number (that character is then un-read), or when the stream ends before a
    // character that terminates the number has been seen.
2103  OStringBuffer aBuf;
2104  m_nOffset = rStream.Tell();
2105  char ch;
2106  rStream.ReadChar(ch);
2107  if (rStream.eof())
2108  {
2109  return false;
2110  }
2111  if (!rtl::isAsciiDigit(static_cast<unsigned char>(ch)) && ch != '-' && ch != '+' && ch != '.')
2112  {
2113  rStream.SeekRel(-1);
2114  return false;
2115  }
2116  while (!rStream.eof())
2117  {
2118  if (!rtl::isAsciiDigit(static_cast<unsigned char>(ch)) && ch != '-' && ch != '+'
2119  && ch != '.')
2120  {
    // First character after the number: leave it unread for the next token.
2121  rStream.SeekRel(-1);
2122  m_nLength = rStream.Tell() - m_nOffset;
2123  m_fValue = aBuf.makeStringAndClear().toDouble();
2124  SAL_INFO("vcl.filter", "PDFNumberElement::Read: m_fValue is '" << m_fValue << "'");
2125  return true;
2126  }
2127  aBuf.append(ch);
2128  rStream.ReadChar(ch);
2129  }
2130
2131  return false;
2132 }
2133 
2134 sal_uInt64 PDFNumberElement::GetLocation() const { return m_nOffset; }
2135 
2136 sal_uInt64 PDFNumberElement::GetLength() const { return m_nLength; }
2137 
2138 bool PDFBooleanElement::Read(SvStream& /*rStream*/) { return true; }
2139 
2140 bool PDFNullElement::Read(SvStream& /*rStream*/) { return true; }
2141 
2143 {
    // Expects '<' as the next character and then stores everything up to, but not including,
    // the next '>' in m_aValue. The characters are stored as read; no hex decoding happens
    // here. Returns false when the first character is not '<' or when the stream ends before
    // a '>' is found.
2144  char ch;
2145  rStream.ReadChar(ch);
2146  if (ch != '<')
2147  {
2148  SAL_INFO("vcl.filter", "PDFHexStringElement::Read: expected '<' as first character");
2149  return false;
2150  }
2151  rStream.ReadChar(ch);
2152
2153  OStringBuffer aBuf;
2154  while (!rStream.eof())
2155  {
2156  if (ch == '>')
2157  {
2158  m_aValue = aBuf.makeStringAndClear();
2159  SAL_INFO("vcl.filter",
2160  "PDFHexStringElement::Read: m_aValue length is " << m_aValue.getLength());
2161  return true;
2162  }
2163  aBuf.append(ch);
2164  rStream.ReadChar(ch);
2165  }
2166
2167  return false;
2168 }
2169 
2170 const OString& PDFHexStringElement::GetValue() const { return m_aValue; }
2171 
2173 {
2174  char nPrevCh = 0;
2175  char ch = 0;
2176  rStream.ReadChar(ch);
2177  if (ch != '(')
2178  {
2179  SAL_INFO("vcl.filter", "PDFHexStringElement::Read: expected '(' as first character");
2180  return false;
2181  }
2182  nPrevCh = ch;
2183  rStream.ReadChar(ch);
2184 
2185  // Start with 1 nesting level as we read a '(' above already.
2186  int nDepth = 1;
2187  OStringBuffer aBuf;
2188  while (!rStream.eof())
2189  {
2190  if (ch == '(' && nPrevCh != '\\')
2191  ++nDepth;
2192 
2193  if (ch == ')' && nPrevCh != '\\')
2194  --nDepth;
2195 
2196  if (nDepth == 0)
2197  {
2198  // ')' of the outermost '(' is reached.
2199  m_aValue = aBuf.makeStringAndClear();
2200  SAL_INFO("vcl.filter",
2201  "PDFLiteralStringElement::Read: m_aValue is '" << m_aValue << "'");
2202  return true;
2203  }
2204  aBuf.append(ch);
2205  nPrevCh = ch;
2206  rStream.ReadChar(ch);
2207  }
2208 
2209  return false;
2210 }
2211 
2212 const OString& PDFLiteralStringElement::GetValue() const { return m_aValue; }
2213 
2215  : m_rDoc(rDoc)
2216  , m_pDictionaryElement(nullptr)
2217 {
    // The dictionary pointer starts out null; nothing is parsed at construction time.
2218 }
2219 
2221 {
    // Consumes nothing: only remembers the current stream position and reports success.
2222  m_nOffset = rStream.Tell();
2223  return true;
2224 }
2225 
2226 PDFElement* PDFTrailerElement::Lookup(const OString& rDictionaryKey)
2227 {
2228  if (!m_pDictionaryElement)
2229  {
2230  PDFObjectParser aParser(m_rDoc.GetElements());
2231  aParser.parse(this);
2232  }
2233  if (!m_pDictionaryElement)
2234  return nullptr;
2235  return m_pDictionaryElement->LookupElement(rDictionaryKey);
2236 }
2237 
2238 sal_uInt64 PDFTrailerElement::GetLocation() const { return m_nOffset; }
2239 
2240 double PDFNumberElement::GetValue() const { return m_fValue; }
2241 
2242 PDFObjectElement::PDFObjectElement(PDFDocument& rDoc, double fObjectValue, double fGenerationValue)
2243  : m_rDoc(rDoc)
2244  , m_fObjectValue(fObjectValue)
2245  , m_fGenerationValue(fGenerationValue)
2246  , m_pNumberElement(nullptr)
2247  , m_nDictionaryOffset(0)
2248  , m_nDictionaryLength(0)
2249  , m_pDictionaryElement(nullptr)
2250  , m_nArrayOffset(0)
2251  , m_nArrayLength(0)
2252  , m_pArrayElement(nullptr)
2253  , m_pStreamElement(nullptr)
2254  , m_bParsed(false)
2255 {
2256 }
2257 
2259 {
    // Nothing is read from the stream here; the object and generation numbers were already
    // supplied to the constructor. Only logs them and reports success.
2260  SAL_INFO("vcl.filter",
2261  "PDFObjectElement::Read: " << m_fObjectValue << " " << m_fGenerationValue << " obj");
2262  return true;
2263 }
2264 
2266 
2267 PDFElement* PDFDictionaryElement::Lookup(const std::map<OString, PDFElement*>& rDictionary,
2268  const OString& rKey)
2269 {
2270  auto it = rDictionary.find(rKey);
2271  if (it == rDictionary.end())
2272  return nullptr;
2273 
2274  return it->second;
2275 }
2276 
2278 {
    // Resolves the value stored under rDictionaryKey when that value is an indirect
    // reference. Returns nullptr (with a warning) when the key is absent or its value is
    // not a reference.
2279  auto pKey = dynamic_cast<PDFReferenceElement*>(
2280  PDFDictionaryElement::Lookup(m_aItems, rDictionaryKey));
2281  if (!pKey)
2282  {
2283  SAL_WARN("vcl.filter",
2284  "PDFDictionaryElement::LookupObject: no such key with reference value: "
2285  << rDictionaryKey);
2286  return nullptr;
2287  }
2288
2289  return pKey->LookupObject();
2290 }
2291 
2292 PDFElement* PDFDictionaryElement::LookupElement(const OString& rDictionaryKey)
2293 {
2294  return PDFDictionaryElement::Lookup(m_aItems, rDictionaryKey);
2295 }
2296 
2298 {
    // Runs the object parser for this object once; later calls are no-ops because
    // m_bParsed is set afterwards. The token list handed to the parser is this object's own
    // m_aElements when that list is non-empty, otherwise the document's element list.
2299  if (!m_bParsed)
2300  {
2301  if (!m_aElements.empty())
2302  {
2303  // This is a stored object in an object stream.
2304  PDFObjectParser aParser(m_aElements);
2305  aParser.parse(this);
2306  }
2307  else
2308  {
2309  // Normal object: elements are stored as members of the document itself.
2310  PDFObjectParser aParser(m_rDoc.GetElements());
2311  aParser.parse(this);
2312  }
2313  m_bParsed = true;
2314  }
2315 }
2316 
2317 PDFElement* PDFObjectElement::Lookup(const OString& rDictionaryKey)
2318 {
2319  parseIfNecessary();
2320  if (!m_pDictionaryElement)
2321  return nullptr;
2322  return PDFDictionaryElement::Lookup(GetDictionaryItems(), rDictionaryKey);
2323 }
2324 
2325 PDFObjectElement* PDFObjectElement::LookupObject(const OString& rDictionaryKey)
2326 {
2327  auto pKey = dynamic_cast<PDFReferenceElement*>(Lookup(rDictionaryKey));
2328  if (!pKey)
2329  {
2330  SAL_WARN("vcl.filter", "PDFObjectElement::LookupObject: no such key with reference value: "
2331  << rDictionaryKey);
2332  return nullptr;
2333  }
2334 
2335  return pKey->LookupObject();
2336 }
2337 
2339 
2340 void PDFObjectElement::SetDictionaryOffset(sal_uInt64 nDictionaryOffset)
2341 {
2342  m_nDictionaryOffset = nDictionaryOffset;
2343 }
2344 
2346 {
    // Parse first: the offset is filled in by the object parser.
2347  parseIfNecessary();
2348  return m_nDictionaryOffset;
2349 }
2350 
2351 void PDFObjectElement::SetArrayOffset(sal_uInt64 nArrayOffset) { m_nArrayOffset = nArrayOffset; }
2352 
2353 sal_uInt64 PDFObjectElement::GetArrayOffset() const { return m_nArrayOffset; }
2354 
2355 void PDFDictionaryElement::SetKeyOffset(const OString& rKey, sal_uInt64 nOffset)
2356 {
2357  m_aDictionaryKeyOffset[rKey] = nOffset;
2358 }
2359 
2360 void PDFDictionaryElement::SetKeyValueLength(const OString& rKey, sal_uInt64 nLength)
2361 {
2362  m_aDictionaryKeyValueLength[rKey] = nLength;
2363 }
2364 
2365 sal_uInt64 PDFDictionaryElement::GetKeyOffset(const OString& rKey) const
2366 {
2367  auto it = m_aDictionaryKeyOffset.find(rKey);
2368  if (it == m_aDictionaryKeyOffset.end())
2369  return 0;
2370 
2371  return it->second;
2372 }
2373 
2374 sal_uInt64 PDFDictionaryElement::GetKeyValueLength(const OString& rKey) const
2375 {
2376  auto it = m_aDictionaryKeyValueLength.find(rKey);
2377  if (it == m_aDictionaryKeyValueLength.end())
2378  return 0;
2379 
2380  return it->second;
2381 }
2382 
2383 const std::map<OString, PDFElement*>& PDFDictionaryElement::GetItems() const { return m_aItems; }
2384 
2385 void PDFObjectElement::SetDictionaryLength(sal_uInt64 nDictionaryLength)
2386 {
2387  m_nDictionaryLength = nDictionaryLength;
2388 }
2389 
2391 {
    // Parse first: the length is filled in by the object parser.
2392  parseIfNecessary();
2393  return m_nDictionaryLength;
2394 }
2395 
2396 void PDFObjectElement::SetArrayLength(sal_uInt64 nArrayLength) { m_nArrayLength = nArrayLength; }
2397 
2398 sal_uInt64 PDFObjectElement::GetArrayLength() const { return m_nArrayLength; }
2399 
2401 {
    // Parse first so the dictionary pointer is populated; it stays null when the object
    // has no dictionary.
2402  parseIfNecessary();
2403  return m_pDictionaryElement;
2404 }
2405 
2407 {
    // Stores the pointer only; no ownership transfer is visible here.
2408  m_pDictionaryElement = pDictionaryElement;
2409 }
2410 
2412 {
    // Stores the pointer only; no ownership transfer is visible here.
2413  m_pNumberElement = pNumberElement;
2414 }
2415 
2417 
2418 const std::vector<PDFReferenceElement*>& PDFObjectElement::GetDictionaryReferences() const
2419 {
2420  return m_aDictionaryReferences;
2421 }
2422 
2424 {
    // Appends to the list returned by GetDictionaryReferences().
2425  m_aDictionaryReferences.push_back(pReference);
2426 }
2427 
2428 const std::map<OString, PDFElement*>& PDFObjectElement::GetDictionaryItems()
2429 {
2430  parseIfNecessary();
2431  return m_pDictionaryElement->GetItems();
2432 }
2433 
2434 void PDFObjectElement::SetArray(PDFArrayElement* pArrayElement) { m_pArrayElement = pArrayElement; }
2435 
2437 {
    // Stores the pointer only; no ownership transfer is visible here.
2438  m_pStreamElement = pStreamElement;
2439 }
2440 
2442 
2444 {
    // Parse first so the array pointer is populated; it stays null when the object has no
    // direct array.
2445  parseIfNecessary();
2446  return m_pArrayElement;
2447 }
2448 
2450 {
2451  if (!m_pStreamElement)
2452  {
2453  SAL_WARN("vcl.filter", "PDFObjectElement::ParseStoredObjects: no stream");
2454  return;
2455  }
2456 
2457  auto pType = dynamic_cast<PDFNameElement*>(Lookup("Type"));
2458  if (!pType || pType->GetValue() != "ObjStm")
2459  {
2460  if (!pType)
2461  SAL_WARN("vcl.filter", "PDFDocument::ReadXRefStream: missing unexpected type");
2462  else
2463  SAL_WARN("vcl.filter",
2464  "PDFDocument::ReadXRefStream: unexpected type: " << pType->GetValue());
2465  return;
2466  }
2467 
2468  auto pFilter = dynamic_cast<PDFNameElement*>(Lookup("Filter"));
2469  if (!pFilter || pFilter->GetValue() != "FlateDecode")
2470  {
2471  if (!pFilter)
2472  SAL_WARN("vcl.filter", "PDFDocument::ReadXRefStream: missing filter");
2473  else
2474  SAL_WARN("vcl.filter",
2475  "PDFDocument::ReadXRefStream: unexpected filter: " << pFilter->GetValue());
2476  return;
2477  }
2478 
2479  auto pFirst = dynamic_cast<PDFNumberElement*>(Lookup("First"));
2480  if (!pFirst)
2481  {
2482  SAL_WARN("vcl.filter", "PDFObjectElement::ParseStoredObjects: no First");
2483  return;
2484  }
2485 
2486  auto pN = dynamic_cast<PDFNumberElement*>(Lookup("N"));
2487  if (!pN)
2488  {
2489  SAL_WARN("vcl.filter", "PDFObjectElement::ParseStoredObjects: no N");
2490  return;
2491  }
2492  size_t nN = pN->GetValue();
2493 
2494  auto pLength = dynamic_cast<PDFNumberElement*>(Lookup("Length"));
2495  if (!pLength)
2496  {
2497  SAL_WARN("vcl.filter", "PDFObjectElement::ParseStoredObjects: no length");
2498  return;
2499  }
2500  size_t nLength = pLength->GetValue();
2501 
2502  // Read and decompress it.
2503  SvMemoryStream& rEditBuffer = m_rDoc.GetEditBuffer();
2504  rEditBuffer.Seek(m_pStreamElement->GetOffset());
2505  std::vector<char> aBuf(nLength);
2506  rEditBuffer.ReadBytes(aBuf.data(), aBuf.size());
2507  SvMemoryStream aSource(aBuf.data(), aBuf.size(), StreamMode::READ);
2508  SvMemoryStream aStream;
2509  ZCodec aZCodec;
2510  aZCodec.BeginCompression();
2511  aZCodec.Decompress(aSource, aStream);
2512  if (!aZCodec.EndCompression())
2513  {
2514  SAL_WARN("vcl.filter", "PDFObjectElement::ParseStoredObjects: decompression failed");
2515  return;
2516  }
2517 
2518  nLength = aStream.TellEnd();
2519  aStream.Seek(0);
2520  std::vector<size_t> aObjNums;
2521  std::vector<size_t> aOffsets;
2522  std::vector<size_t> aLengths;
2523  // First iterate over and find out the lengths.
2524  for (size_t nObject = 0; nObject < nN; ++nObject)
2525  {
2526  PDFNumberElement aObjNum;
2527  if (!aObjNum.Read(aStream))
2528  {
2529  SAL_WARN("vcl.filter",
2530  "PDFObjectElement::ParseStoredObjects: failed to read object number");
2531  return;
2532  }
2533  aObjNums.push_back(aObjNum.GetValue());
2534 
2535  PDFDocument::SkipWhitespace(aStream);
2536 
2537  PDFNumberElement aByteOffset;
2538  if (!aByteOffset.Read(aStream))
2539  {
2540  SAL_WARN("vcl.filter",
2541  "PDFObjectElement::ParseStoredObjects: failed to read byte offset");
2542  return;
2543  }
2544  aOffsets.push_back(pFirst->GetValue() + aByteOffset.GetValue());
2545 
2546  if (aOffsets.size() > 1)
2547  aLengths.push_back(aOffsets.back() - aOffsets[aOffsets.size() - 2]);
2548  if (nObject + 1 == nN)
2549  aLengths.push_back(nLength - aOffsets.back());
2550 
2551  PDFDocument::SkipWhitespace(aStream);
2552  }
2553 
2554  // Now create streams with the proper length and tokenize the data.
2555  for (size_t nObject = 0; nObject < nN; ++nObject)
2556  {
2557  size_t nObjNum = aObjNums[nObject];
2558  size_t nOffset = aOffsets[nObject];
2559  size_t nLen = aLengths[nObject];
2560 
2561  aStream.Seek(nOffset);
2562  m_aStoredElements.push_back(std::make_unique<PDFObjectElement>(m_rDoc, nObjNum, 0));
2563  PDFObjectElement* pStored = m_aStoredElements.back().get();
2564 
2565  aBuf.clear();
2566  aBuf.resize(nLen);
2567  aStream.ReadBytes(aBuf.data(), aBuf.size());
2568  SvMemoryStream aStoredStream(aBuf.data(), aBuf.size(), StreamMode::READ);
2569 
2570  m_rDoc.Tokenize(aStoredStream, TokenizeMode::STORED_OBJECT, pStored->GetStoredElements(),
2571  pStored);
2572  // This is how references know the object is stored inside this object stream.
2573  m_rDoc.SetIDObject(nObjNum, pStored);
2574 
2575  // Store the stream of the object in the object stream for later use.
2576  std::unique_ptr<SvMemoryStream> pStreamBuffer(new SvMemoryStream());
2577  aStoredStream.Seek(0);
2578  pStreamBuffer->WriteStream(aStoredStream);
2579  pStored->SetStreamBuffer(pStreamBuffer);
2580  }
2581 }
2582 
2583 std::vector<std::unique_ptr<PDFElement>>& PDFObjectElement::GetStoredElements()
2584 {
2585  return m_aElements;
2586 }
2587 
2589 
2590 void PDFObjectElement::SetStreamBuffer(std::unique_ptr<SvMemoryStream>& pStreamBuffer)
2591 {
2592  m_pStreamBuffer = std::move(pStreamBuffer);
2593 }
2594 
2596 
2598  PDFNumberElement const& rGeneration)
2599  : m_rDoc(rDoc)
2600  , m_fObjectValue(rObject.GetValue())
2601  , m_fGenerationValue(rGeneration.GetValue())
2602  , m_rObject(rObject)
2603 {
    // The object and generation numbers are copied out of the two number elements; the
    // object-number element itself is additionally kept by reference in m_rObject.
2604 }
2605 
2607 
2609 {
    // Consumes nothing: logs the reference and remembers the current stream position.
2610  SAL_INFO("vcl.filter",
2611  "PDFReferenceElement::Read: " << m_fObjectValue << " " << m_fGenerationValue << " R");
2612  m_nOffset = rStream.Tell();
2613  return true;
2614 }
2615 
2616 sal_uInt64 PDFReferenceElement::GetOffset() const { return m_nOffset; }
2617 
2619 {
    // Reads the number stored in the referenced object straight from rStream.
    // Seeks to the offset the document reports for the object number, verifies that
    // "<object number> <generation number> obj" is found there and then reads the number
    // that follows. Returns 0 on any mismatch or read failure. The stream position is
    // restored on every exit path by the scope guard.
2620  size_t nOffset = m_rDoc.GetObjectOffset(m_fObjectValue);
2621  if (nOffset == 0)
2622  {
2623  SAL_WARN("vcl.filter", "PDFReferenceElement::LookupNumber: found no offset for object #"
2624  << m_fObjectValue);
2625  return 0;
2626  }
2627
2628  sal_uInt64 nOrigPos = rStream.Tell();
2629  comphelper::ScopeGuard g([&]() { rStream.Seek(nOrigPos); });
2630
2631  rStream.Seek(nOffset);
2632  {
    // Object number.
2633  PDFDocument::SkipWhitespace(rStream);
2634  PDFNumberElement aNumber;
2635  bool bRet = aNumber.Read(rStream);
2636  if (!bRet || aNumber.GetValue() != m_fObjectValue)
2637  {
2638  SAL_WARN("vcl.filter",
2639  "PDFReferenceElement::LookupNumber: offset points to not matching object");
2640  return 0;
2641  }
2642  }
2643
2644  {
    // Generation number.
2645  PDFDocument::SkipWhitespace(rStream);
2646  PDFNumberElement aNumber;
2647  bool bRet = aNumber.Read(rStream);
2648  if (!bRet || aNumber.GetValue() != m_fGenerationValue)
2649  {
2650  SAL_WARN("vcl.filter",
2651  "PDFReferenceElement::LookupNumber: offset points to not matching generation");
2652  return 0;
2653  }
2654  }
2655
2656  {
    // The "obj" keyword.
2657  PDFDocument::SkipWhitespace(rStream);
2658  OString aKeyword = PDFDocument::ReadKeyword(rStream);
2659  if (aKeyword != "obj")
2660  {
2661  SAL_WARN("vcl.filter",
2662  "PDFReferenceElement::LookupNumber: offset doesn't point to an obj keyword");
2663  return 0;
2664  }
2665  }
2666
    // Finally the number that makes up the object's content.
2667  PDFDocument::SkipWhitespace(rStream);
2668  PDFNumberElement aNumber;
2669  if (!aNumber.Read(rStream))
2670  {
2671  SAL_WARN("vcl.filter",
2672  "PDFReferenceElement::LookupNumber: failed to read referenced number");
2673  return 0;
2674  }
2675
2676  return aNumber.GetValue();
2677 }
2678 
2680 {
2682 }
2683 
2685 {
    // Returns the object registered in m_aIDObjects under nObjectNumber, or nullptr (with a
    // warning) when no object is registered under that number.
2686  auto itIDObjects = m_aIDObjects.find(nObjectNumber);
2687
2688  if (itIDObjects != m_aIDObjects.end())
2689  return itIDObjects->second;
2690
2691  SAL_WARN("vcl.filter", "PDFDocument::LookupObject: can't find obj " << nObjectNumber);
2692  return nullptr;
2693 }
2694 
2696 
2698 
2700 
2702 {
    // Consumes the two-character token "<<" and records the stream position right after it
    // in m_nLocation. Returns false when either character is not '<' or when the stream is
    // at its end after the first one.
2703  char ch;
2704  rStream.ReadChar(ch);
2705  if (ch != '<')
2706  {
2707  SAL_WARN("vcl.filter", "PDFDictionaryElement::Read: unexpected character: " << ch);
2708  return false;
2709  }
2710
2711  if (rStream.eof())
2712  {
2713  SAL_WARN("vcl.filter", "PDFDictionaryElement::Read: unexpected end of file");
2714  return false;
2715  }
2716
2717  rStream.ReadChar(ch);
2718  if (ch != '<')
2719  {
2720  SAL_WARN("vcl.filter", "PDFDictionaryElement::Read: unexpected character: " << ch);
2721  return false;
2722  }
2723
2724  m_nLocation = rStream.Tell();
2725
2726  SAL_INFO("vcl.filter", "PDFDictionaryElement::Read: '<<'");
2727
2728  return true;
2729 }
2730 
2732 
2734 
2736 {
    // Records the stream position before the token in m_nLocation, then consumes the
    // two-character token ">>". Returns false when either character is not '>' or when the
    // stream is at its end after the first one.
2737  m_nLocation = rStream.Tell();
2738  char ch;
2739  rStream.ReadChar(ch);
2740  if (ch != '>')
2741  {
2742  SAL_WARN("vcl.filter", "PDFEndDictionaryElement::Read: unexpected character: " << ch);
2743  return false;
2744  }
2745
2746  if (rStream.eof())
2747  {
2748  SAL_WARN("vcl.filter", "PDFEndDictionaryElement::Read: unexpected end of file");
2749  return false;
2750  }
2751
2752  rStream.ReadChar(ch);
2753  if (ch != '>')
2754  {
2755  SAL_WARN("vcl.filter", "PDFEndDictionaryElement::Read: unexpected character: " << ch);
2756  return false;
2757  }
2758
2759  SAL_INFO("vcl.filter", "PDFEndDictionaryElement::Read: '>>'");
2760
2761  return true;
2762 }
2763 
2764 PDFNameElement::PDFNameElement() = default;
2765 
2767 {
    // Consumes '/' and the name that follows it. m_nLocation is the stream position right
    // after the '/'. The name ends at the first ASCII whitespace or at one of the characters
    // / [ ] < > ( ; that terminating character is left unread. Returns false when the first
    // character is not '/' or when the stream ends before a terminating character is seen.
    // NOTE(review): ')' is not in the terminator list below - confirm whether that is
    // intentional.
2768  char ch;
2769  rStream.ReadChar(ch);
2770  if (ch != '/')
2771  {
2772  SAL_WARN("vcl.filter", "PDFNameElement::Read: unexpected character: " << ch);
2773  return false;
2774  }
2775  m_nLocation = rStream.Tell();
2776
2777  if (rStream.eof())
2778  {
2779  SAL_WARN("vcl.filter", "PDFNameElement::Read: unexpected end of file");
2780  return false;
2781  }
2782
2783  // Read till the first white-space.
2784  OStringBuffer aBuf;
2785  rStream.ReadChar(ch);
2786  while (!rStream.eof())
2787  {
2788  if (rtl::isAsciiWhiteSpace(static_cast<unsigned char>(ch)) || ch == '/' || ch == '['
2789  || ch == ']' || ch == '<' || ch == '>' || ch == '(')
2790  {
2791  rStream.SeekRel(-1);
2792  m_aValue = aBuf.makeStringAndClear();
2793  SAL_INFO("vcl.filter", "PDFNameElement::Read: m_aValue is '" << m_aValue << "'");
2794  return true;
2795  }
2796  aBuf.append(ch);
2797  rStream.ReadChar(ch);
2798  }
2799
2800  return false;
2801 }
2802 
2803 const OString& PDFNameElement::GetValue() const { return m_aValue; }
2804 
2805 sal_uInt64 PDFNameElement::GetLocation() const { return m_nLocation; }
2806 
2808  : m_nLength(nLength)
2809  , m_nOffset(0)
2810 {
    // Only the expected length is known at construction; the offset is set by Read().
2811 }
2812 
2814 {
    // Records the current stream position in m_nOffset, reads m_nLength bytes and appends
    // them to m_aMemory. The result is the stream's good() state after the read.
    // NOTE(review): the full buffer is written to m_aMemory even if fewer bytes were read,
    // and m_nLength is used as an allocation size without a range check - confirm callers
    // validate it.
2815  SAL_INFO("vcl.filter", "PDFStreamElement::Read: length is " << m_nLength);
2816  m_nOffset = rStream.Tell();
2817  std::vector<unsigned char> aBytes(m_nLength);
2818  rStream.ReadBytes(aBytes.data(), aBytes.size());
2819  m_aMemory.WriteBytes(aBytes.data(), aBytes.size());
2820
2821  return rStream.good();
2822 }
2823 
2825 
2826 sal_uInt64 PDFStreamElement::GetOffset() const { return m_nOffset; }
2827 
2828 bool PDFEndStreamElement::Read(SvStream& /*rStream*/) { return true; }
2829 
2830 bool PDFEndObjectElement::Read(SvStream& /*rStream*/) { return true; }
2831 
2833  : m_pObject(pObject)
2834 {
    // m_pObject may be null; PushBack() checks it before use.
2835 }
2836 
2838 {
    // Consumes the single character '[' that opens an array; the array's elements are not
    // read here. Returns false when the next character is something else.
2839  char ch;
2840  rStream.ReadChar(ch);
2841  if (ch != '[')
2842  {
2843  SAL_WARN("vcl.filter", "PDFArrayElement::Read: unexpected character: " << ch);
2844  return false;
2845  }
2846
2847  SAL_INFO("vcl.filter", "PDFArrayElement::Read: '['");
2848
2849  return true;
2850 }
2851 
2853 {
    // Appends pElement to the array. The owning object, when there is one, is only used
    // for the log message.
2854  if (m_pObject)
2855  SAL_INFO("vcl.filter",
2856  "PDFArrayElement::PushBack: object is " << m_pObject->GetObjectValue());
2857  m_aElements.push_back(pElement);
2858 }
2859 
2860 const std::vector<PDFElement*>& PDFArrayElement::GetElements() const { return m_aElements; }
2861 
2863 
2865 {
    // Records the stream position before the token in m_nOffset, then consumes the single
    // character ']'. Returns false when the next character is something else.
2866  m_nOffset = rStream.Tell();
2867  char ch;
2868  rStream.ReadChar(ch);
2869  if (ch != ']')
2870  {
2871  SAL_WARN("vcl.filter", "PDFEndArrayElement::Read: unexpected character: " << ch);
2872  return false;
2873  }
2874
2875  SAL_INFO("vcl.filter", "PDFEndArrayElement::Read: ']'");
2876
2877  return true;
2878 }
2879 
2880 sal_uInt64 PDFEndArrayElement::GetOffset() const { return m_nOffset; }
2881 
2882 // PDFObjectParser
2883 
2884 size_t PDFObjectParser::parse(PDFElement* pParsingElement, size_t nStartIndex, int nCurrentDepth)
2885 {
2886  // The index of last parsed element
2887  size_t nReturnIndex = 0;
2888 
2889  pParsingElement->setParsing(true);
2890 
2891  comphelper::ScopeGuard aGuard([pParsingElement]() { pParsingElement->setParsing(false); });
2892 
2893  // Current object, if root is an object, else nullptr
2894  auto pParsingObject = dynamic_cast<PDFObjectElement*>(pParsingElement);
2895  auto pParsingTrailer = dynamic_cast<PDFTrailerElement*>(pParsingElement);
2896 
2897  // Current dictionary, if root is an dictionary, else nullptr
2898  auto pParsingDictionary = dynamic_cast<PDFDictionaryElement*>(pParsingElement);
2899 
2900  // Current parsing array, if root is an array, else nullptr
2901  auto pParsingArray = dynamic_cast<PDFArrayElement*>(pParsingElement);
2902 
2903  // Find out where the dictionary for this object starts.
2904  size_t nIndex = nStartIndex;
2905  for (size_t i = nStartIndex; i < mrElements.size(); ++i)
2906  {
2907  if (mrElements[i].get() == pParsingElement)
2908  {
2909  nIndex = i;
2910  break;
2911  }
2912  }
2913 
2914  OString aName;
2915  sal_uInt64 nNameOffset = 0;
2916  std::vector<PDFNumberElement*> aNumbers;
2917 
2918  sal_uInt64 nDictionaryOffset = 0;
2919 
2920  // Current depth; 1 is current
2921  int nDepth = 0;
2922 
2923  for (size_t i = nIndex; i < mrElements.size(); ++i)
2924  {
2925  auto* pCurrentElement = mrElements[i].get();
2926 
2927  // Dictionary tokens can be nested, track enter/leave.
2928  if (auto pCurrentDictionary = dynamic_cast<PDFDictionaryElement*>(pCurrentElement))
2929  {
2930  // Handle previously stored number
2931  if (!aNumbers.empty())
2932  {
2933  if (pParsingDictionary)
2934  {
2935  PDFNumberElement* pNumber = aNumbers.back();
2936  sal_uInt64 nLength
2937  = pNumber->GetLocation() + pNumber->GetLength() - nNameOffset;
2938 
2939  pParsingDictionary->insert(aName, pNumber);
2940  pParsingDictionary->SetKeyOffset(aName, nNameOffset);
2941  pParsingDictionary->SetKeyValueLength(aName, nLength);
2942  }
2943  else if (pParsingArray)
2944  {
2945  for (auto& pNumber : aNumbers)
2946  pParsingArray->PushBack(pNumber);
2947  }
2948  else
2949  {
2950  SAL_INFO("vcl.filter", "neither Dictionary nor Array available");
2951  }
2952  aName.clear();
2953  aNumbers.clear();
2954  }
2955 
2956  nDepth++;
2957 
2958  if (nDepth == 1) // pParsingDictionary is the current one
2959  {
2960  // First dictionary start, track start offset.
2961  nDictionaryOffset = pCurrentDictionary->GetLocation();
2962 
2963  if (pParsingObject)
2964  {
2965  // Then the toplevel dictionary of the object.
2966  pParsingObject->SetDictionary(pCurrentDictionary);
2967  pParsingObject->SetDictionaryOffset(nDictionaryOffset);
2968  pParsingDictionary = pCurrentDictionary;
2969  }
2970  else if (pParsingTrailer)
2971  {
2972  pParsingTrailer->SetDictionary(pCurrentDictionary);
2973  pParsingDictionary = pCurrentDictionary;
2974  }
2975  }
2976  else if (!pCurrentDictionary->alreadyParsing())
2977  {
2978  if (pParsingArray)
2979  {
2980  pParsingArray->PushBack(pCurrentDictionary);
2981  }
2982  else if (pParsingDictionary)
2983  {
2984  // Dictionary toplevel value.
2985  pParsingDictionary->insert(aName, pCurrentDictionary);
2986  }
2987  else
2988  {
2989  SAL_INFO("vcl.filter", "neither Dictionary nor Array available");
2990  }
2991  // Nested dictionary.
2992  const size_t nNextElementIndex = parse(pCurrentDictionary, i, nCurrentDepth + 1);
2993  i = std::max(i, nNextElementIndex - 1);
2994  }
2995  }
2996  else if (auto pCurrentEndDictionary
2997  = dynamic_cast<PDFEndDictionaryElement*>(pCurrentElement))
2998  {
2999  // Handle previously stored number
3000  if (!aNumbers.empty())
3001  {
3002  if (pParsingDictionary)
3003  {
3004  PDFNumberElement* pNumber = aNumbers.back();
3005  sal_uInt64 nLength
3006  = pNumber->GetLocation() + pNumber->GetLength() - nNameOffset;
3007 
3008  pParsingDictionary->insert(aName, pNumber);
3009  pParsingDictionary->SetKeyOffset(aName, nNameOffset);
3010  pParsingDictionary->SetKeyValueLength(aName, nLength);
3011  }
3012  else if (pParsingArray)
3013  {
3014  for (auto& pNumber : aNumbers)
3015  pParsingArray->PushBack(pNumber);
3016  }
3017  else
3018  {
3019  SAL_INFO("vcl.filter", "neither Dictionary nor Array available");
3020  }
3021  aName.clear();
3022  aNumbers.clear();
3023  }
3024 
3025  if (pParsingDictionary)
3026  {
3027  pParsingDictionary->SetKeyOffset(aName, nNameOffset);
3028  sal_uInt64 nLength = pCurrentEndDictionary->GetLocation() - nNameOffset + 2;
3029  pParsingDictionary->SetKeyValueLength(aName, nLength);
3030  aName.clear();
3031  }
3032 
3033  if (nDepth == 1) // did the parsing ended
3034  {
3035  // Last dictionary end, track length and stop parsing.
3036  if (pParsingObject)
3037  {
3038  sal_uInt64 nDictionaryLength
3039  = pCurrentEndDictionary->GetLocation() - nDictionaryOffset;
3040  pParsingObject->SetDictionaryLength(nDictionaryLength);
3041  }
3042  nReturnIndex = i;
3043  break;
3044  }
3045 
3046  nDepth--;
3047  }
3048  else if (auto pCurrentArray = dynamic_cast<PDFArrayElement*>(pCurrentElement))
3049  {
3050  // Handle previously stored number
3051  if (!aNumbers.empty())
3052  {
3053  if (pParsingDictionary)
3054  {
3055  PDFNumberElement* pNumber = aNumbers.back();
3056 
3057  sal_uInt64 nLength
3058  = pNumber->GetLocation() + pNumber->GetLength() - nNameOffset;
3059  pParsingDictionary->insert(aName, pNumber);
3060  pParsingDictionary->SetKeyOffset(aName, nNameOffset);
3061  pParsingDictionary->SetKeyValueLength(aName, nLength);
3062  }
3063  else if (pParsingArray)
3064  {
3065  for (auto& pNumber : aNumbers)
3066  pParsingArray->PushBack(pNumber);
3067  }
3068  else
3069  {
3070  SAL_INFO("vcl.filter", "neither Dictionary nor Array available");
3071  }
3072  aName.clear();
3073  aNumbers.clear();
3074  }
3075 
3076  nDepth++;
3077  if (nDepth == 1) // pParsingDictionary is the current one
3078  {
3079  if (pParsingObject)
3080  {
3081  pParsingObject->SetArray(pCurrentArray);
3082  pParsingArray = pCurrentArray;
3083  }
3084  }
3085  else if (!pCurrentArray->alreadyParsing())
3086  {
3087  if (pParsingArray)
3088  {
3089  // Array is toplevel
3090  pParsingArray->PushBack(pCurrentArray);
3091  }
3092  else if (pParsingDictionary)
3093  {
3094  // Dictionary toplevel value.
3095  pParsingDictionary->insert(aName, pCurrentArray);
3096  }
3097 
3098  const size_t nNextElementIndex = parse(pCurrentArray, i, nCurrentDepth + 1);
3099 
3100  // ensure we go forwards and not endlessly loop
3101  i = std::max(i, nNextElementIndex - 1);
3102  }
3103  }
3104  else if (auto pCurrentEndArray = dynamic_cast<PDFEndArrayElement*>(pCurrentElement))
3105  {
3106  // Handle previously stored number
3107  if (!aNumbers.empty())
3108  {
3109  if (pParsingDictionary)
3110  {
3111  PDFNumberElement* pNumber = aNumbers.back();
3112 
3113  sal_uInt64 nLength
3114  = pNumber->GetLocation() + pNumber->GetLength() - nNameOffset;
3115  pParsingDictionary->insert(aName, pNumber);
3116  pParsingDictionary->SetKeyOffset(aName, nNameOffset);
3117  pParsingDictionary->SetKeyValueLength(aName, nLength);
3118  }
3119  else if (pParsingArray)
3120  {
3121  for (auto& pNumber : aNumbers)
3122  pParsingArray->PushBack(pNumber);
3123  }
3124  else
3125  {
3126  SAL_INFO("vcl.filter", "neither Dictionary nor Array available");
3127  }
3128  aName.clear();
3129  aNumbers.clear();
3130  }
3131 
3132  if (nDepth == 1) // did the pParsing ended
3133  {
3134  // Last array end, track length and stop parsing.
3135  nReturnIndex = i;
3136  break;
3137  }
3138  else
3139  {
3140  if (pParsingDictionary)
3141  {
3142  pParsingDictionary->SetKeyOffset(aName, nNameOffset);
3143  // Include the ending ']' in the length of the key - (array)value pair length.
3144  sal_uInt64 nLength = pCurrentEndArray->GetOffset() - nNameOffset + 1;
3145  pParsingDictionary->SetKeyValueLength(aName, nLength);
3146  aName.clear();
3147  }
3148  }
3149  nDepth--;
3150  }
3151  else if (auto pCurrentName = dynamic_cast<PDFNameElement*>(pCurrentElement))
3152  {
3153  // Handle previously stored number
3154  if (!aNumbers.empty())
3155  {
3156  if (pParsingDictionary)
3157  {
3158  PDFNumberElement* pNumber = aNumbers.back();
3159 
3160  sal_uInt64 nLength
3161  = pNumber->GetLocation() + pNumber->GetLength() - nNameOffset;
3162  pParsingDictionary->insert(aName, pNumber);
3163  pParsingDictionary->SetKeyOffset(aName, nNameOffset);
3164  pParsingDictionary->SetKeyValueLength(aName, nLength);
3165  }
3166  else if (pParsingArray)
3167  {
3168  for (auto& pNumber : aNumbers)
3169  pParsingArray->PushBack(pNumber);
3170  }
3171  aName.clear();
3172  aNumbers.clear();
3173  }
3174 
3175  // Now handle name
3176  if (pParsingArray)
3177  {
3178  // if we are in an array, just push the name to array
3179  pParsingArray->PushBack(pCurrentName);
3180  }
3181  else if (pParsingDictionary)
3182  {
3183  // if we are in a dictionary, we need to store the name as a possible key
3184  if (aName.isEmpty())
3185  {
3186  aName = pCurrentName->GetValue();
3187  nNameOffset = pCurrentName->GetLocation();
3188  }
3189  else
3190  {
3191  sal_uInt64 nKeyLength
3192  = pCurrentName->GetLocation() + pCurrentName->GetLength() - nNameOffset;
3193  pParsingDictionary->insert(aName, pCurrentName);
3194  pParsingDictionary->SetKeyOffset(aName, nNameOffset);
3195  pParsingDictionary->SetKeyValueLength(aName, nKeyLength);
3196  aName.clear();
3197  }
3198  }
3199  }
3200  else if (auto pReference = dynamic_cast<PDFReferenceElement*>(pCurrentElement))
3201  {
3202  if (pParsingArray)
3203  {
3204  pParsingArray->PushBack(pReference);
3205  }
3206  else if (pParsingDictionary)
3207  {
3208  sal_uInt64 nLength = pReference->GetOffset() - nNameOffset;
3209  pParsingDictionary->insert(aName, pReference);
3210  pParsingDictionary->SetKeyOffset(aName, nNameOffset);
3211  pParsingDictionary->SetKeyValueLength(aName, nLength);
3212  aName.clear();
3213  }
3214  else
3215  {
3216  SAL_INFO("vcl.filter", "neither Dictionary nor Array available");
3217  }
3218  aNumbers.clear();
3219  }
3220  else if (auto pLiteralString = dynamic_cast<PDFLiteralStringElement*>(pCurrentElement))
3221  {
3222  if (pParsingArray)
3223  {
3224  pParsingArray->PushBack(pLiteralString);
3225  }
3226  else if (pParsingDictionary)
3227  {
3228  pParsingDictionary->insert(aName, pLiteralString);
3229  pParsingDictionary->SetKeyOffset(aName, nNameOffset);
3230  aName.clear();
3231  }
3232  else
3233  {
3234  SAL_INFO("vcl.filter", "neither Dictionary nor Array available");
3235  }
3236  }
3237  else if (auto pBoolean = dynamic_cast<PDFBooleanElement*>(pCurrentElement))
3238  {
3239  if (pParsingArray)
3240  {
3241  pParsingArray->PushBack(pBoolean);
3242  }
3243  else if (pParsingDictionary)
3244  {
3245  pParsingDictionary->insert(aName, pBoolean);
3246  pParsingDictionary->SetKeyOffset(aName, nNameOffset);
3247  aName.clear();
3248  }
3249  else
3250  {
3251  SAL_INFO("vcl.filter", "neither Dictionary nor Array available");
3252  }
3253  }
3254  else if (auto pHexString = dynamic_cast<PDFHexStringElement*>(pCurrentElement))
3255  {
3256  if (pParsingArray)
3257  {
3258  pParsingArray->PushBack(pHexString);
3259  }
3260  else if (pParsingDictionary)
3261  {
3262  pParsingDictionary->insert(aName, pHexString);
3263  pParsingDictionary->SetKeyOffset(aName, nNameOffset);
3264  aName.clear();
3265  }
3266  }
3267  else if (auto pNumberElement = dynamic_cast<PDFNumberElement*>(pCurrentElement))
3268  {
3269  // Just remember this, so that in case it's not a reference parameter,
3270  // we can handle it later.
3271  aNumbers.push_back(pNumberElement);
3272  }
3273  else if (dynamic_cast<PDFEndObjectElement*>(pCurrentElement))
3274  {
3275  // parsing of the object is finished
3276  break;
3277  }
3278  else if (dynamic_cast<PDFObjectElement*>(pCurrentElement)
3279  || dynamic_cast<PDFTrailerElement*>(pCurrentElement))
3280  {
3281  continue;
3282  }
3283  else
3284  {
3285  SAL_INFO("vcl.filter", "Unhandled element while parsing.");
3286  }
3287  }
3288 
3289  return nReturnIndex;
3290 }
3291 
3292 } // namespace vcl::filter
3293 
3294 /* vim:set shiftwidth=4 softtabstop=4 expandtab: */
std::vector< std::unique_ptr< PDFElement > > m_aElements
Elements of an object in an object stream.
Definition: pdfdocument.hxx:92
sal_uInt64 m_nOffset
Location of the end of the trailer token.
Array object: a list.
bool Read(SvStream &rStream) override
const sal_Int32 m_nLength
sal_uInt64 m_nDictionaryOffset
Position after the '<<' token.
Definition: pdfdocument.hxx:77
bool SetStreamSize(sal_uInt64 nSize)
sal_Int32 nIndex
PDFObjectElement(PDFDocument &rDoc, double fObjectValue, double fGenerationValue)
Numbering object: an integer or a real.
Boolean object: a 'true' or a 'false'.
PDFElement * Lookup(const OString &rDictionaryKey)
PDFDictionaryElement * m_pDictionaryElement
Definition: pdfdocument.hxx:80
size_t GetObjectOffset(size_t nIndex) const
sal_uInt64 m_nLocation
Offset after the '/' token.
End of an array: ']'.
static void AppendUnicodeTextString(const OUString &rString, OStringBuffer &rBuffer)
Write rString as a PDF hex string into rBuffer.
void SetStream(PDFStreamElement *pStreamElement)
SvMemoryStream m_aMemory
The byte array itself.
std::string GetValue
tools::Long getWidth() const
PDFArrayElement * m_pArrayElement
The contained direct array, if any.
Definition: pdfdocument.hxx:86
PDFObjectElement * m_pObject
The object that contains this array.
std::vector< std::unique_ptr< PDFObjectElement > > m_aStoredElements
Objects of an object stream.
Definition: pdfdocument.hxx:90
double LookupNumber(SvStream &rStream) const
Assuming the reference points to a number object, return its value.
void SetDictionaryLength(sal_uInt64 nDictionaryLength)
PDFTrailerElement(PDFDocument &rDoc)
aBuf
#define STREAM_SEEK_TO_END
sal_uInt64 m_nLocation
Offset before the '>>' token.
sal_uInt64 Seek(sal_uInt64 nPos)
std::vector< unsigned char > DecodeHexString(const OString &rHex)
void SetDictionaryOffset(sal_uInt64 nDictionaryOffset)
tools::Long Decompress(SvStream &rIStm, SvStream &rOStm)
PDFDocument & m_rDoc
The document owning this element.
Definition: pdfdocument.hxx:71
void PushBack(PDFElement *pElement)
sal_uInt64 GetLocation() const
PDFObjectElement * LookupObject(const OString &rDictionaryKey)
Looks up an object which is only referenced in this dictionary.
EmbeddedObjectRef * pObject
sal_uInt64 m_nArrayOffset
Position after the '[' token, if m_pArrayElement is set.
Definition: pdfdocument.hxx:82
std::map< OString, PDFElement * > m_aItems
Key-value pairs when the dictionary is a nested value.
void SetArrayOffset(sal_uInt64 nArrayOffset)
SvStream & WriteCharPtr(const char *pBuf)
void SetArrayLength(sal_uInt64 nArrayLength)
size_t parse(PDFElement *pParsingElement, size_t nStartIndex=0, int nCurrentDepth=0)
sal_uInt64 SeekRel(sal_Int64 nPos)
const std::vector< std::unique_ptr< PDFElement > > & mrElements
void setVisiting(bool bVisiting)
Definition: pdfdocument.hxx:59
SvMemoryStream * GetStreamBuffer() const
void SetStreamBuffer(std::unique_ptr< SvMemoryStream > &pStreamBuffer)
tools::Long getHeight() const
SwDoc & m_rDoc
SvStream & WriteUInt32AsString(sal_uInt32 nUInt32)
std::unique_ptr< SvMemoryStream > m_pStreamBuffer
Uncompressed buffer of an object in an object stream.
Definition: pdfdocument.hxx:94
bool Read(SvStream &rStream) override
A byte range in a PDF file.
Definition: pdfdocument.hxx:50
bool Read(SvStream &rStream) override
bool eof() const
PDFObjectElement * LookupObject(size_t nObjectNumber)
Look up object based on object number, possibly by parsing object streams.
bool Read(SvStream &rStream) override
In-memory representation of an on-disk PDF document.
TStyleElements m_aElements
sal_uInt64 m_nArrayLength
Length of the array buffer till (before) the ']' token.
Definition: pdfdocument.hxx:84
An entry in a cross-reference stream.
PDFReferenceElement(PDFDocument &rDoc, PDFNumberElement &rObject, PDFNumberElement const &rGeneration)
XRefEntryType GetType() const
const OString & GetValue() const
static void visitPages(PDFObjectElement *pPages, std::vector< PDFObjectElement * > &rRet)
Visits the page tree recursively, looking for page objects.
void PushBackEOF(size_t nOffset)
Remember the end location of an EOF token.
bool Read(SvStream &rStream) override
sal_uInt64 GetArrayLength() const
void copyPageResources(filter::PDFObjectElement *pPage, OStringBuffer &rLine)
Copies resources of pPage into rLine.
sal_Int32 nElements
bool Read(SvStream &rStream) override
PDFNumberElement * m_pNumberElement
If set, the object contains this number element (outside any dictionary/array).
Definition: pdfdocument.hxx:75
void Compress(SvStream &rIStm, SvStream &rOStm)
Copies objects from one PDF file into another one.
void SetType(XRefEntryType eType)
Same as END_OF_OBJECT, but for object streams (no endobj keyword).
void SetKeyValueLength(const OString &rKey, sal_uInt64 nLength)
sal_uInt64 m_nOffset
Input file start location.
void ParseStoredObjects()
Parse objects stored in this object stream.
static OString GetDateTime()
Get current date/time in PDF D:YYYYMMDDHHMMSS form.
bool Read(SvStream &rStream) override
sal_uInt64 GetOffset() const
std::map< OString, sal_uInt64 > m_aDictionaryKeyOffset
Position after the '/' token.
sal_uInt16 char * pName
PDFStreamElement * m_pStreamElement
The stream of this object, used when this is an object stream.
Definition: pdfdocument.hxx:88
bool GetDirty() const
int i
Indirect object: something with a unique ID.
Definition: pdfdocument.hxx:68
bool Read(SvStream &rStream) override
PDFNumberElement & m_rObject
The element providing the object number.
sal_uInt64 m_nLength
Input file token length.
static OString ReadKeyword(SvStream &rStream)
sal_uInt64 GetSize()
bool Read(SvStream &rStream) override
constexpr std::enable_if_t< std::is_signed_v< T >, std::make_unsigned_t< T > > make_unsigned(T value)
End of a dictionary: '>>'.
std::size_t WriteBytes(const void *pData, std::size_t nSize)
void AddDictionaryReference(PDFReferenceElement *pReference)
void BeginCompression(int nCompressLevel=ZCODEC_DEFAULT_COMPRESSION, bool gzLib=false)
tools::Long EndCompression()
std::vector< PDFObjectElement * > GetPages()
SvMemoryStream m_aEditBuffer
All editing takes place in this buffer, if it happens.
A one-liner comment.
sal_uInt64 GetLocation() const
Dictionary object: a set of key-value pairs.
bool Read(SvStream &rStream)
Read elements from the start of the stream till its end.
sal_uInt64 GetOffset() const
bool Read(SvStream &rStream) override
std::vector< PDFElement * > m_aElements
bool Tokenize(SvStream &rStream, TokenizeMode eMode, std::vector< std::unique_ptr< PDFElement >> &rElements, PDFObjectElement *pObjectElement)
Tokenize elements from current offset.
bool Read(SvStream &rStream) override
std::vector< PDFReferenceElement * > m_aDictionaryReferences
List of all reference elements inside this object's dictionary and nested dictionaries.
Definition: pdfdocument.hxx:97
#define MAX_SIGNATURE_CONTENT_LENGTH
PDFStreamElement * GetStream() const
Access to the stream of the object, if it has any.
SvStream & WriteStream(SvStream &rStream)
const std::map< OString, PDFElement * > & GetItems() const
std::map< OString, sal_uInt64 > m_aDictionaryKeyValueLength
Length of the dictionary key and value, till (before) the next token.
const char * pS
sal_uInt64 GetKeyOffset(const OString &rKey) const
void SetIDObject(size_t nID, PDFObjectElement *pObject)
Register an object (owned directly or indirectly by m_aElements) as a provider for a given ID...
void setParsing(bool bParsing)
Definition: pdfdocument.hxx:61
bool Read(SvStream &rStream) override
sal_uInt64 GetKeyValueLength(const OString &rKey) const
PDFObjectElement * LookupObject(const OString &rDictionaryKey)
std::size_t ReadBytes(void *pData, std::size_t nSize)
bool Read(SvStream &rStream) override
SvMemoryStream & GetEditBuffer()
Access to the input document, even after the input stream is gone.
const OString & GetValue() const
std::vector< std::unique_ptr< PDFElement > > & GetStoredElements()
void SetNumberElement(PDFNumberElement *pNumberElement)
sal_uInt64 m_nOffset
Location before the ']' token.
sal_uInt64 GetLength() const
PDFDictionaryElement * GetDictionary()
const std::vector< std::unique_ptr< PDFElement > > & GetElements() const
bool Read(SvStream &rStream) override
void SetDirty(bool bDirty)
PDFObjectElement * LookupObject()
Lookup referenced object, without assuming anything about its contents.
std::unique_ptr< char[]> aBuffer
SvStream & ReadChar(char &rChar)
End of an object: 'endobj' keyword.
PDFElement * LookupElement(const OString &rDictionaryKey)
Looks up an element which is contained in this dictionary.
#define SAL_WARN_IF(condition, area, stream)
SvStream & WriteOString(std::string_view rStr)
void setWidth(tools::Long n)
SvMemoryStream & GetMemory()
Null object: the 'null' singleton.
const std::vector< PDFReferenceElement * > & GetDictionaryReferences() const
#define SAL_INFO(area, stream)
OUString aName
bool Read(SvStream &rStream) override
std::map< size_t, PDFObjectElement * > m_aIDObjects
Object ID <-> Object pointer map.
static void SkipWhitespace(SvStream &rStream)
PDFNumberElement * GetNumberElement() const
sal_uInt64 Tell() const
QPRO_FUNC_TYPE nType
const OString & GetValue() const
bool Sign(OStringBuffer &rCMSHexBuffer)
PDFArrayElement * GetArray()
Reference object: something with a unique ID.
const std::vector< PDFElement * > & GetElements() const
End of a stream: 'endstream' keyword.
sal_uInt64 GetLocation() const
bool good() const
PDFDictionaryElement * m_pDictionaryElement
#define SAL_WARN(area, stream)
bool alreadyVisiting() const
Definition: pdfdocument.hxx:60
Literal string: in (asdf) form.
PDFArrayElement(PDFObjectElement *pObject)
bool Read(SvStream &rStream) override
sal_Int32 nLength
Name object: a key string.
void SetOffset(sal_uInt64 nOffset)
The trailer singleton is at the end of the doc.
const std::map< OString, PDFElement * > & GetDictionaryItems()
Get access to the parsed key-value items from the object dictionary.
void SetDictionary(PDFDictionaryElement *pDictionaryElement)
PDFNumberElement & GetObjectElement() const
void AddDataRange(const void *pData, sal_Int32 size)
PDFElement * Lookup(const OString &rDictionaryKey)
sal_uInt64 m_nDictionaryLength
Length of the dictionary buffer till (before) the '>>' token.
Definition: pdfdocument.hxx:79
sal_uInt64 GetArrayOffset() const
Stream object: a byte array with a known length.
sal_uInt64 m_nLocation
Offset after the '<<' token.
void setHeight(tools::Long n)
sal_uInt16 nPos
static PDFElement * Lookup(const std::map< OString, PDFElement * > &rDictionary, const OString &rKey)
const void * GetData()
void SetArray(PDFArrayElement *pArrayElement)
bool Read(SvStream &rStream) override
sal_uInt64 m_nOffset
Location after the 'R' token.
void SetKeyOffset(const OString &rKey, sal_uInt64 nOffset)
OStringBuffer & padToLength(OStringBuffer &rBuffer, sal_Int32 nLength, char cFill= '\0')