LibreOffice Module vcl (master)  1
pdfdocument.cxx
Go to the documentation of this file.
1 /* -*- Mode: C++; tab-width: 4; indent-tabs-mode: nil; c-basic-offset: 4 -*- */
2 /*
3  * This file is part of the LibreOffice project.
4  *
5  * This Source Code Form is subject to the terms of the Mozilla Public
6  * License, v. 2.0. If a copy of the MPL was not distributed with this
7  * file, You can obtain one at http://mozilla.org/MPL/2.0/.
8  */
9 
11 
12 #include <map>
13 #include <memory>
14 #include <vector>
15 
16 #include <com/sun/star/uno/Sequence.hxx>
17 #include <com/sun/star/security/XCertificate.hpp>
18 
20 #include <comphelper/string.hxx>
21 #include <rtl/character.hxx>
22 #include <rtl/strbuf.hxx>
23 #include <rtl/string.hxx>
24 #include <sal/log.hxx>
25 #include <sal/types.h>
26 #include <svl/cryptosign.hxx>
27 #include <tools/zcodec.hxx>
28 #include <vcl/pdfwriter.hxx>
29 #include <o3tl/safeint.hxx>
30 
31 #include <pdf/objectcopier.hxx>
32 
33 using namespace com::sun::star;
34 
35 namespace vcl::filter
36 {
37 XRefEntry::XRefEntry() = default;
38 
39 PDFDocument::PDFDocument() = default;
40 
41 PDFDocument::~PDFDocument() = default;
42 
43 bool PDFDocument::RemoveSignature(size_t nPosition)
44 {
45  std::vector<PDFObjectElement*> aSignatures = GetSignatureWidgets();
46  if (nPosition >= aSignatures.size())
47  {
48  SAL_WARN("vcl.filter", "PDFDocument::RemoveSignature: invalid nPosition");
49  return false;
50  }
51 
52  if (aSignatures.size() != m_aEOFs.size() - 1)
53  {
54  SAL_WARN("vcl.filter", "PDFDocument::RemoveSignature: no 1:1 mapping between signatures "
55  "and incremental updates");
56  return false;
57  }
58 
59  // The EOF offset is the end of the original file, without the signature at
60  // nPosition.
61  m_aEditBuffer.Seek(m_aEOFs[nPosition]);
62  // Drop all bytes after the current position.
63  m_aEditBuffer.SetStreamSize(m_aEditBuffer.Tell() + 1);
64 
65  return m_aEditBuffer.good();
66 }
67 
68 sal_Int32 PDFDocument::createObject()
69 {
70  sal_Int32 nObject = m_aXRef.size();
71  m_aXRef[nObject] = XRefEntry();
72  return nObject;
73 }
74 
75 bool PDFDocument::updateObject(sal_Int32 nObject)
76 {
77  if (o3tl::make_unsigned(nObject) >= m_aXRef.size())
78  {
79  SAL_WARN("vcl.filter", "PDFDocument::updateObject: invalid nObject");
80  return false;
81  }
82 
83  XRefEntry aEntry;
84  aEntry.SetOffset(m_aEditBuffer.Tell());
85  aEntry.SetDirty(true);
86  m_aXRef[nObject] = aEntry;
87  return true;
88 }
89 
90 bool PDFDocument::writeBuffer(const void* pBuffer, sal_uInt64 nBytes)
91 {
92  std::size_t nWritten = m_aEditBuffer.WriteBytes(pBuffer, nBytes);
93  return nWritten == nBytes;
94 }
95 
96 void PDFDocument::SetSignatureLine(std::vector<sal_Int8>&& rSignatureLine)
97 {
98  m_aSignatureLine = std::move(rSignatureLine);
99 }
100 
101 void PDFDocument::SetSignaturePage(size_t nPage) { m_nSignaturePage = nPage; }
102 
103 sal_uInt32 PDFDocument::GetNextSignature()
104 {
105  sal_uInt32 nRet = 0;
106  for (const auto& pSignature : GetSignatureWidgets())
107  {
108  auto pT = dynamic_cast<PDFLiteralStringElement*>(pSignature->Lookup("T"));
109  if (!pT)
110  continue;
111 
112  const OString& rValue = pT->GetValue();
113  const OString aPrefix = "Signature";
114  if (!rValue.startsWith(aPrefix))
115  continue;
116 
117  nRet = std::max(nRet, rValue.copy(aPrefix.getLength()).toUInt32());
118  }
119 
120  return nRet + 1;
121 }
122 
123 sal_Int32 PDFDocument::WriteSignatureObject(const OUString& rDescription, bool bAdES,
124  sal_uInt64& rLastByteRangeOffset,
125  sal_Int64& rContentOffset)
126 {
127  // Write signature object.
128  sal_Int32 nSignatureId = m_aXRef.size();
129  XRefEntry aSignatureEntry;
130  aSignatureEntry.SetOffset(m_aEditBuffer.Tell());
131  aSignatureEntry.SetDirty(true);
132  m_aXRef[nSignatureId] = aSignatureEntry;
133  OStringBuffer aSigBuffer;
134  aSigBuffer.append(nSignatureId);
135  aSigBuffer.append(" 0 obj\n");
136  aSigBuffer.append("<</Contents <");
137  rContentOffset = aSignatureEntry.GetOffset() + aSigBuffer.getLength();
138  // Reserve space for the PKCS#7 object.
139  OStringBuffer aContentFiller(MAX_SIGNATURE_CONTENT_LENGTH);
141  aSigBuffer.append(aContentFiller.makeStringAndClear());
142  aSigBuffer.append(">\n/Type/Sig/SubFilter");
143  if (bAdES)
144  aSigBuffer.append("/ETSI.CAdES.detached");
145  else
146  aSigBuffer.append("/adbe.pkcs7.detached");
147 
148  // Time of signing.
149  aSigBuffer.append(" /M (");
150  aSigBuffer.append(vcl::PDFWriter::GetDateTime());
151  aSigBuffer.append(")");
152 
153  // Byte range: we can write offset1-length1 and offset2 right now, will
154  // write length2 later.
155  aSigBuffer.append(" /ByteRange [ 0 ");
156  // -1 and +1 is the leading "<" and the trailing ">" around the hex string.
157  aSigBuffer.append(rContentOffset - 1);
158  aSigBuffer.append(" ");
159  aSigBuffer.append(rContentOffset + MAX_SIGNATURE_CONTENT_LENGTH + 1);
160  aSigBuffer.append(" ");
161  rLastByteRangeOffset = aSignatureEntry.GetOffset() + aSigBuffer.getLength();
162  // We don't know how many bytes we need for the last ByteRange value, this
163  // should be enough.
164  OStringBuffer aByteRangeFiller;
165  comphelper::string::padToLength(aByteRangeFiller, 100, ' ');
166  aSigBuffer.append(aByteRangeFiller.makeStringAndClear());
167  // Finish the Sig obj.
168  aSigBuffer.append(" /Filter/Adobe.PPKMS");
169 
170  if (!rDescription.isEmpty())
171  {
172  aSigBuffer.append("/Reason<");
173  vcl::PDFWriter::AppendUnicodeTextString(rDescription, aSigBuffer);
174  aSigBuffer.append(">");
175  }
176 
177  aSigBuffer.append(" >>\nendobj\n\n");
178  m_aEditBuffer.WriteOString(aSigBuffer.toString());
179 
180  return nSignatureId;
181 }
182 
183 sal_Int32 PDFDocument::WriteAppearanceObject(tools::Rectangle& rSignatureRectangle)
184 {
185  PDFDocument aPDFDocument;
186  filter::PDFObjectElement* pPage = nullptr;
187  std::vector<filter::PDFObjectElement*> aContentStreams;
188 
189  if (!m_aSignatureLine.empty())
190  {
191  // Parse the PDF data of signature line: we can set the signature rectangle to non-empty
192  // based on it.
193  SvMemoryStream aPDFStream;
194  aPDFStream.WriteBytes(m_aSignatureLine.data(), m_aSignatureLine.size());
195  aPDFStream.Seek(0);
196  if (!aPDFDocument.Read(aPDFStream))
197  {
198  SAL_WARN("vcl.filter",
199  "PDFDocument::WriteAppearanceObject: failed to read the PDF document");
200  return -1;
201  }
202 
203  std::vector<filter::PDFObjectElement*> aPages = aPDFDocument.GetPages();
204  if (aPages.empty())
205  {
206  SAL_WARN("vcl.filter", "PDFDocument::WriteAppearanceObject: no pages");
207  return -1;
208  }
209 
210  pPage = aPages[0];
211  if (!pPage)
212  {
213  SAL_WARN("vcl.filter", "PDFDocument::WriteAppearanceObject: no page");
214  return -1;
215  }
216 
217  // Calculate the bounding box.
218  PDFElement* pMediaBox = pPage->Lookup("MediaBox");
219  auto pMediaBoxArray = dynamic_cast<PDFArrayElement*>(pMediaBox);
220  if (!pMediaBoxArray || pMediaBoxArray->GetElements().size() < 4)
221  {
222  SAL_WARN("vcl.filter",
223  "PDFDocument::WriteAppearanceObject: MediaBox is not an array of 4");
224  return -1;
225  }
226  const std::vector<PDFElement*>& rMediaBoxElements = pMediaBoxArray->GetElements();
227  auto pWidth = dynamic_cast<PDFNumberElement*>(rMediaBoxElements[2]);
228  if (!pWidth)
229  {
230  SAL_WARN("vcl.filter", "PDFDocument::WriteAppearanceObject: MediaBox has no width");
231  return -1;
232  }
233  rSignatureRectangle.setWidth(pWidth->GetValue());
234  auto pHeight = dynamic_cast<PDFNumberElement*>(rMediaBoxElements[3]);
235  if (!pHeight)
236  {
237  SAL_WARN("vcl.filter", "PDFDocument::WriteAppearanceObject: MediaBox has no height");
238  return -1;
239  }
240  rSignatureRectangle.setHeight(pHeight->GetValue());
241 
242  if (PDFObjectElement* pContentStream = pPage->LookupObject("Contents"))
243  {
244  aContentStreams.push_back(pContentStream);
245  }
246 
247  if (aContentStreams.empty())
248  {
249  SAL_WARN("vcl.filter", "PDFDocument::WriteAppearanceObject: no content stream");
250  return -1;
251  }
252  }
253  m_aSignatureLine.clear();
254 
255  // Write appearance object: allocate an ID.
256  sal_Int32 nAppearanceId = m_aXRef.size();
257  m_aXRef[nAppearanceId] = XRefEntry();
258 
259  // Write the object content.
260  SvMemoryStream aEditBuffer;
261  aEditBuffer.WriteUInt32AsString(nAppearanceId);
262  aEditBuffer.WriteCharPtr(" 0 obj\n");
263  aEditBuffer.WriteCharPtr("<</Type/XObject\n/Subtype/Form\n");
264 
265  PDFObjectCopier aCopier(*this);
266  if (!aContentStreams.empty())
267  {
268  assert(pPage && "aContentStreams is only filled if there was a pPage");
269  OStringBuffer aBuffer;
270  aCopier.copyPageResources(pPage, aBuffer);
271  aEditBuffer.WriteOString(aBuffer.makeStringAndClear());
272  }
273 
274  aEditBuffer.WriteCharPtr("/BBox[0 0 ");
275  aEditBuffer.WriteOString(OString::number(rSignatureRectangle.getWidth()));
276  aEditBuffer.WriteCharPtr(" ");
277  aEditBuffer.WriteOString(OString::number(rSignatureRectangle.getHeight()));
278  aEditBuffer.WriteCharPtr("]\n/Length ");
279 
280  // Add the object to the doc-level edit buffer and update the offset.
281  SvMemoryStream aStream;
282  bool bCompressed = false;
283  sal_Int32 nLength = 0;
284  if (!aContentStreams.empty())
285  {
286  nLength = PDFObjectCopier::copyPageStreams(aContentStreams, aStream, bCompressed);
287  }
288  aEditBuffer.WriteOString(OString::number(nLength));
289  if (bCompressed)
290  {
291  aEditBuffer.WriteOString(" /Filter/FlateDecode");
292  }
293 
294  aEditBuffer.WriteCharPtr("\n>>\n");
295 
296  aEditBuffer.WriteCharPtr("stream\n");
297 
298  // Copy the original page streams to the form XObject stream.
299  aStream.Seek(0);
300  aEditBuffer.WriteStream(aStream);
301 
302  aEditBuffer.WriteCharPtr("\nendstream\nendobj\n\n");
303 
304  aEditBuffer.Seek(0);
305  XRefEntry aAppearanceEntry;
306  aAppearanceEntry.SetOffset(m_aEditBuffer.Tell());
307  aAppearanceEntry.SetDirty(true);
308  m_aXRef[nAppearanceId] = aAppearanceEntry;
309  m_aEditBuffer.WriteStream(aEditBuffer);
310 
311  return nAppearanceId;
312 }
313 
314 sal_Int32 PDFDocument::WriteAnnotObject(PDFObjectElement const& rFirstPage, sal_Int32 nSignatureId,
315  sal_Int32 nAppearanceId,
316  const tools::Rectangle& rSignatureRectangle)
317 {
318  // Decide what identifier to use for the new signature.
319  sal_uInt32 nNextSignature = GetNextSignature();
320 
321  // Write the Annot object, references nSignatureId and nAppearanceId.
322  sal_Int32 nAnnotId = m_aXRef.size();
323  XRefEntry aAnnotEntry;
324  aAnnotEntry.SetOffset(m_aEditBuffer.Tell());
325  aAnnotEntry.SetDirty(true);
326  m_aXRef[nAnnotId] = aAnnotEntry;
327  m_aEditBuffer.WriteUInt32AsString(nAnnotId);
328  m_aEditBuffer.WriteCharPtr(" 0 obj\n");
329  m_aEditBuffer.WriteCharPtr("<</Type/Annot/Subtype/Widget/F 132\n");
330  m_aEditBuffer.WriteCharPtr("/Rect[0 0 ");
331  m_aEditBuffer.WriteOString(OString::number(rSignatureRectangle.getWidth()));
332  m_aEditBuffer.WriteCharPtr(" ");
333  m_aEditBuffer.WriteOString(OString::number(rSignatureRectangle.getHeight()));
334  m_aEditBuffer.WriteCharPtr("]\n");
335  m_aEditBuffer.WriteCharPtr("/FT/Sig\n");
336  m_aEditBuffer.WriteCharPtr("/P ");
337  m_aEditBuffer.WriteUInt32AsString(rFirstPage.GetObjectValue());
338  m_aEditBuffer.WriteCharPtr(" 0 R\n");
339  m_aEditBuffer.WriteCharPtr("/T(Signature");
340  m_aEditBuffer.WriteUInt32AsString(nNextSignature);
341  m_aEditBuffer.WriteCharPtr(")\n");
342  m_aEditBuffer.WriteCharPtr("/V ");
343  m_aEditBuffer.WriteUInt32AsString(nSignatureId);
344  m_aEditBuffer.WriteCharPtr(" 0 R\n");
345  m_aEditBuffer.WriteCharPtr("/DV ");
346  m_aEditBuffer.WriteUInt32AsString(nSignatureId);
347  m_aEditBuffer.WriteCharPtr(" 0 R\n");
348  m_aEditBuffer.WriteCharPtr("/AP<<\n/N ");
349  m_aEditBuffer.WriteUInt32AsString(nAppearanceId);
350  m_aEditBuffer.WriteCharPtr(" 0 R\n>>\n");
351  m_aEditBuffer.WriteCharPtr(">>\nendobj\n\n");
352 
353  return nAnnotId;
354 }
355 
356 bool PDFDocument::WritePageObject(PDFObjectElement& rFirstPage, sal_Int32 nAnnotId)
357 {
358  PDFElement* pAnnots = rFirstPage.Lookup("Annots");
359  auto pAnnotsReference = dynamic_cast<PDFReferenceElement*>(pAnnots);
360  if (pAnnotsReference)
361  {
362  // Write the updated Annots key of the Page object.
363  PDFObjectElement* pAnnotsObject = pAnnotsReference->LookupObject();
364  if (!pAnnotsObject)
365  {
366  SAL_WARN("vcl.filter", "PDFDocument::Sign: invalid Annots reference");
367  return false;
368  }
369 
370  sal_uInt32 nAnnotsId = pAnnotsObject->GetObjectValue();
371  m_aXRef[nAnnotsId].SetType(XRefEntryType::NOT_COMPRESSED);
372  m_aXRef[nAnnotsId].SetOffset(m_aEditBuffer.Tell());
373  m_aXRef[nAnnotsId].SetDirty(true);
374  m_aEditBuffer.WriteUInt32AsString(nAnnotsId);
375  m_aEditBuffer.WriteCharPtr(" 0 obj\n[");
376 
377  // Write existing references.
378  PDFArrayElement* pArray = pAnnotsObject->GetArray();
379  if (!pArray)
380  {
381  SAL_WARN("vcl.filter", "PDFDocument::Sign: Page Annots is a reference to a non-array");
382  return false;
383  }
384 
385  for (size_t i = 0; i < pArray->GetElements().size(); ++i)
386  {
387  auto pReference = dynamic_cast<PDFReferenceElement*>(pArray->GetElements()[i]);
388  if (!pReference)
389  continue;
390 
391  if (i)
392  m_aEditBuffer.WriteCharPtr(" ");
393  m_aEditBuffer.WriteUInt32AsString(pReference->GetObjectValue());
394  m_aEditBuffer.WriteCharPtr(" 0 R");
395  }
396  // Write our reference.
397  m_aEditBuffer.WriteCharPtr(" ");
398  m_aEditBuffer.WriteUInt32AsString(nAnnotId);
399  m_aEditBuffer.WriteCharPtr(" 0 R");
400 
401  m_aEditBuffer.WriteCharPtr("]\nendobj\n\n");
402  }
403  else
404  {
405  // Write the updated first page object, references nAnnotId.
406  sal_uInt32 nFirstPageId = rFirstPage.GetObjectValue();
407  if (nFirstPageId >= m_aXRef.size())
408  {
409  SAL_WARN("vcl.filter", "PDFDocument::Sign: invalid first page obj id");
410  return false;
411  }
412  m_aXRef[nFirstPageId].SetOffset(m_aEditBuffer.Tell());
413  m_aXRef[nFirstPageId].SetDirty(true);
414  m_aEditBuffer.WriteUInt32AsString(nFirstPageId);
415  m_aEditBuffer.WriteCharPtr(" 0 obj\n");
416  m_aEditBuffer.WriteCharPtr("<<");
417  auto pAnnotsArray = dynamic_cast<PDFArrayElement*>(pAnnots);
418  if (!pAnnotsArray)
419  {
420  // No Annots key, just write the key with a single reference.
421  m_aEditBuffer.WriteBytes(static_cast<const char*>(m_aEditBuffer.GetData())
422  + rFirstPage.GetDictionaryOffset(),
423  rFirstPage.GetDictionaryLength());
424  m_aEditBuffer.WriteCharPtr("/Annots[");
425  m_aEditBuffer.WriteUInt32AsString(nAnnotId);
426  m_aEditBuffer.WriteCharPtr(" 0 R]");
427  }
428  else
429  {
430  // Annots key is already there, insert our reference at the end.
431  PDFDictionaryElement* pDictionary = rFirstPage.GetDictionary();
432 
433  // Offset right before the end of the Annots array.
434  sal_uInt64 nAnnotsEndOffset = pDictionary->GetKeyOffset("Annots")
435  + pDictionary->GetKeyValueLength("Annots") - 1;
436  // Length of beginning of the dictionary -> Annots end.
437  sal_uInt64 nAnnotsBeforeEndLength = nAnnotsEndOffset - rFirstPage.GetDictionaryOffset();
438  m_aEditBuffer.WriteBytes(static_cast<const char*>(m_aEditBuffer.GetData())
439  + rFirstPage.GetDictionaryOffset(),
440  nAnnotsBeforeEndLength);
441  m_aEditBuffer.WriteCharPtr(" ");
442  m_aEditBuffer.WriteUInt32AsString(nAnnotId);
443  m_aEditBuffer.WriteCharPtr(" 0 R");
444  // Length of Annots end -> end of the dictionary.
445  sal_uInt64 nAnnotsAfterEndLength = rFirstPage.GetDictionaryOffset()
446  + rFirstPage.GetDictionaryLength()
447  - nAnnotsEndOffset;
448  m_aEditBuffer.WriteBytes(static_cast<const char*>(m_aEditBuffer.GetData())
449  + nAnnotsEndOffset,
450  nAnnotsAfterEndLength);
451  }
452  m_aEditBuffer.WriteCharPtr(">>");
453  m_aEditBuffer.WriteCharPtr("\nendobj\n\n");
454  }
455 
456  return true;
457 }
458 
459 bool PDFDocument::WriteCatalogObject(sal_Int32 nAnnotId, PDFReferenceElement*& pRoot)
460 {
461  if (m_pXRefStream)
462  pRoot = dynamic_cast<PDFReferenceElement*>(m_pXRefStream->Lookup("Root"));
463  else
464  {
465  if (!m_pTrailer)
466  {
467  SAL_WARN("vcl.filter", "PDFDocument::Sign: found no trailer");
468  return false;
469  }
470  pRoot = dynamic_cast<PDFReferenceElement*>(m_pTrailer->Lookup("Root"));
471  }
472  if (!pRoot)
473  {
474  SAL_WARN("vcl.filter", "PDFDocument::Sign: trailer has no root reference");
475  return false;
476  }
477  PDFObjectElement* pCatalog = pRoot->LookupObject();
478  if (!pCatalog)
479  {
480  SAL_WARN("vcl.filter", "PDFDocument::Sign: invalid catalog reference");
481  return false;
482  }
483  sal_uInt32 nCatalogId = pCatalog->GetObjectValue();
484  if (nCatalogId >= m_aXRef.size())
485  {
486  SAL_WARN("vcl.filter", "PDFDocument::Sign: invalid catalog obj id");
487  return false;
488  }
489  PDFElement* pAcroForm = pCatalog->Lookup("AcroForm");
490  auto pAcroFormReference = dynamic_cast<PDFReferenceElement*>(pAcroForm);
491  if (pAcroFormReference)
492  {
493  // Write the updated AcroForm key of the Catalog object.
494  PDFObjectElement* pAcroFormObject = pAcroFormReference->LookupObject();
495  if (!pAcroFormObject)
496  {
497  SAL_WARN("vcl.filter", "PDFDocument::Sign: invalid AcroForm reference");
498  return false;
499  }
500 
501  sal_uInt32 nAcroFormId = pAcroFormObject->GetObjectValue();
502  m_aXRef[nAcroFormId].SetType(XRefEntryType::NOT_COMPRESSED);
503  m_aXRef[nAcroFormId].SetOffset(m_aEditBuffer.Tell());
504  m_aXRef[nAcroFormId].SetDirty(true);
505  m_aEditBuffer.WriteUInt32AsString(nAcroFormId);
506  m_aEditBuffer.WriteCharPtr(" 0 obj\n");
507 
508  // If this is nullptr, then the AcroForm object is not in an object stream.
509  SvMemoryStream* pStreamBuffer = pAcroFormObject->GetStreamBuffer();
510 
511  if (!pAcroFormObject->Lookup("Fields"))
512  {
513  SAL_WARN("vcl.filter",
514  "PDFDocument::Sign: AcroForm object without required Fields key");
515  return false;
516  }
517 
518  PDFDictionaryElement* pAcroFormDictionary = pAcroFormObject->GetDictionary();
519  if (!pAcroFormDictionary)
520  {
521  SAL_WARN("vcl.filter", "PDFDocument::Sign: AcroForm object has no dictionary");
522  return false;
523  }
524 
525  // Offset right before the end of the Fields array.
526  sal_uInt64 nFieldsEndOffset = pAcroFormDictionary->GetKeyOffset("Fields")
527  + pAcroFormDictionary->GetKeyValueLength("Fields")
528  - strlen("]");
529 
530  // Length of beginning of the object dictionary -> Fields end.
531  sal_uInt64 nFieldsBeforeEndLength = nFieldsEndOffset;
532  if (pStreamBuffer)
533  m_aEditBuffer.WriteBytes(pStreamBuffer->GetData(), nFieldsBeforeEndLength);
534  else
535  {
536  nFieldsBeforeEndLength -= pAcroFormObject->GetDictionaryOffset();
537  m_aEditBuffer.WriteCharPtr("<<");
538  m_aEditBuffer.WriteBytes(static_cast<const char*>(m_aEditBuffer.GetData())
539  + pAcroFormObject->GetDictionaryOffset(),
540  nFieldsBeforeEndLength);
541  }
542 
543  // Append our reference at the end of the Fields array.
544  m_aEditBuffer.WriteCharPtr(" ");
545  m_aEditBuffer.WriteUInt32AsString(nAnnotId);
546  m_aEditBuffer.WriteCharPtr(" 0 R");
547 
548  // Length of Fields end -> end of the object dictionary.
549  if (pStreamBuffer)
550  {
551  sal_uInt64 nFieldsAfterEndLength = pStreamBuffer->GetSize() - nFieldsEndOffset;
552  m_aEditBuffer.WriteBytes(static_cast<const char*>(pStreamBuffer->GetData())
553  + nFieldsEndOffset,
554  nFieldsAfterEndLength);
555  }
556  else
557  {
558  sal_uInt64 nFieldsAfterEndLength = pAcroFormObject->GetDictionaryOffset()
559  + pAcroFormObject->GetDictionaryLength()
560  - nFieldsEndOffset;
561  m_aEditBuffer.WriteBytes(static_cast<const char*>(m_aEditBuffer.GetData())
562  + nFieldsEndOffset,
563  nFieldsAfterEndLength);
564  m_aEditBuffer.WriteCharPtr(">>");
565  }
566 
567  m_aEditBuffer.WriteCharPtr("\nendobj\n\n");
568  }
569  else
570  {
571  // Write the updated Catalog object, references nAnnotId.
572  auto pAcroFormDictionary = dynamic_cast<PDFDictionaryElement*>(pAcroForm);
573  m_aXRef[nCatalogId].SetOffset(m_aEditBuffer.Tell());
574  m_aXRef[nCatalogId].SetDirty(true);
575  m_aEditBuffer.WriteUInt32AsString(nCatalogId);
576  m_aEditBuffer.WriteCharPtr(" 0 obj\n");
577  m_aEditBuffer.WriteCharPtr("<<");
578  if (!pAcroFormDictionary)
579  {
580  // No AcroForm key, assume no signatures.
581  m_aEditBuffer.WriteBytes(static_cast<const char*>(m_aEditBuffer.GetData())
582  + pCatalog->GetDictionaryOffset(),
583  pCatalog->GetDictionaryLength());
584  m_aEditBuffer.WriteCharPtr("/AcroForm<</Fields[\n");
585  m_aEditBuffer.WriteUInt32AsString(nAnnotId);
586  m_aEditBuffer.WriteCharPtr(" 0 R\n]/SigFlags 3>>\n");
587  }
588  else
589  {
590  // AcroForm key is already there, insert our reference at the Fields end.
591  auto it = pAcroFormDictionary->GetItems().find("Fields");
592  if (it == pAcroFormDictionary->GetItems().end())
593  {
594  SAL_WARN("vcl.filter", "PDFDocument::Sign: AcroForm without required Fields key");
595  return false;
596  }
597 
598  auto pFields = dynamic_cast<PDFArrayElement*>(it->second);
599  if (!pFields)
600  {
601  SAL_WARN("vcl.filter", "PDFDocument::Sign: AcroForm Fields is not an array");
602  return false;
603  }
604 
605  // Offset right before the end of the Fields array.
606  sal_uInt64 nFieldsEndOffset = pAcroFormDictionary->GetKeyOffset("Fields")
607  + pAcroFormDictionary->GetKeyValueLength("Fields") - 1;
608  // Length of beginning of the Catalog dictionary -> Fields end.
609  sal_uInt64 nFieldsBeforeEndLength = nFieldsEndOffset - pCatalog->GetDictionaryOffset();
610  m_aEditBuffer.WriteBytes(static_cast<const char*>(m_aEditBuffer.GetData())
611  + pCatalog->GetDictionaryOffset(),
612  nFieldsBeforeEndLength);
613  m_aEditBuffer.WriteCharPtr(" ");
614  m_aEditBuffer.WriteUInt32AsString(nAnnotId);
615  m_aEditBuffer.WriteCharPtr(" 0 R");
616  // Length of Fields end -> end of the Catalog dictionary.
617  sal_uInt64 nFieldsAfterEndLength = pCatalog->GetDictionaryOffset()
618  + pCatalog->GetDictionaryLength() - nFieldsEndOffset;
619  m_aEditBuffer.WriteBytes(static_cast<const char*>(m_aEditBuffer.GetData())
620  + nFieldsEndOffset,
621  nFieldsAfterEndLength);
622  }
623  m_aEditBuffer.WriteCharPtr(">>\nendobj\n\n");
624  }
625 
626  return true;
627 }
628 
629 void PDFDocument::WriteXRef(sal_uInt64 nXRefOffset, PDFReferenceElement const* pRoot)
630 {
631  if (m_pXRefStream)
632  {
633  // Write the xref stream.
634  // This is a bit meta: the xref stream stores its own offset.
635  sal_Int32 nXRefStreamId = m_aXRef.size();
636  XRefEntry aXRefStreamEntry;
637  aXRefStreamEntry.SetOffset(nXRefOffset);
638  aXRefStreamEntry.SetDirty(true);
639  m_aXRef[nXRefStreamId] = aXRefStreamEntry;
640 
641  // Write stream data.
642  SvMemoryStream aXRefStream;
643  const size_t nOffsetLen = 3;
644  // 3 additional bytes: predictor, the first and the third field.
645  const size_t nLineLength = nOffsetLen + 3;
646  // This is the line as it appears before tweaking according to the predictor.
647  std::vector<unsigned char> aOrigLine(nLineLength);
648  // This is the previous line.
649  std::vector<unsigned char> aPrevLine(nLineLength);
650  // This is the line as written to the stream.
651  std::vector<unsigned char> aFilteredLine(nLineLength);
652  for (const auto& rXRef : m_aXRef)
653  {
654  const XRefEntry& rEntry = rXRef.second;
655 
656  if (!rEntry.GetDirty())
657  continue;
658 
659  // Predictor.
660  size_t nPos = 0;
661  // PNG prediction: up (on all rows).
662  aOrigLine[nPos++] = 2;
663 
664  // First field.
665  unsigned char nType = 0;
666  switch (rEntry.GetType())
667  {
668  case XRefEntryType::FREE:
669  nType = 0;
670  break;
671  case XRefEntryType::NOT_COMPRESSED:
672  nType = 1;
673  break;
674  case XRefEntryType::COMPRESSED:
675  nType = 2;
676  break;
677  }
678  aOrigLine[nPos++] = nType;
679 
680  // Second field.
681  for (size_t i = 0; i < nOffsetLen; ++i)
682  {
683  size_t nByte = nOffsetLen - i - 1;
684  // Fields requiring more than one byte are stored with the
685  // high-order byte first.
686  unsigned char nCh = (rEntry.GetOffset() & (0xff << (nByte * 8))) >> (nByte * 8);
687  aOrigLine[nPos++] = nCh;
688  }
689 
690  // Third field.
691  aOrigLine[nPos++] = 0;
692 
693  // Now apply the predictor.
694  aFilteredLine[0] = aOrigLine[0];
695  for (size_t i = 1; i < nLineLength; ++i)
696  {
697  // Count the delta vs the previous line.
698  aFilteredLine[i] = aOrigLine[i] - aPrevLine[i];
699  // Remember the new reference.
700  aPrevLine[i] = aOrigLine[i];
701  }
702 
703  aXRefStream.WriteBytes(aFilteredLine.data(), aFilteredLine.size());
704  }
705 
706  m_aEditBuffer.WriteUInt32AsString(nXRefStreamId);
707  m_aEditBuffer.WriteCharPtr(
708  " 0 obj\n<</DecodeParms<</Columns 5/Predictor 12>>/Filter/FlateDecode");
709 
710  // ID.
711  auto pID = dynamic_cast<PDFArrayElement*>(m_pXRefStream->Lookup("ID"));
712  if (pID)
713  {
714  const std::vector<PDFElement*>& rElements = pID->GetElements();
715  m_aEditBuffer.WriteCharPtr("/ID [ <");
716  for (size_t i = 0; i < rElements.size(); ++i)
717  {
718  auto pIDString = dynamic_cast<PDFHexStringElement*>(rElements[i]);
719  if (!pIDString)
720  continue;
721 
722  m_aEditBuffer.WriteOString(pIDString->GetValue());
723  if ((i + 1) < rElements.size())
724  m_aEditBuffer.WriteCharPtr("> <");
725  }
726  m_aEditBuffer.WriteCharPtr("> ] ");
727  }
728 
729  // Index.
730  m_aEditBuffer.WriteCharPtr("/Index [ ");
731  for (const auto& rXRef : m_aXRef)
732  {
733  if (!rXRef.second.GetDirty())
734  continue;
735 
736  m_aEditBuffer.WriteUInt32AsString(rXRef.first);
737  m_aEditBuffer.WriteCharPtr(" 1 ");
738  }
739  m_aEditBuffer.WriteCharPtr("] ");
740 
741  // Info.
742  auto pInfo = dynamic_cast<PDFReferenceElement*>(m_pXRefStream->Lookup("Info"));
743  if (pInfo)
744  {
745  m_aEditBuffer.WriteCharPtr("/Info ");
746  m_aEditBuffer.WriteUInt32AsString(pInfo->GetObjectValue());
747  m_aEditBuffer.WriteCharPtr(" ");
748  m_aEditBuffer.WriteUInt32AsString(pInfo->GetGenerationValue());
749  m_aEditBuffer.WriteCharPtr(" R ");
750  }
751 
752  // Length.
753  m_aEditBuffer.WriteCharPtr("/Length ");
754  {
755  ZCodec aZCodec;
756  aZCodec.BeginCompression();
757  aXRefStream.Seek(0);
758  SvMemoryStream aStream;
759  aZCodec.Compress(aXRefStream, aStream);
760  aZCodec.EndCompression();
761  aXRefStream.Seek(0);
762  aXRefStream.SetStreamSize(0);
763  aStream.Seek(0);
764  aXRefStream.WriteStream(aStream);
765  }
766  m_aEditBuffer.WriteUInt32AsString(aXRefStream.GetSize());
767 
768  if (!m_aStartXRefs.empty())
769  {
770  // Write location of the previous cross-reference section.
771  m_aEditBuffer.WriteCharPtr("/Prev ");
772  m_aEditBuffer.WriteUInt32AsString(m_aStartXRefs.back());
773  }
774 
775  // Root.
776  m_aEditBuffer.WriteCharPtr("/Root ");
777  m_aEditBuffer.WriteUInt32AsString(pRoot->GetObjectValue());
778  m_aEditBuffer.WriteCharPtr(" ");
779  m_aEditBuffer.WriteUInt32AsString(pRoot->GetGenerationValue());
780  m_aEditBuffer.WriteCharPtr(" R ");
781 
782  // Size.
783  m_aEditBuffer.WriteCharPtr("/Size ");
784  m_aEditBuffer.WriteUInt32AsString(m_aXRef.size());
785 
786  m_aEditBuffer.WriteCharPtr("/Type/XRef/W[1 3 1]>>\nstream\n");
787  aXRefStream.Seek(0);
788  m_aEditBuffer.WriteStream(aXRefStream);
789  m_aEditBuffer.WriteCharPtr("\nendstream\nendobj\n\n");
790  }
791  else
792  {
793  // Write the xref table.
794  m_aEditBuffer.WriteCharPtr("xref\n");
795  for (const auto& rXRef : m_aXRef)
796  {
797  size_t nObject = rXRef.first;
798  size_t nOffset = rXRef.second.GetOffset();
799  if (!rXRef.second.GetDirty())
800  continue;
801 
802  m_aEditBuffer.WriteUInt32AsString(nObject);
803  m_aEditBuffer.WriteCharPtr(" 1\n");
804  OStringBuffer aBuffer;
805  aBuffer.append(static_cast<sal_Int32>(nOffset));
806  while (aBuffer.getLength() < 10)
807  aBuffer.insert(0, "0");
808  if (nObject == 0)
809  aBuffer.append(" 65535 f \n");
810  else
811  aBuffer.append(" 00000 n \n");
812  m_aEditBuffer.WriteOString(aBuffer.toString());
813  }
814 
815  // Write the trailer.
816  m_aEditBuffer.WriteCharPtr("trailer\n<</Size ");
817  m_aEditBuffer.WriteUInt32AsString(m_aXRef.size());
818  m_aEditBuffer.WriteCharPtr("/Root ");
819  m_aEditBuffer.WriteUInt32AsString(pRoot->GetObjectValue());
820  m_aEditBuffer.WriteCharPtr(" ");
821  m_aEditBuffer.WriteUInt32AsString(pRoot->GetGenerationValue());
822  m_aEditBuffer.WriteCharPtr(" R\n");
823  auto pInfo = dynamic_cast<PDFReferenceElement*>(m_pTrailer->Lookup("Info"));
824  if (pInfo)
825  {
826  m_aEditBuffer.WriteCharPtr("/Info ");
827  m_aEditBuffer.WriteUInt32AsString(pInfo->GetObjectValue());
828  m_aEditBuffer.WriteCharPtr(" ");
829  m_aEditBuffer.WriteUInt32AsString(pInfo->GetGenerationValue());
830  m_aEditBuffer.WriteCharPtr(" R\n");
831  }
832  auto pID = dynamic_cast<PDFArrayElement*>(m_pTrailer->Lookup("ID"));
833  if (pID)
834  {
835  const std::vector<PDFElement*>& rElements = pID->GetElements();
836  m_aEditBuffer.WriteCharPtr("/ID [ <");
837  for (size_t i = 0; i < rElements.size(); ++i)
838  {
839  auto pIDString = dynamic_cast<PDFHexStringElement*>(rElements[i]);
840  if (!pIDString)
841  continue;
842 
843  m_aEditBuffer.WriteOString(pIDString->GetValue());
844  if ((i + 1) < rElements.size())
845  m_aEditBuffer.WriteCharPtr(">\n<");
846  }
847  m_aEditBuffer.WriteCharPtr("> ]\n");
848  }
849 
850  if (!m_aStartXRefs.empty())
851  {
852  // Write location of the previous cross-reference section.
853  m_aEditBuffer.WriteCharPtr("/Prev ");
854  m_aEditBuffer.WriteUInt32AsString(m_aStartXRefs.back());
855  }
856 
857  m_aEditBuffer.WriteCharPtr(">>\n");
858  }
859 }
860 
861 bool PDFDocument::Sign(const uno::Reference<security::XCertificate>& xCertificate,
862  const OUString& rDescription, bool bAdES)
863 {
864  m_aEditBuffer.Seek(STREAM_SEEK_TO_END);
865  m_aEditBuffer.WriteCharPtr("\n");
866 
867  sal_uInt64 nSignatureLastByteRangeOffset = 0;
868  sal_Int64 nSignatureContentOffset = 0;
869  sal_Int32 nSignatureId = WriteSignatureObject(
870  rDescription, bAdES, nSignatureLastByteRangeOffset, nSignatureContentOffset);
871 
872  tools::Rectangle aSignatureRectangle;
873  sal_Int32 nAppearanceId = WriteAppearanceObject(aSignatureRectangle);
874 
875  std::vector<PDFObjectElement*> aPages = GetPages();
876  if (aPages.empty())
877  {
878  SAL_WARN("vcl.filter", "PDFDocument::Sign: found no pages");
879  return false;
880  }
881 
882  size_t nPage = 0;
883  if (m_nSignaturePage < aPages.size())
884  {
885  nPage = m_nSignaturePage;
886  }
887  if (!aPages[nPage])
888  {
889  SAL_WARN("vcl.filter", "PDFDocument::Sign: failed to find page #" << nPage);
890  return false;
891  }
892 
893  PDFObjectElement& rPage = *aPages[nPage];
894  sal_Int32 nAnnotId = WriteAnnotObject(rPage, nSignatureId, nAppearanceId, aSignatureRectangle);
895 
896  if (!WritePageObject(rPage, nAnnotId))
897  {
898  SAL_WARN("vcl.filter", "PDFDocument::Sign: failed to write the updated Page object");
899  return false;
900  }
901 
902  PDFReferenceElement* pRoot = nullptr;
903  if (!WriteCatalogObject(nAnnotId, pRoot))
904  {
905  SAL_WARN("vcl.filter", "PDFDocument::Sign: failed to write the updated Catalog object");
906  return false;
907  }
908 
909  sal_uInt64 nXRefOffset = m_aEditBuffer.Tell();
910  WriteXRef(nXRefOffset, pRoot);
911 
912  // Write startxref.
913  m_aEditBuffer.WriteCharPtr("startxref\n");
914  m_aEditBuffer.WriteUInt32AsString(nXRefOffset);
915  m_aEditBuffer.WriteCharPtr("\n%%EOF\n");
916 
917  // Finalize the signature, now that we know the total file size.
918  // Calculate the length of the last byte range.
919  sal_uInt64 nFileEnd = m_aEditBuffer.Tell();
920  sal_Int64 nLastByteRangeLength
921  = nFileEnd - (nSignatureContentOffset + MAX_SIGNATURE_CONTENT_LENGTH + 1);
922  // Write the length to the buffer.
923  m_aEditBuffer.Seek(nSignatureLastByteRangeOffset);
924  OString aByteRangeBuffer = OString::number(nLastByteRangeLength) + " ]";
925  m_aEditBuffer.WriteOString(aByteRangeBuffer);
926 
927  // Create the PKCS#7 object.
928  css::uno::Sequence<sal_Int8> aDerEncoded = xCertificate->getEncoded();
929  if (!aDerEncoded.hasElements())
930  {
931  SAL_WARN("vcl.filter", "PDFDocument::Sign: empty certificate");
932  return false;
933  }
934 
935  m_aEditBuffer.Seek(0);
936  sal_uInt64 nBufferSize1 = nSignatureContentOffset - 1;
937  std::unique_ptr<char[]> aBuffer1(new char[nBufferSize1]);
938  m_aEditBuffer.ReadBytes(aBuffer1.get(), nBufferSize1);
939 
940  m_aEditBuffer.Seek(nSignatureContentOffset + MAX_SIGNATURE_CONTENT_LENGTH + 1);
941  sal_uInt64 nBufferSize2 = nLastByteRangeLength;
942  std::unique_ptr<char[]> aBuffer2(new char[nBufferSize2]);
943  m_aEditBuffer.ReadBytes(aBuffer2.get(), nBufferSize2);
944 
945  OStringBuffer aCMSHexBuffer;
946  svl::crypto::Signing aSigning(xCertificate);
947  aSigning.AddDataRange(aBuffer1.get(), nBufferSize1);
948  aSigning.AddDataRange(aBuffer2.get(), nBufferSize2);
949  if (!aSigning.Sign(aCMSHexBuffer))
950  {
951  SAL_WARN("vcl.filter", "PDFDocument::Sign: PDFWriter::Sign() failed");
952  return false;
953  }
954 
955  assert(aCMSHexBuffer.getLength() <= MAX_SIGNATURE_CONTENT_LENGTH);
956 
957  m_aEditBuffer.Seek(nSignatureContentOffset);
958  m_aEditBuffer.WriteOString(aCMSHexBuffer.toString());
959 
960  return true;
961 }
962 
963 bool PDFDocument::Write(SvStream& rStream)
964 {
965  m_aEditBuffer.Seek(0);
966  rStream.WriteStream(m_aEditBuffer);
967  return rStream.good();
968 }
969 
/**
 * Splits rStream, starting at its current position, into PDF tokens that are appended to
 * rElements.
 *
 * While tokenizing, document-level state is updated as well: the offset -> object and
 * ID -> object maps, the list of startxref offsets, the xref stream object, the
 * offset -> trailer map and the current trailer.
 *
 * @param rStream        stream to read from.
 * @param eMode          stop condition: EOF_TOKEN returns after a comment that ends exactly at
 *                       the last recorded EOF offset, END_OF_OBJECT returns right after the
 *                       first "endobj" keyword; otherwise the stream is read till its end.
 * @param rElements      output list of tokens; it is only appended to.
 * @param pObjectElement object to start with as the "last seen object", may be nullptr.
 * @return false if a token could not be read or an unexpected character / keyword was found.
 */
bool PDFDocument::Tokenize(SvStream& rStream, TokenizeMode eMode,
                           std::vector<std::unique_ptr<PDFElement>>& rElements,
                           PDFObjectElement* pObjectElement)
{
    // Last seen object token.
    PDFObjectElement* pObject = pObjectElement;
    // Last seen name token, used to recognize the "/Type /ObjStm" key-value pair.
    PDFNameElement* pObjectKey = nullptr;
    // Set to the current object once it turned out to be an object stream.
    PDFObjectElement* pObjectStream = nullptr;
    // True once an "xref" keyword was seen.
    bool bInXRef = false;
    // The next number will be an xref offset.
    bool bInStartXRef = false;
    // Dictionary depth, so we know when we're outside any dictionaries.
    // Note that arrays increase / decrease this counter, too.
    int nDepth = 0;
    // Last seen array token that's outside any dictionaries.
    PDFArrayElement* pArray = nullptr;
    // If we're inside an obj/endobj pair.
    bool bInObject = false;

    while (true)
    {
        // Peek at the first character of the next token, it decides the token type.
        char ch;
        rStream.ReadChar(ch);
        if (rStream.eof())
            break;

        switch (ch)
        {
            case '%':
            {
                // Comment. Step back, so the element can read its own leading character.
                auto pComment = new PDFCommentElement(*this);
                rElements.push_back(std::unique_ptr<PDFElement>(pComment));
                rStream.SeekRel(-1);
                if (!rElements.back()->Read(rStream))
                {
                    SAL_WARN("vcl.filter",
                             "PDFDocument::Tokenize: PDFCommentElement::Read() failed");
                    return false;
                }
                if (eMode == TokenizeMode::EOF_TOKEN && !m_aEOFs.empty()
                    && m_aEOFs.back() == rStream.Tell())
                {
                    // Found EOF and partial parsing requested, we're done.
                    return true;
                }
                break;
            }
            case '<':
            {
                // Dictionary or hex string.
                // Look at the second character, then rewind to before the first '<'.
                rStream.ReadChar(ch);
                rStream.SeekRel(-2);
                if (ch == '<')
                {
                    rElements.push_back(std::unique_ptr<PDFElement>(new PDFDictionaryElement()));
                    ++nDepth;
                }
                else
                    rElements.push_back(std::unique_ptr<PDFElement>(new PDFHexStringElement));
                if (!rElements.back()->Read(rStream))
                {
                    // NOTE(review): this message is also emitted when reading a hex string
                    // fails.
                    SAL_WARN("vcl.filter",
                             "PDFDocument::Tokenize: PDFDictionaryElement::Read() failed");
                    return false;
                }
                break;
            }
            case '>':
            {
                // End of a dictionary.
                rElements.push_back(std::unique_ptr<PDFElement>(new PDFEndDictionaryElement()));
                --nDepth;
                rStream.SeekRel(-1);
                if (!rElements.back()->Read(rStream))
                {
                    SAL_WARN("vcl.filter",
                             "PDFDocument::Tokenize: PDFEndDictionaryElement::Read() failed");
                    return false;
                }
                break;
            }
            case '[':
            {
                // Start of an array.
                auto pArr = new PDFArrayElement(pObject);
                rElements.push_back(std::unique_ptr<PDFElement>(pArr));
                if (nDepth == 0)
                {
                    // The array is attached directly, inform the object.
                    pArray = pArr;
                    if (pObject)
                    {
                        pObject->SetArray(pArray);
                        // The '[' is already consumed, so this is the position right after it.
                        pObject->SetArrayOffset(rStream.Tell());
                    }
                }
                ++nDepth;
                rStream.SeekRel(-1);
                if (!rElements.back()->Read(rStream))
                {
                    SAL_WARN("vcl.filter", "PDFDocument::Tokenize: PDFArrayElement::Read() failed");
                    return false;
                }
                break;
            }
            case ']':
            {
                // End of an array.
                rElements.push_back(std::unique_ptr<PDFElement>(new PDFEndArrayElement()));
                --nDepth;
                rStream.SeekRel(-1);
                if (nDepth == 0)
                {
                    if (pObject)
                    {
                        // The stream is positioned at the ']' here, so the length covers the
                        // bytes between the array offset and the closing bracket.
                        pObject->SetArrayLength(rStream.Tell() - pObject->GetArrayOffset());
                    }
                }
                if (!rElements.back()->Read(rStream))
                {
                    SAL_WARN("vcl.filter",
                             "PDFDocument::Tokenize: PDFEndArrayElement::Read() failed");
                    return false;
                }
                break;
            }
            case '/':
            {
                // Name.
                auto pNameElement = new PDFNameElement();
                rElements.push_back(std::unique_ptr<PDFElement>(pNameElement));
                rStream.SeekRel(-1);
                if (!pNameElement->Read(rStream))
                {
                    SAL_WARN("vcl.filter", "PDFDocument::Tokenize: PDFNameElement::Read() failed");
                    return false;
                }

                // A "Type" name followed by an "ObjStm" name marks the current object as an
                // object stream; any other name is remembered as the potential next key.
                if (pObject && pObjectKey && pObjectKey->GetValue() == "Type"
                    && pNameElement->GetValue() == "ObjStm")
                    pObjectStream = pObject;
                else
                    pObjectKey = pNameElement;
                break;
            }
            case '(':
            {
                // Literal string.
                rElements.push_back(std::unique_ptr<PDFElement>(new PDFLiteralStringElement));
                rStream.SeekRel(-1);
                if (!rElements.back()->Read(rStream))
                {
                    SAL_WARN("vcl.filter",
                             "PDFDocument::Tokenize: PDFLiteralStringElement::Read() failed");
                    return false;
                }
                break;
            }
            default:
            {
                if (rtl::isAsciiDigit(static_cast<unsigned char>(ch)) || ch == '-' || ch == '+'
                    || ch == '.')
                {
                    // Numbering object: an integer or a real.
                    auto pNumberElement = new PDFNumberElement();
                    rElements.push_back(std::unique_ptr<PDFElement>(pNumberElement));
                    rStream.SeekRel(-1);
                    if (!pNumberElement->Read(rStream))
                    {
                        SAL_WARN("vcl.filter",
                                 "PDFDocument::Tokenize: PDFNumberElement::Read() failed");
                        return false;
                    }
                    if (bInStartXRef)
                    {
                        // This is the number after "startxref": remember it and see if an
                        // already seen object starts at that offset.
                        bInStartXRef = false;
                        m_aStartXRefs.push_back(pNumberElement->GetValue());

                        auto it = m_aOffsetObjects.find(pNumberElement->GetValue());
                        if (it != m_aOffsetObjects.end())
                            m_pXRefStream = it->second;
                    }
                    else if (bInObject && !nDepth && pObject)
                        // Number element inside an object, but outside a
                        // dictionary / array: remember it.
                        pObject->SetNumberElement(pNumberElement);
                }
                else if (rtl::isAsciiAlpha(static_cast<unsigned char>(ch)))
                {
                    // Possible keyword, like "obj".
                    rStream.SeekRel(-1);
                    OString aKeyword = ReadKeyword(rStream);

                    bool bObj = aKeyword == "obj";
                    if (bObj || aKeyword == "R")
                    {
                        // Both keywords are preceded by an object number and a generation
                        // number, which must be the last two tokens.
                        size_t nElements = rElements.size();
                        if (nElements < 2)
                        {
                            SAL_WARN("vcl.filter", "PDFDocument::Tokenize: expected at least two "
                                                   "tokens before 'obj' or 'R' keyword");
                            return false;
                        }

                        auto pObjectNumber
                            = dynamic_cast<PDFNumberElement*>(rElements[nElements - 2].get());
                        auto pGenerationNumber
                            = dynamic_cast<PDFNumberElement*>(rElements[nElements - 1].get());
                        if (!pObjectNumber || !pGenerationNumber)
                        {
                            SAL_WARN("vcl.filter", "PDFDocument::Tokenize: missing object or "
                                                   "generation number before 'obj' or 'R' keyword");
                            return false;
                        }

                        if (bObj)
                        {
                            // New indirect object: it becomes the current object and is
                            // registered both by the location of its object number token and
                            // by its ID.
                            pObject = new PDFObjectElement(*this, pObjectNumber->GetValue(),
                                                           pGenerationNumber->GetValue());
                            rElements.push_back(std::unique_ptr<PDFElement>(pObject));
                            m_aOffsetObjects[pObjectNumber->GetLocation()] = pObject;
                            m_aIDObjects[pObjectNumber->GetValue()] = pObject;
                            bInObject = true;
                        }
                        else
                        {
                            // Indirect reference to an object.
                            auto pReference = new PDFReferenceElement(*this, *pObjectNumber,
                                                                      *pGenerationNumber);
                            rElements.push_back(std::unique_ptr<PDFElement>(pReference));
                            if (bInObject && nDepth > 0 && pObject)
                                // Inform the object about a new in-dictionary reference.
                                pObject->AddDictionaryReference(pReference);
                        }
                        if (!rElements.back()->Read(rStream))
                        {
                            SAL_WARN("vcl.filter",
                                     "PDFDocument::Tokenize: PDFElement::Read() failed");
                            return false;
                        }
                    }
                    else if (aKeyword == "stream")
                    {
                        // Look up the length of the stream from the parent object's dictionary.
                        size_t nLength = 0;
                        for (size_t nElement = 0; nElement < rElements.size(); ++nElement)
                        {
                            // Iterate in reverse order.
                            size_t nIndex = rElements.size() - nElement - 1;
                            PDFElement* pElement = rElements[nIndex].get();
                            auto pObj = dynamic_cast<PDFObjectElement*>(pElement);
                            if (!pObj)
                                continue;

                            // This is the most recent object token: its Length key decides,
                            // every path below leaves the loop.
                            PDFElement* pLookup = pObj->Lookup("Length");
                            auto pReference = dynamic_cast<PDFReferenceElement*>(pLookup);
                            if (pReference)
                            {
                                // Length is provided as a reference.
                                nLength = pReference->LookupNumber(rStream);
                                break;
                            }

                            auto pNumber = dynamic_cast<PDFNumberElement*>(pLookup);
                            if (pNumber)
                            {
                                // Length is provided directly.
                                nLength = pNumber->GetValue();
                                break;
                            }

                            SAL_WARN(
                                "vcl.filter",
                                "PDFDocument::Tokenize: found no Length key for stream keyword");
                            return false;
                        }

                        // Skip the line break(s) after the keyword, then read the stream data.
                        PDFDocument::SkipLineBreaks(rStream);
                        auto pStreamElement = new PDFStreamElement(nLength);
                        if (pObject)
                            pObject->SetStream(pStreamElement);
                        rElements.push_back(std::unique_ptr<PDFElement>(pStreamElement));
                        if (!rElements.back()->Read(rStream))
                        {
                            SAL_WARN("vcl.filter",
                                     "PDFDocument::Tokenize: PDFStreamElement::Read() failed");
                            return false;
                        }
                    }
                    else if (aKeyword == "endstream")
                    {
                        rElements.push_back(std::unique_ptr<PDFElement>(new PDFEndStreamElement));
                        if (!rElements.back()->Read(rStream))
                        {
                            SAL_WARN("vcl.filter",
                                     "PDFDocument::Tokenize: PDFEndStreamElement::Read() failed");
                            return false;
                        }
                    }
                    else if (aKeyword == "endobj")
                    {
                        rElements.push_back(std::unique_ptr<PDFElement>(new PDFEndObjectElement));
                        if (!rElements.back()->Read(rStream))
                        {
                            SAL_WARN("vcl.filter",
                                     "PDFDocument::Tokenize: PDFEndObjectElement::Read() failed");
                            return false;
                        }
                        if (eMode == TokenizeMode::END_OF_OBJECT)
                        {
                            // Found endobj and only object parsing was requested, we're done.
                            return true;
                        }

                        if (pObjectStream)
                        {
                            // We're at the end of an object stream, parse the stored objects.
                            pObjectStream->ParseStoredObjects();
                            pObjectStream = nullptr;
                            pObjectKey = nullptr;
                        }
                        bInObject = false;
                    }
                    else if (aKeyword == "true" || aKeyword == "false")
                        rElements.push_back(std::unique_ptr<PDFElement>(
                            new PDFBooleanElement(aKeyword.toBoolean())));
                    else if (aKeyword == "null")
                        rElements.push_back(std::unique_ptr<PDFElement>(new PDFNullElement));
                    else if (aKeyword == "xref")
                        // Allow 'f' and 'n' keywords.
                        bInXRef = true;
                    else if (bInXRef && (aKeyword == "f" || aKeyword == "n"))
                    {
                        // Entry type markers of the xref table: no token is created for them.
                    }
                    else if (aKeyword == "trailer")
                    {
                        auto pTrailer = new PDFTrailerElement(*this);

                        // Make it possible to find this trailer later by offset.
                        pTrailer->Read(rStream);
                        m_aOffsetTrailers[pTrailer->GetLocation()] = pTrailer;

                        // When reading till the first EOF token only, remember
                        // just the first trailer token.
                        if (eMode != TokenizeMode::EOF_TOKEN || !m_pTrailer)
                            m_pTrailer = pTrailer;
                        rElements.push_back(std::unique_ptr<PDFElement>(pTrailer));
                    }
                    else if (aKeyword == "startxref")
                    {
                        // The number token that follows is handled in the number branch above.
                        bInStartXRef = true;
                    }
                    else
                    {
                        SAL_WARN("vcl.filter", "PDFDocument::Tokenize: unexpected '"
                                                   << aKeyword << "' keyword at byte position "
                                                   << rStream.Tell());
                        return false;
                    }
                }
                else
                {
                    // Neither a number nor a keyword: only whitespace (and, leniently, a null
                    // character) is acceptable between tokens.
                    auto uChar = static_cast<unsigned char>(ch);
                    // Be more lenient and allow unexpected null char
                    if (!rtl::isAsciiWhiteSpace(uChar) && uChar != 0)
                    {
                        SAL_WARN("vcl.filter",
                                 "PDFDocument::Tokenize: unexpected character with code "
                                     << sal_Int32(ch) << " at byte position " << rStream.Tell());
                        return false;
                    }
                    SAL_WARN_IF(uChar == 0, "vcl.filter",
                                "PDFDocument::Tokenize: unexpected null character at "
                                    << rStream.Tell() << " - ignoring");
                }
                break;
            }
        }
    }

    // Reached the end of the stream without errors.
    return true;
}
1345 
1346 void PDFDocument::SetIDObject(size_t nID, PDFObjectElement* pObject)
1347 {
1348  m_aIDObjects[nID] = pObject;
1349 }
1350 
1351 bool PDFDocument::Read(SvStream& rStream)
1352 {
1353  // Check file magic.
1354  std::vector<sal_Int8> aHeader(5);
1355  rStream.Seek(0);
1356  rStream.ReadBytes(aHeader.data(), aHeader.size());
1357  if (aHeader[0] != '%' || aHeader[1] != 'P' || aHeader[2] != 'D' || aHeader[3] != 'F'
1358  || aHeader[4] != '-')
1359  {
1360  SAL_WARN("vcl.filter", "PDFDocument::Read: header mismatch");
1361  return false;
1362  }
1363 
1364  // Allow later editing of the contents in-memory.
1365  rStream.Seek(0);
1366  m_aEditBuffer.WriteStream(rStream);
1367 
1368  // Look up the offset of the xref table.
1369  size_t nStartXRef = FindStartXRef(rStream);
1370  SAL_INFO("vcl.filter", "PDFDocument::Read: nStartXRef is " << nStartXRef);
1371  if (nStartXRef == 0)
1372  {
1373  SAL_WARN("vcl.filter", "PDFDocument::Read: found no xref start offset");
1374  return false;
1375  }
1376  while (true)
1377  {
1378  rStream.Seek(nStartXRef);
1379  OString aKeyword = ReadKeyword(rStream);
1380  if (aKeyword.isEmpty())
1381  ReadXRefStream(rStream);
1382 
1383  else
1384  {
1385  if (aKeyword != "xref")
1386  {
1387  SAL_WARN("vcl.filter", "PDFDocument::Read: xref is not the first keyword");
1388  return false;
1389  }
1390  ReadXRef(rStream);
1391  if (!Tokenize(rStream, TokenizeMode::EOF_TOKEN, m_aElements, nullptr))
1392  {
1393  SAL_WARN("vcl.filter", "PDFDocument::Read: failed to tokenizer trailer after xref");
1394  return false;
1395  }
1396  }
1397 
1398  PDFNumberElement* pPrev = nullptr;
1399  if (m_pTrailer)
1400  {
1401  pPrev = dynamic_cast<PDFNumberElement*>(m_pTrailer->Lookup("Prev"));
1402 
1403  // Remember the offset of this trailer in the correct order. It's
1404  // possible that newer trailers don't have a larger offset.
1405  m_aTrailerOffsets.push_back(m_pTrailer->GetLocation());
1406  }
1407  else if (m_pXRefStream)
1408  pPrev = dynamic_cast<PDFNumberElement*>(m_pXRefStream->Lookup("Prev"));
1409  if (pPrev)
1410  nStartXRef = pPrev->GetValue();
1411 
1412  // Reset state, except the edit buffer.
1413  m_aElements.clear();
1414  m_aOffsetObjects.clear();
1415  m_aIDObjects.clear();
1416  m_aStartXRefs.clear();
1417  m_aEOFs.clear();
1418  m_pTrailer = nullptr;
1419  m_pXRefStream = nullptr;
1420  if (!pPrev)
1421  break;
1422  }
1423 
1424  // Then we can tokenize the stream.
1425  rStream.Seek(0);
1426  return Tokenize(rStream, TokenizeMode::END_OF_STREAM, m_aElements, nullptr);
1427 }
1428 
1429 OString PDFDocument::ReadKeyword(SvStream& rStream)
1430 {
1431  OStringBuffer aBuf;
1432  char ch;
1433  rStream.ReadChar(ch);
1434  if (rStream.eof())
1435  return {};
1436  while (rtl::isAsciiAlpha(static_cast<unsigned char>(ch)))
1437  {
1438  aBuf.append(ch);
1439  rStream.ReadChar(ch);
1440  if (rStream.eof())
1441  return aBuf.toString();
1442  }
1443  rStream.SeekRel(-1);
1444  return aBuf.toString();
1445 }
1446 
1447 size_t PDFDocument::FindStartXRef(SvStream& rStream)
1448 {
1449  // Find the "startxref" token, somewhere near the end of the document.
1450  std::vector<char> aBuf(1024);
1451  rStream.Seek(STREAM_SEEK_TO_END);
1452  if (rStream.Tell() > aBuf.size())
1453  rStream.SeekRel(static_cast<sal_Int64>(-1) * aBuf.size());
1454  else
1455  // The document is really short, then just read it from the start.
1456  rStream.Seek(0);
1457  size_t nBeforePeek = rStream.Tell();
1458  size_t nSize = rStream.ReadBytes(aBuf.data(), aBuf.size());
1459  rStream.Seek(nBeforePeek);
1460  if (nSize != aBuf.size())
1461  aBuf.resize(nSize);
1462  OString aPrefix("startxref");
1463  // Find the last startxref at the end of the document.
1464  auto itLastValid = aBuf.end();
1465  auto it = aBuf.begin();
1466  while (true)
1467  {
1468  it = std::search(it, aBuf.end(), aPrefix.getStr(), aPrefix.getStr() + aPrefix.getLength());
1469  if (it == aBuf.end())
1470  break;
1471 
1472  itLastValid = it;
1473  ++it;
1474  }
1475  if (itLastValid == aBuf.end())
1476  {
1477  SAL_WARN("vcl.filter", "PDFDocument::FindStartXRef: found no startxref");
1478  return 0;
1479  }
1480 
1481  rStream.SeekRel(itLastValid - aBuf.begin() + aPrefix.getLength());
1482  if (rStream.eof())
1483  {
1484  SAL_WARN("vcl.filter",
1485  "PDFDocument::FindStartXRef: unexpected end of stream after startxref");
1486  return 0;
1487  }
1488 
1489  PDFDocument::SkipWhitespace(rStream);
1490  PDFNumberElement aNumber;
1491  if (!aNumber.Read(rStream))
1492  return 0;
1493  return aNumber.GetValue();
1494 }
1495 
1496 void PDFDocument::ReadXRefStream(SvStream& rStream)
1497 {
1498  // Look up the stream length in the object dictionary.
1499  if (!Tokenize(rStream, TokenizeMode::END_OF_OBJECT, m_aElements, nullptr))
1500  {
1501  SAL_WARN("vcl.filter", "PDFDocument::ReadXRefStream: failed to read object");
1502  return;
1503  }
1504 
1505  if (m_aElements.empty())
1506  {
1507  SAL_WARN("vcl.filter", "PDFDocument::ReadXRefStream: no tokens found");
1508  return;
1509  }
1510 
1511  PDFObjectElement* pObject = nullptr;
1512  for (const auto& pElement : m_aElements)
1513  {
1514  if (auto pObj = dynamic_cast<PDFObjectElement*>(pElement.get()))
1515  {
1516  pObject = pObj;
1517  break;
1518  }
1519  }
1520  if (!pObject)
1521  {
1522  SAL_WARN("vcl.filter", "PDFDocument::ReadXRefStream: no object token found");
1523  return;
1524  }
1525 
1526  // So that the Prev key can be looked up later.
1527  m_pXRefStream = pObject;
1528 
1529  PDFElement* pLookup = pObject->Lookup("Length");
1530  auto pNumber = dynamic_cast<PDFNumberElement*>(pLookup);
1531  if (!pNumber)
1532  {
1533  SAL_WARN("vcl.filter", "PDFDocument::ReadXRefStream: stream length is not provided");
1534  return;
1535  }
1536  sal_uInt64 nLength = pNumber->GetValue();
1537 
1538  // Look up the stream offset.
1539  PDFStreamElement* pStream = nullptr;
1540  for (const auto& pElement : m_aElements)
1541  {
1542  if (auto pS = dynamic_cast<PDFStreamElement*>(pElement.get()))
1543  {
1544  pStream = pS;
1545  break;
1546  }
1547  }
1548  if (!pStream)
1549  {
1550  SAL_WARN("vcl.filter", "PDFDocument::ReadXRefStream: no stream token found");
1551  return;
1552  }
1553 
1554  // Read and decompress it.
1555  rStream.Seek(pStream->GetOffset());
1556  std::vector<char> aBuf(nLength);
1557  rStream.ReadBytes(aBuf.data(), aBuf.size());
1558 
1559  auto pFilter = dynamic_cast<PDFNameElement*>(pObject->Lookup("Filter"));
1560  if (!pFilter)
1561  {
1562  SAL_WARN("vcl.filter", "PDFDocument::ReadXRefStream: no Filter found");
1563  return;
1564  }
1565 
1566  if (pFilter->GetValue() != "FlateDecode")
1567  {
1568  SAL_WARN("vcl.filter",
1569  "PDFDocument::ReadXRefStream: unexpected filter: " << pFilter->GetValue());
1570  return;
1571  }
1572 
1573  int nColumns = 1;
1574  int nPredictor = 1;
1575  if (auto pDecodeParams = dynamic_cast<PDFDictionaryElement*>(pObject->Lookup("DecodeParms")))
1576  {
1577  const std::map<OString, PDFElement*>& rItems = pDecodeParams->GetItems();
1578  auto it = rItems.find("Columns");
1579  if (it != rItems.end())
1580  if (auto pColumns = dynamic_cast<PDFNumberElement*>(it->second))
1581  nColumns = pColumns->GetValue();
1582  it = rItems.find("Predictor");
1583  if (it != rItems.end())
1584  if (auto pPredictor = dynamic_cast<PDFNumberElement*>(it->second))
1585  nPredictor = pPredictor->GetValue();
1586  }
1587 
1588  SvMemoryStream aSource(aBuf.data(), aBuf.size(), StreamMode::READ);
1589  SvMemoryStream aStream;
1590  ZCodec aZCodec;
1591  aZCodec.BeginCompression();
1592  aZCodec.Decompress(aSource, aStream);
1593  if (!aZCodec.EndCompression())
1594  {
1595  SAL_WARN("vcl.filter", "PDFDocument::ReadXRefStream: decompression failed");
1596  return;
1597  }
1598 
1599  // Look up the first and the last entry we need to read.
1600  auto pIndex = dynamic_cast<PDFArrayElement*>(pObject->Lookup("Index"));
1601  std::vector<size_t> aFirstObjects;
1602  std::vector<size_t> aNumberOfObjects;
1603  if (!pIndex)
1604  {
1605  auto pSize = dynamic_cast<PDFNumberElement*>(pObject->Lookup("Size"));
1606  if (pSize)
1607  {
1608  aFirstObjects.push_back(0);
1609  aNumberOfObjects.push_back(pSize->GetValue());
1610  }
1611  else
1612  {
1613  SAL_WARN("vcl.filter", "PDFDocument::ReadXRefStream: Index and Size not found");
1614  return;
1615  }
1616  }
1617  else
1618  {
1619  const std::vector<PDFElement*>& rIndexElements = pIndex->GetElements();
1620  size_t nFirstObject = 0;
1621  for (size_t i = 0; i < rIndexElements.size(); ++i)
1622  {
1623  if (i % 2 == 0)
1624  {
1625  auto pFirstObject = dynamic_cast<PDFNumberElement*>(rIndexElements[i]);
1626  if (!pFirstObject)
1627  {
1628  SAL_WARN("vcl.filter",
1629  "PDFDocument::ReadXRefStream: Index has no first object");
1630  return;
1631  }
1632  nFirstObject = pFirstObject->GetValue();
1633  continue;
1634  }
1635 
1636  auto pNumberOfObjects = dynamic_cast<PDFNumberElement*>(rIndexElements[i]);
1637  if (!pNumberOfObjects)
1638  {
1639  SAL_WARN("vcl.filter",
1640  "PDFDocument::ReadXRefStream: Index has no number of objects");
1641  return;
1642  }
1643  aFirstObjects.push_back(nFirstObject);
1644  aNumberOfObjects.push_back(pNumberOfObjects->GetValue());
1645  }
1646  }
1647 
1648  // Look up the format of a single entry.
1649  const int nWSize = 3;
1650  auto pW = dynamic_cast<PDFArrayElement*>(pObject->Lookup("W"));
1651  if (!pW || pW->GetElements().size() < nWSize)
1652  {
1653  SAL_WARN("vcl.filter", "PDFDocument::ReadXRefStream: W not found or has < 3 elements");
1654  return;
1655  }
1656  int aW[nWSize];
1657  // First character is the (kind of) repeated predictor.
1658  int nLineLength = 1;
1659  for (size_t i = 0; i < nWSize; ++i)
1660  {
1661  auto pI = dynamic_cast<PDFNumberElement*>(pW->GetElements()[i]);
1662  if (!pI)
1663  {
1664  SAL_WARN("vcl.filter", "PDFDocument::ReadXRefStream: W contains non-number");
1665  return;
1666  }
1667  aW[i] = pI->GetValue();
1668  nLineLength += aW[i];
1669  }
1670 
1671  if (nPredictor > 1 && nLineLength - 1 != nColumns)
1672  {
1673  SAL_WARN("vcl.filter",
1674  "PDFDocument::ReadXRefStream: /DecodeParms/Columns is inconsistent with /W");
1675  return;
1676  }
1677 
1678  aStream.Seek(0);
1679  for (size_t nSubSection = 0; nSubSection < aFirstObjects.size(); ++nSubSection)
1680  {
1681  size_t nFirstObject = aFirstObjects[nSubSection];
1682  size_t nNumberOfObjects = aNumberOfObjects[nSubSection];
1683 
1684  // This is the line as read from the stream.
1685  std::vector<unsigned char> aOrigLine(nLineLength);
1686  // This is the line as it appears after tweaking according to nPredictor.
1687  std::vector<unsigned char> aFilteredLine(nLineLength);
1688  for (size_t nEntry = 0; nEntry < nNumberOfObjects; ++nEntry)
1689  {
1690  size_t nIndex = nFirstObject + nEntry;
1691 
1692  aStream.ReadBytes(aOrigLine.data(), aOrigLine.size());
1693  if (nPredictor > 1 && aOrigLine[0] + 10 != nPredictor)
1694  {
1695  SAL_WARN("vcl.filter", "PDFDocument::ReadXRefStream: in-stream predictor is "
1696  "inconsistent with /DecodeParms/Predictor for object #"
1697  << nIndex);
1698  return;
1699  }
1700 
1701  for (int i = 0; i < nLineLength; ++i)
1702  {
1703  switch (nPredictor)
1704  {
1705  case 1:
1706  // No prediction.
1707  break;
1708  case 12:
1709  // PNG prediction: up (on all rows).
1710  aFilteredLine[i] = aFilteredLine[i] + aOrigLine[i];
1711  break;
1712  default:
1713  SAL_WARN("vcl.filter", "PDFDocument::ReadXRefStream: unexpected predictor: "
1714  << nPredictor);
1715  return;
1716  }
1717  }
1718 
1719  // First character is already handled above.
1720  int nPos = 1;
1721  size_t nType = 0;
1722  // Start of the current field in the stream data.
1723  int nOffset = nPos;
1724  for (; nPos < nOffset + aW[0]; ++nPos)
1725  {
1726  unsigned char nCh = aFilteredLine[nPos];
1727  nType = (nType << 8) + nCh;
1728  }
1729 
1730  // Start of the object in the file stream.
1731  size_t nStreamOffset = 0;
1732  nOffset = nPos;
1733  for (; nPos < nOffset + aW[1]; ++nPos)
1734  {
1735  unsigned char nCh = aFilteredLine[nPos];
1736  nStreamOffset = (nStreamOffset << 8) + nCh;
1737  }
1738 
1739  // Generation number of the object.
1740  size_t nGenerationNumber = 0;
1741  nOffset = nPos;
1742  for (; nPos < nOffset + aW[2]; ++nPos)
1743  {
1744  unsigned char nCh = aFilteredLine[nPos];
1745  nGenerationNumber = (nGenerationNumber << 8) + nCh;
1746  }
1747 
1748  // Ignore invalid nType.
1749  if (nType <= 2)
1750  {
1751  if (m_aXRef.find(nIndex) == m_aXRef.end())
1752  {
1753  XRefEntry aEntry;
1754  switch (nType)
1755  {
1756  case 0:
1757  aEntry.SetType(XRefEntryType::FREE);
1758  break;
1759  case 1:
1760  aEntry.SetType(XRefEntryType::NOT_COMPRESSED);
1761  break;
1762  case 2:
1763  aEntry.SetType(XRefEntryType::COMPRESSED);
1764  break;
1765  }
1766  aEntry.SetOffset(nStreamOffset);
1767  m_aXRef[nIndex] = aEntry;
1768  }
1769  }
1770  }
1771  }
1772 }
1773 
1774 void PDFDocument::ReadXRef(SvStream& rStream)
1775 {
1776  PDFDocument::SkipWhitespace(rStream);
1777 
1778  while (true)
1779  {
1780  PDFNumberElement aFirstObject;
1781  if (!aFirstObject.Read(rStream))
1782  {
1783  // Next token is not a number, it'll be the trailer.
1784  return;
1785  }
1786 
1787  if (aFirstObject.GetValue() < 0)
1788  {
1789  SAL_WARN("vcl.filter", "PDFDocument::ReadXRef: expected first object number >= 0");
1790  return;
1791  }
1792 
1793  PDFDocument::SkipWhitespace(rStream);
1794  PDFNumberElement aNumberOfEntries;
1795  if (!aNumberOfEntries.Read(rStream))
1796  {
1797  SAL_WARN("vcl.filter", "PDFDocument::ReadXRef: failed to read number of entries");
1798  return;
1799  }
1800 
1801  if (aNumberOfEntries.GetValue() < 0)
1802  {
1803  SAL_WARN("vcl.filter", "PDFDocument::ReadXRef: expected zero or more entries");
1804  return;
1805  }
1806 
1807  size_t nSize = aNumberOfEntries.GetValue();
1808  for (size_t nEntry = 0; nEntry < nSize; ++nEntry)
1809  {
1810  size_t nIndex = aFirstObject.GetValue() + nEntry;
1811  PDFDocument::SkipWhitespace(rStream);
1812  PDFNumberElement aOffset;
1813  if (!aOffset.Read(rStream))
1814  {
1815  SAL_WARN("vcl.filter", "PDFDocument::ReadXRef: failed to read offset");
1816  return;
1817  }
1818 
1819  PDFDocument::SkipWhitespace(rStream);
1820  PDFNumberElement aGenerationNumber;
1821  if (!aGenerationNumber.Read(rStream))
1822  {
1823  SAL_WARN("vcl.filter", "PDFDocument::ReadXRef: failed to read generation number");
1824  return;
1825  }
1826 
1827  PDFDocument::SkipWhitespace(rStream);
1828  OString aKeyword = ReadKeyword(rStream);
1829  if (aKeyword != "f" && aKeyword != "n")
1830  {
1831  SAL_WARN("vcl.filter", "PDFDocument::ReadXRef: unexpected keyword");
1832  return;
1833  }
1834  // xrefs are read in reverse order, so never update an existing
1835  // offset with an older one.
1836  if (m_aXRef.find(nIndex) == m_aXRef.end())
1837  {
1838  XRefEntry aEntry;
1839  aEntry.SetOffset(aOffset.GetValue());
1840  // Initially only the first entry is dirty.
1841  if (nIndex == 0)
1842  aEntry.SetDirty(true);
1843  m_aXRef[nIndex] = aEntry;
1844  }
1845  PDFDocument::SkipWhitespace(rStream);
1846  }
1847  }
1848 }
1849 
1850 void PDFDocument::SkipWhitespace(SvStream& rStream)
1851 {
1852  char ch = 0;
1853 
1854  while (true)
1855  {
1856  rStream.ReadChar(ch);
1857  if (rStream.eof())
1858  break;
1859 
1860  if (!rtl::isAsciiWhiteSpace(static_cast<unsigned char>(ch)))
1861  {
1862  rStream.SeekRel(-1);
1863  return;
1864  }
1865  }
1866 }
1867 
1868 void PDFDocument::SkipLineBreaks(SvStream& rStream)
1869 {
1870  char ch = 0;
1871 
1872  while (true)
1873  {
1874  rStream.ReadChar(ch);
1875  if (rStream.eof())
1876  break;
1877 
1878  if (ch != '\n' && ch != '\r')
1879  {
1880  rStream.SeekRel(-1);
1881  return;
1882  }
1883  }
1884 }
1885 
1886 size_t PDFDocument::GetObjectOffset(size_t nIndex) const
1887 {
1888  auto it = m_aXRef.find(nIndex);
1889  if (it == m_aXRef.end() || it->second.GetType() == XRefEntryType::COMPRESSED)
1890  {
1891  SAL_WARN("vcl.filter", "PDFDocument::GetObjectOffset: wanted to look up index #"
1892  << nIndex << ", but failed");
1893  return 0;
1894  }
1895 
1896  return it->second.GetOffset();
1897 }
1898 
1899 const std::vector<std::unique_ptr<PDFElement>>& PDFDocument::GetElements() const
1900 {
1901  return m_aElements;
1902 }
1903 
1905 static void visitPages(PDFObjectElement* pPages, std::vector<PDFObjectElement*>& rRet)
1906 {
1907  auto pKids = dynamic_cast<PDFArrayElement*>(pPages->Lookup("Kids"));
1908  if (!pKids)
1909  {
1910  SAL_WARN("vcl.filter", "visitPages: pages has no kids");
1911  return;
1912  }
1913 
1914  pPages->setVisiting(true);
1915 
1916  for (const auto& pKid : pKids->GetElements())
1917  {
1918  auto pReference = dynamic_cast<PDFReferenceElement*>(pKid);
1919  if (!pReference)
1920  continue;
1921 
1922  PDFObjectElement* pKidObject = pReference->LookupObject();
1923  if (!pKidObject)
1924  continue;
1925 
1926  // detect if visiting reenters itself
1927  if (pKidObject->alreadyVisiting())
1928  {
1929  SAL_WARN("vcl.filter", "visitPages: loop in hierarchy");
1930  continue;
1931  }
1932 
1933  auto pName = dynamic_cast<PDFNameElement*>(pKidObject->Lookup("Type"));
1934  if (pName && pName->GetValue() == "Pages")
1935  // Pages inside pages: recurse.
1936  visitPages(pKidObject, rRet);
1937  else
1938  // Found an actual page.
1939  rRet.push_back(pKidObject);
1940  }
1941 
1942  pPages->setVisiting(false);
1943 }
1944 
1945 PDFObjectElement* PDFDocument::GetCatalog()
1946 {
1947  PDFReferenceElement* pRoot = nullptr;
1948 
1949  PDFTrailerElement* pTrailer = nullptr;
1950  if (!m_aTrailerOffsets.empty())
1951  {
1952  // Get access to the latest trailer, and work with the keys of that
1953  // one.
1954  auto it = m_aOffsetTrailers.find(m_aTrailerOffsets[0]);
1955  if (it != m_aOffsetTrailers.end())
1956  pTrailer = it->second;
1957  }
1958 
1959  if (pTrailer)
1960  pRoot = dynamic_cast<PDFReferenceElement*>(pTrailer->Lookup("Root"));
1961  else if (m_pXRefStream)
1962  pRoot = dynamic_cast<PDFReferenceElement*>(m_pXRefStream->Lookup("Root"));
1963 
1964  if (!pRoot)
1965  {
1966  SAL_WARN("vcl.filter", "PDFDocument::GetCatalog: trailer has no Root key");
1967  return nullptr;
1968  }
1969 
1970  return pRoot->LookupObject();
1971 }
1972 
1973 std::vector<PDFObjectElement*> PDFDocument::GetPages()
1974 {
1975  std::vector<PDFObjectElement*> aRet;
1976 
1977  PDFObjectElement* pCatalog = GetCatalog();
1978  if (!pCatalog)
1979  {
1980  SAL_WARN("vcl.filter", "PDFDocument::GetPages: trailer has no catalog");
1981  return aRet;
1982  }
1983 
1984  PDFObjectElement* pPages = pCatalog->LookupObject("Pages");
1985  if (!pPages)
1986  {
1987  SAL_WARN("vcl.filter", "PDFDocument::GetPages: catalog (obj " << pCatalog->GetObjectValue()
1988  << ") has no pages");
1989  return aRet;
1990  }
1991 
1992  visitPages(pPages, aRet);
1993 
1994  return aRet;
1995 }
1996 
1997 void PDFDocument::PushBackEOF(size_t nOffset) { m_aEOFs.push_back(nOffset); }
1998 
1999 std::vector<PDFObjectElement*> PDFDocument::GetSignatureWidgets()
2000 {
2001  std::vector<PDFObjectElement*> aRet;
2002 
2003  std::vector<PDFObjectElement*> aPages = GetPages();
2004 
2005  for (const auto& pPage : aPages)
2006  {
2007  if (!pPage)
2008  continue;
2009 
2010  PDFElement* pAnnotsElement = pPage->Lookup("Annots");
2011  auto pAnnots = dynamic_cast<PDFArrayElement*>(pAnnotsElement);
2012  if (!pAnnots)
2013  {
2014  // Annots is not an array, see if it's a reference to an object
2015  // with a direct array.
2016  auto pAnnotsRef = dynamic_cast<PDFReferenceElement*>(pAnnotsElement);
2017  if (pAnnotsRef)
2018  {
2019  if (PDFObjectElement* pAnnotsObject = pAnnotsRef->LookupObject())
2020  {
2021  pAnnots = pAnnotsObject->GetArray();
2022  }
2023  }
2024  }
2025 
2026  if (!pAnnots)
2027  continue;
2028 
2029  for (const auto& pAnnot : pAnnots->GetElements())
2030  {
2031  auto pReference = dynamic_cast<PDFReferenceElement*>(pAnnot);
2032  if (!pReference)
2033  continue;
2034 
2035  PDFObjectElement* pAnnotObject = pReference->LookupObject();
2036  if (!pAnnotObject)
2037  continue;
2038 
2039  auto pFT = dynamic_cast<PDFNameElement*>(pAnnotObject->Lookup("FT"));
2040  if (!pFT || pFT->GetValue() != "Sig")
2041  continue;
2042 
2043  aRet.push_back(pAnnotObject);
2044  }
2045  }
2046 
2047  return aRet;
2048 }
2049 
2050 std::vector<unsigned char> PDFDocument::DecodeHexString(PDFHexStringElement const* pElement)
2051 {
2052  return svl::crypto::DecodeHexString(pElement->GetValue());
2053 }
2054 
2055 OUString PDFDocument::DecodeHexStringUTF16BE(PDFHexStringElement const& rElement)
2056 {
2057  std::vector<unsigned char> const encoded(DecodeHexString(&rElement));
2058  // Text strings can be PDF-DocEncoding or UTF-16BE with mandatory BOM;
2059  // only the latter supported is here
2060  if (encoded.size() < 2 || encoded[0] != 0xFE || encoded[1] != 0xFF || (encoded.size() & 1) != 0)
2061  {
2062  return OUString();
2063  }
2064  OUStringBuffer buf(static_cast<unsigned int>(encoded.size() - 2));
2065  for (size_t i = 2; i < encoded.size(); i += 2)
2066  {
2067  buf.append(sal_Unicode((static_cast<sal_uInt16>(encoded[i]) << 8) | encoded[i + 1]));
2068  }
2069  return buf.makeStringAndClear();
2070 }
2071 
2072 PDFCommentElement::PDFCommentElement(PDFDocument& rDoc)
2073  : m_rDoc(rDoc)
2074 {
2075 }
2076 
2078 {
2079  // Read from (including) the % char till (excluding) the end of the line/stream.
2080  OStringBuffer aBuf;
2081  char ch;
2082  rStream.ReadChar(ch);
2083  while (true)
2084  {
2085  if (ch == '\n' || ch == '\r' || rStream.eof())
2086  {
2087  m_aComment = aBuf.makeStringAndClear();
2088 
2089  if (m_aComment.startsWith("%%EOF"))
2090  {
2091  sal_uInt64 nPos = rStream.Tell();
2092  if (ch == '\r')
2093  {
2094  rStream.ReadChar(ch);
2095  rStream.SeekRel(-1);
2096  // If the comment ends with a \r\n, count the \n as well to match Adobe Acrobat
2097  // behavior.
2098  if (ch == '\n')
2099  {
2100  nPos += 1;
2101  }
2102  }
2103  m_rDoc.PushBackEOF(nPos);
2104  }
2105 
2106  SAL_INFO("vcl.filter", "PDFCommentElement::Read: m_aComment is '" << m_aComment << "'");
2107  return true;
2108  }
2109  aBuf.append(ch);
2110  rStream.ReadChar(ch);
2111  }
2112 
2113  return false;
2114 }
2115 
2117 
2119 {
2120  OStringBuffer aBuf;
2121  m_nOffset = rStream.Tell();
2122  char ch;
2123  rStream.ReadChar(ch);
2124  if (rStream.eof())
2125  {
2126  return false;
2127  }
2128  if (!rtl::isAsciiDigit(static_cast<unsigned char>(ch)) && ch != '-' && ch != '+' && ch != '.')
2129  {
2130  rStream.SeekRel(-1);
2131  return false;
2132  }
2133  while (!rStream.eof())
2134  {
2135  if (!rtl::isAsciiDigit(static_cast<unsigned char>(ch)) && ch != '-' && ch != '+'
2136  && ch != '.')
2137  {
2138  rStream.SeekRel(-1);
2139  m_nLength = rStream.Tell() - m_nOffset;
2140  m_fValue = aBuf.makeStringAndClear().toDouble();
2141  SAL_INFO("vcl.filter", "PDFNumberElement::Read: m_fValue is '" << m_fValue << "'");
2142  return true;
2143  }
2144  aBuf.append(ch);
2145  rStream.ReadChar(ch);
2146  }
2147 
2148  return false;
2149 }
2150 
2151 sal_uInt64 PDFNumberElement::GetLocation() const { return m_nOffset; }
2152 
2153 sal_uInt64 PDFNumberElement::GetLength() const { return m_nLength; }
2154 
2155 bool PDFBooleanElement::Read(SvStream& /*rStream*/) { return true; }
2156 
2157 bool PDFNullElement::Read(SvStream& /*rStream*/) { return true; }
2158 
2160 {
2161  char ch;
2162  rStream.ReadChar(ch);
2163  if (ch != '<')
2164  {
2165  SAL_INFO("vcl.filter", "PDFHexStringElement::Read: expected '<' as first character");
2166  return false;
2167  }
2168  rStream.ReadChar(ch);
2169 
2170  OStringBuffer aBuf;
2171  while (!rStream.eof())
2172  {
2173  if (ch == '>')
2174  {
2175  m_aValue = aBuf.makeStringAndClear();
2176  SAL_INFO("vcl.filter",
2177  "PDFHexStringElement::Read: m_aValue length is " << m_aValue.getLength());
2178  return true;
2179  }
2180  aBuf.append(ch);
2181  rStream.ReadChar(ch);
2182  }
2183 
2184  return false;
2185 }
2186 
2187 const OString& PDFHexStringElement::GetValue() const { return m_aValue; }
2188 
2190 {
2191  char nPrevCh = 0;
2192  char ch = 0;
2193  rStream.ReadChar(ch);
2194  if (ch != '(')
2195  {
2196  SAL_INFO("vcl.filter", "PDFHexStringElement::Read: expected '(' as first character");
2197  return false;
2198  }
2199  nPrevCh = ch;
2200  rStream.ReadChar(ch);
2201 
2202  // Start with 1 nesting level as we read a '(' above already.
2203  int nDepth = 1;
2204  OStringBuffer aBuf;
2205  while (!rStream.eof())
2206  {
2207  if (ch == '(' && nPrevCh != '\\')
2208  ++nDepth;
2209 
2210  if (ch == ')' && nPrevCh != '\\')
2211  --nDepth;
2212 
2213  if (nDepth == 0)
2214  {
2215  // ')' of the outermost '(' is reached.
2216  m_aValue = aBuf.makeStringAndClear();
2217  SAL_INFO("vcl.filter",
2218  "PDFLiteralStringElement::Read: m_aValue is '" << m_aValue << "'");
2219  return true;
2220  }
2221  aBuf.append(ch);
2222  nPrevCh = ch;
2223  rStream.ReadChar(ch);
2224  }
2225 
2226  return false;
2227 }
2228 
2229 const OString& PDFLiteralStringElement::GetValue() const { return m_aValue; }
2230 
2232  : m_rDoc(rDoc)
2233  , m_pDictionaryElement(nullptr)
2234 {
2235 }
2236 
2238 {
2239  m_nOffset = rStream.Tell();
2240  return true;
2241 }
2242 
2243 PDFElement* PDFTrailerElement::Lookup(const OString& rDictionaryKey)
2244 {
2245  if (!m_pDictionaryElement)
2246  {
2247  PDFObjectParser aParser(m_rDoc.GetElements());
2248  aParser.parse(this);
2249  }
2250  if (!m_pDictionaryElement)
2251  return nullptr;
2252  return m_pDictionaryElement->LookupElement(rDictionaryKey);
2253 }
2254 
2255 sal_uInt64 PDFTrailerElement::GetLocation() const { return m_nOffset; }
2256 
2257 double PDFNumberElement::GetValue() const { return m_fValue; }
2258 
2259 PDFObjectElement::PDFObjectElement(PDFDocument& rDoc, double fObjectValue, double fGenerationValue)
2260  : m_rDoc(rDoc)
2261  , m_fObjectValue(fObjectValue)
2262  , m_fGenerationValue(fGenerationValue)
2263  , m_pNumberElement(nullptr)
2264  , m_nDictionaryOffset(0)
2265  , m_nDictionaryLength(0)
2266  , m_pDictionaryElement(nullptr)
2267  , m_nArrayOffset(0)
2268  , m_nArrayLength(0)
2269  , m_pArrayElement(nullptr)
2270  , m_pStreamElement(nullptr)
2271  , m_bParsed(false)
2272 {
2273 }
2274 
2276 {
2277  SAL_INFO("vcl.filter",
2278  "PDFObjectElement::Read: " << m_fObjectValue << " " << m_fGenerationValue << " obj");
2279  return true;
2280 }
2281 
2283 
2284 PDFElement* PDFDictionaryElement::Lookup(const std::map<OString, PDFElement*>& rDictionary,
2285  const OString& rKey)
2286 {
2287  auto it = rDictionary.find(rKey);
2288  if (it == rDictionary.end())
2289  return nullptr;
2290 
2291  return it->second;
2292 }
2293 
2295 {
2296  auto pKey = dynamic_cast<PDFReferenceElement*>(
2297  PDFDictionaryElement::Lookup(m_aItems, rDictionaryKey));
2298  if (!pKey)
2299  {
2300  SAL_WARN("vcl.filter",
2301  "PDFDictionaryElement::LookupObject: no such key with reference value: "
2302  << rDictionaryKey);
2303  return nullptr;
2304  }
2305 
2306  return pKey->LookupObject();
2307 }
2308 
2309 PDFElement* PDFDictionaryElement::LookupElement(const OString& rDictionaryKey)
2310 {
2311  return PDFDictionaryElement::Lookup(m_aItems, rDictionaryKey);
2312 }
2313 
2315 {
2316  if (!m_bParsed)
2317  {
2318  if (!m_aElements.empty())
2319  {
2320  // This is a stored object in an object stream.
2321  PDFObjectParser aParser(m_aElements);
2322  aParser.parse(this);
2323  }
2324  else
2325  {
2326  // Normal object: elements are stored as members of the document itself.
2327  PDFObjectParser aParser(m_rDoc.GetElements());
2328  aParser.parse(this);
2329  }
2330  m_bParsed = true;
2331  }
2332 }
2333 
2334 PDFElement* PDFObjectElement::Lookup(const OString& rDictionaryKey)
2335 {
2336  parseIfNecessary();
2337  if (!m_pDictionaryElement)
2338  return nullptr;
2339  return PDFDictionaryElement::Lookup(GetDictionaryItems(), rDictionaryKey);
2340 }
2341 
2342 PDFObjectElement* PDFObjectElement::LookupObject(const OString& rDictionaryKey)
2343 {
2344  auto pKey = dynamic_cast<PDFReferenceElement*>(Lookup(rDictionaryKey));
2345  if (!pKey)
2346  {
2347  SAL_WARN("vcl.filter", "PDFObjectElement::LookupObject: no such key with reference value: "
2348  << rDictionaryKey);
2349  return nullptr;
2350  }
2351 
2352  return pKey->LookupObject();
2353 }
2354 
2356 
2357 void PDFObjectElement::SetDictionaryOffset(sal_uInt64 nDictionaryOffset)
2358 {
2359  m_nDictionaryOffset = nDictionaryOffset;
2360 }
2361 
2363 {
2364  parseIfNecessary();
2365  return m_nDictionaryOffset;
2366 }
2367 
2368 void PDFObjectElement::SetArrayOffset(sal_uInt64 nArrayOffset) { m_nArrayOffset = nArrayOffset; }
2369 
2370 sal_uInt64 PDFObjectElement::GetArrayOffset() const { return m_nArrayOffset; }
2371 
2372 void PDFDictionaryElement::SetKeyOffset(const OString& rKey, sal_uInt64 nOffset)
2373 {
2374  m_aDictionaryKeyOffset[rKey] = nOffset;
2375 }
2376 
2377 void PDFDictionaryElement::SetKeyValueLength(const OString& rKey, sal_uInt64 nLength)
2378 {
2379  m_aDictionaryKeyValueLength[rKey] = nLength;
2380 }
2381 
2382 sal_uInt64 PDFDictionaryElement::GetKeyOffset(const OString& rKey) const
2383 {
2384  auto it = m_aDictionaryKeyOffset.find(rKey);
2385  if (it == m_aDictionaryKeyOffset.end())
2386  return 0;
2387 
2388  return it->second;
2389 }
2390 
2391 sal_uInt64 PDFDictionaryElement::GetKeyValueLength(const OString& rKey) const
2392 {
2393  auto it = m_aDictionaryKeyValueLength.find(rKey);
2394  if (it == m_aDictionaryKeyValueLength.end())
2395  return 0;
2396 
2397  return it->second;
2398 }
2399 
2400 const std::map<OString, PDFElement*>& PDFDictionaryElement::GetItems() const { return m_aItems; }
2401 
2402 void PDFObjectElement::SetDictionaryLength(sal_uInt64 nDictionaryLength)
2403 {
2404  m_nDictionaryLength = nDictionaryLength;
2405 }
2406 
2408 {
2409  parseIfNecessary();
2410  return m_nDictionaryLength;
2411 }
2412 
2413 void PDFObjectElement::SetArrayLength(sal_uInt64 nArrayLength) { m_nArrayLength = nArrayLength; }
2414 
2415 sal_uInt64 PDFObjectElement::GetArrayLength() const { return m_nArrayLength; }
2416 
2418 {
2419  parseIfNecessary();
2420  return m_pDictionaryElement;
2421 }
2422 
2424 {
2425  m_pDictionaryElement = pDictionaryElement;
2426 }
2427 
2429 {
2430  m_pNumberElement = pNumberElement;
2431 }
2432 
2434 
2435 const std::vector<PDFReferenceElement*>& PDFObjectElement::GetDictionaryReferences() const
2436 {
2437  return m_aDictionaryReferences;
2438 }
2439 
2441 {
2442  m_aDictionaryReferences.push_back(pReference);
2443 }
2444 
2445 const std::map<OString, PDFElement*>& PDFObjectElement::GetDictionaryItems()
2446 {
2447  parseIfNecessary();
2448  return m_pDictionaryElement->GetItems();
2449 }
2450 
2451 void PDFObjectElement::SetArray(PDFArrayElement* pArrayElement) { m_pArrayElement = pArrayElement; }
2452 
2454 {
2455  m_pStreamElement = pStreamElement;
2456 }
2457 
2459 
2461 {
2462  parseIfNecessary();
2463  return m_pArrayElement;
2464 }
2465 
2467 {
2468  if (!m_pStreamElement)
2469  {
2470  SAL_WARN("vcl.filter", "PDFObjectElement::ParseStoredObjects: no stream");
2471  return;
2472  }
2473 
2474  auto pType = dynamic_cast<PDFNameElement*>(Lookup("Type"));
2475  if (!pType || pType->GetValue() != "ObjStm")
2476  {
2477  if (!pType)
2478  SAL_WARN("vcl.filter", "PDFDocument::ReadXRefStream: missing unexpected type");
2479  else
2480  SAL_WARN("vcl.filter",
2481  "PDFDocument::ReadXRefStream: unexpected type: " << pType->GetValue());
2482  return;
2483  }
2484 
2485  auto pFilter = dynamic_cast<PDFNameElement*>(Lookup("Filter"));
2486  if (!pFilter || pFilter->GetValue() != "FlateDecode")
2487  {
2488  if (!pFilter)
2489  SAL_WARN("vcl.filter", "PDFDocument::ReadXRefStream: missing filter");
2490  else
2491  SAL_WARN("vcl.filter",
2492  "PDFDocument::ReadXRefStream: unexpected filter: " << pFilter->GetValue());
2493  return;
2494  }
2495 
2496  auto pFirst = dynamic_cast<PDFNumberElement*>(Lookup("First"));
2497  if (!pFirst)
2498  {
2499  SAL_WARN("vcl.filter", "PDFObjectElement::ParseStoredObjects: no First");
2500  return;
2501  }
2502 
2503  auto pN = dynamic_cast<PDFNumberElement*>(Lookup("N"));
2504  if (!pN)
2505  {
2506  SAL_WARN("vcl.filter", "PDFObjectElement::ParseStoredObjects: no N");
2507  return;
2508  }
2509  size_t nN = pN->GetValue();
2510 
2511  auto pLength = dynamic_cast<PDFNumberElement*>(Lookup("Length"));
2512  if (!pLength)
2513  {
2514  SAL_WARN("vcl.filter", "PDFObjectElement::ParseStoredObjects: no length");
2515  return;
2516  }
2517  size_t nLength = pLength->GetValue();
2518 
2519  // Read and decompress it.
2520  SvMemoryStream& rEditBuffer = m_rDoc.GetEditBuffer();
2521  rEditBuffer.Seek(m_pStreamElement->GetOffset());
2522  std::vector<char> aBuf(nLength);
2523  rEditBuffer.ReadBytes(aBuf.data(), aBuf.size());
2524  SvMemoryStream aSource(aBuf.data(), aBuf.size(), StreamMode::READ);
2525  SvMemoryStream aStream;
2526  ZCodec aZCodec;
2527  aZCodec.BeginCompression();
2528  aZCodec.Decompress(aSource, aStream);
2529  if (!aZCodec.EndCompression())
2530  {
2531  SAL_WARN("vcl.filter", "PDFObjectElement::ParseStoredObjects: decompression failed");
2532  return;
2533  }
2534 
2535  nLength = aStream.TellEnd();
2536  aStream.Seek(0);
2537  std::vector<size_t> aObjNums;
2538  std::vector<size_t> aOffsets;
2539  std::vector<size_t> aLengths;
2540  // First iterate over and find out the lengths.
2541  for (size_t nObject = 0; nObject < nN; ++nObject)
2542  {
2543  PDFNumberElement aObjNum;
2544  if (!aObjNum.Read(aStream))
2545  {
2546  SAL_WARN("vcl.filter",
2547  "PDFObjectElement::ParseStoredObjects: failed to read object number");
2548  return;
2549  }
2550  aObjNums.push_back(aObjNum.GetValue());
2551 
2552  PDFDocument::SkipWhitespace(aStream);
2553 
2554  PDFNumberElement aByteOffset;
2555  if (!aByteOffset.Read(aStream))
2556  {
2557  SAL_WARN("vcl.filter",
2558  "PDFObjectElement::ParseStoredObjects: failed to read byte offset");
2559  return;
2560  }
2561  aOffsets.push_back(pFirst->GetValue() + aByteOffset.GetValue());
2562 
2563  if (aOffsets.size() > 1)
2564  aLengths.push_back(aOffsets.back() - aOffsets[aOffsets.size() - 2]);
2565  if (nObject + 1 == nN)
2566  aLengths.push_back(nLength - aOffsets.back());
2567 
2568  PDFDocument::SkipWhitespace(aStream);
2569  }
2570 
2571  // Now create streams with the proper length and tokenize the data.
2572  for (size_t nObject = 0; nObject < nN; ++nObject)
2573  {
2574  size_t nObjNum = aObjNums[nObject];
2575  size_t nOffset = aOffsets[nObject];
2576  size_t nLen = aLengths[nObject];
2577 
2578  aStream.Seek(nOffset);
2579  m_aStoredElements.push_back(std::make_unique<PDFObjectElement>(m_rDoc, nObjNum, 0));
2580  PDFObjectElement* pStored = m_aStoredElements.back().get();
2581 
2582  aBuf.clear();
2583  aBuf.resize(nLen);
2584  aStream.ReadBytes(aBuf.data(), aBuf.size());
2585  SvMemoryStream aStoredStream(aBuf.data(), aBuf.size(), StreamMode::READ);
2586 
2587  m_rDoc.Tokenize(aStoredStream, TokenizeMode::STORED_OBJECT, pStored->GetStoredElements(),
2588  pStored);
2589  // This is how references know the object is stored inside this object stream.
2590  m_rDoc.SetIDObject(nObjNum, pStored);
2591 
2592  // Store the stream of the object in the object stream for later use.
2593  std::unique_ptr<SvMemoryStream> pStreamBuffer(new SvMemoryStream());
2594  aStoredStream.Seek(0);
2595  pStreamBuffer->WriteStream(aStoredStream);
2596  pStored->SetStreamBuffer(pStreamBuffer);
2597  }
2598 }
2599 
2600 std::vector<std::unique_ptr<PDFElement>>& PDFObjectElement::GetStoredElements()
2601 {
2602  return m_aElements;
2603 }
2604 
2606 
2607 void PDFObjectElement::SetStreamBuffer(std::unique_ptr<SvMemoryStream>& pStreamBuffer)
2608 {
2609  m_pStreamBuffer = std::move(pStreamBuffer);
2610 }
2611 
2613 
2615  PDFNumberElement const& rGeneration)
2616  : m_rDoc(rDoc)
2617  , m_fObjectValue(rObject.GetValue())
2618  , m_fGenerationValue(rGeneration.GetValue())
2619  , m_rObject(rObject)
2620 {
2621 }
2622 
2624 
2626 {
2627  SAL_INFO("vcl.filter",
2628  "PDFReferenceElement::Read: " << m_fObjectValue << " " << m_fGenerationValue << " R");
2629  m_nOffset = rStream.Tell();
2630  return true;
2631 }
2632 
2633 sal_uInt64 PDFReferenceElement::GetOffset() const { return m_nOffset; }
2634 
2636 {
2637  size_t nOffset = m_rDoc.GetObjectOffset(m_fObjectValue);
2638  if (nOffset == 0)
2639  {
2640  SAL_WARN("vcl.filter", "PDFReferenceElement::LookupNumber: found no offset for object #"
2641  << m_fObjectValue);
2642  return 0;
2643  }
2644 
2645  sal_uInt64 nOrigPos = rStream.Tell();
2646  comphelper::ScopeGuard g([&]() { rStream.Seek(nOrigPos); });
2647 
2648  rStream.Seek(nOffset);
2649  {
2650  PDFDocument::SkipWhitespace(rStream);
2651  PDFNumberElement aNumber;
2652  bool bRet = aNumber.Read(rStream);
2653  if (!bRet || aNumber.GetValue() != m_fObjectValue)
2654  {
2655  SAL_WARN("vcl.filter",
2656  "PDFReferenceElement::LookupNumber: offset points to not matching object");
2657  return 0;
2658  }
2659  }
2660 
2661  {
2662  PDFDocument::SkipWhitespace(rStream);
2663  PDFNumberElement aNumber;
2664  bool bRet = aNumber.Read(rStream);
2665  if (!bRet || aNumber.GetValue() != m_fGenerationValue)
2666  {
2667  SAL_WARN("vcl.filter",
2668  "PDFReferenceElement::LookupNumber: offset points to not matching generation");
2669  return 0;
2670  }
2671  }
2672 
2673  {
2674  PDFDocument::SkipWhitespace(rStream);
2675  OString aKeyword = PDFDocument::ReadKeyword(rStream);
2676  if (aKeyword != "obj")
2677  {
2678  SAL_WARN("vcl.filter",
2679  "PDFReferenceElement::LookupNumber: offset doesn't point to an obj keyword");
2680  return 0;
2681  }
2682  }
2683 
2684  PDFDocument::SkipWhitespace(rStream);
2685  PDFNumberElement aNumber;
2686  if (!aNumber.Read(rStream))
2687  {
2688  SAL_WARN("vcl.filter",
2689  "PDFReferenceElement::LookupNumber: failed to read referenced number");
2690  return 0;
2691  }
2692 
2693  return aNumber.GetValue();
2694 }
2695 
2697 {
2699 }
2700 
2702 {
2703  auto itIDObjects = m_aIDObjects.find(nObjectNumber);
2704 
2705  if (itIDObjects != m_aIDObjects.end())
2706  return itIDObjects->second;
2707 
2708  SAL_WARN("vcl.filter", "PDFDocument::LookupObject: can't find obj " << nObjectNumber);
2709  return nullptr;
2710 }
2711 
2713 
2715 
2717 
2719 {
2720  char ch;
2721  rStream.ReadChar(ch);
2722  if (ch != '<')
2723  {
2724  SAL_WARN("vcl.filter", "PDFDictionaryElement::Read: unexpected character: " << ch);
2725  return false;
2726  }
2727 
2728  if (rStream.eof())
2729  {
2730  SAL_WARN("vcl.filter", "PDFDictionaryElement::Read: unexpected end of file");
2731  return false;
2732  }
2733 
2734  rStream.ReadChar(ch);
2735  if (ch != '<')
2736  {
2737  SAL_WARN("vcl.filter", "PDFDictionaryElement::Read: unexpected character: " << ch);
2738  return false;
2739  }
2740 
2741  m_nLocation = rStream.Tell();
2742 
2743  SAL_INFO("vcl.filter", "PDFDictionaryElement::Read: '<<'");
2744 
2745  return true;
2746 }
2747 
2749 
2751 
2753 {
2754  m_nLocation = rStream.Tell();
2755  char ch;
2756  rStream.ReadChar(ch);
2757  if (ch != '>')
2758  {
2759  SAL_WARN("vcl.filter", "PDFEndDictionaryElement::Read: unexpected character: " << ch);
2760  return false;
2761  }
2762 
2763  if (rStream.eof())
2764  {
2765  SAL_WARN("vcl.filter", "PDFEndDictionaryElement::Read: unexpected end of file");
2766  return false;
2767  }
2768 
2769  rStream.ReadChar(ch);
2770  if (ch != '>')
2771  {
2772  SAL_WARN("vcl.filter", "PDFEndDictionaryElement::Read: unexpected character: " << ch);
2773  return false;
2774  }
2775 
2776  SAL_INFO("vcl.filter", "PDFEndDictionaryElement::Read: '>>'");
2777 
2778  return true;
2779 }
2780 
2781 PDFNameElement::PDFNameElement() = default;
2782 
2784 {
2785  char ch;
2786  rStream.ReadChar(ch);
2787  if (ch != '/')
2788  {
2789  SAL_WARN("vcl.filter", "PDFNameElement::Read: unexpected character: " << ch);
2790  return false;
2791  }
2792  m_nLocation = rStream.Tell();
2793 
2794  if (rStream.eof())
2795  {
2796  SAL_WARN("vcl.filter", "PDFNameElement::Read: unexpected end of file");
2797  return false;
2798  }
2799 
2800  // Read till the first white-space.
2801  OStringBuffer aBuf;
2802  rStream.ReadChar(ch);
2803  while (!rStream.eof())
2804  {
2805  if (rtl::isAsciiWhiteSpace(static_cast<unsigned char>(ch)) || ch == '/' || ch == '['
2806  || ch == ']' || ch == '<' || ch == '>' || ch == '(')
2807  {
2808  rStream.SeekRel(-1);
2809  m_aValue = aBuf.makeStringAndClear();
2810  SAL_INFO("vcl.filter", "PDFNameElement::Read: m_aValue is '" << m_aValue << "'");
2811  return true;
2812  }
2813  aBuf.append(ch);
2814  rStream.ReadChar(ch);
2815  }
2816 
2817  return false;
2818 }
2819 
2820 const OString& PDFNameElement::GetValue() const { return m_aValue; }
2821 
2822 sal_uInt64 PDFNameElement::GetLocation() const { return m_nLocation; }
2823 
2825  : m_nLength(nLength)
2826  , m_nOffset(0)
2827 {
2828 }
2829 
2831 {
2832  SAL_INFO("vcl.filter", "PDFStreamElement::Read: length is " << m_nLength);
2833  m_nOffset = rStream.Tell();
2834  std::vector<unsigned char> aBytes(m_nLength);
2835  rStream.ReadBytes(aBytes.data(), aBytes.size());
2836  m_aMemory.WriteBytes(aBytes.data(), aBytes.size());
2837 
2838  return rStream.good();
2839 }
2840 
2842 
2843 sal_uInt64 PDFStreamElement::GetOffset() const { return m_nOffset; }
2844 
2845 bool PDFEndStreamElement::Read(SvStream& /*rStream*/) { return true; }
2846 
2847 bool PDFEndObjectElement::Read(SvStream& /*rStream*/) { return true; }
2848 
2850  : m_pObject(pObject)
2851 {
2852 }
2853 
2855 {
2856  char ch;
2857  rStream.ReadChar(ch);
2858  if (ch != '[')
2859  {
2860  SAL_WARN("vcl.filter", "PDFArrayElement::Read: unexpected character: " << ch);
2861  return false;
2862  }
2863 
2864  SAL_INFO("vcl.filter", "PDFArrayElement::Read: '['");
2865 
2866  return true;
2867 }
2868 
2870 {
2871  if (m_pObject)
2872  SAL_INFO("vcl.filter",
2873  "PDFArrayElement::PushBack: object is " << m_pObject->GetObjectValue());
2874  m_aElements.push_back(pElement);
2875 }
2876 
2877 const std::vector<PDFElement*>& PDFArrayElement::GetElements() const { return m_aElements; }
2878 
2880 
2882 {
2883  m_nOffset = rStream.Tell();
2884  char ch;
2885  rStream.ReadChar(ch);
2886  if (ch != ']')
2887  {
2888  SAL_WARN("vcl.filter", "PDFEndArrayElement::Read: unexpected character: " << ch);
2889  return false;
2890  }
2891 
2892  SAL_INFO("vcl.filter", "PDFEndArrayElement::Read: ']'");
2893 
2894  return true;
2895 }
2896 
2897 sal_uInt64 PDFEndArrayElement::GetOffset() const { return m_nOffset; }
2898 
2899 // PDFObjectParser
2900 
2901 size_t PDFObjectParser::parse(PDFElement* pParsingElement, size_t nStartIndex, int nCurrentDepth)
2902 {
2903  // The index of last parsed element
2904  size_t nReturnIndex = 0;
2905 
2906  pParsingElement->setParsing(true);
2907 
2908  comphelper::ScopeGuard aGuard([pParsingElement]() { pParsingElement->setParsing(false); });
2909 
2910  // Current object, if root is an object, else nullptr
2911  auto pParsingObject = dynamic_cast<PDFObjectElement*>(pParsingElement);
2912  auto pParsingTrailer = dynamic_cast<PDFTrailerElement*>(pParsingElement);
2913 
2914  // Current dictionary, if root is an dictionary, else nullptr
2915  auto pParsingDictionary = dynamic_cast<PDFDictionaryElement*>(pParsingElement);
2916 
2917  // Current parsing array, if root is an array, else nullptr
2918  auto pParsingArray = dynamic_cast<PDFArrayElement*>(pParsingElement);
2919 
2920  // Find out where the dictionary for this object starts.
2921  size_t nIndex = nStartIndex;
2922  for (size_t i = nStartIndex; i < mrElements.size(); ++i)
2923  {
2924  if (mrElements[i].get() == pParsingElement)
2925  {
2926  nIndex = i;
2927  break;
2928  }
2929  }
2930 
2931  OString aName;
2932  sal_uInt64 nNameOffset = 0;
2933  std::vector<PDFNumberElement*> aNumbers;
2934 
2935  sal_uInt64 nDictionaryOffset = 0;
2936 
2937  // Current depth; 1 is current
2938  int nDepth = 0;
2939 
2940  for (size_t i = nIndex; i < mrElements.size(); ++i)
2941  {
2942  auto* pCurrentElement = mrElements[i].get();
2943 
2944  // Dictionary tokens can be nested, track enter/leave.
2945  if (auto pCurrentDictionary = dynamic_cast<PDFDictionaryElement*>(pCurrentElement))
2946  {
2947  // Handle previously stored number
2948  if (!aNumbers.empty())
2949  {
2950  if (pParsingDictionary)
2951  {
2952  PDFNumberElement* pNumber = aNumbers.back();
2953  sal_uInt64 nLength
2954  = pNumber->GetLocation() + pNumber->GetLength() - nNameOffset;
2955 
2956  pParsingDictionary->insert(aName, pNumber);
2957  pParsingDictionary->SetKeyOffset(aName, nNameOffset);
2958  pParsingDictionary->SetKeyValueLength(aName, nLength);
2959  }
2960  else if (pParsingArray)
2961  {
2962  for (auto& pNumber : aNumbers)
2963  pParsingArray->PushBack(pNumber);
2964  }
2965  else
2966  {
2967  SAL_INFO("vcl.filter", "neither Dictionary nor Array available");
2968  }
2969  aName.clear();
2970  aNumbers.clear();
2971  }
2972 
2973  nDepth++;
2974 
2975  if (nDepth == 1) // pParsingDictionary is the current one
2976  {
2977  // First dictionary start, track start offset.
2978  nDictionaryOffset = pCurrentDictionary->GetLocation();
2979 
2980  if (pParsingObject)
2981  {
2982  // Then the toplevel dictionary of the object.
2983  pParsingObject->SetDictionary(pCurrentDictionary);
2984  pParsingObject->SetDictionaryOffset(nDictionaryOffset);
2985  pParsingDictionary = pCurrentDictionary;
2986  }
2987  else if (pParsingTrailer)
2988  {
2989  pParsingTrailer->SetDictionary(pCurrentDictionary);
2990  pParsingDictionary = pCurrentDictionary;
2991  }
2992  }
2993  else if (!pCurrentDictionary->alreadyParsing())
2994  {
2995  if (pParsingArray)
2996  {
2997  pParsingArray->PushBack(pCurrentDictionary);
2998  }
2999  else if (pParsingDictionary)
3000  {
3001  // Dictionary toplevel value.
3002  pParsingDictionary->insert(aName, pCurrentDictionary);
3003  }
3004  else
3005  {
3006  SAL_INFO("vcl.filter", "neither Dictionary nor Array available");
3007  }
3008  // Nested dictionary.
3009  const size_t nNextElementIndex = parse(pCurrentDictionary, i, nCurrentDepth + 1);
3010  i = std::max(i, nNextElementIndex - 1);
3011  }
3012  }
3013  else if (auto pCurrentEndDictionary
3014  = dynamic_cast<PDFEndDictionaryElement*>(pCurrentElement))
3015  {
3016  // Handle previously stored number
3017  if (!aNumbers.empty())
3018  {
3019  if (pParsingDictionary)
3020  {
3021  PDFNumberElement* pNumber = aNumbers.back();
3022  sal_uInt64 nLength
3023  = pNumber->GetLocation() + pNumber->GetLength() - nNameOffset;
3024 
3025  pParsingDictionary->insert(aName, pNumber);
3026  pParsingDictionary->SetKeyOffset(aName, nNameOffset);
3027  pParsingDictionary->SetKeyValueLength(aName, nLength);
3028  }
3029  else if (pParsingArray)
3030  {
3031  for (auto& pNumber : aNumbers)
3032  pParsingArray->PushBack(pNumber);
3033  }
3034  else
3035  {
3036  SAL_INFO("vcl.filter", "neither Dictionary nor Array available");
3037  }
3038  aName.clear();
3039  aNumbers.clear();
3040  }
3041 
3042  if (pParsingDictionary)
3043  {
3044  pParsingDictionary->SetKeyOffset(aName, nNameOffset);
3045  sal_uInt64 nLength = pCurrentEndDictionary->GetLocation() - nNameOffset + 2;
3046  pParsingDictionary->SetKeyValueLength(aName, nLength);
3047  aName.clear();
3048  }
3049 
3050  if (nDepth == 1) // did the parsing ended
3051  {
3052  // Last dictionary end, track length and stop parsing.
3053  if (pParsingObject)
3054  {
3055  sal_uInt64 nDictionaryLength
3056  = pCurrentEndDictionary->GetLocation() - nDictionaryOffset;
3057  pParsingObject->SetDictionaryLength(nDictionaryLength);
3058  }
3059  nReturnIndex = i;
3060  break;
3061  }
3062 
3063  nDepth--;
3064  }
3065  else if (auto pCurrentArray = dynamic_cast<PDFArrayElement*>(pCurrentElement))
3066  {
3067  // Handle previously stored number
3068  if (!aNumbers.empty())
3069  {
3070  if (pParsingDictionary)
3071  {
3072  PDFNumberElement* pNumber = aNumbers.back();
3073 
3074  sal_uInt64 nLength
3075  = pNumber->GetLocation() + pNumber->GetLength() - nNameOffset;
3076  pParsingDictionary->insert(aName, pNumber);
3077  pParsingDictionary->SetKeyOffset(aName, nNameOffset);
3078  pParsingDictionary->SetKeyValueLength(aName, nLength);
3079  }
3080  else if (pParsingArray)
3081  {
3082  for (auto& pNumber : aNumbers)
3083  pParsingArray->PushBack(pNumber);
3084  }
3085  else
3086  {
3087  SAL_INFO("vcl.filter", "neither Dictionary nor Array available");
3088  }
3089  aName.clear();
3090  aNumbers.clear();
3091  }
3092 
3093  nDepth++;
3094  if (nDepth == 1) // pParsingDictionary is the current one
3095  {
3096  if (pParsingObject)
3097  {
3098  pParsingObject->SetArray(pCurrentArray);
3099  pParsingArray = pCurrentArray;
3100  }
3101  }
3102  else if (!pCurrentArray->alreadyParsing())
3103  {
3104  if (pParsingArray)
3105  {
3106  // Array is toplevel
3107  pParsingArray->PushBack(pCurrentArray);
3108  }
3109  else if (pParsingDictionary)
3110  {
3111  // Dictionary toplevel value.
3112  pParsingDictionary->insert(aName, pCurrentArray);
3113  }
3114 
3115  const size_t nNextElementIndex = parse(pCurrentArray, i, nCurrentDepth + 1);
3116 
3117  // ensure we go forwards and not endlessly loop
3118  i = std::max(i, nNextElementIndex - 1);
3119  }
3120  }
3121  else if (auto pCurrentEndArray = dynamic_cast<PDFEndArrayElement*>(pCurrentElement))
3122  {
3123  // Handle previously stored number
3124  if (!aNumbers.empty())
3125  {
3126  if (pParsingDictionary)
3127  {
3128  PDFNumberElement* pNumber = aNumbers.back();
3129 
3130  sal_uInt64 nLength
3131  = pNumber->GetLocation() + pNumber->GetLength() - nNameOffset;
3132  pParsingDictionary->insert(aName, pNumber);
3133  pParsingDictionary->SetKeyOffset(aName, nNameOffset);
3134  pParsingDictionary->SetKeyValueLength(aName, nLength);
3135  }
3136  else if (pParsingArray)
3137  {
3138  for (auto& pNumber : aNumbers)
3139  pParsingArray->PushBack(pNumber);
3140  }
3141  else
3142  {
3143  SAL_INFO("vcl.filter", "neither Dictionary nor Array available");
3144  }
3145  aName.clear();
3146  aNumbers.clear();
3147  }
3148 
3149  if (nDepth == 1) // did the pParsing ended
3150  {
3151  // Last array end, track length and stop parsing.
3152  nReturnIndex = i;
3153  break;
3154  }
3155  else
3156  {
3157  if (pParsingDictionary)
3158  {
3159  pParsingDictionary->SetKeyOffset(aName, nNameOffset);
3160  // Include the ending ']' in the length of the key - (array)value pair length.
3161  sal_uInt64 nLength = pCurrentEndArray->GetOffset() - nNameOffset + 1;
3162  pParsingDictionary->SetKeyValueLength(aName, nLength);
3163  aName.clear();
3164  }
3165  }
3166  nDepth--;
3167  }
3168  else if (auto pCurrentName = dynamic_cast<PDFNameElement*>(pCurrentElement))
3169  {
3170  // Handle previously stored number
3171  if (!aNumbers.empty())
3172  {
3173  if (pParsingDictionary)
3174  {
3175  PDFNumberElement* pNumber = aNumbers.back();
3176 
3177  sal_uInt64 nLength
3178  = pNumber->GetLocation() + pNumber->GetLength() - nNameOffset;
3179  pParsingDictionary->insert(aName, pNumber);
3180  pParsingDictionary->SetKeyOffset(aName, nNameOffset);
3181  pParsingDictionary->SetKeyValueLength(aName, nLength);
3182  }
3183  else if (pParsingArray)
3184  {
3185  for (auto& pNumber : aNumbers)
3186  pParsingArray->PushBack(pNumber);
3187  }
3188  aName.clear();
3189  aNumbers.clear();
3190  }
3191 
3192  // Now handle name
3193  if (pParsingArray)
3194  {
3195  // if we are in an array, just push the name to array
3196  pParsingArray->PushBack(pCurrentName);
3197  }
3198  else if (pParsingDictionary)
3199  {
3200  // if we are in a dictionary, we need to store the name as a possible key
3201  if (aName.isEmpty())
3202  {
3203  aName = pCurrentName->GetValue();
3204  nNameOffset = pCurrentName->GetLocation();
3205  }
3206  else
3207  {
3208  sal_uInt64 nKeyLength
3209  = pCurrentName->GetLocation() + pCurrentName->GetLength() - nNameOffset;
3210  pParsingDictionary->insert(aName, pCurrentName);
3211  pParsingDictionary->SetKeyOffset(aName, nNameOffset);
3212  pParsingDictionary->SetKeyValueLength(aName, nKeyLength);
3213  aName.clear();
3214  }
3215  }
3216  }
3217  else if (auto pReference = dynamic_cast<PDFReferenceElement*>(pCurrentElement))
3218  {
3219  if (pParsingArray)
3220  {
3221  pParsingArray->PushBack(pReference);
3222  }
3223  else if (pParsingDictionary)
3224  {
3225  sal_uInt64 nLength = pReference->GetOffset() - nNameOffset;
3226  pParsingDictionary->insert(aName, pReference);
3227  pParsingDictionary->SetKeyOffset(aName, nNameOffset);
3228  pParsingDictionary->SetKeyValueLength(aName, nLength);
3229  aName.clear();
3230  }
3231  else
3232  {
3233  SAL_INFO("vcl.filter", "neither Dictionary nor Array available");
3234  }
3235  aNumbers.clear();
3236  }
3237  else if (auto pLiteralString = dynamic_cast<PDFLiteralStringElement*>(pCurrentElement))
3238  {
3239  if (pParsingArray)
3240  {
3241  pParsingArray->PushBack(pLiteralString);
3242  }
3243  else if (pParsingDictionary)
3244  {
3245  pParsingDictionary->insert(aName, pLiteralString);
3246  pParsingDictionary->SetKeyOffset(aName, nNameOffset);
3247  aName.clear();
3248  }
3249  else
3250  {
3251  SAL_INFO("vcl.filter", "neither Dictionary nor Array available");
3252  }
3253  }
3254  else if (auto pBoolean = dynamic_cast<PDFBooleanElement*>(pCurrentElement))
3255  {
3256  if (pParsingArray)
3257  {
3258  pParsingArray->PushBack(pBoolean);
3259  }
3260  else if (pParsingDictionary)
3261  {
3262  pParsingDictionary->insert(aName, pBoolean);
3263  pParsingDictionary->SetKeyOffset(aName, nNameOffset);
3264  aName.clear();
3265  }
3266  else
3267  {
3268  SAL_INFO("vcl.filter", "neither Dictionary nor Array available");
3269  }
3270  }
3271  else if (auto pHexString = dynamic_cast<PDFHexStringElement*>(pCurrentElement))
3272  {
3273  if (pParsingArray)
3274  {
3275  pParsingArray->PushBack(pHexString);
3276  }
3277  else if (pParsingDictionary)
3278  {
3279  pParsingDictionary->insert(aName, pHexString);
3280  pParsingDictionary->SetKeyOffset(aName, nNameOffset);
3281  aName.clear();
3282  }
3283  }
3284  else if (auto pNumberElement = dynamic_cast<PDFNumberElement*>(pCurrentElement))
3285  {
3286  // Just remember this, so that in case it's not a reference parameter,
3287  // we can handle it later.
3288  aNumbers.push_back(pNumberElement);
3289  }
3290  else if (dynamic_cast<PDFEndObjectElement*>(pCurrentElement))
3291  {
3292  // parsing of the object is finished
3293  break;
3294  }
3295  else if (dynamic_cast<PDFObjectElement*>(pCurrentElement)
3296  || dynamic_cast<PDFTrailerElement*>(pCurrentElement))
3297  {
3298  continue;
3299  }
3300  else
3301  {
3302  SAL_INFO("vcl.filter", "Unhandled element while parsing.");
3303  }
3304  }
3305 
3306  return nReturnIndex;
3307 }
3308 
3309 } // namespace vcl::filter
3310 
3311 /* vim:set shiftwidth=4 softtabstop=4 expandtab: */
std::vector< std::unique_ptr< PDFElement > > m_aElements
Elements of an object in an object stream.
Definition: pdfdocument.hxx:92
sal_uInt64 m_nOffset
Location of the end of the trailer token.
Array object: a list.
bool Read(SvStream &rStream) override
const sal_Int32 m_nLength
sal_uInt64 m_nDictionaryOffset
Position after the '<<' token.
Definition: pdfdocument.hxx:77
bool SetStreamSize(sal_uInt64 nSize)
sal_Int32 nIndex
PDFObjectElement(PDFDocument &rDoc, double fObjectValue, double fGenerationValue)
Numbering object: an integer or a real.
Boolean object: a 'true' or a 'false'.
PDFElement * Lookup(const OString &rDictionaryKey)
PDFDictionaryElement * m_pDictionaryElement
Definition: pdfdocument.hxx:80
size_t GetObjectOffset(size_t nIndex) const
sal_uInt64 m_nLocation
Offset after the '/' token.
End of an array: ']'.
static void AppendUnicodeTextString(const OUString &rString, OStringBuffer &rBuffer)
Write rString as a PDF hex string into rBuffer.
void SetStream(PDFStreamElement *pStreamElement)
SvMemoryStream m_aMemory
The byte array itself.
std::string GetValue
tools::Long getWidth() const
PDFArrayElement * m_pArrayElement
The contained direct array, if any.
Definition: pdfdocument.hxx:86
PDFObjectElement * m_pObject
The object that contains this array.
std::vector< std::unique_ptr< PDFObjectElement > > m_aStoredElements
Objects of an object stream.
Definition: pdfdocument.hxx:90
double LookupNumber(SvStream &rStream) const
Assuming the reference points to a number object, return its value.
void SetDictionaryLength(sal_uInt64 nDictionaryLength)
PDFTrailerElement(PDFDocument &rDoc)
aBuf
#define STREAM_SEEK_TO_END
sal_uInt64 m_nLocation
Offset before the '>>' token.
sal_uInt64 Seek(sal_uInt64 nPos)
std::vector< unsigned char > DecodeHexString(const OString &rHex)
void SetDictionaryOffset(sal_uInt64 nDictionaryOffset)
tools::Long Decompress(SvStream &rIStm, SvStream &rOStm)
PDFDocument & m_rDoc
The document owning this element.
Definition: pdfdocument.hxx:71
void PushBack(PDFElement *pElement)
sal_uInt64 GetLocation() const
PDFObjectElement * LookupObject(const OString &rDictionaryKey)
Looks up an object which is only referenced in this dictionary.
EmbeddedObjectRef * pObject
sal_uInt64 m_nArrayOffset
Position after the '[' token, if m_pArrayElement is set.
Definition: pdfdocument.hxx:82
std::map< OString, PDFElement * > m_aItems
Key-value pairs when the dictionary is a nested value.
void SetArrayOffset(sal_uInt64 nArrayOffset)
SvStream & WriteCharPtr(const char *pBuf)
void SetArrayLength(sal_uInt64 nArrayLength)
size_t parse(PDFElement *pParsingElement, size_t nStartIndex=0, int nCurrentDepth=0)
sal_uInt64 SeekRel(sal_Int64 nPos)
const std::vector< std::unique_ptr< PDFElement > > & mrElements
void setVisiting(bool bVisiting)
Definition: pdfdocument.hxx:59
SvMemoryStream * GetStreamBuffer() const
void SetStreamBuffer(std::unique_ptr< SvMemoryStream > &pStreamBuffer)
tools::Long getHeight() const
SwDoc & m_rDoc
SvStream & WriteUInt32AsString(sal_uInt32 nUInt32)
std::unique_ptr< SvMemoryStream > m_pStreamBuffer
Uncompressed buffer of an object in an object stream.
Definition: pdfdocument.hxx:94
bool Read(SvStream &rStream) override
sal_uInt16 sal_Unicode
A byte range in a PDF file.
Definition: pdfdocument.hxx:50
bool Read(SvStream &rStream) override
bool eof() const
PDFObjectElement * LookupObject(size_t nObjectNumber)
Look up object based on object number, possibly by parsing object streams.
bool Read(SvStream &rStream) override
In-memory representation of an on-disk PDF document.
TStyleElements m_aElements
sal_uInt64 m_nArrayLength
Length of the array buffer till (before) the ']' token.
Definition: pdfdocument.hxx:84
An entry in a cross-reference stream.
PDFReferenceElement(PDFDocument &rDoc, PDFNumberElement &rObject, PDFNumberElement const &rGeneration)
XRefEntryType GetType() const
const OString & GetValue() const
static void visitPages(PDFObjectElement *pPages, std::vector< PDFObjectElement * > &rRet)
Visits the page tree recursively, looking for page objects.
void PushBackEOF(size_t nOffset)
Remember the end location of an EOF token.
bool Read(SvStream &rStream) override
sal_uInt64 GetArrayLength() const
void copyPageResources(filter::PDFObjectElement *pPage, OStringBuffer &rLine)
Copies resources of pPage into rLine.
sal_Int32 nElements
bool Read(SvStream &rStream) override
PDFNumberElement * m_pNumberElement
If set, the object contains this number element (outside any dictionary/array).
Definition: pdfdocument.hxx:75
void Compress(SvStream &rIStm, SvStream &rOStm)
Copies objects from one PDF file into another one.
void SetType(XRefEntryType eType)
Same as END_OF_OBJECT, but for object streams (no endobj keyword).
void SetKeyValueLength(const OString &rKey, sal_uInt64 nLength)
sal_uInt64 m_nOffset
Input file start location.
void ParseStoredObjects()
Parse objects stored in this object stream.
static OString GetDateTime()
Get current date/time in PDF D:YYYYMMDDHHMMSS form.
bool Read(SvStream &rStream) override
sal_uInt64 GetOffset() const
std::map< OString, sal_uInt64 > m_aDictionaryKeyOffset
Position after the '/' token.
sal_uInt16 char * pName
PDFStreamElement * m_pStreamElement
The stream of this object, used when this is an object stream.
Definition: pdfdocument.hxx:88
bool GetDirty() const
int i
Indirect object: something with a unique ID.
Definition: pdfdocument.hxx:68
bool Read(SvStream &rStream) override
PDFNumberElement & m_rObject
The element providing the object number.
sal_uInt64 m_nLength
Input file token length.
static OString ReadKeyword(SvStream &rStream)
sal_uInt64 GetSize()
bool Read(SvStream &rStream) override
constexpr std::enable_if_t< std::is_signed_v< T >, std::make_unsigned_t< T > > make_unsigned(T value)
End of a dictionary: '>>'.
std::size_t WriteBytes(const void *pData, std::size_t nSize)
void AddDictionaryReference(PDFReferenceElement *pReference)
void BeginCompression(int nCompressLevel=ZCODEC_DEFAULT_COMPRESSION, bool gzLib=false)
tools::Long EndCompression()
std::vector< PDFObjectElement * > GetPages()
SvMemoryStream m_aEditBuffer
All editing takes place in this buffer, if it happens.
A one-liner comment.
sal_uInt64 GetLocation() const
Dictionary object: a set of key-value pairs.
bool Read(SvStream &rStream)
Read elements from the start of the stream till its end.
sal_uInt64 GetOffset() const
bool Read(SvStream &rStream) override
std::vector< PDFElement * > m_aElements
bool Tokenize(SvStream &rStream, TokenizeMode eMode, std::vector< std::unique_ptr< PDFElement >> &rElements, PDFObjectElement *pObjectElement)
Tokenize elements from current offset.
bool Read(SvStream &rStream) override
std::vector< PDFReferenceElement * > m_aDictionaryReferences
List of all reference elements inside this object's dictionary and nested dictionaries.
Definition: pdfdocument.hxx:97
#define MAX_SIGNATURE_CONTENT_LENGTH
PDFStreamElement * GetStream() const
Access to the stream of the object, if it has any.
SvStream & WriteStream(SvStream &rStream)
const std::map< OString, PDFElement * > & GetItems() const
std::map< OString, sal_uInt64 > m_aDictionaryKeyValueLength
Length of the dictionary key and value, till (before) the next token.
const char * pS
sal_uInt64 GetKeyOffset(const OString &rKey) const
void SetIDObject(size_t nID, PDFObjectElement *pObject)
Register an object (owned directly or indirectly by m_aElements) as a provider for a given ID...
void setParsing(bool bParsing)
Definition: pdfdocument.hxx:61
bool Read(SvStream &rStream) override
sal_uInt64 GetKeyValueLength(const OString &rKey) const
PDFObjectElement * LookupObject(const OString &rDictionaryKey)
std::size_t ReadBytes(void *pData, std::size_t nSize)
bool Read(SvStream &rStream) override
SvMemoryStream & GetEditBuffer()
Access to the input document, even after the input stream is gone.
const OString & GetValue() const
std::vector< std::unique_ptr< PDFElement > > & GetStoredElements()
void SetNumberElement(PDFNumberElement *pNumberElement)
sal_uInt64 m_nOffset
Location before the ']' token.
sal_uInt64 GetLength() const
PDFDictionaryElement * GetDictionary()
const std::vector< std::unique_ptr< PDFElement > > & GetElements() const
bool Read(SvStream &rStream) override
void SetDirty(bool bDirty)
PDFObjectElement * LookupObject()
Lookup referenced object, without assuming anything about its contents.
std::unique_ptr< char[]> aBuffer
SvStream & ReadChar(char &rChar)
End of an object: 'endobj' keyword.
PDFElement * LookupElement(const OString &rDictionaryKey)
Looks up an element which is contained in this dictionary.
#define SAL_WARN_IF(condition, area, stream)
SvStream & WriteOString(std::string_view rStr)
void setWidth(tools::Long n)
SvMemoryStream & GetMemory()
Null object: the 'null' singleton.
const std::vector< PDFReferenceElement * > & GetDictionaryReferences() const
#define SAL_INFO(area, stream)
OUString aName
bool Read(SvStream &rStream) override
std::map< size_t, PDFObjectElement * > m_aIDObjects
Object ID <-> Object pointer map.
static void SkipWhitespace(SvStream &rStream)
PDFNumberElement * GetNumberElement() const
sal_uInt64 Tell() const
QPRO_FUNC_TYPE nType
const OString & GetValue() const
bool Sign(OStringBuffer &rCMSHexBuffer)
PDFArrayElement * GetArray()
Reference object: something with a unique ID.
const std::vector< PDFElement * > & GetElements() const
End of a stream: 'endstream' keyword.
sal_uInt64 GetLocation() const
bool good() const
PDFDictionaryElement * m_pDictionaryElement
#define SAL_WARN(area, stream)
bool alreadyVisiting() const
Definition: pdfdocument.hxx:60
Literal string: in (asdf) form.
PDFArrayElement(PDFObjectElement *pObject)
bool Read(SvStream &rStream) override
sal_Int32 nLength
Name object: a key string.
void SetOffset(sal_uInt64 nOffset)
The trailer singleton is at the end of the doc.
const std::map< OString, PDFElement * > & GetDictionaryItems()
Get access to the parsed key-value items from the object dictionary.
void SetDictionary(PDFDictionaryElement *pDictionaryElement)
PDFNumberElement & GetObjectElement() const
void AddDataRange(const void *pData, sal_Int32 size)
PDFElement * Lookup(const OString &rDictionaryKey)
sal_uInt64 m_nDictionaryLength
Length of the dictionary buffer till (before) the '>>' token.
Definition: pdfdocument.hxx:79
sal_uInt64 GetArrayOffset() const
Stream object: a byte array with a known length.
sal_uInt64 m_nLocation
Offset after the '<<' token.
void setHeight(tools::Long n)
sal_uInt16 nPos
static PDFElement * Lookup(const std::map< OString, PDFElement * > &rDictionary, const OString &rKey)
const void * GetData()
void SetArray(PDFArrayElement *pArrayElement)
bool Read(SvStream &rStream) override
sal_uInt64 m_nOffset
Location after the 'R' token.
void SetKeyOffset(const OString &rKey, sal_uInt64 nOffset)
OStringBuffer & padToLength(OStringBuffer &rBuffer, sal_Int32 nLength, char cFill= '\0')