LibreOffice Module vcl (master) 1
pdfdocument.cxx
Go to the documentation of this file.
1/* -*- Mode: C++; tab-width: 4; indent-tabs-mode: nil; c-basic-offset: 4 -*- */
2/*
3 * This file is part of the LibreOffice project.
4 *
5 * This Source Code Form is subject to the terms of the Mozilla Public
6 * License, v. 2.0. If a copy of the MPL was not distributed with this
7 * file, You can obtain one at http://mozilla.org/MPL/2.0/.
8 */
9
11#include <pdf/pdfcompat.hxx>
12
13#include <map>
14#include <memory>
15#include <vector>
16
17#include <com/sun/star/uno/Sequence.hxx>
18#include <com/sun/star/security/XCertificate.hpp>
19
21#include <comphelper/string.hxx>
22#include <o3tl/string_view.hxx>
23#include <rtl/character.hxx>
24#include <rtl/strbuf.hxx>
25#include <rtl/string.hxx>
26#include <sal/log.hxx>
27#include <sal/types.h>
28#include <svl/cryptosign.hxx>
29#include <tools/zcodec.hxx>
30#include <vcl/pdfwriter.hxx>
31#include <o3tl/safeint.hxx>
32
33#include <pdf/objectcopier.hxx>
34
35using namespace com::sun::star;
36
37namespace vcl::filter
38{
// Compiler-generated default constructor for XRefEntry, defined out of line.
39XRefEntry::XRefEntry() = default;
40
// Compiler-generated default constructor for PDFDocument, defined out of line.
41PDFDocument::PDFDocument() = default;
42
44
// Removes the signature with index nPosition (an index into the vector
// returned by GetSignatureWidgets()).
//
// Returns false, after logging a warning, when nPosition is out of range or
// when the number of signature widgets is not exactly one less than the
// number of recorded EOF offsets in m_aEOFs. Otherwise the edit buffer is
// positioned at m_aEOFs[nPosition] and the buffer's good() state is returned.
//
// NOTE(review): the embedded numbering jumps from 64 to 66, so the statement
// that performs the truncation announced by the "Drop all bytes" comment is
// not visible in this listing -- confirm against the real file.
45bool PDFDocument::RemoveSignature(size_t nPosition)
46{
47 std::vector<PDFObjectElement*> aSignatures = GetSignatureWidgets();
48 if (nPosition >= aSignatures.size())
49 {
50 SAL_WARN("vcl.filter", "PDFDocument::RemoveSignature: invalid nPosition");
51 return false;
52 }
53
// If m_aEOFs were empty, size() - 1 would wrap around to the maximum size_t
// value; the comparison would then fail and the function bails out below.
54 if (aSignatures.size() != m_aEOFs.size() - 1)
55 {
56 SAL_WARN("vcl.filter", "PDFDocument::RemoveSignature: no 1:1 mapping between signatures "
57 "and incremental updates");
58 return false;
59 }
60
61 // The EOF offset is the end of the original file, without the signature at
62 // nPosition.
63 m_aEditBuffer.Seek(m_aEOFs[nPosition]);
64 // Drop all bytes after the current position.
66
67 return m_aEditBuffer.good();
68}
69
71{
72 sal_Int32 nObject = m_aXRef.size();
73 m_aXRef[nObject] = XRefEntry();
74 return nObject;
75}
76
// Replaces the cross-reference entry of object nObject with a fresh entry that
// is flagged dirty.
//
// Returns false (with a warning) when nObject, converted to unsigned, is not
// smaller than m_aXRef.size(); returns true after the entry was replaced.
//
// NOTE(review): the embedded numbering jumps from 85 to 87, so one statement
// operating on aEntry is missing from this listing -- confirm against the
// real file before relying on this description.
77bool PDFDocument::updateObject(sal_Int32 nObject)
78{
79 if (o3tl::make_unsigned(nObject) >= m_aXRef.size())
80 {
81 SAL_WARN("vcl.filter", "PDFDocument::updateObject: invalid nObject");
82 return false;
83 }
84
85 XRefEntry aEntry;
87 aEntry.SetDirty(true);
// Assigning a whole new entry discards whatever the old entry stored.
88 m_aXRef[nObject] = aEntry;
89 return true;
90}
91
92bool PDFDocument::writeBuffer(const void* pBuffer, sal_uInt64 nBytes)
93{
94 std::size_t nWritten = m_aEditBuffer.WriteBytes(pBuffer, nBytes);
95 return nWritten == nBytes;
96}
97
// Takes ownership of the bytes in rSignatureLine by moving them into
// m_aSignatureLine; the caller's vector is left in a moved-from state.
98void PDFDocument::SetSignatureLine(std::vector<sal_Int8>&& rSignatureLine)
99{
100 m_aSignatureLine = std::move(rSignatureLine);
101}
102
103void PDFDocument::SetSignaturePage(size_t nPage) { m_nSignaturePage = nPage; }
104
106{
107 sal_uInt32 nRet = 0;
108 for (const auto& pSignature : GetSignatureWidgets())
109 {
110 auto pT = dynamic_cast<PDFLiteralStringElement*>(pSignature->Lookup("T"));
111 if (!pT)
112 continue;
113
114 const OString& rValue = pT->GetValue();
115 const OString aPrefix = "Signature";
116 if (!rValue.startsWith(aPrefix))
117 continue;
118
119 nRet = std::max(nRet, o3tl::toUInt32(rValue.subView(aPrefix.getLength())));
120 }
121
122 return nRet + 1;
123}
124
// Writes a new signature ("Sig") object at the current position of the edit
// buffer and registers it in m_aXRef as a dirty entry.
//
// rDescription: when non-empty, written as the /Reason entry.
// bAdES: selects the /SubFilter name (ETSI.CAdES.detached when true,
//        adbe.pkcs7.detached otherwise).
// rLastByteRangeOffset (out): buffer offset where the last /ByteRange value
//        has to be written later; space-padded room is reserved there.
// rContentOffset (out): buffer offset of the first byte after the "<" that
//        opens the /Contents hex string.
// Returns the id allocated for the new object (m_aXRef.size() on entry).
//
// NOTE(review): the embedded numbering jumps from 141 to 143, so the statement
// that fills aContentFiller is missing from this listing -- confirm against
// the real file.
125sal_Int32 PDFDocument::WriteSignatureObject(const OUString& rDescription, bool bAdES,
126 sal_uInt64& rLastByteRangeOffset,
127 sal_Int64& rContentOffset)
128{
129 // Write signature object.
130 sal_Int32 nSignatureId = m_aXRef.size();
131 XRefEntry aSignatureEntry;
132 aSignatureEntry.SetOffset(m_aEditBuffer.Tell());
133 aSignatureEntry.SetDirty(true);
134 m_aXRef[nSignatureId] = aSignatureEntry;
// The object is assembled in aSigBuffer first, so offsets inside it can be
// computed as "object start offset + bytes appended so far".
135 OStringBuffer aSigBuffer;
136 aSigBuffer.append(nSignatureId);
137 aSigBuffer.append(" 0 obj\n");
138 aSigBuffer.append("<</Contents <");
139 rContentOffset = aSignatureEntry.GetOffset() + aSigBuffer.getLength();
140 // Reserve space for the PKCS#7 object.
141 OStringBuffer aContentFiller(MAX_SIGNATURE_CONTENT_LENGTH);
143 aSigBuffer.append(aContentFiller);
144 aSigBuffer.append(">\n/Type/Sig/SubFilter");
145 if (bAdES)
146 aSigBuffer.append("/ETSI.CAdES.detached");
147 else
148 aSigBuffer.append("/adbe.pkcs7.detached");
149
150 // Time of signing.
151 aSigBuffer.append(" /M (");
152 aSigBuffer.append(vcl::PDFWriter::GetDateTime());
153 aSigBuffer.append(")");
154
155 // Byte range: we can write offset1-length1 and offset2 right now, will
156 // write length2 later.
157 aSigBuffer.append(" /ByteRange [ 0 ");
158 // -1 and +1 is the leading "<" and the trailing ">" around the hex string.
159 aSigBuffer.append(rContentOffset - 1);
160 aSigBuffer.append(" ");
161 aSigBuffer.append(rContentOffset + MAX_SIGNATURE_CONTENT_LENGTH + 1);
162 aSigBuffer.append(" ");
163 rLastByteRangeOffset = aSignatureEntry.GetOffset() + aSigBuffer.getLength();
164 // We don't know how many bytes we need for the last ByteRange value, this
165 // should be enough.
166 OStringBuffer aByteRangeFiller;
167 comphelper::string::padToLength(aByteRangeFiller, 100, ' ');
168 aSigBuffer.append(aByteRangeFiller);
169 // Finish the Sig obj.
170 aSigBuffer.append(" /Filter/Adobe.PPKMS");
171
172 if (!rDescription.isEmpty())
173 {
174 aSigBuffer.append("/Reason<");
175 vcl::PDFWriter::AppendUnicodeTextString(rDescription, aSigBuffer);
176 aSigBuffer.append(">");
177 }
178
179 aSigBuffer.append(" >>\nendobj\n\n");
180 m_aEditBuffer.WriteOString(aSigBuffer);
181
182 return nSignatureId;
183}
184
186{
187 PDFDocument aPDFDocument;
188 filter::PDFObjectElement* pPage = nullptr;
189 std::vector<filter::PDFObjectElement*> aContentStreams;
190
191 if (!m_aSignatureLine.empty())
192 {
193 // Parse the PDF data of signature line: we can set the signature rectangle to non-empty
194 // based on it.
195 SvMemoryStream aPDFStream;
196 aPDFStream.WriteBytes(m_aSignatureLine.data(), m_aSignatureLine.size());
197 aPDFStream.Seek(0);
198 if (!aPDFDocument.Read(aPDFStream))
199 {
200 SAL_WARN("vcl.filter",
201 "PDFDocument::WriteAppearanceObject: failed to read the PDF document");
202 return -1;
203 }
204
205 std::vector<filter::PDFObjectElement*> aPages = aPDFDocument.GetPages();
206 if (aPages.empty())
207 {
208 SAL_WARN("vcl.filter", "PDFDocument::WriteAppearanceObject: no pages");
209 return -1;
210 }
211
212 pPage = aPages[0];
213 if (!pPage)
214 {
215 SAL_WARN("vcl.filter", "PDFDocument::WriteAppearanceObject: no page");
216 return -1;
217 }
218
219 // Calculate the bounding box.
220 PDFElement* pMediaBox = pPage->Lookup("MediaBox");
221 auto pMediaBoxArray = dynamic_cast<PDFArrayElement*>(pMediaBox);
222 if (!pMediaBoxArray || pMediaBoxArray->GetElements().size() < 4)
223 {
224 SAL_WARN("vcl.filter",
225 "PDFDocument::WriteAppearanceObject: MediaBox is not an array of 4");
226 return -1;
227 }
228 const std::vector<PDFElement*>& rMediaBoxElements = pMediaBoxArray->GetElements();
229 auto pWidth = dynamic_cast<PDFNumberElement*>(rMediaBoxElements[2]);
230 if (!pWidth)
231 {
232 SAL_WARN("vcl.filter", "PDFDocument::WriteAppearanceObject: MediaBox has no width");
233 return -1;
234 }
235 rSignatureRectangle.setWidth(pWidth->GetValue());
236 auto pHeight = dynamic_cast<PDFNumberElement*>(rMediaBoxElements[3]);
237 if (!pHeight)
238 {
239 SAL_WARN("vcl.filter", "PDFDocument::WriteAppearanceObject: MediaBox has no height");
240 return -1;
241 }
242 rSignatureRectangle.setHeight(pHeight->GetValue());
243
244 if (PDFObjectElement* pContentStream = pPage->LookupObject("Contents"))
245 {
246 aContentStreams.push_back(pContentStream);
247 }
248
249 if (aContentStreams.empty())
250 {
251 SAL_WARN("vcl.filter", "PDFDocument::WriteAppearanceObject: no content stream");
252 return -1;
253 }
254 }
255 m_aSignatureLine.clear();
256
257 // Write appearance object: allocate an ID.
258 sal_Int32 nAppearanceId = m_aXRef.size();
259 m_aXRef[nAppearanceId] = XRefEntry();
260
261 // Write the object content.
262 SvMemoryStream aEditBuffer;
263 aEditBuffer.WriteUInt32AsString(nAppearanceId);
264 aEditBuffer.WriteCharPtr(" 0 obj\n");
265 aEditBuffer.WriteCharPtr("<</Type/XObject\n/Subtype/Form\n");
266
267 PDFObjectCopier aCopier(*this);
268 if (!aContentStreams.empty())
269 {
270 assert(pPage && "aContentStreams is only filled if there was a pPage");
271 OStringBuffer aBuffer;
272 aCopier.copyPageResources(pPage, aBuffer);
273 aEditBuffer.WriteOString(aBuffer);
274 }
275
276 aEditBuffer.WriteCharPtr("/BBox[0 0 ");
277 aEditBuffer.WriteOString(OString::number(rSignatureRectangle.getOpenWidth()));
278 aEditBuffer.WriteCharPtr(" ");
279 aEditBuffer.WriteOString(OString::number(rSignatureRectangle.getOpenHeight()));
280 aEditBuffer.WriteCharPtr("]\n/Length ");
281
282 // Add the object to the doc-level edit buffer and update the offset.
283 SvMemoryStream aStream;
284 bool bCompressed = false;
285 sal_Int32 nLength = 0;
286 if (!aContentStreams.empty())
287 {
288 nLength = PDFObjectCopier::copyPageStreams(aContentStreams, aStream, bCompressed);
289 }
290 aEditBuffer.WriteOString(OString::number(nLength));
291 if (bCompressed)
292 {
293 aEditBuffer.WriteOString(" /Filter/FlateDecode");
294 }
295
296 aEditBuffer.WriteCharPtr("\n>>\n");
297
298 aEditBuffer.WriteCharPtr("stream\n");
299
300 // Copy the original page streams to the form XObject stream.
301 aStream.Seek(0);
302 aEditBuffer.WriteStream(aStream);
303
304 aEditBuffer.WriteCharPtr("\nendstream\nendobj\n\n");
305
306 aEditBuffer.Seek(0);
307 XRefEntry aAppearanceEntry;
308 aAppearanceEntry.SetOffset(m_aEditBuffer.Tell());
309 aAppearanceEntry.SetDirty(true);
310 m_aXRef[nAppearanceId] = aAppearanceEntry;
311 m_aEditBuffer.WriteStream(aEditBuffer);
312
313 return nAppearanceId;
314}
315
// Writes the widget annotation object for a new signature at the current
// position of the edit buffer and registers it in m_aXRef as a dirty entry.
//
// The /Rect is written from rSignatureRectangle's open width and height, the
// field name /T starts with "Signature" followed by GetNextSignature(), and
// the normal appearance (/AP /N) references nAppearanceId.
// Returns the id allocated for the annotation object (m_aXRef.size() on entry).
//
// NOTE(review): the embedded numbering has several gaps inside this function
// (e.g. 329, 334, 336, 338-339, 343-345, 347-348). The statements that use
// rFirstPage and nSignatureId, and the ones that write the object id and the
// separators, are not visible here -- confirm against the real file.
316sal_Int32 PDFDocument::WriteAnnotObject(PDFObjectElement const& rFirstPage, sal_Int32 nSignatureId,
317 sal_Int32 nAppearanceId,
318 const tools::Rectangle& rSignatureRectangle)
319{
320 // Decide what identifier to use for the new signature.
321 sal_uInt32 nNextSignature = GetNextSignature();
322
323 // Write the Annot object, references nSignatureId and nAppearanceId.
324 sal_Int32 nAnnotId = m_aXRef.size();
325 XRefEntry aAnnotEntry;
326 aAnnotEntry.SetOffset(m_aEditBuffer.Tell());
327 aAnnotEntry.SetDirty(true);
328 m_aXRef[nAnnotId] = aAnnotEntry;
330 m_aEditBuffer.WriteCharPtr(" 0 obj\n");
331 m_aEditBuffer.WriteCharPtr("<</Type/Annot/Subtype/Widget/F 132\n");
332 m_aEditBuffer.WriteCharPtr("/Rect[0 0 ");
333 m_aEditBuffer.WriteOString(OString::number(rSignatureRectangle.getOpenWidth()));
335 m_aEditBuffer.WriteOString(OString::number(rSignatureRectangle.getOpenHeight()));
337 m_aEditBuffer.WriteCharPtr("/FT/Sig\n");
340 m_aEditBuffer.WriteCharPtr(" 0 R\n");
341 m_aEditBuffer.WriteCharPtr("/T(Signature");
342 m_aEditBuffer.WriteUInt32AsString(nNextSignature);
346 m_aEditBuffer.WriteCharPtr(" 0 R\n");
349 m_aEditBuffer.WriteCharPtr(" 0 R\n");
350 m_aEditBuffer.WriteCharPtr("/AP<<\n/N ");
351 m_aEditBuffer.WriteInt32AsString(nAppearanceId);
352 m_aEditBuffer.WriteCharPtr(" 0 R\n>>\n");
353 m_aEditBuffer.WriteCharPtr(">>\nendobj\n\n");
354
355 return nAnnotId;
356}
357
// Writes an updated copy of the page's annotation list so that it can include
// the annotation nAnnotId, appending the new object to the edit buffer.
//
// Two cases are handled, depending on what rFirstPage's "Annots" key holds:
//  - a reference: the referenced Annots array object is rewritten as a new,
//    uncompressed object and its xref entry is redirected to the new copy;
//  - anything else (no key, or an inline array): the whole page object is
//    rewritten by copying its dictionary bytes out of the edit buffer.
// Returns false (with a warning) on an unresolvable Annots reference, a
// referenced object that is not an array, or a page id that is not below
// m_aXRef.size(); returns true otherwise.
//
// NOTE(review): the embedded numbering has many gaps inside this function
// (e.g. 376, 394, 396, 399-401, 416, 418, 427-428, 443-445, 454). The
// statements that write object ids, separators and the new reference itself
// are not visible in this listing -- confirm against the real file.
358bool PDFDocument::WritePageObject(PDFObjectElement& rFirstPage, sal_Int32 nAnnotId)
359{
360 PDFElement* pAnnots = rFirstPage.Lookup("Annots");
361 auto pAnnotsReference = dynamic_cast<PDFReferenceElement*>(pAnnots);
362 if (pAnnotsReference)
363 {
364 // Write the updated Annots key of the Page object.
365 PDFObjectElement* pAnnotsObject = pAnnotsReference->LookupObject();
366 if (!pAnnotsObject)
367 {
368 SAL_WARN("vcl.filter", "PDFDocument::Sign: invalid Annots reference");
369 return false;
370 }
371
372 sal_uInt32 nAnnotsId = pAnnotsObject->GetObjectValue();
373 m_aXRef[nAnnotsId].SetType(XRefEntryType::NOT_COMPRESSED);
374 m_aXRef[nAnnotsId].SetOffset(m_aEditBuffer.Tell());
375 m_aXRef[nAnnotsId].SetDirty(true);
377 m_aEditBuffer.WriteCharPtr(" 0 obj\n[");
378
379 // Write existing references.
// NOTE(review): the xref entry and the object header above are already
// written when this check fails, so an early return leaves a partial object
// in the edit buffer.
380 PDFArrayElement* pArray = pAnnotsObject->GetArray();
381 if (!pArray)
382 {
383 SAL_WARN("vcl.filter", "PDFDocument::Sign: Page Annots is a reference to a non-array");
384 return false;
385 }
386
387 for (size_t i = 0; i < pArray->GetElements().size(); ++i)
388 {
// Array elements that are not references are skipped.
389 auto pReference = dynamic_cast<PDFReferenceElement*>(pArray->GetElements()[i]);
390 if (!pReference)
391 continue;
392
393 if (i)
395 m_aEditBuffer.WriteUInt32AsString(pReference->GetObjectValue());
397 }
398 // Write our reference.
402
403 m_aEditBuffer.WriteCharPtr("]\nendobj\n\n");
404 }
405 else
406 {
407 // Write the updated first page object, references nAnnotId.
408 sal_uInt32 nFirstPageId = rFirstPage.GetObjectValue();
409 if (nFirstPageId >= m_aXRef.size())
410 {
411 SAL_WARN("vcl.filter", "PDFDocument::Sign: invalid first page obj id");
412 return false;
413 }
414 m_aXRef[nFirstPageId].SetOffset(m_aEditBuffer.Tell());
415 m_aXRef[nFirstPageId].SetDirty(true);
417 m_aEditBuffer.WriteCharPtr(" 0 obj\n");
419 auto pAnnotsArray = dynamic_cast<PDFArrayElement*>(pAnnots);
420 if (!pAnnotsArray)
421 {
422 // No Annots key, just write the key with a single reference.
// The source bytes are read from the edit buffer's own data (GetData()) and
// appended to that same buffer.
423 m_aEditBuffer.WriteBytes(static_cast<const char*>(m_aEditBuffer.GetData())
424 + rFirstPage.GetDictionaryOffset(),
425 rFirstPage.GetDictionaryLength());
426 m_aEditBuffer.WriteCharPtr("/Annots[");
429 }
430 else
431 {
432 // Annots key is already there, insert our reference at the end.
433 PDFDictionaryElement* pDictionary = rFirstPage.GetDictionary();
434
435 // Offset right before the end of the Annots array.
436 sal_uInt64 nAnnotsEndOffset = pDictionary->GetKeyOffset("Annots")
437 + pDictionary->GetKeyValueLength("Annots") - 1;
438 // Length of beginning of the dictionary -> Annots end.
439 sal_uInt64 nAnnotsBeforeEndLength = nAnnotsEndOffset - rFirstPage.GetDictionaryOffset();
440 m_aEditBuffer.WriteBytes(static_cast<const char*>(m_aEditBuffer.GetData())
441 + rFirstPage.GetDictionaryOffset(),
442 nAnnotsBeforeEndLength);
446 // Length of Annots end -> end of the dictionary.
447 sal_uInt64 nAnnotsAfterEndLength = rFirstPage.GetDictionaryOffset()
448 + rFirstPage.GetDictionaryLength()
449 - nAnnotsEndOffset;
450 m_aEditBuffer.WriteBytes(static_cast<const char*>(m_aEditBuffer.GetData())
451 + nAnnotsEndOffset,
452 nAnnotsAfterEndLength);
453 }
455 m_aEditBuffer.WriteCharPtr("\nendobj\n\n");
456 }
457
458 return true;
459}
460
462{
463 if (m_pXRefStream)
464 pRoot = dynamic_cast<PDFReferenceElement*>(m_pXRefStream->Lookup("Root"));
465 else
466 {
467 if (!m_pTrailer)
468 {
469 SAL_WARN("vcl.filter", "PDFDocument::Sign: found no trailer");
470 return false;
471 }
472 pRoot = dynamic_cast<PDFReferenceElement*>(m_pTrailer->Lookup("Root"));
473 }
474 if (!pRoot)
475 {
476 SAL_WARN("vcl.filter", "PDFDocument::Sign: trailer has no root reference");
477 return false;
478 }
479 PDFObjectElement* pCatalog = pRoot->LookupObject();
480 if (!pCatalog)
481 {
482 SAL_WARN("vcl.filter", "PDFDocument::Sign: invalid catalog reference");
483 return false;
484 }
485 sal_uInt32 nCatalogId = pCatalog->GetObjectValue();
486 if (nCatalogId >= m_aXRef.size())
487 {
488 SAL_WARN("vcl.filter", "PDFDocument::Sign: invalid catalog obj id");
489 return false;
490 }
491 PDFElement* pAcroForm = pCatalog->Lookup("AcroForm");
492 auto pAcroFormReference = dynamic_cast<PDFReferenceElement*>(pAcroForm);
493 if (pAcroFormReference)
494 {
495 // Write the updated AcroForm key of the Catalog object.
496 PDFObjectElement* pAcroFormObject = pAcroFormReference->LookupObject();
497 if (!pAcroFormObject)
498 {
499 SAL_WARN("vcl.filter", "PDFDocument::Sign: invalid AcroForm reference");
500 return false;
501 }
502
503 sal_uInt32 nAcroFormId = pAcroFormObject->GetObjectValue();
504 m_aXRef[nAcroFormId].SetType(XRefEntryType::NOT_COMPRESSED);
505 m_aXRef[nAcroFormId].SetOffset(m_aEditBuffer.Tell());
506 m_aXRef[nAcroFormId].SetDirty(true);
508 m_aEditBuffer.WriteCharPtr(" 0 obj\n");
509
510 // If this is nullptr, then the AcroForm object is not in an object stream.
511 SvMemoryStream* pStreamBuffer = pAcroFormObject->GetStreamBuffer();
512
513 if (!pAcroFormObject->Lookup("Fields"))
514 {
515 SAL_WARN("vcl.filter",
516 "PDFDocument::Sign: AcroForm object without required Fields key");
517 return false;
518 }
519
520 PDFDictionaryElement* pAcroFormDictionary = pAcroFormObject->GetDictionary();
521 if (!pAcroFormDictionary)
522 {
523 SAL_WARN("vcl.filter", "PDFDocument::Sign: AcroForm object has no dictionary");
524 return false;
525 }
526
527 // Offset right before the end of the Fields array.
528 sal_uInt64 nFieldsEndOffset = pAcroFormDictionary->GetKeyOffset("Fields")
529 + pAcroFormDictionary->GetKeyValueLength("Fields")
530 - strlen("]");
531
532 // Length of beginning of the object dictionary -> Fields end.
533 sal_uInt64 nFieldsBeforeEndLength = nFieldsEndOffset;
534 if (pStreamBuffer)
535 m_aEditBuffer.WriteBytes(pStreamBuffer->GetData(), nFieldsBeforeEndLength);
536 else
537 {
538 nFieldsBeforeEndLength -= pAcroFormObject->GetDictionaryOffset();
540 m_aEditBuffer.WriteBytes(static_cast<const char*>(m_aEditBuffer.GetData())
541 + pAcroFormObject->GetDictionaryOffset(),
542 nFieldsBeforeEndLength);
543 }
544
545 // Append our reference at the end of the Fields array.
549
550 // Length of Fields end -> end of the object dictionary.
551 if (pStreamBuffer)
552 {
553 sal_uInt64 nFieldsAfterEndLength = pStreamBuffer->GetSize() - nFieldsEndOffset;
554 m_aEditBuffer.WriteBytes(static_cast<const char*>(pStreamBuffer->GetData())
555 + nFieldsEndOffset,
556 nFieldsAfterEndLength);
557 }
558 else
559 {
560 sal_uInt64 nFieldsAfterEndLength = pAcroFormObject->GetDictionaryOffset()
561 + pAcroFormObject->GetDictionaryLength()
562 - nFieldsEndOffset;
563 m_aEditBuffer.WriteBytes(static_cast<const char*>(m_aEditBuffer.GetData())
564 + nFieldsEndOffset,
565 nFieldsAfterEndLength);
567 }
568
569 m_aEditBuffer.WriteCharPtr("\nendobj\n\n");
570 }
571 else
572 {
573 // Write the updated Catalog object, references nAnnotId.
574 auto pAcroFormDictionary = dynamic_cast<PDFDictionaryElement*>(pAcroForm);
575 m_aXRef[nCatalogId].SetOffset(m_aEditBuffer.Tell());
576 m_aXRef[nCatalogId].SetDirty(true);
578 m_aEditBuffer.WriteCharPtr(" 0 obj\n");
580 if (!pAcroFormDictionary)
581 {
582 // No AcroForm key, assume no signatures.
583 m_aEditBuffer.WriteBytes(static_cast<const char*>(m_aEditBuffer.GetData())
584 + pCatalog->GetDictionaryOffset(),
585 pCatalog->GetDictionaryLength());
586 m_aEditBuffer.WriteCharPtr("/AcroForm<</Fields[\n");
588 m_aEditBuffer.WriteCharPtr(" 0 R\n]/SigFlags 3>>\n");
589 }
590 else
591 {
592 // AcroForm key is already there, insert our reference at the Fields end.
593 auto it = pAcroFormDictionary->GetItems().find("Fields");
594 if (it == pAcroFormDictionary->GetItems().end())
595 {
596 SAL_WARN("vcl.filter", "PDFDocument::Sign: AcroForm without required Fields key");
597 return false;
598 }
599
600 auto pFields = dynamic_cast<PDFArrayElement*>(it->second);
601 if (!pFields)
602 {
603 SAL_WARN("vcl.filter", "PDFDocument::Sign: AcroForm Fields is not an array");
604 return false;
605 }
606
607 // Offset right before the end of the Fields array.
608 sal_uInt64 nFieldsEndOffset = pAcroFormDictionary->GetKeyOffset("Fields")
609 + pAcroFormDictionary->GetKeyValueLength("Fields") - 1;
610 // Length of beginning of the Catalog dictionary -> Fields end.
611 sal_uInt64 nFieldsBeforeEndLength = nFieldsEndOffset - pCatalog->GetDictionaryOffset();
612 m_aEditBuffer.WriteBytes(static_cast<const char*>(m_aEditBuffer.GetData())
613 + pCatalog->GetDictionaryOffset(),
614 nFieldsBeforeEndLength);
618 // Length of Fields end -> end of the Catalog dictionary.
619 sal_uInt64 nFieldsAfterEndLength = pCatalog->GetDictionaryOffset()
620 + pCatalog->GetDictionaryLength() - nFieldsEndOffset;
621 m_aEditBuffer.WriteBytes(static_cast<const char*>(m_aEditBuffer.GetData())
622 + nFieldsEndOffset,
623 nFieldsAfterEndLength);
624 }
625 m_aEditBuffer.WriteCharPtr(">>\nendobj\n\n");
626 }
627
628 return true;
629}
630
// Writes the cross-reference information for all dirty entries of m_aXRef to
// the edit buffer.
//
// When the document has a cross-reference stream (m_pXRefStream is set), a new
// xref stream object is written: one 6-byte row per dirty entry, filtered with
// the "up" predictor and then deflate-compressed. Otherwise a classic "xref"
// table followed by a "trailer" dictionary is written.
//
// nXRefOffset: offset recorded for the new xref stream object itself.
// pRoot: catalog reference; not used in any of the lines visible here.
//
// NOTE(review): the embedded numbering has many gaps inside this function
// (case labels of the switch, the bodies of several "if"s, and the statements
// writing /Index, /Length, /Prev, /Root and /Size values are not visible) --
// confirm against the real file before relying on this description.
631void PDFDocument::WriteXRef(sal_uInt64 nXRefOffset, PDFReferenceElement const* pRoot)
632{
633 if (m_pXRefStream)
634 {
635 // Write the xref stream.
636 // This is a bit meta: the xref stream stores its own offset.
637 sal_Int32 nXRefStreamId = m_aXRef.size();
638 XRefEntry aXRefStreamEntry;
639 aXRefStreamEntry.SetOffset(nXRefOffset);
640 aXRefStreamEntry.SetDirty(true);
641 m_aXRef[nXRefStreamId] = aXRefStreamEntry;
642
643 // Write stream data.
644 SvMemoryStream aXRefStream;
// NOTE(review): with a 3-byte offset field only the low 24 bits of an offset
// are stored by the loop below; larger offsets would be truncated -- confirm
// whether bigger files are expected here.
645 const size_t nOffsetLen = 3;
646 // 3 additional bytes: predictor, the first and the third field.
647 const size_t nLineLength = nOffsetLen + 3;
648 // This is the line as it appears before tweaking according to the predictor.
649 std::vector<unsigned char> aOrigLine(nLineLength);
650 // This is the previous line.
651 std::vector<unsigned char> aPrevLine(nLineLength);
652 // This is the line as written to the stream.
653 std::vector<unsigned char> aFilteredLine(nLineLength);
654 for (const auto& rXRef : m_aXRef)
655 {
656 const XRefEntry& rEntry = rXRef.second;
657
// Only entries changed by this incremental update are emitted.
658 if (!rEntry.GetDirty())
659 continue;
660
661 // Predictor.
662 size_t nPos = 0;
663 // PNG prediction: up (on all rows).
664 aOrigLine[nPos++] = 2;
665
666 // First field.
667 unsigned char nType = 0;
668 switch (rEntry.GetType())
669 {
671 nType = 0;
672 break;
674 nType = 1;
675 break;
677 nType = 2;
678 break;
679 }
680 aOrigLine[nPos++] = nType;
681
682 // Second field.
683 for (size_t i = 0; i < nOffsetLen; ++i)
684 {
685 size_t nByte = nOffsetLen - i - 1;
686 // Fields requiring more than one byte are stored with the
687 // high-order byte first.
688 unsigned char nCh = (rEntry.GetOffset() & (0xff << (nByte * 8))) >> (nByte * 8);
689 aOrigLine[nPos++] = nCh;
690 }
691
692 // Third field.
693 aOrigLine[nPos++] = 0;
694
695 // Now apply the predictor.
// The predictor tag byte is copied as-is; every other byte is stored as the
// difference to the same column of the previously written row.
696 aFilteredLine[0] = aOrigLine[0];
697 for (size_t i = 1; i < nLineLength; ++i)
698 {
699 // Count the delta vs the previous line.
700 aFilteredLine[i] = aOrigLine[i] - aPrevLine[i];
701 // Remember the new reference.
702 aPrevLine[i] = aOrigLine[i];
703 }
704
705 aXRefStream.WriteBytes(aFilteredLine.data(), aFilteredLine.size());
706 }
707
708 m_aEditBuffer.WriteUInt32AsString(nXRefStreamId);
710 " 0 obj\n<</DecodeParms<</Columns 5/Predictor 12>>/Filter/FlateDecode");
711
712 // ID.
713 auto pID = dynamic_cast<PDFArrayElement*>(m_pXRefStream->Lookup("ID"));
714 if (pID)
715 {
716 const std::vector<PDFElement*>& rElements = pID->GetElements();
717 m_aEditBuffer.WriteCharPtr("/ID [ <");
718 for (size_t i = 0; i < rElements.size(); ++i)
719 {
720 auto pIDString = dynamic_cast<PDFHexStringElement*>(rElements[i]);
721 if (!pIDString)
722 continue;
723
724 m_aEditBuffer.WriteOString(pIDString->GetValue());
725 if ((i + 1) < rElements.size())
727 }
729 }
730
731 // Index.
732 m_aEditBuffer.WriteCharPtr("/Index [ ");
733 for (const auto& rXRef : m_aXRef)
734 {
735 if (!rXRef.second.GetDirty())
736 continue;
737
740 }
742
743 // Info.
744 auto pInfo = dynamic_cast<PDFReferenceElement*>(m_pXRefStream->Lookup("Info"));
745 if (pInfo)
746 {
747 m_aEditBuffer.WriteCharPtr("/Info ");
748 m_aEditBuffer.WriteUInt32AsString(pInfo->GetObjectValue());
750 m_aEditBuffer.WriteUInt32AsString(pInfo->GetGenerationValue());
752 }
753
754 // Length.
755 m_aEditBuffer.WriteCharPtr("/Length ");
// Compress the filtered rows and replace aXRefStream's content with the
// compressed bytes.
756 {
757 ZCodec aZCodec;
758 aZCodec.BeginCompression();
759 aXRefStream.Seek(0);
760 SvMemoryStream aStream;
761 aZCodec.Compress(aXRefStream, aStream);
762 aZCodec.EndCompression();
763 aXRefStream.Seek(0);
764 aXRefStream.SetStreamSize(0);
765 aStream.Seek(0);
766 aXRefStream.WriteStream(aStream);
767 }
769
770 if (!m_aStartXRefs.empty())
771 {
772 // Write location of the previous cross-reference section.
773 m_aEditBuffer.WriteCharPtr("/Prev ");
775 }
776
777 // Root.
778 m_aEditBuffer.WriteCharPtr("/Root ");
783
784 // Size.
785 m_aEditBuffer.WriteCharPtr("/Size ");
787
788 m_aEditBuffer.WriteCharPtr("/Type/XRef/W[1 3 1]>>\nstream\n");
789 aXRefStream.Seek(0);
790 m_aEditBuffer.WriteStream(aXRefStream);
791 m_aEditBuffer.WriteCharPtr("\nendstream\nendobj\n\n");
792 }
793 else
794 {
795 // Write the xref table.
796 m_aEditBuffer.WriteCharPtr("xref\n");
797 for (const auto& rXRef : m_aXRef)
798 {
799 size_t nObject = rXRef.first;
800 size_t nOffset = rXRef.second.GetOffset();
801 if (!rXRef.second.GetDirty())
802 continue;
803
// The offset is left-padded with zeros to 10 digits.
// NOTE(review): the cast to sal_Int32 narrows nOffset; offsets that do not fit
// into 32 signed bits would be misprinted -- confirm this cannot happen.
806 OStringBuffer aBuffer;
807 aBuffer.append(static_cast<sal_Int32>(nOffset));
808 while (aBuffer.getLength() < 10)
809 aBuffer.insert(0, "0");
810 if (nObject == 0)
811 aBuffer.append(" 65535 f \n");
812 else
813 aBuffer.append(" 00000 n \n");
815 }
816
817 // Write the trailer.
818 m_aEditBuffer.WriteCharPtr("trailer\n<</Size ");
820 m_aEditBuffer.WriteCharPtr("/Root ");
// NOTE(review): m_pTrailer is dereferenced here without a null check in the
// lines visible in this listing -- confirm callers guarantee it is set.
825 auto pInfo = dynamic_cast<PDFReferenceElement*>(m_pTrailer->Lookup("Info"));
826 if (pInfo)
827 {
828 m_aEditBuffer.WriteCharPtr("/Info ");
829 m_aEditBuffer.WriteUInt32AsString(pInfo->GetObjectValue());
831 m_aEditBuffer.WriteUInt32AsString(pInfo->GetGenerationValue());
833 }
834 auto pID = dynamic_cast<PDFArrayElement*>(m_pTrailer->Lookup("ID"));
835 if (pID)
836 {
837 const std::vector<PDFElement*>& rElements = pID->GetElements();
838 m_aEditBuffer.WriteCharPtr("/ID [ <");
839 for (size_t i = 0; i < rElements.size(); ++i)
840 {
841 auto pIDString = dynamic_cast<PDFHexStringElement*>(rElements[i]);
842 if (!pIDString)
843 continue;
844
845 m_aEditBuffer.WriteOString(pIDString->GetValue());
846 if ((i + 1) < rElements.size())
848 }
850 }
851
852 if (!m_aStartXRefs.empty())
853 {
854 // Write location of the previous cross-reference section.
855 m_aEditBuffer.WriteCharPtr("/Prev ");
857 }
858
860 }
861}
862
// Appends an incremental update containing a new signature to the edit buffer
// and fills in the signature value.
//
// Visible steps, in order: write the signature object (with reserved room for
// the value), write the appearance object, pick the target page
// (m_nSignaturePage, or page 0 when that index is out of range), write the
// annotation, the updated page and the updated catalog, write the
// cross-reference section and "startxref"/"%%EOF", patch the last /ByteRange
// value, then sign the two byte ranges around the reserved /Contents area and
// write the result into that area.
//
// xCertificate: certificate used for signing; its encoded form must be non-empty.
// rDescription, bAdES: forwarded to WriteSignatureObject().
// Returns false (with a warning) when there are no pages, the chosen page is
// null, writing the page or catalog object fails, the certificate encodes to
// nothing, or signing fails; returns true otherwise.
//
// NOTE(review): the embedded numbering has gaps at 866-867, 916 and 937, so a
// few statements (at the start of the function, after "startxref", and before
// the first ReadBytes) are missing from this listing -- confirm against the
// real file.
863bool PDFDocument::Sign(const uno::Reference<security::XCertificate>& xCertificate,
864 const OUString& rDescription, bool bAdES)
865{
868
869 sal_uInt64 nSignatureLastByteRangeOffset = 0;
870 sal_Int64 nSignatureContentOffset = 0;
871 sal_Int32 nSignatureId = WriteSignatureObject(
872 rDescription, bAdES, nSignatureLastByteRangeOffset, nSignatureContentOffset);
873
// NOTE(review): the returned id is not checked before it is passed on to
// WriteAnnotObject() below -- confirm whether a failure value can be returned
// here and should abort signing.
874 tools::Rectangle aSignatureRectangle;
875 sal_Int32 nAppearanceId = WriteAppearanceObject(aSignatureRectangle);
876
877 std::vector<PDFObjectElement*> aPages = GetPages();
878 if (aPages.empty())
879 {
880 SAL_WARN("vcl.filter", "PDFDocument::Sign: found no pages");
881 return false;
882 }
883
// An out-of-range m_nSignaturePage silently falls back to the first page.
884 size_t nPage = 0;
885 if (m_nSignaturePage < aPages.size())
886 {
887 nPage = m_nSignaturePage;
888 }
889 if (!aPages[nPage])
890 {
891 SAL_WARN("vcl.filter", "PDFDocument::Sign: failed to find page #" << nPage);
892 return false;
893 }
894
895 PDFObjectElement& rPage = *aPages[nPage];
896 sal_Int32 nAnnotId = WriteAnnotObject(rPage, nSignatureId, nAppearanceId, aSignatureRectangle);
897
898 if (!WritePageObject(rPage, nAnnotId))
899 {
900 SAL_WARN("vcl.filter", "PDFDocument::Sign: failed to write the updated Page object");
901 return false;
902 }
903
904 PDFReferenceElement* pRoot = nullptr;
905 if (!WriteCatalogObject(nAnnotId, pRoot))
906 {
907 SAL_WARN("vcl.filter", "PDFDocument::Sign: failed to write the updated Catalog object");
908 return false;
909 }
910
911 sal_uInt64 nXRefOffset = m_aEditBuffer.Tell();
912 WriteXRef(nXRefOffset, pRoot);
913
914 // Write startxref.
915 m_aEditBuffer.WriteCharPtr("startxref\n");
917 m_aEditBuffer.WriteCharPtr("\n%%EOF\n");
918
919 // Finalize the signature, now that we know the total file size.
920 // Calculate the length of the last byte range.
921 sal_uInt64 nFileEnd = m_aEditBuffer.Tell();
922 sal_Int64 nLastByteRangeLength
923 = nFileEnd - (nSignatureContentOffset + MAX_SIGNATURE_CONTENT_LENGTH + 1);
924 // Write the length to the buffer.
925 m_aEditBuffer.Seek(nSignatureLastByteRangeOffset);
926 OString aByteRangeBuffer = OString::number(nLastByteRangeLength) + " ]";
927 m_aEditBuffer.WriteOString(aByteRangeBuffer);
928
929 // Create the PKCS#7 object.
// NOTE(review): xCertificate is dereferenced here without a visible check that
// the reference is set, and only after the update has already been written
// to the edit buffer.
930 css::uno::Sequence<sal_Int8> aDerEncoded = xCertificate->getEncoded();
931 if (!aDerEncoded.hasElements())
932 {
933 SAL_WARN("vcl.filter", "PDFDocument::Sign: empty certificate");
934 return false;
935 }
936
// First signed range: the nSignatureContentOffset - 1 bytes read into aBuffer1
// (matching the first /ByteRange pair "0, rContentOffset - 1").
938 sal_uInt64 nBufferSize1 = nSignatureContentOffset - 1;
939 std::unique_ptr<char[]> aBuffer1(new char[nBufferSize1]);
940 m_aEditBuffer.ReadBytes(aBuffer1.get(), nBufferSize1);
941
// Second signed range: everything after the ">" closing the reserved area.
942 m_aEditBuffer.Seek(nSignatureContentOffset + MAX_SIGNATURE_CONTENT_LENGTH + 1);
943 sal_uInt64 nBufferSize2 = nLastByteRangeLength;
944 std::unique_ptr<char[]> aBuffer2(new char[nBufferSize2]);
945 m_aEditBuffer.ReadBytes(aBuffer2.get(), nBufferSize2);
946
947 OStringBuffer aCMSHexBuffer;
948 svl::crypto::Signing aSigning(xCertificate);
949 aSigning.AddDataRange(aBuffer1.get(), nBufferSize1);
950 aSigning.AddDataRange(aBuffer2.get(), nBufferSize2);
951 if (!aSigning.Sign(aCMSHexBuffer))
952 {
953 SAL_WARN("vcl.filter", "PDFDocument::Sign: PDFWriter::Sign() failed");
954 return false;
955 }
956
// The size limit is only enforced by this assert, i.e. not in builds where
// assert() is compiled out.
957 assert(aCMSHexBuffer.getLength() <= MAX_SIGNATURE_CONTENT_LENGTH);
958
959 m_aEditBuffer.Seek(nSignatureContentOffset);
960 m_aEditBuffer.WriteOString(aCMSHexBuffer);
961
962 return true;
963}
964
966{
968 rStream.WriteStream(m_aEditBuffer);
969 return rStream.good();
970}
971
973 std::vector<std::unique_ptr<PDFElement>>& rElements,
974 PDFObjectElement* pObjectElement)
975{
976 // Last seen object token.
977 PDFObjectElement* pObject = pObjectElement;
978 PDFNameElement* pObjectKey = nullptr;
979 PDFObjectElement* pObjectStream = nullptr;
980 bool bInXRef = false;
981 // The next number will be an xref offset.
982 bool bInStartXRef = false;
983 // Dictionary depth, so we know when we're outside any dictionaries.
984 int nDepth = 0;
985 // Last seen array token that's outside any dictionaries.
986 PDFArrayElement* pArray = nullptr;
987 // If we're inside an obj/endobj pair.
988 bool bInObject = false;
989
990 while (true)
991 {
992 char ch;
993 rStream.ReadChar(ch);
994 if (rStream.eof())
995 break;
996
997 switch (ch)
998 {
999 case '%':
1000 {
1001 auto pComment = new PDFCommentElement(*this);
1002 rElements.push_back(std::unique_ptr<PDFElement>(pComment));
1003 rStream.SeekRel(-1);
1004 if (!rElements.back()->Read(rStream))
1005 {
1006 SAL_WARN("vcl.filter",
1007 "PDFDocument::Tokenize: PDFCommentElement::Read() failed");
1008 return false;
1009 }
1010 if (eMode == TokenizeMode::EOF_TOKEN && !m_aEOFs.empty()
1011 && m_aEOFs.back() == rStream.Tell())
1012 {
1013 // Found EOF and partial parsing requested, we're done.
1014 return true;
1015 }
1016 break;
1017 }
1018 case '<':
1019 {
1020 // Dictionary or hex string.
1021 rStream.ReadChar(ch);
1022 rStream.SeekRel(-2);
1023 if (ch == '<')
1024 {
1025 rElements.push_back(std::unique_ptr<PDFElement>(new PDFDictionaryElement()));
1026 ++nDepth;
1027 }
1028 else
1029 rElements.push_back(std::unique_ptr<PDFElement>(new PDFHexStringElement));
1030 if (!rElements.back()->Read(rStream))
1031 {
1032 SAL_WARN("vcl.filter",
1033 "PDFDocument::Tokenize: PDFDictionaryElement::Read() failed");
1034 return false;
1035 }
1036 break;
1037 }
1038 case '>':
1039 {
1040 rElements.push_back(std::unique_ptr<PDFElement>(new PDFEndDictionaryElement()));
1041 --nDepth;
1042 rStream.SeekRel(-1);
1043 if (!rElements.back()->Read(rStream))
1044 {
1045 SAL_WARN("vcl.filter",
1046 "PDFDocument::Tokenize: PDFEndDictionaryElement::Read() failed");
1047 return false;
1048 }
1049 break;
1050 }
1051 case '[':
1052 {
1053 auto pArr = new PDFArrayElement(pObject);
1054 rElements.push_back(std::unique_ptr<PDFElement>(pArr));
1055 if (nDepth == 0)
1056 {
1057 // The array is attached directly, inform the object.
1058 pArray = pArr;
1059 if (pObject)
1060 {
1061 pObject->SetArray(pArray);
1062 pObject->SetArrayOffset(rStream.Tell());
1063 }
1064 }
1065 ++nDepth;
1066 rStream.SeekRel(-1);
1067 if (!rElements.back()->Read(rStream))
1068 {
1069 SAL_WARN("vcl.filter", "PDFDocument::Tokenize: PDFArrayElement::Read() failed");
1070 return false;
1071 }
1072 break;
1073 }
1074 case ']':
1075 {
1076 rElements.push_back(std::unique_ptr<PDFElement>(new PDFEndArrayElement()));
1077 --nDepth;
1078 rStream.SeekRel(-1);
1079 if (nDepth == 0)
1080 {
1081 if (pObject)
1082 {
1083 pObject->SetArrayLength(rStream.Tell() - pObject->GetArrayOffset());
1084 }
1085 }
1086 if (!rElements.back()->Read(rStream))
1087 {
1088 SAL_WARN("vcl.filter",
1089 "PDFDocument::Tokenize: PDFEndArrayElement::Read() failed");
1090 return false;
1091 }
1092 break;
1093 }
1094 case '/':
1095 {
1096 auto pNameElement = new PDFNameElement();
1097 rElements.push_back(std::unique_ptr<PDFElement>(pNameElement));
1098 rStream.SeekRel(-1);
1099 if (!pNameElement->Read(rStream))
1100 {
1101 SAL_WARN("vcl.filter", "PDFDocument::Tokenize: PDFNameElement::Read() failed");
1102 return false;
1103 }
1104
1105 if (pObject && pObjectKey && pObjectKey->GetValue() == "Type"
1106 && pNameElement->GetValue() == "ObjStm")
1107 pObjectStream = pObject;
1108 else
1109 pObjectKey = pNameElement;
1110 break;
1111 }
1112 case '(':
1113 {
1114 rElements.push_back(std::unique_ptr<PDFElement>(new PDFLiteralStringElement));
1115 rStream.SeekRel(-1);
1116 if (!rElements.back()->Read(rStream))
1117 {
1118 SAL_WARN("vcl.filter",
1119 "PDFDocument::Tokenize: PDFLiteralStringElement::Read() failed");
1120 return false;
1121 }
1122 break;
1123 }
1124 default:
1125 {
1126 if (rtl::isAsciiDigit(static_cast<unsigned char>(ch)) || ch == '-' || ch == '+'
1127 || ch == '.')
1128 {
1129 // Numbering object: an integer or a real.
1130 auto pNumberElement = new PDFNumberElement();
1131 rElements.push_back(std::unique_ptr<PDFElement>(pNumberElement));
1132 rStream.SeekRel(-1);
1133 if (!pNumberElement->Read(rStream))
1134 {
1135 SAL_WARN("vcl.filter",
1136 "PDFDocument::Tokenize: PDFNumberElement::Read() failed");
1137 return false;
1138 }
1139 if (bInStartXRef)
1140 {
1141 bInStartXRef = false;
1142 m_aStartXRefs.push_back(pNumberElement->GetValue());
1143
1144 auto it = m_aOffsetObjects.find(pNumberElement->GetValue());
1145 if (it != m_aOffsetObjects.end())
1146 m_pXRefStream = it->second;
1147 }
1148 else if (bInObject && !nDepth && pObject)
1149 // Number element inside an object, but outside a
1150 // dictionary / array: remember it.
1151 pObject->SetNumberElement(pNumberElement);
1152 }
1153 else if (rtl::isAsciiAlpha(static_cast<unsigned char>(ch)))
1154 {
1155 // Possible keyword, like "obj".
1156 rStream.SeekRel(-1);
1157 OString aKeyword = ReadKeyword(rStream);
1158
1159 bool bObj = aKeyword == "obj";
1160 if (bObj || aKeyword == "R")
1161 {
1162 size_t nElements = rElements.size();
1163 if (nElements < 2)
1164 {
1165 SAL_WARN("vcl.filter", "PDFDocument::Tokenize: expected at least two "
1166 "tokens before 'obj' or 'R' keyword");
1167 return false;
1168 }
1169
1170 auto pObjectNumber
1171 = dynamic_cast<PDFNumberElement*>(rElements[nElements - 2].get());
1172 auto pGenerationNumber
1173 = dynamic_cast<PDFNumberElement*>(rElements[nElements - 1].get());
1174 if (!pObjectNumber || !pGenerationNumber)
1175 {
1176 SAL_WARN("vcl.filter", "PDFDocument::Tokenize: missing object or "
1177 "generation number before 'obj' or 'R' keyword");
1178 return false;
1179 }
1180
1181 if (bObj)
1182 {
1183 pObject = new PDFObjectElement(*this, pObjectNumber->GetValue(),
1184 pGenerationNumber->GetValue());
1185 rElements.push_back(std::unique_ptr<PDFElement>(pObject));
1186 m_aOffsetObjects[pObjectNumber->GetLocation()] = pObject;
1187 m_aIDObjects[pObjectNumber->GetValue()] = pObject;
1188 bInObject = true;
1189 }
1190 else
1191 {
1192 auto pReference = new PDFReferenceElement(*this, *pObjectNumber,
1193 *pGenerationNumber);
1194 rElements.push_back(std::unique_ptr<PDFElement>(pReference));
1195 if (bInObject && nDepth > 0 && pObject)
1196 // Inform the object about a new in-dictionary reference.
1197 pObject->AddDictionaryReference(pReference);
1198 }
1199 if (!rElements.back()->Read(rStream))
1200 {
1201 SAL_WARN("vcl.filter",
1202 "PDFDocument::Tokenize: PDFElement::Read() failed");
1203 return false;
1204 }
1205 }
1206 else if (aKeyword == "stream")
1207 {
1208 // Look up the length of the stream from the parent object's dictionary.
1209 size_t nLength = 0;
1210 for (size_t nElement = 0; nElement < rElements.size(); ++nElement)
1211 {
1212 // Iterate in reverse order.
1213 size_t nIndex = rElements.size() - nElement - 1;
1214 PDFElement* pElement = rElements[nIndex].get();
1215 auto pObj = dynamic_cast<PDFObjectElement*>(pElement);
1216 if (!pObj)
1217 continue;
1218
1219 PDFElement* pLookup = pObj->Lookup("Length");
1220 auto pReference = dynamic_cast<PDFReferenceElement*>(pLookup);
1221 if (pReference)
1222 {
1223 // Length is provided as a reference.
1224 nLength = pReference->LookupNumber(rStream);
1225 break;
1226 }
1227
1228 auto pNumber = dynamic_cast<PDFNumberElement*>(pLookup);
1229 if (pNumber)
1230 {
1231 // Length is provided directly.
1232 nLength = pNumber->GetValue();
1233 break;
1234 }
1235
1236 SAL_WARN(
1237 "vcl.filter",
1238 "PDFDocument::Tokenize: found no Length key for stream keyword");
1239 return false;
1240 }
1241
1243 auto pStreamElement = new PDFStreamElement(nLength);
1244 if (pObject)
1245 pObject->SetStream(pStreamElement);
1246 rElements.push_back(std::unique_ptr<PDFElement>(pStreamElement));
1247 if (!rElements.back()->Read(rStream))
1248 {
1249 SAL_WARN("vcl.filter",
1250 "PDFDocument::Tokenize: PDFStreamElement::Read() failed");
1251 return false;
1252 }
1253 }
1254 else if (aKeyword == "endstream")
1255 {
1256 rElements.push_back(std::unique_ptr<PDFElement>(new PDFEndStreamElement));
1257 if (!rElements.back()->Read(rStream))
1258 {
1259 SAL_WARN("vcl.filter",
1260 "PDFDocument::Tokenize: PDFEndStreamElement::Read() failed");
1261 return false;
1262 }
1263 }
1264 else if (aKeyword == "endobj")
1265 {
1266 rElements.push_back(std::unique_ptr<PDFElement>(new PDFEndObjectElement));
1267 if (!rElements.back()->Read(rStream))
1268 {
1269 SAL_WARN("vcl.filter",
1270 "PDFDocument::Tokenize: PDFEndObjectElement::Read() failed");
1271 return false;
1272 }
1274 {
1275 // Found endobj and only object parsing was requested, we're done.
1276 return true;
1277 }
1278
1279 if (pObjectStream)
1280 {
1281 // We're at the end of an object stream, parse the stored objects.
1282 pObjectStream->ParseStoredObjects();
1283 pObjectStream = nullptr;
1284 pObjectKey = nullptr;
1285 }
1286 bInObject = false;
1287 }
1288 else if (aKeyword == "true" || aKeyword == "false")
1289 rElements.push_back(std::unique_ptr<PDFElement>(
1290 new PDFBooleanElement(aKeyword.toBoolean())));
1291 else if (aKeyword == "null")
1292 rElements.push_back(std::unique_ptr<PDFElement>(new PDFNullElement));
1293 else if (aKeyword == "xref")
1294 // Allow 'f' and 'n' keywords.
1295 bInXRef = true;
1296 else if (bInXRef && (aKeyword == "f" || aKeyword == "n"))
1297 {
1298 }
1299 else if (aKeyword == "trailer")
1300 {
1301 auto pTrailer = new PDFTrailerElement(*this);
1302
1303 // Make it possible to find this trailer later by offset.
1304 pTrailer->Read(rStream);
1305 m_aOffsetTrailers[pTrailer->GetLocation()] = pTrailer;
1306
1307 // When reading till the first EOF token only, remember
1308 // just the first trailer token.
1310 m_pTrailer = pTrailer;
1311 rElements.push_back(std::unique_ptr<PDFElement>(pTrailer));
1312 }
1313 else if (aKeyword == "startxref")
1314 {
1315 bInStartXRef = true;
1316 }
1317 else
1318 {
1319 SAL_WARN("vcl.filter", "PDFDocument::Tokenize: unexpected '"
1320 << aKeyword << "' keyword at byte position "
1321 << rStream.Tell());
1322 return false;
1323 }
1324 }
1325 else
1326 {
1327 auto uChar = static_cast<unsigned char>(ch);
1328 // Be more lenient and allow unexpected null char
1329 if (!rtl::isAsciiWhiteSpace(uChar) && uChar != 0)
1330 {
1331 SAL_WARN("vcl.filter",
1332 "PDFDocument::Tokenize: unexpected character with code "
1333 << sal_Int32(ch) << " at byte position " << rStream.Tell());
1334 return false;
1335 }
1336 SAL_WARN_IF(uChar == 0, "vcl.filter",
1337 "PDFDocument::Tokenize: unexpected null character at "
1338 << rStream.Tell() << " - ignoring");
1339 }
1340 break;
1341 }
1342 }
1343 }
1344
1345 return true;
1346}
1347
// NOTE(review): the declaration line for this body is missing from this listing; nID and
// pObject are presumably its parameters -- confirm against the header.
// Stores pObject in m_aIDObjects under the key nID.
1349{
1350 m_aIDObjects[nID] = pObject;
1351}
1352
1354{
1355 if (Read(rStream))
1356 return true;
1357
1358 // Read failed, try a roundtrip through pdfium and then retry.
1359 rStream.Seek(0);
1360 SvMemoryStream aStandardizedStream;
1361 vcl::pdf::convertToHighestSupported(rStream, aStandardizedStream);
1362 return Read(aStandardizedStream);
1363}
1364
// NOTE(review): the declaration line for this body is missing from this listing; the log
// messages below identify it as PDFDocument::Read, with the input stream named rStream.
//
// Parses a PDF from rStream:
//  - rejects input that does not start with "%PDF-",
//  - copies the whole stream into m_aEditBuffer,
//  - walks the chain of cross-reference sections, starting at the offset returned by
//    FindStartXRef() and following "Prev" entries,
//  - finally tokenizes the complete stream into m_aElements.
// Returns false on a header mismatch, when no startxref offset is found, when an xref offset
// starts with an unexpected keyword, or when tokenizing fails.
1366{
1367 // Check file magic.
1368 std::vector<sal_Int8> aHeader(5);
1369 rStream.Seek(0);
1370 rStream.ReadBytes(aHeader.data(), aHeader.size());
1371 if (aHeader[0] != '%' || aHeader[1] != 'P' || aHeader[2] != 'D' || aHeader[3] != 'F'
1372 || aHeader[4] != '-')
1373 {
1374 SAL_WARN("vcl.filter", "PDFDocument::Read: header mismatch");
1375 return false;
1376 }
1377
1378 // Allow later editing of the contents in-memory.
1379 rStream.Seek(0);
1380 m_aEditBuffer.WriteStream(rStream);
1381
1382 // Look up the offset of the xref table.
1383 size_t nStartXRef = FindStartXRef(rStream);
1384 SAL_INFO("vcl.filter", "PDFDocument::Read: nStartXRef is " << nStartXRef);
1385 if (nStartXRef == 0)
1386 {
1387 SAL_WARN("vcl.filter", "PDFDocument::Read: found no xref start offset");
1388 return false;
1389 }
    // Each iteration handles one cross-reference section; a "Prev" entry, if present, provides
    // the offset for the next iteration.
1390 while (true)
1391 {
1392 rStream.Seek(nStartXRef);
1393 OString aKeyword = ReadKeyword(rStream);
    // ReadKeyword() only collects ASCII letters, so an empty result means the offset does not
    // start with a keyword; that case is handed to ReadXRefStream().
1394 if (aKeyword.isEmpty())
1395 ReadXRefStream(rStream);
1396
1397 else
1398 {
1399 if (aKeyword != "xref")
1400 {
1401 SAL_WARN("vcl.filter", "PDFDocument::Read: xref is not the first keyword");
1402 return false;
1403 }
1404 ReadXRef(rStream);
1405 if (!Tokenize(rStream, TokenizeMode::EOF_TOKEN, m_aElements, nullptr))
1406 {
1407 SAL_WARN("vcl.filter", "PDFDocument::Read: failed to tokenizer trailer after xref");
1408 return false;
1409 }
1410 }
1411
1412 PDFNumberElement* pPrev = nullptr;
1413 if (m_pTrailer)
1414 {
1415 pPrev = dynamic_cast<PDFNumberElement*>(m_pTrailer->Lookup("Prev"));
1416
1417 // Remember the offset of this trailer in the correct order. It's
1418 // possible that newer trailers don't have a larger offset.
    // NOTE(review): the statement this comment describes is missing from this listing.
1420 }
1421 else if (m_pXRefStream)
1422 pPrev = dynamic_cast<PDFNumberElement*>(m_pXRefStream->Lookup("Prev"));
1423 if (pPrev)
1424 nStartXRef = pPrev->GetValue();
1425
1426 // Reset state, except the edit buffer.
1427 m_aElements.clear();
1428 m_aOffsetObjects.clear();
1429 m_aIDObjects.clear();
1430 m_aStartXRefs.clear();
1431 m_aEOFs.clear();
1432 m_pTrailer = nullptr;
1433 m_pXRefStream = nullptr;
    // No "Prev" entry: this was the last cross-reference section of the chain.
1434 if (!pPrev)
1435 break;
1436 }
1437
1438 // Then we can tokenize the stream.
1439 rStream.Seek(0);
1440 return Tokenize(rStream, TokenizeMode::END_OF_STREAM, m_aElements, nullptr);
1441}
1442
1444{
1445 OStringBuffer aBuf;
1446 char ch;
1447 rStream.ReadChar(ch);
1448 if (rStream.eof())
1449 return {};
1450 while (rtl::isAsciiAlpha(static_cast<unsigned char>(ch)))
1451 {
1452 aBuf.append(ch);
1453 rStream.ReadChar(ch);
1454 if (rStream.eof())
1455 return aBuf.toString();
1456 }
1457 rStream.SeekRel(-1);
1458 return aBuf.toString();
1459}
1460
// NOTE(review): the declaration line for this body is missing from this listing; the log
// messages below identify it as PDFDocument::FindStartXRef, with the stream named rStream.
//
// Searches the last (at most) 1024 bytes of rStream for the final "startxref" keyword and
// returns the number that follows it. Returns 0 when the keyword is not found, when the
// stream ends right after it, or when no number can be read.
1462{
1463 // Find the "startxref" token, somewhere near the end of the document.
1464 std::vector<char> aBuf(1024);
1465 rStream.Seek(STREAM_SEEK_TO_END);
1466 if (rStream.Tell() > aBuf.size())
1467 rStream.SeekRel(static_cast<sal_Int64>(-1) * aBuf.size());
1468 else
1469 // The document is really short, then just read it from the start.
1470 rStream.Seek(0);
    // Peek at the tail: remember where the buffer starts, read it, then seek back so the
    // relative seek further down is measured from the start of the buffer.
1471 size_t nBeforePeek = rStream.Tell();
1472 size_t nSize = rStream.ReadBytes(aBuf.data(), aBuf.size());
1473 rStream.Seek(nBeforePeek);
1474 if (nSize != aBuf.size())
1475 aBuf.resize(nSize);
1476 OString aPrefix("startxref");
1477 // Find the last startxref at the end of the document.
1478 auto itLastValid = aBuf.end();
1479 auto it = aBuf.begin();
1480 while (true)
1481 {
1482 it = std::search(it, aBuf.end(), aPrefix.getStr(), aPrefix.getStr() + aPrefix.getLength());
1483 if (it == aBuf.end())
1484 break;
1485
1486 itLastValid = it;
1487 ++it;
1488 }
1489 if (itLastValid == aBuf.end())
1490 {
1491 SAL_WARN("vcl.filter", "PDFDocument::FindStartXRef: found no startxref");
1492 return 0;
1493 }
1494
    // Position the stream just behind the keyword.
1495 rStream.SeekRel(itLastValid - aBuf.begin() + aPrefix.getLength());
1496 if (rStream.eof())
1497 {
1498 SAL_WARN("vcl.filter",
1499 "PDFDocument::FindStartXRef: unexpected end of stream after startxref");
1500 return 0;
1501 }
1502
    // NOTE(review): one statement of the original is missing from this listing at this point.
1504 PDFNumberElement aNumber;
1505 if (!aNumber.Read(rStream))
1506 return 0;
1507 return aNumber.GetValue();
1508}
1509
// NOTE(review): the declaration line for this body is missing from this listing; the log
// messages below identify it as PDFDocument::ReadXRefStream, with the stream named rStream.
//
// Reads one cross-reference stream object starting at the current position of rStream and
// adds its entries to m_aXRef. Indexes that are already present in m_aXRef are left alone.
// The object is tokenized into m_aElements; its Length, Filter, DecodeParms, Index/Size and W
// entries are evaluated. Only FlateDecode data is accepted. On any problem a warning is
// logged and the function returns without reporting the failure to the caller.
1511{
1512 // Look up the stream length in the object dictionary.
1513 if (!Tokenize(rStream, TokenizeMode::END_OF_OBJECT, m_aElements, nullptr))
1514 {
1515 SAL_WARN("vcl.filter", "PDFDocument::ReadXRefStream: failed to read object");
1516 return;
1517 }
1518
1519 if (m_aElements.empty())
1520 {
1521 SAL_WARN("vcl.filter", "PDFDocument::ReadXRefStream: no tokens found");
1522 return;
1523 }
1524
    // The first object element among the tokens is taken as the xref stream object.
1525 PDFObjectElement* pObject = nullptr;
1526 for (const auto& pElement : m_aElements)
1527 {
1528 if (auto pObj = dynamic_cast<PDFObjectElement*>(pElement.get()))
1529 {
1530 pObject = pObj;
1531 break;
1532 }
1533 }
1534 if (!pObject)
1535 {
1536 SAL_WARN("vcl.filter", "PDFDocument::ReadXRefStream: no object token found");
1537 return;
1538 }
1539
1540 // So that the Prev key can be looked up later.
    // NOTE(review): the statement this comment describes is missing from this listing.
1542
    // Only a direct number is accepted as Length here; a reference is reported as missing.
1543 PDFElement* pLookup = pObject->Lookup("Length");
1544 auto pNumber = dynamic_cast<PDFNumberElement*>(pLookup);
1545 if (!pNumber)
1546 {
1547 SAL_WARN("vcl.filter", "PDFDocument::ReadXRefStream: stream length is not provided");
1548 return;
1549 }
1550 sal_uInt64 nLength = pNumber->GetValue();
1551
1552 // Look up the stream offset.
1553 PDFStreamElement* pStream = nullptr;
1554 for (const auto& pElement : m_aElements)
1555 {
1556 if (auto pS = dynamic_cast<PDFStreamElement*>(pElement.get()))
1557 {
1558 pStream = pS;
1559 break;
1560 }
1561 }
1562 if (!pStream)
1563 {
1564 SAL_WARN("vcl.filter", "PDFDocument::ReadXRefStream: no stream token found");
1565 return;
1566 }
1567
1568 // Read and decompress it.
1569 rStream.Seek(pStream->GetOffset());
1570 std::vector<char> aBuf(nLength);
1571 rStream.ReadBytes(aBuf.data(), aBuf.size());
1572
1573 auto pFilter = dynamic_cast<PDFNameElement*>(pObject->Lookup("Filter"));
1574 if (!pFilter)
1575 {
1576 SAL_WARN("vcl.filter", "PDFDocument::ReadXRefStream: no Filter found");
1577 return;
1578 }
1579
1580 if (pFilter->GetValue() != "FlateDecode")
1581 {
1582 SAL_WARN("vcl.filter",
1583 "PDFDocument::ReadXRefStream: unexpected filter: " << pFilter->GetValue());
1584 return;
1585 }
1586
    // Both values default to 1 when DecodeParms or the respective key is absent.
1587 int nColumns = 1;
1588 int nPredictor = 1;
1589 if (auto pDecodeParams = dynamic_cast<PDFDictionaryElement*>(pObject->Lookup("DecodeParms")))
1590 {
1591 const std::map<OString, PDFElement*>& rItems = pDecodeParams->GetItems();
1592 auto it = rItems.find("Columns");
1593 if (it != rItems.end())
1594 if (auto pColumns = dynamic_cast<PDFNumberElement*>(it->second))
1595 nColumns = pColumns->GetValue();
1596 it = rItems.find("Predictor");
1597 if (it != rItems.end())
1598 if (auto pPredictor = dynamic_cast<PDFNumberElement*>(it->second))
1599 nPredictor = pPredictor->GetValue();
1600 }
1601
    // Inflate the raw stream bytes (aSource) into aStream.
1602 SvMemoryStream aSource(aBuf.data(), aBuf.size(), StreamMode::READ);
1603 SvMemoryStream aStream;
1604 ZCodec aZCodec;
1605 aZCodec.BeginCompression();
1606 aZCodec.Decompress(aSource, aStream);
1607 if (!aZCodec.EndCompression())
1608 {
1609 SAL_WARN("vcl.filter", "PDFDocument::ReadXRefStream: decompression failed");
1610 return;
1611 }
1612
1613 // Look up the first and the last entry we need to read.
    // aFirstObjects[k] / aNumberOfObjects[k] describe subsection k. Without an Index array a
    // single subsection starting at 0 with Size entries is used.
1614 auto pIndex = dynamic_cast<PDFArrayElement*>(pObject->Lookup("Index"));
1615 std::vector<size_t> aFirstObjects;
1616 std::vector<size_t> aNumberOfObjects;
1617 if (!pIndex)
1618 {
1619 auto pSize = dynamic_cast<PDFNumberElement*>(pObject->Lookup("Size"));
1620 if (pSize)
1621 {
1622 aFirstObjects.push_back(0);
1623 aNumberOfObjects.push_back(pSize->GetValue());
1624 }
1625 else
1626 {
1627 SAL_WARN("vcl.filter", "PDFDocument::ReadXRefStream: Index and Size not found");
1628 return;
1629 }
1630 }
1631 else
1632 {
    // Index is consumed pairwise: even positions hold the first object number, odd positions
    // the number of objects. A trailing unpaired first-object value is ignored.
1633 const std::vector<PDFElement*>& rIndexElements = pIndex->GetElements();
1634 size_t nFirstObject = 0;
1635 for (size_t i = 0; i < rIndexElements.size(); ++i)
1636 {
1637 if (i % 2 == 0)
1638 {
1639 auto pFirstObject = dynamic_cast<PDFNumberElement*>(rIndexElements[i]);
1640 if (!pFirstObject)
1641 {
1642 SAL_WARN("vcl.filter",
1643 "PDFDocument::ReadXRefStream: Index has no first object");
1644 return;
1645 }
1646 nFirstObject = pFirstObject->GetValue();
1647 continue;
1648 }
1649
1650 auto pNumberOfObjects = dynamic_cast<PDFNumberElement*>(rIndexElements[i]);
1651 if (!pNumberOfObjects)
1652 {
1653 SAL_WARN("vcl.filter",
1654 "PDFDocument::ReadXRefStream: Index has no number of objects");
1655 return;
1656 }
1657 aFirstObjects.push_back(nFirstObject);
1658 aNumberOfObjects.push_back(pNumberOfObjects->GetValue());
1659 }
1660 }
1661
1662 // Look up the format of a single entry.
    // aW[0..2] are the byte widths of the type, offset and generation fields of one entry.
1663 const int nWSize = 3;
1664 auto pW = dynamic_cast<PDFArrayElement*>(pObject->Lookup("W"));
1665 if (!pW || pW->GetElements().size() < nWSize)
1666 {
1667 SAL_WARN("vcl.filter", "PDFDocument::ReadXRefStream: W not found or has < 3 elements");
1668 return;
1669 }
1670 int aW[nWSize];
1671 // First character is the (kind of) repeated predictor.
1672 int nLineLength = 1;
1673 for (size_t i = 0; i < nWSize; ++i)
1674 {
1675 auto pI = dynamic_cast<PDFNumberElement*>(pW->GetElements()[i]);
1676 if (!pI)
1677 {
1678 SAL_WARN("vcl.filter", "PDFDocument::ReadXRefStream: W contains non-number");
1679 return;
1680 }
1681 aW[i] = pI->GetValue();
1682 nLineLength += aW[i];
1683 }
1684
1685 if (nPredictor > 1 && nLineLength - 1 != nColumns)
1686 {
1687 SAL_WARN("vcl.filter",
1688 "PDFDocument::ReadXRefStream: /DecodeParms/Columns is inconsistent with /W");
1689 return;
1690 }
1691
1692 aStream.Seek(0);
1693 for (size_t nSubSection = 0; nSubSection < aFirstObjects.size(); ++nSubSection)
1694 {
1695 size_t nFirstObject = aFirstObjects[nSubSection];
1696 size_t nNumberOfObjects = aNumberOfObjects[nSubSection];
1697
1698 // This is the line as read from the stream.
1699 std::vector<unsigned char> aOrigLine(nLineLength);
1700 // This is the line as it appears after tweaking according to nPredictor.
    // It is created once per subsection and not reset per entry, so for predictor 12 each row
    // is added on top of the previous decoded row.
1701 std::vector<unsigned char> aFilteredLine(nLineLength);
1702 for (size_t nEntry = 0; nEntry < nNumberOfObjects; ++nEntry)
1703 {
1704 size_t nIndex = nFirstObject + nEntry;
1705
    // NOTE(review): the number of bytes actually read is not checked here.
1706 aStream.ReadBytes(aOrigLine.data(), aOrigLine.size());
1707 if (nPredictor > 1 && aOrigLine[0] + 10 != nPredictor)
1708 {
1709 SAL_WARN("vcl.filter", "PDFDocument::ReadXRefStream: in-stream predictor is "
1710 "inconsistent with /DecodeParms/Predictor for object #"
1711 << nIndex);
1712 return;
1713 }
1714
    // NOTE(review): for nPredictor == 1 nothing is written to aFilteredLine, so the fields
    // decoded below are read from a zero-filled buffer -- confirm whether that is intended.
1715 for (int i = 0; i < nLineLength; ++i)
1716 {
1717 switch (nPredictor)
1718 {
1719 case 1:
1720 // No prediction.
1721 break;
1722 case 12:
1723 // PNG prediction: up (on all rows).
1724 aFilteredLine[i] = aFilteredLine[i] + aOrigLine[i];
1725 break;
1726 default:
1727 SAL_WARN("vcl.filter", "PDFDocument::ReadXRefStream: unexpected predictor: "
1728 << nPredictor);
1729 return;
1730 }
1731 }
1732
    // The three fields are decoded as big-endian integers of aW[0], aW[1] and aW[2] bytes.
1733 // First character is already handled above.
1734 int nPos = 1;
1735 size_t nType = 0;
1736 // Start of the current field in the stream data.
1737 int nOffset = nPos;
1738 for (; nPos < nOffset + aW[0]; ++nPos)
1739 {
1740 unsigned char nCh = aFilteredLine[nPos];
1741 nType = (nType << 8) + nCh;
1742 }
1743
1744 // Start of the object in the file stream.
1745 size_t nStreamOffset = 0;
1746 nOffset = nPos;
1747 for (; nPos < nOffset + aW[1]; ++nPos)
1748 {
1749 unsigned char nCh = aFilteredLine[nPos];
1750 nStreamOffset = (nStreamOffset << 8) + nCh;
1751 }
1752
1753 // Generation number of the object.
1754 size_t nGenerationNumber = 0;
1755 nOffset = nPos;
1756 for (; nPos < nOffset + aW[2]; ++nPos)
1757 {
1758 unsigned char nCh = aFilteredLine[nPos];
1759 nGenerationNumber = (nGenerationNumber << 8) + nCh;
1760 }
1761
1762 // Ignore invalid nType.
1763 if (nType <= 2)
1764 {
    // Only add indexes that are not known yet; existing entries are kept.
1765 if (m_aXRef.find(nIndex) == m_aXRef.end())
1766 {
1767 XRefEntry aEntry;
    // NOTE(review): the statement of each case below is missing from this listing.
1768 switch (nType)
1769 {
1770 case 0:
1772 break;
1773 case 1:
1775 break;
1776 case 2:
1778 break;
1779 }
1780 aEntry.SetOffset(nStreamOffset);
1781 m_aXRef[nIndex] = aEntry;
1782 }
1783 }
1784 }
1785 }
1786}
1787
// NOTE(review): the declaration line for this body is missing from this listing; the log
// messages below identify it as PDFDocument::ReadXRef, with the stream named rStream.
// Several statements inside the body are missing from the listing as well.
//
// Reads the subsections of a classic cross-reference table from rStream. Each subsection
// starts with a first object number and an entry count; each entry is an offset, a
// generation number and an 'f' or 'n' keyword. New indexes are added to m_aXRef; indexes
// that are already present are left alone. Returns when the next token is not a number or
// when an entry is malformed (the latter is logged).
1789{
1791
1792 while (true)
1793 {
1794 PDFNumberElement aFirstObject;
1795 if (!aFirstObject.Read(rStream))
1796 {
1797 // Next token is not a number, it'll be the trailer.
1798 return;
1799 }
1800
1801 if (aFirstObject.GetValue() < 0)
1802 {
1803 SAL_WARN("vcl.filter", "PDFDocument::ReadXRef: expected first object number >= 0");
1804 return;
1805 }
1806
1808 PDFNumberElement aNumberOfEntries;
1809 if (!aNumberOfEntries.Read(rStream))
1810 {
1811 SAL_WARN("vcl.filter", "PDFDocument::ReadXRef: failed to read number of entries");
1812 return;
1813 }
1814
1815 if (aNumberOfEntries.GetValue() < 0)
1816 {
1817 SAL_WARN("vcl.filter", "PDFDocument::ReadXRef: expected zero or more entries");
1818 return;
1819 }
1820
1821 size_t nSize = aNumberOfEntries.GetValue();
1822 for (size_t nEntry = 0; nEntry < nSize; ++nEntry)
1823 {
    // Object number described by this entry.
1824 size_t nIndex = aFirstObject.GetValue() + nEntry;
1826 PDFNumberElement aOffset;
1827 if (!aOffset.Read(rStream))
1828 {
1829 SAL_WARN("vcl.filter", "PDFDocument::ReadXRef: failed to read offset");
1830 return;
1831 }
1832
1834 PDFNumberElement aGenerationNumber;
1835 if (!aGenerationNumber.Read(rStream))
1836 {
1837 SAL_WARN("vcl.filter", "PDFDocument::ReadXRef: failed to read generation number");
1838 return;
1839 }
1840
1842 OString aKeyword = ReadKeyword(rStream);
1843 if (aKeyword != "f" && aKeyword != "n")
1844 {
1845 SAL_WARN("vcl.filter", "PDFDocument::ReadXRef: unexpected keyword");
1846 return;
1847 }
1848 // xrefs are read in reverse order, so never update an existing
1849 // offset with an older one.
1850 if (m_aXRef.find(nIndex) == m_aXRef.end())
1851 {
1852 XRefEntry aEntry;
1853 aEntry.SetOffset(aOffset.GetValue());
1854 // Initially only the first entry is dirty.
1855 if (nIndex == 0)
1856 aEntry.SetDirty(true);
1857 m_aXRef[nIndex] = aEntry;
1858 }
1860 }
1861 }
1862}
1863
1865{
1866 char ch = 0;
1867
1868 while (true)
1869 {
1870 rStream.ReadChar(ch);
1871 if (rStream.eof())
1872 break;
1873
1874 if (!rtl::isAsciiWhiteSpace(static_cast<unsigned char>(ch)))
1875 {
1876 rStream.SeekRel(-1);
1877 return;
1878 }
1879 }
1880}
1881
1883{
1884 char ch = 0;
1885
1886 while (true)
1887 {
1888 rStream.ReadChar(ch);
1889 if (rStream.eof())
1890 break;
1891
1892 if (ch != '\n' && ch != '\r')
1893 {
1894 rStream.SeekRel(-1);
1895 return;
1896 }
1897 }
1898}
1899
1900size_t PDFDocument::GetObjectOffset(size_t nIndex) const
1901{
1902 auto it = m_aXRef.find(nIndex);
1903 if (it == m_aXRef.end() || it->second.GetType() == XRefEntryType::COMPRESSED)
1904 {
1905 SAL_WARN("vcl.filter", "PDFDocument::GetObjectOffset: wanted to look up index #"
1906 << nIndex << ", but failed");
1907 return 0;
1908 }
1909
1910 return it->second.GetOffset();
1911}
1912
1913const std::vector<std::unique_ptr<PDFElement>>& PDFDocument::GetElements() const
1914{
1915 return m_aElements;
1916}
1917
1919static void visitPages(PDFObjectElement* pPages, std::vector<PDFObjectElement*>& rRet)
1920{
1921 auto pKids = dynamic_cast<PDFArrayElement*>(pPages->Lookup("Kids"));
1922 if (!pKids)
1923 {
1924 SAL_WARN("vcl.filter", "visitPages: pages has no kids");
1925 return;
1926 }
1927
1928 pPages->setVisiting(true);
1929
1930 for (const auto& pKid : pKids->GetElements())
1931 {
1932 auto pReference = dynamic_cast<PDFReferenceElement*>(pKid);
1933 if (!pReference)
1934 continue;
1935
1936 PDFObjectElement* pKidObject = pReference->LookupObject();
1937 if (!pKidObject)
1938 continue;
1939
1940 // detect if visiting reenters itself
1941 if (pKidObject->alreadyVisiting())
1942 {
1943 SAL_WARN("vcl.filter", "visitPages: loop in hierarchy");
1944 continue;
1945 }
1946
1947 auto pName = dynamic_cast<PDFNameElement*>(pKidObject->Lookup("Type"));
1948 if (pName && pName->GetValue() == "Pages")
1949 // Pages inside pages: recurse.
1950 visitPages(pKidObject, rRet);
1951 else
1952 // Found an actual page.
1953 rRet.push_back(pKidObject);
1954 }
1955
1956 pPages->setVisiting(false);
1957}
1958
1960{
1961 PDFReferenceElement* pRoot = nullptr;
1962
1963 PDFTrailerElement* pTrailer = nullptr;
1964 if (!m_aTrailerOffsets.empty())
1965 {
1966 // Get access to the latest trailer, and work with the keys of that
1967 // one.
1968 auto it = m_aOffsetTrailers.find(m_aTrailerOffsets[0]);
1969 if (it != m_aOffsetTrailers.end())
1970 pTrailer = it->second;
1971 }
1972
1973 if (pTrailer)
1974 pRoot = dynamic_cast<PDFReferenceElement*>(pTrailer->Lookup("Root"));
1975 else if (m_pXRefStream)
1976 pRoot = dynamic_cast<PDFReferenceElement*>(m_pXRefStream->Lookup("Root"));
1977
1978 if (!pRoot)
1979 {
1980 SAL_WARN("vcl.filter", "PDFDocument::GetCatalog: trailer has no Root key");
1981 return nullptr;
1982 }
1983
1984 return pRoot->LookupObject();
1985}
1986
1987std::vector<PDFObjectElement*> PDFDocument::GetPages()
1988{
1989 std::vector<PDFObjectElement*> aRet;
1990
1991 PDFObjectElement* pCatalog = GetCatalog();
1992 if (!pCatalog)
1993 {
1994 SAL_WARN("vcl.filter", "PDFDocument::GetPages: trailer has no catalog");
1995 return aRet;
1996 }
1997
1998 PDFObjectElement* pPages = pCatalog->LookupObject("Pages");
1999 if (!pPages)
2000 {
2001 SAL_WARN("vcl.filter", "PDFDocument::GetPages: catalog (obj " << pCatalog->GetObjectValue()
2002 << ") has no pages");
2003 return aRet;
2004 }
2005
2006 visitPages(pPages, aRet);
2007
2008 return aRet;
2009}
2010
2011void PDFDocument::PushBackEOF(size_t nOffset) { m_aEOFs.push_back(nOffset); }
2012
2013std::vector<PDFObjectElement*> PDFDocument::GetSignatureWidgets()
2014{
2015 std::vector<PDFObjectElement*> aRet;
2016
2017 std::vector<PDFObjectElement*> aPages = GetPages();
2018
2019 for (const auto& pPage : aPages)
2020 {
2021 if (!pPage)
2022 continue;
2023
2024 PDFElement* pAnnotsElement = pPage->Lookup("Annots");
2025 auto pAnnots = dynamic_cast<PDFArrayElement*>(pAnnotsElement);
2026 if (!pAnnots)
2027 {
2028 // Annots is not an array, see if it's a reference to an object
2029 // with a direct array.
2030 auto pAnnotsRef = dynamic_cast<PDFReferenceElement*>(pAnnotsElement);
2031 if (pAnnotsRef)
2032 {
2033 if (PDFObjectElement* pAnnotsObject = pAnnotsRef->LookupObject())
2034 {
2035 pAnnots = pAnnotsObject->GetArray();
2036 }
2037 }
2038 }
2039
2040 if (!pAnnots)
2041 continue;
2042
2043 for (const auto& pAnnot : pAnnots->GetElements())
2044 {
2045 auto pReference = dynamic_cast<PDFReferenceElement*>(pAnnot);
2046 if (!pReference)
2047 continue;
2048
2049 PDFObjectElement* pAnnotObject = pReference->LookupObject();
2050 if (!pAnnotObject)
2051 continue;
2052
2053 auto pFT = dynamic_cast<PDFNameElement*>(pAnnotObject->Lookup("FT"));
2054 if (!pFT || pFT->GetValue() != "Sig")
2055 continue;
2056
2057 aRet.push_back(pAnnotObject);
2058 }
2059 }
2060
2061 return aRet;
2062}
2063
2064std::vector<unsigned char> PDFDocument::DecodeHexString(PDFHexStringElement const* pElement)
2065{
2066 return svl::crypto::DecodeHexString(pElement->GetValue());
2067}
2068
2070{
2071 std::vector<unsigned char> const encoded(DecodeHexString(&rElement));
2072 // Text strings can be PDF-DocEncoding or UTF-16BE with mandatory BOM;
2073 // only the latter supported is here
2074 if (encoded.size() < 2 || encoded[0] != 0xFE || encoded[1] != 0xFF || (encoded.size() & 1) != 0)
2075 {
2076 return {};
2077 }
2078 OUStringBuffer buf(encoded.size() - 2);
2079 for (size_t i = 2; i < encoded.size(); i += 2)
2080 {
2081 buf.append(sal_Unicode((static_cast<sal_uInt16>(encoded[i]) << 8) | encoded[i + 1]));
2082 }
2083 return buf.makeStringAndClear();
2084}
2085
// NOTE(review): the constructor's declaration line is missing from this listing; presumably
// this is the PDFCommentElement constructor taking the owning document as rDoc -- confirm.
// Only binds the m_rDoc reference; the body is empty.
2087 : m_rDoc(rDoc)
2088{
2089}
2090
// NOTE(review): the declaration line for this body is missing from this listing; the log
// message below identifies it as PDFCommentElement::Read, with the stream named rStream.
//
// Reads one comment line into m_aComment. Reading stops at '\n', '\r' or end of stream; the
// terminating character is consumed but not stored. A comment starting with "%%EOF" gets
// extra handling that computes the end-of-file position. Always returns true: the loop can
// only be left through the return inside it.
2092{
2093 // Read from (including) the % char till (excluding) the end of the line/stream.
2094 OStringBuffer aBuf;
2095 char ch;
2096 rStream.ReadChar(ch);
2097 while (true)
2098 {
2099 if (ch == '\n' || ch == '\r' || rStream.eof())
2100 {
2101 m_aComment = aBuf.makeStringAndClear();
2102
2103 if (m_aComment.startsWith("%%EOF"))
2104 {
    // nPos is the position just behind the line terminator that was consumed above.
2105 sal_uInt64 nPos = rStream.Tell();
2106 if (ch == '\r')
2107 {
    // Peek at the next character without consuming it.
2108 rStream.ReadChar(ch);
2109 rStream.SeekRel(-1);
2110 // If the comment ends with a \r\n, count the \n as well to match Adobe Acrobat
2111 // behavior.
2112 if (ch == '\n')
2113 {
2114 nPos += 1;
2115 }
2116 }
    // NOTE(review): the statement that consumes nPos is missing from this listing.
2118 }
2119
2120 SAL_INFO("vcl.filter", "PDFCommentElement::Read: m_aComment is '" << m_aComment << "'");
2121 return true;
2122 }
2123 aBuf.append(ch);
2124 rStream.ReadChar(ch);
2125 }
2126
    // Not reachable: the loop above has no break.
2127 return false;
2128}
2129
2131
// NOTE(review): the declaration line for this body is missing from this listing; the log
// message below identifies it as PDFNumberElement::Read, with the stream named rStream.
//
// Reads a run of digits, '-', '+' and '.' characters starting at the current stream position.
// m_nOffset records where the run starts and m_nLength how many bytes it spans; the first
// character that does not belong to the number is pushed back. Returns false when the stream
// is at its end, when the first character cannot start a number (it is pushed back), or when
// the stream ends before a terminating character is seen.
2133{
2134 OStringBuffer aBuf;
2135 m_nOffset = rStream.Tell();
2136 char ch;
2137 rStream.ReadChar(ch);
2138 if (rStream.eof())
2139 {
2140 return false;
2141 }
2142 if (!rtl::isAsciiDigit(static_cast<unsigned char>(ch)) && ch != '-' && ch != '+' && ch != '.')
2143 {
2144 rStream.SeekRel(-1);
2145 return false;
2146 }
2147 while (!rStream.eof())
2148 {
2149 if (!rtl::isAsciiDigit(static_cast<unsigned char>(ch)) && ch != '-' && ch != '+'
2150 && ch != '.')
2151 {
    // End of the number: un-read the terminator and record the length.
2152 rStream.SeekRel(-1);
2153 m_nLength = rStream.Tell() - m_nOffset;
    // NOTE(review): one statement is missing from this listing here; presumably it converts
    // aBuf into m_fValue -- confirm against the original file.
2155 aBuf.setLength(0);
2156 SAL_INFO("vcl.filter", "PDFNumberElement::Read: m_fValue is '" << m_fValue << "'");
2157 return true;
2158 }
2159 aBuf.append(ch);
2160 rStream.ReadChar(ch);
2161 }
2162
2163 return false;
2164}
2165
2166sal_uInt64 PDFNumberElement::GetLocation() const { return m_nOffset; }
2167
2168sal_uInt64 PDFNumberElement::GetLength() const { return m_nLength; }
2169
2170bool PDFBooleanElement::Read(SvStream& /*rStream*/) { return true; }
2171
2172bool PDFNullElement::Read(SvStream& /*rStream*/) { return true; }
2173
2175{
2176 char ch;
2177 rStream.ReadChar(ch);
2178 if (ch != '<')
2179 {
2180 SAL_INFO("vcl.filter", "PDFHexStringElement::Read: expected '<' as first character");
2181 return false;
2182 }
2183 rStream.ReadChar(ch);
2184
2185 OStringBuffer aBuf;
2186 while (!rStream.eof())
2187 {
2188 if (ch == '>')
2189 {
2190 m_aValue = aBuf.makeStringAndClear();
2191 SAL_INFO("vcl.filter",
2192 "PDFHexStringElement::Read: m_aValue length is " << m_aValue.getLength());
2193 return true;
2194 }
2195 aBuf.append(ch);
2196 rStream.ReadChar(ch);
2197 }
2198
2199 return false;
2200}
2201
2202const OString& PDFHexStringElement::GetValue() const { return m_aValue; }
2203
2205{
2206 char nPrevCh = 0;
2207 char ch = 0;
2208 rStream.ReadChar(ch);
2209 if (ch != '(')
2210 {
2211 SAL_INFO("vcl.filter", "PDFHexStringElement::Read: expected '(' as first character");
2212 return false;
2213 }
2214 nPrevCh = ch;
2215 rStream.ReadChar(ch);
2216
2217 // Start with 1 nesting level as we read a '(' above already.
2218 int nDepth = 1;
2219 OStringBuffer aBuf;
2220 while (!rStream.eof())
2221 {
2222 if (ch == '(' && nPrevCh != '\\')
2223 ++nDepth;
2224
2225 if (ch == ')' && nPrevCh != '\\')
2226 --nDepth;
2227
2228 if (nDepth == 0)
2229 {
2230 // ')' of the outermost '(' is reached.
2231 m_aValue = aBuf.makeStringAndClear();
2232 SAL_INFO("vcl.filter",
2233 "PDFLiteralStringElement::Read: m_aValue is '" << m_aValue << "'");
2234 return true;
2235 }
2236 aBuf.append(ch);
2237 nPrevCh = ch;
2238 rStream.ReadChar(ch);
2239 }
2240
2241 return false;
2242}
2243
2244const OString& PDFLiteralStringElement::GetValue() const { return m_aValue; }
2245
// NOTE(review): the constructor's declaration line is missing from this listing; presumably
// this is the PDFTrailerElement constructor taking the owning document as rDoc -- confirm.
// Binds m_rDoc and starts without a dictionary (m_pDictionaryElement is null).
2247 : m_rDoc(rDoc)
2248 , m_pDictionaryElement(nullptr)
2249{
2250}
2251
// NOTE(review): the declaration line for this body is missing from this listing; presumably
// this is PDFTrailerElement::Read(SvStream& rStream) -- confirm.
// Records the current stream position in m_nOffset without consuming any input and reports
// success.
2253{
2254 m_nOffset = rStream.Tell();
2255 return true;
2256}
2257
// Looks up rDictionaryKey in the trailer dictionary and returns the stored element, or
// nullptr on the early-return path below.
// NOTE(review): several lines of this function are missing from this listing (the
// condition guarding the parse block, the declaration of aParser, and the condition in front
// of "return nullptr"); the description covers only what is visible.
2258PDFElement* PDFTrailerElement::Lookup(const OString& rDictionaryKey)
2259{
2261 {
    // Run the parser on this trailer.
2263 aParser.parse(this);
2264 }
2266 return nullptr;
2267 return m_pDictionaryElement->LookupElement(rDictionaryKey);
2268}
2269
2270sal_uInt64 PDFTrailerElement::GetLocation() const { return m_nOffset; }
2271
2272double PDFNumberElement::GetValue() const { return m_fValue; }
2273
2274PDFObjectElement::PDFObjectElement(PDFDocument& rDoc, double fObjectValue, double fGenerationValue)
2275 : m_rDoc(rDoc)
2276 , m_fObjectValue(fObjectValue)
2277 , m_fGenerationValue(fGenerationValue)
2278 , m_pNumberElement(nullptr)
2279 , m_nDictionaryOffset(0)
2280 , m_nDictionaryLength(0)
2281 , m_pDictionaryElement(nullptr)
2282 , m_nArrayOffset(0)
2283 , m_nArrayLength(0)
2284 , m_pArrayElement(nullptr)
2285 , m_pStreamElement(nullptr)
2286 , m_bParsed(false)
2287{
2288}
2289
2291{
2292 SAL_INFO("vcl.filter",
2293 "PDFObjectElement::Read: " << m_fObjectValue << " " << m_fGenerationValue << " obj");
2294 return true;
2295}
2296
2298
2299PDFElement* PDFDictionaryElement::Lookup(const std::map<OString, PDFElement*>& rDictionary,
2300 const OString& rKey)
2301{
2302 auto it = rDictionary.find(rKey);
2303 if (it == rDictionary.end())
2304 return nullptr;
2305
2306 return it->second;
2307}
2308
2310{
2311 auto pKey = dynamic_cast<PDFReferenceElement*>(
2312 PDFDictionaryElement::Lookup(m_aItems, rDictionaryKey));
2313 if (!pKey)
2314 {
2315 SAL_WARN("vcl.filter",
2316 "PDFDictionaryElement::LookupObject: no such key with reference value: "
2317 << rDictionaryKey);
2318 return nullptr;
2319 }
2320
2321 return pKey->LookupObject();
2322}
2323
2325{
2326 return PDFDictionaryElement::Lookup(m_aItems, rDictionaryKey);
2327}
2328
2330{
2331 if (m_bParsed)
2332 return;
2333
2334 if (!m_aElements.empty())
2335 {
2336 // This is a stored object in an object stream.
2338 aParser.parse(this);
2339 }
2340 else
2341 {
2342 // Normal object: elements are stored as members of the document itself.
2344 aParser.parse(this);
2345 }
2346 m_bParsed = true;
2347}
2348
2349PDFElement* PDFObjectElement::Lookup(const OString& rDictionaryKey)
2350{
2353 return nullptr;
2354 return PDFDictionaryElement::Lookup(GetDictionaryItems(), rDictionaryKey);
2355}
2356
2358{
2359 auto pKey = dynamic_cast<PDFReferenceElement*>(Lookup(rDictionaryKey));
2360 if (!pKey)
2361 {
2362 SAL_WARN("vcl.filter", "PDFObjectElement::LookupObject: no such key with reference value: "
2363 << rDictionaryKey);
2364 return nullptr;
2365 }
2366
2367 return pKey->LookupObject();
2368}
2369
2371
2372void PDFObjectElement::SetDictionaryOffset(sal_uInt64 nDictionaryOffset)
2373{
2374 m_nDictionaryOffset = nDictionaryOffset;
2375}
2376
2378{
2380 return m_nDictionaryOffset;
2381}
2382
2383void PDFObjectElement::SetArrayOffset(sal_uInt64 nArrayOffset) { m_nArrayOffset = nArrayOffset; }
2384
2386
2387void PDFDictionaryElement::SetKeyOffset(const OString& rKey, sal_uInt64 nOffset)
2388{
2389 m_aDictionaryKeyOffset[rKey] = nOffset;
2390}
2391
2392void PDFDictionaryElement::SetKeyValueLength(const OString& rKey, sal_uInt64 nLength)
2393{
2395}
2396
2397sal_uInt64 PDFDictionaryElement::GetKeyOffset(const OString& rKey) const
2398{
2399 auto it = m_aDictionaryKeyOffset.find(rKey);
2400 if (it == m_aDictionaryKeyOffset.end())
2401 return 0;
2402
2403 return it->second;
2404}
2405
2406sal_uInt64 PDFDictionaryElement::GetKeyValueLength(const OString& rKey) const
2407{
2408 auto it = m_aDictionaryKeyValueLength.find(rKey);
2409 if (it == m_aDictionaryKeyValueLength.end())
2410 return 0;
2411
2412 return it->second;
2413}
2414
2415const std::map<OString, PDFElement*>& PDFDictionaryElement::GetItems() const { return m_aItems; }
2416
2417void PDFObjectElement::SetDictionaryLength(sal_uInt64 nDictionaryLength)
2418{
2419 m_nDictionaryLength = nDictionaryLength;
2420}
2421
2423{
2425 return m_nDictionaryLength;
2426}
2427
2428void PDFObjectElement::SetArrayLength(sal_uInt64 nArrayLength) { m_nArrayLength = nArrayLength; }
2429
2431
2433{
2435 return m_pDictionaryElement;
2436}
2437
2439{
2440 m_pDictionaryElement = pDictionaryElement;
2441}
2442
2444{
2445 m_pNumberElement = pNumberElement;
2446}
2447
2449
2450const std::vector<PDFReferenceElement*>& PDFObjectElement::GetDictionaryReferences() const
2451{
2453}
2454
2456{
2457 m_aDictionaryReferences.push_back(pReference);
2458}
2459
2460const std::map<OString, PDFElement*>& PDFObjectElement::GetDictionaryItems()
2461{
2464}
2465
2466void PDFObjectElement::SetArray(PDFArrayElement* pArrayElement) { m_pArrayElement = pArrayElement; }
2467
2469{
2470 m_pStreamElement = pStreamElement;
2471}
2472
2474
2476{
2478 return m_pArrayElement;
2479}
2480
2482{
2483 if (!m_pStreamElement)
2484 {
2485 SAL_WARN("vcl.filter", "PDFObjectElement::ParseStoredObjects: no stream");
2486 return;
2487 }
2488
2489 auto pType = dynamic_cast<PDFNameElement*>(Lookup("Type"));
2490 if (!pType || pType->GetValue() != "ObjStm")
2491 {
2492 if (!pType)
2493 SAL_WARN("vcl.filter", "PDFDocument::ReadXRefStream: missing unexpected type");
2494 else
2495 SAL_WARN("vcl.filter",
2496 "PDFDocument::ReadXRefStream: unexpected type: " << pType->GetValue());
2497 return;
2498 }
2499
2500 auto pFilter = dynamic_cast<PDFNameElement*>(Lookup("Filter"));
2501 if (!pFilter || pFilter->GetValue() != "FlateDecode")
2502 {
2503 if (!pFilter)
2504 SAL_WARN("vcl.filter", "PDFDocument::ReadXRefStream: missing filter");
2505 else
2506 SAL_WARN("vcl.filter",
2507 "PDFDocument::ReadXRefStream: unexpected filter: " << pFilter->GetValue());
2508 return;
2509 }
2510
2511 auto pFirst = dynamic_cast<PDFNumberElement*>(Lookup("First"));
2512 if (!pFirst)
2513 {
2514 SAL_WARN("vcl.filter", "PDFObjectElement::ParseStoredObjects: no First");
2515 return;
2516 }
2517
2518 auto pN = dynamic_cast<PDFNumberElement*>(Lookup("N"));
2519 if (!pN)
2520 {
2521 SAL_WARN("vcl.filter", "PDFObjectElement::ParseStoredObjects: no N");
2522 return;
2523 }
2524 size_t nN = pN->GetValue();
2525
2526 auto pLength = dynamic_cast<PDFNumberElement*>(Lookup("Length"));
2527 if (!pLength)
2528 {
2529 SAL_WARN("vcl.filter", "PDFObjectElement::ParseStoredObjects: no length");
2530 return;
2531 }
2532 size_t nLength = pLength->GetValue();
2533
2534 // Read and decompress it.
2535 SvMemoryStream& rEditBuffer = m_rDoc.GetEditBuffer();
2536 rEditBuffer.Seek(m_pStreamElement->GetOffset());
2537 std::vector<char> aBuf(nLength);
2538 rEditBuffer.ReadBytes(aBuf.data(), aBuf.size());
2539 SvMemoryStream aSource(aBuf.data(), aBuf.size(), StreamMode::READ);
2540 SvMemoryStream aStream;
2541 ZCodec aZCodec;
2542 aZCodec.BeginCompression();
2543 aZCodec.Decompress(aSource, aStream);
2544 if (!aZCodec.EndCompression())
2545 {
2546 SAL_WARN("vcl.filter", "PDFObjectElement::ParseStoredObjects: decompression failed");
2547 return;
2548 }
2549
2550 nLength = aStream.TellEnd();
2551 aStream.Seek(0);
2552 std::vector<size_t> aObjNums;
2553 std::vector<size_t> aOffsets;
2554 std::vector<size_t> aLengths;
2555 // First iterate over and find out the lengths.
2556 for (size_t nObject = 0; nObject < nN; ++nObject)
2557 {
2558 PDFNumberElement aObjNum;
2559 if (!aObjNum.Read(aStream))
2560 {
2561 SAL_WARN("vcl.filter",
2562 "PDFObjectElement::ParseStoredObjects: failed to read object number");
2563 return;
2564 }
2565 aObjNums.push_back(aObjNum.GetValue());
2566
2568
2569 PDFNumberElement aByteOffset;
2570 if (!aByteOffset.Read(aStream))
2571 {
2572 SAL_WARN("vcl.filter",
2573 "PDFObjectElement::ParseStoredObjects: failed to read byte offset");
2574 return;
2575 }
2576 aOffsets.push_back(pFirst->GetValue() + aByteOffset.GetValue());
2577
2578 if (aOffsets.size() > 1)
2579 aLengths.push_back(aOffsets.back() - aOffsets[aOffsets.size() - 2]);
2580 if (nObject + 1 == nN)
2581 aLengths.push_back(nLength - aOffsets.back());
2582
2584 }
2585
2586 // Now create streams with the proper length and tokenize the data.
2587 for (size_t nObject = 0; nObject < nN; ++nObject)
2588 {
2589 size_t nObjNum = aObjNums[nObject];
2590 size_t nOffset = aOffsets[nObject];
2591 size_t nLen = aLengths[nObject];
2592
2593 aStream.Seek(nOffset);
2594 m_aStoredElements.push_back(std::make_unique<PDFObjectElement>(m_rDoc, nObjNum, 0));
2595 PDFObjectElement* pStored = m_aStoredElements.back().get();
2596
2597 aBuf.clear();
2598 aBuf.resize(nLen);
2599 aStream.ReadBytes(aBuf.data(), aBuf.size());
2600 SvMemoryStream aStoredStream(aBuf.data(), aBuf.size(), StreamMode::READ);
2601
2603 pStored);
2604 // This is how references know the object is stored inside this object stream.
2605 m_rDoc.SetIDObject(nObjNum, pStored);
2606
2607 // Store the stream of the object in the object stream for later use.
2608 std::unique_ptr<SvMemoryStream> pStreamBuffer(new SvMemoryStream());
2609 aStoredStream.Seek(0);
2610 pStreamBuffer->WriteStream(aStoredStream);
2611 pStored->SetStreamBuffer(pStreamBuffer);
2612 }
2613}
2614
2615std::vector<std::unique_ptr<PDFElement>>& PDFObjectElement::GetStoredElements()
2616{
2617 return m_aElements;
2618}
2619
2621
2622void PDFObjectElement::SetStreamBuffer(std::unique_ptr<SvMemoryStream>& pStreamBuffer)
2623{
2624 m_pStreamBuffer = std::move(pStreamBuffer);
2625}
2626
2628
2630 PDFNumberElement const& rGeneration)
2631 : m_rDoc(rDoc)
2632 , m_fObjectValue(rObject.GetValue())
2633 , m_fGenerationValue(rGeneration.GetValue())
2634 , m_rObject(rObject)
2635{
2636}
2637
2639
2641{
2642 SAL_INFO("vcl.filter",
2643 "PDFReferenceElement::Read: " << m_fObjectValue << " " << m_fGenerationValue << " R");
2644 m_nOffset = rStream.Tell();
2645 return true;
2646}
2647
2648sal_uInt64 PDFReferenceElement::GetOffset() const { return m_nOffset; }
2649
2651{
2652 size_t nOffset = m_rDoc.GetObjectOffset(m_fObjectValue);
2653 if (nOffset == 0)
2654 {
2655 SAL_WARN("vcl.filter", "PDFReferenceElement::LookupNumber: found no offset for object #"
2656 << m_fObjectValue);
2657 return 0;
2658 }
2659
2660 sal_uInt64 nOrigPos = rStream.Tell();
2661 comphelper::ScopeGuard g([&]() { rStream.Seek(nOrigPos); });
2662
2663 rStream.Seek(nOffset);
2664 {
2666 PDFNumberElement aNumber;
2667 bool bRet = aNumber.Read(rStream);
2668 if (!bRet || aNumber.GetValue() != m_fObjectValue)
2669 {
2670 SAL_WARN("vcl.filter",
2671 "PDFReferenceElement::LookupNumber: offset points to not matching object");
2672 return 0;
2673 }
2674 }
2675
2676 {
2678 PDFNumberElement aNumber;
2679 bool bRet = aNumber.Read(rStream);
2680 if (!bRet || aNumber.GetValue() != m_fGenerationValue)
2681 {
2682 SAL_WARN("vcl.filter",
2683 "PDFReferenceElement::LookupNumber: offset points to not matching generation");
2684 return 0;
2685 }
2686 }
2687
2688 {
2690 OString aKeyword = PDFDocument::ReadKeyword(rStream);
2691 if (aKeyword != "obj")
2692 {
2693 SAL_WARN("vcl.filter",
2694 "PDFReferenceElement::LookupNumber: offset doesn't point to an obj keyword");
2695 return 0;
2696 }
2697 }
2698
2700 PDFNumberElement aNumber;
2701 if (!aNumber.Read(rStream))
2702 {
2703 SAL_WARN("vcl.filter",
2704 "PDFReferenceElement::LookupNumber: failed to read referenced number");
2705 return 0;
2706 }
2707
2708 return aNumber.GetValue();
2709}
2710
2712{
2714}
2715
2717{
2718 auto itIDObjects = m_aIDObjects.find(nObjectNumber);
2719
2720 if (itIDObjects != m_aIDObjects.end())
2721 return itIDObjects->second;
2722
2723 SAL_WARN("vcl.filter", "PDFDocument::LookupObject: can't find obj " << nObjectNumber);
2724 return nullptr;
2725}
2726
2728
2730
2732
2734{
2735 char ch;
2736 rStream.ReadChar(ch);
2737 if (ch != '<')
2738 {
2739 SAL_WARN("vcl.filter", "PDFDictionaryElement::Read: unexpected character: " << ch);
2740 return false;
2741 }
2742
2743 if (rStream.eof())
2744 {
2745 SAL_WARN("vcl.filter", "PDFDictionaryElement::Read: unexpected end of file");
2746 return false;
2747 }
2748
2749 rStream.ReadChar(ch);
2750 if (ch != '<')
2751 {
2752 SAL_WARN("vcl.filter", "PDFDictionaryElement::Read: unexpected character: " << ch);
2753 return false;
2754 }
2755
2756 m_nLocation = rStream.Tell();
2757
2758 SAL_INFO("vcl.filter", "PDFDictionaryElement::Read: '<<'");
2759
2760 return true;
2761}
2762
2764
2766
2768{
2769 m_nLocation = rStream.Tell();
2770 char ch;
2771 rStream.ReadChar(ch);
2772 if (ch != '>')
2773 {
2774 SAL_WARN("vcl.filter", "PDFEndDictionaryElement::Read: unexpected character: " << ch);
2775 return false;
2776 }
2777
2778 if (rStream.eof())
2779 {
2780 SAL_WARN("vcl.filter", "PDFEndDictionaryElement::Read: unexpected end of file");
2781 return false;
2782 }
2783
2784 rStream.ReadChar(ch);
2785 if (ch != '>')
2786 {
2787 SAL_WARN("vcl.filter", "PDFEndDictionaryElement::Read: unexpected character: " << ch);
2788 return false;
2789 }
2790
2791 SAL_INFO("vcl.filter", "PDFEndDictionaryElement::Read: '>>'");
2792
2793 return true;
2794}
2795
2797
2799{
2800 char ch;
2801 rStream.ReadChar(ch);
2802 if (ch != '/')
2803 {
2804 SAL_WARN("vcl.filter", "PDFNameElement::Read: unexpected character: " << ch);
2805 return false;
2806 }
2807 m_nLocation = rStream.Tell();
2808
2809 if (rStream.eof())
2810 {
2811 SAL_WARN("vcl.filter", "PDFNameElement::Read: unexpected end of file");
2812 return false;
2813 }
2814
2815 // Read till the first white-space.
2816 OStringBuffer aBuf;
2817 rStream.ReadChar(ch);
2818 while (!rStream.eof())
2819 {
2820 if (rtl::isAsciiWhiteSpace(static_cast<unsigned char>(ch)) || ch == '/' || ch == '['
2821 || ch == ']' || ch == '<' || ch == '>' || ch == '(')
2822 {
2823 rStream.SeekRel(-1);
2824 m_aValue = aBuf.makeStringAndClear();
2825 SAL_INFO("vcl.filter", "PDFNameElement::Read: m_aValue is '" << m_aValue << "'");
2826 return true;
2827 }
2828 aBuf.append(ch);
2829 rStream.ReadChar(ch);
2830 }
2831
2832 return false;
2833}
2834
2835const OString& PDFNameElement::GetValue() const { return m_aValue; }
2836
2837sal_uInt64 PDFNameElement::GetLocation() const { return m_nLocation; }
2838
2841 , m_nOffset(0)
2842{
2843}
2844
2846{
2847 SAL_INFO("vcl.filter", "PDFStreamElement::Read: length is " << m_nLength);
2848 m_nOffset = rStream.Tell();
2849 std::vector<unsigned char> aBytes(m_nLength);
2850 rStream.ReadBytes(aBytes.data(), aBytes.size());
2851 m_aMemory.WriteBytes(aBytes.data(), aBytes.size());
2852
2853 return rStream.good();
2854}
2855
2857
2858sal_uInt64 PDFStreamElement::GetOffset() const { return m_nOffset; }
2859
2860bool PDFEndStreamElement::Read(SvStream& /*rStream*/) { return true; }
2861
2862bool PDFEndObjectElement::Read(SvStream& /*rStream*/) { return true; }
2863
2865 : m_pObject(pObject)
2866{
2867}
2868
2870{
2871 char ch;
2872 rStream.ReadChar(ch);
2873 if (ch != '[')
2874 {
2875 SAL_WARN("vcl.filter", "PDFArrayElement::Read: unexpected character: " << ch);
2876 return false;
2877 }
2878
2879 SAL_INFO("vcl.filter", "PDFArrayElement::Read: '['");
2880
2881 return true;
2882}
2883
2885{
2886 if (m_pObject)
2887 SAL_INFO("vcl.filter",
2888 "PDFArrayElement::PushBack: object is " << m_pObject->GetObjectValue());
2889 m_aElements.push_back(pElement);
2890}
2891
2892const std::vector<PDFElement*>& PDFArrayElement::GetElements() const { return m_aElements; }
2893
2895
2897{
2898 m_nOffset = rStream.Tell();
2899 char ch;
2900 rStream.ReadChar(ch);
2901 if (ch != ']')
2902 {
2903 SAL_WARN("vcl.filter", "PDFEndArrayElement::Read: unexpected character: " << ch);
2904 return false;
2905 }
2906
2907 SAL_INFO("vcl.filter", "PDFEndArrayElement::Read: ']'");
2908
2909 return true;
2910}
2911
2912sal_uInt64 PDFEndArrayElement::GetOffset() const { return m_nOffset; }
2913
2914// PDFObjectParser
2915
2916size_t PDFObjectParser::parse(PDFElement* pParsingElement, size_t nStartIndex, int nCurrentDepth)
2917{
2918 // The index of last parsed element
2919 size_t nReturnIndex = 0;
2920
2921 pParsingElement->setParsing(true);
2922
2923 comphelper::ScopeGuard aGuard([pParsingElement]() { pParsingElement->setParsing(false); });
2924
2925 // Current object, if root is an object, else nullptr
2926 auto pParsingObject = dynamic_cast<PDFObjectElement*>(pParsingElement);
2927 auto pParsingTrailer = dynamic_cast<PDFTrailerElement*>(pParsingElement);
2928
2929 // Current dictionary, if root is an dictionary, else nullptr
2930 auto pParsingDictionary = dynamic_cast<PDFDictionaryElement*>(pParsingElement);
2931
2932 // Current parsing array, if root is an array, else nullptr
2933 auto pParsingArray = dynamic_cast<PDFArrayElement*>(pParsingElement);
2934
2935 // Find out where the dictionary for this object starts.
2936 size_t nIndex = nStartIndex;
2937 for (size_t i = nStartIndex; i < mrElements.size(); ++i)
2938 {
2939 if (mrElements[i].get() == pParsingElement)
2940 {
2941 nIndex = i;
2942 break;
2943 }
2944 }
2945
2946 OString aName;
2947 sal_uInt64 nNameOffset = 0;
2948 std::vector<PDFNumberElement*> aNumbers;
2949
2950 sal_uInt64 nDictionaryOffset = 0;
2951
2952 // Current depth; 1 is current
2953 int nDepth = 0;
2954
2955 for (size_t i = nIndex; i < mrElements.size(); ++i)
2956 {
2957 auto* pCurrentElement = mrElements[i].get();
2958
2959 // Dictionary tokens can be nested, track enter/leave.
2960 if (auto pCurrentDictionary = dynamic_cast<PDFDictionaryElement*>(pCurrentElement))
2961 {
2962 // Handle previously stored number
2963 if (!aNumbers.empty())
2964 {
2965 if (pParsingDictionary)
2966 {
2967 PDFNumberElement* pNumber = aNumbers.back();
2968 sal_uInt64 nLength
2969 = pNumber->GetLocation() + pNumber->GetLength() - nNameOffset;
2970
2971 pParsingDictionary->insert(aName, pNumber);
2972 pParsingDictionary->SetKeyOffset(aName, nNameOffset);
2973 pParsingDictionary->SetKeyValueLength(aName, nLength);
2974 }
2975 else if (pParsingArray)
2976 {
2977 for (auto& pNumber : aNumbers)
2978 pParsingArray->PushBack(pNumber);
2979 }
2980 else
2981 {
2982 SAL_INFO("vcl.filter", "neither Dictionary nor Array available");
2983 }
2984 aName.clear();
2985 aNumbers.clear();
2986 }
2987
2988 nDepth++;
2989
2990 if (nDepth == 1) // pParsingDictionary is the current one
2991 {
2992 // First dictionary start, track start offset.
2993 nDictionaryOffset = pCurrentDictionary->GetLocation();
2994
2995 if (pParsingObject)
2996 {
2997 // Then the toplevel dictionary of the object.
2998 pParsingObject->SetDictionary(pCurrentDictionary);
2999 pParsingObject->SetDictionaryOffset(nDictionaryOffset);
3000 pParsingDictionary = pCurrentDictionary;
3001 }
3002 else if (pParsingTrailer)
3003 {
3004 pParsingTrailer->SetDictionary(pCurrentDictionary);
3005 pParsingDictionary = pCurrentDictionary;
3006 }
3007 }
3008 else if (!pCurrentDictionary->alreadyParsing())
3009 {
3010 if (pParsingArray)
3011 {
3012 pParsingArray->PushBack(pCurrentDictionary);
3013 }
3014 else if (pParsingDictionary)
3015 {
3016 // Dictionary toplevel value.
3017 pParsingDictionary->insert(aName, pCurrentDictionary);
3018 }
3019 else
3020 {
3021 SAL_INFO("vcl.filter", "neither Dictionary nor Array available");
3022 }
3023 // Nested dictionary.
3024 const size_t nNextElementIndex = parse(pCurrentDictionary, i, nCurrentDepth + 1);
3025 i = std::max(i, nNextElementIndex - 1);
3026 }
3027 }
3028 else if (auto pCurrentEndDictionary
3029 = dynamic_cast<PDFEndDictionaryElement*>(pCurrentElement))
3030 {
3031 // Handle previously stored number
3032 if (!aNumbers.empty())
3033 {
3034 if (pParsingDictionary)
3035 {
3036 PDFNumberElement* pNumber = aNumbers.back();
3037 sal_uInt64 nLength
3038 = pNumber->GetLocation() + pNumber->GetLength() - nNameOffset;
3039
3040 pParsingDictionary->insert(aName, pNumber);
3041 pParsingDictionary->SetKeyOffset(aName, nNameOffset);
3042 pParsingDictionary->SetKeyValueLength(aName, nLength);
3043 }
3044 else if (pParsingArray)
3045 {
3046 for (auto& pNumber : aNumbers)
3047 pParsingArray->PushBack(pNumber);
3048 }
3049 else
3050 {
3051 SAL_INFO("vcl.filter", "neither Dictionary nor Array available");
3052 }
3053 aName.clear();
3054 aNumbers.clear();
3055 }
3056
3057 if (pParsingDictionary)
3058 {
3059 pParsingDictionary->SetKeyOffset(aName, nNameOffset);
3060 sal_uInt64 nLength = pCurrentEndDictionary->GetLocation() - nNameOffset + 2;
3061 pParsingDictionary->SetKeyValueLength(aName, nLength);
3062 aName.clear();
3063 }
3064
3065 if (nDepth == 1) // did the parsing ended
3066 {
3067 // Last dictionary end, track length and stop parsing.
3068 if (pParsingObject)
3069 {
3070 sal_uInt64 nDictionaryLength
3071 = pCurrentEndDictionary->GetLocation() - nDictionaryOffset;
3072 pParsingObject->SetDictionaryLength(nDictionaryLength);
3073 }
3074 nReturnIndex = i;
3075 break;
3076 }
3077
3078 nDepth--;
3079 }
3080 else if (auto pCurrentArray = dynamic_cast<PDFArrayElement*>(pCurrentElement))
3081 {
3082 // Handle previously stored number
3083 if (!aNumbers.empty())
3084 {
3085 if (pParsingDictionary)
3086 {
3087 PDFNumberElement* pNumber = aNumbers.back();
3088
3089 sal_uInt64 nLength
3090 = pNumber->GetLocation() + pNumber->GetLength() - nNameOffset;
3091 pParsingDictionary->insert(aName, pNumber);
3092 pParsingDictionary->SetKeyOffset(aName, nNameOffset);
3093 pParsingDictionary->SetKeyValueLength(aName, nLength);
3094 }
3095 else if (pParsingArray)
3096 {
3097 for (auto& pNumber : aNumbers)
3098 pParsingArray->PushBack(pNumber);
3099 }
3100 else
3101 {
3102 SAL_INFO("vcl.filter", "neither Dictionary nor Array available");
3103 }
3104 aName.clear();
3105 aNumbers.clear();
3106 }
3107
3108 nDepth++;
3109 if (nDepth == 1) // pParsingDictionary is the current one
3110 {
3111 if (pParsingObject)
3112 {
3113 pParsingObject->SetArray(pCurrentArray);
3114 pParsingArray = pCurrentArray;
3115 }
3116 }
3117 else if (!pCurrentArray->alreadyParsing())
3118 {
3119 if (pParsingArray)
3120 {
3121 // Array is toplevel
3122 pParsingArray->PushBack(pCurrentArray);
3123 }
3124 else if (pParsingDictionary)
3125 {
3126 // Dictionary toplevel value.
3127 pParsingDictionary->insert(aName, pCurrentArray);
3128 }
3129
3130 const size_t nNextElementIndex = parse(pCurrentArray, i, nCurrentDepth + 1);
3131
3132 // ensure we go forwards and not endlessly loop
3133 i = std::max(i, nNextElementIndex - 1);
3134 }
3135 }
3136 else if (auto pCurrentEndArray = dynamic_cast<PDFEndArrayElement*>(pCurrentElement))
3137 {
3138 // Handle previously stored number
3139 if (!aNumbers.empty())
3140 {
3141 if (pParsingDictionary)
3142 {
3143 PDFNumberElement* pNumber = aNumbers.back();
3144
3145 sal_uInt64 nLength
3146 = pNumber->GetLocation() + pNumber->GetLength() - nNameOffset;
3147 pParsingDictionary->insert(aName, pNumber);
3148 pParsingDictionary->SetKeyOffset(aName, nNameOffset);
3149 pParsingDictionary->SetKeyValueLength(aName, nLength);
3150 }
3151 else if (pParsingArray)
3152 {
3153 for (auto& pNumber : aNumbers)
3154 pParsingArray->PushBack(pNumber);
3155 }
3156 else
3157 {
3158 SAL_INFO("vcl.filter", "neither Dictionary nor Array available");
3159 }
3160 aName.clear();
3161 aNumbers.clear();
3162 }
3163
3164 if (nDepth == 1) // did the pParsing ended
3165 {
3166 // Last array end, track length and stop parsing.
3167 nReturnIndex = i;
3168 break;
3169 }
3170
3171 if (pParsingDictionary)
3172 {
3173 pParsingDictionary->SetKeyOffset(aName, nNameOffset);
3174 // Include the ending ']' in the length of the key - (array)value pair length.
3175 sal_uInt64 nLength = pCurrentEndArray->GetOffset() - nNameOffset + 1;
3176 pParsingDictionary->SetKeyValueLength(aName, nLength);
3177 aName.clear();
3178 }
3179 nDepth--;
3180 }
3181 else if (auto pCurrentName = dynamic_cast<PDFNameElement*>(pCurrentElement))
3182 {
3183 // Handle previously stored number
3184 if (!aNumbers.empty())
3185 {
3186 if (pParsingDictionary)
3187 {
3188 PDFNumberElement* pNumber = aNumbers.back();
3189
3190 sal_uInt64 nLength
3191 = pNumber->GetLocation() + pNumber->GetLength() - nNameOffset;
3192 pParsingDictionary->insert(aName, pNumber);
3193 pParsingDictionary->SetKeyOffset(aName, nNameOffset);
3194 pParsingDictionary->SetKeyValueLength(aName, nLength);
3195 }
3196 else if (pParsingArray)
3197 {
3198 for (auto& pNumber : aNumbers)
3199 pParsingArray->PushBack(pNumber);
3200 }
3201 aName.clear();
3202 aNumbers.clear();
3203 }
3204
3205 // Now handle name
3206 if (pParsingArray)
3207 {
3208 // if we are in an array, just push the name to array
3209 pParsingArray->PushBack(pCurrentName);
3210 }
3211 else if (pParsingDictionary)
3212 {
3213 // if we are in a dictionary, we need to store the name as a possible key
3214 if (aName.isEmpty())
3215 {
3216 aName = pCurrentName->GetValue();
3217 nNameOffset = pCurrentName->GetLocation();
3218 }
3219 else
3220 {
3221 sal_uInt64 nKeyLength
3222 = pCurrentName->GetLocation() + pCurrentName->GetLength() - nNameOffset;
3223 pParsingDictionary->insert(aName, pCurrentName);
3224 pParsingDictionary->SetKeyOffset(aName, nNameOffset);
3225 pParsingDictionary->SetKeyValueLength(aName, nKeyLength);
3226 aName.clear();
3227 }
3228 }
3229 }
3230 else if (auto pReference = dynamic_cast<PDFReferenceElement*>(pCurrentElement))
3231 {
3232 if (pParsingArray)
3233 {
3234 pParsingArray->PushBack(pReference);
3235 }
3236 else if (pParsingDictionary)
3237 {
3238 sal_uInt64 nLength = pReference->GetOffset() - nNameOffset;
3239 pParsingDictionary->insert(aName, pReference);
3240 pParsingDictionary->SetKeyOffset(aName, nNameOffset);
3241 pParsingDictionary->SetKeyValueLength(aName, nLength);
3242 aName.clear();
3243 }
3244 else
3245 {
3246 SAL_INFO("vcl.filter", "neither Dictionary nor Array available");
3247 }
3248 aNumbers.clear();
3249 }
3250 else if (auto pLiteralString = dynamic_cast<PDFLiteralStringElement*>(pCurrentElement))
3251 {
3252 if (pParsingArray)
3253 {
3254 pParsingArray->PushBack(pLiteralString);
3255 }
3256 else if (pParsingDictionary)
3257 {
3258 pParsingDictionary->insert(aName, pLiteralString);
3259 pParsingDictionary->SetKeyOffset(aName, nNameOffset);
3260 aName.clear();
3261 }
3262 else
3263 {
3264 SAL_INFO("vcl.filter", "neither Dictionary nor Array available");
3265 }
3266 }
3267 else if (auto pBoolean = dynamic_cast<PDFBooleanElement*>(pCurrentElement))
3268 {
3269 if (pParsingArray)
3270 {
3271 pParsingArray->PushBack(pBoolean);
3272 }
3273 else if (pParsingDictionary)
3274 {
3275 pParsingDictionary->insert(aName, pBoolean);
3276 pParsingDictionary->SetKeyOffset(aName, nNameOffset);
3277 aName.clear();
3278 }
3279 else
3280 {
3281 SAL_INFO("vcl.filter", "neither Dictionary nor Array available");
3282 }
3283 }
3284 else if (auto pHexString = dynamic_cast<PDFHexStringElement*>(pCurrentElement))
3285 {
3286 if (pParsingArray)
3287 {
3288 pParsingArray->PushBack(pHexString);
3289 }
3290 else if (pParsingDictionary)
3291 {
3292 pParsingDictionary->insert(aName, pHexString);
3293 pParsingDictionary->SetKeyOffset(aName, nNameOffset);
3294 aName.clear();
3295 }
3296 }
3297 else if (auto pNumberElement = dynamic_cast<PDFNumberElement*>(pCurrentElement))
3298 {
3299 // Just remember this, so that in case it's not a reference parameter,
3300 // we can handle it later.
3301 aNumbers.push_back(pNumberElement);
3302 }
3303 else if (dynamic_cast<PDFEndObjectElement*>(pCurrentElement))
3304 {
3305 // parsing of the object is finished
3306 break;
3307 }
3308 else if (dynamic_cast<PDFObjectElement*>(pCurrentElement)
3309 || dynamic_cast<PDFTrailerElement*>(pCurrentElement))
3310 {
3311 continue;
3312 }
3313 else
3314 {
3315 SAL_INFO("vcl.filter", "Unhandled element while parsing.");
3316 }
3317 }
3318
3319 return nReturnIndex;
3320}
3321
3322} // namespace vcl
3323
3324/* vim:set shiftwidth=4 softtabstop=4 expandtab: */
const sal_Int32 m_nLength
const char * pName
const void * GetData()
sal_uInt64 GetSize()
virtual sal_uInt64 TellEnd() override
sal_uInt64 Tell() const
bool good() const
std::size_t WriteBytes(const void *pData, std::size_t nSize)
bool eof() const
bool SetStreamSize(sal_uInt64 nSize)
SvStream & WriteOString(std::string_view rStr)
SvStream & WriteUInt32AsString(sal_uInt32 nUInt32)
SvStream & ReadChar(char &rChar)
sal_uInt64 Seek(sal_uInt64 nPos)
SvStream & WriteInt32AsString(sal_Int32 nInt32)
std::size_t ReadBytes(void *pData, std::size_t nSize)
sal_uInt64 SeekRel(sal_Int64 nPos)
SvStream & WriteCharPtr(const char *pBuf)
SvStream & WriteStream(SvStream &rStream)
tools::Long Decompress(SvStream &rIStm, SvStream &rOStm)
tools::Long EndCompression()
void BeginCompression(int nCompressLevel=ZCODEC_DEFAULT_COMPRESSION, bool gzLib=false)
void Compress(SvStream &rIStm, SvStream &rOStm)
void AddDataRange(const void *pData, sal_Int32 size)
bool Sign(OStringBuffer &rCMSHexBuffer)
tools::Long getOpenHeight() const
void setWidth(tools::Long n)
void setHeight(tools::Long n)
tools::Long getOpenWidth() const
Copies objects from one PDF file into another one.
static sal_Int32 copyPageStreams(std::vector< filter::PDFObjectElement * > &rContentStreams, SvMemoryStream &rStream, bool &rCompressed)
Copies one or more page streams from rContentStreams into rStream.
void copyPageResources(filter::PDFObjectElement *pPage, OStringBuffer &rLine)
Copies resources of pPage into rLine.
static void AppendUnicodeTextString(const OUString &rString, OStringBuffer &rBuffer)
Write rString as a PDF hex string into rBuffer.
static OString GetDateTime()
Get current date/time in PDF D:YYYYMMDDHHMMSS form.
Array object: a list.
PDFObjectElement * m_pObject
The object that contains this array.
const std::vector< PDFElement * > & GetElements() const
bool Read(SvStream &rStream) override
void PushBack(PDFElement *pElement)
PDFArrayElement(PDFObjectElement *pObject)
std::vector< PDFElement * > m_aElements
Boolean object: a 'true' or a 'false'.
bool Read(SvStream &rStream) override
PDFCommentElement(PDFDocument &rDoc)
bool Read(SvStream &rStream) override
Dictionary object: a set of key-value pairs.
static PDFElement * Lookup(const std::map< OString, PDFElement * > &rDictionary, const OString &rKey)
sal_uInt64 GetKeyOffset(const OString &rKey) const
std::map< OString, sal_uInt64 > m_aDictionaryKeyOffset
Position after the '/' token.
sal_uInt64 m_nLocation
Offset after the '<<' token.
bool Read(SvStream &rStream) override
std::map< OString, sal_uInt64 > m_aDictionaryKeyValueLength
Length of the dictionary key and value, till (before) the next token.
PDFElement * LookupElement(const OString &rDictionaryKey)
Looks up an element which is contained in this dictionary.
const std::map< OString, PDFElement * > & GetItems() const
void SetKeyValueLength(const OString &rKey, sal_uInt64 nLength)
void SetKeyOffset(const OString &rKey, sal_uInt64 nOffset)
sal_uInt64 GetKeyValueLength(const OString &rKey) const
PDFObjectElement * LookupObject(const OString &rDictionaryKey)
Looks up an object which is only referenced in this dictionary.
std::map< OString, PDFElement * > m_aItems
Key-value pairs when the dictionary is a nested value.
In-memory representation of an on-disk PDF document.
PDFObjectElement * m_pXRefStream
When m_pTrailer is nullptr, this can still have a dictionary.
static OUString DecodeHexStringUTF16BE(PDFHexStringElement const &rElement)
bool RemoveSignature(size_t nPosition)
Remove the nth signature from read document in the edit buffer.
Definition: pdfdocument.cxx:45
PDFTrailerElement * m_pTrailer
sal_Int32 createObject() override
See vcl::PDFObjectContainer::createObject().
Definition: pdfdocument.cxx:70
SvMemoryStream & GetEditBuffer()
Access to the input document, even after the input stream is gone.
std::vector< size_t > m_aEOFs
List of EOF offsets we know.
std::map< size_t, PDFObjectElement * > m_aOffsetObjects
Object offset <-> Object pointer map.
std::vector< size_t > m_aTrailerOffsets
Offsets of trailers, from latest to oldest.
bool WriteCatalogObject(sal_Int32 nAnnotId, PDFReferenceElement *&pRoot)
Write the updated Catalog object as part of signing.
void SetSignatureLine(std::vector< sal_Int8 > &&rSignatureLine)
Definition: pdfdocument.cxx:98
bool Sign(const css::uno::Reference< css::security::XCertificate > &xCertificate, const OUString &rDescription, bool bAdES)
Sign the read document with xCertificate in the edit buffer.
void PushBackEOF(size_t nOffset)
Remember the end location of an EOF token.
bool Read(SvStream &rStream)
Read elements from the start of the stream till its end.
static size_t FindStartXRef(SvStream &rStream)
sal_uInt32 GetNextSignature()
Suggest a minimal, yet free signature ID to use for the next signature.
bool ReadWithPossibleFixup(SvStream &rStream)
Calls Read() first and if it fails it tries to fixup and then retry.
bool WritePageObject(PDFObjectElement &rFirstPage, sal_Int32 nAnnotId)
Write the updated Page object as part of signing.
static OString ReadKeyword(SvStream &rStream)
bool Tokenize(SvStream &rStream, TokenizeMode eMode, std::vector< std::unique_ptr< PDFElement > > &rElements, PDFObjectElement *pObjectElement)
Tokenize elements from current offset.
void ReadXRef(SvStream &rStream)
size_t GetObjectOffset(size_t nIndex) const
static void SkipWhitespace(SvStream &rStream)
PDFObjectElement * LookupObject(size_t nObjectNumber)
Look up object based on object number, possibly by parsing object streams.
static void SkipLineBreaks(SvStream &rStream)
Instead of all whitespace, just skip CR and NL characters.
PDFObjectElement * GetCatalog()
size_t m_nSignaturePage
0-based page number where m_aSignatureLine should be placed.
sal_Int32 WriteAnnotObject(PDFObjectElement const &rFirstPage, sal_Int32 nSignatureId, sal_Int32 nAppearanceId, const tools::Rectangle &rSignatureRectangle)
Write the annot object as part of signing.
bool writeBuffer(const void *pBuffer, sal_uInt64 nBytes) override
See vcl::PDFObjectContainer::writeBuffer().
Definition: pdfdocument.cxx:92
bool updateObject(sal_Int32 n) override
See vcl::PDFObjectContainer::updateObject().
Definition: pdfdocument.cxx:77
std::vector< PDFObjectElement * > GetSignatureWidgets()
Get a list of signatures embedded into this document.
sal_Int32 WriteAppearanceObject(tools::Rectangle &rSignatureRectangle)
Write the appearance object as part of signing.
void SetIDObject(size_t nID, PDFObjectElement *pObject)
Register an object (owned directly or indirectly by m_aElements) as a provider for a given ID.
bool Write(SvStream &rStream)
Serializes the contents of the edit buffer.
std::vector< PDFObjectElement * > GetPages()
const std::vector< std::unique_ptr< PDFElement > > & GetElements() const
void ReadXRefStream(SvStream &rStream)
SvMemoryStream m_aEditBuffer
All editing takes place in this buffer, if it happens.
void WriteXRef(sal_uInt64 nXRefOffset, PDFReferenceElement const *pRoot)
Write the updated cross-references as part of signing.
std::map< size_t, XRefEntry > m_aXRef
Object ID <-> object offset map.
std::vector< std::unique_ptr< PDFElement > > m_aElements
This vector owns all elements.
sal_Int32 WriteSignatureObject(const OUString &rDescription, bool bAdES, sal_uInt64 &rLastByteRangeOffset, sal_Int64 &rContentOffset)
Write the signature object as part of signing.
std::map< size_t, PDFObjectElement * > m_aIDObjects
Object ID <-> Object pointer map.
static std::vector< unsigned char > DecodeHexString(PDFHexStringElement const *pElement)
Decode a hex dump.
void SetSignaturePage(size_t nPage)
std::vector< size_t > m_aStartXRefs
List of xref offsets we know.
std::map< size_t, PDFTrailerElement * > m_aOffsetTrailers
Trailer offset <-> Trailer pointer map.
std::vector< sal_Int8 > m_aSignatureLine
Signature line in PDF format, to be consumed by the next Sign() invocation.
A byte range in a PDF file.
Definition: pdfdocument.hxx:51
void setVisiting(bool bVisiting)
Definition: pdfdocument.hxx:59
bool alreadyVisiting() const
Definition: pdfdocument.hxx:60
void setParsing(bool bParsing)
Definition: pdfdocument.hxx:61
bool Read(SvStream &rStream) override
sal_uInt64 m_nOffset
Location before the ']' token.
End of a dictionary: '>>'.
bool Read(SvStream &rStream) override
sal_uInt64 m_nLocation
Offset before the '>>' token.
End of an object: 'endobj' keyword.
bool Read(SvStream &rStream) override
End of a stream: 'endstream' keyword.
bool Read(SvStream &rStream) override
Hex string: in <AABB> form.
const OString & GetValue() const
bool Read(SvStream &rStream) override
Literal string: in (asdf) form.
bool Read(SvStream &rStream) override
const OString & GetValue() const
Name object: a key string.
bool Read(SvStream &rStream) override
sal_uInt64 m_nLocation
Offset after the '/' token.
sal_uInt64 GetLocation() const
const OString & GetValue() const
Null object: the 'null' singleton.
bool Read(SvStream &rStream) override
Numeric object: an integer or a real.
sal_uInt64 GetLocation() const
sal_uInt64 m_nLength
Input file token length.
sal_uInt64 GetLength() const
bool Read(SvStream &rStream) override
sal_uInt64 m_nOffset
Input file start location.
Indirect object: something with a unique ID.
Definition: pdfdocument.hxx:69
PDFArrayElement * m_pArrayElement
The contained direct array, if any.
Definition: pdfdocument.hxx:86
std::vector< std::unique_ptr< PDFElement > > m_aElements
Elements of an object in an object stream.
Definition: pdfdocument.hxx:92
void SetNumberElement(PDFNumberElement *pNumberElement)
const std::vector< PDFReferenceElement * > & GetDictionaryReferences() const
void SetDictionaryLength(sal_uInt64 nDictionaryLength)
const std::map< OString, PDFElement * > & GetDictionaryItems()
Get access to the parsed key-value items from the object dictionary.
sal_uInt64 m_nDictionaryOffset
Position after the '<<' token.
Definition: pdfdocument.hxx:77
PDFElement * Lookup(const OString &rDictionaryKey)
void SetArrayOffset(sal_uInt64 nArrayOffset)
PDFNumberElement * m_pNumberElement
If set, the object contains this number element (outside any dictionary/array).
Definition: pdfdocument.hxx:75
PDFStreamElement * GetStream() const
Access to the stream of the object, if it has any.
std::vector< std::unique_ptr< PDFObjectElement > > m_aStoredElements
Objects of an object stream.
Definition: pdfdocument.hxx:90
void SetDictionary(PDFDictionaryElement *pDictionaryElement)
PDFDictionaryElement * m_pDictionaryElement
Definition: pdfdocument.hxx:80
sal_uInt64 m_nDictionaryLength
Length of the dictionary buffer till (before) the '>>' token.
Definition: pdfdocument.hxx:79
void SetStream(PDFStreamElement *pStreamElement)
bool Read(SvStream &rStream) override
void SetArrayLength(sal_uInt64 nArrayLength)
sal_uInt64 GetArrayOffset() const
SvMemoryStream * GetStreamBuffer() const
void SetArray(PDFArrayElement *pArrayElement)
std::vector< PDFReferenceElement * > m_aDictionaryReferences
List of all reference elements inside this object's dictionary and nested dictionaries.
Definition: pdfdocument.hxx:97
PDFArrayElement * GetArray()
std::vector< std::unique_ptr< PDFElement > > & GetStoredElements()
void SetStreamBuffer(std::unique_ptr< SvMemoryStream > &pStreamBuffer)
std::unique_ptr< SvMemoryStream > m_pStreamBuffer
Uncompressed buffer of an object in an object stream.
Definition: pdfdocument.hxx:94
PDFObjectElement(PDFDocument &rDoc, double fObjectValue, double fGenerationValue)
sal_uInt64 m_nArrayOffset
Position after the '[' token, if m_pArrayElement is set.
Definition: pdfdocument.hxx:82
void AddDictionaryReference(PDFReferenceElement *pReference)
void ParseStoredObjects()
Parse objects stored in this object stream.
sal_uInt64 m_nArrayLength
Length of the array buffer till (before) the ']' token.
Definition: pdfdocument.hxx:84
PDFNumberElement * GetNumberElement() const
sal_uInt64 GetArrayLength() const
PDFDocument & m_rDoc
The document owning this element.
Definition: pdfdocument.hxx:71
PDFObjectElement * LookupObject(const OString &rDictionaryKey)
void SetDictionaryOffset(sal_uInt64 nDictionaryOffset)
PDFStreamElement * m_pStreamElement
The stream of this object, used when this is an object stream.
Definition: pdfdocument.hxx:88
PDFDictionaryElement * GetDictionary()
const std::vector< std::unique_ptr< PDFElement > > & mrElements
size_t parse(PDFElement *pParsingElement, size_t nStartIndex=0, int nCurrentDepth=0)
Reference object: something with a unique ID.
sal_uInt64 m_nOffset
Location after the 'R' token.
PDFReferenceElement(PDFDocument &rDoc, PDFNumberElement &rObject, PDFNumberElement const &rGeneration)
PDFNumberElement & GetObjectElement() const
double LookupNumber(SvStream &rStream) const
Assuming the reference points to a number object, return its value.
bool Read(SvStream &rStream) override
PDFNumberElement & m_rObject
The element providing the object number.
PDFObjectElement * LookupObject()
Lookup referenced object, without assuming anything about its contents.
Stream object: a byte array with a known length.
SvMemoryStream & GetMemory()
sal_uInt64 GetOffset() const
SvMemoryStream m_aMemory
The byte array itself.
bool Read(SvStream &rStream) override
The trailer singleton is at the end of the doc.
sal_uInt64 GetLocation() const
PDFDictionaryElement * m_pDictionaryElement
sal_uInt64 m_nOffset
Location of the end of the trailer token.
PDFTrailerElement(PDFDocument &rDoc)
bool Read(SvStream &rStream) override
PDFElement * Lookup(const OString &rDictionaryKey)
An entry in a cross-reference stream.
void SetDirty(bool bDirty)
void SetOffset(sal_uInt64 nOffset)
XRefEntryType GetType() const
sal_uInt64 GetOffset() const
void SetType(XRefEntryType eType)
sal_Int32 nElements
#define MAX_SIGNATURE_CONTENT_LENGTH
const char * pS
SwDoc & m_rDoc
EmbeddedObjectRef * pObject
sal_Int32 nIndex
OUString aName
Mode eMode
sal_uInt16 nPos
#define SAL_WARN_IF(condition, area, stream)
#define SAL_WARN(area, stream)
#define SAL_INFO(area, stream)
aBuf
size
OStringBuffer & padToLength(OStringBuffer &rBuffer, sal_Int32 nLength, char cFill='\0')
int i
constexpr std::enable_if_t< std::is_signed_v< T >, std::make_unsigned_t< T > > make_unsigned(T value)
double toDouble(std::u16string_view str)
sal_uInt32 toUInt32(std::u16string_view str, sal_Int16 radix=10)
std::vector< unsigned char > DecodeHexString(std::string_view rHex)
css::uno::Reference< css::linguistic2::XProofreadingIterator > get(css::uno::Reference< css::uno::XComponentContext > const &context)
@ COMPRESSED
xref stream "2".
@ FREE
xref "f" or xref stream "0".
@ NOT_COMPRESSED
xref "n" or xref stream "1".
static void visitPages(PDFObjectElement *pPages, std::vector< PDFObjectElement * > &rRet)
Visits the page tree recursively, looking for page objects.
@ STORED_OBJECT
Same as END_OF_OBJECT, but for object streams (no endobj keyword).
@ EOF_TOKEN
Till the first %%EOF token.
@ END_OF_OBJECT
Till the end of the current object.
bool convertToHighestSupported(SvStream &rInStream, SvStream &rOutStream)
Converts to highest supported format version (1.6).
Definition: pdfcompat.cxx:43
const char GetValue[]
QPRO_FUNC_TYPE nType
#define STREAM_SEEK_TO_END
sal_uInt16 sal_Unicode
std::unique_ptr< char[]> aBuffer
sal_Int32 nLength