LibreOffice Module vcl (master) 1
pdfdocument.cxx
Go to the documentation of this file.
1/* -*- Mode: C++; tab-width: 4; indent-tabs-mode: nil; c-basic-offset: 4 -*- */
2/*
3 * This file is part of the LibreOffice project.
4 *
5 * This Source Code Form is subject to the terms of the Mozilla Public
6 * License, v. 2.0. If a copy of the MPL was not distributed with this
7 * file, You can obtain one at http://mozilla.org/MPL/2.0/.
8 */
9
11#include <pdf/pdfcompat.hxx>
12
13#include <map>
14#include <memory>
15#include <vector>
16
17#include <com/sun/star/uno/Sequence.hxx>
18#include <com/sun/star/security/XCertificate.hpp>
19
21#include <comphelper/string.hxx>
22#include <o3tl/string_view.hxx>
23#include <rtl/character.hxx>
24#include <rtl/strbuf.hxx>
25#include <rtl/string.hxx>
26#include <sal/log.hxx>
27#include <sal/types.h>
28#include <svl/cryptosign.hxx>
29#include <tools/zcodec.hxx>
30#include <vcl/pdfwriter.hxx>
31#include <o3tl/safeint.hxx>
32
33#include <pdf/objectcopier.hxx>
34
35using namespace com::sun::star;
36
37namespace vcl::filter
38{
// Defaulted (compiler-generated) constructor for XRefEntry.
39XRefEntry::XRefEntry() = default;
40
// Defaulted (compiler-generated) constructor for PDFDocument.
41PDFDocument::PDFDocument() = default;
42
44
// Removes the signature at index nPosition by seeking the edit buffer to the
// end-of-file offset recorded for that position in m_aEOFs.
//
// Returns false (after logging a warning) when nPosition is not a valid index
// into the result of GetSignatureWidgets(), or when the number of signature
// widgets is not exactly m_aEOFs.size() - 1. Otherwise returns the good()
// state of the edit buffer.
45bool PDFDocument::RemoveSignature(size_t nPosition)
46{
47 std::vector<PDFObjectElement*> aSignatures = GetSignatureWidgets();
48 if (nPosition >= aSignatures.size())
49 {
50 SAL_WARN("vcl.filter", "PDFDocument::RemoveSignature: invalid nPosition");
51 return false;
52 }
53
// Each signature is expected to have been added by exactly one incremental
// update, so there must be one more EOF marker than there are signatures.
54 if (aSignatures.size() != m_aEOFs.size() - 1)
55 {
56 SAL_WARN("vcl.filter", "PDFDocument::RemoveSignature: no 1:1 mapping between signatures "
57 "and incremental updates");
58 return false;
59 }
60
61 // The EOF offset is the end of the original file, without the signature at
62 // nPosition.
63 m_aEditBuffer.Seek(m_aEOFs[nPosition]);
64 // Drop all bytes after the current position.
// NOTE(review): no statement follows the comment above in this listing (the
// embedded numbering jumps from 64 to 66), so as shown nothing is truncated
// here -- confirm against the full source.
66
67 return m_aEditBuffer.good();
68}
69
71{
72 sal_Int32 nObject = m_aXRef.size();
73 m_aXRef[nObject] = XRefEntry();
74 return nObject;
75}
76
// Replaces the cross-reference entry of object nObject with a fresh entry that
// is marked dirty.
//
// Returns false (after logging a warning) when nObject, taken as unsigned, is
// not smaller than m_aXRef.size(); returns true otherwise.
77bool PDFDocument::updateObject(sal_Int32 nObject)
78{
79 if (o3tl::make_unsigned(nObject) >= m_aXRef.size())
80 {
81 SAL_WARN("vcl.filter", "PDFDocument::updateObject: invalid nObject");
82 return false;
83 }
84
85 XRefEntry aEntry;
// NOTE(review): the embedded numbering jumps from 85 to 87, so one line is
// missing from this listing. As shown, no offset is set on aEntry before it is
// stored -- confirm against the full source.
87 aEntry.SetDirty(true);
88 m_aXRef[nObject] = aEntry;
89 return true;
90}
91
92bool PDFDocument::writeBufferBytes(const void* pBuffer, sal_uInt64 nBytes)
93{
94 std::size_t nWritten = m_aEditBuffer.WriteBytes(pBuffer, nBytes);
95 return nWritten == nBytes;
96}
97
98void PDFDocument::SetSignatureLine(std::vector<sal_Int8>&& rSignatureLine)
99{
100 m_aSignatureLine = std::move(rSignatureLine);
101}
102
103void PDFDocument::SetSignaturePage(size_t nPage) { m_nSignaturePage = nPage; }
104
106{
107 sal_uInt32 nRet = 0;
108 for (const auto& pSignature : GetSignatureWidgets())
109 {
110 auto pT = dynamic_cast<PDFLiteralStringElement*>(pSignature->Lookup("T"));
111 if (!pT)
112 continue;
113
114 const OString& rValue = pT->GetValue();
115 static constexpr std::string_view aPrefix = "Signature";
116 if (!rValue.startsWith(aPrefix))
117 continue;
118
119 nRet = std::max(nRet, o3tl::toUInt32(rValue.subView(aPrefix.size())));
120 }
121
122 return nRet + 1;
123}
124
// Builds the signature dictionary object (/Type/Sig) and writes it to the edit
// buffer, registering it as a new dirty entry in m_aXRef.
//
// rDescription: when non-empty, written as the /Reason entry.
// bAdES: selects the SubFilter, /ETSI.CAdES.detached when true and
//        /adbe.pkcs7.detached otherwise.
// rLastByteRangeOffset: [out] stream offset at which the last /ByteRange value
//        has to be written later, once it is known.
// rContentOffset: [out] stream offset just after the "<" that opens the
//        /Contents hex string.
//
// Returns the object id allocated for the signature object.
125sal_Int32 PDFDocument::WriteSignatureObject(const OUString& rDescription, bool bAdES,
126 sal_uInt64& rLastByteRangeOffset,
127 sal_Int64& rContentOffset)
128{
129 // Write signature object.
// The new id is the current number of xref entries; the entry records the
// position in the edit buffer where the object is about to be written.
130 sal_Int32 nSignatureId = m_aXRef.size();
131 XRefEntry aSignatureEntry;
132 aSignatureEntry.SetOffset(m_aEditBuffer.Tell());
133 aSignatureEntry.SetDirty(true);
134 m_aXRef[nSignatureId] = aSignatureEntry;
135 OStringBuffer aSigBuffer(OString::number(nSignatureId)
136 + " 0 obj\n"
137 "<</Contents <");
// Object offset plus what has been buffered so far: the first byte after "<".
138 rContentOffset = aSignatureEntry.GetOffset() + aSigBuffer.getLength();
139 // Reserve space for the PKCS#7 object.
140 OStringBuffer aContentFiller(MAX_SIGNATURE_CONTENT_LENGTH);
// NOTE(review): the embedded numbering jumps from 140 to 142, so one line is
// missing from this listing. As shown, nothing fills aContentFiller before it
// is appended (the constructor argument is presumably only a capacity) --
// confirm against the full source.
142 aSigBuffer.append(aContentFiller + ">\n/Type/Sig/SubFilter");
143 if (bAdES)
144 aSigBuffer.append("/ETSI.CAdES.detached");
145 else
146 aSigBuffer.append("/adbe.pkcs7.detached");
147
148 // Time of signing.
149 aSigBuffer.append(" /M (" + vcl::PDFWriter::GetDateTime()
150 + ")"
151
152 // Byte range: we can write offset1-length1 and offset2 right now, will
153 // write length2 later.
154 " /ByteRange [ 0 "
155 // -1 and +1 is the leading "<" and the trailing ">" around the hex string.
156 + OString::number(rContentOffset - 1) + " "
157 + OString::number(rContentOffset + MAX_SIGNATURE_CONTENT_LENGTH + 1) + " ");
// Position right after the third ByteRange number: the fourth one goes here.
158 rLastByteRangeOffset = aSignatureEntry.GetOffset() + aSigBuffer.getLength();
159 // We don't know how many bytes we need for the last ByteRange value, this
160 // should be enough.
161 OStringBuffer aByteRangeFiller;
162 comphelper::string::padToLength(aByteRangeFiller, 100, ' ');
163 aSigBuffer.append(aByteRangeFiller
164 // Finish the Sig obj.
165 + " /Filter/Adobe.PPKMS");
166
167 if (!rDescription.isEmpty())
168 {
169 aSigBuffer.append("/Reason<");
170 vcl::PDFWriter::AppendUnicodeTextString(rDescription, aSigBuffer);
171 aSigBuffer.append(">");
172 }
173
174 aSigBuffer.append(" >>\nendobj\n\n");
175 m_aEditBuffer.WriteOString(aSigBuffer);
176
177 return nSignatureId;
178}
179
181{
182 PDFDocument aPDFDocument;
183 filter::PDFObjectElement* pPage = nullptr;
184 std::vector<filter::PDFObjectElement*> aContentStreams;
185
186 if (!m_aSignatureLine.empty())
187 {
188 // Parse the PDF data of signature line: we can set the signature rectangle to non-empty
189 // based on it.
190 SvMemoryStream aPDFStream;
191 aPDFStream.WriteBytes(m_aSignatureLine.data(), m_aSignatureLine.size());
192 aPDFStream.Seek(0);
193 if (!aPDFDocument.Read(aPDFStream))
194 {
195 SAL_WARN("vcl.filter",
196 "PDFDocument::WriteAppearanceObject: failed to read the PDF document");
197 return -1;
198 }
199
200 std::vector<filter::PDFObjectElement*> aPages = aPDFDocument.GetPages();
201 if (aPages.empty())
202 {
203 SAL_WARN("vcl.filter", "PDFDocument::WriteAppearanceObject: no pages");
204 return -1;
205 }
206
207 pPage = aPages[0];
208 if (!pPage)
209 {
210 SAL_WARN("vcl.filter", "PDFDocument::WriteAppearanceObject: no page");
211 return -1;
212 }
213
214 // Calculate the bounding box.
215 PDFElement* pMediaBox = pPage->Lookup("MediaBox");
216 auto pMediaBoxArray = dynamic_cast<PDFArrayElement*>(pMediaBox);
217 if (!pMediaBoxArray || pMediaBoxArray->GetElements().size() < 4)
218 {
219 SAL_WARN("vcl.filter",
220 "PDFDocument::WriteAppearanceObject: MediaBox is not an array of 4");
221 return -1;
222 }
223 const std::vector<PDFElement*>& rMediaBoxElements = pMediaBoxArray->GetElements();
224 auto pWidth = dynamic_cast<PDFNumberElement*>(rMediaBoxElements[2]);
225 if (!pWidth)
226 {
227 SAL_WARN("vcl.filter", "PDFDocument::WriteAppearanceObject: MediaBox has no width");
228 return -1;
229 }
230 rSignatureRectangle.setWidth(pWidth->GetValue());
231 auto pHeight = dynamic_cast<PDFNumberElement*>(rMediaBoxElements[3]);
232 if (!pHeight)
233 {
234 SAL_WARN("vcl.filter", "PDFDocument::WriteAppearanceObject: MediaBox has no height");
235 return -1;
236 }
237 rSignatureRectangle.setHeight(pHeight->GetValue());
238
239 if (PDFObjectElement* pContentStream = pPage->LookupObject("Contents"))
240 {
241 aContentStreams.push_back(pContentStream);
242 }
243
244 if (aContentStreams.empty())
245 {
246 SAL_WARN("vcl.filter", "PDFDocument::WriteAppearanceObject: no content stream");
247 return -1;
248 }
249 }
250 m_aSignatureLine.clear();
251
252 // Write appearance object: allocate an ID.
253 sal_Int32 nAppearanceId = m_aXRef.size();
254 m_aXRef[nAppearanceId] = XRefEntry();
255
256 // Write the object content.
257 SvMemoryStream aEditBuffer;
258 aEditBuffer.WriteNumberAsString(nAppearanceId);
259 aEditBuffer.WriteOString(" 0 obj\n");
260 aEditBuffer.WriteOString("<</Type/XObject\n/Subtype/Form\n");
261
262 PDFObjectCopier aCopier(*this);
263 if (!aContentStreams.empty())
264 {
265 assert(pPage && "aContentStreams is only filled if there was a pPage");
266 OStringBuffer aBuffer;
267 aCopier.copyPageResources(pPage, aBuffer);
268 aEditBuffer.WriteOString(aBuffer);
269 }
270
271 aEditBuffer.WriteOString("/BBox[0 0 ");
272 aEditBuffer.WriteNumberAsString(rSignatureRectangle.getOpenWidth());
273 aEditBuffer.WriteOString(" ");
274 aEditBuffer.WriteNumberAsString(rSignatureRectangle.getOpenHeight());
275 aEditBuffer.WriteOString("]\n/Length ");
276
277 // Add the object to the doc-level edit buffer and update the offset.
278 SvMemoryStream aStream;
279 bool bCompressed = false;
280 sal_Int32 nLength = 0;
281 if (!aContentStreams.empty())
282 {
283 nLength = PDFObjectCopier::copyPageStreams(aContentStreams, aStream, bCompressed);
284 }
285 aEditBuffer.WriteNumberAsString(nLength);
286 if (bCompressed)
287 {
288 aEditBuffer.WriteOString(" /Filter/FlateDecode");
289 }
290
291 aEditBuffer.WriteOString("\n>>\n");
292
293 aEditBuffer.WriteOString("stream\n");
294
295 // Copy the original page streams to the form XObject stream.
296 aStream.Seek(0);
297 aEditBuffer.WriteStream(aStream);
298
299 aEditBuffer.WriteOString("\nendstream\nendobj\n\n");
300
301 aEditBuffer.Seek(0);
302 XRefEntry aAppearanceEntry;
303 aAppearanceEntry.SetOffset(m_aEditBuffer.Tell());
304 aAppearanceEntry.SetDirty(true);
305 m_aXRef[nAppearanceId] = aAppearanceEntry;
306 m_aEditBuffer.WriteStream(aEditBuffer);
307
308 return nAppearanceId;
309}
310
// Writes the signature widget annotation object (/Type/Annot/Subtype/Widget,
// /FT/Sig) to the edit buffer and registers it as a new dirty entry in m_aXRef.
//
// rFirstPage, nSignatureId: not referenced in the lines visible here.
//        NOTE(review): presumably used in the lines this listing omits, where
//        the visible " 0 R\n" fragments are preceded by an object number --
//        confirm against the full source.
// nAppearanceId: object id written as the /N entry of the /AP dictionary.
// rSignatureRectangle: its open width and height are written into /Rect.
//
// Returns the object id allocated for the annotation object.
//
// NOTE(review): the embedded numbering has several gaps in this function
// (324, 329, 331, 333-334, 338-340, 342-343), so the visible statements do not
// form the complete output sequence.
311sal_Int32 PDFDocument::WriteAnnotObject(PDFObjectElement const& rFirstPage, sal_Int32 nSignatureId,
312 sal_Int32 nAppearanceId,
313 const tools::Rectangle& rSignatureRectangle)
314{
315 // Decide what identifier to use for the new signature.
316 sal_uInt32 nNextSignature = GetNextSignature();
317
318 // Write the Annot object, references nSignatureId and nAppearanceId.
319 sal_Int32 nAnnotId = m_aXRef.size();
320 XRefEntry aAnnotEntry;
321 aAnnotEntry.SetOffset(m_aEditBuffer.Tell());
322 aAnnotEntry.SetDirty(true);
323 m_aXRef[nAnnotId] = aAnnotEntry;
325 m_aEditBuffer.WriteOString(" 0 obj\n");
326 m_aEditBuffer.WriteOString("<</Type/Annot/Subtype/Widget/F 132\n");
327 m_aEditBuffer.WriteOString("/Rect[0 0 ");
328 m_aEditBuffer.WriteNumberAsString(rSignatureRectangle.getOpenWidth());
330 m_aEditBuffer.WriteNumberAsString(rSignatureRectangle.getOpenHeight());
332 m_aEditBuffer.WriteOString("/FT/Sig\n");
335 m_aEditBuffer.WriteOString(" 0 R\n");
// The field name is "Signature" followed by the number from GetNextSignature().
336 m_aEditBuffer.WriteOString("/T(Signature");
337 m_aEditBuffer.WriteNumberAsString(nNextSignature);
341 m_aEditBuffer.WriteOString(" 0 R\n");
344 m_aEditBuffer.WriteOString(" 0 R\n");
345 m_aEditBuffer.WriteOString("/AP<<\n/N ");
346 m_aEditBuffer.WriteNumberAsString(nAppearanceId);
347 m_aEditBuffer.WriteOString(" 0 R\n>>\n");
348 m_aEditBuffer.WriteOString(">>\nendobj\n\n");
349
350 return nAnnotId;
351}
352
// Writes an updated object to the edit buffer so that the page's Annots can
// include the new annotation.
//
// Two cases are handled, depending on what the page's "Annots" key holds:
//  - an indirect reference: the referenced array object is rewritten;
//  - anything else (no key, or a direct array): the page object itself is
//    rewritten by copying the bytes of its dictionary from the edit buffer.
//
// rFirstPage: the page object to update.
// nAnnotId: not referenced in the lines visible here.
//        NOTE(review): presumably written as the new reference in the lines
//        this listing omits -- confirm against the full source.
//
// Returns true on success; false (after logging a warning) when the Annots
// reference cannot be resolved, resolves to a non-array, or the page's object
// id is not below m_aXRef.size().
//
// NOTE(review): the embedded numbering has several gaps in this function
// (371, 389, 391, 394-396, 411, 413, 422-423, 438-440, 449), so the visible
// statements do not form the complete output sequence.
353bool PDFDocument::WritePageObject(PDFObjectElement& rFirstPage, sal_Int32 nAnnotId)
354{
355 PDFElement* pAnnots = rFirstPage.Lookup("Annots");
356 auto pAnnotsReference = dynamic_cast<PDFReferenceElement*>(pAnnots);
357 if (pAnnotsReference)
358 {
359 // Write the updated Annots key of the Page object.
360 PDFObjectElement* pAnnotsObject = pAnnotsReference->LookupObject();
361 if (!pAnnotsObject)
362 {
363 SAL_WARN("vcl.filter", "PDFDocument::Sign: invalid Annots reference");
364 return false;
365 }
366
// Point the xref entry of the Annots object at the copy written below.
367 sal_uInt32 nAnnotsId = pAnnotsObject->GetObjectValue();
368 m_aXRef[nAnnotsId].SetType(XRefEntryType::NOT_COMPRESSED);
369 m_aXRef[nAnnotsId].SetOffset(m_aEditBuffer.Tell());
370 m_aXRef[nAnnotsId].SetDirty(true);
372 m_aEditBuffer.WriteOString(" 0 obj\n[");
373
374 // Write existing references.
375 PDFArrayElement* pArray = pAnnotsObject->GetArray();
376 if (!pArray)
377 {
// Note: at this point the xref entry has already been updated and the object
// header has already been written to the edit buffer.
378 SAL_WARN("vcl.filter", "PDFDocument::Sign: Page Annots is a reference to a non-array");
379 return false;
380 }
381
382 for (size_t i = 0; i < pArray->GetElements().size(); ++i)
383 {
// Array elements that are not references are skipped.
384 auto pReference = dynamic_cast<PDFReferenceElement*>(pArray->GetElements()[i]);
385 if (!pReference)
386 continue;
387
// NOTE(review): listing line 389 is missing, so the statement controlled by
// the `if (i)` below is not visible here -- confirm against the full source.
388 if (i)
390 m_aEditBuffer.WriteNumberAsString(pReference->GetObjectValue());
392 }
393 // Write our reference.
397
398 m_aEditBuffer.WriteOString("]\nendobj\n\n");
399 }
400 else
401 {
402 // Write the updated first page object, references nAnnotId.
403 sal_uInt32 nFirstPageId = rFirstPage.GetObjectValue();
404 if (nFirstPageId >= m_aXRef.size())
405 {
406 SAL_WARN("vcl.filter", "PDFDocument::Sign: invalid first page obj id");
407 return false;
408 }
409 m_aXRef[nFirstPageId].SetOffset(m_aEditBuffer.Tell());
410 m_aXRef[nFirstPageId].SetDirty(true);
412 m_aEditBuffer.WriteOString(" 0 obj\n");
414 auto pAnnotsArray = dynamic_cast<PDFArrayElement*>(pAnnots);
415 if (!pAnnotsArray)
416 {
417 // No Annots key, just write the key with a single reference.
// The source bytes are read from the edit buffer's own data at the dictionary
// offset and written again at the current position of the same buffer.
418 m_aEditBuffer.WriteBytes(static_cast<const char*>(m_aEditBuffer.GetData())
419 + rFirstPage.GetDictionaryOffset(),
420 rFirstPage.GetDictionaryLength());
421 m_aEditBuffer.WriteOString("/Annots[");
424 }
425 else
426 {
427 // Annots key is already there, insert our reference at the end.
428 PDFDictionaryElement* pDictionary = rFirstPage.GetDictionary();
429
430 // Offset right before the end of the Annots array.
431 sal_uInt64 nAnnotsEndOffset = pDictionary->GetKeyOffset("Annots")
432 + pDictionary->GetKeyValueLength("Annots") - 1;
433 // Length of beginning of the dictionary -> Annots end.
434 sal_uInt64 nAnnotsBeforeEndLength = nAnnotsEndOffset - rFirstPage.GetDictionaryOffset();
435 m_aEditBuffer.WriteBytes(static_cast<const char*>(m_aEditBuffer.GetData())
436 + rFirstPage.GetDictionaryOffset(),
437 nAnnotsBeforeEndLength);
441 // Length of Annots end -> end of the dictionary.
442 sal_uInt64 nAnnotsAfterEndLength = rFirstPage.GetDictionaryOffset()
443 + rFirstPage.GetDictionaryLength()
444 - nAnnotsEndOffset;
445 m_aEditBuffer.WriteBytes(static_cast<const char*>(m_aEditBuffer.GetData())
446 + nAnnotsEndOffset,
447 nAnnotsAfterEndLength);
448 }
450 m_aEditBuffer.WriteOString("\nendobj\n\n");
451 }
452
453 return true;
454}
455
457{
458 if (m_pXRefStream)
459 pRoot = dynamic_cast<PDFReferenceElement*>(m_pXRefStream->Lookup("Root"));
460 else
461 {
462 if (!m_pTrailer)
463 {
464 SAL_WARN("vcl.filter", "PDFDocument::Sign: found no trailer");
465 return false;
466 }
467 pRoot = dynamic_cast<PDFReferenceElement*>(m_pTrailer->Lookup("Root"));
468 }
469 if (!pRoot)
470 {
471 SAL_WARN("vcl.filter", "PDFDocument::Sign: trailer has no root reference");
472 return false;
473 }
474 PDFObjectElement* pCatalog = pRoot->LookupObject();
475 if (!pCatalog)
476 {
477 SAL_WARN("vcl.filter", "PDFDocument::Sign: invalid catalog reference");
478 return false;
479 }
480 sal_uInt32 nCatalogId = pCatalog->GetObjectValue();
481 if (nCatalogId >= m_aXRef.size())
482 {
483 SAL_WARN("vcl.filter", "PDFDocument::Sign: invalid catalog obj id");
484 return false;
485 }
486 PDFElement* pAcroForm = pCatalog->Lookup("AcroForm");
487 auto pAcroFormReference = dynamic_cast<PDFReferenceElement*>(pAcroForm);
488 if (pAcroFormReference)
489 {
490 // Write the updated AcroForm key of the Catalog object.
491 PDFObjectElement* pAcroFormObject = pAcroFormReference->LookupObject();
492 if (!pAcroFormObject)
493 {
494 SAL_WARN("vcl.filter", "PDFDocument::Sign: invalid AcroForm reference");
495 return false;
496 }
497
498 sal_uInt32 nAcroFormId = pAcroFormObject->GetObjectValue();
499 m_aXRef[nAcroFormId].SetType(XRefEntryType::NOT_COMPRESSED);
500 m_aXRef[nAcroFormId].SetOffset(m_aEditBuffer.Tell());
501 m_aXRef[nAcroFormId].SetDirty(true);
503 m_aEditBuffer.WriteOString(" 0 obj\n");
504
505 // If this is nullptr, then the AcroForm object is not in an object stream.
506 SvMemoryStream* pStreamBuffer = pAcroFormObject->GetStreamBuffer();
507
508 if (!pAcroFormObject->Lookup("Fields"))
509 {
510 SAL_WARN("vcl.filter",
511 "PDFDocument::Sign: AcroForm object without required Fields key");
512 return false;
513 }
514
515 PDFDictionaryElement* pAcroFormDictionary = pAcroFormObject->GetDictionary();
516 if (!pAcroFormDictionary)
517 {
518 SAL_WARN("vcl.filter", "PDFDocument::Sign: AcroForm object has no dictionary");
519 return false;
520 }
521
522 // Offset right before the end of the Fields array.
523 sal_uInt64 nFieldsEndOffset = pAcroFormDictionary->GetKeyOffset("Fields")
524 + pAcroFormDictionary->GetKeyValueLength("Fields")
525 - strlen("]");
526
527 // Length of beginning of the object dictionary -> Fields end.
528 sal_uInt64 nFieldsBeforeEndLength = nFieldsEndOffset;
529 if (pStreamBuffer)
530 m_aEditBuffer.WriteBytes(pStreamBuffer->GetData(), nFieldsBeforeEndLength);
531 else
532 {
533 nFieldsBeforeEndLength -= pAcroFormObject->GetDictionaryOffset();
535 m_aEditBuffer.WriteBytes(static_cast<const char*>(m_aEditBuffer.GetData())
536 + pAcroFormObject->GetDictionaryOffset(),
537 nFieldsBeforeEndLength);
538 }
539
540 // Append our reference at the end of the Fields array.
544
545 // Length of Fields end -> end of the object dictionary.
546 if (pStreamBuffer)
547 {
548 sal_uInt64 nFieldsAfterEndLength = pStreamBuffer->GetSize() - nFieldsEndOffset;
549 m_aEditBuffer.WriteBytes(static_cast<const char*>(pStreamBuffer->GetData())
550 + nFieldsEndOffset,
551 nFieldsAfterEndLength);
552 }
553 else
554 {
555 sal_uInt64 nFieldsAfterEndLength = pAcroFormObject->GetDictionaryOffset()
556 + pAcroFormObject->GetDictionaryLength()
557 - nFieldsEndOffset;
558 m_aEditBuffer.WriteBytes(static_cast<const char*>(m_aEditBuffer.GetData())
559 + nFieldsEndOffset,
560 nFieldsAfterEndLength);
562 }
563
564 m_aEditBuffer.WriteOString("\nendobj\n\n");
565 }
566 else
567 {
568 // Write the updated Catalog object, references nAnnotId.
569 auto pAcroFormDictionary = dynamic_cast<PDFDictionaryElement*>(pAcroForm);
570 m_aXRef[nCatalogId].SetOffset(m_aEditBuffer.Tell());
571 m_aXRef[nCatalogId].SetDirty(true);
573 m_aEditBuffer.WriteOString(" 0 obj\n");
575 if (!pAcroFormDictionary)
576 {
577 // No AcroForm key, assume no signatures.
578 m_aEditBuffer.WriteBytes(static_cast<const char*>(m_aEditBuffer.GetData())
579 + pCatalog->GetDictionaryOffset(),
580 pCatalog->GetDictionaryLength());
581 m_aEditBuffer.WriteOString("/AcroForm<</Fields[\n");
583 m_aEditBuffer.WriteOString(" 0 R\n]/SigFlags 3>>\n");
584 }
585 else
586 {
587 // AcroForm key is already there, insert our reference at the Fields end.
588 auto it = pAcroFormDictionary->GetItems().find("Fields");
589 if (it == pAcroFormDictionary->GetItems().end())
590 {
591 SAL_WARN("vcl.filter", "PDFDocument::Sign: AcroForm without required Fields key");
592 return false;
593 }
594
595 auto pFields = dynamic_cast<PDFArrayElement*>(it->second);
596 if (!pFields)
597 {
598 SAL_WARN("vcl.filter", "PDFDocument::Sign: AcroForm Fields is not an array");
599 return false;
600 }
601
602 // Offset right before the end of the Fields array.
603 sal_uInt64 nFieldsEndOffset = pAcroFormDictionary->GetKeyOffset("Fields")
604 + pAcroFormDictionary->GetKeyValueLength("Fields") - 1;
605 // Length of beginning of the Catalog dictionary -> Fields end.
606 sal_uInt64 nFieldsBeforeEndLength = nFieldsEndOffset - pCatalog->GetDictionaryOffset();
607 m_aEditBuffer.WriteBytes(static_cast<const char*>(m_aEditBuffer.GetData())
608 + pCatalog->GetDictionaryOffset(),
609 nFieldsBeforeEndLength);
613 // Length of Fields end -> end of the Catalog dictionary.
614 sal_uInt64 nFieldsAfterEndLength = pCatalog->GetDictionaryOffset()
615 + pCatalog->GetDictionaryLength() - nFieldsEndOffset;
616 m_aEditBuffer.WriteBytes(static_cast<const char*>(m_aEditBuffer.GetData())
617 + nFieldsEndOffset,
618 nFieldsAfterEndLength);
619 }
620 m_aEditBuffer.WriteOString(">>\nendobj\n\n");
621 }
622
623 return true;
624}
625
// Writes the cross-reference information for the dirty entries of m_aXRef to
// the edit buffer.
//
// When m_pXRefStream is set, a new cross-reference stream object is written:
// each dirty entry becomes one predictor-filtered row, and the rows are
// compressed with ZCodec. Otherwise a classic "xref" table followed by a
// "trailer" dictionary is written.
//
// nXRefOffset: offset stored in the xref entry of the new xref stream object
//        (only used in the xref-stream branch in the lines visible here).
// pRoot: not referenced in the lines visible here.
//        NOTE(review): presumably written after the "/Root " key in the lines
//        this listing omits -- confirm against the full source.
//
// NOTE(review): the embedded numbering has many gaps in this function (for
// example 665/668/671, 704, 721, 723, 733-734, 736, 744, 746, 763, 769,
// 774-777, 781, 799-800, 808, 813, 815-818, 824, 826, 841, 843, 850, 853), so
// the visible statements do not form the complete output sequence.
626void PDFDocument::WriteXRef(sal_uInt64 nXRefOffset, PDFReferenceElement const* pRoot)
627{
628 if (m_pXRefStream)
629 {
630 // Write the xref stream.
631 // This is a bit meta: the xref stream stores its own offset.
632 sal_Int32 nXRefStreamId = m_aXRef.size();
633 XRefEntry aXRefStreamEntry;
634 aXRefStreamEntry.SetOffset(nXRefOffset);
635 aXRefStreamEntry.SetDirty(true);
636 m_aXRef[nXRefStreamId] = aXRefStreamEntry;
637
638 // Write stream data.
639 SvMemoryStream aXRefStream;
640 const size_t nOffsetLen = 3;
641 // 3 additional bytes: predictor, the first and the third field.
642 const size_t nLineLength = nOffsetLen + 3;
643 // This is the line as it appears before tweaking according to the predictor.
644 std::vector<unsigned char> aOrigLine(nLineLength);
645 // This is the previous line.
646 std::vector<unsigned char> aPrevLine(nLineLength);
647 // This is the line as written to the stream.
648 std::vector<unsigned char> aFilteredLine(nLineLength);
649 for (const auto& rXRef : m_aXRef)
650 {
651 const XRefEntry& rEntry = rXRef.second;
652
// Only entries marked dirty are emitted.
653 if (!rEntry.GetDirty())
654 continue;
655
656 // Predictor.
657 size_t nPos = 0;
658 // PNG prediction: up (on all rows).
659 aOrigLine[nPos++] = 2;
660
661 // First field.
662 unsigned char nType = 0;
663 switch (rEntry.GetType())
664 {
// NOTE(review): the case labels (listing lines 665, 668, 671) are missing from
// this listing; only the assignments of 0, 1 and 2 are visible.
666 nType = 0;
667 break;
669 nType = 1;
670 break;
672 nType = 2;
673 break;
674 }
675 aOrigLine[nPos++] = nType;
676
677 // Second field.
678 for (size_t i = 0; i < nOffsetLen; ++i)
679 {
680 size_t nByte = nOffsetLen - i - 1;
681 // Fields requiring more than one byte are stored with the
682 // high-order byte first.
683 unsigned char nCh = (rEntry.GetOffset() & (0xff << (nByte * 8))) >> (nByte * 8);
684 aOrigLine[nPos++] = nCh;
685 }
686
687 // Third field.
688 aOrigLine[nPos++] = 0;
689
690 // Now apply the predictor.
// The predictor byte itself is copied through unchanged; every other byte is
// stored as the difference to the same column of the previous row.
691 aFilteredLine[0] = aOrigLine[0];
692 for (size_t i = 1; i < nLineLength; ++i)
693 {
694 // Count the delta vs the previous line.
695 aFilteredLine[i] = aOrigLine[i] - aPrevLine[i];
696 // Remember the new reference.
697 aPrevLine[i] = aOrigLine[i];
698 }
699
700 aXRefStream.WriteBytes(aFilteredLine.data(), aFilteredLine.size());
701 }
702
703 m_aEditBuffer.WriteNumberAsString(nXRefStreamId);
// NOTE(review): listing line 704 is missing; the string literal below is the
// tail of a call whose beginning is not visible here.
705 " 0 obj\n<</DecodeParms<</Columns 5/Predictor 12>>/Filter/FlateDecode");
706
707 // ID.
708 auto pID = dynamic_cast<PDFArrayElement*>(m_pXRefStream->Lookup("ID"));
709 if (pID)
710 {
711 const std::vector<PDFElement*>& rElements = pID->GetElements();
712 m_aEditBuffer.WriteOString("/ID [ <");
713 for (size_t i = 0; i < rElements.size(); ++i)
714 {
// Elements that are not hex strings are skipped.
715 auto pIDString = dynamic_cast<PDFHexStringElement*>(rElements[i]);
716 if (!pIDString)
717 continue;
718
719 m_aEditBuffer.WriteOString(pIDString->GetValue());
720 if ((i + 1) < rElements.size())
722 }
724 }
725
726 // Index.
727 m_aEditBuffer.WriteOString("/Index [ ");
728 for (const auto& rXRef : m_aXRef)
729 {
730 if (!rXRef.second.GetDirty())
731 continue;
732
735 }
737
738 // Info.
739 auto pInfo = dynamic_cast<PDFReferenceElement*>(m_pXRefStream->Lookup("Info"));
740 if (pInfo)
741 {
742 m_aEditBuffer.WriteOString("/Info ");
743 m_aEditBuffer.WriteNumberAsString(pInfo->GetObjectValue());
745 m_aEditBuffer.WriteNumberAsString(pInfo->GetGenerationValue());
747 }
748
749 // Length.
750 m_aEditBuffer.WriteOString("/Length ");
751 {
// Compress the rows into a temporary stream, then replace the content of
// aXRefStream with the compressed bytes.
752 ZCodec aZCodec;
753 aZCodec.BeginCompression();
754 aXRefStream.Seek(0);
755 SvMemoryStream aStream;
756 aZCodec.Compress(aXRefStream, aStream);
757 aZCodec.EndCompression();
758 aXRefStream.Seek(0);
759 aXRefStream.SetStreamSize(0);
760 aStream.Seek(0);
761 aXRefStream.WriteStream(aStream);
762 }
764
765 if (!m_aStartXRefs.empty())
766 {
767 // Write location of the previous cross-reference section.
768 m_aEditBuffer.WriteOString("/Prev ");
770 }
771
772 // Root.
773 m_aEditBuffer.WriteOString("/Root ");
778
779 // Size.
780 m_aEditBuffer.WriteOString("/Size ");
782
783 m_aEditBuffer.WriteOString("/Type/XRef/W[1 3 1]>>\nstream\n");
784 aXRefStream.Seek(0);
785 m_aEditBuffer.WriteStream(aXRefStream);
786 m_aEditBuffer.WriteOString("\nendstream\nendobj\n\n");
787 }
788 else
789 {
790 // Write the xref table.
791 m_aEditBuffer.WriteOString("xref\n");
792 for (const auto& rXRef : m_aXRef)
793 {
794 size_t nObject = rXRef.first;
795 size_t nOffset = rXRef.second.GetOffset();
796 if (!rXRef.second.GetDirty())
797 continue;
798
// The offset is left-padded with zeros to a width of 10 characters.
801 OStringBuffer aBuffer = OString::number(static_cast<sal_Int32>(nOffset));
802 while (aBuffer.getLength() < 10)
803 aBuffer.insert(0, "0");
// Object 0 gets the "65535 f" suffix, every other object "00000 n".
804 if (nObject == 0)
805 aBuffer.append(" 65535 f \n");
806 else
807 aBuffer.append(" 00000 n \n");
809 }
810
811 // Write the trailer.
812 m_aEditBuffer.WriteOString("trailer\n<</Size ");
814 m_aEditBuffer.WriteOString("/Root ");
819 auto pInfo = dynamic_cast<PDFReferenceElement*>(m_pTrailer->Lookup("Info"));
820 if (pInfo)
821 {
822 m_aEditBuffer.WriteOString("/Info ");
823 m_aEditBuffer.WriteNumberAsString(pInfo->GetObjectValue());
825 m_aEditBuffer.WriteNumberAsString(pInfo->GetGenerationValue());
827 }
828 auto pID = dynamic_cast<PDFArrayElement*>(m_pTrailer->Lookup("ID"));
829 if (pID)
830 {
831 const std::vector<PDFElement*>& rElements = pID->GetElements();
832 m_aEditBuffer.WriteOString("/ID [ <");
833 for (size_t i = 0; i < rElements.size(); ++i)
834 {
835 auto pIDString = dynamic_cast<PDFHexStringElement*>(rElements[i]);
836 if (!pIDString)
837 continue;
838
839 m_aEditBuffer.WriteOString(pIDString->GetValue());
840 if ((i + 1) < rElements.size())
842 }
844 }
845
846 if (!m_aStartXRefs.empty())
847 {
848 // Write location of the previous cross-reference section.
849 m_aEditBuffer.WriteOString("/Prev ");
851 }
852
854 }
855}
856
// Appends the objects of a new signature to the edit buffer and fills in the
// signature value computed with xCertificate.
//
// Visible sequence: write the signature object, the appearance object, the
// annotation object, the updated page object, the updated catalog object and
// the cross-reference section, followed by "startxref" and "%%EOF". Then the
// length of the last byte range is patched in, the two byte ranges around the
// /Contents placeholder are read back and signed, and the resulting hex string
// is written into the placeholder.
//
// xCertificate: certificate used for signing; its encoded form must be
//        non-empty.
// rDescription, bAdES: forwarded to WriteSignatureObject().
//
// Returns true on success; false (after logging a warning) when there are no
// pages, the selected page is null, writing the page or catalog object fails,
// the certificate is empty, or signing fails.
//
// NOTE(review): the embedded numbering has gaps in this function (860-861,
// 910, 931), so the visible statements do not form the complete sequence.
857bool PDFDocument::Sign(const uno::Reference<security::XCertificate>& xCertificate,
858 const OUString& rDescription, bool bAdES)
859{
862
863 sal_uInt64 nSignatureLastByteRangeOffset = 0;
864 sal_Int64 nSignatureContentOffset = 0;
865 sal_Int32 nSignatureId = WriteSignatureObject(
866 rDescription, bAdES, nSignatureLastByteRangeOffset, nSignatureContentOffset);
867
// NOTE(review): the returned id is used below without being checked -- confirm
// whether a failure value from WriteAppearanceObject() needs handling here.
868 tools::Rectangle aSignatureRectangle;
869 sal_Int32 nAppearanceId = WriteAppearanceObject(aSignatureRectangle);
870
871 std::vector<PDFObjectElement*> aPages = GetPages();
872 if (aPages.empty())
873 {
874 SAL_WARN("vcl.filter", "PDFDocument::Sign: found no pages");
875 return false;
876 }
877
// Use the requested signature page when it is a valid index, else page 0.
878 size_t nPage = 0;
879 if (m_nSignaturePage < aPages.size())
880 {
881 nPage = m_nSignaturePage;
882 }
883 if (!aPages[nPage])
884 {
885 SAL_WARN("vcl.filter", "PDFDocument::Sign: failed to find page #" << nPage);
886 return false;
887 }
888
889 PDFObjectElement& rPage = *aPages[nPage];
890 sal_Int32 nAnnotId = WriteAnnotObject(rPage, nSignatureId, nAppearanceId, aSignatureRectangle);
891
892 if (!WritePageObject(rPage, nAnnotId))
893 {
894 SAL_WARN("vcl.filter", "PDFDocument::Sign: failed to write the updated Page object");
895 return false;
896 }
897
898 PDFReferenceElement* pRoot = nullptr;
899 if (!WriteCatalogObject(nAnnotId, pRoot))
900 {
901 SAL_WARN("vcl.filter", "PDFDocument::Sign: failed to write the updated Catalog object");
902 return false;
903 }
904
// The cross-reference section starts at the current end of the edit buffer.
905 sal_uInt64 nXRefOffset = m_aEditBuffer.Tell();
906 WriteXRef(nXRefOffset, pRoot);
907
908 // Write startxref.
909 m_aEditBuffer.WriteOString("startxref\n");
911 m_aEditBuffer.WriteOString("\n%%EOF\n");
912
913 // Finalize the signature, now that we know the total file size.
914 // Calculate the length of the last byte range.
// The second range starts right after the ">" that closes the placeholder.
915 sal_uInt64 nFileEnd = m_aEditBuffer.Tell();
916 sal_Int64 nLastByteRangeLength
917 = nFileEnd - (nSignatureContentOffset + MAX_SIGNATURE_CONTENT_LENGTH + 1);
918 // Write the length to the buffer.
919 m_aEditBuffer.Seek(nSignatureLastByteRangeOffset);
920 OString aByteRangeBuffer = OString::number(nLastByteRangeLength) + " ]";
921 m_aEditBuffer.WriteOString(aByteRangeBuffer);
922
923 // Create the PKCS#7 object.
// Note: this check runs after the edit buffer has already been modified above.
924 css::uno::Sequence<sal_Int8> aDerEncoded = xCertificate->getEncoded();
925 if (!aDerEncoded.hasElements())
926 {
927 SAL_WARN("vcl.filter", "PDFDocument::Sign: empty certificate");
928 return false;
929 }
930
// NOTE(review): listing line 931 is missing; as shown, the read below starts
// at whatever position the previous write left the stream at -- confirm
// against the full source.
// First range: everything before the "<" that opens the placeholder.
932 sal_uInt64 nBufferSize1 = nSignatureContentOffset - 1;
933 std::unique_ptr<char[]> aBuffer1(new char[nBufferSize1]);
934 m_aEditBuffer.ReadBytes(aBuffer1.get(), nBufferSize1);
935
// Second range: everything after the ">" that closes the placeholder.
936 m_aEditBuffer.Seek(nSignatureContentOffset + MAX_SIGNATURE_CONTENT_LENGTH + 1);
937 sal_uInt64 nBufferSize2 = nLastByteRangeLength;
938 std::unique_ptr<char[]> aBuffer2(new char[nBufferSize2]);
939 m_aEditBuffer.ReadBytes(aBuffer2.get(), nBufferSize2);
940
941 OStringBuffer aCMSHexBuffer;
942 svl::crypto::Signing aSigning(xCertificate);
943 aSigning.AddDataRange(aBuffer1.get(), nBufferSize1);
944 aSigning.AddDataRange(aBuffer2.get(), nBufferSize2);
945 if (!aSigning.Sign(aCMSHexBuffer))
946 {
947 SAL_WARN("vcl.filter", "PDFDocument::Sign: PDFWriter::Sign() failed");
948 return false;
949 }
950
// The hex-encoded result has to fit into the space reserved for /Contents.
951 assert(aCMSHexBuffer.getLength() <= MAX_SIGNATURE_CONTENT_LENGTH);
952
953 m_aEditBuffer.Seek(nSignatureContentOffset);
954 m_aEditBuffer.WriteOString(aCMSHexBuffer);
955
956 return true;
957}
958
960{
962 rStream.WriteStream(m_aEditBuffer);
963 return rStream.good();
964}
965
967 std::vector<std::unique_ptr<PDFElement>>& rElements,
968 PDFObjectElement* pObjectElement)
969{
970 // Last seen object token.
971 PDFObjectElement* pObject = pObjectElement;
972 PDFNameElement* pObjectKey = nullptr;
973 PDFObjectElement* pObjectStream = nullptr;
974 bool bInXRef = false;
975 // The next number will be an xref offset.
976 bool bInStartXRef = false;
977 // Dictionary depth, so we know when we're outside any dictionaries.
978 int nDepth = 0;
979 // Last seen array token that's outside any dictionaries.
980 PDFArrayElement* pArray = nullptr;
981 // If we're inside an obj/endobj pair.
982 bool bInObject = false;
983
984 while (true)
985 {
986 char ch;
987 rStream.ReadChar(ch);
988 if (rStream.eof())
989 break;
990
991 switch (ch)
992 {
993 case '%':
994 {
995 auto pComment = new PDFCommentElement(*this);
996 rElements.push_back(std::unique_ptr<PDFElement>(pComment));
997 rStream.SeekRel(-1);
998 if (!rElements.back()->Read(rStream))
999 {
1000 SAL_WARN("vcl.filter",
1001 "PDFDocument::Tokenize: PDFCommentElement::Read() failed");
1002 return false;
1003 }
1004 if (eMode == TokenizeMode::EOF_TOKEN && !m_aEOFs.empty()
1005 && m_aEOFs.back() == rStream.Tell())
1006 {
1007 // Found EOF and partial parsing requested, we're done.
1008 return true;
1009 }
1010 break;
1011 }
1012 case '<':
1013 {
1014 // Dictionary or hex string.
1015 rStream.ReadChar(ch);
1016 rStream.SeekRel(-2);
1017 if (ch == '<')
1018 {
1019 rElements.push_back(std::unique_ptr<PDFElement>(new PDFDictionaryElement()));
1020 ++nDepth;
1021 }
1022 else
1023 rElements.push_back(std::unique_ptr<PDFElement>(new PDFHexStringElement));
1024 if (!rElements.back()->Read(rStream))
1025 {
1026 SAL_WARN("vcl.filter",
1027 "PDFDocument::Tokenize: PDFDictionaryElement::Read() failed");
1028 return false;
1029 }
1030 break;
1031 }
1032 case '>':
1033 {
1034 rElements.push_back(std::unique_ptr<PDFElement>(new PDFEndDictionaryElement()));
1035 --nDepth;
1036 rStream.SeekRel(-1);
1037 if (!rElements.back()->Read(rStream))
1038 {
1039 SAL_WARN("vcl.filter",
1040 "PDFDocument::Tokenize: PDFEndDictionaryElement::Read() failed");
1041 return false;
1042 }
1043 break;
1044 }
1045 case '[':
1046 {
1047 auto pArr = new PDFArrayElement(pObject);
1048 rElements.push_back(std::unique_ptr<PDFElement>(pArr));
1049 if (nDepth == 0)
1050 {
1051 // The array is attached directly, inform the object.
1052 pArray = pArr;
1053 if (pObject)
1054 {
1055 pObject->SetArray(pArray);
1056 pObject->SetArrayOffset(rStream.Tell());
1057 }
1058 }
1059 ++nDepth;
1060 rStream.SeekRel(-1);
1061 if (!rElements.back()->Read(rStream))
1062 {
1063 SAL_WARN("vcl.filter", "PDFDocument::Tokenize: PDFArrayElement::Read() failed");
1064 return false;
1065 }
1066 break;
1067 }
1068 case ']':
1069 {
1070 rElements.push_back(std::unique_ptr<PDFElement>(new PDFEndArrayElement()));
1071 --nDepth;
1072 rStream.SeekRel(-1);
1073 if (nDepth == 0)
1074 {
1075 if (pObject)
1076 {
1077 pObject->SetArrayLength(rStream.Tell() - pObject->GetArrayOffset());
1078 }
1079 }
1080 if (!rElements.back()->Read(rStream))
1081 {
1082 SAL_WARN("vcl.filter",
1083 "PDFDocument::Tokenize: PDFEndArrayElement::Read() failed");
1084 return false;
1085 }
1086 break;
1087 }
1088 case '/':
1089 {
1090 auto pNameElement = new PDFNameElement();
1091 rElements.push_back(std::unique_ptr<PDFElement>(pNameElement));
1092 rStream.SeekRel(-1);
1093 if (!pNameElement->Read(rStream))
1094 {
1095 SAL_WARN("vcl.filter", "PDFDocument::Tokenize: PDFNameElement::Read() failed");
1096 return false;
1097 }
1098
1099 if (pObject && pObjectKey && pObjectKey->GetValue() == "Type"
1100 && pNameElement->GetValue() == "ObjStm")
1101 pObjectStream = pObject;
1102 else
1103 pObjectKey = pNameElement;
1104 break;
1105 }
1106 case '(':
1107 {
1108 rElements.push_back(std::unique_ptr<PDFElement>(new PDFLiteralStringElement));
1109 rStream.SeekRel(-1);
1110 if (!rElements.back()->Read(rStream))
1111 {
1112 SAL_WARN("vcl.filter",
1113 "PDFDocument::Tokenize: PDFLiteralStringElement::Read() failed");
1114 return false;
1115 }
1116 break;
1117 }
1118 default:
1119 {
1120 if (rtl::isAsciiDigit(static_cast<unsigned char>(ch)) || ch == '-' || ch == '+'
1121 || ch == '.')
1122 {
1123 // Numbering object: an integer or a real.
1124 auto pNumberElement = new PDFNumberElement();
1125 rElements.push_back(std::unique_ptr<PDFElement>(pNumberElement));
1126 rStream.SeekRel(-1);
1127 if (!pNumberElement->Read(rStream))
1128 {
1129 SAL_WARN("vcl.filter",
1130 "PDFDocument::Tokenize: PDFNumberElement::Read() failed");
1131 return false;
1132 }
1133 if (bInStartXRef)
1134 {
1135 bInStartXRef = false;
1136 m_aStartXRefs.push_back(pNumberElement->GetValue());
1137
1138 auto it = m_aOffsetObjects.find(pNumberElement->GetValue());
1139 if (it != m_aOffsetObjects.end())
1140 m_pXRefStream = it->second;
1141 }
1142 else if (bInObject && !nDepth && pObject)
1143 // Number element inside an object, but outside a
1144 // dictionary / array: remember it.
1145 pObject->SetNumberElement(pNumberElement);
1146 }
1147 else if (rtl::isAsciiAlpha(static_cast<unsigned char>(ch)))
1148 {
1149 // Possible keyword, like "obj".
1150 rStream.SeekRel(-1);
1151 OString aKeyword = ReadKeyword(rStream);
1152
1153 bool bObj = aKeyword == "obj";
1154 if (bObj || aKeyword == "R")
1155 {
1156 size_t nElements = rElements.size();
1157 if (nElements < 2)
1158 {
1159 SAL_WARN("vcl.filter", "PDFDocument::Tokenize: expected at least two "
1160 "tokens before 'obj' or 'R' keyword");
1161 return false;
1162 }
1163
1164 auto pObjectNumber
1165 = dynamic_cast<PDFNumberElement*>(rElements[nElements - 2].get());
1166 auto pGenerationNumber
1167 = dynamic_cast<PDFNumberElement*>(rElements[nElements - 1].get());
1168 if (!pObjectNumber || !pGenerationNumber)
1169 {
1170 SAL_WARN("vcl.filter", "PDFDocument::Tokenize: missing object or "
1171 "generation number before 'obj' or 'R' keyword");
1172 return false;
1173 }
1174
1175 if (bObj)
1176 {
1177 pObject = new PDFObjectElement(*this, pObjectNumber->GetValue(),
1178 pGenerationNumber->GetValue());
1179 rElements.push_back(std::unique_ptr<PDFElement>(pObject));
1180 m_aOffsetObjects[pObjectNumber->GetLocation()] = pObject;
1181 m_aIDObjects[pObjectNumber->GetValue()] = pObject;
1182 bInObject = true;
1183 }
1184 else
1185 {
1186 auto pReference = new PDFReferenceElement(*this, *pObjectNumber,
1187 *pGenerationNumber);
1188 rElements.push_back(std::unique_ptr<PDFElement>(pReference));
1189 if (bInObject && nDepth > 0 && pObject)
1190 // Inform the object about a new in-dictionary reference.
1191 pObject->AddDictionaryReference(pReference);
1192 }
1193 if (!rElements.back()->Read(rStream))
1194 {
1195 SAL_WARN("vcl.filter",
1196 "PDFDocument::Tokenize: PDFElement::Read() failed");
1197 return false;
1198 }
1199 }
1200 else if (aKeyword == "stream")
1201 {
1202 // Look up the length of the stream from the parent object's dictionary.
1203 size_t nLength = 0;
1204 for (size_t nElement = 0; nElement < rElements.size(); ++nElement)
1205 {
1206 // Iterate in reverse order.
1207 size_t nIndex = rElements.size() - nElement - 1;
1208 PDFElement* pElement = rElements[nIndex].get();
1209 auto pObj = dynamic_cast<PDFObjectElement*>(pElement);
1210 if (!pObj)
1211 continue;
1212
1213 PDFElement* pLookup = pObj->Lookup("Length");
1214 auto pReference = dynamic_cast<PDFReferenceElement*>(pLookup);
1215 if (pReference)
1216 {
1217 // Length is provided as a reference.
1218 nLength = pReference->LookupNumber(rStream);
1219 break;
1220 }
1221
1222 auto pNumber = dynamic_cast<PDFNumberElement*>(pLookup);
1223 if (pNumber)
1224 {
1225 // Length is provided directly.
1226 nLength = pNumber->GetValue();
1227 break;
1228 }
1229
1230 SAL_WARN(
1231 "vcl.filter",
1232 "PDFDocument::Tokenize: found no Length key for stream keyword");
1233 return false;
1234 }
1235
1237 auto pStreamElement = new PDFStreamElement(nLength);
1238 if (pObject)
1239 pObject->SetStream(pStreamElement);
1240 rElements.push_back(std::unique_ptr<PDFElement>(pStreamElement));
1241 if (!rElements.back()->Read(rStream))
1242 {
1243 SAL_WARN("vcl.filter",
1244 "PDFDocument::Tokenize: PDFStreamElement::Read() failed");
1245 return false;
1246 }
1247 }
1248 else if (aKeyword == "endstream")
1249 {
1250 rElements.push_back(std::unique_ptr<PDFElement>(new PDFEndStreamElement));
1251 if (!rElements.back()->Read(rStream))
1252 {
1253 SAL_WARN("vcl.filter",
1254 "PDFDocument::Tokenize: PDFEndStreamElement::Read() failed");
1255 return false;
1256 }
1257 }
1258 else if (aKeyword == "endobj")
1259 {
1260 rElements.push_back(std::unique_ptr<PDFElement>(new PDFEndObjectElement));
1261 if (!rElements.back()->Read(rStream))
1262 {
1263 SAL_WARN("vcl.filter",
1264 "PDFDocument::Tokenize: PDFEndObjectElement::Read() failed");
1265 return false;
1266 }
1268 {
1269 // Found endobj and only object parsing was requested, we're done.
1270 return true;
1271 }
1272
1273 if (pObjectStream)
1274 {
1275 // We're at the end of an object stream, parse the stored objects.
1276 pObjectStream->ParseStoredObjects();
1277 pObjectStream = nullptr;
1278 pObjectKey = nullptr;
1279 }
1280 bInObject = false;
1281 }
1282 else if (aKeyword == "true" || aKeyword == "false")
1283 rElements.push_back(std::unique_ptr<PDFElement>(
1284 new PDFBooleanElement(aKeyword.toBoolean())));
1285 else if (aKeyword == "null")
1286 rElements.push_back(std::unique_ptr<PDFElement>(new PDFNullElement));
1287 else if (aKeyword == "xref")
1288 // Allow 'f' and 'n' keywords.
1289 bInXRef = true;
1290 else if (bInXRef && (aKeyword == "f" || aKeyword == "n"))
1291 {
1292 }
1293 else if (aKeyword == "trailer")
1294 {
1295 auto pTrailer = new PDFTrailerElement(*this);
1296
1297 // Make it possible to find this trailer later by offset.
1298 pTrailer->Read(rStream);
1299 m_aOffsetTrailers[pTrailer->GetLocation()] = pTrailer;
1300
1301 // When reading till the first EOF token only, remember
1302 // just the first trailer token.
1304 m_pTrailer = pTrailer;
1305 rElements.push_back(std::unique_ptr<PDFElement>(pTrailer));
1306 }
1307 else if (aKeyword == "startxref")
1308 {
1309 bInStartXRef = true;
1310 }
1311 else
1312 {
1313 SAL_WARN("vcl.filter", "PDFDocument::Tokenize: unexpected '"
1314 << aKeyword << "' keyword at byte position "
1315 << rStream.Tell());
1316 return false;
1317 }
1318 }
1319 else
1320 {
1321 auto uChar = static_cast<unsigned char>(ch);
1322 // Be more lenient and allow unexpected null char
1323 if (!rtl::isAsciiWhiteSpace(uChar) && uChar != 0)
1324 {
1325 SAL_WARN("vcl.filter",
1326 "PDFDocument::Tokenize: unexpected character with code "
1327 << sal_Int32(ch) << " at byte position " << rStream.Tell());
1328 return false;
1329 }
1330 SAL_WARN_IF(uChar == 0, "vcl.filter",
1331 "PDFDocument::Tokenize: unexpected null character at "
1332 << rStream.Tell() << " - ignoring");
1333 }
1334 break;
1335 }
1336 }
1337 }
1338
1339 return true;
1340}
1341
// Stores pObject in m_aIDObjects under the key nID; an entry already present for that
// key is overwritten.
// NOTE(review): the declaration line of this function is not part of this listing.
1343{
1344 m_aIDObjects[nID] = pObject;
1345}
1346
// Tries Read() on rStream as-is; if that fails, rewinds rStream, passes it through
// vcl::pdf::convertToHighestSupported() into a memory stream and returns the result of
// Read() on that converted copy.
// NOTE(review): the declaration line of this function is not part of this listing.
1348{
1349 if (Read(rStream))
1350 return true;
1351
1352 // Read failed, try a roundtrip through pdfium and then retry.
1353 rStream.Seek(0);
1354 SvMemoryStream aStandardizedStream;
// The return value of the conversion is not inspected; a failed conversion is only
// noticed through the second Read() failing.
1355 vcl::pdf::convertToHighestSupported(rStream, aStandardizedStream);
1356 return Read(aStandardizedStream);
1357}
1358
// Body of PDFDocument::Read (name taken from the log messages below).
// Checks the "%PDF-" magic, copies the stream into m_aEditBuffer, walks the
// cross-reference sections starting at the offset returned by FindStartXRef() and finally
// tokenizes the stream from offset 0.
// Returns false on a header mismatch, when no xref start offset is found, when the section
// does not start with "xref", or when tokenizing fails.
// NOTE(review): the declaration line of this function is not part of this listing.
1360{
1361 // Check file magic.
1362 std::vector<sal_Int8> aHeader(5);
1363 rStream.Seek(0);
1364 rStream.ReadBytes(aHeader.data(), aHeader.size());
1365 if (aHeader[0] != '%' || aHeader[1] != 'P' || aHeader[2] != 'D' || aHeader[3] != 'F'
1366 || aHeader[4] != '-')
1367 {
1368 SAL_WARN("vcl.filter", "PDFDocument::Read: header mismatch");
1369 return false;
1370 }
1371
1372 // Allow later editing of the contents in-memory.
1373 rStream.Seek(0);
1374 m_aEditBuffer.WriteStream(rStream);
1375
1376 // Look up the offset of the xref table.
1377 size_t nStartXRef = FindStartXRef(rStream);
1378 SAL_INFO("vcl.filter", "PDFDocument::Read: nStartXRef is " << nStartXRef);
1379 if (nStartXRef == 0)
1380 {
1381 SAL_WARN("vcl.filter", "PDFDocument::Read: found no xref start offset");
1382 return false;
1383 }
// Each iteration parses the cross-reference section at nStartXRef and then continues
// with the offset stored under its "Prev" key, until a section has no such key.
// NOTE(review): nothing visible here stops a Prev chain that points back to an offset
// already processed from looping forever - confirm against the full source.
1384 while (true)
1385 {
1386 rStream.Seek(nStartXRef);
1387 OString aKeyword = ReadKeyword(rStream);
// No letters at the offset: handle it as a cross-reference stream object instead of a
// classic "xref" table.
1388 if (aKeyword.isEmpty())
1389 ReadXRefStream(rStream);
1390
1391 else
1392 {
1393 if (aKeyword != "xref")
1394 {
1395 SAL_WARN("vcl.filter", "PDFDocument::Read: xref is not the first keyword");
1396 return false;
1397 }
1398 ReadXRef(rStream);
1399 if (!Tokenize(rStream, TokenizeMode::EOF_TOKEN, m_aElements, nullptr))
1400 {
1401 SAL_WARN("vcl.filter", "PDFDocument::Read: failed to tokenizer trailer after xref");
1402 return false;
1403 }
1404 }
1405
1406 PDFNumberElement* pPrev = nullptr;
1407 if (m_pTrailer)
1408 {
1409 pPrev = dynamic_cast<PDFNumberElement*>(m_pTrailer->Lookup("Prev"));
1410
1411 // Remember the offset of this trailer in the correct order. It's
1412 // possible that newer trailers don't have a larger offset.
// NOTE(review): the statement the comment above refers to is not present in this listing.
1414 }
1415 else if (m_pXRefStream)
1416 pPrev = dynamic_cast<PDFNumberElement*>(m_pXRefStream->Lookup("Prev"));
1417 if (pPrev)
1418 nStartXRef = pPrev->GetValue();
1419
1420 // Reset state, except the edit buffer.
// m_aXRef is not cleared here, so the entries collected so far survive the reset.
1421 m_aElements.clear();
1422 m_aOffsetObjects.clear();
1423 m_aIDObjects.clear();
1424 m_aStartXRefs.clear();
1425 m_aEOFs.clear();
1426 m_pTrailer = nullptr;
1427 m_pXRefStream = nullptr;
1428 if (!pPrev)
1429 break;
1430 }
1431
1432 // Then we can tokenize the stream.
1433 rStream.Seek(0);
1434 return Tokenize(rStream, TokenizeMode::END_OF_STREAM, m_aElements, nullptr);
1435}
1436
// Collects the run of ASCII letters starting at the current position of rStream and
// returns it. The first non-letter character is put back with SeekRel(-1). The result is
// empty when the stream is already at its end or the first character is not a letter.
// NOTE(review): the declaration line of this function is not part of this listing.
1438{
1439 OStringBuffer aBuf;
1440 char ch;
1441 rStream.ReadChar(ch);
1442 if (rStream.eof())
1443 return {};
1444 while (rtl::isAsciiAlpha(static_cast<unsigned char>(ch)))
1445 {
1446 aBuf.append(ch);
1447 rStream.ReadChar(ch);
// End of stream in the middle of a keyword: return what was collected, without seeking back.
1448 if (rStream.eof())
1449 return aBuf.toString();
1450 }
1451 rStream.SeekRel(-1);
1452 return aBuf.toString();
1453}
1454
// Body of PDFDocument::FindStartXRef (name taken from the log messages below).
// Searches the last 1024 bytes of rStream (or the whole stream if it is not longer than
// that) for the last occurrence of "startxref" and returns the number parsed after it.
// Returns 0 when the token or the number cannot be found.
// NOTE(review): the declaration line of this function is not part of this listing.
1456{
1457 // Find the "startxref" token, somewhere near the end of the document.
1458 std::vector<char> aBuf(1024);
1459 rStream.Seek(STREAM_SEEK_TO_END);
1460 if (rStream.Tell() > aBuf.size())
1461 rStream.SeekRel(static_cast<sal_Int64>(-1) * aBuf.size());
1462 else
1463 // The document is really short, then just read it from the start.
1464 rStream.Seek(0);
// Peek: read the tail into aBuf, then restore the position so that the relative seek
// further down is measured from the start of the peeked window.
1465 size_t nBeforePeek = rStream.Tell();
1466 size_t nSize = rStream.ReadBytes(aBuf.data(), aBuf.size());
1467 rStream.Seek(nBeforePeek);
1468 if (nSize != aBuf.size())
1469 aBuf.resize(nSize);
1470 OString aPrefix("startxref");
1471 // Find the last startxref at the end of the document.
1472 auto itLastValid = aBuf.end();
1473 auto it = aBuf.begin();
// Repeated forward search; every hit replaces the previous one, so the last hit wins.
1474 while (true)
1475 {
1476 it = std::search(it, aBuf.end(), aPrefix.getStr(), aPrefix.getStr() + aPrefix.getLength());
1477 if (it == aBuf.end())
1478 break;
1479
1480 itLastValid = it;
1481 ++it;
1482 }
1483 if (itLastValid == aBuf.end())
1484 {
1485 SAL_WARN("vcl.filter", "PDFDocument::FindStartXRef: found no startxref");
1486 return 0;
1487 }
1488
// Position the stream directly behind the matched "startxref" token.
1489 rStream.SeekRel(itLastValid - aBuf.begin() + aPrefix.getLength());
1490 if (rStream.eof())
1491 {
1492 SAL_WARN("vcl.filter",
1493 "PDFDocument::FindStartXRef: unexpected end of stream after startxref");
1494 return 0;
1495 }
1496
// NOTE(review): one source line between the check above and the number parsing below is
// not present in this listing.
1498 PDFNumberElement aNumber;
1499 if (!aNumber.Read(rStream))
1500 return 0;
1501 return aNumber.GetValue();
1502}
1503
// Body of PDFDocument::ReadXRefStream (name taken from the log messages below).
// Tokenizes the object at the current position of rStream, inflates its FlateDecode
// stream and walks the rows described by the Index (or Size) and W entries, adding an
// m_aXRef entry for every object index that has none yet.
// Every failure path logs a warning and returns without reporting an error to the caller.
// NOTE(review): the declaration line of this function is not part of this listing.
1505{
1506 // Look up the stream length in the object dictionary.
1507 if (!Tokenize(rStream, TokenizeMode::END_OF_OBJECT, m_aElements, nullptr))
1508 {
1509 SAL_WARN("vcl.filter", "PDFDocument::ReadXRefStream: failed to read object");
1510 return;
1511 }
1512
1513 if (m_aElements.empty())
1514 {
1515 SAL_WARN("vcl.filter", "PDFDocument::ReadXRefStream: no tokens found");
1516 return;
1517 }
1518
// The first object element among the tokens is taken to be the xref stream object.
1519 PDFObjectElement* pObject = nullptr;
1520 for (const auto& pElement : m_aElements)
1521 {
1522 if (auto pObj = dynamic_cast<PDFObjectElement*>(pElement.get()))
1523 {
1524 pObject = pObj;
1525 break;
1526 }
1527 }
1528 if (!pObject)
1529 {
1530 SAL_WARN("vcl.filter", "PDFDocument::ReadXRefStream: no object token found");
1531 return;
1532 }
1533
1534 // So that the Prev key can be looked up later.
// NOTE(review): the statement the comment above refers to is not present in this listing.
1536
// Only a direct number is accepted for Length here; any other value type is rejected.
1537 PDFElement* pLookup = pObject->Lookup("Length");
1538 auto pNumber = dynamic_cast<PDFNumberElement*>(pLookup);
1539 if (!pNumber)
1540 {
1541 SAL_WARN("vcl.filter", "PDFDocument::ReadXRefStream: stream length is not provided");
1542 return;
1543 }
1544 sal_uInt64 nLength = pNumber->GetValue();
1545
1546 // Look up the stream offset.
1547 PDFStreamElement* pStream = nullptr;
1548 for (const auto& pElement : m_aElements)
1549 {
1550 if (auto pS = dynamic_cast<PDFStreamElement*>(pElement.get()))
1551 {
1552 pStream = pS;
1553 break;
1554 }
1555 }
1556 if (!pStream)
1557 {
1558 SAL_WARN("vcl.filter", "PDFDocument::ReadXRefStream: no stream token found");
1559 return;
1560 }
1561
1562 // Read and decompress it.
// NOTE(review): nLength comes from the file and sizes this buffer directly, and the
// number of bytes actually read is not checked - confirm this is acceptable for
// untrusted input.
1563 rStream.Seek(pStream->GetOffset());
1564 std::vector<char> aBuf(nLength);
1565 rStream.ReadBytes(aBuf.data(), aBuf.size());
1566
1567 auto pFilter = dynamic_cast<PDFNameElement*>(pObject->Lookup("Filter"));
1568 if (!pFilter)
1569 {
1570 SAL_WARN("vcl.filter", "PDFDocument::ReadXRefStream: no Filter found");
1571 return;
1572 }
1573
1574 if (pFilter->GetValue() != "FlateDecode")
1575 {
1576 SAL_WARN("vcl.filter",
1577 "PDFDocument::ReadXRefStream: unexpected filter: " << pFilter->GetValue());
1578 return;
1579 }
1580
// Both default to 1 when DecodeParms is absent or does not contain the key as a number.
1581 int nColumns = 1;
1582 int nPredictor = 1;
1583 if (auto pDecodeParams = dynamic_cast<PDFDictionaryElement*>(pObject->Lookup("DecodeParms")))
1584 {
1585 const std::map<OString, PDFElement*>& rItems = pDecodeParams->GetItems();
1586 auto it = rItems.find("Columns");
1587 if (it != rItems.end())
1588 if (auto pColumns = dynamic_cast<PDFNumberElement*>(it->second))
1589 nColumns = pColumns->GetValue();
1590 it = rItems.find("Predictor");
1591 if (it != rItems.end())
1592 if (auto pPredictor = dynamic_cast<PDFNumberElement*>(it->second))
1593 nPredictor = pPredictor->GetValue();
1594 }
1595
// Inflate aBuf into aStream.
1596 SvMemoryStream aSource(aBuf.data(), aBuf.size(), StreamMode::READ);
1597 SvMemoryStream aStream;
1598 ZCodec aZCodec;
1599 aZCodec.BeginCompression();
1600 aZCodec.Decompress(aSource, aStream);
1601 if (!aZCodec.EndCompression())
1602 {
1603 SAL_WARN("vcl.filter", "PDFDocument::ReadXRefStream: decompression failed");
1604 return;
1605 }
1606
1607 // Look up the first and the last entry we need to read.
// aFirstObjects[k] / aNumberOfObjects[k] describe subsection k. Without an Index array
// a single subsection starting at 0 with Size entries is used.
1608 auto pIndex = dynamic_cast<PDFArrayElement*>(pObject->Lookup("Index"));
1609 std::vector<size_t> aFirstObjects;
1610 std::vector<size_t> aNumberOfObjects;
1611 if (!pIndex)
1612 {
1613 auto pSize = dynamic_cast<PDFNumberElement*>(pObject->Lookup("Size"));
1614 if (pSize)
1615 {
1616 aFirstObjects.push_back(0);
1617 aNumberOfObjects.push_back(pSize->GetValue());
1618 }
1619 else
1620 {
1621 SAL_WARN("vcl.filter", "PDFDocument::ReadXRefStream: Index and Size not found");
1622 return;
1623 }
1624 }
1625 else
1626 {
// Index elements are consumed pairwise: even positions carry the first object number,
// odd positions the count. A trailing unpaired first object number is silently ignored.
1627 const std::vector<PDFElement*>& rIndexElements = pIndex->GetElements();
1628 size_t nFirstObject = 0;
1629 for (size_t i = 0; i < rIndexElements.size(); ++i)
1630 {
1631 if (i % 2 == 0)
1632 {
1633 auto pFirstObject = dynamic_cast<PDFNumberElement*>(rIndexElements[i]);
1634 if (!pFirstObject)
1635 {
1636 SAL_WARN("vcl.filter",
1637 "PDFDocument::ReadXRefStream: Index has no first object");
1638 return;
1639 }
1640 nFirstObject = pFirstObject->GetValue();
1641 continue;
1642 }
1643
1644 auto pNumberOfObjects = dynamic_cast<PDFNumberElement*>(rIndexElements[i]);
1645 if (!pNumberOfObjects)
1646 {
1647 SAL_WARN("vcl.filter",
1648 "PDFDocument::ReadXRefStream: Index has no number of objects");
1649 return;
1650 }
1651 aFirstObjects.push_back(nFirstObject);
1652 aNumberOfObjects.push_back(pNumberOfObjects->GetValue());
1653 }
1654 }
1655
1656 // Look up the format of a single entry.
// aW holds the byte widths of the three fields of a row (type, offset, generation).
1657 const int nWSize = 3;
1658 auto pW = dynamic_cast<PDFArrayElement*>(pObject->Lookup("W"));
1659 if (!pW || pW->GetElements().size() < nWSize)
1660 {
1661 SAL_WARN("vcl.filter", "PDFDocument::ReadXRefStream: W not found or has < 3 elements");
1662 return;
1663 }
1664 int aW[nWSize];
1665 // First character is the (kind of) repeated predictor.
1666 int nLineLength = 1;
1667 for (size_t i = 0; i < nWSize; ++i)
1668 {
1669 auto pI = dynamic_cast<PDFNumberElement*>(pW->GetElements()[i]);
1670 if (!pI)
1671 {
1672 SAL_WARN("vcl.filter", "PDFDocument::ReadXRefStream: W contains non-number");
1673 return;
1674 }
1675 aW[i] = pI->GetValue();
1676 nLineLength += aW[i];
1677 }
1678
1679 if (nPredictor > 1 && nLineLength - 1 != nColumns)
1680 {
1681 SAL_WARN("vcl.filter",
1682 "PDFDocument::ReadXRefStream: /DecodeParms/Columns is inconsistent with /W");
1683 return;
1684 }
1685
1686 aStream.Seek(0);
1687 for (size_t nSubSection = 0; nSubSection < aFirstObjects.size(); ++nSubSection)
1688 {
1689 size_t nFirstObject = aFirstObjects[nSubSection];
1690 size_t nNumberOfObjects = aNumberOfObjects[nSubSection];
1691
1692 // This is the line as read from the stream.
1693 std::vector<unsigned char> aOrigLine(nLineLength);
1694 // This is the line as it appears after tweaking according to nPredictor.
// aFilteredLine is declared outside the entry loop on purpose: for predictor 12 each
// byte is the running sum of that column over the rows read so far in this subsection.
1695 std::vector<unsigned char> aFilteredLine(nLineLength);
1696 for (size_t nEntry = 0; nEntry < nNumberOfObjects; ++nEntry)
1697 {
1698 size_t nIndex = nFirstObject + nEntry;
1699
1700 aStream.ReadBytes(aOrigLine.data(), aOrigLine.size());
// The first byte of each row is compared against the declared predictor minus 10.
1701 if (nPredictor > 1 && aOrigLine[0] + 10 != nPredictor)
1702 {
1703 SAL_WARN("vcl.filter", "PDFDocument::ReadXRefStream: in-stream predictor is "
1704 "inconsistent with /DecodeParms/Predictor for object #"
1705 << nIndex);
1706 return;
1707 }
1708
// NOTE(review): for nPredictor == 1 nothing is copied into aFilteredLine, so the fields
// parsed below are read from its zero-initialised bytes - confirm this is intended.
1709 for (int i = 0; i < nLineLength; ++i)
1710 {
1711 switch (nPredictor)
1712 {
1713 case 1:
1714 // No prediction.
1715 break;
1716 case 12:
1717 // PNG prediction: up (on all rows).
1718 aFilteredLine[i] = aFilteredLine[i] + aOrigLine[i];
1719 break;
1720 default:
1721 SAL_WARN("vcl.filter", "PDFDocument::ReadXRefStream: unexpected predictor: "
1722 << nPredictor);
1723 return;
1724 }
1725 }
1726
// Each of the three fields below is decoded as a big-endian unsigned integer of aW[n]
// bytes; a width of 0 leaves the field at 0.
1727 // First character is already handled above.
1728 int nPos = 1;
1729 size_t nType = 0;
1730 // Start of the current field in the stream data.
1731 int nOffset = nPos;
1732 for (; nPos < nOffset + aW[0]; ++nPos)
1733 {
1734 unsigned char nCh = aFilteredLine[nPos];
1735 nType = (nType << 8) + nCh;
1736 }
1737
1738 // Start of the object in the file stream.
1739 size_t nStreamOffset = 0;
1740 nOffset = nPos;
1741 for (; nPos < nOffset + aW[1]; ++nPos)
1742 {
1743 unsigned char nCh = aFilteredLine[nPos];
1744 nStreamOffset = (nStreamOffset << 8) + nCh;
1745 }
1746
1747 // Generation number of the object.
1748 size_t nGenerationNumber = 0;
1749 nOffset = nPos;
1750 for (; nPos < nOffset + aW[2]; ++nPos)
1751 {
1752 unsigned char nCh = aFilteredLine[nPos];
1753 nGenerationNumber = (nGenerationNumber << 8) + nCh;
1754 }
1755
1756 // Ignore invalid nType.
1757 if (nType <= 2)
1758 {
// An index that already has an entry is left untouched.
1759 if (m_aXRef.find(nIndex) == m_aXRef.end())
1760 {
1761 XRefEntry aEntry;
// NOTE(review): the statement belonging to each case label below is not present in
// this listing.
1762 switch (nType)
1763 {
1764 case 0:
1766 break;
1767 case 1:
1769 break;
1770 case 2:
1772 break;
1773 }
1774 aEntry.SetOffset(nStreamOffset);
1775 m_aXRef[nIndex] = aEntry;
1776 }
1777 }
1778 }
1779 }
1780}
1781
// Body of PDFDocument::ReadXRef (name taken from the log messages below).
// Parses subsections of a classic xref table from rStream: a first object number and an
// entry count, followed per entry by an offset, a generation number and an 'f' or 'n'
// keyword. Offsets are stored in m_aXRef for indexes that have no entry yet.
// Returns when the next token is not a number, or after logging a warning on malformed
// input.
// NOTE(review): the declaration line and several inner lines of this function are not
// part of this listing (gaps in the embedded numbering: 1784, 1801, 1819, 1827, 1835, 1853).
1783{
1785
1786 while (true)
1787 {
1788 PDFNumberElement aFirstObject;
1789 if (!aFirstObject.Read(rStream))
1790 {
1791 // Next token is not a number, it'll be the trailer.
1792 return;
1793 }
1794
1795 if (aFirstObject.GetValue() < 0)
1796 {
1797 SAL_WARN("vcl.filter", "PDFDocument::ReadXRef: expected first object number >= 0");
1798 return;
1799 }
1800
1802 PDFNumberElement aNumberOfEntries;
1803 if (!aNumberOfEntries.Read(rStream))
1804 {
1805 SAL_WARN("vcl.filter", "PDFDocument::ReadXRef: failed to read number of entries");
1806 return;
1807 }
1808
1809 if (aNumberOfEntries.GetValue() < 0)
1810 {
1811 SAL_WARN("vcl.filter", "PDFDocument::ReadXRef: expected zero or more entries");
1812 return;
1813 }
1814
1815 size_t nSize = aNumberOfEntries.GetValue();
1816 for (size_t nEntry = 0; nEntry < nSize; ++nEntry)
1817 {
// Object number this entry belongs to.
1818 size_t nIndex = aFirstObject.GetValue() + nEntry;
1820 PDFNumberElement aOffset;
1821 if (!aOffset.Read(rStream))
1822 {
1823 SAL_WARN("vcl.filter", "PDFDocument::ReadXRef: failed to read offset");
1824 return;
1825 }
1826
1828 PDFNumberElement aGenerationNumber;
1829 if (!aGenerationNumber.Read(rStream))
1830 {
1831 SAL_WARN("vcl.filter", "PDFDocument::ReadXRef: failed to read generation number");
1832 return;
1833 }
1834
1836 OString aKeyword = ReadKeyword(rStream);
1837 if (aKeyword != "f" && aKeyword != "n")
1838 {
1839 SAL_WARN("vcl.filter", "PDFDocument::ReadXRef: unexpected keyword");
1840 return;
1841 }
1842 // xrefs are read in reverse order, so never update an existing
1843 // offset with an older one.
1844 if (m_aXRef.find(nIndex) == m_aXRef.end())
1845 {
1846 XRefEntry aEntry;
1847 aEntry.SetOffset(aOffset.GetValue());
1848 // Initially only the first entry is dirty.
1849 if (nIndex == 0)
1850 aEntry.SetDirty(true);
1851 m_aXRef[nIndex] = aEntry;
1852 }
1854 }
1855 }
1856}
1857
// Advances rStream past ASCII whitespace. On return the stream is positioned on the first
// non-whitespace character, or at its end if only whitespace remained.
// NOTE(review): the declaration line of this function is not part of this listing.
1859{
1860 char ch = 0;
1861
1862 while (true)
1863 {
1864 rStream.ReadChar(ch);
1865 if (rStream.eof())
1866 break;
1867
1868 if (!rtl::isAsciiWhiteSpace(static_cast<unsigned char>(ch)))
1869 {
// Put the non-whitespace character back so that the caller reads it next.
1870 rStream.SeekRel(-1);
1871 return;
1872 }
1873 }
1874}
1875
// Advances rStream past '\n' and '\r' characters only. On return the stream is positioned
// on the first other character, or at its end if only line breaks remained.
// NOTE(review): the declaration line of this function is not part of this listing.
1877{
1878 char ch = 0;
1879
1880 while (true)
1881 {
1882 rStream.ReadChar(ch);
1883 if (rStream.eof())
1884 break;
1885
1886 if (ch != '\n' && ch != '\r')
1887 {
// Put the character back so that the caller reads it next.
1888 rStream.SeekRel(-1);
1889 return;
1890 }
1891 }
1892}
1893
1894size_t PDFDocument::GetObjectOffset(size_t nIndex) const
1895{
1896 auto it = m_aXRef.find(nIndex);
1897 if (it == m_aXRef.end() || it->second.GetType() == XRefEntryType::COMPRESSED)
1898 {
1899 SAL_WARN("vcl.filter", "PDFDocument::GetObjectOffset: wanted to look up index #"
1900 << nIndex << ", but failed");
1901 return 0;
1902 }
1903
1904 return it->second.GetOffset();
1905}
1906
1907const std::vector<std::unique_ptr<PDFElement>>& PDFDocument::GetElements() const
1908{
1909 return m_aElements;
1910}
1911
1913static void visitPages(PDFObjectElement* pPages, std::vector<PDFObjectElement*>& rRet)
1914{
1915 auto pKidsRef = pPages->Lookup("Kids");
1916 auto pKids = dynamic_cast<PDFArrayElement*>(pKidsRef);
1917 if (!pKids)
1918 {
1919 auto pRefKids = dynamic_cast<PDFReferenceElement*>(pKidsRef);
1920 if (!pRefKids)
1921 {
1922 SAL_WARN("vcl.filter", "visitPages: pages has no kids");
1923 return;
1924 }
1925 auto pObjWithKids = pRefKids->LookupObject();
1926 if (!pObjWithKids)
1927 {
1928 SAL_WARN("vcl.filter", "visitPages: pages has no kids");
1929 return;
1930 }
1931
1932 pKids = pObjWithKids->GetArray();
1933 }
1934
1935 if (!pKids)
1936 {
1937 SAL_WARN("vcl.filter", "visitPages: pages has no kids");
1938 return;
1939 }
1940
1941 pPages->setVisiting(true);
1942
1943 for (const auto& pKid : pKids->GetElements())
1944 {
1945 auto pReference = dynamic_cast<PDFReferenceElement*>(pKid);
1946 if (!pReference)
1947 continue;
1948
1949 PDFObjectElement* pKidObject = pReference->LookupObject();
1950 if (!pKidObject)
1951 continue;
1952
1953 // detect if visiting reenters itself
1954 if (pKidObject->alreadyVisiting())
1955 {
1956 SAL_WARN("vcl.filter", "visitPages: loop in hierarchy");
1957 continue;
1958 }
1959
1960 auto pName = dynamic_cast<PDFNameElement*>(pKidObject->Lookup("Type"));
1961 if (pName && pName->GetValue() == "Pages")
1962 // Pages inside pages: recurse.
1963 visitPages(pKidObject, rRet);
1964 else
1965 // Found an actual page.
1966 rRet.push_back(pKidObject);
1967 }
1968
1969 pPages->setVisiting(false);
1970}
1971
// Body of PDFDocument::GetCatalog (name taken from the log message below).
// Looks up the "Root" reference in the trailer registered for m_aTrailerOffsets[0], or,
// when no such trailer is found, in m_pXRefStream, and returns the object it refers to.
// Returns nullptr when no Root reference is available.
// NOTE(review): the declaration line of this function is not part of this listing.
1973{
1974 PDFReferenceElement* pRoot = nullptr;
1975
1976 PDFTrailerElement* pTrailer = nullptr;
1977 if (!m_aTrailerOffsets.empty())
1978 {
1979 // Get access to the latest trailer, and work with the keys of that
1980 // one.
1981 auto it = m_aOffsetTrailers.find(m_aTrailerOffsets[0]);
1982 if (it != m_aOffsetTrailers.end())
1983 pTrailer = it->second;
1984 }
1985
// The xref stream is only consulted when no trailer was found; a trailer without a Root
// reference does not fall back to it.
1986 if (pTrailer)
1987 pRoot = dynamic_cast<PDFReferenceElement*>(pTrailer->Lookup("Root"));
1988 else if (m_pXRefStream)
1989 pRoot = dynamic_cast<PDFReferenceElement*>(m_pXRefStream->Lookup("Root"));
1990
1991 if (!pRoot)
1992 {
1993 SAL_WARN("vcl.filter", "PDFDocument::GetCatalog: trailer has no Root key");
1994 return nullptr;
1995 }
1996
1997 return pRoot->LookupObject();
1998}
1999
2000std::vector<PDFObjectElement*> PDFDocument::GetPages()
2001{
2002 std::vector<PDFObjectElement*> aRet;
2003
2004 PDFObjectElement* pCatalog = GetCatalog();
2005 if (!pCatalog)
2006 {
2007 SAL_WARN("vcl.filter", "PDFDocument::GetPages: trailer has no catalog");
2008 return aRet;
2009 }
2010
2011 PDFObjectElement* pPages = pCatalog->LookupObject("Pages");
2012 if (!pPages)
2013 {
2014 SAL_WARN("vcl.filter", "PDFDocument::GetPages: catalog (obj " << pCatalog->GetObjectValue()
2015 << ") has no pages");
2016 return aRet;
2017 }
2018
2019 visitPages(pPages, aRet);
2020
2021 return aRet;
2022}
2023
2024void PDFDocument::PushBackEOF(size_t nOffset) { m_aEOFs.push_back(nOffset); }
2025
2026std::vector<PDFObjectElement*> PDFDocument::GetSignatureWidgets()
2027{
2028 std::vector<PDFObjectElement*> aRet;
2029
2030 std::vector<PDFObjectElement*> aPages = GetPages();
2031
2032 for (const auto& pPage : aPages)
2033 {
2034 if (!pPage)
2035 continue;
2036
2037 PDFElement* pAnnotsElement = pPage->Lookup("Annots");
2038 auto pAnnots = dynamic_cast<PDFArrayElement*>(pAnnotsElement);
2039 if (!pAnnots)
2040 {
2041 // Annots is not an array, see if it's a reference to an object
2042 // with a direct array.
2043 auto pAnnotsRef = dynamic_cast<PDFReferenceElement*>(pAnnotsElement);
2044 if (pAnnotsRef)
2045 {
2046 if (PDFObjectElement* pAnnotsObject = pAnnotsRef->LookupObject())
2047 {
2048 pAnnots = pAnnotsObject->GetArray();
2049 }
2050 }
2051 }
2052
2053 if (!pAnnots)
2054 continue;
2055
2056 for (const auto& pAnnot : pAnnots->GetElements())
2057 {
2058 auto pReference = dynamic_cast<PDFReferenceElement*>(pAnnot);
2059 if (!pReference)
2060 continue;
2061
2062 PDFObjectElement* pAnnotObject = pReference->LookupObject();
2063 if (!pAnnotObject)
2064 continue;
2065
2066 auto pFT = dynamic_cast<PDFNameElement*>(pAnnotObject->Lookup("FT"));
2067 if (!pFT || pFT->GetValue() != "Sig")
2068 continue;
2069
2070 aRet.push_back(pAnnotObject);
2071 }
2072 }
2073
2074 return aRet;
2075}
2076
2077std::vector<unsigned char> PDFDocument::DecodeHexString(PDFHexStringElement const* pElement)
2078{
2079 return svl::crypto::DecodeHexString(pElement->GetValue());
2080}
2081
2083{
2084 std::vector<unsigned char> const encoded(DecodeHexString(&rElement));
2085 // Text strings can be PDF-DocEncoding or UTF-16BE with mandatory BOM;
2086 // only the latter supported is here
2087 if (encoded.size() < 2 || encoded[0] != 0xFE || encoded[1] != 0xFF || (encoded.size() & 1) != 0)
2088 {
2089 return {};
2090 }
2091 OUStringBuffer buf(encoded.size() - 2);
2092 for (size_t i = 2; i < encoded.size(); i += 2)
2093 {
2094 buf.append(sal_Unicode((static_cast<sal_uInt16>(encoded[i]) << 8) | encoded[i + 1]));
2095 }
2096 return buf.makeStringAndClear();
2097}
2098
// Member-initializer list and empty body of a constructor that binds m_rDoc to rDoc.
// NOTE(review): the declaration line is not part of this listing; presumably this is the
// PDFCommentElement constructor, as the next function logs as PDFCommentElement::Read.
2100 : m_rDoc(rDoc)
2101{
2102}
2103
// Body of PDFCommentElement::Read (name taken from the log message below).
// Reads characters up to, but not including, the next '\n' or '\r' (or the end of the
// stream) into m_aComment and returns true.
// NOTE(review): the declaration line of this function is not part of this listing.
2105{
2106 // Read from (including) the % char till (excluding) the end of the line/stream.
2107 OStringBuffer aBuf;
2108 char ch;
2109 rStream.ReadChar(ch);
2110 while (true)
2111 {
2112 if (ch == '\n' || ch == '\r' || rStream.eof())
2113 {
2114 m_aComment = aBuf.makeStringAndClear();
2115
2116 if (m_aComment.startsWith("%%EOF"))
2117 {
// nPos is the stream position just behind the line terminator that was read.
2118 sal_uInt64 nPos = rStream.Tell();
2119 if (ch == '\r')
2120 {
// Peek at the next character without consuming it.
2121 rStream.ReadChar(ch);
2122 rStream.SeekRel(-1);
2123 // If the comment ends with a \r\n, count the \n as well to match Adobe Acrobat
2124 // behavior.
2125 if (ch == '\n')
2126 {
2127 nPos += 1;
2128 }
2129 }
// NOTE(review): the statement that consumes nPos is not present in this listing.
2131 }
2132
2133 SAL_INFO("vcl.filter", "PDFCommentElement::Read: m_aComment is '" << m_aComment << "'");
2134 return true;
2135 }
2136 aBuf.append(ch);
2137 rStream.ReadChar(ch);
2138 }
2139
// Not reached: the loop above only exits through its return statement.
2140 return false;
2141}
2142
2144
// Body of PDFNumberElement::Read (name taken from the log message below).
// Records the start position in m_nOffset and collects the run of digits, '-', '+' and '.'
// found there. Succeeds once a character outside that set is found; that character is put
// back and m_nLength is set to the number of bytes consumed.
// Returns false when the first character cannot start a number or when the stream ends
// before a terminating character is seen.
// NOTE(review): the declaration line of this function is not part of this listing.
2146{
2147 OStringBuffer aBuf;
2148 m_nOffset = rStream.Tell();
2149 char ch;
2150 rStream.ReadChar(ch);
2151 if (rStream.eof())
2152 {
2153 return false;
2154 }
2155 if (!rtl::isAsciiDigit(static_cast<unsigned char>(ch)) && ch != '-' && ch != '+' && ch != '.')
2156 {
// Not a number: leave the stream where it was so the caller can try something else.
2157 rStream.SeekRel(-1);
2158 return false;
2159 }
2160 while (!rStream.eof())
2161 {
2162 if (!rtl::isAsciiDigit(static_cast<unsigned char>(ch)) && ch != '-' && ch != '+'
2163 && ch != '.')
2164 {
2165 rStream.SeekRel(-1);
2166 m_nLength = rStream.Tell() - m_nOffset;
// NOTE(review): the statement that converts aBuf into m_fValue is not present in this
// listing.
2168 aBuf.setLength(0);
2169 SAL_INFO("vcl.filter", "PDFNumberElement::Read: m_fValue is '" << m_fValue << "'");
2170 return true;
2171 }
2172 aBuf.append(ch);
2173 rStream.ReadChar(ch);
2174 }
2175
2176 return false;
2177}
2178
2179sal_uInt64 PDFNumberElement::GetLocation() const { return m_nOffset; }
2180
2181sal_uInt64 PDFNumberElement::GetLength() const { return m_nLength; }
2182
2183bool PDFBooleanElement::Read(SvStream& /*rStream*/) { return true; }
2184
2185bool PDFNullElement::Read(SvStream& /*rStream*/) { return true; }
2186
// Body of PDFHexStringElement::Read (name taken from the log messages below).
// Expects '<' at the current position and stores every character up to, but not including,
// the next '>' in m_aValue, without validating or decoding it.
// Returns false when the first character is not '<' or the stream ends before '>' is found.
// NOTE(review): the declaration line of this function is not part of this listing.
2188{
2189 char ch;
2190 rStream.ReadChar(ch);
2191 if (ch != '<')
2192 {
2193 SAL_INFO("vcl.filter", "PDFHexStringElement::Read: expected '<' as first character");
2194 return false;
2195 }
2196 rStream.ReadChar(ch);
2197
2198 OStringBuffer aBuf;
2199 while (!rStream.eof())
2200 {
2201 if (ch == '>')
2202 {
2203 m_aValue = aBuf.makeStringAndClear();
2204 SAL_INFO("vcl.filter",
2205 "PDFHexStringElement::Read: m_aValue length is " << m_aValue.getLength());
2206 return true;
2207 }
2208 aBuf.append(ch);
2209 rStream.ReadChar(ch);
2210 }
2211
2212 return false;
2213}
2214
2215const OString& PDFHexStringElement::GetValue() const { return m_aValue; }
2216
// Body of PDFLiteralStringElement::Read (name taken from the second log message below).
// Expects '(' at the current position and stores everything up to the matching ')' in
// m_aValue, keeping nested parentheses and backslashes as they appear in the stream.
// Returns false when the first character is not '(' or the stream ends before the
// outermost parenthesis is closed.
// NOTE(review): the declaration line of this function is not part of this listing.
2218{
2219 char nPrevCh = 0;
2220 char ch = 0;
2221 rStream.ReadChar(ch);
2222 if (ch != '(')
2223 {
// NOTE(review): this message names PDFHexStringElement although the other message in
// this function names PDFLiteralStringElement - looks like a copy/paste leftover.
2224 SAL_INFO("vcl.filter", "PDFHexStringElement::Read: expected '(' as first character");
2225 return false;
2226 }
2227 nPrevCh = ch;
2228 rStream.ReadChar(ch);
2229
2230 // Start with 1 nesting level as we read a '(' above already.
2231 int nDepth = 1;
2232 OStringBuffer aBuf;
2233 while (!rStream.eof())
2234 {
// A parenthesis directly preceded by a backslash does not change the nesting level.
// NOTE(review): only the single previous character is inspected, so a parenthesis after
// an escaped backslash ("\\") is also treated as escaped - confirm whether that matters.
2235 if (ch == '(' && nPrevCh != '\\')
2236 ++nDepth;
2237
2238 if (ch == ')' && nPrevCh != '\\')
2239 --nDepth;
2240
2241 if (nDepth == 0)
2242 {
2243 // ')' of the outermost '(' is reached.
2244 m_aValue = aBuf.makeStringAndClear();
2245 SAL_INFO("vcl.filter",
2246 "PDFLiteralStringElement::Read: m_aValue is '" << m_aValue << "'");
2247 return true;
2248 }
2249 aBuf.append(ch);
2250 nPrevCh = ch;
2251 rStream.ReadChar(ch);
2252 }
2253
2254 return false;
2255}
2256
2257const OString& PDFLiteralStringElement::GetValue() const { return m_aValue; }
2258
// Member-initializer list and empty body of a constructor that binds m_rDoc to rDoc and
// starts with no dictionary element.
// NOTE(review): the declaration line is not part of this listing; presumably this is the
// PDFTrailerElement constructor, given the PDFTrailerElement members that follow.
2260 : m_rDoc(rDoc)
2261 , m_pDictionaryElement(nullptr)
2262{
2263}
2264
// Records the current position of rStream in m_nOffset without consuming anything and
// always returns true.
// NOTE(review): the declaration line is not part of this listing; presumably this is
// PDFTrailerElement::Read.
2266{
2267 m_nOffset = rStream.Tell();
2268 return true;
2269}
2270
// Returns the element stored under rDictionaryKey in the trailer's dictionary element, or
// nullptr on the early-return path below.
// NOTE(review): several lines of this function are not present in this listing (gaps in
// the embedded numbering: 2273, 2275, 2278), among them the conditions guarding the
// braced parser block and the early return; judging by what is left, a parser is run on
// this trailer before m_pDictionaryElement is used - confirm against the full source.
2271PDFElement* PDFTrailerElement::Lookup(const OString& rDictionaryKey)
2272{
2274 {
2276 aParser.parse(this);
2277 }
2279 return nullptr;
2280 return m_pDictionaryElement->LookupElement(rDictionaryKey);
2281}
2282
2283sal_uInt64 PDFTrailerElement::GetLocation() const { return m_nOffset; }
2284
2285double PDFNumberElement::GetValue() const { return m_fValue; }
2286
2287PDFObjectElement::PDFObjectElement(PDFDocument& rDoc, double fObjectValue, double fGenerationValue)
2288 : m_rDoc(rDoc)
2289 , m_fObjectValue(fObjectValue)
2290 , m_fGenerationValue(fGenerationValue)
2291 , m_pNumberElement(nullptr)
2292 , m_nDictionaryOffset(0)
2293 , m_nDictionaryLength(0)
2294 , m_pDictionaryElement(nullptr)
2295 , m_nArrayOffset(0)
2296 , m_nArrayLength(0)
2297 , m_pArrayElement(nullptr)
2298 , m_pStreamElement(nullptr)
2299 , m_bParsed(false)
2300{
2301}
2302
2304{
2305 SAL_INFO("vcl.filter",
2306 "PDFObjectElement::Read: " << m_fObjectValue << " " << m_fGenerationValue << " obj");
2307 return true;
2308}
2309
2311
2312PDFElement* PDFDictionaryElement::Lookup(const std::map<OString, PDFElement*>& rDictionary,
2313 const OString& rKey)
2314{
2315 auto it = rDictionary.find(rKey);
2316 if (it == rDictionary.end())
2317 return nullptr;
2318
2319 return it->second;
2320}
2321
2323{
2324 auto pKey = dynamic_cast<PDFReferenceElement*>(
2325 PDFDictionaryElement::Lookup(m_aItems, rDictionaryKey));
2326 if (!pKey)
2327 {
2328 SAL_WARN("vcl.filter",
2329 "PDFDictionaryElement::LookupObject: no such key with reference value: "
2330 << rDictionaryKey);
2331 return nullptr;
2332 }
2333
2334 return pKey->LookupObject();
2335}
2336
2338{
2339 return PDFDictionaryElement::Lookup(m_aItems, rDictionaryKey);
2340}
2341
2343{
2344 if (m_bParsed)
2345 return;
2346
2347 if (!m_aElements.empty())
2348 {
2349 // This is a stored object in an object stream.
2351 aParser.parse(this);
2352 }
2353 else
2354 {
2355 // Normal object: elements are stored as members of the document itself.
2357 aParser.parse(this);
2358 }
2359 m_bParsed = true;
2360}
2361
// Returns the element stored under rDictionaryKey in this object's dictionary items
// (via GetDictionaryItems()), or nullptr on the early-exit path.
// NOTE(review): listing lines 2364 and 2365 are absent from this extract; they
// presumably contain the condition guarding the nullptr return — confirm against the
// full file before editing.
2362PDFElement* PDFObjectElement::Lookup(const OString& rDictionaryKey)
2363{
2366 return nullptr;
2367 return PDFDictionaryElement::Lookup(GetDictionaryItems(), rDictionaryKey);
2368}
2369
2371{
2372 auto pKey = dynamic_cast<PDFReferenceElement*>(Lookup(rDictionaryKey));
2373 if (!pKey)
2374 {
2375 SAL_WARN("vcl.filter", "PDFObjectElement::LookupObject: no such key with reference value: "
2376 << rDictionaryKey);
2377 return nullptr;
2378 }
2379
2380 return pKey->LookupObject();
2381}
2382
2384
2385void PDFObjectElement::SetDictionaryOffset(sal_uInt64 nDictionaryOffset)
2386{
2387 m_nDictionaryOffset = nDictionaryOffset;
2388}
2389
2391{
2393 return m_nDictionaryOffset;
2394}
2395
2396void PDFObjectElement::SetArrayOffset(sal_uInt64 nArrayOffset) { m_nArrayOffset = nArrayOffset; }
2397
2399
2400void PDFDictionaryElement::SetKeyOffset(const OString& rKey, sal_uInt64 nOffset)
2401{
2402 m_aDictionaryKeyOffset[rKey] = nOffset;
2403}
2404
2405void PDFDictionaryElement::SetKeyValueLength(const OString& rKey, sal_uInt64 nLength)
2406{
2408}
2409
2410sal_uInt64 PDFDictionaryElement::GetKeyOffset(const OString& rKey) const
2411{
2412 auto it = m_aDictionaryKeyOffset.find(rKey);
2413 if (it == m_aDictionaryKeyOffset.end())
2414 return 0;
2415
2416 return it->second;
2417}
2418
2419sal_uInt64 PDFDictionaryElement::GetKeyValueLength(const OString& rKey) const
2420{
2421 auto it = m_aDictionaryKeyValueLength.find(rKey);
2422 if (it == m_aDictionaryKeyValueLength.end())
2423 return 0;
2424
2425 return it->second;
2426}
2427
2428const std::map<OString, PDFElement*>& PDFDictionaryElement::GetItems() const { return m_aItems; }
2429
2430void PDFObjectElement::SetDictionaryLength(sal_uInt64 nDictionaryLength)
2431{
2432 m_nDictionaryLength = nDictionaryLength;
2433}
2434
2436{
2438 return m_nDictionaryLength;
2439}
2440
2441void PDFObjectElement::SetArrayLength(sal_uInt64 nArrayLength) { m_nArrayLength = nArrayLength; }
2442
2444
2446{
2448 return m_pDictionaryElement;
2449}
2450
2452{
2453 m_pDictionaryElement = pDictionaryElement;
2454}
2455
2457{
2458 m_pNumberElement = pNumberElement;
2459}
2460
2462
2463const std::vector<PDFReferenceElement*>& PDFObjectElement::GetDictionaryReferences() const
2464{
2466}
2467
2469{
2470 m_aDictionaryReferences.push_back(pReference);
2471}
2472
2473const std::map<OString, PDFElement*>& PDFObjectElement::GetDictionaryItems()
2474{
2477}
2478
2479void PDFObjectElement::SetArray(PDFArrayElement* pArrayElement) { m_pArrayElement = pArrayElement; }
2480
2482{
2483 m_pStreamElement = pStreamElement;
2484}
2485
2487
2489{
2491 return m_pArrayElement;
2492}
2493
// Body of PDFObjectElement::ParseStoredObjects (name taken from the log messages below).
// Expands an object stream: checks that this object has a stream with /Type /ObjStm and
// /Filter /FlateDecode, reads /First, /N and /Length, inflates the stream, reads N
// (object number, byte offset) pairs, then creates one PDFObjectElement per stored object
// in m_aStoredElements and registers it with m_rDoc.SetIDObject(). Every failure is
// logged and ends the function early without a result.
// NOTE(review): listing lines 2580, 2596 and 2615 are absent from this extract (the
// dangling "pStored);" below belongs to the statement that starts on 2615) — consult the
// full file before changing code here.
2495{
2496 if (!m_pStreamElement)
2497 {
2498 SAL_WARN("vcl.filter", "PDFObjectElement::ParseStoredObjects: no stream");
2499 return;
2500 }
2501
// NOTE(review): the "PDFDocument::ReadXRefStream" prefix in the four warnings below does
// not match this function and looks copy-pasted; "missing unexpected type" also reads
// oddly.
2502 auto pType = dynamic_cast<PDFNameElement*>(Lookup("Type"));
2503 if (!pType || pType->GetValue() != "ObjStm")
2504 {
2505 if (!pType)
2506 SAL_WARN("vcl.filter", "PDFDocument::ReadXRefStream: missing unexpected type");
2507 else
2508 SAL_WARN("vcl.filter",
2509 "PDFDocument::ReadXRefStream: unexpected type: " << pType->GetValue());
2510 return;
2511 }
2512
// Only a single /FlateDecode filter given as a name is accepted.
2513 auto pFilter = dynamic_cast<PDFNameElement*>(Lookup("Filter"));
2514 if (!pFilter || pFilter->GetValue() != "FlateDecode")
2515 {
2516 if (!pFilter)
2517 SAL_WARN("vcl.filter", "PDFDocument::ReadXRefStream: missing filter");
2518 else
2519 SAL_WARN("vcl.filter",
2520 "PDFDocument::ReadXRefStream: unexpected filter: " << pFilter->GetValue());
2521 return;
2522 }
2523
2524 auto pFirst = dynamic_cast<PDFNumberElement*>(Lookup("First"));
2525 if (!pFirst)
2526 {
2527 SAL_WARN("vcl.filter", "PDFObjectElement::ParseStoredObjects: no First");
2528 return;
2529 }
2530
2531 auto pN = dynamic_cast<PDFNumberElement*>(Lookup("N"));
2532 if (!pN)
2533 {
2534 SAL_WARN("vcl.filter", "PDFObjectElement::ParseStoredObjects: no N");
2535 return;
2536 }
// NOTE(review): double -> size_t conversion of a value read from the file, no range check.
2537 size_t nN = pN->GetValue();
2538
2539 auto pLength = dynamic_cast<PDFNumberElement*>(Lookup("Length"));
2540 if (!pLength)
2541 {
2542 SAL_WARN("vcl.filter", "PDFObjectElement::ParseStoredObjects: no length");
2543 return;
2544 }
// NOTE(review): same unchecked conversion; nLength sizes the read buffer below.
2545 size_t nLength = pLength->GetValue();
2546
2547 // Read and decompress it.
2548 SvMemoryStream& rEditBuffer = m_rDoc.GetEditBuffer();
2549 rEditBuffer.Seek(m_pStreamElement->GetOffset());
2550 std::vector<char> aBuf(nLength);
2551 rEditBuffer.ReadBytes(aBuf.data(), aBuf.size());
2552 SvMemoryStream aSource(aBuf.data(), aBuf.size(), StreamMode::READ);
2553 SvMemoryStream aStream;
2554 ZCodec aZCodec;
2555 aZCodec.BeginCompression();
2556 aZCodec.Decompress(aSource, aStream);
2557 if (!aZCodec.EndCompression())
2558 {
2559 SAL_WARN("vcl.filter", "PDFObjectElement::ParseStoredObjects: decompression failed");
2560 return;
2561 }
2562
// From here on nLength is the size of the decompressed data.
2563 nLength = aStream.TellEnd();
2564 aStream.Seek(0);
2565 std::vector<size_t> aObjNums;
2566 std::vector<size_t> aOffsets;
2567 std::vector<size_t> aLengths;
2568 // First iterate over and find out the lengths.
2569 for (size_t nObject = 0; nObject < nN; ++nObject)
2570 {
2571 PDFNumberElement aObjNum;
2572 if (!aObjNum.Read(aStream))
2573 {
2574 SAL_WARN("vcl.filter",
2575 "PDFObjectElement::ParseStoredObjects: failed to read object number");
2576 return;
2577 }
2578 aObjNums.push_back(aObjNum.GetValue());
2579
2581
2582 PDFNumberElement aByteOffset;
2583 if (!aByteOffset.Read(aStream))
2584 {
2585 SAL_WARN("vcl.filter",
2586 "PDFObjectElement::ParseStoredObjects: failed to read byte offset");
2587 return;
2588 }
// Stored offsets are relative to /First; make them relative to the stream start.
2589 aOffsets.push_back(pFirst->GetValue() + aByteOffset.GetValue());
2590
// An object's length is the distance to the next offset; the last one runs to the end
// of the decompressed data.
// NOTE(review): both subtractions are unsigned and wrap if the offsets are not increasing
// or lie past the end of the data; the wrapped value later sizes aBuf.resize().
2591 if (aOffsets.size() > 1)
2592 aLengths.push_back(aOffsets.back() - aOffsets[aOffsets.size() - 2]);
2593 if (nObject + 1 == nN)
2594 aLengths.push_back(nLength - aOffsets.back());
2595
2597 }
2598
2599 // Now create streams with the proper length and tokenize the data.
2600 for (size_t nObject = 0; nObject < nN; ++nObject)
2601 {
2602 size_t nObjNum = aObjNums[nObject];
2603 size_t nOffset = aOffsets[nObject];
2604 size_t nLen = aLengths[nObject];
2605
2606 aStream.Seek(nOffset);
// Stored objects are created with generation number 0.
2607 m_aStoredElements.push_back(std::make_unique<PDFObjectElement>(m_rDoc, nObjNum, 0));
2608 PDFObjectElement* pStored = m_aStoredElements.back().get();
2609
// aBuf is reused as scratch space for the bytes of this one stored object.
2610 aBuf.clear();
2611 aBuf.resize(nLen);
2612 aStream.ReadBytes(aBuf.data(), aBuf.size());
2613 SvMemoryStream aStoredStream(aBuf.data(), aBuf.size(), StreamMode::READ);
2614
2616 pStored);
2617 // This is how references know the object is stored inside this object stream.
2618 m_rDoc.SetIDObject(nObjNum, pStored);
2619
2620 // Store the stream of the object in the object stream for later use.
2621 std::unique_ptr<SvMemoryStream> pStreamBuffer(new SvMemoryStream());
2622 aStoredStream.Seek(0);
2623 pStreamBuffer->WriteStream(aStoredStream);
2624 pStored->SetStreamBuffer(pStreamBuffer);
2625 }
2626}
2627
2628std::vector<std::unique_ptr<PDFElement>>& PDFObjectElement::GetStoredElements()
2629{
2630 return m_aElements;
2631}
2632
2634
2635void PDFObjectElement::SetStreamBuffer(std::unique_ptr<SvMemoryStream>& pStreamBuffer)
2636{
2637 m_pStreamBuffer = std::move(pStreamBuffer);
2638}
2639
2641
2643 PDFNumberElement const& rGeneration)
2644 : m_rDoc(rDoc)
2645 , m_fObjectValue(rObject.GetValue())
2646 , m_fGenerationValue(rGeneration.GetValue())
2647 , m_rObject(rObject)
2648{
2649}
2650
2652
2654{
2655 SAL_INFO("vcl.filter",
2656 "PDFReferenceElement::Read: " << m_fObjectValue << " " << m_fGenerationValue << " R");
2657 m_nOffset = rStream.Tell();
2658 return true;
2659}
2660
2661sal_uInt64 PDFReferenceElement::GetOffset() const { return m_nOffset; }
2662
// Body of PDFReferenceElement::LookupNumber (name taken from the log messages below).
// Resolves this reference to a plain number: seeks rStream to the referenced object's
// offset, checks that "<object number> <generation number> obj" is found there and
// returns the number that follows. Returns 0 on any mismatch or read failure. The
// original stream position is restored on every exit path by the scope guard.
// NOTE(review): listing lines 2678, 2690, 2702 and 2712 are absent from this extract —
// consult the full file before changing code here.
2664{
2665 size_t nOffset = m_rDoc.GetObjectOffset(m_fObjectValue);
2666 if (nOffset == 0)
2667 {
2668 SAL_WARN("vcl.filter", "PDFReferenceElement::LookupNumber: found no offset for object #"
2669 << m_fObjectValue);
2670 return 0;
2671 }
2672
// Put the stream back where the caller left it once this function returns.
2673 sal_uInt64 nOrigPos = rStream.Tell();
2674 comphelper::ScopeGuard g([&]() { rStream.Seek(nOrigPos); });
2675
2676 rStream.Seek(nOffset);
// First token at the offset: must be this reference's object number.
2677 {
2679 PDFNumberElement aNumber;
2680 bool bRet = aNumber.Read(rStream);
2681 if (!bRet || aNumber.GetValue() != m_fObjectValue)
2682 {
2683 SAL_WARN("vcl.filter",
2684 "PDFReferenceElement::LookupNumber: offset points to not matching object");
2685 return 0;
2686 }
2687 }
2688
// Second token: must be this reference's generation number.
2689 {
2691 PDFNumberElement aNumber;
2692 bool bRet = aNumber.Read(rStream);
2693 if (!bRet || aNumber.GetValue() != m_fGenerationValue)
2694 {
2695 SAL_WARN("vcl.filter",
2696 "PDFReferenceElement::LookupNumber: offset points to not matching generation");
2697 return 0;
2698 }
2699 }
2700
// Third token: the "obj" keyword.
2701 {
2703 OString aKeyword = PDFDocument::ReadKeyword(rStream);
2704 if (aKeyword != "obj")
2705 {
2706 SAL_WARN("vcl.filter",
2707 "PDFReferenceElement::LookupNumber: offset doesn't point to an obj keyword");
2708 return 0;
2709 }
2710 }
2711
// What follows is the referenced value itself, which has to parse as a number.
2713 PDFNumberElement aNumber;
2714 if (!aNumber.Read(rStream))
2715 {
2716 SAL_WARN("vcl.filter",
2717 "PDFReferenceElement::LookupNumber: failed to read referenced number");
2718 return 0;
2719 }
2720
2721 return aNumber.GetValue();
2722}
2723
2725{
2727}
2728
2730{
2731 auto itIDObjects = m_aIDObjects.find(nObjectNumber);
2732
2733 if (itIDObjects != m_aIDObjects.end())
2734 return itIDObjects->second;
2735
2736 SAL_WARN("vcl.filter", "PDFDocument::LookupObject: can't find obj " << nObjectNumber);
2737 return nullptr;
2738}
2739
2741
2743
2745
2747{
2748 char ch;
2749 rStream.ReadChar(ch);
2750 if (ch != '<')
2751 {
2752 SAL_WARN("vcl.filter", "PDFDictionaryElement::Read: unexpected character: " << ch);
2753 return false;
2754 }
2755
2756 if (rStream.eof())
2757 {
2758 SAL_WARN("vcl.filter", "PDFDictionaryElement::Read: unexpected end of file");
2759 return false;
2760 }
2761
2762 rStream.ReadChar(ch);
2763 if (ch != '<')
2764 {
2765 SAL_WARN("vcl.filter", "PDFDictionaryElement::Read: unexpected character: " << ch);
2766 return false;
2767 }
2768
2769 m_nLocation = rStream.Tell();
2770
2771 SAL_INFO("vcl.filter", "PDFDictionaryElement::Read: '<<'");
2772
2773 return true;
2774}
2775
2777
2779
2781{
2782 m_nLocation = rStream.Tell();
2783 char ch;
2784 rStream.ReadChar(ch);
2785 if (ch != '>')
2786 {
2787 SAL_WARN("vcl.filter", "PDFEndDictionaryElement::Read: unexpected character: " << ch);
2788 return false;
2789 }
2790
2791 if (rStream.eof())
2792 {
2793 SAL_WARN("vcl.filter", "PDFEndDictionaryElement::Read: unexpected end of file");
2794 return false;
2795 }
2796
2797 rStream.ReadChar(ch);
2798 if (ch != '>')
2799 {
2800 SAL_WARN("vcl.filter", "PDFEndDictionaryElement::Read: unexpected character: " << ch);
2801 return false;
2802 }
2803
2804 SAL_INFO("vcl.filter", "PDFEndDictionaryElement::Read: '>>'");
2805
2806 return true;
2807}
2808
2810
2812{
2813 char ch;
2814 rStream.ReadChar(ch);
2815 if (ch != '/')
2816 {
2817 SAL_WARN("vcl.filter", "PDFNameElement::Read: unexpected character: " << ch);
2818 return false;
2819 }
2820 m_nLocation = rStream.Tell();
2821
2822 if (rStream.eof())
2823 {
2824 SAL_WARN("vcl.filter", "PDFNameElement::Read: unexpected end of file");
2825 return false;
2826 }
2827
2828 // Read till the first white-space.
2829 OStringBuffer aBuf;
2830 rStream.ReadChar(ch);
2831 while (!rStream.eof())
2832 {
2833 if (rtl::isAsciiWhiteSpace(static_cast<unsigned char>(ch)) || ch == '/' || ch == '['
2834 || ch == ']' || ch == '<' || ch == '>' || ch == '(')
2835 {
2836 rStream.SeekRel(-1);
2837 m_aValue = aBuf.makeStringAndClear();
2838 SAL_INFO("vcl.filter", "PDFNameElement::Read: m_aValue is '" << m_aValue << "'");
2839 return true;
2840 }
2841 aBuf.append(ch);
2842 rStream.ReadChar(ch);
2843 }
2844
2845 return false;
2846}
2847
2848const OString& PDFNameElement::GetValue() const { return m_aValue; }
2849
2850sal_uInt64 PDFNameElement::GetLocation() const { return m_nLocation; }
2851
2854 , m_nOffset(0)
2855{
2856}
2857
2859{
2860 SAL_INFO("vcl.filter", "PDFStreamElement::Read: length is " << m_nLength);
2861 m_nOffset = rStream.Tell();
2862 std::vector<unsigned char> aBytes(m_nLength);
2863 rStream.ReadBytes(aBytes.data(), aBytes.size());
2864 m_aMemory.WriteBytes(aBytes.data(), aBytes.size());
2865
2866 return rStream.good();
2867}
2868
2870
2871sal_uInt64 PDFStreamElement::GetOffset() const { return m_nOffset; }
2872
2873bool PDFEndStreamElement::Read(SvStream& /*rStream*/) { return true; }
2874
2875bool PDFEndObjectElement::Read(SvStream& /*rStream*/) { return true; }
2876
2878 : m_pObject(pObject)
2879{
2880}
2881
2883{
2884 char ch;
2885 rStream.ReadChar(ch);
2886 if (ch != '[')
2887 {
2888 SAL_WARN("vcl.filter", "PDFArrayElement::Read: unexpected character: " << ch);
2889 return false;
2890 }
2891
2892 SAL_INFO("vcl.filter", "PDFArrayElement::Read: '['");
2893
2894 return true;
2895}
2896
2898{
2899 if (m_pObject)
2900 SAL_INFO("vcl.filter",
2901 "PDFArrayElement::PushBack: object is " << m_pObject->GetObjectValue());
2902 m_aElements.push_back(pElement);
2903}
2904
2905const std::vector<PDFElement*>& PDFArrayElement::GetElements() const { return m_aElements; }
2906
2908
2910{
2911 m_nOffset = rStream.Tell();
2912 char ch;
2913 rStream.ReadChar(ch);
2914 if (ch != ']')
2915 {
2916 SAL_WARN("vcl.filter", "PDFEndArrayElement::Read: unexpected character: " << ch);
2917 return false;
2918 }
2919
2920 SAL_INFO("vcl.filter", "PDFEndArrayElement::Read: ']'");
2921
2922 return true;
2923}
2924
2925sal_uInt64 PDFEndArrayElement::GetOffset() const { return m_nOffset; }
2926
2927// PDFObjectParser
2928
2929size_t PDFObjectParser::parse(PDFElement* pParsingElement, size_t nStartIndex, int nCurrentDepth)
2930{
2931 // The index of last parsed element
2932 size_t nReturnIndex = 0;
2933
2934 pParsingElement->setParsing(true);
2935
2936 comphelper::ScopeGuard aGuard([pParsingElement]() { pParsingElement->setParsing(false); });
2937
2938 // Current object, if root is an object, else nullptr
2939 auto pParsingObject = dynamic_cast<PDFObjectElement*>(pParsingElement);
2940 auto pParsingTrailer = dynamic_cast<PDFTrailerElement*>(pParsingElement);
2941
2942 // Current dictionary, if root is an dictionary, else nullptr
2943 auto pParsingDictionary = dynamic_cast<PDFDictionaryElement*>(pParsingElement);
2944
2945 // Current parsing array, if root is an array, else nullptr
2946 auto pParsingArray = dynamic_cast<PDFArrayElement*>(pParsingElement);
2947
2948 // Find out where the dictionary for this object starts.
2949 size_t nIndex = nStartIndex;
2950 for (size_t i = nStartIndex; i < mrElements.size(); ++i)
2951 {
2952 if (mrElements[i].get() == pParsingElement)
2953 {
2954 nIndex = i;
2955 break;
2956 }
2957 }
2958
2959 OString aName;
2960 sal_uInt64 nNameOffset = 0;
2961 std::vector<PDFNumberElement*> aNumbers;
2962
2963 sal_uInt64 nDictionaryOffset = 0;
2964
2965 // Current depth; 1 is current
2966 int nDepth = 0;
2967
2968 for (size_t i = nIndex; i < mrElements.size(); ++i)
2969 {
2970 auto* pCurrentElement = mrElements[i].get();
2971
2972 // Dictionary tokens can be nested, track enter/leave.
2973 if (auto pCurrentDictionary = dynamic_cast<PDFDictionaryElement*>(pCurrentElement))
2974 {
2975 // Handle previously stored number
2976 if (!aNumbers.empty())
2977 {
2978 if (pParsingDictionary)
2979 {
2980 PDFNumberElement* pNumber = aNumbers.back();
2981 sal_uInt64 nLength
2982 = pNumber->GetLocation() + pNumber->GetLength() - nNameOffset;
2983
2984 pParsingDictionary->insert(aName, pNumber);
2985 pParsingDictionary->SetKeyOffset(aName, nNameOffset);
2986 pParsingDictionary->SetKeyValueLength(aName, nLength);
2987 }
2988 else if (pParsingArray)
2989 {
2990 for (auto& pNumber : aNumbers)
2991 pParsingArray->PushBack(pNumber);
2992 }
2993 else
2994 {
2995 SAL_INFO("vcl.filter", "neither Dictionary nor Array available");
2996 }
2997 aName.clear();
2998 aNumbers.clear();
2999 }
3000
3001 nDepth++;
3002
3003 if (nDepth == 1) // pParsingDictionary is the current one
3004 {
3005 // First dictionary start, track start offset.
3006 nDictionaryOffset = pCurrentDictionary->GetLocation();
3007
3008 if (pParsingObject)
3009 {
3010 // Then the toplevel dictionary of the object.
3011 pParsingObject->SetDictionary(pCurrentDictionary);
3012 pParsingObject->SetDictionaryOffset(nDictionaryOffset);
3013 pParsingDictionary = pCurrentDictionary;
3014 }
3015 else if (pParsingTrailer)
3016 {
3017 pParsingTrailer->SetDictionary(pCurrentDictionary);
3018 pParsingDictionary = pCurrentDictionary;
3019 }
3020 }
3021 else if (!pCurrentDictionary->alreadyParsing())
3022 {
3023 if (pParsingArray)
3024 {
3025 pParsingArray->PushBack(pCurrentDictionary);
3026 }
3027 else if (pParsingDictionary)
3028 {
3029 // Dictionary toplevel value.
3030 pParsingDictionary->insert(aName, pCurrentDictionary);
3031 }
3032 else
3033 {
3034 SAL_INFO("vcl.filter", "neither Dictionary nor Array available");
3035 }
3036 // Nested dictionary.
3037 const size_t nNextElementIndex = parse(pCurrentDictionary, i, nCurrentDepth + 1);
3038 i = std::max(i, nNextElementIndex - 1);
3039 }
3040 }
3041 else if (auto pCurrentEndDictionary
3042 = dynamic_cast<PDFEndDictionaryElement*>(pCurrentElement))
3043 {
3044 // Handle previously stored number
3045 if (!aNumbers.empty())
3046 {
3047 if (pParsingDictionary)
3048 {
3049 PDFNumberElement* pNumber = aNumbers.back();
3050 sal_uInt64 nLength
3051 = pNumber->GetLocation() + pNumber->GetLength() - nNameOffset;
3052
3053 pParsingDictionary->insert(aName, pNumber);
3054 pParsingDictionary->SetKeyOffset(aName, nNameOffset);
3055 pParsingDictionary->SetKeyValueLength(aName, nLength);
3056 }
3057 else if (pParsingArray)
3058 {
3059 for (auto& pNumber : aNumbers)
3060 pParsingArray->PushBack(pNumber);
3061 }
3062 else
3063 {
3064 SAL_INFO("vcl.filter", "neither Dictionary nor Array available");
3065 }
3066 aName.clear();
3067 aNumbers.clear();
3068 }
3069
3070 if (pParsingDictionary)
3071 {
3072 pParsingDictionary->SetKeyOffset(aName, nNameOffset);
3073 sal_uInt64 nLength = pCurrentEndDictionary->GetLocation() - nNameOffset + 2;
3074 pParsingDictionary->SetKeyValueLength(aName, nLength);
3075 aName.clear();
3076 }
3077
3078 if (nDepth == 1) // did the parsing ended
3079 {
3080 // Last dictionary end, track length and stop parsing.
3081 if (pParsingObject)
3082 {
3083 sal_uInt64 nDictionaryLength
3084 = pCurrentEndDictionary->GetLocation() - nDictionaryOffset;
3085 pParsingObject->SetDictionaryLength(nDictionaryLength);
3086 }
3087 nReturnIndex = i;
3088 break;
3089 }
3090
3091 nDepth--;
3092 }
3093 else if (auto pCurrentArray = dynamic_cast<PDFArrayElement*>(pCurrentElement))
3094 {
3095 // Handle previously stored number
3096 if (!aNumbers.empty())
3097 {
3098 if (pParsingDictionary)
3099 {
3100 PDFNumberElement* pNumber = aNumbers.back();
3101
3102 sal_uInt64 nLength
3103 = pNumber->GetLocation() + pNumber->GetLength() - nNameOffset;
3104 pParsingDictionary->insert(aName, pNumber);
3105 pParsingDictionary->SetKeyOffset(aName, nNameOffset);
3106 pParsingDictionary->SetKeyValueLength(aName, nLength);
3107 }
3108 else if (pParsingArray)
3109 {
3110 for (auto& pNumber : aNumbers)
3111 pParsingArray->PushBack(pNumber);
3112 }
3113 else
3114 {
3115 SAL_INFO("vcl.filter", "neither Dictionary nor Array available");
3116 }
3117 aName.clear();
3118 aNumbers.clear();
3119 }
3120
3121 nDepth++;
3122 if (nDepth == 1) // pParsingDictionary is the current one
3123 {
3124 if (pParsingObject)
3125 {
3126 pParsingObject->SetArray(pCurrentArray);
3127 pParsingArray = pCurrentArray;
3128 }
3129 }
3130 else if (!pCurrentArray->alreadyParsing())
3131 {
3132 if (pParsingArray)
3133 {
3134 // Array is toplevel
3135 pParsingArray->PushBack(pCurrentArray);
3136 }
3137 else if (pParsingDictionary)
3138 {
3139 // Dictionary toplevel value.
3140 pParsingDictionary->insert(aName, pCurrentArray);
3141 }
3142
3143 const size_t nNextElementIndex = parse(pCurrentArray, i, nCurrentDepth + 1);
3144
3145 // ensure we go forwards and not endlessly loop
3146 i = std::max(i, nNextElementIndex - 1);
3147 }
3148 }
3149 else if (auto pCurrentEndArray = dynamic_cast<PDFEndArrayElement*>(pCurrentElement))
3150 {
3151 // Handle previously stored number
3152 if (!aNumbers.empty())
3153 {
3154 if (pParsingDictionary)
3155 {
3156 PDFNumberElement* pNumber = aNumbers.back();
3157
3158 sal_uInt64 nLength
3159 = pNumber->GetLocation() + pNumber->GetLength() - nNameOffset;
3160 pParsingDictionary->insert(aName, pNumber);
3161 pParsingDictionary->SetKeyOffset(aName, nNameOffset);
3162 pParsingDictionary->SetKeyValueLength(aName, nLength);
3163 }
3164 else if (pParsingArray)
3165 {
3166 for (auto& pNumber : aNumbers)
3167 pParsingArray->PushBack(pNumber);
3168 }
3169 else
3170 {
3171 SAL_INFO("vcl.filter", "neither Dictionary nor Array available");
3172 }
3173 aName.clear();
3174 aNumbers.clear();
3175 }
3176
3177 if (nDepth == 1) // did the pParsing ended
3178 {
3179 // Last array end, track length and stop parsing.
3180 nReturnIndex = i;
3181 break;
3182 }
3183
3184 if (pParsingDictionary)
3185 {
3186 pParsingDictionary->SetKeyOffset(aName, nNameOffset);
3187 // Include the ending ']' in the length of the key - (array)value pair length.
3188 sal_uInt64 nLength = pCurrentEndArray->GetOffset() - nNameOffset + 1;
3189 pParsingDictionary->SetKeyValueLength(aName, nLength);
3190 aName.clear();
3191 }
3192 nDepth--;
3193 }
3194 else if (auto pCurrentName = dynamic_cast<PDFNameElement*>(pCurrentElement))
3195 {
3196 // Handle previously stored number
3197 if (!aNumbers.empty())
3198 {
3199 if (pParsingDictionary)
3200 {
3201 PDFNumberElement* pNumber = aNumbers.back();
3202
3203 sal_uInt64 nLength
3204 = pNumber->GetLocation() + pNumber->GetLength() - nNameOffset;
3205 pParsingDictionary->insert(aName, pNumber);
3206 pParsingDictionary->SetKeyOffset(aName, nNameOffset);
3207 pParsingDictionary->SetKeyValueLength(aName, nLength);
3208 }
3209 else if (pParsingArray)
3210 {
3211 for (auto& pNumber : aNumbers)
3212 pParsingArray->PushBack(pNumber);
3213 }
3214 aName.clear();
3215 aNumbers.clear();
3216 }
3217
3218 // Now handle name
3219 if (pParsingArray)
3220 {
3221 // if we are in an array, just push the name to array
3222 pParsingArray->PushBack(pCurrentName);
3223 }
3224 else if (pParsingDictionary)
3225 {
3226 // if we are in a dictionary, we need to store the name as a possible key
3227 if (aName.isEmpty())
3228 {
3229 aName = pCurrentName->GetValue();
3230 nNameOffset = pCurrentName->GetLocation();
3231 }
3232 else
3233 {
3234 sal_uInt64 nKeyLength
3235 = pCurrentName->GetLocation() + pCurrentName->GetLength() - nNameOffset;
3236 pParsingDictionary->insert(aName, pCurrentName);
3237 pParsingDictionary->SetKeyOffset(aName, nNameOffset);
3238 pParsingDictionary->SetKeyValueLength(aName, nKeyLength);
3239 aName.clear();
3240 }
3241 }
3242 }
3243 else if (auto pReference = dynamic_cast<PDFReferenceElement*>(pCurrentElement))
3244 {
3245 // Handle previously stored number
3246 if (aNumbers.size() > 2)
3247 {
3248 aNumbers.resize(aNumbers.size() - 2);
3249 if (pParsingArray)
3250 {
3251 for (auto& pNumber : aNumbers)
3252 pParsingArray->PushBack(pNumber);
3253 }
3254 aNumbers.clear();
3255 }
3256
3257 if (pParsingArray)
3258 {
3259 pParsingArray->PushBack(pReference);
3260 }
3261 else if (pParsingDictionary)
3262 {
3263 sal_uInt64 nLength = pReference->GetOffset() - nNameOffset;
3264 pParsingDictionary->insert(aName, pReference);
3265 pParsingDictionary->SetKeyOffset(aName, nNameOffset);
3266 pParsingDictionary->SetKeyValueLength(aName, nLength);
3267 aName.clear();
3268 }
3269 else
3270 {
3271 SAL_INFO("vcl.filter", "neither Dictionary nor Array available");
3272 }
3273 aNumbers.clear();
3274 }
3275 else if (auto pLiteralString = dynamic_cast<PDFLiteralStringElement*>(pCurrentElement))
3276 {
3277 // Handle previously stored number
3278 if (!aNumbers.empty())
3279 {
3280 if (pParsingArray)
3281 {
3282 for (auto& pNumber : aNumbers)
3283 pParsingArray->PushBack(pNumber);
3284 }
3285 aNumbers.clear();
3286 }
3287
3288 if (pParsingArray)
3289 {
3290 pParsingArray->PushBack(pLiteralString);
3291 }
3292 else if (pParsingDictionary)
3293 {
3294 pParsingDictionary->insert(aName, pLiteralString);
3295 pParsingDictionary->SetKeyOffset(aName, nNameOffset);
3296 aName.clear();
3297 }
3298 else
3299 {
3300 SAL_INFO("vcl.filter", "neither Dictionary nor Array available");
3301 }
3302 }
3303 else if (auto pBoolean = dynamic_cast<PDFBooleanElement*>(pCurrentElement))
3304 {
3305 // Handle previously stored number
3306 if (!aNumbers.empty())
3307 {
3308 if (pParsingArray)
3309 {
3310 for (auto& pNumber : aNumbers)
3311 pParsingArray->PushBack(pNumber);
3312 }
3313 aNumbers.clear();
3314 }
3315
3316 if (pParsingArray)
3317 {
3318 pParsingArray->PushBack(pBoolean);
3319 }
3320 else if (pParsingDictionary)
3321 {
3322 pParsingDictionary->insert(aName, pBoolean);
3323 pParsingDictionary->SetKeyOffset(aName, nNameOffset);
3324 aName.clear();
3325 }
3326 else
3327 {
3328 SAL_INFO("vcl.filter", "neither Dictionary nor Array available");
3329 }
3330 }
3331 else if (auto pHexString = dynamic_cast<PDFHexStringElement*>(pCurrentElement))
3332 {
3333 // Handle previously stored number
3334 if (!aNumbers.empty())
3335 {
3336 if (pParsingArray)
3337 {
3338 for (auto& pNumber : aNumbers)
3339 pParsingArray->PushBack(pNumber);
3340 }
3341 aNumbers.clear();
3342 }
3343
3344 if (pParsingArray)
3345 {
3346 pParsingArray->PushBack(pHexString);
3347 }
3348 else if (pParsingDictionary)
3349 {
3350 pParsingDictionary->insert(aName, pHexString);
3351 pParsingDictionary->SetKeyOffset(aName, nNameOffset);
3352 aName.clear();
3353 }
3354 }
3355 else if (auto pNumberElement = dynamic_cast<PDFNumberElement*>(pCurrentElement))
3356 {
3357 // Just remember this, so that in case it's not a reference parameter,
3358 // we can handle it later.
3359 aNumbers.push_back(pNumberElement);
3360 }
3361 else if (dynamic_cast<PDFEndObjectElement*>(pCurrentElement))
3362 {
3363 // parsing of the object is finished
3364 break;
3365 }
3366 else if (dynamic_cast<PDFObjectElement*>(pCurrentElement)
3367 || dynamic_cast<PDFTrailerElement*>(pCurrentElement))
3368 {
3369 continue;
3370 }
3371 else
3372 {
3373 SAL_INFO("vcl.filter", "Unhandled element while parsing.");
3374 }
3375 }
3376
3377 return nReturnIndex;
3378}
3379
3380} // namespace vcl
3381
3382/* vim:set shiftwidth=4 softtabstop=4 expandtab: */
const sal_Int32 m_nLength
const char * pName
const void * GetData()
sal_uInt64 GetSize()
virtual sal_uInt64 TellEnd() override
sal_uInt64 Tell() const
bool good() const
SvStream & WriteNumberAsString(N n)
std::size_t WriteBytes(const void *pData, std::size_t nSize)
bool eof() const
bool SetStreamSize(sal_uInt64 nSize)
SvStream & WriteOString(std::string_view rStr)
SvStream & ReadChar(char &rChar)
sal_uInt64 Seek(sal_uInt64 nPos)
std::size_t ReadBytes(void *pData, std::size_t nSize)
sal_uInt64 SeekRel(sal_Int64 nPos)
SvStream & WriteStream(SvStream &rStream)
tools::Long Decompress(SvStream &rIStm, SvStream &rOStm)
tools::Long EndCompression()
void BeginCompression(int nCompressLevel=ZCODEC_DEFAULT_COMPRESSION, bool gzLib=false)
void Compress(SvStream &rIStm, SvStream &rOStm)
void AddDataRange(const void *pData, sal_Int32 size)
bool Sign(OStringBuffer &rCMSHexBuffer)
tools::Long getOpenHeight() const
void setWidth(tools::Long n)
void setHeight(tools::Long n)
tools::Long getOpenWidth() const
Copies objects from one PDF file into another one.
static sal_Int32 copyPageStreams(std::vector< filter::PDFObjectElement * > &rContentStreams, SvMemoryStream &rStream, bool &rCompressed)
Copies page one or more page streams from rContentStreams into rStream.
void copyPageResources(filter::PDFObjectElement *pPage, OStringBuffer &rLine)
Copies resources of pPage into rLine.
static void AppendUnicodeTextString(const OUString &rString, OStringBuffer &rBuffer)
Write rString as a PDF hex string into rBuffer.
static OString GetDateTime()
Get current date/time in PDF D:YYYYMMDDHHMMSS form.
Array object: a list.
PDFObjectElement * m_pObject
The object that contains this array.
const std::vector< PDFElement * > & GetElements() const
bool Read(SvStream &rStream) override
void PushBack(PDFElement *pElement)
PDFArrayElement(PDFObjectElement *pObject)
std::vector< PDFElement * > m_aElements
Boolean object: a 'true' or a 'false'.
bool Read(SvStream &rStream) override
PDFCommentElement(PDFDocument &rDoc)
bool Read(SvStream &rStream) override
Dictionary object: a set key-value pairs.
static PDFElement * Lookup(const std::map< OString, PDFElement * > &rDictionary, const OString &rKey)
sal_uInt64 GetKeyOffset(const OString &rKey) const
std::map< OString, sal_uInt64 > m_aDictionaryKeyOffset
Position after the '/' token.
sal_uInt64 m_nLocation
Offset after the '<<' token.
bool Read(SvStream &rStream) override
std::map< OString, sal_uInt64 > m_aDictionaryKeyValueLength
Length of the dictionary key and value, till (before) the next token.
PDFElement * LookupElement(const OString &rDictionaryKey)
Looks up an element which is contained in this dictionary.
const std::map< OString, PDFElement * > & GetItems() const
void SetKeyValueLength(const OString &rKey, sal_uInt64 nLength)
void SetKeyOffset(const OString &rKey, sal_uInt64 nOffset)
sal_uInt64 GetKeyValueLength(const OString &rKey) const
PDFObjectElement * LookupObject(const OString &rDictionaryKey)
Looks up an object which is only referenced in this dictionary.
std::map< OString, PDFElement * > m_aItems
Key-value pairs when the dictionary is a nested value.
In-memory representation of an on-disk PDF document.
PDFObjectElement * m_pXRefStream
When m_pTrailer is nullptr, this can still have a dictionary.
static OUString DecodeHexStringUTF16BE(PDFHexStringElement const &rElement)
bool RemoveSignature(size_t nPosition)
Remove the nth signature from read document in the edit buffer.
Definition: pdfdocument.cxx:45
PDFTrailerElement * m_pTrailer
sal_Int32 createObject() override
See vcl::PDFObjectContainer::createObject().
Definition: pdfdocument.cxx:70
SvMemoryStream & GetEditBuffer()
Access to the input document, even after the input stream is gone.
bool writeBufferBytes(const void *pBuffer, sal_uInt64 nBytes) override
See vcl::PDFObjectContainer::writeBuffer().
Definition: pdfdocument.cxx:92
std::vector< size_t > m_aEOFs
List of EOF offsets we know.
std::map< size_t, PDFObjectElement * > m_aOffsetObjects
Object offset <-> Object pointer map.
std::vector< size_t > m_aTrailerOffsets
Offsets of trailers, from latest to oldest.
bool WriteCatalogObject(sal_Int32 nAnnotId, PDFReferenceElement *&pRoot)
Write the updated Catalog object as part of signing.
void SetSignatureLine(std::vector< sal_Int8 > &&rSignatureLine)
Definition: pdfdocument.cxx:98
bool Sign(const css::uno::Reference< css::security::XCertificate > &xCertificate, const OUString &rDescription, bool bAdES)
Sign the read document with xCertificate in the edit buffer.
void PushBackEOF(size_t nOffset)
Remember the end location of an EOF token.
bool Read(SvStream &rStream)
Read elements from the start of the stream till its end.
static size_t FindStartXRef(SvStream &rStream)
sal_uInt32 GetNextSignature()
Suggest a minimal, yet free signature ID to use for the next signature.
bool ReadWithPossibleFixup(SvStream &rStream)
Calls Read() first; if that fails, attempts a fixup and then retries.
bool WritePageObject(PDFObjectElement &rFirstPage, sal_Int32 nAnnotId)
Write the updated Page object as part of signing.
static OString ReadKeyword(SvStream &rStream)
bool Tokenize(SvStream &rStream, TokenizeMode eMode, std::vector< std::unique_ptr< PDFElement > > &rElements, PDFObjectElement *pObjectElement)
Tokenize elements from current offset.
void ReadXRef(SvStream &rStream)
size_t GetObjectOffset(size_t nIndex) const
static void SkipWhitespace(SvStream &rStream)
PDFObjectElement * LookupObject(size_t nObjectNumber)
Look up object based on object number, possibly by parsing object streams.
static void SkipLineBreaks(SvStream &rStream)
Instead of all whitespace, just skip CR and NL characters.
PDFObjectElement * GetCatalog()
size_t m_nSignaturePage
0-based page number where m_aSignatureLine should be placed.
sal_Int32 WriteAnnotObject(PDFObjectElement const &rFirstPage, sal_Int32 nSignatureId, sal_Int32 nAppearanceId, const tools::Rectangle &rSignatureRectangle)
Write the annot object as part of signing.
bool updateObject(sal_Int32 n) override
See vcl::PDFObjectContainer::updateObject().
Definition: pdfdocument.cxx:77
std::vector< PDFObjectElement * > GetSignatureWidgets()
Get a list of signatures embedded into this document.
sal_Int32 WriteAppearanceObject(tools::Rectangle &rSignatureRectangle)
Write the appearance object as part of signing.
void SetIDObject(size_t nID, PDFObjectElement *pObject)
Register an object (owned directly or indirectly by m_aElements) as a provider for a given ID.
bool Write(SvStream &rStream)
Serializes the contents of the edit buffer.
std::vector< PDFObjectElement * > GetPages()
const std::vector< std::unique_ptr< PDFElement > > & GetElements() const
void ReadXRefStream(SvStream &rStream)
SvMemoryStream m_aEditBuffer
All editing takes place in this buffer, if it happens.
void WriteXRef(sal_uInt64 nXRefOffset, PDFReferenceElement const *pRoot)
Write the updated cross-references as part of signing.
std::map< size_t, XRefEntry > m_aXRef
Object ID <-> object offset map.
std::vector< std::unique_ptr< PDFElement > > m_aElements
This vector owns all elements.
sal_Int32 WriteSignatureObject(const OUString &rDescription, bool bAdES, sal_uInt64 &rLastByteRangeOffset, sal_Int64 &rContentOffset)
Write the signature object as part of signing.
std::map< size_t, PDFObjectElement * > m_aIDObjects
Object ID <-> Object pointer map.
static std::vector< unsigned char > DecodeHexString(PDFHexStringElement const *pElement)
Decode a hex dump.
void SetSignaturePage(size_t nPage)
std::vector< size_t > m_aStartXRefs
List of xref offsets we know.
std::map< size_t, PDFTrailerElement * > m_aOffsetTrailers
Trailer offset <-> Trailer pointer map.
std::vector< sal_Int8 > m_aSignatureLine
Signature line in PDF format, to be consumed by the next Sign() invocation.
A byte range in a PDF file.
Definition: pdfdocument.hxx:51
void setVisiting(bool bVisiting)
Definition: pdfdocument.hxx:59
bool alreadyVisiting() const
Definition: pdfdocument.hxx:60
void setParsing(bool bParsing)
Definition: pdfdocument.hxx:61
bool Read(SvStream &rStream) override
sal_uInt64 m_nOffset
Location before the ']' token.
End of a dictionary: '>>'.
bool Read(SvStream &rStream) override
sal_uInt64 m_nLocation
Offset before the '>>' token.
End of an object: 'endobj' keyword.
bool Read(SvStream &rStream) override
End of a stream: 'endstream' keyword.
bool Read(SvStream &rStream) override
Hex string: in <AABB> form.
const OString & GetValue() const
bool Read(SvStream &rStream) override
Literal string: in (asdf) form.
bool Read(SvStream &rStream) override
const OString & GetValue() const
Name object: a key string.
bool Read(SvStream &rStream) override
sal_uInt64 m_nLocation
Offset after the '/' token.
sal_uInt64 GetLocation() const
const OString & GetValue() const
Null object: the 'null' singleton.
bool Read(SvStream &rStream) override
Number object: an integer or a real.
sal_uInt64 GetLocation() const
sal_uInt64 m_nLength
Input file token length.
sal_uInt64 GetLength() const
bool Read(SvStream &rStream) override
sal_uInt64 m_nOffset
Input file start location.
Indirect object: something with a unique ID.
Definition: pdfdocument.hxx:69
PDFArrayElement * m_pArrayElement
The contained direct array, if any.
Definition: pdfdocument.hxx:86
std::vector< std::unique_ptr< PDFElement > > m_aElements
Elements of an object in an object stream.
Definition: pdfdocument.hxx:92
void SetNumberElement(PDFNumberElement *pNumberElement)
const std::vector< PDFReferenceElement * > & GetDictionaryReferences() const
void SetDictionaryLength(sal_uInt64 nDictionaryLength)
const std::map< OString, PDFElement * > & GetDictionaryItems()
Get access to the parsed key-value items from the object dictionary.
sal_uInt64 m_nDictionaryOffset
Position after the '<<' token.
Definition: pdfdocument.hxx:77
PDFElement * Lookup(const OString &rDictionaryKey)
void SetArrayOffset(sal_uInt64 nArrayOffset)
PDFNumberElement * m_pNumberElement
If set, the object contains this number element (outside any dictionary/array).
Definition: pdfdocument.hxx:75
PDFStreamElement * GetStream() const
Access to the stream of the object, if it has any.
std::vector< std::unique_ptr< PDFObjectElement > > m_aStoredElements
Objects of an object stream.
Definition: pdfdocument.hxx:90
void SetDictionary(PDFDictionaryElement *pDictionaryElement)
PDFDictionaryElement * m_pDictionaryElement
Definition: pdfdocument.hxx:80
sal_uInt64 m_nDictionaryLength
Length of the dictionary buffer till (before) the '>>' token.
Definition: pdfdocument.hxx:79
void SetStream(PDFStreamElement *pStreamElement)
bool Read(SvStream &rStream) override
void SetArrayLength(sal_uInt64 nArrayLength)
sal_uInt64 GetArrayOffset() const
SvMemoryStream * GetStreamBuffer() const
void SetArray(PDFArrayElement *pArrayElement)
std::vector< PDFReferenceElement * > m_aDictionaryReferences
List of all reference elements inside this object's dictionary and nested dictionaries.
Definition: pdfdocument.hxx:97
PDFArrayElement * GetArray()
std::vector< std::unique_ptr< PDFElement > > & GetStoredElements()
void SetStreamBuffer(std::unique_ptr< SvMemoryStream > &pStreamBuffer)
std::unique_ptr< SvMemoryStream > m_pStreamBuffer
Uncompressed buffer of an object in an object stream.
Definition: pdfdocument.hxx:94
PDFObjectElement(PDFDocument &rDoc, double fObjectValue, double fGenerationValue)
sal_uInt64 m_nArrayOffset
Position after the '[' token, if m_pArrayElement is set.
Definition: pdfdocument.hxx:82
void AddDictionaryReference(PDFReferenceElement *pReference)
void ParseStoredObjects()
Parse objects stored in this object stream.
sal_uInt64 m_nArrayLength
Length of the array buffer till (before) the ']' token.
Definition: pdfdocument.hxx:84
PDFNumberElement * GetNumberElement() const
sal_uInt64 GetArrayLength() const
PDFDocument & m_rDoc
The document owning this element.
Definition: pdfdocument.hxx:71
PDFObjectElement * LookupObject(const OString &rDictionaryKey)
void SetDictionaryOffset(sal_uInt64 nDictionaryOffset)
PDFStreamElement * m_pStreamElement
The stream of this object, used when this is an object stream.
Definition: pdfdocument.hxx:88
PDFDictionaryElement * GetDictionary()
const std::vector< std::unique_ptr< PDFElement > > & mrElements
size_t parse(PDFElement *pParsingElement, size_t nStartIndex=0, int nCurrentDepth=0)
Reference object: something with a unique ID.
sal_uInt64 m_nOffset
Location after the 'R' token.
PDFReferenceElement(PDFDocument &rDoc, PDFNumberElement &rObject, PDFNumberElement const &rGeneration)
PDFNumberElement & GetObjectElement() const
double LookupNumber(SvStream &rStream) const
Assuming the reference points to a number object, return its value.
bool Read(SvStream &rStream) override
PDFNumberElement & m_rObject
The element providing the object number.
PDFObjectElement * LookupObject()
Look up the referenced object, without assuming anything about its contents.
Stream object: a byte array with a known length.
SvMemoryStream & GetMemory()
sal_uInt64 GetOffset() const
SvMemoryStream m_aMemory
The byte array itself.
bool Read(SvStream &rStream) override
The trailer singleton is at the end of the doc.
sal_uInt64 GetLocation() const
PDFDictionaryElement * m_pDictionaryElement
sal_uInt64 m_nOffset
Location of the end of the trailer token.
PDFTrailerElement(PDFDocument &rDoc)
bool Read(SvStream &rStream) override
PDFElement * Lookup(const OString &rDictionaryKey)
An entry in a cross-reference stream.
void SetDirty(bool bDirty)
void SetOffset(sal_uInt64 nOffset)
XRefEntryType GetType() const
sal_uInt64 GetOffset() const
void SetType(XRefEntryType eType)
sal_Int32 nElements
#define MAX_SIGNATURE_CONTENT_LENGTH
const char * pS
SwDoc & m_rDoc
EmbeddedObjectRef * pObject
sal_Int32 nIndex
OUString aName
Mode eMode
sal_uInt16 nPos
#define SAL_WARN_IF(condition, area, stream)
#define SAL_WARN(area, stream)
#define SAL_INFO(area, stream)
aBuf
size
OStringBuffer & padToLength(OStringBuffer &rBuffer, sal_Int32 nLength, char cFill='\0')
int i
constexpr std::enable_if_t< std::is_signed_v< T >, std::make_unsigned_t< T > > make_unsigned(T value)
double toDouble(std::u16string_view str)
sal_uInt32 toUInt32(std::u16string_view str, sal_Int16 radix=10)
std::vector< unsigned char > DecodeHexString(std::string_view rHex)
css::uno::Reference< css::linguistic2::XProofreadingIterator > get(css::uno::Reference< css::uno::XComponentContext > const &context)
@ COMPRESSED
xref stream "2".
@ FREE
xref "f" or xref stream "0".
@ NOT_COMPRESSED
xref "n" or xref stream "1".
static void visitPages(PDFObjectElement *pPages, std::vector< PDFObjectElement * > &rRet)
Visits the page tree recursively, looking for page objects.
@ STORED_OBJECT
Same as END_OF_OBJECT, but for object streams (no endobj keyword).
@ EOF_TOKEN
Till the first %%EOF token.
@ END_OF_OBJECT
Till the end of the current object.
bool convertToHighestSupported(SvStream &rInStream, SvStream &rOutStream)
Converts to highest supported format version (1.6).
Definition: pdfcompat.cxx:43
const char GetValue[]
QPRO_FUNC_TYPE nType
#define STREAM_SEEK_TO_END
sal_uInt16 sal_Unicode
std::unique_ptr< char[]> aBuffer
sal_Int32 nLength