LibreOffice Module vcl (master) 1
pdfdocument.cxx
Go to the documentation of this file.
1/* -*- Mode: C++; tab-width: 4; indent-tabs-mode: nil; c-basic-offset: 4 -*- */
2/*
3 * This file is part of the LibreOffice project.
4 *
5 * This Source Code Form is subject to the terms of the Mozilla Public
6 * License, v. 2.0. If a copy of the MPL was not distributed with this
7 * file, You can obtain one at http://mozilla.org/MPL/2.0/.
8 */
9
11
12#include <map>
13#include <memory>
14#include <vector>
15
16#include <com/sun/star/uno/Sequence.hxx>
17#include <com/sun/star/security/XCertificate.hpp>
18
20#include <comphelper/string.hxx>
21#include <o3tl/string_view.hxx>
22#include <rtl/character.hxx>
23#include <rtl/strbuf.hxx>
24#include <rtl/string.hxx>
25#include <sal/log.hxx>
26#include <sal/types.h>
27#include <svl/cryptosign.hxx>
28#include <tools/zcodec.hxx>
29#include <vcl/pdfwriter.hxx>
30#include <o3tl/safeint.hxx>
31
32#include <pdf/objectcopier.hxx>
33
34using namespace com::sun::star;
35
36namespace vcl::filter
37{
// Out-of-line defaulted default constructor of XRefEntry.
38XRefEntry::XRefEntry() = default;
39
// Out-of-line defaulted default constructor of PDFDocument.
40PDFDocument::PDFDocument() = default;
41
43
44bool PDFDocument::RemoveSignature(size_t nPosition)
45{
46 std::vector<PDFObjectElement*> aSignatures = GetSignatureWidgets();
47 if (nPosition >= aSignatures.size())
48 {
49 SAL_WARN("vcl.filter", "PDFDocument::RemoveSignature: invalid nPosition");
50 return false;
51 }
52
53 if (aSignatures.size() != m_aEOFs.size() - 1)
54 {
55 SAL_WARN("vcl.filter", "PDFDocument::RemoveSignature: no 1:1 mapping between signatures "
56 "and incremental updates");
57 return false;
58 }
59
60 // The EOF offset is the end of the original file, without the signature at
61 // nPosition.
62 m_aEditBuffer.Seek(m_aEOFs[nPosition]);
63 // Drop all bytes after the current position.
65
66 return m_aEditBuffer.good();
67}
68
70{
71 sal_Int32 nObject = m_aXRef.size();
72 m_aXRef[nObject] = XRefEntry();
73 return nObject;
74}
75
76bool PDFDocument::updateObject(sal_Int32 nObject)
77{
78 if (o3tl::make_unsigned(nObject) >= m_aXRef.size())
79 {
80 SAL_WARN("vcl.filter", "PDFDocument::updateObject: invalid nObject");
81 return false;
82 }
83
84 XRefEntry aEntry;
86 aEntry.SetDirty(true);
87 m_aXRef[nObject] = aEntry;
88 return true;
89}
90
91bool PDFDocument::writeBuffer(const void* pBuffer, sal_uInt64 nBytes)
92{
93 std::size_t nWritten = m_aEditBuffer.WriteBytes(pBuffer, nBytes);
94 return nWritten == nBytes;
95}
96
97void PDFDocument::SetSignatureLine(std::vector<sal_Int8>&& rSignatureLine)
98{
99 m_aSignatureLine = std::move(rSignatureLine);
100}
101
102void PDFDocument::SetSignaturePage(size_t nPage) { m_nSignaturePage = nPage; }
103
105{
106 sal_uInt32 nRet = 0;
107 for (const auto& pSignature : GetSignatureWidgets())
108 {
109 auto pT = dynamic_cast<PDFLiteralStringElement*>(pSignature->Lookup("T"));
110 if (!pT)
111 continue;
112
113 const OString& rValue = pT->GetValue();
114 const OString aPrefix = "Signature";
115 if (!rValue.startsWith(aPrefix))
116 continue;
117
118 nRet = std::max(nRet, o3tl::toUInt32(rValue.subView(aPrefix.getLength())));
119 }
120
121 return nRet + 1;
122}
123
124sal_Int32 PDFDocument::WriteSignatureObject(const OUString& rDescription, bool bAdES,
125 sal_uInt64& rLastByteRangeOffset,
126 sal_Int64& rContentOffset)
127{
128 // Write signature object.
129 sal_Int32 nSignatureId = m_aXRef.size();
130 XRefEntry aSignatureEntry;
131 aSignatureEntry.SetOffset(m_aEditBuffer.Tell());
132 aSignatureEntry.SetDirty(true);
133 m_aXRef[nSignatureId] = aSignatureEntry;
134 OStringBuffer aSigBuffer;
135 aSigBuffer.append(nSignatureId);
136 aSigBuffer.append(" 0 obj\n");
137 aSigBuffer.append("<</Contents <");
138 rContentOffset = aSignatureEntry.GetOffset() + aSigBuffer.getLength();
139 // Reserve space for the PKCS#7 object.
140 OStringBuffer aContentFiller(MAX_SIGNATURE_CONTENT_LENGTH);
142 aSigBuffer.append(aContentFiller);
143 aSigBuffer.append(">\n/Type/Sig/SubFilter");
144 if (bAdES)
145 aSigBuffer.append("/ETSI.CAdES.detached");
146 else
147 aSigBuffer.append("/adbe.pkcs7.detached");
148
149 // Time of signing.
150 aSigBuffer.append(" /M (");
151 aSigBuffer.append(vcl::PDFWriter::GetDateTime());
152 aSigBuffer.append(")");
153
154 // Byte range: we can write offset1-length1 and offset2 right now, will
155 // write length2 later.
156 aSigBuffer.append(" /ByteRange [ 0 ");
157 // -1 and +1 is the leading "<" and the trailing ">" around the hex string.
158 aSigBuffer.append(rContentOffset - 1);
159 aSigBuffer.append(" ");
160 aSigBuffer.append(rContentOffset + MAX_SIGNATURE_CONTENT_LENGTH + 1);
161 aSigBuffer.append(" ");
162 rLastByteRangeOffset = aSignatureEntry.GetOffset() + aSigBuffer.getLength();
163 // We don't know how many bytes we need for the last ByteRange value, this
164 // should be enough.
165 OStringBuffer aByteRangeFiller;
166 comphelper::string::padToLength(aByteRangeFiller, 100, ' ');
167 aSigBuffer.append(aByteRangeFiller);
168 // Finish the Sig obj.
169 aSigBuffer.append(" /Filter/Adobe.PPKMS");
170
171 if (!rDescription.isEmpty())
172 {
173 aSigBuffer.append("/Reason<");
174 vcl::PDFWriter::AppendUnicodeTextString(rDescription, aSigBuffer);
175 aSigBuffer.append(">");
176 }
177
178 aSigBuffer.append(" >>\nendobj\n\n");
179 m_aEditBuffer.WriteOString(aSigBuffer);
180
181 return nSignatureId;
182}
183
185{
186 PDFDocument aPDFDocument;
187 filter::PDFObjectElement* pPage = nullptr;
188 std::vector<filter::PDFObjectElement*> aContentStreams;
189
190 if (!m_aSignatureLine.empty())
191 {
192 // Parse the PDF data of signature line: we can set the signature rectangle to non-empty
193 // based on it.
194 SvMemoryStream aPDFStream;
195 aPDFStream.WriteBytes(m_aSignatureLine.data(), m_aSignatureLine.size());
196 aPDFStream.Seek(0);
197 if (!aPDFDocument.Read(aPDFStream))
198 {
199 SAL_WARN("vcl.filter",
200 "PDFDocument::WriteAppearanceObject: failed to read the PDF document");
201 return -1;
202 }
203
204 std::vector<filter::PDFObjectElement*> aPages = aPDFDocument.GetPages();
205 if (aPages.empty())
206 {
207 SAL_WARN("vcl.filter", "PDFDocument::WriteAppearanceObject: no pages");
208 return -1;
209 }
210
211 pPage = aPages[0];
212 if (!pPage)
213 {
214 SAL_WARN("vcl.filter", "PDFDocument::WriteAppearanceObject: no page");
215 return -1;
216 }
217
218 // Calculate the bounding box.
219 PDFElement* pMediaBox = pPage->Lookup("MediaBox");
220 auto pMediaBoxArray = dynamic_cast<PDFArrayElement*>(pMediaBox);
221 if (!pMediaBoxArray || pMediaBoxArray->GetElements().size() < 4)
222 {
223 SAL_WARN("vcl.filter",
224 "PDFDocument::WriteAppearanceObject: MediaBox is not an array of 4");
225 return -1;
226 }
227 const std::vector<PDFElement*>& rMediaBoxElements = pMediaBoxArray->GetElements();
228 auto pWidth = dynamic_cast<PDFNumberElement*>(rMediaBoxElements[2]);
229 if (!pWidth)
230 {
231 SAL_WARN("vcl.filter", "PDFDocument::WriteAppearanceObject: MediaBox has no width");
232 return -1;
233 }
234 rSignatureRectangle.setWidth(pWidth->GetValue());
235 auto pHeight = dynamic_cast<PDFNumberElement*>(rMediaBoxElements[3]);
236 if (!pHeight)
237 {
238 SAL_WARN("vcl.filter", "PDFDocument::WriteAppearanceObject: MediaBox has no height");
239 return -1;
240 }
241 rSignatureRectangle.setHeight(pHeight->GetValue());
242
243 if (PDFObjectElement* pContentStream = pPage->LookupObject("Contents"))
244 {
245 aContentStreams.push_back(pContentStream);
246 }
247
248 if (aContentStreams.empty())
249 {
250 SAL_WARN("vcl.filter", "PDFDocument::WriteAppearanceObject: no content stream");
251 return -1;
252 }
253 }
254 m_aSignatureLine.clear();
255
256 // Write appearance object: allocate an ID.
257 sal_Int32 nAppearanceId = m_aXRef.size();
258 m_aXRef[nAppearanceId] = XRefEntry();
259
260 // Write the object content.
261 SvMemoryStream aEditBuffer;
262 aEditBuffer.WriteUInt32AsString(nAppearanceId);
263 aEditBuffer.WriteCharPtr(" 0 obj\n");
264 aEditBuffer.WriteCharPtr("<</Type/XObject\n/Subtype/Form\n");
265
266 PDFObjectCopier aCopier(*this);
267 if (!aContentStreams.empty())
268 {
269 assert(pPage && "aContentStreams is only filled if there was a pPage");
270 OStringBuffer aBuffer;
271 aCopier.copyPageResources(pPage, aBuffer);
272 aEditBuffer.WriteOString(aBuffer);
273 }
274
275 aEditBuffer.WriteCharPtr("/BBox[0 0 ");
276 aEditBuffer.WriteOString(OString::number(rSignatureRectangle.getWidth()));
277 aEditBuffer.WriteCharPtr(" ");
278 aEditBuffer.WriteOString(OString::number(rSignatureRectangle.getHeight()));
279 aEditBuffer.WriteCharPtr("]\n/Length ");
280
281 // Add the object to the doc-level edit buffer and update the offset.
282 SvMemoryStream aStream;
283 bool bCompressed = false;
284 sal_Int32 nLength = 0;
285 if (!aContentStreams.empty())
286 {
287 nLength = PDFObjectCopier::copyPageStreams(aContentStreams, aStream, bCompressed);
288 }
289 aEditBuffer.WriteOString(OString::number(nLength));
290 if (bCompressed)
291 {
292 aEditBuffer.WriteOString(" /Filter/FlateDecode");
293 }
294
295 aEditBuffer.WriteCharPtr("\n>>\n");
296
297 aEditBuffer.WriteCharPtr("stream\n");
298
299 // Copy the original page streams to the form XObject stream.
300 aStream.Seek(0);
301 aEditBuffer.WriteStream(aStream);
302
303 aEditBuffer.WriteCharPtr("\nendstream\nendobj\n\n");
304
305 aEditBuffer.Seek(0);
306 XRefEntry aAppearanceEntry;
307 aAppearanceEntry.SetOffset(m_aEditBuffer.Tell());
308 aAppearanceEntry.SetDirty(true);
309 m_aXRef[nAppearanceId] = aAppearanceEntry;
310 m_aEditBuffer.WriteStream(aEditBuffer);
311
312 return nAppearanceId;
313}
314
315sal_Int32 PDFDocument::WriteAnnotObject(PDFObjectElement const& rFirstPage, sal_Int32 nSignatureId,
316 sal_Int32 nAppearanceId,
317 const tools::Rectangle& rSignatureRectangle)
318{
319 // Decide what identifier to use for the new signature.
320 sal_uInt32 nNextSignature = GetNextSignature();
321
322 // Write the Annot object, references nSignatureId and nAppearanceId.
323 sal_Int32 nAnnotId = m_aXRef.size();
324 XRefEntry aAnnotEntry;
325 aAnnotEntry.SetOffset(m_aEditBuffer.Tell());
326 aAnnotEntry.SetDirty(true);
327 m_aXRef[nAnnotId] = aAnnotEntry;
329 m_aEditBuffer.WriteCharPtr(" 0 obj\n");
330 m_aEditBuffer.WriteCharPtr("<</Type/Annot/Subtype/Widget/F 132\n");
331 m_aEditBuffer.WriteCharPtr("/Rect[0 0 ");
332 m_aEditBuffer.WriteOString(OString::number(rSignatureRectangle.getWidth()));
334 m_aEditBuffer.WriteOString(OString::number(rSignatureRectangle.getHeight()));
336 m_aEditBuffer.WriteCharPtr("/FT/Sig\n");
339 m_aEditBuffer.WriteCharPtr(" 0 R\n");
340 m_aEditBuffer.WriteCharPtr("/T(Signature");
341 m_aEditBuffer.WriteUInt32AsString(nNextSignature);
345 m_aEditBuffer.WriteCharPtr(" 0 R\n");
348 m_aEditBuffer.WriteCharPtr(" 0 R\n");
349 m_aEditBuffer.WriteCharPtr("/AP<<\n/N ");
350 m_aEditBuffer.WriteUInt32AsString(nAppearanceId);
351 m_aEditBuffer.WriteCharPtr(" 0 R\n>>\n");
352 m_aEditBuffer.WriteCharPtr(">>\nendobj\n\n");
353
354 return nAnnotId;
355}
356
357bool PDFDocument::WritePageObject(PDFObjectElement& rFirstPage, sal_Int32 nAnnotId)
358{
359 PDFElement* pAnnots = rFirstPage.Lookup("Annots");
360 auto pAnnotsReference = dynamic_cast<PDFReferenceElement*>(pAnnots);
361 if (pAnnotsReference)
362 {
363 // Write the updated Annots key of the Page object.
364 PDFObjectElement* pAnnotsObject = pAnnotsReference->LookupObject();
365 if (!pAnnotsObject)
366 {
367 SAL_WARN("vcl.filter", "PDFDocument::Sign: invalid Annots reference");
368 return false;
369 }
370
371 sal_uInt32 nAnnotsId = pAnnotsObject->GetObjectValue();
372 m_aXRef[nAnnotsId].SetType(XRefEntryType::NOT_COMPRESSED);
373 m_aXRef[nAnnotsId].SetOffset(m_aEditBuffer.Tell());
374 m_aXRef[nAnnotsId].SetDirty(true);
376 m_aEditBuffer.WriteCharPtr(" 0 obj\n[");
377
378 // Write existing references.
379 PDFArrayElement* pArray = pAnnotsObject->GetArray();
380 if (!pArray)
381 {
382 SAL_WARN("vcl.filter", "PDFDocument::Sign: Page Annots is a reference to a non-array");
383 return false;
384 }
385
386 for (size_t i = 0; i < pArray->GetElements().size(); ++i)
387 {
388 auto pReference = dynamic_cast<PDFReferenceElement*>(pArray->GetElements()[i]);
389 if (!pReference)
390 continue;
391
392 if (i)
394 m_aEditBuffer.WriteUInt32AsString(pReference->GetObjectValue());
396 }
397 // Write our reference.
401
402 m_aEditBuffer.WriteCharPtr("]\nendobj\n\n");
403 }
404 else
405 {
406 // Write the updated first page object, references nAnnotId.
407 sal_uInt32 nFirstPageId = rFirstPage.GetObjectValue();
408 if (nFirstPageId >= m_aXRef.size())
409 {
410 SAL_WARN("vcl.filter", "PDFDocument::Sign: invalid first page obj id");
411 return false;
412 }
413 m_aXRef[nFirstPageId].SetOffset(m_aEditBuffer.Tell());
414 m_aXRef[nFirstPageId].SetDirty(true);
416 m_aEditBuffer.WriteCharPtr(" 0 obj\n");
418 auto pAnnotsArray = dynamic_cast<PDFArrayElement*>(pAnnots);
419 if (!pAnnotsArray)
420 {
421 // No Annots key, just write the key with a single reference.
422 m_aEditBuffer.WriteBytes(static_cast<const char*>(m_aEditBuffer.GetData())
423 + rFirstPage.GetDictionaryOffset(),
424 rFirstPage.GetDictionaryLength());
425 m_aEditBuffer.WriteCharPtr("/Annots[");
428 }
429 else
430 {
431 // Annots key is already there, insert our reference at the end.
432 PDFDictionaryElement* pDictionary = rFirstPage.GetDictionary();
433
434 // Offset right before the end of the Annots array.
435 sal_uInt64 nAnnotsEndOffset = pDictionary->GetKeyOffset("Annots")
436 + pDictionary->GetKeyValueLength("Annots") - 1;
437 // Length of beginning of the dictionary -> Annots end.
438 sal_uInt64 nAnnotsBeforeEndLength = nAnnotsEndOffset - rFirstPage.GetDictionaryOffset();
439 m_aEditBuffer.WriteBytes(static_cast<const char*>(m_aEditBuffer.GetData())
440 + rFirstPage.GetDictionaryOffset(),
441 nAnnotsBeforeEndLength);
445 // Length of Annots end -> end of the dictionary.
446 sal_uInt64 nAnnotsAfterEndLength = rFirstPage.GetDictionaryOffset()
447 + rFirstPage.GetDictionaryLength()
448 - nAnnotsEndOffset;
449 m_aEditBuffer.WriteBytes(static_cast<const char*>(m_aEditBuffer.GetData())
450 + nAnnotsEndOffset,
451 nAnnotsAfterEndLength);
452 }
454 m_aEditBuffer.WriteCharPtr("\nendobj\n\n");
455 }
456
457 return true;
458}
459
461{
462 if (m_pXRefStream)
463 pRoot = dynamic_cast<PDFReferenceElement*>(m_pXRefStream->Lookup("Root"));
464 else
465 {
466 if (!m_pTrailer)
467 {
468 SAL_WARN("vcl.filter", "PDFDocument::Sign: found no trailer");
469 return false;
470 }
471 pRoot = dynamic_cast<PDFReferenceElement*>(m_pTrailer->Lookup("Root"));
472 }
473 if (!pRoot)
474 {
475 SAL_WARN("vcl.filter", "PDFDocument::Sign: trailer has no root reference");
476 return false;
477 }
478 PDFObjectElement* pCatalog = pRoot->LookupObject();
479 if (!pCatalog)
480 {
481 SAL_WARN("vcl.filter", "PDFDocument::Sign: invalid catalog reference");
482 return false;
483 }
484 sal_uInt32 nCatalogId = pCatalog->GetObjectValue();
485 if (nCatalogId >= m_aXRef.size())
486 {
487 SAL_WARN("vcl.filter", "PDFDocument::Sign: invalid catalog obj id");
488 return false;
489 }
490 PDFElement* pAcroForm = pCatalog->Lookup("AcroForm");
491 auto pAcroFormReference = dynamic_cast<PDFReferenceElement*>(pAcroForm);
492 if (pAcroFormReference)
493 {
494 // Write the updated AcroForm key of the Catalog object.
495 PDFObjectElement* pAcroFormObject = pAcroFormReference->LookupObject();
496 if (!pAcroFormObject)
497 {
498 SAL_WARN("vcl.filter", "PDFDocument::Sign: invalid AcroForm reference");
499 return false;
500 }
501
502 sal_uInt32 nAcroFormId = pAcroFormObject->GetObjectValue();
503 m_aXRef[nAcroFormId].SetType(XRefEntryType::NOT_COMPRESSED);
504 m_aXRef[nAcroFormId].SetOffset(m_aEditBuffer.Tell());
505 m_aXRef[nAcroFormId].SetDirty(true);
507 m_aEditBuffer.WriteCharPtr(" 0 obj\n");
508
509 // If this is nullptr, then the AcroForm object is not in an object stream.
510 SvMemoryStream* pStreamBuffer = pAcroFormObject->GetStreamBuffer();
511
512 if (!pAcroFormObject->Lookup("Fields"))
513 {
514 SAL_WARN("vcl.filter",
515 "PDFDocument::Sign: AcroForm object without required Fields key");
516 return false;
517 }
518
519 PDFDictionaryElement* pAcroFormDictionary = pAcroFormObject->GetDictionary();
520 if (!pAcroFormDictionary)
521 {
522 SAL_WARN("vcl.filter", "PDFDocument::Sign: AcroForm object has no dictionary");
523 return false;
524 }
525
526 // Offset right before the end of the Fields array.
527 sal_uInt64 nFieldsEndOffset = pAcroFormDictionary->GetKeyOffset("Fields")
528 + pAcroFormDictionary->GetKeyValueLength("Fields")
529 - strlen("]");
530
531 // Length of beginning of the object dictionary -> Fields end.
532 sal_uInt64 nFieldsBeforeEndLength = nFieldsEndOffset;
533 if (pStreamBuffer)
534 m_aEditBuffer.WriteBytes(pStreamBuffer->GetData(), nFieldsBeforeEndLength);
535 else
536 {
537 nFieldsBeforeEndLength -= pAcroFormObject->GetDictionaryOffset();
539 m_aEditBuffer.WriteBytes(static_cast<const char*>(m_aEditBuffer.GetData())
540 + pAcroFormObject->GetDictionaryOffset(),
541 nFieldsBeforeEndLength);
542 }
543
544 // Append our reference at the end of the Fields array.
548
549 // Length of Fields end -> end of the object dictionary.
550 if (pStreamBuffer)
551 {
552 sal_uInt64 nFieldsAfterEndLength = pStreamBuffer->GetSize() - nFieldsEndOffset;
553 m_aEditBuffer.WriteBytes(static_cast<const char*>(pStreamBuffer->GetData())
554 + nFieldsEndOffset,
555 nFieldsAfterEndLength);
556 }
557 else
558 {
559 sal_uInt64 nFieldsAfterEndLength = pAcroFormObject->GetDictionaryOffset()
560 + pAcroFormObject->GetDictionaryLength()
561 - nFieldsEndOffset;
562 m_aEditBuffer.WriteBytes(static_cast<const char*>(m_aEditBuffer.GetData())
563 + nFieldsEndOffset,
564 nFieldsAfterEndLength);
566 }
567
568 m_aEditBuffer.WriteCharPtr("\nendobj\n\n");
569 }
570 else
571 {
572 // Write the updated Catalog object, references nAnnotId.
573 auto pAcroFormDictionary = dynamic_cast<PDFDictionaryElement*>(pAcroForm);
574 m_aXRef[nCatalogId].SetOffset(m_aEditBuffer.Tell());
575 m_aXRef[nCatalogId].SetDirty(true);
577 m_aEditBuffer.WriteCharPtr(" 0 obj\n");
579 if (!pAcroFormDictionary)
580 {
581 // No AcroForm key, assume no signatures.
582 m_aEditBuffer.WriteBytes(static_cast<const char*>(m_aEditBuffer.GetData())
583 + pCatalog->GetDictionaryOffset(),
584 pCatalog->GetDictionaryLength());
585 m_aEditBuffer.WriteCharPtr("/AcroForm<</Fields[\n");
587 m_aEditBuffer.WriteCharPtr(" 0 R\n]/SigFlags 3>>\n");
588 }
589 else
590 {
591 // AcroForm key is already there, insert our reference at the Fields end.
592 auto it = pAcroFormDictionary->GetItems().find("Fields");
593 if (it == pAcroFormDictionary->GetItems().end())
594 {
595 SAL_WARN("vcl.filter", "PDFDocument::Sign: AcroForm without required Fields key");
596 return false;
597 }
598
599 auto pFields = dynamic_cast<PDFArrayElement*>(it->second);
600 if (!pFields)
601 {
602 SAL_WARN("vcl.filter", "PDFDocument::Sign: AcroForm Fields is not an array");
603 return false;
604 }
605
606 // Offset right before the end of the Fields array.
607 sal_uInt64 nFieldsEndOffset = pAcroFormDictionary->GetKeyOffset("Fields")
608 + pAcroFormDictionary->GetKeyValueLength("Fields") - 1;
609 // Length of beginning of the Catalog dictionary -> Fields end.
610 sal_uInt64 nFieldsBeforeEndLength = nFieldsEndOffset - pCatalog->GetDictionaryOffset();
611 m_aEditBuffer.WriteBytes(static_cast<const char*>(m_aEditBuffer.GetData())
612 + pCatalog->GetDictionaryOffset(),
613 nFieldsBeforeEndLength);
617 // Length of Fields end -> end of the Catalog dictionary.
618 sal_uInt64 nFieldsAfterEndLength = pCatalog->GetDictionaryOffset()
619 + pCatalog->GetDictionaryLength() - nFieldsEndOffset;
620 m_aEditBuffer.WriteBytes(static_cast<const char*>(m_aEditBuffer.GetData())
621 + nFieldsEndOffset,
622 nFieldsAfterEndLength);
623 }
624 m_aEditBuffer.WriteCharPtr(">>\nendobj\n\n");
625 }
626
627 return true;
628}
629
630void PDFDocument::WriteXRef(sal_uInt64 nXRefOffset, PDFReferenceElement const* pRoot)
631{
632 if (m_pXRefStream)
633 {
634 // Write the xref stream.
635 // This is a bit meta: the xref stream stores its own offset.
636 sal_Int32 nXRefStreamId = m_aXRef.size();
637 XRefEntry aXRefStreamEntry;
638 aXRefStreamEntry.SetOffset(nXRefOffset);
639 aXRefStreamEntry.SetDirty(true);
640 m_aXRef[nXRefStreamId] = aXRefStreamEntry;
641
642 // Write stream data.
643 SvMemoryStream aXRefStream;
644 const size_t nOffsetLen = 3;
645 // 3 additional bytes: predictor, the first and the third field.
646 const size_t nLineLength = nOffsetLen + 3;
647 // This is the line as it appears before tweaking according to the predictor.
648 std::vector<unsigned char> aOrigLine(nLineLength);
649 // This is the previous line.
650 std::vector<unsigned char> aPrevLine(nLineLength);
651 // This is the line as written to the stream.
652 std::vector<unsigned char> aFilteredLine(nLineLength);
653 for (const auto& rXRef : m_aXRef)
654 {
655 const XRefEntry& rEntry = rXRef.second;
656
657 if (!rEntry.GetDirty())
658 continue;
659
660 // Predictor.
661 size_t nPos = 0;
662 // PNG prediction: up (on all rows).
663 aOrigLine[nPos++] = 2;
664
665 // First field.
666 unsigned char nType = 0;
667 switch (rEntry.GetType())
668 {
670 nType = 0;
671 break;
673 nType = 1;
674 break;
676 nType = 2;
677 break;
678 }
679 aOrigLine[nPos++] = nType;
680
681 // Second field.
682 for (size_t i = 0; i < nOffsetLen; ++i)
683 {
684 size_t nByte = nOffsetLen - i - 1;
685 // Fields requiring more than one byte are stored with the
686 // high-order byte first.
687 unsigned char nCh = (rEntry.GetOffset() & (0xff << (nByte * 8))) >> (nByte * 8);
688 aOrigLine[nPos++] = nCh;
689 }
690
691 // Third field.
692 aOrigLine[nPos++] = 0;
693
694 // Now apply the predictor.
695 aFilteredLine[0] = aOrigLine[0];
696 for (size_t i = 1; i < nLineLength; ++i)
697 {
698 // Count the delta vs the previous line.
699 aFilteredLine[i] = aOrigLine[i] - aPrevLine[i];
700 // Remember the new reference.
701 aPrevLine[i] = aOrigLine[i];
702 }
703
704 aXRefStream.WriteBytes(aFilteredLine.data(), aFilteredLine.size());
705 }
706
707 m_aEditBuffer.WriteUInt32AsString(nXRefStreamId);
709 " 0 obj\n<</DecodeParms<</Columns 5/Predictor 12>>/Filter/FlateDecode");
710
711 // ID.
712 auto pID = dynamic_cast<PDFArrayElement*>(m_pXRefStream->Lookup("ID"));
713 if (pID)
714 {
715 const std::vector<PDFElement*>& rElements = pID->GetElements();
716 m_aEditBuffer.WriteCharPtr("/ID [ <");
717 for (size_t i = 0; i < rElements.size(); ++i)
718 {
719 auto pIDString = dynamic_cast<PDFHexStringElement*>(rElements[i]);
720 if (!pIDString)
721 continue;
722
723 m_aEditBuffer.WriteOString(pIDString->GetValue());
724 if ((i + 1) < rElements.size())
726 }
728 }
729
730 // Index.
731 m_aEditBuffer.WriteCharPtr("/Index [ ");
732 for (const auto& rXRef : m_aXRef)
733 {
734 if (!rXRef.second.GetDirty())
735 continue;
736
739 }
741
742 // Info.
743 auto pInfo = dynamic_cast<PDFReferenceElement*>(m_pXRefStream->Lookup("Info"));
744 if (pInfo)
745 {
746 m_aEditBuffer.WriteCharPtr("/Info ");
747 m_aEditBuffer.WriteUInt32AsString(pInfo->GetObjectValue());
749 m_aEditBuffer.WriteUInt32AsString(pInfo->GetGenerationValue());
751 }
752
753 // Length.
754 m_aEditBuffer.WriteCharPtr("/Length ");
755 {
756 ZCodec aZCodec;
757 aZCodec.BeginCompression();
758 aXRefStream.Seek(0);
759 SvMemoryStream aStream;
760 aZCodec.Compress(aXRefStream, aStream);
761 aZCodec.EndCompression();
762 aXRefStream.Seek(0);
763 aXRefStream.SetStreamSize(0);
764 aStream.Seek(0);
765 aXRefStream.WriteStream(aStream);
766 }
768
769 if (!m_aStartXRefs.empty())
770 {
771 // Write location of the previous cross-reference section.
772 m_aEditBuffer.WriteCharPtr("/Prev ");
774 }
775
776 // Root.
777 m_aEditBuffer.WriteCharPtr("/Root ");
782
783 // Size.
784 m_aEditBuffer.WriteCharPtr("/Size ");
786
787 m_aEditBuffer.WriteCharPtr("/Type/XRef/W[1 3 1]>>\nstream\n");
788 aXRefStream.Seek(0);
789 m_aEditBuffer.WriteStream(aXRefStream);
790 m_aEditBuffer.WriteCharPtr("\nendstream\nendobj\n\n");
791 }
792 else
793 {
794 // Write the xref table.
795 m_aEditBuffer.WriteCharPtr("xref\n");
796 for (const auto& rXRef : m_aXRef)
797 {
798 size_t nObject = rXRef.first;
799 size_t nOffset = rXRef.second.GetOffset();
800 if (!rXRef.second.GetDirty())
801 continue;
802
805 OStringBuffer aBuffer;
806 aBuffer.append(static_cast<sal_Int32>(nOffset));
807 while (aBuffer.getLength() < 10)
808 aBuffer.insert(0, "0");
809 if (nObject == 0)
810 aBuffer.append(" 65535 f \n");
811 else
812 aBuffer.append(" 00000 n \n");
814 }
815
816 // Write the trailer.
817 m_aEditBuffer.WriteCharPtr("trailer\n<</Size ");
819 m_aEditBuffer.WriteCharPtr("/Root ");
824 auto pInfo = dynamic_cast<PDFReferenceElement*>(m_pTrailer->Lookup("Info"));
825 if (pInfo)
826 {
827 m_aEditBuffer.WriteCharPtr("/Info ");
828 m_aEditBuffer.WriteUInt32AsString(pInfo->GetObjectValue());
830 m_aEditBuffer.WriteUInt32AsString(pInfo->GetGenerationValue());
832 }
833 auto pID = dynamic_cast<PDFArrayElement*>(m_pTrailer->Lookup("ID"));
834 if (pID)
835 {
836 const std::vector<PDFElement*>& rElements = pID->GetElements();
837 m_aEditBuffer.WriteCharPtr("/ID [ <");
838 for (size_t i = 0; i < rElements.size(); ++i)
839 {
840 auto pIDString = dynamic_cast<PDFHexStringElement*>(rElements[i]);
841 if (!pIDString)
842 continue;
843
844 m_aEditBuffer.WriteOString(pIDString->GetValue());
845 if ((i + 1) < rElements.size())
847 }
849 }
850
851 if (!m_aStartXRefs.empty())
852 {
853 // Write location of the previous cross-reference section.
854 m_aEditBuffer.WriteCharPtr("/Prev ");
856 }
857
859 }
860}
861
862bool PDFDocument::Sign(const uno::Reference<security::XCertificate>& xCertificate,
863 const OUString& rDescription, bool bAdES)
864{
867
868 sal_uInt64 nSignatureLastByteRangeOffset = 0;
869 sal_Int64 nSignatureContentOffset = 0;
870 sal_Int32 nSignatureId = WriteSignatureObject(
871 rDescription, bAdES, nSignatureLastByteRangeOffset, nSignatureContentOffset);
872
873 tools::Rectangle aSignatureRectangle;
874 sal_Int32 nAppearanceId = WriteAppearanceObject(aSignatureRectangle);
875
876 std::vector<PDFObjectElement*> aPages = GetPages();
877 if (aPages.empty())
878 {
879 SAL_WARN("vcl.filter", "PDFDocument::Sign: found no pages");
880 return false;
881 }
882
883 size_t nPage = 0;
884 if (m_nSignaturePage < aPages.size())
885 {
886 nPage = m_nSignaturePage;
887 }
888 if (!aPages[nPage])
889 {
890 SAL_WARN("vcl.filter", "PDFDocument::Sign: failed to find page #" << nPage);
891 return false;
892 }
893
894 PDFObjectElement& rPage = *aPages[nPage];
895 sal_Int32 nAnnotId = WriteAnnotObject(rPage, nSignatureId, nAppearanceId, aSignatureRectangle);
896
897 if (!WritePageObject(rPage, nAnnotId))
898 {
899 SAL_WARN("vcl.filter", "PDFDocument::Sign: failed to write the updated Page object");
900 return false;
901 }
902
903 PDFReferenceElement* pRoot = nullptr;
904 if (!WriteCatalogObject(nAnnotId, pRoot))
905 {
906 SAL_WARN("vcl.filter", "PDFDocument::Sign: failed to write the updated Catalog object");
907 return false;
908 }
909
910 sal_uInt64 nXRefOffset = m_aEditBuffer.Tell();
911 WriteXRef(nXRefOffset, pRoot);
912
913 // Write startxref.
914 m_aEditBuffer.WriteCharPtr("startxref\n");
916 m_aEditBuffer.WriteCharPtr("\n%%EOF\n");
917
918 // Finalize the signature, now that we know the total file size.
919 // Calculate the length of the last byte range.
920 sal_uInt64 nFileEnd = m_aEditBuffer.Tell();
921 sal_Int64 nLastByteRangeLength
922 = nFileEnd - (nSignatureContentOffset + MAX_SIGNATURE_CONTENT_LENGTH + 1);
923 // Write the length to the buffer.
924 m_aEditBuffer.Seek(nSignatureLastByteRangeOffset);
925 OString aByteRangeBuffer = OString::number(nLastByteRangeLength) + " ]";
926 m_aEditBuffer.WriteOString(aByteRangeBuffer);
927
928 // Create the PKCS#7 object.
929 css::uno::Sequence<sal_Int8> aDerEncoded = xCertificate->getEncoded();
930 if (!aDerEncoded.hasElements())
931 {
932 SAL_WARN("vcl.filter", "PDFDocument::Sign: empty certificate");
933 return false;
934 }
935
937 sal_uInt64 nBufferSize1 = nSignatureContentOffset - 1;
938 std::unique_ptr<char[]> aBuffer1(new char[nBufferSize1]);
939 m_aEditBuffer.ReadBytes(aBuffer1.get(), nBufferSize1);
940
941 m_aEditBuffer.Seek(nSignatureContentOffset + MAX_SIGNATURE_CONTENT_LENGTH + 1);
942 sal_uInt64 nBufferSize2 = nLastByteRangeLength;
943 std::unique_ptr<char[]> aBuffer2(new char[nBufferSize2]);
944 m_aEditBuffer.ReadBytes(aBuffer2.get(), nBufferSize2);
945
946 OStringBuffer aCMSHexBuffer;
947 svl::crypto::Signing aSigning(xCertificate);
948 aSigning.AddDataRange(aBuffer1.get(), nBufferSize1);
949 aSigning.AddDataRange(aBuffer2.get(), nBufferSize2);
950 if (!aSigning.Sign(aCMSHexBuffer))
951 {
952 SAL_WARN("vcl.filter", "PDFDocument::Sign: PDFWriter::Sign() failed");
953 return false;
954 }
955
956 assert(aCMSHexBuffer.getLength() <= MAX_SIGNATURE_CONTENT_LENGTH);
957
958 m_aEditBuffer.Seek(nSignatureContentOffset);
959 m_aEditBuffer.WriteOString(aCMSHexBuffer);
960
961 return true;
962}
963
965{
967 rStream.WriteStream(m_aEditBuffer);
968 return rStream.good();
969}
970
972 std::vector<std::unique_ptr<PDFElement>>& rElements,
973 PDFObjectElement* pObjectElement)
974{
975 // Last seen object token.
976 PDFObjectElement* pObject = pObjectElement;
977 PDFNameElement* pObjectKey = nullptr;
978 PDFObjectElement* pObjectStream = nullptr;
979 bool bInXRef = false;
980 // The next number will be an xref offset.
981 bool bInStartXRef = false;
982 // Dictionary depth, so we know when we're outside any dictionaries.
983 int nDepth = 0;
984 // Last seen array token that's outside any dictionaries.
985 PDFArrayElement* pArray = nullptr;
986 // If we're inside an obj/endobj pair.
987 bool bInObject = false;
988
989 while (true)
990 {
991 char ch;
992 rStream.ReadChar(ch);
993 if (rStream.eof())
994 break;
995
996 switch (ch)
997 {
998 case '%':
999 {
1000 auto pComment = new PDFCommentElement(*this);
1001 rElements.push_back(std::unique_ptr<PDFElement>(pComment));
1002 rStream.SeekRel(-1);
1003 if (!rElements.back()->Read(rStream))
1004 {
1005 SAL_WARN("vcl.filter",
1006 "PDFDocument::Tokenize: PDFCommentElement::Read() failed");
1007 return false;
1008 }
1009 if (eMode == TokenizeMode::EOF_TOKEN && !m_aEOFs.empty()
1010 && m_aEOFs.back() == rStream.Tell())
1011 {
1012 // Found EOF and partial parsing requested, we're done.
1013 return true;
1014 }
1015 break;
1016 }
1017 case '<':
1018 {
1019 // Dictionary or hex string.
1020 rStream.ReadChar(ch);
1021 rStream.SeekRel(-2);
1022 if (ch == '<')
1023 {
1024 rElements.push_back(std::unique_ptr<PDFElement>(new PDFDictionaryElement()));
1025 ++nDepth;
1026 }
1027 else
1028 rElements.push_back(std::unique_ptr<PDFElement>(new PDFHexStringElement));
1029 if (!rElements.back()->Read(rStream))
1030 {
1031 SAL_WARN("vcl.filter",
1032 "PDFDocument::Tokenize: PDFDictionaryElement::Read() failed");
1033 return false;
1034 }
1035 break;
1036 }
1037 case '>':
1038 {
1039 rElements.push_back(std::unique_ptr<PDFElement>(new PDFEndDictionaryElement()));
1040 --nDepth;
1041 rStream.SeekRel(-1);
1042 if (!rElements.back()->Read(rStream))
1043 {
1044 SAL_WARN("vcl.filter",
1045 "PDFDocument::Tokenize: PDFEndDictionaryElement::Read() failed");
1046 return false;
1047 }
1048 break;
1049 }
1050 case '[':
1051 {
1052 auto pArr = new PDFArrayElement(pObject);
1053 rElements.push_back(std::unique_ptr<PDFElement>(pArr));
1054 if (nDepth == 0)
1055 {
1056 // The array is attached directly, inform the object.
1057 pArray = pArr;
1058 if (pObject)
1059 {
1060 pObject->SetArray(pArray);
1061 pObject->SetArrayOffset(rStream.Tell());
1062 }
1063 }
1064 ++nDepth;
1065 rStream.SeekRel(-1);
1066 if (!rElements.back()->Read(rStream))
1067 {
1068 SAL_WARN("vcl.filter", "PDFDocument::Tokenize: PDFArrayElement::Read() failed");
1069 return false;
1070 }
1071 break;
1072 }
1073 case ']':
1074 {
1075 rElements.push_back(std::unique_ptr<PDFElement>(new PDFEndArrayElement()));
1076 --nDepth;
1077 rStream.SeekRel(-1);
1078 if (nDepth == 0)
1079 {
1080 if (pObject)
1081 {
1082 pObject->SetArrayLength(rStream.Tell() - pObject->GetArrayOffset());
1083 }
1084 }
1085 if (!rElements.back()->Read(rStream))
1086 {
1087 SAL_WARN("vcl.filter",
1088 "PDFDocument::Tokenize: PDFEndArrayElement::Read() failed");
1089 return false;
1090 }
1091 break;
1092 }
1093 case '/':
1094 {
1095 auto pNameElement = new PDFNameElement();
1096 rElements.push_back(std::unique_ptr<PDFElement>(pNameElement));
1097 rStream.SeekRel(-1);
1098 if (!pNameElement->Read(rStream))
1099 {
1100 SAL_WARN("vcl.filter", "PDFDocument::Tokenize: PDFNameElement::Read() failed");
1101 return false;
1102 }
1103
1104 if (pObject && pObjectKey && pObjectKey->GetValue() == "Type"
1105 && pNameElement->GetValue() == "ObjStm")
1106 pObjectStream = pObject;
1107 else
1108 pObjectKey = pNameElement;
1109 break;
1110 }
1111 case '(':
1112 {
1113 rElements.push_back(std::unique_ptr<PDFElement>(new PDFLiteralStringElement));
1114 rStream.SeekRel(-1);
1115 if (!rElements.back()->Read(rStream))
1116 {
1117 SAL_WARN("vcl.filter",
1118 "PDFDocument::Tokenize: PDFLiteralStringElement::Read() failed");
1119 return false;
1120 }
1121 break;
1122 }
1123 default:
1124 {
1125 if (rtl::isAsciiDigit(static_cast<unsigned char>(ch)) || ch == '-' || ch == '+'
1126 || ch == '.')
1127 {
1128 // Numbering object: an integer or a real.
1129 auto pNumberElement = new PDFNumberElement();
1130 rElements.push_back(std::unique_ptr<PDFElement>(pNumberElement));
1131 rStream.SeekRel(-1);
1132 if (!pNumberElement->Read(rStream))
1133 {
1134 SAL_WARN("vcl.filter",
1135 "PDFDocument::Tokenize: PDFNumberElement::Read() failed");
1136 return false;
1137 }
1138 if (bInStartXRef)
1139 {
1140 bInStartXRef = false;
1141 m_aStartXRefs.push_back(pNumberElement->GetValue());
1142
1143 auto it = m_aOffsetObjects.find(pNumberElement->GetValue());
1144 if (it != m_aOffsetObjects.end())
1145 m_pXRefStream = it->second;
1146 }
1147 else if (bInObject && !nDepth && pObject)
1148 // Number element inside an object, but outside a
1149 // dictionary / array: remember it.
1150 pObject->SetNumberElement(pNumberElement);
1151 }
1152 else if (rtl::isAsciiAlpha(static_cast<unsigned char>(ch)))
1153 {
1154 // Possible keyword, like "obj".
1155 rStream.SeekRel(-1);
1156 OString aKeyword = ReadKeyword(rStream);
1157
1158 bool bObj = aKeyword == "obj";
1159 if (bObj || aKeyword == "R")
1160 {
1161 size_t nElements = rElements.size();
1162 if (nElements < 2)
1163 {
1164 SAL_WARN("vcl.filter", "PDFDocument::Tokenize: expected at least two "
1165 "tokens before 'obj' or 'R' keyword");
1166 return false;
1167 }
1168
1169 auto pObjectNumber
1170 = dynamic_cast<PDFNumberElement*>(rElements[nElements - 2].get());
1171 auto pGenerationNumber
1172 = dynamic_cast<PDFNumberElement*>(rElements[nElements - 1].get());
1173 if (!pObjectNumber || !pGenerationNumber)
1174 {
1175 SAL_WARN("vcl.filter", "PDFDocument::Tokenize: missing object or "
1176 "generation number before 'obj' or 'R' keyword");
1177 return false;
1178 }
1179
1180 if (bObj)
1181 {
1182 pObject = new PDFObjectElement(*this, pObjectNumber->GetValue(),
1183 pGenerationNumber->GetValue());
1184 rElements.push_back(std::unique_ptr<PDFElement>(pObject));
1185 m_aOffsetObjects[pObjectNumber->GetLocation()] = pObject;
1186 m_aIDObjects[pObjectNumber->GetValue()] = pObject;
1187 bInObject = true;
1188 }
1189 else
1190 {
1191 auto pReference = new PDFReferenceElement(*this, *pObjectNumber,
1192 *pGenerationNumber);
1193 rElements.push_back(std::unique_ptr<PDFElement>(pReference));
1194 if (bInObject && nDepth > 0 && pObject)
1195 // Inform the object about a new in-dictionary reference.
1196 pObject->AddDictionaryReference(pReference);
1197 }
1198 if (!rElements.back()->Read(rStream))
1199 {
1200 SAL_WARN("vcl.filter",
1201 "PDFDocument::Tokenize: PDFElement::Read() failed");
1202 return false;
1203 }
1204 }
1205 else if (aKeyword == "stream")
1206 {
1207 // Look up the length of the stream from the parent object's dictionary.
1208 size_t nLength = 0;
1209 for (size_t nElement = 0; nElement < rElements.size(); ++nElement)
1210 {
1211 // Iterate in reverse order.
1212 size_t nIndex = rElements.size() - nElement - 1;
1213 PDFElement* pElement = rElements[nIndex].get();
1214 auto pObj = dynamic_cast<PDFObjectElement*>(pElement);
1215 if (!pObj)
1216 continue;
1217
1218 PDFElement* pLookup = pObj->Lookup("Length");
1219 auto pReference = dynamic_cast<PDFReferenceElement*>(pLookup);
1220 if (pReference)
1221 {
1222 // Length is provided as a reference.
1223 nLength = pReference->LookupNumber(rStream);
1224 break;
1225 }
1226
1227 auto pNumber = dynamic_cast<PDFNumberElement*>(pLookup);
1228 if (pNumber)
1229 {
1230 // Length is provided directly.
1231 nLength = pNumber->GetValue();
1232 break;
1233 }
1234
1235 SAL_WARN(
1236 "vcl.filter",
1237 "PDFDocument::Tokenize: found no Length key for stream keyword");
1238 return false;
1239 }
1240
1242 auto pStreamElement = new PDFStreamElement(nLength);
1243 if (pObject)
1244 pObject->SetStream(pStreamElement);
1245 rElements.push_back(std::unique_ptr<PDFElement>(pStreamElement));
1246 if (!rElements.back()->Read(rStream))
1247 {
1248 SAL_WARN("vcl.filter",
1249 "PDFDocument::Tokenize: PDFStreamElement::Read() failed");
1250 return false;
1251 }
1252 }
1253 else if (aKeyword == "endstream")
1254 {
1255 rElements.push_back(std::unique_ptr<PDFElement>(new PDFEndStreamElement));
1256 if (!rElements.back()->Read(rStream))
1257 {
1258 SAL_WARN("vcl.filter",
1259 "PDFDocument::Tokenize: PDFEndStreamElement::Read() failed");
1260 return false;
1261 }
1262 }
1263 else if (aKeyword == "endobj")
1264 {
1265 rElements.push_back(std::unique_ptr<PDFElement>(new PDFEndObjectElement));
1266 if (!rElements.back()->Read(rStream))
1267 {
1268 SAL_WARN("vcl.filter",
1269 "PDFDocument::Tokenize: PDFEndObjectElement::Read() failed");
1270 return false;
1271 }
1273 {
1274 // Found endobj and only object parsing was requested, we're done.
1275 return true;
1276 }
1277
1278 if (pObjectStream)
1279 {
1280 // We're at the end of an object stream, parse the stored objects.
1281 pObjectStream->ParseStoredObjects();
1282 pObjectStream = nullptr;
1283 pObjectKey = nullptr;
1284 }
1285 bInObject = false;
1286 }
1287 else if (aKeyword == "true" || aKeyword == "false")
1288 rElements.push_back(std::unique_ptr<PDFElement>(
1289 new PDFBooleanElement(aKeyword.toBoolean())));
1290 else if (aKeyword == "null")
1291 rElements.push_back(std::unique_ptr<PDFElement>(new PDFNullElement));
1292 else if (aKeyword == "xref")
1293 // Allow 'f' and 'n' keywords.
1294 bInXRef = true;
1295 else if (bInXRef && (aKeyword == "f" || aKeyword == "n"))
1296 {
1297 }
1298 else if (aKeyword == "trailer")
1299 {
1300 auto pTrailer = new PDFTrailerElement(*this);
1301
1302 // Make it possible to find this trailer later by offset.
1303 pTrailer->Read(rStream);
1304 m_aOffsetTrailers[pTrailer->GetLocation()] = pTrailer;
1305
1306 // When reading till the first EOF token only, remember
1307 // just the first trailer token.
1309 m_pTrailer = pTrailer;
1310 rElements.push_back(std::unique_ptr<PDFElement>(pTrailer));
1311 }
1312 else if (aKeyword == "startxref")
1313 {
1314 bInStartXRef = true;
1315 }
1316 else
1317 {
1318 SAL_WARN("vcl.filter", "PDFDocument::Tokenize: unexpected '"
1319 << aKeyword << "' keyword at byte position "
1320 << rStream.Tell());
1321 return false;
1322 }
1323 }
1324 else
1325 {
1326 auto uChar = static_cast<unsigned char>(ch);
1327 // Be more lenient and allow unexpected null char
1328 if (!rtl::isAsciiWhiteSpace(uChar) && uChar != 0)
1329 {
1330 SAL_WARN("vcl.filter",
1331 "PDFDocument::Tokenize: unexpected character with code "
1332 << sal_Int32(ch) << " at byte position " << rStream.Tell());
1333 return false;
1334 }
1335 SAL_WARN_IF(uChar == 0, "vcl.filter",
1336 "PDFDocument::Tokenize: unexpected null character at "
1337 << rStream.Tell() << " - ignoring");
1338 }
1339 break;
1340 }
1341 }
1342 }
1343
1344 return true;
1345}
1346
1348{
 // Records pObject in m_aIDObjects under the key nID.
 // NOTE(review): the declaration line of this function (listing line 1347)
 // is absent from this extract; nID and pObject are its parameters.
1349 m_aIDObjects[nID] = pObject;
1350}
1351
1353{
 // Parses a whole PDF from rStream: checks the "%PDF-" magic, copies the
 // stream into m_aEditBuffer, walks the chain of cross-reference sections
 // (newest first, following each section's Prev entry) and finally tokenizes
 // the complete stream from offset 0.
 // Returns false on a header mismatch, when no startxref offset is found,
 // when a non-empty keyword other than "xref" starts a section, or when
 // tokenizing fails.
 // NOTE(review): the declaration line (listing line 1352) is absent from
 // this extract.
1354 // Check file magic.
1355 std::vector<sal_Int8> aHeader(5);
1356 rStream.Seek(0);
 // NOTE(review): the ReadBytes() result is not checked; aHeader starts out
 // zero-filled, so a short read fails the comparison below.
1357 rStream.ReadBytes(aHeader.data(), aHeader.size());
1358 if (aHeader[0] != '%' || aHeader[1] != 'P' || aHeader[2] != 'D' || aHeader[3] != 'F'
1359 || aHeader[4] != '-')
1360 {
1361 SAL_WARN("vcl.filter", "PDFDocument::Read: header mismatch");
1362 return false;
1363 }
1364
1365 // Allow later editing of the contents in-memory.
1366 rStream.Seek(0);
1367 m_aEditBuffer.WriteStream(rStream);
1368
1369 // Look up the offset of the xref table.
1370 size_t nStartXRef = FindStartXRef(rStream);
1371 SAL_INFO("vcl.filter", "PDFDocument::Read: nStartXRef is " << nStartXRef);
1372 if (nStartXRef == 0)
1373 {
1374 SAL_WARN("vcl.filter", "PDFDocument::Read: found no xref start offset");
1375 return false;
1376 }
 // One iteration per cross-reference section; the loop only ends once a
 // section provides no numeric Prev entry.
 // NOTE(review): nothing visible here detects a Prev chain that points back
 // to an offset that was already processed.
1377 while (true)
1378 {
1379 rStream.Seek(nStartXRef);
1380 OString aKeyword = ReadKeyword(rStream);
 // No keyword at the offset: handle it as a cross-reference stream object
 // instead of a classic "xref" table.
1381 if (aKeyword.isEmpty())
1382 ReadXRefStream(rStream);
1383
1384 else
1385 {
1386 if (aKeyword != "xref")
1387 {
1388 SAL_WARN("vcl.filter", "PDFDocument::Read: xref is not the first keyword");
1389 return false;
1390 }
1391 ReadXRef(rStream);
1392 if (!Tokenize(rStream, TokenizeMode::EOF_TOKEN, m_aElements, nullptr))
1393 {
1394 SAL_WARN("vcl.filter", "PDFDocument::Read: failed to tokenizer trailer after xref");
1395 return false;
1396 }
1397 }
1398
1399 PDFNumberElement* pPrev = nullptr;
1400 if (m_pTrailer)
1401 {
1402 pPrev = dynamic_cast<PDFNumberElement*>(m_pTrailer->Lookup("Prev"));
1403
1404 // Remember the offset of this trailer in the correct order. It's
1405 // possible that newer trailers don't have a larger offset.
 // NOTE(review): listing line 1406, the statement the comment above refers
 // to, is absent from this extract.
1407 }
1408 else if (m_pXRefStream)
1409 pPrev = dynamic_cast<PDFNumberElement*>(m_pXRefStream->Lookup("Prev"));
1410 if (pPrev)
1411 nStartXRef = pPrev->GetValue();
1412
1413 // Reset state, except the edit buffer.
1414 m_aElements.clear();
1415 m_aOffsetObjects.clear();
1416 m_aIDObjects.clear();
1417 m_aStartXRefs.clear();
1418 m_aEOFs.clear();
1419 m_pTrailer = nullptr;
1420 m_pXRefStream = nullptr;
1421 if (!pPrev)
1422 break;
1423 }
1424
1425 // Then we can tokenize the stream.
1426 rStream.Seek(0);
1427 return Tokenize(rStream, TokenizeMode::END_OF_STREAM, m_aElements, nullptr);
1428}
1429
1431{
 // Collects the run of ASCII letters at the current position of rStream and
 // returns it. The result is empty when the very first read hits
 // end-of-stream or the first character is not a letter.
 // NOTE(review): the declaration line (listing line 1430) is absent from
 // this extract.
1432 OStringBuffer aBuf;
1433 char ch;
1434 rStream.ReadChar(ch);
1435 if (rStream.eof())
1436 return {};
1437 while (rtl::isAsciiAlpha(static_cast<unsigned char>(ch)))
1438 {
1439 aBuf.append(ch);
1440 rStream.ReadChar(ch);
 // End of stream in the middle of a keyword: return what was collected,
 // without stepping back.
1441 if (rStream.eof())
1442 return aBuf.toString();
1443 }
 // Un-read the first non-letter so the caller sees it again.
1444 rStream.SeekRel(-1);
1445 return aBuf.toString();
1446}
1447
1449{
 // Searches the last (at most) 1024 bytes of rStream for the final
 // "startxref" keyword and returns the number that follows it. Returns 0
 // when the keyword is not found, the stream ends right after it, or no
 // number can be read.
 // NOTE(review): the declaration line (listing line 1448) is absent from
 // this extract.
1450 // Find the "startxref" token, somewhere near the end of the document.
1451 std::vector<char> aBuf(1024);
1452 rStream.Seek(STREAM_SEEK_TO_END);
1453 if (rStream.Tell() > aBuf.size())
1454 rStream.SeekRel(static_cast<sal_Int64>(-1) * aBuf.size());
1455 else
1456 // The document is really short, then just read it from the start.
1457 rStream.Seek(0);
 // Remember where the peeked window starts, read it, then rewind to that
 // start so the relative seek further down is measured from it.
1458 size_t nBeforePeek = rStream.Tell();
1459 size_t nSize = rStream.ReadBytes(aBuf.data(), aBuf.size());
1460 rStream.Seek(nBeforePeek);
1461 if (nSize != aBuf.size())
1462 aBuf.resize(nSize);
1463 OString aPrefix("startxref");
1464 // Find the last startxref at the end of the document.
1465 auto itLastValid = aBuf.end();
1466 auto it = aBuf.begin();
1467 while (true)
1468 {
1469 it = std::search(it, aBuf.end(), aPrefix.getStr(), aPrefix.getStr() + aPrefix.getLength());
1470 if (it == aBuf.end())
1471 break;
1472
 // Keep this match and continue one byte further to look for a later one.
1473 itLastValid = it;
1474 ++it;
1475 }
1476 if (itLastValid == aBuf.end())
1477 {
1478 SAL_WARN("vcl.filter", "PDFDocument::FindStartXRef: found no startxref");
1479 return 0;
1480 }
1481
 // Position the stream just behind the last "startxref" keyword.
1482 rStream.SeekRel(itLastValid - aBuf.begin() + aPrefix.getLength());
1483 if (rStream.eof())
1484 {
1485 SAL_WARN("vcl.filter",
1486 "PDFDocument::FindStartXRef: unexpected end of stream after startxref");
1487 return 0;
1488 }
1489
 // NOTE(review): listing line 1490 is absent from this extract.
1491 PDFNumberElement aNumber;
1492 if (!aNumber.Read(rStream))
1493 return 0;
1494 return aNumber.GetValue();
1495}
1496
1498{
 // Reads a cross-reference stream object starting at the current position
 // of rStream: tokenizes the object, inflates its FlateDecode data, undoes
 // the row predictor and stores one XRefEntry in m_aXRef for every index
 // that has no entry yet. Every failure path logs a warning and returns
 // without reporting an error to the caller.
 // NOTE(review): the declaration line (listing line 1497) is absent from
 // this extract.
1499 // Look up the stream length in the object dictionary.
1500 if (!Tokenize(rStream, TokenizeMode::END_OF_OBJECT, m_aElements, nullptr))
1501 {
1502 SAL_WARN("vcl.filter", "PDFDocument::ReadXRefStream: failed to read object");
1503 return;
1504 }
1505
1506 if (m_aElements.empty())
1507 {
1508 SAL_WARN("vcl.filter", "PDFDocument::ReadXRefStream: no tokens found");
1509 return;
1510 }
1511
 // The first object token in m_aElements is taken as the xref stream object.
1512 PDFObjectElement* pObject = nullptr;
1513 for (const auto& pElement : m_aElements)
1514 {
1515 if (auto pObj = dynamic_cast<PDFObjectElement*>(pElement.get()))
1516 {
1517 pObject = pObj;
1518 break;
1519 }
1520 }
1521 if (!pObject)
1522 {
1523 SAL_WARN("vcl.filter", "PDFDocument::ReadXRefStream: no object token found");
1524 return;
1525 }
1526
1527 // So that the Prev key can be looked up later.
 // NOTE(review): listing line 1528, the statement the comment above refers
 // to, is absent from this extract.
1529
1530 PDFElement* pLookup = pObject->Lookup("Length");
1531 auto pNumber = dynamic_cast<PDFNumberElement*>(pLookup);
1532 if (!pNumber)
1533 {
1534 SAL_WARN("vcl.filter", "PDFDocument::ReadXRefStream: stream length is not provided");
1535 return;
1536 }
1537 sal_uInt64 nLength = pNumber->GetValue();
1538
1539 // Look up the stream offset.
1540 PDFStreamElement* pStream = nullptr;
1541 for (const auto& pElement : m_aElements)
1542 {
1543 if (auto pS = dynamic_cast<PDFStreamElement*>(pElement.get()))
1544 {
1545 pStream = pS;
1546 break;
1547 }
1548 }
1549 if (!pStream)
1550 {
1551 SAL_WARN("vcl.filter", "PDFDocument::ReadXRefStream: no stream token found");
1552 return;
1553 }
1554
1555 // Read and decompress it.
1556 rStream.Seek(pStream->GetOffset());
 // NOTE(review): nLength comes straight from the Length entry and is not
 // compared with the remaining stream size before the buffer is allocated;
 // the ReadBytes() result is not checked either.
1557 std::vector<char> aBuf(nLength);
1558 rStream.ReadBytes(aBuf.data(), aBuf.size());
1559
1560 auto pFilter = dynamic_cast<PDFNameElement*>(pObject->Lookup("Filter"));
1561 if (!pFilter)
1562 {
1563 SAL_WARN("vcl.filter", "PDFDocument::ReadXRefStream: no Filter found");
1564 return;
1565 }
1566
1567 if (pFilter->GetValue() != "FlateDecode")
1568 {
1569 SAL_WARN("vcl.filter",
1570 "PDFDocument::ReadXRefStream: unexpected filter: " << pFilter->GetValue());
1571 return;
1572 }
1573
 // Defaults used when DecodeParms is missing or lacks the respective key.
1574 int nColumns = 1;
1575 int nPredictor = 1;
1576 if (auto pDecodeParams = dynamic_cast<PDFDictionaryElement*>(pObject->Lookup("DecodeParms")))
1577 {
1578 const std::map<OString, PDFElement*>& rItems = pDecodeParams->GetItems();
1579 auto it = rItems.find("Columns");
1580 if (it != rItems.end())
1581 if (auto pColumns = dynamic_cast<PDFNumberElement*>(it->second))
1582 nColumns = pColumns->GetValue();
1583 it = rItems.find("Predictor");
1584 if (it != rItems.end())
1585 if (auto pPredictor = dynamic_cast<PDFNumberElement*>(it->second))
1586 nPredictor = pPredictor->GetValue();
1587 }
1588
1589 SvMemoryStream aSource(aBuf.data(), aBuf.size(), StreamMode::READ);
1590 SvMemoryStream aStream;
1591 ZCodec aZCodec;
1592 aZCodec.BeginCompression();
1593 aZCodec.Decompress(aSource, aStream);
1594 if (!aZCodec.EndCompression())
1595 {
1596 SAL_WARN("vcl.filter", "PDFDocument::ReadXRefStream: decompression failed");
1597 return;
1598 }
1599
1600 // Look up the first and the last entry we need to read.
1601 auto pIndex = dynamic_cast<PDFArrayElement*>(pObject->Lookup("Index"));
 // Parallel vectors: one (first object, number of objects) pair per
 // subsection.
1602 std::vector<size_t> aFirstObjects;
1603 std::vector<size_t> aNumberOfObjects;
1604 if (!pIndex)
1605 {
 // Without an Index array there is a single subsection: 0 .. Size.
1606 auto pSize = dynamic_cast<PDFNumberElement*>(pObject->Lookup("Size"));
1607 if (pSize)
1608 {
1609 aFirstObjects.push_back(0);
1610 aNumberOfObjects.push_back(pSize->GetValue());
1611 }
1612 else
1613 {
1614 SAL_WARN("vcl.filter", "PDFDocument::ReadXRefStream: Index and Size not found");
1615 return;
1616 }
1617 }
1618 else
1619 {
 // Index elements alternate: even positions hold the first object number,
 // odd positions the number of objects. A trailing unpaired first-object
 // number is ignored.
1620 const std::vector<PDFElement*>& rIndexElements = pIndex->GetElements();
1621 size_t nFirstObject = 0;
1622 for (size_t i = 0; i < rIndexElements.size(); ++i)
1623 {
1624 if (i % 2 == 0)
1625 {
1626 auto pFirstObject = dynamic_cast<PDFNumberElement*>(rIndexElements[i]);
1627 if (!pFirstObject)
1628 {
1629 SAL_WARN("vcl.filter",
1630 "PDFDocument::ReadXRefStream: Index has no first object");
1631 return;
1632 }
1633 nFirstObject = pFirstObject->GetValue();
1634 continue;
1635 }
1636
1637 auto pNumberOfObjects = dynamic_cast<PDFNumberElement*>(rIndexElements[i]);
1638 if (!pNumberOfObjects)
1639 {
1640 SAL_WARN("vcl.filter",
1641 "PDFDocument::ReadXRefStream: Index has no number of objects");
1642 return;
1643 }
1644 aFirstObjects.push_back(nFirstObject);
1645 aNumberOfObjects.push_back(pNumberOfObjects->GetValue());
1646 }
1647 }
1648
1649 // Look up the format of a single entry.
1650 const int nWSize = 3;
1651 auto pW = dynamic_cast<PDFArrayElement*>(pObject->Lookup("W"));
1652 if (!pW || pW->GetElements().size() < nWSize)
1653 {
1654 SAL_WARN("vcl.filter", "PDFDocument::ReadXRefStream: W not found or has < 3 elements");
1655 return;
1656 }
1657 int aW[nWSize];
1658 // First character is the (kind of) repeated predictor.
1659 int nLineLength = 1;
 // NOTE(review): the W values are not checked for being negative or overly
 // large before they are summed into nLineLength, which sizes the row
 // buffers below.
1660 for (size_t i = 0; i < nWSize; ++i)
1661 {
1662 auto pI = dynamic_cast<PDFNumberElement*>(pW->GetElements()[i]);
1663 if (!pI)
1664 {
1665 SAL_WARN("vcl.filter", "PDFDocument::ReadXRefStream: W contains non-number");
1666 return;
1667 }
1668 aW[i] = pI->GetValue();
1669 nLineLength += aW[i];
1670 }
1671
1672 if (nPredictor > 1 && nLineLength - 1 != nColumns)
1673 {
1674 SAL_WARN("vcl.filter",
1675 "PDFDocument::ReadXRefStream: /DecodeParms/Columns is inconsistent with /W");
1676 return;
1677 }
1678
1679 aStream.Seek(0);
1680 for (size_t nSubSection = 0; nSubSection < aFirstObjects.size(); ++nSubSection)
1681 {
1682 size_t nFirstObject = aFirstObjects[nSubSection];
1683 size_t nNumberOfObjects = aNumberOfObjects[nSubSection];
1684
1685 // This is the line as read from the stream.
1686 std::vector<unsigned char> aOrigLine(nLineLength);
1687 // This is the line as it appears after tweaking according to nPredictor.
1688 std::vector<unsigned char> aFilteredLine(nLineLength);
1689 for (size_t nEntry = 0; nEntry < nNumberOfObjects; ++nEntry)
1690 {
1691 size_t nIndex = nFirstObject + nEntry;
1692
 // NOTE(review): the ReadBytes() result is not checked, so a short
 // decompressed stream is not detected here.
1693 aStream.ReadBytes(aOrigLine.data(), aOrigLine.size());
 // The leading byte of each row must equal nPredictor - 10 when a
 // predictor is in use.
1694 if (nPredictor > 1 && aOrigLine[0] + 10 != nPredictor)
1695 {
1696 SAL_WARN("vcl.filter", "PDFDocument::ReadXRefStream: in-stream predictor is "
1697 "inconsistent with /DecodeParms/Predictor for object #"
1698 << nIndex);
1699 return;
1700 }
1701
1702 for (int i = 0; i < nLineLength; ++i)
1703 {
1704 switch (nPredictor)
1705 {
1706 case 1:
1707 // No prediction.
 // NOTE(review): in this case aFilteredLine is never written, so the
 // field decoding below reads its initial zeros rather than aOrigLine.
 // Confirm whether that is intended.
1708 break;
1709 case 12:
1710 // PNG prediction: up (on all rows).
 // aFilteredLine still holds the previous row of this subsection, so
 // adding the current raw row accumulates row over row.
1711 aFilteredLine[i] = aFilteredLine[i] + aOrigLine[i];
1712 break;
1713 default:
1714 SAL_WARN("vcl.filter", "PDFDocument::ReadXRefStream: unexpected predictor: "
1715 << nPredictor);
1716 return;
1717 }
1718 }
1719
 // The three fields below are decoded as big-endian integers of aW[0],
 // aW[1] and aW[2] bytes respectively.
1720 // First character is already handled above.
1721 int nPos = 1;
1722 size_t nType = 0;
1723 // Start of the current field in the stream data.
1724 int nOffset = nPos;
1725 for (; nPos < nOffset + aW[0]; ++nPos)
1726 {
1727 unsigned char nCh = aFilteredLine[nPos];
1728 nType = (nType << 8) + nCh;
1729 }
1730
1731 // Start of the object in the file stream.
1732 size_t nStreamOffset = 0;
1733 nOffset = nPos;
1734 for (; nPos < nOffset + aW[1]; ++nPos)
1735 {
1736 unsigned char nCh = aFilteredLine[nPos];
1737 nStreamOffset = (nStreamOffset << 8) + nCh;
1738 }
1739
1740 // Generation number of the object.
 // NOTE(review): nGenerationNumber is computed but not used by any line
 // visible in this extract.
1741 size_t nGenerationNumber = 0;
1742 nOffset = nPos;
1743 for (; nPos < nOffset + aW[2]; ++nPos)
1744 {
1745 unsigned char nCh = aFilteredLine[nPos];
1746 nGenerationNumber = (nGenerationNumber << 8) + nCh;
1747 }
1748
1749 // Ignore invalid nType.
1750 if (nType <= 2)
1751 {
 // Only fill in indices that have no entry yet; existing entries win.
1752 if (m_aXRef.find(nIndex) == m_aXRef.end())
1753 {
1754 XRefEntry aEntry;
1755 switch (nType)
1756 {
 // NOTE(review): listing lines 1758, 1761 and 1764 (one statement per
 // case) are absent from this extract.
1757 case 0:
1759 break;
1760 case 1:
1762 break;
1763 case 2:
1765 break;
1766 }
1767 aEntry.SetOffset(nStreamOffset);
1768 m_aXRef[nIndex] = aEntry;
1769 }
1770 }
1771 }
1772 }
1773}
1774
1776{
 // Reads the subsections of a classic cross-reference table from rStream.
 // Each subsection is "<first object> <number of entries>" followed by that
 // many "<offset> <generation> f|n" entries. Entries are stored in m_aXRef
 // only for indices that have none yet. Returns when the next token is not
 // a number, or after logging a warning on malformed input.
 // NOTE(review): the declaration line (listing line 1775) and listing lines
 // 1777, 1794, 1812, 1820, 1828 and 1846 are absent from this extract.
1778
1779 while (true)
1780 {
1781 PDFNumberElement aFirstObject;
1782 if (!aFirstObject.Read(rStream))
1783 {
1784 // Next token is not a number, it'll be the trailer.
1785 return;
1786 }
1787
1788 if (aFirstObject.GetValue() < 0)
1789 {
1790 SAL_WARN("vcl.filter", "PDFDocument::ReadXRef: expected first object number >= 0");
1791 return;
1792 }
1793
1795 PDFNumberElement aNumberOfEntries;
1796 if (!aNumberOfEntries.Read(rStream))
1797 {
1798 SAL_WARN("vcl.filter", "PDFDocument::ReadXRef: failed to read number of entries");
1799 return;
1800 }
1801
1802 if (aNumberOfEntries.GetValue() < 0)
1803 {
1804 SAL_WARN("vcl.filter", "PDFDocument::ReadXRef: expected zero or more entries");
1805 return;
1806 }
1807
1808 size_t nSize = aNumberOfEntries.GetValue();
1809 for (size_t nEntry = 0; nEntry < nSize; ++nEntry)
1810 {
1811 size_t nIndex = aFirstObject.GetValue() + nEntry;
1813 PDFNumberElement aOffset;
1814 if (!aOffset.Read(rStream))
1815 {
1816 SAL_WARN("vcl.filter", "PDFDocument::ReadXRef: failed to read offset");
1817 return;
1818 }
1819
1821 PDFNumberElement aGenerationNumber;
1822 if (!aGenerationNumber.Read(rStream))
1823 {
1824 SAL_WARN("vcl.filter", "PDFDocument::ReadXRef: failed to read generation number");
1825 return;
1826 }
1827
1829 OString aKeyword = ReadKeyword(rStream);
1830 if (aKeyword != "f" && aKeyword != "n")
1831 {
1832 SAL_WARN("vcl.filter", "PDFDocument::ReadXRef: unexpected keyword");
1833 return;
1834 }
1835 // xrefs are read in reverse order, so never update an existing
1836 // offset with an older one.
1837 if (m_aXRef.find(nIndex) == m_aXRef.end())
1838 {
1839 XRefEntry aEntry;
1840 aEntry.SetOffset(aOffset.GetValue());
1841 // Initially only the first entry is dirty.
1842 if (nIndex == 0)
1843 aEntry.SetDirty(true);
1844 m_aXRef[nIndex] = aEntry;
1845 }
1847 }
1848 }
1849}
1850
1852{
 // Advances rStream past any run of ASCII whitespace and leaves it
 // positioned on the first non-whitespace character, or at end-of-stream
 // when only whitespace remains.
 // NOTE(review): the declaration line (listing line 1851) is absent from
 // this extract.
1853 char ch = 0;
1854
1855 while (true)
1856 {
1857 rStream.ReadChar(ch);
1858 if (rStream.eof())
1859 break;
1860
1861 if (!rtl::isAsciiWhiteSpace(static_cast<unsigned char>(ch)))
1862 {
 // Un-read the character that ended the whitespace run.
1863 rStream.SeekRel(-1);
1864 return;
1865 }
1866 }
1867}
1868
1870{
 // Advances rStream past any run of '\n' and '\r' characters and leaves it
 // positioned on the first other character, or at end-of-stream when only
 // line breaks remain.
 // NOTE(review): the declaration line (listing line 1869) is absent from
 // this extract.
1871 char ch = 0;
1872
1873 while (true)
1874 {
1875 rStream.ReadChar(ch);
1876 if (rStream.eof())
1877 break;
1878
1879 if (ch != '\n' && ch != '\r')
1880 {
 // Un-read the character that ended the run of line breaks.
1881 rStream.SeekRel(-1);
1882 return;
1883 }
1884 }
1885}
1886
1887size_t PDFDocument::GetObjectOffset(size_t nIndex) const
1888{
1889 auto it = m_aXRef.find(nIndex);
1890 if (it == m_aXRef.end() || it->second.GetType() == XRefEntryType::COMPRESSED)
1891 {
1892 SAL_WARN("vcl.filter", "PDFDocument::GetObjectOffset: wanted to look up index #"
1893 << nIndex << ", but failed");
1894 return 0;
1895 }
1896
1897 return it->second.GetOffset();
1898}
1899
1900const std::vector<std::unique_ptr<PDFElement>>& PDFDocument::GetElements() const
1901{
1902 return m_aElements;
1903}
1904
1906static void visitPages(PDFObjectElement* pPages, std::vector<PDFObjectElement*>& rRet)
1907{
1908 auto pKids = dynamic_cast<PDFArrayElement*>(pPages->Lookup("Kids"));
1909 if (!pKids)
1910 {
1911 SAL_WARN("vcl.filter", "visitPages: pages has no kids");
1912 return;
1913 }
1914
1915 pPages->setVisiting(true);
1916
1917 for (const auto& pKid : pKids->GetElements())
1918 {
1919 auto pReference = dynamic_cast<PDFReferenceElement*>(pKid);
1920 if (!pReference)
1921 continue;
1922
1923 PDFObjectElement* pKidObject = pReference->LookupObject();
1924 if (!pKidObject)
1925 continue;
1926
1927 // detect if visiting reenters itself
1928 if (pKidObject->alreadyVisiting())
1929 {
1930 SAL_WARN("vcl.filter", "visitPages: loop in hierarchy");
1931 continue;
1932 }
1933
1934 auto pName = dynamic_cast<PDFNameElement*>(pKidObject->Lookup("Type"));
1935 if (pName && pName->GetValue() == "Pages")
1936 // Pages inside pages: recurse.
1937 visitPages(pKidObject, rRet);
1938 else
1939 // Found an actual page.
1940 rRet.push_back(pKidObject);
1941 }
1942
1943 pPages->setVisiting(false);
1944}
1945
1947{
 // Resolves the document catalog: takes the Root reference from the trailer
 // registered for the first recorded trailer offset, or from the xref
 // stream when no such trailer is found, and returns the object it points
 // to. Returns nullptr (with a warning) when no Root reference is found.
 // NOTE(review): the declaration line (listing line 1946) is absent from
 // this extract.
1948 PDFReferenceElement* pRoot = nullptr;
1949
1950 PDFTrailerElement* pTrailer = nullptr;
1951 if (!m_aTrailerOffsets.empty())
1952 {
1953 // Get access to the latest trailer, and work with the keys of that
1954 // one.
1955 auto it = m_aOffsetTrailers.find(m_aTrailerOffsets[0]);
1956 if (it != m_aOffsetTrailers.end())
1957 pTrailer = it->second;
1958 }
1959
 // A trailer takes precedence; the xref stream is consulted only when no
 // trailer was found above.
1960 if (pTrailer)
1961 pRoot = dynamic_cast<PDFReferenceElement*>(pTrailer->Lookup("Root"));
1962 else if (m_pXRefStream)
1963 pRoot = dynamic_cast<PDFReferenceElement*>(m_pXRefStream->Lookup("Root"));
1964
1965 if (!pRoot)
1966 {
1967 SAL_WARN("vcl.filter", "PDFDocument::GetCatalog: trailer has no Root key");
1968 return nullptr;
1969 }
1970
1971 return pRoot->LookupObject();
1972}
1973
1974std::vector<PDFObjectElement*> PDFDocument::GetPages()
1975{
1976 std::vector<PDFObjectElement*> aRet;
1977
1978 PDFObjectElement* pCatalog = GetCatalog();
1979 if (!pCatalog)
1980 {
1981 SAL_WARN("vcl.filter", "PDFDocument::GetPages: trailer has no catalog");
1982 return aRet;
1983 }
1984
1985 PDFObjectElement* pPages = pCatalog->LookupObject("Pages");
1986 if (!pPages)
1987 {
1988 SAL_WARN("vcl.filter", "PDFDocument::GetPages: catalog (obj " << pCatalog->GetObjectValue()
1989 << ") has no pages");
1990 return aRet;
1991 }
1992
1993 visitPages(pPages, aRet);
1994
1995 return aRet;
1996}
1997
1998void PDFDocument::PushBackEOF(size_t nOffset) { m_aEOFs.push_back(nOffset); }
1999
2000std::vector<PDFObjectElement*> PDFDocument::GetSignatureWidgets()
2001{
2002 std::vector<PDFObjectElement*> aRet;
2003
2004 std::vector<PDFObjectElement*> aPages = GetPages();
2005
2006 for (const auto& pPage : aPages)
2007 {
2008 if (!pPage)
2009 continue;
2010
2011 PDFElement* pAnnotsElement = pPage->Lookup("Annots");
2012 auto pAnnots = dynamic_cast<PDFArrayElement*>(pAnnotsElement);
2013 if (!pAnnots)
2014 {
2015 // Annots is not an array, see if it's a reference to an object
2016 // with a direct array.
2017 auto pAnnotsRef = dynamic_cast<PDFReferenceElement*>(pAnnotsElement);
2018 if (pAnnotsRef)
2019 {
2020 if (PDFObjectElement* pAnnotsObject = pAnnotsRef->LookupObject())
2021 {
2022 pAnnots = pAnnotsObject->GetArray();
2023 }
2024 }
2025 }
2026
2027 if (!pAnnots)
2028 continue;
2029
2030 for (const auto& pAnnot : pAnnots->GetElements())
2031 {
2032 auto pReference = dynamic_cast<PDFReferenceElement*>(pAnnot);
2033 if (!pReference)
2034 continue;
2035
2036 PDFObjectElement* pAnnotObject = pReference->LookupObject();
2037 if (!pAnnotObject)
2038 continue;
2039
2040 auto pFT = dynamic_cast<PDFNameElement*>(pAnnotObject->Lookup("FT"));
2041 if (!pFT || pFT->GetValue() != "Sig")
2042 continue;
2043
2044 aRet.push_back(pAnnotObject);
2045 }
2046 }
2047
2048 return aRet;
2049}
2050
2051std::vector<unsigned char> PDFDocument::DecodeHexString(PDFHexStringElement const* pElement)
2052{
2053 return svl::crypto::DecodeHexString(pElement->GetValue());
2054}
2055
2057{
 // Decodes the hex string rElement as UTF-16BE text. Returns an empty
 // string unless the decoded bytes start with the FE FF byte-order mark and
 // have an even length.
 // NOTE(review): the declaration line (listing line 2056) is absent from
 // this extract.
2058 std::vector<unsigned char> const encoded(DecodeHexString(&rElement));
2059 // Text strings can be PDF-DocEncoding or UTF-16BE with mandatory BOM;
2060 // only the latter supported is here
2061 if (encoded.size() < 2 || encoded[0] != 0xFE || encoded[1] != 0xFF || (encoded.size() & 1) != 0)
2062 {
2063 return {};
2064 }
2065 OUStringBuffer buf(encoded.size() - 2);
 // Start behind the BOM and combine two bytes (high byte first) per code
 // unit; the even-length check above keeps encoded[i + 1] in range.
2066 for (size_t i = 2; i < encoded.size(); i += 2)
2067 {
2068 buf.append(sal_Unicode((static_cast<sal_uInt16>(encoded[i]) << 8) | encoded[i + 1]));
2069 }
2070 return buf.makeStringAndClear();
2071}
2072
2074 : m_rDoc(rDoc)
2075{
 // Constructor body is empty: the only work is initialising m_rDoc from
 // rDoc in the initializer list above.
 // NOTE(review): the declaration line (listing line 2073) is absent from
 // this extract, so the class this constructor belongs to is not visible.
2076}
2077
2079{
 // Reads a comment up to (not including) the next '\n' / '\r' or the end of
 // the stream, stores it in m_aComment and returns true. For a comment
 // starting with "%%EOF" the position after the terminator is computed in
 // nPos.
 // NOTE(review): the declaration line (listing line 2078) is absent from
 // this extract.
2080 // Read from (including) the % char till (excluding) the end of the line/stream.
2081 OStringBuffer aBuf;
2082 char ch;
2083 rStream.ReadChar(ch);
2084 while (true)
2085 {
2086 if (ch == '\n' || ch == '\r' || rStream.eof())
2087 {
2088 m_aComment = aBuf.makeStringAndClear();
2089
2090 if (m_aComment.startsWith("%%EOF"))
2091 {
2092 sal_uInt64 nPos = rStream.Tell();
2093 if (ch == '\r')
2094 {
 // Peek at the next character without consuming it.
2095 rStream.ReadChar(ch);
2096 rStream.SeekRel(-1);
2097 // If the comment ends with a \r\n, count the \n as well to match Adobe Acrobat
2098 // behavior.
2099 if (ch == '\n')
2100 {
2101 nPos += 1;
2102 }
2103 }
 // NOTE(review): listing line 2104, presumably the statement that consumes
 // nPos, is absent from this extract.
2105 }
2106
2107 SAL_INFO("vcl.filter", "PDFCommentElement::Read: m_aComment is '" << m_aComment << "'");
2108 return true;
2109 }
2110 aBuf.append(ch);
2111 rStream.ReadChar(ch);
2112 }
2113
 // Not reached: the loop above only exits through its return statement.
2114 return false;
2115}
2116
2118
2120{
 // Reads a run of digits, '-', '+' and '.' characters from rStream and
 // records its start in m_nOffset and its byte length in m_nLength.
 // Returns false when the stream is at its end or the first character
 // cannot start a number (that character is un-read).
 // NOTE(review): the declaration line (listing line 2119) is absent from
 // this extract.
2121 OStringBuffer aBuf;
2122 m_nOffset = rStream.Tell();
2123 char ch;
2124 rStream.ReadChar(ch);
2125 if (rStream.eof())
2126 {
2127 return false;
2128 }
2129 if (!rtl::isAsciiDigit(static_cast<unsigned char>(ch)) && ch != '-' && ch != '+' && ch != '.')
2130 {
2131 rStream.SeekRel(-1);
2132 return false;
2133 }
2134 while (!rStream.eof())
2135 {
2136 if (!rtl::isAsciiDigit(static_cast<unsigned char>(ch)) && ch != '-' && ch != '+'
2137 && ch != '.')
2138 {
 // First character behind the number: un-read it and finish.
2139 rStream.SeekRel(-1);
2140 m_nLength = rStream.Tell() - m_nOffset;
 // NOTE(review): listing line 2141 is absent from this extract; presumably
 // it converts aBuf into m_fValue, which is logged below.
2142 aBuf.setLength(0);
2143 SAL_INFO("vcl.filter", "PDFNumberElement::Read: m_fValue is '" << m_fValue << "'");
2144 return true;
2145 }
2146 aBuf.append(ch);
2147 rStream.ReadChar(ch);
2148 }
2149
 // NOTE(review): a number that runs up to the very end of the stream ends
 // here and is reported as a failure.
2150 return false;
2151}
2152
2153sal_uInt64 PDFNumberElement::GetLocation() const { return m_nOffset; }
2154
2155sal_uInt64 PDFNumberElement::GetLength() const { return m_nLength; }
2156
2157bool PDFBooleanElement::Read(SvStream& /*rStream*/) { return true; }
2158
2159bool PDFNullElement::Read(SvStream& /*rStream*/) { return true; }
2160
2162{
 // Reads a hex string of the form "<...>" from rStream: everything between
 // the opening '<' and the first '>' is stored verbatim in m_aValue.
 // Returns false when the first character is not '<' or the stream ends
 // before a '>' is seen.
 // NOTE(review): the declaration line (listing line 2161) is absent from
 // this extract.
2163 char ch;
2164 rStream.ReadChar(ch);
2165 if (ch != '<')
2166 {
2167 SAL_INFO("vcl.filter", "PDFHexStringElement::Read: expected '<' as first character");
2168 return false;
2169 }
2170 rStream.ReadChar(ch);
2171
2172 OStringBuffer aBuf;
2173 while (!rStream.eof())
2174 {
2175 if (ch == '>')
2176 {
2177 m_aValue = aBuf.makeStringAndClear();
2178 SAL_INFO("vcl.filter",
2179 "PDFHexStringElement::Read: m_aValue length is " << m_aValue.getLength());
2180 return true;
2181 }
 // No filtering: any character before '>' is appended as-is.
2182 aBuf.append(ch);
2183 rStream.ReadChar(ch);
2184 }
2185
2186 return false;
2187}
2188
2189const OString& PDFHexStringElement::GetValue() const { return m_aValue; }
2190
2192{
 // Reads a literal string of the form "(...)" from rStream, honouring
 // nested parentheses, and stores the text between the outermost pair in
 // m_aValue. Returns false when the first character is not '(' or the
 // stream ends before the outermost ')' is found.
 // NOTE(review): the declaration line (listing line 2191) is absent from
 // this extract.
2193 char nPrevCh = 0;
2194 char ch = 0;
2195 rStream.ReadChar(ch);
2196 if (ch != '(')
2197 {
 // NOTE(review): this message names PDFHexStringElement although the
 // success message below names PDFLiteralStringElement.
2198 SAL_INFO("vcl.filter", "PDFHexStringElement::Read: expected '(' as first character");
2199 return false;
2200 }
2201 nPrevCh = ch;
2202 rStream.ReadChar(ch);
2203
2204 // Start with 1 nesting level as we read a '(' above already.
2205 int nDepth = 1;
2206 OStringBuffer aBuf;
2207 while (!rStream.eof())
2208 {
 // A parenthesis directly preceded by a backslash does not change the
 // nesting depth.
 // NOTE(review): only the single previous character is inspected, so a
 // parenthesis following an escaped backslash ("\\") is also treated as
 // escaped.
2209 if (ch == '(' && nPrevCh != '\\')
2210 ++nDepth;
2211
2212 if (ch == ')' && nPrevCh != '\\')
2213 --nDepth;
2214
2215 if (nDepth == 0)
2216 {
2217 // ')' of the outermost '(' is reached.
2218 m_aValue = aBuf.makeStringAndClear();
2219 SAL_INFO("vcl.filter",
2220 "PDFLiteralStringElement::Read: m_aValue is '" << m_aValue << "'");
2221 return true;
2222 }
2223 aBuf.append(ch);
2224 nPrevCh = ch;
2225 rStream.ReadChar(ch);
2226 }
2227
2228 return false;
2229}
2230
2231const OString& PDFLiteralStringElement::GetValue() const { return m_aValue; }
2232
2234 : m_rDoc(rDoc)
2235 , m_pDictionaryElement(nullptr)
2236{
 // Constructor body is empty: m_rDoc is initialised from rDoc and
 // m_pDictionaryElement starts out as nullptr.
 // NOTE(review): the declaration line (listing line 2233) is absent from
 // this extract.
2237}
2238
2240{
 // Consumes nothing: only records the current stream position in m_nOffset
 // and reports success.
 // NOTE(review): the declaration line (listing line 2239) is absent from
 // this extract.
2241 m_nOffset = rStream.Tell();
2242 return true;
2243}
2244
2245PDFElement* PDFTrailerElement::Lookup(const OString& rDictionaryKey)
2246{
 // Looks rDictionaryKey up through m_pDictionaryElement->LookupElement(),
 // after an optional parse step, and can return nullptr early.
 // NOTE(review): listing lines 2247, 2249 and 2252 are absent from this
 // extract, so the condition guarding the parse, the declaration of
 // aParser and the condition of the early nullptr return are not visible.
2248 {
2250 aParser.parse(this);
2251 }
2253 return nullptr;
2254 return m_pDictionaryElement->LookupElement(rDictionaryKey);
2255}
2256
2257sal_uInt64 PDFTrailerElement::GetLocation() const { return m_nOffset; }
2258
2259double PDFNumberElement::GetValue() const { return m_fValue; }
2260
2261PDFObjectElement::PDFObjectElement(PDFDocument& rDoc, double fObjectValue, double fGenerationValue)
2262 : m_rDoc(rDoc)
2263 , m_fObjectValue(fObjectValue)
2264 , m_fGenerationValue(fGenerationValue)
2265 , m_pNumberElement(nullptr)
2266 , m_nDictionaryOffset(0)
2267 , m_nDictionaryLength(0)
2268 , m_pDictionaryElement(nullptr)
2269 , m_nArrayOffset(0)
2270 , m_nArrayLength(0)
2271 , m_pArrayElement(nullptr)
2272 , m_pStreamElement(nullptr)
2273 , m_bParsed(false)
2274{
2275}
2276
2278{
 // Consumes nothing: only logs the object and generation values followed by
 // "obj", and reports success.
 // NOTE(review): the declaration line (listing line 2277) is absent from
 // this extract.
2279 SAL_INFO("vcl.filter",
2280 "PDFObjectElement::Read: " << m_fObjectValue << " " << m_fGenerationValue << " obj");
2281 return true;
2282}
2283
2285
2286PDFElement* PDFDictionaryElement::Lookup(const std::map<OString, PDFElement*>& rDictionary,
2287 const OString& rKey)
2288{
2289 auto it = rDictionary.find(rKey);
2290 if (it == rDictionary.end())
2291 return nullptr;
2292
2293 return it->second;
2294}
2295
2297{
2298 auto pKey = dynamic_cast<PDFReferenceElement*>(
2299 PDFDictionaryElement::Lookup(m_aItems, rDictionaryKey));
2300 if (!pKey)
2301 {
2302 SAL_WARN("vcl.filter",
2303 "PDFDictionaryElement::LookupObject: no such key with reference value: "
2304 << rDictionaryKey);
2305 return nullptr;
2306 }
2307
2308 return pKey->LookupObject();
2309}
2310
2312{
2313 return PDFDictionaryElement::Lookup(m_aItems, rDictionaryKey);
2314}
2315
2317{
2318 if (m_bParsed)
2319 return;
2320
2321 if (!m_aElements.empty())
2322 {
2323 // This is a stored object in an object stream.
2325 aParser.parse(this);
2326 }
2327 else
2328 {
2329 // Normal object: elements are stored as members of the document itself.
2331 aParser.parse(this);
2332 }
2333 m_bParsed = true;
2334}
2335
2336PDFElement* PDFObjectElement::Lookup(const OString& rDictionaryKey)
2337{
2340 return nullptr;
2341 return PDFDictionaryElement::Lookup(GetDictionaryItems(), rDictionaryKey);
2342}
2343
2345{
2346 auto pKey = dynamic_cast<PDFReferenceElement*>(Lookup(rDictionaryKey));
2347 if (!pKey)
2348 {
2349 SAL_WARN("vcl.filter", "PDFObjectElement::LookupObject: no such key with reference value: "
2350 << rDictionaryKey);
2351 return nullptr;
2352 }
2353
2354 return pKey->LookupObject();
2355}
2356
2358
2359void PDFObjectElement::SetDictionaryOffset(sal_uInt64 nDictionaryOffset)
2360{
2361 m_nDictionaryOffset = nDictionaryOffset;
2362}
2363
2365{
2367 return m_nDictionaryOffset;
2368}
2369
2370void PDFObjectElement::SetArrayOffset(sal_uInt64 nArrayOffset) { m_nArrayOffset = nArrayOffset; }
2371
2373
2374void PDFDictionaryElement::SetKeyOffset(const OString& rKey, sal_uInt64 nOffset)
2375{
2376 m_aDictionaryKeyOffset[rKey] = nOffset;
2377}
2378
2379void PDFDictionaryElement::SetKeyValueLength(const OString& rKey, sal_uInt64 nLength)
2380{
2382}
2383
2384sal_uInt64 PDFDictionaryElement::GetKeyOffset(const OString& rKey) const
2385{
2386 auto it = m_aDictionaryKeyOffset.find(rKey);
2387 if (it == m_aDictionaryKeyOffset.end())
2388 return 0;
2389
2390 return it->second;
2391}
2392
2393sal_uInt64 PDFDictionaryElement::GetKeyValueLength(const OString& rKey) const
2394{
2395 auto it = m_aDictionaryKeyValueLength.find(rKey);
2396 if (it == m_aDictionaryKeyValueLength.end())
2397 return 0;
2398
2399 return it->second;
2400}
2401
2402const std::map<OString, PDFElement*>& PDFDictionaryElement::GetItems() const { return m_aItems; }
2403
2404void PDFObjectElement::SetDictionaryLength(sal_uInt64 nDictionaryLength)
2405{
2406 m_nDictionaryLength = nDictionaryLength;
2407}
2408
2410{
2412 return m_nDictionaryLength;
2413}
2414
2415void PDFObjectElement::SetArrayLength(sal_uInt64 nArrayLength) { m_nArrayLength = nArrayLength; }
2416
2418
2420{
2422 return m_pDictionaryElement;
2423}
2424
2426{
2427 m_pDictionaryElement = pDictionaryElement;
2428}
2429
2431{
2432 m_pNumberElement = pNumberElement;
2433}
2434
2436
2437const std::vector<PDFReferenceElement*>& PDFObjectElement::GetDictionaryReferences() const
2438{
2440}
2441
2443{
2444 m_aDictionaryReferences.push_back(pReference);
2445}
2446
2447const std::map<OString, PDFElement*>& PDFObjectElement::GetDictionaryItems()
2448{
2451}
2452
2453void PDFObjectElement::SetArray(PDFArrayElement* pArrayElement) { m_pArrayElement = pArrayElement; }
2454
2456{
2457 m_pStreamElement = pStreamElement;
2458}
2459
2461
2463{
2465 return m_pArrayElement;
2466}
2467
2469{
2470 if (!m_pStreamElement)
2471 {
2472 SAL_WARN("vcl.filter", "PDFObjectElement::ParseStoredObjects: no stream");
2473 return;
2474 }
2475
2476 auto pType = dynamic_cast<PDFNameElement*>(Lookup("Type"));
2477 if (!pType || pType->GetValue() != "ObjStm")
2478 {
2479 if (!pType)
2480 SAL_WARN("vcl.filter", "PDFDocument::ReadXRefStream: missing unexpected type");
2481 else
2482 SAL_WARN("vcl.filter",
2483 "PDFDocument::ReadXRefStream: unexpected type: " << pType->GetValue());
2484 return;
2485 }
2486
2487 auto pFilter = dynamic_cast<PDFNameElement*>(Lookup("Filter"));
2488 if (!pFilter || pFilter->GetValue() != "FlateDecode")
2489 {
2490 if (!pFilter)
2491 SAL_WARN("vcl.filter", "PDFDocument::ReadXRefStream: missing filter");
2492 else
2493 SAL_WARN("vcl.filter",
2494 "PDFDocument::ReadXRefStream: unexpected filter: " << pFilter->GetValue());
2495 return;
2496 }
2497
2498 auto pFirst = dynamic_cast<PDFNumberElement*>(Lookup("First"));
2499 if (!pFirst)
2500 {
2501 SAL_WARN("vcl.filter", "PDFObjectElement::ParseStoredObjects: no First");
2502 return;
2503 }
2504
2505 auto pN = dynamic_cast<PDFNumberElement*>(Lookup("N"));
2506 if (!pN)
2507 {
2508 SAL_WARN("vcl.filter", "PDFObjectElement::ParseStoredObjects: no N");
2509 return;
2510 }
2511 size_t nN = pN->GetValue();
2512
2513 auto pLength = dynamic_cast<PDFNumberElement*>(Lookup("Length"));
2514 if (!pLength)
2515 {
2516 SAL_WARN("vcl.filter", "PDFObjectElement::ParseStoredObjects: no length");
2517 return;
2518 }
2519 size_t nLength = pLength->GetValue();
2520
2521 // Read and decompress it.
2522 SvMemoryStream& rEditBuffer = m_rDoc.GetEditBuffer();
2523 rEditBuffer.Seek(m_pStreamElement->GetOffset());
2524 std::vector<char> aBuf(nLength);
2525 rEditBuffer.ReadBytes(aBuf.data(), aBuf.size());
2526 SvMemoryStream aSource(aBuf.data(), aBuf.size(), StreamMode::READ);
2527 SvMemoryStream aStream;
2528 ZCodec aZCodec;
2529 aZCodec.BeginCompression();
2530 aZCodec.Decompress(aSource, aStream);
2531 if (!aZCodec.EndCompression())
2532 {
2533 SAL_WARN("vcl.filter", "PDFObjectElement::ParseStoredObjects: decompression failed");
2534 return;
2535 }
2536
2537 nLength = aStream.TellEnd();
2538 aStream.Seek(0);
2539 std::vector<size_t> aObjNums;
2540 std::vector<size_t> aOffsets;
2541 std::vector<size_t> aLengths;
2542 // First iterate over and find out the lengths.
2543 for (size_t nObject = 0; nObject < nN; ++nObject)
2544 {
2545 PDFNumberElement aObjNum;
2546 if (!aObjNum.Read(aStream))
2547 {
2548 SAL_WARN("vcl.filter",
2549 "PDFObjectElement::ParseStoredObjects: failed to read object number");
2550 return;
2551 }
2552 aObjNums.push_back(aObjNum.GetValue());
2553
2555
2556 PDFNumberElement aByteOffset;
2557 if (!aByteOffset.Read(aStream))
2558 {
2559 SAL_WARN("vcl.filter",
2560 "PDFObjectElement::ParseStoredObjects: failed to read byte offset");
2561 return;
2562 }
2563 aOffsets.push_back(pFirst->GetValue() + aByteOffset.GetValue());
2564
2565 if (aOffsets.size() > 1)
2566 aLengths.push_back(aOffsets.back() - aOffsets[aOffsets.size() - 2]);
2567 if (nObject + 1 == nN)
2568 aLengths.push_back(nLength - aOffsets.back());
2569
2571 }
2572
2573 // Now create streams with the proper length and tokenize the data.
2574 for (size_t nObject = 0; nObject < nN; ++nObject)
2575 {
2576 size_t nObjNum = aObjNums[nObject];
2577 size_t nOffset = aOffsets[nObject];
2578 size_t nLen = aLengths[nObject];
2579
2580 aStream.Seek(nOffset);
2581 m_aStoredElements.push_back(std::make_unique<PDFObjectElement>(m_rDoc, nObjNum, 0));
2582 PDFObjectElement* pStored = m_aStoredElements.back().get();
2583
2584 aBuf.clear();
2585 aBuf.resize(nLen);
2586 aStream.ReadBytes(aBuf.data(), aBuf.size());
2587 SvMemoryStream aStoredStream(aBuf.data(), aBuf.size(), StreamMode::READ);
2588
2590 pStored);
2591 // This is how references know the object is stored inside this object stream.
2592 m_rDoc.SetIDObject(nObjNum, pStored);
2593
2594 // Store the stream of the object in the object stream for later use.
2595 std::unique_ptr<SvMemoryStream> pStreamBuffer(new SvMemoryStream());
2596 aStoredStream.Seek(0);
2597 pStreamBuffer->WriteStream(aStoredStream);
2598 pStored->SetStreamBuffer(pStreamBuffer);
2599 }
2600}
2601
2602std::vector<std::unique_ptr<PDFElement>>& PDFObjectElement::GetStoredElements()
2603{
2604 return m_aElements;
2605}
2606
2608
2609void PDFObjectElement::SetStreamBuffer(std::unique_ptr<SvMemoryStream>& pStreamBuffer)
2610{
2611 m_pStreamBuffer = std::move(pStreamBuffer);
2612}
2613
2615
2617 PDFNumberElement const& rGeneration)
2618 : m_rDoc(rDoc)
2619 , m_fObjectValue(rObject.GetValue())
2620 , m_fGenerationValue(rGeneration.GetValue())
2621 , m_rObject(rObject)
2622{
2623}
2624
2626
2628{
2629 SAL_INFO("vcl.filter",
2630 "PDFReferenceElement::Read: " << m_fObjectValue << " " << m_fGenerationValue << " R");
2631 m_nOffset = rStream.Tell();
2632 return true;
2633}
2634
2635sal_uInt64 PDFReferenceElement::GetOffset() const { return m_nOffset; }
2636
2638{
2639 size_t nOffset = m_rDoc.GetObjectOffset(m_fObjectValue);
2640 if (nOffset == 0)
2641 {
2642 SAL_WARN("vcl.filter", "PDFReferenceElement::LookupNumber: found no offset for object #"
2643 << m_fObjectValue);
2644 return 0;
2645 }
2646
2647 sal_uInt64 nOrigPos = rStream.Tell();
2648 comphelper::ScopeGuard g([&]() { rStream.Seek(nOrigPos); });
2649
2650 rStream.Seek(nOffset);
2651 {
2653 PDFNumberElement aNumber;
2654 bool bRet = aNumber.Read(rStream);
2655 if (!bRet || aNumber.GetValue() != m_fObjectValue)
2656 {
2657 SAL_WARN("vcl.filter",
2658 "PDFReferenceElement::LookupNumber: offset points to not matching object");
2659 return 0;
2660 }
2661 }
2662
2663 {
2665 PDFNumberElement aNumber;
2666 bool bRet = aNumber.Read(rStream);
2667 if (!bRet || aNumber.GetValue() != m_fGenerationValue)
2668 {
2669 SAL_WARN("vcl.filter",
2670 "PDFReferenceElement::LookupNumber: offset points to not matching generation");
2671 return 0;
2672 }
2673 }
2674
2675 {
2677 OString aKeyword = PDFDocument::ReadKeyword(rStream);
2678 if (aKeyword != "obj")
2679 {
2680 SAL_WARN("vcl.filter",
2681 "PDFReferenceElement::LookupNumber: offset doesn't point to an obj keyword");
2682 return 0;
2683 }
2684 }
2685
2687 PDFNumberElement aNumber;
2688 if (!aNumber.Read(rStream))
2689 {
2690 SAL_WARN("vcl.filter",
2691 "PDFReferenceElement::LookupNumber: failed to read referenced number");
2692 return 0;
2693 }
2694
2695 return aNumber.GetValue();
2696}
2697
2699{
2701}
2702
2704{
2705 auto itIDObjects = m_aIDObjects.find(nObjectNumber);
2706
2707 if (itIDObjects != m_aIDObjects.end())
2708 return itIDObjects->second;
2709
2710 SAL_WARN("vcl.filter", "PDFDocument::LookupObject: can't find obj " << nObjectNumber);
2711 return nullptr;
2712}
2713
2715
2717
2719
2721{
2722 char ch;
2723 rStream.ReadChar(ch);
2724 if (ch != '<')
2725 {
2726 SAL_WARN("vcl.filter", "PDFDictionaryElement::Read: unexpected character: " << ch);
2727 return false;
2728 }
2729
2730 if (rStream.eof())
2731 {
2732 SAL_WARN("vcl.filter", "PDFDictionaryElement::Read: unexpected end of file");
2733 return false;
2734 }
2735
2736 rStream.ReadChar(ch);
2737 if (ch != '<')
2738 {
2739 SAL_WARN("vcl.filter", "PDFDictionaryElement::Read: unexpected character: " << ch);
2740 return false;
2741 }
2742
2743 m_nLocation = rStream.Tell();
2744
2745 SAL_INFO("vcl.filter", "PDFDictionaryElement::Read: '<<'");
2746
2747 return true;
2748}
2749
2751
2753
2755{
2756 m_nLocation = rStream.Tell();
2757 char ch;
2758 rStream.ReadChar(ch);
2759 if (ch != '>')
2760 {
2761 SAL_WARN("vcl.filter", "PDFEndDictionaryElement::Read: unexpected character: " << ch);
2762 return false;
2763 }
2764
2765 if (rStream.eof())
2766 {
2767 SAL_WARN("vcl.filter", "PDFEndDictionaryElement::Read: unexpected end of file");
2768 return false;
2769 }
2770
2771 rStream.ReadChar(ch);
2772 if (ch != '>')
2773 {
2774 SAL_WARN("vcl.filter", "PDFEndDictionaryElement::Read: unexpected character: " << ch);
2775 return false;
2776 }
2777
2778 SAL_INFO("vcl.filter", "PDFEndDictionaryElement::Read: '>>'");
2779
2780 return true;
2781}
2782
2784
2786{
2787 char ch;
2788 rStream.ReadChar(ch);
2789 if (ch != '/')
2790 {
2791 SAL_WARN("vcl.filter", "PDFNameElement::Read: unexpected character: " << ch);
2792 return false;
2793 }
2794 m_nLocation = rStream.Tell();
2795
2796 if (rStream.eof())
2797 {
2798 SAL_WARN("vcl.filter", "PDFNameElement::Read: unexpected end of file");
2799 return false;
2800 }
2801
2802 // Read till the first white-space.
2803 OStringBuffer aBuf;
2804 rStream.ReadChar(ch);
2805 while (!rStream.eof())
2806 {
2807 if (rtl::isAsciiWhiteSpace(static_cast<unsigned char>(ch)) || ch == '/' || ch == '['
2808 || ch == ']' || ch == '<' || ch == '>' || ch == '(')
2809 {
2810 rStream.SeekRel(-1);
2811 m_aValue = aBuf.makeStringAndClear();
2812 SAL_INFO("vcl.filter", "PDFNameElement::Read: m_aValue is '" << m_aValue << "'");
2813 return true;
2814 }
2815 aBuf.append(ch);
2816 rStream.ReadChar(ch);
2817 }
2818
2819 return false;
2820}
2821
2822const OString& PDFNameElement::GetValue() const { return m_aValue; }
2823
2824sal_uInt64 PDFNameElement::GetLocation() const { return m_nLocation; }
2825
2828 , m_nOffset(0)
2829{
2830}
2831
2833{
2834 SAL_INFO("vcl.filter", "PDFStreamElement::Read: length is " << m_nLength);
2835 m_nOffset = rStream.Tell();
2836 std::vector<unsigned char> aBytes(m_nLength);
2837 rStream.ReadBytes(aBytes.data(), aBytes.size());
2838 m_aMemory.WriteBytes(aBytes.data(), aBytes.size());
2839
2840 return rStream.good();
2841}
2842
2844
2845sal_uInt64 PDFStreamElement::GetOffset() const { return m_nOffset; }
2846
2847bool PDFEndStreamElement::Read(SvStream& /*rStream*/) { return true; }
2848
2849bool PDFEndObjectElement::Read(SvStream& /*rStream*/) { return true; }
2850
2852 : m_pObject(pObject)
2853{
2854}
2855
2857{
2858 char ch;
2859 rStream.ReadChar(ch);
2860 if (ch != '[')
2861 {
2862 SAL_WARN("vcl.filter", "PDFArrayElement::Read: unexpected character: " << ch);
2863 return false;
2864 }
2865
2866 SAL_INFO("vcl.filter", "PDFArrayElement::Read: '['");
2867
2868 return true;
2869}
2870
2872{
2873 if (m_pObject)
2874 SAL_INFO("vcl.filter",
2875 "PDFArrayElement::PushBack: object is " << m_pObject->GetObjectValue());
2876 m_aElements.push_back(pElement);
2877}
2878
2879const std::vector<PDFElement*>& PDFArrayElement::GetElements() const { return m_aElements; }
2880
2882
2884{
2885 m_nOffset = rStream.Tell();
2886 char ch;
2887 rStream.ReadChar(ch);
2888 if (ch != ']')
2889 {
2890 SAL_WARN("vcl.filter", "PDFEndArrayElement::Read: unexpected character: " << ch);
2891 return false;
2892 }
2893
2894 SAL_INFO("vcl.filter", "PDFEndArrayElement::Read: ']'");
2895
2896 return true;
2897}
2898
2899sal_uInt64 PDFEndArrayElement::GetOffset() const { return m_nOffset; }
2900
2901// PDFObjectParser
2902
2903size_t PDFObjectParser::parse(PDFElement* pParsingElement, size_t nStartIndex, int nCurrentDepth)
2904{
2905 // The index of last parsed element
2906 size_t nReturnIndex = 0;
2907
2908 pParsingElement->setParsing(true);
2909
2910 comphelper::ScopeGuard aGuard([pParsingElement]() { pParsingElement->setParsing(false); });
2911
2912 // Current object, if root is an object, else nullptr
2913 auto pParsingObject = dynamic_cast<PDFObjectElement*>(pParsingElement);
2914 auto pParsingTrailer = dynamic_cast<PDFTrailerElement*>(pParsingElement);
2915
2916 // Current dictionary, if root is an dictionary, else nullptr
2917 auto pParsingDictionary = dynamic_cast<PDFDictionaryElement*>(pParsingElement);
2918
2919 // Current parsing array, if root is an array, else nullptr
2920 auto pParsingArray = dynamic_cast<PDFArrayElement*>(pParsingElement);
2921
2922 // Find out where the dictionary for this object starts.
2923 size_t nIndex = nStartIndex;
2924 for (size_t i = nStartIndex; i < mrElements.size(); ++i)
2925 {
2926 if (mrElements[i].get() == pParsingElement)
2927 {
2928 nIndex = i;
2929 break;
2930 }
2931 }
2932
2933 OString aName;
2934 sal_uInt64 nNameOffset = 0;
2935 std::vector<PDFNumberElement*> aNumbers;
2936
2937 sal_uInt64 nDictionaryOffset = 0;
2938
2939 // Current depth; 1 is current
2940 int nDepth = 0;
2941
2942 for (size_t i = nIndex; i < mrElements.size(); ++i)
2943 {
2944 auto* pCurrentElement = mrElements[i].get();
2945
2946 // Dictionary tokens can be nested, track enter/leave.
2947 if (auto pCurrentDictionary = dynamic_cast<PDFDictionaryElement*>(pCurrentElement))
2948 {
2949 // Handle previously stored number
2950 if (!aNumbers.empty())
2951 {
2952 if (pParsingDictionary)
2953 {
2954 PDFNumberElement* pNumber = aNumbers.back();
2955 sal_uInt64 nLength
2956 = pNumber->GetLocation() + pNumber->GetLength() - nNameOffset;
2957
2958 pParsingDictionary->insert(aName, pNumber);
2959 pParsingDictionary->SetKeyOffset(aName, nNameOffset);
2960 pParsingDictionary->SetKeyValueLength(aName, nLength);
2961 }
2962 else if (pParsingArray)
2963 {
2964 for (auto& pNumber : aNumbers)
2965 pParsingArray->PushBack(pNumber);
2966 }
2967 else
2968 {
2969 SAL_INFO("vcl.filter", "neither Dictionary nor Array available");
2970 }
2971 aName.clear();
2972 aNumbers.clear();
2973 }
2974
2975 nDepth++;
2976
2977 if (nDepth == 1) // pParsingDictionary is the current one
2978 {
2979 // First dictionary start, track start offset.
2980 nDictionaryOffset = pCurrentDictionary->GetLocation();
2981
2982 if (pParsingObject)
2983 {
2984 // Then the toplevel dictionary of the object.
2985 pParsingObject->SetDictionary(pCurrentDictionary);
2986 pParsingObject->SetDictionaryOffset(nDictionaryOffset);
2987 pParsingDictionary = pCurrentDictionary;
2988 }
2989 else if (pParsingTrailer)
2990 {
2991 pParsingTrailer->SetDictionary(pCurrentDictionary);
2992 pParsingDictionary = pCurrentDictionary;
2993 }
2994 }
2995 else if (!pCurrentDictionary->alreadyParsing())
2996 {
2997 if (pParsingArray)
2998 {
2999 pParsingArray->PushBack(pCurrentDictionary);
3000 }
3001 else if (pParsingDictionary)
3002 {
3003 // Dictionary toplevel value.
3004 pParsingDictionary->insert(aName, pCurrentDictionary);
3005 }
3006 else
3007 {
3008 SAL_INFO("vcl.filter", "neither Dictionary nor Array available");
3009 }
3010 // Nested dictionary.
3011 const size_t nNextElementIndex = parse(pCurrentDictionary, i, nCurrentDepth + 1);
3012 i = std::max(i, nNextElementIndex - 1);
3013 }
3014 }
3015 else if (auto pCurrentEndDictionary
3016 = dynamic_cast<PDFEndDictionaryElement*>(pCurrentElement))
3017 {
3018 // Handle previously stored number
3019 if (!aNumbers.empty())
3020 {
3021 if (pParsingDictionary)
3022 {
3023 PDFNumberElement* pNumber = aNumbers.back();
3024 sal_uInt64 nLength
3025 = pNumber->GetLocation() + pNumber->GetLength() - nNameOffset;
3026
3027 pParsingDictionary->insert(aName, pNumber);
3028 pParsingDictionary->SetKeyOffset(aName, nNameOffset);
3029 pParsingDictionary->SetKeyValueLength(aName, nLength);
3030 }
3031 else if (pParsingArray)
3032 {
3033 for (auto& pNumber : aNumbers)
3034 pParsingArray->PushBack(pNumber);
3035 }
3036 else
3037 {
3038 SAL_INFO("vcl.filter", "neither Dictionary nor Array available");
3039 }
3040 aName.clear();
3041 aNumbers.clear();
3042 }
3043
3044 if (pParsingDictionary)
3045 {
3046 pParsingDictionary->SetKeyOffset(aName, nNameOffset);
3047 sal_uInt64 nLength = pCurrentEndDictionary->GetLocation() - nNameOffset + 2;
3048 pParsingDictionary->SetKeyValueLength(aName, nLength);
3049 aName.clear();
3050 }
3051
3052 if (nDepth == 1) // did the parsing ended
3053 {
3054 // Last dictionary end, track length and stop parsing.
3055 if (pParsingObject)
3056 {
3057 sal_uInt64 nDictionaryLength
3058 = pCurrentEndDictionary->GetLocation() - nDictionaryOffset;
3059 pParsingObject->SetDictionaryLength(nDictionaryLength);
3060 }
3061 nReturnIndex = i;
3062 break;
3063 }
3064
3065 nDepth--;
3066 }
3067 else if (auto pCurrentArray = dynamic_cast<PDFArrayElement*>(pCurrentElement))
3068 {
3069 // Handle previously stored number
3070 if (!aNumbers.empty())
3071 {
3072 if (pParsingDictionary)
3073 {
3074 PDFNumberElement* pNumber = aNumbers.back();
3075
3076 sal_uInt64 nLength
3077 = pNumber->GetLocation() + pNumber->GetLength() - nNameOffset;
3078 pParsingDictionary->insert(aName, pNumber);
3079 pParsingDictionary->SetKeyOffset(aName, nNameOffset);
3080 pParsingDictionary->SetKeyValueLength(aName, nLength);
3081 }
3082 else if (pParsingArray)
3083 {
3084 for (auto& pNumber : aNumbers)
3085 pParsingArray->PushBack(pNumber);
3086 }
3087 else
3088 {
3089 SAL_INFO("vcl.filter", "neither Dictionary nor Array available");
3090 }
3091 aName.clear();
3092 aNumbers.clear();
3093 }
3094
3095 nDepth++;
3096 if (nDepth == 1) // pParsingDictionary is the current one
3097 {
3098 if (pParsingObject)
3099 {
3100 pParsingObject->SetArray(pCurrentArray);
3101 pParsingArray = pCurrentArray;
3102 }
3103 }
3104 else if (!pCurrentArray->alreadyParsing())
3105 {
3106 if (pParsingArray)
3107 {
3108 // Array is toplevel
3109 pParsingArray->PushBack(pCurrentArray);
3110 }
3111 else if (pParsingDictionary)
3112 {
3113 // Dictionary toplevel value.
3114 pParsingDictionary->insert(aName, pCurrentArray);
3115 }
3116
3117 const size_t nNextElementIndex = parse(pCurrentArray, i, nCurrentDepth + 1);
3118
3119 // ensure we go forwards and not endlessly loop
3120 i = std::max(i, nNextElementIndex - 1);
3121 }
3122 }
3123 else if (auto pCurrentEndArray = dynamic_cast<PDFEndArrayElement*>(pCurrentElement))
3124 {
3125 // Handle previously stored number
3126 if (!aNumbers.empty())
3127 {
3128 if (pParsingDictionary)
3129 {
3130 PDFNumberElement* pNumber = aNumbers.back();
3131
3132 sal_uInt64 nLength
3133 = pNumber->GetLocation() + pNumber->GetLength() - nNameOffset;
3134 pParsingDictionary->insert(aName, pNumber);
3135 pParsingDictionary->SetKeyOffset(aName, nNameOffset);
3136 pParsingDictionary->SetKeyValueLength(aName, nLength);
3137 }
3138 else if (pParsingArray)
3139 {
3140 for (auto& pNumber : aNumbers)
3141 pParsingArray->PushBack(pNumber);
3142 }
3143 else
3144 {
3145 SAL_INFO("vcl.filter", "neither Dictionary nor Array available");
3146 }
3147 aName.clear();
3148 aNumbers.clear();
3149 }
3150
3151 if (nDepth == 1) // did the pParsing ended
3152 {
3153 // Last array end, track length and stop parsing.
3154 nReturnIndex = i;
3155 break;
3156 }
3157
3158 if (pParsingDictionary)
3159 {
3160 pParsingDictionary->SetKeyOffset(aName, nNameOffset);
3161 // Include the ending ']' in the length of the key - (array)value pair length.
3162 sal_uInt64 nLength = pCurrentEndArray->GetOffset() - nNameOffset + 1;
3163 pParsingDictionary->SetKeyValueLength(aName, nLength);
3164 aName.clear();
3165 }
3166 nDepth--;
3167 }
3168 else if (auto pCurrentName = dynamic_cast<PDFNameElement*>(pCurrentElement))
3169 {
3170 // Handle previously stored number
3171 if (!aNumbers.empty())
3172 {
3173 if (pParsingDictionary)
3174 {
3175 PDFNumberElement* pNumber = aNumbers.back();
3176
3177 sal_uInt64 nLength
3178 = pNumber->GetLocation() + pNumber->GetLength() - nNameOffset;
3179 pParsingDictionary->insert(aName, pNumber);
3180 pParsingDictionary->SetKeyOffset(aName, nNameOffset);
3181 pParsingDictionary->SetKeyValueLength(aName, nLength);
3182 }
3183 else if (pParsingArray)
3184 {
3185 for (auto& pNumber : aNumbers)
3186 pParsingArray->PushBack(pNumber);
3187 }
3188 aName.clear();
3189 aNumbers.clear();
3190 }
3191
3192 // Now handle name
3193 if (pParsingArray)
3194 {
3195 // if we are in an array, just push the name to array
3196 pParsingArray->PushBack(pCurrentName);
3197 }
3198 else if (pParsingDictionary)
3199 {
3200 // if we are in a dictionary, we need to store the name as a possible key
3201 if (aName.isEmpty())
3202 {
3203 aName = pCurrentName->GetValue();
3204 nNameOffset = pCurrentName->GetLocation();
3205 }
3206 else
3207 {
3208 sal_uInt64 nKeyLength
3209 = pCurrentName->GetLocation() + pCurrentName->GetLength() - nNameOffset;
3210 pParsingDictionary->insert(aName, pCurrentName);
3211 pParsingDictionary->SetKeyOffset(aName, nNameOffset);
3212 pParsingDictionary->SetKeyValueLength(aName, nKeyLength);
3213 aName.clear();
3214 }
3215 }
3216 }
3217 else if (auto pReference = dynamic_cast<PDFReferenceElement*>(pCurrentElement))
3218 {
3219 if (pParsingArray)
3220 {
3221 pParsingArray->PushBack(pReference);
3222 }
3223 else if (pParsingDictionary)
3224 {
3225 sal_uInt64 nLength = pReference->GetOffset() - nNameOffset;
3226 pParsingDictionary->insert(aName, pReference);
3227 pParsingDictionary->SetKeyOffset(aName, nNameOffset);
3228 pParsingDictionary->SetKeyValueLength(aName, nLength);
3229 aName.clear();
3230 }
3231 else
3232 {
3233 SAL_INFO("vcl.filter", "neither Dictionary nor Array available");
3234 }
3235 aNumbers.clear();
3236 }
3237 else if (auto pLiteralString = dynamic_cast<PDFLiteralStringElement*>(pCurrentElement))
3238 {
3239 if (pParsingArray)
3240 {
3241 pParsingArray->PushBack(pLiteralString);
3242 }
3243 else if (pParsingDictionary)
3244 {
3245 pParsingDictionary->insert(aName, pLiteralString);
3246 pParsingDictionary->SetKeyOffset(aName, nNameOffset);
3247 aName.clear();
3248 }
3249 else
3250 {
3251 SAL_INFO("vcl.filter", "neither Dictionary nor Array available");
3252 }
3253 }
3254 else if (auto pBoolean = dynamic_cast<PDFBooleanElement*>(pCurrentElement))
3255 {
3256 if (pParsingArray)
3257 {
3258 pParsingArray->PushBack(pBoolean);
3259 }
3260 else if (pParsingDictionary)
3261 {
3262 pParsingDictionary->insert(aName, pBoolean);
3263 pParsingDictionary->SetKeyOffset(aName, nNameOffset);
3264 aName.clear();
3265 }
3266 else
3267 {
3268 SAL_INFO("vcl.filter", "neither Dictionary nor Array available");
3269 }
3270 }
3271 else if (auto pHexString = dynamic_cast<PDFHexStringElement*>(pCurrentElement))
3272 {
3273 if (pParsingArray)
3274 {
3275 pParsingArray->PushBack(pHexString);
3276 }
3277 else if (pParsingDictionary)
3278 {
3279 pParsingDictionary->insert(aName, pHexString);
3280 pParsingDictionary->SetKeyOffset(aName, nNameOffset);
3281 aName.clear();
3282 }
3283 }
3284 else if (auto pNumberElement = dynamic_cast<PDFNumberElement*>(pCurrentElement))
3285 {
3286 // Just remember this, so that in case it's not a reference parameter,
3287 // we can handle it later.
3288 aNumbers.push_back(pNumberElement);
3289 }
3290 else if (dynamic_cast<PDFEndObjectElement*>(pCurrentElement))
3291 {
3292 // parsing of the object is finished
3293 break;
3294 }
3295 else if (dynamic_cast<PDFObjectElement*>(pCurrentElement)
3296 || dynamic_cast<PDFTrailerElement*>(pCurrentElement))
3297 {
3298 continue;
3299 }
3300 else
3301 {
3302 SAL_INFO("vcl.filter", "Unhandled element while parsing.");
3303 }
3304 }
3305
3306 return nReturnIndex;
3307}
3308
3309} // namespace vcl
3310
3311/* vim:set shiftwidth=4 softtabstop=4 expandtab: */
const sal_Int32 m_nLength
const char * pName
const void * GetData()
sal_uInt64 GetSize()
virtual sal_uInt64 TellEnd() override
sal_uInt64 Tell() const
bool good() const
std::size_t WriteBytes(const void *pData, std::size_t nSize)
bool eof() const
bool SetStreamSize(sal_uInt64 nSize)
SvStream & WriteOString(std::string_view rStr)
SvStream & WriteUInt32AsString(sal_uInt32 nUInt32)
SvStream & ReadChar(char &rChar)
sal_uInt64 Seek(sal_uInt64 nPos)
std::size_t ReadBytes(void *pData, std::size_t nSize)
sal_uInt64 SeekRel(sal_Int64 nPos)
SvStream & WriteCharPtr(const char *pBuf)
SvStream & WriteStream(SvStream &rStream)
tools::Long Decompress(SvStream &rIStm, SvStream &rOStm)
tools::Long EndCompression()
void BeginCompression(int nCompressLevel=ZCODEC_DEFAULT_COMPRESSION, bool gzLib=false)
void Compress(SvStream &rIStm, SvStream &rOStm)
void AddDataRange(const void *pData, sal_Int32 size)
bool Sign(OStringBuffer &rCMSHexBuffer)
tools::Long getHeight() const
void setWidth(tools::Long n)
void setHeight(tools::Long n)
tools::Long getWidth() const
Copies objects from one PDF file into another one.
static sal_Int32 copyPageStreams(std::vector< filter::PDFObjectElement * > &rContentStreams, SvMemoryStream &rStream, bool &rCompressed)
Copies one or more page streams from rContentStreams into rStream.
void copyPageResources(filter::PDFObjectElement *pPage, OStringBuffer &rLine)
Copies resources of pPage into rLine.
static void AppendUnicodeTextString(const OUString &rString, OStringBuffer &rBuffer)
Write rString as a PDF hex string into rBuffer.
static OString GetDateTime()
Get current date/time in PDF D:YYYYMMDDHHMMSS form.
Array object: a list.
PDFObjectElement * m_pObject
The object that contains this array.
const std::vector< PDFElement * > & GetElements() const
bool Read(SvStream &rStream) override
void PushBack(PDFElement *pElement)
PDFArrayElement(PDFObjectElement *pObject)
std::vector< PDFElement * > m_aElements
Boolean object: a 'true' or a 'false'.
bool Read(SvStream &rStream) override
PDFCommentElement(PDFDocument &rDoc)
bool Read(SvStream &rStream) override
Dictionary object: a set of key-value pairs.
static PDFElement * Lookup(const std::map< OString, PDFElement * > &rDictionary, const OString &rKey)
sal_uInt64 GetKeyOffset(const OString &rKey) const
std::map< OString, sal_uInt64 > m_aDictionaryKeyOffset
Position after the '/' token.
sal_uInt64 m_nLocation
Offset after the '<<' token.
bool Read(SvStream &rStream) override
std::map< OString, sal_uInt64 > m_aDictionaryKeyValueLength
Length of the dictionary key and value, till (before) the next token.
PDFElement * LookupElement(const OString &rDictionaryKey)
Looks up an element which is contained in this dictionary.
const std::map< OString, PDFElement * > & GetItems() const
void SetKeyValueLength(const OString &rKey, sal_uInt64 nLength)
void SetKeyOffset(const OString &rKey, sal_uInt64 nOffset)
sal_uInt64 GetKeyValueLength(const OString &rKey) const
PDFObjectElement * LookupObject(const OString &rDictionaryKey)
Looks up an object which is only referenced in this dictionary.
std::map< OString, PDFElement * > m_aItems
Key-value pairs when the dictionary is a nested value.
In-memory representation of an on-disk PDF document.
PDFObjectElement * m_pXRefStream
When m_pTrailer is nullptr, this can still have a dictionary.
static OUString DecodeHexStringUTF16BE(PDFHexStringElement const &rElement)
bool RemoveSignature(size_t nPosition)
Remove the nth signature from read document in the edit buffer.
Definition: pdfdocument.cxx:44
PDFTrailerElement * m_pTrailer
sal_Int32 createObject() override
See vcl::PDFObjectContainer::createObject().
Definition: pdfdocument.cxx:69
SvMemoryStream & GetEditBuffer()
Access to the input document, even after the input stream is gone.
std::vector< size_t > m_aEOFs
List of EOF offsets we know.
std::map< size_t, PDFObjectElement * > m_aOffsetObjects
Object offset <-> Object pointer map.
std::vector< size_t > m_aTrailerOffsets
Offsets of trailers, from latest to oldest.
bool WriteCatalogObject(sal_Int32 nAnnotId, PDFReferenceElement *&pRoot)
Write the updated Catalog object as part of signing.
void SetSignatureLine(std::vector< sal_Int8 > &&rSignatureLine)
Definition: pdfdocument.cxx:97
bool Sign(const css::uno::Reference< css::security::XCertificate > &xCertificate, const OUString &rDescription, bool bAdES)
Sign the read document with xCertificate in the edit buffer.
void PushBackEOF(size_t nOffset)
Remember the end location of an EOF token.
bool Read(SvStream &rStream)
Read elements from the start of the stream till its end.
static size_t FindStartXRef(SvStream &rStream)
sal_uInt32 GetNextSignature()
Suggest a minimal, yet free signature ID to use for the next signature.
bool WritePageObject(PDFObjectElement &rFirstPage, sal_Int32 nAnnotId)
Write the updated Page object as part of signing.
static OString ReadKeyword(SvStream &rStream)
bool Tokenize(SvStream &rStream, TokenizeMode eMode, std::vector< std::unique_ptr< PDFElement > > &rElements, PDFObjectElement *pObjectElement)
Tokenize elements from current offset.
void ReadXRef(SvStream &rStream)
size_t GetObjectOffset(size_t nIndex) const
static void SkipWhitespace(SvStream &rStream)
PDFObjectElement * LookupObject(size_t nObjectNumber)
Look up object based on object number, possibly by parsing object streams.
static void SkipLineBreaks(SvStream &rStream)
Instead of all whitespace, just skip CR and NL characters.
PDFObjectElement * GetCatalog()
size_t m_nSignaturePage
0-based page number where m_aSignatureLine should be placed.
sal_Int32 WriteAnnotObject(PDFObjectElement const &rFirstPage, sal_Int32 nSignatureId, sal_Int32 nAppearanceId, const tools::Rectangle &rSignatureRectangle)
Write the annot object as part of signing.
bool writeBuffer(const void *pBuffer, sal_uInt64 nBytes) override
See vcl::PDFObjectContainer::writeBuffer().
Definition: pdfdocument.cxx:91
bool updateObject(sal_Int32 n) override
See vcl::PDFObjectContainer::updateObject().
Definition: pdfdocument.cxx:76
std::vector< PDFObjectElement * > GetSignatureWidgets()
Get a list of signatures embedded into this document.
sal_Int32 WriteAppearanceObject(tools::Rectangle &rSignatureRectangle)
Write the appearance object as part of signing.
void SetIDObject(size_t nID, PDFObjectElement *pObject)
Register an object (owned directly or indirectly by m_aElements) as a provider for a given ID.
bool Write(SvStream &rStream)
Serializes the contents of the edit buffer.
std::vector< PDFObjectElement * > GetPages()
const std::vector< std::unique_ptr< PDFElement > > & GetElements() const
void ReadXRefStream(SvStream &rStream)
SvMemoryStream m_aEditBuffer
All editing takes place in this buffer, if it happens.
void WriteXRef(sal_uInt64 nXRefOffset, PDFReferenceElement const *pRoot)
Write the updated cross-references as part of signing.
std::map< size_t, XRefEntry > m_aXRef
Object ID <-> object offset map.
std::vector< std::unique_ptr< PDFElement > > m_aElements
This vector owns all elements.
sal_Int32 WriteSignatureObject(const OUString &rDescription, bool bAdES, sal_uInt64 &rLastByteRangeOffset, sal_Int64 &rContentOffset)
Write the signature object as part of signing.
std::map< size_t, PDFObjectElement * > m_aIDObjects
Object ID <-> Object pointer map.
static std::vector< unsigned char > DecodeHexString(PDFHexStringElement const *pElement)
Decode a hex dump.
void SetSignaturePage(size_t nPage)
std::vector< size_t > m_aStartXRefs
List of xref offsets we know.
std::map< size_t, PDFTrailerElement * > m_aOffsetTrailers
Trailer offset <-> Trailer pointer map.
std::vector< sal_Int8 > m_aSignatureLine
Signature line in PDF format, to be consumed by the next Sign() invocation.
A byte range in a PDF file.
Definition: pdfdocument.hxx:51
void setVisiting(bool bVisiting)
Definition: pdfdocument.hxx:59
bool alreadyVisiting() const
Definition: pdfdocument.hxx:60
void setParsing(bool bParsing)
Definition: pdfdocument.hxx:61
bool Read(SvStream &rStream) override
sal_uInt64 m_nOffset
Location before the ']' token.
End of a dictionary: '>>'.
bool Read(SvStream &rStream) override
sal_uInt64 m_nLocation
Offset before the '>>' token.
End of an object: 'endobj' keyword.
bool Read(SvStream &rStream) override
End of a stream: 'endstream' keyword.
bool Read(SvStream &rStream) override
Hex string: in <AABB> form.
const OString & GetValue() const
bool Read(SvStream &rStream) override
Literal string: in (asdf) form.
bool Read(SvStream &rStream) override
const OString & GetValue() const
Name object: a key string.
bool Read(SvStream &rStream) override
sal_uInt64 m_nLocation
Offset after the '/' token.
sal_uInt64 GetLocation() const
const OString & GetValue() const
Null object: the 'null' singleton.
bool Read(SvStream &rStream) override
Numeric object: an integer or a real.
sal_uInt64 GetLocation() const
sal_uInt64 m_nLength
Input file token length.
sal_uInt64 GetLength() const
bool Read(SvStream &rStream) override
sal_uInt64 m_nOffset
Input file start location.
Indirect object: something with a unique ID.
Definition: pdfdocument.hxx:69
PDFArrayElement * m_pArrayElement
The contained direct array, if any.
Definition: pdfdocument.hxx:86
std::vector< std::unique_ptr< PDFElement > > m_aElements
Elements of an object in an object stream.
Definition: pdfdocument.hxx:92
void SetNumberElement(PDFNumberElement *pNumberElement)
const std::vector< PDFReferenceElement * > & GetDictionaryReferences() const
void SetDictionaryLength(sal_uInt64 nDictionaryLength)
const std::map< OString, PDFElement * > & GetDictionaryItems()
Get access to the parsed key-value items from the object dictionary.
sal_uInt64 m_nDictionaryOffset
Position after the '<<' token.
Definition: pdfdocument.hxx:77
PDFElement * Lookup(const OString &rDictionaryKey)
void SetArrayOffset(sal_uInt64 nArrayOffset)
PDFNumberElement * m_pNumberElement
If set, the object contains this number element (outside any dictionary/array).
Definition: pdfdocument.hxx:75
PDFStreamElement * GetStream() const
Access to the stream of the object, if it has any.
std::vector< std::unique_ptr< PDFObjectElement > > m_aStoredElements
Objects of an object stream.
Definition: pdfdocument.hxx:90
void SetDictionary(PDFDictionaryElement *pDictionaryElement)
PDFDictionaryElement * m_pDictionaryElement
Definition: pdfdocument.hxx:80
sal_uInt64 m_nDictionaryLength
Length of the dictionary buffer till (before) the '>>' token.
Definition: pdfdocument.hxx:79
void SetStream(PDFStreamElement *pStreamElement)
bool Read(SvStream &rStream) override
void SetArrayLength(sal_uInt64 nArrayLength)
sal_uInt64 GetArrayOffset() const
SvMemoryStream * GetStreamBuffer() const
void SetArray(PDFArrayElement *pArrayElement)
std::vector< PDFReferenceElement * > m_aDictionaryReferences
List of all reference elements inside this object's dictionary and nested dictionaries.
Definition: pdfdocument.hxx:97
PDFArrayElement * GetArray()
std::vector< std::unique_ptr< PDFElement > > & GetStoredElements()
void SetStreamBuffer(std::unique_ptr< SvMemoryStream > &pStreamBuffer)
std::unique_ptr< SvMemoryStream > m_pStreamBuffer
Uncompressed buffer of an object in an object stream.
Definition: pdfdocument.hxx:94
PDFObjectElement(PDFDocument &rDoc, double fObjectValue, double fGenerationValue)
sal_uInt64 m_nArrayOffset
Position after the '[' token, if m_pArrayElement is set.
Definition: pdfdocument.hxx:82
void AddDictionaryReference(PDFReferenceElement *pReference)
void ParseStoredObjects()
Parse objects stored in this object stream.
sal_uInt64 m_nArrayLength
Length of the array buffer till (before) the ']' token.
Definition: pdfdocument.hxx:84
PDFNumberElement * GetNumberElement() const
sal_uInt64 GetArrayLength() const
PDFDocument & m_rDoc
The document owning this element.
Definition: pdfdocument.hxx:71
PDFObjectElement * LookupObject(const OString &rDictionaryKey)
void SetDictionaryOffset(sal_uInt64 nDictionaryOffset)
PDFStreamElement * m_pStreamElement
The stream of this object, used when this is an object stream.
Definition: pdfdocument.hxx:88
PDFDictionaryElement * GetDictionary()
const std::vector< std::unique_ptr< PDFElement > > & mrElements
size_t parse(PDFElement *pParsingElement, size_t nStartIndex=0, int nCurrentDepth=0)
Reference object: something with a unique ID.
sal_uInt64 m_nOffset
Location after the 'R' token.
PDFReferenceElement(PDFDocument &rDoc, PDFNumberElement &rObject, PDFNumberElement const &rGeneration)
PDFNumberElement & GetObjectElement() const
double LookupNumber(SvStream &rStream) const
Assuming the reference points to a number object, return its value.
bool Read(SvStream &rStream) override
PDFNumberElement & m_rObject
The element providing the object number.
PDFObjectElement * LookupObject()
Lookup referenced object, without assuming anything about its contents.
Stream object: a byte array with a known length.
SvMemoryStream & GetMemory()
sal_uInt64 GetOffset() const
SvMemoryStream m_aMemory
The byte array itself.
bool Read(SvStream &rStream) override
The trailer singleton is at the end of the doc.
sal_uInt64 GetLocation() const
PDFDictionaryElement * m_pDictionaryElement
sal_uInt64 m_nOffset
Location of the end of the trailer token.
PDFTrailerElement(PDFDocument &rDoc)
bool Read(SvStream &rStream) override
PDFElement * Lookup(const OString &rDictionaryKey)
An entry in a cross-reference stream.
void SetDirty(bool bDirty)
void SetOffset(sal_uInt64 nOffset)
XRefEntryType GetType() const
sal_uInt64 GetOffset() const
void SetType(XRefEntryType eType)
sal_Int32 nElements
#define MAX_SIGNATURE_CONTENT_LENGTH
const char * pS
SwDoc & m_rDoc
EmbeddedObjectRef * pObject
sal_Int32 nIndex
OUString aName
Mode eMode
sal_uInt16 nPos
#define SAL_WARN_IF(condition, area, stream)
#define SAL_WARN(area, stream)
#define SAL_INFO(area, stream)
aBuf
size
OStringBuffer & padToLength(OStringBuffer &rBuffer, sal_Int32 nLength, char cFill='\0')
int i
constexpr std::enable_if_t< std::is_signed_v< T >, std::make_unsigned_t< T > > make_unsigned(T value)
double toDouble(std::u16string_view str)
sal_uInt32 toUInt32(std::u16string_view str, sal_Int16 radix=10)
std::vector< unsigned char > DecodeHexString(std::string_view rHex)
css::uno::Reference< css::linguistic2::XProofreadingIterator > get(css::uno::Reference< css::uno::XComponentContext > const &context)
@ COMPRESSED
xref stream "2".
@ FREE
xref "f" or xref stream "0".
@ NOT_COMPRESSED
xref "n" or xref stream "1".
static void visitPages(PDFObjectElement *pPages, std::vector< PDFObjectElement * > &rRet)
Visits the page tree recursively, looking for page objects.
@ STORED_OBJECT
Same as END_OF_OBJECT, but for object streams (no endobj keyword).
@ EOF_TOKEN
Till the first %%EOF token.
@ END_OF_OBJECT
Till the end of the current object.
std::string GetValue
QPRO_FUNC_TYPE nType
#define STREAM_SEEK_TO_END
sal_uInt16 sal_Unicode
std::unique_ptr< char[]> aBuffer
sal_Int32 nLength