LibreOffice Module vcl (master) 1
pdfdocument.hxx
Go to the documentation of this file.
1/* -*- Mode: C++; tab-width: 4; indent-tabs-mode: nil; c-basic-offset: 4 -*- */
2/*
3 * This file is part of the LibreOffice project.
4 *
5 * This Source Code Form is subject to the terms of the Mozilla Public
6 * License, v. 2.0. If a copy of the MPL was not distributed with this
7 * file, You can obtain one at http://mozilla.org/MPL/2.0/.
8 *
9 */
10
11#ifndef INCLUDED_VCL_FILTER_PDFDOCUMENT_HXX
12#define INCLUDED_VCL_FILTER_PDFDOCUMENT_HXX
13
14#include <memory>
15#include <map>
16#include <vector>
17
18#include <tools/stream.hxx>
19#include <vcl/dllapi.h>
20#include <rtl/strbuf.hxx>
21
23
25{
26class XCertificate;
27}
28
29namespace com::sun::star::uno
30{
31template <class interface_type> class Reference;
32}
33
34namespace tools
35{
36class Rectangle;
37}
38
39namespace vcl::filter
40{
41class PDFTrailerElement;
42class PDFReferenceElement;
43class PDFDocument;
44class PDFDictionaryElement;
45class PDFArrayElement;
46class PDFStreamElement;
47class PDFNumberElement;
48
51{
52 bool m_bVisiting = false;
53 bool m_bParsing = false;
54
55public:
56 PDFElement() = default;
57 virtual bool Read(SvStream& rStream) = 0;
58 virtual ~PDFElement() = default;
59 void setVisiting(bool bVisiting) { m_bVisiting = bVisiting; }
60 bool alreadyVisiting() const { return m_bVisiting; }
61 void setParsing(bool bParsing) { m_bParsing = bParsing; }
62 bool alreadyParsing() const { return m_bParsing; }
63
64 virtual void writeString(OStringBuffer& rBuffer) = 0;
65};
66
69{
82 sal_uInt64 m_nArrayOffset;
84 sal_uInt64 m_nArrayLength;
90 std::vector<std::unique_ptr<PDFObjectElement>> m_aStoredElements;
92 std::vector<std::unique_ptr<PDFElement>> m_aElements;
94 std::unique_ptr<SvMemoryStream> m_pStreamBuffer;
97 std::vector<PDFReferenceElement*> m_aDictionaryReferences;
98
100
101 void parseIfNecessary();
102
103public:
104 PDFObjectElement(PDFDocument& rDoc, double fObjectValue, double fGenerationValue);
105 bool Read(SvStream& rStream) override;
106 PDFElement* Lookup(const OString& rDictionaryKey);
107 PDFObjectElement* LookupObject(const OString& rDictionaryKey);
108 double GetObjectValue() const;
109 void SetDictionaryOffset(sal_uInt64 nDictionaryOffset);
110 sal_uInt64 GetDictionaryOffset();
111 void SetDictionaryLength(sal_uInt64 nDictionaryLength);
112 sal_uInt64 GetDictionaryLength();
113 PDFDictionaryElement* GetDictionary();
114 void SetDictionary(PDFDictionaryElement* pDictionaryElement);
115 void SetNumberElement(PDFNumberElement* pNumberElement);
116 PDFNumberElement* GetNumberElement() const;
118 const std::map<OString, PDFElement*>& GetDictionaryItems();
119 const std::vector<PDFReferenceElement*>& GetDictionaryReferences() const;
120 void AddDictionaryReference(PDFReferenceElement* pReference);
121 void SetArray(PDFArrayElement* pArrayElement);
122 void SetStream(PDFStreamElement* pStreamElement);
124 PDFStreamElement* GetStream() const;
125 void SetArrayOffset(sal_uInt64 nArrayOffset);
126 sal_uInt64 GetArrayOffset() const;
127 void SetArrayLength(sal_uInt64 nArrayLength);
128 sal_uInt64 GetArrayLength() const;
129 PDFArrayElement* GetArray();
131 void ParseStoredObjects();
132 std::vector<std::unique_ptr<PDFElement>>& GetStoredElements();
133 SvMemoryStream* GetStreamBuffer() const;
134 void SetStreamBuffer(std::unique_ptr<SvMemoryStream>& pStreamBuffer);
135 PDFDocument& GetDocument();
136
137 void writeString(OStringBuffer& /*rBuffer*/) override { assert(false && "not implemented"); }
138};
139
142{
143 std::vector<PDFElement*> m_aElements;
146
147public:
149 bool Read(SvStream& rStream) override;
150 void PushBack(PDFElement* pElement);
151 const std::vector<PDFElement*>& GetElements() const;
152 PDFElement* GetElement(size_t nIndex) const { return m_aElements[nIndex]; }
153
154 void writeString(OStringBuffer& rBuffer) override
155 {
156 rBuffer.append("[ ");
157 for (auto& rElement : m_aElements)
158 {
159 rElement->writeString(rBuffer);
160 rBuffer.append(" ");
161 }
162 rBuffer.append("]");
163 }
164};
165
168{
173 sal_uInt64 m_nOffset = 0;
176
177public:
179 PDFNumberElement const& rGeneration);
180 bool Read(SvStream& rStream) override;
182 double LookupNumber(SvStream& rStream) const;
184 PDFObjectElement* LookupObject();
185 int GetObjectValue() const;
186 int GetGenerationValue() const;
187 sal_uInt64 GetOffset() const;
188 PDFNumberElement& GetObjectElement() const;
189
190 void writeString(OStringBuffer& rBuffer) override
191 {
192 rBuffer.append(sal_Int32(GetObjectValue()));
193 rBuffer.append(' ');
194 rBuffer.append(sal_Int32(GetGenerationValue()));
195 rBuffer.append(" R");
196 }
197};
198
201{
202 size_t m_nLength;
203 sal_uInt64 m_nOffset;
206
207public:
208 explicit PDFStreamElement(size_t nLength);
209 bool Read(SvStream& rStream) override;
210 sal_uInt64 GetOffset() const;
211 SvMemoryStream& GetMemory();
212
213 void writeString(OStringBuffer& rBuffer) override
214 {
215 rBuffer.append("stream\n");
216 rBuffer.append(static_cast<const char*>(m_aMemory.GetData()), m_aMemory.GetSize());
217 rBuffer.append("\nendstream\n");
218 }
219};
220
223{
224 OString m_aValue;
226 sal_uInt64 m_nLocation = 0;
227
228public:
230 bool Read(SvStream& rStream) override;
231 void SetValue(const OString& rValue) { m_aValue = rValue; }
232 const OString& GetValue() const;
233 sal_uInt64 GetLocation() const;
234 sal_uInt64 GetLength() const { return m_aValue.getLength(); }
235
236 void writeString(OStringBuffer& rBuffer) override
237 {
238 rBuffer.append("/");
239 rBuffer.append(m_aValue);
240 }
241};
242
245{
247 std::map<OString, PDFElement*> m_aItems;
249 sal_uInt64 m_nLocation = 0;
251 std::map<OString, sal_uInt64> m_aDictionaryKeyOffset;
253 std::map<OString, sal_uInt64> m_aDictionaryKeyValueLength;
254
255public:
257 bool Read(SvStream& rStream) override;
258
259 static PDFElement* Lookup(const std::map<OString, PDFElement*>& rDictionary,
260 const OString& rKey);
261 void SetKeyOffset(const OString& rKey, sal_uInt64 nOffset);
262 sal_uInt64 GetKeyOffset(const OString& rKey) const;
263 void SetKeyValueLength(const OString& rKey, sal_uInt64 nLength);
264 sal_uInt64 GetKeyValueLength(const OString& rKey) const;
265 const std::map<OString, PDFElement*>& GetItems() const;
267 PDFObjectElement* LookupObject(const OString& rDictionaryKey);
269 PDFElement* LookupElement(const OString& rDictionaryKey);
270 sal_uInt64 GetLocation() const { return m_nLocation; }
271 void insert(OString const& rKey, PDFElement* pPDFElement)
272 {
273 m_aItems.emplace(rKey, pPDFElement);
274 }
275
276 void writeString(OStringBuffer& rBuffer) override
277 {
278 rBuffer.append("<< ");
279 for (auto& rPair : m_aItems)
280 {
281 rBuffer.append("/");
282 rBuffer.append(rPair.first);
283 rBuffer.append(" ");
284 rPair.second->writeString(rBuffer);
285 rBuffer.append(" ");
286 }
287 rBuffer.append(">>");
288 }
289};
290
291enum class TokenizeMode
292{
296 EOF_TOKEN,
301};
302
305{
307 FREE,
312};
313
316{
324 sal_uInt64 m_nOffset = 0;
326 bool m_bDirty = false;
327
328public:
330
332
333 XRefEntryType GetType() const { return m_eType; }
334
335 void SetOffset(sal_uInt64 nOffset) { m_nOffset = nOffset; }
336
337 sal_uInt64 GetOffset() const { return m_nOffset; }
338
339 void SetDirty(bool bDirty) { m_bDirty = bDirty; }
340
341 bool GetDirty() const { return m_bDirty; }
342};
343
346{
347 OString m_aValue;
348
349public:
350 bool Read(SvStream& rStream) override;
351 const OString& GetValue() const;
352
353 void writeString(OStringBuffer& rBuffer) override
354 {
355 rBuffer.append("<");
356 rBuffer.append(m_aValue);
357 rBuffer.append(">");
358 }
359};
360
363{
364 OString m_aValue;
365
366public:
367 bool Read(SvStream& rStream) override;
368 const OString& GetValue() const;
369
370 void writeString(OStringBuffer& rBuffer) override
371 {
372 rBuffer.append("(");
373 rBuffer.append(m_aValue);
374 rBuffer.append(")");
375 }
376};
377
380{
382 sal_uInt64 m_nOffset = 0;
384 sal_uInt64 m_nLength = 0;
385 double m_fValue = 0;
386
387public:
389 bool Read(SvStream& rStream) override;
390 double GetValue() const;
391 void SetValue(double fValue) { m_fValue = fValue; }
392
393 sal_uInt64 GetLocation() const;
394 sal_uInt64 GetLength() const;
395
396 void writeString(OStringBuffer& rBuffer) override { rBuffer.append(m_fValue); }
397};
398
401{
403 OString m_aComment;
404
405public:
406 explicit PDFCommentElement(PDFDocument& rDoc);
407 bool Read(SvStream& rStream) override;
408 void writeString(OStringBuffer& /*rBuffer*/) override {}
409};
410
413{
415 sal_uInt64 m_nLocation = 0;
416
417public:
419 bool Read(SvStream& rStream) override;
420 sal_uInt64 GetLocation() const;
421
422 void writeString(OStringBuffer& /*rBuffer*/) override {}
423};
424
427{
428public:
429 bool Read(SvStream& rStream) override;
430
431 void writeString(OStringBuffer& /*rBuffer*/) override {}
432};
433
436{
437public:
438 bool Read(SvStream& rStream) override;
439
440 void writeString(OStringBuffer& /*rBuffer*/) override {}
441};
442
445{
447 sal_uInt64 m_nOffset = 0;
448
449public:
451 bool Read(SvStream& rStream) override;
452 sal_uInt64 GetOffset() const;
453
454 void writeString(OStringBuffer& /*rBuffer*/) override {}
455};
456
459{
461
462public:
463 explicit PDFBooleanElement(bool bValue)
464 : m_aValue(bValue)
465 {
466 }
467
468 bool Read(SvStream& rStream) override;
469
470 void writeString(OStringBuffer& rBuffer) override
471 {
472 rBuffer.append(m_aValue ? "true" : "false");
473 }
474};
475
478{
479public:
480 bool Read(SvStream& rStream) override;
481
482 void writeString(OStringBuffer& rBuffer) override { rBuffer.append("null"); }
483};
484
493{
495 std::vector<std::unique_ptr<PDFElement>> m_aElements;
497 std::map<size_t, XRefEntry> m_aXRef;
499 std::map<size_t, PDFObjectElement*> m_aOffsetObjects;
501 std::map<size_t, PDFObjectElement*> m_aIDObjects;
503 std::vector<size_t> m_aStartXRefs;
505 std::vector<size_t> m_aTrailerOffsets;
507 std::map<size_t, PDFTrailerElement*> m_aOffsetTrailers;
509 std::vector<size_t> m_aEOFs;
510 PDFTrailerElement* m_pTrailer = nullptr;
512 PDFObjectElement* m_pXRefStream = nullptr;
515
517 std::vector<sal_Int8> m_aSignatureLine;
518
520 size_t m_nSignaturePage = 0;
521
523 sal_uInt32 GetNextSignature();
525 sal_Int32 WriteSignatureObject(const OUString& rDescription, bool bAdES,
526 sal_uInt64& rLastByteRangeOffset, sal_Int64& rContentOffset);
528 sal_Int32 WriteAppearanceObject(tools::Rectangle& rSignatureRectangle);
530 sal_Int32 WriteAnnotObject(PDFObjectElement const& rFirstPage, sal_Int32 nSignatureId,
531 sal_Int32 nAppearanceId,
532 const tools::Rectangle& rSignatureRectangle);
534 bool WritePageObject(PDFObjectElement& rFirstPage, sal_Int32 nAnnotId);
536 bool WriteCatalogObject(sal_Int32 nAnnotId, PDFReferenceElement*& pRoot);
538 void WriteXRef(sal_uInt64 nXRefOffset, PDFReferenceElement const* pRoot);
539
540public:
542 virtual ~PDFDocument();
544 PDFDocument(const PDFDocument&) = delete;
546
547
548 static std::vector<unsigned char> DecodeHexString(PDFHexStringElement const* pElement);
549 static OUString DecodeHexStringUTF16BE(PDFHexStringElement const& rElement);
550 static OString ReadKeyword(SvStream& rStream);
551 static size_t FindStartXRef(SvStream& rStream);
552 void ReadXRef(SvStream& rStream);
553 void ReadXRefStream(SvStream& rStream);
554 static void SkipWhitespace(SvStream& rStream);
556 static void SkipLineBreaks(SvStream& rStream);
557 size_t GetObjectOffset(size_t nIndex) const;
558 const std::vector<std::unique_ptr<PDFElement>>& GetElements() const;
559 std::vector<PDFObjectElement*> GetPages();
560 PDFObjectElement* GetCatalog();
562 void PushBackEOF(size_t nOffset);
564 PDFObjectElement* LookupObject(size_t nObjectNumber);
566 SvMemoryStream& GetEditBuffer();
568 bool Tokenize(SvStream& rStream, TokenizeMode eMode,
569 std::vector<std::unique_ptr<PDFElement>>& rElements,
570 PDFObjectElement* pObjectElement);
572 void SetIDObject(size_t nID, PDFObjectElement* pObject);
574
576
577
578 bool Read(SvStream& rStream);
580 bool ReadWithPossibleFixup(SvStream& rStream);
581 void SetSignatureLine(std::vector<sal_Int8>&& rSignatureLine);
582 void SetSignaturePage(size_t nPage);
584 bool Sign(const css::uno::Reference<css::security::XCertificate>& xCertificate,
585 const OUString& rDescription, bool bAdES);
587 bool Write(SvStream& rStream);
589 std::vector<PDFObjectElement*> GetSignatureWidgets();
591 bool RemoveSignature(size_t nPosition);
593
595 sal_Int32 createObject() override;
597 bool updateObject(sal_Int32 n) override;
599 bool writeBufferBytes(const void* pBuffer, sal_uInt64 nBytes) override;
600 void checkAndEnableStreamEncryption(sal_Int32 /*nObject*/) override {}
601 void disableStreamEncryption() override {}
602};
603
606{
610 sal_uInt64 m_nOffset = 0;
611
612public:
613 explicit PDFTrailerElement(PDFDocument& rDoc);
614 bool Read(SvStream& rStream) override;
615 PDFElement* Lookup(const OString& rDictionaryKey);
616 sal_uInt64 GetLocation() const;
617
618 void SetDictionary(PDFDictionaryElement* pDictionaryElement)
619 {
620 m_pDictionaryElement = pDictionaryElement;
621 }
622
623 PDFDictionaryElement* GetDictionary() { return m_pDictionaryElement; }
624
625 void writeString(OStringBuffer& /*rBuffer*/) override { assert(false && "not implemented"); }
626};
627
629{
630 const std::vector<std::unique_ptr<PDFElement>>& mrElements;
631
632public:
633 PDFObjectParser(std::vector<std::unique_ptr<PDFElement>> const& rElements)
634 : mrElements(rElements)
635 {
636 }
637
638 size_t parse(PDFElement* pParsingElement, size_t nStartIndex = 0, int nCurrentDepth = 0);
639};
640
641} // namespace vcl::filter
642
643#endif // INCLUDED_VCL_FILTER_PDFDOCUMENT_HXX
644
645/* vim:set shiftwidth=4 softtabstop=4 expandtab: */
const sal_Int32 m_nLength
TStyleElements m_aElements
const void * GetData()
sal_uInt64 GetSize()
Allows creating, updating and writing PDF objects in a container.
Array object: a list.
PDFObjectElement * m_pObject
The object that contains this array.
void writeString(OStringBuffer &rBuffer) override
PDFElement * GetElement(size_t nIndex) const
std::vector< PDFElement * > m_aElements
Boolean object: a 'true' or a 'false'.
void writeString(OStringBuffer &rBuffer) override
void writeString(OStringBuffer &) override
Dictionary object: a set of key-value pairs.
void writeString(OStringBuffer &rBuffer) override
std::map< OString, sal_uInt64 > m_aDictionaryKeyOffset
Position after the '/' token.
std::map< OString, sal_uInt64 > m_aDictionaryKeyValueLength
Length of the dictionary key and value, till (before) the next token.
void insert(OString const &rKey, PDFElement *pPDFElement)
std::map< OString, PDFElement * > m_aItems
Key-value pairs when the dictionary is a nested value.
In-memory representation of an on-disk PDF document.
std::vector< size_t > m_aEOFs
List of EOF offsets we know.
std::map< size_t, PDFObjectElement * > m_aOffsetObjects
Object offset <-> Object pointer map.
std::vector< size_t > m_aTrailerOffsets
Offsets of trailers, from latest to oldest.
PDFDocument & operator=(const PDFDocument &)=delete
PDFDocument(const PDFDocument &)=delete
void checkAndEnableStreamEncryption(sal_Int32) override
SvMemoryStream m_aEditBuffer
All editing takes place in this buffer, if it happens.
std::map< size_t, XRefEntry > m_aXRef
Object ID <-> object offset map.
std::vector< std::unique_ptr< PDFElement > > m_aElements
This vector owns all elements.
std::map< size_t, PDFObjectElement * > m_aIDObjects
Object ID <-> Object pointer map.
std::vector< size_t > m_aStartXRefs
List of xref offsets we know.
std::map< size_t, PDFTrailerElement * > m_aOffsetTrailers
Trailer offset <-> Trailer pointer map.
std::vector< sal_Int8 > m_aSignatureLine
Signature line in PDF format, to be consumed by the next Sign() invocation.
void disableStreamEncryption() override
A byte range in a PDF file.
Definition: pdfdocument.hxx:51
bool alreadyParsing() const
Definition: pdfdocument.hxx:62
virtual void writeString(OStringBuffer &rBuffer)=0
void setVisiting(bool bVisiting)
Definition: pdfdocument.hxx:59
virtual bool Read(SvStream &rStream)=0
bool alreadyVisiting() const
Definition: pdfdocument.hxx:60
virtual ~PDFElement()=default
void setParsing(bool bParsing)
Definition: pdfdocument.hxx:61
void writeString(OStringBuffer &) override
End of a dictionary: '>>'.
void writeString(OStringBuffer &) override
End of an object: 'endobj' keyword.
void writeString(OStringBuffer &) override
End of a stream: 'endstream' keyword.
void writeString(OStringBuffer &) override
Hex string: in <AABB> form.
void writeString(OStringBuffer &rBuffer) override
Literal string: in (asdf) form.
void writeString(OStringBuffer &rBuffer) override
Name object: a key string.
void writeString(OStringBuffer &rBuffer) override
sal_uInt64 GetLength() const
void SetValue(const OString &rValue)
Null object: the 'null' singleton.
void writeString(OStringBuffer &rBuffer) override
Numeric object: an integer or a real.
void SetValue(double fValue)
void writeString(OStringBuffer &rBuffer) override
Indirect object: something with a unique ID.
Definition: pdfdocument.hxx:69
PDFArrayElement * m_pArrayElement
The contained direct array, if any.
Definition: pdfdocument.hxx:86
std::vector< std::unique_ptr< PDFElement > > m_aElements
Elements of an object in an object stream.
Definition: pdfdocument.hxx:92
sal_uInt64 m_nDictionaryOffset
Position after the '<<' token.
Definition: pdfdocument.hxx:77
PDFNumberElement * m_pNumberElement
If set, the object contains this number element (outside any dictionary/array).
Definition: pdfdocument.hxx:75
std::vector< std::unique_ptr< PDFObjectElement > > m_aStoredElements
Objects of an object stream.
Definition: pdfdocument.hxx:90
PDFDictionaryElement * m_pDictionaryElement
Definition: pdfdocument.hxx:80
sal_uInt64 m_nDictionaryLength
Length of the dictionary buffer till (before) the '>>' token.
Definition: pdfdocument.hxx:79
std::vector< PDFReferenceElement * > m_aDictionaryReferences
List of all reference elements inside this object's dictionary and nested dictionaries.
Definition: pdfdocument.hxx:97
std::unique_ptr< SvMemoryStream > m_pStreamBuffer
Uncompressed buffer of an object in an object stream.
Definition: pdfdocument.hxx:94
sal_uInt64 m_nArrayOffset
Position after the '[' token, if m_pArrayElement is set.
Definition: pdfdocument.hxx:82
sal_uInt64 m_nArrayLength
Length of the array buffer till (before) the ']' token.
Definition: pdfdocument.hxx:84
PDFDocument & m_rDoc
The document owning this element.
Definition: pdfdocument.hxx:71
PDFStreamElement * m_pStreamElement
The stream of this object, used when this is an object stream.
Definition: pdfdocument.hxx:88
void writeString(OStringBuffer &) override
const std::vector< std::unique_ptr< PDFElement > > & mrElements
PDFObjectParser(std::vector< std::unique_ptr< PDFElement > > const &rElements)
Reference object: points to an indirect object by its object and generation number.
void writeString(OStringBuffer &rBuffer) override
PDFNumberElement & m_rObject
The element providing the object number.
Stream object: a byte array with a known length.
SvMemoryStream m_aMemory
The byte array itself.
void writeString(OStringBuffer &rBuffer) override
The trailer singleton is at the end of the doc.
PDFDictionaryElement * m_pDictionaryElement
void writeString(OStringBuffer &) override
void SetDictionary(PDFDictionaryElement *pDictionaryElement)
PDFDictionaryElement * GetDictionary()
An entry in a cross-reference stream.
void SetDirty(bool bDirty)
sal_uInt64 m_nOffset
Non-compressed: The byte offset of the object, starting from the beginning of the file.
void SetOffset(sal_uInt64 nOffset)
XRefEntryType GetType() const
sal_uInt64 GetOffset() const
bool m_bDirty
Was this entry changed as part of an incremental update?
void SetType(XRefEntryType eType)
#define VCL_DLLPUBLIC
Definition: dllapi.h:29
DocumentType eType
sal_Int32 nIndex
Reference
std::vector< unsigned char > DecodeHexString(std::string_view rHex)
bool parse(OUString const &uri, SourceProviderScannerData *data)
XRefEntryType
The type column of an entry in a cross-reference stream.
@ COMPRESSED
xref stream "2".
@ NOT_COMPRESSED
xref "n" or xref stream "1".
@ STORED_OBJECT
Same as END_OF_OBJECT, but for object streams (no endobj keyword).
@ EOF_TOKEN
Till the first %%EOF token.
@ END_OF_OBJECT
Till the end of the current object.
const char GetValue[]
#define FREE(p)