LibreOffice Module vcl (master)  1
pdfdocument.hxx
Go to the documentation of this file.
1 /* -*- Mode: C++; tab-width: 4; indent-tabs-mode: nil; c-basic-offset: 4 -*- */
2 /*
3  * This file is part of the LibreOffice project.
4  *
5  * This Source Code Form is subject to the terms of the Mozilla Public
6  * License, v. 2.0. If a copy of the MPL was not distributed with this
7  * file, You can obtain one at http://mozilla.org/MPL/2.0/.
8  *
9  */
10 
11 #ifndef INCLUDED_VCL_FILTER_PDFDOCUMENT_HXX
12 #define INCLUDED_VCL_FILTER_PDFDOCUMENT_HXX
13 
14 #include <memory>
15 #include <map>
16 #include <vector>
17 
18 #include <tools/stream.hxx>
19 #include <vcl/dllapi.h>
20 
22 
// Forward declaration of XCertificate.
// NOTE(review): the namespace-head line (listing line 23) is missing from this
// extract; Sign() below refers to the type as css::security::XCertificate, so
// this is presumably namespace com::sun::star::security -- confirm upstream.
24 {
25 class XCertificate;
26 }
27 
// Forward declaration of the UNO Reference class template; in this header it
// only appears as a by-reference parameter type of PDFDocument::Sign().
28 namespace com::sun::star::uno
29 {
30 template <class interface_type> class Reference;
31 }
32 
// Forward declaration of tools::Rectangle; in this header it is only used as a
// by-reference parameter type of PDFDocument's Write*Object() helpers.
33 namespace tools
34 {
35 class Rectangle;
36 }
37 
38 namespace vcl::filter
39 {
// Forward declarations of the element/document types that the classes below
// refer to by pointer or reference before (or without) defining them here.
40 class PDFTrailerElement;
41 class PDFReferenceElement;
42 class PDFDocument;
43 class PDFDictionaryElement;
44 class PDFArrayElement;
45 class PDFStreamElement;
46 class PDFNumberElement;
47 
/// PDFElement: a byte range in a PDF file.
/// NOTE(review): the class-head line (listing line 49) is missing from this
/// extract; the class name is taken from the constructor/destructor below.
50 {
    /// Flag stored by setVisiting() and reported by alreadyVisiting().
    /// NOTE(review): presumably a guard against re-entering an element that is
    /// already being traversed -- confirm against the users in pdfdocument.cxx.
51  bool m_bVisiting = false;
    /// Flag stored by setParsing() and reported by alreadyParsing().
    /// NOTE(review): presumably the same kind of guard for parsing -- confirm.
52  bool m_bParsing = false;
53 
54 public:
55  PDFElement() = default;
    /// Pure virtual: each concrete element type supplies its own reader for
    /// rStream. NOTE(review): the meaning of the bool result is not visible in
    /// this header (presumably true on success) -- confirm.
56  virtual bool Read(SvStream& rStream) = 0;
    /// Virtual destructor, so elements can be destroyed through a PDFElement
    /// pointer (e.g. std::unique_ptr<PDFElement>).
57  virtual ~PDFElement() = default;
    /// Stores bVisiting in m_bVisiting.
58  void setVisiting(bool bVisiting) { m_bVisiting = bVisiting; }
    /// Returns the current value of m_bVisiting.
59  bool alreadyVisiting() const { return m_bVisiting; }
    /// Stores bParsing in m_bParsing.
60  void setParsing(bool bParsing) { m_bParsing = bParsing; }
    /// Returns the current value of m_bParsing.
61  bool alreadyParsing() const { return m_bParsing; }
62 };
63 
/// PDFObjectElement -- indirect object: something with a unique ID.
/// NOTE(review): the class-head line (listing line 65) and several members are
/// missing from this extract. According to the listing's tooltips the missing
/// members are: m_rDoc (line 68, the document owning this element),
/// m_pNumberElement (line 73, set if the object contains a number element
/// outside any dictionary/array), m_pDictionaryElement (line 78),
/// m_pArrayElement (line 84, the contained direct array, if any) and
/// m_pStreamElement (line 86, the stream of this object, used when this is an
/// object stream).
66 {
    /// Dictionary key -> element map of this object.
    /// NOTE(review): stores raw PDFElement pointers; ownership is not visible
    /// in this header.
71  std::map<OString, PDFElement*> m_aDictionary;
    /// Position after the '<<' token.
75  sal_uInt64 m_nDictionaryOffset;
    /// Length of the dictionary buffer till (before) the '>>' token.
77  sal_uInt64 m_nDictionaryLength;
    /// Position after the '[' token, if m_pArrayElement is set.
80  sal_uInt64 m_nArrayOffset;
    /// Length of the array buffer till (before) the ']' token.
82  sal_uInt64 m_nArrayLength;
    /// Objects of an object stream.
88  std::vector<std::unique_ptr<PDFObjectElement>> m_aStoredElements;
    /// Elements of an object in an object stream.
90  std::vector<std::unique_ptr<PDFElement>> m_aElements;
    /// Uncompressed buffer of an object in an object stream.
92  std::unique_ptr<SvMemoryStream> m_pStreamBuffer;
    /// List of all reference elements inside this object's dictionary and
    /// nested dictionaries.
95  std::vector<PDFReferenceElement*> m_aDictionaryReferences;
96 
97 public:
    /// @param rDoc             document this object belongs to
    /// @param fObjectValue     object number (passed as double)
    /// @param fGenerationValue generation number (passed as double)
98  PDFObjectElement(PDFDocument& rDoc, double fObjectValue, double fGenerationValue);
99  bool Read(SvStream& rStream) override;
    /// Looks up rDictionaryKey and returns the matching element.
    /// NOTE(review): behaviour for a missing key is not visible here
    /// (presumably nullptr) -- confirm.
100  PDFElement* Lookup(const OString& rDictionaryKey);
    /// Like Lookup(), but returns a PDFObjectElement.
    /// NOTE(review): presumably resolves a reference value to its target
    /// object -- confirm against the implementation.
101  PDFObjectElement* LookupObject(const OString& rDictionaryKey);
102  double GetObjectValue() const;
103  void SetDictionaryOffset(sal_uInt64 nDictionaryOffset);
    /// NOTE(review): non-const, unlike GetArrayOffset(); the reason is not
    /// visible in this header.
104  sal_uInt64 GetDictionaryOffset();
105  void SetDictionaryLength(sal_uInt64 nDictionaryLength);
    /// NOTE(review): non-const, unlike GetArrayLength(); see GetDictionaryOffset().
106  sal_uInt64 GetDictionaryLength();
107  PDFDictionaryElement* GetDictionary();
108  void SetDictionary(PDFDictionaryElement* pDictionaryElement);
109  void SetNumberElement(PDFNumberElement* pNumberElement);
110  PDFNumberElement* GetNumberElement() const;
    /// Read-only access to a key -> element map (same type as m_aDictionary).
112  const std::map<OString, PDFElement*>& GetDictionaryItems();
113  const std::vector<PDFReferenceElement*>& GetDictionaryReferences() const;
114  void AddDictionaryReference(PDFReferenceElement* pReference);
115  void SetArray(PDFArrayElement* pArrayElement);
116  void SetStream(PDFStreamElement* pStreamElement);
118  PDFStreamElement* GetStream() const;
119  void SetArrayOffset(sal_uInt64 nArrayOffset);
120  sal_uInt64 GetArrayOffset() const;
121  void SetArrayLength(sal_uInt64 nArrayLength);
122  sal_uInt64 GetArrayLength() const;
123  PDFArrayElement* GetArray() const;
125  void ParseStoredObjects();
    /// NOTE(review): the return type matches m_aElements
    /// (vector of unique_ptr<PDFElement>), not m_aStoredElements
    /// (vector of unique_ptr<PDFObjectElement>) -- confirm which member this
    /// exposes before relying on the name.
126  std::vector<std::unique_ptr<PDFElement>>& GetStoredElements();
127  SvMemoryStream* GetStreamBuffer() const;
    /// Takes the unique_ptr by non-const reference.
    /// NOTE(review): whether ownership is moved out of the caller's pointer is
    /// not visible in this header -- confirm.
128  void SetStreamBuffer(std::unique_ptr<SvMemoryStream>& pStreamBuffer);
129  PDFDocument& GetDocument();
130 };
131 
/// Array object: a list.
/// NOTE(review): the class-head line is missing from this extract; this is
/// presumably PDFArrayElement (forward-declared above). The listing's tooltips
/// also mention a member m_pObject ("the object that contains this array")
/// whose declaration line is missing here, as is any constructor.
134 {
    /// Elements of the array, in the order they were added via PushBack().
    /// NOTE(review): raw pointers; ownership is not visible in this header.
135  std::vector<PDFElement*> m_aElements;
138 
139 public:
141  bool Read(SvStream& rStream) override;
    /// Appends pElement to the array.
142  void PushBack(PDFElement* pElement);
    /// Read-only access to the element list.
143  const std::vector<PDFElement*>& GetElements() const;
144 };
145 
/// Reference object: something with a unique ID.
/// NOTE(review): the class-head line is missing from this extract; this is
/// presumably PDFReferenceElement (forward-declared above). Some member lines
/// are also missing, e.g. m_rObject, which the listing's tooltips describe as
/// "the element providing the object number".
148 {
    /// Offset of this reference.
    /// NOTE(review): exactly which position is recorded (start of the
    /// reference or elsewhere) is not visible in this header.
153  sal_uInt64 m_nOffset = 0;
156 
157 public:
    // NOTE(review): the line below is the tail of the constructor declaration;
    // its first line (listing line 158) is missing from this extract.
159  PDFNumberElement const& rGeneration);
160  bool Read(SvStream& rStream) override;
    /// NOTE(review): presumably resolves the reference and reads a number from
    /// rStream -- semantics are not visible here, confirm in pdfdocument.cxx.
162  double LookupNumber(SvStream& rStream) const;
    /// NOTE(review): presumably returns the referenced object, or nullptr if it
    /// cannot be found -- confirm.
164  PDFObjectElement* LookupObject();
165  int GetObjectValue() const;
166  int GetGenerationValue() const;
167  sal_uInt64 GetOffset() const;
168  PDFNumberElement& GetObjectElement() const;
169 };
170 
/// PDFStreamElement -- stream object: a byte array with a known length.
/// NOTE(review): the class-head line is missing from this extract (name taken
/// from the constructor below), as is the member m_aMemory, which the
/// listing's tooltips describe as "the byte array itself".
173 {
    /// Length of the stream, as passed to the constructor.
174  size_t m_nLength;
    /// Offset of the stream.
    /// NOTE(review): not initialised in-class; presumably set by the
    /// constructor or Read() -- confirm.
175  sal_uInt64 m_nOffset;
178 
179 public:
    /// @param nLength length of the stream's byte array
180  explicit PDFStreamElement(size_t nLength);
181  bool Read(SvStream& rStream) override;
182  sal_uInt64 GetOffset() const;
    /// Mutable access to the stream's memory buffer.
183  SvMemoryStream& GetMemory();
184 };
185 
/// PDFNameElement -- name object: a key string.
/// NOTE(review): the class-head line is missing from this extract; the class
/// name is taken from the constructor below.
188 {
    /// The name's text.
189  OString m_aValue;
    /// Location of the name in the stream.
    /// NOTE(review): the exact reference point (before/after the '/' token) is
    /// not visible in this header.
191  sal_uInt64 m_nLocation = 0;
192 
193 public:
194  PDFNameElement();
195  bool Read(SvStream& rStream) override;
196  const OString& GetValue() const;
197  sal_uInt64 GetLocation() const;
    /// Always returns 0.
    /// NOTE(review): presumably exists so names offer the same
    /// GetLocation()/GetLength() pair as the number element -- confirm callers.
198  static sal_uInt64 GetLength() { return 0; }
199 };
200 
/// Dictionary object: a set of key-value pairs.
/// NOTE(review): the class-head line and the constructor line are missing from
/// this extract; this is presumably PDFDictionaryElement (forward-declared
/// above).
203 {
    /// Key-value pairs when the dictionary is a nested value.
205  std::map<OString, PDFElement*> m_aItems;
    /// Location of the dictionary in the stream.
    /// NOTE(review): the exact reference point is not visible in this header.
207  sal_uInt64 m_nLocation = 0;
    /// Per key: position after the '/' token.
209  std::map<OString, sal_uInt64> m_aDictionaryKeyOffset;
    /// Per key: length of the dictionary key and value, till (before) the next
    /// token.
211  std::map<OString, sal_uInt64> m_aDictionaryKeyValueLength;
212 
213 public:
215  bool Read(SvStream& rStream) override;
216 
    /// Fills rDictionary from the element list rElements on behalf of pThis.
    /// NOTE(review): the meaning of the size_t result is not visible in this
    /// header -- confirm in pdfdocument.cxx.
217  static size_t Parse(const std::vector<std::unique_ptr<PDFElement>>& rElements,
218  PDFElement* pThis, std::map<OString, PDFElement*>& rDictionary);
    /// Looks up rKey in rDictionary.
    /// NOTE(review): result for a missing key is presumably nullptr -- confirm.
219  static PDFElement* Lookup(const std::map<OString, PDFElement*>& rDictionary,
220  const OString& rKey);
221  void SetKeyOffset(const OString& rKey, sal_uInt64 nOffset);
222  sal_uInt64 GetKeyOffset(const OString& rKey) const;
223  void SetKeyValueLength(const OString& rKey, sal_uInt64 nLength);
224  sal_uInt64 GetKeyValueLength(const OString& rKey) const;
    /// Read-only access to a key -> element map (same type as m_aItems).
225  const std::map<OString, PDFElement*>& GetItems() const;
    /// NOTE(review): presumably resolves the value of rDictionaryKey to the
    /// object it refers to -- confirm.
227  PDFObjectElement* LookupObject(const OString& rDictionaryKey);
    /// NOTE(review): presumably returns the raw value element stored under
    /// rDictionaryKey -- confirm.
229  PDFElement* LookupElement(const OString& rDictionaryKey);
230 };
231 
/// Selects how far PDFDocument::Tokenize() reads (it takes this as eMode).
/// NOTE(review): enumerator lines are missing from this extract. The listing's
/// tooltips describe two further modes: "till the end of the current object"
/// and "same as END_OF_OBJECT, but for object streams (no endobj keyword)".
232 enum class TokenizeMode
233 {
    /// Till the first %%EOF token.
237  EOF_TOKEN,
242 };
243 
/// The type column of an entry in a cross-reference stream.
/// NOTE(review): one enumerator line is missing from this extract; the
/// listing's tooltips describe it as: xref "n" or xref stream "1".
245 enum class XRefEntryType
246 {
    /// xref "f" or xref stream "0".
    /// NOTE(review): attribution of this description to FREE is inferred from
    /// the "f" marker -- confirm upstream.
248  FREE,
252  COMPRESSED
253 };
254 
/// XRefEntry: an entry in a cross-reference stream.
/// NOTE(review): the class-head line and the declaration of m_eType (used by
/// SetType()/GetType() below) are missing from this extract.
257 {
    /// NOTE(review): the listing's tooltips contain "Non-compressed: The byte
    /// offset of the object, starting from the beginning of the file..." for a
    /// member named m_nOffset; it presumably belongs here -- the rest of that
    /// description is truncated in the extract.
265  sal_uInt64 m_nOffset = 0;
    /// Has this entry changed as part of an incremental update?
267  bool m_bDirty = false;
268 
269 public:
270  XRefEntry();
271 
    /// Stores eType in m_eType.
272  void SetType(XRefEntryType eType) { m_eType = eType; }
273 
    /// Returns m_eType.
274  XRefEntryType GetType() const { return m_eType; }
275 
    /// Stores nOffset in m_nOffset.
276  void SetOffset(sal_uInt64 nOffset) { m_nOffset = nOffset; }
277 
    /// Returns m_nOffset.
278  sal_uInt64 GetOffset() const { return m_nOffset; }
279 
    /// Stores bDirty in m_bDirty.
280  void SetDirty(bool bDirty) { m_bDirty = bDirty; }
281 
    /// Returns m_bDirty.
282  bool GetDirty() const { return m_bDirty; }
283 };
284 
/// A string-valued element.
/// NOTE(review): the class-head line is missing from this extract, so the
/// class cannot be identified from here. This header references
/// PDFHexStringElement (see PDFDocument::DecodeHexString) and the listing's
/// tooltips mention a "Literal string: in (asdf) form" class; this body and
/// the identical one that follows are presumably those two -- confirm which
/// is which upstream.
287 {
    /// The string's text, returned by GetValue().
288  OString m_aValue;
289 
290 public:
291  bool Read(SvStream& rStream) override;
292  const OString& GetValue() const;
293 };
294 
/// A string-valued element.
/// NOTE(review): the class-head line is missing from this extract, so the
/// class cannot be identified from here. This header references
/// PDFHexStringElement (see PDFDocument::DecodeHexString) and the listing's
/// tooltips mention a "Literal string: in (asdf) form" class; this body and
/// the identical one that precedes it are presumably those two -- confirm
/// which is which upstream.
297 {
    /// The string's text, returned by GetValue().
298  OString m_aValue;
299 
300 public:
301  bool Read(SvStream& rStream) override;
302  const OString& GetValue() const;
303 };
304 
/// Numbering object: an integer or a real.
/// NOTE(review): the class-head line and the constructor line are missing from
/// this extract; this is presumably PDFNumberElement (forward-declared above).
307 {
    /// Offset of the number in the stream.
    /// NOTE(review): presumably the value reported by GetLocation() -- confirm.
309  sal_uInt64 m_nOffset = 0;
    /// Length of the number's text in the stream.
    /// NOTE(review): presumably the value reported by GetLength() -- confirm.
311  sal_uInt64 m_nLength = 0;
    /// The numeric value; integers and reals are both held as double.
312  double m_fValue = 0;
313 
314 public:
316  bool Read(SvStream& rStream) override;
317  double GetValue() const;
318  sal_uInt64 GetLocation() const;
319  sal_uInt64 GetLength() const;
320 };
321 
/// PDFDocument: in-memory representation of an on-disk PDF document.
/// NOTE(review): the class-head line is missing from this extract (name taken
/// from the constructor below). The three `override` members at the end imply
/// a base class with matching virtuals; the listing's tooltips mention a type
/// that "allows creating, updating and writing PDF objects in a container",
/// which is presumably that base -- confirm upstream. The member m_aEditBuffer
/// ("all editing takes place in this buffer, if it happens") is also missing.
330 {
    /// This vector owns all elements.
332  std::vector<std::unique_ptr<PDFElement>> m_aElements;
    /// Object ID <-> object offset map.
334  std::map<size_t, XRefEntry> m_aXRef;
    /// Object offset <-> Object pointer map.
336  std::map<size_t, PDFObjectElement*> m_aOffsetObjects;
    /// Object ID <-> Object pointer map.
338  std::map<size_t, PDFObjectElement*> m_aIDObjects;
    /// List of xref offsets we know.
340  std::vector<size_t> m_aStartXRefs;
    /// Offsets of trailers, from latest to oldest.
342  std::vector<size_t> m_aTrailerOffsets;
    /// Trailer offset <-> Trailer pointer map.
344  std::map<size_t, PDFTrailerElement*> m_aOffsetTrailers;
    /// List of EOF offsets we know.
346  std::vector<size_t> m_aEOFs;
    /// Non-owning pointer type; null until set.
    /// NOTE(review): which trailer this designates is not visible here.
347  PDFTrailerElement* m_pTrailer = nullptr;
    /// NOTE(review): presumably the cross-reference stream object when the
    /// document uses one instead of an xref table -- confirm.
349  PDFObjectElement* m_pXRefStream = nullptr;
352 
    /// Signature line in PDF format, to be consumed by the next Sign()
    /// invocation.
354  std::vector<sal_Int8> m_aSignatureLine;
355 
    /// Page index set via SetSignaturePage(); defaults to 0.
357  size_t m_nSignaturePage = 0;
358 
    // Private helpers. NOTE(review): judging by their names they write the
    // individual objects of a signature and the updated xref; their exact
    // contracts are not visible in this header.
360  sal_uInt32 GetNextSignature();
362  sal_Int32 WriteSignatureObject(const OUString& rDescription, bool bAdES,
363  sal_uInt64& rLastByteRangeOffset, sal_Int64& rContentOffset);
365  sal_Int32 WriteAppearanceObject(tools::Rectangle& rSignatureRectangle);
367  sal_Int32 WriteAnnotObject(PDFObjectElement const& rFirstPage, sal_Int32 nSignatureId,
368  sal_Int32 nAppearanceId,
369  const tools::Rectangle& rSignatureRectangle);
371  bool WritePageObject(PDFObjectElement& rFirstPage, sal_Int32 nAnnotId);
373  bool WriteCatalogObject(sal_Int32 nAnnotId, PDFReferenceElement*& pRoot);
375  void WriteXRef(sal_uInt64 nXRefOffset, PDFReferenceElement const* pRoot);
376 
377 public:
378  PDFDocument();
379  virtual ~PDFDocument();
    // Copying is disabled (copy assignment and copy constructor are deleted).
380  PDFDocument& operator=(const PDFDocument&) = delete;
381  PDFDocument(const PDFDocument&) = delete;
383 
    /// Decodes the hex string held by pElement into raw bytes.
    /// NOTE(review): behaviour on malformed input is not visible here.
384  static std::vector<unsigned char> DecodeHexString(PDFHexStringElement const* pElement);
386  static OString ReadKeyword(SvStream& rStream);
387  static size_t FindStartXRef(SvStream& rStream);
388  void ReadXRef(SvStream& rStream);
389  void ReadXRefStream(SvStream& rStream);
390  static void SkipWhitespace(SvStream& rStream);
392  static void SkipLineBreaks(SvStream& rStream);
393  size_t GetObjectOffset(size_t nIndex) const;
    /// Read-only access to the element list (same type as m_aElements).
394  const std::vector<std::unique_ptr<PDFElement>>& GetElements() const;
395  std::vector<PDFObjectElement*> GetPages();
397  void PushBackEOF(size_t nOffset);
    /// NOTE(review): result for an unknown object number is presumably
    /// nullptr -- confirm.
399  PDFObjectElement* LookupObject(size_t nObjectNumber);
401  SvMemoryStream& GetEditBuffer();
    /// Tokenizes rStream into rElements; eMode selects where reading stops
    /// (see TokenizeMode).
    /// NOTE(review): the role of pObjectElement and the meaning of the bool
    /// result are not visible in this header -- confirm.
403  bool Tokenize(SvStream& rStream, TokenizeMode eMode,
404  std::vector<std::unique_ptr<PDFElement>>& rElements,
405  PDFObjectElement* pObjectElement);
407  void SetIDObject(size_t nID, PDFObjectElement* pObject);
409 
411 
    /// Reads the document from rStream.
    /// NOTE(review): not an override of PDFElement::Read -- no `override`
    /// specifier here; presumably returns true on success -- confirm.
412  bool Read(SvStream& rStream);
    /// Sets the signature line (see m_aSignatureLine) for the next Sign() call.
414  void SetSignatureLine(const std::vector<sal_Int8>& rSignatureLine);
415  void SetSignaturePage(size_t nPage);
    /// Signs the document with xCertificate.
    /// NOTE(review): semantics of rDescription / bAdES and of the bool result
    /// are not visible in this header -- confirm.
417  bool Sign(const css::uno::Reference<css::security::XCertificate>& xCertificate,
418  const OUString& rDescription, bool bAdES);
420  bool Write(SvStream& rStream);
422  std::vector<PDFObjectElement*> GetSignatureWidgets();
424  bool RemoveSignature(size_t nPosition);
426 
    // Overrides of the base-class interface (see the note at the top).
428  sal_Int32 createObject() override;
430  bool updateObject(sal_Int32 n) override;
432  bool writeBuffer(const void* pBuffer, sal_uInt64 nBytes) override;
433 };
434 
435 } // namespace vcl::filter
436 
437 #endif // INCLUDED_VCL_FILTER_PDFDOCUMENT_HXX
438 
439 /* vim:set shiftwidth=4 softtabstop=4 expandtab: */
std::vector< std::unique_ptr< PDFElement > > m_aElements
Elements of an object in an object stream.
Definition: pdfdocument.hxx:90
Array object: a list.
const sal_Int32 m_nLength
sal_uInt64 m_nDictionaryOffset
Position after the '<<' token.
Definition: pdfdocument.hxx:75
std::vector< std::unique_ptr< PDFElement > > m_aElements
This vector owns all elements.
Numbering object: an integer or a real.
PDFDictionaryElement * m_pDictionaryElement
Definition: pdfdocument.hxx:78
sal_uInt64 m_nOffset
Non-compressed: The byte offset of the object, starting from the beginning of the file...
SvMemoryStream m_aMemory
The byte array itself.
std::string GetValue
PDFArrayElement * m_pArrayElement
The contained direct array, if any.
Definition: pdfdocument.hxx:84
PDFObjectElement * m_pObject
The object that contains this array.
#define VCL_DLLPUBLIC
Definition: dllapi.h:29
bool m_bDirty
Has this entry changed as part of an incremental update?
std::vector< std::unique_ptr< PDFObjectElement > > m_aStoredElements
Objects of an object stream.
Definition: pdfdocument.hxx:88
Reference
std::vector< unsigned char > DecodeHexString(const OString &rHex)
PDFDocument & m_rDoc
The document owning this element.
Definition: pdfdocument.hxx:68
static sal_uInt64 GetLength()
bool alreadyParsing() const
Definition: pdfdocument.hxx:61
sal_uInt64 m_nArrayOffset
Position after the '[' token, if m_pArrayElement is set.
Definition: pdfdocument.hxx:80
std::map< OString, PDFElement * > m_aItems
Key-value pairs when the dictionary is a nested value.
void setVisiting(bool bVisiting)
Definition: pdfdocument.hxx:58
std::unique_ptr< SvMemoryStream > m_pStreamBuffer
Uncompressed buffer of an object in an object stream.
Definition: pdfdocument.hxx:92
A byte range in a PDF file.
Definition: pdfdocument.hxx:49
In-memory representation of an on-disk PDF document.
sal_uInt64 m_nArrayLength
Length of the array buffer till (before) the ']' token.
Definition: pdfdocument.hxx:82
xref "f" or xref stream "0".
An entry in a cross-reference stream.
XRefEntryType GetType() const
std::vector< sal_Int8 > m_aSignatureLine
Signature line in PDF format, to be consumed by the next Sign() invocation.
xref "n" or xref stream "1".
std::vector< size_t > m_aTrailerOffsets
Offsets of trailers, from latest to oldest.
PDFNumberElement * m_pNumberElement
If set, the object contains this number element (outside any dictionary/array).
Definition: pdfdocument.hxx:73
void SetType(XRefEntryType eType)
Same as END_OF_OBJECT, but for object streams (no endobj keyword).
sal_uInt64 GetOffset() const
std::map< OString, sal_uInt64 > m_aDictionaryKeyOffset
Position after the '/' token.
PDFStreamElement * m_pStreamElement
The stream of this object, used when this is an object stream.
Definition: pdfdocument.hxx:86
bool GetDirty() const
Indirect object: something with a unique ID.
Definition: pdfdocument.hxx:65
PDFNumberElement & m_rObject
The element providing the object number.
std::map< size_t, PDFObjectElement * > m_aOffsetObjects
Object offset <-> Object pointer map.
SvMemoryStream m_aEditBuffer
All editing takes place in this buffer, if it happens.
Dictionary object: a set of key-value pairs.
std::vector< PDFElement * > m_aElements
std::vector< PDFReferenceElement * > m_aDictionaryReferences
List of all reference elements inside this object's dictionary and nested dictionaries.
Definition: pdfdocument.hxx:95
sal_uInt64 m_nOffset
Location before the ']' token.
Definition: pdfdocument.cxx:89
std::map< OString, sal_uInt64 > m_aDictionaryKeyValueLength
Length of the dictionary key and value, till (before) the next token.
void setParsing(bool bParsing)
Definition: pdfdocument.hxx:60
Till the end of the current object.
void SetDirty(bool bDirty)
std::map< size_t, PDFObjectElement * > m_aIDObjects
Object ID <-> Object pointer map.
Reference object: something with a unique ID.
std::vector< size_t > m_aEOFs
List of EOF offsets we know.
std::map< size_t, PDFTrailerElement * > m_aOffsetTrailers
Trailer offset <-> Trailer pointer map.
bool alreadyVisiting() const
Definition: pdfdocument.hxx:59
Literal string: in (asdf) form.
Name object: a key string.
std::vector< size_t > m_aStartXRefs
List of xref offsets we know.
void SetOffset(sal_uInt64 nOffset)
The trailer singleton is at the end of the doc.
XRefEntryType
The type column of an entry in a cross-reference stream.
Till the first %%EOF token.
std::map< size_t, XRefEntry > m_aXRef
Object ID <-> object offset map.
sal_uInt64 m_nDictionaryLength
Length of the dictionary buffer till (before) the '>>' token.
Definition: pdfdocument.hxx:77
Stream object: a byte array with a known length.
sal_uInt64 m_nLocation
Offset before the '>>' token.
Definition: pdfdocument.cxx:63
Allows creating, updating and writing PDF objects in a container.
std::map< OString, PDFElement * > m_aDictionary
Definition: pdfdocument.hxx:71