LibreOffice Module vcl (master)  1
pdfdocument.hxx
Go to the documentation of this file.
1 /* -*- Mode: C++; tab-width: 4; indent-tabs-mode: nil; c-basic-offset: 4 -*- */
2 /*
3  * This file is part of the LibreOffice project.
4  *
5  * This Source Code Form is subject to the terms of the Mozilla Public
6  * License, v. 2.0. If a copy of the MPL was not distributed with this
7  * file, You can obtain one at http://mozilla.org/MPL/2.0/.
8  *
9  */
10 
11 #ifndef INCLUDED_VCL_FILTER_PDFDOCUMENT_HXX
12 #define INCLUDED_VCL_FILTER_PDFDOCUMENT_HXX
13 
14 #include <memory>
15 #include <map>
16 #include <vector>
17 
18 #include <tools/stream.hxx>
19 
20 #include <vcl/dllapi.h>
21 
22 namespace com
23 {
24 namespace sun
25 {
26 namespace star
27 {
28 namespace security
29 {
30 class XCertificate; // Forward declaration only (no include); presumably the type named as css::security::XCertificate in Sign() below - confirm the css alias.
31 }
32 }
33 }
34 }
35 
36 namespace com
37 {
38 namespace sun
39 {
40 namespace star
41 {
42 namespace uno
43 {
44 template <class interface_type> class Reference; // Forward declaration of the class template; presumably the css::uno::Reference used in Sign() below - confirm the css alias.
45 }
46 }
47 }
48 }
49 
50 namespace vcl
51 {
52 namespace filter
53 {
54 class PDFTrailerElement;
55 class PDFReferenceElement;
56 class PDFDocument;
57 class PDFDictionaryElement;
58 class PDFArrayElement;
59 class PDFStreamElement;
60 class PDFNumberElement;
61 
64 { // Body of PDFElement ("A byte range in a PDF file." per this page's member index). NOTE(review): the `class` header line (listing line 63) is absent from this listing.
65  bool m_bVisiting = false; ///< Flag written by setVisiting() and read by alreadyVisiting(); starts out false.
66  bool m_bParsing = false; ///< Flag written by setParsing() and read by alreadyParsing(); starts out false.
67 
68 public:
69  PDFElement() = default;
70  virtual bool Read(SvStream& rStream) = 0; ///< Pure virtual: every concrete element supplies its own reader for rStream. The bool presumably reports success - confirm in the .cxx.
71  virtual ~PDFElement() = default; ///< Virtual, so elements can be destroyed through a PDFElement pointer (e.g. the unique_ptr<PDFElement> vectors in this file).
72  void setVisiting(bool bVisiting) { m_bVisiting = bVisiting; }
73  bool alreadyVisiting() const { return m_bVisiting; }
74  void setParsing(bool bParsing) { m_bParsing = bParsing; }
75  bool alreadyParsing() const { return m_bParsing; }
76 };
77 
80 { // Body of PDFObjectElement ("Indirect object: something with a unique ID." per this page's member index). NOTE(review): the `class` header line (listing line 79) and several members are absent from this listing.
85  std::map<OString, PDFElement*> m_aDictionary; ///< Dictionary entries keyed by name; raw pointers, so presumably non-owning - confirm who owns the elements.
89  sal_uInt64 m_nDictionaryOffset; ///< Position after the '<<' token.
91  sal_uInt64 m_nDictionaryLength; ///< Length of the dictionary buffer till (before) the '>>' token.
94  sal_uInt64 m_nArrayOffset; ///< Position after the '[' token, if m_pArrayElement is set.
96  sal_uInt64 m_nArrayLength; ///< Length of the array buffer till (before) the ']' token.
102  std::vector<std::unique_ptr<PDFObjectElement>> m_aStoredElements; ///< Objects of an object stream (owned here via unique_ptr).
104  std::vector<std::unique_ptr<PDFElement>> m_aElements; ///< Presumably "Elements of an object in an object stream." - the member index lists two m_aElements descriptions; confirm which applies.
106  std::unique_ptr<SvMemoryStream> m_pStreamBuffer; ///< Uncompressed buffer of an object in an object stream.
109  std::vector<PDFReferenceElement*> m_aDictionaryReferences; ///< List of all reference elements inside this object's dictionary and nested dictionaries.
110 
111 public:
112  PDFObjectElement(PDFDocument& rDoc, double fObjectValue, double fGenerationValue);
113  bool Read(SvStream& rStream) override;
114  PDFElement* Lookup(const OString& rDictionaryKey);
115  PDFObjectElement* LookupObject(const OString& rDictionaryKey);
116  double GetObjectValue() const;
117  void SetDictionaryOffset(sal_uInt64 nDictionaryOffset);
118  sal_uInt64 GetDictionaryOffset(); // NOTE(review): non-const, unlike GetArrayOffset() - presumably it may parse the dictionary on demand; confirm.
119  void SetDictionaryLength(sal_uInt64 nDictionaryLength);
120  sal_uInt64 GetDictionaryLength(); // NOTE(review): non-const, unlike GetArrayLength() - see GetDictionaryOffset().
121  PDFDictionaryElement* GetDictionary();
122  void SetDictionary(PDFDictionaryElement* pDictionaryElement);
123  void SetNumberElement(PDFNumberElement* pNumberElement);
124  PDFNumberElement* GetNumberElement() const;
126  const std::map<OString, PDFElement*>& GetDictionaryItems();
127  const std::vector<PDFReferenceElement*>& GetDictionaryReferences() const;
128  void AddDictionaryReference(PDFReferenceElement* pReference);
129  void SetArray(PDFArrayElement* pArrayElement);
130  void SetStream(PDFStreamElement* pStreamElement);
132  PDFStreamElement* GetStream() const;
133  void SetArrayOffset(sal_uInt64 nArrayOffset);
134  sal_uInt64 GetArrayOffset() const;
135  void SetArrayLength(sal_uInt64 nArrayLength);
136  sal_uInt64 GetArrayLength() const;
137  PDFArrayElement* GetArray() const;
139  void ParseStoredObjects();
140  std::vector<std::unique_ptr<PDFElement>>& GetStoredElements(); // NOTE(review): the return type matches m_aElements, not m_aStoredElements, despite the name - confirm which member is returned.
141  SvMemoryStream* GetStreamBuffer() const;
142  void SetStreamBuffer(std::unique_ptr<SvMemoryStream>& pStreamBuffer); // NOTE(review): unique_ptr taken by non-const reference; presumably ownership is moved out of the caller's pointer - confirm.
143  PDFDocument& GetDocument();
144 };
145 
148 { // Class body whose header line is absent from this listing; presumably PDFArrayElement ("Array object: a list." in the member index) - confirm.
149  std::vector<PDFElement*> m_aElements; ///< Elements appended via PushBack(); raw pointers, so presumably non-owning - confirm.
152 
153 public:
155  bool Read(SvStream& rStream) override;
156  void PushBack(PDFElement* pElement);
157  const std::vector<PDFElement*>& GetElements() const;
158 };
159 
162 { // Class body whose header line is absent from this listing; presumably PDFReferenceElement ("Reference object: something with a unique ID." in the member index) - confirm.
167  sal_uInt64 m_nOffset = 0; ///< Value reported by GetOffset() (presumably); defaults to 0.
170 
171 public:
173  PDFNumberElement const& rGeneration); // NOTE(review): tail of a constructor declaration; its first line (listing line 172) is absent from this listing.
174  bool Read(SvStream& rStream) override;
176  double LookupNumber(SvStream& rStream) const;
178  PDFObjectElement* LookupObject();
179  int GetObjectValue() const;
180  int GetGenerationValue() const;
181  sal_uInt64 GetOffset() const;
182  PDFNumberElement& GetObjectElement() const; ///< Presumably returns m_rObject ("The element providing the object number." in the member index) - confirm.
183 };
184 
187 { // Body of PDFStreamElement ("Stream object: a byte array with a known length." per the member index). NOTE(review): the `class` header line and the m_aMemory member are absent from this listing.
188  size_t const m_nLength; ///< Fixed at construction (const member); presumably set from the constructor's nLength argument.
189  sal_uInt64 m_nOffset; ///< No in-class initializer; presumably assigned in the constructor or Read() - confirm.
192 
193 public:
194  explicit PDFStreamElement(size_t nLength);
195  bool Read(SvStream& rStream) override;
196  sal_uInt64 GetOffset() const;
197  SvMemoryStream& GetMemory(); ///< Presumably exposes m_aMemory ("The byte array itself." in the member index) - confirm.
198 };
199 
202 { // Body of PDFNameElement ("Name object: a key string." per the member index). NOTE(review): the `class` header line is absent from this listing.
203  OString m_aValue; ///< Presumably the string returned by GetValue() - confirm.
205  sal_uInt64 m_nLocation = 0; ///< Presumably the value returned by GetLocation(); defaults to 0.
206 
207 public:
208  PDFNameElement();
209  bool Read(SvStream& rStream) override;
210  const OString& GetValue() const;
211  sal_uInt64 GetLocation() const;
212  static sal_uInt64 GetLength() { return 0; } // Always returns 0. NOTE(review): looks like a placeholder - confirm callers do not rely on a real length.
213 };
214 
217 { // Class body whose header line is absent from this listing; presumably PDFDictionaryElement (dictionary object: a set of key-value pairs) - confirm.
219  std::map<OString, PDFElement*> m_aItems; ///< Key-value pairs when the dictionary is a nested value.
221  sal_uInt64 m_nLocation = 0;
223  std::map<OString, sal_uInt64> m_aDictionaryKeyOffset; ///< Position after the '/' token.
225  std::map<OString, sal_uInt64> m_aDictionaryKeyValueLength; ///< Length of the dictionary key and value, till (before) the next token.
226 
227 public:
229  bool Read(SvStream& rStream) override;
230 
231  static size_t Parse(const std::vector<std::unique_ptr<PDFElement>>& rElements, // Static: fills the caller-supplied rDictionary rather than a member.
232  PDFElement* pThis, std::map<OString, PDFElement*>& rDictionary);
233  static PDFElement* Lookup(const std::map<OString, PDFElement*>& rDictionary, // Static lookup of rKey in a caller-supplied map; presumably nullptr when absent - confirm.
234  const OString& rKey);
235  void SetKeyOffset(const OString& rKey, sal_uInt64 nOffset);
236  sal_uInt64 GetKeyOffset(const OString& rKey) const;
237  void SetKeyValueLength(const OString& rKey, sal_uInt64 nLength);
238  sal_uInt64 GetKeyValueLength(const OString& rKey) const;
239  const std::map<OString, PDFElement*>& GetItems() const;
241  PDFObjectElement* LookupObject(const OString& rDictionaryKey);
243  PDFElement* LookupElement(const OString& rDictionaryKey);
244 };
245 
246 enum class TokenizeMode // Passed as eMode to Tokenize(). NOTE(review): the listing numbers jump from 247 to 251 to 256, so other enumerators are absent here.
247 {
251  EOF_TOKEN, ///< Presumably "Till the first %%EOF token." (the member index gives that text without naming the enumerator) - confirm.
256 };
257 
259 enum class XRefEntryType // The type column of an entry in a cross-reference stream. NOTE(review): the listing numbers jump from 262 to 266, so an enumerator in between is absent here.
260 {
262  FREE, ///< Presumably the 'xref "f" or xref stream "0"' entry of the member index - confirm.
266  COMPRESSED
267 };
268 
271 { // Body of XRefEntry ("An entry in a cross-reference stream." per the member index). NOTE(review): the `class` header line and the m_eType declaration used below are absent from this listing.
279  sal_uInt64 m_nOffset = 0; ///< For non-compressed entries: the byte offset of the object from the beginning of the file (the description is truncated in this page's member index).
281  bool m_bDirty = false; ///< Whether the entry changed as part of an incremental update.
282 
283 public:
284  XRefEntry();
285 
286  void SetType(XRefEntryType eType) { m_eType = eType; }
287 
288  XRefEntryType GetType() const { return m_eType; }
289 
290  void SetOffset(sal_uInt64 nOffset) { m_nOffset = nOffset; }
291 
292  sal_uInt64 GetOffset() const { return m_nOffset; }
293 
294  void SetDirty(bool bDirty) { m_bDirty = bDirty; }
295 
296  bool GetDirty() const { return m_bDirty; }
297 };
298 
301 { // Class body whose header line is absent from this listing; presumably one of the two string element types (hex or literal) - confirm which.
302  OString m_aValue; ///< Presumably the string returned by GetValue() - confirm.
303 
304 public:
305  bool Read(SvStream& rStream) override;
306  const OString& GetValue() const;
307 };
308 
311 { // Class body whose header line is absent from this listing; presumably the other string element type (the member index mentions "Literal string: in (asdf) form.") - confirm which.
312  OString m_aValue; ///< Presumably the string returned by GetValue() - confirm.
313 
314 public:
315  bool Read(SvStream& rStream) override;
316  const OString& GetValue() const;
317 };
318 
321 { // Class body whose header line is absent from this listing; presumably PDFNumberElement (number object: an integer or a real) - confirm.
323  sal_uInt64 m_nOffset = 0; ///< Presumably the value returned by GetLocation() - confirm; defaults to 0.
325  sal_uInt64 m_nLength = 0; ///< Presumably the value returned by GetLength() - confirm; defaults to 0.
326  double m_fValue = 0; ///< Stored as double, so it can hold both integers and reals; presumably returned by GetValue().
327 
328 public:
330  bool Read(SvStream& rStream) override;
331  double GetValue() const;
332  sal_uInt64 GetLocation() const;
333  sal_uInt64 GetLength() const;
334 };
335 
344 { // Body of PDFDocument ("In-memory representation of an on-disk PDF document." per the member index). NOTE(review): the `class` header line and some members (e.g. m_aEditBuffer) are absent from this listing.
346  std::vector<std::unique_ptr<PDFElement>> m_aElements; ///< Presumably "This vector owns all elements." - the member index lists two m_aElements descriptions; confirm which applies.
348  std::map<size_t, XRefEntry> m_aXRef; ///< Object ID <-> object offset map.
350  std::map<size_t, PDFObjectElement*> m_aOffsetObjects; ///< Object offset <-> Object pointer map.
352  std::map<size_t, PDFObjectElement*> m_aIDObjects; ///< Object ID <-> Object pointer map.
354  std::vector<size_t> m_aStartXRefs; ///< List of xref offsets we know.
356  std::vector<size_t> m_aTrailerOffsets; ///< Offsets of trailers, from latest to oldest.
358  std::map<size_t, PDFTrailerElement*> m_aOffsetTrailers; ///< Trailer offset <-> Trailer pointer map.
360  std::vector<size_t> m_aEOFs; ///< List of EOF offsets we know.
361  PDFTrailerElement* m_pTrailer = nullptr;
363  PDFObjectElement* m_pXRefStream = nullptr;
366 
368  sal_uInt32 GetNextSignature();
370  sal_Int32 WriteSignatureObject(const OUString& rDescription, bool bAdES,
371  sal_uInt64& rLastByteRangeOffset, sal_Int64& rContentOffset); // The two reference parameters are non-const, i.e. presumably outputs - confirm.
373  sal_Int32 WriteAppearanceObject();
375  sal_Int32 WriteAnnotObject(PDFObjectElement const& rFirstPage, sal_Int32 nSignatureId,
376  sal_Int32 nAppearanceId);
378  bool WritePageObject(PDFObjectElement& rFirstPage, sal_Int32 nAnnotId);
380  bool WriteCatalogObject(sal_Int32 nAnnotId, PDFReferenceElement*& pRoot); // pRoot is a reference to a pointer, i.e. presumably an output - confirm.
382  void WriteXRef(sal_uInt64 nXRefOffset, PDFReferenceElement const* pRoot);
383 
384 public:
385  PDFDocument();
386  PDFDocument& operator=(const PDFDocument&) = delete; // Copy assignment is disabled.
387  PDFDocument(const PDFDocument&) = delete; // Copy construction is disabled.
389 
390  static std::vector<unsigned char> DecodeHexString(PDFHexStringElement const* pElement);
392  static OString ReadKeyword(SvStream& rStream);
393  static size_t FindStartXRef(SvStream& rStream);
394  void ReadXRef(SvStream& rStream);
395  void ReadXRefStream(SvStream& rStream);
396  static void SkipWhitespace(SvStream& rStream);
398  static void SkipLineBreaks(SvStream& rStream);
399  size_t GetObjectOffset(size_t nIndex) const;
400  const std::vector<std::unique_ptr<PDFElement>>& GetElements() const;
401  std::vector<PDFObjectElement*> GetPages();
403  void PushBackEOF(size_t nOffset);
405  PDFObjectElement* LookupObject(size_t nObjectNumber);
407  SvMemoryStream& GetEditBuffer(); ///< Presumably exposes m_aEditBuffer ("All editing takes place in this buffer, if it happens." in the member index) - confirm.
409  bool Tokenize(SvStream& rStream, TokenizeMode eMode,
410  std::vector<std::unique_ptr<PDFElement>>& rElements,
411  PDFObjectElement* pObjectElement);
413  void SetIDObject(size_t nID, PDFObjectElement* pObject);
415 
417 
418  bool Read(SvStream& rStream); // Not an override: the visible part of this body shows no `override` here, unlike the element classes' Read().
421  bool Sign(const css::uno::Reference<css::security::XCertificate>& xCertificate,
422  const OUString& rDescription, bool bAdES);
424  bool Write(SvStream& rStream);
426  std::vector<PDFObjectElement*> GetSignatureWidgets();
428  bool RemoveSignature(size_t nPosition);
430 };
431 
432 } // namespace filter
433 } // namespace vcl
434 
435 #endif // INCLUDED_VCL_FILTER_PDFDOCUMENT_HXX
436 
437 /* vim:set shiftwidth=4 softtabstop=4 expandtab: */
std::vector< std::unique_ptr< PDFElement > > m_aElements
Elements of an object in an object stream.
Array object: a list.
sal_uInt64 m_nDictionaryOffset
Position after the '<<' token.
Definition: pdfdocument.hxx:89
std::vector< std::unique_ptr< PDFElement > > m_aElements
This vector owns all elements.
Number object: an integer or a real.
PDFDictionaryElement * m_pDictionaryElement
Definition: pdfdocument.hxx:92
sal_uInt64 m_nOffset
Non-compressed: The byte offset of the object, starting from the beginning of the file...
SvMemoryStream m_aMemory
The byte array itself.
std::string GetValue
PDFArrayElement * m_pArrayElement
The contained direct array, if any.
Definition: pdfdocument.hxx:98
#define VCL_DLLPUBLIC
Definition: dllapi.h:29
bool m_bDirty
Has this entry changed as part of an incremental update?
std::vector< std::unique_ptr< PDFObjectElement > > m_aStoredElements
Objects of an object stream.
Reference
std::vector< unsigned char > DecodeHexString(const OString &rHex)
PDFDocument & m_rDoc
The document owning this element.
Definition: pdfdocument.hxx:82
static sal_uInt64 GetLength()
bool alreadyParsing() const
Definition: pdfdocument.hxx:75
sal_uInt64 m_nArrayOffset
Position after the '[' token, if m_pArrayElement is set.
Definition: pdfdocument.hxx:94
std::map< OString, PDFElement * > m_aItems
Key-value pairs when the dictionary is a nested value.
void setVisiting(bool bVisiting)
Definition: pdfdocument.hxx:72
std::unique_ptr< SvMemoryStream > m_pStreamBuffer
Uncompressed buffer of an object in an object stream.
A byte range in a PDF file.
Definition: pdfdocument.hxx:63
In-memory representation of an on-disk PDF document.
sal_uInt64 m_nArrayLength
Length of the array buffer till (before) the ']' token.
Definition: pdfdocument.hxx:96
xref "f" or xref stream "0".
An entry in a cross-reference stream.
XRefEntryType GetType() const
PDFObjectElement *const m_pObject
The object that contains this array.
xref "n" or xref stream "1".
std::vector< size_t > m_aTrailerOffsets
Offsets of trailers, from latest to oldest.
PDFNumberElement * m_pNumberElement
If set, the object contains this number element (outside any dictionary/array).
Definition: pdfdocument.hxx:87
void SetType(XRefEntryType eType)
Same as END_OF_OBJECT, but for object streams (no endobj keyword).
sal_uInt64 GetOffset() const
std::map< OString, sal_uInt64 > m_aDictionaryKeyOffset
Position after the '/' token.
PDFStreamElement * m_pStreamElement
The stream of this object, used when this is an object stream.
bool GetDirty() const
Indirect object: something with a unique ID.
Definition: pdfdocument.hxx:79
PDFNumberElement & m_rObject
The element providing the object number.
std::map< size_t, PDFObjectElement * > m_aOffsetObjects
Object offset <-> Object pointer map.
SvMemoryStream m_aEditBuffer
All editing takes place in this buffer, if it happens.
Dictionary object: a set of key-value pairs.
std::vector< PDFElement * > m_aElements
std::vector< PDFReferenceElement * > m_aDictionaryReferences
List of all reference elements inside this object's dictionary and nested dictionaries.
std::map< OString, sal_uInt64 > m_aDictionaryKeyValueLength
Length of the dictionary key and value, till (before) the next token.
void setParsing(bool bParsing)
Definition: pdfdocument.hxx:74
Till the end of the current object.
void SetDirty(bool bDirty)
std::map< size_t, PDFObjectElement * > m_aIDObjects
Object ID <-> Object pointer map.
Reference object: something with a unique ID.
std::vector< size_t > m_aEOFs
List of EOF offsets we know.
std::map< size_t, PDFTrailerElement * > m_aOffsetTrailers
Trailer offset <-> Trailer pointer map.
bool alreadyVisiting() const
Definition: pdfdocument.hxx:73
Literal string: in (asdf) form.
Name object: a key string.
std::vector< size_t > m_aStartXRefs
List of xref offsets we know.
void SetOffset(sal_uInt64 nOffset)
The trailer singleton is at the end of the doc.
XRefEntryType
The type column of an entry in a cross-reference stream.
Till the first %%EOF token.
std::map< size_t, XRefEntry > m_aXRef
Object ID <-> object offset map.
sal_uInt64 m_nDictionaryLength
Length of the dictionary buffer till (before) the '>>' token.
Definition: pdfdocument.hxx:91
Stream object: a byte array with a known length.
std::map< OString, PDFElement * > m_aDictionary
Definition: pdfdocument.hxx:85