LibreOffice Module vcl (master)  1
pdfdocument.hxx
Go to the documentation of this file.
1 /* -*- Mode: C++; tab-width: 4; indent-tabs-mode: nil; c-basic-offset: 4 -*- */
2 /*
3  * This file is part of the LibreOffice project.
4  *
5  * This Source Code Form is subject to the terms of the Mozilla Public
6  * License, v. 2.0. If a copy of the MPL was not distributed with this
7  * file, You can obtain one at http://mozilla.org/MPL/2.0/.
8  *
9  */
10 
11 #ifndef INCLUDED_VCL_FILTER_PDFDOCUMENT_HXX
12 #define INCLUDED_VCL_FILTER_PDFDOCUMENT_HXX
13 
14 #include <memory>
15 #include <map>
16 #include <vector>
17 
18 #include <tools/stream.hxx>
19 #include <vcl/dllapi.h>
20 #include <rtl/strbuf.hxx>
21 
23 
25 {
26 class XCertificate;
27 }
28 
29 namespace com::sun::star::uno
30 {
31 template <class interface_type> class Reference;
32 }
33 
34 namespace tools
35 {
36 class Rectangle;
37 }
38 
39 namespace vcl::filter
40 {
41 class PDFTrailerElement;
42 class PDFReferenceElement;
43 class PDFDocument;
44 class PDFDictionaryElement;
45 class PDFArrayElement;
46 class PDFStreamElement;
47 class PDFNumberElement;
48 
/// Body of the abstract PDFElement base class (named by the constructor below).
/// The Doxygen index for this file summarises it as "A byte range in a PDF file."
/// The `class` declaration line itself is not part of this listing.
51 {
    /// Flag written by setVisiting() and reported by alreadyVisiting().
    /// NOTE(review): presumably a re-entrancy guard for graph walks - confirm at the call sites.
52  bool m_bVisiting = false;
    /// Flag written by setParsing() and reported by alreadyParsing().
    /// NOTE(review): presumably a re-entrancy guard for the parser - confirm at the call sites.
53  bool m_bParsing = false;
54 
55 public:
56  PDFElement() = default;
    /// Pure virtual: every concrete element type supplies its own Read().
57  virtual bool Read(SvStream& rStream) = 0;
    /// Virtual destructor, so elements can be destroyed through a PDFElement pointer.
58  virtual ~PDFElement() = default;
    /// Stores bVisiting in m_bVisiting.
59  void setVisiting(bool bVisiting) { m_bVisiting = bVisiting; }
    /// Returns the value last stored by setVisiting() (false initially).
60  bool alreadyVisiting() const { return m_bVisiting; }
    /// Stores bParsing in m_bParsing.
61  void setParsing(bool bParsing) { m_bParsing = bParsing; }
    /// Returns the value last stored by setParsing() (false initially).
62  bool alreadyParsing() const { return m_bParsing; }
63 
    /// Pure virtual: appends this element's textual form to rBuffer.
    /// Several overrides in this header are empty or assert "not implemented".
64  virtual void writeString(OStringBuffer& rBuffer) = 0;
65 };
66 
/// Body of PDFObjectElement (named by the constructor below).
/// Doxygen index summary: "Indirect object: something with a unique ID."
/// Several members referenced by the index (m_rDoc, m_pNumberElement, m_pDictionaryElement,
/// m_pArrayElement, m_pStreamElement) sit on lines that are missing from this listing.
69 {
    /// Position after the '<<' token.
    /// NOTE(review): no default member initializer here (same for the three fields below);
    /// presumably set in the constructor in the .cxx - confirm.
77  sal_uInt64 m_nDictionaryOffset;
    /// Length of the dictionary buffer till (before) the '>>' token.
79  sal_uInt64 m_nDictionaryLength;
    /// Position after the '[' token, if m_pArrayElement is set.
82  sal_uInt64 m_nArrayOffset;
    /// Length of the array buffer till (before) the ']' token.
84  sal_uInt64 m_nArrayLength;
    /// Objects of an object stream.
90  std::vector<std::unique_ptr<PDFObjectElement>> m_aStoredElements;
    /// Elements of an object in an object stream.
92  std::vector<std::unique_ptr<PDFElement>> m_aElements;
    /// Uncompressed buffer of an object in an object stream.
94  std::unique_ptr<SvMemoryStream> m_pStreamBuffer;
    /// List of all reference elements inside this object's dictionary and nested dictionaries.
    /// The pointers are raw, i.e. not owned by this vector.
97  std::vector<PDFReferenceElement*> m_aDictionaryReferences;
98 
    /// NOTE(review): no initializer visible; presumably tracks whether parseIfNecessary()
    /// has already done its work - confirm in the .cxx.
99  bool m_bParsed;
100 
101  void parseIfNecessary();
102 
103 public:
    /// Object and generation numbers are passed as doubles; GetObjectValue() returns a double too.
104  PDFObjectElement(PDFDocument& rDoc, double fObjectValue, double fGenerationValue);
105  bool Read(SvStream& rStream) override;
106  PDFElement* Lookup(const OString& rDictionaryKey);
107  PDFObjectElement* LookupObject(const OString& rDictionaryKey);
108  double GetObjectValue() const;
109  void SetDictionaryOffset(sal_uInt64 nDictionaryOffset);
    /// NOTE(review): not const, unlike GetArrayOffset()/GetArrayLength() below;
    /// possibly because it triggers lazy parsing - confirm in the .cxx.
110  sal_uInt64 GetDictionaryOffset();
111  void SetDictionaryLength(sal_uInt64 nDictionaryLength);
112  sal_uInt64 GetDictionaryLength();
113  PDFDictionaryElement* GetDictionary();
114  void SetDictionary(PDFDictionaryElement* pDictionaryElement);
115  void SetNumberElement(PDFNumberElement* pNumberElement);
116  PDFNumberElement* GetNumberElement() const;
118  const std::map<OString, PDFElement*>& GetDictionaryItems();
119  const std::vector<PDFReferenceElement*>& GetDictionaryReferences() const;
120  void AddDictionaryReference(PDFReferenceElement* pReference);
121  void SetArray(PDFArrayElement* pArrayElement);
122  void SetStream(PDFStreamElement* pStreamElement);
124  PDFStreamElement* GetStream() const;
125  void SetArrayOffset(sal_uInt64 nArrayOffset);
126  sal_uInt64 GetArrayOffset() const;
127  void SetArrayLength(sal_uInt64 nArrayLength);
128  sal_uInt64 GetArrayLength() const;
129  PDFArrayElement* GetArray();
131  void ParseStoredObjects();
    /// The return type matches m_aElements (PDFElement), not m_aStoredElements (PDFObjectElement).
132  std::vector<std::unique_ptr<PDFElement>>& GetStoredElements();
133  SvMemoryStream* GetStreamBuffer() const;
    /// NOTE(review): takes the unique_ptr by non-const reference; presumably the callee
    /// moves from it and the caller's pointer ends up empty - confirm in the .cxx.
134  void SetStreamBuffer(std::unique_ptr<SvMemoryStream>& pStreamBuffer);
135  PDFDocument& GetDocument();
136 
    /// Not implemented: asserts in debug builds. With NDEBUG the assert compiles away,
    /// so the call silently appends nothing.
    /// NOTE(review): <cassert> is not among the includes visible in this listing.
137  void writeString(OStringBuffer& /*rBuffer*/) override { assert(false && "not implemented"); }
138 };
139 
/// Body of the array element class; the `class` line is missing from this listing.
/// Presumably PDFArrayElement (forward-declared earlier in the file) - the Doxygen index
/// describes it as "Array object: a list."
142 {
    /// The array's items, in order. Raw pointers: this vector does not own them.
143  std::vector<PDFElement*> m_aElements;
146 
147 public:
149  bool Read(SvStream& rStream) override;
150  void PushBack(PDFElement* pElement);
151  const std::vector<PDFElement*>& GetElements() const;
    /// Returns the item at nIndex.
    /// NOTE(review): uses unchecked operator[]; nIndex must be smaller than the number of
    /// items, otherwise the behaviour is undefined.
152  PDFElement* GetElement(size_t nIndex) const { return m_aElements[nIndex]; }
153 
    /// Appends "[ ", then each item's writeString() output followed by one space, then "]".
    /// An empty array therefore yields "[ ]".
    /// NOTE(review): every stored pointer is dereferenced without a null check.
154  void writeString(OStringBuffer& rBuffer) override
155  {
156  rBuffer.append("[ ");
157  for (auto& rElement : m_aElements)
158  {
159  rElement->writeString(rBuffer);
160  rBuffer.append(" ");
161  }
162  rBuffer.append("]");
163  }
164 };
165 
/// Body of the reference element class; the `class` line and the first line of its
/// constructor declaration are missing from this listing. Presumably PDFReferenceElement
/// (forward-declared earlier) - the Doxygen index says "Reference object: something with
/// a unique ID."
168 {
    /// NOTE(review): presumably the byte offset of the reference in the stream,
    /// exposed through GetOffset() - confirm in the .cxx.
173  sal_uInt64 m_nOffset = 0;
176 
177 public:
    // The next line is the tail of a constructor declaration whose beginning is not shown.
179  PDFNumberElement const& rGeneration);
180  bool Read(SvStream& rStream) override;
182  double LookupNumber(SvStream& rStream) const;
184  PDFObjectElement* LookupObject();
185  int GetObjectValue() const;
186  int GetGenerationValue() const;
187  sal_uInt64 GetOffset() const;
188  PDFNumberElement& GetObjectElement() const;
189 
    /// Appends the reference as "<object number> <generation number> R",
    /// both numbers taken from the getters above and written as sal_Int32.
190  void writeString(OStringBuffer& rBuffer) override
191  {
192  rBuffer.append(sal_Int32(GetObjectValue()));
193  rBuffer.append(' ');
194  rBuffer.append(sal_Int32(GetGenerationValue()));
195  rBuffer.append(" R");
196  }
197 };
198 
/// Body of PDFStreamElement (named by the constructor below).
/// Doxygen index summary: "Stream object: a byte array with a known length."
/// The m_aMemory member used by writeString() ("The byte array itself.") is declared on a
/// line that is missing from this listing.
201 {
    /// NOTE(review): no default member initializer; presumably set from the constructor's
    /// nLength argument - confirm in the .cxx.
202  size_t m_nLength;
    /// NOTE(review): no default member initializer visible - confirm that the constructor
    /// or Read() sets it before GetOffset() is used.
203  sal_uInt64 m_nOffset;
206 
207 public:
208  explicit PDFStreamElement(size_t nLength);
209  bool Read(SvStream& rStream) override;
210  sal_uInt64 GetOffset() const;
211  SvMemoryStream& GetMemory();
212 
    /// Appends "stream\n", the raw bytes held in m_aMemory, then "\nendstream\n".
    /// NOTE(review): GetSize() is 64-bit per the index; check how the length argument of
    /// OStringBuffer::append is typed before relying on this for large streams.
213  void writeString(OStringBuffer& rBuffer) override
214  {
215  rBuffer.append("stream\n");
216  rBuffer.append(static_cast<const char*>(m_aMemory.GetData()), m_aMemory.GetSize());
217  rBuffer.append("\nendstream\n");
218  }
219 };
220 
/// Body of PDFNameElement (named by the constructor below).
/// Doxygen index summary: "Name object: a key string."
223 {
    /// The name text; writeString() emits it prefixed with '/'.
224  OString m_aValue;
    /// NOTE(review): presumably the position of the name in the stream, exposed through
    /// GetLocation() - confirm in the .cxx.
226  sal_uInt64 m_nLocation = 0;
227 
228 public:
229  PDFNameElement();
230  bool Read(SvStream& rStream) override;
    /// Replaces the stored name text.
231  void SetValue(const OString& rValue) { m_aValue = rValue; }
232  const OString& GetValue() const;
233  sal_uInt64 GetLocation() const;
    /// Length of the stored name text (without the '/' that writeString() adds).
    /// Const-qualified: it only reads m_aValue, so it is now callable on const objects too.
234  sal_uInt64 GetLength() const { return m_aValue.getLength(); }
235 
    /// Appends "/" followed by the stored name text.
236  void writeString(OStringBuffer& rBuffer) override
237  {
238  rBuffer.append("/");
239  rBuffer.append(m_aValue);
240  }
241 };
242 
/// Body of the dictionary element class; the `class` line is missing from this listing.
/// Presumably PDFDictionaryElement (forward-declared earlier) - the Doxygen index says
/// "Dictionary object: a set of key-value pairs."
245 {
    /// Key-value pairs when the dictionary is a nested value.
    /// Values are raw pointers: this map does not own them.
247  std::map<OString, PDFElement*> m_aItems;
    /// Returned by GetLocation().
249  sal_uInt64 m_nLocation = 0;
    /// Position after the '/' token.
251  std::map<OString, sal_uInt64> m_aDictionaryKeyOffset;
    /// Length of the dictionary key and value, till (before) the next token.
253  std::map<OString, sal_uInt64> m_aDictionaryKeyValueLength;
254 
255 public:
257  bool Read(SvStream& rStream) override;
258 
    /// Static helper working on an arbitrary key -> element map, not on this instance.
259  static PDFElement* Lookup(const std::map<OString, PDFElement*>& rDictionary,
260  const OString& rKey);
261  void SetKeyOffset(const OString& rKey, sal_uInt64 nOffset);
262  sal_uInt64 GetKeyOffset(const OString& rKey) const;
263  void SetKeyValueLength(const OString& rKey, sal_uInt64 nLength);
264  sal_uInt64 GetKeyValueLength(const OString& rKey) const;
265  const std::map<OString, PDFElement*>& GetItems() const;
267  PDFObjectElement* LookupObject(const OString& rDictionaryKey);
269  PDFElement* LookupElement(const OString& rDictionaryKey);
270  sal_uInt64 GetLocation() const { return m_nLocation; }
    /// Adds the pair to m_aItems. std::map::emplace leaves an already present key
    /// untouched, so inserting an existing key does not replace its value.
271  void insert(OString const& rKey, PDFElement* pPDFElement)
272  {
273  m_aItems.emplace(rKey, pPDFElement);
274  }
275 
    /// Appends "<< ", then for every item "/<key> <value> " in the map's key order,
    /// then ">>". An empty dictionary therefore yields "<< >>".
    /// NOTE(review): each value pointer is dereferenced without a null check, while
    /// insert() accepts any pointer.
276  void writeString(OStringBuffer& rBuffer) override
277  {
278  rBuffer.append("<< ");
279  for (auto& rPair : m_aItems)
280  {
281  rBuffer.append("/");
282  rBuffer.append(rPair.first);
283  rBuffer.append(" ");
284  rPair.second->writeString(rBuffer);
285  rBuffer.append(" ");
286  }
287  rBuffer.append(">>");
288  }
289 };
290 
/// Selects where tokenizing stops; passed as eMode to PDFDocument::Tokenize().
/// Only EOF_TOKEN survived in this listing. The Doxygen index also describes an
/// END_OF_OBJECT mode ("Till the end of the current object.") and a mode that is the
/// "Same as END_OF_OBJECT, but for object streams (no endobj keyword)."
291 enum class TokenizeMode
292 {
    /// Till the first %%EOF token.
296  EOF_TOKEN,
301 };
302 
/// The type column of an entry in a cross-reference stream.
/// The Doxygen index also describes an entry for xref "n" / xref stream "1"; its
/// enumerator line is missing from this listing.
304 enum class XRefEntryType
305 {
    /// xref "f" or xref stream "0".
307  FREE,
    /// NOTE(review): presumably xref stream type "2" (object stored in an object stream) -
    /// the describing comment is not part of this listing, confirm against the source.
311  COMPRESSED
312 };
313 
/// Body of XRefEntry (named by the constructor below).
/// Doxygen index summary: "An entry in a cross-reference stream."
/// The m_eType member used by SetType()/GetType() is declared on a line that is
/// missing from this listing.
316 {
    /// Non-compressed: the byte offset of the object, starting from the beginning of the
    /// file (the index text is truncated after this point).
324  sal_uInt64 m_nOffset = 0;
    /// Whether the entry was changed as part of an incremental update.
326  bool m_bDirty = false;
327 
328 public:
329  XRefEntry();
330 
    /// Stores the entry type.
331  void SetType(XRefEntryType eType) { m_eType = eType; }
332 
    /// Returns the stored entry type.
333  XRefEntryType GetType() const { return m_eType; }
334 
    /// Stores the offset.
335  void SetOffset(sal_uInt64 nOffset) { m_nOffset = nOffset; }
336 
    /// Returns the stored offset (0 until SetOffset() is called).
337  sal_uInt64 GetOffset() const { return m_nOffset; }
338 
    /// Marks or unmarks the entry as changed.
339  void SetDirty(bool bDirty) { m_bDirty = bDirty; }
340 
    /// Returns the dirty flag (false until SetDirty(true) is called).
341  bool GetDirty() const { return m_bDirty; }
342 };
343 
/// Body of a string element that serialises between angle brackets; the `class` line is
/// missing from this listing. Presumably PDFHexStringElement, the type taken by
/// PDFDocument::DecodeHexString() - confirm against the source.
346 {
    /// The text between the brackets, written back unchanged by writeString().
347  OString m_aValue;
348 
349 public:
350  bool Read(SvStream& rStream) override;
351  const OString& GetValue() const;
352 
    /// Appends "<", the stored value, then ">".
353  void writeString(OStringBuffer& rBuffer) override
354  {
355  rBuffer.append("<");
356  rBuffer.append(m_aValue);
357  rBuffer.append(">");
358  }
359 };
360 
/// Body of a string element that serialises between parentheses; the `class` line is
/// missing from this listing. The Doxygen index has a matching summary:
/// "Literal string: in (asdf) form."
363 {
    /// The text between the parentheses, written back unchanged by writeString().
364  OString m_aValue;
365 
366 public:
367  bool Read(SvStream& rStream) override;
368  const OString& GetValue() const;
369 
    /// Appends "(", the stored value, then ")".
    /// NOTE(review): the value is appended as stored, no escaping is applied here.
370  void writeString(OStringBuffer& rBuffer) override
371  {
372  rBuffer.append("(");
373  rBuffer.append(m_aValue);
374  rBuffer.append(")");
375  }
376 };
377 
/// Body of the number element class; the `class` line is missing from this listing.
/// Presumably PDFNumberElement (forward-declared earlier) - the Doxygen index describes
/// it as an object holding "an integer or a real."
380 {
    /// NOTE(review): presumably the input position of the number, reported by
    /// GetLocation() - confirm in the .cxx.
382  sal_uInt64 m_nOffset = 0;
    /// NOTE(review): presumably the length of the number's text, reported by
    /// GetLength() - confirm in the .cxx.
384  sal_uInt64 m_nLength = 0;
    /// The numeric value; integers and reals share this one double.
385  double m_fValue = 0;
386 
387 public:
389  bool Read(SvStream& rStream) override;
390  double GetValue() const;
    /// Replaces the stored value; m_nOffset and m_nLength are left untouched.
391  void SetValue(double fValue) { m_fValue = fValue; }
392 
393  sal_uInt64 GetLocation() const;
394  sal_uInt64 GetLength() const;
395 
    /// Appends the stored double using OStringBuffer's double formatting.
396  void writeString(OStringBuffer& rBuffer) override { rBuffer.append(m_fValue); }
397 };
398 
/// Body of PDFCommentElement (named by the constructor below).
/// Doxygen index summary: "A one-liner comment."
401 {
    /// NOTE(review): presumably the comment text filled in by Read() - confirm in the .cxx.
403  OString m_aComment;
404 
405 public:
406  explicit PDFCommentElement(PDFDocument& rDoc);
407  bool Read(SvStream& rStream) override;
    /// Intentionally empty: nothing is appended for a comment.
408  void writeString(OStringBuffer& /*rBuffer*/) override {}
409 };
410 
/// Body of an element class whose `class` line is missing from this listing.
/// NOTE(review): from its position in the file this may be the end-of-dictionary ('>>')
/// marker mentioned in the Doxygen index - confirm against the source.
413 {
    /// Reported by GetLocation(); starts at 0.
415  sal_uInt64 m_nLocation = 0;
416 
417 public:
419  bool Read(SvStream& rStream) override;
420  sal_uInt64 GetLocation() const;
421 
    /// Intentionally empty: this element appends nothing.
422  void writeString(OStringBuffer& /*rBuffer*/) override {}
423 };
424 
/// Body of a state-less element class whose `class` line is missing from this listing.
/// NOTE(review): possibly one of the 'endstream' / 'endobj' keyword markers described in
/// the Doxygen index - confirm against the source.
427 {
428 public:
429  bool Read(SvStream& rStream) override;
430 
    /// Intentionally empty: this element appends nothing.
431  void writeString(OStringBuffer& /*rBuffer*/) override {}
432 };
433 
/// Body of a second state-less element class whose `class` line is missing from this
/// listing.
/// NOTE(review): possibly one of the 'endstream' / 'endobj' keyword markers described in
/// the Doxygen index - confirm against the source.
436 {
437 public:
438  bool Read(SvStream& rStream) override;
439 
    /// Intentionally empty: this element appends nothing.
440  void writeString(OStringBuffer& /*rBuffer*/) override {}
441 };
442 
/// Body of an element class whose `class` line is missing from this listing.
/// NOTE(review): from its position in the file this may be the end-of-array (']') marker
/// mentioned in the Doxygen index - confirm against the source.
445 {
    /// Reported by GetOffset(); starts at 0.
447  sal_uInt64 m_nOffset = 0;
448 
449 public:
451  bool Read(SvStream& rStream) override;
452  sal_uInt64 GetOffset() const;
453 
    /// Intentionally empty: this element appends nothing.
454  void writeString(OStringBuffer& /*rBuffer*/) override {}
455 };
456 
/// Body of PDFBooleanElement (named by the constructor below).
/// Doxygen index summary: "Boolean object: a 'true' or a 'false'."
459 {
    /// The wrapped boolean, fixed at construction (no setter in this class body).
460  bool m_aValue;
461 
462 public:
    /// Explicit, so a plain bool never converts to an element implicitly.
463  explicit PDFBooleanElement(bool bValue)
464  : m_aValue(bValue)
465  {
466  }
467 
468  bool Read(SvStream& rStream) override;
469 
    /// Appends the keyword "true" or "false" according to the stored value.
470  void writeString(OStringBuffer& rBuffer) override
471  {
472  rBuffer.append(m_aValue ? "true" : "false");
473  }
474 };
475 
/// Body of the null element class; the `class` line is missing from this listing.
/// Doxygen index summary: "Null object: the 'null' singleton."
478 {
479 public:
480  bool Read(SvStream& rStream) override;
481 
    /// Appends the keyword "null".
482  void writeString(OStringBuffer& rBuffer) override { rBuffer.append("null"); }
483 };
484 
/// Body of PDFDocument (named by the constructor below).
/// Doxygen index summary: "In-memory representation of an on-disk PDF document."
/// The `class` line (with the base class that declares createObject(), updateObject() and
/// writeBuffer()) and the m_aEditBuffer member are missing from this listing.
493 {
    /// This vector owns all elements.
495  std::vector<std::unique_ptr<PDFElement>> m_aElements;
    /// Object ID <-> object offset map.
497  std::map<size_t, XRefEntry> m_aXRef;
    /// Object offset <-> Object pointer map.
499  std::map<size_t, PDFObjectElement*> m_aOffsetObjects;
    /// Object ID <-> Object pointer map.
501  std::map<size_t, PDFObjectElement*> m_aIDObjects;
    /// List of xref offsets we know.
503  std::vector<size_t> m_aStartXRefs;
    /// Offsets of trailers, from latest to oldest.
505  std::vector<size_t> m_aTrailerOffsets;
    /// Trailer offset <-> Trailer pointer map.
507  std::map<size_t, PDFTrailerElement*> m_aOffsetTrailers;
    /// List of EOF offsets we know.
509  std::vector<size_t> m_aEOFs;
    /// Non-owning pointer, null until a trailer is recorded.
510  PDFTrailerElement* m_pTrailer = nullptr;
    /// Non-owning pointer, null until set.
    /// NOTE(review): presumably the object holding the cross-reference stream, for
    /// documents that use one instead of a classic xref table - confirm in the .cxx.
512  PDFObjectElement* m_pXRefStream = nullptr;
515 
    /// Signature line in PDF format, to be consumed by the next Sign() invocation.
517  std::vector<sal_Int8> m_aSignatureLine;
518 
    /// Set through SetSignaturePage(); defaults to 0.
520  size_t m_nSignaturePage = 0;
521 
    // Private helpers; their bodies live in the .cxx file and are not shown here.
    // NOTE(review): judging by names and parameters they write the individual pieces of a
    // signature update - confirm against the implementation.
523  sal_uInt32 GetNextSignature();
525  sal_Int32 WriteSignatureObject(const OUString& rDescription, bool bAdES,
526  sal_uInt64& rLastByteRangeOffset, sal_Int64& rContentOffset);
528  sal_Int32 WriteAppearanceObject(tools::Rectangle& rSignatureRectangle);
530  sal_Int32 WriteAnnotObject(PDFObjectElement const& rFirstPage, sal_Int32 nSignatureId,
531  sal_Int32 nAppearanceId,
532  const tools::Rectangle& rSignatureRectangle);
534  bool WritePageObject(PDFObjectElement& rFirstPage, sal_Int32 nAnnotId);
536  bool WriteCatalogObject(sal_Int32 nAnnotId, PDFReferenceElement*& pRoot);
538  void WriteXRef(sal_uInt64 nXRefOffset, PDFReferenceElement const* pRoot);
539 
540 public:
541  PDFDocument();
542  virtual ~PDFDocument();
    // Copying is disabled: the document owns its elements through unique_ptr and hands
    // out raw pointers into them.
543  PDFDocument& operator=(const PDFDocument&) = delete;
544  PDFDocument(const PDFDocument&) = delete;
546 
    // Static parsing helpers: they take only a stream or an element, no document state.
547  static std::vector<unsigned char> DecodeHexString(PDFHexStringElement const* pElement);
549  static OString ReadKeyword(SvStream& rStream);
550  static size_t FindStartXRef(SvStream& rStream);
551  void ReadXRef(SvStream& rStream);
552  void ReadXRefStream(SvStream& rStream);
553  static void SkipWhitespace(SvStream& rStream);
555  static void SkipLineBreaks(SvStream& rStream);
556  size_t GetObjectOffset(size_t nIndex) const;
557  const std::vector<std::unique_ptr<PDFElement>>& GetElements() const;
558  std::vector<PDFObjectElement*> GetPages();
559  PDFObjectElement* GetCatalog();
561  void PushBackEOF(size_t nOffset);
563  PDFObjectElement* LookupObject(size_t nObjectNumber);
    /// Accessor for the edit buffer; the index says of it: "All editing takes place in
    /// this buffer, if it happens."
565  SvMemoryStream& GetEditBuffer();
    /// Tokenizes rStream into rElements; eMode selects where tokenizing stops.
567  bool Tokenize(SvStream& rStream, TokenizeMode eMode,
568  std::vector<std::unique_ptr<PDFElement>>& rElements,
569  PDFObjectElement* pObjectElement);
571  void SetIDObject(size_t nID, PDFObjectElement* pObject);
573 
575 
576  bool Read(SvStream& rStream);
    /// Supplies the signature line that the next Sign() call consumes.
578  void SetSignatureLine(const std::vector<sal_Int8>& rSignatureLine);
579  void SetSignaturePage(size_t nPage);
581  bool Sign(const css::uno::Reference<css::security::XCertificate>& xCertificate,
582  const OUString& rDescription, bool bAdES);
584  bool Write(SvStream& rStream);
586  std::vector<PDFObjectElement*> GetSignatureWidgets();
588  bool RemoveSignature(size_t nPosition);
590 
    // Overrides of virtuals declared by a base class that is not visible in this listing.
592  sal_Int32 createObject() override;
594  bool updateObject(sal_Int32 n) override;
596  bool writeBuffer(const void* pBuffer, sal_uInt64 nBytes) override;
597 };
598 
/// Body of PDFTrailerElement (named by the constructor below).
/// Doxygen index summary: "The trailer singleton is at the end of the doc."
/// The m_pDictionaryElement member used by the inline accessors is declared on a line
/// that is missing from this listing.
601 {
    /// NOTE(review): presumably the position of the trailer, reported by GetLocation() -
    /// confirm in the .cxx.
605  sal_uInt64 m_nOffset = 0;
606 
607 public:
608  explicit PDFTrailerElement(PDFDocument& rDoc);
609  bool Read(SvStream& rStream) override;
610  PDFElement* Lookup(const OString& rDictionaryKey);
611  sal_uInt64 GetLocation() const;
612 
    /// Stores the given pointer; ownership is not taken here.
613  void SetDictionary(PDFDictionaryElement* pDictionaryElement)
614  {
615  m_pDictionaryElement = pDictionaryElement;
616  }
617 
    /// Returns the pointer last passed to SetDictionary().
618  PDFDictionaryElement* GetDictionary() { return m_pDictionaryElement; }
619 
    /// Not implemented: asserts in debug builds. With NDEBUG the assert compiles away,
    /// so the call silently appends nothing.
620  void writeString(OStringBuffer& /*rBuffer*/) override { assert(false && "not implemented"); }
621 };
622 
/// Body of PDFObjectParser (named by the constructor below); the `class` line is missing
/// from this listing.
624 {
    /// Reference to the element vector being parsed. It is not copied, so the vector must
    /// outlive this parser.
625  const std::vector<std::unique_ptr<PDFElement>>& mrElements;
626 
627 public:
    /// Binds the parser to rElements.
    /// NOTE(review): single-argument constructor without `explicit`, unlike the other
    /// single-argument constructors in this header - check whether the implicit conversion
    /// from a vector is intended.
628  PDFObjectParser(std::vector<std::unique_ptr<PDFElement>> const& rElements)
629  : mrElements(rElements)
630  {
631  }
632 
    /// Implemented in the .cxx. The defaults make a call with only pParsingElement start
    /// at index 0 with depth 0.
    /// NOTE(review): the size_t result is presumably the index where parsing stopped -
    /// confirm in the .cxx.
633  size_t parse(PDFElement* pParsingElement, size_t nStartIndex = 0, int nCurrentDepth = 0);
634 };
635 
636 } // namespace vcl::filter
637 
638 #endif // INCLUDED_VCL_FILTER_PDFDOCUMENT_HXX
639 
640 /* vim:set shiftwidth=4 softtabstop=4 expandtab: */
std::vector< std::unique_ptr< PDFElement > > m_aElements
Elements of an object in an object stream.
Definition: pdfdocument.hxx:92
Array object: a list.
const sal_Int32 m_nLength
sal_uInt64 m_nDictionaryOffset
Position after the '<<' token.
Definition: pdfdocument.hxx:77
std::vector< std::unique_ptr< PDFElement > > m_aElements
This vector owns all elements.
void writeString(OStringBuffer &rBuffer) override
void writeString(OStringBuffer &rBuffer) override
Numeric object: an integer or a real.
Boolean object: a 'true' or a 'false'.
PDFDictionaryElement * m_pDictionaryElement
Definition: pdfdocument.hxx:80
End of an array: ']'.
sal_uInt64 m_nOffset
Non-compressed: The byte offset of the object, starting from the beginning of the file...
SvMemoryStream m_aMemory
The byte array itself.
std::string GetValue
PDFArrayElement * m_pArrayElement
The contained direct array, if any.
Definition: pdfdocument.hxx:86
void writeString(OStringBuffer &rBuffer) override
PDFObjectElement * m_pObject
The object that contains this array.
#define VCL_DLLPUBLIC
Definition: dllapi.h:29
bool m_bDirty
Has this entry been changed as part of an incremental update?
std::vector< std::unique_ptr< PDFObjectElement > > m_aStoredElements
Objects of an object stream.
Definition: pdfdocument.hxx:90
void SetValue(const OString &rValue)
Reference
std::vector< unsigned char > DecodeHexString(const OString &rHex)
PDFDocument & m_rDoc
The document owning this element.
Definition: pdfdocument.hxx:71
bool alreadyParsing() const
Definition: pdfdocument.hxx:62
sal_uInt64 m_nArrayOffset
Position after the '[' token, if m_pArrayElement is set.
Definition: pdfdocument.hxx:82
std::map< OString, PDFElement * > m_aItems
Key-value pairs when the dictionary is a nested value.
const std::vector< std::unique_ptr< PDFElement > > & mrElements
void setVisiting(bool bVisiting)
Definition: pdfdocument.hxx:59
std::unique_ptr< SvMemoryStream > m_pStreamBuffer
Uncompressed buffer of an object in an object stream.
Definition: pdfdocument.hxx:94
void writeString(OStringBuffer &) override
A byte range in a PDF file.
Definition: pdfdocument.hxx:50
void writeString(OStringBuffer &rBuffer) override
void SetValue(double fValue)
const BorderLinePrimitive2D *pCandidateB assert(pCandidateA)
In-memory representation of an on-disk PDF document.
sal_uInt64 m_nArrayLength
Length of the array buffer till (before) the ']' token.
Definition: pdfdocument.hxx:84
xref "f" or xref stream "0".
An entry in a cross-reference stream.
PDFObjectParser(std::vector< std::unique_ptr< PDFElement >> const &rElements)
XRefEntryType GetType() const
std::vector< sal_Int8 > m_aSignatureLine
Signature line in PDF format, to be consumed by the next Sign() invocation.
void writeString(OStringBuffer &) override
void writeString(OStringBuffer &rBuffer) override
xref "n" or xref stream "1".
void writeString(OStringBuffer &) override
PDFElement * GetElement(size_t nIndex) const
std::vector< size_t > m_aTrailerOffsets
Offsets of trailers, from latest to oldest.
PDFNumberElement * m_pNumberElement
If set, the object contains this number element (outside any dictionary/array).
Definition: pdfdocument.hxx:75
void SetType(XRefEntryType eType)
Same as END_OF_OBJECT, but for object streams (no endobj keyword).
void writeString(OStringBuffer &) override
sal_uInt64 GetOffset() const
std::map< OString, sal_uInt64 > m_aDictionaryKeyOffset
Position after the '/' token.
void writeString(OStringBuffer &rBuffer) override
PDFStreamElement * m_pStreamElement
The stream of this object, used when this is an object stream.
Definition: pdfdocument.hxx:88
PDFDictionaryElement * GetDictionary()
bool GetDirty() const
void writeString(OStringBuffer &) override
void writeString(OStringBuffer &rBuffer) override
Indirect object: something with a unique ID.
Definition: pdfdocument.hxx:68
sal_uInt32 writeString(sal_uInt8 *buffer, const sal_Unicode *v)
PDFNumberElement & m_rObject
The element providing the object number.
sal_uInt64 GetSize()
std::map< size_t, PDFObjectElement * > m_aOffsetObjects
Object offset <-> Object pointer map.
End of a dictionary: '>>'.
SvMemoryStream m_aEditBuffer
All editing takes place in this buffer, if it happens.
A one-liner comment.
Dictionary object: a set of key-value pairs.
std::vector< PDFElement * > m_aElements
std::vector< PDFReferenceElement * > m_aDictionaryReferences
List of all reference elements inside this object's dictionary and nested dictionaries.
Definition: pdfdocument.hxx:97
void SetDictionary(PDFDictionaryElement *pDictionaryElement)
std::map< OString, sal_uInt64 > m_aDictionaryKeyValueLength
Length of the dictionary key and value, till (before) the next token.
void setParsing(bool bParsing)
Definition: pdfdocument.hxx:61
Till the end of the current object.
void SetDirty(bool bDirty)
End of an object: 'endobj' keyword.
Null object: the 'null' singleton.
void insert(OString const &rKey, PDFElement *pPDFElement)
std::map< size_t, PDFObjectElement * > m_aIDObjects
Object ID <-> Object pointer map.
void writeString(OStringBuffer &) override
Reference object: something with a unique ID.
std::vector< size_t > m_aEOFs
List of EOF offsets we know.
End of a stream: 'endstream' keyword.
void writeString(OStringBuffer &rBuffer) override
std::map< size_t, PDFTrailerElement * > m_aOffsetTrailers
Trailer offset <-> Trailer pointer map.
void writeString(OStringBuffer &) override
PDFDictionaryElement * m_pDictionaryElement
bool alreadyVisiting() const
Definition: pdfdocument.hxx:60
Literal string: in (asdf) form.
Name object: a key string.
std::vector< size_t > m_aStartXRefs
List of xref offsets we know.
void SetOffset(sal_uInt64 nOffset)
The trailer singleton is at the end of the doc.
XRefEntryType
The type column of an entry in a cross-reference stream.
Till the first %%EOF token.
std::map< size_t, XRefEntry > m_aXRef
Object ID <-> object offset map.
sal_uInt64 m_nDictionaryLength
Length of the dictionary buffer till (before) the '>>' token.
Definition: pdfdocument.hxx:79
bool parse(OUString const &uri, SourceProviderScannerData *data)
void writeString(OStringBuffer &rBuffer) override
Stream object: a byte array with a known length.
const void * GetData()
void writeString(OStringBuffer &rBuffer) override
Allows creating, updating and writing PDF objects in a container.