LibreOffice Module sdext (master)  1
pdfparse.hxx
Go to the documentation of this file.
1 /* -*- Mode: C++; tab-width: 4; indent-tabs-mode: nil; c-basic-offset: 4 -*- */
2 /*
3  * This file is part of the LibreOffice project.
4  *
5  * This Source Code Form is subject to the terms of the Mozilla Public
6  * License, v. 2.0. If a copy of the MPL was not distributed with this
7  * file, You can obtain one at http://mozilla.org/MPL/2.0/.
8  *
9  * This file incorporates work covered by the following license notice:
10  *
11  * Licensed to the Apache Software Foundation (ASF) under one or more
12  * contributor license agreements. See the NOTICE file distributed
13  * with this work for additional information regarding copyright
14  * ownership. The ASF licenses this file to you under the Apache
15  * License, Version 2.0 (the "License"); you may not use this file
16  * except in compliance with the License. You may obtain a copy of
17  * the License at http://www.apache.org/licenses/LICENSE-2.0 .
18  */
19 
20 #ifndef INCLUDED_SDEXT_SOURCE_PDFIMPORT_INC_PDFPARSE_HXX
21 #define INCLUDED_SDEXT_SOURCE_PDFIMPORT_INC_PDFPARSE_HXX
22 
23 #include <sal/types.h>
24 #include <rtl/ustring.hxx>
25 #include <rtl/string.hxx>
26 
27 #include <string_view>
28 #include <unordered_map>
29 #include <vector>
30 #include <memory>
31 
32 namespace pdfparse
33 {
34 
35 struct EmitImplData;
36 struct PDFContainer;
// Abstract output context that PDFEntry::emit() writes through.
// Concrete subclasses supply the four pure virtual I/O primitives below.
// The class head (listing line 37) was missing from this extract and is
// restored here; "class" is chosen because the body opens with an explicit
// "public:" section.
37 class EmitContext
38 {
39 public:
    // Writes nLen bytes starting at pBuf; returns a bool status
    // (presumably true on success -- confirm against the implementations).
40  virtual bool write( const void* pBuf, unsigned int nLen ) = 0;
    // Returns the current position (presumably the output offset -- TODO confirm).
41  virtual unsigned int getCurPos() = 0;
    // Copies nLen bytes beginning at nOrigOffset; judging by the name the
    // source is the original file -- confirm against the implementations.
42  virtual bool copyOrigBytes( unsigned int nOrigOffset, unsigned int nLen ) = 0;
    // Reads up to nLen bytes beginning at nOrigOffset into pBuf and returns a
    // byte count (presumably the number actually read -- TODO confirm).
43  virtual unsigned int readOrigBytes( unsigned int nOrigOffset, unsigned int nLen, void* pBuf ) = 0;
44 
    // pTop is optional (defaults to nullptr); defined out of line.
45  explicit EmitContext( const PDFContainer* pTop = nullptr );
46  virtual ~EmitContext();
47 
48  // set this to deflate contained streams
49  bool m_bDeflate;
50  // set this to decrypt the PDF file
51  bool m_bDecrypt;
52 
53 private:
    // PDFEntry is a friend, presumably so that its static
    // getEmitData()/setEmitData() helpers can reach m_pImplData.
54  friend struct PDFEntry;
55  std::unique_ptr<EmitImplData> m_pImplData;
56 };
57 
// Abstract root of the PDF entry hierarchy declared in this header.
// Every entry can emit itself through an EmitContext and clone itself.
58 struct PDFEntry
59 {
60  PDFEntry() {}
61  virtual ~PDFEntry();
62 
    // Emits this entry through rWriteContext; returns a bool status
    // (presumably true on success -- confirm in the implementation file).
63  virtual bool emit( EmitContext& rWriteContext ) const = 0;
    // Returns a copy as a raw pointer; ownership presumably passes to the
    // caller -- TODO confirm.
64  virtual PDFEntry* clone() const = 0;
65 
66 protected:
    // Static accessors for the EmitImplData associated with an EmitContext;
    // available to derived entry types only.
67  static EmitImplData* getEmitData( EmitContext const & rContext );
68  static void setEmitData( EmitContext& rContext, EmitImplData* pNewEmitData );
69 };
70 
// Entry that stores a comment as a byte string.
71 struct PDFComment : public PDFEntry
72 {
    // Comment text as handed to the constructor.
73  OString m_aComment;
74 
    // Stores a copy of rComment.
75  explicit PDFComment( const OString& rComment )
76  : PDFEntry(), m_aComment( rComment ) {}
77  virtual ~PDFComment() override;
78  virtual bool emit( EmitContext& rWriteContext ) const override;
79  virtual PDFEntry* clone() const override;
80 };
81 
// Common base of the simple value entries below (name, string, number,
// bool, object reference, null). It overrides neither emit() nor clone(),
// so it remains abstract.
82 struct PDFValue : public PDFEntry
83 {
84  // abstract base class for simple values
85  PDFValue() : PDFEntry() {}
86  virtual ~PDFValue() override;
87 };
88 
// Value entry holding a name as a byte string.
89 struct PDFName : public PDFValue
90 {
    // Name as handed to the constructor.
91  OString m_aName;
92 
    // Stores a copy of rName.
93  explicit PDFName( const OString& rName )
94  : PDFValue(), m_aName( rName ) {}
95  virtual ~PDFName() override;
96  virtual bool emit( EmitContext& rWriteContext ) const override;
97  virtual PDFEntry* clone() const override;
98 
    // Returns the name as an OUString; the exact filtering applied is defined
    // in the implementation file (presumably escape decoding -- TODO confirm).
99  OUString getFilteredName() const;
100 };
101 
// Value entry holding a string as a byte string.
102 struct PDFString : public PDFValue
103 {
    // String as handed to the constructor.
104  OString m_aString;
105 
    // Stores a copy of rString.
106  explicit PDFString( const OString& rString )
107  : PDFValue(), m_aString( rString ) {}
108  virtual ~PDFString() override;
109  virtual bool emit( EmitContext& rWriteContext ) const override;
110  virtual PDFEntry* clone() const override;
111 
    // Returns a filtered form of m_aString as an OString; the filtering itself
    // is defined in the implementation file (presumably removal of delimiters
    // and escapes -- TODO confirm).
112  OString getFilteredString() const;
113 };
114 
// Value entry holding a numeric value as a double.
115 struct PDFNumber : public PDFValue
116 {
    // The numeric value as handed to the constructor.
117  double m_fValue;
118 
119  explicit PDFNumber( double fVal )
120  : PDFValue(), m_fValue( fVal ) {}
121  virtual ~PDFNumber() override;
122  virtual bool emit( EmitContext& rWriteContext ) const override;
123  virtual PDFEntry* clone() const override;
124 };
125 
// Value entry holding a boolean.
126 struct PDFBool : public PDFValue
127 {
    // The boolean value as handed to the constructor.
128  bool m_bValue;
129 
130  explicit PDFBool( bool bVal )
131  : PDFValue(), m_bValue( bVal ) {}
132  virtual ~PDFBool() override;
133  virtual bool emit( EmitContext& rWriteContext ) const override;
134  virtual PDFEntry* clone() const override;
135 };
136 
// Value entry referring to an object by number and generation.
// PDFContainer::findObject() accepts a pointer to this type.
137 struct PDFObjectRef : public PDFValue
138 {
    // Number of the referenced object.
139  unsigned int m_nNumber;
    // Generation of the referenced object.
140  unsigned int m_nGeneration;
141 
142  PDFObjectRef( unsigned int nNr, unsigned int nGen )
143  : PDFValue(), m_nNumber( nNr ), m_nGeneration( nGen ) {}
144  virtual ~PDFObjectRef() override;
145  virtual bool emit( EmitContext& rWriteContext ) const override;
146  virtual PDFEntry* clone() const override;
147 };
148 
// Value entry without any payload (presumably the PDF "null" object --
// confirm in the implementation file).
149 struct PDFNull : public PDFValue
150 {
151  PDFNull() {}
152  virtual ~PDFNull() override;
153  virtual bool emit( EmitContext& rWriteContext ) const override;
154  virtual PDFEntry* clone() const override;
155 };
156 
157 struct PDFObject;
// Base for entries that own a list of sub elements. It does not override
// emit() or clone(), so it remains abstract.
158 struct PDFContainer : public PDFEntry
159 {
    // Initialised to 0 by the constructor (presumably a byte offset in the
    // parsed file -- TODO confirm).
160  sal_Int32 m_nOffset;
    // Owned sub elements; they are destroyed together with the container.
161  std::vector<std::unique_ptr<PDFEntry>> m_aSubElements;
162 
163  // this is an abstract base class for identifying
164  // entries that can contain sub elements besides comments
165  PDFContainer() : PDFEntry(), m_nOffset( 0 ) {}
166  virtual ~PDFContainer() override;
    // Emits the sub elements through rWriteContext; returns a bool status
    // (presumably true only if all of them succeeded -- TODO confirm).
167  bool emitSubElements( EmitContext& rWriteContext ) const;
    // Output parameter: rNewSubElements receives the cloned sub elements.
168  void cloneSubElements( std::vector<std::unique_ptr<PDFEntry>>& rNewSubElements ) const;
169 
    // Looks up an object by number and generation; the result is a raw,
    // non-owning pointer (presumably nullptr when nothing matches -- confirm).
170  PDFObject* findObject( unsigned int nNumber, unsigned int nGeneration ) const;
    // Convenience overload forwarding the reference's number and generation.
    // pRef is dereferenced unconditionally, so it must not be null.
171  PDFObject* findObject( PDFObjectRef const * pRef ) const
172  { return findObject( pRef->m_nNumber, pRef->m_nGeneration ); }
173 };
174 
// Concrete container entry; it adds no members of its own and keeps its
// elements in the inherited m_aSubElements.
175 struct PDFArray : public PDFContainer
176 {
177  PDFArray() {}
178  virtual ~PDFArray() override;
179  virtual bool emit( EmitContext& rWriteContext ) const override;
180  virtual PDFEntry* clone() const override;
181 };
182 
// Container entry that additionally offers name-based lookup of its values
// through m_aMap.
183 struct PDFDict : public PDFContainer
184 {
    // Name -> value lookup table. The mapped pointers are raw; presumably they
    // point at entries owned by the inherited m_aSubElements -- TODO confirm.
185  using Map = std::unordered_map<OString,PDFEntry*>;
186  Map m_aMap;
187 
188  PDFDict() {}
189  virtual ~PDFDict() override;
190  virtual bool emit( EmitContext& rWriteContext ) const override;
191  virtual PDFEntry* clone() const override;
192 
193  // inserting an empty (null) pValue will remove rName and the previous
194  // value from the dictionary; ownership of pValue is taken over
195  void insertValue( const OString& rName, std::unique_ptr<PDFEntry> pValue );
196  // removes a name/value pair from the dict
197  void eraseValue( std::string_view rName );
198  // rebuilds m_aMap from the sub elements
199  // returns nullptr if successful, else the first offending element
200  PDFEntry* buildMap();
201 };
202 
// Entry describing a stream by its begin/end offsets together with the
// dictionary that belongs to it.
203 struct PDFStream : public PDFEntry
204 {
205  unsigned int m_nBeginOffset;
206  unsigned int m_nEndOffset; // offset of the byte after the stream
    // Dictionary passed to the constructor. This declaration (listing line
    // 207) was missing from the extract although the constructor initialises
    // it; the type follows the constructor parameter. The pointer is raw --
    // presumably non-owning, TODO confirm.
207  PDFDict* m_pDict;
208 
209  PDFStream( unsigned int nBegin, unsigned int nEnd, PDFDict* pStreamDict )
210  : PDFEntry(), m_nBeginOffset( nBegin ), m_nEndOffset( nEnd ), m_pDict( pStreamDict ) {}
211  virtual ~PDFStream() override;
212  virtual bool emit( EmitContext& rWriteContext ) const override;
213  virtual PDFEntry* clone() const override;
214 
215  unsigned int getDictLength( const PDFContainer* pObjectContainer ) const; // get contents of the "Length" entry of the dict
216 };
217 
// Container entry for a trailer; it carries a pointer to a dictionary.
218 struct PDFTrailer : public PDFContainer
219 {
    // Starts out as nullptr (see constructor). This declaration (listing line
    // 220) was missing from the extract; the PDFDict* type mirrors
    // PDFStream::m_pDict -- NOTE(review): confirm against upstream.
220  PDFDict* m_pDict;
221 
222  PDFTrailer() : PDFContainer(), m_pDict( nullptr ) {}
223  virtual ~PDFTrailer() override;
224  virtual bool emit( EmitContext& rWriteContext ) const override;
225  virtual PDFEntry* clone() const override;
226 };
227 
228 struct PDFFileImplData;
// Container entry for a whole PDF file: carries the PDF version numbers and
// the encryption/decryption entry points.
229 struct PDFFile : public PDFContainer
230 {
231 private:
    // Private implementation data. It is mutable, presumably so that const
    // members such as setupDecryptionData() can create or update it lazily
    // via impl_getData() -- TODO confirm.
232  mutable std::unique_ptr<PDFFileImplData> m_pData;
233  PDFFileImplData* impl_getData() const;
234 public:
235  unsigned int m_nMajor; // PDF major
236  unsigned int m_nMinor; // PDF minor
237 
238  PDFFile();
239  virtual ~PDFFile() override;
240 
241  virtual bool emit( EmitContext& rWriteContext ) const override;
242  virtual PDFEntry* clone() const override;
243 
    // Reports whether the file is encrypted.
244  bool isEncrypted() const;
245 
    // Reports whether the encryption format in use is supported.
246  bool usesSupportedEncryptionFormat() const;
247 
248  // this method checks whether rPwd is compatible with
249  // either user or owner password and sets up decrypt data in that case
250  // returns true if decryption can be done
251  bool setupDecryptionData( const OString& rPwd ) const;
252 
    // Decrypts nLen bytes from pInBuffer into pOutBuffer for the object
    // identified by nObject/nGeneration; returns a bool status (presumably
    // false when decryption is not possible -- TODO confirm).
253  bool decrypt( const sal_uInt8* pInBuffer, sal_uInt32 nLen,
254  sal_uInt8* pOutBuffer,
255  unsigned int nObject, unsigned int nGeneration ) const;
256 };
257 
// Container entry for an object identified by number and generation.
258 struct PDFObject : public PDFContainer
259 {
    // Both pointers start out as nullptr (see constructor). Their
    // declarations (listing lines 260/261) were missing from the extract and
    // are restored with the types given in the listing's cross references.
    // They are raw pointers -- presumably non-owning views into
    // m_aSubElements; TODO confirm.
260  PDFEntry* m_pObject;
261  PDFStream* m_pStream;
262  unsigned int m_nNumber;
263  unsigned int m_nGeneration;
264 
265  PDFObject( unsigned int nNr, unsigned int nGen )
266  : m_pObject( nullptr ), m_pStream( nullptr ), m_nNumber( nNr ), m_nGeneration( nGen ) {}
267  virtual ~PDFObject() override;
268  virtual bool emit( EmitContext& rWriteContext ) const override;
269  virtual PDFEntry* clone() const override;
270 
271  // writes only the contained stream, deflated if necessary
272  void writeStream( EmitContext& rContext, const PDFFile* pPDFFile ) const;
273 
274 private:
275  // returns true if stream is deflated
276  // fills rpStream and *pBytes with the stream data and its count of bytes
277  // rpStream owns the buffer (std::unique_ptr<char[]>), so no manual free is needed
278  // on error the outputs are presumably left empty / 0 -- confirm in the implementation
279  bool getDeflatedStream( std::unique_ptr<char[]>& rpStream, unsigned int* pBytes, const PDFContainer* pObjectContainer, EmitContext& rContext ) const;
280 };
281 
// Concrete container entry that adds no data members of its own.
// NOTE(review): listing line 284 is absent from this extract -- presumably
// an explicit default constructor; confirm against upstream.
282 struct PDFPart : public PDFContainer
283 {
285  virtual ~PDFPart() override;
286  virtual bool emit( EmitContext& rWriteContext ) const override;
287  virtual PDFEntry* clone() const override;
288 };
289 
// Static-only entry point for reading; the default constructor is deleted,
// so the type cannot be default-constructed.
290 struct PDFReader
291 {
292  PDFReader() = delete;
293 
    // Reads the file named by pFileName and returns the resulting entry,
    // owned by the caller (presumably an empty pointer on failure -- TODO confirm).
294  static std::unique_ptr<PDFEntry> read( const char* pFileName );
295 #ifdef _WIN32
    // Windows-only overload taking an in-memory buffer of nLen bytes.
296  static std::unique_ptr<PDFEntry> read( const char* pBuffer, unsigned int nLen );
297 #endif
298 };
299 
300 } // namespace
301 
302 #endif
303 
304 /* vim:set shiftwidth=4 softtabstop=4 expandtab: */
virtual bool emit(EmitContext &rWriteContext) const override
Definition: pdfentries.cxx:927
virtual PDFEntry * clone() const override
Definition: pdfentries.cxx:240
void eraseValue(std::string_view rName)
Definition: pdfentries.cxx:553
virtual ~PDFNumber() override
Definition: pdfentries.cxx:337
virtual ~PDFComment() override
Definition: pdfentries.cxx:127
std::unordered_map< OString, PDFEntry * > Map
Definition: pdfparse.hxx:185
virtual bool emit(EmitContext &rWriteContext) const override
Definition: pdfentries.cxx:399
virtual ~PDFFile() override
OString getFilteredString() const
Definition: pdfentries.cxx:245
virtual bool emit(EmitContext &rWriteContext) const override
Definition: pdfentries.cxx:341
PDFComment(const OString &rComment)
Definition: pdfparse.hxx:75
virtual bool write(const void *pBuf, unsigned int nLen)=0
std::unique_ptr< PDFFileImplData > m_pData
Definition: pdfparse.hxx:232
unsigned int m_nBeginOffset
Definition: pdfparse.hxx:205
virtual PDFEntry * clone() const override
Definition: pdfentries.cxx:389
virtual ~PDFPart() override
static std::unique_ptr< PDFEntry > read(const char *pFileName)
Definition: pdfparse.cxx:608
virtual bool emit(EmitContext &rWriteContext) const override
Definition: pdfentries.cxx:131
PDFObject * findObject(unsigned int nNumber, unsigned int nGeneration) const
Definition: pdfentries.cxx:475
unsigned int m_nNumber
Definition: pdfparse.hxx:139
bool decrypt(const sal_uInt8 *pInBuffer, sal_uInt32 nLen, sal_uInt8 *pOutBuffer, unsigned int nObject, unsigned int nGeneration) const
virtual bool emit(EmitContext &rWriteContext) const override
Definition: pdfentries.cxx:413
unsigned int m_nEndOffset
Definition: pdfparse.hxx:206
PDFString(const OString &rString)
Definition: pdfparse.hxx:106
unsigned int m_nGeneration
Definition: pdfparse.hxx:140
PDFObjectRef(unsigned int nNr, unsigned int nGen)
Definition: pdfparse.hxx:142
virtual bool emit(EmitContext &rWriteContext) const override
PDFBool(bool bVal)
Definition: pdfparse.hxx:130
virtual bool emit(EmitContext &rWriteContext) const override
Definition: pdfentries.cxx:145
virtual ~PDFValue() override
Definition: pdfentries.cxx:123
virtual ~PDFDict() override
Definition: pdfentries.cxx:511
virtual bool emit(EmitContext &rWriteContext) const override
Definition: pdfentries.cxx:515
std::vector< std::unique_ptr< PDFEntry > > m_aSubElements
Definition: pdfparse.hxx:161
PDFNumber(double fVal)
Definition: pdfparse.hxx:119
bool emitSubElements(EmitContext &rWriteContext) const
Definition: pdfentries.cxx:448
virtual bool emit(EmitContext &rWriteContext) const override
Definition: pdfentries.cxx:611
virtual PDFEntry * clone() const override
Definition: pdfentries.cxx:152
virtual ~PDFBool() override
Definition: pdfentries.cxx:395
virtual ~PDFNull() override
Definition: pdfentries.cxx:409
virtual ~PDFName() override
Definition: pdfentries.cxx:141
OUString getFilteredName() const
Definition: pdfentries.cxx:157
bool usesSupportedEncryptionFormat() const
unsigned int m_nMajor
Definition: pdfparse.hxx:235
virtual ~PDFObject() override
Definition: pdfentries.cxx:654
virtual PDFEntry * clone() const override
Definition: pdfentries.cxx:418
virtual PDFEntry * clone() const override
Definition: pdfentries.cxx:616
static EmitImplData * getEmitData(EmitContext const &rContext)
Definition: pdfentries.cxx:111
PDFStream * m_pStream
Definition: pdfparse.hxx:261
virtual PDFEntry * clone() const override
Definition: pdfentries.cxx:599
void insertValue(const OString &rName, std::unique_ptr< PDFEntry > pValue)
Definition: pdfentries.cxx:524
virtual ~PDFStream() override
Definition: pdfentries.cxx:607
bool getDeflatedStream(std::unique_ptr< char[]> &rpStream, unsigned int *pBytes, const PDFContainer *pObjectContainer, EmitContext &rContext) const
Definition: pdfentries.cxx:658
virtual unsigned int getCurPos()=0
virtual unsigned int readOrigBytes(unsigned int nOrigOffset, unsigned int nLen, void *pBuf)=0
bool isEncrypted() const
virtual PDFEntry * clone() const override
virtual PDFEntry * clone() const override
Definition: pdfentries.cxx:136
virtual bool copyOrigBytes(unsigned int nOrigOffset, unsigned int nLen)=0
virtual PDFEntry * clone() const override
Definition: pdfentries.cxx:439
virtual ~PDFArray() override
Definition: pdfentries.cxx:491
PDFEntry * buildMap()
Definition: pdfentries.cxx:576
virtual PDFEntry * clone() const override
Definition: pdfentries.cxx:995
virtual bool emit(EmitContext &rWriteContext) const override
virtual bool emit(EmitContext &rWriteContext) const override
Definition: pdfentries.cxx:495
PDFFileImplData * impl_getData() const
PDFObject * findObject(PDFObjectRef const *pRef) const
Definition: pdfparse.hxx:171
virtual PDFEntry * clone() const override
Definition: pdfentries.cxx:903
virtual ~PDFString() override
Definition: pdfentries.cxx:189
virtual PDFEntry * clone() const override
Definition: pdfentries.cxx:504
static void setEmitData(EmitContext &rContext, EmitImplData *pNewEmitData)
Definition: pdfentries.cxx:116
PDFEntry * m_pObject
Definition: pdfparse.hxx:260
unsigned int m_nNumber
Definition: pdfparse.hxx:262
unsigned char sal_uInt8
void cloneSubElements(std::vector< std::unique_ptr< PDFEntry >> &rNewSubElements) const
Definition: pdfentries.cxx:468
EmitContext(const PDFContainer *pTop=nullptr)
Definition: pdfentries.cxx:95
PDFName(const OString &rName)
Definition: pdfparse.hxx:93
virtual ~PDFContainer() override
Definition: pdfentries.cxx:444
PDFObject(unsigned int nNr, unsigned int nGen)
Definition: pdfparse.hxx:265
virtual bool emit(EmitContext &rWriteContext) const override
Definition: pdfentries.cxx:193
unsigned int getDictLength(const PDFContainer *pObjectContainer) const
Definition: pdfentries.cxx:621
virtual bool emit(EmitContext &rWriteContext) const override
Definition: pdfentries.cxx:801
virtual PDFEntry * clone() const override
Definition: pdfentries.cxx:404
virtual ~PDFObjectRef() override
Definition: pdfentries.cxx:424
virtual PDFEntry * clone() const =0
std::unique_ptr< EmitImplData > m_pImplData
Definition: pdfparse.hxx:55
unsigned int m_nGeneration
Definition: pdfparse.hxx:263
bool setupDecryptionData(const OString &rPwd) const
virtual ~PDFTrailer() override
Definition: pdfentries.cxx:923
virtual PDFEntry * clone() const override
PDFStream(unsigned int nBegin, unsigned int nEnd, PDFDict *pStreamDict)
Definition: pdfparse.hxx:209
virtual bool emit(EmitContext &rWriteContext) const override
Definition: pdfentries.cxx:428
unsigned int m_nMinor
Definition: pdfparse.hxx:236
void writeStream(EmitContext &rContext, const PDFFile *pPDFFile) const
Definition: pdfentries.cxx:782
virtual bool emit(EmitContext &rWriteContext) const =0