LibreOffice Module sdext (master)  1
pdfparse.hxx
Go to the documentation of this file.
1 /* -*- Mode: C++; tab-width: 4; indent-tabs-mode: nil; c-basic-offset: 4 -*- */
2 /*
3  * This file is part of the LibreOffice project.
4  *
5  * This Source Code Form is subject to the terms of the Mozilla Public
6  * License, v. 2.0. If a copy of the MPL was not distributed with this
7  * file, You can obtain one at http://mozilla.org/MPL/2.0/.
8  *
9  * This file incorporates work covered by the following license notice:
10  *
11  * Licensed to the Apache Software Foundation (ASF) under one or more
12  * contributor license agreements. See the NOTICE file distributed
13  * with this work for additional information regarding copyright
14  * ownership. The ASF licenses this file to you under the Apache
15  * License, Version 2.0 (the "License"); you may not use this file
16  * except in compliance with the License. You may obtain a copy of
17  * the License at http://www.apache.org/licenses/LICENSE-2.0 .
18  */
19 
20 #ifndef INCLUDED_SDEXT_SOURCE_PDFIMPORT_INC_PDFPARSE_HXX
21 #define INCLUDED_SDEXT_SOURCE_PDFIMPORT_INC_PDFPARSE_HXX
22 
23 #include <sal/types.h>
24 #include <rtl/ustring.hxx>
25 #include <rtl/string.hxx>
26 
27 #include <unordered_map>
28 #include <vector>
29 #include <memory>
30 
31 namespace pdfparse
32 {
33 
34 struct EmitImplData;
35 struct PDFContainer;
37 {
38 public:
    // Abstract output operations; every concrete emit context has to
    // implement all four.
    // NOTE(review): the return-value conventions (success flag, byte counts)
    // are not stated in this header - confirm against the implementers.
39  virtual bool write( const void* pBuf, unsigned int nLen ) = 0;
40  virtual unsigned int getCurPos() = 0;
41  virtual bool copyOrigBytes( unsigned int nOrigOffset, unsigned int nLen ) = 0;
42  virtual unsigned int readOrigBytes( unsigned int nOrigOffset, unsigned int nLen, void* pBuf ) = 0;
43 
    // pTop is optional and defaults to nullptr; "explicit" rules out an
    // implicit conversion from a PDFContainer pointer to an EmitContext.
44  explicit EmitContext( const PDFContainer* pTop = nullptr );
45  virtual ~EmitContext();
46 
47  // set this to deflate contained streams
48  bool m_bDeflate;
49  // set this to decrypt the PDF file
50  bool m_bDecrypt;
51 
52 private:
    // PDFEntry is granted access to the private section below; presumably so
    // that PDFEntry::getEmitData()/setEmitData() can reach m_pImplData.
53  friend struct PDFEntry;
    // Implementation data; EmitImplData is only forward-declared in this header.
54  std::unique_ptr<EmitImplData> m_pImplData;
55 };
56 
// Abstract root of the entry hierarchy declared in this header: every entry
// type has to provide emit() and clone().
57 struct PDFEntry
58 {
59  PDFEntry() {}
60  virtual ~PDFEntry();
61 
    // Emits this entry through rWriteContext.
    // NOTE(review): presumably returns true on success - confirm in the
    // implementation file.
62  virtual bool emit( EmitContext& rWriteContext ) const = 0;
    // Polymorphic copy hook; the result is handed back as a raw pointer.
    // NOTE(review): ownership of the result is not expressed in the type;
    // presumably the caller owns it - confirm at the call sites.
63  virtual PDFEntry* clone() const = 0;
64 
65 protected:
    // Static accessors for the EmitImplData belonging to a context; protected,
    // so only PDFEntry and types derived from it can call them.
    // NOTE(review): whether setEmitData() takes ownership of pNewEmitData is
    // not visible here - verify in the implementation.
66  static EmitImplData* getEmitData( EmitContext const & rContext );
67  static void setEmitData( EmitContext& rContext, EmitImplData* pNewEmitData );
68 };
69 
// Entry that stores a comment as a byte string (OString).
70 struct PDFComment : public PDFEntry
71 {
    // const member: set once by the constructor and never reassigned.
72  OString const m_aComment;
73 
    // Copies rComment into m_aComment; "explicit" prevents implicit
    // conversion from OString.
74  explicit PDFComment( const OString& rComment )
75  : PDFEntry(), m_aComment( rComment ) {}
76  virtual ~PDFComment() override;
77  virtual bool emit( EmitContext& rWriteContext ) const override;
78  virtual PDFEntry* clone() const override;
79 };
80 
// Intermediate base type: it overrides neither emit() nor clone(), so it
// remains abstract and cannot be instantiated on its own.
81 struct PDFValue : public PDFEntry
82 {
83  // abstract base class for simple values
84  PDFValue() : PDFEntry() {}
85  virtual ~PDFValue() override;
86 };
87 
// Value type holding a name as a byte string (OString).
88 struct PDFName : public PDFValue
89 {
    // Not declared const, so it may be reassigned after construction.
90  OString m_aName;
91 
92  explicit PDFName( const OString& rName )
93  : PDFValue(), m_aName( rName ) {}
94  virtual ~PDFName() override;
95  virtual bool emit( EmitContext& rWriteContext ) const override;
96  virtual PDFEntry* clone() const override;
97 
    // Returns the name as an OUString.
    // NOTE(review): the filtering/decoding rules applied are not visible in
    // this header - see the implementation.
98  OUString getFilteredName() const;
99 };
100 
// Value type holding a string as a byte string (OString).
101 struct PDFString : public PDFValue
102 {
     // const member: set once by the constructor and never reassigned.
103  OString const m_aString;
104 
105  explicit PDFString( const OString& rString )
106  : PDFValue(), m_aString( rString ) {}
107  virtual ~PDFString() override;
108  virtual bool emit( EmitContext& rWriteContext ) const override;
109  virtual PDFEntry* clone() const override;
110 
     // Returns a processed copy of the string as an OString.
     // NOTE(review): the filtering rules (presumably delimiter/escape
     // handling) are not visible in this header - see the implementation.
111  OString getFilteredString() const;
112 };
113 
// Value type holding a number as a double.
114 struct PDFNumber : public PDFValue
115 {
     // const member: set once by the constructor and never reassigned.
116  double const m_fValue;
117 
118  explicit PDFNumber( double fVal )
119  : PDFValue(), m_fValue( fVal ) {}
120  virtual ~PDFNumber() override;
121  virtual bool emit( EmitContext& rWriteContext ) const override;
122  virtual PDFEntry* clone() const override;
123 };
124 
// Value type holding a boolean.
125 struct PDFBool : public PDFValue
126 {
     // const member: set once by the constructor and never reassigned.
127  bool const m_bValue;
128 
129  explicit PDFBool( bool bVal )
130  : PDFValue(), m_bValue( bVal ) {}
131  virtual ~PDFBool() override;
132  virtual bool emit( EmitContext& rWriteContext ) const override;
133  virtual PDFEntry* clone() const override;
134 };
135 
// Value type holding an object number / generation pair. The same pair is
// what PDFContainer::findObject() takes to look up a PDFObject.
136 struct PDFObjectRef : public PDFValue
137 {
     // Both parts are const: fixed at construction.
138  unsigned int const m_nNumber;
139  unsigned int const m_nGeneration;
140 
141  PDFObjectRef( unsigned int nNr, unsigned int nGen )
142  : PDFValue(), m_nNumber( nNr ), m_nGeneration( nGen ) {}
143  virtual ~PDFObjectRef() override;
144  virtual bool emit( EmitContext& rWriteContext ) const override;
145  virtual PDFEntry* clone() const override;
146 };
147 
// Value type without any data members; only its dynamic type carries
// information.
148 struct PDFNull : public PDFValue
149 {
150  PDFNull() {}
151  virtual ~PDFNull() override;
152  virtual bool emit( EmitContext& rWriteContext ) const override;
153  virtual PDFEntry* clone() const override;
154 };
155 
156 struct PDFObject;
// Base type for entries that hold child entries. It does not override
// emit()/clone(), so it remains abstract.
157 struct PDFContainer : public PDFEntry
158 {
     // Initialised to 0 by the constructor.
     // NOTE(review): presumably a byte offset into the parsed file - confirm
     // where it is assigned.
159  sal_Int32 m_nOffset;
     // Child entries; the unique_ptr elements own them.
160  std::vector<std::unique_ptr<PDFEntry>> m_aSubElements;
161 
162  // this is an abstract base class for identifying
163  // entries that can contain sub elements besides comments
164  PDFContainer() : PDFEntry(), m_nOffset( 0 ) {}
165  virtual ~PDFContainer() override;
     // Helpers for derived types' emit()/clone() implementations:
     // emitSubElements() writes through rWriteContext, cloneSubElements()
     // fills the caller-supplied vector rNewSubElements.
166  bool emitSubElements( EmitContext& rWriteContext ) const;
167  void cloneSubElements( std::vector<std::unique_ptr<PDFEntry>>& rNewSubElements ) const;
168 
     // Looks up an object by number and generation.
     // NOTE(review): presumably returns nullptr when nothing matches, and the
     // returned pointer is non-owning - confirm in the implementation.
169  PDFObject* findObject( unsigned int nNumber, unsigned int nGeneration ) const;
     // Convenience overload forwarding the reference's number/generation.
     // pRef is dereferenced without a check: callers must pass a non-null
     // pointer.
170  PDFObject* findObject( PDFObjectRef const * pRef ) const
171  { return findObject( pRef->m_nNumber, pRef->m_nGeneration ); }
172 };
173 
// Concrete container that adds no data members of its own; its contents are
// the inherited m_aSubElements.
174 struct PDFArray : public PDFContainer
175 {
176  PDFArray() {}
177  virtual ~PDFArray() override;
178  virtual bool emit( EmitContext& rWriteContext ) const override;
179  virtual PDFEntry* clone() const override;
180 };
181 
// Container with an additional name -> entry lookup table (m_aMap) next to
// the inherited m_aSubElements.
182 struct PDFDict : public PDFContainer
183 {
     // Keys are OString names, values are raw PDFEntry pointers.
     // NOTE(review): the raw pointers presumably refer to entries owned by
     // m_aSubElements (non-owning) - confirm in buildMap()/insertValue().
184  typedef std::unordered_map<OString,PDFEntry*> Map;
185  Map m_aMap;
186 
187  PDFDict() {}
188  virtual ~PDFDict() override;
189  virtual bool emit( EmitContext& rWriteContext ) const override;
190  virtual PDFEntry* clone() const override;
191 
     // Takes pValue by std::unique_ptr, i.e. ownership is passed in by the
     // caller.
192  // inserting a value of NULL will remove rName and the previous value
193  // from the dictionary
194  void insertValue( const OString& rName, std::unique_ptr<PDFEntry> pValue );
195  // removes a name/value pair from the dict
196  void eraseValue( const OString& rName );
197  // builds new map as of sub elements
198  // returns NULL if successful, else the first offending element
199  PDFEntry* buildMap();
200 };
201 
// Entry describing a stream by a begin/end offset pair plus a dictionary
// pointer (m_pDict, initialised from the constructor's pStreamDict).
// NOTE(review): listing line 206, presumably the m_pDict declaration, is
// missing from this view; its exact type and ownership are therefore not
// visible here.
202 struct PDFStream : public PDFEntry
203 {
     // Both offsets are const: fixed at construction.
204  unsigned int const m_nBeginOffset;
205  unsigned int const m_nEndOffset; // offset of the byte after the stream
207 
208  PDFStream( unsigned int nBegin, unsigned int nEnd, PDFDict* pStreamDict )
209  : PDFEntry(), m_nBeginOffset( nBegin ), m_nEndOffset( nEnd ), m_pDict( pStreamDict ) {}
210  virtual ~PDFStream() override;
211  virtual bool emit( EmitContext& rWriteContext ) const override;
212  virtual PDFEntry* clone() const override;
213 
     // NOTE(review): pObjectContainer is presumably used to resolve a
     // "Length" value given as an object reference - confirm in the
     // implementation.
214  unsigned int getDictLength( const PDFContainer* pObjectContainer ) const; // get contents of the "Length" entry of the dict
215 };
216 
// Container for a trailer. The constructor initialises a pointer member
// m_pDict to nullptr.
// NOTE(review): listing line 219, presumably the m_pDict declaration, is
// missing from this view; its type and ownership are not visible here.
217 struct PDFTrailer : public PDFContainer
218 {
220 
221  PDFTrailer() : PDFContainer(), m_pDict( nullptr ) {}
222  virtual ~PDFTrailer() override;
223  virtual bool emit( EmitContext& rWriteContext ) const override;
224  virtual PDFEntry* clone() const override;
225 };
226 
227 struct PDFFileImplData;
// Container representing a whole PDF file: version numbers plus the
// encryption-related queries declared below.
228 struct PDFFile : public PDFContainer
229 {
230 private:
     // Declared mutable so that const member functions can modify it.
     // PDFFileImplData is only forward-declared in this header.
     // NOTE(review): impl_getData() presumably creates m_pData on first use -
     // confirm in the implementation.
231  mutable std::unique_ptr<PDFFileImplData> m_pData;
232  PDFFileImplData* impl_getData() const;
233 public:
234  unsigned int m_nMajor; // PDF major
235  unsigned int m_nMinor; // PDF minor
236 
237  PDFFile();
238  virtual ~PDFFile() override;
239 
240  virtual bool emit( EmitContext& rWriteContext ) const override;
241  virtual PDFEntry* clone() const override;
242 
243  bool isEncrypted() const;
244 
245  bool usesSupportedEncryptionFormat() const;
246 
247  // this method checks whether rPwd is compatible with
248  // either user or owner password and sets up decrypt data in that case
249  // returns true if decryption can be done
250  bool setupDecryptionData( const OString& rPwd ) const;
251 
     // Takes nLen input bytes at pInBuffer and an output buffer pOutBuffer,
     // together with the number/generation of the object being processed.
     // NOTE(review): the required size of pOutBuffer, whether in/out buffers
     // may alias, and the meaning of the bool result are not stated here -
     // confirm in the implementation.
252  bool decrypt( const sal_uInt8* pInBuffer, sal_uInt32 nLen,
253  sal_uInt8* pOutBuffer,
254  unsigned int nObject, unsigned int nGeneration ) const;
255 };
256 
// Container for an object identified by number and generation. The
// constructor also initialises two pointer members, m_pObject and m_pStream,
// to nullptr.
// NOTE(review): listing lines 259-260, presumably the declarations of
// m_pObject and m_pStream, are missing from this view.
257 struct PDFObject : public PDFContainer
258 {
     // Unlike m_nGeneration, m_nNumber is not const and can be changed after
     // construction.
261  unsigned int m_nNumber;
262  unsigned int const m_nGeneration;
263 
264  PDFObject( unsigned int nNr, unsigned int nGen )
265  : m_pObject( nullptr ), m_pStream( nullptr ), m_nNumber( nNr ), m_nGeneration( nGen ) {}
266  virtual ~PDFObject() override;
267  virtual bool emit( EmitContext& rWriteContext ) const override;
268  virtual PDFEntry* clone() const override;
269 
270  // writes only the contained stream, deflated if necessary
271  void writeStream( EmitContext& rContext, const PDFFile* pPDFFile ) const;
272 
273 private:
274  // returns true if stream is deflated
275  // fills rpStream and *pBytes with the stream data and the count of bytes
276  // rpStream (a std::unique_ptr<char[]>) owns the returned memory; no manual free is needed
277  // fills in NULL and 0 in case of error
278  bool getDeflatedStream( std::unique_ptr<char[]>& rpStream, unsigned int* pBytes, const PDFContainer* pObjectContainer, EmitContext& rContext ) const;
279 };
280 
// Concrete container type; no data members are visible in this listing.
// NOTE(review): listing line 283 (presumably the constructor) is missing
// from this view.
281 struct PDFPart : public PDFContainer
282 {
284  virtual ~PDFPart() override;
285  virtual bool emit( EmitContext& rWriteContext ) const override;
286  virtual PDFEntry* clone() const override;
287 };
288 
// Static-only entry point for parsing: the default constructor is deleted,
// so the type cannot be instantiated.
289 struct PDFReader
290 {
291  PDFReader() = delete;
292 
     // Reads from the file named by pFileName; ownership of the result is
     // transferred to the caller through the unique_ptr.
     // NOTE(review): presumably returns an empty pointer on failure - confirm
     // in the implementation.
293  static std::unique_ptr<PDFEntry> read( const char* pFileName );
     // The overload taking an in-memory buffer of nLen bytes is only declared
     // when _WIN32 is defined.
294 #ifdef _WIN32
295  static std::unique_ptr<PDFEntry> read( const char* pBuffer, unsigned int nLen );
296 #endif
297 };
298 
299 } // namespace
300 
301 #endif
302 
303 /* vim:set shiftwidth=4 softtabstop=4 expandtab: */
virtual bool emit(EmitContext &rWriteContext) const override
Definition: pdfentries.cxx:928
virtual PDFEntry * clone() const override
Definition: pdfentries.cxx:240
virtual ~PDFNumber() override
Definition: pdfentries.cxx:337
virtual ~PDFComment() override
Definition: pdfentries.cxx:127
std::unordered_map< OString, PDFEntry * > Map
Definition: pdfparse.hxx:184
virtual bool emit(EmitContext &rWriteContext) const override
Definition: pdfentries.cxx:399
virtual ~PDFFile() override
OString getFilteredString() const
Definition: pdfentries.cxx:245
virtual bool emit(EmitContext &rWriteContext) const override
Definition: pdfentries.cxx:341
PDFComment(const OString &rComment)
Definition: pdfparse.hxx:74
virtual bool write(const void *pBuf, unsigned int nLen)=0
std::unique_ptr< PDFFileImplData > m_pData
Definition: pdfparse.hxx:231
virtual PDFEntry * clone() const override
Definition: pdfentries.cxx:389
virtual ~PDFPart() override
static std::unique_ptr< PDFEntry > read(const char *pFileName)
Definition: pdfparse.cxx:602
virtual bool emit(EmitContext &rWriteContext) const override
Definition: pdfentries.cxx:131
PDFObject * findObject(unsigned int nNumber, unsigned int nGeneration) const
Definition: pdfentries.cxx:475
bool decrypt(const sal_uInt8 *pInBuffer, sal_uInt32 nLen, sal_uInt8 *pOutBuffer, unsigned int nObject, unsigned int nGeneration) const
virtual bool emit(EmitContext &rWriteContext) const override
Definition: pdfentries.cxx:413
PDFString(const OString &rString)
Definition: pdfparse.hxx:105
PDFObjectRef(unsigned int nNr, unsigned int nGen)
Definition: pdfparse.hxx:141
virtual bool emit(EmitContext &rWriteContext) const override
PDFBool(bool bVal)
Definition: pdfparse.hxx:129
unsigned int const m_nGeneration
Definition: pdfparse.hxx:262
double const m_fValue
Definition: pdfparse.hxx:116
virtual bool emit(EmitContext &rWriteContext) const override
Definition: pdfentries.cxx:145
unsigned int const m_nGeneration
Definition: pdfparse.hxx:139
virtual ~PDFValue() override
Definition: pdfentries.cxx:123
virtual ~PDFDict() override
Definition: pdfentries.cxx:511
virtual bool emit(EmitContext &rWriteContext) const override
Definition: pdfentries.cxx:515
std::vector< std::unique_ptr< PDFEntry > > m_aSubElements
Definition: pdfparse.hxx:160
PDFNumber(double fVal)
Definition: pdfparse.hxx:118
bool emitSubElements(EmitContext &rWriteContext) const
Definition: pdfentries.cxx:448
virtual bool emit(EmitContext &rWriteContext) const override
Definition: pdfentries.cxx:613
virtual PDFEntry * clone() const override
Definition: pdfentries.cxx:152
void eraseValue(const OString &rName)
Definition: pdfentries.cxx:555
virtual ~PDFBool() override
Definition: pdfentries.cxx:395
virtual ~PDFNull() override
Definition: pdfentries.cxx:409
virtual ~PDFName() override
Definition: pdfentries.cxx:141
unsigned int const m_nEndOffset
Definition: pdfparse.hxx:205
OUString getFilteredName() const
Definition: pdfentries.cxx:157
bool usesSupportedEncryptionFormat() const
unsigned int m_nMajor
Definition: pdfparse.hxx:234
virtual ~PDFObject() override
Definition: pdfentries.cxx:656
virtual PDFEntry * clone() const override
Definition: pdfentries.cxx:418
virtual PDFEntry * clone() const override
Definition: pdfentries.cxx:618
static EmitImplData * getEmitData(EmitContext const &rContext)
Definition: pdfentries.cxx:111
PDFStream * m_pStream
Definition: pdfparse.hxx:260
virtual PDFEntry * clone() const override
Definition: pdfentries.cxx:601
void insertValue(const OString &rName, std::unique_ptr< PDFEntry > pValue)
Definition: pdfentries.cxx:524
unsigned int const m_nBeginOffset
Definition: pdfparse.hxx:204
OString const m_aComment
Definition: pdfparse.hxx:72
virtual ~PDFStream() override
Definition: pdfentries.cxx:609
bool const m_bValue
Definition: pdfparse.hxx:127
unsigned int const m_nNumber
Definition: pdfparse.hxx:138
bool getDeflatedStream(std::unique_ptr< char[]> &rpStream, unsigned int *pBytes, const PDFContainer *pObjectContainer, EmitContext &rContext) const
Definition: pdfentries.cxx:660
virtual unsigned int getCurPos()=0
virtual unsigned int readOrigBytes(unsigned int nOrigOffset, unsigned int nLen, void *pBuf)=0
bool isEncrypted() const
virtual PDFEntry * clone() const override
virtual PDFEntry * clone() const override
Definition: pdfentries.cxx:136
virtual bool copyOrigBytes(unsigned int nOrigOffset, unsigned int nLen)=0
virtual PDFEntry * clone() const override
Definition: pdfentries.cxx:439
virtual ~PDFArray() override
Definition: pdfentries.cxx:491
PDFEntry * buildMap()
Definition: pdfentries.cxx:578
virtual PDFEntry * clone() const override
Definition: pdfentries.cxx:996
virtual bool emit(EmitContext &rWriteContext) const override
virtual bool emit(EmitContext &rWriteContext) const override
Definition: pdfentries.cxx:495
PDFFileImplData * impl_getData() const
PDFObject * findObject(PDFObjectRef const *pRef) const
Definition: pdfparse.hxx:170
virtual PDFEntry * clone() const override
Definition: pdfentries.cxx:904
virtual ~PDFString() override
Definition: pdfentries.cxx:189
virtual PDFEntry * clone() const override
Definition: pdfentries.cxx:504
OString const m_aString
Definition: pdfparse.hxx:103
static void setEmitData(EmitContext &rContext, EmitImplData *pNewEmitData)
Definition: pdfentries.cxx:116
PDFEntry * m_pObject
Definition: pdfparse.hxx:259
unsigned int m_nNumber
Definition: pdfparse.hxx:261
unsigned char sal_uInt8
void cloneSubElements(std::vector< std::unique_ptr< PDFEntry >> &rNewSubElements) const
Definition: pdfentries.cxx:468
EmitContext(const PDFContainer *pTop=nullptr)
Definition: pdfentries.cxx:95
PDFName(const OString &rName)
Definition: pdfparse.hxx:92
virtual ~PDFContainer() override
Definition: pdfentries.cxx:444
PDFObject(unsigned int nNr, unsigned int nGen)
Definition: pdfparse.hxx:264
virtual bool emit(EmitContext &rWriteContext) const override
Definition: pdfentries.cxx:193
unsigned int getDictLength(const PDFContainer *pObjectContainer) const
Definition: pdfentries.cxx:623
virtual bool emit(EmitContext &rWriteContext) const override
Definition: pdfentries.cxx:802
virtual PDFEntry * clone() const override
Definition: pdfentries.cxx:404
virtual ~PDFObjectRef() override
Definition: pdfentries.cxx:424
virtual PDFEntry * clone() const =0
std::unique_ptr< EmitImplData > m_pImplData
Definition: pdfparse.hxx:54
bool setupDecryptionData(const OString &rPwd) const
virtual ~PDFTrailer() override
Definition: pdfentries.cxx:924
virtual PDFEntry * clone() const override
PDFStream(unsigned int nBegin, unsigned int nEnd, PDFDict *pStreamDict)
Definition: pdfparse.hxx:208
virtual bool emit(EmitContext &rWriteContext) const override
Definition: pdfentries.cxx:428
unsigned int m_nMinor
Definition: pdfparse.hxx:235
void writeStream(EmitContext &rContext, const PDFFile *pPDFFile) const
Definition: pdfentries.cxx:783
virtual bool emit(EmitContext &rWriteContext) const =0