LibreOffice Module sdext (master) 1
pdfparse.hxx
Go to the documentation of this file.
1/* -*- Mode: C++; tab-width: 4; indent-tabs-mode: nil; c-basic-offset: 4 -*- */
2/*
3 * This file is part of the LibreOffice project.
4 *
5 * This Source Code Form is subject to the terms of the Mozilla Public
6 * License, v. 2.0. If a copy of the MPL was not distributed with this
7 * file, You can obtain one at http://mozilla.org/MPL/2.0/.
8 *
9 * This file incorporates work covered by the following license notice:
10 *
11 * Licensed to the Apache Software Foundation (ASF) under one or more
12 * contributor license agreements. See the NOTICE file distributed
13 * with this work for additional information regarding copyright
14 * ownership. The ASF licenses this file to you under the Apache
15 * License, Version 2.0 (the "License"); you may not use this file
16 * except in compliance with the License. You may obtain a copy of
17 * the License at http://www.apache.org/licenses/LICENSE-2.0 .
18 */
19
20#ifndef INCLUDED_SDEXT_SOURCE_PDFIMPORT_INC_PDFPARSE_HXX
21#define INCLUDED_SDEXT_SOURCE_PDFIMPORT_INC_PDFPARSE_HXX
22
23#include <sal/types.h>
24#include <rtl/ustring.hxx>
25#include <rtl/string.hxx>
26
27#include <string_view>
28#include <unordered_map>
29#include <utility>
30#include <vector>
31#include <memory>
32
33namespace pdfparse
34{
35
// Forward declarations. Both types are referred to further down only through
// pointers / std::unique_ptr before (or without) a full definition here.
36struct EmitImplData;
37struct PDFContainer;
39{
40public:
// Pure virtual byte-level interface that concrete contexts must implement.
// NOTE(review): the semantics below are read off the signatures only — confirm
// against the implementing classes.
// write(): presumably outputs nLen bytes starting at pBuf; bool result.
41 virtual bool write( const void* pBuf, unsigned int nLen ) = 0;
// getCurPos(): presumably the current output position.
42 virtual unsigned int getCurPos() = 0;
// copyOrigBytes(): presumably copies nLen bytes at nOrigOffset of the original
// data to the output.
43 virtual bool copyOrigBytes( unsigned int nOrigOffset, unsigned int nLen ) = 0;
// readOrigBytes(): presumably reads nLen bytes at nOrigOffset of the original
// data into pBuf and returns the number of bytes read.
44 virtual unsigned int readOrigBytes( unsigned int nOrigOffset, unsigned int nLen, void* pBuf ) = 0;
45
// Explicit constructor; pTop is optional and defaults to nullptr.
46 explicit EmitContext( const PDFContainer* pTop = nullptr );
47 virtual ~EmitContext();
48
// NOTE(review): the two comments below describe flags whose declarations are
// not visible in this listing.
49 // set this to deflate contained streams
51 // set this to decrypt the PDF file
53
54private:
// PDFEntry is a friend so that it can reach m_pImplData (see its static
// getEmitData()/setEmitData() helpers, which take an EmitContext).
55 friend struct PDFEntry;
56 std::unique_ptr<EmitImplData> m_pImplData;
57};
58
60{
// Polymorphic base of all entry types declared in this header.
62 virtual ~PDFEntry();
63
// Implemented by every concrete entry type; receives the context to emit through.
64 virtual bool emit( EmitContext& rWriteContext ) const = 0;
// Returns a raw PDFEntry*.
// NOTE(review): presumably a newly allocated copy owned by the caller — confirm.
65 virtual PDFEntry* clone() const = 0;
66
67protected:
// Static helpers for derived classes to read / replace the EmitImplData of a
// context. EmitContext keeps that pointer private and befriends PDFEntry.
// NOTE(review): whether setEmitData() takes ownership of pNewEmitData is not
// visible here — confirm in the implementation.
68 static EmitImplData* getEmitData( EmitContext const & rContext );
69 static void setEmitData( EmitContext& rContext, EmitImplData* pNewEmitData );
70};
71
72struct PDFComment final : public PDFEntry
73{
74 OString m_aComment;
75
76 explicit PDFComment( OString aComment )
77 : PDFEntry(), m_aComment(std::move( aComment )) {}
78 virtual ~PDFComment() override;
79 virtual bool emit( EmitContext& rWriteContext ) const override;
80 virtual PDFEntry* clone() const override;
81};
82
// Common base of PDFName, PDFString, PDFNumber, PDFBool, PDFObjectRef and
// PDFNull. Only a destructor is visible here; emit() and clone() stay pure
// virtual as inherited from PDFEntry.
83struct PDFValue : public PDFEntry
84{
85 // abstract base class for simple values
87 virtual ~PDFValue() override;
88};
89
90struct PDFName final : public PDFValue
91{
92 OString m_aName;
93
94 explicit PDFName( OString aName )
95 : PDFValue(), m_aName(std::move( aName )) {}
96 virtual ~PDFName() override;
97 virtual bool emit( EmitContext& rWriteContext ) const override;
98 virtual PDFEntry* clone() const override;
99
100 OUString getFilteredName() const;
101};
102
103struct PDFString final : public PDFValue
104{
105 OString m_aString;
106
107 explicit PDFString( OString aString )
108 : PDFValue(), m_aString(std::move( aString )) {}
109 virtual ~PDFString() override;
110 virtual bool emit( EmitContext& rWriteContext ) const override;
111 virtual PDFEntry* clone() const override;
112
113 OString getFilteredString() const;
114};
115
116struct PDFNumber final : public PDFValue
117{
118 double m_fValue;
119
120 explicit PDFNumber( double fVal )
121 : PDFValue(), m_fValue( fVal ) {}
122 virtual ~PDFNumber() override;
123 virtual bool emit( EmitContext& rWriteContext ) const override;
124 virtual PDFEntry* clone() const override;
125};
126
127struct PDFBool final : public PDFValue
128{
130
131 explicit PDFBool( bool bVal )
132 : PDFValue(), m_bValue( bVal ) {}
133 virtual ~PDFBool() override;
134 virtual bool emit( EmitContext& rWriteContext ) const override;
135 virtual PDFEntry* clone() const override;
136};
137
138struct PDFObjectRef final : public PDFValue
139{
140 unsigned int m_nNumber;
141 unsigned int m_nGeneration;
142
143 PDFObjectRef( unsigned int nNr, unsigned int nGen )
144 : PDFValue(), m_nNumber( nNr ), m_nGeneration( nGen ) {}
145 virtual ~PDFObjectRef() override;
146 virtual bool emit( EmitContext& rWriteContext ) const override;
147 virtual PDFEntry* clone() const override;
148};
149
// Value entry with no data members visible here; it only overrides the
// destructor and the PDFEntry interface (emit/clone).
150struct PDFNull final : public PDFValue
151{
153 virtual ~PDFNull() override;
154 virtual bool emit( EmitContext& rWriteContext ) const override;
155 virtual PDFEntry* clone() const override;
156};
157
158struct PDFObject;
159struct PDFContainer : public PDFEntry
160{
161 sal_Int32 m_nOffset;
162 std::vector<std::unique_ptr<PDFEntry>> m_aSubElements;
163
164 // this is an abstract base class for identifying
165 // entries that can contain sub elements besides comments
167 virtual ~PDFContainer() override;
168 bool emitSubElements( EmitContext& rWriteContext ) const;
169 void cloneSubElements( std::vector<std::unique_ptr<PDFEntry>>& rNewSubElements ) const;
170
171 PDFObject* findObject( unsigned int nNumber, unsigned int nGeneration ) const;
172 PDFObject* findObject( PDFObjectRef const * pRef ) const
173 { return findObject( pRef->m_nNumber, pRef->m_nGeneration ); }
174};
175
// Concrete container with no additional data members visible here; its
// elements are held in the inherited PDFContainer::m_aSubElements.
176struct PDFArray final : public PDFContainer
177{
179 virtual ~PDFArray() override;
180 virtual bool emit( EmitContext& rWriteContext ) const override;
181 virtual PDFEntry* clone() const override;
182};
183
184struct PDFDict final : public PDFContainer
185{
186 typedef std::unordered_map<OString,PDFEntry*> Map;
188
190 virtual ~PDFDict() override;
191 virtual bool emit( EmitContext& rWriteContext ) const override;
192 virtual PDFEntry* clone() const override;
193
194 // inserting a value of NULL will remove rName and the previous value
195 // from the dictionary
196 void insertValue( const OString& rName, std::unique_ptr<PDFEntry> pValue );
197 // removes a name/value pair from the dict
198 void eraseValue( std::string_view rName );
199 // builds new map as of sub elements
200 // returns NULL if successful, else the first offending element
202};
203
204struct PDFStream final : public PDFEntry
205{
206 unsigned int m_nBeginOffset;
207 unsigned int m_nEndOffset; // offset of the byte after the stream
209
210 PDFStream( unsigned int nBegin, unsigned int nEnd, PDFDict* pStreamDict )
211 : PDFEntry(), m_nBeginOffset( nBegin ), m_nEndOffset( nEnd ), m_pDict( pStreamDict ) {}
212 virtual ~PDFStream() override;
213 virtual bool emit( EmitContext& rWriteContext ) const override;
214 virtual PDFEntry* clone() const override;
215
216 unsigned int getDictLength( const PDFContainer* pObjectContainer ) const; // get contents of the "Length" entry of the dict
217};
218
// Concrete container that overrides the PDFEntry interface (emit/clone).
// NOTE(review): the constructor initialises a member m_pDict to nullptr, but
// its declaration is not visible in this listing. It is presumably a pointer
// to PDFDict, by analogy with PDFStream — confirm against the real header.
219struct PDFTrailer final : public PDFContainer
220{
222
223 PDFTrailer() : PDFContainer(), m_pDict( nullptr ) {}
224 virtual ~PDFTrailer() override;
225 virtual bool emit( EmitContext& rWriteContext ) const override;
226 virtual PDFEntry* clone() const override;
227};
228
// Only forward-declared here; PDFFile holds it through a std::unique_ptr.
229struct PDFFileImplData;
// Top-level container carrying the PDF version numbers and the
// encryption-related entry points.
230struct PDFFile final : public PDFContainer
231{
232private:
// mutable: const member functions (e.g. setupDecryptionData(), decrypt()) are
// able to modify it.
// NOTE(review): presumably created lazily — confirm in the implementation.
233 mutable std::unique_ptr<PDFFileImplData> m_pData;
235public:
236 unsigned int m_nMajor; // PDF major
237 unsigned int m_nMinor; // PDF minor
238
239 PDFFile();
240 virtual ~PDFFile() override;
241
// PDFEntry interface
242 virtual bool emit( EmitContext& rWriteContext ) const override;
243 virtual PDFEntry* clone() const override;
244
// Reports whether the file is encrypted; takes no arguments and is const.
245 bool isEncrypted() const;
246
248
249 // this method checks whether rPwd is compatible with
250 // either user or owner password and sets up decrypt data in that case
251 // returns true if decryption can be done
252 bool setupDecryptionData( const OString& rPwd ) const;
253
// Processes nLen bytes from pInBuffer into pOutBuffer for the object identified
// by (nObject, nGeneration); bool result.
// NOTE(review): presumably requires a prior successful setupDecryptionData()
// and an output buffer of at least nLen bytes — confirm.
254 bool decrypt( const sal_uInt8* pInBuffer, sal_uInt32 nLen,
255 sal_uInt8* pOutBuffer,
256 unsigned int nObject, unsigned int nGeneration ) const;
257};
258
259struct PDFObject final : public PDFContainer
260{
263 unsigned int m_nNumber;
264 unsigned int m_nGeneration;
265
266 PDFObject( unsigned int nNr, unsigned int nGen )
267 : m_pObject( nullptr ), m_pStream( nullptr ), m_nNumber( nNr ), m_nGeneration( nGen ) {}
268 virtual ~PDFObject() override;
269 virtual bool emit( EmitContext& rWriteContext ) const override;
270 virtual PDFEntry* clone() const override;
271
272 // writes only the contained stream, deflated if necessary
273 void writeStream( EmitContext& rContext, const PDFFile* pPDFFile ) const;
274
275private:
276 // returns true if stream is deflated
277 // fills *ppStream and *pBytes with start of stream and count of bytes
278 // memory returned in *ppStream must be freed with std::free afterwards
279 // fills in NULL and 0 in case of error
280 bool getDeflatedStream( std::unique_ptr<char[]>& rpStream, unsigned int* pBytes, const PDFContainer* pObjectContainer, EmitContext& rContext ) const;
281};
282
// Concrete container with no additional data members visible here; it only
// overrides the destructor and the PDFEntry interface (emit/clone).
283struct PDFPart final : public PDFContainer
284{
286 virtual ~PDFPart() override;
287 virtual bool emit( EmitContext& rWriteContext ) const override;
288 virtual PDFEntry* clone() const override;
289};
290
292{
// The default constructor is deleted; the visible interface is static only.
293 PDFReader() = delete;
294
// Reads from the file named by pFileName; the result is owned by the caller
// via std::unique_ptr.
// NOTE(review): presumably an empty pointer signals failure — confirm.
295 static std::unique_ptr<PDFEntry> read( const char* pFileName );
// The in-memory overload (buffer + length) is only declared on Windows builds.
296#ifdef _WIN32
297 static std::unique_ptr<PDFEntry> read( const char* pBuffer, unsigned int nLen );
298#endif
299};
300
301} // namespace
302
303#endif
304
305/* vim:set shiftwidth=4 softtabstop=4 expandtab: */
EmitContext(const PDFContainer *pTop=nullptr)
Definition: pdfentries.cxx:95
virtual bool copyOrigBytes(unsigned int nOrigOffset, unsigned int nLen)=0
virtual unsigned int readOrigBytes(unsigned int nOrigOffset, unsigned int nLen, void *pBuf)=0
virtual bool write(const void *pBuf, unsigned int nLen)=0
std::unique_ptr< EmitImplData > m_pImplData
Definition: pdfparse.hxx:56
virtual unsigned int getCurPos()=0
OUString aName
virtual bool emit(EmitContext &rWriteContext) const override
Definition: pdfentries.cxx:495
virtual ~PDFArray() override
Definition: pdfentries.cxx:491
virtual PDFEntry * clone() const override
Definition: pdfentries.cxx:504
virtual PDFEntry * clone() const override
Definition: pdfentries.cxx:404
virtual ~PDFBool() override
Definition: pdfentries.cxx:395
virtual bool emit(EmitContext &rWriteContext) const override
Definition: pdfentries.cxx:399
PDFBool(bool bVal)
Definition: pdfparse.hxx:131
PDFComment(OString aComment)
Definition: pdfparse.hxx:76
virtual bool emit(EmitContext &rWriteContext) const override
Definition: pdfentries.cxx:131
virtual PDFEntry * clone() const override
Definition: pdfentries.cxx:136
virtual ~PDFComment() override
Definition: pdfentries.cxx:127
bool emitSubElements(EmitContext &rWriteContext) const
Definition: pdfentries.cxx:448
PDFObject * findObject(unsigned int nNumber, unsigned int nGeneration) const
Definition: pdfentries.cxx:475
void cloneSubElements(std::vector< std::unique_ptr< PDFEntry > > &rNewSubElements) const
Definition: pdfentries.cxx:468
PDFObject * findObject(PDFObjectRef const *pRef) const
Definition: pdfparse.hxx:172
std::vector< std::unique_ptr< PDFEntry > > m_aSubElements
Definition: pdfparse.hxx:162
virtual ~PDFContainer() override
Definition: pdfentries.cxx:444
std::unordered_map< OString, PDFEntry * > Map
Definition: pdfparse.hxx:186
void eraseValue(std::string_view rName)
Definition: pdfentries.cxx:553
void insertValue(const OString &rName, std::unique_ptr< PDFEntry > pValue)
Definition: pdfentries.cxx:524
PDFEntry * buildMap()
Definition: pdfentries.cxx:576
virtual bool emit(EmitContext &rWriteContext) const override
Definition: pdfentries.cxx:515
virtual PDFEntry * clone() const override
Definition: pdfentries.cxx:599
virtual ~PDFDict() override
Definition: pdfentries.cxx:511
virtual bool emit(EmitContext &rWriteContext) const =0
static void setEmitData(EmitContext &rContext, EmitImplData *pNewEmitData)
Definition: pdfentries.cxx:116
virtual PDFEntry * clone() const =0
static EmitImplData * getEmitData(EmitContext const &rContext)
Definition: pdfentries.cxx:111
bool usesSupportedEncryptionFormat() const
virtual bool emit(EmitContext &rWriteContext) const override
bool setupDecryptionData(const OString &rPwd) const
unsigned int m_nMinor
Definition: pdfparse.hxx:237
bool decrypt(const sal_uInt8 *pInBuffer, sal_uInt32 nLen, sal_uInt8 *pOutBuffer, unsigned int nObject, unsigned int nGeneration) const
virtual ~PDFFile() override
std::unique_ptr< PDFFileImplData > m_pData
Definition: pdfparse.hxx:233
bool isEncrypted() const
unsigned int m_nMajor
Definition: pdfparse.hxx:236
virtual PDFEntry * clone() const override
PDFFileImplData * impl_getData() const
virtual PDFEntry * clone() const override
Definition: pdfentries.cxx:152
PDFName(OString aName)
Definition: pdfparse.hxx:94
virtual bool emit(EmitContext &rWriteContext) const override
Definition: pdfentries.cxx:145
OUString getFilteredName() const
Definition: pdfentries.cxx:157
virtual ~PDFName() override
Definition: pdfentries.cxx:141
virtual PDFEntry * clone() const override
Definition: pdfentries.cxx:418
virtual ~PDFNull() override
Definition: pdfentries.cxx:409
virtual bool emit(EmitContext &rWriteContext) const override
Definition: pdfentries.cxx:413
PDFNumber(double fVal)
Definition: pdfparse.hxx:120
virtual PDFEntry * clone() const override
Definition: pdfentries.cxx:389
virtual ~PDFNumber() override
Definition: pdfentries.cxx:337
virtual bool emit(EmitContext &rWriteContext) const override
Definition: pdfentries.cxx:341
PDFObjectRef(unsigned int nNr, unsigned int nGen)
Definition: pdfparse.hxx:143
virtual PDFEntry * clone() const override
Definition: pdfentries.cxx:439
unsigned int m_nNumber
Definition: pdfparse.hxx:140
unsigned int m_nGeneration
Definition: pdfparse.hxx:141
virtual ~PDFObjectRef() override
Definition: pdfentries.cxx:424
virtual bool emit(EmitContext &rWriteContext) const override
Definition: pdfentries.cxx:428
unsigned int m_nGeneration
Definition: pdfparse.hxx:264
PDFObject(unsigned int nNr, unsigned int nGen)
Definition: pdfparse.hxx:266
virtual bool emit(EmitContext &rWriteContext) const override
Definition: pdfentries.cxx:801
virtual ~PDFObject() override
Definition: pdfentries.cxx:654
PDFStream * m_pStream
Definition: pdfparse.hxx:262
bool getDeflatedStream(std::unique_ptr< char[]> &rpStream, unsigned int *pBytes, const PDFContainer *pObjectContainer, EmitContext &rContext) const
Definition: pdfentries.cxx:658
void writeStream(EmitContext &rContext, const PDFFile *pPDFFile) const
Definition: pdfentries.cxx:782
PDFEntry * m_pObject
Definition: pdfparse.hxx:261
virtual PDFEntry * clone() const override
Definition: pdfentries.cxx:903
unsigned int m_nNumber
Definition: pdfparse.hxx:263
virtual ~PDFPart() override
virtual bool emit(EmitContext &rWriteContext) const override
virtual PDFEntry * clone() const override
static std::unique_ptr< PDFEntry > read(const char *pFileName)
Definition: pdfparse.cxx:609
virtual bool emit(EmitContext &rWriteContext) const override
Definition: pdfentries.cxx:611
unsigned int getDictLength(const PDFContainer *pObjectContainer) const
Definition: pdfentries.cxx:621
PDFStream(unsigned int nBegin, unsigned int nEnd, PDFDict *pStreamDict)
Definition: pdfparse.hxx:210
virtual ~PDFStream() override
Definition: pdfentries.cxx:607
virtual PDFEntry * clone() const override
Definition: pdfentries.cxx:616
unsigned int m_nBeginOffset
Definition: pdfparse.hxx:206
unsigned int m_nEndOffset
Definition: pdfparse.hxx:207
virtual bool emit(EmitContext &rWriteContext) const override
Definition: pdfentries.cxx:193
virtual ~PDFString() override
Definition: pdfentries.cxx:189
PDFString(OString aString)
Definition: pdfparse.hxx:107
OString getFilteredString() const
Definition: pdfentries.cxx:245
virtual PDFEntry * clone() const override
Definition: pdfentries.cxx:240
virtual bool emit(EmitContext &rWriteContext) const override
Definition: pdfentries.cxx:927
virtual PDFEntry * clone() const override
Definition: pdfentries.cxx:993
virtual ~PDFTrailer() override
Definition: pdfentries.cxx:923
virtual ~PDFValue() override
Definition: pdfentries.cxx:123
unsigned char sal_uInt8