LibreOffice Module vcl (master) 1
pdfobjectcopier.cxx
Go to the documentation of this file.
1/* -*- Mode: C++; tab-width: 4; indent-tabs-mode: nil; c-basic-offset: 4 -*- */
2/*
3 * This file is part of the LibreOffice project.
4 *
5 * This Source Code Form is subject to the terms of the Mozilla Public
6 * License, v. 2.0. If a copy of the MPL was not distributed with this
7 * file, You can obtain one at http://mozilla.org/MPL/2.0/.
8 */
9
10#include <sal/log.hxx>
11#include <sal/types.h>
12#include <rtl/strbuf.hxx>
13#include <tools/stream.hxx>
14#include <tools/zcodec.hxx>
15
18
19#include <pdf/objectcopier.hxx>
21
22namespace vcl
23{
25 : m_rContainer(rContainer)
26{
27}
28
29void PDFObjectCopier::copyRecursively(OStringBuffer& rLine, filter::PDFElement& rInputElement,
30 SvMemoryStream& rDocBuffer,
31 std::map<sal_Int32, sal_Int32>& rCopiedResources)
32{
33 if (auto pReference = dynamic_cast<filter::PDFReferenceElement*>(&rInputElement))
34 {
35 filter::PDFObjectElement* pReferenced = pReference->LookupObject();
36 if (pReferenced)
37 {
38 // Copy the referenced object.
39 sal_Int32 nRef = copyExternalResource(rDocBuffer, *pReferenced, rCopiedResources);
40
41 // Write the updated reference.
42 rLine.append(nRef);
43 rLine.append(" 0 R");
44 }
45 }
46 else if (auto pInputArray = dynamic_cast<filter::PDFArrayElement*>(&rInputElement))
47 {
48 rLine.append("[ ");
49 for (auto const& pElement : pInputArray->GetElements())
50 {
51 copyRecursively(rLine, *pElement, rDocBuffer, rCopiedResources);
52 rLine.append(" ");
53 }
54 rLine.append("] ");
55 }
56 else if (auto pInputDictionary = dynamic_cast<filter::PDFDictionaryElement*>(&rInputElement))
57 {
58 rLine.append("<< ");
59 for (auto const& pPair : pInputDictionary->GetItems())
60 {
61 rLine.append("/");
62 rLine.append(pPair.first);
63 rLine.append(" ");
64 copyRecursively(rLine, *pPair.second, rDocBuffer, rCopiedResources);
65 rLine.append(" ");
66 }
67 rLine.append(">> ");
68 }
69 else
70 {
71 rInputElement.writeString(rLine);
72 }
73}
74
77 std::map<sal_Int32, sal_Int32>& rCopiedResources)
78{
79 auto it = rCopiedResources.find(rObject.GetObjectValue());
80 if (it != rCopiedResources.end())
81 {
82 // This resource was already copied once, nothing to do.
83 return it->second;
84 }
85
86 sal_Int32 nObject = m_rContainer.createObject();
87 // Remember what is the ID of this object in our output.
88 rCopiedResources[rObject.GetObjectValue()] = nObject;
89 SAL_INFO("vcl.pdfwriter", "PDFObjectCopier::copyExternalResource: " << rObject.GetObjectValue()
90 << " -> " << nObject);
91
92 OStringBuffer aLine;
93 aLine.append(nObject);
94 aLine.append(" 0 obj\n");
95
96 if (rObject.GetDictionary())
97 {
98 aLine.append("<< ");
99 bool bFirst = true;
100 for (auto const& rPair : rObject.GetDictionaryItems())
101 {
102 if (bFirst)
103 bFirst = false;
104 else
105 aLine.append(" ");
106
107 aLine.append("/");
108 aLine.append(rPair.first);
109 aLine.append(" ");
110 copyRecursively(aLine, *rPair.second, rDocBuffer, rCopiedResources);
111 }
112
113 aLine.append(" >>\n");
114 }
115
116 filter::PDFStreamElement* pStream = rObject.GetStream();
117 if (pStream)
118 {
119 aLine.append("stream\n");
120 }
121
122 if (filter::PDFArrayElement* pArray = rObject.GetArray())
123 {
124 aLine.append("[ ");
125
126 const std::vector<filter::PDFElement*>& rElements = pArray->GetElements();
127
128 bool bFirst = true;
129 for (auto const& pElement : rElements)
130 {
131 if (bFirst)
132 bFirst = false;
133 else
134 aLine.append(" ");
135 copyRecursively(aLine, *pElement, rDocBuffer, rCopiedResources);
136 }
137 aLine.append("]\n");
138 }
139
140 // If the object has a number element outside a dictionary or array, copy that.
141 if (filter::PDFNumberElement* pNumber = rObject.GetNumberElement())
142 {
143 pNumber->writeString(aLine);
144 aLine.append("\n");
145 }
146
147 // We have the whole object, now write it to the output.
148 if (!m_rContainer.updateObject(nObject))
149 return -1;
150 if (!m_rContainer.writeBuffer(aLine.getStr(), aLine.getLength()))
151 return -1;
152 aLine.setLength(0);
153
154 if (pStream)
155 {
156 SvMemoryStream& rStream = pStream->GetMemory();
158 aLine.append(static_cast<const char*>(rStream.GetData()), rStream.GetSize());
159 if (!m_rContainer.writeBuffer(aLine.getStr(), aLine.getLength()))
160 return -1;
161 aLine.setLength(0);
163
164 aLine.append("\nendstream\n");
165 if (!m_rContainer.writeBuffer(aLine.getStr(), aLine.getLength()))
166 return -1;
167 aLine.setLength(0);
168 }
169
170 aLine.append("endobj\n\n");
171 if (!m_rContainer.writeBuffer(aLine.getStr(), aLine.getLength()))
172 return -1;
173
174 return nObject;
175}
176
178 const OString& rKind,
179 std::map<sal_Int32, sal_Int32>& rCopiedResources)
180{
181 // A name - object ID map, IDs as they appear in our output, not the
182 // original ones.
183 std::map<OString, sal_Int32> aRet;
184
185 // Get the rKind subset of the resource dictionary.
186 std::map<OString, filter::PDFElement*> aItems;
187 filter::PDFObjectElement* pKindObject = nullptr;
188 if (auto pResources = dynamic_cast<filter::PDFDictionaryElement*>(rPage.Lookup("Resources")))
189 {
190 // Resources is a direct dictionary.
191 filter::PDFElement* pLookup = pResources->LookupElement(rKind);
192 if (auto pDictionary = dynamic_cast<filter::PDFDictionaryElement*>(pLookup))
193 {
194 // rKind is an inline dictionary.
195 aItems = pDictionary->GetItems();
196 }
197 else if (auto pReference = dynamic_cast<filter::PDFReferenceElement*>(pLookup))
198 {
199 // rKind refers to a dictionary.
200 filter::PDFObjectElement* pReferenced = pReference->LookupObject();
201 if (!pReferenced)
202 {
203 return {};
204 }
205
206 pKindObject = pReferenced;
207 aItems = pReferenced->GetDictionaryItems();
208 }
209 }
210 else if (filter::PDFObjectElement* pPageResources = rPage.LookupObject("Resources"))
211 {
212 // Resources is an indirect object.
213 filter::PDFElement* pValue = pPageResources->Lookup(rKind);
214 if (auto pDictionary = dynamic_cast<filter::PDFDictionaryElement*>(pValue))
215 {
216 // Kind is a direct dictionary.
217 aItems = pDictionary->GetItems();
218 }
219 else if (filter::PDFObjectElement* pObject = pPageResources->LookupObject(rKind))
220 {
221 // Kind is an indirect object.
222 aItems = pObject->GetDictionaryItems();
223 pKindObject = pObject;
224 }
225 }
226 if (aItems.empty())
227 return {};
228
229 SvMemoryStream& rDocBuffer = rPage.GetDocument().GetEditBuffer();
230 bool bHasDictValue = false;
231
232 for (const auto& rItem : aItems)
233 {
234 // For each item copy it over to our output then insert it into aRet.
235 auto pReference = dynamic_cast<filter::PDFReferenceElement*>(rItem.second);
236 if (!pReference)
237 {
238 if (pKindObject && dynamic_cast<filter::PDFDictionaryElement*>(rItem.second))
239 {
240 bHasDictValue = true;
241 break;
242 }
243
244 continue;
245 }
246
247 filter::PDFObjectElement* pValue = pReference->LookupObject();
248 if (!pValue)
249 continue;
250
251 // Then copying over an object copy its dictionary and its stream.
252 sal_Int32 nObject = copyExternalResource(rDocBuffer, *pValue, rCopiedResources);
253 aRet[rItem.first] = nObject;
254 }
255
256 if (bHasDictValue && pKindObject)
257 {
258 sal_Int32 nObject = copyExternalResource(rDocBuffer, *pKindObject, rCopiedResources);
259 return "/" + rKind + " " + OString::number(nObject) + " 0 R";
260 }
261
262 // Build the dictionary entry string.
263 OStringBuffer sRet("/" + rKind + "<<");
264 for (const auto& rPair : aRet)
265 {
266 sRet.append("/" + rPair.first + " " + OString::number(rPair.second) + " 0 R");
267 }
268 sRet.append(">>");
269
270 return sRet.makeStringAndClear();
271}
272
274{
275 // Maps from source object id (PDF image) to target object id (export result).
276 std::map<sal_Int32, sal_Int32> aCopiedResources;
277 copyPageResources(pPage, rLine, aCopiedResources);
278}
279
281 std::map<sal_Int32, sal_Int32>& rCopiedResources)
282{
283 rLine.append(" /Resources <<");
284 static const std::initializer_list<OString> aKeys
285 = { "ColorSpace", "ExtGState", "Font", "XObject", "Shading", "Pattern" };
286 for (const auto& rKey : aKeys)
287 {
288 rLine.append(copyExternalResources(*pPage, rKey, rCopiedResources));
289 }
290 rLine.append(">>");
291}
292
293sal_Int32 PDFObjectCopier::copyPageStreams(std::vector<filter::PDFObjectElement*>& rContentStreams,
294 SvMemoryStream& rStream, bool& rCompressed)
295{
296 for (auto pContent : rContentStreams)
297 {
298 filter::PDFStreamElement* pPageStream = pContent->GetStream();
299 if (!pPageStream)
300 {
301 SAL_WARN("vcl.pdfwriter", "PDFObjectCopier::copyPageStreams: contents has no stream");
302 continue;
303 }
304
305 SvMemoryStream& rPageStream = pPageStream->GetMemory();
306
307 auto pFilter = dynamic_cast<filter::PDFNameElement*>(pContent->Lookup("Filter"));
308 auto pFilterArray = dynamic_cast<filter::PDFArrayElement*>(pContent->Lookup("Filter"));
309 if (!pFilter && pFilterArray)
310 {
311 auto& aElements = pFilterArray->GetElements();
312 if (!aElements.empty())
313 pFilter = dynamic_cast<filter::PDFNameElement*>(aElements[0]);
314 }
315
316 if (pFilter)
317 {
318 if (pFilter->GetValue() != "FlateDecode")
319 {
320 continue;
321 }
322
323 SvMemoryStream aMemoryStream;
324 ZCodec aZCodec;
325 rPageStream.Seek(0);
326 aZCodec.BeginCompression();
327 aZCodec.Decompress(rPageStream, aMemoryStream);
328 if (!aZCodec.EndCompression())
329 {
330 SAL_WARN("vcl.pdfwriter", "PDFObjectCopier::copyPageStreams: decompression failed");
331 continue;
332 }
333
334 rStream.WriteBytes(aMemoryStream.GetData(), aMemoryStream.GetSize());
335 }
336 else
337 {
338 rStream.WriteBytes(rPageStream.GetData(), rPageStream.GetSize());
339 }
340 }
341
342 rCompressed = PDFWriterImpl::compressStream(&rStream);
343
344 return rStream.Tell();
345}
346}
347
348/* vim:set shiftwidth=4 softtabstop=4 expandtab: */
const void * GetData()
sal_uInt64 GetSize()
sal_uInt64 Tell() const
std::size_t WriteBytes(const void *pData, std::size_t nSize)
sal_uInt64 Seek(sal_uInt64 nPos)
tools::Long Decompress(SvStream &rIStm, SvStream &rOStm)
tools::Long EndCompression()
void BeginCompression(int nCompressLevel=ZCODEC_DEFAULT_COMPRESSION, bool gzLib=false)
Allows creating, updating and writing PDF objects in a container.
virtual void checkAndEnableStreamEncryption(sal_Int32 nObject)=0
virtual bool updateObject(sal_Int32 n)=0
virtual sal_Int32 createObject()=0
virtual void disableStreamEncryption()=0
virtual bool writeBuffer(const void *pBuffer, sal_uInt64 nBytes)=0
OString copyExternalResources(filter::PDFObjectElement &rPage, const OString &rKind, std::map< sal_Int32, sal_Int32 > &rCopiedResources)
Copies resources of a given kind from an external page to the output, returning what has to be includ...
PDFObjectCopier(PDFObjectContainer &rContainer)
void copyRecursively(OStringBuffer &rLine, filter::PDFElement &rInputElement, SvMemoryStream &rDocBuffer, std::map< sal_Int32, sal_Int32 > &rCopiedResources)
static sal_Int32 copyPageStreams(std::vector< filter::PDFObjectElement * > &rContentStreams, SvMemoryStream &rStream, bool &rCompressed)
Copies page one or more page streams from rContentStreams into rStream.
void copyPageResources(filter::PDFObjectElement *pPage, OStringBuffer &rLine)
Copies resources of pPage into rLine.
PDFObjectContainer & m_rContainer
sal_Int32 copyExternalResource(SvMemoryStream &rDocBuffer, filter::PDFObjectElement &rObject, std::map< sal_Int32, sal_Int32 > &rCopiedResources)
Copies a single resource from an external document, returns the new object ID in our document.
static bool compressStream(SvMemoryStream *)
Array object: a list.
const std::vector< PDFElement * > & GetElements() const
Dictionary object: a set key-value pairs.
SvMemoryStream & GetEditBuffer()
Access to the input document, even after the input stream is gone.
A byte range in a PDF file.
Definition: pdfdocument.hxx:51
virtual void writeString(OStringBuffer &rBuffer)=0
Name object: a key string.
Numbering object: an integer or a real.
Indirect object: something with a unique ID.
Definition: pdfdocument.hxx:69
const std::map< OString, PDFElement * > & GetDictionaryItems()
Get access to the parsed key-value items from the object dictionary.
PDFElement * Lookup(const OString &rDictionaryKey)
PDFStreamElement * GetStream() const
Access to the stream of the object, if it has any.
PDFArrayElement * GetArray()
PDFNumberElement * GetNumberElement() const
PDFObjectElement * LookupObject(const OString &rDictionaryKey)
PDFDictionaryElement * GetDictionary()
Reference object: something with a unique ID.
Stream object: a byte array with a known length.
SvMemoryStream & GetMemory()
EmbeddedObjectRef * pObject
sal_Int32 nRef
#define SAL_WARN(area, stream)
#define SAL_INFO(area, stream)