LibreOffice Module vcl (master) 1
pdfobjectcopier.cxx
Go to the documentation of this file.
1/* -*- Mode: C++; tab-width: 4; indent-tabs-mode: nil; c-basic-offset: 4 -*- */
2/*
3 * This file is part of the LibreOffice project.
4 *
5 * This Source Code Form is subject to the terms of the Mozilla Public
6 * License, v. 2.0. If a copy of the MPL was not distributed with this
7 * file, You can obtain one at http://mozilla.org/MPL/2.0/.
8 */
9
10#include <sal/log.hxx>
11#include <sal/types.h>
12#include <rtl/strbuf.hxx>
13#include <tools/stream.hxx>
14#include <tools/zcodec.hxx>
15
18
19#include <pdf/objectcopier.hxx>
21
22namespace vcl
23{
25 : m_rContainer(rContainer)
26{
27}
28
29void PDFObjectCopier::copyRecursively(OStringBuffer& rLine, filter::PDFElement& rInputElement,
30 SvMemoryStream& rDocBuffer,
31 std::map<sal_Int32, sal_Int32>& rCopiedResources)
32{
33 if (auto pReference = dynamic_cast<filter::PDFReferenceElement*>(&rInputElement))
34 {
35 filter::PDFObjectElement* pReferenced = pReference->LookupObject();
36 if (pReferenced)
37 {
38 // Copy the referenced object.
39 sal_Int32 nRef = copyExternalResource(rDocBuffer, *pReferenced, rCopiedResources);
40
41 // Write the updated reference.
42 rLine.append(nRef);
43 rLine.append(" 0 R");
44 }
45 }
46 else if (auto pInputArray = dynamic_cast<filter::PDFArrayElement*>(&rInputElement))
47 {
48 rLine.append("[ ");
49 for (auto const& pElement : pInputArray->GetElements())
50 {
51 copyRecursively(rLine, *pElement, rDocBuffer, rCopiedResources);
52 rLine.append(" ");
53 }
54 rLine.append("] ");
55 }
56 else if (auto pInputDictionary = dynamic_cast<filter::PDFDictionaryElement*>(&rInputElement))
57 {
58 rLine.append("<< ");
59 for (auto const& pPair : pInputDictionary->GetItems())
60 {
61 rLine.append("/");
62 rLine.append(pPair.first);
63 rLine.append(" ");
64 copyRecursively(rLine, *pPair.second, rDocBuffer, rCopiedResources);
65 rLine.append(" ");
66 }
67 rLine.append(">> ");
68 }
69 else
70 {
71 rInputElement.writeString(rLine);
72 }
73}
74
77 std::map<sal_Int32, sal_Int32>& rCopiedResources)
78{
79 auto it = rCopiedResources.find(rObject.GetObjectValue());
80 if (it != rCopiedResources.end())
81 {
82 // This resource was already copied once, nothing to do.
83 return it->second;
84 }
85
86 sal_Int32 nObject = m_rContainer.createObject();
87 // Remember what is the ID of this object in our output.
88 rCopiedResources[rObject.GetObjectValue()] = nObject;
89 SAL_INFO("vcl.pdfwriter", "PDFObjectCopier::copyExternalResource: " << rObject.GetObjectValue()
90 << " -> " << nObject);
91
92 OStringBuffer aLine = OString::number(nObject) + " 0 obj\n";
93
94 if (rObject.GetDictionary())
95 {
96 aLine.append("<< ");
97 bool bFirst = true;
98 for (auto const& rPair : rObject.GetDictionaryItems())
99 {
100 if (bFirst)
101 bFirst = false;
102 else
103 aLine.append(" ");
104
105 aLine.append("/" + rPair.first + " ");
106 copyRecursively(aLine, *rPair.second, rDocBuffer, rCopiedResources);
107 }
108
109 aLine.append(" >>\n");
110 }
111
112 filter::PDFStreamElement* pStream = rObject.GetStream();
113 if (pStream)
114 {
115 aLine.append("stream\n");
116 }
117
118 if (filter::PDFArrayElement* pArray = rObject.GetArray())
119 {
120 aLine.append("[ ");
121
122 const std::vector<filter::PDFElement*>& rElements = pArray->GetElements();
123
124 bool bFirst = true;
125 for (auto const& pElement : rElements)
126 {
127 if (bFirst)
128 bFirst = false;
129 else
130 aLine.append(" ");
131 copyRecursively(aLine, *pElement, rDocBuffer, rCopiedResources);
132 }
133 aLine.append("]\n");
134 }
135
136 // If the object has a number element outside a dictionary or array, copy that.
137 if (filter::PDFNumberElement* pNumber = rObject.GetNumberElement())
138 {
139 pNumber->writeString(aLine);
140 aLine.append("\n");
141 }
142
143 // We have the whole object, now write it to the output.
144 if (!m_rContainer.updateObject(nObject))
145 return -1;
146 if (!m_rContainer.writeBuffer(aLine))
147 return -1;
148 aLine.setLength(0);
149
150 if (pStream)
151 {
152 SvMemoryStream& rStream = pStream->GetMemory();
154 aLine.append(static_cast<const char*>(rStream.GetData()), rStream.GetSize());
155 if (!m_rContainer.writeBuffer(aLine))
156 return -1;
157 aLine.setLength(0);
159
160 aLine.append("\nendstream\n");
161 if (!m_rContainer.writeBuffer(aLine))
162 return -1;
163 aLine.setLength(0);
164 }
165
166 aLine.append("endobj\n\n");
167 if (!m_rContainer.writeBuffer(aLine))
168 return -1;
169
170 return nObject;
171}
172
174 const OString& rKind,
175 std::map<sal_Int32, sal_Int32>& rCopiedResources)
176{
177 // A name - object ID map, IDs as they appear in our output, not the
178 // original ones.
179 std::map<OString, sal_Int32> aRet;
180
181 // Get the rKind subset of the resource dictionary.
182 std::map<OString, filter::PDFElement*> aItems;
183 filter::PDFObjectElement* pKindObject = nullptr;
184 if (auto pResources = dynamic_cast<filter::PDFDictionaryElement*>(rPage.Lookup("Resources")))
185 {
186 // Resources is a direct dictionary.
187 filter::PDFElement* pLookup = pResources->LookupElement(rKind);
188 if (auto pDictionary = dynamic_cast<filter::PDFDictionaryElement*>(pLookup))
189 {
190 // rKind is an inline dictionary.
191 aItems = pDictionary->GetItems();
192 }
193 else if (auto pReference = dynamic_cast<filter::PDFReferenceElement*>(pLookup))
194 {
195 // rKind refers to a dictionary.
196 filter::PDFObjectElement* pReferenced = pReference->LookupObject();
197 if (!pReferenced)
198 {
199 return {};
200 }
201
202 pKindObject = pReferenced;
203 aItems = pReferenced->GetDictionaryItems();
204 }
205 }
206 else if (filter::PDFObjectElement* pPageResources = rPage.LookupObject("Resources"))
207 {
208 // Resources is an indirect object.
209 filter::PDFElement* pValue = pPageResources->Lookup(rKind);
210 if (auto pDictionary = dynamic_cast<filter::PDFDictionaryElement*>(pValue))
211 {
212 // Kind is a direct dictionary.
213 aItems = pDictionary->GetItems();
214 }
215 else if (filter::PDFObjectElement* pObject = pPageResources->LookupObject(rKind))
216 {
217 // Kind is an indirect object.
218 aItems = pObject->GetDictionaryItems();
219 pKindObject = pObject;
220 }
221 }
222 if (aItems.empty())
223 return {};
224
225 SvMemoryStream& rDocBuffer = rPage.GetDocument().GetEditBuffer();
226 bool bHasDictValue = false;
227
228 for (const auto& rItem : aItems)
229 {
230 // For each item copy it over to our output then insert it into aRet.
231 auto pReference = dynamic_cast<filter::PDFReferenceElement*>(rItem.second);
232 if (!pReference)
233 {
234 if (pKindObject && dynamic_cast<filter::PDFDictionaryElement*>(rItem.second))
235 {
236 bHasDictValue = true;
237 break;
238 }
239
240 continue;
241 }
242
243 filter::PDFObjectElement* pValue = pReference->LookupObject();
244 if (!pValue)
245 continue;
246
247 // Then copying over an object copy its dictionary and its stream.
248 sal_Int32 nObject = copyExternalResource(rDocBuffer, *pValue, rCopiedResources);
249 aRet[rItem.first] = nObject;
250 }
251
252 if (bHasDictValue && pKindObject)
253 {
254 sal_Int32 nObject = copyExternalResource(rDocBuffer, *pKindObject, rCopiedResources);
255 return "/" + rKind + " " + OString::number(nObject) + " 0 R";
256 }
257
258 // Build the dictionary entry string.
259 OStringBuffer sRet("/" + rKind + "<<");
260 for (const auto& rPair : aRet)
261 {
262 sRet.append("/" + rPair.first + " " + OString::number(rPair.second) + " 0 R");
263 }
264 sRet.append(">>");
265
266 return sRet.makeStringAndClear();
267}
268
270{
271 // Maps from source object id (PDF image) to target object id (export result).
272 std::map<sal_Int32, sal_Int32> aCopiedResources;
273 copyPageResources(pPage, rLine, aCopiedResources);
274}
275
277 std::map<sal_Int32, sal_Int32>& rCopiedResources)
278{
279 rLine.append(" /Resources <<");
280 static const std::initializer_list<OString> aKeys
281 = { "ColorSpace", "ExtGState", "Font", "XObject", "Shading", "Pattern" };
282 for (const auto& rKey : aKeys)
283 {
284 rLine.append(copyExternalResources(*pPage, rKey, rCopiedResources));
285 }
286 rLine.append(">>");
287}
288
289sal_Int32 PDFObjectCopier::copyPageStreams(std::vector<filter::PDFObjectElement*>& rContentStreams,
290 SvMemoryStream& rStream, bool& rCompressed)
291{
292 for (auto pContent : rContentStreams)
293 {
294 filter::PDFStreamElement* pPageStream = pContent->GetStream();
295 if (!pPageStream)
296 {
297 SAL_WARN("vcl.pdfwriter", "PDFObjectCopier::copyPageStreams: contents has no stream");
298 continue;
299 }
300
301 SvMemoryStream& rPageStream = pPageStream->GetMemory();
302
303 auto pFilter = dynamic_cast<filter::PDFNameElement*>(pContent->Lookup("Filter"));
304 auto pFilterArray = dynamic_cast<filter::PDFArrayElement*>(pContent->Lookup("Filter"));
305 if (!pFilter && pFilterArray)
306 {
307 auto& aElements = pFilterArray->GetElements();
308 if (!aElements.empty())
309 pFilter = dynamic_cast<filter::PDFNameElement*>(aElements[0]);
310 }
311
312 if (pFilter)
313 {
314 if (pFilter->GetValue() != "FlateDecode")
315 {
316 continue;
317 }
318
319 SvMemoryStream aMemoryStream;
320 ZCodec aZCodec;
321 rPageStream.Seek(0);
322 aZCodec.BeginCompression();
323 aZCodec.Decompress(rPageStream, aMemoryStream);
324 if (!aZCodec.EndCompression())
325 {
326 SAL_WARN("vcl.pdfwriter", "PDFObjectCopier::copyPageStreams: decompression failed");
327 continue;
328 }
329
330 rStream.WriteBytes(aMemoryStream.GetData(), aMemoryStream.GetSize());
331 }
332 else
333 {
334 rStream.WriteBytes(rPageStream.GetData(), rPageStream.GetSize());
335 }
336 }
337
338 rCompressed = PDFWriterImpl::compressStream(&rStream);
339
340 return rStream.Tell();
341}
342}
343
344/* vim:set shiftwidth=4 softtabstop=4 expandtab: */
const void * GetData()
sal_uInt64 GetSize()
sal_uInt64 Tell() const
std::size_t WriteBytes(const void *pData, std::size_t nSize)
sal_uInt64 Seek(sal_uInt64 nPos)
tools::Long Decompress(SvStream &rIStm, SvStream &rOStm)
tools::Long EndCompression()
void BeginCompression(int nCompressLevel=ZCODEC_DEFAULT_COMPRESSION, bool gzLib=false)
Allows creating, updating and writing PDF objects in a container.
virtual void checkAndEnableStreamEncryption(sal_Int32 nObject)=0
virtual bool updateObject(sal_Int32 n)=0
bool writeBuffer(std::string_view aBuffer)
virtual sal_Int32 createObject()=0
virtual void disableStreamEncryption()=0
OString copyExternalResources(filter::PDFObjectElement &rPage, const OString &rKind, std::map< sal_Int32, sal_Int32 > &rCopiedResources)
Copies resources of a given kind from an external page to the output, returning what has to be includ...
PDFObjectCopier(PDFObjectContainer &rContainer)
void copyRecursively(OStringBuffer &rLine, filter::PDFElement &rInputElement, SvMemoryStream &rDocBuffer, std::map< sal_Int32, sal_Int32 > &rCopiedResources)
static sal_Int32 copyPageStreams(std::vector< filter::PDFObjectElement * > &rContentStreams, SvMemoryStream &rStream, bool &rCompressed)
Copies page one or more page streams from rContentStreams into rStream.
void copyPageResources(filter::PDFObjectElement *pPage, OStringBuffer &rLine)
Copies resources of pPage into rLine.
PDFObjectContainer & m_rContainer
sal_Int32 copyExternalResource(SvMemoryStream &rDocBuffer, filter::PDFObjectElement &rObject, std::map< sal_Int32, sal_Int32 > &rCopiedResources)
Copies a single resource from an external document, returns the new object ID in our document.
static bool compressStream(SvMemoryStream *)
Array object: a list.
const std::vector< PDFElement * > & GetElements() const
Dictionary object: a set key-value pairs.
SvMemoryStream & GetEditBuffer()
Access to the input document, even after the input stream is gone.
A byte range in a PDF file.
Definition: pdfdocument.hxx:51
virtual void writeString(OStringBuffer &rBuffer)=0
Name object: a key string.
Numbering object: an integer or a real.
Indirect object: something with a unique ID.
Definition: pdfdocument.hxx:69
const std::map< OString, PDFElement * > & GetDictionaryItems()
Get access to the parsed key-value items from the object dictionary.
PDFElement * Lookup(const OString &rDictionaryKey)
PDFStreamElement * GetStream() const
Access to the stream of the object, if it has any.
PDFArrayElement * GetArray()
PDFNumberElement * GetNumberElement() const
PDFObjectElement * LookupObject(const OString &rDictionaryKey)
PDFDictionaryElement * GetDictionary()
Reference object: something with a unique ID.
Stream object: a byte array with a known length.
SvMemoryStream & GetMemory()
EmbeddedObjectRef * pObject
sal_Int32 nRef
#define SAL_WARN(area, stream)
#define SAL_INFO(area, stream)