LibreOffice Module vcl (master)  1
pdfobjectcopier.cxx
Go to the documentation of this file.
1 /* -*- Mode: C++; tab-width: 4; indent-tabs-mode: nil; c-basic-offset: 4 -*- */
2 /*
3  * This file is part of the LibreOffice project.
4  *
5  * This Source Code Form is subject to the terms of the Mozilla Public
6  * License, v. 2.0. If a copy of the MPL was not distributed with this
7  * file, You can obtain one at http://mozilla.org/MPL/2.0/.
8  */
9 
10 #include <pdf/objectcopier.hxx>
11 
12 #include <rtl/strbuf.hxx>
13 #include <sal/log.hxx>
14 #include <sal/types.h>
15 #include <tools/stream.hxx>
16 #include <tools/zcodec.hxx>
19 
20 #include "pdfwriter_impl.hxx"
21 
22 namespace vcl
23 {
25  : m_rContainer(rContainer)
26 {
27 }
28 
29 void PDFObjectCopier::copyRecursively(OStringBuffer& rLine, filter::PDFElement* pInputElement,
30  SvMemoryStream& rDocBuffer,
31  std::map<sal_Int32, sal_Int32>& rCopiedResources)
32 {
33  if (auto pReference = dynamic_cast<filter::PDFReferenceElement*>(pInputElement))
34  {
35  filter::PDFObjectElement* pReferenced = pReference->LookupObject();
36  if (pReferenced)
37  {
38  // Copy the referenced object.
39  sal_Int32 nRef = copyExternalResource(rDocBuffer, *pReferenced, rCopiedResources);
40 
41  // Write the updated reference.
42  rLine.append(nRef);
43  rLine.append(" 0 R");
44  }
45  }
46  else if (auto pInputArray = dynamic_cast<filter::PDFArrayElement*>(pInputElement))
47  {
48  rLine.append("[ ");
49  for (auto const& pElement : pInputArray->GetElements())
50  {
51  copyRecursively(rLine, pElement, rDocBuffer, rCopiedResources);
52  rLine.append(" ");
53  }
54  rLine.append("] ");
55  }
56  else if (auto pInputDictionary = dynamic_cast<filter::PDFDictionaryElement*>(pInputElement))
57  {
58  rLine.append("<< ");
59  for (auto const& pPair : pInputDictionary->GetItems())
60  {
61  rLine.append("/");
62  rLine.append(pPair.first);
63  rLine.append(" ");
64  copyRecursively(rLine, pPair.second, rDocBuffer, rCopiedResources);
65  rLine.append(" ");
66  }
67  rLine.append(">> ");
68  }
69  else
70  {
71  pInputElement->writeString(rLine);
72  }
73 }
74 
76  filter::PDFObjectElement& rObject,
77  std::map<sal_Int32, sal_Int32>& rCopiedResources)
78 {
79  auto it = rCopiedResources.find(rObject.GetObjectValue());
80  if (it != rCopiedResources.end())
81  {
82  // This resource was already copied once, nothing to do.
83  return it->second;
84  }
85 
86  sal_Int32 nObject = m_rContainer.createObject();
87  // Remember what is the ID of this object in our output.
88  rCopiedResources[rObject.GetObjectValue()] = nObject;
89  SAL_INFO("vcl.pdfwriter", "PDFObjectCopier::copyExternalResource: " << rObject.GetObjectValue()
90  << " -> " << nObject);
91 
92  SvMemoryStream* pObjectStream = rObject.GetStreamBuffer();
93  if (!pObjectStream)
94  {
95  pObjectStream = &rDocBuffer;
96  }
97 
98  OStringBuffer aLine;
99  aLine.append(nObject);
100  aLine.append(" 0 obj\n");
101 
102  if (rObject.GetDictionary())
103  {
104  aLine.append("<< ");
105  bool bFirst = true;
106  for (auto const& rPair : rObject.GetDictionaryItems())
107  {
108  if (bFirst)
109  bFirst = false;
110  else
111  aLine.append(" ");
112 
113  aLine.append("/");
114  aLine.append(rPair.first);
115  aLine.append(" ");
116  copyRecursively(aLine, rPair.second, rDocBuffer, rCopiedResources);
117  }
118 
119  aLine.append(" >>\n");
120  }
121 
122  if (filter::PDFStreamElement* pStream = rObject.GetStream())
123  {
124  aLine.append("stream\n");
125  SvMemoryStream& rStream = pStream->GetMemory();
126  aLine.append(static_cast<const char*>(rStream.GetData()), rStream.GetSize());
127  aLine.append("\nendstream\n");
128  }
129 
130  if (filter::PDFArrayElement* pArray = rObject.GetArray())
131  {
132  aLine.append("[ ");
133 
134  const std::vector<filter::PDFElement*>& rElements = pArray->GetElements();
135 
136  bool bFirst = true;
137  for (auto const& pElement : rElements)
138  {
139  if (bFirst)
140  bFirst = false;
141  else
142  aLine.append(" ");
143  copyRecursively(aLine, pElement, rDocBuffer, rCopiedResources);
144  }
145  aLine.append("]\n");
146  }
147 
148  // If the object has a number element outside a dictionary or array, copy that.
149  if (filter::PDFNumberElement* pNumber = rObject.GetNumberElement())
150  {
151  pNumber->writeString(aLine);
152  aLine.append("\n");
153  }
154 
155  aLine.append("endobj\n\n");
156 
157  // We have the whole object, now write it to the output.
158  if (!m_rContainer.updateObject(nObject))
159  return -1;
160  if (!m_rContainer.writeBuffer(aLine.getStr(), aLine.getLength()))
161  return -1;
162 
163  return nObject;
164 }
165 
167  const OString& rKind,
168  std::map<sal_Int32, sal_Int32>& rCopiedResources)
169 {
170  // A name - object ID map, IDs as they appear in our output, not the
171  // original ones.
172  std::map<OString, sal_Int32> aRet;
173 
174  // Get the rKind subset of the resource dictionary.
175  std::map<OString, filter::PDFElement*> aItems;
176  if (auto pResources = dynamic_cast<filter::PDFDictionaryElement*>(rPage.Lookup("Resources")))
177  {
178  // Resources is a direct dictionary.
179  filter::PDFElement* pLookup = pResources->LookupElement(rKind);
180  if (auto pDictionary = dynamic_cast<filter::PDFDictionaryElement*>(pLookup))
181  {
182  // rKind is an inline dictionary.
183  aItems = pDictionary->GetItems();
184  }
185  else if (auto pReference = dynamic_cast<filter::PDFReferenceElement*>(pLookup))
186  {
187  // rKind refers to a dictionary.
188  filter::PDFObjectElement* pReferenced = pReference->LookupObject();
189  if (!pReferenced)
190  {
191  return OString();
192  }
193 
194  aItems = pReferenced->GetDictionaryItems();
195  }
196  }
197  else if (filter::PDFObjectElement* pPageResources = rPage.LookupObject("Resources"))
198  {
199  // Resources is an indirect object.
200  filter::PDFElement* pValue = pPageResources->Lookup(rKind);
201  if (auto pDictionary = dynamic_cast<filter::PDFDictionaryElement*>(pValue))
202  // Kind is a direct dictionary.
203  aItems = pDictionary->GetItems();
204  else if (filter::PDFObjectElement* pObject = pPageResources->LookupObject(rKind))
205  // Kind is an indirect object.
206  aItems = pObject->GetDictionaryItems();
207  }
208  if (aItems.empty())
209  return OString();
210 
211  SvMemoryStream& rDocBuffer = rPage.GetDocument().GetEditBuffer();
212 
213  for (const auto& rItem : aItems)
214  {
215  // For each item copy it over to our output then insert it into aRet.
216  auto pReference = dynamic_cast<filter::PDFReferenceElement*>(rItem.second);
217  if (!pReference)
218  continue;
219 
220  filter::PDFObjectElement* pValue = pReference->LookupObject();
221  if (!pValue)
222  continue;
223 
224  // When copying over an object, copy its dictionary and its stream.
225  sal_Int32 nObject = copyExternalResource(rDocBuffer, *pValue, rCopiedResources);
226  aRet[rItem.first] = nObject;
227  }
228 
229  // Build the dictionary entry string.
230  OStringBuffer sRet("/" + rKind + "<<");
231  for (const auto& rPair : aRet)
232  {
233  sRet.append("/")
234  .append(rPair.first)
235  .append(" ")
236  .append(OString::number(rPair.second))
237  .append(" 0 R");
238  }
239  sRet.append(">>");
240 
241  return sRet.makeStringAndClear();
242 }
243 
245 {
246  // Maps from source object id (PDF image) to target object id (export result).
247  std::map<sal_Int32, sal_Int32> aCopiedResources;
248  copyPageResources(pPage, rLine, aCopiedResources);
249 }
250 
252  std::map<sal_Int32, sal_Int32>& rCopiedResources)
253 {
254  rLine.append(" /Resources <<");
255  static const std::initializer_list<OString> aKeys
256  = { "ColorSpace", "ExtGState", "Font", "XObject", "Shading" };
257  for (const auto& rKey : aKeys)
258  {
259  rLine.append(copyExternalResources(*pPage, rKey, rCopiedResources));
260  }
261  rLine.append(">>");
262 }
263 
264 sal_Int32 PDFObjectCopier::copyPageStreams(std::vector<filter::PDFObjectElement*>& rContentStreams,
265  SvMemoryStream& rStream, bool& rCompressed)
266 {
267  for (auto pContent : rContentStreams)
268  {
269  filter::PDFStreamElement* pPageStream = pContent->GetStream();
270  if (!pPageStream)
271  {
272  SAL_WARN("vcl.pdfwriter", "PDFObjectCopier::copyPageStreams: contents has no stream");
273  continue;
274  }
275 
276  SvMemoryStream& rPageStream = pPageStream->GetMemory();
277 
278  auto pFilter = dynamic_cast<filter::PDFNameElement*>(pContent->Lookup("Filter"));
279  if (pFilter)
280  {
281  if (pFilter->GetValue() != "FlateDecode")
282  {
283  continue;
284  }
285 
286  SvMemoryStream aMemoryStream;
287  ZCodec aZCodec;
288  rPageStream.Seek(0);
289  aZCodec.BeginCompression();
290  aZCodec.Decompress(rPageStream, aMemoryStream);
291  if (!aZCodec.EndCompression())
292  {
293  SAL_WARN("vcl.pdfwriter", "PDFObjectCopier::copyPageStreams: decompression failed");
294  continue;
295  }
296 
297  rStream.WriteBytes(aMemoryStream.GetData(), aMemoryStream.GetSize());
298  }
299  else
300  {
301  rStream.WriteBytes(rPageStream.GetData(), rPageStream.GetSize());
302  }
303  }
304 
305  rCompressed = PDFWriterImpl::compressStream(&rStream);
306 
307  return rStream.Tell();
308 }
309 }
310 
311 /* vim:set shiftwidth=4 softtabstop=4 expandtab: */
Array object: a list.
Numbering object: an integer or a real.
virtual bool updateObject(sal_Int32 n)=0
void copyRecursively(OStringBuffer &rLine, filter::PDFElement *pInputElement, SvMemoryStream &rDocBuffer, std::map< sal_Int32, sal_Int32 > &rCopiedResources)
virtual bool writeBuffer(const void *pBuffer, sal_uInt64 nBytes)=0
sal_uInt64 Seek(sal_uInt64 nPos)
tools::Long Decompress(SvStream &rIStm, SvStream &rOStm)
EmbeddedObjectRef * pObject
SvMemoryStream * GetStreamBuffer() const
A byte range in a PDF file.
Definition: pdfdocument.hxx:50
void copyPageResources(filter::PDFObjectElement *pPage, OStringBuffer &rLine)
Copies resources of pPage into rLine.
PDFObjectContainer & m_rContainer
sal_Int32 nRef
Indirect object: something with a unique ID.
Definition: pdfdocument.hxx:68
sal_uInt64 GetSize()
std::size_t WriteBytes(const void *pData, std::size_t nSize)
::rtl::Reference< Content > pContent
void BeginCompression(int nCompressLevel=ZCODEC_DEFAULT_COMPRESSION, bool gzLib=false)
tools::Long EndCompression()
PDFStreamElement * GetStream() const
Access to the stream of the object, if it has any.
PDFObjectElement * LookupObject(const OString &rDictionaryKey)
SvMemoryStream & GetEditBuffer()
Access to the input document, even after the input stream is gone.
PDFDictionaryElement * GetDictionary()
OString copyExternalResources(filter::PDFObjectElement &rPage, const OString &rKind, std::map< sal_Int32, sal_Int32 > &rCopiedResources)
Copies resources of a given kind from an external page to the output, returning what has to be includ...
SvMemoryStream & GetMemory()
#define SAL_INFO(area, stream)
PDFNumberElement * GetNumberElement() const
sal_uInt64 Tell() const
static sal_Int32 copyPageStreams(std::vector< filter::PDFObjectElement * > &rContentStreams, SvMemoryStream &rStream, bool &rCompressed)
Copies one or more page streams from rContentStreams into rStream.
PDFArrayElement * GetArray()
Reference object: something with a unique ID.
virtual sal_Int32 createObject()=0
static bool compressStream(SvMemoryStream *)
virtual void writeString(OStringBuffer &rBuffer)=0
#define SAL_WARN(area, stream)
Name object: a key string.
const std::map< OString, PDFElement * > & GetDictionaryItems()
Get access to the parsed key-value items from the object dictionary.
PDFElement * Lookup(const OString &rDictionaryKey)
sal_Int32 copyExternalResource(SvMemoryStream &rDocBuffer, filter::PDFObjectElement &rObject, std::map< sal_Int32, sal_Int32 > &rCopiedResources)
Copies a single resource from an external document, returns the new object ID in our document...
Stream object: a byte array with a known length.
PDFObjectCopier(PDFObjectContainer &rContainer)
const void * GetData()
Allows creating, updating and writing PDF objects in a container.