LibreOffice Module vcl (master)  1
pdfobjectcopier.cxx
Go to the documentation of this file.
1 /* -*- Mode: C++; tab-width: 4; indent-tabs-mode: nil; c-basic-offset: 4 -*- */
2 /*
3  * This file is part of the LibreOffice project.
4  *
5  * This Source Code Form is subject to the terms of the Mozilla Public
6  * License, v. 2.0. If a copy of the MPL was not distributed with this
7  * file, You can obtain one at http://mozilla.org/MPL/2.0/.
8  */
9 
10 #include <sal/log.hxx>
11 #include <sal/types.h>
12 #include <rtl/strbuf.hxx>
13 #include <tools/stream.hxx>
14 #include <tools/zcodec.hxx>
15 
18 
19 #include <pdf/objectcopier.hxx>
20 #include <pdf/pdfwriter_impl.hxx>
21 
22 namespace vcl
23 {
25  : m_rContainer(rContainer)
26 {
27 }
28 
29 void PDFObjectCopier::copyRecursively(OStringBuffer& rLine, filter::PDFElement& rInputElement,
30  SvMemoryStream& rDocBuffer,
31  std::map<sal_Int32, sal_Int32>& rCopiedResources)
32 {
33  if (auto pReference = dynamic_cast<filter::PDFReferenceElement*>(&rInputElement))
34  {
35  filter::PDFObjectElement* pReferenced = pReference->LookupObject();
36  if (pReferenced)
37  {
38  // Copy the referenced object.
39  sal_Int32 nRef = copyExternalResource(rDocBuffer, *pReferenced, rCopiedResources);
40 
41  // Write the updated reference.
42  rLine.append(nRef);
43  rLine.append(" 0 R");
44  }
45  }
46  else if (auto pInputArray = dynamic_cast<filter::PDFArrayElement*>(&rInputElement))
47  {
48  rLine.append("[ ");
49  for (auto const& pElement : pInputArray->GetElements())
50  {
51  copyRecursively(rLine, *pElement, rDocBuffer, rCopiedResources);
52  rLine.append(" ");
53  }
54  rLine.append("] ");
55  }
56  else if (auto pInputDictionary = dynamic_cast<filter::PDFDictionaryElement*>(&rInputElement))
57  {
58  rLine.append("<< ");
59  for (auto const& pPair : pInputDictionary->GetItems())
60  {
61  rLine.append("/");
62  rLine.append(pPair.first);
63  rLine.append(" ");
64  copyRecursively(rLine, *pPair.second, rDocBuffer, rCopiedResources);
65  rLine.append(" ");
66  }
67  rLine.append(">> ");
68  }
69  else
70  {
71  rInputElement.writeString(rLine);
72  }
73 }
74 
76  filter::PDFObjectElement& rObject,
77  std::map<sal_Int32, sal_Int32>& rCopiedResources)
78 {
79  auto it = rCopiedResources.find(rObject.GetObjectValue());
80  if (it != rCopiedResources.end())
81  {
82  // This resource was already copied once, nothing to do.
83  return it->second;
84  }
85 
86  sal_Int32 nObject = m_rContainer.createObject();
87  // Remember what is the ID of this object in our output.
88  rCopiedResources[rObject.GetObjectValue()] = nObject;
89  SAL_INFO("vcl.pdfwriter", "PDFObjectCopier::copyExternalResource: " << rObject.GetObjectValue()
90  << " -> " << nObject);
91 
92  OStringBuffer aLine;
93  aLine.append(nObject);
94  aLine.append(" 0 obj\n");
95 
96  if (rObject.GetDictionary())
97  {
98  aLine.append("<< ");
99  bool bFirst = true;
100  for (auto const& rPair : rObject.GetDictionaryItems())
101  {
102  if (bFirst)
103  bFirst = false;
104  else
105  aLine.append(" ");
106 
107  aLine.append("/");
108  aLine.append(rPair.first);
109  aLine.append(" ");
110  copyRecursively(aLine, *rPair.second, rDocBuffer, rCopiedResources);
111  }
112 
113  aLine.append(" >>\n");
114  }
115 
116  if (filter::PDFStreamElement* pStream = rObject.GetStream())
117  {
118  aLine.append("stream\n");
119  SvMemoryStream& rStream = pStream->GetMemory();
120  aLine.append(static_cast<const char*>(rStream.GetData()), rStream.GetSize());
121  aLine.append("\nendstream\n");
122  }
123 
124  if (filter::PDFArrayElement* pArray = rObject.GetArray())
125  {
126  aLine.append("[ ");
127 
128  const std::vector<filter::PDFElement*>& rElements = pArray->GetElements();
129 
130  bool bFirst = true;
131  for (auto const& pElement : rElements)
132  {
133  if (bFirst)
134  bFirst = false;
135  else
136  aLine.append(" ");
137  copyRecursively(aLine, *pElement, rDocBuffer, rCopiedResources);
138  }
139  aLine.append("]\n");
140  }
141 
142  // If the object has a number element outside a dictionary or array, copy that.
143  if (filter::PDFNumberElement* pNumber = rObject.GetNumberElement())
144  {
145  pNumber->writeString(aLine);
146  aLine.append("\n");
147  }
148 
149  aLine.append("endobj\n\n");
150 
151  // We have the whole object, now write it to the output.
152  if (!m_rContainer.updateObject(nObject))
153  return -1;
154  if (!m_rContainer.writeBuffer(aLine.getStr(), aLine.getLength()))
155  return -1;
156 
157  return nObject;
158 }
159 
161  const OString& rKind,
162  std::map<sal_Int32, sal_Int32>& rCopiedResources)
163 {
164  // A name - object ID map, IDs as they appear in our output, not the
165  // original ones.
166  std::map<OString, sal_Int32> aRet;
167 
168  // Get the rKind subset of the resource dictionary.
169  std::map<OString, filter::PDFElement*> aItems;
170  if (auto pResources = dynamic_cast<filter::PDFDictionaryElement*>(rPage.Lookup("Resources")))
171  {
172  // Resources is a direct dictionary.
173  filter::PDFElement* pLookup = pResources->LookupElement(rKind);
174  if (auto pDictionary = dynamic_cast<filter::PDFDictionaryElement*>(pLookup))
175  {
176  // rKind is an inline dictionary.
177  aItems = pDictionary->GetItems();
178  }
179  else if (auto pReference = dynamic_cast<filter::PDFReferenceElement*>(pLookup))
180  {
181  // rKind refers to a dictionary.
182  filter::PDFObjectElement* pReferenced = pReference->LookupObject();
183  if (!pReferenced)
184  {
185  return {};
186  }
187 
188  aItems = pReferenced->GetDictionaryItems();
189  }
190  }
191  else if (filter::PDFObjectElement* pPageResources = rPage.LookupObject("Resources"))
192  {
193  // Resources is an indirect object.
194  filter::PDFElement* pValue = pPageResources->Lookup(rKind);
195  if (auto pDictionary = dynamic_cast<filter::PDFDictionaryElement*>(pValue))
196  // Kind is a direct dictionary.
197  aItems = pDictionary->GetItems();
198  else if (filter::PDFObjectElement* pObject = pPageResources->LookupObject(rKind))
199  // Kind is an indirect object.
200  aItems = pObject->GetDictionaryItems();
201  }
202  if (aItems.empty())
203  return {};
204 
205  SvMemoryStream& rDocBuffer = rPage.GetDocument().GetEditBuffer();
206 
207  for (const auto& rItem : aItems)
208  {
209  // For each item copy it over to our output then insert it into aRet.
210  auto pReference = dynamic_cast<filter::PDFReferenceElement*>(rItem.second);
211  if (!pReference)
212  continue;
213 
214  filter::PDFObjectElement* pValue = pReference->LookupObject();
215  if (!pValue)
216  continue;
217 
218  // Then copying over an object copy its dictionary and its stream.
219  sal_Int32 nObject = copyExternalResource(rDocBuffer, *pValue, rCopiedResources);
220  aRet[rItem.first] = nObject;
221  }
222 
223  // Build the dictionary entry string.
224  OStringBuffer sRet("/" + rKind + "<<");
225  for (const auto& rPair : aRet)
226  {
227  sRet.append("/" + rPair.first + " " + OString::number(rPair.second) + " 0 R");
228  }
229  sRet.append(">>");
230 
231  return sRet.makeStringAndClear();
232 }
233 
235 {
236  // Maps from source object id (PDF image) to target object id (export result).
237  std::map<sal_Int32, sal_Int32> aCopiedResources;
238  copyPageResources(pPage, rLine, aCopiedResources);
239 }
240 
242  std::map<sal_Int32, sal_Int32>& rCopiedResources)
243 {
244  rLine.append(" /Resources <<");
245  static const std::initializer_list<OString> aKeys
246  = { "ColorSpace", "ExtGState", "Font", "XObject", "Shading" };
247  for (const auto& rKey : aKeys)
248  {
249  rLine.append(copyExternalResources(*pPage, rKey, rCopiedResources));
250  }
251  rLine.append(">>");
252 }
253 
254 sal_Int32 PDFObjectCopier::copyPageStreams(std::vector<filter::PDFObjectElement*>& rContentStreams,
255  SvMemoryStream& rStream, bool& rCompressed)
256 {
257  for (auto pContent : rContentStreams)
258  {
259  filter::PDFStreamElement* pPageStream = pContent->GetStream();
260  if (!pPageStream)
261  {
262  SAL_WARN("vcl.pdfwriter", "PDFObjectCopier::copyPageStreams: contents has no stream");
263  continue;
264  }
265 
266  SvMemoryStream& rPageStream = pPageStream->GetMemory();
267 
268  auto pFilter = dynamic_cast<filter::PDFNameElement*>(pContent->Lookup("Filter"));
269  if (pFilter)
270  {
271  if (pFilter->GetValue() != "FlateDecode")
272  {
273  continue;
274  }
275 
276  SvMemoryStream aMemoryStream;
277  ZCodec aZCodec;
278  rPageStream.Seek(0);
279  aZCodec.BeginCompression();
280  aZCodec.Decompress(rPageStream, aMemoryStream);
281  if (!aZCodec.EndCompression())
282  {
283  SAL_WARN("vcl.pdfwriter", "PDFObjectCopier::copyPageStreams: decompression failed");
284  continue;
285  }
286 
287  rStream.WriteBytes(aMemoryStream.GetData(), aMemoryStream.GetSize());
288  }
289  else
290  {
291  rStream.WriteBytes(rPageStream.GetData(), rPageStream.GetSize());
292  }
293  }
294 
295  rCompressed = PDFWriterImpl::compressStream(&rStream);
296 
297  return rStream.Tell();
298 }
299 }
300 
301 /* vim:set shiftwidth=4 softtabstop=4 expandtab: */
Array object: a list.
Numbering object: an integer or a real.
virtual bool updateObject(sal_Int32 n)=0
virtual bool writeBuffer(const void *pBuffer, sal_uInt64 nBytes)=0
sal_uInt64 Seek(sal_uInt64 nPos)
tools::Long Decompress(SvStream &rIStm, SvStream &rOStm)
EmbeddedObjectRef * pObject
A byte range in a PDF file.
Definition: pdfdocument.hxx:50
void copyPageResources(filter::PDFObjectElement *pPage, OStringBuffer &rLine)
Copies resources of pPage into rLine.
PDFObjectContainer & m_rContainer
sal_Int32 nRef
Indirect object: something with a unique ID.
Definition: pdfdocument.hxx:68
sal_uInt64 GetSize()
std::size_t WriteBytes(const void *pData, std::size_t nSize)
void BeginCompression(int nCompressLevel=ZCODEC_DEFAULT_COMPRESSION, bool gzLib=false)
tools::Long EndCompression()
PDFStreamElement * GetStream() const
Access to the stream of the object, if it has any.
PDFObjectElement * LookupObject(const OString &rDictionaryKey)
SvMemoryStream & GetEditBuffer()
Access to the input document, even after the input stream is gone.
PDFDictionaryElement * GetDictionary()
OString copyExternalResources(filter::PDFObjectElement &rPage, const OString &rKind, std::map< sal_Int32, sal_Int32 > &rCopiedResources)
Copies resources of a given kind from an external page to the output, returning what has to be includ...
SvMemoryStream & GetMemory()
#define SAL_INFO(area, stream)
PDFNumberElement * GetNumberElement() const
sal_uInt64 Tell() const
static sal_Int32 copyPageStreams(std::vector< filter::PDFObjectElement * > &rContentStreams, SvMemoryStream &rStream, bool &rCompressed)
Copies one or more page streams from rContentStreams into rStream.
PDFArrayElement * GetArray()
Reference object: something with a unique ID.
virtual sal_Int32 createObject()=0
static bool compressStream(SvMemoryStream *)
virtual void writeString(OStringBuffer &rBuffer)=0
#define SAL_WARN(area, stream)
Name object: a key string.
const std::map< OString, PDFElement * > & GetDictionaryItems()
Get access to the parsed key-value items from the object dictionary.
PDFElement * Lookup(const OString &rDictionaryKey)
void copyRecursively(OStringBuffer &rLine, filter::PDFElement &rInputElement, SvMemoryStream &rDocBuffer, std::map< sal_Int32, sal_Int32 > &rCopiedResources)
sal_Int32 copyExternalResource(SvMemoryStream &rDocBuffer, filter::PDFObjectElement &rObject, std::map< sal_Int32, sal_Int32 > &rCopiedResources)
Copies a single resource from an external document, returns the new object ID in our document...
Stream object: a byte array with a known length.
PDFObjectCopier(PDFObjectContainer &rContainer)
const void * GetData()
Allows creating, updating and writing PDF objects in a container.