LibreOffice Module vcl (master)  1
pdfread.cxx
Go to the documentation of this file.
1 /* -*- Mode: C++; tab-width: 4; indent-tabs-mode: nil; c-basic-offset: 4 -*- */
2 /*
3  * This file is part of the LibreOffice project.
4  *
5  * This Source Code Form is subject to the terms of the Mozilla Public
6  * License, v. 2.0. If a copy of the MPL was not distributed with this
7  * file, You can obtain one at http://mozilla.org/MPL/2.0/.
8  */
9 
10 #include <vcl/pdfread.hxx>
11 
12 #include <config_features.h>
13 
14 #if HAVE_FEATURE_PDFIUM
15 #include <fpdfview.h>
16 #include <fpdf_edit.h>
17 #include <fpdf_save.h>
18 #endif
19 
20 #include <vcl/graph.hxx>
21 #include <bitmapwriteaccess.hxx>
23 
24 using namespace com::sun::star;
25 
26 namespace
27 {
28 #if HAVE_FEATURE_PDFIUM
29 
31 struct CompatibleWriter : public FPDF_FILEWRITE
32 {
33  SvMemoryStream m_aStream;
34 };
35 
36 int CompatibleWriterCallback(FPDF_FILEWRITE* pFileWrite, const void* pData, unsigned long nSize)
37 {
38  auto pImpl = static_cast<CompatibleWriter*>(pFileWrite);
39  pImpl->m_aStream.WriteBytes(pData, nSize);
40  return 1;
41 }
42 
/// Converts a length given in points (72 per inch) to pixels at the given resolution.
///
/// @param fPoint         length in points.
/// @param fResolutionDPI resolution in dots per inch.
/// @return the length in pixels, not rounded.
inline double pointToPixel(const double fPoint, const double fResolutionDPI)
{
    constexpr double fPointsPerInch = 72.;
    const double fScaled = fPoint * fResolutionDPI;
    return fScaled / fPointsPerInch;
}
48 
50 size_t generatePreview(SvStream& rStream, std::vector<Bitmap>& rBitmaps, sal_uInt64 nPos,
51  sal_uInt64 nSize, const size_t nFirstPage = 0, int nPages = 1,
52  const double fResolutionDPI = 96.)
53 {
54  // Read input into a buffer.
55  SvMemoryStream aInBuffer;
56  rStream.Seek(nPos);
57  aInBuffer.WriteStream(rStream, nSize);
58 
59  return vcl::RenderPDFBitmaps(aInBuffer.GetData(), aInBuffer.GetSize(), rBitmaps, nFirstPage,
60  nPages, fResolutionDPI);
61 }
62 
64 bool isCompatible(SvStream& rInStream, sal_uInt64 nPos, sal_uInt64 nSize)
65 {
66  if (nSize < 8)
67  return false;
68 
69  // %PDF-x.y
70  sal_uInt8 aFirstBytes[8];
71  rInStream.Seek(nPos);
72  sal_uLong nRead = rInStream.ReadBytes(aFirstBytes, 8);
73  if (nRead < 8)
74  return false;
75 
76  if (aFirstBytes[0] != '%' || aFirstBytes[1] != 'P' || aFirstBytes[2] != 'D'
77  || aFirstBytes[3] != 'F' || aFirstBytes[4] != '-')
78  return false;
79 
80  sal_Int32 nMajor = OString(aFirstBytes[5]).toInt32();
81  sal_Int32 nMinor = OString(aFirstBytes[7]).toInt32();
82  return !(nMajor > 1 || (nMajor == 1 && nMinor > 5));
83 }
84 
87 bool getCompatibleStream(SvStream& rInStream, SvStream& rOutStream, sal_uInt64 nPos,
88  sal_uInt64 nSize)
89 {
90  bool bCompatible = isCompatible(rInStream, nPos, nSize);
91  rInStream.Seek(nPos);
92  if (bCompatible)
93  // Not converting.
94  rOutStream.WriteStream(rInStream, nSize);
95  else
96  {
97  // Downconvert to PDF-1.5.
98  FPDF_LIBRARY_CONFIG aConfig;
99  aConfig.version = 2;
100  aConfig.m_pUserFontPaths = nullptr;
101  aConfig.m_pIsolate = nullptr;
102  aConfig.m_v8EmbedderSlot = 0;
103  FPDF_InitLibraryWithConfig(&aConfig);
104 
105  // Read input into a buffer.
106  SvMemoryStream aInBuffer;
107  aInBuffer.WriteStream(rInStream, nSize);
108 
109  // Load the buffer using pdfium.
110  FPDF_DOCUMENT pPdfDocument
111  = FPDF_LoadMemDocument(aInBuffer.GetData(), aInBuffer.GetSize(), /*password=*/nullptr);
112  if (!pPdfDocument)
113  return false;
114 
115  CompatibleWriter aWriter;
116  aWriter.version = 1;
117  aWriter.WriteBlock = &CompatibleWriterCallback;
118 
119  // 15 means PDF-1.5.
120  if (!FPDF_SaveWithVersion(pPdfDocument, &aWriter, 0, 15))
121  return false;
122 
123  FPDF_CloseDocument(pPdfDocument);
124  FPDF_DestroyLibrary();
125 
126  aWriter.m_aStream.Seek(STREAM_SEEK_TO_BEGIN);
127  rOutStream.WriteStream(aWriter.m_aStream);
128  }
129 
130  return rOutStream.good();
131 }
132 #else
133 size_t generatePreview(SvStream&, std::vector<Bitmap>&, sal_uInt64, sal_uInt64, size_t, int,
134  const double)
135 {
136  return 0;
137 }
138 
139 bool getCompatibleStream(SvStream& rInStream, SvStream& rOutStream, sal_uInt64 nPos,
140  sal_uInt64 nSize)
141 {
142  rInStream.Seek(nPos);
143  rOutStream.WriteStream(rInStream, nSize);
144  return rOutStream.good();
145 }
146 #endif // HAVE_FEATURE_PDFIUM
147 }
148 
149 namespace vcl
150 {
151 size_t RenderPDFBitmaps(const void* pBuffer, int nSize, std::vector<Bitmap>& rBitmaps,
152  const size_t nFirstPage, int nPages, const double fResolutionDPI)
153 {
154 #if HAVE_FEATURE_PDFIUM
155  FPDF_LIBRARY_CONFIG aConfig;
156  aConfig.version = 2;
157  aConfig.m_pUserFontPaths = nullptr;
158  aConfig.m_pIsolate = nullptr;
159  aConfig.m_v8EmbedderSlot = 0;
160  FPDF_InitLibraryWithConfig(&aConfig);
161 
162  // Load the buffer using pdfium.
163  FPDF_DOCUMENT pPdfDocument = FPDF_LoadMemDocument(pBuffer, nSize, /*password=*/nullptr);
164  if (!pPdfDocument)
165  return 0;
166 
167  const int nPageCount = FPDF_GetPageCount(pPdfDocument);
168  if (nPages <= 0)
169  nPages = nPageCount;
170  const size_t nLastPage = std::min<int>(nPageCount, nFirstPage + nPages) - 1;
171  for (size_t nPageIndex = nFirstPage; nPageIndex <= nLastPage; ++nPageIndex)
172  {
173  // Render next page.
174  FPDF_PAGE pPdfPage = FPDF_LoadPage(pPdfDocument, nPageIndex);
175  if (!pPdfPage)
176  break;
177 
178  // Returned unit is points, convert that to pixel.
179  const size_t nPageWidth = pointToPixel(FPDF_GetPageWidth(pPdfPage), fResolutionDPI);
180  const size_t nPageHeight = pointToPixel(FPDF_GetPageHeight(pPdfPage), fResolutionDPI);
181  FPDF_BITMAP pPdfBitmap = FPDFBitmap_Create(nPageWidth, nPageHeight, /*alpha=*/1);
182  if (!pPdfBitmap)
183  break;
184 
185  const FPDF_DWORD nColor = FPDFPage_HasTransparency(pPdfPage) ? 0x00000000 : 0xFFFFFFFF;
186  FPDFBitmap_FillRect(pPdfBitmap, 0, 0, nPageWidth, nPageHeight, nColor);
187  FPDF_RenderPageBitmap(pPdfBitmap, pPdfPage, /*start_x=*/0, /*start_y=*/0, nPageWidth,
188  nPageHeight, /*rotate=*/0, /*flags=*/0);
189 
190  // Save the buffer as a bitmap.
191  Bitmap aBitmap(Size(nPageWidth, nPageHeight), 24);
192  {
193  BitmapScopedWriteAccess pWriteAccess(aBitmap);
194  const auto pPdfBuffer = static_cast<ConstScanline>(FPDFBitmap_GetBuffer(pPdfBitmap));
195  const int nStride = FPDFBitmap_GetStride(pPdfBitmap);
196  for (size_t nRow = 0; nRow < nPageHeight; ++nRow)
197  {
198  ConstScanline pPdfLine = pPdfBuffer + (nStride * nRow);
199  // pdfium byte order is BGRA.
200  pWriteAccess->CopyScanline(nRow, pPdfLine, ScanlineFormat::N32BitTcBgra, nStride);
201  }
202  }
203 
204  rBitmaps.emplace_back(std::move(aBitmap));
205  FPDFBitmap_Destroy(pPdfBitmap);
206  FPDF_ClosePage(pPdfPage);
207  }
208 
209  FPDF_CloseDocument(pPdfDocument);
210  FPDF_DestroyLibrary();
211 
212  return rBitmaps.size();
213 #else
214  (void)pBuffer;
215  (void)nSize;
216  (void)rBitmaps;
217  (void)nFirstPage;
218  (void)nPages;
219  (void)fResolutionDPI;
220  return 0;
221 #endif // HAVE_FEATURE_PDFIUM
222 }
223 
224 bool ImportPDF(SvStream& rStream, Bitmap& rBitmap, size_t nPageIndex,
225  std::vector<sal_Int8>& rPdfData, sal_uInt64 nPos, sal_uInt64 nSize,
226  const double fResolutionDPI)
227 {
228  // Get the preview of the first page.
229  std::vector<Bitmap> aBitmaps;
230  if (generatePreview(rStream, aBitmaps, nPos, nSize, nPageIndex, 1, fResolutionDPI) != 1
231  || aBitmaps.empty())
232  return false;
233 
234  rBitmap = aBitmaps[0];
235 
236  // Save the original PDF stream for later use.
237  SvMemoryStream aMemoryStream;
238  if (!getCompatibleStream(rStream, aMemoryStream, nPos, nSize))
239  return false;
240 
241  rPdfData = std::vector<sal_Int8>(aMemoryStream.TellEnd());
242  aMemoryStream.Seek(STREAM_SEEK_TO_BEGIN);
243  aMemoryStream.ReadBytes(rPdfData.data(), rPdfData.size());
244 
245  return true;
246 }
247 
248 bool ImportPDF(SvStream& rStream, Graphic& rGraphic, const double fResolutionDPI)
249 {
250  std::vector<sal_Int8> aPdfData;
251  Bitmap aBitmap;
252  const bool bRet = ImportPDF(rStream, aBitmap, 0, aPdfData, STREAM_SEEK_TO_BEGIN,
253  STREAM_SEEK_TO_END, fResolutionDPI);
254  rGraphic = aBitmap;
255  rGraphic.setPdfData(std::make_shared<std::vector<sal_Int8>>(aPdfData));
256  rGraphic.setPageNumber(0); // We currently import only the first page.
257  return bRet;
258 }
259 
260 size_t ImportPDF(const OUString& rURL, std::vector<Bitmap>& rBitmaps,
261  std::vector<sal_Int8>& rPdfData, const double fResolutionDPI)
262 {
263  std::unique_ptr<SvStream> xStream(
264  ::utl::UcbStreamHelper::CreateStream(rURL, StreamMode::READ | StreamMode::SHARE_DENYNONE));
265 
266  if (generatePreview(*xStream, rBitmaps, STREAM_SEEK_TO_BEGIN, STREAM_SEEK_TO_END, 0, -1,
267  fResolutionDPI)
268  == 0)
269  return 0;
270 
271  // Save the original PDF stream for later use.
272  SvMemoryStream aMemoryStream;
273  if (!getCompatibleStream(*xStream, aMemoryStream, STREAM_SEEK_TO_BEGIN, STREAM_SEEK_TO_END))
274  return 0;
275 
276  rPdfData = std::vector<sal_Int8>(aMemoryStream.TellEnd());
277  aMemoryStream.Seek(STREAM_SEEK_TO_BEGIN);
278  aMemoryStream.ReadBytes(rPdfData.data(), rPdfData.size());
279 
280  return rBitmaps.size();
281 }
282 
283 size_t ImportPDFUnloaded(const OUString& rURL, std::vector<std::pair<Graphic, Size>>& rGraphics,
284  const double fResolutionDPI)
285 {
286 #if HAVE_FEATURE_PDFIUM
287  std::unique_ptr<SvStream> xStream(
288  ::utl::UcbStreamHelper::CreateStream(rURL, StreamMode::READ | StreamMode::SHARE_DENYNONE));
289 
290  // Save the original PDF stream for later use.
291  SvMemoryStream aMemoryStream;
292  if (!getCompatibleStream(*xStream, aMemoryStream, STREAM_SEEK_TO_BEGIN, STREAM_SEEK_TO_END))
293  return 0;
294 
295  // Copy into PdfData
296  aMemoryStream.Seek(STREAM_SEEK_TO_END);
297  auto pPdfData = std::make_shared<std::vector<sal_Int8>>(aMemoryStream.Tell());
298  aMemoryStream.Seek(STREAM_SEEK_TO_BEGIN);
299  aMemoryStream.ReadBytes(pPdfData->data(), pPdfData->size());
300 
301  // Prepare the link with the PDF stream.
302  const size_t nGraphicContentSize = pPdfData->size();
303  std::unique_ptr<sal_uInt8[]> pGraphicContent(new sal_uInt8[nGraphicContentSize]);
304  memcpy(pGraphicContent.get(), pPdfData->data(), nGraphicContentSize);
305  std::shared_ptr<GfxLink> pGfxLink(std::make_shared<GfxLink>(
306  std::move(pGraphicContent), nGraphicContentSize, GfxLinkType::NativePdf));
307 
308  FPDF_LIBRARY_CONFIG aConfig;
309  aConfig.version = 2;
310  aConfig.m_pUserFontPaths = nullptr;
311  aConfig.m_pIsolate = nullptr;
312  aConfig.m_v8EmbedderSlot = 0;
313  FPDF_InitLibraryWithConfig(&aConfig);
314 
315  // Load the buffer using pdfium.
316  FPDF_DOCUMENT pPdfDocument
317  = FPDF_LoadMemDocument(pPdfData->data(), pPdfData->size(), /*password=*/nullptr);
318  if (!pPdfDocument)
319  return 0;
320 
321  const int nPageCount = FPDF_GetPageCount(pPdfDocument);
322  if (nPageCount <= 0)
323  return 0;
324 
325  // dummy Bitmap
326  Bitmap aBitmap(Size(1, 1), 24);
327 
328  for (size_t nPageIndex = 0; nPageIndex < static_cast<size_t>(nPageCount); ++nPageIndex)
329  {
330  double fPageWidth = 0;
331  double fPageHeight = 0;
332  if (FPDF_GetPageSizeByIndex(pPdfDocument, nPageIndex, &fPageWidth, &fPageHeight) == 0)
333  continue;
334 
335  // Returned unit is points, convert that to pixel.
336  const size_t nPageWidth = pointToPixel(fPageWidth, fResolutionDPI);
337  const size_t nPageHeight = pointToPixel(fPageHeight, fResolutionDPI);
338 
339  // Create the Graphic with a dummy Bitmap and link the original PDF stream.
340  // We swap out this Graphic as soon as possible, and a later swap in
341  // actually renders the correct Bitmap on demand.
342  Graphic aGraphic(aBitmap);
343  aGraphic.setPdfData(pPdfData);
344  aGraphic.setPageNumber(nPageIndex);
345  aGraphic.SetGfxLink(pGfxLink);
346 
347  rGraphics.emplace_back(std::move(aGraphic), Size(nPageWidth, nPageHeight));
348  }
349 
350  FPDF_CloseDocument(pPdfDocument);
351  FPDF_DestroyLibrary();
352 
353  return rGraphics.size();
354 #else
355  (void)rURL;
356  (void)rGraphics;
357  (void)fResolutionDPI;
358  return 0;
359 #endif // HAVE_FEATURE_PDFIUM
360 }
361 }
362 
363 /* vim:set shiftwidth=4 softtabstop=4 expandtab: */
sal_uIntPtr sal_uLong
#define STREAM_SEEK_TO_END
sal_uInt64 Seek(sal_uInt64 nPos)
size_t ImportPDFUnloaded(const OUString &rURL, std::vector< std::pair< Graphic, Size >> &rGraphics, const double fResolutionDPI)
Import PDF as Graphic images (1 per page), all unloaded.
Definition: pdfread.cxx:283
virtual sal_uInt64 TellEnd() override
void setPdfData(const std::shared_ptr< std::vector< sal_Int8 >> &rPdfData)
Definition: graph.cxx:563
Reference< XInputStream > xStream
static std::unique_ptr< SvStream > CreateStream(const OUString &rFileName, StreamMode eOpenMode, css::uno::Reference< css::awt::XWindow > xParentWin=nullptr)
void SetGfxLink(const std::shared_ptr< GfxLink > &rGfxLink)
Definition: graph.cxx:516
static sal_Int32 pointToPixel(double pt)
#define STREAM_SEEK_TO_BEGIN
sal_uInt64 GetSize()
std::size_t WriteBytes(const void *pData, std::size_t nSize)
SvStream & WriteStream(SvStream &rStream)
void setPageNumber(sal_Int32 nPageNumber)
Set the page number of the multi-page source this Graphic is rendered from.
Definition: graph.cxx:580
const sal_uInt8 * ConstScanline
Definition: Scanline.hxx:26
std::size_t ReadBytes(void *pData, std::size_t nSize)
unsigned char sal_uInt8
sal_uInt64 Tell() const
bool good() const
void CopyScanline(long nY, const BitmapReadAccess &rReadAcc)
Definition: bmpacc.cxx:363
size_t RenderPDFBitmaps(const void *pBuffer, int nSize, std::vector< Bitmap > &rBitmaps, const size_t nFirstPage, int nPages, const double fResolutionDPI)
Fills the rBitmaps vector with rendered pages.
Definition: pdfread.cxx:151
size_t ImportPDF(const OUString &rURL, std::vector< Bitmap > &rBitmaps, std::vector< sal_Int8 > &rPdfData, const double fResolutionDPI)
Definition: pdfread.cxx:260
const void * GetData()
typedef void(CALLTYPE *GetFuncDataPtr)(sal_uInt16 &nNo