LibreOffice Module vcl (master)  1
pdfread.cxx
Go to the documentation of this file.
1 /* -*- Mode: C++; tab-width: 4; indent-tabs-mode: nil; c-basic-offset: 4 -*- */
2 /*
3  * This file is part of the LibreOffice project.
4  *
5  * This Source Code Form is subject to the terms of the Mozilla Public
6  * License, v. 2.0. If a copy of the MPL was not distributed with this
7  * file, You can obtain one at http://mozilla.org/MPL/2.0/.
8  */
9 
10 #include <vcl/pdfread.hxx>
11 
12 #include <config_features.h>
13 
14 #if HAVE_FEATURE_PDFIUM
15 #include <fpdfview.h>
16 #include <fpdf_edit.h>
17 #include <tools/UnitConversion.hxx>
18 #endif
19 
20 #include <vcl/graph.hxx>
23 #include <unotools/datetime.hxx>
24 
26 #include <sal/log.hxx>
27 
28 using namespace com::sun::star;
29 
30 namespace
31 {
32 #if HAVE_FEATURE_PDFIUM
33 
/// Converts a length given in points (1/72 inch) into pixels at fResolutionDPI.
inline double pointToPixel(const double fPoint, const double fResolutionDPI)
{
    constexpr double fPointsPerInch = 72.;
    return fPoint * fResolutionDPI / fPointsPerInch;
}
39 
41 bool isCompatible(SvStream& rInStream, sal_uInt64 nPos, sal_uInt64 nSize)
42 {
43  if (nSize < 8)
44  return false;
45 
46  // %PDF-x.y
47  sal_uInt8 aFirstBytes[8];
48  rInStream.Seek(nPos);
49  sal_uLong nRead = rInStream.ReadBytes(aFirstBytes, 8);
50  if (nRead < 8)
51  return false;
52 
53  if (aFirstBytes[0] != '%' || aFirstBytes[1] != 'P' || aFirstBytes[2] != 'D'
54  || aFirstBytes[3] != 'F' || aFirstBytes[4] != '-')
55  return false;
56 
57  sal_Int32 nMajor = OString(char(aFirstBytes[5])).toInt32();
58  sal_Int32 nMinor = OString(char(aFirstBytes[7])).toInt32();
59  return !(nMajor > 1 || (nMajor == 1 && nMinor > 6));
60 }
61 
64 bool getCompatibleStream(SvStream& rInStream, SvStream& rOutStream)
65 {
66  sal_uInt64 nPos = STREAM_SEEK_TO_BEGIN;
67  sal_uInt64 nSize = STREAM_SEEK_TO_END;
68  bool bCompatible = isCompatible(rInStream, nPos, nSize);
69  rInStream.Seek(nPos);
70  if (bCompatible)
71  // Not converting.
72  rOutStream.WriteStream(rInStream, nSize);
73  else
74  {
75  // Downconvert to PDF-1.6.
76  auto pPdfium = vcl::pdf::PDFiumLibrary::get();
77 
78  // Read input into a buffer.
79  SvMemoryStream aInBuffer;
80  aInBuffer.WriteStream(rInStream, nSize);
81 
82  SvMemoryStream aSaved;
83  {
84  // Load the buffer using pdfium.
85  std::unique_ptr<vcl::pdf::PDFiumDocument> pPdfDocument
86  = pPdfium->openDocument(aInBuffer.GetData(), aInBuffer.GetSize());
87  if (!pPdfDocument)
88  return false;
89 
90  // 16 means PDF-1.6.
91  if (!pPdfDocument->saveWithVersion(aSaved, 16))
92  return false;
93  }
94 
95  aSaved.Seek(STREAM_SEEK_TO_BEGIN);
96  rOutStream.WriteStream(aSaved);
97  }
98 
99  return rOutStream.good();
100 }
101 #else
102 bool getCompatibleStream(SvStream& rInStream, SvStream& rOutStream)
103 {
104  rInStream.Seek(STREAM_SEEK_TO_BEGIN);
105  rOutStream.WriteStream(rInStream, STREAM_SEEK_TO_END);
106  return rOutStream.good();
107 }
108 #endif // HAVE_FEATURE_PDFIUM
109 
110 VectorGraphicDataArray createVectorGraphicDataArray(SvStream& rStream)
111 {
112  // Save the original PDF stream for later use.
113  SvMemoryStream aMemoryStream;
114  if (!getCompatibleStream(rStream, aMemoryStream))
115  return VectorGraphicDataArray();
116 
117  const sal_uInt32 nStreamLength = aMemoryStream.TellEnd();
118 
119  VectorGraphicDataArray aPdfData(nStreamLength);
120 
121  aMemoryStream.Seek(STREAM_SEEK_TO_BEGIN);
122  aMemoryStream.ReadBytes(aPdfData.begin(), nStreamLength);
123  if (aMemoryStream.GetError())
124  return VectorGraphicDataArray();
125 
126  return aPdfData;
127 }
128 
129 } // end anonymous namespace
130 
131 namespace vcl
132 {
133 size_t RenderPDFBitmaps(const void* pBuffer, int nSize, std::vector<BitmapEx>& rBitmaps,
134  const size_t nFirstPage, int nPages, const basegfx::B2DTuple* pSizeHint)
135 {
136 #if HAVE_FEATURE_PDFIUM
137  const double fResolutionDPI = 96;
138  auto pPdfium = vcl::pdf::PDFiumLibrary::get();
139 
140  // Load the buffer using pdfium.
141  std::unique_ptr<vcl::pdf::PDFiumDocument> pPdfDocument = pPdfium->openDocument(pBuffer, nSize);
142  if (!pPdfDocument)
143  return 0;
144 
145  const int nPageCount = pPdfDocument->getPageCount();
146  if (nPages <= 0)
147  nPages = nPageCount;
148  const size_t nLastPage = std::min<int>(nPageCount, nFirstPage + nPages) - 1;
149  for (size_t nPageIndex = nFirstPage; nPageIndex <= nLastPage; ++nPageIndex)
150  {
151  // Render next page.
152  std::unique_ptr<vcl::pdf::PDFiumPage> pPdfPage = pPdfDocument->openPage(nPageIndex);
153  if (!pPdfPage)
154  break;
155 
156  // Calculate the bitmap size in points.
157  size_t nPageWidthPoints = pPdfPage->getWidth();
158  size_t nPageHeightPoints = pPdfPage->getHeight();
159  if (pSizeHint && pSizeHint->getX() && pSizeHint->getY())
160  {
161  // Have a size hint, prefer that over the logic size from the PDF.
162  nPageWidthPoints = convertMm100ToTwip(pSizeHint->getX()) / 20;
163  nPageHeightPoints = convertMm100ToTwip(pSizeHint->getY()) / 20;
164  }
165 
166  // Returned unit is points, convert that to pixel.
167  const size_t nPageWidth = pointToPixel(nPageWidthPoints, fResolutionDPI);
168  const size_t nPageHeight = pointToPixel(nPageHeightPoints, fResolutionDPI);
169  std::unique_ptr<vcl::pdf::PDFiumBitmap> pPdfBitmap
170  = pPdfium->createBitmap(nPageWidth, nPageHeight, /*alpha=*/1);
171  if (!pPdfBitmap)
172  break;
173 
174  bool bTransparent = pPdfPage->hasTransparency();
175  if (pSizeHint)
176  {
177  // This is the PDF-in-EMF case: force transparency, even in case pdfium would tell us
178  // the PDF is not transparent.
179  bTransparent = true;
180  }
181  const sal_uInt32 nColor = bTransparent ? 0x00000000 : 0xFFFFFFFF;
182  pPdfBitmap->fillRect(0, 0, nPageWidth, nPageHeight, nColor);
183  pPdfBitmap->renderPageBitmap(pPdfPage.get(), /*start_x=*/0,
184  /*start_y=*/0, nPageWidth, nPageHeight);
185 
186  // Save the buffer as a bitmap.
187  Bitmap aBitmap(Size(nPageWidth, nPageHeight), 24);
188  AlphaMask aMask(Size(nPageWidth, nPageHeight));
189  {
190  BitmapScopedWriteAccess pWriteAccess(aBitmap);
191  AlphaScopedWriteAccess pMaskAccess(aMask);
192  ConstScanline pPdfBuffer = pPdfBitmap->getBuffer();
193  const int nStride = pPdfBitmap->getStride();
194  std::vector<sal_uInt8> aScanlineAlpha(nPageWidth);
195  for (size_t nRow = 0; nRow < nPageHeight; ++nRow)
196  {
197  ConstScanline pPdfLine = pPdfBuffer + (nStride * nRow);
198  // pdfium byte order is BGRA.
199  pWriteAccess->CopyScanline(nRow, pPdfLine, ScanlineFormat::N32BitTcBgra, nStride);
200  for (size_t nCol = 0; nCol < nPageWidth; ++nCol)
201  {
202  // Invert alpha (source is alpha, target is opacity).
203  aScanlineAlpha[nCol] = ~pPdfLine[3];
204  pPdfLine += 4;
205  }
206  pMaskAccess->CopyScanline(nRow, aScanlineAlpha.data(), ScanlineFormat::N8BitPal,
207  nPageWidth);
208  }
209  }
210 
211  if (bTransparent)
212  {
213  rBitmaps.emplace_back(aBitmap, aMask);
214  }
215  else
216  {
217  rBitmaps.emplace_back(std::move(aBitmap));
218  }
219  }
220 
221  return rBitmaps.size();
222 #else
223  (void)pBuffer;
224  (void)nSize;
225  (void)rBitmaps;
226  (void)nFirstPage;
227  (void)nPages;
228  (void)pSizeHint;
229  return 0;
230 #endif // HAVE_FEATURE_PDFIUM
231 }
232 
234  std::shared_ptr<VectorGraphicData>& rVectorGraphicData)
235 {
236  VectorGraphicDataArray aPdfDataArray = createVectorGraphicDataArray(rStream);
237  if (!aPdfDataArray.hasElements())
238  {
239  SAL_WARN("vcl.filter", "ImportPDF: empty PDF data array");
240  return false;
241  }
242 
243  rVectorGraphicData
244  = std::make_shared<VectorGraphicData>(aPdfDataArray, VectorGraphicDataType::Pdf);
245 
246  return true;
247 }
248 
249 bool ImportPDF(SvStream& rStream, Graphic& rGraphic)
250 {
251  std::shared_ptr<VectorGraphicData> pVectorGraphicData;
252  if (!importPdfVectorGraphicData(rStream, pVectorGraphicData))
253  return false;
254  rGraphic = Graphic(pVectorGraphicData);
255  return true;
256 }
257 
258 #if HAVE_FEATURE_PDFIUM
259 namespace
260 {
261 basegfx::B2DPoint convertFromPDFInternalToHMM(basegfx::B2DSize const& rInputPoint,
262  basegfx::B2DSize const& rPageSize)
263 {
264  double x = convertPointToMm100(rInputPoint.getX());
265  double y = convertPointToMm100(rPageSize.getY() - rInputPoint.getY());
266  return basegfx::B2DPoint(x, y);
267 }
268 
// Collects the annotations of pPage as PDFGraphicAnnotation entries.
//
// Every index below pPage->getAnnotationCount() is queried; indexes for which
// getAnnotation() returns nothing are skipped. For an accepted annotation the
// author, text, rectangle, modification date, color and subtype are stored,
// and depending on the subtype a marker object (polygon, square, circle, ink,
// highlight or line) is attached. Coordinates are run through
// convertPointToMm100 and Y values are subtracted from aPageSize.getY().
//
// NOTE(review): this listing lacks a few source lines: the condition guarding
// the block that builds an entry, the condition of the first marker branch,
// and the end of the make_shared call for the highlight marker. Confirm those
// against the real file before relying on this description.
269 std::vector<PDFGraphicAnnotation>
270 findAnnotations(const std::unique_ptr<vcl::pdf::PDFiumPage>& pPage, basegfx::B2DSize aPageSize)
271 {
272  std::vector<PDFGraphicAnnotation> aPDFGraphicAnnotations;
273  for (int nAnnotation = 0; nAnnotation < pPage->getAnnotationCount(); nAnnotation++)
274  {
275  auto pAnnotation = pPage->getAnnotation(nAnnotation);
276  if (pAnnotation)
277  {
278  auto eSubtype = pAnnotation->getSubType();
279 
 // NOTE(review): the condition opening the following block is not visible here.
287  {
288  OUString sAuthor = pAnnotation->getString(vcl::pdf::constDictionaryKeyTitle);
289  OUString sText = pAnnotation->getString(vcl::pdf::constDictionaryKeyContents);
290 
 // Annotation rectangle, converted and with Y mirrored against the page height.
291  basegfx::B2DRectangle rRectangle = pAnnotation->getRectangle();
292  basegfx::B2DRectangle rRectangleHMM(
293  convertPointToMm100(rRectangle.getMinX()),
294  convertPointToMm100(aPageSize.getY() - rRectangle.getMinY()),
295  convertPointToMm100(rRectangle.getMaxX()),
296  convertPointToMm100(aPageSize.getY() - rRectangle.getMaxY()));
297 
 // The modification date is converted to ISO 8601 first; aDateTime stays
 // default-constructed when that conversion yields an empty string.
298  OUString sDateTimeString
299  = pAnnotation->getString(vcl::pdf::constDictionaryKeyModificationDate);
300  OUString sISO8601String = vcl::pdf::convertPdfDateToISO8601(sDateTimeString);
301 
302  css::util::DateTime aDateTime;
303  if (!sISO8601String.isEmpty())
304  {
305  utl::ISO8601parseDateTime(sISO8601String, aDateTime);
306  }
307 
308  Color aColor = pAnnotation->getColor();
309 
 // Append a new entry and fill in the properties shared by all subtypes.
310  aPDFGraphicAnnotations.emplace_back();
311 
312  auto& rPDFGraphicAnnotation = aPDFGraphicAnnotations.back();
313  rPDFGraphicAnnotation.maRectangle = rRectangleHMM;
314  rPDFGraphicAnnotation.maAuthor = sAuthor;
315  rPDFGraphicAnnotation.maText = sText;
316  rPDFGraphicAnnotation.maDateTime = aDateTime;
317  rPDFGraphicAnnotation.meSubType = eSubtype;
318  rPDFGraphicAnnotation.maColor = aColor;
319 
 // NOTE(review): the condition of this first branch is not visible here; the
 // body builds a closed polygon marker from the annotation's vertices.
321  {
322  auto const& rVertices = pAnnotation->getVertices();
323  if (!rVertices.empty())
324  {
325  auto pMarker = std::make_shared<vcl::pdf::PDFAnnotationMarkerPolygon>();
326  rPDFGraphicAnnotation.mpMarker = pMarker;
327  for (auto const& rVertex : rVertices)
328  {
329  auto aPoint = convertFromPDFInternalToHMM(rVertex, aPageSize);
330  pMarker->maPolygon.append(aPoint);
331  }
332  pMarker->maPolygon.setClosed(true);
333  pMarker->mnWidth = convertPointToMm100(pAnnotation->getBorderWidth());
334  if (pAnnotation->hasKey(vcl::pdf::constDictionaryKeyInteriorColor))
335  pMarker->maFillColor = pAnnotation->getInteriorColor();
336  }
337  }
338  else if (eSubtype == vcl::pdf::PDFAnnotationSubType::Square)
339  {
 // Square: only border width and, if present, the interior color are stored.
340  auto pMarker = std::make_shared<vcl::pdf::PDFAnnotationMarkerSquare>();
341  rPDFGraphicAnnotation.mpMarker = pMarker;
342  pMarker->mnWidth = convertPointToMm100(pAnnotation->getBorderWidth());
343  if (pAnnotation->hasKey(vcl::pdf::constDictionaryKeyInteriorColor))
344  pMarker->maFillColor = pAnnotation->getInteriorColor();
345  }
346  else if (eSubtype == vcl::pdf::PDFAnnotationSubType::Circle)
347  {
 // Circle: same properties as for Square.
348  auto pMarker = std::make_shared<vcl::pdf::PDFAnnotationMarkerCircle>();
349  rPDFGraphicAnnotation.mpMarker = pMarker;
350  pMarker->mnWidth = convertPointToMm100(pAnnotation->getBorderWidth());
351  if (pAnnotation->hasKey(vcl::pdf::constDictionaryKeyInteriorColor))
352  pMarker->maFillColor = pAnnotation->getInteriorColor();
353  }
354  else if (eSubtype == vcl::pdf::PDFAnnotationSubType::Ink)
355  {
 // Ink: every stroke list becomes one polygon; no marker is created when
 // the annotation has no strokes.
356  auto const& rStrokesList = pAnnotation->getInkStrokes();
357  if (!rStrokesList.empty())
358  {
359  auto pMarker = std::make_shared<vcl::pdf::PDFAnnotationMarkerInk>();
360  rPDFGraphicAnnotation.mpMarker = pMarker;
361  for (auto const& rStrokes : rStrokesList)
362  {
363  basegfx::B2DPolygon aPolygon;
364  for (auto const& rVertex : rStrokes)
365  {
366  auto aPoint = convertFromPDFInternalToHMM(rVertex, aPageSize);
367  aPolygon.append(aPoint);
368  }
369  pMarker->maStrokes.push_back(aPolygon);
370  }
371  float fWidth = pAnnotation->getBorderWidth();
372  pMarker->mnWidth = convertPointToMm100(fWidth);
373  if (pAnnotation->hasKey(vcl::pdf::constDictionaryKeyInteriorColor))
374  pMarker->maFillColor = pAnnotation->getInteriorColor();
375  }
376  }
377  else if (eSubtype == vcl::pdf::PDFAnnotationSubType::Highlight)
378  {
379  size_t nCount = pAnnotation->getAttachmentPointsCount();
380  if (nCount > 0)
381  {
 // NOTE(review): the remainder of this make_shared call is not visible here.
382  auto pMarker = std::make_shared<vcl::pdf::PDFAnnotationMarkerHighlight>(
384  rPDFGraphicAnnotation.mpMarker = pMarker;
385  for (size_t i = 0; i < nCount; ++i)
386  {
387  auto aAttachmentPoints = pAnnotation->getAttachmentPoints(i);
388  if (!aAttachmentPoints.empty())
389  {
390  basegfx::B2DPolygon aPolygon;
391  aPolygon.setClosed(true);
392 
 // Points are appended in the order 0, 1, 3, 2; indexes 0..3 are
 // accessed, so four points per entry are assumed (only emptiness
 // is checked above). TODO confirm the count is guaranteed.
393  auto aPoint1
394  = convertFromPDFInternalToHMM(aAttachmentPoints[0], aPageSize);
395  aPolygon.append(aPoint1);
396  auto aPoint2
397  = convertFromPDFInternalToHMM(aAttachmentPoints[1], aPageSize);
398  aPolygon.append(aPoint2);
399  auto aPoint3
400  = convertFromPDFInternalToHMM(aAttachmentPoints[3], aPageSize);
401  aPolygon.append(aPoint3);
402  auto aPoint4
403  = convertFromPDFInternalToHMM(aAttachmentPoints[2], aPageSize);
404  aPolygon.append(aPoint4);
405 
406  pMarker->maQuads.push_back(aPolygon);
407  }
408  }
409  }
410  }
411  else if (eSubtype == vcl::pdf::PDFAnnotationSubType::Line)
412  {
 // Line: elements 0 and 1 of the geometry become start and end point;
 // only emptiness is checked before indexing.
413  auto const& rLineGeometry = pAnnotation->getLineGeometry();
414  if (!rLineGeometry.empty())
415  {
416  auto pMarker = std::make_shared<vcl::pdf::PDFAnnotationMarkerLine>();
417  rPDFGraphicAnnotation.mpMarker = pMarker;
418 
419  auto aPoint1 = convertFromPDFInternalToHMM(rLineGeometry[0], aPageSize);
420  pMarker->maLineStart = aPoint1;
421 
422  auto aPoint2 = convertFromPDFInternalToHMM(rLineGeometry[1], aPageSize);
423  pMarker->maLineEnd = aPoint2;
424 
425  float fWidth = pAnnotation->getBorderWidth();
426  pMarker->mnWidth = convertPointToMm100(fWidth);
427  }
428  }
429  }
430  }
431  }
432  return aPDFGraphicAnnotations;
433 }
434 
435 } // end anonymous namespace
436 #endif
437 
438 size_t ImportPDFUnloaded(const OUString& rURL, std::vector<PDFGraphicResult>& rGraphics)
439 {
440 #if HAVE_FEATURE_PDFIUM
441  std::unique_ptr<SvStream> xStream(
442  ::utl::UcbStreamHelper::CreateStream(rURL, StreamMode::READ | StreamMode::SHARE_DENYNONE));
443 
444  // Save the original PDF stream for later use.
445  BinaryDataContainer aBinaryDataContainer;
446  {
447  VectorGraphicDataArray aPdfDataArray = createVectorGraphicDataArray(*xStream);
448  if (!aPdfDataArray.hasElements())
449  return 0;
450  const sal_uInt8* pData = reinterpret_cast<const sal_uInt8*>(aPdfDataArray.getConstArray());
451  aBinaryDataContainer = BinaryDataContainer(pData, aPdfDataArray.getLength());
452  }
453 
454  // Prepare the link with the PDF stream.
455  auto pGfxLink = std::make_shared<GfxLink>(aBinaryDataContainer, GfxLinkType::NativePdf);
456 
457  auto pPdfium = vcl::pdf::PDFiumLibrary::get();
458 
459  // Load the buffer using pdfium.
460  auto pPdfDocument = pPdfium->openDocument(pGfxLink->GetData(), pGfxLink->GetDataSize());
461 
462  if (!pPdfDocument)
463  return 0;
464 
465  const int nPageCount = pPdfDocument->getPageCount();
466  if (nPageCount <= 0)
467  return 0;
468 
469  for (int nPageIndex = 0; nPageIndex < nPageCount; ++nPageIndex)
470  {
471  basegfx::B2DSize aPageSize = pPdfDocument->getPageSize(nPageIndex);
472  if (aPageSize.getX() <= 0.0 || aPageSize.getY() <= 0.0)
473  continue;
474 
475  // Returned unit is points, convert that to twip
476  // 1 pt = 20 twips
477  constexpr double pointToTwipconversionRatio = 20;
478 
479  tools::Long nPageWidth = convertTwipToMm100(aPageSize.getX() * pointToTwipconversionRatio);
480  tools::Long nPageHeight = convertTwipToMm100(aPageSize.getY() * pointToTwipconversionRatio);
481 
482  auto aVectorGraphicDataPtr = std::make_shared<VectorGraphicData>(
483  aBinaryDataContainer, VectorGraphicDataType::Pdf, nPageIndex);
484 
485  // Create the Graphic with the VectorGraphicDataPtr and link the original PDF stream.
486  // We swap out this Graphic as soon as possible, and a later swap in
487  // actually renders the correct Bitmap on demand.
488  Graphic aGraphic(aVectorGraphicDataPtr);
489  aGraphic.SetGfxLink(pGfxLink);
490 
491  auto pPage = pPdfDocument->openPage(nPageIndex);
492 
493  std::vector<PDFGraphicAnnotation> aPDFGraphicAnnotations
494  = findAnnotations(pPage, aPageSize);
495 
496  rGraphics.emplace_back(std::move(aGraphic), Size(nPageWidth, nPageHeight),
497  aPDFGraphicAnnotations);
498  }
499 
500  return rGraphics.size();
501 #else
502  (void)rURL;
503  (void)rGraphics;
504  return 0;
505 #endif // HAVE_FEATURE_PDFIUM
506 }
507 }
508 
509 /* vim:set shiftwidth=4 softtabstop=4 expandtab: */
void append(const basegfx::B2DPoint &rPoint, sal_uInt32 nCount)
size_t RenderPDFBitmaps(const void *pBuffer, int nSize, std::vector< BitmapEx > &rBitmaps, const size_t nFirstPage, int nPages, const basegfx::B2DTuple *pSizeHint)
Fills the rBitmaps vector with rendered pages.
Definition: pdfread.cxx:133
std::unique_ptr< ContentProperties > pData
bool importPdfVectorGraphicData(SvStream &rStream, std::shared_ptr< VectorGraphicData > &rVectorGraphicData)
Imports a PDF stream as a VectorGraphicData.
Definition: pdfread.cxx:233
sal_uIntPtr sal_uLong
long Long
double getX() const
#define STREAM_SEEK_TO_END
sal_uInt64 Seek(sal_uInt64 nPos)
double getY() const
float x
double getMaxX() const
virtual sal_uInt64 TellEnd() override
ErrCode GetError() const
Reference< XInputStream > xStream
static std::unique_ptr< SvStream > CreateStream(const OUString &rFileName, StreamMode eOpenMode, css::uno::Reference< css::awt::XWindow > xParentWin=nullptr)
Container for the binary data, whose responsibility is to manage the make it as simple as possible to...
int nCount
double getMaxY() const
constexpr sal_Int64 convertPointToMm100(sal_Int64 nNumber)
float y
int i
#define STREAM_SEEK_TO_BEGIN
sal_uInt64 GetSize()
SvStream & WriteStream(SvStream &rStream)
const sal_uInt8 * ConstScanline
Definition: Scanline.hxx:26
std::size_t ReadBytes(void *pData, std::size_t nSize)
void setClosed(bool bNew)
css::uno::Sequence< sal_Int8 > VectorGraphicDataArray
double getMinY() const
constexpr sal_Int64 convertTwipToMm100(sal_Int64 n)
unsigned char sal_uInt8
void CopyScanline(tools::Long nY, const BitmapReadAccess &rReadAcc)
constexpr sal_Int64 convertMm100ToTwip(sal_Int64 n)
size_t ImportPDFUnloaded(const OUString &rURL, std::vector< PDFGraphicResult > &rGraphics)
Import PDF as Graphic images (1 per page), but not loaded yet.
Definition: pdfread.cxx:438
bool good() const
#define SAL_WARN(area, stream)
double getMinX() const
bool ISO8601parseDateTime(const OUString &rString, css::util::DateTime &rDateTime)
bool ImportPDF(SvStream &rStream, Graphic &rGraphic)
Imports a PDF stream into rGraphic.
Definition: pdfread.cxx:249
const void * GetData()
typedef void(CALLTYPE *GetFuncDataPtr)(sal_uInt16 &nNo