LibreOffice Module vcl (master)  1
pdfread.cxx
Go to the documentation of this file.
1 /* -*- Mode: C++; tab-width: 4; indent-tabs-mode: nil; c-basic-offset: 4 -*- */
2 /*
3  * This file is part of the LibreOffice project.
4  *
5  * This Source Code Form is subject to the terms of the Mozilla Public
6  * License, v. 2.0. If a copy of the MPL was not distributed with this
7  * file, You can obtain one at http://mozilla.org/MPL/2.0/.
8  */
9 
10 #include <vcl/pdfread.hxx>
11 
12 #include <tools/UnitConversion.hxx>
13 
14 #include <pdf/PdfConfig.hxx>
15 #include <vcl/graph.hxx>
18 #include <unotools/datetime.hxx>
19 
21 #include <sal/log.hxx>
22 #include <o3tl/string_view.hxx>
23 
24 using namespace com::sun::star;
25 
26 namespace
27 {
29 inline double pointToPixel(const double fPoint, const double fResolutionDPI)
30 {
31  return o3tl::convert(fPoint, o3tl::Length::pt, o3tl::Length::in) * fResolutionDPI;
32 }
33 
35 bool isCompatible(SvStream& rInStream, sal_uInt64 nPos, sal_uInt64 nSize)
36 {
37  if (nSize < 8)
38  return false;
39 
40  // %PDF-x.y
41  sal_uInt8 aFirstBytes[8];
42  rInStream.Seek(nPos);
43  sal_uLong nRead = rInStream.ReadBytes(aFirstBytes, 8);
44  if (nRead < 8)
45  return false;
46 
47  if (aFirstBytes[0] != '%' || aFirstBytes[1] != 'P' || aFirstBytes[2] != 'D'
48  || aFirstBytes[3] != 'F' || aFirstBytes[4] != '-')
49  return false;
50 
51  sal_Int32 nMajor = o3tl::toInt32(std::string_view(reinterpret_cast<char*>(&aFirstBytes[5]), 1));
52  sal_Int32 nMinor = o3tl::toInt32(std::string_view(reinterpret_cast<char*>(&aFirstBytes[7]), 1));
53  return !(nMajor > 1 || (nMajor == 1 && nMinor > 6));
54 }
55 
58 bool getCompatibleStream(SvStream& rInStream, SvStream& rOutStream)
59 {
60  sal_uInt64 nPos = STREAM_SEEK_TO_BEGIN;
61  sal_uInt64 nSize = STREAM_SEEK_TO_END;
62  bool bCompatible = isCompatible(rInStream, nPos, nSize);
63  rInStream.Seek(nPos);
64  if (bCompatible)
65  // Not converting.
66  rOutStream.WriteStream(rInStream, nSize);
67  else
68  {
69  // Downconvert to PDF-1.6.
70  auto pPdfium = vcl::pdf::PDFiumLibrary::get();
71  if (!pPdfium)
72  return false;
73 
74  // Read input into a buffer.
75  SvMemoryStream aInBuffer;
76  aInBuffer.WriteStream(rInStream, nSize);
77 
78  SvMemoryStream aSaved;
79  {
80  // Load the buffer using pdfium.
81  std::unique_ptr<vcl::pdf::PDFiumDocument> pPdfDocument
82  = pPdfium->openDocument(aInBuffer.GetData(), aInBuffer.GetSize());
83  if (!pPdfDocument)
84  return false;
85 
86  // 16 means PDF-1.6.
87  if (!pPdfDocument->saveWithVersion(aSaved, 16))
88  return false;
89  }
90 
91  aSaved.Seek(STREAM_SEEK_TO_BEGIN);
92  rOutStream.WriteStream(aSaved);
93  }
94 
95  return rOutStream.good();
96 }
97 
98 BinaryDataContainer createBinaryDataContainer(SvStream& rStream)
99 {
100  // Save the original PDF stream for later use.
101  SvMemoryStream aMemoryStream;
102  if (!getCompatibleStream(rStream, aMemoryStream))
103  return {};
104 
105  const sal_uInt32 nStreamLength = aMemoryStream.TellEnd();
106 
107  auto aPdfData = std::make_unique<std::vector<sal_uInt8>>(nStreamLength);
108 
109  aMemoryStream.Seek(STREAM_SEEK_TO_BEGIN);
110  aMemoryStream.ReadBytes(aPdfData->data(), aPdfData->size());
111  if (aMemoryStream.GetError())
112  return {};
113 
114  return { std::move(aPdfData) };
115 }
116 
117 } // end anonymous namespace
118 
119 namespace vcl
120 {
// Fills the rBitmaps vector with rendered pages.
//
// Opens the PDF held in pBuffer (nSize bytes) with pdfium and appends one BitmapEx per
// rendered page to rBitmaps, starting at page index nFirstPage.
// nPages <= 0 means "all pages of the document". When pSizeHint is non-null (and both of its
// components are non-zero) it replaces the page size reported by the PDF; a non-null
// pSizeHint also forces the page to be treated as transparent.
// Returns rBitmaps.size(), or 0 if pdfium is unavailable or the document cannot be opened.
// Rendering stops at the first page that cannot be opened or whose bitmap cannot be created.
121 size_t RenderPDFBitmaps(const void* pBuffer, int nSize, std::vector<BitmapEx>& rBitmaps,
122  const size_t nFirstPage, int nPages, const basegfx::B2DTuple* pSizeHint)
123 {
124  auto pPdfium = vcl::pdf::PDFiumLibrary::get();
125  if (!pPdfium)
126  {
127  return 0;
128  }
129 
130  // Load the buffer using pdfium.
131  std::unique_ptr<vcl::pdf::PDFiumDocument> pPdfDocument = pPdfium->openDocument(pBuffer, nSize);
132  if (!pPdfDocument)
133  return 0;
134 
135  static const double fResolutionDPI = vcl::pdf::getDefaultPdfResolutionDpi();
136 
137  const int nPageCount = pPdfDocument->getPageCount();
// A non-positive page count request means: render everything up to the end of the document.
138  if (nPages <= 0)
139  nPages = nPageCount;
// NOTE(review): the minimum is computed as int and then has 1 subtracted; for a document
// with 0 pages this yields -1, which becomes a very large size_t. The loop then relies on
// openPage() failing to terminate - confirm this is intended.
140  const size_t nLastPage = std::min<int>(nPageCount, nFirstPage + nPages) - 1;
141  for (size_t nPageIndex = nFirstPage; nPageIndex <= nLastPage; ++nPageIndex)
142  {
143  // Render next page.
144  std::unique_ptr<vcl::pdf::PDFiumPage> pPdfPage = pPdfDocument->openPage(nPageIndex);
145  if (!pPdfPage)
146  break;
147 
148  // Calculate the bitmap size in points.
149  double nPageWidthPoints = pPdfPage->getWidth();
150  double nPageHeightPoints = pPdfPage->getHeight();
151  if (pSizeHint && pSizeHint->getX() && pSizeHint->getY())
152  {
153  // Have a size hint, prefer that over the logic size from the PDF.
// NOTE(review): listing lines 155 and 157 (the right-hand sides of the two assignments
// below) are missing from this listing - restore them from the original file.
154  nPageWidthPoints
156  nPageHeightPoints
158  }
159 
160  // Returned unit is points, convert that to pixel.
161 
// NOTE(review): listing line 163 (the continuation of the nPageWidth expression) is missing
// from this listing; presumably it mirrors line 165 below - confirm against the original.
162  const size_t nPageWidth = std::round(pointToPixel(nPageWidthPoints, fResolutionDPI)
164  const size_t nPageHeight = std::round(pointToPixel(nPageHeightPoints, fResolutionDPI)
165  * PDF_INSERT_MAGIC_SCALE_FACTOR);
166  std::unique_ptr<vcl::pdf::PDFiumBitmap> pPdfBitmap
167  = pPdfium->createBitmap(nPageWidth, nPageHeight, /*nAlpha=*/1);
168  if (!pPdfBitmap)
169  break;
170 
171  bool bTransparent = pPdfPage->hasTransparency();
172  if (pSizeHint)
173  {
174  // This is the PDF-in-EMF case: force transparency, even in case pdfium would tell us
175  // the PDF is not transparent.
176  bTransparent = true;
177  }
// Pre-fill the bitmap: all-zero for transparent pages, all-ones otherwise.
178  const sal_uInt32 nColor = bTransparent ? 0x00000000 : 0xFFFFFFFF;
179  pPdfBitmap->fillRect(0, 0, nPageWidth, nPageHeight, nColor);
180  pPdfBitmap->renderPageBitmap(pPdfDocument.get(), pPdfPage.get(), /*nStartX=*/0,
181  /*nStartY=*/0, nPageWidth, nPageHeight);
182 
183  // Save the buffer as a bitmap.
184  Bitmap aBitmap(Size(nPageWidth, nPageHeight), vcl::PixelFormat::N24_BPP);
185  AlphaMask aMask(Size(nPageWidth, nPageHeight));
186  {
187  BitmapScopedWriteAccess pWriteAccess(aBitmap);
188  AlphaScopedWriteAccess pMaskAccess(aMask);
189  ConstScanline pPdfBuffer = pPdfBitmap->getBuffer();
190  const int nStride = pPdfBitmap->getStride();
// One reusable row buffer for the alpha values, filled again for every scanline.
191  std::vector<sal_uInt8> aScanlineAlpha(nPageWidth);
192  for (size_t nRow = 0; nRow < nPageHeight; ++nRow)
193  {
194  ConstScanline pPdfLine = pPdfBuffer + (nStride * nRow);
195  // pdfium byte order is BGRA.
196  pWriteAccess->CopyScanline(nRow, pPdfLine, ScanlineFormat::N32BitTcBgra, nStride);
197  for (size_t nCol = 0; nCol < nPageWidth; ++nCol)
198  {
199  // Invert alpha (source is alpha, target is opacity).
200  aScanlineAlpha[nCol] = ~pPdfLine[3];
201  pPdfLine += 4;
202  }
203  pMaskAccess->CopyScanline(nRow, aScanlineAlpha.data(), ScanlineFormat::N8BitPal,
204  nPageWidth);
205  }
206  }
207 
// The alpha mask is only attached to the result when the page is treated as transparent.
208  if (bTransparent)
209  {
210  rBitmaps.emplace_back(aBitmap, aMask);
211  }
212  else
213  {
214  rBitmaps.emplace_back(std::move(aBitmap));
215  }
216  }
217 
218  return rBitmaps.size();
219 }
220 
222  std::shared_ptr<VectorGraphicData>& rVectorGraphicData)
223 {
224  BinaryDataContainer aDataContainer = createBinaryDataContainer(rStream);
225  if (aDataContainer.isEmpty())
226  {
227  SAL_WARN("vcl.filter", "ImportPDF: empty PDF data array");
228  return false;
229  }
230 
231  rVectorGraphicData
232  = std::make_shared<VectorGraphicData>(aDataContainer, VectorGraphicDataType::Pdf);
233 
234  return true;
235 }
236 
237 bool ImportPDF(SvStream& rStream, Graphic& rGraphic)
238 {
239  std::shared_ptr<VectorGraphicData> pVectorGraphicData;
240  if (!importPdfVectorGraphicData(rStream, pVectorGraphicData))
241  return false;
242  rGraphic = Graphic(pVectorGraphicData);
243  return true;
244 }
245 
246 namespace
247 {
248 basegfx::B2DPoint convertFromPDFInternalToHMM(basegfx::B2DSize const& rInputPoint,
249  basegfx::B2DSize const& rPageSize)
250 {
251  double x = convertPointToMm100(rInputPoint.getX());
252  double y = convertPointToMm100(rPageSize.getY() - rInputPoint.getY());
253  return { x, y };
254 }
255 
// Collects the annotations of a PDF page as PDFGraphicAnnotation entries.
//
// For every annotation that can be retrieved from pPage, the author, text, rectangle,
// modification date, subtype and color are stored, and a subtype specific marker
// (polygon, square, circle, ink, highlight or line) is attached where geometry is available.
// All coordinates are converted from points to 1/100 mm, with Y values subtracted from the
// page height (aPageSize.getY()).
// pPage is dereferenced without a null check, so callers must pass a valid page.
// Returns the collected annotations (possibly empty).
256 std::vector<PDFGraphicAnnotation>
257 findAnnotations(const std::unique_ptr<vcl::pdf::PDFiumPage>& pPage, basegfx::B2DSize aPageSize)
258 {
259  std::vector<PDFGraphicAnnotation> aPDFGraphicAnnotations;
260  for (int nAnnotation = 0; nAnnotation < pPage->getAnnotationCount(); nAnnotation++)
261  {
262  auto pAnnotation = pPage->getAnnotation(nAnnotation);
263  if (pAnnotation)
264  {
265  auto eSubtype = pAnnotation->getSubType();
266 
// NOTE(review): listing lines 267-273 are missing from this listing; presumably they hold
// the condition selecting the annotation subtypes handled below - restore from the original.
274  {
275  OUString sAuthor = pAnnotation->getString(vcl::pdf::constDictionaryKeyTitle);
276  OUString sText = pAnnotation->getString(vcl::pdf::constDictionaryKeyContents);
277 
// Convert the annotation rectangle to 1/100 mm; Y values are subtracted from the page height.
278  basegfx::B2DRectangle rRectangle = pAnnotation->getRectangle();
279  basegfx::B2DRectangle rRectangleHMM(
280  convertPointToMm100(rRectangle.getMinX()),
281  convertPointToMm100(aPageSize.getY() - rRectangle.getMinY()),
282  convertPointToMm100(rRectangle.getMaxX()),
283  convertPointToMm100(aPageSize.getY() - rRectangle.getMaxY()));
284 
285  OUString sDateTimeString
286  = pAnnotation->getString(vcl::pdf::constDictionaryKeyModificationDate);
287  OUString sISO8601String = vcl::pdf::convertPdfDateToISO8601(sDateTimeString);
288 
// aDateTime stays default-constructed when no ISO 8601 string is available.
289  css::util::DateTime aDateTime;
290  if (!sISO8601String.isEmpty())
291  {
292  utl::ISO8601parseDateTime(sISO8601String, aDateTime);
293  }
294 
295  Color aColor = pAnnotation->getColor();
296 
// Append an empty entry first, then fill it in place through a reference.
297  aPDFGraphicAnnotations.emplace_back();
298 
299  auto& rPDFGraphicAnnotation = aPDFGraphicAnnotations.back();
300  rPDFGraphicAnnotation.maRectangle = rRectangleHMM;
301  rPDFGraphicAnnotation.maAuthor = sAuthor;
302  rPDFGraphicAnnotation.maText = sText;
303  rPDFGraphicAnnotation.maDateTime = aDateTime;
304  rPDFGraphicAnnotation.meSubType = eSubtype;
305  rPDFGraphicAnnotation.maColor = aColor;
306 
// NOTE(review): listing line 307 is missing from this listing; given the `else if` chain
// below and the polygon marker created here, presumably it tests for the Polygon subtype -
// confirm against the original.
308  {
309  auto const& rVertices = pAnnotation->getVertices();
310  if (!rVertices.empty())
311  {
312  auto pMarker = std::make_shared<vcl::pdf::PDFAnnotationMarkerPolygon>();
313  rPDFGraphicAnnotation.mpMarker = pMarker;
314  for (auto const& rVertex : rVertices)
315  {
316  auto aPoint = convertFromPDFInternalToHMM(rVertex, aPageSize);
317  pMarker->maPolygon.append(aPoint);
318  }
319  pMarker->maPolygon.setClosed(true);
320  pMarker->mnWidth = convertPointToMm100(pAnnotation->getBorderWidth());
321  if (pAnnotation->hasKey(vcl::pdf::constDictionaryKeyInteriorColor))
322  pMarker->maFillColor = pAnnotation->getInteriorColor();
323  }
324  }
325  else if (eSubtype == vcl::pdf::PDFAnnotationSubType::Square)
326  {
327  auto pMarker = std::make_shared<vcl::pdf::PDFAnnotationMarkerSquare>();
328  rPDFGraphicAnnotation.mpMarker = pMarker;
329  pMarker->mnWidth = convertPointToMm100(pAnnotation->getBorderWidth());
330  if (pAnnotation->hasKey(vcl::pdf::constDictionaryKeyInteriorColor))
331  pMarker->maFillColor = pAnnotation->getInteriorColor();
332  }
333  else if (eSubtype == vcl::pdf::PDFAnnotationSubType::Circle)
334  {
335  auto pMarker = std::make_shared<vcl::pdf::PDFAnnotationMarkerCircle>();
336  rPDFGraphicAnnotation.mpMarker = pMarker;
337  pMarker->mnWidth = convertPointToMm100(pAnnotation->getBorderWidth());
338  if (pAnnotation->hasKey(vcl::pdf::constDictionaryKeyInteriorColor))
339  pMarker->maFillColor = pAnnotation->getInteriorColor();
340  }
341  else if (eSubtype == vcl::pdf::PDFAnnotationSubType::Ink)
342  {
343  auto const& rStrokesList = pAnnotation->getInkStrokes();
344  if (!rStrokesList.empty())
345  {
346  auto pMarker = std::make_shared<vcl::pdf::PDFAnnotationMarkerInk>();
347  rPDFGraphicAnnotation.mpMarker = pMarker;
// Every stroke becomes its own polygon of converted points.
348  for (auto const& rStrokes : rStrokesList)
349  {
350  basegfx::B2DPolygon aPolygon;
351  for (auto const& rVertex : rStrokes)
352  {
353  auto aPoint = convertFromPDFInternalToHMM(rVertex, aPageSize);
354  aPolygon.append(aPoint);
355  }
356  pMarker->maStrokes.push_back(aPolygon);
357  }
358  float fWidth = pAnnotation->getBorderWidth();
359  pMarker->mnWidth = convertPointToMm100(fWidth);
360  if (pAnnotation->hasKey(vcl::pdf::constDictionaryKeyInteriorColor))
361  pMarker->maFillColor = pAnnotation->getInteriorColor();
362  }
363  }
364  else if (eSubtype == vcl::pdf::PDFAnnotationSubType::Highlight)
365  {
366  size_t nCount = pAnnotation->getAttachmentPointsCount();
367  if (nCount > 0)
368  {
// NOTE(review): listing line 370 (the end of the make_shared call below) is missing from
// this listing - restore it from the original file.
369  auto pMarker = std::make_shared<vcl::pdf::PDFAnnotationMarkerHighlight>(
371  rPDFGraphicAnnotation.mpMarker = pMarker;
372  for (size_t i = 0; i < nCount; ++i)
373  {
374  auto aAttachmentPoints = pAnnotation->getAttachmentPoints(i);
375  if (!aAttachmentPoints.empty())
376  {
377  basegfx::B2DPolygon aPolygon;
378  aPolygon.setClosed(true);
379 
// The four points are appended in the order 0, 1, 3, 2.
// NOTE(review): only empty() is checked above, yet indices 0..3 are read - confirm that
// getAttachmentPoints() always yields at least four points when it is not empty.
380  auto aPoint1
381  = convertFromPDFInternalToHMM(aAttachmentPoints[0], aPageSize);
382  aPolygon.append(aPoint1);
383  auto aPoint2
384  = convertFromPDFInternalToHMM(aAttachmentPoints[1], aPageSize);
385  aPolygon.append(aPoint2);
386  auto aPoint3
387  = convertFromPDFInternalToHMM(aAttachmentPoints[3], aPageSize);
388  aPolygon.append(aPoint3);
389  auto aPoint4
390  = convertFromPDFInternalToHMM(aAttachmentPoints[2], aPageSize);
391  aPolygon.append(aPoint4);
392 
393  pMarker->maQuads.push_back(aPolygon);
394  }
395  }
396  }
397  }
398  else if (eSubtype == vcl::pdf::PDFAnnotationSubType::Line)
399  {
400  auto const& rLineGeometry = pAnnotation->getLineGeometry();
401  if (!rLineGeometry.empty())
402  {
403  auto pMarker = std::make_shared<vcl::pdf::PDFAnnotationMarkerLine>();
404  rPDFGraphicAnnotation.mpMarker = pMarker;
405 
// NOTE(review): index 1 is read below after only an empty() check - confirm that the line
// geometry always holds two points when it is not empty.
406  auto aPoint1 = convertFromPDFInternalToHMM(rLineGeometry[0], aPageSize);
407  pMarker->maLineStart = aPoint1;
408 
409  auto aPoint2 = convertFromPDFInternalToHMM(rLineGeometry[1], aPageSize);
410  pMarker->maLineEnd = aPoint2;
411 
412  float fWidth = pAnnotation->getBorderWidth();
413  pMarker->mnWidth = convertPointToMm100(fWidth);
414  }
415  }
416  }
417  }
418  }
419  return aPDFGraphicAnnotations;
420 }
421 
422 } // end anonymous namespace
423 
424 size_t ImportPDFUnloaded(const OUString& rURL, std::vector<PDFGraphicResult>& rGraphics)
425 {
426  std::unique_ptr<SvStream> xStream(
427  ::utl::UcbStreamHelper::CreateStream(rURL, StreamMode::READ | StreamMode::SHARE_DENYNONE));
428 
429  // Save the original PDF stream for later use.
430  BinaryDataContainer aDataContainer = createBinaryDataContainer(*xStream);
431  if (aDataContainer.isEmpty())
432  return 0;
433 
434  // Prepare the link with the PDF stream.
435  auto pGfxLink = std::make_shared<GfxLink>(aDataContainer, GfxLinkType::NativePdf);
436 
437  auto pPdfium = vcl::pdf::PDFiumLibrary::get();
438  if (!pPdfium)
439  {
440  return 0;
441  }
442 
443  // Load the buffer using pdfium.
444  auto pPdfDocument = pPdfium->openDocument(pGfxLink->GetData(), pGfxLink->GetDataSize());
445 
446  if (!pPdfDocument)
447  return 0;
448 
449  const int nPageCount = pPdfDocument->getPageCount();
450  if (nPageCount <= 0)
451  return 0;
452 
453  for (int nPageIndex = 0; nPageIndex < nPageCount; ++nPageIndex)
454  {
455  basegfx::B2DSize aPageSize = pPdfDocument->getPageSize(nPageIndex);
456  if (aPageSize.getX() <= 0.0 || aPageSize.getY() <= 0.0)
457  continue;
458 
459  // Returned unit is points, convert that to twip
460  // 1 pt = 20 twips
461  constexpr double pointToTwipconversionRatio = 20;
462 
463  tools::Long nPageWidth = convertTwipToMm100(aPageSize.getX() * pointToTwipconversionRatio);
464  tools::Long nPageHeight = convertTwipToMm100(aPageSize.getY() * pointToTwipconversionRatio);
465 
466  // Create the Graphic with the VectorGraphicDataPtr and link the original PDF stream.
467  // We swap out this Graphic as soon as possible, and a later swap in
468  // actually renders the correct Bitmap on demand.
469  Graphic aGraphic(pGfxLink, nPageIndex);
470 
471  auto pPage = pPdfDocument->openPage(nPageIndex);
472 
473  std::vector<PDFGraphicAnnotation> aPDFGraphicAnnotations
474  = findAnnotations(pPage, aPageSize);
475 
476  rGraphics.emplace_back(std::move(aGraphic), Size(nPageWidth, nPageHeight),
477  aPDFGraphicAnnotations);
478  }
479 
480  return rGraphics.size();
481 }
482 }
483 
484 /* vim:set shiftwidth=4 softtabstop=4 expandtab: */
double getY() const
void append(const basegfx::B2DPoint &rPoint, sal_uInt32 nCount)
size_t RenderPDFBitmaps(const void *pBuffer, int nSize, std::vector< BitmapEx > &rBitmaps, const size_t nFirstPage, int nPages, const basegfx::B2DTuple *pSizeHint)
Fills the rBitmaps vector with rendered pages.
Definition: pdfread.cxx:121
bool ISO8601parseDateTime(std::u16string_view rString, css::util::DateTime &rDateTime)
bool importPdfVectorGraphicData(SvStream &rStream, std::shared_ptr< VectorGraphicData > &rVectorGraphicData)
Imports a PDF stream as a VectorGraphicData.
Definition: pdfread.cxx:221
sal_uIntPtr sal_uLong
long Long
constexpr Point convert(const Point &rPoint, o3tl::Length eFrom, o3tl::Length eTo)
#define STREAM_SEEK_TO_END
sal_uInt64 Seek(sal_uInt64 nPos)
double getDefaultPdfResolutionDpi()
Get the default PDF rendering resolution in DPI.
Definition: PdfConfig.cxx:20
This template handles BitmapAccess the RAII way.
float x
virtual sal_uInt64 TellEnd() override
constexpr auto convertTwipToMm100(N n)
ErrCode GetError() const
constexpr OStringLiteral constDictionaryKeyContents
Reference< XInputStream > xStream
static std::unique_ptr< SvStream > CreateStream(const OUString &rFileName, StreamMode eOpenMode, css::uno::Reference< css::awt::XWindow > xParentWin=nullptr)
Container for the binary data, whose responsibility is to manage the make it as simple as possible to...
constexpr int PDF_INSERT_MAGIC_SCALE_FACTOR
Definition: pdfread.hxx:60
int nCount
OUString convertPdfDateToISO8601(OUString const &rInput)
Definition: PDFiumTools.cxx:15
float y
static std::shared_ptr< PDFium > & get()
constexpr OStringLiteral constDictionaryKeyModificationDate
int i
#define STREAM_SEEK_TO_BEGIN
sal_uInt64 GetSize()
sal_Int32 toInt32(std::u16string_view str, sal_Int16 radix=10)
SvStream & WriteStream(SvStream &rStream)
const sal_uInt8 * ConstScanline
Definition: Scanline.hxx:27
std::size_t ReadBytes(void *pData, std::size_t nSize)
constexpr auto convertPointToMm100(N n)
void setClosed(bool bNew)
unsigned char sal_uInt8
constexpr OStringLiteral constDictionaryKeyTitle
size_t ImportPDFUnloaded(const OUString &rURL, std::vector< PDFGraphicResult > &rGraphics)
Import PDF as Graphic images (1 per page), but not loaded yet.
Definition: pdfread.cxx:424
bool good() const
#define SAL_WARN(area, stream)
double getX() const
bool ImportPDF(SvStream &rStream, Graphic &rGraphic)
Imports a PDF stream into rGraphic.
Definition: pdfread.cxx:237
constexpr OStringLiteral constDictionaryKeyInteriorColor
const void * GetData()