LibreOffice Module vcl (master)  1
pdfread.cxx
Go to the documentation of this file.
1 /* -*- Mode: C++; tab-width: 4; indent-tabs-mode: nil; c-basic-offset: 4 -*- */
2 /*
3  * This file is part of the LibreOffice project.
4  *
5  * This Source Code Form is subject to the terms of the Mozilla Public
6  * License, v. 2.0. If a copy of the MPL was not distributed with this
7  * file, You can obtain one at http://mozilla.org/MPL/2.0/.
8  */
9 
10 #include <vcl/pdfread.hxx>
11 
12 #include <tools/UnitConversion.hxx>
13 
14 #include <vcl/graph.hxx>
17 #include <unotools/datetime.hxx>
18 
20 #include <sal/log.hxx>
21 
22 using namespace com::sun::star;
23 
24 namespace
25 {
/// Converts a length given in PDF points into pixels for the given resolution.
///
/// @param fPoint         length in points.
/// @param fResolutionDPI resolution in dots per inch.
/// @return fPoint * fResolutionDPI / 72.
inline double pointToPixel(const double fPoint, const double fResolutionDPI)
{
    constexpr double fPointsPerInch = 72.;
    const double fScaled = fPoint * fResolutionDPI;
    return fScaled / fPointsPerInch;
}
31 
33 bool isCompatible(SvStream& rInStream, sal_uInt64 nPos, sal_uInt64 nSize)
34 {
35  if (nSize < 8)
36  return false;
37 
38  // %PDF-x.y
39  sal_uInt8 aFirstBytes[8];
40  rInStream.Seek(nPos);
41  sal_uLong nRead = rInStream.ReadBytes(aFirstBytes, 8);
42  if (nRead < 8)
43  return false;
44 
45  if (aFirstBytes[0] != '%' || aFirstBytes[1] != 'P' || aFirstBytes[2] != 'D'
46  || aFirstBytes[3] != 'F' || aFirstBytes[4] != '-')
47  return false;
48 
49  sal_Int32 nMajor = OString(char(aFirstBytes[5])).toInt32();
50  sal_Int32 nMinor = OString(char(aFirstBytes[7])).toInt32();
51  return !(nMajor > 1 || (nMajor == 1 && nMinor > 6));
52 }
53 
56 bool getCompatibleStream(SvStream& rInStream, SvStream& rOutStream)
57 {
58  sal_uInt64 nPos = STREAM_SEEK_TO_BEGIN;
59  sal_uInt64 nSize = STREAM_SEEK_TO_END;
60  bool bCompatible = isCompatible(rInStream, nPos, nSize);
61  rInStream.Seek(nPos);
62  if (bCompatible)
63  // Not converting.
64  rOutStream.WriteStream(rInStream, nSize);
65  else
66  {
67  // Downconvert to PDF-1.6.
68  auto pPdfium = vcl::pdf::PDFiumLibrary::get();
69  if (!pPdfium)
70  return false;
71 
72  // Read input into a buffer.
73  SvMemoryStream aInBuffer;
74  aInBuffer.WriteStream(rInStream, nSize);
75 
76  SvMemoryStream aSaved;
77  {
78  // Load the buffer using pdfium.
79  std::unique_ptr<vcl::pdf::PDFiumDocument> pPdfDocument
80  = pPdfium->openDocument(aInBuffer.GetData(), aInBuffer.GetSize());
81  if (!pPdfDocument)
82  return false;
83 
84  // 16 means PDF-1.6.
85  if (!pPdfDocument->saveWithVersion(aSaved, 16))
86  return false;
87  }
88 
89  aSaved.Seek(STREAM_SEEK_TO_BEGIN);
90  rOutStream.WriteStream(aSaved);
91  }
92 
93  return rOutStream.good();
94 }
95 
96 BinaryDataContainer createBinaryDataContainer(SvStream& rStream)
97 {
98  // Save the original PDF stream for later use.
99  SvMemoryStream aMemoryStream;
100  if (!getCompatibleStream(rStream, aMemoryStream))
101  return BinaryDataContainer();
102 
103  const sal_uInt32 nStreamLength = aMemoryStream.TellEnd();
104 
105  auto aPdfData = std::make_unique<std::vector<sal_uInt8>>(nStreamLength);
106 
107  aMemoryStream.Seek(STREAM_SEEK_TO_BEGIN);
108  aMemoryStream.ReadBytes(aPdfData->data(), aPdfData->size());
109  if (aMemoryStream.GetError())
110  return BinaryDataContainer();
111 
112  return BinaryDataContainer(std::move(aPdfData));
113 }
114 
115 } // end anonymous namespace
116 
117 namespace vcl
118 {
/// Get the default PDF rendering resolution in DPI.
///
/// The PDFIMPORT_RESOLUTION_DPI environment variable overrides the built-in
/// default when atof() parses it to a value greater than zero.
///
/// @return the resolution from the environment, otherwise 96.
static double getDefaultPdfResolutionDpi()
{
    // If an overriding default is set, use it.
    if (const char* envar = ::getenv("PDFIMPORT_RESOLUTION_DPI"))
    {
        const double dpi = atof(envar);
        if (dpi > 0)
            return dpi;
    }

    // Fallback to a sensible default.
    return 96.;
}
134 
135 size_t RenderPDFBitmaps(const void* pBuffer, int nSize, std::vector<BitmapEx>& rBitmaps,
136  const size_t nFirstPage, int nPages, const basegfx::B2DTuple* pSizeHint)
137 {
138  static const double fResolutionDPI = getDefaultPdfResolutionDpi();
139  auto pPdfium = vcl::pdf::PDFiumLibrary::get();
140  if (!pPdfium)
141  {
142  return 0;
143  }
144 
145  // Load the buffer using pdfium.
146  std::unique_ptr<vcl::pdf::PDFiumDocument> pPdfDocument = pPdfium->openDocument(pBuffer, nSize);
147  if (!pPdfDocument)
148  return 0;
149 
150  const int nPageCount = pPdfDocument->getPageCount();
151  if (nPages <= 0)
152  nPages = nPageCount;
153  const size_t nLastPage = std::min<int>(nPageCount, nFirstPage + nPages) - 1;
154  for (size_t nPageIndex = nFirstPage; nPageIndex <= nLastPage; ++nPageIndex)
155  {
156  // Render next page.
157  std::unique_ptr<vcl::pdf::PDFiumPage> pPdfPage = pPdfDocument->openPage(nPageIndex);
158  if (!pPdfPage)
159  break;
160 
161  // Calculate the bitmap size in points.
162  size_t nPageWidthPoints = pPdfPage->getWidth();
163  size_t nPageHeightPoints = pPdfPage->getHeight();
164  if (pSizeHint && pSizeHint->getX() && pSizeHint->getY())
165  {
166  // Have a size hint, prefer that over the logic size from the PDF.
167  nPageWidthPoints = convertMm100ToTwip(pSizeHint->getX()) / 20;
168  nPageHeightPoints = convertMm100ToTwip(pSizeHint->getY()) / 20;
169  }
170 
171  // Returned unit is points, convert that to pixel.
172  const size_t nPageWidth = pointToPixel(nPageWidthPoints, fResolutionDPI);
173  const size_t nPageHeight = pointToPixel(nPageHeightPoints, fResolutionDPI);
174  std::unique_ptr<vcl::pdf::PDFiumBitmap> pPdfBitmap
175  = pPdfium->createBitmap(nPageWidth, nPageHeight, /*alpha=*/1);
176  if (!pPdfBitmap)
177  break;
178 
179  bool bTransparent = pPdfPage->hasTransparency();
180  if (pSizeHint)
181  {
182  // This is the PDF-in-EMF case: force transparency, even in case pdfium would tell us
183  // the PDF is not transparent.
184  bTransparent = true;
185  }
186  const sal_uInt32 nColor = bTransparent ? 0x00000000 : 0xFFFFFFFF;
187  pPdfBitmap->fillRect(0, 0, nPageWidth, nPageHeight, nColor);
188  pPdfBitmap->renderPageBitmap(pPdfPage.get(), /*start_x=*/0,
189  /*start_y=*/0, nPageWidth, nPageHeight);
190 
191  // Save the buffer as a bitmap.
192  Bitmap aBitmap(Size(nPageWidth, nPageHeight), vcl::PixelFormat::N24_BPP);
193  AlphaMask aMask(Size(nPageWidth, nPageHeight));
194  {
195  BitmapScopedWriteAccess pWriteAccess(aBitmap);
196  AlphaScopedWriteAccess pMaskAccess(aMask);
197  ConstScanline pPdfBuffer = pPdfBitmap->getBuffer();
198  const int nStride = pPdfBitmap->getStride();
199  std::vector<sal_uInt8> aScanlineAlpha(nPageWidth);
200  for (size_t nRow = 0; nRow < nPageHeight; ++nRow)
201  {
202  ConstScanline pPdfLine = pPdfBuffer + (nStride * nRow);
203  // pdfium byte order is BGRA.
204  pWriteAccess->CopyScanline(nRow, pPdfLine, ScanlineFormat::N32BitTcBgra, nStride);
205  for (size_t nCol = 0; nCol < nPageWidth; ++nCol)
206  {
207  // Invert alpha (source is alpha, target is opacity).
208  aScanlineAlpha[nCol] = ~pPdfLine[3];
209  pPdfLine += 4;
210  }
211  pMaskAccess->CopyScanline(nRow, aScanlineAlpha.data(), ScanlineFormat::N8BitPal,
212  nPageWidth);
213  }
214  }
215 
216  if (bTransparent)
217  {
218  rBitmaps.emplace_back(aBitmap, aMask);
219  }
220  else
221  {
222  rBitmaps.emplace_back(std::move(aBitmap));
223  }
224  }
225 
226  return rBitmaps.size();
227 }
228 
230  std::shared_ptr<VectorGraphicData>& rVectorGraphicData)
231 {
232  BinaryDataContainer aDataContainer = createBinaryDataContainer(rStream);
233  if (aDataContainer.isEmpty())
234  {
235  SAL_WARN("vcl.filter", "ImportPDF: empty PDF data array");
236  return false;
237  }
238 
239  rVectorGraphicData
240  = std::make_shared<VectorGraphicData>(aDataContainer, VectorGraphicDataType::Pdf);
241 
242  return true;
243 }
244 
245 bool ImportPDF(SvStream& rStream, Graphic& rGraphic)
246 {
247  std::shared_ptr<VectorGraphicData> pVectorGraphicData;
248  if (!importPdfVectorGraphicData(rStream, pVectorGraphicData))
249  return false;
250  rGraphic = Graphic(pVectorGraphicData);
251  return true;
252 }
253 
254 namespace
255 {
256 basegfx::B2DPoint convertFromPDFInternalToHMM(basegfx::B2DSize const& rInputPoint,
257  basegfx::B2DSize const& rPageSize)
258 {
259  double x = convertPointToMm100(rInputPoint.getX());
260  double y = convertPointToMm100(rPageSize.getY() - rInputPoint.getY());
261  return basegfx::B2DPoint(x, y);
262 }
263 
// Collects the annotations of pPage as PDFGraphicAnnotation entries.
//
// For each annotation that can be retrieved, the author, text, rectangle,
// modification date, subtype and color are copied; rectangle and marker
// geometry are converted from points to 1/100 mm with the vertical coordinate
// mirrored against aPageSize.getY(). Depending on the subtype a marker object
// (polygon, square, circle, ink, highlight or line) is attached.
//
// NOTE(review): pPage is dereferenced without a null check - callers must pass
// a valid page.
// NOTE(review): this listing has gaps in its line numbering (after 274, 314
// and 377); the statements on the missing lines are not visible here.
264 std::vector<PDFGraphicAnnotation>
265 findAnnotations(const std::unique_ptr<vcl::pdf::PDFiumPage>& pPage, basegfx::B2DSize aPageSize)
266 {
267  std::vector<PDFGraphicAnnotation> aPDFGraphicAnnotations;
268  for (int nAnnotation = 0; nAnnotation < pPage->getAnnotationCount(); nAnnotation++)
269  {
270  auto pAnnotation = pPage->getAnnotation(nAnnotation);
271  if (pAnnotation)
272  {
273  auto eSubtype = pAnnotation->getSubType();
274 
// NOTE(review): lines 275-281 are missing from this listing; presumably they
// hold the condition that guards the block below - confirm against the file.
282  {
283  OUString sAuthor = pAnnotation->getString(vcl::pdf::constDictionaryKeyTitle);
284  OUString sText = pAnnotation->getString(vcl::pdf::constDictionaryKeyContents);
285 
// Convert the annotation rectangle to 1/100 mm, mirroring Y against the page height.
286  basegfx::B2DRectangle rRectangle = pAnnotation->getRectangle();
287  basegfx::B2DRectangle rRectangleHMM(
288  convertPointToMm100(rRectangle.getMinX()),
289  convertPointToMm100(aPageSize.getY() - rRectangle.getMinY()),
290  convertPointToMm100(rRectangle.getMaxX()),
291  convertPointToMm100(aPageSize.getY() - rRectangle.getMaxY()));
292 
// The modification date is converted to ISO 8601 first; aDateTime keeps its
// default value when that conversion yields an empty string.
293  OUString sDateTimeString
294  = pAnnotation->getString(vcl::pdf::constDictionaryKeyModificationDate);
295  OUString sISO8601String = vcl::pdf::convertPdfDateToISO8601(sDateTimeString);
296 
297  css::util::DateTime aDateTime;
298  if (!sISO8601String.isEmpty())
299  {
300  utl::ISO8601parseDateTime(sISO8601String, aDateTime);
301  }
302 
303  Color aColor = pAnnotation->getColor();
304 
305  aPDFGraphicAnnotations.emplace_back();
306 
// Fill the entry that was just appended.
307  auto& rPDFGraphicAnnotation = aPDFGraphicAnnotations.back();
308  rPDFGraphicAnnotation.maRectangle = rRectangleHMM;
309  rPDFGraphicAnnotation.maAuthor = sAuthor;
310  rPDFGraphicAnnotation.maText = sText;
311  rPDFGraphicAnnotation.maDateTime = aDateTime;
312  rPDFGraphicAnnotation.meSubType = eSubtype;
313  rPDFGraphicAnnotation.maColor = aColor;
314 
// NOTE(review): line 315 is missing from this listing; presumably it is the
// subtype test for the polygon branch below - confirm against the file.
316  {
317  auto const& rVertices = pAnnotation->getVertices();
318  if (!rVertices.empty())
319  {
320  auto pMarker = std::make_shared<vcl::pdf::PDFAnnotationMarkerPolygon>();
321  rPDFGraphicAnnotation.mpMarker = pMarker;
322  for (auto const& rVertex : rVertices)
323  {
324  auto aPoint = convertFromPDFInternalToHMM(rVertex, aPageSize);
325  pMarker->maPolygon.append(aPoint);
326  }
327  pMarker->maPolygon.setClosed(true);
328  pMarker->mnWidth = convertPointToMm100(pAnnotation->getBorderWidth());
// The fill color is only set when the annotation carries an interior color key.
329  if (pAnnotation->hasKey(vcl::pdf::constDictionaryKeyInteriorColor))
330  pMarker->maFillColor = pAnnotation->getInteriorColor();
331  }
332  }
333  else if (eSubtype == vcl::pdf::PDFAnnotationSubType::Square)
334  {
335  auto pMarker = std::make_shared<vcl::pdf::PDFAnnotationMarkerSquare>();
336  rPDFGraphicAnnotation.mpMarker = pMarker;
337  pMarker->mnWidth = convertPointToMm100(pAnnotation->getBorderWidth());
338  if (pAnnotation->hasKey(vcl::pdf::constDictionaryKeyInteriorColor))
339  pMarker->maFillColor = pAnnotation->getInteriorColor();
340  }
341  else if (eSubtype == vcl::pdf::PDFAnnotationSubType::Circle)
342  {
343  auto pMarker = std::make_shared<vcl::pdf::PDFAnnotationMarkerCircle>();
344  rPDFGraphicAnnotation.mpMarker = pMarker;
345  pMarker->mnWidth = convertPointToMm100(pAnnotation->getBorderWidth());
346  if (pAnnotation->hasKey(vcl::pdf::constDictionaryKeyInteriorColor))
347  pMarker->maFillColor = pAnnotation->getInteriorColor();
348  }
349  else if (eSubtype == vcl::pdf::PDFAnnotationSubType::Ink)
350  {
351  auto const& rStrokesList = pAnnotation->getInkStrokes();
352  if (!rStrokesList.empty())
353  {
354  auto pMarker = std::make_shared<vcl::pdf::PDFAnnotationMarkerInk>();
355  rPDFGraphicAnnotation.mpMarker = pMarker;
// Each stroke becomes its own polygon.
356  for (auto const& rStrokes : rStrokesList)
357  {
358  basegfx::B2DPolygon aPolygon;
359  for (auto const& rVertex : rStrokes)
360  {
361  auto aPoint = convertFromPDFInternalToHMM(rVertex, aPageSize);
362  aPolygon.append(aPoint);
363  }
364  pMarker->maStrokes.push_back(aPolygon);
365  }
366  float fWidth = pAnnotation->getBorderWidth();
367  pMarker->mnWidth = convertPointToMm100(fWidth);
368  if (pAnnotation->hasKey(vcl::pdf::constDictionaryKeyInteriorColor))
369  pMarker->maFillColor = pAnnotation->getInteriorColor();
370  }
371  }
372  else if (eSubtype == vcl::pdf::PDFAnnotationSubType::Highlight)
373  {
374  size_t nCount = pAnnotation->getAttachmentPointsCount();
375  if (nCount > 0)
376  {
// NOTE(review): line 378 (the constructor arguments) is missing from this listing.
377  auto pMarker = std::make_shared<vcl::pdf::PDFAnnotationMarkerHighlight>(
379  rPDFGraphicAnnotation.mpMarker = pMarker;
380  for (size_t i = 0; i < nCount; ++i)
381  {
382  auto aAttachmentPoints = pAnnotation->getAttachmentPoints(i);
// NOTE(review): only emptiness is checked, yet indices 0-3 are read below;
// this assumes every attachment point set has four entries - confirm.
383  if (!aAttachmentPoints.empty())
384  {
385  basegfx::B2DPolygon aPolygon;
386  aPolygon.setClosed(true);
387 
// The points are appended in the order 0, 1, 3, 2.
388  auto aPoint1
389  = convertFromPDFInternalToHMM(aAttachmentPoints[0], aPageSize);
390  aPolygon.append(aPoint1);
391  auto aPoint2
392  = convertFromPDFInternalToHMM(aAttachmentPoints[1], aPageSize);
393  aPolygon.append(aPoint2);
394  auto aPoint3
395  = convertFromPDFInternalToHMM(aAttachmentPoints[3], aPageSize);
396  aPolygon.append(aPoint3);
397  auto aPoint4
398  = convertFromPDFInternalToHMM(aAttachmentPoints[2], aPageSize);
399  aPolygon.append(aPoint4);
400 
401  pMarker->maQuads.push_back(aPolygon);
402  }
403  }
404  }
405  }
406  else if (eSubtype == vcl::pdf::PDFAnnotationSubType::Line)
407  {
408  auto const& rLineGeometry = pAnnotation->getLineGeometry();
// NOTE(review): only emptiness is checked, yet indices 0 and 1 are read below;
// this assumes the line geometry always has two entries - confirm.
409  if (!rLineGeometry.empty())
410  {
411  auto pMarker = std::make_shared<vcl::pdf::PDFAnnotationMarkerLine>();
412  rPDFGraphicAnnotation.mpMarker = pMarker;
413 
414  auto aPoint1 = convertFromPDFInternalToHMM(rLineGeometry[0], aPageSize);
415  pMarker->maLineStart = aPoint1;
416 
417  auto aPoint2 = convertFromPDFInternalToHMM(rLineGeometry[1], aPageSize);
418  pMarker->maLineEnd = aPoint2;
419 
420  float fWidth = pAnnotation->getBorderWidth();
421  pMarker->mnWidth = convertPointToMm100(fWidth);
422  }
423  }
424  }
425  }
426  }
427  return aPDFGraphicAnnotations;
428 }
429 
430 } // end anonymous namespace
431 
432 size_t ImportPDFUnloaded(const OUString& rURL, std::vector<PDFGraphicResult>& rGraphics)
433 {
434  std::unique_ptr<SvStream> xStream(
435  ::utl::UcbStreamHelper::CreateStream(rURL, StreamMode::READ | StreamMode::SHARE_DENYNONE));
436 
437  // Save the original PDF stream for later use.
438  BinaryDataContainer aDataContainer = createBinaryDataContainer(*xStream);
439  if (aDataContainer.isEmpty())
440  return 0;
441 
442  // Prepare the link with the PDF stream.
443  auto pGfxLink = std::make_shared<GfxLink>(aDataContainer, GfxLinkType::NativePdf);
444 
445  auto pPdfium = vcl::pdf::PDFiumLibrary::get();
446  if (!pPdfium)
447  {
448  return 0;
449  }
450 
451  // Load the buffer using pdfium.
452  auto pPdfDocument = pPdfium->openDocument(pGfxLink->GetData(), pGfxLink->GetDataSize());
453 
454  if (!pPdfDocument)
455  return 0;
456 
457  const int nPageCount = pPdfDocument->getPageCount();
458  if (nPageCount <= 0)
459  return 0;
460 
461  for (int nPageIndex = 0; nPageIndex < nPageCount; ++nPageIndex)
462  {
463  basegfx::B2DSize aPageSize = pPdfDocument->getPageSize(nPageIndex);
464  if (aPageSize.getX() <= 0.0 || aPageSize.getY() <= 0.0)
465  continue;
466 
467  // Returned unit is points, convert that to twip
468  // 1 pt = 20 twips
469  constexpr double pointToTwipconversionRatio = 20;
470 
471  tools::Long nPageWidth = convertTwipToMm100(aPageSize.getX() * pointToTwipconversionRatio);
472  tools::Long nPageHeight = convertTwipToMm100(aPageSize.getY() * pointToTwipconversionRatio);
473 
474  // Create the Graphic with the VectorGraphicDataPtr and link the original PDF stream.
475  // We swap out this Graphic as soon as possible, and a later swap in
476  // actually renders the correct Bitmap on demand.
477  Graphic aGraphic(pGfxLink, nPageIndex);
478 
479  auto pPage = pPdfDocument->openPage(nPageIndex);
480 
481  std::vector<PDFGraphicAnnotation> aPDFGraphicAnnotations
482  = findAnnotations(pPage, aPageSize);
483 
484  rGraphics.emplace_back(std::move(aGraphic), Size(nPageWidth, nPageHeight),
485  aPDFGraphicAnnotations);
486  }
487 
488  return rGraphics.size();
489 }
490 }
491 
492 /* vim:set shiftwidth=4 softtabstop=4 expandtab: */
void append(const basegfx::B2DPoint &rPoint, sal_uInt32 nCount)
size_t RenderPDFBitmaps(const void *pBuffer, int nSize, std::vector< BitmapEx > &rBitmaps, const size_t nFirstPage, int nPages, const basegfx::B2DTuple *pSizeHint)
Fills the rBitmaps vector with rendered pages.
Definition: pdfread.cxx:135
bool importPdfVectorGraphicData(SvStream &rStream, std::shared_ptr< VectorGraphicData > &rVectorGraphicData)
Imports a PDF stream as a VectorGraphicData.
Definition: pdfread.cxx:229
sal_uIntPtr sal_uLong
static double getDefaultPdfResolutionDpi()
Get the default PDF rendering resolution in DPI.
Definition: pdfread.cxx:120
long Long
double getX() const
#define STREAM_SEEK_TO_END
sal_uInt64 Seek(sal_uInt64 nPos)
double getY() const
This template handles BitmapAccess the RAII way.
float x
double getMaxX() const
virtual sal_uInt64 TellEnd() override
constexpr auto convertTwipToMm100(N n)
ErrCode GetError() const
constexpr OStringLiteral constDictionaryKeyContents
Reference< XInputStream > xStream
static std::unique_ptr< SvStream > CreateStream(const OUString &rFileName, StreamMode eOpenMode, css::uno::Reference< css::awt::XWindow > xParentWin=nullptr)
Container for the binary data, whose responsibility is to manage the make it as simple as possible to...
int nCount
double getMaxY() const
OUString convertPdfDateToISO8601(OUString const &rInput)
Definition: PDFiumTools.cxx:15
float y
static std::shared_ptr< PDFium > & get()
constexpr OStringLiteral constDictionaryKeyModificationDate
int i
#define STREAM_SEEK_TO_BEGIN
sal_uInt64 GetSize()
constexpr auto convertMm100ToTwip(N n)
SvStream & WriteStream(SvStream &rStream)
const sal_uInt8 * ConstScanline
Definition: Scanline.hxx:27
std::size_t ReadBytes(void *pData, std::size_t nSize)
constexpr auto convertPointToMm100(N n)
void setClosed(bool bNew)
double getMinY() const
unsigned char sal_uInt8
constexpr OStringLiteral constDictionaryKeyTitle
size_t ImportPDFUnloaded(const OUString &rURL, std::vector< PDFGraphicResult > &rGraphics)
Import PDF as Graphic images (1 per page), but not loaded yet.
Definition: pdfread.cxx:432
bool good() const
#define SAL_WARN(area, stream)
double getMinX() const
bool ISO8601parseDateTime(const OUString &rString, css::util::DateTime &rDateTime)
bool ImportPDF(SvStream &rStream, Graphic &rGraphic)
Imports a PDF stream into rGraphic.
Definition: pdfread.cxx:245
constexpr OStringLiteral constDictionaryKeyInteriorColor
const void * GetData()