LibreOffice Module vcl (master)  1
pdfread.cxx
Go to the documentation of this file.
1 /* -*- Mode: C++; tab-width: 4; indent-tabs-mode: nil; c-basic-offset: 4 -*- */
2 /*
3  * This file is part of the LibreOffice project.
4  *
5  * This Source Code Form is subject to the terms of the Mozilla Public
6  * License, v. 2.0. If a copy of the MPL was not distributed with this
7  * file, You can obtain one at http://mozilla.org/MPL/2.0/.
8  */
9 
10 #include <vcl/pdfread.hxx>
11 
12 #include <tools/UnitConversion.hxx>
13 
14 #include <pdf/PdfConfig.hxx>
15 #include <vcl/graph.hxx>
18 #include <unotools/datetime.hxx>
19 
21 #include <sal/log.hxx>
22 
23 using namespace com::sun::star;
24 
25 namespace
26 {
28 inline double pointToPixel(const double fPoint, const double fResolutionDPI)
29 {
30  return o3tl::convert(fPoint, o3tl::Length::pt, o3tl::Length::in) * fResolutionDPI;
31 }
32 
34 bool isCompatible(SvStream& rInStream, sal_uInt64 nPos, sal_uInt64 nSize)
35 {
36  if (nSize < 8)
37  return false;
38 
39  // %PDF-x.y
40  sal_uInt8 aFirstBytes[8];
41  rInStream.Seek(nPos);
42  sal_uLong nRead = rInStream.ReadBytes(aFirstBytes, 8);
43  if (nRead < 8)
44  return false;
45 
46  if (aFirstBytes[0] != '%' || aFirstBytes[1] != 'P' || aFirstBytes[2] != 'D'
47  || aFirstBytes[3] != 'F' || aFirstBytes[4] != '-')
48  return false;
49 
50  sal_Int32 nMajor = OString(char(aFirstBytes[5])).toInt32();
51  sal_Int32 nMinor = OString(char(aFirstBytes[7])).toInt32();
52  return !(nMajor > 1 || (nMajor == 1 && nMinor > 6));
53 }
54 
57 bool getCompatibleStream(SvStream& rInStream, SvStream& rOutStream)
58 {
59  sal_uInt64 nPos = STREAM_SEEK_TO_BEGIN;
60  sal_uInt64 nSize = STREAM_SEEK_TO_END;
61  bool bCompatible = isCompatible(rInStream, nPos, nSize);
62  rInStream.Seek(nPos);
63  if (bCompatible)
64  // Not converting.
65  rOutStream.WriteStream(rInStream, nSize);
66  else
67  {
68  // Downconvert to PDF-1.6.
69  auto pPdfium = vcl::pdf::PDFiumLibrary::get();
70  if (!pPdfium)
71  return false;
72 
73  // Read input into a buffer.
74  SvMemoryStream aInBuffer;
75  aInBuffer.WriteStream(rInStream, nSize);
76 
77  SvMemoryStream aSaved;
78  {
79  // Load the buffer using pdfium.
80  std::unique_ptr<vcl::pdf::PDFiumDocument> pPdfDocument
81  = pPdfium->openDocument(aInBuffer.GetData(), aInBuffer.GetSize());
82  if (!pPdfDocument)
83  return false;
84 
85  // 16 means PDF-1.6.
86  if (!pPdfDocument->saveWithVersion(aSaved, 16))
87  return false;
88  }
89 
90  aSaved.Seek(STREAM_SEEK_TO_BEGIN);
91  rOutStream.WriteStream(aSaved);
92  }
93 
94  return rOutStream.good();
95 }
96 
97 BinaryDataContainer createBinaryDataContainer(SvStream& rStream)
98 {
99  // Save the original PDF stream for later use.
100  SvMemoryStream aMemoryStream;
101  if (!getCompatibleStream(rStream, aMemoryStream))
102  return {};
103 
104  const sal_uInt32 nStreamLength = aMemoryStream.TellEnd();
105 
106  auto aPdfData = std::make_unique<std::vector<sal_uInt8>>(nStreamLength);
107 
108  aMemoryStream.Seek(STREAM_SEEK_TO_BEGIN);
109  aMemoryStream.ReadBytes(aPdfData->data(), aPdfData->size());
110  if (aMemoryStream.GetError())
111  return {};
112 
113  return { std::move(aPdfData) };
114 }
115 
116 } // end anonymous namespace
117 
118 namespace vcl
119 {
// Fills the rBitmaps vector with rendered pages.
//
// pBuffer / nSize : in-memory PDF handed to pdfium.
// rBitmaps        : receives one BitmapEx per rendered page (appended).
// nFirstPage      : index of the first page to render.
// nPages          : number of pages to render; a value <= 0 means "up to the
//                   document's page count".
// pSizeHint       : optional; when set with non-zero X and Y it overrides the
//                   page size reported by the PDF, and it always forces a
//                   transparent result.
//
// Returns rBitmaps.size(), which also counts any entries the vector already
// held on entry. Returns 0 when pdfium is unavailable or the document cannot
// be opened. Rendering stops at the first page that cannot be opened or whose
// bitmap cannot be created.
120 size_t RenderPDFBitmaps(const void* pBuffer, int nSize, std::vector<BitmapEx>& rBitmaps,
121  const size_t nFirstPage, int nPages, const basegfx::B2DTuple* pSizeHint)
122 {
123  static const double fResolutionDPI = vcl::pdf::getDefaultPdfResolutionDpi();
124  auto pPdfium = vcl::pdf::PDFiumLibrary::get();
125  if (!pPdfium)
126  {
127  return 0;
128  }
129 
130  // Load the buffer using pdfium.
131  std::unique_ptr<vcl::pdf::PDFiumDocument> pPdfDocument = pPdfium->openDocument(pBuffer, nSize);
132  if (!pPdfDocument)
133  return 0;
134 
135  const int nPageCount = pPdfDocument->getPageCount();
136  if (nPages <= 0)
137  nPages = nPageCount;
 // NOTE(review): the min<int>() result minus one is an int that is then stored
 // in a size_t. With a page count of 0 this is -1 and wraps to a huge value;
 // presumably openPage() then fails and the loop breaks -- confirm.
138  const size_t nLastPage = std::min<int>(nPageCount, nFirstPage + nPages) - 1;
139  for (size_t nPageIndex = nFirstPage; nPageIndex <= nLastPage; ++nPageIndex)
140  {
141  // Render next page.
142  std::unique_ptr<vcl::pdf::PDFiumPage> pPdfPage = pPdfDocument->openPage(nPageIndex);
143  if (!pPdfPage)
144  break;
145 
146  // Calculate the bitmap size in points.
147  double nPageWidthPoints = pPdfPage->getWidth();
148  double nPageHeightPoints = pPdfPage->getHeight();
149  if (pSizeHint && pSizeHint->getX() && pSizeHint->getY())
150  {
151  // Have a size hint, prefer that over the logic size from the PDF.
 // NOTE(review): the right-hand sides of the two assignments below are
 // missing from this listing (the continuation lines were dropped);
 // restore them from the real source before compiling.
152  nPageWidthPoints
154  nPageHeightPoints
156  }
157 
158  // Returned unit is points, convert that to pixel.
159  const size_t nPageWidth = pointToPixel(nPageWidthPoints, fResolutionDPI);
160  const size_t nPageHeight = pointToPixel(nPageHeightPoints, fResolutionDPI);
161  std::unique_ptr<vcl::pdf::PDFiumBitmap> pPdfBitmap
162  = pPdfium->createBitmap(nPageWidth, nPageHeight, /*nAlpha=*/1);
163  if (!pPdfBitmap)
164  break;
165 
166  bool bTransparent = pPdfPage->hasTransparency();
167  if (pSizeHint)
168  {
169  // This is the PDF-in-EMF case: force transparency, even in case pdfium would tell us
170  // the PDF is not transparent.
171  bTransparent = true;
172  }
 // Background fill before rendering: all-zero for transparent output,
 // all-ones otherwise (presumably opaque white -- confirm channel order).
173  const sal_uInt32 nColor = bTransparent ? 0x00000000 : 0xFFFFFFFF;
174  pPdfBitmap->fillRect(0, 0, nPageWidth, nPageHeight, nColor);
175  pPdfBitmap->renderPageBitmap(pPdfDocument.get(), pPdfPage.get(), /*nStartX=*/0,
176  /*nStartY=*/0, nPageWidth, nPageHeight);
177 
178  // Save the buffer as a bitmap.
179  Bitmap aBitmap(Size(nPageWidth, nPageHeight), vcl::PixelFormat::N24_BPP);
180  AlphaMask aMask(Size(nPageWidth, nPageHeight));
181  {
 // The write accessors are scoped so they are gone before the
 // bitmap and mask are handed to rBitmaps below.
182  BitmapScopedWriteAccess pWriteAccess(aBitmap);
183  AlphaScopedWriteAccess pMaskAccess(aMask);
184  ConstScanline pPdfBuffer = pPdfBitmap->getBuffer();
185  const int nStride = pPdfBitmap->getStride();
 // One reusable row of mask values, refilled for every scanline.
186  std::vector<sal_uInt8> aScanlineAlpha(nPageWidth);
187  for (size_t nRow = 0; nRow < nPageHeight; ++nRow)
188  {
189  ConstScanline pPdfLine = pPdfBuffer + (nStride * nRow);
190  // pdfium byte order is BGRA.
191  pWriteAccess->CopyScanline(nRow, pPdfLine, ScanlineFormat::N32BitTcBgra, nStride);
 // Walk the same row again, 4 bytes per pixel, to pull out byte 3
 // (the A of BGRA) for the mask.
192  for (size_t nCol = 0; nCol < nPageWidth; ++nCol)
193  {
194  // Invert alpha (source is alpha, target is opacity).
195  aScanlineAlpha[nCol] = ~pPdfLine[3];
196  pPdfLine += 4;
197  }
198  pMaskAccess->CopyScanline(nRow, aScanlineAlpha.data(), ScanlineFormat::N8BitPal,
199  nPageWidth);
200  }
201  }
202 
 // The mask is always computed but only attached for transparent output.
203  if (bTransparent)
204  {
205  rBitmaps.emplace_back(aBitmap, aMask);
206  }
207  else
208  {
209  rBitmaps.emplace_back(std::move(aBitmap));
210  }
211  }
212 
213  return rBitmaps.size();
214 }
215 
217  std::shared_ptr<VectorGraphicData>& rVectorGraphicData)
218 {
219  BinaryDataContainer aDataContainer = createBinaryDataContainer(rStream);
220  if (aDataContainer.isEmpty())
221  {
222  SAL_WARN("vcl.filter", "ImportPDF: empty PDF data array");
223  return false;
224  }
225 
226  rVectorGraphicData
227  = std::make_shared<VectorGraphicData>(aDataContainer, VectorGraphicDataType::Pdf);
228 
229  return true;
230 }
231 
232 bool ImportPDF(SvStream& rStream, Graphic& rGraphic)
233 {
234  std::shared_ptr<VectorGraphicData> pVectorGraphicData;
235  if (!importPdfVectorGraphicData(rStream, pVectorGraphicData))
236  return false;
237  rGraphic = Graphic(pVectorGraphicData);
238  return true;
239 }
240 
241 namespace
242 {
243 basegfx::B2DPoint convertFromPDFInternalToHMM(basegfx::B2DSize const& rInputPoint,
244  basegfx::B2DSize const& rPageSize)
245 {
246  double x = convertPointToMm100(rInputPoint.getX());
247  double y = convertPointToMm100(rPageSize.getY() - rInputPoint.getY());
248  return { x, y };
249 }
250 
// Collects the annotations of pPage as PDFGraphicAnnotation entries.
//
// For every annotation that passes the subtype filter, the author, text,
// modification date, colour and bounding rectangle are copied over, and a
// subtype-specific marker (polygon, square, circle, ink, highlight or line)
// is attached when the annotation carries the matching geometry.
// All coordinates are converted from PDF points to 1/100 mm, with Y mirrored
// against aPageSize.getY().
//
// pPage is dereferenced unconditionally, so callers must pass a non-null page.
251 std::vector<PDFGraphicAnnotation>
252 findAnnotations(const std::unique_ptr<vcl::pdf::PDFiumPage>& pPage, basegfx::B2DSize aPageSize)
253 {
254  std::vector<PDFGraphicAnnotation> aPDFGraphicAnnotations;
255  for (int nAnnotation = 0; nAnnotation < pPage->getAnnotationCount(); nAnnotation++)
256  {
257  auto pAnnotation = pPage->getAnnotation(nAnnotation);
258  if (pAnnotation)
259  {
260  auto eSubtype = pAnnotation->getSubType();
261 
 // NOTE(review): the `if (...)` condition that selects the handled
 // subtypes is missing from this listing; only its opening brace
 // survived. Restore it from the real source before compiling.
269  {
270  OUString sAuthor = pAnnotation->getString(vcl::pdf::constDictionaryKeyTitle);
271  OUString sText = pAnnotation->getString(vcl::pdf::constDictionaryKeyContents);
272 
 // Bounding box in 1/100 mm; both Y values are mirrored against the page height.
273  basegfx::B2DRectangle rRectangle = pAnnotation->getRectangle();
274  basegfx::B2DRectangle rRectangleHMM(
275  convertPointToMm100(rRectangle.getMinX()),
276  convertPointToMm100(aPageSize.getY() - rRectangle.getMinY()),
277  convertPointToMm100(rRectangle.getMaxX()),
278  convertPointToMm100(aPageSize.getY() - rRectangle.getMaxY()));
279 
280  OUString sDateTimeString
281  = pAnnotation->getString(vcl::pdf::constDictionaryKeyModificationDate);
282  OUString sISO8601String = vcl::pdf::convertPdfDateToISO8601(sDateTimeString);
283 
 // aDateTime stays default-constructed when no date string is available;
 // the result of the parse call is not checked.
284  css::util::DateTime aDateTime;
285  if (!sISO8601String.isEmpty())
286  {
287  utl::ISO8601parseDateTime(sISO8601String, aDateTime);
288  }
289 
290  Color aColor = pAnnotation->getColor();
291 
292  aPDFGraphicAnnotations.emplace_back();
293 
294  auto& rPDFGraphicAnnotation = aPDFGraphicAnnotations.back();
295  rPDFGraphicAnnotation.maRectangle = rRectangleHMM;
296  rPDFGraphicAnnotation.maAuthor = sAuthor;
297  rPDFGraphicAnnotation.maText = sText;
298  rPDFGraphicAnnotation.maDateTime = aDateTime;
299  rPDFGraphicAnnotation.meSubType = eSubtype;
300  rPDFGraphicAnnotation.maColor = aColor;
301 
 // NOTE(review): the opening `if (eSubtype == ...)` of this chain is
 // missing from this listing; the body below builds a polygon marker,
 // so it is presumably the Polygon subtype -- confirm.
303  {
304  auto const& rVertices = pAnnotation->getVertices();
305  if (!rVertices.empty())
306  {
307  auto pMarker = std::make_shared<vcl::pdf::PDFAnnotationMarkerPolygon>();
308  rPDFGraphicAnnotation.mpMarker = pMarker;
309  for (auto const& rVertex : rVertices)
310  {
311  auto aPoint = convertFromPDFInternalToHMM(rVertex, aPageSize);
312  pMarker->maPolygon.append(aPoint);
313  }
314  pMarker->maPolygon.setClosed(true);
315  pMarker->mnWidth = convertPointToMm100(pAnnotation->getBorderWidth());
 // The fill colour is only set when the annotation has an interior-colour key.
316  if (pAnnotation->hasKey(vcl::pdf::constDictionaryKeyInteriorColor))
317  pMarker->maFillColor = pAnnotation->getInteriorColor();
318  }
319  }
320  else if (eSubtype == vcl::pdf::PDFAnnotationSubType::Square)
321  {
 // Square: no own geometry is read here; only border width and optional fill.
322  auto pMarker = std::make_shared<vcl::pdf::PDFAnnotationMarkerSquare>();
323  rPDFGraphicAnnotation.mpMarker = pMarker;
324  pMarker->mnWidth = convertPointToMm100(pAnnotation->getBorderWidth());
325  if (pAnnotation->hasKey(vcl::pdf::constDictionaryKeyInteriorColor))
326  pMarker->maFillColor = pAnnotation->getInteriorColor();
327  }
328  else if (eSubtype == vcl::pdf::PDFAnnotationSubType::Circle)
329  {
 // Circle: handled like Square, with its own marker type.
330  auto pMarker = std::make_shared<vcl::pdf::PDFAnnotationMarkerCircle>();
331  rPDFGraphicAnnotation.mpMarker = pMarker;
332  pMarker->mnWidth = convertPointToMm100(pAnnotation->getBorderWidth());
333  if (pAnnotation->hasKey(vcl::pdf::constDictionaryKeyInteriorColor))
334  pMarker->maFillColor = pAnnotation->getInteriorColor();
335  }
336  else if (eSubtype == vcl::pdf::PDFAnnotationSubType::Ink)
337  {
 // Ink: each stroke becomes its own (open) polygon in maStrokes.
338  auto const& rStrokesList = pAnnotation->getInkStrokes();
339  if (!rStrokesList.empty())
340  {
341  auto pMarker = std::make_shared<vcl::pdf::PDFAnnotationMarkerInk>();
342  rPDFGraphicAnnotation.mpMarker = pMarker;
343  for (auto const& rStrokes : rStrokesList)
344  {
345  basegfx::B2DPolygon aPolygon;
346  for (auto const& rVertex : rStrokes)
347  {
348  auto aPoint = convertFromPDFInternalToHMM(rVertex, aPageSize);
349  aPolygon.append(aPoint);
350  }
351  pMarker->maStrokes.push_back(aPolygon);
352  }
353  float fWidth = pAnnotation->getBorderWidth();
354  pMarker->mnWidth = convertPointToMm100(fWidth);
355  if (pAnnotation->hasKey(vcl::pdf::constDictionaryKeyInteriorColor))
356  pMarker->maFillColor = pAnnotation->getInteriorColor();
357  }
358  }
359  else if (eSubtype == vcl::pdf::PDFAnnotationSubType::Highlight)
360  {
361  size_t nCount = pAnnotation->getAttachmentPointsCount();
362  if (nCount > 0)
363  {
 // NOTE(review): the constructor argument(s) and the closing `);` of
 // this make_shared call are missing from this listing.
364  auto pMarker = std::make_shared<vcl::pdf::PDFAnnotationMarkerHighlight>(
366  rPDFGraphicAnnotation.mpMarker = pMarker;
367  for (size_t i = 0; i < nCount; ++i)
368  {
369  auto aAttachmentPoints = pAnnotation->getAttachmentPoints(i);
 // NOTE(review): only non-emptiness is checked, yet indices 0..3
 // are read below; presumably each entry always holds four
 // points -- confirm, otherwise this reads out of bounds.
370  if (!aAttachmentPoints.empty())
371  {
372  basegfx::B2DPolygon aPolygon;
373  aPolygon.setClosed(true);
374 
 // Points are appended in the order 0, 1, 3, 2 (not 0, 1, 2, 3).
375  auto aPoint1
376  = convertFromPDFInternalToHMM(aAttachmentPoints[0], aPageSize);
377  aPolygon.append(aPoint1);
378  auto aPoint2
379  = convertFromPDFInternalToHMM(aAttachmentPoints[1], aPageSize);
380  aPolygon.append(aPoint2);
381  auto aPoint3
382  = convertFromPDFInternalToHMM(aAttachmentPoints[3], aPageSize);
383  aPolygon.append(aPoint3);
384  auto aPoint4
385  = convertFromPDFInternalToHMM(aAttachmentPoints[2], aPageSize);
386  aPolygon.append(aPoint4);
387 
388  pMarker->maQuads.push_back(aPolygon);
389  }
390  }
391  }
392  }
393  else if (eSubtype == vcl::pdf::PDFAnnotationSubType::Line)
394  {
395  auto const& rLineGeometry = pAnnotation->getLineGeometry();
 // NOTE(review): index 1 is read after checking only for
 // non-emptiness; presumably the geometry always has two
 // points (start, end) -- confirm.
396  if (!rLineGeometry.empty())
397  {
398  auto pMarker = std::make_shared<vcl::pdf::PDFAnnotationMarkerLine>();
399  rPDFGraphicAnnotation.mpMarker = pMarker;
400 
401  auto aPoint1 = convertFromPDFInternalToHMM(rLineGeometry[0], aPageSize);
402  pMarker->maLineStart = aPoint1;
403 
404  auto aPoint2 = convertFromPDFInternalToHMM(rLineGeometry[1], aPageSize);
405  pMarker->maLineEnd = aPoint2;
406 
407  float fWidth = pAnnotation->getBorderWidth();
408  pMarker->mnWidth = convertPointToMm100(fWidth);
409  }
410  }
411  }
412  }
413  }
414  return aPDFGraphicAnnotations;
415 }
416 
417 } // end anonymous namespace
418 
419 size_t ImportPDFUnloaded(const OUString& rURL, std::vector<PDFGraphicResult>& rGraphics)
420 {
421  std::unique_ptr<SvStream> xStream(
422  ::utl::UcbStreamHelper::CreateStream(rURL, StreamMode::READ | StreamMode::SHARE_DENYNONE));
423 
424  // Save the original PDF stream for later use.
425  BinaryDataContainer aDataContainer = createBinaryDataContainer(*xStream);
426  if (aDataContainer.isEmpty())
427  return 0;
428 
429  // Prepare the link with the PDF stream.
430  auto pGfxLink = std::make_shared<GfxLink>(aDataContainer, GfxLinkType::NativePdf);
431 
432  auto pPdfium = vcl::pdf::PDFiumLibrary::get();
433  if (!pPdfium)
434  {
435  return 0;
436  }
437 
438  // Load the buffer using pdfium.
439  auto pPdfDocument = pPdfium->openDocument(pGfxLink->GetData(), pGfxLink->GetDataSize());
440 
441  if (!pPdfDocument)
442  return 0;
443 
444  const int nPageCount = pPdfDocument->getPageCount();
445  if (nPageCount <= 0)
446  return 0;
447 
448  for (int nPageIndex = 0; nPageIndex < nPageCount; ++nPageIndex)
449  {
450  basegfx::B2DSize aPageSize = pPdfDocument->getPageSize(nPageIndex);
451  if (aPageSize.getX() <= 0.0 || aPageSize.getY() <= 0.0)
452  continue;
453 
454  // Returned unit is points, convert that to twip
455  // 1 pt = 20 twips
456  constexpr double pointToTwipconversionRatio = 20;
457 
458  tools::Long nPageWidth = convertTwipToMm100(aPageSize.getX() * pointToTwipconversionRatio);
459  tools::Long nPageHeight = convertTwipToMm100(aPageSize.getY() * pointToTwipconversionRatio);
460 
461  // Create the Graphic with the VectorGraphicDataPtr and link the original PDF stream.
462  // We swap out this Graphic as soon as possible, and a later swap in
463  // actually renders the correct Bitmap on demand.
464  Graphic aGraphic(pGfxLink, nPageIndex);
465 
466  auto pPage = pPdfDocument->openPage(nPageIndex);
467 
468  std::vector<PDFGraphicAnnotation> aPDFGraphicAnnotations
469  = findAnnotations(pPage, aPageSize);
470 
471  rGraphics.emplace_back(std::move(aGraphic), Size(nPageWidth, nPageHeight),
472  aPDFGraphicAnnotations);
473  }
474 
475  return rGraphics.size();
476 }
477 }
478 
479 /* vim:set shiftwidth=4 softtabstop=4 expandtab: */
double getY() const
void append(const basegfx::B2DPoint &rPoint, sal_uInt32 nCount)
size_t RenderPDFBitmaps(const void *pBuffer, int nSize, std::vector< BitmapEx > &rBitmaps, const size_t nFirstPage, int nPages, const basegfx::B2DTuple *pSizeHint)
Fills the rBitmaps vector with rendered pages.
Definition: pdfread.cxx:120
bool importPdfVectorGraphicData(SvStream &rStream, std::shared_ptr< VectorGraphicData > &rVectorGraphicData)
Imports a PDF stream as a VectorGraphicData.
Definition: pdfread.cxx:216
sal_uIntPtr sal_uLong
long Long
constexpr Point convert(const Point &rPoint, o3tl::Length eFrom, o3tl::Length eTo)
#define STREAM_SEEK_TO_END
sal_uInt64 Seek(sal_uInt64 nPos)
double getDefaultPdfResolutionDpi()
Get the default PDF rendering resolution in DPI.
Definition: PdfConfig.cxx:17
This template handles BitmapAccess the RAII way.
float x
double getMaxX() const
virtual sal_uInt64 TellEnd() override
constexpr auto convertTwipToMm100(N n)
ErrCode GetError() const
constexpr OStringLiteral constDictionaryKeyContents
Reference< XInputStream > xStream
static std::unique_ptr< SvStream > CreateStream(const OUString &rFileName, StreamMode eOpenMode, css::uno::Reference< css::awt::XWindow > xParentWin=nullptr)
Container for the binary data, whose responsibility is to make it as simple as possible to...
int nCount
double getMaxY() const
OUString convertPdfDateToISO8601(OUString const &rInput)
Definition: PDFiumTools.cxx:15
float y
static std::shared_ptr< PDFium > & get()
constexpr OStringLiteral constDictionaryKeyModificationDate
int i
#define STREAM_SEEK_TO_BEGIN
sal_uInt64 GetSize()
SvStream & WriteStream(SvStream &rStream)
const sal_uInt8 * ConstScanline
Definition: Scanline.hxx:27
std::size_t ReadBytes(void *pData, std::size_t nSize)
constexpr auto convertPointToMm100(N n)
void setClosed(bool bNew)
double getMinY() const
unsigned char sal_uInt8
constexpr OStringLiteral constDictionaryKeyTitle
size_t ImportPDFUnloaded(const OUString &rURL, std::vector< PDFGraphicResult > &rGraphics)
Import PDF as Graphic images (1 per page), but not loaded yet.
Definition: pdfread.cxx:419
bool good() const
#define SAL_WARN(area, stream)
double getX() const
double getMinX() const
bool ISO8601parseDateTime(const OUString &rString, css::util::DateTime &rDateTime)
bool ImportPDF(SvStream &rStream, Graphic &rGraphic)
Imports a PDF stream into rGraphic.
Definition: pdfread.cxx:232
constexpr OStringLiteral constDictionaryKeyInteriorColor
const void * GetData()