LibreOffice Module vcl (master) 1
pdfread.cxx
Go to the documentation of this file.
1/* -*- Mode: C++; tab-width: 4; indent-tabs-mode: nil; c-basic-offset: 4 -*- */
2/*
3 * This file is part of the LibreOffice project.
4 *
5 * This Source Code Form is subject to the terms of the Mozilla Public
6 * License, v. 2.0. If a copy of the MPL was not distributed with this
7 * file, You can obtain one at http://mozilla.org/MPL/2.0/.
8 */
9
10#include <vcl/pdfread.hxx>
11
13
14#include <pdf/PdfConfig.hxx>
15#include <vcl/graph.hxx>
18#include <unotools/datetime.hxx>
19
21#include <sal/log.hxx>
22#include <o3tl/string_view.hxx>
23
24using namespace com::sun::star;
25
26namespace
27{
29inline double pointToPixel(const double fPoint, const double fResolutionDPI)
30{
31 return o3tl::convert(fPoint, o3tl::Length::pt, o3tl::Length::in) * fResolutionDPI;
32}
33
35bool isCompatible(SvStream& rInStream, sal_uInt64 nPos, sal_uInt64 nSize)
36{
37 if (nSize < 8)
38 return false;
39
40 // %PDF-x.y
41 sal_uInt8 aFirstBytes[8];
42 rInStream.Seek(nPos);
43 sal_uLong nRead = rInStream.ReadBytes(aFirstBytes, 8);
44 if (nRead < 8)
45 return false;
46
47 if (aFirstBytes[0] != '%' || aFirstBytes[1] != 'P' || aFirstBytes[2] != 'D'
48 || aFirstBytes[3] != 'F' || aFirstBytes[4] != '-')
49 return false;
50
51 sal_Int32 nMajor = o3tl::toInt32(std::string_view(reinterpret_cast<char*>(&aFirstBytes[5]), 1));
52 sal_Int32 nMinor = o3tl::toInt32(std::string_view(reinterpret_cast<char*>(&aFirstBytes[7]), 1));
53 return !(nMajor > 1 || (nMajor == 1 && nMinor > 6));
54}
55
58bool getCompatibleStream(SvStream& rInStream, SvStream& rOutStream)
59{
60 sal_uInt64 nPos = STREAM_SEEK_TO_BEGIN;
61 sal_uInt64 nSize = STREAM_SEEK_TO_END;
62 bool bCompatible = isCompatible(rInStream, nPos, nSize);
63 rInStream.Seek(nPos);
64 if (bCompatible)
65 // Not converting.
66 rOutStream.WriteStream(rInStream, nSize);
67 else
68 {
69 // Downconvert to PDF-1.6.
70 auto pPdfium = vcl::pdf::PDFiumLibrary::get();
71 if (!pPdfium)
72 return false;
73
74 // Read input into a buffer.
75 SvMemoryStream aInBuffer;
76 aInBuffer.WriteStream(rInStream, nSize);
77
78 SvMemoryStream aSaved;
79 {
80 // Load the buffer using pdfium.
81 std::unique_ptr<vcl::pdf::PDFiumDocument> pPdfDocument
82 = pPdfium->openDocument(aInBuffer.GetData(), aInBuffer.GetSize(), OString());
83 if (!pPdfDocument)
84 return false;
85
86 // 16 means PDF-1.6.
87 if (!pPdfDocument->saveWithVersion(aSaved, 16))
88 return false;
89 }
90
91 aSaved.Seek(STREAM_SEEK_TO_BEGIN);
92 rOutStream.WriteStream(aSaved);
93 }
94
95 return rOutStream.good();
96}
97
98BinaryDataContainer createBinaryDataContainer(SvStream& rStream)
99{
100 // Save the original PDF stream for later use.
101 SvMemoryStream aMemoryStream;
102 if (!getCompatibleStream(rStream, aMemoryStream))
103 return {};
104
105 const sal_uInt32 nStreamLength = aMemoryStream.TellEnd();
106
107 auto aPdfData = std::make_unique<std::vector<sal_uInt8>>(nStreamLength);
108
109 aMemoryStream.Seek(STREAM_SEEK_TO_BEGIN);
110 aMemoryStream.ReadBytes(aPdfData->data(), aPdfData->size());
111 if (aMemoryStream.GetError())
112 return {};
113
114 return { std::move(aPdfData) };
115}
116
117} // end anonymous namespace
118
119namespace vcl
120{
/// Fills the rBitmaps vector with rendered pages.
///
/// @param pBuffer    PDF data, handed to pdfium's openDocument() together with nSize.
/// @param nSize      size of pBuffer.
/// @param rBitmaps   receives one BitmapEx per rendered page (appended).
/// @param nFirstPage index of the first page to render.
/// @param nPages     number of pages to render; a value <= 0 means "up to the page count".
/// @param pSizeHint  optional size; when set (and both components are non-zero) it is
///                   preferred over the page size from the PDF, and transparency is forced.
/// @return rBitmaps.size() after rendering, which includes entries already present on entry.
///
// NOTE(review): this listing appears to have lost several continuation lines of this
// function (see the notes below); confirm against the upstream file before editing.
121size_t RenderPDFBitmaps(const void* pBuffer, int nSize, std::vector<BitmapEx>& rBitmaps,
122 const size_t nFirstPage, int nPages, const basegfx::B2DTuple* pSizeHint)
123{
124 auto pPdfium = vcl::pdf::PDFiumLibrary::get();
125 if (!pPdfium)
126 {
127 return 0;
128 }
129
130 // Load the buffer using pdfium.
131 std::unique_ptr<vcl::pdf::PDFiumDocument> pPdfDocument
132 = pPdfium->openDocument(pBuffer, nSize, OString());
133 if (!pPdfDocument)
134 return 0;
135
 // Function-local static: the resolution is queried once and reused by later calls.
136 static const double fResolutionDPI = vcl::pdf::getDefaultPdfResolutionDpi();
137
138 const int nPageCount = pPdfDocument->getPageCount();
139 if (nPages <= 0)
140 nPages = nPageCount;
 // NOTE(review): the min() is evaluated as int and "- 1" is applied before the
 // conversion to size_t, so a page count of 0 yields a very large nLastPage; the
 // loop then only ends through the openPage() failure check below.
141 const size_t nLastPage = std::min<int>(nPageCount, nFirstPage + nPages) - 1;
142 for (size_t nPageIndex = nFirstPage; nPageIndex <= nLastPage; ++nPageIndex)
143 {
144 // Render next page.
145 std::unique_ptr<vcl::pdf::PDFiumPage> pPdfPage = pPdfDocument->openPage(nPageIndex);
146 if (!pPdfPage)
147 break;
148
149 // Calculate the bitmap size in points.
150 double nPageWidthPoints = pPdfPage->getWidth();
151 double nPageHeightPoints = pPdfPage->getHeight();
152 if (pSizeHint && pSizeHint->getX() && pSizeHint->getY())
153 {
154 // Have a size hint, prefer that over the logic size from the PDF.
 // NOTE(review): listing lines 156 and 158 (the right-hand sides of the two
 // assignments below) are missing here.
155 nPageWidthPoints
157 nPageHeightPoints
159 }
160
161 // Returned unit is points, convert that to pixel.
162
 // NOTE(review): listing lines 164 and 166 are missing; both std::round( calls
 // below are left unclosed in this listing.
163 const size_t nPageWidth = std::round(pointToPixel(nPageWidthPoints, fResolutionDPI)
165 const size_t nPageHeight = std::round(pointToPixel(nPageHeightPoints, fResolutionDPI)
167 std::unique_ptr<vcl::pdf::PDFiumBitmap> pPdfBitmap
168 = pPdfium->createBitmap(nPageWidth, nPageHeight, /*nAlpha=*/1);
169 if (!pPdfBitmap)
170 break;
171
172 bool bTransparent = pPdfPage->hasTransparency();
173 if (pSizeHint)
174 {
175 // This is the PDF-in-EMF case: force transparency, even in case pdfium would tell us
176 // the PDF is not transparent.
177 bTransparent = true;
178 }
 // Background fill: all-zero for transparent pages, all-ones otherwise.
179 const sal_uInt32 nColor = bTransparent ? 0x00000000 : 0xFFFFFFFF;
180 pPdfBitmap->fillRect(0, 0, nPageWidth, nPageHeight, nColor);
181 pPdfBitmap->renderPageBitmap(pPdfDocument.get(), pPdfPage.get(), /*nStartX=*/0,
182 /*nStartY=*/0, nPageWidth, nPageHeight);
183
184 // Save the buffer as a bitmap.
185 Bitmap aBitmap(Size(nPageWidth, nPageHeight), vcl::PixelFormat::N24_BPP);
186 AlphaMask aMask(Size(nPageWidth, nPageHeight));
 // Inner scope: both write accessors are released before the bitmaps are stored.
187 {
188 BitmapScopedWriteAccess pWriteAccess(aBitmap);
189 AlphaScopedWriteAccess pMaskAccess(aMask);
190 ConstScanline pPdfBuffer = pPdfBitmap->getBuffer();
191 const int nStride = pPdfBitmap->getStride();
 // One reusable row buffer for the mask values.
192 std::vector<sal_uInt8> aScanlineAlpha(nPageWidth);
193 for (size_t nRow = 0; nRow < nPageHeight; ++nRow)
194 {
195 ConstScanline pPdfLine = pPdfBuffer + (nStride * nRow);
196 // pdfium byte order is BGRA.
197 pWriteAccess->CopyScanline(nRow, pPdfLine, ScanlineFormat::N32BitTcBgra, nStride);
198 for (size_t nCol = 0; nCol < nPageWidth; ++nCol)
199 {
200 // Invert alpha (source is alpha, target is opacity).
 // Byte 3 of each 4-byte pixel is the alpha channel.
201 aScanlineAlpha[nCol] = ~pPdfLine[3];
202 pPdfLine += 4;
203 }
204 pMaskAccess->CopyScanline(nRow, aScanlineAlpha.data(), ScanlineFormat::N8BitPal,
205 nPageWidth);
206 }
207 }
208
 // The mask is only attached for transparent pages.
209 if (bTransparent)
210 {
211 rBitmaps.emplace_back(aBitmap, aMask);
212 }
213 else
214 {
215 rBitmaps.emplace_back(std::move(aBitmap));
216 }
217 }
218
219 return rBitmaps.size();
220}
221
223 std::shared_ptr<VectorGraphicData>& rVectorGraphicData)
224{
225 BinaryDataContainer aDataContainer = createBinaryDataContainer(rStream);
226 if (aDataContainer.isEmpty())
227 {
228 SAL_WARN("vcl.filter", "ImportPDF: empty PDF data array");
229 return false;
230 }
231
232 rVectorGraphicData
233 = std::make_shared<VectorGraphicData>(aDataContainer, VectorGraphicDataType::Pdf);
234
235 return true;
236}
237
238bool ImportPDF(SvStream& rStream, Graphic& rGraphic)
239{
240 std::shared_ptr<VectorGraphicData> pVectorGraphicData;
241 if (!importPdfVectorGraphicData(rStream, pVectorGraphicData))
242 return false;
243 rGraphic = Graphic(pVectorGraphicData);
244 return true;
245}
246
247namespace
248{
249basegfx::B2DPoint convertFromPDFInternalToHMM(basegfx::B2DPoint const& rInputPoint,
250 basegfx::B2DSize const& rPageSize)
251{
252 double x = convertPointToMm100(rInputPoint.getX());
253 double y = convertPointToMm100(rPageSize.getHeight() - rInputPoint.getY());
254 return { x, y };
255}
256
/// Collects the annotations of pPage as PDFGraphicAnnotation entries.
///
/// Rectangles, vertices, strokes, quads and line end points are converted from PDF
/// points to 1/100 mm, with Y mirrored against the height of aPageSize.
///
/// @param pPage     page to inspect; a null page yields an empty result.
/// @param aPageSize size of the page, used for the Y mirroring.
/// @return the annotations found, in page order.
///
// NOTE(review): several conditions of this function are missing from this listing
// (see the notes below); confirm against the upstream file before editing.
257std::vector<PDFGraphicAnnotation>
258findAnnotations(const std::unique_ptr<vcl::pdf::PDFiumPage>& pPage, basegfx::B2DSize aPageSize)
259{
260 std::vector<PDFGraphicAnnotation> aPDFGraphicAnnotations;
261 if (!pPage)
262 {
263 return aPDFGraphicAnnotations;
264 }
265
266 for (int nAnnotation = 0; nAnnotation < pPage->getAnnotationCount(); nAnnotation++)
267 {
268 auto pAnnotation = pPage->getAnnotation(nAnnotation);
269 if (pAnnotation)
270 {
271 auto eSubtype = pAnnotation->getSubType();
272
 // NOTE(review): listing lines 273-279 are missing; presumably they hold the
 // condition selecting which subtypes enter the block below - confirm upstream.
280 {
 // Properties shared by every handled annotation: author, text, rectangle, date, color.
281 OUString sAuthor = pAnnotation->getString(vcl::pdf::constDictionaryKeyTitle);
282 OUString sText = pAnnotation->getString(vcl::pdf::constDictionaryKeyContents);
283
284 basegfx::B2DRectangle rRectangle = pAnnotation->getRectangle();
285 basegfx::B2DRectangle rRectangleHMM(
286 convertPointToMm100(rRectangle.getMinX()),
287 convertPointToMm100(aPageSize.getHeight() - rRectangle.getMinY()),
288 convertPointToMm100(rRectangle.getMaxX()),
289 convertPointToMm100(aPageSize.getHeight() - rRectangle.getMaxY()));
290
291 OUString sDateTimeString
292 = pAnnotation->getString(vcl::pdf::constDictionaryKeyModificationDate);
293 OUString sISO8601String = vcl::pdf::convertPdfDateToISO8601(sDateTimeString);
294
 // aDateTime stays default-constructed when no date string is available.
295 css::util::DateTime aDateTime;
296 if (!sISO8601String.isEmpty())
297 {
298 utl::ISO8601parseDateTime(sISO8601String, aDateTime);
299 }
300
301 Color aColor = pAnnotation->getColor();
302
303 aPDFGraphicAnnotations.emplace_back();
304
305 auto& rPDFGraphicAnnotation = aPDFGraphicAnnotations.back();
306 rPDFGraphicAnnotation.maRectangle = rRectangleHMM;
307 rPDFGraphicAnnotation.maAuthor = sAuthor;
308 rPDFGraphicAnnotation.maText = sText;
309 rPDFGraphicAnnotation.maDateTime = aDateTime;
310 rPDFGraphicAnnotation.meSubType = eSubtype;
311 rPDFGraphicAnnotation.maColor = aColor;
312
 // Subtype specific marker data follows.
 // NOTE(review): listing line 313 is missing; presumably the subtype check
 // guarding the polygon marker block below - confirm upstream.
314 {
315 auto const& rVertices = pAnnotation->getVertices();
316 if (!rVertices.empty())
317 {
318 auto pMarker = std::make_shared<vcl::pdf::PDFAnnotationMarkerPolygon>();
319 rPDFGraphicAnnotation.mpMarker = pMarker;
320 for (auto const& rVertex : rVertices)
321 {
322 auto aPoint = convertFromPDFInternalToHMM(rVertex, aPageSize);
323 pMarker->maPolygon.append(aPoint);
324 }
325 pMarker->maPolygon.setClosed(true);
326 pMarker->mnWidth = convertPointToMm100(pAnnotation->getBorderWidth());
 // The fill color is only set when the annotation carries an interior color key.
327 if (pAnnotation->hasKey(vcl::pdf::constDictionaryKeyInteriorColor))
328 pMarker->maFillColor = pAnnotation->getInteriorColor();
329 }
330 }
331 else if (eSubtype == vcl::pdf::PDFAnnotationSubType::Square)
332 {
333 auto pMarker = std::make_shared<vcl::pdf::PDFAnnotationMarkerSquare>();
334 rPDFGraphicAnnotation.mpMarker = pMarker;
335 pMarker->mnWidth = convertPointToMm100(pAnnotation->getBorderWidth());
336 if (pAnnotation->hasKey(vcl::pdf::constDictionaryKeyInteriorColor))
337 pMarker->maFillColor = pAnnotation->getInteriorColor();
338 }
339 else if (eSubtype == vcl::pdf::PDFAnnotationSubType::Circle)
340 {
341 auto pMarker = std::make_shared<vcl::pdf::PDFAnnotationMarkerCircle>();
342 rPDFGraphicAnnotation.mpMarker = pMarker;
343 pMarker->mnWidth = convertPointToMm100(pAnnotation->getBorderWidth());
344 if (pAnnotation->hasKey(vcl::pdf::constDictionaryKeyInteriorColor))
345 pMarker->maFillColor = pAnnotation->getInteriorColor();
346 }
347 else if (eSubtype == vcl::pdf::PDFAnnotationSubType::Ink)
348 {
349 auto const& rStrokesList = pAnnotation->getInkStrokes();
350 if (!rStrokesList.empty())
351 {
352 auto pMarker = std::make_shared<vcl::pdf::PDFAnnotationMarkerInk>();
353 rPDFGraphicAnnotation.mpMarker = pMarker;
 // Each stroke becomes its own polygon.
354 for (auto const& rStrokes : rStrokesList)
355 {
356 basegfx::B2DPolygon aPolygon;
357 for (auto const& rVertex : rStrokes)
358 {
359 auto aPoint = convertFromPDFInternalToHMM(rVertex, aPageSize);
360 aPolygon.append(aPoint);
361 }
362 pMarker->maStrokes.push_back(aPolygon);
363 }
364 float fWidth = pAnnotation->getBorderWidth();
365 pMarker->mnWidth = convertPointToMm100(fWidth);
366 if (pAnnotation->hasKey(vcl::pdf::constDictionaryKeyInteriorColor))
367 pMarker->maFillColor = pAnnotation->getInteriorColor();
368 }
369 }
 // NOTE(review): listing line 370 is missing; presumably the "else if" for
 // the highlight marker block below - confirm upstream.
371 {
372 size_t nCount = pAnnotation->getAttachmentPointsCount();
373 if (nCount > 0)
374 {
 // NOTE(review): listing line 376 (the constructor argument and the
 // closing parenthesis of make_shared) is missing.
375 auto pMarker = std::make_shared<vcl::pdf::PDFAnnotationMarkerHighlight>(
377 rPDFGraphicAnnotation.mpMarker = pMarker;
378 for (size_t i = 0; i < nCount; ++i)
379 {
380 auto aAttachmentPoints = pAnnotation->getAttachmentPoints(i);
 // NOTE(review): only emptiness is checked, yet indices 1..3 are
 // read below - confirm that four points are always returned.
381 if (!aAttachmentPoints.empty())
382 {
383 basegfx::B2DPolygon aPolygon;
384 aPolygon.setClosed(true);
385
 // Points are appended in the order 0, 1, 3, 2.
386 auto aPoint1
387 = convertFromPDFInternalToHMM(aAttachmentPoints[0], aPageSize);
388 aPolygon.append(aPoint1);
389 auto aPoint2
390 = convertFromPDFInternalToHMM(aAttachmentPoints[1], aPageSize);
391 aPolygon.append(aPoint2);
392 auto aPoint3
393 = convertFromPDFInternalToHMM(aAttachmentPoints[3], aPageSize);
394 aPolygon.append(aPoint3);
395 auto aPoint4
396 = convertFromPDFInternalToHMM(aAttachmentPoints[2], aPageSize);
397 aPolygon.append(aPoint4);
398
399 pMarker->maQuads.push_back(aPolygon);
400 }
401 }
402 }
403 }
404 else if (eSubtype == vcl::pdf::PDFAnnotationSubType::Line)
405 {
406 auto const& rLineGeometry = pAnnotation->getLineGeometry();
 // NOTE(review): only emptiness is checked, yet index 1 is read below -
 // confirm that two points are always returned.
407 if (!rLineGeometry.empty())
408 {
409 auto pMarker = std::make_shared<vcl::pdf::PDFAnnotationMarkerLine>();
410 rPDFGraphicAnnotation.mpMarker = pMarker;
411
412 auto aPoint1 = convertFromPDFInternalToHMM(rLineGeometry[0], aPageSize);
413 pMarker->maLineStart = aPoint1;
414
415 auto aPoint2 = convertFromPDFInternalToHMM(rLineGeometry[1], aPageSize);
416 pMarker->maLineEnd = aPoint2;
417
418 float fWidth = pAnnotation->getBorderWidth();
419 pMarker->mnWidth = convertPointToMm100(fWidth);
420 }
421 }
422 }
423 }
424 }
425 return aPDFGraphicAnnotations;
426}
427
428} // end anonymous namespace
429
430size_t ImportPDFUnloaded(const OUString& rURL, std::vector<PDFGraphicResult>& rGraphics)
431{
432 std::unique_ptr<SvStream> xStream(
433 ::utl::UcbStreamHelper::CreateStream(rURL, StreamMode::READ | StreamMode::SHARE_DENYNONE));
434
435 // Save the original PDF stream for later use.
436 BinaryDataContainer aDataContainer = createBinaryDataContainer(*xStream);
437 if (aDataContainer.isEmpty())
438 return 0;
439
440 // Prepare the link with the PDF stream.
441 auto pGfxLink = std::make_shared<GfxLink>(aDataContainer, GfxLinkType::NativePdf);
442
443 auto pPdfium = vcl::pdf::PDFiumLibrary::get();
444 if (!pPdfium)
445 {
446 return 0;
447 }
448
449 // Load the buffer using pdfium.
450 auto pPdfDocument
451 = pPdfium->openDocument(pGfxLink->GetData(), pGfxLink->GetDataSize(), OString());
452
453 if (!pPdfDocument)
454 return 0;
455
456 const int nPageCount = pPdfDocument->getPageCount();
457 if (nPageCount <= 0)
458 return 0;
459
460 for (int nPageIndex = 0; nPageIndex < nPageCount; ++nPageIndex)
461 {
462 basegfx::B2DSize aPageSize = pPdfDocument->getPageSize(nPageIndex);
463 if (aPageSize.getWidth() <= 0.0 || aPageSize.getHeight() <= 0.0)
464 continue;
465
466 // Returned unit is points, convert that to twip
467 // 1 pt = 20 twips
468 constexpr double pointToTwipconversionRatio = 20;
469
470 tools::Long nPageWidth
471 = convertTwipToMm100(aPageSize.getWidth() * pointToTwipconversionRatio);
472 tools::Long nPageHeight
473 = convertTwipToMm100(aPageSize.getHeight() * pointToTwipconversionRatio);
474
475 // Create the Graphic with the VectorGraphicDataPtr and link the original PDF stream.
476 // We swap out this Graphic as soon as possible, and a later swap in
477 // actually renders the correct Bitmap on demand.
478 Graphic aGraphic(pGfxLink, nPageIndex);
479
480 auto pPage = pPdfDocument->openPage(nPageIndex);
481
482 std::vector<PDFGraphicAnnotation> aPDFGraphicAnnotations
483 = findAnnotations(pPage, aPageSize);
484
485 rGraphics.emplace_back(std::move(aGraphic), Size(nPageWidth, nPageHeight),
486 aPDFGraphicAnnotations);
487 }
488
489 return rGraphics.size();
490}
491}
492
493/* vim:set shiftwidth=4 softtabstop=4 expandtab: */
const sal_uInt8 * ConstScanline
Definition: Scanline.hxx:27
constexpr auto convertPointToMm100(N n)
constexpr auto convertTwipToMm100(N n)
Reference< XInputStream > xStream
Container for the binary data, whose responsibility is to manage the make it as simple as possible to...
const void * GetData()
sal_uInt64 GetSize()
virtual sal_uInt64 TellEnd() override
bool good() const
sal_uInt64 Seek(sal_uInt64 nPos)
std::size_t ReadBytes(void *pData, std::size_t nSize)
ErrCode GetError() const
SvStream & WriteStream(SvStream &rStream)
void append(const basegfx::B2DPoint &rPoint, sal_uInt32 nCount)
void setClosed(bool bNew)
TYPE getMaxX() const
TYPE getMinX() const
TYPE getMinY() const
TYPE getMaxY() const
TYPE getWidth() const
TYPE getHeight() const
TYPE getX() const
TYPE getY() const
static std::unique_ptr< SvStream > CreateStream(const OUString &rFileName, StreamMode eOpenMode, css::uno::Reference< css::awt::XWindow > xParentWin=nullptr)
This template handles BitmapAccess the RAII way.
int nCount
float y
float x
sal_uInt16 nPos
#define SAL_WARN(area, stream)
int i
sal_Int32 toInt32(std::u16string_view str, sal_Int16 radix=10)
constexpr Point convert(const Point &rPoint, o3tl::Length eFrom, o3tl::Length eTo)
long Long
bool ISO8601parseDateTime(std::u16string_view rString, css::util::DateTime &rDateTime)
double getDefaultPdfResolutionDpi()
Get the default PDF rendering resolution in DPI.
Definition: PdfConfig.cxx:20
constexpr OStringLiteral constDictionaryKeyInteriorColor
constexpr OStringLiteral constDictionaryKeyTitle
constexpr OStringLiteral constDictionaryKeyContents
constexpr OStringLiteral constDictionaryKeyModificationDate
OUString convertPdfDateToISO8601(std::u16string_view rInput)
Definition: PDFiumTools.cxx:15
size_t RenderPDFBitmaps(const void *pBuffer, int nSize, std::vector< BitmapEx > &rBitmaps, const size_t nFirstPage, int nPages, const basegfx::B2DTuple *pSizeHint)
Fills the rBitmaps vector with rendered pages.
Definition: pdfread.cxx:121
bool ImportPDF(SvStream &rStream, Graphic &rGraphic)
Imports a PDF stream into rGraphic.
Definition: pdfread.cxx:238
bool importPdfVectorGraphicData(SvStream &rStream, std::shared_ptr< VectorGraphicData > &rVectorGraphicData)
Imports a PDF stream as a VectorGraphicData.
Definition: pdfread.cxx:222
constexpr int PDF_INSERT_MAGIC_SCALE_FACTOR
Definition: pdfread.hxx:60
size_t ImportPDFUnloaded(const OUString &rURL, std::vector< PDFGraphicResult > &rGraphics)
Import PDF as Graphic images (1 per page), but not loaded yet.
Definition: pdfread.cxx:430
sal_uIntPtr sal_uLong
#define STREAM_SEEK_TO_END
#define STREAM_SEEK_TO_BEGIN
static std::shared_ptr< PDFium > & get()
unsigned char sal_uInt8