LibreOffice Module vcl (master)  1
pdfread.cxx
Go to the documentation of this file.
1 /* -*- Mode: C++; tab-width: 4; indent-tabs-mode: nil; c-basic-offset: 4 -*- */
2 /*
3  * This file is part of the LibreOffice project.
4  *
5  * This Source Code Form is subject to the terms of the Mozilla Public
6  * License, v. 2.0. If a copy of the MPL was not distributed with this
7  * file, You can obtain one at http://mozilla.org/MPL/2.0/.
8  */
9 
10 #include <vcl/pdfread.hxx>
11 
12 #include <config_features.h>
13 
14 #if HAVE_FEATURE_PDFIUM
15 #include <fpdfview.h>
16 #include <fpdf_edit.h>
17 #include <fpdf_save.h>
18 #endif
19 
20 #include <vcl/graph.hxx>
21 #include <bitmapwriteaccess.hxx>
23 #include <unotools/datetime.hxx>
24 
26 
27 using namespace com::sun::star;
28 
29 namespace
30 {
31 #if HAVE_FEATURE_PDFIUM
32 
34 struct CompatibleWriter : public FPDF_FILEWRITE
35 {
36  SvMemoryStream m_aStream;
37 };
38 
39 int CompatibleWriterCallback(FPDF_FILEWRITE* pFileWrite, const void* pData, unsigned long nSize)
40 {
41  auto pImpl = static_cast<CompatibleWriter*>(pFileWrite);
42  pImpl->m_aStream.WriteBytes(pData, nSize);
43  return 1;
44 }
45 
/// Converts a length in points to pixels for the given resolution.
///
/// @param fPoint         length in points (72 points per inch).
/// @param fResolutionDPI target resolution in dots per inch.
/// @return the length in pixels.
inline double pointToPixel(const double fPoint, const double fResolutionDPI)
{
    constexpr double fPointsPerInch = 72.;
    const double fScaled = fPoint * fResolutionDPI;
    return fScaled / fPointsPerInch;
}
51 
53 bool isCompatible(SvStream& rInStream, sal_uInt64 nPos, sal_uInt64 nSize)
54 {
55  if (nSize < 8)
56  return false;
57 
58  // %PDF-x.y
59  sal_uInt8 aFirstBytes[8];
60  rInStream.Seek(nPos);
61  sal_uLong nRead = rInStream.ReadBytes(aFirstBytes, 8);
62  if (nRead < 8)
63  return false;
64 
65  if (aFirstBytes[0] != '%' || aFirstBytes[1] != 'P' || aFirstBytes[2] != 'D'
66  || aFirstBytes[3] != 'F' || aFirstBytes[4] != '-')
67  return false;
68 
69  sal_Int32 nMajor = OString(aFirstBytes[5]).toInt32();
70  sal_Int32 nMinor = OString(aFirstBytes[7]).toInt32();
71  return !(nMajor > 1 || (nMajor == 1 && nMinor > 6));
72 }
73 
76 bool getCompatibleStream(SvStream& rInStream, SvStream& rOutStream)
77 {
78  sal_uInt64 nPos = STREAM_SEEK_TO_BEGIN;
79  sal_uInt64 nSize = STREAM_SEEK_TO_END;
80  bool bCompatible = isCompatible(rInStream, nPos, nSize);
81  rInStream.Seek(nPos);
82  if (bCompatible)
83  // Not converting.
84  rOutStream.WriteStream(rInStream, nSize);
85  else
86  {
87  // Downconvert to PDF-1.6.
88  auto pPdfium = vcl::pdf::PDFiumLibrary::get();
89 
90  // Read input into a buffer.
91  SvMemoryStream aInBuffer;
92  aInBuffer.WriteStream(rInStream, nSize);
93 
94  // Load the buffer using pdfium.
95  FPDF_DOCUMENT pPdfDocument
96  = FPDF_LoadMemDocument(aInBuffer.GetData(), aInBuffer.GetSize(), /*password=*/nullptr);
97  if (!pPdfDocument)
98  return false;
99 
100  CompatibleWriter aWriter;
101  aWriter.version = 1;
102  aWriter.WriteBlock = &CompatibleWriterCallback;
103 
104  // 16 means PDF-1.6.
105  if (!FPDF_SaveWithVersion(pPdfDocument, &aWriter, 0, 16))
106  return false;
107 
108  FPDF_CloseDocument(pPdfDocument);
109 
110  aWriter.m_aStream.Seek(STREAM_SEEK_TO_BEGIN);
111  rOutStream.WriteStream(aWriter.m_aStream);
112  }
113 
114  return rOutStream.good();
115 }
116 #else
117 bool getCompatibleStream(SvStream& rInStream, SvStream& rOutStream)
118 {
119  rInStream.Seek(STREAM_SEEK_TO_BEGIN);
120  rOutStream.WriteStream(rInStream, STREAM_SEEK_TO_END);
121  return rOutStream.good();
122 }
123 #endif // HAVE_FEATURE_PDFIUM
124 
125 VectorGraphicDataArray createVectorGraphicDataArray(SvStream& rStream)
126 {
127  // Save the original PDF stream for later use.
128  SvMemoryStream aMemoryStream;
129  if (!getCompatibleStream(rStream, aMemoryStream))
130  return VectorGraphicDataArray();
131 
132  const sal_uInt32 nStreamLength = aMemoryStream.TellEnd();
133 
134  VectorGraphicDataArray aPdfData(nStreamLength);
135 
136  aMemoryStream.Seek(STREAM_SEEK_TO_BEGIN);
137  aMemoryStream.ReadBytes(aPdfData.begin(), nStreamLength);
138  if (aMemoryStream.GetError())
139  return VectorGraphicDataArray();
140 
141  return aPdfData;
142 }
143 
144 } // end anonymous namespace
145 
146 namespace vcl
147 {
148 size_t RenderPDFBitmaps(const void* pBuffer, int nSize, std::vector<Bitmap>& rBitmaps,
149  const size_t nFirstPage, int nPages, const double fResolutionDPI)
150 {
151 #if HAVE_FEATURE_PDFIUM
152  auto pPdfium = vcl::pdf::PDFiumLibrary::get();
153 
154  // Load the buffer using pdfium.
155  FPDF_DOCUMENT pPdfDocument = FPDF_LoadMemDocument(pBuffer, nSize, /*password=*/nullptr);
156  if (!pPdfDocument)
157  return 0;
158 
159  const int nPageCount = FPDF_GetPageCount(pPdfDocument);
160  if (nPages <= 0)
161  nPages = nPageCount;
162  const size_t nLastPage = std::min<int>(nPageCount, nFirstPage + nPages) - 1;
163  for (size_t nPageIndex = nFirstPage; nPageIndex <= nLastPage; ++nPageIndex)
164  {
165  // Render next page.
166  FPDF_PAGE pPdfPage = FPDF_LoadPage(pPdfDocument, nPageIndex);
167  if (!pPdfPage)
168  break;
169 
170  // Returned unit is points, convert that to pixel.
171  const size_t nPageWidth = pointToPixel(FPDF_GetPageWidth(pPdfPage), fResolutionDPI);
172  const size_t nPageHeight = pointToPixel(FPDF_GetPageHeight(pPdfPage), fResolutionDPI);
173  FPDF_BITMAP pPdfBitmap = FPDFBitmap_Create(nPageWidth, nPageHeight, /*alpha=*/1);
174  if (!pPdfBitmap)
175  break;
176 
177  const FPDF_DWORD nColor = FPDFPage_HasTransparency(pPdfPage) ? 0x00000000 : 0xFFFFFFFF;
178  FPDFBitmap_FillRect(pPdfBitmap, 0, 0, nPageWidth, nPageHeight, nColor);
179  FPDF_RenderPageBitmap(pPdfBitmap, pPdfPage, /*start_x=*/0, /*start_y=*/0, nPageWidth,
180  nPageHeight, /*rotate=*/0, /*flags=*/0);
181 
182  // Save the buffer as a bitmap.
183  Bitmap aBitmap(Size(nPageWidth, nPageHeight), 24);
184  {
185  BitmapScopedWriteAccess pWriteAccess(aBitmap);
186  const auto pPdfBuffer = static_cast<ConstScanline>(FPDFBitmap_GetBuffer(pPdfBitmap));
187  const int nStride = FPDFBitmap_GetStride(pPdfBitmap);
188  for (size_t nRow = 0; nRow < nPageHeight; ++nRow)
189  {
190  ConstScanline pPdfLine = pPdfBuffer + (nStride * nRow);
191  // pdfium byte order is BGRA.
192  pWriteAccess->CopyScanline(nRow, pPdfLine, ScanlineFormat::N32BitTcBgra, nStride);
193  }
194  }
195 
196  rBitmaps.emplace_back(std::move(aBitmap));
197  FPDFBitmap_Destroy(pPdfBitmap);
198  FPDF_ClosePage(pPdfPage);
199  }
200 
201  FPDF_CloseDocument(pPdfDocument);
202 
203  return rBitmaps.size();
204 #else
205  (void)pBuffer;
206  (void)nSize;
207  (void)rBitmaps;
208  (void)nFirstPage;
209  (void)nPages;
210  (void)fResolutionDPI;
211  return 0;
212 #endif // HAVE_FEATURE_PDFIUM
213 }
214 
215 bool ImportPDF(SvStream& rStream, Graphic& rGraphic)
216 {
217  VectorGraphicDataArray aPdfDataArray = createVectorGraphicDataArray(rStream);
218  if (!aPdfDataArray.hasElements())
219  return false;
220 
221  auto aVectorGraphicDataPtr = std::make_shared<VectorGraphicData>(aPdfDataArray, OUString(),
223 
224  rGraphic = Graphic(aVectorGraphicDataPtr);
225  return true;
226 }
227 
228 size_t ImportPDFUnloaded(const OUString& rURL, std::vector<PDFGraphicResult>& rGraphics)
229 {
230 #if HAVE_FEATURE_PDFIUM
231  std::unique_ptr<SvStream> xStream(
232  ::utl::UcbStreamHelper::CreateStream(rURL, StreamMode::READ | StreamMode::SHARE_DENYNONE));
233 
234  // Save the original PDF stream for later use.
235  VectorGraphicDataArray aPdfDataArray = createVectorGraphicDataArray(*xStream);
236  if (!aPdfDataArray.hasElements())
237  return 0;
238 
239  // Prepare the link with the PDF stream.
240  const size_t nGraphicContentSize = aPdfDataArray.getLength();
241  std::unique_ptr<sal_uInt8[]> pGraphicContent(new sal_uInt8[nGraphicContentSize]);
242 
243  std::copy(aPdfDataArray.begin(), aPdfDataArray.end(), pGraphicContent.get());
244 
245  auto pGfxLink = std::make_shared<GfxLink>(std::move(pGraphicContent), nGraphicContentSize,
247 
248  auto pPdfium = vcl::pdf::PDFiumLibrary::get();
249 
250  // Load the buffer using pdfium.
251  auto pPdfDocument = pPdfium->openDocument(pGfxLink->GetData(), pGfxLink->GetDataSize());
252 
253  if (!pPdfDocument)
254  return 0;
255 
256  const int nPageCount = pPdfDocument->getPageCount();
257  if (nPageCount <= 0)
258  return 0;
259 
260  for (int nPageIndex = 0; nPageIndex < nPageCount; ++nPageIndex)
261  {
262  basegfx::B2DSize aPageSize = pPdfDocument->getPageSize(nPageIndex);
263  if (aPageSize.getX() <= 0.0 || aPageSize.getY() <= 0.0)
264  continue;
265 
266  // Returned unit is points, convert that to twip
267  // 1 pt = 20 twips
268  constexpr double pointToTwipconversionRatio = 20;
269 
270  long nPageWidth = convertTwipToMm100(aPageSize.getX() * pointToTwipconversionRatio);
271  long nPageHeight = convertTwipToMm100(aPageSize.getY() * pointToTwipconversionRatio);
272 
273  auto aVectorGraphicDataPtr = std::make_shared<VectorGraphicData>(
274  aPdfDataArray, OUString(), VectorGraphicDataType::Pdf, nPageIndex);
275 
276  // Create the Graphic with the VectorGraphicDataPtr and link the original PDF stream.
277  // We swap out this Graphic as soon as possible, and a later swap in
278  // actually renders the correct Bitmap on demand.
279  Graphic aGraphic(aVectorGraphicDataPtr);
280  aGraphic.SetGfxLink(pGfxLink);
281 
282  auto pPage = pPdfDocument->openPage(nPageIndex);
283 
284  std::vector<PDFGraphicAnnotation> aPDFGraphicAnnotations;
285  for (int nAnnotation = 0; nAnnotation < pPage->getAnnotationCount(); nAnnotation++)
286  {
287  auto pAnnotation = pPage->getAnnotation(nAnnotation);
288  if (pAnnotation && pAnnotation->getSubType() == 1 /*FPDF_ANNOT_TEXT*/
289  && pAnnotation->hasKey(vcl::pdf::constDictionaryKeyPopup))
290  {
291  OUString sAuthor = pAnnotation->getString(vcl::pdf::constDictionaryKeyTitle);
292  OUString sText = pAnnotation->getString(vcl::pdf::constDictionaryKeyContents);
293  auto pPopupAnnotation = pAnnotation->getLinked(vcl::pdf::constDictionaryKeyPopup);
294 
295  basegfx::B2DRectangle rRectangle = pAnnotation->getRectangle();
296  basegfx::B2DRectangle rRectangleHMM(
297  convertPointToMm100(rRectangle.getMinX()),
298  convertPointToMm100(aPageSize.getY() - rRectangle.getMinY()),
299  convertPointToMm100(rRectangle.getMaxX()),
300  convertPointToMm100(aPageSize.getY() - rRectangle.getMaxY()));
301 
302  OUString sDateTimeString
303  = pAnnotation->getString(vcl::pdf::constDictionaryKeyModificationDate);
304  OUString sISO8601String = vcl::pdf::convertPdfDateToISO8601(sDateTimeString);
305 
306  css::util::DateTime aDateTime;
307  if (!sISO8601String.isEmpty())
308  {
309  utl::ISO8601parseDateTime(sISO8601String, aDateTime);
310  }
311 
312  PDFGraphicAnnotation aPDFGraphicAnnotation;
313  aPDFGraphicAnnotation.maRectangle = rRectangleHMM;
314  aPDFGraphicAnnotation.maAuthor = sAuthor;
315  aPDFGraphicAnnotation.maText = sText;
316  aPDFGraphicAnnotation.maDateTime = aDateTime;
317  aPDFGraphicAnnotations.push_back(aPDFGraphicAnnotation);
318  }
319  }
320 
321  rGraphics.emplace_back(std::move(aGraphic), Size(nPageWidth, nPageHeight),
322  aPDFGraphicAnnotations);
323  }
324 
325  return rGraphics.size();
326 #else
327  (void)rURL;
328  (void)rGraphics;
329  return 0;
330 #endif // HAVE_FEATURE_PDFIUM
331 }
332 }
333 
334 /* vim:set shiftwidth=4 softtabstop=4 expandtab: */
sal_uIntPtr sal_uLong
double getX() const
#define STREAM_SEEK_TO_END
sal_uInt64 Seek(sal_uInt64 nPos)
double getY() const
basegfx::B2DRectangle maRectangle
Definition: pdfread.hxx:41
double getMaxX() const
virtual sal_uInt64 TellEnd() override
ErrCode GetError() const
Reference< XInputStream > xStream
static std::unique_ptr< SvStream > CreateStream(const OUString &rFileName, StreamMode eOpenMode, css::uno::Reference< css::awt::XWindow > xParentWin=nullptr)
css::util::DateTime maDateTime
Definition: pdfread.hxx:42
double getMaxY() const
static sal_Int32 pointToPixel(double pt)
constexpr sal_Int64 convertPointToMm100(sal_Int64 nNumber)
#define STREAM_SEEK_TO_BEGIN
sal_uInt64 GetSize()
std::size_t WriteBytes(const void *pData, std::size_t nSize)
SvStream & WriteStream(SvStream &rStream)
const sal_uInt8 * ConstScanline
Definition: Scanline.hxx:26
std::size_t ReadBytes(void *pData, std::size_t nSize)
css::uno::Sequence< sal_Int8 > VectorGraphicDataArray
double getMinY() const
constexpr sal_Int64 convertTwipToMm100(sal_Int64 n)
unsigned char sal_uInt8
size_t ImportPDFUnloaded(const OUString &rURL, std::vector< PDFGraphicResult > &rGraphics)
Import PDF as Graphic images (1 per page), but not loaded yet.
Definition: pdfread.cxx:228
bool good() const
void CopyScanline(long nY, const BitmapReadAccess &rReadAcc)
Definition: bmpacc.cxx:334
double getMinX() const
size_t RenderPDFBitmaps(const void *pBuffer, int nSize, std::vector< Bitmap > &rBitmaps, const size_t nFirstPage, int nPages, const double fResolutionDPI)
Fills the rBitmaps vector with rendered pages.
Definition: pdfread.cxx:148
bool ISO8601parseDateTime(const OUString &rString, css::util::DateTime &rDateTime)
bool ImportPDF(SvStream &rStream, Graphic &rGraphic)
Imports a PDF stream into rGraphic as VectorGraphicData.
Definition: pdfread.cxx:215
const void * GetData()
typedef void(CALLTYPE *GetFuncDataPtr)(sal_uInt16 &nNo