LibreOffice Module sdext (master)  1
wrapper.cxx
Go to the documentation of this file.
1 /* -*- Mode: C++; tab-width: 4; indent-tabs-mode: nil; c-basic-offset: 4 -*- */
2 /*
3  * This file is part of the LibreOffice project.
4  *
5  * This Source Code Form is subject to the terms of the Mozilla Public
6  * License, v. 2.0. If a copy of the MPL was not distributed with this
7  * file, You can obtain one at http://mozilla.org/MPL/2.0/.
8  *
9  * This file incorporates work covered by the following license notice:
10  *
11  * Licensed to the Apache Software Foundation (ASF) under one or more
12  * contributor license agreements. See the NOTICE file distributed
13  * with this work for additional information regarding copyright
14  * ownership. The ASF licenses this file to you under the Apache
15  * License, Version 2.0 (the "License"); you may not use this file
16  * except in compliance with the License. You may obtain a copy of
17  * the License at http://www.apache.org/licenses/LICENSE-2.0 .
18  */
19 
20 #include <config_folders.h>
21 
22 #include <contentsink.hxx>
23 #include <pdfparse.hxx>
24 #include <pdfihelper.hxx>
25 #include <wrapper.hxx>
26 
27 #include <o3tl/string_view.hxx>
28 #include <osl/file.h>
29 #include <osl/file.hxx>
30 #include <osl/thread.h>
31 #include <osl/process.h>
32 #include <osl/diagnose.h>
33 #include <rtl/bootstrap.hxx>
34 #include <rtl/ustring.hxx>
35 #include <rtl/ustrbuf.hxx>
36 #include <rtl/strbuf.hxx>
37 #include <sal/log.hxx>
38 
40 #include <com/sun/star/io/XInputStream.hpp>
41 #include <com/sun/star/uno/XComponentContext.hpp>
42 #include <com/sun/star/awt/FontDescriptor.hpp>
43 #include <com/sun/star/beans/XMaterialHolder.hpp>
44 #include <com/sun/star/rendering/PathCapType.hpp>
45 #include <com/sun/star/rendering/PathJoinType.hpp>
46 #include <com/sun/star/rendering/XPolyPolygon2D.hpp>
47 #include <com/sun/star/geometry/Matrix2D.hpp>
48 #include <com/sun/star/geometry/AffineMatrix2D.hpp>
49 #include <com/sun/star/geometry/RealRectangle2D.hpp>
50 #include <com/sun/star/geometry/RealSize2D.hpp>
51 #include <com/sun/star/task/XInteractionHandler.hpp>
52 #include <tools/diagnose_ex.h>
53 
58 
59 #include <vcl/metric.hxx>
60 #include <vcl/font.hxx>
61 #include <vcl/virdev.hxx>
62 
63 #include <cstddef>
64 #include <memory>
65 #include <string_view>
66 #include <unordered_map>
67 #include <string.h>
68 #include <stdlib.h>
69 
70 #include <rtl/character.hxx>
71 
72 using namespace com::sun::star;
73 
74 namespace pdfi
75 {
76 
77 namespace
78 {
79 
80 // identifier of the strings coming from the out-of-process xpdf
81 // converter
// Numeric keys for the keywords that can start a line of converter output.
// Parser::parseLine() looks up the first token of a line with
// PdfKeywordHash::in_word_set() and switches on the eKey member of the
// returned entry using these enumerators. Enumerators that have no case
// label there, as well as NONE, end up in its "Unknown input" default branch.
82 enum parseKey {
83  CLIPPATH,
84  DRAWCHAR,
85  DRAWIMAGE,
86  DRAWLINK,
87  DRAWMASK,
88  DRAWMASKEDIMAGE,
89  DRAWSOFTMASKEDIMAGE,
90  ENDPAGE,
91  ENDTEXTOBJECT,
92  EOCLIPPATH,
93  EOFILLPATH,
94  FILLPATH,
95  HYPERLINK,
96  INTERSECTCLIP,
97  INTERSECTEOCLIP,
98  POPSTATE,
99  PUSHSTATE,
100  RESTORESTATE,
101  SAVESTATE,
102  SETBLENDMODE,
103  SETFILLCOLOR,
104  SETFONT,
105  SETLINECAP,
106  SETLINEDASH,
107  SETLINEJOIN,
108  SETLINEWIDTH,
109  SETMITERLIMIT,
110  SETPAGENUM,
111  SETSTROKECOLOR,
112  SETTEXTRENDERMODE,
113  SETTRANSFORMATION,
114  STARTPAGE,
115  STROKEPATH,
116  UPDATEBLENDMODE,
117  UPDATECTM,
118  UPDATEFILLCOLOR,
119  UPDATEFILLOPACITY,
120  UPDATEFLATNESS,
121  UPDATEFONT,
122  UPDATELINECAP,
123  UPDATELINEDASH,
124  UPDATELINEJOIN,
125  UPDATELINEWIDTH,
126  UPDATEMITERLIMIT,
127  UPDATESTROKECOLOR,
128  UPDATESTROKEOPACITY,
// Not a converter keyword; handled together with the default branch in
// Parser::parseLine().
129  NONE
130 };
131 
132 #if defined _MSC_VER && defined __clang__
133 #pragma clang diagnostic push
134 #pragma clang diagnostic ignored "-Wdeprecated-register"
135 #pragma clang diagnostic ignored "-Wextra-tokens"
136 #endif
137 #include <hash.cxx>
138 #if defined _MSC_VER && defined __clang__
139 #pragma clang diagnostic pop
140 #endif
141 
// State shared by all lines of one converter run. parseLine() creates a
// LineParser per line; LineParser is a friend so that it can reach the sink,
// the binary-data file handle, the component context and the font map.
// NOTE(review): the constructor initializes m_pSink, and LineParser uses
// m_parser.m_pSink and m_parser.m_xDev, but the declarations of these two
// members are not visible in this listing -- confirm against the real file.
142 class Parser
143 {
144  friend class LineParser;
145 
// Maps the font id read by LineParser::readFont() to the attributes that
// were determined for that font.
146  typedef std::unordered_map< sal_Int64,
147  FontAttributes > FontMapType;
148 
// Used by LineParser to instantiate UNO services (FontIdentificator,
// SequenceInputStream).
150  const uno::Reference<uno::XComponentContext> m_xContext;
// Handle LineParser::readBinaryData() reads font and image payloads from.
152  const oslFileHandle m_pErr;
// Fonts already seen, so that readFont() only analyses each font id once.
153  FontMapType m_aFontMap;
154 
155 public:
// rSink receives the parsed drawing calls, pErr is the handle binary
// payloads are read from, xContext is the UNO component context.
156  Parser( const ContentSinkSharedPtr& rSink,
157  oslFileHandle pErr,
158  const uno::Reference<uno::XComponentContext>& xContext ) :
159  m_xContext(xContext),
160  m_pSink(rSink),
161  m_pErr(pErr),
// 101 is the initial bucket count requested from std::unordered_map.
162  m_aFontMap(101)
163  {}
164 
// Parses one line of converter output and forwards it to the sink.
165  void parseLine( const OString& rLine );
166 };
167 
// Tokenizer and interpreter for a single line of converter output. The line
// is split at spaces; m_nCharIndex is the position the next token is read
// from and becomes std::string_view::npos once the line is used up. The
// read* methods without a return value forward what they parsed to the sink
// of the owning Parser.
168 class LineParser {
// Parser that owns the sink, the binary-data handle and the font map.
169  Parser & m_parser;
// Copy of the line being parsed.
170  OString m_aLine;
171 
// Strips known suffixes from the family name and sets bold/italic from them.
172  static void parseFontFamilyName( FontAttributes& aResult );
// Out-parameter variants of the token readers below.
173  void readInt32( sal_Int32& o_Value );
174  void readInt64( sal_Int64& o_Value );
175  void readDouble( double& o_Value );
// Fills rBuf (whose length gives the byte count) from Parser::m_pErr.
176  void readBinaryData( uno::Sequence<sal_Int8>& rBuf );
177 
// Reads "<format> <size>" plus the binary payload and wraps it into the
// "URL" / "InputStream" / "InputSequence" property sequence.
178  uno::Sequence<beans::PropertyValue> readImageImpl();
179 
180 public:
// Read position inside m_aLine; Parser::parseLine() checks it for npos to
// detect leftover input.
181  std::size_t m_nCharIndex = 0;
182 
183  LineParser(Parser & parser, OString const & line): m_parser(parser), m_aLine(line) {}
184 
// Next space-separated token; empty (with a warning) if the line is used up.
185  std::string_view readNextToken();
// Next token as integer; values outside the sal_Int32 range yield 0.
186  sal_Int32 readInt32();
// Next token as double, using '.' as decimal separator.
187  double readDouble();
188 
// Reads all "subpath" sections of the rest of the line into a poly-polygon.
189  uno::Reference<rendering::XPolyPolygon2D> readPath();
190 
191  void readChar();
192  void readLineCap();
193  void readLineDash();
194  void readLineJoin();
195  void readTransformation();
// Reads four doubles in the order red, green, blue, alpha.
196  rendering::ARGBColor readColor();
197  void readFont();
198 
199  void readImage();
200  void readMask();
201  void readLink();
202  void readMaskedImage();
203  void readSoftMaskedImage();
204 };
205 
210 OString lcl_unescapeLineFeeds(std::string_view i_rStr)
211 {
212  const size_t nOrigLen(i_rStr.size());
213  const char* const pOrig(i_rStr.data());
214  std::unique_ptr<char[]> pBuffer(new char[nOrigLen + 1]);
215 
216  const char* pRead(pOrig);
217  char* pWrite(pBuffer.get());
218  const char* pCur(pOrig);
219  while ((pCur = strchr(pCur, '\\')) != nullptr)
220  {
221  const char cNext(pCur[1]);
222  if (cNext == 'n' || cNext == 'r' || cNext == '\\')
223  {
224  const size_t nLen(pCur - pRead);
225  strncpy(pWrite, pRead, nLen);
226  pWrite += nLen;
227  *pWrite = cNext == 'n' ? '\n' : (cNext == 'r' ? '\r' : '\\');
228  ++pWrite;
229  pCur = pRead = pCur + 2;
230  }
231  else
232  {
233  // Just continue on the next character. The current
234  // block will be copied the next time it goes through the
235  // 'if' branch.
236  ++pCur;
237  }
238  }
239  // maybe there are some data to copy yet
240  if (sal::static_int_cast<size_t>(pRead - pOrig) < nOrigLen)
241  {
242  const size_t nLen(nOrigLen - (pRead - pOrig));
243  strncpy(pWrite, pRead, nLen);
244  pWrite += nLen;
245  }
246  *pWrite = '\0';
247 
248  OString aResult(pBuffer.get());
249  return aResult;
250 }
251 
252 std::string_view LineParser::readNextToken()
253 {
254  if (m_nCharIndex == std::string_view::npos) {
255  SAL_WARN("sdext.pdfimport", "insufficient input");
256  return {};
257  }
258  return o3tl::getToken(m_aLine,' ',m_nCharIndex);
259 }
260 
261 void LineParser::readInt32( sal_Int32& o_Value )
262 {
263  std::string_view tok = readNextToken();
264  sal_Int64 n = rtl_str_toInt64_WithLength(tok.data(), 10, tok.size());
265  if (n < SAL_MIN_INT32 || n > SAL_MAX_INT32)
266  n = 0;
267  o_Value = n;
268 }
269 
270 sal_Int32 LineParser::readInt32()
271 {
272  std::string_view tok = readNextToken();
273  sal_Int64 n =rtl_str_toInt64_WithLength(tok.data(), 10, tok.size());
274  if (n < SAL_MIN_INT32 || n > SAL_MAX_INT32)
275  n = 0;
276  return n;
277 }
278 
279 void LineParser::readInt64( sal_Int64& o_Value )
280 {
281  std::string_view tok = readNextToken();
282  o_Value = rtl_str_toInt64_WithLength(tok.data(), 10, tok.size());
283 }
284 
285 void LineParser::readDouble( double& o_Value )
286 {
287  std::string_view tok = readNextToken();
288  o_Value = rtl_math_stringToDouble(tok.data(), tok.data() + tok.size(), '.', 0,
289  nullptr, nullptr);
290 }
291 
292 double LineParser::readDouble()
293 {
294  std::string_view tok = readNextToken();
295  return rtl_math_stringToDouble(tok.data(), tok.data() + tok.size(), '.', 0,
296  nullptr, nullptr);
297 }
298 
299 void LineParser::readBinaryData( uno::Sequence<sal_Int8>& rBuf )
300 {
301  sal_Int32 nFileLen( rBuf.getLength() );
302  sal_Int8* pBuf( rBuf.getArray() );
303  sal_uInt64 nBytesRead(0);
304  oslFileError nRes=osl_File_E_None;
305  while( nFileLen )
306  {
307  nRes = osl_readFile( m_parser.m_pErr, pBuf, nFileLen, &nBytesRead );
308  if (osl_File_E_None != nRes )
309  break;
310  pBuf += nBytesRead;
311  nFileLen -= sal::static_int_cast<sal_Int32>(nBytesRead);
312  }
313 
314  OSL_PRECOND(nRes==osl_File_E_None, "inconsistent data");
315 }
316 
317 uno::Reference<rendering::XPolyPolygon2D> LineParser::readPath()
318 {
319  static const std::string_view aSubPathMarker( "subpath" );
320 
321  if( readNextToken() != aSubPathMarker )
322  OSL_PRECOND(false, "broken path");
323 
324  basegfx::B2DPolyPolygon aResult;
325  while( m_nCharIndex != std::string_view::npos )
326  {
327  basegfx::B2DPolygon aSubPath;
328 
329  sal_Int32 nClosedFlag;
330  readInt32( nClosedFlag );
331  aSubPath.setClosed( nClosedFlag != 0 );
332 
333  sal_Int32 nContiguousControlPoints(0);
334 
335  while( m_nCharIndex != std::string_view::npos )
336  {
337  std::size_t nDummy=m_nCharIndex;
338  if (o3tl::getToken(m_aLine,' ',nDummy) == aSubPathMarker) {
339  break;
340  }
341 
342  sal_Int32 nCurveFlag;
343  double nX, nY;
344  readDouble( nX );
345  readDouble( nY );
346  readInt32( nCurveFlag );
347 
348  aSubPath.append(basegfx::B2DPoint(nX,nY));
349  if( nCurveFlag )
350  {
351  ++nContiguousControlPoints;
352  }
353  else if( nContiguousControlPoints )
354  {
355  OSL_PRECOND(nContiguousControlPoints==2,"broken bezier path");
356 
357  // have two control points before us. the current one
358  // is a normal point - thus, convert previous points
359  // into bezier segment
360  const sal_uInt32 nPoints( aSubPath.count() );
361  const basegfx::B2DPoint aCtrlA( aSubPath.getB2DPoint(nPoints-3) );
362  const basegfx::B2DPoint aCtrlB( aSubPath.getB2DPoint(nPoints-2) );
363  const basegfx::B2DPoint aEnd( aSubPath.getB2DPoint(nPoints-1) );
364  aSubPath.remove(nPoints-3, 3);
365  aSubPath.appendBezierSegment(aCtrlA, aCtrlB, aEnd);
366 
367  nContiguousControlPoints=0;
368  }
369  }
370 
371  aResult.append( aSubPath );
372  if( m_nCharIndex != std::string_view::npos )
373  readNextToken();
374  }
375 
376  return static_cast<rendering::XLinePolyPolygon2D*>(
377  new basegfx::unotools::UnoPolyPolygon(aResult));
378 }
379 
380 void LineParser::readChar()
381 {
382  double fontSize;
383  geometry::Matrix2D aUnoMatrix;
384  geometry::RealRectangle2D aRect;
385 
386  readDouble(aRect.X1);
387  readDouble(aRect.Y1);
388  readDouble(aRect.X2);
389  readDouble(aRect.Y2);
390  readDouble(aUnoMatrix.m00);
391  readDouble(aUnoMatrix.m01);
392  readDouble(aUnoMatrix.m10);
393  readDouble(aUnoMatrix.m11);
394  readDouble(fontSize);
395 
396  OString aChars;
397 
398  if (m_nCharIndex != std::string_view::npos)
399  aChars = lcl_unescapeLineFeeds( m_aLine.subView( m_nCharIndex ) );
400 
401  // chars gobble up rest of line
402  m_nCharIndex = std::string_view::npos;
403 
404  m_parser.m_pSink->drawGlyphs(OStringToOUString(aChars, RTL_TEXTENCODING_UTF8),
405  aRect, aUnoMatrix, fontSize);
406 }
407 
408 void LineParser::readLineCap()
409 {
410  sal_Int8 nCap(rendering::PathCapType::BUTT);
411  switch( readInt32() )
412  {
413  default:
414  case 0: nCap = rendering::PathCapType::BUTT; break;
415  case 1: nCap = rendering::PathCapType::ROUND; break;
416  case 2: nCap = rendering::PathCapType::SQUARE; break;
417  }
418  m_parser.m_pSink->setLineCap(nCap);
419 }
420 
421 void LineParser::readLineDash()
422 {
423  if( m_nCharIndex == std::string_view::npos )
424  {
425  m_parser.m_pSink->setLineDash( uno::Sequence<double>(), 0.0 );
426  return;
427  }
428 
429  const double nOffset(readDouble());
430  const sal_Int32 nLen(readInt32());
431 
432  uno::Sequence<double> aDashArray(nLen);
433  double* pArray=aDashArray.getArray();
434  for( sal_Int32 i=0; i<nLen; ++i )
435  *pArray++ = readDouble();
436 
437  m_parser.m_pSink->setLineDash( aDashArray, nOffset );
438 }
439 
440 void LineParser::readLineJoin()
441 {
442  sal_Int8 nJoin(rendering::PathJoinType::MITER);
443  switch( readInt32() )
444  {
445  default:
446  case 0: nJoin = rendering::PathJoinType::MITER; break;
447  case 1: nJoin = rendering::PathJoinType::ROUND; break;
448  case 2: nJoin = rendering::PathJoinType::BEVEL; break;
449  }
450  m_parser.m_pSink->setLineJoin(nJoin);
451 }
452 
453 void LineParser::readTransformation()
454 {
455  geometry::AffineMatrix2D aMat;
456  readDouble(aMat.m00);
457  readDouble(aMat.m10);
458  readDouble(aMat.m01);
459  readDouble(aMat.m11);
460  readDouble(aMat.m02);
461  readDouble(aMat.m12);
462  m_parser.m_pSink->setTransformation( aMat );
463 }
464 
465 rendering::ARGBColor LineParser::readColor()
466 {
467  rendering::ARGBColor aRes;
468  readDouble(aRes.Red);
469  readDouble(aRes.Green);
470  readDouble(aRes.Blue);
471  readDouble(aRes.Alpha);
472  return aRes;
473 }
474 
475 /* Parse and convert the font family name (passed from xpdfimport) to correct font names
476 e.g. TimesNewRomanPSMT -> TimesNewRoman
477  TimesNewRomanPS-BoldMT -> TimesNewRoman
478  TimesNewRomanPS-BoldItalicMT -> TimesNewRoman
479 During the conversion, also apply the font features (bold italic etc) to the result.
480 
481 TODO: Further convert the font names to real font names in the system rather than the PS names.
482 e.g., TimesNewRoman -> Times New Roman
483 */
484 void LineParser::parseFontFamilyName( FontAttributes& rResult )
485 {
486  SAL_INFO("sdext.pdfimport", "Processing " << rResult.familyName << " ---");
487  rResult.familyName = rResult.familyName.trim();
488  for (const OUString& fontAttributesSuffix: fontAttributesSuffixes)
489  {
490  if ( rResult.familyName.endsWith(fontAttributesSuffix) )
491  {
492  rResult.familyName = rResult.familyName.replaceAll(fontAttributesSuffix, "");
493  SAL_INFO("sdext.pdfimport", rResult.familyName);
494  if (fontAttributesSuffix == "Bold")
495  {
496  rResult.isBold = true;
497  } else if ( (fontAttributesSuffix == "Italic") or (fontAttributesSuffix == "Oblique") )
498  {
499  rResult.isItalic = true;
500  }
501  }
502  }
503 }
504 
505 void LineParser::readFont()
506 {
507  /*
508  xpdf line is like (separated by space):
509  updateFont <FontID> <isEmbedded> <isBold> <isItalic> <isUnderline> <TransformedFontSize> <nEmbedSize> <FontName>
510  updateFont 14 1 0 0 0 1200.000000 23068 TimesNewRomanPSMT
511 
512  If nEmbedSize > 0, then a fontFile is followed as a stream.
513  */
514 
515  OString aFontName;
516  sal_Int64 nFontID;
517  sal_Int32 nIsEmbedded, nIsBold, nIsItalic, nIsUnderline, nFileLen;
518  double nSize;
519 
520  readInt64(nFontID); // read FontID
521  readInt32(nIsEmbedded); // read isEmbedded
522  readInt32(nIsBold); // read isBold
523  readInt32(nIsItalic); // read isItalic
524  readInt32(nIsUnderline);// read isUnderline
525  readDouble(nSize); // read TransformedFontSize
526  readInt32(nFileLen); // read nEmbedSize
527 
528  nSize = nSize < 0.0 ? -nSize : nSize;
529  // Read FontName. From the current position to the end (any white spaces will be included).
530  aFontName = lcl_unescapeLineFeeds(m_aLine.subView(m_nCharIndex));
531 
532  // name gobbles up rest of line
533  m_nCharIndex = std::string_view::npos;
534 
535  // Check if this font is already in our font map list.
536  // If yes, update the font size and skip.
537  Parser::FontMapType::const_iterator pFont( m_parser.m_aFontMap.find(nFontID) );
538  if( pFont != m_parser.m_aFontMap.end() )
539  {
540  OSL_PRECOND(nFileLen==0,"font data for known font");
541  FontAttributes aRes(pFont->second);
542  aRes.size = nSize;
543  m_parser.m_pSink->setFont( aRes );
544 
545  return;
546  }
547 
548  // yet unknown font - get info and add to map
549  FontAttributes aResult( OStringToOUString( aFontName, RTL_TEXTENCODING_UTF8 ),
550  nIsBold != 0,
551  nIsItalic != 0,
552  nIsUnderline != 0,
553  nSize,
554  1.0);
555 
556  /* The above font attributes (fontName, bold, italic) are based on
557  xpdf line output and may not be reliable. To get correct attributes,
558  we do the following:
559  1. Read the embedded font file and determine the attributes based on the
560  font file.
561  2. If we failed to read the font file, or empty result is returned, then
562  determine the font attributes from the font name.
563  3. If all these attempts have failed, then use a fallback font.
564  */
565  if (nFileLen > 0)
566  {
567  uno::Sequence<sal_Int8> aFontFile(nFileLen);
568  readBinaryData(aFontFile); // Read fontFile.
569 
570  uno::Sequence<uno::Any> aArgs(1);
571  awt::FontDescriptor aFontDescriptor;
572  aArgs[0] <<= aFontFile;
573 
574  try
575  {
576  uno::Reference<beans::XMaterialHolder> xHolder(
577  m_parser.m_xContext->getServiceManager()->createInstanceWithArgumentsAndContext(
578  "com.sun.star.awt.FontIdentificator", aArgs, m_parser.m_xContext),
579  uno::UNO_QUERY);
580  if (xHolder.is())
581  {
582  uno::Any aFontReadResult(xHolder->getMaterial());
583  aFontReadResult >>= aFontDescriptor;
584  if (!aFontDescriptor.Name.isEmpty())
585  {
586  aResult.familyName = aFontDescriptor.Name;
587  // tdf#143959: there are cases when the family name returned by font descriptor
588  // is like "AAAAAA+TimesNewRoman,Bold". In this case, use the font name
589  // determined by parseFontFamilyName instead, but still determine the font
590  // attributes (bold italic etc) from the font descriptor.
591  if (aResult.familyName.getLength() > 7 and aResult.familyName.indexOf(u"+", 6) == 6)
592  {
593  aResult.familyName = aResult.familyName.copy(7, aResult.familyName.getLength() - 7);
594  parseFontFamilyName(aResult);
595  }
596  aResult.isBold = (aFontDescriptor.Weight > 100.0);
597  aResult.isItalic = (aFontDescriptor.Slant == awt::FontSlant_OBLIQUE ||
598  aFontDescriptor.Slant == awt::FontSlant_ITALIC);
599  } else
600  {
601  SAL_WARN("sdext.pdfimport",
602  "Font detection from fontFile returned empty result.\
603  Guessing font info from font name.");
604  parseFontFamilyName(aResult);
605  }
606  } else
607  {
608  SAL_WARN("sdext.pdfimport",
609  "Failed to run FontIdentificator service.\
610  Guessing font info from font name.");
611  parseFontFamilyName(aResult);
612  }
613  } catch (uno::Exception&)
614  {
615  TOOLS_WARN_EXCEPTION("sdext.pdfimport", "Exception when trying to read font file.");
616  parseFontFamilyName(aResult);
617  }
618  } else
619  parseFontFamilyName(aResult);
620 
621  // last fallback
622  if (aResult.familyName.isEmpty())
623  {
624  SAL_WARN("sdext.pdfimport", "Failed to determine the font, using a fallback font Arial.");
625  aResult.familyName = "Arial";
626  }
627 
628  if (!m_parser.m_xDev)
629  m_parser.m_xDev.disposeAndReset(VclPtr<VirtualDevice>::Create());
630 
631  vcl::Font font(aResult.familyName, Size(0, 1000));
632  m_parser.m_xDev->SetFont(font);
633  FontMetric metric(m_parser.m_xDev->GetFontMetric());
634  aResult.ascent = metric.GetAscent() / 1000.0;
635 
636  m_parser.m_aFontMap[nFontID] = aResult;
637 
638  aResult.size = nSize;
639  m_parser.m_pSink->setFont(aResult);
640 }
641 
642 uno::Sequence<beans::PropertyValue> LineParser::readImageImpl()
643 {
644  std::string_view aToken = readNextToken();
645  const sal_Int32 nImageSize( readInt32() );
646 
647  OUString aFileName;
648  if( aToken == "PNG" )
649  aFileName = "DUMMY.PNG";
650  else if( aToken == "JPEG" )
651  aFileName = "DUMMY.JPEG";
652  else if( aToken == "PBM" )
653  aFileName = "DUMMY.PBM";
654  else
655  {
656  SAL_WARN_IF(aToken != "PPM","sdext.pdfimport","Invalid bitmap format");
657  aFileName = "DUMMY.PPM";
658  }
659 
660  uno::Sequence<sal_Int8> aDataSequence(nImageSize);
661  readBinaryData( aDataSequence );
662 
663  uno::Sequence< uno::Any > aStreamCreationArgs(1);
664  aStreamCreationArgs[0] <<= aDataSequence;
665 
666  uno::Reference< uno::XComponentContext > xContext( m_parser.m_xContext, uno::UNO_SET_THROW );
667  uno::Reference< lang::XMultiComponentFactory > xFactory( xContext->getServiceManager(), uno::UNO_SET_THROW );
668  uno::Reference< io::XInputStream > xDataStream(
669  xFactory->createInstanceWithArgumentsAndContext( "com.sun.star.io.SequenceInputStream", aStreamCreationArgs, m_parser.m_xContext ),
670  uno::UNO_QUERY_THROW );
671 
672  uno::Sequence<beans::PropertyValue> aSequence( comphelper::InitPropertySequence({
673  { "URL", uno::makeAny(aFileName) },
674  { "InputStream", uno::makeAny( xDataStream ) },
675  { "InputSequence", uno::makeAny(aDataSequence) }
676  }));
677 
678  return aSequence;
679 }
680 
681 void LineParser::readImage()
682 {
683  sal_Int32 nWidth, nHeight,nMaskColors;
684  readInt32(nWidth);
685  readInt32(nHeight);
686  readInt32(nMaskColors);
687 
688  uno::Sequence<beans::PropertyValue> aImg( readImageImpl() );
689 
690  if( nMaskColors )
691  {
692  uno::Sequence<sal_Int8> aDataSequence(nMaskColors);
693  readBinaryData( aDataSequence );
694 
695  uno::Sequence<uno::Any> aMaskRanges(2);
696 
697  uno::Sequence<double> aMinRange(nMaskColors/2);
698  uno::Sequence<double> aMaxRange(nMaskColors/2);
699  for( sal_Int32 i=0; i<nMaskColors/2; ++i )
700  {
701  aMinRange[i] = aDataSequence[i] / 255.0;
702  aMaxRange[i] = aDataSequence[i+nMaskColors/2] / 255.0;
703  }
704 
705  aMaskRanges[0] <<= aMinRange;
706  aMaskRanges[1] <<= aMaxRange;
707 
708  m_parser.m_pSink->drawColorMaskedImage( aImg, aMaskRanges );
709  }
710  else
711  m_parser.m_pSink->drawImage( aImg );
712 }
713 
714 void LineParser::readMask()
715 {
716  sal_Int32 nWidth, nHeight, nInvert;
717  readInt32(nWidth);
718  readInt32(nHeight);
719  readInt32(nInvert);
720 
721  m_parser.m_pSink->drawMask( readImageImpl(), nInvert != 0);
722 }
723 
724 void LineParser::readLink()
725 {
726  geometry::RealRectangle2D aBounds;
727  readDouble(aBounds.X1);
728  readDouble(aBounds.Y1);
729  readDouble(aBounds.X2);
730  readDouble(aBounds.Y2);
731 
732  m_parser.m_pSink->hyperLink( aBounds,
733  OStringToOUString( lcl_unescapeLineFeeds(
734  m_aLine.subView(m_nCharIndex) ),
735  RTL_TEXTENCODING_UTF8 ) );
736  // name gobbles up rest of line
737  m_nCharIndex = std::string_view::npos;
738 }
739 
740 void LineParser::readMaskedImage()
741 {
742  sal_Int32 nWidth, nHeight, nMaskWidth, nMaskHeight, nMaskInvert;
743  readInt32(nWidth);
744  readInt32(nHeight);
745  readInt32(nMaskWidth);
746  readInt32(nMaskHeight);
747  readInt32(nMaskInvert);
748 
749  const uno::Sequence<beans::PropertyValue> aImage( readImageImpl() );
750  const uno::Sequence<beans::PropertyValue> aMask ( readImageImpl() );
751  m_parser.m_pSink->drawMaskedImage( aImage, aMask, nMaskInvert != 0 );
752 }
753 
754 void LineParser::readSoftMaskedImage()
755 {
756  sal_Int32 nWidth, nHeight, nMaskWidth, nMaskHeight;
757  readInt32(nWidth);
758  readInt32(nHeight);
759  readInt32(nMaskWidth);
760  readInt32(nMaskHeight);
761 
762  const uno::Sequence<beans::PropertyValue> aImage( readImageImpl() );
763  const uno::Sequence<beans::PropertyValue> aMask ( readImageImpl() );
764  m_parser.m_pSink->drawAlphaMaskedImage( aImage, aMask );
765 }
766 
767 void Parser::parseLine( const OString& rLine )
768 {
769  OSL_PRECOND( m_pSink, "Invalid sink" );
770  OSL_PRECOND( m_pErr, "Invalid filehandle" );
771  OSL_PRECOND( m_xContext.is(), "Invalid service factory" );
772 
773  LineParser lp(*this, rLine);
774  const std::string_view rCmd = lp.readNextToken();
775  const hash_entry* pEntry = PdfKeywordHash::in_word_set( rCmd.data(),
776  rCmd.size() );
777  OSL_ASSERT(pEntry);
778  switch( pEntry->eKey )
779  {
780  case CLIPPATH:
781  m_pSink->intersectClip(lp.readPath()); break;
782  case DRAWCHAR:
783  lp.readChar(); break;
784  case DRAWIMAGE:
785  lp.readImage(); break;
786  case DRAWLINK:
787  lp.readLink(); break;
788  case DRAWMASK:
789  lp.readMask(); break;
790  case DRAWMASKEDIMAGE:
791  lp.readMaskedImage(); break;
792  case DRAWSOFTMASKEDIMAGE:
793  lp.readSoftMaskedImage(); break;
794  case ENDPAGE:
795  m_pSink->endPage(); break;
796  case ENDTEXTOBJECT:
797  m_pSink->endText(); break;
798  case EOCLIPPATH:
799  m_pSink->intersectEoClip(lp.readPath()); break;
800  case EOFILLPATH:
801  m_pSink->eoFillPath(lp.readPath()); break;
802  case FILLPATH:
803  m_pSink->fillPath(lp.readPath()); break;
804  case RESTORESTATE:
805  m_pSink->popState(); break;
806  case SAVESTATE:
807  m_pSink->pushState(); break;
808  case SETPAGENUM:
809  m_pSink->setPageNum( lp.readInt32() ); break;
810  case STARTPAGE:
811  {
812  const double nWidth ( lp.readDouble() );
813  const double nHeight( lp.readDouble() );
814  m_pSink->startPage( geometry::RealSize2D( nWidth, nHeight ) );
815  break;
816  }
817  case STROKEPATH:
818  m_pSink->strokePath(lp.readPath()); break;
819  case UPDATECTM:
820  lp.readTransformation(); break;
821  case UPDATEFILLCOLOR:
822  m_pSink->setFillColor( lp.readColor() ); break;
823  case UPDATEFLATNESS:
824  m_pSink->setFlatness( lp.readDouble( ) ); break;
825  case UPDATEFONT:
826  lp.readFont(); break;
827  case UPDATELINECAP:
828  lp.readLineCap(); break;
829  case UPDATELINEDASH:
830  lp.readLineDash(); break;
831  case UPDATELINEJOIN:
832  lp.readLineJoin(); break;
833  case UPDATELINEWIDTH:
834  m_pSink->setLineWidth( lp.readDouble() );break;
835  case UPDATEMITERLIMIT:
836  m_pSink->setMiterLimit( lp.readDouble() ); break;
837  case UPDATESTROKECOLOR:
838  m_pSink->setStrokeColor( lp.readColor() ); break;
839  case UPDATESTROKEOPACITY:
840  break;
841  case SETTEXTRENDERMODE:
842  m_pSink->setTextRenderMode( lp.readInt32() ); break;
843 
844  case NONE:
845  default:
846  OSL_PRECOND(false,"Unknown input");
847  break;
848  }
849 
850  // all consumed?
851  SAL_WARN_IF(
852  lp.m_nCharIndex!=std::string_view::npos, "sdext.pdfimport", "leftover scanner input");
853 }
854 
855 } // namespace
856 
// Checks whether the PDF file at i_rPath is encrypted and, if it is, tries
// to set up decryption with a password.
//
// i_rPath        system path of the PDF file
// i_xIHdl        interaction handler used to ask for a password; may be unset
// io_rPwd        password to try first; overwritten by getPassword() when
//                the user is asked
// o_rIsEncrypted set to PDFFile::isEncrypted(); only written if the file
//                could be read and is a PDFFile
// i_rDocName     document name passed on to getPassword()
//
// Returns true if the file is not encrypted, or if it is encrypted and
// setupDecryptionData() accepted a password. Returns false in all other
// cases, including a file that could not be read.
857 static bool checkEncryption( std::u16string_view i_rPath,
858  const uno::Reference< task::XInteractionHandler >& i_xIHdl,
859  OUString& io_rPwd,
860  bool& o_rIsEncrypted,
861  const OUString& i_rDocName
862  )
863 {
864  bool bSuccess = false;
865  OString aPDFFile = OUStringToOString( i_rPath, osl_getThreadTextEncoding() );
866 
867  std::unique_ptr<pdfparse::PDFEntry> pEntry( pdfparse::PDFReader::read( aPDFFile.getStr() ));
868  if( pEntry )
869  {
870  pdfparse::PDFFile* pPDFFile = dynamic_cast<pdfparse::PDFFile*>(pEntry.get());
871  if( pPDFFile )
872  {
873  o_rIsEncrypted = pPDFFile->isEncrypted();
874  if( o_rIsEncrypted )
875  {
876  if( pPDFFile->usesSupportedEncryptionFormat() )
877  {
878  bool bAuthenticated = false;
// first try the password that was passed in, without asking anybody
879  if( !io_rPwd.isEmpty() )
880  {
881  OString aIsoPwd = OUStringToOString( io_rPwd,
882  RTL_TEXTENCODING_ISO_8859_1 );
883  bAuthenticated = pPDFFile->setupDecryptionData( aIsoPwd.getStr() );
884  }
885  if( bAuthenticated )
886  bSuccess = true;
887  else
888  {
889  if( i_xIHdl.is() )
890  {
// Ask until a password is accepted or getPassword() returns false.
// "! bEntered" is true on the first round only, i.e. it is the
// bFirstTry argument of getPassword().
891  bool bEntered = false;
892  do
893  {
894  bEntered = getPassword( i_xIHdl, io_rPwd, ! bEntered, i_rDocName );
895  OString aIsoPwd = OUStringToOString( io_rPwd,
896  RTL_TEXTENCODING_ISO_8859_1 );
897  bAuthenticated = pPDFFile->setupDecryptionData( aIsoPwd.getStr() );
898  } while( bEntered && ! bAuthenticated );
899  }
900 
901  bSuccess = bAuthenticated;
902  }
903  }
904  else if( i_xIHdl.is() )
905  {
// NOTE(review): this listing shows no statement in this branch (listing
// line 906 is missing) -- confirm against the real file what is done for an
// unsupported encryption format. bSuccess stays false here.
907  //TODO: this should either be handled further down the
908  // call stack, or else information that this has already
909  // been handled should be passed down the call stack, so
910  // that SfxBaseModel::load does not show an additional
911  // "General Error" message box
912  }
913  }
914  else
915  bSuccess = true;
916  }
917  }
918  return bSuccess;
919 }
920 
921 namespace {
922 
923 class Buffering
924 {
925  static const int SIZE = 64*1024;
926  std::unique_ptr<char[]> aBuffer;
927  oslFileHandle& pOut;
928  size_t pos;
929  sal_uInt64 left;
930 
931 public:
932  explicit Buffering(oslFileHandle& out) : aBuffer(new char[SIZE]), pOut(out), pos(0), left(0) {}
933 
934  oslFileError read(char *pChar, short count, sal_uInt64* pBytesRead)
935  {
936  oslFileError nRes = osl_File_E_None;
937  sal_uInt64 nBytesRead = 0;
938  while (count > 0)
939  {
940  if (left == 0)
941  {
942  nRes = osl_readFile(pOut, aBuffer.get(), SIZE, &left);
943  if (nRes != osl_File_E_None || left == 0)
944  {
945  *pBytesRead = nBytesRead;
946  return nRes;
947  }
948  pos = 0;
949  }
950  *pChar = aBuffer.get()[pos];
951  --count;
952  ++pos;
953  --left;
954  ++pChar;
955  ++nBytesRead;
956  }
957  *pBytesRead = nBytesRead;
958  return osl_File_E_None;
959  }
960 };
961 
962 }
963 
// Imports the PDF file at rURL by running the external xpdfimport
// executable and feeding its output to rSink.
//
// rURL           file URL of the PDF
// rSink          receiver of the parsed content
// xIHdl          interaction handler, passed to checkEncryption()
// rPwd           password to try first if the file is encrypted
// xContext       component context, passed to the Parser
// rFilterOptions if not empty, passed to the converter as "-o <options>"
//
// Steps: convert the URL to a system path, check for encryption (which may
// ask for a password), start the converter with redirected I/O, write the
// password line to its input handle, then read its pOut handle line by line
// and hand each line to Parser::parseLine(). The pErr handle is given to the
// Parser. Finally the handles are closed and the process is joined.
//
// Returns false if a path conversion or checkEncryption() fails, if the
// process cannot be started, joined or queried, if it exits with a non-zero
// code, or if a UNO exception is thrown while parsing.
964 bool xpdf_ImportFromFile(const OUString& rURL,
965  const ContentSinkSharedPtr& rSink,
966  const uno::Reference<task::XInteractionHandler>& xIHdl,
967  const OUString& rPwd,
968  const uno::Reference<uno::XComponentContext>& xContext,
969  const OUString& rFilterOptions)
970 {
971  OSL_ASSERT(rSink);
972 
973  OUString aSysUPath;
974  if( osl_getSystemPathFromFileURL( rURL.pData, &aSysUPath.pData ) != osl_File_E_None )
975  {
976  SAL_WARN(
977  "sdext.pdfimport",
978  "getSystemPathFromFileURL(" << rURL << ") failed");
979  return false;
980  }
// last path segment of the URL, used as document name for the password dialog
981  OUString aDocName( rURL.copy( rURL.lastIndexOf( '/' )+1 ) );
982 
983  // check for encryption, if necessary get password
984  OUString aPwd( rPwd );
985  bool bIsEncrypted = false;
986  if( !checkEncryption( aSysUPath, xIHdl, aPwd, bIsEncrypted, aDocName ) )
987  {
988  SAL_INFO(
989  "sdext.pdfimport",
990  "checkEncryption(" << aSysUPath << ") failed");
991  return false;
992  }
993 
994  // Determine xpdfimport executable URL:
995  OUString converterURL("$BRAND_BASE_DIR/" LIBO_BIN_FOLDER "/xpdfimport");
996  rtl::Bootstrap::expandMacros(converterURL); //TODO: detect failure
997 
998  // Determine pathname of xpdfimport_err.pdf:
999  OUString errPathname("$BRAND_BASE_DIR/" LIBO_SHARE_FOLDER "/xpdfimport/xpdfimport_err.pdf");
1000  rtl::Bootstrap::expandMacros(errPathname); //TODO: detect failure
1001  if (osl::FileBase::getSystemPathFromFileURL(errPathname, errPathname)
1002  != osl::FileBase::E_None)
1003  {
1004  SAL_WARN(
1005  "sdext.pdfimport",
1006  "getSystemPathFromFileURL(" << errPathname << ") failed");
1007  return false;
1008  }
1009 
1010  // spawn separate process to keep LGPL/GPL code apart.
1011 
// Arguments: <pdf path> <error pdf path> [-o <filter options>]; the last
// two are only passed when filter options were given.
1012  OUString aOptFlag("-o");
1013  rtl_uString* args[] = { aSysUPath.pData, errPathname.pData,
1014  aOptFlag.pData, rFilterOptions.pData };
1015  sal_Int32 nArgs = rFilterOptions.isEmpty() ? 2 : 4;
1016 
1017  oslProcess aProcess;
1018  oslFileHandle pIn = nullptr;
1019  oslFileHandle pOut = nullptr;
1020  oslFileHandle pErr = nullptr;
1021  oslSecurity pSecurity = osl_getCurrentSecurity ();
1022  oslProcessError eErr =
1023  osl_executeProcess_WithRedirectedIO(converterURL.pData,
1024  args,
1025  nArgs,
1026  osl_Process_SEARCHPATH|osl_Process_HIDDEN,
1027  pSecurity,
1028  nullptr, nullptr, 0,
1029  &aProcess, &pIn, &pOut, &pErr);
1030  osl_freeSecurityHandle(pSecurity);
1031 
1032  bool bRet=true;
1033  try
1034  {
1035  if( eErr!=osl_Process_E_None )
1036  {
1037  SAL_WARN(
1038  "sdext.pdfimport",
1039  "executeProcess of " << converterURL << " failed with "
1040  << +eErr);
// NOTE(review): this return bypasses the handle cleanup and the process
// join below -- presumably nothing was opened when starting failed; confirm.
1041  return false;
1042  }
1043 
1044  if( pIn )
1045  {
// The converter gets one line on its input: the password if the file is
// encrypted, otherwise an empty line.
1046  OStringBuffer aBuf(256);
1047  if( bIsEncrypted )
1048  aBuf.append( OUStringToOString( aPwd, RTL_TEXTENCODING_ISO_8859_1 ) );
1049  aBuf.append( '\n' );
1050 
1051  sal_uInt64 nWritten = 0;
1052  osl_writeFile( pIn, aBuf.getStr(), sal_uInt64(aBuf.getLength()), &nWritten );
1053  }
1054 
1055  if( pOut && pErr )
1056  {
1057  // read results of PDF parser. One line - one call to
1058  // OutputDev. stderr is used for alternate streams, like
1059  // embedded fonts and bitmaps
1060  Parser aParser(rSink,pErr,xContext);
1061  Buffering aBuffering(pOut);
1062  OStringBuffer line;
// One iteration per line; the loop ends on a read error or when a line
// turns out empty (which is also what happens at the end of the output).
1063  for( ;; )
1064  {
1065  char aChar('\n');
1066  sal_uInt64 nBytesRead;
1067  oslFileError nRes;
1068 
1069  // skip garbage \r \n at start of line
1070  for (;;)
1071  {
1072  nRes = aBuffering.read(&aChar, 1, &nBytesRead);
1073  if (osl_File_E_None != nRes || nBytesRead != 1 || (aChar != '\n' && aChar != '\r') )
1074  break;
1075  }
1076  if ( osl_File_E_None != nRes )
1077  break;
1078 
// aChar now holds the first character of the line, unless nothing could
// be read
1079  if( aChar != '\n' && aChar != '\r' )
1080  line.append( aChar );
1081 
// collect the rest of the line up to the next line end character
1082  for (;;)
1083  {
1084  nRes = aBuffering.read(&aChar, 1, &nBytesRead);
1085  if ( osl_File_E_None != nRes || nBytesRead != 1 || aChar == '\n' || aChar == '\r' )
1086  break;
1087  line.append( aChar );
1088  }
1089  if ( osl_File_E_None != nRes )
1090  break;
1091  if ( line.isEmpty() )
1092  break;
1093 
1094  aParser.parseLine(line.makeStringAndClear());
1095  }
1096  }
1097  }
1098  catch( uno::Exception& )
1099  {
1100  // crappy C file interface. need manual resource dealloc
1101  bRet = false;
1102  }
1103 
1104  if( pIn )
1105  osl_closeFile(pIn);
1106  if( pOut )
1107  osl_closeFile(pOut);
1108  if( pErr )
1109  osl_closeFile(pErr);
// wait for the converter to finish and evaluate its exit code
1110  eErr = osl_joinProcess(aProcess);
1111  if (eErr == osl_Process_E_None)
1112  {
1113  oslProcessInfo info;
1114  info.Size = sizeof info;
1115  eErr = osl_getProcessInfo(aProcess, osl_Process_EXITCODE, &info);
1116  if (eErr == osl_Process_E_None)
1117  {
1118  if (info.Code != 0)
1119  {
1120  SAL_WARN(
1121  "sdext.pdfimport",
1122  "getProcessInfo of " << converterURL
1123  << " failed with exit code " << info.Code);
1124  bRet = false;
1125  }
1126  }
1127  else
1128  {
1129  SAL_WARN(
1130  "sdext.pdfimport",
1131  "getProcessInfo of " << converterURL << " failed with "
1132  << +eErr);
1133  bRet = false;
1134  }
1135  }
1136  else
1137  {
1138  SAL_WARN(
1139  "sdext.pdfimport",
1140  "joinProcess of " << converterURL << " failed with " << +eErr);
1141  bRet = false;
1142  }
1143  osl_freeProcessHandle(aProcess);
1144  return bRet;
1145 }
1146 
1147 
1148 bool xpdf_ImportFromStream( const uno::Reference< io::XInputStream >& xInput,
1149  const ContentSinkSharedPtr& rSink,
1150  const uno::Reference<task::XInteractionHandler >& xIHdl,
1151  const OUString& rPwd,
1152  const uno::Reference< uno::XComponentContext >& xContext,
1153  const OUString& rFilterOptions )
1154 {
1155  OSL_ASSERT(xInput.is());
1156  OSL_ASSERT(rSink);
1157 
1158  // convert XInputStream to local temp file
1159  oslFileHandle aFile = nullptr;
1160  OUString aURL;
1161  if( osl_createTempFile( nullptr, &aFile, &aURL.pData ) != osl_File_E_None )
1162  return false;
1163 
1164  // copy content, buffered...
1165  const sal_uInt32 nBufSize = 4096;
1166  uno::Sequence<sal_Int8> aBuf( nBufSize );
1167  sal_uInt64 nBytes = 0;
1168  sal_uInt64 nWritten = 0;
1169  bool bSuccess = true;
1170  do
1171  {
1172  try
1173  {
1174  nBytes = xInput->readBytes( aBuf, nBufSize );
1175  }
1176  catch( css::uno::Exception& )
1177  {
1178  osl_closeFile( aFile );
1179  throw;
1180  }
1181  if( nBytes > 0 )
1182  {
1183  osl_writeFile( aFile, aBuf.getConstArray(), nBytes, &nWritten );
1184  if( nWritten != nBytes )
1185  {
1186  bSuccess = false;
1187  break;
1188  }
1189  }
1190  }
1191  while( nBytes == nBufSize );
1192 
1193  osl_closeFile( aFile );
1194 
1195  if ( bSuccess )
1196  bSuccess = xpdf_ImportFromFile( aURL, rSink, xIHdl, rPwd, xContext, rFilterOptions );
1197  osl_removeFile( aURL.pData );
1198 
1199  return bSuccess;
1200 }
1201 
1202 }
1203 
1204 /* vim:set shiftwidth=4 softtabstop=4 expandtab: */
tuple line
void reportUnsupportedEncryptionFormat(css::uno::Reference< css::task::XInteractionHandler > const &handler)
URL aURL
static bool checkEncryption(std::u16string_view i_rPath, const uno::Reference< task::XInteractionHandler > &i_xIHdl, OUString &io_rPwd, bool &o_rIsEncrypted, const OUString &i_rDocName)
Definition: wrapper.cxx:857
void append(const basegfx::B2DPoint &rPoint, sal_uInt32 nCount)
void remove(sal_uInt32 nIndex, sal_uInt32 nCount=1)
sal_uInt64 left
Definition: wrapper.cxx:929
signed char sal_Int8
static std::unique_ptr< PDFEntry > read(const char *pFileName)
Definition: pdfparse.cxx:608
sal_Int64 n
aBuf
void appendBezierSegment(const basegfx::B2DPoint &rNextControlPoint, const basegfx::B2DPoint &rPrevControlPoint, const basegfx::B2DPoint &rPoint)
tuple args
NONE
bool xpdf_ImportFromFile(const OUString &rURL, const ContentSinkSharedPtr &rSink, const uno::Reference< task::XInteractionHandler > &xIHdl, const OUString &rPwd, const uno::Reference< uno::XComponentContext > &xContext, const OUString &rFilterOptions)
Definition: wrapper.cxx:964
size_t pos
Definition: wrapper.cxx:928
oslFileHandle & pOut
Definition: wrapper.cxx:927
Parser & m_parser
Definition: wrapper.cxx:169
OString OUStringToOString(std::u16string_view str, ConnectionSettings const *settings)
css::uno::Sequence< css::beans::PropertyValue > InitPropertySequence(::std::initializer_list< ::std::pair< OUString, css::uno::Any > > vInit)
bool usesSupportedEncryptionFormat() const
std::string_view getToken(std::string_view sv, char delimiter, std::size_t &position)
#define TOOLS_WARN_EXCEPTION(area, stream)
int i
bool isEncrypted() const
bool getPassword(const css::uno::Reference< css::task::XInteractionHandler > &xHandler, OUString &rOutPwd, bool bFirstTry, const OUString &rDocName)
retrieve password from user
static const int SIZE
Definition: wrapper.cxx:925
const OUString fontAttributesSuffixes[]
Definition: wrapper.hxx:56
bool xpdf_ImportFromStream(const uno::Reference< io::XInputStream > &xInput, const ContentSinkSharedPtr &rSink, const uno::Reference< task::XInteractionHandler > &xIHdl, const OUString &rPwd, const uno::Reference< uno::XComponentContext > &xContext, const OUString &rFilterOptions)
Definition: wrapper.cxx:1148
const ContentSinkSharedPtr m_pSink
Definition: wrapper.cxx:151
void append(const B2DPolygon &rPolygon, sal_uInt32 nCount=1)
void setClosed(bool bNew)
std::unique_ptr< char[]> aBuffer
Definition: wrapper.cxx:926
Any makeAny(Color const &value)
#define SAL_WARN_IF(condition, area, stream)
#define SAL_INFO(area, stream)
std::size_t m_nCharIndex
Definition: wrapper.cxx:181
#define SAL_WARN(area, stream)
Reference< XSingleServiceFactory > xFactory
const oslFileHandle m_pErr
Definition: wrapper.cxx:152
OString m_aLine
Definition: wrapper.cxx:170
ScopedVclPtr< VirtualDevice > m_xDev
Definition: wrapper.cxx:149
std::shared_ptr< ContentSink > ContentSinkSharedPtr
bool setupDecryptionData(const OString &rPwd) const
FontMapType m_aFontMap
Definition: wrapper.cxx:153
const uno::Reference< uno::XComponentContext > m_xContext
Definition: wrapper.cxx:150
sal_uInt32 count() const
basegfx::B2DPoint const & getB2DPoint(sal_uInt32 nIndex) const