26#include <osl/thread.h>
27#include <rtl/digest.h>
29#include <com/sun/star/io/IOException.hpp>
30#include <com/sun/star/io/XInputStream.hpp>
31#include <com/sun/star/io/XStream.hpp>
32#include <com/sun/star/io/XSeekable.hpp>
33#include <com/sun/star/io/TempFile.hpp>
34#include <com/sun/star/task/XInteractionHandler.hpp>
// NOTE(review): fragment of the FileEmitContext class declaration. The class header
// and the remaining data members are not part of this listing.

// Output stream that write() and copyOrigBytes() send their bytes to.
59 uno::Reference< io::XOutputStream >
m_xOut;
// Constructor. rOrigFile is opened for reading; xContext is used to create the
// temporary stream that getContextStream() later hands out.
62 FileEmitContext(
const OUString& rOrigFile,
63 const uno::Reference< uno::XComponentContext >& xContext,
65 virtual ~FileEmitContext()
override;
// Copies nLen bytes from pBuf to the output stream.
67 virtual bool write(
const void* pBuf,
unsigned int nLen )
override;
// Reports the position obtained from the seekable interface, narrowed to unsigned int.
68 virtual unsigned int getCurPos()
override;
// Copies nLen bytes, starting at nOrigOffset in the original file, to the output stream.
69 virtual bool copyOrigBytes(
unsigned int nOrigOffset,
unsigned int nLen )
override;
// Reads from the original file starting at nOrigOffset and returns the number of
// bytes actually read (presumably into pBuf -- the read call is not fully visible).
70 virtual unsigned int readOrigBytes(
unsigned int nOrigOffset,
unsigned int nLen,
void* pBuf )
override;
// Accessor for the stream that the constructor sets up.
72 const uno::Reference< io::XStream >& getContextStream()
const {
return m_xContextStream; }
// Sets up the emit context: creates the temporary target stream and opens the
// original file for reading.
// NOTE(review): the member initializer list, the braces and several statements of
// this constructor are missing from this listing.
77FileEmitContext::FileEmitContext(
const OUString& rOrigFile,
78 const uno::Reference< uno::XComponentContext >& xContext,
// The target stream is a freshly created temp file; UNO_QUERY_THROW makes a failed
// interface query raise instead of leaving the reference empty.
84 m_xContextStream.set( io::TempFile::create(xContext), uno::UNO_QUERY_THROW );
// Open the original file read-only.
88 if( osl_openFile( rOrigFile.pData,
90 osl_File_OpenFlag_Read ) == osl_File_E_None )
// Seek to the end of the file; the size query that follows is only partly visible.
92 oslFileError aErr = osl_setFilePos(
m_aReadHandle, osl_Pos_End, 0 );
93 if( aErr == osl_File_E_None )
95 sal_uInt64 nFileSize = 0;
97 &nFileSize )) == osl_File_E_None )
// NOTE(review): the 64-bit size is narrowed to unsigned int here, so a size that
// does not fit into unsigned int would be stored truncated.
99 m_nReadLen =
static_cast<unsigned int>(nFileSize);
// Error branch; its body is not visible in this listing.
102 if( aErr != osl_File_E_None )
111FileEmitContext::~FileEmitContext()
// Writes nLen bytes from pBuf to the output stream.
// NOTE(review): the lines between the signature and the sequence construction, and
// the return statement, are missing from this listing.
117bool FileEmitContext::write(
const void* pBuf,
unsigned int nLen )
// The stream interface takes a byte sequence, so the raw buffer is copied into one
// first.
122 uno::Sequence< sal_Int8 >
aSeq( nLen );
123 memcpy(
aSeq.getArray(), pBuf, nLen );
124 m_xOut->writeBytes( aSeq );
// Returns the current position reported by m_xSeek.
// NOTE(review): any guard around the getPosition() call and the return statement
// are missing from this listing.
128unsigned int FileEmitContext::getCurPos()
// Starts at 0; this is the value that remains if the assignment below is not reached.
130 unsigned int nPos = 0;
// The position reported by the seekable interface is narrowed to unsigned int.
133 nPos =
static_cast<unsigned int>(
m_xSeek->getPosition() );
// Copies nLen bytes, starting at nOrigOffset in the original file, to the output
// stream.
// NOTE(review): the bodies of the failure branches and the final return are missing
// from this listing; presumably they report failure to the caller.
138bool FileEmitContext::copyOrigBytes(
unsigned int nOrigOffset,
unsigned int nLen )
// Position the read handle at the requested absolute offset.
143 if( osl_setFilePos(
m_aReadHandle, osl_Pos_Absolut, nOrigOffset ) != osl_File_E_None )
146 uno::Sequence< sal_Int8 >
aSeq( nLen );
148 sal_uInt64 nBytesRead = 0;
// A read error and a short read (fewer than nLen bytes) take the same branch.
152 &nBytesRead ) != osl_File_E_None
153 || nBytesRead !=
static_cast<sal_uInt64
>(nLen) )
// Forward the bytes read from the original file.
158 m_xOut->writeBytes( aSeq );
// Reads from the original file starting at nOrigOffset and returns the number of
// bytes that were read.
// NOTE(review): the seek and read calls are only partly visible; presumably up to
// nLen bytes are read into pBuf -- confirm against the full source.
162unsigned int FileEmitContext::readOrigBytes(
unsigned int nOrigOffset,
unsigned int nLen,
void* pBuf )
// Tail of the seek to nOrigOffset; the failure branch body is not visible.
169 nOrigOffset ) != osl_File_E_None )
174 sal_uInt64 nBytesRead = 0;
// Tail of the read call; the failure branch body is not visible.
178 &nBytesRead ) != osl_File_E_None )
// The 64-bit byte count is narrowed to the unsigned int return type.
182 return static_cast<unsigned int>(nBytesRead);
186PDFDetector::PDFDetector( uno::Reference< uno::XComponentContext > xContext) :
// Scans the filter descriptor and extracts the entries used for detection.
//   rFilterData     descriptor to scan (read-only)
//   xInput          receives the value of the "InputStream" entry
//   aURL            receives the value of the "URL" entry
//   nFilterNamePos  handled in the "FilterName" branch (assignment not visible here)
//   nPasswordPos    handled in the "Password" branch (assignment not visible here)
//   aPassword       receives the value of the "Password" entry
// NOTE(review): the return statement is missing from this listing; the caller stores
// the result in a variable named nAttribs, so it presumably returns the entry count.
193sal_Int32 fillAttributes(uno::Sequence<beans::PropertyValue>
const& rFilterData, uno::Reference<io::XInputStream>& xInput, OUString& aURL, sal_Int32& nFilterNamePos, sal_Int32& nPasswordPos, OUString& aPassword)
195 const beans::PropertyValue* pAttribs = rFilterData.getConstArray();
196 sal_Int32 nAttribs = rFilterData.getLength();
197 for (sal_Int32 i = 0;
i < nAttribs;
i++)
// Logging only: aVal keeps the placeholder text when the value is not extracted as
// a string.
199 OUString aVal(
"<no string>" );
200 pAttribs[
i].Value >>= aVal;
201 SAL_INFO(
"sdext.pdfimport",
"doDetection: Attrib: " + pAttribs[i].Name +
" = " + aVal);
// Dispatch on the property name.
203 if (pAttribs[i].Name ==
"InputStream")
204 pAttribs[
i].Value >>= xInput;
205 else if (pAttribs[i].Name ==
"URL")
206 pAttribs[
i].Value >>=
aURL;
207 else if (pAttribs[i].Name ==
"FilterName")
209 else if (pAttribs[i].Name ==
"Password")
212 pAttribs[
i].Value >>= aPassword;
// Number of bytes requested from the input stream when probing for the PDF
// signature; also used as the initial size of the header buffer.
219constexpr const sal_Int32 constHeaderSize = 1024;
// Reads up to constHeaderSize bytes from xInput into aHeader and searches them for
// the PDF signature.
//   xInput           stream to probe
//   aHeader          receives the bytes that were read
//   nHeaderReadSize  receives the number of bytes that were read
// NOTE(review): the try block, the seek handling, the last line of the signature
// comparison and all return statements are missing from this listing.
221bool detectPDF(uno::Reference<io::XInputStream>
const& xInput, uno::Sequence<sal_Int8>& aHeader, sal_uInt64& nHeaderReadSize)
225 uno::Reference<io::XSeekable> xSeek(xInput, uno::UNO_QUERY);
229 nHeaderReadSize = xInput->readBytes(aHeader, constHeaderSize);
// Handles reads of 5 bytes or fewer separately. Because nHeaderReadSize is unsigned,
// the subtraction in the loop bound below would wrap around for values below 5.
230 if (nHeaderReadSize <= 5)
233 const sal_Int8* pBytes = aHeader.getConstArray();
// The signature does not have to be at offset 0: every start position in the bytes
// read is tried.
234 for (sal_uInt64 i = 0;
i < nHeaderReadSize - 5;
i++)
// Matches "%PDF"; the condition continues on a line that is not visible here.
236 if (pBytes[i+0] ==
'%' &&
237 pBytes[i+1] ==
'P' &&
238 pBytes[i+2] ==
'D' &&
239 pBytes[i+3] ==
'F' &&
// I/O errors raised while reading are caught; the handler body is not visible.
246 catch (
const css::io::IOException &)
// Writes the already-read header bytes and then the remainder of xInput to the file
// behind rFileHandle.
//   xInput           stream to drain
//   rFileHandle      open handle of the destination file
//   aHeader          bytes that were read from xInput earlier
//   nHeaderReadSize  number of valid bytes in aHeader
// NOTE(review): the try/do keywords, the branch bodies and the return statements
// are missing from this listing.
253bool copyToTemp(uno::Reference<io::XInputStream>
const& xInput, oslFileHandle& rFileHandle, uno::Sequence<sal_Int8>
const& aHeader, sal_uInt64 nHeaderReadSize)
257 sal_uInt64 nWritten = 0;
// NOTE(review): in the visible lines the result of this call is not checked, unlike
// the write inside the loop below -- confirm against the full source.
258 osl_writeFile(rFileHandle, aHeader.getConstArray(), nHeaderReadSize, &nWritten);
260 const sal_uInt64 nBufferSize = 4096;
261 uno::Sequence<sal_Int8>
aBuffer(nBufferSize);
264 sal_uInt64 nRead = 0;
267 nRead = xInput->readBytes(
aBuffer, nBufferSize);
270 osl_writeFile(rFileHandle,
aBuffer.getConstArray(), nRead, &nWritten);
// A short write is detected here; the branch body is not visible.
271 if (nWritten != nRead)
// Keep going while full buffers are delivered; a shorter read ends the loop.
275 while (nRead == nBufferSize);
// I/O errors raised by the stream are caught; the handler body is not visible.
277 catch (
const css::io::IOException &)
// NOTE(review): fragment of a function body whose signature is not part of this
// listing (presumably PDFDetector::detect -- confirm). Braces, several declarations
// and the conditions guarding many of the statements below are missing, so the
// comments describe only what the visible lines do.
290 bool bSuccess =
false;
293 uno::Reference<io::XInputStream> xInput;
294 uno::Reference<io::XStream> xEmbedStream;
295 OUString aOutFilterName;
296 OUString aOutTypeName;
// -1 marks "no such entry found in the descriptor".
300 sal_Int32 nFilterNamePos = -1;
301 sal_Int32 nPasswordPos = -1;
302 sal_Int32 nAttribs = fillAttributes(rFilterData, xInput,
aURL, nFilterNamePos, nPasswordPos, aPassword);
// Probe the stream for the PDF signature; the bytes read are kept in aHeader so they
// can be written to the temp file later.
308 uno::Sequence<sal_Int8> aHeader(constHeaderSize);
309 sal_uInt64 nHeaderReadSize = 0;
310 bSuccess = detectPDF(xInput, aHeader, nHeaderReadSize);
315 oslFileHandle aFileHandle =
nullptr;
// Create a temp file; its URL is stored in aURL.
320 if (osl_createTempFile(
nullptr, &aFileHandle, &
aURL.pData) != osl_File_E_None)
326 SAL_INFO(
"sdext.pdfimport",
"created temp file " +
aURL);
327 bSuccess = copyToTemp(xInput, aFileHandle, aHeader, nHeaderReadSize);
329 osl_closeFile(aFileHandle);
// The file at aURL is removed again at two places; the conditions are not visible.
335 osl_removeFile(
aURL.pData);
339 OUString aEmbedMimetype;
343 osl_removeFile(
aURL.pData);
// Map the MIME type of the embedded document to the matching import filter name.
345 if (!aEmbedMimetype.isEmpty())
347 if( aEmbedMimetype ==
"application/vnd.oasis.opendocument.text"
348 || aEmbedMimetype ==
"application/vnd.oasis.opendocument.text-master" )
349 aOutFilterName =
"writer_pdf_addstream_import";
350 else if ( aEmbedMimetype ==
"application/vnd.oasis.opendocument.presentation" )
351 aOutFilterName =
"impress_pdf_addstream_import";
352 else if( aEmbedMimetype ==
"application/vnd.oasis.opendocument.graphics"
353 || aEmbedMimetype ==
"application/vnd.oasis.opendocument.drawing" )
354 aOutFilterName =
"draw_pdf_addstream_import";
355 else if ( aEmbedMimetype ==
"application/vnd.oasis.opendocument.spreadsheet" )
356 aOutFilterName =
"calc_pdf_addstream_import";
// A filter name was chosen: store it in the descriptor, appending a "FilterName"
// entry if none exists yet.
359 if (!aOutFilterName.isEmpty())
361 if( nFilterNamePos == -1 )
363 nFilterNamePos = nAttribs;
364 rFilterData.realloc( ++nAttribs );
365 rFilterData.getArray()[ nFilterNamePos ].Name =
"FilterName";
367 auto pFilterData = rFilterData.getArray();
368 aOutTypeName =
"pdf_Portable_Document_Format";
370 pFilterData[nFilterNamePos].Value <<= aOutFilterName;
// Append the embedded stream as an extra entry. The array pointer is fetched again
// after each realloc (presumably because the storage can change).
371 if( xEmbedStream.is() )
373 rFilterData.realloc( ++nAttribs );
374 pFilterData = rFilterData.getArray();
375 pFilterData[nAttribs-1].Name =
"EmbeddedSubstream";
376 pFilterData[nAttribs-1].Value <<= xEmbedStream;
// Store the password in the descriptor, appending a "Password" entry if needed.
378 if (!aPassword.isEmpty())
380 if (nPasswordPos == -1)
382 nPasswordPos = nAttribs;
383 rFilterData.realloc(++nAttribs);
384 pFilterData = rFilterData.getArray();
385 pFilterData[nPasswordPos].Name =
"Password";
387 pFilterData[nPasswordPos].Value <<= aPassword;
// Separate path (its guarding condition is not visible): make sure a "FilterName"
// entry exists, then choose one of the plain PDF import filters.
392 css::beans::PropertyValue* pFilterData;
393 if( nFilterNamePos == -1 )
395 nFilterNamePos = nAttribs;
396 rFilterData.realloc( ++nAttribs );
397 pFilterData = rFilterData.getArray();
398 pFilterData[ nFilterNamePos ].Name =
"FilterName";
401 pFilterData = rFilterData.getArray();
// nDocumentType is a constant 0, so the "< 0" test below can never be true and the
// switch always selects the case for 0 (the case labels are not visible here).
403 const sal_Int32 nDocumentType = 0;
404 if( nDocumentType < 0 )
410 switch (nDocumentType)
413 pFilterData[nFilterNamePos].Value <<= OUString(
"draw_pdf_import" );
417 pFilterData[nFilterNamePos].Value <<= OUString(
"impress_pdf_import" );
421 pFilterData[nFilterNamePos].Value <<= OUString(
"writer_pdf_import" );
425 assert(!
"Unexpected case");
429 aOutTypeName =
"pdf_Portable_Document_Format";
437 return "org.libreoffice.comp.documents.PDFDetector";
447 return {
"com.sun.star.document.ImportFilter"};
// NOTE(review): fragment of a function whose first signature line is missing from
// this listing (presumably checkDocChecksum -- confirm). It compares an MD5 digest
// decoded from rChkSum with one computed over the leading nBytes of the file at
// rInPDFFileURL; the decoding and the digest update/finalize calls are not visible.
452 const OUString& rChkSum )
// The checksum text must be exactly twice the digest length, which suggests two
// characters per digest byte (the decoding itself is not visible).
454 if( rChkSum.getLength() != 2* RTL_DIGEST_LENGTH_MD5 )
458 "checksum of length " << rChkSum.getLength() <<
", expected "
459 << 2*RTL_DIGEST_LENGTH_MD5);
// Expected digest; the code that fills it is not visible in this listing.
464 sal_uInt8 nTestChecksum[ RTL_DIGEST_LENGTH_MD5 ];
// Computed digest; stays empty unless the code in the missing lines fills it.
483 ::std::vector<unsigned char> nChecksum;
485 oslFileHandle aRead =
nullptr;
486 if( osl_openFile(rInPDFFileURL.pData,
488 osl_File_OpenFlag_Read ) == osl_File_E_None )
492 sal_uInt64 nBytesRead = 0;
// Read the file in chunks of at most sizeof(aBuf) until nBytes have been consumed.
493 while( nCur < nBytes )
495 sal_uInt32 nPass = std::min<sal_uInt32>(nBytes - nCur,
sizeof(
aBuf ));
496 if( osl_readFile( aRead,
aBuf, nPass, &nBytesRead) != osl_File_E_None
// Continue with the number of bytes actually read, which may be less than requested.
501 nPass =
static_cast<sal_uInt32
>(nBytesRead);
507 osl_closeFile( aRead );
// True only if a digest of full length was produced and it matches the expected one.
511 return nChecksum.size() == RTL_DIGEST_LENGTH_MD5
512 && (0 == memcmp(nChecksum.data(), nTestChecksum, nChecksum.size()));
// NOTE(review): fragment of a function whose first signature line is missing from
// this listing (presumably getAdditionalStream -- confirm). The visible lines inspect
// the PDF trailer for "DocChecksum" and "AdditionalStreams" entries, authenticate
// with a password where required, and write the referenced stream into a
// FileEmitContext whose stream becomes xEmbed. Many statements in between are
// missing.
516 OUString& rOutMimetype,
518 const uno::Reference<uno::XComponentContext>& xContext,
519 const uno::Sequence<beans::PropertyValue>& rFilterData,
522 uno::Reference< io::XStream > xEmbed;
// Convert the file URL to a system path; the failure branch body is not visible.
525 if( osl_getSystemPathFromFileURL( rInPDFFileURL.pData, &aSysUPath.pData ) != osl_File_E_None )
// Only a trailer that has a dictionary is examined.
539 if( pTrailer && pTrailer->
m_pDict )
545 SAL_INFO(
"sdext.pdfimport",
"no DocChecksum entry" );
549 if( pChkSumName ==
nullptr )
551 SAL_INFO(
"sdext.pdfimport",
"no name for DocChecksum entry" );
556 auto add_stream = pTrailer->
m_pDict->
m_aMap.find(
"AdditionalStreams" );
559 SAL_INFO(
"sdext.pdfimport",
"no AdditionalStreams entry" );
565 SAL_INFO(
"sdext.pdfimport",
"AdditionalStreams array too small" );
// Both a MIME type element and a stream reference are needed to continue.
578 SAL_WARN_IF( !pMimeType,
"sdext.pdfimport",
"error: no mimetype element" );
579 SAL_WARN_IF( !pStreamRef,
"sdext.pdfimport",
"error: no stream ref element" );
581 if( pMimeType && pStreamRef )
// Authentication: a password passed in via io_rPwd is tried first.
589 bool bAuthenticated =
false;
590 if( !io_rPwd.isEmpty() )
// Tail of a call that is not fully visible; it names the ISO 8859-1 text encoding.
593 RTL_TEXTENCODING_ISO_8859_1 );
596 if( ! bAuthenticated )
// Look up the interaction handler in the filter data so the user can be asked.
598 uno::Reference< task::XInteractionHandler > xIntHdl;
599 for(
const beans::PropertyValue& rAttrib : rFilterData )
601 if ( rAttrib.Name ==
"InteractionHandler" )
602 rAttrib.Value >>= xIntHdl;
// Asking the user needs both permission to use UI and a handler; the branch body is
// not visible.
604 if( ! bMayUseUI || ! xIntHdl.is() )
// Document name passed to the password request: the part of the URL after the last
// '/'.
611 OUString aDocName( rInPDFFileURL.copy( rInPDFFileURL.lastIndexOf(
'/' )+1 ) );
613 bool bEntered =
false;
// The third argument is true only on the first pass, because bEntered is still false
// then. The loop repeats while a password was entered but authentication failed.
616 bEntered =
getPassword( xIntHdl, io_rPwd, ! bEntered, aDocName );
618 RTL_TEXTENCODING_ISO_8859_1 );
620 }
while( bEntered && ! bAuthenticated );
623 if( ! bAuthenticated )
// Write the object's stream into a context backed by a temp stream, then keep that
// stream as the result.
627 FileEmitContext aContext( rInPDFFileURL,
631 pObject->writeStream( aContext, pPDFFile );
632 xEmbed = aContext.getContextStream();
645extern "C" SAL_DLLPUBLIC_EXPORT css::uno::XInterface*
647 css::uno::XComponentContext* context , css::uno::Sequence<css::uno::Any>
const&)
std::vector< unsigned char > finalize()
void update(const unsigned char *pInput, size_t length)
OUString SAL_CALL getImplementationName() override
virtual OUString SAL_CALL detect(css::uno::Sequence< css::beans::PropertyValue > &io_rDescriptor) override
css::uno::Sequence< OUString > SAL_CALL getSupportedServiceNames() override
sal_Bool SAL_CALL supportsService(OUString const &ServiceName) override
css::uno::Reference< css::uno::XComponentContext > m_xContext
#define TOOLS_WARN_EXCEPTION(area, stream)
EmbeddedObjectRef * pObject
uno::Reference< io::XStream > m_xContextStream
uno::Reference< io::XOutputStream > m_xOut
oslFileHandle m_aReadHandle
uno::Reference< io::XSeekable > m_xSeek
Sequence< sal_Int8 > aSeq
#define SAL_WARN_IF(condition, area, stream)
#define SAL_INFO(area, stream)
COMPHELPER_DLLPUBLIC bool isFileUrl(std::u16string_view url)
bool CPPUHELPER_DLLPUBLIC supportsService(css::lang::XServiceInfo *implementation, rtl::OUString const &name)
SAL_DLLPUBLIC_EXPORT css::uno::XInterface * sdext_PDFDetector_get_implementation(css::uno::XComponentContext *context, css::uno::Sequence< css::uno::Any > const &)
bool checkDocChecksum(const OUString &rInPDFFileURL, sal_uInt32 nBytes, const OUString &rChkSum)
uno::Reference< io::XStream > getAdditionalStream(const OUString &rInPDFFileURL, OUString &rOutMimetype, OUString &io_rPwd, const uno::Reference< uno::XComponentContext > &xContext, const uno::Sequence< beans::PropertyValue > &rFilterData, bool bMayUseUI)
bool getPassword(const css::uno::Reference< css::task::XInteractionHandler > &xHandler, OUString &rOutPwd, bool bFirstTry, const OUString &rDocName)
retrieve password from user
OString OUStringToOString(std::u16string_view str, ConnectionSettings const *settings)
PDFObject * findObject(unsigned int nNumber, unsigned int nGeneration) const
std::vector< std::unique_ptr< PDFEntry > > m_aSubElements
bool setupDecryptionData(const OString &rPwd) const
OUString getFilteredName() const
unsigned int m_nGeneration
static std::unique_ptr< PDFEntry > read(const char *pFileName)
std::unique_ptr< char[]> aBuffer
const uno::Reference< uno::XComponentContext > m_xContext