22 #include <string_view>
26 #include <osl/thread.h>
27 #include <rtl/alloc.h>
28 #include <rtl/ustring.hxx>
29 #include <rtl/strbuf.hxx>
// Usage text for the command-line help. The text contains five %s
// conversions, and the trailing argument list supplies pExe five times.
// NOTE(review): the "-f, --extract-fonts" description opens a parenthesis
// ("(currently only ...") that is never closed in the printed text.
39 "USAGE: %s [-h,--help]\n"
40 " %s [-pw, --password <password>] <inputfile> [<outputfile>]\n"
41 " %s <-a, --extract-add-streams> [-pw, --password <password>] <inputfile> [<outputfile>]\n"
42 " %s <-f, --extract-fonts> [-pw, --password <password>] <inputfile> [<outputfile>]\n"
43 " %s <-o, --extract-objects> <o0>[:<g0>][,<o1>[:g1][,...]] [-pw, --password <password>] <inputfile> [<outputfile>]\n"
44 " -h, --help: show help\n"
45 " -a, --extract-add-streams: extracts additional streams to outputfile_object\n"
46 " and prints the mimetype found to stdout\n"
47 " -f, --extract-fonts: extracts fonts (currently only type1 and truetype are supported\n"
48 " -o, --extract-objects: extracts object streams, the syntax of the argument is comma separated\n"
49 " object numbers, where object number and generation number are separated by \':\'\n"
50 " an omitted generation number defaults to 0\n"
51 " -pw, --password: use password for decryption\n"
53 "note: -f, -a, -o and normal unzip operation are mutually exclusive\n"
54 , pExe, pExe, pExe, pExe, pExe );
// Fragment of the FileEmitContext declaration: an emit context backed by OSL
// file handles. m_aHandle is the handle that write() and getCurPos() operate on.
61 oslFileHandle m_aHandle;
// Opens the original file for reading and records its length.
65 void openReadFile(
const char* pOrigName );
// pFileName: file to write to; pOrigName: original file to read bytes from.
68 FileEmitContext(
const char* pFileName,
const char* pOrigName,
const PDFContainer* pTop );
69 virtual ~FileEmitContext()
override;
// Writes nLen bytes from pBuf to the output handle; true only on a complete write.
71 virtual bool write(
const void* pBuf,
unsigned int nLen )
throw()
override;
// Returns the current position of the output handle.
72 virtual unsigned int getCurPos() throw() override;
// Copies nLen bytes starting at nOrigOffset of the original file to the output.
73 virtual
bool copyOrigBytes(
unsigned int nOrigOffset,
unsigned int nLen ) throw() override;
// Reads up to nLen bytes starting at nOrigOffset of the original file into
// pBuf and returns the number of bytes read.
74 virtual
unsigned int readOrigBytes(
unsigned int nOrigOffset,
unsigned int nLen,
void* pBuf ) throw() override;
// Constructor: converts pFileName (interpreted in the thread text encoding)
// to a file URL, opens it for writing and finally opens pOrigName for reading
// via openReadFile(). Failures are reported on stderr.
79 FileEmitContext::FileEmitContext( const
char* pFileName, const
char* pOrigName, const
PDFContainer* pTop )
82 m_aReadHandle(
nullptr ),
86 OStringToOUString( std::string_view( pFileName ), osl_getThreadTextEncoding() ) );
88 if( osl_getFileURLFromSystemPath( aSysFile.pData, &aURL.pData ) != osl_File_E_None )
90 fprintf( stderr,
"filename conversion \"%s\" failed\n", pFileName );
// First try to open an already existing file and truncate it to zero length.
94 if( osl_openFile( aURL.pData, &m_aHandle, osl_File_OpenFlag_Write ) == osl_File_E_None )
96 if( osl_setFileSize( m_aHandle, 0 ) != osl_File_E_None )
98 fprintf( stderr,
"could not truncate %s\n", pFileName );
99 osl_closeFile( m_aHandle );
// Opening for write failed: retry with the create flag added.
103 else if( osl_openFile( aURL.pData, &m_aHandle,
104 osl_File_OpenFlag_Write |osl_File_OpenFlag_Create ) != osl_File_E_None )
106 fprintf( stderr,
"could not open %s\n", pFileName );
111 openReadFile( pOrigName );
// Destructor: closes the output handle and the read handle of the original file.
// NOTE(review): any guarding conditions around the two calls are not visible here.
114 FileEmitContext::~FileEmitContext()
117 osl_closeFile( m_aHandle );
119 osl_closeFile( m_aReadHandle );
// Opens pInFile read-only on m_aReadHandle and determines its length by
// seeking to the end and querying the file position. Each failing step is
// reported on stderr; the seek/position failures also close the read handle.
122 void FileEmitContext::openReadFile(
const char* pInFile )
125 OStringToOUString( std::string_view( pInFile ), osl_getThreadTextEncoding() ) );
127 if( osl_getFileURLFromSystemPath( aSysFile.pData, &aURL.pData ) != osl_File_E_None )
129 fprintf( stderr,
"filename conversion \"%s\" failed\n", pInFile );
133 if( osl_openFile( aURL.pData, &m_aReadHandle, osl_File_OpenFlag_Read ) != osl_File_E_None )
135 fprintf( stderr,
"could not open %s\n", pInFile );
// Seek to the end so that the position query below yields the file size.
139 if( osl_setFilePos( m_aReadHandle, osl_Pos_End, 0 ) != osl_File_E_None )
141 fprintf( stderr,
"could not seek to end of %s\n", pInFile );
142 osl_closeFile( m_aReadHandle );
146 sal_uInt64 nFileSize = 0;
147 if( osl_getFilePos( m_aReadHandle, &nFileSize ) != osl_File_E_None )
149 fprintf( stderr,
"could not get end pos of %s\n", pInFile );
150 osl_closeFile( m_aReadHandle );
// NOTE(review): the 64-bit size is narrowed to unsigned int here; a file
// larger than UINT_MAX bytes would be recorded with a truncated length.
154 m_nReadLen =
static_cast<unsigned int>(nFileSize);
// Writes nLen bytes from pBuf to the output handle.
// Returns true only if osl_writeFile succeeds and the number of bytes written
// equals the number requested.
157 bool FileEmitContext::write(
const void* pBuf,
unsigned int nLen )
throw()
162 sal_uInt64 nWrite =
static_cast<sal_uInt64
>(nLen);
163 sal_uInt64 nWritten = 0;
164 return (osl_writeFile( m_aHandle, pBuf, nWrite, &nWritten ) == osl_File_E_None)
165 && nWrite == nWritten;
// Returns the current position of the output handle, narrowed to unsigned int.
// NOTE(review): the handling of a failed osl_getFilePos call is not visible here.
168 unsigned int FileEmitContext::getCurPos() throw()
170 sal_uInt64 nFileSize = 0;
173 if( osl_getFilePos( m_aHandle, &nFileSize ) != osl_File_E_None )
176 return static_cast<unsigned int>(nFileSize);
// Copies nLen bytes starting at nOrigOffset of the original file to the
// output: seeks the read handle, reads the bytes into a temporary heap buffer
// and passes them to write(). Seek and read failures are reported on stderr.
179 bool FileEmitContext::copyOrigBytes(
unsigned int nOrigOffset,
unsigned int nLen )
throw()
// Range check written without an addition: "nOrigOffset + nLen" can wrap
// around in unsigned arithmetic and would then accept an out-of-range request.
181 if( nLen > m_nReadLen || nOrigOffset > m_nReadLen - nLen )
184 if( osl_setFilePos( m_aReadHandle, osl_Pos_Absolut, nOrigOffset ) != osl_File_E_None )
186 fprintf( stderr,
"could not seek to offset %u\n", nOrigOffset );
189 void* pBuf = std::malloc( nLen );
192 sal_uInt64 nBytesRead = 0;
// A short read counts as a failure: exactly nLen bytes are required.
193 if( osl_readFile( m_aReadHandle, pBuf, nLen, &nBytesRead ) != osl_File_E_None
194 || nBytesRead != static_cast<sal_uInt64>(nLen) )
196 fprintf( stderr,
"could not read %u bytes\n", nLen );
200 bool bRet = write( pBuf, nLen );
// Reads up to nLen bytes starting at nOrigOffset of the original file into
// pBuf and returns the number of bytes actually read. A seek failure is
// reported on stderr.
205 unsigned int FileEmitContext::readOrigBytes(
unsigned int nOrigOffset,
unsigned int nLen,
void* pBuf )
throw()
// Range check written without an addition: "nOrigOffset + nLen" can wrap
// around in unsigned arithmetic and would then accept an out-of-range request.
207 if( nLen > m_nReadLen || nOrigOffset > m_nReadLen - nLen )
210 if( osl_setFilePos( m_aReadHandle, osl_Pos_Absolut, nOrigOffset ) != osl_File_E_None )
212 fprintf( stderr,
"could not seek to offset %u\n", nOrigOffset );
215 sal_uInt64 nBytesRead = 0;
216 if( osl_readFile( m_aReadHandle, pBuf, nLen, &nBytesRead ) != osl_File_E_None )
218 return static_cast<unsigned int>(nBytesRead);
// Fragment (presumably of handleFile, which takes the pHdl callback): reports
// on stdout whether the parsed PDF file is encrypted, prints a password status
// line (its argument is not visible here) and then invokes the handler pHdl
// with the input name, output name and parsed file, storing its result in nRet.
232 fprintf( stdout,
"have a %s PDF file\n", pPDFFile->
isEncrypted() ?
"encrypted" :
"unencrypted" );
234 fprintf( stdout,
"password %s\n",
236 nRet = pHdl( pInFile, pOutFile, pPDFFile );
// Fragment (presumably of write_unzipFile): creates an emit context that
// writes to pOutFile and reads original bytes from pInFile, then emits the
// whole PDF file through it.
246 FileEmitContext aContext( pOutFile, pInFile, pPDFFile );
248 pPDFFile->
emit(aContext);
// Fragment (presumably of write_addStreamArray): walks the array elements,
// reports a missing mimetype or stream reference element on stderr, and for a
// valid pair prints the stream found and writes it to a file named
// pOutFile + "_stream_" + <object number> + ... through a FileEmitContext.
// The loop condition is written as "i + 1 < nArrayElements" so that an empty
// array does not make "nArrayElements-1" wrap around and enter the loop.
256 for(
unsigned int i = 0;
i + 1 < nArrayElements && nRet == 0;
i++ )
261 fprintf( stderr,
"error: no mimetype element\n" );
263 fprintf( stderr,
"error: no stream ref element\n" );
264 if( pMimeType && pStreamRef )
266 fprintf( stdout,
"found stream %d %d with mimetype %s\n",
272 OString aOutStream = pOutFile +
273 OStringLiteral(
"_stream_") +
274 OString::number( sal_Int32(pStreamRef->
m_nNumber) ) +
277 FileEmitContext aContext( aOutStream.getStr(), pInFile, pPDFFile );
279 pObject->writeStream( aContext, pPDFFile );
283 fprintf( stderr,
"object not found\n" );
// Fragment (presumably of write_addStreams): iterates over nElements entries
// while nRet stays 0; for a trailer that has a dictionary, looks up the
// "AdditionalStreams" key in that dictionary's map.
298 for(
unsigned i = 0;
i < nElements && nRet == 0;
i++ )
301 if( pTrailer && pTrailer->
m_pDict )
304 auto add_stream = pTrailer->
m_pDict->
m_aMap.find(
"AdditionalStreams" );
// Fragment (presumably of write_fonts): iterates over nElements entries and
// inspects a dictionary pDict for each. The "Type" entry is compared against
// "FontDescriptor", the "FontName" entry supplies the font name, and the
// "FontFile" / "FontFile2" entries supply the stream reference. The output
// name is assembled from i_pOutFile, "_font_", the stream's object number,
// the font name and a file type suffix.
319 for (
unsigned i = 0;
i < nElements;
i++)
329 std::unordered_map<OString,PDFEntry*>::iterator map_it =
330 pDict->
m_aMap.find(
"Type" );
331 if( map_it == pDict->
m_aMap.end() )
337 if( pName->
m_aName !=
"FontDescriptor" )
342 map_it = pDict->
m_aMap.find(
"FontName" );
343 if( map_it == pDict->
m_aMap.end() )
345 pName =
dynamic_cast<PDFName*
>(map_it->second);
// NOTE(review): pName comes from a dynamic_cast; whether it is null-checked
// before this dereference is not visible here - confirm.
348 OString aFontName( pName->
m_aName );
351 const char* pFileType =
nullptr;
353 map_it = pDict->
m_aMap.find(
"FontFile" );
354 if( map_it != pDict->
m_aMap.end() )
356 pStreamRef =
dynamic_cast<PDFObjectRef*
>(map_it->second);
364 map_it = pDict->
m_aMap.find(
"FontFile2" );
365 if( map_it != pDict->
m_aMap.end() )
367 pStreamRef =
dynamic_cast<PDFObjectRef*
>(map_it->second);
// Build the output file name piece by piece.
380 OStringBuffer aOutStream( i_pOutFile );
381 aOutStream.append(
"_font_" );
382 aOutStream.append( sal_Int32(pStreamRef->
m_nNumber) );
383 aOutStream.append(
"_" );
385 aOutStream.append(
"_" );
386 aOutStream.append( aFontName );
389 aOutStream.append(
"." );
390 aOutStream.append( pFileType );
392 FileEmitContext aContext( aOutStream.getStr(), i_pInFile, i_pPDFFile );
// Fragment (presumably of write_objects): for every (object, generation) pair
// collected in s_aEmitObjects, reports on stderr when the object is not found,
// otherwise creates an emit context for a file named
// i_pOutFile + "_stream_" + <object> + ... + <generation>.
403 unsigned int nElements = s_aEmitObjects.size();
404 for (
unsigned i = 0;
i < nElements;
i++)
406 sal_Int32 nObject = s_aEmitObjects[
i].first;
407 sal_Int32 nGeneration = s_aEmitObjects[
i].second;
411 fprintf( stderr,
"object %d %d not found !\n", static_cast<int>(nObject), static_cast<int>(nGeneration) );
415 OString aOutStream = i_pOutFile +
416 OStringLiteral(
"_stream_") +
417 OString::number( nObject ) +
419 OString::number( nGeneration );
420 FileEmitContext aContext( aOutStream.getStr(), i_pInFile, i_pPDFFile );
// Fragment of the program entry point: parses the command line, derives a
// default output file name when needed and hands over to handleFile().
// Arguments starting with '-' are treated as options; the first remaining
// argument becomes the input file and the second the output file.
429 const char* pInFile =
nullptr;
430 const char* pOutFile =
nullptr;
431 const char* pPassword =
nullptr;
432 OStringBuffer aOutFile( 256 );
435 for(
int nArg = 1; nArg < argc; nArg++ )
437 if( argv[nArg][0] ==
'-' )
// A zero result of rtl_str_compare is treated as "option matches".
439 if( ! rtl_str_compare(
"-pw", argv[nArg] ) ||
440 ! rtl_str_compare(
"--password" , argv[nArg] ) )
444 fprintf( stderr,
"no password given\n" );
448 pPassword = argv[nArg];
450 else if( ! rtl_str_compare(
"-h", argv[nArg] ) ||
451 ! rtl_str_compare(
"--help", argv[nArg] ) )
456 else if( ! rtl_str_compare(
"-a", argv[nArg] ) ||
457 ! rtl_str_compare(
"--extract-add-streams", argv[nArg] ) )
461 else if( ! rtl_str_compare(
"-f", argv[nArg] ) ||
462 ! rtl_str_compare(
"--extract-fonts", argv[nArg] ) )
466 else if( ! rtl_str_compare(
"-o", argv[nArg] ) ||
467 ! rtl_str_compare(
"--extract-objects", argv[nArg] ) )
// The object list is split on ','; each token is split on ':' into an object
// number and an optional generation number, which stays 0 when omitted.
473 OString aObjs( argv[nArg] );
475 while( nIndex != -1 )
477 OString aToken( aObjs.getToken( 0,
',', nIndex ) );
478 sal_Int32 nObject = 0;
479 sal_Int32 nGeneration = 0;
480 sal_Int32 nGenIndex = 0;
481 nObject = aToken.getToken( 0,
':', nGenIndex ).toInt32();
482 if( nGenIndex != -1 )
483 nGeneration = aToken.getToken( 0,
':', nGenIndex ).toInt32();
484 s_aEmitObjects.push_back( std::pair<sal_Int32,sal_Int32>(nObject,nGeneration) );
490 fprintf( stderr,
"unrecognized option \"%s\"\n",
496 else if( pInFile ==
nullptr )
497 pInFile = argv[nArg];
498 else if( pOutFile ==
nullptr )
499 pOutFile = argv[nArg];
503 fprintf( stderr,
"no input file given\n" );
// Default output name: the input name, with a trailing ".pdf" (matched
// case-insensitively) removed where present, followed by "_unzip.pdf".
// NOTE(review): the condition that selects this default is not visible here.
508 OString aFile( pInFile );
509 if( aFile.getLength() > 0 )
511 if( aFile.getLength() > 4 )
513 if( aFile.matchIgnoreAsciiCase(
".pdf", aFile.getLength()-4 ) )
514 aOutFile.append( pInFile, aFile.getLength() - 4 );
516 aOutFile.append( aFile );
518 aOutFile.append(
"_unzip.pdf" );
519 pOutFile = aOutFile.getStr();
523 fprintf( stderr,
"no output file given\n" );
528 return handleFile( pInFile, pOutFile, pPassword, aHdl );
static int write_fonts(const char *i_pInFile, const char *i_pOutFile, PDFFile *i_pPDFFile)
static int write_unzipFile(const char *pInFile, const char *pOutFile, PDFFile *pPDFFile)
static std::unique_ptr< PDFEntry > read(const char *pFileName)
const wchar_t *typedef int(__stdcall *DllNativeUnregProc)(int
PDFObject * findObject(unsigned int nNumber, unsigned int nGeneration) const
unsigned int m_nGeneration
EmbeddedObjectRef * pObject
std::vector< std::unique_ptr< PDFEntry > > m_aSubElements
static void printHelp(const char *pExe)
oslFileHandle m_aReadHandle
int(* PDFFileHdl)(const char *, const char *, PDFFile *)
static std::vector< std::pair< sal_Int32, sal_Int32 > > s_aEmitObjects
virtual bool emit(EmitContext &rWriteContext) const override
SAL_IMPLEMENT_MAIN_WITH_ARGS(argc, argv)
static int handleFile(const char *pInFile, const char *pOutFile, const char *pPassword, PDFFileHdl pHdl)
static int write_addStreamArray(const char *pOutFile, PDFArray *pStreams, PDFFile *pPDFFile, const char *pInFile)
static int write_addStreams(const char *pInFile, const char *pOutFile, PDFFile *pPDFFile)
bool setupDecryptionData(const OString &rPwd) const
static osl::File * pStream
static int write_objects(const char *i_pInFile, const char *i_pOutFile, PDFFile *i_pPDFFile)
void writeStream(EmitContext &rContext, const PDFFile *pPDFFile) const