25#include <rtl/textenc.h>
26#include <rtl/tencinfo.h>
27#include <com/sun/star/io/NotConnectedException.hpp>
28#include <com/sun/star/io/XInputStream.hpp>
42 throw NotConnectedException();
47 nMaxToRead = ::std::max( sal_Int32(512) , nMaxToRead );
54 nRead =
m_in->readSomeBytes( seq , nMaxToRead );
56 if( nRead + seqStart.getLength())
62 if( seqStart.hasElements() )
65 sal_Int32
nLength = seq.getLength();
66 seq.realloc( seqStart.getLength() +
nLength );
68 memmove (seq.getArray() + seqStart.getLength(),
71 memcpy (seq.getArray(),
72 seqStart.getConstArray(),
73 seqStart.getLength());
89 seqStart = Sequence < sal_Int8 > ();
97 seq =
m_pUnicode2Text->convert( seqUnicode.getConstArray(), seqUnicode.getLength() );
109 nRead = seq.getLength();
119 const sal_Int8 *pSource = seq.getArray();
120 if (seq.getLength() < 5 || strncmp(
reinterpret_cast<const char *
>(pSource),
"<?xml", 5))
124 OString str(
reinterpret_cast<char const *
>(pSource), seq.getLength() );
128 int nMax = str.indexOf( 10 );
131 str = str.copy( 0 , nMax );
134 int nFound = str.indexOf(
" encoding" );
135 if( nFound < 0 )
return;
138 int nStart = str.indexOf(
"\"" , nFound );
139 if( nStart < 0 || str.indexOf(
"'" , nFound ) < nStart )
141 nStart = str.indexOf(
"'" , nFound );
142 nStop = str.indexOf(
"'" , nStart +1 );
146 nStop = str.indexOf(
"\"" , nStart +1);
149 if( nStart >= 0 && nStop >= 0 && nStart+1 < nStop )
152 memmove( &( seq.getArray()[nFound] ) ,
153 &( seq.getArray()[nStop+1]) ,
154 seq.getLength() - nStop -1);
155 seq.realloc( seq.getLength() - ( nStop+1 - nFound ) );
162 const sal_Int8 *pSource = seq.getConstArray();
163 bool bCheckIfFirstClosingBracketExists =
false;
165 if( seq.getLength() < 8 ) {
170 if( ! strncmp(
reinterpret_cast<const char *
>(pSource),
"<?xml", 5 ) ) {
172 bCheckIfFirstClosingBracketExists =
true;
174 else if( (
'<' == pSource[0] ||
'<' == pSource[2] ) &&
175 (
'?' == pSource[4] ||
'?' == pSource[6] ) )
178 bCheckIfFirstClosingBracketExists =
true;
180 else if( (
'<' == pSource[1] ||
'<' == pSource[3] ) &&
181 (
'?' == pSource[5] ||
'?' == pSource[7] ) )
184 bCheckIfFirstClosingBracketExists =
true;
187 if( bCheckIfFirstClosingBracketExists )
190 return std::find(seq.begin(), seq.end(),
'>') != seq.end();
202 if( seq.getLength() < 4 ) {
208 if (seq.getLength() >= 5 && !strncmp(
reinterpret_cast<const char *
>(pSource),
"<?xml", 5)) {
210 OString str(
reinterpret_cast<const char *
>(pSource), seq.getLength() );
214 int nMax = str.indexOf( 10 );
217 str = str.copy( 0 , nMax );
220 int nFound = str.indexOf(
" encoding" );
223 int nStart = str.indexOf(
"\"" , nFound );
224 if( nStart < 0 || str.indexOf(
"'" , nFound ) < nStart )
226 nStart = str.indexOf(
"'" , nFound );
227 nStop = str.indexOf(
"'" , nStart +1 );
231 nStop = str.indexOf(
"\"" , nStart +1);
233 if( nStart >= 0 && nStop >= 0 && nStart+1 < nStop )
236 m_sEncoding = str.copy( nStart+1 , nStop - nStart - 1 );
240 else if( 0xFE == pSource[0] &&
241 0xFF == pSource[1] ) {
246 else if( 0xFF == pSource[0] &&
247 0xFE == pSource[1] ) {
252 else if( 0x00 == pSource[0] && 0x3c == pSource[1] && 0x00 == pSource[2] && 0x3f == pSource[3] ) {
257 seq.realloc( seq.getLength() + 2 );
258 memmove( &( seq.getArray()[2] ) , seq.getArray() , seq.getLength() - 2 );
259 reinterpret_cast<sal_uInt8*
>(seq.getArray())[0] = 0xFE;
260 reinterpret_cast<sal_uInt8*
>(seq.getArray())[1] = 0xFF;
264 else if( 0x3c == pSource[0] && 0x00 == pSource[1] && 0x3f == pSource[2] && 0x00 == pSource[3] ) {
268 seq.realloc( seq.getLength() + 2 );
269 memmove( &( seq.getArray()[2] ) , seq.getArray() , seq.getLength() - 2 );
270 reinterpret_cast<sal_uInt8*
>(seq.getArray())[0] = 0xFF;
271 reinterpret_cast<sal_uInt8*
>(seq.getArray())[1] = 0xFE;
275 else if( 0xEF == pSource[0] &&
276 0xBB == pSource[1] &&
281 memmove( seq.getArray(), &( seq.getArray()[3] ), seq.getLength()-3 );
282 seq.realloc( seq.getLength() - 3 );
285 else if( 0x00 == pSource[0] && 0x00 == pSource[1] && 0x00 == pSource[2] && 0x3c == pSource[3] ) {
289 else if( 0x3c == pSource[0] && 0x00 == pSource[1] && 0x00 == pSource[2] && 0x00 == pSource[3] ) {
315 rtl_TextEncoding encoding = rtl_getTextEncodingFromMimeCharset(
m_sEncoding.getStr() );
316 if( encoding != RTL_TEXTENCODING_UTF8 )
319 m_pUnicode2Text = std::make_unique<Unicode2TextConverter>( RTL_TEXTENCODING_UTF8 );
329 : m_convText2Unicode(nullptr)
330 , m_contextText2Unicode(nullptr)
332 rtl_TextEncoding encoding = rtl_getTextEncodingFromMimeCharset( sEncoding.getStr() );
333 if( RTL_TEXTENCODING_DONTKNOW == encoding )
366 sal_Size nSrcCvtBytes = 0;
367 sal_Size nTargetCount = 0;
368 sal_Size nSourceCount = 0;
371 sal_Int32 nSourceSize = seqText.getLength() +
m_seqSource.getLength();
372 Sequence<sal_Unicode> seqUnicode ( nSourceSize );
374 const sal_Int8 *pbSource = seqText.getConstArray();
375 std::unique_ptr<sal_Int8[]> pbTempMem;
379 pbTempMem.reset(
new sal_Int8[ nSourceSize ]);
381 memcpy( &(pbTempMem[
m_seqSource.getLength() ]) , seqText.getConstArray() , seqText.getLength() );
382 pbSource = pbTempMem.get();
391 nTargetCount += rtl_convertTextToUnicode(
394 reinterpret_cast<const char *
>(&( pbSource[nSourceCount] )),
395 nSourceSize - nSourceCount ,
396 &( seqUnicode.getArray()[ nTargetCount ] ),
397 seqUnicode.getLength() - nTargetCount,
398 RTL_TEXTTOUNICODE_FLAGS_UNDEFINED_DEFAULT |
399 RTL_TEXTTOUNICODE_FLAGS_MBUNDEFINED_DEFAULT |
400 RTL_TEXTTOUNICODE_FLAGS_INVALID_DEFAULT,
403 nSourceCount += nSrcCvtBytes;
405 if( uiInfo & RTL_TEXTTOUNICODE_INFO_DESTBUFFERTOOSMALL ) {
407 seqUnicode.realloc( seqUnicode.getLength() * 2 );
412 if( uiInfo & RTL_TEXTTOUNICODE_INFO_SRCBUFFERTOOSMALL ) {
414 memcpy(
m_seqSource.getArray() , &(pbSource[nSourceCount]) , nSourceSize-nSourceCount );
418 seqUnicode.realloc( nTargetCount );
443 std::unique_ptr<sal_Unicode[]> puTempMem;
452 memcpy( puTempMem.get() ,
459 puSource = puTempMem.get();
466 sal_Size nTargetCount = 0;
467 sal_Size nSourceCount = 0;
470 sal_Size nSrcCvtChars;
475 sal_Int32 nSeqSize = nSourceSize * 3;
477 Sequence<sal_Int8> seqText( nSeqSize );
478 char *pTarget =
reinterpret_cast<char *
>(seqText.getArray());
481 nTargetCount += rtl_convertUnicodeToText(
484 &( puSource[nSourceCount] ),
485 nSourceSize - nSourceCount ,
486 &( pTarget[nTargetCount] ),
487 nSeqSize - nTargetCount,
488 RTL_UNICODETOTEXT_FLAGS_UNDEFINED_DEFAULT |
489 RTL_UNICODETOTEXT_FLAGS_INVALID_DEFAULT ,
492 nSourceCount += nSrcCvtChars;
494 if( uiInfo & RTL_UNICODETOTEXT_INFO_DESTBUFFERTOSMALL ) {
495 nSeqSize = nSeqSize *2;
496 seqText.realloc( nSeqSize );
497 pTarget =
reinterpret_cast<char *
>(seqText.getArray());
504 if( uiInfo & RTL_UNICODETOTEXT_INFO_SRCBUFFERTOSMALL ) {
507 &(puSource[nSourceCount]),
508 (nSourceSize - nSourceCount) *
sizeof(
sal_Unicode ) );
512 seqText.realloc( nTargetCount );
css::uno::Sequence< sal_Int8 > m_seqSource
void init(rtl_TextEncoding encoding)
rtl_TextToUnicodeContext m_contextText2Unicode
rtl_TextToUnicodeConverter m_convText2Unicode
Text2UnicodeConverter(const OString &sEncoding)
css::uno::Sequence< sal_Unicode > convert(const css::uno::Sequence< sal_Int8 > &)
css::uno::Sequence< sal_Int8 > convert(const sal_Unicode *, sal_Int32 nLength)
Unicode2TextConverter(rtl_TextEncoding encoding)
rtl_UnicodeToTextConverter m_convUnicode2Text
rtl_UnicodeToTextContext m_contextUnicode2Text
css::uno::Sequence< sal_Unicode > m_seqSource
css::uno::Reference< css::io::XInputStream > m_in
bool scanForEncoding(css::uno::Sequence< sal_Int8 > &seq)
sal_Int32 readAndConvert(css::uno::Sequence< sal_Int8 > &seq, sal_Int32 nMaxToRead)
void initializeDecoding()
std::unique_ptr< Text2UnicodeConverter > m_pText2Unicode
static void removeEncoding(css::uno::Sequence< sal_Int8 > &seq)
std::unique_ptr< Unicode2TextConverter > m_pUnicode2Text
static bool isEncodingRecognizable(const css::uno::Sequence< sal_Int8 > &seq)