24#include <rtl/textcvt.h>
25#include <rtl/tencinfo.h>
26#include <rtl/character.hxx>
28#include <unicode/ucsdet.h>
47 rtl_TextToUnicodeConverter
hConv;
56 ,
nToken(static_cast<T>(0))
60 ,
hContext( reinterpret_cast<rtl_TextToUnicodeContext>(1) )
70 , bTokenHasValue(false)
71 , nTokenId(static_cast<T>(0))
81 , pImplData( nullptr )
84 , bTokenHasValue( false )
85 , bFuzzing(
utl::ConfigManager::IsFuzzing())
87 , eSrcEnc( RTL_TEXTENCODING_DONTKNOW )
90 , bSwitchToUCS2(false)
91 , bRTF_InTextRead(false)
92 , nTokenStackSize( nStackSize )
96 if( nTokenStackSize < 3 )
99 pTokenStackPos = pTokenStack.get();
105 if( pImplData && pImplData->hConv )
107 rtl_destroyTextToUnicodeContext( pImplData->hConv,
108 pImplData->hContext );
109 rtl_destroyTextToUnicodeConverter( pImplData->hConv );
126template<
typename T> sal_uInt16
SvParser<T>::GetCharSize()
const {
return (RTL_TEXTENCODING_UCS2 == eSrcEnc) ? 2 : 1; }
135 if( pImplData && pImplData->hConv )
136 rtl_resetTextToUnicodeContext( pImplData->hConv, pImplData->hContext );
142 if( eEnc == eSrcEnc )
145 if( pImplData && pImplData->hConv )
147 rtl_destroyTextToUnicodeContext( pImplData->hConv,
148 pImplData->hContext );
149 rtl_destroyTextToUnicodeConverter( pImplData->hConv );
150 pImplData->hConv =
nullptr;
151 pImplData->hContext =
reinterpret_cast<rtl_TextToUnicodeContext
>(1);
154 if( rtl_isOctetTextEncoding(eEnc) ||
155 RTL_TEXTENCODING_UCS2 == eEnc )
160 pImplData->hConv = rtl_createTextToUnicodeConverter( eSrcEnc );
162 "SvParser::SetSrcEncoding: no converter for source encoding" );
163 if( !pImplData->hConv )
164 eSrcEnc = RTL_TEXTENCODING_DONTKNOW;
166 pImplData->hContext =
167 rtl_createTextToUnicodeContext( pImplData->hConv );
172 "SvParser::SetSrcEncoding: invalid source encoding" );
173 eSrcEnc = RTL_TEXTENCODING_DONTKNOW;
180 rInput.
Seek(nNextChPos);
181 nNextCh = GetNextChar();
192 if( bSwitchToUCS2 && 0 == rInput.
Tell() )
199 eSrcEnc = RTL_TEXTENCODING_UCS2;
201 SetSrcEncoding(RTL_TEXTENCODING_UTF8);
204 std::vector<char> buf(65535);
205 const size_t nSize = rInput.
ReadBytes(buf.data(), buf.size());
209 UErrorCode uerr = U_ZERO_ERROR;
210 UCharsetDetector* ucd = ucsdet_open(&uerr);
211 ucsdet_setText(ucd, buf.data(), nSize, &uerr);
212 if (
const UCharsetMatch* match = ucsdet_detect(ucd, &uerr))
214 const char* pEncodingName = ucsdet_getName(match, &uerr);
218 if (strcmp(
"UTF-8", pEncodingName) == 0)
220 SetSrcEncoding(RTL_TEXTENCODING_UTF8);
222 else if (strcmp(
"UTF-16LE", pEncodingName) == 0)
224 eSrcEnc = RTL_TEXTENCODING_UCS2;
225 rInput.
SetEndian(SvStreamEndian::LITTLE);
227 else if (strcmp(
"UTF-16BE", pEncodingName) == 0)
229 eSrcEnc = RTL_TEXTENCODING_UCS2;
239 bSwitchToUCS2 =
false;
243 nNextChPos = rInput.
Tell();
245 if( RTL_TEXTENCODING_UCS2 == eSrcEnc )
249 bErr = !rInput.
good();
253 if (rtl::isHighSurrogate(cUC))
255 const sal_uInt64
nPos = rInput.
Tell();
257 if (rtl::isLowSurrogate(cUC))
258 c = rtl::combineSurrogates(c, cUC);
271 bErr = !rInput.
good();
275 RTL_TEXTENCODING_DONTKNOW == eSrcEnc ||
276 RTL_TEXTENCODING_SYMBOL == eSrcEnc
280 c =
reinterpret_cast<unsigned char&
>( c1 );
285 assert(pImplData && pImplData->hConv &&
"no text converter!");
288 sal_uInt32 nInfo = 0;
290 nChars = rtl_convertTextToUnicode(
291 pImplData->hConv, pImplData->hContext,
293 RTL_TEXTTOUNICODE_FLAGS_UNDEFINED_ERROR|
294 RTL_TEXTTOUNICODE_FLAGS_MBUNDEFINED_ERROR|
295 RTL_TEXTTOUNICODE_FLAGS_INVALID_ERROR,
297 if( (nInfo&RTL_TEXTTOUNICODE_INFO_SRCBUFFERTOOSMALL) != 0 )
301 if( pImplData->hContext !=
reinterpret_cast<rtl_TextToUnicodeContext
>(1) )
304 while( (nInfo&RTL_TEXTTOUNICODE_INFO_SRCBUFFERTOOSMALL) != 0 )
307 bErr = !rInput.
good();
311 nChars = rtl_convertTextToUnicode(
312 pImplData->hConv, pImplData->hContext,
314 RTL_TEXTTOUNICODE_FLAGS_UNDEFINED_ERROR|
315 RTL_TEXTTOUNICODE_FLAGS_MBUNDEFINED_ERROR|
316 RTL_TEXTTOUNICODE_FLAGS_INVALID_ERROR,
321 if( 1 == nChars && 0 == nInfo )
323 c = sal_uInt32( sCh[0] );
325 else if( 2 == nChars && 0 == nInfo )
327 c = rtl::combineSurrogates( sCh[0], sCh[1] );
329 else if( 0 != nChars || 0 != nInfo )
331 DBG_ASSERT( (nInfo&RTL_TEXTTOUNICODE_INFO_SRCBUFFERTOOSMALL) == 0,
332 "source buffer is too small" );
333 DBG_ASSERT( (nInfo&~(RTL_TEXTTOUNICODE_INFO_SRCBUFFERTOOSMALL)) == 0,
334 "there is a conversion error" );
336 "there is a converted character, but an error" );
349 while( (nInfo&RTL_TEXTTOUNICODE_INFO_SRCBUFFERTOOSMALL) != 0 &&
353 bErr = !rInput.
good();
357 sBuffer[nLen++] = c1;
358 nChars = rtl_convertTextToUnicode(
359 pImplData->hConv,
nullptr, sBuffer, nLen, &cUC, 1,
360 RTL_TEXTTOUNICODE_FLAGS_UNDEFINED_ERROR|
361 RTL_TEXTTOUNICODE_FLAGS_MBUNDEFINED_ERROR|
362 RTL_TEXTTOUNICODE_FLAGS_INVALID_ERROR,
367 if( 1 == nChars && 0 == nInfo )
370 "no all bytes have been converted!" );
375 DBG_ASSERT( (nInfo&RTL_TEXTTOUNICODE_INFO_SRCBUFFERTOOSMALL) == 0,
376 "source buffer is too small" );
377 DBG_ASSERT( (nInfo&~(RTL_TEXTTOUNICODE_INFO_SRCBUFFERTOOSMALL)) == 0,
378 "there is a conversion error" );
380 "there is a converted character, but an error" );
384 c =
reinterpret_cast<unsigned char&
>( sBuffer[0] );
391 else if( 1 == nChars && 0 == nInfo )
395 "no all bytes have been converted!" );
398 else if( 0 != nChars || 0 != nInfo )
401 "there is a converted character, but an error" );
403 "there is no converted character and no error" );
406 c =
reinterpret_cast<unsigned char&
>( c1 );
413 while( 0 == nChars && !bErr );
416 if ( ! rtl::isUnicodeScalarValue( c ) )
421 if( ERRCODE_IO_PENDING == rInput.
GetError() )
444 T nRet =
static_cast<T
>(0);
446 if( !nTokenStackPos )
448 aToken.setLength( 0 );
450 bTokenHasValue =
false;
452 nRet = GetNextToken_();
458 if( pTokenStackPos == pTokenStack.get() + nTokenStackSize )
459 pTokenStackPos = pTokenStack.get();
467 aToken = pTokenStackPos->
sToken;
474 pTokenStackPos->
sToken = aToken;
489 pTokenStackPos = GetStackPtr( nCnt );
490 short nTmp = nTokenStackPos - nCnt;
493 else if( nTmp > nTokenStackSize )
494 nTmp = nTokenStackSize;
497 m_nTokenIndex -= nTmp;
500 aToken = pTokenStackPos->
sToken;
513 if( nCnt >= nTokenStackSize )
514 nCnt = (nTokenStackSize-1);
515 if( nCurrentPos + nCnt < nTokenStackSize )
516 nCurrentPos = sal::static_int_cast< sal_uInt8 >(nCurrentPos + nCnt);
518 nCurrentPos = sal::static_int_cast< sal_uInt8 >(
519 nCurrentPos + (nCnt - nTokenStackSize));
523 if( -nCnt >= nTokenStackSize )
524 nCnt = -nTokenStackSize+1;
525 if( -nCnt <= nCurrentPos )
526 nCurrentPos = sal::static_int_cast< sal_uInt8 >(nCurrentPos + nCnt);
528 nCurrentPos = sal::static_int_cast< sal_uInt8 >(
529 nCurrentPos + (nCnt + nTokenStackSize));
531 return pTokenStack.get() + nCurrentPos;
539 return pImplData ? pImplData->nSaveToken :
static_cast<T
>(0);
549 pImplData->nSaveToken =
static_cast<T
>(0);
552 pImplData->nFilePos = rInput.
Tell();
553 pImplData->nToken =
nToken;
555 pImplData->aToken = aToken;
556 pImplData->nlLineNr = nlLineNr;
557 pImplData->nlLinePos = nlLinePos;
558 pImplData->nTokenValue= nTokenValue;
559 pImplData->bTokenHasValue = bTokenHasValue;
560 pImplData->nNextCh = nNextCh;
570 if( ERRCODE_IO_PENDING == rInput.
GetError() )
572 aToken = pImplData->aToken;
573 nlLineNr = pImplData->nlLineNr;
574 nlLinePos = pImplData->nlLinePos;
575 nTokenValue= pImplData->nTokenValue;
576 bTokenHasValue=pImplData->bTokenHasValue;
577 nNextCh = pImplData->nNextCh;
579 pImplData->nSaveToken = pImplData->nToken;
581 rInput.
Seek( pImplData->nFilePos );
595 return static_cast<SvParser<T> *
>(instance)->NewDataRead(data);
606 Continue( pImplData->nToken );
608 if( ERRCODE_IO_PENDING == rInput.
GetError() )
670 mpImpl->maList.push_back(rKeyVal);
virtual void Append(const SvKeyValue &rKeyVal)
std::unique_ptr< Impl > mpImpl
virtual ~SvKeyValueIterator() override
virtual bool GetFirst(SvKeyValue &rKeyVal)
Operation.
SvKeyValueIterator()
Construction/Destruction.
virtual bool GetNext(SvKeyValue &rKeyVal)
virtual void ResetError()
void StartReadingUnicodeText(rtl_TextEncoding eReadBomCharSet)
void SetEndian(SvStreamEndian SvStreamEndian)
SvStream & ReadUtf16(sal_Unicode &rUtf16)
SvStream & ReadChar(char &rChar)
sal_uInt64 Seek(sal_uInt64 nPos)
std::size_t ReadBytes(void *pData, std::size_t nSize)
sal_uInt64 SeekRel(sal_Int64 nPos)
#define DBG_ASSERT(sCon, aError)
#define LINK(Instance, Class, Member)
#define SAL_WARN(area, stream)
SvKeyValueList_Impl maList
rtl_TextToUnicodeConverter hConv
rtl_TextToUnicodeContext hContext
std::vector< SvKeyValue > SvKeyValueList_Impl