26#include <rtl/ustrbuf.hxx>
27#include <rtl/character.hxx>
28#include <rtl/tencinfo.h>
31#include <tools/datetime.hxx>
35#include <com/sun/star/beans/PropertyAttribute.hpp>
36#include <com/sun/star/document/XDocumentProperties.hpp>
101 : aValue(
std::move(_aValue))
102 , aToken(
std::move(_aToken))
106 "HTMLOption: unknown token" );
116 "GetNumber: Option not numerical" );
118 sal_Int32 nTmp = aTmp.toInt32();
119 return nTmp >= 0 ?
static_cast<sal_uInt32
>(nTmp) : 0;
126 "GetSNumber: Option not numerical" );
128 return aTmp.toInt32();
139 for( sal_Int32
i=0;
i<
aValue.getLength();
i++ )
142 if( c>=
'0' && c<=
'9' )
150 rNumbers.push_back( nNum );
157 rNumbers.push_back( nNum );
164 "GetColor: Option is not a color." );
166 OUString aTmp(
aValue.toAsciiLowerCase());
168 if (!aTmp.isEmpty() && aTmp[0] !=
'#')
175 for (sal_uInt32
i=0;
i<6; ++
i)
182 c =
nPos<aTmp.getLength() ? aTmp[
nPos++] :
'0';
184 c =
nPos<aTmp.getLength() ? aTmp[
nPos++] :
'0';
187 if( c >=
'0' && c <=
'9' )
189 else if( c >=
'a' && c <=
'f' )
190 nColor += (c + 0xa -
'a');
219 bNewDoc(bReadNewDoc),
224 bReadTextArea(false),
227 bEndTokenFound(false),
228 bPre_IgnoreNewPara(false),
229 bReadNextChar(false),
235 SetSrcEncoding(RTL_TEXTENCODING_UTF8);
272 nNextCh = GetNextChar();
278 RefGuard aRefGuard(*
this);
290 while( IsParserWorking() )
298 if( IsParserWorking() )
378constexpr bool HTML_ISPRINTABLE(
sal_Unicode c) {
return c >= 32 && c != 127; }
384 OUStringBuffer sTmpBuffer(
MAX_LEN );
385 bool bContinue =
true;
386 bool bEqSignFound =
false;
387 sal_uInt32 cQuote = 0
U;
389 while( bContinue && IsParserWorking() )
395 bEqSignFound =
false;
397 sTmpBuffer.append(
'&' );
400 sal_uInt64 nStreamPos = rInput.
Tell();
401 sal_uInt32 nLinePos = GetLinePos();
403 sal_uInt32 cChar = 0
U;
404 if(
'#' == (nNextCh = GetNextChar()) )
406 nNextCh = GetNextChar();
407 const bool bIsHex(
'x' == nNextCh );
408 const bool bIsDecOrHex( bIsHex || rtl::isAsciiDigit(nNextCh) );
413 nNextCh = GetNextChar();
414 while ( rtl::isAsciiHexDigit(nNextCh) )
416 cChar = cChar * 16U +
418 ? sal_uInt32( nNextCh -
'0' )
420 ? sal_uInt32( nNextCh -
'A' + 10 )
421 : sal_uInt32( nNextCh -
'a' + 10 ) ) );
422 nNextCh = GetNextChar();
429 cChar = cChar * 10U + sal_uInt32( nNextCh -
'0');
430 nNextCh = GetNextChar();
432 while( rtl::isAsciiDigit(nNextCh) );
435 if( RTL_TEXTENCODING_DONTKNOW != eSrcEnc &&
436 RTL_TEXTENCODING_UCS2 != eSrcEnc &&
437 RTL_TEXTENCODING_UTF8 != eSrcEnc &&
440 const sal_uInt32 convertFlags =
441 RTL_TEXTTOUNICODE_FLAGS_UNDEFINED_DEFAULT |
442 RTL_TEXTTOUNICODE_FLAGS_MBUNDEFINED_DEFAULT |
443 RTL_TEXTTOUNICODE_FLAGS_INVALID_DEFAULT;
445 char cEncodedChar =
static_cast<char>(cChar);
446 cChar = OUString(&cEncodedChar, 1, eSrcEnc, convertFlags).toChar();
452 cChar = cEncodedChar;
459 if (!rtl::isUnicodeCodePoint(cChar)
461 && cChar !=
'\r' && cChar !=
'\n' && cChar !=
'\t'))
466 else if( rtl::isAsciiAlpha( nNextCh ) )
472 sEntityBuffer.appendUtf32( nNextCh );
474 nNextCh = GetNextChar();
479 if( IsParserWorking() && !rInput.
eof() )
481 std::u16string_view sEntity(sEntityBuffer.subView(0,
nPos));
486 if( 0
U == cChar &&
';' != nNextCh )
489 static_cast<sal_uInt64
>(
nPos+1)*GetCharSize(),
490 "UTF-8 is failing here" );
491 for( sal_Int32
i =
nPos-1;
i>1;
i-- )
493 nNextCh = sEntityBuffer[
i];
494 sEntityBuffer.setLength(
i );
495 sEntity = sEntityBuffer.subView(0,
i);
499 rInput.
SeekRel( -
static_cast<sal_Int64
>
500 (
nPos-
i)*GetCharSize() );
501 nlLinePos -= sal_uInt32(
nPos-
i);
503 ClearTxtConvContext();
513 sTmpBuffer.append(
'&' );
516 static_cast<sal_uInt64
>(
nPos+1)*GetCharSize(),
517 "Wrong stream position" );
519 static_cast<sal_uInt32
>(
nPos+1),
520 "Wrong line position" );
521 rInput.
Seek( nStreamPos );
522 nlLinePos = nLinePos;
523 ClearTxtConvContext();
532 if (cChar == 1 || cChar == 2)
546 aToken.append( sTmpBuffer );
547 sTmpBuffer.setLength(0);
549 if( !aToken.isEmpty() )
554 static_cast<sal_uInt64
>(
nPos+1)*GetCharSize(),
555 "Wrong stream position" );
557 static_cast<sal_uInt32
>(
nPos+1),
558 "Wrong line position" );
559 rInput.
Seek( nStreamPos );
560 nlLinePos = nLinePos;
561 ClearTxtConvContext();
568 aToken.append(
" " );
580 else if( IsParserWorking() )
582 sTmpBuffer.append(
'&' );
587 bNextCh = (
';' == nNextCh);
588 if( cBreak==
'>' && (cChar==
'\\' || cChar==
'\'' ||
589 cChar==
'\"' || cChar==
' ') )
596 sTmpBuffer.append(
'\\' );
598 if( IsParserWorking() )
601 sTmpBuffer.appendUtf32( cChar );
607 if( !aToken.isEmpty() || !sTmpBuffer.isEmpty() )
613 rInput.
Seek( nStreamPos - GetCharSize() );
614 nlLinePos = nLinePos-1;
615 ClearTxtConvContext();
623 if(
'>'==cBreak && !cQuote )
625 sTmpBuffer.appendUtf32( nNextCh );
632 sTmpBuffer.append(
'\\' );
634 sTmpBuffer.append(
'\\' );
643 else if( cQuote && (cQuote==nNextCh ) )
646 sTmpBuffer.appendUtf32( nNextCh );
647 bEqSignFound =
false;
659 bEqSignFound =
false;
661 sTmpBuffer.appendUtf32( nNextCh );
670 sTmpBuffer.append(
' ' );
684 sTmpBuffer.appendUtf32( nNextCh );
695 if(
'\t'==nNextCh &&
bReadPRE &&
'>'!=cBreak )
711 sTmpBuffer.appendUtf32( nNextCh );
717 nNextCh = GetNextChar();
720 if( !aToken.isEmpty() || sTmpBuffer.getLength() > 1 )
723 aToken.append( sTmpBuffer );
724 sTmpBuffer.setLength(0);
732 }
while (
' ' == nNextCh ||
'\t' == nNextCh ||
733 '\r' == nNextCh ||
'\n' == nNextCh ||
740 bEqSignFound =
false;
741 if (nNextCh == cBreak && !cQuote)
749 sTmpBuffer.appendUtf32( nNextCh );
752 nNextCh = GetNextChar();
756 if( !sTmpBuffer.isEmpty() )
757 aToken.append( sTmpBuffer );
760 }
while( rtl::isAsciiAlpha( nNextCh ) || rtl::isAsciiDigit( nNextCh ) );
765 if( bContinue && bNextCh )
766 nNextCh = GetNextChar();
769 if( !sTmpBuffer.isEmpty() )
770 aToken.append( sTmpBuffer );
777 OUStringBuffer sTmpBuffer(
MAX_LEN );
792 bool bContinue =
true;
795 while( bContinue && IsParserWorking() )
805 aToken.append( sTmpBuffer );
806 sTmpBuffer.setLength(0);
809 sal_uInt64 nStreamPos = rInput.
Tell();
810 sal_uInt32 nLineNr = GetLineNr();
811 sal_uInt32 nLinePos = GetLinePos();
814 bool bOffState =
false;
815 if(
'/' == (nNextCh = GetNextChar()) )
818 nNextCh = GetNextChar();
820 else if(
'!' == nNextCh )
822 sTmpBuffer.appendUtf32( nNextCh );
823 nNextCh = GetNextChar();
827 while( (rtl::isAsciiAlpha(nNextCh) ||
'-'==nNextCh) &&
828 IsParserWorking() && sTmpBuffer.getLength() <
MAX_LEN )
830 sTmpBuffer.appendUtf32( nNextCh );
831 nNextCh = GetNextChar();
834 OUString aTok( sTmpBuffer.toString() );
835 aTok = aTok.toAsciiLowerCase();
855 if(
bReadComment &&
'>'==nNextCh && aTok.endsWith(
"--" ) )
896 rInput.
Seek( nStreamPos );
897 SetLineNr( nLineNr );
898 SetLinePos( nLinePos );
899 ClearTxtConvContext();
903 sTmpBuffer.setLength( 0 );
908 aToken.append(
"<" );
910 aToken.append(
"/" );
917 sTmpBuffer.appendUtf32( nNextCh );
920 bool bTwoMinus =
false;
921 nNextCh = GetNextChar();
922 while(
'-' == nNextCh && IsParserWorking() )
925 sTmpBuffer.appendUtf32( nNextCh );
926 nNextCh = GetNextChar();
929 if(
'>' == nNextCh && IsParserWorking() && bTwoMinus )
938 nNextCh = GetNextChar();
940 nNextCh = GetNextChar();
945 nNextCh = GetNextChar();
954 if( !aToken.isEmpty() || !sTmpBuffer.isEmpty() )
971 sTmpBuffer.appendUtf32( nNextCh );
976 if( !bContinue && !sTmpBuffer.isEmpty() )
978 aToken.append( sTmpBuffer );
979 sTmpBuffer.setLength(0);
982 if( bContinue && bNextCh )
983 nNextCh = GetNextChar();
986 if( IsParserWorking() )
1005 aToken.setLength( 0 );
1012 if( !IsParserWorking() )
1019 "Read a character despite </SCRIPT> was read?" );
1020 nNextCh = GetNextChar();
1021 if( !IsParserWorking() )
1034 bool bNextCh =
true;
1039 sal_uInt64 nStreamPos = rInput.
Tell();
1040 sal_uInt32 nLineNr = GetLineNr();
1041 sal_uInt32 nLinePos = GetLinePos();
1043 bool bOffState =
false;
1044 if(
'/' == (nNextCh = GetNextChar()) )
1047 nNextCh = GetNextChar();
1050 if (rtl::isAsciiAlpha(nNextCh) || nNextCh ==
'!' || nNextCh ==
'?')
1052 OUStringBuffer sTmpBuffer;
1054 sTmpBuffer.appendUtf32( nNextCh );
1055 nNextCh = GetNextChar();
1056 if (std::u16string_view(sTmpBuffer) ==
u"![CDATA[")
1058 if (bFuzzing && sTmpBuffer.getLength() > 1024)
1060 SAL_WARN(
"svtools",
"abandoning import for performance reasons with long tokens");
1064 }
while(
'>' != nNextCh &&
'/' != nNextCh && !rtl::isAsciiWhiteSpace( nNextCh ) &&
1066 IsParserWorking() && !rInput.
eof() );
1068 if( !sTmpBuffer.isEmpty() )
1070 aToken.append( sTmpBuffer );
1071 sTmpBuffer.setLength(0);
1075 while( rtl::isAsciiWhiteSpace( nNextCh ) && IsParserWorking() )
1076 nNextCh = GetNextChar();
1078 if( !IsParserWorking() )
1087 aToken = aToken.toString().toAsciiLowerCase();
1102 nRet =
static_cast<HtmlTokenId>(
static_cast<int>(nRet) + 1);
1119 aToken.append(
" " );
1120 sal_uInt64 nCStreamPos = 0;
1121 sal_uInt32 nCLineNr = 0;
1122 sal_uInt32 nCLinePos = 0;
1123 sal_Int32 nCStrLen = 0;
1127 sTmpBuffer = aToken;
1128 while( !bDone && !rInput.
eof() && IsParserWorking() )
1134 nCStreamPos = rInput.
Tell();
1135 nCStrLen = sTmpBuffer.getLength();
1136 nCLineNr = GetLineNr();
1137 nCLinePos = GetLinePos();
1139 bDone = sTmpBuffer.getLength() >= 2 && sTmpBuffer[sTmpBuffer.getLength() - 2] ==
'-' && sTmpBuffer[sTmpBuffer.getLength() - 1] ==
'-';
1141 sTmpBuffer.appendUtf32(nNextCh);
1144 || nNextCh ==
'\r' || nNextCh ==
'\n' || nNextCh ==
'\t')
1146 sTmpBuffer.appendUtf32(nNextCh);
1149 nNextCh = GetNextChar();
1151 aToken = sTmpBuffer;
1152 sTmpBuffer.setLength(0);
1153 if( !bDone && IsParserWorking() && nCStreamPos )
1155 rInput.
Seek( nCStreamPos );
1156 SetLineNr( nCLineNr );
1157 SetLinePos( nCLinePos );
1158 ClearTxtConvContext();
1159 aToken.truncate(nCStrLen);
1167 while (!bDone && !rInput.
eof() && IsParserWorking())
1171 if (sTmpBuffer.getLength() >= 2)
1173 bDone = sTmpBuffer[sTmpBuffer.getLength() - 2] ==
']'
1174 && sTmpBuffer[sTmpBuffer.getLength() - 1] ==
']';
1178 sTmpBuffer.setLength(sTmpBuffer.getLength() - 2);
1183 sTmpBuffer.appendUtf32(nNextCh);
1188 sTmpBuffer.appendUtf32(nNextCh);
1192 nNextCh = GetNextChar();
1195 aToken = sTmpBuffer;
1196 sTmpBuffer.setLength(0);
1201 aToken.setLength( 0 );
1205 if(
'>' != nNextCh && IsParserWorking() )
1216 aToken.setLength( aToken.getLength()-1 );
1222 rInput.
Seek( nStreamPos );
1223 SetLineNr( nLineNr );
1224 SetLinePos( nLinePos );
1225 ClearTxtConvContext();
1229 nNextCh = GetNextChar();
1247 rInput.
Seek( nStreamPos );
1248 SetLineNr( nLineNr );
1249 SetLinePos( nLinePos );
1250 ClearTxtConvContext();
1254 nNextCh = GetNextChar();
1260 aToken.setLength( 0 );
1262 else if(
'%' == nNextCh )
1266 sal_uInt64 nCStreamPos = rInput.
Tell();
1267 sal_uInt32 nCLineNr = GetLineNr(), nCLinePos = GetLinePos();
1271 sal_Unicode nLastTokenChar = !aToken.isEmpty() ? aToken[aToken.getLength() - 1] : 0;
1272 OUStringBuffer aTmpBuffer(aToken);
1273 while( !bDone && !rInput.
eof() && IsParserWorking() )
1275 bDone =
'>'==nNextCh && nLastTokenChar ==
'%';
1278 aTmpBuffer.appendUtf32(nNextCh);
1279 nLastTokenChar = aTmpBuffer[aTmpBuffer.getLength() - 1];
1280 nNextCh = GetNextChar();
1283 if( !bDone && IsParserWorking() )
1285 rInput.
Seek( nCStreamPos );
1286 SetLineNr( nCLineNr );
1287 SetLinePos( nCLinePos );
1288 ClearTxtConvContext();
1293 aToken = aTmpBuffer;
1294 aTmpBuffer.setLength(0);
1295 if( IsParserWorking() )
1298 aToken.setLength( 0 );
1310 if( IsParserWorking() )
1312 bNextCh =
'>' == nNextCh;
1371 if( (
'\n' != nNextCh ||
'\r' != c ) &&
1372 (
'\r' != nNextCh ||
'\n' != c ) )
1395 bNextCh = 0 == aToken.getLength();
1409 nNextCh = GetNextChar();
1429 bool bEscape =
false;
1430 while(
nPos < aToken.getLength() )
1432 bool bOldEscape = bEscape;
1434 if(
'\\'==aToken[
nPos] && !bOldEscape )
1436 aToken.remove(
nPos, 1 );
1454 while(
nPos < aToken.getLength() )
1457 if( rtl::isAsciiAlpha( aToken[
nPos] ) )
1461 sal_Int32 nStt =
nPos;
1467 while(
nPos < aToken.getLength() )
1469 cChar = aToken[
nPos];
1470 if (
'=' == cChar ||!HTML_ISPRINTABLE(cChar) || rtl::isAsciiWhiteSpace(cChar) )
1475 OUString
sName( aToken.subView( nStt,
nPos-nStt ) );
1480 "GetOption: unknown HTML option '" <<
sName <<
"'" );
1483 (!pNoConvertToken ||
nToken != *pNoConvertToken);
1485 while(
nPos < aToken.getLength() )
1487 cChar = aToken[
nPos];
1488 if ( HTML_ISPRINTABLE(cChar) && !rtl::isAsciiWhiteSpace(cChar) )
1494 if(
nPos!=aToken.getLength() &&
'='==cChar )
1498 while(
nPos < aToken.getLength() )
1500 cChar = aToken[
nPos];
1501 if ( HTML_ISPRINTABLE(cChar) &&
' ' != cChar &&
'\t' != cChar &&
'\r' != cChar &&
'\n' != cChar )
1506 if(
nPos != aToken.getLength() )
1510 if( (
'"'==cChar) ||
'\''==cChar )
1515 bool bEscape =
false;
1516 while(
nPos < aToken.getLength() && !bDone )
1518 bool bOldEscape = bEscape;
1520 cChar = aToken[
nPos];
1526 aToken.remove(
nPos, 1 );
1541 aToken.remove(
nPos, 1 );
1547 bDone = !bOldEscape && cChar==cEnd;
1560 if(
nPos!=aToken.getLength() )
1566 bool bEscape =
false;
1568 while(
nPos < aToken.getLength() && !bDone )
1570 bool bOldEscape = bEscape;
1576 bDone = !bOldEscape;
1598 aToken.remove(
nPos, 1 );
1604 if( HTML_ISPRINTABLE( c ) )
1617 aValue = aToken.subView( nStt, nLen );
1651 DBG_ASSERT( aToken.isEmpty(),
"Why is the token not empty?" );
1652 if (aToken.getLength() < nSpaces)
1655 OUStringBuffer
aBuf(aToken);
1836 if( !aToken.isEmpty() )
1844 aToken.append(
">" );
1886 bool bFound =
false;
1890 OUString
aName( rURL.copy(14) );
1912 OUString sTmp ( rURL );
1922enum class HtmlMeta {
1955 {
nullptr, HtmlMeta(0) }
1964 const uno::Reference<document::XDocumentProperties> & i_xDocProps,
1967 rtl_TextEncoding& o_rEnc )
1969 OUString
aName, aContent;
1970 HtmlMeta nAction = HtmlMeta::NONE;
1971 bool bHTTPEquiv =
false, bChanged =
false;
1973 for (
size_t i = aOptions.size();
i; )
1980 if ( HtmlMeta::NONE==nAction )
2003 if ( bHTTPEquiv || HtmlMeta::Description != nAction )
2006 aContent = aContent.replaceAll(
"\r",
"").replaceAll(
"\n",
"");
2014 if ( bHTTPEquiv && i_pHTTPHeader )
2017 if ( aContent.endsWith(
"\"") )
2019 aContent = aContent.copy( 0, aContent.getLength() - 1 );
2022 i_pHTTPHeader->
Append( aKeyValue );
2027 case HtmlMeta::Author:
2028 if (i_xDocProps.is()) {
2029 i_xDocProps->setAuthor( aContent );
2033 case HtmlMeta::Description:
2034 if (i_xDocProps.is()) {
2035 i_xDocProps->setDescription( aContent );
2039 case HtmlMeta::Keywords:
2040 if (i_xDocProps.is()) {
2041 i_xDocProps->setKeywords(
2042 ::comphelper::string::convertCommaSeparated(aContent));
2046 case HtmlMeta::Classification:
2047 if (i_xDocProps.is()) {
2048 i_xDocProps->setSubject( aContent );
2053 case HtmlMeta::ChangedBy:
2054 if (i_xDocProps.is()) {
2055 i_xDocProps->setModifiedBy( aContent );
2060 case HtmlMeta::Created:
2061 case HtmlMeta::Changed:
2062 if (i_xDocProps.is() && !aContent.isEmpty())
2064 ::util::DateTime uDT;
2068 sal_Int32 nIdx{ 0 };
2084 if (HtmlMeta::Created == nAction)
2085 i_xDocProps->setCreationDate(uDT);
2087 i_xDocProps->setModificationDate(uDT);
2092 case HtmlMeta::Refresh:
2093 DBG_ASSERT( !bHTTPEquiv || i_pHTTPHeader,
"Lost Reload-URL because of omitted MUST change." );
2096 case HtmlMeta::ContentType:
2097 if ( !aContent.isEmpty() )
2103 case HtmlMeta::NONE:
2106 if (i_xDocProps.is())
2108 uno::Reference<beans::XPropertyContainer> xUDProps
2109 = i_xDocProps->getUserDefinedProperties();
2111 xUDProps->addProperty(
aName,
2112 beans::PropertyAttribute::REMOVABLE,
2116 }
catch (uno::Exception &) {
2130 const uno::Reference<document::XDocumentProperties> & i_xDocProps,
2134 rtl_TextEncoding eEnc = RTL_TEXTENCODING_DONTKNOW;
2143 if (RTL_TEXTENCODING_DONTKNOW != eEnc &&
2144 rtl_isOctetTextEncoding( eEnc ) &&
2145 rtl_isOctetTextEncoding( GetSrcEncoding() ) )
2148 SetSrcEncoding( eEnc );
2161 auto const iter = aParameters.find(
"charset");
2162 if (iter != aParameters.end())
2169 return RTL_TEXTENCODING_DONTKNOW;
2174 rtl_TextEncoding eRet = RTL_TEXTENCODING_DONTKNOW;
2178 for(
bool bCont = pHTTPHeader->
GetFirst( aKV ); bCont;
2179 bCont = pHTTPHeader->
GetNext( aKV ) )
2197 if(RTL_TEXTENCODING_DONTKNOW != eEnc)
2199 SetSrcEncoding( eEnc );
void SetGreen(sal_uInt8 nGreen)
void SetRed(sal_uInt8 nRed)
void SetBlue(sal_uInt8 nBlue)
css::util::DateTime GetUNODateTime() const
Representation of an HTML option (=attribute in a start tag).
HtmlOptionId GetToken() const
HTMLTableFrame GetTableFrame() const
void GetNumbers(std::vector< sal_uInt32 > &rNumbers) const
const OUString & GetString() const
sal_Int32 GetSNumber() const
EnumT GetEnum(const HTMLOptionEnum< EnumT > *pOptEnums, EnumT nDflt=static_cast< EnumT >(0)) const
HTMLInputType GetInputType() const
void GetColor(Color &) const
HTMLOption(HtmlOptionId nTyp, OUString aToken, OUString aValue)
sal_uInt32 GetNumber() const
HTMLTableRules GetTableRules() const
HtmlTokenId mnPendingOffToken
OFF token pending for a <XX.../> ON/OFF ON token.
void SetNamespace(std::u16string_view rNamespace)
HtmlTokenId FilterXMP(HtmlTokenId nToken)
virtual ~HTMLParser() override
HtmlTokenId FilterListing(HtmlTokenId nToken)
static rtl_TextEncoding GetEncodingByHttpHeader(SvKeyValueIterator *pHTTPHeader)
virtual void AddMetaUserDefined(OUString const &i_rMetaName)
template method: called when ParseMetaOptions adds a user-defined meta
static rtl_TextEncoding GetEncodingByMIME(const OUString &rMime)
virtual SvParserState CallParser() override
HtmlTokenId FilterPRE(HtmlTokenId nToken)
virtual bool ParseMetaOptions(const css::uno::Reference< css::document::XDocumentProperties > &, SvKeyValueIterator *)
overriding method must call this implementation!
HtmlTokenId GetNextRawToken()
bool SetEncodingByHTTPHeader(SvKeyValueIterator *pHTTPHeader)
virtual void Continue(HtmlTokenId nToken) override
OUString maNamespace
XML namespace, in case of XHTML.
HtmlTokenId ScanText(const sal_Unicode cBreak=0U)
HtmlTokenId FilterToken(HtmlTokenId nToken)
virtual HtmlTokenId GetNextToken_() override
bool ParseMetaOptionsImpl(const css::uno::Reference< css::document::XDocumentProperties > &, SvKeyValueIterator *, const HTMLOptions &, rtl_TextEncoding &rEnc)
parse meta options into XDocumentProperties and encoding
static bool InternalImgToPrivateURL(OUString &rURL)
const HTMLOptions & GetOptions(HtmlOptionId const *pNoConvertToken=nullptr)
HTMLParser(SvStream &rIn, bool bReadNewDoc=true)
static bool parse(OUString const &rMediaType, OUString &rType, OUString &rSubType, INetContentTypeParameterList *pParameters=nullptr)
virtual void Append(const SvKeyValue &rKeyVal)
virtual bool GetFirst(SvKeyValue &rKeyVal)
Operation.
virtual bool GetNext(SvKeyValue &rKeyVal)
const OUString & GetKey() const
Operation.
const OUString & GetValue() const
sal_uInt64 Seek(sal_uInt64 nPos)
sal_uInt64 SeekRel(sal_Int64 nPos)
#define DBG_ASSERT(sCon, aError)
FastSaxParserImpl & m_rParser
HtmlTokenId GetHTMLToken(std::u16string_view rName)
sal_Unicode GetHTMLCharName(std::u16string_view rName)
HtmlOptionId GetHTMLOption(std::u16string_view rName)
sal_uInt32 GetHTMLColor(const OUString &rName)
#define OOO_STRING_SVTOOLS_HTML_IT_checkbox
#define OOO_STRING_SVTOOLS_HTML_head
#define OOO_STRING_SVTOOLS_HTML_META_changed
#define OOO_STRING_SVTOOLS_HTML_IT_password
#define OOO_STRING_SVTOOLS_HTML_body
#define OOO_STRING_SVTOOLS_HTML_comment
#define OOO_STRING_SVTOOLS_HTML_IT_button
#define OOO_STRING_SVTOOLS_HTML_TR_none
#define OOO_STRING_SVTOOLS_HTML_INT_ICON_notfound
#define OOO_STRING_SVTOOLS_HTML_TR_groups
#define OOO_STRING_SVTOOLS_HTML_TF_void
#define OOO_STRING_SVTOOLS_HTML_META_sdfootnote
#define OOO_STRING_SVTOOLS_HTML_IT_reset
#define OOO_STRING_SVTOOLS_HTML_IT_submit
#define OOO_STRING_SVTOOLS_HTML_IT_file
#define OOO_STRING_SVTOOLS_HTML_style
#define OOO_STRING_SVTOOLS_HTML_META_sdendnote
#define OOO_STRING_SVTOOLS_HTML_TR_rows
#define OOO_STRING_SVTOOLS_HTML_META_created
#define OOO_STRING_SVTOOLS_HTML_IT_image
#define OOO_STRING_SVTOOLS_HTML_TR_all
#define OOO_STRING_SVTOOLS_HTML_IT_hidden
#define OOO_STRING_SVTOOLS_HTML_TF_below
#define OOO_STRING_SVTOOLS_HTML_META_refresh
#define OOO_STRING_SVTOOLS_HTML_private_image
#define OOO_STRING_SVTOOLS_HTML_TR_cols
#define OOO_STRING_SVTOOLS_HTML_INT_ICON_embed
#define OOO_STRING_SVTOOLS_HTML_TF_above
#define OOO_STRING_SVTOOLS_HTML_TF_lhs
#define OOO_STRING_SVTOOLS_HTML_INT_ICON_baddata
#define OOO_STRING_SVTOOLS_HTML_TF_vsides
#define OOO_STRING_SVTOOLS_HTML_INT_ICON_insecure
#define OOO_STRING_SVTOOLS_HTML_TF_border
#define OOO_STRING_SVTOOLS_HTML_IT_text
#define OOO_STRING_SVTOOLS_HTML_internal_icon
#define OOO_STRING_SVTOOLS_HTML_META_keywords
#define OOO_STRING_SVTOOLS_HTML_IT_scribble
#define OOO_STRING_SVTOOLS_HTML_META_description
#define OOO_STRING_SVTOOLS_HTML_META_author
#define OOO_STRING_SVTOOLS_HTML_script
#define OOO_STRING_SVTOOLS_HTML_TF_hsides
#define OOO_STRING_SVTOOLS_HTML_META_changedby
#define OOO_STRING_SVTOOLS_HTML_INT_ICON_delayed
#define OOO_STRING_SVTOOLS_HTML_META_content_type
#define OOO_STRING_SVTOOLS_HTML_IT_radio
#define OOO_STRING_SVTOOLS_HTML_IT_range
#define OOO_STRING_SVTOOLS_HTML_META_classification
#define OOO_STRING_SVTOOLS_HTML_META_generator
#define OOO_STRING_SVTOOLS_HTML_TF_rhs
#define OOO_STRING_SVTOOLS_HTML_TF_box
constexpr bool isOffToken(HtmlTokenId nToken)
std::unordered_map< OString, INetContentTypeParameter > INetContentTypeParameterList
LineEnd GetSystemLineEnd()
TOOLS_DLLPUBLIC OString convertLineEnd(const OString &rIn, LineEnd eLineEnd)
#define SAL_WARN_IF(condition, area, stream)
#define SAL_WARN(area, stream)
B & padToLength(B &rBuffer, sal_Int32 nLen, U cFill)
OStringBuffer & padToLength(OStringBuffer &rBuffer, sal_Int32 nLength, char cFill='\0')
OString stripStart(const OString &rIn, char c)
sal_Int32 getTokenCount(std::string_view rIn, char cTok)
bool IsControlChar(sal_Unicode cChar)
std::enable_if< std::is_signed< T >::value, T >::type saturating_toggle_sign(T a)
constexpr bool ends_with(std::basic_string_view< charT, traits > sv, std::basic_string_view< charT, traits > x) noexcept
sal_Int32 toInt32(std::u16string_view str, sal_Int16 radix=10)
sal_Int64 toInt64(std::u16string_view str, sal_Int16 radix=10)
constexpr bool starts_with(std::basic_string_view< charT, traits > sv, std::basic_string_view< charT, traits > x) noexcept
std::basic_string_view< charT, traits > getToken(std::basic_string_view< charT, traits > sv, charT delimiter, std::size_t &position)
OString OUStringToOString(std::u16string_view str, ConnectionSettings const *settings)
bool ISO8601parseDateTime(std::u16string_view rString, css::util::DateTime &rDateTime)
HTMLOptionEnum< HTMLInputType > const aInputTypeOptEnums[]
HTMLOptionEnum< HTMLTableRules > const aTableRulesOptEnums[]
HTMLOptionEnum< HTMLTableFrame > const aTableFrameOptEnums[]
const sal_Int32 MAX_ENTITY_LEN(8)
HTMLOptionEnum< HtmlMeta > const aHTMLMetaNameTable[]
const sal_Int32 MAX_LEN(1024)
::std::vector< HTMLOption > HTMLOptions
TOOLS_DLLPUBLIC rtl_TextEncoding GetExtendedCompatibilityTextEncoding(rtl_TextEncoding eEncoding)