LibreOffice Module svtools (master) 1
|
#include <parhtml.hxx>
Public Member Functions | |
HTMLParser (SvStream &rIn, bool bReadNewDoc=true) | |
virtual SvParserState | CallParser () override |
bool | IsNewDoc () const |
bool | IsInHeader () const |
bool | IsReadListing () const |
bool | IsReadXMP () const |
bool | IsReadPRE () const |
bool | IsReadScript () const |
bool | IsReadStyle () const |
void | StartPRE () |
void | FinishPRE () |
HtmlTokenId | FilterPRE (HtmlTokenId nToken) |
void | StartListing () |
void | FinishListing () |
HtmlTokenId | FilterListing (HtmlTokenId nToken) |
void | StartXMP () |
void | FinishXMP () |
HtmlTokenId | FilterXMP (HtmlTokenId nToken) |
void | FinishTextArea () |
void | FinishPREListingXMP () |
HtmlTokenId | FilterToken (HtmlTokenId nToken) |
void | ReadRawData (const OUString &rEndToken) |
void | UnescapeToken () |
const HTMLOptions & | GetOptions (HtmlOptionId const *pNoConvertToken=nullptr) |
virtual void | Continue (HtmlTokenId nToken) override |
virtual bool | ParseMetaOptions (const css::uno::Reference< css::document::XDocumentProperties > &, SvKeyValueIterator *) |
overriding method must call this implementation! More... | |
void | ParseScriptOptions (OUString &rLangString, std::u16string_view rBaseURL, HTMLScriptLanguage &rLang, OUString &rSrc, OUString &rLibrary, OUString &rModule) |
bool | SetEncodingByHTTPHeader (SvKeyValueIterator *pHTTPHeader) |
Public Member Functions inherited from SvParser< HtmlTokenId > | |
SvParser (SvStream &rIn, sal_uInt8 nStackSize=3) | |
virtual SvParserState | CallParser ()=0 |
SvParserState | GetStatus () const |
sal_uInt32 | GetLineNr () const |
sal_uInt32 | GetLinePos () const |
void | IncLineNr () |
sal_uInt32 | IncLinePos () |
void | SetLineNr (sal_uInt32 nlNum) |
void | SetLinePos (sal_uInt32 nlPos) |
sal_uInt32 | GetNextChar () |
void | RereadLookahead () |
bool | IsParserWorking () const |
Link< LinkParamNone *, void > | GetAsynchCallLink () const |
void | SaveState (T nToken) |
void | RestoreState () |
virtual void | Continue (T nToken) |
void | SetSrcEncoding (rtl_TextEncoding eSrcEnc) |
rtl_TextEncoding | GetSrcEncoding () const |
void | SetSwitchToUCS2 (bool bSet) |
bool | IsSwitchToUCS2 () const |
sal_uInt16 | GetCharSize () const |
T | GetSaveToken () const |
Public Member Functions inherited from SvRefBase | |
SvRefBase () | |
SvRefBase (const SvRefBase &) | |
SvRefBase & | operator= (const SvRefBase &) |
void | RestoreNoDelete () |
void | AddNextRef () |
void | AddFirstRef () |
void | ReleaseRef () |
unsigned int | GetRefCount () const |
Static Public Member Functions | |
static void | RemoveSGMLComment (OUString &rString) |
static bool | InternalImgToPrivateURL (OUString &rURL) |
static rtl_TextEncoding | GetEncodingByHttpHeader (SvKeyValueIterator *pHTTPHeader) |
Protected Member Functions | |
HtmlTokenId | ScanText (const sal_Unicode cBreak=0U) |
HtmlTokenId | GetNextRawToken () |
virtual HtmlTokenId | GetNextToken_ () override |
virtual | ~HTMLParser () override |
void | FinishHeader () |
void | SetNamespace (std::u16string_view rNamespace) |
virtual void | AddMetaUserDefined (OUString const &i_rMetaName) |
template method: called when ParseMetaOptions adds a user-defined meta More... | |
Protected Member Functions inherited from SvParser< HtmlTokenId > | |
T | SkipToken (short nCnt=-1) |
TokenStackType * | GetStackPtr (short nCnt) |
T | GetNextToken () |
virtual T | GetNextToken_ ()=0 |
virtual void | NextToken (T nToken)=0 |
virtual | ~SvParser () override |
void | ClearTxtConvContext () |
Protected Member Functions inherited from SvRefBase | |
virtual | ~SvRefBase () COVERITY_NOEXCEPT_FALSE |
Static Protected Member Functions | |
static rtl_TextEncoding | GetEncodingByMIME (const OUString &rMime) |
Protected Attributes | |
OUString | sSaveToken |
Protected Attributes inherited from SvParser< HtmlTokenId > | |
SvStream & | rInput |
OUStringBuffer | aToken |
sal_uInt32 | nlLineNr |
sal_uInt32 | nlLinePos |
std::unique_ptr< SvParser_Impl< T > > | pImplData |
tools::Long | m_nTokenIndex |
tools::Long | nTokenValue |
bool | bTokenHasValue |
bool | bFuzzing |
SvParserState | eState |
rtl_TextEncoding | eSrcEnc |
sal_uInt64 | nNextChPos |
sal_uInt32 | nNextCh |
bool | bSwitchToUCS2 |
bool | bRTF_InTextRead |
Private Member Functions | |
bool | ParseMetaOptionsImpl (const css::uno::Reference< css::document::XDocumentProperties > &, SvKeyValueIterator *, const HTMLOptions &, rtl_TextEncoding &rEnc) |
parse meta options into XDocumentProperties and encoding More... | |
Private Attributes | |
HTMLOptions | maOptions |
bool | bNewDoc: 1 |
bool | bIsInHeader: 1 |
bool | bReadListing: 1 |
bool | bReadXMP: 1 |
bool | bReadPRE: 1 |
bool | bReadTextArea: 1 |
bool | bReadScript: 1 |
bool | bReadStyle: 1 |
bool | bEndTokenFound: 1 |
bool | bPre_IgnoreNewPara: 1 |
bool | bReadNextChar: 1 |
bool | bReadComment: 1 |
sal_uInt32 | nPre_LinePos |
HtmlTokenId | mnPendingOffToken |
OFF token pending for a <XX.../> ON/OFF ON token. More... | |
OUString | aEndToken |
OUString | maNamespace |
XML namespace, in case of XHTML. More... | |
Definition at line 146 of file parhtml.hxx.
|
override protected virtual |
Definition at line 238 of file parhtml.cxx.
HTMLParser::HTMLParser | ( | SvStream & | rIn, | bool | bReadNewDoc = true | ) |
Definition at line 217 of file parhtml.cxx.
References NONE.
|
protectedvirtual |
template method: called when ParseMetaOptions adds a user-defined meta
Definition at line 1959 of file parhtml.cxx.
Referenced by ParseMetaOptionsImpl().
|
override virtual |
Implements SvParser< HtmlTokenId >.
Definition at line 269 of file parhtml.cxx.
References bPre_IgnoreNewPara, Continue(), NONE, nPre_LinePos, and Working.
|
override virtual |
Definition at line 285 of file parhtml.cxx.
References FilterToken(), NONE, and nToken.
Referenced by CallParser().
HtmlTokenId HTMLParser::FilterListing | ( | HtmlTokenId | nToken | ) |
Definition at line 1855 of file parhtml.cxx.
References bPre_IgnoreNewPara, isOffToken(), NEWPARA, NONBREAKSPACE, NONE, nToken, ONOFF_START, SOFTHYPH, TEXTTOKEN, UNKNOWNCONTROL_OFF, and UNKNOWNCONTROL_ON.
Referenced by FilterToken().
HtmlTokenId HTMLParser::FilterPRE | ( | HtmlTokenId | nToken | ) |
Definition at line 1633 of file parhtml.cxx.
References ABBREVIATION_OFF, ABBREVIATION_ON, aBuf, ACRONYM_OFF, ACRONYM_ON, ADDRESS_OFF, ADDRESS_ON, ANCHOR_OFF, ANCHOR_ON, APPLET_OFF, APPLET_ON, AUTHOR_OFF, AUTHOR_ON, BASEFONT_OFF, BASEFONT_ON, BIGPRINT_OFF, BIGPRINT_ON, BLINK_OFF, BLINK_ON, BLOCKQUOTE_OFF, BLOCKQUOTE_ON, BODY_ON, BOLD_OFF, BOLD_ON, bPre_IgnoreNewPara, CAPTION_OFF, CAPTION_ON, CENTER_OFF, CENTER_ON, CITATION_OFF, CITATION_ON, CODE_OFF, CODE_ON, COL_OFF, COL_ON, COLGROUP_OFF, COLGROUP_ON, DBG_ASSERT, DEFINSTANCE_OFF, DEFINSTANCE_ON, DELETEDTEXT_OFF, DELETEDTEXT_ON, DIVISION_OFF, DIVISION_ON, EMBED, EMPHASIS_OFF, EMPHASIS_ON, FONT_OFF, FONT_ON, FORM_OFF, FORM_ON, HEAD1_OFF, HEAD1_ON, HEAD2_OFF, HEAD2_ON, HEAD3_OFF, HEAD3_ON, HEAD4_OFF, HEAD4_ON, HEAD5_OFF, HEAD5_ON, HEAD6_OFF, HEAD6_ON, HORZRULE, IMAGE, INPUT, INSERTEDTEXT_OFF, INSERTEDTEXT_ON, isOffToken(), ITALIC_OFF, ITALIC_ON, KEYBOARD_OFF, KEYBOARD_ON, LANGUAGE_OFF, LANGUAGE_ON, LINEBREAK, NEWPARA, NONE, nPre_LinePos, nToken, ONOFF_START, OPTION, padToLength(), comphelper::string::padToLength(), PARABREAK_ON, PARAM, PERSON_OFF, PERSON_ON, RAWDATA, SAMPLE_OFF, SAMPLE_ON, SCRIPT_OFF, SCRIPT_ON, SELECT_OFF, SELECT_ON, SHORTQUOTE_OFF, SHORTQUOTE_ON, SMALLPRINT_OFF, SMALLPRINT_ON, SPAN_OFF, SPAN_ON, STRIKE_OFF, STRIKE_ON, STRIKETHROUGH_OFF, STRIKETHROUGH_ON, STRONG_OFF, STRONG_ON, SUBSCRIPT_OFF, SUBSCRIPT_ON, SUPERSCRIPT_OFF, SUPERSCRIPT_ON, TABCHAR, TABLE_OFF, TABLE_ON, TABLEDATA_OFF, TABLEDATA_ON, TABLEHEADER_OFF, TABLEHEADER_ON, TABLEROW_OFF, TABLEROW_ON, TBODY_OFF, TBODY_ON, TELETYPE_OFF, TELETYPE_ON, TEXTAREA_OFF, TEXTAREA_ON, TEXTTOKEN, TFOOT_OFF, TFOOT_ON, THEAD_OFF, THEAD_ON, UNDERLINE_OFF, UNDERLINE_ON, UNKNOWNCONTROL_OFF, UNKNOWNCONTROL_ON, VARIABLE_OFF, and VARIABLE_ON.
Referenced by FilterToken().
HtmlTokenId HTMLParser::FilterToken | ( | HtmlTokenId | nToken | ) |
Definition at line 305 of file parhtml.cxx.
References bIsInHeader, BODY_OFF, BODY_ON, bReadListing, bReadPRE, bReadXMP, FilterListing(), FilterPRE(), FilterXMP(), FinishListing(), FinishPRE(), FinishXMP(), FRAMESET_ON, HEAD_OFF, HEAD_ON, HTML_OFF, LISTING_OFF, LISTING_ON, NONE, nToken, PREFORMTXT_OFF, PREFORMTXT_ON, StartListing(), StartPRE(), StartXMP(), XMP_OFF, and XMP_ON.
Referenced by Continue().
HtmlTokenId HTMLParser::FilterXMP | ( | HtmlTokenId | nToken | ) |
Definition at line 1814 of file parhtml.cxx.
References bPre_IgnoreNewPara, isOffToken(), NEWPARA, NONBREAKSPACE, NONE, nToken, ONOFF_START, SOFTHYPH, sSaveToken, TEXTTOKEN, and UnescapeToken().
Referenced by FilterToken().
|
inline protected |
Definition at line 186 of file parhtml.hxx.
|
inline |
Definition at line 209 of file parhtml.hxx.
Referenced by FilterToken().
|
inline |
Definition at line 205 of file parhtml.hxx.
Referenced by FilterToken().
|
inline |
Definition at line 219 of file parhtml.hxx.
|
inline |
Definition at line 216 of file parhtml.hxx.
|
inline |
Definition at line 213 of file parhtml.hxx.
Referenced by FilterToken().
|
static |
Definition at line 2172 of file parhtml.cxx.
References GetEncodingByMIME(), SvKeyValueIterator::GetFirst(), SvKeyValue::GetKey(), SvKeyValueIterator::GetNext(), SvKeyValue::GetValue(), and OOO_STRING_SVTOOLS_HTML_META_content_type.
Referenced by SetEncodingByHTTPHeader().
|
static protected |
Definition at line 2154 of file parhtml.cxx.
References GetExtendedCompatibilityTextEncoding(), INetContentTypeParameter::m_sValue, OUStringToOString(), INetContentTypes::parse(), and sType.
Referenced by GetEncodingByHttpHeader(), and ParseMetaOptionsImpl().
|
protected |
Definition at line 775 of file parhtml.cxx.
References aEndToken, bEndTokenFound, bReadComment, bReadScript, bReadStyle, SvStream::eof(), linguistic::IsControlChar(), MAX_LEN(), NONE, nToken, OOO_STRING_SVTOOLS_HTML_body, OOO_STRING_SVTOOLS_HTML_comment, OOO_STRING_SVTOOLS_HTML_head, OOO_STRING_SVTOOLS_HTML_script, OOO_STRING_SVTOOLS_HTML_style, RAWDATA, SvStream::Seek(), and SvStream::Tell().
Referenced by GetNextToken_().
|
override protected virtual |
Implements SvParser< HtmlTokenId >.
Definition at line 995 of file parhtml.cxx.
References Accepted, aEndToken, bEndTokenFound, bReadListing, bReadNextChar, bReadPRE, bReadScript, bReadStyle, bReadTextArea, bReadXMP, CDATA, COMMENT, DBG_ASSERT, o3tl::ends_with(), SvStream::eof(), Error, GetHTMLToken(), GetNextRawToken(), INVALID, linguistic::IsControlChar(), LINEBREAK, LINEFEEDCHAR, maNamespace, maOptions, mnPendingOffToken, NEWPARA, NONE, ONOFF_START, Pending, SAL_WARN, ScanText(), SCRIPT_OFF, SCRIPT_ON, SvStream::Seek(), sSaveToken, o3tl::starts_with(), STYLE_OFF, STYLE_ON, TABCHAR, SvStream::Tell(), TEXTAREA_OFF, TEXTAREA_ON, TEXTTOKEN, u, UNKNOWNCONTROL_OFF, UNKNOWNCONTROL_ON, and Working.
const HTMLOptions & HTMLParser::GetOptions | ( | HtmlOptionId const * | pNoConvertToken = nullptr | ) |
Definition at line 1446 of file parhtml.cxx.
References GetHTMLOption(), maOptions, nPos, nToken, SAL_WARN_IF, SCRIPT_END, SCRIPT_START, sName, and UNKNOWN.
Referenced by ParseMetaOptions(), and ParseScriptOptions().
|
static |
Definition at line 1884 of file parhtml.cxx.
References aName, OOO_STRING_SVTOOLS_HTML_INT_ICON_baddata, OOO_STRING_SVTOOLS_HTML_INT_ICON_delayed, OOO_STRING_SVTOOLS_HTML_INT_ICON_embed, OOO_STRING_SVTOOLS_HTML_INT_ICON_insecure, OOO_STRING_SVTOOLS_HTML_INT_ICON_notfound, OOO_STRING_SVTOOLS_HTML_internal_icon, and OOO_STRING_SVTOOLS_HTML_private_image.
|
inline |
Definition at line 196 of file parhtml.hxx.
|
inline |
Definition at line 195 of file parhtml.hxx.
|
inline |
Definition at line 197 of file parhtml.hxx.
|
inline |
Definition at line 199 of file parhtml.hxx.
|
inline |
Definition at line 200 of file parhtml.hxx.
|
inline |
Definition at line 201 of file parhtml.hxx.
|
inline |
Definition at line 198 of file parhtml.hxx.
|
virtual |
overriding method must call this implementation!
Definition at line 2129 of file parhtml.cxx.
References CONTENT, GetExtendedCompatibilityTextEncoding(), GetOptions(), and ParseMetaOptionsImpl().
|
private |
parse meta options into XDocumentProperties and encoding
Definition at line 1963 of file parhtml.cxx.
References AddMetaUserDefined(), aHTMLMetaNameTable, aName, SvKeyValueIterator::Append(), CHARSET, CONTENT, convertLineEnd(), DBG_ASSERT, GetEncodingByMIME(), HTMLOption::GetEnum(), GetExtendedCompatibilityTextEncoding(), HTMLOption::GetString(), GetSystemLineEnd(), HTMLOption::GetToken(), o3tl::getToken(), comphelper::string::getTokenCount(), DateTime::GetUNODateTime(), HTTPEQUIV, i, utl::ISO8601parseDateTime(), NAME, OUStringToOString(), o3tl::saturating_toggle_sign(), o3tl::toInt32(), and o3tl::toInt64().
Referenced by ParseMetaOptions().
void HTMLParser::ParseScriptOptions | ( | OUString & | rLangString, | std::u16string_view | rBaseURL, | HTMLScriptLanguage & | rLang, | OUString & | rSrc, | OUString & | rLibrary, | OUString & | rModule | ) |
Definition at line 36 of file htmlsupp.cxx.
References aScriptLangOptEnums, INetURLObject::GetAbsURL(), HTMLOption::GetEnum(), GetOptions(), HTMLOption::GetString(), HTMLOption::GetToken(), i, JavaScript, LANGUAGE, SDLIBRARY, SDMODULE, SRC, and Unknown.
|
inline |
Definition at line 227 of file parhtml.hxx.
|
static |
Definition at line 81 of file htmlsupp.cxx.
References idx, nPos, and comphelper::string::stripEnd().
|
protected |
Definition at line 382 of file parhtml.cxx.
References bReadListing, bReadNextChar, bReadPRE, bReadTextArea, bReadXMP, DBG_ASSERT, SvStream::eof(), GetHTMLCharName(), i, linguistic::IsControlChar(), MAX_ENTITY_LEN(), MAX_LEN(), NONBREAKSPACE, NONE, nPos, Pending, SvStream::Seek(), SvStream::SeekRel(), SOFTHYPH, SvStream::Tell(), TEXTTOKEN, and U.
Referenced by GetNextToken_().
bool HTMLParser::SetEncodingByHTTPHeader | ( | SvKeyValueIterator * | pHTTPHeader | ) |
Definition at line 2193 of file parhtml.cxx.
References GetEncodingByHttpHeader().
|
protected |
Definition at line 242 of file parhtml.cxx.
References maNamespace.
|
inline |
Definition at line 279 of file parhtml.hxx.
References bPre_IgnoreNewPara, bReadListing, and nPre_LinePos.
Referenced by FilterToken().
|
inline |
Definition at line 272 of file parhtml.hxx.
References bPre_IgnoreNewPara, bReadPRE, and nPre_LinePos.
Referenced by FilterToken().
|
inline |
Definition at line 286 of file parhtml.hxx.
References bPre_IgnoreNewPara, bReadXMP, and nPre_LinePos.
Referenced by FilterToken().
void HTMLParser::UnescapeToken | ( | ) |
|
private |
Definition at line 169 of file parhtml.hxx.
Referenced by GetNextRawToken(), and GetNextToken_().
|
private |
Definition at line 159 of file parhtml.hxx.
Referenced by GetNextRawToken(), and GetNextToken_().
|
private |
Definition at line 152 of file parhtml.hxx.
Referenced by FilterToken().
|
private |
Definition at line 151 of file parhtml.hxx.
|
private |
Definition at line 161 of file parhtml.hxx.
Referenced by CallParser(), FilterListing(), FilterPRE(), FilterXMP(), StartListing(), StartPRE(), and StartXMP().
|
private |
Definition at line 163 of file parhtml.hxx.
Referenced by GetNextRawToken().
|
private |
Definition at line 153 of file parhtml.hxx.
Referenced by FilterToken(), GetNextToken_(), ScanText(), and StartListing().
|
private |
Definition at line 162 of file parhtml.hxx.
Referenced by GetNextToken_(), and ScanText().
|
private |
Definition at line 155 of file parhtml.hxx.
Referenced by FilterToken(), GetNextToken_(), ScanText(), and StartPRE().
|
private |
Definition at line 157 of file parhtml.hxx.
Referenced by GetNextRawToken(), and GetNextToken_().
|
private |
Definition at line 158 of file parhtml.hxx.
Referenced by GetNextRawToken(), and GetNextToken_().
|
private |
Definition at line 156 of file parhtml.hxx.
Referenced by GetNextToken_(), and ScanText().
|
private |
Definition at line 154 of file parhtml.hxx.
Referenced by FilterToken(), GetNextToken_(), ScanText(), and StartXMP().
|
private |
XML namespace, in case of XHTML.
Definition at line 172 of file parhtml.hxx.
Referenced by GetNextToken_(), and SetNamespace().
|
mutable private |
Definition at line 149 of file parhtml.hxx.
Referenced by GetNextToken_(), and GetOptions().
|
private |
OFF token pending for a <XX.../> ON/OFF ON token.
Definition at line 167 of file parhtml.hxx.
Referenced by GetNextToken_().
|
private |
Definition at line 165 of file parhtml.hxx.
Referenced by CallParser(), FilterPRE(), StartListing(), StartPRE(), and StartXMP().
|
protected |
Definition at line 175 of file parhtml.hxx.
Referenced by FilterXMP(), and GetNextToken_().