LibreOffice Module svtools (master) 1
Public Member Functions | Static Public Member Functions | Protected Member Functions | Static Protected Member Functions | Protected Attributes | Private Member Functions | Private Attributes | List of all members
HTMLParser Class Reference

#include <parhtml.hxx>

Inheritance diagram for HTMLParser:
[legend]
Collaboration diagram for HTMLParser:
[legend]

Public Member Functions

 HTMLParser (SvStream &rIn, bool bReadNewDoc=true)
 
virtual SvParserState CallParser () override
 
bool IsNewDoc () const
 
bool IsInHeader () const
 
bool IsReadListing () const
 
bool IsReadXMP () const
 
bool IsReadPRE () const
 
bool IsReadScript () const
 
bool IsReadStyle () const
 
void StartPRE ()
 
void FinishPRE ()
 
HtmlTokenId FilterPRE (HtmlTokenId nToken)
 
void StartListing ()
 
void FinishListing ()
 
HtmlTokenId FilterListing (HtmlTokenId nToken)
 
void StartXMP ()
 
void FinishXMP ()
 
HtmlTokenId FilterXMP (HtmlTokenId nToken)
 
void FinishTextArea ()
 
void FinishPREListingXMP ()
 
HtmlTokenId FilterToken (HtmlTokenId nToken)
 
void ReadRawData (const OUString &rEndToken)
 
void UnescapeToken ()
 
const HTMLOptions & GetOptions (HtmlOptionId const *pNoConvertToken=nullptr)
 
virtual void Continue (HtmlTokenId nToken) override
 
virtual bool ParseMetaOptions (const css::uno::Reference< css::document::XDocumentProperties > &, SvKeyValueIterator *)
 overriding method must call this implementation! More...
 
void ParseScriptOptions (OUString &rLangString, std::u16string_view rBaseURL, HTMLScriptLanguage &rLang, OUString &rSrc, OUString &rLibrary, OUString &rModule)
 
bool SetEncodingByHTTPHeader (SvKeyValueIterator *pHTTPHeader)
 
- Public Member Functions inherited from SvParser< HtmlTokenId >
 SvParser (SvStream &rIn, sal_uInt8 nStackSize=3)
 
virtual SvParserState CallParser ()=0
 
SvParserState GetStatus () const
 
sal_uInt32 GetLineNr () const
 
sal_uInt32 GetLinePos () const
 
void IncLineNr ()
 
sal_uInt32 IncLinePos ()
 
void SetLineNr (sal_uInt32 nlNum)
 
void SetLinePos (sal_uInt32 nlPos)
 
sal_uInt32 GetNextChar ()
 
void RereadLookahead ()
 
bool IsParserWorking () const
 
Link< LinkParamNone *, void > GetAsynchCallLink () const
 
void SaveState (T nToken)
 
void RestoreState ()
 
virtual void Continue (T nToken)
 
void SetSrcEncoding (rtl_TextEncoding eSrcEnc)
 
rtl_TextEncoding GetSrcEncoding () const
 
void SetSwitchToUCS2 (bool bSet)
 
bool IsSwitchToUCS2 () const
 
sal_uInt16 GetCharSize () const
 
T GetSaveToken () const
 
- Public Member Functions inherited from SvRefBase
 SvRefBase ()
 
 SvRefBase (const SvRefBase &)
 
SvRefBase & operator= (const SvRefBase &)
 
void RestoreNoDelete ()
 
void AddNextRef ()
 
void AddFirstRef ()
 
void ReleaseRef ()
 
unsigned int GetRefCount () const
 

Static Public Member Functions

static void RemoveSGMLComment (OUString &rString)
 
static bool InternalImgToPrivateURL (OUString &rURL)
 
static rtl_TextEncoding GetEncodingByHttpHeader (SvKeyValueIterator *pHTTPHeader)
 

Protected Member Functions

HtmlTokenId ScanText (const sal_Unicode cBreak=0U)
 
HtmlTokenId GetNextRawToken ()
 
virtual HtmlTokenId GetNextToken_ () override
 
virtual ~HTMLParser () override
 
void FinishHeader ()
 
void SetNamespace (std::u16string_view rNamespace)
 
virtual void AddMetaUserDefined (OUString const &i_rMetaName)
 template method: called when ParseMetaOptions adds a user-defined meta More...
 
- Protected Member Functions inherited from SvParser< HtmlTokenId >
T SkipToken (short nCnt=-1)
 
TokenStackType * GetStackPtr (short nCnt)
 
T GetNextToken ()
 
virtual T GetNextToken_ ()=0
 
virtual void NextToken (T nToken)=0
 
virtual ~SvParser () override
 
void ClearTxtConvContext ()
 
- Protected Member Functions inherited from SvRefBase
virtual ~SvRefBase () COVERITY_NOEXCEPT_FALSE
 

Static Protected Member Functions

static rtl_TextEncoding GetEncodingByMIME (const OUString &rMime)
 

Protected Attributes

OUString sSaveToken
 
- Protected Attributes inherited from SvParser< HtmlTokenId >
SvStream & rInput
 
OUStringBuffer aToken
 
sal_uInt32 nlLineNr
 
sal_uInt32 nlLinePos
 
std::unique_ptr< SvParser_Impl< T > > pImplData
 
tools::Long m_nTokenIndex
 
tools::Long nTokenValue
 
bool bTokenHasValue
 
bool bFuzzing
 
SvParserState eState
 
rtl_TextEncoding eSrcEnc
 
sal_uInt64 nNextChPos
 
sal_uInt32 nNextCh
 
bool bSwitchToUCS2
 
bool bRTF_InTextRead
 

Private Member Functions

bool ParseMetaOptionsImpl (const css::uno::Reference< css::document::XDocumentProperties > &, SvKeyValueIterator *, const HTMLOptions &, rtl_TextEncoding &rEnc)
 parse meta options into XDocumentProperties and encoding More...
 

Private Attributes

HTMLOptions maOptions
 
bool bNewDoc: 1
 
bool bIsInHeader: 1
 
bool bReadListing: 1
 
bool bReadXMP: 1
 
bool bReadPRE: 1
 
bool bReadTextArea: 1
 
bool bReadScript: 1
 
bool bReadStyle: 1
 
bool bEndTokenFound: 1
 
bool bPre_IgnoreNewPara: 1
 
bool bReadNextChar: 1
 
bool bReadComment: 1
 
sal_uInt32 nPre_LinePos
 
HtmlTokenId mnPendingOffToken
 OFF token pending for a <XX.../> ON/OFF ON token. More...
 
OUString aEndToken
 
OUString maNamespace
 XML namespace, in case of XHTML. More...
 

Detailed Description

Definition at line 146 of file parhtml.hxx.

Constructor & Destructor Documentation

◆ ~HTMLParser()

HTMLParser::~HTMLParser ( )
override protected virtual

Definition at line 238 of file parhtml.cxx.

◆ HTMLParser()

HTMLParser::HTMLParser ( SvStream & rIn,
bool  bReadNewDoc = true 
)

Definition at line 217 of file parhtml.cxx.

References NONE.

Member Function Documentation

◆ AddMetaUserDefined()

void HTMLParser::AddMetaUserDefined ( OUString const &  i_rMetaName)
protected virtual

template method: called when ParseMetaOptions adds a user-defined meta

Definition at line 1959 of file parhtml.cxx.

Referenced by ParseMetaOptionsImpl().

◆ CallParser()

SvParserState HTMLParser::CallParser ( )
override virtual

Implements SvParser< HtmlTokenId >.

Definition at line 269 of file parhtml.cxx.

References bPre_IgnoreNewPara, Continue(), NONE, nPre_LinePos, and Working.

◆ Continue()

void HTMLParser::Continue ( HtmlTokenId  nToken)
override virtual

Definition at line 285 of file parhtml.cxx.

References FilterToken(), NONE, and nToken.

Referenced by CallParser().

◆ FilterListing()

HtmlTokenId HTMLParser::FilterListing ( HtmlTokenId  nToken)

◆ FilterPRE()

HtmlTokenId HTMLParser::FilterPRE ( HtmlTokenId  nToken)

Definition at line 1633 of file parhtml.cxx.

References ABBREVIATION_OFF, ABBREVIATION_ON, aBuf, ACRONYM_OFF, ACRONYM_ON, ADDRESS_OFF, ADDRESS_ON, ANCHOR_OFF, ANCHOR_ON, APPLET_OFF, APPLET_ON, AUTHOR_OFF, AUTHOR_ON, BASEFONT_OFF, BASEFONT_ON, BIGPRINT_OFF, BIGPRINT_ON, BLINK_OFF, BLINK_ON, BLOCKQUOTE_OFF, BLOCKQUOTE_ON, BODY_ON, BOLD_OFF, BOLD_ON, bPre_IgnoreNewPara, CAPTION_OFF, CAPTION_ON, CENTER_OFF, CENTER_ON, CITATION_OFF, CITATION_ON, CODE_OFF, CODE_ON, COL_OFF, COL_ON, COLGROUP_OFF, COLGROUP_ON, DBG_ASSERT, DEFINSTANCE_OFF, DEFINSTANCE_ON, DELETEDTEXT_OFF, DELETEDTEXT_ON, DIVISION_OFF, DIVISION_ON, EMBED, EMPHASIS_OFF, EMPHASIS_ON, FONT_OFF, FONT_ON, FORM_OFF, FORM_ON, HEAD1_OFF, HEAD1_ON, HEAD2_OFF, HEAD2_ON, HEAD3_OFF, HEAD3_ON, HEAD4_OFF, HEAD4_ON, HEAD5_OFF, HEAD5_ON, HEAD6_OFF, HEAD6_ON, HORZRULE, IMAGE, INPUT, INSERTEDTEXT_OFF, INSERTEDTEXT_ON, isOffToken(), ITALIC_OFF, ITALIC_ON, KEYBOARD_OFF, KEYBOARD_ON, LANGUAGE_OFF, LANGUAGE_ON, LINEBREAK, NEWPARA, NONE, nPre_LinePos, nToken, ONOFF_START, OPTION, padToLength(), comphelper::string::padToLength(), PARABREAK_ON, PARAM, PERSON_OFF, PERSON_ON, RAWDATA, SAMPLE_OFF, SAMPLE_ON, SCRIPT_OFF, SCRIPT_ON, SELECT_OFF, SELECT_ON, SHORTQUOTE_OFF, SHORTQUOTE_ON, SMALLPRINT_OFF, SMALLPRINT_ON, SPAN_OFF, SPAN_ON, STRIKE_OFF, STRIKE_ON, STRIKETHROUGH_OFF, STRIKETHROUGH_ON, STRONG_OFF, STRONG_ON, SUBSCRIPT_OFF, SUBSCRIPT_ON, SUPERSCRIPT_OFF, SUPERSCRIPT_ON, TABCHAR, TABLE_OFF, TABLE_ON, TABLEDATA_OFF, TABLEDATA_ON, TABLEHEADER_OFF, TABLEHEADER_ON, TABLEROW_OFF, TABLEROW_ON, TBODY_OFF, TBODY_ON, TELETYPE_OFF, TELETYPE_ON, TEXTAREA_OFF, TEXTAREA_ON, TEXTTOKEN, TFOOT_OFF, TFOOT_ON, THEAD_OFF, THEAD_ON, UNDERLINE_OFF, UNDERLINE_ON, UNKNOWNCONTROL_OFF, UNKNOWNCONTROL_ON, VARIABLE_OFF, and VARIABLE_ON.

Referenced by FilterToken().

◆ FilterToken()

HtmlTokenId HTMLParser::FilterToken ( HtmlTokenId  nToken)

◆ FilterXMP()

HtmlTokenId HTMLParser::FilterXMP ( HtmlTokenId  nToken)

◆ FinishHeader()

void HTMLParser::FinishHeader ( )
inline protected

Definition at line 186 of file parhtml.hxx.

◆ FinishListing()

void HTMLParser::FinishListing ( )
inline

Definition at line 209 of file parhtml.hxx.

Referenced by FilterToken().

◆ FinishPRE()

void HTMLParser::FinishPRE ( )
inline

Definition at line 205 of file parhtml.hxx.

Referenced by FilterToken().

◆ FinishPREListingXMP()

void HTMLParser::FinishPREListingXMP ( )
inline

Definition at line 219 of file parhtml.hxx.

◆ FinishTextArea()

void HTMLParser::FinishTextArea ( )
inline

Definition at line 216 of file parhtml.hxx.

◆ FinishXMP()

void HTMLParser::FinishXMP ( )
inline

Definition at line 213 of file parhtml.hxx.

Referenced by FilterToken().

◆ GetEncodingByHttpHeader()

rtl_TextEncoding HTMLParser::GetEncodingByHttpHeader ( SvKeyValueIterator * pHTTPHeader)
static

◆ GetEncodingByMIME()

rtl_TextEncoding HTMLParser::GetEncodingByMIME ( const OUString &  rMime)
static protected

◆ GetNextRawToken()

HtmlTokenId HTMLParser::GetNextRawToken ( )
protected

◆ GetNextToken_()

HtmlTokenId HTMLParser::GetNextToken_ ( )
override protected virtual

◆ GetOptions()

const HTMLOptions & HTMLParser::GetOptions ( HtmlOptionId const *  pNoConvertToken = nullptr)

◆ InternalImgToPrivateURL()

bool HTMLParser::InternalImgToPrivateURL ( OUString &  rURL)
static

◆ IsInHeader()

bool HTMLParser::IsInHeader ( ) const
inline

Definition at line 196 of file parhtml.hxx.

◆ IsNewDoc()

bool HTMLParser::IsNewDoc ( ) const
inline

Definition at line 195 of file parhtml.hxx.

◆ IsReadListing()

bool HTMLParser::IsReadListing ( ) const
inline

Definition at line 197 of file parhtml.hxx.

◆ IsReadPRE()

bool HTMLParser::IsReadPRE ( ) const
inline

Definition at line 199 of file parhtml.hxx.

◆ IsReadScript()

bool HTMLParser::IsReadScript ( ) const
inline

Definition at line 200 of file parhtml.hxx.

◆ IsReadStyle()

bool HTMLParser::IsReadStyle ( ) const
inline

Definition at line 201 of file parhtml.hxx.

◆ IsReadXMP()

bool HTMLParser::IsReadXMP ( ) const
inline

Definition at line 198 of file parhtml.hxx.

◆ ParseMetaOptions()

bool HTMLParser::ParseMetaOptions ( const css::uno::Reference< css::document::XDocumentProperties > &  ,
SvKeyValueIterator *
)
virtual

overriding method must call this implementation!

Definition at line 2129 of file parhtml.cxx.

References CONTENT, GetExtendedCompatibilityTextEncoding(), GetOptions(), and ParseMetaOptionsImpl().

◆ ParseMetaOptionsImpl()

bool HTMLParser::ParseMetaOptionsImpl ( const css::uno::Reference< css::document::XDocumentProperties > &  ,
SvKeyValueIterator *,
const HTMLOptions &,
rtl_TextEncoding &  rEnc 
)
private

◆ ParseScriptOptions()

void HTMLParser::ParseScriptOptions ( OUString &  rLangString,
std::u16string_view  rBaseURL,
HTMLScriptLanguage & rLang,
OUString &  rSrc,
OUString &  rLibrary,
OUString &  rModule 
)

◆ ReadRawData()

void HTMLParser::ReadRawData ( const OUString &  rEndToken)
inline

Definition at line 227 of file parhtml.hxx.

◆ RemoveSGMLComment()

void HTMLParser::RemoveSGMLComment ( OUString &  rString)
static

Definition at line 81 of file htmlsupp.cxx.

References idx, nPos, and comphelper::string::stripEnd().

◆ ScanText()

HtmlTokenId HTMLParser::ScanText ( const sal_Unicode  cBreak = 0U)
protected

◆ SetEncodingByHTTPHeader()

bool HTMLParser::SetEncodingByHTTPHeader ( SvKeyValueIterator * pHTTPHeader)

Definition at line 2193 of file parhtml.cxx.

References GetEncodingByHttpHeader().

◆ SetNamespace()

void HTMLParser::SetNamespace ( std::u16string_view  rNamespace)
protected

Definition at line 242 of file parhtml.cxx.

References maNamespace.

◆ StartListing()

void HTMLParser::StartListing ( )
inline

Definition at line 279 of file parhtml.hxx.

References bPre_IgnoreNewPara, bReadListing, and nPre_LinePos.

Referenced by FilterToken().

◆ StartPRE()

void HTMLParser::StartPRE ( )
inline

Definition at line 272 of file parhtml.hxx.

References bPre_IgnoreNewPara, bReadPRE, and nPre_LinePos.

Referenced by FilterToken().

◆ StartXMP()

void HTMLParser::StartXMP ( )
inline

Definition at line 286 of file parhtml.hxx.

References bPre_IgnoreNewPara, bReadXMP, and nPre_LinePos.

Referenced by FilterToken().

◆ UnescapeToken()

void HTMLParser::UnescapeToken ( )

Definition at line 1425 of file parhtml.cxx.

References nPos.

Referenced by FilterXMP().

Member Data Documentation

◆ aEndToken

OUString HTMLParser::aEndToken
private

Definition at line 169 of file parhtml.hxx.

Referenced by GetNextRawToken(), and GetNextToken_().

◆ bEndTokenFound

bool HTMLParser::bEndTokenFound
private

Definition at line 159 of file parhtml.hxx.

Referenced by GetNextRawToken(), and GetNextToken_().

◆ bIsInHeader

bool HTMLParser::bIsInHeader
private

Definition at line 152 of file parhtml.hxx.

Referenced by FilterToken().

◆ bNewDoc

bool HTMLParser::bNewDoc
private

Definition at line 151 of file parhtml.hxx.

◆ bPre_IgnoreNewPara

bool HTMLParser::bPre_IgnoreNewPara
private

◆ bReadComment

bool HTMLParser::bReadComment
private

Definition at line 163 of file parhtml.hxx.

Referenced by GetNextRawToken().

◆ bReadListing

bool HTMLParser::bReadListing
private

Definition at line 153 of file parhtml.hxx.

Referenced by FilterToken(), GetNextToken_(), ScanText(), and StartListing().

◆ bReadNextChar

bool HTMLParser::bReadNextChar
private

Definition at line 162 of file parhtml.hxx.

Referenced by GetNextToken_(), and ScanText().

◆ bReadPRE

bool HTMLParser::bReadPRE
private

Definition at line 155 of file parhtml.hxx.

Referenced by FilterToken(), GetNextToken_(), ScanText(), and StartPRE().

◆ bReadScript

bool HTMLParser::bReadScript
private

Definition at line 157 of file parhtml.hxx.

Referenced by GetNextRawToken(), and GetNextToken_().

◆ bReadStyle

bool HTMLParser::bReadStyle
private

Definition at line 158 of file parhtml.hxx.

Referenced by GetNextRawToken(), and GetNextToken_().

◆ bReadTextArea

bool HTMLParser::bReadTextArea
private

Definition at line 156 of file parhtml.hxx.

Referenced by GetNextToken_(), and ScanText().

◆ bReadXMP

bool HTMLParser::bReadXMP
private

Definition at line 154 of file parhtml.hxx.

Referenced by FilterToken(), GetNextToken_(), ScanText(), and StartXMP().

◆ maNamespace

OUString HTMLParser::maNamespace
private

XML namespace, in case of XHTML.

Definition at line 172 of file parhtml.hxx.

Referenced by GetNextToken_(), and SetNamespace().

◆ maOptions

HTMLOptions HTMLParser::maOptions
mutable private

Definition at line 149 of file parhtml.hxx.

Referenced by GetNextToken_(), and GetOptions().

◆ mnPendingOffToken

HtmlTokenId HTMLParser::mnPendingOffToken
private

OFF token pending for a <XX.../> ON/OFF ON token.

Definition at line 167 of file parhtml.hxx.

Referenced by GetNextToken_().

◆ nPre_LinePos

sal_uInt32 HTMLParser::nPre_LinePos
private

Definition at line 165 of file parhtml.hxx.

Referenced by CallParser(), FilterPRE(), StartListing(), StartPRE(), and StartXMP().

◆ sSaveToken

OUString HTMLParser::sSaveToken
protected

Definition at line 175 of file parhtml.hxx.

Referenced by FilterXMP(), and GetNextToken_().


The documentation for this class was generated from the following files: