LibreOffice Module sc (master) 1
|
The HTML parser for data queries. More...
#include <htmlpars.hxx>
Public Member Functions | |
ScHTMLQueryParser (EditEngine *pEditEngine, ScDocument *pDoc) | |
virtual | ~ScHTMLQueryParser () override |
virtual ErrCode | Read (SvStream &rStrm, const OUString &rBaseURL) override |
virtual const ScHTMLTable * | GetGlobalTable () const override |
Returns the "global table" which contains the entire HTML document. More... | |
Public Member Functions inherited from ScHTMLParser | |
ScHTMLParser (EditEngine *pEditEngine, ScDocument *pDoc) | |
The destination document. More... | |
virtual | ~ScHTMLParser () override |
virtual ErrCode | Read (SvStream &rStrm, const OUString &rBaseURL) override=0 |
ScHTMLStyles & | GetStyles () |
ScDocument & | GetDoc () |
virtual const ScHTMLTable * | GetGlobalTable () const =0 |
Returns the "global table" which contains the entire HTML document. More... | |
Public Member Functions inherited from ScEEParser | |
ScEEParser (EditEngine *) | |
virtual | ~ScEEParser () |
virtual ErrCode | Read (SvStream &, const OUString &rBaseURL)=0 |
const ColWidthsMap & | GetColWidths () const |
ColWidthsMap & | GetColWidths () |
void | GetDimensions (SCCOL &nCols, SCROW &nRows) const |
size_t | ListSize () const |
ScEEParseEntry * | ListEntry (size_t index) |
const ScEEParseEntry * | ListEntry (size_t index) const |
Private Types | |
typedef ::std::unique_ptr< ScHTMLGlobalTable > | ScHTMLGlobalTablePtr |
Private Member Functions | |
void | ProcessToken (const HtmlImportInfo &rInfo) |
Handles all possible tags in the HTML document. More... | |
void | InsertText (const HtmlImportInfo &rInfo) |
Inserts a text portion into current entry. More... | |
void | FontOn (const HtmlImportInfo &rInfo) |
Processes the <font> tag. More... | |
void | MetaOn (const HtmlImportInfo &rInfo) |
Processes the <meta> tag. More... | |
void | TitleOn () |
Opens the title of the HTML document (<title> tag). More... | |
void | TitleOff (const HtmlImportInfo &rInfo) |
Closes the title of the HTML document (</title> tag). More... | |
void | TableOn (const HtmlImportInfo &rInfo) |
Opens a new table at the current position. More... | |
void | TableOff (const HtmlImportInfo &rInfo) |
Closes the current table. More... | |
void | PreOn (const HtmlImportInfo &rInfo) |
Opens a new table based on preformatted text. More... | |
void | PreOff (const HtmlImportInfo &rInfo) |
Closes the current preformatted text table. More... | |
void | CloseTable (const HtmlImportInfo &rInfo) |
Closes the current table, regardless of the opening tag. More... | |
void | ParseStyle (std::u16string_view rStrm) |
DECL_LINK (HTMLImportHdl, HtmlImportInfo &, void) | |
Private Attributes | |
OUStringBuffer | maTitle
The title of the document. More... | |
ScHTMLGlobalTablePtr | mxGlobTable
Contains the entire imported document. More... | |
ScHTMLTable * | mpCurrTable
Pointer to current table (performance). More... | |
ScHTMLTableId | mnUnusedId
First unused table identifier. More... | |
bool | mbTitleOn
Additional Inherited Members | |
Protected Member Functions inherited from ScEEParser | |
void | NewActEntry (const ScEEParseEntry *) |
Protected Attributes inherited from ScHTMLParser | |
sal_uInt32 | maFontHeights [SC_HTML_FONTSIZES] |
ScDocument * | mpDoc |
Protected Attributes inherited from ScEEParser | |
EditEngine * | pEdit |
rtl::Reference< SfxItemPool > | pPool |
rtl::Reference< SfxItemPool > | pDocPool |
std::vector< std::shared_ptr< ScEEParseEntry > > | maList |
std::shared_ptr< ScEEParseEntry > | mxActEntry |
ColWidthsMap | maColWidths |
int | nRtfLastToken |
SCCOL | nColCnt |
SCROW | nRowCnt |
SCCOL | nColMax |
SCROW | nRowMax |
The HTML parser for data queries.
Focuses on data import, not on layout.
Builds the table structure correctly, but ignores extended formatting such as pictures or column widths.
Definition at line 579 of file htmlpars.hxx.
|
private |
Definition at line 622 of file htmlpars.hxx.
|
explicit |
Definition at line 2772 of file htmlpars.cxx.
References ScEEParser::maList, mnUnusedId, mpCurrTable, mxGlobTable, ScEEParser::pEdit, and ScEEParser::pPool.
|
overridevirtual |
Definition at line 2782 of file htmlpars.cxx.
|
private |
Closes the current table, regardless of the opening tag.
Definition at line 3020 of file htmlpars.cxx.
References ScHTMLTable::CloseTable(), and mpCurrTable.
|
private |
|
private |
Processes the <font> tag.
Definition at line 2921 of file htmlpars.cxx.
References ScGlobal::addToken(), ATTR_FONT(), ATTR_FONT_COLOR(), ATTR_FONT_HEIGHT(), FAMILY_DONTKNOW, o3tl::getToken(), ScHTMLParser::maFontHeights, mpCurrTable, nPos, PITCH_DONTKNOW, HtmlImportInfo::pParser, ScHTMLTable::PutItem(), SC_HTML_FONTSIZES, and comphelper::string::strip().
Referenced by ProcessToken().
|
overridevirtual |
Returns the "global table" which contains the entire HTML document.
Implements ScHTMLParser.
Definition at line 2824 of file htmlpars.cxx.
References mxGlobTable.
|
private |
Inserts a text portion into current entry.
Definition at line 2914 of file htmlpars.cxx.
References HtmlImportInfo::aText, maTitle, mbTitleOn, mpCurrTable, and ScHTMLTable::PutText().
Referenced by ProcessToken(), and TitleOff().
|
private |
Processes the <meta> tag.
Definition at line 2963 of file htmlpars.cxx.
References ScDocument::GetDocumentShell(), SfxObjectShell::GetHeaderAttributes(), SfxObjectShell::GetModel(), ScHTMLParser::mpDoc, HTMLParser::ParseMetaOptions(), and HtmlImportInfo::pParser.
Referenced by ProcessToken().
|
private |
Definition at line 3099 of file htmlpars.cxx.
References aStr, ScHTMLParser::GetStyles(), OUStringToOString(), rStrm, and SAL_WARN.
Referenced by ProcessToken().
|
private |
Closes the current preformatted text table.
Definition at line 3015 of file htmlpars.cxx.
References mpCurrTable, and ScHTMLTable::PreOff().
Referenced by ProcessToken().
|
private |
Opens a new table based on preformatted text.
Definition at line 3010 of file htmlpars.cxx.
References mpCurrTable, and ScHTMLTable::PreOn().
Referenced by ProcessToken().
|
private |
Handles all possible tags in the HTML document.
TODO: store current font size, use following size
TODO: store current font size, use preceding size
Definition at line 2829 of file htmlpars.cxx.
References ScHTMLTable::AnchorOn(), HtmlImportInfo::aText, ATTR_FONT_HEIGHT(), ATTR_FONT_POSTURE(), ATTR_FONT_UNDERLINE(), ATTR_FONT_WEIGHT(), ScHTMLTable::BodyOff(), ScHTMLTable::BodyOn(), ScHTMLTable::BreakOn(), ScHTMLTable::CaptionOff(), ScHTMLTable::CaptionOn(), ScHTMLTable::DataOff(), ScHTMLTable::DataOn(), FontOn(), ScHTMLTable::HeadingOn(), InsertText(), ITALIC_NORMAL, LINESTYLE_SINGLE, ScHTMLParser::maFontHeights, MetaOn(), mpCurrTable, HtmlImportInfo::nToken, ParseStyle(), PreOff(), PreOn(), ScHTMLTable::PutItem(), ScHTMLTable::RowOff(), ScHTMLTable::RowOn(), TableOff(), TableOn(), TitleOff(), TitleOn(), and WEIGHT_BOLD.
Implements ScHTMLParser.
Definition at line 2786 of file htmlpars.cxx.
References tools::SvRef< typename T >::get(), ScDocument::GetDocumentShell(), SfxObjectShell::GetHeaderAttributes(), EditEngine::GetHtmlImportHdl(), SfxObjectShell::IsLoading(), LINK, ScHTMLParser::mpDoc, mxGlobTable, ScEEParser::nColMax, ScEEParser::nRowMax, OOO_STRING_SVTOOLS_HTML_META_content_type, ScEEParser::pEdit, EditEngine::Read(), rStrm, EditEngine::SetHtmlImportHdl(), tdCol, and tdRow.
|
private |
Closes the current table.
Definition at line 3005 of file htmlpars.cxx.
References mpCurrTable, and ScHTMLTable::TableOff().
Referenced by ProcessToken().
|
private |
Opens a new table at the current position.
Definition at line 3000 of file htmlpars.cxx.
References mpCurrTable, and ScHTMLTable::TableOn().
Referenced by ProcessToken().
|
private |
Closes the title of the HTML document (</title> tag).
Definition at line 2983 of file htmlpars.cxx.
References ScDocument::GetDocumentShell(), SfxObjectShell::GetModel(), InsertText(), maTitle, mbTitleOn, and ScHTMLParser::mpDoc.
Referenced by ProcessToken().
|
private |
Opens the title of the HTML document (<title> tag).
Definition at line 2977 of file htmlpars.cxx.
References maTitle, and mbTitleOn.
Referenced by ProcessToken().
|
private |
The title of the document.
Definition at line 624 of file htmlpars.hxx.
Referenced by InsertText(), TitleOff(), and TitleOn().
|
private |
Definition at line 628 of file htmlpars.hxx.
Referenced by InsertText(), TitleOff(), and TitleOn().
|
private |
First unused table identifier.
Definition at line 627 of file htmlpars.hxx.
Referenced by ScHTMLQueryParser().
|
private |
Pointer to current table (performance).
Definition at line 626 of file htmlpars.hxx.
Referenced by CloseTable(), FontOn(), InsertText(), PreOff(), PreOn(), ProcessToken(), ScHTMLQueryParser(), TableOff(), and TableOn().
|
private |
Contains the entire imported document.
Definition at line 625 of file htmlpars.hxx.
Referenced by GetGlobalTable(), Read(), and ScHTMLQueryParser().