LibreOffice Module sc (master) 1
htmlpars.hxx
Go to the documentation of this file.
1/* -*- Mode: C++; tab-width: 4; indent-tabs-mode: nil; c-basic-offset: 4 -*- */
2/*
3 * This file is part of the LibreOffice project.
4 *
5 * This Source Code Form is subject to the terms of the Mozilla Public
6 * License, v. 2.0. If a copy of the MPL was not distributed with this
7 * file, You can obtain one at http://mozilla.org/MPL/2.0/.
8 *
9 * This file incorporates work covered by the following license notice:
10 *
11 * Licensed to the Apache Software Foundation (ASF) under one or more
12 * contributor license agreements. See the NOTICE file distributed
13 * with this work for additional information regarding copyright
14 * ownership. The ASF licenses this file to you under the Apache
15 * License, Version 2.0 (the "License"); you may not use this file
16 * except in compliance with the License. You may obtain a copy of
17 * the License at http://www.apache.org/licenses/LICENSE-2.0 .
18 */
19
20#pragma once
21
22#include <memory>
23#include <map>
24#include <optional>
25#include <stack>
26#include <string_view>
27#include <unordered_map>
28#include <utility>
29#include <vector>
31
32#include <rangelst.hxx>
33#include "eeparser.hxx"
34
35const sal_uInt32 SC_HTML_FONTSIZES = 7; // number of HTML font sizes (like export, HTML options); dimension of ScHTMLParser::maFontHeights
36
37// Pixel tolerances for SeekOffset and related helpers (their nOffsetTol arguments).
38const sal_uInt16 SC_HTML_OFFSET_TOLERANCE_SMALL = 1; // tolerance for a single table
39const sal_uInt16 SC_HTML_OFFSET_TOLERANCE_LARGE = 10; // tolerance for nested tables
40
41// BASE class for HTML parser classes
42
43class ScHTMLTable;
44
50{
51 typedef std::unordered_map<OUString, OUString> PropsType;
52 typedef ::std::map<OUString, PropsType> NamePropsType;
53 typedef ::std::map<OUString, NamePropsType> ElemsType;
54
58 const OUString maEmpty;
59public:
61
62 void add(const char* pElemName, size_t nElemName, const char* pClassName, size_t nClassName,
63 const OUString& aProp, const OUString& aValue);
64
68 const OUString& getPropertyValue(
69 const OUString& rElem, const OUString& rClass, const OUString& rPropName) const;
70
71private:
72 static void insertProp(
73 NamePropsType& rProps, const OUString& aName,
74 const OUString& aProp, const OUString& aValue);
75};
76
79{
81protected:
84
85public:
86 explicit ScHTMLParser( EditEngine* pEditEngine, ScDocument* pDoc );
87 virtual ~ScHTMLParser() override;
88
89 virtual ErrCode Read( SvStream& rStrm, const OUString& rBaseURL ) override = 0;
90
92 ScDocument& GetDoc() { return *mpDoc;}
93
95 virtual const ScHTMLTable* GetGlobalTable() const = 0;
96};
97
99
101{
103 std::shared_ptr<ScEEParseEntry> xCellEntry;
109 sal_uInt16 nTable;
110 sal_uInt16 nTableWidth;
111 sal_uInt16 nColOffset;
112 sal_uInt16 nColOffsetStart;
114 ScHTMLTableStackEntry( std::shared_ptr<ScEEParseEntry> xE,
116 sal_uLong nFTC,
117 SCROW nRow,
118 SCCOL nStart, SCCOL nMax, sal_uInt16 nTab,
119 sal_uInt16 nTW, sal_uInt16 nCO, sal_uInt16 nCOS,
120 bool bFR )
121 : xLockedList(std::move( xL )), xCellEntry(std::move(xE)),
122 pLocalColOffset( pTO ),
123 nFirstTableCell( nFTC ),
124 nRowCnt( nRow ),
125 nColCntStart( nStart ), nMaxCol( nMax ),
126 nTable( nTab ), nTableWidth( nTW ),
127 nColOffset( nCO ), nColOffsetStart( nCOS ),
128 bFirstRow( bFR )
129 {}
130};
131
133{
138 SCROW nCRow )
139 : nLastCol( nLCol ), nNextRow( nNRow ),
140 nCurRow( nCRow )
141 {}
142};
143
144class EditEngine;
145class ScDocument;
146class HTMLOption;
147
148// TODO these need better names
// InnerMap associates one row index (SCROW) with another; OuterMap associates a
// 16-bit key with a pointer to such a row map.
// NOTE(review): OuterMap stores raw InnerMap pointers; which code allocates and
// frees them is not visible in this header - confirm against htmlpars.cxx.
149typedef ::std::map<SCROW, SCROW> InnerMap;
150typedef ::std::map<sal_uInt16, InnerMap*> OuterMap;
151
153{
154private:
156 OUString aBaseURL;
157 ::std::stack< std::unique_ptr<ScHTMLTableStackEntry> >
159 OUString aString;
161 std::unique_ptr<OuterMap> pTables;
166 sal_uInt16 nTable;
167 sal_uInt16 nMaxTable;
168 SCCOL nColCntStart; // first Col per table
169 SCCOL nMaxCol; // per table
170 sal_uInt16 nTableWidth; // per table
171 sal_uInt16 nColOffset; // current, pixel
172 sal_uInt16 nColOffsetStart; // start value per table, in pixel
173 sal_uInt16 nOffsetTolerance; // for use with SeekOffset and related
174 bool bFirstRow; // per table, whether in first row
176 bool bInCell:1;
177 bool bInTitle:1;
178
179 DECL_LINK( HTMLImportHdl, HtmlImportInfo&, void );
180 void NewActEntry( const ScEEParseEntry* );
181 static void EntryEnd( ScEEParseEntry*, const ESelection& );
182 void ProcToken( HtmlImportInfo* );
183 void CloseEntry( const HtmlImportInfo* );
184 void NextRow( const HtmlImportInfo* );
185 void SkipLocked( ScEEParseEntry*, bool bJoin = true );
186 static bool SeekOffset( const ScHTMLColOffset*, sal_uInt16 nOffset,
187 SCCOL* pCol, sal_uInt16 nOffsetTol );
188 static void MakeCol( ScHTMLColOffset*, sal_uInt16& nOffset,
189 sal_uInt16& nWidth, sal_uInt16 nOffsetTol,
190 sal_uInt16 nWidthTol );
191 static void MakeColNoRef( ScHTMLColOffset*, sal_uInt16 nOffset,
192 sal_uInt16 nWidth, sal_uInt16 nOffsetTol,
193 sal_uInt16 nWidthTol );
194 static void ModifyOffset( ScHTMLColOffset*, sal_uInt16& nOldOffset,
195 sal_uInt16& nNewOffset, sal_uInt16 nOffsetTol );
196 void Colonize( ScEEParseEntry* );
197 sal_uInt16 GetWidth( const ScEEParseEntry* );
198 void SetWidths();
199 void Adjust();
200
201 sal_uInt16 GetWidthPixel( const HTMLOption& );
202 bool IsAtBeginningOfText( const HtmlImportInfo* );
203
204 void TableOn( HtmlImportInfo* );
205 void ColOn( HtmlImportInfo* );
206 void TableRowOn( const HtmlImportInfo* );
207 void TableRowOff( const HtmlImportInfo* );
209 void TableDataOff( const HtmlImportInfo* );
210 void TableOff( const HtmlImportInfo* );
211 void Image( HtmlImportInfo* );
212 void AnchorOn( HtmlImportInfo* );
213 void FontOn( HtmlImportInfo* );
214
215public:
217 virtual ~ScHTMLLayoutParser() override;
218 virtual ErrCode Read( SvStream&, const OUString& rBaseURL ) override;
219 virtual const ScHTMLTable* GetGlobalTable() const override;
220};
221
222// HTML DATA QUERY PARSER
223
// Selects the orientation for table operations: column (tdCol) or row (tdRow).
225enum ScHTMLOrient { tdCol = 0 , tdRow = 1 };
226
// Type for a unique identifier for each table.
228typedef sal_uInt16 ScHTMLTableId;
233
236{
239
240 explicit ScHTMLPos() : mnCol( 0 ), mnRow( 0 ) {}
241 explicit ScHTMLPos( SCCOL nCol, SCROW nRow ) :
242 mnCol( nCol ), mnRow( nRow ) {}
243 explicit ScHTMLPos( const ScAddress& rAddr ) { Set( rAddr ); }
244
245 SCCOLROW Get( ScHTMLOrient eOrient ) const
246 { return (eOrient == tdCol) ? mnCol : mnRow; }
247 void Set( SCCOL nCol, SCROW nRow )
248 { mnCol = nCol; mnRow = nRow; }
249 void Set( const ScAddress& rAddr )
250 { Set( rAddr.Col(), rAddr.Row() ); }
252 { return ScAddress( mnCol, mnRow, 0 ); }
253};
254
255inline bool operator<( const ScHTMLPos& rPos1, const ScHTMLPos& rPos2 )
256{
257 return (rPos1.mnRow < rPos2.mnRow) || ((rPos1.mnRow == rPos2.mnRow) && (rPos1.mnCol < rPos2.mnCol));
258}
259
262{
265
266 explicit ScHTMLSize( SCCOL nCols, SCROW nRows ) :
267 mnCols( nCols ), mnRows( nRows ) {}
268 void Set( SCCOL nCols, SCROW nRows )
269 { mnCols = nCols; mnRows = nRows; }
270};
271
274{
275public:
276 explicit ScHTMLEntry(
277 const SfxItemSet& rItemSet,
278 ScHTMLTableId nTableId = SC_HTML_NO_TABLE );
279
281 bool IsEmpty() const { return !aSel.HasRange(); }
283 bool HasContents() const;
285 bool IsTable() const { return nTab != SC_HTML_NO_TABLE; }
287 ScHTMLTableId GetTableId() const { return nTab; }
288
292 void AdjustStart( const HtmlImportInfo& rInfo );
294 void AdjustEnd( const HtmlImportInfo& rInfo );
296 void Strip( const EditEngine& rEditEngine );
297
301 const SfxItemSet& GetItemSet() const { return aItemSet; }
302
303private:
305};
306
309{
312
314 explicit ScHTMLTableAutoId( ScHTMLTableId& rnUnusedId );
315};
316
317class ScHTMLTableMap;
318
327{
328public:
334 explicit ScHTMLTable(
335 ScHTMLTable& rParentTable,
336 const HtmlImportInfo& rInfo,
337 bool bPreFormText,
338 const ScDocument& rDoc );
339
340 virtual ~ScHTMLTable();
341
343 const OUString& GetTableName() const { return maTableName; }
345 const OUString& GetTableCaption() const { return maCaption; }
349 ScHTMLSize GetSpan( const ScHTMLPos& rCellPos ) const;
350
353 ScHTMLTable* FindNestedTable( ScHTMLTableId nTableId ) const;
354
356 void PutItem( const SfxPoolItem& rItem );
358 void PutText( const HtmlImportInfo& rInfo );
360 void InsertPara( const HtmlImportInfo& rInfo );
361
364 void BreakOn();
366 void HeadingOn();
368 void AnchorOn();
369
372 ScHTMLTable* TableOn( const HtmlImportInfo& rInfo );
375 ScHTMLTable* TableOff( const HtmlImportInfo& rInfo );
377 void CaptionOn();
379 void CaptionOff();
382 ScHTMLTable* PreOn( const HtmlImportInfo& rInfo );
385 ScHTMLTable* PreOff( const HtmlImportInfo& rInfo );
386
389 void RowOn( const HtmlImportInfo& rInfo );
392 void RowOff( const HtmlImportInfo& rInfo );
394 void DataOn( const HtmlImportInfo& rInfo );
397 void DataOff( const HtmlImportInfo& rInfo );
398
400 void BodyOn( const HtmlImportInfo& rInfo );
402 void BodyOff( const HtmlImportInfo& rInfo );
403
407 ScHTMLTable* CloseTable( const HtmlImportInfo& rInfo );
408
410 SCCOLROW GetDocSize( ScHTMLOrient eOrient, SCCOLROW nCellPos ) const;
412 SCCOLROW GetDocSize( ScHTMLOrient eOrient, SCCOLROW nCellBegin, SCCOLROW nCellEnd ) const;
414 SCCOLROW GetDocSize( ScHTMLOrient eOrient ) const;
416 ScHTMLSize GetDocSize( const ScHTMLPos& rCellPos ) const;
417
419 const ScHTMLPos& GetDocPos() const { return maDocBasePos; }
421 SCCOLROW GetDocPos( ScHTMLOrient eOrient, SCCOLROW nCellPos ) const;
423 ScHTMLPos GetDocPos( const ScHTMLPos& rCellPos ) const;
424
426 void GetDocRange( ScRange& rRange ) const;
427
429 void ApplyCellBorders( ScDocument* pDoc, const ScAddress& rFirstPos ) const;
430
432
433protected:
436 explicit ScHTMLTable(
437 SfxItemPool& rPool,
438 EditEngine& rEditEngine,
439 std::vector<std::shared_ptr<ScEEParseEntry>>& rEEParseList,
440 ScHTMLTableId& rnUnusedId, ScHTMLParser* pParser,
441 const ScDocument& rDoc );
442
444 void FillEmptyCells();
446 void RecalcDocSize();
449 void RecalcDocPos( const ScHTMLPos& rBasePos );
450
451private:
452 typedef ::std::unique_ptr< ScHTMLTableMap > ScHTMLTableMapPtr;
453 typedef ::std::vector< SCCOLROW > ScSizeVec;
454 typedef ::std::vector< ScHTMLEntry* > ScHTMLEntryVector;
455 typedef ::std::unique_ptr< ScHTMLEntry > ScHTMLEntryPtr;
456
458 bool IsEmptyCell() const;
460 const SfxItemSet& GetCurrItemSet() const;
461
463 static bool IsSpaceCharInfo( const HtmlImportInfo& rInfo );
464
469 void CreateNewEntry( const HtmlImportInfo& rInfo );
470
473
475 void ImplPushEntryToVector( ScHTMLEntryVector& rEntryVector, ScHTMLEntryPtr& rxEntry );
480 bool PushEntry( ScHTMLEntryPtr& rxEntry );
485 bool PushEntry( const HtmlImportInfo& rInfo, bool bLastInCell = false );
487 void PushTableEntry( ScHTMLTableId nTableId );
488
493 ScHTMLTable* GetExistingTable( ScHTMLTableId nTableId ) const;
496 ScHTMLTable* InsertNestedTable( const HtmlImportInfo& rInfo, bool bPreFormText );
497
499 void InsertNewCell( const ScHTMLSize& rSpanSize );
500
502 void ImplRowOn();
504 void ImplRowOff();
506 void ImplDataOn( const ScHTMLSize& rSpanSize );
508 void ImplDataOff();
509
511 static void ProcessFormatOptions( SfxItemSet& rItemSet, const HtmlImportInfo& rInfo );
512
515 void SetDocSize( ScHTMLOrient eOrient, SCCOLROW nCellPos, SCCOLROW nSize );
522 ScHTMLOrient eOrient, SCCOLROW nCellPos,
523 SCCOLROW nCellSpan, SCCOLROW nRealDocSize );
524
525private:
528 OUString maTableName;
529 OUString maCaption;
530 OUStringBuffer maCaptionBuffer;
533 std::optional<SfxItemSet> moRowItemSet;
534 std::optional<SfxItemSet> moDataItemSet;
539 std::vector<std::shared_ptr<ScEEParseEntry>>& mrEEParseList;
540 std::map< ScHTMLPos, ScHTMLEntryVector > maEntryMap;
549 bool mbBorderOn:1;
551 bool mbRowOn:1;
552 bool mbDataOn:1;
554 bool mbCaptionOn:1;
555};
556
559{
560public:
561 explicit ScHTMLGlobalTable(
562 SfxItemPool& rPool,
563 EditEngine& rEditEngine,
564 std::vector<std::shared_ptr<ScEEParseEntry>>& rEEParseList,
565 ScHTMLTableId& rnUnusedId, ScHTMLParser* pParser,
566 const ScDocument& rDoc );
567
568 virtual ~ScHTMLGlobalTable() override;
569
571 void Recalc();
572};
573
580{
581public:
582 explicit ScHTMLQueryParser( EditEngine* pEditEngine, ScDocument* pDoc );
583 virtual ~ScHTMLQueryParser() override;
584
585 virtual ErrCode Read( SvStream& rStrm, const OUString& rBaseURL ) override;
586
588 virtual const ScHTMLTable* GetGlobalTable() const override;
589
590private:
592 void ProcessToken( const HtmlImportInfo& rInfo );
594 void InsertText( const HtmlImportInfo& rInfo );
596 void FontOn( const HtmlImportInfo& rInfo );
597
599 void MetaOn( const HtmlImportInfo& rInfo );
601 void TitleOn();
603 void TitleOff( const HtmlImportInfo& rInfo );
604
606 void TableOn( const HtmlImportInfo& rInfo );
608 void TableOff( const HtmlImportInfo& rInfo );
610 void PreOn( const HtmlImportInfo& rInfo );
612 void PreOff( const HtmlImportInfo& rInfo );
613
615 void CloseTable( const HtmlImportInfo& rInfo );
616
617 void ParseStyle(std::u16string_view rStrm);
618
619 DECL_LINK( HTMLImportHdl, HtmlImportInfo&, void );
620
621private:
622 typedef ::std::unique_ptr< ScHTMLGlobalTable > ScHTMLGlobalTablePtr;
623
624 OUStringBuffer maTitle;
629};
630
631/* vim:set shiftwidth=4 softtabstop=4 expandtab: */
SCROW Row() const
Definition: address.hxx:274
SCCOL Col() const
Definition: address.hxx:279
The "global table" representing the entire HTML document.
Definition: htmlpars.hxx:559
ScHTMLGlobalTable(SfxItemPool &rPool, EditEngine &rEditEngine, std::vector< std::shared_ptr< ScEEParseEntry > > &rEEParseList, ScHTMLTableId &rnUnusedId, ScHTMLParser *pParser, const ScDocument &rDoc)
Definition: htmlpars.cxx:2746
void Recalc()
Recalculates sizes and resulting positions of all document entries.
Definition: htmlpars.cxx:2762
virtual ~ScHTMLGlobalTable() override
Definition: htmlpars.cxx:2758
DECL_LINK(HTMLImportHdl, HtmlImportInfo &, void)
ScRangeListRef xLockedList
Definition: htmlpars.hxx:160
void AnchorOn(HtmlImportInfo *)
Definition: htmlpars.cxx:1392
static void EntryEnd(ScEEParseEntry *, const ESelection &)
Definition: htmlpars.cxx:317
void ProcToken(HtmlImportInfo *)
Definition: htmlpars.cxx:1465
virtual ~ScHTMLLayoutParser() override
Definition: htmlpars.cxx:232
sal_uInt16 nColOffset
Definition: htmlpars.hxx:171
sal_uInt16 nOffsetTolerance
Definition: htmlpars.hxx:173
ScHTMLLayoutParser(EditEngine *, OUString aBaseURL, const Size &aPageSize, ScDocument *)
Definition: htmlpars.cxx:205
ScHTMLColOffset * pLocalColOffset
Definition: htmlpars.hxx:163
void Image(HtmlImportInfo *)
Definition: htmlpars.cxx:1266
sal_uInt16 nMaxTable
Definition: htmlpars.hxx:167
void NextRow(const HtmlImportInfo *)
Definition: htmlpars.cxx:333
virtual const ScHTMLTable * GetGlobalTable() const override
Returns the "global table" which contains the entire HTML document.
Definition: htmlpars.cxx:297
::std::stack< std::unique_ptr< ScHTMLTableStackEntry > > aTableStack
Definition: htmlpars.hxx:158
void TableOn(HtmlImportInfo *)
Definition: htmlpars.cxx:1011
sal_uInt16 nTable
Definition: htmlpars.hxx:166
sal_uInt16 GetWidth(const ScEEParseEntry *)
Definition: htmlpars.cxx:618
void TableRowOff(const HtmlImportInfo *)
Definition: htmlpars.cxx:1000
void TableRowOn(const HtmlImportInfo *)
Definition: htmlpars.cxx:993
std::unique_ptr< OuterMap > pTables
Definition: htmlpars.hxx:161
static bool SeekOffset(const ScHTMLColOffset *, sal_uInt16 nOffset, SCCOL *pCol, sal_uInt16 nOffsetTol)
Definition: htmlpars.cxx:344
void FontOn(HtmlImportInfo *)
Definition: htmlpars.cxx:1410
void Colonize(ScEEParseEntry *)
Definition: htmlpars.cxx:790
static void ModifyOffset(ScHTMLColOffset *, sal_uInt16 &nOldOffset, sal_uInt16 &nNewOffset, sal_uInt16 nOffsetTol)
Definition: htmlpars.cxx:403
void SkipLocked(ScEEParseEntry *, bool bJoin=true)
Definition: htmlpars.cxx:440
void CloseEntry(const HtmlImportInfo *)
Definition: htmlpars.cxx:818
bool IsAtBeginningOfText(const HtmlImportInfo *)
Definition: htmlpars.cxx:1402
sal_uInt16 GetWidthPixel(const HTMLOption &)
Definition: htmlpars.cxx:1372
void NewActEntry(const ScEEParseEntry *)
Definition: htmlpars.cxx:302
void TableDataOff(const HtmlImportInfo *)
Definition: htmlpars.cxx:1005
sal_uLong nFirstTableCell
Definition: htmlpars.hxx:164
ScHTMLColOffset maColOffset
Definition: htmlpars.hxx:162
void TableOff(const HtmlImportInfo *)
Definition: htmlpars.cxx:1111
virtual ErrCode Read(SvStream &, const OUString &rBaseURL) override
Definition: htmlpars.cxx:250
static void MakeCol(ScHTMLColOffset *, sal_uInt16 &nOffset, sal_uInt16 &nWidth, sal_uInt16 nOffsetTol, sal_uInt16 nWidthTol)
Definition: htmlpars.cxx:369
sal_uInt16 nColOffsetStart
Definition: htmlpars.hxx:172
static void MakeColNoRef(ScHTMLColOffset *, sal_uInt16 nOffset, sal_uInt16 nWidth, sal_uInt16 nOffsetTol, sal_uInt16 nWidthTol)
Definition: htmlpars.cxx:387
void ColOn(HtmlImportInfo *)
Definition: htmlpars.cxx:1358
void TableDataOn(HtmlImportInfo *)
Definition: htmlpars.cxx:900
sal_uInt16 nTableWidth
Definition: htmlpars.hxx:170
Base class for HTML parser classes.
Definition: htmlpars.hxx:79
ScHTMLParser(EditEngine *pEditEngine, ScDocument *pDoc)
The destination document.
Definition: htmlpars.cxx:188
sal_uInt32 maFontHeights[SC_HTML_FONTSIZES]
Definition: htmlpars.hxx:82
ScHTMLStyles maStyles
Definition: htmlpars.hxx:80
ScHTMLStyles & GetStyles()
Definition: htmlpars.hxx:91
ScDocument * mpDoc
Definition: htmlpars.hxx:83
virtual ~ScHTMLParser() override
Definition: htmlpars.cxx:201
virtual const ScHTMLTable * GetGlobalTable() const =0
Returns the "global table" which contains the entire HTML document.
ScDocument & GetDoc()
Definition: htmlpars.hxx:92
virtual ErrCode Read(SvStream &rStrm, const OUString &rBaseURL) override=0
The HTML parser for data queries.
Definition: htmlpars.hxx:580
void PreOn(const HtmlImportInfo &rInfo)
Opens a new table based on preformatted text.
Definition: htmlpars.cxx:3010
void TitleOff(const HtmlImportInfo &rInfo)
Closes the title of the HTML document (</title> tag).
Definition: htmlpars.cxx:2983
void TableOff(const HtmlImportInfo &rInfo)
Closes the current table.
Definition: htmlpars.cxx:3005
ScHTMLTable * mpCurrTable
Contains the entire imported document.
Definition: htmlpars.hxx:626
virtual ~ScHTMLQueryParser() override
Definition: htmlpars.cxx:2782
virtual ErrCode Read(SvStream &rStrm, const OUString &rBaseURL) override
Definition: htmlpars.cxx:2786
void PreOff(const HtmlImportInfo &rInfo)
Closes the current preformatted text table.
Definition: htmlpars.cxx:3015
::std::unique_ptr< ScHTMLGlobalTable > ScHTMLGlobalTablePtr
Definition: htmlpars.hxx:622
bool mbTitleOn
First unused table identifier.
Definition: htmlpars.hxx:628
void ParseStyle(std::u16string_view rStrm)
Definition: htmlpars.cxx:3099
OUStringBuffer maTitle
Definition: htmlpars.hxx:624
void ProcessToken(const HtmlImportInfo &rInfo)
Handles all possible tags in the HTML document.
Definition: htmlpars.cxx:2829
ScHTMLGlobalTablePtr mxGlobTable
The title of the document.
Definition: htmlpars.hxx:625
void FontOn(const HtmlImportInfo &rInfo)
Processes the <font> tag.
Definition: htmlpars.cxx:2921
ScHTMLQueryParser(EditEngine *pEditEngine, ScDocument *pDoc)
Definition: htmlpars.cxx:2772
void CloseTable(const HtmlImportInfo &rInfo)
Closes the current table, regardless on opening tag.
Definition: htmlpars.cxx:3020
DECL_LINK(HTMLImportHdl, HtmlImportInfo &, void)
void TitleOn()
Opens the title of the HTML document (<title> tag).
Definition: htmlpars.cxx:2977
void InsertText(const HtmlImportInfo &rInfo)
Inserts a text portion into current entry.
Definition: htmlpars.cxx:2914
virtual const ScHTMLTable * GetGlobalTable() const override
Returns the "global table" which contains the entire HTML document.
Definition: htmlpars.cxx:2824
void MetaOn(const HtmlImportInfo &rInfo)
Processes the <meta> tag.
Definition: htmlpars.cxx:2963
ScHTMLTableId mnUnusedId
Pointer to current table (performance).
Definition: htmlpars.hxx:627
void TableOn(const HtmlImportInfo &rInfo)
Opens a new table at the current position.
Definition: htmlpars.cxx:3000
Collection of HTML style data parsed from the content of <style> elements.
Definition: htmlpars.hxx:50
::std::map< OUString, PropsType > NamePropsType
Definition: htmlpars.hxx:52
void add(const char *pElemName, size_t nElemName, const char *pClassName, size_t nClassName, const OUString &aProp, const OUString &aValue)
Definition: htmlpars.cxx:75
const OUString & getPropertyValue(const OUString &rElem, const OUString &rClass, const OUString &rPropName) const
Find best-matching property value for given element and class names.
Definition: htmlpars.cxx:120
ElemsType m_ElemProps
element global properties (no class specified)
Definition: htmlpars.hxx:57
ScHTMLStyles()
just a persistent empty string.
Definition: htmlpars.cxx:73
NamePropsType m_GlobalProps
Definition: htmlpars.hxx:55
NamePropsType m_ElemGlobalProps
global properties (for a given class for all elements)
Definition: htmlpars.hxx:56
::std::map< OUString, NamePropsType > ElemsType
Definition: htmlpars.hxx:53
std::unordered_map< OUString, OUString > PropsType
Definition: htmlpars.hxx:51
const OUString maEmpty
element to class to properties (both element and class are given)
Definition: htmlpars.hxx:58
static void insertProp(NamePropsType &rProps, const OUString &aName, const OUString &aProp, const OUString &aValue)
Definition: htmlpars.cxx:165
A map of ScHTMLTable objects.
Definition: htmlpars.cxx:1696
Stores data for one table in an HTML document.
Definition: htmlpars.hxx:327
ScHTMLTable * PreOn(const HtmlImportInfo &rInfo)
Starts a new table based on preformatted text (<pre> tag).
Definition: htmlpars.cxx:1990
EditEngine & mrEditEngine
List of all used cells.
Definition: htmlpars.hxx:538
static bool IsSpaceCharInfo(const HtmlImportInfo &rInfo)
Returns true, if import info represents a space character.
Definition: htmlpars.cxx:2294
void RowOn(const HtmlImportInfo &rInfo)
Starts next row (<tr> tag).
Definition: htmlpars.cxx:2001
ScHTMLTableId GetTableId() const
Returns the unique identifier of the table.
Definition: htmlpars.hxx:347
std::optional< SfxItemSet > moRowItemSet
Items for the entire table.
Definition: htmlpars.hxx:533
::std::unique_ptr< ScHTMLEntry > ScHTMLEntryPtr
Definition: htmlpars.hxx:455
ScHTMLEntryVector * mpCurrEntryVector
List of entries for each cell.
Definition: htmlpars.hxx:541
ScHTMLSize maSize
Cumulated cell counts for each HTML table column/row.
Definition: htmlpars.hxx:544
bool PushEntry(ScHTMLEntryPtr &rxEntry)
Tries to insert the entry into the current cell.
Definition: htmlpars.cxx:2319
ScRangeList maUsedCells
List of all vertically merged cells.
Definition: htmlpars.hxx:537
void CalcNeededDocSize(ScHTMLOrient eOrient, SCCOLROW nCellPos, SCCOLROW nCellSpan, SCCOLROW nRealDocSize)
Calculates and sets the resulting size the cell needs in the document.
Definition: htmlpars.cxx:2567
void ImplPushEntryToVector(ScHTMLEntryVector &rEntryVector, ScHTMLEntryPtr &rxEntry)
Pushes the passed entry into the list of the current cell.
Definition: htmlpars.cxx:2311
void InsertPara(const HtmlImportInfo &rInfo)
Inserts a new line, if in preformatted text, else does nothing.
Definition: htmlpars.cxx:1930
SvNumberFormatter * GetFormatTable()
Definition: htmlpars.cxx:2284
virtual ~ScHTMLTable()
Definition: htmlpars.cxx:1882
static void ProcessFormatOptions(SfxItemSet &rItemSet, const HtmlImportInfo &rInfo)
Inserts additional formatting options from import info into the item set.
Definition: htmlpars.cxx:2496
ScHTMLTable * mpParentTable
Definition: htmlpars.hxx:526
const OUString & GetTableName() const
Returns the name of the table, specified in the TABLE tag.
Definition: htmlpars.hxx:343
void ImplDataOn(const ScHTMLSize &rSpanSize)
Set internal states for entering a new table cell.
Definition: htmlpars.cxx:2473
void DataOn(const HtmlImportInfo &rInfo)
Starts the next cell (<td> or <th> tag).
Definition: htmlpars.cxx:2069
ScRangeList maVMergedCells
List of all horizontally merged cells.
Definition: htmlpars.hxx:536
ScHTMLTableMapPtr mxNestedTables
Pointer to parent table.
Definition: htmlpars.hxx:527
void DataOff(const HtmlImportInfo &rInfo)
Closes the current cell (</td> or </th> tag).
Definition: htmlpars.cxx:2136
void InsertNewCell(const ScHTMLSize &rSpanSize)
Inserts a new cell in an unused position, starting from current cell position.
Definition: htmlpars.cxx:2398
ScRangeList maHMergedCells
Items for the current cell.
Definition: htmlpars.hxx:535
void BodyOff(const HtmlImportInfo &rInfo)
Closes the body of the HTML document (</body> tag).
Definition: htmlpars.cxx:2159
void FillEmptyCells()
Fills all empty cells in this and nested tables with dummy parse entries.
Definition: htmlpars.cxx:2583
ScHTMLEntryPtr CreateEntry() const
Creates and returns a new empty flying entry at position (0,0).
Definition: htmlpars.cxx:2299
void PutText(const HtmlImportInfo &rInfo)
Inserts a text portion into current entry.
Definition: htmlpars.cxx:1915
void SetDocSize(ScHTMLOrient eOrient, SCCOLROW nCellPos, SCCOLROW nSize)
Updates the document column/row size of the specified column or row.
Definition: htmlpars.cxx:2552
OUStringBuffer maCaptionBuffer
Caption name of the table from
Definition: htmlpars.hxx:530
void RecalcDocSize()
Recalculates the size of all columns/rows in the table, regarding nested tables.
Definition: htmlpars.cxx:2619
void ApplyCellBorders(ScDocument *pDoc, const ScAddress &rFirstPos) const
Applies border formatting to the passed document.
Definition: htmlpars.cxx:2240
ScHTMLPos maDocBasePos
Address of current cell to fill.
Definition: htmlpars.hxx:546
std::map< ScHTMLPos, ScHTMLEntryVector > maEntryMap
List that owns the parse entries (from ScEEParser).
Definition: htmlpars.hxx:540
void CaptionOn()
Processes the caption of the table (<caption> tag).
Definition: htmlpars.cxx:1976
::std::vector< SCCOLROW > ScSizeVec
Definition: htmlpars.hxx:453
ScHTMLEntryPtr mxCurrEntry
Current entry vector from map for faster access.
Definition: htmlpars.hxx:542
void BreakOn()
Inserts a line break (<br> tag).
Definition: htmlpars.cxx:1939
const SfxItemSet & GetCurrItemSet() const
Returns the item set from cell, row, or table, depending on current state.
Definition: htmlpars.cxx:1886
bool mbCaptionOn
true = Insert empty line before current entry.
Definition: htmlpars.hxx:554
void HeadingOn()
Inserts a heading line (<p> and <h*> tags).
Definition: htmlpars.cxx:1945
void CreateNewEntry(const HtmlImportInfo &rInfo)
Creates a new flying entry.
Definition: htmlpars.cxx:2304
std::optional< SfxItemSet > moDataItemSet
Items for the current table row.
Definition: htmlpars.hxx:534
void InsertLeadingEmptyLine()
Inserts an empty line in front of the next entry.
Definition: htmlpars.cxx:1951
ScHTMLParser * mpParser
Resulting base address in a Calc document.
Definition: htmlpars.hxx:547
OUString maTableName
Table of nested HTML tables.
Definition: htmlpars.hxx:528
ScHTMLTable * PreOff(const HtmlImportInfo &rInfo)
Closes this table based on preformatted text (</pre> tag).
Definition: htmlpars.cxx:1996
const ScHTMLPos & GetDocPos() const
Returns the resulting Calc position of the top left edge of the table.
Definition: htmlpars.hxx:419
void PutItem(const SfxPoolItem &rItem)
Puts the item into the item set of the current entry.
Definition: htmlpars.cxx:1908
ScHTMLTable(ScHTMLTable &rParentTable, const HtmlImportInfo &rInfo, bool bPreFormText, const ScDocument &rDoc)
Creates a new HTML table without content.
Definition: htmlpars.cxx:1810
void AnchorOn()
Processes a hyperlink (<a> tag).
Definition: htmlpars.cxx:1957
std::vector< std::shared_ptr< ScEEParseEntry > > & mrEEParseList
Edit engine (from ScEEParser).
Definition: htmlpars.hxx:539
void BodyOn(const HtmlImportInfo &rInfo)
Starts the body of the HTML document (<body> tag).
Definition: htmlpars.cxx:2144
void ImplDataOff()
Set internal states for leaving a table cell.
Definition: htmlpars.cxx:2485
ScHTMLPos maCurrCell
Size of the table.
Definition: htmlpars.hxx:545
bool mbPushEmptyLine
true = Inside of or .
Definition: htmlpars.hxx:553
const OUString & GetTableCaption() const
Returns the caption of the table, specified in the <caption> tag.
Definition: htmlpars.hxx:345
ScHTMLTable * TableOn(const HtmlImportInfo &rInfo)
Starts a new table nested in this table (<table> tag).
Definition: htmlpars.cxx:1965
ScSizeVec maCumSizes[2]
Working entry, not yet inserted in a list.
Definition: htmlpars.hxx:543
ScHTMLTable * FindNestedTable(ScHTMLTableId nTableId) const
Searches in all nested tables for the specified table.
Definition: htmlpars.cxx:1903
void RowOff(const HtmlImportInfo &rInfo)
Closes the current row (</tr> tag).
Definition: htmlpars.cxx:2012
bool mbPreFormText
true = Table borders on.
Definition: htmlpars.hxx:550
const ScDocument & mrDoc
Definition: htmlpars.hxx:548
void ImplRowOn()
Set internal states for a new table row.
Definition: htmlpars.cxx:2451
ScHTMLSize GetSpan(const ScHTMLPos &rCellPos) const
Returns the cell spanning of the specified cell.
Definition: htmlpars.cxx:1892
::std::unique_ptr< ScHTMLTableMap > ScHTMLTableMapPtr
Definition: htmlpars.hxx:452
ScHTMLTable * CloseTable(const HtmlImportInfo &rInfo)
Closes this table (</table> tag) or preformatted text (</pre> tag).
Definition: htmlpars.cxx:2170
SCCOLROW GetDocSize(ScHTMLOrient eOrient, SCCOLROW nCellPos) const
Returns the resulting document row/column count of the specified HTML row/column.
Definition: htmlpars.cxx:2186
OUString maCaption
Table name from.
Definition: htmlpars.hxx:529
::std::vector< ScHTMLEntry * > ScHTMLEntryVector
Definition: htmlpars.hxx:454
void ImplRowOff()
Set internal states for leaving a table row.
Definition: htmlpars.cxx:2461
void RecalcDocPos(const ScHTMLPos &rBasePos)
Recalculates the position of all cell entries and nested tables.
Definition: htmlpars.cxx:2668
bool IsEmptyCell() const
Returns true, if the current cell does not contain an entry yet.
Definition: htmlpars.cxx:2289
SfxItemSet maTableItemSet
Unique identifier of this table.
Definition: htmlpars.hxx:532
ScHTMLTable * GetExistingTable(ScHTMLTableId nTableId) const
Tries to find a table from the table container.
Definition: htmlpars.cxx:2381
ScHTMLTable * InsertNestedTable(const HtmlImportInfo &rInfo, bool bPreFormText)
Inserts a nested table in the current cell at the specified position.
Definition: htmlpars.cxx:2389
ScHTMLTable * TableOff(const HtmlImportInfo &rInfo)
Closes this table (</table> tag).
Definition: htmlpars.cxx:1971
bool mbBorderOn
Definition: htmlpars.hxx:549
void CaptionOff()
Processes the caption of the table (</caption> tag).
Definition: htmlpars.cxx:1982
bool mbRowOn
true = Table from preformatted text (
Definition: htmlpars.hxx:551
ScHTMLTableAutoId maTableId
Caption buffer of the table from
Definition: htmlpars.hxx:531
void PushTableEntry(ScHTMLTableId nTableId)
Pushes a new entry into current cell which references a nested table.
Definition: htmlpars.cxx:2371
void GetDocRange(ScRange &rRange) const
Calculates the current Calc document area of this table.
Definition: htmlpars.cxx:2229
bool mbDataOn
true = Inside of .
Definition: htmlpars.hxx:552
const ScHTMLTableId SC_HTML_NO_TABLE
Used as table index for normal (non-table) entries in ScHTMLEntry structs.
Definition: htmlpars.hxx:232
o3tl::sorted_vector< sal_uLong > ScHTMLColOffset
Definition: htmlpars.hxx:98
::std::map< sal_uInt16, InnerMap * > OuterMap
Definition: htmlpars.hxx:150
bool operator<(const ScHTMLPos &rPos1, const ScHTMLPos &rPos2)
Definition: htmlpars.hxx:255
ScHTMLOrient
Declares the orientation in or for a table: column or row.
Definition: htmlpars.hxx:225
@ tdCol
Definition: htmlpars.hxx:225
@ tdRow
Definition: htmlpars.hxx:225
::std::map< SCROW, SCROW > InnerMap
Definition: htmlpars.hxx:146
const sal_uInt32 SC_HTML_FONTSIZES
Definition: htmlpars.hxx:35
const ScHTMLTableId SC_HTML_GLOBAL_TABLE
Identifier of the "global table" (the entire HTML document).
Definition: htmlpars.hxx:230
const sal_uInt16 SC_HTML_OFFSET_TOLERANCE_SMALL
Definition: htmlpars.hxx:38
const sal_uInt16 SC_HTML_OFFSET_TOLERANCE_LARGE
Definition: htmlpars.hxx:39
sal_uInt16 ScHTMLTableId
Type for a unique identifier for each table.
Definition: htmlpars.hxx:228
void SvStream & rStrm
sal_uIntPtr sal_uLong
bool HasRange() const
SfxItemSet aItemSet
Definition: eeparser.hxx:55
ESelection aSel
Definition: eeparser.hxx:56
sal_uInt16 nTab
Definition: eeparser.hxx:67
ScHTMLAdjustStackEntry(SCCOL nLCol, SCROW nNRow, SCROW nCRow)
Definition: htmlpars.hxx:137
A single entry containing a line of text or representing a table.
Definition: htmlpars.hxx:274
void AdjustStart(const HtmlImportInfo &rInfo)
Sets start point of the entry selection to the start of the import info object.
Definition: htmlpars.cxx:1649
bool HasContents() const
Returns true, if the entry has any content to be imported.
Definition: htmlpars.cxx:1644
ScHTMLEntry(const SfxItemSet &rItemSet, ScHTMLTableId nTableId=SC_HTML_NO_TABLE)
Definition: htmlpars.cxx:1636
bool IsTable() const
Returns true, if the entry represents a table.
Definition: htmlpars.hxx:285
const SfxItemSet & GetItemSet() const
Returns read-only access to the item set of this entry.
Definition: htmlpars.hxx:301
bool IsEmpty() const
Returns true, if the selection of the entry is empty.
Definition: htmlpars.hxx:281
void SetImportAlways()
Sets or clears the import always state.
Definition: htmlpars.hxx:290
bool mbImportAlways
Definition: htmlpars.hxx:304
ScHTMLTableId GetTableId() const
Returns true, if the entry represents a table.
Definition: htmlpars.hxx:287
void AdjustEnd(const HtmlImportInfo &rInfo)
Sets end point of the entry selection to the end of the import info object.
Definition: htmlpars.cxx:1662
void Strip(const EditEngine &rEditEngine)
Deletes leading and trailing empty paragraphs from the entry.
Definition: htmlpars.cxx:1672
SfxItemSet & GetItemSet()
Returns read/write access to the item set of this entry.
Definition: htmlpars.hxx:299
A 2D cell position in an HTML table.
Definition: htmlpars.hxx:236
ScHTMLPos(SCCOL nCol, SCROW nRow)
Definition: htmlpars.hxx:241
ScHTMLPos(const ScAddress &rAddr)
Definition: htmlpars.hxx:243
ScAddress MakeAddr() const
Definition: htmlpars.hxx:251
void Set(SCCOL nCol, SCROW nRow)
Definition: htmlpars.hxx:247
SCCOL mnCol
Definition: htmlpars.hxx:237
void Set(const ScAddress &rAddr)
Definition: htmlpars.hxx:249
SCCOLROW Get(ScHTMLOrient eOrient) const
Definition: htmlpars.hxx:245
SCROW mnRow
Definition: htmlpars.hxx:238
A 2D cell size in an HTML table.
Definition: htmlpars.hxx:262
void Set(SCCOL nCols, SCROW nRows)
Definition: htmlpars.hxx:268
ScHTMLSize(SCCOL nCols, SCROW nRows)
Definition: htmlpars.hxx:266
SCROW mnRows
Definition: htmlpars.hxx:264
SCCOL mnCols
Definition: htmlpars.hxx:263
This struct handles creation of unique table identifiers.
Definition: htmlpars.hxx:309
ScHTMLTableAutoId(ScHTMLTableId &rnUnusedId)
Reference to global unused identifier variable.
Definition: htmlpars.cxx:1803
const ScHTMLTableId mnTableId
Definition: htmlpars.hxx:310
ScHTMLTableId & mrnUnusedId
The created unique table identifier.
Definition: htmlpars.hxx:311
std::shared_ptr< ScEEParseEntry > xCellEntry
Definition: htmlpars.hxx:103
sal_uLong nFirstTableCell
Definition: htmlpars.hxx:105
sal_uInt16 nTableWidth
Definition: htmlpars.hxx:110
ScHTMLTableStackEntry(std::shared_ptr< ScEEParseEntry > xE, ScRangeListRef xL, ScHTMLColOffset *pTO, sal_uLong nFTC, SCROW nRow, SCCOL nStart, SCCOL nMax, sal_uInt16 nTab, sal_uInt16 nTW, sal_uInt16 nCO, sal_uInt16 nCOS, bool bFR)
Definition: htmlpars.hxx:114
ScRangeListRef xLockedList
Definition: htmlpars.hxx:102
ScHTMLColOffset * pLocalColOffset
Definition: htmlpars.hxx:104
sal_uInt16 nColOffset
Definition: htmlpars.hxx:111
sal_uInt16 nColOffsetStart
Definition: htmlpars.hxx:112
sal_Int32 SCCOLROW
a type capable of holding either SCCOL or SCROW
Definition: types.hxx:23
sal_Int16 SCCOL
Definition: types.hxx:21
sal_Int32 SCROW
Definition: types.hxx:17