LibreOffice Module sc (master)  1
htmlpars.hxx
Go to the documentation of this file.
1 /* -*- Mode: C++; tab-width: 4; indent-tabs-mode: nil; c-basic-offset: 4 -*- */
2 /*
3  * This file is part of the LibreOffice project.
4  *
5  * This Source Code Form is subject to the terms of the Mozilla Public
6  * License, v. 2.0. If a copy of the MPL was not distributed with this
7  * file, You can obtain one at http://mozilla.org/MPL/2.0/.
8  *
9  * This file incorporates work covered by the following license notice:
10  *
11  * Licensed to the Apache Software Foundation (ASF) under one or more
12  * contributor license agreements. See the NOTICE file distributed
13  * with this work for additional information regarding copyright
14  * ownership. The ASF licenses this file to you under the Apache
15  * License, Version 2.0 (the "License"); you may not use this file
16  * except in compliance with the License. You may obtain a copy of
17  * the License at http://www.apache.org/licenses/LICENSE-2.0 .
18  */
19 
20 #ifndef INCLUDED_SC_SOURCE_FILTER_INC_HTMLPARS_HXX
21 #define INCLUDED_SC_SOURCE_FILTER_INC_HTMLPARS_HXX
22 
23 #include <memory>
24 #include <map>
25 #include <stack>
26 #include <string_view>
27 #include <unordered_map>
28 #include <vector>
29 #include <o3tl/sorted_vector.hxx>
30 
31 #include <rangelst.hxx>
32 #include "eeparser.hxx"
33 
34 const sal_uInt32 SC_HTML_FONTSIZES = 7; // like export, HTML options
35 
36 // Pixel tolerance for SeekOffset and related.
37 const sal_uInt16 SC_HTML_OFFSET_TOLERANCE_SMALL = 1; // single table
38 const sal_uInt16 SC_HTML_OFFSET_TOLERANCE_LARGE = 10; // nested
39 
40 // BASE class for HTML parser classes
41 
42 class ScHTMLTable;
43 
49 {
50  typedef std::unordered_map<OUString, OUString> PropsType;
51  typedef ::std::map<OUString, std::unique_ptr<PropsType>> NamePropsType;
52  typedef ::std::map<OUString, std::unique_ptr<NamePropsType>> ElemsType;
53 
54  NamePropsType m_GlobalProps;
55  NamePropsType m_ElemGlobalProps;
56  ElemsType m_ElemProps;
57  const OUString maEmpty;
58 public:
59  ScHTMLStyles();
60 
61  void add(const char* pElemName, size_t nElemName, const char* pClassName, size_t nClassName,
62  const OUString& aProp, const OUString& aValue);
63 
67  const OUString& getPropertyValue(
68  const OUString& rElem, const OUString& rClass, const OUString& rPropName) const;
69 
70 private:
71  static void insertProp(
72  NamePropsType& rProps, const OUString& aName,
73  const OUString& aProp, const OUString& aValue);
74 };
75 
// Abstract base class for the HTML parser classes; extends ScEEParser.
// Listing lines 79, 81-82, 90 and 93 are absent from this extract; the index
// below places maStyles (79), mpDoc (82) and GetStyles() (90) on them.
77 class ScHTMLParser : public ScEEParser
78 {
80 protected:
83 
84 public:
85  explicit ScHTMLParser( EditEngine* pEditEngine, ScDocument* pDoc );
86  virtual ~ScHTMLParser() override;
87 
88  virtual ErrCode Read( SvStream& rStrm, const OUString& rBaseURL ) override = 0; // pure virtual: each concrete parser supplies its own Read()
89 
91  ScDocument& GetDoc() { return *mpDoc;} // dereferences mpDoc without a null check
92 
94  virtual const ScHTMLTable* GetGlobalTable() const = 0; // returns the "global table" which contains the entire HTML document
95 };
96 
98 
100 {
102  std::shared_ptr<ScEEParseEntry> xCellEntry;
108  sal_uInt16 nTable;
109  sal_uInt16 nTableWidth;
110  sal_uInt16 nColOffset;
111  sal_uInt16 nColOffsetStart;
112  bool bFirstRow;
113  ScHTMLTableStackEntry( const std::shared_ptr<ScEEParseEntry>& rE, // memberwise constructor: each argument is copied into one member, the body is empty
114  const ScRangeListRef& rL, ScHTMLColOffset* pTO,
115  sal_uLong nFTC,
116  SCROW nRow,
117  SCCOL nStart, SCCOL nMax, sal_uInt16 nTab,
118  sal_uInt16 nTW, sal_uInt16 nCO, sal_uInt16 nCOS,
119  bool bFR )
120  : xLockedList( rL ), xCellEntry(rE), // rE is copied into the shared_ptr member xCellEntry
121  pLocalColOffset( pTO ), // stores the raw pointer as passed; ownership is not visible here
122  nFirstTableCell( nFTC ),
123  nRowCnt( nRow ),
124  nColCntStart( nStart ), nMaxCol( nMax ),
125  nTable( nTab ), nTableWidth( nTW ),
126  nColOffset( nCO ), nColOffsetStart( nCOS ),
127  bFirstRow( bFR )
128  {}
129 };
130 
132 {
137  SCROW nCRow )
138  : nLastCol( nLCol ), nNextRow( nNRow ),
139  nCurRow( nCRow )
140  {}
141 };
142 
143 class EditEngine;
144 class ScDocument;
146 
147 // TODO these need better names
148 typedef ::std::map<SCROW, SCROW> InnerMap;
149 typedef ::std::map<sal_uInt16, InnerMap*> OuterMap;
150 
152 {
153 private:
155  OUString aBaseURL;
156  ::std::stack< std::unique_ptr<ScHTMLTableStackEntry> >
158  OUString aString;
160  std::unique_ptr<OuterMap> pTables;
164  short nTableLevel;
165  sal_uInt16 nTable;
166  sal_uInt16 nMaxTable;
167  SCCOL nColCntStart; // first Col per table
168  SCCOL nMaxCol; // per table
169  sal_uInt16 nTableWidth; // per table
170  sal_uInt16 nColOffset; // current, pixel
171  sal_uInt16 nColOffsetStart; // start value per table, in pixel
172  sal_uInt16 nOffsetTolerance; // for use with SeekOffset and related
173  bool bFirstRow; // per table, whether in first row
175  bool bInCell:1;
176  bool bInTitle:1;
177 
178  DECL_LINK( HTMLImportHdl, HtmlImportInfo&, void );
179  void NewActEntry( const ScEEParseEntry* );
180  static void EntryEnd( ScEEParseEntry*, const ESelection& );
181  void ProcToken( HtmlImportInfo* );
182  void CloseEntry( const HtmlImportInfo* );
183  void NextRow( const HtmlImportInfo* );
184  void SkipLocked( ScEEParseEntry*, bool bJoin = true );
185  static bool SeekOffset( const ScHTMLColOffset*, sal_uInt16 nOffset,
186  SCCOL* pCol, sal_uInt16 nOffsetTol );
187  static void MakeCol( ScHTMLColOffset*, sal_uInt16& nOffset,
188  sal_uInt16& nWidth, sal_uInt16 nOffsetTol,
189  sal_uInt16 nWidthTol );
190  static void MakeColNoRef( ScHTMLColOffset*, sal_uInt16 nOffset,
191  sal_uInt16 nWidth, sal_uInt16 nOffsetTol,
192  sal_uInt16 nWidthTol );
193  static void ModifyOffset( ScHTMLColOffset*, sal_uInt16& nOldOffset,
194  sal_uInt16& nNewOffset, sal_uInt16 nOffsetTol );
195  void Colonize( ScEEParseEntry* );
196  sal_uInt16 GetWidth( const ScEEParseEntry* );
197  void SetWidths();
198  void Adjust();
199 
200  sal_uInt16 GetWidthPixel( const HTMLOption& );
201  bool IsAtBeginningOfText( const HtmlImportInfo* );
202 
203  void TableOn( HtmlImportInfo* );
204  void ColOn( HtmlImportInfo* );
205  void TableRowOn( const HtmlImportInfo* );
206  void TableRowOff( const HtmlImportInfo* );
207  void TableDataOn( HtmlImportInfo* );
208  void TableDataOff( const HtmlImportInfo* );
209  void TableOff( const HtmlImportInfo* );
210  void Image( HtmlImportInfo* );
211  void AnchorOn( HtmlImportInfo* );
212  void FontOn( HtmlImportInfo* );
213 
214 public:
215  ScHTMLLayoutParser( EditEngine*, const OUString& rBaseURL, const Size& aPageSize, ScDocument* );
216  virtual ~ScHTMLLayoutParser() override;
217  virtual ErrCode Read( SvStream&, const OUString& rBaseURL ) override;
218  virtual const ScHTMLTable* GetGlobalTable() const override;
219 };
220 
221 // HTML DATA QUERY PARSER
222 
224 enum ScHTMLOrient { tdCol = 0 , tdRow = 1 }; // selects the column (tdCol) or row (tdRow) component, e.g. in ScHTMLPos::Get()
225 
227 typedef sal_uInt16 ScHTMLTableId; // type for a unique identifier for each table
232 
// A column/row cell position (mnCol, mnRow).
// Listing lines 236-237 and 250 are absent from this extract; the index below
// places mnRow on line 237.
234 struct ScHTMLPos
235 {
238 
239  explicit ScHTMLPos() : mnCol( 0 ), mnRow( 0 ) {} // default position is column 0, row 0
240  explicit ScHTMLPos( SCCOL nCol, SCROW nRow ) :
241  mnCol( nCol ), mnRow( nRow ) {}
242  explicit ScHTMLPos( const ScAddress& rAddr ) { Set( rAddr ); } // takes column and row from rAddr via Set()
243 
244  SCCOLROW Get( ScHTMLOrient eOrient ) const // returns mnCol for tdCol, otherwise mnRow
245  { return (eOrient == tdCol) ? mnCol : mnRow; }
246  void Set( SCCOL nCol, SCROW nRow ) // overwrites both components
247  { mnCol = nCol; mnRow = nRow; }
248  void Set( const ScAddress& rAddr ) // uses only rAddr.Col() and rAddr.Row()
249  { Set( rAddr.Col(), rAddr.Row() ); }
251  { return ScAddress( mnCol, mnRow, 0 ); } // body of a function whose signature line (250) is missing here; third ScAddress argument is fixed to 0
252 };
253 
// Orders cell positions by row first; the column only decides between equal rows.
254 inline bool operator<( const ScHTMLPos& rPos1, const ScHTMLPos& rPos2 )
255 {
256  return (rPos1.mnRow != rPos2.mnRow) ? (rPos1.mnRow < rPos2.mnRow) : (rPos1.mnCol < rPos2.mnCol);
257 }
258 
261 {
264 
265  explicit ScHTMLSize( SCCOL nCols, SCROW nRows ) : // initializes mnCols and mnRows from the arguments
266  mnCols( nCols ), mnRows( nRows ) {}
267  void Set( SCCOL nCols, SCROW nRows ) // overwrites both mnCols and mnRows
268  { mnCols = nCols; mnRows = nRows; }
269 };
270 
273 {
274 public:
275  explicit ScHTMLEntry(
276  const SfxItemSet& rItemSet,
277  ScHTMLTableId nTableId = SC_HTML_NO_TABLE );
278 
280  bool IsEmpty() const { return !aSel.HasRange(); }
282  bool HasContents() const;
284  bool IsTable() const { return nTab != SC_HTML_NO_TABLE; }
286  ScHTMLTableId GetTableId() const { return nTab; }
287 
289  void SetImportAlways() { mbImportAlways = true; }
291  void AdjustStart( const HtmlImportInfo& rInfo );
293  void AdjustEnd( const HtmlImportInfo& rInfo );
295  void Strip( const EditEngine& rEditEngine );
296 
300  const SfxItemSet& GetItemSet() const { return aItemSet; }
301 
302 private:
304 };
305 
308 {
311 
313  explicit ScHTMLTableAutoId( ScHTMLTableId& rnUnusedId );
314 };
315 
316 class ScHTMLTableMap;
317 
326 {
327 public:
333  explicit ScHTMLTable(
334  ScHTMLTable& rParentTable,
335  const HtmlImportInfo& rInfo,
336  bool bPreFormText );
337 
338  virtual ~ScHTMLTable();
339 
341  const OUString& GetTableName() const { return maTableName; }
345  ScHTMLSize GetSpan( const ScHTMLPos& rCellPos ) const;
346 
349  ScHTMLTable* FindNestedTable( ScHTMLTableId nTableId ) const;
350 
352  void PutItem( const SfxPoolItem& rItem );
354  void PutText( const HtmlImportInfo& rInfo );
356  void InsertPara( const HtmlImportInfo& rInfo );
357 
360  void BreakOn();
362  void HeadingOn();
364  void AnchorOn();
365 
368  ScHTMLTable* TableOn( const HtmlImportInfo& rInfo );
371  ScHTMLTable* TableOff( const HtmlImportInfo& rInfo );
374  ScHTMLTable* PreOn( const HtmlImportInfo& rInfo );
377  ScHTMLTable* PreOff( const HtmlImportInfo& rInfo );
378 
381  void RowOn( const HtmlImportInfo& rInfo );
384  void RowOff( const HtmlImportInfo& rInfo );
386  void DataOn( const HtmlImportInfo& rInfo );
389  void DataOff( const HtmlImportInfo& rInfo );
390 
392  void BodyOn( const HtmlImportInfo& rInfo );
394  void BodyOff( const HtmlImportInfo& rInfo );
395 
399  ScHTMLTable* CloseTable( const HtmlImportInfo& rInfo );
400 
402  SCCOLROW GetDocSize( ScHTMLOrient eOrient, SCCOLROW nCellPos ) const;
404  SCCOLROW GetDocSize( ScHTMLOrient eOrient, SCCOLROW nCellBegin, SCCOLROW nCellEnd ) const;
406  SCCOLROW GetDocSize( ScHTMLOrient eOrient ) const;
408  ScHTMLSize GetDocSize( const ScHTMLPos& rCellPos ) const;
409 
411  const ScHTMLPos& GetDocPos() const { return maDocBasePos; }
413  SCCOLROW GetDocPos( ScHTMLOrient eOrient, SCCOLROW nCellPos ) const;
415  ScHTMLPos GetDocPos( const ScHTMLPos& rCellPos ) const;
416 
418  void GetDocRange( ScRange& rRange ) const;
419 
421  void ApplyCellBorders( ScDocument* pDoc, const ScAddress& rFirstPos ) const;
422 
424 
425 protected:
428  explicit ScHTMLTable(
429  SfxItemPool& rPool,
430  EditEngine& rEditEngine,
431  std::vector<std::shared_ptr<ScEEParseEntry>>& rEEParseList,
432  ScHTMLTableId& rnUnusedId, ScHTMLParser* pParser );
433 
435  void FillEmptyCells();
437  void RecalcDocSize();
440  void RecalcDocPos( const ScHTMLPos& rBasePos );
441 
442 private:
443  typedef ::std::unique_ptr< ScHTMLTableMap > ScHTMLTableMapPtr;
444  typedef ::std::unique_ptr< SfxItemSet > SfxItemSetPtr;
445  typedef ::std::vector< SCCOLROW > ScSizeVec;
446  typedef ::std::vector< ScHTMLEntry* > ScHTMLEntryVector;
447  typedef ::std::unique_ptr< ScHTMLEntry > ScHTMLEntryPtr;
448 
450  bool IsEmptyCell() const;
452  const SfxItemSet& GetCurrItemSet() const;
453 
455  static bool IsSpaceCharInfo( const HtmlImportInfo& rInfo );
456 
458  ScHTMLEntryPtr CreateEntry() const;
461  void CreateNewEntry( const HtmlImportInfo& rInfo );
462 
464  void InsertLeadingEmptyLine();
465 
467  void ImplPushEntryToVector( ScHTMLEntryVector& rEntryVector, ScHTMLEntryPtr& rxEntry );
472  bool PushEntry( ScHTMLEntryPtr& rxEntry );
477  bool PushEntry( const HtmlImportInfo& rInfo, bool bLastInCell = false );
479  void PushTableEntry( ScHTMLTableId nTableId );
480 
485  ScHTMLTable* GetExistingTable( ScHTMLTableId nTableId ) const;
488  ScHTMLTable* InsertNestedTable( const HtmlImportInfo& rInfo, bool bPreFormText );
489 
491  void InsertNewCell( const ScHTMLSize& rSpanSize );
492 
494  void ImplRowOn();
496  void ImplRowOff();
498  void ImplDataOn( const ScHTMLSize& rSpanSize );
500  void ImplDataOff();
501 
503  static void ProcessFormatOptions( SfxItemSet& rItemSet, const HtmlImportInfo& rInfo );
504 
507  void SetDocSize( ScHTMLOrient eOrient, SCCOLROW nCellPos, SCCOLROW nSize );
513  void CalcNeededDocSize(
514  ScHTMLOrient eOrient, SCCOLROW nCellPos,
515  SCCOLROW nCellSpan, SCCOLROW nRealDocSize );
516 
517 private:
519  ScHTMLTableMapPtr mxNestedTables;
520  OUString maTableName;
523  SfxItemSetPtr mxRowItemSet;
524  SfxItemSetPtr mxDataItemSet;
529  std::vector<std::shared_ptr<ScEEParseEntry>>& mrEEParseList;
530  std::map< ScHTMLPos, ScHTMLEntryVector > maEntryMap;
531  ScHTMLEntryVector* mpCurrEntryVector;
532  ScHTMLEntryPtr mxCurrEntry;
533  ScSizeVec maCumSizes[ 2 ];
538  bool mbBorderOn:1;
539  bool mbPreFormText:1;
540  bool mbRowOn:1;
541  bool mbDataOn:1;
543 };
544 
547 {
548 public:
549  explicit ScHTMLGlobalTable(
550  SfxItemPool& rPool,
551  EditEngine& rEditEngine,
552  std::vector<std::shared_ptr<ScEEParseEntry>>& rEEParseList,
553  ScHTMLTableId& rnUnusedId, ScHTMLParser* pParser );
554 
555  virtual ~ScHTMLGlobalTable() override;
556 
558  void Recalc();
559 };
560 
567 {
568 public:
569  explicit ScHTMLQueryParser( EditEngine* pEditEngine, ScDocument* pDoc );
570  virtual ~ScHTMLQueryParser() override;
571 
572  virtual ErrCode Read( SvStream& rStrm, const OUString& rBaseURL ) override;
573 
575  virtual const ScHTMLTable* GetGlobalTable() const override;
576 
577 private:
579  void ProcessToken( const HtmlImportInfo& rInfo );
581  void InsertText( const HtmlImportInfo& rInfo );
583  void FontOn( const HtmlImportInfo& rInfo );
584 
586  void MetaOn( const HtmlImportInfo& rInfo );
588  void TitleOn();
590  void TitleOff( const HtmlImportInfo& rInfo );
591 
593  void TableOn( const HtmlImportInfo& rInfo );
595  void TableOff( const HtmlImportInfo& rInfo );
597  void PreOn( const HtmlImportInfo& rInfo );
599  void PreOff( const HtmlImportInfo& rInfo );
600 
602  void CloseTable( const HtmlImportInfo& rInfo );
603 
604  static void ParseStyle(std::u16string_view rStrm);
605 
606  DECL_LINK( HTMLImportHdl, HtmlImportInfo&, void );
607 
608 private:
609  typedef ::std::unique_ptr< ScHTMLGlobalTable > ScHTMLGlobalTablePtr;
610 
611  OUStringBuffer maTitle;
612  ScHTMLGlobalTablePtr mxGlobTable;
615  bool mbTitleOn;
616 };
617 
618 #endif
619 
620 /* vim:set shiftwidth=4 softtabstop=4 expandtab: */
DECL_LINK(HTMLImportHdl, HtmlImportInfo &, void)
std::unique_ptr< OuterMap > pTables
Definition: htmlpars.hxx:160
void RowOn(const HtmlImportInfo &rInfo)
Starts next row (<tr> tag).
Definition: htmlpars.cxx:1988
EditEngine & mrEditEngine
Edit engine (from ScEEParser).
Definition: htmlpars.hxx:528
ScHTMLStyles()
just a persistent empty string.
Definition: htmlpars.cxx:69
std::vector< std::shared_ptr< ScEEParseEntry > > & mrEEParseList
List that owns the parse entries (from ScEEParser).
Definition: htmlpars.hxx:529
::std::unique_ptr< ScHTMLTableMap > ScHTMLTableMapPtr
Definition: htmlpars.hxx:443
SCCOL mnCols
Definition: htmlpars.hxx:262
ScHTMLTable * PreOn(const HtmlImportInfo &rInfo)
Starts a new table based on preformatted text (<pre> tag).
Definition: htmlpars.cxx:1977
void AnchorOn(HtmlImportInfo *)
Definition: htmlpars.cxx:1400
SfxItemSet maTableItemSet
Items for the entire table.
Definition: htmlpars.hxx:522
const sal_uInt16 SC_HTML_OFFSET_TOLERANCE_LARGE
Definition: htmlpars.hxx:38
static void EntryEnd(ScEEParseEntry *, const ESelection &)
Definition: htmlpars.cxx:309
void Set(const ScAddress &rAddr)
Definition: htmlpars.hxx:248
ScHTMLSize maSize
Size of the table.
Definition: htmlpars.hxx:534
ScDocument * mpDoc
Definition: htmlpars.hxx:82
static void MakeCol(ScHTMLColOffset *, sal_uInt16 &nOffset, sal_uInt16 &nWidth, sal_uInt16 nOffsetTol, sal_uInt16 nWidthTol)
Definition: htmlpars.cxx:361
sal_uInt16 nColOffset
Definition: htmlpars.hxx:110
ScHTMLTableAutoId(ScHTMLTableId &rnUnusedId)
Reference to global unused identifier variable.
Definition: htmlpars.cxx:1811
static void ModifyOffset(ScHTMLColOffset *, sal_uInt16 &nOldOffset, sal_uInt16 &nNewOffset, sal_uInt16 nOffsetTol)
Definition: htmlpars.cxx:395
A map of ScHTMLTable objects.
Definition: htmlpars.cxx:1703
SfxItemSet aItemSet
Definition: eeparser.hxx:54
virtual ~ScHTMLParser() override
Definition: htmlpars.cxx:193
void Strip(const EditEngine &rEditEngine)
Deletes leading and trailing empty paragraphs from the entry.
Definition: htmlpars.cxx:1680
SCROW Row() const
Definition: address.hxx:262
bool mbBorderOn
Definition: htmlpars.hxx:538
ScHTMLStyles maStyles
Definition: htmlpars.hxx:79
A single entry containing a line of text or representing a table.
Definition: htmlpars.hxx:272
bool PushEntry(ScHTMLEntryPtr &rxEntry)
Tries to insert the entry into the current cell.
Definition: htmlpars.cxx:2306
const sal_uInt32 SC_HTML_FONTSIZES
Definition: htmlpars.hxx:34
ScHTMLEntry(const SfxItemSet &rItemSet, ScHTMLTableId nTableId=SC_HTML_NO_TABLE)
Definition: htmlpars.cxx:1644
ESelection aSel
Definition: eeparser.hxx:55
void Colonize(ScEEParseEntry *)
Definition: htmlpars.cxx:773
::std::map< SCROW, SCROW > InnerMap
Definition: htmlpars.hxx:145
bool IsEmptyCell() const
Returns true, if the current cell does not contain an entry yet.
Definition: htmlpars.cxx:2276
void InsertNewCell(const ScHTMLSize &rSpanSize)
Inserts a new cell in an unused position, starting from current cell position.
Definition: htmlpars.cxx:2385
::std::map< sal_uInt16, InnerMap * > OuterMap
Definition: htmlpars.hxx:149
OUString maTableName
Table name from <table id> option.
Definition: htmlpars.hxx:520
ScRangeList maHMergedCells
List of all horizontally merged cells.
Definition: htmlpars.hxx:525
sal_uIntPtr sal_uLong
void CloseEntry(const HtmlImportInfo *)
Definition: htmlpars.cxx:801
DECL_LINK(HTMLImportHdl, HtmlImportInfo &, void)
void AdjustEnd(const HtmlImportInfo &rInfo)
Sets end point of the entry selection to the end of the import info object.
Definition: htmlpars.cxx:1670
bool IsAtBeginningOfText(const HtmlImportInfo *)
Definition: htmlpars.cxx:1410
ScHTMLTable * TableOn(const HtmlImportInfo &rInfo)
Starts a new table nested in this table (<table> tag).
Definition: htmlpars.cxx:1966
ScHTMLTable * TableOff(const HtmlImportInfo &rInfo)
Closes this table (</table> tag).
Definition: htmlpars.cxx:1972
NamePropsType m_GlobalProps
Definition: htmlpars.hxx:54
bool mbPreFormText
true = Table from preformatted text (<pre> tag).
Definition: htmlpars.hxx:539
virtual ~ScHTMLTable()
Definition: htmlpars.cxx:1886
ScSizeVec maCumSizes[2]
Cumulated cell counts for each HTML table column/row.
Definition: htmlpars.hxx:533
void InsertText(const HtmlImportInfo &rInfo)
Inserts a text portion into current entry.
Definition: htmlpars.cxx:2898
ScHTMLPos(SCCOL nCol, SCROW nRow)
Definition: htmlpars.hxx:240
void CloseTable(const HtmlImportInfo &rInfo)
Closes the current table, regardless on opening tag.
Definition: htmlpars.cxx:3004
ScHTMLTableAutoId maTableId
Unique identifier of this table.
Definition: htmlpars.hxx:521
SCCOLROW GetDocSize(ScHTMLOrient eOrient, SCCOLROW nCellPos) const
Returns the resulting document row/column count of the specified HTML row/column. ...
Definition: htmlpars.cxx:2173
void ProcToken(HtmlImportInfo *)
Definition: htmlpars.cxx:1473
SCROW mnRow
Definition: htmlpars.hxx:237
virtual ~ScHTMLGlobalTable() override
Definition: htmlpars.cxx:2744
ScHTMLEntryPtr mxCurrEntry
Working entry, not yet inserted in a list.
Definition: htmlpars.hxx:532
ScHTMLTableId & mrnUnusedId
Reference to global unused identifier variable.
Definition: htmlpars.hxx:310
ScDocument & GetDoc()
Definition: htmlpars.hxx:91
ScHTMLParser * mpParser
Resulting base address in a Calc document.
Definition: htmlpars.hxx:537
static void ParseStyle(std::u16string_view rStrm)
Definition: htmlpars.cxx:3094
void NextRow(const HtmlImportInfo *)
Definition: htmlpars.cxx:325
void BodyOff(const HtmlImportInfo &rInfo)
Closes the body of the HTML document (</body> tag).
Definition: htmlpars.cxx:2146
ScHTMLEntryVector * mpCurrEntryVector
Current entry vector from map for faster access.
Definition: htmlpars.hxx:531
const sal_uInt16 SC_HTML_OFFSET_TOLERANCE_SMALL
Definition: htmlpars.hxx:37
virtual ~ScHTMLQueryParser() override
Definition: htmlpars.cxx:2768
bool mbDataOn
true = Inside of <td> </td> or <th> </th>.
Definition: htmlpars.hxx:541
ScHTMLQueryParser(EditEngine *pEditEngine, ScDocument *pDoc)
Definition: htmlpars.cxx:2758
void FillEmptyCells()
Fills all empty cells in this and nested tables with dummy parse entries.
Definition: htmlpars.cxx:2570
void TableRowOn(const HtmlImportInfo *)
Definition: htmlpars.cxx:1001
const ScHTMLTableId SC_HTML_NO_TABLE
Used as table index for normal (non-table) entries in ScHTMLEntry structs.
Definition: htmlpars.hxx:231
sal_uInt16 nTableWidth
Definition: htmlpars.hxx:169
static void MakeColNoRef(ScHTMLColOffset *, sal_uInt16 nOffset, sal_uInt16 nWidth, sal_uInt16 nOffsetTol, sal_uInt16 nWidthTol)
Definition: htmlpars.cxx:379
void MetaOn(const HtmlImportInfo &rInfo)
Processes the <meta> tag.
Definition: htmlpars.cxx:2947
std::map< ScHTMLPos, ScHTMLEntryVector > maEntryMap
List of entries for each cell.
Definition: htmlpars.hxx:530
virtual const ScHTMLTable * GetGlobalTable() const override
Returns the "global table" which contains the entire HTML document.
Definition: htmlpars.cxx:289
sal_uInt16 nTable
Definition: htmlpars.hxx:165
sal_uInt16 ScHTMLTableId
Type for a unique identifier for each table.
Definition: htmlpars.hxx:227
bool IsEmpty() const
Returns true, if the selection of the entry is empty.
Definition: htmlpars.hxx:280
void TableRowOff(const HtmlImportInfo *)
Definition: htmlpars.cxx:1008
ScRangeList maUsedCells
List of all used cells.
Definition: htmlpars.hxx:527
void RecalcDocSize()
Recalculates the size of all columns/rows in the table, regarding nested tables.
Definition: htmlpars.cxx:2606
virtual const ScHTMLTable * GetGlobalTable() const =0
Returns the "global table" which contains the entire HTML document.
sal_uInt16 nTab
Definition: eeparser.hxx:66
ScHTMLLayoutParser(EditEngine *, const OUString &rBaseURL, const Size &aPageSize, ScDocument *)
Definition: htmlpars.cxx:197
sal_uLong nFirstTableCell
Definition: htmlpars.hxx:104
sal_Int32 SCCOLROW
a type capable of holding either SCCOL or SCROW
Definition: types.hxx:24
ScHTMLGlobalTablePtr mxGlobTable
The title of the document.
Definition: htmlpars.hxx:612
o3tl::sorted_vector< sal_uLong > ScHTMLColOffset
Definition: htmlpars.hxx:97
void Recalc()
Recalculates sizes and resulting positions of all document entries.
Definition: htmlpars.cxx:2748
const OUString & getPropertyValue(const OUString &rElem, const OUString &rClass, const OUString &rPropName) const
Find best-matching property value for given element and class names.
Definition: htmlpars.cxx:116
ScHTMLTable * CloseTable(const HtmlImportInfo &rInfo)
Closes this table (</table> tag) or preformatted text (</pre> tag).
Definition: htmlpars.cxx:2157
::std::vector< ScHTMLEntry * > ScHTMLEntryVector
Definition: htmlpars.hxx:446
ScHTMLTable(ScHTMLTable &rParentTable, const HtmlImportInfo &rInfo, bool bPreFormText)
Creates a new HTML table without content.
Definition: htmlpars.cxx:1818
bool operator<(const ScHTMLPos &rPos1, const ScHTMLPos &rPos2)
Definition: htmlpars.hxx:254
void RecalcDocPos(const ScHTMLPos &rBasePos)
Recalculates the position of all cell entries and nested tables.
Definition: htmlpars.cxx:2655
bool mbImportAlways
Definition: htmlpars.hxx:303
bool mbPushEmptyLine
true = Insert empty line before current entry.
Definition: htmlpars.hxx:542
::std::stack< std::unique_ptr< ScHTMLTableStackEntry > > aTableStack
Definition: htmlpars.hxx:157
void SetImportAlways()
Sets or clears the import always state.
Definition: htmlpars.hxx:289
std::shared_ptr< ScEEParseEntry > xCellEntry
Definition: htmlpars.hxx:102
::std::map< OUString, std::unique_ptr< PropsType > > NamePropsType
Definition: htmlpars.hxx:51
void HeadingOn()
Inserts a heading line (<p> and <h*> tags).
Definition: htmlpars.cxx:1946
void Image(HtmlImportInfo *)
Definition: htmlpars.cxx:1274
ScHTMLTableId GetTableId() const
Returns the unique identifier of the table.
Definition: htmlpars.hxx:343
sal_uInt16 nColOffset
Definition: htmlpars.hxx:170
void ImplRowOn()
Set internal states for a new table row.
Definition: htmlpars.cxx:2438
void ImplDataOff()
Set internal states for leaving a table cell.
Definition: htmlpars.cxx:2472
ScHTMLStyles & GetStyles()
Definition: htmlpars.hxx:90
::std::map< OUString, std::unique_ptr< NamePropsType > > ElemsType
Definition: htmlpars.hxx:52
void ImplPushEntryToVector(ScHTMLEntryVector &rEntryVector, ScHTMLEntryPtr &rxEntry)
Pushes the passed entry into the list of the current cell.
Definition: htmlpars.cxx:2298
ScHTMLEntryPtr CreateEntry() const
Creates and returns a new empty flying entry at position (0,0).
Definition: htmlpars.cxx:2286
void add(const char *pElemName, size_t nElemName, const char *pClassName, size_t nClassName, const OUString &aProp, const OUString &aValue)
Definition: htmlpars.cxx:71
sal_uInt16 nColOffsetStart
Definition: htmlpars.hxx:111
void RowOff(const HtmlImportInfo &rInfo)
Closes the current row (</tr> tag).
Definition: htmlpars.cxx:1999
virtual ~ScHTMLLayoutParser() override
Definition: htmlpars.cxx:224
void PushTableEntry(ScHTMLTableId nTableId)
Pushes a new entry into current cell which references a nested table.
Definition: htmlpars.cxx:2358
SfxItemSetPtr mxRowItemSet
Items for the current table row.
Definition: htmlpars.hxx:523
ScHTMLTable * GetExistingTable(ScHTMLTableId nTableId) const
Tries to find a table from the table container.
Definition: htmlpars.cxx:2368
sal_Int16 SCCOL
Definition: types.hxx:22
OUStringBuffer maTitle
Definition: htmlpars.hxx:611
ScHTMLPos maCurrCell
Size of the table.
Definition: htmlpars.hxx:535
bool mbRowOn
true = Inside of <tr> </tr>.
Definition: htmlpars.hxx:540
void DataOff(const HtmlImportInfo &rInfo)
Closes the current cell (</td> or </th> tag).
Definition: htmlpars.cxx:2123
void PutItem(const SfxPoolItem &rItem)
Puts the item into the item set of the current entry.
Definition: htmlpars.cxx:1912
ScHTMLTableId mnUnusedId
Pointer to current table (performance).
Definition: htmlpars.hxx:614
sal_uInt16 GetWidthPixel(const HTMLOption &)
Definition: htmlpars.cxx:1380
void ProcessToken(const HtmlImportInfo &rInfo)
Handles all possible tags in the HTML document.
Definition: htmlpars.cxx:2815
sal_uInt16 nOffsetTolerance
Definition: htmlpars.hxx:172
virtual ErrCode Read(SvStream &rStrm, const OUString &rBaseURL) override=0
ScHTMLColOffset * pLocalColOffset
Definition: htmlpars.hxx:162
ScHTMLTable * mpParentTable
Definition: htmlpars.hxx:518
static bool IsSpaceCharInfo(const HtmlImportInfo &rInfo)
Returns true, if import info represents a space character.
Definition: htmlpars.cxx:2281
The HTML parser for data queries.
Definition: htmlpars.hxx:566
Collection of HTML style data parsed from the content of