1/* -*- Mode: C++; tab-width: 4; indent-tabs-mode: nil; c-basic-offset: 4 -*- */
/*
 * This file is part of the LibreOffice project.
4 *
5 * This Source Code Form is subject to the terms of the Mozilla Public
6 * License, v. 2.0. If a copy of the MPL was not distributed with this
 * file, You can obtain one at http://mozilla.org/MPL/2.0/.
8 *
9 * This file incorporates work covered by the following license notice:
10 *
11 * Licensed to the Apache Software Foundation (ASF) under one or more
12 * contributor license agreements. See the NOTICE file distributed
13 * with this work for additional information regarding copyright
14 * ownership. The ASF licenses this file to you under the Apache
15 * License, Version 2.0 (the "License"); you may not use this file
16 * except in compliance with the License. You may obtain a copy of
 * the License at http://www.apache.org/licenses/LICENSE-2.0 .
18 */
20#pragma once
22#include <memory>
23#include <map>
24#include <optional>
25#include <stack>
26#include <string_view>
27#include <unordered_map>
28#include <utility>
29#include <vector>
32#include <rangelst.hxx>
33#include "eeparser.hxx"
35const sal_uInt32 SC_HTML_FONTSIZES = 7; // like export, HTML options
37// Pixel tolerance for SeekOffset and related.
38const sal_uInt16 SC_HTML_OFFSET_TOLERANCE_SMALL = 1; // single table
39const sal_uInt16 SC_HTML_OFFSET_TOLERANCE_LARGE = 10; // nested
41// BASE class for HTML parser classes
43class ScHTMLTable;
51 typedef std::unordered_map<OUString, OUString> PropsType;
52 typedef ::std::map<OUString, PropsType> NamePropsType;
53 typedef ::std::map<OUString, NamePropsType> ElemsType;
58 const OUString maEmpty;
62 void add(const char* pElemName, size_t nElemName, const char* pClassName, size_t nClassName,
63 const OUString& aProp, const OUString& aValue);
68 const OUString& getPropertyValue(
69 const OUString& rElem, const OUString& rClass, const OUString& rPropName) const;
72 static void insertProp(
73 NamePropsType& rProps, const OUString& aName,
74 const OUString& aProp, const OUString& aValue);
86 explicit ScHTMLParser( EditEngine* pEditEngine, ScDocument* pDoc );
87 virtual ~ScHTMLParser() override;
89 virtual ErrCode Read( SvStream& rStrm, const OUString& rBaseURL ) override = 0;
92 ScDocument& GetDoc() { return *mpDoc;}
95 virtual const ScHTMLTable* GetGlobalTable() const = 0;
103 std::shared_ptr<ScEEParseEntry> xCellEntry;
109 sal_uInt16 nTable;
110 sal_uInt16 nTableWidth;
111 sal_uInt16 nColOffset;
112 sal_uInt16 nColOffsetStart;
114 ScHTMLTableStackEntry( std::shared_ptr<ScEEParseEntry> xE,
116 sal_uLong nFTC,
117 SCROW nRow,
118 SCCOL nStart, SCCOL nMax, sal_uInt16 nTab,
119 sal_uInt16 nTW, sal_uInt16 nCO, sal_uInt16 nCOS,
120 bool bFR )
121 : xLockedList(std::move( xL )), xCellEntry(std::move(xE)),
122 pLocalColOffset( pTO ),
123 nFirstTableCell( nFTC ),
124 nRowCnt( nRow ),
125 nColCntStart( nStart ), nMaxCol( nMax ),
126 nTable( nTab ), nTableWidth( nTW ),
127 nColOffset( nCO ), nColOffsetStart( nCOS ),
128 bFirstRow( bFR )
129 {}
138 SCROW nCRow )
139 : nLastCol( nLCol ), nNextRow( nNRow ),
140 nCurRow( nCRow )
141 {}
144class EditEngine;
145class ScDocument;
146class HTMLOption;
148// TODO these need better names
149typedef ::std::map<SCROW, SCROW> InnerMap;
150typedef ::std::map<sal_uInt16, InnerMap*> OuterMap;
156 OUString aBaseURL;
157 ::std::stack< std::unique_ptr<ScHTMLTableStackEntry> >
159 OUString aString;
161 std::unique_ptr<OuterMap> pTables;
166 sal_uInt16 nTable;
167 sal_uInt16 nMaxTable;
168 SCCOL nColCntStart; // first Col per table
169 SCCOL nMaxCol; // per table
170 sal_uInt16 nTableWidth; // per table
171 sal_uInt16 nColOffset; // current, pixel
172 sal_uInt16 nColOffsetStart; // start value per table, in pixel
173 sal_uInt16 nOffsetTolerance; // for use with SeekOffset and related
174 bool bFirstRow; // per table, whether in first row
176 bool bInCell:1;
177 bool bInTitle:1;
179 DECL_LINK( HTMLImportHdl, HtmlImportInfo&, void );
180 void NewActEntry( const ScEEParseEntry* );
181 static void EntryEnd( ScEEParseEntry*, const ESelection& );
182 void ProcToken( HtmlImportInfo* );
183 void CloseEntry( const HtmlImportInfo* );
184 void NextRow( const HtmlImportInfo* );
185 void SkipLocked( ScEEParseEntry*, bool bJoin = true );
186 static bool SeekOffset( const ScHTMLColOffset*, sal_uInt16 nOffset,
187 SCCOL* pCol, sal_uInt16 nOffsetTol );
188 static void MakeCol( ScHTMLColOffset*, sal_uInt16& nOffset,
189 sal_uInt16& nWidth, sal_uInt16 nOffsetTol,
190 sal_uInt16 nWidthTol );
191 static void MakeColNoRef( ScHTMLColOffset*, sal_uInt16 nOffset,
192 sal_uInt16 nWidth, sal_uInt16 nOffsetTol,
193 sal_uInt16 nWidthTol );
194 static void ModifyOffset( ScHTMLColOffset*, sal_uInt16& nOldOffset,
195 sal_uInt16& nNewOffset, sal_uInt16 nOffsetTol );
196 void Colonize( ScEEParseEntry* );
197 sal_uInt16 GetWidth( const ScEEParseEntry* );
198 void SetWidths();
199 void Adjust();
201 sal_uInt16 GetWidthPixel( const HTMLOption& );
202 bool IsAtBeginningOfText( const HtmlImportInfo* );
204 void TableOn( HtmlImportInfo* );
205 void ColOn( HtmlImportInfo* );
206 void TableRowOn( const HtmlImportInfo* );
207 void TableRowOff( const HtmlImportInfo* );
209 void TableDataOff( const HtmlImportInfo* );
210 void TableOff( const HtmlImportInfo* );
211 void Image( HtmlImportInfo* );
212 void AnchorOn( HtmlImportInfo* );
213 void FontOn( HtmlImportInfo* );
217 virtual ~ScHTMLLayoutParser() override;
218 virtual ErrCode Read( SvStream&, const OUString& rBaseURL ) override;
219 virtual const ScHTMLTable* GetGlobalTable() const override;
/** Selects whether an operation works on the column or on the row axis. */
enum ScHTMLOrient
{
    tdCol = 0,  ///< column direction
    tdRow = 1   ///< row direction
};
228typedef sal_uInt16 ScHTMLTableId;
240 explicit ScHTMLPos() : mnCol( 0 ), mnRow( 0 ) {}
241 explicit ScHTMLPos( SCCOL nCol, SCROW nRow ) :
242 mnCol( nCol ), mnRow( nRow ) {}
243 explicit ScHTMLPos( const ScAddress& rAddr ) { Set( rAddr ); }
245 SCCOLROW Get( ScHTMLOrient eOrient ) const
246 { return (eOrient == tdCol) ? mnCol : mnRow; }
247 void Set( SCCOL nCol, SCROW nRow )
248 { mnCol = nCol; mnRow = nRow; }
249 void Set( const ScAddress& rAddr )
250 { Set( rAddr.Col(), rAddr.Row() ); }
252 { return ScAddress( mnCol, mnRow, 0 ); }
255inline bool operator<( const ScHTMLPos& rPos1, const ScHTMLPos& rPos2 )
257 return (rPos1.mnRow < rPos2.mnRow) || ((rPos1.mnRow == rPos2.mnRow) && (rPos1.mnCol < rPos2.mnCol));
266 explicit ScHTMLSize( SCCOL nCols, SCROW nRows ) :
267 mnCols( nCols ), mnRows( nRows ) {}
268 void Set( SCCOL nCols, SCROW nRows )
269 { mnCols = nCols; mnRows = nRows; }
276 explicit ScHTMLEntry(
277 const SfxItemSet& rItemSet,
278 ScHTMLTableId nTableId = SC_HTML_NO_TABLE );
281 bool IsEmpty() const { return !aSel.HasRange(); }
283 bool HasContents() const;
285 bool IsTable() const { return nTab != SC_HTML_NO_TABLE; }
287 ScHTMLTableId GetTableId() const { return nTab; }
292 void AdjustStart( const HtmlImportInfo& rInfo );
294 void AdjustEnd( const HtmlImportInfo& rInfo );
296 void Strip( const EditEngine& rEditEngine );
301 const SfxItemSet& GetItemSet() const { return aItemSet; }
314 explicit ScHTMLTableAutoId( ScHTMLTableId& rnUnusedId );
317class ScHTMLTableMap;
334 explicit ScHTMLTable(
335 ScHTMLTable& rParentTable,
336 const HtmlImportInfo& rInfo,
337 bool bPreFormText,
338 const ScDocument& rDoc );
340 virtual ~ScHTMLTable();
343 const OUString& GetTableName() const { return maTableName; }
345 const OUString& GetTableCaption() const { return maCaption; }
349 ScHTMLSize GetSpan( const ScHTMLPos& rCellPos ) const;
353 ScHTMLTable* FindNestedTable( ScHTMLTableId nTableId ) const;
356 void PutItem( const SfxPoolItem& rItem );
358 void PutText( const HtmlImportInfo& rInfo );
360 void InsertPara( const HtmlImportInfo& rInfo );
364 void BreakOn();
366 void HeadingOn();
368 void AnchorOn();
372 ScHTMLTable* TableOn( const HtmlImportInfo& rInfo );
375 ScHTMLTable* TableOff( const HtmlImportInfo& rInfo );
377 void CaptionOn();
379 void CaptionOff();
382 ScHTMLTable* PreOn( const HtmlImportInfo& rInfo );
385 ScHTMLTable* PreOff( const HtmlImportInfo& rInfo );
389 void RowOn( const HtmlImportInfo& rInfo );
392 void RowOff( const HtmlImportInfo& rInfo );
394 void DataOn( const HtmlImportInfo& rInfo );
397 void DataOff( const HtmlImportInfo& rInfo );
400 void BodyOn( const HtmlImportInfo& rInfo );
402 void BodyOff( const HtmlImportInfo& rInfo );
407 ScHTMLTable* CloseTable( const HtmlImportInfo& rInfo );
410 SCCOLROW GetDocSize( ScHTMLOrient eOrient, SCCOLROW nCellPos ) const;
412 SCCOLROW GetDocSize( ScHTMLOrient eOrient, SCCOLROW nCellBegin, SCCOLROW nCellEnd ) const;
414 SCCOLROW GetDocSize( ScHTMLOrient eOrient ) const;
416 ScHTMLSize GetDocSize( const ScHTMLPos& rCellPos ) const;
419 const ScHTMLPos& GetDocPos() const { return maDocBasePos; }
421 SCCOLROW GetDocPos( ScHTMLOrient eOrient, SCCOLROW nCellPos ) const;
423 ScHTMLPos GetDocPos( const ScHTMLPos& rCellPos ) const;
426 void GetDocRange( ScRange& rRange ) const;
429 void ApplyCellBorders( ScDocument* pDoc, const ScAddress& rFirstPos ) const;
436 explicit ScHTMLTable(
437 SfxItemPool& rPool,
438 EditEngine& rEditEngine,
439 std::vector<std::shared_ptr<ScEEParseEntry>>& rEEParseList,
440 ScHTMLTableId& rnUnusedId, ScHTMLParser* pParser,
441 const ScDocument& rDoc );
444 void FillEmptyCells();
446 void RecalcDocSize();
449 void RecalcDocPos( const ScHTMLPos& rBasePos );
452 typedef ::std::unique_ptr< ScHTMLTableMap > ScHTMLTableMapPtr;
453 typedef ::std::vector< SCCOLROW > ScSizeVec;
454 typedef ::std::vector< ScHTMLEntry* > ScHTMLEntryVector;
455 typedef ::std::unique_ptr< ScHTMLEntry > ScHTMLEntryPtr;
458 bool IsEmptyCell() const;
460 const SfxItemSet& GetCurrItemSet() const;
463 static bool IsSpaceCharInfo( const HtmlImportInfo& rInfo );
469 void CreateNewEntry( const HtmlImportInfo& rInfo );
475 void ImplPushEntryToVector( ScHTMLEntryVector& rEntryVector, ScHTMLEntryPtr& rxEntry );
480 bool PushEntry( ScHTMLEntryPtr& rxEntry );
485 bool PushEntry( const HtmlImportInfo& rInfo, bool bLastInCell = false );
487 void PushTableEntry( ScHTMLTableId nTableId );
493 ScHTMLTable* GetExistingTable( ScHTMLTableId nTableId ) const;
496 ScHTMLTable* InsertNestedTable( const HtmlImportInfo& rInfo, bool bPreFormText );
499 void InsertNewCell( const ScHTMLSize& rSpanSize );
502 void ImplRowOn();
504 void ImplRowOff();
506 void ImplDataOn( const ScHTMLSize& rSpanSize );
508 void ImplDataOff();
511 static void ProcessFormatOptions( SfxItemSet& rItemSet, const HtmlImportInfo& rInfo );
515 void SetDocSize( ScHTMLOrient eOrient, SCCOLROW nCellPos, SCCOLROW nSize );
522 ScHTMLOrient eOrient, SCCOLROW nCellPos,
523 SCCOLROW nCellSpan, SCCOLROW nRealDocSize );
528 OUString maTableName;
529 OUString maCaption;
530 OUStringBuffer maCaptionBuffer;
533 std::optional<SfxItemSet> moRowItemSet;
534 std::optional<SfxItemSet> moDataItemSet;
539 std::vector<std::shared_ptr<ScEEParseEntry>>& mrEEParseList;
540 std::map< ScHTMLPos, ScHTMLEntryVector > maEntryMap;
549 bool mbBorderOn:1;
551 bool mbRowOn:1;
552 bool mbDataOn:1;
554 bool mbCaptionOn:1;
561 explicit ScHTMLGlobalTable(
562 SfxItemPool& rPool,
563 EditEngine& rEditEngine,
564 std::vector<std::shared_ptr<ScEEParseEntry>>& rEEParseList,
565 ScHTMLTableId& rnUnusedId, ScHTMLParser* pParser,
566 const ScDocument& rDoc );
568 virtual ~ScHTMLGlobalTable() override;
571 void Recalc();
582 explicit ScHTMLQueryParser( EditEngine* pEditEngine, ScDocument* pDoc );
583 virtual ~ScHTMLQueryParser() override;
585 virtual ErrCode Read( SvStream& rStrm, const OUString& rBaseURL ) override;
588 virtual const ScHTMLTable* GetGlobalTable() const override;
592 void ProcessToken( const HtmlImportInfo& rInfo );
594 void InsertText( const HtmlImportInfo& rInfo );
596 void FontOn( const HtmlImportInfo& rInfo );
599 void MetaOn( const HtmlImportInfo& rInfo );
601 void TitleOn();
603 void TitleOff( const HtmlImportInfo& rInfo );
606 void TableOn( const HtmlImportInfo& rInfo );
608 void TableOff( const HtmlImportInfo& rInfo );
610 void PreOn( const HtmlImportInfo& rInfo );
612 void PreOff( const HtmlImportInfo& rInfo );
615 void CloseTable( const HtmlImportInfo& rInfo );
617 void ParseStyle(std::u16string_view rStrm);
619 DECL_LINK( HTMLImportHdl, HtmlImportInfo&, void );
622 typedef ::std::unique_ptr< ScHTMLGlobalTable > ScHTMLGlobalTablePtr;
624 OUStringBuffer maTitle;
631/* vim:set shiftwidth=4 softtabstop=4 expandtab: */
