LibreOffice Module sc (master) 1
htmlimp.cxx
Go to the documentation of this file.
1/* -*- Mode: C++; tab-width: 4; indent-tabs-mode: nil; c-basic-offset: 4 -*- */
2/*
3 * This file is part of the LibreOffice project.
4 *
5 * This Source Code Form is subject to the terms of the Mozilla Public
6 * License, v. 2.0. If a copy of the MPL was not distributed with this
7 * file, You can obtain one at http://mozilla.org/MPL/2.0/.
8 *
9 * This file incorporates work covered by the following license notice:
10 *
11 * Licensed to the Apache Software Foundation (ASF) under one or more
12 * contributor license agreements. See the NOTICE file distributed
13 * with this work for additional information regarding copyright
14 * ownership. The ASF licenses this file to you under the Apache
15 * License, Version 2.0 (the "License"); you may not use this file
16 * except in compliance with the License. You may obtain a copy of
17 * the License at http://www.apache.org/licenses/LICENSE-2.0 .
18 */
19
20#include <scitems.hxx>
21#include <osl/diagnose.h>
23
24#include <editeng/lrspitem.hxx>
25#include <editeng/paperinf.hxx>
26#include <editeng/sizeitem.hxx>
27#include <editeng/ulspitem.hxx>
28#include <editeng/boxitem.hxx>
29#include <vcl/svapp.hxx>
30#include <o3tl/string_view.hxx>
31
32#include <htmlimp.hxx>
33#include <htmlpars.hxx>
34#include <filter.hxx>
35#include <global.hxx>
36#include <document.hxx>
37#include <editutil.hxx>
38#include <stlpool.hxx>
39#include <stlsheet.hxx>
40#include <refdata.hxx>
41#include <rangenam.hxx>
42#include <attrib.hxx>
43#include <ftools.hxx>
44#include <tokenarray.hxx>
45
46ErrCode ScFormatFilterPluginImpl::ScImportHTML( SvStream &rStream, const OUString& rBaseURL, ScDocument *pDoc,
47 ScRange& rRange, double nOutputFactor, bool bCalcWidthHeight, SvNumberFormatter* pFormatter,
48 bool bConvertDate, bool bConvertScientific )
49{
50 ScHTMLImport aImp( pDoc, rBaseURL, rRange, bCalcWidthHeight );
51 ErrCode nErr = aImp.Read( rStream, rBaseURL );
52 ScRange aR = aImp.GetRange();
53 rRange.aEnd = aR.aEnd;
54 aImp.WriteToDocument( true, nOutputFactor, pFormatter, bConvertDate, bConvertScientific );
55 return nErr;
56}
57
58std::unique_ptr<ScEEAbsImport> ScFormatFilterPluginImpl::CreateHTMLImport( ScDocument* pDocP, const OUString& rBaseURL, const ScRange& rRange )
59{
60 return std::make_unique<ScHTMLImport>( pDocP, rBaseURL, rRange, true/*bCalcWidthHeight*/ );
61}
62
63ScHTMLImport::ScHTMLImport( ScDocument* pDocP, const OUString& rBaseURL, const ScRange& rRange, bool bCalcWidthHeight ) :
64 ScEEImport( pDocP, rRange )
65{
66 Size aPageSize;
68 const OUString& aPageStyle = mpDoc->GetPageStyle( rRange.aStart.Tab() );
69 ScStyleSheet* pStyleSheet = static_cast<ScStyleSheet*>(mpDoc->
70 GetStyleSheetPool()->Find( aPageStyle, SfxStyleFamily::Page ));
71 if ( pStyleSheet )
72 {
73 const SfxItemSet& rSet = pStyleSheet->GetItemSet();
74 const SvxLRSpaceItem* pLRItem = &rSet.Get( ATTR_LRSPACE );
75 tools::Long nLeftMargin = pLRItem->GetLeft();
77 const SvxULSpaceItem* pULItem = &rSet.Get( ATTR_ULSPACE );
78 tools::Long nTopMargin = pULItem->GetUpper();
80 aPageSize = rSet.Get(ATTR_PAGE_SIZE).GetSize();
81 if ( !aPageSize.Width() || !aPageSize.Height() )
82 {
83 OSL_FAIL("PageSize Null ?!?!?");
85 }
86 aPageSize.AdjustWidth( -(nLeftMargin + nRightMargin) );
87 aPageSize.AdjustHeight( -(nTopMargin + nBottomMargin) );
88 aPageSize = pDefaultDev->LogicToPixel( aPageSize, MapMode( MapUnit::MapTwip ) );
89 }
90 else
91 {
92 OSL_FAIL("no StyleSheet?!?");
93 aPageSize = pDefaultDev->LogicToPixel(
94 SvxPaperInfo::GetPaperSize( PAPER_A4 ), MapMode( MapUnit::MapTwip ) );
95 }
96 if( bCalcWidthHeight )
97 mpParser.reset( new ScHTMLLayoutParser( mpEngine.get(), rBaseURL, aPageSize, pDocP ));
98 else
99 mpParser.reset( new ScHTMLQueryParser( mpEngine.get(), pDocP ));
100}
101
102void ScHTMLImport::InsertRangeName( ScDocument& rDoc, const OUString& rName, const ScRange& rRange )
103{
104 ScComplexRefData aRefData;
105 aRefData.InitRange( rRange );
106 aRefData.Ref1.SetFlag3D( true );
107 aRefData.Ref2.SetFlag3D( aRefData.Ref2.Tab() != aRefData.Ref1.Tab() );
108 ScTokenArray aTokArray(rDoc);
109 aTokArray.AddDoubleReference( aRefData );
110 ScRangeData* pRangeData = new ScRangeData( rDoc, rName, aTokArray );
111 rDoc.GetRangeName()->insert( pRangeData );
112}
113
115 bool bSizeColsRows, double nOutputFactor, SvNumberFormatter* pFormatter, bool bConvertDate,
116 bool bConvertScientific )
117{
118 ScEEImport::WriteToDocument( bSizeColsRows, nOutputFactor, pFormatter, bConvertDate, bConvertScientific );
119
120 const ScHTMLParser* pParser = static_cast<ScHTMLParser*>(mpParser.get());
121 const ScHTMLTable* pGlobTable = pParser->GetGlobalTable();
122 if( !pGlobTable )
123 return;
124
125 // set cell borders for HTML table cells
126 pGlobTable->ApplyCellBorders( mpDoc, maRange.aStart );
127
128 // correct cell borders for merged cells
129 for ( size_t i = 0, n = pParser->ListSize(); i < n; ++i )
130 {
131 const ScEEParseEntry* pEntry = pParser->ListEntry( i );
132 if( (pEntry->nColOverlap > 1) || (pEntry->nRowOverlap > 1) )
133 {
134 SCTAB nTab = maRange.aStart.Tab();
135 const ScMergeAttr* pItem = mpDoc->GetAttr( pEntry->nCol, pEntry->nRow, nTab, ATTR_MERGE );
136 if( pItem->IsMerged() )
137 {
138 SCCOL nColMerge = pItem->GetColMerge();
139 SCROW nRowMerge = pItem->GetRowMerge();
140
141 const SvxBoxItem* pToItem = mpDoc->GetAttr( pEntry->nCol, pEntry->nRow, nTab, ATTR_BORDER );
142 SvxBoxItem aNewItem( *pToItem );
143 if( nColMerge > 1 )
144 {
145 const SvxBoxItem* pFromItem =
146 mpDoc->GetAttr( pEntry->nCol + nColMerge - 1, pEntry->nRow, nTab, ATTR_BORDER );
147 aNewItem.SetLine( pFromItem->GetLine( SvxBoxItemLine::RIGHT ), SvxBoxItemLine::RIGHT );
148 }
149 if( nRowMerge > 1 )
150 {
151 const SvxBoxItem* pFromItem =
152 mpDoc->GetAttr( pEntry->nCol, pEntry->nRow + nRowMerge - 1, nTab, ATTR_BORDER );
153 aNewItem.SetLine( pFromItem->GetLine( SvxBoxItemLine::BOTTOM ), SvxBoxItemLine::BOTTOM );
154 }
155 mpDoc->ApplyAttr( pEntry->nCol, pEntry->nRow, nTab, aNewItem );
156 }
157 }
158 }
159
160 // create ranges for HTML tables
161 // 1 - entire document
162 ScRange aNewRange( maRange.aStart );
163 aNewRange.aEnd.IncCol( static_cast<SCCOL>(pGlobTable->GetDocSize( tdCol )) - 1 );
164 aNewRange.aEnd.IncRow( pGlobTable->GetDocSize( tdRow ) - 1 );
166
167 // 2 - all tables
169
170 // 3 - single tables
171 SCCOL nColDiff = maRange.aStart.Col();
172 SCROW nRowDiff = maRange.aStart.Row();
173 SCTAB nTabDiff = maRange.aStart.Tab();
174
175 ScHTMLTable* pTable = nullptr;
177 ScRange aErrorRange( ScAddress::UNINITIALIZED );
178 while( (pTable = pGlobTable->FindNestedTable( ++nTableId )) != nullptr )
179 {
180 pTable->GetDocRange( aNewRange );
181 if (!aNewRange.Move( nColDiff, nRowDiff, nTabDiff, aErrorRange, *mpDoc ))
182 {
183 assert(!"can't move");
184 }
185 // insert table number as name
186 OUStringBuffer aName(ScfTools::GetNameFromHTMLIndex(nTableId));
187 // insert table id as name
188 if (!pTable->GetTableName().isEmpty())
189 aName.append(" - " + pTable->GetTableName());
190 // insert table caption as name
191 if (!pTable->GetTableCaption().isEmpty())
192 aName.append(" - " + pTable->GetTableCaption());
193 const OUString sName(aName.makeStringAndClear());
195 InsertRangeName(*mpDoc, sName, aNewRange);
196 }
197}
198
199OUString ScFormatFilterPluginImpl::GetHTMLRangeNameList( ScDocument& rDoc, const OUString& rOrigName )
200{
201 return ScHTMLImport::GetHTMLRangeNameList( rDoc, rOrigName );
202}
203
204OUString ScHTMLImport::GetHTMLRangeNameList( const ScDocument& rDoc, std::u16string_view rOrigName )
205{
206 if (rOrigName.empty())
207 return OUString();
208
209 OUString aNewName;
210 ScRangeName* pRangeNames = rDoc.GetRangeName();
211 ScRangeList aRangeList;
212 sal_Int32 nStringIx = 0;
213 do
214 {
215 OUString aToken( o3tl::getToken(rOrigName, 0, ';', nStringIx ) );
216 if( pRangeNames && ScfTools::IsHTMLTablesName( aToken ) )
217 { // build list with all HTML tables
218 sal_uLong nIndex = 1;
219 for(;;)
220 {
222 const ScRangeData* pRangeData = pRangeNames->findByUpperName(ScGlobal::getCharClass().uppercase(aToken));
223 if (!pRangeData)
224 break;
225 ScRange aRange;
226 if( pRangeData->IsReference( aRange ) && !aRangeList.Contains( aRange ) )
227 {
228 aNewName = ScGlobal::addToken(aNewName, aToken, ';');
229 aRangeList.push_back( aRange );
230 }
231 }
232 }
233 else
234 aNewName = ScGlobal::addToken(aNewName, aToken, ';');
235 }
236 while (nStringIx>0);
237 return aNewName;
238}
239
240/* vim:set shiftwidth=4 softtabstop=4 expandtab: */
static OutputDevice * GetDefaultDevice()
OUString uppercase(const OUString &rStr, sal_Int32 nPos, sal_Int32 nCount) const
SAL_WARN_UNUSED_RESULT Point LogicToPixel(const Point &rLogicPt) const
@ UNINITIALIZED
Definition: address.hxx:220
SCTAB Tab() const
Definition: address.hxx:283
void IncCol(SCCOL nDelta=1)
Definition: address.hxx:316
SCROW Row() const
Definition: address.hxx:274
void IncRow(SCROW nDelta=1)
Definition: address.hxx:312
SCCOL Col() const
Definition: address.hxx:279
SC_DLLPUBLIC OUString GetPageStyle(SCTAB nTab) const
Definition: document.cxx:6176
SC_DLLPUBLIC void ApplyAttr(SCCOL nCol, SCROW nRow, SCTAB nTab, const SfxPoolItem &rAttr)
Definition: document.cxx:4741
SC_DLLPUBLIC ScRangeName * GetRangeName(SCTAB nTab) const
Definition: documen3.cxx:171
SC_DLLPUBLIC const SfxPoolItem * GetAttr(SCCOL nCol, SCROW nRow, SCTAB nTab, sal_uInt16 nWhich) const
Definition: document.cxx:4684
ScRange maRange
Definition: eeimport.hxx:38
virtual void WriteToDocument(bool bSizeColsRows=false, double nOutputFactor=1.0, SvNumberFormatter *pFormatter=nullptr, bool bConvertDate=true, bool bConvertScientific=true) override
Definition: eeimpars.cxx:115
virtual ErrCode Read(SvStream &rStream, const OUString &rBaseURL) override
Definition: eeimpars.cxx:78
std::unique_ptr< ScTabEditEngine > mpEngine
Definition: eeimport.hxx:41
virtual ScRange GetRange() override
Definition: eeimport.hxx:55
ScDocument * mpDoc
Definition: eeimport.hxx:39
std::unique_ptr< ScEEParser > mpParser
Definition: eeimport.hxx:43
ScEEParseEntry * ListEntry(size_t index)
Definition: eeparser.hxx:129
size_t ListSize() const
Definition: eeparser.hxx:128
virtual std::unique_ptr< ScEEAbsImport > CreateHTMLImport(ScDocument *pDocP, const OUString &rBaseURL, const ScRange &rRange) override
Definition: htmlimp.cxx:58
virtual OUString GetHTMLRangeNameList(ScDocument &rDoc, const OUString &rOrigName) override
Definition: htmlimp.cxx:199
virtual ErrCode ScImportHTML(SvStream &, const OUString &rBaseURL, ScDocument *, ScRange &rRange, double nOutputFactor, bool bCalcWidthHeight, SvNumberFormatter *pFormatter, bool bConvertDate, bool bConvertScientific) override
Definition: htmlimp.cxx:46
static SC_DLLPUBLIC OUString addToken(std::u16string_view rTokenList, std::u16string_view rToken, sal_Unicode cSep, sal_Int32 nSepCount=1, bool bForceSep=false)
Adds the string rToken to rTokenList, using a list separator character.
Definition: global.cxx:705
static SC_DLLPUBLIC const CharClass & getCharClass()
Definition: global.cxx:1064
static void InsertRangeName(ScDocument &rDoc, const OUString &rName, const ScRange &rRange)
Definition: htmlimp.cxx:102
virtual void WriteToDocument(bool bSizeColsRows=false, double nOutputFactor=1.0, SvNumberFormatter *pFormatter=nullptr, bool bConvertDate=true, bool bConvertScientific=true) override
Definition: htmlimp.cxx:114
ScHTMLImport(ScDocument *pDoc, const OUString &rBaseURL, const ScRange &rRange, bool bCalcWidthHeight)
Definition: htmlimp.cxx:63
static OUString GetHTMLRangeNameList(const ScDocument &rDoc, std::u16string_view rOrigName)
Definition: htmlimp.cxx:204
Base class for HTML parser classes.
Definition: htmlpars.hxx:79
virtual const ScHTMLTable * GetGlobalTable() const =0
Returns the "global table" which contains the entire HTML document.
The HTML parser for data queries.
Definition: htmlpars.hxx:580
Stores data for one table in an HTML document.
Definition: htmlpars.hxx:327
const OUString & GetTableName() const
Returns the name of the table, specified in the TABLE tag.
Definition: htmlpars.hxx:343
void ApplyCellBorders(ScDocument *pDoc, const ScAddress &rFirstPos) const
Applies border formatting to the passed document.
Definition: htmlpars.cxx:2240
const OUString & GetTableCaption() const
Returns the caption of the table, specified in the tag.
Definition: htmlpars.hxx:345
ScHTMLTable * FindNestedTable(ScHTMLTableId nTableId) const
Searches in all nested tables for the specified table.
Definition: htmlpars.cxx:1903
SCCOLROW GetDocSize(ScHTMLOrient eOrient, SCCOLROW nCellPos) const
Returns the resulting document row/column count of the specified HTML row/column.
Definition: htmlpars.cxx:2186
void GetDocRange(ScRange &rRange) const
Calculates the current Calc document area of this table.
Definition: htmlpars.cxx:2229
SCCOL GetColMerge() const
Definition: attrib.hxx:71
bool IsMerged() const
Definition: attrib.hxx:74
SCROW GetRowMerge() const
Definition: attrib.hxx:72
SC_DLLPUBLIC bool IsReference(ScRange &rRef) const
Definition: rangenam.cxx:371
bool Contains(const ScRange &) const
Definition: rangelst.cxx:1082
void push_back(const ScRange &rRange)
Definition: rangelst.cxx:1137
SC_DLLPUBLIC ScRangeData * findByUpperName(const OUString &rName)
Definition: rangenam.cxx:704
SC_DLLPUBLIC bool insert(ScRangeData *p, bool bReuseFreeIndex=true)
Insert object into set.
Definition: rangenam.cxx:802
bool Move(SCCOL aDeltaX, SCROW aDeltaY, SCTAB aDeltaZ, ScRange &rErrorRange, const ScDocument &rDoc)
Definition: address.cxx:2328
ScAddress aEnd
Definition: address.hxx:498
ScAddress aStart
Definition: address.hxx:497
virtual SC_DLLPUBLIC SfxItemSet & GetItemSet() override
Definition: stlsheet.cxx:133
formula::FormulaToken * AddDoubleReference(const ScComplexRefData &rRef)
Definition: token.cxx:2282
static const OUString & GetHTMLDocName()
Returns the built-in range name for an HTML document.
Definition: ftools.cxx:285
static bool IsHTMLTablesName(std::u16string_view rSource)
Returns true, if rSource is the built-in range name for all HTML tables.
Definition: ftools.cxx:327
static const OUString & GetHTMLTablesName()
Returns the built-in range name for all HTML tables.
Definition: ftools.cxx:291
static OUString GetNameFromHTMLIndex(sal_uInt32 nIndex)
Returns the built-in range name for an HTML table, specified by table index.
Definition: ftools.cxx:310
const SfxPoolItem & Get(sal_uInt16 nWhich, bool bSrchInParent=true) const
constexpr tools::Long Height() const
tools::Long AdjustHeight(tools::Long n)
tools::Long AdjustWidth(tools::Long n)
constexpr tools::Long Width() const
const editeng::SvxBorderLine * GetLine(SvxBoxItemLine nLine) const
void SetLine(const editeng::SvxBorderLine *pNew, SvxBoxItemLine nLine)
tools::Long GetRight() const
tools::Long GetLeft() const
static Size GetPaperSize(Paper ePaper, MapUnit eUnit=MapUnit::MapTwip)
sal_uInt16 GetUpper() const
sal_uInt16 GetLower() const
OUString sName
@ tdCol
Definition: htmlpars.hxx:225
@ tdRow
Definition: htmlpars.hxx:225
const ScHTMLTableId SC_HTML_GLOBAL_TABLE
Identifier of the "global table" (the entire HTML document).
Definition: htmlpars.hxx:230
sal_uInt16 ScHTMLTableId
Type for a unique identifier for each table.
Definition: htmlpars.hxx:228
sal_Int32 nIndex
OUString aName
sal_Int64 n
tools::Long const nRightMargin
tools::Long const nBottomMargin
tools::Long const nTopMargin
tools::Long const nLeftMargin
int i
std::basic_string_view< charT, traits > getToken(std::basic_string_view< charT, traits > sv, charT delimiter, std::size_t &position)
long Long
PAPER_A4
constexpr TypedWhichId< SvxSizeItem > ATTR_PAGE_SIZE(161)
constexpr TypedWhichId< ScMergeAttr > ATTR_MERGE(144)
constexpr TypedWhichId< SvxULSpaceItem > ATTR_ULSPACE(158)
constexpr TypedWhichId< SvxBoxItem > ATTR_BORDER(150)
constexpr TypedWhichId< SvxLRSpaceItem > ATTR_LRSPACE(157)
static SfxItemSet & rSet
sal_uIntPtr sal_uLong
Complex reference (a range) into the sheet.
Definition: refdata.hxx:123
void InitRange(const ScRange &rRange)
Definition: refdata.hxx:130
ScSingleRefData Ref2
Definition: refdata.hxx:125
ScSingleRefData Ref1
Definition: refdata.hxx:124
SCCOL nColOverlap
Definition: eeparser.hxx:69
SCROW nRowOverlap
Definition: eeparser.hxx:70
SCTAB Tab() const
Definition: refdata.cxx:254
void SetFlag3D(bool bVal)
Definition: refdata.hxx:89
sal_Int16 SCTAB
Definition: types.hxx:22
sal_Int16 SCCOL
Definition: types.hxx:21
sal_Int32 SCROW
Definition: types.hxx:17