LibreOffice Module sc (master) 1
htmlimp.cxx
Go to the documentation of this file.
1/* -*- Mode: C++; tab-width: 4; indent-tabs-mode: nil; c-basic-offset: 4 -*- */
2/*
3 * This file is part of the LibreOffice project.
4 *
5 * This Source Code Form is subject to the terms of the Mozilla Public
6 * License, v. 2.0. If a copy of the MPL was not distributed with this
7 * file, You can obtain one at http://mozilla.org/MPL/2.0/.
8 *
9 * This file incorporates work covered by the following license notice:
10 *
11 * Licensed to the Apache Software Foundation (ASF) under one or more
12 * contributor license agreements. See the NOTICE file distributed
13 * with this work for additional information regarding copyright
14 * ownership. The ASF licenses this file to you under the Apache
15 * License, Version 2.0 (the "License"); you may not use this file
16 * except in compliance with the License. You may obtain a copy of
17 * the License at http://www.apache.org/licenses/LICENSE-2.0 .
18 */
19
20#include <scitems.hxx>
21#include <osl/diagnose.h>
23
24#include <editeng/lrspitem.hxx>
25#include <editeng/paperinf.hxx>
26#include <editeng/sizeitem.hxx>
27#include <editeng/ulspitem.hxx>
28#include <editeng/boxitem.hxx>
29#include <vcl/svapp.hxx>
30#include <o3tl/string_view.hxx>
31
32#include <htmlimp.hxx>
33#include <htmlpars.hxx>
34#include <filter.hxx>
35#include <global.hxx>
36#include <document.hxx>
37#include <editutil.hxx>
38#include <stlpool.hxx>
39#include <stlsheet.hxx>
40#include <refdata.hxx>
41#include <rangenam.hxx>
42#include <attrib.hxx>
43#include <ftools.hxx>
44#include <tokenarray.hxx>
45
46ErrCode ScFormatFilterPluginImpl::ScImportHTML( SvStream &rStream, const OUString& rBaseURL, ScDocument *pDoc,
47 ScRange& rRange, double nOutputFactor, bool bCalcWidthHeight, SvNumberFormatter* pFormatter,
48 bool bConvertDate )
49{
50 ScHTMLImport aImp( pDoc, rBaseURL, rRange, bCalcWidthHeight );
51 ErrCode nErr = aImp.Read( rStream, rBaseURL );
52 ScRange aR = aImp.GetRange();
53 rRange.aEnd = aR.aEnd;
54 aImp.WriteToDocument( true, nOutputFactor, pFormatter, bConvertDate );
55 return nErr;
56}
57
58std::unique_ptr<ScEEAbsImport> ScFormatFilterPluginImpl::CreateHTMLImport( ScDocument* pDocP, const OUString& rBaseURL, const ScRange& rRange )
59{
60 return std::make_unique<ScHTMLImport>( pDocP, rBaseURL, rRange, true/*bCalcWidthHeight*/ );
61}
62
63ScHTMLImport::ScHTMLImport( ScDocument* pDocP, const OUString& rBaseURL, const ScRange& rRange, bool bCalcWidthHeight ) :
64 ScEEImport( pDocP, rRange )
65{
66 Size aPageSize;
68 const OUString& aPageStyle = mpDoc->GetPageStyle( rRange.aStart.Tab() );
69 ScStyleSheet* pStyleSheet = static_cast<ScStyleSheet*>(mpDoc->
70 GetStyleSheetPool()->Find( aPageStyle, SfxStyleFamily::Page ));
71 if ( pStyleSheet )
72 {
73 const SfxItemSet& rSet = pStyleSheet->GetItemSet();
74 const SvxLRSpaceItem* pLRItem = &rSet.Get( ATTR_LRSPACE );
75 tools::Long nLeftMargin = pLRItem->GetLeft();
77 const SvxULSpaceItem* pULItem = &rSet.Get( ATTR_ULSPACE );
78 tools::Long nTopMargin = pULItem->GetUpper();
80 aPageSize = rSet.Get(ATTR_PAGE_SIZE).GetSize();
81 if ( !aPageSize.Width() || !aPageSize.Height() )
82 {
83 OSL_FAIL("PageSize Null ?!?!?");
85 }
86 aPageSize.AdjustWidth( -(nLeftMargin + nRightMargin) );
87 aPageSize.AdjustHeight( -(nTopMargin + nBottomMargin) );
88 aPageSize = pDefaultDev->LogicToPixel( aPageSize, MapMode( MapUnit::MapTwip ) );
89 }
90 else
91 {
92 OSL_FAIL("no StyleSheet?!?");
93 aPageSize = pDefaultDev->LogicToPixel(
94 SvxPaperInfo::GetPaperSize( PAPER_A4 ), MapMode( MapUnit::MapTwip ) );
95 }
96 if( bCalcWidthHeight )
97 mpParser.reset( new ScHTMLLayoutParser( mpEngine.get(), rBaseURL, aPageSize, pDocP ));
98 else
99 mpParser.reset( new ScHTMLQueryParser( mpEngine.get(), pDocP ));
100}
101
102void ScHTMLImport::InsertRangeName( ScDocument& rDoc, const OUString& rName, const ScRange& rRange )
103{
104 ScComplexRefData aRefData;
105 aRefData.InitRange( rRange );
106 aRefData.Ref1.SetFlag3D( true );
107 aRefData.Ref2.SetFlag3D( aRefData.Ref2.Tab() != aRefData.Ref1.Tab() );
108 ScTokenArray aTokArray(rDoc);
109 aTokArray.AddDoubleReference( aRefData );
110 ScRangeData* pRangeData = new ScRangeData( rDoc, rName, aTokArray );
111 rDoc.GetRangeName()->insert( pRangeData );
112}
113
115 bool bSizeColsRows, double nOutputFactor, SvNumberFormatter* pFormatter, bool bConvertDate )
116{
117 ScEEImport::WriteToDocument( bSizeColsRows, nOutputFactor, pFormatter, bConvertDate );
118
119 const ScHTMLParser* pParser = static_cast<ScHTMLParser*>(mpParser.get());
120 const ScHTMLTable* pGlobTable = pParser->GetGlobalTable();
121 if( !pGlobTable )
122 return;
123
124 // set cell borders for HTML table cells
125 pGlobTable->ApplyCellBorders( mpDoc, maRange.aStart );
126
127 // correct cell borders for merged cells
128 for ( size_t i = 0, n = pParser->ListSize(); i < n; ++i )
129 {
130 const ScEEParseEntry* pEntry = pParser->ListEntry( i );
131 if( (pEntry->nColOverlap > 1) || (pEntry->nRowOverlap > 1) )
132 {
133 SCTAB nTab = maRange.aStart.Tab();
134 const ScMergeAttr* pItem = mpDoc->GetAttr( pEntry->nCol, pEntry->nRow, nTab, ATTR_MERGE );
135 if( pItem->IsMerged() )
136 {
137 SCCOL nColMerge = pItem->GetColMerge();
138 SCROW nRowMerge = pItem->GetRowMerge();
139
140 const SvxBoxItem* pToItem = mpDoc->GetAttr( pEntry->nCol, pEntry->nRow, nTab, ATTR_BORDER );
141 SvxBoxItem aNewItem( *pToItem );
142 if( nColMerge > 1 )
143 {
144 const SvxBoxItem* pFromItem =
145 mpDoc->GetAttr( pEntry->nCol + nColMerge - 1, pEntry->nRow, nTab, ATTR_BORDER );
146 aNewItem.SetLine( pFromItem->GetLine( SvxBoxItemLine::RIGHT ), SvxBoxItemLine::RIGHT );
147 }
148 if( nRowMerge > 1 )
149 {
150 const SvxBoxItem* pFromItem =
151 mpDoc->GetAttr( pEntry->nCol, pEntry->nRow + nRowMerge - 1, nTab, ATTR_BORDER );
152 aNewItem.SetLine( pFromItem->GetLine( SvxBoxItemLine::BOTTOM ), SvxBoxItemLine::BOTTOM );
153 }
154 mpDoc->ApplyAttr( pEntry->nCol, pEntry->nRow, nTab, aNewItem );
155 }
156 }
157 }
158
159 // create ranges for HTML tables
160 // 1 - entire document
161 ScRange aNewRange( maRange.aStart );
162 aNewRange.aEnd.IncCol( static_cast<SCCOL>(pGlobTable->GetDocSize( tdCol )) - 1 );
163 aNewRange.aEnd.IncRow( pGlobTable->GetDocSize( tdRow ) - 1 );
165
166 // 2 - all tables
168
169 // 3 - single tables
170 SCCOL nColDiff = maRange.aStart.Col();
171 SCROW nRowDiff = maRange.aStart.Row();
172 SCTAB nTabDiff = maRange.aStart.Tab();
173
174 ScHTMLTable* pTable = nullptr;
176 ScRange aErrorRange( ScAddress::UNINITIALIZED );
177 while( (pTable = pGlobTable->FindNestedTable( ++nTableId )) != nullptr )
178 {
179 pTable->GetDocRange( aNewRange );
180 if (!aNewRange.Move( nColDiff, nRowDiff, nTabDiff, aErrorRange, *mpDoc ))
181 {
182 assert(!"can't move");
183 }
184 // insert table number as name
185 OUStringBuffer aName(ScfTools::GetNameFromHTMLIndex(nTableId));
186 // insert table id as name
187 if (!pTable->GetTableName().isEmpty())
188 aName.append(" - " + pTable->GetTableName());
189 // insert table caption as name
190 if (!pTable->GetTableCaption().isEmpty())
191 aName.append(" - " + pTable->GetTableCaption());
192 const OUString sName(aName.makeStringAndClear());
194 InsertRangeName(*mpDoc, sName, aNewRange);
195 }
196}
197
198OUString ScFormatFilterPluginImpl::GetHTMLRangeNameList( ScDocument& rDoc, const OUString& rOrigName )
199{
200 return ScHTMLImport::GetHTMLRangeNameList( rDoc, rOrigName );
201}
202
203OUString ScHTMLImport::GetHTMLRangeNameList( const ScDocument& rDoc, std::u16string_view rOrigName )
204{
205 if (rOrigName.empty())
206 return OUString();
207
208 OUString aNewName;
209 ScRangeName* pRangeNames = rDoc.GetRangeName();
210 ScRangeList aRangeList;
211 sal_Int32 nStringIx = 0;
212 do
213 {
214 OUString aToken( o3tl::getToken(rOrigName, 0, ';', nStringIx ) );
215 if( pRangeNames && ScfTools::IsHTMLTablesName( aToken ) )
216 { // build list with all HTML tables
217 sal_uLong nIndex = 1;
218 for(;;)
219 {
221 const ScRangeData* pRangeData = pRangeNames->findByUpperName(ScGlobal::getCharClass().uppercase(aToken));
222 if (!pRangeData)
223 break;
224 ScRange aRange;
225 if( pRangeData->IsReference( aRange ) && !aRangeList.Contains( aRange ) )
226 {
227 aNewName = ScGlobal::addToken(aNewName, aToken, ';');
228 aRangeList.push_back( aRange );
229 }
230 }
231 }
232 else
233 aNewName = ScGlobal::addToken(aNewName, aToken, ';');
234 }
235 while (nStringIx>0);
236 return aNewName;
237}
238
239/* vim:set shiftwidth=4 softtabstop=4 expandtab: */
static OutputDevice * GetDefaultDevice()
OUString uppercase(const OUString &rStr, sal_Int32 nPos, sal_Int32 nCount) const
SAL_WARN_UNUSED_RESULT Point LogicToPixel(const Point &rLogicPt) const
@ UNINITIALIZED
Definition: address.hxx:220
SCTAB Tab() const
Definition: address.hxx:283
void IncCol(SCCOL nDelta=1)
Definition: address.hxx:316
SCROW Row() const
Definition: address.hxx:274
void IncRow(SCROW nDelta=1)
Definition: address.hxx:312
SCCOL Col() const
Definition: address.hxx:279
SC_DLLPUBLIC OUString GetPageStyle(SCTAB nTab) const
Definition: document.cxx:6313
SC_DLLPUBLIC void ApplyAttr(SCCOL nCol, SCROW nRow, SCTAB nTab, const SfxPoolItem &rAttr)
Definition: document.cxx:4846
SC_DLLPUBLIC ScRangeName * GetRangeName(SCTAB nTab) const
Definition: documen3.cxx:174
SC_DLLPUBLIC const SfxPoolItem * GetAttr(SCCOL nCol, SCROW nRow, SCTAB nTab, sal_uInt16 nWhich) const
Definition: document.cxx:4789
ScRange maRange
Definition: eeimport.hxx:38
virtual ErrCode Read(SvStream &rStream, const OUString &rBaseURL) override
Definition: eeimpars.cxx:78
std::unique_ptr< ScTabEditEngine > mpEngine
Definition: eeimport.hxx:41
virtual void WriteToDocument(bool bSizeColsRows=false, double nOutputFactor=1.0, SvNumberFormatter *pFormatter=nullptr, bool bConvertDate=true) override
Definition: eeimpars.cxx:115
virtual ScRange GetRange() override
Definition: eeimport.hxx:55
ScDocument * mpDoc
Definition: eeimport.hxx:39
std::unique_ptr< ScEEParser > mpParser
Definition: eeimport.hxx:43
ScEEParseEntry * ListEntry(size_t index)
Definition: eeparser.hxx:129
size_t ListSize() const
Definition: eeparser.hxx:128
virtual ErrCode ScImportHTML(SvStream &, const OUString &rBaseURL, ScDocument *, ScRange &rRange, double nOutputFactor, bool bCalcWidthHeight, SvNumberFormatter *pFormatter, bool bConvertDate) override
Definition: htmlimp.cxx:46
virtual std::unique_ptr< ScEEAbsImport > CreateHTMLImport(ScDocument *pDocP, const OUString &rBaseURL, const ScRange &rRange) override
Definition: htmlimp.cxx:58
virtual OUString GetHTMLRangeNameList(ScDocument &rDoc, const OUString &rOrigName) override
Definition: htmlimp.cxx:198
static SC_DLLPUBLIC OUString addToken(std::u16string_view rTokenList, std::u16string_view rToken, sal_Unicode cSep, sal_Int32 nSepCount=1, bool bForceSep=false)
Adds the string rToken to rTokenList, using a list separator character.
Definition: global.cxx:703
static SC_DLLPUBLIC const CharClass & getCharClass()
Definition: global.cxx:1062
static void InsertRangeName(ScDocument &rDoc, const OUString &rName, const ScRange &rRange)
Definition: htmlimp.cxx:102
virtual void WriteToDocument(bool bSizeColsRows=false, double nOutputFactor=1.0, SvNumberFormatter *pFormatter=nullptr, bool bConvertDate=true) override
Definition: htmlimp.cxx:114
ScHTMLImport(ScDocument *pDoc, const OUString &rBaseURL, const ScRange &rRange, bool bCalcWidthHeight)
Definition: htmlimp.cxx:63
static OUString GetHTMLRangeNameList(const ScDocument &rDoc, std::u16string_view rOrigName)
Definition: htmlimp.cxx:203
Base class for HTML parser classes.
Definition: htmlpars.hxx:79
virtual const ScHTMLTable * GetGlobalTable() const =0
Returns the "global table" which contains the entire HTML document.
The HTML parser for data queries.
Definition: htmlpars.hxx:580
Stores data for one table in an HTML document.
Definition: htmlpars.hxx:327
const OUString & GetTableName() const
Returns the name of the table, specified in the TABLE tag.
Definition: htmlpars.hxx:343
void ApplyCellBorders(ScDocument *pDoc, const ScAddress &rFirstPos) const
Applies border formatting to the passed document.
Definition: htmlpars.cxx:2241
const OUString & GetTableCaption() const
Returns the caption of the table, specified in the tag.
Definition: htmlpars.hxx:345
ScHTMLTable * FindNestedTable(ScHTMLTableId nTableId) const
Searches in all nested tables for the specified table.
Definition: htmlpars.cxx:1904
SCCOLROW GetDocSize(ScHTMLOrient eOrient, SCCOLROW nCellPos) const
Returns the resulting document row/column count of the specified HTML row/column.
Definition: htmlpars.cxx:2187
void GetDocRange(ScRange &rRange) const
Calculates the current Calc document area of this table.
Definition: htmlpars.cxx:2230
SCCOL GetColMerge() const
Definition: attrib.hxx:68
bool IsMerged() const
Definition: attrib.hxx:71
SCROW GetRowMerge() const
Definition: attrib.hxx:69
SC_DLLPUBLIC bool IsReference(ScRange &rRef) const
Definition: rangenam.cxx:371
bool Contains(const ScRange &) const
Definition: rangelst.cxx:1082
void push_back(const ScRange &rRange)
Definition: rangelst.cxx:1137
SC_DLLPUBLIC ScRangeData * findByUpperName(const OUString &rName)
Definition: rangenam.cxx:704
SC_DLLPUBLIC bool insert(ScRangeData *p, bool bReuseFreeIndex=true)
Insert object into set.
Definition: rangenam.cxx:802
bool Move(SCCOL aDeltaX, SCROW aDeltaY, SCTAB aDeltaZ, ScRange &rErrorRange, const ScDocument &rDoc)
Definition: address.cxx:2330
ScAddress aEnd
Definition: address.hxx:498
ScAddress aStart
Definition: address.hxx:497
virtual SC_DLLPUBLIC SfxItemSet & GetItemSet() override
Definition: stlsheet.cxx:133
formula::FormulaToken * AddDoubleReference(const ScComplexRefData &rRef)
Definition: token.cxx:2282
static const OUString & GetHTMLDocName()
Returns the built-in range name for an HTML document.
Definition: ftools.cxx:285
static bool IsHTMLTablesName(std::u16string_view rSource)
Returns true, if rSource is the built-in range name for all HTML tables.
Definition: ftools.cxx:327
static const OUString & GetHTMLTablesName()
Returns the built-in range name for all HTML tables.
Definition: ftools.cxx:291
static OUString GetNameFromHTMLIndex(sal_uInt32 nIndex)
Returns the built-in range name for an HTML table, specified by table index.
Definition: ftools.cxx:310
const SfxPoolItem & Get(sal_uInt16 nWhich, bool bSrchInParent=true) const
constexpr tools::Long Height() const
tools::Long AdjustHeight(tools::Long n)
tools::Long AdjustWidth(tools::Long n)
constexpr tools::Long Width() const
const editeng::SvxBorderLine * GetLine(SvxBoxItemLine nLine) const
void SetLine(const editeng::SvxBorderLine *pNew, SvxBoxItemLine nLine)
tools::Long GetRight() const
tools::Long GetLeft() const
static Size GetPaperSize(Paper ePaper, MapUnit eUnit=MapUnit::MapTwip)
sal_uInt16 GetUpper() const
sal_uInt16 GetLower() const
@ tdCol
Definition: htmlpars.hxx:225
@ tdRow
Definition: htmlpars.hxx:225
const ScHTMLTableId SC_HTML_GLOBAL_TABLE
Identifier of the "global table" (the entire HTML document).
Definition: htmlpars.hxx:230
sal_uInt16 ScHTMLTableId
Type for a unique identifier for each table.
Definition: htmlpars.hxx:228
sal_Int32 nIndex
OUString aName
sal_Int64 n
const char * sName
tools::Long const nRightMargin
tools::Long const nBottomMargin
tools::Long const nTopMargin
tools::Long const nLeftMargin
int i
std::basic_string_view< charT, traits > getToken(std::basic_string_view< charT, traits > sv, charT delimiter, std::size_t &position)
long Long
PAPER_A4
constexpr TypedWhichId< SvxSizeItem > ATTR_PAGE_SIZE(161)
constexpr TypedWhichId< ScMergeAttr > ATTR_MERGE(144)
constexpr TypedWhichId< SvxULSpaceItem > ATTR_ULSPACE(158)
constexpr TypedWhichId< SvxBoxItem > ATTR_BORDER(150)
constexpr TypedWhichId< SvxLRSpaceItem > ATTR_LRSPACE(157)
static SfxItemSet & rSet
sal_uIntPtr sal_uLong
Complex reference (a range) into the sheet.
Definition: refdata.hxx:123
void InitRange(const ScRange &rRange)
Definition: refdata.hxx:130
ScSingleRefData Ref2
Definition: refdata.hxx:125
ScSingleRefData Ref1
Definition: refdata.hxx:124
SCCOL nColOverlap
Definition: eeparser.hxx:69
SCROW nRowOverlap
Definition: eeparser.hxx:70
SCTAB Tab() const
Definition: refdata.cxx:254
void SetFlag3D(bool bVal)
Definition: refdata.hxx:89
sal_Int16 SCTAB
Definition: types.hxx:22
sal_Int16 SCCOL
Definition: types.hxx:21
sal_Int32 SCROW
Definition: types.hxx:17