LibreOffice Module sc (master)  1
htmlimp.cxx
Go to the documentation of this file.
1 /* -*- Mode: C++; tab-width: 4; indent-tabs-mode: nil; c-basic-offset: 4 -*- */
2 /*
3  * This file is part of the LibreOffice project.
4  *
5  * This Source Code Form is subject to the terms of the Mozilla Public
6  * License, v. 2.0. If a copy of the MPL was not distributed with this
7  * file, You can obtain one at http://mozilla.org/MPL/2.0/.
8  *
9  * This file incorporates work covered by the following license notice:
10  *
11  * Licensed to the Apache Software Foundation (ASF) under one or more
12  * contributor license agreements. See the NOTICE file distributed
13  * with this work for additional information regarding copyright
14  * ownership. The ASF licenses this file to you under the Apache
15  * License, Version 2.0 (the "License"); you may not use this file
16  * except in compliance with the License. You may obtain a copy of
17  * the License at http://www.apache.org/licenses/LICENSE-2.0 .
18  */
19 
20 #include <scitems.hxx>
21 #include <osl/diagnose.h>
22 #include <unotools/charclass.hxx>
23 
24 #include <editeng/lrspitem.hxx>
25 #include <editeng/paperinf.hxx>
26 #include <editeng/sizeitem.hxx>
27 #include <editeng/ulspitem.hxx>
28 #include <editeng/boxitem.hxx>
29 #include <vcl/svapp.hxx>
30 
31 #include <htmlimp.hxx>
32 #include <htmlpars.hxx>
33 #include <filter.hxx>
34 #include <global.hxx>
35 #include <document.hxx>
36 #include <editutil.hxx>
37 #include <stlpool.hxx>
38 #include <stlsheet.hxx>
39 #include <refdata.hxx>
40 #include <rangenam.hxx>
41 #include <attrib.hxx>
42 #include <ftools.hxx>
43 #include <tokenarray.hxx>
44 
45 ErrCode ScFormatFilterPluginImpl::ScImportHTML( SvStream &rStream, const OUString& rBaseURL, ScDocument *pDoc,
46  ScRange& rRange, double nOutputFactor, bool bCalcWidthHeight, SvNumberFormatter* pFormatter,
47  bool bConvertDate )
48 {
49  ScHTMLImport aImp( pDoc, rBaseURL, rRange, bCalcWidthHeight );
50  ErrCode nErr = aImp.Read( rStream, rBaseURL );
51  ScRange aR = aImp.GetRange();
52  rRange.aEnd = aR.aEnd;
53  aImp.WriteToDocument( true, nOutputFactor, pFormatter, bConvertDate );
54  return nErr;
55 }
56 
57 std::unique_ptr<ScEEAbsImport> ScFormatFilterPluginImpl::CreateHTMLImport( ScDocument* pDocP, const OUString& rBaseURL, const ScRange& rRange )
58 {
59  return std::make_unique<ScHTMLImport>( pDocP, rBaseURL, rRange, true/*bCalcWidthHeight*/ );
60 }
61 
62 ScHTMLImport::ScHTMLImport( ScDocument* pDocP, const OUString& rBaseURL, const ScRange& rRange, bool bCalcWidthHeight ) :
63  ScEEImport( pDocP, rRange )
64 {
65  Size aPageSize;
67  const OUString& aPageStyle = mpDoc->GetPageStyle( rRange.aStart.Tab() );
68  ScStyleSheet* pStyleSheet = static_cast<ScStyleSheet*>(mpDoc->
69  GetStyleSheetPool()->Find( aPageStyle, SfxStyleFamily::Page ));
70  if ( pStyleSheet )
71  {
72  const SfxItemSet& rSet = pStyleSheet->GetItemSet();
73  const SvxLRSpaceItem* pLRItem = &rSet.Get( ATTR_LRSPACE );
74  long nLeftMargin = pLRItem->GetLeft();
75  long nRightMargin = pLRItem->GetRight();
76  const SvxULSpaceItem* pULItem = &rSet.Get( ATTR_ULSPACE );
77  long nTopMargin = pULItem->GetUpper();
78  long nBottomMargin = pULItem->GetLower();
79  aPageSize = rSet.Get(ATTR_PAGE_SIZE).GetSize();
80  if ( !aPageSize.Width() || !aPageSize.Height() )
81  {
82  OSL_FAIL("PageSize Null ?!?!?");
83  aPageSize = SvxPaperInfo::GetPaperSize( PAPER_A4 );
84  }
85  aPageSize.AdjustWidth( -(nLeftMargin + nRightMargin) );
86  aPageSize.AdjustHeight( -(nTopMargin + nBottomMargin) );
87  aPageSize = pDefaultDev->LogicToPixel( aPageSize, MapMode( MapUnit::MapTwip ) );
88  }
89  else
90  {
91  OSL_FAIL("no StyleSheet?!?");
92  aPageSize = pDefaultDev->LogicToPixel(
93  SvxPaperInfo::GetPaperSize( PAPER_A4 ), MapMode( MapUnit::MapTwip ) );
94  }
95  if( bCalcWidthHeight )
96  mpParser.reset( new ScHTMLLayoutParser( mpEngine.get(), rBaseURL, aPageSize, pDocP ));
97  else
98  mpParser.reset( new ScHTMLQueryParser( mpEngine.get(), pDocP ));
99 }
100 
101 void ScHTMLImport::InsertRangeName( ScDocument* pDoc, const OUString& rName, const ScRange& rRange )
102 {
103  ScComplexRefData aRefData;
104  aRefData.InitRange( rRange );
105  aRefData.Ref1.SetFlag3D( true );
106  aRefData.Ref2.SetFlag3D( aRefData.Ref2.Tab() != aRefData.Ref1.Tab() );
107  ScTokenArray aTokArray(pDoc);
108  aTokArray.AddDoubleReference( aRefData );
109  ScRangeData* pRangeData = new ScRangeData( pDoc, rName, aTokArray );
110  pDoc->GetRangeName()->insert( pRangeData );
111 }
112 
114  bool bSizeColsRows, double nOutputFactor, SvNumberFormatter* pFormatter, bool bConvertDate )
115 {
116  ScEEImport::WriteToDocument( bSizeColsRows, nOutputFactor, pFormatter, bConvertDate );
117 
118  const ScHTMLParser* pParser = static_cast<ScHTMLParser*>(mpParser.get());
119  const ScHTMLTable* pGlobTable = pParser->GetGlobalTable();
120  if( !pGlobTable )
121  return;
122 
123  // set cell borders for HTML table cells
124  pGlobTable->ApplyCellBorders( mpDoc, maRange.aStart );
125 
126  // correct cell borders for merged cells
127  for ( size_t i = 0, n = pParser->ListSize(); i < n; ++i )
128  {
129  const ScEEParseEntry* pEntry = pParser->ListEntry( i );
130  if( (pEntry->nColOverlap > 1) || (pEntry->nRowOverlap > 1) )
131  {
132  SCTAB nTab = maRange.aStart.Tab();
133  const ScMergeAttr* pItem = mpDoc->GetAttr( pEntry->nCol, pEntry->nRow, nTab, ATTR_MERGE );
134  if( pItem->IsMerged() )
135  {
136  SCCOL nColMerge = pItem->GetColMerge();
137  SCROW nRowMerge = pItem->GetRowMerge();
138 
139  const SvxBoxItem* pToItem = mpDoc->GetAttr( pEntry->nCol, pEntry->nRow, nTab, ATTR_BORDER );
140  SvxBoxItem aNewItem( *pToItem );
141  if( nColMerge > 1 )
142  {
143  const SvxBoxItem* pFromItem =
144  mpDoc->GetAttr( pEntry->nCol + nColMerge - 1, pEntry->nRow, nTab, ATTR_BORDER );
145  aNewItem.SetLine( pFromItem->GetLine( SvxBoxItemLine::RIGHT ), SvxBoxItemLine::RIGHT );
146  }
147  if( nRowMerge > 1 )
148  {
149  const SvxBoxItem* pFromItem =
150  mpDoc->GetAttr( pEntry->nCol, pEntry->nRow + nRowMerge - 1, nTab, ATTR_BORDER );
151  aNewItem.SetLine( pFromItem->GetLine( SvxBoxItemLine::BOTTOM ), SvxBoxItemLine::BOTTOM );
152  }
153  mpDoc->ApplyAttr( pEntry->nCol, pEntry->nRow, nTab, aNewItem );
154  }
155  }
156  }
157 
158  // create ranges for HTML tables
159  // 1 - entire document
160  ScRange aNewRange( maRange.aStart );
161  aNewRange.aEnd.IncCol( static_cast<SCCOL>(pGlobTable->GetDocSize( tdCol )) - 1 );
162  aNewRange.aEnd.IncRow( pGlobTable->GetDocSize( tdRow ) - 1 );
164 
165  // 2 - all tables
167 
168  // 3 - single tables
169  SCCOL nColDiff = maRange.aStart.Col();
170  SCROW nRowDiff = maRange.aStart.Row();
171  SCTAB nTabDiff = maRange.aStart.Tab();
172 
173  ScHTMLTable* pTable = nullptr;
175  ScRange aErrorRange( ScAddress::UNINITIALIZED );
176  while( (pTable = pGlobTable->FindNestedTable( ++nTableId )) != nullptr )
177  {
178  pTable->GetDocRange( aNewRange );
179  if (!aNewRange.Move( nColDiff, nRowDiff, nTabDiff, aErrorRange ))
180  {
181  assert(!"can't move");
182  }
183  // insert table number as name
184  InsertRangeName( mpDoc, ScfTools::GetNameFromHTMLIndex( nTableId ), aNewRange );
185  // insert table id as name
186  if (!pTable->GetTableName().isEmpty())
187  {
188  OUString aName( ScfTools::GetNameFromHTMLName( pTable->GetTableName() ) );
189  if (!mpDoc->GetRangeName()->findByUpperName(ScGlobal::getCharClassPtr()->uppercase(aName)))
190  InsertRangeName( mpDoc, aName, aNewRange );
191  }
192  }
193 }
194 
195 OUString ScFormatFilterPluginImpl::GetHTMLRangeNameList( ScDocument* pDoc, const OUString& rOrigName )
196 {
197  return ScHTMLImport::GetHTMLRangeNameList( pDoc, rOrigName );
198 }
199 
200 OUString ScHTMLImport::GetHTMLRangeNameList( const ScDocument* pDoc, const OUString& rOrigName )
201 {
202  OSL_ENSURE( pDoc, "ScHTMLImport::GetHTMLRangeNameList - missing document" );
203 
204  if (rOrigName.isEmpty())
205  return OUString();
206 
207  OUString aNewName;
208  ScRangeName* pRangeNames = pDoc->GetRangeName();
209  ScRangeList aRangeList;
210  sal_Int32 nStringIx = 0;
211  do
212  {
213  OUString aToken( rOrigName.getToken( 0, ';', nStringIx ) );
214  if( pRangeNames && ScfTools::IsHTMLTablesName( aToken ) )
215  { // build list with all HTML tables
216  sal_uLong nIndex = 1;
217  for(;;)
218  {
219  aToken = ScfTools::GetNameFromHTMLIndex( nIndex++ );
220  const ScRangeData* pRangeData = pRangeNames->findByUpperName(ScGlobal::getCharClassPtr()->uppercase(aToken));
221  if (!pRangeData)
222  break;
223  ScRange aRange;
224  if( pRangeData->IsReference( aRange ) && !aRangeList.In( aRange ) )
225  {
226  aNewName = ScGlobal::addToken(aNewName, aToken, ';');
227  aRangeList.push_back( aRange );
228  }
229  }
230  }
231  else
232  aNewName = ScGlobal::addToken(aNewName, aToken, ';');
233  }
234  while (nStringIx>0);
235  return aNewName;
236 }
237 
238 /* vim:set shiftwidth=4 softtabstop=4 expandtab: */
long GetLeft() const
long Width() const
formula::FormulaToken * AddDoubleReference(const ScComplexRefData &rRef)
Definition: token.cxx:2212
SCCOL GetColMerge() const
Definition: attrib.hxx:69
sal_Int32 nIndex
ScDocument * mpDoc
Definition: eeimport.hxx:40
ScAddress aStart
Definition: address.hxx:500
constexpr TypedWhichId< SvxSizeItem > ATTR_PAGE_SIZE(161)
sal_uInt16 GetLower() const
constexpr TypedWhichId< SvxBoxItem > ATTR_BORDER(150)
SCROW Row() const
Definition: address.hxx:262
long AdjustWidth(long n)
static bool IsHTMLTablesName(const OUString &rSource)
Returns true, if rSource is the built-in range name for all HTML tables.
Definition: ftools.cxx:323
long Height() const
sal_uIntPtr sal_uLong
SC_DLLPUBLIC ScRangeName * GetRangeName(SCTAB nTab) const
Definition: documen3.cxx:168
sal_Int64 n
SC_DLLPUBLIC bool Move(SCCOL aDeltaX, SCROW aDeltaY, SCTAB aDeltaZ, ScRange &rErrorRange, const ScDocument *pDocument=nullptr)
Definition: address.cxx:2367
ScAddress aEnd
Definition: address.hxx:501
SCCOLROW GetDocSize(ScHTMLOrient eOrient, SCCOLROW nCellPos) const
Returns the resulting document row/column count of the specified HTML row/column. ...
Definition: htmlpars.cxx:2172
std::unique_ptr< ScTabEditEngine > mpEngine
Definition: eeimport.hxx:42
SC_DLLPUBLIC void ApplyAttr(SCCOL nCol, SCROW nRow, SCTAB nTab, const SfxPoolItem &rAttr)
Definition: document.cxx:4759
long AdjustHeight(long n)
constexpr TypedWhichId< ScMergeAttr > ATTR_MERGE(144)
static OutputDevice * GetDefaultDevice()
sal_uInt16 ScHTMLTableId
Type for a unique identifier for each table.
Definition: htmlpars.hxx:226
virtual const ScHTMLTable * GetGlobalTable() const =0
Returns the "global table" which contains the entire HTML document.
static void InsertRangeName(ScDocument *pDoc, const OUString &rName, const ScRange &rRange)
Definition: htmlimp.cxx:101
static const OUString & GetHTMLTablesName()
Returns the built-in range name for all HTML tables.
Definition: ftools.cxx:287
const BorderLinePrimitive2D *pCandidateB assert(pCandidateA)
bool In(const ScRange &) const
Definition: rangelst.cxx:1089
PAPER_A4
void push_back(const ScRange &rRange)
Definition: rangelst.cxx:1144
SCTAB Tab() const
Definition: address.hxx:271
const editeng::SvxBorderLine * GetLine(SvxBoxItemLine nLine) const
SCROW nRowOverlap
Definition: eeparser.hxx:68
static OUString GetNameFromHTMLIndex(sal_uInt32 nIndex)
Returns the built-in range name for an HTML table, specified by table index.
Definition: ftools.cxx:306
SC_DLLPUBLIC const SfxPoolItem * GetAttr(SCCOL nCol, SCROW nRow, SCTAB nTab, sal_uInt16 nWhich) const
Definition: document.cxx:4716
long const nTopMargin
virtual OUString GetHTMLRangeNameList(ScDocument *pDoc, const OUString &rOrigName) override
Definition: htmlimp.cxx:195
SC_DLLPUBLIC ScRangeData * findByUpperName(const OUString &rName)
Definition: rangenam.cxx:682
int i
void IncCol(SCCOL nDelta=1)
Definition: address.hxx:304
void SetFlag3D(bool bVal)
Definition: refdata.hxx:90
sal_Int16 SCCOL
Definition: types.hxx:22
ScSingleRefData Ref1
Definition: refdata.hxx:125
std::unique_ptr< ScEEParser > mpParser
Definition: eeimport.hxx:44
ScSingleRefData Ref2
Definition: refdata.hxx:126
void IncRow(SCROW nDelta=1)
Definition: address.hxx:300
static const OUString & GetHTMLDocName()
Returns the built-in range name for an HTML document.
Definition: ftools.cxx:281
The HTML parser for data queries.
Definition: htmlpars.hxx:565
SC_DLLPUBLIC OUString GetPageStyle(SCTAB nTab) const
Definition: document.cxx:6188
const ScHTMLTableId SC_HTML_GLOBAL_TABLE
Identifier of the "global table" (the entire HTML document).
Definition: htmlpars.hxx:228
void GetDocRange(ScRange &rRange) const
Calculates the current Calc document area of this table.
Definition: htmlpars.cxx:2215
static Size GetPaperSize(Paper ePaper, MapUnit eUnit=MapUnit::MapTwip)
virtual std::unique_ptr< ScEEAbsImport > CreateHTMLImport(ScDocument *pDocP, const OUString &rBaseURL, const ScRange &rRange) override
Definition: htmlimp.cxx:57
constexpr TypedWhichId< SvxLRSpaceItem > ATTR_LRSPACE(157)
SCTAB Tab() const
Definition: refdata.cxx:254
SCCOL Col() const
Definition: address.hxx:267
SCCOL nColOverlap
Definition: eeparser.hxx:67
Point LogicToPixel(const Point &rLogicPt) const
bool IsMerged() const
Definition: attrib.hxx:72
sal_Int32 SCROW
Definition: types.hxx:18
static OUString GetNameFromHTMLName(const OUString &rTabName)
Returns the built-in range name for an HTML table, specified by table name.
Definition: ftools.cxx:313
long const nLeftMargin
const SfxPoolItem & Get(sal_uInt16 nWhich, bool bSrchInParent=true) const
static SC_DLLPUBLIC OUString addToken(const OUString &rTokenList, const OUString &rToken, sal_Unicode cSep, sal_Int32 nSepCount=1, bool bForceSep=false)
Adds the string rToken to rTokenList, using a list separator character.
Definition: global.cxx:664
static OUString GetHTMLRangeNameList(const ScDocument *pDoc, const OUString &rOrigName)
Definition: htmlimp.cxx:200
ScHTMLTable * FindNestedTable(ScHTMLTableId nTableId) const
Searches in all nested tables for the specified table.
Definition: htmlpars.cxx:1906
OUString aName
void InitRange(const ScRange &rRange)
Definition: refdata.hxx:130
virtual SC_DLLPUBLIC SfxItemSet & GetItemSet() override
Definition: stlsheet.cxx:126
Base class for HTML parser classes.
Definition: htmlpars.hxx:76
constexpr TypedWhichId< SvxULSpaceItem > ATTR_ULSPACE(158)
static SC_DLLPUBLIC const CharClass * getCharClassPtr()
Definition: global.cxx:1018
virtual void WriteToDocument(bool bSizeColsRows=false, double nOutputFactor=1.0, SvNumberFormatter *pFormatter=nullptr, bool bConvertDate=true) override
Definition: htmlimp.cxx:113
virtual void WriteToDocument(bool bSizeColsRows=false, double nOutputFactor=1.0, SvNumberFormatter *pFormatter=nullptr, bool bConvertDate=true) override
Definition: eeimpars.cxx:111
const OUString & GetTableName() const
Returns the name of the table, specified in the TABLE tag.
Definition: htmlpars.hxx:340
ScHTMLImport(ScDocument *pDoc, const OUString &rBaseURL, const ScRange &rRange, bool bCalcWidthHeight)
Definition: htmlimp.cxx:62
ScRange maRange
Definition: eeimport.hxx:39
long GetRight() const
SC_DLLPUBLIC bool IsReference(ScRange &rRef) const
Definition: rangenam.cxx:369
Complex reference (a range) into the sheet.
Definition: refdata.hxx:123
virtual ErrCode ScImportHTML(SvStream &, const OUString &rBaseURL, ScDocument *, ScRange &rRange, double nOutputFactor, bool bCalcWidthHeight, SvNumberFormatter *pFormatter, bool bConvertDate) override
Definition: htmlimp.cxx:45
Stores data for one table in an HTML document.
Definition: htmlpars.hxx:324
size_t ListSize() const
Definition: eeparser.hxx:126
SCROW GetRowMerge() const
Definition: attrib.hxx:70
long const nBottomMargin
long const nRightMargin
SC_DLLPUBLIC bool insert(ScRangeData *p, bool bReuseFreeIndex=true)
Insert object into set.
Definition: rangenam.cxx:810
virtual ErrCode Read(SvStream &rStream, const OUString &rBaseURL) override
Definition: eeimpars.cxx:74
sal_Int16 SCTAB
Definition: types.hxx:23
virtual ScRange GetRange() override
Definition: eeimport.hxx:56
sal_uInt16 GetUpper() const
void ApplyCellBorders(ScDocument *pDoc, const ScAddress &rFirstPos) const
Applies border formatting to the passed document.
Definition: htmlpars.cxx:2226
void SetLine(const editeng::SvxBorderLine *pNew, SvxBoxItemLine nLine)
ScEEParseEntry * ListEntry(size_t index)
Definition: eeparser.hxx:127