LibreOffice Module sc (master)  1
htmlpars.cxx
Go to the documentation of this file.
1 /* -*- Mode: C++; tab-width: 4; indent-tabs-mode: nil; c-basic-offset: 4 -*- */
2 /*
3  * This file is part of the LibreOffice project.
4  *
5  * This Source Code Form is subject to the terms of the Mozilla Public
6  * License, v. 2.0. If a copy of the MPL was not distributed with this
7  * file, You can obtain one at http://mozilla.org/MPL/2.0/.
8  *
9  * This file incorporates work covered by the following license notice:
10  *
11  * Licensed to the Apache Software Foundation (ASF) under one or more
12  * contributor license agreements. See the NOTICE file distributed
13  * with this work for additional information regarding copyright
14  * ownership. The ASF licenses this file to you under the Apache
15  * License, Version 2.0 (the "License"); you may not use this file
16  * except in compliance with the License. You may obtain a copy of
17  * the License at http://www.apache.org/licenses/LICENSE-2.0 .
18  */
19 
20 #include <memory>
21 #include <sal/config.h>
22 
23 #include <comphelper/string.hxx>
24 
25 #include <scitems.hxx>
26 
27 #include <svtools/htmlcfg.hxx>
28 #include <editeng/colritem.hxx>
29 #include <editeng/brushitem.hxx>
30 #include <editeng/editeng.hxx>
31 #include <editeng/fhgtitem.hxx>
32 #include <editeng/fontitem.hxx>
33 #include <editeng/postitem.hxx>
34 #include <editeng/udlnitem.hxx>
35 #include <editeng/wghtitem.hxx>
36 #include <editeng/borderline.hxx>
37 #include <editeng/boxitem.hxx>
38 #include <editeng/justifyitem.hxx>
39 #include <sfx2/objsh.hxx>
40 #include <svl/intitem.hxx>
41 #include <vcl/graphicfilter.hxx>
42 #include <svtools/parhtml.hxx>
43 #include <svtools/htmlkywd.hxx>
44 #include <svtools/htmltokn.h>
45 
46 #include <vcl/outdev.hxx>
47 #include <vcl/svapp.hxx>
48 #include <tools/urlobj.hxx>
49 #include <osl/diagnose.h>
50 
51 #include <rtl/tencinfo.h>
52 
53 #include <attrib.hxx>
54 #include <htmlpars.hxx>
55 #include <global.hxx>
56 #include <document.hxx>
57 #include <rangelst.hxx>
58 
59 #include <orcus/css_parser.hpp>
60 
61 #include <com/sun/star/document/XDocumentProperties.hpp>
62 #include <com/sun/star/document/XDocumentPropertiesSupplier.hpp>
63 #include <com/sun/star/frame/XModel.hpp>
64 #include <numeric>
65 #include <utility>
66 #include <officecfg/Office/Common.hxx>
67 
68 using ::editeng::SvxBorderLine;
69 using namespace ::com::sun::star;
70 
72 
73 void ScHTMLStyles::add(const char* pElemName, size_t nElemName, const char* pClassName, size_t nClassName,
74  const OUString& aProp, const OUString& aValue)
75 {
76  if (pElemName)
77  {
78  OUString aElem(pElemName, nElemName, RTL_TEXTENCODING_UTF8);
79  aElem = aElem.toAsciiLowerCase();
80  if (pClassName)
81  {
82  // Both element and class names given.
83  ElemsType::iterator itrElem = m_ElemProps.find(aElem);
84  if (itrElem == m_ElemProps.end())
85  {
86  // new element
87  std::pair<ElemsType::iterator, bool> r =
88  m_ElemProps.insert(std::make_pair(aElem, std::make_unique<NamePropsType>()));
89  if (!r.second)
90  // insertion failed.
91  return;
92  itrElem = r.first;
93  }
94 
95  NamePropsType *const pClsProps = itrElem->second.get();
96  OUString aClass(pClassName, nClassName, RTL_TEXTENCODING_UTF8);
97  aClass = aClass.toAsciiLowerCase();
98  insertProp(*pClsProps, aClass, aProp, aValue);
99  }
100  else
101  {
102  // Element name only. Add it to the element global.
103  insertProp(m_ElemGlobalProps, aElem, aProp, aValue);
104  }
105  }
106  else
107  {
108  if (pClassName)
109  {
110  // Class name only. Add it to the global.
111  OUString aClass(pClassName, nClassName, RTL_TEXTENCODING_UTF8);
112  aClass = aClass.toAsciiLowerCase();
113  insertProp(m_GlobalProps, aClass, aProp, aValue);
114  }
115  }
116 }
117 
119  const OUString& rElem, const OUString& rClass, const OUString& rPropName) const
120 {
121  // First, look into the element-class storage.
122  {
123  auto const itr = m_ElemProps.find(rElem);
124  if (itr != m_ElemProps.end())
125  {
126  const NamePropsType *const pClasses = itr->second.get();
127  NamePropsType::const_iterator itr2 = pClasses->find(rClass);
128  if (itr2 != pClasses->end())
129  {
130  const PropsType *const pProps = itr2->second.get();
131  PropsType::const_iterator itr3 = pProps->find(rPropName);
132  if (itr3 != pProps->end())
133  return itr3->second;
134  }
135  }
136  }
137  // Next, look into the class global storage.
138  {
139  auto const itr = m_GlobalProps.find(rClass);
140  if (itr != m_GlobalProps.end())
141  {
142  const PropsType *const pProps = itr->second.get();
143  PropsType::const_iterator itr2 = pProps->find(rPropName);
144  if (itr2 != pProps->end())
145  return itr2->second;
146  }
147  }
148  // As the last resort, look into the element global storage.
149  {
150  auto const itr = m_ElemGlobalProps.find(rClass);
151  if (itr != m_ElemGlobalProps.end())
152  {
153  const PropsType *const pProps = itr->second.get();
154  PropsType::const_iterator itr2 = pProps->find(rPropName);
155  if (itr2 != pProps->end())
156  return itr2->second;
157  }
158  }
159 
160  return maEmpty; // nothing found.
161 }
162 
164  NamePropsType& rStore, const OUString& aName,
165  const OUString& aProp, const OUString& aValue)
166 {
 // Records the pair aProp/aValue in the property set that rStore keeps
 // under aName; an empty property set is created for aName on first use.
167  NamePropsType::iterator itr = rStore.find(aName);
168  if (itr == rStore.end())
169  {
170  // new element
171  std::pair<NamePropsType::iterator, bool> r =
172  rStore.insert(std::make_pair(aName, std::make_unique<PropsType>()));
173  if (!r.second)
174  // insertion failed.
175  return;
176 
177  itr = r.first;
178  }
179 
180  PropsType *const pProps = itr->second.get();
 // NOTE(review): if PropsType is a unique-key map, emplace() keeps an
 // already stored value for aProp instead of replacing it - confirm intended.
181  pProps->emplace(aProp, aValue);
182 }
183 
184 // BASE class for HTML parser classes
185 
187  ScEEParser( pEditEngine ),
188  mpDoc( pDoc )
189 {
 // Fill the seven font-height slots from the HTML import configuration
 // entries Size_1 .. Size_7 (slot index = size number - 1).
190  maFontHeights[0] = officecfg::Office::Common::Filter::HTML::Import::FontSize::Size_1::get();
191  maFontHeights[1] = officecfg::Office::Common::Filter::HTML::Import::FontSize::Size_2::get();
192  maFontHeights[2] = officecfg::Office::Common::Filter::HTML::Import::FontSize::Size_3::get();
193  maFontHeights[3] = officecfg::Office::Common::Filter::HTML::Import::FontSize::Size_4::get();
194  maFontHeights[4] = officecfg::Office::Common::Filter::HTML::Import::FontSize::Size_5::get();
195  maFontHeights[5] = officecfg::Office::Common::Filter::HTML::Import::FontSize::Size_6::get();
196  maFontHeights[6] = officecfg::Office::Common::Filter::HTML::Import::FontSize::Size_7::get();
197 }
198 
200 {
 // Intentionally empty: this body performs no work of its own.
201 }
202 
204  EditEngine* pEditP, const OUString& rBaseURL, const Size& aPageSizeP,
205  ScDocument* pDocP ) :
206  ScHTMLParser( pEditP, pDocP ),
207  aPageSize( aPageSizeP ),
208  aBaseURL( rBaseURL ),
209  xLockedList( new ScRangeList ),
210  pLocalColOffset( new ScHTMLColOffset ),
211  nFirstTableCell(0),
212  nTableLevel(0),
213  nTable(0),
214  nMaxTable(0),
215  nColCntStart(0),
216  nMaxCol(0),
217  nTableWidth(0),
218  nColOffset(0),
219  nColOffsetStart(0),
220  nOffsetTolerance( SC_HTML_OFFSET_TOLERANCE_SMALL ),
221  bFirstRow( true ),
222  bTabInTabCell( false ),
223  bInCell( false ),
224  bInTitle( false )
225 {
 // All counters start at zero and the parser starts outside any table/cell.
 // Both the local and the global column-offset collections are seeded with
 // offset 0 (width 0, no tolerance), so neither is empty afterwards.
226  MakeColNoRef( pLocalColOffset, 0, 0, 0, 0 );
227  MakeColNoRef( &maColOffset, 0, 0, 0, 0 );
228 }
229 
231 {
 // Releases the raw-pointer resources that are deleted manually here.
 // First drain the table stack: every entry's column-offset object is
 // deleted unless it is the very object pLocalColOffset points to, which is
 // deleted once after the loop (avoids deleting the same object twice).
232  while ( !aTableStack.empty() )
233  {
234  ScHTMLTableStackEntry * pS = aTableStack.top().get();
235  if ( pS->pLocalColOffset != pLocalColOffset )
236  delete pS->pLocalColOffset;
237  aTableStack.pop();
238  }
239  delete pLocalColOffset;
 // The outer map stores raw pointers to inner maps; delete each of them
 // before the outer map itself is released.
240  if ( pTables )
241  {
242  for( const auto& rEntry : *pTables)
243  delete rEntry.second;
244  pTables.reset();
245  }
246 }
247 
 // Reads HTML from rStream through the edit engine with HTMLImportHdl
 // installed as import handler, then derives column widths from the collected
 // column offsets. Returns the error code reported by pEdit->Read().
 // NOTE(review): aOldLink, pObjSh and pDefaultDev are used below but their
 // declarations are not visible in this excerpt - confirm against the full file.
248 ErrCode ScHTMLLayoutParser::Read( SvStream& rStream, const OUString& rBaseURL )
249 {
251  pEdit->SetHtmlImportHdl( LINK( this, ScHTMLLayoutParser, HTMLImportHdl ) );
252 
254  bool bLoading = pObjSh && pObjSh->IsLoading();
255 
256  SvKeyValueIteratorRef xValues;
257  SvKeyValueIterator* pAttributes = nullptr;
 // While a document is loading, its own header attributes are handed on.
258  if ( bLoading )
259  pAttributes = pObjSh->GetHeaderAttributes();
260  else
261  {
262  // When not loading, set up fake http headers to force the SfxHTMLParser to use UTF8
263  // (used when pasting from clipboard)
264  const char* pCharSet = rtl_getBestMimeCharsetFromTextEncoding( RTL_TEXTENCODING_UTF8 );
265  if( pCharSet )
266  {
267  OUString aContentType = "text/html; charset=" +
268  OUString::createFromAscii( pCharSet );
269 
270  xValues = new SvKeyValueIterator;
271  xValues->Append( SvKeyValue( OOO_STRING_SVTOOLS_HTML_META_content_type, aContentType ) );
272  pAttributes = xValues.get();
273  }
274  }
275 
276  ErrCode nErr = pEdit->Read( rStream, rBaseURL, EETextFormat::Html, pAttributes );
277 
 // Put back the handler that was active before this call.
278  pEdit->SetHtmlImportHdl( aOldLink );
279  // Create column width
280  Adjust();
 // Each width is the distance between two neighbouring offsets, converted
 // from pixels with a twip map mode and stored at index j-1.
282  sal_uInt16 nCount = maColOffset.size();
283  sal_uLong nOff = maColOffset[0];
284  Size aSize;
285  for ( sal_uInt16 j = 1; j < nCount; j++ )
286  {
287  aSize.setWidth( maColOffset[j] - nOff );
288  aSize = pDefaultDev->PixelToLogic( aSize, MapMode( MapUnit::MapTwip ) );
289  maColWidths[ j-1 ] = aSize.Width();
290  nOff = maColOffset[j];
291  }
292  return nErr;
293 }
294 
296 {
 // This implementation has no object to hand out; it always yields nullptr.
297  return nullptr;
298 }
299 
301 {
 // Positions the selection of the current entry (mxActEntry) relative to
 // the previous entry pE, which may be null.
303  if ( pE )
304  {
305  if ( !pE->aSel.HasRange() )
306  { // Completely empty, following text ends up in the same paragraph!
307  mxActEntry->aSel.nStartPara = pE->aSel.nEndPara;
308  mxActEntry->aSel.nStartPos = pE->aSel.nEndPos;
309  }
310  }
 // Collapse the selection: its end is set equal to its start.
311  mxActEntry->aSel.nEndPara = mxActEntry->aSel.nStartPara;
312  mxActEntry->aSel.nEndPos = mxActEntry->aSel.nStartPos;
313 }
314 
316 {
 // Takes over the end position of rSel as the end of pE's selection,
 // provided rSel does not end before pE starts.
317  if ( rSel.nEndPara >= pE->aSel.nStartPara )
318  {
319  pE->aSel.nEndPara = rSel.nEndPara;
320  pE->aSel.nEndPos = rSel.nEndPos;
321  }
322  else if ( rSel.nStartPara == pE->aSel.nStartPara - 1 && !pE->aSel.HasRange() )
323  { // Did not attach a paragraph, but empty, do nothing
324  }
325  else
326  {
 // Any other combination is unexpected and only reported as a diagnostic.
327  OSL_FAIL( "EntryEnd: EditEngine ESelection End < Start" );
328  }
329 }
330 
332 {
 // Advances to the next row: a still open cell is closed first, then the
 // row counter is incremented and the row maximum raised if it was exceeded.
333  if ( bInCell )
334  CloseEntry( pInfo );
335  if ( nRowMax < ++nRowCnt )
336  nRowMax = nRowCnt;
 // NOTE(review): the listing skips two lines here; statements may be missing
 // from this excerpt.
339  bFirstRow = false;
340 }
341 
342 bool ScHTMLLayoutParser::SeekOffset( const ScHTMLColOffset* pOffset, sal_uInt16 nOffset,
343  SCCOL* pCol, sal_uInt16 nOffsetTol )
344 {
345  OSL_ENSURE( pOffset, "ScHTMLLayoutParser::SeekOffset - illegal call" );
346  ScHTMLColOffset::const_iterator it = pOffset->find( nOffset );
347  bool bFound = it != pOffset->end();
348  sal_uInt16 nPos = it - pOffset->begin();
349  *pCol = static_cast<SCCOL>(nPos);
350  if ( bFound )
351  return true;
352  sal_uInt16 nCount = pOffset->size();
353  if ( !nCount )
354  return false;
355  // nPos is the position of insertion, that's where the next higher one is (or isn't)
356  if ( nPos < nCount && (((*pOffset)[nPos] - nOffsetTol) <= nOffset) )
357  return true;
358  // Not smaller than everything else? Then compare with the next lower one
359  else if ( nPos && (((*pOffset)[nPos-1] + nOffsetTol) >= nOffset) )
360  {
361  (*pCol)--;
362  return true;
363  }
364  return false;
365 }
366 
367 void ScHTMLLayoutParser::MakeCol( ScHTMLColOffset* pOffset, sal_uInt16& nOffset,
368  sal_uInt16& nWidth, sal_uInt16 nOffsetTol, sal_uInt16 nWidthTol )
369 {
370  OSL_ENSURE( pOffset, "ScHTMLLayoutParser::MakeCol - illegal call" );
371  SCCOL nPos;
372  if ( SeekOffset( pOffset, nOffset, &nPos, nOffsetTol ) )
373  nOffset = static_cast<sal_uInt16>((*pOffset)[nPos]);
374  else
375  pOffset->insert( nOffset );
376  if ( nWidth )
377  {
378  if ( SeekOffset( pOffset, nOffset + nWidth, &nPos, nWidthTol ) )
379  nWidth = static_cast<sal_uInt16>((*pOffset)[nPos]) - nOffset;
380  else
381  pOffset->insert( nOffset + nWidth );
382  }
383 }
384 
385 void ScHTMLLayoutParser::MakeColNoRef( ScHTMLColOffset* pOffset, sal_uInt16 nOffset,
386  sal_uInt16 nWidth, sal_uInt16 nOffsetTol, sal_uInt16 nWidthTol )
387 {
388  OSL_ENSURE( pOffset, "ScHTMLLayoutParser::MakeColNoRef - illegal call" );
389  SCCOL nPos;
390  if ( SeekOffset( pOffset, nOffset, &nPos, nOffsetTol ) )
391  nOffset = static_cast<sal_uInt16>((*pOffset)[nPos]);
392  else
393  pOffset->insert( nOffset );
394  if ( nWidth )
395  {
396  if ( !SeekOffset( pOffset, nOffset + nWidth, &nPos, nWidthTol ) )
397  pOffset->insert( nOffset + nWidth );
398  }
399 }
400 
401 void ScHTMLLayoutParser::ModifyOffset( ScHTMLColOffset* pOffset, sal_uInt16& nOldOffset,
402  sal_uInt16& nNewOffset, sal_uInt16 nOffsetTol )
403 {
404  OSL_ENSURE( pOffset, "ScHTMLLayoutParser::ModifyOffset - illegal call" );
405  SCCOL nPos;
406  if ( !SeekOffset( pOffset, nOldOffset, &nPos, nOffsetTol ) )
407  {
408  if ( SeekOffset( pOffset, nNewOffset, &nPos, nOffsetTol ) )
409  nNewOffset = static_cast<sal_uInt16>((*pOffset)[nPos]);
410  else
411  pOffset->insert( nNewOffset );
412  return ;
413  }
414  nOldOffset = static_cast<sal_uInt16>((*pOffset)[nPos]);
415  SCCOL nPos2;
416  if ( SeekOffset( pOffset, nNewOffset, &nPos2, nOffsetTol ) )
417  {
418  nNewOffset = static_cast<sal_uInt16>((*pOffset)[nPos2]);
419  return ;
420  }
421  tools::Long nDiff = nNewOffset - nOldOffset;
422  if ( nDiff < 0 )
423  {
424  do
425  {
426  const_cast<sal_uLong&>((*pOffset)[nPos]) += nDiff;
427  } while ( nPos-- );
428  }
429  else
430  {
431  do
432  {
433  const_cast<sal_uLong&>((*pOffset)[nPos]) += nDiff;
434  } while ( ++nPos < static_cast<sal_uInt16>(pOffset->size()) );
435  }
436 }
437 
439 {
 // Moves the entry pE to the right until the cell range it covers no longer
 // intersects any range in xLockedList. When bJoin is set and a valid
 // position was found, the final range is added to xLockedList.
440  if ( !mpDoc->ValidCol(pE->nCol) )
441  return;
442 
443 // Or else this would create a wrong value at ScAddress (chance for an infinite loop)!
444  bool bBadCol = false;
445  bool bAgain;
446  ScRange aRange( pE->nCol, pE->nRow, 0,
447  pE->nCol + pE->nColOverlap - 1, pE->nRow + pE->nRowOverlap - 1, 0 );
448  do
449  {
450  bAgain = false;
451  for ( size_t i = 0, nRanges = xLockedList->size(); i < nRanges; ++i )
452  {
453  ScRange & rR = (*xLockedList)[i];
454  if ( rR.Intersects( aRange ) )
455  {
 // Jump just past the locked range; give up if that leaves the sheet.
456  pE->nCol = rR.aEnd.Col() + 1;
457  SCCOL nTmp = pE->nCol + pE->nColOverlap - 1;
458  if ( pE->nCol > mpDoc->MaxCol() || nTmp > mpDoc->MaxCol() )
459  bBadCol = true;
460  else
461  {
 // Re-check all locked ranges against the moved range.
462  bAgain = true;
463  aRange.aStart.SetCol( pE->nCol );
464  aRange.aEnd.SetCol( nTmp );
465  }
466  break;
467  }
468  }
469  } while ( bAgain );
470  if ( bJoin && !bBadCol )
471  xLockedList->Join( aRange );
472 }
473 
475 {
 // Walks all collected entries (maList) and assigns their final row, column
 // and spans. Nested tables are tracked with a local stack that saves and
 // restores nLastCol/nNextRow/nCurRow; pTables supplies per-table row heights.
 // NOTE(review): the listing skips a line right after the opening brace; a
 // statement may be missing from this excerpt.
477 
478  std::stack< std::unique_ptr<ScHTMLAdjustStackEntry> > aStack;
479  sal_uInt16 nTab = 0;
480  SCCOL nLastCol = SCCOL_MAX;
481  SCROW nNextRow = 0;
482  SCROW nCurRow = 0;
483  sal_uInt16 nPageWidth = static_cast<sal_uInt16>(aPageSize.Width());
484  InnerMap* pTab = nullptr;
485  for (auto& pE : maList)
486  {
487  if ( pE->nTab < nTab )
488  { // Table finished
489  if ( !aStack.empty() )
490  {
491  std::unique_ptr<ScHTMLAdjustStackEntry> pS = std::move(aStack.top());
492  aStack.pop();
493 
494  nLastCol = pS->nLastCol;
495  nNextRow = pS->nNextRow;
496  nCurRow = pS->nCurRow;
497  }
498  nTab = pE->nTab;
499  if (pTables)
500  {
501  OuterMap::const_iterator it = pTables->find( nTab );
502  if ( it != pTables->end() )
503  pTab = it->second;
504  }
505 
506  }
 // Remember the row as parsed; pE->nRow is overwritten below.
507  SCROW nRow = pE->nRow;
508  if ( pE->nCol <= nLastCol )
509  { // Next row
510  if ( pE->nRow < nNextRow )
511  pE->nRow = nCurRow = nNextRow;
512  else
513  nCurRow = nNextRow = pE->nRow;
 // Advance nNextRow by the height recorded for this row, or by 1.
514  SCROW nR = 0;
515  if ( pTab )
516  {
517  InnerMap::const_iterator it = pTab->find( nCurRow );
518  if ( it != pTab->end() )
519  nR = it->second;
520  }
521  if ( nR )
522  nNextRow += nR;
523  else
524  nNextRow++;
525  }
526  else
527  pE->nRow = nCurRow;
528  nLastCol = pE->nCol; // Read column
529  if ( pE->nTab > nTab )
530  { // New table
531  aStack.push( std::make_unique<ScHTMLAdjustStackEntry>(
532  nLastCol, nNextRow, nCurRow ) );
533  nTab = pE->nTab;
534  if ( pTables )
535  {
536  OuterMap::const_iterator it = pTables->find( nTab );
537  if ( it != pTables->end() )
538  pTab = it->second;
539  }
540  // New line spacing
541  SCROW nR = 0;
542  if ( pTab )
543  {
544  InnerMap::const_iterator it = pTab->find( nCurRow );
545  if ( it != pTab->end() )
546  nR = it->second;
547  }
548  if ( nR )
549  nNextRow = nCurRow + nR;
550  else
551  nNextRow = nCurRow + 1;
552  }
 // Entries outside any table (nTab == 0) get the full page width.
553  if ( nTab == 0 )
554  pE->nWidth = nPageWidth;
555  else
556  { // Real table, no paragraphs on the field
557  if ( pTab )
558  {
559  SCROW nRowSpan = pE->nRowOverlap;
560  for ( SCROW j=0; j < nRowSpan; j++ )
561  { // RowSpan resulting from merged rows
562  SCROW nRows = 0;
563  InnerMap::const_iterator it = pTab->find( nRow+j );
564  if ( it != pTab->end() )
565  nRows = it->second;
566  if ( nRows > 1 )
567  {
568  pE->nRowOverlap += nRows - 1;
569  if ( j == 0 )
570  { // Merged rows move the next row
571  SCROW nTmp = nCurRow + nRows;
572  if ( nNextRow < nTmp )
573  nNextRow = nTmp;
574  }
575  }
576  }
577  }
578  }
579  // Real column
580  (void)SeekOffset( &maColOffset, pE->nOffset, &pE->nCol, nOffsetTolerance );
581  SCCOL nColBeforeSkip = pE->nCol;
582  SkipLocked(pE.get(), false);
 // If SkipLocked moved the entry, its offset has to follow the new column.
583  if ( pE->nCol != nColBeforeSkip )
584  {
585  SCCOL nCount = static_cast<SCCOL>(maColOffset.size());
586  if ( nCount <= pE->nCol )
587  {
588  pE->nOffset = static_cast<sal_uInt16>(maColOffset[nCount-1]);
589  MakeCol( &maColOffset, pE->nOffset, pE->nWidth, nOffsetTolerance, nOffsetTolerance );
590  }
591  else
592  {
593  pE->nOffset = static_cast<sal_uInt16>(maColOffset[pE->nCol]);
594  }
595  }
 // Column span = distance between the start column and the column found
 // for the entry's right edge, but at least 1.
596  SCCOL nPos;
597  if ( pE->nWidth && SeekOffset( &maColOffset, pE->nOffset + pE->nWidth, &nPos, nOffsetTolerance ) )
598  pE->nColOverlap = (nPos > pE->nCol ? nPos - pE->nCol : 1);
599  else
600  {
601  //FIXME: This may not be correct, but works anyway ...
602  pE->nColOverlap = 1;
603  }
604  xLockedList->Join( ScRange( pE->nCol, pE->nRow, 0,
605  pE->nCol + pE->nColOverlap - 1, pE->nRow + pE->nRowOverlap - 1, 0 ) );
606  // Take over MaxDimensions
607  SCCOL nColTmp = pE->nCol + pE->nColOverlap;
608  if ( nColMax < nColTmp )
609  nColMax = nColTmp;
610  SCROW nRowTmp = pE->nRow + pE->nRowOverlap;
611  if ( nRowMax < nRowTmp )
612  nRowMax = nRowTmp;
613  }
614 }
615 
617 {
 // Yields the width of entry pE: its own nWidth when non-zero, otherwise the
 // distance from pE->nOffset to the local column offset at the end of its
 // column span, or 0 if that offset is not to the right of pE->nOffset.
618  if ( pE->nWidth )
619  return pE->nWidth;
 // Index of the span's end column relative to the table start, limited to
 // the last local offset and (next line) to a minimum of 0.
620  sal_Int32 nTmp = std::min( static_cast<sal_Int32>( pE->nCol -
621  nColCntStart + pE->nColOverlap),
622  static_cast<sal_Int32>( pLocalColOffset->size() - 1));
623  SCCOL nPos = (nTmp < 0 ? 0 : static_cast<SCCOL>(nTmp));
624  sal_uInt16 nOff2 = static_cast<sal_uInt16>((*pLocalColOffset)[nPos]);
625  if ( pE->nOffset < nOff2 )
626  return nOff2 - pE->nOffset;
627  return 0;
628 }
629 
631 {
 // Determines offsets and widths for the entries of the current table
 // (pE->nTab == nTable, starting at list index nFirstTableCell) and records
 // the resulting column boundaries in pLocalColOffset and maColOffset.
632  SCCOL nCol;
 // Without a table width the page width is used.
633  if ( !nTableWidth )
634  nTableWidth = static_cast<sal_uInt16>(aPageSize.Width());
635  SCCOL nColsPerRow = nMaxCol - nColCntStart;
636  if ( nColsPerRow <= 0 )
637  nColsPerRow = 1;
638  if ( pLocalColOffset->size() <= 2 )
639  { // Only PageSize, there was no width setting
 // Distribute the table width evenly over all columns.
640  sal_uInt16 nWidth = nTableWidth / static_cast<sal_uInt16>(nColsPerRow);
641  sal_uInt16 nOff = nColOffsetStart;
643  for ( nCol = 0; nCol <= nColsPerRow; ++nCol, nOff = nOff + nWidth )
644  {
645  MakeColNoRef( pLocalColOffset, nOff, 0, 0, 0 );
646  }
647  nTableWidth = static_cast<sal_uInt16>(pLocalColOffset->back() - pLocalColOffset->front());
648  for ( size_t i = nFirstTableCell, nListSize = maList.size(); i < nListSize; ++i )
649  {
650  auto& pE = maList[ i ];
651  if ( pE->nTab == nTable )
652  {
653  pE->nOffset = static_cast<sal_uInt16>((*pLocalColOffset)[pE->nCol - nColCntStart]);
654  pE->nWidth = 0; // to be recalculated later
655  }
656  }
657  }
658  else
659  { // Some without width
660  // Why actually no pE?
661  if ( nFirstTableCell < maList.size() )
662  {
 // pWidths[c] = width of column c, pOffsets[c] = left edge of column c;
 // both are zero-initialised, pOffsets has one extra slot for the right edge.
663  std::unique_ptr<sal_uInt16[]> pOffsets(new sal_uInt16[ nColsPerRow+1 ]);
664  memset( pOffsets.get(), 0, (nColsPerRow+1) * sizeof(sal_uInt16) );
665  std::unique_ptr<sal_uInt16[]> pWidths(new sal_uInt16[ nColsPerRow ]);
666  memset( pWidths.get(), 0, nColsPerRow * sizeof(sal_uInt16) );
667  pOffsets[0] = nColOffsetStart;
 // Pass 1: collect column widths from entries that carry a width.
668  for ( size_t i = nFirstTableCell, nListSize = maList.size(); i < nListSize; ++i )
669  {
670  auto& pE = maList[ i ];
671  if ( pE->nTab == nTable && pE->nWidth )
672  {
673  nCol = pE->nCol - nColCntStart;
674  if ( nCol < nColsPerRow )
675  {
676  if ( pE->nColOverlap == 1 )
677  {
 // Single column: keep the largest width seen.
678  if ( pWidths[nCol] < pE->nWidth )
679  pWidths[nCol] = pE->nWidth;
680  }
681  else
682  { // try to find a single undefined width
683  sal_uInt16 nTotal = 0;
684  bool bFound = false;
685  SCCOL nHere = 0;
686  SCCOL nStop = std::min( static_cast<SCCOL>(nCol + pE->nColOverlap), nColsPerRow );
687  for ( ; nCol < nStop; nCol++ )
688  {
689  if ( pWidths[nCol] )
690  nTotal = nTotal + pWidths[nCol];
691  else
692  {
693  if ( bFound )
694  {
 // A second unknown column: cannot be resolved from this entry.
695  bFound = false;
696  break; // for
697  }
698  bFound = true;
699  nHere = nCol;
700  }
701  }
702  if ( bFound && pE->nWidth > nTotal )
703  pWidths[nHere] = pE->nWidth - nTotal;
704  }
705  }
706  }
707  }
 // Pass 2: sum the known widths and count the columns still unknown.
708  sal_uInt16 nWidths = 0;
709  sal_uInt16 nUnknown = 0;
710  for ( nCol = 0; nCol < nColsPerRow; nCol++ )
711  {
712  if ( pWidths[nCol] )
713  nWidths = nWidths + pWidths[nCol];
714  else
715  nUnknown++;
716  }
717  if ( nUnknown )
718  {
 // Unknown columns share the remaining table width equally; if nothing
 // remains they share the whole table width instead.
719  sal_uInt16 nW = ((nWidths < nTableWidth) ?
720  ((nTableWidth - nWidths) / nUnknown) :
721  (nTableWidth / nUnknown));
722  for ( nCol = 0; nCol < nColsPerRow; nCol++ )
723  {
724  if ( !pWidths[nCol] )
725  pWidths[nCol] = nW;
726  }
727  }
 // Turn the widths into running offsets.
728  for ( nCol = 1; nCol <= nColsPerRow; nCol++ )
729  {
730  pOffsets[nCol] = pOffsets[nCol-1] + pWidths[nCol-1];
731  }
733  for ( nCol = 0; nCol <= nColsPerRow; nCol++ )
734  {
735  MakeColNoRef( pLocalColOffset, pOffsets[nCol], 0, 0, 0 );
736  }
737  nTableWidth = pOffsets[nColsPerRow] - pOffsets[0];
738 
 // Pass 3: give every entry of this table its offset and spanned width.
739  for ( size_t i = nFirstTableCell, nListSize = maList.size(); i < nListSize; ++i )
740  {
741  auto& pE = maList[ i ];
742  if ( pE->nTab == nTable )
743  {
744  nCol = pE->nCol - nColCntStart;
745  OSL_ENSURE( nCol < nColsPerRow, "ScHTMLLayoutParser::SetWidths: column overflow" );
746  if ( nCol < nColsPerRow )
747  {
748  pE->nOffset = pOffsets[nCol];
749  nCol = nCol + pE->nColOverlap;
750  if ( nCol > nColsPerRow )
751  nCol = nColsPerRow;
752  pE->nWidth = pOffsets[nCol] - pE->nOffset;
753  }
754  }
755  }
756  }
757  }
 // Widen the page if the table reaches beyond it.
758  if ( !pLocalColOffset->empty() )
759  {
760  sal_uInt16 nMax = static_cast<sal_uInt16>(pLocalColOffset->back());
761  if ( aPageSize.Width() < nMax )
762  aPageSize.setWidth( nMax );
763  }
 // Finally register every entry of this table in the global offsets,
 // computing still missing widths via GetWidth().
764  for ( size_t i = nFirstTableCell, nListSize = maList.size(); i < nListSize; ++i )
765  {
766  auto& pE = maList[ i ];
767  if ( pE->nTab == nTable )
768  {
769  if ( !pE->nWidth )
770  {
771  pE->nWidth = GetWidth(pE.get());
772  OSL_ENSURE( pE->nWidth, "SetWidths: pE->nWidth == 0" );
773  }
774  MakeCol( &maColOffset, pE->nOffset, pE->nWidth, nOffsetTolerance, nOffsetTolerance );
775  }
776  }
777 }
778 
780 {
 // Assigns column/row and pixel offset to entry pE and advances nColOffset.
 // SCCOL_MAX / SCROW_MAX act as "not set yet" markers that are replaced by
 // the running column and row counters.
 // NOTE(review): the listing skips lines inside this body; statements may be
 // missing from this excerpt.
781  if ( pE->nCol == SCCOL_MAX )
782  pE->nCol = nColCnt;
783  if ( pE->nRow == SCROW_MAX )
784  pE->nRow = nRowCnt;
785  SCCOL nCol = pE->nCol;
786  SkipLocked( pE ); // Change of columns to the right
787 
788  if ( nCol < pE->nCol )
789  { // Replaced
 // Take the offset of the new column from the local offsets, using the
 // last known offset when the column lies beyond them.
790  nCol = pE->nCol - nColCntStart;
791  SCCOL nCount = static_cast<SCCOL>(pLocalColOffset->size());
792  if ( nCol < nCount )
793  nColOffset = static_cast<sal_uInt16>((*pLocalColOffset)[nCol]);
794  else
795  nColOffset = static_cast<sal_uInt16>((*pLocalColOffset)[nCount - 1]);
796  }
797  pE->nOffset = nColOffset;
798  sal_uInt16 nWidth = GetWidth( pE );
800  if ( pE->nWidth )
801  pE->nWidth = nWidth;
802  nColOffset = pE->nOffset + nWidth;
805 }
806 
808 {
 // Finishes the current entry (mxActEntry): assigns its position, fixes up
 // its text selection, appends it to maList and starts a fresh entry.
809  bInCell = false;
810  if ( bTabInTabCell )
811  { // From the stack in TableOff
812  bTabInTabCell = false;
813  NewActEntry(maList.back().get()); // New free flying mxActEntry
814  return ;
815  }
816  if (mxActEntry->nTab == 0)
817  mxActEntry->nWidth = static_cast<sal_uInt16>(aPageSize.Width());
818  Colonize(mxActEntry.get());
 // The column counter continues behind the columns covered by this entry.
819  nColCnt = mxActEntry->nCol + mxActEntry->nColOverlap;
820  if ( nMaxCol < nColCnt )
821  nMaxCol = nColCnt; // TableStack MaxCol
822  if ( nColMax < nColCnt )
823  nColMax = nColCnt; // Global MaxCol for ScEEParser GetDimensions!
824  EntryEnd(mxActEntry.get(), pInfo->aSelection);
825  ESelection& rSel = mxActEntry->aSel;
826  while ( rSel.nStartPara < rSel.nEndPara
827  && pEdit->GetTextLen( rSel.nStartPara ) == 0 )
828  { // Strip preceding empty paragraphs
829  rSel.nStartPara++;
830  }
831  while ( rSel.nEndPos == 0 && rSel.nEndPara > rSel.nStartPara )
832  { // Strip successive empty paragraphs
833  rSel.nEndPara--;
834  rSel.nEndPos = pEdit->GetTextLen( rSel.nEndPara );
835  }
836  if ( rSel.nStartPara > rSel.nEndPara )
837  { // Gives GPF in CreateTextObject
838  OSL_FAIL( "CloseEntry: EditEngine ESelection Start > End" );
839  rSel.nEndPara = rSel.nStartPara;
840  }
 // Entries that contain text get the line-break cell attribute.
841  if ( rSel.HasRange() )
842  mxActEntry->aItemSet.Put( ScLineBreakCell(true) );
843  maList.push_back(mxActEntry);
844  NewActEntry(mxActEntry.get()); // New free flying mxActEntry
845 }
846 
847 IMPL_LINK( ScHTMLLayoutParser, HTMLImportHdl, HtmlImportInfo&, rInfo, void )
848 {
849  switch ( rInfo.eState )
850  {
851  case HtmlImportState::NextToken:
852  ProcToken( &rInfo );
853  break;
854  case HtmlImportState::Start:
855  break;
856  case HtmlImportState::End:
857  if ( rInfo.aSelection.nEndPos )
858  {
859  // If text remains: create paragraph, without calling CloseEntry().
860  if( bInCell ) // ...but only in opened table cells.
861  {
862  bInCell = false;
863  NextRow( &rInfo );
864  bInCell = true;
865  }
866  CloseEntry( &rInfo );
867  }
868  while ( nTableLevel > 0 )
869  TableOff( &rInfo ); // close tables, if </TABLE> missing
870  break;
871  case HtmlImportState::SetAttr:
872  break;
873  case HtmlImportState::InsertText:
874  break;
875  case HtmlImportState::InsertPara:
876  if ( nTableLevel < 1 )
877  {
878  CloseEntry( &rInfo );
879  NextRow( &rInfo );
880  }
881  break;
882  case HtmlImportState::InsertField:
883  break;
884  default:
885  OSL_FAIL("HTMLImportHdl: unknown ImportInfo.eState");
886  }
887 }
888 
890 {
 // Opens a table cell (TD or TH token): closes a still open cell, opens an
 // implicit table if there is none, then transfers the tag's options to the
 // current entry (mxActEntry).
891  if ( bInCell )
892  CloseEntry( pInfo );
893  if ( !nTableLevel )
894  {
895  OSL_FAIL( "dumbo doc! <TH> or <TD> without previous <TABLE>" );
896  TableOn( pInfo );
897  }
898  bInCell = true;
 // Header cells are centered unless an explicit ALIGN option is present.
899  bool bHorJustifyCenterTH = (pInfo->nToken == HtmlTokenId::TABLEHEADER_ON);
900  const HTMLOptions& rOptions = static_cast<HTMLParser*>(pInfo->pParser)->GetOptions();
901  for (const auto & rOption : rOptions)
902  {
903  switch( rOption.GetToken() )
904  {
905  case HtmlOptionId::COLSPAN:
906  {
 // NOTE(review): the value is used as parsed; zero or negative spans are
 // not rejected here - confirm whether they are handled elsewhere.
907  mxActEntry->nColOverlap = static_cast<SCCOL>(rOption.GetString().toInt32());
908  }
909  break;
910  case HtmlOptionId::ROWSPAN:
911  {
912  mxActEntry->nRowOverlap = static_cast<SCROW>(rOption.GetString().toInt32());
913  }
914  break;
915  case HtmlOptionId::ALIGN:
916  {
917  bHorJustifyCenterTH = false;
918  SvxCellHorJustify eVal;
919  const OUString& rOptVal = rOption.GetString();
920  if ( rOptVal.equalsIgnoreAsciiCase( OOO_STRING_SVTOOLS_HTML_AL_right ) )
921  eVal = SvxCellHorJustify::Right;
922  else if ( rOptVal.equalsIgnoreAsciiCase( OOO_STRING_SVTOOLS_HTML_AL_center ) )
923  eVal = SvxCellHorJustify::Center;
924  else if ( rOptVal.equalsIgnoreAsciiCase( OOO_STRING_SVTOOLS_HTML_AL_left ) )
925  eVal = SvxCellHorJustify::Left;
926  else
927  eVal = SvxCellHorJustify::Standard;
 // Unrecognised values leave the horizontal justification untouched.
928  if ( eVal != SvxCellHorJustify::Standard )
929  mxActEntry->aItemSet.Put(SvxHorJustifyItem(eVal, ATTR_HOR_JUSTIFY));
930  }
931  break;
932  case HtmlOptionId::VALIGN:
933  {
934  SvxCellVerJustify eVal;
935  const OUString& rOptVal = rOption.GetString();
936  if ( rOptVal.equalsIgnoreAsciiCase( OOO_STRING_SVTOOLS_HTML_VA_top ) )
937  eVal = SvxCellVerJustify::Top;
938  else if ( rOptVal.equalsIgnoreAsciiCase( OOO_STRING_SVTOOLS_HTML_VA_middle ) )
939  eVal = SvxCellVerJustify::Center;
940  else if ( rOptVal.equalsIgnoreAsciiCase( OOO_STRING_SVTOOLS_HTML_VA_bottom ) )
941  eVal = SvxCellVerJustify::Bottom;
942  else
943  eVal = SvxCellVerJustify::Standard;
 // Unlike ALIGN, the item is put even for unrecognised values (Standard).
944  mxActEntry->aItemSet.Put(SvxVerJustifyItem(eVal, ATTR_VER_JUSTIFY));
945  }
946  break;
947  case HtmlOptionId::WIDTH:
948  {
949  mxActEntry->nWidth = GetWidthPixel(rOption);
950  }
951  break;
952  case HtmlOptionId::BGCOLOR:
953  {
954  Color aColor;
955  rOption.GetColor( aColor );
956  mxActEntry->aItemSet.Put(SvxBrushItem(aColor, ATTR_BACKGROUND));
957  }
958  break;
959  case HtmlOptionId::SDVAL:
960  {
961  mxActEntry->pValStr = rOption.GetString();
962  }
963  break;
964  case HtmlOptionId::SDNUM:
965  {
966  mxActEntry->pNumStr = rOption.GetString();
967  }
968  break;
969  default: break;
970  }
971  }
972 
 // Place the entry at the current column/row counters of the current table.
973  mxActEntry->nCol = nColCnt;
974  mxActEntry->nRow = nRowCnt;
975  mxActEntry->nTab = nTable;
976 
977  if ( bHorJustifyCenterTH )
978  mxActEntry->aItemSet.Put(
979  SvxHorJustifyItem( SvxCellHorJustify::Center, ATTR_HOR_JUSTIFY) );
980 }
981 
983 {
 // If columns were already counted in the current row, that row was never
 // finished; finish it now before going on.
 // NOTE(review): the listing skips a line before the closing brace; a
 // statement may be missing from this excerpt.
984  if ( nColCnt > nColCntStart )
985  NextRow( pInfo ); // The optional TableRowOff wasn't there
987 }
988 
990 {
 // Ending a row simply advances to the next row.
991  NextRow( pInfo );
992 }
993 
995 {
 // Closes the current entry, but only while a cell is actually open.
996  if ( bInCell )
997  CloseEntry( pInfo ); // Only if it really was one
998 }
999 
1001 {
 // Opens a table. The first branch handles a table nested inside another
 // table, the second a table at document level. In both cases the current
 // state is pushed onto aTableStack and a WIDTH option, if present, sets
 // nTableWidth.
 // NOTE(review): the listing skips several lines inside this body (e.g. most
 // arguments of the make_unique calls); statements are missing from this
 // excerpt.
1002  if ( ++nTableLevel > 1 )
1003  { // Table in Table
1004  sal_uInt16 nTmpColOffset = nColOffset; // Will be changed in Colonize()
1005  Colonize(mxActEntry.get());
1006  aTableStack.push( std::make_unique<ScHTMLTableStackEntry>(
1010  bFirstRow ) );
 // Default width of the nested table: the width of the enclosing cell.
1011  sal_uInt16 nLastWidth = nTableWidth;
1012  nTableWidth = GetWidth(mxActEntry.get());
1013  if ( nTableWidth == nLastWidth && nMaxCol - nColCntStart > 1 )
1014  { // There must be more than one, so this one cannot be enough
1015  nTableWidth = nLastWidth / static_cast<sal_uInt16>((nMaxCol - nColCntStart));
1016  }
1017  nLastWidth = nTableWidth;
1018  if ( pInfo->nToken == HtmlTokenId::TABLE_ON )
1019  { // It can still be TD or TH, if we didn't have a TABLE earlier
1020  const HTMLOptions& rOptions = static_cast<HTMLParser*>(pInfo->pParser)->GetOptions();
1021  for (const auto & rOption : rOptions)
1022  {
1023  switch( rOption.GetToken() )
1024  {
1025  case HtmlOptionId::WIDTH:
1026  { // Percent: of document width or outer cell
1027  nTableWidth = GetWidthPixel( rOption );
1028  }
1029  break;
1030  case HtmlOptionId::BORDER:
1031  // Border is: ((pOption->GetString().Len() == 0) || (pOption->GetNumber() != 0));
1032  break;
1033  default: break;
1034  }
1035  }
1036  }
1037  bInCell = false;
1038  if ( bTabInTabCell && (nTableWidth >= nLastWidth) )
1039  { // Multiple tables in one cell, underneath each other
1040  bTabInTabCell = false;
1041  NextRow( pInfo );
1042  }
1043  else
1044  { // It starts in this cell or next to each other
1045  bTabInTabCell = false;
1047  nColOffset = nTmpColOffset;
1049  }
1050 
1051  NewActEntry(!maList.empty() ? maList.back().get() : nullptr); // New free flying mxActEntry
 // The nested table starts with an empty list of locked ranges.
1052  xLockedList = new ScRangeList;
1053  }
1054  else
1055  { // Simple table at the document level
1056  EntryEnd(mxActEntry.get(), pInfo->aSelection);
1057  if (mxActEntry->aSel.HasRange())
1058  { // Flying text left
1059  CloseEntry( pInfo );
1060  NextRow( pInfo );
1061  }
1062  aTableStack.push( std::make_unique<ScHTMLTableStackEntry>(
1066  bFirstRow ) );
1067  // As soon as we have multiple tables we need to be tolerant with the offsets.
1068  if (nMaxTable > 0)
1070  nTableWidth = 0;
1071  if ( pInfo->nToken == HtmlTokenId::TABLE_ON )
1072  {
1073  // It can still be TD or TH, if we didn't have a TABLE earlier
1074  const HTMLOptions& rOptions = static_cast<HTMLParser*>(pInfo->pParser)->GetOptions();
1075  for (const auto & rOption : rOptions)
1076  {
1077  switch( rOption.GetToken() )
1078  {
1079  case HtmlOptionId::WIDTH:
1080  { // Percent: of document width or outer cell
1081  nTableWidth = GetWidthPixel( rOption );
1082  }
1083  break;
1084  case HtmlOptionId::BORDER:
1085  //BorderOn is: ((pOption->GetString().Len() == 0) || (pOption->GetNumber() != 0));
1086  break;
1087  default: break;
1088  }
1089  }
1090  }
1091  }
 // Every opened table gets a new number; its cells start at the current
 // end of maList.
1092  nTable = ++nMaxTable;
1093  bFirstRow = true;
1094  nFirstTableCell = maList.size();
1095 
1098 }
1099 
1101 {
1102  if ( bInCell )
1103  CloseEntry( pInfo );
1104  if ( nColCnt > nColCntStart )
1105  TableRowOff( pInfo ); // The optional TableRowOff wasn't
1106  if ( !nTableLevel )
1107  {
1108  OSL_FAIL( "dumbo doc! </TABLE> without opening <TABLE>" );
1109  return ;
1110  }
1111  if ( --nTableLevel > 0 )
1112  { // Table in Table done
1113  if ( !aTableStack.empty() )
1114  {
1115  std::unique_ptr<ScHTMLTableStackEntry> pS = std::move(aTableStack.top());
1116  aTableStack.pop();
1117 
1118  auto& pE = pS->xCellEntry;
1119  SCROW nRows = nRowCnt - pS->nRowCnt;
1120  if ( nRows > 1 )
1121  { // Insert size of table at this position
1122  SCROW nRow = pS->nRowCnt;
1123  sal_uInt16 nTab = pS->nTable;
1124  if ( !pTables )
1125  pTables.reset( new OuterMap );
1126  // Height of outer table
1127  OuterMap::const_iterator it = pTables->find( nTab );
1128  InnerMap* pTab1;
1129  if ( it == pTables->end() )
1130  {
1131  pTab1 = new InnerMap;
1132  (*pTables)[ nTab ] = pTab1;
1133  }
1134  else
1135  pTab1 = it->second;
1136  SCROW nRowSpan = pE->nRowOverlap;
1137  SCROW nRowKGV;
1138  SCROW nRowsPerRow1; // Outer table
1139  SCROW nRowsPerRow2; // Inner table
1140  if ( nRowSpan > 1 )
1141  { // LCM to which we can map the inner and outer rows
1142  nRowKGV = std::lcm( nRowSpan, nRows );
1143  nRowsPerRow1 = nRowKGV / nRowSpan;
1144  nRowsPerRow2 = nRowKGV / nRows;
1145  }
1146  else
1147  {
1148  nRowKGV = nRowsPerRow1 = nRows;
1149  nRowsPerRow2 = 1;
1150  }
1151  InnerMap* pTab2 = nullptr;
1152  if ( nRowsPerRow2 > 1 )
1153  { // Height of the inner table
1154  pTab2 = new InnerMap;
1155  (*pTables)[ nTable ] = pTab2;
1156  }
1157  // Abuse void* Data entry of the Table class for height mapping
1158  if ( nRowKGV > 1 )
1159  {
1160  if ( nRowsPerRow1 > 1 )
1161  { // Outer
1162  for ( SCROW j=0; j < nRowSpan; j++ )
1163  {
1164  sal_uLong nRowKey = nRow + j;
1165  SCROW nR = (*pTab1)[ nRowKey ];
1166  if ( !nR )
1167  (*pTab1)[ nRowKey ] = nRowsPerRow1;
1168  else if ( nRowsPerRow1 > nR )
1169  (*pTab1)[ nRowKey ] = nRowsPerRow1;
1170  //TODO: How can we improve on this?
1171  else if ( nRowsPerRow1 < nR && nRowSpan == 1
1172  && nTable == nMaxTable )
1173  { // Still some space left, merge in a better way (if possible)
1174  SCROW nAdd = nRowsPerRow1 - (nR % nRowsPerRow1);
1175  nR += nAdd;
1176  if ( (nR % nRows) == 0 )
1177  { // Only if representable
1178  SCROW nR2 = (*pTab1)[ nRowKey+1 ];
1179  if ( nR2 > nAdd )
1180  { // Only if we really have enough space
1181  (*pTab1)[ nRowKey ] = nR;
1182  (*pTab1)[ nRowKey+1 ] = nR2 - nAdd;
1183  nRowsPerRow2 = nR / nRows;
1184  }
1185  }
1186  }
1187  }
1188  }
1189  if ( nRowsPerRow2 > 1 )
1190  { // Inner
1191  if ( !pTab2 )
1192  { // nRowsPerRow2 could be've been incremented
1193  pTab2 = new InnerMap;
1194  (*pTables)[ nTable ] = pTab2;
1195  }
1196  for ( SCROW j=0; j < nRows; j++ )
1197  {
1198  sal_uLong nRowKey = nRow + j;
1199  (*pTab2)[ nRowKey ] = nRowsPerRow2;
1200  }
1201  }
1202  }
1203  }
1204 
1205  SetWidths();
1206 
1207  if ( !pE->nWidth )
1208  pE->nWidth = nTableWidth;
1209  else if ( pE->nWidth < nTableWidth )
1210  {
1211  sal_uInt16 nOldOffset = pE->nOffset + pE->nWidth;
1212  sal_uInt16 nNewOffset = pE->nOffset + nTableWidth;
1213  ModifyOffset( pS->pLocalColOffset, nOldOffset, nNewOffset, nOffsetTolerance );
1214  sal_uInt16 nTmp = nNewOffset - pE->nOffset - pE->nWidth;
1215  pE->nWidth = nNewOffset - pE->nOffset;
1216  pS->nTableWidth = pS->nTableWidth + nTmp;
1217  if ( pS->nColOffset >= nOldOffset )
1218  pS->nColOffset = pS->nColOffset + nTmp;
1219  }
1220 
1221  nColCnt = pE->nCol + pE->nColOverlap;
1222  nRowCnt = pS->nRowCnt;
1223  nColCntStart = pS->nColCntStart;
1224  nMaxCol = pS->nMaxCol;
1225  nTable = pS->nTable;
1226  nTableWidth = pS->nTableWidth;
1227  nFirstTableCell = pS->nFirstTableCell;
1228  nColOffset = pS->nColOffset;
1229  nColOffsetStart = pS->nColOffsetStart;
1230  bFirstRow = pS->bFirstRow;
1231  xLockedList = pS->xLockedList;
1232  pLocalColOffset = pS->pLocalColOffset;
1233  // mxActEntry is kept around if a table is started in the same row
1234  // (anything's possible in HTML); will be deleted by CloseEntry
1235  mxActEntry = pE;
1236  }
1237  bTabInTabCell = true;
1238  bInCell = true;
1239  }
1240  else
1241  { // Simple table finished
1242  SetWidths();
1243  nMaxCol = 0;
1244  nTable = 0;
1245  if ( !aTableStack.empty() )
1246  {
1247  ScHTMLTableStackEntry* pS = aTableStack.top().get();
1248  delete pLocalColOffset;
1250  aTableStack.pop();
1251  }
1252  }
1253 }
1254 
1256 {
1257  mxActEntry->maImageList.push_back(std::make_unique<ScHTMLImage>());
1258  ScHTMLImage* pImage = mxActEntry->maImageList.back().get();
1259  const HTMLOptions& rOptions = static_cast<HTMLParser*>(pInfo->pParser)->GetOptions();
1260  for (const auto & rOption : rOptions)
1261  {
1262  switch( rOption.GetToken() )
1263  {
1264  case HtmlOptionId::SRC:
1265  {
1266  pImage->aURL = INetURLObject::GetAbsURL( aBaseURL, rOption.GetString() );
1267  }
1268  break;
1269  case HtmlOptionId::ALT:
1270  {
1271  if (!mxActEntry->bHasGraphic)
1272  { // ALT text only if not any image loaded
1273  if (!mxActEntry->aAltText.isEmpty())
1274  mxActEntry->aAltText += "; ";
1275 
1276  mxActEntry->aAltText += rOption.GetString();
1277  }
1278  }
1279  break;
1280  case HtmlOptionId::WIDTH:
1281  {
1282  pImage->aSize.setWidth( static_cast<tools::Long>(rOption.GetNumber()) );
1283  }
1284  break;
1285  case HtmlOptionId::HEIGHT:
1286  {
1287  pImage->aSize.setHeight( static_cast<tools::Long>(rOption.GetNumber()) );
1288  }
1289  break;
1290  case HtmlOptionId::HSPACE:
1291  {
1292  pImage->aSpace.setX( static_cast<tools::Long>(rOption.GetNumber()) );
1293  }
1294  break;
1295  case HtmlOptionId::VSPACE:
1296  {
1297  pImage->aSpace.setY( static_cast<tools::Long>(rOption.GetNumber()) );
1298  }
1299  break;
1300  default: break;
1301  }
1302  }
1303  if (pImage->aURL.isEmpty())
1304  {
1305  OSL_FAIL( "Image: graphic without URL ?!?" );
1306  return ;
1307  }
1308 
1309  sal_uInt16 nFormat;
1310  std::unique_ptr<Graphic> pGraphic(new Graphic);
1312  if ( ERRCODE_NONE != GraphicFilter::LoadGraphic( pImage->aURL, pImage->aFilterName,
1313  *pGraphic, &rFilter, &nFormat ) )
1314  {
1315  return ; // Bad luck
1316  }
1317  if (!mxActEntry->bHasGraphic)
1318  { // discard any ALT text in this cell if we have any image
1319  mxActEntry->bHasGraphic = true;
1320  mxActEntry->aAltText.clear();
1321  }
1322  pImage->aFilterName = rFilter.GetImportFormatName( nFormat );
1323  pImage->pGraphic = std::move( pGraphic );
1324  if ( !(pImage->aSize.Width() && pImage->aSize.Height()) )
1325  {
1327  pImage->aSize = pDefaultDev->LogicToPixel( pImage->pGraphic->GetPrefSize(),
1328  pImage->pGraphic->GetPrefMapMode() );
1329  }
1330  if (mxActEntry->maImageList.empty())
1331  return;
1332 
1333  tools::Long nWidth = 0;
1334  for (const std::unique_ptr<ScHTMLImage> & pI : mxActEntry->maImageList)
1335  {
1336  if ( pI->nDir & nHorizontal )
1337  nWidth += pI->aSize.Width() + 2 * pI->aSpace.X();
1338  else
1339  nWidth = 0;
1340  }
1341  if ( mxActEntry->nWidth
1342  && (nWidth + pImage->aSize.Width() + 2 * pImage->aSpace.X()
1343  >= mxActEntry->nWidth) )
1344  mxActEntry->maImageList.back()->nDir = nVertical;
1345 }
1346 
1348 {
1349  const HTMLOptions& rOptions = static_cast<HTMLParser*>(pInfo->pParser)->GetOptions();
1350  for (const auto & rOption : rOptions)
1351  {
1352  if( rOption.GetToken() == HtmlOptionId::WIDTH )
1353  {
1354  sal_uInt16 nVal = GetWidthPixel( rOption );
1355  MakeCol( pLocalColOffset, nColOffset, nVal, 0, 0 );
1356  nColOffset = nColOffset + nVal;
1357  }
1358  }
1359 }
1360 
// Converts the value of a WIDTH option into a pixel count.
//  - A value containing '%' is taken as a percentage of nTableWidth, or of
//    aPageSize.Width() when nTableWidth is 0.
//  - A value containing '*' (relative width) is not supported yet and yields 0.
//  - Any other value is returned as its plain numeric value.
// Every result is narrowed to sal_uInt16 with a static_cast.
// NOTE(review): the narrowing is unchecked, so values that do not fit into
// 16 bits are presumably truncated rather than limited - confirm this is acceptable.
1361 sal_uInt16 ScHTMLLayoutParser::GetWidthPixel( const HTMLOption& rOption )
1362 {
1363  const OUString& rOptVal = rOption.GetString();
1364  if ( rOptVal.indexOf('%') != -1 )
1365  { // Percent
      // Reference width: the current table width if one is set, otherwise the page width
1366  sal_uInt16 nW = (nTableWidth ? nTableWidth : static_cast<sal_uInt16>(aPageSize.Width()));
1367  return static_cast<sal_uInt16>((rOption.GetNumber() * nW) / 100);
1368  }
1369  else
1370  {
1371  if ( rOptVal.indexOf('*') != -1 )
1372  { // Relative to what?
1373  // TODO: Collect all relative values in ColArray and then MakeCol
1374  return 0;
1375  }
1376  else
1377  return static_cast<sal_uInt16>(rOption.GetNumber()); // Pixel
1378  }
1379 }
1380 
1382 {
1383  const HTMLOptions& rOptions = static_cast<HTMLParser*>(pInfo->pParser)->GetOptions();
1384  for (const auto & rOption : rOptions)
1385  {
1386  if( rOption.GetToken() == HtmlOptionId::NAME )
1387  mxActEntry->pName = rOption.GetString();
1388  }
1389 }
1390 
1392 {
1393  ESelection& rSel = mxActEntry->aSel;
1394  return rSel.nStartPara == rSel.nEndPara &&
1395  rSel.nStartPara <= pInfo->aSelection.nEndPara &&
1396  pEdit->GetTextLen( rSel.nStartPara ) == 0;
1397 }
1398 
1400 {
1401  if ( !IsAtBeginningOfText( pInfo ) )
1402  return;
1403 
1404 // Only at the start of the text; applies to whole line
1405  const HTMLOptions& rOptions = static_cast<HTMLParser*>(pInfo->pParser)->GetOptions();
1406  for (const auto & rOption : rOptions)
1407  {
1408  switch( rOption.GetToken() )
1409  {
1410  case HtmlOptionId::FACE :
1411  {
1412  const OUString& rFace = rOption.GetString();
1413  OUStringBuffer aFontName;
1414  sal_Int32 nPos = 0;
1415  while( nPos != -1 )
1416  {
1417  // Font list, VCL uses the semicolon as separator
1418  // HTML uses the comma
1419  OUString aFName = rFace.getToken( 0, ',', nPos );
1420  aFName = comphelper::string::strip(aFName, ' ');
1421  if( !aFontName.isEmpty() )
1422  aFontName.append(";");
1423  aFontName.append(aFName);
1424  }
1425  if ( !aFontName.isEmpty() )
1426  mxActEntry->aItemSet.Put( SvxFontItem( FAMILY_DONTKNOW,
1427  aFontName.makeStringAndClear(), EMPTY_OUSTRING, PITCH_DONTKNOW,
1428  RTL_TEXTENCODING_DONTKNOW, ATTR_FONT ) );
1429  }
1430  break;
1431  case HtmlOptionId::SIZE :
1432  {
1433  sal_uInt16 nSize = static_cast<sal_uInt16>(rOption.GetNumber());
1434  if ( nSize == 0 )
1435  nSize = 1;
1436  else if ( nSize > SC_HTML_FONTSIZES )
1437  nSize = SC_HTML_FONTSIZES;
1438  mxActEntry->aItemSet.Put( SvxFontHeightItem(
1439  maFontHeights[nSize-1], 100, ATTR_FONT_HEIGHT ) );
1440  }
1441  break;
1442  case HtmlOptionId::COLOR :
1443  {
1444  Color aColor;
1445  rOption.GetColor( aColor );
1446  mxActEntry->aItemSet.Put( SvxColorItem( aColor, ATTR_FONT_COLOR ) );
1447  }
1448  break;
1449  default: break;
1450  }
1451  }
1452 }
1453 
1455 {
1456  switch ( pInfo->nToken )
1457  {
1458  case HtmlTokenId::META:
1459  {
1460  HTMLParser* pParser = static_cast<HTMLParser*>(pInfo->pParser);
1461  uno::Reference<document::XDocumentPropertiesSupplier> xDPS(
1462  mpDoc->GetDocumentShell()->GetModel(), uno::UNO_QUERY_THROW);
1463  pParser->ParseMetaOptions(
1464  xDPS->getDocumentProperties(),
1466  }
1467  break;
1468  case HtmlTokenId::TITLE_ON:
1469  {
1470  bInTitle = true;
1471  aString.clear();
1472  }
1473  break;
1474  case HtmlTokenId::TITLE_OFF:
1475  {
1476  if ( bInTitle && !aString.isEmpty() )
1477  {
1478  // Remove blanks from line breaks
1479  aString = aString.trim();
1480  uno::Reference<document::XDocumentPropertiesSupplier> xDPS(
1482  uno::UNO_QUERY_THROW);
1483  xDPS->getDocumentProperties()->setTitle(aString);
1484  }
1485  bInTitle = false;
1486  }
1487  break;
1488  case HtmlTokenId::TABLE_ON:
1489  {
1490  TableOn( pInfo );
1491  }
1492  break;
1493  case HtmlTokenId::COL_ON:
1494  {
1495  ColOn( pInfo );
1496  }
1497  break;
1498  case HtmlTokenId::TABLEHEADER_ON: // Opens row
1499  {
1500  if ( bInCell )
1501  CloseEntry( pInfo );
1502  // Do not set bInCell to true, TableDataOn does that
1503  mxActEntry->aItemSet.Put(
1505  [[fallthrough]];
1506  }
1507  case HtmlTokenId::TABLEDATA_ON: // Opens cell
1508  {
1509  TableDataOn( pInfo );
1510  }
1511  break;
1512  case HtmlTokenId::TABLEHEADER_OFF:
1513  case HtmlTokenId::TABLEDATA_OFF: // Closes cell
1514  {
1515  TableDataOff( pInfo );
1516  }
1517  break;
1518  case HtmlTokenId::TABLEROW_ON: // Before first cell in row
1519  {
1520  TableRowOn( pInfo );
1521  }
1522  break;
1523  case HtmlTokenId::TABLEROW_OFF: // After last cell in row
1524  {
1525  TableRowOff( pInfo );
1526  }
1527  break;
1528  case HtmlTokenId::TABLE_OFF:
1529  {
1530  TableOff( pInfo );
1531  }
1532  break;
1533  case HtmlTokenId::IMAGE:
1534  {
1535  Image( pInfo );
1536  }
1537  break;
1538  case HtmlTokenId::PARABREAK_OFF:
1539  { // We continue vertically after an image
1540  if (!mxActEntry->maImageList.empty())
1541  mxActEntry->maImageList.back()->nDir = nVertical;
1542  }
1543  break;
1544  case HtmlTokenId::ANCHOR_ON:
1545  {
1546  AnchorOn( pInfo );
1547  }
1548  break;
1549  case HtmlTokenId::FONT_ON :
1550  {
1551  FontOn( pInfo );
1552  }
1553  break;
1554  case HtmlTokenId::BIGPRINT_ON :
1555  {
1556  // TODO: Remember current font size and increase by 1
1557  if ( IsAtBeginningOfText( pInfo ) )
1558  mxActEntry->aItemSet.Put( SvxFontHeightItem(
1559  maFontHeights[3], 100, ATTR_FONT_HEIGHT ) );
1560  }
1561  break;
1562  case HtmlTokenId::SMALLPRINT_ON :
1563  {
1564  // TODO: Remember current font size and decrease by 1
1565  if ( IsAtBeginningOfText( pInfo ) )
1566  mxActEntry->aItemSet.Put( SvxFontHeightItem(
1567  maFontHeights[0], 100, ATTR_FONT_HEIGHT ) );
1568  }
1569  break;
1570  case HtmlTokenId::BOLD_ON :
1571  case HtmlTokenId::STRONG_ON :
1572  {
1573  if ( IsAtBeginningOfText( pInfo ) )
1574  mxActEntry->aItemSet.Put( SvxWeightItem( WEIGHT_BOLD,
1575  ATTR_FONT_WEIGHT ) );
1576  }
1577  break;
1578  case HtmlTokenId::ITALIC_ON :
1579  case HtmlTokenId::EMPHASIS_ON :
1580  case HtmlTokenId::ADDRESS_ON :
1581  case HtmlTokenId::BLOCKQUOTE_ON :
1582  case HtmlTokenId::BLOCKQUOTE30_ON :
1583  case HtmlTokenId::CITATION_ON :
1584  case HtmlTokenId::VARIABLE_ON :
1585  {
1586  if ( IsAtBeginningOfText( pInfo ) )
1587  mxActEntry->aItemSet.Put( SvxPostureItem( ITALIC_NORMAL,
1588  ATTR_FONT_POSTURE ) );
1589  }
1590  break;
1591  case HtmlTokenId::DEFINSTANCE_ON :
1592  {
1593  if ( IsAtBeginningOfText( pInfo ) )
1594  {
1595  mxActEntry->aItemSet.Put( SvxWeightItem( WEIGHT_BOLD,
1596  ATTR_FONT_WEIGHT ) );
1597  mxActEntry->aItemSet.Put( SvxPostureItem( ITALIC_NORMAL,
1598  ATTR_FONT_POSTURE ) );
1599  }
1600  }
1601  break;
1602  case HtmlTokenId::UNDERLINE_ON :
1603  {
1604  if ( IsAtBeginningOfText( pInfo ) )
1606  ATTR_FONT_UNDERLINE ) );
1607  }
1608  break;
1609  case HtmlTokenId::TEXTTOKEN:
1610  {
1611  if ( bInTitle )
1612  aString += pInfo->aText;
1613  }
1614  break;
1615  default: ;
1616  }
1617 }
1618 
1619 // HTML DATA QUERY PARSER
1620 
// Returns rValue limited to the closed range [rMin, rMax].
// Thin file-local wrapper that forwards to std::clamp, so rMin must not be
// greater than rMax.
1621 template< typename Type >
1622 static Type getLimitedValue( const Type& rValue, const Type& rMin, const Type& rMax )
1623 { return std::clamp( rValue, rMin, rMax ); }
1624 
// Constructs an entry whose ScEEParseEntry base is initialized from rItemSet.
// The entry starts with mbImportAlways == false, stores nTableId in nTab and
// resets bEntirePara to false.
1625 ScHTMLEntry::ScHTMLEntry( const SfxItemSet& rItemSet, ScHTMLTableId nTableId ) :
1626  ScEEParseEntry( rItemSet ),
1627  mbImportAlways( false )
1628 {
1629  nTab = nTableId;
1630  bEntirePara = false;
1631 }
1632 
1634 {
1635  return mbImportAlways || aSel.HasRange() || !aAltText.isEmpty() || IsTable();
1636 }
1637 
1639 {
1640  // set start position
1643  // adjust end position
1645  {
1648  }
1649 }
1650 
1652 {
1653  OSL_ENSURE( (aSel.nEndPara < rInfo.aSelection.nEndPara) ||
1654  ((aSel.nEndPara == rInfo.aSelection.nEndPara) && (aSel.nEndPos <= rInfo.aSelection.nEndPos)),
1655  "ScHTMLQueryParser::AdjustEntryEnd - invalid end position" );
1656  // set end position
1657  aSel.nEndPara = rInfo.aSelection.nEndPara;
1658  aSel.nEndPos = rInfo.aSelection.nEndPos;
1659 }
1660 
// Shrinks the selection aSel so that it neither starts nor ends with an empty
// paragraph. Paragraph lengths are queried from rEditEngine. Both loops stop
// as soon as start and end paragraph are the same, so the selection never
// collapses past a single paragraph.
1661 void ScHTMLEntry::Strip( const EditEngine& rEditEngine )
1662 {
1663  // strip leading empty paragraphs: the start position is at or behind the
      // end of its paragraph, so move on to the beginning of the next paragraph
1664  while( (aSel.nStartPara < aSel.nEndPara) && (rEditEngine.GetTextLen( aSel.nStartPara ) <= aSel.nStartPos) )
1665  {
1666  ++aSel.nStartPara;
1667  aSel.nStartPos = 0;
1668  }
1669  // strip trailing empty paragraphs: the end position is at the very start of
      // its paragraph, so move back to the end of the previous paragraph
1670  while( (aSel.nStartPara < aSel.nEndPara) && (aSel.nEndPos == 0) )
1671  {
1672  --aSel.nEndPara;
1673  aSel.nEndPos = rEditEngine.GetTextLen( aSel.nEndPara );
1674  }
1675 }
1676 
// Container for ScHTMLTable objects, keyed by their table id.
// The tables are held through std::shared_ptr in a std::map (maTables).
// NOTE(review): listing lines 1695 and 1697 are absent from this rendering;
// the members mrParentTable and mpCurrTable used below are presumably
// declared there - confirm against the original file.
1684 class ScHTMLTableMap final
1685 {
1686 private:
1687  typedef std::shared_ptr< ScHTMLTable > ScHTMLTablePtr;
1688  typedef std::map< ScHTMLTableId, ScHTMLTablePtr > ScHTMLTableStdMap;
1689 
1690 public:
1691  typedef ScHTMLTableStdMap::iterator iterator;
1692  typedef ScHTMLTableStdMap::const_iterator const_iterator;
1693 
1694 private:
      // Table id -> table stored in this container
1696  ScHTMLTableStdMap maTables;
1698 
1699 public:
1700  explicit ScHTMLTableMap( ScHTMLTable& rParentTable );
1701 
      // Read-only iteration over the tables stored directly in this container
1702  const_iterator begin() const { return maTables.begin(); }
1703  const_iterator end() const { return maTables.end(); }
1704 
      // Looks up the table with the passed id; bDeep defaults to true
1708  ScHTMLTable* FindTable( ScHTMLTableId nTableId, bool bDeep = true ) const;
1709 
      // Creates a new table, stores it in maTables and returns a pointer to it
1712  ScHTMLTable* CreateTable( const HtmlImportInfo& rInfo, bool bPreFormText );
1713 
1714 private:
      // Remembers pTable in mpCurrTable; a null pointer leaves the old value untouched.
      // Callable from const members, so mpCurrTable is presumably declared mutable.
1716  void SetCurrTable( ScHTMLTable* pTable ) const
1717  { if( pTable ) mpCurrTable = pTable; }
1718 };
1719 
1721  mrParentTable(rParentTable),
1722  mpCurrTable(nullptr)
1723 {
1724 }
1725 
1727 {
1728  ScHTMLTable* pResult = nullptr;
1729  if( mpCurrTable && (nTableId == mpCurrTable->GetTableId()) )
1730  pResult = mpCurrTable; // cached table
1731  else
1732  {
1733  const_iterator aFind = maTables.find( nTableId );
1734  if( aFind != maTables.end() )
1735  pResult = aFind->second.get(); // table from this container
1736  }
1737 
1738  // not found -> search deep in nested tables
1739  if( !pResult && bDeep )
1740  for( const_iterator aIter = begin(), aEnd = end(); !pResult && (aIter != aEnd); ++aIter )
1741  pResult = aIter->second->FindNestedTable( nTableId );
1742 
1743  SetCurrTable( pResult );
1744  return pResult;
1745 }
1746 
// Creates a new table with mrParentTable as its parent, stores it in maTables
// under its own table id and remembers it as the current table.
// The new object is handed to a shared_ptr before the map is touched, so it is
// released again if the map insertion throws.
// Returns a non-owning pointer to the new table; maTables keeps the ownership.
1747 ScHTMLTable* ScHTMLTableMap::CreateTable( const HtmlImportInfo& rInfo, bool bPreFormText )
1748 {
1749  ScHTMLTablePtr xTable( new ScHTMLTable( mrParentTable, rInfo, bPreFormText ) );
1750  maTables[ xTable->GetTableId() ] = xTable;
1751  SetCurrTable( xTable.get() );
1752  return xTable.get();
1753 }
1754 
1755 namespace {
1756 
// Forward iterator over the tables of a ScHTMLTableMap that also accepts a
// null map pointer, in which case is() is false from the start.
// NOTE(review): listing lines 1775 and 1776 are absent from this rendering;
// the members maIter and maEnd used below are presumably declared there.
1763 class ScHTMLTableIterator
1764 {
1765 public:
      // pTableMap may be null
1768  explicit ScHTMLTableIterator( const ScHTMLTableMap* pTableMap );
1769 
      // True while a map is set and the iterator has not reached its end
1770  bool is() const { return mpTableMap && maIter != maEnd; }
      // Access to the current table; only meaningful while is() returns true
1771  ScHTMLTable* operator->() { return maIter->second.get(); }
      // Advances to the next table
1772  ScHTMLTableIterator& operator++() { ++maIter; return *this; }
1773 
1774 private:
      // The iterated map, may be null
1777  const ScHTMLTableMap* mpTableMap;
1778 };
1779 
1780 }
1781 
// Stores pTableMap and, if it is not null, positions the iterator at the
// first table of the map. For a null map maIter and maEnd are left
// default-initialized; is() checks mpTableMap first and never compares them.
1782 ScHTMLTableIterator::ScHTMLTableIterator( const ScHTMLTableMap* pTableMap ) :
1783  mpTableMap(pTableMap)
1784 {
1785  if( pTableMap )
1786  {
1787  maIter = pTableMap->begin();
1788  maEnd = pTableMap->end();
1789  }
1790 }
1791 
1793  mnTableId( rnUnusedId ),
1794  mrnUnusedId( rnUnusedId )
1795 {
1796  ++mrnUnusedId;
1797 }
1798 
// Constructs a table nested inside rParentTable.
// The table id is taken from the parent's unused-id counter, the table item
// set is initialized from the parent's current item set, and edit engine,
// parse list and parser are shared with the parent.
// For preformatted text (bPreFormText) one row and one 1x1 cell are opened
// immediately. Otherwise the options of the tag in rInfo are read:
//  - BORDER switches mbBorderOn on if its string is empty or its number is not 0
//  - ID is stored as maTableName
// In both cases a new current entry is created at the end.
// NOTE(review): listing line 1822 is absent from this rendering, so the
// non-preformatted branch may contain a statement that is not shown here.
1799 ScHTMLTable::ScHTMLTable( ScHTMLTable& rParentTable, const HtmlImportInfo& rInfo, bool bPreFormText ) :
1800  mpParentTable( &rParentTable ),
1801  maTableId( rParentTable.maTableId.mrnUnusedId ),
1802  maTableItemSet( rParentTable.GetCurrItemSet() ),
1803  mrEditEngine( rParentTable.mrEditEngine ),
1804  mrEEParseList( rParentTable.mrEEParseList ),
1805  mpCurrEntryVector( nullptr ),
1806  maSize( 1, 1 ),
1807  mpParser(rParentTable.mpParser),
1808  mbBorderOn( false ),
1809  mbPreFormText( bPreFormText ),
1810  mbRowOn( false ),
1811  mbDataOn( false ),
1812  mbPushEmptyLine( false ),
1813  mbCaptionOn ( false )
1814 {
1815  if( mbPreFormText )
1816  {
      // preformatted text has no row/cell tags: open a single row with a single cell
1817  ImplRowOn();
1818  ImplDataOn( ScHTMLSize( 1, 1 ) );
1819  }
1820  else
1821  {
1823  const HTMLOptions& rOptions = static_cast<HTMLParser*>(rInfo.pParser)->GetOptions();
1824  for (const auto& rOption : rOptions)
1825  {
1826  switch( rOption.GetToken() )
1827  {
1828  case HtmlOptionId::BORDER:
      // a BORDER option without value counts as "on", as does any non-zero number
1829  mbBorderOn = rOption.GetString().isEmpty() || (rOption.GetNumber() != 0);
1830  break;
1831  case HtmlOptionId::ID:
1832  maTableName = rOption.GetString();
1833  break;
1834  default: break;
1835  }
1836  }
1837  }
1838 
1839  CreateNewEntry( rInfo );
1840 }
1841 
1843  SfxItemPool& rPool,
1844  EditEngine& rEditEngine,
1845  std::vector<std::shared_ptr<ScEEParseEntry>>& rEEParseList,
1846  ScHTMLTableId& rnUnusedId, ScHTMLParser* pParser
1847 ) :
1848  mpParentTable( nullptr ),
1849  maTableId( rnUnusedId ),
1850  maTableItemSet( rPool ),
1851  mrEditEngine( rEditEngine ),
1852  mrEEParseList( rEEParseList ),
1853  mpCurrEntryVector( nullptr ),
1854  maSize( 1, 1 ),
1855  mpParser(pParser),
1856  mbBorderOn( false ),
1857  mbPreFormText( false ),
1858  mbRowOn( false ),
1859  mbDataOn( false ),
1860  mbPushEmptyLine( false ),
1861  mbCaptionOn ( false )
1862 {
1863  // open the first "cell" of the document
1864  ImplRowOn();
1865  ImplDataOn( ScHTMLSize( 1, 1 ) );
1867 }
1868 
1870 {
1871 }
1872 
1874 {
1875  // first try cell item set, then row item set, then table item set
1877 }
1878 
1880 {
1881  ScHTMLSize aSpan( 1, 1 );
1882  const ScRange* pRange = maVMergedCells.Find( rCellPos.MakeAddr() );
1883  if (!pRange)
1884  pRange = maHMergedCells.Find( rCellPos.MakeAddr() );
1885  if (pRange)
1886  aSpan.Set( pRange->aEnd.Col() - pRange->aStart.Col() + 1, pRange->aEnd.Row() - pRange->aStart.Row() + 1 );
1887  return aSpan;
1888 }
1889 
1891 {
1892  return mxNestedTables ? mxNestedTables->FindTable( nTableId ) : nullptr;
1893 }
1894 
// Puts rItem into the item set of the current entry, but only while that
// entry reports IsEmpty(). Without a current entry the call asserts (debug
// diagnostics) and does nothing else.
1895 void ScHTMLTable::PutItem( const SfxPoolItem& rItem )
1896 {
1897  OSL_ENSURE( mxCurrEntry, "ScHTMLTable::PutItem - no current entry" );
1898  if( mxCurrEntry && mxCurrEntry->IsEmpty() )
1899  mxCurrEntry->GetItemSet().Put( rItem );
1900 }
1901 
1903 {
1904  OSL_ENSURE( mxCurrEntry, "ScHTMLTable::PutText - no current entry" );
1905  if( mxCurrEntry )
1906  {
1907  if( !mxCurrEntry->HasContents() && IsSpaceCharInfo( rInfo ) )
1908  mxCurrEntry->AdjustStart( rInfo );
1909  else
1910  mxCurrEntry->AdjustEnd( rInfo );
1911  if (mbCaptionOn)
1912  maCaptionBuffer.append(rInfo.aText);
1913 
1914  }
1915 }
1916 
1918 {
1919  if( mxCurrEntry && mbDataOn && !IsEmptyCell() )
1920  mxCurrEntry->SetImportAlways();
1921  PushEntry( rInfo );
1922  CreateNewEntry( rInfo );
1924 }
1925 
1927 {
1928  // empty line, if <br> is at start of cell
1930 }
1931 
1933 {
1934  // call directly, InsertPara() has not been called before
1936 }
1937 
1939 {
1940  // empty line, if <p>, </p>, <h?>, or </h*> are not at start of cell
1942 }
1943 
1945 {
1946  OSL_ENSURE( mxCurrEntry, "ScHTMLTable::AnchorOn - no current entry" );
1947  // don't skip entries with single hyperlinks
1948  if( mxCurrEntry )
1949  mxCurrEntry->SetImportAlways();
1950 }
1951 
1953 {
1954  PushEntry( rInfo );
1955  return InsertNestedTable( rInfo, false );
1956 }
1957 
1959 {
1960  return mbPreFormText ? this : CloseTable( rInfo );
1961 }
1962 
1964 {
1965  mbCaptionOn = true;
1966  maCaptionBuffer.setLength(0);
1967 }
1968 
1970 {
1971  if (!mbCaptionOn)
1972  return;
1973  maCaption = maCaptionBuffer.makeStringAndClear().trim();
1974  mbCaptionOn = false;
1975 }
1976 
1978 {
1979  PushEntry( rInfo );
1980  return InsertNestedTable( rInfo, true );
1981 }
1982 
1984 {
1985  return mbPreFormText ? CloseTable( rInfo ) : this;
1986 }
1987 
1989 {
1990  PushEntry( rInfo, true );
1991  if( mpParentTable && !mbPreFormText ) // no rows allowed in global and preformatted tables
1992  {
1993  ImplRowOn();
1995  }
1996  CreateNewEntry( rInfo );
1997 }
1998 
2000 {
2001  PushEntry( rInfo, true );
2002  if( mpParentTable && !mbPreFormText ) // no rows allowed in global and preformatted tables
2003  ImplRowOff();
2004  CreateNewEntry( rInfo );
2005 }
2006 
2007 namespace {
2008 
// Decodes the backslash escapes of a number format string and returns the
// decoded string.
//  - A backslash followed by a run of digits '0'-'9' is replaced by the single
//    character whose code is that run parsed as a hexadecimal number.
//  - Any other backslash is dropped; the character behind it is then handled
//    by the next loop iteration (copied literally, unless it is a backslash
//    itself, which starts a new escape).
//  - All remaining characters are copied unchanged.
// NOTE(review): the digit run only accepts '0'-'9' although it is parsed with
// radix 16, so hex digits a-f end the run - confirm that this is intended.
// NOTE(review): the decoded value is narrowed to sal_Unicode without a range check.
2013 OUString decodeNumberFormat(const OUString& rFmt)
2014 {
2015  OUStringBuffer aBuf;
2016  const sal_Unicode* p = rFmt.getStr();
2017  sal_Int32 n = rFmt.getLength();
      // i and p are always advanced together; i guards the end of the string
2018  for (sal_Int32 i = 0; i < n; ++i, ++p)
2019  {
2020  if (*p == '\\')
2021  {
2022  // Skip '\'.
2023  ++i;
2024  ++p;
2025 
2026  // Parse all subsequent digits until first non-digit is found.
2027  sal_Int32 nDigitCount = 0;
2028  const sal_Unicode* p1 = p;
2029  for (; i < n; ++i, ++p, ++nDigitCount)
2030  {
2031  if (*p < '0' || '9' < *p)
2032  {
      // step back by one, so that the increment of the outer loop
      // lands on this non-digit character again
2033  --i;
2034  --p;
2035  break;
2036  }
2037 
2038  }
2039  if (nDigitCount)
2040  {
2041  // Hex-encoded character found. Decode it back into its
2042  // original character. An example of number format with
2043  // hex-encoded chars: "\0022$\0022\#\,\#\#0\.00"
2044  sal_uInt32 nVal = OUString(p1, nDigitCount).toUInt32(16);
2045  aBuf.append(static_cast<sal_Unicode>(nVal));
2046  }
2047  }
2048  else
2049  aBuf.append(*p);
2050  }
2051  return aBuf.makeStringAndClear();
2052 }
2053 
2054 }
2055 
2057 {
2058  PushEntry( rInfo, true );
2059  if( mpParentTable && !mbPreFormText ) // no cells allowed in global and preformatted tables
2060  {
2061  // read needed options from the <td> tag
2062  ScHTMLSize aSpanSize( 1, 1 );
2063  std::optional<OUString> pValStr, pNumStr;
2064  const HTMLOptions& rOptions = static_cast<HTMLParser*>(rInfo.pParser)->GetOptions();
2065  sal_uInt32 nNumberFormat = NUMBERFORMAT_ENTRY_NOT_FOUND;
2066  for (const auto& rOption : rOptions)
2067  {
2068  switch (rOption.GetToken())
2069  {
2070  case HtmlOptionId::COLSPAN:
2071  aSpanSize.mnCols = static_cast<SCCOL>( getLimitedValue<sal_Int32>( rOption.GetString().toInt32(), 1, 256 ) );
2072  break;
2073  case HtmlOptionId::ROWSPAN:
2074  aSpanSize.mnRows = static_cast<SCROW>( getLimitedValue<sal_Int32>( rOption.GetString().toInt32(), 1, 256 ) );
2075  break;
2076  case HtmlOptionId::SDVAL:
2077  pValStr = rOption.GetString();
2078  break;
2079  case HtmlOptionId::SDNUM:
2080  pNumStr = rOption.GetString();
2081  break;
2082  case HtmlOptionId::CLASS:
2083  {
2084  // Pick up the number format associated with this class (if
2085  // any).
2086  OUString aClass = rOption.GetString();
2087  const ScHTMLStyles& rStyles = mpParser->GetStyles();
2088  const OUString& rVal = rStyles.getPropertyValue("td", aClass, "mso-number-format");
2089  if (!rVal.isEmpty())
2090  {
2091  OUString aNumFmt = decodeNumberFormat(rVal);
2092 
2093  nNumberFormat = GetFormatTable()->GetEntryKey(aNumFmt);
2094  if (nNumberFormat == NUMBERFORMAT_ENTRY_NOT_FOUND)
2095  {
2096  sal_Int32 nErrPos = 0;
2097  SvNumFormatType nDummy;
2098  bool bValidFmt = GetFormatTable()->PutEntry(aNumFmt, nErrPos, nDummy, nNumberFormat);
2099  if (!bValidFmt)
2100  nNumberFormat = NUMBERFORMAT_ENTRY_NOT_FOUND;
2101  }
2102  }
2103  }
2104  break;
2105  default: break;
2106  }
2107  }
2108 
2109  ImplDataOn( aSpanSize );
2110 
2111  if (nNumberFormat != NUMBERFORMAT_ENTRY_NOT_FOUND)
2112  mxDataItemSet->Put( SfxUInt32Item(ATTR_VALUE_FORMAT, nNumberFormat) );
2113 
2115  CreateNewEntry( rInfo );
2116  mxCurrEntry->pValStr = std::move(pValStr);
2117  mxCurrEntry->pNumStr = std::move(pNumStr);
2118  }
2119  else
2120  CreateNewEntry( rInfo );
2121 }
2122 
2124 {
2125  PushEntry( rInfo, true );
2126  if( mpParentTable && !mbPreFormText ) // no cells allowed in global and preformatted tables
2127  ImplDataOff();
2128  CreateNewEntry( rInfo );
2129 }
2130 
2132 {
2133  bool bPushed = PushEntry( rInfo );
2134  if( !mpParentTable )
2135  {
2136  // do not start new row, if nothing (no title) precedes the body.
2137  if( bPushed || !mbRowOn )
2138  ImplRowOn();
2139  if( bPushed || !mbDataOn )
2140  ImplDataOn( ScHTMLSize( 1, 1 ) );
2142  }
2143  CreateNewEntry( rInfo );
2144 }
2145 
2147 {
2148  PushEntry( rInfo );
2149  if( !mpParentTable )
2150  {
2151  ImplDataOff();
2152  ImplRowOff();
2153  }
2154  CreateNewEntry( rInfo );
2155 }
2156 
2158 {
2159  if( mpParentTable ) // not allowed to close global table
2160  {
2161  PushEntry( rInfo, mbDataOn );
2162  ImplDataOff();
2163  ImplRowOff();
2165  mpParentTable->CreateNewEntry( rInfo );
2166  if( mbPreFormText ) // enclose preformatted table with empty lines in parent table
2168  return mpParentTable;
2169  }
2170  return this;
2171 }
2172 
2174 {
2175  const ScSizeVec& rSizes = maCumSizes[ eOrient ];
2176  size_t nIndex = static_cast< size_t >( nCellPos );
2177  if( nIndex >= rSizes.size() ) return 0;
2178  return (nIndex == 0) ? rSizes.front() : (rSizes[ nIndex ] - rSizes[ nIndex - 1 ]);
2179 }
2180 
// Returns the document size covered by the HTML cells in the half-open index
// range [nCellBegin, nCellEnd) for the passed orientation. The value is the
// difference of two entries of the cumulative size vector maCumSizes[ eOrient ].
// nCellBegin is limited to 0 from below and nCellEnd is limited to
// [0, rSizes.size()]; an empty or inverted range yields 0.
// Limiting nCellEnd at 0 keeps a negative end index from wrapping around to a
// huge size_t value and reading past the end of the vector.
2181 SCCOLROW ScHTMLTable::GetDocSize( ScHTMLOrient eOrient, SCCOLROW nCellBegin, SCCOLROW nCellEnd ) const
2182 {
2183  const ScSizeVec& rSizes = maCumSizes[ eOrient ];
2184  size_t nBeginIdx = static_cast< size_t >( std::max< SCCOLROW >( nCellBegin, 0 ) );
2185  size_t nEndIdx = static_cast< size_t >( std::clamp< SCCOLROW >( nCellEnd, 0, static_cast< SCCOLROW >( rSizes.size() ) ) );
2186  if (nBeginIdx >= nEndIdx ) return 0;
      // cumulative size up to the end of the range minus cumulative size before its start
2187  return rSizes[ nEndIdx - 1 ] - ((nBeginIdx == 0) ? 0 : rSizes[ nBeginIdx - 1 ]);
2188 }
2189 
2191 {
2192  const ScSizeVec& rSizes = maCumSizes[ eOrient ];
2193  return rSizes.empty() ? 0 : rSizes.back();
2194 }
2195 
2197 {
2198  ScHTMLSize aCellSpan = GetSpan( rCellPos );
2199  return ScHTMLSize(
2200  static_cast< SCCOL >( GetDocSize( tdCol, rCellPos.mnCol, rCellPos.mnCol + aCellSpan.mnCols ) ),
2201  static_cast< SCROW >( GetDocSize( tdRow, rCellPos.mnRow, rCellPos.mnRow + aCellSpan.mnRows ) ) );
2202 }
2203 
2205 {
2206  return maDocBasePos.Get( eOrient ) + GetDocSize( eOrient, 0, nCellPos );
2207 }
2208 
2210 {
2211  return ScHTMLPos(
2212  static_cast< SCCOL >( GetDocPos( tdCol, rCellPos.mnCol ) ),
2213  static_cast< SCROW >( GetDocPos( tdRow, rCellPos.mnRow ) ) );
2214 }
2215 
// Fills rRange with the document range of this table: the start is the
// address made from maDocBasePos, the end is the start moved by the table's
// document size minus one in column and row direction.
// A failing move only triggers an assertion; rRange is not corrected in that case.
2216 void ScHTMLTable::GetDocRange( ScRange& rRange ) const
2217 {
2218  rRange.aStart = rRange.aEnd = maDocBasePos.MakeAddr();
      // receives the error position of Move(); not evaluated here
2219  ScAddress aErrorPos( ScAddress::UNINITIALIZED );
2220  if (!rRange.aEnd.Move( static_cast< SCCOL >( GetDocSize( tdCol ) ) - 1,
2221  static_cast< SCROW >( GetDocSize( tdRow ) ) - 1, 0, aErrorPos))
2222  {
2223  assert(!"can't move");
2224  }
2225 }
2226 
// Applies border attributes to the document cells of this table and then
// repeats the call for all nested tables.
// Borders for this table are only written if pDoc is set and mbBorderOn is
// true. Cells at the outer edge of the table get a solid line of width
// DEF_LINE_WIDTH_2 on that edge, all other edges between HTML cells get a
// solid line of width DEF_LINE_WIDTH_0.
// Each HTML cell may cover several document cells; lines are only set on the
// edges of that covered range, the edges inside it are reset to no line.
// rFirstPos supplies the column and row offset and the sheet of the target cells.
// The nested tables are visited regardless of this table's mbBorderOn.
2227 void ScHTMLTable::ApplyCellBorders( ScDocument* pDoc, const ScAddress& rFirstPos ) const
2228 {
2229  OSL_ENSURE( pDoc, "ScHTMLTable::ApplyCellBorders - no document" );
2230  if( pDoc && mbBorderOn )
2231  {
2232  const SCCOL nLastCol = maSize.mnCols - 1;
2233  const SCROW nLastRow = maSize.mnRows - 1;
2234  const tools::Long nOuterLine = DEF_LINE_WIDTH_2;
2235  const tools::Long nInnerLine = DEF_LINE_WIDTH_0;
2236  SvxBorderLine aOuterLine(nullptr, nOuterLine, SvxBorderLineStyle::SOLID);
2237  SvxBorderLine aInnerLine(nullptr, nInnerLine, SvxBorderLineStyle::SOLID);
      // one item is reused for all cells; its four lines are overwritten per cell
2238  SvxBoxItem aBorderItem( ATTR_BORDER );
2239 
      // outer two loops: HTML cell position (nCol, nRow) inside this table
2240  for( SCCOL nCol = 0; nCol <= nLastCol; ++nCol )
2241  {
2242  SvxBorderLine* pLeftLine = (nCol == 0) ? &aOuterLine : &aInnerLine;
2243  SvxBorderLine* pRightLine = (nCol == nLastCol) ? &aOuterLine : &aInnerLine;
      // first and last document column covered by this HTML column
2244  SCCOL nCellCol1 = static_cast< SCCOL >( GetDocPos( tdCol, nCol ) ) + rFirstPos.Col();
2245  SCCOL nCellCol2 = nCellCol1 + static_cast< SCCOL >( GetDocSize( tdCol, nCol ) ) - 1;
2246  for( SCROW nRow = 0; nRow <= nLastRow; ++nRow )
2247  {
2248  SvxBorderLine* pTopLine = (nRow == 0) ? &aOuterLine : &aInnerLine;
2249  SvxBorderLine* pBottomLine = (nRow == nLastRow) ? &aOuterLine : &aInnerLine;
      // first and last document row covered by this HTML row
2250  SCROW nCellRow1 = GetDocPos( tdRow, nRow ) + rFirstPos.Row();
2251  SCROW nCellRow2 = nCellRow1 + GetDocSize( tdRow, nRow ) - 1;
      // inner two loops: every document cell covered by the HTML cell
2252  for( SCCOL nCellCol = nCellCol1; nCellCol <= nCellCol2; ++nCellCol )
2253  {
2254  aBorderItem.SetLine( (nCellCol == nCellCol1) ? pLeftLine : nullptr, SvxBoxItemLine::LEFT );
2255  aBorderItem.SetLine( (nCellCol == nCellCol2) ? pRightLine : nullptr, SvxBoxItemLine::RIGHT );
2256  for( SCROW nCellRow = nCellRow1; nCellRow <= nCellRow2; ++nCellRow )
2257  {
2258  aBorderItem.SetLine( (nCellRow == nCellRow1) ? pTopLine : nullptr, SvxBoxItemLine::TOP );
2259  aBorderItem.SetLine( (nCellRow == nCellRow2) ? pBottomLine : nullptr, SvxBoxItemLine::BOTTOM );
2260  pDoc->ApplyAttr( nCellCol, nCellRow, rFirstPos.Tab(), aBorderItem );
2261  }
2262  }
2263  }
2264  }
2265  }
2266 
      // nested tables decide on their own border flag
2267  for( ScHTMLTableIterator aIter( mxNestedTables.get() ); aIter.is(); ++aIter )
2268  aIter->ApplyCellBorders( pDoc, rFirstPos );
2269 }
2270 
2272 {
2273  return mpParser->GetDoc().GetFormatTable();
2274 }
2275 
2277 {
2278  return mpCurrEntryVector && mpCurrEntryVector->empty();
2279 }
2280 
2282 {
2283  return (rInfo.nToken == HtmlTokenId::TEXTTOKEN) && (rInfo.aText.getLength() == 1) && (rInfo.aText[ 0 ] == ' ');
2284 }
2285 
2287 {
2288  return std::make_unique<ScHTMLEntry>( GetCurrItemSet() );
2289 }
2290 
2292 {
2293  OSL_ENSURE( !mxCurrEntry, "ScHTMLTable::CreateNewEntry - old entry still present" );
2295  mxCurrEntry->aSel = rInfo.aSelection;
2296 }
2297 
2299 {
2300  // HTML entry list does not own the entries
2301  rEntryVector.push_back( rxEntry.get() );
2302  // mrEEParseList (reference to member of ScEEParser) owns the entries
2303  mrEEParseList.push_back(std::shared_ptr<ScEEParseEntry>(rxEntry.release()));
2304 }
2305 
2307 {
2308  bool bPushed = false;
2309  if( rxEntry && rxEntry->HasContents() )
2310  {
2311  if( mpCurrEntryVector )
2312  {
2313  if( mbPushEmptyLine )
2314  {
2315  ScHTMLEntryPtr xEmptyEntry = CreateEntry();
2316  ImplPushEntryToVector( *mpCurrEntryVector, xEmptyEntry );
2317  mbPushEmptyLine = false;
2318  }
2320  bPushed = true;
2321  }
2322  else if( mpParentTable )
2323  {
2324  bPushed = mpParentTable->PushEntry( rxEntry );
2325  }
2326  else
2327  {
2328  OSL_FAIL( "ScHTMLTable::PushEntry - cannot push entry, no parent found" );
2329  }
2330  }
2331  return bPushed;
2332 }
2333 
2334 bool ScHTMLTable::PushEntry( const HtmlImportInfo& rInfo, bool bLastInCell )
2335 {
2336  OSL_ENSURE( mxCurrEntry, "ScHTMLTable::PushEntry - no current entry" );
2337  bool bPushed = false;
2338  if( mxCurrEntry )
2339  {
2340  mxCurrEntry->AdjustEnd( rInfo );
2341  mxCurrEntry->Strip( mrEditEngine );
2342 
2343  // import entry always, if it is the last in cell, and cell is still empty
2344  if( bLastInCell && IsEmptyCell() )
2345  {
2346  mxCurrEntry->SetImportAlways();
2347  // don't insert empty lines before single empty entries
2348  if( mxCurrEntry->IsEmpty() )
2349  mbPushEmptyLine = false;
2350  }
2351 
2352  bPushed = PushEntry( mxCurrEntry );
2353  mxCurrEntry.reset();
2354  }
2355  return bPushed;
2356 }
2357 
2359 {
2360  OSL_ENSURE( nTableId != SC_HTML_GLOBAL_TABLE, "ScHTMLTable::PushTableEntry - cannot push global table" );
2361  if( nTableId != SC_HTML_GLOBAL_TABLE )
2362  {
2363  ScHTMLEntryPtr xEntry( new ScHTMLEntry( maTableItemSet, nTableId ) );
2364  PushEntry( xEntry );
2365  }
2366 }
2367 
2369 {
2370  ScHTMLTable* pTable = ((nTableId != SC_HTML_GLOBAL_TABLE) && mxNestedTables) ?
2371  mxNestedTables->FindTable( nTableId, false ) : nullptr;
2372  OSL_ENSURE( pTable || (nTableId == SC_HTML_GLOBAL_TABLE), "ScHTMLTable::GetExistingTable - table not found" );
2373  return pTable;
2374 }
2375 
2376 ScHTMLTable* ScHTMLTable::InsertNestedTable( const HtmlImportInfo& rInfo, bool bPreFormText )
2377 {
2378  if( !mxNestedTables )
2379  mxNestedTables.reset( new ScHTMLTableMap( *this ) );
2380  if( bPreFormText ) // enclose new preformatted table with empty lines
2382  return mxNestedTables->CreateTable( rInfo, bPreFormText );
2383 }
2384 
2385 void ScHTMLTable::InsertNewCell( const ScHTMLSize& rSpanSize )
2386 {
2387  ScRange* pRange;
2388 
2389  /* Find an unused cell by skipping all merged ranges that cover the
2390  current cell position stored in maCurrCell. */
2391  for (;;)
2392  {
2393  pRange = maVMergedCells.Find( maCurrCell.MakeAddr() );
2394  if (!pRange)
2395  pRange = maHMergedCells.Find( maCurrCell.MakeAddr() );
2396  if (!pRange)
2397  break;
2398  maCurrCell.mnCol = pRange->aEnd.Col() + 1;
2399  }
2401 
2402  /* If the new cell is merged horizontally, try to find collisions with
2403  other vertically merged ranges. In this case, shrink existing
2404  vertically merged ranges (do not shrink the new cell). */
2405  SCCOL nColEnd = maCurrCell.mnCol + rSpanSize.mnCols;
2406  for( ScAddress aAddr( maCurrCell.MakeAddr() ); aAddr.Col() < nColEnd; aAddr.IncCol() )
2407  if( (pRange = maVMergedCells.Find( aAddr )) != nullptr )
2408  pRange->aEnd.SetRow( maCurrCell.mnRow - 1 );
2409 
2410  // insert the new range into the cell lists
2411  ScRange aNewRange( maCurrCell.MakeAddr() );
2412  ScAddress aErrorPos( ScAddress::UNINITIALIZED );
2413  if (!aNewRange.aEnd.Move( rSpanSize.mnCols - 1, rSpanSize.mnRows - 1, 0, aErrorPos))
2414  {
2415  assert(!"can't move");
2416  }
2417  if( rSpanSize.mnRows > 1 )
2418  {
2419  maVMergedCells.push_back( aNewRange );
2420  /* Do not insert vertically merged ranges into maUsedCells yet,
2421  because they may be shrunken (see above). The final vertically
2422  merged ranges are inserted in FillEmptyCells(). */
2423  }
2424  else
2425  {
2426  if( rSpanSize.mnCols > 1 )
2427  maHMergedCells.push_back( aNewRange );
2428  /* Insert horizontally merged ranges and single cells into
2429  maUsedCells, they will not be changed anymore. */
2430  maUsedCells.Join( aNewRange );
2431  }
2432 
2433  // adjust table size
2434  maSize.mnCols = std::max< SCCOL >( maSize.mnCols, aNewRange.aEnd.Col() + 1 );
2435  maSize.mnRows = std::max< SCROW >( maSize.mnRows, aNewRange.aEnd.Row() + 1 );
2436 }
2437 
2439 {
2440  if( mbRowOn )
2441  ImplRowOff();
2442  mxRowItemSet.reset( new SfxItemSet( maTableItemSet ) );
2443  maCurrCell.mnCol = 0;
2444  mbRowOn = true;
2445  mbDataOn = false;
2446 }
2447 
2449 {
2450  if( mbDataOn )
2451  ImplDataOff();
2452  if( mbRowOn )
2453  {
2454  mxRowItemSet.reset();
2455  ++maCurrCell.mnRow;
2456  mbRowOn = mbDataOn = false;
2457  }
2458 }
2459 
2460 void ScHTMLTable::ImplDataOn( const ScHTMLSize& rSpanSize )
2461 {
2462  if( mbDataOn )
2463  ImplDataOff();
2464  if( !mbRowOn )
2465  ImplRowOn();
2466  mxDataItemSet.reset( new SfxItemSet( *mxRowItemSet ) );
2467  InsertNewCell( rSpanSize );
2468  mbDataOn = true;
2469  mbPushEmptyLine = false;
2470 }
2471 
2473 {
2474  if( mbDataOn )
2475  {
2476  mxDataItemSet.reset();
2477  ++maCurrCell.mnCol;
2478  mpCurrEntryVector = nullptr;
2479  mbDataOn = false;
2480  }
2481 }
2482 
2484 {
2485  // special handling for table header cells
2486  if( rInfo.nToken == HtmlTokenId::TABLEHEADER_ON )
2487  {
2489  rItemSet.Put( SvxHorJustifyItem( SvxCellHorJustify::Center, ATTR_HOR_JUSTIFY ) );
2490  }
2491 
2492  const HTMLOptions& rOptions = static_cast<HTMLParser*>(rInfo.pParser)->GetOptions();
2493  for (const auto& rOption : rOptions)
2494  {
2495  switch( rOption.GetToken() )
2496  {
2497  case HtmlOptionId::ALIGN:
2498  {
2499  SvxCellHorJustify eVal = SvxCellHorJustify::Standard;
2500  const OUString& rOptVal = rOption.GetString();
2501  if( rOptVal.equalsIgnoreAsciiCase( OOO_STRING_SVTOOLS_HTML_AL_right ) )
2502  eVal = SvxCellHorJustify::Right;
2503  else if( rOptVal.equalsIgnoreAsciiCase( OOO_STRING_SVTOOLS_HTML_AL_center ) )
2504  eVal = SvxCellHorJustify::Center;
2505  else if( rOptVal.equalsIgnoreAsciiCase( OOO_STRING_SVTOOLS_HTML_AL_left ) )
2506  eVal = SvxCellHorJustify::Left;
2507  if( eVal != SvxCellHorJustify::Standard )
2508  rItemSet.Put( SvxHorJustifyItem( eVal, ATTR_HOR_JUSTIFY ) );
2509  }
2510  break;
2511 
2512  case HtmlOptionId::VALIGN:
2513  {
2514  SvxCellVerJustify eVal = SvxCellVerJustify::Standard;
2515  const OUString& rOptVal = rOption.GetString();
2516  if( rOptVal.equalsIgnoreAsciiCase( OOO_STRING_SVTOOLS_HTML_VA_top ) )
2517  eVal = SvxCellVerJustify::Top;
2518  else if( rOptVal.equalsIgnoreAsciiCase( OOO_STRING_SVTOOLS_HTML_VA_middle ) )
2519  eVal = SvxCellVerJustify::Center;
2520  else if( rOptVal.equalsIgnoreAsciiCase( OOO_STRING_SVTOOLS_HTML_VA_bottom ) )
2521  eVal = SvxCellVerJustify::Bottom;
2522  if( eVal != SvxCellVerJustify::Standard )
2523  rItemSet.Put( SvxVerJustifyItem( eVal, ATTR_VER_JUSTIFY ) );
2524  }
2525  break;
2526 
2527  case HtmlOptionId::BGCOLOR:
2528  {
2529  Color aColor;
2530  rOption.GetColor( aColor );
2531  rItemSet.Put( SvxBrushItem( aColor, ATTR_BACKGROUND ) );
2532  }
2533  break;
2534  default: break;
2535  }
2536  }
2537 }
2538 
2539 void ScHTMLTable::SetDocSize( ScHTMLOrient eOrient, SCCOLROW nCellPos, SCCOLROW nSize )
2540 {
2541  OSL_ENSURE( nCellPos >= 0, "ScHTMLTable::SetDocSize - unexpected negative position" );
2542  ScSizeVec& rSizes = maCumSizes[ eOrient ];
2543  size_t nIndex = static_cast< size_t >( nCellPos );
2544  // expand with height/width == 1
2545  while( nIndex >= rSizes.size() )
2546  rSizes.push_back( rSizes.empty() ? 1 : (rSizes.back() + 1) );
2547  // update size of passed position and all following
2548  // #i109987# only grow, don't shrink - use the largest needed size
2549  SCCOLROW nDiff = nSize - ((nIndex == 0) ? rSizes.front() : (rSizes[ nIndex ] - rSizes[ nIndex - 1 ]));
2550  if( nDiff > 0 )
2551  std::for_each(rSizes.begin() + nIndex, rSizes.end(), [&nDiff](SCCOLROW& rSize) { rSize += nDiff; });
2552 }
2553 
2555  ScHTMLOrient eOrient, SCCOLROW nCellPos, SCCOLROW nCellSpan, SCCOLROW nRealDocSize )
2556 {
2557  SCCOLROW nDiffSize = 0;
2558  // in merged columns/rows: reduce needed size by size of leading columns
2559  while( nCellSpan > 1 )
2560  {
2561  nDiffSize += GetDocSize( eOrient, nCellPos );
2562  --nCellSpan;
2563  ++nCellPos;
2564  }
2565  // set remaining needed size to last column/row
2566  nRealDocSize -= std::min< SCCOLROW >( nRealDocSize - 1, nDiffSize );
2567  SetDocSize( eOrient, nCellPos, nRealDocSize );
2568 }
2569 
2571 {
2572  for( ScHTMLTableIterator aIter( mxNestedTables.get() ); aIter.is(); ++aIter )
2573  aIter->FillEmptyCells();
2574 
2575  // insert the final vertically merged ranges into maUsedCells
2576  for ( size_t i = 0, nRanges = maVMergedCells.size(); i < nRanges; ++i )
2577  {
2578  ScRange & rRange = maVMergedCells[ i ];
2579  maUsedCells.Join( rRange );
2580  }
2581 
2582  for( ScAddress aAddr; aAddr.Row() < maSize.mnRows; aAddr.IncRow() )
2583  {
2584  for( aAddr.SetCol( 0 ); aAddr.Col() < maSize.mnCols; aAddr.IncCol() )
2585  {
2586  if( !maUsedCells.Find( aAddr ) )
2587  {
2588  // create a range for the lock list (used to calc. cell span)
2589  ScRange aRange( aAddr );
2590  do
2591  {
2592  aRange.aEnd.IncCol();
2593  }
2594  while( (aRange.aEnd.Col() < maSize.mnCols) && !maUsedCells.Find( aRange.aEnd ) );
2595  aRange.aEnd.IncCol( -1 );
2596  maUsedCells.Join( aRange );
2597 
2598  // insert a dummy entry
2599  ScHTMLEntryPtr xEntry = CreateEntry();
2600  ImplPushEntryToVector( maEntryMap[ ScHTMLPos( aAddr ) ], xEntry );
2601  }
2602  }
2603  }
2604 }
2605 
2607 {
2608  // recalc table sizes recursively from inner to outer
2609  for( ScHTMLTableIterator aIter( mxNestedTables.get() ); aIter.is(); ++aIter )
2610  aIter->RecalcDocSize();
2611 
2612  /* Two passes: first calculates the sizes of single columns/rows, then
2613  the sizes of spanned columns/rows. This allows to fill nested tables
2614  into merged cells optimally. */
2615  static const sal_uInt16 PASS_SINGLE = 0;
2616  static const sal_uInt16 PASS_SPANNED = 1;
2617  for( sal_uInt16 nPass = PASS_SINGLE; nPass <= PASS_SPANNED; ++nPass )
2618  {
2619  // iterate through every table cell
2620  for( const auto& [rCellPos, rEntryVector] : maEntryMap )
2621  {
2622  ScHTMLSize aCellSpan = GetSpan( rCellPos );
2623 
2624  // process the dimension of the current cell in this pass?
2625  // (pass is single and span is 1) or (pass is not single and span is not 1)
2626  bool bProcessColWidth = ((nPass == PASS_SINGLE) == (aCellSpan.mnCols == 1));
2627  bool bProcessRowHeight = ((nPass == PASS_SINGLE) == (aCellSpan.mnRows == 1));
2628  if( bProcessColWidth || bProcessRowHeight )
2629  {
2630  ScHTMLSize aDocSize( 1, 0 ); // resulting size of the cell in document
2631 
2632  // expand the cell size for each cell parse entry
2633  for( const auto& rpEntry : rEntryVector )
2634  {
2635  ScHTMLTable* pTable = GetExistingTable( rpEntry->GetTableId() );
2636  // find entry with maximum width
2637  if( bProcessColWidth && pTable )
2638  aDocSize.mnCols = std::max( aDocSize.mnCols, static_cast< SCCOL >( pTable->GetDocSize( tdCol ) ) );
2639  // add up height of each entry
2640  if( bProcessRowHeight )
2641  aDocSize.mnRows += pTable ? pTable->GetDocSize( tdRow ) : 1;
2642  }
2643  if( !aDocSize.mnRows )
2644  aDocSize.mnRows = 1;
2645 
2646  if( bProcessColWidth )
2647  CalcNeededDocSize( tdCol, rCellPos.mnCol, aCellSpan.mnCols, aDocSize.mnCols );
2648  if( bProcessRowHeight )
2649  CalcNeededDocSize( tdRow, rCellPos.mnRow, aCellSpan.mnRows, aDocSize.mnRows );
2650  }
2651  }
2652  }
2653 }
2654 
2655 void ScHTMLTable::RecalcDocPos( const ScHTMLPos& rBasePos )
2656 {
2657  maDocBasePos = rBasePos;
2658  // after the previous assignment it is allowed to call GetDocPos() methods
2659 
2660  // iterate through every table cell
2661  for( auto& [rCellPos, rEntryVector] : maEntryMap )
2662  {
2663  // fixed doc position of the entire cell (first entry)
2664  const ScHTMLPos aCellDocPos( GetDocPos( rCellPos ) );
2665  // fixed doc size of the entire cell
2666  const ScHTMLSize aCellDocSize( GetDocSize( rCellPos ) );
2667 
2668  // running doc position for single entries
2669  ScHTMLPos aEntryDocPos( aCellDocPos );
2670 
2671  ScHTMLEntry* pEntry = nullptr;
2672  for( const auto& rpEntry : rEntryVector )
2673  {
2674  pEntry = rpEntry;
2675  if( ScHTMLTable* pTable = GetExistingTable( pEntry->GetTableId() ) )
2676  {
2677  pTable->RecalcDocPos( aEntryDocPos ); // recalc nested table
2678  pEntry->nCol = SCCOL_MAX;
2679  pEntry->nRow = SCROW_MAX;
2680  SCROW nTableRows = static_cast< SCROW >( pTable->GetDocSize( tdRow ) );
2681 
2682  // use this entry to pad empty space right of table
2683  if( mpParentTable ) // ... but not in global table
2684  {
2685  SCCOL nStartCol = aEntryDocPos.mnCol + static_cast< SCCOL >( pTable->GetDocSize( tdCol ) );
2686  SCCOL nNextCol = aEntryDocPos.mnCol + aCellDocSize.mnCols;
2687  if( nStartCol < nNextCol )
2688  {
2689  pEntry->nCol = nStartCol;
2690  pEntry->nRow = aEntryDocPos.mnRow;
2691  pEntry->nColOverlap = nNextCol - nStartCol;
2692  pEntry->nRowOverlap = nTableRows;
2693  }
2694  }
2695  aEntryDocPos.mnRow += nTableRows;
2696  }
2697  else
2698  {
2699  pEntry->nCol = aEntryDocPos.mnCol;
2700  pEntry->nRow = aEntryDocPos.mnRow;
2701  if( mpParentTable ) // do not merge in global table
2702  pEntry->nColOverlap = aCellDocSize.mnCols;
2703  ++aEntryDocPos.mnRow;
2704  }
2705  }
2706 
2707  // pEntry points now to last entry.
2708  if( pEntry )
2709  {
2710  if( (pEntry == rEntryVector.front()) && (pEntry->GetTableId() == SC_HTML_NO_TABLE) )
2711  {
2712  // pEntry is the only entry in this cell - merge rows of cell with single non-table entry.
2713  pEntry->nRowOverlap = aCellDocSize.mnRows;
2714  }
2715  else
2716  {
2717  // fill up incomplete entry lists
2718  SCROW nFirstUnusedRow = aCellDocPos.mnRow + aCellDocSize.mnRows;
2719  while( aEntryDocPos.mnRow < nFirstUnusedRow )
2720  {
2721  ScHTMLEntryPtr xDummyEntry( new ScHTMLEntry( pEntry->GetItemSet() ) );
2722  xDummyEntry->nCol = aEntryDocPos.mnCol;
2723  xDummyEntry->nRow = aEntryDocPos.mnRow;
2724  xDummyEntry->nColOverlap = aCellDocSize.mnCols;
2725  ImplPushEntryToVector( rEntryVector, xDummyEntry );
2726  ++aEntryDocPos.mnRow;
2727  }
2728  }
2729  }
2730  }
2731 }
2732 
2734  SfxItemPool& rPool,
2735  EditEngine& rEditEngine,
2736  std::vector<std::shared_ptr<ScEEParseEntry>>& rEEParseVector,
2737  ScHTMLTableId& rnUnusedId,
2738  ScHTMLParser* pParser
2739 ) :
2740  ScHTMLTable( rPool, rEditEngine, rEEParseVector, rnUnusedId, pParser )
2741 {
2742 }
2743 
2745 {
2746 }
2747 
2749 {
2750  // Fills up empty cells with a dummy entry.
2751  FillEmptyCells();
2752  // recalc table sizes of all nested tables and this table
2753  RecalcDocSize();
2754  // recalc document positions of all entries in this table and in nested tables
2755  RecalcDocPos( GetDocPos() );
2756 }
2757 
2759  ScHTMLParser( pEditEngine, pDoc ),
2760  mnUnusedId( SC_HTML_GLOBAL_TABLE ),
2761  mbTitleOn( false )
2762 {
2763  mxGlobTable.reset(
2764  new ScHTMLGlobalTable(*pPool, *pEdit, maList, mnUnusedId, this));
2765  mpCurrTable = mxGlobTable.get();
2766 }
2767 
2769 {
2770 }
2771 
2772 ErrCode ScHTMLQueryParser::Read( SvStream& rStrm, const OUString& rBaseURL )
2773 {
2774  SvKeyValueIteratorRef xValues;
2775  SvKeyValueIterator* pAttributes = nullptr;
2776 
2777  SfxObjectShell* pObjSh = mpDoc->GetDocumentShell();
2778  if( pObjSh && pObjSh->IsLoading() )
2779  {
2780  pAttributes = pObjSh->GetHeaderAttributes();
2781  }
2782  else
2783  {
2784  /* When not loading, set up fake HTTP headers to force the SfxHTMLParser
2785  to use UTF8 (used when pasting from clipboard) */
2786  const char* pCharSet = rtl_getBestMimeCharsetFromTextEncoding( RTL_TEXTENCODING_UTF8 );
2787  if( pCharSet )
2788  {
2789  OUString aContentType = "text/html; charset=" +
2790  OUString::createFromAscii( pCharSet );
2791 
2792  xValues = new SvKeyValueIterator;
2793  xValues->Append( SvKeyValue( OOO_STRING_SVTOOLS_HTML_META_content_type, aContentType ) );
2794  pAttributes = xValues.get();
2795  }
2796  }
2797 
2799  pEdit->SetHtmlImportHdl( LINK( this, ScHTMLQueryParser, HTMLImportHdl ) );
2800  ErrCode nErr = pEdit->Read( rStrm, rBaseURL, EETextFormat::Html, pAttributes );
2801  pEdit->SetHtmlImportHdl( aOldLink );
2802 
2803  mxGlobTable->Recalc();
2804  nColMax = static_cast< SCCOL >( mxGlobTable->GetDocSize( tdCol ) - 1 );
2805  nRowMax = static_cast< SCROW >( mxGlobTable->GetDocSize( tdRow ) - 1 );
2806 
2807  return nErr;
2808 }
2809 
2811 {
2812  return mxGlobTable.get();
2813 }
2814 
2816 {
2817  switch( rInfo.nToken )
2818  {
2819 // --- meta data ---
2820  case HtmlTokenId::META: MetaOn( rInfo ); break; // <meta>
2821 
2822 // --- title handling ---
2823  case HtmlTokenId::TITLE_ON: TitleOn(); break; // <title>
2824  case HtmlTokenId::TITLE_OFF: TitleOff( rInfo ); break; // </title>
2825 
2826  case HtmlTokenId::STYLE_ON: break;
2827  case HtmlTokenId::STYLE_OFF: ParseStyle(rInfo.aText); break;
2828 
2829 // --- body handling ---
2830  case HtmlTokenId::BODY_ON: mpCurrTable->BodyOn( rInfo ); break; // <body>
2831  case HtmlTokenId::BODY_OFF: mpCurrTable->BodyOff( rInfo ); break; // </body>
2832 
2833 // --- insert text ---
2834  case HtmlTokenId::TEXTTOKEN: InsertText( rInfo ); break; // any text
2835  case HtmlTokenId::LINEBREAK: mpCurrTable->BreakOn(); break; // <br>
2836  case HtmlTokenId::HEAD1_ON: // <h1>
2837  case HtmlTokenId::HEAD2_ON: // <h2>
2838  case HtmlTokenId::HEAD3_ON: // <h3>
2839  case HtmlTokenId::HEAD4_ON: // <h4>
2840  case HtmlTokenId::HEAD5_ON: // <h5>
2841  case HtmlTokenId::HEAD6_ON: // <h6>
2842  case HtmlTokenId::PARABREAK_ON: mpCurrTable->HeadingOn(); break; // <p>
2843 
2844 // --- misc. contents ---
2845  case HtmlTokenId::ANCHOR_ON: mpCurrTable->AnchorOn(); break; // <a>
2846 
2847 // --- table handling ---
2848  case HtmlTokenId::TABLE_ON: TableOn( rInfo ); break; // <table>
2849  case HtmlTokenId::TABLE_OFF: TableOff( rInfo ); break; // </table>
2850  case HtmlTokenId::CAPTION_ON: mpCurrTable->CaptionOn(); break; // <caption>
2851  case HtmlTokenId::CAPTION_OFF: mpCurrTable->CaptionOff(); break; // </caption>
2852  case HtmlTokenId::TABLEROW_ON: mpCurrTable->RowOn( rInfo ); break; // <tr>
2853  case HtmlTokenId::TABLEROW_OFF: mpCurrTable->RowOff( rInfo ); break; // </tr>
2854  case HtmlTokenId::TABLEHEADER_ON: // <th>
2855  case HtmlTokenId::TABLEDATA_ON: mpCurrTable->DataOn( rInfo ); break; // <td>
2856  case HtmlTokenId::TABLEHEADER_OFF: // </th>
2857  case HtmlTokenId::TABLEDATA_OFF: mpCurrTable->DataOff( rInfo ); break; // </td>
2858  case HtmlTokenId::PREFORMTXT_ON: PreOn( rInfo ); break; // <pre>
2859  case HtmlTokenId::PREFORMTXT_OFF: PreOff( rInfo ); break; // </pre>
2860 
2861 // --- formatting ---
2862  case HtmlTokenId::FONT_ON: FontOn( rInfo ); break; // <font>
2863 
2864  case HtmlTokenId::BIGPRINT_ON: // <big>
2867  break;
2868  case HtmlTokenId::SMALLPRINT_ON: // <small>
2871  break;
2872 
2873  case HtmlTokenId::BOLD_ON: // <b>
2874  case HtmlTokenId::STRONG_ON: // <strong>
2876  break;
2877 
2878  case HtmlTokenId::ITALIC_ON: // <i>
2879  case HtmlTokenId::EMPHASIS_ON: // <em>
2880  case HtmlTokenId::ADDRESS_ON: // <address>
2881  case HtmlTokenId::BLOCKQUOTE_ON: // <blockquote>
2882  case HtmlTokenId::BLOCKQUOTE30_ON: // <bq>
2883  case HtmlTokenId::CITATION_ON: // <cite>
2884  case HtmlTokenId::VARIABLE_ON: // <var>
2886  break;
2887 
2888  case HtmlTokenId::DEFINSTANCE_ON: // <dfn>
2891  break;
2892 
2893  case HtmlTokenId::UNDERLINE_ON: // <u>
2895  break;
2896  default: break;
2897  }
2898 }
2899 
2901 {
2902  mpCurrTable->PutText( rInfo );
2903  if( mbTitleOn )
2904  maTitle.append(rInfo.aText);
2905 }
2906 
2908 {
2909  const HTMLOptions& rOptions = static_cast<HTMLParser*>(rInfo.pParser)->GetOptions();
2910  for (const auto& rOption : rOptions)
2911  {
2912  switch( rOption.GetToken() )
2913  {
2914  case HtmlOptionId::FACE :
2915  {
2916  const OUString& rFace = rOption.GetString();
2917  OUString aFontName;
2918  sal_Int32 nPos = 0;
2919  while( nPos != -1 )
2920  {
2921  // font list separator: VCL = ';' HTML = ','
2922  OUString aFName = comphelper::string::strip(rFace.getToken(0, ',', nPos), ' ');
2923  aFontName = ScGlobal::addToken(aFontName, aFName, ';');
2924  }
2925  if ( !aFontName.isEmpty() )
2927  aFontName, EMPTY_OUSTRING, PITCH_DONTKNOW,
2928  RTL_TEXTENCODING_DONTKNOW, ATTR_FONT ) );
2929  }
2930  break;
2931  case HtmlOptionId::SIZE :
2932  {
2933  sal_uInt32 nSize = getLimitedValue< sal_uInt32 >( rOption.GetNumber(), 1, SC_HTML_FONTSIZES );
2935  }
2936  break;
2937  case HtmlOptionId::COLOR :
2938  {
2939  Color aColor;
2940  rOption.GetColor( aColor );
2942  }
2943  break;
2944  default: break;
2945  }
2946  }
2947 }
2948 
2950 {
2951  if( mpDoc->GetDocumentShell() )
2952  {
2953  HTMLParser* pParser = static_cast< HTMLParser* >( rInfo.pParser );
2954 
2955  uno::Reference<document::XDocumentPropertiesSupplier> xDPS(
2956  mpDoc->GetDocumentShell()->GetModel(), uno::UNO_QUERY_THROW);
2957  pParser->ParseMetaOptions(
2958  xDPS->getDocumentProperties(),
2960  }
2961 }
2962 
2964 {
2965  mbTitleOn = true;
2966  maTitle.setLength(0);
2967 }
2968 
2970 {
2971  if( !mbTitleOn )
2972  return;
2973 
2974  OUString aTitle = maTitle.makeStringAndClear().trim();
2975  if (!aTitle.isEmpty() && mpDoc->GetDocumentShell())
2976  {
2977  uno::Reference<document::XDocumentPropertiesSupplier> xDPS(
2978  mpDoc->GetDocumentShell()->GetModel(), uno::UNO_QUERY_THROW);
2979 
2980  xDPS->getDocumentProperties()->setTitle(aTitle);
2981  }
2982  InsertText( rInfo );
2983  mbTitleOn = false;
2984 }
2985 
2987 {
2988  mpCurrTable = mpCurrTable->TableOn( rInfo );
2989 }
2990 
2992 {
2993  mpCurrTable = mpCurrTable->TableOff( rInfo );
2994 }
2995 
2997 {
2998  mpCurrTable = mpCurrTable->PreOn( rInfo );
2999 }
3000 
3002 {
3003  mpCurrTable = mpCurrTable->PreOff( rInfo );
3004 }
3005 
3007 {
3008  mpCurrTable = mpCurrTable->CloseTable( rInfo );
3009 }
3010 
3011 namespace {
3012 
3016 class CSSHandler
3017 {
3018  struct MemStr
3019  {
3020  const char* mp;
3021  size_t mn;
3022 
3023  MemStr() : mp(nullptr), mn(0) {}
3024  MemStr(const char* p, size_t n) : mp(p), mn(n) {}
3025  MemStr& operator=(const MemStr& r) = default;
3026  };
3027 
3028  MemStr maPropName;
3029  MemStr maPropValue;
3030 
3031 public:
3032  explicit CSSHandler() {}
3033 
3034  static void at_rule_name(const char* /*p*/, size_t /*n*/)
3035  {
3036  // TODO: For now, we ignore at-rule properties
3037  }
3038 
3039  void property_name(const char* p, size_t n)
3040  {
3041  maPropName = MemStr(p, n);
3042  }
3043 
3044  void value(const char* p, size_t n)
3045  {
3046  maPropValue = MemStr(p, n);
3047  }
3048 
3049  static void begin_parse() {}
3050 
3051  static void end_parse() {}
3052 
3053  static void begin_block() {}
3054 
3055  static void end_block() {}
3056 
3057  static void begin_property() {}
3058 
3059  void end_property()
3060  {
3061  maPropName = MemStr();
3062  maPropValue = MemStr();
3063  }
3064 
3065  // new members
3066  static void simple_selector_type(const char* /*p*/, size_t /*n*/) {}
3067 
3068  static void simple_selector_class(const char* /*p*/, size_t /*n*/) {}
3069 
3070  static void simple_selector_pseudo_element(orcus::css::pseudo_element_t /*pe*/) {}
3071 
3072  static void simple_selector_pseudo_class(orcus::css::pseudo_class_t /*pc*/) {}
3073 
3074  static void simple_selector_id(const char* /*p*/, size_t /*n*/) {}
3075 
3076  static void end_simple_selector() {}
3077 
3078  static void end_selector() {}
3079 
3080  static void combinator(orcus::css::combinator_t /*combinator*/) {}
3081 
3082  static void rgb(uint8_t /*red*/ , uint8_t /*green*/ , uint8_t /*blue*/ ) {}
3083 
3084  static void rgba(uint8_t /*red*/ , uint8_t /*green*/ , uint8_t /*blue*/ , double /*alpha*/ ) {}
3085 
3086  static void hsl(uint8_t /*hue*/ , uint8_t /*sat*/ , uint8_t /*light*/ ) {}
3087 
3088  static void hsla(uint8_t /*hue*/ , uint8_t /*sat*/ , uint8_t /*light*/ , double /*alpha*/ ) {}
3089 
3090  static void url(const char* /*p*/, size_t /*n*/) {}
3091 
3092 };
3093 
3094 }
3095 
3096 void ScHTMLQueryParser::ParseStyle(std::u16string_view rStrm)
3097 {
3098  OString aStr = OUStringToOString(rStrm, RTL_TEXTENCODING_UTF8);
3099  CSSHandler aHdl;
3100  orcus::css_parser<CSSHandler> aParser(aStr.getStr(), aStr.getLength(), aHdl);
3101  try
3102  {
3103  aParser.parse();
3104  }
3105  catch (const orcus::css::parse_error&)
3106  {
3107  // TODO: Parsing of CSS failed. Do nothing for now.
3108  }
3109 }
3110 
3111 IMPL_LINK( ScHTMLQueryParser, HTMLImportHdl, HtmlImportInfo&, rInfo, void )
3112 {
3113  switch( rInfo.eState )
3114  {
3115  case HtmlImportState::Start:
3116  break;
3117 
3118  case HtmlImportState::NextToken:
3119  ProcessToken( rInfo );
3120  break;
3121 
3122  case HtmlImportState::InsertPara:
3123  mpCurrTable->InsertPara( rInfo );
3124  break;
3125 
3126  case HtmlImportState::SetAttr:
3127  case HtmlImportState::InsertText:
3128  case HtmlImportState::InsertField:
3129  break;
3130 
3131  case HtmlImportState::End:
3132  while( mpCurrTable->GetTableId() != SC_HTML_GLOBAL_TABLE )
3133  CloseTable( rInfo );
3134  break;
3135 
3136  default:
3137  OSL_FAIL( "ScHTMLQueryParser::HTMLImportHdl - unknown ImportInfo::eState" );
3138  }
3139 }
3140 
3141 /* vim:set shiftwidth=4 softtabstop=4 expandtab: */
OUString maCaption
Table name from.
Definition: htmlpars.hxx:526
std::unique_ptr< OuterMap > pTables
Definition: htmlpars.hxx:159
void RowOn(const HtmlImportInfo &rInfo)
Starts next row (tag).
Definition: htmlpars.cxx:1988
EditEngine * pEdit
Definition: eeparser.hxx:102
EditEngine & mrEditEngine
List of all used cells.
Definition: htmlpars.hxx:535
Type
SvxCellHorJustify
ScHTMLStyles()
just a persistent empty string.
Definition: htmlpars.cxx:71
std::vector< std::shared_ptr< ScEEParseEntry > > & mrEEParseList
Edit engine (from ScEEParser).
Definition: htmlpars.hxx:536
SCCOL mnCols
Definition: htmlpars.hxx:261
ScHTMLTable * PreOn(const HtmlImportInfo &rInfo)
Starts a new table based on preformatted text (.
Definition: htmlpars.cxx:1977
sal_Int32 nStartPara
SvKeyValueIterator * GetHeaderAttributes()
const Value & back() const
void AnchorOn(HtmlImportInfo *)
Definition: htmlpars.cxx:1381
SfxItemSet maTableItemSet
Unique identifier of this table.
Definition: htmlpars.hxx:529
const sal_uInt16 SC_HTML_OFFSET_TOLERANCE_LARGE
Definition: htmlpars.hxx:37
sal_Int32 nIndex
const char nHorizontal
Definition: eeparser.hxx:34
ScAddress aStart
Definition: address.hxx:499
static void EntryEnd(ScEEParseEntry *, const ESelection &)
Definition: htmlpars.cxx:315
#define OOO_STRING_SVTOOLS_HTML_VA_bottom
ScHTMLSize maSize
Cumulated cell counts for each HTML table column/row.
Definition: htmlpars.hxx:541
ScDocument * mpDoc
Definition: htmlpars.hxx:81
static void MakeCol(ScHTMLColOffset *, sal_uInt16 &nOffset, sal_uInt16 &nWidth, sal_uInt16 nOffsetTol, sal_uInt16 nWidthTol)
Definition: htmlpars.cxx:367
void SetHtmlImportHdl(const Link< HtmlImportInfo &, void > &rLink)
#define EMPTY_OUSTRING
Definition: global.hxx:213
ScHTMLTableAutoId(ScHTMLTableId &rnUnusedId)
Reference to global unused identifier variable.
Definition: htmlpars.cxx:1792
static void ModifyOffset(ScHTMLColOffset *, sal_uInt16 &nOldOffset, sal_uInt16 &nNewOffset, sal_uInt16 nOffsetTol)
Definition: htmlpars.cxx:401
A map of ScHTMLTable objects.
Definition: htmlpars.cxx:1684
FormulaCommand pE
constexpr TypedWhichId< SvxBoxItem > ATTR_BORDER(150)
virtual ~ScHTMLParser() override
Definition: htmlpars.cxx:199
void Strip(const EditEngine &rEditEngine)
Deletes leading and trailing empty paragraphs from the entry.
Definition: htmlpars.cxx:1661
SCROW Row() const
Definition: address.hxx:261
bool mbBorderOn
Definition: htmlpars.hxx:545
void setWidth(tools::Long nWidth)
OUString aText
SCCOL nColCnt
Definition: eeparser.hxx:109
A single entry containing a line of text or representing a table.
Definition: htmlpars.hxx:271
#define OOO_STRING_SVTOOLS_HTML_AL_center
bool PushEntry(ScHTMLEntryPtr &rxEntry)
Tries to insert the entry into the current cell.
Definition: htmlpars.cxx:2306
void NewActEntry(const ScEEParseEntry *)
Definition: eeimpars.cxx:644
ScHTMLTableStdMap::const_iterator const_iterator
Definition: htmlpars.cxx:1692
const sal_uInt32 SC_HTML_FONTSIZES
Definition: htmlpars.hxx:33
ScHTMLEntry(const SfxItemSet &rItemSet, ScHTMLTableId nTableId=SC_HTML_NO_TABLE)
Definition: htmlpars.cxx:1625
OString strip(std::string_view rIn, char c)
ESelection aSel
Definition: eeparser.hxx:55
void Colonize(ScEEParseEntry *)
Definition: htmlpars.cxx:779
::std::map< SCROW, SCROW > InnerMap
Definition: htmlpars.hxx:144
bool Intersects(const ScRange &rRange) const
Definition: address.cxx:1558
bool IsEmptyCell() const
Returns true, if the current cell does not contain an entry yet.
Definition: htmlpars.cxx:2276
void InsertNewCell(const ScHTMLSize &rSpanSize)
Inserts a new cell in an unused position, starting from current cell position.
Definition: htmlpars.cxx:2385
::std::map< sal_uInt16, InnerMap * > OuterMap
Definition: htmlpars.hxx:148
OUString maTableName
Table of nested HTML tables.
Definition: htmlpars.hxx:525
ScRangeList maHMergedCells
Items for the current cell.
Definition: htmlpars.hxx:532
sal_uIntPtr sal_uLong
std::map< ScHTMLTableId, ScHTMLTablePtr > ScHTMLTableStdMap
Definition: htmlpars.cxx:1688
long Long
void CloseEntry(const HtmlImportInfo *)
Definition: htmlpars.cxx:807
FAMILY_DONTKNOW
void AdjustEnd(const HtmlImportInfo &rInfo)
Sets end point of the entry selection to the end of the import info object.
Definition: htmlpars.cxx:1651
bool IsAtBeginningOfText(const HtmlImportInfo *)
Definition: htmlpars.cxx:1391
ScHTMLTable * TableOn(const HtmlImportInfo &rInfo)
Starts a new table nested in this table (.
Definition: htmlpars.cxx:1952
OUString GetImportFormatName(sal_uInt16 nFormat)
ScHTMLTable * TableOff(const HtmlImportInfo &rInfo)
Closes this table (tag).
Definition: htmlpars.cxx:1958
const SCCOL SCCOL_MAX
Definition: address.hxx:56
NamePropsType m_GlobalProps
Definition: htmlpars.hxx:53
bool mbPreFormText
true = Table borders on.
Definition: htmlpars.hxx:546
sal_Int64 n
css::uno::Reference< css::frame::XModel3 > GetModel() const
const OUString & GetString() const
SCROW nRowMax
Definition: eeparser.hxx:112
aBuf
virtual ~ScHTMLTable()
Definition: htmlpars.cxx:1869
ScSizeVec maCumSizes[2]
Working entry, not yet inserted in a list.
Definition: htmlpars.hxx:540
void InsertText(const HtmlImportInfo &rInfo)
Inserts a text portion into current entry.
Definition: htmlpars.cxx:2900
const_iterator find(const Value &x) const
void CloseTable(const HtmlImportInfo &rInfo)
Closes the current table, regardless on opening tag.
Definition: htmlpars.cxx:3006
ScAddress aEnd
Definition: address.hxx:500
SCCOLROW GetDocSize(ScHTMLOrient eOrient, SCCOLROW nCellPos) const
Returns the resulting document row/column count of the specified HTML row/column. ...
Definition: htmlpars.cxx:2173
void ProcToken(HtmlImportInfo *)
Definition: htmlpars.cxx:1454
constexpr TypedWhichId< SvxFontItem > ATTR_FONT(100)
SCROW mnRow
Definition: htmlpars.hxx:236
virtual ~ScHTMLGlobalTable() override
Definition: htmlpars.cxx:2744
ScHTMLEntryPtr mxCurrEntry
Current entry vector from map for faster access.
Definition: htmlpars.hxx:539
ScHTMLTableId & mrnUnusedId
The created unique table identifier.
Definition: htmlpars.hxx:309
SotClipboardFormatId & operator++(SotClipboardFormatId &eFormat)
SC_DLLPUBLIC void ApplyAttr(SCCOL nCol, SCROW nRow, SCTAB nTab, const SfxPoolItem &rAttr)
Definition: document.cxx:4788
ScDocument & GetDoc()
Definition: htmlpars.hxx:90
const SCROW SCROW_MAX
Definition: address.hxx:55
ScHTMLParser * mpParser
Resulting base address in a Calc document.
Definition: htmlpars.hxx:544
static void ParseStyle(std::u16string_view rStrm)
Definition: htmlpars.cxx:3096
void NextRow(const HtmlImportInfo *)
Definition: htmlpars.cxx:331
void BodyOff(const HtmlImportInfo &rInfo)
Closes the body of the HTML document (</body> tag).
Definition: htmlpars.cxx:2146
ScHTMLEntryVector * mpCurrEntryVector
List of entries for each cell.
Definition: htmlpars.hxx:538
const sal_uInt16 SC_HTML_OFFSET_TOLERANCE_SMALL
Definition: htmlpars.hxx:36
virtual ~ScHTMLQueryParser() override
Definition: htmlpars.cxx:2768
bool mbDataOn
true = Inside of <tr> </tr>.
Definition: htmlpars.hxx:548
ScHTMLQueryParser(EditEngine *pEditEngine, ScDocument *pDoc)
Definition: htmlpars.cxx:2758
void FillEmptyCells()
Fills all empty cells in this and nested tables with dummy parse entries.
Definition: htmlpars.cxx:2570
const Value & front() const
void TableRowOn(const HtmlImportInfo *)
Definition: htmlpars.cxx:982
static OutputDevice * GetDefaultDevice()
WEIGHT_BOLD
constexpr tools::Long Width() const
const ScHTMLTableId SC_HTML_NO_TABLE
Used as table index for normal (non-table) entries in ScHTMLEntry structs.
Definition: htmlpars.hxx:230
SvParser< HtmlTokenId > * pParser
sal_uInt16 nTableWidth
Definition: htmlpars.hxx:168
static void MakeColNoRef(ScHTMLColOffset *, sal_uInt16 nOffset, sal_uInt16 nWidth, sal_uInt16 nOffsetTol, sal_uInt16 nWidthTol)
Definition: htmlpars.cxx:385
void MetaOn(const HtmlImportInfo &rInfo)
Processes the <meta> tag.
Definition: htmlpars.cxx:2949
std::map< ScHTMLPos, ScHTMLEntryVector > maEntryMap
List that owns the parse entries (from ScEEParser).
Definition: htmlpars.hxx:537
virtual const ScHTMLTable * GetGlobalTable() const override
Returns the "global table" which contains the entire HTML document.
Definition: htmlpars.cxx:295
sal_uInt16 nTable
Definition: htmlpars.hxx:164
sal_uInt16 sal_Unicode
sal_uInt16 ScHTMLTableId
Type for a unique identifier for each table.
Definition: htmlpars.hxx:226
ScHTMLTable & mrParentTable
Definition: htmlpars.cxx:1695
void TableRowOff(const HtmlImportInfo *)
Definition: htmlpars.cxx:989
ScRangeList maUsedCells
List of all vertically merged cells.
Definition: htmlpars.hxx:534
SvxCellVerJustify
void RecalcDocSize()
Recalculates the size of all columns/rows in the table, regarding nested tables.
Definition: htmlpars.cxx:2606
sal_uInt16 nTab
Definition: eeparser.hxx:66
void CaptionOff()
Processes the caption of the table (</caption> tag).
Definition: htmlpars.cxx:1969
ScHTMLLayoutParser(EditEngine *, const OUString &rBaseURL, const Size &aPageSize, ScDocument *)
Definition: htmlpars.cxx:203
ScHTMLTable * CreateTable(const HtmlImportInfo &rInfo, bool bPreFormText)
Inserts a new table into the container.
Definition: htmlpars.cxx:1747
bool PutEntry(OUString &rString, sal_Int32 &nCheckPos, SvNumFormatType &nType, sal_uInt32 &nKey, LanguageType eLnge=LANGUAGE_DONTKNOW, bool bReplaceBooleanEquivalent=true)
sal_Int32 SCCOLROW
a type capable of holding either SCCOL or SCROW
Definition: types.hxx:23
ScHTMLGlobalTablePtr mxGlobTable
The title of the document.
Definition: htmlpars.hxx:620
SCROW nRowCnt
Definition: eeparser.hxx:110
constexpr sal_uInt32 NUMBERFORMAT_ENTRY_NOT_FOUND
o3tl::sorted_vector< sal_uLong > ScHTMLColOffset
Definition: htmlpars.hxx:96
bool bEntirePara
Definition: eeparser.hxx:73
int nCount
void Recalc()
Recalculates sizes and resulting positions of all document entries.
Definition: htmlpars.cxx:2748
const OUString & getPropertyValue(const OUString &rElem, const OUString &rClass, const OUString &rPropName) const
Find best-matching property value for given element and class names.
Definition: htmlpars.cxx:118
static OUString GetAbsURL(OUString const &rTheBaseURIRef, OUString const &rTheRelURIRef, EncodeMechanism eEncodeMechanism=EncodeMechanism::WasEncoded, DecodeMechanism eDecodeMechanism=DecodeMechanism::ToIUri, rtl_TextEncoding eCharset=RTL_TEXTENCODING_UTF8)
ScHTMLTable * CloseTable(const HtmlImportInfo &rInfo)
Closes this table (</table> tag) or preformatted text (</pre> tag).
Definition: htmlpars.cxx:2157
void push_back(const ScRange &rRange)
Definition: rangelst.cxx:1142
::std::vector< ScHTMLEntry * > ScHTMLEntryVector
Definition: htmlpars.hxx:451
ScHTMLTable(ScHTMLTable &rParentTable, const HtmlImportInfo &rInfo, bool bPreFormText)
Creates a new HTML table without content.
Definition: htmlpars.cxx:1799
SCTAB Tab() const
Definition: address.hxx:270
#define OOO_STRING_SVTOOLS_HTML_VA_top
sal_uInt16 nWidth
Definition: eeparser.hxx:71
void SetRow(SCROW nRowP)
Definition: address.hxx:274
#define OOO_STRING_SVTOOLS_HTML_META_content_type
void RecalcDocPos(const ScHTMLPos &rBasePos)
Recalculates the position of all cell entries and nested tables.
Definition: htmlpars.cxx:2655
bool mbImportAlways
Definition: htmlpars.hxx:302
bool mbPushEmptyLine
true = Inside of <td> </td> or <th> </th>.
Definition: htmlpars.hxx:549
::std::stack< std::unique_ptr< ScHTMLTableStackEntry > > aTableStack
Definition: htmlpars.hxx:156
SCROW nRowOverlap
Definition: eeparser.hxx:69
OString OUStringToOString(std::u16string_view str, ConnectionSettings const *settings)
sal_Int32 nEndPos
const ScRange * Find(const ScAddress &) const
Definition: rangelst.cxx:1031
size_type size() const
std::unique_ptr< Graphic > pGraphic
Definition: eeparser.hxx:44
void SetCol(SCCOL nColP)
Definition: address.hxx:278
ErrCode Read(SvStream &rInput, const OUString &rBaseURL, EETextFormat, SvKeyValueIterator *pHTTPHeaderAttrs=nullptr)
::std::map< OUString, std::unique_ptr< PropsType > > NamePropsType
Definition: htmlpars.hxx:50
void HeadingOn()
Inserts a heading line (heading tag).
Definition: htmlpars.cxx:1932
const Link< HtmlImportInfo &, void > & GetHtmlImportHdl() const
constexpr TypedWhichId< SvxUnderlineItem > ATTR_FONT_UNDERLINE(104)
sal_uInt16 nOffset
Definition: eeparser.hxx:70
void Image(HtmlImportInfo *)
Definition: htmlpars.cxx:1255
ScHTMLTableId GetTableId() const
Returns the unique identifier of the table.
Definition: htmlpars.hxx:344
sal_uInt16 nColOffset
Definition: htmlpars.hxx:169
void ImplRowOn()
Set internal states for a new table row.
Definition: htmlpars.cxx:2438
void ImplDataOff()
Set internal states for leaving a table cell.
Definition: htmlpars.cxx:2472
static ErrCode LoadGraphic(const OUString &rPath, const OUString &rFilter, Graphic &rGraphic, GraphicFilter *pFilter=nullptr, sal_uInt16 *pDeterminedFormat=nullptr)
ScHTMLTableMap(ScHTMLTable &rParentTable)
Current table, used for fast search.
Definition: htmlpars.cxx:1720
ScHTMLStyles & GetStyles()
Definition: htmlpars.hxx:89
SC_DLLPUBLIC SCCOL MaxCol() const
Definition: document.hxx:871
constexpr TypedWhichId< SvxPostureItem > ATTR_FONT_POSTURE(103)
T * get() const
SC_DLLPUBLIC SvNumberFormatter * GetFormatTable() const
Definition: documen2.cxx:440
sal_Int32 nEndPara
void ImplPushEntryToVector(ScHTMLEntryVector &rEntryVector, ScHTMLEntryPtr &rxEntry)
Pushes the passed entry into the list of the current cell.
Definition: htmlpars.cxx:2298
ScHTMLEntryPtr CreateEntry() const
Creates and returns a new empty flying entry at position (0,0).
Definition: htmlpars.cxx:2286
void add(const char *pElemName, size_t nElemName, const char *pClassName, size_t nClassName, const OUString &aProp, const OUString &aValue)
Definition: htmlpars.cxx:73
const char nVertical
Definition: eeparser.hxx:35
int i
void RowOff(const HtmlImportInfo &rInfo)
Closes the current row (</tr> tag).
Definition: htmlpars.cxx:1999
virtual ~ScHTMLLayoutParser() override
Definition: htmlpars.cxx:230
void PushTableEntry(ScHTMLTableId nTableId)
Pushes a new entry into current cell which references a nested table.
Definition: htmlpars.cxx:2358
void IncCol(SCCOL nDelta=1)
Definition: address.hxx:303
SfxItemSetPtr mxRowItemSet
Items for the entire table.
Definition: htmlpars.hxx:530
sal_uInt32 GetTextLen() const
ScHTMLTable * GetExistingTable(ScHTMLTableId nTableId) const
Tries to find a table from the table container.
Definition: htmlpars.cxx:2368
std::vector< std::shared_ptr< ScEEParseEntry > > maList
Definition: eeparser.hxx:105
sal_Int16 SCCOL
Definition: types.hxx:21
bool ValidCol(SCCOL nCol) const
Definition: document.hxx:874
OUStringBuffer maTitle
Definition: htmlpars.hxx:619
constexpr TypedWhichId< SvxVerJustifyItem > ATTR_VER_JUSTIFY(132)
ScHTMLPos maCurrCell
Size of the table.
Definition: htmlpars.hxx:542
bool mbRowOn
true = Table from preformatted text (<pre> tag).
Definition: htmlpars.hxx:547
void DataOff(const HtmlImportInfo &rInfo)
Closes the current cell (</td> or </th> tag).
Definition: htmlpars.cxx:2123
Point aSpace
Definition: eeparser.hxx:41
void PutItem(const SfxPoolItem &rItem)
Puts the item into the item set of the current entry.
Definition: htmlpars.cxx:1895
void SetCurrTable(ScHTMLTable *pTable) const
Sets a working table with its index for search optimization.
Definition: htmlpars.cxx:1716
ScHTMLTableId mnUnusedId
Pointer to current table (performance).
Definition: htmlpars.hxx:622
sal_uInt16 GetWidthPixel(const HTMLOption &)
Definition: htmlpars.cxx:1361
ColWidthsMap maColWidths
Definition: eeparser.hxx:107
void ProcessToken(const HtmlImportInfo &rInfo)
Handles all possible tags in the HTML document.
Definition: htmlpars.cxx:2815
sal_uInt16 nOffsetTolerance
Definition: htmlpars.hxx:171
ScHTMLColOffset * pLocalColOffset
Definition: htmlpars.hxx:161
ScHTMLTable * mpParentTable
Definition: htmlpars.hxx:523
size_t size() const
Definition: rangelst.hxx:89
#define OOO_STRING_SVTOOLS_HTML_AL_left
static bool IsSpaceCharInfo(const HtmlImportInfo &rInfo)
Returns true, if import info represents a space character.
Definition: htmlpars.cxx:2281
LINESTYLE_SINGLE
The HTML parser for data queries.
Definition: htmlpars.hxx:574
virtual bool ParseMetaOptions(const css::uno::Reference< css::document::XDocumentProperties > &, SvKeyValueIterator *)
Collection of HTML style data parsed from the content of