LibreOffice Module sc (master)  1
htmlpars.cxx
Go to the documentation of this file.
1 /* -*- Mode: C++; tab-width: 4; indent-tabs-mode: nil; c-basic-offset: 4 -*- */
2 /*
3  * This file is part of the LibreOffice project.
4  *
5  * This Source Code Form is subject to the terms of the Mozilla Public
6  * License, v. 2.0. If a copy of the MPL was not distributed with this
7  * file, You can obtain one at http://mozilla.org/MPL/2.0/.
8  *
9  * This file incorporates work covered by the following license notice:
10  *
11  * Licensed to the Apache Software Foundation (ASF) under one or more
12  * contributor license agreements. See the NOTICE file distributed
13  * with this work for additional information regarding copyright
14  * ownership. The ASF licenses this file to you under the Apache
15  * License, Version 2.0 (the "License"); you may not use this file
16  * except in compliance with the License. You may obtain a copy of
17  * the License at http://www.apache.org/licenses/LICENSE-2.0 .
18  */
19 
20 #include <memory>
21 #include <sal/config.h>
22 
23 #include <comphelper/string.hxx>
24 
25 #include <scitems.hxx>
26 
27 #include <editeng/colritem.hxx>
28 #include <editeng/brushitem.hxx>
29 #include <editeng/editeng.hxx>
30 #include <editeng/fhgtitem.hxx>
31 #include <editeng/fontitem.hxx>
32 #include <editeng/postitem.hxx>
33 #include <editeng/udlnitem.hxx>
34 #include <editeng/wghtitem.hxx>
35 #include <editeng/borderline.hxx>
36 #include <editeng/boxitem.hxx>
37 #include <editeng/justifyitem.hxx>
38 #include <sal/log.hxx>
39 #include <sfx2/objsh.hxx>
40 #include <svl/numformat.hxx>
41 #include <svl/intitem.hxx>
42 #include <vcl/graphicfilter.hxx>
43 #include <svtools/parhtml.hxx>
44 #include <svtools/htmlkywd.hxx>
45 #include <svtools/htmltokn.h>
46 
47 #include <vcl/outdev.hxx>
48 #include <vcl/svapp.hxx>
49 #include <tools/urlobj.hxx>
50 #include <osl/diagnose.h>
51 #include <o3tl/string_view.hxx>
52 
53 #include <rtl/tencinfo.h>
54 
55 #include <attrib.hxx>
56 #include <htmlpars.hxx>
57 #include <global.hxx>
58 #include <document.hxx>
59 #include <rangelst.hxx>
60 
61 #include <orcus/css_parser.hpp>
62 
63 #include <com/sun/star/document/XDocumentProperties.hpp>
64 #include <com/sun/star/document/XDocumentPropertiesSupplier.hpp>
65 #include <com/sun/star/frame/XModel.hpp>
66 #include <numeric>
67 #include <utility>
68 #include <officecfg/Office/Common.hxx>
69 
70 using ::editeng::SvxBorderLine;
71 using namespace ::com::sun::star;
72 
74 
75 void ScHTMLStyles::add(const char* pElemName, size_t nElemName, const char* pClassName, size_t nClassName,
76  const OUString& aProp, const OUString& aValue)
77 {
78  if (pElemName)
79  {
80  OUString aElem(pElemName, nElemName, RTL_TEXTENCODING_UTF8);
81  aElem = aElem.toAsciiLowerCase();
82  if (pClassName)
83  {
84  // Both element and class names given.
85  ElemsType::iterator itrElem = m_ElemProps.find(aElem);
86  if (itrElem == m_ElemProps.end())
87  {
88  // new element
89  std::pair<ElemsType::iterator, bool> r =
90  m_ElemProps.insert(std::make_pair(aElem, NamePropsType()));
91  if (!r.second)
92  // insertion failed.
93  return;
94  itrElem = r.first;
95  }
96 
97  NamePropsType& rClsProps = itrElem->second;
98  OUString aClass(pClassName, nClassName, RTL_TEXTENCODING_UTF8);
99  aClass = aClass.toAsciiLowerCase();
100  insertProp(rClsProps, aClass, aProp, aValue);
101  }
102  else
103  {
104  // Element name only. Add it to the element global.
105  insertProp(m_ElemGlobalProps, aElem, aProp, aValue);
106  }
107  }
108  else
109  {
110  if (pClassName)
111  {
112  // Class name only. Add it to the global.
113  OUString aClass(pClassName, nClassName, RTL_TEXTENCODING_UTF8);
114  aClass = aClass.toAsciiLowerCase();
115  insertProp(m_GlobalProps, aClass, aProp, aValue);
116  }
117  }
118 }
119 
121  const OUString& rElem, const OUString& rClass, const OUString& rPropName) const
122 {
123  // First, look into the element-class storage.
124  {
125  auto const itr = m_ElemProps.find(rElem);
126  if (itr != m_ElemProps.end())
127  {
128  const NamePropsType& rClasses = itr->second;
129  NamePropsType::const_iterator itr2 = rClasses.find(rClass);
130  if (itr2 != rClasses.end())
131  {
132  const PropsType& rProps = itr2->second;
133  PropsType::const_iterator itr3 = rProps.find(rPropName);
134  if (itr3 != rProps.end())
135  return itr3->second;
136  }
137  }
138  }
139  // Next, look into the class global storage.
140  {
141  auto const itr = m_GlobalProps.find(rClass);
142  if (itr != m_GlobalProps.end())
143  {
144  const PropsType& rProps = itr->second;
145  PropsType::const_iterator itr2 = rProps.find(rPropName);
146  if (itr2 != rProps.end())
147  return itr2->second;
148  }
149  }
150  // As the last resort, look into the element global storage.
151  {
152  auto const itr = m_ElemGlobalProps.find(rClass);
153  if (itr != m_ElemGlobalProps.end())
154  {
155  const PropsType& rProps = itr->second;
156  PropsType::const_iterator itr2 = rProps.find(rPropName);
157  if (itr2 != rProps.end())
158  return itr2->second;
159  }
160  }
161 
162  return maEmpty; // nothing found.
163 }
164 
// Parameter list and body of a helper whose name line is missing from this excerpt
// (presumably the insertProp() that add() calls with the same argument shape).
// Finds the property map stored under aName in rStore, creating an empty one when
// it does not exist yet, and then emplaces (aProp, aValue) into that map.
166  NamePropsType& rStore, const OUString& aName,
167  const OUString& aProp, const OUString& aValue)
168 {
169  NamePropsType::iterator itr = rStore.find(aName);
170  if (itr == rStore.end())
171  {
172  // new element
173  std::pair<NamePropsType::iterator, bool> r =
174  rStore.insert(std::make_pair(aName, PropsType()));
175  if (!r.second)
176  // insertion failed.
177  return;
178 
179  itr = r.first;
180  }
181 
     // NOTE(review): if PropsType is a standard (unordered_)map, emplace() leaves an
     // already existing aProp entry untouched, i.e. the first value wins - confirm
     // that this is the intended precedence.
182  PropsType& rProps = itr->second;
183  rProps.emplace(aProp, aValue);
184 }
185 
186 // BASE class for HTML parser classes
187 
// Initializer list and body of a constructor whose signature line is missing from
// this excerpt. Forwards pEditEngine to the ScEEParser base, stores pDoc in mpDoc and
// fills the seven maFontHeights slots from the HTML import font size settings
// Size_1 .. Size_7.
189  ScEEParser( pEditEngine ),
190  mpDoc( pDoc )
191 {
     // Each configured size is multiplied by 20 (presumably points -> twips; confirm).
192  maFontHeights[0] = officecfg::Office::Common::Filter::HTML::Import::FontSize::Size_1::get() * 20;
193  maFontHeights[1] = officecfg::Office::Common::Filter::HTML::Import::FontSize::Size_2::get() * 20;
194  maFontHeights[2] = officecfg::Office::Common::Filter::HTML::Import::FontSize::Size_3::get() * 20;
195  maFontHeights[3] = officecfg::Office::Common::Filter::HTML::Import::FontSize::Size_4::get() * 20;
196  maFontHeights[4] = officecfg::Office::Common::Filter::HTML::Import::FontSize::Size_5::get() * 20;
197  maFontHeights[5] = officecfg::Office::Common::Filter::HTML::Import::FontSize::Size_6::get() * 20;
198  maFontHeights[6] = officecfg::Office::Common::Filter::HTML::Import::FontSize::Size_7::get() * 20;
199 }
200 
// Empty function body; its signature line is missing from this excerpt
// (presumably the destructor matching the constructor above - confirm).
202 {
203 }
204 
// Parameter list, initializer list and body of a constructor whose name line is
// missing from this excerpt. Forwards pEditP/pDocP to ScHTMLParser, stores the page
// size and base URL, allocates a fresh locked-range list and a local column offset
// container, and zero-/default-initializes the table bookkeeping members.
206  EditEngine* pEditP, const OUString& rBaseURL, const Size& aPageSizeP,
207  ScDocument* pDocP ) :
208  ScHTMLParser( pEditP, pDocP ),
209  aPageSize( aPageSizeP ),
210  aBaseURL( rBaseURL ),
211  xLockedList( new ScRangeList ),
212  pLocalColOffset( new ScHTMLColOffset ),
213  nFirstTableCell(0),
214  nTableLevel(0),
215  nTable(0),
216  nMaxTable(0),
217  nColCntStart(0),
218  nMaxCol(0),
219  nTableWidth(0),
220  nColOffset(0),
221  nColOffsetStart(0),
222  nOffsetTolerance( SC_HTML_OFFSET_TOLERANCE_SMALL ),
223  bFirstRow( true ),
224  bTabInTabCell( false ),
225  bInCell( false ),
226  bInTitle( false )
227 {
     // Seed both the local and the global offset container with offset 0
     // (width 0, no tolerance).
228  MakeColNoRef( pLocalColOffset, 0, 0, 0, 0 );
229  MakeColNoRef( &maColOffset, 0, 0, 0, 0 );
230 }
231 
// Body of a function whose signature line is missing from this excerpt (presumably
// the destructor). Releases the manually owned column offset containers and the
// per-table row maps.
233 {
     // Drain the table stack; an entry's pLocalColOffset is deleted here unless it
     // is the very pointer this object still holds, which is deleted once below.
234  while ( !aTableStack.empty() )
235  {
236  ScHTMLTableStackEntry * pS = aTableStack.top().get();
237  if ( pS->pLocalColOffset != pLocalColOffset )
238  delete pS->pLocalColOffset;
239  aTableStack.pop();
240  }
241  delete pLocalColOffset;
     // pTables maps to raw pointers; delete the mapped objects before
     // dropping the outer map.
242  if ( pTables )
243  {
244  for( const auto& rEntry : *pTables)
245  delete rEntry.second;
246  pTables.reset();
247  }
248 }
249 
// Imports HTML from rStream through pEdit with HTMLImportHdl installed as import
// handler, then calls Adjust() and converts the collected column offsets into
// column widths. Returns the error code reported by pEdit->Read().
// NOTE: the embedded listing numbers skip 252, 255 and 283, so the declarations of
// aOldLink, pObjSh and pDefaultDev used below are not visible in this excerpt.
250 ErrCode ScHTMLLayoutParser::Read( SvStream& rStream, const OUString& rBaseURL )
251 {
253  pEdit->SetHtmlImportHdl( LINK( this, ScHTMLLayoutParser, HTMLImportHdl ) );
254 
256  bool bLoading = pObjSh && pObjSh->IsLoading();
257 
258  SvKeyValueIteratorRef xValues;
259  SvKeyValueIterator* pAttributes = nullptr;
260  if ( bLoading )
261  pAttributes = pObjSh->GetHeaderAttributes();
262  else
263  {
264  // When not loading, set up fake http headers to force the SfxHTMLParser to use UTF8
265  // (used when pasting from clipboard)
266  const char* pCharSet = rtl_getBestMimeCharsetFromTextEncoding( RTL_TEXTENCODING_UTF8 );
267  if( pCharSet )
268  {
269  OUString aContentType = "text/html; charset=" +
270  OUString::createFromAscii( pCharSet );
271 
272  xValues = new SvKeyValueIterator;
273  xValues->Append( SvKeyValue( OOO_STRING_SVTOOLS_HTML_META_content_type, aContentType ) );
274  pAttributes = xValues.get();
275  }
276  }
277 
278  ErrCode nErr = pEdit->Read( rStream, rBaseURL, EETextFormat::Html, pAttributes );
279 
     // Put back the handler saved in aOldLink.
280  pEdit->SetHtmlImportHdl( aOldLink );
281  // Create column width
282  Adjust();
     // Width of column j-1 is the distance between offsets j-1 and j, converted with
     // PixelToLogic() to MapUnit::MapTwip. maColOffset[0] is read unconditionally;
     // the constructor seeds maColOffset with offset 0.
284  sal_uInt16 nCount = maColOffset.size();
285  sal_uLong nOff = maColOffset[0];
286  Size aSize;
287  for ( sal_uInt16 j = 1; j < nCount; j++ )
288  {
289  aSize.setWidth( maColOffset[j] - nOff );
290  aSize = pDefaultDev->PixelToLogic( aSize, MapMode( MapUnit::MapTwip ) );
291  maColWidths[ j-1 ] = aSize.Width();
292  nOff = maColOffset[j];
293  }
294  return nErr;
295 }
296 
// Body of a function whose signature line is missing from this excerpt.
// It always returns nullptr.
298 {
299  return nullptr;
300 }
301 
// Body of a function whose signature line is missing from this excerpt (presumably
// NewActEntry, given how pE is used). The embedded listing numbers also skip 304, so
// one statement before the first visible one is not shown.
// Positions the selection of mxActEntry: when the previous entry pE exists and has no
// range, the new entry starts where pE ended; the end of the new selection is then
// collapsed onto its start.
303 {
305  if ( pE )
306  {
307  if ( !pE->aSel.HasRange() )
308  { // Completely empty, following text ends up in the same paragraph!
309  mxActEntry->aSel.nStartPara = pE->aSel.nEndPara;
310  mxActEntry->aSel.nStartPos = pE->aSel.nEndPos;
311  }
312  }
313  mxActEntry->aSel.nEndPara = mxActEntry->aSel.nStartPara;
314  mxActEntry->aSel.nEndPos = mxActEntry->aSel.nStartPos;
315 }
316 
// Body of a function whose signature line is missing from this excerpt (presumably
// EntryEnd, per the diagnostic text below). Takes over the end position of rSel into
// pE->aSel when rSel does not end before the start of pE's selection.
318 {
319  if ( rSel.nEndPara >= pE->aSel.nStartPara )
320  {
321  pE->aSel.nEndPara = rSel.nEndPara;
322  pE->aSel.nEndPos = rSel.nEndPos;
323  }
     // rSel starts exactly one paragraph before an entry without a range:
     // tolerated, nothing to update.
324  else if ( rSel.nStartPara == pE->aSel.nStartPara - 1 && !pE->aSel.HasRange() )
325  { // Did not attach a paragraph, but empty, do nothing
326  }
327  else
328  {
329  OSL_FAIL( "EntryEnd: EditEngine ESelection End < Start" );
330  }
331 }
332 
// Body of a function whose signature line is missing from this excerpt (presumably
// NextRow). Closes a still open cell entry, advances the row counter and keeps
// nRowMax at least as large as the new nRowCnt.
// NOTE: the embedded listing numbers skip 338-340, so statements between the row
// counter update and the bFirstRow reset are not visible here.
334 {
335  if ( bInCell )
336  CloseEntry( pInfo );
337  if ( nRowMax < ++nRowCnt )
338  nRowMax = nRowCnt;
341  bFirstRow = false;
342 }
343 
344 bool ScHTMLLayoutParser::SeekOffset( const ScHTMLColOffset* pOffset, sal_uInt16 nOffset,
345  SCCOL* pCol, sal_uInt16 nOffsetTol )
346 {
347  OSL_ENSURE( pOffset, "ScHTMLLayoutParser::SeekOffset - illegal call" );
348  ScHTMLColOffset::const_iterator it = pOffset->find( nOffset );
349  bool bFound = it != pOffset->end();
350  sal_uInt16 nPos = it - pOffset->begin();
351  *pCol = static_cast<SCCOL>(nPos);
352  if ( bFound )
353  return true;
354  sal_uInt16 nCount = pOffset->size();
355  if ( !nCount )
356  return false;
357  // nPos is the position of insertion, that's where the next higher one is (or isn't)
358  if ( nPos < nCount && (((*pOffset)[nPos] - nOffsetTol) <= nOffset) )
359  return true;
360  // Not smaller than everything else? Then compare with the next lower one
361  else if ( nPos && (((*pOffset)[nPos-1] + nOffsetTol) >= nOffset) )
362  {
363  (*pCol)--;
364  return true;
365  }
366  return false;
367 }
368 
369 void ScHTMLLayoutParser::MakeCol( ScHTMLColOffset* pOffset, sal_uInt16& nOffset,
370  sal_uInt16& nWidth, sal_uInt16 nOffsetTol, sal_uInt16 nWidthTol )
371 {
372  OSL_ENSURE( pOffset, "ScHTMLLayoutParser::MakeCol - illegal call" );
373  SCCOL nPos;
374  if ( SeekOffset( pOffset, nOffset, &nPos, nOffsetTol ) )
375  nOffset = static_cast<sal_uInt16>((*pOffset)[nPos]);
376  else
377  pOffset->insert( nOffset );
378  if ( nWidth )
379  {
380  if ( SeekOffset( pOffset, nOffset + nWidth, &nPos, nWidthTol ) )
381  nWidth = static_cast<sal_uInt16>((*pOffset)[nPos]) - nOffset;
382  else
383  pOffset->insert( nOffset + nWidth );
384  }
385 }
386 
387 void ScHTMLLayoutParser::MakeColNoRef( ScHTMLColOffset* pOffset, sal_uInt16 nOffset,
388  sal_uInt16 nWidth, sal_uInt16 nOffsetTol, sal_uInt16 nWidthTol )
389 {
390  OSL_ENSURE( pOffset, "ScHTMLLayoutParser::MakeColNoRef - illegal call" );
391  SCCOL nPos;
392  if ( SeekOffset( pOffset, nOffset, &nPos, nOffsetTol ) )
393  nOffset = static_cast<sal_uInt16>((*pOffset)[nPos]);
394  else
395  pOffset->insert( nOffset );
396  if ( nWidth )
397  {
398  if ( !SeekOffset( pOffset, nOffset + nWidth, &nPos, nWidthTol ) )
399  pOffset->insert( nOffset + nWidth );
400  }
401 }
402 
403 void ScHTMLLayoutParser::ModifyOffset( ScHTMLColOffset* pOffset, sal_uInt16& nOldOffset,
404  sal_uInt16& nNewOffset, sal_uInt16 nOffsetTol )
405 {
406  OSL_ENSURE( pOffset, "ScHTMLLayoutParser::ModifyOffset - illegal call" );
407  SCCOL nPos;
408  if ( !SeekOffset( pOffset, nOldOffset, &nPos, nOffsetTol ) )
409  {
410  if ( SeekOffset( pOffset, nNewOffset, &nPos, nOffsetTol ) )
411  nNewOffset = static_cast<sal_uInt16>((*pOffset)[nPos]);
412  else
413  pOffset->insert( nNewOffset );
414  return ;
415  }
416  nOldOffset = static_cast<sal_uInt16>((*pOffset)[nPos]);
417  SCCOL nPos2;
418  if ( SeekOffset( pOffset, nNewOffset, &nPos2, nOffsetTol ) )
419  {
420  nNewOffset = static_cast<sal_uInt16>((*pOffset)[nPos2]);
421  return ;
422  }
423  tools::Long nDiff = nNewOffset - nOldOffset;
424  if ( nDiff < 0 )
425  {
426  do
427  {
428  const_cast<sal_uLong&>((*pOffset)[nPos]) += nDiff;
429  } while ( nPos-- );
430  }
431  else
432  {
433  do
434  {
435  const_cast<sal_uLong&>((*pOffset)[nPos]) += nDiff;
436  } while ( ++nPos < static_cast<sal_uInt16>(pOffset->size()) );
437  }
438 }
439 
// Body of a function whose signature line is missing from this excerpt (presumably
// SkipLocked; it uses the names pE and bJoin). Moves pE->nCol to the right until the
// cell range of pE no longer intersects any range in xLockedList, or until the
// range would leave the valid columns. When bJoin is set and no column overflow
// occurred, the final range is joined into xLockedList.
441 {
442  if ( !mpDoc->ValidCol(pE->nCol) )
443  return;
444 
445 // Or else this would create a wrong value at ScAddress (chance for an infinite loop)!
446  bool bBadCol = false;
447  bool bAgain;
448  ScRange aRange( pE->nCol, pE->nRow, 0,
449  pE->nCol + pE->nColOverlap - 1, pE->nRow + pE->nRowOverlap - 1, 0 );
450  do
451  {
452  bAgain = false;
453  for ( size_t i = 0, nRanges = xLockedList->size(); i < nRanges; ++i )
454  {
455  ScRange & rR = (*xLockedList)[i];
456  if ( rR.Intersects( aRange ) )
457  {
     // Jump right behind the locked range and rescan the whole list from
     // the start, unless that would leave the valid columns.
458  pE->nCol = rR.aEnd.Col() + 1;
459  SCCOL nTmp = pE->nCol + pE->nColOverlap - 1;
460  if ( pE->nCol > mpDoc->MaxCol() || nTmp > mpDoc->MaxCol() )
461  bBadCol = true;
462  else
463  {
464  bAgain = true;
465  aRange.aStart.SetCol( pE->nCol );
466  aRange.aEnd.SetCol( nTmp );
467  }
468  break;
469  }
470  }
471  } while ( bAgain );
472  if ( bJoin && !bBadCol )
473  xLockedList->Join( aRange );
474 }
475 
// Body of a function whose signature line is missing from this excerpt (presumably
// the Adjust() that Read() calls). Walks all entries of maList and fixes up their row,
// column, column span and row span, using the row maps in pTables and the offsets
// in maColOffset, and updates nColMax / nRowMax.
// NOTE: the embedded listing numbers skip 478, so the first statement of the body is
// not visible here.
477 {
479 
     // Saved (nLastCol, nNextRow, nCurRow) of enclosing tables while nested ones run.
480  std::stack< std::unique_ptr<ScHTMLAdjustStackEntry> > aStack;
481  sal_uInt16 nTab = 0;
482  SCCOL nLastCol = SCCOL_MAX;
483  SCROW nNextRow = 0;
484  SCROW nCurRow = 0;
485  sal_uInt16 nPageWidth = static_cast<sal_uInt16>(aPageSize.Width());
486  InnerMap* pTab = nullptr;
487  for (auto& pE : maList)
488  {
489  if ( pE->nTab < nTab )
490  { // Table finished
491  if ( !aStack.empty() )
492  {
493  std::unique_ptr<ScHTMLAdjustStackEntry> pS = std::move(aStack.top());
494  aStack.pop();
495 
496  nLastCol = pS->nLastCol;
497  nNextRow = pS->nNextRow;
498  nCurRow = pS->nCurRow;
499  }
500  nTab = pE->nTab;
501  if (pTables)
502  {
503  OuterMap::const_iterator it = pTables->find( nTab );
504  if ( it != pTables->end() )
505  pTab = it->second;
506  }
507 
508  }
     // Keep the row the entry had before adjusting; used for the row span
     // look-ups further down.
509  SCROW nRow = pE->nRow;
     // A column that does not advance past the previous entry's column
     // is taken as the start of a new row.
510  if ( pE->nCol <= nLastCol )
511  { // Next row
512  if ( pE->nRow < nNextRow )
513  pE->nRow = nCurRow = nNextRow;
514  else
515  nCurRow = nNextRow = pE->nRow;
     // nR: value stored for nCurRow in the current table's row map
     // (0 when there is none); it decides how far nNextRow advances.
516  SCROW nR = 0;
517  if ( pTab )
518  {
519  InnerMap::const_iterator it = pTab->find( nCurRow );
520  if ( it != pTab->end() )
521  nR = it->second;
522  }
523  if ( nR )
524  nNextRow += nR;
525  else
526  nNextRow++;
527  }
528  else
529  pE->nRow = nCurRow;
530  nLastCol = pE->nCol; // Read column
531  if ( pE->nTab > nTab )
532  { // New table
533  aStack.push( std::make_unique<ScHTMLAdjustStackEntry>(
534  nLastCol, nNextRow, nCurRow ) );
535  nTab = pE->nTab;
536  if ( pTables )
537  {
538  OuterMap::const_iterator it = pTables->find( nTab );
539  if ( it != pTables->end() )
540  pTab = it->second;
541  }
542  // New line spacing
543  SCROW nR = 0;
544  if ( pTab )
545  {
546  InnerMap::const_iterator it = pTab->find( nCurRow );
547  if ( it != pTab->end() )
548  nR = it->second;
549  }
550  if ( nR )
551  nNextRow = nCurRow + nR;
552  else
553  nNextRow = nCurRow + 1;
554  }
     // Entries with nTab == 0 get the full page width.
555  if ( nTab == 0 )
556  pE->nWidth = nPageWidth;
557  else
558  { // Real table, no paragraphs on the field
559  if ( pTab )
560  {
     // The loop bound is the row span before it is enlarged below.
561  SCROW nRowSpan = pE->nRowOverlap;
562  for ( SCROW j=0; j < nRowSpan; j++ )
563  { // RowSpan resulting from merged rows
564  SCROW nRows = 0;
565  InnerMap::const_iterator it = pTab->find( nRow+j );
566  if ( it != pTab->end() )
567  nRows = it->second;
568  if ( nRows > 1 )
569  {
570  pE->nRowOverlap += nRows - 1;
571  if ( j == 0 )
572  { // Merged rows move the next row
573  SCROW nTmp = nCurRow + nRows;
574  if ( nNextRow < nTmp )
575  nNextRow = nTmp;
576  }
577  }
578  }
579  }
580  }
581  // Real column
     // The return value is deliberately ignored; only pE->nCol is wanted.
582  (void)SeekOffset( &maColOffset, pE->nOffset, &pE->nCol, nOffsetTolerance );
583  SCCOL nColBeforeSkip = pE->nCol;
584  SkipLocked(pE.get(), false);
     // The entry was pushed to the right by a locked range: re-derive
     // its offset from the new column.
585  if ( pE->nCol != nColBeforeSkip )
586  {
587  SCCOL nCount = static_cast<SCCOL>(maColOffset.size());
588  if ( nCount <= pE->nCol )
589  {
590  pE->nOffset = static_cast<sal_uInt16>(maColOffset[nCount-1]);
591  MakeCol( &maColOffset, pE->nOffset, pE->nWidth, nOffsetTolerance, nOffsetTolerance );
592  }
593  else
594  {
595  pE->nOffset = static_cast<sal_uInt16>(maColOffset[pE->nCol]);
596  }
597  }
     // Column span = distance between the columns of the left and the right
     // edge, but never less than 1.
598  SCCOL nPos;
599  if ( pE->nWidth && SeekOffset( &maColOffset, pE->nOffset + pE->nWidth, &nPos, nOffsetTolerance ) )
600  pE->nColOverlap = (nPos > pE->nCol ? nPos - pE->nCol : 1);
601  else
602  {
603  //FIXME: This may not be correct, but works anyway ...
604  pE->nColOverlap = 1;
605  }
606  xLockedList->Join( ScRange( pE->nCol, pE->nRow, 0,
607  pE->nCol + pE->nColOverlap - 1, pE->nRow + pE->nRowOverlap - 1, 0 ) );
608  // Take over MaxDimensions
609  SCCOL nColTmp = pE->nCol + pE->nColOverlap;
610  if ( nColMax < nColTmp )
611  nColMax = nColTmp;
612  SCROW nRowTmp = pE->nRow + pE->nRowOverlap;
613  if ( nRowMax < nRowTmp )
614  nRowMax = nRowTmp;
615  }
616 }
617 
// Body of a function whose signature line is missing from this excerpt (presumably
// GetWidth). Returns pE->nWidth when it is set. Otherwise the width is derived from
// pLocalColOffset: the offset stored at index (nCol - nColCntStart + nColOverlap),
// clamped to the valid index range, minus pE->nOffset. Returns 0 when that offset
// is not to the right of pE->nOffset.
619 {
620  if ( pE->nWidth )
621  return pE->nWidth;
622  sal_Int32 nTmp = std::min( static_cast<sal_Int32>( pE->nCol -
623  nColCntStart + pE->nColOverlap),
624  static_cast<sal_Int32>( pLocalColOffset->size() - 1));
     // nTmp is negative when the container is empty or the column lies before
     // nColCntStart; index 0 is used then.
     // NOTE(review): with an empty container index 0 is still read - confirm callers
     // guarantee at least one entry.
625  SCCOL nPos = (nTmp < 0 ? 0 : static_cast<SCCOL>(nTmp));
626  sal_uInt16 nOff2 = static_cast<sal_uInt16>((*pLocalColOffset)[nPos]);
627  if ( pE->nOffset < nOff2 )
628  return nOff2 - pE->nOffset;
629  return 0;
630 }
631 
// Body of a function whose signature line is missing from this excerpt (SetWidths,
// per the diagnostic texts below). Computes column offsets for the current table
// (nTable) into pLocalColOffset, assigns offset and width to the table's entries in
// maList starting at nFirstTableCell, and registers the result in maColOffset.
// NOTE: the embedded listing numbers skip 644 and 734; those lines are not visible.
633 {
634  SCCOL nCol;
635  if ( !nTableWidth )
636  nTableWidth = static_cast<sal_uInt16>(aPageSize.Width());
637  SCCOL nColsPerRow = nMaxCol - nColCntStart;
638  if ( nColsPerRow <= 0 )
639  nColsPerRow = 1;
640  if ( pLocalColOffset->size() <= 2 )
641  { // Only PageSize, there was no width setting
     // Spread the table width evenly over the columns.
642  sal_uInt16 nWidth = nTableWidth / static_cast<sal_uInt16>(nColsPerRow);
643  sal_uInt16 nOff = nColOffsetStart;
645  for ( nCol = 0; nCol <= nColsPerRow; ++nCol, nOff = nOff + nWidth )
646  {
647  MakeColNoRef( pLocalColOffset, nOff, 0, 0, 0 );
648  }
649  nTableWidth = static_cast<sal_uInt16>(pLocalColOffset->back() - pLocalColOffset->front());
650  for ( size_t i = nFirstTableCell, nListSize = maList.size(); i < nListSize; ++i )
651  {
652  auto& pE = maList[ i ];
653  if ( pE->nTab == nTable )
654  {
655  pE->nOffset = static_cast<sal_uInt16>((*pLocalColOffset)[pE->nCol - nColCntStart]);
656  pE->nWidth = 0; // to be recalculated later
657  }
658  }
659  }
660  else
661  { // Some without width
662  // Why actually no pE?
663  if ( nFirstTableCell < maList.size() )
664  {
     // pOffsets has one more slot than pWidths: nColsPerRow columns have
     // nColsPerRow+1 edges.
665  std::unique_ptr<sal_uInt16[]> pOffsets(new sal_uInt16[ nColsPerRow+1 ]);
666  memset( pOffsets.get(), 0, (nColsPerRow+1) * sizeof(sal_uInt16) );
667  std::unique_ptr<sal_uInt16[]> pWidths(new sal_uInt16[ nColsPerRow ]);
668  memset( pWidths.get(), 0, nColsPerRow * sizeof(sal_uInt16) );
669  pOffsets[0] = nColOffsetStart;
     // Pass 1: collect the known widths per column from entries that carry
     // a width.
670  for ( size_t i = nFirstTableCell, nListSize = maList.size(); i < nListSize; ++i )
671  {
672  auto& pE = maList[ i ];
673  if ( pE->nTab == nTable && pE->nWidth )
674  {
675  nCol = pE->nCol - nColCntStart;
676  if ( nCol < nColsPerRow )
677  {
678  if ( pE->nColOverlap == 1 )
679  {
680  if ( pWidths[nCol] < pE->nWidth )
681  pWidths[nCol] = pE->nWidth;
682  }
683  else
684  { // try to find a single undefined width
     // If exactly one spanned column has no width yet, it gets
     // what remains of the entry's width after the known ones.
685  sal_uInt16 nTotal = 0;
686  bool bFound = false;
687  SCCOL nHere = 0;
688  SCCOL nStop = std::min( static_cast<SCCOL>(nCol + pE->nColOverlap), nColsPerRow );
689  for ( ; nCol < nStop; nCol++ )
690  {
691  if ( pWidths[nCol] )
692  nTotal = nTotal + pWidths[nCol];
693  else
694  {
695  if ( bFound )
696  {
697  bFound = false;
698  break; // for
699  }
700  bFound = true;
701  nHere = nCol;
702  }
703  }
704  if ( bFound && pE->nWidth > nTotal )
705  pWidths[nHere] = pE->nWidth - nTotal;
706  }
707  }
708  }
709  }
     // Pass 2: sum the known widths and count the columns still without one.
710  sal_uInt16 nWidths = 0;
711  sal_uInt16 nUnknown = 0;
712  for ( nCol = 0; nCol < nColsPerRow; nCol++ )
713  {
714  if ( pWidths[nCol] )
715  nWidths = nWidths + pWidths[nCol];
716  else
717  nUnknown++;
718  }
719  if ( nUnknown )
720  {
     // Unknown columns share the remaining table width; when the known
     // widths already use it up, they share the full table width.
721  sal_uInt16 nW = ((nWidths < nTableWidth) ?
722  ((nTableWidth - nWidths) / nUnknown) :
723  (nTableWidth / nUnknown));
724  for ( nCol = 0; nCol < nColsPerRow; nCol++ )
725  {
726  if ( !pWidths[nCol] )
727  pWidths[nCol] = nW;
728  }
729  }
     // Running sum of the widths gives the offset of every column edge.
730  for ( nCol = 1; nCol <= nColsPerRow; nCol++ )
731  {
732  pOffsets[nCol] = pOffsets[nCol-1] + pWidths[nCol-1];
733  }
735  for ( nCol = 0; nCol <= nColsPerRow; nCol++ )
736  {
737  MakeColNoRef( pLocalColOffset, pOffsets[nCol], 0, 0, 0 );
738  }
739  nTableWidth = pOffsets[nColsPerRow] - pOffsets[0];
740 
     // Pass 3: give every entry of this table its offset and the width
     // spanned by its columns.
741  for ( size_t i = nFirstTableCell, nListSize = maList.size(); i < nListSize; ++i )
742  {
743  auto& pE = maList[ i ];
744  if ( pE->nTab == nTable )
745  {
746  nCol = pE->nCol - nColCntStart;
747  OSL_ENSURE( nCol < nColsPerRow, "ScHTMLLayoutParser::SetWidths: column overflow" );
748  if ( nCol < nColsPerRow )
749  {
750  pE->nOffset = pOffsets[nCol];
751  nCol = nCol + pE->nColOverlap;
752  if ( nCol > nColsPerRow )
753  nCol = nColsPerRow;
754  pE->nWidth = pOffsets[nCol] - pE->nOffset;
755  }
756  }
757  }
758  }
759  }
760  if ( !pLocalColOffset->empty() )
761  {
     // Grow the page width when the table is wider than the page.
762  sal_uInt16 nMax = static_cast<sal_uInt16>(pLocalColOffset->back());
763  if ( aPageSize.Width() < nMax )
764  aPageSize.setWidth( nMax );
765  if (nTableLevel == 0)
766  {
767  // Local table is very outer table, create missing offsets.
768  for (auto it = pLocalColOffset->begin(); it != pLocalColOffset->end(); ++it)
769  {
770  // Only exact offsets, do not use MakeColNoRef().
771  if (maColOffset.find(*it) == maColOffset.end())
772  maColOffset.insert(*it);
773  }
774  }
775  }
     // Finally make sure every entry has a width and register its edges in the
     // global offset container.
776  for ( size_t i = nFirstTableCell, nListSize = maList.size(); i < nListSize; ++i )
777  {
778  auto& pE = maList[ i ];
779  if ( pE->nTab == nTable )
780  {
781  if ( !pE->nWidth )
782  {
783  pE->nWidth = GetWidth(pE.get());
784  OSL_ENSURE( pE->nWidth, "SetWidths: pE->nWidth == 0" );
785  }
786  MakeCol( &maColOffset, pE->nOffset, pE->nWidth, nOffsetTolerance, nOffsetTolerance );
787  }
788  }
789 }
790 
// Body of a function whose signature line is missing from this excerpt (presumably
// Colonize). Assigns column, row and offset to entry pE and advances nColOffset past
// the entry.
// NOTE: the embedded listing numbers skip 811, 815 and 816, so statements after the
// GetWidth() call and before the closing brace are not visible here.
792 {
     // SCCOL_MAX / SCROW_MAX are treated as "not set yet": take the running counters.
793  if ( pE->nCol == SCCOL_MAX )
794  pE->nCol = nColCnt;
795  if ( pE->nRow == SCROW_MAX )
796  pE->nRow = nRowCnt;
797  SCCOL nCol = pE->nCol;
798  SkipLocked( pE ); // Change of columns to the right
799 
800  if ( nCol < pE->nCol )
801  { // Replaced
     // The entry was moved right; pick the local offset of its new column, or the
     // last known offset when the column lies beyond the container.
802  nCol = pE->nCol - nColCntStart;
803  SCCOL nCount = static_cast<SCCOL>(pLocalColOffset->size());
804  if ( nCol < nCount )
805  nColOffset = static_cast<sal_uInt16>((*pLocalColOffset)[nCol]);
806  else
807  nColOffset = static_cast<sal_uInt16>((*pLocalColOffset)[nCount - 1]);
808  }
809  pE->nOffset = nColOffset;
810  sal_uInt16 nWidth = GetWidth( pE );
     // Only an entry that already had a width gets the computed width written back.
812  if ( pE->nWidth )
813  pE->nWidth = nWidth;
814  nColOffset = pE->nOffset + nWidth;
817 }
818 
// Body of a function whose signature line is missing from this excerpt (CloseEntry,
// per the diagnostic text below). Finishes the current entry mxActEntry: places it in
// the grid, updates the column maxima, trims empty paragraphs from its selection,
// appends it to maList and starts a new current entry.
820 {
821  bInCell = false;
822  if ( bTabInTabCell )
823  { // From the stack in TableOff
824  bTabInTabCell = false;
825  NewActEntry(maList.back().get()); // New free flying mxActEntry
826  return ;
827  }
828  if (mxActEntry->nTab == 0)
829  mxActEntry->nWidth = static_cast<sal_uInt16>(aPageSize.Width());
830  Colonize(mxActEntry.get());
831  nColCnt = mxActEntry->nCol + mxActEntry->nColOverlap;
832  if ( nMaxCol < nColCnt )
833  nMaxCol = nColCnt; // TableStack MaxCol
834  if ( nColMax < nColCnt )
835  nColMax = nColCnt; // Global MaxCol for ScEEParser GetDimensions!
836  EntryEnd(mxActEntry.get(), pInfo->aSelection);
837  ESelection& rSel = mxActEntry->aSel;
838  while ( rSel.nStartPara < rSel.nEndPara
839  && pEdit->GetTextLen( rSel.nStartPara ) == 0 )
840  { // Strip preceding empty paragraphs
841  rSel.nStartPara++;
842  }
843  while ( rSel.nEndPos == 0 && rSel.nEndPara > rSel.nStartPara )
844  { // Strip successive empty paragraphs
845  rSel.nEndPara--;
846  rSel.nEndPos = pEdit->GetTextLen( rSel.nEndPara );
847  }
848  if ( rSel.nStartPara > rSel.nEndPara )
849  { // Gives GPF in CreateTextObject
850  OSL_FAIL( "CloseEntry: EditEngine ESelection Start > End" );
851  rSel.nEndPara = rSel.nStartPara;
852  }
     // An entry whose selection has a range is marked with the line break cell item.
853  if ( rSel.HasRange() )
854  mxActEntry->aItemSet.Put( ScLineBreakCell(true) );
855  maList.push_back(mxActEntry);
856  NewActEntry(mxActEntry.get()); // New free flying mxActEntry
857 }
858 
859 IMPL_LINK( ScHTMLLayoutParser, HTMLImportHdl, HtmlImportInfo&, rInfo, void )
860 {
861  switch ( rInfo.eState )
862  {
863  case HtmlImportState::NextToken:
864  ProcToken( &rInfo );
865  break;
866  case HtmlImportState::Start:
867  break;
868  case HtmlImportState::End:
869  if ( rInfo.aSelection.nEndPos )
870  {
871  // If text remains: create paragraph, without calling CloseEntry().
872  if( bInCell ) // ...but only in opened table cells.
873  {
874  bInCell = false;
875  NextRow( &rInfo );
876  bInCell = true;
877  }
878  CloseEntry( &rInfo );
879  }
880  while ( nTableLevel > 0 )
881  TableOff( &rInfo ); // close tables, if </TABLE> missing
882  break;
883  case HtmlImportState::SetAttr:
884  break;
885  case HtmlImportState::InsertText:
886  break;
887  case HtmlImportState::InsertPara:
888  if ( nTableLevel < 1 )
889  {
890  CloseEntry( &rInfo );
891  NextRow( &rInfo );
892  }
893  break;
894  case HtmlImportState::InsertField:
895  break;
896  default:
897  OSL_FAIL("HTMLImportHdl: unknown ImportInfo.eState");
898  }
899 }
900 
// Body of a function whose signature line is missing from this excerpt; it handles
// the opening of a table cell (<TD> / <TH>, per the diagnostic text below). Closes a
// still open cell, opens an implicit table when none is open, reads the cell's HTML
// options into mxActEntry and stamps the entry with the current column, row and
// table number.
902 {
903  if ( bInCell )
904  CloseEntry( pInfo );
905  if ( !nTableLevel )
906  {
907  OSL_FAIL( "dumbo doc! <TH> or <TD> without previous <TABLE>" );
908  TableOn( pInfo );
909  }
910  bInCell = true;
     // Header cells are centered by default; an explicit ALIGN option turns this off.
911  bool bHorJustifyCenterTH = (pInfo->nToken == HtmlTokenId::TABLEHEADER_ON);
912  const HTMLOptions& rOptions = static_cast<HTMLParser*>(pInfo->pParser)->GetOptions();
913  for (const auto & rOption : rOptions)
914  {
915  switch( rOption.GetToken() )
916  {
917  case HtmlOptionId::COLSPAN:
918  {
919  mxActEntry->nColOverlap = static_cast<SCCOL>(rOption.GetString().toInt32());
920  }
921  break;
922  case HtmlOptionId::ROWSPAN:
923  {
924  mxActEntry->nRowOverlap = static_cast<SCROW>(rOption.GetString().toInt32());
925  }
926  break;
927  case HtmlOptionId::ALIGN:
928  {
929  bHorJustifyCenterTH = false;
930  SvxCellHorJustify eVal;
931  const OUString& rOptVal = rOption.GetString();
932  if ( rOptVal.equalsIgnoreAsciiCase( OOO_STRING_SVTOOLS_HTML_AL_right ) )
933  eVal = SvxCellHorJustify::Right;
934  else if ( rOptVal.equalsIgnoreAsciiCase( OOO_STRING_SVTOOLS_HTML_AL_center ) )
935  eVal = SvxCellHorJustify::Center;
936  else if ( rOptVal.equalsIgnoreAsciiCase( OOO_STRING_SVTOOLS_HTML_AL_left ) )
937  eVal = SvxCellHorJustify::Left;
938  else
939  eVal = SvxCellHorJustify::Standard;
     // Unrecognized values leave the horizontal justification untouched.
940  if ( eVal != SvxCellHorJustify::Standard )
941  mxActEntry->aItemSet.Put(SvxHorJustifyItem(eVal, ATTR_HOR_JUSTIFY));
942  }
943  break;
944  case HtmlOptionId::VALIGN:
945  {
946  SvxCellVerJustify eVal;
947  const OUString& rOptVal = rOption.GetString();
948  if ( rOptVal.equalsIgnoreAsciiCase( OOO_STRING_SVTOOLS_HTML_VA_top ) )
949  eVal = SvxCellVerJustify::Top;
950  else if ( rOptVal.equalsIgnoreAsciiCase( OOO_STRING_SVTOOLS_HTML_VA_middle ) )
951  eVal = SvxCellVerJustify::Center;
952  else if ( rOptVal.equalsIgnoreAsciiCase( OOO_STRING_SVTOOLS_HTML_VA_bottom ) )
953  eVal = SvxCellVerJustify::Bottom;
954  else
955  eVal = SvxCellVerJustify::Standard;
     // Unlike ALIGN, the item is put even for the Standard fallback.
956  mxActEntry->aItemSet.Put(SvxVerJustifyItem(eVal, ATTR_VER_JUSTIFY));
957  }
958  break;
959  case HtmlOptionId::WIDTH:
960  {
961  mxActEntry->nWidth = GetWidthPixel(rOption);
962  }
963  break;
964  case HtmlOptionId::BGCOLOR:
965  {
966  Color aColor;
967  rOption.GetColor( aColor );
968  mxActEntry->aItemSet.Put(SvxBrushItem(aColor, ATTR_BACKGROUND));
969  }
970  break;
971  case HtmlOptionId::SDVAL:
972  {
973  mxActEntry->pValStr = rOption.GetString();
974  }
975  break;
976  case HtmlOptionId::SDNUM:
977  {
978  mxActEntry->pNumStr = rOption.GetString();
979  }
980  break;
981  default: break;
982  }
983  }
984 
985  mxActEntry->nCol = nColCnt;
986  mxActEntry->nRow = nRowCnt;
987  mxActEntry->nTab = nTable;
988 
989  if ( bHorJustifyCenterTH )
990  mxActEntry->aItemSet.Put(
991  SvxHorJustifyItem( SvxCellHorJustify::Center, ATTR_HOR_JUSTIFY) );
992 }
993 
// Body of a function whose signature line is missing from this excerpt (presumably
// the handler for an opening table row). If cells were added since the row started
// (nColCnt > nColCntStart), the previous row is finished first.
// NOTE: the embedded listing numbers skip 998; one statement is not visible here.
995 {
996  if ( nColCnt > nColCntStart )
997  NextRow( pInfo ); // The optional TableRowOff wasn't there
999 }
1000 
// Body of a function whose signature line is missing from this excerpt (presumably
// the handler for a closing table row). It only forwards to NextRow().
1002 {
1003  NextRow( pInfo );
1004 }
1005 
// Body of a function whose signature line is missing from this excerpt (presumably
// the handler for a closing table cell). Closes the current entry if a cell is open.
1007 {
1008  if ( bInCell )
1009  CloseEntry( pInfo ); // Only if it really was one
1010 }
1011 
// Body of a function whose signature line is missing from this excerpt (presumably
// TableOn). Opens a new table: pushes the current state on aTableStack, determines
// the table width (from the enclosing cell and/or the WIDTH option) and assigns a new
// table number.
// NOTE: the embedded listing numbers skip several lines (1019-1021, 1058, 1060,
// 1075-1077, 1081, 1108-1109). In particular the argument lists of both
// make_unique<ScHTMLTableStackEntry>() calls are incomplete, and the statement
// guarded by "if (nMaxTable > 0)" is missing, so the line that follows it in this
// listing is not necessarily its body.
1013 {
1014  if ( ++nTableLevel > 1 )
1015  { // Table in Table
1016  sal_uInt16 nTmpColOffset = nColOffset; // Will be changed in Colonize()
1017  Colonize(mxActEntry.get());
1018  aTableStack.push( std::make_unique<ScHTMLTableStackEntry>(
1022  bFirstRow ) );
1023  sal_uInt16 nLastWidth = nTableWidth;
1024  nTableWidth = GetWidth(mxActEntry.get());
1025  if ( nTableWidth == nLastWidth && nMaxCol - nColCntStart > 1 )
1026  { // There must be more than one, so this one cannot be enough
1027  nTableWidth = nLastWidth / static_cast<sal_uInt16>((nMaxCol - nColCntStart));
1028  }
     // Remember the width derived from the enclosing cell; it is compared against
     // an explicit WIDTH option further down.
1029  nLastWidth = nTableWidth;
1030  if ( pInfo->nToken == HtmlTokenId::TABLE_ON )
1031  { // It can still be TD or TH, if we didn't have a TABLE earlier
1032  const HTMLOptions& rOptions = static_cast<HTMLParser*>(pInfo->pParser)->GetOptions();
1033  for (const auto & rOption : rOptions)
1034  {
1035  switch( rOption.GetToken() )
1036  {
1037  case HtmlOptionId::WIDTH:
1038  { // Percent: of document width or outer cell
1039  nTableWidth = GetWidthPixel( rOption );
1040  }
1041  break;
1042  case HtmlOptionId::BORDER:
1043  // Border is: ((pOption->GetString().Len() == 0) || (pOption->GetNumber() != 0));
1044  break;
1045  default: break;
1046  }
1047  }
1048  }
1049  bInCell = false;
1050  if ( bTabInTabCell && (nTableWidth >= nLastWidth) )
1051  { // Multiple tables in one cell, underneath each other
1052  bTabInTabCell = false;
1053  NextRow( pInfo );
1054  }
1055  else
1056  { // It start's in this cell or next to each other
1057  bTabInTabCell = false;
1059  nColOffset = nTmpColOffset;
1061  }
1062 
1063  NewActEntry(!maList.empty() ? maList.back().get() : nullptr); // New free flying mxActEntry
     // The nested table starts with its own, empty list of locked ranges.
1064  xLockedList = new ScRangeList;
1065  }
1066  else
1067  { // Simple table at the document level
1068  EntryEnd(mxActEntry.get(), pInfo->aSelection);
1069  if (mxActEntry->aSel.HasRange())
1070  { // Flying text left
1071  CloseEntry( pInfo );
1072  NextRow( pInfo );
1073  }
1074  aTableStack.push( std::make_unique<ScHTMLTableStackEntry>(
1078  bFirstRow ) );
1079  // As soon as we have multiple tables we need to be tolerant with the offsets.
1080  if (nMaxTable > 0)
1082  nTableWidth = 0;
1083  if ( pInfo->nToken == HtmlTokenId::TABLE_ON )
1084  {
1085  // It can still be TD or TH, if we didn't have a TABLE earlier
1086  const HTMLOptions& rOptions = static_cast<HTMLParser*>(pInfo->pParser)->GetOptions();
1087  for (const auto & rOption : rOptions)
1088  {
1089  switch( rOption.GetToken() )
1090  {
1091  case HtmlOptionId::WIDTH:
1092  { // Percent: of document width or outer cell
1093  nTableWidth = GetWidthPixel( rOption );
1094  }
1095  break;
1096  case HtmlOptionId::BORDER:
1097  //BorderOn is: ((pOption->GetString().Len() == 0) || (pOption->GetNumber() != 0));
1098  break;
1099  default: break;
1100  }
1101  }
1102  }
1103  }
     // Every opened table gets a fresh number; its cells start at the current end
     // of maList.
1104  nTable = ++nMaxTable;
1105  bFirstRow = true;
1106  nFirstTableCell = maList.size();
1107 
1110 }
1111 
// Body of the handler for a closing </TABLE> tag. The signature line is not
// part of this listing; presumably this is ScHTMLLayoutParser::TableOff, which
// the token switch further down calls for HtmlTokenId::TABLE_OFF.
// It closes a still-open cell and row, then either leaves one nesting level
// (restoring the enclosing table's state from aTableStack) or finishes a
// top-level table.
1113 {
1114  if ( bInCell )
1115  CloseEntry( pInfo );
1116  if ( nColCnt > nColCntStart )
1117  TableRowOff( pInfo ); // The optional TableRowOff wasn't
      // A closing tag without any open table is reported and ignored.
1118  if ( !nTableLevel )
1119  {
1120  OSL_FAIL( "dumbo doc! </TABLE> without opening <TABLE>" );
1121  return ;
1122  }
1123  if ( --nTableLevel > 0 )
1124  { // Table in Table done
1125  if ( !aTableStack.empty() )
1126  {
      // Take ownership of the stack entry; it is destroyed at the end of this scope.
1127  std::unique_ptr<ScHTMLTableStackEntry> pS = std::move(aTableStack.top());
1128  aTableStack.pop();
1129 
1130  auto& pE = pS->xCellEntry;
      // Rows used by the nested table: current row counter minus the row
      // counter stored in the stack entry.
1131  SCROW nRows = nRowCnt - pS->nRowCnt;
1132  if ( nRows > 1 )
1133  { // Insert size of table at this position
1134  SCROW nRow = pS->nRowCnt;
1135  sal_uInt16 nTab = pS->nTable;
1136  if ( !pTables )
1137  pTables.reset( new OuterMap );
1138  // Height of outer table
1139  OuterMap::const_iterator it = pTables->find( nTab );
1140  InnerMap* pTab1;
1141  if ( it == pTables->end() )
1142  {
1143  pTab1 = new InnerMap;
1144  (*pTables)[ nTab ] = pTab1;
1145  }
1146  else
1147  pTab1 = it->second;
1148  SCROW nRowSpan = pE->nRowOverlap;
1149  SCROW nRowKGV;
1150  SCROW nRowsPerRow1; // Outer table
1151  SCROW nRowsPerRow2; // Inner table
1152  if ( nRowSpan > 1 )
1153  { // LCM to which we can map the inner and outer rows
1154  nRowKGV = std::lcm( nRowSpan, nRows );
1155  nRowsPerRow1 = nRowKGV / nRowSpan;
1156  nRowsPerRow2 = nRowKGV / nRows;
1157  }
1158  else
1159  {
1160  nRowKGV = nRowsPerRow1 = nRows;
1161  nRowsPerRow2 = 1;
1162  }
1163  InnerMap* pTab2 = nullptr;
1164  if ( nRowsPerRow2 > 1 )
1165  { // Height of the inner table
1166  pTab2 = new InnerMap;
1167  (*pTables)[ nTable ] = pTab2;
1168  }
1169  // Abuse void* Data entry of the Table class for height mapping
1170  if ( nRowKGV > 1 )
1171  {
1172  if ( nRowsPerRow1 > 1 )
1173  { // Outer
1174  for ( SCROW j=0; j < nRowSpan; j++ )
1175  {
1176  sal_uLong nRowKey = nRow + j;
      // operator[] inserts a zero entry when the row has no value yet.
1177  SCROW nR = (*pTab1)[ nRowKey ];
1178  if ( !nR )
1179  (*pTab1)[ nRowKey ] = nRowsPerRow1;
1180  else if ( nRowsPerRow1 > nR )
1181  (*pTab1)[ nRowKey ] = nRowsPerRow1;
1182  //TODO: How can we improve on this?
1183  else if ( nRowsPerRow1 < nR && nRowSpan == 1
1184  && nTable == nMaxTable )
1185  { // Still some space left, merge in a better way (if possible)
1186  SCROW nAdd = nRowsPerRow1 - (nR % nRowsPerRow1);
1187  nR += nAdd;
1188  if ( (nR % nRows) == 0 )
1189  { // Only if representable
1190  SCROW nR2 = (*pTab1)[ nRowKey+1 ];
1191  if ( nR2 > nAdd )
1192  { // Only if we really have enough space
1193  (*pTab1)[ nRowKey ] = nR;
1194  (*pTab1)[ nRowKey+1 ] = nR2 - nAdd;
1195  nRowsPerRow2 = nR / nRows;
1196  }
1197  }
1198  }
1199  }
1200  }
1201  if ( nRowsPerRow2 > 1 )
1202  { // Inner
1203  if ( !pTab2 )
1204  { // nRowsPerRow2 could have been changed in the loop above
1205  pTab2 = new InnerMap;
1206  (*pTables)[ nTable ] = pTab2;
1207  }
1208  for ( SCROW j=0; j < nRows; j++ )
1209  {
1210  sal_uLong nRowKey = nRow + j;
1211  (*pTab2)[ nRowKey ] = nRowsPerRow2;
1212  }
1213  }
1214  }
1215  }
1216 
1217  SetWidths();
1218 
      // Give the enclosing cell the nested table's width, or widen it (and shift
      // the saved offsets) when the nested table is wider than the cell.
1219  if ( !pE->nWidth )
1220  pE->nWidth = nTableWidth;
1221  else if ( pE->nWidth < nTableWidth )
1222  {
1223  sal_uInt16 nOldOffset = pE->nOffset + pE->nWidth;
1224  sal_uInt16 nNewOffset = pE->nOffset + nTableWidth;
1225  ModifyOffset( pS->pLocalColOffset, nOldOffset, nNewOffset, nOffsetTolerance );
1226  sal_uInt16 nTmp = nNewOffset - pE->nOffset - pE->nWidth;
1227  pE->nWidth = nNewOffset - pE->nOffset;
1228  pS->nTableWidth = pS->nTableWidth + nTmp;
1229  if ( pS->nColOffset >= nOldOffset )
1230  pS->nColOffset = pS->nColOffset + nTmp;
1231  }
1232 
      // Copy the state stored in the stack entry back into the parser members.
1233  nColCnt = pE->nCol + pE->nColOverlap;
1234  nRowCnt = pS->nRowCnt;
1235  nColCntStart = pS->nColCntStart;
1236  nMaxCol = pS->nMaxCol;
1237  nTable = pS->nTable;
1238  nTableWidth = pS->nTableWidth;
1239  nFirstTableCell = pS->nFirstTableCell;
1240  nColOffset = pS->nColOffset;
1241  nColOffsetStart = pS->nColOffsetStart;
1242  bFirstRow = pS->bFirstRow;
1243  xLockedList = pS->xLockedList;
1244  pLocalColOffset = pS->pLocalColOffset;
1245  // mxActEntry is kept around if a table is started in the same row
1246  // (anything's possible in HTML); will be deleted by CloseEntry
1247  mxActEntry = pE;
1248  }
1249  bTabInTabCell = true;
1250  bInCell = true;
1251  }
1252  else
1253  { // Simple table finished
1254  SetWidths();
1255  nMaxCol = 0;
1256  nTable = 0;
1257  if ( !aTableStack.empty() )
1258  {
1259  ScHTMLTableStackEntry* pS = aTableStack.top().get();
1260  delete pLocalColOffset;
      // NOTE(review): source line 1261 is missing from this listing; pS is
      // presumably read there before the entry is popped - confirm against
      // the real file.
1262  aTableStack.pop();
1263  }
1264  }
1265 }
1266 
// Body of the image handler. The signature line is not part of this listing;
// the token switch calls Image( pInfo ) for HtmlTokenId::IMAGE.
// Appends a new ScHTMLImage to the current entry, fills it from the tag's
// options, loads the graphic and finally decides whether the image's
// direction flag has to be switched to nVertical.
// NOTE(review): source lines 1323 and 1338 are missing from this listing;
// rFilter and pDefaultDev are presumably declared there.
1268 {
1269  mxActEntry->maImageList.push_back(std::make_unique<ScHTMLImage>());
1270  ScHTMLImage* pImage = mxActEntry->maImageList.back().get();
1271  const HTMLOptions& rOptions = static_cast<HTMLParser*>(pInfo->pParser)->GetOptions();
1272  for (const auto & rOption : rOptions)
1273  {
1274  switch( rOption.GetToken() )
1275  {
1276  case HtmlOptionId::SRC:
1277  {
      // Resolve the image address against the base URL.
1278  pImage->aURL = INetURLObject::GetAbsURL( aBaseURL, rOption.GetString() );
1279  }
1280  break;
1281  case HtmlOptionId::ALT:
1282  {
1283  if (!mxActEntry->bHasGraphic)
1284  { // ALT text only if not any image loaded
      // Several ALT texts in one entry are joined with "; ".
1285  if (!mxActEntry->aAltText.isEmpty())
1286  mxActEntry->aAltText += "; ";
1287 
1288  mxActEntry->aAltText += rOption.GetString();
1289  }
1290  }
1291  break;
1292  case HtmlOptionId::WIDTH:
1293  {
1294  pImage->aSize.setWidth( static_cast<tools::Long>(rOption.GetNumber()) );
1295  }
1296  break;
1297  case HtmlOptionId::HEIGHT:
1298  {
1299  pImage->aSize.setHeight( static_cast<tools::Long>(rOption.GetNumber()) );
1300  }
1301  break;
1302  case HtmlOptionId::HSPACE:
1303  {
1304  pImage->aSpace.setX( static_cast<tools::Long>(rOption.GetNumber()) );
1305  }
1306  break;
1307  case HtmlOptionId::VSPACE:
1308  {
1309  pImage->aSpace.setY( static_cast<tools::Long>(rOption.GetNumber()) );
1310  }
1311  break;
1312  default: break;
1313  }
1314  }
      // Without a URL there is nothing to load; the (empty) image object stays
      // in the entry's image list.
1315  if (pImage->aURL.isEmpty())
1316  {
1317  OSL_FAIL( "Image: graphic without URL ?!?" );
1318  return ;
1319  }
1320 
1321  sal_uInt16 nFormat;
1322  std::unique_ptr<Graphic> pGraphic(new Graphic);
1324  if ( ERRCODE_NONE != GraphicFilter::LoadGraphic( pImage->aURL, pImage->aFilterName,
1325  *pGraphic, &rFilter, &nFormat ) )
1326  {
1327  return ; // Bad luck
1328  }
1329  if (!mxActEntry->bHasGraphic)
1330  { // discard any ALT text in this cell if we have any image
1331  mxActEntry->bHasGraphic = true;
1332  mxActEntry->aAltText.clear();
1333  }
1334  pImage->aFilterName = rFilter.GetImportFormatName( nFormat );
1335  pImage->pGraphic = std::move( pGraphic );
      // If width or height is still 0, take the size from the graphic's
      // preferred size converted to pixels.
1336  if ( !(pImage->aSize.Width() && pImage->aSize.Height()) )
1337  {
1339  pImage->aSize = pDefaultDev->LogicToPixel( pImage->pGraphic->GetPrefSize(),
1340  pImage->pGraphic->GetPrefMapMode() );
1341  }
      // NOTE(review): an element was pushed at the top of this body, so this
      // list does not look like it can be empty here - confirm.
1342  if (mxActEntry->maImageList.empty())
1343  return;
1344 
      // Sum up the widths of the trailing run of horizontally placed images;
      // any image without the nHorizontal flag resets the sum.
1345  tools::Long nWidth = 0;
1346  for (const std::unique_ptr<ScHTMLImage> & pI : mxActEntry->maImageList)
1347  {
1348  if ( pI->nDir & nHorizontal )
1349  nWidth += pI->aSize.Width() + 2 * pI->aSpace.X();
1350  else
1351  nWidth = 0;
1352  }
      // NOTE(review): pImage is the last list element and is visited by the
      // loop above, so its width may be counted twice here - confirm intent.
1353  if ( mxActEntry->nWidth
1354  && (nWidth + pImage->aSize.Width() + 2 * pImage->aSpace.X()
1355  >= mxActEntry->nWidth) )
1356  mxActEntry->maImageList.back()->nDir = nVertical;
1357 }
1358 
// Body of the column-tag handler. The signature line is not part of this
// listing; the token switch calls ColOn( pInfo ) for HtmlTokenId::COL_ON.
// For every WIDTH option of the tag a column of that pixel width is registered
// at the current offset and the offset is advanced by the width.
1360 {
1361  const HTMLOptions& rOptions = static_cast<HTMLParser*>(pInfo->pParser)->GetOptions();
1362  for (const auto & rOption : rOptions)
1363  {
1364  if( rOption.GetToken() == HtmlOptionId::WIDTH )
1365  {
1366  sal_uInt16 nVal = GetWidthPixel( rOption );
1367  MakeCol( pLocalColOffset, nColOffset, nVal, 0, 0 );
1368  nColOffset = nColOffset + nVal;
1369  }
1370  }
1371 }
1372 
1373 sal_uInt16 ScHTMLLayoutParser::GetWidthPixel( const HTMLOption& rOption )
1374 {
1375  const OUString& rOptVal = rOption.GetString();
1376  if ( rOptVal.indexOf('%') != -1 )
1377  { // Percent
1378  sal_uInt16 nW = (nTableWidth ? nTableWidth : static_cast<sal_uInt16>(aPageSize.Width()));
1379  return static_cast<sal_uInt16>((rOption.GetNumber() * nW) / 100);
1380  }
1381  else
1382  {
1383  if ( rOptVal.indexOf('*') != -1 )
1384  { // Relative to what?
1385  // TODO: Collect all relative values in ColArray and then MakeCol
1386  return 0;
1387  }
1388  else
1389  return static_cast<sal_uInt16>(rOption.GetNumber()); // Pixel
1390  }
1391 }
1392 
// Body of the anchor handler. The signature line is not part of this listing;
// the token switch calls AnchorOn( pInfo ) for HtmlTokenId::ANCHOR_ON.
// Stores the value of the tag's NAME option in the current entry.
1394 {
1395  const HTMLOptions& rOptions = static_cast<HTMLParser*>(pInfo->pParser)->GetOptions();
1396  for (const auto & rOption : rOptions)
1397  {
1398  if( rOption.GetToken() == HtmlOptionId::NAME )
1399  mxActEntry->pName = rOption.GetString();
1400  }
1401 }
1402 
// Body of a predicate whose signature line is not part of this listing;
// presumably IsAtBeginningOfText( pInfo ), which the font and attribute
// handlers below call before changing the entry's item set.
// True when the current entry's selection starts and ends in the same
// paragraph, that paragraph is not behind the end paragraph of pInfo's
// selection, and the paragraph contains no text yet.
1404 {
1405  ESelection& rSel = mxActEntry->aSel;
1406  return rSel.nStartPara == rSel.nEndPara &&
1407  rSel.nStartPara <= pInfo->aSelection.nEndPara &&
1408  pEdit->GetTextLen( rSel.nStartPara ) == 0;
1409 }
1410 
// Body of the font-tag handler. The signature line is not part of this
// listing; the token switch calls FontOn( pInfo ) for HtmlTokenId::FONT_ON.
// Translates the FACE, SIZE and COLOR options into font items of the current
// entry, but only while the entry has no text yet.
1412 {
1413  if ( !IsAtBeginningOfText( pInfo ) )
1414  return;
1415 
1416 // Only at the start of the text; applies to whole line
1417  const HTMLOptions& rOptions = static_cast<HTMLParser*>(pInfo->pParser)->GetOptions();
1418  for (const auto & rOption : rOptions)
1419  {
1420  switch( rOption.GetToken() )
1421  {
1422  case HtmlOptionId::FACE :
1423  {
1424  const OUString& rFace = rOption.GetString();
1425  OUStringBuffer aFontName;
1426  sal_Int32 nPos = 0;
1427  while( nPos != -1 )
1428  {
1429  // Font list, VCL uses the semicolon as separator
1430  // HTML uses the comma
      // Each comma-separated name is stripped of surrounding blanks and
      // appended, separated by ';'.
1431  std::u16string_view aFName = o3tl::getToken(rFace, 0, ',', nPos );
1432  aFName = comphelper::string::strip(aFName, ' ');
1433  if( !aFontName.isEmpty() )
1434  aFontName.append(";");
1435  aFontName.append(aFName);
1436  }
1437  if ( !aFontName.isEmpty() )
1438  mxActEntry->aItemSet.Put( SvxFontItem( FAMILY_DONTKNOW,
1439  aFontName.makeStringAndClear(), OUString(), PITCH_DONTKNOW,
1440  RTL_TEXTENCODING_DONTKNOW, ATTR_FONT ) );
1441  }
1442  break;
1443  case HtmlOptionId::SIZE :
1444  {
      // Clamp the size to 1..SC_HTML_FONTSIZES; it is used as a 1-based index
      // into maFontHeights.
1445  sal_uInt16 nSize = static_cast<sal_uInt16>(rOption.GetNumber());
1446  if ( nSize == 0 )
1447  nSize = 1;
1448  else if ( nSize > SC_HTML_FONTSIZES )
1449  nSize = SC_HTML_FONTSIZES;
1450  mxActEntry->aItemSet.Put( SvxFontHeightItem(
1451  maFontHeights[nSize-1], 100, ATTR_FONT_HEIGHT ) );
1452  }
1453  break;
1454  case HtmlOptionId::COLOR :
1455  {
1456  Color aColor;
1457  rOption.GetColor( aColor );
1458  mxActEntry->aItemSet.Put( SvxColorItem( aColor, ATTR_FONT_COLOR ) );
1459  }
1460  break;
1461  default: break;
1462  }
1463  }
1464 }
1465 
// Body of the token dispatcher of the layout parser. The signature line is
// not part of this listing (presumably ProcToken( pInfo )).
// Switches on pInfo->nToken and forwards to the matching handler or puts the
// matching attribute item into the current entry. Unknown tokens are ignored.
// NOTE(review): source lines 1477, 1493, 1516 and 1617 are missing from this
// listing, so four statements below are shown incomplete.
1467 {
1468  switch ( pInfo->nToken )
1469  {
1470  case HtmlTokenId::META:
1471  {
1472  HTMLParser* pParser = static_cast<HTMLParser*>(pInfo->pParser);
1473  uno::Reference<document::XDocumentPropertiesSupplier> xDPS(
1474  mpDoc->GetDocumentShell()->GetModel(), uno::UNO_QUERY_THROW);
1475  pParser->ParseMetaOptions(
1476  xDPS->getDocumentProperties(),
1478  }
1479  break;
1480  case HtmlTokenId::TITLE_ON:
1481  {
      // Start collecting the title text in aString.
1482  bInTitle = true;
1483  aString.clear();
1484  }
1485  break;
1486  case HtmlTokenId::TITLE_OFF:
1487  {
1488  if ( bInTitle && !aString.isEmpty() )
1489  {
1490  // Remove blanks from line breaks
1491  aString = aString.trim();
1492  uno::Reference<document::XDocumentPropertiesSupplier> xDPS(
1494  uno::UNO_QUERY_THROW);
1495  xDPS->getDocumentProperties()->setTitle(aString);
1496  }
1497  bInTitle = false;
1498  }
1499  break;
1500  case HtmlTokenId::TABLE_ON:
1501  {
1502  TableOn( pInfo );
1503  }
1504  break;
1505  case HtmlTokenId::COL_ON:
1506  {
1507  ColOn( pInfo );
1508  }
1509  break;
1510  case HtmlTokenId::TABLEHEADER_ON: // Opens row
1511  {
1512  if ( bInCell )
1513  CloseEntry( pInfo );
1514  // Do not set bInCell to true, TableDataOn does that
1515  mxActEntry->aItemSet.Put(
      // Falls through on purpose: a header cell is then handled like a data cell.
1517  [[fallthrough]];
1518  }
1519  case HtmlTokenId::TABLEDATA_ON: // Opens cell
1520  {
1521  TableDataOn( pInfo );
1522  }
1523  break;
1524  case HtmlTokenId::TABLEHEADER_OFF:
1525  case HtmlTokenId::TABLEDATA_OFF: // Closes cell
1526  {
1527  TableDataOff( pInfo );
1528  }
1529  break;
1530  case HtmlTokenId::TABLEROW_ON: // Before first cell in row
1531  {
1532  TableRowOn( pInfo );
1533  }
1534  break;
1535  case HtmlTokenId::TABLEROW_OFF: // After last cell in row
1536  {
1537  TableRowOff( pInfo );
1538  }
1539  break;
1540  case HtmlTokenId::TABLE_OFF:
1541  {
1542  TableOff( pInfo );
1543  }
1544  break;
1545  case HtmlTokenId::IMAGE:
1546  {
1547  Image( pInfo );
1548  }
1549  break;
1550  case HtmlTokenId::PARABREAK_OFF:
1551  { // We continue vertically after an image
1552  if (!mxActEntry->maImageList.empty())
1553  mxActEntry->maImageList.back()->nDir = nVertical;
1554  }
1555  break;
1556  case HtmlTokenId::ANCHOR_ON:
1557  {
1558  AnchorOn( pInfo );
1559  }
1560  break;
1561  case HtmlTokenId::FONT_ON :
1562  {
1563  FontOn( pInfo );
1564  }
1565  break;
1566  case HtmlTokenId::BIGPRINT_ON :
1567  {
1568  // TODO: Remember current font size and increase by 1
1569  if ( IsAtBeginningOfText( pInfo ) )
1570  mxActEntry->aItemSet.Put( SvxFontHeightItem(
1571  maFontHeights[3], 100, ATTR_FONT_HEIGHT ) );
1572  }
1573  break;
1574  case HtmlTokenId::SMALLPRINT_ON :
1575  {
1576  // TODO: Remember current font size and decrease by 1
1577  if ( IsAtBeginningOfText( pInfo ) )
1578  mxActEntry->aItemSet.Put( SvxFontHeightItem(
1579  maFontHeights[0], 100, ATTR_FONT_HEIGHT ) );
1580  }
1581  break;
1582  case HtmlTokenId::BOLD_ON :
1583  case HtmlTokenId::STRONG_ON :
1584  {
1585  if ( IsAtBeginningOfText( pInfo ) )
1586  mxActEntry->aItemSet.Put( SvxWeightItem( WEIGHT_BOLD,
1587  ATTR_FONT_WEIGHT ) );
1588  }
1589  break;
1590  case HtmlTokenId::ITALIC_ON :
1591  case HtmlTokenId::EMPHASIS_ON :
1592  case HtmlTokenId::ADDRESS_ON :
1593  case HtmlTokenId::BLOCKQUOTE_ON :
1594  case HtmlTokenId::BLOCKQUOTE30_ON :
1595  case HtmlTokenId::CITATION_ON :
1596  case HtmlTokenId::VARIABLE_ON :
1597  {
1598  if ( IsAtBeginningOfText( pInfo ) )
1599  mxActEntry->aItemSet.Put( SvxPostureItem( ITALIC_NORMAL,
1600  ATTR_FONT_POSTURE ) );
1601  }
1602  break;
1603  case HtmlTokenId::DEFINSTANCE_ON :
1604  {
      // Rendered bold and italic.
1605  if ( IsAtBeginningOfText( pInfo ) )
1606  {
1607  mxActEntry->aItemSet.Put( SvxWeightItem( WEIGHT_BOLD,
1608  ATTR_FONT_WEIGHT ) );
1609  mxActEntry->aItemSet.Put( SvxPostureItem( ITALIC_NORMAL,
1610  ATTR_FONT_POSTURE ) );
1611  }
1612  }
1613  break;
1614  case HtmlTokenId::UNDERLINE_ON :
1615  {
1616  if ( IsAtBeginningOfText( pInfo ) )
1618  ATTR_FONT_UNDERLINE ) );
1619  }
1620  break;
1621  case HtmlTokenId::TEXTTOKEN:
1622  {
      // Text is only collected here while inside the title element.
1623  if ( bInTitle )
1624  aString += pInfo->aText;
1625  }
1626  break;
1627  default: ;
1628  }
1629 }
1630 
1631 // HTML DATA QUERY PARSER
1632 
/** Returns rValue limited to the closed interval [rMin, rMax].

    Values below rMin yield rMin, values above rMax yield rMax, anything else
    is returned unchanged. rMin must not be greater than rMax.
 */
template< typename Type >
static Type getLimitedValue( const Type& rValue, const Type& rMin, const Type& rMax )
{
    if( rValue < rMin )
        return rMin;
    if( rMax < rValue )
        return rMax;
    return rValue;
}
1636 
1637 ScHTMLEntry::ScHTMLEntry( const SfxItemSet& rItemSet, ScHTMLTableId nTableId ) :
1638  ScEEParseEntry( rItemSet ),
1639  mbImportAlways( false )
1640 {
1641  nTab = nTableId;
1642  bEntirePara = false;
1643 }
1644 
// Body of a predicate whose signature line is not part of this listing;
// presumably ScHTMLEntry::HasContents(), which is called further down.
// True when the entry is flagged "import always", its selection spans a
// range, it carries ALT text, or it represents a table.
1646 {
1647  return mbImportAlways || aSel.HasRange() || !aAltText.isEmpty() || IsTable();
1648 }
1649 
// Fragment of a definition: the signature line and all statement lines
// (source lines 1650, 1653-1654, 1656, 1658-1659) are missing from this
// listing. Presumably ScHTMLEntry::AdjustStart, the counterpart of the
// AdjustEnd body that follows - confirm against the real file.
1651 {
1652  // set start position
1655  // adjust end position
1657  {
1660  }
1661 }
1662 
// Body of a function whose signature line is not part of this listing;
// presumably ScHTMLEntry::AdjustEnd( rInfo ).
// Moves the end of the entry's selection to the end of rInfo's selection. The
// diagnostic checks that the old end is not behind the new one.
1664 {
1665  OSL_ENSURE( (aSel.nEndPara < rInfo.aSelection.nEndPara) ||
1666  ((aSel.nEndPara == rInfo.aSelection.nEndPara) && (aSel.nEndPos <= rInfo.aSelection.nEndPos)),
1667  "ScHTMLQueryParser::AdjustEntryEnd - invalid end position" );
1668  // set end position
1669  aSel.nEndPara = rInfo.aSelection.nEndPara;
1670  aSel.nEndPos = rInfo.aSelection.nEndPos;
1671 }
1672 
1673 void ScHTMLEntry::Strip( const EditEngine& rEditEngine )
1674 {
1675  // strip leading empty paragraphs
1676  while( (aSel.nStartPara < aSel.nEndPara) && (rEditEngine.GetTextLen( aSel.nStartPara ) <= aSel.nStartPos) )
1677  {
1678  ++aSel.nStartPara;
1679  aSel.nStartPos = 0;
1680  }
1681  // strip trailing empty paragraphs
1682  while( (aSel.nStartPara < aSel.nEndPara) && (aSel.nEndPos == 0) )
1683  {
1684  --aSel.nEndPara;
1685  aSel.nEndPos = rEditEngine.GetTextLen( aSel.nEndPara );
1686  }
1687 }
1688 
// Container that maps table ids to shared ScHTMLTable objects and remembers
// the table that was found or created last.
// NOTE(review): several source lines (1707, 1709, 1717-1719, 1722-1723, 1727)
// are missing from this listing; the members mrParentTable and mpCurrTable
// used by the member functions are presumably declared there.
1696 class ScHTMLTableMap final
1697 {
1698 private:
1699  typedef std::shared_ptr< ScHTMLTable > ScHTMLTablePtr;
1700  typedef std::map< ScHTMLTableId, ScHTMLTablePtr > ScHTMLTableStdMap;
1701 
1702 public:
1703  typedef ScHTMLTableStdMap::iterator iterator;
1704  typedef ScHTMLTableStdMap::const_iterator const_iterator;
1705 
1706 private:
      // The tables, keyed by table id.
1708  ScHTMLTableStdMap maTables;
1710 
1711 public:
1712  explicit ScHTMLTableMap( ScHTMLTable& rParentTable );
1713 
      // Read-only iteration over the contained tables.
1714  const_iterator begin() const { return maTables.begin(); }
1715  const_iterator end() const { return maTables.end(); }
1716 
      // Returns the table with the passed id or nullptr; with bDeep the nested
      // tables of the contained tables are searched as well.
1720  ScHTMLTable* FindTable( ScHTMLTableId nTableId, bool bDeep = true ) const;
1721 
      // Creates a new table as child of the parent table, stores it in this
      // container and returns it.
1724  ScHTMLTable* CreateTable( const HtmlImportInfo& rInfo, bool bPreFormText, const ScDocument& rDoc );
1725 
1726 private:
      // Remembers pTable as current table; a null pointer leaves the old value.
      // Callable from const functions, so mpCurrTable is presumably mutable.
1728  void SetCurrTable( ScHTMLTable* pTable ) const
1729  { if( pTable ) mpCurrTable = pTable; }
1730 };
1731 
// Initializer list and empty body of a constructor whose signature line is
// not part of this listing (presumably ScHTMLTableMap::ScHTMLTableMap).
// Stores the parent table reference; no table is current yet.
1733  mrParentTable(rParentTable),
1734  mpCurrTable(nullptr)
1735 {
1736 }
1737 
// Body of a lookup whose signature line is not part of this listing;
// presumably ScHTMLTableMap::FindTable( nTableId, bDeep ).
// Looks for the table with the passed id: first the cached current table,
// then this container, then (if bDeep) the nested tables of every contained
// table. A found table becomes the cached current table.
1739 {
1740  ScHTMLTable* pResult = nullptr;
1741  if( mpCurrTable && (nTableId == mpCurrTable->GetTableId()) )
1742  pResult = mpCurrTable; // cached table
1743  else
1744  {
1745  const_iterator aFind = maTables.find( nTableId );
1746  if( aFind != maTables.end() )
1747  pResult = aFind->second.get(); // table from this container
1748  }
1749 
1750  // not found -> search deep in nested tables
1751  if( !pResult && bDeep )
1752  for( const_iterator aIter = begin(), aEnd = end(); !pResult && (aIter != aEnd); ++aIter )
1753  pResult = aIter->second->FindNestedTable( nTableId );
1754 
      // SetCurrTable ignores a null result, so a failed search keeps the cache.
1755  SetCurrTable( pResult );
1756  return pResult;
1757 }
1758 
1759 ScHTMLTable* ScHTMLTableMap::CreateTable( const HtmlImportInfo& rInfo, bool bPreFormText, const ScDocument& rDoc )
1760 {
1761  ScHTMLTable* pTable = new ScHTMLTable( mrParentTable, rInfo, bPreFormText, rDoc );
1762  maTables[ pTable->GetTableId() ].reset( pTable );
1763  SetCurrTable( pTable );
1764  return pTable;
1765 }
1766 
// File-local helper class that iterates over the tables of a ScHTMLTableMap.
// NOTE(review): source lines 1769-1774, 1778-1779 and 1787-1788 are missing
// from this listing; the members maIter and maEnd used below are presumably
// declared on the latter two.
1767 namespace {
1768 
1775 class ScHTMLTableIterator
1776 {
1777 public:
      // pTableMap may be null; the iterator is then never valid (see is()).
1780  explicit ScHTMLTableIterator( const ScHTMLTableMap* pTableMap );
1781 
      // True while a map is set and the end has not been reached.
1782  bool is() const { return mpTableMap && maIter != maEnd; }
      // Access to the table the iterator currently points to.
1783  ScHTMLTable* operator->() { return maIter->second.get(); }
      // Advances to the next table.
1784  ScHTMLTableIterator& operator++() { ++maIter; return *this; }
1785 
1786 private:
1789  const ScHTMLTableMap* mpTableMap;
1790 };
1791 
1792 }
1793 
1794 ScHTMLTableIterator::ScHTMLTableIterator( const ScHTMLTableMap* pTableMap ) :
1795  mpTableMap(pTableMap)
1796 {
1797  if( pTableMap )
1798  {
1799  maIter = pTableMap->begin();
1800  maEnd = pTableMap->end();
1801  }
1802 }
1803 
// Initializer list and body of a constructor whose signature line is not part
// of this listing (the class name cannot be read from here).
// Takes the current value of the shared "unused id" counter as own table id,
// keeps a reference to the counter and increments it for the next table.
1805  mnTableId( rnUnusedId ),
1806  mrnUnusedId( rnUnusedId )
1807 {
1808  ++mrnUnusedId;
1809 }
1810 
// Constructs a table nested inside rParentTable.
// The new table gets its id from the parent's unused-id counter, starts with
// the parent's current item set and shares the parent's edit engine, parse
// list and parser. A preformatted table directly opens a row and a 1x1 cell;
// any other table reads the BORDER and ID options of the tag instead.
// NOTE(review): source line 1835 is missing from this listing.
1811 ScHTMLTable::ScHTMLTable( ScHTMLTable& rParentTable, const HtmlImportInfo& rInfo, bool bPreFormText, const ScDocument& rDoc ) :
1812  mpParentTable( &rParentTable ),
1813  maTableId( rParentTable.maTableId.mrnUnusedId ),
1814  maTableItemSet( rParentTable.GetCurrItemSet() ),
1815  mrEditEngine( rParentTable.mrEditEngine ),
1816  mrEEParseList( rParentTable.mrEEParseList ),
1817  mpCurrEntryVector( nullptr ),
1818  maSize( 1, 1 ),
1819  mpParser(rParentTable.mpParser),
1820  mrDoc(rDoc),
1821  mbBorderOn( false ),
1822  mbPreFormText( bPreFormText ),
1823  mbRowOn( false ),
1824  mbDataOn( false ),
1825  mbPushEmptyLine( false ),
1826  mbCaptionOn ( false )
1827 {
1828  if( mbPreFormText )
1829  {
1830  ImplRowOn();
1831  ImplDataOn( ScHTMLSize( 1, 1 ) );
1832  }
1833  else
1834  {
1836  const HTMLOptions& rOptions = static_cast<HTMLParser*>(rInfo.pParser)->GetOptions();
1837  for (const auto& rOption : rOptions)
1838  {
1839  switch( rOption.GetToken() )
1840  {
1841  case HtmlOptionId::BORDER:
      // A BORDER option without value, or with a non-zero number, enables borders.
1842  mbBorderOn = rOption.GetString().isEmpty() || (rOption.GetNumber() != 0);
1843  break;
1844  case HtmlOptionId::ID:
1845  maTableName = rOption.GetString();
1846  break;
1847  default: break;
1848  }
1849  }
1850  }
1851 
1852  CreateNewEntry( rInfo );
1853 }
1854 
// Parameter list, initializer list and body of a constructor whose first
// signature line is not part of this listing. Judging by the initialized
// members this is the ScHTMLTable constructor for a table without parent
// (mpParentTable is set to nullptr).
// NOTE(review): source line 1880 is missing from this listing.
1856  SfxItemPool& rPool,
1857  EditEngine& rEditEngine,
1858  std::vector<std::shared_ptr<ScEEParseEntry>>& rEEParseList,
1859  ScHTMLTableId& rnUnusedId, ScHTMLParser* pParser, const ScDocument& rDoc
1860 ) :
1861  mpParentTable( nullptr ),
1862  maTableId( rnUnusedId ),
1863  maTableItemSet( rPool ),
1864  mrEditEngine( rEditEngine ),
1865  mrEEParseList( rEEParseList ),
1866  mpCurrEntryVector( nullptr ),
1867  maSize( 1, 1 ),
1868  mpParser(pParser),
1869  mrDoc(rDoc),
1870  mbBorderOn( false ),
1871  mbPreFormText( false ),
1872  mbRowOn( false ),
1873  mbDataOn( false ),
1874  mbPushEmptyLine( false ),
1875  mbCaptionOn ( false )
1876 {
1877  // open the first "cell" of the document
1878  ImplRowOn();
1879  ImplDataOn( ScHTMLSize( 1, 1 ) );
1881 }
1882 
// Empty body of a definition whose signature line (source line 1883) is not
// part of this listing; presumably the ScHTMLTable destructor - confirm.
1884 {
1885 }
1886 
// Fragment: the signature line and the return statement (source lines 1887
// and 1890) are missing from this listing. Presumably GetCurrItemSet(), which
// other members call to obtain the item set for new entries.
1888 {
1889  // first try cell item set, then row item set, then table item set
1891 }
1892 
// Body of a function whose signature line is not part of this listing;
// presumably ScHTMLTable::GetSpan( rCellPos ), which is called further down.
// Returns the column/row extent of the merged range that contains rCellPos,
// searching the vertically merged cells first and the horizontally merged
// cells second; (1,1) if the cell is in neither list.
1894 {
1895  ScHTMLSize aSpan( 1, 1 );
1896  const ScRange* pRange = maVMergedCells.Find( rCellPos.MakeAddr() );
1897  if (!pRange)
1898  pRange = maHMergedCells.Find( rCellPos.MakeAddr() );
1899  if (pRange)
1900  aSpan.Set( pRange->aEnd.Col() - pRange->aStart.Col() + 1, pRange->aEnd.Row() - pRange->aStart.Row() + 1 );
1901  return aSpan;
1902 }
1903 
// Body of a function whose signature line is not part of this listing;
// presumably ScHTMLTable::FindNestedTable( nTableId ), which the table map
// calls for its deep search.
// Searches the nested tables for the passed id; nullptr without nested tables.
1905 {
1906  return mxNestedTables ? mxNestedTables->FindTable( nTableId ) : nullptr;
1907 }
1908 
1909 void ScHTMLTable::PutItem( const SfxPoolItem& rItem )
1910 {
1911  OSL_ENSURE( mxCurrEntry, "ScHTMLTable::PutItem - no current entry" );
1912  if( mxCurrEntry && mxCurrEntry->IsEmpty() )
1913  mxCurrEntry->GetItemSet().Put( rItem );
1914 }
1915 
// Body of a function whose signature line is not part of this listing
// (presumably ScHTMLTable::PutText( rInfo ), as named in the diagnostic).
// A single space arriving while the entry has no contents yet only moves the
// entry's start; any other text extends the entry's end. While a caption is
// open the text is additionally collected in the caption buffer.
1917 {
1918  OSL_ENSURE( mxCurrEntry, "ScHTMLTable::PutText - no current entry" );
1919  if( mxCurrEntry )
1920  {
1921  if( !mxCurrEntry->HasContents() && IsSpaceCharInfo( rInfo ) )
1922  mxCurrEntry->AdjustStart( rInfo );
1923  else
1924  mxCurrEntry->AdjustEnd( rInfo );
1925  if (mbCaptionOn)
1926  maCaptionBuffer.append(rInfo.aText);
1927 
1928  }
1929 }
1930 
// Body of a function whose signature line is not part of this listing;
// presumably ScHTMLTable::InsertPara, which a comment further down mentions.
// If a data cell is open and IsEmptyCell() is false, the current entry is
// flagged "import always". The entry is then pushed and a new one is started.
// NOTE(review): source line 1937 is missing from this listing.
1932 {
1933  if( mxCurrEntry && mbDataOn && !IsEmptyCell() )
1934  mxCurrEntry->SetImportAlways();
1935  PushEntry( rInfo );
1936  CreateNewEntry( rInfo );
1938 }
1939 
// Fragment: the signature line and the only statement (source lines 1940 and
// 1943) are missing from this listing. The remaining comment indicates that
// this is the handler for a <br> tag.
1941 {
1942  // empty line, if <br> is at start of cell
1944 }
1945 
// Fragment: the signature line and the only statement (source lines 1946 and
// 1949) are missing from this listing, so the function cannot be identified
// from here.
1947 {
1948  // call directly, InsertPara() has not been called before
1950 }
1951 
// Fragment: the signature line and the only statement (source lines 1952 and
// 1955) are missing from this listing. The remaining comment indicates that
// it controls the empty line for paragraph and heading tags.
1953 {
1954  // empty line, if <p>, </p>, <h?>, or </h*> are not at start of cell
1956 }
1957 
// Body of a function whose signature line is not part of this listing
// (ScHTMLTable::AnchorOn according to the diagnostic text).
// Flags the current entry as "import always".
1959 {
1960  OSL_ENSURE( mxCurrEntry, "ScHTMLTable::AnchorOn - no current entry" );
1961  // don't skip entries with single hyperlinks
1962  if( mxCurrEntry )
1963  mxCurrEntry->SetImportAlways();
1964 }
1965 
// Body of a function whose signature line is not part of this listing
// (presumably the handler for an opening table tag).
// Pushes the current entry and inserts a nested table that is not
// preformatted; returns what InsertNestedTable returns.
1967 {
1968  PushEntry( rInfo );
1969  return InsertNestedTable( rInfo, false );
1970 }
1971 
// Body of a function whose signature line is not part of this listing
// (presumably the handler for a closing table tag).
// A preformatted table ignores the request and returns itself; any other
// table returns the result of CloseTable.
1973 {
1974  return mbPreFormText ? this : CloseTable( rInfo );
1975 }
1976 
// Body of a function whose signature line is not part of this listing
// (presumably the handler for an opening caption tag).
// Starts collecting caption text in an emptied buffer.
1978 {
1979  mbCaptionOn = true;
1980  maCaptionBuffer.setLength(0);
1981 }
1982 
// Body of a function whose signature line is not part of this listing
// (presumably the handler for a closing caption tag).
// Does nothing unless a caption is open; otherwise stores the collected and
// trimmed text as caption and stops collecting.
1984 {
1985  if (!mbCaptionOn)
1986  return;
1987  maCaption = maCaptionBuffer.makeStringAndClear().trim();
1988  mbCaptionOn = false;
1989 }
1990 
// Body of a function whose signature line is not part of this listing
// (presumably the handler for an opening preformatted-text tag).
// Pushes the current entry and inserts a nested preformatted table.
1992 {
1993  PushEntry( rInfo );
1994  return InsertNestedTable( rInfo, true );
1995 }
1996 
// Body of a function whose signature line is not part of this listing
// (presumably the handler for a closing preformatted-text tag).
// Only a preformatted table is closed; any other table returns itself.
1998 {
1999  return mbPreFormText ? CloseTable( rInfo ) : this;
2000 }
2001 
// Body of a function whose signature line is not part of this listing
// (presumably the handler for an opening table row tag).
// Pushes the current entry, opens a row if this is a real (non-global,
// non-preformatted) table and starts a new entry.
// NOTE(review): source line 2008 is missing from this listing.
2003 {
2004  PushEntry( rInfo, true );
2005  if( mpParentTable && !mbPreFormText ) // no rows allowed in global and preformatted tables
2006  {
2007  ImplRowOn();
2009  }
2010  CreateNewEntry( rInfo );
2011 }
2012 
// Body of a function whose signature line is not part of this listing
// (presumably the handler for a closing table row tag).
// Pushes the current entry, closes the row if this is a real (non-global,
// non-preformatted) table and starts a new entry.
2014 {
2015  PushEntry( rInfo, true );
2016  if( mpParentTable && !mbPreFormText ) // no rows allowed in global and preformatted tables
2017  ImplRowOff();
2018  CreateNewEntry( rInfo );
2019 }
2020 
2021 namespace {
2022 
2027 OUString decodeNumberFormat(const OUString& rFmt)
2028 {
2029  OUStringBuffer aBuf;
2030  const sal_Unicode* p = rFmt.getStr();
2031  sal_Int32 n = rFmt.getLength();
2032  for (sal_Int32 i = 0; i < n; ++i, ++p)
2033  {
2034  if (*p == '\\')
2035  {
2036  // Skip '\'.
2037  ++i;
2038  ++p;
2039 
2040  // Parse all subsequent digits until first non-digit is found.
2041  sal_Int32 nDigitCount = 0;
2042  const sal_Unicode* p1 = p;
2043  for (; i < n; ++i, ++p, ++nDigitCount)
2044  {
2045  if (*p < '0' || '9' < *p)
2046  {
2047  --i;
2048  --p;
2049  break;
2050  }
2051 
2052  }
2053  if (nDigitCount)
2054  {
2055  // Hex-encoded character found. Decode it back into its
2056  // original character. An example of number format with
2057  // hex-encoded chars: "\0022$\0022\#\,\#\#0\.00"
2058  sal_uInt32 nVal = OUString(p1, nDigitCount).toUInt32(16);
2059  aBuf.append(static_cast<sal_Unicode>(nVal));
2060  }
2061  }
2062  else
2063  aBuf.append(*p);
2064  }
2065  return aBuf.makeStringAndClear();
2066 }
2067 
2068 }
2069 
// Body of a function whose signature line is not part of this listing
// (presumably the handler for an opening table cell tag).
// Pushes the current entry. In a real (non-global, non-preformatted) table it
// reads COLSPAN, ROWSPAN, SDVAL, SDNUM and CLASS from the tag, opens the cell
// with the resulting span and hands value/number strings to the new entry.
// NOTE(review): source line 2128 is missing from this listing.
2071 {
2072  PushEntry( rInfo, true );
2073  if( mpParentTable && !mbPreFormText ) // no cells allowed in global and preformatted tables
2074  {
2075  // read needed options from the <td> tag
2076  ScHTMLSize aSpanSize( 1, 1 );
2077  std::optional<OUString> pValStr, pNumStr;
2078  const HTMLOptions& rOptions = static_cast<HTMLParser*>(rInfo.pParser)->GetOptions();
2079  sal_uInt32 nNumberFormat = NUMBERFORMAT_ENTRY_NOT_FOUND;
2080  for (const auto& rOption : rOptions)
2081  {
2082  switch (rOption.GetToken())
2083  {
      // Both spans are limited to the range 1..256.
2084  case HtmlOptionId::COLSPAN:
2085  aSpanSize.mnCols = static_cast<SCCOL>( getLimitedValue<sal_Int32>( rOption.GetString().toInt32(), 1, 256 ) );
2086  break;
2087  case HtmlOptionId::ROWSPAN:
2088  aSpanSize.mnRows = static_cast<SCROW>( getLimitedValue<sal_Int32>( rOption.GetString().toInt32(), 1, 256 ) );
2089  break;
2090  case HtmlOptionId::SDVAL:
2091  pValStr = rOption.GetString();
2092  break;
2093  case HtmlOptionId::SDNUM:
2094  pNumStr = rOption.GetString();
2095  break;
2096  case HtmlOptionId::CLASS:
2097  {
2098  // Pick up the number format associated with this class (if
2099  // any).
2100  OUString aClass = rOption.GetString();
2101  const ScHTMLStyles& rStyles = mpParser->GetStyles();
2102  const OUString& rVal = rStyles.getPropertyValue("td", aClass, "mso-number-format");
2103  if (!rVal.isEmpty())
2104  {
2105  OUString aNumFmt = decodeNumberFormat(rVal);
2106 
      // Reuse an existing format entry, otherwise try to register the format
      // code; an invalid code leaves the cell without number format.
2107  nNumberFormat = GetFormatTable()->GetEntryKey(aNumFmt);
2108  if (nNumberFormat == NUMBERFORMAT_ENTRY_NOT_FOUND)
2109  {
2110  sal_Int32 nErrPos = 0;
2111  SvNumFormatType nDummy;
2112  bool bValidFmt = GetFormatTable()->PutEntry(aNumFmt, nErrPos, nDummy, nNumberFormat);
2113  if (!bValidFmt)
2114  nNumberFormat = NUMBERFORMAT_ENTRY_NOT_FOUND;
2115  }
2116  }
2117  }
2118  break;
2119  default: break;
2120  }
2121  }
2122 
2123  ImplDataOn( aSpanSize );
2124 
2125  if (nNumberFormat != NUMBERFORMAT_ENTRY_NOT_FOUND)
2126  moDataItemSet->Put( SfxUInt32Item(ATTR_VALUE_FORMAT, nNumberFormat) );
2127 
2129  CreateNewEntry( rInfo );
2130  mxCurrEntry->pValStr = std::move(pValStr);
2131  mxCurrEntry->pNumStr = std::move(pNumStr);
2132  }
2133  else
2134  CreateNewEntry( rInfo );
2135 }
2136 
// Body of a function whose signature line is not part of this listing
// (presumably the handler for a closing table cell tag).
// Pushes the current entry, closes the cell if this is a real (non-global,
// non-preformatted) table and starts a new entry.
2138 {
2139  PushEntry( rInfo, true );
2140  if( mpParentTable && !mbPreFormText ) // no cells allowed in global and preformatted tables
2141  ImplDataOff();
2142  CreateNewEntry( rInfo );
2143 }
2144 
// Body of a function whose signature line is not part of this listing
// (presumably the handler for the opening body tag, see the comment below).
// Only the table without parent reacts: it opens a row and a 1x1 cell if an
// entry was pushed or none is open yet.
// NOTE(review): source line 2155 is missing from this listing.
2146 {
2147  bool bPushed = PushEntry( rInfo );
2148  if( !mpParentTable )
2149  {
2150  // do not start new row, if nothing (no title) precedes the body.
2151  if( bPushed || !mbRowOn )
2152  ImplRowOn();
2153  if( bPushed || !mbDataOn )
2154  ImplDataOn( ScHTMLSize( 1, 1 ) );
2156  }
2157  CreateNewEntry( rInfo );
2158 }
2159 
// Body of a function whose signature line is not part of this listing
// (presumably the handler for the closing body tag).
// Pushes the current entry; the table without parent additionally closes its
// cell and row. A new entry is started afterwards.
2161 {
2162  PushEntry( rInfo );
2163  if( !mpParentTable )
2164  {
2165  ImplDataOff();
2166  ImplRowOff();
2167  }
2168  CreateNewEntry( rInfo );
2169 }
2170 
// Body of a function whose signature line is not part of this listing;
// presumably ScHTMLTable::CloseTable( rInfo ), which the table-off handlers
// above call.
// Closes this table and returns its parent; the table without parent cannot
// be closed and returns itself.
// NOTE(review): source lines 2178 and 2181 are missing from this listing; the
// latter is the statement controlled by the if on line 2180.
2172 {
2173  if( mpParentTable ) // not allowed to close global table
2174  {
2175  PushEntry( rInfo, mbDataOn );
2176  ImplDataOff();
2177  ImplRowOff();
2179  mpParentTable->CreateNewEntry( rInfo );
2180  if( mbPreFormText ) // enclose preformatted table with empty lines in parent table
2182  return mpParentTable;
2183  }
2184  return this;
2185 }
2186 
// Body of a function whose signature line is not part of this listing;
// presumably the GetDocSize( eOrient, nCellPos ) overload that is called from
// the border code further down.
// Returns the size of one cell position as the difference of two neighboring
// entries of the size vector for eOrient; 0 if the position is at or behind
// the end of the vector.
2188 {
2189  const ScSizeVec& rSizes = maCumSizes[ eOrient ];
2190  size_t nIndex = static_cast< size_t >( nCellPos );
2191  if( nIndex >= rSizes.size() ) return 0;
2192  return (nIndex == 0) ? rSizes.front() : (rSizes[ nIndex ] - rSizes[ nIndex - 1 ]);
2193 }
2194 
2195 SCCOLROW ScHTMLTable::GetDocSize( ScHTMLOrient eOrient, SCCOLROW nCellBegin, SCCOLROW nCellEnd ) const
2196 {
2197  const ScSizeVec& rSizes = maCumSizes[ eOrient ];
2198  size_t nBeginIdx = static_cast< size_t >( std::max< SCCOLROW >( nCellBegin, 0 ) );
2199  size_t nEndIdx = static_cast< size_t >( std::min< SCCOLROW >( nCellEnd, static_cast< SCCOLROW >( rSizes.size() ) ) );
2200  if (nBeginIdx >= nEndIdx ) return 0;
2201  return rSizes[ nEndIdx - 1 ] - ((nBeginIdx == 0) ? 0 : rSizes[ nBeginIdx - 1 ]);
2202 }
2203 
// Body of a function whose signature line is not part of this listing;
// presumably the GetDocSize( eOrient ) overload used by GetDocRange below.
// Returns the last entry of the size vector for eOrient, or 0 if it is empty.
2205 {
2206  const ScSizeVec& rSizes = maCumSizes[ eOrient ];
2207  return rSizes.empty() ? 0 : rSizes.back();
2208 }
2209 
// Body of a function whose signature line is not part of this listing
// (presumably a GetDocSize overload taking a cell position).
// Returns the column and row size of the cell at rCellPos, taking the span of
// a merged cell into account.
2211 {
2212  ScHTMLSize aCellSpan = GetSpan( rCellPos );
2213  return ScHTMLSize(
2214  static_cast< SCCOL >( GetDocSize( tdCol, rCellPos.mnCol, rCellPos.mnCol + aCellSpan.mnCols ) ),
2215  static_cast< SCROW >( GetDocSize( tdRow, rCellPos.mnRow, rCellPos.mnRow + aCellSpan.mnRows ) ) );
2216 }
2217 
// Body of a function whose signature line is not part of this listing;
// presumably the GetDocPos( eOrient, nCellPos ) overload used by the border
// code further down.
// Returns the base position of the table in the passed orientation plus the
// size of all cell positions in front of nCellPos.
2219 {
2220  return maDocBasePos.Get( eOrient ) + GetDocSize( eOrient, 0, nCellPos );
2221 }
2222 
// Body of a function whose signature line is not part of this listing
// (presumably a GetDocPos overload taking a cell position).
// Returns the column and row position of the cell at rCellPos.
2224 {
2225  return ScHTMLPos(
2226  static_cast< SCCOL >( GetDocPos( tdCol, rCellPos.mnCol ) ),
2227  static_cast< SCROW >( GetDocPos( tdRow, rCellPos.mnRow ) ) );
2228 }
2229 
2230 void ScHTMLTable::GetDocRange( ScRange& rRange ) const
2231 {
2232  rRange.aStart = rRange.aEnd = maDocBasePos.MakeAddr();
2233  ScAddress aErrorPos( ScAddress::UNINITIALIZED );
2234  if (!rRange.aEnd.Move( static_cast< SCCOL >( GetDocSize( tdCol ) ) - 1,
2235  static_cast< SCROW >( GetDocSize( tdRow ) ) - 1, 0, aErrorPos, mrDoc ))
2236  {
2237  assert(!"can't move");
2238  }
2239 }
2240 
2241 void ScHTMLTable::ApplyCellBorders( ScDocument* pDoc, const ScAddress& rFirstPos ) const
2242 {
2243  OSL_ENSURE( pDoc, "ScHTMLTable::ApplyCellBorders - no document" );
2244  if( pDoc && mbBorderOn )
2245  {
2246  const SCCOL nLastCol = maSize.mnCols - 1;
2247  const SCROW nLastRow = maSize.mnRows - 1;
2248  const tools::Long nOuterLine = SvxBorderLineWidth::Medium;
2249  const tools::Long nInnerLine = SvxBorderLineWidth::Hairline;
2250  SvxBorderLine aOuterLine(nullptr, nOuterLine, SvxBorderLineStyle::SOLID);
2251  SvxBorderLine aInnerLine(nullptr, nInnerLine, SvxBorderLineStyle::SOLID);
2252  SvxBoxItem aBorderItem( ATTR_BORDER );
2253 
2254  for( SCCOL nCol = 0; nCol <= nLastCol; ++nCol )
2255  {
2256  SvxBorderLine* pLeftLine = (nCol == 0) ? &aOuterLine : &aInnerLine;
2257  SvxBorderLine* pRightLine = (nCol == nLastCol) ? &aOuterLine : &aInnerLine;
2258  SCCOL nCellCol1 = static_cast< SCCOL >( GetDocPos( tdCol, nCol ) ) + rFirstPos.Col();
2259  SCCOL nCellCol2 = nCellCol1 + static_cast< SCCOL >( GetDocSize( tdCol, nCol ) ) - 1;
2260  for( SCROW nRow = 0; nRow <= nLastRow; ++nRow )
2261  {
2262  SvxBorderLine* pTopLine = (nRow == 0) ? &aOuterLine : &aInnerLine;
2263  SvxBorderLine* pBottomLine = (nRow == nLastRow) ? &aOuterLine : &aInnerLine;
2264  SCROW nCellRow1 = GetDocPos( tdRow, nRow ) + rFirstPos.Row();
2265  SCROW nCellRow2 = nCellRow1 + GetDocSize( tdRow, nRow ) - 1;
2266  for( SCCOL nCellCol = nCellCol1; nCellCol <= nCellCol2; ++nCellCol )
2267  {
2268  aBorderItem.SetLine( (nCellCol == nCellCol1) ? pLeftLine : nullptr, SvxBoxItemLine::LEFT );
2269  aBorderItem.SetLine( (nCellCol == nCellCol2) ? pRightLine : nullptr, SvxBoxItemLine::RIGHT );
2270  for( SCROW nCellRow = nCellRow1; nCellRow <= nCellRow2; ++nCellRow )
2271  {
2272  aBorderItem.SetLine( (nCellRow == nCellRow1) ? pTopLine : nullptr, SvxBoxItemLine::TOP );
2273  aBorderItem.SetLine( (nCellRow == nCellRow2) ? pBottomLine : nullptr, SvxBoxItemLine::BOTTOM );
2274  pDoc->ApplyAttr( nCellCol, nCellRow, rFirstPos.Tab(), aBorderItem );
2275  }
2276  }
2277  }
2278  }
2279  }
2280 
2281  for( ScHTMLTableIterator aIter( mxNestedTables.get() ); aIter.is(); ++aIter )
2282  aIter->ApplyCellBorders( pDoc, rFirstPos );
2283 }
2284 
2286 {
2287  return mpParser->GetDoc().GetFormatTable();
2288 }
2289 
2291 {
2292  return mpCurrEntryVector && mpCurrEntryVector->empty();
2293 }
2294 
2296 {
2297  return (rInfo.nToken == HtmlTokenId::TEXTTOKEN) && (rInfo.aText.getLength() == 1) && (rInfo.aText[ 0 ] == ' ');
2298 }
2299 
2301 {
2302  return std::make_unique<ScHTMLEntry>( GetCurrItemSet() );
2303 }
2304 
2306 {
2307  OSL_ENSURE( !mxCurrEntry, "ScHTMLTable::CreateNewEntry - old entry still present" );
2309  mxCurrEntry->aSel = rInfo.aSelection;
2310 }
2311 
2313 {
2314  // HTML entry list does not own the entries
2315  rEntryVector.push_back( rxEntry.get() );
2316  // mrEEParseList (reference to member of ScEEParser) owns the entries
2317  mrEEParseList.push_back(std::shared_ptr<ScEEParseEntry>(rxEntry.release()));
2318 }
2319 
2321 {
2322  bool bPushed = false;
2323  if( rxEntry && rxEntry->HasContents() )
2324  {
2325  if( mpCurrEntryVector )
2326  {
2327  if( mbPushEmptyLine )
2328  {
2329  ScHTMLEntryPtr xEmptyEntry = CreateEntry();
2330  ImplPushEntryToVector( *mpCurrEntryVector, xEmptyEntry );
2331  mbPushEmptyLine = false;
2332  }
2334  bPushed = true;
2335  }
2336  else if( mpParentTable )
2337  {
2338  bPushed = mpParentTable->PushEntry( rxEntry );
2339  }
2340  else
2341  {
2342  OSL_FAIL( "ScHTMLTable::PushEntry - cannot push entry, no parent found" );
2343  }
2344  }
2345  return bPushed;
2346 }
2347 
2348 bool ScHTMLTable::PushEntry( const HtmlImportInfo& rInfo, bool bLastInCell )
2349 {
2350  OSL_ENSURE( mxCurrEntry, "ScHTMLTable::PushEntry - no current entry" );
2351  bool bPushed = false;
2352  if( mxCurrEntry )
2353  {
2354  mxCurrEntry->AdjustEnd( rInfo );
2355  mxCurrEntry->Strip( mrEditEngine );
2356 
2357  // import entry always, if it is the last in cell, and cell is still empty
2358  if( bLastInCell && IsEmptyCell() )
2359  {
2360  mxCurrEntry->SetImportAlways();
2361  // don't insert empty lines before single empty entries
2362  if( mxCurrEntry->IsEmpty() )
2363  mbPushEmptyLine = false;
2364  }
2365 
2366  bPushed = PushEntry( mxCurrEntry );
2367  mxCurrEntry.reset();
2368  }
2369  return bPushed;
2370 }
2371 
2373 {
2374  OSL_ENSURE( nTableId != SC_HTML_GLOBAL_TABLE, "ScHTMLTable::PushTableEntry - cannot push global table" );
2375  if( nTableId != SC_HTML_GLOBAL_TABLE )
2376  {
2377  ScHTMLEntryPtr xEntry( new ScHTMLEntry( maTableItemSet, nTableId ) );
2378  PushEntry( xEntry );
2379  }
2380 }
2381 
2383 {
2384  ScHTMLTable* pTable = ((nTableId != SC_HTML_GLOBAL_TABLE) && mxNestedTables) ?
2385  mxNestedTables->FindTable( nTableId, false ) : nullptr;
2386  OSL_ENSURE( pTable || (nTableId == SC_HTML_GLOBAL_TABLE), "ScHTMLTable::GetExistingTable - table not found" );
2387  return pTable;
2388 }
2389 
2390 ScHTMLTable* ScHTMLTable::InsertNestedTable( const HtmlImportInfo& rInfo, bool bPreFormText )
2391 {
2392  if( !mxNestedTables )
2393  mxNestedTables.reset( new ScHTMLTableMap( *this ) );
2394  if( bPreFormText ) // enclose new preformatted table with empty lines
2396  return mxNestedTables->CreateTable( rInfo, bPreFormText, mrDoc );
2397 }
2398 
2399 void ScHTMLTable::InsertNewCell( const ScHTMLSize& rSpanSize )
2400 {
2401  ScRange* pRange;
2402 
2403  /* Find an unused cell by skipping all merged ranges that cover the
2404  current cell position stored in maCurrCell. */
2405  for (;;)
2406  {
2407  pRange = maVMergedCells.Find( maCurrCell.MakeAddr() );
2408  if (!pRange)
2409  pRange = maHMergedCells.Find( maCurrCell.MakeAddr() );
2410  if (!pRange)
2411  break;
2412  maCurrCell.mnCol = pRange->aEnd.Col() + 1;
2413  }
2415 
2416  /* If the new cell is merged horizontally, try to find collisions with
2417  other vertically merged ranges. In this case, shrink existing
2418  vertically merged ranges (do not shrink the new cell). */
2419  SCCOL nColEnd = maCurrCell.mnCol + rSpanSize.mnCols;
2420  for( ScAddress aAddr( maCurrCell.MakeAddr() ); aAddr.Col() < nColEnd; aAddr.IncCol() )
2421  if( (pRange = maVMergedCells.Find( aAddr )) != nullptr )
2422  pRange->aEnd.SetRow( maCurrCell.mnRow - 1 );
2423 
2424  // insert the new range into the cell lists
2425  ScRange aNewRange( maCurrCell.MakeAddr() );
2426  ScAddress aErrorPos( ScAddress::UNINITIALIZED );
2427  if (!aNewRange.aEnd.Move( rSpanSize.mnCols - 1, rSpanSize.mnRows - 1, 0, aErrorPos, mrDoc ))
2428  {
2429  assert(!"can't move");
2430  }
2431  if( rSpanSize.mnRows > 1 )
2432  {
2433  maVMergedCells.push_back( aNewRange );
2434  /* Do not insert vertically merged ranges into maUsedCells yet,
2435  because they may be shrunken (see above). The final vertically
2436  merged ranges are inserted in FillEmptyCells(). */
2437  }
2438  else
2439  {
2440  if( rSpanSize.mnCols > 1 )
2441  maHMergedCells.push_back( aNewRange );
2442  /* Insert horizontally merged ranges and single cells into
2443  maUsedCells, they will not be changed anymore. */
2444  maUsedCells.Join( aNewRange );
2445  }
2446 
2447  // adjust table size
2448  maSize.mnCols = std::max< SCCOL >( maSize.mnCols, aNewRange.aEnd.Col() + 1 );
2449  maSize.mnRows = std::max< SCROW >( maSize.mnRows, aNewRange.aEnd.Row() + 1 );
2450 }
2451 
2453 {
2454  if( mbRowOn )
2455  ImplRowOff();
2456  moRowItemSet.emplace( maTableItemSet );
2457  maCurrCell.mnCol = 0;
2458  mbRowOn = true;
2459  mbDataOn = false;
2460 }
2461 
2463 {
2464  if( mbDataOn )
2465  ImplDataOff();
2466  if( mbRowOn )
2467  {
2468  moRowItemSet.reset();
2469  ++maCurrCell.mnRow;
2470  mbRowOn = mbDataOn = false;
2471  }
2472 }
2473 
2474 void ScHTMLTable::ImplDataOn( const ScHTMLSize& rSpanSize )
2475 {
2476  if( mbDataOn )
2477  ImplDataOff();
2478  if( !mbRowOn )
2479  ImplRowOn();
2480  moDataItemSet.emplace( *moRowItemSet );
2481  InsertNewCell( rSpanSize );
2482  mbDataOn = true;
2483  mbPushEmptyLine = false;
2484 }
2485 
2487 {
2488  if( mbDataOn )
2489  {
2490  moDataItemSet.reset();
2491  ++maCurrCell.mnCol;
2492  mpCurrEntryVector = nullptr;
2493  mbDataOn = false;
2494  }
2495 }
2496 
2498 {
2499  // special handling for table header cells
2500  if( rInfo.nToken == HtmlTokenId::TABLEHEADER_ON )
2501  {
2503  rItemSet.Put( SvxHorJustifyItem( SvxCellHorJustify::Center, ATTR_HOR_JUSTIFY ) );
2504  }
2505 
2506  const HTMLOptions& rOptions = static_cast<HTMLParser*>(rInfo.pParser)->GetOptions();
2507  for (const auto& rOption : rOptions)
2508  {
2509  switch( rOption.GetToken() )
2510  {
2511  case HtmlOptionId::ALIGN:
2512  {
2513  SvxCellHorJustify eVal = SvxCellHorJustify::Standard;
2514  const OUString& rOptVal = rOption.GetString();
2515  if( rOptVal.equalsIgnoreAsciiCase( OOO_STRING_SVTOOLS_HTML_AL_right ) )
2516  eVal = SvxCellHorJustify::Right;
2517  else if( rOptVal.equalsIgnoreAsciiCase( OOO_STRING_SVTOOLS_HTML_AL_center ) )
2518  eVal = SvxCellHorJustify::Center;
2519  else if( rOptVal.equalsIgnoreAsciiCase( OOO_STRING_SVTOOLS_HTML_AL_left ) )
2520  eVal = SvxCellHorJustify::Left;
2521  if( eVal != SvxCellHorJustify::Standard )
2522  rItemSet.Put( SvxHorJustifyItem( eVal, ATTR_HOR_JUSTIFY ) );
2523  }
2524  break;
2525 
2526  case HtmlOptionId::VALIGN:
2527  {
2528  SvxCellVerJustify eVal = SvxCellVerJustify::Standard;
2529  const OUString& rOptVal = rOption.GetString();
2530  if( rOptVal.equalsIgnoreAsciiCase( OOO_STRING_SVTOOLS_HTML_VA_top ) )
2531  eVal = SvxCellVerJustify::Top;
2532  else if( rOptVal.equalsIgnoreAsciiCase( OOO_STRING_SVTOOLS_HTML_VA_middle ) )
2533  eVal = SvxCellVerJustify::Center;
2534  else if( rOptVal.equalsIgnoreAsciiCase( OOO_STRING_SVTOOLS_HTML_VA_bottom ) )
2535  eVal = SvxCellVerJustify::Bottom;
2536  if( eVal != SvxCellVerJustify::Standard )
2537  rItemSet.Put( SvxVerJustifyItem( eVal, ATTR_VER_JUSTIFY ) );
2538  }
2539  break;
2540 
2541  case HtmlOptionId::BGCOLOR:
2542  {
2543  Color aColor;
2544  rOption.GetColor( aColor );
2545  rItemSet.Put( SvxBrushItem( aColor, ATTR_BACKGROUND ) );
2546  }
2547  break;
2548  default: break;
2549  }
2550  }
2551 }
2552 
2553 void ScHTMLTable::SetDocSize( ScHTMLOrient eOrient, SCCOLROW nCellPos, SCCOLROW nSize )
2554 {
2555  OSL_ENSURE( nCellPos >= 0, "ScHTMLTable::SetDocSize - unexpected negative position" );
2556  ScSizeVec& rSizes = maCumSizes[ eOrient ];
2557  size_t nIndex = static_cast< size_t >( nCellPos );
2558  // expand with height/width == 1
2559  while( nIndex >= rSizes.size() )
2560  rSizes.push_back( rSizes.empty() ? 1 : (rSizes.back() + 1) );
2561  // update size of passed position and all following
2562  // #i109987# only grow, don't shrink - use the largest needed size
2563  SCCOLROW nDiff = nSize - ((nIndex == 0) ? rSizes.front() : (rSizes[ nIndex ] - rSizes[ nIndex - 1 ]));
2564  if( nDiff > 0 )
2565  std::for_each(rSizes.begin() + nIndex, rSizes.end(), [&nDiff](SCCOLROW& rSize) { rSize += nDiff; });
2566 }
2567 
2569  ScHTMLOrient eOrient, SCCOLROW nCellPos, SCCOLROW nCellSpan, SCCOLROW nRealDocSize )
2570 {
2571  SCCOLROW nDiffSize = 0;
2572  // in merged columns/rows: reduce needed size by size of leading columns
2573  while( nCellSpan > 1 )
2574  {
2575  nDiffSize += GetDocSize( eOrient, nCellPos );
2576  --nCellSpan;
2577  ++nCellPos;
2578  }
2579  // set remaining needed size to last column/row
2580  nRealDocSize -= std::min< SCCOLROW >( nRealDocSize - 1, nDiffSize );
2581  SetDocSize( eOrient, nCellPos, nRealDocSize );
2582 }
2583 
2585 {
2586  for( ScHTMLTableIterator aIter( mxNestedTables.get() ); aIter.is(); ++aIter )
2587  aIter->FillEmptyCells();
2588 
2589  // insert the final vertically merged ranges into maUsedCells
2590  for ( size_t i = 0, nRanges = maVMergedCells.size(); i < nRanges; ++i )
2591  {
2592  ScRange & rRange = maVMergedCells[ i ];
2593  maUsedCells.Join( rRange );
2594  }
2595 
2596  for( ScAddress aAddr; aAddr.Row() < maSize.mnRows; aAddr.IncRow() )
2597  {
2598  for( aAddr.SetCol( 0 ); aAddr.Col() < maSize.mnCols; aAddr.IncCol() )
2599  {
2600  if( !maUsedCells.Find( aAddr ) )
2601  {
2602  // create a range for the lock list (used to calc. cell span)
2603  ScRange aRange( aAddr );
2604  do
2605  {
2606  aRange.aEnd.IncCol();
2607  }
2608  while( (aRange.aEnd.Col() < maSize.mnCols) && !maUsedCells.Find( aRange.aEnd ) );
2609  aRange.aEnd.IncCol( -1 );
2610  maUsedCells.Join( aRange );
2611 
2612  // insert a dummy entry
2613  ScHTMLEntryPtr xEntry = CreateEntry();
2614  ImplPushEntryToVector( maEntryMap[ ScHTMLPos( aAddr ) ], xEntry );
2615  }
2616  }
2617  }
2618 }
2619 
2621 {
2622  // recalc table sizes recursively from inner to outer
2623  for( ScHTMLTableIterator aIter( mxNestedTables.get() ); aIter.is(); ++aIter )
2624  aIter->RecalcDocSize();
2625 
2626  /* Two passes: first calculates the sizes of single columns/rows, then
2627  the sizes of spanned columns/rows. This allows to fill nested tables
2628  into merged cells optimally. */
2629  static const sal_uInt16 PASS_SINGLE = 0;
2630  static const sal_uInt16 PASS_SPANNED = 1;
2631  for( sal_uInt16 nPass = PASS_SINGLE; nPass <= PASS_SPANNED; ++nPass )
2632  {
2633  // iterate through every table cell
2634  for( const auto& [rCellPos, rEntryVector] : maEntryMap )
2635  {
2636  ScHTMLSize aCellSpan = GetSpan( rCellPos );
2637 
2638  // process the dimension of the current cell in this pass?
2639  // (pass is single and span is 1) or (pass is not single and span is not 1)
2640  bool bProcessColWidth = ((nPass == PASS_SINGLE) == (aCellSpan.mnCols == 1));
2641  bool bProcessRowHeight = ((nPass == PASS_SINGLE) == (aCellSpan.mnRows == 1));
2642  if( bProcessColWidth || bProcessRowHeight )
2643  {
2644  ScHTMLSize aDocSize( 1, 0 ); // resulting size of the cell in document
2645 
2646  // expand the cell size for each cell parse entry
2647  for( const auto& rpEntry : rEntryVector )
2648  {
2649  ScHTMLTable* pTable = GetExistingTable( rpEntry->GetTableId() );
2650  // find entry with maximum width
2651  if( bProcessColWidth && pTable )
2652  aDocSize.mnCols = std::max( aDocSize.mnCols, static_cast< SCCOL >( pTable->GetDocSize( tdCol ) ) );
2653  // add up height of each entry
2654  if( bProcessRowHeight )
2655  aDocSize.mnRows += pTable ? pTable->GetDocSize( tdRow ) : 1;
2656  }
2657  if( !aDocSize.mnRows )
2658  aDocSize.mnRows = 1;
2659 
2660  if( bProcessColWidth )
2661  CalcNeededDocSize( tdCol, rCellPos.mnCol, aCellSpan.mnCols, aDocSize.mnCols );
2662  if( bProcessRowHeight )
2663  CalcNeededDocSize( tdRow, rCellPos.mnRow, aCellSpan.mnRows, aDocSize.mnRows );
2664  }
2665  }
2666  }
2667 }
2668 
2669 void ScHTMLTable::RecalcDocPos( const ScHTMLPos& rBasePos )
2670 {
2671  maDocBasePos = rBasePos;
2672  // after the previous assignment it is allowed to call GetDocPos() methods
2673 
2674  // iterate through every table cell
2675  for( auto& [rCellPos, rEntryVector] : maEntryMap )
2676  {
2677  // fixed doc position of the entire cell (first entry)
2678  const ScHTMLPos aCellDocPos( GetDocPos( rCellPos ) );
2679  // fixed doc size of the entire cell
2680  const ScHTMLSize aCellDocSize( GetDocSize( rCellPos ) );
2681 
2682  // running doc position for single entries
2683  ScHTMLPos aEntryDocPos( aCellDocPos );
2684 
2685  ScHTMLEntry* pEntry = nullptr;
2686  for( const auto& rpEntry : rEntryVector )
2687  {
2688  pEntry = rpEntry;
2689  if( ScHTMLTable* pTable = GetExistingTable( pEntry->GetTableId() ) )
2690  {
2691  pTable->RecalcDocPos( aEntryDocPos ); // recalc nested table
2692  pEntry->nCol = SCCOL_MAX;
2693  pEntry->nRow = SCROW_MAX;
2694  SCROW nTableRows = static_cast< SCROW >( pTable->GetDocSize( tdRow ) );
2695 
2696  // use this entry to pad empty space right of table
2697  if( mpParentTable ) // ... but not in global table
2698  {
2699  SCCOL nStartCol = aEntryDocPos.mnCol + static_cast< SCCOL >( pTable->GetDocSize( tdCol ) );
2700  SCCOL nNextCol = aEntryDocPos.mnCol + aCellDocSize.mnCols;
2701  if( nStartCol < nNextCol )
2702  {
2703  pEntry->nCol = nStartCol;
2704  pEntry->nRow = aEntryDocPos.mnRow;
2705  pEntry->nColOverlap = nNextCol - nStartCol;
2706  pEntry->nRowOverlap = nTableRows;
2707  }
2708  }
2709  aEntryDocPos.mnRow += nTableRows;
2710  }
2711  else
2712  {
2713  pEntry->nCol = aEntryDocPos.mnCol;
2714  pEntry->nRow = aEntryDocPos.mnRow;
2715  if( mpParentTable ) // do not merge in global table
2716  pEntry->nColOverlap = aCellDocSize.mnCols;
2717  ++aEntryDocPos.mnRow;
2718  }
2719  }
2720 
2721  // pEntry points now to last entry.
2722  if( pEntry )
2723  {
2724  if( (pEntry == rEntryVector.front()) && (pEntry->GetTableId() == SC_HTML_NO_TABLE) )
2725  {
2726  // pEntry is the only entry in this cell - merge rows of cell with single non-table entry.
2727  pEntry->nRowOverlap = aCellDocSize.mnRows;
2728  }
2729  else
2730  {
2731  // fill up incomplete entry lists
2732  SCROW nFirstUnusedRow = aCellDocPos.mnRow + aCellDocSize.mnRows;
2733  while( aEntryDocPos.mnRow < nFirstUnusedRow )
2734  {
2735  ScHTMLEntryPtr xDummyEntry( new ScHTMLEntry( pEntry->GetItemSet() ) );
2736  xDummyEntry->nCol = aEntryDocPos.mnCol;
2737  xDummyEntry->nRow = aEntryDocPos.mnRow;
2738  xDummyEntry->nColOverlap = aCellDocSize.mnCols;
2739  ImplPushEntryToVector( rEntryVector, xDummyEntry );
2740  ++aEntryDocPos.mnRow;
2741  }
2742  }
2743  }
2744  }
2745 }
2746 
2748  SfxItemPool& rPool,
2749  EditEngine& rEditEngine,
2750  std::vector<std::shared_ptr<ScEEParseEntry>>& rEEParseVector,
2751  ScHTMLTableId& rnUnusedId,
2752  ScHTMLParser* pParser,
2753  const ScDocument& rDoc
2754 ) :
2755  ScHTMLTable( rPool, rEditEngine, rEEParseVector, rnUnusedId, pParser, rDoc )
2756 {
2757 }
2758 
2760 {
2761 }
2762 
2764 {
2765  // Fills up empty cells with a dummy entry.
2766  FillEmptyCells();
2767  // recalc table sizes of all nested tables and this table
2768  RecalcDocSize();
2769  // recalc document positions of all entries in this table and in nested tables
2770  RecalcDocPos( GetDocPos() );
2771 }
2772 
2774  ScHTMLParser( pEditEngine, pDoc ),
2775  mnUnusedId( SC_HTML_GLOBAL_TABLE ),
2776  mbTitleOn( false )
2777 {
2778  mxGlobTable.reset(
2779  new ScHTMLGlobalTable(*pPool, *pEdit, maList, mnUnusedId, this, *pDoc));
2780  mpCurrTable = mxGlobTable.get();
2781 }
2782 
2784 {
2785 }
2786 
2787 ErrCode ScHTMLQueryParser::Read( SvStream& rStrm, const OUString& rBaseURL )
2788 {
2789  SvKeyValueIteratorRef xValues;
2790  SvKeyValueIterator* pAttributes = nullptr;
2791 
2792  SfxObjectShell* pObjSh = mpDoc->GetDocumentShell();
2793  if( pObjSh && pObjSh->IsLoading() )
2794  {
2795  pAttributes = pObjSh->GetHeaderAttributes();
2796  }
2797  else
2798  {
2799  /* When not loading, set up fake HTTP headers to force the SfxHTMLParser
2800  to use UTF8 (used when pasting from clipboard) */
2801  const char* pCharSet = rtl_getBestMimeCharsetFromTextEncoding( RTL_TEXTENCODING_UTF8 );
2802  if( pCharSet )
2803  {
2804  OUString aContentType = "text/html; charset=" +
2805  OUString::createFromAscii( pCharSet );
2806 
2807  xValues = new SvKeyValueIterator;
2808  xValues->Append( SvKeyValue( OOO_STRING_SVTOOLS_HTML_META_content_type, aContentType ) );
2809  pAttributes = xValues.get();
2810  }
2811  }
2812 
2814  pEdit->SetHtmlImportHdl( LINK( this, ScHTMLQueryParser, HTMLImportHdl ) );
2815  ErrCode nErr = pEdit->Read( rStrm, rBaseURL, EETextFormat::Html, pAttributes );
2816  pEdit->SetHtmlImportHdl( aOldLink );
2817 
2818  mxGlobTable->Recalc();
2819  nColMax = static_cast< SCCOL >( mxGlobTable->GetDocSize( tdCol ) - 1 );
2820  nRowMax = static_cast< SCROW >( mxGlobTable->GetDocSize( tdRow ) - 1 );
2821 
2822  return nErr;
2823 }
2824 
2826 {
2827  return mxGlobTable.get();
2828 }
2829 
2831 {
2832  switch( rInfo.nToken )
2833  {
2834 // --- meta data ---
2835  case HtmlTokenId::META: MetaOn( rInfo ); break; // <meta>
2836 
2837 // --- title handling ---
2838  case HtmlTokenId::TITLE_ON: TitleOn(); break; // <title>
2839  case HtmlTokenId::TITLE_OFF: TitleOff( rInfo ); break; // </title>
2840 
2841  case HtmlTokenId::STYLE_ON: break;
2842  case HtmlTokenId::STYLE_OFF: ParseStyle(rInfo.aText); break;
2843 
2844 // --- body handling ---
2845  case HtmlTokenId::BODY_ON: mpCurrTable->BodyOn( rInfo ); break; // <body>
2846  case HtmlTokenId::BODY_OFF: mpCurrTable->BodyOff( rInfo ); break; // </body>
2847 
2848 // --- insert text ---
2849  case HtmlTokenId::TEXTTOKEN: InsertText( rInfo ); break; // any text
2850  case HtmlTokenId::LINEBREAK: mpCurrTable->BreakOn(); break; // <br>
2851  case HtmlTokenId::HEAD1_ON: // <h1>
2852  case HtmlTokenId::HEAD2_ON: // <h2>
2853  case HtmlTokenId::HEAD3_ON: // <h3>
2854  case HtmlTokenId::HEAD4_ON: // <h4>
2855  case HtmlTokenId::HEAD5_ON: // <h5>
2856  case HtmlTokenId::HEAD6_ON: // <h6>
2857  case HtmlTokenId::PARABREAK_ON: mpCurrTable->HeadingOn(); break; // <p>
2858 
2859 // --- misc. contents ---
2860  case HtmlTokenId::ANCHOR_ON: mpCurrTable->AnchorOn(); break; // <a>
2861 
2862 // --- table handling ---
2863  case HtmlTokenId::TABLE_ON: TableOn( rInfo ); break; // <table>
2864  case HtmlTokenId::TABLE_OFF: TableOff( rInfo ); break; // </table>
2865  case HtmlTokenId::CAPTION_ON: mpCurrTable->CaptionOn(); break; // <caption>
2866  case HtmlTokenId::CAPTION_OFF: mpCurrTable->CaptionOff(); break; // </caption>
2867  case HtmlTokenId::TABLEROW_ON: mpCurrTable->RowOn( rInfo ); break; // <tr>
2868  case HtmlTokenId::TABLEROW_OFF: mpCurrTable->RowOff( rInfo ); break; // </tr>
2869  case HtmlTokenId::TABLEHEADER_ON: // <th>
2870  case HtmlTokenId::TABLEDATA_ON: mpCurrTable->DataOn( rInfo ); break; // <td>
2871  case HtmlTokenId::TABLEHEADER_OFF: // </th>
2872  case HtmlTokenId::TABLEDATA_OFF: mpCurrTable->DataOff( rInfo ); break; // </td>
2873  case HtmlTokenId::PREFORMTXT_ON: PreOn( rInfo ); break; // <pre>
2874  case HtmlTokenId::PREFORMTXT_OFF: PreOff( rInfo ); break; // </pre>
2875 
2876 // --- formatting ---
2877  case HtmlTokenId::FONT_ON: FontOn( rInfo ); break; // <font>
2878 
2879  case HtmlTokenId::BIGPRINT_ON: // <big>
2882  break;
2883  case HtmlTokenId::SMALLPRINT_ON: // <small>
2886  break;
2887 
2888  case HtmlTokenId::BOLD_ON: // <b>
2889  case HtmlTokenId::STRONG_ON: // <strong>
2891  break;
2892 
2893  case HtmlTokenId::ITALIC_ON: // <i>
2894  case HtmlTokenId::EMPHASIS_ON: // <em>
2895  case HtmlTokenId::ADDRESS_ON: // <address>
2896  case HtmlTokenId::BLOCKQUOTE_ON: // <blockquote>
2897  case HtmlTokenId::BLOCKQUOTE30_ON: // <bq>
2898  case HtmlTokenId::CITATION_ON: // <cite>
2899  case HtmlTokenId::VARIABLE_ON: // <var>
2901  break;
2902 
2903  case HtmlTokenId::DEFINSTANCE_ON: // <dfn>
2906  break;
2907 
2908  case HtmlTokenId::UNDERLINE_ON: // <u>
2910  break;
2911  default: break;
2912  }
2913 }
2914 
2916 {
2917  mpCurrTable->PutText( rInfo );
2918  if( mbTitleOn )
2919  maTitle.append(rInfo.aText);
2920 }
2921 
2923 {
2924  const HTMLOptions& rOptions = static_cast<HTMLParser*>(rInfo.pParser)->GetOptions();
2925  for (const auto& rOption : rOptions)
2926  {
2927  switch( rOption.GetToken() )
2928  {
2929  case HtmlOptionId::FACE :
2930  {
2931  const OUString& rFace = rOption.GetString();
2932  OUString aFontName;
2933  sal_Int32 nPos = 0;
2934  while( nPos != -1 )
2935  {
2936  // font list separator: VCL = ';' HTML = ','
2937  std::u16string_view aFName = comphelper::string::strip(o3tl::getToken(rFace, 0, ',', nPos), ' ');
2938  aFontName = ScGlobal::addToken(aFontName, aFName, ';');
2939  }
2940  if ( !aFontName.isEmpty() )
2942  aFontName, OUString(), PITCH_DONTKNOW,
2943  RTL_TEXTENCODING_DONTKNOW, ATTR_FONT ) );
2944  }
2945  break;
2946  case HtmlOptionId::SIZE :
2947  {
2948  sal_uInt32 nSize = getLimitedValue< sal_uInt32 >( rOption.GetNumber(), 1, SC_HTML_FONTSIZES );
2950  }
2951  break;
2952  case HtmlOptionId::COLOR :
2953  {
2954  Color aColor;
2955  rOption.GetColor( aColor );
2957  }
2958  break;
2959  default: break;
2960  }
2961  }
2962 }
2963 
2965 {
2966  if( mpDoc->GetDocumentShell() )
2967  {
2968  HTMLParser* pParser = static_cast< HTMLParser* >( rInfo.pParser );
2969 
2970  uno::Reference<document::XDocumentPropertiesSupplier> xDPS(
2971  mpDoc->GetDocumentShell()->GetModel(), uno::UNO_QUERY_THROW);
2972  pParser->ParseMetaOptions(
2973  xDPS->getDocumentProperties(),
2975  }
2976 }
2977 
2979 {
2980  mbTitleOn = true;
2981  maTitle.setLength(0);
2982 }
2983 
2985 {
2986  if( !mbTitleOn )
2987  return;
2988 
2989  OUString aTitle = maTitle.makeStringAndClear().trim();
2990  if (!aTitle.isEmpty() && mpDoc->GetDocumentShell())
2991  {
2992  uno::Reference<document::XDocumentPropertiesSupplier> xDPS(
2993  mpDoc->GetDocumentShell()->GetModel(), uno::UNO_QUERY_THROW);
2994 
2995  xDPS->getDocumentProperties()->setTitle(aTitle);
2996  }
2997  InsertText( rInfo );
2998  mbTitleOn = false;
2999 }
3000 
3002 {
3003  mpCurrTable = mpCurrTable->TableOn( rInfo );
3004 }
3005 
3007 {
3008  mpCurrTable = mpCurrTable->TableOff( rInfo );
3009 }
3010 
3012 {
3013  mpCurrTable = mpCurrTable->PreOn( rInfo );
3014 }
3015 
3017 {
3018  mpCurrTable = mpCurrTable->PreOff( rInfo );
3019 }
3020 
3022 {
3023  mpCurrTable = mpCurrTable->CloseTable( rInfo );
3024 }
3025 
3026 namespace {
3027 
3031 class CSSHandler: public orcus::css_handler
3032 {
3033  struct MemStr
3034  {
3035  const char* mp;
3036  size_t mn;
3037 
3038  MemStr() : mp(nullptr), mn(0) {}
3039  MemStr(const char* p, size_t n) : mp(p), mn(n) {}
3040  MemStr(const MemStr& r) : mp(r.mp), mn(r.mn) {}
3041  MemStr& operator=(const MemStr& r) = default;
3042  };
3043 
3044  typedef std::pair<MemStr, MemStr> SelectorName; // element : class
3045  typedef std::vector<SelectorName> SelectorNames;
3046 
3047  SelectorNames maSelectorNames; // current selector names
3048  MemStr maPropName; // current property name.
3049  MemStr maPropValue; // current property value.
3050  ScHTMLStyles& mrStyles;
3051 
3052 public:
3053  explicit CSSHandler(ScHTMLStyles& rStyles):
3054  maPropName(),
3055  maPropValue(),
3056  mrStyles(rStyles)
3057  {}
3058 
3059  // selector name starting with "@"
3060  static void at_rule_name(const char* /*p*/, size_t /*n*/)
3061  {
3062  // TODO: For now, we ignore at-rule properties
3063  }
3064 
3065  // selector name not starting with "." or "#" (i.e. element selectors)
3066  void simple_selector_type(const char* pElem, size_t nElem)
3067  {
3068  MemStr aElem(pElem, nElem); // element given
3069  MemStr aClass(nullptr, 0); // class name not given - to be added in the "element global" storage
3070  SelectorName aName(aElem, aClass);
3071 
3072  maSelectorNames.push_back(aName);
3073  }
3074 
3075  // selector names starting with a "." (i.e. class selector)
3076  void simple_selector_class(const char* pClassName, size_t nClassName)
3077  {
3078  MemStr aElem(nullptr, 0); // no element given - should be added in the "global" storage
3079  MemStr aClass(pClassName, nClassName);
3080  SelectorName aName(aElem, aClass);
3081 
3082  maSelectorNames.push_back(aName);
3083  }
3084 
3085  // TODO: Add other selectors
3086 
3087  void property_name(const char* p, size_t n)
3088  {
3089  maPropName = MemStr(p, n);
3090  }
3091 
3092  void value(const char* p, size_t n)
3093  {
3094  maPropValue = MemStr(p, n);
3095  }
3096 
3097  void end_block() {
3098  maSelectorNames.clear();
3099  }
3100 
3101  void end_property()
3102  {
3103  SelectorNames::const_iterator itr = maSelectorNames.begin(), itrEnd = maSelectorNames.end();
3104  for (; itr != itrEnd; ++itr)
3105  {
3106  // Add this property to the collection for each selector.
3107  const SelectorName& rSelName = *itr;
3108  const MemStr& rElem = rSelName.first;
3109  const MemStr& rClass = rSelName.second;
3110  OUString aName(maPropName.mp, maPropName.mn, RTL_TEXTENCODING_UTF8);
3111  OUString aValue(maPropValue.mp, maPropValue.mn, RTL_TEXTENCODING_UTF8);
3112  mrStyles.add(rElem.mp, rElem.mn, rClass.mp, rClass.mn, aName, aValue);
3113  }
3114  maPropName = MemStr();
3115  maPropValue = MemStr();
3116  }
3117 
3118 };
3119 
3120 }
3121 
3122 void ScHTMLQueryParser::ParseStyle(std::u16string_view rStrm)
3123 {
3124  OString aStr = OUStringToOString(rStrm, RTL_TEXTENCODING_UTF8);
3125  CSSHandler aHdl(GetStyles());
3126  orcus::css_parser<CSSHandler> aParser(aStr.getStr(), aStr.getLength(), aHdl);
3127  try
3128  {
3129  aParser.parse();
3130  }
3131  catch (const orcus::css::parse_error& rOrcusParseError)
3132  {
3133  SAL_WARN("sc", "ScHTMLQueryParser::ParseStyle: " << rOrcusParseError.what());
3134  // TODO: Parsing of CSS failed. Do nothing for now.
3135  }
3136 }
3137 
3138 IMPL_LINK( ScHTMLQueryParser, HTMLImportHdl, HtmlImportInfo&, rInfo, void )
3139 {
3140  switch( rInfo.eState )
3141  {
3142  case HtmlImportState::Start:
3143  break;
3144 
3145  case HtmlImportState::NextToken:
3146  ProcessToken( rInfo );
3147  break;
3148 
3149  case HtmlImportState::InsertPara:
3150  mpCurrTable->InsertPara( rInfo );
3151  break;
3152 
3153  case HtmlImportState::SetAttr:
3154  case HtmlImportState::InsertText:
3155  case HtmlImportState::InsertField:
3156  break;
3157 
3158  case HtmlImportState::End:
3159  while( mpCurrTable->GetTableId() != SC_HTML_GLOBAL_TABLE )
3160  CloseTable( rInfo );
3161  break;
3162 
3163  default:
3164  OSL_FAIL( "ScHTMLQueryParser::HTMLImportHdl - unknown ImportInfo::eState" );
3165  }
3166 }
3167 
3168 /* vim:set shiftwidth=4 softtabstop=4 expandtab: */
OUString maCaption
Table name from.
Definition: htmlpars.hxx:528
std::unique_ptr< OuterMap > pTables
Definition: htmlpars.hxx:160
void RowOn(const HtmlImportInfo &rInfo)
Starts next row (<tr> tag).
Definition: htmlpars.cxx:2002
EditEngine * pEdit
Definition: eeparser.hxx:102
EditEngine & mrEditEngine
List of all used cells.
Definition: htmlpars.hxx:537
Type
SvxCellHorJustify
ScHTMLStyles()
just a persistent empty string.
Definition: htmlpars.cxx:73
std::vector< std::shared_ptr< ScEEParseEntry > > & mrEEParseList
Edit engine (from ScEEParser).
Definition: htmlpars.hxx:538
SCCOL mnCols
Definition: htmlpars.hxx:262
ScHTMLTable * PreOn(const HtmlImportInfo &rInfo)
Starts a new table based on preformatted text (<pre> tag).
Definition: htmlpars.cxx:1991
sal_Int32 nStartPara
SvKeyValueIterator * GetHeaderAttributes()
const Value & back() const
void AnchorOn(HtmlImportInfo *)
Definition: htmlpars.cxx:1393
SfxItemSet maTableItemSet
Unique identifier of this table.
Definition: htmlpars.hxx:531
const sal_uInt16 SC_HTML_OFFSET_TOLERANCE_LARGE
Definition: htmlpars.hxx:38
sal_Int32 nIndex
const char nHorizontal
Definition: eeparser.hxx:34
ScAddress aStart
Definition: address.hxx:497
static void EntryEnd(ScEEParseEntry *, const ESelection &)
Definition: htmlpars.cxx:317
#define OOO_STRING_SVTOOLS_HTML_VA_bottom
ScHTMLSize maSize
Cumulated cell counts for each HTML table column/row.
Definition: htmlpars.hxx:543
ScDocument * mpDoc
Definition: htmlpars.hxx:82
static void MakeCol(ScHTMLColOffset *, sal_uInt16 &nOffset, sal_uInt16 &nWidth, sal_uInt16 nOffsetTol, sal_uInt16 nWidthTol)
Definition: htmlpars.cxx:369
void SetHtmlImportHdl(const Link< HtmlImportInfo &, void > &rLink)
ScHTMLTableAutoId(ScHTMLTableId &rnUnusedId)
Reference to global unused identifier variable.
Definition: htmlpars.cxx:1804
ScHTMLGlobalTable(SfxItemPool &rPool, EditEngine &rEditEngine, std::vector< std::shared_ptr< ScEEParseEntry >> &rEEParseList, ScHTMLTableId &rnUnusedId, ScHTMLParser *pParser, const ScDocument &rDoc)
Definition: htmlpars.cxx:2747
static void ModifyOffset(ScHTMLColOffset *, sal_uInt16 &nOldOffset, sal_uInt16 &nNewOffset, sal_uInt16 nOffsetTol)
Definition: htmlpars.cxx:403
A map of ScHTMLTable objects.
Definition: htmlpars.cxx:1696
FormulaCommand pE
constexpr TypedWhichId< SvxBoxItem > ATTR_BORDER(150)
virtual ~ScHTMLParser() override
Definition: htmlpars.cxx:201
void Strip(const EditEngine &rEditEngine)
Deletes leading and trailing empty paragraphs from the entry.
Definition: htmlpars.cxx:1673
SCROW Row() const
Definition: address.hxx:274
bool mbBorderOn
Definition: htmlpars.hxx:548
void setWidth(tools::Long nWidth)
OUString aText
SCCOL nColCnt
Definition: eeparser.hxx:109
A single entry containing a line of text or representing a table.
Definition: htmlpars.hxx:272
#define OOO_STRING_SVTOOLS_HTML_AL_center
bool PushEntry(ScHTMLEntryPtr &rxEntry)
Tries to insert the entry into the current cell.
Definition: htmlpars.cxx:2320
void NewActEntry(const ScEEParseEntry *)
Definition: eeimpars.cxx:644
ScHTMLTableStdMap::const_iterator const_iterator
Definition: htmlpars.cxx:1704
const sal_uInt32 SC_HTML_FONTSIZES
Definition: htmlpars.hxx:34
ScHTMLEntry(const SfxItemSet &rItemSet, ScHTMLTableId nTableId=SC_HTML_NO_TABLE)
Definition: htmlpars.cxx:1637
ESelection aSel
Definition: eeparser.hxx:55
void Colonize(ScEEParseEntry *)
Definition: htmlpars.cxx:791
::std::map< SCROW, SCROW > InnerMap
Definition: htmlpars.hxx:145
bool Intersects(const ScRange &rRange) const
Definition: address.hxx:734
bool IsEmptyCell() const
Returns true, if the current cell does not contain an entry yet.
Definition: htmlpars.cxx:2290
void InsertNewCell(const ScHTMLSize &rSpanSize)
Inserts a new cell in an unused position, starting from current cell position.
Definition: htmlpars.cxx:2399
::std::map< sal_uInt16, InnerMap * > OuterMap
Definition: htmlpars.hxx:149
OUString maTableName
Table of nested HTML tables.
Definition: htmlpars.hxx:527
ScRangeList maHMergedCells
Items for the current cell.
Definition: htmlpars.hxx:534
sal_uIntPtr sal_uLong
std::map< ScHTMLTableId, ScHTMLTablePtr > ScHTMLTableStdMap
Definition: htmlpars.cxx:1700
long Long
void CloseEntry(const HtmlImportInfo *)
Definition: htmlpars.cxx:819
FAMILY_DONTKNOW
void AdjustEnd(const HtmlImportInfo &rInfo)
Sets end point of the entry selection to the end of the import info object.
Definition: htmlpars.cxx:1663
bool IsAtBeginningOfText(const HtmlImportInfo *)
Definition: htmlpars.cxx:1403
ScHTMLTable * TableOn(const HtmlImportInfo &rInfo)
Starts a new table nested in this table (<table> tag).
Definition: htmlpars.cxx:1966
OUString GetImportFormatName(sal_uInt16 nFormat)
ScHTMLTable * TableOff(const HtmlImportInfo &rInfo)
Closes this table (</table> tag).
Definition: htmlpars.cxx:1972
const SCCOL SCCOL_MAX
Definition: address.hxx:56
NamePropsType m_GlobalProps
Definition: htmlpars.hxx:54
bool mbPreFormText
true = Table borders on.
Definition: htmlpars.hxx:549
sal_Int64 n
css::uno::Reference< css::frame::XModel3 > GetModel() const
const OUString & GetString() const
SCROW nRowMax
Definition: eeparser.hxx:112
aBuf
virtual ~ScHTMLTable()
Definition: htmlpars.cxx:1883
ScSizeVec maCumSizes[2]
Working entry, not yet inserted in a list.
Definition: htmlpars.hxx:542
void InsertText(const HtmlImportInfo &rInfo)
Inserts a text portion into current entry.
Definition: htmlpars.cxx:2915
const_iterator find(const Value &x) const
std::basic_string_view< charT, traits > getToken(std::basic_string_view< charT, traits > sv, charT delimiter, std::size_t &position)
void CloseTable(const HtmlImportInfo &rInfo)
Closes the current table, regardless of the opening tag.
Definition: htmlpars.cxx:3021
ScAddress aEnd
Definition: address.hxx:498
SCCOLROW GetDocSize(ScHTMLOrient eOrient, SCCOLROW nCellPos) const
Returns the resulting document row/column count of the specified HTML row/column. ...
Definition: htmlpars.cxx:2187
void ProcToken(HtmlImportInfo *)
Definition: htmlpars.cxx:1466
constexpr TypedWhichId< SvxFontItem > ATTR_FONT(100)
SCROW mnRow
Definition: htmlpars.hxx:237
::std::map< OUString, PropsType > NamePropsType
Definition: htmlpars.hxx:51
virtual ~ScHTMLGlobalTable() override
Definition: htmlpars.cxx:2759
ScHTMLEntryPtr mxCurrEntry
Current entry vector from map for faster access.
Definition: htmlpars.hxx:541
ScHTMLTableId & mrnUnusedId
The created unique table identifier.
Definition: htmlpars.hxx:310
SotClipboardFormatId & operator++(SotClipboardFormatId &eFormat)
SC_DLLPUBLIC void ApplyAttr(SCCOL nCol, SCROW nRow, SCTAB nTab, const SfxPoolItem &rAttr)
Definition: document.cxx:4821
ScDocument & GetDoc()
Definition: htmlpars.hxx:91
const SCROW SCROW_MAX
Definition: address.hxx:55
ScHTMLParser * mpParser
Resulting base address in a Calc document.
Definition: htmlpars.hxx:546
void ParseStyle(std::u16string_view rStrm)
Definition: htmlpars.cxx:3122
void NextRow(const HtmlImportInfo *)
Definition: htmlpars.cxx:333
void BodyOff(const HtmlImportInfo &rInfo)
Closes the body of the HTML document (</body> tag).
Definition: htmlpars.cxx:2160
ScHTMLEntryVector * mpCurrEntryVector
List of entries for each cell.
Definition: htmlpars.hxx:540
const sal_uInt16 SC_HTML_OFFSET_TOLERANCE_SMALL
Definition: htmlpars.hxx:37
virtual ~ScHTMLQueryParser() override
Definition: htmlpars.cxx:2783
bool mbDataOn
true = Inside of <tr> </tr>.
Definition: htmlpars.hxx:551
ScHTMLQueryParser(EditEngine *pEditEngine, ScDocument *pDoc)
Definition: htmlpars.cxx:2773
void FillEmptyCells()
Fills all empty cells in this and nested tables with dummy parse entries.
Definition: htmlpars.cxx:2584
const Value & front() const
void TableRowOn(const HtmlImportInfo *)
Definition: htmlpars.cxx:994
static OutputDevice * GetDefaultDevice()
WEIGHT_BOLD
constexpr tools::Long Width() const
const ScHTMLTableId SC_HTML_NO_TABLE
Used as table index for normal (non-table) entries in ScHTMLEntry structs.
Definition: htmlpars.hxx:231
SvParser< HtmlTokenId > * pParser
sal_uInt16 nTableWidth
Definition: htmlpars.hxx:169
static void MakeColNoRef(ScHTMLColOffset *, sal_uInt16 nOffset, sal_uInt16 nWidth, sal_uInt16 nOffsetTol, sal_uInt16 nWidthTol)
Definition: htmlpars.cxx:387
void MetaOn(const HtmlImportInfo &rInfo)
Processes the <meta> tag.
Definition: htmlpars.cxx:2964
std::map< ScHTMLPos, ScHTMLEntryVector > maEntryMap
List that owns the parse entries (from ScEEParser).
Definition: htmlpars.hxx:539
virtual const ScHTMLTable * GetGlobalTable() const override
Returns the "global table" which contains the entire HTML document.
Definition: htmlpars.cxx:297
sal_uInt16 nTable
Definition: htmlpars.hxx:165
sal_uInt16 sal_Unicode
sal_uInt16 ScHTMLTableId
Type for a unique identifier for each table.
Definition: htmlpars.hxx:227
ScHTMLTable & mrParentTable
Definition: htmlpars.cxx:1707
void TableRowOff(const HtmlImportInfo *)
Definition: htmlpars.cxx:1001
SC_DLLPUBLIC bool Move(SCCOL nDeltaX, SCROW nDeltaY, SCTAB nDeltaZ, ScAddress &rErrorPos, const ScDocument &rDoc)
Definition: address.cxx:2282
ScRangeList maUsedCells
List of all vertically merged cells.
Definition: htmlpars.hxx:536
SvxCellVerJustify
void RecalcDocSize()
Recalculates the size of all columns/rows in the table, regarding nested tables.
Definition: htmlpars.cxx:2620
sal_uInt16 nTab
Definition: eeparser.hxx:66
void CaptionOff()
Processes the caption of the table (</caption> tag).
Definition: htmlpars.cxx:1983
ScHTMLLayoutParser(EditEngine *, const OUString &rBaseURL, const Size &aPageSize, ScDocument *)
Definition: htmlpars.cxx:205
bool PutEntry(OUString &rString, sal_Int32 &nCheckPos, SvNumFormatType &nType, sal_uInt32 &nKey, LanguageType eLnge=LANGUAGE_DONTKNOW, bool bReplaceBooleanEquivalent=true)
sal_Int32 SCCOLROW
a type capable of holding either SCCOL or SCROW
Definition: types.hxx:23
ScHTMLGlobalTablePtr mxGlobTable
The title of the document.
Definition: htmlpars.hxx:624
SCROW nRowCnt
Definition: eeparser.hxx:110
static const sal_Int16 Medium
constexpr sal_uInt32 NUMBERFORMAT_ENTRY_NOT_FOUND
o3tl::sorted_vector< sal_uLong > ScHTMLColOffset
Definition: htmlpars.hxx:97
bool bEntirePara
Definition: eeparser.hxx:73
int nCount
void Recalc()
Recalculates sizes and resulting positions of all document entries.
Definition: htmlpars.cxx:2763
const OUString & getPropertyValue(const OUString &rElem, const OUString &rClass, const OUString &rPropName) const
Find best-matching property value for given element and class names.
Definition: htmlpars.cxx:120
ScHTMLTable * CloseTable(const HtmlImportInfo &rInfo)
Closes this table (</table> tag) or preformatted text (</pre> tag).
Definition: htmlpars.cxx:2171
void push_back(const ScRange &rRange)
Definition: rangelst.cxx:1137
::std::vector< ScHTMLEntry * > ScHTMLEntryVector
Definition: htmlpars.hxx:453
SCTAB Tab() const
Definition: address.hxx:283
#define OOO_STRING_SVTOOLS_HTML_VA_top
sal_uInt16 nWidth
Definition: eeparser.hxx:71
void SetRow(SCROW nRowP)
Definition: address.hxx:287
#define OOO_STRING_SVTOOLS_HTML_META_content_type
void RecalcDocPos(const ScHTMLPos &rBasePos)
Recalculates the position of all cell entries and nested tables.
Definition: htmlpars.cxx:2669
bool mbImportAlways
Definition: htmlpars.hxx:303
bool mbPushEmptyLine
true = Inside of <td> </td> or <th> </th>.
Definition: htmlpars.hxx:552
::std::stack< std::unique_ptr< ScHTMLTableStackEntry > > aTableStack
Definition: htmlpars.hxx:157
SCROW nRowOverlap
Definition: eeparser.hxx:69
OString OUStringToOString(std::u16string_view str, ConnectionSettings const *settings)
sal_Int32 nEndPos
ScHTMLTable(ScHTMLTable &rParentTable, const HtmlImportInfo &rInfo, bool bPreFormText, const ScDocument &rDoc)
Creates a new HTML table without content.
Definition: htmlpars.cxx:1811
const ScRange * Find(const ScAddress &) const
Definition: rangelst.cxx:1027
size_type size() const
std::unique_ptr< Graphic > pGraphic
Definition: eeparser.hxx:44
sal_Int32 GetTextLen() const
void SetCol(SCCOL nColP)
Definition: address.hxx:291
ErrCode Read(SvStream &rInput, const OUString &rBaseURL, EETextFormat, SvKeyValueIterator *pHTTPHeaderAttrs=nullptr)
ScHTMLTable * CreateTable(const HtmlImportInfo &rInfo, bool bPreFormText, const ScDocument &rDoc)
Inserts a new table into the container.
Definition: htmlpars.cxx:1759
void HeadingOn()
Inserts a heading line (<p> and <h1>...<h6> tags).
Definition: htmlpars.cxx:1946
const Link< HtmlImportInfo &, void > & GetHtmlImportHdl() const
constexpr TypedWhichId< SvxUnderlineItem > ATTR_FONT_UNDERLINE(104)
sal_uInt16 nOffset
Definition: eeparser.hxx:70
void Image(HtmlImportInfo *)
Definition: htmlpars.cxx:1267
ScHTMLTableId GetTableId() const
Returns the unique identifier of the table.
Definition: htmlpars.hxx:346
sal_uInt16 nColOffset
Definition: htmlpars.hxx:170
void ImplRowOn()
Set internal states for a new table row.
Definition: htmlpars.cxx:2452
void ImplDataOff()
Set internal states for leaving a table cell.
Definition: htmlpars.cxx:2486
static ErrCode LoadGraphic(const OUString &rPath, const OUString &rFilter, Graphic &rGraphic, GraphicFilter *pFilter=nullptr, sal_uInt16 *pDeterminedFormat=nullptr)
ScHTMLTableMap(ScHTMLTable &rParentTable)
Current table, used for fast search.
Definition: htmlpars.cxx:1732
ScHTMLStyles & GetStyles()
Definition: htmlpars.hxx:90
SC_DLLPUBLIC SCCOL MaxCol() const
Definition: document.hxx:890
constexpr TypedWhichId< SvxPostureItem > ATTR_FONT_POSTURE(103)
T * get() const
SC_DLLPUBLIC SvNumberFormatter * GetFormatTable() const
Definition: documen2.cxx:459
sal_Int32 nEndPara
void ImplPushEntryToVector(ScHTMLEntryVector &rEntryVector, ScHTMLEntryPtr &rxEntry)
Pushes the passed entry into the list of the current cell.
Definition: htmlpars.cxx:2312
ScHTMLEntryPtr CreateEntry() const
Creates and returns a new empty flying entry at position (0,0).
Definition: htmlpars.cxx:2300
void add(const char *pElemName, size_t nElemName, const char *pClassName, size_t nClassName, const OUString &aProp, const OUString &aValue)
Definition: htmlpars.cxx:75
const char nVertical
Definition: eeparser.hxx:35
int i
void RowOff(const HtmlImportInfo &rInfo)
Closes the current row (</tr> tag).
Definition: htmlpars.cxx:2013
virtual ~ScHTMLLayoutParser() override
Definition: htmlpars.cxx:232
void PushTableEntry(ScHTMLTableId nTableId)
Pushes a new entry into current cell which references a nested table.
Definition: htmlpars.cxx:2372
void IncCol(SCCOL nDelta=1)
Definition: address.hxx:316
ScHTMLTable * GetExistingTable(ScHTMLTableId nTableId) const
Tries to find a table from the table container.
Definition: htmlpars.cxx:2382
std::vector< std::shared_ptr< ScEEParseEntry > > maList
Definition: eeparser.hxx:105
sal_Int16 SCCOL
Definition: types.hxx:21
bool ValidCol(SCCOL nCol) const
Definition: document.hxx:897
OUStringBuffer maTitle
Definition: htmlpars.hxx:623
constexpr TypedWhichId< SvxVerJustifyItem > ATTR_VER_JUSTIFY(132)
ScHTMLPos maCurrCell
Size of the table.
Definition: htmlpars.hxx:544
bool mbRowOn
true = Table from preformatted text (<pre> tag).
Definition: htmlpars.hxx:550
void DataOff(const HtmlImportInfo &rInfo)
Closes the current cell (</td> or </th> tag).
Definition: htmlpars.cxx:2137
Point aSpace
Definition: eeparser.hxx:41
void PutItem(const SfxPoolItem &rItem)
Puts the item into the item set of the current entry.
Definition: htmlpars.cxx:1909
void SetCurrTable(ScHTMLTable *pTable) const
Sets a working table with its index for search optimization.
Definition: htmlpars.cxx:1728
ScHTMLTableId mnUnusedId
Pointer to current table (performance).
Definition: htmlpars.hxx:626
sal_uInt16 GetWidthPixel(const HTMLOption &)
Definition: htmlpars.cxx:1373
ColWidthsMap maColWidths
Definition: eeparser.hxx:107
void ProcessToken(const HtmlImportInfo &rInfo)
Handles all possible tags in the HTML document.
Definition: htmlpars.cxx:2830
sal_uInt16 nOffsetTolerance
Definition: htmlpars.hxx:172
ScHTMLColOffset * pLocalColOffset
Definition: htmlpars.hxx:162
ScHTMLTable * mpParentTable
Definition: htmlpars.hxx:525
size_t size() const
Definition: rangelst.hxx:89
#define OOO_STRING_SVTOOLS_HTML_AL_left
static bool IsSpaceCharInfo(const HtmlImportInfo &rInfo)
Returns true, if import info represents a space character.
Definition: htmlpars.cxx:2295
LINESTYLE_SINGLE
The HTML parser for data queries.
Definition: htmlpars.hxx:578
virtual bool ParseMetaOptions(const css::uno::Reference< css::document::XDocumentProperties > &, SvKeyValueIterator *)
Collection of HTML style data parsed from the content of <style> elements.