LibreOffice Module sw (master)  1
swhtml.cxx
Go to the documentation of this file.
1 /* -*- Mode: C++; tab-width: 4; indent-tabs-mode: nil; c-basic-offset: 4 -*- */
2 /*
3  * This file is part of the LibreOffice project.
4  *
5  * This Source Code Form is subject to the terms of the Mozilla Public
6  * License, v. 2.0. If a copy of the MPL was not distributed with this
7  * file, You can obtain one at http://mozilla.org/MPL/2.0/.
8  *
9  * This file incorporates work covered by the following license notice:
10  *
11  * Licensed to the Apache Software Foundation (ASF) under one or more
12  * contributor license agreements. See the NOTICE file distributed
13  * with this work for additional information regarding copyright
14  * ownership. The ASF licenses this file to you under the Apache
15  * License, Version 2.0 (the "License"); you may not use this file
16  * except in compliance with the License. You may obtain a copy of
17  * the License at http://www.apache.org/licenses/LICENSE-2.0 .
18  */
19 
20 #include <sal/config.h>
21 
22 #include <algorithm>
23 #include <memory>
24 #include <config_java.h>
25 
26 #include <com/sun/star/document/XDocumentPropertiesSupplier.hpp>
27 #include <com/sun/star/document/XDocumentProperties.hpp>
28 #include <com/sun/star/i18n/ScriptType.hpp>
29 #include <com/sun/star/i18n/XBreakIterator.hpp>
30 #include <comphelper/string.hxx>
31 #include <o3tl/safeint.hxx>
32 #include <rtl/ustrbuf.hxx>
33 #include <svx/svxids.hrc>
34 #if OSL_DEBUG_LEVEL > 0
35 #include <stdlib.h>
36 #endif
37 #include <hintids.hxx>
38 
39 #include <vcl/errinf.hxx>
40 #include <svl/stritem.hxx>
41 #include <vcl/imap.hxx>
42 #include <svtools/htmltokn.h>
43 #include <svtools/htmlkywd.hxx>
44 #include <svtools/ctrltool.hxx>
45 #include <unotools/configmgr.hxx>
46 #include <unotools/pathoptions.hxx>
47 #include <vcl/svapp.hxx>
48 #include <sfx2/event.hxx>
49 #include <sfx2/docfile.hxx>
50 
51 #include <sfx2/linkmgr.hxx>
52 #include <editeng/kernitem.hxx>
53 #include <editeng/boxitem.hxx>
54 #include <editeng/fhgtitem.hxx>
56 #include <editeng/postitem.hxx>
57 #include <editeng/wghtitem.hxx>
59 #include <editeng/udlnitem.hxx>
61 #include <editeng/blinkitem.hxx>
62 #include <editeng/ulspitem.hxx>
63 #include <editeng/colritem.hxx>
64 #include <editeng/fontitem.hxx>
65 #include <editeng/adjustitem.hxx>
66 #include <editeng/lrspitem.hxx>
67 #include <editeng/protitem.hxx>
68 #include <editeng/flstitem.hxx>
70 
71 #include <frmatr.hxx>
72 #include <charatr.hxx>
73 #include <fmtfld.hxx>
74 #include <fmtpdsc.hxx>
75 #include <fmtanchr.hxx>
76 #include <fmtsrnd.hxx>
77 #include <fmtfsize.hxx>
78 #include <fmtclds.hxx>
79 #include <fchrfmt.hxx>
80 #include <fmtinfmt.hxx>
81 #include <fmtfollowtextflow.hxx>
82 #include <fmtornt.hxx>
83 #include <doc.hxx>
84 #include <IDocumentUndoRedo.hxx>
91 #include <IDocumentStatistics.hxx>
92 #include <IDocumentState.hxx>
93 #include <pam.hxx>
94 #include <ndtxt.hxx>
95 #include <mdiexp.hxx>
96 #include <poolfmt.hxx>
97 #include <pagedesc.hxx>
98 #include <IMark.hxx>
99 #include <docsh.hxx>
100 #include <editsh.hxx>
101 #include <docufld.hxx>
102 #include "swcss1.hxx"
103 #include <fltini.hxx>
104 #include <htmltbl.hxx>
105 #include "htmlnum.hxx"
106 #include "swhtml.hxx"
107 #include "wrthtml.hxx"
108 #include <linkenum.hxx>
109 #include <breakit.hxx>
110 #include <SwAppletImpl.hxx>
111 #include <swdll.hxx>
112 #include <txatbase.hxx>
113 
114 #include <sfx2/viewfrm.hxx>
115 #include <svx/svdobj.hxx>
116 #include <officecfg/Office/Writer.hxx>
118 #include <comphelper/sequence.hxx>
119 #include <officecfg/Office/Common.hxx>
120 
121 #include <swerror.h>
122 #include <ndole.hxx>
123 #include <unoframe.hxx>
124 #include "css1atr.hxx"
125 #include <frameformats.hxx>
126 
// Constants private to the HTML import.
// NOTE(review): none of these macros is used in the visible part of this
// listing, so the remarks below are assumptions to be confirmed.
// FONTSIZE_MASK: presumably related to the HTML font sizes 1-7 - TODO confirm.
127 #define FONTSIZE_MASK 7
128 
// Escapement settings: SUPER/SUB simply alias the DFLT_ESC_* defaults.
// HTML_ESC_PROP is presumably a proportion in percent - TODO confirm.
129 #define HTML_ESC_PROP 80
130 #define HTML_ESC_SUPER DFLT_ESC_SUPER
131 #define HTML_ESC_SUB DFLT_ESC_SUB
132 
// Presumably the values of the <SPACER TYPE=...> option table further down
// (its rows are missing from this listing) - TODO confirm.
133 #define HTML_SPTYPE_BLOCK 1
134 #define HTML_SPTYPE_HORI 2
135 #define HTML_SPTYPE_VERT 3
136 
138 using namespace ::com::sun::star;
139 
140 // <P ALIGN=xxx>, <Hn ALIGN=xxx>, <TD ALIGN=xxx> etc.
// Table mapping the ALIGN keywords to SvxAdjust values.
// NOTE(review): the declaration line (listing line 141) is missing from this
// listing, so the table's name and element type cannot be confirmed here.
// "middle" is accepted as a synonym for center and "char" falls back to
// left alignment; the { nullptr, ... } entry terminates the table.
142 {
143  { OOO_STRING_SVTOOLS_HTML_AL_left, SvxAdjust::Left },
144  { OOO_STRING_SVTOOLS_HTML_AL_center, SvxAdjust::Center },
145  { OOO_STRING_SVTOOLS_HTML_AL_middle, SvxAdjust::Center }, // Netscape
146  { OOO_STRING_SVTOOLS_HTML_AL_right, SvxAdjust::Right },
147  { OOO_STRING_SVTOOLS_HTML_AL_justify, SvxAdjust::Block },
148  { OOO_STRING_SVTOOLS_HTML_AL_char, SvxAdjust::Left },
149  { nullptr, SvxAdjust(0) }
150 };
151 
152 // <SPACER TYPE=...>
// NOTE(review): the declaration line (listing line 153) and the data rows
// (listing lines 155-157) are missing from this listing; only the
// { nullptr, 0 } terminator entry is visible.
154 {
158  { nullptr, 0 }
159 };
160 
// NOTE(review): the signature line (listing line 161) is missing from this
// listing, so the owner of this body cannot be confirmed here; presumably it
// is a constructor. The body only switches m_bTemplateBrowseMode on.
162 {
163  m_bTemplateBrowseMode = true;
164 }
165 
// Determines the Writer/Web template to use for an HTML import.
// Returns the template file name ("internal/html.oth" first, then
// "internal/html.stw") as left in sTemplate by SvtPathOptions::SearchFile when
// the search in the template paths succeeds, or an empty string when no
// template is to be loaded or none was found.
166 OUString HTMLReader::GetTemplateName(SwDoc& rDoc) const
167 {
// NOTE(review): listing line 168 is missing here. As shown, the return below
// is unconditional and everything after it is unreachable; the comment
// suggests it is guarded by a condition on rDoc - confirm against the
// complete source before changing anything.
169  // HTML import into Writer, avoid loading the Writer/Web template.
170  return OUString();
171 
172  static const OUStringLiteral sTemplateWithoutExt(u"internal/html");
173  SvtPathOptions aPathOpt;
174 
175  // first search for OpenDocument Writer/Web template
176  // OpenDocument Writer/Web template (extension .oth)
177  OUString sTemplate( sTemplateWithoutExt + ".oth" );
178  if (aPathOpt.SearchFile( sTemplate, SvtPathOptions::Paths::Template ))
179  return sTemplate;
180 
181  // no OpenDocument Writer/Web template found.
182  // search for OpenOffice.org Writer/Web template
183  sTemplate = sTemplateWithoutExt + ".stw";
184  if (aPathOpt.SearchFile( sTemplate, SvtPathOptions::Paths::Template ))
185  return sTemplate;
186 
// Neither template exists: flag it via OSL_ENSURE and fall back to "no template".
187  OSL_ENSURE( false, "The default HTML template cannot be found in the defined template directories!");
188 
189  return OUString();
190 }
191 
// NOTE(review): the signature line (listing line 192) is missing from this
// listing, so the function name and return type cannot be confirmed here.
// Returns true when the medium is remote or is not a storage, false otherwise.
// m_pMedium is only checked by OSL_ENSURE and then dereferenced regardless.
193 {
194  OSL_ENSURE( m_pMedium, "Where is the medium??" );
195 
196  if( m_pMedium->IsRemote() || !m_pMedium->IsStorage() )
197  {
// NOTE(review): listing line 198 is missing here; presumably it prepares the
// input stream before reporting success - confirm.
199  return true;
200  }
201  return false;
202 
203 }
204 
205 // Call for the general Reader-Interface
// Runs an HTML import: creates a SwHTMLParser on m_pStream and calls its
// CallParser().
// rDoc     - target document, kept alive for the duration of the call
// rBaseURL - passed on to the parser
// rPam     - cursor passed on to the parser
// rName    - passed on to the parser
// Returns ERR_SWG_READ_ERROR when there is no stream, ERRCODE_NONE when the
// parser state is Accepted, and otherwise (state neither Pending nor Accepted)
// an ERR_FORMAT_ROWCOL error carrying "line,column" of the parser position.
// NOTE(review): several listing lines (208, 218, 222, 224-225, 235, 240) are
// missing from this listing, among them the tail of the SwHTMLParser
// constructor call and the statement of the Pending branch.
206 ErrCode HTMLReader::Read( SwDoc &rDoc, const OUString& rBaseURL, SwPaM &rPam, const OUString & rName )
207 {
209 
210  if( !m_pStream )
211  {
212  OSL_ENSURE( m_pStream, "HTML-Read without stream" );
213  return ERR_SWG_READ_ERROR;
214  }
215 
// !m_bInsertMode is also what is handed to the parser as "read new document".
216  if( !m_bInsertMode )
217  {
219 
220  // Set the HTML page style, when it isn't a HTML document,
221  // otherwise it's already set.
223  {
226  }
227  }
228 
229  // so nobody steals the document!
230  rtl::Reference<SwDoc> xHoldAlive(&rDoc);
231  ErrCode nRet = ERRCODE_NONE;
232  tools::SvRef<SwHTMLParser> xParser = new SwHTMLParser( &rDoc, rPam, *m_pStream,
233  rName, rBaseURL, !m_bInsertMode, m_pMedium,
234  IsReadUTF8(),
236 
237  SvParserState eState = xParser->CallParser();
238 
239  if( SvParserState::Pending == eState )
241  else if( SvParserState::Accepted != eState )
242  {
// Error text is "<line>,<column>" of the position the parser stopped at.
243  const OUString sErr(OUString::number(static_cast<sal_Int32>(xParser->GetLineNr()))
244  + "," + OUString::number(static_cast<sal_Int32>(xParser->GetLinePos())));
245 
246  // use the stream as transport for error number
// NOTE(review): the StringErrorInfo is allocated with new and never deleted
// here; presumably ownership is handled by the error-info machinery - confirm.
247  nRet = *new StringErrorInfo( ERR_FORMAT_ROWCOL, sErr,
248  DialogMask::ButtonsOk | DialogMask::MessageError );
249  }
250 
251  return nRet;
252 }
253 
// Constructor of the HTML parser.
// NOTE(review): the first signature line (listing line 254) is missing from
// this listing; the names pD, rCursor and rIn used below are declared there.
// Many further statement lines are missing as well (listing lines 323, 325,
// 329, 340-346, 350, 358, 360, 362, 376, 404, 418, 420, 422, 437).
// rPath/rBaseURL  - stored in m_aPathToFile / m_sBaseURL
// bReadNewDoc     - passed to SfxHTMLParser; also enables the document defaults below
// pMed            - medium; may be null (checked before every use in this body)
// bReadUTF8       - force UTF-8 as source encoding
// bNoHTMLComments - initial value of m_bIgnoreHTMLComments
// rNamespace      - when not empty, switches the parser to XHTML mode
255  const OUString& rPath,
256  const OUString& rBaseURL,
257  bool bReadNewDoc,
258  SfxMedium* pMed, bool bReadUTF8,
259  bool bNoHTMLComments,
260  const OUString& rNamespace )
261  : SfxHTMLParser( rIn, bReadNewDoc, pMed ),
262  m_aPathToFile( rPath ),
263  m_sBaseURL( rBaseURL ),
264  m_xAttrTab(std::make_shared<HTMLAttrTable>()),
265  m_pNumRuleInfo( new SwHTMLNumRuleInfo ),
266  m_xDoc( pD ),
267  m_pActionViewShell( nullptr ),
268  m_pSttNdIdx( nullptr ),
269  m_pFormImpl( nullptr ),
270  m_pMarquee( nullptr ),
271  m_pImageMap( nullptr ),
272  m_nBaseFontStMin( 0 ),
273  m_nFontStMin( 0 ),
274  m_nDefListDeep( 0 ),
275  m_nFontStHeadStart( 0 ),
276  m_nSBModuleCnt( 0 ),
277  m_nMissingImgMaps( 0 ),
278  m_nParaCnt( 5 ),
279  // #i83625#
280  m_nContextStMin( 0 ),
281  m_nContextStAttrMin( 0 ),
282  m_nSelectEntryCnt( 0 ),
283  m_nOpenParaToken( HtmlTokenId::NONE ),
284  m_eJumpTo( JumpToMarks::NONE ),
285 #ifdef DBG_UTIL
286  m_nContinue( 0 ),
287 #endif
288  m_eParaAdjust( SvxAdjust::End ),
289  m_bDocInitialized( false ),
290  m_bSetModEnabled( false ),
291  m_bInFloatingFrame( false ),
292  m_bInField( false ),
293  m_bKeepUnknown( false ),
294  m_bCallNextToken( false ),
295  m_bIgnoreRawData( false ),
296  m_bLBEntrySelected ( false ),
297  m_bTAIgnoreNewPara ( false ),
298  m_bFixMarqueeWidth ( false ),
299  m_bNoParSpace( false ),
300  m_bInNoEmbed( false ),
301  m_bInTitle( false ),
302  m_bUpdateDocStat( false ),
303  m_bFixSelectWidth( false ),
304  m_bTextArea( false ),
305  m_bSelect( false ),
306  m_bInFootEndNoteAnchor( false ),
307  m_bInFootEndNoteSymbol( false ),
308  m_bIgnoreHTMLComments( bNoHTMLComments ),
309  m_bRemoveHidden( false ),
310  m_bBodySeen( false ),
311  m_bReadingHeaderOrFooter( false ),
312  m_bNotifyMacroEventRead( false ),
313  m_bFuzzing(utl::ConfigManager::IsFuzzing()),
314  m_isInTableStructure(false),
315  m_nTableDepth( 0 ),
316  m_nFloatingFrames( 0 ),
317  m_nListItems( 0 ),
318  m_pTempViewFrame(nullptr)
319 {
320  // If requested explicitly, then force ignoring of comments (don't create postits for them).
// NOTE(review): the condition lines around the assignment (listing lines 323
// and 325) are missing, so what exactly triggers it is not visible here.
321  if (!m_bFuzzing)
322  {
324  m_bIgnoreHTMLComments = true;
326  }
327 
328  m_nEventId = nullptr;
330 
331  m_eScriptLang = HTMLScriptLanguage::Unknown;
332 
333  rCursor.DeleteMark();
334  m_pPam = &rCursor; // re-use existing cursor: avoids spurious ~SwIndexReg assert
// Zero-fills the freshly created attribute table.
335  memset(m_xAttrTab.get(), 0, sizeof(HTMLAttrTable));
336 
337  // Read the font sizes 1-7 from the INI file
// NOTE(review): the non-fuzzing branch (listing lines 340-346) and the first
// line of the fuzzing branch (350) are missing; the visible line sets the
// entries 4-6 of m_aFontHeights to 12 * 20.
338  if (!m_bFuzzing)
339  {
347  }
348  else
349  {
351  m_aFontHeights[4] = m_aFontHeights[5] = m_aFontHeights[6] = 12 * 20;
352  }
353 
354  if(bReadNewDoc)
355  {
356  //CJK has different defaults, so a different object should be used for this
357  //RES_CHARTR_CJK_FONTSIZE is a valid value
// NOTE(review): the declarations of aFontHeight, aFontHeightCJK and
// aFontHeightCTL (listing lines 358, 360, 362) are missing from this listing.
359  m_xDoc->SetDefault( aFontHeight );
361  m_xDoc->SetDefault( aFontHeightCJK );
363  m_xDoc->SetDefault( aFontHeightCTL );
364 
365  // #i18732# - adjust default of option 'FollowTextFlow'
366  // TODO: not sure what the appropriate default for HTML should be?
367  m_xDoc->SetDefault( SwFormatFollowTextFlow(true) );
368  }
369 
370  // Change to HTML mode during the import, so that the right styles are created
// The previous value is remembered in m_bOldIsHTMLMode.
371  m_bOldIsHTMLMode = m_xDoc->getIDocumentSettingAccess().get(DocumentSettingId::HTML_MODE);
372  m_xDoc->getIDocumentSettingAccess().set(DocumentSettingId::HTML_MODE, true);
373 
374  m_pCSS1Parser.reset(new SwCSS1Parser(m_xDoc.get(), *this, m_aFontHeights, m_sBaseURL, IsNewDoc()));
375  if (!m_bFuzzing)
377 
378  if( bReadUTF8 )
379  {
380  SetSrcEncoding( RTL_TEXTENCODING_UTF8 );
381  }
382  else
383  {
// NOTE(review): pDocSh is dereferenced here without a null check, whereas the
// same GetDocShell() result is checked a few lines further down - confirm
// whether a document without a shell can reach this branch.
384  SwDocShell *pDocSh = m_xDoc->GetDocShell();
385  SvKeyValueIterator *pHeaderAttrs =
386  pDocSh->GetHeaderAttributes();
387  if( pHeaderAttrs )
388  SetEncodingByHTTPHeader( pHeaderAttrs );
389  }
390  m_pCSS1Parser->SetDfltEncoding( osl_getThreadTextEncoding() );
391 
392  SwDocShell* pDocSh = m_xDoc->GetDocShell();
393  if( pDocSh )
394  {
395  m_bViewCreated = true; // not, load synchronous
396 
397  // a jump mark is present
398 
399  if( pMed )
400  {
401  m_sJmpMark = pMed->GetURLObject().GetMark();
402  if( !m_sJmpMark.isEmpty() )
403  {
// Split the mark at the last cMarkSeparator: the text after it (blanks
// removed, lower-cased) is compared against the known type suffixes below.
// A separator at position 0 is treated like "no separator" (nPos == 0).
// NOTE(review): the branch bodies for "region", "table" and "graphic"
// (listing lines 418, 420, 422) are missing from this listing.
405  sal_Int32 nLastPos = m_sJmpMark.lastIndexOf( cMarkSeparator );
406  sal_Int32 nPos = nLastPos != -1 ? nLastPos : 0;
407 
408  OUString sCmp;
409  if (nPos)
410  {
411  sCmp = m_sJmpMark.copy(nPos + 1).replaceAll(" ", "");
412  }
413 
414  if( !sCmp.isEmpty() )
415  {
416  sCmp = sCmp.toAsciiLowerCase();
417  if( sCmp == "region" )
419  else if( sCmp == "table" )
421  else if( sCmp == "graphic" )
423  else if( sCmp == "outline" ||
424  sCmp == "text" ||
425  sCmp == "frame" )
426  m_eJumpTo = JumpToMarks::NONE; // this is nothing valid!
427  else
428  // otherwise this is a normal (book)mark
429  nPos = -1;
430  }
431  else
432  nPos = -1;
433 
// nPos != -1 means a type suffix was recognised: strip it from the mark.
434  if( nPos != -1 )
435  m_sJmpMark = m_sJmpMark.copy( 0, nPos );
436  if( m_sJmpMark.isEmpty() )
438  }
439  }
440  }
441 
// A non-empty namespace switches to XHTML; "reqif-xhtml" additionally sets
// m_bReqIF.
442  if (!rNamespace.isEmpty())
443  {
444  SetNamespace(rNamespace);
445  m_bXHTML = true;
446  if (rNamespace == "reqif-xhtml")
447  m_bReqIF = true;
448  }
449 
450  // Extract load parameters which are specific to this filter.
451  if (!pMed)
452  {
453  return;
454  }
455 
456  comphelper::SequenceAsHashMap aLoadMap(pMed->GetArgs());
457  auto it = aLoadMap.find("AllowedRTFOLEMimeTypes");
458  if (it == aLoadMap.end())
459  {
460  return;
461  }
462 
// The argument is read as a sequence of strings and stored as a set.
463  uno::Sequence<OUString> aTypes;
464  it->second >>= aTypes;
465  m_aAllowedRTFOLEMimeTypes = comphelper::sequenceToContainer<std::set<OUString>>(aTypes);
466 }
467 
// Destructor body (the assertion text below names SwHTMLParser::~SwHTMLParser).
// NOTE(review): the signature line (listing line 468) and two statement lines
// (listing lines 488 and 532) are missing from this listing.
// Cleans up leftover contexts, restores the document's HTML_MODE setting,
// finishes loading on the doc shell and releases the helper objects.
469 {
470 #ifdef DBG_UTIL
471  OSL_ENSURE( !m_nContinue, "DTOR in continue!" );
472 #endif
473 
474  OSL_ENSURE(m_aContexts.empty(), "There are still contexts on the stack");
475  OSL_ENSURE(!m_nContextStMin, "There are protected contexts");
// Drop any protection so that every remaining context gets popped.
476  m_nContextStMin = 0;
477  while (!m_aContexts.empty())
478  {
479  std::unique_ptr<HTMLAttrContext> xCntxt(PopContext());
480  ClearContext(xCntxt.get());
481  }
482 
// Remember the asynchronous-load flag before resetting it; it decides below
// whether links are updated.
483  bool bAsync = m_xDoc->IsInLoadAsynchron();
484  m_xDoc->SetInLoadAsynchron( false );
485  m_xDoc->getIDocumentSettingAccess().set(DocumentSettingId::HTML_MODE, m_bOldIsHTMLMode);
486 
// NOTE(review): the statement controlled by this if (listing line 488) is
// missing; presumably it cancels the pending user event - confirm.
487  if( m_xDoc->GetDocShell() && m_nEventId )
489 
490  // the DocumentDetected maybe can delete the DocShells, therefore fetch again
491  if( m_xDoc->GetDocShell() )
492  {
493  // update linked sections
494  sal_uInt16 nLinkMode = m_xDoc->getIDocumentSettingAccess().getLinkUpdateMode( true );
495  if( nLinkMode != NEVER && bAsync &&
496  SfxObjectCreateMode::INTERNAL!=m_xDoc->GetDocShell()->GetCreateMode() )
497  m_xDoc->getIDocumentLinksAdministration().GetLinkManager().UpdateAllLinks( nLinkMode == MANUAL, false, nullptr );
498 
499  if ( m_xDoc->GetDocShell()->IsLoading() )
500  {
501  // #i59688#
502  m_xDoc->GetDocShell()->LoadingFinished();
503  }
504  }
505 
506  delete m_pSttNdIdx;
507 
// Attributes that were never applied are owned here and must be deleted.
508  if( !m_aSetAttrTab.empty() )
509  {
510  OSL_ENSURE( m_aSetAttrTab.empty(),"There are still attributes on the stack" );
511  for ( const auto& rpAttr : m_aSetAttrTab )
512  delete rpAttr;
513  m_aSetAttrTab.clear();
514  }
515 
516  m_pCSS1Parser.reset();
517  m_pNumRuleInfo.reset();
518  DeleteFormImpl();
519  m_pFootEndNoteImpl.reset();
520 
521  OSL_ENSURE(!m_xTable, "It exists still an open table");
522  m_pImageMaps.reset();
523 
524  OSL_ENSURE( m_vPendingStack.empty(),
525  "SwHTMLParser::~SwHTMLParser: Here should not be Pending-Stack anymore" );
526  m_vPendingStack.clear();
527 
528  m_xDoc.clear();
529 
530  if ( m_pTempViewFrame )
531  {
533 
534  // the temporary view frame is hidden, so the hidden flag might need to be removed
// NOTE(review): m_xDoc was cleared just above, so - assuming clear() leaves
// the reference empty - m_xDoc.is() can never be true here and SID_HIDDEN is
// never cleared. Confirm whether the clear() should happen after this block.
535  if ( m_bRemoveHidden && m_xDoc.is() && m_xDoc->GetDocShell() && m_xDoc->GetDocShell()->GetMedium() )
536  m_xDoc->GetDocShell()->GetMedium()->GetItemSet()->ClearItem( SID_HIDDEN );
537  }
538 }
539 
// Link handler for the user event posted from the parser start-up code via
// Application::PostUserEvent( LINK( this, SwHTMLParser, AsyncCallback ) ).
// Clears the stored event id, marks the parser state as Error when the import
// is being aborted or the parser holds the last reference to the document,
// and then always invokes the asynchronous call link.
540 IMPL_LINK_NOARG( SwHTMLParser, AsyncCallback, void*, void )
541 {
// The event has been delivered, so there is nothing left to cancel.
542  m_nEventId=nullptr;
543 
544  // #i47907# - If the document has already been destructed,
545  // the parser should be aware of this:
// A reference count of 1 means only this parser still holds the document.
546  if( ( m_xDoc->GetDocShell() && m_xDoc->GetDocShell()->IsAbortingImport() )
547  || 1 == m_xDoc->getReferenceCount() )
548  {
549  // was the import aborted by SFX?
550  eState = SvParserState::Error;
551  }
552 
553  GetAsynchCallLink().Call(nullptr);
554 }
555 
// NOTE(review): the signature line (listing line 556) is missing from this
// listing; presumably this is the CallParser() invoked by HTMLReader::Read -
// confirm. The declaration of eRet (listing line 608) and listing line 571
// are missing as well.
// Prepares the document for the import (start index, node splitting when
// inserting into an existing document, progress/async setup) and returns eRet.
557 {
558  // create temporary index on position 0, so it won't be moved!
559  m_pSttNdIdx = new SwNodeIndex( m_xDoc->GetNodes() );
560  if( !IsNewDoc() ) // insert into existing document ?
561  {
562  const SwPosition* pPos = m_pPam->GetPoint();
563 
// The node is split twice around the insert position; m_pSttNdIdx is set to
// the node index before the position after the first split.
564  m_xDoc->getIDocumentContentOperations().SplitNode( *pPos, false );
565 
566  *m_pSttNdIdx = pPos->nNode.GetIndex()-1;
567  m_xDoc->getIDocumentContentOperations().SplitNode( *pPos, false );
568 
569  SwPaM aInsertionRangePam( *pPos );
570 
572 
573  // split any redline over the insertion point
574  aInsertionRangePam.SetMark();
575  *aInsertionRangePam.GetPoint() = *m_pPam->GetPoint();
576  aInsertionRangePam.Move( fnMoveBackward );
577  m_xDoc->getIDocumentRedlineAccess().SplitRedline( aInsertionRangePam );
578 
579  m_xDoc->SetTextFormatColl( *m_pPam,
580  m_pCSS1Parser->GetTextCollFromPool( RES_POOLCOLL_STANDARD ));
581  }
582 
583  if( GetMedium() )
584  {
// No view yet: continue asynchronously through AsyncCallback.
585  if( !m_bViewCreated )
586  {
587  m_nEventId = Application::PostUserEvent( LINK( this, SwHTMLParser, AsyncCallback ) );
588  }
589  else
590  {
// NOTE(review): m_bViewCreated is already true in this branch, so the
// assignment below has no effect.
591  m_bViewCreated = true;
592  m_nEventId = nullptr;
593  }
594  }
595  else // show progress bar
596  {
// Seek to the end to learn the stream size for the progress range, then
// rewind; errors raised by the seeks are reset each time.
597  rInput.Seek(STREAM_SEEK_TO_END);
598  rInput.ResetError();
599 
600  m_xProgress.reset(new ImportProgress(m_xDoc->GetDocShell(), 0, rInput.Tell()));
601 
602  rInput.Seek(STREAM_SEEK_TO_BEGIN);
603  rInput.ResetError();
604  }
605 
// Listen to the first page descriptor's notifier; the Dying hint is handled
// in Notify().
606  StartListening(m_xDoc->GetPageDesc( 0 ).GetNotifier());
607 
609  return eRet;
610 }
611 
// NOTE(review): the signature line (listing line 612) is missing from this
// listing; presumably this is the CanRemoveNode(nNodeIdx) called from the
// clean-up code further down - confirm.
// Returns true when the node directly before nNodeIdx is a content node, or
// is an end node whose start-of-section node is a section node.
613 {
614  const SwNode *pPrev = m_xDoc->GetNodes()[nNodeIdx - 1];
615  return pPrev->IsContentNode() || (pPrev->IsEndNode() && pPrev->StartOfSectionNode()->IsSectionNode());
616 }
617 
// Body of SwHTMLParser::Continue (name taken from the assertion texts below).
// NOTE(review): the signature line (listing line 618) is missing from this
// listing, so the parameter list (nToken is used below) is not visible.
// Further statement lines are missing too (listing lines 700, 719, 758, 799,
// 810, 816, 820, 842, 848, 850).
// Outline:
//  1. bail out with state Pending on the first call when a medium exists but
//     no view has been created yet;
//  2. disable SetModified, the OLE link and Undo for the time of the import;
//  3. run the actual parse (or only unwind the pending stack on error);
//  4. when parsing is not pending any more: flush open attributes/contexts and
//     repair the node split done at the start of an insert;
//  5. on success: remove the trailing empty paragraph and update document
//     properties/statistics;
//  6. restore Undo, the OLE link and the modified state, and end the action.
619 {
620 #ifdef DBG_UTIL
621  OSL_ENSURE(!m_nContinue, "Continue in Continue - not supposed to happen");
622  m_nContinue++;
623 #endif
624 
625  // When the import (of SFX) is aborted, an error will be set but
626  // we still continue, so that we clean up properly.
627  OSL_ENSURE( SvParserState::Error!=eState,
628  "SwHTMLParser::Continue: already set an error" );
629  if( m_xDoc->GetDocShell() && m_xDoc->GetDocShell()->IsAbortingImport() )
630  eState = SvParserState::Error;
631 
632  // Fetch SwViewShell from document, save it and set as current.
633  SwViewShell *pInitVSh = CallStartAction();
634 
635  if( SvParserState::Error != eState && GetMedium() && !m_bViewCreated )
636  {
637  // At first call first return, show document and wait for callback
638  // time.
639  // At this point in CallParser only one digit was read and
640  // a SaveState(0) was called.
641  eState = SvParserState::Pending;
642  m_bViewCreated = true;
643  m_xDoc->SetInLoadAsynchron( true );
644 
645 #ifdef DBG_UTIL
646  m_nContinue--;
647 #endif
648 
649  return;
650  }
651 
// Remember whether SetModified was enabled so it can be re-enabled at the end.
652  m_bSetModEnabled = false;
653  if( m_xDoc->GetDocShell() )
654  {
655  m_bSetModEnabled = m_xDoc->GetDocShell()->IsEnableSetModified();
656  if( m_bSetModEnabled )
657  {
658  m_xDoc->GetDocShell()->EnableSetModified( false );
659  }
660  }
661 
662  // during import don't call OLE-Modified
663  Link<bool,void> aOLELink( m_xDoc->GetOle2Link() );
664  m_xDoc->SetOle2Link( Link<bool,void>() );
665 
// Save the modified state and the Undo switch; both are restored at the end.
666  bool bModified = m_xDoc->getIDocumentState().IsModified();
667  bool const bWasUndo = m_xDoc->GetIDocumentUndoRedo().DoesUndo();
668  m_xDoc->GetIDocumentUndoRedo().DoUndo(false);
669 
670  // When the import will be aborted, don't call Continue anymore.
671  // If a Pending-Stack exists make sure the stack is ended with a call
672  // of NextToken.
673  if( SvParserState::Error == eState )
674  {
675  OSL_ENSURE( m_vPendingStack.empty() || m_vPendingStack.back().nToken != HtmlTokenId::NONE,
676  "SwHTMLParser::Continue: Pending-Stack without Token" );
677  if( !m_vPendingStack.empty() && m_vPendingStack.back().nToken != HtmlTokenId::NONE )
678  NextToken( m_vPendingStack.back().nToken );
679  OSL_ENSURE( m_vPendingStack.empty(),
680  "SwHTMLParser::Continue: There is again a Pending-Stack" );
681  }
682  else
683  {
// Resume with the token of the innermost pending entry, if there is one.
684  HTMLParser::Continue( !m_vPendingStack.empty() ? m_vPendingStack.back().nToken : nToken );
685  }
686 
687  // disable progress bar again
688  m_xProgress.reset();
689 
690  bool bLFStripped = false;
691  if( SvParserState::Pending != GetStatus() )
692  {
693  // set the last attributes yet
694  {
// NOTE(review): the declaration of aField (listing line 700) is missing.
695  if( !m_aScriptSource.isEmpty() )
696  {
697  SwScriptFieldType *pType =
698  static_cast<SwScriptFieldType*>(m_xDoc->getIDocumentFieldsAccess().GetSysFieldType( SwFieldIds::Script ));
699 
701  false );
702  InsertAttr( SwFormatField( aField ), false );
703  }
704 
705  if( m_pAppletImpl )
706  {
707  if( m_pAppletImpl->GetApplet().is() )
708  EndApplet();
709  else
710  EndObject();
711  }
712 
713  // maybe remove an existing LF after the last paragraph
714  if( IsNewDoc() )
715  bLFStripped = StripTrailingLF() > 0;
716 
717  // close still open numbering
// NOTE(review): the loop body (listing line 719) is missing from this listing.
718  while( GetNumInfo().GetNumRule() )
720 
721  OSL_ENSURE( !m_nContextStMin, "There are protected contexts" );
722  // try this twice, first normally to let m_nContextStMin decrease
723  // naturally and get contexts popped in desired order, and if that
724  // fails force it
725  for (int i = 0; i < 2; ++i)
726  {
727  while (m_aContexts.size() > m_nContextStMin)
728  {
729  std::unique_ptr<HTMLAttrContext> xCntxt(PopContext());
730  if (xCntxt)
731  EndContext(xCntxt.get());
732  }
733  if (!m_nContextStMin)
734  break;
735  OSL_ENSURE(!m_nContextStMin, "There are still protected contexts");
736  m_nContextStMin = 0;
737  }
738 
739  m_aParaAttrs.clear();
740 
741  SetAttr( false );
742 
743  // set the first delayed styles
744  m_pCSS1Parser->SetDelayedStyles();
745  }
746 
747  // again correct the start
// Only relevant when inserting into an existing document: join the node at
// m_pSttNdIdx with its successor again.
748  if( !IsNewDoc() && m_pSttNdIdx->GetIndex() )
749  {
750  SwTextNode* pTextNode = m_pSttNdIdx->GetNode().GetTextNode();
751  SwNodeIndex aNxtIdx( *m_pSttNdIdx );
752  if( pTextNode && pTextNode->CanJoinNext( &aNxtIdx ))
753  {
754  const sal_Int32 nStt = pTextNode->GetText().getLength();
755  // when the cursor is still in the node, then set him at the end
756  if( m_pPam->GetPoint()->nNode == aNxtIdx )
757  {
759  m_pPam->GetPoint()->nContent.Assign( pTextNode, nStt );
760  }
761 
762 #if OSL_DEBUG_LEVEL > 0
763 // !!! shouldn't be possible, or ??
764  OSL_ENSURE( m_pSttNdIdx->GetIndex()+1 != m_pPam->GetBound().nNode.GetIndex(),
765  "Pam.Bound1 is still in the node" );
766  OSL_ENSURE( m_pSttNdIdx->GetIndex()+1 != m_pPam->GetBound( false ).nNode.GetIndex(),
767  "Pam.Bound2 is still in the node" );
768 
769  if( m_pSttNdIdx->GetIndex()+1 == m_pPam->GetBound().nNode.GetIndex() )
770  {
771  const sal_Int32 nCntPos = m_pPam->GetBound().nContent.GetIndex();
772  m_pPam->GetBound().nContent.Assign( pTextNode,
773  pTextNode->GetText().getLength() + nCntPos );
774  }
775  if( m_pSttNdIdx->GetIndex()+1 == m_pPam->GetBound( false ).nNode.GetIndex() )
776  {
777  const sal_Int32 nCntPos = m_pPam->GetBound( false ).nContent.GetIndex();
778  m_pPam->GetBound( false ).nContent.Assign( pTextNode,
779  pTextNode->GetText().getLength() + nCntPos );
780  }
781 #endif
782  // Keep character attribute!
783  SwTextNode* pDelNd = aNxtIdx.GetNode().GetTextNode();
784  if (pTextNode->GetText().getLength())
785  pDelNd->FormatToTextAttr( pTextNode );
786  else
787  pTextNode->ChgFormatColl( pDelNd->GetTextColl() );
788  pTextNode->JoinNext();
789  }
790  }
791  }
792 
793  if( SvParserState::Accepted == eState )
794  {
795  if( m_nMissingImgMaps )
796  {
797  // Some Image-Map relations are still missing.
798  // Maybe now the Image-Maps are there?
800  }
801 
802  // now remove the last useless paragraph
803  SwPosition* pPos = m_pPam->GetPoint();
804  if( !pPos->nContent.GetIndex() && !bLFStripped )
805  {
// pCurrentNd is assigned inside the else-if condition below before it is used.
806  SwTextNode* pCurrentNd;
807  SwNodeOffset nNodeIdx = pPos->nNode.GetIndex();
808 
// NOTE(review): the initialiser of bHasFlysOrMarks (listing line 810) and the
// declarations of pCNd (816) and pVSh (820) are missing from this listing.
809  bool bHasFlysOrMarks =
811 
812  if( IsNewDoc() )
813  {
814  if (!m_pPam->GetPoint()->nContent.GetIndex() && CanRemoveNode(nNodeIdx))
815  {
817  if( pCNd && pCNd->StartOfSectionIndex()+2 <
818  pCNd->EndOfSectionIndex() && !bHasFlysOrMarks )
819  {
// Move a cursor that still sits in the node out of it before deleting the node.
821  SwCursorShell *pCursorSh = dynamic_cast<SwCursorShell *>( pVSh );
822  if( pCursorSh &&
823  pCursorSh->GetCursor()->GetPoint()
824  ->nNode.GetIndex() == nNodeIdx )
825  {
826  pCursorSh->MovePara(GoPrevPara, fnParaEnd );
827  pCursorSh->SetMark();
828  pCursorSh->ClearMark();
829  }
830  m_pPam->GetBound().nContent.Assign( nullptr, 0 );
831  m_pPam->GetBound(false).nContent.Assign( nullptr, 0 );
832  m_xDoc->GetNodes().Delete( m_pPam->GetPoint()->nNode );
833  }
834  }
835  }
836  else if( nullptr != ( pCurrentNd = m_xDoc->GetNodes()[ nNodeIdx ]->GetTextNode()) && !bHasFlysOrMarks )
837  {
838  if( pCurrentNd->CanJoinNext( &pPos->nNode ))
839  {
840  SwTextNode* pNextNd = pPos->nNode.GetNode().GetTextNode();
841  pPos->nContent.Assign( pNextNd, 0 );
843  pNextNd->JoinPrev();
844  }
845  else if (pCurrentNd->GetText().isEmpty())
846  {
847  pPos->nContent.Assign( nullptr, 0 );
849  m_xDoc->GetNodes().Delete( pPos->nNode );
851  }
852  }
853  }
854 
855  // annul the SplitNode from the beginning
856  else if( !IsNewDoc() )
857  {
858  if( pPos->nContent.GetIndex() ) // then there was no <p> at the end
859  m_pPam->Move( fnMoveForward, GoInNode ); // therefore to the next
860  SwTextNode* pTextNode = pPos->nNode.GetNode().GetTextNode();
861  SwNodeIndex aPrvIdx( pPos->nNode );
862  if( pTextNode && pTextNode->CanJoinPrev( &aPrvIdx ) &&
863  *m_pSttNdIdx <= aPrvIdx )
864  {
865  // Normally here should take place a JoinNext, but all cursors and
866  // so are registered in pTextNode, so that it MUST remain.
867 
868  // Convert paragraph to character attribute, from Prev adopt
869  // the paragraph attribute and the template!
870  SwTextNode* pPrev = aPrvIdx.GetNode().GetTextNode();
871  pTextNode->ChgFormatColl( pPrev->GetTextColl() );
872  pTextNode->FormatToTextAttr( pPrev );
873  pTextNode->ResetAllAttr();
874 
875  if( pPrev->HasSwAttrSet() )
876  pTextNode->SetAttr( *pPrev->GetpSwAttrSet() );
877 
878  if( &m_pPam->GetBound().nNode.GetNode() == pPrev )
879  m_pPam->GetBound().nContent.Assign( pTextNode, 0 );
880  if( &m_pPam->GetBound(false).nNode.GetNode() == pPrev )
881  m_pPam->GetBound(false).nContent.Assign( pTextNode, 0 );
882 
883  pTextNode->JoinPrev();
884  }
885  }
886 
887  // adjust AutoLoad in DocumentProperties
// When an autoload interval is set but no URL, the path of the imported file
// is used as autoload URL.
888  if (!m_bFuzzing && IsNewDoc())
889  {
890  SwDocShell *pDocShell(m_xDoc->GetDocShell());
891  OSL_ENSURE(pDocShell, "no SwDocShell");
892  if (pDocShell) {
893  uno::Reference<document::XDocumentPropertiesSupplier> xDPS(
894  pDocShell->GetModel(), uno::UNO_QUERY_THROW);
895  uno::Reference<document::XDocumentProperties> xDocProps(
896  xDPS->getDocumentProperties());
897  OSL_ENSURE(xDocProps.is(), "DocumentProperties is null");
898  if ( xDocProps.is() && (xDocProps->getAutoloadSecs() > 0) &&
899  (xDocProps->getAutoloadURL().isEmpty()) )
900  {
901  xDocProps->setAutoloadURL(m_aPathToFile);
902  }
903  }
904  }
905 
906  if( m_bUpdateDocStat )
907  {
908  m_xDoc->getIDocumentStatistics().UpdateDocStat( false, true );
909  }
910  }
911 
// The start index is only needed while parsing can still be resumed.
912  if( SvParserState::Pending != GetStatus() )
913  {
914  delete m_pSttNdIdx;
915  m_pSttNdIdx = nullptr;
916  }
917 
918  // should the parser be the last one who hold the document, then nothing
919  // has to be done anymore, document will be destroyed shortly!
920  if( 1 < m_xDoc->getReferenceCount() )
921  {
922  if( bWasUndo )
923  {
924  m_xDoc->GetIDocumentUndoRedo().DelAllUndoObj();
925  m_xDoc->GetIDocumentUndoRedo().DoUndo(true);
926  }
927  else if( !pInitVSh )
928  {
929  // When at the beginning of Continue no Shell was available,
930  // it's possible in the meantime one was created.
931  // In that case the bWasUndo flag is wrong and we must
932  // enable Undo.
933  SwViewShell *pTmpVSh = CheckActionViewShell();
934  if( pTmpVSh )
935  {
936  m_xDoc->GetIDocumentUndoRedo().DoUndo(true);
937  }
938  }
939 
940  m_xDoc->SetOle2Link( aOLELink );
941  if( !bModified )
942  m_xDoc->getIDocumentState().ResetModified();
943  if( m_bSetModEnabled && m_xDoc->GetDocShell() )
944  {
945  m_xDoc->GetDocShell()->EnableSetModified();
946  m_bSetModEnabled = false; // this is unnecessary here
947  }
948  }
949 
950  // When the Document-SwViewShell still exists and an Action is open
951  // (doesn't have to be by abort), end the Action, disconnect from Shell
952  // and finally reconstruct the old Shell.
953  CallEndAction( true );
954 
955 #ifdef DBG_UTIL
956  m_nContinue--;
957 #endif
958 }
959 
// Hint handler of the parser. Only SfxHintId::Dying is handled: the parser
// stops listening to everything and calls ReleaseRef(). All other hints are
// ignored.
// NOTE(review): the parser registers on the first page descriptor's notifier
// during start-up; ReleaseRef() is presumably the counterpart of a reference
// taken elsewhere - confirm.
960 void SwHTMLParser::Notify(const SfxHint& rHint)
961 {
962  if(rHint.GetId() == SfxHintId::Dying)
963  {
964  EndListeningAll();
965  ReleaseRef();
966  }
967 }
968 
// Body of DocumentDetected (name taken from the assertion text below).
// NOTE(review): the signature line (listing line 969) is missing from this
// listing.
// Marks the document as initialised. For a new document it also finishes a
// still open header, ends the current action, switches Undo off and starts a
// new action.
970 {
971  OSL_ENSURE( !m_bDocInitialized, "DocumentDetected called multiple times" );
972  m_bDocInitialized = true;
973  if( IsNewDoc() )
974  {
975  if( IsInHeader() )
976  FinishHeader();
977 
978  CallEndAction( true );
979 
980  m_xDoc->GetIDocumentUndoRedo().DoUndo(false);
981  // For DocumentDetected in general a SwViewShell is created.
982  // But it also can be created later, in case the UI is captured.
983  CallStartAction();
984  }
985 }
986 
987 // is called for every token that is recognised in CallParser
989 {
990  if( ( m_xDoc->GetDocShell() && m_xDoc->GetDocShell()->IsAbortingImport() )
991  || 1 == m_xDoc->getReferenceCount() )
992  {
993  // Was the import cancelled by SFX? If a pending stack
994  // exists, clean it.
995  eState = SvParserState::Error;
996  OSL_ENSURE( m_vPendingStack.empty() || m_vPendingStack.back().nToken != HtmlTokenId::NONE,
997  "SwHTMLParser::NextToken: Pending-Stack without token" );
998  if( 1 == m_xDoc->getReferenceCount() || m_vPendingStack.empty() )
999  return ;
1000  }
1001 
1002 #if OSL_DEBUG_LEVEL > 0
1003  if( !m_vPendingStack.empty() )
1004  {
1005  switch( nToken )
1006  {
1007  // tables are read by recursive method calls
1008  case HtmlTokenId::TABLE_ON:
1009  // For CSS declarations we might have to wait
1010  // for a file download to finish
1011  case HtmlTokenId::LINK:
1012  // For controls we might have to set the size.
1013  case HtmlTokenId::INPUT:
1014  case HtmlTokenId::TEXTAREA_ON:
1015  case HtmlTokenId::SELECT_ON:
1016  case HtmlTokenId::SELECT_OFF:
1017  break;
1018  default:
1019  OSL_ENSURE( m_vPendingStack.empty(), "Unknown token for Pending-Stack" );
1020  break;
1021  }
1022  }
1023 #endif
1024 
1025  // The following special cases have to be treated before the
1026  // filter detection, because Netscape doesn't reference the content
1027  // of the title for filter detection either.
1028  if( m_vPendingStack.empty() )
1029  {
1030  if( m_bInTitle )
1031  {
1032  switch( nToken )
1033  {
1034  case HtmlTokenId::TITLE_OFF:
1035  {
1036  OUString sTitle = m_sTitle.makeStringAndClear();
1037  if( IsNewDoc() && !sTitle.isEmpty() )
1038  {
1039  if( m_xDoc->GetDocShell() ) {
1040  uno::Reference<document::XDocumentPropertiesSupplier>
1041  xDPS(m_xDoc->GetDocShell()->GetModel(),
1042  uno::UNO_QUERY_THROW);
1043  uno::Reference<document::XDocumentProperties> xDocProps(
1044  xDPS->getDocumentProperties());
1045  OSL_ENSURE(xDocProps.is(), "no DocumentProperties");
1046  if (xDocProps.is()) {
1047  xDocProps->setTitle(sTitle);
1048  }
1049 
1050  m_xDoc->GetDocShell()->SetTitle(sTitle);
1051  }
1052  }
1053  m_bInTitle = false;
1054  break;
1055  }
1056 
1057  case HtmlTokenId::NONBREAKSPACE:
1058  m_sTitle.append(" ");
1059  break;
1060 
1061  case HtmlTokenId::SOFTHYPH:
1062  m_sTitle.append("-");
1063  break;
1064 
1065  case HtmlTokenId::TEXTTOKEN:
1066  m_sTitle.append(aToken);
1067  break;
1068 
1069  default:
1070  m_sTitle.append("<");
1071  if( (nToken >= HtmlTokenId::ONOFF_START) && isOffToken(nToken) )
1072  m_sTitle.append("/");
1073  m_sTitle.append(sSaveToken);
1074  if( !aToken.isEmpty() )
1075  {
1076  m_sTitle.append(" ");
1077  m_sTitle.append(aToken);
1078  }
1079  m_sTitle.append(">");
1080  break;
1081  }
1082 
1083  return;
1084  }
1085  }
1086 
1087  // Find out what type of document it is if we don't know already.
1088  // For Controls this has to be finished before the control is inserted
1089  // because for inserting a View is needed.
1090  if( !m_bDocInitialized )
1091  DocumentDetected();
1092 
1093  bool bGetIDOption = false, bInsertUnknown = false;
1094  bool bUpperSpaceSave = m_bUpperSpace;
1095  m_bUpperSpace = false;
1096 
1097  // The following special cases may or have to be treated after the
1098  // filter detection
1099  if( m_vPendingStack.empty() )
1100  {
1101  if( m_bInFloatingFrame )
1102  {
1103  // <SCRIPT> is ignored here (from us), because it is ignored in
1104  // Applets as well
1105  if( HtmlTokenId::IFRAME_OFF == nToken )
1106  {
1107  m_bCallNextToken = false;
1108  m_bInFloatingFrame = false;
1109  }
1110 
1111  return;
1112  }
1113  else if( m_bInNoEmbed )
1114  {
1115  switch( nToken )
1116  {
1117  case HtmlTokenId::NOEMBED_OFF:
1120  m_aContents.clear();
1121  m_bCallNextToken = false;
1122  m_bInNoEmbed = false;
1123  break;
1124 
1125  case HtmlTokenId::RAWDATA:
1127  break;
1128 
1129  default:
1130  OSL_ENSURE( false, "SwHTMLParser::NextToken: invalid tag" );
1131  break;
1132  }
1133 
1134  return;
1135  }
1136  else if( m_pAppletImpl )
1137  {
1138  // in an applet only <PARAM> tags and the </APPLET> tag
1139  // are of interest for us (for the moment)
1140  // <SCRIPT> is ignored here (from Netscape)!
1141 
1142  switch( nToken )
1143  {
1144  case HtmlTokenId::APPLET_OFF:
1145  m_bCallNextToken = false;
1146  EndApplet();
1147  break;
1148  case HtmlTokenId::OBJECT_OFF:
1149  m_bCallNextToken = false;
1150  EndObject();
1151  break;
1152  case HtmlTokenId::PARAM:
1153  InsertParam();
1154  break;
1155  default: break;
1156  }
1157 
1158  return;
1159  }
1160  else if( m_bTextArea )
1161  {
1162  // in a TextArea everything up to </TEXTAREA> is inserted as text.
1163  // <SCRIPT> is ignored here (from Netscape)!
1164 
1165  switch( nToken )
1166  {
1167  case HtmlTokenId::TEXTAREA_OFF:
1168  m_bCallNextToken = false;
1169  EndTextArea();
1170  break;
1171 
1172  default:
1173  InsertTextAreaText( nToken );
1174  break;
1175  }
1176 
1177  return;
1178  }
1179  else if( m_bSelect )
1180  {
1181  // HAS to be treated after bNoScript!
1182  switch( nToken )
1183  {
1184  case HtmlTokenId::SELECT_OFF:
1185  m_bCallNextToken = false;
1186  EndSelect();
1187  return;
1188 
1189  case HtmlTokenId::OPTION:
1191  return;
1192 
1193  case HtmlTokenId::TEXTTOKEN:
1194  InsertSelectText();
1195  return;
1196 
1197  case HtmlTokenId::INPUT:
1198  case HtmlTokenId::SCRIPT_ON:
1199  case HtmlTokenId::SCRIPT_OFF:
1200  case HtmlTokenId::NOSCRIPT_ON:
1201  case HtmlTokenId::NOSCRIPT_OFF:
1202  case HtmlTokenId::RAWDATA:
1203  // treat in normal switch
1204  break;
1205 
1206  default:
1207  // ignore
1208  return;
1209  }
1210  }
1211  else if( m_pMarquee )
1212  {
1213  // in a TextArea everything up to </TEXTAREA> is inserted as text.
1214  // The <SCRIPT> tags are ignored from MS-IE, we ignore the whole
1215  // script.
1216  switch( nToken )
1217  {
1218  case HtmlTokenId::MARQUEE_OFF:
1219  m_bCallNextToken = false;
1220  EndMarquee();
1221  break;
1222 
1223  case HtmlTokenId::TEXTTOKEN:
1225  break;
1226  default: break;
1227  }
1228 
1229  return;
1230  }
1231  else if( m_bInField )
1232  {
1233  switch( nToken )
1234  {
1235  case HtmlTokenId::SDFIELD_OFF:
1236  m_bCallNextToken = false;
1237  EndField();
1238  break;
1239 
1240  case HtmlTokenId::TEXTTOKEN:
1241  InsertFieldText();
1242  break;
1243  default: break;
1244  }
1245 
1246  return;
1247  }
1249  {
1250  switch( nToken )
1251  {
1252  case HtmlTokenId::ANCHOR_OFF:
1253  EndAnchor();
1254  m_bCallNextToken = false;
1255  break;
1256 
1257  case HtmlTokenId::TEXTTOKEN:
1259  break;
1260  default: break;
1261  }
1262  return;
1263  }
1264  else if( !m_aUnknownToken.isEmpty() )
1265  {
1266  // Paste content of unknown tags.
1267  // (but surely if we are not in the header section) fdo#36080 fdo#34666
1268  if (!aToken.isEmpty() && !IsInHeader() )
1269  {
1270  if( !m_bDocInitialized )
1271  DocumentDetected();
1272  m_xDoc->getIDocumentContentOperations().InsertString( *m_pPam, aToken.toString());
1273 
1274  // if there are temporary paragraph attributes and the
1275  // paragraph isn't empty then the paragraph attributes
1276  // are final.
1277  m_aParaAttrs.clear();
1278 
1279  SetAttr();
1280  }
1281 
1282  // Unknown token in the header are only closed by a matching
1283  // end-token, </HEAD> or <BODY>. Text inside is ignored.
1284  switch( nToken )
1285  {
1286  case HtmlTokenId::UNKNOWNCONTROL_OFF:
1287  if( m_aUnknownToken != sSaveToken )
1288  return;
1289  [[fallthrough]];
1290  case HtmlTokenId::FRAMESET_ON:
1291  case HtmlTokenId::HEAD_OFF:
1292  case HtmlTokenId::BODY_ON:
1293  case HtmlTokenId::IMAGE: // Don't know why Netscape acts this way.
1294  m_aUnknownToken.clear();
1295  break;
1296  case HtmlTokenId::TEXTTOKEN:
1297  return;
1298  default:
1299  m_aUnknownToken.clear();
1300  break;
1301  }
1302  }
1303  }
1304 
1305  switch( nToken )
1306  {
1307  case HtmlTokenId::BODY_ON:
1308  if (!m_bBodySeen)
1309  {
1310  m_bBodySeen = true;
1311  if( !m_aStyleSource.isEmpty() )
1312  {
1313  m_pCSS1Parser->ParseStyleSheet( m_aStyleSource );
1314  m_aStyleSource.clear();
1315  }
1316  if( IsNewDoc() )
1317  {
1319  // If there is a template for the first or the right page,
1320  // it is set here.
1321  const SwPageDesc *pPageDesc = nullptr;
1322  if( m_pCSS1Parser->IsSetFirstPageDesc() )
1323  pPageDesc = m_pCSS1Parser->GetFirstPageDesc();
1324  else if( m_pCSS1Parser->IsSetRightPageDesc() )
1325  pPageDesc = m_pCSS1Parser->GetRightPageDesc();
1326 
1327  if( pPageDesc )
1328  {
1329  m_xDoc->getIDocumentContentOperations().InsertPoolItem( *m_pPam, SwFormatPageDesc( pPageDesc ) );
1330  }
1331  }
1332  }
1333  break;
1334 
1335  case HtmlTokenId::LINK:
1336  InsertLink();
1337  break;
1338 
1339  case HtmlTokenId::BASE:
1340  {
1341  const HTMLOptions& rHTMLOptions = GetOptions();
1342  for (size_t i = rHTMLOptions.size(); i; )
1343  {
1344  const HTMLOption& rOption = rHTMLOptions[--i];
1345  switch( rOption.GetToken() )
1346  {
1347  case HtmlOptionId::HREF:
1348  m_sBaseURL = rOption.GetString();
1349  break;
1350  case HtmlOptionId::TARGET:
1351  if( IsNewDoc() )
1352  {
1353  SwDocShell *pDocShell(m_xDoc->GetDocShell());
1354  OSL_ENSURE(pDocShell, "no SwDocShell");
1355  if (pDocShell) {
1356  uno::Reference<document::XDocumentPropertiesSupplier> xDPS(
1357  pDocShell->GetModel(), uno::UNO_QUERY_THROW);
1358  uno::Reference<document::XDocumentProperties>
1359  xDocProps(xDPS->getDocumentProperties());
1360  OSL_ENSURE(xDocProps.is(),"no DocumentProperties");
1361  if (xDocProps.is()) {
1362  xDocProps->setDefaultTarget(
1363  rOption.GetString());
1364  }
1365  }
1366  }
1367  break;
1368  default: break;
1369  }
1370  }
1371  }
1372  break;
1373 
1374  case HtmlTokenId::META:
1375  {
1376  SvKeyValueIterator *pHTTPHeader = nullptr;
1377  if( IsNewDoc() )
1378  {
1379  SwDocShell *pDocSh = m_xDoc->GetDocShell();
1380  if( pDocSh )
1381  pHTTPHeader = pDocSh->GetHeaderAttributes();
1382  }
1383  SwDocShell *pDocShell(m_xDoc->GetDocShell());
1384  OSL_ENSURE(pDocShell, "no SwDocShell");
1385  if (pDocShell)
1386  {
1387  uno::Reference<document::XDocumentProperties> xDocProps;
1388  if (IsNewDoc())
1389  {
1390  const uno::Reference<document::XDocumentPropertiesSupplier>
1391  xDPS( pDocShell->GetModel(), uno::UNO_QUERY_THROW );
1392  xDocProps = xDPS->getDocumentProperties();
1393  OSL_ENSURE(xDocProps.is(), "DocumentProperties is null");
1394  }
1395  ParseMetaOptions( xDocProps, pHTTPHeader );
1396  }
1397  }
1398  break;
1399 
1400  case HtmlTokenId::TITLE_ON:
1401  m_bInTitle = true;
1402  break;
1403 
1404  case HtmlTokenId::SCRIPT_ON:
1405  NewScript();
1406  break;
1407 
1408  case HtmlTokenId::SCRIPT_OFF:
1409  EndScript();
1410  break;
1411 
1412  case HtmlTokenId::NOSCRIPT_ON:
1413  case HtmlTokenId::NOSCRIPT_OFF:
1414  bInsertUnknown = true;
1415  break;
1416 
1417  case HtmlTokenId::STYLE_ON:
1418  NewStyle();
1419  break;
1420 
1421  case HtmlTokenId::STYLE_OFF:
1422  EndStyle();
1423  break;
1424 
1425  case HtmlTokenId::RAWDATA:
1426  if( !m_bIgnoreRawData )
1427  {
1428  if( IsReadScript() )
1429  {
1430  AddScriptSource();
1431  }
1432  else if( IsReadStyle() )
1433  {
1434  if( !m_aStyleSource.isEmpty() )
1435  m_aStyleSource += "\n";
1436  m_aStyleSource += aToken;
1437  }
1438  }
1439  break;
1440 
1441  case HtmlTokenId::OBJECT_ON:
1442  if (m_bXHTML)
1443  {
1444  if (!InsertEmbed())
1445  InsertImage();
1446  break;
1447  }
1448 #if HAVE_FEATURE_JAVA
1449  NewObject();
1450  m_bCallNextToken = m_pAppletImpl!=nullptr && m_xTable;
1451 #endif
1452  break;
1453 
1454  case HtmlTokenId::OBJECT_OFF:
1455  if (!m_aEmbeds.empty())
1456  m_aEmbeds.pop();
1457  break;
1458 
1459  case HtmlTokenId::APPLET_ON:
1460 #if HAVE_FEATURE_JAVA
1461  InsertApplet();
1462  m_bCallNextToken = m_pAppletImpl!=nullptr && m_xTable;
1463 #endif
1464  break;
1465 
1466  case HtmlTokenId::IFRAME_ON:
1467  if (m_bFuzzing && m_nFloatingFrames > 64)
1468  SAL_WARN("sw.html", "Not importing any more FloatingFrames for fuzzing performance");
1469  else
1470  {
1473  }
1474  break;
1475 
1476  case HtmlTokenId::LINEBREAK:
1477  if( !IsReadPRE() )
1478  {
1479  InsertLineBreak();
1480  break;
1481  }
1482  else
1483  bGetIDOption = true;
1484  // <BR>s in <PRE> resemble true LFs, hence no break
1485  [[fallthrough]];
1486 
1487  case HtmlTokenId::NEWPARA:
1488  // CR in PRE/LISTING/XMP
1489  {
1490  if( HtmlTokenId::NEWPARA==nToken ||
1492  {
1493  AppendTextNode(); // there is no LF at this place
1494  // therefore it will cause no problems
1495  SetTextCollAttrs();
1496  }
1497  // progress bar
1498  if (m_xProgress)
1499  m_xProgress->Update(rInput.Tell());
1500  }
1501  break;
1502 
1503  case HtmlTokenId::NONBREAKSPACE:
1504  m_xDoc->getIDocumentContentOperations().InsertString( *m_pPam, OUString(CHAR_HARDBLANK) );
1505  break;
1506 
1507  case HtmlTokenId::SOFTHYPH:
1508  m_xDoc->getIDocumentContentOperations().InsertString( *m_pPam, OUString(CHAR_SOFTHYPHEN) );
1509  break;
1510 
1511  case HtmlTokenId::LINEFEEDCHAR:
1512  if( m_pPam->GetPoint()->nContent.GetIndex() )
1513  AppendTextNode();
1514  if (!m_xTable && !m_xDoc->IsInHeaderFooter(m_pPam->GetPoint()->nNode))
1515  {
1516  NewAttr(m_xAttrTab, &m_xAttrTab->pBreak, SvxFormatBreakItem(SvxBreak::PageBefore, RES_BREAK));
1517  EndAttr( m_xAttrTab->pBreak, false );
1518  }
1519  break;
1520 
1521  case HtmlTokenId::TEXTTOKEN:
1522  // insert string without spanning attributes at the end.
1523  if( !aToken.isEmpty() && ' '==aToken[0] && !IsReadPRE() )
1524  {
1525  sal_Int32 nPos = m_pPam->GetPoint()->nContent.GetIndex();
1526  const SwTextNode* pTextNode = nPos ? m_pPam->GetPoint()->nNode.GetNode().GetTextNode() : nullptr;
1527  if (pTextNode)
1528  {
1529  const OUString& rText = pTextNode->GetText();
1530  sal_Unicode cLast = rText[--nPos];
1531  if( ' ' == cLast || '\x0a' == cLast)
1532  aToken.remove(0, 1);
1533  }
1534  else
1535  aToken.remove(0, 1);
1536 
1537  if( aToken.isEmpty() )
1538  {
1539  m_bUpperSpace = bUpperSpaceSave;
1540  break;
1541  }
1542  }
1543 
1544  if( !aToken.isEmpty() )
1545  {
1546  if( !m_bDocInitialized )
1547  DocumentDetected();
1548 
1549  if (!m_aEmbeds.empty())
1550  {
1551  // The text token is inside an OLE object, which means
1552  // alternate text.
1553  SwOLENode* pOLENode = m_aEmbeds.top();
1554  if (!pOLENode)
1555  {
1556  // <object> is mapped to an image -> ignore.
1557  break;
1558  }
1559 
1560  if (SwFlyFrameFormat* pFormat
1561  = dynamic_cast<SwFlyFrameFormat*>(pOLENode->GetFlyFormat()))
1562  {
1564  {
1565  pObject->SetTitle(pObject->GetTitle() + aToken);
1566  break;
1567  }
1568  }
1569  }
1570 
1571  m_xDoc->getIDocumentContentOperations().InsertString( *m_pPam, aToken.toString());
1572 
1573  // if there are temporary paragraph attributes and the
1574  // paragraph isn't empty then the paragraph attributes
1575  // are final.
1576  m_aParaAttrs.clear();
1577 
1578  SetAttr();
1579  }
1580  break;
1581 
1582  case HtmlTokenId::HORZRULE:
1583  InsertHorzRule();
1584  break;
1585 
1586  case HtmlTokenId::IMAGE:
1587  InsertImage();
1588  // if only the parser references the doc, we can break and set
1589  // an error code
1590  if( 1 == m_xDoc->getReferenceCount() )
1591  {
1592  eState = SvParserState::Error;
1593  }
1594  break;
1595 
1596  case HtmlTokenId::SPACER:
1597  InsertSpacer();
1598  break;
1599 
1600  case HtmlTokenId::EMBED:
1601  InsertEmbed();
1602  break;
1603 
1604  case HtmlTokenId::NOEMBED_ON:
1605  m_bInNoEmbed = true;
1606  m_bCallNextToken = bool(m_xTable);
1607  ReadRawData( OOO_STRING_SVTOOLS_HTML_noembed );
1608  break;
1609 
1610  case HtmlTokenId::DEFLIST_ON:
1611  if( m_nOpenParaToken != HtmlTokenId::NONE )
1612  EndPara();
1613  NewDefList();
1614  break;
1615  case HtmlTokenId::DEFLIST_OFF:
1616  if( m_nOpenParaToken != HtmlTokenId::NONE )
1617  EndPara();
1618  EndDefListItem( HtmlTokenId::NONE );
1619  EndDefList();
1620  break;
1621 
1622  case HtmlTokenId::DD_ON:
1623  case HtmlTokenId::DT_ON:
1624  if( m_nOpenParaToken != HtmlTokenId::NONE )
1625  EndPara();
1626  EndDefListItem();// close <DD>/<DT> and set no template
1627  NewDefListItem( nToken );
1628  break;
1629 
1630  case HtmlTokenId::DD_OFF:
1631  case HtmlTokenId::DT_OFF:
1632  // c.f. HtmlTokenId::LI_OFF
1633  // Actually we should close a DD/DT now.
1634  // But neither Netscape nor Microsoft do this and so don't we.
1635  EndDefListItem( nToken );
1636  break;
1637 
1638  // divisions
1639  case HtmlTokenId::DIVISION_ON:
1640  case HtmlTokenId::CENTER_ON:
1641  if (!m_isInTableStructure)
1642  {
1643  if (m_nOpenParaToken != HtmlTokenId::NONE)
1644  {
1645  if (IsReadPRE())
1646  m_nOpenParaToken = HtmlTokenId::NONE;
1647  else
1648  EndPara();
1649  }
1650  NewDivision( nToken );
1651  }
1652  break;
1653 
1654  case HtmlTokenId::DIVISION_OFF:
1655  case HtmlTokenId::CENTER_OFF:
1656  if (!m_isInTableStructure)
1657  {
1658  if (m_nOpenParaToken != HtmlTokenId::NONE)
1659  {
1660  if (IsReadPRE())
1661  m_nOpenParaToken = HtmlTokenId::NONE;
1662  else
1663  EndPara();
1664  }
1665  EndDivision();
1666  }
1667  break;
1668 
1669  case HtmlTokenId::MULTICOL_ON:
1670  if( m_nOpenParaToken != HtmlTokenId::NONE )
1671  EndPara();
1672  NewMultiCol();
1673  break;
1674 
1675  case HtmlTokenId::MULTICOL_OFF:
1676  if( m_nOpenParaToken != HtmlTokenId::NONE )
1677  EndPara();
1678  EndTag( HtmlTokenId::MULTICOL_ON );
1679  break;
1680 
1681  case HtmlTokenId::MARQUEE_ON:
1682  NewMarquee();
1683  m_bCallNextToken = m_pMarquee!=nullptr && m_xTable;
1684  break;
1685 
1686  case HtmlTokenId::FORM_ON:
1687  NewForm();
1688  break;
1689  case HtmlTokenId::FORM_OFF:
1690  EndForm();
1691  break;
1692 
1693  // templates
1694  case HtmlTokenId::PARABREAK_ON:
1695  if( m_nOpenParaToken != HtmlTokenId::NONE )
1696  EndPara( true );
1697  NewPara();
1698  break;
1699 
1700  case HtmlTokenId::PARABREAK_OFF:
1701  EndPara( true );
1702  break;
1703 
1704  case HtmlTokenId::ADDRESS_ON:
1705  if( m_nOpenParaToken != HtmlTokenId::NONE )
1706  EndPara();
1707  NewTextFormatColl(HtmlTokenId::ADDRESS_ON, RES_POOLCOLL_SEND_ADDRESS);
1708  break;
1709 
1710  case HtmlTokenId::ADDRESS_OFF:
1711  if( m_nOpenParaToken != HtmlTokenId::NONE )
1712  EndPara();
1713  EndTextFormatColl( HtmlTokenId::ADDRESS_OFF );
1714  break;
1715 
1716  case HtmlTokenId::BLOCKQUOTE_ON:
1717  case HtmlTokenId::BLOCKQUOTE30_ON:
1718  if( m_nOpenParaToken != HtmlTokenId::NONE )
1719  EndPara();
1720  NewTextFormatColl( HtmlTokenId::BLOCKQUOTE_ON, RES_POOLCOLL_HTML_BLOCKQUOTE );
1721  break;
1722 
1723  case HtmlTokenId::BLOCKQUOTE_OFF:
1724  case HtmlTokenId::BLOCKQUOTE30_OFF:
1725  if( m_nOpenParaToken != HtmlTokenId::NONE )
1726  EndPara();
1727  EndTextFormatColl( HtmlTokenId::BLOCKQUOTE_ON );
1728  break;
1729 
1730  case HtmlTokenId::PREFORMTXT_ON:
1731  case HtmlTokenId::LISTING_ON:
1732  case HtmlTokenId::XMP_ON:
1733  if( m_nOpenParaToken != HtmlTokenId::NONE )
1734  EndPara();
1736  break;
1737 
1738  case HtmlTokenId::PREFORMTXT_OFF:
1739  m_bNoParSpace = true; // the last PRE-paragraph gets a spacing
1740  EndTextFormatColl( HtmlTokenId::PREFORMTXT_OFF );
1741  break;
1742 
1743  case HtmlTokenId::LISTING_OFF:
1744  case HtmlTokenId::XMP_OFF:
1745  EndTextFormatColl( nToken );
1746  break;
1747 
1748  case HtmlTokenId::HEAD1_ON:
1749  case HtmlTokenId::HEAD2_ON:
1750  case HtmlTokenId::HEAD3_ON:
1751  case HtmlTokenId::HEAD4_ON:
1752  case HtmlTokenId::HEAD5_ON:
1753  case HtmlTokenId::HEAD6_ON:
1754  if( m_nOpenParaToken != HtmlTokenId::NONE )
1755  {
1756  if( IsReadPRE() )
1757  m_nOpenParaToken = HtmlTokenId::NONE;
1758  else
1759  EndPara();
1760  }
1761  NewHeading( nToken );
1762  break;
1763 
1764  case HtmlTokenId::HEAD1_OFF:
1765  case HtmlTokenId::HEAD2_OFF:
1766  case HtmlTokenId::HEAD3_OFF:
1767  case HtmlTokenId::HEAD4_OFF:
1768  case HtmlTokenId::HEAD5_OFF:
1769  case HtmlTokenId::HEAD6_OFF:
1770  EndHeading();
1771  break;
1772 
1773  case HtmlTokenId::TABLE_ON:
1774  if( !m_vPendingStack.empty() )
1775  BuildTable( SvxAdjust::End );
1776  else
1777  {
1778  if( m_nOpenParaToken != HtmlTokenId::NONE )
1779  EndPara();
1780  OSL_ENSURE(!m_xTable, "table in table not allowed here");
1781  if( !m_xTable && (IsNewDoc() || !m_pPam->GetNode().FindTableNode()) &&
1782  (m_pPam->GetPoint()->nNode.GetIndex() >
1783  m_xDoc->GetNodes().GetEndOfExtras().GetIndex() ||
1785  {
1786  if ( m_nParaCnt < 5 )
1787  Show(); // show what we have up to here
1788 
1789  SvxAdjust eAdjust = m_xAttrTab->pAdjust
1790  ? static_cast<const SvxAdjustItem&>(m_xAttrTab->pAdjust->GetItem()).
1791  GetAdjust()
1792  : SvxAdjust::End;
1793  BuildTable( eAdjust );
1794  }
1795  else
1796  bInsertUnknown = m_bKeepUnknown;
1797  }
1798  break;
1799 
1800  // lists
1801  case HtmlTokenId::DIRLIST_ON:
1802  case HtmlTokenId::MENULIST_ON:
1803  case HtmlTokenId::ORDERLIST_ON:
1804  case HtmlTokenId::UNORDERLIST_ON:
1805  if( m_nOpenParaToken != HtmlTokenId::NONE )
1806  EndPara();
1807  NewNumberBulletList( nToken );
1808  break;
1809 
1810  case HtmlTokenId::DIRLIST_OFF:
1811  case HtmlTokenId::MENULIST_OFF:
1812  case HtmlTokenId::ORDERLIST_OFF:
1813  case HtmlTokenId::UNORDERLIST_OFF:
1814  if( m_nOpenParaToken != HtmlTokenId::NONE )
1815  EndPara();
1816  EndNumberBulletListItem( HtmlTokenId::NONE, true );
1817  EndNumberBulletList( nToken );
1818  break;
1819 
1820  case HtmlTokenId::LI_ON:
1821  case HtmlTokenId::LISTHEADER_ON:
1822  if( m_nOpenParaToken != HtmlTokenId::NONE &&
1824  || HtmlTokenId::PARABREAK_ON==m_nOpenParaToken) )
1825  {
1826  // only finish paragraph for <P><LI>, not for <DD><LI>
1827  EndPara();
1828  }
1829 
1830  if (m_bFuzzing && m_nListItems > 1024)
1831  {
1832  SAL_WARN("sw.html", "skipping remaining bullet import for performance during fuzzing");
1833  }
1834  else
1835  {
1836  EndNumberBulletListItem( HtmlTokenId::NONE, false );// close <LI>/<LH> and don't set a template
1837  NewNumberBulletListItem( nToken );
1838  }
1839 
1840  ++m_nListItems;
1841 
1842  break;
1843  case HtmlTokenId::LI_OFF:
1844  case HtmlTokenId::LISTHEADER_OFF:
1845  EndNumberBulletListItem( nToken, false );
1846  break;
1847 
1848  // Attribute :
1849  case HtmlTokenId::ITALIC_ON:
1850  {
1854  NewStdAttr( HtmlTokenId::ITALIC_ON,
1855  &m_xAttrTab->pItalic, aPosture,
1856  &m_xAttrTab->pItalicCJK, &aPostureCJK,
1857  &m_xAttrTab->pItalicCTL, &aPostureCTL );
1858  }
1859  break;
1860 
1861  case HtmlTokenId::BOLD_ON:
1862  {
1866  NewStdAttr( HtmlTokenId::BOLD_ON,
1867  &m_xAttrTab->pBold, aWeight,
1868  &m_xAttrTab->pBoldCJK, &aWeightCJK,
1869  &m_xAttrTab->pBoldCTL, &aWeightCTL );
1870  }
1871  break;
1872 
1873  case HtmlTokenId::STRIKE_ON:
1874  case HtmlTokenId::STRIKETHROUGH_ON:
1875  {
1876  NewStdAttr( HtmlTokenId::STRIKE_ON, &m_xAttrTab->pStrike,
1878  }
1879  break;
1880 
1881  case HtmlTokenId::UNDERLINE_ON:
1882  {
1883  NewStdAttr( HtmlTokenId::UNDERLINE_ON, &m_xAttrTab->pUnderline,
1885  }
1886  break;
1887 
1888  case HtmlTokenId::SUPERSCRIPT_ON:
1889  {
1890  NewStdAttr( HtmlTokenId::SUPERSCRIPT_ON, &m_xAttrTab->pEscapement,
1892  }
1893  break;
1894 
1895  case HtmlTokenId::SUBSCRIPT_ON:
1896  {
1897  NewStdAttr( HtmlTokenId::SUBSCRIPT_ON, &m_xAttrTab->pEscapement,
1899  }
1900  break;
1901 
1902  case HtmlTokenId::BLINK_ON:
1903  {
1904  NewStdAttr( HtmlTokenId::BLINK_ON, &m_xAttrTab->pBlink,
1905  SvxBlinkItem( true, RES_CHRATR_BLINK ) );
1906  }
1907  break;
1908 
1909  case HtmlTokenId::SPAN_ON:
1910  NewStdAttr( HtmlTokenId::SPAN_ON );
1911  break;
1912 
1913  case HtmlTokenId::ITALIC_OFF:
1914  case HtmlTokenId::BOLD_OFF:
1915  case HtmlTokenId::STRIKE_OFF:
1916  case HtmlTokenId::UNDERLINE_OFF:
1917  case HtmlTokenId::SUPERSCRIPT_OFF:
1918  case HtmlTokenId::SUBSCRIPT_OFF:
1919  case HtmlTokenId::BLINK_OFF:
1920  case HtmlTokenId::SPAN_OFF:
1921  EndTag( nToken );
1922  break;
1923 
1924  case HtmlTokenId::STRIKETHROUGH_OFF:
1925  EndTag( HtmlTokenId::STRIKE_OFF );
1926  break;
1927 
1928  case HtmlTokenId::BASEFONT_ON:
1929  NewBasefontAttr();
1930  break;
1931  case HtmlTokenId::BASEFONT_OFF:
1932  EndBasefontAttr();
1933  break;
1934  case HtmlTokenId::FONT_ON:
1935  case HtmlTokenId::BIGPRINT_ON:
1936  case HtmlTokenId::SMALLPRINT_ON:
1937  NewFontAttr( nToken );
1938  break;
1939  case HtmlTokenId::FONT_OFF:
1940  case HtmlTokenId::BIGPRINT_OFF:
1941  case HtmlTokenId::SMALLPRINT_OFF:
1942  EndFontAttr( nToken );
1943  break;
1944 
1945  case HtmlTokenId::EMPHASIS_ON:
1946  case HtmlTokenId::CITATION_ON:
1947  case HtmlTokenId::STRONG_ON:
1948  case HtmlTokenId::CODE_ON:
1949  case HtmlTokenId::SAMPLE_ON:
1950  case HtmlTokenId::KEYBOARD_ON:
1951  case HtmlTokenId::VARIABLE_ON:
1952  case HtmlTokenId::DEFINSTANCE_ON:
1953  case HtmlTokenId::SHORTQUOTE_ON:
1954  case HtmlTokenId::LANGUAGE_ON:
1955  case HtmlTokenId::AUTHOR_ON:
1956  case HtmlTokenId::PERSON_ON:
1957  case HtmlTokenId::ACRONYM_ON:
1958  case HtmlTokenId::ABBREVIATION_ON:
1959  case HtmlTokenId::INSERTEDTEXT_ON:
1960  case HtmlTokenId::DELETEDTEXT_ON:
1961 
1962  case HtmlTokenId::TELETYPE_ON:
1963  NewCharFormat( nToken );
1964  break;
1965 
1966  case HtmlTokenId::SDFIELD_ON:
1967  NewField();
1969  break;
1970 
1971  case HtmlTokenId::EMPHASIS_OFF:
1972  case HtmlTokenId::CITATION_OFF:
1973  case HtmlTokenId::STRONG_OFF:
1974  case HtmlTokenId::CODE_OFF:
1975  case HtmlTokenId::SAMPLE_OFF:
1976  case HtmlTokenId::KEYBOARD_OFF:
1977  case HtmlTokenId::VARIABLE_OFF:
1978  case HtmlTokenId::DEFINSTANCE_OFF:
1979  case HtmlTokenId::SHORTQUOTE_OFF:
1980  case HtmlTokenId::LANGUAGE_OFF:
1981  case HtmlTokenId::AUTHOR_OFF:
1982  case HtmlTokenId::PERSON_OFF:
1983  case HtmlTokenId::ACRONYM_OFF:
1984  case HtmlTokenId::ABBREVIATION_OFF:
1985  case HtmlTokenId::INSERTEDTEXT_OFF:
1986  case HtmlTokenId::DELETEDTEXT_OFF:
1987 
1988  case HtmlTokenId::TELETYPE_OFF:
1989  EndTag( nToken );
1990  break;
1991 
1992  case HtmlTokenId::HEAD_OFF:
1993  if( !m_aStyleSource.isEmpty() )
1994  {
1995  m_pCSS1Parser->ParseStyleSheet( m_aStyleSource );
1996  m_aStyleSource.clear();
1997  }
1998  break;
1999 
2000  case HtmlTokenId::DOCTYPE:
2001  case HtmlTokenId::BODY_OFF:
2002  case HtmlTokenId::HTML_OFF:
2003  case HtmlTokenId::HEAD_ON:
2004  case HtmlTokenId::TITLE_OFF:
2005  break; // don't evaluate further???
2006  case HtmlTokenId::HTML_ON:
2007  {
2008  const HTMLOptions& rHTMLOptions = GetOptions();
2009  for (size_t i = rHTMLOptions.size(); i; )
2010  {
2011  const HTMLOption& rOption = rHTMLOptions[--i];
2012  if( HtmlOptionId::DIR == rOption.GetToken() )
2013  {
2014  const OUString& rDir = rOption.GetString();
2015  SfxItemSet aItemSet( m_xDoc->GetAttrPool(),
2016  m_pCSS1Parser->GetWhichMap() );
2017  SvxCSS1PropertyInfo aPropInfo;
2018  OUString aDummy;
2019  ParseStyleOptions( aDummy, aDummy, aDummy, aItemSet,
2020  aPropInfo, nullptr, &rDir );
2021 
2022  m_pCSS1Parser->SetPageDescAttrs( nullptr, &aItemSet );
2023  break;
2024  }
2025  }
2026  }
2027  break;
2028 
2029  case HtmlTokenId::INPUT:
2030  InsertInput();
2031  break;
2032 
2033  case HtmlTokenId::TEXTAREA_ON:
2034  NewTextArea();
2036  break;
2037 
2038  case HtmlTokenId::SELECT_ON:
2039  NewSelect();
2041  break;
2042 
2043  case HtmlTokenId::ANCHOR_ON:
2044  NewAnchor();
2045  break;
2046 
2047  case HtmlTokenId::ANCHOR_OFF:
2048  EndAnchor();
2049  break;
2050 
2051  case HtmlTokenId::COMMENT:
2052  if( ( aToken.getLength() > 5 ) && ( ! m_bIgnoreHTMLComments ) )
2053  {
2054  // insert as Post-It
2055  // If there are no space characters right behind
2056  // the <!-- and on front of the -->, leave the comment untouched.
2057  if( ' ' == aToken[ 3 ] &&
2058  ' ' == aToken[ aToken.getLength()-3 ] )
2059  {
2060  std::u16string_view aComment( aToken.subView( 3, aToken.getLength()-5 ) );
2061  InsertComment(OUString(comphelper::string::strip(aComment, ' ')));
2062  }
2063  else
2064  {
2065  OUString aComment = "<" + aToken + ">";
2066  InsertComment( aComment );
2067  }
2068  }
2069  break;
2070 
2071  case HtmlTokenId::MAP_ON:
2072  // Image Maps are read asynchronously: At first only an image map is created
2073  // Areas are processed later. Nevertheless the
2074  // ImageMap is inserted into the IMap-Array, because it might be used
2075  // already.
2076  m_pImageMap = new ImageMap;
2078  {
2079  if (!m_pImageMaps)
2080  m_pImageMaps.reset( new ImageMaps );
2081  m_pImageMaps->push_back(std::unique_ptr<ImageMap>(m_pImageMap));
2082  }
2083  else
2084  {
2085  delete m_pImageMap;
2086  m_pImageMap = nullptr;
2087  }
2088  break;
2089 
2090  case HtmlTokenId::MAP_OFF:
2091  // there is no ImageMap anymore (don't delete IMap, because it's
2092  // already contained in the array!)
2093  m_pImageMap = nullptr;
2094  break;
2095 
2096  case HtmlTokenId::AREA:
2097  if( m_pImageMap )
2098  ParseAreaOptions( m_pImageMap, m_sBaseURL, SvMacroItemId::OnMouseOver,
2099  SvMacroItemId::OnMouseOut );
2100  break;
2101 
2102  case HtmlTokenId::FRAMESET_ON:
2103  bInsertUnknown = m_bKeepUnknown;
2104  break;
2105 
2106  case HtmlTokenId::NOFRAMES_ON:
2107  if( IsInHeader() )
2108  FinishHeader();
2109  bInsertUnknown = m_bKeepUnknown;
2110  break;
2111 
2112  case HtmlTokenId::UNKNOWNCONTROL_ON:
2113  // Ignore content of unknown token in the header, if the token
2114  // does not start with a '!'.
2115  // (but judging from the code, also if does not start with a '%')
2116  // (and also if we're not somewhere we consider PRE)
2117  if( IsInHeader() && !IsReadPRE() && m_aUnknownToken.isEmpty() &&
2118  !sSaveToken.isEmpty() && '!' != sSaveToken[0] &&
2119  '%' != sSaveToken[0] )
2120  m_aUnknownToken = sSaveToken;
2121  [[fallthrough]];
2122 
2123  default:
2124  bInsertUnknown = m_bKeepUnknown;
2125  break;
2126  }
2127 
2128  if( bGetIDOption )
2129  InsertIDOption();
2130 
2131  if( bInsertUnknown )
2132  {
2133  OUStringBuffer aComment("HTML: <");
2134  if( (nToken >= HtmlTokenId::ONOFF_START) && isOffToken(nToken) )
2135  aComment.append("/");
2136  aComment.append(sSaveToken);
2137  if( !aToken.isEmpty() )
2138  {
2139  UnescapeToken();
2140  aComment.append(" " + aToken);
2141  }
2142  aComment.append(">");
2143  InsertComment( aComment.makeStringAndClear() );
2144  }
2145 
2146  // if there are temporary paragraph attributes and the
2147  // paragraph isn't empty then the paragraph attributes are final.
2148  if( !m_aParaAttrs.empty() && m_pPam->GetPoint()->nContent.GetIndex() )
2149  m_aParaAttrs.clear();
2150 }
2151 
2152 static void lcl_swhtml_getItemInfo( const HTMLAttr& rAttr,
2153  bool& rScriptDependent,
2154  sal_uInt16& rScriptType )
2155 {
2156  switch( rAttr.GetItem().Which() )
2157  {
2158  case RES_CHRATR_FONT:
2159  case RES_CHRATR_FONTSIZE:
2160  case RES_CHRATR_LANGUAGE:
2161  case RES_CHRATR_POSTURE:
2162  case RES_CHRATR_WEIGHT:
2163  rScriptType = i18n::ScriptType::LATIN;
2164  rScriptDependent = true;
2165  break;
2166  case RES_CHRATR_CJK_FONT:
2170  case RES_CHRATR_CJK_WEIGHT:
2171  rScriptType = i18n::ScriptType::ASIAN;
2172  rScriptDependent = true;
2173  break;
2174  case RES_CHRATR_CTL_FONT:
2178  case RES_CHRATR_CTL_WEIGHT:
2179  rScriptType = i18n::ScriptType::COMPLEX;
2180  rScriptDependent = true;
2181  break;
2182  default:
2183  rScriptDependent = false;
2184  break;
2185  }
2186 }
2187 
2188 bool SwHTMLParser::AppendTextNode( SwHTMLAppendMode eMode, bool bUpdateNum )
2189 {
2190  // A hard line break at the end always must be removed.
2191  // A second one we replace with paragraph spacing.
2192  sal_Int32 nLFStripped = StripTrailingLF();
2193  if( (AM_NOSPACE==eMode || AM_SOFTNOSPACE==eMode) && nLFStripped > 1 )
2194  eMode = AM_SPACE;
2195 
2196  // the hard attributes of this paragraph will never be invalid again
2197  m_aParaAttrs.clear();
2198 
2199  SwTextNode *pTextNode = (AM_SPACE==eMode || AM_NOSPACE==eMode) ?
2200  m_pPam->GetPoint()->nNode.GetNode().GetTextNode() : nullptr;
2201 
2202  if (pTextNode)
2203  {
2204  const SvxULSpaceItem& rULSpace =
2205  pTextNode->SwContentNode::GetAttr( RES_UL_SPACE );
2206 
2207  bool bChange = AM_NOSPACE==eMode ? rULSpace.GetLower() > 0
2208  : rULSpace.GetLower() == 0;
2209 
2210  if( bChange )
2211  {
2212  const SvxULSpaceItem& rCollULSpace =
2213  pTextNode->GetAnyFormatColl().GetULSpace();
2214 
2215  bool bMayReset = AM_NOSPACE==eMode ? rCollULSpace.GetLower() == 0
2216  : rCollULSpace.GetLower() > 0;
2217 
2218  if( bMayReset &&
2219  rCollULSpace.GetUpper() == rULSpace.GetUpper() )
2220  {
2221  pTextNode->ResetAttr( RES_UL_SPACE );
2222  }
2223  else
2224  {
2225  pTextNode->SetAttr(
2226  SvxULSpaceItem( rULSpace.GetUpper(),
2227  AM_NOSPACE==eMode ? 0 : HTML_PARSPACE, RES_UL_SPACE ) );
2228  }
2229  }
2230  }
2231  m_bNoParSpace = AM_NOSPACE==eMode || AM_SOFTNOSPACE==eMode;
2232 
2233  SwPosition aOldPos( *m_pPam->GetPoint() );
2234 
2235  bool bRet = m_xDoc->getIDocumentContentOperations().AppendTextNode( *m_pPam->GetPoint() );
2236 
2237  // split character attributes and maybe set none,
2238  // which are set for the whole paragraph
2239  const SwNodeIndex& rEndIdx = aOldPos.nNode;
2240  const sal_Int32 nEndCnt = aOldPos.nContent.GetIndex();
2241  const SwPosition& rPos = *m_pPam->GetPoint();
2242 
2243  HTMLAttr** pHTMLAttributes = reinterpret_cast<HTMLAttr**>(m_xAttrTab.get());
2244  for (auto nCnt = sizeof(HTMLAttrTable) / sizeof(HTMLAttr*); nCnt--; ++pHTMLAttributes)
2245  {
2246  HTMLAttr *pAttr = *pHTMLAttributes;
2247  if( pAttr && pAttr->GetItem().Which() < RES_PARATR_BEGIN )
2248  {
2249  bool bWholePara = false;
2250 
2251  while( pAttr )
2252  {
2253  HTMLAttr *pNext = pAttr->GetNext();
2254  if( pAttr->GetStartParagraphIdx() < rEndIdx.GetIndex() ||
2255  (!bWholePara &&
2256  pAttr->GetStartParagraph() == rEndIdx &&
2257  pAttr->GetStartContent() != nEndCnt) )
2258  {
2259  bWholePara =
2260  pAttr->GetStartParagraph() == rEndIdx &&
2261  pAttr->GetStartContent() == 0;
2262 
2263  sal_Int32 nStt = pAttr->m_nStartContent;
2264  bool bScript = false;
2265  sal_uInt16 nScriptItem;
2266  bool bInsert = true;
2267  lcl_swhtml_getItemInfo( *pAttr, bScript,
2268  nScriptItem );
2269  // set previous part
2270  if( bScript )
2271  {
2272  const SwTextNode *pTextNd =
2273  pAttr->GetStartParagraph().GetNode().GetTextNode();
2274  OSL_ENSURE( pTextNd, "No text node" );
2275  if( pTextNd )
2276  {
2277  const OUString& rText = pTextNd->GetText();
2278  sal_uInt16 nScriptText =
2279  g_pBreakIt->GetBreakIter()->getScriptType(
2280  rText, pAttr->GetStartContent() );
2281  sal_Int32 nScriptEnd = g_pBreakIt->GetBreakIter()
2282  ->endOfScript( rText, nStt, nScriptText );
2283  while (nScriptEnd < nEndCnt && nScriptEnd != -1)
2284  {
2285  if( nScriptItem == nScriptText )
2286  {
2287  HTMLAttr *pSetAttr =
2288  pAttr->Clone( rEndIdx, nScriptEnd );
2289  pSetAttr->m_nStartContent = nStt;
2290  pSetAttr->ClearPrev();
2291  if( !pNext || bWholePara )
2292  {
2293  if (pSetAttr->m_bInsAtStart)
2294  m_aSetAttrTab.push_front( pSetAttr );
2295  else
2296  m_aSetAttrTab.push_back( pSetAttr );
2297  }
2298  else
2299  pNext->InsertPrev( pSetAttr );
2300  }
2301  nStt = nScriptEnd;
2302  nScriptText = g_pBreakIt->GetBreakIter()->getScriptType(
2303  rText, nStt );
2304  nScriptEnd = g_pBreakIt->GetBreakIter()
2305  ->endOfScript( rText, nStt, nScriptText );
2306  }
2307  bInsert = nScriptItem == nScriptText;
2308  }
2309  }
2310  if( bInsert )
2311  {
2312  HTMLAttr *pSetAttr =
2313  pAttr->Clone( rEndIdx, nEndCnt );
2314  pSetAttr->m_nStartContent = nStt;
2315 
2316  // When the attribute is for the whole paragraph, the outer
2317  // attributes aren't effective anymore. Hence it may not be inserted
2318  // in the Prev-List of an outer attribute, because that won't be
2319  // set. That leads to shifting when fields are used.
2320  if( !pNext || bWholePara )
2321  {
2322  if (pSetAttr->m_bInsAtStart)
2323  m_aSetAttrTab.push_front( pSetAttr );
2324  else
2325  m_aSetAttrTab.push_back( pSetAttr );
2326  }
2327  else
2328  pNext->InsertPrev( pSetAttr );
2329  }
2330  else
2331  {
2332  HTMLAttr *pPrev = pAttr->GetPrev();
2333  if( pPrev )
2334  {
2335  // the previous attributes must be set anyway
2336  if( !pNext || bWholePara )
2337  {
2338  if (pPrev->m_bInsAtStart)
2339  m_aSetAttrTab.push_front( pPrev );
2340  else
2341  m_aSetAttrTab.push_back( pPrev );
2342  }
2343  else
2344  pNext->InsertPrev( pPrev );
2345  }
2346  }
2347  pAttr->ClearPrev();
2348  }
2349 
2350  pAttr->SetStart( rPos );
2351  pAttr = pNext;
2352  }
2353  }
2354  }
2355 
2356  if( bUpdateNum )
2357  {
2358  if( GetNumInfo().GetDepth() )
2359  {
2360  sal_uInt8 nLvl = GetNumInfo().GetLevel();
2361  SetNodeNum( nLvl );
2362  }
2363  else
2365  }
2366 
2367  // We must set the attribute of the paragraph before now (because of JavaScript)
2368  SetAttr();
2369 
2370  // Now it is time to get rid of all script dependent hints that are
2371  // equal to the settings in the style
2372  SwTextNode *pTextNd = rEndIdx.GetNode().GetTextNode();
2373  OSL_ENSURE( pTextNd, "There is the txt node" );
2374  size_t nCntAttr = (pTextNd && pTextNd->GetpSwpHints())
2375  ? pTextNd->GetSwpHints().Count() : 0;
2376  if( nCntAttr )
2377  {
2378  // These are the end position of all script dependent hints.
2379  // If we find a hint that starts before the current end position,
2380  // we have to set it. If we find a hint that start behind or at
2381  // that position, we have to take the hint value into account.
2382  // If it is equal to the style, or in fact the paragraph value
2383  // for that hint, the hint is removed. Otherwise its end position
2384  // is remembered.
2385  sal_Int32 aEndPos[15] =
2386  { 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0 };
2387  SwpHints& rHints = pTextNd->GetSwpHints();
2388  for( size_t i=0; i < nCntAttr; i++ )
2389  {
2390  SwTextAttr *pHt = rHints.Get( i );
2391  sal_uInt16 nWhich = pHt->Which();
2392  sal_Int16 nIdx = 0;
2393  bool bFont = false;
2394  switch( nWhich )
2395  {
2396  case RES_CHRATR_FONT:
2397  nIdx = 0;
2398  bFont = true;
2399  break;
2400  case RES_CHRATR_FONTSIZE:
2401  nIdx = 1;
2402  break;
2403  case RES_CHRATR_LANGUAGE:
2404  nIdx = 2;
2405  break;
2406  case RES_CHRATR_POSTURE:
2407  nIdx = 3;
2408  break;
2409  case RES_CHRATR_WEIGHT:
2410  nIdx = 4;
2411  break;
2412  case RES_CHRATR_CJK_FONT:
2413  nIdx = 5;
2414  bFont = true;
2415  break;
2417  nIdx = 6;
2418  break;
2420  nIdx = 7;
2421  break;
2423  nIdx = 8;
2424  break;
2425  case RES_CHRATR_CJK_WEIGHT:
2426  nIdx = 9;
2427  break;
2428  case RES_CHRATR_CTL_FONT:
2429  nIdx = 10;
2430  bFont = true;
2431  break;
2433  nIdx = 11;
2434  break;
2436  nIdx = 12;
2437  break;
2439  nIdx = 13;
2440  break;
2441  case RES_CHRATR_CTL_WEIGHT:
2442  nIdx = 14;
2443  break;
2444  default:
2445  // Skip to next attribute
2446  continue;
2447  }
2448  const sal_Int32 nStt = pHt->GetStart();
2449  if( nStt >= aEndPos[nIdx] )
2450  {
2451  const SfxPoolItem& rItem =
2452  static_cast<const SwContentNode *>(pTextNd)->GetAttr( nWhich );
2453  if( bFont ? swhtml_css1atr_equalFontItems(rItem,pHt->GetAttr())
2454  : rItem == pHt->GetAttr() )
2455  {
2456  // The hint is the same as set in the paragraph and
2457  // therefore, it can be deleted
2458  // CAUTION!!! This WILL delete the hint and it MAY
2459  // also delete the SwpHints!!! To avoid any trouble
2460  // we leave the loop immediately if this is the last
2461  // hint.
2462  pTextNd->DeleteAttribute( pHt );
2463  if( 1 == nCntAttr )
2464  break;
2465  i--;
2466  nCntAttr--;
2467  }
2468  else
2469  {
2470  // The hint is different. Therefore all hints within that
2471  // hint have to be ignored.
2472  aEndPos[nIdx] = pHt->GetEnd() ? *pHt->GetEnd() : nStt;
2473  }
2474  }
2475  else
2476  {
2477  // The hint starts before another one ends.
2478  // The hint in this case is not deleted
2479  OSL_ENSURE( pHt->GetEnd() && *pHt->GetEnd() <= aEndPos[nIdx],
2480  "hints aren't nested properly!" );
2481  }
2482  }
2483  }
2484 
2485  if (!m_xTable && !--m_nParaCnt)
2486  Show();
2487 
2488  return bRet;
2489 }
2490 
2492 {
 // NOTE(review): the line carrying this function's signature (listing line
 // 2491) is missing from this listing; only the body is visible.
 //
 // Adjusts the RES_UL_SPACE attribute of the node directly in front of the
 // current insert position, provided that node is a text node and has no
 // lower spacing yet. Only runs while m_bNoParSpace is set, and clears it.
2493  //If it already has ParSpace, return
2494  if( !m_bNoParSpace )
2495  return;
2496 
2497  m_bNoParSpace = false;
2498 
 // Index of the node immediately before the node the PaM points to.
2499  SwNodeOffset nNdIdx = m_pPam->GetPoint()->nNode.GetIndex() - 1;
2500 
2501  SwTextNode *pTextNode = m_xDoc->GetNodes()[nNdIdx]->GetTextNode();
2502  if( !pTextNode )
2503  return;
2504 
 // Nothing to do if the node already has a non-zero lower spacing.
2505  SvxULSpaceItem rULSpace =
2506  pTextNode->SwContentNode::GetAttr( RES_UL_SPACE );
2507  if( rULSpace.GetLower() )
2508  return;
2509 
 // If the format collection supplies a lower spacing and agrees on the
 // upper spacing, dropping the node's own RES_UL_SPACE is sufficient.
2510  const SvxULSpaceItem& rCollULSpace =
2511  pTextNode->GetAnyFormatColl().GetULSpace();
2512  if( rCollULSpace.GetLower() &&
2513  rCollULSpace.GetUpper() == rULSpace.GetUpper() )
2514  {
2515  pTextNode->ResetAttr( RES_UL_SPACE );
2516  }
2517  else
2518  {
2519  //What I do here, is that I examine the attributes, and if
2520  //I find out, that it's CJK/CTL, then I set the paragraph space
2521  //to the value set in HTML_CJK_PARSPACE/HTML_CTL_PARSPACE.
2522 
2523  bool bIsCJK = false;
2524  bool bIsCTL = false;
2525 
2526  const size_t nCntAttr = pTextNode->GetpSwpHints()
2527  ? pTextNode->GetSwpHints().Count() : 0;
2528 
 // The first CJK or CTL character hint found decides; the scan stops there.
2529  for(size_t i = 0; i < nCntAttr; ++i)
2530  {
2531  SwTextAttr *const pHt = pTextNode->GetSwpHints().Get(i);
2532  sal_uInt16 const nWhich = pHt->Which();
2533  if (RES_CHRATR_CJK_FONT == nWhich ||
2534  RES_CHRATR_CJK_FONTSIZE == nWhich ||
2535  RES_CHRATR_CJK_LANGUAGE == nWhich ||
2536  RES_CHRATR_CJK_POSTURE == nWhich ||
2537  RES_CHRATR_CJK_WEIGHT == nWhich)
2538  {
2539  bIsCJK = true;
2540  break;
2541  }
2542  if (RES_CHRATR_CTL_FONT == nWhich ||
2543  RES_CHRATR_CTL_FONTSIZE == nWhich ||
2544  RES_CHRATR_CTL_LANGUAGE == nWhich ||
2545  RES_CHRATR_CTL_POSTURE == nWhich ||
2546  RES_CHRATR_CTL_WEIGHT == nWhich)
2547  {
2548  bIsCTL = true;
2549  break;
2550  }
2551  }
2552 
2553  if( bIsCTL )
2554  {
2555  pTextNode->SetAttr(
 // NOTE(review): the argument of this SetAttr() call (listing line 2556)
 // is missing from this listing.
2557  }
2558  else if( bIsCJK )
2559  {
2560  pTextNode->SetAttr(
 // NOTE(review): the argument of this SetAttr() call (listing line 2561)
 // is missing from this listing.
2562  } else {
2563  pTextNode->SetAttr(
 // NOTE(review): the argument of this SetAttr() call (listing line 2564)
 // is missing from this listing.
2565  }
2566  }
2567 }
2568 
2570 {
 // NOTE(review): the line carrying this function's signature (listing line
 // 2569) is missing from this listing; only the body is visible.
2571  // Here
2572  // - a EndAction is called, so the document is formatted
2573  // - a Reschedule is called,
2574  // - the own View-Shell is set again
2575  // - and a StartAction is called
2576 
2577  OSL_ENSURE( SvParserState::Working==eState, "Show not in working state - That can go wrong" );
2578  SwViewShell *pOldVSh = CallEndAction();
2579 
 // NOTE(review): listing line 2580 is missing here - presumably the
 // Reschedule call announced in the comment above; confirm against the
 // original file.
2581 
 // Flag the parser state as Error when the doc shell reports an aborting
 // import or the document's reference count is down to 1.
2582  if( ( m_xDoc->GetDocShell() && m_xDoc->GetDocShell()->IsAbortingImport() )
2583  || 1 == m_xDoc->getReferenceCount() )
2584  {
2585  // was the import aborted by SFX?
2586  eState = SvParserState::Error;
2587  }
2588 
2589  // Fetch the SwViewShell again, as it could be destroyed in Reschedule.
2590  SwViewShell *pVSh = CallStartAction( pOldVSh );
2591 
2592  // is the current node not visible anymore, then we use a bigger increment
2593  if( pVSh )
2594  {
 // NOTE(review): listing line 2595 (the left-hand side and condition of
 // this "? 5 : 50" expression) is missing from this listing.
2596  ? 5 : 50;
2597  }
2598 }
2599 
2601 {
 // NOTE(review): the line carrying this function's signature (listing line
 // 2600) is missing from this listing; only the body is visible.
2602  // Here
2603  // - a Reschedule is called, so it can be scrolled
2604  // - the own View-Shell is set again
2605  // - a StartAction/EndAction is called, when there was scrolling.
2606 
2607  OSL_ENSURE( SvParserState::Working==eState, "ShowStatLine not in working state - That can go wrong" );
2608 
2609  // scroll bar
2610  if (m_xProgress)
2611  {
 // Report the current read position of the input stream as progress.
2612  m_xProgress->Update(rInput.Tell());
 // NOTE(review): listing line 2613 is missing from this listing.
2614  }
2615  else
2616  {
 // NOTE(review): listing line 2617 is missing from this listing.
2618 
2619  if( ( m_xDoc->GetDocShell() && m_xDoc->GetDocShell()->IsAbortingImport() )
2620  || 1 == m_xDoc->getReferenceCount() )
2621  // was the import aborted by SFX?
2622  eState = SvParserState::Error;
2623 
 // NOTE(review): listing line 2624, which must introduce pVSh, is missing
 // from this listing.
 // When the view shell has an invalid rectangle, close and reopen the
 // action without the action-pending and pointer checks.
2625  if( pVSh && pVSh->HasInvalidRect() )
2626  {
2627  CallEndAction( false, false );
2628  CallStartAction( pVSh, false );
2629  }
2630  }
2631 }
2632 
2634 {
 // NOTE(review): the line carrying this function's signature (listing line
 // 2633) is missing from this listing; the body uses the names pVSh and
 // bChkPtr, which must be its parameters.
 //
 // Remembers a view shell in m_pActionViewShell, starts an action on it and
 // returns it (nullptr if there is none).
2635  OSL_ENSURE( !m_pActionViewShell, "CallStartAction: SwViewShell already set" );
2636 
 // Without a shell from the caller, or when asked to re-check, take the
 // document's current view shell instead.
2637  if( !pVSh || bChkPtr )
2638  {
2639 #if OSL_DEBUG_LEVEL > 0
2640  SwViewShell *pOldVSh = pVSh;
2641 #endif
2642  pVSh = m_xDoc->getIDocumentLayoutAccess().GetCurrentViewShell();
2643 #if OSL_DEBUG_LEVEL > 0
2644  OSL_ENSURE( !pVSh || !pOldVSh || pOldVSh == pVSh, "CallStartAction: Who swapped the SwViewShell?" );
 // pVSh is already null in this branch, so the assignment has no effect.
2645  if( pOldVSh && !pVSh )
2646  pVSh = nullptr;
2647 #endif
2648  }
2649  m_pActionViewShell = pVSh;
2650 
2651  if( m_pActionViewShell )
2652  {
 // Edit shells get their own StartAction().
2653  if( auto pEditShell = dynamic_cast< SwEditShell *>( m_pActionViewShell ) )
2654  pEditShell->StartAction();
2655  else
 // NOTE(review): the statement of this else branch (listing line 2656) is
 // missing from this listing.
2657  }
2658 
2659  return m_pActionViewShell;
2660 }
2661 
 // Ends the action on the remembered view shell (m_pActionViewShell) and
 // forgets that shell.
 // bChkAction: if true, nothing is ended unless the shell reports ActionPend().
 // bChkPtr: if true, first verify that the remembered shell is still the
 // document's current view shell and forget it otherwise.
 // Returns early with m_pActionViewShell when there is nothing to end;
 // otherwise returns pVSh (see the note at the end of the function).
2662 SwViewShell *SwHTMLParser::CallEndAction( bool bChkAction, bool bChkPtr )
2663 {
2664  if( bChkPtr )
2665  {
2666  SwViewShell *pVSh = m_xDoc->getIDocumentLayoutAccess().GetCurrentViewShell();
2667  OSL_ENSURE( !pVSh || m_pActionViewShell == pVSh,
2668  "CallEndAction: Who swapped the SwViewShell?" );
2669 #if OSL_DEBUG_LEVEL > 0
 // pVSh is already null in this branch, so the assignment has no effect.
2670  if( m_pActionViewShell && !pVSh )
2671  pVSh = nullptr;
2672 #endif
2673  if( pVSh != m_pActionViewShell )
2674  m_pActionViewShell = nullptr;
2675  }
2676 
2677  if( !m_pActionViewShell || (bChkAction && !m_pActionViewShell->ActionPend()) )
2678  return m_pActionViewShell;
2679 
2680  if( dynamic_cast< const SwEditShell *>( m_pActionViewShell ) != nullptr )
2681  {
2682  // Already scrolled?, then make sure that the view doesn't move!
 // The view lock and the EndActionByVirDev flag are saved here and
 // restored after EndAction().
2683  const bool bOldLock = m_pActionViewShell->IsViewLocked();
2684  m_pActionViewShell->LockView( true );
2685  const bool bOldEndActionByVirDev = m_pActionViewShell->IsEndActionByVirDev();
 // NOTE(review): listing line 2686 is missing from this listing.
2687  static_cast<SwEditShell*>(m_pActionViewShell)->EndAction();
2688  m_pActionViewShell->SetEndActionByVirDev( bOldEndActionByVirDev );
2689  m_pActionViewShell->LockView( bOldLock );
2690 
2691  // bChkJumpMark is only set when the object was also found
2692  if( m_bChkJumpMark )
2693  {
2694  const Point aVisSttPos( DOCUMENTBORDER, DOCUMENTBORDER );
2695  if( GetMedium() && aVisSttPos == m_pActionViewShell->VisArea().Pos() )
 // NOTE(review): the start of the call whose last argument follows
 // (listing line 2696) is missing from this listing.
2697  GetMedium()->GetURLObject().GetMark() );
2698  m_bChkJumpMark = false;
2699  }
2700  }
2701  else
 // NOTE(review): the statement of this else branch (listing line 2702) is
 // missing from this listing.
2703 
2704  // if the parser holds the last reference to the document, then we can
2705  // abort here and set an error.
2706  if( 1 == m_xDoc->getReferenceCount() )
2707  {
2708  eState = SvParserState::Error;
2709  }
2710 
 // NOTE(review): listing line 2711, which must declare the pVSh returned
 // below, is missing from this listing.
2712  m_pActionViewShell = nullptr;
2713 
2714  return pVSh;
2715 }
2716 
2718 {
 // NOTE(review): the line carrying this function's signature (listing line
 // 2717) is missing from this listing; only the body is visible.
 //
 // Compares the remembered view shell with the document's current view
 // shell, forgets it if they differ, and returns m_pActionViewShell.
2719  SwViewShell *pVSh = m_xDoc->getIDocumentLayoutAccess().GetCurrentViewShell();
2720  OSL_ENSURE( !pVSh || m_pActionViewShell == pVSh,
2721  "CheckActionViewShell: Who has swapped SwViewShell?" );
2722 #if OSL_DEBUG_LEVEL > 0
 // pVSh is already null in this branch, so the assignment has no effect.
2723  if( m_pActionViewShell && !pVSh )
2724  pVSh = nullptr;
2725 #endif
2726  if( pVSh != m_pActionViewShell )
2727  m_pActionViewShell = nullptr;
2728 
2729  return m_pActionViewShell;
2730 }
2731 
2733  : m_pFrameFormat(pFrameFormat)
2734 {
 // Constructor: stores the given frame format pointer in m_pFrameFormat.
 // NOTE(review): the signature line (listing line 2732) and the only body
 // line (listing line 2735) are missing from this listing.
2736 }
2737 
2739 {
 // NOTE(review): the line carrying this function's signature (listing line
 // 2738) is missing from this listing; rHint must be its parameter.
 // A Dying hint clears the stored frame format pointer.
2740  if (rHint.GetId() == SfxHintId::Dying)
2741  m_pFrameFormat = nullptr;
2742 }
2743 
 // Applies the attributes queued in m_aSetAttrTab to the document, then
 // re-anchors the fly frames recorded in m_aMoveFlyFrames/m_aMoveFlyCnts and
 // finally inserts the field attributes collected on the way.
 // bChkEnd: if true, an attribute is only set when it ends before the
 // current position (exact conditions below); if false, the decision is
 // made from the node index relative to GetEndOfExtras().
 // bBeforeTable: attributes ending in the current node are ended in the
 // previous node, or discarded if they also start in the current node.
 // pPostIts: if non-null, Postit and Script fields are pushed to the front
 // of this deque instead of being inserted.
2744 void SwHTMLParser::SetAttr_( bool bChkEnd, bool bBeforeTable,
2745  std::deque<std::unique_ptr<HTMLAttr>> *pPostIts )
2746 {
2747  SwPaM aAttrPam( *m_pPam->GetPoint() );
2748  const SwNodeIndex& rEndIdx = m_pPam->GetPoint()->nNode;
2749  const sal_Int32 nEndCnt = m_pPam->GetPoint()->nContent.GetIndex();
2750  HTMLAttr* pAttr;
2751  SwContentNode* pCNd;
2752 
 // Field attributes are collected here and inserted at the very end.
2753  std::vector<std::unique_ptr<HTMLAttr>> aFields;
2754 
 // Walk the queue from back to front so that erasing entry n does not
 // disturb the entries still to be visited.
2755  for( auto n = m_aSetAttrTab.size(); n; )
2756  {
2757  pAttr = m_aSetAttrTab[ --n ];
2758  sal_uInt16 nWhich = pAttr->m_pItem->Which();
2759 
2760  SwNodeOffset nEndParaIdx = pAttr->GetEndParagraphIdx();
2761  bool bSetAttr;
2762  if( bChkEnd )
2763  {
2764  // Set character attribute with end early on, so set them still in
2765  // the current paragraph (because of JavaScript and various "chats"(?)).
2766  // This shouldn't be done for attributes which are used for
2767  // the whole paragraph, because they could be from a paragraph style
2768  // which can't be set. Because the attributes are inserted with
2769  // SETATTR_DONTREPLACE, they should be able to be set later.
2770  bSetAttr = ( nEndParaIdx < rEndIdx.GetIndex() &&
2771  (RES_LR_SPACE != nWhich || !GetNumInfo().GetNumRule()) ) ||
2772  ( !pAttr->IsLikePara() &&
2773  nEndParaIdx == rEndIdx.GetIndex() &&
2774  pAttr->GetEndContent() < nEndCnt &&
2775  (isCHRATR(nWhich) || isTXTATR_WITHEND(nWhich)) ) ||
2776  ( bBeforeTable &&
2777  nEndParaIdx == rEndIdx.GetIndex() &&
2778  !pAttr->GetEndContent() );
2779  }
2780  else
2781  {
2782  // Attributes in body nodes array section shouldn't be set if we are in a
2783  // special nodes array section, but vice versa it's possible.
2784  SwNodeOffset nEndOfIcons = m_xDoc->GetNodes().GetEndOfExtras().GetIndex();
2785  bSetAttr = nEndParaIdx < rEndIdx.GetIndex() ||
2786  rEndIdx.GetIndex() > nEndOfIcons ||
2787  nEndParaIdx <= nEndOfIcons;
2788  }
2789 
2790  if( bSetAttr )
2791  {
2792  // The attribute shouldn't be in the list of temporary paragraph
2793  // attributes, because then it would be deleted.
2794  while( !m_aParaAttrs.empty() )
2795  {
2796  OSL_ENSURE( pAttr != m_aParaAttrs.back(),
2797  "SetAttr: Attribute must not yet be set" );
2798  m_aParaAttrs.pop_back();
2799  }
2800 
2801  // then set it
2802  m_aSetAttrTab.erase( m_aSetAttrTab.begin() + n );
2803 
 // Process the attribute and then each entry of its Prev chain.
 // Every path deletes pAttr or hands it to an owning container.
2804  while( pAttr )
2805  {
2806  HTMLAttr *pPrev = pAttr->GetPrev();
2807  if( !pAttr->m_bValid )
2808  {
2809  // invalid attributes can be deleted
2810  delete pAttr;
2811  pAttr = pPrev;
2812  continue;
2813  }
2814 
2815  pCNd = pAttr->m_nStartPara.GetNode().GetContentNode();
2816  if( !pCNd )
2817  {
2818  // because of the awful deleting of nodes an index can also
2819  // point to an end node :-(
2820  if ( (pAttr->GetStartParagraph() == pAttr->GetEndParagraph()) &&
2821  !isTXTATR_NOEND(nWhich) )
2822  {
2823  // when the end index also points to the node, we don't
2824  // need to set attributes anymore, except if it's a text attribute.
2825  delete pAttr;
2826  pAttr = pPrev;
2827  continue;
2828  }
2829  pCNd = m_xDoc->GetNodes().GoNext( &(pAttr->m_nStartPara) );
2830  if( pCNd )
2831  pAttr->m_nStartContent = 0;
2832  else
2833  {
2834  OSL_ENSURE( false, "SetAttr: GoNext() failed!" );
2835  delete pAttr;
2836  pAttr = pPrev;
2837  continue;
2838  }
2839  }
2840  aAttrPam.GetPoint()->nNode = pAttr->m_nStartPara;
2841 
2842  // because of the deleting of BRs the start index can also
2843  // point behind the end the text
2844  if( pAttr->m_nStartContent > pCNd->Len() )
2845  pAttr->m_nStartContent = pCNd->Len();
2846  aAttrPam.GetPoint()->nContent.Assign( pCNd, pAttr->m_nStartContent );
2847 
 // Mark = attribute start; Point is moved to the attribute end below.
2848  aAttrPam.SetMark();
2849  if ( (pAttr->GetStartParagraph() != pAttr->GetEndParagraph()) &&
2850  !isTXTATR_NOEND(nWhich) )
2851  {
2852  pCNd = pAttr->m_nEndPara.GetNode().GetContentNode();
2853  if( !pCNd )
2854  {
2855  pCNd = SwNodes::GoPrevious( &(pAttr->m_nEndPara) );
2856  if( pCNd )
2857  pAttr->m_nEndContent = pCNd->Len();
2858  else
2859  {
2860  OSL_ENSURE( false, "SetAttr: GoPrevious() failed!" );
2861  aAttrPam.DeleteMark();
2862  delete pAttr;
2863  pAttr = pPrev;
2864  continue;
2865  }
2866  }
2867 
2868  aAttrPam.GetPoint()->nNode = pAttr->m_nEndPara;
2869  }
2870  else if( pAttr->IsLikePara() )
2871  {
2872  pAttr->m_nEndContent = pCNd->Len();
2873  }
2874 
2875  // because of the deleting of BRs the start index can also
2876  // point behind the end the text
2877  if( pAttr->m_nEndContent > pCNd->Len() )
2878  pAttr->m_nEndContent = pCNd->Len();
2879 
2880  aAttrPam.GetPoint()->nContent.Assign( pCNd, pAttr->m_nEndContent );
2881  if( bBeforeTable &&
2882  aAttrPam.GetPoint()->nNode.GetIndex() ==
2883  rEndIdx.GetIndex() )
2884  {
2885  // If we're before inserting a table and the attribute ends
2886  // in the current node, then we must end it in the previous
2887  // node or discard it, if it starts in that node.
2888  if( nWhich != RES_BREAK && nWhich != RES_PAGEDESC &&
2889  !isTXTATR_NOEND(nWhich) )
2890  {
2891  if( aAttrPam.GetMark()->nNode.GetIndex() !=
2892  rEndIdx.GetIndex() )
2893  {
2894  OSL_ENSURE( !aAttrPam.GetPoint()->nContent.GetIndex(),
2895  "Content-Position before table not 0???" );
2896  aAttrPam.Move( fnMoveBackward );
2897  }
2898  else
2899  {
2900  aAttrPam.DeleteMark();
2901  delete pAttr;
2902  pAttr = pPrev;
2903  continue;
2904  }
2905  }
2906  }
2907 
2908  switch( nWhich )
2909  {
2910  case RES_FLTR_BOOKMARK: // insert bookmark
2911  {
2912  const OUString sName( static_cast<SfxStringItem*>(pAttr->m_pItem.get())->GetValue() );
2913  IDocumentMarkAccess* const pMarkAccess = m_xDoc->getIDocumentMarkAccess();
2914  IDocumentMarkAccess::const_iterator_t ppBkmk = pMarkAccess->findMark( sName );
2915  if( ppBkmk != pMarkAccess->getAllMarksEnd() &&
2916  (*ppBkmk)->GetMarkStart() == *aAttrPam.GetPoint() )
2917  break; // do not generate duplicates on this position
2918  aAttrPam.DeleteMark();
2919  const ::sw::mark::IMark* const pNewMark = pMarkAccess->makeMark(
2920  aAttrPam,
2921  sName,
 // NOTE(review): the remaining makeMark() arguments (listing lines
 // 2922-2923) are missing from this listing.
2924 
2925  // jump to bookmark
2926  if( JumpToMarks::Mark == m_eJumpTo && pNewMark->GetName() == m_sJmpMark )
2927  {
2928  m_bChkJumpMark = true;
 // NOTE(review): listing line 2929 is missing from this listing.
2930  }
2931  }
2932  break;
2933  case RES_TXTATR_FIELD:
2934  case RES_TXTATR_ANNOTATION:
2935  case RES_TXTATR_INPUTFIELD:
2936  {
2937  SwFieldIds nFieldWhich =
2938  pPostIts
2939  ? static_cast<const SwFormatField *>(pAttr->m_pItem.get())->GetField()->GetTyp()->Which()
 // NOTE(review): the else part of this conditional expression (listing
 // line 2940) is missing from this listing.
2941  if( pPostIts && (SwFieldIds::Postit == nFieldWhich ||
2942  SwFieldIds::Script == nFieldWhich) )
2943  {
2944  pPostIts->emplace_front( pAttr );
2945  }
2946  else
2947  {
2948  aFields.emplace_back( pAttr);
2949  }
2950  }
 // Ownership of pAttr went to one of the containers above, so it is not
 // deleted here; continue directly with the Prev chain.
2951  aAttrPam.DeleteMark();
2952  pAttr = pPrev;
2953  continue;
2954 
2955  case RES_LR_SPACE:
2956  if( aAttrPam.GetPoint()->nNode.GetIndex() ==
2957  aAttrPam.GetMark()->nNode.GetIndex())
2958  {
2959  // because of numbering set this attribute directly at node
2960  pCNd->SetAttr( *pAttr->m_pItem );
2961  break;
2962  }
2963  OSL_ENSURE( false,
2964  "LRSpace set over multiple paragraphs!" );
2965  [[fallthrough]]; // (shouldn't reach this point anyway)
2966 
2967  // tdf#94088 expand RES_BACKGROUND to the new fill attribute
2968  // definitions in the range [XATTR_FILL_FIRST .. XATTR_FILL_LAST].
2969  // This is the right place in the future if the adapted fill attributes
2970  // may be handled more directly in HTML import to handle them.
2971  case RES_BACKGROUND:
2972  {
2973  const SvxBrushItem& rBrush = static_cast< SvxBrushItem& >(*pAttr->m_pItem);
 // NOTE(review): listing lines 2974 and 2976 are missing from this
 // listing; they must introduce and fill aNewSet, which is used below.
2975 
2977  m_xDoc->getIDocumentContentOperations().InsertItemSet(aAttrPam, aNewSet, SetAttrMode::DONTREPLACE);
2978  break;
2979  }
2980  default:
2981 
2982  // maybe jump to a bookmark
2983  if( RES_TXTATR_INETFMT == nWhich &&
 // NOTE(review): one operand of this condition (listing line 2984) is
 // missing from this listing.
2985  m_sJmpMark == static_cast<SwFormatINetFormat*>(pAttr->m_pItem.get())->GetName() )
2986  {
2987  m_bChkJumpMark = true;
 // NOTE(review): listing line 2988 is missing from this listing.
2989  }
2990 
2991  m_xDoc->getIDocumentContentOperations().InsertPoolItem( aAttrPam, *pAttr->m_pItem, SetAttrMode::DONTREPLACE );
2992  }
2993  aAttrPam.DeleteMark();
2994 
2995  delete pAttr;
2996  pAttr = pPrev;
2997  }
2998  }
2999  }
3000 
 // Second pass: turn recorded paragraph-anchored flys into
 // character-anchored ones. m_aMoveFlyFrames and m_aMoveFlyCnts are
 // parallel containers and are always erased together.
3001  for( auto n = m_aMoveFlyFrames.size(); n; )
3002  {
3003  SwFrameFormat *pFrameFormat = m_aMoveFlyFrames[--n]->GetFrameFormat();
3004  if (!pFrameFormat)
3005  {
3006  SAL_WARN("sw.html", "SwFrameFormat deleted during import");
3007  m_aMoveFlyFrames.erase( m_aMoveFlyFrames.begin() + n );
3008  m_aMoveFlyCnts.erase( m_aMoveFlyCnts.begin() + n );
3009  continue;
3010  }
3011 
3012  const SwFormatAnchor& rAnchor = pFrameFormat->GetAnchor();
3013  OSL_ENSURE( RndStdIds::FLY_AT_PARA == rAnchor.GetAnchorId(),
3014  "Only At-Para flys need special handling" );
3015  const SwPosition *pFlyPos = rAnchor.GetContentAnchor();
3016  SwNodeOffset nFlyParaIdx = pFlyPos->nNode.GetIndex();
3017  bool bMoveFly;
3018  if( bChkEnd )
3019  {
3020  bMoveFly = nFlyParaIdx < rEndIdx.GetIndex() ||
3021  ( nFlyParaIdx == rEndIdx.GetIndex() &&
3022  m_aMoveFlyCnts[n] < nEndCnt );
3023  }
3024  else
3025  {
3026  SwNodeOffset nEndOfIcons = m_xDoc->GetNodes().GetEndOfExtras().GetIndex();
3027  bMoveFly = nFlyParaIdx < rEndIdx.GetIndex() ||
3028  rEndIdx.GetIndex() > nEndOfIcons ||
3029  nFlyParaIdx <= nEndOfIcons;
3030  }
3031  if( bMoveFly )
3032  {
 // Frames are removed before the anchor changes and recreated afterwards.
3033  pFrameFormat->DelFrames();
3034  *aAttrPam.GetPoint() = *pFlyPos;
3035  aAttrPam.GetPoint()->nContent.Assign( aAttrPam.GetContentNode(),
3036  m_aMoveFlyCnts[n] );
3037  SwFormatAnchor aAnchor( rAnchor );
3038  aAnchor.SetType( RndStdIds::FLY_AT_CHAR );
3039  aAnchor.SetAnchor( aAttrPam.GetPoint() );
3040  pFrameFormat->SetFormatAttr( aAnchor );
3041 
 // LEFT/TOP orientations are made relative to the anchor character.
3042  const SwFormatHoriOrient& rHoriOri = pFrameFormat->GetHoriOrient();
3043  if( text::HoriOrientation::LEFT == rHoriOri.GetHoriOrient() )
3044  {
3045  SwFormatHoriOrient aHoriOri( rHoriOri );
3046  aHoriOri.SetRelationOrient( text::RelOrientation::CHAR );
3047  pFrameFormat->SetFormatAttr( aHoriOri );
3048  }
3049  const SwFormatVertOrient& rVertOri = pFrameFormat->GetVertOrient();
3050  if( text::VertOrientation::TOP == rVertOri.GetVertOrient() )
3051  {
3052  SwFormatVertOrient aVertOri( rVertOri );
3053  aVertOri.SetRelationOrient( text::RelOrientation::CHAR );
3054  pFrameFormat->SetFormatAttr( aVertOri );
3055  }
3056 
3057  pFrameFormat->MakeFrames();
3058  m_aMoveFlyFrames.erase( m_aMoveFlyFrames.begin() + n );
3059  m_aMoveFlyCnts.erase( m_aMoveFlyCnts.begin() + n );
3060  }
3061  }
 // Third pass: insert the fields collected in the first loop.
3062  for (auto & field : aFields)
3063  {
3064  pCNd = field->m_nStartPara.GetNode().GetContentNode();
3065  aAttrPam.GetPoint()->nNode = field->m_nStartPara;
3066  aAttrPam.GetPoint()->nContent.Assign( pCNd, field->m_nStartContent );
3067 
3068  if( bBeforeTable &&
3069  aAttrPam.GetPoint()->nNode.GetIndex() == rEndIdx.GetIndex() )
3070  {
3071  OSL_ENSURE( !bBeforeTable, "Aha, the case does occur" );
3072  OSL_ENSURE( !aAttrPam.GetPoint()->nContent.GetIndex(),
3073  "Content-Position before table not 0???" );
3074  // !!!
3075  aAttrPam.Move( fnMoveBackward );
3076  }
3077 
3078  m_xDoc->getIDocumentContentOperations().InsertPoolItem( aAttrPam, *field->m_pItem );
3079 
3080  field.reset();
3081  }
3082  aFields.clear();
3083 }
3084 
3085 void SwHTMLParser::NewAttr(const std::shared_ptr<HTMLAttrTable>& rAttrTable, HTMLAttr **ppAttr, const SfxPoolItem& rItem )
3086 {
3087  // Font height and font colour as well as escape attributes may not be
3088  // combined. Therefore they're saved in a list and in it the last opened
3089  // attribute is at the beginning and count is always one. For all other
3090  // attributes count is just incremented.
3091  if( *ppAttr )
3092  {
3093  HTMLAttr *pAttr = new HTMLAttr(*m_pPam->GetPoint(), rItem, ppAttr, rAttrTable);
3094  pAttr->InsertNext( *ppAttr );
3095  (*ppAttr) = pAttr;
3096  }
3097  else
3098  (*ppAttr) = new HTMLAttr(*m_pPam->GetPoint(), rItem, ppAttr, rAttrTable);
3099 }
3100 
3101 bool SwHTMLParser::EndAttr( HTMLAttr* pAttr, bool bChkEmpty )
3102 {
3103  bool bRet = true;
3104 
3105  // The list header is saved in the attribute.
3106  HTMLAttr **ppHead = pAttr->m_ppHead;
3107 
3108  OSL_ENSURE( ppHead, "No list header attribute found!" );
3109 
3110  // save the current position as end position
3111  const SwNodeIndex* pEndIdx = &m_pPam->GetPoint()->nNode;
3112  sal_Int32 nEndCnt = m_pPam->GetPoint()->nContent.GetIndex();
3113 
3114  // Is the last started or an earlier started attribute being ended?
3115  HTMLAttr *pLast = nullptr;
3116  if( ppHead && pAttr != *ppHead )
3117  {
3118  // The last started attribute isn't being ended
3119 
3120  // Then we look for attribute which was started immediately afterwards,
3121  // which has also not yet been ended (otherwise it would no longer be
3122  // in the list).
3123  pLast = *ppHead;
3124  while( pLast && pLast->GetNext() != pAttr )
3125  pLast = pLast->GetNext();
3126 
3127  OSL_ENSURE( pLast, "Attribute not found in own list!" );
3128  }
3129 
3130  bool bMoveBack = false;
3131  sal_uInt16 nWhich = pAttr->m_pItem->Which();
3132  if( !nEndCnt && RES_PARATR_BEGIN <= nWhich &&
3133  *pEndIdx != pAttr->GetStartParagraph() )
3134  {
3135  // Then move back one position in the content!
3136  bMoveBack = m_pPam->Move( fnMoveBackward );
3137  nEndCnt = m_pPam->GetPoint()->nContent.GetIndex();
3138  }
3139 
3140  // now end the attribute
3141  HTMLAttr *pNext = pAttr->GetNext();
3142 
3143  bool bInsert;
3144  sal_uInt16 nScriptItem = 0;
3145  bool bScript = false;
3146  // does it have a non-empty range?
3147  if( !bChkEmpty || (RES_PARATR_BEGIN <= nWhich && bMoveBack) ||
3148  RES_PAGEDESC == nWhich || RES_BREAK == nWhich ||
3149  *pEndIdx != pAttr->GetStartParagraph() ||
3150  nEndCnt != pAttr->GetStartContent() )
3151  {
3152  bInsert = true;
3153  // We do some optimization for script dependent attributes here.
3154  if( *pEndIdx == pAttr->GetStartParagraph() )
3155  {
3156  lcl_swhtml_getItemInfo( *pAttr, bScript, nScriptItem );
3157  }
3158  }
3159  else
3160  {
3161  bInsert = false;
3162  }
3163 
3164  const SwTextNode *pTextNd = (bInsert && bScript) ?
3165  pAttr->GetStartParagraph().GetNode().GetTextNode() :
3166  nullptr;
3167 
3168  if (pTextNd)
3169  {
3170  const OUString& rText = pTextNd->GetText();
3171  sal_uInt16 nScriptText = g_pBreakIt->GetBreakIter()->getScriptType(
3172  rText, pAttr->GetStartContent() );
3173  sal_Int32 nScriptEnd = g_pBreakIt->GetBreakIter()
3174  ->endOfScript( rText, pAttr->GetStartContent(), nScriptText );
3175  while (nScriptEnd < nEndCnt && nScriptEnd != -1)
3176  {
3177  if( nScriptItem == nScriptText )
3178  {
3179  HTMLAttr *pSetAttr = pAttr->Clone( *pEndIdx, nScriptEnd );
3180  pSetAttr->ClearPrev();
3181  if( pNext )
3182  pNext->InsertPrev( pSetAttr );
3183  else
3184  {
3185  if (pSetAttr->m_bInsAtStart)
3186  m_aSetAttrTab.push_front( pSetAttr );
3187  else
3188  m_aSetAttrTab.push_back( pSetAttr );
3189  }
3190  }
3191  pAttr->m_nStartContent = nScriptEnd;
3192  nScriptText = g_pBreakIt->GetBreakIter()->getScriptType(
3193  rText, nScriptEnd );
3194  nScriptEnd = g_pBreakIt->GetBreakIter()
3195  ->endOfScript( rText, nScriptEnd, nScriptText );
3196  }
3197  bInsert = nScriptItem == nScriptText;
3198  }
3199  if( bInsert )
3200  {
3201  pAttr->m_nEndPara = *pEndIdx;
3202  pAttr->m_nEndContent = nEndCnt;
3203  pAttr->m_bInsAtStart = RES_TXTATR_INETFMT != nWhich &&
3204  RES_TXTATR_CHARFMT != nWhich;
3205 
3206  if( !pNext )
3207  {
3208  // No open attributes of that type exists any longer, so all
3209  // can be set. Except they depend on another attribute, then
3210  // they're appended there.
3211  if (pAttr->m_bInsAtStart)
3212  m_aSetAttrTab.push_front( pAttr );
3213  else
3214  m_aSetAttrTab.push_back( pAttr );
3215  }
3216  else
3217  {
3218  // There are other open attributes of that type,
3219  // therefore the setting must be postponed.
3220  // Hence the current attribute is added at the end
3221  // of the Prev-List of the successor.
3222  pNext->InsertPrev( pAttr );
3223  }
3224  }
3225  else
3226  {
3227  // Then don't insert, but delete. Because of the "faking" of styles
3228  // by hard attributing there can be also other empty attributes in the
3229  // Prev-List, which must be set anyway.
3230  HTMLAttr *pPrev = pAttr->GetPrev();
3231  bRet = false;
3232  delete pAttr;
3233 
3234  if( pPrev )
3235  {
3236  // The previous attributes must be set anyway.
3237  if( pNext )
3238  pNext->InsertPrev( pPrev );
3239  else
3240  {
3241  if (pPrev->m_bInsAtStart)
3242  m_aSetAttrTab.push_front( pPrev );
3243  else
3244  m_aSetAttrTab.push_back( pPrev );
3245  }
3246  }
3247 
3248  }
3249 
3250  // If the first attribute of the list was set, then the list header
3251  // must be corrected as well.
3252  if( pLast )
3253  pLast->m_pNext = pNext;
3254  else if( ppHead )
3255  *ppHead = pNext;
3256 
3257  if( bMoveBack )
3259 
3260  return bRet;
3261 }
3262 
3264 {
 // NOTE(review): the line carrying this function's signature (listing line
 // 3263) is missing from this listing; pAttr must be its parameter.
 //
 // Unlinks pAttr from its list of open attributes and deletes it without
 // setting it. Attributes in its Prev list are still passed on: to the next
 // open attribute if there is one, otherwise to m_aSetAttrTab.
3265  // preliminary paragraph attributes are not allowed here, they could
3266  // be set here and then the pointers become invalid!
3267  OSL_ENSURE(m_aParaAttrs.empty(),
3268  "Danger: there are non-final paragraph attributes");
3269  m_aParaAttrs.clear();
3270 
3271  // The list header is saved in the attribute
3272  HTMLAttr **ppHead = pAttr->m_ppHead;
3273 
3274  OSL_ENSURE( ppHead, "no list header attribute found!" );
3275 
3276  // Is the last started or an earlier started attribute being removed?
3277  HTMLAttr *pLast = nullptr;
3278  if( ppHead && pAttr != *ppHead )
3279  {
3280  // The last started attribute isn't being ended
3281 
3282  // Then we look for attribute which was started immediately afterwards,
3283  // which has also not yet been ended (otherwise it would no longer be
3284  // in the list).
3285  pLast = *ppHead;
3286  while( pLast && pLast->GetNext() != pAttr )
3287  pLast = pLast->GetNext();
3288 
3289  OSL_ENSURE( pLast, "Attribute not found in own list!" );
3290  }
3291 
3292  // now delete the attribute
 // Next/Prev are read before the delete because they are needed afterwards.
3293  HTMLAttr *pNext = pAttr->GetNext();
3294  HTMLAttr *pPrev = pAttr->GetPrev();
3295  //hold ref to xAttrTab until end of scope to ensure *ppHead validity
3296  std::shared_ptr<HTMLAttrTable> xKeepAlive(pAttr->m_xAttrTab);
3297  delete pAttr;
3298 
3299  if( pPrev )
3300  {
3301  // The previous attributes must be set anyway.
3302  if( pNext )
3303  pNext->InsertPrev( pPrev );
3304  else
3305  {
3306  if (pPrev->m_bInsAtStart)
3307  m_aSetAttrTab.push_front( pPrev );
3308  else
3309  m_aSetAttrTab.push_back( pPrev );
3310  }
3311  }
3312 
3313  // If the first attribute of the list was deleted, then the list header
3314  // must be corrected as well.
3315  if( pLast )
3316  pLast->m_pNext = pNext;
3317  else if( ppHead )
3318  *ppHead = pNext;
3319 }
3320 
3321 void SwHTMLParser::SaveAttrTab(std::shared_ptr<HTMLAttrTable> const & rNewAttrTab)
3322 {
3323  // preliminary paragraph attributes are not allowed here, they could
3324  // be set here and then the pointers become invalid!
3325  OSL_ENSURE(m_aParaAttrs.empty(),
3326  "Danger: there are non-final paragraph attributes");
3327  m_aParaAttrs.clear();
3328 
3329  HTMLAttr** pHTMLAttributes = reinterpret_cast<HTMLAttr**>(m_xAttrTab.get());
3330  HTMLAttr** pSaveAttributes = reinterpret_cast<HTMLAttr**>(rNewAttrTab.get());
3331 
3332  for (auto nCnt = sizeof(HTMLAttrTable) / sizeof(HTMLAttr*); nCnt--; ++pHTMLAttributes, ++pSaveAttributes)
3333  {
3334  *pSaveAttributes = *pHTMLAttributes;
3335 
3336  HTMLAttr *pAttr = *pSaveAttributes;
3337  while (pAttr)
3338  {
3339  pAttr->SetHead(pSaveAttributes, rNewAttrTab);
3340  pAttr = pAttr->GetNext();
3341  }
3342 
3343  *pHTMLAttributes = nullptr;
3344  }
3345 }
3346 
// Splits every open attribute of the current table (m_xAttrTab) at the
// current PaM position and moves the re-started attributes into rNewAttrTab;
// the current table is left empty.
// For each attribute, the part lying before the split point is queued for
// setting (as a clone, or -- when no clone is needed -- as its Previous
// list), then the attribute itself is Reset() to start at the current PaM
// position and appended to the matching slot of rNewAttrTab.
// bMoveEndBack: when true, the end position used for the closed parts is
// taken from the content node found by SwNodes::GoPrevious() instead of the
// current PaM position.
3347 void SwHTMLParser::SplitAttrTab( std::shared_ptr<HTMLAttrTable> const & rNewAttrTab,
3348  bool bMoveEndBack )
3349 {
3350  // preliminary paragraph attributes are not allowed here, they could
3351  // be set here and then the pointers become invalid!
3352  OSL_ENSURE(m_aParaAttrs.empty(),
3353  "Danger: there are non-final paragraph attributes");
3354  m_aParaAttrs.clear();
3355 
  // nSttIdx/nSttCnt: where the re-opened attributes start (current PaM point);
  // nEndIdx/nEndCnt: where the closed parts end (may be moved back below)
3356  const SwNodeIndex& nSttIdx = m_pPam->GetPoint()->nNode;
3357  SwNodeIndex nEndIdx( nSttIdx );
3358 
3359  // close all still open attributes and re-open them after the table
3360  HTMLAttr** pHTMLAttributes = reinterpret_cast<HTMLAttr**>(m_xAttrTab.get());
3361  HTMLAttr** pSaveAttributes = reinterpret_cast<HTMLAttr**>(rNewAttrTab.get());
3362  bool bSetAttr = true;
3363  const sal_Int32 nSttCnt = m_pPam->GetPoint()->nContent.GetIndex();
3364  sal_Int32 nEndCnt = nSttCnt;
3365 
3366  if( bMoveEndBack )
3367  {
3368  SwNodeOffset nOldEnd = nEndIdx.GetIndex();
3369  SwNodeOffset nTmpIdx;
  // The conditions assign nTmpIdx as a side effect. If the end of the
  // extras or of the autotext section is at or behind the old position,
  // nTmpIdx becomes the EndOfInserts index; otherwise it keeps the
  // EndOfAutotext index from the second condition.
3370  if( ( nTmpIdx = m_xDoc->GetNodes().GetEndOfExtras().GetIndex()) >= nOldEnd ||
3371  ( nTmpIdx = m_xDoc->GetNodes().GetEndOfAutotext().GetIndex()) >= nOldEnd )
3372  {
3373  nTmpIdx = m_xDoc->GetNodes().GetEndOfInserts().GetIndex();
3374  }
3375  SwContentNode* pCNd = SwNodes::GoPrevious(&nEndIdx);
3376 
3377  // Don't set attributes, when the PaM was moved outside of the content area.
3378  bSetAttr = pCNd && nTmpIdx < nEndIdx.GetIndex();
3379 
  // pCNd is only dereferenced when bSetAttr (and therefore pCNd) is non-null
3380  nEndCnt = (bSetAttr ? pCNd->Len() : 0);
3381  }
3382  for (auto nCnt = sizeof(HTMLAttrTable) / sizeof(HTMLAttr*); nCnt--; (++pHTMLAttributes, ++pSaveAttributes))
3383  {
3384  HTMLAttr *pAttr = *pHTMLAttributes;
3385  *pSaveAttributes = nullptr;
3386  while( pAttr )
3387  {
  // fetch both links first: Reset() below breaks them
3388  HTMLAttr *pNext = pAttr->GetNext();
3389  HTMLAttr *pPrev = pAttr->GetPrev();
3390 
  // a part before the split point exists only if the attribute starts
  // in an earlier paragraph, or in the end paragraph at another offset
3391  if( bSetAttr &&
3392  ( pAttr->GetStartParagraphIdx() < nEndIdx.GetIndex() ||
3393  (pAttr->GetStartParagraph() == nEndIdx &&
3394  pAttr->GetStartContent() != nEndCnt) ) )
3395  {
3396  // The attribute must be set before the list. We need the
3397  // original and therefore we clone it, because pointer to the
3398  // attribute exist in the other contexts. The Next-List is lost
3399  // in doing so, but the Previous-List is preserved.
3400  HTMLAttr *pSetAttr = pAttr->Clone( nEndIdx, nEndCnt );
3401 
  // hang the clone in front of the next attribute, or -- for the last
  // list entry -- queue it in m_aSetAttrTab (front or back per m_bInsAtStart)
3402  if( pNext )
3403  pNext->InsertPrev( pSetAttr );
3404  else
3405  {
3406  if (pSetAttr->m_bInsAtStart)
3407  m_aSetAttrTab.push_front( pSetAttr );
3408  else
3409  m_aSetAttrTab.push_back( pSetAttr );
3410  }
3411  }
3412  else if( pPrev )
3413  {
3414  // If the attribute doesn't need to be set before the table, then
3415  // the previous attributes must still be set.
3416  if( pNext )
3417  pNext->InsertPrev( pPrev );
3418  else
3419  {
3420  if (pPrev->m_bInsAtStart)
3421  m_aSetAttrTab.push_front( pPrev );
3422  else
3423  m_aSetAttrTab.push_back( pPrev );
3424  }
3425  }
3426 
3427  // set the start of the attribute anew and break link
3428  pAttr->Reset(nSttIdx, nSttCnt, pSaveAttributes, rNewAttrTab);
3429 
  // append to the end of the new slot's list so the original order is kept
3430  if (*pSaveAttributes)
3431  {
3432  HTMLAttr *pSAttr = *pSaveAttributes;
3433  while( pSAttr->GetNext() )
3434  pSAttr = pSAttr->GetNext();
3435  pSAttr->InsertNext( pAttr );
3436  }
3437  else
3438  *pSaveAttributes = pAttr;
3439 
3440  pAttr = pNext;
3441  }
3442 
3443  *pHTMLAttributes = nullptr;
3444  }
3445 }
3446 
// Moves all attribute lists from rNewAttrTab back into the current table
// (m_xAttrTab) and empties rNewAttrTab -- the reverse direction of
// SaveAttrTab(). The current table is expected to be empty (checked with
// OSL_ENSURE only); whatever a slot held is overwritten.
// NOTE(review): the reinterpret_casts assume HTMLAttrTable holds nothing but
// HTMLAttr* members -- confirm against the struct declaration.
3447 void SwHTMLParser::RestoreAttrTab(std::shared_ptr<HTMLAttrTable> const & rNewAttrTab)
3448 {
3449  // preliminary paragraph attributes are not allowed here, they could
3450  // be set here and then the pointers become invalid!
3451  OSL_ENSURE(m_aParaAttrs.empty(),
3452  "Danger: there are non-final paragraph attributes");
3453  m_aParaAttrs.clear();
3454 
3455  HTMLAttr** pHTMLAttributes = reinterpret_cast<HTMLAttr**>(m_xAttrTab.get());
3456  HTMLAttr** pSaveAttributes = reinterpret_cast<HTMLAttr**>(rNewAttrTab.get());
3457 
  // one iteration per HTMLAttr* slot of the table; both cursors advance together
3458  for (auto nCnt = sizeof(HTMLAttrTable) / sizeof(HTMLAttr*); nCnt--; ++pHTMLAttributes, ++pSaveAttributes)
3459  {
3460  OSL_ENSURE(!*pHTMLAttributes, "The attribute table is not empty!");
3461 
3462  *pHTMLAttributes = *pSaveAttributes;
3463 
  // re-attach every entry of the list to the slot in the current table
3464  HTMLAttr *pAttr = *pHTMLAttributes;
3465  while (pAttr)
3466  {
3467  OSL_ENSURE( !pAttr->GetPrev() || !pAttr->GetPrev()->m_ppHead,
3468  "Previous attribute has still a header" );
3469  pAttr->SetHead(pHTMLAttributes, m_xAttrTab);
3470  pAttr = pAttr->GetNext();
3471  }
3472 
3473  *pSaveAttributes = nullptr;
3474  }
3475 }
3476 
// Creates an HTMLAttr for rItem at the current PaM point that belongs to no
// table slot (null head pointer, empty table pointer) and queues it directly
// in m_aSetAttrTab: at the front when bInsAtStart is true, otherwise at the
// back. The raw pointer is handed to m_aSetAttrTab; it is not deleted here.
3477 void SwHTMLParser::InsertAttr( const SfxPoolItem& rItem, bool bInsAtStart )
3478 {
3479  HTMLAttr* pTmp = new HTMLAttr(*m_pPam->GetPoint(), rItem, nullptr, std::shared_ptr<HTMLAttrTable>());
3480  if (bInsAtStart)
3481  m_aSetAttrTab.push_front( pTmp );
3482  else
3483  m_aSetAttrTab.push_back( pTmp );
3484 }
3485 
// Consumes rAttrs (taken by value) front to back: for every queued attribute
// its item is inserted at the current position via InsertAttr( item, false ).
// Each queued attribute is destroyed at the end of its loop iteration, when
// the local unique_ptr goes out of scope.
// NOTE(review): presumably the new HTMLAttr created by InsertAttr copies the
// item, since the source attribute dies right afterwards -- verify.
3486 void SwHTMLParser::InsertAttrs( std::deque<std::unique_ptr<HTMLAttr>> rAttrs )
3487 {
3488  while( !rAttrs.empty() )
3489  {
3490  std::unique_ptr<HTMLAttr> pAttr = std::move(rAttrs.front());
3491  InsertAttr( pAttr->GetItem(), false );
3492  rAttrs.pop_front();
3493  }
3494 }
3495 
// NOTE(review): the signature line (3496) is missing from this listing; the
// body uses a parameter named nToken -- presumably
// SwHTMLParser::NewStdAttr( HtmlTokenId nToken ), verify against the file.
// Opens a context for nToken: reads the ID/STYLE/CLASS/LANG/DIR options,
// applies any resulting style attributes and pushes the new context.
3497 {
3498  OUString aId, aStyle, aLang, aDir;
3499  OUString aClass;
3500 
  // options are visited last to first, so for a repeated option the first
  // occurrence is assigned last and wins
3501  const HTMLOptions& rHTMLOptions = GetOptions();
3502  for (size_t i = rHTMLOptions.size(); i; )
3503  {
3504  const HTMLOption& rOption = rHTMLOptions[--i];
3505  switch( rOption.GetToken() )
3506  {
3507  case HtmlOptionId::ID:
3508  aId = rOption.GetString();
3509  break;
3510  case HtmlOptionId::STYLE:
3511  aStyle = rOption.GetString();
3512  break;
3513  case HtmlOptionId::CLASS:
3514  aClass = rOption.GetString();
3515  break;
3516  case HtmlOptionId::LANG:
3517  aLang = rOption.GetString();
3518  break;
3519  case HtmlOptionId::DIR:
3520  aDir = rOption.GetString();
3521  break;
3522  default: break;
3523  }
3524  }
3525 
3526  // create a new context
3527  std::unique_ptr<HTMLAttrContext> xCntxt(new HTMLAttrContext(nToken));
3528 
3529  // parse styles
3530  if( HasStyleOptions( aStyle, aId, aClass, &aLang, &aDir ) )
3531  {
3532  SfxItemSet aItemSet( m_xDoc->GetAttrPool(), m_pCSS1Parser->GetWhichMap() );
3533  SvxCSS1PropertyInfo aPropInfo;
3534 
3535  if( ParseStyleOptions( aStyle, aId, aClass, aItemSet, aPropInfo, &aLang, &aDir ) )
3536  {
  // DoPositioning is skipped only for a SPAN with a class for which
  // CreateContainer succeeds; the attributes are inserted either way
3537  if( HtmlTokenId::SPAN_ON != nToken || aClass.isEmpty() ||
3538  !CreateContainer( aClass, aItemSet, aPropInfo, xCntxt.get() ) )
3539  DoPositioning( aItemSet, aPropInfo, xCntxt.get() );
3540  InsertAttrs( aItemSet, aPropInfo, xCntxt.get(), true );
3541  }
3542  }
3543 
3544  // save the context
3545  PushContext(xCntxt);
3546 }
3547 
// NOTE(review): the first signature line (3548) is missing from this
// listing; the body also uses a parameter named nToken -- presumably this is
// the NewStdAttr overload taking a token plus up to three items, verify.
// Opens a context for nToken that carries rItem and, optionally, *pItem2 and
// *pItem3. ppAttr/ppAttr2/ppAttr3 are the table slots the items are inserted
// at when the tag has no style options; pItem2/pItem3 may be null.
3549  HTMLAttr **ppAttr, const SfxPoolItem & rItem,
3550  HTMLAttr **ppAttr2, const SfxPoolItem *pItem2,
3551  HTMLAttr **ppAttr3, const SfxPoolItem *pItem3 )
3552 {
3553  OUString aId, aStyle, aClass, aLang, aDir;
3554 
  // options are visited last to first, so for a repeated option the first
  // occurrence is assigned last and wins
3555  const HTMLOptions& rHTMLOptions = GetOptions();
3556  for (size_t i = rHTMLOptions.size(); i; )
3557  {
3558  const HTMLOption& rOption = rHTMLOptions[--i];
3559  switch( rOption.GetToken() )
3560  {
3561  case HtmlOptionId::ID:
3562  aId = rOption.GetString();
3563  break;
3564  case HtmlOptionId::STYLE:
3565  aStyle = rOption.GetString();
3566  break;
3567  case HtmlOptionId::CLASS:
3568  aClass = rOption.GetString();
3569  break;
3570  case HtmlOptionId::LANG:
3571  aLang = rOption.GetString();
3572  break;
3573  case HtmlOptionId::DIR:
3574  aDir = rOption.GetString();
3575  break;
3576  default: break;
3577  }
3578  }
3579 
3580  // create a new context
3581  std::unique_ptr<HTMLAttrContext> xCntxt(new HTMLAttrContext(nToken));
3582 
3583  // parse styles
3584  if( HasStyleOptions( aStyle, aId, aClass, &aLang, &aDir ) )
3585  {
3586  SfxItemSet aItemSet( m_xDoc->GetAttrPool(), m_pCSS1Parser->GetWhichMap() );
3587  SvxCSS1PropertyInfo aPropInfo;
3588 
  // the tag's own items go into the set first, then the style options are
  // parsed into the same set
3589  aItemSet.Put( rItem );
3590  if( pItem2 )
3591  aItemSet.Put( *pItem2 );
3592  if( pItem3 )
3593  aItemSet.Put( *pItem3 );
3594 
3595  if( ParseStyleOptions( aStyle, aId, aClass, aItemSet, aPropInfo, &aLang, &aDir ) )
3596  DoPositioning( aItemSet, aPropInfo, xCntxt.get() );
3597 
  // inserted even if ParseStyleOptions returned false: the set still
  // holds the items put above
3598  InsertAttrs( aItemSet, aPropInfo, xCntxt.get(), true );
3599  }
3600  else
3601  {
  // no style options: insert the items directly at their table slots
3602  InsertAttr( ppAttr ,rItem, xCntxt.get() );
3603  if( pItem2 )
3604  {
3605  OSL_ENSURE( ppAttr2, "missing table entry for item2" );
3606  InsertAttr( ppAttr2, *pItem2, xCntxt.get() );
3607  }
3608  if( pItem3 )
3609  {
3610  OSL_ENSURE( ppAttr3, "missing table entry for item3" );
3611  InsertAttr( ppAttr3, *pItem3, xCntxt.get() );
3612  }
3613  }
3614 
3615  // save the context
3616  PushContext(xCntxt);
3617 }
3618 
// NOTE(review): the signature line (3619) is missing from this listing; the
// body uses a parameter named nToken -- presumably SwHTMLParser::EndTag,
// verify against the file.
// Pops the context that matches the "on" form of nToken and, if one was
// found, ends its attributes. Does nothing when no such context exists.
3620 {
3621  // fetch context
3622  std::unique_ptr<HTMLAttrContext> xCntxt(PopContext(getOnToken(nToken)));
3623  if (xCntxt)
3624  {
3625  // and maybe end the attributes
3626  EndContext(xCntxt.get());
3627  }
3628 }
3629 
// NOTE(review): the signature line (3630) is missing from this listing --
// presumably SwHTMLParser::NewBasefontAttr(), verify against the file.
// Opens a BASEFONT_ON context: reads SIZE (default 3, clamped to 1..7) and
// the ID/STYLE/CLASS/LANG/DIR options, sets the font height
// m_aFontHeights[nSize-1] for the default, CJK and CTL font size attributes,
// pushes the context and records nSize on m_aBaseFontStack.
3631 {
3632  OUString aId, aStyle, aClass, aLang, aDir;
3633  sal_uInt16 nSize = 3;
3634 
  // options are visited last to first, so for a repeated option the first
  // occurrence is assigned last and wins
3635  const HTMLOptions& rHTMLOptions = GetOptions();
3636  for (size_t i = rHTMLOptions.size(); i; )
3637  {
3638  const HTMLOption& rOption = rHTMLOptions[--i];
3639  switch( rOption.GetToken() )
3640  {
3641  case HtmlOptionId::SIZE:
3642  nSize = o3tl::narrowing<sal_uInt16>(rOption.GetNumber());
3643  break;
3644  case HtmlOptionId::ID:
3645  aId = rOption.GetString();
3646  break;
3647  case HtmlOptionId::STYLE:
3648  aStyle = rOption.GetString();
3649  break;
3650  case HtmlOptionId::CLASS:
3651  aClass = rOption.GetString();
3652  break;
3653  case HtmlOptionId::LANG:
3654  aLang = rOption.GetString();
3655  break;
3656  case HtmlOptionId::DIR:
3657  aDir = rOption.GetString();
3658  break;
3659  default: break;
3660  }
3661  }
3662 
  // clamp to 1..7 so that nSize-1 is a valid index into m_aFontHeights below
3663  if( nSize < 1 )
3664  nSize = 1;
3665 
3666  if( nSize > 7 )
3667  nSize = 7;
3668 
3669  // create a new context
3670  std::unique_ptr<HTMLAttrContext> xCntxt(new HTMLAttrContext(HtmlTokenId::BASEFONT_ON));
3671 
3672  // parse styles
3673  if( HasStyleOptions( aStyle, aId, aClass, &aLang, &aDir ) )
3674  {
3675  SfxItemSet aItemSet( m_xDoc->GetAttrPool(), m_pCSS1Parser->GetWhichMap() );
3676  SvxCSS1PropertyInfo aPropInfo;
3677 
3678  //CJK has different defaults
3679  SvxFontHeightItem aFontHeight( m_aFontHeights[nSize-1], 100, RES_CHRATR_FONTSIZE );
3680  aItemSet.Put( aFontHeight );
3681  SvxFontHeightItem aFontHeightCJK( m_aFontHeights[nSize-1], 100, RES_CHRATR_CJK_FONTSIZE );
3682  aItemSet.Put( aFontHeightCJK );
3683  //Complex type can contain so many types of letters,
3684  //that it's not really worthy to bother, IMO.
3685  //Still, I have set a default.
3686  SvxFontHeightItem aFontHeightCTL( m_aFontHeights[nSize-1], 100, RES_CHRATR_CTL_FONTSIZE );
3687  aItemSet.Put( aFontHeightCTL );
3688 
3689  if( ParseStyleOptions( aStyle, aId, aClass, aItemSet, aPropInfo, &aLang, &aDir ) )
3690  DoPositioning( aItemSet, aPropInfo, xCntxt.get() );
3691 
3692  InsertAttrs( aItemSet, aPropInfo, xCntxt.get(), true );
3693  }
3694  else
3695  {
  // no style options: insert each height at the table slot of its script type
3696  SvxFontHeightItem aFontHeight( m_aFontHeights[nSize-1], 100, RES_CHRATR_FONTSIZE );
3697  InsertAttr( &m_xAttrTab->pFontHeight, aFontHeight, xCntxt.get() );
3698  SvxFontHeightItem aFontHeightCJK( m_aFontHeights[nSize-1], 100, RES_CHRATR_CJK_FONTSIZE );
3699  InsertAttr( &m_xAttrTab->pFontHeightCJK, aFontHeightCJK, xCntxt.get() );
3700  SvxFontHeightItem aFontHeightCTL( m_aFontHeights[nSize-1], 100, RES_CHRATR_CTL_FONTSIZE );
3701  InsertAttr( &m_xAttrTab->pFontHeightCTL, aFontHeightCTL, xCntxt.get() );
3702  }
3703 
3704  // save the context
3705  PushContext(xCntxt);
3706 
3707  // save the font size
3708  m_aBaseFontStack.push_back( nSize );
3709 }
3710 
// NOTE(review): the signature line (3711) is missing from this listing --
// presumably SwHTMLParser::EndBasefontAttr(), verify against the file.
// Ends the BASEFONT_ON context and removes the last entry of
// m_aBaseFontStack, but never shrinks the stack to m_nBaseFontStMin entries
// or fewer.
3712 {
3713  EndTag( HtmlTokenId::BASEFONT_ON );
3714 
3715  // avoid stack underflow in tables
3716  if( m_aBaseFontStack.size() > m_nBaseFontStMin )
3717  m_aBaseFontStack.erase( m_aBaseFontStack.begin() + m_aBaseFontStack.size() - 1 );
3718 }
3719 
3721 {
3722  sal_uInt16 nBaseSize =
3725  : 3 );
3726  sal_uInt16 nFontSize =
3727  ( m_aFontStack.size() > m_nFontStMin
3728  ? (m_aFontStack[m_aFontStack.size()-1] & FONTSIZE_MASK)
3729  : nBaseSize );
3730 
3731  OUString aFace, aId, aStyle, aClass, aLang, aDir;
3732  Color aColor;
3733  sal_uLong nFontHeight = 0; // actual font height to set
3734  sal_uInt16 nSize = 0; // font height in Netscape notation (1-7)
3735  bool bColor = false;
3736 
3737  const HTMLOptions& rHTMLOptions = GetOptions();
3738  for (size_t i = rHTMLOptions.size(); i; )
3739  {
3740  const HTMLOption& rOption = rHTMLOptions[--i];
3741  switch( rOption.GetToken() )
3742  {
3743  case HtmlOptionId::SIZE:
3744  if( HtmlTokenId::FONT_ON==nToken && !rOption.GetString().isEmpty() )
3745  {
3746  sal_Int32 nSSize;
3747  if( '+' == rOption.GetString()[0] ||
3748  '-' == rOption.GetString()[0] )
3749  nSSize = o3tl::saturating_add<sal_Int32>(nBaseSize, rOption.GetSNumber());
3750  else
3751  nSSize = static_cast<sal_Int32>(rOption.GetNumber());
3752 
3753  if( nSSize < 1 )
3754  nSSize = 1;
3755  else if( nSSize > 7 )
3756  nSSize = 7;
3757 
3758  nSize = o3tl::narrowing<sal_uInt16>(nSSize);
3759  nFontHeight = m_aFontHeights[nSize-1];
3760  }
3761  break;
3762  case HtmlOptionId::COLOR:
3763  if( HtmlTokenId::FONT_ON==nToken )
3764  {
3765  rOption.GetColor( aColor );
3766  bColor = true;
3767  }
3768  break;
3769  case HtmlOptionId::FACE:
3770  if( HtmlTokenId::FONT_ON==nToken )
3771  aFace = rOption.GetString();
3772  break;
3773  case HtmlOptionId::ID:
3774  aId = rOption.GetString();
3775  break;
3776  case HtmlOptionId::STYLE:
3777  aStyle = rOption.GetString();
3778  break;
3779  case HtmlOptionId::CLASS:
3780  aClass = rOption.GetString();
3781  break;
3782  case HtmlOptionId::LANG:
3783  aLang = rOption.GetString();
3784  break;
3785  case HtmlOptionId::DIR:
3786  aDir = rOption.GetString();
3787  break;
3788  default: break;
3789  }
3790  }
3791 
3792  if( HtmlTokenId::FONT_ON != nToken )
3793  {
3794  // HTML_BIGPRINT_ON or HTML_SMALLPRINT_ON
3795 
3796  // In headings the current heading sets the font height
3797  // and not BASEFONT.
3798  const SwFormatColl *pColl = GetCurrFormatColl();
3799  sal_uInt16 nPoolId = pColl ? pColl->GetPoolFormatId() : 0;
3800  if( nPoolId>=RES_POOLCOLL_HEADLINE1 &&
3801  nPoolId<=RES_POOLCOLL_HEADLINE6 )
3802  {
3803  // If the font height in the heading wasn't changed yet,
3804  // then take the one from the style.
3805  if( m_nFontStHeadStart==m_aFontStack.size() )
3806  nFontSize = static_cast< sal_uInt16 >(6 - (nPoolId - RES_POOLCOLL_HEADLINE1));
3807  }
3808  else
3809  nPoolId = 0;
3810 
3811  if( HtmlTokenId::BIGPRINT_ON == nToken )
3812  nSize = ( nFontSize<7 ? nFontSize+1 : 7 );
3813  else
3814  nSize = ( nFontSize>1 ? nFontSize-1 : 1 );
3815 
3816  // If possible in headlines we fetch the new font height
3817  // from the style.
3818  if( nPoolId && nSize>=1 && nSize <=6 )
3819  nFontHeight =
3820  m_pCSS1Parser->GetTextCollFromPool(
3821  RES_POOLCOLL_HEADLINE1+6-nSize )->GetSize().GetHeight();
3822  else
3823  nFontHeight = m_aFontHeights[nSize-1];
3824  }
3825 
3826  OSL_ENSURE( !nSize == !nFontHeight, "HTML-Font-Size != Font-Height" );
3827 
3828  OUString aFontName;
3829  const OUString aStyleName;
3830  FontFamily eFamily = FAMILY_DONTKNOW; // family and pitch,
3831  FontPitch ePitch = PITCH_DONTKNOW; // if not found
3832  rtl_TextEncoding eEnc = osl_getThreadTextEncoding();
3833 
3834  if( !aFace.isEmpty() && !m_pCSS1Parser->IsIgnoreFontFamily() )
3835  {
3836  const FontList *pFList = nullptr;
3837  SwDocShell *pDocSh = m_xDoc->GetDocShell();
3838  if( pDocSh )
3839  {
3840  const SvxFontListItem *pFListItem =
3841  static_cast<const SvxFontListItem *>(pDocSh->GetItem(SID_ATTR_CHAR_FONTLIST));
3842  if( pFListItem )
3843  pFList = pFListItem->GetFontList();
3844  }
3845 
3846  bool bFound = false;
3847  sal_Int32 nStrPos = 0;
3848  while( nStrPos!= -1 )
3849  {
3850  OUString aFName = aFace.getToken( 0, ',', nStrPos );
3851  aFName = comphelper::string::strip(aFName, ' ');
3852  if( !aFName.isEmpty() )
3853  {
3854  if( !bFound && pFList )
3855  {
3856  sal_Handle hFont = pFList->GetFirstFontMetric( aFName );
3857  if( nullptr != hFont )
3858  {
3859  const FontMetric& rFMetric = FontList::GetFontMetric( hFont );
3860  if( RTL_TEXTENCODING_DONTKNOW != rFMetric.GetCharSet() )
3861  {
3862  bFound = true;
3863  if( RTL_TEXTENCODING_SYMBOL == rFMetric.GetCharSet() )
3864  eEnc = RTL_TEXTENCODING_SYMBOL;
3865  }
3866  }
3867  }
3868  if( !aFontName.isEmpty() )
3869  aFontName += ";";
3870  aFontName += aFName;
3871  }
3872  }
3873  }
3874 
3875  // create a new context
3876  std::unique_ptr<HTMLAttrContext> xCntxt(new HTMLAttrContext(nToken));
3877 
3878  // parse styles
3879  if( HasStyleOptions( aStyle, aId, aClass, &aLang, &aDir ) )
3880  {
3881  SfxItemSet aItemSet( m_xDoc->GetAttrPool(), m_pCSS1Parser->GetWhichMap() );
3882  SvxCSS1PropertyInfo aPropInfo;
3883 
3884  if( nFontHeight )
3885  {
3886  SvxFontHeightItem aFontHeight( nFontHeight, 100, RES_CHRATR_FONTSIZE );
3887  aItemSet.Put( aFontHeight );
3888  SvxFontHeightItem aFontHeightCJK( nFontHeight, 100, RES_CHRATR_CJK_FONTSIZE );
3889  aItemSet.Put( aFontHeightCJK );
3890  SvxFontHeightItem aFontHeightCTL( nFontHeight, 100, RES_CHRATR_CTL_FONTSIZE );
3891  aItemSet.Put( aFontHeightCTL );
3892  }
3893  if( bColor )
3894  aItemSet.Put( SvxColorItem(aColor, RES_CHRATR_COLOR) );
3895  if( !aFontName.isEmpty() )
3896  {
3897  SvxFontItem aFont( eFamily, aFontName, aStyleName, ePitch, eEnc, RES_CHRATR_FONT );
3898  aItemSet.Put( aFont );
3899  SvxFontItem aFontCJK( eFamily, aFontName, aStyleName, ePitch, eEnc, RES_CHRATR_CJK_FONT );
3900  aItemSet.Put( aFontCJK );
3901  SvxFontItem aFontCTL( eFamily, aFontName, aStyleName, ePitch, eEnc, RES_CHRATR_CTL_FONT );
3902  aItemSet.Put( aFontCTL );
3903  }
3904 
3905  if( ParseStyleOptions( aStyle, aId, aClass, aItemSet, aPropInfo, &aLang, &aDir ) )
3906  DoPositioning( aItemSet, aPropInfo, xCntxt.get() );
3907 
3908  InsertAttrs( aItemSet, aPropInfo, xCntxt.get(), true );
3909  }
3910  else
3911  {
3912  if( nFontHeight )
3913  {
3914  SvxFontHeightItem aFontHeight( nFontHeight, 100, RES_CHRATR_FONTSIZE );
3915  InsertAttr( &m_xAttrTab->pFontHeight, aFontHeight, xCntxt.get() );
3916  SvxFontHeightItem aFontHeightCJK( nFontHeight, 100, RES_CHRATR_CJK_FONTSIZE );
3917  InsertAttr( &m_xAttrTab->pFontHeight, aFontHeightCJK, xCntxt.get() );
3918  SvxFontHeightItem aFontHeightCTL( nFontHeight, 100, RES_CHRATR_CTL_FONTSIZE );
3919  InsertAttr( &m_xAttrTab->pFontHeight, aFontHeightCTL, xCntxt.get() );
3920  }
3921  if( bColor )
3922  InsertAttr( &m_xAttrTab->pFontColor, SvxColorItem(aColor, RES_CHRATR_COLOR), xCntxt.get() );
3923  if( !aFontName.isEmpty() )
3924  {
3925  SvxFontItem aFont( eFamily, aFontName, aStyleName, ePitch, eEnc, RES_CHRATR_FONT );
3926  InsertAttr( &m_xAttrTab->pFont, aFont, xCntxt.get() );
3927  SvxFontItem aFontCJK( eFamily, aFontName, aStyleName, ePitch, eEnc, RES_CHRATR_CJK_FONT );
3928  InsertAttr( &m_xAttrTab->pFont, aFontCJK, xCntxt.get() );
3929  SvxFontItem aFontCTL( eFamily, aFontName, aStyleName, ePitch, eEnc, RES_CHRATR_CTL_FONT );
3930  InsertAttr( &m_xAttrTab->pFont, aFontCTL, xCntxt.get() );
3931  }
3932  }
3933 
3934  // save the context
3935  PushContext(xCntxt);
3936 
3937  m_aFontStack.push_back( nSize );
3938 }
3939 
// NOTE(review): the signature line (3940) is missing from this listing; the
// body uses a parameter named nToken -- presumably
// SwHTMLParser::EndFontAttr, verify against the file.
// Ends the context for nToken via EndTag() and removes the last entry of
// m_aFontStack, but never shrinks the stack to m_nFontStMin entries or fewer.
3941 {
3942  EndTag( nToken );
3943 
3944  // avoid stack underflow in tables
3945  if( m_aFontStack.size() > m_nFontStMin )
3946  m_aFontStack.erase( m_aFontStack.begin() + m_aFontStack.size() - 1 );
3947 }
3948 
// NOTE(review): the signature line (3949) is missing from this listing --
// presumably SwHTMLParser::NewPara(), verify against the file.
// Starts a paragraph element: reads the ID/ALIGN/STYLE/CLASS/LANG/DIR
// options, creates and pushes a PARABREAK_ON context (carrying
// RES_POOLCOLL_TEXT and the class when a class is given), applies style
// options and alignment, and marks PARABREAK_ON as the open paragraph token.
3950 {
3951  if( m_pPam->GetPoint()->nContent.GetIndex() )
  // NOTE(review): line 3952, the statement controlled by this `if`, is
  // missing from this listing.
3953  else
3954  AddParSpace();
3955 
  // SvxAdjust::End serves as the "no ALIGN option given" marker, see the
  // check before InsertAttr below
3956  m_eParaAdjust = SvxAdjust::End;
3957  OUString aId, aStyle, aClass, aLang, aDir;
3958 
  // options are visited last to first, so for a repeated option the first
  // occurrence is assigned last and wins
3959  const HTMLOptions& rHTMLOptions = GetOptions();
3960  for (size_t i = rHTMLOptions.size(); i; )
3961  {
3962  const HTMLOption& rOption = rHTMLOptions[--i];
3963  switch( rOption.GetToken() )
3964  {
3965  case HtmlOptionId::ID:
3966  aId = rOption.GetString();
3967  break;
3968  case HtmlOptionId::ALIGN:
3969  m_eParaAdjust = rOption.GetEnum( aHTMLPAlignTable, m_eParaAdjust );
3970  break;
3971  case HtmlOptionId::STYLE:
3972  aStyle = rOption.GetString();
3973  break;
3974  case HtmlOptionId::CLASS:
3975  aClass = rOption.GetString();
3976  break;
3977  case HtmlOptionId::LANG:
3978  aLang = rOption.GetString();
3979  break;
3980  case HtmlOptionId::DIR:
3981  aDir = rOption.GetString();
3982  break;
3983  default: break;
3984  }
3985  }
3986 
3987  // create a new context
3988  std::unique_ptr<HTMLAttrContext> xCntxt(
3989  !aClass.isEmpty() ? new HTMLAttrContext( HtmlTokenId::PARABREAK_ON,
3990  RES_POOLCOLL_TEXT, aClass )
3991  : new HTMLAttrContext( HtmlTokenId::PARABREAK_ON ));
3992 
3993  // parse styles (Don't consider class. This is only possible as long as none of
3994  // the CSS1 properties of the class must be formatted hard!!!)
3995  if (HasStyleOptions(aStyle, aId, {}, &aLang, &aDir))
3996  {
3997  SfxItemSet aItemSet( m_xDoc->GetAttrPool(), m_pCSS1Parser->GetWhichMap() );
3998  SvxCSS1PropertyInfo aPropInfo;
3999 
4000  if (ParseStyleOptions(aStyle, aId, OUString(), aItemSet, aPropInfo, &aLang, &aDir))
4001  {
4002  OSL_ENSURE( aClass.isEmpty() || !m_pCSS1Parser->GetClass( aClass ),
4003  "Class is not considered" );
4004  DoPositioning( aItemSet, aPropInfo, xCntxt.get() );
4005  InsertAttrs( aItemSet, aPropInfo, xCntxt.get() );
4006  }
4007  }
4008 
  // only set an adjustment when the ALIGN option changed the marker value
4009  if( SvxAdjust::End != m_eParaAdjust )
4010  InsertAttr( &m_xAttrTab->pAdjust, SvxAdjustItem(m_eParaAdjust, RES_PARATR_ADJUST), xCntxt.get() );
4011 
4012  // and push on stack
4013  PushContext( xCntxt );
4014 
4015  // set the current style or its attributes
4016  SetTextCollAttrs( !aClass.isEmpty() ? m_aContexts.back().get() : nullptr );
4017 
4018  // progress bar
4019  ShowStatline();
4020 
4021  OSL_ENSURE( m_nOpenParaToken == HtmlTokenId::NONE, "Now an open paragraph element will be lost." );
4022  m_nOpenParaToken = HtmlTokenId::PARABREAK_ON;
4023 }
4024 
// Closes the currently open paragraph element: pops the context belonging to
// m_nOpenParaToken (PARABREAK_ON when no paragraph token is open), ends its
// attributes and resets m_nOpenParaToken to NONE.
// bReal: when true, the paragraph is additionally terminated in the document
// (first `if( bReal )` block) and the text collection attributes are reset
// with SetTextCollAttrs().
4025 void SwHTMLParser::EndPara( bool bReal )
4026 {
  // debug-only sanity check; has no effect in builds with OSL_DEBUG_LEVEL 0
4027  if (HtmlTokenId::LI_ON==m_nOpenParaToken && m_xTable)
4028  {
4029 #if OSL_DEBUG_LEVEL > 0
4030  const SwNumRule *pNumRule = m_pPam->GetNode().GetTextNode()->GetNumRule();
4031  OSL_ENSURE( pNumRule, "Where is the NumRule" );
4032 #endif
4033  }
4034 
4035  // Netscape skips empty paragraphs, we do the same.
4036  if( bReal )
4037  {
4038  if( m_pPam->GetPoint()->nContent.GetIndex() )
  // NOTE(review): line 4039, the statement controlled by this `if`, is
  // missing from this listing.
4040  else
4041  AddParSpace();
4042  }
4043 
4044  // If a DD or DT was open, it's an implied definition list,
4045  // which must be closed now.
4046  if( (m_nOpenParaToken == HtmlTokenId::DT_ON || m_nOpenParaToken == HtmlTokenId::DD_ON) &&
  // NOTE(review): line 4047, the rest of this condition, is missing from
  // this listing.
4048  {
4049  m_nDefListDeep--;
4050  }
4051 
4052  // Pop the context of the stack. It can also be from an
4053  // implied opened definition list.
4054  std::unique_ptr<HTMLAttrContext> xCntxt(
4055  PopContext( m_nOpenParaToken != HtmlTokenId::NONE ? getOnToken(m_nOpenParaToken) : HtmlTokenId::PARABREAK_ON ));
4056 
4057  // close attribute
4058  if (xCntxt)
4059  {
4060  EndContext(xCntxt.get());
4061  SetAttr(); // because of JavaScript set paragraph attributes as fast as possible
4062  xCntxt.reset();
4063  }
4064 
4065  // reset the existing style
4066  if( bReal )
4067  SetTextCollAttrs();
4068 
4069  m_nOpenParaToken = HtmlTokenId::NONE;
4070 }
4071 
// NOTE(review): the signature line (4072) is missing from this listing; the
// body uses a parameter named nToken -- presumably SwHTMLParser::NewHeading,
// verify against the file.
// Starts a heading element: reads the ID/ALIGN/STYLE/CLASS/LANG/DIR options,
// maps nToken (HEAD1_ON..HEAD6_ON) to the matching RES_POOLCOLL_HEADLINEn
// collection (RES_POOLCOLL_STANDARD for any other token), creates and pushes
// the context and applies style options and alignment.
4073 {
  // SvxAdjust::End serves as the "no ALIGN option given" marker, see the
  // check before InsertAttr below
4074  m_eParaAdjust = SvxAdjust::End;
4075 
4076  OUString aId, aStyle, aClass, aLang, aDir;
4077 
  // options are visited last to first, so for a repeated option the first
  // occurrence is assigned last and wins
4078  const HTMLOptions& rHTMLOptions = GetOptions();
4079  for (size_t i = rHTMLOptions.size(); i; )
4080  {
4081  const HTMLOption& rOption = rHTMLOptions[--i];
4082  switch( rOption.GetToken() )
4083  {
4084  case HtmlOptionId::ID:
4085  aId = rOption.GetString();
4086  break;
4087  case HtmlOptionId::ALIGN:
4088  m_eParaAdjust = rOption.GetEnum( aHTMLPAlignTable, m_eParaAdjust );
4089  break;
4090  case HtmlOptionId::STYLE:
4091  aStyle = rOption.GetString();
4092  break;
4093  case HtmlOptionId::CLASS:
4094  aClass = rOption.GetString();
4095  break;
4096  case HtmlOptionId::LANG:
4097  aLang = rOption.GetString();
4098  break;
4099  case HtmlOptionId::DIR:
4100  aDir = rOption.GetString();
4101  break;
4102  default: break;
4103  }
4104  }
4105 
4106  // open a new paragraph
4107  if( m_pPam->GetPoint()->nContent.GetIndex() )
  // NOTE(review): line 4108, the statement controlled by this `if`, is
  // missing from this listing.
4109  else
4110  AddParSpace();
4111 
4112  // search for the matching style
4113  sal_uInt16 nTextColl;
4114  switch( nToken )
4115  {
4116  case HtmlTokenId::HEAD1_ON: nTextColl = RES_POOLCOLL_HEADLINE1; break;
4117  case HtmlTokenId::HEAD2_ON: nTextColl = RES_POOLCOLL_HEADLINE2; break;
4118  case HtmlTokenId::HEAD3_ON: nTextColl = RES_POOLCOLL_HEADLINE3; break;
4119  case HtmlTokenId::HEAD4_ON: nTextColl = RES_POOLCOLL_HEADLINE4; break;
4120  case HtmlTokenId::HEAD5_ON: nTextColl = RES_POOLCOLL_HEADLINE5; break;
4121  case HtmlTokenId::HEAD6_ON: nTextColl = RES_POOLCOLL_HEADLINE6; break;
4122  default: nTextColl = RES_POOLCOLL_STANDARD; break;
4123  }
4124 
4125  // create the context
4126  std::unique_ptr<HTMLAttrContext> xCntxt(new HTMLAttrContext(nToken, nTextColl, aClass));
4127 
4128  // parse styles (regarding class see also NewPara)
4129  if (HasStyleOptions(aStyle, aId, {}, &aLang, &aDir))
4130  {
4131  SfxItemSet aItemSet( m_xDoc->GetAttrPool(), m_pCSS1Parser->GetWhichMap() );
4132  SvxCSS1PropertyInfo aPropInfo;
4133 
4134  if (ParseStyleOptions(aStyle, aId, OUString(), aItemSet, aPropInfo, &aLang, &aDir))
4135  {
4136  OSL_ENSURE( aClass.isEmpty() || !m_pCSS1Parser->GetClass( aClass ),
4137  "Class is not considered" );
4138  DoPositioning( aItemSet, aPropInfo, xCntxt.get() );
4139  InsertAttrs( aItemSet, aPropInfo, xCntxt.get() );
4140  }
4141  }
4142 
  // only set an adjustment when the ALIGN option changed the marker value
4143  if( SvxAdjust::End != m_eParaAdjust )
4144  InsertAttr( &m_xAttrTab->pAdjust, SvxAdjustItem(m_eParaAdjust, RES_PARATR_ADJUST), xCntxt.get() );
4145 
4146  // and push on stack
4147  PushContext(xCntxt);
4148 
4149  // set the current style or its attributes
4150  SetTextCollAttrs(m_aContexts.back().get());
4151 
  // NOTE(review): line 4152 is missing from this listing.
4153 
4154  // progress bar
4155  ShowStatline();
4156 }
4157 
// NOTE(review): the signature line (4158) is missing from this listing --
// presumably SwHTMLParser::EndHeading(), verify against the file.
// Ends a heading: searches the context stack from the top down to
// m_nContextStMin for the nearest HEAD1_ON..HEAD6_ON context of any level,
// removes it from the stack, ends its attributes and resets the text
// collection attributes.
4159 {
4160  // open a new paragraph
4161  if( m_pPam->GetPoint()->nContent.GetIndex() )
  // NOTE(review): line 4162, the statement controlled by this `if`, is
  // missing from this listing.
4163  else
4164  AddParSpace();
4165 
4166  // search context matching the token and fetch it from stack
4167  std::unique_ptr<HTMLAttrContext> xCntxt;
4168  auto nPos = m_aContexts.size();
  // the loop stops as soon as xCntxt is set, so erasing an element while
  // iterating is safe here
4169  while( !xCntxt && nPos>m_nContextStMin )
4170  {
4171  switch( m_aContexts[--nPos]->GetToken() )
4172  {
4173  case HtmlTokenId::HEAD1_ON:
4174  case HtmlTokenId::HEAD2_ON:
4175  case HtmlTokenId::HEAD3_ON:
4176  case HtmlTokenId::HEAD4_ON:
4177  case HtmlTokenId::HEAD5_ON:
4178  case HtmlTokenId::HEAD6_ON:
4179  xCntxt = std::move(m_aContexts[nPos]);
4180  m_aContexts.erase( m_aContexts.begin() + nPos );
4181  break;
4182  default: break;
4183  }
4184  }
4185 
4186  // and now end attributes
4187  if (xCntxt)
4188  {
4189  EndContext(xCntxt.get());
4190  SetAttr(); // because of JavaScript set paragraph attributes as fast as possible
4191  xCntxt.reset();
4192  }
4193 
4194  // reset existing style
4195  SetTextCollAttrs();
4196 
  // NOTE(review): line 4197 is missing from this listing.
4198 }
4199 
// Starts a block element that is mapped to the text collection nColl
// (BLOCKQUOTE, PRE, LISTING, XMP, ADDRESS, DT, DD): reads the
// ID/STYLE/CLASS/LANG/DIR options, starts a new paragraph with an append
// mode that depends on nToken, creates and pushes the context and sets the
// new collection's attributes.
4200 void SwHTMLParser::NewTextFormatColl( HtmlTokenId nToken, sal_uInt16 nColl )
4201 {
4202  OUString aId, aStyle, aClass, aLang, aDir;
4203 
  // options are visited last to first, so for a repeated option the first
  // occurrence is assigned last and wins
4204  const HTMLOptions& rHTMLOptions = GetOptions();
4205  for (size_t i = rHTMLOptions.size(); i; )
4206  {
4207  const HTMLOption& rOption = rHTMLOptions[--i];
4208  switch( rOption.GetToken() )
4209  {
4210  case HtmlOptionId::ID:
4211  aId = rOption.GetString();
4212  break;
4213  case HtmlOptionId::STYLE:
4214  aStyle = rOption.GetString();
4215  break;
4216  case HtmlOptionId::CLASS:
4217  aClass = rOption.GetString();
4218  break;
4219  case HtmlOptionId::LANG:
4220  aLang = rOption.GetString();
4221  break;
4222  case HtmlOptionId::DIR:
4223  aDir = rOption.GetString();
4224  break;
4225  default: break;
4226  }
4227  }
4228 
4229  // open a new paragraph
  // NOTE(review): line 4230 is missing from this listing; presumably it
  // declares the eMode variable assigned below -- verify.
4231  switch( nToken )
4232  {
4233  case HtmlTokenId::LISTING_ON:
4234  case HtmlTokenId::XMP_ON:
4235  // These both tags will be mapped to the PRE style. For the case that a
4236  // a CLASS exists we will delete it so that we don't get the CLASS of
4237  // the PRE style.
4238  aClass.clear();
4239  [[fallthrough]];
4240  case HtmlTokenId::BLOCKQUOTE_ON:
4241  case HtmlTokenId::BLOCKQUOTE30_ON:
4242  case HtmlTokenId::PREFORMTXT_ON:
4243  eMode = AM_SPACE;
4244  break;
4245  case HtmlTokenId::ADDRESS_ON:
4246  eMode = AM_NOSPACE; // ADDRESS can follow on a <P> without </P>
4247  break;
4248  case HtmlTokenId::DT_ON:
4249  case HtmlTokenId::DD_ON:
4250  eMode = AM_SOFTNOSPACE;
4251  break;
4252  default:
4253  OSL_ENSURE( false, "unknown style" );
4254  break;
4255  }
  // a non-empty current paragraph is terminated with the chosen mode; in an
  // empty one only paragraph spacing is added, and only for AM_SPACE
4256  if( m_pPam->GetPoint()->nContent.GetIndex() )
4257  AppendTextNode( eMode );
4258  else if( AM_SPACE==eMode )
4259  AddParSpace();
4260 
4261  // ... and save in a context
4262  std::unique_ptr<HTMLAttrContext> xCntxt(new HTMLAttrContext(nToken, nColl, aClass));
4263 
4264  // parse styles (regarding class see also NewPara)
4265  if (HasStyleOptions(aStyle, aId, {}, &aLang, &aDir))
4266  {
4267  SfxItemSet aItemSet( m_xDoc->GetAttrPool(), m_pCSS1Parser->GetWhichMap() );
4268  SvxCSS1PropertyInfo aPropInfo;
4269 
4270  if (ParseStyleOptions(aStyle, aId, OUString(), aItemSet, aPropInfo, &aLang, &aDir))
4271  {
4272  OSL_ENSURE( aClass.isEmpty() || !m_pCSS1Parser->GetClass( aClass ),
4273  "Class is not considered" );
4274  DoPositioning( aItemSet, aPropInfo, xCntxt.get() );
4275  InsertAttrs( aItemSet, aPropInfo, xCntxt.get() );
4276  }
4277  }
4278 
4279  PushContext(xCntxt);
4280 
4281  // set the new style
4282  SetTextCollAttrs(m_aContexts.back().get());
4283 
4284  // update progress bar
4285  ShowStatline();
4286 }
4287 
// NOTE(review): the signature line (4288) is missing from this listing; the
// body uses a parameter named nToken -- presumably
// SwHTMLParser::EndTextFormatColl, verify against the file.
// Ends a block element opened by NewTextFormatColl: terminates the current
// paragraph with a mode that depends on the element, pops the context for
// the "on" form of nToken, ends its attributes and resets the text
// collection attributes.
4289 {
  // NOTE(review): line 4290 is missing from this listing; presumably it
  // declares the eMode variable assigned below -- verify.
4291  switch( getOnToken(nToken) )
4292  {
4293  case HtmlTokenId::BLOCKQUOTE_ON:
4294  case HtmlTokenId::BLOCKQUOTE30_ON:
4295  case HtmlTokenId::PREFORMTXT_ON:
4296  case HtmlTokenId::LISTING_ON:
4297  case HtmlTokenId::XMP_ON:
4298  eMode = AM_SPACE;
4299  break;
4300  case HtmlTokenId::ADDRESS_ON:
4301  case HtmlTokenId::DT_ON:
4302  case HtmlTokenId::DD_ON:
4303  eMode = AM_SOFTNOSPACE;
4304  break;
4305  default:
4306  OSL_ENSURE( false, "unknown style" );
4307  break;
4308  }
  // a non-empty current paragraph is terminated with the chosen mode; in an
  // empty one only paragraph spacing is added, and only for AM_SPACE
4309  if( m_pPam->GetPoint()->nContent.GetIndex() )
4310  AppendTextNode( eMode );
4311  else if( AM_SPACE==eMode )
4312  AddParSpace();
4313 
4314  // pop current context of stack
4315  std::unique_ptr<HTMLAttrContext> xCntxt(PopContext(getOnToken(nToken)));
4316 
4317  // and now end attributes
4318  if (xCntxt)
4319  {
4320  EndContext(xCntxt.get());
4321  SetAttr(); // because of JavaScript set paragraph attributes as fast as possible
4322  xCntxt.reset();
4323  }
4324 
4325  // reset existing style
4326  SetTextCollAttrs();
4327 }
4328 
// NOTE(review): the signature line (4329) is missing from this listing --
// presumably SwHTMLParser::NewDefList(), verify against the file.
// Starts a definition list: reads the ID/STYLE/CLASS/LANG/DIR options,
// starts a new paragraph, increments m_nDefListDeep, creates a DEFLIST_ON
// context that stores the margins for this level, applies style options and
// pushes the context.
4330 {
4331  OUString aId, aStyle, aClass, aLang, aDir;
4332 
  // options are visited last to first, so for a repeated option the first
  // occurrence is assigned last and wins
4333  const HTMLOptions& rHTMLOptions = GetOptions();
4334  for (size_t i = rHTMLOptions.size(); i; )
4335  {
4336  const HTMLOption& rOption = rHTMLOptions[--i];
4337  switch( rOption.GetToken() )
4338  {
4339  case HtmlOptionId::ID:
4340  aId = rOption.GetString();
4341  break;
4342  case HtmlOptionId::STYLE:
4343  aStyle = rOption.GetString();
4344  break;
4345  case HtmlOptionId::CLASS:
4346  aClass = rOption.GetString();
4347  break;
4348  case HtmlOptionId::LANG:
4349  aLang = rOption.GetString();
4350  break;
4351  case HtmlOptionId::DIR:
4352  aDir = rOption.GetString();
4353  break;
4354  default: break;
4355  }
4356  }
4357 
4358  // open a new paragraph
  // paragraph spacing is only used for an outermost list, i.e. when neither
  // a numbered list nor another definition list is open
4359  bool bSpace = (GetNumInfo().GetDepth() + m_nDefListDeep) == 0;
4360  if( m_pPam->GetPoint()->nContent.GetIndex() )
4361  AppendTextNode( bSpace ? AM_SPACE : AM_SOFTNOSPACE );
4362  else if( bSpace )
4363  AddParSpace();
4364 
4365  // one level more
4366  m_nDefListDeep++;
4367 
  // look at the nearest enclosing list-related context: a DD sets bInDD,
  // any list element sets bNotInDD; the search stops at the first hit
4368  bool bInDD = false, bNotInDD = false;
4369  auto nPos = m_aContexts.size();
4370  while( !bInDD && !bNotInDD && nPos>m_nContextStMin )
4371  {
4372  HtmlTokenId nCntxtToken = m_aContexts[--nPos]->GetToken();
4373  switch( nCntxtToken )
4374  {
4375  case HtmlTokenId::DEFLIST_ON:
4376  case HtmlTokenId::DIRLIST_ON:
4377  case HtmlTokenId::MENULIST_ON:
4378  case HtmlTokenId::ORDERLIST_ON:
4379  case HtmlTokenId::UNORDERLIST_ON:
4380  bNotInDD = true;
4381  break;
4382  case HtmlTokenId::DD_ON:
4383  bInDD = true;
4384  break;
4385  default: break;
4386  }
4387  }
4388 
4389  // ... and save in a context
4390  std::unique_ptr<HTMLAttrContext> xCntxt(new HTMLAttrContext(HtmlTokenId::DEFLIST_ON));
4391 
4392  // in it save also the margins
4393  sal_uInt16 nLeft=0, nRight=0;
4394  short nIndent=0;
4395  GetMarginsFromContext( nLeft, nRight, nIndent );
4396 
4397  // The indentation, which already results from a DL, correlates with a DT
4398  // on the current level and this correlates to a DD from the previous level.
4399  // For a level >=2 we must add DD distance.
4400  if( !bInDD && m_nDefListDeep > 1 )
4401  {
4402 
4403  // and the one of the DT-style of the current level
  // NOTE(review): the comment above says DT-style, but the collection
  // fetched below is RES_POOLCOLL_HTML_DD.
4404  SvxLRSpaceItem rLRSpace =
4405  m_pCSS1Parser->GetTextFormatColl(RES_POOLCOLL_HTML_DD, OUString())
4406  ->GetLRSpace();
4407  nLeft = nLeft + static_cast< sal_uInt16 >(rLRSpace.GetTextLeft());
4408  }
4409 
4410  xCntxt->SetMargins( nLeft, nRight, nIndent );
4411 
4412  // parse styles
4413  if( HasStyleOptions( aStyle, aId, aClass, &aLang, &aDir ) )
4414  {
4415  SfxItemSet aItemSet( m_xDoc->GetAttrPool(), m_pCSS1Parser->GetWhichMap() );
4416  SvxCSS1PropertyInfo aPropInfo;
4417 
4418  if( ParseStyleOptions( aStyle, aId, aClass, aItemSet, aPropInfo, &aLang, &aDir ) )
4419  {
4420  DoPositioning( aItemSet, aPropInfo, xCntxt.get() );
4421  InsertAttrs( aItemSet, aPropInfo, xCntxt.get() );
4422  }
4423  }
4424 
4425  PushContext(xCntxt);
4426 
4427  // set the attributes of the new style
4428  if( m_nDefListDeep > 1 )
4429  SetTextCollAttrs(m_aContexts.back().get());
4430 }
4431 
4433 {
4434  bool bSpace = (GetNumInfo().GetDepth() + m_nDefListDeep) == 1;
4435  if( m_pPam->GetPoint()->nContent.GetIndex() )
4436  AppendTextNode( bSpace ? AM_SPACE : AM_SOFTNOSPACE );
4437  else if( bSpace )
4438  AddParSpace();
4439 
4440  // one level less
4441  if( m_nDefListDeep > 0 )
4442  m_nDefListDeep--;
4443 
4444  // pop current context of stack
4445  std::unique_ptr<HTMLAttrContext> xCntxt(PopContext(HtmlTokenId::DEFLIST_ON));
4446 
4447  // and now end attributes
4448  if (xCntxt)
4449  {
4450  EndContext(xCntxt.get());
4451  SetAttr(); // because of JavaScript set paragraph attributes as fast as possible
4452  xCntxt.reset();
4453  }
4454 
4455  // and set style
4456  SetTextCollAttrs();
4457 }
4458 
4460 {
4461  // determine if the DD/DT exist in a DL
4462  bool bInDefList = false, bNotInDefList = false;
4463  auto nPos = m_aContexts.size();
4464  while( !bInDefList && !bNotInDefList && nPos>m_nContextStMin )
4465  {
4466  HtmlTokenId nCntxtToken = m_aContexts[--nPos]->GetToken();
4467  switch( nCntxtToken )
4468  {
4469  case HtmlTokenId::DEFLIST_ON:
4470  bInDefList = true;
4471  break;
4472  case HtmlTokenId::DIRLIST_ON:
4473  case HtmlTokenId::MENULIST_ON:
4474  case HtmlTokenId::ORDERLIST_ON:
4475  case HtmlTokenId::UNORDERLIST_ON:
4476  bNotInDefList = true;
4477  break;
4478  default: break;
4479  }
4480  }
4481 
4482  // if not, then implicitly open a new DL
4483  if( !bInDefList )
4484  {
4485  m_nDefListDeep++;
4486  OSL_ENSURE( m_nOpenParaToken == HtmlTokenId::NONE,
4487  "Now an open paragraph element will be lost." );
4488  m_nOpenParaToken = nToken;
4489  }
4490 
4491  NewTextFormatColl( nToken, static_cast< sal_uInt16 >(nToken==HtmlTokenId::DD_ON ? RES_POOLCOLL_HTML_DD
4492  : RES_POOLCOLL_HTML_DT) );
4493 }
4494 
// NOTE(review): the signature line of this function is not part of this
// listing. The body uses a token parameter `nToken`; HtmlTokenId::NONE is
// treated as "match either DD or DT".
//
// Searches the context stack (top down, not below m_nContextStMin) for a
// DD_ON/DT_ON context matching the token, removes it from the stack and ends
// its attributes.
4496 {
4497  // open a new paragraph
4498  if( nToken == HtmlTokenId::NONE && m_pPam->GetPoint()->nContent.GetIndex() )
// NOTE(review): listing line 4499 (the statement controlled by the `if` above)
// is missing from this excerpt; restore it from the real source before use.
4500 
4501  // search context matching the token and fetch it from stack
// getOnToken() presumably maps an end token to its start ("_ON") token - confirm.
4502  nToken = getOnToken(nToken);
4503  std::unique_ptr<HTMLAttrContext> xCntxt;
4504  auto nPos = m_aContexts.size();
// the loop ends as soon as a context has been taken over into xCntxt
4505  while( !xCntxt && nPos>m_nContextStMin )
4506  {
4507  HtmlTokenId nCntxtToken = m_aContexts[--nPos]->GetToken();
4508  switch( nCntxtToken )
4509  {
4510  case HtmlTokenId::DD_ON:
4511  case HtmlTokenId::DT_ON:
4512  if( nToken == HtmlTokenId::NONE || nToken == nCntxtToken )
4513  {
// take ownership of the context, then drop the emptied slot from the stack
4514  xCntxt = std::move(m_aContexts[nPos]);
4515  m_aContexts.erase( m_aContexts.begin() + nPos );
4516  }
4517  break;
4518  case HtmlTokenId::DEFLIST_ON:
4519  // don't look at DD/DT outside the current DefList
4520  case HtmlTokenId::DIRLIST_ON:
4521  case HtmlTokenId::MENULIST_ON:
4522  case HtmlTokenId::ORDERLIST_ON:
4523  case HtmlTokenId::UNORDERLIST_ON:
4524  // and also not outside another list
// NOTE(review): listing line 4525 is missing here; judging by the comments it
// is the statement that stops the search at a list boundary - verify.
4526  break;
4527  default: break;
4528  }
4529  }
4530 
4531  // and now end attributes
4532  if (xCntxt)
4533  {
4534  EndContext(xCntxt.get());
4535  SetAttr(); // because of JavaScript set paragraph attributes as fast as possible
4536  }
4537 }
4538 
4548 bool SwHTMLParser::HasCurrentParaFlys( bool bNoSurroundOnly,
4549  bool bSurroundOnly ) const
4550 {
4551  SwNodeIndex& rNodeIdx = m_pPam->GetPoint()->nNode;
4552 
4553  const SwFrameFormats& rFrameFormatTable = *m_xDoc->GetSpzFrameFormats();
4554 
4555  bool bFound = false;
4556  for ( size_t i=0; i<rFrameFormatTable.size(); i++ )
4557  {
4558  const SwFrameFormat *const pFormat = rFrameFormatTable[i];
4559  SwFormatAnchor const*const pAnchor = &pFormat->GetAnchor();
4560  // A frame was found, when
4561  // - it is paragraph-bound, and
4562  // - is anchored in current paragraph, and
4563  // - every paragraph-bound frame counts, or
4564  // - (only frames without wrapping count and) the frame doesn't have
4565  // a wrapping
4566  SwPosition const*const pAPos = pAnchor->GetContentAnchor();
4567  if (pAPos &&
4568  ((RndStdIds::FLY_AT_PARA == pAnchor->GetAnchorId()) ||
4569  (RndStdIds::FLY_AT_CHAR == pAnchor->GetAnchorId())) &&
4570  pAPos->nNode == rNodeIdx )
4571  {
4572  if( !(bNoSurroundOnly || bSurroundOnly) )
4573  {
4574  bFound = true;
4575  break;
4576  }
4577  else
4578  {
4579  // When looking for frames with wrapping, also disregard
4580  // ones with wrap-through. In this case it's (still) HIDDEN-Controls,
4581  // and you don't want to evade those when positioning.
4582  css::text::WrapTextMode eSurround = pFormat->GetSurround().GetSurround();
4583  if( bNoSurroundOnly )
4584  {
4585  if( css::text::WrapTextMode_NONE==eSurround )
4586  {
4587  bFound = true;
4588  break;
4589  }
4590  }
4591  if( bSurroundOnly )
4592  {
4593  if( css::text::WrapTextMode_NONE==eSurround )
4594  {
4595  bFound = false;
4596  break;
4597  }
4598  else if( css::text::WrapTextMode_THROUGH!=eSurround )
4599  {
4600  bFound = true;
4601  // Continue searching: It's possible that some without
4602  // wrapping will follow...
4603  }
4604  }
4605  }
4606  }
4607  }
4608 
4609  return bFound;
4610 }
4611 
4612 // the special methods for inserting of objects
4613 
4615 {
4616  const SwContentNode* pCNd = m_pPam->GetContentNode();
4617  return pCNd ? &pCNd->GetAnyFormatColl() : nullptr;
4618 }
4619 
// NOTE(review): the signature line of this function is not part of this
// listing. The body uses a parameter `pContext` that may be null.
//
// Determines, from the contexts [m_nContextStAttrMin, end) of the context
// stack, which paragraph style has to be set at the current PaM and which
// attributes of outer styles have to be applied as hard attributes, then
// sets both. Left/right margin and first line indent are taken from the
// contexts as well. If pContext is non-null and carries a text format
// collection, the resulting margins and the style's upper/lower spacing are
// stored back into pContext.
4621 {
4622  SwTextFormatColl *pCollToSet = nullptr; // the style to set
// pItemSet is a raw owning pointer: allocated with new below, deleted at the end
4623  SfxItemSet *pItemSet = nullptr; // set of hard attributes
4624  sal_uInt16 nTopColl = pContext ? pContext->GetTextFormatColl() : 0;
4625  const OUString rTopClass = pContext ? pContext->GetClass() : OUString();
4626  sal_uInt16 nDfltColl = RES_POOLCOLL_TEXT;
4627 
4628  bool bInPRE=false; // some context info
4629 
4630  sal_uInt16 nLeftMargin = 0, nRightMargin = 0; // the margins and
4631  short nFirstLineIndent = 0; // indentations
4632 
// Pass over the contexts from the bottom of the attribute range to the top;
// later (inner) contexts override what earlier (outer) ones established.
4633  for( auto i = m_nContextStAttrMin; i < m_aContexts.size(); ++i )
4634  {
4635  const HTMLAttrContext *pCntxt = m_aContexts[i].get();
4636 
4637  sal_uInt16 nColl = pCntxt->GetTextFormatColl();
4638  if( nColl )
4639  {
4640  // There is a style to set. Then at first we must decide,
4641  // if the style can be set.
4642  bool bSetThis = true;
4643  switch( nColl )
4644  {
4645  case RES_POOLCOLL_HTML_PRE:
4646  bInPRE = true;
4647  break;
4648  case RES_POOLCOLL_TEXT:
4649  // <TD><P CLASS=xxx> must become TD.xxx
4650  if( nDfltColl==RES_POOLCOLL_TABLE ||
4651  nDfltColl==RES_POOLCOLL_TABLE_HDLN )
4652  nColl = nDfltColl;
4653  break;
4654  case RES_POOLCOLL_HTML_HR:
4655  // also <HR> in <PRE> set as style, otherwise it can't
4656  // be exported anymore
4657  break;
4658  default:
// inside PRE every other style is only applied as hard attributes
4659  if( bInPRE )
4660  bSetThis = false;
4661  break;
4662  }
4663 
4664  SwTextFormatColl *pNewColl =
4665  m_pCSS1Parser->GetTextFormatColl( nColl, pCntxt->GetClass() );
4666 
4667  if( bSetThis )
4668  {
4669  // If now a different style should be set as previously, the
4670  // previous style must be replaced by hard attribution.
4671 
4672  if( pCollToSet )
4673  {
4674  // insert the attributes hard, which previous style sets
4675  if( !pItemSet )
4676  pItemSet = new SfxItemSet( pCollToSet->GetAttrSet() );
4677  else
4678  {
4679  const SfxItemSet& rCollSet = pCollToSet->GetAttrSet();
4680  SfxItemSet aItemSet( *rCollSet.GetPool(),
4681  rCollSet.GetRanges() );
4682  aItemSet.Set( rCollSet );
4683  pItemSet->Put( aItemSet );
4684  }
4685  // but remove the attributes, which the current style sets,
4686  // because otherwise they will be overwritten later
4687  pItemSet->Differentiate( pNewColl->GetAttrSet() );
4688  }
4689 
4690  pCollToSet = pNewColl;
4691  }
4692  else
4693  {
4694  // hard attribution
4695  if( !pItemSet )
4696  pItemSet = new SfxItemSet( pNewColl->GetAttrSet() );
4697  else
4698  {
4699  const SfxItemSet& rCollSet = pNewColl->GetAttrSet();
4700  SfxItemSet aItemSet( *rCollSet.GetPool(),
4701  rCollSet.GetRanges() );
4702  aItemSet.Set( rCollSet );
4703  pItemSet->Put( aItemSet );
4704  }
4705  }
4706  }
4707  else
4708  {
4709  // Maybe a default style exists?
4710  nColl = pCntxt->GetDefaultTextFormatColl();
4711  if( nColl )
4712  nDfltColl = nColl;
4713  }
4714 
4715  // if applicable fetch new paragraph indents
4716  if( pCntxt->IsLRSpaceChanged() )
4717  {
4718  sal_uInt16 nLeft=0, nRight=0;
4719 
4720  pCntxt->GetMargins( nLeft, nRight, nFirstLineIndent );
4721  nLeftMargin = nLeft;
4722  nRightMargin = nRight;
4723  }
4724  }
4725 
4726  // If in current context a new style should be set,
4727  // its paragraph margins must be inserted in the context.
4728  if( pContext && nTopColl )
4729  {
4730  // <TD><P CLASS=xxx> must become TD.xxx
4731  if( nTopColl==RES_POOLCOLL_TEXT &&
4732  (nDfltColl==RES_POOLCOLL_TABLE ||
4733  nDfltColl==RES_POOLCOLL_TABLE_HDLN) )
4734  nTopColl = nDfltColl;
4735 
4736  const SwTextFormatColl *pTopColl =
4737  m_pCSS1Parser->GetTextFormatColl( nTopColl, rTopClass );
4738  const SfxItemSet& rItemSet = pTopColl->GetAttrSet();
4739  if( const SvxLRSpaceItem *pLRItem = rItemSet.GetItemIfSet(RES_LR_SPACE) )
4740  {
4741  sal_Int32 nLeft = pLRItem->GetTextLeft();
4742  sal_Int32 nRight = pLRItem->GetRight();
4743  nFirstLineIndent = pLRItem->GetTextFirstLineOffset();
4744 
4745  // In Definition lists the margins also contain the margins from the previous levels
4746  if( RES_POOLCOLL_HTML_DD == nTopColl )
4747  {
4748  const SvxLRSpaceItem& rDTLRSpace = m_pCSS1Parser
4749  ->GetTextFormatColl(RES_POOLCOLL_HTML_DT, OUString())
4750  ->GetLRSpace();
4751  nLeft -= rDTLRSpace.GetTextLeft();
4752  nRight -= rDTLRSpace.GetRight();
4753  }
4754  else if( RES_POOLCOLL_HTML_DT == nTopColl )
4755  {
4756  nLeft = 0;
4757  nRight = 0;
4758  }
4759 
4760  // the paragraph margins add up
// NOTE(review): nLeft/nRight are sal_Int32 and are narrowed to sal_uInt16 by
// the casts below; a negative difference from the DD branch would wrap.
4761  nLeftMargin = nLeftMargin + static_cast< sal_uInt16 >(nLeft);
4762  nRightMargin = nRightMargin + static_cast< sal_uInt16 >(nRight);
4763 
4764  pContext->SetMargins( nLeftMargin, nRightMargin,
4765  nFirstLineIndent );
4766  }
4767  if( const SvxULSpaceItem* pULItem = rItemSet.GetItemIfSet(RES_UL_SPACE) )
4768  {
4769  pContext->SetULSpace( pULItem->GetUpper(), pULItem->GetLower() );
4770  }
4771  }
4772 
4773  // If no style is set in the context use the text body.
4774  if( !pCollToSet )
4775  {
4776  pCollToSet = m_pCSS1Parser->GetTextCollFromPool( nDfltColl );
4777  const SvxLRSpaceItem& rLRItem = pCollToSet->GetLRSpace();
// values still at zero are filled in from the default style
4778  if( !nLeftMargin )
4779  nLeftMargin = static_cast< sal_uInt16 >(rLRItem.GetTextLeft());
4780  if( !nRightMargin )
4781  nRightMargin = static_cast< sal_uInt16 >(rLRItem.GetRight());
4782  if( !nFirstLineIndent )
4783  nFirstLineIndent = rLRItem.GetTextFirstLineOffset();
4784  }
4785 
4786  // remove previous hard attribution of paragraph
4787  for( auto pParaAttr : m_aParaAttrs )
4788  pParaAttr->Invalidate();
4789  m_aParaAttrs.clear();
4790 
4791  // set the style
4792  m_xDoc->SetTextFormatColl( *m_pPam, pCollToSet );
4793 
4794  // if applicable correct the paragraph indent
4795  const SvxLRSpaceItem& rLRItem = pCollToSet->GetLRSpace();
4796  bool bSetLRSpace = nLeftMargin != rLRItem.GetTextLeft() ||
4797  nFirstLineIndent != rLRItem.GetTextFirstLineOffset() ||
4798  nRightMargin != rLRItem.GetRight();
4799 
4800  if( bSetLRSpace )
4801  {
4802  SvxLRSpaceItem aLRItem( rLRItem );
4803  aLRItem.SetTextLeft( nLeftMargin );
4804  aLRItem.SetRight( nRightMargin );
4805  aLRItem.SetTextFirstLineOffset( nFirstLineIndent );
// with an item set pending, the margins travel with it; otherwise they are
// set as a separate paragraph attribute that is remembered in m_aParaAttrs
4806  if( pItemSet )
4807  pItemSet->Put( aLRItem );
4808  else
4809  {
4810  NewAttr(m_xAttrTab, &m_xAttrTab->pLRSpace, aLRItem);
4811  m_xAttrTab->pLRSpace->SetLikePara();
4812  m_aParaAttrs.push_back( m_xAttrTab->pLRSpace );
4813  EndAttr( m_xAttrTab->pLRSpace, false );
4814  }
4815  }
4816 
4817  // and now set the attributes
4818  if( pItemSet )
4819  {
4820  InsertParaAttrs( *pItemSet );
// releases the set allocated with new above
4821  delete pItemSet;
4822  }
4823 }
4824 
4826 {
4827  OUString aId, aStyle, aLang, aDir;
4828  OUString aClass;
4829 
4830  const HTMLOptions& rHTMLOptions = GetOptions();
4831  for (size_t i = rHTMLOptions.size(); i; )
4832  {
4833  const HTMLOption& rOption = rHTMLOptions[--i];
4834  switch( rOption.GetToken() )
4835  {
4836  case HtmlOptionId::ID:
4837  aId = rOption.GetString();
4838  break;
4839  case HtmlOptionId::STYLE:
4840  aStyle = rOption.GetString();
4841  break;
4842  case HtmlOptionId::CLASS:
4843  aClass = rOption.GetString();
4844  break;
4845  case HtmlOptionId::LANG:
4846  aLang = rOption.GetString();
4847  break;
4848  case HtmlOptionId::DIR:
4849  aDir = rOption.GetString();
4850  break;
4851  default: break;
4852  }
4853  }
4854 
4855  // create a new context
4856  std::unique_ptr<HTMLAttrContext> xCntxt(new HTMLAttrContext(nToken));
4857 
4858  // set the style and save it in the context
4859  SwCharFormat* pCFormat = m_pCSS1Parser->GetChrFormat( nToken, aClass );
4860  OSL_ENSURE( pCFormat, "No character format found for token" );
4861 
4862  // parse styles (regarding class see also NewPara)
4863  if (HasStyleOptions(aStyle, aId, {}, &aLang, &aDir))
4864  {
4865  SfxItemSet aItemSet( m_xDoc->GetAttrPool(), m_pCSS1Parser->GetWhichMap() );
4866  SvxCSS1PropertyInfo aPropInfo;
4867 
4868  if (ParseStyleOptions(aStyle, aId, OUString(), aItemSet, aPropInfo, &aLang, &aDir))
4869  {
4870  OSL_ENSURE( aClass.isEmpty() || !m_pCSS1Parser->GetClass( aClass ),
4871  "Class is not considered" );
4872  DoPositioning( aItemSet, aPropInfo, xCntxt.get() );
4873  InsertAttrs( aItemSet, aPropInfo, xCntxt.get(), true );
4874  }
4875  }
4876 
4877  // Character formats are stored in their own stack and can never be inserted
4878  // by styles. Therefore the attribute doesn't exist in CSS1-Which-Range.
4879  if( pCFormat )
4880  InsertAttr( &m_xAttrTab->pCharFormats, SwFormatCharFormat( pCFormat ), xCntxt.get() );
4881 
4882  // save the context
4883  PushContext(xCntxt);
4884 }
4885 
// NOTE(review): the signature line of this function is not part of this
// listing.
//
// Reads the TYPE/ALIGN/WIDTH/HEIGHT/SIZE options and, depending on the
// spacer type, inserts
//  - HTML_SPTYPE_BLOCK: an empty, content-protected fly frame of the given size,
//  - HTML_SPTYPE_VERT:  additional lower paragraph spacing,
//  - HTML_SPTYPE_HORI:  a first line indent (empty paragraph) or a kerned
//                       space character (non-empty paragraph).
4887 {
4888  // and if applicable change it via the options
4889  sal_Int16 eVertOri = text::VertOrientation::TOP;
4890  sal_Int16 eHoriOri = text::HoriOrientation::NONE;
4891  Size aSize( 0, 0);
4892  tools::Long nSize = 0;
4893  bool bPercentWidth = false;
4894  bool bPercentHeight = false;
// horizontal is the type used when no TYPE option is given
4895  sal_uInt16 nType = HTML_SPTYPE_HORI;
4896 
4897  const HTMLOptions& rHTMLOptions = GetOptions();
4898  for (size_t i = rHTMLOptions.size(); i; )
4899  {
4900  const HTMLOption& rOption = rHTMLOptions[--i];
4901  switch( rOption.GetToken() )
4902  {
4903  case HtmlOptionId::TYPE:
4904  rOption.GetEnum( nType, aHTMLSpacerTypeTable );
4905  break;
4906  case HtmlOptionId::ALIGN:
// the same ALIGN value is looked up in both the vertical and horizontal table
4907  eVertOri =
4908  rOption.GetEnum( aHTMLImgVAlignTable,
4909  eVertOri );
4910  eHoriOri =
4911  rOption.GetEnum( aHTMLImgHAlignTable,
4912  eHoriOri );
4913  break;
4914  case HtmlOptionId::WIDTH:
4915  // First only save as pixel value!
4916  bPercentWidth = (rOption.GetString().indexOf('%') != -1);
4917  aSize.setWidth( static_cast<tools::Long>(rOption.GetNumber()) );
4918  break;
4919  case HtmlOptionId::HEIGHT:
4920  // First only save as pixel value!
4921  bPercentHeight = (rOption.GetString().indexOf('%') != -1);
4922  aSize.setHeight( static_cast<tools::Long>(rOption.GetNumber()) );
4923  break;
4924  case HtmlOptionId::SIZE:
4925  // First only save as pixel value!
4926  nSize = rOption.GetNumber();
4927  break;
4928  default: break;
4929  }
4930  }
4931 
4932  switch( nType )
4933  {
4934  case HTML_SPTYPE_BLOCK:
4935  {
4936  // create an empty text frame
4937 
4938  // fetch the ItemSet
4939  SfxItemSetFixed<RES_FRMATR_BEGIN, RES_FRMATR_END-1> aFrameSet( m_xDoc->GetAttrPool() );
4940  if( !IsNewDoc() )
4941  Reader::ResetFrameFormatAttrs( aFrameSet );
4942 
4943  // set the anchor and the adjustment
4944  SetAnchorAndAdjustment( eVertOri, eHoriOri, aFrameSet );
4945 
4946  // and the size of the frame
4947  Size aDfltSz( MINFLY, MINFLY );
4948  Size aSpace( 0, 0 );
// the spacer has no CSS1 properties of its own, hence the empty dummies
4949  SfxItemSet aDummyItemSet( m_xDoc->GetAttrPool(),
4950  m_pCSS1Parser->GetWhichMap() );
4951  SvxCSS1PropertyInfo aDummyPropInfo;
4952 
4953  SetFixSize( aSize, aDfltSz, bPercentWidth, bPercentHeight,
4954  aDummyPropInfo, aFrameSet );
4955  SetSpace( aSpace, aDummyItemSet, aDummyPropInfo, aFrameSet );
4956 
4957  // protect the content
4958  SvxProtectItem aProtectItem( RES_PROTECT) ;
4959  aProtectItem.SetContentProtect( true );
4960  aFrameSet.Put( aProtectItem );
4961 
4962  // create the frame
4963  RndStdIds eAnchorId =
4964  aFrameSet.Get(RES_ANCHOR).GetAnchorId();
4965  SwFrameFormat *pFlyFormat = m_xDoc->MakeFlySection( eAnchorId,
4966  m_pPam->GetPoint(), &aFrameSet );
4967  // Possibly create frames and register auto-bound frames.
4968  RegisterFlyFrame( pFlyFormat );
4969  }
4970  break;
4971  case HTML_SPTYPE_VERT:
4972  if( nSize > 0 )
4973  {
// NOTE(review): listing lines 4974 and 4976 are missing from this excerpt.
// The remaining fragment converts Size(0,nSize) with PixelToLogic to
// MapUnit::MapTwip and takes the height; restore the lost condition and
// assignment from the real source.
4975  {
4977  ->PixelToLogic( Size(0,nSize),
4978  MapMode(MapUnit::MapTwip) ).Height();
4979  }
4980 
4981  // set a paragraph margin
4982  SwTextNode *pTextNode = nullptr;
4983  if( !m_pPam->GetPoint()->nContent.GetIndex() )
4984  {
4985  // if possible change the bottom paragraph margin
4986  // of previous node
4987 
4988  SetAttr(); // set still open paragraph attributes
4989 
// node directly in front of the current one; GetTextNode() yields null
// when that node is not a text node
4990  pTextNode = m_xDoc->GetNodes()[m_pPam->GetPoint()->nNode.GetIndex()-1]
4991  ->GetTextNode();
4992 
4993  // If the previous paragraph isn't a text node, then now an
4994  // empty paragraph is created, which already generates a single
4995  // line of spacing.
4996  if( !pTextNode )
4997  nSize = nSize>HTML_PARSPACE ? nSize-HTML_PARSPACE : 0;
4998  }
4999 
5000  if( pTextNode )
5001  {
5002  SvxULSpaceItem aULSpace( pTextNode->SwContentNode::GetAttr( RES_UL_SPACE ) );
5003  aULSpace.SetLower( aULSpace.GetLower() + o3tl::narrowing<sal_uInt16>(nSize) );
5004  pTextNode->SetAttr( aULSpace );
5005  }
5006  else
5007  {
5008  NewAttr(m_xAttrTab, &m_xAttrTab->pULSpace, SvxULSpaceItem(0, o3tl::narrowing<sal_uInt16>(nSize), RES_UL_SPACE));
5009  EndAttr( m_xAttrTab->pULSpace, false );
5010 
5011  AppendTextNode(); // Don't change spacing!
5012  }
5013  }
5014  break;
5015  case HTML_SPTYPE_HORI:
5016  if( nSize > 0 )
5017  {
5018  // If the paragraph is still empty, set first line
5019  // indentation, otherwise apply letter spacing over a space.
5020 
// NOTE(review): listing lines 5021 and 5023 are missing from this excerpt.
// The remaining fragment converts Size(nSize,0) with PixelToLogic to
// MapUnit::MapTwip and takes the width; restore from the real source.
5022  {
5024  ->PixelToLogic( Size(nSize,0),
5025  MapMode(MapUnit::MapTwip) ).Width();
5026  }
5027 
5028  if( !m_pPam->GetPoint()->nContent.GetIndex() )
5029  {
5030  sal_uInt16 nLeft=0, nRight=0;
5031  short nIndent = 0;
5032 
5033  GetMarginsFromContextWithNumberBullet( nLeft, nRight, nIndent );
5034  nIndent = nIndent + static_cast<short>(nSize);
5035 
5036  SvxLRSpaceItem aLRItem( RES_LR_SPACE );
5037  aLRItem.SetTextLeft( nLeft );
5038  aLRItem.SetRight( nRight );
5039  aLRItem.SetTextFirstLineOffset( nIndent );
5040 
5041  NewAttr(m_xAttrTab, &m_xAttrTab->pLRSpace, aLRItem);
5042  EndAttr( m_xAttrTab->pLRSpace, false );
5043  }
5044  else
5045  {
// the kerning attribute spans exactly the single inserted space
5046  NewAttr(m_xAttrTab, &m_xAttrTab->pKerning, SvxKerningItem( static_cast<short>(nSize), RES_CHRATR_KERNING ));
5047  m_xDoc->getIDocumentContentOperations().InsertString( *m_pPam, " " );
5048  EndAttr( m_xAttrTab->pKerning );
5049  }
5050  }
5051  }
5052 }
5053 
// Converts a pixel value to twips using the application's default output
// device. The result is clamped to SAL_MAX_UINT16. A value of 0, or the
// absence of a default device, returns nPixel unchanged.
5054 sal_uInt16 SwHTMLParser::ToTwips( sal_uInt16 nPixel )
5055 {
5056  if( nPixel && Application::GetDefaultDevice() )
5057  {
// NOTE(review): listing line 5058 is missing from this excerpt. It has to
// declare nTwips and start the call whose argument list continues on the
// next line; restore it from the real source.
5059  Size( nPixel, nPixel ), MapMode( MapUnit::MapTwip ) ).Width();
// clamp before narrowing so large results do not wrap around
5060  return o3tl::narrowing<sal_uInt16>(std::min(nTwips, SwTwips(SAL_MAX_UINT16)));
5061  }
5062  else
5063  return nPixel;
5064 }
5065 
// NOTE(review): the signature line of this function is not part of this
// listing.
//
// Returns a width `nWidth` if it is non-zero; otherwise returns the width of
// m_aHTMLPageSize, which is computed on first use from the master page
// format (page size minus left/right and upper/lower spacing).
5067 {
// NOTE(review): listing line 5068, which declares and initialises nWidth, is
// missing from this excerpt; restore it from the real source.
5069  if( nWidth )
5070  return nWidth;
5071 
// a zero width means the page size has not been computed yet
5072  if( !m_aHTMLPageSize.Width() )
5073  {
5074  const SwFrameFormat& rPgFormat = m_pCSS1Parser->GetMasterPageDesc()->GetMaster();
5075 
5076  const SwFormatFrameSize& rSz = rPgFormat.GetFrameSize();
5077  const SvxLRSpaceItem& rLR = rPgFormat.GetLRSpace();
5078  const SvxULSpaceItem& rUL = rPgFormat.GetULSpace();
5079  const SwFormatCol& rCol = rPgFormat.GetCol();
5080 
5081  m_aHTMLPageSize.setWidth( rSz.GetWidth() - rLR.GetLeft() - rLR.GetRight() );
5082  m_aHTMLPageSize.setHeight( rSz.GetHeight() - rUL.GetUpper() - rUL.GetLower() );
5083 
5084  if( 1 < rCol.GetNumCols() )
// NOTE(review): listing line 5085 (the statement executed for multi-column
// pages) is missing from this excerpt. As the text stands, the closing brace
// follows the `if` directly; restore the statement from the real source.
5086  }
5087 
5088  return m_aHTMLPageSize.Width();
5089 }
5090 
5092 {
5093  OUString aId;
5094  const HTMLOptions& rHTMLOptions = GetOptions();
5095  for (size_t i = rHTMLOptions.size(); i; )
5096  {
5097  const HTMLOption& rOption = rHTMLOptions[--i];
5098  if( HtmlOptionId::ID==rOption.GetToken() )
5099  {
5100  aId = rOption.GetString();
5101  break;
5102  }
5103  }
5104 
5105  if( !aId.isEmpty() )
5106  InsertBookmark( aId );
5107 }
5108 
5110 {
5111  OUString aId, aStyle, aClass; // the id of bookmark
5113 
5114  // then we fetch the options
5115  const HTMLOptions& rHTMLOptions = GetOptions();
5116  for (size_t i = rHTMLOptions.size(); i; )
5117  {
5118  const HTMLOption& rOption = rHTMLOptions[--i];
5119  switch( rOption.GetToken() )
5120  {
5121  case HtmlOptionId::CLEAR:
5122  {
5123  const OUString &rClear = rOption.GetString();
5124  if( rClear.equalsIgnoreAsciiCase( OOO_STRING_SVTOOLS_HTML_AL_all ) )
5125  {
5126  eClear = SwLineBreakClear::ALL;
5127  }
5128  else if( rClear.equalsIgnoreAsciiCase( OOO_STRING_SVTOOLS_HTML_AL_left ) )
5129  {
5130  eClear = SwLineBreakClear::LEFT;
5131  }
5132  else if( rClear.equalsIgnoreAsciiCase( OOO_STRING_SVTOOLS_HTML_AL_right ) )
5133  {
5134  eClear = SwLineBreakClear::LEFT;
5135  }
5136  }
5137  break;
5138  case HtmlOptionId::ID:
5139  aId = rOption.GetString();
5140  break;
5141  case HtmlOptionId::STYLE:
5142  aStyle = rOption.GetString();
5143  break;
5144  case HtmlOptionId::CLASS:
5145  aClass = rOption.GetString();
5146  break;
5147  default: break;
5148  }
5149  }
5150 
5151  // parse styles
5152  std::shared_ptr<SvxFormatBreakItem> aBreakItem(std::make_shared<SvxFormatBreakItem>(SvxBreak::NONE, RES_BREAK));
5153  bool bBreakItem = false;
5154  if( HasStyleOptions( aStyle, aId, aClass ) )
5155  {
5156  SfxItemSet aItemSet( m_xDoc->GetAttrPool(), m_pCSS1Parser->GetWhichMap() );
5157  SvxCSS1PropertyInfo aPropInfo;
5158 
5159  if( ParseStyleOptions( aStyle, aId, aClass, aItemSet, aPropInfo ) )
5160  {
5161  if( m_pCSS1Parser->SetFormatBreak( aItemSet, aPropInfo ) )
5162  {
5163  aBreakItem.reset(aItemSet.Get(RES_BREAK).Clone());
5164  bBreakItem = true;
5165  }
5166  if( !aPropInfo.m_aId.isEmpty() )
5167  InsertBookmark( aPropInfo.m_aId );
5168  }
5169  }
5170 
5171  if( bBreakItem && SvxBreak::PageAfter == aBreakItem->GetBreak() )
5172  {
5173  NewAttr(m_xAttrTab, &m_xAttrTab->pBreak, *aBreakItem);
5174  EndAttr( m_xAttrTab->pBreak, false );
5175  }
5176 
5177  if (!bBreakItem)
5178  {
5179  if (eClear == SwLineBreakClear::NONE)
5180  {
5181  // If no CLEAR could or should be executed, a line break will be inserted
5182  m_xDoc->getIDocumentContentOperations().InsertString(*m_pPam, "\x0A");
5183  }
5184  else
5185  {
5186  // <BR CLEAR=xxx> is mapped an SwFormatLineBreak.
5187  SwTextNode* pTextNode = m_pPam->GetNode().GetTextNode();
5188  if (pTextNode)
5189  {
5190  SwFormatLineBreak aLineBreak(eClear);
5191  sal_Int32 nPos = m_pPam->GetPoint()->nContent.GetIndex();
5192  pTextNode->InsertItem(aLineBreak, nPos, nPos);
5193  }
5194  }
5195  }
5196  else if( m_pPam->GetPoint()->nContent.GetIndex() )
5197  {
5198  // If a CLEAR is executed in a non-empty paragraph, then after it
5199  // a new paragraph has to be opened.
5200  // MIB 21.02.97: Here actually we should change the bottom paragraph
5201  // margin to zero. This will fail for something like this <BR ..><P>
5202  // (>Netscape). That's why we don't do it.
5204  }
5205  if( bBreakItem && SvxBreak::PageBefore == aBreakItem->GetBreak() )
5206  {
5207  NewAttr(m_xAttrTab, &m_xAttrTab->pBreak, *aBreakItem);
5208  EndAttr( m_xAttrTab->pBreak, false );
5209  }
5210 }
5211 
// NOTE(review): the signature line of this function is not part of this
// listing.
//
// Inserts a horizontal rule: reads the ID/SIZE/WIDTH/ALIGN/NOSHADE/COLOR
// options, puts the rule into a paragraph of its own that uses the
// RES_POOLCOLL_HTML_HR style, sets a bottom border line for size/colour/
// shading, emulates WIDTH and ALIGN through paragraph indents (outside
// tables only), optionally inserts a bookmark for ID, and finally restores
// the style for the following paragraph.
5213 {
5214  sal_uInt16 nSize = 0;
5215  sal_uInt16 nWidth = 0;
5216 
5217  SvxAdjust eAdjust = SvxAdjust::End;
5218 
5219  bool bPercentWidth = false;
5220  bool bNoShade = false;
5221  bool bColor = false;
5222 
5223  Color aColor;
5224  OUString aId;
5225 
5226  // let's fetch the options
5227  const HTMLOptions& rHTMLOptions = GetOptions();
5228  for (size_t i = rHTMLOptions.size(); i; )
5229  {
5230  const HTMLOption& rOption = rHTMLOptions[--i];
5231  switch( rOption.GetToken() )
5232  {
5233  case HtmlOptionId::ID:
5234  aId = rOption.GetString();
5235  break;
5236  case HtmlOptionId::SIZE:
5237  nSize = o3tl::narrowing<sal_uInt16>(rOption.GetNumber());
5238  break;
5239  case HtmlOptionId::WIDTH:
5240  bPercentWidth = (rOption.GetString().indexOf('%') != -1);
5241  nWidth = o3tl::narrowing<sal_uInt16>(rOption.GetNumber());
5242  if( bPercentWidth && nWidth>=100 )
5243  {
5244  // the default case are 100% lines (no attributes necessary)
5245  nWidth = 0;
5246  bPercentWidth = false;
5247  }
5248  break;
5249  case HtmlOptionId::ALIGN:
5250  eAdjust = rOption.GetEnum( aHTMLPAlignTable, eAdjust );
5251  break;
5252  case HtmlOptionId::NOSHADE:
5253  bNoShade = true;
5254  break;
5255  case HtmlOptionId::COLOR:
5256  rOption.GetColor( aColor );
5257  bColor = true;
5258  break;
5259  default: break;
5260  }
5261  }
5262 
5263  if( m_pPam->GetPoint()->nContent.GetIndex() )
// NOTE(review): listing line 5264 (the statement controlled by the `if`
// above) is missing from this excerpt. As the text stands, the `if` would
// govern the following `if`; restore the statement from the real source.
5265  if( m_nOpenParaToken != HtmlTokenId::NONE )
5266  EndPara();
5267  AppendTextNode();
// NOTE(review): listing line 5268 is missing from this excerpt.
5269 
5270  // ...and save in a context
5271  std::unique_ptr<HTMLAttrContext> xCntxt(
5272  new HTMLAttrContext(HtmlTokenId::HORZRULE, RES_POOLCOLL_HTML_HR, OUString()));
5273 
5274  PushContext(xCntxt);
5275 
5276  // set the new style
5277  SetTextCollAttrs(m_aContexts.back().get());
5278 
5279  // the hard attributes of the current paragraph will never become invalid
5280  m_aParaAttrs.clear();
5281 
5282  if( nSize>0 || bColor || bNoShade )
5283  {
5284  // set line colour and/or width
5285  if( !bColor )
5286  aColor = COL_GRAY;
5287 
5288  SvxBorderLine aBorderLine( &aColor );
5289  if( nSize )
5290  {
// SIZE is given in pixels; only the height component is of interest here
5291  tools::Long nPWidth = 0;
5292  tools::Long nPHeight = static_cast<tools::Long>(nSize);
5293  SvxCSS1Parser::PixelToTwip( nPWidth, nPHeight );
5294  if ( !bNoShade )
5295  {
5296  aBorderLine.SetBorderLineStyle(SvxBorderLineStyle::DOUBLE);
5297  }
5298  aBorderLine.SetWidth( nPHeight );
5299  }
5300  else if( bNoShade )
5301  {
5302  aBorderLine.SetWidth( SvxBorderLineWidth::Medium );
5303  }
5304  else
5305  {
5306  aBorderLine.SetBorderLineStyle(SvxBorderLineStyle::DOUBLE);
// NOTE(review): listing line 5307 is missing from this excerpt. The sibling
// branches each set a line width at this point, so presumably a width is set
// here as well - verify against the real source.
5308  }
5309 
5310  SvxBoxItem aBoxItem(RES_BOX);
5311  aBoxItem.SetLine( &aBorderLine, SvxBoxItemLine::BOTTOM );
// the attribute is created directly and queued in m_aSetAttrTab
5312  HTMLAttr* pTmp = new HTMLAttr(*m_pPam->GetPoint(), aBoxItem, nullptr, std::shared_ptr<HTMLAttrTable>());
5313  m_aSetAttrTab.push_back( pTmp );
5314  }
5315  if( nWidth )
5316  {
5317  // If we aren't in a table, then the width value will be "faked" with
5318  // paragraph indents. That makes little sense in a table. In order to
5319  // avoid that the line is considered during the width calculation, it
5320  // still gets an appropriate LRSpace-Item.
5321  if (!m_xTable)
5322  {
5323  // fake length and alignment of line above paragraph indents
5324  tools::Long nBrowseWidth = GetCurrentBrowseWidth();
// NOTE(review): in the non-percent case the browse width, not the parsed
// WIDTH value, is passed to ToTwips() - confirm that this is intended.
5325  nWidth = bPercentWidth ? o3tl::narrowing<sal_uInt16>((nWidth*nBrowseWidth) / 100)
5326  : ToTwips( o3tl::narrowing<sal_uInt16>(nBrowseWidth) );
5327  if( nWidth < MINLAY )
5328  nWidth = MINLAY;
5329 
// indents are only needed when the rule is narrower than the browse width
5330  const SwFormatColl *pColl = (static_cast<tools::Long>(nWidth) < nBrowseWidth) ? GetCurrFormatColl() : nullptr;
5331  if (pColl)
5332  {
5333  SvxLRSpaceItem aLRItem( pColl->GetLRSpace() );
5334  tools::Long nDist = nBrowseWidth - nWidth;
5335 
// the free space goes to the side(s) opposite to the alignment
5336  switch( eAdjust )
5337  {
5338  case SvxAdjust::Right:
5339  aLRItem.SetTextLeft( o3tl::narrowing<sal_uInt16>(nDist) );
5340  break;
5341  case SvxAdjust::Left:
5342  aLRItem.SetRight( o3tl::narrowing<sal_uInt16>(nDist) );
5343  break;
5344  case SvxAdjust::Center:
5345  default:
5346  nDist /= 2;
5347  aLRItem.SetTextLeft( o3tl::narrowing<sal_uInt16>(nDist) );
5348  aLRItem.SetRight( o3tl::narrowing<sal_uInt16>(nDist) );
5349  break;
5350  }
5351 
5352  HTMLAttr* pTmp = new HTMLAttr(*m_pPam->GetPoint(), aLRItem, nullptr, std::shared_ptr<HTMLAttrTable>());
5353  m_aSetAttrTab.push_back( pTmp );
5354  }
5355  }
5356  }
5357 
5358  // it's not possible to insert bookmarks in links
5359  if( !aId.isEmpty() )
5360  InsertBookmark( aId );
5361 
5362  // pop current context of stack
5363  std::unique_ptr<HTMLAttrContext> xPoppedContext(PopContext(HtmlTokenId::HORZRULE));
5364  xPoppedContext.reset();
5365 
// NOTE(review): listing line 5366 is missing from this excerpt.
5367 
5368  // and set the current style in the next paragraph
5369  SetTextCollAttrs();
5370 }
5371 
5373 {
5374  OUString aName, aContent;
5375  bool bHTTPEquiv = false;
5376 
5377  const HTMLOptions& rHTMLOptions = GetOptions();
5378  for (size_t i = rHTMLOptions.size(); i; )
5379  {
5380  const