LibreOffice Module sw (master)  1
swhtml.cxx
Go to the documentation of this file.
1 /* -*- Mode: C++; tab-width: 4; indent-tabs-mode: nil; c-basic-offset: 4 -*- */
2 /*
3  * This file is part of the LibreOffice project.
4  *
5  * This Source Code Form is subject to the terms of the Mozilla Public
6  * License, v. 2.0. If a copy of the MPL was not distributed with this
7  * file, You can obtain one at http://mozilla.org/MPL/2.0/.
8  *
9  * This file incorporates work covered by the following license notice:
10  *
11  * Licensed to the Apache Software Foundation (ASF) under one or more
12  * contributor license agreements. See the NOTICE file distributed
13  * with this work for additional information regarding copyright
14  * ownership. The ASF licenses this file to you under the Apache
15  * License, Version 2.0 (the "License"); you may not use this file
16  * except in compliance with the License. You may obtain a copy of
17  * the License at http://www.apache.org/licenses/LICENSE-2.0 .
18  */
19 
20 #include <sal/config.h>
21 
22 #include <algorithm>
23 #include <memory>
24 #include <config_java.h>
25 
26 #include <com/sun/star/document/XDocumentPropertiesSupplier.hpp>
27 #include <com/sun/star/document/XDocumentProperties.hpp>
28 #include <com/sun/star/i18n/ScriptType.hpp>
29 #include <com/sun/star/i18n/XBreakIterator.hpp>
30 #include <comphelper/string.hxx>
31 #include <o3tl/safeint.hxx>
32 #include <rtl/ustrbuf.hxx>
33 #include <svx/svxids.hrc>
34 #if OSL_DEBUG_LEVEL > 0
35 #include <stdlib.h>
36 #endif
37 #include <hintids.hxx>
38 
39 #include <vcl/errinf.hxx>
40 #include <svl/stritem.hxx>
41 #include <vcl/imap.hxx>
42 #include <svtools/htmltokn.h>
43 #include <svtools/htmlkywd.hxx>
44 #include <svtools/ctrltool.hxx>
45 #include <unotools/configmgr.hxx>
46 #include <unotools/pathoptions.hxx>
47 #include <vcl/svapp.hxx>
48 #include <sfx2/event.hxx>
49 #include <sfx2/docfile.hxx>
50 
51 #include <svtools/htmlcfg.hxx>
52 #include <sfx2/linkmgr.hxx>
53 #include <editeng/kernitem.hxx>
54 #include <editeng/boxitem.hxx>
55 #include <editeng/fhgtitem.hxx>
57 #include <editeng/postitem.hxx>
58 #include <editeng/wghtitem.hxx>
60 #include <editeng/udlnitem.hxx>
62 #include <editeng/blinkitem.hxx>
63 #include <editeng/ulspitem.hxx>
64 #include <editeng/colritem.hxx>
65 #include <editeng/fontitem.hxx>
66 #include <editeng/adjustitem.hxx>
67 #include <editeng/lrspitem.hxx>
68 #include <editeng/protitem.hxx>
69 #include <editeng/flstitem.hxx>
71 
72 #include <frmatr.hxx>
73 #include <charatr.hxx>
74 #include <fmtfld.hxx>
75 #include <fmtpdsc.hxx>
76 #include <fmtanchr.hxx>
77 #include <fmtsrnd.hxx>
78 #include <fmtfsize.hxx>
79 #include <fmtclds.hxx>
80 #include <fchrfmt.hxx>
81 #include <fmtinfmt.hxx>
82 #include <fmtfollowtextflow.hxx>
83 #include <fmtornt.hxx>
84 #include <doc.hxx>
85 #include <IDocumentUndoRedo.hxx>
92 #include <IDocumentStatistics.hxx>
93 #include <IDocumentState.hxx>
94 #include <pam.hxx>
95 #include <ndtxt.hxx>
96 #include <mdiexp.hxx>
97 #include <poolfmt.hxx>
98 #include <pagedesc.hxx>
99 #include <IMark.hxx>
100 #include <docsh.hxx>
101 #include <editsh.hxx>
102 #include <docufld.hxx>
103 #include "swcss1.hxx"
104 #include <fltini.hxx>
105 #include <htmltbl.hxx>
106 #include "htmlnum.hxx"
107 #include "swhtml.hxx"
108 #include "wrthtml.hxx"
109 #include <linkenum.hxx>
110 #include <breakit.hxx>
111 #include <SwAppletImpl.hxx>
112 #include <swdll.hxx>
113 #include <txatbase.hxx>
114 
115 #include <sfx2/viewfrm.hxx>
116 #include <svx/svdobj.hxx>
117 #include <officecfg/Office/Writer.hxx>
119 #include <comphelper/sequence.hxx>
120 
121 #include <swerror.h>
122 #include <ndole.hxx>
123 #include <unoframe.hxx>
124 #include "css1atr.hxx"
125 #include <frameformats.hxx>
126 
// NOTE(review): presumably a bit mask for the HTML font sizes 1-7 that the
// parser keeps in m_aFontHeights - confirm against the users of this macro.
127 #define FONTSIZE_MASK 7
128 
// Escapement settings; SUPER/SUB simply alias the DFLT_ESC_* defaults.
// NOTE(review): HTML_ESC_PROP looks like a proportional size in percent - confirm.
129 #define HTML_ESC_PROP 80
130 #define HTML_ESC_SUPER DFLT_ESC_SUPER
131 #define HTML_ESC_SUB DFLT_ESC_SUB
132 
// NOTE(review): presumably the values of the <SPACER TYPE=...> option table
// that follows - confirm.
133 #define HTML_SPTYPE_BLOCK 1
134 #define HTML_SPTYPE_HORI 2
135 #define HTML_SPTYPE_VERT 3
136 
138 using namespace ::com::sun::star;
139 
140 // <P ALIGN=xxx>, <Hn ALIGN=xxx>, <TD ALIGN=xxx> etc.
142 {
143  { OOO_STRING_SVTOOLS_HTML_AL_left, SvxAdjust::Left },
144  { OOO_STRING_SVTOOLS_HTML_AL_center, SvxAdjust::Center },
145  { OOO_STRING_SVTOOLS_HTML_AL_middle, SvxAdjust::Center }, // Netscape
146  { OOO_STRING_SVTOOLS_HTML_AL_right, SvxAdjust::Right },
147  { OOO_STRING_SVTOOLS_HTML_AL_justify, SvxAdjust::Block },
148  { OOO_STRING_SVTOOLS_HTML_AL_char, SvxAdjust::Left },
149  { nullptr, SvxAdjust(0) }
150 };
151 
152 // <SPACER TYPE=...>
154 {
158  { nullptr, 0 }
159 };
160 
162 {
163  m_bTemplateBrowseMode = true;
164 }
165 
166 OUString HTMLReader::GetTemplateName(SwDoc& rDoc) const
167 {
169  // HTML import into Writer, avoid loading the Writer/Web template.
170  return OUString();
171 
172  static const OUStringLiteral sTemplateWithoutExt(u"internal/html");
173  SvtPathOptions aPathOpt;
174 
175  // first search for OpenDocument Writer/Web template
176  // OpenDocument Writer/Web template (extension .oth)
177  OUString sTemplate( sTemplateWithoutExt + ".oth" );
178  if (aPathOpt.SearchFile( sTemplate, SvtPathOptions::Paths::Template ))
179  return sTemplate;
180 
181  // no OpenDocument Writer/Web template found.
182  // search for OpenOffice.org Writer/Web template
183  sTemplate = sTemplateWithoutExt + ".stw";
184  if (aPathOpt.SearchFile( sTemplate, SvtPathOptions::Paths::Template ))
185  return sTemplate;
186 
187  OSL_ENSURE( false, "The default HTML template cannot be found in the defined template directories!");
188 
189  return OUString();
190 }
191 
193 {
194  OSL_ENSURE( m_pMedium, "Where is the medium??" );
195 
196  if( m_pMedium->IsRemote() || !m_pMedium->IsStorage() )
197  {
199  return true;
200  }
201  return false;
202 
203 }
204 
205 // Call for the general Reader-Interface
// Runs the HTML import for one document.
//
// rDoc     - document imported into; a reference is held for the duration
//            of the call so that it stays alive while the parser runs
// rBaseURL - passed through to the SwHTMLParser constructor
// rPam     - passed through to the SwHTMLParser constructor
// rName    - passed through to the SwHTMLParser constructor
//
// Returns ERR_SWG_READ_ERROR when no stream is set. Otherwise returns
// ERRCODE_NONE, unless the parser finishes in a state that is neither
// Pending nor Accepted; then an ERR_FORMAT_ROWCOL error carrying the
// "line,position" reported by the parser is returned.
// NOTE(review): several statements of this function (the bodies of the
// insert-mode and Pending branches, the tail of the parser constructor
// call) are not visible here - confirm against the full file before editing.
206 ErrCode HTMLReader::Read( SwDoc &rDoc, const OUString& rBaseURL, SwPaM &rPam, const OUString & rName )
207 {
209 
    // Without an input stream there is nothing to parse.
210  if( !m_pStream )
211  {
212  OSL_ENSURE( m_pStream, "HTML-Read without stream" );
213  return ERR_SWG_READ_ERROR;
214  }
215 
216  if( !m_bInsertMode )
217  {
219 
220  // Set the HTML page style, when it isn't a HTML document,
221  // otherwise it's already set.
223  {
226  }
227  }
228 
229  // so nobody steals the document!
230  rtl::Reference<SwDoc> xHoldAlive(&rDoc);
231  ErrCode nRet = ERRCODE_NONE;
    // "!m_bInsertMode" is the parser's bReadNewDoc argument: anything that is
    // not an insert is treated as reading a new document.
232  tools::SvRef<SwHTMLParser> xParser = new SwHTMLParser( &rDoc, rPam, *m_pStream,
233  rName, rBaseURL, !m_bInsertMode, m_pMedium,
234  IsReadUTF8(),
236 
237  SvParserState eState = xParser->CallParser();
238 
239  if( SvParserState::Pending == eState )
241  else if( SvParserState::Accepted != eState )
242  {
    // Error text is "<line>,<position>" as reported by the parser.
243  const OUString sErr(OUString::number(static_cast<sal_Int32>(xParser->GetLineNr()))
244  + "," + OUString::number(static_cast<sal_Int32>(xParser->GetLinePos())));
245 
246  // use the stream as transport for error number
    // NOTE(review): the StringErrorInfo is created with new and not deleted
    // here - presumably the error-info framework owns it; confirm.
247  nRet = *new StringErrorInfo( ERR_FORMAT_ROWCOL, sErr,
248  DialogMask::ButtonsOk | DialogMask::MessageError );
249  }
250 
251  return nRet;
252 }
253 
255  const OUString& rPath,
256  const OUString& rBaseURL,
257  bool bReadNewDoc,
258  SfxMedium* pMed, bool bReadUTF8,
259  bool bNoHTMLComments,
260  const OUString& rNamespace )
261  : SfxHTMLParser( rIn, bReadNewDoc, pMed ),
262  m_aPathToFile( rPath ),
263  m_sBaseURL( rBaseURL ),
264  m_xAttrTab(std::make_shared<HTMLAttrTable>()),
265  m_pNumRuleInfo( new SwHTMLNumRuleInfo ),
266  m_xDoc( pD ),
267  m_pActionViewShell( nullptr ),
268  m_pSttNdIdx( nullptr ),
269  m_pFormImpl( nullptr ),
270  m_pMarquee( nullptr ),
271  m_pImageMap( nullptr ),
272  m_nBaseFontStMin( 0 ),
273  m_nFontStMin( 0 ),
274  m_nDefListDeep( 0 ),
275  m_nFontStHeadStart( 0 ),
276  m_nSBModuleCnt( 0 ),
277  m_nMissingImgMaps( 0 ),
278  m_nParaCnt( 5 ),
279  // #i83625#
280  m_nContextStMin( 0 ),
281  m_nContextStAttrMin( 0 ),
282  m_nSelectEntryCnt( 0 ),
283  m_nOpenParaToken( HtmlTokenId::NONE ),
284  m_eJumpTo( JumpToMarks::NONE ),
285 #ifdef DBG_UTIL
286  m_nContinue( 0 ),
287 #endif
288  m_eParaAdjust( SvxAdjust::End ),
289  m_bDocInitialized( false ),
290  m_bSetModEnabled( false ),
291  m_bInFloatingFrame( false ),
292  m_bInField( false ),
293  m_bCallNextToken( false ),
294  m_bIgnoreRawData( false ),
295  m_bLBEntrySelected ( false ),
296  m_bTAIgnoreNewPara ( false ),
297  m_bFixMarqueeWidth ( false ),
298  m_bNoParSpace( false ),
299  m_bInNoEmbed( false ),
300  m_bInTitle( false ),
301  m_bUpdateDocStat( false ),
302  m_bFixSelectWidth( false ),
303  m_bTextArea( false ),
304  m_bSelect( false ),
305  m_bInFootEndNoteAnchor( false ),
306  m_bInFootEndNoteSymbol( false ),
307  m_bIgnoreHTMLComments( bNoHTMLComments ),
308  m_bRemoveHidden( false ),
309  m_bBodySeen( false ),
310  m_bReadingHeaderOrFooter( false ),
311  m_bNotifyMacroEventRead( false ),
312  m_isInTableStructure(false),
313  m_nTableDepth( 0 ),
314  m_pTempViewFrame(nullptr)
315 {
316  // If requested explicitly, then force ignoring of comments (don't create postits for them).
318  m_bIgnoreHTMLComments = true;
319 
320  m_nEventId = nullptr;
322 
323  m_eScriptLang = HTMLScriptLanguage::Unknown;
324 
325  rCursor.DeleteMark();
326  m_pPam = &rCursor; // re-use existing cursor: avoids spurious ~SwIndexReg assert
327  memset(m_xAttrTab.get(), 0, sizeof(HTMLAttrTable));
328 
329  // Read the font sizes 1-7 from the INI file
330  SvxHtmlOptions& rHtmlOptions = SvxHtmlOptions::Get();
331  m_aFontHeights[0] = rHtmlOptions.GetFontSize( 0 ) * 20;
332  m_aFontHeights[1] = rHtmlOptions.GetFontSize( 1 ) * 20;
333  m_aFontHeights[2] = rHtmlOptions.GetFontSize( 2 ) * 20;
334  m_aFontHeights[3] = rHtmlOptions.GetFontSize( 3 ) * 20;
335  m_aFontHeights[4] = rHtmlOptions.GetFontSize( 4 ) * 20;
336  m_aFontHeights[5] = rHtmlOptions.GetFontSize( 5 ) * 20;
337  m_aFontHeights[6] = rHtmlOptions.GetFontSize( 6 ) * 20;
338 
339  m_bKeepUnknown = rHtmlOptions.IsImportUnknown();
340 
341  if(bReadNewDoc)
342  {
343  //CJK has different defaults, so a different object should be used for this
344  //RES_CHARTR_CJK_FONTSIZE is a valid value
346  m_xDoc->SetDefault( aFontHeight );
348  m_xDoc->SetDefault( aFontHeightCJK );
350  m_xDoc->SetDefault( aFontHeightCTL );
351 
352  // #i18732# - adjust default of option 'FollowTextFlow'
353  // TODO: not sure what the appropriate default for HTML should be?
354  m_xDoc->SetDefault( SwFormatFollowTextFlow(true) );
355  }
356 
357  // Change to HTML mode during the import, so that the right styles are created
358  m_bOldIsHTMLMode = m_xDoc->getIDocumentSettingAccess().get(DocumentSettingId::HTML_MODE);
359  m_xDoc->getIDocumentSettingAccess().set(DocumentSettingId::HTML_MODE, true);
360 
361  m_pCSS1Parser.reset(new SwCSS1Parser(m_xDoc.get(), *this, m_aFontHeights, m_sBaseURL, IsNewDoc()));
362  m_pCSS1Parser->SetIgnoreFontFamily( rHtmlOptions.IsIgnoreFontFamily() );
363 
364  if( bReadUTF8 )
365  {
366  SetSrcEncoding( RTL_TEXTENCODING_UTF8 );
367  }
368  else
369  {
370  SwDocShell *pDocSh = m_xDoc->GetDocShell();
371  SvKeyValueIterator *pHeaderAttrs =
372  pDocSh->GetHeaderAttributes();
373  if( pHeaderAttrs )
374  SetEncodingByHTTPHeader( pHeaderAttrs );
375  }
376  m_pCSS1Parser->SetDfltEncoding( osl_getThreadTextEncoding() );
377 
378  SwDocShell* pDocSh = m_xDoc->GetDocShell();
379  if( pDocSh )
380  {
381  m_bViewCreated = true; // not, load synchronous
382 
383  // a jump mark is present
384 
385  if( pMed )
386  {
387  m_sJmpMark = pMed->GetURLObject().GetMark();
388  if( !m_sJmpMark.isEmpty() )
389  {
391  sal_Int32 nLastPos = m_sJmpMark.lastIndexOf( cMarkSeparator );
392  sal_Int32 nPos = nLastPos != -1 ? nLastPos : 0;
393 
394  OUString sCmp;
395  if (nPos)
396  {
397  sCmp = m_sJmpMark.copy(nPos + 1).replaceAll(" ", "");
398  }
399 
400  if( !sCmp.isEmpty() )
401  {
402  sCmp = sCmp.toAsciiLowerCase();
403  if( sCmp == "region" )
405  else if( sCmp == "table" )
407  else if( sCmp == "graphic" )
409  else if( sCmp == "outline" ||
410  sCmp == "text" ||
411  sCmp == "frame" )
412  m_eJumpTo = JumpToMarks::NONE; // this is nothing valid!
413  else
414  // otherwise this is a normal (book)mark
415  nPos = -1;
416  }
417  else
418  nPos = -1;
419 
420  if( nPos != -1 )
421  m_sJmpMark = m_sJmpMark.copy( 0, nPos );
422  if( m_sJmpMark.isEmpty() )
424  }
425  }
426  }
427 
428  if (!rNamespace.isEmpty())
429  {
430  SetNamespace(rNamespace);
431  m_bXHTML = true;
432  if (rNamespace == "reqif-xhtml")
433  m_bReqIF = true;
434  }
435 
436  // Extract load parameters which are specific to this filter.
437  if (!pMed)
438  {
439  return;
440  }
441 
442  comphelper::SequenceAsHashMap aLoadMap(pMed->GetArgs());
443  auto it = aLoadMap.find("AllowedRTFOLEMimeTypes");
444  if (it == aLoadMap.end())
445  {
446  return;
447  }
448 
449  uno::Sequence<OUString> aTypes;
450  it->second >>= aTypes;
451  m_aAllowedRTFOLEMimeTypes = comphelper::sequenceToContainer<std::set<OUString>>(aTypes);
452 }
453 
455 {
    // Destructor body (identified by the assertion text further down).
    // Unwinds whatever parser state is left over, restores the document
    // settings changed for the import, finishes loading on the DocShell and
    // frees the helper objects owned by the parser.
456 #ifdef DBG_UTIL
457  OSL_ENSURE( !m_nContinue, "DTOR in continue!" );
458 #endif
459 
    // Contexts should already be gone; drop any leftovers, including
    // protected ones (the protection threshold is reset first).
460  OSL_ENSURE(m_aContexts.empty(), "There are still contexts on the stack");
461  OSL_ENSURE(!m_nContextStMin, "There are protected contexts");
462  m_nContextStMin = 0;
463  while (!m_aContexts.empty())
464  {
465  std::unique_ptr<HTMLAttrContext> xCntxt(PopContext());
466  ClearContext(xCntxt.get());
467  }
468 
    // Remember whether the load was asynchronous before clearing the flag,
    // then put back the HTML_MODE value saved in m_bOldIsHTMLMode.
469  bool bAsync = m_xDoc->IsInLoadAsynchron();
470  m_xDoc->SetInLoadAsynchron( false );
471  m_xDoc->getIDocumentSettingAccess().set(DocumentSettingId::HTML_MODE, m_bOldIsHTMLMode);
472 
    // NOTE(review): the statement controlled by this condition is not visible
    // here; as shown, the condition governs the following if-statement.
    // Confirm against the full file.
473  if( m_xDoc->GetDocShell() && m_nEventId )
475 
476  // the DocumentDetected maybe can delete the DocShells, therefore fetch again
477  if( m_xDoc->GetDocShell() )
478  {
479  // update linked sections
480  sal_uInt16 nLinkMode = m_xDoc->getIDocumentSettingAccess().getLinkUpdateMode( true );
481  if( nLinkMode != NEVER && bAsync &&
482  SfxObjectCreateMode::INTERNAL!=m_xDoc->GetDocShell()->GetCreateMode() )
483  m_xDoc->getIDocumentLinksAdministration().GetLinkManager().UpdateAllLinks( nLinkMode == MANUAL, false, nullptr );
484 
485  if ( m_xDoc->GetDocShell()->IsLoading() )
486  {
487  // #i59688#
488  m_xDoc->GetDocShell()->LoadingFinished();
489  }
490  }
491 
492  delete m_pSttNdIdx;
493 
    // Attributes still queued for setting are owned by raw pointer; free them.
494  if( !m_aSetAttrTab.empty() )
495  {
496  OSL_ENSURE( m_aSetAttrTab.empty(),"There are still attributes on the stack" );
497  for ( const auto& rpAttr : m_aSetAttrTab )
498  delete rpAttr;
499  m_aSetAttrTab.clear();
500  }
501 
502  m_pCSS1Parser.reset();
503  m_pNumRuleInfo.reset();
504  DeleteFormImpl();
505  m_pFootEndNoteImpl.reset();
506 
507  OSL_ENSURE(!m_xTable, "It exists still an open table");
508  m_pImageMaps.reset();
509 
510  OSL_ENSURE( m_vPendingStack.empty(),
511  "SwHTMLParser::~SwHTMLParser: Here should not be Pending-Stack anymore" );
512  m_vPendingStack.clear();
513 
    // NOTE(review): m_xDoc is cleared here, but the block below still tests
    // m_xDoc.is() and dereferences m_xDoc. Assuming clear() drops the
    // reference, that test can never succeed, so the SID_HIDDEN item is never
    // cleared. The clear() probably belongs after the m_pTempViewFrame
    // block - confirm and reorder.
514  m_xDoc.clear();
515 
516  if ( m_pTempViewFrame )
517  {
519 
520  // the temporary view frame is hidden, so the hidden flag might need to be removed
521  if ( m_bRemoveHidden && m_xDoc.is() && m_xDoc->GetDocShell() && m_xDoc->GetDocShell()->GetMedium() )
522  m_xDoc->GetDocShell()->GetMedium()->GetItemSet()->ClearItem( SID_HIDDEN );
523  }
524 }
525 
526 IMPL_LINK_NOARG( SwHTMLParser, AsyncCallback, void*, void )
527 {
528  m_nEventId=nullptr;
529 
530  // #i47907# - If the document has already been destructed,
531  // the parser should be aware of this:
532  if( ( m_xDoc->GetDocShell() && m_xDoc->GetDocShell()->IsAbortingImport() )
533  || 1 == m_xDoc->getReferenceCount() )
534  {
535  // was the import aborted by SFX?
536  eState = SvParserState::Error;
537  }
538 
539  GetAsynchCallLink().Call(nullptr);
540 }
541 
543 {
544  // create temporary index on position 0, so it won't be moved!
545  m_pSttNdIdx = new SwNodeIndex( m_xDoc->GetNodes() );
546  if( !IsNewDoc() ) // insert into existing document ?
547  {
548  const SwPosition* pPos = m_pPam->GetPoint();
549 
550  m_xDoc->getIDocumentContentOperations().SplitNode( *pPos, false );
551 
552  *m_pSttNdIdx = pPos->nNode.GetIndex()-1;
553  m_xDoc->getIDocumentContentOperations().SplitNode( *pPos, false );
554 
555  SwPaM aInsertionRangePam( *pPos );
556 
558 
559  // split any redline over the insertion point
560  aInsertionRangePam.SetMark();
561  *aInsertionRangePam.GetPoint() = *m_pPam->GetPoint();
562  aInsertionRangePam.Move( fnMoveBackward );
563  m_xDoc->getIDocumentRedlineAccess().SplitRedline( aInsertionRangePam );
564 
565  m_xDoc->SetTextFormatColl( *m_pPam,
566  m_pCSS1Parser->GetTextCollFromPool( RES_POOLCOLL_STANDARD ));
567  }
568 
569  if( GetMedium() )
570  {
571  if( !m_bViewCreated )
572  {
573  m_nEventId = Application::PostUserEvent( LINK( this, SwHTMLParser, AsyncCallback ) );
574  }
575  else
576  {
577  m_bViewCreated = true;
578  m_nEventId = nullptr;
579  }
580  }
581  else // show progress bar
582  {
583  rInput.Seek(STREAM_SEEK_TO_END);
584  rInput.ResetError();
585 
586  m_xProgress.reset(new ImportProgress(m_xDoc->GetDocShell(), 0, rInput.Tell()));
587 
588  rInput.Seek(STREAM_SEEK_TO_BEGIN);
589  rInput.ResetError();
590  }
591 
592  StartListening(m_xDoc->GetPageDesc( 0 ).GetNotifier());
593 
595  return eRet;
596 }
597 
599 {
    // NOTE(review): the signature is not visible here; judging by the call
    // site this is the body of CanRemoveNode(nNodeIdx) - confirm.
    // Looks at the node directly in front of nNodeIdx and yields true when
    // that node is a content node, or when it is an end node whose
    // start-of-section node is a section node.
600  const SwNode *pPrev = m_xDoc->GetNodes()[nNodeIdx - 1];
601  return pPrev->IsContentNode() || (pPrev->IsEndNode() && pPrev->StartOfSectionNode()->IsSectionNode());
602 }
603 
605 {
606 #ifdef DBG_UTIL
607  OSL_ENSURE(!m_nContinue, "Continue in Continue - not supposed to happen");
608  m_nContinue++;
609 #endif
610 
611  // When the import (of SFX) is aborted, an error will be set but
612  // we still continue, so that we clean up properly.
613  OSL_ENSURE( SvParserState::Error!=eState,
614  "SwHTMLParser::Continue: already set an error" );
615  if( m_xDoc->GetDocShell() && m_xDoc->GetDocShell()->IsAbortingImport() )
616  eState = SvParserState::Error;
617 
618  // Fetch SwViewShell from document, save it and set as current.
619  SwViewShell *pInitVSh = CallStartAction();
620 
621  if( SvParserState::Error != eState && GetMedium() && !m_bViewCreated )
622  {
623  // At first call first return, show document and wait for callback
624  // time.
625  // At this point in CallParser only one digit was read and
626  // a SaveState(0) was called.
627  eState = SvParserState::Pending;
628  m_bViewCreated = true;
629  m_xDoc->SetInLoadAsynchron( true );
630 
631 #ifdef DBG_UTIL
632  m_nContinue--;
633 #endif
634 
635  return;
636  }
637 
638  m_bSetModEnabled = false;
639  if( m_xDoc->GetDocShell() )
640  {
641  m_bSetModEnabled = m_xDoc->GetDocShell()->IsEnableSetModified();
642  if( m_bSetModEnabled )
643  {
644  m_xDoc->GetDocShell()->EnableSetModified( false );
645  }
646  }
647 
648  // during import don't call OLE-Modified
649  Link<bool,void> aOLELink( m_xDoc->GetOle2Link() );
650  m_xDoc->SetOle2Link( Link<bool,void>() );
651 
652  bool bModified = m_xDoc->getIDocumentState().IsModified();
653  bool const bWasUndo = m_xDoc->GetIDocumentUndoRedo().DoesUndo();
654  m_xDoc->GetIDocumentUndoRedo().DoUndo(false);
655 
656  // When the import will be aborted, don't call Continue anymore.
657  // If a Pending-Stack exists make sure the stack is ended with a call
658  // of NextToken.
659  if( SvParserState::Error == eState )
660  {
661  OSL_ENSURE( m_vPendingStack.empty() || m_vPendingStack.back().nToken != HtmlTokenId::NONE,
662  "SwHTMLParser::Continue: Pending-Stack without Token" );
663  if( !m_vPendingStack.empty() && m_vPendingStack.back().nToken != HtmlTokenId::NONE )
664  NextToken( m_vPendingStack.back().nToken );
665  OSL_ENSURE( m_vPendingStack.empty(),
666  "SwHTMLParser::Continue: There is again a Pending-Stack" );
667  }
668  else
669  {
670  HTMLParser::Continue( !m_vPendingStack.empty() ? m_vPendingStack.back().nToken : nToken );
671  }
672 
673  // disable progress bar again
674  m_xProgress.reset();
675 
676  bool bLFStripped = false;
677  if( SvParserState::Pending != GetStatus() )
678  {
679  // set the last attributes yet
680  {
681  if( !m_aScriptSource.isEmpty() )
682  {
683  SwScriptFieldType *pType =
684  static_cast<SwScriptFieldType*>(m_xDoc->getIDocumentFieldsAccess().GetSysFieldType( SwFieldIds::Script ));
685 
687  false );
688  InsertAttr( SwFormatField( aField ), false );
689  }
690 
691  if( m_pAppletImpl )
692  {
693  if( m_pAppletImpl->GetApplet().is() )
694  EndApplet();
695  else
696  EndObject();
697  }
698 
699  // maybe remove an existing LF after the last paragraph
700  if( IsNewDoc() )
701  bLFStripped = StripTrailingLF() > 0;
702 
703  // close still open numbering
704  while( GetNumInfo().GetNumRule() )
706 
707  OSL_ENSURE( !m_nContextStMin, "There are protected contexts" );
708  // try this twice, first normally to let m_nContextStMin decrease
709  // naturally and get contexts popped in desired order, and if that
710  // fails force it
711  for (int i = 0; i < 2; ++i)
712  {
713  while (m_aContexts.size() > m_nContextStMin)
714  {
715  std::unique_ptr<HTMLAttrContext> xCntxt(PopContext());
716  if (xCntxt)
717  EndContext(xCntxt.get());
718  }
719  if (!m_nContextStMin)
720  break;
721  OSL_ENSURE(!m_nContextStMin, "There are still protected contexts");
722  m_nContextStMin = 0;
723  }
724 
725  m_aParaAttrs.clear();
726 
727  SetAttr( false );
728 
729  // set the first delayed styles
730  m_pCSS1Parser->SetDelayedStyles();
731  }
732 
733  // again correct the start
734  if( !IsNewDoc() && m_pSttNdIdx->GetIndex() )
735  {
736  SwTextNode* pTextNode = m_pSttNdIdx->GetNode().GetTextNode();
737  SwNodeIndex aNxtIdx( *m_pSttNdIdx );
738  if( pTextNode && pTextNode->CanJoinNext( &aNxtIdx ))
739  {
740  const sal_Int32 nStt = pTextNode->GetText().getLength();
741  // when the cursor is still in the node, then set him at the end
742  if( m_pPam->GetPoint()->nNode == aNxtIdx )
743  {
745  m_pPam->GetPoint()->nContent.Assign( pTextNode, nStt );
746  }
747 
748 #if OSL_DEBUG_LEVEL > 0
749 // !!! shouldn't be possible, or ??
750  OSL_ENSURE( m_pSttNdIdx->GetIndex()+1 != m_pPam->GetBound().nNode.GetIndex(),
751  "Pam.Bound1 is still in the node" );
752  OSL_ENSURE( m_pSttNdIdx->GetIndex()+1 != m_pPam->GetBound( false ).nNode.GetIndex(),
753  "Pam.Bound2 is still in the node" );
754 
755  if( m_pSttNdIdx->GetIndex()+1 == m_pPam->GetBound().nNode.GetIndex() )
756  {
757  const sal_Int32 nCntPos = m_pPam->GetBound().nContent.GetIndex();
758  m_pPam->GetBound().nContent.Assign( pTextNode,
759  pTextNode->GetText().getLength() + nCntPos );
760  }
761  if( m_pSttNdIdx->GetIndex()+1 == m_pPam->GetBound( false ).nNode.GetIndex() )
762  {
763  const sal_Int32 nCntPos = m_pPam->GetBound( false ).nContent.GetIndex();
764  m_pPam->GetBound( false ).nContent.Assign( pTextNode,
765  pTextNode->GetText().getLength() + nCntPos );
766  }
767 #endif
768  // Keep character attribute!
769  SwTextNode* pDelNd = aNxtIdx.GetNode().GetTextNode();
770  if (pTextNode->GetText().getLength())
771  pDelNd->FormatToTextAttr( pTextNode );
772  else
773  pTextNode->ChgFormatColl( pDelNd->GetTextColl() );
774  pTextNode->JoinNext();
775  }
776  }
777  }
778 
779  if( SvParserState::Accepted == eState )
780  {
781  if( m_nMissingImgMaps )
782  {
783  // Some Image-Map relations are still missing.
784  // Maybe now the Image-Maps are there?
786  }
787 
788  // now remove the last useless paragraph
789  SwPosition* pPos = m_pPam->GetPoint();
790  if( !pPos->nContent.GetIndex() && !bLFStripped )
791  {
792  SwTextNode* pCurrentNd;
793  sal_uLong nNodeIdx = pPos->nNode.GetIndex();
794 
795  bool bHasFlysOrMarks =
797 
798  if( IsNewDoc() )
799  {
800  if (!m_pPam->GetPoint()->nContent.GetIndex() && CanRemoveNode(nNodeIdx))
801  {
803  if( pCNd && pCNd->StartOfSectionIndex()+2 <
804  pCNd->EndOfSectionIndex() && !bHasFlysOrMarks )
805  {
807  SwCursorShell *pCursorSh = dynamic_cast<SwCursorShell *>( pVSh );
808  if( pCursorSh &&
809  pCursorSh->GetCursor()->GetPoint()
810  ->nNode.GetIndex() == nNodeIdx )
811  {
812  pCursorSh->MovePara(GoPrevPara, fnParaEnd );
813  pCursorSh->SetMark();
814  pCursorSh->ClearMark();
815  }
816  m_pPam->GetBound().nContent.Assign( nullptr, 0 );
817  m_pPam->GetBound(false).nContent.Assign( nullptr, 0 );
818  m_xDoc->GetNodes().Delete( m_pPam->GetPoint()->nNode );
819  }
820  }
821  }
822  else if( nullptr != ( pCurrentNd = m_xDoc->GetNodes()[ nNodeIdx ]->GetTextNode()) && !bHasFlysOrMarks )
823  {
824  if( pCurrentNd->CanJoinNext( &pPos->nNode ))
825  {
826  SwTextNode* pNextNd = pPos->nNode.GetNode().GetTextNode();
827  pPos->nContent.Assign( pNextNd, 0 );
829  pNextNd->JoinPrev();
830  }
831  else if (pCurrentNd->GetText().isEmpty())
832  {
833  pPos->nContent.Assign( nullptr, 0 );
835  m_xDoc->GetNodes().Delete( pPos->nNode );
837  }
838  }
839  }
840 
841  // annul the SplitNode from the beginning
842  else if( !IsNewDoc() )
843  {
844  if( pPos->nContent.GetIndex() ) // then there was no <p> at the end
845  m_pPam->Move( fnMoveForward, GoInNode ); // therefore to the next
846  SwTextNode* pTextNode = pPos->nNode.GetNode().GetTextNode();
847  SwNodeIndex aPrvIdx( pPos->nNode );
848  if( pTextNode && pTextNode->CanJoinPrev( &aPrvIdx ) &&
849  *m_pSttNdIdx <= aPrvIdx )
850  {
851  // Normally here should take place a JoinNext, but all cursors and
852  // so are registered in pTextNode, so that it MUST remain.
853 
854  // Convert paragraph to character attribute, from Prev adopt
855  // the paragraph attribute and the template!
856  SwTextNode* pPrev = aPrvIdx.GetNode().GetTextNode();
857  pTextNode->ChgFormatColl( pPrev->GetTextColl() );
858  pTextNode->FormatToTextAttr( pPrev );
859  pTextNode->ResetAllAttr();
860 
861  if( pPrev->HasSwAttrSet() )
862  pTextNode->SetAttr( *pPrev->GetpSwAttrSet() );
863 
864  if( &m_pPam->GetBound().nNode.GetNode() == pPrev )
865  m_pPam->GetBound().nContent.Assign( pTextNode, 0 );
866  if( &m_pPam->GetBound(false).nNode.GetNode() == pPrev )
867  m_pPam->GetBound(false).nContent.Assign( pTextNode, 0 );
868 
869  pTextNode->JoinPrev();
870  }
871  }
872 
873  // adjust AutoLoad in DocumentProperties
874  if (!utl::ConfigManager::IsFuzzing() && IsNewDoc())
875  {
876  SwDocShell *pDocShell(m_xDoc->GetDocShell());
877  OSL_ENSURE(pDocShell, "no SwDocShell");
878  if (pDocShell) {
879  uno::Reference<document::XDocumentPropertiesSupplier> xDPS(
880  pDocShell->GetModel(), uno::UNO_QUERY_THROW);
881  uno::Reference<document::XDocumentProperties> xDocProps(
882  xDPS->getDocumentProperties());
883  OSL_ENSURE(xDocProps.is(), "DocumentProperties is null");
884  if ( xDocProps.is() && (xDocProps->getAutoloadSecs() > 0) &&
885  (xDocProps->getAutoloadURL().isEmpty()) )
886  {
887  xDocProps->setAutoloadURL(m_aPathToFile);
888  }
889  }
890  }
891 
892  if( m_bUpdateDocStat )
893  {
894  m_xDoc->getIDocumentStatistics().UpdateDocStat( false, true );
895  }
896  }
897 
898  if( SvParserState::Pending != GetStatus() )
899  {
900  delete m_pSttNdIdx;
901  m_pSttNdIdx = nullptr;
902  }
903 
904  // should the parser be the last one who hold the document, then nothing
905  // has to be done anymore, document will be destroyed shortly!
906  if( 1 < m_xDoc->getReferenceCount() )
907  {
908  if( bWasUndo )
909  {
910  m_xDoc->GetIDocumentUndoRedo().DelAllUndoObj();
911  m_xDoc->GetIDocumentUndoRedo().DoUndo(true);
912  }
913  else if( !pInitVSh )
914  {
915  // When at the beginning of Continue no Shell was available,
916  // it's possible in the meantime one was created.
917  // In that case the bWasUndo flag is wrong and we must
918  // enable Undo.
919  SwViewShell *pTmpVSh = CheckActionViewShell();
920  if( pTmpVSh )
921  {
922  m_xDoc->GetIDocumentUndoRedo().DoUndo(true);
923  }
924  }
925 
926  m_xDoc->SetOle2Link( aOLELink );
927  if( !bModified )
928  m_xDoc->getIDocumentState().ResetModified();
929  if( m_bSetModEnabled && m_xDoc->GetDocShell() )
930  {
931  m_xDoc->GetDocShell()->EnableSetModified();
932  m_bSetModEnabled = false; // this is unnecessary here
933  }
934  }
935 
936  // When the Document-SwVievShell still exists and an Action is open
937  // (doesn't have to be by abort), end the Action, disconnect from Shell
938  // and finally reconstruct the old Shell.
939  CallEndAction( true );
940 
941 #ifdef DBG_UTIL
942  m_nContinue--;
943 #endif
944 }
945 
946 void SwHTMLParser::Notify(const SfxHint& rHint)
947 {
948  if(rHint.GetId() == SfxHintId::Dying)
949  {
950  EndListeningAll();
951  ReleaseRef();
952  }
953 }
954 
956 {
    // Marks the document as initialised; expected to run once per parser
    // (asserted below). For a new document it additionally finishes a
    // still-open header, ends the current action, switches undo off and
    // starts a new action.
957  OSL_ENSURE( !m_bDocInitialized, "DocumentDetected called multiple times" );
958  m_bDocInitialized = true;
959  if( IsNewDoc() )
960  {
961  if( IsInHeader() )
962  FinishHeader();
963 
964  CallEndAction( true );
965 
    // Undo is switched off here and is not re-enabled in this function.
966  m_xDoc->GetIDocumentUndoRedo().DoUndo(false);
967  // For DocumentDetected in general a SwViewShell is created.
968  // But it also can be created later, in case the UI is captured.
969  CallStartAction();
970  }
971 }
972 
973 // is called for every token that is recognised in CallParser
975 {
976  if( ( m_xDoc->GetDocShell() && m_xDoc->GetDocShell()->IsAbortingImport() )
977  || 1 == m_xDoc->getReferenceCount() )
978  {
979  // Was the import cancelled by SFX? If a pending stack
980  // exists, clean it.
981  eState = SvParserState::Error;
982  OSL_ENSURE( m_vPendingStack.empty() || m_vPendingStack.back().nToken != HtmlTokenId::NONE,
983  "SwHTMLParser::NextToken: Pending-Stack without token" );
984  if( 1 == m_xDoc->getReferenceCount() || m_vPendingStack.empty() )
985  return ;
986  }
987 
988 #if OSL_DEBUG_LEVEL > 0
989  if( !m_vPendingStack.empty() )
990  {
991  switch( nToken )
992  {
993  // tables are read by recursive method calls
994  case HtmlTokenId::TABLE_ON:
995  // For CSS declarations we might have to wait
996  // for a file download to finish
997  case HtmlTokenId::LINK:
998  // For controls we might have to set the size.
999  case HtmlTokenId::INPUT:
1000  case HtmlTokenId::TEXTAREA_ON:
1001  case HtmlTokenId::SELECT_ON:
1002  case HtmlTokenId::SELECT_OFF:
1003  break;
1004  default:
1005  OSL_ENSURE( m_vPendingStack.empty(), "Unknown token for Pending-Stack" );
1006  break;
1007  }
1008  }
1009 #endif
1010 
1011  // The following special cases have to be treated before the
1012  // filter detection, because Netscape doesn't reference the content
1013  // of the title for filter detection either.
1014  if( m_vPendingStack.empty() )
1015  {
1016  if( m_bInTitle )
1017  {
1018  switch( nToken )
1019  {
1020  case HtmlTokenId::TITLE_OFF:
1021  {
1022  OUString sTitle = m_sTitle.makeStringAndClear();
1023  if( IsNewDoc() && !sTitle.isEmpty() )
1024  {
1025  if( m_xDoc->GetDocShell() ) {
1026  uno::Reference<document::XDocumentPropertiesSupplier>
1027  xDPS(m_xDoc->GetDocShell()->GetModel(),
1028  uno::UNO_QUERY_THROW);
1029  uno::Reference<document::XDocumentProperties> xDocProps(
1030  xDPS->getDocumentProperties());
1031  OSL_ENSURE(xDocProps.is(), "no DocumentProperties");
1032  if (xDocProps.is()) {
1033  xDocProps->setTitle(sTitle);
1034  }
1035 
1036  m_xDoc->GetDocShell()->SetTitle(sTitle);
1037  }
1038  }
1039  m_bInTitle = false;
1040  break;
1041  }
1042 
1043  case HtmlTokenId::NONBREAKSPACE:
1044  m_sTitle.append(" ");
1045  break;
1046 
1047  case HtmlTokenId::SOFTHYPH:
1048  m_sTitle.append("-");
1049  break;
1050 
1051  case HtmlTokenId::TEXTTOKEN:
1052  m_sTitle.append(aToken);
1053  break;
1054 
1055  default:
1056  m_sTitle.append("<");
1057  if( (nToken >= HtmlTokenId::ONOFF_START) && isOffToken(nToken) )
1058  m_sTitle.append("/");
1059  m_sTitle.append(sSaveToken);
1060  if( !aToken.isEmpty() )
1061  {
1062  m_sTitle.append(" ");
1063  m_sTitle.append(aToken);
1064  }
1065  m_sTitle.append(">");
1066  break;
1067  }
1068 
1069  return;
1070  }
1071  }
1072 
1073  // Find out what type of document it is if we don't know already.
1074  // For Controls this has to be finished before the control is inserted
1075  // because for inserting a View is needed.
1076  if( !m_bDocInitialized )
1077  DocumentDetected();
1078 
1079  bool bGetIDOption = false, bInsertUnknown = false;
1080  bool bUpperSpaceSave = m_bUpperSpace;
1081  m_bUpperSpace = false;
1082 
1083  // The following special cases may or have to be treated after the
1084  // filter detection
1085  if( m_vPendingStack.empty() )
1086  {
1087  if( m_bInFloatingFrame )
1088  {
1089  // <SCRIPT> is ignored here (from us), because it is ignored in
1090  // Applets as well
1091  if( HtmlTokenId::IFRAME_OFF == nToken )
1092  {
1093  m_bCallNextToken = false;
1094  m_bInFloatingFrame = false;
1095  }
1096 
1097  return;
1098  }
1099  else if( m_bInNoEmbed )
1100  {
1101  switch( nToken )
1102  {
1103  case HtmlTokenId::NOEMBED_OFF:
1106  m_aContents.clear();
1107  m_bCallNextToken = false;
1108  m_bInNoEmbed = false;
1109  break;
1110 
1111  case HtmlTokenId::RAWDATA:
1113  break;
1114 
1115  default:
1116  OSL_ENSURE( false, "SwHTMLParser::NextToken: invalid tag" );
1117  break;
1118  }
1119 
1120  return;
1121  }
1122  else if( m_pAppletImpl )
1123  {
1124  // in an applet only <PARAM> tags and the </APPLET> tag
1125  // are of interest for us (for the moment)
1126  // <SCRIPT> is ignored here (from Netscape)!
1127 
1128  switch( nToken )
1129  {
1130  case HtmlTokenId::APPLET_OFF:
1131  m_bCallNextToken = false;
1132  EndApplet();
1133  break;
1134  case HtmlTokenId::OBJECT_OFF:
1135  m_bCallNextToken = false;
1136  EndObject();
1137  break;
1138  case HtmlTokenId::PARAM:
1139  InsertParam();
1140  break;
1141  default: break;
1142  }
1143 
1144  return;
1145  }
1146  else if( m_bTextArea )
1147  {
1148  // in a TextArea everything up to </TEXTAREA> is inserted as text.
1149  // <SCRIPT> is ignored here (from Netscape)!
1150 
1151  switch( nToken )
1152  {
1153  case HtmlTokenId::TEXTAREA_OFF:
1154  m_bCallNextToken = false;
1155  EndTextArea();
1156  break;
1157 
1158  default:
1159  InsertTextAreaText( nToken );
1160  break;
1161  }
1162 
1163  return;
1164  }
1165  else if( m_bSelect )
1166  {
1167  // HAS to be treated after bNoScript!
1168  switch( nToken )
1169  {
1170  case HtmlTokenId::SELECT_OFF:
1171  m_bCallNextToken = false;
1172  EndSelect();
1173  return;
1174 
1175  case HtmlTokenId::OPTION:
1177  return;
1178 
1179  case HtmlTokenId::TEXTTOKEN:
1180  InsertSelectText();
1181  return;
1182 
1183  case HtmlTokenId::INPUT:
1184  case HtmlTokenId::SCRIPT_ON:
1185  case HtmlTokenId::SCRIPT_OFF:
1186  case HtmlTokenId::NOSCRIPT_ON:
1187  case HtmlTokenId::NOSCRIPT_OFF:
1188  case HtmlTokenId::RAWDATA:
1189  // treat in normal switch
1190  break;
1191 
1192  default:
1193  // ignore
1194  return;
1195  }
1196  }
1197  else if( m_pMarquee )
1198  {
1199  // in a TextArea everything up to </TEXTAREA> is inserted as text.
1200  // The <SCRIPT> tags are ignored from MS-IE, we ignore the whole
1201  // script.
1202  switch( nToken )
1203  {
1204  case HtmlTokenId::MARQUEE_OFF:
1205  m_bCallNextToken = false;
1206  EndMarquee();
1207  break;
1208 
1209  case HtmlTokenId::TEXTTOKEN:
1211  break;
1212  default: break;
1213  }
1214 
1215  return;
1216  }
1217  else if( m_bInField )
1218  {
1219  switch( nToken )
1220  {
1221  case HtmlTokenId::SDFIELD_OFF:
1222  m_bCallNextToken = false;
1223  EndField();
1224  break;
1225 
1226  case HtmlTokenId::TEXTTOKEN:
1227  InsertFieldText();
1228  break;
1229  default: break;
1230  }
1231 
1232  return;
1233  }
1235  {
1236  switch( nToken )
1237  {
1238  case HtmlTokenId::ANCHOR_OFF:
1239  EndAnchor();
1240  m_bCallNextToken = false;
1241  break;
1242 
1243  case HtmlTokenId::TEXTTOKEN:
1245  break;
1246  default: break;
1247  }
1248  return;
1249  }
1250  else if( !m_aUnknownToken.isEmpty() )
1251  {
1252  // Paste content of unknown tags.
1253  // (but surely if we are not in the header section) fdo#36080 fdo#34666
1254  if (!aToken.isEmpty() && !IsInHeader() )
1255  {
1256  if( !m_bDocInitialized )
1257  DocumentDetected();
1258  m_xDoc->getIDocumentContentOperations().InsertString( *m_pPam, aToken );
1259 
1260  // if there are temporary paragraph attributes and the
1261  // paragraph isn't empty then the paragraph attributes
1262  // are final.
1263  m_aParaAttrs.clear();
1264 
1265  SetAttr();
1266  }
1267 
1268  // Unknown token in the header are only closed by a matching
1269  // end-token, </HEAD> or <BODY>. Text inside is ignored.
1270  switch( nToken )
1271  {
1272  case HtmlTokenId::UNKNOWNCONTROL_OFF:
1273  if( m_aUnknownToken != sSaveToken )
1274  return;
1275  [[fallthrough]];
1276  case HtmlTokenId::FRAMESET_ON:
1277  case HtmlTokenId::HEAD_OFF:
1278  case HtmlTokenId::BODY_ON:
1279  case HtmlTokenId::IMAGE: // Don't know why Netscape acts this way.
1280  m_aUnknownToken.clear();
1281  break;
1282  case HtmlTokenId::TEXTTOKEN:
1283  return;
1284  default:
1285  m_aUnknownToken.clear();
1286  break;
1287  }
1288  }
1289  }
1290 
1291  switch( nToken )
1292  {
1293  case HtmlTokenId::BODY_ON:
1294  if (!m_bBodySeen)
1295  {
1296  m_bBodySeen = true;
1297  if( !m_aStyleSource.isEmpty() )
1298  {
1299  m_pCSS1Parser->ParseStyleSheet( m_aStyleSource );
1300  m_aStyleSource.clear();
1301  }
1302  if( IsNewDoc() )
1303  {
1305  // If there is a template for the first or the right page,
1306  // it is set here.
1307  const SwPageDesc *pPageDesc = nullptr;
1308  if( m_pCSS1Parser->IsSetFirstPageDesc() )
1309  pPageDesc = m_pCSS1Parser->GetFirstPageDesc();
1310  else if( m_pCSS1Parser->IsSetRightPageDesc() )
1311  pPageDesc = m_pCSS1Parser->GetRightPageDesc();
1312 
1313  if( pPageDesc )
1314  {
1315  m_xDoc->getIDocumentContentOperations().InsertPoolItem( *m_pPam, SwFormatPageDesc( pPageDesc ) );
1316  }
1317  }
1318  }
1319  break;
1320 
1321  case HtmlTokenId::LINK:
1322  InsertLink();
1323  break;
1324 
1325  case HtmlTokenId::BASE:
1326  {
1327  const HTMLOptions& rHTMLOptions = GetOptions();
1328  for (size_t i = rHTMLOptions.size(); i; )
1329  {
1330  const HTMLOption& rOption = rHTMLOptions[--i];
1331  switch( rOption.GetToken() )
1332  {
1333  case HtmlOptionId::HREF:
1334  m_sBaseURL = rOption.GetString();
1335  break;
1336  case HtmlOptionId::TARGET:
1337  if( IsNewDoc() )
1338  {
1339  SwDocShell *pDocShell(m_xDoc->GetDocShell());
1340  OSL_ENSURE(pDocShell, "no SwDocShell");
1341  if (pDocShell) {
1342  uno::Reference<document::XDocumentPropertiesSupplier> xDPS(
1343  pDocShell->GetModel(), uno::UNO_QUERY_THROW);
1344  uno::Reference<document::XDocumentProperties>
1345  xDocProps(xDPS->getDocumentProperties());
1346  OSL_ENSURE(xDocProps.is(),"no DocumentProperties");
1347  if (xDocProps.is()) {
1348  xDocProps->setDefaultTarget(
1349  rOption.GetString());
1350  }
1351  }
1352  }
1353  break;
1354  default: break;
1355  }
1356  }
1357  }
1358  break;
1359 
1360  case HtmlTokenId::META:
1361  {
1362  SvKeyValueIterator *pHTTPHeader = nullptr;
1363  if( IsNewDoc() )
1364  {
1365  SwDocShell *pDocSh = m_xDoc->GetDocShell();
1366  if( pDocSh )
1367  pHTTPHeader = pDocSh->GetHeaderAttributes();
1368  }
1369  SwDocShell *pDocShell(m_xDoc->GetDocShell());
1370  OSL_ENSURE(pDocShell, "no SwDocShell");
1371  if (pDocShell)
1372  {
1373  uno::Reference<document::XDocumentProperties> xDocProps;
1374  if (IsNewDoc())
1375  {
1376  const uno::Reference<document::XDocumentPropertiesSupplier>
1377  xDPS( pDocShell->GetModel(), uno::UNO_QUERY_THROW );
1378  xDocProps = xDPS->getDocumentProperties();
1379  OSL_ENSURE(xDocProps.is(), "DocumentProperties is null");
1380  }
1381  ParseMetaOptions( xDocProps, pHTTPHeader );
1382  }
1383  }
1384  break;
1385 
1386  case HtmlTokenId::TITLE_ON:
1387  m_bInTitle = true;
1388  break;
1389 
1390  case HtmlTokenId::SCRIPT_ON:
1391  NewScript();
1392  break;
1393 
1394  case HtmlTokenId::SCRIPT_OFF:
1395  EndScript();
1396  break;
1397 
1398  case HtmlTokenId::NOSCRIPT_ON:
1399  case HtmlTokenId::NOSCRIPT_OFF:
1400  bInsertUnknown = true;
1401  break;
1402 
1403  case HtmlTokenId::STYLE_ON:
1404  NewStyle();
1405  break;
1406 
1407  case HtmlTokenId::STYLE_OFF:
1408  EndStyle();
1409  break;
1410 
1411  case HtmlTokenId::RAWDATA:
1412  if( !m_bIgnoreRawData )
1413  {
1414  if( IsReadScript() )
1415  {
1416  AddScriptSource();
1417  }
1418  else if( IsReadStyle() )
1419  {
1420  if( !m_aStyleSource.isEmpty() )
1421  m_aStyleSource += "\n";
1422  m_aStyleSource += aToken;
1423  }
1424  }
1425  break;
1426 
1427  case HtmlTokenId::OBJECT_ON:
1428  if (m_bXHTML)
1429  {
1430  if (!InsertEmbed())
1431  InsertImage();
1432  break;
1433  }
1434 #if HAVE_FEATURE_JAVA
1435  NewObject();
1436  m_bCallNextToken = m_pAppletImpl!=nullptr && m_xTable;
1437 #endif
1438  break;
1439 
1440  case HtmlTokenId::OBJECT_OFF:
1441  if (!m_aEmbeds.empty())
1442  m_aEmbeds.pop();
1443  break;
1444 
1445  case HtmlTokenId::APPLET_ON:
1446 #if HAVE_FEATURE_JAVA
1447  InsertApplet();
1448  m_bCallNextToken = m_pAppletImpl!=nullptr && m_xTable;
1449 #endif
1450  break;
1451 
1452  case HtmlTokenId::IFRAME_ON:
1455  break;
1456 
1457  case HtmlTokenId::LINEBREAK:
1458  if( !IsReadPRE() )
1459  {
1460  InsertLineBreak();
1461  break;
1462  }
1463  else
1464  bGetIDOption = true;
1465  // <BR>s in <PRE> resemble true LFs, hence no break
1466  [[fallthrough]];
1467 
1468  case HtmlTokenId::NEWPARA:
1469  // CR in PRE/LISTING/XMP
1470  {
1471  if( HtmlTokenId::NEWPARA==nToken ||
1473  {
1474  AppendTextNode(); // there is no LF at this place
1475  // therefore it will cause no problems
1476  SetTextCollAttrs();
1477  }
1478  // progress bar
1479  if (m_xProgress)
1480  m_xProgress->Update(rInput.Tell());
1481  }
1482  break;
1483 
1484  case HtmlTokenId::NONBREAKSPACE:
1485  m_xDoc->getIDocumentContentOperations().InsertString( *m_pPam, OUString(CHAR_HARDBLANK) );
1486  break;
1487 
1488  case HtmlTokenId::SOFTHYPH:
1489  m_xDoc->getIDocumentContentOperations().InsertString( *m_pPam, OUString(CHAR_SOFTHYPHEN) );
1490  break;
1491 
1492  case HtmlTokenId::LINEFEEDCHAR:
1493  if( m_pPam->GetPoint()->nContent.GetIndex() )
1494  AppendTextNode();
1495  if (!m_xTable && !m_xDoc->IsInHeaderFooter(m_pPam->GetPoint()->nNode))
1496  {
1497  NewAttr(m_xAttrTab, &m_xAttrTab->pBreak, SvxFormatBreakItem(SvxBreak::PageBefore, RES_BREAK));
1498  EndAttr( m_xAttrTab->pBreak, false );
1499  }
1500  break;
1501 
1502  case HtmlTokenId::TEXTTOKEN:
1503  // insert string without spanning attributes at the end.
1504  if( !aToken.isEmpty() && ' '==aToken[0] && !IsReadPRE() )
1505  {
1506  sal_Int32 nPos = m_pPam->GetPoint()->nContent.GetIndex();
1507  const SwTextNode* pTextNode = nPos ? m_pPam->GetPoint()->nNode.GetNode().GetTextNode() : nullptr;
1508  if (pTextNode)
1509  {
1510  const OUString& rText = pTextNode->GetText();
1511  sal_Unicode cLast = rText[--nPos];
1512  if( ' ' == cLast || '\x0a' == cLast)
1513  aToken = aToken.copy(1);
1514  }
1515  else
1516  aToken = aToken.copy(1);
1517 
1518  if( aToken.isEmpty() )
1519  {
1520  m_bUpperSpace = bUpperSpaceSave;
1521  break;
1522  }
1523  }
1524 
1525  if( !aToken.isEmpty() )
1526  {
1527  if( !m_bDocInitialized )
1528  DocumentDetected();
1529 
1530  if (!m_aEmbeds.empty())
1531  {
1532  // The text token is inside an OLE object, which means
1533  // alternate text.
1534  SwOLENode* pOLENode = m_aEmbeds.top();
1535  if (SwFlyFrameFormat* pFormat
1536  = dynamic_cast<SwFlyFrameFormat*>(pOLENode->GetFlyFormat()))
1537  {
1539  {
1540  pObject->SetTitle(pObject->GetTitle() + aToken);
1541  break;
1542  }
1543  }
1544  }
1545 
1546  m_xDoc->getIDocumentContentOperations().InsertString( *m_pPam, aToken );
1547 
1548  // if there are temporary paragraph attributes and the
1549  // paragraph isn't empty then the paragraph attributes
1550  // are final.
1551  m_aParaAttrs.clear();
1552 
1553  SetAttr();
1554  }
1555  break;
1556 
1557  case HtmlTokenId::HORZRULE:
1558  InsertHorzRule();
1559  break;
1560 
1561  case HtmlTokenId::IMAGE:
1562  InsertImage();
1563  // if only the parser references the doc, we can break and set
1564  // an error code
1565  if( 1 == m_xDoc->getReferenceCount() )
1566  {
1567  eState = SvParserState::Error;
1568  }
1569  break;
1570 
1571  case HtmlTokenId::SPACER:
1572  InsertSpacer();
1573  break;
1574 
1575  case HtmlTokenId::EMBED:
1576  InsertEmbed();
1577  break;
1578 
1579  case HtmlTokenId::NOEMBED_ON:
1580  m_bInNoEmbed = true;
1581  m_bCallNextToken = bool(m_xTable);
1582  ReadRawData( OOO_STRING_SVTOOLS_HTML_noembed );
1583  break;
1584 
1585  case HtmlTokenId::DEFLIST_ON:
1586  if( m_nOpenParaToken != HtmlTokenId::NONE )
1587  EndPara();
1588  NewDefList();
1589  break;
1590  case HtmlTokenId::DEFLIST_OFF:
1591  if( m_nOpenParaToken != HtmlTokenId::NONE )
1592  EndPara();
1593  EndDefListItem( HtmlTokenId::NONE );
1594  EndDefList();
1595  break;
1596 
1597  case HtmlTokenId::DD_ON:
1598  case HtmlTokenId::DT_ON:
1599  if( m_nOpenParaToken != HtmlTokenId::NONE )
1600  EndPara();
1601  EndDefListItem();// close <DD>/<DT> and set no template
1602  NewDefListItem( nToken );
1603  break;
1604 
1605  case HtmlTokenId::DD_OFF:
1606  case HtmlTokenId::DT_OFF:
1607  // c.f. HtmlTokenId::LI_OFF
1608  // Actually we should close a DD/DT now.
1609  // But neither Netscape nor Microsoft do this and so don't we.
1610  EndDefListItem( nToken );
1611  break;
1612 
1613  // divisions
1614  case HtmlTokenId::DIVISION_ON:
1615  case HtmlTokenId::CENTER_ON:
1616  if (!m_isInTableStructure)
1617  {
1618  if (m_nOpenParaToken != HtmlTokenId::NONE)
1619  {
1620  if (IsReadPRE())
1621  m_nOpenParaToken = HtmlTokenId::NONE;
1622  else
1623  EndPara();
1624  }
1625  NewDivision( nToken );
1626  }
1627  break;
1628 
1629  case HtmlTokenId::DIVISION_OFF:
1630  case HtmlTokenId::CENTER_OFF:
1631  if (!m_isInTableStructure)
1632  {
1633  if (m_nOpenParaToken != HtmlTokenId::NONE)
1634  {
1635  if (IsReadPRE())
1636  m_nOpenParaToken = HtmlTokenId::NONE;
1637  else
1638  EndPara();
1639  }
1640  EndDivision();
1641  }
1642  break;
1643 
1644  case HtmlTokenId::MULTICOL_ON:
1645  if( m_nOpenParaToken != HtmlTokenId::NONE )
1646  EndPara();
1647  NewMultiCol();
1648  break;
1649 
1650  case HtmlTokenId::MULTICOL_OFF:
1651  if( m_nOpenParaToken != HtmlTokenId::NONE )
1652  EndPara();
1653  EndTag( HtmlTokenId::MULTICOL_ON );
1654  break;
1655 
1656  case HtmlTokenId::MARQUEE_ON:
1657  NewMarquee();
1658  m_bCallNextToken = m_pMarquee!=nullptr && m_xTable;
1659  break;
1660 
1661  case HtmlTokenId::FORM_ON:
1662  NewForm();
1663  break;
1664  case HtmlTokenId::FORM_OFF:
1665  EndForm();
1666  break;
1667 
1668  // templates
1669  case HtmlTokenId::PARABREAK_ON:
1670  if( m_nOpenParaToken != HtmlTokenId::NONE )
1671  EndPara( true );
1672  NewPara();
1673  break;
1674 
1675  case HtmlTokenId::PARABREAK_OFF:
1676  EndPara( true );
1677  break;
1678 
1679  case HtmlTokenId::ADDRESS_ON:
1680  if( m_nOpenParaToken != HtmlTokenId::NONE )
1681  EndPara();
1682  NewTextFormatColl(HtmlTokenId::ADDRESS_ON, RES_POOLCOLL_SEND_ADDRESS);
1683  break;
1684 
1685  case HtmlTokenId::ADDRESS_OFF:
1686  if( m_nOpenParaToken != HtmlTokenId::NONE )
1687  EndPara();
1688  EndTextFormatColl( HtmlTokenId::ADDRESS_OFF );
1689  break;
1690 
1691  case HtmlTokenId::BLOCKQUOTE_ON:
1692  case HtmlTokenId::BLOCKQUOTE30_ON:
1693  if( m_nOpenParaToken != HtmlTokenId::NONE )
1694  EndPara();
1695  NewTextFormatColl( HtmlTokenId::BLOCKQUOTE_ON, RES_POOLCOLL_HTML_BLOCKQUOTE );
1696  break;
1697 
1698  case HtmlTokenId::BLOCKQUOTE_OFF:
1699  case HtmlTokenId::BLOCKQUOTE30_OFF:
1700  if( m_nOpenParaToken != HtmlTokenId::NONE )
1701  EndPara();
1702  EndTextFormatColl( HtmlTokenId::BLOCKQUOTE_ON );
1703  break;
1704 
1705  case HtmlTokenId::PREFORMTXT_ON:
1706  case HtmlTokenId::LISTING_ON:
1707  case HtmlTokenId::XMP_ON:
1708  if( m_nOpenParaToken != HtmlTokenId::NONE )
1709  EndPara();
1711  break;
1712 
1713  case HtmlTokenId::PREFORMTXT_OFF:
1714  m_bNoParSpace = true; // the last PRE-paragraph gets a spacing
1715  EndTextFormatColl( HtmlTokenId::PREFORMTXT_OFF );
1716  break;
1717 
1718  case HtmlTokenId::LISTING_OFF:
1719  case HtmlTokenId::XMP_OFF:
1720  EndTextFormatColl( nToken );
1721  break;
1722 
1723  case HtmlTokenId::HEAD1_ON:
1724  case HtmlTokenId::HEAD2_ON:
1725  case HtmlTokenId::HEAD3_ON:
1726  case HtmlTokenId::HEAD4_ON:
1727  case HtmlTokenId::HEAD5_ON:
1728  case HtmlTokenId::HEAD6_ON:
1729  if( m_nOpenParaToken != HtmlTokenId::NONE )
1730  {
1731  if( IsReadPRE() )
1732  m_nOpenParaToken = HtmlTokenId::NONE;
1733  else
1734  EndPara();
1735  }
1736  NewHeading( nToken );
1737  break;
1738 
1739  case HtmlTokenId::HEAD1_OFF:
1740  case HtmlTokenId::HEAD2_OFF:
1741  case HtmlTokenId::HEAD3_OFF:
1742  case HtmlTokenId::HEAD4_OFF:
1743  case HtmlTokenId::HEAD5_OFF:
1744  case HtmlTokenId::HEAD6_OFF:
1745  EndHeading();
1746  break;
1747 
1748  case HtmlTokenId::TABLE_ON:
1749  if( !m_vPendingStack.empty() )
1750  BuildTable( SvxAdjust::End );
1751  else
1752  {
1753  if( m_nOpenParaToken != HtmlTokenId::NONE )
1754  EndPara();
1755  OSL_ENSURE(!m_xTable, "table in table not allowed here");
1756  if( !m_xTable && (IsNewDoc() || !m_pPam->GetNode().FindTableNode()) &&
1757  (m_pPam->GetPoint()->nNode.GetIndex() >
1758  m_xDoc->GetNodes().GetEndOfExtras().GetIndex() ||
1760  {
1761  if ( m_nParaCnt < 5 )
1762  Show(); // show what we have up to here
1763 
1764  SvxAdjust eAdjust = m_xAttrTab->pAdjust
1765  ? static_cast<const SvxAdjustItem&>(m_xAttrTab->pAdjust->GetItem()).
1766  GetAdjust()
1767  : SvxAdjust::End;
1768  BuildTable( eAdjust );
1769  }
1770  else
1771  bInsertUnknown = m_bKeepUnknown;
1772  }
1773  break;
1774 
1775  // lists
1776  case HtmlTokenId::DIRLIST_ON:
1777  case HtmlTokenId::MENULIST_ON:
1778  case HtmlTokenId::ORDERLIST_ON:
1779  case HtmlTokenId::UNORDERLIST_ON:
1780  if( m_nOpenParaToken != HtmlTokenId::NONE )
1781  EndPara();
1782  NewNumberBulletList( nToken );
1783  break;
1784 
1785  case HtmlTokenId::DIRLIST_OFF:
1786  case HtmlTokenId::MENULIST_OFF:
1787  case HtmlTokenId::ORDERLIST_OFF:
1788  case HtmlTokenId::UNORDERLIST_OFF:
1789  if( m_nOpenParaToken != HtmlTokenId::NONE )
1790  EndPara();
1791  EndNumberBulletListItem( HtmlTokenId::NONE, true );
1792  EndNumberBulletList( nToken );
1793  break;
1794 
1795  case HtmlTokenId::LI_ON:
1796  case HtmlTokenId::LISTHEADER_ON:
1797  if( m_nOpenParaToken != HtmlTokenId::NONE &&
1799  || HtmlTokenId::PARABREAK_ON==m_nOpenParaToken) )
1800  {
1801  // only finish paragraph for <P><LI>, not for <DD><LI>
1802  EndPara();
1803  }
1804 
1805  EndNumberBulletListItem( HtmlTokenId::NONE, false );// close <LI>/<LH> and don't set a template
1806  NewNumberBulletListItem( nToken );
1807  break;
1808 
1809  case HtmlTokenId::LI_OFF:
1810  case HtmlTokenId::LISTHEADER_OFF:
1811  EndNumberBulletListItem( nToken, false );
1812  break;
1813 
1814  // Attribute :
1815  case HtmlTokenId::ITALIC_ON:
1816  {
1820  NewStdAttr( HtmlTokenId::ITALIC_ON,
1821  &m_xAttrTab->pItalic, aPosture,
1822  &m_xAttrTab->pItalicCJK, &aPostureCJK,
1823  &m_xAttrTab->pItalicCTL, &aPostureCTL );
1824  }
1825  break;
1826 
1827  case HtmlTokenId::BOLD_ON:
1828  {
1832  NewStdAttr( HtmlTokenId::BOLD_ON,
1833  &m_xAttrTab->pBold, aWeight,
1834  &m_xAttrTab->pBoldCJK, &aWeightCJK,
1835  &m_xAttrTab->pBoldCTL, &aWeightCTL );
1836  }
1837  break;
1838 
1839  case HtmlTokenId::STRIKE_ON:
1840  case HtmlTokenId::STRIKETHROUGH_ON:
1841  {
1842  NewStdAttr( HtmlTokenId::STRIKE_ON, &m_xAttrTab->pStrike,
1844  }
1845  break;
1846 
1847  case HtmlTokenId::UNDERLINE_ON:
1848  {
1849  NewStdAttr( HtmlTokenId::UNDERLINE_ON, &m_xAttrTab->pUnderline,
1851  }
1852  break;
1853 
1854  case HtmlTokenId::SUPERSCRIPT_ON:
1855  {
1856  NewStdAttr( HtmlTokenId::SUPERSCRIPT_ON, &m_xAttrTab->pEscapement,
1858  }
1859  break;
1860 
1861  case HtmlTokenId::SUBSCRIPT_ON:
1862  {
1863  NewStdAttr( HtmlTokenId::SUBSCRIPT_ON, &m_xAttrTab->pEscapement,
1865  }
1866  break;
1867 
1868  case HtmlTokenId::BLINK_ON:
1869  {
1870  NewStdAttr( HtmlTokenId::BLINK_ON, &m_xAttrTab->pBlink,
1871  SvxBlinkItem( true, RES_CHRATR_BLINK ) );
1872  }
1873  break;
1874 
1875  case HtmlTokenId::SPAN_ON:
1876  NewStdAttr( HtmlTokenId::SPAN_ON );
1877  break;
1878 
1879  case HtmlTokenId::ITALIC_OFF:
1880  case HtmlTokenId::BOLD_OFF:
1881  case HtmlTokenId::STRIKE_OFF:
1882  case HtmlTokenId::UNDERLINE_OFF:
1883  case HtmlTokenId::SUPERSCRIPT_OFF:
1884  case HtmlTokenId::SUBSCRIPT_OFF:
1885  case HtmlTokenId::BLINK_OFF:
1886  case HtmlTokenId::SPAN_OFF:
1887  EndTag( nToken );
1888  break;
1889 
1890  case HtmlTokenId::STRIKETHROUGH_OFF:
1891  EndTag( HtmlTokenId::STRIKE_OFF );
1892  break;
1893 
1894  case HtmlTokenId::BASEFONT_ON:
1895  NewBasefontAttr();
1896  break;
1897  case HtmlTokenId::BASEFONT_OFF:
1898  EndBasefontAttr();
1899  break;
1900  case HtmlTokenId::FONT_ON:
1901  case HtmlTokenId::BIGPRINT_ON:
1902  case HtmlTokenId::SMALLPRINT_ON:
1903  NewFontAttr( nToken );
1904  break;
1905  case HtmlTokenId::FONT_OFF:
1906  case HtmlTokenId::BIGPRINT_OFF:
1907  case HtmlTokenId::SMALLPRINT_OFF:
1908  EndFontAttr( nToken );
1909  break;
1910 
1911  case HtmlTokenId::EMPHASIS_ON:
1912  case HtmlTokenId::CITATION_ON:
1913  case HtmlTokenId::STRONG_ON:
1914  case HtmlTokenId::CODE_ON:
1915  case HtmlTokenId::SAMPLE_ON:
1916  case HtmlTokenId::KEYBOARD_ON:
1917  case HtmlTokenId::VARIABLE_ON:
1918  case HtmlTokenId::DEFINSTANCE_ON:
1919  case HtmlTokenId::SHORTQUOTE_ON:
1920  case HtmlTokenId::LANGUAGE_ON:
1921  case HtmlTokenId::AUTHOR_ON:
1922  case HtmlTokenId::PERSON_ON:
1923  case HtmlTokenId::ACRONYM_ON:
1924  case HtmlTokenId::ABBREVIATION_ON:
1925  case HtmlTokenId::INSERTEDTEXT_ON:
1926  case HtmlTokenId::DELETEDTEXT_ON:
1927 
1928  case HtmlTokenId::TELETYPE_ON:
1929  NewCharFormat( nToken );
1930  break;
1931 
1932  case HtmlTokenId::SDFIELD_ON:
1933  NewField();
1935  break;
1936 
1937  case HtmlTokenId::EMPHASIS_OFF:
1938  case HtmlTokenId::CITATION_OFF:
1939  case HtmlTokenId::STRONG_OFF:
1940  case HtmlTokenId::CODE_OFF:
1941  case HtmlTokenId::SAMPLE_OFF:
1942  case HtmlTokenId::KEYBOARD_OFF:
1943  case HtmlTokenId::VARIABLE_OFF:
1944  case HtmlTokenId::DEFINSTANCE_OFF:
1945  case HtmlTokenId::SHORTQUOTE_OFF:
1946  case HtmlTokenId::LANGUAGE_OFF:
1947  case HtmlTokenId::AUTHOR_OFF:
1948  case HtmlTokenId::PERSON_OFF:
1949  case HtmlTokenId::ACRONYM_OFF:
1950  case HtmlTokenId::ABBREVIATION_OFF:
1951  case HtmlTokenId::INSERTEDTEXT_OFF:
1952  case HtmlTokenId::DELETEDTEXT_OFF:
1953 
1954  case HtmlTokenId::TELETYPE_OFF:
1955  EndTag( nToken );
1956  break;
1957 
1958  case HtmlTokenId::HEAD_OFF:
1959  if( !m_aStyleSource.isEmpty() )
1960  {
1961  m_pCSS1Parser->ParseStyleSheet( m_aStyleSource );
1962  m_aStyleSource.clear();
1963  }
1964  break;
1965 
1966  case HtmlTokenId::DOCTYPE:
1967  case HtmlTokenId::BODY_OFF:
1968  case HtmlTokenId::HTML_OFF:
1969  case HtmlTokenId::HEAD_ON:
1970  case HtmlTokenId::TITLE_OFF:
1971  break; // don't evaluate further???
1972  case HtmlTokenId::HTML_ON:
1973  {
1974  const HTMLOptions& rHTMLOptions = GetOptions();
1975  for (size_t i = rHTMLOptions.size(); i; )
1976  {
1977  const HTMLOption& rOption = rHTMLOptions[--i];
1978  if( HtmlOptionId::DIR == rOption.GetToken() )
1979  {
1980  const OUString& rDir = rOption.GetString();
1981  SfxItemSet aItemSet( m_xDoc->GetAttrPool(),
1982  m_pCSS1Parser->GetWhichMap() );
1983  SvxCSS1PropertyInfo aPropInfo;
1984  OUString aDummy;
1985  ParseStyleOptions( aDummy, aDummy, aDummy, aItemSet,
1986  aPropInfo, nullptr, &rDir );
1987 
1988  m_pCSS1Parser->SetPageDescAttrs( nullptr, &aItemSet );
1989  break;
1990  }
1991  }
1992  }
1993  break;
1994 
1995  case HtmlTokenId::INPUT:
1996  InsertInput();
1997  break;
1998 
1999  case HtmlTokenId::TEXTAREA_ON:
2000  NewTextArea();
2002  break;
2003 
2004  case HtmlTokenId::SELECT_ON:
2005  NewSelect();
2007  break;
2008 
2009  case HtmlTokenId::ANCHOR_ON:
2010  NewAnchor();
2011  break;
2012 
2013  case HtmlTokenId::ANCHOR_OFF:
2014  EndAnchor();
2015  break;
2016 
2017  case HtmlTokenId::COMMENT:
2018  if( ( aToken.getLength() > 5 ) && ( ! m_bIgnoreHTMLComments ) )
2019  {
2020  // insert as Post-It
2021  // If there are no space characters right behind
2022  // the <!-- and on front of the -->, leave the comment untouched.
2023  if( ' ' == aToken[ 3 ] &&
2024  ' ' == aToken[ aToken.getLength()-3 ] )
2025  {
2026  OUString aComment( aToken.copy( 3, aToken.getLength()-5 ) );
2027  InsertComment(comphelper::string::strip(aComment, ' '));
2028  }
2029  else
2030  {
2031  OUString aComment = "<" + aToken + ">";
2032  InsertComment( aComment );
2033  }
2034  }
2035  break;
2036 
2037  case HtmlTokenId::MAP_ON:
2038  // Image Maps are read asynchronously: At first only an image map is created
2039  // Areas are processed later. Nevertheless the
2040  // ImageMap is inserted into the IMap-Array, because it might be used
2041  // already.
2042  m_pImageMap = new ImageMap;
2044  {
2045  if (!m_pImageMaps)
2046  m_pImageMaps.reset( new ImageMaps );
2047  m_pImageMaps->push_back(std::unique_ptr<ImageMap>(m_pImageMap));
2048  }
2049  else
2050  {
2051  delete m_pImageMap;
2052  m_pImageMap = nullptr;
2053  }
2054  break;
2055 
2056  case HtmlTokenId::MAP_OFF:
2057  // there is no ImageMap anymore (don't delete IMap, because it's
2058  // already contained in the array!)
2059  m_pImageMap = nullptr;
2060  break;
2061 
2062  case HtmlTokenId::AREA:
2063  if( m_pImageMap )
2064  ParseAreaOptions( m_pImageMap, m_sBaseURL, SvMacroItemId::OnMouseOver,
2065  SvMacroItemId::OnMouseOut );
2066  break;
2067 
2068  case HtmlTokenId::FRAMESET_ON:
2069  bInsertUnknown = m_bKeepUnknown;
2070  break;
2071 
2072  case HtmlTokenId::NOFRAMES_ON:
2073  if( IsInHeader() )
2074  FinishHeader();
2075  bInsertUnknown = m_bKeepUnknown;
2076  break;
2077 
2078  case HtmlTokenId::UNKNOWNCONTROL_ON:
2079  // Ignore content of unknown token in the header, if the token
2080  // does not start with a '!'.
2081  // (but judging from the code, also if does not start with a '%')
2082  // (and also if we're not somewhere we consider PRE)
2083  if( IsInHeader() && !IsReadPRE() && m_aUnknownToken.isEmpty() &&
2084  !sSaveToken.isEmpty() && '!' != sSaveToken[0] &&
2085  '%' != sSaveToken[0] )
2086  m_aUnknownToken = sSaveToken;
2087  [[fallthrough]];
2088 
2089  default:
2090  bInsertUnknown = m_bKeepUnknown;
2091  break;
2092  }
2093 
2094  if( bGetIDOption )
2095  InsertIDOption();
2096 
2097  if( bInsertUnknown )
2098  {
2099  OUStringBuffer aComment("HTML: <");
2100  if( (nToken >= HtmlTokenId::ONOFF_START) && isOffToken(nToken) )
2101  aComment.append("/");
2102  aComment.append(sSaveToken);
2103  if( !aToken.isEmpty() )
2104  {
2105  UnescapeToken();
2106  aComment.append(" ").append(aToken);
2107  }
2108  aComment.append(">");
2109  InsertComment( aComment.makeStringAndClear() );
2110  }
2111 
2112  // if there are temporary paragraph attributes and the
2113  // paragraph isn't empty then the paragraph attributes are final.
2114  if( !m_aParaAttrs.empty() && m_pPam->GetPoint()->nContent.GetIndex() )
2115  m_aParaAttrs.clear();
2116 }
2117 
2118 static void lcl_swhtml_getItemInfo( const HTMLAttr& rAttr,
2119  bool& rScriptDependent,
2120  sal_uInt16& rScriptType )
2121 {
2122  switch( rAttr.GetItem().Which() )
2123  {
2124  case RES_CHRATR_FONT:
2125  case RES_CHRATR_FONTSIZE:
2126  case RES_CHRATR_LANGUAGE:
2127  case RES_CHRATR_POSTURE:
2128  case RES_CHRATR_WEIGHT:
2129  rScriptType = i18n::ScriptType::LATIN;
2130  rScriptDependent = true;
2131  break;
2132  case RES_CHRATR_CJK_FONT:
2136  case RES_CHRATR_CJK_WEIGHT:
2137  rScriptType = i18n::ScriptType::ASIAN;
2138  rScriptDependent = true;
2139  break;
2140  case RES_CHRATR_CTL_FONT:
2144  case RES_CHRATR_CTL_WEIGHT:
2145  rScriptType = i18n::ScriptType::COMPLEX;
2146  rScriptDependent = true;
2147  break;
2148  default:
2149  rScriptDependent = false;
2150  break;
2151  }
2152 }
2153 
2154 bool SwHTMLParser::AppendTextNode( SwHTMLAppendMode eMode, bool bUpdateNum )
2155 {
2156  // A hard line break at the end always must be removed.
2157  // A second one we replace with paragraph spacing.
2158  sal_Int32 nLFStripped = StripTrailingLF();
2159  if( (AM_NOSPACE==eMode || AM_SOFTNOSPACE==eMode) && nLFStripped > 1 )
2160  eMode = AM_SPACE;
2161 
2162  // the hard attributes of this paragraph will never be invalid again
2163  m_aParaAttrs.clear();
2164 
2165  SwTextNode *pTextNode = (AM_SPACE==eMode || AM_NOSPACE==eMode) ?
2166  m_pPam->GetPoint()->nNode.GetNode().GetTextNode() : nullptr;
2167 
2168  if (pTextNode)
2169  {
2170  const SvxULSpaceItem& rULSpace =
2171  static_cast<const SvxULSpaceItem&>(pTextNode->SwContentNode::GetAttr( RES_UL_SPACE ));
2172 
2173  bool bChange = AM_NOSPACE==eMode ? rULSpace.GetLower() > 0
2174  : rULSpace.GetLower() == 0;
2175 
2176  if( bChange )
2177  {
2178  const SvxULSpaceItem& rCollULSpace =
2179  pTextNode->GetAnyFormatColl().GetULSpace();
2180 
2181  bool bMayReset = AM_NOSPACE==eMode ? rCollULSpace.GetLower() == 0
2182  : rCollULSpace.GetLower() > 0;
2183 
2184  if( bMayReset &&
2185  rCollULSpace.GetUpper() == rULSpace.GetUpper() )
2186  {
2187  pTextNode->ResetAttr( RES_UL_SPACE );
2188  }
2189  else
2190  {
2191  pTextNode->SetAttr(
2192  SvxULSpaceItem( rULSpace.GetUpper(),
2193  AM_NOSPACE==eMode ? 0 : HTML_PARSPACE, RES_UL_SPACE ) );
2194  }
2195  }
2196  }
2197  m_bNoParSpace = AM_NOSPACE==eMode || AM_SOFTNOSPACE==eMode;
2198 
2199  SwPosition aOldPos( *m_pPam->GetPoint() );
2200 
2201  bool bRet = m_xDoc->getIDocumentContentOperations().AppendTextNode( *m_pPam->GetPoint() );
2202 
2203  // split character attributes and maybe set none,
2204  // which are set for the whole paragraph
2205  const SwNodeIndex& rEndIdx = aOldPos.nNode;
2206  const sal_Int32 nEndCnt = aOldPos.nContent.GetIndex();
2207  const SwPosition& rPos = *m_pPam->GetPoint();
2208 
2209  HTMLAttr** pHTMLAttributes = reinterpret_cast<HTMLAttr**>(m_xAttrTab.get());
2210  for (auto nCnt = sizeof(HTMLAttrTable) / sizeof(HTMLAttr*); nCnt--; ++pHTMLAttributes)
2211  {
2212  HTMLAttr *pAttr = *pHTMLAttributes;
2213  if( pAttr && pAttr->GetItem().Which() < RES_PARATR_BEGIN )
2214  {
2215  bool bWholePara = false;
2216 
2217  while( pAttr )
2218  {
2219  HTMLAttr *pNext = pAttr->GetNext();
2220  if( pAttr->GetSttParaIdx() < rEndIdx.GetIndex() ||
2221  (!bWholePara &&
2222  pAttr->GetSttPara() == rEndIdx &&
2223  pAttr->GetSttCnt() != nEndCnt) )
2224  {
2225  bWholePara =
2226  pAttr->GetSttPara() == rEndIdx &&
2227  pAttr->GetSttCnt() == 0;
2228 
2229  sal_Int32 nStt = pAttr->m_nStartContent;
2230  bool bScript = false;
2231  sal_uInt16 nScriptItem;
2232  bool bInsert = true;
2233  lcl_swhtml_getItemInfo( *pAttr, bScript,
2234  nScriptItem );
2235  // set previous part
2236  if( bScript )
2237  {
2238  const SwTextNode *pTextNd =
2239  pAttr->GetSttPara().GetNode().GetTextNode();
2240  OSL_ENSURE( pTextNd, "No text node" );
2241  if( pTextNd )
2242  {
2243  const OUString& rText = pTextNd->GetText();
2244  sal_uInt16 nScriptText =
2245  g_pBreakIt->GetBreakIter()->getScriptType(
2246  rText, pAttr->GetSttCnt() );
2247  sal_Int32 nScriptEnd = g_pBreakIt->GetBreakIter()
2248  ->endOfScript( rText, nStt, nScriptText );
2249  while (nScriptEnd < nEndCnt && nScriptEnd != -1)
2250  {
2251  if( nScriptItem == nScriptText )
2252  {
2253  HTMLAttr *pSetAttr =
2254  pAttr->Clone( rEndIdx, nScriptEnd );
2255  pSetAttr->m_nStartContent = nStt;
2256  pSetAttr->ClearPrev();
2257  if( !pNext || bWholePara )
2258  {
2259  if (pSetAttr->m_bInsAtStart)
2260  m_aSetAttrTab.push_front( pSetAttr );
2261  else
2262  m_aSetAttrTab.push_back( pSetAttr );
2263  }
2264  else
2265  pNext->InsertPrev( pSetAttr );
2266  }
2267  nStt = nScriptEnd;
2268  nScriptText = g_pBreakIt->GetBreakIter()->getScriptType(
2269  rText, nStt );
2270  nScriptEnd = g_pBreakIt->GetBreakIter()
2271  ->endOfScript( rText, nStt, nScriptText );
2272  }
2273  bInsert = nScriptItem == nScriptText;
2274  }
2275  }
2276  if( bInsert )
2277  {
2278  HTMLAttr *pSetAttr =
2279  pAttr->Clone( rEndIdx, nEndCnt );
2280  pSetAttr->m_nStartContent = nStt;
2281 
2282  // When the attribute is for the whole paragraph, the outer
2283  // attributes aren't effective anymore. Hence it may not be inserted
2284  // in the Prev-List of an outer attribute, because that won't be
2285  // set. That leads to shifting when fields are used.
2286  if( !pNext || bWholePara )
2287  {
2288  if (pSetAttr->m_bInsAtStart)
2289  m_aSetAttrTab.push_front( pSetAttr );
2290  else
2291  m_aSetAttrTab.push_back( pSetAttr );
2292  }
2293  else
2294  pNext->InsertPrev( pSetAttr );
2295  }
2296  else
2297  {
2298  HTMLAttr *pPrev = pAttr->GetPrev();
2299  if( pPrev )
2300  {
2301  // the previous attributes must be set anyway
2302  if( !pNext || bWholePara )
2303  {
2304  if (pPrev->m_bInsAtStart)
2305  m_aSetAttrTab.push_front( pPrev );
2306  else
2307  m_aSetAttrTab.push_back( pPrev );
2308  }
2309  else
2310  pNext->InsertPrev( pPrev );
2311  }
2312  }
2313  pAttr->ClearPrev();
2314  }
2315 
2316  pAttr->SetStart( rPos );
2317  pAttr = pNext;
2318  }
2319  }
2320  }
2321 
2322  if( bUpdateNum )
2323  {
2324  if( GetNumInfo().GetDepth() )
2325  {
2326  sal_uInt8 nLvl = GetNumInfo().GetLevel();
2327  SetNodeNum( nLvl );
2328  }
2329  else
2331  }
2332 
2333  // We must set the attribute of the paragraph before now (because of JavaScript)
2334  SetAttr();
2335 
2336  // Now it is time to get rid of all script dependent hints that are
2337  // equal to the settings in the style
2338  SwTextNode *pTextNd = rEndIdx.GetNode().GetTextNode();
2339  OSL_ENSURE( pTextNd, "There is the txt node" );
2340  size_t nCntAttr = (pTextNd && pTextNd->GetpSwpHints())
2341  ? pTextNd->GetSwpHints().Count() : 0;
2342  if( nCntAttr )
2343  {
2344  // These are the end position of all script dependent hints.
2345  // If we find a hint that starts before the current end position,
2346  // we have to set it. If we find a hint that start behind or at
2347  // that position, we have to take the hint value into account.
2348  // If it is equal to the style, or in fact the paragraph value
2349  // for that hint, the hint is removed. Otherwise its end position
2350  // is remembered.
2351  sal_Int32 aEndPos[15] =
2352  { 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0 };
2353  SwpHints& rHints = pTextNd->GetSwpHints();
2354  for( size_t i=0; i < nCntAttr; i++ )
2355  {
2356  SwTextAttr *pHt = rHints.Get( i );
2357  sal_uInt16 nWhich = pHt->Which();
2358  sal_Int16 nIdx = 0;
2359  bool bFont = false;
2360  switch( nWhich )
2361  {
2362  case RES_CHRATR_FONT:
2363  nIdx = 0;
2364  bFont = true;
2365  break;
2366  case RES_CHRATR_FONTSIZE:
2367  nIdx = 1;
2368  break;
2369  case RES_CHRATR_LANGUAGE:
2370  nIdx = 2;
2371  break;
2372  case RES_CHRATR_POSTURE:
2373  nIdx = 3;
2374  break;
2375  case RES_CHRATR_WEIGHT:
2376  nIdx = 4;
2377  break;
2378  case RES_CHRATR_CJK_FONT:
2379  nIdx = 5;
2380  bFont = true;
2381  break;
2383  nIdx = 6;
2384  break;
2386  nIdx = 7;
2387  break;
2389  nIdx = 8;
2390  break;
2391  case RES_CHRATR_CJK_WEIGHT:
2392  nIdx = 9;
2393  break;
2394  case RES_CHRATR_CTL_FONT:
2395  nIdx = 10;
2396  bFont = true;
2397  break;
2399  nIdx = 11;
2400  break;
2402  nIdx = 12;
2403  break;
2405  nIdx = 13;
2406  break;
2407  case RES_CHRATR_CTL_WEIGHT:
2408  nIdx = 14;
2409  break;
2410  default:
2411  // Skip to next attribute
2412  continue;
2413  }
2414  const sal_Int32 nStt = pHt->GetStart();
2415  if( nStt >= aEndPos[nIdx] )
2416  {
2417  const SfxPoolItem& rItem =
2418  static_cast<const SwContentNode *>(pTextNd)->GetAttr( nWhich );
2419  if( bFont ? swhtml_css1atr_equalFontItems(rItem,pHt->GetAttr())
2420  : rItem == pHt->GetAttr() )
2421  {
2422  // The hint is the same as set in the paragraph and
2423  // therefore, it can be deleted
2424  // CAUTION!!! This WILL delete the hint and it MAY
2425  // also delete the SwpHints!!! To avoid any trouble
2426  // we leave the loop immediately if this is the last
2427  // hint.
2428  pTextNd->DeleteAttribute( pHt );
2429  if( 1 == nCntAttr )
2430  break;
2431  i--;
2432  nCntAttr--;
2433  }
2434  else
2435  {
2436  // The hint is different. Therefore all hints within that
2437  // hint have to be ignored.
2438  aEndPos[nIdx] = pHt->GetEnd() ? *pHt->GetEnd() : nStt;
2439  }
2440  }
2441  else
2442  {
2443  // The hint starts before another one ends.
2444  // The hint in this case is not deleted
2445  OSL_ENSURE( pHt->GetEnd() && *pHt->GetEnd() <= aEndPos[nIdx],
2446  "hints aren't nested properly!" );
2447  }
2448  }
2449  }
2450 
2451  if (!m_xTable && !--m_nParaCnt)
2452  Show();
2453 
2454  return bRet;
2455 }
2456 
2458 {
2459  //If it already has ParSpace, return
2460  if( !m_bNoParSpace )
2461  return;
2462 
2463  m_bNoParSpace = false;
2464 
2465  sal_uLong nNdIdx = m_pPam->GetPoint()->nNode.GetIndex() - 1;
2466 
2467  SwTextNode *pTextNode = m_xDoc->GetNodes()[nNdIdx]->GetTextNode();
2468  if( !pTextNode )
2469  return;
2470 
2471  SvxULSpaceItem rULSpace =
2472  static_cast<const SvxULSpaceItem&>(pTextNode->SwContentNode::GetAttr( RES_UL_SPACE ));
2473  if( rULSpace.GetLower() )
2474  return;
2475 
2476  const SvxULSpaceItem& rCollULSpace =
2477  pTextNode->GetAnyFormatColl().GetULSpace();
2478  if( rCollULSpace.GetLower() &&
2479  rCollULSpace.GetUpper() == rULSpace.GetUpper() )
2480  {
2481  pTextNode->ResetAttr( RES_UL_SPACE );
2482  }
2483  else
2484  {
2485  //What I do here, is that I examine the attributes, and if
2486  //I find out, that it's CJK/CTL, then I set the paragraph space
2487  //to the value set in HTML_CJK_PARSPACE/HTML_CTL_PARSPACE.
2488 
2489  bool bIsCJK = false;
2490  bool bIsCTL = false;
2491 
2492  const size_t nCntAttr = pTextNode->GetpSwpHints()
2493  ? pTextNode->GetSwpHints().Count() : 0;
2494 
2495  for(size_t i = 0; i < nCntAttr; ++i)
2496  {
2497  SwTextAttr *const pHt = pTextNode->GetSwpHints().Get(i);
2498  sal_uInt16 const nWhich = pHt->Which();
2499  if (RES_CHRATR_CJK_FONT == nWhich ||
2500  RES_CHRATR_CJK_FONTSIZE == nWhich ||
2501  RES_CHRATR_CJK_LANGUAGE == nWhich ||
2502  RES_CHRATR_CJK_POSTURE == nWhich ||
2503  RES_CHRATR_CJK_WEIGHT == nWhich)
2504  {
2505  bIsCJK = true;
2506  break;
2507  }
2508  if (RES_CHRATR_CTL_FONT == nWhich ||
2509  RES_CHRATR_CTL_FONTSIZE == nWhich ||
2510  RES_CHRATR_CTL_LANGUAGE == nWhich ||
2511  RES_CHRATR_CTL_POSTURE == nWhich ||
2512  RES_CHRATR_CTL_WEIGHT == nWhich)
2513  {
2514  bIsCTL = true;
2515  break;
2516  }
2517  }
2518 
2519  if( bIsCTL )
2520  {
2521  pTextNode->SetAttr(
2523  }
2524  else if( bIsCJK )
2525  {
2526  pTextNode->SetAttr(
2528  } else {
2529  pTextNode->SetAttr(
2531  }
2532  }
2533 }
2534 
2536 {
2537  // Here
2538  // - a EndAction is called, so the document is formatted
2539  // - a Reschedule is called,
2540  // - the own View-Shell is set again
2541  // - and a StartAction is called
2542 
2543  OSL_ENSURE( SvParserState::Working==eState, "Show not in working state - That can go wrong" );
2544  SwViewShell *pOldVSh = CallEndAction();
2545 
2547 
2548  if( ( m_xDoc->GetDocShell() && m_xDoc->GetDocShell()->IsAbortingImport() )
2549  || 1 == m_xDoc->getReferenceCount() )
2550  {
2551  // was the import aborted by SFX?
2552  eState = SvParserState::Error;
2553  }
2554 
2555  // Fetch the SwViewShell again, as it could be destroyed in Reschedule.
2556  SwViewShell *pVSh = CallStartAction( pOldVSh );
2557 
2558  // is the current node not visible anymore, then we use a bigger increment
2559  if( pVSh )
2560  {
2562  ? 5 : 50;
2563  }
2564 }
2565 
2567 {
2568  // Here
2569  // - a Reschedule is called, so it can be scrolled
2570  // - the own View-Shell is set again
2571  // - a StartAction/EndAction is called, when there was scrolling.
2572 
2573  OSL_ENSURE( SvParserState::Working==eState, "ShowStatLine not in working state - That can go wrong" );
2574 
2575  // scroll bar
2576  if (m_xProgress)
2577  {
2578  m_xProgress->Update(rInput.Tell());
2580  }
2581  else
2582  {
2584 
2585  if( ( m_xDoc->GetDocShell() && m_xDoc->GetDocShell()->IsAbortingImport() )
2586  || 1 == m_xDoc->getReferenceCount() )
2587  // was the import aborted by SFX?
2588  eState = SvParserState::Error;
2589 
2591  if( pVSh && pVSh->HasInvalidRect() )
2592  {
2593  CallEndAction( false, false );
2594  CallStartAction( pVSh, false );
2595  }
2596  }
2597 }
2598 
2600 {
2601  OSL_ENSURE( !m_pActionViewShell, "CallStartAction: SwViewShell already set" );
2602 
2603  if( !pVSh || bChkPtr )
2604  {
2605 #if OSL_DEBUG_LEVEL > 0
2606  SwViewShell *pOldVSh = pVSh;
2607 #endif
2608  pVSh = m_xDoc->getIDocumentLayoutAccess().GetCurrentViewShell();
2609 #if OSL_DEBUG_LEVEL > 0
2610  OSL_ENSURE( !pVSh || !pOldVSh || pOldVSh == pVSh, "CallStartAction: Who swapped the SwViewShell?" );
2611  if( pOldVSh && !pVSh )
2612  pVSh = nullptr;
2613 #endif
2614  }
2615  m_pActionViewShell = pVSh;
2616 
2617  if( m_pActionViewShell )
2618  {
2619  if( auto pEditShell = dynamic_cast< SwEditShell *>( m_pActionViewShell ) )
2620  pEditShell->StartAction();
2621  else
2623  }
2624 
2625  return m_pActionViewShell;
2626 }
2627 
// Ends the action on the view shell remembered in m_pActionViewShell and
// clears that member.
//
// bChkAction: when true, nothing is ended (and m_pActionViewShell is returned
//             unchanged) if the shell reports no pending action.
// bChkPtr:    when true, the document's current view shell is fetched first;
//             if it differs from m_pActionViewShell the member is reset to
//             nullptr, which makes the function return early.
//
// Sets eState to SvParserState::Error when the parser holds the only
// remaining reference to the document.
//
// NOTE(review): listing lines 2652, 2662, 2668 and 2677 are absent from this
// extract (the numbering jumps), so the statements they held are not visible
// here; the code below is reproduced exactly as extracted.
2628 SwViewShell *SwHTMLParser::CallEndAction( bool bChkAction, bool bChkPtr )
2629 {
2630  if( bChkPtr )
2631  {
2632  SwViewShell *pVSh = m_xDoc->getIDocumentLayoutAccess().GetCurrentViewShell();
2633  OSL_ENSURE( !pVSh || m_pActionViewShell == pVSh,
2634  "CallEndAction: Who swapped the SwViewShell?" );
2635 #if OSL_DEBUG_LEVEL > 0
2636  if( m_pActionViewShell && !pVSh )
2637  pVSh = nullptr;
2638 #endif
 // Forget the remembered shell if it is no longer the document's current one.
2639  if( pVSh != m_pActionViewShell )
2640  m_pActionViewShell = nullptr;
2641  }
2642 
 // Nothing to end: no shell remembered, or (when asked to check) no action pending.
2643  if( !m_pActionViewShell || (bChkAction && !m_pActionViewShell->ActionPend()) )
2644  return m_pActionViewShell;
2645 
2646  if( dynamic_cast< const SwEditShell *>( m_pActionViewShell ) != nullptr )
2647  {
2648  // Already scrolled?, then make sure that the view doesn't move!
 // The view lock and the EndActionByVirDev flag are saved here and restored
 // after EndAction().
2649  const bool bOldLock = m_pActionViewShell->IsViewLocked();
2650  m_pActionViewShell->LockView( true );
2651  const bool bOldEndActionByVirDev = m_pActionViewShell->IsEndActionByVirDev();
 // NOTE(review): listing line 2652 is missing here.
2653  static_cast<SwEditShell*>(m_pActionViewShell)->EndAction();
2654  m_pActionViewShell->SetEndActionByVirDev( bOldEndActionByVirDev );
2655  m_pActionViewShell->LockView( bOldLock );
2656 
2657  // bChkJumpMark is only set when the object was also found
2658  if( m_bChkJumpMark )
2659  {
2660  const Point aVisSttPos( DOCUMENTBORDER, DOCUMENTBORDER );
 // NOTE(review): listing line 2662 (start of the statement guarded by this
 // condition) is missing; only its argument line 2663 survived.
2661  if( GetMedium() && aVisSttPos == m_pActionViewShell->VisArea().Pos() )
2663  GetMedium()->GetURLObject().GetMark() );
2664  m_bChkJumpMark = false;
2665  }
2666  }
2667  else
 // NOTE(review): listing line 2668 (the statement of this else branch) is missing.
2669 
2670  // if the parser holds the last reference to the document, then we can
2671  // abort here and set an error.
2672  if( 1 == m_xDoc->getReferenceCount() )
2673  {
2674  eState = SvParserState::Error;
2675  }
2676 
 // NOTE(review): listing line 2677 is missing; pVSh returned below is presumably
 // declared there - confirm against the full file.
2678  m_pActionViewShell = nullptr;
2679 
2680  return pVSh;
2681 }
2682 
2684 {
2685  SwViewShell *pVSh = m_xDoc->getIDocumentLayoutAccess().GetCurrentViewShell();
2686  OSL_ENSURE( !pVSh || m_pActionViewShell == pVSh,
2687  "CheckActionViewShell: Who has swapped SwViewShell?" );
2688 #if OSL_DEBUG_LEVEL > 0
2689  if( m_pActionViewShell && !pVSh )
2690  pVSh = nullptr;
2691 #endif
2692  if( pVSh != m_pActionViewShell )
2693  m_pActionViewShell = nullptr;
2694 
2695  return m_pActionViewShell;
2696 }
2697 
2699  : m_pFrameFormat(pFrameFormat)
2700 {
2702 }
2703 
2705 {
2706  if (rHint.GetId() == SfxHintId::Dying)
2707  m_pFrameFormat = nullptr;
2708 }
2709 
// Applies the finished attributes queued in m_aSetAttrTab to the document,
// then re-anchors the pending fly frames in m_aMoveFlyFrames and finally
// inserts the field attributes that were collected on the way.
//
// bChkEnd:      when true, an attribute is only applied if it ends before the
//               current insert position (see the condition below); when false
//               the decision depends on the nodes-array section instead.
// bBeforeTable: when true, attributes ending in the current node are ended in
//               the previous node or discarded (a table is about to be
//               inserted at the current position).
// pPostIts:     optional; when non-null, Postit and Script field attributes
//               are moved to the front of this deque instead of being set.
//
// Every HTMLAttr taken from m_aSetAttrTab (and its Prev chain) is either
// deleted here or handed over to aFields / *pPostIts.
//
// NOTE(review): listing lines 2888-2889, 2895, 2906, 2940, 2942, 2950 and
// 2954 are absent from this extract (the numbering jumps); the code is
// reproduced exactly as extracted.
2710 void SwHTMLParser::SetAttr_( bool bChkEnd, bool bBeforeTable,
2711  std::deque<std::unique_ptr<HTMLAttr>> *pPostIts )
2712 {
2713  std::unique_ptr<SwPaM> pAttrPam( new SwPaM( *m_pPam->GetPoint() ) );
2714  const SwNodeIndex& rEndIdx = m_pPam->GetPoint()->nNode;
2715  const sal_Int32 nEndCnt = m_pPam->GetPoint()->nContent.GetIndex();
2716  HTMLAttr* pAttr;
2717  SwContentNode* pCNd;
2718 
 // Field attributes are collected here and inserted only at the very end.
2719  std::vector<std::unique_ptr<HTMLAttr>> aFields;
2720 
 // Walk the table back to front so that erasing entry n keeps the remaining
 // (lower) indices valid.
2721  for( auto n = m_aSetAttrTab.size(); n; )
2722  {
2723  pAttr = m_aSetAttrTab[ --n ];
2724  sal_uInt16 nWhich = pAttr->m_pItem->Which();
2725 
2726  sal_uLong nEndParaIdx = pAttr->GetEndParaIdx();
2727  bool bSetAttr;
2728  if( bChkEnd )
2729  {
2730  // Set character attribute with end early on, so set them still in
2731  // the current paragraph (because of JavaScript and various "chats"(?)).
2732  // This shouldn't be done for attributes which are used for
2733  // the whole paragraph, because they could be from a paragraph style
2734  // which can't be set. Because the attributes are inserted with
2735  // SETATTR_DONTREPLACE, they should be able to be set later.
2736  bSetAttr = ( nEndParaIdx < rEndIdx.GetIndex() &&
2737  (RES_LR_SPACE != nWhich || !GetNumInfo().GetNumRule()) ) ||
2738  ( !pAttr->IsLikePara() &&
2739  nEndParaIdx == rEndIdx.GetIndex() &&
2740  pAttr->GetEndCnt() < nEndCnt &&
2741  (isCHRATR(nWhich) || isTXTATR_WITHEND(nWhich)) ) ||
2742  ( bBeforeTable &&
2743  nEndParaIdx == rEndIdx.GetIndex() &&
2744  !pAttr->GetEndCnt() );
2745  }
2746  else
2747  {
2748  // Attributes in body nodes array section shouldn't be set if we are in a
2749  // special nodes array section, but vice versa it's possible.
2750  sal_uLong nEndOfIcons = m_xDoc->GetNodes().GetEndOfExtras().GetIndex();
2751  bSetAttr = nEndParaIdx < rEndIdx.GetIndex() ||
2752  rEndIdx.GetIndex() > nEndOfIcons ||
2753  nEndParaIdx <= nEndOfIcons;
2754  }
2755 
2756  if( bSetAttr )
2757  {
2758  // The attribute shouldn't be in the list of temporary paragraph
2759  // attributes, because then it would be deleted.
2760  while( !m_aParaAttrs.empty() )
2761  {
2762  OSL_ENSURE( pAttr != m_aParaAttrs.back(),
2763  "SetAttr: Attribute must not yet be set" );
2764  m_aParaAttrs.pop_back();
2765  }
2766 
2767  // then set it
2768  m_aSetAttrTab.erase( m_aSetAttrTab.begin() + n );
2769 
 // Process the attribute and then every attribute in its Prev chain.
2770  while( pAttr )
2771  {
2772  HTMLAttr *pPrev = pAttr->GetPrev();
2773  if( !pAttr->m_bValid )
2774  {
2775  // invalid attributes can be deleted
2776  delete pAttr;
2777  pAttr = pPrev;
2778  continue;
2779  }
2780 
2781  pCNd = pAttr->m_nStartPara.GetNode().GetContentNode();
2782  if( !pCNd )
2783  {
2784  // because of the awful deleting of nodes an index can also
2785  // point to an end node :-(
2786  if ( (pAttr->GetSttPara() == pAttr->GetEndPara()) &&
2787  !isTXTATR_NOEND(nWhich) )
2788  {
2789  // when the end index also points to the node, we don't
2790  // need to set attributes anymore, except if it's a text attribute.
2791  delete pAttr;
2792  pAttr = pPrev;
2793  continue;
2794  }
2795  pCNd = m_xDoc->GetNodes().GoNext( &(pAttr->m_nStartPara) );
2796  if( pCNd )
2797  pAttr->m_nStartContent = 0;
2798  else
2799  {
2800  OSL_ENSURE( false, "SetAttr: GoNext() failed!" );
2801  delete pAttr;
2802  pAttr = pPrev;
2803  continue;
2804  }
2805  }
2806  pAttrPam->GetPoint()->nNode = pAttr->m_nStartPara;
2807 
2808  // because of the deleting of BRs the start index can also
2809  // point behind the end the text
2810  if( pAttr->m_nStartContent > pCNd->Len() )
2811  pAttr->m_nStartContent = pCNd->Len();
2812  pAttrPam->GetPoint()->nContent.Assign( pCNd, pAttr->m_nStartContent );
2813 
 // Mark stays at the attribute start; Point is moved to the attribute end below.
2814  pAttrPam->SetMark();
2815  if ( (pAttr->GetSttPara() != pAttr->GetEndPara()) &&
2816  !isTXTATR_NOEND(nWhich) )
2817  {
2818  pCNd = pAttr->m_nEndPara.GetNode().GetContentNode();
2819  if( !pCNd )
2820  {
2821  pCNd = SwNodes::GoPrevious( &(pAttr->m_nEndPara) );
2822  if( pCNd )
2823  pAttr->m_nEndContent = pCNd->Len();
2824  else
2825  {
2826  OSL_ENSURE( false, "SetAttr: GoPrevious() failed!" );
2827  pAttrPam->DeleteMark();
2828  delete pAttr;
2829  pAttr = pPrev;
2830  continue;
2831  }
2832  }
2833 
2834  pAttrPam->GetPoint()->nNode = pAttr->m_nEndPara;
2835  }
2836  else if( pAttr->IsLikePara() )
2837  {
2838  pAttr->m_nEndContent = pCNd->Len();
2839  }
2840 
2841  // because of the deleting of BRs the start index can also
2842  // point behind the end the text
2843  if( pAttr->m_nEndContent > pCNd->Len() )
2844  pAttr->m_nEndContent = pCNd->Len();
2845 
2846  pAttrPam->GetPoint()->nContent.Assign( pCNd, pAttr->m_nEndContent );
2847  if( bBeforeTable &&
2848  pAttrPam->GetPoint()->nNode.GetIndex() ==
2849  rEndIdx.GetIndex() )
2850  {
2851  // If we're before inserting a table and the attribute ends
2852  // in the current node, then we must end it in the previous
2853  // node or discard it, if it starts in that node.
2854  if( nWhich != RES_BREAK && nWhich != RES_PAGEDESC &&
2855  !isTXTATR_NOEND(nWhich) )
2856  {
2857  if( pAttrPam->GetMark()->nNode.GetIndex() !=
2858  rEndIdx.GetIndex() )
2859  {
2860  OSL_ENSURE( !pAttrPam->GetPoint()->nContent.GetIndex(),
2861  "Content-Position before table not 0???" );
2862  pAttrPam->Move( fnMoveBackward );
2863  }
2864  else
2865  {
2866  pAttrPam->DeleteMark();
2867  delete pAttr;
2868  pAttr = pPrev;
2869  continue;
2870  }
2871  }
2872  }
2873 
2874  switch( nWhich )
2875  {
2876  case RES_FLTR_BOOKMARK: // insert bookmark
2877  {
2878  const OUString sName( static_cast<SfxStringItem*>(pAttr->m_pItem.get())->GetValue() );
2879  IDocumentMarkAccess* const pMarkAccess = m_xDoc->getIDocumentMarkAccess();
2880  IDocumentMarkAccess::const_iterator_t ppBkmk = pMarkAccess->findMark( sName );
2881  if( ppBkmk != pMarkAccess->getAllMarksEnd() &&
2882  (*ppBkmk)->GetMarkStart() == *pAttrPam->GetPoint() )
2883  break; // do not generate duplicates on this position
2884  pAttrPam->DeleteMark();
2885  const ::sw::mark::IMark* const pNewMark = pMarkAccess->makeMark(
2886  *pAttrPam,
2887  sName,
 // NOTE(review): listing lines 2888-2889 (remaining makeMark arguments) are missing.
2890 
2891  // jump to bookmark
2892  if( JumpToMarks::Mark == m_eJumpTo && pNewMark->GetName() == m_sJmpMark )
2893  {
2894  m_bChkJumpMark = true;
 // NOTE(review): listing line 2895 is missing.
2896  }
2897  }
2898  break;
2899  case RES_TXTATR_FIELD:
2900  case RES_TXTATR_ANNOTATION:
2901  case RES_TXTATR_INPUTFIELD:
2902  {
2903  SwFieldIds nFieldWhich =
2904  pPostIts
2905  ? static_cast<const SwFormatField *>(pAttr->m_pItem.get())->GetField()->GetTyp()->Which()
 // NOTE(review): listing line 2906 (the alternative of this conditional) is missing.
2907  if( pPostIts && (SwFieldIds::Postit == nFieldWhich ||
2908  SwFieldIds::Script == nFieldWhich) )
2909  {
2910  pPostIts->emplace_front( pAttr );
2911  }
2912  else
2913  {
2914  aFields.emplace_back( pAttr);
2915  }
2916  }
 // Ownership of pAttr went to *pPostIts or aFields above, so it is not deleted here.
2917  pAttrPam->DeleteMark();
2918  pAttr = pPrev;
2919  continue;
2920 
2921  case RES_LR_SPACE:
2922  if( pAttrPam->GetPoint()->nNode.GetIndex() ==
2923  pAttrPam->GetMark()->nNode.GetIndex())
2924  {
2925  // because of numbering set this attribute directly at node
2926  pCNd->SetAttr( *pAttr->m_pItem );
2927  break;
2928  }
2929  OSL_ENSURE( false,
2930  "LRSpace set over multiple paragraphs!" );
2931  [[fallthrough]]; // (shouldn't reach this point anyway)
2932 
2933  // tdf#94088 expand RES_BACKGROUND to the new fill attribute
2934  // definitions in the range [XATTR_FILL_FIRST .. XATTR_FILL_LAST].
2935  // This is the right place in the future if the adapted fill attributes
2936  // may be handled more directly in HTML import to handle them.
2937  case RES_BACKGROUND:
2938  {
2939  const SvxBrushItem& rBrush = static_cast< SvxBrushItem& >(*pAttr->m_pItem);
 // NOTE(review): listing lines 2940 and 2942 are missing; aNewSet used below is
 // presumably declared and filled there - confirm against the full file.
2941 
2943  m_xDoc->getIDocumentContentOperations().InsertItemSet(*pAttrPam, aNewSet, SetAttrMode::DONTREPLACE);
2944  break;
2945  }
2946  default:
2947 
2948  // maybe jump to a bookmark
2949  if( RES_TXTATR_INETFMT == nWhich &&
 // NOTE(review): listing line 2950 (one operand of this condition) is missing.
2951  m_sJmpMark == static_cast<SwFormatINetFormat*>(pAttr->m_pItem.get())->GetName() )
2952  {
2953  m_bChkJumpMark = true;
 // NOTE(review): listing line 2954 is missing.
2955  }
2956 
2957  m_xDoc->getIDocumentContentOperations().InsertPoolItem( *pAttrPam, *pAttr->m_pItem, SetAttrMode::DONTREPLACE );
2958  }
2959  pAttrPam->DeleteMark();
2960 
2961  delete pAttr;
2962  pAttr = pPrev;
2963  }
2964  }
2965  }
2966 
 // Second pass: fly frames queued for moving. m_aMoveFlyFrames and m_aMoveFlyCnts
 // are indexed in parallel and always erased together.
2967  for( auto n = m_aMoveFlyFrames.size(); n; )
2968  {
2969  SwFrameFormat *pFrameFormat = m_aMoveFlyFrames[--n]->GetFrameFormat();
2970  if (!pFrameFormat)
2971  {
2972  SAL_WARN("sw.html", "SwFrameFormat deleted during import");
2973  m_aMoveFlyFrames.erase( m_aMoveFlyFrames.begin() + n );
2974  m_aMoveFlyCnts.erase( m_aMoveFlyCnts.begin() + n );
2975  continue;
2976  }
2977 
2978  const SwFormatAnchor& rAnchor = pFrameFormat->GetAnchor();
2979  OSL_ENSURE( RndStdIds::FLY_AT_PARA == rAnchor.GetAnchorId(),
2980  "Only At-Para flys need special handling" );
2981  const SwPosition *pFlyPos = rAnchor.GetContentAnchor();
2982  sal_uLong nFlyParaIdx = pFlyPos->nNode.GetIndex();
2983  bool bMoveFly;
2984  if( bChkEnd )
2985  {
2986  bMoveFly = nFlyParaIdx < rEndIdx.GetIndex() ||
2987  ( nFlyParaIdx == rEndIdx.GetIndex() &&
2988  m_aMoveFlyCnts[n] < nEndCnt );
2989  }
2990  else
2991  {
2992  sal_uLong nEndOfIcons = m_xDoc->GetNodes().GetEndOfExtras().GetIndex();
2993  bMoveFly = nFlyParaIdx < rEndIdx.GetIndex() ||
2994  rEndIdx.GetIndex() > nEndOfIcons ||
2995  nFlyParaIdx <= nEndOfIcons;
2996  }
2997  if( bMoveFly )
2998  {
 // Re-anchor the fly at the remembered content position as FLY_AT_CHAR; the
 // frames are deleted first and recreated after the format has been changed.
2999  pFrameFormat->DelFrames();
3000  *pAttrPam->GetPoint() = *pFlyPos;
3001  pAttrPam->GetPoint()->nContent.Assign( pAttrPam->GetContentNode(),
3002  m_aMoveFlyCnts[n] );
3003  SwFormatAnchor aAnchor( rAnchor );
3004  aAnchor.SetType( RndStdIds::FLY_AT_CHAR );
3005  aAnchor.SetAnchor( pAttrPam->GetPoint() );
3006  pFrameFormat->SetFormatAttr( aAnchor );
3007 
3008  const SwFormatHoriOrient& rHoriOri = pFrameFormat->GetHoriOrient();
3009  if( text::HoriOrientation::LEFT == rHoriOri.GetHoriOrient() )
3010  {
3011  SwFormatHoriOrient aHoriOri( rHoriOri );
3012  aHoriOri.SetRelationOrient( text::RelOrientation::CHAR );
3013  pFrameFormat->SetFormatAttr( aHoriOri );
3014  }
3015  const SwFormatVertOrient& rVertOri = pFrameFormat->GetVertOrient();
3016  if( text::VertOrientation::TOP == rVertOri.GetVertOrient() )
3017  {
3018  SwFormatVertOrient aVertOri( rVertOri );
3019  aVertOri.SetRelationOrient( text::RelOrientation::CHAR );
3020  pFrameFormat->SetFormatAttr( aVertOri );
3021  }
3022 
3023  pFrameFormat->MakeFrames();
3024  m_aMoveFlyFrames.erase( m_aMoveFlyFrames.begin() + n );
3025  m_aMoveFlyCnts.erase( m_aMoveFlyCnts.begin() + n );
3026  }
3027  }
 // Third pass: insert the field attributes collected in the first loop.
3028  for (auto & field : aFields)
3029  {
3030  pCNd = field->m_nStartPara.GetNode().GetContentNode();
3031  pAttrPam->GetPoint()->nNode = field->m_nStartPara;
3032  pAttrPam->GetPoint()->nContent.Assign( pCNd, field->m_nStartContent );
3033 
3034  if( bBeforeTable &&
3035  pAttrPam->GetPoint()->nNode.GetIndex() == rEndIdx.GetIndex() )
3036  {
3037  OSL_ENSURE( !bBeforeTable, "Aha, the case does occur" );
3038  OSL_ENSURE( !pAttrPam->GetPoint()->nContent.GetIndex(),
3039  "Content-Position before table not 0???" );
3040  // !!!
3041  pAttrPam->Move( fnMoveBackward );
3042  }
3043 
3044  m_xDoc->getIDocumentContentOperations().InsertPoolItem( *pAttrPam, *field->m_pItem );
3045 
3046  field.reset();
3047  }
3048  aFields.clear();
3049 }
3050 
3051 void SwHTMLParser::NewAttr(const std::shared_ptr<HTMLAttrTable>& rAttrTable, HTMLAttr **ppAttr, const SfxPoolItem& rItem )
3052 {
3053  // Font height and font colour as well as escape attributes may not be
3054  // combined. Therefore they're saved in a list and in it the last opened
3055  // attribute is at the beginning and count is always one. For all other
3056  // attributes count is just incremented.
3057  if( *ppAttr )
3058  {
3059  HTMLAttr *pAttr = new HTMLAttr(*m_pPam->GetPoint(), rItem, ppAttr, rAttrTable);
3060  pAttr->InsertNext( *ppAttr );
3061  (*ppAttr) = pAttr;
3062  }
3063  else
3064  (*ppAttr) = new HTMLAttr(*m_pPam->GetPoint(), rItem, ppAttr, rAttrTable);
3065 }
3066 
3067 bool SwHTMLParser::EndAttr( HTMLAttr* pAttr, bool bChkEmpty )
3068 {
3069  bool bRet = true;
3070 
3071  // The list header is saved in the attribute.
3072  HTMLAttr **ppHead = pAttr->m_ppHead;
3073 
3074  OSL_ENSURE( ppHead, "No list header attribute found!" );
3075 
3076  // save the current position as end position
3077  const SwNodeIndex* pEndIdx = &m_pPam->GetPoint()->nNode;
3078  sal_Int32 nEndCnt = m_pPam->GetPoint()->nContent.GetIndex();
3079 
3080  // Is the last started or an earlier started attribute being ended?
3081  HTMLAttr *pLast = nullptr;
3082  if( ppHead && pAttr != *ppHead )
3083  {
3084  // The last started attribute isn't being ended
3085 
3086  // Then we look for attribute which was started immediately afterwards,
3087  // which has also not yet been ended (otherwise it would no longer be
3088  // in the list).
3089  pLast = *ppHead;
3090  while( pLast && pLast->GetNext() != pAttr )
3091  pLast = pLast->GetNext();
3092 
3093  OSL_ENSURE( pLast, "Attribute not found in own list!" );
3094  }
3095 
3096  bool bMoveBack = false;
3097  sal_uInt16 nWhich = pAttr->m_pItem->Which();
3098  if( !nEndCnt && RES_PARATR_BEGIN <= nWhich &&
3099  *pEndIdx != pAttr->GetSttPara() )
3100  {
3101  // Then move back one position in the content!
3102  bMoveBack = m_pPam->Move( fnMoveBackward );
3103  nEndCnt = m_pPam->GetPoint()->nContent.GetIndex();
3104  }
3105 
3106  // now end the attribute
3107  HTMLAttr *pNext = pAttr->GetNext();
3108 
3109  bool bInsert;
3110  sal_uInt16 nScriptItem = 0;
3111  bool bScript = false;
3112  // does it have a non-empty range?
3113  if( !bChkEmpty || (RES_PARATR_BEGIN <= nWhich && bMoveBack) ||
3114  RES_PAGEDESC == nWhich || RES_BREAK == nWhich ||
3115  *pEndIdx != pAttr->GetSttPara() ||
3116  nEndCnt != pAttr->GetSttCnt() )
3117  {
3118  bInsert = true;
3119  // We do some optimization for script dependent attributes here.
3120  if( *pEndIdx == pAttr->GetSttPara() )
3121  {
3122  lcl_swhtml_getItemInfo( *pAttr, bScript, nScriptItem );
3123  }
3124  }
3125  else
3126  {
3127  bInsert = false;
3128  }
3129 
3130  const SwTextNode *pTextNd = (bInsert && bScript) ?
3131  pAttr->GetSttPara().GetNode().GetTextNode() :
3132  nullptr;
3133 
3134  if (pTextNd)
3135  {
3136  const OUString& rText = pTextNd->GetText();
3137  sal_uInt16 nScriptText = g_pBreakIt->GetBreakIter()->getScriptType(
3138  rText, pAttr->GetSttCnt() );
3139  sal_Int32 nScriptEnd = g_pBreakIt->GetBreakIter()
3140  ->endOfScript( rText, pAttr->GetSttCnt(), nScriptText );
3141  while (nScriptEnd < nEndCnt && nScriptEnd != -1)
3142  {
3143  if( nScriptItem == nScriptText )
3144  {
3145  HTMLAttr *pSetAttr = pAttr->Clone( *pEndIdx, nScriptEnd );
3146  pSetAttr->ClearPrev();
3147  if( pNext )
3148  pNext->InsertPrev( pSetAttr );
3149  else
3150  {
3151  if (pSetAttr->m_bInsAtStart)
3152  m_aSetAttrTab.push_front( pSetAttr );
3153  else
3154  m_aSetAttrTab.push_back( pSetAttr );
3155  }
3156  }
3157  pAttr->m_nStartContent = nScriptEnd;
3158  nScriptText = g_pBreakIt->GetBreakIter()->getScriptType(
3159  rText, nScriptEnd );
3160  nScriptEnd = g_pBreakIt->GetBreakIter()
3161  ->endOfScript( rText, nScriptEnd, nScriptText );
3162  }
3163  bInsert = nScriptItem == nScriptText;
3164  }
3165  if( bInsert )
3166  {
3167  pAttr->m_nEndPara = *pEndIdx;
3168  pAttr->m_nEndContent = nEndCnt;
3169  pAttr->m_bInsAtStart = RES_TXTATR_INETFMT != nWhich &&
3170  RES_TXTATR_CHARFMT != nWhich;
3171 
3172  if( !pNext )
3173  {
3174  // No open attributes of that type exists any longer, so all
3175  // can be set. Except they depend on another attribute, then
3176  // they're appended there.
3177  if (pAttr->m_bInsAtStart)
3178  m_aSetAttrTab.push_front( pAttr );
3179  else
3180  m_aSetAttrTab.push_back( pAttr );
3181  }
3182  else
3183  {
3184  // There are other open attributes of that type,
3185  // therefore the setting must be postponed.
3186  // Hence the current attribute is added at the end
3187  // of the Prev-List of the successor.
3188  pNext->InsertPrev( pAttr );
3189  }
3190  }
3191  else
3192  {
3193  // Then don't insert, but delete. Because of the "faking" of styles
3194  // by hard attributing there can be also other empty attributes in the
3195  // Prev-List, which must be set anyway.
3196  HTMLAttr *pPrev = pAttr->GetPrev();
3197  bRet = false;
3198  delete pAttr;
3199 
3200  if( pPrev )
3201  {
3202  // The previous attributes must be set anyway.
3203  if( pNext )
3204  pNext->InsertPrev( pPrev );
3205  else
3206  {
3207  if (pPrev->m_bInsAtStart)
3208  m_aSetAttrTab.push_front( pPrev );
3209  else
3210  m_aSetAttrTab.push_back( pPrev );
3211  }
3212  }
3213 
3214  }
3215 
3216  // If the first attribute of the list was set, then the list header
3217  // must be corrected as well.
3218  if( pLast )
3219  pLast->m_pNext = pNext;
3220  else if( ppHead )
3221  *ppHead = pNext;
3222 
3223  if( bMoveBack )
3225 
3226  return bRet;
3227 }
3228 
3230 {
3231  // preliminary paragraph attributes are not allowed here, they could
3232  // be set here and then the pointers become invalid!
3233  OSL_ENSURE(m_aParaAttrs.empty(),
3234  "Danger: there are non-final paragraph attributes");
3235  m_aParaAttrs.clear();
3236 
3237  // The list header is saved in the attribute
3238  HTMLAttr **ppHead = pAttr->m_ppHead;
3239 
3240  OSL_ENSURE( ppHead, "no list header attribute found!" );
3241 
3242  // Is the last started or an earlier started attribute being removed?
3243  HTMLAttr *pLast = nullptr;
3244  if( ppHead && pAttr != *ppHead )
3245  {
3246  // The last started attribute isn't being ended
3247 
3248  // Then we look for attribute which was started immediately afterwards,
3249  // which has also not yet been ended (otherwise it would no longer be
3250  // in the list).
3251  pLast = *ppHead;
3252  while( pLast && pLast->GetNext() != pAttr )
3253  pLast = pLast->GetNext();
3254 
3255  OSL_ENSURE( pLast, "Attribute not found in own list!" );
3256  }
3257 
3258  // now delete the attribute
3259  HTMLAttr *pNext = pAttr->GetNext();
3260  HTMLAttr *pPrev = pAttr->GetPrev();
3261  //hold ref to xAttrTab until end of scope to ensure *ppHead validity
3262  std::shared_ptr<HTMLAttrTable> xKeepAlive(pAttr->m_xAttrTab);
3263  delete pAttr;
3264 
3265  if( pPrev )
3266  {
3267  // The previous attributes must be set anyway.
3268  if( pNext )
3269  pNext->InsertPrev( pPrev );
3270  else
3271  {
3272  if (pPrev->m_bInsAtStart)
3273  m_aSetAttrTab.push_front( pPrev );
3274  else
3275  m_aSetAttrTab.push_back( pPrev );
3276  }
3277  }
3278 
3279  // If the first attribute of the list was deleted, then the list header
3280  // must be corrected as well.
3281  if( pLast )
3282  pLast->m_pNext = pNext;
3283  else if( ppHead )
3284  *ppHead = pNext;
3285 }
3286 
3287 void SwHTMLParser::SaveAttrTab(std::shared_ptr<HTMLAttrTable> const & rNewAttrTab)
3288 {
3289  // preliminary paragraph attributes are not allowed here, they could
3290  // be set here and then the pointers become invalid!
3291  OSL_ENSURE(m_aParaAttrs.empty(),
3292  "Danger: there are non-final paragraph attributes");
3293  m_aParaAttrs.clear();
3294 
3295  HTMLAttr** pHTMLAttributes = reinterpret_cast<HTMLAttr**>(m_xAttrTab.get());
3296  HTMLAttr** pSaveAttributes = reinterpret_cast<HTMLAttr**>(rNewAttrTab.get());
3297 
3298  for (auto nCnt = sizeof(HTMLAttrTable) / sizeof(HTMLAttr*); nCnt--; ++pHTMLAttributes, ++pSaveAttributes)
3299  {
3300  *pSaveAttributes = *pHTMLAttributes;
3301 
3302  HTMLAttr *pAttr = *pSaveAttributes;
3303  while (pAttr)
3304  {
3305  pAttr->SetHead(pSaveAttributes, rNewAttrTab);
3306  pAttr = pAttr->GetNext();
3307  }
3308 
3309  *pHTMLAttributes = nullptr;
3310  }
3311 }
3312 
// Splits the attributes that are currently open in m_xAttrTab at the current
// PaM position. For every slot of the table, each open attribute is
//  - cloned (Clone() with the end position) when its start differs from the
//    end position, and the clone is either linked in as "previous" of the next
//    attribute in the chain or queued in m_aSetAttrTab,
//  - then Reset() to start at the current PaM position and appended to the
//    matching slot of rNewAttrTab.
// Afterwards all slots of m_xAttrTab are nullptr.
// bMoveEndBack: if true, the end position used for the clones is moved to the
// end of the content node found by SwNodes::GoPrevious().
3313 void SwHTMLParser::SplitAttrTab( std::shared_ptr<HTMLAttrTable> const & rNewAttrTab,
3314  bool bMoveEndBack )
3315 {
3316  // preliminary paragraph attributes are not allowed here, they could
3317  // be set here and then the pointers become invalid!
3318  OSL_ENSURE(m_aParaAttrs.empty(),
3319  "Danger: there are non-final paragraph attributes");
3320  m_aParaAttrs.clear();
3321 
3322  const SwNodeIndex& nSttIdx = m_pPam->GetPoint()->nNode;
3323  SwNodeIndex nEndIdx( nSttIdx );
3324 
3325  // close all still open attributes and re-open them after the table
      // (both tables are accessed as flat arrays of HTMLAttr* slots)
3326  HTMLAttr** pHTMLAttributes = reinterpret_cast<HTMLAttr**>(m_xAttrTab.get());
3327  HTMLAttr** pSaveAttributes = reinterpret_cast<HTMLAttr**>(rNewAttrTab.get());
3328  bool bSetAttr = true;
3329  const sal_Int32 nSttCnt = m_pPam->GetPoint()->nContent.GetIndex();
3330  sal_Int32 nEndCnt = nSttCnt;
3331 
3332  if( bMoveEndBack )
3333  {
3334  sal_uLong nOldEnd = nEndIdx.GetIndex();
3335  sal_uLong nTmpIdx;
      // nTmpIdx ends up as the first of EndOfExtras / EndOfAutotext that lies
      // below nOldEnd; if neither does, EndOfInserts is used instead
3336  if( ( nTmpIdx = m_xDoc->GetNodes().GetEndOfExtras().GetIndex()) >= nOldEnd ||
3337  ( nTmpIdx = m_xDoc->GetNodes().GetEndOfAutotext().GetIndex()) >= nOldEnd )
3338  {
3339  nTmpIdx = m_xDoc->GetNodes().GetEndOfInserts().GetIndex();
3340  }
3341  SwContentNode* pCNd = SwNodes::GoPrevious(&nEndIdx);
3342 
3343  // Don't set attributes, when the PaM was moved outside of the content area.
3344  bSetAttr = pCNd && nTmpIdx < nEndIdx.GetIndex();
3345 
      // pCNd is only dereferenced when bSetAttr (and therefore pCNd) is set
3346  nEndCnt = (bSetAttr ? pCNd->Len() : 0);
3347  }
3348  for (auto nCnt = sizeof(HTMLAttrTable) / sizeof(HTMLAttr*); nCnt--; (++pHTMLAttributes, ++pSaveAttributes))
3349  {
3350  HTMLAttr *pAttr = *pHTMLAttributes;
3351  *pSaveAttributes = nullptr;
3352  while( pAttr )
3353  {
      // remember the neighbours first: Reset() below breaks the links
3354  HTMLAttr *pNext = pAttr->GetNext();
3355  HTMLAttr *pPrev = pAttr->GetPrev();
3356 
3357  if( bSetAttr &&
3358  ( pAttr->GetSttParaIdx() < nEndIdx.GetIndex() ||
3359  (pAttr->GetSttPara() == nEndIdx &&
3360  pAttr->GetSttCnt() != nEndCnt) ) )
3361  {
3362  // The attribute must be set before the list. We need the
3363  // original and therefore we clone it, because pointer to the
3364  // attribute exist in the other contexts. The Next-List is lost
3365  // in doing so, but the Previous-List is preserved.
3366  HTMLAttr *pSetAttr = pAttr->Clone( nEndIdx, nEndCnt );
3367 
3368  if( pNext )
3369  pNext->InsertPrev( pSetAttr );
3370  else
3371  {
3372  if (pSetAttr->m_bInsAtStart)
3373  m_aSetAttrTab.push_front( pSetAttr );
3374  else
3375  m_aSetAttrTab.push_back( pSetAttr );
3376  }
3377  }
3378  else if( pPrev )
3379  {
3380  // If the attribute doesn't need to be set before the table, then
3381  // the previous attributes must still be set.
3382  if( pNext )
3383  pNext->InsertPrev( pPrev );
3384  else
3385  {
3386  if (pPrev->m_bInsAtStart)
3387  m_aSetAttrTab.push_front( pPrev );
3388  else
3389  m_aSetAttrTab.push_back( pPrev );
3390  }
3391  }
3392 
3393  // set the start of the attribute anew and break link
3394  pAttr->Reset(nSttIdx, nSttCnt, pSaveAttributes, rNewAttrTab);
3395 
      // append the re-started attribute to the end of the chain in the new table
3396  if (*pSaveAttributes)
3397  {
3398  HTMLAttr *pSAttr = *pSaveAttributes;
3399  while( pSAttr->GetNext() )
3400  pSAttr = pSAttr->GetNext();
3401  pSAttr->InsertNext( pAttr );
3402  }
3403  else
3404  *pSaveAttributes = pAttr;
3405 
3406  pAttr = pNext;
3407  }
3408 
3409  *pHTMLAttributes = nullptr;
3410  }
3411 }
3412 
3413 void SwHTMLParser::RestoreAttrTab(std::shared_ptr<HTMLAttrTable> const & rNewAttrTab)
3414 {
3415  // preliminary paragraph attributes are not allowed here, they could
3416  // be set here and then the pointers become invalid!
3417  OSL_ENSURE(m_aParaAttrs.empty(),
3418  "Danger: there are non-final paragraph attributes");
3419  m_aParaAttrs.clear();
3420 
3421  HTMLAttr** pHTMLAttributes = reinterpret_cast<HTMLAttr**>(m_xAttrTab.get());
3422  HTMLAttr** pSaveAttributes = reinterpret_cast<HTMLAttr**>(rNewAttrTab.get());
3423 
3424  for (auto nCnt = sizeof(HTMLAttrTable) / sizeof(HTMLAttr*); nCnt--; ++pHTMLAttributes, ++pSaveAttributes)
3425  {
3426  OSL_ENSURE(!*pHTMLAttributes, "The attribute table is not empty!");
3427 
3428  *pHTMLAttributes = *pSaveAttributes;
3429 
3430  HTMLAttr *pAttr = *pHTMLAttributes;
3431  while (pAttr)
3432  {
3433  OSL_ENSURE( !pAttr->GetPrev() || !pAttr->GetPrev()->m_ppHead,
3434  "Previous attribute has still a header" );
3435  pAttr->SetHead(pHTMLAttributes, m_xAttrTab);
3436  pAttr = pAttr->GetNext();
3437  }
3438 
3439  *pSaveAttributes = nullptr;
3440  }
3441 }
3442 
3443 void SwHTMLParser::InsertAttr( const SfxPoolItem& rItem, bool bInsAtStart )
3444 {
3445  HTMLAttr* pTmp = new HTMLAttr(*m_pPam->GetPoint(), rItem, nullptr, std::shared_ptr<HTMLAttrTable>());
3446  if (bInsAtStart)
3447  m_aSetAttrTab.push_front( pTmp );
3448  else
3449  m_aSetAttrTab.push_back( pTmp );
3450 }
3451 
3452 void SwHTMLParser::InsertAttrs( std::deque<std::unique_ptr<HTMLAttr>> rAttrs )
3453 {
3454  while( !rAttrs.empty() )
3455  {
3456  std::unique_ptr<HTMLAttr> pAttr = std::move(rAttrs.front());
3457  InsertAttr( pAttr->GetItem(), false );
3458  rAttrs.pop_front();
3459  }
3460 }
3461 
// NOTE(review): the signature line (listing line 3462) is missing from this
// listing; presumably SwHTMLParser::NewStdAttr( HtmlTokenId nToken ) - confirm.
// Reads ID/STYLE/CLASS/LANG/DIR from the options of the current tag, creates
// an HTMLAttrContext for nToken, applies the parsed style options to it (if
// there are any) and pushes the context.
3463 {
3464  OUString aId, aStyle, aLang, aDir;
3465  OUString aClass;
3466 
3467  const HTMLOptions& rHTMLOptions = GetOptions();
      // the options are visited from the last one to the first one
3468  for (size_t i = rHTMLOptions.size(); i; )
3469  {
3470  const HTMLOption& rOption = rHTMLOptions[--i];
3471  switch( rOption.GetToken() )
3472  {
3473  case HtmlOptionId::ID:
3474  aId = rOption.GetString();
3475  break;
3476  case HtmlOptionId::STYLE:
3477  aStyle = rOption.GetString();
3478  break;
3479  case HtmlOptionId::CLASS:
3480  aClass = rOption.GetString();
3481  break;
3482  case HtmlOptionId::LANG:
3483  aLang = rOption.GetString();
3484  break;
3485  case HtmlOptionId::DIR:
3486  aDir = rOption.GetString();
3487  break;
3488  default: break;
3489  }
3490  }
3491 
3492  // create a new context
3493  std::unique_ptr<HTMLAttrContext> xCntxt(new HTMLAttrContext(nToken));
3494 
3495  // parse styles
3496  if( HasStyleOptions( aStyle, aId, aClass, &aLang, &aDir ) )
3497  {
3498  SfxItemSet aItemSet( m_xDoc->GetAttrPool(), m_pCSS1Parser->GetWhichMap() );
3499  SvxCSS1PropertyInfo aPropInfo;
3500 
3501  if( ParseStyleOptions( aStyle, aId, aClass, aItemSet, aPropInfo, &aLang, &aDir ) )
3502  {
      // DoPositioning() is skipped only for a SPAN with a class for which
      // CreateContainer() succeeded
3503  if( HtmlTokenId::SPAN_ON != nToken || aClass.isEmpty() ||
3504  !CreateContainer( aClass, aItemSet, aPropInfo, xCntxt.get() ) )
3505  DoPositioning( aItemSet, aPropInfo, xCntxt.get() );
3506  InsertAttrs( aItemSet, aPropInfo, xCntxt.get(), true );
3507  }
3508  }
3509 
3510  // save the context
3511  PushContext(xCntxt);
3512 }
3513 
// NOTE(review): the first signature line (listing line 3514) is missing from
// this listing; presumably "void SwHTMLParser::NewStdAttr( HtmlTokenId nToken,"
// - confirm.
// Opens a context for nToken that carries up to three pool items.
// ppAttr/rItem are mandatory; ppAttr2/pItem2 and ppAttr3/pItem3 are optional
// (an item pointer of nullptr means "not used"). With style options present
// the items are put into an item set before ParseStyleOptions() runs on it;
// without style options they are inserted directly through the given slots.
3515  HTMLAttr **ppAttr, const SfxPoolItem & rItem,
3516  HTMLAttr **ppAttr2, const SfxPoolItem *pItem2,
3517  HTMLAttr **ppAttr3, const SfxPoolItem *pItem3 )
3518 {
3519  OUString aId, aStyle, aClass, aLang, aDir;
3520 
3521  const HTMLOptions& rHTMLOptions = GetOptions();
      // the options are visited from the last one to the first one
3522  for (size_t i = rHTMLOptions.size(); i; )
3523  {
3524  const HTMLOption& rOption = rHTMLOptions[--i];
3525  switch( rOption.GetToken() )
3526  {
3527  case HtmlOptionId::ID:
3528  aId = rOption.GetString();
3529  break;
3530  case HtmlOptionId::STYLE:
3531  aStyle = rOption.GetString();
3532  break;
3533  case HtmlOptionId::CLASS:
3534  aClass = rOption.GetString();
3535  break;
3536  case HtmlOptionId::LANG:
3537  aLang = rOption.GetString();
3538  break;
3539  case HtmlOptionId::DIR:
3540  aDir = rOption.GetString();
3541  break;
3542  default: break;
3543  }
3544  }
3545 
3546  // create a new context
3547  std::unique_ptr<HTMLAttrContext> xCntxt(new HTMLAttrContext(nToken));
3548 
3549  // parse styles
3550  if( HasStyleOptions( aStyle, aId, aClass, &aLang, &aDir ) )
3551  {
3552  SfxItemSet aItemSet( m_xDoc->GetAttrPool(), m_pCSS1Parser->GetWhichMap() );
3553  SvxCSS1PropertyInfo aPropInfo;
3554 
3555  aItemSet.Put( rItem );
3556  if( pItem2 )
3557  aItemSet.Put( *pItem2 );
3558  if( pItem3 )
3559  aItemSet.Put( *pItem3 );
3560 
3561  if( ParseStyleOptions( aStyle, aId, aClass, aItemSet, aPropInfo, &aLang, &aDir ) )
3562  DoPositioning( aItemSet, aPropInfo, xCntxt.get() );
3563 
      // the items are inserted even if ParseStyleOptions() returned false
3564  InsertAttrs( aItemSet, aPropInfo, xCntxt.get(), true );
3565  }
3566  else
3567  {
3568  InsertAttr( ppAttr ,rItem, xCntxt.get() );
3569  if( pItem2 )
3570  {
3571  OSL_ENSURE( ppAttr2, "missing table entry for item2" );
3572  InsertAttr( ppAttr2, *pItem2, xCntxt.get() );
3573  }
3574  if( pItem3 )
3575  {
3576  OSL_ENSURE( ppAttr3, "missing table entry for item3" );
3577  InsertAttr( ppAttr3, *pItem3, xCntxt.get() );
3578  }
3579  }
3580 
3581  // save the context
3582  PushContext(xCntxt);
3583 }
3584 
// NOTE(review): the signature line (listing line 3585) is missing from this
// listing; presumably SwHTMLParser::EndTag( HtmlTokenId nToken ) - confirm.
// Pops the context that belongs to the "on" form of nToken (getOnToken) and,
// if such a context was found, ends it with EndContext().
3586 {
3587  // fetch context
3588  std::unique_ptr<HTMLAttrContext> xCntxt(PopContext(getOnToken(nToken)));
3589  if (xCntxt)
3590  {
3591  // and maybe end the attributes
3592  EndContext(xCntxt.get());
3593  }
3594 }
3595 
// NOTE(review): the signature line (listing line 3596) is missing from this
// listing; presumably SwHTMLParser::NewBasefontAttr() - confirm.
// Opens a BASEFONT_ON context: reads SIZE (default 3, clamped to 1..7) plus
// ID/STYLE/CLASS/LANG/DIR, sets the font height m_aFontHeights[nSize-1] for
// the Western, CJK and CTL font size attributes, pushes the context and
// records nSize on m_aBaseFontStack.
3597 {
3598  OUString aId, aStyle, aClass, aLang, aDir;
3599  sal_uInt16 nSize = 3;
3600 
3601  const HTMLOptions& rHTMLOptions = GetOptions();
      // the options are visited from the last one to the first one
3602  for (size_t i = rHTMLOptions.size(); i; )
3603  {
3604  const HTMLOption& rOption = rHTMLOptions[--i];
3605  switch( rOption.GetToken() )
3606  {
3607  case HtmlOptionId::SIZE:
3608  nSize = static_cast<sal_uInt16>(rOption.GetNumber());
3609  break;
3610  case HtmlOptionId::ID:
3611  aId = rOption.GetString();
3612  break;
3613  case HtmlOptionId::STYLE:
3614  aStyle = rOption.GetString();
3615  break;
3616  case HtmlOptionId::CLASS:
3617  aClass = rOption.GetString();
3618  break;
3619  case HtmlOptionId::LANG:
3620  aLang = rOption.GetString();
3621  break;
3622  case HtmlOptionId::DIR:
3623  aDir = rOption.GetString();
3624  break;
3625  default: break;
3626  }
3627  }
3628 
      // clamp to 1..7 so that nSize-1 below is a valid index for the seven
      // HTML font sizes
3629  if( nSize < 1 )
3630  nSize = 1;
3631 
3632  if( nSize > 7 )
3633  nSize = 7;
3634 
3635  // create a new context
3636  std::unique_ptr<HTMLAttrContext> xCntxt(new HTMLAttrContext(HtmlTokenId::BASEFONT_ON));
3637 
3638  // parse styles
3639  if( HasStyleOptions( aStyle, aId, aClass, &aLang, &aDir ) )
3640  {
3641  SfxItemSet aItemSet( m_xDoc->GetAttrPool(), m_pCSS1Parser->GetWhichMap() );
3642  SvxCSS1PropertyInfo aPropInfo;
3643 
3644  //CJK has different defaults
3645  SvxFontHeightItem aFontHeight( m_aFontHeights[nSize-1], 100, RES_CHRATR_FONTSIZE );
3646  aItemSet.Put( aFontHeight );
3647  SvxFontHeightItem aFontHeightCJK( m_aFontHeights[nSize-1], 100, RES_CHRATR_CJK_FONTSIZE );
3648  aItemSet.Put( aFontHeightCJK );
3649  //Complex type can contain so many types of letters,
3650  //that it's not really worthy to bother, IMO.
3651  //Still, I have set a default.
3652  SvxFontHeightItem aFontHeightCTL( m_aFontHeights[nSize-1], 100, RES_CHRATR_CTL_FONTSIZE );
3653  aItemSet.Put( aFontHeightCTL );
3654 
3655  if( ParseStyleOptions( aStyle, aId, aClass, aItemSet, aPropInfo, &aLang, &aDir ) )
3656  DoPositioning( aItemSet, aPropInfo, xCntxt.get() );
3657 
3658  InsertAttrs( aItemSet, aPropInfo, xCntxt.get(), true );
3659  }
3660  else
3661  {
      // no style options: insert the three heights directly, each into the
      // attribute table slot of its script type
3662  SvxFontHeightItem aFontHeight( m_aFontHeights[nSize-1], 100, RES_CHRATR_FONTSIZE );
3663  InsertAttr( &m_xAttrTab->pFontHeight, aFontHeight, xCntxt.get() );
3664  SvxFontHeightItem aFontHeightCJK( m_aFontHeights[nSize-1], 100, RES_CHRATR_CJK_FONTSIZE );
3665  InsertAttr( &m_xAttrTab->pFontHeightCJK, aFontHeightCJK, xCntxt.get() );
3666  SvxFontHeightItem aFontHeightCTL( m_aFontHeights[nSize-1], 100, RES_CHRATR_CTL_FONTSIZE );
3667  InsertAttr( &m_xAttrTab->pFontHeightCTL, aFontHeightCTL, xCntxt.get() );
3668  }
3669 
3670  // save the context
3671  PushContext(xCntxt);
3672 
3673  // save the font size
3674  m_aBaseFontStack.push_back( nSize );
3675 }
3676 
// NOTE(review): the signature line (listing line 3677) is missing from this
// listing; presumably SwHTMLParser::EndBasefontAttr() - confirm.
// Ends the BASEFONT_ON context and removes the last entry of
// m_aBaseFontStack, unless the stack is already down to m_nBaseFontStMin.
3678 {
3679  EndTag( HtmlTokenId::BASEFONT_ON );
3680 
3681  // avoid stack underflow in tables
3682  if( m_aBaseFontStack.size() > m_nBaseFontStMin )
3683  m_aBaseFontStack.erase( m_aBaseFontStack.begin() + m_aBaseFontStack.size() - 1 );
3684 }
3685 
3687 {
3688  sal_uInt16 nBaseSize =
3691  : 3 );
3692  sal_uInt16 nFontSize =
3693  ( m_aFontStack.size() > m_nFontStMin
3694  ? (m_aFontStack[m_aFontStack.size()-1] & FONTSIZE_MASK)
3695  : nBaseSize );
3696 
3697  OUString aFace, aId, aStyle, aClass, aLang, aDir;
3698  Color aColor;
3699  sal_uLong nFontHeight = 0; // actual font height to set
3700  sal_uInt16 nSize = 0; // font height in Netscape notation (1-7)
3701  bool bColor = false;
3702 
3703  const HTMLOptions& rHTMLOptions = GetOptions();
3704  for (size_t i = rHTMLOptions.size(); i; )
3705  {
3706  const HTMLOption& rOption = rHTMLOptions[--i];
3707  switch( rOption.GetToken() )
3708  {
3709  case HtmlOptionId::SIZE:
3710  if( HtmlTokenId::FONT_ON==nToken && !rOption.GetString().isEmpty() )
3711  {
3712  sal_Int32 nSSize;
3713  if( '+' == rOption.GetString()[0] ||
3714  '-' == rOption.GetString()[0] )
3715  nSSize = o3tl::saturating_add<sal_Int32>(nBaseSize, rOption.GetSNumber());
3716  else
3717  nSSize = static_cast<sal_Int32>(rOption.GetNumber());
3718 
3719  if( nSSize < 1 )
3720  nSSize = 1;
3721  else if( nSSize > 7 )
3722  nSSize = 7;
3723 
3724  nSize = static_cast<sal_uInt16>(nSSize);
3725  nFontHeight = m_aFontHeights[nSize-1];
3726  }
3727  break;
3728  case HtmlOptionId::COLOR:
3729  if( HtmlTokenId::FONT_ON==nToken )
3730  {
3731  rOption.GetColor( aColor );
3732  bColor = true;
3733  }
3734  break;
3735  case HtmlOptionId::FACE:
3736  if( HtmlTokenId::FONT_ON==nToken )
3737  aFace = rOption.GetString();
3738  break;
3739  case HtmlOptionId::ID:
3740  aId = rOption.GetString();
3741  break;
3742  case HtmlOptionId::STYLE:
3743  aStyle = rOption.GetString();
3744  break;
3745  case HtmlOptionId::CLASS:
3746  aClass = rOption.GetString();
3747  break;
3748  case HtmlOptionId::LANG:
3749  aLang = rOption.GetString();
3750  break;
3751  case HtmlOptionId::DIR:
3752  aDir = rOption.GetString();
3753  break;
3754  default: break;
3755  }
3756  }
3757 
3758  if( HtmlTokenId::FONT_ON != nToken )
3759  {
3760  // HTML_BIGPRINT_ON or HTML_SMALLPRINT_ON
3761 
3762  // In headings the current heading sets the font height
3763  // and not BASEFONT.
3764  const SwFormatColl *pColl = GetCurrFormatColl();
3765  sal_uInt16 nPoolId = pColl ? pColl->GetPoolFormatId() : 0;
3766  if( nPoolId>=RES_POOLCOLL_HEADLINE1 &&
3767  nPoolId<=RES_POOLCOLL_HEADLINE6 )
3768  {
3769  // If the font height in the heading wasn't changed yet,
3770  // then take the one from the style.
3771  if( m_nFontStHeadStart==m_aFontStack.size() )
3772  nFontSize = static_cast< sal_uInt16 >(6 - (nPoolId - RES_POOLCOLL_HEADLINE1));
3773  }
3774  else
3775  nPoolId = 0;
3776 
3777  if( HtmlTokenId::BIGPRINT_ON == nToken )
3778  nSize = ( nFontSize<7 ? nFontSize+1 : 7 );
3779  else
3780  nSize = ( nFontSize>1 ? nFontSize-1 : 1 );
3781 
3782  // If possible in headlines we fetch the new font height
3783  // from the style.
3784  if( nPoolId && nSize>=1 && nSize <=6 )
3785  nFontHeight =
3786  m_pCSS1Parser->GetTextCollFromPool(
3787  RES_POOLCOLL_HEADLINE1+6-nSize )->GetSize().GetHeight();
3788  else
3789  nFontHeight = m_aFontHeights[nSize-1];
3790  }
3791 
3792  OSL_ENSURE( !nSize == !nFontHeight, "HTML-Font-Size != Font-Height" );
3793 
3794  OUString aFontName;
3795  const OUString aStyleName;
3796  FontFamily eFamily = FAMILY_DONTKNOW; // family and pitch,
3797  FontPitch ePitch = PITCH_DONTKNOW; // if not found
3798  rtl_TextEncoding eEnc = osl_getThreadTextEncoding();
3799 
3800  if( !aFace.isEmpty() && !m_pCSS1Parser->IsIgnoreFontFamily() )
3801  {
3802  const FontList *pFList = nullptr;
3803  SwDocShell *pDocSh = m_xDoc->GetDocShell();
3804  if( pDocSh )
3805  {
3806  const SvxFontListItem *pFListItem =
3807  static_cast<const SvxFontListItem *>(pDocSh->GetItem(SID_ATTR_CHAR_FONTLIST));
3808  if( pFListItem )
3809  pFList = pFListItem->GetFontList();
3810  }
3811 
3812  bool bFound = false;
3813  sal_Int32 nStrPos = 0;
3814  while( nStrPos!= -1 )
3815  {
3816  OUString aFName = aFace.getToken( 0, ',', nStrPos );
3817  aFName = comphelper::string::strip(aFName, ' ');
3818  if( !aFName.isEmpty() )
3819  {
3820  if( !bFound && pFList )
3821  {
3822  sal_Handle hFont = pFList->GetFirstFontMetric( aFName );
3823  if( nullptr != hFont )
3824  {
3825  const FontMetric& rFMetric = FontList::GetFontMetric( hFont );
3826  if( RTL_TEXTENCODING_DONTKNOW != rFMetric.GetCharSet() )
3827  {
3828  bFound = true;
3829  if( RTL_TEXTENCODING_SYMBOL == rFMetric.GetCharSet() )
3830  eEnc = RTL_TEXTENCODING_SYMBOL;
3831  }
3832  }
3833  }
3834  if( !aFontName.isEmpty() )
3835  aFontName += ";";
3836  aFontName += aFName;
3837  }
3838  }
3839  }
3840 
3841  // create a new context
3842  std::unique_ptr<HTMLAttrContext> xCntxt(new HTMLAttrContext(nToken));
3843 
3844  // parse styles
3845  if( HasStyleOptions( aStyle, aId, aClass, &aLang, &aDir ) )
3846  {
3847  SfxItemSet aItemSet( m_xDoc->GetAttrPool(), m_pCSS1Parser->GetWhichMap() );
3848  SvxCSS1PropertyInfo aPropInfo;
3849 
3850  if( nFontHeight )
3851  {
3852  SvxFontHeightItem aFontHeight( nFontHeight, 100, RES_CHRATR_FONTSIZE );
3853  aItemSet.Put( aFontHeight );
3854  SvxFontHeightItem aFontHeightCJK( nFontHeight, 100, RES_CHRATR_CJK_FONTSIZE );
3855  aItemSet.Put( aFontHeightCJK );
3856  SvxFontHeightItem aFontHeightCTL( nFontHeight, 100, RES_CHRATR_CTL_FONTSIZE );
3857  aItemSet.Put( aFontHeightCTL );
3858  }
3859  if( bColor )
3860  aItemSet.Put( SvxColorItem(aColor, RES_CHRATR_COLOR) );
3861  if( !aFontName.isEmpty() )
3862  {
3863  SvxFontItem aFont( eFamily, aFontName, aStyleName, ePitch, eEnc, RES_CHRATR_FONT );
3864  aItemSet.Put( aFont );
3865  SvxFontItem aFontCJK( eFamily, aFontName, aStyleName, ePitch, eEnc, RES_CHRATR_CJK_FONT );
3866  aItemSet.Put( aFontCJK );
3867  SvxFontItem aFontCTL( eFamily, aFontName, aStyleName, ePitch, eEnc, RES_CHRATR_CTL_FONT );
3868  aItemSet.Put( aFontCTL );
3869  }
3870 
3871  if( ParseStyleOptions( aStyle, aId, aClass, aItemSet, aPropInfo, &aLang, &aDir ) )
3872  DoPositioning( aItemSet, aPropInfo, xCntxt.get() );
3873 
3874  InsertAttrs( aItemSet, aPropInfo, xCntxt.get(), true );
3875  }
3876  else
3877  {
3878  if( nFontHeight )
3879  {
3880  SvxFontHeightItem aFontHeight( nFontHeight, 100, RES_CHRATR_FONTSIZE );
3881  InsertAttr( &m_xAttrTab->pFontHeight, aFontHeight, xCntxt.get() );
3882  SvxFontHeightItem aFontHeightCJK( nFontHeight, 100, RES_CHRATR_CJK_FONTSIZE );
3883  InsertAttr( &m_xAttrTab->pFontHeight, aFontHeightCJK, xCntxt.get() );
3884  SvxFontHeightItem aFontHeightCTL( nFontHeight, 100, RES_CHRATR_CTL_FONTSIZE );
3885  InsertAttr( &m_xAttrTab->pFontHeight, aFontHeightCTL, xCntxt.get() );
3886  }
3887  if( bColor )
3888  InsertAttr( &m_xAttrTab->pFontColor, SvxColorItem(aColor, RES_CHRATR_COLOR), xCntxt.get() );
3889  if( !aFontName.isEmpty() )
3890  {
3891  SvxFontItem aFont( eFamily, aFontName, aStyleName, ePitch, eEnc, RES_CHRATR_FONT );
3892  InsertAttr( &m_xAttrTab->pFont, aFont, xCntxt.get() );
3893  SvxFontItem aFontCJK( eFamily, aFontName, aStyleName, ePitch, eEnc, RES_CHRATR_CJK_FONT );
3894  InsertAttr( &m_xAttrTab->pFont, aFontCJK, xCntxt.get() );
3895  SvxFontItem aFontCTL( eFamily, aFontName, aStyleName, ePitch, eEnc, RES_CHRATR_CTL_FONT );
3896  InsertAttr( &m_xAttrTab->pFont, aFontCTL, xCntxt.get() );
3897  }
3898  }
3899 
3900  // save the context
3901  PushContext(xCntxt);
3902 
3903  m_aFontStack.push_back( nSize );
3904 }
3905 
// NOTE(review): the signature line (listing line 3906) is missing from this
// listing; presumably SwHTMLParser::EndFontAttr( HtmlTokenId nToken ) - confirm.
// Ends the context for nToken and removes the last entry of m_aFontStack,
// unless the stack is already down to m_nFontStMin.
3907 {
3908  EndTag( nToken );
3909 
3910  // avoid stack underflow in tables
3911  if( m_aFontStack.size() > m_nFontStMin )
3912  m_aFontStack.erase( m_aFontStack.begin() + m_aFontStack.size() - 1 );
3913 }
3914 
// NOTE(review): the signature line (listing line 3915) is missing from this
// listing; presumably SwHTMLParser::NewPara() - confirm.
// Opens a PARABREAK_ON context: reads ID/ALIGN/STYLE/CLASS/LANG/DIR, applies
// the style options (without the class) and an explicit alignment to the
// context, pushes it and marks the paragraph element as open in
// m_nOpenParaToken.
3916 {
3917  if( m_pPam->GetPoint()->nContent.GetIndex() )
      // NOTE(review): the statement of this branch (listing line 3918) is
      // missing from the listing; presumably it appends a new text node -
      // confirm.
3919  else
3920  AddParSpace();
3921 
      // SvxAdjust::End serves as "no ALIGN option given" marker (see the check below)
3922  m_eParaAdjust = SvxAdjust::End;
3923  OUString aId, aStyle, aClass, aLang, aDir;
3924 
3925  const HTMLOptions& rHTMLOptions = GetOptions();
      // the options are visited from the last one to the first one
3926  for (size_t i = rHTMLOptions.size(); i; )
3927  {
3928  const HTMLOption& rOption = rHTMLOptions[--i];
3929  switch( rOption.GetToken() )
3930  {
3931  case HtmlOptionId::ID:
3932  aId = rOption.GetString();
3933  break;
3934  case HtmlOptionId::ALIGN:
3935  m_eParaAdjust = rOption.GetEnum( aHTMLPAlignTable, m_eParaAdjust );
3936  break;
3937  case HtmlOptionId::STYLE:
3938  aStyle = rOption.GetString();
3939  break;
3940  case HtmlOptionId::CLASS:
3941  aClass = rOption.GetString();
3942  break;
3943  case HtmlOptionId::LANG:
3944  aLang = rOption.GetString();
3945  break;
3946  case HtmlOptionId::DIR:
3947  aDir = rOption.GetString();
3948  break;
3949  default: break;
3950  }
3951  }
3952 
3953  // create a new context
      // (only a paragraph with a CLASS carries a text collection and class)
3954  std::unique_ptr<HTMLAttrContext> xCntxt(
3955  !aClass.isEmpty() ? new HTMLAttrContext( HtmlTokenId::PARABREAK_ON,
3956  RES_POOLCOLL_TEXT, aClass )
3957  : new HTMLAttrContext( HtmlTokenId::PARABREAK_ON ));
3958 
3959  // parse styles (Don't consider class. This is only possible as long as none of
3960  // the CSS1 properties of the class must be formatted hard!!!)
3961  if (HasStyleOptions(aStyle, aId, {}, &aLang, &aDir))
3962  {
3963  SfxItemSet aItemSet( m_xDoc->GetAttrPool(), m_pCSS1Parser->GetWhichMap() );
3964  SvxCSS1PropertyInfo aPropInfo;
3965 
3966  if (ParseStyleOptions(aStyle, aId, OUString(), aItemSet, aPropInfo, &aLang, &aDir))
3967  {
3968  OSL_ENSURE( aClass.isEmpty() || !m_pCSS1Parser->GetClass( aClass ),
3969  "Class is not considered" );
3970  DoPositioning( aItemSet, aPropInfo, xCntxt.get() );
3971  InsertAttrs( aItemSet, aPropInfo, xCntxt.get() );
3972  }
3973  }
3974 
3975  if( SvxAdjust::End != m_eParaAdjust )
3976  InsertAttr( &m_xAttrTab->pAdjust, SvxAdjustItem(m_eParaAdjust, RES_PARATR_ADJUST), xCntxt.get() );
3977 
3978  // and push on stack
3979  PushContext( xCntxt );
3980 
3981  // set the current style or its attributes
3982  SetTextCollAttrs( !aClass.isEmpty() ? m_aContexts.back().get() : nullptr );
3983 
3984  // progress bar
3985  ShowStatline();
3986 
3987  OSL_ENSURE( m_nOpenParaToken == HtmlTokenId::NONE, "Now an open paragraph element will be lost." );
3988  m_nOpenParaToken = HtmlTokenId::PARABREAK_ON;
3989 }
3990 
// Closes the currently open paragraph element (m_nOpenParaToken, or
// PARABREAK_ON if none is recorded): pops and ends its context, sets the
// pending attributes and resets m_nOpenParaToken to NONE.
// bReal: if true, the paragraph break / paragraph spacing is applied first
// and the text collection attributes are reset afterwards.
3991 void SwHTMLParser::EndPara( bool bReal )
3992 {
3993  if (HtmlTokenId::LI_ON==m_nOpenParaToken && m_xTable)
3994  {
      // debug builds only: check that the current text node has a numbering rule
3995 #if OSL_DEBUG_LEVEL > 0
3996  const SwNumRule *pNumRule = m_pPam->GetNode().GetTextNode()->GetNumRule();
3997  OSL_ENSURE( pNumRule, "Where is the NumRule" );
3998 #endif
3999  }
4000 
4001  // Netscape skips empty paragraphs, we do the same.
4002  if( bReal )
4003  {
4004  if( m_pPam->GetPoint()->nContent.GetIndex() )
      // NOTE(review): the statement of this branch (listing line 4005) is
      // missing from the listing; presumably it appends a new text node -
      // confirm.
4006  else
4007  AddParSpace();
4008  }
4009 
4010  // If a DD or DT was open, it's an implied definition list,
4011  // which must be closed now.
4012  if( (m_nOpenParaToken == HtmlTokenId::DT_ON || m_nOpenParaToken == HtmlTokenId::DD_ON) &&
      // NOTE(review): the rest of this condition (listing line 4013) is
      // missing from the listing; presumably it checks m_nDefListDeep before
      // the decrement below - confirm.
4014  {
4015  m_nDefListDeep--;
4016  }
4017 
4018  // Pop the context of the stack. It can also be from an
4019  // implied opened definition list.
4020  std::unique_ptr<HTMLAttrContext> xCntxt(
4021  PopContext( m_nOpenParaToken != HtmlTokenId::NONE ? getOnToken(m_nOpenParaToken) : HtmlTokenId::PARABREAK_ON ));
4022 
4023  // close attribute
4024  if (xCntxt)
4025  {
4026  EndContext(xCntxt.get());
4027  SetAttr(); // because of JavaScript set paragraph attributes as fast as possible
4028  xCntxt.reset();
4029  }
4030 
4031  // reset the existing style
4032  if( bReal )
4033  SetTextCollAttrs();
4034 
4035  m_nOpenParaToken = HtmlTokenId::NONE;
4036 }
4037 
// NOTE(review): the signature line (listing line 4038) is missing from this
// listing; presumably SwHTMLParser::NewHeading( HtmlTokenId nToken ) - confirm.
// Opens a heading context: reads ID/ALIGN/STYLE/CLASS/LANG/DIR, starts a new
// paragraph, maps HEAD1_ON..HEAD6_ON to RES_POOLCOLL_HEADLINE1..6 (anything
// else to RES_POOLCOLL_STANDARD), applies style options (without the class)
// and an explicit alignment, pushes the context and sets its text collection
// attributes.
4039 {
      // SvxAdjust::End serves as "no ALIGN option given" marker (see the check below)
4040  m_eParaAdjust = SvxAdjust::End;
4041 
4042  OUString aId, aStyle, aClass, aLang, aDir;
4043 
4044  const HTMLOptions& rHTMLOptions = GetOptions();
      // the options are visited from the last one to the first one
4045  for (size_t i = rHTMLOptions.size(); i; )
4046  {
4047  const HTMLOption& rOption = rHTMLOptions[--i];
4048  switch( rOption.GetToken() )
4049  {
4050  case HtmlOptionId::ID:
4051  aId = rOption.GetString();
4052  break;
4053  case HtmlOptionId::ALIGN:
4054  m_eParaAdjust = rOption.GetEnum( aHTMLPAlignTable, m_eParaAdjust );
4055  break;
4056  case HtmlOptionId::STYLE:
4057  aStyle = rOption.GetString();
4058  break;
4059  case HtmlOptionId::CLASS:
4060  aClass = rOption.GetString();
4061  break;
4062  case HtmlOptionId::LANG:
4063  aLang = rOption.GetString();
4064  break;
4065  case HtmlOptionId::DIR:
4066  aDir = rOption.GetString();
4067  break;
4068  default: break;
4069  }
4070  }
4071 
4072  // open a new paragraph
4073  if( m_pPam->GetPoint()->nContent.GetIndex() )
      // NOTE(review): the statement of this branch (listing line 4074) is
      // missing from the listing; presumably it appends a new text node -
      // confirm.
4075  else
4076  AddParSpace();
4077 
4078  // search for the matching style
4079  sal_uInt16 nTextColl;
4080  switch( nToken )
4081  {
4082  case HtmlTokenId::HEAD1_ON: nTextColl = RES_POOLCOLL_HEADLINE1; break;
4083  case HtmlTokenId::HEAD2_ON: nTextColl = RES_POOLCOLL_HEADLINE2; break;
4084  case HtmlTokenId::HEAD3_ON: nTextColl = RES_POOLCOLL_HEADLINE3; break;
4085  case HtmlTokenId::HEAD4_ON: nTextColl = RES_POOLCOLL_HEADLINE4; break;
4086  case HtmlTokenId::HEAD5_ON: nTextColl = RES_POOLCOLL_HEADLINE5; break;
4087  case HtmlTokenId::HEAD6_ON: nTextColl = RES_POOLCOLL_HEADLINE6; break;
4088  default: nTextColl = RES_POOLCOLL_STANDARD; break;
4089  }
4090 
4091  // create the context
4092  std::unique_ptr<HTMLAttrContext> xCntxt(new HTMLAttrContext(nToken, nTextColl, aClass));
4093 
4094  // parse styles (regarding class see also NewPara)
4095  if (HasStyleOptions(aStyle, aId, {}, &aLang, &aDir))
4096  {
4097  SfxItemSet aItemSet( m_xDoc->GetAttrPool(), m_pCSS1Parser->GetWhichMap() );
4098  SvxCSS1PropertyInfo aPropInfo;
4099 
4100  if (ParseStyleOptions(aStyle, aId, OUString(), aItemSet, aPropInfo, &aLang, &aDir))
4101  {
4102  OSL_ENSURE( aClass.isEmpty() || !m_pCSS1Parser->GetClass( aClass ),
4103  "Class is not considered" );
4104  DoPositioning( aItemSet, aPropInfo, xCntxt.get() );
4105  InsertAttrs( aItemSet, aPropInfo, xCntxt.get() );
4106  }
4107  }
4108 
4109  if( SvxAdjust::End != m_eParaAdjust )
4110  InsertAttr( &m_xAttrTab->pAdjust, SvxAdjustItem(m_eParaAdjust, RES_PARATR_ADJUST), xCntxt.get() );
4111 
4112  // and push on stack
4113  PushContext(xCntxt);
4114 
4115  // set the current style or its attributes
4116  SetTextCollAttrs(m_aContexts.back().get());
4117 
      // NOTE(review): listing line 4118 is missing here; presumably it records
      // the current font stack size as start of the heading - confirm.
4119 
4120  // progress bar
4121  ShowStatline();
4122 }
4123 
// NOTE(review): the signature line (listing line 4124) is missing from this
// listing; presumably SwHTMLParser::EndHeading() - confirm.
// Closes the innermost open heading: starts a new paragraph, removes the
// topmost HEAD1_ON..HEAD6_ON context above m_nContextStMin from m_aContexts
// (whatever its level), ends it, sets the pending attributes and resets the
// text collection attributes.
4125 {
4126  // open a new paragraph
4127  if( m_pPam->GetPoint()->nContent.GetIndex() )
      // NOTE(review): the statement of this branch (listing line 4128) is
      // missing from the listing; presumably it appends a new text node -
      // confirm.
4129  else
4130  AddParSpace();
4131 
4132  // search context matching the token and fetch it from stack
4133  std::unique_ptr<HTMLAttrContext> xCntxt;
4134  auto nPos = m_aContexts.size();
      // walk from the top of the stack downwards, stop at the first heading
4135  while( !xCntxt && nPos>m_nContextStMin )
4136  {
4137  switch( m_aContexts[--nPos]->GetToken() )
4138  {
4139  case HtmlTokenId::HEAD1_ON:
4140  case HtmlTokenId::HEAD2_ON:
4141  case HtmlTokenId::HEAD3_ON:
4142  case HtmlTokenId::HEAD4_ON:
4143  case HtmlTokenId::HEAD5_ON:
4144  case HtmlTokenId::HEAD6_ON:
4145  xCntxt = std::move(m_aContexts[nPos]);
4146  m_aContexts.erase( m_aContexts.begin() + nPos );
4147  break;
4148  default: break;
4149  }
4150  }
4151 
4152  // and now end attributes
4153  if (xCntxt)
4154  {
4155  EndContext(xCntxt.get());
4156  SetAttr(); // because of JavaScript set paragraph attributes as fast as possible
4157  xCntxt.reset();
4158  }
4159 
4160  // reset existing style
4161  SetTextCollAttrs();
4162 
      // NOTE(review): listing line 4163 is missing here; its content cannot
      // be determined from this listing.
4164 }
4165 
// Opens a context for a block element that maps to the text collection nColl
// (LISTING, XMP, BLOCKQUOTE, PRE, ADDRESS, DT, DD): reads
// ID/STYLE/CLASS/LANG/DIR, starts a new paragraph with a token dependent
// append mode, applies style options (without the class), pushes the context
// and sets its text collection attributes.
4166 void SwHTMLParser::NewTextFormatColl( HtmlTokenId nToken, sal_uInt16 nColl )
4167 {
4168  OUString aId, aStyle, aClass, aLang, aDir;
4169 
4170  const HTMLOptions& rHTMLOptions = GetOptions();
      // the options are visited from the last one to the first one
4171  for (size_t i = rHTMLOptions.size(); i; )
4172  {
4173  const HTMLOption& rOption = rHTMLOptions[--i];
4174  switch( rOption.GetToken() )
4175  {
4176  case HtmlOptionId::ID:
4177  aId = rOption.GetString();
4178  break;
4179  case HtmlOptionId::STYLE:
4180  aStyle = rOption.GetString();
4181  break;
4182  case HtmlOptionId::CLASS:
4183  aClass = rOption.GetString();
4184  break;
4185  case HtmlOptionId::LANG:
4186  aLang = rOption.GetString();
4187  break;
4188  case HtmlOptionId::DIR:
4189  aDir = rOption.GetString();
4190  break;
4191  default: break;
4192  }
4193  }
4194 
4195  // open a new paragraph
      // NOTE(review): listing line 4196 is missing here; presumably the
      // declaration of eMode with its default value - confirm.
4197  switch( nToken )
4198  {
4199  case HtmlTokenId::LISTING_ON:
4200  case HtmlTokenId::XMP_ON:
4201  // These both tags will be mapped to the PRE style. For the case that a
4202  // a CLASS exists we will delete it so that we don't get the CLASS of
4203  // the PRE style.
4204  aClass.clear();
4205  [[fallthrough]];
4206  case HtmlTokenId::BLOCKQUOTE_ON:
4207  case HtmlTokenId::BLOCKQUOTE30_ON:
4208  case HtmlTokenId::PREFORMTXT_ON:
4209  eMode = AM_SPACE;
4210  break;
4211  case HtmlTokenId::ADDRESS_ON:
4212  eMode = AM_NOSPACE; // ADDRESS can follow on a <P> without </P>
4213  break;
4214  case HtmlTokenId::DT_ON:
4215  case HtmlTokenId::DD_ON:
4216  eMode = AM_SOFTNOSPACE;
4217  break;
4218  default:
4219  OSL_ENSURE( false, "unknown style" );
4220  break;
4221  }
      // non-empty paragraph: start a new one; empty paragraph: only add
      // paragraph spacing, and only for AM_SPACE
4222  if( m_pPam->GetPoint()->nContent.GetIndex() )
4223  AppendTextNode( eMode );
4224  else if( AM_SPACE==eMode )
4225  AddParSpace();
4226 
4227  // ... and save in a context
4228  std::unique_ptr<HTMLAttrContext> xCntxt(new HTMLAttrContext(nToken, nColl, aClass));
4229 
4230  // parse styles (regarding class see also NewPara)
4231  if (HasStyleOptions(aStyle, aId, {}, &aLang, &aDir))
4232  {
4233  SfxItemSet aItemSet( m_xDoc->GetAttrPool(), m_pCSS1Parser->GetWhichMap() );
4234  SvxCSS1PropertyInfo aPropInfo;
4235 
4236  if (ParseStyleOptions(aStyle, aId, OUString(), aItemSet, aPropInfo, &aLang, &aDir))
4237  {
4238  OSL_ENSURE( aClass.isEmpty() || !m_pCSS1Parser->GetClass( aClass ),
4239  "Class is not considered" );
4240  DoPositioning( aItemSet, aPropInfo, xCntxt.get() );
4241  InsertAttrs( aItemSet, aPropInfo, xCntxt.get() );
4242  }
4243  }
4244 
4245  PushContext(xCntxt);
4246 
4247  // set the new style
4248  SetTextCollAttrs(m_aContexts.back().get());
4249 
4250  // update progress bar
4251  ShowStatline();
4252 }
4253 
// NOTE(review): the signature line (listing line 4254) is missing from this
// listing; presumably SwHTMLParser::EndTextFormatColl( HtmlTokenId nToken )
// - confirm.
// Closes a block element opened by NewTextFormatColl(): starts a new
// paragraph with a token dependent append mode, pops and ends the context of
// the "on" form of nToken, sets the pending attributes and resets the text
// collection attributes.
4255 {
      // NOTE(review): listing line 4256 is missing here; presumably the
      // declaration of eMode with its default value - confirm.
4257  switch( getOnToken(nToken) )
4258  {
4259  case HtmlTokenId::BLOCKQUOTE_ON:
4260  case HtmlTokenId::BLOCKQUOTE30_ON:
4261  case HtmlTokenId::PREFORMTXT_ON:
4262  case HtmlTokenId::LISTING_ON:
4263  case HtmlTokenId::XMP_ON:
4264  eMode = AM_SPACE;
4265  break;
4266  case HtmlTokenId::ADDRESS_ON:
4267  case HtmlTokenId::DT_ON:
4268  case HtmlTokenId::DD_ON:
4269  eMode = AM_SOFTNOSPACE;
4270  break;
4271  default:
4272  OSL_ENSURE( false, "unknown style" );
4273  break;
4274  }
      // non-empty paragraph: start a new one; empty paragraph: only add
      // paragraph spacing, and only for AM_SPACE
4275  if( m_pPam->GetPoint()->nContent.GetIndex() )
4276  AppendTextNode( eMode );
4277  else if( AM_SPACE==eMode )
4278  AddParSpace();
4279 
4280  // pop current context of stack
4281  std::unique_ptr<HTMLAttrContext> xCntxt(PopContext(getOnToken(nToken)));
4282 
4283  // and now end attributes
4284  if (xCntxt)
4285  {
4286  EndContext(xCntxt.get());
4287  SetAttr(); // because of JavaScript set paragraph attributes as fast as possible
4288  xCntxt.reset();
4289  }
4290 
4291  // reset existing style
4292  SetTextCollAttrs();
4293 }
4294 
// NOTE(review): the signature line (listing line 4295) is missing from this
// listing; presumably SwHTMLParser::NewDefList() - confirm.
// Opens a DEFLIST_ON context: reads ID/STYLE/CLASS/LANG/DIR, starts a new
// paragraph, increments m_nDefListDeep, stores the margins of the enclosing
// context (plus the left text margin of the DD style for nested lists that
// are not inside a DD) in the new context, applies style options and pushes
// the context.
4296 {
4297  OUString aId, aStyle, aClass, aLang, aDir;
4298 
4299  const HTMLOptions& rHTMLOptions = GetOptions();
      // the options are visited from the last one to the first one
4300  for (size_t i = rHTMLOptions.size(); i; )
4301  {
4302  const HTMLOption& rOption = rHTMLOptions[--i];
4303  switch( rOption.GetToken() )
4304  {
4305  case HtmlOptionId::ID:
4306  aId = rOption.GetString();
4307  break;
4308  case HtmlOptionId::STYLE:
4309  aStyle = rOption.GetString();
4310  break;
4311  case HtmlOptionId::CLASS:
4312  aClass = rOption.GetString();
4313  break;
4314  case HtmlOptionId::LANG:
4315  aLang = rOption.GetString();
4316  break;
4317  case HtmlOptionId::DIR:
4318  aDir = rOption.GetString();
4319  break;
4320  default: break;
4321  }
4322  }
4323 
4324  // open a new paragraph
      // paragraph spacing is only used when neither a numbered list nor a
      // definition list is open yet
4325  bool bSpace = (GetNumInfo().GetDepth() + m_nDefListDeep) == 0;
4326  if( m_pPam->GetPoint()->nContent.GetIndex() )
4327  AppendTextNode( bSpace ? AM_SPACE : AM_SOFTNOSPACE );
4328  else if( bSpace )
4329  AddParSpace();
4330 
4331  // one level more
4332  m_nDefListDeep++;
4333 
      // look down the context stack: is the nearest list-related context a DD
      // (bInDD) or one of the list containers (bNotInDD)?
4334  bool bInDD = false, bNotInDD = false;
4335  auto nPos = m_aContexts.size();
4336  while( !bInDD && !bNotInDD && nPos>m_nContextStMin )
4337  {
4338  HtmlTokenId nCntxtToken = m_aContexts[--nPos]->GetToken();
4339  switch( nCntxtToken )
4340  {
4341  case HtmlTokenId::DEFLIST_ON:
4342  case HtmlTokenId::DIRLIST_ON:
4343  case HtmlTokenId::MENULIST_ON:
4344  case HtmlTokenId::ORDERLIST_ON:
4345  case HtmlTokenId::UNORDERLIST_ON:
4346  bNotInDD = true;
4347  break;
4348  case HtmlTokenId::DD_ON:
4349  bInDD = true;
4350  break;
4351  default: break;
4352  }
4353  }
4354 
4355  // ... and save in a context
4356  std::unique_ptr<HTMLAttrContext> xCntxt(new HTMLAttrContext(HtmlTokenId::DEFLIST_ON));
4357 
4358  // in it save also the margins
4359  sal_uInt16 nLeft=0, nRight=0;
4360  short nIndent=0;
4361  GetMarginsFromContext( nLeft, nRight, nIndent );
4362 
4363  // The indentation, which already results from a DL, correlates with a DT
4364  // on the current level and this correlates to a DD from the previous level.
4365  // For a level >=2 we must add DD distance.
4366  if( !bInDD && m_nDefListDeep > 1 )
4367  {
4368 
4369  // and the one of the DT-style of the current level
      // NOTE(review): the comment above says DT, the code reads the DD
      // collection (RES_POOLCOLL_HTML_DD) - confirm which one is intended.
4370  SvxLRSpaceItem rLRSpace =
4371  m_pCSS1Parser->GetTextFormatColl(RES_POOLCOLL_HTML_DD, OUString())
4372  ->GetLRSpace();
4373  nLeft = nLeft + static_cast< sal_uInt16 >(rLRSpace.GetTextLeft());
4374  }
4375 
4376  xCntxt->SetMargins( nLeft, nRight, nIndent );
4377 
4378  // parse styles
4379  if( HasStyleOptions( aStyle, aId, aClass, &aLang, &aDir ) )
4380  {
4381  SfxItemSet aItemSet( m_xDoc->GetAttrPool(), m_pCSS1Parser->GetWhichMap() );
4382  SvxCSS1PropertyInfo aPropInfo;
4383 
4384  if( ParseStyleOptions( aStyle, aId, aClass, aItemSet, aPropInfo, &aLang, &aDir ) )
4385  {
4386  DoPositioning( aItemSet, aPropInfo, xCntxt.get() );
4387  InsertAttrs( aItemSet, aPropInfo, xCntxt.get() );
4388  }
4389  }
4390 
4391  PushContext(xCntxt);
4392 
4393  // set the attributes of the new style
4394  if( m_nDefListDeep > 1 )
4395  SetTextCollAttrs(m_aContexts.back().get());
4396 }
4397 
4399 {
      // Closes one definition-list level: finishes the running paragraph,
      // lowers m_nDefListDeep, pops the DEFLIST_ON context, ends its
      // attributes and re-applies the paragraph style.
      // NOTE(review): the signature line (listing line 4398) is absent from
      // this copy; presumably this is SwHTMLParser::EndDefList() - confirm.

      // bSpace is true only when list depth plus definition-list depth is
      // exactly 1; it selects AM_SPACE / AddParSpace() below.
4400  bool bSpace = (GetNumInfo().GetDepth() + m_nDefListDeep) == 1;
      // non-zero content index: the insertion point is not at paragraph start
4401  if( m_pPam->GetPoint()->nContent.GetIndex() )
4402  AppendTextNode( bSpace ? AM_SPACE : AM_SOFTNOSPACE );
4403  else if( bSpace )
4404  AddParSpace();
4405 
4406  // one level less
      // guarded so that the counter is never decremented below zero
4407  if( m_nDefListDeep > 0 )
4408  m_nDefListDeep--;
4409 
4410  // pop current context of stack
4411  std::unique_ptr<HTMLAttrContext> xCntxt(PopContext(HtmlTokenId::DEFLIST_ON));
4412 
4413  // and now end attributes
      // xCntxt is empty when PopContext() returned nothing for DEFLIST_ON
4414  if (xCntxt)
4415  {
4416  EndContext(xCntxt.get());
4417  SetAttr(); // because of JavaScript set paragraph attributes as fast as possible
4418  xCntxt.reset();
4419  }
4420 
4421  // and set style
4422  SetTextCollAttrs();
4423 }
4424 
4426 {
      // Opens a DD or DT paragraph for nToken; if no enclosing DL context is
      // found, a definition list is opened implicitly first.
      // NOTE(review): the signature line (listing line 4425) is absent from
      // this copy; nToken is presumably the HtmlTokenId parameter of
      // SwHTMLParser::NewDefListItem - confirm.
4427  // determine if the DD/DT exist in a DL
4428  bool bInDefList = false, bNotInDefList = false;
      // walk the context stack from the top down to m_nContextStMin and stop
      // at the first list context of any kind
4429  auto nPos = m_aContexts.size();
4430  while( !bInDefList && !bNotInDefList && nPos>m_nContextStMin )
4431  {
4432  HtmlTokenId nCntxtToken = m_aContexts[--nPos]->GetToken();
4433  switch( nCntxtToken )
4434  {
4435  case HtmlTokenId::DEFLIST_ON:
4436  bInDefList = true;
4437  break;
4438  case HtmlTokenId::DIRLIST_ON:
4439  case HtmlTokenId::MENULIST_ON:
4440  case HtmlTokenId::ORDERLIST_ON:
4441  case HtmlTokenId::UNORDERLIST_ON:
4442  bNotInDefList = true;
4443  break;
4444  default: break;
4445  }
4446  }
4447 
4448  // if not, then implicitly open a new DL
4449  if( !bInDefList )
4450  {
4451  m_nDefListDeep++;
4452  OSL_ENSURE( m_nOpenParaToken == HtmlTokenId::NONE,
4453  "Now an open paragraph element will be lost." );
      // remember the token as the currently open paragraph element
4454  m_nOpenParaToken = nToken;
4455  }
4456 
      // DD_ON maps to the DD pool style, every other token to the DT style
4457  NewTextFormatColl( nToken, static_cast< sal_uInt16 >(nToken==HtmlTokenId::DD_ON ? RES_POOLCOLL_HTML_DD
4458  : RES_POOLCOLL_HTML_DT) );
4459 }
4460 
4462 {
      // Closes a DD/DT item: searches the context stack for the matching
      // DD_ON/DT_ON context, removes it from the stack and ends its attributes.
      // NOTE(review): the signature line (listing line 4461) is absent from
      // this copy; nToken is presumably the HtmlTokenId parameter of
      // SwHTMLParser::EndDefListItem - confirm.
4463  // open a new paragraph
4464  if( nToken == HtmlTokenId::NONE && m_pPam->GetPoint()->nContent.GetIndex() )
      // NOTE(review): the listing jumps from 4464 to 4466, so the statement
      // guarded by the `if` above is missing from this copy - restore it from
      // the upstream file before compiling.
4466 
4467  // search context matching the token and fetch it from stack
4468  nToken = getOnToken(nToken);
4469  std::unique_ptr<HTMLAttrContext> xCntxt;
4470  auto nPos = m_aContexts.size();
4471  while( !xCntxt && nPos>m_nContextStMin )
4472  {
4473  HtmlTokenId nCntxtToken = m_aContexts[--nPos]->GetToken();
4474  switch( nCntxtToken )
4475  {
4476  case HtmlTokenId::DD_ON:
4477  case HtmlTokenId::DT_ON:
      // NONE matches either kind of item, otherwise the token must be equal
4478  if( nToken == HtmlTokenId::NONE || nToken == nCntxtToken )
4479  {
      // take ownership first, then drop the now empty slot from the stack
4480  xCntxt = std::move(m_aContexts[nPos]);
4481  m_aContexts.erase( m_aContexts.begin() + nPos );
4482  }
4483  break;
4484  case HtmlTokenId::DEFLIST_ON:
4485  // don't look at DD/DT outside the current DefList
4486  case HtmlTokenId::DIRLIST_ON:
4487  case HtmlTokenId::MENULIST_ON:
4488  case HtmlTokenId::ORDERLIST_ON:
4489  case HtmlTokenId::UNORDERLIST_ON:
4490  // and also not outside another list
      // NOTE(review): listing line 4491 is missing here. As shown, these
      // cases only leave the switch and the loop keeps searching, which
      // contradicts the two comments above - the missing statement
      // presumably terminates the search; confirm against upstream.
4492  break;
4493  default: break;
4494  }
4495  }
4496 
4497  // and now end attributes
4498  if (xCntxt)
4499  {
4500  EndContext(xCntxt.get());
4501  SetAttr(); // because of JavaScript set paragraph attributes as fast as possible
4502  }
4503 }
4504 
4514 bool SwHTMLParser::HasCurrentParaFlys( bool bNoSurroundOnly,
4515  bool bSurroundOnly ) const
4516 {
4517  SwNodeIndex& rNodeIdx = m_pPam->GetPoint()->nNode;
4518 
4519  const SwFrameFormats& rFrameFormatTable = *m_xDoc->GetSpzFrameFormats();
4520 
4521  bool bFound = false;
4522  for ( size_t i=0; i<rFrameFormatTable.size(); i++ )
4523  {
4524  const SwFrameFormat *const pFormat = rFrameFormatTable[i];
4525  SwFormatAnchor const*const pAnchor = &pFormat->GetAnchor();
4526  // A frame was found, when
4527  // - it is paragraph-bound, and
4528  // - is anchored in current paragraph, and
4529  // - every paragraph-bound frame counts, or
4530  // - (only frames without wrapping count and) the frame doesn't have
4531  // a wrapping
4532  SwPosition const*const pAPos = pAnchor->GetContentAnchor();
4533  if (pAPos &&
4534  ((RndStdIds::FLY_AT_PARA == pAnchor->GetAnchorId()) ||
4535  (RndStdIds::FLY_AT_CHAR == pAnchor->GetAnchorId())) &&
4536  pAPos->nNode == rNodeIdx )
4537  {
4538  if( !(bNoSurroundOnly || bSurroundOnly) )
4539  {
4540  bFound = true;
4541  break;
4542  }
4543  else
4544  {
4545  // When looking for frames with wrapping, also disregard
4546  // ones with wrap-through. In this case it's (still) HIDDEN-Controls,
4547  // and you don't want to evade those when positioning.
4548  css::text::WrapTextMode eSurround = pFormat->GetSurround().GetSurround();
4549  if( bNoSurroundOnly )
4550  {
4551  if( css::text::WrapTextMode_NONE==eSurround )
4552  {
4553  bFound = true;
4554  break;
4555  }
4556  }
4557  if( bSurroundOnly )
4558  {
4559  if( css::text::WrapTextMode_NONE==eSurround )
4560  {
4561  bFound = false;
4562  break;
4563  }
4564  else if( css::text::WrapTextMode_THROUGH!=eSurround )
4565  {
4566  bFound = true;
4567  // Continue searching: It's possible that some without
4568  // wrapping will follow...
4569  }
4570  }
4571  }
4572  }
4573  }
4574 
4575  return bFound;
4576 }
4577 
4578 // the special methods for inserting of objects
4579 
4581 {
      // Returns the address of the format collection of the content node at
      // the current PaM, or nullptr when the PaM has no content node.
      // NOTE(review): the signature line (listing line 4580) is absent from
      // this copy; presumably SwHTMLParser::GetCurrFormatColl() - confirm.
4582  const SwContentNode* pCNd = m_pPam->GetContentNode();
4583  return pCNd ? &pCNd->GetAnyFormatColl() : nullptr;
4584 }
4585 
4587 {
      // Determines from the context stack which paragraph style, which hard
      // attributes and which margins apply at the current PaM, and sets them.
      // If pContext is non-null and carries a style, the resulting margins and
      // upper/lower spacing are stored back into pContext.
      // NOTE(review): the signature line (listing line 4586) is absent from
      // this copy; pContext is presumably the HTMLAttrContext* parameter of
      // SwHTMLParser::SetTextCollAttrs - confirm.
4588  SwTextFormatColl *pCollToSet = nullptr; // the style to set
      // pItemSet is allocated on demand with new and deleted at the end of
      // this function
4589  SfxItemSet *pItemSet = nullptr; // set of hard attributes
4590  sal_uInt16 nTopColl = pContext ? pContext->GetTextFormatColl() : 0;
4591  const OUString rTopClass = pContext ? pContext->GetClass() : OUString();
4592  sal_uInt16 nDfltColl = RES_POOLCOLL_TEXT;
4593 
4594  bool bInPRE=false; // some context info
4595 
4596  sal_uInt16 nLeftMargin = 0, nRightMargin = 0; // the margins and
4597  short nFirstLineIndent = 0; // indentations
4598 
      // walk the contexts from m_nContextStAttrMin upwards; later contexts
      // override the style chosen by earlier ones
4599  for( auto i = m_nContextStAttrMin; i < m_aContexts.size(); ++i )
4600  {
4601  const HTMLAttrContext *pCntxt = m_aContexts[i].get();
4602 
4603  sal_uInt16 nColl = pCntxt->GetTextFormatColl();
4604  if( nColl )
4605  {
4606  // There is a style to set. Then at first we must decide,
4607  // if the style can be set.
4608  bool bSetThis = true;
4609  switch( nColl )
4610  {
4611  case RES_POOLCOLL_HTML_PRE:
4612  bInPRE = true;
4613  break;
4614  case RES_POOLCOLL_TEXT:
4615  // <TD><P CLASS=xxx> must become TD.xxx
4616  if( nDfltColl==RES_POOLCOLL_TABLE ||
4617  nDfltColl==RES_POOLCOLL_TABLE_HDLN )
4618  nColl = nDfltColl;
4619  break;
4620  case RES_POOLCOLL_HTML_HR:
4621  // also <HR> in <PRE> set as style, otherwise it can't
4622  // be exported anymore
4623  break;
4624  default:
      // inside PRE every other style is applied as hard attributes only
4625  if( bInPRE )
4626  bSetThis = false;
4627  break;
4628  }
4629 
4630  SwTextFormatColl *pNewColl =
4631  m_pCSS1Parser->GetTextFormatColl( nColl, pCntxt->GetClass() );
4632 
4633  if( bSetThis )
4634  {
4635  // If now a different style should be set as previously, the
4636  // previous style must be replaced by hard attribution.
4637 
4638  if( pCollToSet )
4639  {
4640  // insert the attributes hard, which previous style sets
4641  if( !pItemSet )
4642  pItemSet = new SfxItemSet( pCollToSet->GetAttrSet() );
4643  else
4644  {
4645  const SfxItemSet& rCollSet = pCollToSet->GetAttrSet();
4646  SfxItemSet aItemSet( *rCollSet.GetPool(),
4647  rCollSet.GetRanges() );
4648  aItemSet.Set( rCollSet );
4649  pItemSet->Put( aItemSet );
4650  }
4651  // but remove the attributes, which the current style sets,
4652  // because otherwise they will be overwritten later
4653  pItemSet->Differentiate( pNewColl->GetAttrSet() );
4654  }
4655 
4656  pCollToSet = pNewColl;
4657  }
4658  else
4659  {
4660  // hard attribution
4661  if( !pItemSet )
4662  pItemSet = new SfxItemSet( pNewColl->GetAttrSet() );
4663  else
4664  {
4665  const SfxItemSet& rCollSet = pNewColl->GetAttrSet();
4666  SfxItemSet aItemSet( *rCollSet.GetPool(),
4667  rCollSet.GetRanges() );
4668  aItemSet.Set( rCollSet );
4669  pItemSet->Put( aItemSet );
4670  }
4671  }
4672  }
4673  else
4674  {
4675  // Maybe a default style exists?
4676  nColl = pCntxt->GetDfltTextFormatColl();
4677  if( nColl )
4678  nDfltColl = nColl;
4679  }
4680 
4681  // if applicable fetch new paragraph indents
4682  if( pCntxt->IsLRSpaceChanged() )
4683  {
4684  sal_uInt16 nLeft=0, nRight=0;
4685 
4686  pCntxt->GetMargins( nLeft, nRight, nFirstLineIndent );
4687  nLeftMargin = nLeft;
4688  nRightMargin = nRight;
4689  }
4690  }
4691 
4692  // If in current context a new style should be set,
4693  // its paragraph margins must be inserted in the context.
4694  if( pContext && nTopColl )
4695  {
4696  // <TD><P CLASS=xxx> must become TD.xxx
4697  if( nTopColl==RES_POOLCOLL_TEXT &&
4698  (nDfltColl==RES_POOLCOLL_TABLE ||
4699  nDfltColl==RES_POOLCOLL_TABLE_HDLN) )
4700  nTopColl = nDfltColl;
4701 
4702  const SwTextFormatColl *pTopColl =
4703  m_pCSS1Parser->GetTextFormatColl( nTopColl, rTopClass );
4704  const SfxItemSet& rItemSet = pTopColl->GetAttrSet();
4705  const SfxPoolItem *pItem;
4706  if( SfxItemState::SET == rItemSet.GetItemState(RES_LR_SPACE,true, &pItem) )
4707  {
4708  const SvxLRSpaceItem *pLRItem =
4709  static_cast<const SvxLRSpaceItem *>(pItem);
4710 
4711  sal_Int32 nLeft = pLRItem->GetTextLeft();
4712  sal_Int32 nRight = pLRItem->GetRight();
4713  nFirstLineIndent = pLRItem->GetTextFirstLineOffset();
4714 
4715  // In Definition lists the margins also contain the margins from the previous levels
4716  if( RES_POOLCOLL_HTML_DD == nTopColl )
4717  {
4718  const SvxLRSpaceItem& rDTLRSpace = m_pCSS1Parser
4719  ->GetTextFormatColl(RES_POOLCOLL_HTML_DT, OUString())
4720  ->GetLRSpace();
4721  nLeft -= rDTLRSpace.GetTextLeft();
4722  nRight -= rDTLRSpace.GetRight();
4723  }
4724  else if( RES_POOLCOLL_HTML_DT == nTopColl )
4725  {
4726  nLeft = 0;
4727  nRight = 0;
4728  }
4729 
4730  // the paragraph margins add up
      // the 32-bit values are narrowed to sal_uInt16 before the addition
4731  nLeftMargin = nLeftMargin + static_cast< sal_uInt16 >(nLeft);
4732  nRightMargin = nRightMargin + static_cast< sal_uInt16 >(nRight);
4733 
4734  pContext->SetMargins( nLeftMargin, nRightMargin,
4735  nFirstLineIndent );
4736  }
4737  if( SfxItemState::SET == rItemSet.GetItemState(RES_UL_SPACE,true, &pItem) )
4738  {
4739  const SvxULSpaceItem *pULItem =
4740  static_cast<const SvxULSpaceItem *>(pItem);
4741  pContext->SetULSpace( pULItem->GetUpper(), pULItem->GetLower() );
4742  }
4743  }
4744 
4745  // If no style is set in the context use the text body.
4746  if( !pCollToSet )
4747  {
4748  pCollToSet = m_pCSS1Parser->GetTextCollFromPool( nDfltColl );
4749  const SvxLRSpaceItem& rLRItem = pCollToSet->GetLRSpace();
      // the style's own values are used only where the contexts gave zero
4750  if( !nLeftMargin )
4751  nLeftMargin = static_cast< sal_uInt16 >(rLRItem.GetTextLeft());
4752  if( !nRightMargin )
4753  nRightMargin = static_cast< sal_uInt16 >(rLRItem.GetRight());
4754  if( !nFirstLineIndent )
4755  nFirstLineIndent = rLRItem.GetTextFirstLineOffset();
4756  }
4757 
4758  // remove previous hard attribution of paragraph
4759  for( auto pParaAttr : m_aParaAttrs )
4760  pParaAttr->Invalidate();
4761  m_aParaAttrs.clear();
4762 
4763  // set the style
4764  m_xDoc->SetTextFormatColl( *m_pPam, pCollToSet );
4765 
4766  // if applicable correct the paragraph indent
4767  const SvxLRSpaceItem& rLRItem = pCollToSet->GetLRSpace();
4768  bool bSetLRSpace = nLeftMargin != rLRItem.GetTextLeft() ||
4769  nFirstLineIndent != rLRItem.GetTextFirstLineOffset() ||
4770  nRightMargin != rLRItem.GetRight();
4771 
4772  if( bSetLRSpace )
4773  {
4774  SvxLRSpaceItem aLRItem( rLRItem );
4775  aLRItem.SetTextLeft( nLeftMargin );
4776  aLRItem.SetRight( nRightMargin );
4777  aLRItem.SetTextFirstLineOffset( nFirstLineIndent );
      // with a hard-attribute set present the item travels with it,
      // otherwise it is opened and closed as a paragraph attribute of its own
4778  if( pItemSet )
4779  pItemSet->Put( aLRItem );
4780  else
4781  {
4782  NewAttr(m_xAttrTab, &m_xAttrTab->pLRSpace, aLRItem);
4783  m_xAttrTab->pLRSpace->SetLikePara();
4784  m_aParaAttrs.push_back( m_xAttrTab->pLRSpace );
4785  EndAttr( m_xAttrTab->pLRSpace, false );
4786  }
4787  }
4788 
4789  // and now set the attributes
4790  if( pItemSet )
4791  {
4792  InsertParaAttrs( *pItemSet );
4793  delete pItemSet;
4794  }
4795 }
4796 
4798 {
      // Opens a character-format context for nToken: reads the ID, STYLE,
      // CLASS, LANG and DIR options, applies inline style options, inserts the
      // character format looked up for token and class, and pushes the
      // context.
      // NOTE(review): the signature line (listing line 4797) is absent from
      // this copy; nToken is presumably the HtmlTokenId parameter of
      // SwHTMLParser::NewCharFormat - confirm.
4799  OUString aId, aStyle, aLang, aDir;
4800  OUString aClass;
4801 
      // the options are visited from last to first
4802  const HTMLOptions& rHTMLOptions = GetOptions();
4803  for (size_t i = rHTMLOptions.size(); i; )
4804  {
4805  const HTMLOption& rOption = rHTMLOptions[--i];
4806  switch( rOption.GetToken() )
4807  {
4808  case HtmlOptionId::ID:
4809  aId = rOption.GetString();
4810  break;
4811  case HtmlOptionId::STYLE:
4812  aStyle = rOption.GetString();
4813  break;
4814  case HtmlOptionId::CLASS:
4815  aClass = rOption.GetString();
4816  break;
4817  case HtmlOptionId::LANG:
4818  aLang = rOption.GetString();
4819  break;
4820  case HtmlOptionId::DIR:
4821  aDir = rOption.GetString();
4822  break;
4823  default: break;
4824  }
4825  }
4826 
4827  // create a new context
4828  std::unique_ptr<HTMLAttrContext> xCntxt(new HTMLAttrContext(nToken));
4829 
4830  // set the style and save it in the context
4831  SwCharFormat* pCFormat = m_pCSS1Parser->GetChrFormat( nToken, aClass );
4832  OSL_ENSURE( pCFormat, "No character format found for token" );
4833 
4834  // parse styles (regarding class see also NewPara)
      // the class is deliberately passed as empty here; it was already used
      // for the character-format lookup above
4835  if (HasStyleOptions(aStyle, aId, {}, &aLang, &aDir))
4836  {
4837  SfxItemSet aItemSet( m_xDoc->GetAttrPool(), m_pCSS1Parser->GetWhichMap() );
4838  SvxCSS1PropertyInfo aPropInfo;
4839 
4840  if (ParseStyleOptions(aStyle, aId, OUString(), aItemSet, aPropInfo, &aLang, &aDir))
4841  {
4842  OSL_ENSURE( aClass.isEmpty() || !m_pCSS1Parser->GetClass( aClass ),
4843  "Class is not considered" );
4844  DoPositioning( aItemSet, aPropInfo, xCntxt.get() );
4845  InsertAttrs( aItemSet, aPropInfo, xCntxt.get(), true );
4846  }
4847  }
4848 
4849  // Character formats are stored in their own stack and can never be inserted
4850  // by styles. Therefore the attribute doesn't exist in CSS1-Which-Range.
4851  if( pCFormat )
4852  InsertAttr( &m_xAttrTab->pCharFormats, SwFormatCharFormat( pCFormat ), xCntxt.get() );
4853 
4854  // save the context
4855  PushContext(xCntxt);
4856 }
4857 
4859 {
      // Handles a spacer element: reads the TYPE, ALIGN, WIDTH, HEIGHT and
      // SIZE options and then, depending on nType, inserts a protected empty
      // fly frame (BLOCK), adds vertical spacing (VERT) or adds horizontal
      // spacing (HORI, the default).
      // NOTE(review): the signature line (listing line 4858) is absent from
      // this copy; presumably SwHTMLParser::InsertSpacer() - confirm. Several
      // interior listing lines are missing as well, see the notes below.
4860  // and if applicable change it via the options
4861  sal_Int16 eVertOri = text::VertOrientation::TOP;
4862  sal_Int16 eHoriOri = text::HoriOrientation::NONE;
4863  Size aSize( 0, 0);
4864  tools::Long nSize = 0;
4865  bool bPercentWidth = false;
4866  bool bPercentHeight = false;
4867  sal_uInt16 nType = HTML_SPTYPE_HORI;
4868 
4869  const HTMLOptions& rHTMLOptions = GetOptions();
4870  for (size_t i = rHTMLOptions.size(); i; )
4871  {
4872  const HTMLOption& rOption = rHTMLOptions[--i];
4873  switch( rOption.GetToken() )
4874  {
4875  case HtmlOptionId::TYPE:
4876  rOption.GetEnum( nType, aHTMLSpacerTypeTable );
4877  break;
4878  case HtmlOptionId::ALIGN:
      // one ALIGN value is looked up in both the vertical and the
      // horizontal table; the current value is passed as fallback
4879  eVertOri =
4880  rOption.GetEnum( aHTMLImgVAlignTable,
4881  eVertOri );
4882  eHoriOri =
4883  rOption.GetEnum( aHTMLImgHAlignTable,
4884  eHoriOri );
4885  break;
4886  case HtmlOptionId::WIDTH:
4887  // First only save as pixel value!
4888  bPercentWidth = (rOption.GetString().indexOf('%') != -1);
4889  aSize.setWidth( static_cast<tools::Long>(rOption.GetNumber()) );
4890  break;
4891  case HtmlOptionId::HEIGHT:
4892  // First only save as pixel value!
4893  bPercentHeight = (rOption.GetString().indexOf('%') != -1);
4894  aSize.setHeight( static_cast<tools::Long>(rOption.GetNumber()) );
4895  break;
4896  case HtmlOptionId::SIZE:
4897  // First only save as pixel value!
4898  nSize = rOption.GetNumber();
4899  break;
4900  default: break;
4901  }
4902  }
4903 
4904  switch( nType )
4905  {
4906  case HTML_SPTYPE_BLOCK:
4907  {
4908  // create an empty text frame
4909 
4910  // fetch the ItemSet
4911  SfxItemSet aFrameSet( m_xDoc->GetAttrPool(),
      // NOTE(review): listing line 4912 is missing; it must have held the
      // remaining constructor argument(s) and the closing `);` of aFrameSet.
4913  if( !IsNewDoc() )
4914  Reader::ResetFrameFormatAttrs( aFrameSet );
4915 
4916  // set the anchor and the adjustment
4917  SetAnchorAndAdjustment( eVertOri, eHoriOri, aFrameSet );
4918 
4919  // and the size of the frame
4920  Size aDfltSz( MINFLY, MINFLY );
4921  Size aSpace( 0, 0 );
4922  SfxItemSet aDummyItemSet( m_xDoc->GetAttrPool(),
4923  m_pCSS1Parser->GetWhichMap() );
4924  SvxCSS1PropertyInfo aDummyPropInfo;
4925 
4926  SetFixSize( aSize, aDfltSz, bPercentWidth, bPercentHeight,
4927  aDummyPropInfo, aFrameSet );
4928  SetSpace( aSpace, aDummyItemSet, aDummyPropInfo, aFrameSet );
4929 
4930  // protect the content
4931  SvxProtectItem aProtectItem( RES_PROTECT) ;
4932  aProtectItem.SetContentProtect( true );
4933  aFrameSet.Put( aProtectItem );
4934 
4935  // create the frame
4936  RndStdIds eAnchorId =
4937  aFrameSet.Get(RES_ANCHOR).GetAnchorId();
4938  SwFrameFormat *pFlyFormat = m_xDoc->MakeFlySection( eAnchorId,
4939  m_pPam->GetPoint(), &aFrameSet );
4940  // Possibly create frames and register auto-bound frames.
4941  RegisterFlyFrame( pFlyFormat );
4942  }
4943  break;
4944  case HTML_SPTYPE_VERT:
4945  if( nSize > 0 )
4946  {
      // NOTE(review): listing lines 4947 and 4949 are missing. Judging by
      // the surviving fragment, nSize is converted from pixels to twips via
      // PixelToLogic() under some condition - restore from upstream.
4948  {
4950  ->PixelToLogic( Size(0,nSize),
4951  MapMode(MapUnit::MapTwip) ).Height();
4952  }
4953 
4954  // set a paragraph margin
4955  SwTextNode *pTextNode = nullptr;
4956  if( !m_pPam->GetPoint()->nContent.GetIndex() )
4957  {
4958  // if possible change the bottom paragraph margin
4959  // of previous node
4960 
4961  SetAttr(); // set still open paragraph attributes
4962 
      // the node directly before the current one; GetTextNode() yields
      // null when that node is not a text node
4963  pTextNode = m_xDoc->GetNodes()[m_pPam->GetPoint()->nNode.GetIndex()-1]
4964  ->GetTextNode();
4965 
4966  // If the previous paragraph isn't a text node, then now an
4967  // empty paragraph is created, which already generates a single
4968  // line of spacing.
4969  if( !pTextNode )
4970  nSize = nSize>HTML_PARSPACE ? nSize-HTML_PARSPACE : 0;
4971  }
4972 
4973  if( pTextNode )
4974  {
4975  SvxULSpaceItem aULSpace( static_cast<const SvxULSpaceItem&>(pTextNode
      // NOTE(review): listing line 4976 is missing; it must have completed
      // the expression that fetches the node's current UL-space item.
4977  aULSpace.SetLower( aULSpace.GetLower() + static_cast<sal_uInt16>(nSize) );
4978  pTextNode->SetAttr( aULSpace );
4979  }
4980  else
4981  {
4982  NewAttr(m_xAttrTab, &m_xAttrTab->pULSpace, SvxULSpaceItem(0, static_cast<sal_uInt16>(nSize), RES_UL_SPACE));
4983  EndAttr( m_xAttrTab->pULSpace, false );
4984 
4985  AppendTextNode(); // Don't change spacing!
4986  }
4987  }
4988  break;
4989  case HTML_SPTYPE_HORI:
4990  if( nSize > 0 )
4991  {
4992  // If the paragraph is still empty, set first line
4993  // indentation, otherwise apply letter spacing over a space.
4994 
      // NOTE(review): listing lines 4995 and 4997 are missing; same
      // pixel-to-twip conversion fragment as in the VERT case above.
4996  {
4998  ->PixelToLogic( Size(nSize,0),
4999  MapMode(MapUnit::MapTwip) ).Width();
5000  }
5001 
5002  if( !m_pPam->GetPoint()->nContent.GetIndex() )
5003  {
5004  sal_uInt16 nLeft=0, nRight=0;
5005  short nIndent = 0;
5006 
5007  GetMarginsFromContextWithNumberBullet( nLeft, nRight, nIndent );
5008  nIndent = nIndent + static_cast<short>(nSize);
5009 
5010  SvxLRSpaceItem aLRItem( RES_LR_SPACE );
5011  aLRItem.SetTextLeft( nLeft );
5012  aLRItem.SetRight( nRight );
5013  aLRItem.SetTextFirstLineOffset( nIndent );
5014 
5015  NewAttr(m_xAttrTab, &m_xAttrTab->pLRSpace, aLRItem);
5016  EndAttr( m_xAttrTab->pLRSpace, false );
5017  }
5018  else
5019  {
      // a single blank is inserted while the kerning attribute is open
5020  NewAttr(m_xAttrTab, &m_xAttrTab->pKerning, SvxKerningItem( static_cast<short>(nSize), RES_CHRATR_KERNING ));
5021  m_xDoc->getIDocumentContentOperations().InsertString( *m_pPam, " " );
5022  EndAttr( m_xAttrTab->pKerning );
5023  }
5024  }
5025  }
5026 }
5027 
5028 sal_uInt16 SwHTMLParser::ToTwips( sal_uInt16 nPixel )
5029 {
5030  if( nPixel && Application::GetDefaultDevice() )
5031  {
5033  Size( nPixel, nPixel ), MapMode( MapUnit::MapTwip ) ).Width();
5034  return static_cast<sal_uInt16>(std::min(nTwips, SwTwips(SAL_MAX_UINT16)));
5035  }
5036  else
5037  return nPixel;
5038 }
5039 
5041 {
      // Returns a browse width: nWidth when it is non-zero, otherwise the
      // width cached in m_aHTMLPageSize, which is computed on first use from
      // the master page format (frame size minus left/right and upper/lower
      // spacing).
      // NOTE(review): the signature line (listing line 5040) is absent from
      // this copy; presumably SwHTMLParser::GetCurrentBrowseWidth() - confirm.
      // NOTE(review): listing line 5042 is missing as well; it must have
      // declared and initialised nWidth, which is otherwise undeclared here.
5043  if( nWidth )
5044  return nWidth;
5045 
      // a zero width marks the cached page size as not yet computed
5046  if( !m_aHTMLPageSize.Width() )
5047  {
5048  const SwFrameFormat& rPgFormat = m_pCSS1Parser->GetMasterPageDesc()->GetMaster();
5049 
5050  const SwFormatFrameSize& rSz = rPgFormat.GetFrameSize();
5051  const SvxLRSpaceItem& rLR = rPgFormat.GetLRSpace();
5052  const SvxULSpaceItem& rUL = rPgFormat.GetULSpace();
5053  const SwFormatCol& rCol = rPgFormat.GetCol();
5054 
5055  m_aHTMLPageSize.setWidth( rSz.GetWidth() - rLR.GetLeft() - rLR.GetRight() );
5056  m_aHTMLPageSize.setHeight( rSz.GetHeight() - rUL.GetUpper() - rUL.GetLower() );
5057 
5058  if( 1 < rCol.GetNumCols() )
      // NOTE(review): listing line 5059 is missing; the statement guarded by
      // the multi-column check above is not part of this copy - restore it
      // from upstream.
5060  }
5061 
5062  return m_aHTMLPageSize.Width();
5063 }
5064 
5066 {
      // Looks for an ID option on the current element and, if it has a
      // non-empty value, inserts a bookmark with that name.
      // NOTE(review): the signature line (listing line 5065) is absent from
      // this copy; presumably SwHTMLParser::InsertIDOption() - confirm.
5067  OUString aId;
5068  const HTMLOptions& rHTMLOptions = GetOptions();
      // scanned from last to first; the first ID met (the last one in the
      // option list) wins
5069  for (size_t i = rHTMLOptions.size(); i; )
5070  {
5071  const HTMLOption& rOption = rHTMLOptions[--i];
5072  if( HtmlOptionId::ID==rOption.GetToken() )
5073  {
5074  aId = rOption.GetString();
5075  break;
5076  }
5077  }
5078 
5079  if( !aId.isEmpty() )
5080  InsertBookmark( aId );
5081 }
5082 
5084 {
      // Handles a line break element including its CLEAR, ID, STYLE and CLASS
      // options; the rules applied are listed in the comment below.
      // NOTE(review): the signature line (listing line 5083) is absent from
      // this copy; presumably SwHTMLParser::InsertLineBreak() - confirm.
5085  // <BR CLEAR=xxx> is handled as:
5086  // 1.) Only regard the paragraph-bound frames anchored in current paragraph.
5087  // 2.) For left-justified aligned frames, CLEAR=LEFT or ALL, and for right-
5088  // justified aligned frames, CLEAR=RIGHT or ALL, the wrap-through is
5089  // changed as following:
5090  // 3.) If the paragraph contains no text, then the frames don't get a wrapping
5091  // 4.) otherwise a left aligned frame gets a right "only anchor" wrapping
5092  // and a right aligned frame gets a left "only anchor" wrapping.
5093  // 5.) if in a non-empty paragraph the wrapping of a frame is changed,
5094  // then a new paragraph is opened
5095  // 6.) If no wrappings of frames are changed, a hard line break is inserted.
5096 
5097  OUString aId, aStyle, aClass; // the id of bookmark
5098  bool bClearLeft = false, bClearRight = false;
5099  bool bCleared = false; // Was a CLEAR executed?
5100 
5101  // then we fetch the options
5102  const HTMLOptions& rHTMLOptions = GetOptions();
5103  for (size_t i = rHTMLOptions.size(); i; )
5104  {
5105  const HTMLOption& rOption = rHTMLOptions[--i];
5106  switch( rOption.GetToken() )
5107  {
5108  case HtmlOptionId::CLEAR:
5109  {
      // values other than all/left/right leave both flags untouched
5110  const OUString &rClear = rOption.GetString();
5111  if( rClear.equalsIgnoreAsciiCase( OOO_STRING_SVTOOLS_HTML_AL_all ) )
5112  {
5113  bClearLeft = true;
5114  bClearRight = true;
5115  }
5116  else if( rClear.equalsIgnoreAsciiCase( OOO_STRING_SVTOOLS_HTML_AL_left ) )
5117  bClearLeft = true;
5118  else if( rClear.equalsIgnoreAsciiCase( OOO_STRING_SVTOOLS_HTML_AL_right ) )
5119  bClearRight = true;
5120  }
5121  break;
5122  case HtmlOptionId::ID:
5123  aId = rOption.GetString();
5124  break;
5125  case HtmlOptionId::STYLE:
5126  aStyle = rOption.GetString();
5127  break;
5128  case HtmlOptionId::CLASS:
5129  aClass = rOption.GetString();
5130  break;
5131  default: break;
5132  }
5133  }
5134 
5135  // CLEAR is only supported for the current paragraph
5136  if( bClearLeft || bClearRight )
5137  {
5138  SwNodeIndex& rNodeIdx = m_pPam->GetPoint()->nNode;
5139  SwTextNode* pTextNd = rNodeIdx.GetNode().GetTextNode();
5140  if( pTextNd )
5141  {
5142  const SwFrameFormats& rFrameFormatTable = *m_xDoc->GetSpzFrameFormats();
5143 
5144  for( size_t i=0; i<rFrameFormatTable.size(); i++ )
5145  {
5146  SwFrameFormat *const pFormat = rFrameFormatTable[i];
5147  SwFormatAnchor const*const pAnchor = &pFormat->GetAnchor();
5148  SwPosition const*const pAPos = pAnchor->GetContentAnchor();
      // only frames anchored at/in the current paragraph that still
      // have some kind of wrapping are candidates
5149  if (pAPos &&
5150  ((RndStdIds::FLY_AT_PARA == pAnchor->GetAnchorId()) ||
5151  (RndStdIds::FLY_AT_CHAR == pAnchor->GetAnchorId())) &&
5152  pAPos->nNode == rNodeIdx &&
5153  pFormat->GetSurround().GetSurround() != css::text::WrapTextMode_NONE )
5154  {
      // draw formats are always treated as left aligned here
5155  sal_Int16 eHori = RES_DRAWFRMFMT == pFormat->Which()
5156  ? text::HoriOrientation::LEFT
5157  : pFormat->GetHoriOrient().GetHoriOrient();
5158 
      // PARALLEL serves as the "leave unchanged" marker below
5159  css::text::WrapTextMode eSurround = css::text::WrapTextMode_PARALLEL;
5160  if( m_pPam->GetPoint()->nContent.GetIndex() )
5161  {
5162  if( bClearLeft && text::HoriOrientation::LEFT==eHori )
5163  eSurround = css::text::WrapTextMode_RIGHT;
5164  else if( bClearRight && text::HoriOrientation::RIGHT==eHori )
5165  eSurround = css::text::WrapTextMode_LEFT;
5166  }
5167  else if( (bClearLeft && text::HoriOrientation::LEFT==eHori) ||
5168  (bClearRight && text::HoriOrientation::RIGHT==eHori) )
5169  {
5170  eSurround = css::text::WrapTextMode_NONE;
5171  }
5172 
5173  if( css::text::WrapTextMode_PARALLEL != eSurround )
5174  {
5175  SwFormatSurround aSurround( eSurround );
5176  if( css::text::WrapTextMode_NONE != eSurround )
5177  aSurround.SetAnchorOnly( true );
5178  pFormat->SetFormatAttr( aSurround );
5179  bCleared = true;
5180  }
5181  }
5182  }
5183  }
5184  }
5185 
5186  // parse styles
      // starts out as a "no break" item and is replaced by a clone of the
      // parsed item when the style sets a break
5187  std::shared_ptr<SvxFormatBreakItem> aBreakItem(std::make_shared<SvxFormatBreakItem>(SvxBreak::NONE, RES_BREAK));
5188  bool bBreakItem = false;
5189  if( HasStyleOptions( aStyle, aId, aClass ) )
5190  {
5191  SfxItemSet aItemSet( m_xDoc->GetAttrPool(), m_pCSS1Parser->GetWhichMap() );
5192  SvxCSS1PropertyInfo aPropInfo;
5193 
5194  if( ParseStyleOptions( aStyle, aId, aClass, aItemSet, aPropInfo ) )
5195  {
5196  if( m_pCSS1Parser->SetFormatBreak( aItemSet, aPropInfo ) )
5197  {
5198  aBreakItem.reset(aItemSet.Get(RES_BREAK).Clone());
5199  bBreakItem = true;
5200  }
5201  if( !aPropInfo.m_aId.isEmpty() )
5202  InsertBookmark( aPropInfo.m_aId );
5203  }
5204  }
5205 
      // a page break *after* is set before the paragraph handling below,
      // a page break *before* only after it
5206  if( bBreakItem && SvxBreak::PageAfter == aBreakItem->GetBreak() )
5207  {
5208  NewAttr(m_xAttrTab, &m_xAttrTab->pBreak, *aBreakItem);
5209  EndAttr( m_xAttrTab->pBreak, false );
5210  }
5211 
5212  if( !bCleared && !bBreakItem )
5213  {
5214  // If no CLEAR could or should be executed, a line break will be inserted
5215  m_xDoc->getIDocumentContentOperations().InsertString( *m_pPam, "\x0A" );
5216  }
5217  else if( m_pPam->GetPoint()->nContent.GetIndex() )
5218  {
5219  // If a CLEAR is executed in a non-empty paragraph, then after it
5220  // a new paragraph has to be opened.
5221  // MIB 21.02.97: Here actually we should change the bottom paragraph
5222  // margin to zero. This will fail for something like this <BR ..><P>
5223  // (>Netscape). That's why we don't do it.
      // NOTE(review): listing line 5224 is missing; the statement that
      // actually opens the new paragraph is not part of this copy - restore
      // it from upstream.
5225  }
5226  if( bBreakItem && SvxBreak::PageBefore == aBreakItem->GetBreak() )
5227  {
5228  NewAttr(m_xAttrTab, &m_xAttrTab->pBreak, *aBreakItem);
5229  EndAttr( m_xAttrTab->pBreak, false );
5230  }
5231 }
5232 
5234 {
5235  sal_uInt16 nSize = 0;
5236  sal_uInt16 nWidth = 0;
5237 
5238  SvxAdjust eAdjust = SvxAdjust::End;
5239 
5240  bool bPercentWidth = false;
5241  bool bNoShade = false;
5242  bool bColor = false;
5243 
5244  Color aColor;
5245  OUString aId;
5246 
5247  // let's fetch the options
5248  const HTMLOptions& rHTMLOptions = GetOptions();
5249  for (size_t i = rHTMLOptions.size(); i; )
5250  {
5251  const HTMLOption& rOption = rHTMLOptions[--i];
5252  switch( rOption.GetToken() )
5253  {
5254  case HtmlOptionId::ID:
5255  aId = rOption.GetString();
5256  break;
5257  case HtmlOptionId::SIZE:
5258  nSize = static_cast<sal_uInt16>(rOption.GetNumber());
5259  break;
5260  case HtmlOptionId::WIDTH:
5261  bPercentWidth = (rOption.GetString().indexOf('%') != -1);
5262  nWidth = static_cast<sal_uInt16>(rOption.GetNumber());
5263  if( bPercentWidth && nWidth>=100 )
5264  {
5265  // the default case are 100% lines (no attributes necessary)
5266  nWidth = 0;
5267  bPercentWidth = false;
5268  }
5269  break;
5270  case HtmlOptionId::ALIGN:
5271  eAdjust = rOption.GetEnum( aHTMLPAlignTable, eAdjust );
5272  break;
5273  case HtmlOptionId::NOSHADE:
5274  bNoShade = true;
5275  break;
5276  case HtmlOptionId::COLOR:
5277  rOption.GetColor( aColor );
5278  bColor = true;
5279  break;
5280  default: break;
5281  }
5282  }
5283 
5284  if( m_pPam->GetPoint()->nContent.GetIndex() )
5286  if( m_nOpenParaToken != HtmlTokenId::NONE )
5287  EndPara();
5288  AppendTextNode();
5290 
5291  // ...and save in a context
5292  std::unique_ptr<HTMLAttrContext> xCntxt(
5293  new HTMLAttrContext(HtmlTokenId::HORZRULE, RES_POOLCOLL_HTML_HR, OUString()));
5294 
5295  PushContext(xCntxt);
5296 
5297  // set the new style
5298  SetTextCollAttrs(m_aContexts.back().get());
5299 
5300  // the hard attributes of the current paragraph will never become invalid
5301  m_aParaAttrs.clear();
5302 
5303  if( nSize>0 || bColor || bNoShade )
5304  {
5305  // set line colour and/or width
5306  if( !bColor )
5307  aColor = COL_GRAY;
5308 
5309  SvxBorderLine aBorderLine( &aColor );
5310  if( nSize )
5311  {
5312  tools::Long nPWidth = 0;
5313  tools::Long nPHeight = static_cast<tools::Long>(nSize);
5314  SvxCSS1Parser::PixelToTwip( nPWidth, nPHeight );
5315  if ( !bNoShade )
5316  {
5317  aBorderLine.SetBorderLineStyle(SvxBorderLineStyle::DOUBLE);
5318  }
5319  aBorderLine.SetWidth( nPHeight );
5320  }
5321  else if( bNoShade )
5322  {
5323  aBorderLine.SetWidth( DEF_LINE_WIDTH_2 );
5324  }
5325  else
5326  {
5327  aBorderLine.SetBorderLineStyle(SvxBorderLineStyle::DOUBLE);
5328  aBorderLine.SetWidth( DEF_LINE_WIDTH_0 );
5329  }
5330 
5331  SvxBoxItem aBoxItem(RES_BOX);
5332  aBoxItem.SetLine( &aBorderLine, SvxBoxItemLine::BOTTOM );
5333  HTMLAttr* pTmp = new HTMLAttr(*m_pPam->GetPoint(), aBoxItem, nullptr, std::shared_ptr<HTMLAttrTable>());
5334  m_aSetAttrTab.push_back( pTmp );
5335  }
5336  if( nWidth )
5337  {
5338  // If we aren't in a table, then the width value will be "faked" with
5339  // paragraph indents. That makes little sense in a table. In order to
5340  // avoid that the line is considered during the width calculation, it
5341  // still gets an appropriate LRSpace-Item.
5342  if (!m_xTable)
5343  {
5344  // fake length and alignment of line above paragraph indents
5345  tools::Long nBrowseWidth = GetCurrentBrowseWidth();
5346  nWidth = bPercentWidth ? static_cast<sal_uInt16>((nWidth*nBrowseWidth) / 100)
5347  : ToTwips( static_cast<sal_uInt16>(nBrowseWidth) );
5348  if( nWidth < MINLAY )
5349  nWidth = MINLAY;
5350 
5351  const SwFormatColl *pColl = (static_cast<tools::Long>(nWidth) < nBrowseWidth) ? GetCurrFormatColl() : nullptr;
5352  if (pColl)
5353  {
5354  SvxLRSpaceItem aLRItem( pColl->GetLRSpace() );
5355  tools::Long nDist = nBrowseWidth - nWidth;
5356 
5357  switch( eAdjust )
5358  {
5359  case SvxAdjust::Right:
5360  aLRItem.SetTextLeft( static_cast<sal_uInt16>(nDist) );
5361  break;
5362  case SvxAdjust::Left:
5363  aLRItem.SetRight( static_cast<sal_uInt16>(nDist) );
5364  break;
5365  case SvxAdjust::Center:
5366  default:
5367  nDist /= 2;
5368  aLRItem.SetTextLeft( static_cast<sal_uInt16>(nDist) );
5369  aLRItem.SetRight( static_cast<sal_uInt16>(nDist) );
5370  break;
5371  }
5372 
5373  HTMLAttr* pTmp = new HTMLAttr(*m_pPam->GetPoint(), aLRItem, nullptr, std::shared_ptr<HTMLAttrTable>());
5374  m_aSetAttrTab.push_back( pTmp );
5375  }
5376  }
5377  }