LibreOffice Module sw (master)  1
swhtml.cxx
Go to the documentation of this file.
1 /* -*- Mode: C++; tab-width: 4; indent-tabs-mode: nil; c-basic-offset: 4 -*- */
2 /*
3  * This file is part of the LibreOffice project.
4  *
5  * This Source Code Form is subject to the terms of the Mozilla Public
6  * License, v. 2.0. If a copy of the MPL was not distributed with this
7  * file, You can obtain one at http://mozilla.org/MPL/2.0/.
8  *
9  * This file incorporates work covered by the following license notice:
10  *
11  * Licensed to the Apache Software Foundation (ASF) under one or more
12  * contributor license agreements. See the NOTICE file distributed
13  * with this work for additional information regarding copyright
14  * ownership. The ASF licenses this file to you under the Apache
15  * License, Version 2.0 (the "License"); you may not use this file
16  * except in compliance with the License. You may obtain a copy of
17  * the License at http://www.apache.org/licenses/LICENSE-2.0 .
18  */
19 
20 #include <sal/config.h>
21 
22 #include <algorithm>
23 #include <memory>
24 #include <config_java.h>
25 
26 #include <com/sun/star/document/XDocumentPropertiesSupplier.hpp>
27 #include <com/sun/star/document/XDocumentProperties.hpp>
28 #include <com/sun/star/i18n/ScriptType.hpp>
29 #include <com/sun/star/i18n/XBreakIterator.hpp>
30 #include <comphelper/string.hxx>
31 #include <o3tl/safeint.hxx>
32 #include <rtl/ustrbuf.hxx>
33 #include <svx/svxids.hrc>
34 #if OSL_DEBUG_LEVEL > 0
35 #include <stdlib.h>
36 #endif
37 #include <hintids.hxx>
38 
39 #include <vcl/errinf.hxx>
40 #include <svl/stritem.hxx>
41 #include <vcl/imap.hxx>
42 #include <svtools/htmltokn.h>
43 #include <svtools/htmlkywd.hxx>
44 #include <svtools/ctrltool.hxx>
45 #include <unotools/configmgr.hxx>
46 #include <unotools/pathoptions.hxx>
47 #include <vcl/svapp.hxx>
48 #include <sfx2/event.hxx>
49 #include <sfx2/docfile.hxx>
50 
51 #include <svtools/htmlcfg.hxx>
52 #include <sfx2/linkmgr.hxx>
53 #include <editeng/kernitem.hxx>
54 #include <editeng/boxitem.hxx>
55 #include <editeng/fhgtitem.hxx>
57 #include <editeng/postitem.hxx>
58 #include <editeng/wghtitem.hxx>
60 #include <editeng/udlnitem.hxx>
62 #include <editeng/blinkitem.hxx>
63 #include <editeng/ulspitem.hxx>
64 #include <editeng/colritem.hxx>
65 #include <editeng/fontitem.hxx>
66 #include <editeng/adjustitem.hxx>
67 #include <editeng/lrspitem.hxx>
68 #include <editeng/protitem.hxx>
69 #include <editeng/flstitem.hxx>
71 
72 #include <frmatr.hxx>
73 #include <charatr.hxx>
74 #include <fmtfld.hxx>
75 #include <fmtpdsc.hxx>
76 #include <fmtanchr.hxx>
77 #include <fmtsrnd.hxx>
78 #include <fmtfsize.hxx>
79 #include <fmtclds.hxx>
80 #include <fchrfmt.hxx>
81 #include <fmtinfmt.hxx>
82 #include <fmtfollowtextflow.hxx>
83 #include <fmtornt.hxx>
84 #include <docary.hxx>
85 #include <doc.hxx>
86 #include <IDocumentUndoRedo.hxx>
93 #include <IDocumentStatistics.hxx>
94 #include <IDocumentState.hxx>
95 #include <pam.hxx>
96 #include <ndtxt.hxx>
97 #include <mdiexp.hxx>
98 #include <poolfmt.hxx>
99 #include <pagedesc.hxx>
100 #include <IMark.hxx>
101 #include <docsh.hxx>
102 #include <editsh.hxx>
103 #include <docufld.hxx>
104 #include "swcss1.hxx"
105 #include <fltini.hxx>
106 #include <htmltbl.hxx>
107 #include "htmlnum.hxx"
108 #include "swhtml.hxx"
109 #include "wrthtml.hxx"
110 #include <linkenum.hxx>
111 #include <breakit.hxx>
112 #include <SwAppletImpl.hxx>
113 #include <swdll.hxx>
114 #include <txatbase.hxx>
115 
116 #include <sfx2/viewfrm.hxx>
117 #include <svx/svdobj.hxx>
118 #include <officecfg/Office/Writer.hxx>
120 #include <comphelper/sequence.hxx>
121 
122 #include <swerror.h>
123 #include <ndole.hxx>
124 #include <unoframe.hxx>
125 #include "css1atr.hxx"
126 
// NOTE(review): presumably a bit mask (0b111) applied to HTML font-size
// values; the place where it is used is not visible in this section - confirm.
127 #define FONTSIZE_MASK 7
128 
// Escapement constants for the HTML import. HTML_ESC_SUPER and HTML_ESC_SUB
// are plain aliases of DFLT_ESC_SUPER / DFLT_ESC_SUB, which are defined
// outside this file.
// NOTE(review): HTML_ESC_PROP is presumably the proportional size (percent)
// used for super-/subscript text - confirm against its users.
129 #define HTML_ESC_PROP 80
130 #define HTML_ESC_SUPER DFLT_ESC_SUPER
131 #define HTML_ESC_SUB DFLT_ESC_SUB
132 
// Numeric codes for the TYPE option of the <SPACER> tag.
// NOTE(review): the names suggest block / horizontal / vertical spacers;
// confirm against the option table that maps the keywords to these values.
133 #define HTML_SPTYPE_BLOCK 1
134 #define HTML_SPTYPE_HORI 2
135 #define HTML_SPTYPE_VERT 3
136 
138 using namespace ::com::sun::star;
139 
140 // <P ALIGN=xxx>, <Hn ALIGN=xxx>, <TD ALIGN=xxx> etc.
142 {
143  { OOO_STRING_SVTOOLS_HTML_AL_left, SvxAdjust::Left },
144  { OOO_STRING_SVTOOLS_HTML_AL_center, SvxAdjust::Center },
145  { OOO_STRING_SVTOOLS_HTML_AL_middle, SvxAdjust::Center }, // Netscape
146  { OOO_STRING_SVTOOLS_HTML_AL_right, SvxAdjust::Right },
147  { OOO_STRING_SVTOOLS_HTML_AL_justify, SvxAdjust::Block },
148  { OOO_STRING_SVTOOLS_HTML_AL_char, SvxAdjust::Left },
149  { nullptr, SvxAdjust(0) }
150 };
151 
152 // <SPACER TYPE=...>
154 {
158  { nullptr, 0 }
159 };
160 
162 {
163  m_bTemplateBrowseMode = true;
164 }
165 
166 OUString HTMLReader::GetTemplateName(SwDoc& rDoc) const
167 {
169  // HTML import into Writer, avoid loading the Writer/Web template.
170  return OUString();
171 
172  const OUString sTemplateWithoutExt("internal/html");
173  SvtPathOptions aPathOpt;
174 
175  // first search for OpenDocument Writer/Web template
176  // OpenDocument Writer/Web template (extension .oth)
177  OUString sTemplate( sTemplateWithoutExt + ".oth" );
178  if (aPathOpt.SearchFile( sTemplate, SvtPathOptions::PATH_TEMPLATE ))
179  return sTemplate;
180 
181  // no OpenDocument Writer/Web template found.
182  // search for OpenOffice.org Writer/Web template
183  sTemplate = sTemplateWithoutExt + ".stw";
184  if (aPathOpt.SearchFile( sTemplate, SvtPathOptions::PATH_TEMPLATE ))
185  return sTemplate;
186 
187  OSL_ENSURE( false, "The default HTML template cannot be found in the defined template directories!");
188 
189  return OUString();
190 }
191 
193 {
194  OSL_ENSURE( m_pMedium, "Where is the medium??" );
195 
196  if( m_pMedium->IsRemote() || !m_pMedium->IsStorage() )
197  {
199  return true;
200  }
201  return false;
202 
203 }
204 
205 // Call for the general Reader-Interface
206 ErrCode HTMLReader::Read( SwDoc &rDoc, const OUString& rBaseURL, SwPaM &rPam, const OUString & rName )
207 {
209 
210  if( !m_pStream )
211  {
212  OSL_ENSURE( m_pStream, "HTML-Read without stream" );
213  return ERR_SWG_READ_ERROR;
214  }
215 
216  if( !m_bInsertMode )
217  {
219 
220  // Set the HTML page style, when it isn't a HTML document,
221  // otherwise it's already set.
223  {
226  }
227  }
228 
229  // so nobody steals the document!
230  rtl::Reference<SwDoc> aHoldRef(&rDoc);
231  ErrCode nRet = ERRCODE_NONE;
232  tools::SvRef<SwHTMLParser> xParser = new SwHTMLParser( &rDoc, rPam, *m_pStream,
233  rName, rBaseURL, !m_bInsertMode, m_pMedium,
234  IsReadUTF8(),
236 
237  SvParserState eState = xParser->CallParser();
238 
239  if( SvParserState::Pending == eState )
241  else if( SvParserState::Accepted != eState )
242  {
243  const OUString sErr(OUString::number(static_cast<sal_Int32>(xParser->GetLineNr()))
244  + "," + OUString::number(static_cast<sal_Int32>(xParser->GetLinePos())));
245 
246  // use the stream as transport for error number
247  nRet = *new StringErrorInfo( ERR_FORMAT_ROWCOL, sErr,
248  DialogMask::ButtonsOk | DialogMask::MessageError );
249  }
250 
251  return nRet;
252 }
253 
255  const OUString& rPath,
256  const OUString& rBaseURL,
257  bool bReadNewDoc,
258  SfxMedium* pMed, bool bReadUTF8,
259  bool bNoHTMLComments,
260  const OUString& rNamespace )
261  : SfxHTMLParser( rIn, bReadNewDoc, pMed ),
262  m_aPathToFile( rPath ),
263  m_sBaseURL( rBaseURL ),
264  m_xAttrTab(new HTMLAttrTable),
265  m_pNumRuleInfo( new SwHTMLNumRuleInfo ),
266  m_xDoc( pD ),
267  m_pActionViewShell( nullptr ),
268  m_pSttNdIdx( nullptr ),
269  m_pFormImpl( nullptr ),
270  m_pMarquee( nullptr ),
271  m_pImageMap( nullptr ),
272  m_nBaseFontStMin( 0 ),
273  m_nFontStMin( 0 ),
274  m_nDefListDeep( 0 ),
275  m_nFontStHeadStart( 0 ),
276  m_nSBModuleCnt( 0 ),
277  m_nMissingImgMaps( 0 ),
278  m_nParaCnt( 5 ),
279  // #i83625#
280  m_nContextStMin( 0 ),
281  m_nContextStAttrMin( 0 ),
282  m_nSelectEntryCnt( 0 ),
283  m_nOpenParaToken( HtmlTokenId::NONE ),
284  m_eJumpTo( JumpToMarks::NONE ),
285 #ifdef DBG_UTIL
286  m_nContinue( 0 ),
287 #endif
288  m_eParaAdjust( SvxAdjust::End ),
289  m_bDocInitalized( false ),
290  m_bSetModEnabled( false ),
291  m_bInFloatingFrame( false ),
292  m_bInField( false ),
293  m_bCallNextToken( false ),
294  m_bIgnoreRawData( false ),
295  m_bLBEntrySelected ( false ),
296  m_bTAIgnoreNewPara ( false ),
297  m_bFixMarqueeWidth ( false ),
298  m_bNoParSpace( false ),
299  m_bInNoEmbed( false ),
300  m_bInTitle( false ),
301  m_bUpdateDocStat( false ),
302  m_bFixSelectWidth( false ),
303  m_bTextArea( false ),
304  m_bSelect( false ),
305  m_bInFootEndNoteAnchor( false ),
306  m_bInFootEndNoteSymbol( false ),
307  m_bIgnoreHTMLComments( bNoHTMLComments ),
308  m_bRemoveHidden( false ),
309  m_bBodySeen( false ),
310  m_bReadingHeaderOrFooter( false ),
311  m_bNotifyMacroEventRead( false ),
312  m_isInTableStructure(false),
313  m_nTableDepth( 0 ),
314  m_pTempViewFrame(nullptr)
315 {
316  // If requested explicitly, then force ignoring of comments (don't create postits for them).
318  m_bIgnoreHTMLComments = true;
319 
320  m_nEventId = nullptr;
322 
323  m_eScriptLang = HTMLScriptLanguage::Unknown;
324 
325  rCursor.DeleteMark();
326  m_pPam = &rCursor; // re-use existing cursor: avoids spurious ~SwIndexReg assert
327  memset(m_xAttrTab.get(), 0, sizeof(HTMLAttrTable));
328 
329  // Read the font sizes 1-7 from the INI file
330  SvxHtmlOptions& rHtmlOptions = SvxHtmlOptions::Get();
331  m_aFontHeights[0] = rHtmlOptions.GetFontSize( 0 ) * 20;
332  m_aFontHeights[1] = rHtmlOptions.GetFontSize( 1 ) * 20;
333  m_aFontHeights[2] = rHtmlOptions.GetFontSize( 2 ) * 20;
334  m_aFontHeights[3] = rHtmlOptions.GetFontSize( 3 ) * 20;
335  m_aFontHeights[4] = rHtmlOptions.GetFontSize( 4 ) * 20;
336  m_aFontHeights[5] = rHtmlOptions.GetFontSize( 5 ) * 20;
337  m_aFontHeights[6] = rHtmlOptions.GetFontSize( 6 ) * 20;
338 
339  m_bKeepUnknown = rHtmlOptions.IsImportUnknown();
340 
341  if(bReadNewDoc)
342  {
343  //CJK has different defaults, so a different object should be used for this
344  //RES_CHARTR_CJK_FONTSIZE is a valid value
346  m_xDoc->SetDefault( aFontHeight );
348  m_xDoc->SetDefault( aFontHeightCJK );
350  m_xDoc->SetDefault( aFontHeightCTL );
351 
352  // #i18732# - adjust default of option 'FollowTextFlow'
353  // TODO: not sure what the appropriate default for HTML should be?
354  m_xDoc->SetDefault( SwFormatFollowTextFlow(true) );
355  }
356 
357  // Change to HTML mode during the import, so that the right styles are created
358  m_bOldIsHTMLMode = m_xDoc->getIDocumentSettingAccess().get(DocumentSettingId::HTML_MODE);
359  m_xDoc->getIDocumentSettingAccess().set(DocumentSettingId::HTML_MODE, true);
360 
361  m_pCSS1Parser.reset( new SwCSS1Parser( m_xDoc.get(), m_aFontHeights, m_sBaseURL, IsNewDoc() ) );
362  m_pCSS1Parser->SetIgnoreFontFamily( rHtmlOptions.IsIgnoreFontFamily() );
363 
364  if( bReadUTF8 )
365  {
366  SetSrcEncoding( RTL_TEXTENCODING_UTF8 );
367  }
368  else
369  {
370  SwDocShell *pDocSh = m_xDoc->GetDocShell();
371  SvKeyValueIterator *pHeaderAttrs =
372  pDocSh->GetHeaderAttributes();
373  if( pHeaderAttrs )
374  SetEncodingByHTTPHeader( pHeaderAttrs );
375  }
376  m_pCSS1Parser->SetDfltEncoding( osl_getThreadTextEncoding() );
377 
378  SwDocShell* pDocSh = m_xDoc->GetDocShell();
379  if( pDocSh )
380  {
381  m_bViewCreated = true; // not, load synchronous
382 
383  // a jump mark is present
384 
385  if( pMed )
386  {
387  m_sJmpMark = pMed->GetURLObject().GetMark();
388  if( !m_sJmpMark.isEmpty() )
389  {
391  sal_Int32 nLastPos = m_sJmpMark.lastIndexOf( cMarkSeparator );
392  sal_Int32 nPos = nLastPos != -1 ? nLastPos : 0;
393 
394  OUString sCmp;
395  if (nPos)
396  {
397  sCmp = m_sJmpMark.copy(nPos + 1).replaceAll(" ", "");
398  }
399 
400  if( !sCmp.isEmpty() )
401  {
402  sCmp = sCmp.toAsciiLowerCase();
403  if( sCmp == "region" )
405  else if( sCmp == "table" )
407  else if( sCmp == "graphic" )
409  else if( sCmp == "outline" ||
410  sCmp == "text" ||
411  sCmp == "frame" )
412  m_eJumpTo = JumpToMarks::NONE; // this is nothing valid!
413  else
414  // otherwise this is a normal (book)mark
415  nPos = -1;
416  }
417  else
418  nPos = -1;
419 
420  if( nPos != -1 )
421  m_sJmpMark = m_sJmpMark.copy( 0, nPos );
422  if( m_sJmpMark.isEmpty() )
424  }
425  }
426  }
427 
428  if (!rNamespace.isEmpty())
429  {
430  SetNamespace(rNamespace);
431  m_bXHTML = true;
432  if (rNamespace == "reqif-xhtml")
433  m_bReqIF = true;
434  }
435 
436  // Extract load parameters which are specific to this filter.
437  if (!pMed)
438  {
439  return;
440  }
441 
442  comphelper::SequenceAsHashMap aLoadMap(pMed->GetArgs());
443  auto it = aLoadMap.find("AllowedRTFOLEMimeTypes");
444  if (it == aLoadMap.end())
445  {
446  return;
447  }
448 
449  uno::Sequence<OUString> aTypes;
450  it->second >>= aTypes;
451  m_aAllowedRTFOLEMimeTypes = comphelper::sequenceToContainer<std::set<OUString>>(aTypes);
452 }
453 
455 {
456 #ifdef DBG_UTIL
457  OSL_ENSURE( !m_nContinue, "DTOR in continue!" );
458 #endif
459 
460  OSL_ENSURE(m_aContexts.empty(), "There are still contexts on the stack");
461  OSL_ENSURE(!m_nContextStMin, "There are protected contexts");
462  m_nContextStMin = 0;
463  while (!m_aContexts.empty())
464  {
465  std::unique_ptr<HTMLAttrContext> xCntxt(PopContext());
466  ClearContext(xCntxt.get());
467  }
468 
469  bool bAsync = m_xDoc->IsInLoadAsynchron();
470  m_xDoc->SetInLoadAsynchron( false );
471  m_xDoc->getIDocumentSettingAccess().set(DocumentSettingId::HTML_MODE, m_bOldIsHTMLMode);
472 
473  if( m_xDoc->GetDocShell() && m_nEventId )
475 
476  // the DocumentDetected maybe can delete the DocShells, therefore fetch again
477  if( m_xDoc->GetDocShell() )
478  {
479  // update linked sections
480  sal_uInt16 nLinkMode = m_xDoc->getIDocumentSettingAccess().getLinkUpdateMode( true );
481  if( nLinkMode != NEVER && bAsync &&
482  SfxObjectCreateMode::INTERNAL!=m_xDoc->GetDocShell()->GetCreateMode() )
483  m_xDoc->getIDocumentLinksAdministration().GetLinkManager().UpdateAllLinks( nLinkMode == MANUAL, false, nullptr );
484 
485  if ( m_xDoc->GetDocShell()->IsLoading() )
486  {
487  // #i59688#
488  m_xDoc->GetDocShell()->LoadingFinished();
489  }
490  }
491 
492  delete m_pSttNdIdx;
493 
494  if( !m_aSetAttrTab.empty() )
495  {
496  OSL_ENSURE( m_aSetAttrTab.empty(),"There are still attributes on the stack" );
497  for ( const auto& rpAttr : m_aSetAttrTab )
498  delete rpAttr;
499  m_aSetAttrTab.clear();
500  }
501 
502  m_pCSS1Parser.reset();
503  m_pNumRuleInfo.reset();
504  DeleteFormImpl();
505  m_pFootEndNoteImpl.reset();
506 
507  OSL_ENSURE(!m_xTable.get(), "It exists still an open table");
508  m_pImageMaps.reset();
509 
510  OSL_ENSURE( m_vPendingStack.empty(),
511  "SwHTMLParser::~SwHTMLParser: Here should not be Pending-Stack anymore" );
512  m_vPendingStack.clear();
513 
514  m_xDoc.clear();
515 
516  if ( m_pTempViewFrame )
517  {
519 
520  // the temporary view frame is hidden, so the hidden flag might need to be removed
521  if ( m_bRemoveHidden && m_xDoc.is() && m_xDoc->GetDocShell() && m_xDoc->GetDocShell()->GetMedium() )
522  m_xDoc->GetDocShell()->GetMedium()->GetItemSet()->ClearItem( SID_HIDDEN );
523  }
524 }
525 
526 IMPL_LINK_NOARG( SwHTMLParser, AsyncCallback, void*, void )
527 {
528  m_nEventId=nullptr;
529 
530  // #i47907# - If the document has already been destructed,
531  // the parser should be aware of this:
532  if( ( m_xDoc->GetDocShell() && m_xDoc->GetDocShell()->IsAbortingImport() )
533  || 1 == m_xDoc->getReferenceCount() )
534  {
535  // was the import aborted by SFX?
536  eState = SvParserState::Error;
537  }
538 
539  GetAsynchCallLink().Call(nullptr);
540 }
541 
543 {
544  // create temporary index on position 0, so it won't be moved!
545  m_pSttNdIdx = new SwNodeIndex( m_xDoc->GetNodes() );
546  if( !IsNewDoc() ) // insert into existing document ?
547  {
548  const SwPosition* pPos = m_pPam->GetPoint();
549 
550  m_xDoc->getIDocumentContentOperations().SplitNode( *pPos, false );
551 
552  *m_pSttNdIdx = pPos->nNode.GetIndex()-1;
553  m_xDoc->getIDocumentContentOperations().SplitNode( *pPos, false );
554 
555  SwPaM aInsertionRangePam( *pPos );
556 
558 
559  // split any redline over the insertion point
560  aInsertionRangePam.SetMark();
561  *aInsertionRangePam.GetPoint() = *m_pPam->GetPoint();
562  aInsertionRangePam.Move( fnMoveBackward );
563  m_xDoc->getIDocumentRedlineAccess().SplitRedline( aInsertionRangePam );
564 
565  m_xDoc->SetTextFormatColl( *m_pPam,
566  m_pCSS1Parser->GetTextCollFromPool( RES_POOLCOLL_STANDARD ));
567  }
568 
569  if( GetMedium() )
570  {
571  if( !m_bViewCreated )
572  {
573  m_nEventId = Application::PostUserEvent( LINK( this, SwHTMLParser, AsyncCallback ) );
574  }
575  else
576  {
577  m_bViewCreated = true;
578  m_nEventId = nullptr;
579  }
580  }
581  else // show progress bar
582  {
583  rInput.Seek(STREAM_SEEK_TO_END);
584  rInput.ResetError();
585 
586  m_xProgress.reset(new ImportProgress(m_xDoc->GetDocShell(), 0, rInput.Tell()));
587 
588  rInput.Seek(STREAM_SEEK_TO_BEGIN);
589  rInput.ResetError();
590  }
591 
592  StartListening(m_xDoc->GetPageDesc( 0 ).GetNotifier());
593 
595  return eRet;
596 }
597 
599 {
600  const SwNode *pPrev = m_xDoc->GetNodes()[nNodeIdx - 1];
601  return pPrev->IsContentNode() || (pPrev->IsEndNode() && pPrev->StartOfSectionNode()->IsSectionNode());
602 }
603 
605 {
606 #ifdef DBG_UTIL
607  OSL_ENSURE(!m_nContinue, "Continue in Continue - not supposed to happen");
608  m_nContinue++;
609 #endif
610 
611  // When the import (of SFX) is aborted, an error will be set but
612  // we still continue, so that we clean up properly.
613  OSL_ENSURE( SvParserState::Error!=eState,
614  "SwHTMLParser::Continue: already set an error" );
615  if( m_xDoc->GetDocShell() && m_xDoc->GetDocShell()->IsAbortingImport() )
616  eState = SvParserState::Error;
617 
618  // Fetch SwViewShell from document, save it and set as current.
619  SwViewShell *pInitVSh = CallStartAction();
620 
621  if( SvParserState::Error != eState && GetMedium() && !m_bViewCreated )
622  {
623  // At first call first return, show document and wait for callback
624  // time.
625  // At this point in CallParser only one digit was read and
626  // a SaveState(0) was called.
627  eState = SvParserState::Pending;
628  m_bViewCreated = true;
629  m_xDoc->SetInLoadAsynchron( true );
630 
631 #ifdef DBG_UTIL
632  m_nContinue--;
633 #endif
634 
635  return;
636  }
637 
638  m_bSetModEnabled = false;
639  if( m_xDoc->GetDocShell() )
640  {
641  m_bSetModEnabled = m_xDoc->GetDocShell()->IsEnableSetModified();
642  if( m_bSetModEnabled )
643  {
644  m_xDoc->GetDocShell()->EnableSetModified( false );
645  }
646  }
647 
648  // during import don't call OLE-Modified
649  Link<bool,void> aOLELink( m_xDoc->GetOle2Link() );
650  m_xDoc->SetOle2Link( Link<bool,void>() );
651 
652  bool bModified = m_xDoc->getIDocumentState().IsModified();
653  bool const bWasUndo = m_xDoc->GetIDocumentUndoRedo().DoesUndo();
654  m_xDoc->GetIDocumentUndoRedo().DoUndo(false);
655 
656  // When the import will be aborted, don't call Continue anymore.
657  // If a Pending-Stack exists make sure the stack is ended with a call
658  // of NextToken.
659  if( SvParserState::Error == eState )
660  {
661  OSL_ENSURE( m_vPendingStack.empty() || m_vPendingStack.back().nToken != HtmlTokenId::NONE,
662  "SwHTMLParser::Continue: Pending-Stack without Token" );
663  if( !m_vPendingStack.empty() && m_vPendingStack.back().nToken != HtmlTokenId::NONE )
664  NextToken( m_vPendingStack.back().nToken );
665  OSL_ENSURE( m_vPendingStack.empty(),
666  "SwHTMLParser::Continue: There is again a Pending-Stack" );
667  }
668  else
669  {
670  HTMLParser::Continue( !m_vPendingStack.empty() ? m_vPendingStack.back().nToken : nToken );
671  }
672 
673  // disable progress bar again
674  m_xProgress.reset();
675 
676  bool bLFStripped = false;
677  if( SvParserState::Pending != GetStatus() )
678  {
679  // set the last attributes yet
680  {
681  if( !m_aScriptSource.isEmpty() )
682  {
683  SwScriptFieldType *pType =
684  static_cast<SwScriptFieldType*>(m_xDoc->getIDocumentFieldsAccess().GetSysFieldType( SwFieldIds::Script ));
685 
687  false );
688  InsertAttr( SwFormatField( aField ), false );
689  }
690 
691  if( m_pAppletImpl )
692  {
693  if( m_pAppletImpl->GetApplet().is() )
694  EndApplet();
695  else
696  EndObject();
697  }
698 
699  // maybe remove an existing LF after the last paragraph
700  if( IsNewDoc() )
701  bLFStripped = StripTrailingLF() > 0;
702 
703  // close still open numbering
704  while( GetNumInfo().GetNumRule() )
706 
707  OSL_ENSURE( !m_nContextStMin, "There are protected contexts" );
708  // try this twice, first normally to let m_nContextStMin decrease
709  // naturally and get contexts popped in desired order, and if that
710  // fails force it
711  for (int i = 0; i < 2; ++i)
712  {
713  while (m_aContexts.size() > m_nContextStMin)
714  {
715  std::unique_ptr<HTMLAttrContext> xCntxt(PopContext());
716  if (xCntxt)
717  EndContext(xCntxt.get());
718  }
719  if (!m_nContextStMin)
720  break;
721  OSL_ENSURE(!m_nContextStMin, "There are still protected contexts");
722  m_nContextStMin = 0;
723  }
724 
725  m_aParaAttrs.clear();
726 
727  SetAttr( false );
728 
729  // set the first delayed styles
730  m_pCSS1Parser->SetDelayedStyles();
731  }
732 
733  // again correct the start
734  if( !IsNewDoc() && m_pSttNdIdx->GetIndex() )
735  {
736  SwTextNode* pTextNode = m_pSttNdIdx->GetNode().GetTextNode();
737  SwNodeIndex aNxtIdx( *m_pSttNdIdx );
738  if( pTextNode && pTextNode->CanJoinNext( &aNxtIdx ))
739  {
740  const sal_Int32 nStt = pTextNode->GetText().getLength();
741  // when the cursor is still in the node, then set it at the end
742  if( m_pPam->GetPoint()->nNode == aNxtIdx )
743  {
745  m_pPam->GetPoint()->nContent.Assign( pTextNode, nStt );
746  }
747 
748 #if OSL_DEBUG_LEVEL > 0
749 // !!! shouldn't be possible, or ??
750  OSL_ENSURE( m_pSttNdIdx->GetIndex()+1 != m_pPam->GetBound().nNode.GetIndex(),
751  "Pam.Bound1 is still in the node" );
752  OSL_ENSURE( m_pSttNdIdx->GetIndex()+1 != m_pPam->GetBound( false ).nNode.GetIndex(),
753  "Pam.Bound2 is still in the node" );
754 
755  if( m_pSttNdIdx->GetIndex()+1 == m_pPam->GetBound().nNode.GetIndex() )
756  {
757  const sal_Int32 nCntPos = m_pPam->GetBound().nContent.GetIndex();
758  m_pPam->GetBound().nContent.Assign( pTextNode,
759  pTextNode->GetText().getLength() + nCntPos );
760  }
761  if( m_pSttNdIdx->GetIndex()+1 == m_pPam->GetBound( false ).nNode.GetIndex() )
762  {
763  const sal_Int32 nCntPos = m_pPam->GetBound( false ).nContent.GetIndex();
764  m_pPam->GetBound( false ).nContent.Assign( pTextNode,
765  pTextNode->GetText().getLength() + nCntPos );
766  }
767 #endif
768  // Keep character attribute!
769  SwTextNode* pDelNd = aNxtIdx.GetNode().GetTextNode();
770  if (pTextNode->GetText().getLength())
771  pDelNd->FormatToTextAttr( pTextNode );
772  else
773  pTextNode->ChgFormatColl( pDelNd->GetTextColl() );
774  pTextNode->JoinNext();
775  }
776  }
777  }
778 
779  if( SvParserState::Accepted == eState )
780  {
781  if( m_nMissingImgMaps )
782  {
783  // Some Image-Map relations are still missing.
784  // Maybe now the Image-Maps are there?
786  }
787 
788  // now remove the last useless paragraph
789  SwPosition* pPos = m_pPam->GetPoint();
790  if( !pPos->nContent.GetIndex() && !bLFStripped )
791  {
792  SwTextNode* pCurrentNd;
793  sal_uLong nNodeIdx = pPos->nNode.GetIndex();
794 
795  bool bHasFlysOrMarks =
797 
798  if( IsNewDoc() )
799  {
800  if (!m_pPam->GetPoint()->nContent.GetIndex() && CanRemoveNode(nNodeIdx))
801  {
803  if( pCNd && pCNd->StartOfSectionIndex()+2 <
804  pCNd->EndOfSectionIndex() && !bHasFlysOrMarks )
805  {
807  SwCursorShell *pCursorSh = dynamic_cast<SwCursorShell *>( pVSh );
808  if( pCursorSh &&
809  pCursorSh->GetCursor()->GetPoint()
810  ->nNode.GetIndex() == nNodeIdx )
811  {
812  pCursorSh->MovePara(GoPrevPara, fnParaEnd );
813  pCursorSh->SetMark();
814  pCursorSh->ClearMark();
815  }
816  m_pPam->GetBound().nContent.Assign( nullptr, 0 );
817  m_pPam->GetBound(false).nContent.Assign( nullptr, 0 );
818  m_xDoc->GetNodes().Delete( m_pPam->GetPoint()->nNode );
819  }
820  }
821  }
822  else if( nullptr != ( pCurrentNd = m_xDoc->GetNodes()[ nNodeIdx ]->GetTextNode()) && !bHasFlysOrMarks )
823  {
824  if( pCurrentNd->CanJoinNext( &pPos->nNode ))
825  {
826  SwTextNode* pNextNd = pPos->nNode.GetNode().GetTextNode();
827  pPos->nContent.Assign( pNextNd, 0 );
829  pNextNd->JoinPrev();
830  }
831  else if (pCurrentNd->GetText().isEmpty())
832  {
833  pPos->nContent.Assign( nullptr, 0 );
835  m_xDoc->GetNodes().Delete( pPos->nNode );
837  }
838  }
839  }
840 
841  // annul the SplitNode from the beginning
842  else if( !IsNewDoc() )
843  {
844  if( pPos->nContent.GetIndex() ) // then there was no <p> at the end
845  m_pPam->Move( fnMoveForward, GoInNode ); // therefore to the next
846  SwTextNode* pTextNode = pPos->nNode.GetNode().GetTextNode();
847  SwNodeIndex aPrvIdx( pPos->nNode );
848  if( pTextNode && pTextNode->CanJoinPrev( &aPrvIdx ) &&
849  *m_pSttNdIdx <= aPrvIdx )
850  {
851  // Normally here should take place a JoinNext, but all cursors and
852  // so are registered in pTextNode, so that it MUST remain.
853 
854  // Convert paragraph to character attribute, from Prev adopt
855  // the paragraph attribute and the template!
856  SwTextNode* pPrev = aPrvIdx.GetNode().GetTextNode();
857  pTextNode->ChgFormatColl( pPrev->GetTextColl() );
858  pTextNode->FormatToTextAttr( pPrev );
859  pTextNode->ResetAllAttr();
860 
861  if( pPrev->HasSwAttrSet() )
862  pTextNode->SetAttr( *pPrev->GetpSwAttrSet() );
863 
864  if( &m_pPam->GetBound().nNode.GetNode() == pPrev )
865  m_pPam->GetBound().nContent.Assign( pTextNode, 0 );
866  if( &m_pPam->GetBound(false).nNode.GetNode() == pPrev )
867  m_pPam->GetBound(false).nContent.Assign( pTextNode, 0 );
868 
869  pTextNode->JoinPrev();
870  }
871  }
872 
873  // adjust AutoLoad in DocumentProperties
874  if (!utl::ConfigManager::IsFuzzing() && IsNewDoc())
875  {
876  SwDocShell *pDocShell(m_xDoc->GetDocShell());
877  OSL_ENSURE(pDocShell, "no SwDocShell");
878  if (pDocShell) {
879  uno::Reference<document::XDocumentPropertiesSupplier> xDPS(
880  pDocShell->GetModel(), uno::UNO_QUERY_THROW);
881  uno::Reference<document::XDocumentProperties> xDocProps(
882  xDPS->getDocumentProperties());
883  OSL_ENSURE(xDocProps.is(), "DocumentProperties is null");
884  if ( xDocProps.is() && (xDocProps->getAutoloadSecs() > 0) &&
885  (xDocProps->getAutoloadURL().isEmpty()) )
886  {
887  xDocProps->setAutoloadURL(m_aPathToFile);
888  }
889  }
890  }
891 
892  if( m_bUpdateDocStat )
893  {
894  m_xDoc->getIDocumentStatistics().UpdateDocStat( false, true );
895  }
896  }
897 
898  if( SvParserState::Pending != GetStatus() )
899  {
900  delete m_pSttNdIdx;
901  m_pSttNdIdx = nullptr;
902  }
903 
904  // If the parser is the last one holding the document, then nothing
905  // has to be done anymore; the document will be destroyed shortly!
906  if( 1 < m_xDoc->getReferenceCount() )
907  {
908  if( bWasUndo )
909  {
910  m_xDoc->GetIDocumentUndoRedo().DelAllUndoObj();
911  m_xDoc->GetIDocumentUndoRedo().DoUndo(true);
912  }
913  else if( !pInitVSh )
914  {
915  // When at the beginning of Continue no Shell was available,
916  // it's possible in the meantime one was created.
917  // In that case the bWasUndo flag is wrong and we must
918  // enable Undo.
919  SwViewShell *pTmpVSh = CheckActionViewShell();
920  if( pTmpVSh )
921  {
922  m_xDoc->GetIDocumentUndoRedo().DoUndo(true);
923  }
924  }
925 
926  m_xDoc->SetOle2Link( aOLELink );
927  if( !bModified )
928  m_xDoc->getIDocumentState().ResetModified();
929  if( m_bSetModEnabled && m_xDoc->GetDocShell() )
930  {
931  m_xDoc->GetDocShell()->EnableSetModified();
932  m_bSetModEnabled = false; // this is unnecessary here
933  }
934  }
935 
936  // When the Document-SwViewShell still exists and an Action is open
937  // (doesn't have to be by abort), end the Action, disconnect from Shell
938  // and finally reconstruct the old Shell.
939  CallEndAction( true );
940 
941 #ifdef DBG_UTIL
942  m_nContinue--;
943 #endif
944 }
945 
946 void SwHTMLParser::Notify(const SfxHint& rHint)
947 {
948  if(rHint.GetId() == SfxHintId::Dying)
949  {
950  EndListeningAll();
951  ReleaseRef();
952  }
953 }
954 
956 {
957  OSL_ENSURE( !m_bDocInitalized, "DocumentDetected called multiple times" );
958  m_bDocInitalized = true;
959  if( IsNewDoc() )
960  {
961  if( IsInHeader() )
962  FinishHeader();
963 
964  CallEndAction( true );
965 
966  m_xDoc->GetIDocumentUndoRedo().DoUndo(false);
967  // For DocumentDetected in general a SwViewShell is created.
968  // But it also can be created later, in case the UI is captured.
969  CallStartAction();
970  }
971 }
972 
973 // is called for every token that is recognised in CallParser
975 {
976  if( ( m_xDoc->GetDocShell() && m_xDoc->GetDocShell()->IsAbortingImport() )
977  || 1 == m_xDoc->getReferenceCount() )
978  {
979  // Was the import cancelled by SFX? If a pending stack
980  // exists, clean it.
981  eState = SvParserState::Error;
982  OSL_ENSURE( m_vPendingStack.empty() || m_vPendingStack.back().nToken != HtmlTokenId::NONE,
983  "SwHTMLParser::NextToken: Pending-Stack without token" );
984  if( 1 == m_xDoc->getReferenceCount() || m_vPendingStack.empty() )
985  return ;
986  }
987 
988 #if OSL_DEBUG_LEVEL > 0
989  if( !m_vPendingStack.empty() )
990  {
991  switch( nToken )
992  {
993  // tables are read by recursive method calls
994  case HtmlTokenId::TABLE_ON:
995  // For CSS declarations we might have to wait
996  // for a file download to finish
997  case HtmlTokenId::LINK:
998  // For controls we might have to set the size.
999  case HtmlTokenId::INPUT:
1000  case HtmlTokenId::TEXTAREA_ON:
1001  case HtmlTokenId::SELECT_ON:
1002  case HtmlTokenId::SELECT_OFF:
1003  break;
1004  default:
1005  OSL_ENSURE( m_vPendingStack.empty(), "Unknown token for Pending-Stack" );
1006  break;
1007  }
1008  }
1009 #endif
1010 
1011  // The following special cases have to be treated before the
1012  // filter detection, because Netscape doesn't reference the content
1013  // of the title for filter detection either.
1014  if( m_vPendingStack.empty() )
1015  {
1016  if( m_bInTitle )
1017  {
1018  switch( nToken )
1019  {
1020  case HtmlTokenId::TITLE_OFF:
1021  {
1022  OUString sTitle = m_sTitle.makeStringAndClear();
1023  if( IsNewDoc() && !sTitle.isEmpty() )
1024  {
1025  if( m_xDoc->GetDocShell() ) {
1026  uno::Reference<document::XDocumentPropertiesSupplier>
1027  xDPS(m_xDoc->GetDocShell()->GetModel(),
1028  uno::UNO_QUERY_THROW);
1029  uno::Reference<document::XDocumentProperties> xDocProps(
1030  xDPS->getDocumentProperties());
1031  OSL_ENSURE(xDocProps.is(), "no DocumentProperties");
1032  if (xDocProps.is()) {
1033  xDocProps->setTitle(sTitle);
1034  }
1035 
1036  m_xDoc->GetDocShell()->SetTitle(sTitle);
1037  }
1038  }
1039  m_bInTitle = false;
1040  break;
1041  }
1042 
1043  case HtmlTokenId::NONBREAKSPACE:
1044  m_sTitle.append(" ");
1045  break;
1046 
1047  case HtmlTokenId::SOFTHYPH:
1048  m_sTitle.append("-");
1049  break;
1050 
1051  case HtmlTokenId::TEXTTOKEN:
1052  m_sTitle.append(aToken);
1053  break;
1054 
1055  default:
1056  m_sTitle.append("<");
1057  if( (nToken >= HtmlTokenId::ONOFF_START) && isOffToken(nToken) )
1058  m_sTitle.append("/");
1059  m_sTitle.append(sSaveToken);
1060  if( !aToken.isEmpty() )
1061  {
1062  m_sTitle.append(" ");
1063  m_sTitle.append(aToken);
1064  }
1065  m_sTitle.append(">");
1066  break;
1067  }
1068 
1069  return;
1070  }
1071  }
1072 
1073  // Find out what type of document it is if we don't know already.
1074  // For Controls this has to be finished before the control is inserted
1075  // because for inserting a View is needed.
1076  if( !m_bDocInitalized )
1077  DocumentDetected();
1078 
1079  bool bGetIDOption = false, bInsertUnknown = false;
1080  bool bUpperSpaceSave = m_bUpperSpace;
1081  m_bUpperSpace = false;
1082 
1083  // The following special cases may or have to be treated after the
1084  // filter detection
1085  if( m_vPendingStack.empty() )
1086  {
1087  if( m_bInFloatingFrame )
1088  {
1089  // <SCRIPT> is ignored here (from us), because it is ignored in
1090  // Applets as well
1091  if( HtmlTokenId::IFRAME_OFF == nToken )
1092  {
1093  m_bCallNextToken = false;
1094  m_bInFloatingFrame = false;
1095  }
1096 
1097  return;
1098  }
1099  else if( m_bInNoEmbed )
1100  {
1101  switch( nToken )
1102  {
1103  case HtmlTokenId::NOEMBED_OFF:
1106  m_aContents.clear();
1107  m_bCallNextToken = false;
1108  m_bInNoEmbed = false;
1109  break;
1110 
1111  case HtmlTokenId::RAWDATA:
1113  break;
1114 
1115  default:
1116  OSL_ENSURE( false, "SwHTMLParser::NextToken: invalid tag" );
1117  break;
1118  }
1119 
1120  return;
1121  }
1122  else if( m_pAppletImpl )
1123  {
1124  // in an applet only <PARAM> tags and the </APPLET> tag
1125  // are of interest for us (for the moment)
1126  // <SCRIPT> is ignored here (from Netscape)!
1127 
1128  switch( nToken )
1129  {
1130  case HtmlTokenId::APPLET_OFF:
1131  m_bCallNextToken = false;
1132  EndApplet();
1133  break;
1134  case HtmlTokenId::OBJECT_OFF:
1135  m_bCallNextToken = false;
1136  EndObject();
1137  break;
1138  case HtmlTokenId::PARAM:
1139  InsertParam();
1140  break;
1141  default: break;
1142  }
1143 
1144  return;
1145  }
1146  else if( m_bTextArea )
1147  {
1148  // in a TextArea everything up to </TEXTAREA> is inserted as text.
1149  // <SCRIPT> is ignored here (from Netscape)!
1150 
1151  switch( nToken )
1152  {
1153  case HtmlTokenId::TEXTAREA_OFF:
1154  m_bCallNextToken = false;
1155  EndTextArea();
1156  break;
1157 
1158  default:
1159  InsertTextAreaText( nToken );
1160  break;
1161  }
1162 
1163  return;
1164  }
1165  else if( m_bSelect )
1166  {
1167  // HAS to be treated after bNoScript!
1168  switch( nToken )
1169  {
1170  case HtmlTokenId::SELECT_OFF:
1171  m_bCallNextToken = false;
1172  EndSelect();
1173  return;
1174 
1175  case HtmlTokenId::OPTION:
1177  return;
1178 
1179  case HtmlTokenId::TEXTTOKEN:
1180  InsertSelectText();
1181  return;
1182 
1183  case HtmlTokenId::INPUT:
1184  case HtmlTokenId::SCRIPT_ON:
1185  case HtmlTokenId::SCRIPT_OFF:
1186  case HtmlTokenId::NOSCRIPT_ON:
1187  case HtmlTokenId::NOSCRIPT_OFF:
1188  case HtmlTokenId::RAWDATA:
1189  // treat in normal switch
1190  break;
1191 
1192  default:
1193  // ignore
1194  return;
1195  }
1196  }
1197  else if( m_pMarquee )
1198  {
1199  // in a Marquee everything up to </MARQUEE> is handled here.
1200  // The <SCRIPT> tags are ignored by MS-IE; we ignore the whole
1201  // script.
1202  switch( nToken )
1203  {
1204  case HtmlTokenId::MARQUEE_OFF:
1205  m_bCallNextToken = false;
1206  EndMarquee();
1207  break;
1208 
1209  case HtmlTokenId::TEXTTOKEN:
1211  break;
1212  default: break;
1213  }
1214 
1215  return;
1216  }
1217  else if( m_bInField )
1218  {
1219  switch( nToken )
1220  {
1221  case HtmlTokenId::SDFIELD_OFF:
1222  m_bCallNextToken = false;
1223  EndField();
1224  break;
1225 
1226  case HtmlTokenId::TEXTTOKEN:
1227  InsertFieldText();
1228  break;
1229  default: break;
1230  }
1231 
1232  return;
1233  }
1235  {
1236  switch( nToken )
1237  {
1238  case HtmlTokenId::ANCHOR_OFF:
1239  EndAnchor();
1240  m_bCallNextToken = false;
1241  break;
1242 
1243  case HtmlTokenId::TEXTTOKEN:
1245  break;
1246  default: break;
1247  }
1248  return;
1249  }
1250  else if( !m_aUnknownToken.isEmpty() )
1251  {
1252  // Paste content of unknown tags.
1253  // (but surely if we are not in the header section) fdo#36080 fdo#34666
1254  if (!aToken.isEmpty() && !IsInHeader() )
1255  {
1256  if( !m_bDocInitalized )
1257  DocumentDetected();
1258  m_xDoc->getIDocumentContentOperations().InsertString( *m_pPam, aToken );
1259 
1260  // if there are temporary paragraph attributes and the
1261  // paragraph isn't empty then the paragraph attributes
1262  // are final.
1263  m_aParaAttrs.clear();
1264 
1265  SetAttr();
1266  }
1267 
1268  // Unknown tokens in the header are only closed by a matching
1269  // end-token, </HEAD> or <BODY>. Text inside is ignored.
1270  switch( nToken )
1271  {
1272  case HtmlTokenId::UNKNOWNCONTROL_OFF:
1273  if( m_aUnknownToken != sSaveToken )
1274  return;
1275  [[fallthrough]];
1276  case HtmlTokenId::FRAMESET_ON:
1277  case HtmlTokenId::HEAD_OFF:
1278  case HtmlTokenId::BODY_ON:
1279  case HtmlTokenId::IMAGE: // Don't know why Netscape acts this way.
1280  m_aUnknownToken.clear();
1281  break;
1282  case HtmlTokenId::TEXTTOKEN:
1283  return;
1284  default:
1285  m_aUnknownToken.clear();
1286  break;
1287  }
1288  }
1289  }
1290 
1291  switch( nToken )
1292  {
1293  case HtmlTokenId::BODY_ON:
1294  if (!m_bBodySeen)
1295  {
1296  m_bBodySeen = true;
1297  if( !m_aStyleSource.isEmpty() )
1298  {
1299  m_pCSS1Parser->ParseStyleSheet( m_aStyleSource );
1300  m_aStyleSource.clear();
1301  }
1302  if( IsNewDoc() )
1303  {
1305  // If there is a template for the first or the right page,
1306  // it is set here.
1307  const SwPageDesc *pPageDesc = nullptr;
1308  if( m_pCSS1Parser->IsSetFirstPageDesc() )
1309  pPageDesc = m_pCSS1Parser->GetFirstPageDesc();
1310  else if( m_pCSS1Parser->IsSetRightPageDesc() )
1311  pPageDesc = m_pCSS1Parser->GetRightPageDesc();
1312 
1313  if( pPageDesc )
1314  {
1315  m_xDoc->getIDocumentContentOperations().InsertPoolItem( *m_pPam, SwFormatPageDesc( pPageDesc ) );
1316  }
1317  }
1318  }
1319  break;
1320 
1321  case HtmlTokenId::LINK:
1322  InsertLink();
1323  break;
1324 
1325  case HtmlTokenId::BASE:
1326  {
1327  const HTMLOptions& rHTMLOptions = GetOptions();
1328  for (size_t i = rHTMLOptions.size(); i; )
1329  {
1330  const HTMLOption& rOption = rHTMLOptions[--i];
1331  switch( rOption.GetToken() )
1332  {
1333  case HtmlOptionId::HREF:
1334  m_sBaseURL = rOption.GetString();
1335  break;
1336  case HtmlOptionId::TARGET:
1337  if( IsNewDoc() )
1338  {
1339  SwDocShell *pDocShell(m_xDoc->GetDocShell());
1340  OSL_ENSURE(pDocShell, "no SwDocShell");
1341  if (pDocShell) {
1342  uno::Reference<document::XDocumentPropertiesSupplier> xDPS(
1343  pDocShell->GetModel(), uno::UNO_QUERY_THROW);
1344  uno::Reference<document::XDocumentProperties>
1345  xDocProps(xDPS->getDocumentProperties());
1346  OSL_ENSURE(xDocProps.is(),"no DocumentProperties");
1347  if (xDocProps.is()) {
1348  xDocProps->setDefaultTarget(
1349  rOption.GetString());
1350  }
1351  }
1352  }
1353  break;
1354  default: break;
1355  }
1356  }
1357  }
1358  break;
1359 
1360  case HtmlTokenId::META:
1361  {
1362  SvKeyValueIterator *pHTTPHeader = nullptr;
1363  if( IsNewDoc() )
1364  {
1365  SwDocShell *pDocSh = m_xDoc->GetDocShell();
1366  if( pDocSh )
1367  pHTTPHeader = pDocSh->GetHeaderAttributes();
1368  }
1369  SwDocShell *pDocShell(m_xDoc->GetDocShell());
1370  OSL_ENSURE(pDocShell, "no SwDocShell");
1371  if (pDocShell)
1372  {
1373  uno::Reference<document::XDocumentProperties> xDocProps;
1374  if (IsNewDoc())
1375  {
1376  const uno::Reference<document::XDocumentPropertiesSupplier>
1377  xDPS( pDocShell->GetModel(), uno::UNO_QUERY_THROW );
1378  xDocProps = xDPS->getDocumentProperties();
1379  OSL_ENSURE(xDocProps.is(), "DocumentProperties is null");
1380  }
1381  ParseMetaOptions( xDocProps, pHTTPHeader );
1382  }
1383  }
1384  break;
1385 
1386  case HtmlTokenId::TITLE_ON:
1387  m_bInTitle = true;
1388  break;
1389 
1390  case HtmlTokenId::SCRIPT_ON:
1391  NewScript();
1392  break;
1393 
1394  case HtmlTokenId::SCRIPT_OFF:
1395  EndScript();
1396  break;
1397 
1398  case HtmlTokenId::NOSCRIPT_ON:
1399  case HtmlTokenId::NOSCRIPT_OFF:
1400  bInsertUnknown = true;
1401  break;
1402 
1403  case HtmlTokenId::STYLE_ON:
1404  NewStyle();
1405  break;
1406 
1407  case HtmlTokenId::STYLE_OFF:
1408  EndStyle();
1409  break;
1410 
1411  case HtmlTokenId::RAWDATA:
1412  if( !m_bIgnoreRawData )
1413  {
1414  if( IsReadScript() )
1415  {
1416  AddScriptSource();
1417  }
1418  else if( IsReadStyle() )
1419  {
1420  if( !m_aStyleSource.isEmpty() )
1421  m_aStyleSource += "\n";
1422  m_aStyleSource += aToken;
1423  }
1424  }
1425  break;
1426 
1427  case HtmlTokenId::OBJECT_ON:
1428  if (m_bXHTML)
1429  {
1430  if (!InsertEmbed())
1431  InsertImage();
1432  break;
1433  }
1434 #if HAVE_FEATURE_JAVA
1435  NewObject();
1436  m_bCallNextToken = m_pAppletImpl!=nullptr && m_xTable;
1437 #endif
1438  break;
1439 
1440  case HtmlTokenId::OBJECT_OFF:
1441  if (!m_aEmbeds.empty())
1442  m_aEmbeds.pop();
1443  break;
1444 
1445  case HtmlTokenId::APPLET_ON:
1446 #if HAVE_FEATURE_JAVA
1447  InsertApplet();
1448  m_bCallNextToken = m_pAppletImpl!=nullptr && m_xTable;
1449 #endif
1450  break;
1451 
1452  case HtmlTokenId::IFRAME_ON:
1455  break;
1456 
1457  case HtmlTokenId::LINEBREAK:
1458  if( !IsReadPRE() )
1459  {
1460  InsertLineBreak();
1461  break;
1462  }
1463  else
1464  bGetIDOption = true;
1465  // <BR>s in <PRE> resemble true LFs, hence no break
1466  [[fallthrough]];
1467 
1468  case HtmlTokenId::NEWPARA:
1469  // CR in PRE/LISTING/XMP
1470  {
1471  if( HtmlTokenId::NEWPARA==nToken ||
1473  {
1474  AppendTextNode(); // there is no LF at this place
1475  // therefore it will cause no problems
1476  SetTextCollAttrs();
1477  }
1478  // progress bar
1479  if (m_xProgress)
1480  m_xProgress->Update(rInput.Tell());
1481  }
1482  break;
1483 
1484  case HtmlTokenId::NONBREAKSPACE:
1485  m_xDoc->getIDocumentContentOperations().InsertString( *m_pPam, OUString(CHAR_HARDBLANK) );
1486  break;
1487 
1488  case HtmlTokenId::SOFTHYPH:
1489  m_xDoc->getIDocumentContentOperations().InsertString( *m_pPam, OUString(CHAR_SOFTHYPHEN) );
1490  break;
1491 
1492  case HtmlTokenId::LINEFEEDCHAR:
1493  if( m_pPam->GetPoint()->nContent.GetIndex() )
1494  AppendTextNode();
1495  if (!m_xTable && !m_xDoc->IsInHeaderFooter(m_pPam->GetPoint()->nNode))
1496  {
1497  NewAttr(m_xAttrTab, &m_xAttrTab->pBreak, SvxFormatBreakItem(SvxBreak::PageBefore, RES_BREAK));
1498  EndAttr( m_xAttrTab->pBreak, false );
1499  }
1500  break;
1501 
1502  case HtmlTokenId::TEXTTOKEN:
1503  // insert string without spanning attributes at the end.
1504  if( !aToken.isEmpty() && ' '==aToken[0] && !IsReadPRE() )
1505  {
1506  sal_Int32 nPos = m_pPam->GetPoint()->nContent.GetIndex();
1507  const SwTextNode* pTextNode = nPos ? m_pPam->GetPoint()->nNode.GetNode().GetTextNode() : nullptr;
1508  if (pTextNode)
1509  {
1510  const OUString& rText = pTextNode->GetText();
1511  sal_Unicode cLast = rText[--nPos];
1512  if( ' ' == cLast || '\x0a' == cLast)
1513  aToken = aToken.copy(1);
1514  }
1515  else
1516  aToken = aToken.copy(1);
1517 
1518  if( aToken.isEmpty() )
1519  {
1520  m_bUpperSpace = bUpperSpaceSave;
1521  break;
1522  }
1523  }
1524 
1525  if( !aToken.isEmpty() )
1526  {
1527  if( !m_bDocInitalized )
1528  DocumentDetected();
1529 
1530  if (!m_aEmbeds.empty())
1531  {
1532  // The text token is inside an OLE object, which means
1533  // alternate text.
1534  SwOLENode* pOLENode = m_aEmbeds.top();
1535  if (SwFlyFrameFormat* pFormat
1536  = dynamic_cast<SwFlyFrameFormat*>(pOLENode->GetFlyFormat()))
1537  {
1539  {
1540  pObject->SetTitle(pObject->GetTitle() + aToken);
1541  break;
1542  }
1543  }
1544  }
1545 
1546  m_xDoc->getIDocumentContentOperations().InsertString( *m_pPam, aToken );
1547 
1548  // if there are temporary paragraph attributes and the
1549  // paragraph isn't empty then the paragraph attributes
1550  // are final.
1551  m_aParaAttrs.clear();
1552 
1553  SetAttr();
1554  }
1555  break;
1556 
1557  case HtmlTokenId::HORZRULE:
1558  InsertHorzRule();
1559  break;
1560 
1561  case HtmlTokenId::IMAGE:
1562  InsertImage();
1563  // if only the parser references the doc, we can break and set
1564  // an error code
1565  if( 1 == m_xDoc->getReferenceCount() )
1566  {
1567  eState = SvParserState::Error;
1568  }
1569  break;
1570 
1571  case HtmlTokenId::SPACER:
1572  InsertSpacer();
1573  break;
1574 
1575  case HtmlTokenId::EMBED:
1576  InsertEmbed();
1577  break;
1578 
1579  case HtmlTokenId::NOEMBED_ON:
1580  m_bInNoEmbed = true;
1581  m_bCallNextToken = bool(m_xTable);
1582  ReadRawData( OOO_STRING_SVTOOLS_HTML_noembed );
1583  break;
1584 
1585  case HtmlTokenId::DEFLIST_ON:
1586  if( m_nOpenParaToken != HtmlTokenId::NONE )
1587  EndPara();
1588  NewDefList();
1589  break;
1590  case HtmlTokenId::DEFLIST_OFF:
1591  if( m_nOpenParaToken != HtmlTokenId::NONE )
1592  EndPara();
1593  EndDefListItem( HtmlTokenId::NONE );
1594  EndDefList();
1595  break;
1596 
1597  case HtmlTokenId::DD_ON:
1598  case HtmlTokenId::DT_ON:
1599  if( m_nOpenParaToken != HtmlTokenId::NONE )
1600  EndPara();
1601  EndDefListItem();// close <DD>/<DT> and set no template
1602  NewDefListItem( nToken );
1603  break;
1604 
1605  case HtmlTokenId::DD_OFF:
1606  case HtmlTokenId::DT_OFF:
1607  // c.f. HtmlTokenId::LI_OFF
1608  // Actually we should close a DD/DT now.
1609  // But neither Netscape nor Microsoft do this and so don't we.
1610  EndDefListItem( nToken );
1611  break;
1612 
1613  // divisions
1614  case HtmlTokenId::DIVISION_ON:
1615  case HtmlTokenId::CENTER_ON:
1616  if (!m_isInTableStructure)
1617  {
1618  if (m_nOpenParaToken != HtmlTokenId::NONE)
1619  {
1620  if (IsReadPRE())
1621  m_nOpenParaToken = HtmlTokenId::NONE;
1622  else
1623  EndPara();
1624  }
1625  NewDivision( nToken );
1626  }
1627  break;
1628 
1629  case HtmlTokenId::DIVISION_OFF:
1630  case HtmlTokenId::CENTER_OFF:
1631  if (!m_isInTableStructure)
1632  {
1633  if (m_nOpenParaToken != HtmlTokenId::NONE)
1634  {
1635  if (IsReadPRE())
1636  m_nOpenParaToken = HtmlTokenId::NONE;
1637  else
1638  EndPara();
1639  }
1640  EndDivision();
1641  }
1642  break;
1643 
1644  case HtmlTokenId::MULTICOL_ON:
1645  if( m_nOpenParaToken != HtmlTokenId::NONE )
1646  EndPara();
1647  NewMultiCol();
1648  break;
1649 
1650  case HtmlTokenId::MULTICOL_OFF:
1651  if( m_nOpenParaToken != HtmlTokenId::NONE )
1652  EndPara();
1653  EndTag( HtmlTokenId::MULTICOL_ON );
1654  break;
1655 
1656  case HtmlTokenId::MARQUEE_ON:
1657  NewMarquee();
1658  m_bCallNextToken = m_pMarquee!=nullptr && m_xTable;
1659  break;
1660 
1661  case HtmlTokenId::FORM_ON:
1662  NewForm();
1663  break;
1664  case HtmlTokenId::FORM_OFF:
1665  EndForm();
1666  break;
1667 
1668  // templates
1669  case HtmlTokenId::PARABREAK_ON:
1670  if( m_nOpenParaToken != HtmlTokenId::NONE )
1671  EndPara( true );
1672  NewPara();
1673  break;
1674 
1675  case HtmlTokenId::PARABREAK_OFF:
1676  EndPara( true );
1677  break;
1678 
1679  case HtmlTokenId::ADDRESS_ON:
1680  if( m_nOpenParaToken != HtmlTokenId::NONE )
1681  EndPara();
1682  NewTextFormatColl( HtmlTokenId::ADDRESS_ON, RES_POOLCOLL_SENDADRESS );
1683  break;
1684 
1685  case HtmlTokenId::ADDRESS_OFF:
1686  if( m_nOpenParaToken != HtmlTokenId::NONE )
1687  EndPara();
1688  EndTextFormatColl( HtmlTokenId::ADDRESS_OFF );
1689  break;
1690 
1691  case HtmlTokenId::BLOCKQUOTE_ON:
1692  case HtmlTokenId::BLOCKQUOTE30_ON:
1693  if( m_nOpenParaToken != HtmlTokenId::NONE )
1694  EndPara();
1695  NewTextFormatColl( HtmlTokenId::BLOCKQUOTE_ON, RES_POOLCOLL_HTML_BLOCKQUOTE );
1696  break;
1697 
1698  case HtmlTokenId::BLOCKQUOTE_OFF:
1699  case HtmlTokenId::BLOCKQUOTE30_OFF:
1700  if( m_nOpenParaToken != HtmlTokenId::NONE )
1701  EndPara();
1702  EndTextFormatColl( HtmlTokenId::BLOCKQUOTE_ON );
1703  break;
1704 
1705  case HtmlTokenId::PREFORMTXT_ON:
1706  case HtmlTokenId::LISTING_ON:
1707  case HtmlTokenId::XMP_ON:
1708  if( m_nOpenParaToken != HtmlTokenId::NONE )
1709  EndPara();
1711  break;
1712 
1713  case HtmlTokenId::PREFORMTXT_OFF:
1714  m_bNoParSpace = true; // the last PRE-paragraph gets a spacing
1715  EndTextFormatColl( HtmlTokenId::PREFORMTXT_OFF );
1716  break;
1717 
1718  case HtmlTokenId::LISTING_OFF:
1719  case HtmlTokenId::XMP_OFF:
1720  EndTextFormatColl( nToken );
1721  break;
1722 
1723  case HtmlTokenId::HEAD1_ON:
1724  case HtmlTokenId::HEAD2_ON:
1725  case HtmlTokenId::HEAD3_ON:
1726  case HtmlTokenId::HEAD4_ON:
1727  case HtmlTokenId::HEAD5_ON:
1728  case HtmlTokenId::HEAD6_ON:
1729  if( m_nOpenParaToken != HtmlTokenId::NONE )
1730  {
1731  if( IsReadPRE() )
1732  m_nOpenParaToken = HtmlTokenId::NONE;
1733  else
1734  EndPara();
1735  }
1736  NewHeading( nToken );
1737  break;
1738 
1739  case HtmlTokenId::HEAD1_OFF:
1740  case HtmlTokenId::HEAD2_OFF:
1741  case HtmlTokenId::HEAD3_OFF:
1742  case HtmlTokenId::HEAD4_OFF:
1743  case HtmlTokenId::HEAD5_OFF:
1744  case HtmlTokenId::HEAD6_OFF:
1745  EndHeading();
1746  break;
1747 
1748  case HtmlTokenId::TABLE_ON:
1749  if( !m_vPendingStack.empty() )
1750  BuildTable( SvxAdjust::End );
1751  else
1752  {
1753  if( m_nOpenParaToken != HtmlTokenId::NONE )
1754  EndPara();
1755  OSL_ENSURE(!m_xTable.get(), "table in table not allowed here");
1756  if( !m_xTable && (IsNewDoc() || !m_pPam->GetNode().FindTableNode()) &&
1757  (m_pPam->GetPoint()->nNode.GetIndex() >
1758  m_xDoc->GetNodes().GetEndOfExtras().GetIndex() ||
1760  {
1761  if ( m_nParaCnt < 5 )
1762  Show(); // show what we have up to here
1763 
1764  SvxAdjust eAdjust = m_xAttrTab->pAdjust
1765  ? static_cast<const SvxAdjustItem&>(m_xAttrTab->pAdjust->GetItem()).
1766  GetAdjust()
1767  : SvxAdjust::End;
1768  BuildTable( eAdjust );
1769  }
1770  else
1771  bInsertUnknown = m_bKeepUnknown;
1772  }
1773  break;
1774 
1775  // lists
1776  case HtmlTokenId::DIRLIST_ON:
1777  case HtmlTokenId::MENULIST_ON:
1778  case HtmlTokenId::ORDERLIST_ON:
1779  case HtmlTokenId::UNORDERLIST_ON:
1780  if( m_nOpenParaToken != HtmlTokenId::NONE )
1781  EndPara();
1782  NewNumberBulletList( nToken );
1783  break;
1784 
1785  case HtmlTokenId::DIRLIST_OFF:
1786  case HtmlTokenId::MENULIST_OFF:
1787  case HtmlTokenId::ORDERLIST_OFF:
1788  case HtmlTokenId::UNORDERLIST_OFF:
1789  if( m_nOpenParaToken != HtmlTokenId::NONE )
1790  EndPara();
1791  EndNumberBulletListItem( HtmlTokenId::NONE, true );
1792  EndNumberBulletList( nToken );
1793  break;
1794 
1795  case HtmlTokenId::LI_ON:
1796  case HtmlTokenId::LISTHEADER_ON:
1797  if( m_nOpenParaToken != HtmlTokenId::NONE &&
1799  || HtmlTokenId::PARABREAK_ON==m_nOpenParaToken) )
1800  {
1801  // only finish paragraph for <P><LI>, not for <DD><LI>
1802  EndPara();
1803  }
1804 
1805  EndNumberBulletListItem( HtmlTokenId::NONE, false );// close <LI>/<LH> and don't set a template
1806  NewNumberBulletListItem( nToken );
1807  break;
1808 
1809  case HtmlTokenId::LI_OFF:
1810  case HtmlTokenId::LISTHEADER_OFF:
1811  EndNumberBulletListItem( nToken, false );
1812  break;
1813 
1814  // Attribute :
1815  case HtmlTokenId::ITALIC_ON:
1816  {
1820  NewStdAttr( HtmlTokenId::ITALIC_ON,
1821  &m_xAttrTab->pItalic, aPosture,
1822  &m_xAttrTab->pItalicCJK, &aPostureCJK,
1823  &m_xAttrTab->pItalicCTL, &aPostureCTL );
1824  }
1825  break;
1826 
1827  case HtmlTokenId::BOLD_ON:
1828  {
1832  NewStdAttr( HtmlTokenId::BOLD_ON,
1833  &m_xAttrTab->pBold, aWeight,
1834  &m_xAttrTab->pBoldCJK, &aWeightCJK,
1835  &m_xAttrTab->pBoldCTL, &aWeightCTL );
1836  }
1837  break;
1838 
1839  case HtmlTokenId::STRIKE_ON:
1840  case HtmlTokenId::STRIKETHROUGH_ON:
1841  {
1842  NewStdAttr( HtmlTokenId::STRIKE_ON, &m_xAttrTab->pStrike,
1844  }
1845  break;
1846 
1847  case HtmlTokenId::UNDERLINE_ON:
1848  {
1849  NewStdAttr( HtmlTokenId::UNDERLINE_ON, &m_xAttrTab->pUnderline,
1851  }
1852  break;
1853 
1854  case HtmlTokenId::SUPERSCRIPT_ON:
1855  {
1856  NewStdAttr( HtmlTokenId::SUPERSCRIPT_ON, &m_xAttrTab->pEscapement,
1858  }
1859  break;
1860 
1861  case HtmlTokenId::SUBSCRIPT_ON:
1862  {
1863  NewStdAttr( HtmlTokenId::SUBSCRIPT_ON, &m_xAttrTab->pEscapement,
1865  }
1866  break;
1867 
1868  case HtmlTokenId::BLINK_ON:
1869  {
1870  NewStdAttr( HtmlTokenId::BLINK_ON, &m_xAttrTab->pBlink,
1871  SvxBlinkItem( true, RES_CHRATR_BLINK ) );
1872  }
1873  break;
1874 
1875  case HtmlTokenId::SPAN_ON:
1876  NewStdAttr( HtmlTokenId::SPAN_ON );
1877  break;
1878 
1879  case HtmlTokenId::ITALIC_OFF:
1880  case HtmlTokenId::BOLD_OFF:
1881  case HtmlTokenId::STRIKE_OFF:
1882  case HtmlTokenId::UNDERLINE_OFF:
1883  case HtmlTokenId::SUPERSCRIPT_OFF:
1884  case HtmlTokenId::SUBSCRIPT_OFF:
1885  case HtmlTokenId::BLINK_OFF:
1886  case HtmlTokenId::SPAN_OFF:
1887  EndTag( nToken );
1888  break;
1889 
1890  case HtmlTokenId::STRIKETHROUGH_OFF:
1891  EndTag( HtmlTokenId::STRIKE_OFF );
1892  break;
1893 
1894  case HtmlTokenId::BASEFONT_ON:
1895  NewBasefontAttr();
1896  break;
1897  case HtmlTokenId::BASEFONT_OFF:
1898  EndBasefontAttr();
1899  break;
1900  case HtmlTokenId::FONT_ON:
1901  case HtmlTokenId::BIGPRINT_ON:
1902  case HtmlTokenId::SMALLPRINT_ON:
1903  NewFontAttr( nToken );
1904  break;
1905  case HtmlTokenId::FONT_OFF:
1906  case HtmlTokenId::BIGPRINT_OFF:
1907  case HtmlTokenId::SMALLPRINT_OFF:
1908  EndFontAttr( nToken );
1909  break;
1910 
1911  case HtmlTokenId::EMPHASIS_ON:
1912  case HtmlTokenId::CITIATION_ON:
1913  case HtmlTokenId::STRONG_ON:
1914  case HtmlTokenId::CODE_ON:
1915  case HtmlTokenId::SAMPLE_ON:
1916  case HtmlTokenId::KEYBOARD_ON:
1917  case HtmlTokenId::VARIABLE_ON:
1918  case HtmlTokenId::DEFINSTANCE_ON:
1919  case HtmlTokenId::SHORTQUOTE_ON:
1920  case HtmlTokenId::LANGUAGE_ON:
1921  case HtmlTokenId::AUTHOR_ON:
1922  case HtmlTokenId::PERSON_ON:
1923  case HtmlTokenId::ACRONYM_ON:
1924  case HtmlTokenId::ABBREVIATION_ON:
1925  case HtmlTokenId::INSERTEDTEXT_ON:
1926  case HtmlTokenId::DELETEDTEXT_ON:
1927 
1928  case HtmlTokenId::TELETYPE_ON:
1929  NewCharFormat( nToken );
1930  break;
1931 
1932  case HtmlTokenId::SDFIELD_ON:
1933  NewField();
1935  break;
1936 
1937  case HtmlTokenId::EMPHASIS_OFF:
1938  case HtmlTokenId::CITIATION_OFF:
1939  case HtmlTokenId::STRONG_OFF:
1940  case HtmlTokenId::CODE_OFF:
1941  case HtmlTokenId::SAMPLE_OFF:
1942  case HtmlTokenId::KEYBOARD_OFF:
1943  case HtmlTokenId::VARIABLE_OFF:
1944  case HtmlTokenId::DEFINSTANCE_OFF:
1945  case HtmlTokenId::SHORTQUOTE_OFF:
1946  case HtmlTokenId::LANGUAGE_OFF:
1947  case HtmlTokenId::AUTHOR_OFF:
1948  case HtmlTokenId::PERSON_OFF:
1949  case HtmlTokenId::ACRONYM_OFF:
1950  case HtmlTokenId::ABBREVIATION_OFF:
1951  case HtmlTokenId::INSERTEDTEXT_OFF:
1952  case HtmlTokenId::DELETEDTEXT_OFF:
1953 
1954  case HtmlTokenId::TELETYPE_OFF:
1955  EndTag( nToken );
1956  break;
1957 
1958  case HtmlTokenId::HEAD_OFF:
1959  if( !m_aStyleSource.isEmpty() )
1960  {
1961  m_pCSS1Parser->ParseStyleSheet( m_aStyleSource );
1962  m_aStyleSource.clear();
1963  }
1964  break;
1965 
1966  case HtmlTokenId::DOCTYPE:
1967  case HtmlTokenId::BODY_OFF:
1968  case HtmlTokenId::HTML_OFF:
1969  case HtmlTokenId::HEAD_ON:
1970  case HtmlTokenId::TITLE_OFF:
1971  break; // don't evaluate further???
1972  case HtmlTokenId::HTML_ON:
1973  {
1974  const HTMLOptions& rHTMLOptions = GetOptions();
1975  for (size_t i = rHTMLOptions.size(); i; )
1976  {
1977  const HTMLOption& rOption = rHTMLOptions[--i];
1978  if( HtmlOptionId::DIR == rOption.GetToken() )
1979  {
1980  const OUString& rDir = rOption.GetString();
1981  SfxItemSet aItemSet( m_xDoc->GetAttrPool(),
1982  m_pCSS1Parser->GetWhichMap() );
1983  SvxCSS1PropertyInfo aPropInfo;
1984  OUString aDummy;
1985  ParseStyleOptions( aDummy, aDummy, aDummy, aItemSet,
1986  aPropInfo, nullptr, &rDir );
1987 
1988  m_pCSS1Parser->SetPageDescAttrs( nullptr, &aItemSet );
1989  break;
1990  }
1991  }
1992  }
1993  break;
1994 
1995  case HtmlTokenId::INPUT:
1996  InsertInput();
1997  break;
1998 
1999  case HtmlTokenId::TEXTAREA_ON:
2000  NewTextArea();
2002  break;
2003 
2004  case HtmlTokenId::SELECT_ON:
2005  NewSelect();
2007  break;
2008 
2009  case HtmlTokenId::ANCHOR_ON:
2010  NewAnchor();
2011  break;
2012 
2013  case HtmlTokenId::ANCHOR_OFF:
2014  EndAnchor();
2015  break;
2016 
2017  case HtmlTokenId::COMMENT:
2018  if( ( aToken.getLength() > 5 ) && ( ! m_bIgnoreHTMLComments ) )
2019  {
2020  // insert as Post-It
2021  // If there are no space characters right behind
2022  // the <!-- and on front of the -->, leave the comment untouched.
2023  if( ' ' == aToken[ 3 ] &&
2024  ' ' == aToken[ aToken.getLength()-3 ] )
2025  {
2026  OUString aComment( aToken.copy( 3, aToken.getLength()-5 ) );
2027  InsertComment(comphelper::string::strip(aComment, ' '));
2028  }
2029  else
2030  {
2031  OUString aComment = "<" + aToken + ">";
2032  InsertComment( aComment );
2033  }
2034  }
2035  break;
2036 
2037  case HtmlTokenId::MAP_ON:
2038  // Image Maps are read asynchronously: At first only an image map is created
2039  // Areas are processed later. Nevertheless the
2040  // ImageMap is inserted into the IMap-Array, because it might be used
2041  // already.
2042  m_pImageMap = new ImageMap;
2044  {
2045  if (!m_pImageMaps)
2046  m_pImageMaps.reset( new ImageMaps );
2047  m_pImageMaps->push_back(std::unique_ptr<ImageMap>(m_pImageMap));
2048  }
2049  else
2050  {
2051  delete m_pImageMap;
2052  m_pImageMap = nullptr;
2053  }
2054  break;
2055 
2056  case HtmlTokenId::MAP_OFF:
2057  // there is no ImageMap anymore (don't delete IMap, because it's
2058  // already contained in the array!)
2059  m_pImageMap = nullptr;
2060  break;
2061 
2062  case HtmlTokenId::AREA:
2063  if( m_pImageMap )
2064  ParseAreaOptions( m_pImageMap, m_sBaseURL, SvMacroItemId::OnMouseOver,
2065  SvMacroItemId::OnMouseOut );
2066  break;
2067 
2068  case HtmlTokenId::FRAMESET_ON:
2069  bInsertUnknown = m_bKeepUnknown;
2070  break;
2071 
2072  case HtmlTokenId::NOFRAMES_ON:
2073  if( IsInHeader() )
2074  FinishHeader();
2075  bInsertUnknown = m_bKeepUnknown;
2076  break;
2077 
2078  case HtmlTokenId::UNKNOWNCONTROL_ON:
2079  // Ignore content of unknown token in the header, if the token
2080  // does not start with a '!'.
2081  // (but judging from the code, also if does not start with a '%')
2082  // (and also if we're not somewhere we consider PRE)
2083  if( IsInHeader() && !IsReadPRE() && m_aUnknownToken.isEmpty() &&
2084  !sSaveToken.isEmpty() && '!' != sSaveToken[0] &&
2085  '%' != sSaveToken[0] )
2086  m_aUnknownToken = sSaveToken;
2087  [[fallthrough]];
2088 
2089  default:
2090  bInsertUnknown = m_bKeepUnknown;
2091  break;
2092  }
2093 
2094  if( bGetIDOption )
2095  InsertIDOption();
2096 
2097  if( bInsertUnknown )
2098  {
2099  OUStringBuffer aComment("HTML: <");
2100  if( (nToken >= HtmlTokenId::ONOFF_START) && isOffToken(nToken) )
2101  aComment.append("/");
2102  aComment.append(sSaveToken);
2103  if( !aToken.isEmpty() )
2104  {
2105  UnescapeToken();
2106  aComment.append(" ").append(aToken);
2107  }
2108  aComment.append(">");
2109  InsertComment( aComment.makeStringAndClear() );
2110  }
2111 
2112  // if there are temporary paragraph attributes and the
2113  // paragraph isn't empty then the paragraph attributes are final.
2114  if( !m_aParaAttrs.empty() && m_pPam->GetPoint()->nContent.GetIndex() )
2115  m_aParaAttrs.clear();
2116 }
2117 
2118 static void lcl_swhtml_getItemInfo( const HTMLAttr& rAttr,
2119  bool& rScriptDependent,
2120  sal_uInt16& rScriptType )
2121 {
2122  switch( rAttr.GetItem().Which() )
2123  {
2124  case RES_CHRATR_FONT:
2125  case RES_CHRATR_FONTSIZE:
2126  case RES_CHRATR_LANGUAGE:
2127  case RES_CHRATR_POSTURE:
2128  case RES_CHRATR_WEIGHT:
2129  rScriptType = i18n::ScriptType::LATIN;
2130  rScriptDependent = true;
2131  break;
2132  case RES_CHRATR_CJK_FONT:
2136  case RES_CHRATR_CJK_WEIGHT:
2137  rScriptType = i18n::ScriptType::ASIAN;
2138  rScriptDependent = true;
2139  break;
2140  case RES_CHRATR_CTL_FONT:
2144  case RES_CHRATR_CTL_WEIGHT:
2145  rScriptType = i18n::ScriptType::COMPLEX;
2146  rScriptDependent = true;
2147  break;
2148  default:
2149  rScriptDependent = false;
2150  break;
2151  }
2152 }
2153 
2154 bool SwHTMLParser::AppendTextNode( SwHTMLAppendMode eMode, bool bUpdateNum )
2155 {
2156  // A hard line break at the end always must be removed.
2157  // A second one we replace with paragraph spacing.
2158  sal_Int32 nLFStripped = StripTrailingLF();
2159  if( (AM_NOSPACE==eMode || AM_SOFTNOSPACE==eMode) && nLFStripped > 1 )
2160  eMode = AM_SPACE;
2161 
2162  // the hard attributes of this paragraph will never be invalid again
2163  m_aParaAttrs.clear();
2164 
2165  SwTextNode *pTextNode = (AM_SPACE==eMode || AM_NOSPACE==eMode) ?
2166  m_pPam->GetPoint()->nNode.GetNode().GetTextNode() : nullptr;
2167 
2168  if (pTextNode)
2169  {
2170  const SvxULSpaceItem& rULSpace =
2171  static_cast<const SvxULSpaceItem&>(pTextNode->SwContentNode::GetAttr( RES_UL_SPACE ));
2172 
2173  bool bChange = AM_NOSPACE==eMode ? rULSpace.GetLower() > 0
2174  : rULSpace.GetLower() == 0;
2175 
2176  if( bChange )
2177  {
2178  const SvxULSpaceItem& rCollULSpace =
2179  pTextNode->GetAnyFormatColl().GetULSpace();
2180 
2181  bool bMayReset = AM_NOSPACE==eMode ? rCollULSpace.GetLower() == 0
2182  : rCollULSpace.GetLower() > 0;
2183 
2184  if( bMayReset &&
2185  rCollULSpace.GetUpper() == rULSpace.GetUpper() )
2186  {
2187  pTextNode->ResetAttr( RES_UL_SPACE );
2188  }
2189  else
2190  {
2191  pTextNode->SetAttr(
2192  SvxULSpaceItem( rULSpace.GetUpper(),
2193  AM_NOSPACE==eMode ? 0 : HTML_PARSPACE, RES_UL_SPACE ) );
2194  }
2195  }
2196  }
2197  m_bNoParSpace = AM_NOSPACE==eMode || AM_SOFTNOSPACE==eMode;
2198 
2199  SwPosition aOldPos( *m_pPam->GetPoint() );
2200 
2201  bool bRet = m_xDoc->getIDocumentContentOperations().AppendTextNode( *m_pPam->GetPoint() );
2202 
2203  // split character attributes and maybe set none,
2204  // which are set for the whole paragraph
2205  const SwNodeIndex& rEndIdx = aOldPos.nNode;
2206  const sal_Int32 nEndCnt = aOldPos.nContent.GetIndex();
2207  const SwPosition& rPos = *m_pPam->GetPoint();
2208 
2209  HTMLAttr** pHTMLAttributes = reinterpret_cast<HTMLAttr**>(m_xAttrTab.get());
2210  for (auto nCnt = sizeof(HTMLAttrTable) / sizeof(HTMLAttr*); nCnt--; ++pHTMLAttributes)
2211  {
2212  HTMLAttr *pAttr = *pHTMLAttributes;
2213  if( pAttr && pAttr->GetItem().Which() < RES_PARATR_BEGIN )
2214  {
2215  bool bWholePara = false;
2216 
2217  while( pAttr )
2218  {
2219  HTMLAttr *pNext = pAttr->GetNext();
2220  if( pAttr->GetSttParaIdx() < rEndIdx.GetIndex() ||
2221  (!bWholePara &&
2222  pAttr->GetSttPara() == rEndIdx &&
2223  pAttr->GetSttCnt() != nEndCnt) )
2224  {
2225  bWholePara =
2226  pAttr->GetSttPara() == rEndIdx &&
2227  pAttr->GetSttCnt() == 0;
2228 
2229  sal_Int32 nStt = pAttr->m_nStartContent;
2230  bool bScript = false;
2231  sal_uInt16 nScriptItem;
2232  bool bInsert = true;
2233  lcl_swhtml_getItemInfo( *pAttr, bScript,
2234  nScriptItem );
2235  // set previous part
2236  if( bScript )
2237  {
2238  const SwTextNode *pTextNd =
2239  pAttr->GetSttPara().GetNode().GetTextNode();
2240  OSL_ENSURE( pTextNd, "No text node" );
2241  if( pTextNd )
2242  {
2243  const OUString& rText = pTextNd->GetText();
2244  sal_uInt16 nScriptText =
2245  g_pBreakIt->GetBreakIter()->getScriptType(
2246  rText, pAttr->GetSttCnt() );
2247  sal_Int32 nScriptEnd = g_pBreakIt->GetBreakIter()
2248  ->endOfScript( rText, nStt, nScriptText );
2249  while (nScriptEnd < nEndCnt && nScriptEnd != -1)
2250  {
2251  if( nScriptItem == nScriptText )
2252  {
2253  HTMLAttr *pSetAttr =
2254  pAttr->Clone( rEndIdx, nScriptEnd );
2255  pSetAttr->m_nStartContent = nStt;
2256  pSetAttr->ClearPrev();
2257  if( !pNext || bWholePara )
2258  {
2259  if (pSetAttr->m_bInsAtStart)
2260  m_aSetAttrTab.push_front( pSetAttr );
2261  else
2262  m_aSetAttrTab.push_back( pSetAttr );
2263  }
2264  else
2265  pNext->InsertPrev( pSetAttr );
2266  }
2267  nStt = nScriptEnd;
2268  nScriptText = g_pBreakIt->GetBreakIter()->getScriptType(
2269  rText, nStt );
2270  nScriptEnd = g_pBreakIt->GetBreakIter()
2271  ->endOfScript( rText, nStt, nScriptText );
2272  }
2273  bInsert = nScriptItem == nScriptText;
2274  }
2275  }
2276  if( bInsert )
2277  {
2278  HTMLAttr *pSetAttr =
2279  pAttr->Clone( rEndIdx, nEndCnt );
2280  pSetAttr->m_nStartContent = nStt;
2281 
2282  // When the attribute is for the whole paragraph, the outer
2283  // attributes aren't effective anymore. Hence it may not be inserted
2284  // in the Prev-List of an outer attribute, because that won't be
2285  // set. That leads to shifting when fields are used.
2286  if( !pNext || bWholePara )
2287  {
2288  if (pSetAttr->m_bInsAtStart)
2289  m_aSetAttrTab.push_front( pSetAttr );
2290  else
2291  m_aSetAttrTab.push_back( pSetAttr );
2292  }
2293  else
2294  pNext->InsertPrev( pSetAttr );
2295  }
2296  else
2297  {
2298  HTMLAttr *pPrev = pAttr->GetPrev();
2299  if( pPrev )
2300  {
2301  // the previous attributes must be set anyway
2302  if( !pNext || bWholePara )
2303  {
2304  if (pPrev->m_bInsAtStart)
2305  m_aSetAttrTab.push_front( pPrev );
2306  else
2307  m_aSetAttrTab.push_back( pPrev );
2308  }
2309  else
2310  pNext->InsertPrev( pPrev );
2311  }
2312  }
2313  pAttr->ClearPrev();
2314  }
2315 
2316  pAttr->SetStart( rPos );
2317  pAttr = pNext;
2318  }
2319  }
2320  }
2321 
2322  if( bUpdateNum )
2323  {
2324  if( GetNumInfo().GetDepth() )
2325  {
2326  sal_uInt8 nLvl = GetNumInfo().GetLevel();
2327  SetNodeNum( nLvl );
2328  }
2329  else
2331  }
2332 
2333  // We must set the attribute of the paragraph before now (because of JavaScript)
2334  SetAttr();
2335 
2336  // Now it is time to get rid of all script dependent hints that are
2337  // equal to the settings in the style
2338  SwTextNode *pTextNd = rEndIdx.GetNode().GetTextNode();
2339  OSL_ENSURE( pTextNd, "There is the txt node" );
2340  size_t nCntAttr = (pTextNd && pTextNd->GetpSwpHints())
2341  ? pTextNd->GetSwpHints().Count() : 0;
2342  if( nCntAttr )
2343  {
2344  // These are the end position of all script dependent hints.
2345  // If we find a hint that starts before the current end position,
2346  // we have to set it. If we find a hint that start behind or at
2347  // that position, we have to take the hint value into account.
2348  // If it is equal to the style, or in fact the paragraph value
2349  // for that hint, the hint is removed. Otherwise its end position
2350  // is remembered.
2351  sal_Int32 aEndPos[15] =
2352  { 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0 };
2353  SwpHints& rHints = pTextNd->GetSwpHints();
2354  for( size_t i=0; i < nCntAttr; i++ )
2355  {
2356  SwTextAttr *pHt = rHints.Get( i );
2357  sal_uInt16 nWhich = pHt->Which();
2358  sal_Int16 nIdx = 0;
2359  bool bFont = false;
2360  switch( nWhich )
2361  {
2362  case RES_CHRATR_FONT:
2363  nIdx = 0;
2364  bFont = true;
2365  break;
2366  case RES_CHRATR_FONTSIZE:
2367  nIdx = 1;
2368  break;
2369  case RES_CHRATR_LANGUAGE:
2370  nIdx = 2;
2371  break;
2372  case RES_CHRATR_POSTURE:
2373  nIdx = 3;
2374  break;
2375  case RES_CHRATR_WEIGHT:
2376  nIdx = 4;
2377  break;
2378  case RES_CHRATR_CJK_FONT:
2379  nIdx = 5;
2380  bFont = true;
2381  break;
2383  nIdx = 6;
2384  break;
2386  nIdx = 7;
2387  break;
2389  nIdx = 8;
2390  break;
2391  case RES_CHRATR_CJK_WEIGHT:
2392  nIdx = 9;
2393  break;
2394  case RES_CHRATR_CTL_FONT:
2395  nIdx = 10;
2396  bFont = true;
2397  break;
2399  nIdx = 11;
2400  break;
2402  nIdx = 12;
2403  break;
2405  nIdx = 13;
2406  break;
2407  case RES_CHRATR_CTL_WEIGHT:
2408  nIdx = 14;
2409  break;
2410  default:
2411  // Skip to next attribute
2412  continue;
2413  }
2414  const sal_Int32 nStt = pHt->GetStart();
2415  if( nStt >= aEndPos[nIdx] )
2416  {
2417  const SfxPoolItem& rItem =
2418  static_cast<const SwContentNode *>(pTextNd)->GetAttr( nWhich );
2419  if( bFont ? swhtml_css1atr_equalFontItems(rItem,pHt->GetAttr())
2420  : rItem == pHt->GetAttr() )
2421  {
2422  // The hint is the same as set in the paragraph and
2423  // therefore, it can be deleted
2424  // CAUTION!!! This WILL delete the hint and it MAY
2425  // also delete the SwpHints!!! To avoid any trouble
2426  // we leave the loop immediately if this is the last
2427  // hint.
2428  pTextNd->DeleteAttribute( pHt );
2429  if( 1 == nCntAttr )
2430  break;
2431  i--;
2432  nCntAttr--;
2433  }
2434  else
2435  {
2436  // The hint is different. Therefore all hints within that
2437  // hint have to be ignored.
2438  aEndPos[nIdx] = pHt->GetEnd() ? *pHt->GetEnd() : nStt;
2439  }
2440  }
2441  else
2442  {
2443  // The hint starts before another one ends.
2444  // The hint in this case is not deleted
2445  OSL_ENSURE( pHt->GetEnd() && *pHt->GetEnd() <= aEndPos[nIdx],
2446  "hints aren't nested properly!" );
2447  }
2448  }
2449  }
2450 
2451  if (!m_xTable && !--m_nParaCnt)
2452  Show();
2453 
2454  return bRet;
2455 }
2456 
// NOTE(review): the signature line (listing line 2457) is missing from this
// extract; this is presumably the body of SwHTMLParser::AddParSpace() - confirm.
//
// Does nothing unless m_bNoParSpace is set. Otherwise the flag is cleared and
// the text node directly before the current PaM position is inspected: if its
// lower paragraph spacing is zero, the spacing attribute is either reset (when
// the paragraph style provides a lower spacing with the same upper spacing) or
// set anew, with a different SetAttr call for CTL, CJK and other text.
2458 {
2459  //If it already has ParSpace, return
2460  if( !m_bNoParSpace )
2461  return;
2462 
2463  m_bNoParSpace = false;
2464 
      // the node in front of the current insert position
2465  sal_uLong nNdIdx = m_pPam->GetPoint()->nNode.GetIndex() - 1;
2466 
2467  SwTextNode *pTextNode = m_xDoc->GetNodes()[nNdIdx]->GetTextNode();
2468  if( !pTextNode )
2469  return;
2470 
      // note: despite the r-prefix this is a copy of the item, not a reference
2471  SvxULSpaceItem rULSpace =
2472  static_cast<const SvxULSpaceItem&>(pTextNode->SwContentNode::GetAttr( RES_UL_SPACE ));
2473  if( !rULSpace.GetLower() )
2474  {
2475  const SvxULSpaceItem& rCollULSpace =
2476  pTextNode->GetAnyFormatColl().GetULSpace();
      // The style already has a lower spacing and the same upper spacing:
      // dropping the attribute at the node is enough.
2477  if( rCollULSpace.GetLower() &&
2478  rCollULSpace.GetUpper() == rULSpace.GetUpper() )
2479  {
2480  pTextNode->ResetAttr( RES_UL_SPACE );
2481  }
2482  else
2483  {
2484  //What I do here, is that I examine the attributes, and if
2485  //I find out, that it's CJK/CTL, then I set the paragraph space
2486  //to the value set in HTML_CJK_PARSPACE/HTML_CTL_PARSPACE.
2487 
2488  bool bIsCJK = false;
2489  bool bIsCTL = false;
2490 
2491  const size_t nCntAttr = pTextNode->GetpSwpHints()
2492  ? pTextNode->GetSwpHints().Count() : 0;
2493 
      // The first CJK or CTL character attribute found decides; the scan
      // stops at that hint.
2494  for(size_t i = 0; i < nCntAttr; ++i)
2495  {
2496  SwTextAttr *const pHt = pTextNode->GetSwpHints().Get(i);
2497  sal_uInt16 const nWhich = pHt->Which();
2498  if (RES_CHRATR_CJK_FONT == nWhich ||
2499  RES_CHRATR_CJK_FONTSIZE == nWhich ||
2500  RES_CHRATR_CJK_LANGUAGE == nWhich ||
2501  RES_CHRATR_CJK_POSTURE == nWhich ||
2502  RES_CHRATR_CJK_WEIGHT == nWhich)
2503  {
2504  bIsCJK = true;
2505  break;
2506  }
2507  if (RES_CHRATR_CTL_FONT == nWhich ||
2508  RES_CHRATR_CTL_FONTSIZE == nWhich ||
2509  RES_CHRATR_CTL_LANGUAGE == nWhich ||
2510  RES_CHRATR_CTL_POSTURE == nWhich ||
2511  RES_CHRATR_CTL_WEIGHT == nWhich)
2512  {
2513  bIsCTL = true;
2514  break;
2515  }
2516  }
2517 
      // NOTE(review): the argument lines of the three SetAttr calls below
      // (listing lines 2521, 2526 and 2529) are missing from this extract.
2518  if( bIsCTL )
2519  {
2520  pTextNode->SetAttr(
2522  }
2523  else if( bIsCJK )
2524  {
2525  pTextNode->SetAttr(
2527  } else {
2528  pTextNode->SetAttr(
2530  }
2531  }
2532  }
2533 }
2534 
// NOTE(review): the signature line (listing line 2535) is missing from this
// extract; judging by the assertion text below this is presumably
// SwHTMLParser::Show() - confirm.
//
// Ends the pending action on the view shell, checks whether the import was
// aborted (in which case eState becomes SvParserState::Error) and starts a
// new action again.
2536 {
2537  // Here
2538  // - an EndAction is called, so the document is formatted
2539  // - a Reschedule is called,
2540  // - the own View-Shell is set again
2541  // - and a StartAction is called
2542 
2543  OSL_ENSURE( SvParserState::Working==eState, "Show not in working state - That can go wrong" );
2544  SwViewShell *pOldVSh = CallEndAction();
2545 
      // NOTE(review): listing line 2546 is missing from this extract
      // (presumably the Reschedule mentioned above - confirm).
2547 
      // Abort when the doc shell reports an aborted import or when the
      // parser holds the only remaining reference to the document.
2548  if( ( m_xDoc->GetDocShell() && m_xDoc->GetDocShell()->IsAbortingImport() )
2549  || 1 == m_xDoc->getReferenceCount() )
2550  {
2551  // was the import aborted by SFX?
2552  eState = SvParserState::Error;
2553  }
2554 
2555  // Fetch the SwViewShell again, as it could be destroyed in Reschedule.
2556  SwViewShell *pVSh = CallStartAction( pOldVSh );
2557 
2558  // is the current node not visible anymore, then we use a bigger increment
2559  if( pVSh )
2560  {
      // NOTE(review): listing line 2561 (the assignment target and the
      // condition of this conditional expression) is missing from this extract.
2562  ? 5 : 50;
2563  }
2564 }
2565 
// NOTE(review): the signature line (listing line 2566) is missing from this
// extract; judging by the assertion text below this is presumably
// SwHTMLParser::ShowStatline() - confirm.
//
// With a progress object present, only the progress is updated with the
// current read position of rInput. Without one, the abort conditions are
// checked and, if the view shell has an invalid rectangle, the pending action
// is ended and restarted.
2567 {
2568  // Here
2569  // - a Reschedule is called, so it can be scrolled
2570  // - the own View-Shell is set again
2571  // - a StartAction/EndAction is called, when there was scrolling.
2572 
2573  OSL_ENSURE( SvParserState::Working==eState, "ShowStatLine not in working state - That can go wrong" );
2574 
2575  // scroll bar
2576  if (m_xProgress)
2577  {
2578  m_xProgress->Update(rInput.Tell());
      // NOTE(review): listing line 2579 is missing from this extract.
2580  }
2581  else
2582  {
      // NOTE(review): listing line 2583 is missing from this extract.
2584 
2585  if( ( m_xDoc->GetDocShell() && m_xDoc->GetDocShell()->IsAbortingImport() )
2586  || 1 == m_xDoc->getReferenceCount() )
2587  // was the import aborted by SFX?
2588  eState = SvParserState::Error;
2589 
      // NOTE(review): listing line 2590, which declares pVSh, is missing
      // from this extract.
2591  if( pVSh && pVSh->HasInvalidRect() )
2592  {
      // both calls pass false for the pointer check (and CallEndAction
      // also for the pending-action check)
2593  CallEndAction( false, false );
2594  CallStartAction( pVSh, false );
2595  }
2596  }
2597 }
2598 
// NOTE(review): the signature line (listing line 2599) is missing from this
// extract; the assertion texts name the function CallStartAction. The body
// uses the parameters pVSh (a SwViewShell*) and bChkPtr.
//
// Remembers the view shell to work on in m_pActionViewShell and starts an
// action on it. If no shell was passed, or bChkPtr is set, the current view
// shell is fetched from the document's layout access instead.
// Returns m_pActionViewShell, which may be null.
2600 {
2601  OSL_ENSURE( !m_pActionViewShell, "CallStartAction: SwViewShell already set" );
2602 
2603  if( !pVSh || bChkPtr )
2604  {
2605 #if OSL_DEBUG_LEVEL > 0
2606  SwViewShell *pOldVSh = pVSh;
2607 #endif
2608  pVSh = m_xDoc->getIDocumentLayoutAccess().GetCurrentViewShell();
2609 #if OSL_DEBUG_LEVEL > 0
2610  OSL_ENSURE( !pVSh || !pOldVSh || pOldVSh == pVSh, "CallStartAction: Who swapped the SwViewShell?" );
      // pVSh is already null when this condition holds, so the assignment
      // does not change anything.
2611  if( pOldVSh && !pVSh )
2612  pVSh = nullptr;
2613 #endif
2614  }
2615  m_pActionViewShell = pVSh;
2616 
2617  if( m_pActionViewShell )
2618  {
      // an edit shell gets SwEditShell::StartAction()
2619  if( dynamic_cast< const SwEditShell *>( m_pActionViewShell ) != nullptr )
2620  static_cast<SwEditShell*>(m_pActionViewShell)->StartAction();
2621  else
      // NOTE(review): listing line 2622 (the statement of this else branch)
      // is missing from this extract.
2623  }
2624 
2625  return m_pActionViewShell;
2626 }
2627 
// Ends the action that was started on m_pActionViewShell and forgets the
// shell again.
//
// bChkAction - if true, nothing is ended when the shell has no pending action.
// bChkPtr    - if true, m_pActionViewShell is first compared with the
//              document's current view shell and dropped if they differ.
//
// On the early-return path m_pActionViewShell is returned unchanged (it may
// be null). If this parser holds the last reference to the document, eState
// is set to SvParserState::Error.
2628 SwViewShell *SwHTMLParser::CallEndAction( bool bChkAction, bool bChkPtr )
2629 {
2630  if( bChkPtr )
2631  {
2632  SwViewShell *pVSh = m_xDoc->getIDocumentLayoutAccess().GetCurrentViewShell();
2633  OSL_ENSURE( !pVSh || m_pActionViewShell == pVSh,
2634  "CallEndAction: Who swapped the SwViewShell?" );
2635 #if OSL_DEBUG_LEVEL > 0
      // pVSh is already null when this condition holds, so the assignment
      // does not change anything.
2636  if( m_pActionViewShell && !pVSh )
2637  pVSh = nullptr;
2638 #endif
2639  if( pVSh != m_pActionViewShell )
2640  m_pActionViewShell = nullptr;
2641  }
2642 
2643  if( !m_pActionViewShell || (bChkAction && !m_pActionViewShell->ActionPend()) )
2644  return m_pActionViewShell;
2645 
2646  if( dynamic_cast< const SwEditShell *>( m_pActionViewShell ) != nullptr )
2647  {
2648  // Already scrolled?, then make sure that the view doesn't move!
      // The view lock and the EndActionByVirDev flag are saved here and
      // restored after EndAction().
2649  const bool bOldLock = m_pActionViewShell->IsViewLocked();
2650  m_pActionViewShell->LockView( true );
2651  const bool bOldEndActionByVirDev = m_pActionViewShell->IsEndActionByVirDev();
      // NOTE(review): listing line 2652 is missing from this extract.
2653  static_cast<SwEditShell*>(m_pActionViewShell)->EndAction();
2654  m_pActionViewShell->SetEndActionByVirDev( bOldEndActionByVirDev );
2655  m_pActionViewShell->LockView( bOldLock );
2656 
2657  // bChkJumpMark is only set when the object was also found
2658  if( m_bChkJumpMark )
2659  {
2660  const Point aVisSttPos( DOCUMENTBORDER, DOCUMENTBORDER );
2661  if( GetMedium() && aVisSttPos == m_pActionViewShell->VisArea().Pos() )
      // NOTE(review): listing line 2662 (start of the call that receives the
      // URL mark below) is missing from this extract.
2663  GetMedium()->GetURLObject().GetMark() );
2664  m_bChkJumpMark = false;
2665  }
2666  }
2667  else
      // NOTE(review): listing line 2668 (the statement of this else branch)
      // is missing from this extract.
2669 
2670  // if the parser holds the last reference to the document, then we can
2671  // abort here and set an error.
2672  if( 1 == m_xDoc->getReferenceCount() )
2673  {
2674  eState = SvParserState::Error;
2675  }
2676 
      // NOTE(review): listing line 2677, which declares the pVSh returned
      // below, is missing from this extract.
2678  m_pActionViewShell = nullptr;
2679 
2680  return pVSh;
2681 }
2682 
// NOTE(review): the signature line (listing line 2683) is missing from this
// extract; the assertion text names the function CheckActionViewShell.
//
// Compares m_pActionViewShell with the document's current view shell and
// resets m_pActionViewShell to null if they differ.
// Returns the (possibly reset) m_pActionViewShell.
2684 {
2685  SwViewShell *pVSh = m_xDoc->getIDocumentLayoutAccess().GetCurrentViewShell();
2686  OSL_ENSURE( !pVSh || m_pActionViewShell == pVSh,
2687  "CheckActionViewShell: Who has swapped SwViewShell?" );
2688 #if OSL_DEBUG_LEVEL > 0
      // pVSh is already null when this condition holds, so the assignment
      // does not change anything.
2689  if( m_pActionViewShell && !pVSh )
2690  pVSh = nullptr;
2691 #endif
2692  if( pVSh != m_pActionViewShell )
2693  m_pActionViewShell = nullptr;
2694 
2695  return m_pActionViewShell;
2696 }
2697 
// Applies the finished attributes queued in m_aSetAttrTab to the document,
// then converts the queued fly frames in m_aMoveFlyFrames to at-character
// anchoring and finally inserts the field attributes that were put aside
// during the first pass.
//
// bChkEnd      - if true, an attribute is only set when it ends before the
//                current PaM position (see the bSetAttr expression below);
//                otherwise only the nodes-array section is considered.
// bBeforeTable - the call happens before a table is inserted: attributes that
//                end in the current node are ended in the previous node or
//                discarded.
// pPostIts     - optional; when given, PostIt and Script fields are moved
//                to the front of this deque instead of being inserted.
//
// For every entry the whole Prev chain is processed. Each HTMLAttr is deleted
// once handled, except field attributes, whose ownership moves into aFields
// or *pPostIts.
2698 void SwHTMLParser::SetAttr_( bool bChkEnd, bool bBeforeTable,
2699  std::deque<std::unique_ptr<HTMLAttr>> *pPostIts )
2700 {
2701  std::unique_ptr<SwPaM> pAttrPam( new SwPaM( *m_pPam->GetPoint() ) );
2702  const SwNodeIndex& rEndIdx = m_pPam->GetPoint()->nNode;
2703  const sal_Int32 nEndCnt = m_pPam->GetPoint()->nContent.GetIndex();
2704  HTMLAttr* pAttr;
2705  SwContentNode* pCNd;
2706 
      // fields found in the loop below; inserted at the end of this function
2707  std::vector<std::unique_ptr<HTMLAttr>> aFields;
2708 
      // iterate from back to front, because entries are erased on the way
2709  for( auto n = m_aSetAttrTab.size(); n; )
2710  {
2711  pAttr = m_aSetAttrTab[ --n ];
2712  sal_uInt16 nWhich = pAttr->m_pItem->Which();
2713 
2714  sal_uLong nEndParaIdx = pAttr->GetEndParaIdx();
2715  bool bSetAttr;
2716  if( bChkEnd )
2717  {
2718  // Set character attribute with end early on, so set them still in
2719  // the current paragraph (because of JavaScript and various "chats"(?)).
2720  // This shouldn't be done for attributes which are used for
2721  // the whole paragraph, because they could be from a paragraph style
2722  // which can't be set. Because the attributes are inserted with
2723  // SETATTR_DONTREPLACE, they should be able to be set later.
2724  bSetAttr = ( nEndParaIdx < rEndIdx.GetIndex() &&
2725  (RES_LR_SPACE != nWhich || !GetNumInfo().GetNumRule()) ) ||
2726  ( !pAttr->IsLikePara() &&
2727  nEndParaIdx == rEndIdx.GetIndex() &&
2728  pAttr->GetEndCnt() < nEndCnt &&
2729  (isCHRATR(nWhich) || isTXTATR_WITHEND(nWhich)) ) ||
2730  ( bBeforeTable &&
2731  nEndParaIdx == rEndIdx.GetIndex() &&
2732  !pAttr->GetEndCnt() );
2733  }
2734  else
2735  {
2736  // Attributes in body nodes array section shouldn't be set if we are in a
2737  // special nodes array section, but vice versa it's possible.
2738  sal_uLong nEndOfIcons = m_xDoc->GetNodes().GetEndOfExtras().GetIndex();
2739  bSetAttr = nEndParaIdx < rEndIdx.GetIndex() ||
2740  rEndIdx.GetIndex() > nEndOfIcons ||
2741  nEndParaIdx <= nEndOfIcons;
2742  }
2743 
2744  if( bSetAttr )
2745  {
2746  // The attribute shouldn't be in the list of temporary paragraph
2747  // attributes, because then it would be deleted.
      // Note that the loop empties m_aParaAttrs completely; the assertion
      // only checks each entry before it is popped.
2748  while( !m_aParaAttrs.empty() )
2749  {
2750  OSL_ENSURE( pAttr != m_aParaAttrs.back(),
2751  "SetAttr: Attribute must not yet be set" );
2752  m_aParaAttrs.pop_back();
2753  }
2754 
2755  // then set it
2756  m_aSetAttrTab.erase( m_aSetAttrTab.begin() + n );
2757 
      // walk the attribute and everything in its Prev chain
2758  while( pAttr )
2759  {
2760  HTMLAttr *pPrev = pAttr->GetPrev();
2761  if( !pAttr->m_bValid )
2762  {
2763  // invalid attributes can be deleted
2764  delete pAttr;
2765  pAttr = pPrev;
2766  continue;
2767  }
2768 
2769  pCNd = pAttr->m_nStartPara.GetNode().GetContentNode();
2770  if( !pCNd )
2771  {
2772  // because of the awful deleting of nodes an index can also
2773  // point to an end node :-(
2774  if ( (pAttr->GetSttPara() == pAttr->GetEndPara()) &&
2775  !isTXTATR_NOEND(nWhich) )
2776  {
2777  // when the end index also points to the node, we don't
2778  // need to set attributes anymore, except if it's a text attribute.
2779  delete pAttr;
2780  pAttr = pPrev;
2781  continue;
2782  }
2783  pCNd = m_xDoc->GetNodes().GoNext( &(pAttr->m_nStartPara) );
2784  if( pCNd )
2785  pAttr->m_nStartContent = 0;
2786  else
2787  {
2788  OSL_ENSURE( false, "SetAttr: GoNext() failed!" );
2789  delete pAttr;
2790  pAttr = pPrev;
2791  continue;
2792  }
2793  }
2794  pAttrPam->GetPoint()->nNode = pAttr->m_nStartPara;
2795 
2796  // because of the deleting of BRs the start index can also
2797  // point behind the end of the text
2798  if( pAttr->m_nStartContent > pCNd->Len() )
2799  pAttr->m_nStartContent = pCNd->Len();
2800  pAttrPam->GetPoint()->nContent.Assign( pCNd, pAttr->m_nStartContent );
2801 
      // the mark keeps the start position, the point is moved to the end below
2802  pAttrPam->SetMark();
2803  if ( (pAttr->GetSttPara() != pAttr->GetEndPara()) &&
2804  !isTXTATR_NOEND(nWhich) )
2805  {
2806  pCNd = pAttr->m_nEndPara.GetNode().GetContentNode();
2807  if( !pCNd )
2808  {
2809  pCNd = SwNodes::GoPrevious( &(pAttr->m_nEndPara) );
2810  if( pCNd )
2811  pAttr->m_nEndContent = pCNd->Len();
2812  else
2813  {
2814  OSL_ENSURE( false, "SetAttr: GoPrevious() failed!" );
2815  pAttrPam->DeleteMark();
2816  delete pAttr;
2817  pAttr = pPrev;
2818  continue;
2819  }
2820  }
2821 
2822  pAttrPam->GetPoint()->nNode = pAttr->m_nEndPara;
2823  }
2824  else if( pAttr->IsLikePara() )
2825  {
2826  pAttr->m_nEndContent = pCNd->Len();
2827  }
2828 
2829  // because of the deleting of BRs the end index can also
2830  // point behind the end of the text
2831  if( pAttr->m_nEndContent > pCNd->Len() )
2832  pAttr->m_nEndContent = pCNd->Len();
2833 
2834  pAttrPam->GetPoint()->nContent.Assign( pCNd, pAttr->m_nEndContent );
2835  if( bBeforeTable &&
2836  pAttrPam->GetPoint()->nNode.GetIndex() ==
2837  rEndIdx.GetIndex() )
2838  {
2839  // If we're before inserting a table and the attribute ends
2840  // in the current node, then we must end it in the previous
2841  // node or discard it, if it starts in that node.
2842  if( nWhich != RES_BREAK && nWhich != RES_PAGEDESC &&
2843  !isTXTATR_NOEND(nWhich) )
2844  {
2845  if( pAttrPam->GetMark()->nNode.GetIndex() !=
2846  rEndIdx.GetIndex() )
2847  {
2848  OSL_ENSURE( !pAttrPam->GetPoint()->nContent.GetIndex(),
2849  "Content-Position before table not 0???" );
2850  pAttrPam->Move( fnMoveBackward );
2851  }
2852  else
2853  {
2854  pAttrPam->DeleteMark();
2855  delete pAttr;
2856  pAttr = pPrev;
2857  continue;
2858  }
2859  }
2860  }
2861 
2862  switch( nWhich )
2863  {
2864  case RES_FLTR_BOOKMARK: // insert bookmark
2865  {
2866  const OUString sName( static_cast<SfxStringItem*>(pAttr->m_pItem.get())->GetValue() );
2867  IDocumentMarkAccess* const pMarkAccess = m_xDoc->getIDocumentMarkAccess();
2868  IDocumentMarkAccess::const_iterator_t ppBkmk = pMarkAccess->findMark( sName );
2869  if( ppBkmk != pMarkAccess->getAllMarksEnd() &&
2870  (*ppBkmk)->GetMarkStart() == *pAttrPam->GetPoint() )
2871  break; // do not generate duplicates on this position
2872  pAttrPam->DeleteMark();
2873  const ::sw::mark::IMark* const pNewMark = pMarkAccess->makeMark(
2874  *pAttrPam,
2875  sName,
      // NOTE(review): listing lines 2876-2877 (the remaining makeMark
      // arguments) are missing from this extract.
2878 
2879  // jump to bookmark
2880  if( JumpToMarks::Mark == m_eJumpTo && pNewMark->GetName() == m_sJmpMark )
2881  {
2882  m_bChkJumpMark = true;
      // NOTE(review): listing line 2883 is missing from this extract.
2884  }
2885  }
2886  break;
2887  case RES_TXTATR_FIELD:
2888  case RES_TXTATR_ANNOTATION:
2889  case RES_TXTATR_INPUTFIELD:
2890  {
      // The field type is only looked up when a PostIt container was passed.
2891  SwFieldIds nFieldWhich =
2892  pPostIts
2893  ? static_cast<const SwFormatField *>(pAttr->m_pItem.get())->GetField()->GetTyp()->Which()
      // NOTE(review): listing line 2894 (the else operand of this conditional
      // expression) is missing from this extract.
2895  if( pPostIts && (SwFieldIds::Postit == nFieldWhich ||
2896  SwFieldIds::Script == nFieldWhich) )
2897  {
2898  pPostIts->emplace_front( pAttr );
2899  }
2900  else
2901  {
2902  aFields.emplace_back( pAttr);
2903  }
2904  }
      // Ownership of pAttr went to one of the containers above, so it is not
      // deleted here; continue with the Prev chain.
2905  pAttrPam->DeleteMark();
2906  pAttr = pPrev;
2907  continue;
2908 
2909  case RES_LR_SPACE:
2910  if( pAttrPam->GetPoint()->nNode.GetIndex() ==
2911  pAttrPam->GetMark()->nNode.GetIndex())
2912  {
2913  // because of numbering set this attribute directly at node
2914  pCNd->SetAttr( *pAttr->m_pItem );
2915  break;
2916  }
2917  OSL_ENSURE( false,
2918  "LRSpace set over multiple paragraphs!" );
2919  [[fallthrough]]; // (shouldn't reach this point anyway)
2920 
2921  // tdf#94088 expand RES_BACKGROUND to the new fill attribute
2922  // definitions in the range [XATTR_FILL_FIRST .. XATTR_FILL_LAST].
2923  // This is the right place in the future if the adapted fill attributes
2924  // may be handled more directly in HTML import to handle them.
2925  case RES_BACKGROUND:
2926  {
2927  const SvxBrushItem& rBrush = static_cast< SvxBrushItem& >(*pAttr->m_pItem);
      // NOTE(review): listing lines 2928 and 2930 (which set up aNewSet used
      // below) are missing from this extract.
2929 
2931  m_xDoc->getIDocumentContentOperations().InsertItemSet(*pAttrPam, aNewSet, SetAttrMode::DONTREPLACE);
2932  break;
2933  }
2934  default:
2935 
2936  // maybe jump to a bookmark
2937  if( RES_TXTATR_INETFMT == nWhich &&
      // NOTE(review): listing line 2938 (one operand of this condition) is
      // missing from this extract.
2939  m_sJmpMark == static_cast<SwFormatINetFormat*>(pAttr->m_pItem.get())->GetName() )
2940  {
2941  m_bChkJumpMark = true;
      // NOTE(review): listing line 2942 is missing from this extract.
2943  }
2944 
2945  m_xDoc->getIDocumentContentOperations().InsertPoolItem( *pAttrPam, *pAttr->m_pItem, SetAttrMode::DONTREPLACE );
2946  }
2947  pAttrPam->DeleteMark();
2948 
2949  delete pAttr;
2950  pAttr = pPrev;
2951  }
2952  }
2953  }
2954 
      // Second pass: fly frames that were queued for re-anchoring.
      // m_aMoveFlyFrames and m_aMoveFlyCnts are parallel containers with
      // the same index.
2955  for( auto n = m_aMoveFlyFrames.size(); n; )
2956  {
2957  SwFrameFormat *pFrameFormat = m_aMoveFlyFrames[ --n ];
2958 
2959  const SwFormatAnchor& rAnchor = pFrameFormat->GetAnchor();
2960  OSL_ENSURE( RndStdIds::FLY_AT_PARA == rAnchor.GetAnchorId(),
2961  "Only At-Para flys need special handling" );
2962  const SwPosition *pFlyPos = rAnchor.GetContentAnchor();
2963  sal_uLong nFlyParaIdx = pFlyPos->nNode.GetIndex();
2964  bool bMoveFly;
2965  if( bChkEnd )
2966  {
2967  bMoveFly = nFlyParaIdx < rEndIdx.GetIndex() ||
2968  ( nFlyParaIdx == rEndIdx.GetIndex() &&
2969  m_aMoveFlyCnts[n] < nEndCnt );
2970  }
2971  else
2972  {
2973  sal_uLong nEndOfIcons = m_xDoc->GetNodes().GetEndOfExtras().GetIndex();
2974  bMoveFly = nFlyParaIdx < rEndIdx.GetIndex() ||
2975  rEndIdx.GetIndex() > nEndOfIcons ||
2976  nFlyParaIdx <= nEndOfIcons;
2977  }
2978  if( bMoveFly )
2979  {
      // The frames are deleted, the anchor is switched to at-character at
      // the remembered content position, and the frames are created again.
2980  pFrameFormat->DelFrames();
2981  *pAttrPam->GetPoint() = *pFlyPos;
2982  pAttrPam->GetPoint()->nContent.Assign( pAttrPam->GetContentNode(),
2983  m_aMoveFlyCnts[n] );
2984  SwFormatAnchor aAnchor( rAnchor );
2985  aAnchor.SetType( RndStdIds::FLY_AT_CHAR );
2986  aAnchor.SetAnchor( pAttrPam->GetPoint() );
2987  pFrameFormat->SetFormatAttr( aAnchor );
2988 
      // LEFT / TOP orientations are made relative to the anchor character
2989  const SwFormatHoriOrient& rHoriOri = pFrameFormat->GetHoriOrient();
2990  if( text::HoriOrientation::LEFT == rHoriOri.GetHoriOrient() )
2991  {
2992  SwFormatHoriOrient aHoriOri( rHoriOri );
2993  aHoriOri.SetRelationOrient( text::RelOrientation::CHAR );
2994  pFrameFormat->SetFormatAttr( aHoriOri );
2995  }
2996  const SwFormatVertOrient& rVertOri = pFrameFormat->GetVertOrient();
2997  if( text::VertOrientation::TOP == rVertOri.GetVertOrient() )
2998  {
2999  SwFormatVertOrient aVertOri( rVertOri );
3000  aVertOri.SetRelationOrient( text::RelOrientation::CHAR );
3001  pFrameFormat->SetFormatAttr( aVertOri );
3002  }
3003 
3004  pFrameFormat->MakeFrames();
3005  m_aMoveFlyFrames.erase( m_aMoveFlyFrames.begin() + n );
3006  m_aMoveFlyCnts.erase( m_aMoveFlyCnts.begin() + n );
3007  }
3008  }
      // Third pass: insert the fields that were put aside above. Unlike the
      // other attributes they are inserted without SetAttrMode::DONTREPLACE.
3009  for (auto & field : aFields)
3010  {
3011  pCNd = field->m_nStartPara.GetNode().GetContentNode();
3012  pAttrPam->GetPoint()->nNode = field->m_nStartPara;
3013  pAttrPam->GetPoint()->nContent.Assign( pCNd, field->m_nStartContent );
3014 
3015  if( bBeforeTable &&
3016  pAttrPam->GetPoint()->nNode.GetIndex() == rEndIdx.GetIndex() )
3017  {
      // bBeforeTable is true in this branch, so this assertion always fires
      // here; it marks a case that was not expected to occur.
3018  OSL_ENSURE( !bBeforeTable, "Aha, the case does occur" );
3019  OSL_ENSURE( !pAttrPam->GetPoint()->nContent.GetIndex(),
3020  "Content-Position before table not 0???" );
3021  // !!!
3022  pAttrPam->Move( fnMoveBackward );
3023  }
3024 
3025  m_xDoc->getIDocumentContentOperations().InsertPoolItem( *pAttrPam, *field->m_pItem );
3026 
3027  field.reset();
3028  }
3029  aFields.clear();
3030 }
3031 
// Opens a new attribute for rItem at the current PaM position and makes it
// the head of the list that *ppAttr points to.
//
// rAttrTable - attribute table handed on to the HTMLAttr constructor.
// ppAttr     - address of the list head for this attribute kind; updated to
//              point to the newly created attribute.
// rItem      - the pool item the attribute stands for.
//
// The HTMLAttr is allocated with new; this function does not free it.
3032 void SwHTMLParser::NewAttr(const std::shared_ptr<HTMLAttrTable>& rAttrTable, HTMLAttr **ppAttr, const SfxPoolItem& rItem )
3033 {
3034  // Font height and font colour as well as escape attributes may not be
3035  // combined. Therefore they're saved in a list and in it the last opened
3036  // attribute is at the beginning and count is always one. For all other
3037  // attributes count is just incremented.
3038  if( *ppAttr )
3039  {
      // an attribute of this kind is already open: the new one is linked
      // to it via InsertNext and becomes the new head
3040  HTMLAttr *pAttr = new HTMLAttr(*m_pPam->GetPoint(), rItem, ppAttr, rAttrTable);
3041  pAttr->InsertNext( *ppAttr );
3042  (*ppAttr) = pAttr;
3043  }
3044  else
3045  (*ppAttr) = new HTMLAttr(*m_pPam->GetPoint(), rItem, ppAttr, rAttrTable);
3046 }
3047 
// Ends the open attribute pAttr at the current PaM position, removes it from
// its list of open attributes and either queues it for setting or deletes it.
//
// pAttr     - the attribute to end; it is deleted here if it is not queued,
//             so the caller must not use it after a false return.
// bChkEmpty - if true, an attribute whose start equals the end position is
//             not queued (RES_PAGEDESC, RES_BREAK and paragraph attributes
//             for which the PaM was moved back are still queued).
//
// Script dependent attributes that start in the end paragraph are split along
// the script boundaries of the text; only the parts whose script type matches
// the item are queued (as clones).
//
// Returns false if pAttr was deleted instead of being queued, true otherwise.
3048 bool SwHTMLParser::EndAttr( HTMLAttr* pAttr, bool bChkEmpty )
3049 {
3050  bool bRet = true;
3051 
3052  // The list header is saved in the attribute.
3053  HTMLAttr **ppHead = pAttr->m_ppHead;
3054 
3055  OSL_ENSURE( ppHead, "No list header attribute found!" );
3056 
3057  // save the current position as end position
3058  const SwNodeIndex* pEndIdx = &m_pPam->GetPoint()->nNode;
3059  sal_Int32 nEndCnt = m_pPam->GetPoint()->nContent.GetIndex();
3060 
3061  // Is the last started or an earlier started attribute being ended?
3062  HTMLAttr *pLast = nullptr;
3063  if( ppHead && pAttr != *ppHead )
3064  {
3065  // The last started attribute isn't being ended
3066 
3067  // Then we look for attribute which was started immediately afterwards,
3068  // which has also not yet been ended (otherwise it would no longer be
3069  // in the list).
3070  pLast = *ppHead;
3071  while( pLast && pLast->GetNext() != pAttr )
3072  pLast = pLast->GetNext();
3073 
3074  OSL_ENSURE( pLast, "Attribute not found in own list!" );
3075  }
3076 
      // An item from RES_PARATR_BEGIN upwards that would end at content
      // position 0 of a node other than its start node is ended one position
      // earlier.
3077  bool bMoveBack = false;
3078  sal_uInt16 nWhich = pAttr->m_pItem->Which();
3079  if( !nEndCnt && RES_PARATR_BEGIN <= nWhich &&
3080  *pEndIdx != pAttr->GetSttPara() )
3081  {
3082  // Then move back one position in the content!
3083  bMoveBack = m_pPam->Move( fnMoveBackward );
3084  nEndCnt = m_pPam->GetPoint()->nContent.GetIndex();
3085  }
3086 
3087  // now end the attribute
3088  HTMLAttr *pNext = pAttr->GetNext();
3089 
3090  bool bInsert;
3091  sal_uInt16 nScriptItem = 0;
3092  bool bScript = false;
3093  // does it have a non-empty range?
3094  if( !bChkEmpty || (RES_PARATR_BEGIN <= nWhich && bMoveBack) ||
3095  RES_PAGEDESC == nWhich || RES_BREAK == nWhich ||
3096  *pEndIdx != pAttr->GetSttPara() ||
3097  nEndCnt != pAttr->GetSttCnt() )
3098  {
3099  bInsert = true;
3100  // We do some optimization for script dependent attributes here.
3101  if( *pEndIdx == pAttr->GetSttPara() )
3102  {
3103  lcl_swhtml_getItemInfo( *pAttr, bScript, nScriptItem );
3104  }
3105  }
3106  else
3107  {
3108  bInsert = false;
3109  }
3110 
      // only non-null for script dependent attributes that are to be inserted
3111  const SwTextNode *pTextNd = (bInsert && bScript) ?
3112  pAttr->GetSttPara().GetNode().GetTextNode() :
3113  nullptr;
3114 
3115  if (pTextNd)
3116  {
3117  const OUString& rText = pTextNd->GetText();
3118  sal_uInt16 nScriptText = g_pBreakIt->GetBreakIter()->getScriptType(
3119  rText, pAttr->GetSttCnt() );
3120  sal_Int32 nScriptEnd = g_pBreakIt->GetBreakIter()
3121  ->endOfScript( rText, pAttr->GetSttCnt(), nScriptText );
      // Walk the script runs that end before nEndCnt. A run whose script
      // type matches the item gets a clone ending at the run's end; in either
      // case the start of pAttr is advanced to the end of the run.
3122  while (nScriptEnd < nEndCnt && nScriptEnd != -1)
3123  {
3124  if( nScriptItem == nScriptText )
3125  {
3126  HTMLAttr *pSetAttr = pAttr->Clone( *pEndIdx, nScriptEnd );
3127  pSetAttr->ClearPrev();
3128  if( pNext )
3129  pNext->InsertPrev( pSetAttr );
3130  else
3131  {
3132  if (pSetAttr->m_bInsAtStart)
3133  m_aSetAttrTab.push_front( pSetAttr );
3134  else
3135  m_aSetAttrTab.push_back( pSetAttr );
3136  }
3137  }
3138  pAttr->m_nStartContent = nScriptEnd;
3139  nScriptText = g_pBreakIt->GetBreakIter()->getScriptType(
3140  rText, nScriptEnd );
3141  nScriptEnd = g_pBreakIt->GetBreakIter()
3142  ->endOfScript( rText, nScriptEnd, nScriptText );
3143  }
      // the remaining part is only inserted if its script type matches
3144  bInsert = nScriptItem == nScriptText;
3145  }
3146  if( bInsert )
3147  {
3148  pAttr->m_nEndPara = *pEndIdx;
3149  pAttr->m_nEndContent = nEndCnt;
      // everything except INETFMT and CHARFMT attributes is queued at the front
3150  pAttr->m_bInsAtStart = RES_TXTATR_INETFMT != nWhich &&
3151  RES_TXTATR_CHARFMT != nWhich;
3152 
3153  if( !pNext )
3154  {
3155  // No open attributes of that type exists any longer, so all
3156  // can be set. Except they depend on another attribute, then
3157  // they're appended there.
3158  if (pAttr->m_bInsAtStart)
3159  m_aSetAttrTab.push_front( pAttr );
3160  else
3161  m_aSetAttrTab.push_back( pAttr );
3162  }
3163  else
3164  {
3165  // There are other open attributes of that type,
3166  // therefore the setting must be postponed.
3167  // Hence the current attribute is added at the end
3168  // of the Prev-List of the successor.
3169  pNext->InsertPrev( pAttr );
3170  }
3171  }
3172  else
3173  {
3174  // Then don't insert, but delete. Because of the "faking" of styles
3175  // by hard attributing there can be also other empty attributes in the
3176  // Prev-List, which must be set anyway.
      // pPrev is fetched before the delete; pAttr is not touched afterwards
3177  HTMLAttr *pPrev = pAttr->GetPrev();
3178  bRet = false;
3179  delete pAttr;
3180 
3181  if( pPrev )
3182  {
3183  // The previous attributes must be set anyway.
3184  if( pNext )
3185  pNext->InsertPrev( pPrev );
3186  else
3187  {
3188  if (pPrev->m_bInsAtStart)
3189  m_aSetAttrTab.push_front( pPrev );
3190  else
3191  m_aSetAttrTab.push_back( pPrev );
3192  }
3193  }
3194 
3195  }
3196 
3197  // If the first attribute of the list was set, then the list header
3198  // must be corrected as well.
3199  if( pLast )
3200  pLast->m_pNext = pNext;
3201  else if( ppHead )
3202  *ppHead = pNext;
3203 
3204  if( bMoveBack )
      // NOTE(review): listing line 3205 (the statement guarded by this if,
      // presumably moving the PaM forward again) is missing from this extract.
3206 
3207  return bRet;
3208 }
3209 
// NOTE(review): the signature line (listing line 3210) is missing from this
// extract; this is presumably the body of SwHTMLParser::DeleteAttr() taking
// an HTMLAttr* named pAttr - confirm.
//
// Removes pAttr from its list of open attributes and deletes it without
// setting it. Attributes in its Prev chain are kept: they are handed to the
// next open attribute or, if there is none, queued in m_aSetAttrTab.
3211 {
3212  // preliminary paragraph attributes are not allowed here, they could
3213  // be set here and then the pointers become invalid!
3214  OSL_ENSURE(m_aParaAttrs.empty(),
3215  "Danger: there are non-final paragraph attributes");
3216  m_aParaAttrs.clear();
3217 
3218  // The list header is saved in the attribute
3219  HTMLAttr **ppHead = pAttr->m_ppHead;
3220 
3221  OSL_ENSURE( ppHead, "no list header attribute found!" );
3222 
3223  // Is the last started or an earlier started attribute being removed?
3224  HTMLAttr *pLast = nullptr;
3225  if( ppHead && pAttr != *ppHead )
3226  {
3227  // The last started attribute isn't being ended
3228 
3229  // Then we look for attribute which was started immediately afterwards,
3230  // which has also not yet been ended (otherwise it would no longer be
3231  // in the list).
3232  pLast = *ppHead;
3233  while( pLast && pLast->GetNext() != pAttr )
3234  pLast = pLast->GetNext();
3235 
3236  OSL_ENSURE( pLast, "Attribute not found in own list!" );
3237  }
3238 
3239  // now delete the attribute
      // Next/Prev are read before the delete, because pAttr is gone afterwards
3240  HTMLAttr *pNext = pAttr->GetNext();
3241  HTMLAttr *pPrev = pAttr->GetPrev();
3242  //hold ref to xAttrTab until end of scope to ensure *ppHead validity
3243  std::shared_ptr<HTMLAttrTable> xAttrTab(pAttr->m_xAttrTab);
3244  delete pAttr;
3245 
3246  if( pPrev )
3247  {
3248  // The previous attributes must be set anyway.
3249  if( pNext )
3250  pNext->InsertPrev( pPrev );
3251  else
3252  {
3253  if (pPrev->m_bInsAtStart)
3254  m_aSetAttrTab.push_front( pPrev );
3255  else
3256  m_aSetAttrTab.push_back( pPrev );
3257  }
3258  }
3259 
3260  // If the first attribute of the list was deleted, then the list header
3261  // must be corrected as well.
3262  if( pLast )
3263  pLast->m_pNext = pNext;
3264  else if( ppHead )
3265  *ppHead = pNext;
3266 }
3267 
// Moves all lists of open attributes from m_xAttrTab into rNewAttrTab and
// leaves every slot of m_xAttrTab empty.
//
// rNewAttrTab - the table that takes over the lists; every slot is
//               overwritten.
//
// Both tables are accessed as flat arrays of HTMLAttr* via reinterpret_cast,
// with sizeof(HTMLAttrTable) / sizeof(HTMLAttr*) slots.
3268 void SwHTMLParser::SaveAttrTab(std::shared_ptr<HTMLAttrTable> const & rNewAttrTab)
3269 {
3270  // preliminary paragraph attributes are not allowed here, they could
3271  // be set here and then the pointers become invalid!
3272  OSL_ENSURE(m_aParaAttrs.empty(),
3273  "Danger: there are non-final paragraph attributes");
3274  m_aParaAttrs.clear();
3275 
3276  HTMLAttr** pHTMLAttributes = reinterpret_cast<HTMLAttr**>(m_xAttrTab.get());
3277  HTMLAttr** pSaveAttributes = reinterpret_cast<HTMLAttr**>(rNewAttrTab.get());
3278 
      // both pointers advance slot by slot through the two tables
3279  for (auto nCnt = sizeof(HTMLAttrTable) / sizeof(HTMLAttr*); nCnt--; ++pHTMLAttributes, ++pSaveAttributes)
3280  {
3281  *pSaveAttributes = *pHTMLAttributes;
3282 
      // every attribute of the list must know its new head slot and table
3283  HTMLAttr *pAttr = *pSaveAttributes;
3284  while (pAttr)
3285  {
3286  pAttr->SetHead(pSaveAttributes, rNewAttrTab);
3287  pAttr = pAttr->GetNext();
3288  }
3289 
3290  *pHTMLAttributes = nullptr;
3291  }
3292 }
3293 
// Splits all open attributes at the current PaM position: the part up to the
// split position is queued for setting (as a clone), and the original
// attribute is restarted at the current position and moved into rNewAttrTab.
// Afterwards every slot of m_xAttrTab is empty.
//
// rNewAttrTab  - the table that receives the restarted attributes; every
//                slot is overwritten.
// bMoveEndBack - if true, the split-off part ends at the end of the previous
//                content node instead of the current position. If there is no
//                such node, or it lies outside the content area, nothing is
//                queued.
//
// Both tables are accessed as flat arrays of HTMLAttr* via reinterpret_cast,
// with sizeof(HTMLAttrTable) / sizeof(HTMLAttr*) slots.
3294 void SwHTMLParser::SplitAttrTab( std::shared_ptr<HTMLAttrTable> const & rNewAttrTab,
3295  bool bMoveEndBack )
3296 {
3297  // preliminary paragraph attributes are not allowed here, they could
3298  // be set here and then the pointers become invalid!
3299  OSL_ENSURE(m_aParaAttrs.empty(),
3300  "Danger: there are non-final paragraph attributes");
3301  m_aParaAttrs.clear();
3302 
      // nSttIdx/nSttCnt: where the attributes are restarted;
      // nEndIdx/nEndCnt: where the split-off part ends
3303  const SwNodeIndex& nSttIdx = m_pPam->GetPoint()->nNode;
3304  SwNodeIndex nEndIdx( nSttIdx );
3305 
3306  // close all still open attributes and re-open them after the table
3307  HTMLAttr** pHTMLAttributes = reinterpret_cast<HTMLAttr**>(m_xAttrTab.get());
3308  HTMLAttr** pSaveAttributes = reinterpret_cast<HTMLAttr**>(rNewAttrTab.get());
3309  bool bSetAttr = true;
3310  const sal_Int32 nSttCnt = m_pPam->GetPoint()->nContent.GetIndex();
3311  sal_Int32 nEndCnt = nSttCnt;
3312 
3313  if( bMoveEndBack )
3314  {
3315  sal_uLong nOldEnd = nEndIdx.GetIndex();
3316  sal_uLong nTmpIdx;
      // nTmpIdx becomes the lower bound that the moved end index must exceed.
      // The assignments inside the condition are intentional: it ends up as
      // the first of EndOfExtras / EndOfAutotext that is < nOldEnd, or as
      // EndOfInserts if both are >= nOldEnd.
3317  if( ( nTmpIdx = m_xDoc->GetNodes().GetEndOfExtras().GetIndex()) >= nOldEnd ||
3318  ( nTmpIdx = m_xDoc->GetNodes().GetEndOfAutotext().GetIndex()) >= nOldEnd )
3319  {
3320  nTmpIdx = m_xDoc->GetNodes().GetEndOfInserts().GetIndex();
3321  }
3322  SwContentNode* pCNd = SwNodes::GoPrevious(&nEndIdx);
3323 
3324  // Don't set attributes, when the PaM was moved outside of the content area.
3325  bSetAttr = pCNd && nTmpIdx < nEndIdx.GetIndex();
3326 
      // pCNd is only dereferenced when bSetAttr is true, i.e. when it is non-null
3327  nEndCnt = (bSetAttr ? pCNd->Len() : 0);
3328  }
3329  for (auto nCnt = sizeof(HTMLAttrTable) / sizeof(HTMLAttr*); nCnt--; (++pHTMLAttributes, ++pSaveAttributes))
3330  {
3331  HTMLAttr *pAttr = *pHTMLAttributes;
3332  *pSaveAttributes = nullptr;
3333  while( pAttr )
3334  {
      // Next/Prev are read up front; pNext drives the iteration after
      // pAttr has been reset below
3335  HTMLAttr *pNext = pAttr->GetNext();
3336  HTMLAttr *pPrev = pAttr->GetPrev();
3337 
      // only attributes with a non-empty range up to the end position
      // are split off
3338  if( bSetAttr &&
3339  ( pAttr->GetSttParaIdx() < nEndIdx.GetIndex() ||
3340  (pAttr->GetSttPara() == nEndIdx &&
3341  pAttr->GetSttCnt() != nEndCnt) ) )
3342  {
3343  // The attribute must be set before the list. We need the
3344  // original and therefore we clone it, because pointer to the
3345  // attribute exist in the other contexts. The Next-List is lost
3346  // in doing so, but the Previous-List is preserved.
3347  HTMLAttr *pSetAttr = pAttr->Clone( nEndIdx, nEndCnt );
3348 
3349  if( pNext )
3350  pNext->InsertPrev( pSetAttr );
3351  else
3352  {
3353  if (pSetAttr->m_bInsAtStart)
3354  m_aSetAttrTab.push_front( pSetAttr );
3355  else
3356  m_aSetAttrTab.push_back( pSetAttr );
3357  }
3358  }
3359  else if( pPrev )
3360  {
3361  // If the attribute doesn't need to be set before the table, then
3362  // the previous attributes must still be set.
3363  if( pNext )
3364  pNext->InsertPrev( pPrev );
3365  else
3366  {
3367  if (pPrev->m_bInsAtStart)
3368  m_aSetAttrTab.push_front( pPrev );
3369  else
3370  m_aSetAttrTab.push_back( pPrev );
3371  }
3372  }
3373 
3374  // set the start of the attribute anew and break link
3375  pAttr->Reset(nSttIdx, nSttCnt, pSaveAttributes, rNewAttrTab);
3376 
      // append the attribute to the end of the list in the new table,
      // which keeps the original order
3377  if (*pSaveAttributes)
3378  {
3379  HTMLAttr *pSAttr = *pSaveAttributes;
3380  while( pSAttr->GetNext() )
3381  pSAttr = pSAttr->GetNext();
3382  pSAttr->InsertNext( pAttr );
3383  }
3384  else
3385  *pSaveAttributes = pAttr;
3386 
3387  pAttr = pNext;
3388  }
3389 
3390  *pHTMLAttributes = nullptr;
3391  }
3392 }
3393 
// Moves all attribute chains from rNewAttrTab back into m_xAttrTab.
// Each slot pointer is copied over, every attribute in the chain gets its
// head re-pointed to the slot in m_xAttrTab (SetHead), and the slot in
// rNewAttrTab is set to nullptr afterwards.
// The slots of m_xAttrTab are expected to be empty (debug assertion only).
3394 void SwHTMLParser::RestoreAttrTab(std::shared_ptr<HTMLAttrTable> const & rNewAttrTab)
3395 {
3396  // preliminary paragraph attributes are not allowed here, they could
3397  // be set here and then the pointers become invalid!
3398  OSL_ENSURE(m_aParaAttrs.empty(),
3399  "Danger: there are non-final paragraph attributes");
3400  m_aParaAttrs.clear();
3401
      // Both tables are walked as flat arrays of HTMLAttr* slots.
3402  HTMLAttr** pHTMLAttributes = reinterpret_cast<HTMLAttr**>(m_xAttrTab.get());
3403  HTMLAttr** pSaveAttributes = reinterpret_cast<HTMLAttr**>(rNewAttrTab.get());
3404
3405  for (auto nCnt = sizeof(HTMLAttrTable) / sizeof(HTMLAttr*); nCnt--; ++pHTMLAttributes, ++pSaveAttributes)
3406  {
3407  OSL_ENSURE(!*pHTMLAttributes, "The attribute table is not empty!");
3408
3409  *pHTMLAttributes = *pSaveAttributes;
3410
      // Re-home every attribute of the chain to the slot in m_xAttrTab.
3411  HTMLAttr *pAttr = *pHTMLAttributes;
3412  while (pAttr)
3413  {
3414  OSL_ENSURE( !pAttr->GetPrev() || !pAttr->GetPrev()->m_ppHead,
3415  "Previous attribute has still a header" );
3416  pAttr->SetHead(pHTMLAttributes, m_xAttrTab);
3417  pAttr = pAttr->GetNext();
3418  }
3419
3420  *pSaveAttributes = nullptr;
3421  }
3422 }
3423 
// Creates a new HTMLAttr for rItem at the current PaM point, with no head
// slot (nullptr) and an empty attribute-table pointer, and queues it in
// m_aSetAttrTab: at the front if bInsAtStart is true, at the back otherwise.
// NOTE(review): the attribute is allocated with a raw new; presumably
// m_aSetAttrTab's consumer is responsible for deleting it - confirm.
3424 void SwHTMLParser::InsertAttr( const SfxPoolItem& rItem, bool bInsAtStart )
3425 {
3426  HTMLAttr* pTmp = new HTMLAttr(*m_pPam->GetPoint(), rItem, nullptr, std::shared_ptr<HTMLAttrTable>());
3427  if (bInsAtStart)
3428  m_aSetAttrTab.push_front( pTmp );
3429  else
3430  m_aSetAttrTab.push_back( pTmp );
3431 }
3432 
// Consumes rAttrs front to back: for each entry a new attribute for the
// entry's item is queued at the back of m_aSetAttrTab via
// InsertAttr( item, false ). rAttrs is taken by value, so the caller's
// deque is not touched by this function.
3433 void SwHTMLParser::InsertAttrs( std::deque<std::unique_ptr<HTMLAttr>> rAttrs )
3434 {
3435  while( !rAttrs.empty() )
3436  {
      // Take ownership of the front entry; the HTMLAttr it owns is destroyed
      // when pAttr goes out of scope at the end of this iteration.
3437  std::unique_ptr<HTMLAttr> pAttr = std::move(rAttrs.front());
3438  InsertAttr( pAttr->GetItem(), false );
3439  rAttrs.pop_front();
3440  }
3441 }
3442 
// NOTE(review): the signature line (listing line 3443) is missing from this
// listing; the body uses a token parameter named nToken - presumably
// SwHTMLParser::NewStdAttr( HtmlTokenId nToken ), confirm against the file.
//
// Opens a context for nToken: reads the ID, STYLE, CLASS, LANG and DIR
// options of the current tag, applies any style options found and pushes
// the new context.
3444 {
3445  OUString aId, aStyle, aLang, aDir;
3446  OUString aClass;
3447
3448  const HTMLOptions& rHTMLOptions = GetOptions();
      // The options are visited from last to first.
3449  for (size_t i = rHTMLOptions.size(); i; )
3450  {
3451  const HTMLOption& rOption = rHTMLOptions[--i];
3452  switch( rOption.GetToken() )
3453  {
3454  case HtmlOptionId::ID:
3455  aId = rOption.GetString();
3456  break;
3457  case HtmlOptionId::STYLE:
3458  aStyle = rOption.GetString();
3459  break;
3460  case HtmlOptionId::CLASS:
3461  aClass = rOption.GetString();
3462  break;
3463  case HtmlOptionId::LANG:
3464  aLang = rOption.GetString();
3465  break;
3466  case HtmlOptionId::DIR:
3467  aDir = rOption.GetString();
3468  break;
3469  default: break;
3470  }
3471  }
3472
3473  // create a new context
3474  std::unique_ptr<HTMLAttrContext> xCntxt(new HTMLAttrContext(nToken));
3475
3476  // parse styles
3477  if( HasStyleOptions( aStyle, aId, aClass, &aLang, &aDir ) )
3478  {
3479  SfxItemSet aItemSet( m_xDoc->GetAttrPool(), m_pCSS1Parser->GetWhichMap() );
3480  SvxCSS1PropertyInfo aPropInfo;
3481
3482  if( ParseStyleOptions( aStyle, aId, aClass, aItemSet, aPropInfo, &aLang, &aDir ) )
3483  {
      // DoPositioning is skipped only for a SPAN with a non-empty class for
      // which CreateContainer succeeds.
3484  if( HtmlTokenId::SPAN_ON != nToken || aClass.isEmpty() ||
3485  !CreateContainer( aClass, aItemSet, aPropInfo, xCntxt.get() ) )
3486  DoPositioning( aItemSet, aPropInfo, xCntxt.get() );
3487  InsertAttrs( aItemSet, aPropInfo, xCntxt.get(), true );
3488  }
3489  }
3490
3491  // save the context
3492  PushContext(xCntxt);
3493 }
3494 
3496  HTMLAttr **ppAttr, const SfxPoolItem & rItem,
3497  HTMLAttr **ppAttr2, const SfxPoolItem *pItem2,
3498  HTMLAttr **ppAttr3, const SfxPoolItem *pItem3 )
3499 {
      // NOTE(review): the first line of this signature (listing line 3495) is
      // missing from this listing; the body uses a token parameter named
      // nToken - presumably this is the NewStdAttr overload taking item
      // slots, confirm against the file.
      //
      // Opens a context for nToken and inserts up to three items.
      // ppAttr/rItem are mandatory; ppAttr2/pItem2 and ppAttr3/pItem3 are
      // optional pairs (an item pointer of nullptr skips the pair).
      // If the tag carries style options, the items are put into an item set
      // together with the parsed styles and inserted via InsertAttrs;
      // otherwise each item is inserted directly into its table slot.
3500  OUString aId, aStyle, aClass, aLang, aDir;
3501
3502  const HTMLOptions& rHTMLOptions = GetOptions();
3503  for (size_t i = rHTMLOptions.size(); i; )
3504  {
3505  const HTMLOption& rOption = rHTMLOptions[--i];
3506  switch( rOption.GetToken() )
3507  {
3508  case HtmlOptionId::ID:
3509  aId = rOption.GetString();
3510  break;
3511  case HtmlOptionId::STYLE:
3512  aStyle = rOption.GetString();
3513  break;
3514  case HtmlOptionId::CLASS:
3515  aClass = rOption.GetString();
3516  break;
3517  case HtmlOptionId::LANG:
3518  aLang = rOption.GetString();
3519  break;
3520  case HtmlOptionId::DIR:
3521  aDir = rOption.GetString();
3522  break;
3523  default: break;
3524  }
3525  }
3526
3527  // create a new context
3528  std::unique_ptr<HTMLAttrContext> xCntxt(new HTMLAttrContext(nToken));
3529
3530  // parse styles
3531  if( HasStyleOptions( aStyle, aId, aClass, &aLang, &aDir ) )
3532  {
3533  SfxItemSet aItemSet( m_xDoc->GetAttrPool(), m_pCSS1Parser->GetWhichMap() );
3534  SvxCSS1PropertyInfo aPropInfo;
3535
      // The tag's own items are put first; ParseStyleOptions then works on
      // the same item set.
3536  aItemSet.Put( rItem );
3537  if( pItem2 )
3538  aItemSet.Put( *pItem2 );
3539  if( pItem3 )
3540  aItemSet.Put( *pItem3 );
3541
3542  if( ParseStyleOptions( aStyle, aId, aClass, aItemSet, aPropInfo, &aLang, &aDir ) )
3543  DoPositioning( aItemSet, aPropInfo, xCntxt.get() );
3544
      // Unlike the positioning above, the insertion happens even if
      // ParseStyleOptions returned false.
3545  InsertAttrs( aItemSet, aPropInfo, xCntxt.get(), true );
3546  }
3547  else
3548  {
3549  InsertAttr( ppAttr ,rItem, xCntxt.get() );
3550  if( pItem2 )
3551  {
3552  OSL_ENSURE( ppAttr2, "missing table entry for item2" );
3553  InsertAttr( ppAttr2, *pItem2, xCntxt.get() );
3554  }
3555  if( pItem3 )
3556  {
3557  OSL_ENSURE( ppAttr3, "missing table entry for item3" );
3558  InsertAttr( ppAttr3, *pItem3, xCntxt.get() );
3559  }
3560  }
3561
3562  // save the context
3563  PushContext(xCntxt);
3564 }
3565 
// NOTE(review): the signature line (listing line 3566) is missing from this
// listing; presumably SwHTMLParser::EndTag( HtmlTokenId nToken ), which other
// blocks in this file call - confirm against the file.
//
// Pops the context matching the "on" variant of nToken and, if one was
// found, ends it via EndContext. Does nothing if no context was popped.
3567 {
3568  // fetch context
3569  std::unique_ptr<HTMLAttrContext> xCntxt(PopContext(getOnToken(nToken)));
3570  if (xCntxt)
3571  {
3572  // and maybe end the attributes
3573  EndContext(xCntxt.get());
3574  }
3575 }
3576 
// NOTE(review): the signature line (listing line 3577) is missing from this
// listing; presumably SwHTMLParser::NewBasefontAttr() - confirm.
//
// Opens a BASEFONT_ON context. The SIZE option (default 3) is clamped to the
// range 1..7 and used as 1-based index into m_aFontHeights; the resulting
// height is inserted for the Western, CJK and CTL font size attributes.
// The clamped size is finally pushed onto m_aBaseFontStack.
3578 {
3579  OUString aId, aStyle, aClass, aLang, aDir;
3580  sal_uInt16 nSize = 3;
3581
3582  const HTMLOptions& rHTMLOptions = GetOptions();
3583  for (size_t i = rHTMLOptions.size(); i; )
3584  {
3585  const HTMLOption& rOption = rHTMLOptions[--i];
3586  switch( rOption.GetToken() )
3587  {
3588  case HtmlOptionId::SIZE:
      // NOTE(review): the number is narrowed to sal_uInt16 before clamping;
      // if GetNumber() can return values above 0xFFFF they would wrap first - confirm.
3589  nSize = static_cast<sal_uInt16>(rOption.GetNumber());
3590  break;
3591  case HtmlOptionId::ID:
3592  aId = rOption.GetString();
3593  break;
3594  case HtmlOptionId::STYLE:
3595  aStyle = rOption.GetString();
3596  break;
3597  case HtmlOptionId::CLASS:
3598  aClass = rOption.GetString();
3599  break;
3600  case HtmlOptionId::LANG:
3601  aLang = rOption.GetString();
3602  break;
3603  case HtmlOptionId::DIR:
3604  aDir = rOption.GetString();
3605  break;
3606  default: break;
3607  }
3608  }
3609
      // Clamp to 1..7 so that nSize-1 is a valid index in the lookups below.
3610  if( nSize < 1 )
3611  nSize = 1;
3612
3613  if( nSize > 7 )
3614  nSize = 7;
3615
3616  // create a new context
3617  std::unique_ptr<HTMLAttrContext> xCntxt(new HTMLAttrContext(HtmlTokenId::BASEFONT_ON));
3618
3619  // parse styles
3620  if( HasStyleOptions( aStyle, aId, aClass, &aLang, &aDir ) )
3621  {
3622  SfxItemSet aItemSet( m_xDoc->GetAttrPool(), m_pCSS1Parser->GetWhichMap() );
3623  SvxCSS1PropertyInfo aPropInfo;
3624
3625  //CJK has different defaults
3626  SvxFontHeightItem aFontHeight( m_aFontHeights[nSize-1], 100, RES_CHRATR_FONTSIZE );
3627  aItemSet.Put( aFontHeight );
3628  SvxFontHeightItem aFontHeightCJK( m_aFontHeights[nSize-1], 100, RES_CHRATR_CJK_FONTSIZE );
3629  aItemSet.Put( aFontHeightCJK );
3630  //Complex type can contain so many types of letters,
3631  //that it's not really worthy to bother, IMO.
3632  //Still, I have set a default.
3633  SvxFontHeightItem aFontHeightCTL( m_aFontHeights[nSize-1], 100, RES_CHRATR_CTL_FONTSIZE );
3634  aItemSet.Put( aFontHeightCTL );
3635
3636  if( ParseStyleOptions( aStyle, aId, aClass, aItemSet, aPropInfo, &aLang, &aDir ) )
3637  DoPositioning( aItemSet, aPropInfo, xCntxt.get() );
3638
3639  InsertAttrs( aItemSet, aPropInfo, xCntxt.get(), true );
3640  }
3641  else
3642  {
      // No style options: insert each height directly into its own table slot.
3643  SvxFontHeightItem aFontHeight( m_aFontHeights[nSize-1], 100, RES_CHRATR_FONTSIZE );
3644  InsertAttr( &m_xAttrTab->pFontHeight, aFontHeight, xCntxt.get() );
3645  SvxFontHeightItem aFontHeightCJK( m_aFontHeights[nSize-1], 100, RES_CHRATR_CJK_FONTSIZE );
3646  InsertAttr( &m_xAttrTab->pFontHeightCJK, aFontHeightCJK, xCntxt.get() );
3647  SvxFontHeightItem aFontHeightCTL( m_aFontHeights[nSize-1], 100, RES_CHRATR_CTL_FONTSIZE );
3648  InsertAttr( &m_xAttrTab->pFontHeightCTL, aFontHeightCTL, xCntxt.get() );
3649  }
3650
3651  // save the context
3652  PushContext(xCntxt);
3653
3654  // save the font size
3655  m_aBaseFontStack.push_back( nSize );
3656 }
3657 
// NOTE(review): the signature line (listing line 3658) is missing from this
// listing; presumably SwHTMLParser::EndBasefontAttr() - confirm.
//
// Ends the BASEFONT_ON context and removes the last entry of
// m_aBaseFontStack, but only while the stack holds more than
// m_nBaseFontStMin entries.
3659 {
3660  EndTag( HtmlTokenId::BASEFONT_ON );
3661
3662  // avoid stack underflow in tables
3663  if( m_aBaseFontStack.size() > m_nBaseFontStMin )
      // begin() + size() - 1 addresses the last element.
3664  m_aBaseFontStack.erase( m_aBaseFontStack.begin() + m_aBaseFontStack.size() - 1 );
3665 }
3666 
// NOTE(review): the signature line (listing line 3667) is missing from this
// listing; the body uses a token parameter named nToken - presumably
// SwHTMLParser::NewFontAttr( HtmlTokenId nToken ), confirm against the file.
//
// Opens a context for a font-changing tag. For FONT_ON the SIZE, COLOR and
// FACE options are evaluated; for any other token (per the comment below:
// BIGPRINT_ON or SMALLPRINT_ON) the current size is raised or lowered by one
// step. Sizes are kept in the range 1..7 and mapped to heights through
// m_aFontHeights or, inside headings, through the heading text collections.
// The resulting size (0 if none was set) is pushed onto m_aFontStack.
3668 {
3669  sal_uInt16 nBaseSize =
      // NOTE(review): listing lines 3670-3671 (condition and first operand of
      // this conditional expression) are missing from this listing; only the
      // fallback value 3 is visible.
3672  : 3 );
      // Current font size: the masked top of m_aFontStack if the stack holds
      // more than m_nFontStMin entries, otherwise the base size.
3673  sal_uInt16 nFontSize =
3674  ( m_aFontStack.size() > m_nFontStMin
3675  ? (m_aFontStack[m_aFontStack.size()-1] & FONTSIZE_MASK)
3676  : nBaseSize );
3677
3678  OUString aFace, aId, aStyle, aClass, aLang, aDir;
3679  Color aColor;
3680  sal_uLong nFontHeight = 0; // actual font height to set
3681  sal_uInt16 nSize = 0; // font height in Netscape notation (1-7)
3682  bool bColor = false;
3683
3684  const HTMLOptions& rHTMLOptions = GetOptions();
3685  for (size_t i = rHTMLOptions.size(); i; )
3686  {
3687  const HTMLOption& rOption = rHTMLOptions[--i];
3688  switch( rOption.GetToken() )
3689  {
3690  case HtmlOptionId::SIZE:
3691  if( HtmlTokenId::FONT_ON==nToken && !rOption.GetString().isEmpty() )
3692  {
3693  sal_Int32 nSSize;
      // A leading '+' or '-' makes the value relative to the base size;
      // the addition saturates instead of overflowing.
3694  if( '+' == rOption.GetString()[0] ||
3695  '-' == rOption.GetString()[0] )
3696  nSSize = o3tl::saturating_add<sal_Int32>(nBaseSize, rOption.GetSNumber());
3697  else
3698  nSSize = static_cast<sal_Int32>(rOption.GetNumber());
3699
3700  if( nSSize < 1 )
3701  nSSize = 1;
3702  else if( nSSize > 7 )
3703  nSSize = 7;
3704
3705  nSize = static_cast<sal_uInt16>(nSSize);
3706  nFontHeight = m_aFontHeights[nSize-1];
3707  }
3708  break;
3709  case HtmlOptionId::COLOR:
3710  if( HtmlTokenId::FONT_ON==nToken )
3711  {
3712  rOption.GetColor( aColor );
3713  bColor = true;
3714  }
3715  break;
3716  case HtmlOptionId::FACE:
3717  if( HtmlTokenId::FONT_ON==nToken )
3718  aFace = rOption.GetString();
3719  break;
3720  case HtmlOptionId::ID:
3721  aId = rOption.GetString();
3722  break;
3723  case HtmlOptionId::STYLE:
3724  aStyle = rOption.GetString();
3725  break;
3726  case HtmlOptionId::CLASS:
3727  aClass = rOption.GetString();
3728  break;
3729  case HtmlOptionId::LANG:
3730  aLang = rOption.GetString();
3731  break;
3732  case HtmlOptionId::DIR:
3733  aDir = rOption.GetString();
3734  break;
3735  default: break;
3736  }
3737  }
3738
3739  if( HtmlTokenId::FONT_ON != nToken )
3740  {
3741  // HTML_BIGPRINT_ON or HTML_SMALLPRINT_ON
3742
3743  // In headings the current heading sets the font height
3744  // and not BASEFONT.
3745  const SwFormatColl *pColl = GetCurrFormatColl();
3746  sal_uInt16 nPoolId = pColl ? pColl->GetPoolFormatId() : 0;
3747  if( nPoolId>=RES_POOLCOLL_HEADLINE1 &&
3748  nPoolId<=RES_POOLCOLL_HEADLINE6 )
3749  {
3750  // If the font height in the heading wasn't changed yet,
3751  // then take the one from the style.
      // Maps HEADLINE1..HEADLINE6 to sizes 6..1.
3752  if( m_nFontStHeadStart==m_aFontStack.size() )
3753  nFontSize = static_cast< sal_uInt16 >(6 - (nPoolId - RES_POOLCOLL_HEADLINE1));
3754  }
3755  else
      // Outside of headings nPoolId doubles as a "not in a heading" flag (0).
3756  nPoolId = 0;
3757
      // One step up (max 7) for BIGPRINT_ON, one step down (min 1) otherwise.
3758  if( HtmlTokenId::BIGPRINT_ON == nToken )
3759  nSize = ( nFontSize<7 ? nFontSize+1 : 7 );
3760  else
3761  nSize = ( nFontSize>1 ? nFontSize-1 : 1 );
3762
3763  // If possible in headlines we fetch the new font height
3764  // from the style.
3765  if( nPoolId && nSize>=1 && nSize <=6 )
3766  nFontHeight =
3767  m_pCSS1Parser->GetTextCollFromPool(
3768  RES_POOLCOLL_HEADLINE1+6-nSize )->GetSize().GetHeight();
3769  else
3770  nFontHeight = m_aFontHeights[nSize-1];
3771  }
3772
      // nSize and nFontHeight must be either both zero or both non-zero.
3773  OSL_ENSURE( !nSize == !nFontHeight, "HTML-Font-Size != Font-Height" );
3774
3775  OUString aFontName, aStyleName;
3776  FontFamily eFamily = FAMILY_DONTKNOW; // family and pitch,
3777  FontPitch ePitch = PITCH_DONTKNOW; // if not found
3778  rtl_TextEncoding eEnc = osl_getThreadTextEncoding();
3779
3780  if( !aFace.isEmpty() && !m_pCSS1Parser->IsIgnoreFontFamily() )
3781  {
3782  const FontList *pFList = nullptr;
3783  SwDocShell *pDocSh = m_xDoc->GetDocShell();
3784  if( pDocSh )
3785  {
3786  const SvxFontListItem *pFListItem =
3787  static_cast<const SvxFontListItem *>(pDocSh->GetItem(SID_ATTR_CHAR_FONTLIST));
3788  if( pFListItem )
3789  pFList = pFListItem->GetFontList();
3790  }
3791
      // FACE is a comma separated list. All non-empty names are joined with
      // ';' into aFontName. The font list is consulted only until the first
      // name with a known charset is found; if that charset is SYMBOL, the
      // encoding is switched to SYMBOL as well.
3792  bool bFound = false;
3793  sal_Int32 nStrPos = 0;
3794  while( nStrPos!= -1 )
3795  {
3796  OUString aFName = aFace.getToken( 0, ',', nStrPos );
3797  aFName = comphelper::string::strip(aFName, ' ');
3798  if( !aFName.isEmpty() )
3799  {
3800  if( !bFound && pFList )
3801  {
3802  sal_Handle hFont = pFList->GetFirstFontMetric( aFName );
3803  if( nullptr != hFont )
3804  {
3805  const FontMetric& rFMetric = FontList::GetFontMetric( hFont );
3806  if( RTL_TEXTENCODING_DONTKNOW != rFMetric.GetCharSet() )
3807  {
3808  bFound = true;
3809  if( RTL_TEXTENCODING_SYMBOL == rFMetric.GetCharSet() )
3810  eEnc = RTL_TEXTENCODING_SYMBOL;
3811  }
3812  }
3813  }
3814  if( !aFontName.isEmpty() )
3815  aFontName += ";";
3816  aFontName += aFName;
3817  }
3818  }
3819  }
3820
3821  // create a new context
3822  std::unique_ptr<HTMLAttrContext> xCntxt(new HTMLAttrContext(nToken));
3823
3824  // parse styles
3825  if( HasStyleOptions( aStyle, aId, aClass, &aLang, &aDir ) )
3826  {
3827  SfxItemSet aItemSet( m_xDoc->GetAttrPool(), m_pCSS1Parser->GetWhichMap() );
3828  SvxCSS1PropertyInfo aPropInfo;
3829
3830  if( nFontHeight )
3831  {
3832  SvxFontHeightItem aFontHeight( nFontHeight, 100, RES_CHRATR_FONTSIZE );
3833  aItemSet.Put( aFontHeight );
3834  SvxFontHeightItem aFontHeightCJK( nFontHeight, 100, RES_CHRATR_CJK_FONTSIZE );
3835  aItemSet.Put( aFontHeightCJK );
3836  SvxFontHeightItem aFontHeightCTL( nFontHeight, 100, RES_CHRATR_CTL_FONTSIZE );
3837  aItemSet.Put( aFontHeightCTL );
3838  }
3839  if( bColor )
3840  aItemSet.Put( SvxColorItem(aColor, RES_CHRATR_COLOR) );
3841  if( !aFontName.isEmpty() )
3842  {
3843  SvxFontItem aFont( eFamily, aFontName, aStyleName, ePitch, eEnc, RES_CHRATR_FONT );
3844  aItemSet.Put( aFont );
3845  SvxFontItem aFontCJK( eFamily, aFontName, aStyleName, ePitch, eEnc, RES_CHRATR_CJK_FONT );
3846  aItemSet.Put( aFontCJK );
3847  SvxFontItem aFontCTL( eFamily, aFontName, aStyleName, ePitch, eEnc, RES_CHRATR_CTL_FONT );
3848  aItemSet.Put( aFontCTL );
3849  }
3850
3851  if( ParseStyleOptions( aStyle, aId, aClass, aItemSet, aPropInfo, &aLang, &aDir ) )
3852  DoPositioning( aItemSet, aPropInfo, xCntxt.get() );
3853
3854  InsertAttrs( aItemSet, aPropInfo, xCntxt.get(), true );
3855  }
3856  else
3857  {
3858  if( nFontHeight )
3859  {
      // NOTE(review): all three height items (Western, CJK, CTL) are inserted
      // into the pFontHeight slot here, whereas the BASEFONT handling in this
      // file uses separate pFontHeightCJK / pFontHeightCTL slots for the CJK
      // and CTL items - confirm whether reusing pFontHeight is intended.
3860  SvxFontHeightItem aFontHeight( nFontHeight, 100, RES_CHRATR_FONTSIZE );
3861  InsertAttr( &m_xAttrTab->pFontHeight, aFontHeight, xCntxt.get() );
3862  SvxFontHeightItem aFontHeightCJK( nFontHeight, 100, RES_CHRATR_CJK_FONTSIZE );
3863  InsertAttr( &m_xAttrTab->pFontHeight, aFontHeightCJK, xCntxt.get() );
3864  SvxFontHeightItem aFontHeightCTL( nFontHeight, 100, RES_CHRATR_CTL_FONTSIZE );
3865  InsertAttr( &m_xAttrTab->pFontHeight, aFontHeightCTL, xCntxt.get() );
3866  }
3867  if( bColor )
3868  InsertAttr( &m_xAttrTab->pFontColor, SvxColorItem(aColor, RES_CHRATR_COLOR), xCntxt.get() );
3869  if( !aFontName.isEmpty() )
3870  {
      // NOTE(review): likewise the CJK and CTL font items are inserted into
      // the pFont slot - verify whether dedicated CJK/CTL slots should be used.
3871  SvxFontItem aFont( eFamily, aFontName, aStyleName, ePitch, eEnc, RES_CHRATR_FONT );
3872  InsertAttr( &m_xAttrTab->pFont, aFont, xCntxt.get() );
3873  SvxFontItem aFontCJK( eFamily, aFontName, aStyleName, ePitch, eEnc, RES_CHRATR_CJK_FONT );
3874  InsertAttr( &m_xAttrTab->pFont, aFontCJK, xCntxt.get() );
3875  SvxFontItem aFontCTL( eFamily, aFontName, aStyleName, ePitch, eEnc, RES_CHRATR_CTL_FONT );
3876  InsertAttr( &m_xAttrTab->pFont, aFontCTL, xCntxt.get() );
3877  }
3878  }
3879
3880  // save the context
3881  PushContext(xCntxt);
3882
      // nSize is 0 here when FONT_ON carried no usable SIZE option.
3883  m_aFontStack.push_back( nSize );
3884 }
3885 
// NOTE(review): the signature line (listing line 3886) is missing from this
// listing; the body uses a token parameter named nToken - presumably
// SwHTMLParser::EndFontAttr( HtmlTokenId nToken ), confirm.
//
// Ends the context for nToken and removes the last entry of m_aFontStack,
// but only while the stack holds more than m_nFontStMin entries.
3887 {
3888  EndTag( nToken );
3889
3890  // avoid stack underflow in tables
3891  if( m_aFontStack.size() > m_nFontStMin )
      // begin() + size() - 1 addresses the last element.
3892  m_aFontStack.erase( m_aFontStack.begin() + m_aFontStack.size() - 1 );
3893 }
3894 
// NOTE(review): the signature line (listing line 3895) is missing from this
// listing; presumably SwHTMLParser::NewPara() - confirm.
//
// Starts a paragraph: evaluates the ID, ALIGN, STYLE, CLASS, LANG and DIR
// options, creates and pushes a PARABREAK_ON context, applies style options
// and alignment, and records PARABREAK_ON as the open paragraph token.
3896 {
3897  if( m_pPam->GetPoint()->nContent.GetIndex() )
      // NOTE(review): listing line 3898 (the statement of this if-branch) is
      // missing from this listing.
3899  else
3900  AddParSpace();
3901
      // SvxAdjust::End serves as "no ALIGN option given" marker here; see the
      // comparison before the adjust item is inserted below.
3902  m_eParaAdjust = SvxAdjust::End;
3903  OUString aId, aStyle, aClass, aLang, aDir;
3904
3905  const HTMLOptions& rHTMLOptions = GetOptions();
3906  for (size_t i = rHTMLOptions.size(); i; )
3907  {
3908  const HTMLOption& rOption = rHTMLOptions[--i];
3909  switch( rOption.GetToken() )
3910  {
3911  case HtmlOptionId::ID:
3912  aId = rOption.GetString();
3913  break;
3914  case HtmlOptionId::ALIGN:
3915  m_eParaAdjust = rOption.GetEnum( aHTMLPAlignTable, m_eParaAdjust );
3916  break;
3917  case HtmlOptionId::STYLE:
3918  aStyle = rOption.GetString();
3919  break;
3920  case HtmlOptionId::CLASS:
3921  aClass = rOption.GetString();
3922  break;
3923  case HtmlOptionId::LANG:
3924  aLang = rOption.GetString();
3925  break;
3926  case HtmlOptionId::DIR:
3927  aDir = rOption.GetString();
3928  break;
3929  default: break;
3930  }
3931  }
3932
3933  // create a new context
      // With a class the context also carries RES_POOLCOLL_TEXT and the class name.
3934  std::unique_ptr<HTMLAttrContext> xCntxt(
3935  !aClass.isEmpty() ? new HTMLAttrContext( HtmlTokenId::PARABREAK_ON,
3936  RES_POOLCOLL_TEXT, aClass )
3937  : new HTMLAttrContext( HtmlTokenId::PARABREAK_ON ));
3938
3939  // parse styles (Don't consider class. This is only possible as long as none of
3940  // the CSS1 properties of the class must be formatted hard!!!)
3941  if (HasStyleOptions(aStyle, aId, OUString(), &aLang, &aDir))
3942  {
3943  SfxItemSet aItemSet( m_xDoc->GetAttrPool(), m_pCSS1Parser->GetWhichMap() );
3944  SvxCSS1PropertyInfo aPropInfo;
3945
3946  if (ParseStyleOptions(aStyle, aId, OUString(), aItemSet, aPropInfo, &aLang, &aDir))
3947  {
3948  OSL_ENSURE( aClass.isEmpty() || !m_pCSS1Parser->GetClass( aClass ),
3949  "Class is not considered" );
3950  DoPositioning( aItemSet, aPropInfo, xCntxt.get() );
3951  InsertAttrs( aItemSet, aPropInfo, xCntxt.get() );
3952  }
3953  }
3954
3955  if( SvxAdjust::End != m_eParaAdjust )
3956  InsertAttr( &m_xAttrTab->pAdjust, SvxAdjustItem(m_eParaAdjust, RES_PARATR_ADJUST), xCntxt.get() );
3957
3958  // and push on stack
3959  PushContext( xCntxt );
3960
3961  // set the current style or its attributes
      // The just-pushed context is passed only when a class was given.
3962  SetTextCollAttrs( !aClass.isEmpty() ? m_aContexts.back().get() : nullptr );
3963
3964  // progress bar
3965  ShowStatline();
3966
3967  OSL_ENSURE( m_nOpenParaToken == HtmlTokenId::NONE, "Now an open paragraph element will be lost." );
3968  m_nOpenParaToken = HtmlTokenId::PARABREAK_ON;
3969 }
3970 
// Ends the currently open paragraph element.
// Pops the context of the open paragraph token (or PARABREAK_ON if none is
// recorded), ends its attributes and resets m_nOpenParaToken to NONE.
//
// bReal - if true, paragraph spacing is handled before the context is popped
//         and SetTextCollAttrs() is called afterwards to reset the style;
//         if false, both steps are skipped.
3971 void SwHTMLParser::EndPara( bool bReal )
3972 {
      // Debug-only sanity check; this branch has no effect in non-debug builds.
3973  if (HtmlTokenId::LI_ON==m_nOpenParaToken && m_xTable)
3974  {
3975 #if OSL_DEBUG_LEVEL > 0
3976  const SwNumRule *pNumRule = m_pPam->GetNode().GetTextNode()->GetNumRule();
3977  OSL_ENSURE( pNumRule, "Where is the NumRule" );
3978 #endif
3979  }
3980
3981  // Netscape skips empty paragraphs, we do the same.
3982  if( bReal )
3983  {
3984  if( m_pPam->GetPoint()->nContent.GetIndex() )
      // NOTE(review): listing line 3985 (the statement of this if-branch) is
      // missing from this listing.
3986  else
3987  AddParSpace();
3988  }
3989
3990  // If a DD or DT was open, it's an implied definition list,
3991  // which must be closed now.
3992  if( (m_nOpenParaToken == HtmlTokenId::DT_ON || m_nOpenParaToken == HtmlTokenId::DD_ON) &&
      // NOTE(review): listing line 3993 (the second operand of this condition)
      // is missing from this listing.
3994  {
3995  m_nDefListDeep--;
3996  }
3997
3998  // Pop the context of the stack. It can also be from an
3999  // implied opened definition list.
4000  std::unique_ptr<HTMLAttrContext> xCntxt(
4001  PopContext( m_nOpenParaToken != HtmlTokenId::NONE ? getOnToken(m_nOpenParaToken) : HtmlTokenId::PARABREAK_ON ));
4002
4003  // close attribute
4004  if (xCntxt)
4005  {
4006  EndContext(xCntxt.get());
4007  SetAttr(); // because of JavaScript set paragraph attributes as fast as possible
4008  xCntxt.reset();
4009  }
4010
4011  // reset the existing style
4012  if( bReal )
4013  SetTextCollAttrs();
4014
4015  m_nOpenParaToken = HtmlTokenId::NONE;
4016 }
4017 
// NOTE(review): the signature line (listing line 4018) is missing from this
// listing; the body uses a token parameter named nToken - presumably
// SwHTMLParser::NewHeading( HtmlTokenId nToken ), confirm.
//
// Starts a heading: evaluates the tag options, maps HEAD1_ON..HEAD6_ON to
// RES_POOLCOLL_HEADLINE1..6 (any other token to RES_POOLCOLL_STANDARD),
// creates and pushes a context carrying that collection and the class, and
// applies style options and alignment.
4019 {
      // SvxAdjust::End serves as "no ALIGN option given" marker here; see the
      // comparison before the adjust item is inserted below.
4020  m_eParaAdjust = SvxAdjust::End;
4021
4022  OUString aId, aStyle, aClass, aLang, aDir;
4023
4024  const HTMLOptions& rHTMLOptions = GetOptions();
4025  for (size_t i = rHTMLOptions.size(); i; )
4026  {
4027  const HTMLOption& rOption = rHTMLOptions[--i];
4028  switch( rOption.GetToken() )
4029  {
4030  case HtmlOptionId::ID:
4031  aId = rOption.GetString();
4032  break;
4033  case HtmlOptionId::ALIGN:
4034  m_eParaAdjust = rOption.GetEnum( aHTMLPAlignTable, m_eParaAdjust );
4035  break;
4036  case HtmlOptionId::STYLE:
4037  aStyle = rOption.GetString();
4038  break;
4039  case HtmlOptionId::CLASS:
4040  aClass = rOption.GetString();
4041  break;
4042  case HtmlOptionId::LANG:
4043  aLang = rOption.GetString();
4044  break;
4045  case HtmlOptionId::DIR:
4046  aDir = rOption.GetString();
4047  break;
4048  default: break;
4049  }
4050  }
4051
4052  // open a new paragraph
4053  if( m_pPam->GetPoint()->nContent.GetIndex() )
      // NOTE(review): listing line 4054 (the statement of this if-branch) is
      // missing from this listing.
4055  else
4056  AddParSpace();
4057
4058  // search for the matching style
4059  sal_uInt16 nTextColl;
4060  switch( nToken )
4061  {
4062  case HtmlTokenId::HEAD1_ON: nTextColl = RES_POOLCOLL_HEADLINE1; break;
4063  case HtmlTokenId::HEAD2_ON: nTextColl = RES_POOLCOLL_HEADLINE2; break;
4064  case HtmlTokenId::HEAD3_ON: nTextColl = RES_POOLCOLL_HEADLINE3; break;
4065  case HtmlTokenId::HEAD4_ON: nTextColl = RES_POOLCOLL_HEADLINE4; break;
4066  case HtmlTokenId::HEAD5_ON: nTextColl = RES_POOLCOLL_HEADLINE5; break;
4067  case HtmlTokenId::HEAD6_ON: nTextColl = RES_POOLCOLL_HEADLINE6; break;
4068  default: nTextColl = RES_POOLCOLL_STANDARD; break;
4069  }
4070
4071  // create the context
4072  std::unique_ptr<HTMLAttrContext> xCntxt(new HTMLAttrContext(nToken, nTextColl, aClass));
4073
4074  // parse styles (regarding class see also NewPara)
      // The class is deliberately passed as an empty string to the style helpers.
4075  if (HasStyleOptions(aStyle, aId, OUString(), &aLang, &aDir))
4076  {
4077  SfxItemSet aItemSet( m_xDoc->GetAttrPool(), m_pCSS1Parser->GetWhichMap() );
4078  SvxCSS1PropertyInfo aPropInfo;
4079
4080  if (ParseStyleOptions(aStyle, aId, OUString(), aItemSet, aPropInfo, &aLang, &aDir))
4081  {
4082  OSL_ENSURE( aClass.isEmpty() || !m_pCSS1Parser->GetClass( aClass ),
4083  "Class is not considered" );
4084  DoPositioning( aItemSet, aPropInfo, xCntxt.get() );
4085  InsertAttrs( aItemSet, aPropInfo, xCntxt.get() );
4086  }
4087  }
4088
4089  if( SvxAdjust::End != m_eParaAdjust )
4090  InsertAttr( &m_xAttrTab->pAdjust, SvxAdjustItem(m_eParaAdjust, RES_PARATR_ADJUST), xCntxt.get() );
4091
4092  // and push on stack
4093  PushContext(xCntxt);
4094
4095  // set the current style or its attributes
4096  SetTextCollAttrs(m_aContexts.back().get());
4097
      // NOTE(review): listing line 4098 is missing from this listing.
4099
4100  // progress bar
4101  ShowStatline();
4102 }
4103 
// NOTE(review): the signature line (listing line 4104) is missing from this
// listing; presumably SwHTMLParser::EndHeading() - confirm.
//
// Ends a heading: searches the context stack from the top down to
// m_nContextStMin for the nearest HEAD1_ON..HEAD6_ON context regardless of
// its level, removes it, ends its attributes and resets the style.
4105 {
4106  // open a new paragraph
4107  if( m_pPam->GetPoint()->nContent.GetIndex() )
      // NOTE(review): listing line 4108 (the statement of this if-branch) is
      // missing from this listing.
4109  else
4110  AddParSpace();
4111
4112  // search context matching the token and fetch it from stack
4113  std::unique_ptr<HTMLAttrContext> xCntxt;
4114  auto nPos = m_aContexts.size();
      // The loop stops as soon as a heading context was taken (xCntxt set).
4115  while( !xCntxt && nPos>m_nContextStMin )
4116  {
4117  switch( m_aContexts[--nPos]->GetToken() )
4118  {
4119  case HtmlTokenId::HEAD1_ON:
4120  case HtmlTokenId::HEAD2_ON:
4121  case HtmlTokenId::HEAD3_ON:
4122  case HtmlTokenId::HEAD4_ON:
4123  case HtmlTokenId::HEAD5_ON:
4124  case HtmlTokenId::HEAD6_ON:
      // Take ownership first, then drop the now-empty stack entry.
4125  xCntxt = std::move(m_aContexts[nPos]);
4126  m_aContexts.erase( m_aContexts.begin() + nPos );
4127  break;
4128  default: break;
4129  }
4130  }
4131
4132  // and now end attributes
4133  if (xCntxt)
4134  {
4135  EndContext(xCntxt.get());
4136  SetAttr(); // because of JavaScript set paragraph attributes as fast as possible
4137  xCntxt.reset();
4138  }
4139
4140  // reset existing style
4141  SetTextCollAttrs();
4142
      // NOTE(review): listing line 4143 is missing from this listing.
4144 }
4145 
// Starts a block element that maps to a paragraph style.
//
// nToken - the opening token; it selects the paragraph append mode
//          (AM_SPACE, AM_NOSPACE or AM_SOFTNOSPACE). For LISTING_ON and
//          XMP_ON a CLASS option is discarded.
// nColl  - text collection id stored in the new context.
//
// The new context is pushed and passed to SetTextCollAttrs.
4146 void SwHTMLParser::NewTextFormatColl( HtmlTokenId nToken, sal_uInt16 nColl )
4147 {
4148  OUString aId, aStyle, aClass, aLang, aDir;
4149
4150  const HTMLOptions& rHTMLOptions = GetOptions();
4151  for (size_t i = rHTMLOptions.size(); i; )
4152  {
4153  const HTMLOption& rOption = rHTMLOptions[--i];
4154  switch( rOption.GetToken() )
4155  {
4156  case HtmlOptionId::ID:
4157  aId = rOption.GetString();
4158  break;
4159  case HtmlOptionId::STYLE:
4160  aStyle = rOption.GetString();
4161  break;
4162  case HtmlOptionId::CLASS:
4163  aClass = rOption.GetString();
4164  break;
4165  case HtmlOptionId::LANG:
4166  aLang = rOption.GetString();
4167  break;
4168  case HtmlOptionId::DIR:
4169  aDir = rOption.GetString();
4170  break;
4171  default: break;
4172  }
4173  }
4174
4175  // open a new paragraph
      // NOTE(review): listing line 4176 is missing from this listing;
      // presumably it declares eMode, which is assigned below - confirm.
4177  switch( nToken )
4178  {
4179  case HtmlTokenId::LISTING_ON:
4180  case HtmlTokenId::XMP_ON:
4181  // These both tags will be mapped to the PRE style. For the case that a
4182  // a CLASS exists we will delete it so that we don't get the CLASS of
4183  // the PRE style.
4184  aClass.clear();
4185  [[fallthrough]];
4186  case HtmlTokenId::BLOCKQUOTE_ON:
4187  case HtmlTokenId::BLOCKQUOTE30_ON:
4188  case HtmlTokenId::PREFORMTXT_ON:
4189  eMode = AM_SPACE;
4190  break;
4191  case HtmlTokenId::ADDRESS_ON:
4192  eMode = AM_NOSPACE; // ADDRESS can follow on a <P> without </P>
4193  break;
4194  case HtmlTokenId::DT_ON:
4195  case HtmlTokenId::DD_ON:
4196  eMode = AM_SOFTNOSPACE;
4197  break;
4198  default:
4199  OSL_ENSURE( false, "unknown style" );
4200  break;
4201  }
      // A non-empty paragraph is ended with the chosen mode; an empty one only
      // gets paragraph spacing, and only in AM_SPACE mode.
4202  if( m_pPam->GetPoint()->nContent.GetIndex() )
4203  AppendTextNode( eMode );
4204  else if( AM_SPACE==eMode )
4205  AddParSpace();
4206
4207  // ... and save in a context
4208  std::unique_ptr<HTMLAttrContext> xCntxt(new HTMLAttrContext(nToken, nColl, aClass));
4209
4210  // parse styles (regarding class see also NewPara)
      // The class is deliberately passed as an empty string to the style helpers.
4211  if (HasStyleOptions(aStyle, aId, OUString(), &aLang, &aDir))
4212  {
4213  SfxItemSet aItemSet( m_xDoc->GetAttrPool(), m_pCSS1Parser->GetWhichMap() );
4214  SvxCSS1PropertyInfo aPropInfo;
4215
4216  if (ParseStyleOptions(aStyle, aId, OUString(), aItemSet, aPropInfo, &aLang, &aDir))
4217  {
4218  OSL_ENSURE( aClass.isEmpty() || !m_pCSS1Parser->GetClass( aClass ),
4219  "Class is not considered" );
4220  DoPositioning( aItemSet, aPropInfo, xCntxt.get() );
4221  InsertAttrs( aItemSet, aPropInfo, xCntxt.get() );
4222  }
4223  }
4224
4225  PushContext(xCntxt);
4226
4227  // set the new style
4228  SetTextCollAttrs(m_aContexts.back().get());
4229
4230  // update progress bar
4231  ShowStatline();
4232 }
4233 
// NOTE(review): the signature line (listing line 4234) is missing from this
// listing; the body uses a token parameter named nToken - presumably
// SwHTMLParser::EndTextFormatColl( HtmlTokenId nToken ), confirm.
//
// Ends a block element that maps to a paragraph style: picks the paragraph
// append mode from the "on" variant of nToken, ends the current paragraph,
// pops the matching context, ends its attributes and resets the style.
4235 {
      // NOTE(review): listing line 4236 is missing from this listing;
      // presumably it declares eMode, which is assigned below - confirm.
4237  switch( getOnToken(nToken) )
4238  {
4239  case HtmlTokenId::BLOCKQUOTE_ON:
4240  case HtmlTokenId::BLOCKQUOTE30_ON:
4241  case HtmlTokenId::PREFORMTXT_ON:
4242  case HtmlTokenId::LISTING_ON:
4243  case HtmlTokenId::XMP_ON:
4244  eMode = AM_SPACE;
4245  break;
      // Unlike on opening, ADDRESS uses AM_SOFTNOSPACE here (AM_NOSPACE in
      // NewTextFormatColl).
4246  case HtmlTokenId::ADDRESS_ON:
4247  case HtmlTokenId::DT_ON:
4248  case HtmlTokenId::DD_ON:
4249  eMode = AM_SOFTNOSPACE;
4250  break;
4251  default:
4252  OSL_ENSURE( false, "unknown style" );
4253  break;
4254  }
4255  if( m_pPam->GetPoint()->nContent.GetIndex() )
4256  AppendTextNode( eMode );
4257  else if( AM_SPACE==eMode )
4258  AddParSpace();
4259
4260  // pop current context of stack
4261  std::unique_ptr<HTMLAttrContext> xCntxt(PopContext(getOnToken(nToken)));
4262
4263  // and now end attributes
4264  if (xCntxt)
4265  {
4266  EndContext(xCntxt.get());
4267  SetAttr(); // because of JavaScript set paragraph attributes as fast as possible
4268  xCntxt.reset();
4269  }
4270
4271  // reset existing style
4272  SetTextCollAttrs();
4273 }
4274 
// NOTE(review): the signature line (listing line 4275) is missing from this
// listing; presumably SwHTMLParser::NewDefList() - confirm.
//
// Opens a definition list: starts a new paragraph, increments
// m_nDefListDeep, creates a DEFLIST_ON context that stores the current
// margins (with the DD left indent added for nested lists that are not
// inside a DD), applies style options and pushes the context.
4276 {
4277  OUString aId, aStyle, aClass, aLang, aDir;
4278
4279  const HTMLOptions& rHTMLOptions = GetOptions();
4280  for (size_t i = rHTMLOptions.size(); i; )
4281  {
4282  const HTMLOption& rOption = rHTMLOptions[--i];
4283  switch( rOption.GetToken() )
4284  {
4285  case HtmlOptionId::ID:
4286  aId = rOption.GetString();
4287  break;
4288  case HtmlOptionId::STYLE:
4289  aStyle = rOption.GetString();
4290  break;
4291  case HtmlOptionId::CLASS:
4292  aClass = rOption.GetString();
4293  break;
4294  case HtmlOptionId::LANG:
4295  aLang = rOption.GetString();
4296  break;
4297  case HtmlOptionId::DIR:
4298  aDir = rOption.GetString();
4299  break;
4300  default: break;
4301  }
4302  }
4303
4304  // open a new paragraph
      // Paragraph spacing is only used for an outermost list, i.e. when
      // neither a numbering level nor a definition list is open yet.
4305  bool bSpace = (GetNumInfo().GetDepth() + m_nDefListDeep) == 0;
4306  if( m_pPam->GetPoint()->nContent.GetIndex() )
4307  AppendTextNode( bSpace ? AM_SPACE : AM_SOFTNOSPACE );
4308  else if( bSpace )
4309  AddParSpace();
4310
4311  // one level more
4312  m_nDefListDeep++;
4313
      // Walk the context stack top-down: the first DD_ON found sets bInDD,
      // the first list context of any kind found sets bNotInDD; either one
      // ends the search.
4314  bool bInDD = false, bNotInDD = false;
4315  auto nPos = m_aContexts.size();
4316  while( !bInDD && !bNotInDD && nPos>m_nContextStMin )
4317  {
4318  HtmlTokenId nCntxtToken = m_aContexts[--nPos]->GetToken();
4319  switch( nCntxtToken )
4320  {
4321  case HtmlTokenId::DEFLIST_ON:
4322  case HtmlTokenId::DIRLIST_ON:
4323  case HtmlTokenId::MENULIST_ON:
4324  case HtmlTokenId::ORDERLIST_ON:
4325  case HtmlTokenId::UNORDERLIST_ON:
4326  bNotInDD = true;
4327  break;
4328  case HtmlTokenId::DD_ON:
4329  bInDD = true;
4330  break;
4331  default: break;
4332  }
4333  }
4334
4335  // ... and save in a context
4336  std::unique_ptr<HTMLAttrContext> xCntxt(new HTMLAttrContext(HtmlTokenId::DEFLIST_ON));
4337
4338  // in it save also the margins
4339  sal_uInt16 nLeft=0, nRight=0;
4340  short nIndent=0;
4341  GetMarginsFromContext( nLeft, nRight, nIndent );
4342
4343  // The indentation, which already results from a DL, correlates with a DT
4344  // on the current level and this correlates to a DD from the previous level.
4345  // For a level >=2 we must add DD distance.
4346  if( !bInDD && m_nDefListDeep > 1 )
4347  {
4348
4349  // and the one of the DT-style of the current level
      // The value added is the text-left indent of the RES_POOLCOLL_HTML_DD
      // collection; rLRSpace is a local copy despite its r prefix.
4350  SvxLRSpaceItem rLRSpace =
4351  m_pCSS1Parser->GetTextFormatColl(RES_POOLCOLL_HTML_DD, OUString())
4352  ->GetLRSpace();
4353  nLeft = nLeft + static_cast< sal_uInt16 >(rLRSpace.GetTextLeft());
4354  }
4355
4356  xCntxt->SetMargins( nLeft, nRight, nIndent );
4357
4358  // parse styles
4359  if( HasStyleOptions( aStyle, aId, aClass, &aLang, &aDir ) )
4360  {
4361  SfxItemSet aItemSet( m_xDoc->GetAttrPool(), m_pCSS1Parser->GetWhichMap() );
4362  SvxCSS1PropertyInfo aPropInfo;
4363
4364  if( ParseStyleOptions( aStyle, aId, aClass, aItemSet, aPropInfo, &aLang, &aDir ) )
4365  {
4366  DoPositioning( aItemSet, aPropInfo, xCntxt.get() );
4367  InsertAttrs( aItemSet, aPropInfo, xCntxt.get() );
4368  }
4369  }
4370
4371  PushContext(xCntxt);
4372
4373  // set the attributes of the new style
      // Only done for nested lists (depth > 1 after the increment above).
4374  if( m_nDefListDeep > 1 )
4375  SetTextCollAttrs(m_aContexts.back().get());
4376 }
4377 
// NOTE(review): the signature line (listing line 4378) is absent from this
// extract. The body pops a DEFLIST_ON context, so this is presumably the
// handler for the end of a definition list -- confirm the exact signature
// against the original file.
//
// Finishes the current paragraph if necessary, lowers the definition-list
// nesting counter, pops the matching DEFLIST_ON context, ends the attributes
// recorded in it and finally re-applies the paragraph style.
4379  {
// bSpace is true when numbering depth plus definition-list depth equals 1,
// evaluated before the counter is decremented further down.
4380  bool bSpace = (GetNumInfo().GetDepth() + m_nDefListDeep) == 1;
// A non-zero content index means the cursor is not at the start of the
// paragraph: append a new text node. Otherwise only add paragraph spacing,
// and only when bSpace is set.
4381  if( m_pPam->GetPoint()->nContent.GetIndex() )
4382  AppendTextNode( bSpace ? AM_SPACE : AM_SOFTNOSPACE );
4383  else if( bSpace )
4384  AddParSpace();
4385 
4386  // one level less
// guarded so that the counter is never decremented past zero
4387  if( m_nDefListDeep > 0 )
4388  m_nDefListDeep--;
4389 
4390  // pop current context of stack
4391  std::unique_ptr<HTMLAttrContext> xCntxt(PopContext(HtmlTokenId::DEFLIST_ON));
4392 
4393  // and now end attributes
// PopContext may yield an empty pointer; in that case nothing is ended
4394  if (xCntxt)
4395  {
4396  EndContext(xCntxt.get());
4397  SetAttr(); // because of JavaScript set paragraph attributes as fast as possible
4398  xCntxt.reset();
4399  }
4400 
4401  // and set style
4402  SetTextCollAttrs();
4403  }
4404 
// NOTE(review): the signature line (listing line 4405) is absent from this
// extract. The body uses a parameter `nToken` that is compared against
// HtmlTokenId::DD_ON, so this is presumably the handler that opens a
// definition-list item (DD or DT) -- confirm against the original file.
//
// Determines whether the item sits inside a definition list, implicitly
// opens one if it does not, and then starts the DD or DT paragraph style.
4406  {
4407  // determine if the DD/DT exist in a DL
// Walk the context stack from the top down to m_nContextStMin and stop at
// the first list context: DEFLIST_ON means "inside a definition list", any
// other list type means "inside a different kind of list".
4408  bool bInDefList = false, bNotInDefList = false;
4409  auto nPos = m_aContexts.size();
4410  while( !bInDefList && !bNotInDefList && nPos>m_nContextStMin )
4411  {
4412  HtmlTokenId nCntxtToken = m_aContexts[--nPos]->GetToken();
4413  switch( nCntxtToken )
4414  {
4415  case HtmlTokenId::DEFLIST_ON:
4416  bInDefList = true;
4417  break;
4418  case HtmlTokenId::DIRLIST_ON:
4419  case HtmlTokenId::MENULIST_ON:
4420  case HtmlTokenId::ORDERLIST_ON:
4421  case HtmlTokenId::UNORDERLIST_ON:
// bNotInDefList is only used to terminate the search loop
4422  bNotInDefList = true;
4423  break;
4424  default: break;
4425  }
4426  }
4427 
4428  // if not, then implicitly open a new DL
// Only the nesting counter is raised and the token is remembered as the open
// paragraph token; no DEFLIST_ON context is pushed in this block.
4429  if( !bInDefList )
4430  {
4431  m_nDefListDeep++;
4432  OSL_ENSURE( m_nOpenParaToken == HtmlTokenId::NONE,
4433  "Now an open paragraph element will be lost." );
4434  m_nOpenParaToken = nToken;
4435  }
4436 
// DD_ON selects the DD pool collection, every other token the DT collection
4437  NewTextFormatColl( nToken, static_cast< sal_uInt16 >(nToken==HtmlTokenId::DD_ON ? RES_POOLCOLL_HTML_DD
4438  : RES_POOLCOLL_HTML_DT) );
4439  }
4440 
// NOTE(review): the signature line (listing line 4441) is absent from this
// extract. The body uses a parameter `nToken` and removes a DD_ON/DT_ON
// context, so this is presumably the handler that closes a definition-list
// item -- confirm against the original file.
//
// Looks for the innermost DD/DT context matching the token, removes it from
// the context stack and ends the attributes recorded in it.
4442  {
4443  // open a new paragraph
// NOTE(review): listing line 4445 is missing (numbering jumps 4444 -> 4446),
// so the statement controlled by this `if` is not visible here. Restore it
// from the original file before relying on this text.
4444  if( nToken == HtmlTokenId::NONE && m_pPam->GetPoint()->nContent.GetIndex() )
4446 
4447  // search context matching the token and fetch it from stack
// getOnToken() maps the incoming token before the comparison below
// (presumably an OFF token to its ON counterpart -- verify).
4448  nToken = getOnToken(nToken);
4449  std::unique_ptr<HTMLAttrContext> xCntxt;
4450  auto nPos = m_aContexts.size();
// Search from the top of the stack downwards, never below m_nContextStMin,
// until a context has been taken.
4451  while( !xCntxt && nPos>m_nContextStMin )
4452  {
4453  HtmlTokenId nCntxtToken = m_aContexts[--nPos]->GetToken();
4454  switch( nCntxtToken )
4455  {
4456  case HtmlTokenId::DD_ON:
4457  case HtmlTokenId::DT_ON:
// NONE matches either kind of item; otherwise the kinds must be equal
4458  if( nToken == HtmlTokenId::NONE || nToken == nCntxtToken )
4459  {
// take ownership of the entry, then drop the emptied slot from the stack
4460  xCntxt = std::move(m_aContexts[nPos]);
4461  m_aContexts.erase( m_aContexts.begin() + nPos );
4462  }
4463  break;
4464  case HtmlTokenId::DEFLIST_ON:
4465  // don't look at DD/DT outside the current DefList
4466  case HtmlTokenId::DIRLIST_ON:
4467  case HtmlTokenId::MENULIST_ON:
4468  case HtmlTokenId::ORDERLIST_ON:
4469  case HtmlTokenId::UNORDERLIST_ON:
4470  // and also not outside another list
// NOTE(review): listing line 4471 is missing (numbering jumps 4470 -> 4472).
// Given the comments above, a statement that stops the search presumably
// belongs here; as shown, reaching a list context does not end the loop.
4472  break;
4473  default: break;
4474  }
4475  }
4476 
4477  // and now end attributes
4478  if (xCntxt)
4479  {
4480  EndContext(xCntxt.get());
4481  SetAttr(); // because of JavaScript set paragraph attributes as fast as possible
4482  }
4483  }
4484 
4494 bool SwHTMLParser::HasCurrentParaFlys( bool bNoSurroundOnly,
4495  bool bSurroundOnly ) const
4496 {
4497  SwNodeIndex& rNodeIdx = m_pPam->GetPoint()->nNode;
4498 
4499  const SwFrameFormats& rFrameFormatTable = *m_xDoc->GetSpzFrameFormats();
4500 
4501  bool bFound = false;
4502  for ( size_t i=0; i<rFrameFormatTable.size(); i++ )
4503  {
4504  const SwFrameFormat *const pFormat = rFrameFormatTable[i];
4505  SwFormatAnchor const*const pAnchor = &pFormat->GetAnchor();
4506  // A frame was found, when
4507  // - it is paragraph-bound, and
4508  // - is anchored in current paragraph, and
4509  // - every paragraph-bound frame counts, or
4510  // - (only frames without wrapping count and) the frame doesn't have
4511  // a wrapping
4512  SwPosition const*const pAPos = pAnchor->GetContentAnchor();
4513  if (pAPos &&
4514  ((RndStdIds::FLY_AT_PARA == pAnchor->GetAnchorId()) ||
4515  (RndStdIds::FLY_AT_CHAR == pAnchor->GetAnchorId())) &&
4516  pAPos->nNode == rNodeIdx )
4517  {
4518  if( !(bNoSurroundOnly || bSurroundOnly) )
4519  {
4520  bFound = true;
4521  break;
4522  }
4523  else
4524  {
4525  // When looking for frames with wrapping, also disregard
4526  // ones with wrap-through. In this case it's (still) HIDDEN-Controls,
4527  // and you don't want to evade those when positioning.
4528  css::text::WrapTextMode eSurround = pFormat->GetSurround().GetSurround();
4529  if( bNoSurroundOnly )
4530  {
4531  if( css::text::WrapTextMode_NONE==eSurround )
4532  {
4533  bFound = true;
4534  break;
4535  }
4536  }
4537  if( bSurroundOnly )
4538  {
4539  if( css::text::WrapTextMode_NONE==eSurround )
4540  {
4541  bFound = false;
4542  break;
4543  }
4544  else if( css::text::WrapTextMode_THROUGH!=eSurround )
4545  {
4546  bFound = true;
4547  // Continue searching: It's possible that some without
4548  // wrapping will follow...
4549  }
4550  }
4551  }
4552  }
4553  }
4554 
4555  return bFound;
4556 }
4557 
4558  // the special methods for inserting objects
4559 
// NOTE(review): the signature line (listing line 4560) is absent from this
// extract; confirm name and return type against the original file.
//
// Yields the address of the format collection reported by the content node
// at the current cursor, or nullptr when the cursor has no content node.
4561  {
4562  const SwContentNode* pCNd = m_pPam->GetContentNode();
4563  return pCNd ? &pCNd->GetAnyFormatColl() : nullptr;
4564  }
4565 
// NOTE(review): the signature line (listing line 4566) is absent from this
// extract. The body uses a pointer parameter `pContext` that may be null;
// confirm the exact signature against the original file.
//
// Computes, from the stack of open contexts, the paragraph style to apply
// at the cursor together with the hard attributes and paragraph indents
// that have to accompany it, and then sets all of them on the document.
//
// Steps, in order:
//  1. Walk the contexts from m_nContextStAttrMin upwards, choosing the
//     style to set and collecting attributes of superseded styles as hard
//     attributes; pick up changed margins along the way.
//  2. If pContext names a style, store the accumulated margins and the
//     upper/lower spacing of that style in pContext.
//  3. Fall back to the default collection if no context supplied a style.
//  4. Invalidate earlier hard paragraph attributes, set the style, and add
//     an LR-space correction plus the collected hard attributes.
4567  {
4568  SwTextFormatColl *pCollToSet = nullptr; // the style to set
// NOTE(review): pItemSet is a raw owning pointer, allocated with `new` in the
// loop below and released with `delete` at the very end of this block.
4569  SfxItemSet *pItemSet = nullptr; // set of hard attributes
4570  sal_uInt16 nTopColl = pContext ? pContext->GetTextFormatColl() : 0;
// despite the `r` prefix this is a copy, not a reference
4571  const OUString rTopClass = pContext ? pContext->GetClass() : OUString();
4572  sal_uInt16 nDfltColl = RES_POOLCOLL_TEXT;
4573 
4574  bool bInPRE=false; // some context info
4575 
4576  sal_uInt16 nLeftMargin = 0, nRightMargin = 0; // the margins and
4577  short nFirstLineIndent = 0; // indentations
4578 
4579  for( auto i = m_nContextStAttrMin; i < m_aContexts.size(); ++i )
4580  {
4581  const HTMLAttrContext *pCntxt = m_aContexts[i].get();
4582 
4583  sal_uInt16 nColl = pCntxt->GetTextFormatColl();
4584  if( nColl )
4585  {
4586  // There is a style to set. Then at first we must decide,
4587  // if the style can be set.
4588  bool bSetThis = true;
4589  switch( nColl )
4590  {
4591  case RES_POOLCOLL_HTML_PRE:
4592  bInPRE = true;
4593  break;
4594  case RES_POOLCOLL_TEXT:
4595  // <TD><P CLASS=xxx> must become TD.xxx
4596  if( nDfltColl==RES_POOLCOLL_TABLE ||
4597  nDfltColl==RES_POOLCOLL_TABLE_HDLN )
4598  nColl = nDfltColl;
4599  break;
4600  case RES_POOLCOLL_HTML_HR:
4601  // also <HR> in <PRE> set as style, otherwise it can't
4602  // be exported anymore
4603  break;
4604  default:
// once inside PRE, any other style is only applied as hard attributes
4605  if( bInPRE )
4606  bSetThis = false;
4607  break;
4608  }
4609 
4610  SwTextFormatColl *pNewColl =
4611  m_pCSS1Parser->GetTextFormatColl( nColl, pCntxt->GetClass() );
4612 
4613  if( bSetThis )
4614  {
4615  // If now a different style should be set as previously, the
4616  // previous style must be replaced by hard attribution.
4617 
4618  if( pCollToSet )
4619  {
4620  // insert the attributes hard, which previous style sets
4621  if( !pItemSet )
4622  pItemSet = new SfxItemSet( pCollToSet->GetAttrSet() );
4623  else
4624  {
// merge via a temporary set built from the collection's own pool and ranges
4625  const SfxItemSet& rCollSet = pCollToSet->GetAttrSet();
4626  SfxItemSet aItemSet( *rCollSet.GetPool(),
4627  rCollSet.GetRanges() );
4628  aItemSet.Set( rCollSet );
4629  pItemSet->Put( aItemSet );
4630  }
4631  // but remove the attributes, which the current style sets,
4632  // because otherwise they will be overwritten later
4633  pItemSet->Differentiate( pNewColl->GetAttrSet() );
4634  }
4635 
4636  pCollToSet = pNewColl;
4637  }
4638  else
4639  {
4640  // hard attribution
4641  if( !pItemSet )
4642  pItemSet = new SfxItemSet( pNewColl->GetAttrSet() );
4643  else
4644  {
4645  const SfxItemSet& rCollSet = pNewColl->GetAttrSet();
4646  SfxItemSet aItemSet( *rCollSet.GetPool(),
4647  rCollSet.GetRanges() );
4648  aItemSet.Set( rCollSet );
4649  pItemSet->Put( aItemSet );
4650  }
4651  }
4652  }
4653  else
4654  {
4655  // Maybe a default style exists?
4656  nColl = pCntxt->GetDfltTextFormatColl();
4657  if( nColl )
4658  nDfltColl = nColl;
4659  }
4660 
4661  // if applicable fetch new paragraph indents
// later contexts overwrite the margins of earlier ones; they do not add up
4662  if( pCntxt->IsLRSpaceChanged() )
4663  {
4664  sal_uInt16 nLeft=0, nRight=0;
4665 
4666  pCntxt->GetMargins( nLeft, nRight, nFirstLineIndent );
4667  nLeftMargin = nLeft;
4668  nRightMargin = nRight;
4669  }
4670  }
4671 
4672  // If in current context a new style should be set,
4673  // its paragraph margins must be inserted in the context.
4674  if( pContext && nTopColl )
4675  {
4676  // <TD><P CLASS=xxx> must become TD.xxx
4677  if( nTopColl==RES_POOLCOLL_TEXT &&
4678  (nDfltColl==RES_POOLCOLL_TABLE ||
4679  nDfltColl==RES_POOLCOLL_TABLE_HDLN) )
4680  nTopColl = nDfltColl;
4681 
4682  const SwTextFormatColl *pTopColl =
4683  m_pCSS1Parser->GetTextFormatColl( nTopColl, rTopClass );
4684  const SfxItemSet& rItemSet = pTopColl->GetAttrSet();
4685  const SfxPoolItem *pItem;
4686  if( SfxItemState::SET == rItemSet.GetItemState(RES_LR_SPACE,true, &pItem) )
4687  {
4688  const SvxLRSpaceItem *pLRItem =
4689  static_cast<const SvxLRSpaceItem *>(pItem);
4690 
4691  sal_Int32 nLeft = pLRItem->GetTextLeft();
4692  sal_Int32 nRight = pLRItem->GetRight();
4693  nFirstLineIndent = pLRItem->GetTextFirstLineOfst();
4694 
4695  // In Definition lists the margins also contain the margins from the previous levels
// For DD only the difference to the DT style is added; DT adds nothing.
4696  if( RES_POOLCOLL_HTML_DD == nTopColl )
4697  {
4698  const SvxLRSpaceItem& rDTLRSpace = m_pCSS1Parser
4699  ->GetTextFormatColl(RES_POOLCOLL_HTML_DT, OUString())
4700  ->GetLRSpace();
4701  nLeft -= rDTLRSpace.GetTextLeft();
4702  nRight -= rDTLRSpace.GetRight();
4703  }
4704  else if( RES_POOLCOLL_HTML_DT == nTopColl )
4705  {
4706  nLeft = 0;
4707  nRight = 0;
4708  }
4709 
4710  // the paragraph margins add up
// NOTE(review): nLeft/nRight are sal_Int32 and can become negative after the
// DD subtraction above; the casts to sal_uInt16 would then wrap. Confirm
// whether a DD margin smaller than the DT margin can occur.
4711  nLeftMargin = nLeftMargin + static_cast< sal_uInt16 >(nLeft);
4712  nRightMargin = nRightMargin + static_cast< sal_uInt16 >(nRight);
4713 
4714  pContext->SetMargins( nLeftMargin, nRightMargin,
4715  nFirstLineIndent );
4716  }
4717  if( SfxItemState::SET == rItemSet.GetItemState(RES_UL_SPACE,true, &pItem) )
4718  {
4719  const SvxULSpaceItem *pULItem =
4720  static_cast<const SvxULSpaceItem *>(pItem);
4721  pContext->SetULSpace( pULItem->GetUpper(), pULItem->GetLower() );
4722  }
4723  }
4724 
4725  // If no style is set in the context use the text body.
// The collection's own indents are taken only for values that are still zero.
4726  if( !pCollToSet )
4727  {
4728  pCollToSet = m_pCSS1Parser->GetTextCollFromPool( nDfltColl );
4729  const SvxLRSpaceItem& rLRItem = pCollToSet->GetLRSpace();
4730  if( !nLeftMargin )
4731  nLeftMargin = static_cast< sal_uInt16 >(rLRItem.GetTextLeft());
4732  if( !nRightMargin )
4733  nRightMargin = static_cast< sal_uInt16 >(rLRItem.GetRight());
4734  if( !nFirstLineIndent )
4735  nFirstLineIndent = rLRItem.GetTextFirstLineOfst();
4736  }
4737 
4738  // remove previous hard attribution of paragraph
4739  for( auto pParaAttr : m_aParaAttrs )
4740  pParaAttr->Invalidate();
4741  m_aParaAttrs.clear();
4742 
4743  // set the style
4744  m_xDoc->SetTextFormatColl( *m_pPam, pCollToSet );
4745 
4746  // if applicable correct the paragraph indent
// an LR-space item is only needed when the computed values differ from what
// the chosen style already provides
4747  const SvxLRSpaceItem& rLRItem = pCollToSet->GetLRSpace();
4748  bool bSetLRSpace = nLeftMargin != rLRItem.GetTextLeft() ||
4749  nFirstLineIndent != rLRItem.GetTextFirstLineOfst() ||
4750  nRightMargin != rLRItem.GetRight();
4751 
4752  if( bSetLRSpace )
4753  {
4754  SvxLRSpaceItem aLRItem( rLRItem );
4755  aLRItem.SetTextLeft( nLeftMargin );
4756  aLRItem.SetRight( nRightMargin );
4757  aLRItem.SetTextFirstLineOfst( nFirstLineIndent );
// with collected hard attributes the item travels with them; otherwise it is
// opened and closed right away as a paragraph attribute of its own and
// remembered in m_aParaAttrs
4758  if( pItemSet )
4759  pItemSet->Put( aLRItem );
4760  else
4761  {
4762  NewAttr(m_xAttrTab, &m_xAttrTab->pLRSpace, aLRItem);
4763  m_xAttrTab->pLRSpace->SetLikePara();
4764  m_aParaAttrs.push_back( m_xAttrTab->pLRSpace );
4765  EndAttr( m_xAttrTab->pLRSpace, false );
4766  }
4767  }
4768 
4769  // and now set the attributes
4770  if( pItemSet )
4771  {
4772  InsertParaAttrs( *pItemSet );
4773  delete pItemSet;
4774  }
4775  }
4776 
// NOTE(review): the signature line (listing line 4777) is absent from this
// extract. The body uses a parameter `nToken` (an HTML token id); confirm
// the exact signature against the original file.
//
// Opens a character-format element: reads the ID/STYLE/CLASS/LANG/DIR
// options, creates a context for the token, applies inline style
// attributes, inserts the character format belonging to the token and
// class, and pushes the context.
4778  {
4779  OUString aId, aStyle, aLang, aDir;
4780  OUString aClass;
4781 
// options are visited from last to first
4782  const HTMLOptions& rHTMLOptions = GetOptions();
4783  for (size_t i = rHTMLOptions.size(); i; )
4784  {
4785  const HTMLOption& rOption = rHTMLOptions[--i];
4786  switch( rOption.GetToken() )
4787  {
4788  case HtmlOptionId::ID:
4789  aId = rOption.GetString();
4790  break;
4791  case HtmlOptionId::STYLE:
4792  aStyle = rOption.GetString();
4793  break;
4794  case HtmlOptionId::CLASS:
4795  aClass = rOption.GetString();
4796  break;
4797  case HtmlOptionId::LANG:
4798  aLang = rOption.GetString();
4799  break;
4800  case HtmlOptionId::DIR:
4801  aDir = rOption.GetString();
4802  break;
4803  default: break;
4804  }
4805  }
4806 
4807  // create a new context
4808  std::unique_ptr<HTMLAttrContext> xCntxt(new HTMLAttrContext(nToken));
4809 
4810  // set the style and save it in the context
// a missing format is only reported in debug builds; the null check further
// down keeps release builds safe
4811  SwCharFormat* pCFormat = m_pCSS1Parser->GetChrFormat( nToken, aClass );
4812  OSL_ENSURE( pCFormat, "No character format found for token" );
4813 
4814  // parse styles (regarding class see also NewPara)
// the class is deliberately passed as an empty string here; it was already
// used for the character-format lookup above
4815  if (HasStyleOptions(aStyle, aId, OUString(), &aLang, &aDir))
4816  {
4817  SfxItemSet aItemSet( m_xDoc->GetAttrPool(), m_pCSS1Parser->GetWhichMap() );
4818  SvxCSS1PropertyInfo aPropInfo;
4819 
4820  if (ParseStyleOptions(aStyle, aId, OUString(), aItemSet, aPropInfo, &aLang, &aDir))
4821  {
4822  OSL_ENSURE( aClass.isEmpty() || !m_pCSS1Parser->GetClass( aClass ),
4823  "Class is not considered" );
4824  DoPositioning( aItemSet, aPropInfo, xCntxt.get() );
4825  InsertAttrs( aItemSet, aPropInfo, xCntxt.get(), true );
4826  }
4827  }
4828 
4829  // Character formats are stored in their own stack and can never be inserted
4830  // by styles. Therefore the attribute doesn't exist in CSS1-Which-Range.
4831  if( pCFormat )
4832  InsertAttr( &m_xAttrTab->pCharFormats, SwFormatCharFormat( pCFormat ), xCntxt.get() );
4833 
4834  // save the context
4835  PushContext(xCntxt);
4836  }
4837 
// NOTE(review): the signature line (listing line 4838) and several body lines
// are absent from this extract (see the notes below). The body evaluates the
// TYPE/ALIGN/WIDTH/HEIGHT/SIZE options and switches on HTML_SPTYPE_*, so
// this is presumably the handler for a spacer element -- confirm against the
// original file.
//
// Depending on the spacer type this either
//  - BLOCK: inserts an empty, content-protected fly frame of the given size,
//  - VERT:  adds vertical space through a lower paragraph margin, or
//  - HORI:  adds horizontal space through a first-line indent (empty
//           paragraph) or a kerned blank (non-empty paragraph).
4839  {
4840  // and if applicable change it via the options
4841  sal_Int16 eVertOri = text::VertOrientation::TOP;
4842  sal_Int16 eHoriOri = text::HoriOrientation::NONE;
4843  Size aSize( 0, 0);
4844  long nSize = 0;
4845  bool bPercentWidth = false;
4846  bool bPercentHeight = false;
// horizontal spacing is the default when no TYPE option is given
4847  sal_uInt16 nType = HTML_SPTYPE_HORI;
4848 
4849  const HTMLOptions& rHTMLOptions = GetOptions();
4850  for (size_t i = rHTMLOptions.size(); i; )
4851  {
4852  const HTMLOption& rOption = rHTMLOptions[--i];
4853  switch( rOption.GetToken() )
4854  {
4855  case HtmlOptionId::TYPE:
4856  rOption.GetEnum( nType, aHTMLSpacerTypeTable );
4857  break;
4858  case HtmlOptionId::ALIGN:
// the same ALIGN value is looked up in both the vertical and the horizontal
// table; each call falls back to the current value
4859  eVertOri =
4860  rOption.GetEnum( aHTMLImgVAlignTable,
4861  eVertOri );
4862  eHoriOri =
4863  rOption.GetEnum( aHTMLImgHAlignTable,
4864  eHoriOri );
4865  break;
4866  case HtmlOptionId::WIDTH:
4867  // First only save as pixel value!
4868  bPercentWidth = (rOption.GetString().indexOf('%') != -1);
4869  aSize.setWidth( static_cast<long>(rOption.GetNumber()) );
4870  break;
4871  case HtmlOptionId::HEIGHT:
4872  // First only save as pixel value!
4873  bPercentHeight = (rOption.GetString().indexOf('%') != -1);
4874  aSize.setHeight( static_cast<long>(rOption.GetNumber()) );
4875  break;
4876  case HtmlOptionId::SIZE:
4877  // First only save as pixel value!
4878  nSize = rOption.GetNumber();
4879  break;
4880  default: break;
4881  }
4882  }
4883 
4884  switch( nType )
4885  {
4886  case HTML_SPTYPE_BLOCK:
4887  {
4888  // create an empty text frame
4889 
4890  // fetch the ItemSet
// NOTE(review): listing line 4892 is missing (numbering jumps 4891 -> 4893);
// the remaining constructor arguments of aFrameSet are not visible here.
4891  SfxItemSet aFrameSet( m_xDoc->GetAttrPool(),
4893  if( !IsNewDoc() )
4894  Reader::ResetFrameFormatAttrs( aFrameSet );
4895 
4896  // set the anchor and the adjustment
4897  SetAnchorAndAdjustment( eVertOri, eHoriOri, aFrameSet );
4898 
4899  // and the size of the frame
// the dummy item set and property info only satisfy the helper signatures;
// no CSS input exists for a spacer
4900  Size aDfltSz( MINFLY, MINFLY );
4901  Size aSpace( 0, 0 );
4902  SfxItemSet aDummyItemSet( m_xDoc->GetAttrPool(),
4903  m_pCSS1Parser->GetWhichMap() );
4904  SvxCSS1PropertyInfo aDummyPropInfo;
4905 
4906  SetFixSize( aSize, aDfltSz, bPercentWidth, bPercentHeight,
4907  aDummyPropInfo, aFrameSet );
4908  SetSpace( aSpace, aDummyItemSet, aDummyPropInfo, aFrameSet );
4909 
4910  // protect the content
4911  SvxProtectItem aProtectItem( RES_PROTECT) ;
4912  aProtectItem.SetContentProtect( true );
4913  aFrameSet.Put( aProtectItem );
4914 
4915  // create the frame
4916  RndStdIds eAnchorId =
4917  aFrameSet.Get(RES_ANCHOR).GetAnchorId();
4918  SwFrameFormat *pFlyFormat = m_xDoc->MakeFlySection( eAnchorId,
4919  m_pPam->GetPoint(), &aFrameSet );
4920  // Possibly create frames and register auto-bound frames.
4921  RegisterFlyFrame( pFlyFormat );
4922  }
4923  break;
4924  case HTML_SPTYPE_VERT:
4925  if( nSize > 0 )
4926  {
// NOTE(review): listing lines 4927 and 4929 are missing. The visible fragment
// converts Size(0,nSize) from pixels to twips with PixelToLogic and takes
// the height; the guarding condition and the assignment target are not
// visible here.
4928  {
4930  ->PixelToLogic( Size(0,nSize),
4931  MapMode(MapUnit::MapTwip) ).Height();
4932  }
4933 
4934  // set a paragraph margin
4935  SwTextNode *pTextNode = nullptr;
4936  if( !m_pPam->GetPoint()->nContent.GetIndex() )
4937  {
4938  // if possible change the bottom paragraph margin
4939  // of previous node
4940 
4941  SetAttr(); // set still open paragraph attributes
4942 
// node directly before the cursor's node; GetTextNode() yields null when
// that node is not a text node
4943  pTextNode = m_xDoc->GetNodes()[m_pPam->GetPoint()->nNode.GetIndex()-1]
4944  ->GetTextNode();
4945 
4946  // If the previous paragraph isn't a text node, then now an
4947  // empty paragraph is created, which already generates a single
4948  // line of spacing.
4949  if( !pTextNode )
4950  nSize = nSize>HTML_PARSPACE ? nSize-HTML_PARSPACE : 0;
4951  }
4952 
4953  if( pTextNode )
4954  {
// NOTE(review): listing line 4956 is missing (numbering jumps 4955 -> 4957);
// the rest of the aULSpace initializer is not visible here.
4955  SvxULSpaceItem aULSpace( static_cast<const SvxULSpaceItem&>(pTextNode
4957  aULSpace.SetLower( aULSpace.GetLower() + static_cast<sal_uInt16>(nSize) );
4958  pTextNode->SetAttr( aULSpace );
4959  }
4960  else
4961  {
4962  NewAttr(m_xAttrTab, &m_xAttrTab->pULSpace, SvxULSpaceItem(0, static_cast<sal_uInt16>(nSize), RES_UL_SPACE));
4963  EndAttr( m_xAttrTab->pULSpace, false );
4964 
4965  AppendTextNode(); // Don't change spacing!
4966  }
4967  }
4968  break;
4969  case HTML_SPTYPE_HORI:
4970  if( nSize > 0 )
4971  {
4972  // If the paragraph is still empty, set first line
4973  // indentation, otherwise apply letter spacing over a space.
4974 
// NOTE(review): listing lines 4975 and 4977 are missing. The visible fragment
// converts Size(nSize,0) from pixels to twips with PixelToLogic and takes
// the width; the guarding condition and the assignment target are not
// visible here.
4976  {
4978  ->PixelToLogic( Size(nSize,0),
4979  MapMode(MapUnit::MapTwip) ).Width();
4980  }
4981 
4982  if( !m_pPam->GetPoint()->nContent.GetIndex() )
4983  {
4984  sal_uInt16 nLeft=0, nRight=0;
4985  short nIndent = 0;
4986 
// keep the margins of the context and widen only the first-line indent
4987  GetMarginsFromContextWithNumberBullet( nLeft, nRight, nIndent );
4988  nIndent = nIndent + static_cast<short>(nSize);
4989 
4990  SvxLRSpaceItem aLRItem( RES_LR_SPACE );
4991  aLRItem.SetTextLeft( nLeft );
4992  aLRItem.SetRight( nRight );
4993  aLRItem.SetTextFirstLineOfst( nIndent );
4994 
4995  NewAttr(m_xAttrTab, &m_xAttrTab->pLRSpace, aLRItem);
4996  EndAttr( m_xAttrTab->pLRSpace, false );
4997  }
4998  else
4999  {
// insert a single blank that carries a kerning attribute of nSize
5000  NewAttr(m_xAttrTab, &m_xAttrTab->pKerning, SvxKerningItem( static_cast<short>(nSize), RES_CHRATR_KERNING ));
5001  OUString aTmp( ' ' );
5002  m_xDoc->getIDocumentContentOperations().InsertString( *m_pPam, aTmp );
5003  EndAttr( m_xAttrTab->pKerning );
5004  }
5005  }
5006  }
5007  }
5008 
// Converts a pixel value to twips with the application's default output
// device. The result is capped at SAL_MAX_UINT16. If nPixel is zero or no
// default device is available, nPixel is returned unchanged.
5009 sal_uInt16 SwHTMLParser::ToTwips( sal_uInt16 nPixel )
5010  {
5011  if( nPixel && Application::GetDefaultDevice() )
5012  {
// NOTE(review): listing line 5013 is missing (numbering jumps 5012 -> 5014).
// It presumably declares nTwips and starts the PixelToLogic call whose
// arguments follow; restore it from the original file.
5014  Size( nPixel, nPixel ), MapMode( MapUnit::MapTwip ) ).Width();
5015  return static_cast<sal_uInt16>(std::min(nTwips, SwTwips(SAL_MAX_UINT16)));
5016  }
5017  else
5018  return nPixel;
5019  }
5020 
// NOTE(review): the signature line (listing line 5021) is absent from this
// extract; the block is used elsewhere in this file as
// GetCurrentBrowseWidth() -- confirm the signature against the original.
//
// Returns the width available for content. A non-zero nWidth is returned
// directly; otherwise the printable width of the master page is computed
// once, cached in m_aHTMLPageSize and returned.
5022  {
// NOTE(review): listing line 5023 is missing (numbering jumps 5022 -> 5024);
// the declaration and initialisation of nWidth are not visible here.
5024  if( nWidth )
5025  return nWidth;
5026 
// a zero cached width means the page size has not been computed yet
5027  if( !m_aHTMLPageSize.Width() )
5028  {
5029  const SwFrameFormat& rPgFormat = m_pCSS1Parser->GetMasterPageDesc()->GetMaster();
5030 
5031  const SwFormatFrameSize& rSz = rPgFormat.GetFrameSize();
5032  const SvxLRSpaceItem& rLR = rPgFormat.GetLRSpace();
5033  const SvxULSpaceItem& rUL = rPgFormat.GetULSpace();
5034  const SwFormatCol& rCol = rPgFormat.GetCol();
5035 
// page size minus the page margins on each axis
5036  m_aHTMLPageSize.setWidth( rSz.GetWidth() - rLR.GetLeft() - rLR.GetRight() );
5037  m_aHTMLPageSize.setHeight( rSz.GetHeight() - rUL.GetUpper() - rUL.GetLower() );
5038 
// NOTE(review): listing line 5040 is missing (numbering jumps 5039 -> 5041);
// the statement executed for multi-column pages is not visible here.
5039  if( 1 < rCol.GetNumCols() )
5041  }
5042 
5043  return m_aHTMLPageSize.Width();
5044  }
5045 
// NOTE(review): the signature line (listing line 5046) is absent from this
// extract; confirm the name and signature against the original file.
//
// Looks for an ID option on the current element and, if it has a non-empty
// value, inserts a bookmark with that name.
5047  {
5048  OUString aId;
5049  const HTMLOptions& rHTMLOptions = GetOptions();
// scan from the last option backwards and stop at the first ID encountered
5050  for (size_t i = rHTMLOptions.size(); i; )
5051  {
5052  const HTMLOption& rOption = rHTMLOptions[--i];
5053  if( HtmlOptionId::ID==rOption.GetToken() )
5054  {
5055  aId = rOption.GetString();
5056  break;
5057  }
5058  }
5059 
5060  if( !aId.isEmpty() )
5061  InsertBookmark( aId );
5062  }
5063 
// NOTE(review): the signature line (listing line 5064) is absent from this
// extract. The body implements the <BR CLEAR=...> handling described in the
// comment below -- confirm the exact signature against the original file.
//
// Handles a line break element: evaluates CLEAR/ID/STYLE/CLASS, adjusts the
// wrapping of frames anchored in the current paragraph when CLEAR applies,
// honours a CSS page break, and inserts a hard line break when neither a
// CLEAR nor a break item took effect.
5065  {
5066  // <BR CLEAR=xxx> is handled as:
5067  // 1.) Only regard the paragraph-bound frames anchored in current paragraph.
5068  // 2.) For left-justified aligned frames, CLEAR=LEFT or ALL, and for right-
5069  // justified aligned frames, CLEAR=RIGHT or ALL, the wrap-through is
5070  // changed as following:
5071  // 3.) If the paragraph contains no text, then the frames don't get a wrapping
5072  // 4.) otherwise a left aligned frame gets a right "only anchor" wrapping
5073  // and a right aligned frame gets a left "only anchor" wrapping.
5074  // 5.) if in a non-empty paragraph the wrapping of a frame is changed,
5075  // then a new paragraph is opened
5076  // 6.) If no wrappings of frames are changed, a hard line break is inserted.
5077 
5078  OUString aId, aStyle, aClass; // the id of bookmark
5079  bool bClearLeft = false, bClearRight = false;
5080  bool bCleared = false; // Was a CLEAR executed?
5081 
5082  // then we fetch the options
5083  const HTMLOptions& rHTMLOptions = GetOptions();
5084  for (size_t i = rHTMLOptions.size(); i; )
5085  {
5086  const HTMLOption& rOption = rHTMLOptions[--i];
5087  switch( rOption.GetToken() )
5088  {
5089  case HtmlOptionId::CLEAR:
5090  {
// case-insensitive match; any other CLEAR value leaves both flags unset
5091  const OUString &rClear = rOption.GetString();
5092  if( rClear.equalsIgnoreAsciiCase( OOO_STRING_SVTOOLS_HTML_AL_all ) )
5093  {
5094  bClearLeft = true;
5095  bClearRight = true;
5096  }
5097  else if( rClear.equalsIgnoreAsciiCase( OOO_STRING_SVTOOLS_HTML_AL_left ) )
5098  bClearLeft = true;
5099  else if( rClear.equalsIgnoreAsciiCase( OOO_STRING_SVTOOLS_HTML_AL_right ) )
5100  bClearRight = true;
5101  }
5102  break;
5103  case HtmlOptionId::ID:
5104  aId = rOption.GetString();
5105  break;
5106  case HtmlOptionId::STYLE:
5107  aStyle = rOption.GetString();
5108  break;
5109  case HtmlOptionId::CLASS:
5110  aClass = rOption.GetString();
5111  break;
5112  default: break;
5113  }
5114  }
5115 
5116  // CLEAR is only supported for the current paragraph
5117  if( bClearLeft || bClearRight )
5118  {
5119  SwNodeIndex& rNodeIdx = m_pPam->GetPoint()->nNode;
5120  SwTextNode* pTextNd = rNodeIdx.GetNode().GetTextNode();
5121  if( pTextNd )
5122  {
5123  const SwFrameFormats& rFrameFormatTable = *m_xDoc->GetSpzFrameFormats();
5124 
5125  for( size_t i=0; i<rFrameFormatTable.size(); i++ )
5126  {
5127  SwFrameFormat *const pFormat = rFrameFormatTable[i];
5128  SwFormatAnchor const*const pAnchor = &pFormat->GetAnchor();
5129  SwPosition const*const pAPos = pAnchor->GetContentAnchor();
// only frames anchored at-paragraph or at-character in this paragraph that
// currently have some wrapping are candidates
5130  if (pAPos &&
5131  ((RndStdIds::FLY_AT_PARA == pAnchor->GetAnchorId()) ||
5132  (RndStdIds::FLY_AT_CHAR == pAnchor->GetAnchorId())) &&
5133  pAPos->nNode == rNodeIdx &&
5134  pFormat->GetSurround().GetSurround() != css::text::WrapTextMode_NONE )
5135  {
// drawing formats are always treated as left-aligned here
5136  sal_Int16 eHori = RES_DRAWFRMFMT == pFormat->Which()
5137  ? text::HoriOrientation::LEFT
5138  : pFormat->GetHoriOrient().GetHoriOrient();
5139 
// PARALLEL serves as the "leave unchanged" marker for the check below
5140  css::text::WrapTextMode eSurround = css::text::WrapTextMode_PARALLEL;
5141  if( m_pPam->GetPoint()->nContent.GetIndex() )
5142  {
5143  if( bClearLeft && text::HoriOrientation::LEFT==eHori )
5144  eSurround = css::text::WrapTextMode_RIGHT;
5145  else if( bClearRight && text::HoriOrientation::RIGHT==eHori )
5146  eSurround = css::text::WrapTextMode_LEFT;
5147  }
5148  else if( (bClearLeft && text::HoriOrientation::LEFT==eHori) ||
5149  (bClearRight && text::HoriOrientation::RIGHT==eHori) )
5150  {
5151  eSurround = css::text::WrapTextMode_NONE;
5152  }
5153 
5154  if( css::text::WrapTextMode_PARALLEL != eSurround )
5155  {
5156  SwFormatSurround aSurround( eSurround );
5157  if( css::text::WrapTextMode_NONE != eSurround )
5158  aSurround.SetAnchorOnly( true );
5159  pFormat->SetFormatAttr( aSurround );
5160  bCleared = true;
5161  }
5162  }
5163  }
5164  }
5165  }
5166 
5167  // parse styles
// starts out as a "no break" item and is replaced by a clone of the parsed
// break item when the CSS sets one
5168  std::shared_ptr<SvxFormatBreakItem> aBreakItem(std::make_shared<SvxFormatBreakItem>(SvxBreak::NONE, RES_BREAK));
5169  bool bBreakItem = false;
5170  if( HasStyleOptions( aStyle, aId, aClass ) )
5171  {
5172  SfxItemSet aItemSet( m_xDoc->GetAttrPool(), m_pCSS1Parser->GetWhichMap() );
5173  SvxCSS1PropertyInfo aPropInfo;
5174 
5175  if( ParseStyleOptions( aStyle, aId, aClass, aItemSet, aPropInfo ) )
5176  {
5177  if( m_pCSS1Parser->SetFormatBreak( aItemSet, aPropInfo ) )
5178  {
5179  aBreakItem.reset(aItemSet.Get(RES_BREAK).Clone());
5180  bBreakItem = true;
5181  }
5182  if( !aPropInfo.m_aId.isEmpty() )
5183  InsertBookmark( aPropInfo.m_aId );
5184  }
5185  }
5186 
// a "page after" break is applied before the line break / new paragraph ...
5187  if( bBreakItem && SvxBreak::PageAfter == aBreakItem->GetBreak() )
5188  {
5189  NewAttr(m_xAttrTab, &m_xAttrTab->pBreak, *aBreakItem);
5190  EndAttr( m_xAttrTab->pBreak, false );
5191  }
5192 
5193  if( !bCleared && !bBreakItem )
5194  {
5195  // If no CLEAR could or should be executed, a line break will be inserted
5196  OUString sTmp( u'\x000a' ); // make the Mac happy :-)
5197  m_xDoc->getIDocumentContentOperations().InsertString( *m_pPam, sTmp );
5198  }
5199  else if( m_pPam->GetPoint()->nContent.GetIndex() )
5200  {
5201  // If a CLEAR is executed in a non-empty paragraph, then after it
5202  // a new paragraph has to be opened.
5203  // MIB 21.02.97: Here actually we should change the bottom paragraph
5204  // margin to zero. This will fail for something like this <BR ..><P>
5205  // (>Netscape). That's why we don't do it.
// NOTE(review): listing line 5206 is missing (numbering jumps 5205 -> 5207);
// the statement that opens the new paragraph is not visible here.
5207  }
// ... while a "page before" break is applied after it
5208  if( bBreakItem && SvxBreak::PageBefore == aBreakItem->GetBreak() )
5209  {
5210  NewAttr(m_xAttrTab, &m_xAttrTab->pBreak, *aBreakItem);
5211  EndAttr( m_xAttrTab->pBreak, false );
5212  }
5213  }
5214 
// NOTE(review): the signature line (listing line 5215) and several body lines
// are absent from this extract (see the notes below). The body pushes and
// pops a HtmlTokenId::HORZRULE context with the RES_POOLCOLL_HTML_HR style,
// so this is presumably the handler for a horizontal rule -- confirm against
// the original file.
//
// Evaluates ID/SIZE/WIDTH/ALIGN/NOSHADE/COLOR, starts a paragraph with the
// HR style, optionally gives it a bottom border line and -- outside of
// tables -- paragraph indents that emulate the requested width and
// alignment, inserts a bookmark for the ID, and restores the previous style.
5216  {
5217  sal_uInt16 nSize = 0;
5218  sal_uInt16 nWidth = 0;
5219 
5220  SvxAdjust eAdjust = SvxAdjust::End;
5221 
5222  bool bPercentWidth = false;
5223  bool bNoShade = false;
5224  bool bColor = false;
5225 
5226  Color aColor;
5227  OUString aId;
5228 
5229  // let's fetch the options
5230  const HTMLOptions& rHTMLOptions = GetOptions();
5231  for (size_t i = rHTMLOptions.size(); i; )
5232  {
5233  const HTMLOption& rOption = rHTMLOptions[--i];
5234  switch( rOption.GetToken() )
5235  {
5236  case HtmlOptionId::ID:
5237  aId = rOption.GetString();
5238  break;
5239  case HtmlOptionId::SIZE:
5240  nSize = static_cast<sal_uInt16>(rOption.GetNumber());
5241  break;
5242  case HtmlOptionId::WIDTH:
5243  bPercentWidth = (rOption.GetString().indexOf('%') != -1);
5244  nWidth = static_cast<sal_uInt16>(rOption.GetNumber());
5245  if( bPercentWidth && nWidth>=100 )
5246  {
5247  // the default case are 100% lines (no attributes necessary)
5248  nWidth = 0;
5249  bPercentWidth = false;
5250  }
5251  break;
5252  case HtmlOptionId::ALIGN:
5253  eAdjust = rOption.GetEnum( aHTMLPAlignTable, eAdjust );
5254  break;
5255  case HtmlOptionId::NOSHADE:
5256  bNoShade = true;
5257  break;
5258  case HtmlOptionId::COLOR:
5259  rOption.GetColor( aColor );
5260  bColor = true;
5261  break;
5262  default: break;
5263  }
5264  }
5265 
// NOTE(review): listing lines 5267 and 5271 are missing (numbering jumps
// 5266 -> 5268 and 5270 -> 5272). The statement controlled by the first `if`
// and whatever follows AppendTextNode() are not visible here.
5266  if( m_pPam->GetPoint()->nContent.GetIndex() )
5268  if( m_nOpenParaToken != HtmlTokenId::NONE )
5269  EndPara();
5270  AppendTextNode();
5272 
5273  // ...and save in a context
5274  std::unique_ptr<HTMLAttrContext> xCntxt(
5275  new HTMLAttrContext(HtmlTokenId::HORZRULE, RES_POOLCOLL_HTML_HR, OUString()));
5276 
5277  PushContext(xCntxt);
5278 
5279  // set the new style
5280  SetTextCollAttrs(m_aContexts.back().get());
5281 
5282  // the hard attributes of the current paragraph will never become invalid
5283  m_aParaAttrs.clear();
5284 
5285  if( nSize>0 || bColor || bNoShade )
5286  {
5287  // set line colour and/or width
5288  if( !bColor )
5289  aColor = COL_GRAY;
5290 
5291  SvxBorderLine aBorderLine( &aColor );
5292  if( nSize )
5293  {
// only the height is of interest; the width argument is a zero placeholder
5294  long nPWidth = 0;
5295  long nPHeight = static_cast<long>(nSize);
5296  SvxCSS1Parser::PixelToTwip( nPWidth, nPHeight );
5297  if ( !bNoShade )
5298  {
5299  aBorderLine.SetBorderLineStyle(SvxBorderLineStyle::DOUBLE);
5300  }
5301  aBorderLine.SetWidth( nPHeight );
5302  }
5303  else if( bNoShade )
5304  {
5305  aBorderLine.SetWidth( DEF_LINE_WIDTH_2 );
5306  }
5307  else
5308  {
5309  aBorderLine.SetBorderLineStyle(SvxBorderLineStyle::DOUBLE);
5310  aBorderLine.SetWidth( DEF_LINE_WIDTH_0 );
5311  }
5312 
5313  SvxBoxItem aBoxItem(RES_BOX);
5314  aBoxItem.SetLine( &aBorderLine, SvxBoxItemLine::BOTTOM );
// NOTE(review): raw `new`; the pointer is handed to m_aSetAttrTab, which
// presumably takes over ownership -- verify.
5315  HTMLAttr* pTmp = new HTMLAttr(*m_pPam->GetPoint(), aBoxItem, nullptr, std::shared_ptr<HTMLAttrTable>());
5316  m_aSetAttrTab.push_back( pTmp );
5317  }
5318  if( nWidth )
5319  {
5320  // If we aren't in a table, then the width value will be "faked" with
5321  // paragraph indents. That makes little sense in a table. In order to
5322  // avoid that the line is considered during the width calculation, it
5323  // still gets an appropriate LRSpace-Item.
5324  if (!m_xTable)
5325  {
5326  // fake length and alignment of line above paragraph indents
5327  long nBrowseWidth = GetCurrentBrowseWidth();
// NOTE(review): in the non-percent case the value passed to ToTwips() is
// nBrowseWidth, not the WIDTH option value in nWidth. That looks
// unintended (the pixel width is discarded) -- confirm against upstream
// before changing.
5328  nWidth = bPercentWidth ? static_cast<sal_uInt16>((nWidth*nBrowseWidth) / 100)
5329  : ToTwips( static_cast<sal_uInt16>(nBrowseWidth) );
5330  if( nWidth < MINLAY )
5331  nWidth = MINLAY;
5332 
// indents are only needed when the rule is narrower than the available width
5333  const SwFormatColl *pColl = (static_cast<long>(nWidth) < nBrowseWidth) ? GetCurrFormatColl() : nullptr;
5334  if (pColl)
5335  {
5336  SvxLRSpaceItem aLRItem( pColl->GetLRSpace() );
5337  long nDist = nBrowseWidth - nWidth;
5338 
// the free space goes to the side opposite the alignment; centring (also the
// fallback for any other value, including the initial SvxAdjust::End)
// splits it evenly
5339  switch( eAdjust )
5340  {
5341  case SvxAdjust::Right:
5342  aLRItem.SetTextLeft( static_cast<sal_uInt16>(nDist) );
5343  break;
5344  case SvxAdjust::Left:
5345  aLRItem.SetRight( static_cast<sal_uInt16>(nDist) );
5346  break;
5347  case SvxAdjust::Center:
5348  default:
5349  nDist /= 2;
5350  aLRItem.SetTextLeft( static_cast<sal_uInt16>(nDist) );
5351  aLRItem.SetRight( static_cast<sal_uInt16>(nDist) );
5352  break;
5353  }
5354 
5355  HTMLAttr* pTmp = new HTMLAttr(*m_pPam->GetPoint(), aLRItem, nullptr, std::shared_ptr<HTMLAttrTable>());
5356  m_aSetAttrTab.push_back( pTmp );
5357  }
5358  }
5359  }
5360 
5361  // it's not possible to insert bookmarks in links
5362  if( !aId.isEmpty() )
5363  InsertBookmark( aId );
5364 
5365  // pop current context of stack
5366  std::unique_ptr<HTMLAttrContext> xPoppedContext(PopContext(HtmlTokenId::HORZRULE));
5367  xPoppedContext.reset();
5368 
// NOTE(review): listing line 5369 is missing (numbering jumps 5368 -> 5370);
// a statement between popping the context and re-applying the style is not
// visible here.
5370 
5371  // and set the current style in the next paragraph
5372  SetTextCollAttrs();
5373  }
5374 
5376 {
5377  OUString aName, aContent;
5378  bool bHTTPEquiv = false;
5379 
5380  const HTMLOptions& rHTMLOptions = GetOptions();
5381  for (size_t i = rHTMLOptions.size(); i; )
5382  {
5383  const HTMLOption& rOption = rHTMLOptions[--i];
5384  switch( rOption.GetToken() )
5385  {
5386  case HtmlOptionId::NAME:
5387  aName = rOption.GetString();
5388  bHTTPEquiv = false;
5389  break;
5390