LibreOffice Module sw (master)  1
swhtml.cxx
Go to the documentation of this file.
1 /* -*- Mode: C++; tab-width: 4; indent-tabs-mode: nil; c-basic-offset: 4 -*- */
2 /*
3  * This file is part of the LibreOffice project.
4  *
5  * This Source Code Form is subject to the terms of the Mozilla Public
6  * License, v. 2.0. If a copy of the MPL was not distributed with this
7  * file, You can obtain one at http://mozilla.org/MPL/2.0/.
8  *
9  * This file incorporates work covered by the following license notice:
10  *
11  * Licensed to the Apache Software Foundation (ASF) under one or more
12  * contributor license agreements. See the NOTICE file distributed
13  * with this work for additional information regarding copyright
14  * ownership. The ASF licenses this file to you under the Apache
15  * License, Version 2.0 (the "License"); you may not use this file
16  * except in compliance with the License. You may obtain a copy of
17  * the License at http://www.apache.org/licenses/LICENSE-2.0 .
18  */
19 
20 #include <sal/config.h>
21 
22 #include <algorithm>
23 #include <memory>
24 #include <config_java.h>
25 
26 #include <com/sun/star/document/XDocumentPropertiesSupplier.hpp>
27 #include <com/sun/star/document/XDocumentProperties.hpp>
28 #include <com/sun/star/i18n/ScriptType.hpp>
29 #include <com/sun/star/i18n/XBreakIterator.hpp>
30 #include <comphelper/string.hxx>
31 #include <o3tl/safeint.hxx>
32 #include <rtl/ustrbuf.hxx>
33 #include <svx/svxids.hrc>
34 #if OSL_DEBUG_LEVEL > 0
35 #include <stdlib.h>
36 #endif
37 #include <hintids.hxx>
38 
39 #include <vcl/errinf.hxx>
40 #include <svl/stritem.hxx>
41 #include <vcl/imap.hxx>
42 #include <svtools/htmltokn.h>
43 #include <svtools/htmlkywd.hxx>
44 #include <svtools/ctrltool.hxx>
45 #include <unotools/configmgr.hxx>
46 #include <unotools/pathoptions.hxx>
47 #include <vcl/svapp.hxx>
48 #include <sfx2/event.hxx>
49 #include <sfx2/docfile.hxx>
50 
51 #include <svtools/htmlcfg.hxx>
52 #include <sfx2/linkmgr.hxx>
53 #include <editeng/kernitem.hxx>
54 #include <editeng/boxitem.hxx>
55 #include <editeng/fhgtitem.hxx>
57 #include <editeng/postitem.hxx>
58 #include <editeng/wghtitem.hxx>
60 #include <editeng/udlnitem.hxx>
62 #include <editeng/blinkitem.hxx>
63 #include <editeng/ulspitem.hxx>
64 #include <editeng/colritem.hxx>
65 #include <editeng/fontitem.hxx>
66 #include <editeng/adjustitem.hxx>
67 #include <editeng/lrspitem.hxx>
68 #include <editeng/protitem.hxx>
69 #include <editeng/flstitem.hxx>
71 
72 #include <frmatr.hxx>
73 #include <charatr.hxx>
74 #include <fmtfld.hxx>
75 #include <fmtpdsc.hxx>
76 #include <fmtanchr.hxx>
77 #include <fmtsrnd.hxx>
78 #include <fmtfsize.hxx>
79 #include <fmtclds.hxx>
80 #include <fchrfmt.hxx>
81 #include <fmtinfmt.hxx>
82 #include <fmtfollowtextflow.hxx>
83 #include <fmtornt.hxx>
84 #include <doc.hxx>
85 #include <IDocumentUndoRedo.hxx>
92 #include <IDocumentStatistics.hxx>
93 #include <IDocumentState.hxx>
94 #include <pam.hxx>
95 #include <ndtxt.hxx>
96 #include <mdiexp.hxx>
97 #include <poolfmt.hxx>
98 #include <pagedesc.hxx>
99 #include <IMark.hxx>
100 #include <docsh.hxx>
101 #include <editsh.hxx>
102 #include <docufld.hxx>
103 #include "swcss1.hxx"
104 #include <fltini.hxx>
105 #include <htmltbl.hxx>
106 #include "htmlnum.hxx"
107 #include "swhtml.hxx"
108 #include "wrthtml.hxx"
109 #include <linkenum.hxx>
110 #include <breakit.hxx>
111 #include <SwAppletImpl.hxx>
112 #include <swdll.hxx>
113 #include <txatbase.hxx>
114 
115 #include <sfx2/viewfrm.hxx>
116 #include <svx/svdobj.hxx>
117 #include <officecfg/Office/Writer.hxx>
119 #include <comphelper/sequence.hxx>
120 #include <officecfg/Office/Common.hxx>
121 
122 #include <swerror.h>
123 #include <ndole.hxx>
124 #include <unoframe.hxx>
125 #include "css1atr.hxx"
126 #include <frameformats.hxx>
127 
128 #define FONTSIZE_MASK 7
129 
130 #define HTML_ESC_PROP 80
131 #define HTML_ESC_SUPER DFLT_ESC_SUPER
132 #define HTML_ESC_SUB DFLT_ESC_SUB
133 
134 #define HTML_SPTYPE_BLOCK 1
135 #define HTML_SPTYPE_HORI 2
136 #define HTML_SPTYPE_VERT 3
137 
139 using namespace ::com::sun::star;
140 
141 // <P ALIGN=xxx>, <Hn ALIGN=xxx>, <TD ALIGN=xxx> etc.
// Option table pairing HTML ALIGN keyword constants with SvxAdjust values.
// Both the "center" and the "middle" keyword map to SvxAdjust::Center, and
// the "char" keyword maps to SvxAdjust::Left.
// NOTE(review): the declaration line of this table (listing line 142) is
// missing from this listing, so its name and element type are not visible.
143 {
144  { OOO_STRING_SVTOOLS_HTML_AL_left, SvxAdjust::Left },
145  { OOO_STRING_SVTOOLS_HTML_AL_center, SvxAdjust::Center },
146  { OOO_STRING_SVTOOLS_HTML_AL_middle, SvxAdjust::Center }, // Netscape
147  { OOO_STRING_SVTOOLS_HTML_AL_right, SvxAdjust::Right },
148  { OOO_STRING_SVTOOLS_HTML_AL_justify, SvxAdjust::Block },
149  { OOO_STRING_SVTOOLS_HTML_AL_char, SvxAdjust::Left },
// Last row: null keyword; presumably the end-of-table marker - confirm.
150  { nullptr, SvxAdjust(0) }
151 };
152 
153 // <SPACER TYPE=...>
// Option table for the TYPE attribute of <SPACER>.
// NOTE(review): the declaration line (listing line 154) and the entries on
// listing lines 156-158 are missing from this listing; only the final
// { nullptr, 0 } row is visible. Presumably the dropped rows use the
// HTML_SPTYPE_* constants defined above - confirm against the real file.
155 {
159  { nullptr, 0 }
160 };
161 
// Body that switches m_bTemplateBrowseMode on.
// NOTE(review): the declaration line (listing line 162) is missing from this
// listing; presumably this is a constructor of the HTML reader - confirm.
163 {
164  m_bTemplateBrowseMode = true;
165 }
166 
// Returns the name of the template to use for an HTML import, or an empty
// string when none should be / can be used.
//
// The search order visible below is "internal/html.oth" first, then
// "internal/html.stw", both looked up in the template paths via
// SvtPathOptions::SearchFile(). If neither is found, an assertion fires and
// an empty string is returned.
//
// NOTE(review): listing line 169 is missing. As shown here the first
// `return OUString();` is unconditional, which would make everything after
// it unreachable and leave rDoc unused; presumably the dropped line is a
// condition guarding that early return - confirm against the real file.
167 OUString HTMLReader::GetTemplateName(SwDoc& rDoc) const
168 {
170  // HTML import into Writer, avoid loading the Writer/Web template.
171  return OUString();
172 
173  static const OUStringLiteral sTemplateWithoutExt(u"internal/html");
174  SvtPathOptions aPathOpt;
175 
176  // first search for OpenDocument Writer/Web template
177  // OpenDocument Writer/Web template (extension .oth)
178  OUString sTemplate( sTemplateWithoutExt + ".oth" );
179  if (aPathOpt.SearchFile( sTemplate, SvtPathOptions::Paths::Template ))
180  return sTemplate;
181 
182  // no OpenDocument Writer/Web template found.
183  // search for OpenOffice.org Writer/Web template
184  sTemplate = sTemplateWithoutExt + ".stw";
185  if (aPathOpt.SearchFile( sTemplate, SvtPathOptions::Paths::Template ))
186  return sTemplate;
187 
// Neither template variant was found: flag it in debug builds, then fall
// back to "no template".
188  OSL_ENSURE( false, "The default HTML template cannot be found in the defined template directories!");
189 
190  return OUString();
191 }
192 
// Returns true when the medium is remote or is not a storage, false
// otherwise.
// NOTE(review): the declaration line (listing line 193) and the statement on
// listing line 199 are missing from this listing, so any side effect of the
// true branch is not visible here.
194 {
// Debug-only check; m_pMedium is dereferenced below without a runtime guard.
195  OSL_ENSURE( m_pMedium, "Where is the medium??" );
196 
197  if( m_pMedium->IsRemote() || !m_pMedium->IsStorage() )
198  {
200  return true;
201  }
202  return false;
203 
204 }
205 
// Entry point of the general Reader interface for HTML.
//
// Creates a SwHTMLParser for rDoc at rPam on the reader's stream and runs it.
// Returns ERR_SWG_READ_ERROR when there is no stream, ERRCODE_NONE by
// default, and an ERR_FORMAT_ROWCOL error carrying "<line>,<column>" when
// the parser ends in a state other than Accepted (the Pending case is
// handled separately, see below).
//
// NOTE(review): several statements are missing from this listing (listing
// lines 209, 219, 223, 225-226, 236 and 241); the notes below mark the gaps.
207 ErrCode HTMLReader::Read( SwDoc &rDoc, const OUString& rBaseURL, SwPaM &rPam, const OUString & rName )
208 {
210 
211  if( !m_pStream )
212  {
213  OSL_ENSURE( m_pStream, "HTML-Read without stream" );
214  return ERR_SWG_READ_ERROR;
215  }
216 
217  if( !m_bInsertMode )
218  {
220 
221  // Set the HTML page style, when it isn't a HTML document,
222  // otherwise it's already set.
// NOTE(review): the condition and the body of this step are not visible.
224  {
227  }
228  }
229 
230  // so nobody steals the document!
231  rtl::Reference<SwDoc> xHoldAlive(&rDoc);
232  ErrCode nRet = ERRCODE_NONE;
// NOTE(review): the remaining constructor arguments and the closing of this
// call (listing line 236) are missing from this listing.
233  tools::SvRef<SwHTMLParser> xParser = new SwHTMLParser( &rDoc, rPam, *m_pStream,
234  rName, rBaseURL, !m_bInsertMode, m_pMedium,
235  IsReadUTF8(),
237 
238  SvParserState eState = xParser->CallParser();
239 
// NOTE(review): the statement executed for the Pending state (listing line
// 241) is missing from this listing.
240  if( SvParserState::Pending == eState )
242  else if( SvParserState::Accepted != eState )
243  {
// Error text is "<line number>,<line position>" as reported by the parser.
244  const OUString sErr(OUString::number(static_cast<sal_Int32>(xParser->GetLineNr()))
245  + "," + OUString::number(static_cast<sal_Int32>(xParser->GetLinePos())));
246 
247  // use the stream as transport for error number
// NOTE(review): the StringErrorInfo allocated here is never deleted in this
// function; presumably the error-info machinery takes ownership - confirm.
248  nRet = *new StringErrorInfo( ERR_FORMAT_ROWCOL, sErr,
249  DialogMask::ButtonsOk | DialogMask::MessageError );
250  }
251 
252  return nRet;
253 }
254 
// SwHTMLParser constructor (tail of the parameter list, member initialisers
// and body).
//
// Visible steps of the body:
//  - re-uses the passed cursor as m_pPam and zeroes the attribute table,
//  - sets up the font height table (fixed values when fuzzing),
//  - for a new document, sets font height defaults and FollowTextFlow,
//  - switches the document into HTML mode, remembering the old value in
//    m_bOldIsHTMLMode,
//  - creates the CSS1 parser and selects the source encoding,
//  - splits a jump mark taken from the medium's URL into mark and type,
//  - records the XHTML namespace, and
//  - reads the "AllowedRTFOLEMimeTypes" load argument from the medium.
//
// NOTE(review): the first declaration line (listing line 255, presumably
// declaring pD, rCursor and rIn, which are used below) and several interior
// lines are missing from this listing; the notes below mark the gaps that
// matter for reading the code.
256  const OUString& rPath,
257  const OUString& rBaseURL,
258  bool bReadNewDoc,
259  SfxMedium* pMed, bool bReadUTF8,
260  bool bNoHTMLComments,
261  const OUString& rNamespace )
262  : SfxHTMLParser( rIn, bReadNewDoc, pMed ),
263  m_aPathToFile( rPath ),
264  m_sBaseURL( rBaseURL ),
265  m_xAttrTab(std::make_shared<HTMLAttrTable>()),
266  m_pNumRuleInfo( new SwHTMLNumRuleInfo ),
267  m_xDoc( pD ),
268  m_pActionViewShell( nullptr ),
269  m_pSttNdIdx( nullptr ),
270  m_pFormImpl( nullptr ),
271  m_pMarquee( nullptr ),
272  m_pImageMap( nullptr ),
273  m_nBaseFontStMin( 0 ),
274  m_nFontStMin( 0 ),
275  m_nDefListDeep( 0 ),
276  m_nFontStHeadStart( 0 ),
277  m_nSBModuleCnt( 0 ),
278  m_nMissingImgMaps( 0 ),
279  m_nParaCnt( 5 ),
280  // #i83625#
281  m_nContextStMin( 0 ),
282  m_nContextStAttrMin( 0 ),
283  m_nSelectEntryCnt( 0 ),
284  m_nOpenParaToken( HtmlTokenId::NONE ),
285  m_eJumpTo( JumpToMarks::NONE ),
286 #ifdef DBG_UTIL
287  m_nContinue( 0 ),
288 #endif
289  m_eParaAdjust( SvxAdjust::End ),
290  m_bDocInitialized( false ),
291  m_bSetModEnabled( false ),
292  m_bInFloatingFrame( false ),
293  m_bInField( false ),
294  m_bKeepUnknown( false ),
295  m_bCallNextToken( false ),
296  m_bIgnoreRawData( false ),
297  m_bLBEntrySelected ( false ),
298  m_bTAIgnoreNewPara ( false ),
299  m_bFixMarqueeWidth ( false ),
300  m_bNoParSpace( false ),
301  m_bInNoEmbed( false ),
302  m_bInTitle( false ),
303  m_bUpdateDocStat( false ),
304  m_bFixSelectWidth( false ),
305  m_bTextArea( false ),
306  m_bSelect( false ),
307  m_bInFootEndNoteAnchor( false ),
308  m_bInFootEndNoteSymbol( false ),
309  m_bIgnoreHTMLComments( bNoHTMLComments ),
310  m_bRemoveHidden( false ),
311  m_bBodySeen( false ),
312  m_bReadingHeaderOrFooter( false ),
313  m_bNotifyMacroEventRead( false ),
314  m_bFuzzing(utl::ConfigManager::IsFuzzing()),
315  m_isInTableStructure(false),
316  m_nTableDepth( 0 ),
317  m_nFloatingFrames( 0 ),
318  m_nListItems( 0 ),
319  m_pTempViewFrame(nullptr)
320 {
321  // If requested explicitly, then force ignoring of comments (don't create postits for them).
// NOTE(review): listing lines 324 and 326 are missing; presumably they hold
// the check that makes the assignment below conditional - confirm.
322  if (!m_bFuzzing)
323  {
325  m_bIgnoreHTMLComments = true;
327  }
328 
329  m_nEventId = nullptr;
331 
332  m_eScriptLang = HTMLScriptLanguage::Unknown;
333 
334  rCursor.DeleteMark();
335  m_pPam = &rCursor; // re-use existing cursor: avoids spurious ~SwIndexReg assert
// NOTE(review): zeroes the freshly created HTMLAttrTable byte-wise; this
// assumes the struct is safe to memset - confirm.
336  memset(m_xAttrTab.get(), 0, sizeof(HTMLAttrTable));
337 
338  // Read the font sizes 1-7 from the INI file
// NOTE(review): the non-fuzzing branch (listing lines 341-347) and the first
// assignment of the fuzzing branch (listing line 351) are missing.
339  if (!m_bFuzzing)
340  {
348  }
349  else
350  {
352  m_aFontHeights[4] = m_aFontHeights[5] = m_aFontHeights[6] = 12 * 20;
353  }
354 
355  if(bReadNewDoc)
356  {
357  //CJK has different defaults, so a different object should be used for this
358  //RES_CHARTR_CJK_FONTSIZE is a valid value
// NOTE(review): the declarations of aFontHeight, aFontHeightCJK and
// aFontHeightCTL (listing lines 359, 361, 363) are missing.
360  m_xDoc->SetDefault( aFontHeight );
362  m_xDoc->SetDefault( aFontHeightCJK );
364  m_xDoc->SetDefault( aFontHeightCTL );
365 
366  // #i18732# - adjust default of option 'FollowTextFlow'
367  // TODO: not sure what the appropriate default for HTML should be?
368  m_xDoc->SetDefault( SwFormatFollowTextFlow(true) );
369  }
370 
371  // Change to HTML mode during the import, so that the right styles are created
372  m_bOldIsHTMLMode = m_xDoc->getIDocumentSettingAccess().get(DocumentSettingId::HTML_MODE);
373  m_xDoc->getIDocumentSettingAccess().set(DocumentSettingId::HTML_MODE, true);
374 
375  m_pCSS1Parser.reset(new SwCSS1Parser(m_xDoc.get(), *this, m_aFontHeights, m_sBaseURL, IsNewDoc()));
// NOTE(review): the statement controlled by this `if` (listing line 377) is
// missing from this listing.
376  if (!m_bFuzzing)
378 
// Source encoding: forced to UTF-8 on request, otherwise taken from the
// document shell's header attributes when those are available.
379  if( bReadUTF8 )
380  {
381  SetSrcEncoding( RTL_TEXTENCODING_UTF8 );
382  }
383  else
384  {
// NOTE(review): pDocSh is dereferenced here without a null check, while the
// same GetDocShell() result is null-checked a few lines further down.
// Either that later check is unnecessary or this is a possible null
// dereference when the document has no shell - confirm.
385  SwDocShell *pDocSh = m_xDoc->GetDocShell();
386  SvKeyValueIterator *pHeaderAttrs =
387  pDocSh->GetHeaderAttributes();
388  if( pHeaderAttrs )
389  SetEncodingByHTTPHeader( pHeaderAttrs );
390  }
391  m_pCSS1Parser->SetDfltEncoding( osl_getThreadTextEncoding() );
392 
393  SwDocShell* pDocSh = m_xDoc->GetDocShell();
394  if( pDocSh )
395  {
396  m_bViewCreated = true; // not, load synchronous
397 
398  // a jump mark is present
399 
400  if( pMed )
401  {
402  m_sJmpMark = pMed->GetURLObject().GetMark();
403  if( !m_sJmpMark.isEmpty() )
404  {
// Split "<name><separator><type>" at the last separator. nPos == 0 means
// "no usable separator" and leaves sCmp empty.
406  sal_Int32 nLastPos = m_sJmpMark.lastIndexOf( cMarkSeparator );
407  sal_Int32 nPos = nLastPos != -1 ? nLastPos : 0;
408 
409  OUString sCmp;
410  if (nPos)
411  {
412  sCmp = m_sJmpMark.copy(nPos + 1).replaceAll(" ", "");
413  }
414 
415  if( !sCmp.isEmpty() )
416  {
417  sCmp = sCmp.toAsciiLowerCase();
// NOTE(review): the assignments for "region", "table" and "graphic"
// (listing lines 419, 421, 423) are missing from this listing.
418  if( sCmp == "region" )
420  else if( sCmp == "table" )
422  else if( sCmp == "graphic" )
424  else if( sCmp == "outline" ||
425  sCmp == "text" ||
426  sCmp == "frame" )
427  m_eJumpTo = JumpToMarks::NONE; // this is nothing valid!
428  else
429  // otherwise this is a normal (book)mark
430  nPos = -1;
431  }
432  else
433  nPos = -1;
434 
// nPos == -1 means the whole string is the mark name; otherwise the type
// suffix is cut off.
435  if( nPos != -1 )
436  m_sJmpMark = m_sJmpMark.copy( 0, nPos );
// NOTE(review): the statement controlled by this `if` (listing line 438) is
// missing from this listing.
437  if( m_sJmpMark.isEmpty() )
439  }
440  }
441  }
442 
// A non-empty namespace switches the parser to XHTML; "reqif-xhtml"
// additionally enables ReqIF mode.
443  if (!rNamespace.isEmpty())
444  {
445  SetNamespace(rNamespace);
446  m_bXHTML = true;
447  if (rNamespace == "reqif-xhtml")
448  m_bReqIF = true;
449  }
450 
451  // Extract load parameters which are specific to this filter.
452  if (!pMed)
453  {
454  return;
455  }
456 
457  comphelper::SequenceAsHashMap aLoadMap(pMed->GetArgs());
458  auto it = aLoadMap.find("AllowedRTFOLEMimeTypes");
459  if (it == aLoadMap.end())
460  {
461  return;
462  }
463 
464  uno::Sequence<OUString> aTypes;
465  it->second >>= aTypes;
466  m_aAllowedRTFOLEMimeTypes = comphelper::sequenceToContainer<std::set<OUString>>(aTypes);
467 }
468 
// Destructor body of SwHTMLParser (the assertion text further down names it
// "SwHTMLParser::~SwHTMLParser").
//
// Visible steps: pops and clears any contexts left on the stack, restores
// the document's HTML_MODE setting and clears its load-asynchron flag,
// updates links and finishes loading on the document shell, frees leftover
// parser state and finally releases the document reference.
//
// NOTE(review): the declaration line (listing line 469) and the statements
// on listing lines 489 and 533 are missing from this listing.
470 {
471 #ifdef DBG_UTIL
472  OSL_ENSURE( !m_nContinue, "DTOR in continue!" );
473 #endif
474 
475  OSL_ENSURE(m_aContexts.empty(), "There are still contexts on the stack");
476  OSL_ENSURE(!m_nContextStMin, "There are protected contexts");
// Drop the protection limit so that every remaining context is popped.
477  m_nContextStMin = 0;
478  while (!m_aContexts.empty())
479  {
480  std::unique_ptr<HTMLAttrContext> xCntxt(PopContext());
481  ClearContext(xCntxt.get());
482  }
483 
484  bool bAsync = m_xDoc->IsInLoadAsynchron();
485  m_xDoc->SetInLoadAsynchron( false );
486  m_xDoc->getIDocumentSettingAccess().set(DocumentSettingId::HTML_MODE, m_bOldIsHTMLMode);
487 
// NOTE(review): the statement controlled by this `if` (listing line 489) is
// missing; presumably it cancels the still pending user event - confirm.
488  if( m_xDoc->GetDocShell() && m_nEventId )
490 
491  // the DocumentDetected maybe can delete the DocShells, therefore fetch again
492  if( m_xDoc->GetDocShell() )
493  {
494  // update linked sections
495  sal_uInt16 nLinkMode = m_xDoc->getIDocumentSettingAccess().getLinkUpdateMode( true );
496  if( nLinkMode != NEVER && bAsync &&
497  SfxObjectCreateMode::INTERNAL!=m_xDoc->GetDocShell()->GetCreateMode() )
498  m_xDoc->getIDocumentLinksAdministration().GetLinkManager().UpdateAllLinks( nLinkMode == MANUAL, false, nullptr );
499 
500  if ( m_xDoc->GetDocShell()->IsLoading() )
501  {
502  // #i59688#
503  m_xDoc->GetDocShell()->LoadingFinished();
504  }
505  }
506 
507  delete m_pSttNdIdx;
508 
// Leftover attributes are unexpected: the assertion inside this branch
// always fires in debug builds, then the attributes are freed anyway.
509  if( !m_aSetAttrTab.empty() )
510  {
511  OSL_ENSURE( m_aSetAttrTab.empty(),"There are still attributes on the stack" );
512  for ( const auto& rpAttr : m_aSetAttrTab )
513  delete rpAttr;
514  m_aSetAttrTab.clear();
515  }
516 
517  m_pCSS1Parser.reset();
518  m_pNumRuleInfo.reset();
519  DeleteFormImpl();
520  m_pFootEndNoteImpl.reset();
521 
522  OSL_ENSURE(!m_xTable, "It exists still an open table");
523  m_pImageMaps.reset();
524 
525  OSL_ENSURE( m_vPendingStack.empty(),
526  "SwHTMLParser::~SwHTMLParser: Here should not be Pending-Stack anymore" );
527  m_vPendingStack.clear();
528 
529  m_xDoc.clear();
530 
531  if ( m_pTempViewFrame )
532  {
534 
535  // the temporary view frame is hidden, so the hidden flag might need to be removed
// NOTE(review): m_xDoc was cleared just above, so m_xDoc.is() is false here
// unless the missing statement (listing line 533) re-sets it. As written the
// SID_HIDDEN item therefore appears never to be cleared. If the cleanup is
// wanted, the document reference has to be released after this block -
// confirm the intended order.
536  if ( m_bRemoveHidden && m_xDoc.is() && m_xDoc->GetDocShell() && m_xDoc->GetDocShell()->GetMedium() )
537  m_xDoc->GetDocShell()->GetMedium()->GetItemSet()->ClearItem( SID_HIDDEN );
538  }
539 }
540 
541 IMPL_LINK_NOARG( SwHTMLParser, AsyncCallback, void*, void )
542 {
543  m_nEventId=nullptr;
544 
545  // #i47907# - If the document has already been destructed,
546  // the parser should be aware of this:
547  if( ( m_xDoc->GetDocShell() && m_xDoc->GetDocShell()->IsAbortingImport() )
548  || 1 == m_xDoc->getReferenceCount() )
549  {
550  // was the import aborted by SFX?
551  eState = SvParserState::Error;
552  }
553 
554  GetAsynchCallLink().Call(nullptr);
555 }
556 
// Prepares the document for parsing and returns the resulting parser state.
//
// Visible steps: creates m_pSttNdIdx; when inserting into an existing
// document, splits the node at the cursor twice, splits any redline over the
// insertion point and applies the standard text collection; then either
// posts the asynchronous callback (medium present, no view yet) or sets up
// the progress bar (no medium); finally starts listening to page
// descriptor 0.
//
// NOTE(review): the declaration line (listing line 557) is missing;
// presumably this is SwHTMLParser::CallParser - confirm. Listing lines 572
// and 609 are missing as well, so the declaration of eRet and the actual
// parser call are not visible here.
558 {
559  // create temporary index on position 0, so it won't be moved!
560  m_pSttNdIdx = new SwNodeIndex( m_xDoc->GetNodes() );
561  if( !IsNewDoc() ) // insert into existing document ?
562  {
563  const SwPosition* pPos = m_pPam->GetPoint();
564 
565  m_xDoc->getIDocumentContentOperations().SplitNode( *pPos, false );
566 
// Remember the node just before the cursor node as the start of the
// inserted range.
567  *m_pSttNdIdx = pPos->nNode.GetIndex()-1;
568  m_xDoc->getIDocumentContentOperations().SplitNode( *pPos, false );
569 
570  SwPaM aInsertionRangePam( *pPos );
571 
573 
574  // split any redline over the insertion point
575  aInsertionRangePam.SetMark();
576  *aInsertionRangePam.GetPoint() = *m_pPam->GetPoint();
577  aInsertionRangePam.Move( fnMoveBackward );
578  m_xDoc->getIDocumentRedlineAccess().SplitRedline( aInsertionRangePam );
579 
580  m_xDoc->SetTextFormatColl( *m_pPam,
581  m_pCSS1Parser->GetTextCollFromPool( RES_POOLCOLL_STANDARD ));
582  }
583 
584  if( GetMedium() )
585  {
586  if( !m_bViewCreated )
587  {
588  m_nEventId = Application::PostUserEvent( LINK( this, SwHTMLParser, AsyncCallback ) );
589  }
590  else
591  {
// NOTE(review): m_bViewCreated is already true in this branch, so the
// assignment below has no effect.
592  m_bViewCreated = true;
593  m_nEventId = nullptr;
594  }
595  }
596  else // show progress bar
597  {
// Seek to the end to learn the stream length for the progress range, then
// rewind; errors caused by the seeks are reset.
598  rInput.Seek(STREAM_SEEK_TO_END);
599  rInput.ResetError();
600 
601  m_xProgress.reset(new ImportProgress(m_xDoc->GetDocShell(), 0, rInput.Tell()));
602 
603  rInput.Seek(STREAM_SEEK_TO_BEGIN);
604  rInput.ResetError();
605  }
606 
607  StartListening(m_xDoc->GetPageDesc( 0 ).GetNotifier());
608 
610  return eRet;
611 }
612 
// Looks at the node directly before index nNodeIdx and returns true when it
// is a content node, or an end node whose start node is a section node.
// NOTE(review): the declaration line (listing line 613) is missing from this
// listing; the name CanRemoveNode is taken from the call site further down.
// Assumes nNodeIdx > 0 - confirm against the callers.
614 {
615  const SwNode *pPrev = m_xDoc->GetNodes()[nNodeIdx - 1];
616  return pPrev->IsContentNode() || (pPrev->IsEndNode() && pPrev->StartOfSectionNode()->IsSectionNode());
617 }
618 
// Body of SwHTMLParser::Continue (named by its own assertion texts).
//
// Visible flow:
//  1. Mark the state as Error when the document shell is aborting the
//     import.
//  2. On the first call with a medium and no view yet: set the state to
//     Pending, flag the document as loading asynchronously and return.
//  3. Otherwise switch off SetModified, the OLE link and undo for the
//     duration of the import, then either flush the pending stack (error
//     case) or run HTMLParser::Continue.
//  4. When the parser is no longer pending: finish open scripts, applets,
//     numberings and contexts, set the remaining attributes and re-join the
//     paragraph that was split at the start of an insertion.
//  5. When the input was accepted: remove the trailing empty paragraph (or
//     undo the split), adjust the autoload URL and update the document
//     statistics.
//  6. Restore undo, OLE link, modified state and SetModified, unless the
//     parser holds the last reference to the document.
//
// NOTE(review): the declaration line (listing line 619, which presumably
// declares the nToken parameter used below) and several interior lines are
// missing from this listing; notes below mark the gaps.
620 {
621 #ifdef DBG_UTIL
622  OSL_ENSURE(!m_nContinue, "Continue in Continue - not supposed to happen");
623  m_nContinue++;
624 #endif
625 
626  // When the import (of SFX) is aborted, an error will be set but
627  // we still continue, so that we clean up properly.
628  OSL_ENSURE( SvParserState::Error!=eState,
629  "SwHTMLParser::Continue: already set an error" );
630  if( m_xDoc->GetDocShell() && m_xDoc->GetDocShell()->IsAbortingImport() )
631  eState = SvParserState::Error;
632 
633  // Fetch SwViewShell from document, save it and set as current.
634  SwViewShell *pInitVSh = CallStartAction();
635 
636  if( SvParserState::Error != eState && GetMedium() && !m_bViewCreated )
637  {
638  // At first call first return, show document and wait for callback
639  // time.
640  // At this point in CallParser only one digit was read and
641  // a SaveState(0) was called.
642  eState = SvParserState::Pending;
643  m_bViewCreated = true;
644  m_xDoc->SetInLoadAsynchron( true );
645 
646 #ifdef DBG_UTIL
647  m_nContinue--;
648 #endif
649 
650  return;
651  }
652 
// Remember whether SetModified was enabled so that it can be re-enabled at
// the end of this function.
653  m_bSetModEnabled = false;
654  if( m_xDoc->GetDocShell() )
655  {
656  m_bSetModEnabled = m_xDoc->GetDocShell()->IsEnableSetModified();
657  if( m_bSetModEnabled )
658  {
659  m_xDoc->GetDocShell()->EnableSetModified( false );
660  }
661  }
662 
663  // during import don't call OLE-Modified
664  Link<bool,void> aOLELink( m_xDoc->GetOle2Link() );
665  m_xDoc->SetOle2Link( Link<bool,void>() );
666 
667  bool bModified = m_xDoc->getIDocumentState().IsModified();
668  bool const bWasUndo = m_xDoc->GetIDocumentUndoRedo().DoesUndo();
669  m_xDoc->GetIDocumentUndoRedo().DoUndo(false);
670 
671  // When the import will be aborted, don't call Continue anymore.
672  // If a Pending-Stack exists make sure the stack is ended with a call
673  // of NextToken.
674  if( SvParserState::Error == eState )
675  {
676  OSL_ENSURE( m_vPendingStack.empty() || m_vPendingStack.back().nToken != HtmlTokenId::NONE,
677  "SwHTMLParser::Continue: Pending-Stack without Token" );
678  if( !m_vPendingStack.empty() && m_vPendingStack.back().nToken != HtmlTokenId::NONE )
679  NextToken( m_vPendingStack.back().nToken );
680  OSL_ENSURE( m_vPendingStack.empty(),
681  "SwHTMLParser::Continue: There is again a Pending-Stack" );
682  }
683  else
684  {
// Resume with the token saved on the pending stack, if there is one.
685  HTMLParser::Continue( !m_vPendingStack.empty() ? m_vPendingStack.back().nToken : nToken );
686  }
687 
688  // disable progress bar again
689  m_xProgress.reset();
690 
691  bool bLFStripped = false;
692  if( SvParserState::Pending != GetStatus() )
693  {
694  // set the last attributes yet
695  {
696  if( !m_aScriptSource.isEmpty() )
697  {
698  SwScriptFieldType *pType =
699  static_cast<SwScriptFieldType*>(m_xDoc->getIDocumentFieldsAccess().GetSysFieldType( SwFieldIds::Script ));
700 
// NOTE(review): the start of the aField declaration (listing line 701) is
// missing from this listing.
702  false );
703  InsertAttr( SwFormatField( aField ), false );
704  }
705 
706  if( m_pAppletImpl )
707  {
708  if( m_pAppletImpl->GetApplet().is() )
709  EndApplet();
710  else
711  EndObject();
712  }
713 
714  // maybe remove an existing LF after the last paragraph
715  if( IsNewDoc() )
716  bLFStripped = StripTrailingLF() > 0;
717 
718  // close still open numbering
// NOTE(review): the loop body (listing line 720) is missing from this
// listing.
719  while( GetNumInfo().GetNumRule() )
721 
722  OSL_ENSURE( !m_nContextStMin, "There are protected contexts" );
723  // try this twice, first normally to let m_nContextStMin decrease
724  // naturally and get contexts popped in desired order, and if that
725  // fails force it
726  for (int i = 0; i < 2; ++i)
727  {
728  while (m_aContexts.size() > m_nContextStMin)
729  {
730  std::unique_ptr<HTMLAttrContext> xCntxt(PopContext());
731  if (xCntxt)
732  EndContext(xCntxt.get());
733  }
734  if (!m_nContextStMin)
735  break;
736  OSL_ENSURE(!m_nContextStMin, "There are still protected contexts");
737  m_nContextStMin = 0;
738  }
739 
740  m_aParaAttrs.clear();
741 
742  SetAttr( false );
743 
744  // set the first delayed styles
745  m_pCSS1Parser->SetDelayedStyles();
746  }
747 
748  // again correct the start
749  if( !IsNewDoc() && m_pSttNdIdx->GetIndex() )
750  {
751  SwTextNode* pTextNode = m_pSttNdIdx->GetNode().GetTextNode();
752  SwNodeIndex aNxtIdx( *m_pSttNdIdx );
753  if( pTextNode && pTextNode->CanJoinNext( &aNxtIdx ))
754  {
755  const sal_Int32 nStt = pTextNode->GetText().getLength();
756  // when the cursor is still in the node, then set him at the end
757  if( m_pPam->GetPoint()->nNode == aNxtIdx )
758  {
// NOTE(review): listing line 759 is missing from this listing.
760  m_pPam->GetPoint()->nContent.Assign( pTextNode, nStt );
761  }
762 
763 #if OSL_DEBUG_LEVEL > 0
764 // !!! shouldn't be possible, or ??
765  OSL_ENSURE( m_pSttNdIdx->GetIndex()+1 != m_pPam->GetBound().nNode.GetIndex(),
766  "Pam.Bound1 is still in the node" );
767  OSL_ENSURE( m_pSttNdIdx->GetIndex()+1 != m_pPam->GetBound( false ).nNode.GetIndex(),
768  "Pam.Bound2 is still in the node" );
769 
770  if( m_pSttNdIdx->GetIndex()+1 == m_pPam->GetBound().nNode.GetIndex() )
771  {
772  const sal_Int32 nCntPos = m_pPam->GetBound().nContent.GetIndex();
773  m_pPam->GetBound().nContent.Assign( pTextNode,
774  pTextNode->GetText().getLength() + nCntPos );
775  }
776  if( m_pSttNdIdx->GetIndex()+1 == m_pPam->GetBound( false ).nNode.GetIndex() )
777  {
778  const sal_Int32 nCntPos = m_pPam->GetBound( false ).nContent.GetIndex();
779  m_pPam->GetBound( false ).nContent.Assign( pTextNode,
780  pTextNode->GetText().getLength() + nCntPos );
781  }
782 #endif
783  // Keep character attribute!
784  SwTextNode* pDelNd = aNxtIdx.GetNode().GetTextNode();
785  if (pTextNode->GetText().getLength())
786  pDelNd->FormatToTextAttr( pTextNode );
787  else
788  pTextNode->ChgFormatColl( pDelNd->GetTextColl() );
789  pTextNode->JoinNext();
790  }
791  }
792  }
793 
794  if( SvParserState::Accepted == eState )
795  {
796  if( m_nMissingImgMaps )
797  {
798  // Some Image-Map relations are still missing.
799  // Maybe now the Image-Maps are there?
// NOTE(review): the statement for this case (listing line 800) is missing
// from this listing.
801  }
802 
803  // now remove the last useless paragraph
804  SwPosition* pPos = m_pPam->GetPoint();
805  if( !pPos->nContent.GetIndex() && !bLFStripped )
806  {
807  SwTextNode* pCurrentNd;
808  sal_uLong nNodeIdx = pPos->nNode.GetIndex();
809 
// NOTE(review): the initialiser of bHasFlysOrMarks (listing line 811) is
// missing from this listing.
810  bool bHasFlysOrMarks =
812 
813  if( IsNewDoc() )
814  {
815  if (!m_pPam->GetPoint()->nContent.GetIndex() && CanRemoveNode(nNodeIdx))
816  {
// NOTE(review): the declarations of pCNd (listing line 817) and pVSh
// (listing line 821) are missing from this listing.
818  if( pCNd && pCNd->StartOfSectionIndex()+2 <
819  pCNd->EndOfSectionIndex() && !bHasFlysOrMarks )
820  {
822  SwCursorShell *pCursorSh = dynamic_cast<SwCursorShell *>( pVSh );
// Move a shell cursor that sits in the node about to be deleted to the end
// of the previous paragraph first.
823  if( pCursorSh &&
824  pCursorSh->GetCursor()->GetPoint()
825  ->nNode.GetIndex() == nNodeIdx )
826  {
827  pCursorSh->MovePara(GoPrevPara, fnParaEnd );
828  pCursorSh->SetMark();
829  pCursorSh->ClearMark();
830  }
831  m_pPam->GetBound().nContent.Assign( nullptr, 0 );
832  m_pPam->GetBound(false).nContent.Assign( nullptr, 0 );
833  m_xDoc->GetNodes().Delete( m_pPam->GetPoint()->nNode );
834  }
835  }
836  }
837  else if( nullptr != ( pCurrentNd = m_xDoc->GetNodes()[ nNodeIdx ]->GetTextNode()) && !bHasFlysOrMarks )
838  {
839  if( pCurrentNd->CanJoinNext( &pPos->nNode ))
840  {
841  SwTextNode* pNextNd = pPos->nNode.GetNode().GetTextNode();
842  pPos->nContent.Assign( pNextNd, 0 );
844  pNextNd->JoinPrev();
845  }
846  else if (pCurrentNd->GetText().isEmpty())
847  {
848  pPos->nContent.Assign( nullptr, 0 );
850  m_xDoc->GetNodes().Delete( pPos->nNode );
852  }
853  }
854  }
855 
856  // annul the SplitNode from the beginning
857  else if( !IsNewDoc() )
858  {
859  if( pPos->nContent.GetIndex() ) // then there was no <p> at the end
860  m_pPam->Move( fnMoveForward, GoInNode ); // therefore to the next
861  SwTextNode* pTextNode = pPos->nNode.GetNode().GetTextNode();
862  SwNodeIndex aPrvIdx( pPos->nNode );
863  if( pTextNode && pTextNode->CanJoinPrev( &aPrvIdx ) &&
864  *m_pSttNdIdx <= aPrvIdx )
865  {
866  // Normally here should take place a JoinNext, but all cursors and
867  // so are registered in pTextNode, so that it MUST remain.
868 
869  // Convert paragraph to character attribute, from Prev adopt
870  // the paragraph attribute and the template!
871  SwTextNode* pPrev = aPrvIdx.GetNode().GetTextNode();
872  pTextNode->ChgFormatColl( pPrev->GetTextColl() );
873  pTextNode->FormatToTextAttr( pPrev );
874  pTextNode->ResetAllAttr();
875 
876  if( pPrev->HasSwAttrSet() )
877  pTextNode->SetAttr( *pPrev->GetpSwAttrSet() );
878 
879  if( &m_pPam->GetBound().nNode.GetNode() == pPrev )
880  m_pPam->GetBound().nContent.Assign( pTextNode, 0 );
881  if( &m_pPam->GetBound(false).nNode.GetNode() == pPrev )
882  m_pPam->GetBound(false).nContent.Assign( pTextNode, 0 );
883 
884  pTextNode->JoinPrev();
885  }
886  }
887 
888  // adjust AutoLoad in DocumentProperties
889  if (!m_bFuzzing && IsNewDoc())
890  {
891  SwDocShell *pDocShell(m_xDoc->GetDocShell());
892  OSL_ENSURE(pDocShell, "no SwDocShell");
893  if (pDocShell) {
894  uno::Reference<document::XDocumentPropertiesSupplier> xDPS(
895  pDocShell->GetModel(), uno::UNO_QUERY_THROW);
896  uno::Reference<document::XDocumentProperties> xDocProps(
897  xDPS->getDocumentProperties());
898  OSL_ENSURE(xDocProps.is(), "DocumentProperties is null");
// An autoload interval without a URL means "reload this document", so the
// path of the imported file is filled in.
899  if ( xDocProps.is() && (xDocProps->getAutoloadSecs() > 0) &&
900  (xDocProps->getAutoloadURL().isEmpty()) )
901  {
902  xDocProps->setAutoloadURL(m_aPathToFile);
903  }
904  }
905  }
906 
907  if( m_bUpdateDocStat )
908  {
909  m_xDoc->getIDocumentStatistics().UpdateDocStat( false, true );
910  }
911  }
912 
// The start index is only needed while parsing can still be resumed.
913  if( SvParserState::Pending != GetStatus() )
914  {
915  delete m_pSttNdIdx;
916  m_pSttNdIdx = nullptr;
917  }
918 
919  // should the parser be the last one who hold the document, then nothing
920  // has to be done anymore, document will be destroyed shortly!
921  if( 1 < m_xDoc->getReferenceCount() )
922  {
923  if( bWasUndo )
924  {
925  m_xDoc->GetIDocumentUndoRedo().DelAllUndoObj();
926  m_xDoc->GetIDocumentUndoRedo().DoUndo(true);
927  }
928  else if( !pInitVSh )
929  {
930  // When at the beginning of Continue no Shell was available,
931  // it's possible in the meantime one was created.
932  // In that case the bWasUndo flag is wrong and we must
933  // enable Undo.
934  SwViewShell *pTmpVSh = CheckActionViewShell();
935  if( pTmpVSh )
936  {
937  m_xDoc->GetIDocumentUndoRedo().DoUndo(true);
938  }
939  }
940 
941  m_xDoc->SetOle2Link( aOLELink );
942  if( !bModified )
943  m_xDoc->getIDocumentState().ResetModified();
944  if( m_bSetModEnabled && m_xDoc->GetDocShell() )
945  {
946  m_xDoc->GetDocShell()->EnableSetModified();
947  m_bSetModEnabled = false; // this is unnecessary here
948  }
949  }
950 
951  // When the Document-SwVievShell still exists and an Action is open
952  // (doesn't have to be by abort), end the Action, disconnect from Shell
953  // and finally reconstruct the old Shell.
954  CallEndAction( true );
955 
956 #ifdef DBG_UTIL
957  m_nContinue--;
958 #endif
959 }
960 
961 void SwHTMLParser::Notify(const SfxHint& rHint)
962 {
963  if(rHint.GetId() == SfxHintId::Dying)
964  {
965  EndListeningAll();
966  ReleaseRef();
967  }
968 }
969 
// One-time document initialisation (the assertion text names it
// "DocumentDetected"); sets m_bDocInitialized. For a new document it
// finishes a still open header, ends the current action, switches undo off
// and starts a new action.
// NOTE(review): the declaration line (listing line 970) is missing from this
// listing.
971 {
972  OSL_ENSURE( !m_bDocInitialized, "DocumentDetected called multiple times" );
973  m_bDocInitialized = true;
974  if( IsNewDoc() )
975  {
976  if( IsInHeader() )
977  FinishHeader();
978 
979  CallEndAction( true );
980 
981  m_xDoc->GetIDocumentUndoRedo().DoUndo(false);
982  // For DocumentDetected in general a SwViewShell is created.
983  // But it also can be created later, in case the UI is captured.
984  CallStartAction();
985  }
986 }
987 
988 // is called for every token that is recognised in CallParser
990 {
991  if( ( m_xDoc->GetDocShell() && m_xDoc->GetDocShell()->IsAbortingImport() )
992  || 1 == m_xDoc->getReferenceCount() )
993  {
994  // Was the import cancelled by SFX? If a pending stack
995  // exists, clean it.
996  eState = SvParserState::Error;
997  OSL_ENSURE( m_vPendingStack.empty() || m_vPendingStack.back().nToken != HtmlTokenId::NONE,
998  "SwHTMLParser::NextToken: Pending-Stack without token" );
999  if( 1 == m_xDoc->getReferenceCount() || m_vPendingStack.empty() )
1000  return ;
1001  }
1002 
1003 #if OSL_DEBUG_LEVEL > 0
1004  if( !m_vPendingStack.empty() )
1005  {
1006  switch( nToken )
1007  {
1008  // tables are read by recursive method calls
1009  case HtmlTokenId::TABLE_ON:
1010  // For CSS declarations we might have to wait
1011  // for a file download to finish
1012  case HtmlTokenId::LINK:
1013  // For controls we might have to set the size.
1014  case HtmlTokenId::INPUT:
1015  case HtmlTokenId::TEXTAREA_ON:
1016  case HtmlTokenId::SELECT_ON:
1017  case HtmlTokenId::SELECT_OFF:
1018  break;
1019  default:
1020  OSL_ENSURE( m_vPendingStack.empty(), "Unknown token for Pending-Stack" );
1021  break;
1022  }
1023  }
1024 #endif
1025 
1026  // The following special cases have to be treated before the
1027  // filter detection, because Netscape doesn't reference the content
1028  // of the title for filter detection either.
1029  if( m_vPendingStack.empty() )
1030  {
1031  if( m_bInTitle )
1032  {
1033  switch( nToken )
1034  {
1035  case HtmlTokenId::TITLE_OFF:
1036  {
1037  OUString sTitle = m_sTitle.makeStringAndClear();
1038  if( IsNewDoc() && !sTitle.isEmpty() )
1039  {
1040  if( m_xDoc->GetDocShell() ) {
1041  uno::Reference<document::XDocumentPropertiesSupplier>
1042  xDPS(m_xDoc->GetDocShell()->GetModel(),
1043  uno::UNO_QUERY_THROW);
1044  uno::Reference<document::XDocumentProperties> xDocProps(
1045  xDPS->getDocumentProperties());
1046  OSL_ENSURE(xDocProps.is(), "no DocumentProperties");
1047  if (xDocProps.is()) {
1048  xDocProps->setTitle(sTitle);
1049  }
1050 
1051  m_xDoc->GetDocShell()->SetTitle(sTitle);
1052  }
1053  }
1054  m_bInTitle = false;
1055  break;
1056  }
1057 
1058  case HtmlTokenId::NONBREAKSPACE:
1059  m_sTitle.append(" ");
1060  break;
1061 
1062  case HtmlTokenId::SOFTHYPH:
1063  m_sTitle.append("-");
1064  break;
1065 
1066  case HtmlTokenId::TEXTTOKEN:
1067  m_sTitle.append(aToken);
1068  break;
1069 
1070  default:
1071  m_sTitle.append("<");
1072  if( (nToken >= HtmlTokenId::ONOFF_START) && isOffToken(nToken) )
1073  m_sTitle.append("/");
1074  m_sTitle.append(sSaveToken);
1075  if( !aToken.isEmpty() )
1076  {
1077  m_sTitle.append(" ");
1078  m_sTitle.append(aToken);
1079  }
1080  m_sTitle.append(">");
1081  break;
1082  }
1083 
1084  return;
1085  }
1086  }
1087 
1088  // Find out what type of document it is if we don't know already.
1089  // For Controls this has to be finished before the control is inserted
1090  // because for inserting a View is needed.
1091  if( !m_bDocInitialized )
1092  DocumentDetected();
1093 
1094  bool bGetIDOption = false, bInsertUnknown = false;
1095  bool bUpperSpaceSave = m_bUpperSpace;
1096  m_bUpperSpace = false;
1097 
1098  // The following special cases may or have to be treated after the
1099  // filter detection
1100  if( m_vPendingStack.empty() )
1101  {
1102  if( m_bInFloatingFrame )
1103  {
1104  // <SCRIPT> is ignored here (from us), because it is ignored in
1105  // Applets as well
1106  if( HtmlTokenId::IFRAME_OFF == nToken )
1107  {
1108  m_bCallNextToken = false;
1109  m_bInFloatingFrame = false;
1110  }
1111 
1112  return;
1113  }
1114  else if( m_bInNoEmbed )
1115  {
1116  switch( nToken )
1117  {
1118  case HtmlTokenId::NOEMBED_OFF:
1121  m_aContents.clear();
1122  m_bCallNextToken = false;
1123  m_bInNoEmbed = false;
1124  break;
1125 
1126  case HtmlTokenId::RAWDATA:
1128  break;
1129 
1130  default:
1131  OSL_ENSURE( false, "SwHTMLParser::NextToken: invalid tag" );
1132  break;
1133  }
1134 
1135  return;
1136  }
1137  else if( m_pAppletImpl )
1138  {
1139  // in an applet only <PARAM> tags and the </APPLET> tag
1140  // are of interest for us (for the moment)
1141  // <SCRIPT> is ignored here (from Netscape)!
1142 
1143  switch( nToken )
1144  {
1145  case HtmlTokenId::APPLET_OFF:
1146  m_bCallNextToken = false;
1147  EndApplet();
1148  break;
1149  case HtmlTokenId::OBJECT_OFF:
1150  m_bCallNextToken = false;
1151  EndObject();
1152  break;
1153  case HtmlTokenId::PARAM:
1154  InsertParam();
1155  break;
1156  default: break;
1157  }
1158 
1159  return;
1160  }
1161  else if( m_bTextArea )
1162  {
1163  // in a TextArea everything up to </TEXTAREA> is inserted as text.
1164  // <SCRIPT> is ignored here (from Netscape)!
1165 
1166  switch( nToken )
1167  {
1168  case HtmlTokenId::TEXTAREA_OFF:
1169  m_bCallNextToken = false;
1170  EndTextArea();
1171  break;
1172 
1173  default:
1174  InsertTextAreaText( nToken );
1175  break;
1176  }
1177 
1178  return;
1179  }
1180  else if( m_bSelect )
1181  {
1182  // HAS to be treated after bNoScript!
1183  switch( nToken )
1184  {
1185  case HtmlTokenId::SELECT_OFF:
1186  m_bCallNextToken = false;
1187  EndSelect();
1188  return;
1189 
1190  case HtmlTokenId::OPTION:
1192  return;
1193 
1194  case HtmlTokenId::TEXTTOKEN:
1195  InsertSelectText();
1196  return;
1197 
1198  case HtmlTokenId::INPUT:
1199  case HtmlTokenId::SCRIPT_ON:
1200  case HtmlTokenId::SCRIPT_OFF:
1201  case HtmlTokenId::NOSCRIPT_ON:
1202  case HtmlTokenId::NOSCRIPT_OFF:
1203  case HtmlTokenId::RAWDATA:
1204  // treat in normal switch
1205  break;
1206 
1207  default:
1208  // ignore
1209  return;
1210  }
1211  }
1212  else if( m_pMarquee )
1213  {
1214  // in a TextArea everything up to </TEXTAREA> is inserted as text.
1215  // The <SCRIPT> tags are ignored from MS-IE, we ignore the whole
1216  // script.
1217  switch( nToken )
1218  {
1219  case HtmlTokenId::MARQUEE_OFF:
1220  m_bCallNextToken = false;
1221  EndMarquee();
1222  break;
1223 
1224  case HtmlTokenId::TEXTTOKEN:
1226  break;
1227  default: break;
1228  }
1229 
1230  return;
1231  }
1232  else if( m_bInField )
1233  {
1234  switch( nToken )
1235  {
1236  case HtmlTokenId::SDFIELD_OFF:
1237  m_bCallNextToken = false;
1238  EndField();
1239  break;
1240 
1241  case HtmlTokenId::TEXTTOKEN:
1242  InsertFieldText();
1243  break;
1244  default: break;
1245  }
1246 
1247  return;
1248  }
1250  {
1251  switch( nToken )
1252  {
1253  case HtmlTokenId::ANCHOR_OFF:
1254  EndAnchor();
1255  m_bCallNextToken = false;
1256  break;
1257 
1258  case HtmlTokenId::TEXTTOKEN:
1260  break;
1261  default: break;
1262  }
1263  return;
1264  }
1265  else if( !m_aUnknownToken.isEmpty() )
1266  {
1267  // Paste content of unknown tags.
1268  // (but surely if we are not in the header section) fdo#36080 fdo#34666
1269  if (!aToken.isEmpty() && !IsInHeader() )
1270  {
1271  if( !m_bDocInitialized )
1272  DocumentDetected();
1273  m_xDoc->getIDocumentContentOperations().InsertString( *m_pPam, aToken );
1274 
1275  // if there are temporary paragraph attributes and the
1276  // paragraph isn't empty then the paragraph attributes
1277  // are final.
1278  m_aParaAttrs.clear();
1279 
1280  SetAttr();
1281  }
1282 
1283  // Unknown token in the header are only closed by a matching
1284  // end-token, </HEAD> or <BODY>. Text inside is ignored.
1285  switch( nToken )
1286  {
1287  case HtmlTokenId::UNKNOWNCONTROL_OFF:
1288  if( m_aUnknownToken != sSaveToken )
1289  return;
1290  [[fallthrough]];
1291  case HtmlTokenId::FRAMESET_ON:
1292  case HtmlTokenId::HEAD_OFF:
1293  case HtmlTokenId::BODY_ON:
1294  case HtmlTokenId::IMAGE: // Don't know why Netscape acts this way.
1295  m_aUnknownToken.clear();
1296  break;
1297  case HtmlTokenId::TEXTTOKEN:
1298  return;
1299  default:
1300  m_aUnknownToken.clear();
1301  break;
1302  }
1303  }
1304  }
1305 
1306  switch( nToken )
1307  {
1308  case HtmlTokenId::BODY_ON:
1309  if (!m_bBodySeen)
1310  {
1311  m_bBodySeen = true;
1312  if( !m_aStyleSource.isEmpty() )
1313  {
1314  m_pCSS1Parser->ParseStyleSheet( m_aStyleSource );
1315  m_aStyleSource.clear();
1316  }
1317  if( IsNewDoc() )
1318  {
1320  // If there is a template for the first or the right page,
1321  // it is set here.
1322  const SwPageDesc *pPageDesc = nullptr;
1323  if( m_pCSS1Parser->IsSetFirstPageDesc() )
1324  pPageDesc = m_pCSS1Parser->GetFirstPageDesc();
1325  else if( m_pCSS1Parser->IsSetRightPageDesc() )
1326  pPageDesc = m_pCSS1Parser->GetRightPageDesc();
1327 
1328  if( pPageDesc )
1329  {
1330  m_xDoc->getIDocumentContentOperations().InsertPoolItem( *m_pPam, SwFormatPageDesc( pPageDesc ) );
1331  }
1332  }
1333  }
1334  break;
1335 
1336  case HtmlTokenId::LINK:
1337  InsertLink();
1338  break;
1339 
1340  case HtmlTokenId::BASE:
1341  {
1342  const HTMLOptions& rHTMLOptions = GetOptions();
1343  for (size_t i = rHTMLOptions.size(); i; )
1344  {
1345  const HTMLOption& rOption = rHTMLOptions[--i];
1346  switch( rOption.GetToken() )
1347  {
1348  case HtmlOptionId::HREF:
1349  m_sBaseURL = rOption.GetString();
1350  break;
1351  case HtmlOptionId::TARGET:
1352  if( IsNewDoc() )
1353  {
1354  SwDocShell *pDocShell(m_xDoc->GetDocShell());
1355  OSL_ENSURE(pDocShell, "no SwDocShell");
1356  if (pDocShell) {
1357  uno::Reference<document::XDocumentPropertiesSupplier> xDPS(
1358  pDocShell->GetModel(), uno::UNO_QUERY_THROW);
1359  uno::Reference<document::XDocumentProperties>
1360  xDocProps(xDPS->getDocumentProperties());
1361  OSL_ENSURE(xDocProps.is(),"no DocumentProperties");
1362  if (xDocProps.is()) {
1363  xDocProps->setDefaultTarget(
1364  rOption.GetString());
1365  }
1366  }
1367  }
1368  break;
1369  default: break;
1370  }
1371  }
1372  }
1373  break;
1374 
1375  case HtmlTokenId::META:
1376  {
1377  SvKeyValueIterator *pHTTPHeader = nullptr;
1378  if( IsNewDoc() )
1379  {
1380  SwDocShell *pDocSh = m_xDoc->GetDocShell();
1381  if( pDocSh )
1382  pHTTPHeader = pDocSh->GetHeaderAttributes();
1383  }
1384  SwDocShell *pDocShell(m_xDoc->GetDocShell());
1385  OSL_ENSURE(pDocShell, "no SwDocShell");
1386  if (pDocShell)
1387  {
1388  uno::Reference<document::XDocumentProperties> xDocProps;
1389  if (IsNewDoc())
1390  {
1391  const uno::Reference<document::XDocumentPropertiesSupplier>
1392  xDPS( pDocShell->GetModel(), uno::UNO_QUERY_THROW );
1393  xDocProps = xDPS->getDocumentProperties();
1394  OSL_ENSURE(xDocProps.is(), "DocumentProperties is null");
1395  }
1396  ParseMetaOptions( xDocProps, pHTTPHeader );
1397  }
1398  }
1399  break;
1400 
1401  case HtmlTokenId::TITLE_ON:
1402  m_bInTitle = true;
1403  break;
1404 
1405  case HtmlTokenId::SCRIPT_ON:
1406  NewScript();
1407  break;
1408 
1409  case HtmlTokenId::SCRIPT_OFF:
1410  EndScript();
1411  break;
1412 
1413  case HtmlTokenId::NOSCRIPT_ON:
1414  case HtmlTokenId::NOSCRIPT_OFF:
1415  bInsertUnknown = true;
1416  break;
1417 
1418  case HtmlTokenId::STYLE_ON:
1419  NewStyle();
1420  break;
1421 
1422  case HtmlTokenId::STYLE_OFF:
1423  EndStyle();
1424  break;
1425 
1426  case HtmlTokenId::RAWDATA:
1427  if( !m_bIgnoreRawData )
1428  {
1429  if( IsReadScript() )
1430  {
1431  AddScriptSource();
1432  }
1433  else if( IsReadStyle() )
1434  {
1435  if( !m_aStyleSource.isEmpty() )
1436  m_aStyleSource += "\n";
1437  m_aStyleSource += aToken;
1438  }
1439  }
1440  break;
1441 
1442  case HtmlTokenId::OBJECT_ON:
1443  if (m_bXHTML)
1444  {
1445  if (!InsertEmbed())
1446  InsertImage();
1447  break;
1448  }
1449 #if HAVE_FEATURE_JAVA
1450  NewObject();
1451  m_bCallNextToken = m_pAppletImpl!=nullptr && m_xTable;
1452 #endif
1453  break;
1454 
1455  case HtmlTokenId::OBJECT_OFF:
1456  if (!m_aEmbeds.empty())
1457  m_aEmbeds.pop();
1458  break;
1459 
1460  case HtmlTokenId::APPLET_ON:
1461 #if HAVE_FEATURE_JAVA
1462  InsertApplet();
1463  m_bCallNextToken = m_pAppletImpl!=nullptr && m_xTable;
1464 #endif
1465  break;
1466 
1467  case HtmlTokenId::IFRAME_ON:
1468  if (m_bFuzzing && m_nFloatingFrames > 64)
1469  SAL_WARN("sw.html", "Not importing any more FloatingFrames for fuzzing performance");
1470  else
1471  {
1474  }
1475  break;
1476 
1477  case HtmlTokenId::LINEBREAK:
1478  if( !IsReadPRE() )
1479  {
1480  InsertLineBreak();
1481  break;
1482  }
1483  else
1484  bGetIDOption = true;
1485  // <BR>s in <PRE> resemble true LFs, hence no break
1486  [[fallthrough]];
1487 
1488  case HtmlTokenId::NEWPARA:
1489  // CR in PRE/LISTING/XMP
1490  {
1491  if( HtmlTokenId::NEWPARA==nToken ||
1493  {
1494  AppendTextNode(); // there is no LF at this place
1495  // therefore it will cause no problems
1496  SetTextCollAttrs();
1497  }
1498  // progress bar
1499  if (m_xProgress)
1500  m_xProgress->Update(rInput.Tell());
1501  }
1502  break;
1503 
1504  case HtmlTokenId::NONBREAKSPACE:
1505  m_xDoc->getIDocumentContentOperations().InsertString( *m_pPam, OUString(CHAR_HARDBLANK) );
1506  break;
1507 
1508  case HtmlTokenId::SOFTHYPH:
1509  m_xDoc->getIDocumentContentOperations().InsertString( *m_pPam, OUString(CHAR_SOFTHYPHEN) );
1510  break;
1511 
1512  case HtmlTokenId::LINEFEEDCHAR:
1513  if( m_pPam->GetPoint()->nContent.GetIndex() )
1514  AppendTextNode();
1515  if (!m_xTable && !m_xDoc->IsInHeaderFooter(m_pPam->GetPoint()->nNode))
1516  {
1517  NewAttr(m_xAttrTab, &m_xAttrTab->pBreak, SvxFormatBreakItem(SvxBreak::PageBefore, RES_BREAK));
1518  EndAttr( m_xAttrTab->pBreak, false );
1519  }
1520  break;
1521 
1522  case HtmlTokenId::TEXTTOKEN:
1523  // insert string without spanning attributes at the end.
1524  if( !aToken.isEmpty() && ' '==aToken[0] && !IsReadPRE() )
1525  {
1526  sal_Int32 nPos = m_pPam->GetPoint()->nContent.GetIndex();
1527  const SwTextNode* pTextNode = nPos ? m_pPam->GetPoint()->nNode.GetNode().GetTextNode() : nullptr;
1528  if (pTextNode)
1529  {
1530  const OUString& rText = pTextNode->GetText();
1531  sal_Unicode cLast = rText[--nPos];
1532  if( ' ' == cLast || '\x0a' == cLast)
1533  aToken = aToken.copy(1);
1534  }
1535  else
1536  aToken = aToken.copy(1);
1537 
1538  if( aToken.isEmpty() )
1539  {
1540  m_bUpperSpace = bUpperSpaceSave;
1541  break;
1542  }
1543  }
1544 
1545  if( !aToken.isEmpty() )
1546  {
1547  if( !m_bDocInitialized )
1548  DocumentDetected();
1549 
1550  if (!m_aEmbeds.empty())
1551  {
1552  // The text token is inside an OLE object, which means
1553  // alternate text.
1554  SwOLENode* pOLENode = m_aEmbeds.top();
1555  if (!pOLENode)
1556  {
1557  // <object> is mapped to an image -> ignore.
1558  break;
1559  }
1560 
1561  if (SwFlyFrameFormat* pFormat
1562  = dynamic_cast<SwFlyFrameFormat*>(pOLENode->GetFlyFormat()))
1563  {
1565  {
1566  pObject->SetTitle(pObject->GetTitle() + aToken);
1567  break;
1568  }
1569  }
1570  }
1571 
1572  m_xDoc->getIDocumentContentOperations().InsertString( *m_pPam, aToken );
1573 
1574  // if there are temporary paragraph attributes and the
1575  // paragraph isn't empty then the paragraph attributes
1576  // are final.
1577  m_aParaAttrs.clear();
1578 
1579  SetAttr();
1580  }
1581  break;
1582 
1583  case HtmlTokenId::HORZRULE:
1584  InsertHorzRule();
1585  break;
1586 
1587  case HtmlTokenId::IMAGE:
1588  InsertImage();
1589  // if only the parser references the doc, we can break and set
1590  // an error code
1591  if( 1 == m_xDoc->getReferenceCount() )
1592  {
1593  eState = SvParserState::Error;
1594  }
1595  break;
1596 
1597  case HtmlTokenId::SPACER:
1598  InsertSpacer();
1599  break;
1600 
1601  case HtmlTokenId::EMBED:
1602  InsertEmbed();
1603  break;
1604 
1605  case HtmlTokenId::NOEMBED_ON:
1606  m_bInNoEmbed = true;
1607  m_bCallNextToken = bool(m_xTable);
1608  ReadRawData( OOO_STRING_SVTOOLS_HTML_noembed );
1609  break;
1610 
1611  case HtmlTokenId::DEFLIST_ON:
1612  if( m_nOpenParaToken != HtmlTokenId::NONE )
1613  EndPara();
1614  NewDefList();
1615  break;
1616  case HtmlTokenId::DEFLIST_OFF:
1617  if( m_nOpenParaToken != HtmlTokenId::NONE )
1618  EndPara();
1619  EndDefListItem( HtmlTokenId::NONE );
1620  EndDefList();
1621  break;
1622 
1623  case HtmlTokenId::DD_ON:
1624  case HtmlTokenId::DT_ON:
1625  if( m_nOpenParaToken != HtmlTokenId::NONE )
1626  EndPara();
1627  EndDefListItem();// close <DD>/<DT> and set no template
1628  NewDefListItem( nToken );
1629  break;
1630 
1631  case HtmlTokenId::DD_OFF:
1632  case HtmlTokenId::DT_OFF:
1633  // c.f. HtmlTokenId::LI_OFF
1634  // Actually we should close a DD/DT now.
1635  // But neither Netscape nor Microsoft do this and so don't we.
1636  EndDefListItem( nToken );
1637  break;
1638 
1639  // divisions
1640  case HtmlTokenId::DIVISION_ON:
1641  case HtmlTokenId::CENTER_ON:
1642  if (!m_isInTableStructure)
1643  {
1644  if (m_nOpenParaToken != HtmlTokenId::NONE)
1645  {
1646  if (IsReadPRE())
1647  m_nOpenParaToken = HtmlTokenId::NONE;
1648  else
1649  EndPara();
1650  }
1651  NewDivision( nToken );
1652  }
1653  break;
1654 
1655  case HtmlTokenId::DIVISION_OFF:
1656  case HtmlTokenId::CENTER_OFF:
1657  if (!m_isInTableStructure)
1658  {
1659  if (m_nOpenParaToken != HtmlTokenId::NONE)
1660  {
1661  if (IsReadPRE())
1662  m_nOpenParaToken = HtmlTokenId::NONE;
1663  else
1664  EndPara();
1665  }
1666  EndDivision();
1667  }
1668  break;
1669 
1670  case HtmlTokenId::MULTICOL_ON:
1671  if( m_nOpenParaToken != HtmlTokenId::NONE )
1672  EndPara();
1673  NewMultiCol();
1674  break;
1675 
1676  case HtmlTokenId::MULTICOL_OFF:
1677  if( m_nOpenParaToken != HtmlTokenId::NONE )
1678  EndPara();
1679  EndTag( HtmlTokenId::MULTICOL_ON );
1680  break;
1681 
1682  case HtmlTokenId::MARQUEE_ON:
1683  NewMarquee();
1684  m_bCallNextToken = m_pMarquee!=nullptr && m_xTable;
1685  break;
1686 
1687  case HtmlTokenId::FORM_ON:
1688  NewForm();
1689  break;
1690  case HtmlTokenId::FORM_OFF:
1691  EndForm();
1692  break;
1693 
1694  // templates
1695  case HtmlTokenId::PARABREAK_ON:
1696  if( m_nOpenParaToken != HtmlTokenId::NONE )
1697  EndPara( true );
1698  NewPara();
1699  break;
1700 
1701  case HtmlTokenId::PARABREAK_OFF:
1702  EndPara( true );
1703  break;
1704 
1705  case HtmlTokenId::ADDRESS_ON:
1706  if( m_nOpenParaToken != HtmlTokenId::NONE )
1707  EndPara();
1708  NewTextFormatColl(HtmlTokenId::ADDRESS_ON, RES_POOLCOLL_SEND_ADDRESS);
1709  break;
1710 
1711  case HtmlTokenId::ADDRESS_OFF:
1712  if( m_nOpenParaToken != HtmlTokenId::NONE )
1713  EndPara();
1714  EndTextFormatColl( HtmlTokenId::ADDRESS_OFF );
1715  break;
1716 
1717  case HtmlTokenId::BLOCKQUOTE_ON:
1718  case HtmlTokenId::BLOCKQUOTE30_ON:
1719  if( m_nOpenParaToken != HtmlTokenId::NONE )
1720  EndPara();
1721  NewTextFormatColl( HtmlTokenId::BLOCKQUOTE_ON, RES_POOLCOLL_HTML_BLOCKQUOTE );
1722  break;
1723 
1724  case HtmlTokenId::BLOCKQUOTE_OFF:
1725  case HtmlTokenId::BLOCKQUOTE30_OFF:
1726  if( m_nOpenParaToken != HtmlTokenId::NONE )
1727  EndPara();
1728  EndTextFormatColl( HtmlTokenId::BLOCKQUOTE_ON );
1729  break;
1730 
1731  case HtmlTokenId::PREFORMTXT_ON:
1732  case HtmlTokenId::LISTING_ON:
1733  case HtmlTokenId::XMP_ON:
1734  if( m_nOpenParaToken != HtmlTokenId::NONE )
1735  EndPara();
1737  break;
1738 
1739  case HtmlTokenId::PREFORMTXT_OFF:
1740  m_bNoParSpace = true; // the last PRE-paragraph gets a spacing
1741  EndTextFormatColl( HtmlTokenId::PREFORMTXT_OFF );
1742  break;
1743 
1744  case HtmlTokenId::LISTING_OFF:
1745  case HtmlTokenId::XMP_OFF:
1746  EndTextFormatColl( nToken );
1747  break;
1748 
1749  case HtmlTokenId::HEAD1_ON:
1750  case HtmlTokenId::HEAD2_ON:
1751  case HtmlTokenId::HEAD3_ON:
1752  case HtmlTokenId::HEAD4_ON:
1753  case HtmlTokenId::HEAD5_ON:
1754  case HtmlTokenId::HEAD6_ON:
1755  if( m_nOpenParaToken != HtmlTokenId::NONE )
1756  {
1757  if( IsReadPRE() )
1758  m_nOpenParaToken = HtmlTokenId::NONE;
1759  else
1760  EndPara();
1761  }
1762  NewHeading( nToken );
1763  break;
1764 
1765  case HtmlTokenId::HEAD1_OFF:
1766  case HtmlTokenId::HEAD2_OFF:
1767  case HtmlTokenId::HEAD3_OFF:
1768  case HtmlTokenId::HEAD4_OFF:
1769  case HtmlTokenId::HEAD5_OFF:
1770  case HtmlTokenId::HEAD6_OFF:
1771  EndHeading();
1772  break;
1773 
1774  case HtmlTokenId::TABLE_ON:
1775  if( !m_vPendingStack.empty() )
1776  BuildTable( SvxAdjust::End );
1777  else
1778  {
1779  if( m_nOpenParaToken != HtmlTokenId::NONE )
1780  EndPara();
1781  OSL_ENSURE(!m_xTable, "table in table not allowed here");
1782  if( !m_xTable && (IsNewDoc() || !m_pPam->GetNode().FindTableNode()) &&
1783  (m_pPam->GetPoint()->nNode.GetIndex() >
1784  m_xDoc->GetNodes().GetEndOfExtras().GetIndex() ||
1786  {
1787  if ( m_nParaCnt < 5 )
1788  Show(); // show what we have up to here
1789 
1790  SvxAdjust eAdjust = m_xAttrTab->pAdjust
1791  ? static_cast<const SvxAdjustItem&>(m_xAttrTab->pAdjust->GetItem()).
1792  GetAdjust()
1793  : SvxAdjust::End;
1794  BuildTable( eAdjust );
1795  }
1796  else
1797  bInsertUnknown = m_bKeepUnknown;
1798  }
1799  break;
1800 
1801  // lists
1802  case HtmlTokenId::DIRLIST_ON:
1803  case HtmlTokenId::MENULIST_ON:
1804  case HtmlTokenId::ORDERLIST_ON:
1805  case HtmlTokenId::UNORDERLIST_ON:
1806  if( m_nOpenParaToken != HtmlTokenId::NONE )
1807  EndPara();
1808  NewNumberBulletList( nToken );
1809  break;
1810 
1811  case HtmlTokenId::DIRLIST_OFF:
1812  case HtmlTokenId::MENULIST_OFF:
1813  case HtmlTokenId::ORDERLIST_OFF:
1814  case HtmlTokenId::UNORDERLIST_OFF:
1815  if( m_nOpenParaToken != HtmlTokenId::NONE )
1816  EndPara();
1817  EndNumberBulletListItem( HtmlTokenId::NONE, true );
1818  EndNumberBulletList( nToken );
1819  break;
1820 
1821  case HtmlTokenId::LI_ON:
1822  case HtmlTokenId::LISTHEADER_ON:
1823  if( m_nOpenParaToken != HtmlTokenId::NONE &&
1825  || HtmlTokenId::PARABREAK_ON==m_nOpenParaToken) )
1826  {
1827  // only finish paragraph for <P><LI>, not for <DD><LI>
1828  EndPara();
1829  }
1830 
1831  if (m_bFuzzing && m_nListItems > 1024)
1832  {
1833  SAL_WARN("sw.html", "skipping remaining bullet import for performance during fuzzing");
1834  }
1835  else
1836  {
1837  EndNumberBulletListItem( HtmlTokenId::NONE, false );// close <LI>/<LH> and don't set a template
1838  NewNumberBulletListItem( nToken );
1839  }
1840 
1841  ++m_nListItems;
1842 
1843  break;
1844  case HtmlTokenId::LI_OFF:
1845  case HtmlTokenId::LISTHEADER_OFF:
1846  EndNumberBulletListItem( nToken, false );
1847  break;
1848 
1849  // Attribute :
1850  case HtmlTokenId::ITALIC_ON:
1851  {
1855  NewStdAttr( HtmlTokenId::ITALIC_ON,
1856  &m_xAttrTab->pItalic, aPosture,
1857  &m_xAttrTab->pItalicCJK, &aPostureCJK,
1858  &m_xAttrTab->pItalicCTL, &aPostureCTL );
1859  }
1860  break;
1861 
1862  case HtmlTokenId::BOLD_ON:
1863  {
1867  NewStdAttr( HtmlTokenId::BOLD_ON,
1868  &m_xAttrTab->pBold, aWeight,
1869  &m_xAttrTab->pBoldCJK, &aWeightCJK,
1870  &m_xAttrTab->pBoldCTL, &aWeightCTL );
1871  }
1872  break;
1873 
1874  case HtmlTokenId::STRIKE_ON:
1875  case HtmlTokenId::STRIKETHROUGH_ON:
1876  {
1877  NewStdAttr( HtmlTokenId::STRIKE_ON, &m_xAttrTab->pStrike,
1879  }
1880  break;
1881 
1882  case HtmlTokenId::UNDERLINE_ON:
1883  {
1884  NewStdAttr( HtmlTokenId::UNDERLINE_ON, &m_xAttrTab->pUnderline,
1886  }
1887  break;
1888 
1889  case HtmlTokenId::SUPERSCRIPT_ON:
1890  {
1891  NewStdAttr( HtmlTokenId::SUPERSCRIPT_ON, &m_xAttrTab->pEscapement,
1893  }
1894  break;
1895 
1896  case HtmlTokenId::SUBSCRIPT_ON:
1897  {
1898  NewStdAttr( HtmlTokenId::SUBSCRIPT_ON, &m_xAttrTab->pEscapement,
1900  }
1901  break;
1902 
1903  case HtmlTokenId::BLINK_ON:
1904  {
1905  NewStdAttr( HtmlTokenId::BLINK_ON, &m_xAttrTab->pBlink,
1906  SvxBlinkItem( true, RES_CHRATR_BLINK ) );
1907  }
1908  break;
1909 
1910  case HtmlTokenId::SPAN_ON:
1911  NewStdAttr( HtmlTokenId::SPAN_ON );
1912  break;
1913 
1914  case HtmlTokenId::ITALIC_OFF:
1915  case HtmlTokenId::BOLD_OFF:
1916  case HtmlTokenId::STRIKE_OFF:
1917  case HtmlTokenId::UNDERLINE_OFF:
1918  case HtmlTokenId::SUPERSCRIPT_OFF:
1919  case HtmlTokenId::SUBSCRIPT_OFF:
1920  case HtmlTokenId::BLINK_OFF:
1921  case HtmlTokenId::SPAN_OFF:
1922  EndTag( nToken );
1923  break;
1924 
1925  case HtmlTokenId::STRIKETHROUGH_OFF:
1926  EndTag( HtmlTokenId::STRIKE_OFF );
1927  break;
1928 
1929  case HtmlTokenId::BASEFONT_ON:
1930  NewBasefontAttr();
1931  break;
1932  case HtmlTokenId::BASEFONT_OFF:
1933  EndBasefontAttr();
1934  break;
1935  case HtmlTokenId::FONT_ON:
1936  case HtmlTokenId::BIGPRINT_ON:
1937  case HtmlTokenId::SMALLPRINT_ON:
1938  NewFontAttr( nToken );
1939  break;
1940  case HtmlTokenId::FONT_OFF:
1941  case HtmlTokenId::BIGPRINT_OFF:
1942  case HtmlTokenId::SMALLPRINT_OFF:
1943  EndFontAttr( nToken );
1944  break;
1945 
1946  case HtmlTokenId::EMPHASIS_ON:
1947  case HtmlTokenId::CITATION_ON:
1948  case HtmlTokenId::STRONG_ON:
1949  case HtmlTokenId::CODE_ON:
1950  case HtmlTokenId::SAMPLE_ON:
1951  case HtmlTokenId::KEYBOARD_ON:
1952  case HtmlTokenId::VARIABLE_ON:
1953  case HtmlTokenId::DEFINSTANCE_ON:
1954  case HtmlTokenId::SHORTQUOTE_ON:
1955  case HtmlTokenId::LANGUAGE_ON:
1956  case HtmlTokenId::AUTHOR_ON:
1957  case HtmlTokenId::PERSON_ON:
1958  case HtmlTokenId::ACRONYM_ON:
1959  case HtmlTokenId::ABBREVIATION_ON:
1960  case HtmlTokenId::INSERTEDTEXT_ON:
1961  case HtmlTokenId::DELETEDTEXT_ON:
1962 
1963  case HtmlTokenId::TELETYPE_ON:
1964  NewCharFormat( nToken );
1965  break;
1966 
1967  case HtmlTokenId::SDFIELD_ON:
1968  NewField();
1970  break;
1971 
1972  case HtmlTokenId::EMPHASIS_OFF:
1973  case HtmlTokenId::CITATION_OFF:
1974  case HtmlTokenId::STRONG_OFF:
1975  case HtmlTokenId::CODE_OFF:
1976  case HtmlTokenId::SAMPLE_OFF:
1977  case HtmlTokenId::KEYBOARD_OFF:
1978  case HtmlTokenId::VARIABLE_OFF:
1979  case HtmlTokenId::DEFINSTANCE_OFF:
1980  case HtmlTokenId::SHORTQUOTE_OFF:
1981  case HtmlTokenId::LANGUAGE_OFF:
1982  case HtmlTokenId::AUTHOR_OFF:
1983  case HtmlTokenId::PERSON_OFF:
1984  case HtmlTokenId::ACRONYM_OFF:
1985  case HtmlTokenId::ABBREVIATION_OFF:
1986  case HtmlTokenId::INSERTEDTEXT_OFF:
1987  case HtmlTokenId::DELETEDTEXT_OFF:
1988 
1989  case HtmlTokenId::TELETYPE_OFF:
1990  EndTag( nToken );
1991  break;
1992 
1993  case HtmlTokenId::HEAD_OFF:
1994  if( !m_aStyleSource.isEmpty() )
1995  {
1996  m_pCSS1Parser->ParseStyleSheet( m_aStyleSource );
1997  m_aStyleSource.clear();
1998  }
1999  break;
2000 
2001  case HtmlTokenId::DOCTYPE:
2002  case HtmlTokenId::BODY_OFF:
2003  case HtmlTokenId::HTML_OFF:
2004  case HtmlTokenId::HEAD_ON:
2005  case HtmlTokenId::TITLE_OFF:
2006  break; // don't evaluate further???
2007  case HtmlTokenId::HTML_ON:
2008  {
2009  const HTMLOptions& rHTMLOptions = GetOptions();
2010  for (size_t i = rHTMLOptions.size(); i; )
2011  {
2012  const HTMLOption& rOption = rHTMLOptions[--i];
2013  if( HtmlOptionId::DIR == rOption.GetToken() )
2014  {
2015  const OUString& rDir = rOption.GetString();
2016  SfxItemSet aItemSet( m_xDoc->GetAttrPool(),
2017  m_pCSS1Parser->GetWhichMap() );
2018  SvxCSS1PropertyInfo aPropInfo;
2019  OUString aDummy;
2020  ParseStyleOptions( aDummy, aDummy, aDummy, aItemSet,
2021  aPropInfo, nullptr, &rDir );
2022 
2023  m_pCSS1Parser->SetPageDescAttrs( nullptr, &aItemSet );
2024  break;
2025  }
2026  }
2027  }
2028  break;
2029 
2030  case HtmlTokenId::INPUT:
2031  InsertInput();
2032  break;
2033 
2034  case HtmlTokenId::TEXTAREA_ON:
2035  NewTextArea();
2037  break;
2038 
2039  case HtmlTokenId::SELECT_ON:
2040  NewSelect();
2042  break;
2043 
2044  case HtmlTokenId::ANCHOR_ON:
2045  NewAnchor();
2046  break;
2047 
2048  case HtmlTokenId::ANCHOR_OFF:
2049  EndAnchor();
2050  break;
2051 
2052  case HtmlTokenId::COMMENT:
2053  if( ( aToken.getLength() > 5 ) && ( ! m_bIgnoreHTMLComments ) )
2054  {
2055  // insert as Post-It
2056  // If there are no space characters right behind
2057  // the <!-- and on front of the -->, leave the comment untouched.
2058  if( ' ' == aToken[ 3 ] &&
2059  ' ' == aToken[ aToken.getLength()-3 ] )
2060  {
2061  OUString aComment( aToken.copy( 3, aToken.getLength()-5 ) );
2062  InsertComment(comphelper::string::strip(aComment, ' '));
2063  }
2064  else
2065  {
2066  OUString aComment = "<" + aToken + ">";
2067  InsertComment( aComment );
2068  }
2069  }
2070  break;
2071 
2072  case HtmlTokenId::MAP_ON:
2073  // Image Maps are read asynchronously: At first only an image map is created
2074  // Areas are processed later. Nevertheless the
2075  // ImageMap is inserted into the IMap-Array, because it might be used
2076  // already.
2077  m_pImageMap = new ImageMap;
2079  {
2080  if (!m_pImageMaps)
2081  m_pImageMaps.reset( new ImageMaps );
2082  m_pImageMaps->push_back(std::unique_ptr<ImageMap>(m_pImageMap));
2083  }
2084  else
2085  {
2086  delete m_pImageMap;
2087  m_pImageMap = nullptr;
2088  }
2089  break;
2090 
2091  case HtmlTokenId::MAP_OFF:
2092  // there is no ImageMap anymore (don't delete IMap, because it's
2093  // already contained in the array!)
2094  m_pImageMap = nullptr;
2095  break;
2096 
2097  case HtmlTokenId::AREA:
2098  if( m_pImageMap )
2099  ParseAreaOptions( m_pImageMap, m_sBaseURL, SvMacroItemId::OnMouseOver,
2100  SvMacroItemId::OnMouseOut );
2101  break;
2102 
2103  case HtmlTokenId::FRAMESET_ON:
2104  bInsertUnknown = m_bKeepUnknown;
2105  break;
2106 
2107  case HtmlTokenId::NOFRAMES_ON:
2108  if( IsInHeader() )
2109  FinishHeader();
2110  bInsertUnknown = m_bKeepUnknown;
2111  break;
2112 
2113  case HtmlTokenId::UNKNOWNCONTROL_ON:
2114  // Ignore content of unknown token in the header, if the token
2115  // does not start with a '!'.
2116  // (but judging from the code, also if does not start with a '%')
2117  // (and also if we're not somewhere we consider PRE)
2118  if( IsInHeader() && !IsReadPRE() && m_aUnknownToken.isEmpty() &&
2119  !sSaveToken.isEmpty() && '!' != sSaveToken[0] &&
2120  '%' != sSaveToken[0] )
2121  m_aUnknownToken = sSaveToken;
2122  [[fallthrough]];
2123 
2124  default:
2125  bInsertUnknown = m_bKeepUnknown;
2126  break;
2127  }
2128 
2129  if( bGetIDOption )
2130  InsertIDOption();
2131 
2132  if( bInsertUnknown )
2133  {
2134  OUStringBuffer aComment("HTML: <");
2135  if( (nToken >= HtmlTokenId::ONOFF_START) && isOffToken(nToken) )
2136  aComment.append("/");
2137  aComment.append(sSaveToken);
2138  if( !aToken.isEmpty() )
2139  {
2140  UnescapeToken();
2141  aComment.append(" " + aToken);
2142  }
2143  aComment.append(">");
2144  InsertComment( aComment.makeStringAndClear() );
2145  }
2146 
2147  // if there are temporary paragraph attributes and the
2148  // paragraph isn't empty then the paragraph attributes are final.
2149  if( !m_aParaAttrs.empty() && m_pPam->GetPoint()->nContent.GetIndex() )
2150  m_aParaAttrs.clear();
2151 }
2152 
2153 static void lcl_swhtml_getItemInfo( const HTMLAttr& rAttr,
2154  bool& rScriptDependent,
2155  sal_uInt16& rScriptType )
2156 {
2157  switch( rAttr.GetItem().Which() )
2158  {
2159  case RES_CHRATR_FONT:
2160  case RES_CHRATR_FONTSIZE:
2161  case RES_CHRATR_LANGUAGE:
2162  case RES_CHRATR_POSTURE:
2163  case RES_CHRATR_WEIGHT:
2164  rScriptType = i18n::ScriptType::LATIN;
2165  rScriptDependent = true;
2166  break;
2167  case RES_CHRATR_CJK_FONT:
2171  case RES_CHRATR_CJK_WEIGHT:
2172  rScriptType = i18n::ScriptType::ASIAN;
2173  rScriptDependent = true;
2174  break;
2175  case RES_CHRATR_CTL_FONT:
2179  case RES_CHRATR_CTL_WEIGHT:
2180  rScriptType = i18n::ScriptType::COMPLEX;
2181  rScriptDependent = true;
2182  break;
2183  default:
2184  rScriptDependent = false;
2185  break;
2186  }
2187 }
2188 
2189 bool SwHTMLParser::AppendTextNode( SwHTMLAppendMode eMode, bool bUpdateNum )
2190 {
2191  // A hard line break at the end always must be removed.
2192  // A second one we replace with paragraph spacing.
2193  sal_Int32 nLFStripped = StripTrailingLF();
2194  if( (AM_NOSPACE==eMode || AM_SOFTNOSPACE==eMode) && nLFStripped > 1 )
2195  eMode = AM_SPACE;
2196 
2197  // the hard attributes of this paragraph will never be invalid again
2198  m_aParaAttrs.clear();
2199 
2200  SwTextNode *pTextNode = (AM_SPACE==eMode || AM_NOSPACE==eMode) ?
2201  m_pPam->GetPoint()->nNode.GetNode().GetTextNode() : nullptr;
2202 
2203  if (pTextNode)
2204  {
2205  const SvxULSpaceItem& rULSpace =
2206  static_cast<const SvxULSpaceItem&>(pTextNode->SwContentNode::GetAttr( RES_UL_SPACE ));
2207 
2208  bool bChange = AM_NOSPACE==eMode ? rULSpace.GetLower() > 0
2209  : rULSpace.GetLower() == 0;
2210 
2211  if( bChange )
2212  {
2213  const SvxULSpaceItem& rCollULSpace =
2214  pTextNode->GetAnyFormatColl().GetULSpace();
2215 
2216  bool bMayReset = AM_NOSPACE==eMode ? rCollULSpace.GetLower() == 0
2217  : rCollULSpace.GetLower() > 0;
2218 
2219  if( bMayReset &&
2220  rCollULSpace.GetUpper() == rULSpace.GetUpper() )
2221  {
2222  pTextNode->ResetAttr( RES_UL_SPACE );
2223  }
2224  else
2225  {
2226  pTextNode->SetAttr(
2227  SvxULSpaceItem( rULSpace.GetUpper(),
2228  AM_NOSPACE==eMode ? 0 : HTML_PARSPACE, RES_UL_SPACE ) );
2229  }
2230  }
2231  }
2232  m_bNoParSpace = AM_NOSPACE==eMode || AM_SOFTNOSPACE==eMode;
2233 
2234  SwPosition aOldPos( *m_pPam->GetPoint() );
2235 
2236  bool bRet = m_xDoc->getIDocumentContentOperations().AppendTextNode( *m_pPam->GetPoint() );
2237 
2238  // split character attributes and maybe set none,
2239  // which are set for the whole paragraph
2240  const SwNodeIndex& rEndIdx = aOldPos.nNode;
2241  const sal_Int32 nEndCnt = aOldPos.nContent.GetIndex();
2242  const SwPosition& rPos = *m_pPam->GetPoint();
2243 
2244  HTMLAttr** pHTMLAttributes = reinterpret_cast<HTMLAttr**>(m_xAttrTab.get());
2245  for (auto nCnt = sizeof(HTMLAttrTable) / sizeof(HTMLAttr*); nCnt--; ++pHTMLAttributes)
2246  {
2247  HTMLAttr *pAttr = *pHTMLAttributes;
2248  if( pAttr && pAttr->GetItem().Which() < RES_PARATR_BEGIN )
2249  {
2250  bool bWholePara = false;
2251 
2252  while( pAttr )
2253  {
2254  HTMLAttr *pNext = pAttr->GetNext();
2255  if( pAttr->GetStartParagraphIdx() < rEndIdx.GetIndex() ||
2256  (!bWholePara &&
2257  pAttr->GetStartParagraph() == rEndIdx &&
2258  pAttr->GetStartContent() != nEndCnt) )
2259  {
2260  bWholePara =
2261  pAttr->GetStartParagraph() == rEndIdx &&
2262  pAttr->GetStartContent() == 0;
2263 
2264  sal_Int32 nStt = pAttr->m_nStartContent;
2265  bool bScript = false;
2266  sal_uInt16 nScriptItem;
2267  bool bInsert = true;
2268  lcl_swhtml_getItemInfo( *pAttr, bScript,
2269  nScriptItem );
2270  // set previous part
2271  if( bScript )
2272  {
2273  const SwTextNode *pTextNd =
2274  pAttr->GetStartParagraph().GetNode().GetTextNode();
2275  OSL_ENSURE( pTextNd, "No text node" );
2276  if( pTextNd )
2277  {
2278  const OUString& rText = pTextNd->GetText();
2279  sal_uInt16 nScriptText =
2280  g_pBreakIt->GetBreakIter()->getScriptType(
2281  rText, pAttr->GetStartContent() );
2282  sal_Int32 nScriptEnd = g_pBreakIt->GetBreakIter()
2283  ->endOfScript( rText, nStt, nScriptText );
2284  while (nScriptEnd < nEndCnt && nScriptEnd != -1)
2285  {
2286  if( nScriptItem == nScriptText )
2287  {
2288  HTMLAttr *pSetAttr =
2289  pAttr->Clone( rEndIdx, nScriptEnd );
2290  pSetAttr->m_nStartContent = nStt;
2291  pSetAttr->ClearPrev();
2292  if( !pNext || bWholePara )
2293  {
2294  if (pSetAttr->m_bInsAtStart)
2295  m_aSetAttrTab.push_front( pSetAttr );
2296  else
2297  m_aSetAttrTab.push_back( pSetAttr );
2298  }
2299  else
2300  pNext->InsertPrev( pSetAttr );
2301  }
2302  nStt = nScriptEnd;
2303  nScriptText = g_pBreakIt->GetBreakIter()->getScriptType(
2304  rText, nStt );
2305  nScriptEnd = g_pBreakIt->GetBreakIter()
2306  ->endOfScript( rText, nStt, nScriptText );
2307  }
2308  bInsert = nScriptItem == nScriptText;
2309  }
2310  }
2311  if( bInsert )
2312  {
2313  HTMLAttr *pSetAttr =
2314  pAttr->Clone( rEndIdx, nEndCnt );
2315  pSetAttr->m_nStartContent = nStt;
2316 
2317  // When the attribute is for the whole paragraph, the outer
2318  // attributes aren't effective anymore. Hence it may not be inserted
2319  // in the Prev-List of an outer attribute, because that won't be
2320  // set. That leads to shifting when fields are used.
2321  if( !pNext || bWholePara )
2322  {
2323  if (pSetAttr->m_bInsAtStart)
2324  m_aSetAttrTab.push_front( pSetAttr );
2325  else
2326  m_aSetAttrTab.push_back( pSetAttr );
2327  }
2328  else
2329  pNext->InsertPrev( pSetAttr );
2330  }
2331  else
2332  {
2333  HTMLAttr *pPrev = pAttr->GetPrev();
2334  if( pPrev )
2335  {
2336  // the previous attributes must be set anyway
2337  if( !pNext || bWholePara )
2338  {
2339  if (pPrev->m_bInsAtStart)
2340  m_aSetAttrTab.push_front( pPrev );
2341  else
2342  m_aSetAttrTab.push_back( pPrev );
2343  }
2344  else
2345  pNext->InsertPrev( pPrev );
2346  }
2347  }
2348  pAttr->ClearPrev();
2349  }
2350 
2351  pAttr->SetStart( rPos );
2352  pAttr = pNext;
2353  }
2354  }
2355  }
2356 
2357  if( bUpdateNum )
2358  {
2359  if( GetNumInfo().GetDepth() )
2360  {
2361  sal_uInt8 nLvl = GetNumInfo().GetLevel();
2362  SetNodeNum( nLvl );
2363  }
2364  else
2366  }
2367 
2368  // We must set the attribute of the paragraph before now (because of JavaScript)
2369  SetAttr();
2370 
2371  // Now it is time to get rid of all script dependent hints that are
2372  // equal to the settings in the style
2373  SwTextNode *pTextNd = rEndIdx.GetNode().GetTextNode();
2374  OSL_ENSURE( pTextNd, "There is the txt node" );
2375  size_t nCntAttr = (pTextNd && pTextNd->GetpSwpHints())
2376  ? pTextNd->GetSwpHints().Count() : 0;
2377  if( nCntAttr )
2378  {
2379  // These are the end position of all script dependent hints.
2380  // If we find a hint that starts before the current end position,
2381  // we have to set it. If we find a hint that start behind or at
2382  // that position, we have to take the hint value into account.
2383  // If it is equal to the style, or in fact the paragraph value
2384  // for that hint, the hint is removed. Otherwise its end position
2385  // is remembered.
2386  sal_Int32 aEndPos[15] =
2387  { 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0 };
2388  SwpHints& rHints = pTextNd->GetSwpHints();
2389  for( size_t i=0; i < nCntAttr; i++ )
2390  {
2391  SwTextAttr *pHt = rHints.Get( i );
2392  sal_uInt16 nWhich = pHt->Which();
2393  sal_Int16 nIdx = 0;
2394  bool bFont = false;
2395  switch( nWhich )
2396  {
2397  case RES_CHRATR_FONT:
2398  nIdx = 0;
2399  bFont = true;
2400  break;
2401  case RES_CHRATR_FONTSIZE:
2402  nIdx = 1;
2403  break;
2404  case RES_CHRATR_LANGUAGE:
2405  nIdx = 2;
2406  break;
2407  case RES_CHRATR_POSTURE:
2408  nIdx = 3;
2409  break;
2410  case RES_CHRATR_WEIGHT:
2411  nIdx = 4;
2412  break;
2413  case RES_CHRATR_CJK_FONT:
2414  nIdx = 5;
2415  bFont = true;
2416  break;
2418  nIdx = 6;
2419  break;
2421  nIdx = 7;
2422  break;
2424  nIdx = 8;
2425  break;
2426  case RES_CHRATR_CJK_WEIGHT:
2427  nIdx = 9;
2428  break;
2429  case RES_CHRATR_CTL_FONT:
2430  nIdx = 10;
2431  bFont = true;
2432  break;
2434  nIdx = 11;
2435  break;
2437  nIdx = 12;
2438  break;
2440  nIdx = 13;
2441  break;
2442  case RES_CHRATR_CTL_WEIGHT:
2443  nIdx = 14;
2444  break;
2445  default:
2446  // Skip to next attribute
2447  continue;
2448  }
2449  const sal_Int32 nStt = pHt->GetStart();
2450  if( nStt >= aEndPos[nIdx] )
2451  {
2452  const SfxPoolItem& rItem =
2453  static_cast<const SwContentNode *>(pTextNd)->GetAttr( nWhich );
2454  if( bFont ? swhtml_css1atr_equalFontItems(rItem,pHt->GetAttr())
2455  : rItem == pHt->GetAttr() )
2456  {
2457  // The hint is the same as set in the paragraph and
2458  // therefore, it can be deleted
2459  // CAUTION!!! This WILL delete the hint and it MAY
2460  // also delete the SwpHints!!! To avoid any trouble
2461  // we leave the loop immediately if this is the last
2462  // hint.
2463  pTextNd->DeleteAttribute( pHt );
2464  if( 1 == nCntAttr )
2465  break;
2466  i--;
2467  nCntAttr--;
2468  }
2469  else
2470  {
2471  // The hint is different. Therefore all hints within that
2472  // hint have to be ignored.
2473  aEndPos[nIdx] = pHt->GetEnd() ? *pHt->GetEnd() : nStt;
2474  }
2475  }
2476  else
2477  {
2478  // The hint starts before another one ends.
2479  // The hint in this case is not deleted
2480  OSL_ENSURE( pHt->GetEnd() && *pHt->GetEnd() <= aEndPos[nIdx],
2481  "hints aren't nested properly!" );
2482  }
2483  }
2484  }
2485 
2486  if (!m_xTable && !--m_nParaCnt)
2487  Show();
2488 
2489  return bRet;
2490 }
2491 
2493 {
      // NOTE(review): the signature line (2492) is missing from this extract;
      // the use of m_bNoParSpace suggests this is SwHTMLParser::AddParSpace()
      // -- confirm against the real file.
      //
      // Looks at the node directly before the current PaM node and, if it is
      // a text node without lower paragraph spacing, either drops its hard
      // RES_UL_SPACE attribute (so the paragraph style's spacing applies) or
      // sets a new attribute chosen by script type (CTL / CJK / other).
2494  // Nothing to do unless paragraph spacing was flagged as missing
2495  if( !m_bNoParSpace )
2496  return;
2497 
2498  m_bNoParSpace = false;
2499 
      // index of the node preceding the current insert position
2500  sal_uLong nNdIdx = m_pPam->GetPoint()->nNode.GetIndex() - 1;
2501 
2502  SwTextNode *pTextNode = m_xDoc->GetNodes()[nNdIdx]->GetTextNode();
2503  if( !pTextNode )
2504  return;
2505 
      // Note: despite the "r" prefix this is a copy, not a reference.
2506  SvxULSpaceItem rULSpace =
2507  static_cast<const SvxULSpaceItem&>(pTextNode->SwContentNode::GetAttr( RES_UL_SPACE ));
      // a lower spacing is already in effect
2508  if( rULSpace.GetLower() )
2509  return;
2510 
2511  const SvxULSpaceItem& rCollULSpace =
2512  pTextNode->GetAnyFormatColl().GetULSpace();
      // The format collection provides a lower spacing and the same upper
      // spacing: removing the hard attribute is enough.
2513  if( rCollULSpace.GetLower() &&
2514  rCollULSpace.GetUpper() == rULSpace.GetUpper() )
2515  {
2516  pTextNode->ResetAttr( RES_UL_SPACE );
2517  }
2518  else
2519  {
2520  // Examine the text hints: if any of them is a CJK or CTL character
2521  // attribute, the paragraph space is set to the value defined by
2522  // HTML_CJK_PARSPACE/HTML_CTL_PARSPACE.
2523 
2524  bool bIsCJK = false;
2525  bool bIsCTL = false;
2526 
2527  const size_t nCntAttr = pTextNode->GetpSwpHints()
2528  ? pTextNode->GetSwpHints().Count() : 0;
2529 
      // The first CJK or CTL hint found decides; the scan stops there.
2530  for(size_t i = 0; i < nCntAttr; ++i)
2531  {
2532  SwTextAttr *const pHt = pTextNode->GetSwpHints().Get(i);
2533  sal_uInt16 const nWhich = pHt->Which();
2534  if (RES_CHRATR_CJK_FONT == nWhich ||
2535  RES_CHRATR_CJK_FONTSIZE == nWhich ||
2536  RES_CHRATR_CJK_LANGUAGE == nWhich ||
2537  RES_CHRATR_CJK_POSTURE == nWhich ||
2538  RES_CHRATR_CJK_WEIGHT == nWhich)
2539  {
2540  bIsCJK = true;
2541  break;
2542  }
2543  if (RES_CHRATR_CTL_FONT == nWhich ||
2544  RES_CHRATR_CTL_FONTSIZE == nWhich ||
2545  RES_CHRATR_CTL_LANGUAGE == nWhich ||
2546  RES_CHRATR_CTL_POSTURE == nWhich ||
2547  RES_CHRATR_CTL_WEIGHT == nWhich)
2548  {
2549  bIsCTL = true;
2550  break;
2551  }
2552  }
2553 
      // NOTE(review): the argument lines of the three SetAttr() calls below
      // (2557, 2562, 2565) are missing from this extract.
2554  if( bIsCTL )
2555  {
2556  pTextNode->SetAttr(
2558  }
2559  else if( bIsCJK )
2560  {
2561  pTextNode->SetAttr(
2563  } else {
2564  pTextNode->SetAttr(
2566  }
2567  }
2568 }
2569 
2571 {
      // NOTE(review): the signature line (2570) is missing from this extract;
      // the assertion text suggests this is SwHTMLParser::Show() -- confirm.
      // Lines 2581 and 2596 are missing as well.
2572  // Here
2573  // - an EndAction is called, so the document is formatted
2574  // - a Reschedule is called,
2575  // - the own View-Shell is set again
2576  // - and a StartAction is called
2577 
2578  OSL_ENSURE( SvParserState::Working==eState, "Show not in working state - That can go wrong" );
2579  SwViewShell *pOldVSh = CallEndAction();
2580 
2582 
      // An aborting doc shell, or the parser holding the only reference to
      // the document, puts the parser into the error state.
2583  if( ( m_xDoc->GetDocShell() && m_xDoc->GetDocShell()->IsAbortingImport() )
2584  || 1 == m_xDoc->getReferenceCount() )
2585  {
2586  // was the import aborted by SFX?
2587  eState = SvParserState::Error;
2588  }
2589 
2590  // Fetch the SwViewShell again, as it could be destroyed in Reschedule.
2591  SwViewShell *pVSh = CallStartAction( pOldVSh );
2592 
2593  // if the current node is not visible anymore, a bigger increment is used
2594  if( pVSh )
2595  {
2597  ? 5 : 50;
2598  }
2599 }
2600 
2602 {
      // NOTE(review): the signature line (2601) is missing from this extract;
      // the assertion text suggests this is SwHTMLParser::ShowStatLine() --
      // confirm. Lines 2614, 2618 and 2625 are missing as well.
2603  // Here
2604  // - a Reschedule is called, so it can be scrolled
2605  // - the own View-Shell is set again
2606  // - a StartAction/EndAction is called, when there was scrolling.
2607 
2608  OSL_ENSURE( SvParserState::Working==eState, "ShowStatLine not in working state - That can go wrong" );
2609 
2610  // progress display: report the current read position of the input stream
2611  if (m_xProgress)
2612  {
2613  m_xProgress->Update(rInput.Tell());
2615  }
2616  else
2617  {
2619 
2620  if( ( m_xDoc->GetDocShell() && m_xDoc->GetDocShell()->IsAbortingImport() )
2621  || 1 == m_xDoc->getReferenceCount() )
2622  // was the import aborted by SFX?
2623  eState = SvParserState::Error;
2624 
      // NOTE(review): pVSh is declared on the missing line 2625.
      // When the shell has an invalid rectangle, the pending action is
      // closed and reopened, both calls with their checks switched off.
2626  if( pVSh && pVSh->HasInvalidRect() )
2627  {
2628  CallEndAction( false, false );
2629  CallStartAction( pVSh, false );
2630  }
2631  }
2632 }
2633 
2635 {
      // NOTE(review): the signature line (2634) is missing from this extract;
      // the assertion text suggests this is SwHTMLParser::CallStartAction(),
      // taking pVSh and bChkPtr -- confirm. Line 2657 is missing as well.
      //
      // Stores the view shell to work on in m_pActionViewShell, starts an
      // action on it and returns it (may be nullptr).
2636  OSL_ENSURE( !m_pActionViewShell, "CallStartAction: SwViewShell already set" );
2637 
      // Without a shell from the caller, or when the pointer is to be
      // checked, ask the document for its current view shell.
2638  if( !pVSh || bChkPtr )
2639  {
2640 #if OSL_DEBUG_LEVEL > 0
2641  SwViewShell *pOldVSh = pVSh;
2642 #endif
2643  pVSh = m_xDoc->getIDocumentLayoutAccess().GetCurrentViewShell();
2644 #if OSL_DEBUG_LEVEL > 0
2645  OSL_ENSURE( !pVSh || !pOldVSh || pOldVSh == pVSh, "CallStartAction: Who swapped the SwViewShell?" );
      // pVSh is already null in this branch, so the assignment has no effect.
2646  if( pOldVSh && !pVSh )
2647  pVSh = nullptr;
2648 #endif
2649  }
2650  m_pActionViewShell = pVSh;
2651 
2652  if( m_pActionViewShell )
2653  {
      // Edit shells get StartAction(); the statement for the else branch
      // (line 2657) is missing from this extract.
2654  if( auto pEditShell = dynamic_cast< SwEditShell *>( m_pActionViewShell ) )
2655  pEditShell->StartAction();
2656  else
2658  }
2659 
2660  return m_pActionViewShell;
2661 }
2662 
// Ends the action started by CallStartAction() on m_pActionViewShell.
//
// bChkAction - when true, nothing is ended unless the shell reports a
//              pending action (ActionPend()).
// bChkPtr    - when true, m_pActionViewShell is first compared with the
//              document's current view shell and dropped if they differ.
//
// Returns early with m_pActionViewShell when there is no shell or no pending
// action; otherwise m_pActionViewShell is reset to nullptr and pVSh is
// returned.
// NOTE(review): lines 2687, 2697, 2703 and 2712 are missing from this
// extract; the pVSh returned at the end is declared on one of them.
2663 SwViewShell *SwHTMLParser::CallEndAction( bool bChkAction, bool bChkPtr )
2664 {
2665  if( bChkPtr )
2666  {
2667  SwViewShell *pVSh = m_xDoc->getIDocumentLayoutAccess().GetCurrentViewShell();
2668  OSL_ENSURE( !pVSh || m_pActionViewShell == pVSh,
2669  "CallEndAction: Who swapped the SwViewShell?" );
2670 #if OSL_DEBUG_LEVEL > 0
      // pVSh is already null in this branch, so the assignment has no effect.
2671  if( m_pActionViewShell && !pVSh )
2672  pVSh = nullptr;
2673 #endif
      // The remembered shell is no longer the current one: forget it.
2674  if( pVSh != m_pActionViewShell )
2675  m_pActionViewShell = nullptr;
2676  }
2677 
2678  if( !m_pActionViewShell || (bChkAction && !m_pActionViewShell->ActionPend()) )
2679  return m_pActionViewShell;
2680 
2681  if( dynamic_cast< const SwEditShell *>( m_pActionViewShell ) != nullptr )
2682  {
2683  // Already scrolled? Then make sure that the view doesn't move!
      // The view lock and the EndActionByVirDev flag are saved here and
      // restored after EndAction().
2684  const bool bOldLock = m_pActionViewShell->IsViewLocked();
2685  m_pActionViewShell->LockView( true );
2686  const bool bOldEndActionByVirDev = m_pActionViewShell->IsEndActionByVirDev();
2688  static_cast<SwEditShell*>(m_pActionViewShell)->EndAction();
2689  m_pActionViewShell->SetEndActionByVirDev( bOldEndActionByVirDev );
2690  m_pActionViewShell->LockView( bOldLock );
2691 
2692  // bChkJumpMark is only set when the object was also found
2693  if( m_bChkJumpMark )
2694  {
2695  const Point aVisSttPos( DOCUMENTBORDER, DOCUMENTBORDER );
      // The visible area is still at the document start; the call that
      // uses the URL mark (line 2697) is missing from this extract.
2696  if( GetMedium() && aVisSttPos == m_pActionViewShell->VisArea().Pos() )
2698  GetMedium()->GetURLObject().GetMark() );
2699  m_bChkJumpMark = false;
2700  }
2701  }
2702  else
2704 
2705  // if the parser holds the last reference to the document, then we can
2706  // abort here and set an error.
2707  if( 1 == m_xDoc->getReferenceCount() )
2708  {
2709  eState = SvParserState::Error;
2710  }
2711 
2713  m_pActionViewShell = nullptr;
2714 
2715  return pVSh;
2716 }
2717 
2719 {
      // NOTE(review): the signature line (2718) is missing from this extract;
      // the assertion text suggests this is
      // SwHTMLParser::CheckActionViewShell() -- confirm.
      //
      // Compares m_pActionViewShell with the document's current view shell,
      // resets it to nullptr when they differ, and returns the (possibly
      // reset) m_pActionViewShell.
2720  SwViewShell *pVSh = m_xDoc->getIDocumentLayoutAccess().GetCurrentViewShell();
2721  OSL_ENSURE( !pVSh || m_pActionViewShell == pVSh,
2722  "CheckActionViewShell: Who has swapped SwViewShell?" );
2723 #if OSL_DEBUG_LEVEL > 0
      // pVSh is already null in this branch, so the assignment has no effect.
2724  if( m_pActionViewShell && !pVSh )
2725  pVSh = nullptr;
2726 #endif
2727  if( pVSh != m_pActionViewShell )
2728  m_pActionViewShell = nullptr;
2729 
2730  return m_pActionViewShell;
2731 }
2732 
2734  : m_pFrameFormat(pFrameFormat)
2735 {
      // NOTE(review): the constructor signature (2733) and the single body
      // statement (2736) are missing from this extract. Only the
      // initialisation of m_pFrameFormat from pFrameFormat is visible;
      // presumably the missing statement registers this object with the
      // frame format -- confirm.
2737 }
2738 
2740 {
      // NOTE(review): the signature line (2739) is missing from this extract;
      // rHint is presumably the notification parameter -- confirm.
      // A "Dying" hint clears the stored frame-format pointer so it is not
      // used afterwards.
2741  if (rHint.GetId() == SfxHintId::Dying)
2742  m_pFrameFormat = nullptr;
2743 }
2744 
// Applies the finished attributes queued in m_aSetAttrTab to the document,
// then re-anchors the pending fly frames in m_aMoveFlyFrames, and finally
// inserts the field attributes that were collected on the way.
//
// bChkEnd      - when true, an attribute is only applied if it ends before
//                the current PaM position (see the bSetAttr expression);
//                when false, the decision only depends on the nodes-array
//                section the positions are in.
// bBeforeTable - when true, attributes ending in the current node are ended
//                in the previous node instead, or discarded if they start in
//                the current node.
// pPostIts     - optional; when non-null, Postit and Script fields are
//                handed over to the front of this deque instead of being
//                inserted.
//
// Every attribute taken from m_aSetAttrTab is consumed here: it is deleted
// or handed to a unique_ptr container (aFields / *pPostIts), together with
// its whole Prev list.
// NOTE(review): several original lines are missing from this extract
// (2923-2924, 2930, 2941, 2977, 2985, 2989).
2745 void SwHTMLParser::SetAttr_( bool bChkEnd, bool bBeforeTable,
2746  std::deque<std::unique_ptr<HTMLAttr>> *pPostIts )
2747 {
2748  SwPaM aAttrPam( *m_pPam->GetPoint() );
2749  const SwNodeIndex& rEndIdx = m_pPam->GetPoint()->nNode;
2750  const sal_Int32 nEndCnt = m_pPam->GetPoint()->nContent.GetIndex();
2751  HTMLAttr* pAttr;
2752  SwContentNode* pCNd;
2753 
      // Field attributes are collected here and inserted at the very end.
2754  std::vector<std::unique_ptr<HTMLAttr>> aFields;
2755 
      // Walk the queue from back to front so that erasing entry n does not
      // disturb the entries still to be visited.
2756  for( auto n = m_aSetAttrTab.size(); n; )
2757  {
2758  pAttr = m_aSetAttrTab[ --n ];
2759  sal_uInt16 nWhich = pAttr->m_pItem->Which();
2760 
2761  sal_uLong nEndParaIdx = pAttr->GetEndParagraphIdx();
2762  bool bSetAttr;
2763  if( bChkEnd )
2764  {
2765  // Set character attribute with end early on, so set them still in
2766  // the current paragraph (because of JavaScript and various "chats"(?)).
2767  // This shouldn't be done for attributes which are used for
2768  // the whole paragraph, because they could be from a paragraph style
2769  // which can't be set. Because the attributes are inserted with
2770  // SETATTR_DONTREPLACE, they should be able to be set later.
2771  bSetAttr = ( nEndParaIdx < rEndIdx.GetIndex() &&
2772  (RES_LR_SPACE != nWhich || !GetNumInfo().GetNumRule()) ) ||
2773  ( !pAttr->IsLikePara() &&
2774  nEndParaIdx == rEndIdx.GetIndex() &&
2775  pAttr->GetEndContent() < nEndCnt &&
2776  (isCHRATR(nWhich) || isTXTATR_WITHEND(nWhich)) ) ||
2777  ( bBeforeTable &&
2778  nEndParaIdx == rEndIdx.GetIndex() &&
2779  !pAttr->GetEndContent() );
2780  }
2781  else
2782  {
2783  // Attributes in body nodes array section shouldn't be set if we are in a
2784  // special nodes array section, but vice versa it's possible.
2785  sal_uLong nEndOfIcons = m_xDoc->GetNodes().GetEndOfExtras().GetIndex();
2786  bSetAttr = nEndParaIdx < rEndIdx.GetIndex() ||
2787  rEndIdx.GetIndex() > nEndOfIcons ||
2788  nEndParaIdx <= nEndOfIcons;
2789  }
2790 
2791  if( bSetAttr )
2792  {
2793  // The attribute shouldn't be in the list of temporary paragraph
2794  // attributes, because then it would be deleted.
      // Note that this loop empties m_aParaAttrs completely.
2795  while( !m_aParaAttrs.empty() )
2796  {
2797  OSL_ENSURE( pAttr != m_aParaAttrs.back(),
2798  "SetAttr: Attribute must not yet be set" );
2799  m_aParaAttrs.pop_back();
2800  }
2801 
2802  // then set it
2803  m_aSetAttrTab.erase( m_aSetAttrTab.begin() + n );
2804 
      // Process the attribute and then every attribute in its Prev list.
      // pPrev is fetched first because pAttr may be deleted below.
2805  while( pAttr )
2806  {
2807  HTMLAttr *pPrev = pAttr->GetPrev();
2808  if( !pAttr->m_bValid )
2809  {
2810  // invalid attributes can be deleted
2811  delete pAttr;
2812  pAttr = pPrev;
2813  continue;
2814  }
2815 
2816  pCNd = pAttr->m_nStartPara.GetNode().GetContentNode();
2817  if( !pCNd )
2818  {
2819  // because of the awful deleting of nodes an index can also
2820  // point to an end node :-(
2821  if ( (pAttr->GetStartParagraph() == pAttr->GetEndParagraph()) &&
2822  !isTXTATR_NOEND(nWhich) )
2823  {
2824  // when the end index also points to the node, we don't
2825  // need to set attributes anymore, except if it's a text attribute.
2826  delete pAttr;
2827  pAttr = pPrev;
2828  continue;
2829  }
      // Otherwise move the start forward to the next content node.
2830  pCNd = m_xDoc->GetNodes().GoNext( &(pAttr->m_nStartPara) );
2831  if( pCNd )
2832  pAttr->m_nStartContent = 0;
2833  else
2834  {
2835  OSL_ENSURE( false, "SetAttr: GoNext() failed!" );
2836  delete pAttr;
2837  pAttr = pPrev;
2838  continue;
2839  }
2840  }
2841  aAttrPam.GetPoint()->nNode = pAttr->m_nStartPara;
2842 
2843  // because of the deleting of BRs the start index can also
2844  // point behind the end of the text
2845  if( pAttr->m_nStartContent > pCNd->Len() )
2846  pAttr->m_nStartContent = pCNd->Len();
2847  aAttrPam.GetPoint()->nContent.Assign( pCNd, pAttr->m_nStartContent );
2848 
      // The mark is set while the point is at the attribute start; the
      // point is then moved on to the attribute end.
2849  aAttrPam.SetMark();
2850  if ( (pAttr->GetStartParagraph() != pAttr->GetEndParagraph()) &&
2851  !isTXTATR_NOEND(nWhich) )
2852  {
2853  pCNd = pAttr->m_nEndPara.GetNode().GetContentNode();
2854  if( !pCNd )
2855  {
      // The end index does not point to a content node: fall back to the
      // end of the previous content node.
2856  pCNd = SwNodes::GoPrevious( &(pAttr->m_nEndPara) );
2857  if( pCNd )
2858  pAttr->m_nEndContent = pCNd->Len();
2859  else
2860  {
2861  OSL_ENSURE( false, "SetAttr: GoPrevious() failed!" );
2862  aAttrPam.DeleteMark();
2863  delete pAttr;
2864  pAttr = pPrev;
2865  continue;
2866  }
2867  }
2868 
2869  aAttrPam.GetPoint()->nNode = pAttr->m_nEndPara;
2870  }
2871  else if( pAttr->IsLikePara() )
2872  {
      // Paragraph-like attributes cover the node up to its end.
2873  pAttr->m_nEndContent = pCNd->Len();
2874  }
2875 
2876  // because of the deleting of BRs the end index can also
2877  // point behind the end of the text
2878  if( pAttr->m_nEndContent > pCNd->Len() )
2879  pAttr->m_nEndContent = pCNd->Len();
2880 
2881  aAttrPam.GetPoint()->nContent.Assign( pCNd, pAttr->m_nEndContent );
2882  if( bBeforeTable &&
2883  aAttrPam.GetPoint()->nNode.GetIndex() ==
2884  rEndIdx.GetIndex() )
2885  {
2886  // If we're before inserting a table and the attribute ends
2887  // in the current node, then we must end it in the previous
2888  // node or discard it, if it starts in that node.
2889  if( nWhich != RES_BREAK && nWhich != RES_PAGEDESC &&
2890  !isTXTATR_NOEND(nWhich) )
2891  {
2892  if( aAttrPam.GetMark()->nNode.GetIndex() !=
2893  rEndIdx.GetIndex() )
2894  {
2895  OSL_ENSURE( !aAttrPam.GetPoint()->nContent.GetIndex(),
2896  "Content-Position before table not 0???" );
2897  aAttrPam.Move( fnMoveBackward );
2898  }
2899  else
2900  {
2901  aAttrPam.DeleteMark();
2902  delete pAttr;
2903  pAttr = pPrev;
2904  continue;
2905  }
2906  }
2907  }
2908 
2909  switch( nWhich )
2910  {
2911  case RES_FLTR_BOOKMARK: // insert bookmark
2912  {
2913  const OUString sName( static_cast<SfxStringItem*>(pAttr->m_pItem.get())->GetValue() );
2914  IDocumentMarkAccess* const pMarkAccess = m_xDoc->getIDocumentMarkAccess();
2915  IDocumentMarkAccess::const_iterator_t ppBkmk = pMarkAccess->findMark( sName );
2916  if( ppBkmk != pMarkAccess->getAllMarksEnd() &&
2917  (*ppBkmk)->GetMarkStart() == *aAttrPam.GetPoint() )
2918  break; // do not generate duplicates on this position
2919  aAttrPam.DeleteMark();
      // NOTE(review): the remaining makeMark() arguments (lines 2923-2924)
      // are missing from this extract.
2920  const ::sw::mark::IMark* const pNewMark = pMarkAccess->makeMark(
2921  aAttrPam,
2922  sName,
2925 
2926  // jump to bookmark
2927  if( JumpToMarks::Mark == m_eJumpTo && pNewMark->GetName() == m_sJmpMark )
2928  {
2929  m_bChkJumpMark = true;
2931  }
2932  }
2933  break;
2934  case RES_TXTATR_FIELD:
2935  case RES_TXTATR_ANNOTATION:
2936  case RES_TXTATR_INPUTFIELD:
2937  {
      // Fields are not inserted here: ownership of pAttr moves either to
      // *pPostIts (Postit/Script fields, if requested) or to aFields.
      // NOTE(review): the alternative of this conditional expression
      // (line 2941) is missing from this extract.
2938  SwFieldIds nFieldWhich =
2939  pPostIts
2940  ? static_cast<const SwFormatField *>(pAttr->m_pItem.get())->GetField()->GetTyp()->Which()
2942  if( pPostIts && (SwFieldIds::Postit == nFieldWhich ||
2943  SwFieldIds::Script == nFieldWhich) )
2944  {
2945  pPostIts->emplace_front( pAttr );
2946  }
2947  else
2948  {
2949  aFields.emplace_back( pAttr);
2950  }
2951  }
      // pAttr is owned by a container now, so it must not be deleted:
      // skip the common tail of the loop.
2952  aAttrPam.DeleteMark();
2953  pAttr = pPrev;
2954  continue;
2955 
2956  case RES_LR_SPACE:
2957  if( aAttrPam.GetPoint()->nNode.GetIndex() ==
2958  aAttrPam.GetMark()->nNode.GetIndex())
2959  {
2960  // because of numbering set this attribute directly at node
2961  pCNd->SetAttr( *pAttr->m_pItem );
2962  break;
2963  }
2964  OSL_ENSURE( false,
2965  "LRSpace set over multiple paragraphs!" );
2966  [[fallthrough]]; // (shouldn't reach this point anyway)
2967 
2968  // tdf#94088 expand RES_BACKGROUND to the new fill attribute
2969  // definitions in the range [XATTR_FILL_FIRST .. XATTR_FILL_LAST].
2970  // This is the right place in the future if the adapted fill attributes
2971  // may be handled more directly in HTML import to handle them.
2972  case RES_BACKGROUND:
2973  {
2974  const SvxBrushItem& rBrush = static_cast< SvxBrushItem& >(*pAttr->m_pItem);
2975  SfxItemSet aNewSet(m_xDoc->GetAttrPool(), svl::Items<XATTR_FILL_FIRST, XATTR_FILL_LAST>);
2976 
      // NOTE(review): the statement that fills aNewSet from rBrush
      // (line 2977) is missing from this extract.
2978  m_xDoc->getIDocumentContentOperations().InsertItemSet(aAttrPam, aNewSet, SetAttrMode::DONTREPLACE);
2979  break;
2980  }
2981  default:
2982 
2983  // maybe jump to a bookmark
2984  if( RES_TXTATR_INETFMT == nWhich &&
2986  m_sJmpMark == static_cast<SwFormatINetFormat*>(pAttr->m_pItem.get())->GetName() )
2987  {
2988  m_bChkJumpMark = true;
2990  }
2991 
2992  m_xDoc->getIDocumentContentOperations().InsertPoolItem( aAttrPam, *pAttr->m_pItem, SetAttrMode::DONTREPLACE );
2993  }
2994  aAttrPam.DeleteMark();
2995 
2996  delete pAttr;
2997  pAttr = pPrev;
2998  }
2999  }
3000  }
3001 
      // Second pass: fly frames waiting to be re-anchored. m_aMoveFlyFrames
      // and m_aMoveFlyCnts are parallel containers and are always erased
      // together.
3002  for( auto n = m_aMoveFlyFrames.size(); n; )
3003  {
3004  SwFrameFormat *pFrameFormat = m_aMoveFlyFrames[--n]->GetFrameFormat();
3005  if (!pFrameFormat)
3006  {
3007  SAL_WARN("sw.html", "SwFrameFormat deleted during import");
3008  m_aMoveFlyFrames.erase( m_aMoveFlyFrames.begin() + n );
3009  m_aMoveFlyCnts.erase( m_aMoveFlyCnts.begin() + n );
3010  continue;
3011  }
3012 
3013  const SwFormatAnchor& rAnchor = pFrameFormat->GetAnchor();
3014  OSL_ENSURE( RndStdIds::FLY_AT_PARA == rAnchor.GetAnchorId(),
3015  "Only At-Para flys need special handling" );
3016  const SwPosition *pFlyPos = rAnchor.GetContentAnchor();
3017  sal_uLong nFlyParaIdx = pFlyPos->nNode.GetIndex();
3018  bool bMoveFly;
3019  if( bChkEnd )
3020  {
3021  bMoveFly = nFlyParaIdx < rEndIdx.GetIndex() ||
3022  ( nFlyParaIdx == rEndIdx.GetIndex() &&
3023  m_aMoveFlyCnts[n] < nEndCnt );
3024  }
3025  else
3026  {
3027  sal_uLong nEndOfIcons = m_xDoc->GetNodes().GetEndOfExtras().GetIndex();
3028  bMoveFly = nFlyParaIdx < rEndIdx.GetIndex() ||
3029  rEndIdx.GetIndex() > nEndOfIcons ||
3030  nFlyParaIdx <= nEndOfIcons;
3031  }
3032  if( bMoveFly )
3033  {
      // Re-anchor the frame at the remembered character position: its
      // frames are deleted, the anchor is switched to FLY_AT_CHAR, and the
      // frames are created again.
3034  pFrameFormat->DelFrames();
3035  *aAttrPam.GetPoint() = *pFlyPos;
3036  aAttrPam.GetPoint()->nContent.Assign( aAttrPam.GetContentNode(),
3037  m_aMoveFlyCnts[n] );
3038  SwFormatAnchor aAnchor( rAnchor );
3039  aAnchor.SetType( RndStdIds::FLY_AT_CHAR );
3040  aAnchor.SetAnchor( aAttrPam.GetPoint() );
3041  pFrameFormat->SetFormatAttr( aAnchor );
3042 
      // LEFT / TOP oriented frames become relative to the character.
3043  const SwFormatHoriOrient& rHoriOri = pFrameFormat->GetHoriOrient();
3044  if( text::HoriOrientation::LEFT == rHoriOri.GetHoriOrient() )
3045  {
3046  SwFormatHoriOrient aHoriOri( rHoriOri );
3047  aHoriOri.SetRelationOrient( text::RelOrientation::CHAR );
3048  pFrameFormat->SetFormatAttr( aHoriOri );
3049  }
3050  const SwFormatVertOrient& rVertOri = pFrameFormat->GetVertOrient();
3051  if( text::VertOrientation::TOP == rVertOri.GetVertOrient() )
3052  {
3053  SwFormatVertOrient aVertOri( rVertOri );
3054  aVertOri.SetRelationOrient( text::RelOrientation::CHAR );
3055  pFrameFormat->SetFormatAttr( aVertOri );
3056  }
3057 
3058  pFrameFormat->MakeFrames();
3059  m_aMoveFlyFrames.erase( m_aMoveFlyFrames.begin() + n );
3060  m_aMoveFlyCnts.erase( m_aMoveFlyCnts.begin() + n );
3061  }
3062  }
      // Finally insert the collected fields, in the order they were
      // collected; each HTMLAttr is released right after its item has been
      // inserted.
3063  for (auto & field : aFields)
3064  {
3065  pCNd = field->m_nStartPara.GetNode().GetContentNode();
3066  aAttrPam.GetPoint()->nNode = field->m_nStartPara;
3067  aAttrPam.GetPoint()->nContent.Assign( pCNd, field->m_nStartContent );
3068 
3069  if( bBeforeTable &&
3070  aAttrPam.GetPoint()->nNode.GetIndex() == rEndIdx.GetIndex() )
3071  {
      // This assertion always fires inside this branch (bBeforeTable is
      // true here); it only records that the case does occur.
3072  OSL_ENSURE( !bBeforeTable, "Aha, the case does occur" );
3073  OSL_ENSURE( !aAttrPam.GetPoint()->nContent.GetIndex(),
3074  "Content-Position before table not 0???" );
3075  // !!!
3076  aAttrPam.Move( fnMoveBackward );
3077  }
3078 
3079  m_xDoc->getIDocumentContentOperations().InsertPoolItem( aAttrPam, *field->m_pItem );
3080 
3081  field.reset();
3082  }
3083  aFields.clear();
3084 }
3085 
3086 void SwHTMLParser::NewAttr(const std::shared_ptr<HTMLAttrTable>& rAttrTable, HTMLAttr **ppAttr, const SfxPoolItem& rItem )
3087 {
3088  // Font height and font colour as well as escape attributes may not be
3089  // combined. Therefore they're saved in a list and in it the last opened
3090  // attribute is at the beginning and count is always one. For all other
3091  // attributes count is just incremented.
3092  if( *ppAttr )
3093  {
3094  HTMLAttr *pAttr = new HTMLAttr(*m_pPam->GetPoint(), rItem, ppAttr, rAttrTable);
3095  pAttr->InsertNext( *ppAttr );
3096  (*ppAttr) = pAttr;
3097  }
3098  else
3099  (*ppAttr) = new HTMLAttr(*m_pPam->GetPoint(), rItem, ppAttr, rAttrTable);
3100 }
3101 
// Ends the open attribute pAttr at the current PaM position and removes it
// from its list of open attributes.
//
// pAttr     - the attribute to end; it is either queued for setting
//             (m_aSetAttrTab or the Prev list of the next open attribute) or
//             deleted, so the caller must not use it afterwards.
// bChkEmpty - when true, an attribute with an empty range is deleted instead
//             of queued, except for RES_PAGEDESC, RES_BREAK and paragraph
//             attributes for which the PaM was moved back.
//
// Returns false if the attribute was deleted rather than queued (empty range,
// or a script-dependent attribute whose last text portion has a different
// script type), true otherwise.
// NOTE(review): line 3259 is missing from this extract.
3102 bool SwHTMLParser::EndAttr( HTMLAttr* pAttr, bool bChkEmpty )
3103 {
3104  bool bRet = true;
3105 
3106  // The list header is saved in the attribute.
3107  HTMLAttr **ppHead = pAttr->m_ppHead;
3108 
3109  OSL_ENSURE( ppHead, "No list header attribute found!" );
3110 
3111  // save the current position as end position
3112  const SwNodeIndex* pEndIdx = &m_pPam->GetPoint()->nNode;
3113  sal_Int32 nEndCnt = m_pPam->GetPoint()->nContent.GetIndex();
3114 
3115  // Is the last started or an earlier started attribute being ended?
3116  HTMLAttr *pLast = nullptr;
3117  if( ppHead && pAttr != *ppHead )
3118  {
3119  // The last started attribute isn't being ended
3120 
3121  // Then we look for attribute which was started immediately afterwards,
3122  // which has also not yet been ended (otherwise it would no longer be
3123  // in the list).
3124  pLast = *ppHead;
3125  while( pLast && pLast->GetNext() != pAttr )
3126  pLast = pLast->GetNext();
3127 
3128  OSL_ENSURE( pLast, "Attribute not found in own list!" );
3129  }
3130 
3131  bool bMoveBack = false;
3132  sal_uInt16 nWhich = pAttr->m_pItem->Which();
      // An attribute with Which() >= RES_PARATR_BEGIN that ends at content
      // position 0 of a paragraph other than its start paragraph is ended
      // one position earlier instead.
3133  if( !nEndCnt && RES_PARATR_BEGIN <= nWhich &&
3134  *pEndIdx != pAttr->GetStartParagraph() )
3135  {
3136  // Then move back one position in the content!
3137  bMoveBack = m_pPam->Move( fnMoveBackward );
3138  nEndCnt = m_pPam->GetPoint()->nContent.GetIndex();
3139  }
3140 
3141  // now end the attribute
3142  HTMLAttr *pNext = pAttr->GetNext();
3143 
3144  bool bInsert;
3145  sal_uInt16 nScriptItem = 0;
3146  bool bScript = false;
3147  // does it have a non-empty range?
3148  if( !bChkEmpty || (RES_PARATR_BEGIN <= nWhich && bMoveBack) ||
3149  RES_PAGEDESC == nWhich || RES_BREAK == nWhich ||
3150  *pEndIdx != pAttr->GetStartParagraph() ||
3151  nEndCnt != pAttr->GetStartContent() )
3152  {
3153  bInsert = true;
3154  // We do some optimization for script dependent attributes here.
      // (only when the attribute starts and ends in the same paragraph)
3155  if( *pEndIdx == pAttr->GetStartParagraph() )
3156  {
3157  lcl_swhtml_getItemInfo( *pAttr, bScript, nScriptItem );
3158  }
3159  }
3160  else
3161  {
3162  bInsert = false;
3163  }
3164 
3165  const SwTextNode *pTextNd = (bInsert && bScript) ?
3166  pAttr->GetStartParagraph().GetNode().GetTextNode() :
3167  nullptr;
3168 
3169  if (pTextNd)
3170  {
      // Walk the script portions of the text between the attribute start
      // and nEndCnt. For every portion whose script type matches the item,
      // a clone ending at the portion end is queued; pAttr's start is moved
      // to the beginning of the following portion each time.
3171  const OUString& rText = pTextNd->GetText();
3172  sal_uInt16 nScriptText = g_pBreakIt->GetBreakIter()->getScriptType(
3173  rText, pAttr->GetStartContent() );
3174  sal_Int32 nScriptEnd = g_pBreakIt->GetBreakIter()
3175  ->endOfScript( rText, pAttr->GetStartContent(), nScriptText );
3176  while (nScriptEnd < nEndCnt && nScriptEnd != -1)
3177  {
3178  if( nScriptItem == nScriptText )
3179  {
3180  HTMLAttr *pSetAttr = pAttr->Clone( *pEndIdx, nScriptEnd );
3181  pSetAttr->ClearPrev();
3182  if( pNext )
3183  pNext->InsertPrev( pSetAttr );
3184  else
3185  {
3186  if (pSetAttr->m_bInsAtStart)
3187  m_aSetAttrTab.push_front( pSetAttr );
3188  else
3189  m_aSetAttrTab.push_back( pSetAttr );
3190  }
3191  }
3192  pAttr->m_nStartContent = nScriptEnd;
3193  nScriptText = g_pBreakIt->GetBreakIter()->getScriptType(
3194  rText, nScriptEnd );
3195  nScriptEnd = g_pBreakIt->GetBreakIter()
3196  ->endOfScript( rText, nScriptEnd, nScriptText );
3197  }
      // The remaining portion is only set if its script type matches.
3198  bInsert = nScriptItem == nScriptText;
3199  }
3200  if( bInsert )
3201  {
3202  pAttr->m_nEndPara = *pEndIdx;
3203  pAttr->m_nEndContent = nEndCnt;
      // Everything except INetFormat and CharFormat attributes is queued at
      // the front of m_aSetAttrTab.
3204  pAttr->m_bInsAtStart = RES_TXTATR_INETFMT != nWhich &&
3205  RES_TXTATR_CHARFMT != nWhich;
3206 
3207  if( !pNext )
3208  {
3209  // No open attributes of that type exists any longer, so all
3210  // can be set. Except they depend on another attribute, then
3211  // they're appended there.
3212  if (pAttr->m_bInsAtStart)
3213  m_aSetAttrTab.push_front( pAttr );
3214  else
3215  m_aSetAttrTab.push_back( pAttr );
3216  }
3217  else
3218  {
3219  // There are other open attributes of that type,
3220  // therefore the setting must be postponed.
3221  // Hence the current attribute is added at the end
3222  // of the Prev-List of the successor.
3223  pNext->InsertPrev( pAttr );
3224  }
3225  }
3226  else
3227  {
3228  // Then don't insert, but delete. Because of the "faking" of styles
3229  // by hard attributing there can be also other empty attributes in the
3230  // Prev-List, which must be set anyway.
      // pPrev is fetched before pAttr is deleted.
3231  HTMLAttr *pPrev = pAttr->GetPrev();
3232  bRet = false;
3233  delete pAttr;
3234 
3235  if( pPrev )
3236  {
3237  // The previous attributes must be set anyway.
3238  if( pNext )
3239  pNext->InsertPrev( pPrev );
3240  else
3241  {
3242  if (pPrev->m_bInsAtStart)
3243  m_aSetAttrTab.push_front( pPrev );
3244  else
3245  m_aSetAttrTab.push_back( pPrev );
3246  }
3247  }
3248 
3249  }
3250 
3251  // If the first attribute of the list was set, then the list header
3252  // must be corrected as well.
      // Only pointer values are used from here on; pAttr itself may already
      // have been deleted above.
3253  if( pLast )
3254  pLast->m_pNext = pNext;
3255  else if( ppHead )
3256  *ppHead = pNext;
3257 
      // NOTE(review): the statement controlled by this "if" (line 3259) is
      // missing from this extract; presumably it moves the PaM forward
      // again -- confirm.
3258  if( bMoveBack )
3260 
3261  return bRet;
3262 }
3263 
3265 {
      // NOTE(review): the signature line (3264) is missing from this extract;
      // the body suggests this is SwHTMLParser::DeleteAttr( HTMLAttr* pAttr )
      // -- confirm.
      //
      // Deletes the open attribute pAttr without setting it, unlinks it from
      // its list of open attributes, and hands its Prev list on to the next
      // open attribute or to m_aSetAttrTab.
3266  // preliminary paragraph attributes are not allowed here, they could
3267  // be set here and then the pointers become invalid!
3268  OSL_ENSURE(m_aParaAttrs.empty(),
3269  "Danger: there are non-final paragraph attributes");
3270  m_aParaAttrs.clear();
3271 
3272  // The list header is saved in the attribute
3273  HTMLAttr **ppHead = pAttr->m_ppHead;
3274 
3275  OSL_ENSURE( ppHead, "no list header attribute found!" );
3276 
3277  // Is the last started or an earlier started attribute being removed?
3278  HTMLAttr *pLast = nullptr;
3279  if( ppHead && pAttr != *ppHead )
3280  {
3281  // The last started attribute isn't being removed
3282 
3283  // Then we look for attribute which was started immediately afterwards,
3284  // which has also not yet been ended (otherwise it would no longer be
3285  // in the list).
3286  pLast = *ppHead;
3287  while( pLast && pLast->GetNext() != pAttr )
3288  pLast = pLast->GetNext();
3289 
3290  OSL_ENSURE( pLast, "Attribute not found in own list!" );
3291  }
3292 
3293  // now delete the attribute
      // Its neighbours are read first, because pAttr is gone afterwards.
3294  HTMLAttr *pNext = pAttr->GetNext();
3295  HTMLAttr *pPrev = pAttr->GetPrev();
3296  //hold ref to xAttrTab until end of scope to ensure *ppHead validity
3297  std::shared_ptr<HTMLAttrTable> xKeepAlive(pAttr->m_xAttrTab);
3298  delete pAttr;
3299 
3300  if( pPrev )
3301  {
3302  // The previous attributes must be set anyway.
3303  if( pNext )
3304  pNext->InsertPrev( pPrev );
3305  else
3306  {
3307  if (pPrev->m_bInsAtStart)
3308  m_aSetAttrTab.push_front( pPrev );
3309  else
3310  m_aSetAttrTab.push_back( pPrev );
3311  }
3312  }
3313 
3314  // If the first attribute of the list was deleted, then the list header
3315  // must be corrected as well.
3316  if( pLast )
3317  pLast->m_pNext = pNext;
3318  else if( ppHead )
3319  *ppHead = pNext;
3320 }
3321 
3322 void SwHTMLParser::SaveAttrTab(std::shared_ptr<HTMLAttrTable> const & rNewAttrTab)
3323 {
3324  // preliminary paragraph attributes are not allowed here, they could
3325  // be set here and then the pointers become invalid!
3326  OSL_ENSURE(m_aParaAttrs.empty(),
3327  "Danger: there are non-final paragraph attributes");
3328  m_aParaAttrs.clear();
3329 
3330  HTMLAttr** pHTMLAttributes = reinterpret_cast<HTMLAttr**>(m_xAttrTab.get());
3331  HTMLAttr** pSaveAttributes = reinterpret_cast<HTMLAttr**>(rNewAttrTab.get());
3332 
3333  for (auto nCnt = sizeof(HTMLAttrTable) / sizeof(HTMLAttr*); nCnt--; ++pHTMLAttributes, ++pSaveAttributes)
3334  {
3335  *pSaveAttributes = *pHTMLAttributes;
3336 
3337  HTMLAttr *pAttr = *pSaveAttributes;
3338  while (pAttr)
3339  {
3340  pAttr->SetHead(pSaveAttributes, rNewAttrTab);
3341  pAttr = pAttr->GetNext();
3342  }
3343 
3344  *pHTMLAttributes = nullptr;
3345  }
3346 }
3347 
// Empties the current attribute table (m_xAttrTab) into rNewAttrTab, restarting
// every open attribute at the current PaM position. Where an attribute started
// before the computed end position, a clone carrying that end position is kept
// so that it can still be set: it is either chained as "previous" of the next
// attribute in the list or put into m_aSetAttrTab.
//
// rNewAttrTab   table that receives the restarted attributes
// bMoveEndBack  if true, the end position is moved back with
//               SwNodes::GoPrevious() to the end of that content node
3348 void SwHTMLParser::SplitAttrTab( std::shared_ptr<HTMLAttrTable> const & rNewAttrTab,
3349  bool bMoveEndBack )
3350 {
3351  // preliminary paragraph attributes are not allowed here, they could
3352  // be set here and then the pointers become invalid!
3353  OSL_ENSURE(m_aParaAttrs.empty(),
3354  "Danger: there are non-final paragraph attributes");
3355  m_aParaAttrs.clear();
3356 
3357  const SwNodeIndex& nSttIdx = m_pPam->GetPoint()->nNode;
3358  SwNodeIndex nEndIdx( nSttIdx );
3359 
3360  // close all still open attributes and re-open them after the table
3361  HTMLAttr** pHTMLAttributes = reinterpret_cast<HTMLAttr**>(m_xAttrTab.get());
3362  HTMLAttr** pSaveAttributes = reinterpret_cast<HTMLAttr**>(rNewAttrTab.get());
3363  bool bSetAttr = true;
3364  const sal_Int32 nSttCnt = m_pPam->GetPoint()->nContent.GetIndex();
3365  sal_Int32 nEndCnt = nSttCnt;
3366 
3367  if( bMoveEndBack )
3368  {
3369  sal_uLong nOldEnd = nEndIdx.GetIndex();
3370  sal_uLong nTmpIdx;
// The assignments inside the condition are intentional: nTmpIdx ends up as
// EndOfInserts if EndOfExtras or EndOfAutotext is at/behind the old end,
// otherwise it keeps the EndOfAutotext index assigned by the second operand.
3371  if( ( nTmpIdx = m_xDoc->GetNodes().GetEndOfExtras().GetIndex()) >= nOldEnd ||
3372  ( nTmpIdx = m_xDoc->GetNodes().GetEndOfAutotext().GetIndex()) >= nOldEnd )
3373  {
3374  nTmpIdx = m_xDoc->GetNodes().GetEndOfInserts().GetIndex();
3375  }
3376  SwContentNode* pCNd = SwNodes::GoPrevious(&nEndIdx);
3377 
3378  // Don't set attributes, when the PaM was moved outside of the content area.
3379  bSetAttr = pCNd && nTmpIdx < nEndIdx.GetIndex();
3380 
3381  nEndCnt = (bSetAttr ? pCNd->Len() : 0);
3382  }
// Walk both tables slot by slot; each slot is the head of one attribute list.
3383  for (auto nCnt = sizeof(HTMLAttrTable) / sizeof(HTMLAttr*); nCnt--; (++pHTMLAttributes, ++pSaveAttributes))
3384  {
3385  HTMLAttr *pAttr = *pHTMLAttributes;
3386  *pSaveAttributes = nullptr;
3387  while( pAttr )
3388  {
// Fetch both neighbours first: Reset() below breaks the links of pAttr.
3389  HTMLAttr *pNext = pAttr->GetNext();
3390  HTMLAttr *pPrev = pAttr->GetPrev();
3391 
3392  if( bSetAttr &&
3393  ( pAttr->GetStartParagraphIdx() < nEndIdx.GetIndex() ||
3394  (pAttr->GetStartParagraph() == nEndIdx &&
3395  pAttr->GetStartContent() != nEndCnt) ) )
3396  {
3397  // The attribute must be set before the list. We need the
3398  // original and therefore we clone it, because pointer to the
3399  // attribute exist in the other contexts. The Next-List is lost
3400  // in doing so, but the Previous-List is preserved.
3401  HTMLAttr *pSetAttr = pAttr->Clone( nEndIdx, nEndCnt );
3402 
3403  if( pNext )
3404  pNext->InsertPrev( pSetAttr );
3405  else
3406  {
3407  if (pSetAttr->m_bInsAtStart)
3408  m_aSetAttrTab.push_front( pSetAttr );
3409  else
3410  m_aSetAttrTab.push_back( pSetAttr );
3411  }
3412  }
3413  else if( pPrev )
3414  {
3415  // If the attribute doesn't need to be set before the table, then
3416  // the previous attributes must still be set.
3417  if( pNext )
3418  pNext->InsertPrev( pPrev );
3419  else
3420  {
3421  if (pPrev->m_bInsAtStart)
3422  m_aSetAttrTab.push_front( pPrev );
3423  else
3424  m_aSetAttrTab.push_back( pPrev );
3425  }
3426  }
3427 
3428  // set the start of the attribute anew and break link
3429  pAttr->Reset(nSttIdx, nSttCnt, pSaveAttributes, rNewAttrTab);
3430 
// Append pAttr at the tail of the list already collected in the save slot,
// so the original order of the list is kept.
3431  if (*pSaveAttributes)
3432  {
3433  HTMLAttr *pSAttr = *pSaveAttributes;
3434  while( pSAttr->GetNext() )
3435  pSAttr = pSAttr->GetNext();
3436  pSAttr->InsertNext( pAttr );
3437  }
3438  else
3439  *pSaveAttributes = pAttr;
3440 
3441  pAttr = pNext;
3442  }
3443 
3444  *pHTMLAttributes = nullptr;
3445  }
3446 }
3447 
3448 void SwHTMLParser::RestoreAttrTab(std::shared_ptr<HTMLAttrTable> const & rNewAttrTab)
3449 {
3450  // preliminary paragraph attributes are not allowed here, they could
3451  // be set here and then the pointers become invalid!
3452  OSL_ENSURE(m_aParaAttrs.empty(),
3453  "Danger: there are non-final paragraph attributes");
3454  m_aParaAttrs.clear();
3455 
3456  HTMLAttr** pHTMLAttributes = reinterpret_cast<HTMLAttr**>(m_xAttrTab.get());
3457  HTMLAttr** pSaveAttributes = reinterpret_cast<HTMLAttr**>(rNewAttrTab.get());
3458 
3459  for (auto nCnt = sizeof(HTMLAttrTable) / sizeof(HTMLAttr*); nCnt--; ++pHTMLAttributes, ++pSaveAttributes)
3460  {
3461  OSL_ENSURE(!*pHTMLAttributes, "The attribute table is not empty!");
3462 
3463  *pHTMLAttributes = *pSaveAttributes;
3464 
3465  HTMLAttr *pAttr = *pHTMLAttributes;
3466  while (pAttr)
3467  {
3468  OSL_ENSURE( !pAttr->GetPrev() || !pAttr->GetPrev()->m_ppHead,
3469  "Previous attribute has still a header" );
3470  pAttr->SetHead(pHTMLAttributes, m_xAttrTab);
3471  pAttr = pAttr->GetNext();
3472  }
3473 
3474  *pSaveAttributes = nullptr;
3475  }
3476 }
3477 
3478 void SwHTMLParser::InsertAttr( const SfxPoolItem& rItem, bool bInsAtStart )
3479 {
3480  HTMLAttr* pTmp = new HTMLAttr(*m_pPam->GetPoint(), rItem, nullptr, std::shared_ptr<HTMLAttrTable>());
3481  if (bInsAtStart)
3482  m_aSetAttrTab.push_front( pTmp );
3483  else
3484  m_aSetAttrTab.push_back( pTmp );
3485 }
3486 
3487 void SwHTMLParser::InsertAttrs( std::deque<std::unique_ptr<HTMLAttr>> rAttrs )
3488 {
3489  while( !rAttrs.empty() )
3490  {
3491  std::unique_ptr<HTMLAttr> pAttr = std::move(rAttrs.front());
3492  InsertAttr( pAttr->GetItem(), false );
3493  rAttrs.pop_front();
3494  }
3495 }
3496 
3498 {
3499  OUString aId, aStyle, aLang, aDir;
3500  OUString aClass;
3501 
3502  const HTMLOptions& rHTMLOptions = GetOptions();
3503  for (size_t i = rHTMLOptions.size(); i; )
3504  {
3505  const HTMLOption& rOption = rHTMLOptions[--i];
3506  switch( rOption.GetToken() )
3507  {
3508  case HtmlOptionId::ID:
3509  aId = rOption.GetString();
3510  break;
3511  case HtmlOptionId::STYLE:
3512  aStyle = rOption.GetString();
3513  break;
3514  case HtmlOptionId::CLASS:
3515  aClass = rOption.GetString();
3516  break;
3517  case HtmlOptionId::LANG:
3518  aLang = rOption.GetString();
3519  break;
3520  case HtmlOptionId::DIR:
3521  aDir = rOption.GetString();
3522  break;
3523  default: break;
3524  }
3525  }
3526 
3527  // create a new context
3528  std::unique_ptr<HTMLAttrContext> xCntxt(new HTMLAttrContext(nToken));
3529 
3530  // parse styles
3531  if( HasStyleOptions( aStyle, aId, aClass, &aLang, &aDir ) )
3532  {
3533  SfxItemSet aItemSet( m_xDoc->GetAttrPool(), m_pCSS1Parser->GetWhichMap() );
3534  SvxCSS1PropertyInfo aPropInfo;
3535 
3536  if( ParseStyleOptions( aStyle, aId, aClass, aItemSet, aPropInfo, &aLang, &aDir ) )
3537  {
3538  if( HtmlTokenId::SPAN_ON != nToken || aClass.isEmpty() ||
3539  !CreateContainer( aClass, aItemSet, aPropInfo, xCntxt.get() ) )
3540  DoPositioning( aItemSet, aPropInfo, xCntxt.get() );
3541  InsertAttrs( aItemSet, aPropInfo, xCntxt.get(), true );
3542  }
3543  }
3544 
3545  // save the context
3546  PushContext(xCntxt);
3547 }
3548 
3550  HTMLAttr **ppAttr, const SfxPoolItem & rItem,
3551  HTMLAttr **ppAttr2, const SfxPoolItem *pItem2,
3552  HTMLAttr **ppAttr3, const SfxPoolItem *pItem3 )
3553 {
// Start-tag handling for a tag that maps to up to three items.
// NOTE(review): the first line of the signature (return type, name and the
// parameter that the body uses as nToken) is missing from this listing.
//
// ppAttr/rItem     table slot and item that are always inserted
// ppAttr2/pItem2   optional second slot/item (pItem2 may be null)
// ppAttr3/pItem3   optional third slot/item (pItem3 may be null)
//
// With style options present, the items are put into an item set together
// with the parsed styles and inserted via InsertAttrs(); otherwise each item
// is inserted directly into its table slot.
3554  OUString aId, aStyle, aClass, aLang, aDir;
3555 
3556  const HTMLOptions& rHTMLOptions = GetOptions();
// Options are visited from last to first.
3557  for (size_t i = rHTMLOptions.size(); i; )
3558  {
3559  const HTMLOption& rOption = rHTMLOptions[--i];
3560  switch( rOption.GetToken() )
3561  {
3562  case HtmlOptionId::ID:
3563  aId = rOption.GetString();
3564  break;
3565  case HtmlOptionId::STYLE:
3566  aStyle = rOption.GetString();
3567  break;
3568  case HtmlOptionId::CLASS:
3569  aClass = rOption.GetString();
3570  break;
3571  case HtmlOptionId::LANG:
3572  aLang = rOption.GetString();
3573  break;
3574  case HtmlOptionId::DIR:
3575  aDir = rOption.GetString();
3576  break;
3577  default: break;
3578  }
3579  }
3580 
3581  // create a new context
3582  std::unique_ptr<HTMLAttrContext> xCntxt(new HTMLAttrContext(nToken));
3583 
3584  // parse styles
3585  if( HasStyleOptions( aStyle, aId, aClass, &aLang, &aDir ) )
3586  {
3587  SfxItemSet aItemSet( m_xDoc->GetAttrPool(), m_pCSS1Parser->GetWhichMap() );
3588  SvxCSS1PropertyInfo aPropInfo;
3589 
// The tag's own items go into the set before the style options are parsed
// into the same set.
3590  aItemSet.Put( rItem );
3591  if( pItem2 )
3592  aItemSet.Put( *pItem2 );
3593  if( pItem3 )
3594  aItemSet.Put( *pItem3 );
3595 
3596  if( ParseStyleOptions( aStyle, aId, aClass, aItemSet, aPropInfo, &aLang, &aDir ) )
3597  DoPositioning( aItemSet, aPropInfo, xCntxt.get() );
3598 
// Inserted even if ParseStyleOptions() returned false, because the set
// already holds the tag's own items.
3599  InsertAttrs( aItemSet, aPropInfo, xCntxt.get(), true );
3600  }
3601  else
3602  {
3603  InsertAttr( ppAttr ,rItem, xCntxt.get() );
3604  if( pItem2 )
3605  {
3606  OSL_ENSURE( ppAttr2, "missing table entry for item2" );
3607  InsertAttr( ppAttr2, *pItem2, xCntxt.get() );
3608  }
3609  if( pItem3 )
3610  {
3611  OSL_ENSURE( ppAttr3, "missing table entry for item3" );
3612  InsertAttr( ppAttr3, *pItem3, xCntxt.get() );
3613  }
3614  }
3615 
3616  // save the context
3617  PushContext(xCntxt);
3618 }
3619 
3621 {
3622  // fetch context
3623  std::unique_ptr<HTMLAttrContext> xCntxt(PopContext(getOnToken(nToken)));
3624  if (xCntxt)
3625  {
3626  // and maybe end the attributes
3627  EndContext(xCntxt.get());
3628  }
3629 }
3630 
3632 {
3633  OUString aId, aStyle, aClass, aLang, aDir;
3634  sal_uInt16 nSize = 3;
3635 
3636  const HTMLOptions& rHTMLOptions = GetOptions();
3637  for (size_t i = rHTMLOptions.size(); i; )
3638  {
3639  const HTMLOption& rOption = rHTMLOptions[--i];
3640  switch( rOption.GetToken() )
3641  {
3642  case HtmlOptionId::SIZE:
3643  nSize = o3tl::narrowing<sal_uInt16>(rOption.GetNumber());
3644  break;
3645  case HtmlOptionId::ID:
3646  aId = rOption.GetString();
3647  break;
3648  case HtmlOptionId::STYLE:
3649  aStyle = rOption.GetString();
3650  break;
3651  case HtmlOptionId::CLASS:
3652  aClass = rOption.GetString();
3653  break;
3654  case HtmlOptionId::LANG:
3655  aLang = rOption.GetString();
3656  break;
3657  case HtmlOptionId::DIR:
3658  aDir = rOption.GetString();
3659  break;
3660  default: break;
3661  }
3662  }
3663 
3664  if( nSize < 1 )
3665  nSize = 1;
3666 
3667  if( nSize > 7 )
3668  nSize = 7;
3669 
3670  // create a new context
3671  std::unique_ptr<HTMLAttrContext> xCntxt(new HTMLAttrContext(HtmlTokenId::BASEFONT_ON));
3672 
3673  // parse styles
3674  if( HasStyleOptions( aStyle, aId, aClass, &aLang, &aDir ) )
3675  {
3676  SfxItemSet aItemSet( m_xDoc->GetAttrPool(), m_pCSS1Parser->GetWhichMap() );
3677  SvxCSS1PropertyInfo aPropInfo;
3678 
3679  //CJK has different defaults
3680  SvxFontHeightItem aFontHeight( m_aFontHeights[nSize-1], 100, RES_CHRATR_FONTSIZE );
3681  aItemSet.Put( aFontHeight );
3682  SvxFontHeightItem aFontHeightCJK( m_aFontHeights[nSize-1], 100, RES_CHRATR_CJK_FONTSIZE );
3683  aItemSet.Put( aFontHeightCJK );
3684  //Complex type can contain so many types of letters,
3685  //that it's not really worthy to bother, IMO.
3686  //Still, I have set a default.
3687  SvxFontHeightItem aFontHeightCTL( m_aFontHeights[nSize-1], 100, RES_CHRATR_CTL_FONTSIZE );
3688  aItemSet.Put( aFontHeightCTL );
3689 
3690  if( ParseStyleOptions( aStyle, aId, aClass, aItemSet, aPropInfo, &aLang, &aDir ) )
3691  DoPositioning( aItemSet, aPropInfo, xCntxt.get() );
3692 
3693  InsertAttrs( aItemSet, aPropInfo, xCntxt.get(), true );
3694  }
3695  else
3696  {
3697  SvxFontHeightItem aFontHeight( m_aFontHeights[nSize-1], 100, RES_CHRATR_FONTSIZE );
3698  InsertAttr( &m_xAttrTab->pFontHeight, aFontHeight, xCntxt.get() );
3699  SvxFontHeightItem aFontHeightCJK( m_aFontHeights[nSize-1], 100, RES_CHRATR_CJK_FONTSIZE );
3700  InsertAttr( &m_xAttrTab->pFontHeightCJK, aFontHeightCJK, xCntxt.get() );
3701  SvxFontHeightItem aFontHeightCTL( m_aFontHeights[nSize-1], 100, RES_CHRATR_CTL_FONTSIZE );
3702  InsertAttr( &m_xAttrTab->pFontHeightCTL, aFontHeightCTL, xCntxt.get() );
3703  }
3704 
3705  // save the context
3706  PushContext(xCntxt);
3707 
3708  // save the font size
3709  m_aBaseFontStack.push_back( nSize );
3710 }
3711 
3713 {
3714  EndTag( HtmlTokenId::BASEFONT_ON );
3715 
3716  // avoid stack underflow in tables
3717  if( m_aBaseFontStack.size() > m_nBaseFontStMin )
3718  m_aBaseFontStack.erase( m_aBaseFontStack.begin() + m_aBaseFontStack.size() - 1 );
3719 }
3720 
3722 {
3723  sal_uInt16 nBaseSize =
3726  : 3 );
3727  sal_uInt16 nFontSize =
3728  ( m_aFontStack.size() > m_nFontStMin
3729  ? (m_aFontStack[m_aFontStack.size()-1] & FONTSIZE_MASK)
3730  : nBaseSize );
3731 
3732  OUString aFace, aId, aStyle, aClass, aLang, aDir;
3733  Color aColor;
3734  sal_uLong nFontHeight = 0; // actual font height to set
3735  sal_uInt16 nSize = 0; // font height in Netscape notation (1-7)
3736  bool bColor = false;
3737 
3738  const HTMLOptions& rHTMLOptions = GetOptions();
3739  for (size_t i = rHTMLOptions.size(); i; )
3740  {
3741  const HTMLOption& rOption = rHTMLOptions[--i];
3742  switch( rOption.GetToken() )
3743  {
3744  case HtmlOptionId::SIZE:
3745  if( HtmlTokenId::FONT_ON==nToken && !rOption.GetString().isEmpty() )
3746  {
3747  sal_Int32 nSSize;
3748  if( '+' == rOption.GetString()[0] ||
3749  '-' == rOption.GetString()[0] )
3750  nSSize = o3tl::saturating_add<sal_Int32>(nBaseSize, rOption.GetSNumber());
3751  else
3752  nSSize = static_cast<sal_Int32>(rOption.GetNumber());
3753 
3754  if( nSSize < 1 )
3755  nSSize = 1;
3756  else if( nSSize > 7 )
3757  nSSize = 7;
3758 
3759  nSize = o3tl::narrowing<sal_uInt16>(nSSize);
3760  nFontHeight = m_aFontHeights[nSize-1];
3761  }
3762  break;
3763  case HtmlOptionId::COLOR:
3764  if( HtmlTokenId::FONT_ON==nToken )
3765  {
3766  rOption.GetColor( aColor );
3767  bColor = true;
3768  }
3769  break;
3770  case HtmlOptionId::FACE:
3771  if( HtmlTokenId::FONT_ON==nToken )
3772  aFace = rOption.GetString();
3773  break;
3774  case HtmlOptionId::ID:
3775  aId = rOption.GetString();
3776  break;
3777  case HtmlOptionId::STYLE:
3778  aStyle = rOption.GetString();
3779  break;
3780  case HtmlOptionId::CLASS:
3781  aClass = rOption.GetString();
3782  break;
3783  case HtmlOptionId::LANG:
3784  aLang = rOption.GetString();
3785  break;
3786  case HtmlOptionId::DIR:
3787  aDir = rOption.GetString();
3788  break;
3789  default: break;
3790  }
3791  }
3792 
3793  if( HtmlTokenId::FONT_ON != nToken )
3794  {
3795  // HTML_BIGPRINT_ON or HTML_SMALLPRINT_ON
3796 
3797  // In headings the current heading sets the font height
3798  // and not BASEFONT.
3799  const SwFormatColl *pColl = GetCurrFormatColl();
3800  sal_uInt16 nPoolId = pColl ? pColl->GetPoolFormatId() : 0;
3801  if( nPoolId>=RES_POOLCOLL_HEADLINE1 &&
3802  nPoolId<=RES_POOLCOLL_HEADLINE6 )
3803  {
3804  // If the font height in the heading wasn't changed yet,
3805  // then take the one from the style.
3806  if( m_nFontStHeadStart==m_aFontStack.size() )
3807  nFontSize = static_cast< sal_uInt16 >(6 - (nPoolId - RES_POOLCOLL_HEADLINE1));
3808  }
3809  else
3810  nPoolId = 0;
3811 
3812  if( HtmlTokenId::BIGPRINT_ON == nToken )
3813  nSize = ( nFontSize<7 ? nFontSize+1 : 7 );
3814  else
3815  nSize = ( nFontSize>1 ? nFontSize-1 : 1 );
3816 
3817  // If possible in headlines we fetch the new font height
3818  // from the style.
3819  if( nPoolId && nSize>=1 && nSize <=6 )
3820  nFontHeight =
3821  m_pCSS1Parser->GetTextCollFromPool(
3822  RES_POOLCOLL_HEADLINE1+6-nSize )->GetSize().GetHeight();
3823  else
3824  nFontHeight = m_aFontHeights[nSize-1];
3825  }
3826 
3827  OSL_ENSURE( !nSize == !nFontHeight, "HTML-Font-Size != Font-Height" );
3828 
3829  OUString aFontName;
3830  const OUString aStyleName;
3831  FontFamily eFamily = FAMILY_DONTKNOW; // family and pitch,
3832  FontPitch ePitch = PITCH_DONTKNOW; // if not found
3833  rtl_TextEncoding eEnc = osl_getThreadTextEncoding();
3834 
3835  if( !aFace.isEmpty() && !m_pCSS1Parser->IsIgnoreFontFamily() )
3836  {
3837  const FontList *pFList = nullptr;
3838  SwDocShell *pDocSh = m_xDoc->GetDocShell();
3839  if( pDocSh )
3840  {
3841  const SvxFontListItem *pFListItem =
3842  static_cast<const SvxFontListItem *>(pDocSh->GetItem(SID_ATTR_CHAR_FONTLIST));
3843  if( pFListItem )
3844  pFList = pFListItem->GetFontList();
3845  }
3846 
3847  bool bFound = false;
3848  sal_Int32 nStrPos = 0;
3849  while( nStrPos!= -1 )
3850  {
3851  OUString aFName = aFace.getToken( 0, ',', nStrPos );
3852  aFName = comphelper::string::strip(aFName, ' ');
3853  if( !aFName.isEmpty() )
3854  {
3855  if( !bFound && pFList )
3856  {
3857  sal_Handle hFont = pFList->GetFirstFontMetric( aFName );
3858  if( nullptr != hFont )
3859  {
3860  const FontMetric& rFMetric = FontList::GetFontMetric( hFont );
3861  if( RTL_TEXTENCODING_DONTKNOW != rFMetric.GetCharSet() )
3862  {
3863  bFound = true;
3864  if( RTL_TEXTENCODING_SYMBOL == rFMetric.GetCharSet() )
3865  eEnc = RTL_TEXTENCODING_SYMBOL;
3866  }
3867  }
3868  }
3869  if( !aFontName.isEmpty() )
3870  aFontName += ";";
3871  aFontName += aFName;
3872  }
3873  }
3874  }
3875 
3876  // create a new context
3877  std::unique_ptr<HTMLAttrContext> xCntxt(new HTMLAttrContext(nToken));
3878 
3879  // parse styles
3880  if( HasStyleOptions( aStyle, aId, aClass, &aLang, &aDir ) )
3881  {
3882  SfxItemSet aItemSet( m_xDoc->GetAttrPool(), m_pCSS1Parser->GetWhichMap() );
3883  SvxCSS1PropertyInfo aPropInfo;
3884 
3885  if( nFontHeight )
3886  {
3887  SvxFontHeightItem aFontHeight( nFontHeight, 100, RES_CHRATR_FONTSIZE );
3888  aItemSet.Put( aFontHeight );
3889  SvxFontHeightItem aFontHeightCJK( nFontHeight, 100, RES_CHRATR_CJK_FONTSIZE );
3890  aItemSet.Put( aFontHeightCJK );
3891  SvxFontHeightItem aFontHeightCTL( nFontHeight, 100, RES_CHRATR_CTL_FONTSIZE );
3892  aItemSet.Put( aFontHeightCTL );
3893  }
3894  if( bColor )
3895  aItemSet.Put( SvxColorItem(aColor, RES_CHRATR_COLOR) );
3896  if( !aFontName.isEmpty() )
3897  {
3898  SvxFontItem aFont( eFamily, aFontName, aStyleName, ePitch, eEnc, RES_CHRATR_FONT );
3899  aItemSet.Put( aFont );
3900  SvxFontItem aFontCJK( eFamily, aFontName, aStyleName, ePitch, eEnc, RES_CHRATR_CJK_FONT );
3901  aItemSet.Put( aFontCJK );
3902  SvxFontItem aFontCTL( eFamily, aFontName, aStyleName, ePitch, eEnc, RES_CHRATR_CTL_FONT );
3903  aItemSet.Put( aFontCTL );
3904  }
3905 
3906  if( ParseStyleOptions( aStyle, aId, aClass, aItemSet, aPropInfo, &aLang, &aDir ) )
3907  DoPositioning( aItemSet, aPropInfo, xCntxt.get() );
3908 
3909  InsertAttrs( aItemSet, aPropInfo, xCntxt.get(), true );
3910  }
3911  else
3912  {
3913  if( nFontHeight )
3914  {
3915  SvxFontHeightItem aFontHeight( nFontHeight, 100, RES_CHRATR_FONTSIZE );
3916  InsertAttr( &m_xAttrTab->pFontHeight, aFontHeight, xCntxt.get() );
3917  SvxFontHeightItem aFontHeightCJK( nFontHeight, 100, RES_CHRATR_CJK_FONTSIZE );
3918  InsertAttr( &m_xAttrTab->pFontHeight, aFontHeightCJK, xCntxt.get() );
3919  SvxFontHeightItem aFontHeightCTL( nFontHeight, 100, RES_CHRATR_CTL_FONTSIZE );
3920  InsertAttr( &m_xAttrTab->pFontHeight, aFontHeightCTL, xCntxt.get() );
3921  }
3922  if( bColor )
3923  InsertAttr( &m_xAttrTab->pFontColor, SvxColorItem(aColor, RES_CHRATR_COLOR), xCntxt.get() );
3924  if( !aFontName.isEmpty() )
3925  {
3926  SvxFontItem aFont( eFamily, aFontName, aStyleName, ePitch, eEnc, RES_CHRATR_FONT );
3927  InsertAttr( &m_xAttrTab->pFont, aFont, xCntxt.get() );
3928  SvxFontItem aFontCJK( eFamily, aFontName, aStyleName, ePitch, eEnc, RES_CHRATR_CJK_FONT );
3929  InsertAttr( &m_xAttrTab->pFont, aFontCJK, xCntxt.get() );
3930  SvxFontItem aFontCTL( eFamily, aFontName, aStyleName, ePitch, eEnc, RES_CHRATR_CTL_FONT );
3931  InsertAttr( &m_xAttrTab->pFont, aFontCTL, xCntxt.get() );
3932  }
3933  }
3934 
3935  // save the context
3936  PushContext(xCntxt);
3937 
3938  m_aFontStack.push_back( nSize );
3939 }
3940 
3942 {
3943  EndTag( nToken );
3944 
3945  // avoid stack underflow in tables
3946  if( m_aFontStack.size() > m_nFontStMin )
3947  m_aFontStack.erase( m_aFontStack.begin() + m_aFontStack.size() - 1 );
3948 }
3949 
3951 {
// Starts a paragraph element: reads ID/ALIGN/STYLE/CLASS/LANG/DIR, creates a
// HtmlTokenId::PARABREAK_ON context (with the text style and class if a CLASS
// was given), applies inline style options and the alignment, pushes the
// context and remembers PARABREAK_ON as the open paragraph token.
// NOTE(review): the signature line of this function is not part of this listing.
3952  if( m_pPam->GetPoint()->nContent.GetIndex() )
// NOTE(review): the statement of this if-branch is missing from this listing.
3954  else
3955  AddParSpace();
3956 
// SvxAdjust::End serves as "no ALIGN given" marker, see the check further down.
3957  m_eParaAdjust = SvxAdjust::End;
3958  OUString aId, aStyle, aClass, aLang, aDir;
3959 
3960  const HTMLOptions& rHTMLOptions = GetOptions();
// Options are visited from last to first.
3961  for (size_t i = rHTMLOptions.size(); i; )
3962  {
3963  const HTMLOption& rOption = rHTMLOptions[--i];
3964  switch( rOption.GetToken() )
3965  {
3966  case HtmlOptionId::ID:
3967  aId = rOption.GetString();
3968  break;
3969  case HtmlOptionId::ALIGN:
3970  m_eParaAdjust = rOption.GetEnum( aHTMLPAlignTable, m_eParaAdjust );
3971  break;
3972  case HtmlOptionId::STYLE:
3973  aStyle = rOption.GetString();
3974  break;
3975  case HtmlOptionId::CLASS:
3976  aClass = rOption.GetString();
3977  break;
3978  case HtmlOptionId::LANG:
3979  aLang = rOption.GetString();
3980  break;
3981  case HtmlOptionId::DIR:
3982  aDir = rOption.GetString();
3983  break;
3984  default: break;
3985  }
3986  }
3987 
3988  // create a new context
3989  std::unique_ptr<HTMLAttrContext> xCntxt(
3990  !aClass.isEmpty() ? new HTMLAttrContext( HtmlTokenId::PARABREAK_ON,
3991  RES_POOLCOLL_TEXT, aClass )
3992  : new HTMLAttrContext( HtmlTokenId::PARABREAK_ON ));
3993 
3994  // parse styles (Don't consider class. This is only possible as long as none of
3995  // the CSS1 properties of the class must be formatted hard!!!)
3996  if (HasStyleOptions(aStyle, aId, {}, &aLang, &aDir))
3997  {
3998  SfxItemSet aItemSet( m_xDoc->GetAttrPool(), m_pCSS1Parser->GetWhichMap() );
3999  SvxCSS1PropertyInfo aPropInfo;
4000 
4001  if (ParseStyleOptions(aStyle, aId, OUString(), aItemSet, aPropInfo, &aLang, &aDir))
4002  {
4003  OSL_ENSURE( aClass.isEmpty() || !m_pCSS1Parser->GetClass( aClass ),
4004  "Class is not considered" );
4005  DoPositioning( aItemSet, aPropInfo, xCntxt.get() );
4006  InsertAttrs( aItemSet, aPropInfo, xCntxt.get() );
4007  }
4008  }
4009 
// Only insert an adjust attribute if ALIGN actually changed the marker value.
4010  if( SvxAdjust::End != m_eParaAdjust )
4011  InsertAttr( &m_xAttrTab->pAdjust, SvxAdjustItem(m_eParaAdjust, RES_PARATR_ADJUST), xCntxt.get() );
4012 
4013  // and push on stack
4014  PushContext( xCntxt );
4015 
4016  // set the current style or its attributes
4017  SetTextCollAttrs( !aClass.isEmpty() ? m_aContexts.back().get() : nullptr );
4018 
4019  // progress bar
4020  ShowStatline();
4021 
4022  OSL_ENSURE( m_nOpenParaToken == HtmlTokenId::NONE, "Now an open paragraph element will be lost." );
4023  m_nOpenParaToken = HtmlTokenId::PARABREAK_ON;
4024 }
4025 
// Ends the currently open paragraph element: pops the context belonging to
// m_nOpenParaToken (or PARABREAK_ON if none is recorded), ends its attributes
// and resets m_nOpenParaToken.
//
// bReal  if true, the paragraph handling at the start (new text node /
//        paragraph spacing) and the reset of the text style at the end are
//        performed as well
4026 void SwHTMLParser::EndPara( bool bReal )
4027 {
// Debug-only sanity check for a list item that is open inside a table.
4028  if (HtmlTokenId::LI_ON==m_nOpenParaToken && m_xTable)
4029  {
4030 #if OSL_DEBUG_LEVEL > 0
4031  const SwNumRule *pNumRule = m_pPam->GetNode().GetTextNode()->GetNumRule();
4032  OSL_ENSURE( pNumRule, "Where is the NumRule" );
4033 #endif
4034  }
4035 
4036  // Netscape skips empty paragraphs, we do the same.
4037  if( bReal )
4038  {
4039  if( m_pPam->GetPoint()->nContent.GetIndex() )
// NOTE(review): the statement of this if-branch is missing from this listing.
4041  else
4042  AddParSpace();
4043  }
4044 
4045  // If a DD or DT was open, it's an implied definition list,
4046  // which must be closed now.
4047  if( (m_nOpenParaToken == HtmlTokenId::DT_ON || m_nOpenParaToken == HtmlTokenId::DD_ON) &&
// NOTE(review): the second half of this condition is missing from this listing.
4049  {
4050  m_nDefListDeep--;
4051  }
4052 
4053  // Pop the context of the stack. It can also be from an
4054  // implied opened definition list.
4055  std::unique_ptr<HTMLAttrContext> xCntxt(
4056  PopContext( m_nOpenParaToken != HtmlTokenId::NONE ? getOnToken(m_nOpenParaToken) : HtmlTokenId::PARABREAK_ON ));
4057 
4058  // close attribute
4059  if (xCntxt)
4060  {
4061  EndContext(xCntxt.get());
4062  SetAttr(); // because of JavaScript set paragraph attributes as fast as possible
4063  xCntxt.reset();
4064  }
4065 
4066  // reset the existing style
4067  if( bReal )
4068  SetTextCollAttrs();
4069 
4070  m_nOpenParaToken = HtmlTokenId::NONE;
4071 }
4072 
4074 {
// Starts a heading: reads ID/ALIGN/STYLE/CLASS/LANG/DIR, maps nToken
// (HEAD1_ON..HEAD6_ON) to the matching headline pool style, creates a context
// with that style and class, applies inline style options and the alignment,
// pushes the context and sets the text style attributes.
// NOTE(review): the signature line of this function is not part of this listing.
// SvxAdjust::End serves as "no ALIGN given" marker, see the check further down.
4075  m_eParaAdjust = SvxAdjust::End;
4076 
4077  OUString aId, aStyle, aClass, aLang, aDir;
4078 
4079  const HTMLOptions& rHTMLOptions = GetOptions();
// Options are visited from last to first.
4080  for (size_t i = rHTMLOptions.size(); i; )
4081  {
4082  const HTMLOption& rOption = rHTMLOptions[--i];
4083  switch( rOption.GetToken() )
4084  {
4085  case HtmlOptionId::ID:
4086  aId = rOption.GetString();
4087  break;
4088  case HtmlOptionId::ALIGN:
4089  m_eParaAdjust = rOption.GetEnum( aHTMLPAlignTable, m_eParaAdjust );
4090  break;
4091  case HtmlOptionId::STYLE:
4092  aStyle = rOption.GetString();
4093  break;
4094  case HtmlOptionId::CLASS:
4095  aClass = rOption.GetString();
4096  break;
4097  case HtmlOptionId::LANG:
4098  aLang = rOption.GetString();
4099  break;
4100  case HtmlOptionId::DIR:
4101  aDir = rOption.GetString();
4102  break;
4103  default: break;
4104  }
4105  }
4106 
4107  // open a new paragraph
4108  if( m_pPam->GetPoint()->nContent.GetIndex() )
// NOTE(review): the statement of this if-branch is missing from this listing.
4110  else
4111  AddParSpace();
4112 
4113  // search for the matching style
4114  sal_uInt16 nTextColl;
4115  switch( nToken )
4116  {
4117  case HtmlTokenId::HEAD1_ON: nTextColl = RES_POOLCOLL_HEADLINE1; break;
4118  case HtmlTokenId::HEAD2_ON: nTextColl = RES_POOLCOLL_HEADLINE2; break;
4119  case HtmlTokenId::HEAD3_ON: nTextColl = RES_POOLCOLL_HEADLINE3; break;
4120  case HtmlTokenId::HEAD4_ON: nTextColl = RES_POOLCOLL_HEADLINE4; break;
4121  case HtmlTokenId::HEAD5_ON: nTextColl = RES_POOLCOLL_HEADLINE5; break;
4122  case HtmlTokenId::HEAD6_ON: nTextColl = RES_POOLCOLL_HEADLINE6; break;
4123  default: nTextColl = RES_POOLCOLL_STANDARD; break;
4124  }
4125 
4126  // create the context
4127  std::unique_ptr<HTMLAttrContext> xCntxt(new HTMLAttrContext(nToken, nTextColl, aClass));
4128 
4129  // parse styles (regarding class see also NewPara)
4130  if (HasStyleOptions(aStyle, aId, {}, &aLang, &aDir))
4131  {
4132  SfxItemSet aItemSet( m_xDoc->GetAttrPool(), m_pCSS1Parser->GetWhichMap() );
4133  SvxCSS1PropertyInfo aPropInfo;
4134 
4135  if (ParseStyleOptions(aStyle, aId, OUString(), aItemSet, aPropInfo, &aLang, &aDir))
4136  {
4137  OSL_ENSURE( aClass.isEmpty() || !m_pCSS1Parser->GetClass( aClass ),
4138  "Class is not considered" );
4139  DoPositioning( aItemSet, aPropInfo, xCntxt.get() );
4140  InsertAttrs( aItemSet, aPropInfo, xCntxt.get() );
4141  }
4142  }
4143 
// Only insert an adjust attribute if ALIGN actually changed the marker value.
4144  if( SvxAdjust::End != m_eParaAdjust )
4145  InsertAttr( &m_xAttrTab->pAdjust, SvxAdjustItem(m_eParaAdjust, RES_PARATR_ADJUST), xCntxt.get() );
4146 
4147  // and push on stack
4148  PushContext(xCntxt);
4149 
4150  // set the current style or its attributes
4151  SetTextCollAttrs(m_aContexts.back().get());
// NOTE(review): the embedded listing numbers skip at this point, so a
// statement may be missing here - confirm against the upstream file.
4154 
4155  // progress bar
4156  ShowStatline();
4157 }
4158 
4160 {
// Ends a heading: removes the topmost HEAD1_ON..HEAD6_ON context from the
// context stack (searching down to m_nContextStMin), ends its attributes and
// resets the text style attributes.
// NOTE(review): the signature line of this function is not part of this listing.
4161  // open a new paragraph
4162  if( m_pPam->GetPoint()->nContent.GetIndex() )
// NOTE(review): the statement of this if-branch is missing from this listing.
4164  else
4165  AddParSpace();
4166 
4167  // search context matching the token and fetch it from stack
4168  std::unique_ptr<HTMLAttrContext> xCntxt;
4169  auto nPos = m_aContexts.size();
// Scan from the top of the stack downwards; the loop ends as soon as a
// heading context has been moved into xCntxt.
4170  while( !xCntxt && nPos>m_nContextStMin )
4171  {
4172  switch( m_aContexts[--nPos]->GetToken() )
4173  {
4174  case HtmlTokenId::HEAD1_ON:
4175  case HtmlTokenId::HEAD2_ON:
4176  case HtmlTokenId::HEAD3_ON:
4177  case HtmlTokenId::HEAD4_ON:
4178  case HtmlTokenId::HEAD5_ON:
4179  case HtmlTokenId::HEAD6_ON:
4180  xCntxt = std::move(m_aContexts[nPos]);
4181  m_aContexts.erase( m_aContexts.begin() + nPos );
4182  break;
4183  default: break;
4184  }
4185  }
4186 
4187  // and now end attributes
4188  if (xCntxt)
4189  {
4190  EndContext(xCntxt.get());
4191  SetAttr(); // because of JavaScript set paragraph attributes as fast as possible
4192  xCntxt.reset();
4193  }
4194 
4195  // reset existing style
4196  SetTextCollAttrs();
4197 
// NOTE(review): the embedded listing numbers skip at this point, so a
// statement may be missing here - confirm against the upstream file.
4199 }
4200 
// Starts a block element that maps to a text style: reads
// ID/STYLE/CLASS/LANG/DIR, opens a new paragraph in a mode that depends on
// nToken, creates a context carrying the style and class, applies inline
// style options, pushes the context and sets the new style.
//
// nToken  start token of the element (see the switch below for the handled ones)
// nColl   id of the text style stored in the context
4201 void SwHTMLParser::NewTextFormatColl( HtmlTokenId nToken, sal_uInt16 nColl )
4202 {
4203  OUString aId, aStyle, aClass, aLang, aDir;
4204 
4205  const HTMLOptions& rHTMLOptions = GetOptions();
// Options are visited from last to first.
4206  for (size_t i = rHTMLOptions.size(); i; )
4207  {
4208  const HTMLOption& rOption = rHTMLOptions[--i];
4209  switch( rOption.GetToken() )
4210  {
4211  case HtmlOptionId::ID:
4212  aId = rOption.GetString();
4213  break;
4214  case HtmlOptionId::STYLE:
4215  aStyle = rOption.GetString();
4216  break;
4217  case HtmlOptionId::CLASS:
4218  aClass = rOption.GetString();
4219  break;
4220  case HtmlOptionId::LANG:
4221  aLang = rOption.GetString();
4222  break;
4223  case HtmlOptionId::DIR:
4224  aDir = rOption.GetString();
4225  break;
4226  default: break;
4227  }
4228  }
4229 
4230  // open a new paragraph
// NOTE(review): the declaration of eMode is missing from this listing; its
// initial value (used for tokens that hit the default case) cannot be seen here.
4232  switch( nToken )
4233  {
4234  case HtmlTokenId::LISTING_ON:
4235  case HtmlTokenId::XMP_ON:
4236  // These both tags will be mapped to the PRE style. For the case that a
4237  // a CLASS exists we will delete it so that we don't get the CLASS of
4238  // the PRE style.
4239  aClass.clear();
4240  [[fallthrough]];
4241  case HtmlTokenId::BLOCKQUOTE_ON:
4242  case HtmlTokenId::BLOCKQUOTE30_ON:
4243  case HtmlTokenId::PREFORMTXT_ON:
4244  eMode = AM_SPACE;
4245  break;
4246  case HtmlTokenId::ADDRESS_ON:
4247  eMode = AM_NOSPACE; // ADDRESS can follow on a <P> without </P>
4248  break;
4249  case HtmlTokenId::DT_ON:
4250  case HtmlTokenId::DD_ON:
4251  eMode = AM_SOFTNOSPACE;
4252  break;
4253  default:
4254  OSL_ENSURE( false, "unknown style" );
4255  break;
4256  }
// A non-empty current paragraph gets a new text node; an empty one only gets
// paragraph spacing, and only in AM_SPACE mode.
4257  if( m_pPam->GetPoint()->nContent.GetIndex() )
4258  AppendTextNode( eMode );
4259  else if( AM_SPACE==eMode )
4260  AddParSpace();
4261 
4262  // ... and save in a context
4263  std::unique_ptr<HTMLAttrContext> xCntxt(new HTMLAttrContext(nToken, nColl, aClass));
4264 
4265  // parse styles (regarding class see also NewPara)
4266  if (HasStyleOptions(aStyle, aId, {}, &aLang, &aDir))
4267  {
4268  SfxItemSet aItemSet( m_xDoc->GetAttrPool(), m_pCSS1Parser->GetWhichMap() );
4269  SvxCSS1PropertyInfo aPropInfo;
4270 
4271  if (ParseStyleOptions(aStyle, aId, OUString(), aItemSet, aPropInfo, &aLang, &aDir))
4272  {
4273  OSL_ENSURE( aClass.isEmpty() || !m_pCSS1Parser->GetClass( aClass ),
4274  "Class is not considered" );
4275  DoPositioning( aItemSet, aPropInfo, xCntxt.get() );
4276  InsertAttrs( aItemSet, aPropInfo, xCntxt.get() );
4277  }
4278  }
4279 
4280  PushContext(xCntxt);
4281 
4282  // set the new style
4283  SetTextCollAttrs(m_aContexts.back().get());
4284 
4285  // update progress bar
4286  ShowStatline();
4287 }
4288 
4290 {
// Ends a block element that was mapped to a text style: opens a new paragraph
// in a mode that depends on the "on" form of nToken, pops the matching
// context, ends its attributes and resets the text style attributes.
// NOTE(review): the signature line of this function and the declaration of
// eMode are missing from this listing.
4292  switch( getOnToken(nToken) )
4293  {
4294  case HtmlTokenId::BLOCKQUOTE_ON:
4295  case HtmlTokenId::BLOCKQUOTE30_ON:
4296  case HtmlTokenId::PREFORMTXT_ON:
4297  case HtmlTokenId::LISTING_ON:
4298  case HtmlTokenId::XMP_ON:
4299  eMode = AM_SPACE;
4300  break;
4301  case HtmlTokenId::ADDRESS_ON:
4302  case HtmlTokenId::DT_ON:
4303  case HtmlTokenId::DD_ON:
4304  eMode = AM_SOFTNOSPACE;
4305  break;
4306  default:
4307  OSL_ENSURE( false, "unknown style" );
4308  break;
4309  }
// A non-empty current paragraph gets a new text node; an empty one only gets
// paragraph spacing, and only in AM_SPACE mode.
4310  if( m_pPam->GetPoint()->nContent.GetIndex() )
4311  AppendTextNode( eMode );
4312  else if( AM_SPACE==eMode )
4313  AddParSpace();
4314 
4315  // pop current context of stack
4316  std::unique_ptr<HTMLAttrContext> xCntxt(PopContext(getOnToken(nToken)));
4317 
4318  // and now end attributes
4319  if (xCntxt)
4320  {
4321  EndContext(xCntxt.get());
4322  SetAttr(); // because of JavaScript set paragraph attributes as fast as possible
4323  xCntxt.reset();
4324  }
4325 
4326  // reset existing style
4327  SetTextCollAttrs();
4328 }
4329 
// NOTE(review): the signature line (listing line 4330) is missing from this
// extract. The body pushes an HtmlTokenId::DEFLIST_ON context, so this is
// presumably the handler for an opening definition list -- confirm against
// the real file.
//
// Steps performed by the visible body:
//  1. read ID/STYLE/CLASS/LANG/DIR from the options of the current tag,
//  2. start a new paragraph (or only add paragraph spacing),
//  3. increment m_nDefListDeep,
//  4. compute the margins for the new context and store them in it,
//  5. apply the style options to the context and push it on the stack.
4331 {
4332  OUString aId, aStyle, aClass, aLang, aDir;
4333 
4334  const HTMLOptions& rHTMLOptions = GetOptions();
// the options are visited from the last one to the first one
4335  for (size_t i = rHTMLOptions.size(); i; )
4336  {
4337  const HTMLOption& rOption = rHTMLOptions[--i];
4338  switch( rOption.GetToken() )
4339  {
4340  case HtmlOptionId::ID:
4341  aId = rOption.GetString();
4342  break;
4343  case HtmlOptionId::STYLE:
4344  aStyle = rOption.GetString();
4345  break;
4346  case HtmlOptionId::CLASS:
4347  aClass = rOption.GetString();
4348  break;
4349  case HtmlOptionId::LANG:
4350  aLang = rOption.GetString();
4351  break;
4352  case HtmlOptionId::DIR:
4353  aDir = rOption.GetString();
4354  break;
4355  default: break;
4356  }
4357  }
4358 
4359  // open a new paragraph
// bSpace is only true when neither a numbering level nor a definition list
// is currently open (both counters add up to zero)
4360  bool bSpace = (GetNumInfo().GetDepth() + m_nDefListDeep) == 0;
4361  if( m_pPam->GetPoint()->nContent.GetIndex() )
4362  AppendTextNode( bSpace ? AM_SPACE : AM_SOFTNOSPACE );
4363  else if( bSpace )
4364  AddParSpace();
4365 
4366  // one level more
4367  m_nDefListDeep++;
4368 
// Walk the context stack from the top down to m_nContextStMin and stop at the
// first list context (bNotInDD) or the first DD context (bInDD).
4369  bool bInDD = false, bNotInDD = false;
4370  auto nPos = m_aContexts.size();
4371  while( !bInDD && !bNotInDD && nPos>m_nContextStMin )
4372  {
4373  HtmlTokenId nCntxtToken = m_aContexts[--nPos]->GetToken();
4374  switch( nCntxtToken )
4375  {
4376  case HtmlTokenId::DEFLIST_ON:
4377  case HtmlTokenId::DIRLIST_ON:
4378  case HtmlTokenId::MENULIST_ON:
4379  case HtmlTokenId::ORDERLIST_ON:
4380  case HtmlTokenId::UNORDERLIST_ON:
4381  bNotInDD = true;
4382  break;
4383  case HtmlTokenId::DD_ON:
4384  bInDD = true;
4385  break;
4386  default: break;
4387  }
4388  }
4389 
4390  // ... and save in a context
4391  std::unique_ptr<HTMLAttrContext> xCntxt(new HTMLAttrContext(HtmlTokenId::DEFLIST_ON));
4392 
4393  // in it save also the margins
4394  sal_uInt16 nLeft=0, nRight=0;
4395  short nIndent=0;
4396  GetMarginsFromContext( nLeft, nRight, nIndent );
4397 
4398  // The indentation, which already results from a DL, correlates with a DT
4399  // on the current level and this correlates to a DD from the previous level.
4400  // For a level >=2 we must add DD distance.
4401  if( !bInDD && m_nDefListDeep > 1 )
4402  {
4403 
4404  // and the one of the DT-style of the current level
// NOTE(review): the comment above mentions the DT style, but the code below
// reads the text-left value of the RES_POOLCOLL_HTML_DD collection.
4405  SvxLRSpaceItem rLRSpace =
4406  m_pCSS1Parser->GetTextFormatColl(RES_POOLCOLL_HTML_DD, OUString())
4407  ->GetLRSpace();
4408  nLeft = nLeft + static_cast< sal_uInt16 >(rLRSpace.GetTextLeft());
4409  }
4410 
4411  xCntxt->SetMargins( nLeft, nRight, nIndent );
4412 
4413  // parse styles
4414  if( HasStyleOptions( aStyle, aId, aClass, &aLang, &aDir ) )
4415  {
4416  SfxItemSet aItemSet( m_xDoc->GetAttrPool(), m_pCSS1Parser->GetWhichMap() );
4417  SvxCSS1PropertyInfo aPropInfo;
4418 
4419  if( ParseStyleOptions( aStyle, aId, aClass, aItemSet, aPropInfo, &aLang, &aDir ) )
4420  {
4421  DoPositioning( aItemSet, aPropInfo, xCntxt.get() );
4422  InsertAttrs( aItemSet, aPropInfo, xCntxt.get() );
4423  }
4424  }
4425 
// after this call the context is reached through m_aContexts.back()
4426  PushContext(xCntxt);
4427 
4428  // set the attributes of the new style
// (only done for nested definition lists, i.e. depth greater than one)
4429  if( m_nDefListDeep > 1 )
4430  SetTextCollAttrs(m_aContexts.back().get());
4431 }
4432 
// NOTE(review): the signature line (listing line 4433) is missing from this
// extract. The body pops an HtmlTokenId::DEFLIST_ON context, so this is
// presumably the handler for a closing definition list -- confirm.
//
// Starts a new paragraph (or only adds paragraph spacing), decrements
// m_nDefListDeep without letting it go below zero, pops the DEFLIST_ON
// context, ends its attributes and finally re-applies the paragraph style.
4434 {
// bSpace is true when the list being closed is the only open list level
// (numbering depth plus definition list depth equals one)
4435  bool bSpace = (GetNumInfo().GetDepth() + m_nDefListDeep) == 1;
4436  if( m_pPam->GetPoint()->nContent.GetIndex() )
4437  AppendTextNode( bSpace ? AM_SPACE : AM_SOFTNOSPACE );
4438  else if( bSpace )
4439  AddParSpace();
4440 
4441  // one level less
4442  if( m_nDefListDeep > 0 )
4443  m_nDefListDeep--;
4444 
4445  // pop current context of stack
4446  std::unique_ptr<HTMLAttrContext> xCntxt(PopContext(HtmlTokenId::DEFLIST_ON));
4447 
4448  // and now end attributes
// (PopContext may hand back nothing, hence the check)
4449  if (xCntxt)
4450  {
4451  EndContext(xCntxt.get());
4452  SetAttr(); // because of JavaScript set paragraph attributes as fast as possible
4453  xCntxt.reset();
4454  }
4455 
4456  // and set style
4457  SetTextCollAttrs();
4458 }
4459 
// NOTE(review): the signature line (listing line 4460) is missing from this
// extract. The body uses an HtmlTokenId named nToken and compares it with
// HtmlTokenId::DD_ON, so this is presumably the handler for an opening DD/DT
// item taking the token as its parameter -- confirm.
//
// Checks whether the item sits inside a definition list; if it does not, a
// list level is opened implicitly. Afterwards the paragraph style for the
// item (DD or DT pool collection) is started through NewTextFormatColl().
4461 {
4462  // determine if the DD/DT exist in a DL
// The context stack is searched from the top down to m_nContextStMin; the
// search ends at the first definition list or at the first other list.
4463  bool bInDefList = false, bNotInDefList = false;
4464  auto nPos = m_aContexts.size();
4465  while( !bInDefList && !bNotInDefList && nPos>m_nContextStMin )
4466  {
4467  HtmlTokenId nCntxtToken = m_aContexts[--nPos]->GetToken();
4468  switch( nCntxtToken )
4469  {
4470  case HtmlTokenId::DEFLIST_ON:
4471  bInDefList = true;
4472  break;
4473  case HtmlTokenId::DIRLIST_ON:
4474  case HtmlTokenId::MENULIST_ON:
4475  case HtmlTokenId::ORDERLIST_ON:
4476  case HtmlTokenId::UNORDERLIST_ON:
4477  bNotInDefList = true;
4478  break;
4479  default: break;
4480  }
4481  }
4482 
4483  // if not, then implicitly open a new DL
// Only the depth counter is raised here, no DEFLIST_ON context is pushed;
// the item token is remembered in m_nOpenParaToken.
4484  if( !bInDefList )
4485  {
4486  m_nDefListDeep++;
4487  OSL_ENSURE( m_nOpenParaToken == HtmlTokenId::NONE,
4488  "Now an open paragraph element will be lost." );
4489  m_nOpenParaToken = nToken;
4490  }
4491 
// DD_ON selects RES_POOLCOLL_HTML_DD, every other token RES_POOLCOLL_HTML_DT
4492  NewTextFormatColl( nToken, static_cast< sal_uInt16 >(nToken==HtmlTokenId::DD_ON ? RES_POOLCOLL_HTML_DD
4493  : RES_POOLCOLL_HTML_DT) );
4494 }
4495 
// NOTE(review): the signature line (listing line 4496) is missing from this
// extract. The body assigns to an HtmlTokenId named nToken and removes a
// DD_ON/DT_ON context, so this is presumably the handler for a closing DD/DT
// item -- confirm.
// NOTE(review): listing lines 4500 and 4526 are missing as well, so the
// statement controlled by the first `if` and the statement shared by the list
// cases before their `break` are not visible here. Do not edit this block
// without consulting the real file.
//
// Visible behaviour: looks from the top of the context stack for a DD_ON or
// DT_ON context that matches nToken (any of the two when nToken is NONE),
// removes it from the stack and ends its attributes.
4497 {
4498  // open a new paragraph
4499  if( nToken == HtmlTokenId::NONE && m_pPam->GetPoint()->nContent.GetIndex() )
4501 
4502  // search context matching the token and fetch it from stack
4503  nToken = getOnToken(nToken);
4504  std::unique_ptr<HTMLAttrContext> xCntxt;
4505  auto nPos = m_aContexts.size();
4506  while( !xCntxt && nPos>m_nContextStMin )
4507  {
4508  HtmlTokenId nCntxtToken = m_aContexts[--nPos]->GetToken();
4509  switch( nCntxtToken )
4510  {
4511  case HtmlTokenId::DD_ON:
4512  case HtmlTokenId::DT_ON:
4513  if( nToken == HtmlTokenId::NONE || nToken == nCntxtToken )
4514  {
// take ownership of the context first, then drop the emptied slot
4515  xCntxt = std::move(m_aContexts[nPos]);
4516  m_aContexts.erase( m_aContexts.begin() + nPos );
4517  }
4518  break;
4519  case HtmlTokenId::DEFLIST_ON:
4520  // don't look at DD/DT outside the current DefList
4521  case HtmlTokenId::DIRLIST_ON:
4522  case HtmlTokenId::MENULIST_ON:
4523  case HtmlTokenId::ORDERLIST_ON:
4524  case HtmlTokenId::UNORDERLIST_ON:
4525  // and also not outside another list
4527  break;
4528  default: break;
4529  }
4530  }
4531 
4532  // and now end attributes
4533  if (xCntxt)
4534  {
4535  EndContext(xCntxt.get());
4536  SetAttr(); // because of JavaScript set paragraph attributes as fast as possible
4537  }
4538 }
4539 
// Tells whether frames are anchored in the paragraph at the current position.
//
// Only formats from the document's special frame format table are looked at
// whose anchor has a content position, whose anchor id is FLY_AT_PARA or
// FLY_AT_CHAR and whose anchor node equals the node of m_pPam's point.
//
// bNoSurroundOnly  when true, only a frame with WrapTextMode_NONE counts;
//                  the search stops with `true` at the first such frame.
// bSurroundOnly    when true, a frame with a wrap mode other than NONE and
//                  THROUGH counts, but a later frame with WrapTextMode_NONE
//                  resets the result to `false` and stops the search.
// With both flags false the first matching frame yields `true`.
//
// Returns the result of the search as described above.
4549 bool SwHTMLParser::HasCurrentParaFlys( bool bNoSurroundOnly,
4550  bool bSurroundOnly ) const
4551 {
4552  SwNodeIndex& rNodeIdx = m_pPam->GetPoint()->nNode;
4553 
4554  const SwFrameFormats& rFrameFormatTable = *m_xDoc->GetSpzFrameFormats();
4555 
4556  bool bFound = false;
4557  for ( size_t i=0; i<rFrameFormatTable.size(); i++ )
4558  {
4559  const SwFrameFormat *const pFormat = rFrameFormatTable[i];
4560  SwFormatAnchor const*const pAnchor = &pFormat->GetAnchor();
4561  // A frame was found, when
4562  // - it is paragraph-bound, and
4563  // - is anchored in current paragraph, and
4564  // - every paragraph-bound frame counts, or
4565  // - (only frames without wrapping count and) the frame doesn't have
4566  // a wrapping
4567  SwPosition const*const pAPos = pAnchor->GetContentAnchor();
4568  if (pAPos &&
4569  ((RndStdIds::FLY_AT_PARA == pAnchor->GetAnchorId()) ||
4570  (RndStdIds::FLY_AT_CHAR == pAnchor->GetAnchorId())) &&
4571  pAPos->nNode == rNodeIdx )
4572  {
// no filter requested: the first anchored frame is enough
4573  if( !(bNoSurroundOnly || bSurroundOnly) )
4574  {
4575  bFound = true;
4576  break;
4577  }
4578  else
4579  {
4580  // When looking for frames with wrapping, also disregard
4581  // ones with wrap-through. In this case it's (still) HIDDEN-Controls,
4582  // and you don't want to evade those when positioning.
4583  css::text::WrapTextMode eSurround = pFormat->GetSurround().GetSurround();
4584  if( bNoSurroundOnly )
4585  {
4586  if( css::text::WrapTextMode_NONE==eSurround )
4587  {
4588  bFound = true;
4589  break;
4590  }
4591  }
// not an `else`: both flags are evaluated independently of each other
4592  if( bSurroundOnly )
4593  {
4594  if( css::text::WrapTextMode_NONE==eSurround )
4595  {
// a frame without wrapping overrides anything found before
4596  bFound = false;
4597  break;
4598  }
4599  else if( css::text::WrapTextMode_THROUGH!=eSurround )
4600  {
4601  bFound = true;
4602  // Continue searching: It's possible that some without
4603  // wrapping will follow...
4604  }
4605  }
4606  }
4607  }
4608  }
4609 
4610  return bFound;
4611 }
4612 
4613 // the special methods for inserting of objects
4614 
// NOTE(review): the signature line (listing line 4615) is missing from this
// extract; presumably this is the GetCurrFormatColl() that other code in
// this file calls -- confirm.
//
// Returns the address of the format collection reported by GetAnyFormatColl()
// for the content node of m_pPam, or nullptr when m_pPam has no content node.
4616 {
4617  const SwContentNode* pCNd = m_pPam->GetContentNode();
4618  return pCNd ? &pCNd->GetAnyFormatColl() : nullptr;
4619 }
4620 
// NOTE(review): the signature line (listing line 4621) is missing from this
// extract. The body reads and writes a possibly null HTMLAttrContext pointer
// named pContext; callers in this file invoke SetTextCollAttrs() with and
// without a context argument, so this is presumably that function -- confirm.
//
// Determines the paragraph style and the paragraph margins that result from
// all contexts starting at m_nContextStAttrMin and applies them at m_pPam:
//  - the last settable style on the stack becomes the paragraph style,
//  - attributes of styles that had to give way are collected in pItemSet and
//    inserted as hard paragraph attributes,
//  - when pContext names a style, the accumulated margins and the style's
//    upper/lower spacing are stored in pContext,
//  - if the resulting margins differ from those of the style, an LR space
//    item is set in addition.
//
// pItemSet is owned by this function: it is allocated with `new` on demand
// and released with `delete` at the very end.
4622 {
4623  SwTextFormatColl *pCollToSet = nullptr; // the style to set
4624  SfxItemSet *pItemSet = nullptr; // set of hard attributes
4625  sal_uInt16 nTopColl = pContext ? pContext->GetTextFormatColl() : 0;
4626  const OUString rTopClass = pContext ? pContext->GetClass() : OUString();
4627  sal_uInt16 nDfltColl = RES_POOLCOLL_TEXT;
4628 
4629  bool bInPRE=false; // some context info
4630 
4631  sal_uInt16 nLeftMargin = 0, nRightMargin = 0; // the margins and
4632  short nFirstLineIndent = 0; // indentations
4633 
// the contexts are visited in stack order, starting at m_nContextStAttrMin
4634  for( auto i = m_nContextStAttrMin; i < m_aContexts.size(); ++i )
4635  {
4636  const HTMLAttrContext *pCntxt = m_aContexts[i].get();
4637 
4638  sal_uInt16 nColl = pCntxt->GetTextFormatColl();
4639  if( nColl )
4640  {
4641  // There is a style to set. Then at first we must decide,
4642  // if the style can be set.
4643  bool bSetThis = true;
4644  switch( nColl )
4645  {
4646  case RES_POOLCOLL_HTML_PRE:
4647  bInPRE = true;
4648  break;
4649  case RES_POOLCOLL_TEXT:
4650  // <TD><P CLASS=xxx> must become TD.xxx
4651  if( nDfltColl==RES_POOLCOLL_TABLE ||
4652  nDfltColl==RES_POOLCOLL_TABLE_HDLN )
4653  nColl = nDfltColl;
4654  break;
4655  case RES_POOLCOLL_HTML_HR:
4656  // also <HR> in <PRE> set as style, otherwise it can't
4657  // be exported anymore
4658  break;
4659  default:
// every other style found after a PRE style is only applied as hard
// attributes (see the else branch of `if( bSetThis )` below)
4660  if( bInPRE )
4661  bSetThis = false;
4662  break;
4663  }
4664 
4665  SwTextFormatColl *pNewColl =
4666  m_pCSS1Parser->GetTextFormatColl( nColl, pCntxt->GetClass() );
4667 
4668  if( bSetThis )
4669  {
4670  // If now a different style should be set as previously, the
4671  // previous style must be replaced by hard attribution.
4672 
4673  if( pCollToSet )
4674  {
4675  // insert the attributes hard, which previous style sets
4676  if( !pItemSet )
4677  pItemSet = new SfxItemSet( pCollToSet->GetAttrSet() );
4678  else
4679  {
4680  const SfxItemSet& rCollSet = pCollToSet->GetAttrSet();
4681  SfxItemSet aItemSet( *rCollSet.GetPool(),
4682  rCollSet.GetRanges() );
4683  aItemSet.Set( rCollSet );
4684  pItemSet->Put( aItemSet );
4685  }
4686  // but remove the attributes, which the current style sets,
4687  // because otherwise they will be overwritten later
4688  pItemSet->Differentiate( pNewColl->GetAttrSet() );
4689  }
4690 
4691  pCollToSet = pNewColl;
4692  }
4693  else
4694  {
4695  // hard attribution
4696  if( !pItemSet )
4697  pItemSet = new SfxItemSet( pNewColl->GetAttrSet() );
4698  else
4699  {
4700  const SfxItemSet& rCollSet = pNewColl->GetAttrSet();
4701  SfxItemSet aItemSet( *rCollSet.GetPool(),
4702  rCollSet.GetRanges() );
4703  aItemSet.Set( rCollSet );
4704  pItemSet->Put( aItemSet );
4705  }
4706  }
4707  }
4708  else
4709  {
4710  // Maybe a default style exists?
4711  nColl = pCntxt->GetDefaultTextFormatColl();
4712  if( nColl )
4713  nDfltColl = nColl;
4714  }
4715 
4716  // if applicable fetch new paragraph indents
// a later context with changed margins replaces the values of earlier ones
4717  if( pCntxt->IsLRSpaceChanged() )
4718  {
4719  sal_uInt16 nLeft=0, nRight=0;
4720 
4721  pCntxt->GetMargins( nLeft, nRight, nFirstLineIndent );
4722  nLeftMargin = nLeft;
4723  nRightMargin = nRight;
4724  }
4725  }
4726 
4727  // If in current context a new style should be set,
4728  // its paragraph margins must be inserted in the context.
4729  if( pContext && nTopColl )
4730  {
4731  // <TD><P CLASS=xxx> must become TD.xxx
4732  if( nTopColl==RES_POOLCOLL_TEXT &&
4733  (nDfltColl==RES_POOLCOLL_TABLE ||
4734  nDfltColl==RES_POOLCOLL_TABLE_HDLN) )
4735  nTopColl = nDfltColl;
4736 
4737  const SwTextFormatColl *pTopColl =
4738  m_pCSS1Parser->GetTextFormatColl( nTopColl, rTopClass );
4739  const SfxItemSet& rItemSet = pTopColl->GetAttrSet();
4740  const SfxPoolItem *pItem;
4741  if( SfxItemState::SET == rItemSet.GetItemState(RES_LR_SPACE,true, &pItem) )
4742  {
4743  const SvxLRSpaceItem *pLRItem =
4744  static_cast<const SvxLRSpaceItem *>(pItem);
4745 
4746  sal_Int32 nLeft = pLRItem->GetTextLeft();
4747  sal_Int32 nRight = pLRItem->GetRight();
4748  nFirstLineIndent = pLRItem->GetTextFirstLineOffset();
4749 
4750  // In Definition lists the margins also contain the margins from the previous levels
4751  if( RES_POOLCOLL_HTML_DD == nTopColl )
4752  {
4753  const SvxLRSpaceItem& rDTLRSpace = m_pCSS1Parser
4754  ->GetTextFormatColl(RES_POOLCOLL_HTML_DT, OUString())
4755  ->GetLRSpace();
4756  nLeft -= rDTLRSpace.GetTextLeft();
4757  nRight -= rDTLRSpace.GetRight();
4758  }
4759  else if( RES_POOLCOLL_HTML_DT == nTopColl )
4760  {
4761  nLeft = 0;
4762  nRight = 0;
4763  }
4764 
4765  // the paragraph margins add up
// NOTE(review): nLeft/nRight are sal_Int32 and may have become negative by
// the subtraction above; the casts below narrow them to sal_uInt16 -- verify
// that this wrap-around is intended.
4766  nLeftMargin = nLeftMargin + static_cast< sal_uInt16 >(nLeft);
4767  nRightMargin = nRightMargin + static_cast< sal_uInt16 >(nRight);
4768 
4769  pContext->SetMargins( nLeftMargin, nRightMargin,
4770  nFirstLineIndent );
4771  }
4772  if( SfxItemState::SET == rItemSet.GetItemState(RES_UL_SPACE,true, &pItem) )
4773  {
4774  const SvxULSpaceItem *pULItem =
4775  static_cast<const SvxULSpaceItem *>(pItem);
4776  pContext->SetULSpace( pULItem->GetUpper(), pULItem->GetLower() );
4777  }
4778  }
4779 
4780  // If no style is set in the context use the text body.
// (more precisely: the pool collection selected by nDfltColl; margins that
// are still zero are taken over from that collection)
4781  if( !pCollToSet )
4782  {
4783  pCollToSet = m_pCSS1Parser->GetTextCollFromPool( nDfltColl );
4784  const SvxLRSpaceItem& rLRItem = pCollToSet->GetLRSpace();
4785  if( !nLeftMargin )
4786  nLeftMargin = static_cast< sal_uInt16 >(rLRItem.GetTextLeft());
4787  if( !nRightMargin )
4788  nRightMargin = static_cast< sal_uInt16 >(rLRItem.GetRight());
4789  if( !nFirstLineIndent )
4790  nFirstLineIndent = rLRItem.GetTextFirstLineOffset();
4791  }
4792 
4793  // remove previous hard attribution of paragraph
4794  for( auto pParaAttr : m_aParaAttrs )
4795  pParaAttr->Invalidate();
4796  m_aParaAttrs.clear();
4797 
4798  // set the style
4799  m_xDoc->SetTextFormatColl( *m_pPam, pCollToSet );
4800 
4801  // if applicable correct the paragraph indent
4802  const SvxLRSpaceItem& rLRItem = pCollToSet->GetLRSpace();
4803  bool bSetLRSpace = nLeftMargin != rLRItem.GetTextLeft() ||
4804  nFirstLineIndent != rLRItem.GetTextFirstLineOffset() ||
4805  nRightMargin != rLRItem.GetRight();
4806 
4807  if( bSetLRSpace )
4808  {
4809  SvxLRSpaceItem aLRItem( rLRItem );
4810  aLRItem.SetTextLeft( nLeftMargin );
4811  aLRItem.SetRight( nRightMargin );
4812  aLRItem.SetTextFirstLineOffset( nFirstLineIndent );
// with an existing hard attribute set the item simply joins that set,
// otherwise it is opened and closed as a paragraph attribute of its own
4813  if( pItemSet )
4814  pItemSet->Put( aLRItem );
4815  else
4816  {
4817  NewAttr(m_xAttrTab, &m_xAttrTab->pLRSpace, aLRItem);
4818  m_xAttrTab->pLRSpace->SetLikePara();
4819  m_aParaAttrs.push_back( m_xAttrTab->pLRSpace );
4820  EndAttr( m_xAttrTab->pLRSpace, false );
4821  }
4822  }
4823 
4824  // and now set the attributes
4825  if( pItemSet )
4826  {
4827  InsertParaAttrs( *pItemSet );
// releases the set allocated with `new` further up
4828  delete pItemSet;
4829  }
4830 }
4831 
// NOTE(review): the signature line (listing line 4832) is missing from this
// extract. The body creates a context for an HtmlTokenId named nToken and
// inserts a character format for it, so this is presumably the handler that
// opens a character-format element -- confirm.
//
// Reads ID/STYLE/CLASS/LANG/DIR from the options of the current tag, looks up
// the character format for nToken and the class, applies style options (the
// class is deliberately left out there) and pushes the new context.
4833 {
4834  OUString aId, aStyle, aLang, aDir;
4835  OUString aClass;
4836 
4837  const HTMLOptions& rHTMLOptions = GetOptions();
// the options are visited from the last one to the first one
4838  for (size_t i = rHTMLOptions.size(); i; )
4839  {
4840  const HTMLOption& rOption = rHTMLOptions[--i];
4841  switch( rOption.GetToken() )
4842  {
4843  case HtmlOptionId::ID:
4844  aId = rOption.GetString();
4845  break;
4846  case HtmlOptionId::STYLE:
4847  aStyle = rOption.GetString();
4848  break;
4849  case HtmlOptionId::CLASS:
4850  aClass = rOption.GetString();
4851  break;
4852  case HtmlOptionId::LANG:
4853  aLang = rOption.GetString();
4854  break;
4855  case HtmlOptionId::DIR:
4856  aDir = rOption.GetString();
4857  break;
4858  default: break;
4859  }
4860  }
4861 
4862  // create a new context
4863  std::unique_ptr<HTMLAttrContext> xCntxt(new HTMLAttrContext(nToken));
4864 
4865  // set the style and save it in the context
4866  SwCharFormat* pCFormat = m_pCSS1Parser->GetChrFormat( nToken, aClass );
4867  OSL_ENSURE( pCFormat, "No character format found for token" );
4868 
4869  // parse styles (regarding class see also NewPara)
// an empty class is passed to both calls; aClass was already used for the
// character format lookup above
4870  if (HasStyleOptions(aStyle, aId, {}, &aLang, &aDir))
4871  {
4872  SfxItemSet aItemSet( m_xDoc->GetAttrPool(), m_pCSS1Parser->GetWhichMap() );
4873  SvxCSS1PropertyInfo aPropInfo;
4874 
4875  if (ParseStyleOptions(aStyle, aId, OUString(), aItemSet, aPropInfo, &aLang, &aDir))
4876  {
4877  OSL_ENSURE( aClass.isEmpty() || !m_pCSS1Parser->GetClass( aClass ),
4878  "Class is not considered" );
4879  DoPositioning( aItemSet, aPropInfo, xCntxt.get() );
4880  InsertAttrs( aItemSet, aPropInfo, xCntxt.get(), true );
4881  }
4882  }
4883 
4884  // Character formats are stored in their own stack and can never be inserted
4885  // by styles. Therefore the attribute doesn't exist in CSS1-Which-Range.
4886  if( pCFormat )
4887  InsertAttr( &m_xAttrTab->pCharFormats, SwFormatCharFormat( pCFormat ), xCntxt.get() );
4888 
4889  // save the context
4890  PushContext(xCntxt);
4891 }
4892 
// NOTE(review): the signature line (listing line 4893) is missing from this
// extract. The body evaluates TYPE/ALIGN/WIDTH/HEIGHT/SIZE options against
// aHTMLSpacerTypeTable and the HTML_SPTYPE_* constants, so this is presumably
// the handler for a spacer element -- confirm.
// NOTE(review): listing lines 4947, 4982, 4984, 5011, 5030 and 5032 are
// missing as well (end of the aFrameSet constructor call, the conditions and
// assignment targets of both pixel-to-twip conversions, and part of the
// SvxULSpaceItem initialisation). Do not edit this block without consulting
// the real file.
//
// Visible behaviour per spacer type:
//  - HTML_SPTYPE_BLOCK: creates a content-protected, empty fly section of the
//    requested size and registers it,
//  - HTML_SPTYPE_VERT:  adds nSize to the lower spacing of the previous text
//    node or, failing that, sets a lower spacing attribute and appends a node,
//  - HTML_SPTYPE_HORI:  in an empty paragraph adds nSize to the first line
//    indent, otherwise inserts a blank carrying nSize as kerning.
4894 {
4895  // and if applicable change it via the options
4896  sal_Int16 eVertOri = text::VertOrientation::TOP;
4897  sal_Int16 eHoriOri = text::HoriOrientation::NONE;
4898  Size aSize( 0, 0);
4899  tools::Long nSize = 0;
4900  bool bPercentWidth = false;
4901  bool bPercentHeight = false;
4902  sal_uInt16 nType = HTML_SPTYPE_HORI;
4903 
4904  const HTMLOptions& rHTMLOptions = GetOptions();
// the options are visited from the last one to the first one
4905  for (size_t i = rHTMLOptions.size(); i; )
4906  {
4907  const HTMLOption& rOption = rHTMLOptions[--i];
4908  switch( rOption.GetToken() )
4909  {
4910  case HtmlOptionId::TYPE:
4911  rOption.GetEnum( nType, aHTMLSpacerTypeTable );
4912  break;
4913  case HtmlOptionId::ALIGN:
// the same option value is looked up in the vertical and in the horizontal
// alignment table; each lookup gets the current value as its default
4914  eVertOri =
4915  rOption.GetEnum( aHTMLImgVAlignTable,
4916  eVertOri );
4917  eHoriOri =
4918  rOption.GetEnum( aHTMLImgHAlignTable,
4919  eHoriOri );
4920  break;
4921  case HtmlOptionId::WIDTH:
4922  // First only save as pixel value!
4923  bPercentWidth = (rOption.GetString().indexOf('%') != -1);
4924  aSize.setWidth( static_cast<tools::Long>(rOption.GetNumber()) );
4925  break;
4926  case HtmlOptionId::HEIGHT:
4927  // First only save as pixel value!
4928  bPercentHeight = (rOption.GetString().indexOf('%') != -1);
4929  aSize.setHeight( static_cast<tools::Long>(rOption.GetNumber()) );
4930  break;
4931  case HtmlOptionId::SIZE:
4932  // First only save as pixel value!
4933  nSize = rOption.GetNumber();
4934  break;
4935  default: break;
4936  }
4937  }
4938 
4939  switch( nType )
4940  {
4941  case HTML_SPTYPE_BLOCK:
4942  {
4943  // create an empty text frame
4944 
4945  // fetch the ItemSet
4946  SfxItemSet aFrameSet( m_xDoc->GetAttrPool(),
4948  if( !IsNewDoc() )
4949  Reader::ResetFrameFormatAttrs( aFrameSet );
4950 
4951  // set the anchor and the adjustment
4952  SetAnchorAndAdjustment( eVertOri, eHoriOri, aFrameSet );
4953 
4954  // and the size of the frame
4955  Size aDfltSz( MINFLY, MINFLY );
4956  Size aSpace( 0, 0 );
4957  SfxItemSet aDummyItemSet( m_xDoc->GetAttrPool(),
4958  m_pCSS1Parser->GetWhichMap() );
4959  SvxCSS1PropertyInfo aDummyPropInfo;
4960 
4961  SetFixSize( aSize, aDfltSz, bPercentWidth, bPercentHeight,
4962  aDummyPropInfo, aFrameSet );
4963  SetSpace( aSpace, aDummyItemSet, aDummyPropInfo, aFrameSet );
4964 
4965  // protect the content
4966  SvxProtectItem aProtectItem( RES_PROTECT) ;
4967  aProtectItem.SetContentProtect( true );
4968  aFrameSet.Put( aProtectItem );
4969 
4970  // create the frame
4971  RndStdIds eAnchorId =
4972  aFrameSet.Get(RES_ANCHOR).GetAnchorId();
4973  SwFrameFormat *pFlyFormat = m_xDoc->MakeFlySection( eAnchorId,
4974  m_pPam->GetPoint(), &aFrameSet );
4975  // Possibly create frames and register auto-bound frames.
4976  RegisterFlyFrame( pFlyFormat );
4977  }
4978  break;
4979  case HTML_SPTYPE_VERT:
4980  if( nSize > 0 )
4981  {
4983  {
4985  ->PixelToLogic( Size(0,nSize),
4986  MapMode(MapUnit::MapTwip) ).Height();
4987  }
4988 
4989  // set a paragraph margin
4990  SwTextNode *pTextNode = nullptr;
4991  if( !m_pPam->GetPoint()->nContent.GetIndex() )
4992  {
4993  // if possible change the bottom paragraph margin
4994  // of previous node
4995 
4996  SetAttr(); // set still open paragraph attributes
4997 
// the node directly in front of the current one; GetTextNode() yields a
// null pointer here when that node is no text node (see the check below)
4998  pTextNode = m_xDoc->GetNodes()[m_pPam->GetPoint()->nNode.GetIndex()-1]
4999  ->GetTextNode();
5000 
5001  // If the previous paragraph isn't a text node, then now an
5002  // empty paragraph is created, which already generates a single
5003  // line of spacing.
5004  if( !pTextNode )
5005  nSize = nSize>HTML_PARSPACE ? nSize-HTML_PARSPACE : 0;
5006  }
5007 
5008  if( pTextNode )
5009  {
5010  SvxULSpaceItem aULSpace( static_cast<const SvxULSpaceItem&>(pTextNode
5012  aULSpace.SetLower( aULSpace.GetLower() + o3tl::narrowing<sal_uInt16>(nSize) );
5013  pTextNode->SetAttr( aULSpace );
5014  }
5015  else
5016  {
5017  NewAttr(m_xAttrTab, &m_xAttrTab->pULSpace, SvxULSpaceItem(0, o3tl::narrowing<sal_uInt16>(nSize), RES_UL_SPACE));
5018  EndAttr( m_xAttrTab->pULSpace, false );
5019 
5020  AppendTextNode(); // Don't change spacing!
5021  }
5022  }
5023  break;
5024  case HTML_SPTYPE_HORI:
5025  if( nSize > 0 )
5026  {
5027  // If the paragraph is still empty, set first line
5028  // indentation, otherwise apply letter spacing over a space.
5029 
5031  {
5033  ->PixelToLogic( Size(nSize,0),
5034  MapMode(MapUnit::MapTwip) ).Width();
5035  }
5036 
5037  if( !m_pPam->GetPoint()->nContent.GetIndex() )
5038  {
5039  sal_uInt16 nLeft=0, nRight=0;
5040  short nIndent = 0;
5041 
5042  GetMarginsFromContextWithNumberBullet( nLeft, nRight, nIndent );
5043  nIndent = nIndent + static_cast<short>(nSize);
5044 
5045  SvxLRSpaceItem aLRItem( RES_LR_SPACE );
5046  aLRItem.SetTextLeft( nLeft );
5047  aLRItem.SetRight( nRight );
5048  aLRItem.SetTextFirstLineOffset( nIndent );
5049 
5050  NewAttr(m_xAttrTab, &m_xAttrTab->pLRSpace, aLRItem);
5051  EndAttr( m_xAttrTab->pLRSpace, false );
5052  }
5053  else
5054  {
// the kerning attribute spans exactly the single blank inserted here
5055  NewAttr(m_xAttrTab, &m_xAttrTab->pKerning, SvxKerningItem( static_cast<short>(nSize), RES_CHRATR_KERNING ));
5056  m_xDoc->getIDocumentContentOperations().InsertString( *m_pPam, " " );
5057  EndAttr( m_xAttrTab->pKerning );
5058  }
5059  }
5060  }
5061 }
5062 
// Converts a pixel value to twips.
//
// nPixel  the value in pixels.
//
// Returns nPixel unchanged when it is zero or when no default output device
// is available. Otherwise returns the converted width, limited to
// SAL_MAX_UINT16.
//
// NOTE(review): listing line 5067, which starts the statement that defines
// nTwips, is missing from this extract; presumably it calls PixelToLogic() on
// Application::GetDefaultDevice() -- confirm against the real file.
5063 sal_uInt16 SwHTMLParser::ToTwips( sal_uInt16 nPixel )
5064 {
5065  if( nPixel && Application::GetDefaultDevice() )
5066  {
5068  Size( nPixel, nPixel ), MapMode( MapUnit::MapTwip ) ).Width();
// clamp before narrowing so that large results do not wrap around
5069  return o3tl::narrowing<sal_uInt16>(std::min(nTwips, SwTwips(SAL_MAX_UINT16)));
5070  }
5071  else
5072  return nPixel;
5073 }
5074 
// NOTE(review): the signature line (listing line 5075) is missing from this
// extract; presumably this is the GetCurrentBrowseWidth() that other code in
// this file calls -- confirm.
// NOTE(review): listing lines 5077 (definition of nWidth) and 5094 (the
// statement executed for multi-column pages) are missing as well.
//
// Visible behaviour: a non-zero nWidth is returned directly. Otherwise
// m_aHTMLPageSize is filled once from the master page format (frame size
// minus left/right respectively upper/lower spacing) and its width is
// returned.
5076 {
5078  if( nWidth )
5079  return nWidth;
5080 
// a width of zero marks m_aHTMLPageSize as not yet computed
5081  if( !m_aHTMLPageSize.Width() )
5082  {
5083  const SwFrameFormat& rPgFormat = m_pCSS1Parser->GetMasterPageDesc()->GetMaster();
5084 
5085  const SwFormatFrameSize& rSz = rPgFormat.GetFrameSize();
5086  const SvxLRSpaceItem& rLR = rPgFormat.GetLRSpace();
5087  const SvxULSpaceItem& rUL = rPgFormat.GetULSpace();
5088  const SwFormatCol& rCol = rPgFormat.GetCol();
5089 
5090  m_aHTMLPageSize.setWidth( rSz.GetWidth() - rLR.GetLeft() - rLR.GetRight() );
5091  m_aHTMLPageSize.setHeight( rSz.GetHeight() - rUL.GetUpper() - rUL.GetLower() );
5092 
5093  if( 1 < rCol.GetNumCols() )
5095  }
5096 
5097  return m_aHTMLPageSize.Width();
5098 }
5099 
// NOTE(review): the signature line (listing line 5100) is missing from this
// extract; judging by the body this is presumably the helper that turns an ID
// option into a bookmark -- confirm.
//
// Searches the options of the current tag from the last to the first for an
// ID option and, if its value is not empty, inserts a bookmark of that name.
5101 {
5102  OUString aId;
5103  const HTMLOptions& rHTMLOptions = GetOptions();
5104  for (size_t i = rHTMLOptions.size(); i; )
5105  {
5106  const HTMLOption& rOption = rHTMLOptions[--i];
5107  if( HtmlOptionId::ID==rOption.GetToken() )
5108  {
5109  aId = rOption.GetString();
// the search ends at the first ID option met, i.e. the last one in the list
5110  break;
5111  }
5112  }
5113 
5114  if( !aId.isEmpty() )
5115  InsertBookmark( aId );
5116 }
5117 
// NOTE(review): the signature line (listing line 5118) is missing from this
// extract. The leading comment describes <BR CLEAR=xxx>, so this is
// presumably the handler for a line break element -- confirm.
// NOTE(review): listing line 5259, the statement inside the
// `else if( ...GetIndex() )` branch near the end, is missing as well.
//
// Visible behaviour: evaluates CLEAR/ID/STYLE/CLASS, adapts the wrapping of
// frames anchored in the current paragraph as described in the list below,
// applies a page break requested through style options, and inserts a line
// break character when neither a CLEAR nor a break item took effect.
5119 {
5120  // <BR CLEAR=xxx> is handled as:
5121  // 1.) Only regard the paragraph-bound frames anchored in current paragraph.
5122  // 2.) For left-justified aligned frames, CLEAR=LEFT or ALL, and for right-
5123  // justified aligned frames, CLEAR=RIGHT or ALL, the wrap-through is
5124  // changed as following:
5125  // 3.) If the paragraph contains no text, then the frames don't get a wrapping
5126  // 4.) otherwise a left aligned frame gets a right "only anchor" wrapping
5127  // and a right aligned frame gets a left "only anchor" wrapping.
5128  // 5.) if in a non-empty paragraph the wrapping of a frame is changed,
5129  // then a new paragraph is opened
5130  // 6.) If no wrappings of frames are changed, a hard line break is inserted.
5131 
5132  OUString aId, aStyle, aClass; // the id of bookmark
5133  bool bClearLeft = false, bClearRight = false;
5134  bool bCleared = false; // Was a CLEAR executed?
5135 
5136  // then we fetch the options
5137  const HTMLOptions& rHTMLOptions = GetOptions();
5138  for (size_t i = rHTMLOptions.size(); i; )
5139  {
5140  const HTMLOption& rOption = rHTMLOptions[--i];
5141  switch( rOption.GetToken() )
5142  {
5143  case HtmlOptionId::CLEAR:
5144  {
// the value is compared without regard to ASCII case; values other than
// "all", "left" and "right" leave both flags untouched
5145  const OUString &rClear = rOption.GetString();
5146  if( rClear.equalsIgnoreAsciiCase( OOO_STRING_SVTOOLS_HTML_AL_all ) )
5147  {
5148  bClearLeft = true;
5149  bClearRight = true;
5150  }
5151  else if( rClear.equalsIgnoreAsciiCase( OOO_STRING_SVTOOLS_HTML_AL_left ) )
5152  bClearLeft = true;
5153  else if( rClear.equalsIgnoreAsciiCase( OOO_STRING_SVTOOLS_HTML_AL_right ) )
5154  bClearRight = true;
5155  }
5156  break;
5157  case HtmlOptionId::ID:
5158  aId = rOption.GetString();
5159  break;
5160  case HtmlOptionId::STYLE:
5161  aStyle = rOption.GetString();
5162  break;
5163  case HtmlOptionId::CLASS:
5164  aClass = rOption.GetString();
5165  break;
5166  default: break;
5167  }
5168  }
5169 
5170  // CLEAR is only supported for the current paragraph
5171  if( bClearLeft || bClearRight )
5172  {
5173  SwNodeIndex& rNodeIdx = m_pPam->GetPoint()->nNode;
5174  SwTextNode* pTextNd = rNodeIdx.GetNode().GetTextNode();
5175  if( pTextNd )
5176  {
5177  const SwFrameFormats& rFrameFormatTable = *m_xDoc->GetSpzFrameFormats();
5178 
5179  for( size_t i=0; i<rFrameFormatTable.size(); i++ )
5180  {
5181  SwFrameFormat *const pFormat = rFrameFormatTable[i];
5182  SwFormatAnchor const*const pAnchor = &pFormat->GetAnchor();
5183  SwPosition const*const pAPos = pAnchor->GetContentAnchor();
// only frames anchored at the current node that still have some wrapping
5184  if (pAPos &&
5185  ((RndStdIds::FLY_AT_PARA == pAnchor->GetAnchorId()) ||
5186  (RndStdIds::FLY_AT_CHAR == pAnchor->GetAnchorId())) &&
5187  pAPos->nNode == rNodeIdx &&
5188  pFormat->GetSurround().GetSurround() != css::text::WrapTextMode_NONE )
5189  {
// formats of kind RES_DRAWFRMFMT are always treated as left aligned
5190  sal_Int16 eHori = RES_DRAWFRMFMT == pFormat->Which()
5191  ? text::HoriOrientation::LEFT
5192  : pFormat->GetHoriOrient().GetHoriOrient();
5193 
// WrapTextMode_PARALLEL serves as the "leave this frame unchanged" marker
5194  css::text::WrapTextMode eSurround = css::text::WrapTextMode_PARALLEL;
5195  if( m_pPam->GetPoint()->nContent.GetIndex() )
5196  {
5197  if( bClearLeft && text::HoriOrientation::LEFT==eHori )
5198  eSurround = css::text::WrapTextMode_RIGHT;
5199  else if( bClearRight && text::HoriOrientation::RIGHT==eHori )
5200  eSurround = css::text::WrapTextMode_LEFT;
5201  }
5202  else if( (bClearLeft && text::HoriOrientation::LEFT==eHori) ||
5203  (bClearRight && text::HoriOrientation::RIGHT==eHori) )
5204  {
5205  eSurround = css::text::WrapTextMode_NONE;
5206  }
5207 
5208  if( css::text::WrapTextMode_PARALLEL != eSurround )
5209  {
5210  SwFormatSurround aSurround( eSurround );
5211  if( css::text::WrapTextMode_NONE != eSurround )
5212  aSurround.SetAnchorOnly( true );
5213  pFormat->SetFormatAttr( aSurround );
5214  bCleared = true;
5215  }
5216  }
5217  }
5218  }
5219  }
5220 
5221  // parse styles
5222  std::shared_ptr<SvxFormatBreakItem> aBreakItem(std::make_shared<SvxFormatBreakItem>(SvxBreak::NONE, RES_BREAK));
5223  bool bBreakItem = false;
5224  if( HasStyleOptions( aStyle, aId, aClass ) )
5225  {
5226  SfxItemSet aItemSet( m_xDoc->GetAttrPool(), m_pCSS1Parser->GetWhichMap() );
5227  SvxCSS1PropertyInfo aPropInfo;
5228 
5229  if( ParseStyleOptions( aStyle, aId, aClass, aItemSet, aPropInfo ) )
5230  {
5231  if( m_pCSS1Parser->SetFormatBreak( aItemSet, aPropInfo ) )
5232  {
// the shared_ptr takes over the object returned by Clone()
5233  aBreakItem.reset(aItemSet.Get(RES_BREAK).Clone());
5234  bBreakItem = true;
5235  }
5236  if( !aPropInfo.m_aId.isEmpty() )
5237  InsertBookmark( aPropInfo.m_aId );
5238  }
5239  }
5240 
// a "page after" break is set before the line break / new paragraph handling
5241  if( bBreakItem && SvxBreak::PageAfter == aBreakItem->GetBreak() )
5242  {
5243  NewAttr(m_xAttrTab, &m_xAttrTab->pBreak, *aBreakItem);
5244  EndAttr( m_xAttrTab->pBreak, false );
5245  }
5246 
5247  if( !bCleared && !bBreakItem )
5248  {
5249  // If no CLEAR could or should be executed, a line break will be inserted
5250  m_xDoc->getIDocumentContentOperations().InsertString( *m_pPam, "\x0A" );
5251  }
5252  else if( m_pPam->GetPoint()->nContent.GetIndex() )
5253  {
5254  // If a CLEAR is executed in a non-empty paragraph, then after it
5255  // a new paragraph has to be opened.
5256  // MIB 21.02.97: Here actually we should change the bottom paragraph
5257  // margin to zero. This will fail for something like this <BR ..><P>
5258  // (>Netscape). That's why we don't do it.
5260  }
// a "page before" break is set after the line break / new paragraph handling
5261  if( bBreakItem && SvxBreak::PageBefore == aBreakItem->GetBreak() )
5262  {
5263  NewAttr(m_xAttrTab, &m_xAttrTab->pBreak, *aBreakItem);
5264  EndAttr( m_xAttrTab->pBreak, false );
5265  }
5266 }
5267 
5269 {
5270  sal_uInt16 nSize = 0;
5271  sal_uInt16 nWidth = 0;
5272 
5273  SvxAdjust eAdjust = SvxAdjust::End;
5274 
5275  bool bPercentWidth = false;
5276  bool bNoShade = false;
5277  bool bColor = false;
5278 
5279  Color aColor;
5280  OUString aId;
5281 
5282  // let's fetch the options
5283  const HTMLOptions& rHTMLOptions = GetOptions();
5284  for (size_t i = rHTMLOptions.size(); i; )
5285  {
5286  const HTMLOption& rOption = rHTMLOptions[--i];
5287  switch( rOption.GetToken() )
5288  {
5289  case HtmlOptionId::ID:
5290  aId = rOption.GetString();
5291  break;
5292  case HtmlOptionId::SIZE:
5293  nSize = o3tl::narrowing<sal_uInt16>(rOption.GetNumber());
5294  break;
5295  case HtmlOptionId::WIDTH:
5296  bPercentWidth = (rOption.GetString().indexOf('%') != -1);
5297  nWidth = o3tl::narrowing<sal_uInt16>(rOption.GetNumber());
5298  if( bPercentWidth && nWidth>=100 )
5299  {
5300  // the default case are 100% lines (no attributes necessary)
5301  nWidth = 0;
5302  bPercentWidth = false;
5303  }
5304  break;
5305  case HtmlOptionId::ALIGN:
5306  eAdjust = rOption.GetEnum( aHTMLPAlignTable, eAdjust );
5307  break;
5308  case HtmlOptionId::NOSHADE:
5309  bNoShade = true;
5310  break;
5311  case HtmlOptionId::COLOR:
5312  rOption.GetColor( aColor );
5313  bColor = true;
5314  break;
5315  default: break;
5316  }
5317  }
5318 
5319  if( m_pPam->GetPoint()->nContent.GetIndex() )
5321  if( m_nOpenParaToken != HtmlTokenId::NONE )
5322  EndPara();
5323  AppendTextNode();
5325 
5326  // ...and save in a context
5327  std::unique_ptr<HTMLAttrContext> xCntxt(
5328  new HTMLAttrContext(HtmlTokenId::HORZRULE, RES_POOLCOLL_HTML_HR, OUString()));
5329 
5330  PushContext(xCntxt);
5331 
5332  // set the new style
5333  SetTextCollAttrs(m_aContexts.back().get());
5334 
5335  // the hard attributes of the current paragraph will never become invalid
5336  m_aParaAttrs.clear();
5337 
5338  if( nSize>0 || bColor || bNoShade )
5339  {
5340  // set line colour and/or width
5341  if( !bColor )
5342  aColor = COL_GRAY;
5343 
5344  SvxBorderLine aBorderLine( &aColor );
5345  if( nSize )
5346  {
5347  tools::Long nPWidth = 0;
5348  tools::Long nPHeight = static_cast<tools::Long>(nSize);
5349  SvxCSS1Parser::PixelToTwip( nPWidth, nPHeight );
5350  if ( !bNoShade )
5351  {
5352  aBorderLine.SetBorderLineStyle(SvxBorderLineStyle::DOUBLE);
5353  }
5354  aBorderLine.SetWidth( nPHeight );
5355  }
5356  else if( bNoShade )
5357  {
5358  aBorderLine.SetWidth( DEF_LINE_WIDTH_2 );
5359  }
5360  else
5361  {
5362  aBorderLine.SetBorderLineStyle(SvxBorderLineStyle::DOUBLE);
5363  aBorderLine.SetWidth( DEF_LINE_WIDTH_0 );
5364  }
5365 
5366  SvxBoxItem aBoxItem(RES_BOX);
5367  aBoxItem.SetLine( &aBorderLine, SvxBoxItemLine::BOTTOM );
5368  HTMLAttr* pTmp = new HTMLAttr(*m_pPam->GetPoint(), aBoxItem, nullptr, std::shared_ptr<HTMLAttrTable>());
5369  m_aSetAttrTab.push_back( pTmp );
5370  }
5371  if( nWidth )
5372  {
5373  // If we aren't in a table, then the width value will be "faked" with
5374  // paragraph indents. That makes little sense in a table. In order to
5375  // avoid that the line is considered during the width calculation, it
5376  // still gets an appropriate LRSpace-Item.
5377  if (!m_xTable)
5378  {
5379  // fake length and alignment of line above paragraph indents
5380  tools::Long nBrowseWidth = GetCurrentBrowseWidth();
5381  nWidth = bPercentWidth ? o3tl::narrowing<sal_uInt16>((nWidth*nBrowseWidth) / 100)
5382  : ToTwips( o3tl::narrowing<sal_uInt16>(nBrowseWidth) );
5383  if( nWidth < MINLAY )
5384  nWidth = MINLAY;
5385 
5386  const SwFormatColl *pColl = (static_cast<tools::Long>(nWidth) < nBrowseWidth) ? GetCurrFormatColl() : nullptr;
5387  if (pColl)
5388  {
5389  SvxLRSpaceItem aLRItem( pColl->GetLRSpace() );
5390