LibreOffice Module sw (master)  1
swhtml.cxx
Go to the documentation of this file.
1 /* -*- Mode: C++; tab-width: 4; indent-tabs-mode: nil; c-basic-offset: 4 -*- */
2 /*
3  * This file is part of the LibreOffice project.
4  *
5  * This Source Code Form is subject to the terms of the Mozilla Public
6  * License, v. 2.0. If a copy of the MPL was not distributed with this
7  * file, You can obtain one at http://mozilla.org/MPL/2.0/.
8  *
9  * This file incorporates work covered by the following license notice:
10  *
11  * Licensed to the Apache Software Foundation (ASF) under one or more
12  * contributor license agreements. See the NOTICE file distributed
13  * with this work for additional information regarding copyright
14  * ownership. The ASF licenses this file to you under the Apache
15  * License, Version 2.0 (the "License"); you may not use this file
16  * except in compliance with the License. You may obtain a copy of
17  * the License at http://www.apache.org/licenses/LICENSE-2.0 .
18  */
19 
20 #include <sal/config.h>
21 
22 #include <algorithm>
23 #include <memory>
24 #include <config_java.h>
25 
26 #include <com/sun/star/document/XDocumentPropertiesSupplier.hpp>
27 #include <com/sun/star/document/XDocumentProperties.hpp>
28 #include <com/sun/star/i18n/ScriptType.hpp>
29 #include <com/sun/star/i18n/XBreakIterator.hpp>
30 #include <comphelper/string.hxx>
31 #include <o3tl/safeint.hxx>
32 #include <rtl/ustrbuf.hxx>
33 #include <svx/svxids.hrc>
34 #if OSL_DEBUG_LEVEL > 0
35 #include <stdlib.h>
36 #endif
37 #include <hintids.hxx>
38 
39 #include <vcl/errinf.hxx>
40 #include <svl/stritem.hxx>
41 #include <vcl/imap.hxx>
42 #include <svtools/htmltokn.h>
43 #include <svtools/htmlkywd.hxx>
44 #include <svtools/ctrltool.hxx>
45 #include <unotools/configmgr.hxx>
46 #include <unotools/pathoptions.hxx>
47 #include <vcl/svapp.hxx>
48 #include <sfx2/event.hxx>
49 #include <sfx2/docfile.hxx>
50 
51 #include <svtools/htmlcfg.hxx>
52 #include <sfx2/linkmgr.hxx>
53 #include <editeng/kernitem.hxx>
54 #include <editeng/boxitem.hxx>
55 #include <editeng/fhgtitem.hxx>
57 #include <editeng/postitem.hxx>
58 #include <editeng/wghtitem.hxx>
60 #include <editeng/udlnitem.hxx>
62 #include <editeng/blinkitem.hxx>
63 #include <editeng/ulspitem.hxx>
64 #include <editeng/colritem.hxx>
65 #include <editeng/fontitem.hxx>
66 #include <editeng/adjustitem.hxx>
67 #include <editeng/lrspitem.hxx>
68 #include <editeng/protitem.hxx>
69 #include <editeng/flstitem.hxx>
71 
72 #include <frmatr.hxx>
73 #include <charatr.hxx>
74 #include <fmtfld.hxx>
75 #include <fmtpdsc.hxx>
76 #include <fmtanchr.hxx>
77 #include <fmtsrnd.hxx>
78 #include <fmtfsize.hxx>
79 #include <fmtclds.hxx>
80 #include <fchrfmt.hxx>
81 #include <fmtinfmt.hxx>
82 #include <fmtfollowtextflow.hxx>
83 #include <fmtornt.hxx>
84 #include <docary.hxx>
85 #include <doc.hxx>
86 #include <IDocumentUndoRedo.hxx>
93 #include <IDocumentStatistics.hxx>
94 #include <IDocumentState.hxx>
95 #include <pam.hxx>
96 #include <ndtxt.hxx>
97 #include <mdiexp.hxx>
98 #include <poolfmt.hxx>
99 #include <pagedesc.hxx>
100 #include <IMark.hxx>
101 #include <docsh.hxx>
102 #include <editsh.hxx>
103 #include <docufld.hxx>
104 #include "swcss1.hxx"
105 #include <fltini.hxx>
106 #include <htmltbl.hxx>
107 #include "htmlnum.hxx"
108 #include "swhtml.hxx"
109 #include "wrthtml.hxx"
110 #include <linkenum.hxx>
111 #include <breakit.hxx>
112 #include <SwAppletImpl.hxx>
113 #include <swdll.hxx>
114 #include <txatbase.hxx>
115 
116 #include <sfx2/viewfrm.hxx>
117 #include <svx/svdobj.hxx>
118 #include <officecfg/Office/Writer.hxx>
120 #include <comphelper/sequence.hxx>
121 
122 #include <swerror.h>
123 #include <ndole.hxx>
124 #include <unoframe.hxx>
125 #include "css1atr.hxx"
126 #include <frameformats.hxx>
127 
128 #define FONTSIZE_MASK 7
129 
130 #define HTML_ESC_PROP 80
131 #define HTML_ESC_SUPER DFLT_ESC_SUPER
132 #define HTML_ESC_SUB DFLT_ESC_SUB
133 
134 #define HTML_SPTYPE_BLOCK 1
135 #define HTML_SPTYPE_HORI 2
136 #define HTML_SPTYPE_VERT 3
137 
139 using namespace ::com::sun::star;
140 
141 // <P ALIGN=xxx>, <Hn ALIGN=xxx>, <TD ALIGN=xxx> etc.
143 {
144  { OOO_STRING_SVTOOLS_HTML_AL_left, SvxAdjust::Left },
145  { OOO_STRING_SVTOOLS_HTML_AL_center, SvxAdjust::Center },
146  { OOO_STRING_SVTOOLS_HTML_AL_middle, SvxAdjust::Center }, // Netscape
147  { OOO_STRING_SVTOOLS_HTML_AL_right, SvxAdjust::Right },
148  { OOO_STRING_SVTOOLS_HTML_AL_justify, SvxAdjust::Block },
149  { OOO_STRING_SVTOOLS_HTML_AL_char, SvxAdjust::Left },
150  { nullptr, SvxAdjust(0) }
151 };
152 
153 // <SPACER TYPE=...>
155 {
159  { nullptr, 0 }
160 };
161 
163 {
164  m_bTemplateBrowseMode = true;
165 }
166 
// Returns the name of the Writer/Web template to load for an HTML import,
// or an empty string when no template should be (or can be) used.
//
// Search order in the visible code: "internal/html.oth" first, then
// "internal/html.stw", both looked up in SvtPathOptions::PATH_TEMPLATE.
// If neither is found, an assertion is raised and an empty string is returned.
//
// NOTE(review): listing line 169 is absent from this extract. The early
// "return OUString();" below is presumably guarded by a condition on rDoc
// (rDoc is otherwise unused here) - confirm against the original file before
// treating the code after it as unreachable.
167 OUString HTMLReader::GetTemplateName(SwDoc& rDoc) const
168 {
170  // HTML import into Writer, avoid loading the Writer/Web template.
171  return OUString();
172 
173  const OUString sTemplateWithoutExt("internal/html");
174  SvtPathOptions aPathOpt;
175 
176  // first search for OpenDocument Writer/Web template
177  // OpenDocument Writer/Web template (extension .oth)
178  OUString sTemplate( sTemplateWithoutExt + ".oth" );
179  if (aPathOpt.SearchFile( sTemplate, SvtPathOptions::PATH_TEMPLATE ))
180  return sTemplate;
181 
182  // no OpenDocument Writer/Web template found.
183  // search for OpenOffice.org Writer/Web template
184  sTemplate = sTemplateWithoutExt + ".stw";
185  if (aPathOpt.SearchFile( sTemplate, SvtPathOptions::PATH_TEMPLATE ))
186  return sTemplate;
187 
    // Neither template variant was found: flag it in debug builds and fall
    // back to "no template".
188  OSL_ENSURE( false, "The default HTML template cannot be found in the defined template directories!");
189 
190  return OUString();
191 }
192 
194 {
195  OSL_ENSURE( m_pMedium, "Where is the medium??" );
196 
197  if( m_pMedium->IsRemote() || !m_pMedium->IsStorage() )
198  {
200  return true;
201  }
202  return false;
203 
204 }
205 
206 // Entry point called through the general Reader interface
// Runs an HTML import into rDoc at the position described by rPam.
//
// rDoc     - target document; kept alive for the duration of the call via
//            a local rtl::Reference.
// rBaseURL - forwarded to the SwHTMLParser constructor.
// rPam     - forwarded to the SwHTMLParser constructor.
// rName    - forwarded to the SwHTMLParser constructor.
//
// Returns ERR_SWG_READ_ERROR when no stream is set, ERRCODE_NONE when the
// parser reports SvParserState::Accepted, and an ERR_FORMAT_ROWCOL string
// error carrying "<line>,<pos>" when the parser ends in any state other than
// Pending or Accepted.
//
// NOTE(review): listing lines 209, 219, 223, 225, 226, 236 and 241 are absent
// from this extract (including the handling of the Pending state and the tail
// of the SwHTMLParser constructor call); the comments here describe only the
// visible statements.
207 ErrCode HTMLReader::Read( SwDoc &rDoc, const OUString& rBaseURL, SwPaM &rPam, const OUString & rName )
208 {
210 
    // Without an input stream there is nothing to parse.
211  if( !m_pStream )
212  {
213  OSL_ENSURE( m_pStream, "HTML-Read without stream" );
214  return ERR_SWG_READ_ERROR;
215  }
216 
217  if( !m_bInsertMode )
218  {
220 
221  // Set the HTML page style, when it isn't a HTML document,
222  // otherwise it's already set.
224  {
227  }
228  }
229 
230  // so nobody steals the document!
231  rtl::Reference<SwDoc> xHoldAlive(&rDoc);
232  ErrCode nRet = ERRCODE_NONE;
    // "!m_bInsertMode" is passed in the position that the parser constructor
    // names bReadNewDoc.
233  tools::SvRef<SwHTMLParser> xParser = new SwHTMLParser( &rDoc, rPam, *m_pStream,
234  rName, rBaseURL, !m_bInsertMode, m_pMedium,
235  IsReadUTF8(),
237 
238  SvParserState eState = xParser->CallParser();
239 
240  if( SvParserState::Pending == eState )
242  else if( SvParserState::Accepted != eState )
243  {
    // Build the "<line>,<column>" payload for the row/column format error.
244  const OUString sErr(OUString::number(static_cast<sal_Int32>(xParser->GetLineNr()))
245  + "," + OUString::number(static_cast<sal_Int32>(xParser->GetLinePos())));
246 
    // NOTE(review): the StringErrorInfo is allocated with new and only
    // dereferenced into an ErrCode; who owns/frees it is not visible in this
    // block - confirm it is managed by the error-info machinery.
247  // use the stream as transport for error number
248  nRet = *new StringErrorInfo( ERR_FORMAT_ROWCOL, sErr,
249  DialogMask::ButtonsOk | DialogMask::MessageError );
250  }
251 
252  return nRet;
253 }
254 
256  const OUString& rPath,
257  const OUString& rBaseURL,
258  bool bReadNewDoc,
259  SfxMedium* pMed, bool bReadUTF8,
260  bool bNoHTMLComments,
261  const OUString& rNamespace )
262  : SfxHTMLParser( rIn, bReadNewDoc, pMed ),
263  m_aPathToFile( rPath ),
264  m_sBaseURL( rBaseURL ),
265  m_xAttrTab(std::make_shared<HTMLAttrTable>()),
266  m_pNumRuleInfo( new SwHTMLNumRuleInfo ),
267  m_xDoc( pD ),
268  m_pActionViewShell( nullptr ),
269  m_pSttNdIdx( nullptr ),
270  m_pFormImpl( nullptr ),
271  m_pMarquee( nullptr ),
272  m_pImageMap( nullptr ),
273  m_nBaseFontStMin( 0 ),
274  m_nFontStMin( 0 ),
275  m_nDefListDeep( 0 ),
276  m_nFontStHeadStart( 0 ),
277  m_nSBModuleCnt( 0 ),
278  m_nMissingImgMaps( 0 ),
279  m_nParaCnt( 5 ),
280  // #i83625#
281  m_nContextStMin( 0 ),
282  m_nContextStAttrMin( 0 ),
283  m_nSelectEntryCnt( 0 ),
284  m_nOpenParaToken( HtmlTokenId::NONE ),
285  m_eJumpTo( JumpToMarks::NONE ),
286 #ifdef DBG_UTIL
287  m_nContinue( 0 ),
288 #endif
289  m_eParaAdjust( SvxAdjust::End ),
290  m_bDocInitalized( false ),
291  m_bSetModEnabled( false ),
292  m_bInFloatingFrame( false ),
293  m_bInField( false ),
294  m_bCallNextToken( false ),
295  m_bIgnoreRawData( false ),
296  m_bLBEntrySelected ( false ),
297  m_bTAIgnoreNewPara ( false ),
298  m_bFixMarqueeWidth ( false ),
299  m_bNoParSpace( false ),
300  m_bInNoEmbed( false ),
301  m_bInTitle( false ),
302  m_bUpdateDocStat( false ),
303  m_bFixSelectWidth( false ),
304  m_bTextArea( false ),
305  m_bSelect( false ),
306  m_bInFootEndNoteAnchor( false ),
307  m_bInFootEndNoteSymbol( false ),
308  m_bIgnoreHTMLComments( bNoHTMLComments ),
309  m_bRemoveHidden( false ),
310  m_bBodySeen( false ),
311  m_bReadingHeaderOrFooter( false ),
312  m_bNotifyMacroEventRead( false ),
313  m_isInTableStructure(false),
314  m_nTableDepth( 0 ),
315  m_pTempViewFrame(nullptr)
316 {
    // Constructor body: takes over the caller's cursor, reads the HTML import
    // options, switches the document into HTML mode, creates the CSS1 parser,
    // selects the source encoding, evaluates a jump mark from the medium's
    // URL and finally picks up filter-specific load arguments.
    //
    // NOTE(review): the first line of the signature (listing line 255) and
    // listing lines 318, 322, 346, 348, 350, 391, 405, 407, 409 and 424 are
    // absent from this extract; comments describe only what is visible.
317  // If requested explicitly, then force ignoring of comments (don't create postits for them).
319  m_bIgnoreHTMLComments = true;
320 
321  m_nEventId = nullptr;
323 
324  m_eScriptLang = HTMLScriptLanguage::Unknown;
325 
326  rCursor.DeleteMark();
327  m_pPam = &rCursor; // re-use existing cursor: avoids spurious ~SwIndexReg assert
    // NOTE(review): this zero-fills the HTMLAttrTable that std::make_shared
    // value-constructed in the initializer list. That is only well-defined if
    // HTMLAttrTable is trivially copyable/constructible - confirm.
328  memset(m_xAttrTab.get(), 0, sizeof(HTMLAttrTable));
329 
330  // Read the font sizes 1-7 from the INI file
    // The factor 20 presumably converts points to twips - TODO confirm.
331  SvxHtmlOptions& rHtmlOptions = SvxHtmlOptions::Get();
332  m_aFontHeights[0] = rHtmlOptions.GetFontSize( 0 ) * 20;
333  m_aFontHeights[1] = rHtmlOptions.GetFontSize( 1 ) * 20;
334  m_aFontHeights[2] = rHtmlOptions.GetFontSize( 2 ) * 20;
335  m_aFontHeights[3] = rHtmlOptions.GetFontSize( 3 ) * 20;
336  m_aFontHeights[4] = rHtmlOptions.GetFontSize( 4 ) * 20;
337  m_aFontHeights[5] = rHtmlOptions.GetFontSize( 5 ) * 20;
338  m_aFontHeights[6] = rHtmlOptions.GetFontSize( 6 ) * 20;
339 
340  m_bKeepUnknown = rHtmlOptions.IsImportUnknown();
341 
342  if(bReadNewDoc)
343  {
344  //CJK has different defaults, so a different object should be used for this
345  //RES_CHARTR_CJK_FONTSIZE is a valid value
347  m_xDoc->SetDefault( aFontHeight );
349  m_xDoc->SetDefault( aFontHeightCJK );
351  m_xDoc->SetDefault( aFontHeightCTL );
352 
353  // #i18732# - adjust default of option 'FollowTextFlow'
354  // TODO: not sure what the appropriate default for HTML should be?
355  m_xDoc->SetDefault( SwFormatFollowTextFlow(true) );
356  }
357 
358  // Change to HTML mode during the import, so that the right styles are created
    // The previous value is remembered in m_bOldIsHTMLMode; the destructor
    // writes it back.
359  m_bOldIsHTMLMode = m_xDoc->getIDocumentSettingAccess().get(DocumentSettingId::HTML_MODE);
360  m_xDoc->getIDocumentSettingAccess().set(DocumentSettingId::HTML_MODE, true);
361 
362  m_pCSS1Parser.reset(new SwCSS1Parser(m_xDoc.get(), *this, m_aFontHeights, m_sBaseURL, IsNewDoc()));
363  m_pCSS1Parser->SetIgnoreFontFamily( rHtmlOptions.IsIgnoreFontFamily() );
364 
365  if( bReadUTF8 )
366  {
367  SetSrcEncoding( RTL_TEXTENCODING_UTF8 );
368  }
369  else
370  {
    // NOTE(review): pDocSh is dereferenced here without a null check, while
    // the result of GetDocShell() is null-checked a few lines further down -
    // confirm that a doc shell always exists when bReadUTF8 is false.
371  SwDocShell *pDocSh = m_xDoc->GetDocShell();
372  SvKeyValueIterator *pHeaderAttrs =
373  pDocSh->GetHeaderAttributes();
374  if( pHeaderAttrs )
375  SetEncodingByHTTPHeader( pHeaderAttrs );
376  }
377  m_pCSS1Parser->SetDfltEncoding( osl_getThreadTextEncoding() );
378 
379  SwDocShell* pDocSh = m_xDoc->GetDocShell();
380  if( pDocSh )
381  {
382  m_bViewCreated = true; // not, load synchronous
383 
384  // a jump mark is present
385 
386  if( pMed )
387  {
388  m_sJmpMark = pMed->GetURLObject().GetMark();
389  if( !m_sJmpMark.isEmpty() )
390  {
    // A separator found at index 0 yields nPos == 0 and is therefore treated
    // exactly like "no separator found" by the test on nPos below.
392  sal_Int32 nLastPos = m_sJmpMark.lastIndexOf( cMarkSeparator );
393  sal_Int32 nPos = nLastPos != -1 ? nLastPos : 0;
394 
395  OUString sCmp;
396  if (nPos)
397  {
398  sCmp = m_sJmpMark.copy(nPos + 1).replaceAll(" ", "");
399  }
400 
    // sCmp is the part after the last separator with blanks removed; it is
    // compared case-insensitively against the known jump target kinds.
401  if( !sCmp.isEmpty() )
402  {
403  sCmp = sCmp.toAsciiLowerCase();
404  if( sCmp == "region" )
406  else if( sCmp == "table" )
408  else if( sCmp == "graphic" )
410  else if( sCmp == "outline" ||
411  sCmp == "text" ||
412  sCmp == "frame" )
413  m_eJumpTo = JumpToMarks::NONE; // this is nothing valid!
414  else
415  // otherwise this is a normal (book)mark
416  nPos = -1;
417  }
418  else
419  nPos = -1;
420 
    // nPos == -1 means "keep the whole mark"; otherwise cut the type suffix.
421  if( nPos != -1 )
422  m_sJmpMark = m_sJmpMark.copy( 0, nPos );
423  if( m_sJmpMark.isEmpty() )
425  }
426  }
427  }
428 
    // A non-empty namespace switches the parser to XHTML handling;
    // "reqif-xhtml" additionally enables the ReqIF flag.
429  if (!rNamespace.isEmpty())
430  {
431  SetNamespace(rNamespace);
432  m_bXHTML = true;
433  if (rNamespace == "reqif-xhtml")
434  m_bReqIF = true;
435  }
436 
437  // Extract load parameters which are specific to this filter.
438  if (!pMed)
439  {
440  return;
441  }
442 
443  comphelper::SequenceAsHashMap aLoadMap(pMed->GetArgs());
444  auto it = aLoadMap.find("AllowedRTFOLEMimeTypes");
445  if (it == aLoadMap.end())
446  {
447  return;
448  }
449 
    // Copy the optional "AllowedRTFOLEMimeTypes" sequence into a std::set.
450  uno::Sequence<OUString> aTypes;
451  it->second >>= aTypes;
452  m_aAllowedRTFOLEMimeTypes = comphelper::sequenceToContainer<std::set<OUString>>(aTypes);
453 }
454 
456 {
    // Destructor body: drops leftover attribute contexts, restores the
    // document's async-load and HTML-mode settings, updates links / finishes
    // loading on the doc shell, and releases the helper objects owned by the
    // parser.
    //
    // NOTE(review): the signature line (listing line 455) and listing lines
    // 475 and 519 are absent from this extract.
457 #ifdef DBG_UTIL
458  OSL_ENSURE( !m_nContinue, "DTOR in continue!" );
459 #endif
460 
461  OSL_ENSURE(m_aContexts.empty(), "There are still contexts on the stack");
462  OSL_ENSURE(!m_nContextStMin, "There are protected contexts");
    // Lift the protection limit, then pop and clear whatever is still stacked.
463  m_nContextStMin = 0;
464  while (!m_aContexts.empty())
465  {
466  std::unique_ptr<HTMLAttrContext> xCntxt(PopContext());
467  ClearContext(xCntxt.get());
468  }
469 
    // Remember whether the load was asynchronous before resetting the flag;
    // the value decides below whether links are updated.
470  bool bAsync = m_xDoc->IsInLoadAsynchron();
471  m_xDoc->SetInLoadAsynchron( false );
472  m_xDoc->getIDocumentSettingAccess().set(DocumentSettingId::HTML_MODE, m_bOldIsHTMLMode);
473 
474  if( m_xDoc->GetDocShell() && m_nEventId )
476 
477  // the DocumentDetected maybe can delete the DocShells, therefore fetch again
478  if( m_xDoc->GetDocShell() )
479  {
480  // update linked sections
481  sal_uInt16 nLinkMode = m_xDoc->getIDocumentSettingAccess().getLinkUpdateMode( true );
482  if( nLinkMode != NEVER && bAsync &&
483  SfxObjectCreateMode::INTERNAL!=m_xDoc->GetDocShell()->GetCreateMode() )
484  m_xDoc->getIDocumentLinksAdministration().GetLinkManager().UpdateAllLinks( nLinkMode == MANUAL, false, nullptr );
485 
486  if ( m_xDoc->GetDocShell()->IsLoading() )
487  {
488  // #i59688#
489  m_xDoc->GetDocShell()->LoadingFinished();
490  }
491  }
492 
493  delete m_pSttNdIdx;
494 
    // Attributes still queued at this point are unexpected (asserted) but are
    // freed anyway to avoid leaking them.
495  if( !m_aSetAttrTab.empty() )
496  {
497  OSL_ENSURE( m_aSetAttrTab.empty(),"There are still attributes on the stack" );
498  for ( const auto& rpAttr : m_aSetAttrTab )
499  delete rpAttr;
500  m_aSetAttrTab.clear();
501  }
502 
503  m_pCSS1Parser.reset();
504  m_pNumRuleInfo.reset();
505  DeleteFormImpl();
506  m_pFootEndNoteImpl.reset();
507 
508  OSL_ENSURE(!m_xTable, "It exists still an open table");
509  m_pImageMaps.reset();
510 
511  OSL_ENSURE( m_vPendingStack.empty(),
512  "SwHTMLParser::~SwHTMLParser: Here should not be Pending-Stack anymore" );
513  m_vPendingStack.clear();
514 
515  m_xDoc.clear();
516 
517  if ( m_pTempViewFrame )
518  {
520 
    // NOTE(review): m_xDoc was cleared just above, so m_xDoc.is() looks like
    // it is always false here and the ClearItem( SID_HIDDEN ) call appears
    // unreachable - confirm whether the clear() was meant to come after this
    // block.
521  // the temporary view frame is hidden, so the hidden flag might need to be removed
522  if ( m_bRemoveHidden && m_xDoc.is() && m_xDoc->GetDocShell() && m_xDoc->GetDocShell()->GetMedium() )
523  m_xDoc->GetDocShell()->GetMedium()->GetItemSet()->ClearItem( SID_HIDDEN );
524  }
525 }
526 
// Link handler of SwHTMLParser named AsyncCallback (posted elsewhere in this
// file through Application::PostUserEvent).
//
// Clears the stored event id, marks the parse as failed when the import is
// being aborted or when the parser holds the only remaining reference to the
// document, and then invokes the link returned by GetAsynchCallLink().
527 IMPL_LINK_NOARG( SwHTMLParser, AsyncCallback, void*, void )
528 {
    // The user event has fired, so there is no pending event id any more.
529  m_nEventId=nullptr;
530 
531  // #i47907# - If the document has already been destructed,
532  // the parser should be aware of this:
    // A reference count of 1 means only the parser's own m_xDoc still
    // references the document.
533  if( ( m_xDoc->GetDocShell() && m_xDoc->GetDocShell()->IsAbortingImport() )
534  || 1 == m_xDoc->getReferenceCount() )
535  {
536  // was the import aborted by SFX?
537  eState = SvParserState::Error;
538  }
539 
    // The link is called in both cases, including after eState was set to
    // Error.
540  GetAsynchCallLink().Call(nullptr);
541 }
542 
544 {
    // Prepares the document for parsing and returns the parser state held in
    // eRet.
    //
    // NOTE(review): the signature line (listing line 543) is absent from this
    // extract; this is presumably SwHTMLParser::CallParser - confirm. Listing
    // lines 558 and 595 (the latter presumably declares/assigns eRet) are
    // missing as well.
545  // create temporary index on position 0, so it won't be moved!
546  m_pSttNdIdx = new SwNodeIndex( m_xDoc->GetNodes() );
547  if( !IsNewDoc() ) // insert into existing document ?
548  {
549  const SwPosition* pPos = m_pPam->GetPoint();
550 
    // The node at the cursor is split twice; m_pSttNdIdx is set to the node
    // index just before the cursor position after the first split.
551  m_xDoc->getIDocumentContentOperations().SplitNode( *pPos, false );
552 
553  *m_pSttNdIdx = pPos->nNode.GetIndex()-1;
554  m_xDoc->getIDocumentContentOperations().SplitNode( *pPos, false );
555 
556  SwPaM aInsertionRangePam( *pPos );
557 
559 
560  // split any redline over the insertion point
561  aInsertionRangePam.SetMark();
562  *aInsertionRangePam.GetPoint() = *m_pPam->GetPoint();
563  aInsertionRangePam.Move( fnMoveBackward );
564  m_xDoc->getIDocumentRedlineAccess().SplitRedline( aInsertionRangePam );
565 
566  m_xDoc->SetTextFormatColl( *m_pPam,
567  m_pCSS1Parser->GetTextCollFromPool( RES_POOLCOLL_STANDARD ));
568  }
569 
570  if( GetMedium() )
571  {
572  if( !m_bViewCreated )
573  {
    // No view yet: schedule AsyncCallback as a user event.
574  m_nEventId = Application::PostUserEvent( LINK( this, SwHTMLParser, AsyncCallback ) );
575  }
576  else
577  {
    // NOTE(review): this branch is only reached when m_bViewCreated is
    // already true, so the assignment below has no visible effect.
578  m_bViewCreated = true;
579  m_nEventId = nullptr;
580  }
581  }
582  else // show progress bar
583  {
    // Seek to the end to learn the stream size for the progress range, then
    // rewind; errors raised by the seeks are reset each time.
584  rInput.Seek(STREAM_SEEK_TO_END);
585  rInput.ResetError();
586 
587  m_xProgress.reset(new ImportProgress(m_xDoc->GetDocShell(), 0, rInput.Tell()));
588 
589  rInput.Seek(STREAM_SEEK_TO_BEGIN);
590  rInput.ResetError();
591  }
592 
    // Listen on the notifier of page descriptor 0.
593  StartListening(m_xDoc->GetPageDesc( 0 ).GetNotifier());
594 
596  return eRet;
597 }
598 
600 {
    // Looks at the node directly before index nNodeIdx and returns true when
    // it is a content node, or an end node whose start node is a section node.
    //
    // NOTE(review): the signature line (listing line 599) is absent from this
    // extract; judging by the call site "CanRemoveNode(nNodeIdx)" later in the
    // file this is presumably SwHTMLParser::CanRemoveNode - confirm.
    // NOTE(review): nNodeIdx - 1 is used as an index without a visible check
    // that nNodeIdx is greater than 0 - confirm callers guarantee this.
601  const SwNode *pPrev = m_xDoc->GetNodes()[nNodeIdx - 1];
602  return pPrev->IsContentNode() || (pPrev->IsEndNode() && pPrev->StartOfSectionNode()->IsSectionNode());
603 }
604 
606 {
    // Body of SwHTMLParser::Continue (name taken from the assertion texts
    // below). Visible phases:
    //   1. flag an error if the doc shell is aborting the import and start a
    //      view-shell action;
    //   2. on the first call with a medium and no view yet: switch to
    //      Pending, mark the document as loading asynchronously and return;
    //   3. disable SetModified, the OLE link and Undo for the import;
    //   4. continue parsing (or unwind the pending stack on error);
    //   5. when no longer Pending: flush open script/applet/numbering/contexts
    //      and re-join the paragraph that was split at the insertion start;
    //   6. when Accepted: remove the trailing empty paragraph or re-join the
    //      split end, adjust the autoload URL and document statistics;
    //   7. restore Undo, the OLE link, the modified state and SetModified,
    //      then end the view-shell action.
    //
    // NOTE(review): the signature line (listing line 605) and listing lines
    // 687, 706, 745, 786, 797, 803, 807, 829, 835 and 837 are absent from
    // this extract; comments describe only the visible statements.
607 #ifdef DBG_UTIL
608  OSL_ENSURE(!m_nContinue, "Continue in Continue - not supposed to happen");
609  m_nContinue++;
610 #endif
611 
612  // When the import (of SFX) is aborted, an error will be set but
613  // we still continue, so that we clean up properly.
614  OSL_ENSURE( SvParserState::Error!=eState,
615  "SwHTMLParser::Continue: already set an error" );
616  if( m_xDoc->GetDocShell() && m_xDoc->GetDocShell()->IsAbortingImport() )
617  eState = SvParserState::Error;
618 
619  // Fetch SwViewShell from document, save it and set as current.
620  SwViewShell *pInitVSh = CallStartAction();
621 
622  if( SvParserState::Error != eState && GetMedium() && !m_bViewCreated )
623  {
624  // At first call first return, show document and wait for callback
625  // time.
626  // At this point in CallParser only one digit was read and
627  // a SaveState(0) was called.
628  eState = SvParserState::Pending;
629  m_bViewCreated = true;
630  m_xDoc->SetInLoadAsynchron( true );
631 
632 #ifdef DBG_UTIL
633  m_nContinue--;
634 #endif
635 
636  return;
637  }
638 
    // Remember whether SetModified was enabled so it can be re-enabled at the
    // end of this function.
639  m_bSetModEnabled = false;
640  if( m_xDoc->GetDocShell() )
641  {
642  m_bSetModEnabled = m_xDoc->GetDocShell()->IsEnableSetModified();
643  if( m_bSetModEnabled )
644  {
645  m_xDoc->GetDocShell()->EnableSetModified( false );
646  }
647  }
648 
649  // during import don't call OLE-Modified
650  Link<bool,void> aOLELink( m_xDoc->GetOle2Link() );
651  m_xDoc->SetOle2Link( Link<bool,void>() );
652 
    // Snapshot the modified flag and the Undo state; both are restored near
    // the end of this function.
653  bool bModified = m_xDoc->getIDocumentState().IsModified();
654  bool const bWasUndo = m_xDoc->GetIDocumentUndoRedo().DoesUndo();
655  m_xDoc->GetIDocumentUndoRedo().DoUndo(false);
656 
657  // When the import will be aborted, don't call Continue anymore.
658  // If a Pending-Stack exists make sure the stack is ended with a call
659  // of NextToken.
660  if( SvParserState::Error == eState )
661  {
662  OSL_ENSURE( m_vPendingStack.empty() || m_vPendingStack.back().nToken != HtmlTokenId::NONE,
663  "SwHTMLParser::Continue: Pending-Stack without Token" );
664  if( !m_vPendingStack.empty() && m_vPendingStack.back().nToken != HtmlTokenId::NONE )
665  NextToken( m_vPendingStack.back().nToken );
666  OSL_ENSURE( m_vPendingStack.empty(),
667  "SwHTMLParser::Continue: There is again a Pending-Stack" );
668  }
669  else
670  {
    // Resume with the token saved on the pending stack if there is one,
    // otherwise with the token passed in.
671  HTMLParser::Continue( !m_vPendingStack.empty() ? m_vPendingStack.back().nToken : nToken );
672  }
673 
674  // disable progress bar again
675  m_xProgress.reset();
676 
677  bool bLFStripped = false;
678  if( SvParserState::Pending != GetStatus() )
679  {
680  // set the last attributes yet
681  {
682  if( !m_aScriptSource.isEmpty() )
683  {
684  SwScriptFieldType *pType =
685  static_cast<SwScriptFieldType*>(m_xDoc->getIDocumentFieldsAccess().GetSysFieldType( SwFieldIds::Script ));
686 
688  false );
689  InsertAttr( SwFormatField( aField ), false );
690  }
691 
692  if( m_pAppletImpl )
693  {
694  if( m_pAppletImpl->GetApplet().is() )
695  EndApplet();
696  else
697  EndObject();
698  }
699 
700  // maybe remove an existing LF after the last paragraph
701  if( IsNewDoc() )
702  bLFStripped = StripTrailingLF() > 0;
703 
704  // close still open numbering
705  while( GetNumInfo().GetNumRule() )
707 
708  OSL_ENSURE( !m_nContextStMin, "There are protected contexts" );
709  // try this twice, first normally to let m_nContextStMin decrease
710  // naturally and get contexts popped in desired order, and if that
711  // fails force it
712  for (int i = 0; i < 2; ++i)
713  {
714  while (m_aContexts.size() > m_nContextStMin)
715  {
716  std::unique_ptr<HTMLAttrContext> xCntxt(PopContext());
717  if (xCntxt)
718  EndContext(xCntxt.get());
719  }
720  if (!m_nContextStMin)
721  break;
722  OSL_ENSURE(!m_nContextStMin, "There are still protected contexts");
723  m_nContextStMin = 0;
724  }
725 
726  m_aParaAttrs.clear();
727 
728  SetAttr( false );
729 
730  // set the first delayed styles
731  m_pCSS1Parser->SetDelayedStyles();
732  }
733 
734  // again correct the start
735  if( !IsNewDoc() && m_pSttNdIdx->GetIndex() )
736  {
737  SwTextNode* pTextNode = m_pSttNdIdx->GetNode().GetTextNode();
738  SwNodeIndex aNxtIdx( *m_pSttNdIdx );
739  if( pTextNode && pTextNode->CanJoinNext( &aNxtIdx ))
740  {
741  const sal_Int32 nStt = pTextNode->GetText().getLength();
742  // when the cursor is still in the node, then set him at the end
743  if( m_pPam->GetPoint()->nNode == aNxtIdx )
744  {
746  m_pPam->GetPoint()->nContent.Assign( pTextNode, nStt );
747  }
748 
749 #if OSL_DEBUG_LEVEL > 0
750 // !!! shouldn't be possible, or ??
751  OSL_ENSURE( m_pSttNdIdx->GetIndex()+1 != m_pPam->GetBound().nNode.GetIndex(),
752  "Pam.Bound1 is still in the node" );
753  OSL_ENSURE( m_pSttNdIdx->GetIndex()+1 != m_pPam->GetBound( false ).nNode.GetIndex(),
754  "Pam.Bound2 is still in the node" );
755 
756  if( m_pSttNdIdx->GetIndex()+1 == m_pPam->GetBound().nNode.GetIndex() )
757  {
758  const sal_Int32 nCntPos = m_pPam->GetBound().nContent.GetIndex();
759  m_pPam->GetBound().nContent.Assign( pTextNode,
760  pTextNode->GetText().getLength() + nCntPos );
761  }
762  if( m_pSttNdIdx->GetIndex()+1 == m_pPam->GetBound( false ).nNode.GetIndex() )
763  {
764  const sal_Int32 nCntPos = m_pPam->GetBound( false ).nContent.GetIndex();
765  m_pPam->GetBound( false ).nContent.Assign( pTextNode,
766  pTextNode->GetText().getLength() + nCntPos );
767  }
768 #endif
769  // Keep character attribute!
    // NOTE(review): pDelNd is used without a null check; presumably the
    // successful CanJoinNext() above guarantees a text node - confirm.
770  SwTextNode* pDelNd = aNxtIdx.GetNode().GetTextNode();
771  if (pTextNode->GetText().getLength())
772  pDelNd->FormatToTextAttr( pTextNode );
773  else
774  pTextNode->ChgFormatColl( pDelNd->GetTextColl() );
775  pTextNode->JoinNext();
776  }
777  }
778  }
779 
780  if( SvParserState::Accepted == eState )
781  {
782  if( m_nMissingImgMaps )
783  {
784  // Some Image-Map relations are still missing.
785  // Maybe now the Image-Maps are there?
787  }
788 
789  // now remove the last useless paragraph
790  SwPosition* pPos = m_pPam->GetPoint();
791  if( !pPos->nContent.GetIndex() && !bLFStripped )
792  {
793  SwTextNode* pCurrentNd;
794  sal_uLong nNodeIdx = pPos->nNode.GetIndex();
795 
796  bool bHasFlysOrMarks =
798 
799  if( IsNewDoc() )
800  {
801  if (!m_pPam->GetPoint()->nContent.GetIndex() && CanRemoveNode(nNodeIdx))
802  {
804  if( pCNd && pCNd->StartOfSectionIndex()+2 <
805  pCNd->EndOfSectionIndex() && !bHasFlysOrMarks )
806  {
808  SwCursorShell *pCursorSh = dynamic_cast<SwCursorShell *>( pVSh );
809  if( pCursorSh &&
810  pCursorSh->GetCursor()->GetPoint()
811  ->nNode.GetIndex() == nNodeIdx )
812  {
813  pCursorSh->MovePara(GoPrevPara, fnParaEnd );
814  pCursorSh->SetMark();
815  pCursorSh->ClearMark();
816  }
817  m_pPam->GetBound().nContent.Assign( nullptr, 0 );
818  m_pPam->GetBound(false).nContent.Assign( nullptr, 0 );
819  m_xDoc->GetNodes().Delete( m_pPam->GetPoint()->nNode );
820  }
821  }
822  }
823  else if( nullptr != ( pCurrentNd = m_xDoc->GetNodes()[ nNodeIdx ]->GetTextNode()) && !bHasFlysOrMarks )
824  {
825  if( pCurrentNd->CanJoinNext( &pPos->nNode ))
826  {
827  SwTextNode* pNextNd = pPos->nNode.GetNode().GetTextNode();
828  pPos->nContent.Assign( pNextNd, 0 );
830  pNextNd->JoinPrev();
831  }
832  else if (pCurrentNd->GetText().isEmpty())
833  {
834  pPos->nContent.Assign( nullptr, 0 );
836  m_xDoc->GetNodes().Delete( pPos->nNode );
838  }
839  }
840  }
841 
842  // annul the SplitNode from the beginning
843  else if( !IsNewDoc() )
844  {
845  if( pPos->nContent.GetIndex() ) // then there was no <p> at the end
846  m_pPam->Move( fnMoveForward, GoInNode ); // therefore to the next
847  SwTextNode* pTextNode = pPos->nNode.GetNode().GetTextNode();
848  SwNodeIndex aPrvIdx( pPos->nNode );
849  if( pTextNode && pTextNode->CanJoinPrev( &aPrvIdx ) &&
850  *m_pSttNdIdx <= aPrvIdx )
851  {
852  // Normally here should take place a JoinNext, but all cursors and
853  // so are registered in pTextNode, so that it MUST remain.
854 
855  // Convert paragraph to character attribute, from Prev adopt
856  // the paragraph attribute and the template!
857  SwTextNode* pPrev = aPrvIdx.GetNode().GetTextNode();
858  pTextNode->ChgFormatColl( pPrev->GetTextColl() );
859  pTextNode->FormatToTextAttr( pPrev );
860  pTextNode->ResetAllAttr();
861 
862  if( pPrev->HasSwAttrSet() )
863  pTextNode->SetAttr( *pPrev->GetpSwAttrSet() );
864 
865  if( &m_pPam->GetBound().nNode.GetNode() == pPrev )
866  m_pPam->GetBound().nContent.Assign( pTextNode, 0 );
867  if( &m_pPam->GetBound(false).nNode.GetNode() == pPrev )
868  m_pPam->GetBound(false).nContent.Assign( pTextNode, 0 );
869 
870  pTextNode->JoinPrev();
871  }
872  }
873 
874  // adjust AutoLoad in DocumentProperties
    // Only for new documents and not while fuzzing: when an autoload interval
    // is set but no autoload URL, the URL is set to the path of the imported
    // file.
875  if (!utl::ConfigManager::IsFuzzing() && IsNewDoc())
876  {
877  SwDocShell *pDocShell(m_xDoc->GetDocShell());
878  OSL_ENSURE(pDocShell, "no SwDocShell");
879  if (pDocShell) {
880  uno::Reference<document::XDocumentPropertiesSupplier> xDPS(
881  pDocShell->GetModel(), uno::UNO_QUERY_THROW);
882  uno::Reference<document::XDocumentProperties> xDocProps(
883  xDPS->getDocumentProperties());
884  OSL_ENSURE(xDocProps.is(), "DocumentProperties is null");
885  if ( xDocProps.is() && (xDocProps->getAutoloadSecs() > 0) &&
886  (xDocProps->getAutoloadURL().isEmpty()) )
887  {
888  xDocProps->setAutoloadURL(m_aPathToFile);
889  }
890  }
891  }
892 
893  if( m_bUpdateDocStat )
894  {
895  m_xDoc->getIDocumentStatistics().UpdateDocStat( false, true );
896  }
897  }
898 
    // The start index is only needed while parsing may still resume.
899  if( SvParserState::Pending != GetStatus() )
900  {
901  delete m_pSttNdIdx;
902  m_pSttNdIdx = nullptr;
903  }
904 
905  // should the parser be the last one who hold the document, then nothing
906  // has to be done anymore, document will be destroyed shortly!
907  if( 1 < m_xDoc->getReferenceCount() )
908  {
909  if( bWasUndo )
910  {
911  m_xDoc->GetIDocumentUndoRedo().DelAllUndoObj();
912  m_xDoc->GetIDocumentUndoRedo().DoUndo(true);
913  }
914  else if( !pInitVSh )
915  {
916  // When at the beginning of Continue no Shell was available,
917  // it's possible in the meantime one was created.
918  // In that case the bWasUndo flag is wrong and we must
919  // enable Undo.
920  SwViewShell *pTmpVSh = CheckActionViewShell();
921  if( pTmpVSh )
922  {
923  m_xDoc->GetIDocumentUndoRedo().DoUndo(true);
924  }
925  }
926 
927  m_xDoc->SetOle2Link( aOLELink );
928  if( !bModified )
929  m_xDoc->getIDocumentState().ResetModified();
930  if( m_bSetModEnabled && m_xDoc->GetDocShell() )
931  {
932  m_xDoc->GetDocShell()->EnableSetModified();
933  m_bSetModEnabled = false; // this is unnecessary here
934  }
935  }
936 
937  // When the Document-SwViewShell still exists and an Action is open
938  // (doesn't have to be by abort), end the Action, disconnect from Shell
939  // and finally reconstruct the old Shell.
940  CallEndAction( true );
941 
942 #ifdef DBG_UTIL
943  m_nContinue--;
944 #endif
945 }
946 
// Hint handler: reacts only to SfxHintId::Dying. On that hint the parser
// stops listening to all broadcasters and calls ReleaseRef(); every other
// hint is ignored.
//
// NOTE(review): ReleaseRef() presumably balances a reference taken elsewhere
// and may destroy this object - the matching acquire is not visible in this
// extract, confirm.
947 void SwHTMLParser::Notify(const SfxHint& rHint)
948 {
949  if(rHint.GetId() == SfxHintId::Dying)
950  {
951  EndListeningAll();
952  ReleaseRef();
953  }
954 }
955 
957 {
    // Marks the document as initialised (asserting this happens only once).
    // For a new document it additionally finishes a still-open header, ends
    // the current view-shell action, switches Undo off and starts a new
    // action.
    //
    // NOTE(review): the signature line (listing line 956) is absent from this
    // extract; the name is taken from the assertion text below.
958  OSL_ENSURE( !m_bDocInitalized, "DocumentDetected called multiple times" );
959  m_bDocInitalized = true;
960  if( IsNewDoc() )
961  {
962  if( IsInHeader() )
963  FinishHeader();
964 
965  CallEndAction( true );
966 
967  m_xDoc->GetIDocumentUndoRedo().DoUndo(false);
968  // For DocumentDetected in general a SwViewShell is created.
969  // But it also can be created later, in case the UI is captured.
970  CallStartAction();
971  }
972 }
973 
974 // Called for every token that is recognised in CallParser
976 {
977  if( ( m_xDoc->GetDocShell() && m_xDoc->GetDocShell()->IsAbortingImport() )
978  || 1 == m_xDoc->getReferenceCount() )
979  {
980  // Was the import cancelled by SFX? If a pending stack
981  // exists, clean it.
982  eState = SvParserState::Error;
983  OSL_ENSURE( m_vPendingStack.empty() || m_vPendingStack.back().nToken != HtmlTokenId::NONE,
984  "SwHTMLParser::NextToken: Pending-Stack without token" );
985  if( 1 == m_xDoc->getReferenceCount() || m_vPendingStack.empty() )
986  return ;
987  }
988 
989 #if OSL_DEBUG_LEVEL > 0
990  if( !m_vPendingStack.empty() )
991  {
992  switch( nToken )
993  {
994  // tables are read by recursive method calls
995  case HtmlTokenId::TABLE_ON:
996  // For CSS declarations we might have to wait
997  // for a file download to finish
998  case HtmlTokenId::LINK:
999  // For controls we might have to set the size.
1000  case HtmlTokenId::INPUT:
1001  case HtmlTokenId::TEXTAREA_ON:
1002  case HtmlTokenId::SELECT_ON:
1003  case HtmlTokenId::SELECT_OFF:
1004  break;
1005  default:
1006  OSL_ENSURE( m_vPendingStack.empty(), "Unknown token for Pending-Stack" );
1007  break;
1008  }
1009  }
1010 #endif
1011 
1012  // The following special cases have to be treated before the
1013  // filter detection, because Netscape doesn't reference the content
1014  // of the title for filter detection either.
1015  if( m_vPendingStack.empty() )
1016  {
1017  if( m_bInTitle )
1018  {
1019  switch( nToken )
1020  {
1021  case HtmlTokenId::TITLE_OFF:
1022  {
1023  OUString sTitle = m_sTitle.makeStringAndClear();
1024  if( IsNewDoc() && !sTitle.isEmpty() )
1025  {
1026  if( m_xDoc->GetDocShell() ) {
1027  uno::Reference<document::XDocumentPropertiesSupplier>
1028  xDPS(m_xDoc->GetDocShell()->GetModel(),
1029  uno::UNO_QUERY_THROW);
1030  uno::Reference<document::XDocumentProperties> xDocProps(
1031  xDPS->getDocumentProperties());
1032  OSL_ENSURE(xDocProps.is(), "no DocumentProperties");
1033  if (xDocProps.is()) {
1034  xDocProps->setTitle(sTitle);
1035  }
1036 
1037  m_xDoc->GetDocShell()->SetTitle(sTitle);
1038  }
1039  }
1040  m_bInTitle = false;
1041  break;
1042  }
1043 
1044  case HtmlTokenId::NONBREAKSPACE:
1045  m_sTitle.append(" ");
1046  break;
1047 
1048  case HtmlTokenId::SOFTHYPH:
1049  m_sTitle.append("-");
1050  break;
1051 
1052  case HtmlTokenId::TEXTTOKEN:
1053  m_sTitle.append(aToken);
1054  break;
1055 
1056  default:
1057  m_sTitle.append("<");
1058  if( (nToken >= HtmlTokenId::ONOFF_START) && isOffToken(nToken) )
1059  m_sTitle.append("/");
1060  m_sTitle.append(sSaveToken);
1061  if( !aToken.isEmpty() )
1062  {
1063  m_sTitle.append(" ");
1064  m_sTitle.append(aToken);
1065  }
1066  m_sTitle.append(">");
1067  break;
1068  }
1069 
1070  return;
1071  }
1072  }
1073 
1074  // Find out what type of document it is if we don't know already.
1075  // For Controls this has to be finished before the control is inserted
1076  // because for inserting a View is needed.
1077  if( !m_bDocInitalized )
1078  DocumentDetected();
1079 
1080  bool bGetIDOption = false, bInsertUnknown = false;
1081  bool bUpperSpaceSave = m_bUpperSpace;
1082  m_bUpperSpace = false;
1083 
1084  // The following special cases may or have to be treated after the
1085  // filter detection
1086  if( m_vPendingStack.empty() )
1087  {
1088  if( m_bInFloatingFrame )
1089  {
1090  // <SCRIPT> is ignored here (from us), because it is ignored in
1091  // Applets as well
1092  if( HtmlTokenId::IFRAME_OFF == nToken )
1093  {
1094  m_bCallNextToken = false;
1095  m_bInFloatingFrame = false;
1096  }
1097 
1098  return;
1099  }
1100  else if( m_bInNoEmbed )
1101  {
1102  switch( nToken )
1103  {
1104  case HtmlTokenId::NOEMBED_OFF:
1107  m_aContents.clear();
1108  m_bCallNextToken = false;
1109  m_bInNoEmbed = false;
1110  break;
1111 
1112  case HtmlTokenId::RAWDATA:
1114  break;
1115 
1116  default:
1117  OSL_ENSURE( false, "SwHTMLParser::NextToken: invalid tag" );
1118  break;
1119  }
1120 
1121  return;
1122  }
1123  else if( m_pAppletImpl )
1124  {
1125  // in an applet only <PARAM> tags and the </APPLET> tag
1126  // are of interest for us (for the moment)
1127  // <SCRIPT> is ignored here (from Netscape)!
1128 
1129  switch( nToken )
1130  {
1131  case HtmlTokenId::APPLET_OFF:
1132  m_bCallNextToken = false;
1133  EndApplet();
1134  break;
1135  case HtmlTokenId::OBJECT_OFF:
1136  m_bCallNextToken = false;
1137  EndObject();
1138  break;
1139  case HtmlTokenId::PARAM:
1140  InsertParam();
1141  break;
1142  default: break;
1143  }
1144 
1145  return;
1146  }
1147  else if( m_bTextArea )
1148  {
1149  // in a TextArea everything up to </TEXTAREA> is inserted as text.
1150  // <SCRIPT> is ignored here (from Netscape)!
1151 
1152  switch( nToken )
1153  {
1154  case HtmlTokenId::TEXTAREA_OFF:
1155  m_bCallNextToken = false;
1156  EndTextArea();
1157  break;
1158 
1159  default:
1160  InsertTextAreaText( nToken );
1161  break;
1162  }
1163 
1164  return;
1165  }
1166  else if( m_bSelect )
1167  {
1168  // HAS to be treated after bNoScript!
1169  switch( nToken )
1170  {
1171  case HtmlTokenId::SELECT_OFF:
1172  m_bCallNextToken = false;
1173  EndSelect();
1174  return;
1175 
1176  case HtmlTokenId::OPTION:
1178  return;
1179 
1180  case HtmlTokenId::TEXTTOKEN:
1181  InsertSelectText();
1182  return;
1183 
1184  case HtmlTokenId::INPUT:
1185  case HtmlTokenId::SCRIPT_ON:
1186  case HtmlTokenId::SCRIPT_OFF:
1187  case HtmlTokenId::NOSCRIPT_ON:
1188  case HtmlTokenId::NOSCRIPT_OFF:
1189  case HtmlTokenId::RAWDATA:
1190  // treat in normal switch
1191  break;
1192 
1193  default:
1194  // ignore
1195  return;
1196  }
1197  }
1198  else if( m_pMarquee )
1199  {
1200  // in a TextArea everything up to </TEXTAREA> is inserted as text.
1201  // The <SCRIPT> tags are ignored from MS-IE, we ignore the whole
1202  // script.
1203  switch( nToken )
1204  {
1205  case HtmlTokenId::MARQUEE_OFF:
1206  m_bCallNextToken = false;
1207  EndMarquee();
1208  break;
1209 
1210  case HtmlTokenId::TEXTTOKEN:
1212  break;
1213  default: break;
1214  }
1215 
1216  return;
1217  }
1218  else if( m_bInField )
1219  {
1220  switch( nToken )
1221  {
1222  case HtmlTokenId::SDFIELD_OFF:
1223  m_bCallNextToken = false;
1224  EndField();
1225  break;
1226 
1227  case HtmlTokenId::TEXTTOKEN:
1228  InsertFieldText();
1229  break;
1230  default: break;
1231  }
1232 
1233  return;
1234  }
1236  {
1237  switch( nToken )
1238  {
1239  case HtmlTokenId::ANCHOR_OFF:
1240  EndAnchor();
1241  m_bCallNextToken = false;
1242  break;
1243 
1244  case HtmlTokenId::TEXTTOKEN:
1246  break;
1247  default: break;
1248  }
1249  return;
1250  }
1251  else if( !m_aUnknownToken.isEmpty() )
1252  {
1253  // Paste content of unknown tags.
1254  // (but surely if we are not in the header section) fdo#36080 fdo#34666
1255  if (!aToken.isEmpty() && !IsInHeader() )
1256  {
1257  if( !m_bDocInitalized )
1258  DocumentDetected();
1259  m_xDoc->getIDocumentContentOperations().InsertString( *m_pPam, aToken );
1260 
1261  // if there are temporary paragraph attributes and the
1262  // paragraph isn't empty then the paragraph attributes
1263  // are final.
1264  m_aParaAttrs.clear();
1265 
1266  SetAttr();
1267  }
1268 
1269  // Unknown token in the header are only closed by a matching
1270  // end-token, </HEAD> or <BODY>. Text inside is ignored.
1271  switch( nToken )
1272  {
1273  case HtmlTokenId::UNKNOWNCONTROL_OFF:
1274  if( m_aUnknownToken != sSaveToken )
1275  return;
1276  [[fallthrough]];
1277  case HtmlTokenId::FRAMESET_ON:
1278  case HtmlTokenId::HEAD_OFF:
1279  case HtmlTokenId::BODY_ON:
1280  case HtmlTokenId::IMAGE: // Don't know why Netscape acts this way.
1281  m_aUnknownToken.clear();
1282  break;
1283  case HtmlTokenId::TEXTTOKEN:
1284  return;
1285  default:
1286  m_aUnknownToken.clear();
1287  break;
1288  }
1289  }
1290  }
1291 
1292  switch( nToken )
1293  {
1294  case HtmlTokenId::BODY_ON:
1295  if (!m_bBodySeen)
1296  {
1297  m_bBodySeen = true;
1298  if( !m_aStyleSource.isEmpty() )
1299  {
1300  m_pCSS1Parser->ParseStyleSheet( m_aStyleSource );
1301  m_aStyleSource.clear();
1302  }
1303  if( IsNewDoc() )
1304  {
1306  // If there is a template for the first or the right page,
1307  // it is set here.
1308  const SwPageDesc *pPageDesc = nullptr;
1309  if( m_pCSS1Parser->IsSetFirstPageDesc() )
1310  pPageDesc = m_pCSS1Parser->GetFirstPageDesc();
1311  else if( m_pCSS1Parser->IsSetRightPageDesc() )
1312  pPageDesc = m_pCSS1Parser->GetRightPageDesc();
1313 
1314  if( pPageDesc )
1315  {
1316  m_xDoc->getIDocumentContentOperations().InsertPoolItem( *m_pPam, SwFormatPageDesc( pPageDesc ) );
1317  }
1318  }
1319  }
1320  break;
1321 
1322  case HtmlTokenId::LINK:
1323  InsertLink();
1324  break;
1325 
1326  case HtmlTokenId::BASE:
1327  {
1328  const HTMLOptions& rHTMLOptions = GetOptions();
1329  for (size_t i = rHTMLOptions.size(); i; )
1330  {
1331  const HTMLOption& rOption = rHTMLOptions[--i];
1332  switch( rOption.GetToken() )
1333  {
1334  case HtmlOptionId::HREF:
1335  m_sBaseURL = rOption.GetString();
1336  break;
1337  case HtmlOptionId::TARGET:
1338  if( IsNewDoc() )
1339  {
1340  SwDocShell *pDocShell(m_xDoc->GetDocShell());
1341  OSL_ENSURE(pDocShell, "no SwDocShell");
1342  if (pDocShell) {
1343  uno::Reference<document::XDocumentPropertiesSupplier> xDPS(
1344  pDocShell->GetModel(), uno::UNO_QUERY_THROW);
1345  uno::Reference<document::XDocumentProperties>
1346  xDocProps(xDPS->getDocumentProperties());
1347  OSL_ENSURE(xDocProps.is(),"no DocumentProperties");
1348  if (xDocProps.is()) {
1349  xDocProps->setDefaultTarget(
1350  rOption.GetString());
1351  }
1352  }
1353  }
1354  break;
1355  default: break;
1356  }
1357  }
1358  }
1359  break;
1360 
1361  case HtmlTokenId::META:
1362  {
1363  SvKeyValueIterator *pHTTPHeader = nullptr;
1364  if( IsNewDoc() )
1365  {
1366  SwDocShell *pDocSh = m_xDoc->GetDocShell();
1367  if( pDocSh )
1368  pHTTPHeader = pDocSh->GetHeaderAttributes();
1369  }
1370  SwDocShell *pDocShell(m_xDoc->GetDocShell());
1371  OSL_ENSURE(pDocShell, "no SwDocShell");
1372  if (pDocShell)
1373  {
1374  uno::Reference<document::XDocumentProperties> xDocProps;
1375  if (IsNewDoc())
1376  {
1377  const uno::Reference<document::XDocumentPropertiesSupplier>
1378  xDPS( pDocShell->GetModel(), uno::UNO_QUERY_THROW );
1379  xDocProps = xDPS->getDocumentProperties();
1380  OSL_ENSURE(xDocProps.is(), "DocumentProperties is null");
1381  }
1382  ParseMetaOptions( xDocProps, pHTTPHeader );
1383  }
1384  }
1385  break;
1386 
1387  case HtmlTokenId::TITLE_ON:
1388  m_bInTitle = true;
1389  break;
1390 
1391  case HtmlTokenId::SCRIPT_ON:
1392  NewScript();
1393  break;
1394 
1395  case HtmlTokenId::SCRIPT_OFF:
1396  EndScript();
1397  break;
1398 
1399  case HtmlTokenId::NOSCRIPT_ON:
1400  case HtmlTokenId::NOSCRIPT_OFF:
1401  bInsertUnknown = true;
1402  break;
1403 
1404  case HtmlTokenId::STYLE_ON:
1405  NewStyle();
1406  break;
1407 
1408  case HtmlTokenId::STYLE_OFF:
1409  EndStyle();
1410  break;
1411 
1412  case HtmlTokenId::RAWDATA:
1413  if( !m_bIgnoreRawData )
1414  {
1415  if( IsReadScript() )
1416  {
1417  AddScriptSource();
1418  }
1419  else if( IsReadStyle() )
1420  {
1421  if( !m_aStyleSource.isEmpty() )
1422  m_aStyleSource += "\n";
1423  m_aStyleSource += aToken;
1424  }
1425  }
1426  break;
1427 
1428  case HtmlTokenId::OBJECT_ON:
1429  if (m_bXHTML)
1430  {
1431  if (!InsertEmbed())
1432  InsertImage();
1433  break;
1434  }
1435 #if HAVE_FEATURE_JAVA
1436  NewObject();
1437  m_bCallNextToken = m_pAppletImpl!=nullptr && m_xTable;
1438 #endif
1439  break;
1440 
1441  case HtmlTokenId::OBJECT_OFF:
1442  if (!m_aEmbeds.empty())
1443  m_aEmbeds.pop();
1444  break;
1445 
1446  case HtmlTokenId::APPLET_ON:
1447 #if HAVE_FEATURE_JAVA
1448  InsertApplet();
1449  m_bCallNextToken = m_pAppletImpl!=nullptr && m_xTable;
1450 #endif
1451  break;
1452 
1453  case HtmlTokenId::IFRAME_ON:
1456  break;
1457 
1458  case HtmlTokenId::LINEBREAK:
1459  if( !IsReadPRE() )
1460  {
1461  InsertLineBreak();
1462  break;
1463  }
1464  else
1465  bGetIDOption = true;
1466  // <BR>s in <PRE> resemble true LFs, hence no break
1467  [[fallthrough]];
1468 
1469  case HtmlTokenId::NEWPARA:
1470  // CR in PRE/LISTING/XMP
1471  {
1472  if( HtmlTokenId::NEWPARA==nToken ||
1474  {
1475  AppendTextNode(); // there is no LF at this place
1476  // therefore it will cause no problems
1477  SetTextCollAttrs();
1478  }
1479  // progress bar
1480  if (m_xProgress)
1481  m_xProgress->Update(rInput.Tell());
1482  }
1483  break;
1484 
1485  case HtmlTokenId::NONBREAKSPACE:
1486  m_xDoc->getIDocumentContentOperations().InsertString( *m_pPam, OUString(CHAR_HARDBLANK) );
1487  break;
1488 
1489  case HtmlTokenId::SOFTHYPH:
1490  m_xDoc->getIDocumentContentOperations().InsertString( *m_pPam, OUString(CHAR_SOFTHYPHEN) );
1491  break;
1492 
1493  case HtmlTokenId::LINEFEEDCHAR:
1494  if( m_pPam->GetPoint()->nContent.GetIndex() )
1495  AppendTextNode();
1496  if (!m_xTable && !m_xDoc->IsInHeaderFooter(m_pPam->GetPoint()->nNode))
1497  {
1498  NewAttr(m_xAttrTab, &m_xAttrTab->pBreak, SvxFormatBreakItem(SvxBreak::PageBefore, RES_BREAK));
1499  EndAttr( m_xAttrTab->pBreak, false );
1500  }
1501  break;
1502 
1503  case HtmlTokenId::TEXTTOKEN:
1504  // insert string without spanning attributes at the end.
1505  if( !aToken.isEmpty() && ' '==aToken[0] && !IsReadPRE() )
1506  {
1507  sal_Int32 nPos = m_pPam->GetPoint()->nContent.GetIndex();
1508  const SwTextNode* pTextNode = nPos ? m_pPam->GetPoint()->nNode.GetNode().GetTextNode() : nullptr;
1509  if (pTextNode)
1510  {
1511  const OUString& rText = pTextNode->GetText();
1512  sal_Unicode cLast = rText[--nPos];
1513  if( ' ' == cLast || '\x0a' == cLast)
1514  aToken = aToken.copy(1);
1515  }
1516  else
1517  aToken = aToken.copy(1);
1518 
1519  if( aToken.isEmpty() )
1520  {
1521  m_bUpperSpace = bUpperSpaceSave;
1522  break;
1523  }
1524  }
1525 
1526  if( !aToken.isEmpty() )
1527  {
1528  if( !m_bDocInitalized )
1529  DocumentDetected();
1530 
1531  if (!m_aEmbeds.empty())
1532  {
1533  // The text token is inside an OLE object, which means
1534  // alternate text.
1535  SwOLENode* pOLENode = m_aEmbeds.top();
1536  if (SwFlyFrameFormat* pFormat
1537  = dynamic_cast<SwFlyFrameFormat*>(pOLENode->GetFlyFormat()))
1538  {
1540  {
1541  pObject->SetTitle(pObject->GetTitle() + aToken);
1542  break;
1543  }
1544  }
1545  }
1546 
1547  m_xDoc->getIDocumentContentOperations().InsertString( *m_pPam, aToken );
1548 
1549  // if there are temporary paragraph attributes and the
1550  // paragraph isn't empty then the paragraph attributes
1551  // are final.
1552  m_aParaAttrs.clear();
1553 
1554  SetAttr();
1555  }
1556  break;
1557 
1558  case HtmlTokenId::HORZRULE:
1559  InsertHorzRule();
1560  break;
1561 
1562  case HtmlTokenId::IMAGE:
1563  InsertImage();
1564  // if only the parser references the doc, we can break and set
1565  // an error code
1566  if( 1 == m_xDoc->getReferenceCount() )
1567  {
1568  eState = SvParserState::Error;
1569  }
1570  break;
1571 
1572  case HtmlTokenId::SPACER:
1573  InsertSpacer();
1574  break;
1575 
1576  case HtmlTokenId::EMBED:
1577  InsertEmbed();
1578  break;
1579 
1580  case HtmlTokenId::NOEMBED_ON:
1581  m_bInNoEmbed = true;
1582  m_bCallNextToken = bool(m_xTable);
1583  ReadRawData( OOO_STRING_SVTOOLS_HTML_noembed );
1584  break;
1585 
1586  case HtmlTokenId::DEFLIST_ON:
1587  if( m_nOpenParaToken != HtmlTokenId::NONE )
1588  EndPara();
1589  NewDefList();
1590  break;
1591  case HtmlTokenId::DEFLIST_OFF:
1592  if( m_nOpenParaToken != HtmlTokenId::NONE )
1593  EndPara();
1594  EndDefListItem( HtmlTokenId::NONE );
1595  EndDefList();
1596  break;
1597 
1598  case HtmlTokenId::DD_ON:
1599  case HtmlTokenId::DT_ON:
1600  if( m_nOpenParaToken != HtmlTokenId::NONE )
1601  EndPara();
1602  EndDefListItem();// close <DD>/<DT> and set no template
1603  NewDefListItem( nToken );
1604  break;
1605 
1606  case HtmlTokenId::DD_OFF:
1607  case HtmlTokenId::DT_OFF:
1608  // c.f. HtmlTokenId::LI_OFF
1609  // Actually we should close a DD/DT now.
1610  // But neither Netscape nor Microsoft do this and so don't we.
1611  EndDefListItem( nToken );
1612  break;
1613 
1614  // divisions
1615  case HtmlTokenId::DIVISION_ON:
1616  case HtmlTokenId::CENTER_ON:
1617  if (!m_isInTableStructure)
1618  {
1619  if (m_nOpenParaToken != HtmlTokenId::NONE)
1620  {
1621  if (IsReadPRE())
1622  m_nOpenParaToken = HtmlTokenId::NONE;
1623  else
1624  EndPara();
1625  }
1626  NewDivision( nToken );
1627  }
1628  break;
1629 
1630  case HtmlTokenId::DIVISION_OFF:
1631  case HtmlTokenId::CENTER_OFF:
1632  if (!m_isInTableStructure)
1633  {
1634  if (m_nOpenParaToken != HtmlTokenId::NONE)
1635  {
1636  if (IsReadPRE())
1637  m_nOpenParaToken = HtmlTokenId::NONE;
1638  else
1639  EndPara();
1640  }
1641  EndDivision();
1642  }
1643  break;
1644 
1645  case HtmlTokenId::MULTICOL_ON:
1646  if( m_nOpenParaToken != HtmlTokenId::NONE )
1647  EndPara();
1648  NewMultiCol();
1649  break;
1650 
1651  case HtmlTokenId::MULTICOL_OFF:
1652  if( m_nOpenParaToken != HtmlTokenId::NONE )
1653  EndPara();
1654  EndTag( HtmlTokenId::MULTICOL_ON );
1655  break;
1656 
1657  case HtmlTokenId::MARQUEE_ON:
1658  NewMarquee();
1659  m_bCallNextToken = m_pMarquee!=nullptr && m_xTable;
1660  break;
1661 
1662  case HtmlTokenId::FORM_ON:
1663  NewForm();
1664  break;
1665  case HtmlTokenId::FORM_OFF:
1666  EndForm();
1667  break;
1668 
1669  // templates
1670  case HtmlTokenId::PARABREAK_ON:
1671  if( m_nOpenParaToken != HtmlTokenId::NONE )
1672  EndPara( true );
1673  NewPara();
1674  break;
1675 
1676  case HtmlTokenId::PARABREAK_OFF:
1677  EndPara( true );
1678  break;
1679 
1680  case HtmlTokenId::ADDRESS_ON:
1681  if( m_nOpenParaToken != HtmlTokenId::NONE )
1682  EndPara();
1683  NewTextFormatColl(HtmlTokenId::ADDRESS_ON, RES_POOLCOLL_SEND_ADDRESS);
1684  break;
1685 
1686  case HtmlTokenId::ADDRESS_OFF:
1687  if( m_nOpenParaToken != HtmlTokenId::NONE )
1688  EndPara();
1689  EndTextFormatColl( HtmlTokenId::ADDRESS_OFF );
1690  break;
1691 
1692  case HtmlTokenId::BLOCKQUOTE_ON:
1693  case HtmlTokenId::BLOCKQUOTE30_ON:
1694  if( m_nOpenParaToken != HtmlTokenId::NONE )
1695  EndPara();
1696  NewTextFormatColl( HtmlTokenId::BLOCKQUOTE_ON, RES_POOLCOLL_HTML_BLOCKQUOTE );
1697  break;
1698 
1699  case HtmlTokenId::BLOCKQUOTE_OFF:
1700  case HtmlTokenId::BLOCKQUOTE30_OFF:
1701  if( m_nOpenParaToken != HtmlTokenId::NONE )
1702  EndPara();
1703  EndTextFormatColl( HtmlTokenId::BLOCKQUOTE_ON );
1704  break;
1705 
1706  case HtmlTokenId::PREFORMTXT_ON:
1707  case HtmlTokenId::LISTING_ON:
1708  case HtmlTokenId::XMP_ON:
1709  if( m_nOpenParaToken != HtmlTokenId::NONE )
1710  EndPara();
1712  break;
1713 
1714  case HtmlTokenId::PREFORMTXT_OFF:
1715  m_bNoParSpace = true; // the last PRE-paragraph gets a spacing
1716  EndTextFormatColl( HtmlTokenId::PREFORMTXT_OFF );
1717  break;
1718 
1719  case HtmlTokenId::LISTING_OFF:
1720  case HtmlTokenId::XMP_OFF:
1721  EndTextFormatColl( nToken );
1722  break;
1723 
1724  case HtmlTokenId::HEAD1_ON:
1725  case HtmlTokenId::HEAD2_ON:
1726  case HtmlTokenId::HEAD3_ON:
1727  case HtmlTokenId::HEAD4_ON:
1728  case HtmlTokenId::HEAD5_ON:
1729  case HtmlTokenId::HEAD6_ON:
1730  if( m_nOpenParaToken != HtmlTokenId::NONE )
1731  {
1732  if( IsReadPRE() )
1733  m_nOpenParaToken = HtmlTokenId::NONE;
1734  else
1735  EndPara();
1736  }
1737  NewHeading( nToken );
1738  break;
1739 
1740  case HtmlTokenId::HEAD1_OFF:
1741  case HtmlTokenId::HEAD2_OFF:
1742  case HtmlTokenId::HEAD3_OFF:
1743  case HtmlTokenId::HEAD4_OFF:
1744  case HtmlTokenId::HEAD5_OFF:
1745  case HtmlTokenId::HEAD6_OFF:
1746  EndHeading();
1747  break;
1748 
1749  case HtmlTokenId::TABLE_ON:
1750  if( !m_vPendingStack.empty() )
1751  BuildTable( SvxAdjust::End );
1752  else
1753  {
1754  if( m_nOpenParaToken != HtmlTokenId::NONE )
1755  EndPara();
1756  OSL_ENSURE(!m_xTable, "table in table not allowed here");
1757  if( !m_xTable && (IsNewDoc() || !m_pPam->GetNode().FindTableNode()) &&
1758  (m_pPam->GetPoint()->nNode.GetIndex() >
1759  m_xDoc->GetNodes().GetEndOfExtras().GetIndex() ||
1761  {
1762  if ( m_nParaCnt < 5 )
1763  Show(); // show what we have up to here
1764 
1765  SvxAdjust eAdjust = m_xAttrTab->pAdjust
1766  ? static_cast<const SvxAdjustItem&>(m_xAttrTab->pAdjust->GetItem()).
1767  GetAdjust()
1768  : SvxAdjust::End;
1769  BuildTable( eAdjust );
1770  }
1771  else
1772  bInsertUnknown = m_bKeepUnknown;
1773  }
1774  break;
1775 
1776  // lists
1777  case HtmlTokenId::DIRLIST_ON:
1778  case HtmlTokenId::MENULIST_ON:
1779  case HtmlTokenId::ORDERLIST_ON:
1780  case HtmlTokenId::UNORDERLIST_ON:
1781  if( m_nOpenParaToken != HtmlTokenId::NONE )
1782  EndPara();
1783  NewNumberBulletList( nToken );
1784  break;
1785 
1786  case HtmlTokenId::DIRLIST_OFF:
1787  case HtmlTokenId::MENULIST_OFF:
1788  case HtmlTokenId::ORDERLIST_OFF:
1789  case HtmlTokenId::UNORDERLIST_OFF:
1790  if( m_nOpenParaToken != HtmlTokenId::NONE )
1791  EndPara();
1792  EndNumberBulletListItem( HtmlTokenId::NONE, true );
1793  EndNumberBulletList( nToken );
1794  break;
1795 
1796  case HtmlTokenId::LI_ON:
1797  case HtmlTokenId::LISTHEADER_ON:
1798  if( m_nOpenParaToken != HtmlTokenId::NONE &&
1800  || HtmlTokenId::PARABREAK_ON==m_nOpenParaToken) )
1801  {
1802  // only finish paragraph for <P><LI>, not for <DD><LI>
1803  EndPara();
1804  }
1805 
1806  EndNumberBulletListItem( HtmlTokenId::NONE, false );// close <LI>/<LH> and don't set a template
1807  NewNumberBulletListItem( nToken );
1808  break;
1809 
1810  case HtmlTokenId::LI_OFF:
1811  case HtmlTokenId::LISTHEADER_OFF:
1812  EndNumberBulletListItem( nToken, false );
1813  break;
1814 
1815  // Attribute :
1816  case HtmlTokenId::ITALIC_ON:
1817  {
1821  NewStdAttr( HtmlTokenId::ITALIC_ON,
1822  &m_xAttrTab->pItalic, aPosture,
1823  &m_xAttrTab->pItalicCJK, &aPostureCJK,
1824  &m_xAttrTab->pItalicCTL, &aPostureCTL );
1825  }
1826  break;
1827 
1828  case HtmlTokenId::BOLD_ON:
1829  {
1833  NewStdAttr( HtmlTokenId::BOLD_ON,
1834  &m_xAttrTab->pBold, aWeight,
1835  &m_xAttrTab->pBoldCJK, &aWeightCJK,
1836  &m_xAttrTab->pBoldCTL, &aWeightCTL );
1837  }
1838  break;
1839 
1840  case HtmlTokenId::STRIKE_ON:
1841  case HtmlTokenId::STRIKETHROUGH_ON:
1842  {
1843  NewStdAttr( HtmlTokenId::STRIKE_ON, &m_xAttrTab->pStrike,
1845  }
1846  break;
1847 
1848  case HtmlTokenId::UNDERLINE_ON:
1849  {
1850  NewStdAttr( HtmlTokenId::UNDERLINE_ON, &m_xAttrTab->pUnderline,
1852  }
1853  break;
1854 
1855  case HtmlTokenId::SUPERSCRIPT_ON:
1856  {
1857  NewStdAttr( HtmlTokenId::SUPERSCRIPT_ON, &m_xAttrTab->pEscapement,
1859  }
1860  break;
1861 
1862  case HtmlTokenId::SUBSCRIPT_ON:
1863  {
1864  NewStdAttr( HtmlTokenId::SUBSCRIPT_ON, &m_xAttrTab->pEscapement,
1866  }
1867  break;
1868 
1869  case HtmlTokenId::BLINK_ON:
1870  {
1871  NewStdAttr( HtmlTokenId::BLINK_ON, &m_xAttrTab->pBlink,
1872  SvxBlinkItem( true, RES_CHRATR_BLINK ) );
1873  }
1874  break;
1875 
1876  case HtmlTokenId::SPAN_ON:
1877  NewStdAttr( HtmlTokenId::SPAN_ON );
1878  break;
1879 
1880  case HtmlTokenId::ITALIC_OFF:
1881  case HtmlTokenId::BOLD_OFF:
1882  case HtmlTokenId::STRIKE_OFF:
1883  case HtmlTokenId::UNDERLINE_OFF:
1884  case HtmlTokenId::SUPERSCRIPT_OFF:
1885  case HtmlTokenId::SUBSCRIPT_OFF:
1886  case HtmlTokenId::BLINK_OFF:
1887  case HtmlTokenId::SPAN_OFF:
1888  EndTag( nToken );
1889  break;
1890 
1891  case HtmlTokenId::STRIKETHROUGH_OFF:
1892  EndTag( HtmlTokenId::STRIKE_OFF );
1893  break;
1894 
1895  case HtmlTokenId::BASEFONT_ON:
1896  NewBasefontAttr();
1897  break;
1898  case HtmlTokenId::BASEFONT_OFF:
1899  EndBasefontAttr();
1900  break;
1901  case HtmlTokenId::FONT_ON:
1902  case HtmlTokenId::BIGPRINT_ON:
1903  case HtmlTokenId::SMALLPRINT_ON:
1904  NewFontAttr( nToken );
1905  break;
1906  case HtmlTokenId::FONT_OFF:
1907  case HtmlTokenId::BIGPRINT_OFF:
1908  case HtmlTokenId::SMALLPRINT_OFF:
1909  EndFontAttr( nToken );
1910  break;
1911 
1912  case HtmlTokenId::EMPHASIS_ON:
1913  case HtmlTokenId::CITIATION_ON:
1914  case HtmlTokenId::STRONG_ON:
1915  case HtmlTokenId::CODE_ON:
1916  case HtmlTokenId::SAMPLE_ON:
1917  case HtmlTokenId::KEYBOARD_ON:
1918  case HtmlTokenId::VARIABLE_ON:
1919  case HtmlTokenId::DEFINSTANCE_ON:
1920  case HtmlTokenId::SHORTQUOTE_ON:
1921  case HtmlTokenId::LANGUAGE_ON:
1922  case HtmlTokenId::AUTHOR_ON:
1923  case HtmlTokenId::PERSON_ON:
1924  case HtmlTokenId::ACRONYM_ON:
1925  case HtmlTokenId::ABBREVIATION_ON:
1926  case HtmlTokenId::INSERTEDTEXT_ON:
1927  case HtmlTokenId::DELETEDTEXT_ON:
1928 
1929  case HtmlTokenId::TELETYPE_ON:
1930  NewCharFormat( nToken );
1931  break;
1932 
1933  case HtmlTokenId::SDFIELD_ON:
1934  NewField();
1936  break;
1937 
1938  case HtmlTokenId::EMPHASIS_OFF:
1939  case HtmlTokenId::CITIATION_OFF:
1940  case HtmlTokenId::STRONG_OFF:
1941  case HtmlTokenId::CODE_OFF:
1942  case HtmlTokenId::SAMPLE_OFF:
1943  case HtmlTokenId::KEYBOARD_OFF:
1944  case HtmlTokenId::VARIABLE_OFF:
1945  case HtmlTokenId::DEFINSTANCE_OFF:
1946  case HtmlTokenId::SHORTQUOTE_OFF:
1947  case HtmlTokenId::LANGUAGE_OFF:
1948  case HtmlTokenId::AUTHOR_OFF:
1949  case HtmlTokenId::PERSON_OFF:
1950  case HtmlTokenId::ACRONYM_OFF:
1951  case HtmlTokenId::ABBREVIATION_OFF:
1952  case HtmlTokenId::INSERTEDTEXT_OFF:
1953  case HtmlTokenId::DELETEDTEXT_OFF:
1954 
1955  case HtmlTokenId::TELETYPE_OFF:
1956  EndTag( nToken );
1957  break;
1958 
1959  case HtmlTokenId::HEAD_OFF:
1960  if( !m_aStyleSource.isEmpty() )
1961  {
1962  m_pCSS1Parser->ParseStyleSheet( m_aStyleSource );
1963  m_aStyleSource.clear();
1964  }
1965  break;
1966 
1967  case HtmlTokenId::DOCTYPE:
1968  case HtmlTokenId::BODY_OFF:
1969  case HtmlTokenId::HTML_OFF:
1970  case HtmlTokenId::HEAD_ON:
1971  case HtmlTokenId::TITLE_OFF:
1972  break; // don't evaluate further???
1973  case HtmlTokenId::HTML_ON:
1974  {
1975  const HTMLOptions& rHTMLOptions = GetOptions();
1976  for (size_t i = rHTMLOptions.size(); i; )
1977  {
1978  const HTMLOption& rOption = rHTMLOptions[--i];
1979  if( HtmlOptionId::DIR == rOption.GetToken() )
1980  {
1981  const OUString& rDir = rOption.GetString();
1982  SfxItemSet aItemSet( m_xDoc->GetAttrPool(),
1983  m_pCSS1Parser->GetWhichMap() );
1984  SvxCSS1PropertyInfo aPropInfo;
1985  OUString aDummy;
1986  ParseStyleOptions( aDummy, aDummy, aDummy, aItemSet,
1987  aPropInfo, nullptr, &rDir );
1988 
1989  m_pCSS1Parser->SetPageDescAttrs( nullptr, &aItemSet );
1990  break;
1991  }
1992  }
1993  }
1994  break;
1995 
1996  case HtmlTokenId::INPUT:
1997  InsertInput();
1998  break;
1999 
2000  case HtmlTokenId::TEXTAREA_ON:
2001  NewTextArea();
2003  break;
2004 
2005  case HtmlTokenId::SELECT_ON:
2006  NewSelect();
2008  break;
2009 
2010  case HtmlTokenId::ANCHOR_ON:
2011  NewAnchor();
2012  break;
2013 
2014  case HtmlTokenId::ANCHOR_OFF:
2015  EndAnchor();
2016  break;
2017 
2018  case HtmlTokenId::COMMENT:
2019  if( ( aToken.getLength() > 5 ) && ( ! m_bIgnoreHTMLComments ) )
2020  {
2021  // insert as Post-It
2022  // If there are no space characters right behind
2023  // the <!-- and on front of the -->, leave the comment untouched.
2024  if( ' ' == aToken[ 3 ] &&
2025  ' ' == aToken[ aToken.getLength()-3 ] )
2026  {
2027  OUString aComment( aToken.copy( 3, aToken.getLength()-5 ) );
2028  InsertComment(comphelper::string::strip(aComment, ' '));
2029  }
2030  else
2031  {
2032  OUString aComment = "<" + aToken + ">";
2033  InsertComment( aComment );
2034  }
2035  }
2036  break;
2037 
2038  case HtmlTokenId::MAP_ON:
2039  // Image Maps are read asynchronously: At first only an image map is created
2040  // Areas are processed later. Nevertheless the
2041  // ImageMap is inserted into the IMap-Array, because it might be used
2042  // already.
2043  m_pImageMap = new ImageMap;
2045  {
2046  if (!m_pImageMaps)
2047  m_pImageMaps.reset( new ImageMaps );
2048  m_pImageMaps->push_back(std::unique_ptr<ImageMap>(m_pImageMap));
2049  }
2050  else
2051  {
2052  delete m_pImageMap;
2053  m_pImageMap = nullptr;
2054  }
2055  break;
2056 
2057  case HtmlTokenId::MAP_OFF:
2058  // there is no ImageMap anymore (don't delete IMap, because it's
2059  // already contained in the array!)
2060  m_pImageMap = nullptr;
2061  break;
2062 
2063  case HtmlTokenId::AREA:
2064  if( m_pImageMap )
2065  ParseAreaOptions( m_pImageMap, m_sBaseURL, SvMacroItemId::OnMouseOver,
2066  SvMacroItemId::OnMouseOut );
2067  break;
2068 
2069  case HtmlTokenId::FRAMESET_ON:
2070  bInsertUnknown = m_bKeepUnknown;
2071  break;
2072 
2073  case HtmlTokenId::NOFRAMES_ON:
2074  if( IsInHeader() )
2075  FinishHeader();
2076  bInsertUnknown = m_bKeepUnknown;
2077  break;
2078 
2079  case HtmlTokenId::UNKNOWNCONTROL_ON:
2080  // Ignore content of unknown token in the header, if the token
2081  // does not start with a '!'.
2082  // (but judging from the code, also if does not start with a '%')
2083  // (and also if we're not somewhere we consider PRE)
2084  if( IsInHeader() && !IsReadPRE() && m_aUnknownToken.isEmpty() &&
2085  !sSaveToken.isEmpty() && '!' != sSaveToken[0] &&
2086  '%' != sSaveToken[0] )
2087  m_aUnknownToken = sSaveToken;
2088  [[fallthrough]];
2089 
2090  default:
2091  bInsertUnknown = m_bKeepUnknown;
2092  break;
2093  }
2094 
2095  if( bGetIDOption )
2096  InsertIDOption();
2097 
2098  if( bInsertUnknown )
2099  {
2100  OUStringBuffer aComment("HTML: <");
2101  if( (nToken >= HtmlTokenId::ONOFF_START) && isOffToken(nToken) )
2102  aComment.append("/");
2103  aComment.append(sSaveToken);
2104  if( !aToken.isEmpty() )
2105  {
2106  UnescapeToken();
2107  aComment.append(" ").append(aToken);
2108  }
2109  aComment.append(">");
2110  InsertComment( aComment.makeStringAndClear() );
2111  }
2112 
2113  // if there are temporary paragraph attributes and the
2114  // paragraph isn't empty then the paragraph attributes are final.
2115  if( !m_aParaAttrs.empty() && m_pPam->GetPoint()->nContent.GetIndex() )
2116  m_aParaAttrs.clear();
2117 }
2118 
2119 static void lcl_swhtml_getItemInfo( const HTMLAttr& rAttr,
2120  bool& rScriptDependent,
2121  sal_uInt16& rScriptType )
2122 {
2123  switch( rAttr.GetItem().Which() )
2124  {
2125  case RES_CHRATR_FONT:
2126  case RES_CHRATR_FONTSIZE:
2127  case RES_CHRATR_LANGUAGE:
2128  case RES_CHRATR_POSTURE:
2129  case RES_CHRATR_WEIGHT:
2130  rScriptType = i18n::ScriptType::LATIN;
2131  rScriptDependent = true;
2132  break;
2133  case RES_CHRATR_CJK_FONT:
2137  case RES_CHRATR_CJK_WEIGHT:
2138  rScriptType = i18n::ScriptType::ASIAN;
2139  rScriptDependent = true;
2140  break;
2141  case RES_CHRATR_CTL_FONT:
2145  case RES_CHRATR_CTL_WEIGHT:
2146  rScriptType = i18n::ScriptType::COMPLEX;
2147  rScriptDependent = true;
2148  break;
2149  default:
2150  rScriptDependent = false;
2151  break;
2152  }
2153 }
2154 
2155 bool SwHTMLParser::AppendTextNode( SwHTMLAppendMode eMode, bool bUpdateNum )
2156 {
2157  // A hard line break at the end always must be removed.
2158  // A second one we replace with paragraph spacing.
2159  sal_Int32 nLFStripped = StripTrailingLF();
2160  if( (AM_NOSPACE==eMode || AM_SOFTNOSPACE==eMode) && nLFStripped > 1 )
2161  eMode = AM_SPACE;
2162 
2163  // the hard attributes of this paragraph will never be invalid again
2164  m_aParaAttrs.clear();
2165 
2166  SwTextNode *pTextNode = (AM_SPACE==eMode || AM_NOSPACE==eMode) ?
2167  m_pPam->GetPoint()->nNode.GetNode().GetTextNode() : nullptr;
2168 
2169  if (pTextNode)
2170  {
2171  const SvxULSpaceItem& rULSpace =
2172  static_cast<const SvxULSpaceItem&>(pTextNode->SwContentNode::GetAttr( RES_UL_SPACE ));
2173 
2174  bool bChange = AM_NOSPACE==eMode ? rULSpace.GetLower() > 0
2175  : rULSpace.GetLower() == 0;
2176 
2177  if( bChange )
2178  {
2179  const SvxULSpaceItem& rCollULSpace =
2180  pTextNode->GetAnyFormatColl().GetULSpace();
2181 
2182  bool bMayReset = AM_NOSPACE==eMode ? rCollULSpace.GetLower() == 0
2183  : rCollULSpace.GetLower() > 0;
2184 
2185  if( bMayReset &&
2186  rCollULSpace.GetUpper() == rULSpace.GetUpper() )
2187  {
2188  pTextNode->ResetAttr( RES_UL_SPACE );
2189  }
2190  else
2191  {
2192  pTextNode->SetAttr(
2193  SvxULSpaceItem( rULSpace.GetUpper(),
2194  AM_NOSPACE==eMode ? 0 : HTML_PARSPACE, RES_UL_SPACE ) );
2195  }
2196  }
2197  }
2198  m_bNoParSpace = AM_NOSPACE==eMode || AM_SOFTNOSPACE==eMode;
2199 
2200  SwPosition aOldPos( *m_pPam->GetPoint() );
2201 
2202  bool bRet = m_xDoc->getIDocumentContentOperations().AppendTextNode( *m_pPam->GetPoint() );
2203 
2204  // split character attributes and maybe set none,
2205  // which are set for the whole paragraph
2206  const SwNodeIndex& rEndIdx = aOldPos.nNode;
2207  const sal_Int32 nEndCnt = aOldPos.nContent.GetIndex();
2208  const SwPosition& rPos = *m_pPam->GetPoint();
2209 
2210  HTMLAttr** pHTMLAttributes = reinterpret_cast<HTMLAttr**>(m_xAttrTab.get());
2211  for (auto nCnt = sizeof(HTMLAttrTable) / sizeof(HTMLAttr*); nCnt--; ++pHTMLAttributes)
2212  {
2213  HTMLAttr *pAttr = *pHTMLAttributes;
2214  if( pAttr && pAttr->GetItem().Which() < RES_PARATR_BEGIN )
2215  {
2216  bool bWholePara = false;
2217 
2218  while( pAttr )
2219  {
2220  HTMLAttr *pNext = pAttr->GetNext();
2221  if( pAttr->GetSttParaIdx() < rEndIdx.GetIndex() ||
2222  (!bWholePara &&
2223  pAttr->GetSttPara() == rEndIdx &&
2224  pAttr->GetSttCnt() != nEndCnt) )
2225  {
2226  bWholePara =
2227  pAttr->GetSttPara() == rEndIdx &&
2228  pAttr->GetSttCnt() == 0;
2229 
2230  sal_Int32 nStt = pAttr->m_nStartContent;
2231  bool bScript = false;
2232  sal_uInt16 nScriptItem;
2233  bool bInsert = true;
2234  lcl_swhtml_getItemInfo( *pAttr, bScript,
2235  nScriptItem );
2236  // set previous part
2237  if( bScript )
2238  {
2239  const SwTextNode *pTextNd =
2240  pAttr->GetSttPara().GetNode().GetTextNode();
2241  OSL_ENSURE( pTextNd, "No text node" );
2242  if( pTextNd )
2243  {
2244  const OUString& rText = pTextNd->GetText();
2245  sal_uInt16 nScriptText =
2246  g_pBreakIt->GetBreakIter()->getScriptType(
2247  rText, pAttr->GetSttCnt() );
2248  sal_Int32 nScriptEnd = g_pBreakIt->GetBreakIter()
2249  ->endOfScript( rText, nStt, nScriptText );
2250  while (nScriptEnd < nEndCnt && nScriptEnd != -1)
2251  {
2252  if( nScriptItem == nScriptText )
2253  {
2254  HTMLAttr *pSetAttr =
2255  pAttr->Clone( rEndIdx, nScriptEnd );
2256  pSetAttr->m_nStartContent = nStt;
2257  pSetAttr->ClearPrev();
2258  if( !pNext || bWholePara )
2259  {
2260  if (pSetAttr->m_bInsAtStart)
2261  m_aSetAttrTab.push_front( pSetAttr );
2262  else
2263  m_aSetAttrTab.push_back( pSetAttr );
2264  }
2265  else
2266  pNext->InsertPrev( pSetAttr );
2267  }
2268  nStt = nScriptEnd;
2269  nScriptText = g_pBreakIt->GetBreakIter()->getScriptType(
2270  rText, nStt );
2271  nScriptEnd = g_pBreakIt->GetBreakIter()
2272  ->endOfScript( rText, nStt, nScriptText );
2273  }
2274  bInsert = nScriptItem == nScriptText;
2275  }
2276  }
2277  if( bInsert )
2278  {
2279  HTMLAttr *pSetAttr =
2280  pAttr->Clone( rEndIdx, nEndCnt );
2281  pSetAttr->m_nStartContent = nStt;
2282 
2283  // When the attribute is for the whole paragraph, the outer
2284  // attributes aren't effective anymore. Hence it may not be inserted
2285  // in the Prev-List of an outer attribute, because that won't be
2286  // set. That leads to shifting when fields are used.
2287  if( !pNext || bWholePara )
2288  {
2289  if (pSetAttr->m_bInsAtStart)
2290  m_aSetAttrTab.push_front( pSetAttr );
2291  else
2292  m_aSetAttrTab.push_back( pSetAttr );
2293  }
2294  else
2295  pNext->InsertPrev( pSetAttr );
2296  }
2297  else
2298  {
2299  HTMLAttr *pPrev = pAttr->GetPrev();
2300  if( pPrev )
2301  {
2302  // the previous attributes must be set anyway
2303  if( !pNext || bWholePara )
2304  {
2305  if (pPrev->m_bInsAtStart)
2306  m_aSetAttrTab.push_front( pPrev );
2307  else
2308  m_aSetAttrTab.push_back( pPrev );
2309  }
2310  else
2311  pNext->InsertPrev( pPrev );
2312  }
2313  }
2314  pAttr->ClearPrev();
2315  }
2316 
2317  pAttr->SetStart( rPos );
2318  pAttr = pNext;
2319  }
2320  }
2321  }
2322 
2323  if( bUpdateNum )
2324  {
2325  if( GetNumInfo().GetDepth() )
2326  {
2327  sal_uInt8 nLvl = GetNumInfo().GetLevel();
2328  SetNodeNum( nLvl );
2329  }
2330  else
2332  }
2333 
2334  // We must set the attribute of the paragraph before now (because of JavaScript)
2335  SetAttr();
2336 
2337  // Now it is time to get rid of all script dependent hints that are
2338  // equal to the settings in the style
2339  SwTextNode *pTextNd = rEndIdx.GetNode().GetTextNode();
2340  OSL_ENSURE( pTextNd, "There is the txt node" );
2341  size_t nCntAttr = (pTextNd && pTextNd->GetpSwpHints())
2342  ? pTextNd->GetSwpHints().Count() : 0;
2343  if( nCntAttr )
2344  {
2345  // These are the end position of all script dependent hints.
2346  // If we find a hint that starts before the current end position,
2347  // we have to set it. If we find a hint that start behind or at
2348  // that position, we have to take the hint value into account.
2349  // If it is equal to the style, or in fact the paragraph value
2350  // for that hint, the hint is removed. Otherwise its end position
2351  // is remembered.
2352  sal_Int32 aEndPos[15] =
2353  { 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0 };
2354  SwpHints& rHints = pTextNd->GetSwpHints();
2355  for( size_t i=0; i < nCntAttr; i++ )
2356  {
2357  SwTextAttr *pHt = rHints.Get( i );
2358  sal_uInt16 nWhich = pHt->Which();
2359  sal_Int16 nIdx = 0;
2360  bool bFont = false;
2361  switch( nWhich )
2362  {
2363  case RES_CHRATR_FONT:
2364  nIdx = 0;
2365  bFont = true;
2366  break;
2367  case RES_CHRATR_FONTSIZE:
2368  nIdx = 1;
2369  break;
2370  case RES_CHRATR_LANGUAGE:
2371  nIdx = 2;
2372  break;
2373  case RES_CHRATR_POSTURE:
2374  nIdx = 3;
2375  break;
2376  case RES_CHRATR_WEIGHT:
2377  nIdx = 4;
2378  break;
2379  case RES_CHRATR_CJK_FONT:
2380  nIdx = 5;
2381  bFont = true;
2382  break;
2384  nIdx = 6;
2385  break;
2387  nIdx = 7;
2388  break;
2390  nIdx = 8;
2391  break;
2392  case RES_CHRATR_CJK_WEIGHT:
2393  nIdx = 9;
2394  break;
2395  case RES_CHRATR_CTL_FONT:
2396  nIdx = 10;
2397  bFont = true;
2398  break;
2400  nIdx = 11;
2401  break;
2403  nIdx = 12;
2404  break;
2406  nIdx = 13;
2407  break;
2408  case RES_CHRATR_CTL_WEIGHT:
2409  nIdx = 14;
2410  break;
2411  default:
2412  // Skip to next attribute
2413  continue;
2414  }
2415  const sal_Int32 nStt = pHt->GetStart();
2416  if( nStt >= aEndPos[nIdx] )
2417  {
2418  const SfxPoolItem& rItem =
2419  static_cast<const SwContentNode *>(pTextNd)->GetAttr( nWhich );
2420  if( bFont ? swhtml_css1atr_equalFontItems(rItem,pHt->GetAttr())
2421  : rItem == pHt->GetAttr() )
2422  {
2423  // The hint is the same as set in the paragraph and
2424  // therefore, it can be deleted
2425  // CAUTION!!! This WILL delete the hint and it MAY
2426  // also delete the SwpHints!!! To avoid any trouble
2427  // we leave the loop immediately if this is the last
2428  // hint.
2429  pTextNd->DeleteAttribute( pHt );
2430  if( 1 == nCntAttr )
2431  break;
2432  i--;
2433  nCntAttr--;
2434  }
2435  else
2436  {
2437  // The hint is different. Therefore all hints within that
2438  // hint have to be ignored.
2439  aEndPos[nIdx] = pHt->GetEnd() ? *pHt->GetEnd() : nStt;
2440  }
2441  }
2442  else
2443  {
2444  // The hint starts before another one ends.
2445  // The hint in this case is not deleted
2446  OSL_ENSURE( pHt->GetEnd() && *pHt->GetEnd() <= aEndPos[nIdx],
2447  "hints aren't nested properly!" );
2448  }
2449  }
2450  }
2451 
2452  if (!m_xTable && !--m_nParaCnt)
2453  Show();
2454 
2455  return bRet;
2456 }
2457 
// NOTE(review): the listing line that carried this function's signature
// (2458) is missing from this extract; judging by the body this is presumably
// SwHTMLParser::AddParSpace() -- confirm against the original file.
//
// Only acts while m_bNoParSpace is set. It clears the flag, looks at the text
// node one node index before the PaM's point and, if that node has no lower
// spacing, either resets its RES_UL_SPACE attribute or sets a new one via
// SetAttr(), depending on the paragraph style and on the script-dependent
// hints found in the node.
2459 {
2460  //If it already has ParSpace, return
2461  if( !m_bNoParSpace )
2462  return;
2463 
2464  m_bNoParSpace = false;
2465 
 // The node examined is the one directly in front of the PaM's point.
2466  sal_uLong nNdIdx = m_pPam->GetPoint()->nNode.GetIndex() - 1;
2467 
2468  SwTextNode *pTextNode = m_xDoc->GetNodes()[nNdIdx]->GetTextNode();
2469  if( !pTextNode )
2470  return;
2471 
 // Copy of the node's effective upper/lower spacing item; nothing is done
 // when a lower spacing is already present.
2472  SvxULSpaceItem rULSpace =
2473  static_cast<const SvxULSpaceItem&>(pTextNode->SwContentNode::GetAttr( RES_UL_SPACE ));
2474  if( rULSpace.GetLower() )
2475  return;
2476 
 // If the format collection has a lower spacing and the same upper spacing
 // as the node, resetting the attribute at the node is sufficient.
2477  const SvxULSpaceItem& rCollULSpace =
2478  pTextNode->GetAnyFormatColl().GetULSpace();
2479  if( rCollULSpace.GetLower() &&
2480  rCollULSpace.GetUpper() == rULSpace.GetUpper() )
2481  {
2482  pTextNode->ResetAttr( RES_UL_SPACE );
2483  }
2484  else
2485  {
2486  //What I do here, is that I examine the attributes, and if
2487  //I find out, that it's CJK/CTL, then I set the paragraph space
2488  //to the value set in HTML_CJK_PARSPACE/HTML_CTL_PARSPACE.
2489 
2490  bool bIsCJK = false;
2491  bool bIsCTL = false;
2492 
2493  const size_t nCntAttr = pTextNode->GetpSwpHints()
2494  ? pTextNode->GetSwpHints().Count() : 0;
2495 
 // The first CJK or CTL character hint that is found decides the script;
 // the scan stops there.
2496  for(size_t i = 0; i < nCntAttr; ++i)
2497  {
2498  SwTextAttr *const pHt = pTextNode->GetSwpHints().Get(i);
2499  sal_uInt16 const nWhich = pHt->Which();
2500  if (RES_CHRATR_CJK_FONT == nWhich ||
2501  RES_CHRATR_CJK_FONTSIZE == nWhich ||
2502  RES_CHRATR_CJK_LANGUAGE == nWhich ||
2503  RES_CHRATR_CJK_POSTURE == nWhich ||
2504  RES_CHRATR_CJK_WEIGHT == nWhich)
2505  {
2506  bIsCJK = true;
2507  break;
2508  }
2509  if (RES_CHRATR_CTL_FONT == nWhich ||
2510  RES_CHRATR_CTL_FONTSIZE == nWhich ||
2511  RES_CHRATR_CTL_LANGUAGE == nWhich ||
2512  RES_CHRATR_CTL_POSTURE == nWhich ||
2513  RES_CHRATR_CTL_WEIGHT == nWhich)
2514  {
2515  bIsCTL = true;
2516  break;
2517  }
2518  }
2519 
 // NOTE(review): the argument lines of the three SetAttr() calls below
 // (listing lines 2523, 2528 and 2531) are missing from this extract;
 // presumably each passes an SvxULSpaceItem with a script-specific lower
 // spacing -- confirm against the original file.
2520  if( bIsCTL )
2521  {
2522  pTextNode->SetAttr(
2524  }
2525  else if( bIsCJK )
2526  {
2527  pTextNode->SetAttr(
2529  } else {
2530  pTextNode->SetAttr(
2532  }
2533  }
2534 }
2535 
// NOTE(review): the listing line with this function's signature (2536) is
// missing from this extract; the assertion text suggests this is
// SwHTMLParser::Show() -- confirm against the original file.
//
// Ends the pending action on the view shell, flags the parser as failed if
// the import is being aborted or the parser holds the only reference to the
// document, and then starts a new action.
2537 {
2538  // Here
2539  // - a EndAction is called, so the document is formatted
2540  // - a Reschedule is called,
2541  // - the own View-Shell is set again
2542  // - and a StartAction is called
2543 
2544  OSL_ENSURE( SvParserState::Working==eState, "Show not in working state - That can go wrong" );
2545  SwViewShell *pOldVSh = CallEndAction();
2546 
 // NOTE(review): listing line 2547 is missing here; per the summary above
 // it is presumably the Reschedule call -- confirm.
2548 
2549  if( ( m_xDoc->GetDocShell() && m_xDoc->GetDocShell()->IsAbortingImport() )
2550  || 1 == m_xDoc->getReferenceCount() )
2551  {
2552  // was the import aborted by SFX?
2553  eState = SvParserState::Error;
2554  }
2555 
2556  // Fetch the SwViewShell again, as it could be destroyed in Reschedule.
2557  SwViewShell *pVSh = CallStartAction( pOldVSh );
2558 
2559  // is the current node not visible anymore, then we use a bigger increment
2560  if( pVSh )
2561  {
 // NOTE(review): listing line 2562 (the assignment target and the
 // condition that selects 5 or 50) is missing from this extract.
2563  ? 5 : 50;
2564  }
2565 }
2566 
// NOTE(review): the listing line with this function's signature (2567) is
// missing from this extract; the assertion text suggests this is
// SwHTMLParser::ShowStatline() -- confirm against the original file.
//
// With a progress object the progress is updated from the input stream
// position; without one the import-abort condition is checked and a pending
// action is ended and restarted when the view shell has an invalid rectangle.
2568 {
2569  // Here
2570  // - a Reschedule is called, so it can be scrolled
2571  // - the own View-Shell is set again
2572  // - a StartAction/EndAction is called, when there was scrolling.
2573 
2574  OSL_ENSURE( SvParserState::Working==eState, "ShowStatLine not in working state - That can go wrong" );
2575 
2576  // scroll bar
2577  if (m_xProgress)
2578  {
2579  m_xProgress->Update(rInput.Tell());
 // NOTE(review): listing line 2580 is missing from this extract.
2581  }
2582  else
2583  {
 // NOTE(review): listing line 2584 is missing from this extract
 // (presumably the Reschedule call mentioned above -- confirm).
2585 
2586  if( ( m_xDoc->GetDocShell() && m_xDoc->GetDocShell()->IsAbortingImport() )
2587  || 1 == m_xDoc->getReferenceCount() )
2588  // was the import aborted by SFX?
2589  eState = SvParserState::Error;
2590 
 // NOTE(review): listing line 2591, which declares pVSh, is missing from
 // this extract.
2592  if( pVSh && pVSh->HasInvalidRect() )
2593  {
 // Both calls pass false for the pointer check; CallEndAction also
 // skips its ActionPend() check here.
2594  CallEndAction( false, false );
2595  CallStartAction( pVSh, false );
2596  }
2597  }
2598 }
2599 
// NOTE(review): the listing line with this function's signature (2600) is
// missing from this extract; the assertion text suggests this is
// SwHTMLParser::CallStartAction( pVSh, bChkPtr ) -- confirm.
//
// Stores the view shell to use in m_pActionViewShell, starts an action on it
// if there is one, and returns the stored pointer (which may be null).
2601 {
2602  OSL_ENSURE( !m_pActionViewShell, "CallStartAction: SwViewShell already set" );
2603 
 // Without a shell from the caller, or when asked to check the pointer,
 // the current view shell is fetched from the document.
2604  if( !pVSh || bChkPtr )
2605  {
2606 #if OSL_DEBUG_LEVEL > 0
2607  SwViewShell *pOldVSh = pVSh;
2608 #endif
2609  pVSh = m_xDoc->getIDocumentLayoutAccess().GetCurrentViewShell();
2610 #if OSL_DEBUG_LEVEL > 0
2611  OSL_ENSURE( !pVSh || !pOldVSh || pOldVSh == pVSh, "CallStartAction: Who swapped the SwViewShell?" );
 // This assignment has no effect: pVSh is already null whenever the
 // condition is true.
2612  if( pOldVSh && !pVSh )
2613  pVSh = nullptr;
2614 #endif
2615  }
2616  m_pActionViewShell = pVSh;
2617 
2618  if( m_pActionViewShell )
2619  {
2620  if( auto pEditShell = dynamic_cast< SwEditShell *>( m_pActionViewShell ) )
2621  pEditShell->StartAction();
2622  else
 // NOTE(review): listing line 2623 (the statement of this else branch)
 // is missing from this extract.
2624  }
2625 
2626  return m_pActionViewShell;
2627 }
2628 
// Ends the action that was started on m_pActionViewShell.
//
// bChkAction: when true, nothing is ended unless the shell reports a pending
//             action (ActionPend()).
// bChkPtr:    when true, m_pActionViewShell is first compared with the
//             document's current view shell and dropped if they differ.
//
// Returns m_pActionViewShell unchanged on the early-exit path. On the normal
// path m_pActionViewShell is reset to null and pVSh is returned.
// NOTE(review): several listing lines of this function are missing from this
// extract (2653, 2663, 2669, 2678); see the notes at the gaps.
2629 SwViewShell *SwHTMLParser::CallEndAction( bool bChkAction, bool bChkPtr )
2630 {
2631  if( bChkPtr )
2632  {
2633  SwViewShell *pVSh = m_xDoc->getIDocumentLayoutAccess().GetCurrentViewShell();
2634  OSL_ENSURE( !pVSh || m_pActionViewShell == pVSh,
2635  "CallEndAction: Who swapped the SwViewShell?" );
2636 #if OSL_DEBUG_LEVEL > 0
 // This assignment has no effect: pVSh is already null whenever the
 // condition is true.
2637  if( m_pActionViewShell && !pVSh )
2638  pVSh = nullptr;
2639 #endif
2640  if( pVSh != m_pActionViewShell )
2641  m_pActionViewShell = nullptr;
2642  }
2643 
2644  if( !m_pActionViewShell || (bChkAction && !m_pActionViewShell->ActionPend()) )
2645  return m_pActionViewShell;
2646 
2647  if( dynamic_cast< const SwEditShell *>( m_pActionViewShell ) != nullptr )
2648  {
2649  // Already scrolled?, then make sure that the view doesn't move!
 // The view lock and the EndActionByVirDev setting are saved here and
 // restored after EndAction().
2650  const bool bOldLock = m_pActionViewShell->IsViewLocked();
2651  m_pActionViewShell->LockView( true );
2652  const bool bOldEndActionByVirDev = m_pActionViewShell->IsEndActionByVirDev();
 // NOTE(review): listing line 2653 is missing from this extract
 // (presumably a SetEndActionByVirDev() call -- confirm).
2654  static_cast<SwEditShell*>(m_pActionViewShell)->EndAction();
2655  m_pActionViewShell->SetEndActionByVirDev( bOldEndActionByVirDev );
2656  m_pActionViewShell->LockView( bOldLock );
2657 
2658  // bChkJumpMark is only set when the object was also found
2659  if( m_bChkJumpMark )
2660  {
2661  const Point aVisSttPos( DOCUMENTBORDER, DOCUMENTBORDER );
2662  if( GetMedium() && aVisSttPos == m_pActionViewShell->VisArea().Pos() )
 // NOTE(review): listing line 2663 (the start of the call whose
 // argument follows) is missing from this extract.
2664  GetMedium()->GetURLObject().GetMark() );
2665  m_bChkJumpMark = false;
2666  }
2667  }
2668  else
 // NOTE(review): listing line 2669 (the statement of this else branch) is
 // missing from this extract.
2670 
2671  // if the parser holds the last reference to the document, then we can
2672  // abort here and set an error.
2673  if( 1 == m_xDoc->getReferenceCount() )
2674  {
2675  eState = SvParserState::Error;
2676  }
2677 
 // NOTE(review): listing line 2678 is missing from this extract; it must
 // declare the pVSh returned below (presumably a copy of
 // m_pActionViewShell taken before it is cleared -- confirm).
2679  m_pActionViewShell = nullptr;
2680 
2681  return pVSh;
2682 }
2683 
// NOTE(review): the listing line with this function's signature (2684) is
// missing from this extract; the assertion text suggests this is
// SwHTMLParser::CheckActionViewShell() -- confirm against the original file.
//
// Compares m_pActionViewShell with the document's current view shell, resets
// it to null if they differ, and returns the (possibly reset) member.
2685 {
2686  SwViewShell *pVSh = m_xDoc->getIDocumentLayoutAccess().GetCurrentViewShell();
2687  OSL_ENSURE( !pVSh || m_pActionViewShell == pVSh,
2688  "CheckActionViewShell: Who has swapped SwViewShell?" );
2689 #if OSL_DEBUG_LEVEL > 0
 // This assignment has no effect: pVSh is already null whenever the
 // condition is true.
2690  if( m_pActionViewShell && !pVSh )
2691  pVSh = nullptr;
2692 #endif
2693  if( pVSh != m_pActionViewShell )
2694  m_pActionViewShell = nullptr;
2695 
2696  return m_pActionViewShell;
2697 }
2698 
// Applies queued attributes to the document in three passes:
//  1. m_aSetAttrTab is walked from back to front; every entry that qualifies
//     is removed from the table and it and its Prev-chain are inserted into
//     the document (or deleted when invalid / no longer applicable).
//  2. m_aMoveFlyFrames is walked from back to front; qualifying fly frame
//     formats are re-anchored as FLY_AT_CHAR at the remembered content index.
//  3. The field attributes collected in pass 1 (aFields) are inserted.
//
// bChkEnd:      when true, only attributes/flys lying before the current PaM
//               position qualify (see the conditions below); when false the
//               decision is based on the nodes-array sections.
// bBeforeTable: the call happens right before a table is inserted; attributes
//               ending in the current node are then ended earlier or dropped.
// pPostIts:     when non-null, postit and script fields are moved to the
//               front of this deque instead of being inserted here.
//
// NOTE(review): several listing lines of this function are missing from this
// extract (2877-2878, 2884, 2895, 2929, 2931, 2939, 2943); see the notes at
// the gaps.
2699 void SwHTMLParser::SetAttr_( bool bChkEnd, bool bBeforeTable,
2700  std::deque<std::unique_ptr<HTMLAttr>> *pPostIts )
2701 {
2702  std::unique_ptr<SwPaM> pAttrPam( new SwPaM( *m_pPam->GetPoint() ) );
2703  const SwNodeIndex& rEndIdx = m_pPam->GetPoint()->nNode;
2704  const sal_Int32 nEndCnt = m_pPam->GetPoint()->nContent.GetIndex();
2705  HTMLAttr* pAttr;
2706  SwContentNode* pCNd;
2707 
 // Field attributes are collected here (owning) and inserted at the very end.
2708  std::vector<std::unique_ptr<HTMLAttr>> aFields;
2709 
 // Iterate backwards so that erasing entry n keeps the remaining indices valid.
2710  for( auto n = m_aSetAttrTab.size(); n; )
2711  {
2712  pAttr = m_aSetAttrTab[ --n ];
 // nWhich is taken from the table entry and is reused unchanged for all
 // attributes of its Prev-chain in the inner loop below.
2713  sal_uInt16 nWhich = pAttr->m_pItem->Which();
2714 
2715  sal_uLong nEndParaIdx = pAttr->GetEndParaIdx();
2716  bool bSetAttr;
2717  if( bChkEnd )
2718  {
2719  // Set character attribute with end early on, so set them still in
2720  // the current paragraph (because of JavaScript and various "chats"(?)).
2721  // This shouldn't be done for attributes which are used for
2722  // the whole paragraph, because they could be from a paragraph style
2723  // which can't be set. Because the attributes are inserted with
2724  // SETATTR_DONTREPLACE, they should be able to be set later.
2725  bSetAttr = ( nEndParaIdx < rEndIdx.GetIndex() &&
2726  (RES_LR_SPACE != nWhich || !GetNumInfo().GetNumRule()) ) ||
2727  ( !pAttr->IsLikePara() &&
2728  nEndParaIdx == rEndIdx.GetIndex() &&
2729  pAttr->GetEndCnt() < nEndCnt &&
2730  (isCHRATR(nWhich) || isTXTATR_WITHEND(nWhich)) ) ||
2731  ( bBeforeTable &&
2732  nEndParaIdx == rEndIdx.GetIndex() &&
2733  !pAttr->GetEndCnt() );
2734  }
2735  else
2736  {
2737  // Attributes in body nodes array section shouldn't be set if we are in a
2738  // special nodes array section, but vice versa it's possible.
2739  sal_uLong nEndOfIcons = m_xDoc->GetNodes().GetEndOfExtras().GetIndex();
2740  bSetAttr = nEndParaIdx < rEndIdx.GetIndex() ||
2741  rEndIdx.GetIndex() > nEndOfIcons ||
2742  nEndParaIdx <= nEndOfIcons;
2743  }
2744 
2745  if( bSetAttr )
2746  {
2747  // The attribute shouldn't be in the list of temporary paragraph
2748  // attributes, because then it would be deleted.
 // Note that this loop empties m_aParaAttrs completely.
2749  while( !m_aParaAttrs.empty() )
2750  {
2751  OSL_ENSURE( pAttr != m_aParaAttrs.back(),
2752  "SetAttr: Attribute must not yet be set" );
2753  m_aParaAttrs.pop_back();
2754  }
2755 
2756  // then set it
2757  m_aSetAttrTab.erase( m_aSetAttrTab.begin() + n );
2758 
 // Process the attribute and then each attribute of its Prev-chain.
 // Every path through the loop body either deletes pAttr or hands it to
 // a unique_ptr container before advancing to pPrev.
2759  while( pAttr )
2760  {
2761  HTMLAttr *pPrev = pAttr->GetPrev();
2762  if( !pAttr->m_bValid )
2763  {
2764  // invalid attributes can be deleted
2765  delete pAttr;
2766  pAttr = pPrev;
2767  continue;
2768  }
2769 
2770  pCNd = pAttr->m_nStartPara.GetNode().GetContentNode();
2771  if( !pCNd )
2772  {
2773  // because of the awful deleting of nodes an index can also
2774  // point to an end node :-(
2775  if ( (pAttr->GetSttPara() == pAttr->GetEndPara()) &&
2776  !isTXTATR_NOEND(nWhich) )
2777  {
2778  // when the end index also points to the node, we don't
2779  // need to set attributes anymore, except if it's a text attribute.
2780  delete pAttr;
2781  pAttr = pPrev;
2782  continue;
2783  }
2784  pCNd = m_xDoc->GetNodes().GoNext( &(pAttr->m_nStartPara) );
2785  if( pCNd )
2786  pAttr->m_nStartContent = 0;
2787  else
2788  {
2789  OSL_ENSURE( false, "SetAttr: GoNext() failed!" );
2790  delete pAttr;
2791  pAttr = pPrev;
2792  continue;
2793  }
2794  }
2795  pAttrPam->GetPoint()->nNode = pAttr->m_nStartPara;
2796 
2797  // because of the deleting of BRs the start index can also
2798  // point behind the end the text
2799  if( pAttr->m_nStartContent > pCNd->Len() )
2800  pAttr->m_nStartContent = pCNd->Len();
2801  pAttrPam->GetPoint()->nContent.Assign( pCNd, pAttr->m_nStartContent );
2802 
 // The mark is set at the attribute's start; the point is moved to
 // its end below.
2803  pAttrPam->SetMark();
2804  if ( (pAttr->GetSttPara() != pAttr->GetEndPara()) &&
2805  !isTXTATR_NOEND(nWhich) )
2806  {
2807  pCNd = pAttr->m_nEndPara.GetNode().GetContentNode();
2808  if( !pCNd )
2809  {
2810  pCNd = SwNodes::GoPrevious( &(pAttr->m_nEndPara) );
2811  if( pCNd )
2812  pAttr->m_nEndContent = pCNd->Len();
2813  else
2814  {
2815  OSL_ENSURE( false, "SetAttr: GoPrevious() failed!" );
2816  pAttrPam->DeleteMark();
2817  delete pAttr;
2818  pAttr = pPrev;
2819  continue;
2820  }
2821  }
2822 
2823  pAttrPam->GetPoint()->nNode = pAttr->m_nEndPara;
2824  }
2825  else if( pAttr->IsLikePara() )
2826  {
2827  pAttr->m_nEndContent = pCNd->Len();
2828  }
2829 
2830  // because of the deleting of BRs the start index can also
2831  // point behind the end the text
2832  if( pAttr->m_nEndContent > pCNd->Len() )
2833  pAttr->m_nEndContent = pCNd->Len();
2834 
2835  pAttrPam->GetPoint()->nContent.Assign( pCNd, pAttr->m_nEndContent );
2836  if( bBeforeTable &&
2837  pAttrPam->GetPoint()->nNode.GetIndex() ==
2838  rEndIdx.GetIndex() )
2839  {
2840  // If we're before inserting a table and the attribute ends
2841  // in the current node, then we must end it in the previous
2842  // node or discard it, if it starts in that node.
2843  if( nWhich != RES_BREAK && nWhich != RES_PAGEDESC &&
2844  !isTXTATR_NOEND(nWhich) )
2845  {
2846  if( pAttrPam->GetMark()->nNode.GetIndex() !=
2847  rEndIdx.GetIndex() )
2848  {
2849  OSL_ENSURE( !pAttrPam->GetPoint()->nContent.GetIndex(),
2850  "Content-Position before table not 0???" );
2851  pAttrPam->Move( fnMoveBackward );
2852  }
2853  else
2854  {
2855  pAttrPam->DeleteMark();
2856  delete pAttr;
2857  pAttr = pPrev;
2858  continue;
2859  }
2860  }
2861  }
2862 
2863  switch( nWhich )
2864  {
2865  case RES_FLTR_BOOKMARK: // insert bookmark
2866  {
2867  const OUString sName( static_cast<SfxStringItem*>(pAttr->m_pItem.get())->GetValue() );
2868  IDocumentMarkAccess* const pMarkAccess = m_xDoc->getIDocumentMarkAccess();
2869  IDocumentMarkAccess::const_iterator_t ppBkmk = pMarkAccess->findMark( sName );
2870  if( ppBkmk != pMarkAccess->getAllMarksEnd() &&
2871  (*ppBkmk)->GetMarkStart() == *pAttrPam->GetPoint() )
2872  break; // do not generate duplicates on this position
2873  pAttrPam->DeleteMark();
2874  const ::sw::mark::IMark* const pNewMark = pMarkAccess->makeMark(
2875  *pAttrPam,
2876  sName,
 // NOTE(review): listing lines 2877-2878 (the remaining makeMark()
 // arguments) are missing from this extract.
2879 
2880  // jump to bookmark
2881  if( JumpToMarks::Mark == m_eJumpTo && pNewMark->GetName() == m_sJmpMark )
2882  {
2883  m_bChkJumpMark = true;
 // NOTE(review): listing line 2884 is missing from this extract.
2885  }
2886  }
2887  break;
2888  case RES_TXTATR_FIELD:
2889  case RES_TXTATR_ANNOTATION:
2890  case RES_TXTATR_INPUTFIELD:
2891  {
2892  SwFieldIds nFieldWhich =
2893  pPostIts
2894  ? static_cast<const SwFormatField *>(pAttr->m_pItem.get())->GetField()->GetTyp()->Which()
 // NOTE(review): listing line 2895 (the else operand of this
 // conditional expression) is missing from this extract.
2896  if( pPostIts && (SwFieldIds::Postit == nFieldWhich ||
2897  SwFieldIds::Script == nFieldWhich) )
2898  {
 // Ownership of pAttr passes to the caller's deque.
2899  pPostIts->emplace_front( pAttr );
2900  }
2901  else
2902  {
 // Ownership of pAttr passes to aFields; it is inserted after
 // the fly frames have been handled.
2903  aFields.emplace_back( pAttr);
2904  }
2905  }
 // The attribute must not be deleted here because a container owns
 // it now, so the common tail of the loop is skipped.
2906  pAttrPam->DeleteMark();
2907  pAttr = pPrev;
2908  continue;
2909 
2910  case RES_LR_SPACE:
2911  if( pAttrPam->GetPoint()->nNode.GetIndex() ==
2912  pAttrPam->GetMark()->nNode.GetIndex())
2913  {
2914  // because of numbering set this attribute directly at node
2915  pCNd->SetAttr( *pAttr->m_pItem );
2916  break;
2917  }
2918  OSL_ENSURE( false,
2919  "LRSpace set over multiple paragraphs!" );
2920  [[fallthrough]]; // (shouldn't reach this point anyway)
2921 
2922  // tdf#94088 expand RES_BACKGROUND to the new fill attribute
2923  // definitions in the range [XATTR_FILL_FIRST .. XATTR_FILL_LAST].
2924  // This is the right place in the future if the adapted fill attributes
2925  // may be handled more directly in HTML import to handle them.
2926  case RES_BACKGROUND:
2927  {
2928  const SvxBrushItem& rBrush = static_cast< SvxBrushItem& >(*pAttr->m_pItem);
 // NOTE(review): listing lines 2929 and 2931 are missing from this
 // extract; they presumably declare and fill aNewSet from rBrush
 // -- confirm against the original file.
2930 
2932  m_xDoc->getIDocumentContentOperations().InsertItemSet(*pAttrPam, aNewSet, SetAttrMode::DONTREPLACE);
2933  break;
2934  }
2935  default:
2936 
2937  // maybe jump to a bookmark
2938  if( RES_TXTATR_INETFMT == nWhich &&
 // NOTE(review): listing line 2939 (one operand of this condition)
 // is missing from this extract.
2940  m_sJmpMark == static_cast<SwFormatINetFormat*>(pAttr->m_pItem.get())->GetName() )
2941  {
2942  m_bChkJumpMark = true;
 // NOTE(review): listing line 2943 is missing from this extract.
2944  }
2945 
2946  m_xDoc->getIDocumentContentOperations().InsertPoolItem( *pAttrPam, *pAttr->m_pItem, SetAttrMode::DONTREPLACE );
2947  }
2948  pAttrPam->DeleteMark();
2949 
2950  delete pAttr;
2951  pAttr = pPrev;
2952  }
2953  }
2954  }
2955 
 // Second pass: fly frames. m_aMoveFlyFrames and m_aMoveFlyCnts are indexed
 // in parallel and erased in parallel.
2956  for( auto n = m_aMoveFlyFrames.size(); n; )
2957  {
2958  SwFrameFormat *pFrameFormat = m_aMoveFlyFrames[ --n ];
2959 
2960  const SwFormatAnchor& rAnchor = pFrameFormat->GetAnchor();
2961  OSL_ENSURE( RndStdIds::FLY_AT_PARA == rAnchor.GetAnchorId(),
2962  "Only At-Para flys need special handling" );
2963  const SwPosition *pFlyPos = rAnchor.GetContentAnchor();
2964  sal_uLong nFlyParaIdx = pFlyPos->nNode.GetIndex();
2965  bool bMoveFly;
2966  if( bChkEnd )
2967  {
2968  bMoveFly = nFlyParaIdx < rEndIdx.GetIndex() ||
2969  ( nFlyParaIdx == rEndIdx.GetIndex() &&
2970  m_aMoveFlyCnts[n] < nEndCnt );
2971  }
2972  else
2973  {
2974  sal_uLong nEndOfIcons = m_xDoc->GetNodes().GetEndOfExtras().GetIndex();
2975  bMoveFly = nFlyParaIdx < rEndIdx.GetIndex() ||
2976  rEndIdx.GetIndex() > nEndOfIcons ||
2977  nFlyParaIdx <= nEndOfIcons;
2978  }
2979  if( bMoveFly )
2980  {
 // The frames are deleted, the anchor is switched to FLY_AT_CHAR at
 // the remembered content index, and the frames are created again.
2981  pFrameFormat->DelFrames();
2982  *pAttrPam->GetPoint() = *pFlyPos;
2983  pAttrPam->GetPoint()->nContent.Assign( pAttrPam->GetContentNode(),
2984  m_aMoveFlyCnts[n] );
2985  SwFormatAnchor aAnchor( rAnchor );
2986  aAnchor.SetType( RndStdIds::FLY_AT_CHAR );
2987  aAnchor.SetAnchor( pAttrPam->GetPoint() );
2988  pFrameFormat->SetFormatAttr( aAnchor );
2989 
 // LEFT / TOP oriented flys get their orientation made relative to
 // the character.
2990  const SwFormatHoriOrient& rHoriOri = pFrameFormat->GetHoriOrient();
2991  if( text::HoriOrientation::LEFT == rHoriOri.GetHoriOrient() )
2992  {
2993  SwFormatHoriOrient aHoriOri( rHoriOri );
2994  aHoriOri.SetRelationOrient( text::RelOrientation::CHAR );
2995  pFrameFormat->SetFormatAttr( aHoriOri );
2996  }
2997  const SwFormatVertOrient& rVertOri = pFrameFormat->GetVertOrient();
2998  if( text::VertOrientation::TOP == rVertOri.GetVertOrient() )
2999  {
3000  SwFormatVertOrient aVertOri( rVertOri );
3001  aVertOri.SetRelationOrient( text::RelOrientation::CHAR );
3002  pFrameFormat->SetFormatAttr( aVertOri );
3003  }
3004 
3005  pFrameFormat->MakeFrames();
3006  m_aMoveFlyFrames.erase( m_aMoveFlyFrames.begin() + n );
3007  m_aMoveFlyCnts.erase( m_aMoveFlyCnts.begin() + n );
3008  }
3009  }
 // Third pass: insert the collected fields at their start positions.
3010  for (auto & field : aFields)
3011  {
3012  pCNd = field->m_nStartPara.GetNode().GetContentNode();
3013  pAttrPam->GetPoint()->nNode = field->m_nStartPara;
3014  pAttrPam->GetPoint()->nContent.Assign( pCNd, field->m_nStartContent );
3015 
3016  if( bBeforeTable &&
3017  pAttrPam->GetPoint()->nNode.GetIndex() == rEndIdx.GetIndex() )
3018  {
 // The first assertion always fires when this branch is entered
 // (bBeforeTable is true here), i.e. it flags the case as unexpected.
3019  OSL_ENSURE( !bBeforeTable, "Aha, the case does occur" );
3020  OSL_ENSURE( !pAttrPam->GetPoint()->nContent.GetIndex(),
3021  "Content-Position before table not 0???" );
3022  // !!!
3023  pAttrPam->Move( fnMoveBackward );
3024  }
3025 
3026  m_xDoc->getIDocumentContentOperations().InsertPoolItem( *pAttrPam, *field->m_pItem );
3027 
 // Release the HTMLAttr as soon as its item has been inserted.
3028  field.reset();
3029  }
3030  aFields.clear();
3031 }
3032 
3033 void SwHTMLParser::NewAttr(const std::shared_ptr<HTMLAttrTable>& rAttrTable, HTMLAttr **ppAttr, const SfxPoolItem& rItem )
3034 {
3035  // Font height and font colour as well as escape attributes may not be
3036  // combined. Therefore they're saved in a list and in it the last opened
3037  // attribute is at the beginning and count is always one. For all other
3038  // attributes count is just incremented.
3039  if( *ppAttr )
3040  {
3041  HTMLAttr *pAttr = new HTMLAttr(*m_pPam->GetPoint(), rItem, ppAttr, rAttrTable);
3042  pAttr->InsertNext( *ppAttr );
3043  (*ppAttr) = pAttr;
3044  }
3045  else
3046  (*ppAttr) = new HTMLAttr(*m_pPam->GetPoint(), rItem, ppAttr, rAttrTable);
3047 }
3048 
// Ends the open attribute pAttr at the current PaM position and removes it
// from its list of open attributes.
//
// pAttr:     the attribute to end; it is either queued for setting (directly
//            in m_aSetAttrTab or in the Prev-list of the next open attribute
//            of the same list) or deleted.
// bChkEmpty: when true, an attribute whose range is empty is deleted instead
//            of queued (subject to the exceptions in the condition below).
//
// Returns false if pAttr was deleted, true if it was queued.
//
// NOTE(review): listing line 3206 is missing from this extract; see the note
// near the end of the function.
3049 bool SwHTMLParser::EndAttr( HTMLAttr* pAttr, bool bChkEmpty )
3050 {
3051  bool bRet = true;
3052 
3053  // The list header is saved in the attribute.
3054  HTMLAttr **ppHead = pAttr->m_ppHead;
3055 
3056  OSL_ENSURE( ppHead, "No list header attribute found!" );
3057 
3058  // save the current position as end position
3059  const SwNodeIndex* pEndIdx = &m_pPam->GetPoint()->nNode;
3060  sal_Int32 nEndCnt = m_pPam->GetPoint()->nContent.GetIndex();
3061 
3062  // Is the last started or an earlier started attribute being ended?
3063  HTMLAttr *pLast = nullptr;
3064  if( ppHead && pAttr != *ppHead )
3065  {
3066  // The last started attribute isn't being ended
3067 
3068  // Then we look for attribute which was started immediately afterwards,
3069  // which has also not yet been ended (otherwise it would no longer be
3070  // in the list).
3071  pLast = *ppHead;
3072  while( pLast && pLast->GetNext() != pAttr )
3073  pLast = pLast->GetNext();
3074 
3075  OSL_ENSURE( pLast, "Attribute not found in own list!" );
3076  }
3077 
 // If the PaM stands at content index 0 of a node other than the start
 // node and the item id is at least RES_PARATR_BEGIN, the end position is
 // taken one step back.
3078  bool bMoveBack = false;
3079  sal_uInt16 nWhich = pAttr->m_pItem->Which();
3080  if( !nEndCnt && RES_PARATR_BEGIN <= nWhich &&
3081  *pEndIdx != pAttr->GetSttPara() )
3082  {
3083  // Then move back one position in the content!
3084  bMoveBack = m_pPam->Move( fnMoveBackward );
3085  nEndCnt = m_pPam->GetPoint()->nContent.GetIndex();
3086  }
3087 
3088  // now end the attribute
3089  HTMLAttr *pNext = pAttr->GetNext();
3090 
3091  bool bInsert;
3092  sal_uInt16 nScriptItem = 0;
3093  bool bScript = false;
3094  // does it have a non-empty range?
3095  if( !bChkEmpty || (RES_PARATR_BEGIN <= nWhich && bMoveBack) ||
3096  RES_PAGEDESC == nWhich || RES_BREAK == nWhich ||
3097  *pEndIdx != pAttr->GetSttPara() ||
3098  nEndCnt != pAttr->GetSttCnt() )
3099  {
3100  bInsert = true;
3101  // We do some optimization for script dependent attributes here.
3102  if( *pEndIdx == pAttr->GetSttPara() )
3103  {
3104  lcl_swhtml_getItemInfo( *pAttr, bScript, nScriptItem );
3105  }
3106  }
3107  else
3108  {
3109  bInsert = false;
3110  }
3111 
 // The text node is only needed for the script handling below, i.e. when
 // the attribute is to be inserted and bScript was set above.
3112  const SwTextNode *pTextNd = (bInsert && bScript) ?
3113  pAttr->GetSttPara().GetNode().GetTextNode() :
3114  nullptr;
3115 
3116  if (pTextNd)
3117  {
 // Walk the script runs between the attribute's start and nEndCnt. For
 // every run that ends before nEndCnt and whose script type equals
 // nScriptItem a clone ending at the run's end is queued; pAttr's start
 // is advanced past each run.
3118  const OUString& rText = pTextNd->GetText();
3119  sal_uInt16 nScriptText = g_pBreakIt->GetBreakIter()->getScriptType(
3120  rText, pAttr->GetSttCnt() );
3121  sal_Int32 nScriptEnd = g_pBreakIt->GetBreakIter()
3122  ->endOfScript( rText, pAttr->GetSttCnt(), nScriptText );
3123  while (nScriptEnd < nEndCnt && nScriptEnd != -1)
3124  {
3125  if( nScriptItem == nScriptText )
3126  {
3127  HTMLAttr *pSetAttr = pAttr->Clone( *pEndIdx, nScriptEnd );
3128  pSetAttr->ClearPrev();
3129  if( pNext )
3130  pNext->InsertPrev( pSetAttr );
3131  else
3132  {
3133  if (pSetAttr->m_bInsAtStart)
3134  m_aSetAttrTab.push_front( pSetAttr );
3135  else
3136  m_aSetAttrTab.push_back( pSetAttr );
3137  }
3138  }
3139  pAttr->m_nStartContent = nScriptEnd;
3140  nScriptText = g_pBreakIt->GetBreakIter()->getScriptType(
3141  rText, nScriptEnd );
3142  nScriptEnd = g_pBreakIt->GetBreakIter()
3143  ->endOfScript( rText, nScriptEnd, nScriptText );
3144  }
 // The remaining part is only inserted if the last run's script type
 // equals nScriptItem.
3145  bInsert = nScriptItem == nScriptText;
3146  }
3147  if( bInsert )
3148  {
3149  pAttr->m_nEndPara = *pEndIdx;
3150  pAttr->m_nEndContent = nEndCnt;
 // Everything except INETFMT and CHARFMT attributes is queued at the
 // front of m_aSetAttrTab.
3151  pAttr->m_bInsAtStart = RES_TXTATR_INETFMT != nWhich &&
3152  RES_TXTATR_CHARFMT != nWhich;
3153 
3154  if( !pNext )
3155  {
3156  // No open attributes of that type exists any longer, so all
3157  // can be set. Except they depend on another attribute, then
3158  // they're appended there.
3159  if (pAttr->m_bInsAtStart)
3160  m_aSetAttrTab.push_front( pAttr );
3161  else
3162  m_aSetAttrTab.push_back( pAttr );
3163  }
3164  else
3165  {
3166  // There are other open attributes of that type,
3167  // therefore the setting must be postponed.
3168  // Hence the current attribute is added at the end
3169  // of the Prev-List of the successor.
3170  pNext->InsertPrev( pAttr );
3171  }
3172  }
3173  else
3174  {
3175  // Then don't insert, but delete. Because of the "faking" of styles
3176  // by hard attributing there can be also other empty attributes in the
3177  // Prev-List, which must be set anyway.
 // pPrev is fetched before pAttr is deleted.
3178  HTMLAttr *pPrev = pAttr->GetPrev();
3179  bRet = false;
3180  delete pAttr;
3181 
3182  if( pPrev )
3183  {
3184  // The previous attributes must be set anyway.
3185  if( pNext )
3186  pNext->InsertPrev( pPrev );
3187  else
3188  {
3189  if (pPrev->m_bInsAtStart)
3190  m_aSetAttrTab.push_front( pPrev );
3191  else
3192  m_aSetAttrTab.push_back( pPrev );
3193  }
3194  }
3195 
3196  }
3197 
3198  // If the first attribute of the list was set, then the list header
3199  // must be corrected as well.
3200  if( pLast )
3201  pLast->m_pNext = pNext;
3202  else if( ppHead )
3203  *ppHead = pNext;
3204 
3205  if( bMoveBack )
 // NOTE(review): listing line 3206 (the statement controlled by the
 // condition above) is missing from this extract; presumably it moves the
 // PaM forward again to undo the backward move made earlier -- confirm.
3207 
3208  return bRet;
3209 }
3210 
// NOTE(review): the listing line with this function's signature (3211) is
// missing from this extract; the body takes an HTMLAttr* named pAttr and is
// presumably SwHTMLParser::DeleteAttr( HTMLAttr* pAttr ) -- confirm.
//
// Deletes pAttr without setting it, unlinks it from its list of open
// attributes, and passes its Prev-list on to the next open attribute of the
// same list or, if there is none, to m_aSetAttrTab.
3212 {
3213  // preliminary paragraph attributes are not allowed here, they could
3214  // be set here and then the pointers become invalid!
3215  OSL_ENSURE(m_aParaAttrs.empty(),
3216  "Danger: there are non-final paragraph attributes");
3217  m_aParaAttrs.clear();
3218 
3219  // The list header is saved in the attribute
3220  HTMLAttr **ppHead = pAttr->m_ppHead;
3221 
3222  OSL_ENSURE( ppHead, "no list header attribute found!" );
3223 
3224  // Is the last started or an earlier started attribute being removed?
3225  HTMLAttr *pLast = nullptr;
3226  if( ppHead && pAttr != *ppHead )
3227  {
3228  // The last started attribute isn't being ended
3229 
3230  // Then we look for attribute which was started immediately afterwards,
3231  // which has also not yet been ended (otherwise it would no longer be
3232  // in the list).
3233  pLast = *ppHead;
3234  while( pLast && pLast->GetNext() != pAttr )
3235  pLast = pLast->GetNext();
3236 
3237  OSL_ENSURE( pLast, "Attribute not found in own list!" );
3238  }
3239 
3240  // now delete the attribute
 // Neighbours are fetched first because pAttr is gone after the delete.
3241  HTMLAttr *pNext = pAttr->GetNext();
3242  HTMLAttr *pPrev = pAttr->GetPrev();
3243  //hold ref to xAttrTab until end of scope to ensure *ppHead validity
3244  std::shared_ptr<HTMLAttrTable> xKeepAlive(pAttr->m_xAttrTab);
3245  delete pAttr;
3246 
3247  if( pPrev )
3248  {
3249  // The previous attributes must be set anyway.
3250  if( pNext )
3251  pNext->InsertPrev( pPrev );
3252  else
3253  {
3254  if (pPrev->m_bInsAtStart)
3255  m_aSetAttrTab.push_front( pPrev );
3256  else
3257  m_aSetAttrTab.push_back( pPrev );
3258  }
3259  }
3260 
3261  // If the first attribute of the list was deleted, then the list header
3262  // must be corrected as well.
3263  if( pLast )
3264  pLast->m_pNext = pNext;
3265  else if( ppHead )
3266  *ppHead = pNext;
3267 }
3268 
3269 void SwHTMLParser::SaveAttrTab(std::shared_ptr<HTMLAttrTable> const & rNewAttrTab)
3270 {
3271  // preliminary paragraph attributes are not allowed here, they could
3272  // be set here and then the pointers become invalid!
3273  OSL_ENSURE(m_aParaAttrs.empty(),
3274  "Danger: there are non-final paragraph attributes");
3275  m_aParaAttrs.clear();
3276 
3277  HTMLAttr** pHTMLAttributes = reinterpret_cast<HTMLAttr**>(m_xAttrTab.get());
3278  HTMLAttr** pSaveAttributes = reinterpret_cast<HTMLAttr**>(rNewAttrTab.get());
3279 
3280  for (auto nCnt = sizeof(HTMLAttrTable) / sizeof(HTMLAttr*); nCnt--; ++pHTMLAttributes, ++pSaveAttributes)
3281  {
3282  *pSaveAttributes = *pHTMLAttributes;
3283 
3284  HTMLAttr *pAttr = *pSaveAttributes;
3285  while (pAttr)
3286  {
3287  pAttr->SetHead(pSaveAttributes, rNewAttrTab);
3288  pAttr = pAttr->GetNext();
3289  }
3290 
3291  *pHTMLAttributes = nullptr;
3292  }
3293 }
3294 
// Splits the currently open attributes at the current PaM position: every
// attribute chained in m_xAttrTab is restarted at that position (Reset) and
// moved into the same slot of rNewAttrTab; all slots of m_xAttrTab end up null.
// For an attribute that started before the end position, a clone made with
// Clone( nEndIdx, nEndCnt ) is queued so the part before the split is still set.
// rNewAttrTab  - table that receives the restarted attributes.
// bMoveEndBack - if true, the end position is taken from the content node
//                reached by SwNodes::GoPrevious() rather than the current one.
3295 void SwHTMLParser::SplitAttrTab( std::shared_ptr<HTMLAttrTable> const & rNewAttrTab,
3296  bool bMoveEndBack )
3297 {
3298  // preliminary paragraph attributes are not allowed here, they could
3299  // be set here and then the pointers become invalid!
3300  OSL_ENSURE(m_aParaAttrs.empty(),
3301  "Danger: there are non-final paragraph attributes");
3302  m_aParaAttrs.clear();
3303 
3304  const SwNodeIndex& nSttIdx = m_pPam->GetPoint()->nNode;
3305  SwNodeIndex nEndIdx( nSttIdx );
3306 
3307  // close all still open attributes and re-open them after the table
      // Both tables are walked as flat arrays of HTMLAttr* slots.
      // NOTE(review): this assumes HTMLAttrTable consists solely of HTMLAttr*
      // members - confirm against the struct definition.
3308  HTMLAttr** pHTMLAttributes = reinterpret_cast<HTMLAttr**>(m_xAttrTab.get());
3309  HTMLAttr** pSaveAttributes = reinterpret_cast<HTMLAttr**>(rNewAttrTab.get());
3310  bool bSetAttr = true;
3311  const sal_Int32 nSttCnt = m_pPam->GetPoint()->nContent.GetIndex();
3312  sal_Int32 nEndCnt = nSttCnt;
3313 
3314  if( bMoveEndBack )
3315  {
3316  sal_uLong nOldEnd = nEndIdx.GetIndex();
3317  sal_uLong nTmpIdx;
      // nTmpIdx ends up as EndOfAutotext when both EndOfExtras and
      // EndOfAutotext lie before nOldEnd; in every other case it is
      // EndOfInserts.
3318  if( ( nTmpIdx = m_xDoc->GetNodes().GetEndOfExtras().GetIndex()) >= nOldEnd ||
3319  ( nTmpIdx = m_xDoc->GetNodes().GetEndOfAutotext().GetIndex()) >= nOldEnd )
3320  {
3321  nTmpIdx = m_xDoc->GetNodes().GetEndOfInserts().GetIndex();
3322  }
3323  SwContentNode* pCNd = SwNodes::GoPrevious(&nEndIdx);
3324 
3325  // Don't set attributes, when the PaM was moved outside of the content area.
3326  bSetAttr = pCNd && nTmpIdx < nEndIdx.GetIndex();
3327 
      // bSetAttr implies pCNd is non-null, so the dereference is guarded.
3328  nEndCnt = (bSetAttr ? pCNd->Len() : 0);
3329  }
3330  for (auto nCnt = sizeof(HTMLAttrTable) / sizeof(HTMLAttr*); nCnt--; (++pHTMLAttributes, ++pSaveAttributes))
3331  {
3332  HTMLAttr *pAttr = *pHTMLAttributes;
3333  *pSaveAttributes = nullptr;
3334  while( pAttr )
3335  {
      // fetch both neighbours first; pAttr is reset further down
3336  HTMLAttr *pNext = pAttr->GetNext();
3337  HTMLAttr *pPrev = pAttr->GetPrev();
3338 
3339  if( bSetAttr &&
3340  ( pAttr->GetSttParaIdx() < nEndIdx.GetIndex() ||
3341  (pAttr->GetSttPara() == nEndIdx &&
3342  pAttr->GetSttCnt() != nEndCnt) ) )
3343  {
3344  // The attribute must be set before the list. We need the
3345  // original and therefore we clone it, because pointer to the
3346  // attribute exist in the other contexts. The Next-List is lost
3347  // in doing so, but the Previous-List is preserved.
3348  HTMLAttr *pSetAttr = pAttr->Clone( nEndIdx, nEndCnt );
3349 
3350  if( pNext )
3351  pNext->InsertPrev( pSetAttr );
3352  else
3353  {
3354  if (pSetAttr->m_bInsAtStart)
3355  m_aSetAttrTab.push_front( pSetAttr );
3356  else
3357  m_aSetAttrTab.push_back( pSetAttr );
3358  }
3359  }
3360  else if( pPrev )
3361  {
3362  // If the attribute doesn't need to be set before the table, then
3363  // the previous attributes must still be set.
3364  if( pNext )
3365  pNext->InsertPrev( pPrev );
3366  else
3367  {
3368  if (pPrev->m_bInsAtStart)
3369  m_aSetAttrTab.push_front( pPrev );
3370  else
3371  m_aSetAttrTab.push_back( pPrev );
3372  }
3373  }
3374 
3375  // set the start of the attribute anew and break link
3376  pAttr->Reset(nSttIdx, nSttCnt, pSaveAttributes, rNewAttrTab);
3377 
      // append pAttr to the end of the chain already collected in this slot
3378  if (*pSaveAttributes)
3379  {
3380  HTMLAttr *pSAttr = *pSaveAttributes;
3381  while( pSAttr->GetNext() )
3382  pSAttr = pSAttr->GetNext();
3383  pSAttr->InsertNext( pAttr );
3384  }
3385  else
3386  *pSaveAttributes = pAttr;
3387 
3388  pAttr = pNext;
3389  }
3390 
      // the slot of the current table is now empty
3391  *pHTMLAttributes = nullptr;
3392  }
3393 }
3394 
// Moves every attribute chain from rNewAttrTab back into the same slot of
// m_xAttrTab, re-pointing the head of each attribute in the chain at the
// m_xAttrTab slot, and sets the rNewAttrTab slot to null afterwards.
// m_xAttrTab is expected to be empty; this is only checked by OSL_ENSURE,
// a non-empty slot is simply overwritten.
3395 void SwHTMLParser::RestoreAttrTab(std::shared_ptr<HTMLAttrTable> const & rNewAttrTab)
3396 {
3397  // preliminary paragraph attributes are not allowed here, they could
3398  // be set here and then the pointers become invalid!
3399  OSL_ENSURE(m_aParaAttrs.empty(),
3400  "Danger: there are non-final paragraph attributes");
3401  m_aParaAttrs.clear();
3402 
      // Both tables are walked as flat arrays of HTMLAttr* slots.
      // NOTE(review): assumes HTMLAttrTable holds only HTMLAttr* members - confirm.
3403  HTMLAttr** pHTMLAttributes = reinterpret_cast<HTMLAttr**>(m_xAttrTab.get());
3404  HTMLAttr** pSaveAttributes = reinterpret_cast<HTMLAttr**>(rNewAttrTab.get());
3405 
3406  for (auto nCnt = sizeof(HTMLAttrTable) / sizeof(HTMLAttr*); nCnt--; ++pHTMLAttributes, ++pSaveAttributes)
3407  {
3408  OSL_ENSURE(!*pHTMLAttributes, "The attribute table is not empty!");
3409 
3410  *pHTMLAttributes = *pSaveAttributes;
3411 
      // every attribute in the chain must refer to its new slot and table
3412  HTMLAttr *pAttr = *pHTMLAttributes;
3413  while (pAttr)
3414  {
3415  OSL_ENSURE( !pAttr->GetPrev() || !pAttr->GetPrev()->m_ppHead,
3416  "Previous attribute has still a header" );
3417  pAttr->SetHead(pHTMLAttributes, m_xAttrTab);
3418  pAttr = pAttr->GetNext();
3419  }
3420 
3421  *pSaveAttributes = nullptr;
3422  }
3423 }
3424 
// Creates an HTMLAttr for rItem at the current PaM point that is not linked
// to any attribute-table slot (null head, empty table pointer) and queues it
// in m_aSetAttrTab: at the front if bInsAtStart is true, otherwise at the back.
// NOTE(review): the attribute is allocated with new and stored as a raw
// pointer; presumably whoever drains m_aSetAttrTab deletes it - confirm.
3425 void SwHTMLParser::InsertAttr( const SfxPoolItem& rItem, bool bInsAtStart )
3426 {
3427  HTMLAttr* pTmp = new HTMLAttr(*m_pPam->GetPoint(), rItem, nullptr, std::shared_ptr<HTMLAttrTable>());
3428  if (bInsAtStart)
3429  m_aSetAttrTab.push_front( pTmp );
3430  else
3431  m_aSetAttrTab.push_back( pTmp );
3432 }
3433 
// Consumes rAttrs front to back: for each entry the contained item is queued
// again via InsertAttr( item, false ), i.e. at the current PaM position and at
// the back of m_aSetAttrTab. The entry itself is released when pAttr goes out
// of scope at the end of the iteration.
// Note that rAttrs is taken by value, so the caller's deque is not modified
// unless it was moved in.
3434 void SwHTMLParser::InsertAttrs( std::deque<std::unique_ptr<HTMLAttr>> rAttrs )
3435 {
3436  while( !rAttrs.empty() )
3437  {
      // take ownership first; the emptied slot is removed by pop_front() below
3438  std::unique_ptr<HTMLAttr> pAttr = std::move(rAttrs.front());
3439  InsertAttr( pAttr->GetItem(), false );
3440  rAttrs.pop_front();
3441  }
3442 }
3443 
// NOTE(review): the signature line of this function (listing line 3444) is
// missing from this extract; the body reads a parameter named nToken
// (presumably SwHTMLParser::NewStdAttr( HtmlTokenId nToken ) - confirm).
// Opens a context for nToken: collects the ID/STYLE/CLASS/LANG/DIR options of
// the current tag, inserts the attributes resulting from the style options
// (if any) and pushes the context.
3445 {
3446  OUString aId, aStyle, aLang, aDir;
3447  OUString aClass;
3448 
      // the options are visited from last to first
3449  const HTMLOptions& rHTMLOptions = GetOptions();
3450  for (size_t i = rHTMLOptions.size(); i; )
3451  {
3452  const HTMLOption& rOption = rHTMLOptions[--i];
3453  switch( rOption.GetToken() )
3454  {
3455  case HtmlOptionId::ID:
3456  aId = rOption.GetString();
3457  break;
3458  case HtmlOptionId::STYLE:
3459  aStyle = rOption.GetString();
3460  break;
3461  case HtmlOptionId::CLASS:
3462  aClass = rOption.GetString();
3463  break;
3464  case HtmlOptionId::LANG:
3465  aLang = rOption.GetString();
3466  break;
3467  case HtmlOptionId::DIR:
3468  aDir = rOption.GetString();
3469  break;
3470  default: break;
3471  }
3472  }
3473 
3474  // create a new context
3475  std::unique_ptr<HTMLAttrContext> xCntxt(new HTMLAttrContext(nToken));
3476 
3477  // parse styles
3478  if( HasStyleOptions( aStyle, aId, aClass, &aLang, &aDir ) )
3479  {
3480  SfxItemSet aItemSet( m_xDoc->GetAttrPool(), m_pCSS1Parser->GetWhichMap() );
3481  SvxCSS1PropertyInfo aPropInfo;
3482 
3483  if( ParseStyleOptions( aStyle, aId, aClass, aItemSet, aPropInfo, &aLang, &aDir ) )
3484  {
      // DoPositioning() is skipped only for a SPAN with a class for which
      // CreateContainer() succeeded.
3485  if( HtmlTokenId::SPAN_ON != nToken || aClass.isEmpty() ||
3486  !CreateContainer( aClass, aItemSet, aPropInfo, xCntxt.get() ) )
3487  DoPositioning( aItemSet, aPropInfo, xCntxt.get() );
3488  InsertAttrs( aItemSet, aPropInfo, xCntxt.get(), true );
3489  }
3490  }
3491 
3492  // save the context
3493  PushContext(xCntxt);
3494 }
3495 
// NOTE(review): the first line of this signature (listing line 3496) is
// missing from this extract; the body also reads a parameter named nToken
// (presumably an overload of SwHTMLParser::NewStdAttr - confirm).
// Opens a context for nToken and applies rItem plus the optional pItem2 and
// pItem3. If the tag carries style options, the items are put into an item
// set together with the parsed styles and inserted from there; otherwise each
// item is inserted directly into its table slot (ppAttr, ppAttr2, ppAttr3).
3497  HTMLAttr **ppAttr, const SfxPoolItem & rItem,
3498  HTMLAttr **ppAttr2, const SfxPoolItem *pItem2,
3499  HTMLAttr **ppAttr3, const SfxPoolItem *pItem3 )
3500 {
3501  OUString aId, aStyle, aClass, aLang, aDir;
3502 
      // the options are visited from last to first
3503  const HTMLOptions& rHTMLOptions = GetOptions();
3504  for (size_t i = rHTMLOptions.size(); i; )
3505  {
3506  const HTMLOption& rOption = rHTMLOptions[--i];
3507  switch( rOption.GetToken() )
3508  {
3509  case HtmlOptionId::ID:
3510  aId = rOption.GetString();
3511  break;
3512  case HtmlOptionId::STYLE:
3513  aStyle = rOption.GetString();
3514  break;
3515  case HtmlOptionId::CLASS:
3516  aClass = rOption.GetString();
3517  break;
3518  case HtmlOptionId::LANG:
3519  aLang = rOption.GetString();
3520  break;
3521  case HtmlOptionId::DIR:
3522  aDir = rOption.GetString();
3523  break;
3524  default: break;
3525  }
3526  }
3527 
3528  // create a new context
3529  std::unique_ptr<HTMLAttrContext> xCntxt(new HTMLAttrContext(nToken));
3530 
3531  // parse styles
3532  if( HasStyleOptions( aStyle, aId, aClass, &aLang, &aDir ) )
3533  {
3534  SfxItemSet aItemSet( m_xDoc->GetAttrPool(), m_pCSS1Parser->GetWhichMap() );
3535  SvxCSS1PropertyInfo aPropInfo;
3536 
      // the fixed items go in first; ParseStyleOptions() works on the same set
3537  aItemSet.Put( rItem );
3538  if( pItem2 )
3539  aItemSet.Put( *pItem2 );
3540  if( pItem3 )
3541  aItemSet.Put( *pItem3 );
3542 
3543  if( ParseStyleOptions( aStyle, aId, aClass, aItemSet, aPropInfo, &aLang, &aDir ) )
3544  DoPositioning( aItemSet, aPropInfo, xCntxt.get() );
3545 
      // inserted even if ParseStyleOptions() returned false, so the fixed
      // items are never lost
3546  InsertAttrs( aItemSet, aPropInfo, xCntxt.get(), true );
3547  }
3548  else
3549  {
3550  InsertAttr( ppAttr ,rItem, xCntxt.get() );
3551  if( pItem2 )
3552  {
      // a missing slot is only reported by OSL_ENSURE; ppAttr2 is passed on
      // to InsertAttr() regardless
3553  OSL_ENSURE( ppAttr2, "missing table entry for item2" );
3554  InsertAttr( ppAttr2, *pItem2, xCntxt.get() );
3555  }
3556  if( pItem3 )
3557  {
3558  OSL_ENSURE( ppAttr3, "missing table entry for item3" );
3559  InsertAttr( ppAttr3, *pItem3, xCntxt.get() );
3560  }
3561  }
3562 
3563  // save the context
3564  PushContext(xCntxt);
3565 }
3566 
// NOTE(review): the signature line (listing line 3567) is missing from this
// extract; the body reads a parameter named nToken (presumably
// SwHTMLParser::EndTag( HtmlTokenId nToken ) - confirm).
// Pops the context that belongs to the "on" form of nToken and, if one was
// found, ends it via EndContext(). The context is destroyed on return.
3568 {
3569  // fetch context
3570  std::unique_ptr<HTMLAttrContext> xCntxt(PopContext(getOnToken(nToken)));
3571  if (xCntxt)
3572  {
3573  // and maybe end the attributes
3574  EndContext(xCntxt.get());
3575  }
3576 }
3577 
// NOTE(review): the signature line (listing line 3578) is missing from this
// extract (presumably SwHTMLParser::NewBasefontAttr() - confirm).
// Opens a BASEFONT_ON context: reads SIZE (default 3, clamped to 1..7) and
// the ID/STYLE/CLASS/LANG/DIR options, sets the Western, CJK and CTL font
// heights from m_aFontHeights[nSize-1], pushes the context and records nSize
// on m_aBaseFontStack.
3579 {
3580  OUString aId, aStyle, aClass, aLang, aDir;
3581  sal_uInt16 nSize = 3;
3582 
      // the options are visited from last to first
3583  const HTMLOptions& rHTMLOptions = GetOptions();
3584  for (size_t i = rHTMLOptions.size(); i; )
3585  {
3586  const HTMLOption& rOption = rHTMLOptions[--i];
3587  switch( rOption.GetToken() )
3588  {
3589  case HtmlOptionId::SIZE:
      // NOTE(review): the value is narrowed to 16 bits before the 1..7 clamp
      // below; if GetNumber() returns a wider type, a large SIZE wraps
      // instead of clamping to 7 - confirm.
3590  nSize = static_cast<sal_uInt16>(rOption.GetNumber());
3591  break;
3592  case HtmlOptionId::ID:
3593  aId = rOption.GetString();
3594  break;
3595  case HtmlOptionId::STYLE:
3596  aStyle = rOption.GetString();
3597  break;
3598  case HtmlOptionId::CLASS:
3599  aClass = rOption.GetString();
3600  break;
3601  case HtmlOptionId::LANG:
3602  aLang = rOption.GetString();
3603  break;
3604  case HtmlOptionId::DIR:
3605  aDir = rOption.GetString();
3606  break;
3607  default: break;
3608  }
3609  }
3610 
      // clamp to 1..7 so that nSize-1 is a valid index into m_aFontHeights
      // (assuming it has at least 7 entries - confirm)
3611  if( nSize < 1 )
3612  nSize = 1;
3613 
3614  if( nSize > 7 )
3615  nSize = 7;
3616 
3617  // create a new context
3618  std::unique_ptr<HTMLAttrContext> xCntxt(new HTMLAttrContext(HtmlTokenId::BASEFONT_ON));
3619 
3620  // parse styles
3621  if( HasStyleOptions( aStyle, aId, aClass, &aLang, &aDir ) )
3622  {
3623  SfxItemSet aItemSet( m_xDoc->GetAttrPool(), m_pCSS1Parser->GetWhichMap() );
3624  SvxCSS1PropertyInfo aPropInfo;
3625 
3626  //CJK has different defaults
3627  SvxFontHeightItem aFontHeight( m_aFontHeights[nSize-1], 100, RES_CHRATR_FONTSIZE );
3628  aItemSet.Put( aFontHeight );
3629  SvxFontHeightItem aFontHeightCJK( m_aFontHeights[nSize-1], 100, RES_CHRATR_CJK_FONTSIZE );
3630  aItemSet.Put( aFontHeightCJK );
3631  //Complex type can contain so many types of letters,
3632  //that it's not really worthy to bother, IMO.
3633  //Still, I have set a default.
3634  SvxFontHeightItem aFontHeightCTL( m_aFontHeights[nSize-1], 100, RES_CHRATR_CTL_FONTSIZE );
3635  aItemSet.Put( aFontHeightCTL );
3636 
3637  if( ParseStyleOptions( aStyle, aId, aClass, aItemSet, aPropInfo, &aLang, &aDir ) )
3638  DoPositioning( aItemSet, aPropInfo, xCntxt.get() );
3639 
3640  InsertAttrs( aItemSet, aPropInfo, xCntxt.get(), true );
3641  }
3642  else
3643  {
      // no style options: each script type goes into its own table slot
3644  SvxFontHeightItem aFontHeight( m_aFontHeights[nSize-1], 100, RES_CHRATR_FONTSIZE );
3645  InsertAttr( &m_xAttrTab->pFontHeight, aFontHeight, xCntxt.get() );
3646  SvxFontHeightItem aFontHeightCJK( m_aFontHeights[nSize-1], 100, RES_CHRATR_CJK_FONTSIZE );
3647  InsertAttr( &m_xAttrTab->pFontHeightCJK, aFontHeightCJK, xCntxt.get() );
3648  SvxFontHeightItem aFontHeightCTL( m_aFontHeights[nSize-1], 100, RES_CHRATR_CTL_FONTSIZE );
3649  InsertAttr( &m_xAttrTab->pFontHeightCTL, aFontHeightCTL, xCntxt.get() );
3650  }
3651 
3652  // save the context
3653  PushContext(xCntxt);
3654 
3655  // save the font size
3656  m_aBaseFontStack.push_back( nSize );
3657 }
3658 
// NOTE(review): the signature line (listing line 3659) is missing from this
// extract (presumably SwHTMLParser::EndBasefontAttr() - confirm).
// Ends the BASEFONT_ON context and drops the last entry of m_aBaseFontStack,
// but never shrinks the stack to m_nBaseFontStMin entries or fewer.
3660 {
3661  EndTag( HtmlTokenId::BASEFONT_ON );
3662 
3663  // avoid stack underflow in tables
3664  if( m_aBaseFontStack.size() > m_nBaseFontStMin )
      // begin() + size() - 1 addresses the last element
3665  m_aBaseFontStack.erase( m_aBaseFontStack.begin() + m_aBaseFontStack.size() - 1 );
3666 }
3667 
// NOTE(review): the signature line (listing line 3668) is missing from this
// extract; the body reads a parameter named nToken that is compared against
// FONT_ON and BIGPRINT_ON (presumably
// SwHTMLParser::NewFontAttr( HtmlTokenId nToken ) - confirm).
// Opens a context for nToken and sets font height, colour and font name:
//  - for FONT_ON the values come from the SIZE, COLOR and FACE options;
//  - for any other token the size is one step above (BIGPRINT_ON) or below
//    the current font size, within 1..7.
// Finally the context is pushed and nSize is recorded on m_aFontStack.
3669 {
3670  sal_uInt16 nBaseSize =
      // NOTE(review): listing lines 3671-3672 (the start of this initialiser)
      // are missing from this extract.
3673  : 3 );
      // current size: top of the font stack if it has entries above
      // m_nFontStMin, else the base size
3674  sal_uInt16 nFontSize =
3675  ( m_aFontStack.size() > m_nFontStMin
3676  ? (m_aFontStack[m_aFontStack.size()-1] & FONTSIZE_MASK)
3677  : nBaseSize );
3678 
3679  OUString aFace, aId, aStyle, aClass, aLang, aDir;
3680  Color aColor;
3681  sal_uLong nFontHeight = 0; // actual font height to set
3682  sal_uInt16 nSize = 0; // font height in Netscape notation (1-7)
3683  bool bColor = false;
3684 
      // the options are visited from last to first
3685  const HTMLOptions& rHTMLOptions = GetOptions();
3686  for (size_t i = rHTMLOptions.size(); i; )
3687  {
3688  const HTMLOption& rOption = rHTMLOptions[--i];
3689  switch( rOption.GetToken() )
3690  {
3691  case HtmlOptionId::SIZE:
3692  if( HtmlTokenId::FONT_ON==nToken && !rOption.GetString().isEmpty() )
3693  {
      // a leading '+' or '-' makes the size relative to the base size
3694  sal_Int32 nSSize;
3695  if( '+' == rOption.GetString()[0] ||
3696  '-' == rOption.GetString()[0] )
3697  nSSize = o3tl::saturating_add<sal_Int32>(nBaseSize, rOption.GetSNumber());
3698  else
3699  nSSize = static_cast<sal_Int32>(rOption.GetNumber());
3700 
      // clamp to 1..7 before narrowing and indexing m_aFontHeights
3701  if( nSSize < 1 )
3702  nSSize = 1;
3703  else if( nSSize > 7 )
3704  nSSize = 7;
3705 
3706  nSize = static_cast<sal_uInt16>(nSSize);
3707  nFontHeight = m_aFontHeights[nSize-1];
3708  }
3709  break;
3710  case HtmlOptionId::COLOR:
3711  if( HtmlTokenId::FONT_ON==nToken )
3712  {
3713  rOption.GetColor( aColor );
3714  bColor = true;
3715  }
3716  break;
3717  case HtmlOptionId::FACE:
3718  if( HtmlTokenId::FONT_ON==nToken )
3719  aFace = rOption.GetString();
3720  break;
3721  case HtmlOptionId::ID:
3722  aId = rOption.GetString();
3723  break;
3724  case HtmlOptionId::STYLE:
3725  aStyle = rOption.GetString();
3726  break;
3727  case HtmlOptionId::CLASS:
3728  aClass = rOption.GetString();
3729  break;
3730  case HtmlOptionId::LANG:
3731  aLang = rOption.GetString();
3732  break;
3733  case HtmlOptionId::DIR:
3734  aDir = rOption.GetString();
3735  break;
3736  default: break;
3737  }
3738  }
3739 
3740  if( HtmlTokenId::FONT_ON != nToken )
3741  {
3742  // HTML_BIGPRINT_ON or HTML_SMALLPRINT_ON
3743 
3744  // In headings the current heading sets the font height
3745  // and not BASEFONT.
3746  const SwFormatColl *pColl = GetCurrFormatColl();
3747  sal_uInt16 nPoolId = pColl ? pColl->GetPoolFormatId() : 0;
3748  if( nPoolId>=RES_POOLCOLL_HEADLINE1 &&
3749  nPoolId<=RES_POOLCOLL_HEADLINE6 )
3750  {
3751  // If the font height in the heading wasn't changed yet,
3752  // then take the one from the style.
      // maps HEADLINE1..HEADLINE6 to the sizes 6..1
3753  if( m_nFontStHeadStart==m_aFontStack.size() )
3754  nFontSize = static_cast< sal_uInt16 >(6 - (nPoolId - RES_POOLCOLL_HEADLINE1));
3755  }
3756  else
      // not a heading: nPoolId == 0 selects the m_aFontHeights lookup below
3757  nPoolId = 0;
3758 
3759  if( HtmlTokenId::BIGPRINT_ON == nToken )
3760  nSize = ( nFontSize<7 ? nFontSize+1 : 7 );
3761  else
3762  nSize = ( nFontSize>1 ? nFontSize-1 : 1 );
3763 
3764  // If possible in headlines we fetch the new font height
3765  // from the style.
3766  if( nPoolId && nSize>=1 && nSize <=6 )
3767  nFontHeight =
3768  m_pCSS1Parser->GetTextCollFromPool(
3769  RES_POOLCOLL_HEADLINE1+6-nSize )->GetSize().GetHeight();
3770  else
3771  nFontHeight = m_aFontHeights[nSize-1];
3772  }
3773 
      // either both are zero (no size given) or both are set
3774  OSL_ENSURE( !nSize == !nFontHeight, "HTML-Font-Size != Font-Height" );
3775 
3776  OUString aFontName;
3777  const OUString aStyleName;
      // eFamily and ePitch are not changed anywhere in this function, so the
      // font items below always carry the DONTKNOW values
3778  FontFamily eFamily = FAMILY_DONTKNOW; // family and pitch,
3779  FontPitch ePitch = PITCH_DONTKNOW; // if not found
3780  rtl_TextEncoding eEnc = osl_getThreadTextEncoding();
3781 
3782  if( !aFace.isEmpty() && !m_pCSS1Parser->IsIgnoreFontFamily() )
3783  {
3784  const FontList *pFList = nullptr;
3785  SwDocShell *pDocSh = m_xDoc->GetDocShell();
3786  if( pDocSh )
3787  {
3788  const SvxFontListItem *pFListItem =
3789  static_cast<const SvxFontListItem *>(pDocSh->GetItem(SID_ATTR_CHAR_FONTLIST));
3790  if( pFListItem )
3791  pFList = pFListItem->GetFontList();
3792  }
3793 
      // FACE is a comma separated list; every non-empty name is appended to
      // aFontName separated by ';'. The font list is consulted only until the
      // first name with a known charset is found, and only to detect a symbol
      // encoding.
3794  bool bFound = false;
3795  sal_Int32 nStrPos = 0;
3796  while( nStrPos!= -1 )
3797  {
3798  OUString aFName = aFace.getToken( 0, ',', nStrPos );
3799  aFName = comphelper::string::strip(aFName, ' ');
3800  if( !aFName.isEmpty() )
3801  {
3802  if( !bFound && pFList )
3803  {
3804  sal_Handle hFont = pFList->GetFirstFontMetric( aFName );
3805  if( nullptr != hFont )
3806  {
3807  const FontMetric& rFMetric = FontList::GetFontMetric( hFont );
3808  if( RTL_TEXTENCODING_DONTKNOW != rFMetric.GetCharSet() )
3809  {
3810  bFound = true;
3811  if( RTL_TEXTENCODING_SYMBOL == rFMetric.GetCharSet() )
3812  eEnc = RTL_TEXTENCODING_SYMBOL;
3813  }
3814  }
3815  }
3816  if( !aFontName.isEmpty() )
3817  aFontName += ";";
3818  aFontName += aFName;
3819  }
3820  }
3821  }
3822 
3823  // create a new context
3824  std::unique_ptr<HTMLAttrContext> xCntxt(new HTMLAttrContext(nToken));
3825 
3826  // parse styles
3827  if( HasStyleOptions( aStyle, aId, aClass, &aLang, &aDir ) )
3828  {
3829  SfxItemSet aItemSet( m_xDoc->GetAttrPool(), m_pCSS1Parser->GetWhichMap() );
3830  SvxCSS1PropertyInfo aPropInfo;
3831 
3832  if( nFontHeight )
3833  {
3834  SvxFontHeightItem aFontHeight( nFontHeight, 100, RES_CHRATR_FONTSIZE );
3835  aItemSet.Put( aFontHeight );
3836  SvxFontHeightItem aFontHeightCJK( nFontHeight, 100, RES_CHRATR_CJK_FONTSIZE );
3837  aItemSet.Put( aFontHeightCJK );
3838  SvxFontHeightItem aFontHeightCTL( nFontHeight, 100, RES_CHRATR_CTL_FONTSIZE );
3839  aItemSet.Put( aFontHeightCTL );
3840  }
3841  if( bColor )
3842  aItemSet.Put( SvxColorItem(aColor, RES_CHRATR_COLOR) );
3843  if( !aFontName.isEmpty() )
3844  {
3845  SvxFontItem aFont( eFamily, aFontName, aStyleName, ePitch, eEnc, RES_CHRATR_FONT );
3846  aItemSet.Put( aFont );
3847  SvxFontItem aFontCJK( eFamily, aFontName, aStyleName, ePitch, eEnc, RES_CHRATR_CJK_FONT );
3848  aItemSet.Put( aFontCJK );
3849  SvxFontItem aFontCTL( eFamily, aFontName, aStyleName, ePitch, eEnc, RES_CHRATR_CTL_FONT );
3850  aItemSet.Put( aFontCTL );
3851  }
3852 
3853  if( ParseStyleOptions( aStyle, aId, aClass, aItemSet, aPropInfo, &aLang, &aDir ) )
3854  DoPositioning( aItemSet, aPropInfo, xCntxt.get() );
3855 
3856  InsertAttrs( aItemSet, aPropInfo, xCntxt.get(), true );
3857  }
3858  else
3859  {
3860  if( nFontHeight )
3861  {
      // NOTE(review): the CJK and CTL heights are chained into the
      // pFontHeight slot here, whereas the BASEFONT handling in this file
      // uses pFontHeightCJK / pFontHeightCTL - confirm whether this is
      // intentional.
3862  SvxFontHeightItem aFontHeight( nFontHeight, 100, RES_CHRATR_FONTSIZE );
3863  InsertAttr( &m_xAttrTab->pFontHeight, aFontHeight, xCntxt.get() );
3864  SvxFontHeightItem aFontHeightCJK( nFontHeight, 100, RES_CHRATR_CJK_FONTSIZE );
3865  InsertAttr( &m_xAttrTab->pFontHeight, aFontHeightCJK, xCntxt.get() );
3866  SvxFontHeightItem aFontHeightCTL( nFontHeight, 100, RES_CHRATR_CTL_FONTSIZE );
3867  InsertAttr( &m_xAttrTab->pFontHeight, aFontHeightCTL, xCntxt.get() );
3868  }
3869  if( bColor )
3870  InsertAttr( &m_xAttrTab->pFontColor, SvxColorItem(aColor, RES_CHRATR_COLOR), xCntxt.get() );
3871  if( !aFontName.isEmpty() )
3872  {
      // NOTE(review): likewise, the CJK and CTL fonts are all chained into
      // the pFont slot - confirm.
3873  SvxFontItem aFont( eFamily, aFontName, aStyleName, ePitch, eEnc, RES_CHRATR_FONT );
3874  InsertAttr( &m_xAttrTab->pFont, aFont, xCntxt.get() );
3875  SvxFontItem aFontCJK( eFamily, aFontName, aStyleName, ePitch, eEnc, RES_CHRATR_CJK_FONT );
3876  InsertAttr( &m_xAttrTab->pFont, aFontCJK, xCntxt.get() );
3877  SvxFontItem aFontCTL( eFamily, aFontName, aStyleName, ePitch, eEnc, RES_CHRATR_CTL_FONT );
3878  InsertAttr( &m_xAttrTab->pFont, aFontCTL, xCntxt.get() );
3879  }
3880  }
3881 
3882  // save the context
3883  PushContext(xCntxt);
3884 
      // nSize is 0 here when FONT_ON carried no usable SIZE option
3885  m_aFontStack.push_back( nSize );
3886 }
3887 
// NOTE(review): the signature line (listing line 3888) is missing from this
// extract; the body reads a parameter named nToken (presumably
// SwHTMLParser::EndFontAttr( HtmlTokenId nToken ) - confirm).
// Ends the context of nToken and drops the last entry of m_aFontStack, but
// never shrinks the stack to m_nFontStMin entries or fewer.
3889 {
3890  EndTag( nToken );
3891 
3892  // avoid stack underflow in tables
3893  if( m_aFontStack.size() > m_nFontStMin )
      // begin() + size() - 1 addresses the last element
3894  m_aFontStack.erase( m_aFontStack.begin() + m_aFontStack.size() - 1 );
3895 }
3896 
// NOTE(review): the signature line (listing line 3897) is missing from this
// extract (presumably SwHTMLParser::NewPara() - confirm).
// Starts a <P> paragraph: reads ID/ALIGN/STYLE/CLASS/LANG/DIR, creates a
// PARABREAK_ON context (with the text collection RES_POOLCOLL_TEXT and the
// class if a class was given), applies inline styles and an explicit
// alignment, pushes the context and records PARABREAK_ON as the open
// paragraph token.
3898 {
3899  if( m_pPam->GetPoint()->nContent.GetIndex() )
      // NOTE(review): listing line 3900 (the statement of this branch) is
      // missing from this extract.
3901  else
3902  AddParSpace();
3903 
      // SvxAdjust::End serves as the "no ALIGN given" marker, see the check
      // before InsertAttr() below
3904  m_eParaAdjust = SvxAdjust::End;
3905  OUString aId, aStyle, aClass, aLang, aDir;
3906 
      // the options are visited from last to first
3907  const HTMLOptions& rHTMLOptions = GetOptions();
3908  for (size_t i = rHTMLOptions.size(); i; )
3909  {
3910  const HTMLOption& rOption = rHTMLOptions[--i];
3911  switch( rOption.GetToken() )
3912  {
3913  case HtmlOptionId::ID:
3914  aId = rOption.GetString();
3915  break;
3916  case HtmlOptionId::ALIGN:
3917  m_eParaAdjust = rOption.GetEnum( aHTMLPAlignTable, m_eParaAdjust );
3918  break;
3919  case HtmlOptionId::STYLE:
3920  aStyle = rOption.GetString();
3921  break;
3922  case HtmlOptionId::CLASS:
3923  aClass = rOption.GetString();
3924  break;
3925  case HtmlOptionId::LANG:
3926  aLang = rOption.GetString();
3927  break;
3928  case HtmlOptionId::DIR:
3929  aDir = rOption.GetString();
3930  break;
3931  default: break;
3932  }
3933  }
3934 
3935  // create a new context
3936  std::unique_ptr<HTMLAttrContext> xCntxt(
3937  !aClass.isEmpty() ? new HTMLAttrContext( HtmlTokenId::PARABREAK_ON,
3938  RES_POOLCOLL_TEXT, aClass )
3939  : new HTMLAttrContext( HtmlTokenId::PARABREAK_ON ));
3940 
3941  // parse styles (Don't consider class. This is only possible as long as none of
3942  // the CSS1 properties of the class must be formatted hard!!!)
3943  if (HasStyleOptions(aStyle, aId, OUString(), &aLang, &aDir))
3944  {
3945  SfxItemSet aItemSet( m_xDoc->GetAttrPool(), m_pCSS1Parser->GetWhichMap() );
3946  SvxCSS1PropertyInfo aPropInfo;
3947 
3948  if (ParseStyleOptions(aStyle, aId, OUString(), aItemSet, aPropInfo, &aLang, &aDir))
3949  {
3950  OSL_ENSURE( aClass.isEmpty() || !m_pCSS1Parser->GetClass( aClass ),
3951  "Class is not considered" );
3952  DoPositioning( aItemSet, aPropInfo, xCntxt.get() );
3953  InsertAttrs( aItemSet, aPropInfo, xCntxt.get() );
3954  }
3955  }
3956 
3957  if( SvxAdjust::End != m_eParaAdjust )
3958  InsertAttr( &m_xAttrTab->pAdjust, SvxAdjustItem(m_eParaAdjust, RES_PARATR_ADJUST), xCntxt.get() );
3959 
3960  // and push on stack
3961  PushContext( xCntxt );
3962 
3963  // set the current style or its attributes
      // the pushed context is passed on only when a class was given
3964  SetTextCollAttrs( !aClass.isEmpty() ? m_aContexts.back().get() : nullptr );
3965 
3966  // progress bar
3967  ShowStatline();
3968 
3969  OSL_ENSURE( m_nOpenParaToken == HtmlTokenId::NONE, "Now an open paragraph element will be lost." );
3970  m_nOpenParaToken = HtmlTokenId::PARABREAK_ON;
3971 }
3972 
// Closes the currently open paragraph element: pops the context that belongs
// to m_nOpenParaToken (or PARABREAK_ON if no paragraph token is recorded),
// ends its attributes and resets m_nOpenParaToken to NONE.
// bReal - when true, the paragraph handling at the top of the function and
//         the final SetTextCollAttrs() are performed as well.
3973 void SwHTMLParser::EndPara( bool bReal )
3974 {
      // debug-only sanity check; has no effect in non-debug builds
3975  if (HtmlTokenId::LI_ON==m_nOpenParaToken && m_xTable)
3976  {
3977 #if OSL_DEBUG_LEVEL > 0
3978  const SwNumRule *pNumRule = m_pPam->GetNode().GetTextNode()->GetNumRule();
3979  OSL_ENSURE( pNumRule, "Where is the NumRule" );
3980 #endif
3981  }
3982 
3983  // Netscape skips empty paragraphs, we do the same.
3984  if( bReal )
3985  {
3986  if( m_pPam->GetPoint()->nContent.GetIndex() )
      // NOTE(review): listing line 3987 (the statement of this branch) is
      // missing from this extract.
3988  else
3989  AddParSpace();
3990  }
3991 
3992  // If a DD or DT was open, it's an implied definition list,
3993  // which must be closed now.
3994  if( (m_nOpenParaToken == HtmlTokenId::DT_ON || m_nOpenParaToken == HtmlTokenId::DD_ON) &&
      // NOTE(review): listing line 3995 (the rest of this condition) is
      // missing from this extract.
3996  {
3997  m_nDefListDeep--;
3998  }
3999 
4000  // Pop the context of the stack. It can also be from an
4001  // implied opened definition list.
4002  std::unique_ptr<HTMLAttrContext> xCntxt(
4003  PopContext( m_nOpenParaToken != HtmlTokenId::NONE ? getOnToken(m_nOpenParaToken) : HtmlTokenId::PARABREAK_ON ));
4004 
4005  // close attribute
4006  if (xCntxt)
4007  {
4008  EndContext(xCntxt.get());
4009  SetAttr(); // because of JavaScript set paragraph attributes as fast as possible
4010  xCntxt.reset();
4011  }
4012 
4013  // reset the existing style
4014  if( bReal )
4015  SetTextCollAttrs();
4016 
4017  m_nOpenParaToken = HtmlTokenId::NONE;
4018 }
4019 
// NOTE(review): the signature line (listing line 4020) is missing from this
// extract; the body reads a parameter named nToken that is switched over
// HEAD1_ON..HEAD6_ON (presumably
// SwHTMLParser::NewHeading( HtmlTokenId nToken ) - confirm).
// Starts a heading: reads ID/ALIGN/STYLE/CLASS/LANG/DIR, maps the token to
// the matching headline text collection, creates and pushes the context and
// applies inline styles and an explicit alignment.
4021 {
      // SvxAdjust::End serves as the "no ALIGN given" marker, see below
4022  m_eParaAdjust = SvxAdjust::End;
4023 
4024  OUString aId, aStyle, aClass, aLang, aDir;
4025 
      // the options are visited from last to first
4026  const HTMLOptions& rHTMLOptions = GetOptions();
4027  for (size_t i = rHTMLOptions.size(); i; )
4028  {
4029  const HTMLOption& rOption = rHTMLOptions[--i];
4030  switch( rOption.GetToken() )
4031  {
4032  case HtmlOptionId::ID:
4033  aId = rOption.GetString();
4034  break;
4035  case HtmlOptionId::ALIGN:
4036  m_eParaAdjust = rOption.GetEnum( aHTMLPAlignTable, m_eParaAdjust );
4037  break;
4038  case HtmlOptionId::STYLE:
4039  aStyle = rOption.GetString();
4040  break;
4041  case HtmlOptionId::CLASS:
4042  aClass = rOption.GetString();
4043  break;
4044  case HtmlOptionId::LANG:
4045  aLang = rOption.GetString();
4046  break;
4047  case HtmlOptionId::DIR:
4048  aDir = rOption.GetString();
4049  break;
4050  default: break;
4051  }
4052  }
4053 
4054  // open a new paragraph
4055  if( m_pPam->GetPoint()->nContent.GetIndex() )
      // NOTE(review): listing line 4056 (the statement of this branch) is
      // missing from this extract.
4057  else
4058  AddParSpace();
4059 
4060  // search for the matching style
4061  sal_uInt16 nTextColl;
4062  switch( nToken )
4063  {
4064  case HtmlTokenId::HEAD1_ON: nTextColl = RES_POOLCOLL_HEADLINE1; break;
4065  case HtmlTokenId::HEAD2_ON: nTextColl = RES_POOLCOLL_HEADLINE2; break;
4066  case HtmlTokenId::HEAD3_ON: nTextColl = RES_POOLCOLL_HEADLINE3; break;
4067  case HtmlTokenId::HEAD4_ON: nTextColl = RES_POOLCOLL_HEADLINE4; break;
4068  case HtmlTokenId::HEAD5_ON: nTextColl = RES_POOLCOLL_HEADLINE5; break;
4069  case HtmlTokenId::HEAD6_ON: nTextColl = RES_POOLCOLL_HEADLINE6; break;
      // any other token falls back to the standard collection
4070  default: nTextColl = RES_POOLCOLL_STANDARD; break;
4071  }
4072 
4073  // create the context
4074  std::unique_ptr<HTMLAttrContext> xCntxt(new HTMLAttrContext(nToken, nTextColl, aClass));
4075 
4076  // parse styles (regarding class see also NewPara)
4077  if (HasStyleOptions(aStyle, aId, OUString(), &aLang, &aDir))
4078  {
4079  SfxItemSet aItemSet( m_xDoc->GetAttrPool(), m_pCSS1Parser->GetWhichMap() );
4080  SvxCSS1PropertyInfo aPropInfo;
4081 
4082  if (ParseStyleOptions(aStyle, aId, OUString(), aItemSet, aPropInfo, &aLang, &aDir))
4083  {
4084  OSL_ENSURE( aClass.isEmpty() || !m_pCSS1Parser->GetClass( aClass ),
4085  "Class is not considered" );
4086  DoPositioning( aItemSet, aPropInfo, xCntxt.get() );
4087  InsertAttrs( aItemSet, aPropInfo, xCntxt.get() );
4088  }
4089  }
4090 
4091  if( SvxAdjust::End != m_eParaAdjust )
4092  InsertAttr( &m_xAttrTab->pAdjust, SvxAdjustItem(m_eParaAdjust, RES_PARATR_ADJUST), xCntxt.get() );
4093 
4094  // and push on stack
4095  PushContext(xCntxt);
4096 
4097  // set the current style or its attributes
4098  SetTextCollAttrs(m_aContexts.back().get());
4099 
      // NOTE(review): listing line 4100 is missing from this extract.
4101 
4102  // progress bar
4103  ShowStatline();
4104 }
4105 
// NOTE(review): the signature line (listing line 4106) is missing from this
// extract (presumably SwHTMLParser::EndHeading() - confirm).
// Ends a heading: removes the innermost HEAD1_ON..HEAD6_ON context above
// m_nContextStMin from m_aContexts, regardless of its level, ends its
// attributes and resets the text collection attributes.
4107 {
4108  // open a new paragraph
4109  if( m_pPam->GetPoint()->nContent.GetIndex() )
      // NOTE(review): listing line 4110 (the statement of this branch) is
      // missing from this extract.
4111  else
4112  AddParSpace();
4113 
4114  // search context matching the token and fetch it from stack
      // walks from the top of the stack downwards and stops at the first hit
4115  std::unique_ptr<HTMLAttrContext> xCntxt;
4116  auto nPos = m_aContexts.size();
4117  while( !xCntxt && nPos>m_nContextStMin )
4118  {
4119  switch( m_aContexts[--nPos]->GetToken() )
4120  {
4121  case HtmlTokenId::HEAD1_ON:
4122  case HtmlTokenId::HEAD2_ON:
4123  case HtmlTokenId::HEAD3_ON:
4124  case HtmlTokenId::HEAD4_ON:
4125  case HtmlTokenId::HEAD5_ON:
4126  case HtmlTokenId::HEAD6_ON:
4127  xCntxt = std::move(m_aContexts[nPos]);
4128  m_aContexts.erase( m_aContexts.begin() + nPos );
4129  break;
4130  default: break;
4131  }
4132  }
4133 
4134  // and now end attributes
4135  if (xCntxt)
4136  {
4137  EndContext(xCntxt.get());
4138  SetAttr(); // because of JavaScript set paragraph attributes as fast as possible
4139  xCntxt.reset();
4140  }
4141 
4142  // reset existing style
4143  SetTextCollAttrs();
4144 
      // NOTE(review): listing line 4145 is missing from this extract.
4146 }
4147 
// Starts a block element that maps to a text collection: reads
// ID/STYLE/CLASS/LANG/DIR, starts a new paragraph in a mode that depends on
// nToken, creates and pushes a context for ( nToken, nColl, class ) and
// applies inline styles.
// nToken - the "on" token of the element (LISTING, XMP, BLOCKQUOTE,
//          BLOCKQUOTE30, PREFORMTXT, ADDRESS, DT or DD); any other token
//          only triggers the "unknown style" assertion.
// nColl  - id of the text collection stored in the context.
4148 void SwHTMLParser::NewTextFormatColl( HtmlTokenId nToken, sal_uInt16 nColl )
4149 {
4150  OUString aId, aStyle, aClass, aLang, aDir;
4151 
      // the options are visited from last to first
4152  const HTMLOptions& rHTMLOptions = GetOptions();
4153  for (size_t i = rHTMLOptions.size(); i; )
4154  {
4155  const HTMLOption& rOption = rHTMLOptions[--i];
4156  switch( rOption.GetToken() )
4157  {
4158  case HtmlOptionId::ID:
4159  aId = rOption.GetString();
4160  break;
4161  case HtmlOptionId::STYLE:
4162  aStyle = rOption.GetString();
4163  break;
4164  case HtmlOptionId::CLASS:
4165  aClass = rOption.GetString();
4166  break;
4167  case HtmlOptionId::LANG:
4168  aLang = rOption.GetString();
4169  break;
4170  case HtmlOptionId::DIR:
4171  aDir = rOption.GetString();
4172  break;
4173  default: break;
4174  }
4175  }
4176 
4177  // open a new paragraph
      // NOTE(review): listing line 4178 is missing from this extract; eMode,
      // which is used below, is presumably declared on it.
4179  switch( nToken )
4180  {
4181  case HtmlTokenId::LISTING_ON:
4182  case HtmlTokenId::XMP_ON:
4183  // These both tags will be mapped to the PRE style. For the case that a
4184  // a CLASS exists we will delete it so that we don't get the CLASS of
4185  // the PRE style.
4186  aClass.clear();
4187  [[fallthrough]];
4188  case HtmlTokenId::BLOCKQUOTE_ON:
4189  case HtmlTokenId::BLOCKQUOTE30_ON:
4190  case HtmlTokenId::PREFORMTXT_ON:
4191  eMode = AM_SPACE;
4192  break;
4193  case HtmlTokenId::ADDRESS_ON:
4194  eMode = AM_NOSPACE; // ADDRESS can follow on a <P> without </P>
4195  break;
4196  case HtmlTokenId::DT_ON:
4197  case HtmlTokenId::DD_ON:
4198  eMode = AM_SOFTNOSPACE;
4199  break;
4200  default:
4201  OSL_ENSURE( false, "unknown style" );
4202  break;
4203  }
      // a non-empty paragraph is ended in eMode; an empty one only gets
      // paragraph spacing, and only in AM_SPACE mode
4204  if( m_pPam->GetPoint()->nContent.GetIndex() )
4205  AppendTextNode( eMode );
4206  else if( AM_SPACE==eMode )
4207  AddParSpace();
4208 
4209  // ... and save in a context
4210  std::unique_ptr<HTMLAttrContext> xCntxt(new HTMLAttrContext(nToken, nColl, aClass));
4211 
4212  // parse styles (regarding class see also NewPara)
4213  if (HasStyleOptions(aStyle, aId, OUString(), &aLang, &aDir))
4214  {
4215  SfxItemSet aItemSet( m_xDoc->GetAttrPool(), m_pCSS1Parser->GetWhichMap() );
4216  SvxCSS1PropertyInfo aPropInfo;
4217 
4218  if (ParseStyleOptions(aStyle, aId, OUString(), aItemSet, aPropInfo, &aLang, &aDir))
4219  {
4220  OSL_ENSURE( aClass.isEmpty() || !m_pCSS1Parser->GetClass( aClass ),
4221  "Class is not considered" );
4222  DoPositioning( aItemSet, aPropInfo, xCntxt.get() );
4223  InsertAttrs( aItemSet, aPropInfo, xCntxt.get() );
4224  }
4225  }
4226 
4227  PushContext(xCntxt);
4228 
4229  // set the new style
4230  SetTextCollAttrs(m_aContexts.back().get());
4231 
4232  // update progress bar
4233  ShowStatline();
4234 }
4235 
// NOTE(review): the signature line (listing line 4236) is missing from this
// extract; the body reads a parameter named nToken (presumably
// SwHTMLParser::EndTextFormatColl( HtmlTokenId nToken ) - confirm).
// Ends a text-collection element: ends the current paragraph in a mode that
// depends on the "on" form of nToken, pops the matching context, ends its
// attributes and resets the text collection attributes.
4237 {
      // NOTE(review): listing line 4238 is missing from this extract; eMode,
      // which is used below, is presumably declared on it.
4239  switch( getOnToken(nToken) )
4240  {
4241  case HtmlTokenId::BLOCKQUOTE_ON:
4242  case HtmlTokenId::BLOCKQUOTE30_ON:
4243  case HtmlTokenId::PREFORMTXT_ON:
4244  case HtmlTokenId::LISTING_ON:
4245  case HtmlTokenId::XMP_ON:
4246  eMode = AM_SPACE;
4247  break;
      // unlike in NewTextFormatColl(), ADDRESS shares the mode of DT/DD here
4248  case HtmlTokenId::ADDRESS_ON:
4249  case HtmlTokenId::DT_ON:
4250  case HtmlTokenId::DD_ON:
4251  eMode = AM_SOFTNOSPACE;
4252  break;
4253  default:
4254  OSL_ENSURE( false, "unknown style" );
4255  break;
4256  }
      // a non-empty paragraph is ended in eMode; an empty one only gets
      // paragraph spacing, and only in AM_SPACE mode
4257  if( m_pPam->GetPoint()->nContent.GetIndex() )
4258  AppendTextNode( eMode );
4259  else if( AM_SPACE==eMode )
4260  AddParSpace();
4261 
4262  // pop current context of stack
4263  std::unique_ptr<HTMLAttrContext> xCntxt(PopContext(getOnToken(nToken)));
4264 
4265  // and now end attributes
4266  if (xCntxt)
4267  {
4268  EndContext(xCntxt.get());
4269  SetAttr(); // because of JavaScript set paragraph attributes as fast as possible
4270  xCntxt.reset();
4271  }
4272 
4273  // reset existing style
4274  SetTextCollAttrs();
4275 }
4276 
// NOTE(review): the signature line (listing line 4277) is missing from this
// extract (presumably SwHTMLParser::NewDefList() - confirm).
// Starts a definition list: reads ID/STYLE/CLASS/LANG/DIR, starts a new
// paragraph, increments m_nDefListDeep, creates a DEFLIST_ON context that
// carries the margins of the enclosing context (plus the DD indent for
// nested lists that are not inside a DD), applies style options and pushes
// the context.
4278 {
4279  OUString aId, aStyle, aClass, aLang, aDir;
4280 
      // the options are visited from last to first
4281  const HTMLOptions& rHTMLOptions = GetOptions();
4282  for (size_t i = rHTMLOptions.size(); i; )
4283  {
4284  const HTMLOption& rOption = rHTMLOptions[--i];
4285  switch( rOption.GetToken() )
4286  {
4287  case HtmlOptionId::ID:
4288  aId = rOption.GetString();
4289  break;
4290  case HtmlOptionId::STYLE:
4291  aStyle = rOption.GetString();
4292  break;
4293  case HtmlOptionId::CLASS:
4294  aClass = rOption.GetString();
4295  break;
4296  case HtmlOptionId::LANG:
4297  aLang = rOption.GetString();
4298  break;
4299  case HtmlOptionId::DIR:
4300  aDir = rOption.GetString();
4301  break;
4302  default: break;
4303  }
4304  }
4305 
4306  // open a new paragraph
      // paragraph spacing is used only when no numbering or definition list
      // is open yet
4307  bool bSpace = (GetNumInfo().GetDepth() + m_nDefListDeep) == 0;
4308  if( m_pPam->GetPoint()->nContent.GetIndex() )
4309  AppendTextNode( bSpace ? AM_SPACE : AM_SOFTNOSPACE );
4310  else if( bSpace )
4311  AddParSpace();
4312 
4313  // one level more
4314  m_nDefListDeep++;
4315 
      // walk the context stack from the top: stop at the first DD (bInDD) or
      // at the first list context of any kind (bNotInDD)
4316  bool bInDD = false, bNotInDD = false;
4317  auto nPos = m_aContexts.size();
4318  while( !bInDD && !bNotInDD && nPos>m_nContextStMin )
4319  {
4320  HtmlTokenId nCntxtToken = m_aContexts[--nPos]->GetToken();
4321  switch( nCntxtToken )
4322  {
4323  case HtmlTokenId::DEFLIST_ON:
4324  case HtmlTokenId::DIRLIST_ON:
4325  case HtmlTokenId::MENULIST_ON:
4326  case HtmlTokenId::ORDERLIST_ON:
4327  case HtmlTokenId::UNORDERLIST_ON:
4328  bNotInDD = true;
4329  break;
4330  case HtmlTokenId::DD_ON:
4331  bInDD = true;
4332  break;
4333  default: break;
4334  }
4335  }
4336 
4337  // ... and save in a context
4338  std::unique_ptr<HTMLAttrContext> xCntxt(new HTMLAttrContext(HtmlTokenId::DEFLIST_ON));
4339 
4340  // in it save also the margins
4341  sal_uInt16 nLeft=0, nRight=0;
4342  short nIndent=0;
4343  GetMarginsFromContext( nLeft, nRight, nIndent );
4344 
4345  // The indentation, which already results from a DL, correlates with a DT
4346  // on the current level and this correlates to a DD from the previous level.
4347  // For a level >=2 we must add DD distance.
4348  if( !bInDD && m_nDefListDeep > 1 )
4349  {
4350 
4351  // and the one of the DT-style of the current level
      // NOTE(review): the comment above mentions the DT style, but the code
      // reads the RES_POOLCOLL_HTML_DD collection - confirm which is meant.
      // rLRSpace is a copy of the item, despite its "r" prefix.
4352  SvxLRSpaceItem rLRSpace =
4353  m_pCSS1Parser->GetTextFormatColl(RES_POOLCOLL_HTML_DD, OUString())
4354  ->GetLRSpace();
4355  nLeft = nLeft + static_cast< sal_uInt16 >(rLRSpace.GetTextLeft());
4356  }
4357 
4358  xCntxt->SetMargins( nLeft, nRight, nIndent );
4359 
4360  // parse styles
4361  if( HasStyleOptions( aStyle, aId, aClass, &aLang, &aDir ) )
4362  {
4363  SfxItemSet aItemSet( m_xDoc->GetAttrPool(), m_pCSS1Parser->GetWhichMap() );
4364  SvxCSS1PropertyInfo aPropInfo;
4365 
4366  if( ParseStyleOptions( aStyle, aId, aClass, aItemSet, aPropInfo, &aLang, &aDir ) )
4367  {
4368  DoPositioning( aItemSet, aPropInfo, xCntxt.get() );
4369  InsertAttrs( aItemSet, aPropInfo, xCntxt.get() );
4370  }
4371  }
4372 
4373  PushContext(xCntxt);
4374 
4375  // set the attributes of the new style
4376  if( m_nDefListDeep > 1 )
4377  SetTextCollAttrs(m_aContexts.back().get());
4378 }
4379 
4381 {
 // NOTE(review): the signature line (4380) is not part of this extract. Judging by the
 // DEFLIST_ON context popped below, this presumably is the handler for the end of a
 // definition list - confirm against the full file.
 //
 // Spacing is only requested when numbering depth plus definition-list depth is exactly 1.
4382  bool bSpace = (GetNumInfo().GetDepth() + m_nDefListDeep) == 1;
 // Cursor is not at content index 0: append a new text node (with or without spacing).
 // Otherwise the paragraph is still empty and only the spacing is added, if wanted.
4383  if( m_pPam->GetPoint()->nContent.GetIndex() )
4384  AppendTextNode( bSpace ? AM_SPACE : AM_SOFTNOSPACE );
4385  else if( bSpace )
4386  AddParSpace();
4387 
4388  // one level less
 // (guarded, so the counter is never decremented when it is already 0)
4389  if( m_nDefListDeep > 0 )
4390  m_nDefListDeep--;
4391 
4392  // pop current context of stack
4393  std::unique_ptr<HTMLAttrContext> xCntxt(PopContext(HtmlTokenId::DEFLIST_ON));
4394 
4395  // and now end attributes
 // PopContext's result is checked for null before it is used
4396  if (xCntxt)
4397  {
4398  EndContext(xCntxt.get());
4399  SetAttr(); // because of JavaScript set paragraph attributes as fast as possible
4400  xCntxt.reset();
4401  }
4402 
4403  // and set style
 // called without a context argument
4404  SetTextCollAttrs();
4405 }
4406 
4408 {
 // NOTE(review): the signature line (4407) is not part of this extract; the body uses a
 // parameter-like name nToken (compared against HtmlTokenId::DD_ON below). Presumably
 // this is the handler that opens a DD/DT item - confirm against the full file.
4409  // determine if the DD/DT exist in a DL
4410  bool bInDefList = false, bNotInDefList = false;
 // Walk the context stack from the top down to m_nContextStMin and stop at the first
 // list context: a DEFLIST_ON means "inside a DL", any other list type means "not inside".
4411  auto nPos = m_aContexts.size();
4412  while( !bInDefList && !bNotInDefList && nPos>m_nContextStMin )
4413  {
4414  HtmlTokenId nCntxtToken = m_aContexts[--nPos]->GetToken();
4415  switch( nCntxtToken )
4416  {
4417  case HtmlTokenId::DEFLIST_ON:
4418  bInDefList = true;
4419  break;
4420  case HtmlTokenId::DIRLIST_ON:
4421  case HtmlTokenId::MENULIST_ON:
4422  case HtmlTokenId::ORDERLIST_ON:
4423  case HtmlTokenId::UNORDERLIST_ON:
4424  bNotInDefList = true;
4425  break;
4426  default: break;
4427  }
4428  }
4429 
4430  // if not, then implicitly open a new DL
 // Only the depth counter and m_nOpenParaToken are updated here; no DEFLIST_ON
 // context is pushed in this block.
4431  if( !bInDefList )
4432  {
4433  m_nDefListDeep++;
4434  OSL_ENSURE( m_nOpenParaToken == HtmlTokenId::NONE,
4435  "Now an open paragraph element will be lost." );
4436  m_nOpenParaToken = nToken;
4437  }
4438 
 // DD_ON selects the DD pool style, every other token value the DT pool style.
4439  NewTextFormatColl( nToken, static_cast< sal_uInt16 >(nToken==HtmlTokenId::DD_ON ? RES_POOLCOLL_HTML_DD
4440  : RES_POOLCOLL_HTML_DT) );
4441 }
4442 
4444 {
 // NOTE(review): this extract is incomplete. The signature line (4443), the statement
 // controlled by the if below (4447) and one statement in the list-token case (4473)
 // are missing; do not treat the text below as compilable as-is. Presumably this is
 // the handler that closes a DD/DT item - confirm against the full file.
4445  // open a new paragraph
 // (condition: no explicit token was passed and the cursor is not at content index 0)
4446  if( nToken == HtmlTokenId::NONE && m_pPam->GetPoint()->nContent.GetIndex() )
4448 
4449  // search context matching the token and fetch it from stack
 // nToken is first normalised through getOnToken(); NONE matches either DD or DT.
4450  nToken = getOnToken(nToken);
4451  std::unique_ptr<HTMLAttrContext> xCntxt;
4452  auto nPos = m_aContexts.size();
4453  while( !xCntxt && nPos>m_nContextStMin )
4454  {
4455  HtmlTokenId nCntxtToken = m_aContexts[--nPos]->GetToken();
4456  switch( nCntxtToken )
4457  {
4458  case HtmlTokenId::DD_ON:
4459  case HtmlTokenId::DT_ON:
4460  if( nToken == HtmlTokenId::NONE || nToken == nCntxtToken )
4461  {
 // take ownership of the context and remove its (now empty) slot from the stack
4462  xCntxt = std::move(m_aContexts[nPos]);
4463  m_aContexts.erase( m_aContexts.begin() + nPos );
4464  }
4465  break;
4466  case HtmlTokenId::DEFLIST_ON:
4467  // don't look at DD/DT outside the current DefList
4468  case HtmlTokenId::DIRLIST_ON:
4469  case HtmlTokenId::MENULIST_ON:
4470  case HtmlTokenId::ORDERLIST_ON:
4471  case HtmlTokenId::UNORDERLIST_ON:
4472  // and also not outside another list
 // NOTE(review): line 4473 is missing here; presumably it terminates the search - confirm.
4474  break;
4475  default: break;
4476  }
4477  }
4478 
4479  // and now end attributes
 // nothing happens when no matching DD/DT context was found
4480  if (xCntxt)
4481  {
4482  EndContext(xCntxt.get());
4483  SetAttr(); // because of JavaScript set paragraph attributes as fast as possible
4484  }
4485 }
4486 
4496 bool SwHTMLParser::HasCurrentParaFlys( bool bNoSurroundOnly,
4497  bool bSurroundOnly ) const
4498 {
4499  SwNodeIndex& rNodeIdx = m_pPam->GetPoint()->nNode;
4500 
4501  const SwFrameFormats& rFrameFormatTable = *m_xDoc->GetSpzFrameFormats();
4502 
4503  bool bFound = false;
4504  for ( size_t i=0; i<rFrameFormatTable.size(); i++ )
4505  {
4506  const SwFrameFormat *const pFormat = rFrameFormatTable[i];
4507  SwFormatAnchor const*const pAnchor = &pFormat->GetAnchor();
4508  // A frame was found, when
4509  // - it is paragraph-bound, and
4510  // - is anchored in current paragraph, and
4511  // - every paragraph-bound frame counts, or
4512  // - (only frames without wrapping count and) the frame doesn't have
4513  // a wrapping
4514  SwPosition const*const pAPos = pAnchor->GetContentAnchor();
4515  if (pAPos &&
4516  ((RndStdIds::FLY_AT_PARA == pAnchor->GetAnchorId()) ||
4517  (RndStdIds::FLY_AT_CHAR == pAnchor->GetAnchorId())) &&
4518  pAPos->nNode == rNodeIdx )
4519  {
4520  if( !(bNoSurroundOnly || bSurroundOnly) )
4521  {
4522  bFound = true;
4523  break;
4524  }
4525  else
4526  {
4527  // When looking for frames with wrapping, also disregard
4528  // ones with wrap-through. In this case it's (still) HIDDEN-Controls,
4529  // and you don't want to evade those when positioning.
4530  css::text::WrapTextMode eSurround = pFormat->GetSurround().GetSurround();
4531  if( bNoSurroundOnly )
4532  {
4533  if( css::text::WrapTextMode_NONE==eSurround )
4534  {
4535  bFound = true;
4536  break;
4537  }
4538  }
4539  if( bSurroundOnly )
4540  {
4541  if( css::text::WrapTextMode_NONE==eSurround )
4542  {
4543  bFound = false;
4544  break;
4545  }
4546  else if( css::text::WrapTextMode_THROUGH!=eSurround )
4547  {
4548  bFound = true;
4549  // Continue searching: It's possible that some without
4550  // wrapping will follow...
4551  }
4552  }
4553  }
4554  }
4555  }
4556 
4557  return bFound;
4558 }
4559 
4560 // the special methods for inserting of objects
4561 
4563 {
 // NOTE(review): the signature line (4562) is not part of this extract.
 // Returns the address of the format collection of the content node at the current
 // PaM, or nullptr when the PaM has no content node.
4564  const SwContentNode* pCNd = m_pPam->GetContentNode();
4565  return pCNd ? &pCNd->GetAnyFormatColl() : nullptr;
4566 }
4567 
4569 {
 // NOTE(review): the signature line (4568) is not part of this extract; the body uses a
 // pointer named pContext that may be null (see the null checks below).
 //
 // Overview: walks the context stack from m_nContextStAttrMin upwards, determines the
 // paragraph style to set and the attributes of overridden styles that have to be
 // applied as hard attributes, accumulates the paragraph margins, applies the style to
 // the current PaM and finally inserts the hard attributes.
 //
 // NOTE(review): pItemSet is an owning raw pointer (new below, delete at the very end);
 // nothing visible here releases it if a call in between throws - consider
 // std::unique_ptr.
4570  SwTextFormatColl *pCollToSet = nullptr; // the style to set
4571  SfxItemSet *pItemSet = nullptr; // set of hard attributes
4572  sal_uInt16 nTopColl = pContext ? pContext->GetTextFormatColl() : 0;
4573  const OUString rTopClass = pContext ? pContext->GetClass() : OUString();
4574  sal_uInt16 nDfltColl = RES_POOLCOLL_TEXT;
4575 
4576  bool bInPRE=false; // some context info
4577 
4578  sal_uInt16 nLeftMargin = 0, nRightMargin = 0; // the margins and
4579  short nFirstLineIndent = 0; // indentations
4580 
4581  for( auto i = m_nContextStAttrMin; i < m_aContexts.size(); ++i )
4582  {
4583  const HTMLAttrContext *pCntxt = m_aContexts[i].get();
4584 
4585  sal_uInt16 nColl = pCntxt->GetTextFormatColl();
4586  if( nColl )
4587  {
4588  // There is a style to set. Then at first we must decide,
4589  // if the style can be set.
4590  bool bSetThis = true;
4591  switch( nColl )
4592  {
4593  case RES_POOLCOLL_HTML_PRE:
4594  bInPRE = true;
4595  break;
4596  case RES_POOLCOLL_TEXT:
4597  // <TD><P CLASS=xxx> must become TD.xxx
4598  if( nDfltColl==RES_POOLCOLL_TABLE ||
4599  nDfltColl==RES_POOLCOLL_TABLE_HDLN )
4600  nColl = nDfltColl;
4601  break;
4602  case RES_POOLCOLL_HTML_HR:
4603  // also <HR> in <PRE> set as style, otherwise it can't
4604  // be exported anymore
4605  break;
4606  default:
 // once a PRE context was seen, any other style is only applied as hard attributes
4607  if( bInPRE )
4608  bSetThis = false;
4609  break;
4610  }
4611 
 // NOTE(review): pNewColl is dereferenced below without a null check - confirm that
 // GetTextFormatColl never returns nullptr.
4612  SwTextFormatColl *pNewColl =
4613  m_pCSS1Parser->GetTextFormatColl( nColl, pCntxt->GetClass() );
4614 
4615  if( bSetThis )
4616  {
4617  // If now a different style should be set as previously, the
4618  // previous style must be replaced by hard attribution.
4619 
4620  if( pCollToSet )
4621  {
4622  // insert the attributes hard, which previous style sets
4623  if( !pItemSet )
4624  pItemSet = new SfxItemSet( pCollToSet->GetAttrSet() );
4625  else
4626  {
4627  const SfxItemSet& rCollSet = pCollToSet->GetAttrSet();
4628  SfxItemSet aItemSet( *rCollSet.GetPool(),
4629  rCollSet.GetRanges() );
4630  aItemSet.Set( rCollSet );
4631  pItemSet->Put( aItemSet );
4632  }
4633  // but remove the attributes, which the current style sets,
4634  // because otherwise they will be overwritten later
4635  pItemSet->Differentiate( pNewColl->GetAttrSet() );
4636  }
4637 
4638  pCollToSet = pNewColl;
4639  }
4640  else
4641  {
4642  // hard attribution
4643  if( !pItemSet )
4644  pItemSet = new SfxItemSet( pNewColl->GetAttrSet() );
4645  else
4646  {
4647  const SfxItemSet& rCollSet = pNewColl->GetAttrSet();
4648  SfxItemSet aItemSet( *rCollSet.GetPool(),
4649  rCollSet.GetRanges() );
4650  aItemSet.Set( rCollSet );
4651  pItemSet->Put( aItemSet );
4652  }
4653  }
4654  }
4655  else
4656  {
4657  // Maybe a default style exists?
4658  nColl = pCntxt->GetDfltTextFormatColl();
4659  if( nColl )
4660  nDfltColl = nColl;
4661  }
4662 
4663  // if applicable fetch new paragraph indents
 // (a context with changed LR space replaces the margins collected so far)
4664  if( pCntxt->IsLRSpaceChanged() )
4665  {
4666  sal_uInt16 nLeft=0, nRight=0;
4667 
4668  pCntxt->GetMargins( nLeft, nRight, nFirstLineIndent );
4669  nLeftMargin = nLeft;
4670  nRightMargin = nRight;
4671  }
4672  }
4673 
4674  // If in current context a new style should be set,
4675  // its paragraph margins must be inserted in the context.
4676  if( pContext && nTopColl )
4677  {
4678  // <TD><P CLASS=xxx> must become TD.xxx
4679  if( nTopColl==RES_POOLCOLL_TEXT &&
4680  (nDfltColl==RES_POOLCOLL_TABLE ||
4681  nDfltColl==RES_POOLCOLL_TABLE_HDLN) )
4682  nTopColl = nDfltColl;
4683 
4684  const SwTextFormatColl *pTopColl =
4685  m_pCSS1Parser->GetTextFormatColl( nTopColl, rTopClass );
4686  const SfxItemSet& rItemSet = pTopColl->GetAttrSet();
4687  const SfxPoolItem *pItem;
4688  if( SfxItemState::SET == rItemSet.GetItemState(RES_LR_SPACE,true, &pItem) )
4689  {
4690  const SvxLRSpaceItem *pLRItem =
4691  static_cast<const SvxLRSpaceItem *>(pItem);
4692 
4693  sal_Int32 nLeft = pLRItem->GetTextLeft();
4694  sal_Int32 nRight = pLRItem->GetRight();
4695  nFirstLineIndent = pLRItem->GetTextFirstLineOffset();
4696 
4697  // In Definition lists the margins also contain the margins from the previous levels
4698  if( RES_POOLCOLL_HTML_DD == nTopColl )
4699  {
4700  const SvxLRSpaceItem& rDTLRSpace = m_pCSS1Parser
4701  ->GetTextFormatColl(RES_POOLCOLL_HTML_DT, OUString())
4702  ->GetLRSpace();
4703  nLeft -= rDTLRSpace.GetTextLeft();
4704  nRight -= rDTLRSpace.GetRight();
4705  }
4706  else if( RES_POOLCOLL_HTML_DT == nTopColl )
4707  {
4708  nLeft = 0;
4709  nRight = 0;
4710  }
4711 
4712  // the paragraph margins add up
 // NOTE(review): nLeft/nRight are sal_Int32 and may be negative after the DD
 // adjustment above; the casts to sal_uInt16 wrap silently - confirm this is intended.
4713  nLeftMargin = nLeftMargin + static_cast< sal_uInt16 >(nLeft);
4714  nRightMargin = nRightMargin + static_cast< sal_uInt16 >(nRight);
4715 
4716  pContext->SetMargins( nLeftMargin, nRightMargin,
4717  nFirstLineIndent );
4718  }
4719  if( SfxItemState::SET == rItemSet.GetItemState(RES_UL_SPACE,true, &pItem) )
4720  {
4721  const SvxULSpaceItem *pULItem =
4722  static_cast<const SvxULSpaceItem *>(pItem);
4723  pContext->SetULSpace( pULItem->GetUpper(), pULItem->GetLower() );
4724  }
4725  }
4726 
4727  // If no style is set in the context use the text body.
 // (more precisely: the pool style for nDfltColl; margins that are still zero are
 // taken over from that style)
4728  if( !pCollToSet )
4729  {
4730  pCollToSet = m_pCSS1Parser->GetTextCollFromPool( nDfltColl );
4731  const SvxLRSpaceItem& rLRItem = pCollToSet->GetLRSpace();
4732  if( !nLeftMargin )
4733  nLeftMargin = static_cast< sal_uInt16 >(rLRItem.GetTextLeft());
4734  if( !nRightMargin )
4735  nRightMargin = static_cast< sal_uInt16 >(rLRItem.GetRight());
4736  if( !nFirstLineIndent )
4737  nFirstLineIndent = rLRItem.GetTextFirstLineOffset();
4738  }
4739 
4740  // remove previous hard attribution of paragraph
4741  for( auto pParaAttr : m_aParaAttrs )
4742  pParaAttr->Invalidate();
4743  m_aParaAttrs.clear();
4744 
4745  // set the style
4746  m_xDoc->SetTextFormatColl( *m_pPam, pCollToSet );
4747 
4748  // if applicable correct the paragraph indent
 // an LR-space item is only needed when the computed values differ from the style's own
4749  const SvxLRSpaceItem& rLRItem = pCollToSet->GetLRSpace();
4750  bool bSetLRSpace = nLeftMargin != rLRItem.GetTextLeft() ||
4751  nFirstLineIndent != rLRItem.GetTextFirstLineOffset() ||
4752  nRightMargin != rLRItem.GetRight();
4753 
4754  if( bSetLRSpace )
4755  {
4756  SvxLRSpaceItem aLRItem( rLRItem );
4757  aLRItem.SetTextLeft( nLeftMargin );
4758  aLRItem.SetRight( nRightMargin );
4759  aLRItem.SetTextFirstLineOffset( nFirstLineIndent );
 // with a hard-attribute set pending the item rides along in it; otherwise it is
 // opened and immediately closed as a paragraph attribute of its own
4760  if( pItemSet )
4761  pItemSet->Put( aLRItem );
4762  else
4763  {
4764  NewAttr(m_xAttrTab, &m_xAttrTab->pLRSpace, aLRItem);
4765  m_xAttrTab->pLRSpace->SetLikePara();
4766  m_aParaAttrs.push_back( m_xAttrTab->pLRSpace );
4767  EndAttr( m_xAttrTab->pLRSpace, false );
4768  }
4769  }
4770 
4771  // and now set the attributes
4772  if( pItemSet )
4773  {
4774  InsertParaAttrs( *pItemSet );
4775  delete pItemSet;
4776  }
4777 }
4778 
4780 {
 // NOTE(review): the signature line (4779) is not part of this extract; the body uses a
 // parameter-like name nToken.
 //
 // Overview: reads ID/STYLE/CLASS/LANG/DIR from the current tag's options, creates a
 // context for nToken, applies inline style options and the character format looked up
 // for (nToken, class), then pushes the context.
4781  OUString aId, aStyle, aLang, aDir;
4782  OUString aClass;
4783 
 // options are visited from last to first
4784  const HTMLOptions& rHTMLOptions = GetOptions();
4785  for (size_t i = rHTMLOptions.size(); i; )
4786  {
4787  const HTMLOption& rOption = rHTMLOptions[--i];
4788  switch( rOption.GetToken() )
4789  {
4790  case HtmlOptionId::ID:
4791  aId = rOption.GetString();
4792  break;
4793  case HtmlOptionId::STYLE:
4794  aStyle = rOption.GetString();
4795  break;
4796  case HtmlOptionId::CLASS:
4797  aClass = rOption.GetString();
4798  break;
4799  case HtmlOptionId::LANG:
4800  aLang = rOption.GetString();
4801  break;
4802  case HtmlOptionId::DIR:
4803  aDir = rOption.GetString();
4804  break;
4805  default: break;
4806  }
4807  }
4808 
4809  // create a new context
4810  std::unique_ptr<HTMLAttrContext> xCntxt(new HTMLAttrContext(nToken));
4811 
4812  // set the style and save it in the context
 // a missing format only triggers the assertion; it is tolerated further below
4813  SwCharFormat* pCFormat = m_pCSS1Parser->GetChrFormat( nToken, aClass );
4814  OSL_ENSURE( pCFormat, "No character format found for token" );
4815 
4816  // parse styles (regarding class see also NewPara)
 // the class is deliberately passed as an empty string here; it was already used
 // for the character format lookup above
4817  if (HasStyleOptions(aStyle, aId, OUString(), &aLang, &aDir))
4818  {
4819  SfxItemSet aItemSet( m_xDoc->GetAttrPool(), m_pCSS1Parser->GetWhichMap() );
4820  SvxCSS1PropertyInfo aPropInfo;
4821 
4822  if (ParseStyleOptions(aStyle, aId, OUString(), aItemSet, aPropInfo, &aLang, &aDir))
4823  {
4824  OSL_ENSURE( aClass.isEmpty() || !m_pCSS1Parser->GetClass( aClass ),
4825  "Class is not considered" );
4826  DoPositioning( aItemSet, aPropInfo, xCntxt.get() );
4827  InsertAttrs( aItemSet, aPropInfo, xCntxt.get(), true );
4828  }
4829  }
4830 
4831  // Character formats are stored in their own stack and can never be inserted
4832  // by styles. Therefore the attribute doesn't exist in CSS1-Which-Range.
4833  if( pCFormat )
4834  InsertAttr( &m_xAttrTab->pCharFormats, SwFormatCharFormat( pCFormat ), xCntxt.get() );
4835 
4836  // save the context
4837  PushContext(xCntxt);
4838 }
4839 
4841 {
 // NOTE(review): this extract is incomplete. The signature line (4840) and lines 4894,
 // 4929, 4931, 4958, 4977 and 4979 are missing, so several statements below are
 // truncated; do not treat the text as compilable as-is.
 //
 // Overview: reads TYPE/ALIGN/WIDTH/HEIGHT/SIZE from the current tag's options and,
 // depending on the spacer type, creates an empty protected fly frame (BLOCK), adds
 // vertical paragraph spacing (VERT) or adds horizontal space via first-line indent or
 // kerning (HORI, the default type).
4842  // and if applicable change it via the options
4843  sal_Int16 eVertOri = text::VertOrientation::TOP;
4844  sal_Int16 eHoriOri = text::HoriOrientation::NONE;
4845  Size aSize( 0, 0);
4846  long nSize = 0;
4847  bool bPercentWidth = false;
4848  bool bPercentHeight = false;
4849  sal_uInt16 nType = HTML_SPTYPE_HORI;
4850 
4851  const HTMLOptions& rHTMLOptions = GetOptions();
4852  for (size_t i = rHTMLOptions.size(); i; )
4853  {
4854  const HTMLOption& rOption = rHTMLOptions[--i];
4855  switch( rOption.GetToken() )
4856  {
4857  case HtmlOptionId::TYPE:
4858  rOption.GetEnum( nType, aHTMLSpacerTypeTable );
4859  break;
4860  case HtmlOptionId::ALIGN:
 // the same ALIGN value is looked up in both the vertical and the horizontal table
4861  eVertOri =
4862  rOption.GetEnum( aHTMLImgVAlignTable,
4863  eVertOri );
4864  eHoriOri =
4865  rOption.GetEnum( aHTMLImgHAlignTable,
4866  eHoriOri );
4867  break;
4868  case HtmlOptionId::WIDTH:
4869  // First only save as pixel value!
4870  bPercentWidth = (rOption.GetString().indexOf('%') != -1);
4871  aSize.setWidth( static_cast<long>(rOption.GetNumber()) );
4872  break;
4873  case HtmlOptionId::HEIGHT:
4874  // First only save as pixel value!
4875  bPercentHeight = (rOption.GetString().indexOf('%') != -1);
4876  aSize.setHeight( static_cast<long>(rOption.GetNumber()) );
4877  break;
4878  case HtmlOptionId::SIZE:
4879  // First only save as pixel value!
4880  nSize = rOption.GetNumber();
4881  break;
4882  default: break;
4883  }
4884  }
4885 
4886  switch( nType )
4887  {
4888  case HTML_SPTYPE_BLOCK:
4889  {
4890  // create an empty text frame
4891 
4892  // fetch the ItemSet
 // NOTE(review): line 4894 (rest of the aFrameSet constructor call) is missing here.
4893  SfxItemSet aFrameSet( m_xDoc->GetAttrPool(),
4895  if( !IsNewDoc() )
4896  Reader::ResetFrameFormatAttrs( aFrameSet );
4897 
4898  // set the anchor and the adjustment
4899  SetAnchorAndAdjustment( eVertOri, eHoriOri, aFrameSet );
4900 
4901  // and the size of the frame
4902  Size aDfltSz( MINFLY, MINFLY );
4903  Size aSpace( 0, 0 );
4904  SfxItemSet aDummyItemSet( m_xDoc->GetAttrPool(),
4905  m_pCSS1Parser->GetWhichMap() );
4906  SvxCSS1PropertyInfo aDummyPropInfo;
4907 
4908  SetFixSize( aSize, aDfltSz, bPercentWidth, bPercentHeight,
4909  aDummyPropInfo, aFrameSet );
4910  SetSpace( aSpace, aDummyItemSet, aDummyPropInfo, aFrameSet );
4911 
4912  // protect the content
4913  SvxProtectItem aProtectItem( RES_PROTECT) ;
4914  aProtectItem.SetContentProtect( true );
4915  aFrameSet.Put( aProtectItem );
4916 
4917  // create the frame
4918  RndStdIds eAnchorId =
4919  aFrameSet.Get(RES_ANCHOR).GetAnchorId();
4920  SwFrameFormat *pFlyFormat = m_xDoc->MakeFlySection( eAnchorId,
4921  m_pPam->GetPoint(), &aFrameSet );
4922  // Possibly create frames and register auto-bound frames.
4923  RegisterFlyFrame( pFlyFormat );
4924  }
4925  break;
4926  case HTML_SPTYPE_VERT:
4927  if( nSize > 0 )
4928  {
 // NOTE(review): lines 4929 and 4931 are missing; the remaining fragment converts
 // Size(0,nSize) from pixels to twips and takes the height.
4930  {
4932  ->PixelToLogic( Size(0,nSize),
4933  MapMode(MapUnit::MapTwip) ).Height();
4934  }
4935 
4936  // set a paragraph margin
4937  SwTextNode *pTextNode = nullptr;
4938  if( !m_pPam->GetPoint()->nContent.GetIndex() )
4939  {
4940  // if possible change the bottom paragraph margin
4941  // of previous node
4942 
4943  SetAttr(); // set still open paragraph attributes
4944 
 // node directly in front of the current one; GetTextNode() yields null if it is
 // not a text node
4945  pTextNode = m_xDoc->GetNodes()[m_pPam->GetPoint()->nNode.GetIndex()-1]
4946  ->GetTextNode();
4947 
4948  // If the previous paragraph isn't a text node, then now an
4949  // empty paragraph is created, which already generates a single
4950  // line of spacing.
4951  if( !pTextNode )
4952  nSize = nSize>HTML_PARSPACE ? nSize-HTML_PARSPACE : 0;
4953  }
4954 
4955  if( pTextNode )
4956  {
 // NOTE(review): line 4958 (rest of the aULSpace initialiser) is missing here.
4957  SvxULSpaceItem aULSpace( static_cast<const SvxULSpaceItem&>(pTextNode
4959  aULSpace.SetLower( aULSpace.GetLower() + static_cast<sal_uInt16>(nSize) );
4960  pTextNode->SetAttr( aULSpace );
4961  }
4962  else
4963  {
4964  NewAttr(m_xAttrTab, &m_xAttrTab->pULSpace, SvxULSpaceItem(0, static_cast<sal_uInt16>(nSize), RES_UL_SPACE));
4965  EndAttr( m_xAttrTab->pULSpace, false );
4966 
4967  AppendTextNode(); // Don't change spacing!
4968  }
4969  }
4970  break;
4971  case HTML_SPTYPE_HORI:
4972  if( nSize > 0 )
4973  {
4974  // If the paragraph is still empty, set first line
4975  // indentation, otherwise apply letter spacing over a space.
4976 
 // NOTE(review): lines 4977 and 4979 are missing; the remaining fragment converts
 // Size(nSize,0) from pixels to twips and takes the width.
4978  {
4980  ->PixelToLogic( Size(nSize,0),
4981  MapMode(MapUnit::MapTwip) ).Width();
4982  }
4983 
4984  if( !m_pPam->GetPoint()->nContent.GetIndex() )
4985  {
4986  sal_uInt16 nLeft=0, nRight=0;
4987  short nIndent = 0;
4988 
4989  GetMarginsFromContextWithNumberBullet( nLeft, nRight, nIndent );
4990  nIndent = nIndent + static_cast<short>(nSize);
4991 
4992  SvxLRSpaceItem aLRItem( RES_LR_SPACE );
4993  aLRItem.SetTextLeft( nLeft );
4994  aLRItem.SetRight( nRight );
4995  aLRItem.SetTextFirstLineOffset( nIndent );
4996 
4997  NewAttr(m_xAttrTab, &m_xAttrTab->pLRSpace, aLRItem);
4998  EndAttr( m_xAttrTab->pLRSpace, false );
4999  }
5000  else
5001  {
 // a single space character carrying a kerning attribute of nSize
5002  NewAttr(m_xAttrTab, &m_xAttrTab->pKerning, SvxKerningItem( static_cast<short>(nSize), RES_CHRATR_KERNING ));
5003  m_xDoc->getIDocumentContentOperations().InsertString( *m_pPam, " " );
5004  EndAttr( m_xAttrTab->pKerning );
5005  }
5006  }
5007  }
5008 }
5009 
// Converts a pixel value to twips via the application's default output device.
// The result is clamped to SAL_MAX_UINT16. If nPixel is 0 or no default device
// exists, nPixel is returned unchanged.
5010 sal_uInt16 SwHTMLParser::ToTwips( sal_uInt16 nPixel )
5011 {
5012  if( nPixel && Application::GetDefaultDevice() )
5013  {
 // NOTE(review): line 5014 is missing from this extract; presumably it declares nTwips
 // as the result of a PixelToLogic call on the default device - confirm.
5015  Size( nPixel, nPixel ), MapMode( MapUnit::MapTwip ) ).Width();
 // clamp before narrowing so large values do not wrap around
5016  return static_cast<sal_uInt16>(std::min(nTwips, SwTwips(SAL_MAX_UINT16)));
5017  }
5018  else
5019  return nPixel;
5020 }
5021 
5023 {
 // NOTE(review): this extract is incomplete. The signature line (5022), the line that
 // defines nWidth (5024) and the statement controlled by the column check (5041) are
 // missing.
 //
 // A non-zero nWidth is returned directly; otherwise the printable page size of the
 // master page is used.
5025  if( nWidth )
5026  return nWidth;
5027 
 // m_aHTMLPageSize is computed only while its width is still 0, then reused
5028  if( !m_aHTMLPageSize.Width() )
5029  {
5030  const SwFrameFormat& rPgFormat = m_pCSS1Parser->GetMasterPageDesc()->GetMaster();
5031 
5032  const SwFormatFrameSize& rSz = rPgFormat.GetFrameSize();
5033  const SvxLRSpaceItem& rLR = rPgFormat.GetLRSpace();
5034  const SvxULSpaceItem& rUL = rPgFormat.GetULSpace();
5035  const SwFormatCol& rCol = rPgFormat.GetCol();
5036 
 // page size minus the page margins
5037  m_aHTMLPageSize.setWidth( rSz.GetWidth() - rLR.GetLeft() - rLR.GetRight() );
5038  m_aHTMLPageSize.setHeight( rSz.GetHeight() - rUL.GetUpper() - rUL.GetLower() );
5039 
 // NOTE(review): the statement executed for multi-column pages (5041) is missing here.
5040  if( 1 < rCol.GetNumCols() )
5042  }
5043 
5044  return m_aHTMLPageSize.Width();
5045 }
5046 
5048 {
 // NOTE(review): the signature line (5047) is not part of this extract.
 // Looks for an ID option on the current tag and, if it is non-empty, inserts a
 // bookmark with that name.
5049  OUString aId;
5050  const HTMLOptions& rHTMLOptions = GetOptions();
 // options are scanned from last to first; the first ID found wins
5051  for (size_t i = rHTMLOptions.size(); i; )
5052  {
5053  const HTMLOption& rOption = rHTMLOptions[--i];
5054  if( HtmlOptionId::ID==rOption.GetToken() )
5055  {
5056  aId = rOption.GetString();
5057  break;
5058  }
5059  }
5060 
5061  if( !aId.isEmpty() )
5062  InsertBookmark( aId );
5063 }
5064 
5066 {
 // NOTE(review): this extract is incomplete. The signature line (5065) and the statement
 // of the else-if branch near the end (5206) are missing.
5067  // <BR CLEAR=xxx> is handled as:
5068  // 1.) Only regard the paragraph-bound frames anchored in current paragraph.
5069  // 2.) For left-justified aligned frames, CLEAR=LEFT or ALL, and for right-
5070  // justified aligned frames, CLEAR=RIGHT or ALL, the wrap-through is
5071  // changed as following:
5072  // 3.) If the paragraph contains no text, then the frames don't get a wrapping
5073  // 4.) otherwise a left aligned frame gets a right "only anchor" wrapping
5074  // and a right aligned frame gets a left "only anchor" wrapping.
5075  // 5.) if in a non-empty paragraph the wrapping of a frame is changed,
5076  // then a new paragraph is opened
5077  // 6.) If no wrappings of frames are changed, a hard line break is inserted.
5078 
5079  OUString aId, aStyle, aClass; // the id of bookmark
5080  bool bClearLeft = false, bClearRight = false;
5081  bool bCleared = false; // Was a CLEAR executed?
5082 
5083  // then we fetch the options
5084  const HTMLOptions& rHTMLOptions = GetOptions();
5085  for (size_t i = rHTMLOptions.size(); i; )
5086  {
5087  const HTMLOption& rOption = rHTMLOptions[--i];
5088  switch( rOption.GetToken() )
5089  {
5090  case HtmlOptionId::CLEAR:
5091  {
 // case-insensitive match; any other CLEAR value leaves both flags untouched
5092  const OUString &rClear = rOption.GetString();
5093  if( rClear.equalsIgnoreAsciiCase( OOO_STRING_SVTOOLS_HTML_AL_all ) )
5094  {
5095  bClearLeft = true;
5096  bClearRight = true;
5097  }
5098  else if( rClear.equalsIgnoreAsciiCase( OOO_STRING_SVTOOLS_HTML_AL_left ) )
5099  bClearLeft = true;
5100  else if( rClear.equalsIgnoreAsciiCase( OOO_STRING_SVTOOLS_HTML_AL_right ) )
5101  bClearRight = true;
5102  }
5103  break;
5104  case HtmlOptionId::ID:
5105  aId = rOption.GetString();
5106  break;
5107  case HtmlOptionId::STYLE:
5108  aStyle = rOption.GetString();
5109  break;
5110  case HtmlOptionId::CLASS:
5111  aClass = rOption.GetString();
5112  break;
5113  default: break;
5114  }
5115  }
5116 
5117  // CLEAR is only supported for the current paragraph
5118  if( bClearLeft || bClearRight )
5119  {
5120  SwNodeIndex& rNodeIdx = m_pPam->GetPoint()->nNode;
5121  SwTextNode* pTextNd = rNodeIdx.GetNode().GetTextNode();
5122  if( pTextNd )
5123  {
5124  const SwFrameFormats& rFrameFormatTable = *m_xDoc->GetSpzFrameFormats();
5125 
5126  for( size_t i=0; i<rFrameFormatTable.size(); i++ )
5127  {
5128  SwFrameFormat *const pFormat = rFrameFormatTable[i];
5129  SwFormatAnchor const*const pAnchor = &pFormat->GetAnchor();
5130  SwPosition const*const pAPos = pAnchor->GetContentAnchor();
 // only at-paragraph/at-character frames in the current node that currently have
 // some wrapping are candidates
5131  if (pAPos &&
5132  ((RndStdIds::FLY_AT_PARA == pAnchor->GetAnchorId()) ||
5133  (RndStdIds::FLY_AT_CHAR == pAnchor->GetAnchorId())) &&
5134  pAPos->nNode == rNodeIdx &&
5135  pFormat->GetSurround().GetSurround() != css::text::WrapTextMode_NONE )
5136  {
 // draw formats are treated as left-aligned
5137  sal_Int16 eHori = RES_DRAWFRMFMT == pFormat->Which()
5138  ? text::HoriOrientation::LEFT
5139  : pFormat->GetHoriOrient().GetHoriOrient();
5140 
 // PARALLEL serves as the "leave unchanged" marker in this block
5141  css::text::WrapTextMode eSurround = css::text::WrapTextMode_PARALLEL;
5142  if( m_pPam->GetPoint()->nContent.GetIndex() )
5143  {
5144  if( bClearLeft && text::HoriOrientation::LEFT==eHori )
5145  eSurround = css::text::WrapTextMode_RIGHT;
5146  else if( bClearRight && text::HoriOrientation::RIGHT==eHori )
5147  eSurround = css::text::WrapTextMode_LEFT;
5148  }
5149  else if( (bClearLeft && text::HoriOrientation::LEFT==eHori) ||
5150  (bClearRight && text::HoriOrientation::RIGHT==eHori) )
5151  {
5152  eSurround = css::text::WrapTextMode_NONE;
5153  }
5154 
5155  if( css::text::WrapTextMode_PARALLEL != eSurround )
5156  {
5157  SwFormatSurround aSurround( eSurround );
5158  if( css::text::WrapTextMode_NONE != eSurround )
5159  aSurround.SetAnchorOnly( true );
5160  pFormat->SetFormatAttr( aSurround );
5161  bCleared = true;
5162  }
5163  }
5164  }
5165  }
5166  }
5167 
5168  // parse styles
 // aBreakItem starts as "no break" and is replaced by a clone of the parsed item when
 // the style options yield a break
5169  std::shared_ptr<SvxFormatBreakItem> aBreakItem(std::make_shared<SvxFormatBreakItem>(SvxBreak::NONE, RES_BREAK));
5170  bool bBreakItem = false;
5171  if( HasStyleOptions( aStyle, aId, aClass ) )
5172  {
5173  SfxItemSet aItemSet( m_xDoc->GetAttrPool(), m_pCSS1Parser->GetWhichMap() );
5174  SvxCSS1PropertyInfo aPropInfo;
5175 
5176  if( ParseStyleOptions( aStyle, aId, aClass, aItemSet, aPropInfo ) )
5177  {
5178  if( m_pCSS1Parser->SetFormatBreak( aItemSet, aPropInfo ) )
5179  {
5180  aBreakItem.reset(aItemSet.Get(RES_BREAK).Clone());
5181  bBreakItem = true;
5182  }
5183  if( !aPropInfo.m_aId.isEmpty() )
5184  InsertBookmark( aPropInfo.m_aId );
5185  }
5186  }
5187 
 // a page-break-after is attributed before the line break / paragraph handling below
5188  if( bBreakItem && SvxBreak::PageAfter == aBreakItem->GetBreak() )
5189  {
5190  NewAttr(m_xAttrTab, &m_xAttrTab->pBreak, *aBreakItem);
5191  EndAttr( m_xAttrTab->pBreak, false );
5192  }
5193 
5194  if( !bCleared && !bBreakItem )
5195  {
5196  // If no CLEAR could or should be executed, a line break will be inserted
5197  m_xDoc->getIDocumentContentOperations().InsertString( *m_pPam, "\x0A" );
5198  }
5199  else if( m_pPam->GetPoint()->nContent.GetIndex() )
5200  {
5201  // If a CLEAR is executed in a non-empty paragraph, then after it
5202  // a new paragraph has to be opened.
5203  // MIB 21.02.97: Here actually we should change the bottom paragraph
5204  // margin to zero. This will fail for something like this <BR ..><P>
5205  // (>Netscape). That's why we don't do it.
 // NOTE(review): the statement of this branch (line 5206) is missing from the extract.
5207  }
 // a page-break-before is attributed after the line break / paragraph handling above
5208  if( bBreakItem && SvxBreak::PageBefore == aBreakItem->GetBreak() )
5209  {
5210  NewAttr(m_xAttrTab, &m_xAttrTab->pBreak, *aBreakItem);
5211  EndAttr( m_xAttrTab->pBreak, false );
5212  }
5213 }
5214 
5216 {
 // NOTE(review): this extract is incomplete. The signature line (5215) and lines 5267,
 // 5271 and 5369 are missing, so the paragraph handling before and after the rule
 // reads differently here than in the full file.
 //
 // Overview: reads ID/SIZE/WIDTH/ALIGN/NOSHADE/COLOR from the current tag's options,
 // sets the HR paragraph style through a temporary HORZRULE context, optionally adds a
 // bottom border line and paragraph indents as attributes, inserts a bookmark for the
 // ID and restores the paragraph style afterwards.
5217  sal_uInt16 nSize = 0;
5218  sal_uInt16 nWidth = 0;
5219 
5220  SvxAdjust eAdjust = SvxAdjust::End;
5221 
5222  bool bPercentWidth = false;
5223  bool bNoShade = false;
5224  bool bColor = false;
5225 
5226  Color aColor;
5227  OUString aId;
5228 
5229  // let's fetch the options
5230  const HTMLOptions& rHTMLOptions = GetOptions();
5231  for (size_t i = rHTMLOptions.size(); i; )
5232  {
5233  const HTMLOption& rOption = rHTMLOptions[--i];
5234  switch( rOption.GetToken() )
5235  {
5236  case HtmlOptionId::ID:
5237  aId = rOption.GetString();
5238  break;
5239  case HtmlOptionId::SIZE:
5240  nSize = static_cast<sal_uInt16>(rOption.GetNumber());
5241  break;
5242  case HtmlOptionId::WIDTH:
5243  bPercentWidth = (rOption.GetString().indexOf('%') != -1);
5244  nWidth = static_cast<sal_uInt16>(rOption.GetNumber());
5245  if( bPercentWidth && nWidth>=100 )
5246  {
5247  // the default case are 100% lines (no attributes necessary)
5248  nWidth = 0;
5249  bPercentWidth = false;
5250  }
5251  break;
5252  case HtmlOptionId::ALIGN:
5253  eAdjust = rOption.GetEnum( aHTMLPAlignTable, eAdjust );
5254  break;
5255  case HtmlOptionId::NOSHADE:
5256  bNoShade = true;
5257  break;
5258  case HtmlOptionId::COLOR:
5259  rOption.GetColor( aColor );
5260  bColor = true;
5261  break;
5262  default: break;
5263  }
5264  }
5265 
 // NOTE(review): lines 5267 and 5271 are missing from this extract, so the statement
 // controlled by the first if below is not visible.
5266  if( m_pPam->GetPoint()->nContent.GetIndex() )
5268  if( m_nOpenParaToken != HtmlTokenId::NONE )
5269  EndPara();
5270  AppendTextNode();
5272 
5273  // ...and save in a context
5274  std::unique_ptr<HTMLAttrContext> xCntxt(
5275  new HTMLAttrContext(HtmlTokenId::HORZRULE, RES_POOLCOLL_HTML_HR, OUString()));
5276 
5277  PushContext(xCntxt);
5278 
5279  // set the new style
 // the context is re-read from the stack top after PushContext
5280  SetTextCollAttrs(m_aContexts.back().get());
5281 
5282  // the hard attributes of the current paragraph will never become invalid
5283  m_aParaAttrs.clear();
5284 
5285  if( nSize>0 || bColor || bNoShade )
5286  {
5287  // set line colour and/or width
5288  if( !bColor )
5289  aColor = COL_GRAY;
5290 
5291  SvxBorderLine aBorderLine( &aColor );
5292  if( nSize )
5293  {
 // only the height component is used; SIZE is converted from pixels to twips
5294  long nPWidth = 0;
5295  long nPHeight = static_cast<long>(nSize);
5296  SvxCSS1Parser::PixelToTwip( nPWidth, nPHeight );
5297  if ( !bNoShade )
5298  {
5299  aBorderLine.SetBorderLineStyle(SvxBorderLineStyle::DOUBLE);
5300  }
5301  aBorderLine.SetWidth( nPHeight );
5302  }
5303  else if( bNoShade )
5304  {
5305  aBorderLine.SetWidth( DEF_LINE_WIDTH_2 );
5306  }
5307  else
5308  {
5309  aBorderLine.SetBorderLineStyle(SvxBorderLineStyle::DOUBLE);
5310  aBorderLine.SetWidth( DEF_LINE_WIDTH_0 );
5311  }
5312 
5313  SvxBoxItem aBoxItem(RES_BOX);
5314  aBoxItem.SetLine( &aBorderLine, SvxBoxItemLine::BOTTOM );
 // NOTE(review): raw new; the pointer is handed to m_aSetAttrTab - presumably that
 // list's processing takes ownership, confirm.
5315  HTMLAttr* pTmp = new HTMLAttr(*m_pPam->GetPoint(), aBoxItem, nullptr, std::shared_ptr<HTMLAttrTable>());
5316  m_aSetAttrTab.push_back( pTmp );
5317  }
5318  if( nWidth )
5319  {
5320  // If we aren't in a table, then the width value will be "faked" with
5321  // paragraph indents. That makes little sense in a table. In order to
5322  // avoid that the line is considered during the width calculation, it
5323  // still gets an appropriate LRSpace-Item.
5324  if (!m_xTable)
5325  {
5326  // fake length and alignment of line above paragraph indents
5327  long nBrowseWidth = GetCurrentBrowseWidth();
 // NOTE(review): in the non-percent branch nBrowseWidth, not the parsed WIDTH value
 // in nWidth, is passed to ToTwips - this looks unintended; confirm before relying
 // on absolute widths.
5328  nWidth = bPercentWidth ? static_cast<sal_uInt16>((nWidth*nBrowseWidth) / 100)
5329  : ToTwips( static_cast<sal_uInt16>(nBrowseWidth) );
5330  if( nWidth < MINLAY )
5331  nWidth = MINLAY;
5332 
 // indents are only needed when the rule is narrower than the available width
5333  const SwFormatColl *pColl = (static_cast<long>(nWidth) < nBrowseWidth) ? GetCurrFormatColl() : nullptr;
5334  if (pColl)
5335  {
5336  SvxLRSpaceItem aLRItem( pColl->GetLRSpace() );
5337  long nDist = nBrowseWidth - nWidth;
5338 
5339  switch( eAdjust )
5340  {
5341  case SvxAdjust::Right:
5342  aLRItem.SetTextLeft( static_cast<sal_uInt16>(nDist) );
5343  break;
5344  case SvxAdjust::Left:
5345  aLRItem.SetRight( static_cast<sal_uInt16>(nDist) );
5346  break;
5347  case SvxAdjust::Center:
5348  default:
 // the initial SvxAdjust::End also lands here, so an unaligned rule is centred
5349  nDist /= 2;
5350  aLRItem.SetTextLeft( static_cast<sal_uInt16>(nDist) );
5351  aLRItem.SetRight( static_cast<sal_uInt16>(nDist) );
5352  break;
5353  }
5354 
5355  HTMLAttr* pTmp = new HTMLAttr(*m_pPam->GetPoint(), aLRItem, nullptr, std::shared_ptr<HTMLAttrTable>());
5356  m_aSetAttrTab.push_back( pTmp );
5357  }
5358  }
5359  }
5360 
5361  // it's not possible to insert bookmarks in links
5362  if( !aId.isEmpty() )
5363  InsertBookmark( aId );
5364 
5365  // pop current context of stack
5366  std::unique_ptr<HTMLAttrContext> xPoppedContext(PopContext(HtmlTokenId::HORZRULE));
5367  xPoppedContext.reset();
5368 
 // NOTE(review): line 5369 is missing from this extract.
5370 
5371  // and set the current style in the next paragraph
5372  SetTextCollAttrs();
5373 }
5374 
5376 {
5377  OUString aName, aContent;
5378  bool bHTTPEquiv = false;
5379 
5380  const HTMLOptions& rHTMLOptions = GetOptions();
5381  for (size_t i = rHTMLOptions.size(); i; )
5382  {
5383  const HTMLOption& rOption = rHTMLOptions[--i];
5384  switch( rOption.GetToken() )
5385  {
5386  case HtmlOptionId::NAME:
5387  aName = rOption.GetString();
5388  bHTTPEquiv = false;