LibreOffice Module sw (master)  1
swhtml.cxx
Go to the documentation of this file.
1 /* -*- Mode: C++; tab-width: 4; indent-tabs-mode: nil; c-basic-offset: 4 -*- */
2 /*
3  * This file is part of the LibreOffice project.
4  *
5  * This Source Code Form is subject to the terms of the Mozilla Public
6  * License, v. 2.0. If a copy of the MPL was not distributed with this
7  * file, You can obtain one at http://mozilla.org/MPL/2.0/.
8  *
9  * This file incorporates work covered by the following license notice:
10  *
11  * Licensed to the Apache Software Foundation (ASF) under one or more
12  * contributor license agreements. See the NOTICE file distributed
13  * with this work for additional information regarding copyright
14  * ownership. The ASF licenses this file to you under the Apache
15  * License, Version 2.0 (the "License"); you may not use this file
16  * except in compliance with the License. You may obtain a copy of
17  * the License at http://www.apache.org/licenses/LICENSE-2.0 .
18  */
19 
20 #include <sal/config.h>
21 
22 #include <algorithm>
23 #include <memory>
24 #include <config_java.h>
25 
26 #include <com/sun/star/document/XDocumentPropertiesSupplier.hpp>
27 #include <com/sun/star/document/XDocumentProperties.hpp>
28 #include <com/sun/star/i18n/ScriptType.hpp>
29 #include <com/sun/star/i18n/XBreakIterator.hpp>
30 #include <comphelper/string.hxx>
31 #include <o3tl/safeint.hxx>
32 #include <rtl/ustrbuf.hxx>
33 #include <svx/svxids.hrc>
34 #if OSL_DEBUG_LEVEL > 0
35 #include <stdlib.h>
36 #endif
37 #include <hintids.hxx>
38 
39 #include <vcl/errinf.hxx>
40 #include <svl/stritem.hxx>
41 #include <vcl/imap.hxx>
42 #include <svtools/htmltokn.h>
43 #include <svtools/htmlkywd.hxx>
44 #include <svtools/ctrltool.hxx>
45 #include <unotools/configmgr.hxx>
46 #include <unotools/pathoptions.hxx>
47 #include <vcl/svapp.hxx>
48 #include <vcl/wrkwin.hxx>
49 #include <sfx2/event.hxx>
50 #include <sfx2/fcontnr.hxx>
51 #include <sfx2/docfile.hxx>
52 
53 #include <svtools/htmlcfg.hxx>
54 #include <sfx2/linkmgr.hxx>
55 #include <editeng/kernitem.hxx>
56 #include <editeng/boxitem.hxx>
57 #include <editeng/fhgtitem.hxx>
59 #include <editeng/postitem.hxx>
60 #include <editeng/wghtitem.hxx>
62 #include <editeng/udlnitem.hxx>
64 #include <editeng/blinkitem.hxx>
65 #include <editeng/ulspitem.hxx>
66 #include <editeng/colritem.hxx>
67 #include <editeng/fontitem.hxx>
68 #include <editeng/adjustitem.hxx>
69 #include <editeng/lrspitem.hxx>
70 #include <editeng/protitem.hxx>
71 #include <editeng/flstitem.hxx>
73 
74 #include <frmatr.hxx>
75 #include <charatr.hxx>
76 #include <fmtfld.hxx>
77 #include <fmtpdsc.hxx>
78 #include <txtfld.hxx>
79 #include <fmtanchr.hxx>
80 #include <fmtsrnd.hxx>
81 #include <fmtfsize.hxx>
82 #include <fmtclds.hxx>
83 #include <fchrfmt.hxx>
84 #include <fmtinfmt.hxx>
85 #include <fmtfollowtextflow.hxx>
86 #include <fmtornt.hxx>
87 #include <docary.hxx>
88 #include <docstat.hxx>
89 #include <doc.hxx>
90 #include <IDocumentUndoRedo.hxx>
97 #include <IDocumentStatistics.hxx>
98 #include <IDocumentState.hxx>
99 #include <pam.hxx>
100 #include <ndtxt.hxx>
101 #include <mdiexp.hxx>
102 #include <expfld.hxx>
103 #include <poolfmt.hxx>
104 #include <pagedesc.hxx>
105 #include <IMark.hxx>
106 #include <docsh.hxx>
107 #include <editsh.hxx>
108 #include <docufld.hxx>
109 #include "swcss1.hxx"
110 #include <fltini.hxx>
111 #include <htmltbl.hxx>
112 #include "htmlnum.hxx"
113 #include "swhtml.hxx"
114 #include <linkenum.hxx>
115 #include <breakit.hxx>
116 #include <SwAppletImpl.hxx>
117 #include <swdll.hxx>
118 
119 #include <sfx2/viewfrm.hxx>
120 #include <svx/svdobj.hxx>
121 #include <officecfg/Office/Writer.hxx>
122 
123 #include <swerror.h>
124 #include <hints.hxx>
125 #include <ndole.hxx>
126 #include <unoframe.hxx>
127 #include "css1atr.hxx"
128 
129 #define FONTSIZE_MASK 7
130 
131 #define HTML_ESC_PROP 80
132 #define HTML_ESC_SUPER DFLT_ESC_SUPER
133 #define HTML_ESC_SUB DFLT_ESC_SUB
134 
135 #define HTML_SPTYPE_BLOCK 1
136 #define HTML_SPTYPE_HORI 2
137 #define HTML_SPTYPE_VERT 3
138 
140 using namespace ::com::sun::star;
141 
142 // <P ALIGN=xxx>, <Hn ALIGN=xxx>, <TD ALIGN=xxx> etc.
// Each entry pairs an HTML ALIGN keyword constant with the SvxAdjust value it selects.
// NOTE(review): the table's declaration (listing line 143) is missing from this extract.
144 {
145  { OOO_STRING_SVTOOLS_HTML_AL_left, SvxAdjust::Left },
146  { OOO_STRING_SVTOOLS_HTML_AL_center, SvxAdjust::Center },
147  { OOO_STRING_SVTOOLS_HTML_AL_middle, SvxAdjust::Center }, // Netscape
148  { OOO_STRING_SVTOOLS_HTML_AL_right, SvxAdjust::Right },
149  { OOO_STRING_SVTOOLS_HTML_AL_justify, SvxAdjust::Block },
150  { OOO_STRING_SVTOOLS_HTML_AL_char, SvxAdjust::Left }, // "char" alignment is treated like "left"
151  { nullptr, SvxAdjust(0) } // presumably the end-of-table marker - TODO confirm
152 };
153 
154 // <SPACER TYPE=...>
// NOTE(review): the table's declaration (listing line 155) and its entries
// (listing lines 157-159) are missing from this extract; only the closing
// { nullptr, 0 } entry is visible.
156 {
160  { nullptr, 0 }
161 };
162 
// NOTE(review): the signature of this definition (listing line 163) is missing
// from this extract. The visible body only switches m_bTemplateBrowseMode on.
164 {
165  m_bTemplateBrowseMode = true;
166 }
167 
// Returns the name of the Writer/Web template to use for an HTML import, or an
// empty OUString when no template is wanted or none can be found.
//
// The lookup tries "internal/html.oth" first and "internal/html.stw" second,
// each through SvtPathOptions::SearchFile() with PATH_TEMPLATE; the string
// returned is sTemplate as SearchFile() left it.
//
// NOTE(review): listing line 170 is missing from this extract. It presumably
// holds the condition that guards the early return below (rDoc is not used
// anywhere else in the visible body) - confirm against the full file. As
// printed here, the early return looks unconditional.
168 OUString HTMLReader::GetTemplateName(SwDoc& rDoc) const
169 {
171  // HTML import into Writer, avoid loading the Writer/Web template.
172  return OUString();
173 
174  const OUString sTemplateWithoutExt("internal/html");
175  SvtPathOptions aPathOpt;
176 
177  // first search for OpenDocument Writer/Web template
178  // OpenDocument Writer/Web template (extension .oth)
179  OUString sTemplate( sTemplateWithoutExt + ".oth" );
180  if (aPathOpt.SearchFile( sTemplate, SvtPathOptions::PATH_TEMPLATE ))
181  return sTemplate;
182 
183  // no OpenDocument Writer/Web template found.
184  // search for OpenOffice.org Writer/Web template
185  sTemplate = sTemplateWithoutExt + ".stw";
186  if (aPathOpt.SearchFile( sTemplate, SvtPathOptions::PATH_TEMPLATE ))
187  return sTemplate;
188 
 // Neither template variant was found: report it in debug builds and fall
 // through to the empty result.
189  OSL_ENSURE( false, "The default HTML template cannot be found in the defined template directories!");
190 
191  return OUString();
192 }
193 
// NOTE(review): the signature of this definition (listing line 194) and the
// statement at listing line 200 are missing from this extract.
// Visible behaviour: returns true when the medium is remote or is not a
// storage, false otherwise.
195 {
 // Diagnostic only: m_pMedium is dereferenced below regardless of the check.
196  OSL_ENSURE( m_pMedium, "Where is the medium??" );
197 
198  if( m_pMedium->IsRemote() || !m_pMedium->IsStorage() )
199  {
201  return true;
202  }
203  return false;
204 
205 }
206 
207 // Entry point called through the general Reader interface.
//
// Creates an SwHTMLParser for rDoc at rPam over *m_pStream and runs its
// CallParser().
//
// Returns ERR_SWG_READ_ERROR when no stream is set. Otherwise returns
// ERRCODE_NONE, unless the parser ends in a state that is neither Pending nor
// Accepted; in that case the result is an ERR_FORMAT_ROWCOL error whose text
// is "<line>,<column>" taken from the parser.
//
// NOTE(review): listing lines 210, 220, 224, 226-227, 237 and 242 are missing
// from this extract (among them the tail of the SwHTMLParser constructor call
// and the statement executed for the Pending state).
208 ErrCode HTMLReader::Read( SwDoc &rDoc, const OUString& rBaseURL, SwPaM &rPam, const OUString & rName )
209 {
211 
212  if( !m_pStream )
213  {
214  OSL_ENSURE( m_pStream, "HTML-Read without stream" );
215  return ERR_SWG_READ_ERROR;
216  }
217 
218  if( !m_bInsertMode )
219  {
221 
222  // Set the HTML page style, when it isn't a HTML document,
223  // otherwise it's already set.
225  {
228  }
229  }
230 
231  // Hold an extra reference so the document cannot go away while the
 // parser is running.
232  rtl::Reference<SwDoc> aHoldRef(&rDoc);
233  ErrCode nRet = ERRCODE_NONE;
234  tools::SvRef<SwHTMLParser> xParser = new SwHTMLParser( &rDoc, rPam, *m_pStream,
235  rName, rBaseURL, !m_bInsertMode, m_pMedium,
236  IsReadUTF8(),
238 
239  SvParserState eState = xParser->CallParser();
240 
241  if( SvParserState::Pending == eState )
243  else if( SvParserState::Accepted != eState )
244  {
 // Build "line,column" of the position the parser stopped at.
245  const OUString sErr(OUString::number(static_cast<sal_Int32>(xParser->GetLineNr()))
246  + "," + OUString::number(static_cast<sal_Int32>(xParser->GetLinePos())));
247 
248  // use the stream as transport for error number
 // NOTE(review): the StringErrorInfo is allocated with new and never
 // deleted here; presumably the error-info machinery owns it - confirm.
249  nRet = *new StringErrorInfo( ERR_FORMAT_ROWCOL, sErr,
250  DialogMask::ButtonsOk | DialogMask::MessageError );
251  }
252 
253  return nRet;
254 }
255 
// NOTE(review): the first line of this constructor's signature (listing line
// 256) is missing from this extract; the body uses the names pD, rCursor and
// rIn, which are presumably declared there.
//
// Visible work of the constructor:
//  - initialises all parser state members,
//  - reads the seven HTML font heights and the "import unknown" flag from
//    SvxHtmlOptions,
//  - for a new document, sets document defaults,
//  - switches the document into HTML mode (the old value is remembered in
//    m_bOldIsHTMLMode),
//  - creates the CSS1 parser and chooses the source encoding,
//  - evaluates a jump mark taken from the medium's URL,
//  - enables XHTML / ReqIF handling when a namespace is passed.
//
// NOTE(review): listing lines 320, 324, 348, 350, 352, 393, 407, 409, 411 and
// 426 are missing from this extract.
257  const OUString& rPath,
258  const OUString& rBaseURL,
259  bool bReadNewDoc,
260  SfxMedium* pMed, bool bReadUTF8,
261  bool bNoHTMLComments,
262  const OUString& rNamespace )
263  : SfxHTMLParser( rIn, bReadNewDoc, pMed ),
264  SwClient( nullptr ),
265  m_aPathToFile( rPath ),
266  m_sBaseURL( rBaseURL ),
267  m_xAttrTab(new HTMLAttrTable),
268  m_pNumRuleInfo( new SwHTMLNumRuleInfo ),
269  m_xDoc( pD ),
270  m_pActionViewShell( nullptr ),
271  m_pSttNdIdx( nullptr ),
272  m_pFormImpl( nullptr ),
273  m_pMarquee( nullptr ),
274  m_pImageMap( nullptr ),
275  m_nBaseFontStMin( 0 ),
276  m_nFontStMin( 0 ),
277  m_nDefListDeep( 0 ),
278  m_nFontStHeadStart( 0 ),
279  m_nSBModuleCnt( 0 ),
280  m_nMissingImgMaps( 0 ),
281  m_nParaCnt( 5 ),
282  // #i83625#
283  m_nContextStMin( 0 ),
284  m_nContextStAttrMin( 0 ),
285  m_nSelectEntryCnt( 0 ),
286  m_nOpenParaToken( HtmlTokenId::NONE ),
287  m_eJumpTo( JumpToMarks::NONE ),
288 #ifdef DBG_UTIL
289  m_nContinue( 0 ),
290 #endif
291  m_eParaAdjust( SvxAdjust::End ),
292  m_bDocInitalized( false ),
293  m_bSetModEnabled( false ),
294  m_bInFloatingFrame( false ),
295  m_bInField( false ),
296  m_bCallNextToken( false ),
297  m_bIgnoreRawData( false ),
298  m_bLBEntrySelected ( false ),
299  m_bTAIgnoreNewPara ( false ),
300  m_bFixMarqueeWidth ( false ),
301  m_bNoParSpace( false ),
302  m_bInNoEmbed( false ),
303  m_bInTitle( false ),
304  m_bUpdateDocStat( false ),
305  m_bFixSelectWidth( false ),
306  m_bTextArea( false ),
307  m_bSelect( false ),
308  m_bInFootEndNoteAnchor( false ),
309  m_bInFootEndNoteSymbol( false ),
310  m_bIgnoreHTMLComments( bNoHTMLComments ),
311  m_bRemoveHidden( false ),
312  m_bBodySeen( false ),
313  m_bReadingHeaderOrFooter( false ),
314  m_bNotifyMacroEventRead( false ),
315  m_isInTableStructure(false),
316  m_nTableDepth( 0 ),
317  m_pTempViewFrame(nullptr)
318 {
319  // If requested explicitly, then force ignoring of comments (don't create postits for them).
 // NOTE(review): the condition for the next statement (listing line 320) is missing from this extract.
321  m_bIgnoreHTMLComments = true;
322 
323  m_nEventId = nullptr;
325 
326  m_eScriptLang = HTMLScriptLanguage::Unknown;
327 
328  rCursor.DeleteMark();
329  m_pPam = &rCursor; // re-use existing cursor: avoids spurious ~SwIndexReg assert
 // Zero-fill the freshly allocated attribute table byte-wise.
 // NOTE(review): presumably HTMLAttrTable holds only raw pointers, so this is safe - confirm.
330  memset(m_xAttrTab.get(), 0, sizeof(HTMLAttrTable));
331 
332  // Read the font sizes 1-7 from the HTML options (SvxHtmlOptions); each
 // value is multiplied by 20 (presumably points to twips - TODO confirm).
333  SvxHtmlOptions& rHtmlOptions = SvxHtmlOptions::Get();
334  m_aFontHeights[0] = rHtmlOptions.GetFontSize( 0 ) * 20;
335  m_aFontHeights[1] = rHtmlOptions.GetFontSize( 1 ) * 20;
336  m_aFontHeights[2] = rHtmlOptions.GetFontSize( 2 ) * 20;
337  m_aFontHeights[3] = rHtmlOptions.GetFontSize( 3 ) * 20;
338  m_aFontHeights[4] = rHtmlOptions.GetFontSize( 4 ) * 20;
339  m_aFontHeights[5] = rHtmlOptions.GetFontSize( 5 ) * 20;
340  m_aFontHeights[6] = rHtmlOptions.GetFontSize( 6 ) * 20;
341 
342  m_bKeepUnknown = rHtmlOptions.IsImportUnknown();
343 
344  if(bReadNewDoc)
345  {
346  //CJK has different defaults, so a different object should be used for this
347  //RES_CHARTR_CJK_FONTSIZE is a valid value
 // NOTE(review): the declarations of aFontHeight, aFontHeightCJK and
 // aFontHeightCTL (listing lines 348, 350, 352) are missing from this extract.
349  m_xDoc->SetDefault( aFontHeight );
351  m_xDoc->SetDefault( aFontHeightCJK );
353  m_xDoc->SetDefault( aFontHeightCTL );
354 
355  // #i18732# - adjust default of option 'FollowTextFlow'
356  // TODO: not sure what the appropriate default for HTML should be?
357  m_xDoc->SetDefault( SwFormatFollowTextFlow(true) );
358  }
359 
360  // Change to HTML mode during the import, so that the right styles are created
361  m_bOldIsHTMLMode = m_xDoc->getIDocumentSettingAccess().get(DocumentSettingId::HTML_MODE);
362  m_xDoc->getIDocumentSettingAccess().set(DocumentSettingId::HTML_MODE, true);
363 
364  m_pCSS1Parser.reset( new SwCSS1Parser( m_xDoc.get(), m_aFontHeights, m_sBaseURL, IsNewDoc() ) );
365  m_pCSS1Parser->SetIgnoreFontFamily( rHtmlOptions.IsIgnoreFontFamily() );
366 
367  if( bReadUTF8 )
368  {
369  SetSrcEncoding( RTL_TEXTENCODING_UTF8 );
370  }
371  else
372  {
 // Otherwise take the encoding from the HTTP header attributes, if any.
 // NOTE(review): pDocSh is dereferenced without a null check here, while
 // the result of GetDocShell() is null-checked further down (listing line
 // 382) - confirm that a shell always exists on this path.
373  SwDocShell *pDocSh = m_xDoc->GetDocShell();
374  SvKeyValueIterator *pHeaderAttrs =
375  pDocSh->GetHeaderAttributes();
376  if( pHeaderAttrs )
377  SetEncodingByHTTPHeader( pHeaderAttrs );
378  }
379  m_pCSS1Parser->SetDfltEncoding( osl_getThreadTextEncoding() );
380 
381  SwDocShell* pDocSh = m_xDoc->GetDocShell();
382  if( pDocSh )
383  {
384  m_bViewCreated = true; // not, load synchronous
385 
386  // a jump mark is present
387 
388  if( pMed )
389  {
390  m_sJmpMark = pMed->GetURLObject().GetMark();
391  if( !m_sJmpMark.isEmpty() )
392  {
 // Split the mark at the last cMarkSeparator: the part after it (with
 // blanks removed, lower-cased) names the kind of jump target.
 // A separator at index 0 is treated like "no separator".
394  sal_Int32 nLastPos = m_sJmpMark.lastIndexOf( cMarkSeparator );
395  sal_Int32 nPos = nLastPos != -1 ? nLastPos : 0;
396 
397  OUString sCmp;
398  if (nPos)
399  {
400  sCmp = m_sJmpMark.copy(nPos + 1).replaceAll(" ", "");
401  }
402 
403  if( !sCmp.isEmpty() )
404  {
405  sCmp = sCmp.toAsciiLowerCase();
 // NOTE(review): the statements for "region", "table" and "graphic"
 // (listing lines 407, 409, 411) are missing from this extract.
406  if( sCmp == "region" )
408  else if( sCmp == "table" )
410  else if( sCmp == "graphic" )
412  else if( sCmp == "outline" ||
413  sCmp == "text" ||
414  sCmp == "frame" )
415  m_eJumpTo = JumpToMarks::NONE; // this is nothing valid!
416  else
417  // otherwise this is a normal (book)mark
418  nPos = -1;
419  }
420  else
421  nPos = -1;
422 
 // For a recognised type suffix, keep only the part before the separator.
423  if( nPos != -1 )
424  m_sJmpMark = m_sJmpMark.copy( 0, nPos );
425  if( m_sJmpMark.isEmpty() )
427  }
428  }
429  }
430 
 // A non-empty namespace switches the parser to XHTML; the namespace
 // "reqif-xhtml" additionally enables ReqIF mode.
431  if (!rNamespace.isEmpty())
432  {
433  SetNamespace(rNamespace);
434  m_bXHTML = true;
435  if (rNamespace == "reqif-xhtml")
436  m_bReqIF = true;
437  }
438 }
439 
// NOTE(review): the signature of this definition (listing line 440) and the
// statements at listing lines 460, 491 and 504 are missing from this extract.
// Visible work: pops and clears leftover contexts, restores the document's
// async-load flag and HTML mode, updates links / finishes loading on the
// DocShell, and releases the parser's helper objects and the document
// reference.
441 {
442 #ifdef DBG_UTIL
443  OSL_ENSURE( !m_nContinue, "DTOR in continue!" );
444 #endif
445 
446  OSL_ENSURE(m_aContexts.empty(), "There are still contexts on the stack");
447  OSL_ENSURE(!m_nContextStMin, "There are protected contexts");
 // Drop the protection limit so that every remaining context is popped below.
448  m_nContextStMin = 0;
449  while (!m_aContexts.empty())
450  {
451  std::unique_ptr<HTMLAttrContext> xCntxt(PopContext());
452  ClearContext(xCntxt.get());
453  }
454 
455  bool bAsync = m_xDoc->IsInLoadAsynchron();
456  m_xDoc->SetInLoadAsynchron( false );
457  m_xDoc->getIDocumentSettingAccess().set(DocumentSettingId::HTML_MODE, m_bOldIsHTMLMode);
458 
 // NOTE(review): the statement controlled by this condition (listing line 460) is missing from this extract.
459  if( m_xDoc->GetDocShell() && m_nEventId )
461 
462  // DocumentDetected may have deleted the DocShell, therefore fetch it again
463  if( m_xDoc->GetDocShell() )
464  {
465  // update linked sections
466  sal_uInt16 nLinkMode = m_xDoc->getIDocumentSettingAccess().getLinkUpdateMode( true );
467  if( nLinkMode != NEVER && bAsync &&
468  SfxObjectCreateMode::INTERNAL!=m_xDoc->GetDocShell()->GetCreateMode() )
469  m_xDoc->getIDocumentLinksAdministration().GetLinkManager().UpdateAllLinks( nLinkMode == MANUAL, false, nullptr );
470 
471  if ( m_xDoc->GetDocShell()->IsLoading() )
472  {
473  // #i59688#
474  m_xDoc->GetDocShell()->LoadingFinished();
475  }
476  }
477 
478  delete m_pSttNdIdx;
479 
480  if( !m_aSetAttrTab.empty() )
481  {
482  OSL_ENSURE( m_aSetAttrTab.empty(),"There are still attributes on the stack" );
483  for ( const auto& rpAttr : m_aSetAttrTab )
484  delete rpAttr;
485  m_aSetAttrTab.clear();
486  }
487 
488  m_pCSS1Parser.reset();
489  m_pNumRuleInfo.reset();
490  DeleteFormImpl();
492 
493  OSL_ENSURE(!m_xTable.get(), "It exists still an open table");
494  m_pImageMaps.reset();
495 
496  OSL_ENSURE( m_vPendingStack.empty(),
497  "SwHTMLParser::~SwHTMLParser: Here should not be Pending-Stack anymore" );
498  m_vPendingStack.clear();
499 
500  m_xDoc.clear();
501 
502  if ( m_pTempViewFrame )
503  {
505 
506  // the temporary view frame is hidden, so the hidden flag might need to be removed
 // NOTE(review): m_xDoc was cleared just above (listing line 500), so the
 // m_xDoc.is() test below looks like it can never succeed and the
 // ClearItem() call looks unreachable - confirm the intended order.
507  if ( m_bRemoveHidden && m_xDoc.is() && m_xDoc->GetDocShell() && m_xDoc->GetDocShell()->GetMedium() )
508  m_xDoc->GetDocShell()->GetMedium()->GetItemSet()->ClearItem( SID_HIDDEN );
509  }
510 }
511 
// Link handler posted as a user event (see the PostUserEvent call using
// LINK( this, SwHTMLParser, AsyncCallback ) elsewhere in this file).
// Clears the stored event id, marks the parse as failed when the import is
// being aborted or the parser holds the only remaining reference to the
// document, and then invokes the asynchronous call link.
512 IMPL_LINK_NOARG( SwHTMLParser, AsyncCallback, void*, void )
513 {
 // The event has been delivered, so there is no pending event id any more.
514  m_nEventId=nullptr;
515 
516  // #i47907# - If the document has already been destructed,
517  // the parser should be aware of this:
 // (a reference count of 1 means only this parser still holds the document)
518  if( ( m_xDoc->GetDocShell() && m_xDoc->GetDocShell()->IsAbortingImport() )
519  || 1 == m_xDoc->getReferenceCount() )
520  {
521  // was the import aborted by SFX?
522  eState = SvParserState::Error;
523  }
524 
525  GetAsynchCallLink().Call(nullptr);
526 }
527 
// NOTE(review): the signature of this definition (listing line 528) and the
// statements at listing lines 543 and 580 are missing from this extract; the
// latter presumably declares and computes eRet, which is returned at the end.
// Visible work: creates the start node index, prepares the insertion point
// when inserting into an existing document, sets up either the asynchronous
// callback (with a medium) or the progress bar (without one), and registers
// this parser at the first page descriptor.
529 {
530  // create temporary index on position 0, so it won't be moved!
531  m_pSttNdIdx = new SwNodeIndex( m_xDoc->GetNodes() );
532  if( !IsNewDoc() ) // insert into existing document ?
533  {
534  const SwPosition* pPos = m_pPam->GetPoint();
535 
 // Split twice at the cursor; m_pSttNdIdx is set to the node before the
 // position after the first split.
536  m_xDoc->getIDocumentContentOperations().SplitNode( *pPos, false );
537 
538  *m_pSttNdIdx = pPos->nNode.GetIndex()-1;
539  m_xDoc->getIDocumentContentOperations().SplitNode( *pPos, false );
540 
541  SwPaM aInsertionRangePam( *pPos );
542 
544 
545  // split any redline over the insertion point
546  aInsertionRangePam.SetMark();
547  *aInsertionRangePam.GetPoint() = *m_pPam->GetPoint();
548  aInsertionRangePam.Move( fnMoveBackward );
549  m_xDoc->getIDocumentRedlineAccess().SplitRedline( aInsertionRangePam );
550 
551  m_xDoc->SetTextFormatColl( *m_pPam,
552  m_pCSS1Parser->GetTextCollFromPool( RES_POOLCOLL_STANDARD ));
553  }
554 
555  if( GetMedium() )
556  {
557  if( !m_bViewCreated )
558  {
559  m_nEventId = Application::PostUserEvent( LINK( this, SwHTMLParser, AsyncCallback ) );
560  }
561  else
562  {
 // m_bViewCreated is already true in this branch, so the assignment
 // below does not change its value.
563  m_bViewCreated = true;
564  m_nEventId = nullptr;
565  }
566  }
567  else // show progress bar
568  {
 // Seek to the end to learn the stream size for the progress range, then
 // rewind; errors raised by the seeks are reset.
569  rInput.Seek(STREAM_SEEK_TO_END);
570  rInput.ResetError();
571 
572  m_xProgress.reset(new ImportProgress(m_xDoc->GetDocShell(), 0, rInput.Tell()));
573 
574  rInput.Seek(STREAM_SEEK_TO_BEGIN);
575  rInput.ResetError();
576  }
577 
578  m_xDoc->GetPageDesc( 0 ).Add( this );
579 
581  return eRet;
582 }
583 
// NOTE(review): the signature of this definition (listing line 584) is missing
// from this extract; the body uses a node index named nNodeIdx.
// Returns true when the node directly before nNodeIdx is a content node, or is
// an end node whose start-of-section node is a section node.
585 {
586  const SwNode *pPrev = m_xDoc->GetNodes()[nNodeIdx - 1];
587  return pPrev->IsContentNode() || (pPrev->IsEndNode() && pPrev->StartOfSectionNode()->IsSectionNode());
588 }
589 
// NOTE(review): the signature of this definition (listing line 590) is missing
// from this extract; the body uses a token variable named nToken. Listing
// lines 672, 730, 771, 782, 788, 792, 814, 820 and 822 are missing as well.
//
// Visible flow:
//  1. On the first call with a medium and no view yet: set the state to
//     Pending, mark the view as created, flag asynchronous loading and return.
//  2. Otherwise switch off SetModified, the OLE link and Undo, then either
//     call HTMLParser::Continue() or, after an error, flush a pending token
//     through NextToken().
//  3. Unless the state is still Pending: insert a leftover script field, close
//     open applets/objects, numberings and contexts, set the collected
//     attributes, and join the start node with its successor when inserting
//     into an existing document.
//  4. If the state is Accepted: remove the trailing empty paragraph, undo the
//     initial node split, fix up the AutoLoad URL and the document statistics.
//  5. Restore Undo, the OLE link, the modified state and SetModified, and end
//     the action.
591 {
592 #ifdef DBG_UTIL
593  OSL_ENSURE(!m_nContinue, "Continue in Continue - not supposed to happen");
594  m_nContinue++;
595 #endif
596 
597  // When the import (of SFX) is aborted, an error will be set but
598  // we still continue, so that we clean up properly.
599  OSL_ENSURE( SvParserState::Error!=eState,
600  "SwHTMLParser::Continue: already set an error" );
601  if( m_xDoc->GetDocShell() && m_xDoc->GetDocShell()->IsAbortingImport() )
602  eState = SvParserState::Error;
603 
604  // Fetch SwViewShell from document, save it and set as current.
605  SwViewShell *pInitVSh = CallStartAction();
606 
607  if( SvParserState::Error != eState && GetMedium() && !m_bViewCreated )
608  {
609  // At first call first return, show document and wait for callback
610  // time.
611  // At this point in CallParser only one digit was read and
612  // a SaveState(0) was called.
613  eState = SvParserState::Pending;
614  m_bViewCreated = true;
615  m_xDoc->SetInLoadAsynchron( true );
616 
617 #ifdef DBG_UTIL
618  m_nContinue--;
619 #endif
620 
621  return;
622  }
623 
 // Remember whether SetModified was enabled and disable it for the import;
 // it is re-enabled near the end of this function.
624  m_bSetModEnabled = false;
625  if( m_xDoc->GetDocShell() )
626  {
627  m_bSetModEnabled = m_xDoc->GetDocShell()->IsEnableSetModified();
628  if( m_bSetModEnabled )
629  {
630  m_xDoc->GetDocShell()->EnableSetModified( false );
631  }
632  }
633 
634  // during import don't call OLE-Modified
635  Link<bool,void> aOLELink( m_xDoc->GetOle2Link() );
636  m_xDoc->SetOle2Link( Link<bool,void>() );
637 
 // Save the modified and Undo state so both can be restored afterwards.
638  bool bModified = m_xDoc->getIDocumentState().IsModified();
639  bool const bWasUndo = m_xDoc->GetIDocumentUndoRedo().DoesUndo();
640  m_xDoc->GetIDocumentUndoRedo().DoUndo(false);
641 
642  // When the import will be aborted, don't call Continue anymore.
643  // If a Pending-Stack exists make sure the stack is ended with a call
644  // of NextToken.
645  if( SvParserState::Error == eState )
646  {
647  OSL_ENSURE( m_vPendingStack.empty() || m_vPendingStack.back().nToken != HtmlTokenId::NONE,
648  "SwHTMLParser::Continue: Pending-Stack without Token" );
649  if( !m_vPendingStack.empty() && m_vPendingStack.back().nToken != HtmlTokenId::NONE )
650  NextToken( m_vPendingStack.back().nToken );
651  OSL_ENSURE( m_vPendingStack.empty(),
652  "SwHTMLParser::Continue: There is again a Pending-Stack" );
653  }
654  else
655  {
 // Resume with the token of the topmost pending entry, if there is one.
656  HTMLParser::Continue( !m_vPendingStack.empty() ? m_vPendingStack.back().nToken : nToken );
657  }
658 
659  // disable progress bar again
660  m_xProgress.reset();
661 
662  bool bLFStripped = false;
663  if( SvParserState::Pending != GetStatus() )
664  {
665  // set the last attributes yet
666  {
667  if( !m_aScriptSource.isEmpty() )
668  {
669  SwScriptFieldType *pType =
670  static_cast<SwScriptFieldType*>(m_xDoc->getIDocumentFieldsAccess().GetSysFieldType( SwFieldIds::Script ));
671 
 // NOTE(review): the start of the aField declaration (listing line 672) is missing from this extract.
673  false );
674  InsertAttr( SwFormatField( aField ), false );
675  }
676 
677  if( m_pAppletImpl )
678  {
679  if( m_pAppletImpl->GetApplet().is() )
680  EndApplet();
681  else
682  EndObject();
683  }
684 
685  // maybe remove an existing LF after the last paragraph
686  if( IsNewDoc() )
687  bLFStripped = StripTrailingLF() > 0;
688 
689  // close still open numbering
690  while( GetNumInfo().GetNumRule() )
691  EndNumBulList();
692 
693  OSL_ENSURE( !m_nContextStMin, "There are protected contexts" );
694  // try this twice, first normally to let m_nContextStMin decrease
695  // naturally and get contexts popped in desired order, and if that
696  // fails force it
697  for (int i = 0; i < 2; ++i)
698  {
699  while (m_aContexts.size() > m_nContextStMin)
700  {
701  std::unique_ptr<HTMLAttrContext> xCntxt(PopContext());
702  if (xCntxt)
703  EndContext(xCntxt.get());
704  }
705  if (!m_nContextStMin)
706  break;
707  OSL_ENSURE(!m_nContextStMin, "There are still protected contexts");
708  m_nContextStMin = 0;
709  }
710 
711  m_aParaAttrs.clear();
712 
713  SetAttr( false );
714 
715  // set the first delayed styles
716  m_pCSS1Parser->SetDelayedStyles();
717  }
718 
719  // again correct the start
720  if( !IsNewDoc() && m_pSttNdIdx->GetIndex() )
721  {
722  SwTextNode* pTextNode = m_pSttNdIdx->GetNode().GetTextNode();
723  SwNodeIndex aNxtIdx( *m_pSttNdIdx );
724  if( pTextNode && pTextNode->CanJoinNext( &aNxtIdx ))
725  {
726  const sal_Int32 nStt = pTextNode->GetText().getLength();
727  // when the cursor is still in the node, then set it at the end
728  if( m_pPam->GetPoint()->nNode == aNxtIdx )
729  {
731  m_pPam->GetPoint()->nContent.Assign( pTextNode, nStt );
732  }
733 
734 #if OSL_DEBUG_LEVEL > 0
735 // !!! shouldn't be possible, or ??
736  OSL_ENSURE( m_pSttNdIdx->GetIndex()+1 != m_pPam->GetBound().nNode.GetIndex(),
737  "Pam.Bound1 is still in the node" );
738  OSL_ENSURE( m_pSttNdIdx->GetIndex()+1 != m_pPam->GetBound( false ).nNode.GetIndex(),
739  "Pam.Bound2 is still in the node" );
740 
741  if( m_pSttNdIdx->GetIndex()+1 == m_pPam->GetBound().nNode.GetIndex() )
742  {
743  const sal_Int32 nCntPos = m_pPam->GetBound().nContent.GetIndex();
744  m_pPam->GetBound().nContent.Assign( pTextNode,
745  pTextNode->GetText().getLength() + nCntPos );
746  }
747  if( m_pSttNdIdx->GetIndex()+1 == m_pPam->GetBound( false ).nNode.GetIndex() )
748  {
749  const sal_Int32 nCntPos = m_pPam->GetBound( false ).nContent.GetIndex();
750  m_pPam->GetBound( false ).nContent.Assign( pTextNode,
751  pTextNode->GetText().getLength() + nCntPos );
752  }
753 #endif
754  // Keep character attribute!
755  SwTextNode* pDelNd = aNxtIdx.GetNode().GetTextNode();
756  if (pTextNode->GetText().getLength())
757  pDelNd->FormatToTextAttr( pTextNode );
758  else
759  pTextNode->ChgFormatColl( pDelNd->GetTextColl() );
760  pTextNode->JoinNext();
761  }
762  }
763  }
764 
765  if( SvParserState::Accepted == eState )
766  {
767  if( m_nMissingImgMaps )
768  {
769  // Some Image-Map relations are still missing.
770  // Maybe now the Image-Maps are there?
772  }
773 
774  // now remove the last useless paragraph
775  SwPosition* pPos = m_pPam->GetPoint();
776  if( !pPos->nContent.GetIndex() && !bLFStripped )
777  {
778  SwTextNode* pCurrentNd;
779  sal_uLong nNodeIdx = pPos->nNode.GetIndex();
780 
 // NOTE(review): the initialiser of bHasFlysOrMarks (listing line 782) is missing from this extract.
781  bool bHasFlysOrMarks =
783 
784  if( IsNewDoc() )
785  {
786  if (!m_pPam->GetPoint()->nContent.GetIndex() && CanRemoveNode(nNodeIdx))
787  {
 // NOTE(review): the declarations of pCNd and pVSh (listing lines 788
 // and 792) are missing from this extract.
789  if( pCNd && pCNd->StartOfSectionIndex()+2 <
790  pCNd->EndOfSectionIndex() && !bHasFlysOrMarks )
791  {
793  SwCursorShell *pCursorSh = dynamic_cast<SwCursorShell *>( pVSh );
 // Move a shell cursor that sits in the node about to be deleted
 // to the end of the previous paragraph first.
794  if( pCursorSh &&
795  pCursorSh->GetCursor()->GetPoint()
796  ->nNode.GetIndex() == nNodeIdx )
797  {
798  pCursorSh->MovePara(GoPrevPara, fnParaEnd );
799  pCursorSh->SetMark();
800  pCursorSh->ClearMark();
801  }
802  m_pPam->GetBound().nContent.Assign( nullptr, 0 );
803  m_pPam->GetBound(false).nContent.Assign( nullptr, 0 );
804  m_xDoc->GetNodes().Delete( m_pPam->GetPoint()->nNode );
805  }
806  }
807  }
808  else if( nullptr != ( pCurrentNd = m_xDoc->GetNodes()[ nNodeIdx ]->GetTextNode()) && !bHasFlysOrMarks )
809  {
810  if( pCurrentNd->CanJoinNext( &pPos->nNode ))
811  {
812  SwTextNode* pNextNd = pPos->nNode.GetNode().GetTextNode();
813  pPos->nContent.Assign( pNextNd, 0 );
815  pNextNd->JoinPrev();
816  }
817  else if (pCurrentNd->GetText().isEmpty())
818  {
819  pPos->nContent.Assign( nullptr, 0 );
821  m_xDoc->GetNodes().Delete( pPos->nNode );
823  }
824  }
825  }
826 
827  // annul the SplitNode from the beginning
828  else if( !IsNewDoc() )
829  {
830  if( pPos->nContent.GetIndex() ) // then there was no <p> at the end
831  m_pPam->Move( fnMoveForward, GoInNode ); // therefore to the next
832  SwTextNode* pTextNode = pPos->nNode.GetNode().GetTextNode();
833  SwNodeIndex aPrvIdx( pPos->nNode );
834  if( pTextNode && pTextNode->CanJoinPrev( &aPrvIdx ) &&
835  *m_pSttNdIdx <= aPrvIdx )
836  {
837  // Normally here should take place a JoinNext, but all cursors and
838  // so are registered in pTextNode, so that it MUST remain.
839 
840  // Convert paragraph to character attribute, from Prev adopt
841  // the paragraph attribute and the template!
842  SwTextNode* pPrev = aPrvIdx.GetNode().GetTextNode();
843  pTextNode->ChgFormatColl( pPrev->GetTextColl() );
844  pTextNode->FormatToTextAttr( pPrev );
845  pTextNode->ResetAllAttr();
846 
847  if( pPrev->HasSwAttrSet() )
848  pTextNode->SetAttr( *pPrev->GetpSwAttrSet() );
849 
850  if( &m_pPam->GetBound().nNode.GetNode() == pPrev )
851  m_pPam->GetBound().nContent.Assign( pTextNode, 0 );
852  if( &m_pPam->GetBound(false).nNode.GetNode() == pPrev )
853  m_pPam->GetBound(false).nContent.Assign( pTextNode, 0 );
854 
855  pTextNode->JoinPrev();
856  }
857  }
858 
859  // adjust AutoLoad in DocumentProperties
 // (an autoload interval without a URL gets the imported file's path as URL)
860  if (!utl::ConfigManager::IsFuzzing() && IsNewDoc())
861  {
862  SwDocShell *pDocShell(m_xDoc->GetDocShell());
863  OSL_ENSURE(pDocShell, "no SwDocShell");
864  if (pDocShell) {
865  uno::Reference<document::XDocumentPropertiesSupplier> xDPS(
866  pDocShell->GetModel(), uno::UNO_QUERY_THROW);
867  uno::Reference<document::XDocumentProperties> xDocProps(
868  xDPS->getDocumentProperties());
869  OSL_ENSURE(xDocProps.is(), "DocumentProperties is null");
870  if ( xDocProps.is() && (xDocProps->getAutoloadSecs() > 0) &&
871  (xDocProps->getAutoloadURL().isEmpty()) )
872  {
873  xDocProps->setAutoloadURL(m_aPathToFile);
874  }
875  }
876  }
877 
878  if( m_bUpdateDocStat )
879  {
880  m_xDoc->getIDocumentStatistics().UpdateDocStat( false, true );
881  }
882  }
883 
 // The start index is only needed while the parse can still be resumed.
884  if( SvParserState::Pending != GetStatus() )
885  {
886  delete m_pSttNdIdx;
887  m_pSttNdIdx = nullptr;
888  }
889 
890  // should the parser be the last one who hold the document, then nothing
891  // has to be done anymore, document will be destroyed shortly!
892  if( 1 < m_xDoc->getReferenceCount() )
893  {
894  if( bWasUndo )
895  {
896  m_xDoc->GetIDocumentUndoRedo().DelAllUndoObj();
897  m_xDoc->GetIDocumentUndoRedo().DoUndo(true);
898  }
899  else if( !pInitVSh )
900  {
901  // When at the beginning of Continue no Shell was available,
902  // it's possible in the meantime one was created.
903  // In that case the bWasUndo flag is wrong and we must
904  // enable Undo.
905  SwViewShell *pTmpVSh = CheckActionViewShell();
906  if( pTmpVSh )
907  {
908  m_xDoc->GetIDocumentUndoRedo().DoUndo(true);
909  }
910  }
911 
912  m_xDoc->SetOle2Link( aOLELink );
913  if( !bModified )
914  m_xDoc->getIDocumentState().ResetModified();
915  if( m_bSetModEnabled && m_xDoc->GetDocShell() )
916  {
917  m_xDoc->GetDocShell()->EnableSetModified();
918  m_bSetModEnabled = false; // this is unnecessary here
919  }
920  }
921 
922  // When the Document-SwViewShell still exists and an Action is open
923  // (doesn't have to be by abort), end the Action, disconnect from Shell
924  // and finally reconstruct the old Shell.
925  CallEndAction( true );
926 
927 #ifdef DBG_UTIL
928  m_nContinue--;
929 #endif
930 }
931 
// Notification handler. The message id is taken from pOld when it is set,
// otherwise from pNew, otherwise 0. Only RES_OBJECTDYING is handled: when the
// dying object is the one this parser is registered in, the parser stops
// listening and drops a reference to itself. All other messages are ignored.
//
// pOld: old item / message, may be null.
// pNew: new item, may be null; only used to determine the message id.
932 void SwHTMLParser::Modify( const SfxPoolItem* pOld, const SfxPoolItem *pNew )
933 {
934  switch( pOld ? pOld->Which() : pNew ? pNew->Which() : 0 )
935  {
936  case RES_OBJECTDYING:
937  if (pOld && static_cast<const SwPtrMsgPoolItem *>(pOld)->pObject == GetRegisteredIn())
938  {
939  // then we kill ourselves
940  EndListeningAll();
 // NOTE(review): presumably this can destroy the parser if it was the
 // last reference; nothing is accessed afterwards - confirm.
941  ReleaseRef(); // otherwise we're done!
942  }
943  break;
944  }
945 }
946 
// NOTE(review): the signature of this definition (listing line 947) is missing
// from this extract.
// Marks the document as initialised (expected to happen only once, see the
// assertion). For a new document it additionally finishes a still open
// header, ends the current action, switches Undo off and starts a new action.
948 {
949  OSL_ENSURE( !m_bDocInitalized, "DocumentDetected called multiple times" );
950  m_bDocInitalized = true;
951  if( IsNewDoc() )
952  {
953  if( IsInHeader() )
954  FinishHeader();
955 
956  CallEndAction( true );
957 
958  m_xDoc->GetIDocumentUndoRedo().DoUndo(false);
959  // For DocumentDetected in general a SwViewShell is created.
960  // But it also can be created later, in case the UI is captured.
961  CallStartAction();
962  }
963 }
964 
965 // is called for every token that is recognised in CallParser
967 {
968  if( ( m_xDoc->GetDocShell() && m_xDoc->GetDocShell()->IsAbortingImport() )
969  || 1 == m_xDoc->getReferenceCount() )
970  {
971  // Was the import cancelled by SFX? If a pending stack
972  // exists, clean it.
973  eState = SvParserState::Error;
974  OSL_ENSURE( m_vPendingStack.empty() || m_vPendingStack.back().nToken != HtmlTokenId::NONE,
975  "SwHTMLParser::NextToken: Pending-Stack without token" );
976  if( 1 == m_xDoc->getReferenceCount() || m_vPendingStack.empty() )
977  return ;
978  }
979 
980 #if OSL_DEBUG_LEVEL > 0
981  if( !m_vPendingStack.empty() )
982  {
983  switch( nToken )
984  {
985  // tables are read by recursive method calls
986  case HtmlTokenId::TABLE_ON:
987  // For CSS declarations we might have to wait
988  // for a file download to finish
989  case HtmlTokenId::LINK:
990  // For controls we might have to set the size.
991  case HtmlTokenId::INPUT:
992  case HtmlTokenId::TEXTAREA_ON:
993  case HtmlTokenId::SELECT_ON:
994  case HtmlTokenId::SELECT_OFF:
995  break;
996  default:
997  OSL_ENSURE( m_vPendingStack.empty(), "Unknown token for Pending-Stack" );
998  break;
999  }
1000  }
1001 #endif
1002 
1003  // The following special cases have to be treated before the
1004  // filter detection, because Netscape doesn't reference the content
1005  // of the title for filter detection either.
1006  if( m_vPendingStack.empty() )
1007  {
1008  if( m_bInTitle )
1009  {
1010  switch( nToken )
1011  {
1012  case HtmlTokenId::TITLE_OFF:
1013  {
1014  OUString sTitle = m_sTitle.makeStringAndClear();
1015  if( IsNewDoc() && !sTitle.isEmpty() )
1016  {
1017  if( m_xDoc->GetDocShell() ) {
1018  uno::Reference<document::XDocumentPropertiesSupplier>
1019  xDPS(m_xDoc->GetDocShell()->GetModel(),
1020  uno::UNO_QUERY_THROW);
1021  uno::Reference<document::XDocumentProperties> xDocProps(
1022  xDPS->getDocumentProperties());
1023  OSL_ENSURE(xDocProps.is(), "no DocumentProperties");
1024  if (xDocProps.is()) {
1025  xDocProps->setTitle(sTitle);
1026  }
1027 
1028  m_xDoc->GetDocShell()->SetTitle(sTitle);
1029  }
1030  }
1031  m_bInTitle = false;
1032  break;
1033  }
1034 
1035  case HtmlTokenId::NONBREAKSPACE:
1036  m_sTitle.append(" ");
1037  break;
1038 
1039  case HtmlTokenId::SOFTHYPH:
1040  m_sTitle.append("-");
1041  break;
1042 
1043  case HtmlTokenId::TEXTTOKEN:
1044  m_sTitle.append(aToken);
1045  break;
1046 
1047  default:
1048  m_sTitle.append("<");
1049  if( (nToken >= HtmlTokenId::ONOFF_START) && isOffToken(nToken) )
1050  m_sTitle.append("/");
1051  m_sTitle.append(sSaveToken);
1052  if( !aToken.isEmpty() )
1053  {
1054  m_sTitle.append(" ");
1055  m_sTitle.append(aToken);
1056  }
1057  m_sTitle.append(">");
1058  break;
1059  }
1060 
1061  return;
1062  }
1063  }
1064 
1065  // Find out what type of document it is if we don't know already.
1066  // For Controls this has to be finished before the control is inserted
1067  // because for inserting a View is needed.
1068  if( !m_bDocInitalized )
1069  DocumentDetected();
1070 
1071  bool bGetIDOption = false, bInsertUnknown = false;
1072  bool bUpperSpaceSave = m_bUpperSpace;
1073  m_bUpperSpace = false;
1074 
1075  // The following special cases may or have to be treated after the
1076  // filter detection
1077  if( m_vPendingStack.empty() )
1078  {
1079  if( m_bInFloatingFrame )
1080  {
1081  // <SCRIPT> is ignored here (from us), because it is ignored in
1082  // Applets as well
1083  if( HtmlTokenId::IFRAME_OFF == nToken )
1084  {
1085  m_bCallNextToken = false;
1086  m_bInFloatingFrame = false;
1087  }
1088 
1089  return;
1090  }
1091  else if( m_bInNoEmbed )
1092  {
1093  switch( nToken )
1094  {
1095  case HtmlTokenId::NOEMBED_OFF:
1098  m_aContents.clear();
1099  m_bCallNextToken = false;
1100  m_bInNoEmbed = false;
1101  break;
1102 
1103  case HtmlTokenId::RAWDATA:
1105  break;
1106 
1107  default:
1108  OSL_ENSURE( false, "SwHTMLParser::NextToken: invalid tag" );
1109  break;
1110  }
1111 
1112  return;
1113  }
1114  else if( m_pAppletImpl )
1115  {
1116  // in an applet only <PARAM> tags and the </APPLET> tag
1117  // are of interest for us (for the moment)
1118  // <SCRIPT> is ignored here (from Netscape)!
1119 
1120  switch( nToken )
1121  {
1122  case HtmlTokenId::APPLET_OFF:
1123  m_bCallNextToken = false;
1124  EndApplet();
1125  break;
1126  case HtmlTokenId::OBJECT_OFF:
1127  m_bCallNextToken = false;
1128  EndObject();
1129  break;
1130  case HtmlTokenId::PARAM:
1131  InsertParam();
1132  break;
1133  default: break;
1134  }
1135 
1136  return;
1137  }
1138  else if( m_bTextArea )
1139  {
1140  // in a TextArea everything up to </TEXTAREA> is inserted as text.
1141  // <SCRIPT> is ignored here (from Netscape)!
1142 
1143  switch( nToken )
1144  {
1145  case HtmlTokenId::TEXTAREA_OFF:
1146  m_bCallNextToken = false;
1147  EndTextArea();
1148  break;
1149 
1150  default:
1151  InsertTextAreaText( nToken );
1152  break;
1153  }
1154 
1155  return;
1156  }
1157  else if( m_bSelect )
1158  {
1159  // HAS to be treated after bNoScript!
1160  switch( nToken )
1161  {
1162  case HtmlTokenId::SELECT_OFF:
1163  m_bCallNextToken = false;
1164  EndSelect();
1165  return;
1166 
1167  case HtmlTokenId::OPTION:
1169  return;
1170 
1171  case HtmlTokenId::TEXTTOKEN:
1172  InsertSelectText();
1173  return;
1174 
1175  case HtmlTokenId::INPUT:
1176  case HtmlTokenId::SCRIPT_ON:
1177  case HtmlTokenId::SCRIPT_OFF:
1178  case HtmlTokenId::NOSCRIPT_ON:
1179  case HtmlTokenId::NOSCRIPT_OFF:
1180  case HtmlTokenId::RAWDATA:
1181  // treat in normal switch
1182  break;
1183 
1184  default:
1185  // ignore
1186  return;
1187  }
1188  }
1189  else if( m_pMarquee )
1190  {
1191  // in a marquee only text tokens and the </MARQUEE> tag are of interest.
1192  // The <SCRIPT> tags are ignored from MS-IE, we ignore the whole
1193  // script.
1194  switch( nToken )
1195  {
1196  case HtmlTokenId::MARQUEE_OFF:
1197  m_bCallNextToken = false;
1198  EndMarquee();
1199  break;
1200 
1201  case HtmlTokenId::TEXTTOKEN:
1203  break;
1204  default: break;
1205  }
1206 
1207  return;
1208  }
1209  else if( m_bInField )
1210  {
1211  switch( nToken )
1212  {
1213  case HtmlTokenId::SDFIELD_OFF:
1214  m_bCallNextToken = false;
1215  EndField();
1216  break;
1217 
1218  case HtmlTokenId::TEXTTOKEN:
1219  InsertFieldText();
1220  break;
1221  default: break;
1222  }
1223 
1224  return;
1225  }
1227  {
1228  switch( nToken )
1229  {
1230  case HtmlTokenId::ANCHOR_OFF:
1231  EndAnchor();
1232  m_bCallNextToken = false;
1233  break;
1234 
1235  case HtmlTokenId::TEXTTOKEN:
1237  break;
1238  default: break;
1239  }
1240  return;
1241  }
1242  else if( !m_aUnknownToken.isEmpty() )
1243  {
1244  // Paste content of unknown tags.
1245  // (but surely if we are not in the header section) fdo#36080 fdo#34666
1246  if (!aToken.isEmpty() && !IsInHeader() )
1247  {
1248  if( !m_bDocInitalized )
1249  DocumentDetected();
1250  m_xDoc->getIDocumentContentOperations().InsertString( *m_pPam, aToken );
1251 
1252  // if there are temporary paragraph attributes and the
1253  // paragraph isn't empty then the paragraph attributes
1254  // are final.
1255  m_aParaAttrs.clear();
1256 
1257  SetAttr();
1258  }
1259 
1260  // Unknown token in the header are only closed by a matching
1261  // end-token, </HEAD> or <BODY>. Text inside is ignored.
1262  switch( nToken )
1263  {
1264  case HtmlTokenId::UNKNOWNCONTROL_OFF:
1265  if( m_aUnknownToken != sSaveToken )
1266  return;
1267  [[fallthrough]];
1268  case HtmlTokenId::FRAMESET_ON:
1269  case HtmlTokenId::HEAD_OFF:
1270  case HtmlTokenId::BODY_ON:
1271  case HtmlTokenId::IMAGE: // Don't know why Netscape acts this way.
1272  m_aUnknownToken.clear();
1273  break;
1274  case HtmlTokenId::TEXTTOKEN:
1275  return;
1276  default:
1277  m_aUnknownToken.clear();
1278  break;
1279  }
1280  }
1281  }
1282 
1283  switch( nToken )
1284  {
1285  case HtmlTokenId::BODY_ON:
1286  if (!m_bBodySeen)
1287  {
1288  m_bBodySeen = true;
1289  if( !m_aStyleSource.isEmpty() )
1290  {
1291  m_pCSS1Parser->ParseStyleSheet( m_aStyleSource );
1292  m_aStyleSource.clear();
1293  }
1294  if( IsNewDoc() )
1295  {
1297  // If there is a template for the first or the right page,
1298  // it is set here.
1299  const SwPageDesc *pPageDesc = nullptr;
1300  if( m_pCSS1Parser->IsSetFirstPageDesc() )
1301  pPageDesc = m_pCSS1Parser->GetFirstPageDesc();
1302  else if( m_pCSS1Parser->IsSetRightPageDesc() )
1303  pPageDesc = m_pCSS1Parser->GetRightPageDesc();
1304 
1305  if( pPageDesc )
1306  {
1307  m_xDoc->getIDocumentContentOperations().InsertPoolItem( *m_pPam, SwFormatPageDesc( pPageDesc ) );
1308  }
1309  }
1310  }
1311  break;
1312 
1313  case HtmlTokenId::LINK:
1314  InsertLink();
1315  break;
1316 
1317  case HtmlTokenId::BASE:
1318  {
1319  const HTMLOptions& rHTMLOptions = GetOptions();
1320  for (size_t i = rHTMLOptions.size(); i; )
1321  {
1322  const HTMLOption& rOption = rHTMLOptions[--i];
1323  switch( rOption.GetToken() )
1324  {
1325  case HtmlOptionId::HREF:
1326  m_sBaseURL = rOption.GetString();
1327  break;
1328  case HtmlOptionId::TARGET:
1329  if( IsNewDoc() )
1330  {
1331  SwDocShell *pDocShell(m_xDoc->GetDocShell());
1332  OSL_ENSURE(pDocShell, "no SwDocShell");
1333  if (pDocShell) {
1334  uno::Reference<document::XDocumentPropertiesSupplier> xDPS(
1335  pDocShell->GetModel(), uno::UNO_QUERY_THROW);
1336  uno::Reference<document::XDocumentProperties>
1337  xDocProps(xDPS->getDocumentProperties());
1338  OSL_ENSURE(xDocProps.is(),"no DocumentProperties");
1339  if (xDocProps.is()) {
1340  xDocProps->setDefaultTarget(
1341  rOption.GetString());
1342  }
1343  }
1344  }
1345  break;
1346  default: break;
1347  }
1348  }
1349  }
1350  break;
1351 
1352  case HtmlTokenId::META:
1353  {
1354  SvKeyValueIterator *pHTTPHeader = nullptr;
1355  if( IsNewDoc() )
1356  {
1357  SwDocShell *pDocSh = m_xDoc->GetDocShell();
1358  if( pDocSh )
1359  pHTTPHeader = pDocSh->GetHeaderAttributes();
1360  }
1361  SwDocShell *pDocShell(m_xDoc->GetDocShell());
1362  OSL_ENSURE(pDocShell, "no SwDocShell");
1363  if (pDocShell)
1364  {
1365  uno::Reference<document::XDocumentProperties> xDocProps;
1366  if (IsNewDoc())
1367  {
1368  const uno::Reference<document::XDocumentPropertiesSupplier>
1369  xDPS( pDocShell->GetModel(), uno::UNO_QUERY_THROW );
1370  xDocProps = xDPS->getDocumentProperties();
1371  OSL_ENSURE(xDocProps.is(), "DocumentProperties is null");
1372  }
1373  ParseMetaOptions( xDocProps, pHTTPHeader );
1374  }
1375  }
1376  break;
1377 
1378  case HtmlTokenId::TITLE_ON:
1379  m_bInTitle = true;
1380  break;
1381 
1382  case HtmlTokenId::SCRIPT_ON:
1383  NewScript();
1384  break;
1385 
1386  case HtmlTokenId::SCRIPT_OFF:
1387  EndScript();
1388  break;
1389 
1390  case HtmlTokenId::NOSCRIPT_ON:
1391  case HtmlTokenId::NOSCRIPT_OFF:
1392  bInsertUnknown = true;
1393  break;
1394 
1395  case HtmlTokenId::STYLE_ON:
1396  NewStyle();
1397  break;
1398 
1399  case HtmlTokenId::STYLE_OFF:
1400  EndStyle();
1401  break;
1402 
1403  case HtmlTokenId::RAWDATA:
1404  if( !m_bIgnoreRawData )
1405  {
1406  if( IsReadScript() )
1407  {
1408  AddScriptSource();
1409  }
1410  else if( IsReadStyle() )
1411  {
1412  if( !m_aStyleSource.isEmpty() )
1413  m_aStyleSource += "\n";
1414  m_aStyleSource += aToken;
1415  }
1416  }
1417  break;
1418 
1419  case HtmlTokenId::OBJECT_ON:
1420  if (m_bXHTML)
1421  {
1422  if (!InsertEmbed())
1423  InsertImage();
1424  break;
1425  }
1426 #if HAVE_FEATURE_JAVA
1427  NewObject();
1428  m_bCallNextToken = m_pAppletImpl!=nullptr && m_xTable;
1429 #endif
1430  break;
1431 
1432  case HtmlTokenId::OBJECT_OFF:
1433  if (!m_aEmbeds.empty())
1434  m_aEmbeds.pop();
1435  break;
1436 
1437  case HtmlTokenId::APPLET_ON:
1438 #if HAVE_FEATURE_JAVA
1439  InsertApplet();
1440  m_bCallNextToken = m_pAppletImpl!=nullptr && m_xTable;
1441 #endif
1442  break;
1443 
1444  case HtmlTokenId::IFRAME_ON:
1447  break;
1448 
1449  case HtmlTokenId::LINEBREAK:
1450  if( !IsReadPRE() )
1451  {
1452  InsertLineBreak();
1453  break;
1454  }
1455  else
1456  bGetIDOption = true;
1457  // <BR>s in <PRE> resemble true LFs, hence no break
1458  [[fallthrough]];
1459 
1460  case HtmlTokenId::NEWPARA:
1461  // CR in PRE/LISTING/XMP
1462  {
1463  if( HtmlTokenId::NEWPARA==nToken ||
1465  {
1466  AppendTextNode(); // there is no LF at this place
1467  // therefore it will cause no problems
1468  SetTextCollAttrs();
1469  }
1470  // progress bar
1471  if (m_xProgress)
1472  m_xProgress->Update(rInput.Tell());
1473  }
1474  break;
1475 
1476  case HtmlTokenId::NONBREAKSPACE:
1477  m_xDoc->getIDocumentContentOperations().InsertString( *m_pPam, OUString(CHAR_HARDBLANK) );
1478  break;
1479 
1480  case HtmlTokenId::SOFTHYPH:
1481  m_xDoc->getIDocumentContentOperations().InsertString( *m_pPam, OUString(CHAR_SOFTHYPHEN) );
1482  break;
1483 
1484  case HtmlTokenId::LINEFEEDCHAR:
1485  if( m_pPam->GetPoint()->nContent.GetIndex() )
1486  AppendTextNode();
1487  if (!m_xTable && !m_xDoc->IsInHeaderFooter(m_pPam->GetPoint()->nNode))
1488  {
1489  NewAttr(m_xAttrTab, &m_xAttrTab->pBreak, SvxFormatBreakItem(SvxBreak::PageBefore, RES_BREAK));
1490  EndAttr( m_xAttrTab->pBreak, false );
1491  }
1492  break;
1493 
1494  case HtmlTokenId::TEXTTOKEN:
1495  // insert string without spanning attributes at the end.
1496  if( !aToken.isEmpty() && ' '==aToken[0] && !IsReadPRE() )
1497  {
1498  sal_Int32 nPos = m_pPam->GetPoint()->nContent.GetIndex();
1499  const SwTextNode* pTextNode = nPos ? m_pPam->GetPoint()->nNode.GetNode().GetTextNode() : nullptr;
1500  if (pTextNode)
1501  {
1502  const OUString& rText = pTextNode->GetText();
1503  sal_Unicode cLast = rText[--nPos];
1504  if( ' ' == cLast || '\x0a' == cLast)
1505  aToken = aToken.copy(1);
1506  }
1507  else
1508  aToken = aToken.copy(1);
1509 
1510  if( aToken.isEmpty() )
1511  {
1512  m_bUpperSpace = bUpperSpaceSave;
1513  break;
1514  }
1515  }
1516 
1517  if( !aToken.isEmpty() )
1518  {
1519  if( !m_bDocInitalized )
1520  DocumentDetected();
1521 
1522  if (!m_aEmbeds.empty())
1523  {
1524  // The text token is inside an OLE object, which means
1525  // alternate text.
1526  SwOLENode* pOLENode = m_aEmbeds.top();
1527  if (SwFlyFrameFormat* pFormat
1528  = dynamic_cast<SwFlyFrameFormat*>(pOLENode->GetFlyFormat()))
1529  {
1530  if (SdrObject* pObject = SwXFrame::GetOrCreateSdrObject(*pFormat))
1531  {
1532  pObject->SetTitle(pObject->GetTitle() + aToken);
1533  break;
1534  }
1535  }
1536  }
1537 
1538  m_xDoc->getIDocumentContentOperations().InsertString( *m_pPam, aToken );
1539 
1540  // if there are temporary paragraph attributes and the
1541  // paragraph isn't empty then the paragraph attributes
1542  // are final.
1543  m_aParaAttrs.clear();
1544 
1545  SetAttr();
1546  }
1547  break;
1548 
1549  case HtmlTokenId::HORZRULE:
1550  InsertHorzRule();
1551  break;
1552 
1553  case HtmlTokenId::IMAGE:
1554  InsertImage();
1555  // if only the parser references the doc, we can break and set
1556  // an error code
1557  if( 1 == m_xDoc->getReferenceCount() )
1558  {
1559  eState = SvParserState::Error;
1560  }
1561  break;
1562 
1563  case HtmlTokenId::SPACER:
1564  InsertSpacer();
1565  break;
1566 
1567  case HtmlTokenId::EMBED:
1568  InsertEmbed();
1569  break;
1570 
1571  case HtmlTokenId::NOEMBED_ON:
1572  m_bInNoEmbed = true;
1573  m_bCallNextToken = bool(m_xTable);
1574  ReadRawData( OOO_STRING_SVTOOLS_HTML_noembed );
1575  break;
1576 
1577  case HtmlTokenId::DEFLIST_ON:
1578  if( m_nOpenParaToken != HtmlTokenId::NONE )
1579  EndPara();
1580  NewDefList();
1581  break;
1582  case HtmlTokenId::DEFLIST_OFF:
1583  if( m_nOpenParaToken != HtmlTokenId::NONE )
1584  EndPara();
1585  EndDefListItem( HtmlTokenId::NONE );
1586  EndDefList();
1587  break;
1588 
1589  case HtmlTokenId::DD_ON:
1590  case HtmlTokenId::DT_ON:
1591  if( m_nOpenParaToken != HtmlTokenId::NONE )
1592  EndPara();
1593  EndDefListItem();// close <DD>/<DT> and set no template
1594  NewDefListItem( nToken );
1595  break;
1596 
1597  case HtmlTokenId::DD_OFF:
1598  case HtmlTokenId::DT_OFF:
1599  // c.f. HtmlTokenId::LI_OFF
1600  // Actually we should close a DD/DT now.
1601  // But neither Netscape nor Microsoft do this and so don't we.
1602  EndDefListItem( nToken );
1603  break;
1604 
1605  // divisions
1606  case HtmlTokenId::DIVISION_ON:
1607  case HtmlTokenId::CENTER_ON:
1608  if (!m_isInTableStructure)
1609  {
1610  if (m_nOpenParaToken != HtmlTokenId::NONE)
1611  {
1612  if (IsReadPRE())
1613  m_nOpenParaToken = HtmlTokenId::NONE;
1614  else
1615  EndPara();
1616  }
1617  NewDivision( nToken );
1618  }
1619  break;
1620 
1621  case HtmlTokenId::DIVISION_OFF:
1622  case HtmlTokenId::CENTER_OFF:
1623  if (!m_isInTableStructure)
1624  {
1625  if (m_nOpenParaToken != HtmlTokenId::NONE)
1626  {
1627  if (IsReadPRE())
1628  m_nOpenParaToken = HtmlTokenId::NONE;
1629  else
1630  EndPara();
1631  }
1632  EndDivision();
1633  }
1634  break;
1635 
1636  case HtmlTokenId::MULTICOL_ON:
1637  if( m_nOpenParaToken != HtmlTokenId::NONE )
1638  EndPara();
1639  NewMultiCol();
1640  break;
1641 
1642  case HtmlTokenId::MULTICOL_OFF:
1643  if( m_nOpenParaToken != HtmlTokenId::NONE )
1644  EndPara();
1645  EndTag( HtmlTokenId::MULTICOL_ON );
1646  break;
1647 
1648  case HtmlTokenId::MARQUEE_ON:
1649  NewMarquee();
1650  m_bCallNextToken = m_pMarquee!=nullptr && m_xTable;
1651  break;
1652 
1653  case HtmlTokenId::FORM_ON:
1654  NewForm();
1655  break;
1656  case HtmlTokenId::FORM_OFF:
1657  EndForm();
1658  break;
1659 
1660  // templates
1661  case HtmlTokenId::PARABREAK_ON:
1662  if( m_nOpenParaToken != HtmlTokenId::NONE )
1663  EndPara( true );
1664  NewPara();
1665  break;
1666 
1667  case HtmlTokenId::PARABREAK_OFF:
1668  EndPara( true );
1669  break;
1670 
1671  case HtmlTokenId::ADDRESS_ON:
1672  if( m_nOpenParaToken != HtmlTokenId::NONE )
1673  EndPara();
1674  NewTextFormatColl( HtmlTokenId::ADDRESS_ON, RES_POOLCOLL_SENDADRESS );
1675  break;
1676 
1677  case HtmlTokenId::ADDRESS_OFF:
1678  if( m_nOpenParaToken != HtmlTokenId::NONE )
1679  EndPara();
1680  EndTextFormatColl( HtmlTokenId::ADDRESS_OFF );
1681  break;
1682 
1683  case HtmlTokenId::BLOCKQUOTE_ON:
1684  case HtmlTokenId::BLOCKQUOTE30_ON:
1685  if( m_nOpenParaToken != HtmlTokenId::NONE )
1686  EndPara();
1687  NewTextFormatColl( HtmlTokenId::BLOCKQUOTE_ON, RES_POOLCOLL_HTML_BLOCKQUOTE );
1688  break;
1689 
1690  case HtmlTokenId::BLOCKQUOTE_OFF:
1691  case HtmlTokenId::BLOCKQUOTE30_OFF:
1692  if( m_nOpenParaToken != HtmlTokenId::NONE )
1693  EndPara();
1694  EndTextFormatColl( HtmlTokenId::BLOCKQUOTE_ON );
1695  break;
1696 
1697  case HtmlTokenId::PREFORMTXT_ON:
1698  case HtmlTokenId::LISTING_ON:
1699  case HtmlTokenId::XMP_ON:
1700  if( m_nOpenParaToken != HtmlTokenId::NONE )
1701  EndPara();
1703  break;
1704 
1705  case HtmlTokenId::PREFORMTXT_OFF:
1706  m_bNoParSpace = true; // the last PRE-paragraph gets a spacing
1707  EndTextFormatColl( HtmlTokenId::PREFORMTXT_OFF );
1708  break;
1709 
1710  case HtmlTokenId::LISTING_OFF:
1711  case HtmlTokenId::XMP_OFF:
1712  EndTextFormatColl( nToken );
1713  break;
1714 
1715  case HtmlTokenId::HEAD1_ON:
1716  case HtmlTokenId::HEAD2_ON:
1717  case HtmlTokenId::HEAD3_ON:
1718  case HtmlTokenId::HEAD4_ON:
1719  case HtmlTokenId::HEAD5_ON:
1720  case HtmlTokenId::HEAD6_ON:
1721  if( m_nOpenParaToken != HtmlTokenId::NONE )
1722  {
1723  if( IsReadPRE() )
1724  m_nOpenParaToken = HtmlTokenId::NONE;
1725  else
1726  EndPara();
1727  }
1728  NewHeading( nToken );
1729  break;
1730 
1731  case HtmlTokenId::HEAD1_OFF:
1732  case HtmlTokenId::HEAD2_OFF:
1733  case HtmlTokenId::HEAD3_OFF:
1734  case HtmlTokenId::HEAD4_OFF:
1735  case HtmlTokenId::HEAD5_OFF:
1736  case HtmlTokenId::HEAD6_OFF:
1737  EndHeading();
1738  break;
1739 
1740  case HtmlTokenId::TABLE_ON:
1741  if( !m_vPendingStack.empty() )
1742  BuildTable( SvxAdjust::End );
1743  else
1744  {
1745  if( m_nOpenParaToken != HtmlTokenId::NONE )
1746  EndPara();
1747  OSL_ENSURE(!m_xTable.get(), "table in table not allowed here");
1748  if( !m_xTable && (IsNewDoc() || !m_pPam->GetNode().FindTableNode()) &&
1749  (m_pPam->GetPoint()->nNode.GetIndex() >
1750  m_xDoc->GetNodes().GetEndOfExtras().GetIndex() ||
1752  {
1753  if ( m_nParaCnt < 5 )
1754  Show(); // show what we have up to here
1755 
1756  SvxAdjust eAdjust = m_xAttrTab->pAdjust
1757  ? static_cast<const SvxAdjustItem&>(m_xAttrTab->pAdjust->GetItem()).
1758  GetAdjust()
1759  : SvxAdjust::End;
1760  BuildTable( eAdjust );
1761  }
1762  else
1763  bInsertUnknown = m_bKeepUnknown;
1764  }
1765  break;
1766 
1767  // lists
1768  case HtmlTokenId::DIRLIST_ON:
1769  case HtmlTokenId::MENULIST_ON:
1770  case HtmlTokenId::ORDERLIST_ON:
1771  case HtmlTokenId::UNORDERLIST_ON:
1772  if( m_nOpenParaToken != HtmlTokenId::NONE )
1773  EndPara();
1774  NewNumBulList( nToken );
1775  break;
1776 
1777  case HtmlTokenId::DIRLIST_OFF:
1778  case HtmlTokenId::MENULIST_OFF:
1779  case HtmlTokenId::ORDERLIST_OFF:
1780  case HtmlTokenId::UNORDERLIST_OFF:
1781  if( m_nOpenParaToken != HtmlTokenId::NONE )
1782  EndPara();
1783  EndNumBulListItem( HtmlTokenId::NONE, true );
1784  EndNumBulList( nToken );
1785  break;
1786 
1787  case HtmlTokenId::LI_ON:
1788  case HtmlTokenId::LISTHEADER_ON:
1789  if( m_nOpenParaToken != HtmlTokenId::NONE &&
1791  || HtmlTokenId::PARABREAK_ON==m_nOpenParaToken) )
1792  {
1793  // only finish paragraph for <P><LI>, not for <DD><LI>
1794  EndPara();
1795  }
1796 
1797  EndNumBulListItem( HtmlTokenId::NONE, false );// close <LI>/<LH> and don't set a template
1798  NewNumBulListItem( nToken );
1799  break;
1800 
1801  case HtmlTokenId::LI_OFF:
1802  case HtmlTokenId::LISTHEADER_OFF:
1803  EndNumBulListItem( nToken, false );
1804  break;
1805 
1806  // Attribute :
1807  case HtmlTokenId::ITALIC_ON:
1808  {
1812  NewStdAttr( HtmlTokenId::ITALIC_ON,
1813  &m_xAttrTab->pItalic, aPosture,
1814  &m_xAttrTab->pItalicCJK, &aPostureCJK,
1815  &m_xAttrTab->pItalicCTL, &aPostureCTL );
1816  }
1817  break;
1818 
1819  case HtmlTokenId::BOLD_ON:
1820  {
1824  NewStdAttr( HtmlTokenId::BOLD_ON,
1825  &m_xAttrTab->pBold, aWeight,
1826  &m_xAttrTab->pBoldCJK, &aWeightCJK,
1827  &m_xAttrTab->pBoldCTL, &aWeightCTL );
1828  }
1829  break;
1830 
1831  case HtmlTokenId::STRIKE_ON:
1832  case HtmlTokenId::STRIKETHROUGH_ON:
1833  {
1834  NewStdAttr( HtmlTokenId::STRIKE_ON, &m_xAttrTab->pStrike,
1836  }
1837  break;
1838 
1839  case HtmlTokenId::UNDERLINE_ON:
1840  {
1841  NewStdAttr( HtmlTokenId::UNDERLINE_ON, &m_xAttrTab->pUnderline,
1843  }
1844  break;
1845 
1846  case HtmlTokenId::SUPERSCRIPT_ON:
1847  {
1848  NewStdAttr( HtmlTokenId::SUPERSCRIPT_ON, &m_xAttrTab->pEscapement,
1850  }
1851  break;
1852 
1853  case HtmlTokenId::SUBSCRIPT_ON:
1854  {
1855  NewStdAttr( HtmlTokenId::SUBSCRIPT_ON, &m_xAttrTab->pEscapement,
1857  }
1858  break;
1859 
1860  case HtmlTokenId::BLINK_ON:
1861  {
1862  NewStdAttr( HtmlTokenId::BLINK_ON, &m_xAttrTab->pBlink,
1863  SvxBlinkItem( true, RES_CHRATR_BLINK ) );
1864  }
1865  break;
1866 
1867  case HtmlTokenId::SPAN_ON:
1868  NewStdAttr( HtmlTokenId::SPAN_ON );
1869  break;
1870 
1871  case HtmlTokenId::ITALIC_OFF:
1872  case HtmlTokenId::BOLD_OFF:
1873  case HtmlTokenId::STRIKE_OFF:
1874  case HtmlTokenId::UNDERLINE_OFF:
1875  case HtmlTokenId::SUPERSCRIPT_OFF:
1876  case HtmlTokenId::SUBSCRIPT_OFF:
1877  case HtmlTokenId::BLINK_OFF:
1878  case HtmlTokenId::SPAN_OFF:
1879  EndTag( nToken );
1880  break;
1881 
1882  case HtmlTokenId::STRIKETHROUGH_OFF:
1883  EndTag( HtmlTokenId::STRIKE_OFF );
1884  break;
1885 
1886  case HtmlTokenId::BASEFONT_ON:
1887  NewBasefontAttr();
1888  break;
1889  case HtmlTokenId::BASEFONT_OFF:
1890  EndBasefontAttr();
1891  break;
1892  case HtmlTokenId::FONT_ON:
1893  case HtmlTokenId::BIGPRINT_ON:
1894  case HtmlTokenId::SMALLPRINT_ON:
1895  NewFontAttr( nToken );
1896  break;
1897  case HtmlTokenId::FONT_OFF:
1898  case HtmlTokenId::BIGPRINT_OFF:
1899  case HtmlTokenId::SMALLPRINT_OFF:
1900  EndFontAttr( nToken );
1901  break;
1902 
1903  case HtmlTokenId::EMPHASIS_ON:
1904  case HtmlTokenId::CITIATION_ON:
1905  case HtmlTokenId::STRONG_ON:
1906  case HtmlTokenId::CODE_ON:
1907  case HtmlTokenId::SAMPLE_ON:
1908  case HtmlTokenId::KEYBOARD_ON:
1909  case HtmlTokenId::VARIABLE_ON:
1910  case HtmlTokenId::DEFINSTANCE_ON:
1911  case HtmlTokenId::SHORTQUOTE_ON:
1912  case HtmlTokenId::LANGUAGE_ON:
1913  case HtmlTokenId::AUTHOR_ON:
1914  case HtmlTokenId::PERSON_ON:
1915  case HtmlTokenId::ACRONYM_ON:
1916  case HtmlTokenId::ABBREVIATION_ON:
1917  case HtmlTokenId::INSERTEDTEXT_ON:
1918  case HtmlTokenId::DELETEDTEXT_ON:
1919 
1920  case HtmlTokenId::TELETYPE_ON:
1921  NewCharFormat( nToken );
1922  break;
1923 
1924  case HtmlTokenId::SDFIELD_ON:
1925  NewField();
1927  break;
1928 
1929  case HtmlTokenId::EMPHASIS_OFF:
1930  case HtmlTokenId::CITIATION_OFF:
1931  case HtmlTokenId::STRONG_OFF:
1932  case HtmlTokenId::CODE_OFF:
1933  case HtmlTokenId::SAMPLE_OFF:
1934  case HtmlTokenId::KEYBOARD_OFF:
1935  case HtmlTokenId::VARIABLE_OFF:
1936  case HtmlTokenId::DEFINSTANCE_OFF:
1937  case HtmlTokenId::SHORTQUOTE_OFF:
1938  case HtmlTokenId::LANGUAGE_OFF:
1939  case HtmlTokenId::AUTHOR_OFF:
1940  case HtmlTokenId::PERSON_OFF:
1941  case HtmlTokenId::ACRONYM_OFF:
1942  case HtmlTokenId::ABBREVIATION_OFF:
1943  case HtmlTokenId::INSERTEDTEXT_OFF:
1944  case HtmlTokenId::DELETEDTEXT_OFF:
1945 
1946  case HtmlTokenId::TELETYPE_OFF:
1947  EndTag( nToken );
1948  break;
1949 
1950  case HtmlTokenId::HEAD_OFF:
1951  if( !m_aStyleSource.isEmpty() )
1952  {
1953  m_pCSS1Parser->ParseStyleSheet( m_aStyleSource );
1954  m_aStyleSource.clear();
1955  }
1956  break;
1957 
1958  case HtmlTokenId::DOCTYPE:
1959  case HtmlTokenId::BODY_OFF:
1960  case HtmlTokenId::HTML_OFF:
1961  case HtmlTokenId::HEAD_ON:
1962  case HtmlTokenId::TITLE_OFF:
1963  break; // don't evaluate further???
1964  case HtmlTokenId::HTML_ON:
1965  {
1966  const HTMLOptions& rHTMLOptions = GetOptions();
1967  for (size_t i = rHTMLOptions.size(); i; )
1968  {
1969  const HTMLOption& rOption = rHTMLOptions[--i];
1970  if( HtmlOptionId::DIR == rOption.GetToken() )
1971  {
1972  const OUString& rDir = rOption.GetString();
1973  SfxItemSet aItemSet( m_xDoc->GetAttrPool(),
1974  m_pCSS1Parser->GetWhichMap() );
1975  SvxCSS1PropertyInfo aPropInfo;
1976  OUString aDummy;
1977  ParseStyleOptions( aDummy, aDummy, aDummy, aItemSet,
1978  aPropInfo, nullptr, &rDir );
1979 
1980  m_pCSS1Parser->SetPageDescAttrs( nullptr, &aItemSet );
1981  break;
1982  }
1983  }
1984  }
1985  break;
1986 
1987  case HtmlTokenId::INPUT:
1988  InsertInput();
1989  break;
1990 
1991  case HtmlTokenId::TEXTAREA_ON:
1992  NewTextArea();
1994  break;
1995 
1996  case HtmlTokenId::SELECT_ON:
1997  NewSelect();
1999  break;
2000 
2001  case HtmlTokenId::ANCHOR_ON:
2002  NewAnchor();
2003  break;
2004 
2005  case HtmlTokenId::ANCHOR_OFF:
2006  EndAnchor();
2007  break;
2008 
2009  case HtmlTokenId::COMMENT:
2010  if( ( aToken.getLength() > 5 ) && ( ! m_bIgnoreHTMLComments ) )
2011  {
2012  // insert as Post-It
2013  // If there are no space characters right behind
2014  // the <!-- and on front of the -->, leave the comment untouched.
2015  if( ' ' == aToken[ 3 ] &&
2016  ' ' == aToken[ aToken.getLength()-3 ] )
2017  {
2018  OUString aComment( aToken.copy( 3, aToken.getLength()-5 ) );
2019  InsertComment(comphelper::string::strip(aComment, ' '));
2020  }
2021  else
2022  {
2023  OUStringBuffer aComment;
2024  aComment.append('<').append(aToken).append('>');
2025  InsertComment( aComment.makeStringAndClear() );
2026  }
2027  }
2028  break;
2029 
2030  case HtmlTokenId::MAP_ON:
2031  // Image Maps are read asynchronously: At first only an image map is created
2032  // Areas are processed later. Nevertheless the
2033  // ImageMap is inserted into the IMap-Array, because it might be used
2034  // already.
2035  m_pImageMap = new ImageMap;
2037  {
2038  if (!m_pImageMaps)
2039  m_pImageMaps.reset( new ImageMaps );
2040  m_pImageMaps->push_back(std::unique_ptr<ImageMap>(m_pImageMap));
2041  }
2042  else
2043  {
2044  delete m_pImageMap;
2045  m_pImageMap = nullptr;
2046  }
2047  break;
2048 
2049  case HtmlTokenId::MAP_OFF:
2050  // there is no ImageMap anymore (don't delete IMap, because it's
2051  // already contained in the array!)
2052  m_pImageMap = nullptr;
2053  break;
2054 
2055  case HtmlTokenId::AREA:
2056  if( m_pImageMap )
2057  ParseAreaOptions( m_pImageMap, m_sBaseURL, SvMacroItemId::OnMouseOver,
2058  SvMacroItemId::OnMouseOut );
2059  break;
2060 
2061  case HtmlTokenId::FRAMESET_ON:
2062  bInsertUnknown = m_bKeepUnknown;
2063  break;
2064 
2065  case HtmlTokenId::NOFRAMES_ON:
2066  if( IsInHeader() )
2067  FinishHeader();
2068  bInsertUnknown = m_bKeepUnknown;
2069  break;
2070 
2071  case HtmlTokenId::UNKNOWNCONTROL_ON:
2072  // Ignore content of unknown token in the header, if the token
2073  // does not start with a '!'.
2074  // (but judging from the code, also if does not start with a '%')
2075  // (and also if we're not somewhere we consider PRE)
2076  if( IsInHeader() && !IsReadPRE() && m_aUnknownToken.isEmpty() &&
2077  !sSaveToken.isEmpty() && '!' != sSaveToken[0] &&
2078  '%' != sSaveToken[0] )
2079  m_aUnknownToken = sSaveToken;
2080  [[fallthrough]];
2081 
2082  default:
2083  bInsertUnknown = m_bKeepUnknown;
2084  break;
2085  }
2086 
2087  if( bGetIDOption )
2088  InsertIDOption();
2089 
2090  if( bInsertUnknown )
2091  {
2092  OUStringBuffer aComment("HTML: <");
2093  if( (nToken >= HtmlTokenId::ONOFF_START) && isOffToken(nToken) )
2094  aComment.append("/");
2095  aComment.append(sSaveToken);
2096  if( !aToken.isEmpty() )
2097  {
2098  UnescapeToken();
2099  aComment.append(" ").append(aToken);
2100  }
2101  aComment.append(">");
2102  InsertComment( aComment.makeStringAndClear() );
2103  }
2104 
2105  // if there are temporary paragraph attributes and the
2106  // paragraph isn't empty then the paragraph attributes are final.
2107  if( !m_aParaAttrs.empty() && m_pPam->GetPoint()->nContent.GetIndex() )
2108  m_aParaAttrs.clear();
2109 }
2110 
2111 static void lcl_swhtml_getItemInfo( const HTMLAttr& rAttr,
2112  bool& rScriptDependent,
2113  sal_uInt16& rScriptType )
2114 {
2115  switch( rAttr.GetItem().Which() )
2116  {
2117  case RES_CHRATR_FONT:
2118  case RES_CHRATR_FONTSIZE:
2119  case RES_CHRATR_LANGUAGE:
2120  case RES_CHRATR_POSTURE:
2121  case RES_CHRATR_WEIGHT:
2122  rScriptType = i18n::ScriptType::LATIN;
2123  rScriptDependent = true;
2124  break;
2125  case RES_CHRATR_CJK_FONT:
2129  case RES_CHRATR_CJK_WEIGHT:
2130  rScriptType = i18n::ScriptType::ASIAN;
2131  rScriptDependent = true;
2132  break;
2133  case RES_CHRATR_CTL_FONT:
2137  case RES_CHRATR_CTL_WEIGHT:
2138  rScriptType = i18n::ScriptType::COMPLEX;
2139  rScriptDependent = true;
2140  break;
2141  default:
2142  rScriptDependent = false;
2143  break;
2144  }
2145 }
2146 
2147 bool SwHTMLParser::AppendTextNode( SwHTMLAppendMode eMode, bool bUpdateNum )
2148 {
2149  // A hard line break at the end always must be removed.
2150  // A second one we replace with paragraph spacing.
2151  sal_Int32 nLFStripped = StripTrailingLF();
2152  if( (AM_NOSPACE==eMode || AM_SOFTNOSPACE==eMode) && nLFStripped > 1 )
2153  eMode = AM_SPACE;
2154 
2155  // the hard attributes of this paragraph will never be invalid again
2156  m_aParaAttrs.clear();
2157 
2158  SwTextNode *pTextNode = (AM_SPACE==eMode || AM_NOSPACE==eMode) ?
2159  m_pPam->GetPoint()->nNode.GetNode().GetTextNode() : nullptr;
2160 
2161  if (pTextNode)
2162  {
2163  const SvxULSpaceItem& rULSpace =
2164  static_cast<const SvxULSpaceItem&>(pTextNode->SwContentNode::GetAttr( RES_UL_SPACE ));
2165 
2166  bool bChange = AM_NOSPACE==eMode ? rULSpace.GetLower() > 0
2167  : rULSpace.GetLower() == 0;
2168 
2169  if( bChange )
2170  {
2171  const SvxULSpaceItem& rCollULSpace =
2172  pTextNode->GetAnyFormatColl().GetULSpace();
2173 
2174  bool bMayReset = AM_NOSPACE==eMode ? rCollULSpace.GetLower() == 0
2175  : rCollULSpace.GetLower() > 0;
2176 
2177  if( bMayReset &&
2178  rCollULSpace.GetUpper() == rULSpace.GetUpper() )
2179  {
2180  pTextNode->ResetAttr( RES_UL_SPACE );
2181  }
2182  else
2183  {
2184  pTextNode->SetAttr(
2185  SvxULSpaceItem( rULSpace.GetUpper(),
2186  AM_NOSPACE==eMode ? 0 : HTML_PARSPACE, RES_UL_SPACE ) );
2187  }
2188  }
2189  }
2190  m_bNoParSpace = AM_NOSPACE==eMode || AM_SOFTNOSPACE==eMode;
2191 
2192  SwPosition aOldPos( *m_pPam->GetPoint() );
2193 
2194  bool bRet = m_xDoc->getIDocumentContentOperations().AppendTextNode( *m_pPam->GetPoint() );
2195 
2196  // split character attributes and maybe set none,
2197  // which are set for the whole paragraph
2198  const SwNodeIndex& rEndIdx = aOldPos.nNode;
2199  const sal_Int32 nEndCnt = aOldPos.nContent.GetIndex();
2200  const SwPosition& rPos = *m_pPam->GetPoint();
2201 
2202  HTMLAttr** pHTMLAttributes = reinterpret_cast<HTMLAttr**>(m_xAttrTab.get());
2203  for (auto nCnt = sizeof(HTMLAttrTable) / sizeof(HTMLAttr*); nCnt--; ++pHTMLAttributes)
2204  {
2205  HTMLAttr *pAttr = *pHTMLAttributes;
2206  if( pAttr && pAttr->GetItem().Which() < RES_PARATR_BEGIN )
2207  {
2208  bool bWholePara = false;
2209 
2210  while( pAttr )
2211  {
2212  HTMLAttr *pNext = pAttr->GetNext();
2213  if( pAttr->GetSttParaIdx() < rEndIdx.GetIndex() ||
2214  (!bWholePara &&
2215  pAttr->GetSttPara() == rEndIdx &&
2216  pAttr->GetSttCnt() != nEndCnt) )
2217  {
2218  bWholePara =
2219  pAttr->GetSttPara() == rEndIdx &&
2220  pAttr->GetSttCnt() == 0;
2221 
2222  sal_Int32 nStt = pAttr->m_nStartContent;
2223  bool bScript = false;
2224  sal_uInt16 nScriptItem;
2225  bool bInsert = true;
2226  lcl_swhtml_getItemInfo( *pAttr, bScript,
2227  nScriptItem );
2228  // set previous part
2229  if( bScript )
2230  {
2231  const SwTextNode *pTextNd =
2232  pAttr->GetSttPara().GetNode().GetTextNode();
2233  OSL_ENSURE( pTextNd, "No text node" );
2234  if( pTextNd )
2235  {
2236  const OUString& rText = pTextNd->GetText();
2237  sal_uInt16 nScriptText =
2238  g_pBreakIt->GetBreakIter()->getScriptType(
2239  rText, pAttr->GetSttCnt() );
2240  sal_Int32 nScriptEnd = g_pBreakIt->GetBreakIter()
2241  ->endOfScript( rText, nStt, nScriptText );
2242  while (nScriptEnd < nEndCnt && nScriptEnd != -1)
2243  {
2244  if( nScriptItem == nScriptText )
2245  {
2246  HTMLAttr *pSetAttr =
2247  pAttr->Clone( rEndIdx, nScriptEnd );
2248  pSetAttr->m_nStartContent = nStt;
2249  pSetAttr->ClearPrev();
2250  if( !pNext || bWholePara )
2251  {
2252  if (pSetAttr->m_bInsAtStart)
2253  m_aSetAttrTab.push_front( pSetAttr );
2254  else
2255  m_aSetAttrTab.push_back( pSetAttr );
2256  }
2257  else
2258  pNext->InsertPrev( pSetAttr );
2259  }
2260  nStt = nScriptEnd;
2261  nScriptText = g_pBreakIt->GetBreakIter()->getScriptType(
2262  rText, nStt );
2263  nScriptEnd = g_pBreakIt->GetBreakIter()
2264  ->endOfScript( rText, nStt, nScriptText );
2265  }
2266  bInsert = nScriptItem == nScriptText;
2267  }
2268  }
2269  if( bInsert )
2270  {
2271  HTMLAttr *pSetAttr =
2272  pAttr->Clone( rEndIdx, nEndCnt );
2273  pSetAttr->m_nStartContent = nStt;
2274 
2275  // When the attribute is for the whole paragraph, the outer
2276  // attributes aren't effective anymore. Hence it may not be inserted
2277  // in the Prev-List of an outer attribute, because that won't be
2278  // set. That leads to shifting when fields are used.
2279  if( !pNext || bWholePara )
2280  {
2281  if (pSetAttr->m_bInsAtStart)
2282  m_aSetAttrTab.push_front( pSetAttr );
2283  else
2284  m_aSetAttrTab.push_back( pSetAttr );
2285  }
2286  else
2287  pNext->InsertPrev( pSetAttr );
2288  }
2289  else
2290  {
2291  HTMLAttr *pPrev = pAttr->GetPrev();
2292  if( pPrev )
2293  {
2294  // the previous attributes must be set anyway
2295  if( !pNext || bWholePara )
2296  {
2297  if (pPrev->m_bInsAtStart)
2298  m_aSetAttrTab.push_front( pPrev );
2299  else
2300  m_aSetAttrTab.push_back( pPrev );
2301  }
2302  else
2303  pNext->InsertPrev( pPrev );
2304  }
2305  }
2306  pAttr->ClearPrev();
2307  }
2308 
2309  pAttr->SetStart( rPos );
2310  pAttr = pNext;
2311  }
2312  }
2313  }
2314 
2315  if( bUpdateNum )
2316  {
2317  if( GetNumInfo().GetDepth() )
2318  {
2319  sal_uInt8 nLvl = GetNumInfo().GetLevel();
2320  SetNodeNum( nLvl );
2321  }
2322  else
2324  }
2325 
2326  // We must set the attribute of the paragraph before now (because of JavaScript)
2327  SetAttr();
2328 
2329  // Now it is time to get rid of all script dependent hints that are
2330  // equal to the settings in the style
2331  SwTextNode *pTextNd = rEndIdx.GetNode().GetTextNode();
2332  OSL_ENSURE( pTextNd, "There is the txt node" );
2333  size_t nCntAttr = (pTextNd && pTextNd->GetpSwpHints())
2334  ? pTextNd->GetSwpHints().Count() : 0;
2335  if( nCntAttr )
2336  {
2337  // These are the end position of all script dependent hints.
2338  // If we find a hint that starts before the current end position,
2339  // we have to set it. If we find a hint that start behind or at
2340  // that position, we have to take the hint value into account.
2341  // If it is equal to the style, or in fact the paragraph value
2342  // for that hint, the hint is removed. Otherwise its end position
2343  // is remembered.
2344  sal_Int32 aEndPos[15] =
2345  { 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0 };
2346  SwpHints& rHints = pTextNd->GetSwpHints();
2347  for( size_t i=0; i < nCntAttr; i++ )
2348  {
2349  SwTextAttr *pHt = rHints.Get( i );
2350  sal_uInt16 nWhich = pHt->Which();
2351  sal_Int16 nIdx = 0;
2352  bool bFont = false;
2353  switch( nWhich )
2354  {
2355  case RES_CHRATR_FONT:
2356  nIdx = 0;
2357  bFont = true;
2358  break;
2359  case RES_CHRATR_FONTSIZE:
2360  nIdx = 1;
2361  break;
2362  case RES_CHRATR_LANGUAGE:
2363  nIdx = 2;
2364  break;
2365  case RES_CHRATR_POSTURE:
2366  nIdx = 3;
2367  break;
2368  case RES_CHRATR_WEIGHT:
2369  nIdx = 4;
2370  break;
2371  case RES_CHRATR_CJK_FONT:
2372  nIdx = 5;
2373  bFont = true;
2374  break;
2376  nIdx = 6;
2377  break;
2379  nIdx = 7;
2380  break;
2382  nIdx = 8;
2383  break;
2384  case RES_CHRATR_CJK_WEIGHT:
2385  nIdx = 9;
2386  break;
2387  case RES_CHRATR_CTL_FONT:
2388  nIdx = 10;
2389  bFont = true;
2390  break;
2392  nIdx = 11;
2393  break;
2395  nIdx = 12;
2396  break;
2398  nIdx = 13;
2399  break;
2400  case RES_CHRATR_CTL_WEIGHT:
2401  nIdx = 14;
2402  break;
2403  default:
2404  // Skip to next attribute
2405  continue;
2406  }
2407  const sal_Int32 nStt = pHt->GetStart();
2408  if( nStt >= aEndPos[nIdx] )
2409  {
2410  const SfxPoolItem& rItem =
2411  static_cast<const SwContentNode *>(pTextNd)->GetAttr( nWhich );
2412  if( bFont ? swhtml_css1atr_equalFontItems(rItem,pHt->GetAttr())
2413  : rItem == pHt->GetAttr() )
2414  {
2415  // The hint is the same as set in the paragraph and
2416  // therefore, it can be deleted
2417  // CAUTION!!! This WILL delete the hint and it MAY
2418  // also delete the SwpHints!!! To avoid any trouble
2419  // we leave the loop immediately if this is the last
2420  // hint.
2421  pTextNd->DeleteAttribute( pHt );
2422  if( 1 == nCntAttr )
2423  break;
2424  i--;
2425  nCntAttr--;
2426  }
2427  else
2428  {
2429  // The hint is different. Therefore all hints within that
2430  // hint have to be ignored.
2431  aEndPos[nIdx] = pHt->GetEnd() ? *pHt->GetEnd() : nStt;
2432  }
2433  }
2434  else
2435  {
2436  // The hint starts before another one ends.
2437  // The hint in this case is not deleted
2438  OSL_ENSURE( pHt->GetEnd() && *pHt->GetEnd() <= aEndPos[nIdx],
2439  "hints aren't nested properly!" );
2440  }
2441  }
2442  }
2443 
2444  if (!m_xTable && !--m_nParaCnt)
2445  Show();
2446 
2447  return bRet;
2448 }
2449 
2451 {
     // Adjusts the upper/lower spacing (RES_UL_SPACE) of the text node that
     // sits directly in front of the node the PaM points to. The work is done
     // only while m_bNoParSpace is set; the flag is cleared on the way in.
     // NOTE(review): the function header (listing line 2450) is not part of
     // this listing.
2452  //If it already has ParSpace, return
2453  if( !m_bNoParSpace )
2454  return;
2455 
2456  m_bNoParSpace = false;
2457 
      // index of the node immediately before the PaM's current node
2458  sal_uLong nNdIdx = m_pPam->GetPoint()->nNode.GetIndex() - 1;
2459 
      // nothing to do if that node is not a text node
2460  SwTextNode *pTextNode = m_xDoc->GetNodes()[nNdIdx]->GetTextNode();
2461  if( !pTextNode )
2462  return;
2463 
      // local copy of the RES_UL_SPACE item as returned by
      // SwContentNode::GetAttr() for this node
2464  SvxULSpaceItem rULSpace =
2465  static_cast<const SvxULSpaceItem&>(pTextNode->SwContentNode::GetAttr( RES_UL_SPACE ));
      // only act when the node currently has no lower spacing
2466  if( !rULSpace.GetLower() )
2467  {
2468  const SvxULSpaceItem& rCollULSpace =
2469  pTextNode->GetAnyFormatColl().GetULSpace();
      // The format collection already has a lower spacing and the same
      // upper spacing: resetting the node's own attribute is sufficient.
2470  if( rCollULSpace.GetLower() &&
2471  rCollULSpace.GetUpper() == rULSpace.GetUpper() )
2472  {
2473  pTextNode->ResetAttr( RES_UL_SPACE );
2474  }
2475  else
2476  {
2477  //What I do here, is that I examine the attributes, and if
2478  //I find out, that it's CJK/CTL, then I set the paragraph space
2479  //to the value set in HTML_CJK_PARSPACE/HTML_CTL_PARSPACE.
2480 
2481  bool bIsCJK = false;
2482  bool bIsCTL = false;
2483 
2484  const size_t nCntAttr = pTextNode->GetpSwpHints()
2485  ? pTextNode->GetSwpHints().Count() : 0;
2486 
      // The first CJK or CTL character attribute found among the hints
      // decides the script and stops the scan.
2487  for(size_t i = 0; i < nCntAttr; ++i)
2488  {
2489  SwTextAttr *const pHt = pTextNode->GetSwpHints().Get(i);
2490  sal_uInt16 const nWhich = pHt->Which();
2491  if (RES_CHRATR_CJK_FONT == nWhich ||
2492  RES_CHRATR_CJK_FONTSIZE == nWhich ||
2493  RES_CHRATR_CJK_LANGUAGE == nWhich ||
2494  RES_CHRATR_CJK_POSTURE == nWhich ||
2495  RES_CHRATR_CJK_WEIGHT == nWhich)
2496  {
2497  bIsCJK = true;
2498  break;
2499  }
2500  if (RES_CHRATR_CTL_FONT == nWhich ||
2501  RES_CHRATR_CTL_FONTSIZE == nWhich ||
2502  RES_CHRATR_CTL_LANGUAGE == nWhich ||
2503  RES_CHRATR_CTL_POSTURE == nWhich ||
2504  RES_CHRATR_CTL_WEIGHT == nWhich)
2505  {
2506  bIsCTL = true;
2507  break;
2508  }
2509  }
2510 
      // NOTE(review): the argument line of each of the three SetAttr()
      // calls below (listing lines 2514, 2519 and 2522) is missing from
      // this listing; restore them from the original file before building.
2511  if( bIsCTL )
2512  {
2513  pTextNode->SetAttr(
2515  }
2516  else if( bIsCJK )
2517  {
2518  pTextNode->SetAttr(
2520  } else {
2521  pTextNode->SetAttr(
2523  }
2524  }
2525  }
2526 }
2527 
2529 {
     // NOTE(review): the function header (listing line 2528) is not part of
     // this listing; the assertion text below names the routine "Show".
2530  // Here
2531  // - a EndAction is called, so the document is formatted
2532  // - a Reschedule is called,
2533  // - the own View-Shell is set again
2534  // - and a StartAction is called
2535 
2536  OSL_ENSURE( SvParserState::Working==eState, "Show not in working state - That can go wrong" );
      // CallEndAction() returns the view shell the action was running on
2537  SwViewShell *pOldVSh = CallEndAction();
2538 
      // NOTE(review): listing line 2539 is missing here - presumably the
      // Reschedule call announced in the comment above; confirm against
      // the original file.
2540 
      // The import counts as aborted when the doc shell reports it or when
      // the parser holds the only remaining reference to the document.
2541  if( ( m_xDoc->GetDocShell() && m_xDoc->GetDocShell()->IsAbortingImport() )
2542  || 1 == m_xDoc->getReferenceCount() )
2543  {
2544  // was the import aborted by SFX?
2545  eState = SvParserState::Error;
2546  }
2547 
2548  // Fetch the SwViewShell again, as it could be destroyed in Reschedule.
2549  SwViewShell *pVSh = CallStartAction( pOldVSh );
2550 
2551  // is the current node not visible anymore, then we use a bigger increment
2552  if( pVSh )
2553  {
      // NOTE(review): listing line 2554 (left-hand side and condition of
      // this conditional expression) is missing from this listing.
2555  ? 5 : 50;
2556  }
2557 }
2558 
2560 {
     // NOTE(review): the function header (listing line 2559) is not part of
     // this listing; the assertion text below names it "ShowStatLine".
2561  // Here
2562  // - a Reschedule is called, so it can be scrolled
2563  // - the own View-Shell is set again
2564  // - a StartAction/EndAction is called, when there was scrolling.
2565 
2566  OSL_ENSURE( SvParserState::Working==eState, "ShowStatLine not in working state - That can go wrong" );
2567 
2568  // scroll bar
2569  if (m_xProgress)
2570  {
      // report the current read position of the input stream
2571  m_xProgress->Update(rInput.Tell());
      // NOTE(review): listing line 2572 is missing here.
2573  }
2574  else
2575  {
      // NOTE(review): listing line 2576 is missing here - presumably the
      // Reschedule call announced in the header comment; confirm.
2577 
2578  if( ( m_xDoc->GetDocShell() && m_xDoc->GetDocShell()->IsAbortingImport() )
2579  || 1 == m_xDoc->getReferenceCount() )
2580  // was the import aborted by SFX?
2581  eState = SvParserState::Error;
2582 
      // NOTE(review): listing line 2583, which must declare pVSh, is
      // missing from this listing.
      // Restart the action when the view shell has an invalid rectangle.
2584  if( pVSh && pVSh->HasInvalidRect() )
2585  {
2586  CallEndAction( false, false );
2587  CallStartAction( pVSh, false );
2588  }
2589  }
2590 }
2591 
2593 {
     // Stores the view shell to work on in m_pActionViewShell, starts an
     // action on it and returns it (may be nullptr).
     // NOTE(review): the function header (listing line 2592) is not part of
     // this listing; pVSh and bChkPtr are its parameters.
2594  OSL_ENSURE( !m_pActionViewShell, "CallStartAction: SwViewShell already set" );
2595 
      // No shell passed in, or the caller wants it verified: ask the
      // document layout for its current view shell.
2596  if( !pVSh || bChkPtr )
2597  {
2598 #if OSL_DEBUG_LEVEL > 0
2599  SwViewShell *pOldVSh = pVSh;
2600 #endif
2601  pVSh = m_xDoc->getIDocumentLayoutAccess().GetCurrentViewShell();
2602 #if OSL_DEBUG_LEVEL > 0
2603  OSL_ENSURE( !pVSh || !pOldVSh || pOldVSh == pVSh, "CallStartAction: Who swapped the SwViewShell?" );
      // pVSh is already null when this branch is taken, so the
      // assignment changes nothing.
      // NOTE(review): presumably kept as a breakpoint spot - confirm.
2604  if( pOldVSh && !pVSh )
2605  pVSh = nullptr;
2606 #endif
2607  }
2608  m_pActionViewShell = pVSh;
2609 
2610  if( m_pActionViewShell )
2611  {
      // SwEditShell gets its own StartAction() via a downcast
2612  if( dynamic_cast< const SwEditShell *>( m_pActionViewShell ) != nullptr )
2613  static_cast<SwEditShell*>(m_pActionViewShell)->StartAction();
2614  else
      // NOTE(review): the statement of this else branch (listing line
      // 2615) is missing from this listing.
2616  }
2617 
2618  return m_pActionViewShell;
2619 }
2620 
2621 SwViewShell *SwHTMLParser::CallEndAction( bool bChkAction, bool bChkPtr )
2622 {
2623  if( bChkPtr )
2624  {
2625  SwViewShell *pVSh = m_xDoc->getIDocumentLayoutAccess().GetCurrentViewShell();
2626  OSL_ENSURE( !pVSh || m_pActionViewShell == pVSh,
2627  "CallEndAction: Who swapped the SwViewShell?" );
2628 #if OSL_DEBUG_LEVEL > 0
2629  if( m_pActionViewShell && !pVSh )
2630  pVSh = nullptr;
2631 #endif
2632  if( pVSh != m_pActionViewShell )
2633  m_pActionViewShell = nullptr;
2634  }
2635 
2636  if( !m_pActionViewShell || (bChkAction && !m_pActionViewShell->ActionPend()) )
2637  return m_pActionViewShell;
2638 
2639  if( dynamic_cast< const SwEditShell *>( m_pActionViewShell ) != nullptr )
2640  {
2641  // Already scrolled?, then make sure that the view doesn't move!
2642  const bool bOldLock = m_pActionViewShell->IsViewLocked();
2643  m_pActionViewShell->LockView( true );
2644  const bool bOldEndActionByVirDev = m_pActionViewShell->IsEndActionByVirDev();
2646  static_cast<SwEditShell*>(m_pActionViewShell)->EndAction();
2647  m_pActionViewShell->SetEndActionByVirDev( bOldEndActionByVirDev );
2648  m_pActionViewShell->LockView( bOldLock );
2649 
2650  // bChkJumpMark is only set when the object was also found
2651  if( m_bChkJumpMark )
2652  {
2653  const Point aVisSttPos( DOCUMENTBORDER, DOCUMENTBORDER );
2654  if( GetMedium() && aVisSttPos == m_pActionViewShell->VisArea().Pos() )
2656  GetMedium()->GetURLObject().GetMark() );
2657  m_bChkJumpMark = false;
2658  }
2659  }
2660  else
2662 
2663  // if the parser holds the last reference to the document, then we can
2664  // abort here and set an error.
2665  if( 1 == m_xDoc->getReferenceCount() )
2666  {
2667  eState = SvParserState::Error;
2668  }
2669 
2671  m_pActionViewShell = nullptr;
2672 
2673  return pVSh;
2674 }
2675 
2677 {
     // Compares m_pActionViewShell with the document's current view shell,
     // resets it to nullptr when they differ, and returns the (possibly
     // reset) m_pActionViewShell.
     // NOTE(review): the function header (listing line 2676) is not part of
     // this listing; the assertion text names it "CheckActionViewShell".
2678  SwViewShell *pVSh = m_xDoc->getIDocumentLayoutAccess().GetCurrentViewShell();
2679  OSL_ENSURE( !pVSh || m_pActionViewShell == pVSh,
2680  "CheckActionViewShell: Who has swapped SwViewShell?" );
2681 #if OSL_DEBUG_LEVEL > 0
      // pVSh is already null when this branch is taken, so the assignment
      // changes nothing.
2682  if( m_pActionViewShell && !pVSh )
2683  pVSh = nullptr;
2684 #endif
2685  if( pVSh != m_pActionViewShell )
2686  m_pActionViewShell = nullptr;
2687 
2688  return m_pActionViewShell;
2689 }
2690 
2691 void SwHTMLParser::SetAttr_( bool bChkEnd, bool bBeforeTable,
2692  std::deque<std::unique_ptr<HTMLAttr>> *pPostIts )
2693 {
     // Inserts finished attributes from m_aSetAttrTab into the document.
     // The table is walked from back to front; every entry that passes the
     // bSetAttr test is removed from the table and applied together with
     // its chain of previous attributes (GetPrev()). Afterwards the fly
     // frames queued in m_aMoveFlyFrames are re-anchored to a character,
     // and finally the field attributes collected in aFields are inserted.
     //
     // bChkEnd       selects the stricter test that compares the attribute
     //               end with the current PaM position.
     // bBeforeTable  an attribute ending in the current node is ended in the
     //               previous node, or discarded if it also starts here.
     // pPostIts      if non-null, postit and script fields are handed over
     //               to this deque instead of being inserted here.
     //
     // NOTE(review): listing lines 2869-2870, 2876, 2887, 2921, 2923, 2931
     // and 2935 are missing from this listing, so the statements around
     // them are incomplete as shown.
2694  std::unique_ptr<SwPaM> pAttrPam( new SwPaM( *m_pPam->GetPoint() ) );
2695  const SwNodeIndex& rEndIdx = m_pPam->GetPoint()->nNode;
2696  const sal_Int32 nEndCnt = m_pPam->GetPoint()->nContent.GetIndex();
2697  HTMLAttr* pAttr;
2698  SwContentNode* pCNd;
2699 
      // fields are collected here and inserted last (see end of function)
2700  std::vector<std::unique_ptr<HTMLAttr>> aFields;
2701 
2702  for( auto n = m_aSetAttrTab.size(); n; )
2703  {
2704  pAttr = m_aSetAttrTab[ --n ];
2705  sal_uInt16 nWhich = pAttr->m_pItem->Which();
2706 
2707  sal_uLong nEndParaIdx = pAttr->GetEndParaIdx();
2708  bool bSetAttr;
2709  if( bChkEnd )
2710  {
2711  // Set character attribute with end early on, so set them still in
2712  // the current paragraph (because of JavaScript and various "chats"(?)).
2713  // This shouldn't be done for attributes which are used for
2714  // the whole paragraph, because they could be from a paragraph style
2715  // which can't be set. Because the attributes are inserted with
2716  // SETATTR_DONTREPLACE, they should be able to be set later.
2717  bSetAttr = ( nEndParaIdx < rEndIdx.GetIndex() &&
2718  (RES_LR_SPACE != nWhich || !GetNumInfo().GetNumRule()) ) ||
2719  ( !pAttr->IsLikePara() &&
2720  nEndParaIdx == rEndIdx.GetIndex() &&
2721  pAttr->GetEndCnt() < nEndCnt &&
2722  (isCHRATR(nWhich) || isTXTATR_WITHEND(nWhich)) ) ||
2723  ( bBeforeTable &&
2724  nEndParaIdx == rEndIdx.GetIndex() &&
2725  !pAttr->GetEndCnt() );
2726  }
2727  else
2728  {
2729  // Attributes in body nodes array section shouldn't be set if we are in a
2730  // special nodes array section, but vice versa it's possible.
2731  sal_uLong nEndOfIcons = m_xDoc->GetNodes().GetEndOfExtras().GetIndex();
2732  bSetAttr = nEndParaIdx < rEndIdx.GetIndex() ||
2733  rEndIdx.GetIndex() > nEndOfIcons ||
2734  nEndParaIdx <= nEndOfIcons;
2735  }
2736 
2737  if( bSetAttr )
2738  {
2739  // The attribute shouldn't be in the list of temporary paragraph
2740  // attributes, because then it would be deleted.
      // Note that this loop empties m_aParaAttrs completely.
2741  while( !m_aParaAttrs.empty() )
2742  {
2743  OSL_ENSURE( pAttr != m_aParaAttrs.back(),
2744  "SetAttr: Attribute must not yet be set" );
2745  m_aParaAttrs.pop_back();
2746  }
2747 
2748  // then set it
2749  m_aSetAttrTab.erase( m_aSetAttrTab.begin() + n );
2750 
      // Apply pAttr and then each attribute of its Prev chain. Every
      // path through the loop body either deletes pAttr or hands its
      // ownership to pPostIts/aFields before moving on to pPrev.
2751  while( pAttr )
2752  {
2753  HTMLAttr *pPrev = pAttr->GetPrev();
2754  if( !pAttr->m_bValid )
2755  {
2756  // invalid attributes can be deleted
2757  delete pAttr;
2758  pAttr = pPrev;
2759  continue;
2760  }
2761 
2762  pCNd = pAttr->m_nStartPara.GetNode().GetContentNode();
2763  if( !pCNd )
2764  {
2765  // because of the awful deleting of nodes an index can also
2766  // point to an end node :-(
2767  if ( (pAttr->GetSttPara() == pAttr->GetEndPara()) &&
2768  !isTXTATR_NOEND(nWhich) )
2769  {
2770  // when the end index also points to the node, we don't
2771  // need to set attributes anymore, except if it's a text attribute.
2772  delete pAttr;
2773  pAttr = pPrev;
2774  continue;
2775  }
2776  pCNd = m_xDoc->GetNodes().GoNext( &(pAttr->m_nStartPara) );
2777  if( pCNd )
2778  pAttr->m_nStartContent = 0;
2779  else
2780  {
2781  OSL_ENSURE( false, "SetAttr: GoNext() failed!" );
2782  delete pAttr;
2783  pAttr = pPrev;
2784  continue;
2785  }
2786  }
2787  pAttrPam->GetPoint()->nNode = pAttr->m_nStartPara;
2788 
2789  // because of the deleting of BRs the start index can also
2790  // point behind the end the text
2791  if( pAttr->m_nStartContent > pCNd->Len() )
2792  pAttr->m_nStartContent = pCNd->Len();
2793  pAttrPam->GetPoint()->nContent.Assign( pCNd, pAttr->m_nStartContent );
2794 
      // the mark stays at the attribute start, the point moves to its end
2795  pAttrPam->SetMark();
2796  if ( (pAttr->GetSttPara() != pAttr->GetEndPara()) &&
2797  !isTXTATR_NOEND(nWhich) )
2798  {
2799  pCNd = pAttr->m_nEndPara.GetNode().GetContentNode();
2800  if( !pCNd )
2801  {
2802  pCNd = SwNodes::GoPrevious( &(pAttr->m_nEndPara) );
2803  if( pCNd )
2804  pAttr->m_nEndContent = pCNd->Len();
2805  else
2806  {
2807  OSL_ENSURE( false, "SetAttr: GoPrevious() failed!" );
2808  pAttrPam->DeleteMark();
2809  delete pAttr;
2810  pAttr = pPrev;
2811  continue;
2812  }
2813  }
2814 
2815  pAttrPam->GetPoint()->nNode = pAttr->m_nEndPara;
2816  }
2817  else if( pAttr->IsLikePara() )
2818  {
2819  pAttr->m_nEndContent = pCNd->Len();
2820  }
2821 
2822  // because of the deleting of BRs the start index can also
2823  // point behind the end the text
2824  if( pAttr->m_nEndContent > pCNd->Len() )
2825  pAttr->m_nEndContent = pCNd->Len();
2826 
2827  pAttrPam->GetPoint()->nContent.Assign( pCNd, pAttr->m_nEndContent );
2828  if( bBeforeTable &&
2829  pAttrPam->GetPoint()->nNode.GetIndex() ==
2830  rEndIdx.GetIndex() )
2831  {
2832  // If we're before inserting a table and the attribute ends
2833  // in the current node, then we must end it in the previous
2834  // node or discard it, if it starts in that node.
2835  if( nWhich != RES_BREAK && nWhich != RES_PAGEDESC &&
2836  !isTXTATR_NOEND(nWhich) )
2837  {
2838  if( pAttrPam->GetMark()->nNode.GetIndex() !=
2839  rEndIdx.GetIndex() )
2840  {
2841  OSL_ENSURE( !pAttrPam->GetPoint()->nContent.GetIndex(),
2842  "Content-Position before table not 0???" );
2843  pAttrPam->Move( fnMoveBackward );
2844  }
2845  else
2846  {
2847  pAttrPam->DeleteMark();
2848  delete pAttr;
2849  pAttr = pPrev;
2850  continue;
2851  }
2852  }
2853  }
2854 
2855  switch( nWhich )
2856  {
2857  case RES_FLTR_BOOKMARK: // insert bookmark
2858  {
2859  const OUString sName( static_cast<SfxStringItem*>(pAttr->m_pItem.get())->GetValue() );
2860  IDocumentMarkAccess* const pMarkAccess = m_xDoc->getIDocumentMarkAccess();
2861  IDocumentMarkAccess::const_iterator_t ppBkmk = pMarkAccess->findMark( sName );
2862  if( ppBkmk != pMarkAccess->getAllMarksEnd() &&
2863  (*ppBkmk)->GetMarkStart() == *pAttrPam->GetPoint() )
2864  break; // do not generate duplicates on this position
2865  pAttrPam->DeleteMark();
2866  const ::sw::mark::IMark* const pNewMark = pMarkAccess->makeMark(
2867  *pAttrPam,
2868  sName,
2871 
2872  // jump to bookmark
2873  if( JumpToMarks::Mark == m_eJumpTo && pNewMark->GetName() == m_sJmpMark )
2874  {
2875  m_bChkJumpMark = true;
2877  }
2878  }
2879  break;
2880  case RES_TXTATR_FIELD:
2881  case RES_TXTATR_ANNOTATION:
2882  case RES_TXTATR_INPUTFIELD:
2883  {
2884  SwFieldIds nFieldWhich =
2885  pPostIts
2886  ? static_cast<const SwFormatField *>(pAttr->m_pItem.get())->GetField()->GetTyp()->Which()
2888  if( pPostIts && (SwFieldIds::Postit == nFieldWhich ||
2889  SwFieldIds::Script == nFieldWhich) )
2890  {
2891  pPostIts->emplace_front( pAttr );
2892  }
2893  else
2894  {
2895  aFields.emplace_back( pAttr);
2896  }
2897  }
      // ownership of pAttr went to pPostIts/aFields above, so it is
      // not deleted here
2898  pAttrPam->DeleteMark();
2899  pAttr = pPrev;
2900  continue;
2901 
2902  case RES_LR_SPACE:
2903  if( pAttrPam->GetPoint()->nNode.GetIndex() ==
2904  pAttrPam->GetMark()->nNode.GetIndex())
2905  {
2906  // because of numbering set this attribute directly at node
2907  pCNd->SetAttr( *pAttr->m_pItem );
2908  break;
2909  }
2910  OSL_ENSURE( false,
2911  "LRSpace set over multiple paragraphs!" );
2912  [[fallthrough]]; // (shouldn't reach this point anyway)
2913 
2914  // tdf#94088 expand RES_BACKGROUND to the new fill attribute
2915  // definitions in the range [XATTR_FILL_FIRST .. XATTR_FILL_LAST].
2916  // This is the right place in the future if the adapted fill attributes
2917  // may be handled more directly in HTML import to handle them.
2918  case RES_BACKGROUND:
2919  {
2920  const SvxBrushItem& rBrush = static_cast< SvxBrushItem& >(*pAttr->m_pItem);
2922 
2924  m_xDoc->getIDocumentContentOperations().InsertItemSet(*pAttrPam, aNewSet, SetAttrMode::DONTREPLACE);
2925  break;
2926  }
2927  default:
2928 
2929  // maybe jump to a bookmark
2930  if( RES_TXTATR_INETFMT == nWhich &&
2932  m_sJmpMark == static_cast<SwFormatINetFormat*>(pAttr->m_pItem.get())->GetName() )
2933  {
2934  m_bChkJumpMark = true;
2936  }
2937 
2938  m_xDoc->getIDocumentContentOperations().InsertPoolItem( *pAttrPam, *pAttr->m_pItem, SetAttrMode::DONTREPLACE );
2939  }
2940  pAttrPam->DeleteMark();
2941 
2942  delete pAttr;
2943  pAttr = pPrev;
2944  }
2945  }
2946  }
2947 
      // Second pass: fly frames whose anchor lies before the current
      // position are re-anchored from at-paragraph to at-character.
2948  for( auto n = m_aMoveFlyFrames.size(); n; )
2949  {
2950  SwFrameFormat *pFrameFormat = m_aMoveFlyFrames[ --n ];
2951 
2952  const SwFormatAnchor& rAnchor = pFrameFormat->GetAnchor();
2953  OSL_ENSURE( RndStdIds::FLY_AT_PARA == rAnchor.GetAnchorId(),
2954  "Only At-Para flys need special handling" );
2955  const SwPosition *pFlyPos = rAnchor.GetContentAnchor();
2956  sal_uLong nFlyParaIdx = pFlyPos->nNode.GetIndex();
2957  bool bMoveFly;
2958  if( bChkEnd )
2959  {
2960  bMoveFly = nFlyParaIdx < rEndIdx.GetIndex() ||
2961  ( nFlyParaIdx == rEndIdx.GetIndex() &&
2962  m_aMoveFlyCnts[n] < nEndCnt );
2963  }
2964  else
2965  {
2966  sal_uLong nEndOfIcons = m_xDoc->GetNodes().GetEndOfExtras().GetIndex();
2967  bMoveFly = nFlyParaIdx < rEndIdx.GetIndex() ||
2968  rEndIdx.GetIndex() > nEndOfIcons ||
2969  nFlyParaIdx <= nEndOfIcons;
2970  }
2971  if( bMoveFly )
2972  {
      // frames are removed before the anchor change and rebuilt after it
2973  pFrameFormat->DelFrames();
2974  *pAttrPam->GetPoint() = *pFlyPos;
2975  pAttrPam->GetPoint()->nContent.Assign( pAttrPam->GetContentNode(),
2976  m_aMoveFlyCnts[n] );
2977  SwFormatAnchor aAnchor( rAnchor );
2978  aAnchor.SetType( RndStdIds::FLY_AT_CHAR );
2979  aAnchor.SetAnchor( pAttrPam->GetPoint() );
2980  pFrameFormat->SetFormatAttr( aAnchor );
2981 
2982  const SwFormatHoriOrient& rHoriOri = pFrameFormat->GetHoriOrient();
2983  if( text::HoriOrientation::LEFT == rHoriOri.GetHoriOrient() )
2984  {
2985  SwFormatHoriOrient aHoriOri( rHoriOri );
2986  aHoriOri.SetRelationOrient( text::RelOrientation::CHAR );
2987  pFrameFormat->SetFormatAttr( aHoriOri );
2988  }
2989  const SwFormatVertOrient& rVertOri = pFrameFormat->GetVertOrient();
2990  if( text::VertOrientation::TOP == rVertOri.GetVertOrient() )
2991  {
2992  SwFormatVertOrient aVertOri( rVertOri );
2993  aVertOri.SetRelationOrient( text::RelOrientation::CHAR );
2994  pFrameFormat->SetFormatAttr( aVertOri );
2995  }
2996 
2997  pFrameFormat->MakeFrames();
      // the two containers are indexed in parallel, erase from both
2998  m_aMoveFlyFrames.erase( m_aMoveFlyFrames.begin() + n );
2999  m_aMoveFlyCnts.erase( m_aMoveFlyCnts.begin() + n );
3000  }
3001  }
      // Third pass: insert the fields collected in the first pass.
3002  for (auto & field : aFields)
3003  {
3004  pCNd = field->m_nStartPara.GetNode().GetContentNode();
3005  pAttrPam->GetPoint()->nNode = field->m_nStartPara;
3006  pAttrPam->GetPoint()->nContent.Assign( pCNd, field->m_nStartContent );
3007 
3008  if( bBeforeTable &&
3009  pAttrPam->GetPoint()->nNode.GetIndex() == rEndIdx.GetIndex() )
3010  {
3011  OSL_ENSURE( !bBeforeTable, "Aha, the case does occur" );
3012  OSL_ENSURE( !pAttrPam->GetPoint()->nContent.GetIndex(),
3013  "Content-Position before table not 0???" );
3014  // !!!
3015  pAttrPam->Move( fnMoveBackward );
3016  }
3017 
3018  m_xDoc->getIDocumentContentOperations().InsertPoolItem( *pAttrPam, *field->m_pItem );
3019 
      // release the attribute right after it has been inserted
3020  field.reset();
3021  }
3022  aFields.clear();
3023 }
3024 
3025 void SwHTMLParser::NewAttr(const std::shared_ptr<HTMLAttrTable>& rAttrTable, HTMLAttr **ppAttr, const SfxPoolItem& rItem )
3026 {
3027  // Font height and font colour as well as escape attributes may not be
3028  // combined. Therefore they're saved in a list and in it the last opened
3029  // attribute is at the beginning and count is always one. For all other
3030  // attributes count is just incremented.
3031  if( *ppAttr )
3032  {
3033  HTMLAttr *pAttr = new HTMLAttr(*m_pPam->GetPoint(), rItem, ppAttr, rAttrTable);
3034  pAttr->InsertNext( *ppAttr );
3035  (*ppAttr) = pAttr;
3036  }
3037  else
3038  (*ppAttr) = new HTMLAttr(*m_pPam->GetPoint(), rItem, ppAttr, rAttrTable);
3039 }
3040 
3041 bool SwHTMLParser::EndAttr( HTMLAttr* pAttr, bool bChkEmpty )
3042 {
3043  bool bRet = true;
3044 
3045  // The list header is saved in the attribute.
3046  HTMLAttr **ppHead = pAttr->m_ppHead;
3047 
3048  OSL_ENSURE( ppHead, "No list header attribute found!" );
3049 
3050  // save the current position as end position
3051  const SwNodeIndex* pEndIdx = &m_pPam->GetPoint()->nNode;
3052  sal_Int32 nEndCnt = m_pPam->GetPoint()->nContent.GetIndex();
3053 
3054  // Is the last started or an earlier started attribute being ended?
3055  HTMLAttr *pLast = nullptr;
3056  if( ppHead && pAttr != *ppHead )
3057  {
3058  // The last started attribute isn't being ended
3059 
3060  // Then we look for attribute which was started immediately afterwards,
3061  // which has also not yet been ended (otherwise it would no longer be
3062  // in the list).
3063  pLast = *ppHead;
3064  while( pLast && pLast->GetNext() != pAttr )
3065  pLast = pLast->GetNext();
3066 
3067  OSL_ENSURE( pLast, "Attribute not found in own list!" );
3068  }
3069 
3070  bool bMoveBack = false;
3071  sal_uInt16 nWhich = pAttr->m_pItem->Which();
3072  if( !nEndCnt && RES_PARATR_BEGIN <= nWhich &&
3073  *pEndIdx != pAttr->GetSttPara() )
3074  {
3075  // Then move back one position in the content!
3076  bMoveBack = m_pPam->Move( fnMoveBackward );
3077  nEndCnt = m_pPam->GetPoint()->nContent.GetIndex();
3078  }
3079 
3080  // now end the attribute
3081  HTMLAttr *pNext = pAttr->GetNext();
3082 
3083  bool bInsert;
3084  sal_uInt16 nScriptItem = 0;
3085  bool bScript = false;
3086  // does it have a non-empty range?
3087  if( !bChkEmpty || (RES_PARATR_BEGIN <= nWhich && bMoveBack) ||
3088  RES_PAGEDESC == nWhich || RES_BREAK == nWhich ||
3089  *pEndIdx != pAttr->GetSttPara() ||
3090  nEndCnt != pAttr->GetSttCnt() )
3091  {
3092  bInsert = true;
3093  // We do some optimization for script dependent attributes here.
3094  if( *pEndIdx == pAttr->GetSttPara() )
3095  {
3096  lcl_swhtml_getItemInfo( *pAttr, bScript, nScriptItem );
3097  }
3098  }
3099  else
3100  {
3101  bInsert = false;
3102  }
3103 
3104  const SwTextNode *pTextNd = (bInsert && bScript) ?
3105  pAttr->GetSttPara().GetNode().GetTextNode() :
3106  nullptr;
3107 
3108  if (pTextNd)
3109  {
3110  const OUString& rText = pTextNd->GetText();
3111  sal_uInt16 nScriptText = g_pBreakIt->GetBreakIter()->getScriptType(
3112  rText, pAttr->GetSttCnt() );
3113  sal_Int32 nScriptEnd = g_pBreakIt->GetBreakIter()
3114  ->endOfScript( rText, pAttr->GetSttCnt(), nScriptText );
3115  while (nScriptEnd < nEndCnt && nScriptEnd != -1)
3116  {
3117  if( nScriptItem == nScriptText )
3118  {
3119  HTMLAttr *pSetAttr = pAttr->Clone( *pEndIdx, nScriptEnd );
3120  pSetAttr->ClearPrev();
3121  if( pNext )
3122  pNext->InsertPrev( pSetAttr );
3123  else
3124  {
3125  if (pSetAttr->m_bInsAtStart)
3126  m_aSetAttrTab.push_front( pSetAttr );
3127  else
3128  m_aSetAttrTab.push_back( pSetAttr );
3129  }
3130  }
3131  pAttr->m_nStartContent = nScriptEnd;
3132  nScriptText = g_pBreakIt->GetBreakIter()->getScriptType(
3133  rText, nScriptEnd );
3134  nScriptEnd = g_pBreakIt->GetBreakIter()
3135  ->endOfScript( rText, nScriptEnd, nScriptText );
3136  }
3137  bInsert = nScriptItem == nScriptText;
3138  }
3139  if( bInsert )
3140  {
3141  pAttr->m_nEndPara = *pEndIdx;
3142  pAttr->m_nEndContent = nEndCnt;
3143  pAttr->m_bInsAtStart = RES_TXTATR_INETFMT != nWhich &&
3144  RES_TXTATR_CHARFMT != nWhich;
3145 
3146  if( !pNext )
3147  {
3148  // No open attributes of that type exists any longer, so all
3149  // can be set. Except they depend on another attribute, then
3150  // they're appended there.
3151  if (pAttr->m_bInsAtStart)
3152  m_aSetAttrTab.push_front( pAttr );
3153  else
3154  m_aSetAttrTab.push_back( pAttr );
3155  }
3156  else
3157  {
3158  // There are other open attributes of that type,
3159  // therefore the setting must be postponed.
3160  // Hence the current attribute is added at the end
3161  // of the Prev-List of the successor.
3162  pNext->InsertPrev( pAttr );
3163  }
3164  }
3165  else
3166  {
3167  // Then don't insert, but delete. Because of the "faking" of styles
3168  // by hard attributing there can be also other empty attributes in the
3169  // Prev-List, which must be set anyway.
3170  HTMLAttr *pPrev = pAttr->GetPrev();
3171  bRet = false;
3172  delete pAttr;
3173 
3174  if( pPrev )
3175  {
3176  // The previous attributes must be set anyway.
3177  if( pNext )
3178  pNext->InsertPrev( pPrev );
3179  else
3180  {
3181  if (pPrev->m_bInsAtStart)
3182  m_aSetAttrTab.push_front( pPrev );
3183  else
3184  m_aSetAttrTab.push_back( pPrev );
3185  }
3186  }
3187 
3188  }
3189 
3190  // If the first attribute of the list was set, then the list header
3191  // must be corrected as well.
3192  if( pLast )
3193  pLast->m_pNext = pNext;
3194  else if( ppHead )
3195  *ppHead = pNext;
3196 
3197  if( bMoveBack )
3199 
3200  return bRet;
3201 }
3202 
3204 {
     // Deletes the open attribute pAttr without setting it. Its Prev chain
     // is still queued for setting, and the list of open attributes it
     // belonged to is relinked around it.
     // NOTE(review): the function header (listing line 3203) is not part of
     // this listing; pAttr is its parameter.
3205  // preliminary paragraph attributes are not allowed here, they could
3206  // be set here and then the pointers become invalid!
3207  OSL_ENSURE(m_aParaAttrs.empty(),
3208  "Danger: there are non-final paragraph attributes");
3209  m_aParaAttrs.clear();
3210 
3211  // The list header is saved in the attribute
3212  HTMLAttr **ppHead = pAttr->m_ppHead;
3213 
3214  OSL_ENSURE( ppHead, "no list header attribute found!" );
3215 
3216  // Is the last started or an earlier started attribute being removed?
3217  HTMLAttr *pLast = nullptr;
3218  if( ppHead && pAttr != *ppHead )
3219  {
3220  // The last started attribute isn't being ended
3221 
3222  // Then we look for attribute which was started immediately afterwards,
3223  // which has also not yet been ended (otherwise it would no longer be
3224  // in the list).
3225  pLast = *ppHead;
3226  while( pLast && pLast->GetNext() != pAttr )
3227  pLast = pLast->GetNext();
3228 
3229  OSL_ENSURE( pLast, "Attribute not found in own list!" );
3230  }
3231 
3232  // now delete the attribute
      // read both neighbours first, pAttr is gone after the delete
3233  HTMLAttr *pNext = pAttr->GetNext();
3234  HTMLAttr *pPrev = pAttr->GetPrev();
3235  //hold ref to xAttrTab until end of scope to ensure *ppHead validity
3236  std::shared_ptr<HTMLAttrTable> xAttrTab(pAttr->m_xAttrTab);
3237  delete pAttr;
3238 
3239  if( pPrev )
3240  {
3241  // The previous attributes must be set anyway.
      // They are chained to the next open attribute if there is one,
      // otherwise queued in m_aSetAttrTab right away.
3242  if( pNext )
3243  pNext->InsertPrev( pPrev );
3244  else
3245  {
3246  if (pPrev->m_bInsAtStart)
3247  m_aSetAttrTab.push_front( pPrev );
3248  else
3249  m_aSetAttrTab.push_back( pPrev );
3250  }
3251  }
3252 
3253  // If the first attribute of the list was deleted, then the list header
3254  // must be corrected as well.
3255  if( pLast )
3256  pLast->m_pNext = pNext;
3257  else if( ppHead )
3258  *ppHead = pNext;
3259 }
3260 
3261 void SwHTMLParser::SaveAttrTab(std::shared_ptr<HTMLAttrTable> const & rNewAttrTab)
3262 {
3263  // preliminary paragraph attributes are not allowed here, they could
3264  // be set here and then the pointers become invalid!
3265  OSL_ENSURE(m_aParaAttrs.empty(),
3266  "Danger: there are non-final paragraph attributes");
3267  m_aParaAttrs.clear();
3268 
3269  HTMLAttr** pHTMLAttributes = reinterpret_cast<HTMLAttr**>(m_xAttrTab.get());
3270  HTMLAttr** pSaveAttributes = reinterpret_cast<HTMLAttr**>(rNewAttrTab.get());
3271 
3272  for (auto nCnt = sizeof(HTMLAttrTable) / sizeof(HTMLAttr*); nCnt--; ++pHTMLAttributes, ++pSaveAttributes)
3273  {
3274  *pSaveAttributes = *pHTMLAttributes;
3275 
3276  HTMLAttr *pAttr = *pSaveAttributes;
3277  while (pAttr)
3278  {
3279  pAttr->SetHead(pSaveAttributes, rNewAttrTab);
3280  pAttr = pAttr->GetNext();
3281  }
3282 
3283  *pHTMLAttributes = nullptr;
3284  }
3285 }
3286 
3287 void SwHTMLParser::SplitAttrTab( std::shared_ptr<HTMLAttrTable> const & rNewAttrTab,
3288  bool bMoveEndBack )
3289 {
3290  // preliminary paragraph attributes are not allowed here, they could
3291  // be set here and then the pointers become invalid!
3292  OSL_ENSURE(m_aParaAttrs.empty(),
3293  "Danger: there are non-final paragraph attributes");
3294  m_aParaAttrs.clear();
3295 
3296  const SwNodeIndex& nSttIdx = m_pPam->GetPoint()->nNode;
3297  SwNodeIndex nEndIdx( nSttIdx );
3298 
3299  // close all still open attributes and re-open them after the table
3300  HTMLAttr** pHTMLAttributes = reinterpret_cast<HTMLAttr**>(m_xAttrTab.get());
3301  HTMLAttr** pSaveAttributes = reinterpret_cast<HTMLAttr**>(rNewAttrTab.get());
3302  bool bSetAttr = true;
3303  const sal_Int32 nSttCnt = m_pPam->GetPoint()->nContent.GetIndex();
3304  sal_Int32 nEndCnt = nSttCnt;
3305 
3306  if( bMoveEndBack )
3307  {
3308  sal_uLong nOldEnd = nEndIdx.GetIndex();
3309  sal_uLong nTmpIdx;
3310  if( ( nTmpIdx = m_xDoc->GetNodes().GetEndOfExtras().GetIndex()) >= nOldEnd ||
3311  ( nTmpIdx = m_xDoc->GetNodes().GetEndOfAutotext().GetIndex()) >= nOldEnd )
3312  {
3313  nTmpIdx = m_xDoc->GetNodes().GetEndOfInserts().GetIndex();
3314  }
3315  SwContentNode* pCNd = SwNodes::GoPrevious(&nEndIdx);
3316 
3317  // Don't set attributes, when the PaM was moved outside of the content area.
3318  bSetAttr = pCNd && nTmpIdx < nEndIdx.GetIndex();
3319 
3320  nEndCnt = (bSetAttr ? pCNd->Len() : 0);
3321  }
3322  for (auto nCnt = sizeof(HTMLAttrTable) / sizeof(HTMLAttr*); nCnt--; (++pHTMLAttributes, ++pSaveAttributes))
3323  {
3324  HTMLAttr *pAttr = *pHTMLAttributes;
3325  *pSaveAttributes = nullptr;
3326  while( pAttr )
3327  {
3328  HTMLAttr *pNext = pAttr->GetNext();
3329  HTMLAttr *pPrev = pAttr->GetPrev();
3330 
3331  if( bSetAttr &&
3332  ( pAttr->GetSttParaIdx() < nEndIdx.GetIndex() ||
3333  (pAttr->GetSttPara() == nEndIdx &&
3334  pAttr->GetSttCnt() != nEndCnt) ) )
3335  {
3336  // The attribute must be set before the list. We need the
3337  // original and therefore we clone it, because pointer to the
3338  // attribute exist in the other contexts. The Next-List is lost
3339  // in doing so, but the Previous-List is preserved.
3340  HTMLAttr *pSetAttr = pAttr->Clone( nEndIdx, nEndCnt );
3341 
3342  if( pNext )
3343  pNext->InsertPrev( pSetAttr );
3344  else
3345  {
3346  if (pSetAttr->m_bInsAtStart)
3347  m_aSetAttrTab.push_front( pSetAttr );
3348  else
3349  m_aSetAttrTab.push_back( pSetAttr );
3350  }
3351  }
3352  else if( pPrev )
3353  {
3354  // If the attribute doesn't need to be set before the table, then
3355  // the previous attributes must still be set.
3356  if( pNext )
3357  pNext->InsertPrev( pPrev );
3358  else
3359  {
3360  if (pPrev->m_bInsAtStart)
3361  m_aSetAttrTab.push_front( pPrev );
3362  else
3363  m_aSetAttrTab.push_back( pPrev );
3364  }
3365  }
3366 
3367  // set the start of the attribute anew and break link
3368  pAttr->Reset(nSttIdx, nSttCnt, pSaveAttributes, rNewAttrTab);
3369 
3370  if (*pSaveAttributes)
3371  {
3372  HTMLAttr *pSAttr = *pSaveAttributes;
3373  while( pSAttr->GetNext() )
3374  pSAttr = pSAttr->GetNext();
3375  pSAttr->InsertNext( pAttr );
3376  }
3377  else
3378  *pSaveAttributes = pAttr;
3379 
3380  pAttr = pNext;
3381  }
3382 
3383  *pHTMLAttributes = nullptr;
3384  }
3385 }
3386 
3387 void SwHTMLParser::RestoreAttrTab(std::shared_ptr<HTMLAttrTable> const & rNewAttrTab)
3388 {
3389  // preliminary paragraph attributes are not allowed here, they could
3390  // be set here and then the pointers become invalid!
3391  OSL_ENSURE(m_aParaAttrs.empty(),
3392  "Danger: there are non-final paragraph attributes");
3393  m_aParaAttrs.clear();
3394 
3395  HTMLAttr** pHTMLAttributes = reinterpret_cast<HTMLAttr**>(m_xAttrTab.get());
3396  HTMLAttr** pSaveAttributes = reinterpret_cast<HTMLAttr**>(rNewAttrTab.get());
3397 
3398  for (auto nCnt = sizeof(HTMLAttrTable) / sizeof(HTMLAttr*); nCnt--; ++pHTMLAttributes, ++pSaveAttributes)
3399  {
3400  OSL_ENSURE(!*pHTMLAttributes, "The attribute table is not empty!");
3401 
3402  *pHTMLAttributes = *pSaveAttributes;
3403 
3404  HTMLAttr *pAttr = *pHTMLAttributes;
3405  while (pAttr)
3406  {
3407  OSL_ENSURE( !pAttr->GetPrev() || !pAttr->GetPrev()->m_ppHead,
3408  "Previous attribute has still a header" );
3409  pAttr->SetHead(pHTMLAttributes, m_xAttrTab);
3410  pAttr = pAttr->GetNext();
3411  }
3412 
3413  *pSaveAttributes = nullptr;
3414  }
3415 }
3416 
3417 void SwHTMLParser::InsertAttr( const SfxPoolItem& rItem, bool bInsAtStart )
3418 {
3419  HTMLAttr* pTmp = new HTMLAttr(*m_pPam->GetPoint(), rItem, nullptr, std::shared_ptr<HTMLAttrTable>());
3420  if (bInsAtStart)
3421  m_aSetAttrTab.push_front( pTmp );
3422  else
3423  m_aSetAttrTab.push_back( pTmp );
3424 }
3425 
3426 void SwHTMLParser::InsertAttrs( std::deque<std::unique_ptr<HTMLAttr>> rAttrs )
3427 {
3428  while( !rAttrs.empty() )
3429  {
3430  std::unique_ptr<HTMLAttr> pAttr = std::move(rAttrs.front());
3431  InsertAttr( pAttr->GetItem(), false );
3432  rAttrs.pop_front();
3433  }
3434 }
3435 
3437 {
3438  OUString aId, aStyle, aLang, aDir;
3439  OUString aClass;
3440 
3441  const HTMLOptions& rHTMLOptions = GetOptions();
3442  for (size_t i = rHTMLOptions.size(); i; )
3443  {
3444  const HTMLOption& rOption = rHTMLOptions[--i];
3445  switch( rOption.GetToken() )
3446  {
3447  case HtmlOptionId::ID:
3448  aId = rOption.GetString();
3449  break;
3450  case HtmlOptionId::STYLE:
3451  aStyle = rOption.GetString();
3452  break;
3453  case HtmlOptionId::CLASS:
3454  aClass = rOption.GetString();
3455  break;
3456  case HtmlOptionId::LANG:
3457  aLang = rOption.GetString();
3458  break;
3459  case HtmlOptionId::DIR:
3460  aDir = rOption.GetString();
3461  break;
3462  default: break;
3463  }
3464  }
3465 
3466  // create a new context
3467  std::unique_ptr<HTMLAttrContext> xCntxt(new HTMLAttrContext(nToken));
3468 
3469  // parse styles
3470  if( HasStyleOptions( aStyle, aId, aClass, &aLang, &aDir ) )
3471  {
3472  SfxItemSet aItemSet( m_xDoc->GetAttrPool(), m_pCSS1Parser->GetWhichMap() );
3473  SvxCSS1PropertyInfo aPropInfo;
3474 
3475  if( ParseStyleOptions( aStyle, aId, aClass, aItemSet, aPropInfo, &aLang, &aDir ) )
3476  {
3477  if( HtmlTokenId::SPAN_ON != nToken || aClass.isEmpty() ||
3478  !CreateContainer( aClass, aItemSet, aPropInfo, xCntxt.get() ) )
3479  DoPositioning( aItemSet, aPropInfo, xCntxt.get() );
3480  InsertAttrs( aItemSet, aPropInfo, xCntxt.get(), true );
3481  }
3482  }
3483 
3484  // save the context
3485  PushContext(xCntxt);
3486 }
3487 
3489  HTMLAttr **ppAttr, const SfxPoolItem & rItem,
3490  HTMLAttr **ppAttr2, const SfxPoolItem *pItem2,
3491  HTMLAttr **ppAttr3, const SfxPoolItem *pItem3 )
3492 {
3493  OUString aId, aStyle, aClass, aLang, aDir;
3494 
3495  const HTMLOptions& rHTMLOptions = GetOptions();
3496  for (size_t i = rHTMLOptions.size(); i; )
3497  {
3498  const HTMLOption& rOption = rHTMLOptions[--i];
3499  switch( rOption.GetToken() )
3500  {
3501  case HtmlOptionId::ID:
3502  aId = rOption.GetString();
3503  break;
3504  case HtmlOptionId::STYLE:
3505  aStyle = rOption.GetString();
3506  break;
3507  case HtmlOptionId::CLASS:
3508  aClass = rOption.GetString();
3509  break;
3510  case HtmlOptionId::LANG:
3511  aLang = rOption.GetString();
3512  break;
3513  case HtmlOptionId::DIR:
3514  aDir = rOption.GetString();
3515  break;
3516  default: break;
3517  }
3518  }
3519 
3520  // create a new context
3521  std::unique_ptr<HTMLAttrContext> xCntxt(new HTMLAttrContext(nToken));
3522 
3523  // parse styles
3524  if( HasStyleOptions( aStyle, aId, aClass, &aLang, &aDir ) )
3525  {
3526  SfxItemSet aItemSet( m_xDoc->GetAttrPool(), m_pCSS1Parser->GetWhichMap() );
3527  SvxCSS1PropertyInfo aPropInfo;
3528 
3529  aItemSet.Put( rItem );
3530  if( pItem2 )
3531  aItemSet.Put( *pItem2 );
3532  if( pItem3 )
3533  aItemSet.Put( *pItem3 );
3534 
3535  if( ParseStyleOptions( aStyle, aId, aClass, aItemSet, aPropInfo, &aLang, &aDir ) )
3536  DoPositioning( aItemSet, aPropInfo, xCntxt.get() );
3537 
3538  InsertAttrs( aItemSet, aPropInfo, xCntxt.get(), true );
3539  }
3540  else
3541  {
3542  InsertAttr( ppAttr ,rItem, xCntxt.get() );
3543  if( pItem2 )
3544  {
3545  OSL_ENSURE( ppAttr2, "missing table entry for item2" );
3546  InsertAttr( ppAttr2, *pItem2, xCntxt.get() );
3547  }
3548  if( pItem3 )
3549  {
3550  OSL_ENSURE( ppAttr3, "missing table entry for item3" );
3551  InsertAttr( ppAttr3, *pItem3, xCntxt.get() );
3552  }
3553  }
3554 
3555  // save the context
3556  PushContext(xCntxt);
3557 }
3558 
3560 {
3561  // fetch context
3562  std::unique_ptr<HTMLAttrContext> xCntxt(PopContext(getOnToken(nToken)));
3563  if (xCntxt)
3564  {
3565  // and maybe end the attributes
3566  EndContext(xCntxt.get());
3567  }
3568 }
3569 
3571 {
3572  OUString aId, aStyle, aClass, aLang, aDir;
3573  sal_uInt16 nSize = 3;
3574 
3575  const HTMLOptions& rHTMLOptions = GetOptions();
3576  for (size_t i = rHTMLOptions.size(); i; )
3577  {
3578  const HTMLOption& rOption = rHTMLOptions[--i];
3579  switch( rOption.GetToken() )
3580  {
3581  case HtmlOptionId::SIZE:
3582  nSize = static_cast<sal_uInt16>(rOption.GetNumber());
3583  break;
3584  case HtmlOptionId::ID:
3585  aId = rOption.GetString();
3586  break;
3587  case HtmlOptionId::STYLE:
3588  aStyle = rOption.GetString();
3589  break;
3590  case HtmlOptionId::CLASS:
3591  aClass = rOption.GetString();
3592  break;
3593  case HtmlOptionId::LANG:
3594  aLang = rOption.GetString();
3595  break;
3596  case HtmlOptionId::DIR:
3597  aDir = rOption.GetString();
3598  break;
3599  default: break;
3600  }
3601  }
3602 
3603  if( nSize < 1 )
3604  nSize = 1;
3605 
3606  if( nSize > 7 )
3607  nSize = 7;
3608 
3609  // create a new context
3610  std::unique_ptr<HTMLAttrContext> xCntxt(new HTMLAttrContext(HtmlTokenId::BASEFONT_ON));
3611 
3612  // parse styles
3613  if( HasStyleOptions( aStyle, aId, aClass, &aLang, &aDir ) )
3614  {
3615  SfxItemSet aItemSet( m_xDoc->GetAttrPool(), m_pCSS1Parser->GetWhichMap() );
3616  SvxCSS1PropertyInfo aPropInfo;
3617 
3618  //CJK has different defaults
3619  SvxFontHeightItem aFontHeight( m_aFontHeights[nSize-1], 100, RES_CHRATR_FONTSIZE );
3620  aItemSet.Put( aFontHeight );
3621  SvxFontHeightItem aFontHeightCJK( m_aFontHeights[nSize-1], 100, RES_CHRATR_CJK_FONTSIZE );
3622  aItemSet.Put( aFontHeightCJK );
3623  //Complex type can contain so many types of letters,
3624  //that it's not really worthy to bother, IMO.
3625  //Still, I have set a default.
3626  SvxFontHeightItem aFontHeightCTL( m_aFontHeights[nSize-1], 100, RES_CHRATR_CTL_FONTSIZE );
3627  aItemSet.Put( aFontHeightCTL );
3628 
3629  if( ParseStyleOptions( aStyle, aId, aClass, aItemSet, aPropInfo, &aLang, &aDir ) )
3630  DoPositioning( aItemSet, aPropInfo, xCntxt.get() );
3631 
3632  InsertAttrs( aItemSet, aPropInfo, xCntxt.get(), true );
3633  }
3634  else
3635  {
3636  SvxFontHeightItem aFontHeight( m_aFontHeights[nSize-1], 100, RES_CHRATR_FONTSIZE );
3637  InsertAttr( &m_xAttrTab->pFontHeight, aFontHeight, xCntxt.get() );
3638  SvxFontHeightItem aFontHeightCJK( m_aFontHeights[nSize-1], 100, RES_CHRATR_CJK_FONTSIZE );
3639  InsertAttr( &m_xAttrTab->pFontHeightCJK, aFontHeightCJK, xCntxt.get() );
3640  SvxFontHeightItem aFontHeightCTL( m_aFontHeights[nSize-1], 100, RES_CHRATR_CTL_FONTSIZE );
3641  InsertAttr( &m_xAttrTab->pFontHeightCTL, aFontHeightCTL, xCntxt.get() );
3642  }
3643 
3644  // save the context
3645  PushContext(xCntxt);
3646 
3647  // save the font size
3648  m_aBaseFontStack.push_back( nSize );
3649 }
3650 
3652 {
3653  EndTag( HtmlTokenId::BASEFONT_ON );
3654 
3655  // avoid stack underflow in tables
3656  if( m_aBaseFontStack.size() > m_nBaseFontStMin )
3657  m_aBaseFontStack.erase( m_aBaseFontStack.begin() + m_aBaseFontStack.size() - 1 );
3658 }
3659 
3661 {
3662  sal_uInt16 nBaseSize =
3665  : 3 );
3666  sal_uInt16 nFontSize =
3667  ( m_aFontStack.size() > m_nFontStMin
3668  ? (m_aFontStack[m_aFontStack.size()-1] & FONTSIZE_MASK)
3669  : nBaseSize );
3670 
3671  OUString aFace, aId, aStyle, aClass, aLang, aDir;
3672  Color aColor;
3673  sal_uLong nFontHeight = 0; // actual font height to set
3674  sal_uInt16 nSize = 0; // font height in Netscape notation (1-7)
3675  bool bColor = false;
3676 
3677  const HTMLOptions& rHTMLOptions = GetOptions();
3678  for (size_t i = rHTMLOptions.size(); i; )
3679  {
3680  const HTMLOption& rOption = rHTMLOptions[--i];
3681  switch( rOption.GetToken() )
3682  {
3683  case HtmlOptionId::SIZE:
3684  if( HtmlTokenId::FONT_ON==nToken && !rOption.GetString().isEmpty() )
3685  {
3686  sal_Int32 nSSize;
3687  if( '+' == rOption.GetString()[0] ||
3688  '-' == rOption.GetString()[0] )
3689  nSSize = o3tl::saturating_add<sal_Int32>(nBaseSize, rOption.GetSNumber());
3690  else
3691  nSSize = static_cast<sal_Int32>(rOption.GetNumber());
3692 
3693  if( nSSize < 1 )
3694  nSSize = 1;
3695  else if( nSSize > 7 )
3696  nSSize = 7;
3697 
3698  nSize = static_cast<sal_uInt16>(nSSize);
3699  nFontHeight = m_aFontHeights[nSize-1];
3700  }
3701  break;
3702  case HtmlOptionId::COLOR:
3703  if( HtmlTokenId::FONT_ON==nToken )
3704  {
3705  rOption.GetColor( aColor );
3706  bColor = true;
3707  }
3708  break;
3709  case HtmlOptionId::FACE:
3710  if( HtmlTokenId::FONT_ON==nToken )
3711  aFace = rOption.GetString();
3712  break;
3713  case HtmlOptionId::ID:
3714  aId = rOption.GetString();
3715  break;
3716  case HtmlOptionId::STYLE:
3717  aStyle = rOption.GetString();
3718  break;
3719  case HtmlOptionId::CLASS:
3720  aClass = rOption.GetString();
3721  break;
3722  case HtmlOptionId::LANG:
3723  aLang = rOption.GetString();
3724  break;
3725  case HtmlOptionId::DIR:
3726  aDir = rOption.GetString();
3727  break;
3728  default: break;
3729  }
3730  }
3731 
3732  if( HtmlTokenId::FONT_ON != nToken )
3733  {
3734  // HTML_BIGPRINT_ON or HTML_SMALLPRINT_ON
3735 
3736  // In headings the current heading sets the font height
3737  // and not BASEFONT.
3738  const SwFormatColl *pColl = GetCurrFormatColl();
3739  sal_uInt16 nPoolId = pColl ? pColl->GetPoolFormatId() : 0;
3740  if( nPoolId>=RES_POOLCOLL_HEADLINE1 &&
3741  nPoolId<=RES_POOLCOLL_HEADLINE6 )
3742  {
3743  // If the font height in the heading wasn't changed yet,
3744  // then take the one from the style.
3745  if( m_nFontStHeadStart==m_aFontStack.size() )
3746  nFontSize = static_cast< sal_uInt16 >(6 - (nPoolId - RES_POOLCOLL_HEADLINE1));
3747  }
3748  else
3749  nPoolId = 0;
3750 
3751  if( HtmlTokenId::BIGPRINT_ON == nToken )
3752  nSize = ( nFontSize<7 ? nFontSize+1 : 7 );
3753  else
3754  nSize = ( nFontSize>1 ? nFontSize-1 : 1 );
3755 
3756  // If possible in headlines we fetch the new font height
3757  // from the style.
3758  if( nPoolId && nSize>=1 && nSize <=6 )
3759  nFontHeight =
3760  m_pCSS1Parser->GetTextCollFromPool(
3761  RES_POOLCOLL_HEADLINE1+6-nSize )->GetSize().GetHeight();
3762  else
3763  nFontHeight = m_aFontHeights[nSize-1];
3764  }
3765 
3766  OSL_ENSURE( !nSize == !nFontHeight, "HTML-Font-Size != Font-Height" );
3767 
3768  OUString aFontName, aStyleName;
3769  FontFamily eFamily = FAMILY_DONTKNOW; // family and pitch,
3770  FontPitch ePitch = PITCH_DONTKNOW; // if not found
3771  rtl_TextEncoding eEnc = osl_getThreadTextEncoding();
3772 
3773  if( !aFace.isEmpty() && !m_pCSS1Parser->IsIgnoreFontFamily() )
3774  {
3775  const FontList *pFList = nullptr;
3776  SwDocShell *pDocSh = m_xDoc->GetDocShell();
3777  if( pDocSh )
3778  {
3779  const SvxFontListItem *pFListItem =
3780  static_cast<const SvxFontListItem *>(pDocSh->GetItem(SID_ATTR_CHAR_FONTLIST));
3781  if( pFListItem )
3782  pFList = pFListItem->GetFontList();
3783  }
3784 
3785  bool bFound = false;
3786  sal_Int32 nStrPos = 0;
3787  while( nStrPos!= -1 )
3788  {
3789  OUString aFName = aFace.getToken( 0, ',', nStrPos );
3790  aFName = comphelper::string::strip(aFName, ' ');
3791  if( !aFName.isEmpty() )
3792  {
3793  if( !bFound && pFList )
3794  {
3795  sal_Handle hFont = pFList->GetFirstFontMetric( aFName );
3796  if( nullptr != hFont )
3797  {
3798  const FontMetric& rFMetric = FontList::GetFontMetric( hFont );
3799  if( RTL_TEXTENCODING_DONTKNOW != rFMetric.GetCharSet() )
3800  {
3801  bFound = true;
3802  if( RTL_TEXTENCODING_SYMBOL == rFMetric.GetCharSet() )
3803  eEnc = RTL_TEXTENCODING_SYMBOL;
3804  }
3805  }
3806  }
3807  if( !aFontName.isEmpty() )
3808  aFontName += ";";
3809  aFontName += aFName;
3810  }
3811  }
3812  }
3813 
3814  // create a new context
3815  std::unique_ptr<HTMLAttrContext> xCntxt(new HTMLAttrContext(nToken));
3816 
3817  // parse styles
3818  if( HasStyleOptions( aStyle, aId, aClass, &aLang, &aDir ) )
3819  {
3820  SfxItemSet aItemSet( m_xDoc->GetAttrPool(), m_pCSS1Parser->GetWhichMap() );
3821  SvxCSS1PropertyInfo aPropInfo;
3822 
3823  if( nFontHeight )
3824  {
3825  SvxFontHeightItem aFontHeight( nFontHeight, 100, RES_CHRATR_FONTSIZE );
3826  aItemSet.Put( aFontHeight );
3827  SvxFontHeightItem aFontHeightCJK( nFontHeight, 100, RES_CHRATR_CJK_FONTSIZE );
3828  aItemSet.Put( aFontHeightCJK );
3829  SvxFontHeightItem aFontHeightCTL( nFontHeight, 100, RES_CHRATR_CTL_FONTSIZE );
3830  aItemSet.Put( aFontHeightCTL );
3831  }
3832  if( bColor )
3833  aItemSet.Put( SvxColorItem(aColor, RES_CHRATR_COLOR) );
3834  if( !aFontName.isEmpty() )
3835  {
3836  SvxFontItem aFont( eFamily, aFontName, aStyleName, ePitch, eEnc, RES_CHRATR_FONT );
3837  aItemSet.Put( aFont );
3838  SvxFontItem aFontCJK( eFamily, aFontName, aStyleName, ePitch, eEnc, RES_CHRATR_CJK_FONT );
3839  aItemSet.Put( aFontCJK );
3840  SvxFontItem aFontCTL( eFamily, aFontName, aStyleName, ePitch, eEnc, RES_CHRATR_CTL_FONT );
3841  aItemSet.Put( aFontCTL );
3842  }
3843 
3844  if( ParseStyleOptions( aStyle, aId, aClass, aItemSet, aPropInfo, &aLang, &aDir ) )
3845  DoPositioning( aItemSet, aPropInfo, xCntxt.get() );
3846 
3847  InsertAttrs( aItemSet, aPropInfo, xCntxt.get(), true );
3848  }
3849  else
3850  {
3851  if( nFontHeight )
3852  {
3853  SvxFontHeightItem aFontHeight( nFontHeight, 100, RES_CHRATR_FONTSIZE );
3854  InsertAttr( &m_xAttrTab->pFontHeight, aFontHeight, xCntxt.get() );
3855  SvxFontHeightItem aFontHeightCJK( nFontHeight, 100, RES_CHRATR_CJK_FONTSIZE );
3856  InsertAttr( &m_xAttrTab->pFontHeight, aFontHeightCJK, xCntxt.get() );
3857  SvxFontHeightItem aFontHeightCTL( nFontHeight, 100, RES_CHRATR_CTL_FONTSIZE );
3858  InsertAttr( &m_xAttrTab->pFontHeight, aFontHeightCTL, xCntxt.get() );
3859  }
3860  if( bColor )
3861  InsertAttr( &m_xAttrTab->pFontColor, SvxColorItem(aColor, RES_CHRATR_COLOR), xCntxt.get() );
3862  if( !aFontName.isEmpty() )
3863  {
3864  SvxFontItem aFont( eFamily, aFontName, aStyleName, ePitch, eEnc, RES_CHRATR_FONT );
3865  InsertAttr( &m_xAttrTab->pFont, aFont, xCntxt.get() );
3866  SvxFontItem aFontCJK( eFamily, aFontName, aStyleName, ePitch, eEnc, RES_CHRATR_CJK_FONT );
3867  InsertAttr( &m_xAttrTab->pFont, aFontCJK, xCntxt.get() );
3868  SvxFontItem aFontCTL( eFamily, aFontName, aStyleName, ePitch, eEnc, RES_CHRATR_CTL_FONT );
3869  InsertAttr( &m_xAttrTab->pFont, aFontCTL, xCntxt.get() );
3870  }
3871  }
3872 
3873  // save the context
3874  PushContext(xCntxt);
3875 
3876  m_aFontStack.push_back( nSize );
3877 }
3878 
3880 {
3881  EndTag( nToken );
3882 
3883  // avoid stack underflow in tables
3884  if( m_aFontStack.size() > m_nFontStMin )
3885  m_aFontStack.erase( m_aFontStack.begin() + m_aFontStack.size() - 1 );
3886 }
3887 
3889 {
3890  if( m_pPam->GetPoint()->nContent.GetIndex() )
3892  else
3893  AddParSpace();
3894 
3895  m_eParaAdjust = SvxAdjust::End;
3896  OUString aId, aStyle, aClass, aLang, aDir;
3897 
3898  const HTMLOptions& rHTMLOptions = GetOptions();
3899  for (size_t i = rHTMLOptions.size(); i; )
3900  {
3901  const HTMLOption& rOption = rHTMLOptions[--i];
3902  switch( rOption.GetToken() )
3903  {
3904  case HtmlOptionId::ID:
3905  aId = rOption.GetString();
3906  break;
3907  case HtmlOptionId::ALIGN:
3908  m_eParaAdjust = rOption.GetEnum( aHTMLPAlignTable, m_eParaAdjust );
3909  break;
3910  case HtmlOptionId::STYLE:
3911  aStyle = rOption.GetString();
3912  break;
3913  case HtmlOptionId::CLASS:
3914  aClass = rOption.GetString();
3915  break;
3916  case HtmlOptionId::LANG:
3917  aLang = rOption.GetString();
3918  break;
3919  case HtmlOptionId::DIR:
3920  aDir = rOption.GetString();
3921  break;
3922  default: break;
3923  }
3924  }
3925 
3926  // create a new context
3927  std::unique_ptr<HTMLAttrContext> xCntxt(
3928  !aClass.isEmpty() ? new HTMLAttrContext( HtmlTokenId::PARABREAK_ON,
3929  RES_POOLCOLL_TEXT, aClass )
3930  : new HTMLAttrContext( HtmlTokenId::PARABREAK_ON ));
3931 
3932  // parse styles (Don't consider class. This is only possible as long as none of
3933  // the CSS1 properties of the class must be formatted hard!!!)
3934  if (HasStyleOptions(aStyle, aId, OUString(), &aLang, &aDir))
3935  {
3936  SfxItemSet aItemSet( m_xDoc->GetAttrPool(), m_pCSS1Parser->GetWhichMap() );
3937  SvxCSS1PropertyInfo aPropInfo;
3938 
3939  if (ParseStyleOptions(aStyle, aId, OUString(), aItemSet, aPropInfo, &aLang, &aDir))
3940  {
3941  OSL_ENSURE( aClass.isEmpty() || !m_pCSS1Parser->GetClass( aClass ),
3942  "Class is not considered" );
3943  DoPositioning( aItemSet, aPropInfo, xCntxt.get() );
3944  InsertAttrs( aItemSet, aPropInfo, xCntxt.get() );
3945  }
3946  }
3947 
3948  if( SvxAdjust::End != m_eParaAdjust )
3949  InsertAttr( &m_xAttrTab->pAdjust, SvxAdjustItem(m_eParaAdjust, RES_PARATR_ADJUST), xCntxt.get() );
3950 
3951  // and push on stack
3952  PushContext( xCntxt );
3953 
3954  // set the current style or its attributes
3955  SetTextCollAttrs( !aClass.isEmpty() ? m_aContexts.back().get() : nullptr );
3956 
3957  // progress bar
3958  ShowStatline();
3959 
3960  OSL_ENSURE( m_nOpenParaToken == HtmlTokenId::NONE, "Now an open paragraph element will be lost." );
3961  m_nOpenParaToken = HtmlTokenId::PARABREAK_ON;
3962 }
3963 
3964 void SwHTMLParser::EndPara( bool bReal )
3965 {
3966  if (HtmlTokenId::LI_ON==m_nOpenParaToken && m_xTable)
3967  {
3968 #if OSL_DEBUG_LEVEL > 0
3969  const SwNumRule *pNumRule = m_pPam->GetNode().GetTextNode()->GetNumRule();
3970  OSL_ENSURE( pNumRule, "Where is the NumRule" );
3971 #endif
3972  }
3973 
3974  // Netscape skips empty paragraphs, we do the same.
3975  if( bReal )
3976  {
3977  if( m_pPam->GetPoint()->nContent.GetIndex() )
3979  else
3980  AddParSpace();
3981  }
3982 
3983  // If a DD or DT was open, it's an implied definition list,
3984  // which must be closed now.
3985  if( (m_nOpenParaToken == HtmlTokenId::DT_ON || m_nOpenParaToken == HtmlTokenId::DD_ON) &&
3987  {
3988  m_nDefListDeep--;
3989  }
3990 
3991  // Pop the context of the stack. It can also be from an
3992  // implied opened definition list.
3993  std::unique_ptr<HTMLAttrContext> xCntxt(
3994  PopContext( m_nOpenParaToken != HtmlTokenId::NONE ? getOnToken(m_nOpenParaToken) : HtmlTokenId::PARABREAK_ON ));
3995 
3996  // close attribute
3997  if (xCntxt)
3998  {
3999  EndContext(xCntxt.get());
4000  SetAttr(); // because of JavaScript set paragraph attributes as fast as possible
4001  xCntxt.reset();
4002  }
4003 
4004  // reset the existing style
4005  if( bReal )
4006  SetTextCollAttrs();
4007 
4008  m_nOpenParaToken = HtmlTokenId::NONE;
4009 }
4010 
4012 {
4013  m_eParaAdjust = SvxAdjust::End;
4014 
4015  OUString aId, aStyle, aClass, aLang, aDir;
4016 
4017  const HTMLOptions& rHTMLOptions = GetOptions();
4018  for (size_t i = rHTMLOptions.size(); i; )
4019  {
4020  const HTMLOption& rOption = rHTMLOptions[--i];
4021  switch( rOption.GetToken() )
4022  {
4023  case HtmlOptionId::ID:
4024  aId = rOption.GetString();
4025  break;
4026  case HtmlOptionId::ALIGN:
4027  m_eParaAdjust = rOption.GetEnum( aHTMLPAlignTable, m_eParaAdjust );
4028  break;
4029  case HtmlOptionId::STYLE:
4030  aStyle = rOption.GetString();
4031  break;
4032  case HtmlOptionId::CLASS:
4033  aClass = rOption.GetString();
4034  break;
4035  case HtmlOptionId::LANG:
4036  aLang = rOption.GetString();
4037  break;
4038  case HtmlOptionId::DIR:
4039  aDir = rOption.GetString();
4040  break;
4041  default: break;
4042  }
4043  }
4044 
4045  // open a new paragraph
4046  if( m_pPam->GetPoint()->nContent.GetIndex() )
4048  else
4049  AddParSpace();
4050 
4051  // search for the matching style
4052  sal_uInt16 nTextColl;
4053  switch( nToken )
4054  {
4055  case HtmlTokenId::HEAD1_ON: nTextColl = RES_POOLCOLL_HEADLINE1; break;
4056  case HtmlTokenId::HEAD2_ON: nTextColl = RES_POOLCOLL_HEADLINE2; break;
4057  case HtmlTokenId::HEAD3_ON: nTextColl = RES_POOLCOLL_HEADLINE3; break;
4058  case HtmlTokenId::HEAD4_ON: nTextColl = RES_POOLCOLL_HEADLINE4; break;
4059  case HtmlTokenId::HEAD5_ON: nTextColl = RES_POOLCOLL_HEADLINE5; break;
4060  case HtmlTokenId::HEAD6_ON: nTextColl = RES_POOLCOLL_HEADLINE6; break;
4061  default: nTextColl = RES_POOLCOLL_STANDARD; break;
4062  }
4063 
4064  // create the context
4065  std::unique_ptr<HTMLAttrContext> xCntxt(new HTMLAttrContext(nToken, nTextColl, aClass));
4066 
4067  // parse styles (regarding class see also NewPara)
4068  if (HasStyleOptions(aStyle, aId, OUString(), &aLang, &aDir))
4069  {
4070  SfxItemSet aItemSet( m_xDoc->GetAttrPool(), m_pCSS1Parser->GetWhichMap() );
4071  SvxCSS1PropertyInfo aPropInfo;
4072 
4073  if (ParseStyleOptions(aStyle, aId, OUString(), aItemSet, aPropInfo, &aLang, &aDir))
4074  {
4075  OSL_ENSURE( aClass.isEmpty() || !m_pCSS1Parser->GetClass( aClass ),
4076  "Class is not considered" );
4077  DoPositioning( aItemSet, aPropInfo, xCntxt.get() );
4078  InsertAttrs( aItemSet, aPropInfo, xCntxt.get() );
4079  }
4080  }
4081 
4082  if( SvxAdjust::End != m_eParaAdjust )
4083  InsertAttr( &m_xAttrTab->pAdjust, SvxAdjustItem(m_eParaAdjust, RES_PARATR_ADJUST), xCntxt.get() );
4084 
4085  // and push on stack
4086  PushContext(xCntxt);
4087 
4088  // set the current style or its attributes
4089  SetTextCollAttrs(m_aContexts.back().get());
4090 
4092 
4093  // progress bar
4094  ShowStatline();
4095 }
4096 
4098 {
4099  // open a new paragraph
4100  if( m_pPam->GetPoint()->nContent.GetIndex() )
4102  else
4103  AddParSpace();
4104 
4105  // search context matching the token and fetch it from stack
4106  std::unique_ptr<HTMLAttrContext> xCntxt;
4107  auto nPos = m_aContexts.size();
4108  while( !xCntxt && nPos>m_nContextStMin )
4109  {
4110  switch( m_aContexts[--nPos]->GetToken() )
4111  {
4112  case HtmlTokenId::HEAD1_ON:
4113  case HtmlTokenId::HEAD2_ON:
4114  case HtmlTokenId::HEAD3_ON:
4115  case HtmlTokenId::HEAD4_ON:
4116  case HtmlTokenId::HEAD5_ON:
4117  case HtmlTokenId::HEAD6_ON:
4118  xCntxt = std::move(m_aContexts[nPos]);
4119  m_aContexts.erase( m_aContexts.begin() + nPos );
4120  break;
4121  default: break;
4122  }
4123  }
4124 
4125  // and now end attributes
4126  if (xCntxt)
4127  {
4128  EndContext(xCntxt.get());
4129  SetAttr(); // because of JavaScript set paragraph attributes as fast as possible
4130  xCntxt.reset();
4131  }
4132 
4133  // reset existing style
4134  SetTextCollAttrs();
4135 
4137 }
4138 
4139 void SwHTMLParser::NewTextFormatColl( HtmlTokenId nToken, sal_uInt16 nColl )
4140 {
4141  OUString aId, aStyle, aClass, aLang, aDir;
4142 
4143  const HTMLOptions& rHTMLOptions = GetOptions();
4144  for (size_t i = rHTMLOptions.size(); i; )
4145  {
4146  const HTMLOption& rOption = rHTMLOptions[--i];
4147  switch( rOption.GetToken() )
4148  {
4149  case HtmlOptionId::ID:
4150  aId = rOption.GetString();
4151  break;
4152  case HtmlOptionId::STYLE:
4153  aStyle = rOption.GetString();
4154  break;
4155  case HtmlOptionId::CLASS:
4156  aClass = rOption.GetString();
4157  break;
4158  case HtmlOptionId::LANG:
4159  aLang = rOption.GetString();
4160  break;
4161  case HtmlOptionId::DIR:
4162  aDir = rOption.GetString();
4163  break;
4164  default: break;
4165  }
4166  }
4167 
4168  // open a new paragraph
4169  SwHTMLAppendMode eMode = AM_NORMAL;
4170  switch( nToken )
4171  {
4172  case HtmlTokenId::LISTING_ON:
4173  case HtmlTokenId::XMP_ON:
4174  // These both tags will be mapped to the PRE style. For the case that a
4175  // a CLASS exists we will delete it so that we don't get the CLASS of
4176  // the PRE style.
4177  aClass.clear();
4178  [[fallthrough]];
4179  case HtmlTokenId::BLOCKQUOTE_ON:
4180  case HtmlTokenId::BLOCKQUOTE30_ON:
4181  case HtmlTokenId::PREFORMTXT_ON:
4182  eMode = AM_SPACE;
4183  break;
4184  case HtmlTokenId::ADDRESS_ON:
4185  eMode = AM_NOSPACE; // ADDRESS can follow on a <P> without </P>
4186  break;
4187  case HtmlTokenId::DT_ON:
4188  case HtmlTokenId::DD_ON:
4189  eMode = AM_SOFTNOSPACE;
4190  break;
4191  default:
4192  OSL_ENSURE( false, "unknown style" );
4193  break;
4194  }
4195  if( m_pPam->GetPoint()->nContent.GetIndex() )
4196  AppendTextNode( eMode );
4197  else if( AM_SPACE==eMode )
4198  AddParSpace();
4199 
4200  // ... and save in a context
4201  std::unique_ptr<HTMLAttrContext> xCntxt(new HTMLAttrContext(nToken, nColl, aClass));
4202 
4203  // parse styles (regarding class see also NewPara)
4204  if (HasStyleOptions(aStyle, aId, OUString(), &aLang, &aDir))
4205  {
4206  SfxItemSet aItemSet( m_xDoc->GetAttrPool(), m_pCSS1Parser->GetWhichMap() );
4207  SvxCSS1PropertyInfo aPropInfo;
4208 
4209  if (ParseStyleOptions(aStyle, aId, OUString(), aItemSet, aPropInfo, &aLang, &aDir))
4210  {
4211  OSL_ENSURE( aClass.isEmpty() || !m_pCSS1Parser->GetClass( aClass ),
4212  "Class is not considered" );
4213  DoPositioning( aItemSet, aPropInfo, xCntxt.get() );
4214  InsertAttrs( aItemSet, aPropInfo, xCntxt.get() );
4215  }
4216  }
4217 
4218  PushContext(xCntxt);
4219 
4220  // set the new style
4221  SetTextCollAttrs(m_aContexts.back().get());
4222 
4223  // update progress bar
4224  ShowStatline();
4225 }
4226 
4228 {
4229  SwHTMLAppendMode eMode = AM_NORMAL;
4230  switch( getOnToken(nToken) )
4231  {
4232  case HtmlTokenId::BLOCKQUOTE_ON:
4233  case HtmlTokenId::BLOCKQUOTE30_ON:
4234  case HtmlTokenId::PREFORMTXT_ON:
4235  case HtmlTokenId::LISTING_ON:
4236  case HtmlTokenId::XMP_ON:
4237  eMode = AM_SPACE;
4238  break;
4239  case HtmlTokenId::ADDRESS_ON:
4240  case HtmlTokenId::DT_ON:
4241  case HtmlTokenId::DD_ON:
4242  eMode = AM_SOFTNOSPACE;
4243  break;
4244  default:
4245  OSL_ENSURE( false, "unknown style" );
4246  break;
4247  }
4248  if( m_pPam->GetPoint()->nContent.GetIndex() )
4249  AppendTextNode( eMode );
4250  else if( AM_SPACE==eMode )
4251  AddParSpace();
4252 
4253  // pop current context of stack
4254  std::unique_ptr<HTMLAttrContext> xCntxt(PopContext(getOnToken(nToken)));
4255 
4256  // and now end attributes
4257  if (xCntxt)
4258  {
4259  EndContext(xCntxt.get());
4260  SetAttr(); // because of JavaScript set paragraph attributes as fast as possible
4261  xCntxt.reset();
4262  }
4263 
4264  // reset existing style
4265  SetTextCollAttrs();
4266 }
4267 
4269 {
4270  OUString aId, aStyle, aClass, aLang, aDir;
4271 
4272  const HTMLOptions& rHTMLOptions = GetOptions();
4273  for (size_t i = rHTMLOptions.size(); i; )
4274  {
4275  const HTMLOption& rOption = rHTMLOptions[--i];
4276  switch( rOption.GetToken() )
4277  {
4278  case HtmlOptionId::ID:
4279  aId = rOption.GetString();
4280  break;
4281  case HtmlOptionId::STYLE:
4282  aStyle = rOption.GetString();
4283  break;
4284  case HtmlOptionId::CLASS:
4285  aClass = rOption.GetString();
4286  break;
4287  case HtmlOptionId::LANG:
4288  aLang = rOption.GetString();
4289  break;
4290  case HtmlOptionId::DIR:
4291  aDir = rOption.GetString();
4292  break;
4293  default: break;
4294  }
4295  }
4296 
4297  // open a new paragraph
4298  bool bSpace = (GetNumInfo().GetDepth() + m_nDefListDeep) == 0;
4299  if( m_pPam->GetPoint()->nContent.GetIndex() )
4300  AppendTextNode( bSpace ? AM_SPACE : AM_SOFTNOSPACE );
4301  else if( bSpace )
4302  AddParSpace();
4303 
4304  // one level more
4305  m_nDefListDeep++;
4306 
4307  bool bInDD = false, bNotInDD = false;
4308  auto nPos = m_aContexts.size();
4309  while( !bInDD && !bNotInDD && nPos>m_nContextStMin )
4310  {
4311  HtmlTokenId nCntxtToken = m_aContexts[--nPos]->GetToken();
4312  switch( nCntxtToken )
4313  {
4314  case HtmlTokenId::DEFLIST_ON:
4315  case HtmlTokenId::DIRLIST_ON:
4316  case HtmlTokenId::MENULIST_ON:
4317  case HtmlTokenId::ORDERLIST_ON:
4318  case HtmlTokenId::UNORDERLIST_ON:
4319  bNotInDD = true;
4320  break;
4321  case HtmlTokenId::DD_ON:
4322  bInDD = true;
4323  break;
4324  default: break;
4325  }
4326  }
4327 
4328  // ... and save in a context
4329  std::unique_ptr<HTMLAttrContext> xCntxt(new HTMLAttrContext(HtmlTokenId::DEFLIST_ON));
4330 
4331  // in it save also the margins
4332  sal_uInt16 nLeft=0, nRight=0;
4333  short nIndent=0;
4334  GetMarginsFromContext( nLeft, nRight, nIndent );
4335 
4336  // The indentation, which already results from a DL, correlates with a DT
4337  // on the current level and this correlates to a DD from the previous level.
4338  // For a level >=2 we must add DD distance.
4339  if( !bInDD && m_nDefListDeep > 1 )
4340  {
4341 
4342  // and the one of the DT-style of the current level
4343  SvxLRSpaceItem rLRSpace =
4344  m_pCSS1Parser->GetTextFormatColl(RES_POOLCOLL_HTML_DD, OUString())
4345  ->GetLRSpace();
4346  nLeft = nLeft + static_cast< sal_uInt16 >(rLRSpace.GetTextLeft());
4347  }
4348 
4349  xCntxt->SetMargins( nLeft, nRight, nIndent );
4350 
4351  // parse styles
4352  if( HasStyleOptions( aStyle, aId, aClass, &aLang, &aDir ) )
4353  {
4354  SfxItemSet aItemSet( m_xDoc->GetAttrPool(), m_pCSS1Parser->GetWhichMap() );
4355  SvxCSS1PropertyInfo aPropInfo;
4356 
4357  if( ParseStyleOptions( aStyle, aId, aClass, aItemSet, aPropInfo, &aLang, &aDir ) )
4358  {
4359  DoPositioning( aItemSet, aPropInfo, xCntxt.get() );
4360  InsertAttrs( aItemSet, aPropInfo, xCntxt.get() );
4361  }
4362  }
4363 
4364  PushContext(xCntxt);
4365 
4366  // set the attributes of the new style
4367  if( m_nDefListDeep > 1 )
4368  SetTextCollAttrs(m_aContexts.back().get());
4369 }
4370 
4372 {
4373  bool bSpace = (GetNumInfo().GetDepth() + m_nDefListDeep) == 1;
4374  if( m_pPam->GetPoint()->nContent.GetIndex() )
4375  AppendTextNode( bSpace ? AM_SPACE : AM_SOFTNOSPACE );
4376  else if( bSpace )
4377  AddParSpace();
4378 
4379  // one level less
4380  if( m_nDefListDeep > 0 )
4381  m_nDefListDeep--;
4382 
4383  // pop current context of stack
4384  std::unique_ptr<HTMLAttrContext> xCntxt(PopContext(HtmlTokenId::DEFLIST_ON));
4385 
4386  // and now end attributes
4387  if (xCntxt)
4388  {
4389  EndContext(xCntxt.get());
4390  SetAttr(); // because of JavaScript set paragraph attributes as fast as possible
4391  xCntxt.reset();
4392  }
4393 
4394  // and set style
4395  SetTextCollAttrs();
4396 }
4397 
4399 {
4400  // determine if the DD/DT exist in a DL
4401  bool bInDefList = false, bNotInDefList = false;
4402  auto nPos = m_aContexts.size();
4403  while( !bInDefList && !bNotInDefList && nPos>m_nContextStMin )
4404  {
4405  HtmlTokenId nCntxtToken = m_aContexts[--nPos]->GetToken();
4406  switch( nCntxtToken )
4407  {
4408  case HtmlTokenId::DEFLIST_ON:
4409  bInDefList = true;
4410  break;
4411  case HtmlTokenId::DIRLIST_ON:
4412  case HtmlTokenId::MENULIST_ON:
4413  case HtmlTokenId::ORDERLIST_ON:
4414  case HtmlTokenId::UNORDERLIST_ON:
4415  bNotInDefList = true;
4416  break;
4417  default: break;
4418  }
4419  }
4420 
4421  // if not, then implicitly open a new DL
4422  if( !bInDefList )
4423  {
4424  m_nDefListDeep++;
4425  OSL_ENSURE( m_nOpenParaToken == HtmlTokenId::NONE,
4426  "Now an open paragraph element will be lost." );
4427  m_nOpenParaToken = nToken;
4428  }
4429 
4430  NewTextFormatColl( nToken, static_cast< sal_uInt16 >(nToken==HtmlTokenId::DD_ON ? RES_POOLCOLL_HTML_DD
4431  : RES_POOLCOLL_HTML_DT) );
4432 }
4433 
4435 {
4436  // open a new paragraph
4437  if( nToken == HtmlTokenId::NONE && m_pPam->GetPoint()->nContent.GetIndex() )
4439 
4440  // search context matching the token and fetch it from stack
4441  nToken = getOnToken(nToken);
4442  std::unique_ptr<HTMLAttrContext> xCntxt;
4443  auto nPos = m_aContexts.size();
4444  while( !xCntxt && nPos>m_nContextStMin )
4445  {
4446  HtmlTokenId nCntxtToken = m_aContexts[--nPos]->GetToken();
4447  switch( nCntxtToken )
4448  {
4449  case HtmlTokenId::DD_ON:
4450  case HtmlTokenId::DT_ON:
4451  if( nToken == HtmlTokenId::NONE || nToken == nCntxtToken )
4452  {
4453  xCntxt = std::move(m_aContexts[nPos]);
4454  m_aContexts.erase( m_aContexts.begin() + nPos );
4455  }
4456  break;
4457  case HtmlTokenId::DEFLIST_ON:
4458  // don't look at DD/DT outside the current DefList
4459  case HtmlTokenId::DIRLIST_ON:
4460  case HtmlTokenId::MENULIST_ON:
4461  case HtmlTokenId::ORDERLIST_ON:
4462  case HtmlTokenId::UNORDERLIST_ON:
4463  // and also not outside another list
4465  break;
4466  default: break;
4467  }
4468  }
4469 
4470  // and now end attributes
4471  if (xCntxt)
4472  {
4473  EndContext(xCntxt.get());
4474  SetAttr(); // because of JavaScript set paragraph attributes as fast as possible
4475  }
4476 }
4477 
4487 bool SwHTMLParser::HasCurrentParaFlys( bool bNoSurroundOnly,
4488  bool bSurroundOnly ) const
4489 {
4490  SwNodeIndex& rNodeIdx = m_pPam->GetPoint()->nNode;
4491 
4492  const SwFrameFormats& rFrameFormatTable = *m_xDoc->GetSpzFrameFormats();
4493 
4494  bool bFound = false;
4495  for ( size_t i=0; i<rFrameFormatTable.size(); i++ )
4496  {
4497  const SwFrameFormat *const pFormat = rFrameFormatTable[i];
4498  SwFormatAnchor const*const pAnchor = &pFormat->GetAnchor();
4499  // A frame was found, when
4500  // - it is paragraph-bound, and
4501  // - is anchored in current paragraph, and
4502  // - every paragraph-bound frame counts, or
4503  // - (only frames without wrapping count and) the frame doesn't have
4504  // a wrapping
4505  SwPosition const*const pAPos = pAnchor->GetContentAnchor();
4506  if (pAPos &&
4507  ((RndStdIds::FLY_AT_PARA == pAnchor->GetAnchorId()) ||
4508  (RndStdIds::FLY_AT_CHAR == pAnchor->GetAnchorId())) &&
4509  pAPos->nNode == rNodeIdx )
4510  {
4511  if( !(bNoSurroundOnly || bSurroundOnly) )
4512  {
4513  bFound = true;
4514  break;
4515  }
4516  else
4517  {
4518  // When looking for frames with wrapping, also disregard
4519  // ones with wrap-through. In this case it's (still) HIDDEN-Controls,
4520  // and you don't want to evade those when positioning.
4521  css::text::WrapTextMode eSurround = pFormat->GetSurround().GetSurround();
4522  if( bNoSurroundOnly )
4523  {
4524  if( css::text::WrapTextMode_NONE==eSurround )
4525  {
4526  bFound = true;
4527  break;
4528  }
4529  }
4530  if( bSurroundOnly )
4531  {
4532  if( css::text::WrapTextMode_NONE==eSurround )
4533  {
4534  bFound = false;
4535  break;
4536  }
4537  else if( css::text::WrapTextMode_THROUGH!=eSurround )
4538  {
4539  bFound = true;
4540  // Continue searching: It's possible that some without
4541  // wrapping will follow...
4542  }
4543  }
4544  }
4545  }
4546  }
4547 
4548  return bFound;
4549 }
4550 
4551 // the special methods for inserting of objects
4552 
4554 {
4555  const SwContentNode* pCNd = m_pPam->GetContentNode();
4556  return pCNd ? &pCNd->GetAnyFormatColl() : nullptr;
4557 }
4558 
4560 {
4561  SwTextFormatColl *pCollToSet = nullptr; // the style to set
4562  SfxItemSet *pItemSet = nullptr; // set of hard attributes
4563  sal_uInt16 nTopColl = pContext ? pContext->GetTextFormatColl() : 0;
4564  const OUString rTopClass = pContext ? pContext->GetClass() : OUString();
4565  sal_uInt16 nDfltColl = RES_POOLCOLL_TEXT;
4566 
4567  bool bInPRE=false; // some context info
4568 
4569  sal_uInt16 nLeftMargin = 0, nRightMargin = 0; // the margins and
4570  short nFirstLineIndent = 0; // indentations
4571 
4572  for( auto i = m_nContextStAttrMin; i < m_aContexts.size(); ++i )
4573  {
4574  const HTMLAttrContext *pCntxt = m_aContexts[i].get();
4575 
4576  sal_uInt16 nColl = pCntxt->GetTextFormatColl();
4577  if( nColl )
4578  {
4579  // There is a style to set. Then at first we must decide,
4580  // if the style can be set.
4581  bool bSetThis = true;
4582  switch( nColl )
4583  {
4584  case RES_POOLCOLL_HTML_PRE:
4585  bInPRE = true;
4586  break;
4587  case RES_POOLCOLL_TEXT:
4588  // <TD><P CLASS=xxx> must become TD.xxx
4589  if( nDfltColl==RES_POOLCOLL_TABLE ||
4590  nDfltColl==RES_POOLCOLL_TABLE_HDLN )
4591  nColl = nDfltColl;
4592  break;
4593  case RES_POOLCOLL_HTML_HR:
4594  // also <HR> in <PRE> set as style, otherwise it can't
4595  // be exported anymore
4596  break;
4597  default:
4598  if( bInPRE )
4599  bSetThis = false;
4600  break;
4601  }
4602 
4603  SwTextFormatColl *pNewColl =
4604  m_pCSS1Parser->GetTextFormatColl( nColl, pCntxt->GetClass() );
4605 
4606  if( bSetThis )
4607  {
4608  // If now a different style should be set as previously, the
4609  // previous style must be replaced by hard attribution.
4610 
4611  if( pCollToSet )
4612  {
4613  // insert the attributes hard, which previous style sets
4614  if( !pItemSet )
4615  pItemSet = new SfxItemSet( pCollToSet->GetAttrSet() );
4616  else
4617  {
4618  const SfxItemSet& rCollSet = pCollToSet->GetAttrSet();
4619  SfxItemSet aItemSet( *rCollSet.GetPool(),
4620  rCollSet.GetRanges() );
4621  aItemSet.Set( rCollSet );
4622  pItemSet->Put( aItemSet );
4623  }
4624  // but remove the attributes, which the current style sets,
4625  // because otherwise they will be overwritten later
4626  pItemSet->Differentiate( pNewColl->GetAttrSet() );
4627  }
4628 
4629  pCollToSet = pNewColl;
4630  }
4631  else
4632  {
4633  // hard attribution
4634  if( !pItemSet )
4635  pItemSet = new SfxItemSet( pNewColl->GetAttrSet() );
4636  else
4637  {
4638  const SfxItemSet& rCollSet = pNewColl->GetAttrSet();
4639  SfxItemSet aItemSet( *rCollSet.GetPool(),
4640  rCollSet.GetRanges() );
4641  aItemSet.Set( rCollSet );
4642  pItemSet->Put( aItemSet );
4643  }
4644  }
4645  }
4646  else
4647  {
4648  // Maybe a default style exists?
4649  nColl = pCntxt->GetDfltTextFormatColl();
4650  if( nColl )
4651  nDfltColl = nColl;
4652  }
4653 
4654  // if applicable fetch new paragraph indents
4655  if( pCntxt->IsLRSpaceChanged() )
4656  {
4657  sal_uInt16 nLeft=0, nRight=0;
4658 
4659  pCntxt->GetMargins( nLeft, nRight, nFirstLineIndent );
4660  nLeftMargin = nLeft;
4661  nRightMargin = nRight;
4662  }
4663  }
4664 
4665  // If in current context a new style should be set,
4666  // its paragraph margins must be inserted in the context.
4667  if( pContext && nTopColl )
4668  {
4669  // <TD><P CLASS=xxx> must become TD.xxx
4670  if( nTopColl==RES_POOLCOLL_TEXT &&
4671  (nDfltColl==RES_POOLCOLL_TABLE ||
4672  nDfltColl==RES_POOLCOLL_TABLE_HDLN) )
4673  nTopColl = nDfltColl;
4674 
4675  const SwTextFormatColl *pTopColl =
4676  m_pCSS1Parser->GetTextFormatColl( nTopColl, rTopClass );
4677  const SfxItemSet& rItemSet = pTopColl->GetAttrSet();
4678  const SfxPoolItem *pItem;
4679  if( SfxItemState::SET == rItemSet.GetItemState(RES_LR_SPACE,true, &pItem) )
4680  {
4681  const SvxLRSpaceItem *pLRItem =
4682  static_cast<const SvxLRSpaceItem *>(pItem);
4683 
4684  sal_Int32 nLeft = pLRItem->GetTextLeft();
4685  sal_Int32 nRight = pLRItem->GetRight();
4686  nFirstLineIndent = pLRItem->GetTextFirstLineOfst();
4687 
4688  // In Definition lists the margins also contain the margins from the previous levels
4689  if( RES_POOLCOLL_HTML_DD == nTopColl )
4690  {
4691  const SvxLRSpaceItem& rDTLRSpace = m_pCSS1Parser
4692  ->GetTextFormatColl(RES_POOLCOLL_HTML_DT, OUString())
4693  ->GetLRSpace();
4694  nLeft -= rDTLRSpace.GetTextLeft();
4695  nRight -= rDTLRSpace.GetRight();
4696  }
4697  else if( RES_POOLCOLL_HTML_DT == nTopColl )
4698  {
4699  nLeft = 0;
4700  nRight = 0;
4701  }
4702 
4703  // the paragraph margins add up
4704  nLeftMargin = nLeftMargin + static_cast< sal_uInt16 >(nLeft);
4705  nRightMargin = nRightMargin + static_cast< sal_uInt16 >(nRight);
4706 
4707  pContext->SetMargins( nLeftMargin, nRightMargin,
4708  nFirstLineIndent );
4709  }
4710  if( SfxItemState::SET == rItemSet.GetItemState(RES_UL_SPACE,true, &pItem) )
4711  {
4712  const SvxULSpaceItem *pULItem =
4713  static_cast<const SvxULSpaceItem *>(pItem);
4714  pContext->SetULSpace( pULItem->GetUpper(), pULItem->GetLower() );
4715  }
4716  }
4717 
4718  // If no style is set in the context use the text body.
4719  if( !pCollToSet )
4720  {
4721  pCollToSet = m_pCSS1Parser->GetTextCollFromPool( nDfltColl );
4722  const SvxLRSpaceItem& rLRItem = pCollToSet->GetLRSpace();
4723  if( !nLeftMargin )
4724  nLeftMargin = static_cast< sal_uInt16 >(rLRItem.GetTextLeft());
4725  if( !nRightMargin )
4726  nRightMargin = static_cast< sal_uInt16 >(rLRItem.GetRight());
4727  if( !nFirstLineIndent )
4728  nFirstLineIndent = rLRItem.GetTextFirstLineOfst();
4729  }
4730 
4731  // remove previous hard attribution of paragraph
4732  for( auto pParaAttr : m_aParaAttrs )
4733  pParaAttr->Invalidate();
4734  m_aParaAttrs.clear();
4735 
4736  // set the style
4737  m_xDoc->SetTextFormatColl( *m_pPam, pCollToSet );
4738 
4739  // if applicable correct the paragraph indent
4740  const SvxLRSpaceItem& rLRItem = pCollToSet->GetLRSpace();
4741  bool bSetLRSpace = nLeftMargin != rLRItem.GetTextLeft() ||
4742  nFirstLineIndent != rLRItem.GetTextFirstLineOfst() ||
4743  nRightMargin != rLRItem.GetRight();
4744 
4745  if( bSetLRSpace )
4746  {
4747  SvxLRSpaceItem aLRItem( rLRItem );
4748  aLRItem.SetTextLeft( nLeftMargin );
4749  aLRItem.SetRight( nRightMargin );
4750  aLRItem.SetTextFirstLineOfst( nFirstLineIndent );
4751  if( pItemSet )
4752  pItemSet->Put( aLRItem );
4753  else
4754  {
4755  NewAttr(m_xAttrTab, &m_xAttrTab->pLRSpace, aLRItem);
4756  m_xAttrTab->pLRSpace->SetLikePara();
4757  m_aParaAttrs.push_back( m_xAttrTab->pLRSpace );
4758  EndAttr( m_xAttrTab->pLRSpace, false );
4759  }
4760  }
4761 
4762  // and now set the attributes
4763  if( pItemSet )
4764  {
4765  InsertParaAttrs( *pItemSet );
4766  delete pItemSet;
4767  }
4768 }
4769 
4771 {
4772  OUString aId, aStyle, aLang, aDir;
4773  OUString aClass;
4774 
4775  const HTMLOptions& rHTMLOptions = GetOptions();
4776  for (size_t i = rHTMLOptions.size(); i; )
4777  {
4778  const HTMLOption& rOption = rHTMLOptions[--i];
4779  switch( rOption.GetToken() )
4780  {
4781  case HtmlOptionId::ID:
4782  aId = rOption.GetString();
4783  break;
4784  case HtmlOptionId::STYLE:
4785  aStyle = rOption.GetString();
4786  break;
4787  case HtmlOptionId::CLASS:
4788  aClass = rOption.GetString();
4789  break;
4790  case HtmlOptionId::LANG:
4791  aLang = rOption.GetString();
4792  break;
4793  case HtmlOptionId::DIR:
4794  aDir = rOption.GetString();
4795  break;
4796  default: break;
4797  }
4798  }
4799 
4800  // create a new context
4801  std::unique_ptr<HTMLAttrContext> xCntxt(new HTMLAttrContext(nToken));
4802 
4803  // set the style and save it in the context
4804  SwCharFormat* pCFormat = m_pCSS1Parser->GetChrFormat( nToken, aClass );
4805  OSL_ENSURE( pCFormat, "No character format found for token" );
4806 
4807  // parse styles (regarding class see also NewPara)
4808  if (HasStyleOptions(aStyle, aId, OUString(), &aLang, &aDir))
4809  {
4810  SfxItemSet aItemSet( m_xDoc->GetAttrPool(), m_pCSS1Parser->GetWhichMap() );
4811  SvxCSS1PropertyInfo aPropInfo;
4812 
4813  if (ParseStyleOptions(aStyle, aId, OUString(), aItemSet, aPropInfo, &aLang, &aDir))
4814  {
4815  OSL_ENSURE( aClass.isEmpty() || !m_pCSS1Parser->GetClass( aClass ),
4816  "Class is not considered" );
4817  DoPositioning( aItemSet, aPropInfo, xCntxt.get() );
4818  InsertAttrs( aItemSet, aPropInfo, xCntxt.get(), true );
4819  }
4820  }
4821 
4822  // Character formats are stored in their own stack and can never be inserted
4823  // by styles. Therefore the attribute doesn't exist in CSS1-Which-Range.
4824  if( pCFormat )
4825  InsertAttr( &m_xAttrTab->pCharFormats, SwFormatCharFormat( pCFormat ), xCntxt.get() );
4826 
4827  // save the context
4828  PushContext(xCntxt);
4829 }
4830 
4832 {
4833  // and if applicable change it via the options
4834  sal_Int16 eVertOri = text::VertOrientation::TOP;
4835  sal_Int16 eHoriOri = text::HoriOrientation::NONE;
4836  Size aSize( 0, 0);
4837  long nSize = 0;
4838  bool bPrcWidth = false;
4839  bool bPrcHeight = false;
4840  sal_uInt16 nType = HTML_SPTYPE_HORI;
4841 
4842  const HTMLOptions& rHTMLOptions = GetOptions();
4843  for (size_t i = rHTMLOptions.size(); i; )
4844  {
4845  const HTMLOption& rOption = rHTMLOptions[--i];
4846  switch( rOption.GetToken() )
4847  {
4848  case HtmlOptionId::TYPE:
4849  rOption.GetEnum( nType, aHTMLSpacerTypeTable );
4850  break;
4851  case HtmlOptionId::ALIGN:
4852  eVertOri =
4853  rOption.GetEnum( aHTMLImgVAlignTable,
4854  eVertOri );
4855  eHoriOri =
4856  rOption.GetEnum( aHTMLImgHAlignTable,
4857  eHoriOri );
4858  break;
4859  case HtmlOptionId::WIDTH:
4860  // First only save as pixel value!
4861  bPrcWidth = (rOption.GetString().indexOf('%') != -1);
4862  aSize.setWidth( static_cast<long>(rOption.GetNumber()) );
4863  break;
4864  case HtmlOptionId::HEIGHT:
4865  // First only save as pixel value!
4866  bPrcHeight = (rOption.GetString().indexOf('%') != -1);
4867  aSize.setHeight( static_cast<long>(rOption.GetNumber()) );
4868  break;
4869  case HtmlOptionId::SIZE:
4870  // First only save as pixel value!
4871  nSize = rOption.GetNumber();
4872  break;
4873  default: break;
4874  }
4875  }
4876 
4877  switch( nType )
4878  {
4879  case HTML_SPTYPE_BLOCK:
4880  {
4881  // create an empty text frame
4882 
4883  // fetch the ItemSet
4884  SfxItemSet aFrameSet( m_xDoc->GetAttrPool(),
4886  if( !IsNewDoc() )
4887  Reader::ResetFrameFormatAttrs( aFrameSet );
4888 
4889  // set the anchor and the adjustment
4890  SetAnchorAndAdjustment( eVertOri, eHoriOri, aFrameSet );
4891 
4892  // and the size of the frame
4893  Size aDfltSz( MINFLY, MINFLY );
4894  Size aSpace( 0, 0 );
4895  SfxItemSet aDummyItemSet( m_xDoc->GetAttrPool(),
4896  m_pCSS1Parser->GetWhichMap() );
4897  SvxCSS1PropertyInfo aDummyPropInfo;
4898 
4899  SetFixSize( aSize, aDfltSz, bPrcWidth, bPrcHeight,
4900  aDummyPropInfo, aFrameSet );
4901  SetSpace( aSpace, aDummyItemSet, aDummyPropInfo, aFrameSet );
4902 
4903  // protect the content
4904  SvxProtectItem aProtectItem( RES_PROTECT) ;
4905  aProtectItem.SetContentProtect( true );
4906  aFrameSet.Put( aProtectItem );
4907 
4908  // create the frame
4909  RndStdIds eAnchorId =
4910  aFrameSet.Get(RES_ANCHOR).GetAnchorId();
4911  SwFrameFormat *pFlyFormat = m_xDoc->MakeFlySection( eAnchorId,
4912  m_pPam->GetPoint(), &aFrameSet );
4913  // Possibly create frames and register auto-bound frames.
4914  RegisterFlyFrame( pFlyFormat );
4915  }
4916  break;
4917  case HTML_SPTYPE_VERT:
4918  if( nSize > 0 )
4919  {
4921  {
4923  ->PixelToLogic( Size(0,nSize),
4924  MapMode(MapUnit::MapTwip) ).Height();
4925  }
4926 
4927  // set a paragraph margin
4928  SwTextNode *pTextNode = nullptr;
4929  if( !m_pPam->GetPoint()->nContent.GetIndex() )
4930  {
4931  // if possible change the bottom paragraph margin
4932  // of previous node
4933 
4934  SetAttr(); // set still open paragraph attributes
4935 
4936  pTextNode = m_xDoc->GetNodes()[m_pPam->GetPoint()->nNode.GetIndex()-1]
4937  ->GetTextNode();
4938 
4939  // If the previous paragraph isn't a text node, then now an
4940  // empty paragraph is created, which already generates a single
4941  // line of spacing.
4942  if( !pTextNode )
4943  nSize = nSize>HTML_PARSPACE ? nSize-HTML_PARSPACE : 0;
4944  }
4945 
4946  if( pTextNode )
4947  {
4948  SvxULSpaceItem aULSpace( static_cast<const SvxULSpaceItem&>(pTextNode
4950  aULSpace.SetLower( aULSpace.GetLower() + static_cast<sal_uInt16>(nSize) );
4951  pTextNode->SetAttr( aULSpace );
4952  }
4953  else
4954  {
4955  NewAttr(m_xAttrTab, &m_xAttrTab->pULSpace, SvxULSpaceItem(0, static_cast<sal_uInt16>(nSize), RES_UL_SPACE));
4956  EndAttr( m_xAttrTab->pULSpace, false );
4957 
4958  AppendTextNode(); // Don't change spacing!
4959  }
4960  }
4961  break;
4962  case HTML_SPTYPE_HORI:
4963  if( nSize > 0 )
4964  {
4965  // If the paragraph is still empty, set first line
4966  // indentation, otherwise apply letter spacing over a space.
4967 
4969  {
4971  ->PixelToLogic( Size(nSize,0),
4972  MapMode(MapUnit::MapTwip) ).Width();
4973  }
4974 
4975  if( !m_pPam->GetPoint()->nContent.GetIndex() )
4976  {
4977  sal_uInt16 nLeft=0, nRight=0;
4978  short nIndent = 0;
4979 
4980  GetMarginsFromContextWithNumBul( nLeft, nRight, nIndent );
4981  nIndent = nIndent + static_cast<short>(nSize);
4982 
4983  SvxLRSpaceItem aLRItem( RES_LR_SPACE );
4984  aLRItem.SetTextLeft( nLeft );
4985  aLRItem.SetRight( nRight );
4986  aLRItem.SetTextFirstLineOfst( nIndent );
4987 
4988  NewAttr(m_xAttrTab, &m_xAttrTab->pLRSpace, aLRItem);
4989  EndAttr( m_xAttrTab->pLRSpace, false );
4990  }
4991  else
4992  {
4993  NewAttr(m_xAttrTab, &m_xAttrTab->pKerning, SvxKerningItem( static_cast<short>(nSize), RES_CHRATR_KERNING ));
4994  OUString aTmp( ' ' );
4995  m_xDoc->getIDocumentContentOperations().InsertString( *m_pPam, aTmp );
4996  EndAttr( m_xAttrTab->pKerning );
4997  }
4998  }
4999  }
5000 }
5001 
5002 sal_uInt16 SwHTMLParser::ToTwips( sal_uInt16 nPixel )
5003 {
5004  if( nPixel && Application::GetDefaultDevice() )
5005  {
5007  Size( nPixel, nPixel ), MapMode( MapUnit::MapTwip ) ).Width();
5008  return static_cast<sal_uInt16>(std::min(nTwips, SwTwips(SAL_MAX_UINT16)));
5009  }
5010  else
5011  return nPixel;
5012 }
5013 
5015 {
5017  if( nWidth )
5018  return nWidth;
5019 
5020  if( !m_aHTMLPageSize.Width() )
5021  {
5022  const SwFrameFormat& rPgFormat = m_pCSS1Parser->GetMasterPageDesc()->GetMaster();
5023 
5024  const SwFormatFrameSize& rSz = rPgFormat.GetFrameSize();
5025  const SvxLRSpaceItem& rLR = rPgFormat.GetLRSpace();
5026  const SvxULSpaceItem& rUL = rPgFormat.GetULSpace();
5027  const SwFormatCol& rCol = rPgFormat.GetCol();
5028 
5029  m_aHTMLPageSize.setWidth( rSz.GetWidth() - rLR.GetLeft() - rLR.GetRight() );
5030  m_aHTMLPageSize.setHeight( rSz.GetHeight() - rUL.GetUpper() - rUL.GetLower() );
5031 
5032  if( 1 < rCol.GetNumCols() )
5034  }
5035 
5036  return m_aHTMLPageSize.Width();
5037 }
5038 
5040 {
5041  OUString aId;
5042  const HTMLOptions& rHTMLOptions = GetOptions();
5043  for (size_t i = rHTMLOptions.size(); i; )
5044  {
5045  const HTMLOption& rOption = rHTMLOptions[--i];
5046  if( HtmlOptionId::ID==rOption.GetToken() )
5047  {
5048  aId = rOption.GetString();
5049  break;
5050  }
5051  }
5052 
5053  if( !aId.isEmpty() )
5054  InsertBookmark( aId );
5055 }
5056 
5058 {
5059  // <BR CLEAR=xxx> is handled as:
5060  // 1.) Only regard the paragraph-bound frames anchored in current paragraph.
5061  // 2.) For left-justified aligned frames, CLEAR=LEFT or ALL, and for right-
5062  // justified aligned frames, CLEAR=RIGHT or ALL, the wrap-through is
5063  // changed as following:
5064  // 3.) If the paragraph contains no text, then the frames don't get a wrapping
5065  // 4.) otherwise a left aligned frame gets a right "only anchor" wrapping
5066  // and a right aligned frame gets a left "only anchor" wrapping.
5067  // 5.) if in a non-empty paragraph the wrapping of a frame is changed,
5068  // then a new paragraph is opened
5069  // 6.) If no wrappings of frames are changed, a hard line break is inserted.
5070 
5071  OUString aId, aStyle, aClass; // the id of bookmark
5072  bool bClearLeft = false, bClearRight = false;
5073  bool bCleared = false; // Was a CLEAR executed?
5074 
5075  // then we fetch the options
5076  const HTMLOptions& rHTMLOptions = GetOptions();
5077  for (size_t i = rHTMLOptions.size(); i; )
5078  {
5079  const HTMLOption& rOption = rHTMLOptions[--i];
5080  switch( rOption.GetToken() )
5081  {
5082  case HtmlOptionId::CLEAR:
5083  {
5084  const OUString &rClear = rOption.GetString();
5085  if( rClear.equalsIgnoreAsciiCase( OOO_STRING_SVTOOLS_HTML_AL_all ) )
5086  {
5087  bClearLeft = true;
5088  bClearRight = true;
5089  }
5090  else if( rClear.equalsIgnoreAsciiCase( OOO_STRING_SVTOOLS_HTML_AL_left ) )
5091  bClearLeft = true;
5092  else if( rClear.equalsIgnoreAsciiCase( OOO_STRING_SVTOOLS_HTML_AL_right ) )
5093  bClearRight = true;
5094  }
5095  break;
5096  case HtmlOptionId::ID:
5097  aId = rOption.GetString();
5098  break;
5099  case HtmlOptionId::STYLE:
5100  aStyle = rOption.GetString();
5101  break;
5102  case HtmlOptionId::CLASS:
5103  aClass = rOption.GetString();
5104  break;
5105  default: break;
5106  }
5107  }
5108 
5109  // CLEAR is only supported for the current paragraph
5110  if( bClearLeft || bClearRight )
5111  {
5112  SwNodeIndex& rNodeIdx = m_pPam->GetPoint()->nNode;
5113  SwTextNode* pTextNd = rNodeIdx.GetNode().GetTextNode();
5114  if( pTextNd )
5115  {
5116  const SwFrameFormats& rFrameFormatTable = *m_xDoc->GetSpzFrameFormats();
5117 
5118  for( size_t i=0; i<rFrameFormatTable.size(); i++ )
5119  {
5120  SwFrameFormat *const pFormat = rFrameFormatTable[i];
5121  SwFormatAnchor const*const pAnchor = &pFormat->GetAnchor();
5122  SwPosition const*const pAPos = pAnchor->GetContentAnchor();
5123  if (pAPos &&
5124  ((RndStdIds::FLY_AT_PARA == pAnchor->GetAnchorId()) ||
5125  (RndStdIds::FLY_AT_CHAR == pAnchor->GetAnchorId())) &&
5126  pAPos->nNode == rNodeIdx &&
5127  pFormat->GetSurround().GetSurround() != css::text::WrapTextMode_NONE )
5128  {
5129  sal_Int16 eHori = RES_DRAWFRMFMT == pFormat->Which()
5131  : pFormat->GetHoriOrient().GetHoriOrient();
5132 
5133  css::text::WrapTextMode eSurround = css::text::WrapTextMode_PARALLEL;
5134  if( m_pPam->GetPoint()->nContent.GetIndex() )
5135  {
5136  if( bClearLeft && text::HoriOrientation::LEFT==eHori )
5137  eSurround = css::text::WrapTextMode_RIGHT;
5138  else if( bClearRight && text::HoriOrientation::RIGHT==eHori )
5139  eSurround = css::text::WrapTextMode_LEFT;
5140  }
5141  else if( (bClearLeft && text::HoriOrientation::LEFT==eHori) ||
5142  (bClearRight && text::HoriOrientation::RIGHT==eHori) )
5143  {
5144  eSurround = css::text::WrapTextMode_NONE;
5145  }
5146 
5147  if( css::text::WrapTextMode_PARALLEL != eSurround )
5148  {
5149  SwFormatSurround aSurround( eSurround );
5150  if( css::text::WrapTextMode_NONE != eSurround )
5151  aSurround.SetAnchorOnly( true );
5152  pFormat->SetFormatAttr( aSurround );
5153  bCleared = true;
5154  }
5155  }
5156  }
5157  }
5158  }
5159 
5160  // parse styles
5161  std::shared_ptr<SvxFormatBreakItem> aBreakItem(std::make_shared<SvxFormatBreakItem>(SvxBreak::NONE, RES_BREAK));
5162  bool bBreakItem = false;
5163  if( HasStyleOptions( aStyle, aId, aClass ) )
5164  {
5165  SfxItemSet aItemSet( m_xDoc->GetAttrPool(), m_pCSS1Parser->GetWhichMap() );
5166  SvxCSS1PropertyInfo aPropInfo;
5167 
5168  if( ParseStyleOptions( aStyle, aId, aClass, aItemSet, aPropInfo ) )
5169  {
5170  if( m_pCSS1Parser->SetFormatBreak( aItemSet, aPropInfo ) )
5171  {
5172  aBreakItem.reset(static_cast<SvxFormatBreakItem*>(aItemSet.Get(RES_BREAK).Clone()));
5173  bBreakItem = true;
5174  }
5175  if( !aPropInfo.m_aId.isEmpty() )
5176  InsertBookmark( aPropInfo.m_aId );
5177  }
5178  }
5179 
5180  if( bBreakItem && SvxBreak::PageAfter == aBreakItem->GetBreak() )
5181  {
5182  NewAttr(m_xAttrTab, &m_xAttrTab->pBreak, *aBreakItem);
5183  EndAttr( m_xAttrTab->pBreak, false );
5184  }
5185 
5186  if( !bCleared && !bBreakItem )
5187  {
5188  // If no CLEAR could or should be executed, a line break will be inserted
5189  OUString sTmp( u'\x000a' ); // make the Mac happy :-)
5190  m_xDoc->getIDocumentContentOperations().InsertString( *m_pPam, sTmp );
5191  }
5192  else if( m_pPam->GetPoint()->nContent.GetIndex() )
5193  {
5194  // If a CLEAR is executed in a non-empty paragraph, then after it
5195  // a new paragraph has to be opened.
5196  // MIB 21.02.97: Here actually we should change the bottom paragraph
5197  // margin to zero. This will fail for something like this <BR ..><P>
5198  // (>Netscape). That's why we don't do it.
5200  }
5201  if( bBreakItem && SvxBreak::PageBefore == aBreakItem->GetBreak() )
5202  {
5203  NewAttr(m_xAttrTab, &m_xAttrTab->pBreak, *aBreakItem);
5204  EndAttr( m_xAttrTab->pBreak, false );
5205  }
5206 }
5207 
5209 {
5210  sal_uInt16 nSize = 0;
5211  sal_uInt16 nWidth = 0;
5212 
5213  SvxAdjust eAdjust = SvxAdjust::End;
5214 
5215  bool bPrcWidth = false;
5216  bool bNoShade = false;
5217  bool bColor = false;
5218 
5219  Color aColor;
5220  OUString aId;
5221 
5222  // let's fetch the options
5223  const HTMLOptions& rHTMLOptions = GetOptions();
5224  for (size_t i = rHTMLOptions.size(); i; )
5225  {
5226  const HTMLOption& rOption = rHTMLOptions[--i];
5227  switch( rOption.GetToken() )
5228  {
5229  case HtmlOptionId::ID:
5230  aId = rOption.GetString();
5231  break;
5232  case HtmlOptionId::SIZE:
5233  nSize = static_cast<sal_uInt16>(rOption.GetNumber());
5234  break;
5235  case HtmlOptionId::WIDTH:
5236  bPrcWidth = (rOption.GetString().indexOf('%') != -1);
5237  nWidth = static_cast<sal_uInt16>(rOption.GetNumber());
5238  if( bPrcWidth && nWidth>=100 )
5239  {
5240  // the default case are 100% lines (no attributes necessary)
5241  nWidth = 0;
5242  bPrcWidth = false;
5243  }
5244  break;
5245  case HtmlOptionId::ALIGN:
5246  eAdjust = rOption.GetEnum( aHTMLPAlignTable, eAdjust );
5247  break;
5248  case HtmlOptionId::NOSHADE:
5249  bNoShade = true;
5250  break;
5251  case HtmlOptionId::COLOR:
5252  rOption.GetColor( aColor );
5253  bColor = true;
5254  break;
5255  default: break;
5256  }
5257  }
5258 
5259  if( m_pPam->GetPoint()->nContent.GetIndex() )
5261  if( m_nOpenParaToken != HtmlTokenId::NONE )
5262  EndPara();
5263  AppendTextNode();
5265 
5266  // ...and save in a context
5267  std::unique_ptr<HTMLAttrContext> xCntxt(
5268  new HTMLAttrContext(HtmlTokenId::HORZRULE, RES_POOLCOLL_HTML_HR, OUString()));
5269 
5270  PushContext(xCntxt);
5271 
5272  // set the new style
5273  SetTextCollAttrs(m_aContexts.back().get());
5274 
5275  // the hard attributes of the current paragraph will never become invalid
5276  m_aParaAttrs.clear();
5277 
5278  if( nSize>0 || bColor || bNoShade )
5279  {
5280  // set line colour and/or width
5281  if( !bColor )
5282  aColor = COL_GRAY;
5283 
5284  SvxBorderLine aBorderLine( &aColor );
5285  if( nSize )
5286  {
5287  long nPWidth = 0;
5288  long nPHeight = static_cast<long>(nSize);
5289  SvxCSS1Parser::PixelToTwip( nPWidth, nPHeight );
5290  if ( !bNoShade )
5291  {
5292  aBorderLine.SetBorderLineStyle(SvxBorderLineStyle::DOUBLE);
5293  }
5294  aBorderLine.SetWidth( nPHeight );
5295  }
5296  else if( bNoShade )
5297  {
5298  aBorderLine.SetWidth( DEF_LINE_WIDTH_2 );
5299  }
5300  else
5301  {
5302  aBorderLine.SetBorderLineStyle(SvxBorderLineStyle::DOUBLE);
5303  aBorderLine.SetWidth( DEF_LINE_WIDTH_0 );
5304  }
5305 
5306  SvxBoxItem aBoxItem(RES_BOX);
5307  aBoxItem.SetLine( &aBorderLine, SvxBoxItemLine::BOTTOM );
5308  HTMLAttr* pTmp = new HTMLAttr(*m_pPam->GetPoint(), aBoxItem, nullptr, std::shared_ptr<HTMLAttrTable>());
5309  m_aSetAttrTab.push_back( pTmp );
5310  }
5311  if( nWidth )
5312  {
5313  // If we aren't in a table, then the width value will be "faked" with
5314  // paragraph indents. That makes little sense in a table. In order to
5315  // avoid that the line is considered during the width calculation, it
5316  // still gets an appropriate LRSpace-Item.
5317  if (!m_xTable)
5318  {
5319  // fake length and alignment of line above paragraph indents
5320  long nBrowseWidth = GetCurrentBrowseWidth();
5321  nWidth = bPrcWidth ? static_cast<sal_uInt16>((nWidth*nBrowseWidth) / 100)
5322  : ToTwips( static_cast<sal_uInt16>(nBrowseWidth) );
5323  if( nWidth < MINLAY )
5324  nWidth = MINLAY;
5325 
5326  const SwFormatColl *pColl = (static_cast<long>(nWidth) < nBrowseWidth) ? GetCurrFormatColl() : nullptr;
5327  if (pColl)
5328  {
5329  SvxLRSpaceItem aLRItem( pColl->GetLRSpace() );
5330  long nDist = nBrowseWidth - nWidth;
5331 
5332  switch( eAdjust )
5333  {
5334  case SvxAdjust::Right:
5335  aLRItem.SetTextLeft( static_cast<sal_uInt16>(nDist) );
5336  break;
5337  case SvxAdjust::Left:
5338  aLRItem.SetRight( static_cast<sal_uInt16>(nDist) );
5339  break;
5340  case SvxAdjust::Center:
5341  default:
5342  nDist /= 2;
5343  aLRItem.SetTextLeft( static_cast<sal_uInt16>(nDist) );
5344  aLRItem.SetRight( static_cast<sal_uInt16>(nDist) );
5345  break;
5346  }
5347 
5348  HTMLAttr* pTmp = new HTMLAttr(*m_pPam->GetPoint(), aLRItem, nullptr, std::shared_ptr<HTMLAttrTable>());
5349  m_aSetAttrTab.push_back( pTmp );
5350  }
5351  }
5352  }
5353 
5354  // it's not possible to insert bookmarks in links
5355  if( !aId.isEmpty() )
5356  InsertBookmark( aId );
5357 
5358  // pop current context of stack
5359  std::unique_ptr<HTMLAttrContext> xPoppedContext(PopContext(HtmlTokenId::HORZRULE));
5360  xPoppedContext.reset();
5361 
5363 
5364  // and set the current style in the next paragraph
5365  SetTextCollAttrs();
5366 }
5367 
5369 {
5370  OUString aName, aContent;
5371  bool bHTTPEquiv = false;
5372 
5373  const HTMLOptions& rHTMLOptions = GetOptions();
5374  for (size_t i = rHTMLOptions.size(); i; )
5375  {
5376  const HTMLOption& rOption = rHTMLOptions[--i];
5377  switch( rOption.GetToken() )
5378  {
5379  case HtmlOptionId::NAME:
5380  aName = rOption.GetString();
5381  bHTTPEquiv = false;
5382  break;
5383  case HtmlOptionId::HTTPEQUIV:
5384  aName = rOption.GetString();
5385  bHTTPEquiv = true;
5386  break;
5387  case HtmlOptionId::CONTENT:
5388  aContent = rOption.GetString();
5389  break;
5390  default: break;
5391  }
5392  }
5393 
5394  // Here things get a little tricky: We know for sure, that the Doc-Info
5395  // wasn't changed. Therefore it's enough to query for Generator and Refresh
5396  // to find a not processed Token. These are the only ones which won't change
5397  // the Doc-Info.
5398  if( aName.equalsIgnoreAsciiCase( OOO_STRING_SVTOOLS_HTML_META_generator ) ||
5399  aName.equalsIgnoreAsciiCase( OOO_STRING_SVTOOLS_HTML_META_refresh ) ||
5400  aName.equalsIgnoreAsciiCase( OOO_STRING_SVTOOLS_HTML_META_content_type ) ||
5401  aName.equalsIgnoreAsciiCase( OOO_STRING_SVTOOLS_HTML_META_content_script_type ) )
5402  return;
5403 
5404  aContent = aContent.replaceAll("\r", "").replaceAll("\n", "");
5405 
5406  if( aName.equalsIgnoreAsciiCase( OOO_STRING_SVTOOLS_HTML_META_sdendnote ) )
5407  {
5408  FillEndNoteInfo( aContent );
5409  return;
5410  }
5411