LibreOffice Module sw (master)  1
swhtml.cxx
Go to the documentation of this file.
1 /* -*- Mode: C++; tab-width: 4; indent-tabs-mode: nil; c-basic-offset: 4 -*- */
2 /*
3  * This file is part of the LibreOffice project.
4  *
5  * This Source Code Form is subject to the terms of the Mozilla Public
6  * License, v. 2.0. If a copy of the MPL was not distributed with this
7  * file, You can obtain one at http://mozilla.org/MPL/2.0/.
8  *
9  * This file incorporates work covered by the following license notice:
10  *
11  * Licensed to the Apache Software Foundation (ASF) under one or more
12  * contributor license agreements. See the NOTICE file distributed
13  * with this work for additional information regarding copyright
14  * ownership. The ASF licenses this file to you under the Apache
15  * License, Version 2.0 (the "License"); you may not use this file
16  * except in compliance with the License. You may obtain a copy of
17  * the License at http://www.apache.org/licenses/LICENSE-2.0 .
18  */
19 
20 #include <sal/config.h>
21 
22 #include <algorithm>
23 #include <memory>
24 #include <config_java.h>
25 
26 #include <com/sun/star/document/XDocumentPropertiesSupplier.hpp>
27 #include <com/sun/star/document/XDocumentProperties.hpp>
28 #include <com/sun/star/i18n/ScriptType.hpp>
29 #include <com/sun/star/i18n/XBreakIterator.hpp>
30 #include <comphelper/string.hxx>
31 #include <o3tl/safeint.hxx>
32 #include <rtl/ustrbuf.hxx>
33 #include <svx/svxids.hrc>
34 #if OSL_DEBUG_LEVEL > 0
35 #include <stdlib.h>
36 #endif
37 #include <hintids.hxx>
38 
39 #include <vcl/errinf.hxx>
40 #include <svl/stritem.hxx>
41 #include <vcl/imap.hxx>
42 #include <svtools/htmltokn.h>
43 #include <svtools/htmlkywd.hxx>
44 #include <svtools/ctrltool.hxx>
45 #include <unotools/pathoptions.hxx>
46 #include <vcl/svapp.hxx>
47 #include <vcl/wrkwin.hxx>
48 #include <sfx2/event.hxx>
49 #include <sfx2/fcontnr.hxx>
50 #include <sfx2/docfile.hxx>
51 
52 #include <svtools/htmlcfg.hxx>
53 #include <sfx2/linkmgr.hxx>
54 #include <editeng/kernitem.hxx>
55 #include <editeng/boxitem.hxx>
56 #include <editeng/fhgtitem.hxx>
58 #include <editeng/postitem.hxx>
59 #include <editeng/wghtitem.hxx>
61 #include <editeng/udlnitem.hxx>
63 #include <editeng/blinkitem.hxx>
64 #include <editeng/ulspitem.hxx>
65 #include <editeng/colritem.hxx>
66 #include <editeng/fontitem.hxx>
67 #include <editeng/adjustitem.hxx>
68 #include <editeng/lrspitem.hxx>
69 #include <editeng/protitem.hxx>
70 #include <editeng/flstitem.hxx>
72 
73 #include <frmatr.hxx>
74 #include <charatr.hxx>
75 #include <fmtfld.hxx>
76 #include <fmtpdsc.hxx>
77 #include <txtfld.hxx>
78 #include <fmtanchr.hxx>
79 #include <fmtsrnd.hxx>
80 #include <fmtfsize.hxx>
81 #include <fmtclds.hxx>
82 #include <fchrfmt.hxx>
83 #include <fmtinfmt.hxx>
84 #include <fmtfollowtextflow.hxx>
85 #include <docary.hxx>
86 #include <docstat.hxx>
87 #include <doc.hxx>
88 #include <IDocumentUndoRedo.hxx>
95 #include <IDocumentStatistics.hxx>
96 #include <IDocumentState.hxx>
97 #include <pam.hxx>
98 #include <ndtxt.hxx>
99 #include <mdiexp.hxx>
100 #include <expfld.hxx>
101 #include <poolfmt.hxx>
102 #include <pagedesc.hxx>
103 #include <IMark.hxx>
104 #include <docsh.hxx>
105 #include <editsh.hxx>
106 #include <docufld.hxx>
107 #include "swcss1.hxx"
108 #include <fltini.hxx>
109 #include <htmltbl.hxx>
110 #include "htmlnum.hxx"
111 #include "swhtml.hxx"
112 #include <linkenum.hxx>
113 #include <breakit.hxx>
114 #include <SwAppletImpl.hxx>
115 #include <swdll.hxx>
116 
117 #include <sfx2/viewfrm.hxx>
118 #include <svx/svdobj.hxx>
119 
120 #include <swerror.h>
121 #include <hints.hxx>
122 #include <ndole.hxx>
123 #include <unoframe.hxx>
124 #include "css1atr.hxx"
125 
126 #define FONTSIZE_MASK 7
127 
128 #define HTML_ESC_PROP 80
129 #define HTML_ESC_SUPER DFLT_ESC_SUPER
130 #define HTML_ESC_SUB DFLT_ESC_SUB
131 
132 #define HTML_SPTYPE_BLOCK 1
133 #define HTML_SPTYPE_HORI 2
134 #define HTML_SPTYPE_VERT 3
135 
137 using namespace ::com::sun::star;
138 
139 // <P ALIGN=xxx>, <Hn ALIGN=xxx>, <TD ALIGN=xxx> etc.
141 {
142  { OOO_STRING_SVTOOLS_HTML_AL_left, SvxAdjust::Left },
143  { OOO_STRING_SVTOOLS_HTML_AL_center, SvxAdjust::Center },
144  { OOO_STRING_SVTOOLS_HTML_AL_middle, SvxAdjust::Center }, // Netscape
145  { OOO_STRING_SVTOOLS_HTML_AL_right, SvxAdjust::Right },
146  { OOO_STRING_SVTOOLS_HTML_AL_justify, SvxAdjust::Block },
147  { OOO_STRING_SVTOOLS_HTML_AL_char, SvxAdjust::Left },
148  { nullptr, SvxAdjust(0) }
149 };
150 
151 // <SPACER TYPE=...>
153 {
157  { nullptr, 0 }
158 };
159 
161 {
162  m_bTemplateBrowseMode = true;
163 }
164 
// Returns the name of the Writer/Web template to use for an HTML import, or
// an empty string when no template is to be loaded / none can be found.
// The search tries "internal/html.oth" first and "internal/html.stw" second,
// both looked up via SvtPathOptions::PATH_TEMPLATE.
// NOTE(review): as listed here the first `return OUString();` is not guarded
// by any condition, which would make the whole template search unreachable
// and leave rDoc unused. The listing skips a line right in front of that
// return's comment, so a guarding condition has presumably been lost —
// confirm against the real file before touching this function.
165 OUString HTMLReader::GetTemplateName(SwDoc& rDoc) const
166 {
168  // HTML import into Writer, avoid loading the Writer/Web template.
169  return OUString();
170 
171  const OUString sTemplateWithoutExt("internal/html");
172  SvtPathOptions aPathOpt;
173 
174  // first search for OpenDocument Writer/Web template
175  // OpenDocument Writer/Web template (extension .oth)
176  OUString sTemplate( sTemplateWithoutExt + ".oth" );
177  if (aPathOpt.SearchFile( sTemplate, SvtPathOptions::PATH_TEMPLATE ))
178  return sTemplate;
179 
180  // no OpenDocument Writer/Web template found.
181  // search for OpenOffice.org Writer/Web template
182  sTemplate = sTemplateWithoutExt + ".stw";
183  if (aPathOpt.SearchFile( sTemplate, SvtPathOptions::PATH_TEMPLATE ))
184  return sTemplate;
185 
 // Neither template variant was found: assert (debug builds) and fall back
 // to "no template".
186  OSL_ENSURE( false, "The default HTML template cannot be found in the defined template directories!");
187 
188  return OUString();
189 }
190 
192 {
193  OSL_ENSURE( m_pMedium, "Where is the medium??" );
194 
195  if( m_pMedium->IsRemote() || !m_pMedium->IsStorage() )
196  {
198  return true;
199  }
200  return false;
201 
202 }
203 
204 // Call for the general Reader-Interface
// Imports HTML from m_pStream into rDoc at rPam by running a SwHTMLParser.
//   rDoc     - target document; kept alive by a local reference for the
//              duration of the call
//   rBaseURL - base URL handed on to the parser
//   rPam     - cursor handed on to the parser
//   rName    - name/path handed on to the parser
// Returns ERR_SWG_READ_ERROR when there is no stream, ERRCODE_NONE on success,
// or an ERR_FORMAT_ROWCOL error carrying "<line>,<position>" when the parser
// ends in a state other than Accepted (the Pending case has its own branch).
// NOTE(review): several lines of this function are missing from the listing
// (set-up in the !m_bInsertMode branch, the tail of the parser constructor
// call, and the Pending branch body); the comments only cover what is visible.
205 ErrCode HTMLReader::Read( SwDoc &rDoc, const OUString& rBaseURL, SwPaM &rPam, const OUString & rName )
206 {
208 
 // Without an input stream there is nothing to parse.
209  if( !m_pStream )
210  {
211  OSL_ENSURE( m_pStream, "HTML-Read without stream" );
212  return ERR_SWG_READ_ERROR;
213  }
214 
215  if( !m_bInsertMode )
216  {
218 
219  // Set the HTML page style, when it isn't a HTML document,
220  // otherwise it's already set.
222  {
225  }
226  }
227 
228  // so nobody steals the document!
229  rtl::Reference<SwDoc> aHoldRef(&rDoc);
230  ErrCode nRet = ERRCODE_NONE;
 // The parser is told to treat this as a new document exactly when we are
 // not in insert mode.
231  tools::SvRef<SwHTMLParser> xParser = new SwHTMLParser( &rDoc, rPam, *m_pStream,
232  rName, rBaseURL, !m_bInsertMode, m_pMedium,
233  IsReadUTF8(),
235 
236  SvParserState eState = xParser->CallParser();
237 
238  if( SvParserState::Pending == eState )
240  else if( SvParserState::Accepted != eState )
241  {
 // Build "<line>,<position>" from the parser's current location.
242  const OUString sErr(OUString::number(static_cast<sal_Int32>(xParser->GetLineNr()))
243  + "," + OUString::number(static_cast<sal_Int32>(xParser->GetLinePos())));
244 
245  // use the stream as transport for error number
 // NOTE(review): the StringErrorInfo is created with a bare `new` and only
 // dereferenced; presumably its ownership is taken over elsewhere (error
 // registry) — confirm, otherwise this leaks.
246  nRet = *new StringErrorInfo( ERR_FORMAT_ROWCOL, sErr,
247  DialogMask::ButtonsOk | DialogMask::MessageError );
248  }
249 
250  return nRet;
251 }
252 
254  const OUString& rPath,
255  const OUString& rBaseURL,
256  bool bReadNewDoc,
257  SfxMedium* pMed, bool bReadUTF8,
258  bool bNoHTMLComments,
259  const OUString& rNamespace )
260  : SfxHTMLParser( rIn, bReadNewDoc, pMed ),
261  SwClient( nullptr ),
262  m_aPathToFile( rPath ),
263  m_sBaseURL( rBaseURL ),
264  m_xAttrTab(new HTMLAttrTable),
265  m_pNumRuleInfo( new SwHTMLNumRuleInfo ),
266  m_xDoc( pD ),
267  m_pActionViewShell( nullptr ),
268  m_pSttNdIdx( nullptr ),
269  m_pFormImpl( nullptr ),
270  m_pMarquee( nullptr ),
271  m_pImageMap( nullptr ),
272  m_nBaseFontStMin( 0 ),
273  m_nFontStMin( 0 ),
274  m_nDefListDeep( 0 ),
275  m_nFontStHeadStart( 0 ),
276  m_nSBModuleCnt( 0 ),
277  m_nMissingImgMaps( 0 ),
278  m_nParaCnt( 5 ),
279  // #i83625#
280  m_nContextStMin( 0 ),
281  m_nContextStAttrMin( 0 ),
282  m_nSelectEntryCnt( 0 ),
283  m_nOpenParaToken( HtmlTokenId::NONE ),
284  m_eJumpTo( JumpToMarks::NONE ),
285 #ifdef DBG_UTIL
286  m_nContinue( 0 ),
287 #endif
288  m_eParaAdjust( SvxAdjust::End ),
289  m_bDocInitalized( false ),
290  m_bSetModEnabled( false ),
291  m_bInFloatingFrame( false ),
292  m_bInField( false ),
293  m_bCallNextToken( false ),
294  m_bIgnoreRawData( false ),
295  m_bLBEntrySelected ( false ),
296  m_bTAIgnoreNewPara ( false ),
297  m_bFixMarqueeWidth ( false ),
298  m_bNoParSpace( false ),
299  m_bInNoEmbed( false ),
300  m_bInTitle( false ),
301  m_bUpdateDocStat( false ),
302  m_bFixSelectWidth( false ),
303  m_bTextArea( false ),
304  m_bSelect( false ),
305  m_bInFootEndNoteAnchor( false ),
306  m_bInFootEndNoteSymbol( false ),
307  m_bIgnoreHTMLComments( bNoHTMLComments ),
308  m_bRemoveHidden( false ),
309  m_bBodySeen( false ),
310  m_bReadingHeaderOrFooter( false ),
311  m_isInTableStructure(false),
312  m_nTableDepth( 0 ),
313  m_pTempViewFrame(nullptr)
314 {
 // Constructor body of SwHTMLParser (the first line of the signature is not
 // part of this listing). Besides the member initialisation above it:
 //  - fills m_aFontHeights from the HTML options,
 //  - sets document defaults when a new document is read,
 //  - switches the document into HTML mode, remembering the previous value
 //    in m_bOldIsHTMLMode,
 //  - creates the CSS1 parser and chooses the source encoding,
 //  - extracts an optional jump mark from the medium's URL,
 //  - enables XHTML / ReqIF handling when a namespace is given.
315  m_nEventId = nullptr;
317 
318  m_eScriptLang = HTMLScriptLanguage::Unknown;
319 
320  rCursor.DeleteMark();
321  m_pPam = &rCursor; // re-use existing cursor: avoids spurious ~SwIndexReg assert
 // NOTE(review): byte-wise zeroing of the freshly allocated HTMLAttrTable is
 // only valid while that type is a plain aggregate (presumably of raw
 // pointers) — confirm before adding non-trivial members to it.
322  memset(m_xAttrTab.get(), 0, sizeof(HTMLAttrTable));
323 
324  // Read the font sizes 1-7 from the INI file
 // Each configured size is scaled by 20 (presumably points -> twips — confirm).
325  SvxHtmlOptions& rHtmlOptions = SvxHtmlOptions::Get();
326  m_aFontHeights[0] = rHtmlOptions.GetFontSize( 0 ) * 20;
327  m_aFontHeights[1] = rHtmlOptions.GetFontSize( 1 ) * 20;
328  m_aFontHeights[2] = rHtmlOptions.GetFontSize( 2 ) * 20;
329  m_aFontHeights[3] = rHtmlOptions.GetFontSize( 3 ) * 20;
330  m_aFontHeights[4] = rHtmlOptions.GetFontSize( 4 ) * 20;
331  m_aFontHeights[5] = rHtmlOptions.GetFontSize( 5 ) * 20;
332  m_aFontHeights[6] = rHtmlOptions.GetFontSize( 6 ) * 20;
333 
334  m_bKeepUnknown = rHtmlOptions.IsImportUnknown();
335 
336  if(bReadNewDoc)
337  {
338  //CJK has different defaults, so a different object should be used for this
339  //RES_CHARTR_CJK_FONTSIZE is a valid value
341  m_xDoc->SetDefault( aFontHeight );
343  m_xDoc->SetDefault( aFontHeightCJK );
345  m_xDoc->SetDefault( aFontHeightCTL );
346 
347  // #i18732# - adjust default of option 'FollowTextFlow'
348  // TODO: not sure what the appropriate default for HTML should be?
349  m_xDoc->SetDefault( SwFormatFollowTextFlow(true) );
350  }
351 
352  // Change to HTML mode during the import, so that the right styles are created
353  m_bOldIsHTMLMode = m_xDoc->getIDocumentSettingAccess().get(DocumentSettingId::HTML_MODE);
354  m_xDoc->getIDocumentSettingAccess().set(DocumentSettingId::HTML_MODE, true);
355 
356  m_pCSS1Parser.reset( new SwCSS1Parser( m_xDoc.get(), m_aFontHeights, m_sBaseURL, IsNewDoc() ) );
357  m_pCSS1Parser->SetIgnoreFontFamily( rHtmlOptions.IsIgnoreFontFamily() );
358 
359  if( bReadUTF8 )
360  {
361  SetSrcEncoding( RTL_TEXTENCODING_UTF8 );
362  }
363  else
364  {
 // NOTE(review): pDocSh is dereferenced here without a null check, although
 // the very same GetDocShell() result is tested for null a few lines below.
 // Either the check below is redundant or this can crash for a document
 // without shell — confirm.
365  SwDocShell *pDocSh = m_xDoc->GetDocShell();
366  SvKeyValueIterator *pHeaderAttrs =
367  pDocSh->GetHeaderAttributes();
368  if( pHeaderAttrs )
369  SetEncodingByHTTPHeader( pHeaderAttrs );
370  }
371  m_pCSS1Parser->SetDfltEncoding( osl_getThreadTextEncoding() );
372 
373  SwDocShell* pDocSh = m_xDoc->GetDocShell();
374  if( pDocSh )
375  {
376  m_bViewCreated = true; // not, load synchronous
377 
378  // a jump mark is present
379 
380  if( pMed )
381  {
382  m_sJmpMark = pMed->GetURLObject().GetMark();
383  if( !m_sJmpMark.isEmpty() )
384  {
 // Look for a trailing "<separator><type>" suffix on the mark. A missing
 // separator (-1) is mapped to 0, and 0 is treated as "no suffix" below, so
 // a separator at the very first position is ignored as well.
386  sal_Int32 nLastPos = m_sJmpMark.lastIndexOf( cMarkSeparator );
387  sal_Int32 nPos = nLastPos != -1 ? nLastPos : 0;
388 
389  OUString sCmp;
390  if (nPos)
391  {
392  sCmp = m_sJmpMark.copy(nPos + 1).replaceAll(" ", "");
393  }
394 
395  if( !sCmp.isEmpty() )
396  {
397  sCmp = sCmp.toAsciiLowerCase();
398  if( sCmp == "region" )
400  else if( sCmp == "table" )
402  else if( sCmp == "graphic" )
404  else if( sCmp == "outline" ||
405  sCmp == "text" ||
406  sCmp == "frame" )
407  m_eJumpTo = JumpToMarks::NONE; // this is nothing valid!
408  else
409  // otherwise this is a normal (book)mark
410  nPos = -1;
411  }
412  else
413  nPos = -1;
414 
 // nPos == -1 means the whole string is the mark name; otherwise cut off
 // the recognised type suffix.
415  if( nPos != -1 )
416  m_sJmpMark = m_sJmpMark.copy( 0, nPos );
417  if( m_sJmpMark.isEmpty() )
419  }
420  }
421  }
422 
 // Any non-empty namespace switches to XHTML handling; "reqif-xhtml"
 // additionally enables the ReqIF flag.
423  if (!rNamespace.isEmpty())
424  {
425  SetNamespace(rNamespace);
426  m_bXHTML = true;
427  if (rNamespace == "reqif-xhtml")
428  m_bReqIF = true;
429  }
430 }
431 
// Body of (presumably) SwHTMLParser::~SwHTMLParser — it is named so in an
// assertion text below; the signature line is not part of this listing.
// Pops and clears any contexts still on the stack, restores the document's
// HTML_MODE setting from m_bOldIsHTMLMode, updates links / finishes loading
// on the doc shell, and releases the parser's helper objects and finally its
// reference to the document.
433 {
434 #ifdef DBG_UTIL
435  OSL_ENSURE( !m_nContinue, "DTOR in continue!" );
436 #endif
437 
438  OSL_ENSURE(m_aContexts.empty(), "There are still contexts on the stack");
439  OSL_ENSURE(!m_nContextStMin, "There are protected contexts");
 // Drop the protection limit so that every remaining context gets popped.
440  m_nContextStMin = 0;
441  while (!m_aContexts.empty())
442  {
443  std::unique_ptr<HTMLAttrContext> xCntxt(PopContext());
444  ClearContext(xCntxt.get());
445  }
446 
 // Remember whether the document was being loaded asynchronously before the
 // flag is reset; the link update below depends on it.
447  bool bAsync = m_xDoc->IsInLoadAsynchron();
448  m_xDoc->SetInLoadAsynchron( false );
449  m_xDoc->getIDocumentSettingAccess().set(DocumentSettingId::HTML_MODE, m_bOldIsHTMLMode);
450 
451  if( m_xDoc->GetDocShell() && m_nEventId )
453 
454  // the DocumentDetected maybe can delete the DocShells, therefore fetch again
455  if( m_xDoc->GetDocShell() )
456  {
457  // update linked sections
458  sal_uInt16 nLinkMode = m_xDoc->getIDocumentSettingAccess().getLinkUpdateMode( true );
459  if( nLinkMode != NEVER && bAsync &&
460  SfxObjectCreateMode::INTERNAL!=m_xDoc->GetDocShell()->GetCreateMode() )
461  m_xDoc->getIDocumentLinksAdministration().GetLinkManager().UpdateAllLinks( nLinkMode == MANUAL, false, nullptr );
462 
463  if ( m_xDoc->GetDocShell()->IsLoading() )
464  {
465  // #i59688#
466  m_xDoc->GetDocShell()->LoadingFinished();
467  }
468  }
469 
470  delete m_pSttNdIdx;
471 
 // Leftover attributes are unexpected (the assertion inside always fires
 // when this branch is entered) but are freed nevertheless.
472  if( !m_aSetAttrTab.empty() )
473  {
474  OSL_ENSURE( m_aSetAttrTab.empty(),"There are still attributes on the stack" );
475  for ( auto& rpAttr : m_aSetAttrTab )
476  delete rpAttr;
477  m_aSetAttrTab.clear();
478  }
479 
480  m_pCSS1Parser.reset();
481  m_pNumRuleInfo.reset();
482  DeleteFormImpl();
484 
485  OSL_ENSURE(!m_xTable.get(), "It exists still a open table");
486  m_pImageMaps.reset();
487 
488  OSL_ENSURE( m_vPendingStack.empty(),
489  "SwHTMLParser::~SwHTMLParser: Here should not be Pending-Stack anymore" );
490  m_vPendingStack.clear();
491 
 // NOTE(review): the document reference is dropped here, yet the
 // m_pTempViewFrame branch below still tests m_xDoc.is(). After clear() that
 // test presumably can never succeed, so the SID_HIDDEN clean-up looks
 // unreachable. Confirm and, if so, move this clear() behind that branch.
492  m_xDoc.clear();
493 
494  if ( m_pTempViewFrame )
495  {
497 
498  // the temporary view frame is hidden, so the hidden flag might need to be removed
499  if ( m_bRemoveHidden && m_xDoc.is() && m_xDoc->GetDocShell() && m_xDoc->GetDocShell()->GetMedium() )
500  m_xDoc->GetDocShell()->GetMedium()->GetItemSet()->ClearItem( SID_HIDDEN );
501  }
502 }
503 
504 IMPL_LINK_NOARG( SwHTMLParser, AsyncCallback, void*, void )
505 {
506  m_nEventId=nullptr;
507 
508  // #i47907# - If the document has already been destructed,
509  // the parser should be aware of this:
510  if( ( m_xDoc->GetDocShell() && m_xDoc->GetDocShell()->IsAbortingImport() )
511  || 1 == m_xDoc->getReferenceCount() )
512  {
513  // was the import aborted by SFX?
514  eState = SvParserState::Error;
515  }
516 
517  GetAsynchCallLink().Call(nullptr);
518 }
519 
// NOTE(review): the signature line of this function is not part of the
// listing; it is presumably SwHTMLParser::CallParser (HTMLReader::Read calls
// xParser->CallParser() and this body ends by returning a parser state) —
// confirm. Visible behaviour: remembers the start node in m_pSttNdIdx, splits
// the paragraph at the cursor when inserting into an existing document,
// either posts the asynchronous callback (medium present) or sets up a
// progress bar sized by the stream length (no medium), and registers this
// parser at page descriptor 0.
521 {
522  // create temporary index on position 0, so it won't be moved!
523  m_pSttNdIdx = new SwNodeIndex( m_xDoc->GetNodes() );
524  if( !IsNewDoc() ) // insert into existing document ?
525  {
526  const SwPosition* pPos = m_pPam->GetPoint();
527 
 // Split twice so that the inserted content gets a paragraph of its own;
 // m_pSttNdIdx is set to the node in front of the cursor node in between.
528  m_xDoc->getIDocumentContentOperations().SplitNode( *pPos, false );
529 
530  *m_pSttNdIdx = pPos->nNode.GetIndex()-1;
531  m_xDoc->getIDocumentContentOperations().SplitNode( *pPos, false );
532 
533  SwPaM aInsertionRangePam( *pPos );
534 
536 
537  // split any redline over the insertion point
538  aInsertionRangePam.SetMark();
539  *aInsertionRangePam.GetPoint() = *m_pPam->GetPoint();
540  aInsertionRangePam.Move( fnMoveBackward );
541  m_xDoc->getIDocumentRedlineAccess().SplitRedline( aInsertionRangePam );
542 
543  m_xDoc->SetTextFormatColl( *m_pPam,
544  m_pCSS1Parser->GetTextCollFromPool( RES_POOLCOLL_STANDARD ));
545  }
546 
547  if( GetMedium() )
548  {
549  if( !m_bViewCreated )
550  {
551  m_nEventId = Application::PostUserEvent( LINK( this, SwHTMLParser, AsyncCallback ) );
552  }
553  else
554  {
 // NOTE(review): this branch is only reached when m_bViewCreated is already
 // true, so the assignment below does not change anything.
555  m_bViewCreated = true;
556  m_nEventId = nullptr;
557  }
558  }
559  else // show progress bar
560  {
 // Seek to the end to learn the stream length for the progress range, then
 // rewind; stream errors caused by the seeks are reset.
561  rInput.Seek(STREAM_SEEK_TO_END);
562  rInput.ResetError();
563 
564  m_xProgress.reset(new ImportProgress(m_xDoc->GetDocShell(), 0, rInput.Tell()));
565 
566  rInput.Seek(STREAM_SEEK_TO_BEGIN);
567  rInput.ResetError();
568  }
569 
570  m_xDoc->GetPageDesc( 0 ).Add( this );
571 
573  return eRet;
574 }
575 
// NOTE(review): the signature line is not part of the listing; presumably this
// is SwHTMLParser::CanRemoveNode, the helper called with nNodeIdx from the
// paragraph clean-up in Continue — confirm.
// Returns true when the node directly in front of nNodeIdx is a content node
// or the end node of a section.
// NOTE(review): reads index nNodeIdx - 1, so this assumes nNodeIdx > 0.
577 {
578  const SwNode *pPrev = m_xDoc->GetNodes()[nNodeIdx - 1];
579  return pPrev->IsContentNode() || (pPrev->IsEndNode() && pPrev->StartOfSectionNode()->IsSectionNode());
580 }
581 
// Body of (presumably) SwHTMLParser::Continue — it is named so in the
// assertion texts below; the signature line (which declares nToken) is not
// part of this listing.
// One parsing pass: on the very first call with a medium and no view it only
// switches to Pending and returns. Otherwise it disables set-modified, the
// OLE link and undo for the duration of the pass, runs HTMLParser::Continue
// (or, after an abort, finishes a pending token via NextToken), and - once the
// parser is no longer Pending - closes open applets, numberings and contexts,
// sets the remaining attributes and repairs the paragraph splits made at the
// start of the import. Finally the undo / modified / OLE state is restored.
583 {
584 #ifdef DBG_UTIL
585  OSL_ENSURE(!m_nContinue, "Continue in Continue - not supposed to happen");
586  m_nContinue++;
587 #endif
588 
589  // When the import (of SFX) is aborted, an error will be set but
590  // we still continue, so that we clean up properly.
591  OSL_ENSURE( SvParserState::Error!=eState,
592  "SwHTMLParser::Continue: already set an error" );
593  if( m_xDoc->GetDocShell() && m_xDoc->GetDocShell()->IsAbortingImport() )
594  eState = SvParserState::Error;
595 
596  // Fetch SwViewShell from document, save it and set as current.
597  SwViewShell *pInitVSh = CallStartAction();
598 
599  if( SvParserState::Error != eState && GetMedium() && !m_bViewCreated )
600  {
601  // At first call first return, show document and wait for callback
602  // time.
603  // At this point in CallParser only one digit was read and
604  // a SaveState(0) was called.
605  eState = SvParserState::Pending;
606  m_bViewCreated = true;
607  m_xDoc->SetInLoadAsynchron( true );
608 
609 #ifdef DBG_UTIL
610  m_nContinue--;
611 #endif
612 
613  return;
614  }
615 
 // Suppress "modified" notifications during the pass; m_bSetModEnabled
 // remembers whether they have to be re-enabled at the end.
616  m_bSetModEnabled = false;
617  if( m_xDoc->GetDocShell() )
618  {
619  m_bSetModEnabled = m_xDoc->GetDocShell()->IsEnableSetModified();
620  if( m_bSetModEnabled )
621  {
622  m_xDoc->GetDocShell()->EnableSetModified( false );
623  }
624  }
625 
626  // during import don't call OLE-Modified
627  Link<bool,void> aOLELink( m_xDoc->GetOle2Link() );
628  m_xDoc->SetOle2Link( Link<bool,void>() );
629 
 // Save the modified and undo state so both can be restored afterwards.
630  bool bModified = m_xDoc->getIDocumentState().IsModified();
631  bool const bWasUndo = m_xDoc->GetIDocumentUndoRedo().DoesUndo();
632  m_xDoc->GetIDocumentUndoRedo().DoUndo(false);
633 
634  // When the import will be aborted, don't call Continue anymore.
635  // If a Pending-Stack exists make sure the stack is ended with a call
636  // of NextToken.
637  if( SvParserState::Error == eState )
638  {
639  OSL_ENSURE( m_vPendingStack.empty() || m_vPendingStack.back().nToken != HtmlTokenId::NONE,
640  "SwHTMLParser::Continue: Pending-Stack without Token" );
641  if( !m_vPendingStack.empty() && m_vPendingStack.back().nToken != HtmlTokenId::NONE )
642  NextToken( m_vPendingStack.back().nToken );
643  OSL_ENSURE( m_vPendingStack.empty(),
644  "SwHTMLParser::Continue: There is again a Pending-Stack" );
645  }
646  else
647  {
 // Resume with the token of the innermost pending entry, if there is one.
648  HTMLParser::Continue( !m_vPendingStack.empty() ? m_vPendingStack.back().nToken : nToken );
649  }
650 
651  // disable progress bar again
652  m_xProgress.reset();
653 
654  bool bLFStripped = false;
655  if( SvParserState::Pending != GetStatus() )
656  {
657  // set the last attributes yet
658  {
659  if( !m_aScriptSource.isEmpty() )
660  {
661  SwScriptFieldType *pType =
662  static_cast<SwScriptFieldType*>(m_xDoc->getIDocumentFieldsAccess().GetSysFieldType( SwFieldIds::Script ));
663 
665  false );
666  InsertAttr( SwFormatField( aField ), false );
667  }
668 
669  if( m_pAppletImpl )
670  {
671  if( m_pAppletImpl->GetApplet().is() )
672  EndApplet();
673  else
674  EndObject();
675  }
676 
677  // maybe remove an existing LF after the last paragraph
678  if( IsNewDoc() )
679  bLFStripped = StripTrailingLF() > 0;
680 
681  // close still open numbering
682  while( GetNumInfo().GetNumRule() )
683  EndNumBulList();
684 
685  OSL_ENSURE( !m_nContextStMin, "There are protected contexts" );
686  // try this twice, first normally to let m_nContextStMin decrease
687  // naturally and get contexts popped in desired order, and if that
688  // fails force it
689  for (int i = 0; i < 2; ++i)
690  {
691  while (m_aContexts.size() > m_nContextStMin)
692  {
693  std::unique_ptr<HTMLAttrContext> xCntxt(PopContext());
694  if (xCntxt)
695  EndContext(xCntxt.get());
696  }
697  if (!m_nContextStMin)
698  break;
699  OSL_ENSURE(!m_nContextStMin, "There are still protected contexts");
700  m_nContextStMin = 0;
701  }
702 
703  m_aParaAttrs.clear();
704 
705  SetAttr( false );
706 
707  // set the first delayed styles
708  m_pCSS1Parser->SetDelayedStyles();
709  }
710 
711  // again correct the start
712  if( !IsNewDoc() && m_pSttNdIdx->GetIndex() )
713  {
714  SwTextNode* pTextNode = m_pSttNdIdx->GetNode().GetTextNode();
715  SwNodeIndex aNxtIdx( *m_pSttNdIdx );
716  if( pTextNode && pTextNode->CanJoinNext( &aNxtIdx ))
717  {
718  const sal_Int32 nStt = pTextNode->GetText().getLength();
719  // when the cursor is still in the node, then set it at the end
720  if( m_pPam->GetPoint()->nNode == aNxtIdx )
721  {
723  m_pPam->GetPoint()->nContent.Assign( pTextNode, nStt );
724  }
725 
 // NOTE(review): the two fix-ups of the PaM bounds below modify state, yet
 // they are compiled only when OSL_DEBUG_LEVEL > 0, so debug and product
 // builds can behave differently here — confirm this is intended.
726 #if OSL_DEBUG_LEVEL > 0
727 // !!! shouldn't be possible, or ??
728  OSL_ENSURE( m_pSttNdIdx->GetIndex()+1 != m_pPam->GetBound().nNode.GetIndex(),
729  "Pam.Bound1 is still in the node" );
730  OSL_ENSURE( m_pSttNdIdx->GetIndex()+1 != m_pPam->GetBound( false ).nNode.GetIndex(),
731  "Pam.Bound2 is still in the node" );
732 
733  if( m_pSttNdIdx->GetIndex()+1 == m_pPam->GetBound().nNode.GetIndex() )
734  {
735  const sal_Int32 nCntPos = m_pPam->GetBound().nContent.GetIndex();
736  m_pPam->GetBound().nContent.Assign( pTextNode,
737  pTextNode->GetText().getLength() + nCntPos );
738  }
739  if( m_pSttNdIdx->GetIndex()+1 == m_pPam->GetBound( false ).nNode.GetIndex() )
740  {
741  const sal_Int32 nCntPos = m_pPam->GetBound( false ).nContent.GetIndex();
742  m_pPam->GetBound( false ).nContent.Assign( pTextNode,
743  pTextNode->GetText().getLength() + nCntPos );
744  }
745 #endif
746  // Keep character attribute!
747  SwTextNode* pDelNd = aNxtIdx.GetNode().GetTextNode();
748  if (pTextNode->GetText().getLength())
749  pDelNd->FormatToTextAttr( pTextNode );
750  else
751  pTextNode->ChgFormatColl( pDelNd->GetTextColl() );
752  pTextNode->JoinNext();
753  }
754  }
755  }
756 
757  if( SvParserState::Accepted == eState )
758  {
759  if( m_nMissingImgMaps )
760  {
761  // Some Image-Map relations are still missing.
762  // Maybe now the Image-Maps are there?
764  }
765 
766  // now remove the last useless paragraph
 // Only considered when the cursor sits at the start of its paragraph and
 // no trailing LF was stripped above.
767  SwPosition* pPos = m_pPam->GetPoint();
768  if( !pPos->nContent.GetIndex() && !bLFStripped )
769  {
770  SwTextNode* pCurrentNd;
771  sal_uLong nNodeIdx = pPos->nNode.GetIndex();
772 
773  bool bHasFlysOrMarks =
775 
776  if( IsNewDoc() )
777  {
778  if (!m_pPam->GetPoint()->nContent.GetIndex() && CanRemoveNode(nNodeIdx))
779  {
781  if( pCNd && pCNd->StartOfSectionIndex()+2 <
782  pCNd->EndOfSectionIndex() && !bHasFlysOrMarks )
783  {
 // Move a shell cursor that stands in the doomed paragraph out of the way
 // before the node is deleted.
785  SwCursorShell *pCursorSh = dynamic_cast<SwCursorShell *>( pVSh );
786  if( pCursorSh &&
787  pCursorSh->GetCursor()->GetPoint()
788  ->nNode.GetIndex() == nNodeIdx )
789  {
790  pCursorSh->MovePara(GoPrevPara, fnParaEnd );
791  pCursorSh->SetMark();
792  pCursorSh->ClearMark();
793  }
 // Detach both PaM content indices from the node before deleting it.
794  m_pPam->GetBound().nContent.Assign( nullptr, 0 );
795  m_pPam->GetBound(false).nContent.Assign( nullptr, 0 );
796  m_xDoc->GetNodes().Delete( m_pPam->GetPoint()->nNode );
797  }
798  }
799  }
800  else if( nullptr != ( pCurrentNd = m_xDoc->GetNodes()[ nNodeIdx ]->GetTextNode()) && !bHasFlysOrMarks )
801  {
802  if( pCurrentNd->CanJoinNext( &pPos->nNode ))
803  {
804  SwTextNode* pNextNd = pPos->nNode.GetNode().GetTextNode();
805  pPos->nContent.Assign( pNextNd, 0 );
807  pNextNd->JoinPrev();
808  }
809  else if (pCurrentNd->GetText().isEmpty())
810  {
811  pPos->nContent.Assign( nullptr, 0 );
813  m_xDoc->GetNodes().Delete( pPos->nNode );
815  }
816  }
817  }
818 
819  // annul the SplitNode from the beginning
820  else if( !IsNewDoc() )
821  {
822  if( pPos->nContent.GetIndex() ) // then there was no <p> at the end
823  m_pPam->Move( fnMoveForward, GoInNode ); // therefore to the next
824  SwTextNode* pTextNode = pPos->nNode.GetNode().GetTextNode();
825  SwNodeIndex aPrvIdx( pPos->nNode );
826  if( pTextNode && pTextNode->CanJoinPrev( &aPrvIdx ) &&
827  *m_pSttNdIdx <= aPrvIdx )
828  {
829  // Normally here should take place a JoinNext, but all cursors and
830  // so are registered in pTextNode, so that it MUST remain.
831 
832  // Convert paragraph to character attribute, from Prev adopt
833  // the paragraph attribute and the template!
834  SwTextNode* pPrev = aPrvIdx.GetNode().GetTextNode();
835  pTextNode->ChgFormatColl( pPrev->GetTextColl() );
836  pTextNode->FormatToTextAttr( pPrev );
837  pTextNode->ResetAllAttr();
838 
839  if( pPrev->HasSwAttrSet() )
840  pTextNode->SetAttr( *pPrev->GetpSwAttrSet() );
841 
 // Re-home PaM bounds that still point into pPrev before it is joined away.
842  if( &m_pPam->GetBound().nNode.GetNode() == pPrev )
843  m_pPam->GetBound().nContent.Assign( pTextNode, 0 );
844  if( &m_pPam->GetBound(false).nNode.GetNode() == pPrev )
845  m_pPam->GetBound(false).nContent.Assign( pTextNode, 0 );
846 
847  pTextNode->JoinPrev();
848  }
849  }
850 
851  // adjust AutoLoad in DocumentProperties
 // An auto-reload interval without a URL gets the imported file's own path
 // as reload target.
852  if( IsNewDoc() )
853  {
854  SwDocShell *pDocShell(m_xDoc->GetDocShell());
855  OSL_ENSURE(pDocShell, "no SwDocShell");
856  if (pDocShell) {
857  uno::Reference<document::XDocumentPropertiesSupplier> xDPS(
858  pDocShell->GetModel(), uno::UNO_QUERY_THROW);
859  uno::Reference<document::XDocumentProperties> xDocProps(
860  xDPS->getDocumentProperties());
861  OSL_ENSURE(xDocProps.is(), "DocumentProperties is null");
862  if ( xDocProps.is() && (xDocProps->getAutoloadSecs() > 0) &&
863  (xDocProps->getAutoloadURL().isEmpty()) )
864  {
865  xDocProps->setAutoloadURL(m_aPathToFile);
866  }
867  }
868  }
869 
870  if( m_bUpdateDocStat )
871  {
872  m_xDoc->getIDocumentStatistics().UpdateDocStat( false, true );
873  }
874  }
875 
 // The start index is only needed while further passes are pending.
876  if( SvParserState::Pending != GetStatus() )
877  {
878  delete m_pSttNdIdx;
879  m_pSttNdIdx = nullptr;
880  }
881 
882  // should the parser be the last one who hold the document, then nothing
883  // has to be done anymore, document will be destroyed shortly!
884  if( 1 < m_xDoc->getReferenceCount() )
885  {
886  if( bWasUndo )
887  {
888  m_xDoc->GetIDocumentUndoRedo().DelAllUndoObj();
889  m_xDoc->GetIDocumentUndoRedo().DoUndo(true);
890  }
891  else if( !pInitVSh )
892  {
893  // When at the beginning of Continue no Shell was available,
894  // it's possible in the meantime one was created.
895  // In that case the bWasUndo flag is wrong and we must
896  // enable Undo.
897  SwViewShell *pTmpVSh = CheckActionViewShell();
898  if( pTmpVSh )
899  {
900  m_xDoc->GetIDocumentUndoRedo().DoUndo(true);
901  }
902  }
903 
 // Restore the OLE link, the modified flag and set-modified handling saved
 // at the top of this pass.
904  m_xDoc->SetOle2Link( aOLELink );
905  if( !bModified )
906  m_xDoc->getIDocumentState().ResetModified();
907  if( m_bSetModEnabled && m_xDoc->GetDocShell() )
908  {
909  m_xDoc->GetDocShell()->EnableSetModified();
910  m_bSetModEnabled = false; // this is unnecessary here
911  }
912  }
913 
914  // When the Document-SwViewShell still exists and an Action is open
915  // (doesn't have to be by abort), end the Action, disconnect from Shell
916  // and finally reconstruct the old Shell.
917  CallEndAction( true );
918 
919 #ifdef DBG_UTIL
920  m_nContinue--;
921 #endif
922 }
923 
924 void SwHTMLParser::Modify( const SfxPoolItem* pOld, const SfxPoolItem *pNew )
925 {
926  switch( pOld ? pOld->Which() : pNew ? pNew->Which() : 0 )
927  {
928  case RES_OBJECTDYING:
929  if (pOld && static_cast<const SwPtrMsgPoolItem *>(pOld)->pObject == GetRegisteredIn())
930  {
931  // then we kill ourself
932  EndListeningAll();
933  ReleaseRef(); // otherwise we're done!
934  }
935  break;
936  }
937 }
938 
// Body of (presumably) SwHTMLParser::DocumentDetected — it is named so in the
// assertion text below and called from the token handler before the first
// token is processed; the signature line is not part of this listing.
// Marks the document as initialised (expected to happen once). For a new
// document it also finishes a header that is still open, ends the current
// action, switches undo off and starts a new action.
940 {
941  OSL_ENSURE( !m_bDocInitalized, "DocumentDetected called multiple times" );
942  m_bDocInitalized = true;
943  if( IsNewDoc() )
944  {
945  if( IsInHeader() )
946  FinishHeader();
947 
948  CallEndAction( true );
949 
950  m_xDoc->GetIDocumentUndoRedo().DoUndo(false);
951  // For DocumentDetected in general a SwViewShell is created.
952  // But it also can be created later, in case the UI is captured.
953  CallStartAction();
954  }
955 }
956 
957 // is called for every token that is recognised in CallParser
959 {
960  if( ( m_xDoc->GetDocShell() && m_xDoc->GetDocShell()->IsAbortingImport() )
961  || 1 == m_xDoc->getReferenceCount() )
962  {
963  // Was the import cancelled by SFX? If a pending stack
964  // exists, clean it.
965  eState = SvParserState::Error;
966  OSL_ENSURE( m_vPendingStack.empty() || m_vPendingStack.back().nToken != HtmlTokenId::NONE,
967  "SwHTMLParser::NextToken: Pending-Stack without token" );
968  if( 1 == m_xDoc->getReferenceCount() || m_vPendingStack.empty() )
969  return ;
970  }
971 
972 #if OSL_DEBUG_LEVEL > 0
973  if( !m_vPendingStack.empty() )
974  {
975  switch( nToken )
976  {
977  // tables are read by recursive method calls
978  case HtmlTokenId::TABLE_ON:
979  // For CSS declarations we might have to wait
980  // for a file download to finish
981  case HtmlTokenId::LINK:
982  // For controls we might have to set the size.
983  case HtmlTokenId::INPUT:
984  case HtmlTokenId::TEXTAREA_ON:
985  case HtmlTokenId::SELECT_ON:
986  case HtmlTokenId::SELECT_OFF:
987  break;
988  default:
989  OSL_ENSURE( m_vPendingStack.empty(), "Unknown token for Pending-Stack" );
990  break;
991  }
992  }
993 #endif
994 
995  // The following special cases have to be treated before the
996  // filter detection, because Netscape doesn't reference the content
997  // of the title for filter detection either.
998  if( m_vPendingStack.empty() )
999  {
1000  if( m_bInTitle )
1001  {
1002  switch( nToken )
1003  {
1004  case HtmlTokenId::TITLE_OFF:
1005  {
1006  OUString sTitle = m_sTitle.makeStringAndClear();
1007  if( IsNewDoc() && !sTitle.isEmpty() )
1008  {
1009  if( m_xDoc->GetDocShell() ) {
1010  uno::Reference<document::XDocumentPropertiesSupplier>
1011  xDPS(m_xDoc->GetDocShell()->GetModel(),
1012  uno::UNO_QUERY_THROW);
1013  uno::Reference<document::XDocumentProperties> xDocProps(
1014  xDPS->getDocumentProperties());
1015  OSL_ENSURE(xDocProps.is(), "no DocumentProperties");
1016  if (xDocProps.is()) {
1017  xDocProps->setTitle(sTitle);
1018  }
1019 
1020  m_xDoc->GetDocShell()->SetTitle(sTitle);
1021  }
1022  }
1023  m_bInTitle = false;
1024  break;
1025  }
1026 
1027  case HtmlTokenId::NONBREAKSPACE:
1028  m_sTitle.append(" ");
1029  break;
1030 
1031  case HtmlTokenId::SOFTHYPH:
1032  m_sTitle.append("-");
1033  break;
1034 
1035  case HtmlTokenId::TEXTTOKEN:
1036  m_sTitle.append(aToken);
1037  break;
1038 
1039  default:
1040  m_sTitle.append("<");
1041  if( (nToken >= HtmlTokenId::ONOFF_START) && isOffToken(nToken) )
1042  m_sTitle.append("/");
1043  m_sTitle.append(sSaveToken);
1044  if( !aToken.isEmpty() )
1045  {
1046  m_sTitle.append(" ");
1047  m_sTitle.append(aToken);
1048  }
1049  m_sTitle.append(">");
1050  break;
1051  }
1052 
1053  return;
1054  }
1055  }
1056 
1057  // Find out what type of document it is if we don't know already.
1058  // For Controls this has to be finished before the control is inserted
1059  // because for inserting a View is needed.
1060  if( !m_bDocInitalized )
1061  DocumentDetected();
1062 
1063  bool bGetIDOption = false, bInsertUnknown = false;
1064  bool bUpperSpaceSave = m_bUpperSpace;
1065  m_bUpperSpace = false;
1066 
1067  // The following special cases may or have to be treated after the
1068  // filter detection
1069  if( m_vPendingStack.empty() )
1070  {
1071  if( m_bInFloatingFrame )
1072  {
1073  // <SCRIPT> is ignored here (from us), because it is ignored in
1074  // Applets as well
1075  if( HtmlTokenId::IFRAME_OFF == nToken )
1076  {
1077  m_bCallNextToken = false;
1078  m_bInFloatingFrame = false;
1079  }
1080 
1081  return;
1082  }
1083  else if( m_bInNoEmbed )
1084  {
1085  switch( nToken )
1086  {
1087  case HtmlTokenId::NOEMBED_OFF:
1090  m_aContents.clear();
1091  m_bCallNextToken = false;
1092  m_bInNoEmbed = false;
1093  break;
1094 
1095  case HtmlTokenId::RAWDATA:
1097  break;
1098 
1099  default:
1100  OSL_ENSURE( false, "SwHTMLParser::NextToken: invalid tag" );
1101  break;
1102  }
1103 
1104  return;
1105  }
1106  else if( m_pAppletImpl )
1107  {
1108  // in an applet only <PARAM> tags and the </APPLET> tag
1109  // are of interest for us (for the moment)
1110  // <SCRIPT> is ignored here (from Netscape)!
1111 
1112  switch( nToken )
1113  {
1114  case HtmlTokenId::APPLET_OFF:
1115  m_bCallNextToken = false;
1116  EndApplet();
1117  break;
1118  case HtmlTokenId::OBJECT_OFF:
1119  m_bCallNextToken = false;
1120  EndObject();
1121  break;
1122  case HtmlTokenId::PARAM:
1123  InsertParam();
1124  break;
1125  default: break;
1126  }
1127 
1128  return;
1129  }
1130  else if( m_bTextArea )
1131  {
1132  // in a TextArea everything up to </TEXTAREA> is inserted as text.
1133  // <SCRIPT> is ignored here (from Netscape)!
1134 
1135  switch( nToken )
1136  {
1137  case HtmlTokenId::TEXTAREA_OFF:
1138  m_bCallNextToken = false;
1139  EndTextArea();
1140  break;
1141 
1142  default:
1143  InsertTextAreaText( nToken );
1144  break;
1145  }
1146 
1147  return;
1148  }
1149  else if( m_bSelect )
1150  {
1151  // HAS to be treated after bNoScript!
1152  switch( nToken )
1153  {
1154  case HtmlTokenId::SELECT_OFF:
1155  m_bCallNextToken = false;
1156  EndSelect();
1157  return;
1158 
1159  case HtmlTokenId::OPTION:
1161  return;
1162 
1163  case HtmlTokenId::TEXTTOKEN:
1164  InsertSelectText();
1165  return;
1166 
1167  case HtmlTokenId::INPUT:
1168  case HtmlTokenId::SCRIPT_ON:
1169  case HtmlTokenId::SCRIPT_OFF:
1170  case HtmlTokenId::NOSCRIPT_ON:
1171  case HtmlTokenId::NOSCRIPT_OFF:
1172  case HtmlTokenId::RAWDATA:
1173  // treat in normal switch
1174  break;
1175 
1176  default:
1177  // ignore
1178  return;
1179  }
1180  }
1181  else if( m_pMarquee )
1182  {
1183  // in a TextArea everything up to </TEXTAREA> is inserted as text.
1184  // The <SCRIPT> tags are ignored from MS-IE, we ignore the whole
1185  // script.
1186  switch( nToken )
1187  {
1188  case HtmlTokenId::MARQUEE_OFF:
1189  m_bCallNextToken = false;
1190  EndMarquee();
1191  break;
1192 
1193  case HtmlTokenId::TEXTTOKEN:
1195  break;
1196  default: break;
1197  }
1198 
1199  return;
1200  }
1201  else if( m_bInField )
1202  {
1203  switch( nToken )
1204  {
1205  case HtmlTokenId::SDFIELD_OFF:
1206  m_bCallNextToken = false;
1207  EndField();
1208  break;
1209 
1210  case HtmlTokenId::TEXTTOKEN:
1211  InsertFieldText();
1212  break;
1213  default: break;
1214  }
1215 
1216  return;
1217  }
1219  {
1220  switch( nToken )
1221  {
1222  case HtmlTokenId::ANCHOR_OFF:
1223  EndAnchor();
1224  m_bCallNextToken = false;
1225  break;
1226 
1227  case HtmlTokenId::TEXTTOKEN:
1229  break;
1230  default: break;
1231  }
1232  return;
1233  }
1234  else if( !m_aUnknownToken.isEmpty() )
1235  {
1236  // Paste content of unknown tags.
1237  // (but surely if we are not in the header section) fdo#36080 fdo#34666
1238  if (!aToken.isEmpty() && !IsInHeader() )
1239  {
1240  if( !m_bDocInitalized )
1241  DocumentDetected();
1242  m_xDoc->getIDocumentContentOperations().InsertString( *m_pPam, aToken );
1243 
1244  // if there are temporary paragraph attributes and the
1245  // paragraph isn't empty then the paragraph attributes
1246  // are final.
1247  m_aParaAttrs.clear();
1248 
1249  SetAttr();
1250  }
1251 
1252  // Unknown token in the header are only closed by a matching
1253  // end-token, </HEAD> or <BODY>. Text inside is ignored.
1254  switch( nToken )
1255  {
1256  case HtmlTokenId::UNKNOWNCONTROL_OFF:
1257  if( m_aUnknownToken != sSaveToken )
1258  return;
1259  [[fallthrough]];
1260  case HtmlTokenId::FRAMESET_ON:
1261  case HtmlTokenId::HEAD_OFF:
1262  case HtmlTokenId::BODY_ON:
1263  case HtmlTokenId::IMAGE: // Don't know why Netscape acts this way.
1264  m_aUnknownToken.clear();
1265  break;
1266  case HtmlTokenId::TEXTTOKEN:
1267  return;
1268  default:
1269  m_aUnknownToken.clear();
1270  break;
1271  }
1272  }
1273  }
1274 
1275  switch( nToken )
1276  {
1277  case HtmlTokenId::BODY_ON:
1278  if (!m_bBodySeen)
1279  {
1280  m_bBodySeen = true;
1281  if( !m_aStyleSource.isEmpty() )
1282  {
1283  m_pCSS1Parser->ParseStyleSheet( m_aStyleSource );
1284  m_aStyleSource.clear();
1285  }
1286  if( IsNewDoc() )
1287  {
1289  // If there is a template for the first or the right page,
1290  // it is set here.
1291  const SwPageDesc *pPageDesc = nullptr;
1292  if( m_pCSS1Parser->IsSetFirstPageDesc() )
1293  pPageDesc = m_pCSS1Parser->GetFirstPageDesc();
1294  else if( m_pCSS1Parser->IsSetRightPageDesc() )
1295  pPageDesc = m_pCSS1Parser->GetRightPageDesc();
1296 
1297  if( pPageDesc )
1298  {
1299  m_xDoc->getIDocumentContentOperations().InsertPoolItem( *m_pPam, SwFormatPageDesc( pPageDesc ) );
1300  }
1301  }
1302  }
1303  break;
1304 
1305  case HtmlTokenId::LINK:
1306  InsertLink();
1307  break;
1308 
1309  case HtmlTokenId::BASE:
1310  {
1311  const HTMLOptions& rHTMLOptions = GetOptions();
1312  for (size_t i = rHTMLOptions.size(); i; )
1313  {
1314  const HTMLOption& rOption = rHTMLOptions[--i];
1315  switch( rOption.GetToken() )
1316  {
1317  case HtmlOptionId::HREF:
1318  m_sBaseURL = rOption.GetString();
1319  break;
1320  case HtmlOptionId::TARGET:
1321  if( IsNewDoc() )
1322  {
1323  SwDocShell *pDocShell(m_xDoc->GetDocShell());
1324  OSL_ENSURE(pDocShell, "no SwDocShell");
1325  if (pDocShell) {
1326  uno::Reference<document::XDocumentPropertiesSupplier> xDPS(
1327  pDocShell->GetModel(), uno::UNO_QUERY_THROW);
1328  uno::Reference<document::XDocumentProperties>
1329  xDocProps(xDPS->getDocumentProperties());
1330  OSL_ENSURE(xDocProps.is(),"no DocumentProperties");
1331  if (xDocProps.is()) {
1332  xDocProps->setDefaultTarget(
1333  rOption.GetString());
1334  }
1335  }
1336  }
1337  break;
1338  default: break;
1339  }
1340  }
1341  }
1342  break;
1343 
1344  case HtmlTokenId::META:
1345  {
1346  SvKeyValueIterator *pHTTPHeader = nullptr;
1347  if( IsNewDoc() )
1348  {
1349  SwDocShell *pDocSh = m_xDoc->GetDocShell();
1350  if( pDocSh )
1351  pHTTPHeader = pDocSh->GetHeaderAttributes();
1352  }
1353  SwDocShell *pDocShell(m_xDoc->GetDocShell());
1354  OSL_ENSURE(pDocShell, "no SwDocShell");
1355  if (pDocShell)
1356  {
1357  uno::Reference<document::XDocumentProperties> xDocProps;
1358  if (IsNewDoc())
1359  {
1360  const uno::Reference<document::XDocumentPropertiesSupplier>
1361  xDPS( pDocShell->GetModel(), uno::UNO_QUERY_THROW );
1362  xDocProps = xDPS->getDocumentProperties();
1363  OSL_ENSURE(xDocProps.is(), "DocumentProperties is null");
1364  }
1365  ParseMetaOptions( xDocProps, pHTTPHeader );
1366  }
1367  }
1368  break;
1369 
1370  case HtmlTokenId::TITLE_ON:
1371  m_bInTitle = true;
1372  break;
1373 
1374  case HtmlTokenId::SCRIPT_ON:
1375  NewScript();
1376  break;
1377 
1378  case HtmlTokenId::SCRIPT_OFF:
1379  EndScript();
1380  break;
1381 
1382  case HtmlTokenId::NOSCRIPT_ON:
1383  case HtmlTokenId::NOSCRIPT_OFF:
1384  bInsertUnknown = true;
1385  break;
1386 
1387  case HtmlTokenId::STYLE_ON:
1388  NewStyle();
1389  break;
1390 
1391  case HtmlTokenId::STYLE_OFF:
1392  EndStyle();
1393  break;
1394 
1395  case HtmlTokenId::RAWDATA:
1396  if( !m_bIgnoreRawData )
1397  {
1398  if( IsReadScript() )
1399  {
1400  AddScriptSource();
1401  }
1402  else if( IsReadStyle() )
1403  {
1404  if( !m_aStyleSource.isEmpty() )
1405  m_aStyleSource += "\n";
1406  m_aStyleSource += aToken;
1407  }
1408  }
1409  break;
1410 
1411  case HtmlTokenId::OBJECT_ON:
1412  if (m_bXHTML)
1413  {
1414  if (!InsertEmbed())
1415  InsertImage();
1416  break;
1417  }
1418 #if HAVE_FEATURE_JAVA
1419  NewObject();
1420  m_bCallNextToken = m_pAppletImpl!=nullptr && m_xTable;
1421 #endif
1422  break;
1423 
1424  case HtmlTokenId::OBJECT_OFF:
1425  if (!m_aEmbeds.empty())
1426  m_aEmbeds.pop();
1427  break;
1428 
1429  case HtmlTokenId::APPLET_ON:
1430 #if HAVE_FEATURE_JAVA
1431  InsertApplet();
1432  m_bCallNextToken = m_pAppletImpl!=nullptr && m_xTable;
1433 #endif
1434  break;
1435 
1436  case HtmlTokenId::IFRAME_ON:
1439  break;
1440 
1441  case HtmlTokenId::LINEBREAK:
1442  if( !IsReadPRE() )
1443  {
1444  InsertLineBreak();
1445  break;
1446  }
1447  else
1448  bGetIDOption = true;
1449  // <BR>s in <PRE> resemble true LFs, hence no break
1450  [[fallthrough]];
1451 
1452  case HtmlTokenId::NEWPARA:
1453  // CR in PRE/LISTING/XMP
1454  {
1455  if( HtmlTokenId::NEWPARA==nToken ||
1457  {
1458  AppendTextNode(); // there is no LF at this place
1459  // therefore it will cause no problems
1460  SetTextCollAttrs();
1461  }
1462  // progress bar
1463  if (m_xProgress)
1464  m_xProgress->Update(rInput.Tell());
1465  }
1466  break;
1467 
1468  case HtmlTokenId::NONBREAKSPACE:
1469  m_xDoc->getIDocumentContentOperations().InsertString( *m_pPam, OUString(CHAR_HARDBLANK) );
1470  break;
1471 
1472  case HtmlTokenId::SOFTHYPH:
1473  m_xDoc->getIDocumentContentOperations().InsertString( *m_pPam, OUString(CHAR_SOFTHYPHEN) );
1474  break;
1475 
1476  case HtmlTokenId::LINEFEEDCHAR:
1477  if( m_pPam->GetPoint()->nContent.GetIndex() )
1478  AppendTextNode();
1479  if (!m_xTable && !m_xDoc->IsInHeaderFooter(m_pPam->GetPoint()->nNode))
1480  {
1481  NewAttr(m_xAttrTab, &m_xAttrTab->pBreak, SvxFormatBreakItem(SvxBreak::PageBefore, RES_BREAK));
1482  EndAttr( m_xAttrTab->pBreak, false );
1483  }
1484  break;
1485 
1486  case HtmlTokenId::TEXTTOKEN:
1487  // insert string without spanning attributes at the end.
1488  if( !aToken.isEmpty() && ' '==aToken[0] && !IsReadPRE() )
1489  {
1490  sal_Int32 nPos = m_pPam->GetPoint()->nContent.GetIndex();
1491  const SwTextNode* pTextNode = nPos ? m_pPam->GetPoint()->nNode.GetNode().GetTextNode() : nullptr;
1492  if (pTextNode)
1493  {
1494  const OUString& rText = pTextNode->GetText();
1495  sal_Unicode cLast = rText[--nPos];
1496  if( ' ' == cLast || '\x0a' == cLast)
1497  aToken = aToken.copy(1);
1498  }
1499  else
1500  aToken = aToken.copy(1);
1501 
1502  if( aToken.isEmpty() )
1503  {
1504  m_bUpperSpace = bUpperSpaceSave;
1505  break;
1506  }
1507  }
1508 
1509  if( !aToken.isEmpty() )
1510  {
1511  if( !m_bDocInitalized )
1512  DocumentDetected();
1513 
1514  if (!m_aEmbeds.empty())
1515  {
1516  // The text token is inside an OLE object, which means
1517  // alternate text.
1518  SwOLENode* pOLENode = m_aEmbeds.top();
1519  if (SwFlyFrameFormat* pFormat
1520  = dynamic_cast<SwFlyFrameFormat*>(pOLENode->GetFlyFormat()))
1521  {
1522  if (SdrObject* pObject = SwXFrame::GetOrCreateSdrObject(*pFormat))
1523  {
1524  pObject->SetTitle(pObject->GetTitle() + aToken);
1525  break;
1526  }
1527  }
1528  }
1529 
1530  m_xDoc->getIDocumentContentOperations().InsertString( *m_pPam, aToken );
1531 
1532  // if there are temporary paragraph attributes and the
1533  // paragraph isn't empty then the paragraph attributes
1534  // are final.
1535  m_aParaAttrs.clear();
1536 
1537  SetAttr();
1538  }
1539  break;
1540 
1541  case HtmlTokenId::HORZRULE:
1542  InsertHorzRule();
1543  break;
1544 
1545  case HtmlTokenId::IMAGE:
1546  InsertImage();
1547  // if only the parser references the doc, we can break and set
1548  // an error code
1549  if( 1 == m_xDoc->getReferenceCount() )
1550  {
1551  eState = SvParserState::Error;
1552  }
1553  break;
1554 
1555  case HtmlTokenId::SPACER:
1556  InsertSpacer();
1557  break;
1558 
1559  case HtmlTokenId::EMBED:
1560  InsertEmbed();
1561  break;
1562 
1563  case HtmlTokenId::NOEMBED_ON:
1564  m_bInNoEmbed = true;
1565  m_bCallNextToken = bool(m_xTable);
1566  ReadRawData( OOO_STRING_SVTOOLS_HTML_noembed );
1567  break;
1568 
1569  case HtmlTokenId::DEFLIST_ON:
1570  if( m_nOpenParaToken != HtmlTokenId::NONE )
1571  EndPara();
1572  NewDefList();
1573  break;
1574  case HtmlTokenId::DEFLIST_OFF:
1575  if( m_nOpenParaToken != HtmlTokenId::NONE )
1576  EndPara();
1577  EndDefListItem( HtmlTokenId::NONE );
1578  EndDefList();
1579  break;
1580 
1581  case HtmlTokenId::DD_ON:
1582  case HtmlTokenId::DT_ON:
1583  if( m_nOpenParaToken != HtmlTokenId::NONE )
1584  EndPara();
1585  EndDefListItem();// close <DD>/<DT> and set no template
1586  NewDefListItem( nToken );
1587  break;
1588 
1589  case HtmlTokenId::DD_OFF:
1590  case HtmlTokenId::DT_OFF:
1591  // c.f. HtmlTokenId::LI_OFF
1592  // Actually we should close a DD/DT now.
1593  // But neither Netscape nor Microsoft do this and so don't we.
1594  EndDefListItem( nToken );
1595  break;
1596 
1597  // divisions
1598  case HtmlTokenId::DIVISION_ON:
1599  case HtmlTokenId::CENTER_ON:
1600  if (!m_isInTableStructure)
1601  {
1602  if (m_nOpenParaToken != HtmlTokenId::NONE)
1603  {
1604  if (IsReadPRE())
1605  m_nOpenParaToken = HtmlTokenId::NONE;
1606  else
1607  EndPara();
1608  }
1609  NewDivision( nToken );
1610  }
1611  break;
1612 
1613  case HtmlTokenId::DIVISION_OFF:
1614  case HtmlTokenId::CENTER_OFF:
1615  if (!m_isInTableStructure)
1616  {
1617  if (m_nOpenParaToken != HtmlTokenId::NONE)
1618  {
1619  if (IsReadPRE())
1620  m_nOpenParaToken = HtmlTokenId::NONE;
1621  else
1622  EndPara();
1623  }
1624  EndDivision();
1625  }
1626  break;
1627 
1628  case HtmlTokenId::MULTICOL_ON:
1629  if( m_nOpenParaToken != HtmlTokenId::NONE )
1630  EndPara();
1631  NewMultiCol();
1632  break;
1633 
1634  case HtmlTokenId::MULTICOL_OFF:
1635  if( m_nOpenParaToken != HtmlTokenId::NONE )
1636  EndPara();
1637  EndTag( HtmlTokenId::MULTICOL_ON );
1638  break;
1639 
1640  case HtmlTokenId::MARQUEE_ON:
1641  NewMarquee();
1642  m_bCallNextToken = m_pMarquee!=nullptr && m_xTable;
1643  break;
1644 
1645  case HtmlTokenId::FORM_ON:
1646  NewForm();
1647  break;
1648  case HtmlTokenId::FORM_OFF:
1649  EndForm();
1650  break;
1651 
1652  // templates
1653  case HtmlTokenId::PARABREAK_ON:
1654  if( m_nOpenParaToken != HtmlTokenId::NONE )
1655  EndPara( true );
1656  NewPara();
1657  break;
1658 
1659  case HtmlTokenId::PARABREAK_OFF:
1660  EndPara( true );
1661  break;
1662 
1663  case HtmlTokenId::ADDRESS_ON:
1664  if( m_nOpenParaToken != HtmlTokenId::NONE )
1665  EndPara();
1666  NewTextFormatColl( HtmlTokenId::ADDRESS_ON, RES_POOLCOLL_SENDADRESS );
1667  break;
1668 
1669  case HtmlTokenId::ADDRESS_OFF:
1670  if( m_nOpenParaToken != HtmlTokenId::NONE )
1671  EndPara();
1672  EndTextFormatColl( HtmlTokenId::ADDRESS_OFF );
1673  break;
1674 
1675  case HtmlTokenId::BLOCKQUOTE_ON:
1676  case HtmlTokenId::BLOCKQUOTE30_ON:
1677  if( m_nOpenParaToken != HtmlTokenId::NONE )
1678  EndPara();
1679  NewTextFormatColl( HtmlTokenId::BLOCKQUOTE_ON, RES_POOLCOLL_HTML_BLOCKQUOTE );
1680  break;
1681 
1682  case HtmlTokenId::BLOCKQUOTE_OFF:
1683  case HtmlTokenId::BLOCKQUOTE30_OFF:
1684  if( m_nOpenParaToken != HtmlTokenId::NONE )
1685  EndPara();
1686  EndTextFormatColl( HtmlTokenId::BLOCKQUOTE_ON );
1687  break;
1688 
1689  case HtmlTokenId::PREFORMTXT_ON:
1690  case HtmlTokenId::LISTING_ON:
1691  case HtmlTokenId::XMP_ON:
1692  if( m_nOpenParaToken != HtmlTokenId::NONE )
1693  EndPara();
1695  break;
1696 
1697  case HtmlTokenId::PREFORMTXT_OFF:
1698  m_bNoParSpace = true; // the last PRE-paragraph gets a spacing
1699  EndTextFormatColl( HtmlTokenId::PREFORMTXT_OFF );
1700  break;
1701 
1702  case HtmlTokenId::LISTING_OFF:
1703  case HtmlTokenId::XMP_OFF:
1704  EndTextFormatColl( nToken );
1705  break;
1706 
1707  case HtmlTokenId::HEAD1_ON:
1708  case HtmlTokenId::HEAD2_ON:
1709  case HtmlTokenId::HEAD3_ON:
1710  case HtmlTokenId::HEAD4_ON:
1711  case HtmlTokenId::HEAD5_ON:
1712  case HtmlTokenId::HEAD6_ON:
1713  if( m_nOpenParaToken != HtmlTokenId::NONE )
1714  {
1715  if( IsReadPRE() )
1716  m_nOpenParaToken = HtmlTokenId::NONE;
1717  else
1718  EndPara();
1719  }
1720  NewHeading( nToken );
1721  break;
1722 
1723  case HtmlTokenId::HEAD1_OFF:
1724  case HtmlTokenId::HEAD2_OFF:
1725  case HtmlTokenId::HEAD3_OFF:
1726  case HtmlTokenId::HEAD4_OFF:
1727  case HtmlTokenId::HEAD5_OFF:
1728  case HtmlTokenId::HEAD6_OFF:
1729  EndHeading();
1730  break;
1731 
1732  case HtmlTokenId::TABLE_ON:
1733  if( !m_vPendingStack.empty() )
1734  BuildTable( SvxAdjust::End );
1735  else
1736  {
1737  if( m_nOpenParaToken != HtmlTokenId::NONE )
1738  EndPara();
1739  OSL_ENSURE(!m_xTable.get(), "table in table not allowed here");
1740  if( !m_xTable && (IsNewDoc() || !m_pPam->GetNode().FindTableNode()) &&
1741  (m_pPam->GetPoint()->nNode.GetIndex() >
1742  m_xDoc->GetNodes().GetEndOfExtras().GetIndex() ||
1744  {
1745  if ( m_nParaCnt < 5 )
1746  Show(); // show what we have up to here
1747 
1748  SvxAdjust eAdjust = m_xAttrTab->pAdjust
1749  ? static_cast<const SvxAdjustItem&>(m_xAttrTab->pAdjust->GetItem()).
1750  GetAdjust()
1751  : SvxAdjust::End;
1752  BuildTable( eAdjust );
1753  }
1754  else
1755  bInsertUnknown = m_bKeepUnknown;
1756  }
1757  break;
1758 
1759  // lists
1760  case HtmlTokenId::DIRLIST_ON:
1761  case HtmlTokenId::MENULIST_ON:
1762  case HtmlTokenId::ORDERLIST_ON:
1763  case HtmlTokenId::UNORDERLIST_ON:
1764  if( m_nOpenParaToken != HtmlTokenId::NONE )
1765  EndPara();
1766  NewNumBulList( nToken );
1767  break;
1768 
1769  case HtmlTokenId::DIRLIST_OFF:
1770  case HtmlTokenId::MENULIST_OFF:
1771  case HtmlTokenId::ORDERLIST_OFF:
1772  case HtmlTokenId::UNORDERLIST_OFF:
1773  if( m_nOpenParaToken != HtmlTokenId::NONE )
1774  EndPara();
1775  EndNumBulListItem( HtmlTokenId::NONE, true );
1776  EndNumBulList( nToken );
1777  break;
1778 
1779  case HtmlTokenId::LI_ON:
1780  case HtmlTokenId::LISTHEADER_ON:
1781  if( m_nOpenParaToken != HtmlTokenId::NONE &&
1783  || HtmlTokenId::PARABREAK_ON==m_nOpenParaToken) )
1784  {
1785  // only finish paragraph for <P><LI>, not for <DD><LI>
1786  EndPara();
1787  }
1788 
1789  EndNumBulListItem( HtmlTokenId::NONE, false );// close <LI>/<LH> and don't set a template
1790  NewNumBulListItem( nToken );
1791  break;
1792 
1793  case HtmlTokenId::LI_OFF:
1794  case HtmlTokenId::LISTHEADER_OFF:
1795  EndNumBulListItem( nToken, false );
1796  break;
1797 
1798  // Attribute :
1799  case HtmlTokenId::ITALIC_ON:
1800  {
1804  NewStdAttr( HtmlTokenId::ITALIC_ON,
1805  &m_xAttrTab->pItalic, aPosture,
1806  &m_xAttrTab->pItalicCJK, &aPostureCJK,
1807  &m_xAttrTab->pItalicCTL, &aPostureCTL );
1808  }
1809  break;
1810 
1811  case HtmlTokenId::BOLD_ON:
1812  {
1816  NewStdAttr( HtmlTokenId::BOLD_ON,
1817  &m_xAttrTab->pBold, aWeight,
1818  &m_xAttrTab->pBoldCJK, &aWeightCJK,
1819  &m_xAttrTab->pBoldCTL, &aWeightCTL );
1820  }
1821  break;
1822 
1823  case HtmlTokenId::STRIKE_ON:
1824  case HtmlTokenId::STRIKETHROUGH_ON:
1825  {
1826  NewStdAttr( HtmlTokenId::STRIKE_ON, &m_xAttrTab->pStrike,
1828  }
1829  break;
1830 
1831  case HtmlTokenId::UNDERLINE_ON:
1832  {
1833  NewStdAttr( HtmlTokenId::UNDERLINE_ON, &m_xAttrTab->pUnderline,
1835  }
1836  break;
1837 
1838  case HtmlTokenId::SUPERSCRIPT_ON:
1839  {
1840  NewStdAttr( HtmlTokenId::SUPERSCRIPT_ON, &m_xAttrTab->pEscapement,
1842  }
1843  break;
1844 
1845  case HtmlTokenId::SUBSCRIPT_ON:
1846  {
1847  NewStdAttr( HtmlTokenId::SUBSCRIPT_ON, &m_xAttrTab->pEscapement,
1849  }
1850  break;
1851 
1852  case HtmlTokenId::BLINK_ON:
1853  {
1854  NewStdAttr( HtmlTokenId::BLINK_ON, &m_xAttrTab->pBlink,
1855  SvxBlinkItem( true, RES_CHRATR_BLINK ) );
1856  }
1857  break;
1858 
1859  case HtmlTokenId::SPAN_ON:
1860  NewStdAttr( HtmlTokenId::SPAN_ON );
1861  break;
1862 
1863  case HtmlTokenId::ITALIC_OFF:
1864  case HtmlTokenId::BOLD_OFF:
1865  case HtmlTokenId::STRIKE_OFF:
1866  case HtmlTokenId::UNDERLINE_OFF:
1867  case HtmlTokenId::SUPERSCRIPT_OFF:
1868  case HtmlTokenId::SUBSCRIPT_OFF:
1869  case HtmlTokenId::BLINK_OFF:
1870  case HtmlTokenId::SPAN_OFF:
1871  EndTag( nToken );
1872  break;
1873 
1874  case HtmlTokenId::STRIKETHROUGH_OFF:
1875  EndTag( HtmlTokenId::STRIKE_OFF );
1876  break;
1877 
1878  case HtmlTokenId::BASEFONT_ON:
1879  NewBasefontAttr();
1880  break;
1881  case HtmlTokenId::BASEFONT_OFF:
1882  EndBasefontAttr();
1883  break;
1884  case HtmlTokenId::FONT_ON:
1885  case HtmlTokenId::BIGPRINT_ON:
1886  case HtmlTokenId::SMALLPRINT_ON:
1887  NewFontAttr( nToken );
1888  break;
1889  case HtmlTokenId::FONT_OFF:
1890  case HtmlTokenId::BIGPRINT_OFF:
1891  case HtmlTokenId::SMALLPRINT_OFF:
1892  EndFontAttr( nToken );
1893  break;
1894 
1895  case HtmlTokenId::EMPHASIS_ON:
1896  case HtmlTokenId::CITIATION_ON:
1897  case HtmlTokenId::STRONG_ON:
1898  case HtmlTokenId::CODE_ON:
1899  case HtmlTokenId::SAMPLE_ON:
1900  case HtmlTokenId::KEYBOARD_ON:
1901  case HtmlTokenId::VARIABLE_ON:
1902  case HtmlTokenId::DEFINSTANCE_ON:
1903  case HtmlTokenId::SHORTQUOTE_ON:
1904  case HtmlTokenId::LANGUAGE_ON:
1905  case HtmlTokenId::AUTHOR_ON:
1906  case HtmlTokenId::PERSON_ON:
1907  case HtmlTokenId::ACRONYM_ON:
1908  case HtmlTokenId::ABBREVIATION_ON:
1909  case HtmlTokenId::INSERTEDTEXT_ON:
1910  case HtmlTokenId::DELETEDTEXT_ON:
1911 
1912  case HtmlTokenId::TELETYPE_ON:
1913  NewCharFormat( nToken );
1914  break;
1915 
1916  case HtmlTokenId::SDFIELD_ON:
1917  NewField();
1919  break;
1920 
1921  case HtmlTokenId::EMPHASIS_OFF:
1922  case HtmlTokenId::CITIATION_OFF:
1923  case HtmlTokenId::STRONG_OFF:
1924  case HtmlTokenId::CODE_OFF:
1925  case HtmlTokenId::SAMPLE_OFF:
1926  case HtmlTokenId::KEYBOARD_OFF:
1927  case HtmlTokenId::VARIABLE_OFF:
1928  case HtmlTokenId::DEFINSTANCE_OFF:
1929  case HtmlTokenId::SHORTQUOTE_OFF:
1930  case HtmlTokenId::LANGUAGE_OFF:
1931  case HtmlTokenId::AUTHOR_OFF:
1932  case HtmlTokenId::PERSON_OFF:
1933  case HtmlTokenId::ACRONYM_OFF:
1934  case HtmlTokenId::ABBREVIATION_OFF:
1935  case HtmlTokenId::INSERTEDTEXT_OFF:
1936  case HtmlTokenId::DELETEDTEXT_OFF:
1937 
1938  case HtmlTokenId::TELETYPE_OFF:
1939  EndTag( nToken );
1940  break;
1941 
1942  case HtmlTokenId::HEAD_OFF:
1943  if( !m_aStyleSource.isEmpty() )
1944  {
1945  m_pCSS1Parser->ParseStyleSheet( m_aStyleSource );
1946  m_aStyleSource.clear();
1947  }
1948  break;
1949 
1950  case HtmlTokenId::DOCTYPE:
1951  case HtmlTokenId::BODY_OFF:
1952  case HtmlTokenId::HTML_OFF:
1953  case HtmlTokenId::HEAD_ON:
1954  case HtmlTokenId::TITLE_OFF:
1955  break; // don't evaluate further???
1956  case HtmlTokenId::HTML_ON:
1957  {
1958  const HTMLOptions& rHTMLOptions = GetOptions();
1959  for (size_t i = rHTMLOptions.size(); i; )
1960  {
1961  const HTMLOption& rOption = rHTMLOptions[--i];
1962  if( HtmlOptionId::DIR == rOption.GetToken() )
1963  {
1964  const OUString& rDir = rOption.GetString();
1965  SfxItemSet aItemSet( m_xDoc->GetAttrPool(),
1966  m_pCSS1Parser->GetWhichMap() );
1967  SvxCSS1PropertyInfo aPropInfo;
1968  OUString aDummy;
1969  ParseStyleOptions( aDummy, aDummy, aDummy, aItemSet,
1970  aPropInfo, nullptr, &rDir );
1971 
1972  m_pCSS1Parser->SetPageDescAttrs( nullptr, &aItemSet );
1973  break;
1974  }
1975  }
1976  }
1977  break;
1978 
1979  case HtmlTokenId::INPUT:
1980  InsertInput();
1981  break;
1982 
1983  case HtmlTokenId::TEXTAREA_ON:
1984  NewTextArea();
1986  break;
1987 
1988  case HtmlTokenId::SELECT_ON:
1989  NewSelect();
1991  break;
1992 
1993  case HtmlTokenId::ANCHOR_ON:
1994  NewAnchor();
1995  break;
1996 
1997  case HtmlTokenId::ANCHOR_OFF:
1998  EndAnchor();
1999  break;
2000 
2001  case HtmlTokenId::COMMENT:
2002  if( ( aToken.getLength() > 5 ) && ( ! m_bIgnoreHTMLComments ) )
2003  {
2004  // insert as Post-It
2005  // If there are no space characters right behind
2006  // the <!-- and on front of the -->, leave the comment untouched.
2007  if( ' ' == aToken[ 3 ] &&
2008  ' ' == aToken[ aToken.getLength()-3 ] )
2009  {
2010  OUString aComment( aToken.copy( 3, aToken.getLength()-5 ) );
2011  InsertComment(comphelper::string::strip(aComment, ' '));
2012  }
2013  else
2014  {
2015  OUStringBuffer aComment;
2016  aComment.append('<').append(aToken).append('>');
2017  InsertComment( aComment.makeStringAndClear() );
2018  }
2019  }
2020  break;
2021 
2022  case HtmlTokenId::MAP_ON:
2023  // Image Maps are read asynchronously: At first only an image map is created
2024  // Areas are processed later. Nevertheless the
2025  // ImageMap is inserted into the IMap-Array, because it might be used
2026  // already.
2027  m_pImageMap = new ImageMap;
2029  {
2030  if (!m_pImageMaps)
2031  m_pImageMaps.reset( new ImageMaps );
2032  m_pImageMaps->push_back(std::unique_ptr<ImageMap>(m_pImageMap));
2033  }
2034  else
2035  {
2036  delete m_pImageMap;
2037  m_pImageMap = nullptr;
2038  }
2039  break;
2040 
2041  case HtmlTokenId::MAP_OFF:
2042  // there is no ImageMap anymore (don't delete IMap, because it's
2043  // already contained in the array!)
2044  m_pImageMap = nullptr;
2045  break;
2046 
2047  case HtmlTokenId::AREA:
2048  if( m_pImageMap )
2049  ParseAreaOptions( m_pImageMap, m_sBaseURL, SvMacroItemId::OnMouseOver,
2050  SvMacroItemId::OnMouseOut );
2051  break;
2052 
2053  case HtmlTokenId::FRAMESET_ON:
2054  bInsertUnknown = m_bKeepUnknown;
2055  break;
2056 
2057  case HtmlTokenId::NOFRAMES_ON:
2058  if( IsInHeader() )
2059  FinishHeader();
2060  bInsertUnknown = m_bKeepUnknown;
2061  break;
2062 
2063  case HtmlTokenId::UNKNOWNCONTROL_ON:
2064  // Ignore content of unknown token in the header, if the token
2065  // does not start with a '!'.
2066  // (but judging from the code, also if does not start with a '%')
2067  // (and also if we're not somewhere we consider PRE)
2068  if( IsInHeader() && !IsReadPRE() && m_aUnknownToken.isEmpty() &&
2069  !sSaveToken.isEmpty() && '!' != sSaveToken[0] &&
2070  '%' != sSaveToken[0] )
2071  m_aUnknownToken = sSaveToken;
2072  [[fallthrough]];
2073 
2074  default:
2075  bInsertUnknown = m_bKeepUnknown;
2076  break;
2077  }
2078 
2079  if( bGetIDOption )
2080  InsertIDOption();
2081 
2082  if( bInsertUnknown )
2083  {
2084  OUStringBuffer aComment("HTML: <");
2085  if( (nToken >= HtmlTokenId::ONOFF_START) && isOffToken(nToken) )
2086  aComment.append("/");
2087  aComment.append(sSaveToken);
2088  if( !aToken.isEmpty() )
2089  {
2090  UnescapeToken();
2091  aComment.append(" ").append(aToken);
2092  }
2093  aComment.append(">");
2094  InsertComment( aComment.makeStringAndClear() );
2095  }
2096 
2097  // if there are temporary paragraph attributes and the
2098  // paragraph isn't empty then the paragraph attributes are final.
2099  if( !m_aParaAttrs.empty() && m_pPam->GetPoint()->nContent.GetIndex() )
2100  m_aParaAttrs.clear();
2101 }
2102 
2103 static void lcl_swhtml_getItemInfo( const HTMLAttr& rAttr,
2104  bool& rScriptDependent,
2105  sal_uInt16& rScriptType )
2106 {
2107  switch( rAttr.GetItem().Which() )
2108  {
2109  case RES_CHRATR_FONT:
2110  case RES_CHRATR_FONTSIZE:
2111  case RES_CHRATR_LANGUAGE:
2112  case RES_CHRATR_POSTURE:
2113  case RES_CHRATR_WEIGHT:
2114  rScriptType = i18n::ScriptType::LATIN;
2115  rScriptDependent = true;
2116  break;
2117  case RES_CHRATR_CJK_FONT:
2121  case RES_CHRATR_CJK_WEIGHT:
2122  rScriptType = i18n::ScriptType::ASIAN;
2123  rScriptDependent = true;
2124  break;
2125  case RES_CHRATR_CTL_FONT:
2129  case RES_CHRATR_CTL_WEIGHT:
2130  rScriptType = i18n::ScriptType::COMPLEX;
2131  rScriptDependent = true;
2132  break;
2133  default:
2134  rScriptDependent = false;
2135  break;
2136  }
2137 }
2138 
2139 bool SwHTMLParser::AppendTextNode( SwHTMLAppendMode eMode, bool bUpdateNum )
2140 {
2141  // A hard line break at the end always must be removed.
2142  // A second one we replace with paragraph spacing.
2143  sal_Int32 nLFStripped = StripTrailingLF();
2144  if( (AM_NOSPACE==eMode || AM_SOFTNOSPACE==eMode) && nLFStripped > 1 )
2145  eMode = AM_SPACE;
2146 
2147  // the hard attributes of this paragraph will never be invalid again
2148  m_aParaAttrs.clear();
2149 
2150  SwTextNode *pTextNode = (AM_SPACE==eMode || AM_NOSPACE==eMode) ?
2151  m_pPam->GetPoint()->nNode.GetNode().GetTextNode() : nullptr;
2152 
2153  if (pTextNode)
2154  {
2155  const SvxULSpaceItem& rULSpace =
2156  static_cast<const SvxULSpaceItem&>(pTextNode->SwContentNode::GetAttr( RES_UL_SPACE ));
2157 
2158  bool bChange = AM_NOSPACE==eMode ? rULSpace.GetLower() > 0
2159  : rULSpace.GetLower() == 0;
2160 
2161  if( bChange )
2162  {
2163  const SvxULSpaceItem& rCollULSpace =
2164  pTextNode->GetAnyFormatColl().GetULSpace();
2165 
2166  bool bMayReset = AM_NOSPACE==eMode ? rCollULSpace.GetLower() == 0
2167  : rCollULSpace.GetLower() > 0;
2168 
2169  if( bMayReset &&
2170  rCollULSpace.GetUpper() == rULSpace.GetUpper() )
2171  {
2172  pTextNode->ResetAttr( RES_UL_SPACE );
2173  }
2174  else
2175  {
2176  pTextNode->SetAttr(
2177  SvxULSpaceItem( rULSpace.GetUpper(),
2178  AM_NOSPACE==eMode ? 0 : HTML_PARSPACE, RES_UL_SPACE ) );
2179  }
2180  }
2181  }
2182  m_bNoParSpace = AM_NOSPACE==eMode || AM_SOFTNOSPACE==eMode;
2183 
2184  SwPosition aOldPos( *m_pPam->GetPoint() );
2185 
2186  bool bRet = m_xDoc->getIDocumentContentOperations().AppendTextNode( *m_pPam->GetPoint() );
2187 
2188  // split character attributes and maybe set none,
2189  // which are set for the whole paragraph
2190  const SwNodeIndex& rEndIdx = aOldPos.nNode;
2191  const sal_Int32 nEndCnt = aOldPos.nContent.GetIndex();
2192  const SwPosition& rPos = *m_pPam->GetPoint();
2193 
2194  HTMLAttr** pHTMLAttributes = reinterpret_cast<HTMLAttr**>(m_xAttrTab.get());
2195  for (auto nCnt = sizeof(HTMLAttrTable) / sizeof(HTMLAttr*); nCnt--; ++pHTMLAttributes)
2196  {
2197  HTMLAttr *pAttr = *pHTMLAttributes;
2198  if( pAttr && pAttr->GetItem().Which() < RES_PARATR_BEGIN )
2199  {
2200  bool bWholePara = false;
2201 
2202  while( pAttr )
2203  {
2204  HTMLAttr *pNext = pAttr->GetNext();
2205  if( pAttr->GetSttParaIdx() < rEndIdx.GetIndex() ||
2206  (!bWholePara &&
2207  pAttr->GetSttPara() == rEndIdx &&
2208  pAttr->GetSttCnt() != nEndCnt) )
2209  {
2210  bWholePara =
2211  pAttr->GetSttPara() == rEndIdx &&
2212  pAttr->GetSttCnt() == 0;
2213 
2214  sal_Int32 nStt = pAttr->m_nStartContent;
2215  bool bScript = false;
2216  sal_uInt16 nScriptItem;
2217  bool bInsert = true;
2218  lcl_swhtml_getItemInfo( *pAttr, bScript,
2219  nScriptItem );
2220  // set previous part
2221  if( bScript )
2222  {
2223  const SwTextNode *pTextNd =
2224  pAttr->GetSttPara().GetNode().GetTextNode();
2225  OSL_ENSURE( pTextNd, "No text node" );
2226  if( pTextNd )
2227  {
2228  const OUString& rText = pTextNd->GetText();
2229  sal_uInt16 nScriptText =
2230  g_pBreakIt->GetBreakIter()->getScriptType(
2231  rText, pAttr->GetSttCnt() );
2232  sal_Int32 nScriptEnd = g_pBreakIt->GetBreakIter()
2233  ->endOfScript( rText, nStt, nScriptText );
2234  while (nScriptEnd < nEndCnt && nScriptEnd != -1)
2235  {
2236  if( nScriptItem == nScriptText )
2237  {
2238  HTMLAttr *pSetAttr =
2239  pAttr->Clone( rEndIdx, nScriptEnd );
2240  pSetAttr->m_nStartContent = nStt;
2241  pSetAttr->ClearPrev();
2242  if( !pNext || bWholePara )
2243  {
2244  if (pSetAttr->m_bInsAtStart)
2245  m_aSetAttrTab.push_front( pSetAttr );
2246  else
2247  m_aSetAttrTab.push_back( pSetAttr );
2248  }
2249  else
2250  pNext->InsertPrev( pSetAttr );
2251  }
2252  nStt = nScriptEnd;
2253  nScriptText = g_pBreakIt->GetBreakIter()->getScriptType(
2254  rText, nStt );
2255  nScriptEnd = g_pBreakIt->GetBreakIter()
2256  ->endOfScript( rText, nStt, nScriptText );
2257  }
2258  bInsert = nScriptItem == nScriptText;
2259  }
2260  }
2261  if( bInsert )
2262  {
2263  HTMLAttr *pSetAttr =
2264  pAttr->Clone( rEndIdx, nEndCnt );
2265  pSetAttr->m_nStartContent = nStt;
2266 
2267  // When the attribute is for the whole paragraph, the outer
2268  // attributes aren't effective anymore. Hence it may not be inserted
2269  // in the Prev-List of an outer attribute, because that won't be
2270  // set. That leads to shifting when fields are used.
2271  if( !pNext || bWholePara )
2272  {
2273  if (pSetAttr->m_bInsAtStart)
2274  m_aSetAttrTab.push_front( pSetAttr );
2275  else
2276  m_aSetAttrTab.push_back( pSetAttr );
2277  }
2278  else
2279  pNext->InsertPrev( pSetAttr );
2280  }
2281  else
2282  {
2283  HTMLAttr *pPrev = pAttr->GetPrev();
2284  if( pPrev )
2285  {
2286  // the previous attributes must be set anyway
2287  if( !pNext || bWholePara )
2288  {
2289  if (pPrev->m_bInsAtStart)
2290  m_aSetAttrTab.push_front( pPrev );
2291  else
2292  m_aSetAttrTab.push_back( pPrev );
2293  }
2294  else
2295  pNext->InsertPrev( pPrev );
2296  }
2297  }
2298  pAttr->ClearPrev();
2299  }
2300 
2301  pAttr->SetStart( rPos );
2302  pAttr = pNext;
2303  }
2304  }
2305  }
2306 
2307  if( bUpdateNum )
2308  {
2309  if( GetNumInfo().GetDepth() )
2310  {
2311  sal_uInt8 nLvl = GetNumInfo().GetLevel();
2312  SetNodeNum( nLvl );
2313  }
2314  else
2316  }
2317 
2318  // We must set the attribute of the paragraph before now (because of JavaScript)
2319  SetAttr();
2320 
2321  // Now it is time to get rid of all script dependent hints that are
2322  // equal to the settings in the style
2323  SwTextNode *pTextNd = rEndIdx.GetNode().GetTextNode();
2324  OSL_ENSURE( pTextNd, "There is the txt node" );
2325  size_t nCntAttr = (pTextNd && pTextNd->GetpSwpHints())
2326  ? pTextNd->GetSwpHints().Count() : 0;
2327  if( nCntAttr )
2328  {
2329  // These are the end position of all script dependent hints.
2330  // If we find a hint that starts before the current end position,
2331  // we have to set it. If we find a hint that start behind or at
2332  // that position, we have to take the hint value into account.
2333  // If it is equal to the style, or in fact the paragraph value
2334  // for that hint, the hint is removed. Otherwise its end position
2335  // is remembered.
2336  sal_Int32 aEndPos[15] =
2337  { 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0 };
2338  SwpHints& rHints = pTextNd->GetSwpHints();
2339  for( size_t i=0; i < nCntAttr; i++ )
2340  {
2341  SwTextAttr *pHt = rHints.Get( i );
2342  sal_uInt16 nWhich = pHt->Which();
2343  sal_Int16 nIdx = 0;
2344  bool bFont = false;
2345  switch( nWhich )
2346  {
2347  case RES_CHRATR_FONT:
2348  nIdx = 0;
2349  bFont = true;
2350  break;
2351  case RES_CHRATR_FONTSIZE:
2352  nIdx = 1;
2353  break;
2354  case RES_CHRATR_LANGUAGE:
2355  nIdx = 2;
2356  break;
2357  case RES_CHRATR_POSTURE:
2358  nIdx = 3;
2359  break;
2360  case RES_CHRATR_WEIGHT:
2361  nIdx = 4;
2362  break;
2363  case RES_CHRATR_CJK_FONT:
2364  nIdx = 5;
2365  bFont = true;
2366  break;
2368  nIdx = 6;
2369  break;
2371  nIdx = 7;
2372  break;
2374  nIdx = 8;
2375  break;
2376  case RES_CHRATR_CJK_WEIGHT:
2377  nIdx = 9;
2378  break;
2379  case RES_CHRATR_CTL_FONT:
2380  nIdx = 10;
2381  bFont = true;
2382  break;
2384  nIdx = 11;
2385  break;
2387  nIdx = 12;
2388  break;
2390  nIdx = 13;
2391  break;
2392  case RES_CHRATR_CTL_WEIGHT:
2393  nIdx = 14;
2394  break;
2395  default:
2396  // Skip to next attribute
2397  continue;
2398  }
2399  const sal_Int32 nStt = pHt->GetStart();
2400  if( nStt >= aEndPos[nIdx] )
2401  {
2402  const SfxPoolItem& rItem =
2403  static_cast<const SwContentNode *>(pTextNd)->GetAttr( nWhich );
2404  if( bFont ? swhtml_css1atr_equalFontItems(rItem,pHt->GetAttr())
2405  : rItem == pHt->GetAttr() )
2406  {
2407  // The hint is the same as set in the paragraph and
2408  // therefore, it can be deleted
2409  // CAUTION!!! This WILL delete the hint and it MAY
2410  // also delete the SwpHints!!! To avoid any trouble
2411  // we leave the loop immediately if this is the last
2412  // hint.
2413  pTextNd->DeleteAttribute( pHt );
2414  if( 1 == nCntAttr )
2415  break;
2416  i--;
2417  nCntAttr--;
2418  }
2419  else
2420  {
2421  // The hint is different. Therefore all hints within that
2422  // hint have to be ignored.
2423  aEndPos[nIdx] = pHt->GetEnd() ? *pHt->GetEnd() : nStt;
2424  }
2425  }
2426  else
2427  {
2428  // The hint starts before another one ends.
2429  // The hint in this case is not deleted
2430  OSL_ENSURE( pHt->GetEnd() && *pHt->GetEnd() <= aEndPos[nIdx],
2431  "hints aren't nested properly!" );
2432  }
2433  }
2434  }
2435 
2436  if (!m_xTable && !--m_nParaCnt)
2437  Show();
2438 
2439  return bRet;
2440 }
2441 
2443 {
      // NOTE(review): the line carrying this function's signature (listing
      // line 2442) is absent from this listing, so its name and parameters
      // cannot be confirmed from here.
      //
      // Visible behaviour: does nothing unless m_bNoParSpace is set. Otherwise
      // the flag is cleared and the lower paragraph spacing of the node
      // directly in front of the current PaM position is revisited.
2444  //If it already has ParSpace, return
2445  if( !m_bNoParSpace )
2446  return;
2447 
2448  m_bNoParSpace = false;
2449 
      // Index of the node immediately before the PaM point.
2450  sal_uLong nNdIdx = m_pPam->GetPoint()->nNode.GetIndex() - 1;
2451 
      // Only text nodes are handled; anything else is left untouched.
2452  SwTextNode *pTextNode = m_xDoc->GetNodes()[nNdIdx]->GetTextNode();
2453  if( !pTextNode )
2454  return;
2455 
      // Local copy of the RES_UL_SPACE item returned by SwContentNode::GetAttr.
2456  SvxULSpaceItem rULSpace =
2457  static_cast<const SvxULSpaceItem&>(pTextNode->SwContentNode::GetAttr( RES_UL_SPACE ));
      // Only paragraphs whose lower spacing is currently zero are adjusted.
2458  if( !rULSpace.GetLower() )
2459  {
2460  const SvxULSpaceItem& rCollULSpace =
2461  pTextNode->GetAnyFormatColl().GetULSpace();
      // The format collection has a non-zero lower spacing and the same upper
      // spacing: the node's RES_UL_SPACE attribute is reset (presumably so
      // that the collection's value applies again - confirm).
2462  if( rCollULSpace.GetLower() &&
2463  rCollULSpace.GetUpper() == rULSpace.GetUpper() )
2464  {
2465  pTextNode->ResetAttr( RES_UL_SPACE );
2466  }
2467  else
2468  {
2469  //What I do here, is that I examine the attributes, and if
2470  //I find out, that it's CJK/CTL, then I set the paragraph space
2471  //to the value set in HTML_CJK_PARSPACE/HTML_CTL_PARSPACE.
2472 
2473  bool bIsCJK = false;
2474  bool bIsCTL = false;
2475 
2476  const size_t nCntAttr = pTextNode->GetpSwpHints()
2477  ? pTextNode->GetSwpHints().Count() : 0;
2478 
      // The scan stops at the first hint that is a CJK or a CTL character
      // attribute, so at most one of the two flags ends up set.
2479  for(size_t i = 0; i < nCntAttr; ++i)
2480  {
2481  SwTextAttr *const pHt = pTextNode->GetSwpHints().Get(i);
2482  sal_uInt16 const nWhich = pHt->Which();
2483  if (RES_CHRATR_CJK_FONT == nWhich ||
2484  RES_CHRATR_CJK_FONTSIZE == nWhich ||
2485  RES_CHRATR_CJK_LANGUAGE == nWhich ||
2486  RES_CHRATR_CJK_POSTURE == nWhich ||
2487  RES_CHRATR_CJK_WEIGHT == nWhich)
2488  {
2489  bIsCJK = true;
2490  break;
2491  }
2492  if (RES_CHRATR_CTL_FONT == nWhich ||
2493  RES_CHRATR_CTL_FONTSIZE == nWhich ||
2494  RES_CHRATR_CTL_LANGUAGE == nWhich ||
2495  RES_CHRATR_CTL_POSTURE == nWhich ||
2496  RES_CHRATR_CTL_WEIGHT == nWhich)
2497  {
2498  bIsCTL = true;
2499  break;
2500  }
2501  }
2502 
      // NOTE(review): each of the three SetAttr( calls below has lost its
      // argument line in this listing (listing lines 2506, 2511 and 2514), so
      // the values that are actually set cannot be read from here.
2503  if( bIsCTL )
2504  {
2505  pTextNode->SetAttr(
2507  }
2508  else if( bIsCJK )
2509  {
2510  pTextNode->SetAttr(
2512  } else {
2513  pTextNode->SetAttr(
2515  }
2516  }
2517  }
2518 }
2519 
2521 {
      // NOTE(review): the line carrying this function's signature (listing
      // line 2520) is absent from this listing; the assertion text below
      // suggests the function is called Show - confirm.
2522  // Here
2523  // - an EndAction is called, so the document is formatted
2524  // - a Reschedule is called,
2525  // - the own View-Shell is set again
2526  // - and a StartAction is called
2527 
2528  OSL_ENSURE( SvParserState::Working==eState, "Show not in working state - That can go wrong" );
      // Remember the shell returned by CallEndAction() so it can be handed
      // back to CallStartAction() further down.
2529  SwViewShell *pOldVSh = CallEndAction();
2530 
      // NOTE(review): listing line 2531 is missing here. Going by the list at
      // the top of the function this is presumably where the reschedule
      // happens - confirm against the real file.
2532 
      // The parser switches to the error state when the document shell reports
      // an aborting import or the document's reference count is exactly 1.
2533  if( ( m_xDoc->GetDocShell() && m_xDoc->GetDocShell()->IsAbortingImport() )
2534  || 1 == m_xDoc->getReferenceCount() )
2535  {
2536  // was the import aborted by SFX?
2537  eState = SvParserState::Error;
2538  }
2539 
2540  // Fetch the SwViewShell again, as it could be destroyed in Reschedule.
2541  SwViewShell *pVSh = CallStartAction( pOldVSh );
2542 
2543  // is the current node not visible anymore, then we use a bigger increment
2544  if( pVSh )
2545  {
      // NOTE(review): listing line 2546, the start of the statement that ends
      // in "? 5 : 50;", is missing, so neither the assigned variable nor the
      // condition can be read from here.
2547  ? 5 : 50;
2548  }
2549 }
2550 
2552 {
      // NOTE(review): the line carrying this function's signature (listing
      // line 2551) is absent from this listing; the assertion text below
      // suggests the function is called ShowStatline - confirm.
2553  // Here
2554  // - a Reschedule is called, so it can be scrolled
2555  // - the own View-Shell is set again
2556  // - a StartAction/EndAction is called, when there was scrolling.
2557 
2558  OSL_ENSURE( SvParserState::Working==eState, "ShowStatLine not in working state - That can go wrong" );
2559 
2560  // scroll bar
2561  if (m_xProgress)
2562  {
      // With a progress object the current input stream position is reported.
2563  m_xProgress->Update(rInput.Tell());
      // NOTE(review): listing line 2564 is missing here.
2565  }
2566  else
2567  {
      // NOTE(review): listing line 2568 is missing here; per the list at the
      // top of the function it is presumably the reschedule - confirm.
2569 
      // The parser switches to the error state when the document shell reports
      // an aborting import or the document's reference count is exactly 1.
2570  if( ( m_xDoc->GetDocShell() && m_xDoc->GetDocShell()->IsAbortingImport() )
2571  || 1 == m_xDoc->getReferenceCount() )
2572  // was the import aborted by SFX?
2573  eState = SvParserState::Error;
2574 
      // NOTE(review): listing line 2575, which must declare pVSh, is missing.
      // When pVSh has an invalid rectangle the running action is ended and
      // restarted, both with the pointer check switched off.
2576  if( pVSh && pVSh->HasInvalidRect() )
2577  {
2578  CallEndAction( false, false );
2579  CallStartAction( pVSh, false );
2580  }
2581  }
2582 }
2583 
2585 {
      // NOTE(review): the line carrying this function's signature (listing
      // line 2584) is absent from this listing. pVSh and bChkPtr are used
      // without a visible declaration and are presumably its parameters.
      //
      // Visible behaviour: chooses the view shell to run an action on, stores
      // it in m_pActionViewShell, starts the action on it and returns it
      // (nullptr when there is no shell).
2586  OSL_ENSURE( !m_pActionViewShell, "CallStartAction: SwViewShell already set" );
2587 
      // Without a shell, or when the pointer is to be checked, the document's
      // current view shell replaces whatever was passed in.
2588  if( !pVSh || bChkPtr )
2589  {
2590 #if OSL_DEBUG_LEVEL > 0
2591  SwViewShell *pOldVSh = pVSh;
2592 #endif
2593  pVSh = m_xDoc->getIDocumentLayoutAccess().GetCurrentViewShell();
2594 #if OSL_DEBUG_LEVEL > 0
2595  OSL_ENSURE( !pVSh || !pOldVSh || pOldVSh == pVSh, "CallStartAction: Who swapped the SwViewShell?" );
      // pVSh is already null when this condition holds, so the assignment
      // below does not change its value.
2596  if( pOldVSh && !pVSh )
2597  pVSh = nullptr;
2598 #endif
2599  }
2600  m_pActionViewShell = pVSh;
2601 
2602  if( m_pActionViewShell )
2603  {
      // An SwEditShell gets its own StartAction() call.
2604  if( dynamic_cast< const SwEditShell *>( m_pActionViewShell ) != nullptr )
2605  static_cast<SwEditShell*>(m_pActionViewShell)->StartAction();
2606  else
      // NOTE(review): listing line 2607, the statement of this else branch,
      // is missing from this listing.
2608  }
2609 
2610  return m_pActionViewShell;
2611 }
2612 
// Ends the action running on m_pActionViewShell and clears that member.
//
// bChkAction: when true, nothing is ended unless the shell reports a pending
//             action (ActionPend()).
// bChkPtr:    when true, m_pActionViewShell is first compared with the
//             document's current view shell and dropped if they differ.
//
// Returns m_pActionViewShell unchanged on the early-exit path (no shell, or no
// pending action while bChkAction is set); otherwise returns pVSh.
// NOTE(review): the declaration of that pVSh (listing line 2662) is missing
// from this listing - presumably it saves m_pActionViewShell before the member
// is cleared; confirm against the real file.
2613 SwViewShell *SwHTMLParser::CallEndAction( bool bChkAction, bool bChkPtr )
2614 {
2615  if( bChkPtr )
2616  {
2617  SwViewShell *pVSh = m_xDoc->getIDocumentLayoutAccess().GetCurrentViewShell();
2618  OSL_ENSURE( !pVSh || m_pActionViewShell == pVSh,
2619  "CallEndAction: Who swapped the SwViewShell?" );
2620 #if OSL_DEBUG_LEVEL > 0
      // pVSh is already null when this condition holds, so the assignment
      // below does not change its value.
2621  if( m_pActionViewShell && !pVSh )
2622  pVSh = nullptr;
2623 #endif
      // A shell that is no longer the document's current one is forgotten.
2624  if( pVSh != m_pActionViewShell )
2625  m_pActionViewShell = nullptr;
2626  }
2627 
2628  if( !m_pActionViewShell || (bChkAction && !m_pActionViewShell->ActionPend()) )
2629  return m_pActionViewShell;
2630 
2631  if( dynamic_cast< const SwEditShell *>( m_pActionViewShell ) != nullptr )
2632  {
2633  // Already scrolled?, then make sure that the view doesn't move!
      // The view lock and the EndActionByVirDev setting are saved here and
      // restored after EndAction().
2634  const bool bOldLock = m_pActionViewShell->IsViewLocked();
2635  m_pActionViewShell->LockView( true );
2636  const bool bOldEndActionByVirDev = m_pActionViewShell->IsEndActionByVirDev();
      // NOTE(review): listing line 2637 is missing here.
2638  static_cast<SwEditShell*>(m_pActionViewShell)->EndAction();
2639  m_pActionViewShell->SetEndActionByVirDev( bOldEndActionByVirDev );
2640  m_pActionViewShell->LockView( bOldLock );
2641 
2642  // bChkJumpMark is only set when the object was also found
2643  if( m_bChkJumpMark )
2644  {
2645  const Point aVisSttPos( DOCUMENTBORDER, DOCUMENTBORDER );
      // NOTE(review): listing line 2647, the start of the call whose argument
      // continues on line 2648, is missing. The call is only made when a
      // medium exists and the visible area still starts at aVisSttPos.
2646  if( GetMedium() && aVisSttPos == m_pActionViewShell->VisArea().Pos() )
2648  GetMedium()->GetURLObject().GetMark() );
2649  m_bChkJumpMark = false;
2650  }
2651  }
2652  else
      // NOTE(review): listing line 2653, the statement of this else branch,
      // is missing from this listing.
2654 
2655  // if the parser holds the last reference to the document, then we can
2656  // abort here and set an error.
2657  if( 1 == m_xDoc->getReferenceCount() )
2658  {
2659  eState = SvParserState::Error;
2660  }
2661 
      // NOTE(review): listing line 2662 (declaration of pVSh) is missing here.
2663  m_pActionViewShell = nullptr;
2664 
2665  return pVSh;
2666 }
2667 
2669 {
      // NOTE(review): the line carrying this function's signature (listing
      // line 2668) is absent from this listing; the assertion text below
      // suggests the function is called CheckActionViewShell - confirm.
      //
      // Visible behaviour: compares m_pActionViewShell with the document's
      // current view shell, clears the member when they differ and returns
      // the (possibly cleared) member.
2670  SwViewShell *pVSh = m_xDoc->getIDocumentLayoutAccess().GetCurrentViewShell();
2671  OSL_ENSURE( !pVSh || m_pActionViewShell == pVSh,
2672  "CheckActionViewShell: Who has swapped SwViewShell?" );
2673 #if OSL_DEBUG_LEVEL > 0
      // pVSh is already null when this condition holds, so the assignment
      // below does not change its value.
2674  if( m_pActionViewShell && !pVSh )
2675  pVSh = nullptr;
2676 #endif
2677  if( pVSh != m_pActionViewShell )
2678  m_pActionViewShell = nullptr;
2679 
2680  return m_pActionViewShell;
2681 }
2682 
// Applies the ended attributes collected in m_aSetAttrTab to the document and
// re-anchors the fly frames collected in m_aMoveFlyFrames.
//
// bChkEnd:      when true, an attribute (or fly frame) is only processed if it
//               satisfies the end-position tests against the current PaM
//               position below; when false the test is based on the node
//               array sections instead.
// bBeforeTable: when true, attributes that would end in the current node are
//               ended in the previous node or discarded (see the comments in
//               the loop).
// pPostIts:     optional; when given, postit and script fields are moved to
//               the front of this deque instead of being inserted here.
//
// Attributes that are applied or discarded are deleted here; field attributes
// are handed over to a unique_ptr container (aFields or *pPostIts) instead.
2683 void SwHTMLParser::SetAttr_( bool bChkEnd, bool bBeforeTable,
2684  std::deque<std::unique_ptr<HTMLAttr>> *pPostIts )
2685 {
      // Scratch PaM that is positioned on each attribute's range in turn.
2686  std::unique_ptr<SwPaM> pAttrPam( new SwPaM( *m_pPam->GetPoint() ) );
2687  const SwNodeIndex& rEndIdx = m_pPam->GetPoint()->nNode;
2688  const sal_Int32 nEndCnt = m_pPam->GetPoint()->nContent.GetIndex();
2689  HTMLAttr* pAttr;
2690  SwContentNode* pCNd;
2691 
      // Field attributes are collected here and inserted after everything else.
2692  std::vector<std::unique_ptr<HTMLAttr>> aFields;
2693 
      // Walk the table from back to front so that erasing entry n does not
      // disturb the entries that are still to be visited.
2694  for( auto n = m_aSetAttrTab.size(); n; )
2695  {
2696  pAttr = m_aSetAttrTab[ --n ];
2697  sal_uInt16 nWhich = pAttr->m_pItem->Which();
2698 
2699  sal_uLong nEndParaIdx = pAttr->GetEndParaIdx();
2700  bool bSetAttr;
2701  if( bChkEnd )
2702  {
2703  // Set character attribute with end early on, so set them still in
2704  // the current paragraph (because of JavaScript and various "chats"(?)).
2705  // This shouldn't be done for attributes which are used for
2706  // the whole paragraph, because they could be from a paragraph style
2707  // which can't be set. Because the attributes are inserted with
2708  // SETATTR_DONTREPLACE, they should be able to be set later.
2709  bSetAttr = ( nEndParaIdx < rEndIdx.GetIndex() &&
2710  (RES_LR_SPACE != nWhich || !GetNumInfo().GetNumRule()) ) ||
2711  ( !pAttr->IsLikePara() &&
2712  nEndParaIdx == rEndIdx.GetIndex() &&
2713  pAttr->GetEndCnt() < nEndCnt &&
2714  (isCHRATR(nWhich) || isTXTATR_WITHEND(nWhich)) ) ||
2715  ( bBeforeTable &&
2716  nEndParaIdx == rEndIdx.GetIndex() &&
2717  !pAttr->GetEndCnt() );
2718  }
2719  else
2720  {
2721  // Attributes in body nodes array section shouldn't be set if we are in a
2722  // special nodes array section, but vice versa it's possible.
2723  sal_uLong nEndOfIcons = m_xDoc->GetNodes().GetEndOfExtras().GetIndex();
2724  bSetAttr = nEndParaIdx < rEndIdx.GetIndex() ||
2725  rEndIdx.GetIndex() > nEndOfIcons ||
2726  nEndParaIdx <= nEndOfIcons;
2727  }
2728 
2729  if( bSetAttr )
2730  {
2731  // The attribute shouldn't be in the list of temporary paragraph
2732  // attributes, because then it would be deleted.
      // Note that this loop empties m_aParaAttrs completely.
2733  while( !m_aParaAttrs.empty() )
2734  {
2735  OSL_ENSURE( pAttr != m_aParaAttrs.back(),
2736  "SetAttr: Attribute must not yet be set" );
2737  m_aParaAttrs.pop_back();
2738  }
2739 
2740  // then set it
2741  m_aSetAttrTab.erase( m_aSetAttrTab.begin() + n );
2742 
      // Process the attribute and then every attribute chained to it via
      // GetPrev(); nWhich keeps the value read from the table entry.
2743  while( pAttr )
2744  {
2745  HTMLAttr *pPrev = pAttr->GetPrev();
2746  if( !pAttr->m_bValid )
2747  {
2748  // invalid attributes can be deleted
2749  delete pAttr;
2750  pAttr = pPrev;
2751  continue;
2752  }
2753 
2754  pCNd = pAttr->m_nStartPara.GetNode().GetContentNode();
2755  if( !pCNd )
2756  {
2757  // because of the awful deleting of nodes an index can also
2758  // point to an end node :-(
2759  if ( (pAttr->GetSttPara() == pAttr->GetEndPara()) &&
2760  !isTXTATR_NOEND(nWhich) )
2761  {
2762  // when the end index also points to the node, we don't
2763  // need to set attributes anymore, except if it's a text attribute.
2764  delete pAttr;
2765  pAttr = pPrev;
2766  continue;
2767  }
2768  pCNd = m_xDoc->GetNodes().GoNext( &(pAttr->m_nStartPara) );
2769  if( pCNd )
2770  pAttr->m_nStartContent = 0;
2771  else
2772  {
2773  OSL_ENSURE( false, "SetAttr: GoNext() failed!" );
2774  delete pAttr;
2775  pAttr = pPrev;
2776  continue;
2777  }
2778  }
2779  pAttrPam->GetPoint()->nNode = pAttr->m_nStartPara;
2780 
2781  // because of the deleting of BRs the start index can also
2782  // point behind the end the text
2783  if( pAttr->m_nStartContent > pCNd->Len() )
2784  pAttr->m_nStartContent = pCNd->Len();
2785  pAttrPam->GetPoint()->nContent.Assign( pCNd, pAttr->m_nStartContent );
2786 
      // The start position becomes the mark; the point is moved to the end
      // position below.
2787  pAttrPam->SetMark();
2788  if ( (pAttr->GetSttPara() != pAttr->GetEndPara()) &&
2789  !isTXTATR_NOEND(nWhich) )
2790  {
2791  pCNd = pAttr->m_nEndPara.GetNode().GetContentNode();
2792  if( !pCNd )
2793  {
2794  pCNd = SwNodes::GoPrevious( &(pAttr->m_nEndPara) );
2795  if( pCNd )
2796  pAttr->m_nEndContent = pCNd->Len();
2797  else
2798  {
2799  OSL_ENSURE( false, "SetAttr: GoPrevious() failed!" );
2800  pAttrPam->DeleteMark();
2801  delete pAttr;
2802  pAttr = pPrev;
2803  continue;
2804  }
2805  }
2806 
2807  pAttrPam->GetPoint()->nNode = pAttr->m_nEndPara;
2808  }
2809  else if( pAttr->IsLikePara() )
2810  {
2811  pAttr->m_nEndContent = pCNd->Len();
2812  }
2813 
2814  // because of the deleting of BRs the start index can also
2815  // point behind the end the text
2816  if( pAttr->m_nEndContent > pCNd->Len() )
2817  pAttr->m_nEndContent = pCNd->Len();
2818 
2819  pAttrPam->GetPoint()->nContent.Assign( pCNd, pAttr->m_nEndContent );
2820  if( bBeforeTable &&
2821  pAttrPam->GetPoint()->nNode.GetIndex() ==
2822  rEndIdx.GetIndex() )
2823  {
2824  // If we're before inserting a table and the attribute ends
2825  // in the current node, then we must end it in the previous
2826  // node or discard it, if it starts in that node.
2827  if( nWhich != RES_BREAK && nWhich != RES_PAGEDESC &&
2828  !isTXTATR_NOEND(nWhich) )
2829  {
2830  if( pAttrPam->GetMark()->nNode.GetIndex() !=
2831  rEndIdx.GetIndex() )
2832  {
2833  OSL_ENSURE( !pAttrPam->GetPoint()->nContent.GetIndex(),
2834  "Content-Position before table not 0???" );
2835  pAttrPam->Move( fnMoveBackward );
2836  }
2837  else
2838  {
2839  pAttrPam->DeleteMark();
2840  delete pAttr;
2841  pAttr = pPrev;
2842  continue;
2843  }
2844  }
2845  }
2846 
2847  switch( nWhich )
2848  {
2849  case RES_FLTR_BOOKMARK: // insert bookmark
2850  {
2851  const OUString sName( static_cast<SfxStringItem*>(pAttr->m_pItem.get())->GetValue() );
2852  IDocumentMarkAccess* const pMarkAccess = m_xDoc->getIDocumentMarkAccess();
2853  IDocumentMarkAccess::const_iterator_t ppBkmk = pMarkAccess->findMark( sName );
2854  if( ppBkmk != pMarkAccess->getAllMarksEnd() &&
2855  ppBkmk->get()->GetMarkStart() == *pAttrPam->GetPoint() )
2856  break; // do not generate duplicates on this position
2857  pAttrPam->DeleteMark();
      // NOTE(review): the remaining arguments of this makeMark call (listing
      // lines 2861-2862) are missing from this listing.
2858  const ::sw::mark::IMark* const pNewMark = pMarkAccess->makeMark(
2859  *pAttrPam,
2860  sName,
2863 
2864  // jump to bookmark
2865  if( JumpToMarks::Mark == m_eJumpTo && pNewMark->GetName() == m_sJmpMark )
2866  {
2867  m_bChkJumpMark = true;
      // NOTE(review): listing line 2868 is missing here.
2869  }
2870  }
2871  break;
2872  case RES_TXTATR_FIELD:
2873  case RES_TXTATR_ANNOTATION:
2874  case RES_TXTATR_INPUTFIELD:
2875  {
      // NOTE(review): the else part of this conditional expression (listing
      // line 2879) is missing, so the value used when pPostIts is null cannot
      // be read from here.
2876  SwFieldIds nFieldWhich =
2877  pPostIts
2878  ? static_cast<const SwFormatField *>(pAttr->m_pItem.get())->GetField()->GetTyp()->Which()
      // Ownership of pAttr passes to a unique_ptr in either container, which
      // is why this case leaves via "continue" without deleting it.
2880  if( pPostIts && (SwFieldIds::Postit == nFieldWhich ||
2881  SwFieldIds::Script == nFieldWhich) )
2882  {
2883  pPostIts->emplace_front( pAttr );
2884  }
2885  else
2886  {
2887  aFields.emplace_back( pAttr);
2888  }
2889  }
2890  pAttrPam->DeleteMark();
2891  pAttr = pPrev;
2892  continue;
2893 
2894  case RES_LR_SPACE:
2895  if( pAttrPam->GetPoint()->nNode.GetIndex() ==
2896  pAttrPam->GetMark()->nNode.GetIndex())
2897  {
2898  // because of numbering set this attribute directly at node
2899  pCNd->SetAttr( *pAttr->m_pItem );
2900  break;
2901  }
2902  OSL_ENSURE( false,
2903  "LRSpace set over multiple paragraphs!" );
2904  [[fallthrough]]; // (shouldn't reach this point anyway)
2905 
2906  // tdf#94088 expand RES_BACKGROUND to the new fill attribute
2907  // definitions in the range [XATTR_FILL_FIRST .. XATTR_FILL_LAST].
2908  // This is the right place in the future if the adapted fill attributes
2909  // may be handled more directly in HTML import to handle them.
2910  case RES_BACKGROUND:
2911  {
2912  const SvxBrushItem& rBrush = static_cast< SvxBrushItem& >(*pAttr->m_pItem);
      // NOTE(review): listing lines 2913 and 2915 are missing; they must
      // declare and fill aNewSet, which is inserted below.
2914 
2916  m_xDoc->getIDocumentContentOperations().InsertItemSet(*pAttrPam, aNewSet, SetAttrMode::DONTREPLACE);
2917  break;
2918  }
2919  default:
2920 
2921  // maybe jump to a bookmark
      // NOTE(review): one line of this condition (listing line 2923) and the
      // statement at listing line 2927 are missing from this listing.
2922  if( RES_TXTATR_INETFMT == nWhich &&
2924  m_sJmpMark == static_cast<SwFormatINetFormat*>(pAttr->m_pItem.get())->GetName() )
2925  {
2926  m_bChkJumpMark = true;
2928  }
2929 
2930  m_xDoc->getIDocumentContentOperations().InsertPoolItem( *pAttrPam, *pAttr->m_pItem, SetAttrMode::DONTREPLACE );
2931  }
2932  pAttrPam->DeleteMark();
2933 
2934  delete pAttr;
2935  pAttr = pPrev;
2936  }
2937  }
2938  }
2939 
      // Second pass: fly frames recorded in m_aMoveFlyFrames are switched from
      // paragraph anchoring to character anchoring at the content position
      // stored at the same index in m_aMoveFlyCnts.
2940  for( auto n = m_aMoveFlyFrames.size(); n; )
2941  {
2942  SwFrameFormat *pFrameFormat = m_aMoveFlyFrames[ --n ];
2943 
2944  const SwFormatAnchor& rAnchor = pFrameFormat->GetAnchor();
2945  OSL_ENSURE( RndStdIds::FLY_AT_PARA == rAnchor.GetAnchorId(),
2946  "Only At-Para flys need special handling" );
2947  const SwPosition *pFlyPos = rAnchor.GetContentAnchor();
2948  sal_uLong nFlyParaIdx = pFlyPos->nNode.GetIndex();
2949  bool bMoveFly;
2950  if( bChkEnd )
2951  {
2952  bMoveFly = nFlyParaIdx < rEndIdx.GetIndex() ||
2953  ( nFlyParaIdx == rEndIdx.GetIndex() &&
2954  m_aMoveFlyCnts[n] < nEndCnt );
2955  }
2956  else
2957  {
2958  sal_uLong nEndOfIcons = m_xDoc->GetNodes().GetEndOfExtras().GetIndex();
2959  bMoveFly = nFlyParaIdx < rEndIdx.GetIndex() ||
2960  rEndIdx.GetIndex() > nEndOfIcons ||
2961  nFlyParaIdx <= nEndOfIcons;
2962  }
2963  if( bMoveFly )
2964  {
      // The frames are deleted before the anchor changes and recreated by
      // MakeFrames() once the new attributes are set.
2965  pFrameFormat->DelFrames();
2966  *pAttrPam->GetPoint() = *pFlyPos;
2967  pAttrPam->GetPoint()->nContent.Assign( pAttrPam->GetContentNode(),
2968  m_aMoveFlyCnts[n] );
2969  SwFormatAnchor aAnchor( rAnchor );
2970  aAnchor.SetType( RndStdIds::FLY_AT_CHAR );
2971  aAnchor.SetAnchor( pAttrPam->GetPoint() );
2972  pFrameFormat->SetFormatAttr( aAnchor );
2973 
      // LEFT / TOP orientations are made relative to the anchor character.
2974  const SwFormatHoriOrient& rHoriOri = pFrameFormat->GetHoriOrient();
2975  if( text::HoriOrientation::LEFT == rHoriOri.GetHoriOrient() )
2976  {
2977  SwFormatHoriOrient aHoriOri( rHoriOri );
2978  aHoriOri.SetRelationOrient( text::RelOrientation::CHAR );
2979  pFrameFormat->SetFormatAttr( aHoriOri );
2980  }
2981  const SwFormatVertOrient& rVertOri = pFrameFormat->GetVertOrient();
2982  if( text::VertOrientation::TOP == rVertOri.GetVertOrient() )
2983  {
2984  SwFormatVertOrient aVertOri( rVertOri );
2985  aVertOri.SetRelationOrient( text::RelOrientation::CHAR );
2986  pFrameFormat->SetFormatAttr( aVertOri );
2987  }
2988 
2989  pFrameFormat->MakeFrames();
2990  m_aMoveFlyFrames.erase( m_aMoveFlyFrames.begin() + n );
2991  m_aMoveFlyCnts.erase( m_aMoveFlyCnts.begin() + n );
2992  }
2993  }
      // Third pass: insert the fields collected above at their start position
      // and release each attribute afterwards.
2994  for (auto & field : aFields)
2995  {
2996  pCNd = field->m_nStartPara.GetNode().GetContentNode();
2997  pAttrPam->GetPoint()->nNode = field->m_nStartPara;
2998  pAttrPam->GetPoint()->nContent.Assign( pCNd, field->m_nStartContent );
2999 
3000  if( bBeforeTable &&
3001  pAttrPam->GetPoint()->nNode.GetIndex() == rEndIdx.GetIndex() )
3002  {
      // bBeforeTable is known to be true inside this branch, so the first
      // assertion fires whenever the branch is entered.
3003  OSL_ENSURE( !bBeforeTable, "Aha, the case does occur" );
3004  OSL_ENSURE( !pAttrPam->GetPoint()->nContent.GetIndex(),
3005  "Content-Position before table not 0???" );
3006  // !!!
3007  pAttrPam->Move( fnMoveBackward );
3008  }
3009 
3010  m_xDoc->getIDocumentContentOperations().InsertPoolItem( *pAttrPam, *field->m_pItem );
3011 
3012  field.reset();
3013  }
3014  aFields.clear();
3015 }
3016 
3017 void SwHTMLParser::NewAttr(const std::shared_ptr<HTMLAttrTable>& rAttrTable, HTMLAttr **ppAttr, const SfxPoolItem& rItem )
3018 {
3019  // Font height and font colour as well as escape attributes may not be
3020  // combined. Therefore they're saved in a list and in it the last opened
3021  // attribute is at the beginning and count is always one. For all other
3022  // attributes count is just incremented.
3023  if( *ppAttr )
3024  {
3025  HTMLAttr *pAttr = new HTMLAttr(*m_pPam->GetPoint(), rItem, ppAttr, rAttrTable);
3026  pAttr->InsertNext( *ppAttr );
3027  (*ppAttr) = pAttr;
3028  }
3029  else
3030  (*ppAttr) = new HTMLAttr(*m_pPam->GetPoint(), rItem, ppAttr, rAttrTable);
3031 }
3032 
// Ends the open attribute pAttr at the current PaM position and removes it
// from its list of open attributes.
//
// pAttr:     the attribute to end; it is either queued for setting or deleted.
// bChkEmpty: when true, an attribute whose range is empty is deleted instead
//            of queued (subject to the exceptions in the condition below).
//
// Returns false when the attribute was deleted rather than queued, true
// otherwise. Queuing means: appended to the Prev-list of the next open
// attribute of the same list if there is one, else put into m_aSetAttrTab
// (front or back, depending on m_bInsAtStart).
3033 bool SwHTMLParser::EndAttr( HTMLAttr* pAttr, bool bChkEmpty )
3034 {
3035  bool bRet = true;
3036 
3037  // The list header is saved in the attribute.
3038  HTMLAttr **ppHead = pAttr->m_ppHead;
3039 
3040  OSL_ENSURE( ppHead, "No list header attribute found!" );
3041 
3042  // save the current position as end position
3043  const SwNodeIndex* pEndIdx = &m_pPam->GetPoint()->nNode;
3044  sal_Int32 nEndCnt = m_pPam->GetPoint()->nContent.GetIndex();
3045 
3046  // Is the last started or an earlier started attribute being ended?
3047  HTMLAttr *pLast = nullptr;
3048  if( ppHead && pAttr != *ppHead )
3049  {
3050  // The last started attribute isn't being ended
3051 
3052  // Then we look for attribute which was started immediately afterwards,
3053  // which has also not yet been ended (otherwise it would no longer be
3054  // in the list).
3055  pLast = *ppHead;
3056  while( pLast && pLast->GetNext() != pAttr )
3057  pLast = pLast->GetNext();
3058 
3059  OSL_ENSURE( pLast, "Attribute not found in own list!" );
3060  }
3061 
3062  bool bMoveBack = false;
3063  sal_uInt16 nWhich = pAttr->m_pItem->Which();
      // An attribute with id >= RES_PARATR_BEGIN that would end at content
      // position 0 of a later node is ended one position earlier instead.
3064  if( !nEndCnt && RES_PARATR_BEGIN <= nWhich &&
3065  *pEndIdx != pAttr->GetSttPara() )
3066  {
3067  // Then move back one position in the content!
3068  bMoveBack = m_pPam->Move( fnMoveBackward );
3069  nEndCnt = m_pPam->GetPoint()->nContent.GetIndex();
3070  }
3071 
3072  // now end the attribute
3073  HTMLAttr *pNext = pAttr->GetNext();
3074 
3075  bool bInsert;
3076  sal_uInt16 nScriptItem = 0;
3077  bool bScript = false;
3078  // does it have a non-empty range?
3079  if( !bChkEmpty || (RES_PARATR_BEGIN <= nWhich && bMoveBack) ||
3080  RES_PAGEDESC == nWhich || RES_BREAK == nWhich ||
3081  *pEndIdx != pAttr->GetSttPara() ||
3082  nEndCnt != pAttr->GetSttCnt() )
3083  {
3084  bInsert = true;
3085  // We do some optimization for script dependent attributes here.
3086  if( *pEndIdx == pAttr->GetSttPara() )
3087  {
3088  lcl_swhtml_getItemInfo( *pAttr, bScript, nScriptItem );
3089  }
3090  }
3091  else
3092  {
3093  bInsert = false;
3094  }
3095 
      // Non-null only for a script dependent attribute that is to be inserted
      // and whose start node is a text node.
3096  const SwTextNode *pTextNd = (bInsert && bScript) ?
3097  pAttr->GetSttPara().GetNode().GetTextNode() :
3098  nullptr;
3099 
3100  if (pTextNd)
3101  {
      // Walk the script runs of the text between the attribute's start and
      // nEndCnt. For every run that ends before nEndCnt and whose script type
      // equals nScriptItem, a clone ending at the run's end is queued; the
      // attribute's own start is then advanced to the end of the run.
3102  const OUString& rText = pTextNd->GetText();
3103  sal_uInt16 nScriptText = g_pBreakIt->GetBreakIter()->getScriptType(
3104  rText, pAttr->GetSttCnt() );
3105  sal_Int32 nScriptEnd = g_pBreakIt->GetBreakIter()
3106  ->endOfScript( rText, pAttr->GetSttCnt(), nScriptText );
3107  while (nScriptEnd < nEndCnt && nScriptEnd != -1)
3108  {
3109  if( nScriptItem == nScriptText )
3110  {
3111  HTMLAttr *pSetAttr = pAttr->Clone( *pEndIdx, nScriptEnd );
3112  pSetAttr->ClearPrev();
3113  if( pNext )
3114  pNext->InsertPrev( pSetAttr );
3115  else
3116  {
3117  if (pSetAttr->m_bInsAtStart)
3118  m_aSetAttrTab.push_front( pSetAttr );
3119  else
3120  m_aSetAttrTab.push_back( pSetAttr );
3121  }
3122  }
3123  pAttr->m_nStartContent = nScriptEnd;
3124  nScriptText = g_pBreakIt->GetBreakIter()->getScriptType(
3125  rText, nScriptEnd );
3126  nScriptEnd = g_pBreakIt->GetBreakIter()
3127  ->endOfScript( rText, nScriptEnd, nScriptText );
3128  }
      // The remaining part is only inserted if its script type matches.
3129  bInsert = nScriptItem == nScriptText;
3130  }
3131  if( bInsert )
3132  {
3133  pAttr->m_nEndPara = *pEndIdx;
3134  pAttr->m_nEndContent = nEndCnt;
      // Everything except INETFMT and CHARFMT attributes goes to the front of
      // m_aSetAttrTab.
3135  pAttr->m_bInsAtStart = RES_TXTATR_INETFMT != nWhich &&
3136  RES_TXTATR_CHARFMT != nWhich;
3137 
3138  if( !pNext )
3139  {
3140  // No open attributes of that type exists any longer, so all
3141  // can be set. Except they depend on another attribute, then
3142  // they're appended there.
3143  if (pAttr->m_bInsAtStart)
3144  m_aSetAttrTab.push_front( pAttr );
3145  else
3146  m_aSetAttrTab.push_back( pAttr );
3147  }
3148  else
3149  {
3150  // There are other open attributes of that type,
3151  // therefore the setting must be postponed.
3152  // Hence the current attribute is added at the end
3153  // of the Prev-List of the successor.
3154  pNext->InsertPrev( pAttr );
3155  }
3156  }
3157  else
3158  {
3159  // Then don't insert, but delete. Because of the "faking" of styles
3160  // by hard attributing there can be also other empty attributes in the
3161  // Prev-List, which must be set anyway.
      // pPrev is read before the delete because pAttr must not be touched
      // afterwards.
3162  HTMLAttr *pPrev = pAttr->GetPrev();
3163  bRet = false;
3164  delete pAttr;
3165 
3166  if( pPrev )
3167  {
3168  // The previous attributes must be set anyway.
3169  if( pNext )
3170  pNext->InsertPrev( pPrev );
3171  else
3172  {
3173  if (pPrev->m_bInsAtStart)
3174  m_aSetAttrTab.push_front( pPrev );
3175  else
3176  m_aSetAttrTab.push_back( pPrev );
3177  }
3178  }
3179 
3180  }
3181 
3182  // If the first attribute of the list was set, then the list header
3183  // must be corrected as well.
3184  if( pLast )
3185  pLast->m_pNext = pNext;
3186  else if( ppHead )
3187  *ppHead = pNext;
3188 
      // NOTE(review): listing line 3190, the statement controlled by the
      // following if, is missing from this listing. Presumably it undoes the
      // backward move made near the top of the function - confirm against the
      // real file. As printed here, "return bRet;" would become the body of
      // the if.
3189  if( bMoveBack )
3191 
3192  return bRet;
3193 }
3194 
3196 {
      // NOTE(review): the line carrying this function's signature (listing
      // line 3195) is absent from this listing. pAttr is used without a
      // visible declaration and is presumably its parameter.
      //
      // Visible behaviour: unlinks pAttr from its list of open attributes and
      // deletes it without setting it. Attributes hanging in its Prev-list are
      // still passed on: to the next open attribute if there is one, else to
      // m_aSetAttrTab.
3197  // preliminary paragraph attributes are not allowed here, they could
3198  // be set here and then the pointers become invalid!
3199  OSL_ENSURE(m_aParaAttrs.empty(),
3200  "Danger: there are non-final paragraph attributes");
3201  m_aParaAttrs.clear();
3202 
3203  // The list header is saved in the attribute
3204  HTMLAttr **ppHead = pAttr->m_ppHead;
3205 
3206  OSL_ENSURE( ppHead, "no list header attribute found!" );
3207 
3208  // Is the last started or an earlier started attribute being removed?
3209  HTMLAttr *pLast = nullptr;
3210  if( ppHead && pAttr != *ppHead )
3211  {
3212  // The last started attribute isn't being ended
3213 
3214  // Then we look for attribute which was started immediately afterwards,
3215  // which has also not yet been ended (otherwise it would no longer be
3216  // in the list).
3217  pLast = *ppHead;
3218  while( pLast && pLast->GetNext() != pAttr )
3219  pLast = pLast->GetNext();
3220 
3221  OSL_ENSURE( pLast, "Attribute not found in own list!" );
3222  }
3223 
3224  // now delete the attribute
      // Both neighbours are read before the delete because pAttr must not be
      // touched afterwards.
3225  HTMLAttr *pNext = pAttr->GetNext();
3226  HTMLAttr *pPrev = pAttr->GetPrev();
3227  //hold ref to xAttrTab until end of scope to ensure *ppHead validity
3228  std::shared_ptr<HTMLAttrTable> xAttrTab(pAttr->m_xAttrTab);
3229  delete pAttr;
3230 
3231  if( pPrev )
3232  {
3233  // The previous attributes must be set anyway.
3234  if( pNext )
3235  pNext->InsertPrev( pPrev );
3236  else
3237  {
3238  if (pPrev->m_bInsAtStart)
3239  m_aSetAttrTab.push_front( pPrev );
3240  else
3241  m_aSetAttrTab.push_back( pPrev );
3242  }
3243  }
3244 
3245  // If the first attribute of the list was deleted, then the list header
3246  // must be corrected as well.
3247  if( pLast )
3248  pLast->m_pNext = pNext;
3249  else if( ppHead )
3250  *ppHead = pNext;
3251 }
3252 
3253 void SwHTMLParser::SaveAttrTab(std::shared_ptr<HTMLAttrTable> const & rNewAttrTab)
3254 {
3255  // preliminary paragraph attributes are not allowed here, they could
3256  // be set here and then the pointers become invalid!
3257  OSL_ENSURE(m_aParaAttrs.empty(),
3258  "Danger: there are non-final paragraph attributes");
3259  m_aParaAttrs.clear();
3260 
3261  HTMLAttr** pHTMLAttributes = reinterpret_cast<HTMLAttr**>(m_xAttrTab.get());
3262  HTMLAttr** pSaveAttributes = reinterpret_cast<HTMLAttr**>(rNewAttrTab.get());
3263 
3264  for (auto nCnt = sizeof(HTMLAttrTable) / sizeof(HTMLAttr*); nCnt--; ++pHTMLAttributes, ++pSaveAttributes)
3265  {
3266  *pSaveAttributes = *pHTMLAttributes;
3267 
3268  HTMLAttr *pAttr = *pSaveAttributes;
3269  while (pAttr)
3270  {
3271  pAttr->SetHead(pSaveAttributes, rNewAttrTab);
3272  pAttr = pAttr->GetNext();
3273  }
3274 
3275  *pHTMLAttributes = nullptr;
3276  }
3277 }
3278 
// Closes every attribute chain that is still open in m_xAttrTab and moves the
// same HTMLAttr objects into rNewAttrTab, restarted at the current PaM point,
// so that they can be re-opened later. m_xAttrTab is left with all slots null.
//
// rNewAttrTab  - receives the detached attributes; every slot is overwritten.
// bMoveEndBack - if true, the end position of the closed attributes is the end
//                of the content node preceding the PaM node instead of the
//                PaM point itself.
3279 void SwHTMLParser::SplitAttrTab( std::shared_ptr<HTMLAttrTable> const & rNewAttrTab,
3280  bool bMoveEndBack )
3281 {
3282  // preliminary paragraph attributes are not allowed here, they could
3283  // be set here and then the pointers become invalid!
3284  OSL_ENSURE(m_aParaAttrs.empty(),
3285  "Danger: there are non-final paragraph attributes");
3286  m_aParaAttrs.clear();
3287 
3288  const SwNodeIndex& nSttIdx = m_pPam->GetPoint()->nNode;
3289  SwNodeIndex nEndIdx( nSttIdx );
3290 
3291  // close all still open attributes and re-open them after the table
// Both tables are walked as flat arrays of HTMLAttr* slots (see the loop
// bound below). NOTE(review): this relies on HTMLAttrTable consisting only of
// HTMLAttr* members - confirm against the struct definition.
3292  HTMLAttr** pHTMLAttributes = reinterpret_cast<HTMLAttr**>(m_xAttrTab.get());
3293  HTMLAttr** pSaveAttributes = reinterpret_cast<HTMLAttr**>(rNewAttrTab.get());
3294  bool bSetAttr = true;
3295  const sal_Int32 nSttCnt = m_pPam->GetPoint()->nContent.GetIndex();
3296  sal_Int32 nEndCnt = nSttCnt;
3297 
3298  if( bMoveEndBack )
3299  {
3300  sal_uLong nOldEnd = nEndIdx.GetIndex();
3301  sal_uLong nTmpIdx;
// Choose the lower bound for the moved end position: if the old end lies at
// or before EndOfExtras or EndOfAutotext the bound is EndOfInserts, otherwise
// nTmpIdx keeps the EndOfAutotext index assigned inside the condition.
3302  if( ( nTmpIdx = m_xDoc->GetNodes().GetEndOfExtras().GetIndex()) >= nOldEnd ||
3303  ( nTmpIdx = m_xDoc->GetNodes().GetEndOfAutotext().GetIndex()) >= nOldEnd )
3304  {
3305  nTmpIdx = m_xDoc->GetNodes().GetEndOfInserts().GetIndex();
3306  }
3307  SwContentNode* pCNd = SwNodes::GoPrevious(&nEndIdx);
3308 
3309  // Don't set attributes, when the PaM was moved outside of the content area.
3310  bSetAttr = pCNd && nTmpIdx < nEndIdx.GetIndex();
3311 
// pCNd is only dereferenced when bSetAttr is true, which implies it is non-null.
3312  nEndCnt = (bSetAttr ? pCNd->Len() : 0);
3313  }
// One iteration per slot of the table; both slot pointers advance in lockstep.
3314  for (auto nCnt = sizeof(HTMLAttrTable) / sizeof(HTMLAttr*); nCnt--; (++pHTMLAttributes, ++pSaveAttributes))
3315  {
3316  HTMLAttr *pAttr = *pHTMLAttributes;
3317  *pSaveAttributes = nullptr;
3318  while( pAttr )
3319  {
// Fetch both links first: Reset() below breaks the links of pAttr.
3320  HTMLAttr *pNext = pAttr->GetNext();
3321  HTMLAttr *pPrev = pAttr->GetPrev();
3322 
// The attribute is set only if it starts in an earlier node than the end
// position, or in the same node at a different content index.
3323  if( bSetAttr &&
3324  ( pAttr->GetSttParaIdx() < nEndIdx.GetIndex() ||
3325  (pAttr->GetSttPara() == nEndIdx &&
3326  pAttr->GetSttCnt() != nEndCnt) ) )
3327  {
3328  // The attribute must be set before the list. We need the
3329  // original and therefore we clone it, because pointer to the
3330  // attribute exist in the other contexts. The Next-List is lost
3331  // in doing so, but the Previous-List is preserved.
3332  HTMLAttr *pSetAttr = pAttr->Clone( nEndIdx, nEndCnt );
3333 
// With a successor the clone is chained as its previous attribute; without
// one it is queued in m_aSetAttrTab, at the front if m_bInsAtStart is set.
3334  if( pNext )
3335  pNext->InsertPrev( pSetAttr );
3336  else
3337  {
3338  if (pSetAttr->m_bInsAtStart)
3339  m_aSetAttrTab.push_front( pSetAttr );
3340  else
3341  m_aSetAttrTab.push_back( pSetAttr );
3342  }
3343  }
3344  else if( pPrev )
3345  {
3346  // If the attribute doesn't need to be set before the table, then
3347  // the previous attributes must still be set.
3348  if( pNext )
3349  pNext->InsertPrev( pPrev );
3350  else
3351  {
3352  if (pPrev->m_bInsAtStart)
3353  m_aSetAttrTab.push_front( pPrev );
3354  else
3355  m_aSetAttrTab.push_back( pPrev );
3356  }
3357  }
3358 
3359  // set the start of the attribute anew and break link
3360  pAttr->Reset(nSttIdx, nSttCnt, pSaveAttributes, rNewAttrTab);
3361 
// Append pAttr to the end of the chain already collected in the save slot,
// or make it the first entry of that slot.
3362  if (*pSaveAttributes)
3363  {
3364  HTMLAttr *pSAttr = *pSaveAttributes;
3365  while( pSAttr->GetNext() )
3366  pSAttr = pSAttr->GetNext();
3367  pSAttr->InsertNext( pAttr );
3368  }
3369  else
3370  *pSaveAttributes = pAttr;
3371 
3372  pAttr = pNext;
3373  }
3374 
// The slot in the live table is now empty.
3375  *pHTMLAttributes = nullptr;
3376  }
3377 }
3378 
// Moves the attribute chains stored in rNewAttrTab back into m_xAttrTab, slot
// by slot, re-pointing the head of every attribute in each chain at the
// restored slot. rNewAttrTab is left with all slots null.
3379 void SwHTMLParser::RestoreAttrTab(std::shared_ptr<HTMLAttrTable> const & rNewAttrTab)
3380 {
3381  // preliminary paragraph attributes are not allowed here, they could
3382  // be set here and then the pointers become invalid!
3383  OSL_ENSURE(m_aParaAttrs.empty(),
3384  "Danger: there are non-final paragraph attributes");
3385  m_aParaAttrs.clear();
3386 
// Both tables are treated as flat arrays of HTMLAttr* slots.
// NOTE(review): relies on HTMLAttrTable holding only HTMLAttr* members.
3387  HTMLAttr** pHTMLAttributes = reinterpret_cast<HTMLAttr**>(m_xAttrTab.get());
3388  HTMLAttr** pSaveAttributes = reinterpret_cast<HTMLAttr**>(rNewAttrTab.get());
3389 
3390  for (auto nCnt = sizeof(HTMLAttrTable) / sizeof(HTMLAttr*); nCnt--; ++pHTMLAttributes, ++pSaveAttributes)
3391  {
// The live slot is expected to be empty; it is overwritten either way.
3392  OSL_ENSURE(!*pHTMLAttributes, "The attribute table is not empty!");
3393 
3394  *pHTMLAttributes = *pSaveAttributes;
3395 
// Walk the chain via GetNext() and attach every attribute to the live slot.
3396  HTMLAttr *pAttr = *pHTMLAttributes;
3397  while (pAttr)
3398  {
3399  OSL_ENSURE( !pAttr->GetPrev() || !pAttr->GetPrev()->m_ppHead,
3400  "Previous attribute has still a header" );
3401  pAttr->SetHead(pHTMLAttributes, m_xAttrTab);
3402  pAttr = pAttr->GetNext();
3403  }
3404 
3405  *pSaveAttributes = nullptr;
3406  }
3407 }
3408 
// Creates an HTMLAttr for rItem at the current PaM point that is not bound to
// any table slot (null head, empty table pointer) and queues it in
// m_aSetAttrTab: at the front if bInsAtStart is true, otherwise at the back.
// NOTE(review): the queue stores the raw pointer; ownership presumably passes
// to whoever drains m_aSetAttrTab - verify.
3409 void SwHTMLParser::InsertAttr( const SfxPoolItem& rItem, bool bInsAtStart )
3410 {
3411  HTMLAttr* pTmp = new HTMLAttr(*m_pPam->GetPoint(), rItem, nullptr, std::shared_ptr<HTMLAttrTable>());
3412  if (bInsAtStart)
3413  m_aSetAttrTab.push_front( pTmp );
3414  else
3415  m_aSetAttrTab.push_back( pTmp );
3416 }
3417 
// Queues the item of every attribute in rAttrs, front to back, through
// InsertAttr( item, false ). rAttrs is taken by value and consumed: each
// HTMLAttr is destroyed at the end of its loop iteration, after its item has
// been handed to InsertAttr.
3418 void SwHTMLParser::InsertAttrs( std::deque<std::unique_ptr<HTMLAttr>> rAttrs )
3419 {
3420  while( !rAttrs.empty() )
3421  {
// Take ownership of the front element before removing its (now empty) slot.
3422  std::unique_ptr<HTMLAttr> pAttr = std::move(rAttrs.front());
3423  InsertAttr( pAttr->GetItem(), false );
3424  rAttrs.pop_front();
3425  }
3426 }
3427 
// NOTE(review): the signature line of this function (listing line 3428) is
// missing from this extract; the body uses a token parameter named nToken.
//
// Opens a new attribute context for nToken. The ID, STYLE, CLASS, LANG and DIR
// options of the current tag are collected; if HasStyleOptions() reports any,
// they are parsed into an item set whose attributes are inserted into the new
// context. The context is pushed in every case.
3429 {
3430  OUString aId, aStyle, aLang, aDir;
3431  OUString aClass;
3432 
// Options are visited from the highest index down to 0, so for a repeated
// option the value at the lowest index is the one that is kept.
3433  const HTMLOptions& rHTMLOptions = GetOptions();
3434  for (size_t i = rHTMLOptions.size(); i; )
3435  {
3436  const HTMLOption& rOption = rHTMLOptions[--i];
3437  switch( rOption.GetToken() )
3438  {
3439  case HtmlOptionId::ID:
3440  aId = rOption.GetString();
3441  break;
3442  case HtmlOptionId::STYLE:
3443  aStyle = rOption.GetString();
3444  break;
3445  case HtmlOptionId::CLASS:
3446  aClass = rOption.GetString();
3447  break;
3448  case HtmlOptionId::LANG:
3449  aLang = rOption.GetString();
3450  break;
3451  case HtmlOptionId::DIR:
3452  aDir = rOption.GetString();
3453  break;
3454  default: break;
3455  }
3456  }
3457 
3458  // create a new context
3459  std::unique_ptr<HTMLAttrContext> xCntxt(new HTMLAttrContext(nToken));
3460 
3461  // parse styles
3462  if( HasStyleOptions( aStyle, aId, aClass, &aLang, &aDir ) )
3463  {
3464  SfxItemSet aItemSet( m_xDoc->GetAttrPool(), m_pCSS1Parser->GetWhichMap() );
3465  SvxCSS1PropertyInfo aPropInfo;
3466 
3467  if( ParseStyleOptions( aStyle, aId, aClass, aItemSet, aPropInfo, &aLang, &aDir ) )
3468  {
// DoPositioning() is skipped only for a SPAN with a non-empty class for which
// CreateContainer() returned true.
3469  if( HtmlTokenId::SPAN_ON != nToken || aClass.isEmpty() ||
3470  !CreateContainer( aClass, aItemSet, aPropInfo, xCntxt.get() ) )
3471  DoPositioning( aItemSet, aPropInfo, xCntxt.get() );
3472  InsertAttrs( aItemSet, aPropInfo, xCntxt.get(), true );
3473  }
3474  }
3475 
3476  // save the context
3477  PushContext(xCntxt);
3478 }
3479 
// NOTE(review): the first signature line of this function (listing line 3480)
// is missing from this extract; the body uses a token parameter named nToken
// in addition to the parameters listed below.
//
// Opens a new attribute context for nToken that carries up to three items.
//
// ppAttr / rItem   - table slot and item that are always applied.
// ppAttr2 / pItem2 - optional second slot/item; applied only if pItem2 is set.
// ppAttr3 / pItem3 - optional third slot/item; applied only if pItem3 is set.
//
// If the tag has style-related options, the items are put into an item set
// that is then extended by the parsed style options and inserted as a whole;
// the ppAttr* slots are not used in that case. Otherwise each item is
// inserted directly into its slot.
3481  HTMLAttr **ppAttr, const SfxPoolItem & rItem,
3482  HTMLAttr **ppAttr2, const SfxPoolItem *pItem2,
3483  HTMLAttr **ppAttr3, const SfxPoolItem *pItem3 )
3484 {
3485  OUString aId, aStyle, aClass, aLang, aDir;
3486 
// Options are visited from the highest index down to 0.
3487  const HTMLOptions& rHTMLOptions = GetOptions();
3488  for (size_t i = rHTMLOptions.size(); i; )
3489  {
3490  const HTMLOption& rOption = rHTMLOptions[--i];
3491  switch( rOption.GetToken() )
3492  {
3493  case HtmlOptionId::ID:
3494  aId = rOption.GetString();
3495  break;
3496  case HtmlOptionId::STYLE:
3497  aStyle = rOption.GetString();
3498  break;
3499  case HtmlOptionId::CLASS:
3500  aClass = rOption.GetString();
3501  break;
3502  case HtmlOptionId::LANG:
3503  aLang = rOption.GetString();
3504  break;
3505  case HtmlOptionId::DIR:
3506  aDir = rOption.GetString();
3507  break;
3508  default: break;
3509  }
3510  }
3511 
3512  // create a new context
3513  std::unique_ptr<HTMLAttrContext> xCntxt(new HTMLAttrContext(nToken));
3514 
3515  // parse styles
3516  if( HasStyleOptions( aStyle, aId, aClass, &aLang, &aDir ) )
3517  {
3518  SfxItemSet aItemSet( m_xDoc->GetAttrPool(), m_pCSS1Parser->GetWhichMap() );
3519  SvxCSS1PropertyInfo aPropInfo;
3520 
// The caller's items go in first; ParseStyleOptions() works on the same set.
3521  aItemSet.Put( rItem );
3522  if( pItem2 )
3523  aItemSet.Put( *pItem2 );
3524  if( pItem3 )
3525  aItemSet.Put( *pItem3 );
3526 
3527  if( ParseStyleOptions( aStyle, aId, aClass, aItemSet, aPropInfo, &aLang, &aDir ) )
3528  DoPositioning( aItemSet, aPropInfo, xCntxt.get() );
3529 
// Inserted even if ParseStyleOptions() returned false, so the caller's items
// are always applied.
3530  InsertAttrs( aItemSet, aPropInfo, xCntxt.get(), true );
3531  }
3532  else
3533  {
3534  InsertAttr( ppAttr ,rItem, xCntxt.get() );
3535  if( pItem2 )
3536  {
3537  OSL_ENSURE( ppAttr2, "missing table entry for item2" );
3538  InsertAttr( ppAttr2, *pItem2, xCntxt.get() );
3539  }
3540  if( pItem3 )
3541  {
3542  OSL_ENSURE( ppAttr3, "missing table entry for item3" );
3543  InsertAttr( ppAttr3, *pItem3, xCntxt.get() );
3544  }
3545  }
3546 
3547  // save the context
3548  PushContext(xCntxt);
3549 }
3550 
// NOTE(review): the signature line of this function (listing line 3551) is
// missing from this extract; the body uses a token parameter named nToken.
//
// Pops the context that belongs to the "on" form of nToken and, if one was
// found, ends it via EndContext(). The popped context is destroyed on return.
3552 {
3553  // fetch context
3554  std::unique_ptr<HTMLAttrContext> xCntxt(PopContext(getOnToken(nToken)));
3555  if (xCntxt)
3556  {
3557  // and maybe end the attributes
3558  EndContext(xCntxt.get());
3559  }
3560 }
3561 
3563 {
3564  OUString aId, aStyle, aClass, aLang, aDir;
3565  sal_uInt16 nSize = 3;
3566 
3567  const HTMLOptions& rHTMLOptions = GetOptions();
3568  for (size_t i = rHTMLOptions.size(); i; )
3569  {
3570  const HTMLOption& rOption = rHTMLOptions[--i];
3571  switch( rOption.GetToken() )
3572  {
3573  case HtmlOptionId::SIZE:
3574  nSize = static_cast<sal_uInt16>(rOption.GetNumber());
3575  break;
3576  case HtmlOptionId::ID:
3577  aId = rOption.GetString();
3578  break;
3579  case HtmlOptionId::STYLE:
3580  aStyle = rOption.GetString();
3581  break;
3582  case HtmlOptionId::CLASS:
3583  aClass = rOption.GetString();
3584  break;
3585  case HtmlOptionId::LANG:
3586  aLang = rOption.GetString();
3587  break;
3588  case HtmlOptionId::DIR:
3589  aDir = rOption.GetString();
3590  break;
3591  default: break;
3592  }
3593  }
3594 
3595  if( nSize < 1 )
3596  nSize = 1;
3597 
3598  if( nSize > 7 )
3599  nSize = 7;
3600 
3601  // create a new context
3602  std::unique_ptr<HTMLAttrContext> xCntxt(new HTMLAttrContext(HtmlTokenId::BASEFONT_ON));
3603 
3604  // parse styles
3605  if( HasStyleOptions( aStyle, aId, aClass, &aLang, &aDir ) )
3606  {
3607  SfxItemSet aItemSet( m_xDoc->GetAttrPool(), m_pCSS1Parser->GetWhichMap() );
3608  SvxCSS1PropertyInfo aPropInfo;
3609 
3610  //CJK has different defaults
3611  SvxFontHeightItem aFontHeight( m_aFontHeights[nSize-1], 100, RES_CHRATR_FONTSIZE );
3612  aItemSet.Put( aFontHeight );
3613  SvxFontHeightItem aFontHeightCJK( m_aFontHeights[nSize-1], 100, RES_CHRATR_CJK_FONTSIZE );
3614  aItemSet.Put( aFontHeightCJK );
3615  //Complex type can contain so many types of letters,
3616  //that it's not really worthy to bother, IMO.
3617  //Still, I have set a default.
3618  SvxFontHeightItem aFontHeightCTL( m_aFontHeights[nSize-1], 100, RES_CHRATR_CTL_FONTSIZE );
3619  aItemSet.Put( aFontHeightCTL );
3620 
3621  if( ParseStyleOptions( aStyle, aId, aClass, aItemSet, aPropInfo, &aLang, &aDir ) )
3622  DoPositioning( aItemSet, aPropInfo, xCntxt.get() );
3623 
3624  InsertAttrs( aItemSet, aPropInfo, xCntxt.get(), true );
3625  }
3626  else
3627  {
3628  SvxFontHeightItem aFontHeight( m_aFontHeights[nSize-1], 100, RES_CHRATR_FONTSIZE );
3629  InsertAttr( &m_xAttrTab->pFontHeight, aFontHeight, xCntxt.get() );
3630  SvxFontHeightItem aFontHeightCJK( m_aFontHeights[nSize-1], 100, RES_CHRATR_CJK_FONTSIZE );
3631  InsertAttr( &m_xAttrTab->pFontHeightCJK, aFontHeightCJK, xCntxt.get() );
3632  SvxFontHeightItem aFontHeightCTL( m_aFontHeights[nSize-1], 100, RES_CHRATR_CTL_FONTSIZE );
3633  InsertAttr( &m_xAttrTab->pFontHeightCTL, aFontHeightCTL, xCntxt.get() );
3634  }
3635 
3636  // save the context
3637  PushContext(xCntxt);
3638 
3639  // save the font size
3640  m_aBaseFontStack.push_back( nSize );
3641 }
3642 
// NOTE(review): the signature line of this function (listing line 3643) is
// missing from this extract.
//
// Ends the BASEFONT_ON context and removes the most recent entry of
// m_aBaseFontStack, but never shrinks the stack to m_nBaseFontStMin entries
// or fewer.
3644 {
3645  EndTag( HtmlTokenId::BASEFONT_ON );
3646 
3647  // avoid stack underflow in tables
3648  if( m_aBaseFontStack.size() > m_nBaseFontStMin )
3649  m_aBaseFontStack.pop_back();
3650 }
3651 
3653 {
3654  sal_uInt16 nBaseSize =
3657  : 3 );
3658  sal_uInt16 nFontSize =
3659  ( m_aFontStack.size() > m_nFontStMin
3660  ? (m_aFontStack[m_aFontStack.size()-1] & FONTSIZE_MASK)
3661  : nBaseSize );
3662 
3663  OUString aFace, aId, aStyle, aClass, aLang, aDir;
3664  Color aColor;
3665  sal_uLong nFontHeight = 0; // actual font height to set
3666  sal_uInt16 nSize = 0; // font height in Netscape notation (1-7)
3667  bool bColor = false;
3668 
3669  const HTMLOptions& rHTMLOptions = GetOptions();
3670  for (size_t i = rHTMLOptions.size(); i; )
3671  {
3672  const HTMLOption& rOption = rHTMLOptions[--i];
3673  switch( rOption.GetToken() )
3674  {
3675  case HtmlOptionId::SIZE:
3676  if( HtmlTokenId::FONT_ON==nToken && !rOption.GetString().isEmpty() )
3677  {
3678  sal_Int32 nSSize;
3679  if( '+' == rOption.GetString()[0] ||
3680  '-' == rOption.GetString()[0] )
3681  nSSize = o3tl::saturating_add<sal_Int32>(nBaseSize, rOption.GetSNumber());
3682  else
3683  nSSize = static_cast<sal_Int32>(rOption.GetNumber());
3684 
3685  if( nSSize < 1 )
3686  nSSize = 1;
3687  else if( nSSize > 7 )
3688  nSSize = 7;
3689 
3690  nSize = static_cast<sal_uInt16>(nSSize);
3691  nFontHeight = m_aFontHeights[nSize-1];
3692  }
3693  break;
3694  case HtmlOptionId::COLOR:
3695  if( HtmlTokenId::FONT_ON==nToken )
3696  {
3697  rOption.GetColor( aColor );
3698  bColor = true;
3699  }
3700  break;
3701  case HtmlOptionId::FACE:
3702  if( HtmlTokenId::FONT_ON==nToken )
3703  aFace = rOption.GetString();
3704  break;
3705  case HtmlOptionId::ID:
3706  aId = rOption.GetString();
3707  break;
3708  case HtmlOptionId::STYLE:
3709  aStyle = rOption.GetString();
3710  break;
3711  case HtmlOptionId::CLASS:
3712  aClass = rOption.GetString();
3713  break;
3714  case HtmlOptionId::LANG:
3715  aLang = rOption.GetString();
3716  break;
3717  case HtmlOptionId::DIR:
3718  aDir = rOption.GetString();
3719  break;
3720  default: break;
3721  }
3722  }
3723 
3724  if( HtmlTokenId::FONT_ON != nToken )
3725  {
3726  // HTML_BIGPRINT_ON or HTML_SMALLPRINT_ON
3727 
3728  // In headings the current heading sets the font height
3729  // and not BASEFONT.
3730  const SwFormatColl *pColl = GetCurrFormatColl();
3731  sal_uInt16 nPoolId = pColl ? pColl->GetPoolFormatId() : 0;
3732  if( nPoolId>=RES_POOLCOLL_HEADLINE1 &&
3733  nPoolId<=RES_POOLCOLL_HEADLINE6 )
3734  {
3735  // If the font height in the heading wasn't changed yet,
3736  // then take the one from the style.
3737  if( m_nFontStHeadStart==m_aFontStack.size() )
3738  nFontSize = static_cast< sal_uInt16 >(6 - (nPoolId - RES_POOLCOLL_HEADLINE1));
3739  }
3740  else
3741  nPoolId = 0;
3742 
3743  if( HtmlTokenId::BIGPRINT_ON == nToken )
3744  nSize = ( nFontSize<7 ? nFontSize+1 : 7 );
3745  else
3746  nSize = ( nFontSize>1 ? nFontSize-1 : 1 );
3747 
3748  // If possible in headlines we fetch the new font height
3749  // from the style.
3750  if( nPoolId && nSize>=1 && nSize <=6 )
3751  nFontHeight =
3752  m_pCSS1Parser->GetTextCollFromPool(
3753  RES_POOLCOLL_HEADLINE1+6-nSize )->GetSize().GetHeight();
3754  else
3755  nFontHeight = m_aFontHeights[nSize-1];
3756  }
3757 
3758  OSL_ENSURE( !nSize == !nFontHeight, "HTML-Font-Size != Font-Height" );
3759 
3760  OUString aFontName, aStyleName;
3761  FontFamily eFamily = FAMILY_DONTKNOW; // family and pitch,
3762  FontPitch ePitch = PITCH_DONTKNOW; // if not found
3763  rtl_TextEncoding eEnc = osl_getThreadTextEncoding();
3764 
3765  if( !aFace.isEmpty() && !m_pCSS1Parser->IsIgnoreFontFamily() )
3766  {
3767  const FontList *pFList = nullptr;
3768  SwDocShell *pDocSh = m_xDoc->GetDocShell();
3769  if( pDocSh )
3770  {
3771  const SvxFontListItem *pFListItem =
3772  static_cast<const SvxFontListItem *>(pDocSh->GetItem(SID_ATTR_CHAR_FONTLIST));
3773  if( pFListItem )
3774  pFList = pFListItem->GetFontList();
3775  }
3776 
3777  bool bFound = false;
3778  sal_Int32 nStrPos = 0;
3779  while( nStrPos!= -1 )
3780  {
3781  OUString aFName = aFace.getToken( 0, ',', nStrPos );
3782  aFName = comphelper::string::strip(aFName, ' ');
3783  if( !aFName.isEmpty() )
3784  {
3785  if( !bFound && pFList )
3786  {
3787  sal_Handle hFont = pFList->GetFirstFontMetric( aFName );
3788  if( nullptr != hFont )
3789  {
3790  const FontMetric& rFMetric = FontList::GetFontMetric( hFont );
3791  if( RTL_TEXTENCODING_DONTKNOW != rFMetric.GetCharSet() )
3792  {
3793  bFound = true;
3794  if( RTL_TEXTENCODING_SYMBOL == rFMetric.GetCharSet() )
3795  eEnc = RTL_TEXTENCODING_SYMBOL;
3796  }
3797  }
3798  }
3799  if( !aFontName.isEmpty() )
3800  aFontName += ";";
3801  aFontName += aFName;
3802  }
3803  }
3804  }
3805 
3806  // create a new context
3807  std::unique_ptr<HTMLAttrContext> xCntxt(new HTMLAttrContext(nToken));
3808 
3809  // parse styles
3810  if( HasStyleOptions( aStyle, aId, aClass, &aLang, &aDir ) )
3811  {
3812  SfxItemSet aItemSet( m_xDoc->GetAttrPool(), m_pCSS1Parser->GetWhichMap() );
3813  SvxCSS1PropertyInfo aPropInfo;
3814 
3815  if( nFontHeight )
3816  {
3817  SvxFontHeightItem aFontHeight( nFontHeight, 100, RES_CHRATR_FONTSIZE );
3818  aItemSet.Put( aFontHeight );
3819  SvxFontHeightItem aFontHeightCJK( nFontHeight, 100, RES_CHRATR_CJK_FONTSIZE );
3820  aItemSet.Put( aFontHeightCJK );
3821  SvxFontHeightItem aFontHeightCTL( nFontHeight, 100, RES_CHRATR_CTL_FONTSIZE );
3822  aItemSet.Put( aFontHeightCTL );
3823  }
3824  if( bColor )
3825  aItemSet.Put( SvxColorItem(aColor, RES_CHRATR_COLOR) );
3826  if( !aFontName.isEmpty() )
3827  {
3828  SvxFontItem aFont( eFamily, aFontName, aStyleName, ePitch, eEnc, RES_CHRATR_FONT );
3829  aItemSet.Put( aFont );
3830  SvxFontItem aFontCJK( eFamily, aFontName, aStyleName, ePitch, eEnc, RES_CHRATR_CJK_FONT );
3831  aItemSet.Put( aFontCJK );
3832  SvxFontItem aFontCTL( eFamily, aFontName, aStyleName, ePitch, eEnc, RES_CHRATR_CTL_FONT );
3833  aItemSet.Put( aFontCTL );
3834  }
3835 
3836  if( ParseStyleOptions( aStyle, aId, aClass, aItemSet, aPropInfo, &aLang, &aDir ) )
3837  DoPositioning( aItemSet, aPropInfo, xCntxt.get() );
3838 
3839  InsertAttrs( aItemSet, aPropInfo, xCntxt.get(), true );
3840  }
3841  else
3842  {
3843  if( nFontHeight )
3844  {
3845  SvxFontHeightItem aFontHeight( nFontHeight, 100, RES_CHRATR_FONTSIZE );
3846  InsertAttr( &m_xAttrTab->pFontHeight, aFontHeight, xCntxt.get() );
3847  SvxFontHeightItem aFontHeightCJK( nFontHeight, 100, RES_CHRATR_CJK_FONTSIZE );
3848  InsertAttr( &m_xAttrTab->pFontHeight, aFontHeightCJK, xCntxt.get() );
3849  SvxFontHeightItem aFontHeightCTL( nFontHeight, 100, RES_CHRATR_CTL_FONTSIZE );
3850  InsertAttr( &m_xAttrTab->pFontHeight, aFontHeightCTL, xCntxt.get() );
3851  }
3852  if( bColor )
3853  InsertAttr( &m_xAttrTab->pFontColor, SvxColorItem(aColor, RES_CHRATR_COLOR), xCntxt.get() );
3854  if( !aFontName.isEmpty() )
3855  {
3856  SvxFontItem aFont( eFamily, aFontName, aStyleName, ePitch, eEnc, RES_CHRATR_FONT );
3857  InsertAttr( &m_xAttrTab->pFont, aFont, xCntxt.get() );
3858  SvxFontItem aFontCJK( eFamily, aFontName, aStyleName, ePitch, eEnc, RES_CHRATR_CJK_FONT );
3859  InsertAttr( &m_xAttrTab->pFont, aFontCJK, xCntxt.get() );
3860  SvxFontItem aFontCTL( eFamily, aFontName, aStyleName, ePitch, eEnc, RES_CHRATR_CTL_FONT );
3861  InsertAttr( &m_xAttrTab->pFont, aFontCTL, xCntxt.get() );
3862  }
3863  }
3864 
3865  // save the context
3866  PushContext(xCntxt);
3867 
3868  m_aFontStack.push_back( nSize );
3869 }
3870 
// NOTE(review): the signature line of this function (listing line 3871) is
// missing from this extract; the body uses a token parameter named nToken.
//
// Ends the context belonging to nToken and removes the most recent entry of
// m_aFontStack, but never shrinks the stack to m_nFontStMin entries or fewer.
3872 {
3873  EndTag( nToken );
3874 
3875  // avoid stack underflow in tables
3876  if( m_aFontStack.size() > m_nFontStMin )
3877  m_aFontStack.pop_back();
3878 }
3879 
// NOTE(review): the signature line of this function (listing line 3880) is
// missing from this extract.
//
// Opens a PARABREAK_ON context. Evaluates ID, ALIGN, STYLE, CLASS, LANG and
// DIR, inserts hard attributes for style options (the class is deliberately
// not passed to the style parser), applies an explicit ALIGN as paragraph
// adjustment, pushes the context and records PARABREAK_ON as the open
// paragraph token.
3881 {
// NOTE(review): listing line 3883 - the statement executed when the insert
// position is not at content index 0 - is missing from this extract.
3882  if( m_pPam->GetPoint()->nContent.GetIndex() )
3884  else
3885  AddParSpace();
3886 
// SvxAdjust::End acts as the "no ALIGN given" marker (see the check below).
3887  m_eParaAdjust = SvxAdjust::End;
3888  OUString aId, aStyle, aClass, aLang, aDir;
3889 
// Options are visited from the highest index down to 0.
3890  const HTMLOptions& rHTMLOptions = GetOptions();
3891  for (size_t i = rHTMLOptions.size(); i; )
3892  {
3893  const HTMLOption& rOption = rHTMLOptions[--i];
3894  switch( rOption.GetToken() )
3895  {
3896  case HtmlOptionId::ID:
3897  aId = rOption.GetString();
3898  break;
3899  case HtmlOptionId::ALIGN:
3900  m_eParaAdjust = rOption.GetEnum( aHTMLPAlignTable, m_eParaAdjust );
3901  break;
3902  case HtmlOptionId::STYLE:
3903  aStyle = rOption.GetString();
3904  break;
3905  case HtmlOptionId::CLASS:
3906  aClass = rOption.GetString();
3907  break;
3908  case HtmlOptionId::LANG:
3909  aLang = rOption.GetString();
3910  break;
3911  case HtmlOptionId::DIR:
3912  aDir = rOption.GetString();
3913  break;
3914  default: break;
3915  }
3916  }
3917 
3918  // create a new context
// With a class the context also carries RES_POOLCOLL_TEXT and the class name.
3919  std::unique_ptr<HTMLAttrContext> xCntxt(
3920  !aClass.isEmpty() ? new HTMLAttrContext( HtmlTokenId::PARABREAK_ON,
3921  RES_POOLCOLL_TEXT, aClass )
3922  : new HTMLAttrContext( HtmlTokenId::PARABREAK_ON ));
3923 
3924  // parse styles (Don't consider class. This is only possible as long as none of
3925  // the CSS1 properties of the class must be formatted hard!!!)
3926  if (HasStyleOptions(aStyle, aId, OUString(), &aLang, &aDir))
3927  {
3928  SfxItemSet aItemSet( m_xDoc->GetAttrPool(), m_pCSS1Parser->GetWhichMap() );
3929  SvxCSS1PropertyInfo aPropInfo;
3930 
3931  if (ParseStyleOptions(aStyle, aId, OUString(), aItemSet, aPropInfo, &aLang, &aDir))
3932  {
3933  OSL_ENSURE( aClass.isEmpty() || !m_pCSS1Parser->GetClass( aClass ),
3934  "Class is not considered" );
3935  DoPositioning( aItemSet, aPropInfo, xCntxt.get() );
3936  InsertAttrs( aItemSet, aPropInfo, xCntxt.get() );
3937  }
3938  }
3939 
// Only an explicitly given alignment becomes a hard paragraph attribute.
3940  if( SvxAdjust::End != m_eParaAdjust )
3941  InsertAttr( &m_xAttrTab->pAdjust, SvxAdjustItem(m_eParaAdjust, RES_PARATR_ADJUST), xCntxt.get() );
3942 
3943  // and push on stack
3944  PushContext( xCntxt );
3945 
3946  // set the current style or its attributes
3947  SetTextCollAttrs( !aClass.isEmpty() ? m_aContexts.back().get() : nullptr );
3948 
3949  // progress bar
3950  ShowStatline();
3951 
3952  OSL_ENSURE( m_nOpenParaToken == HtmlTokenId::NONE, "Now a open paragraph element will be lost." );
3953  m_nOpenParaToken = HtmlTokenId::PARABREAK_ON;
3954 }
3955 
// Closes the currently open paragraph element: pops the context that belongs
// to m_nOpenParaToken (PARABREAK_ON if no token is recorded), ends its
// attributes and resets m_nOpenParaToken to NONE.
//
// bReal - if true, paragraph spacing is handled before the context is popped
//         and the text collection attributes are reset afterwards.
3956 void SwHTMLParser::EndPara( bool bReal )
3957 {
// Debug-only sanity check; it has no effect on the parser state.
3958  if (HtmlTokenId::LI_ON==m_nOpenParaToken && m_xTable)
3959  {
3960 #if OSL_DEBUG_LEVEL > 0
3961  const SwNumRule *pNumRule = m_pPam->GetNode().GetTextNode()->GetNumRule();
3962  OSL_ENSURE( pNumRule, "Where is the NumRule" );
3963 #endif
3964  }
3965 
3966  // Netscape skips empty paragraphs, we do the same.
3967  if( bReal )
3968  {
// NOTE(review): listing line 3970 - the statement executed when the insert
// position is not at content index 0 - is missing from this extract.
3969  if( m_pPam->GetPoint()->nContent.GetIndex() )
3971  else
3972  AddParSpace();
3973  }
3974 
3975  // If a DD or DT was open, it's an implied definition list,
3976  // which must be closed now.
// NOTE(review): listing line 3978, the second operand of this condition, is
// missing from this extract.
3977  if( (m_nOpenParaToken == HtmlTokenId::DT_ON || m_nOpenParaToken == HtmlTokenId::DD_ON) &&
3979  {
3980  m_nDefListDeep--;
3981  }
3982 
3983  // Pop the context of the stack. It can also be from an
3984  // implied opened definition list.
3985  std::unique_ptr<HTMLAttrContext> xCntxt(
3986  PopContext( m_nOpenParaToken != HtmlTokenId::NONE ? getOnToken(m_nOpenParaToken) : HtmlTokenId::PARABREAK_ON ));
3987 
3988  // close attribute
3989  if (xCntxt)
3990  {
3991  EndContext(xCntxt.get());
3992  SetAttr(); // because of JavaScript set paragraph attributes as fast as possible
3993  xCntxt.reset();
3994  }
3995 
3996  // reset the existing style
3997  if( bReal )
3998  SetTextCollAttrs();
3999 
4000  m_nOpenParaToken = HtmlTokenId::NONE;
4001 }
4002 
// NOTE(review): the signature line of this function (listing line 4003) is
// missing from this extract; the body uses a token parameter named nToken.
//
// Opens a heading context. HEAD1_ON..HEAD6_ON select RES_POOLCOLL_HEADLINE1..6,
// any other token selects RES_POOLCOLL_STANDARD. ID, ALIGN, STYLE, CLASS, LANG
// and DIR are evaluated; style options are inserted as hard attributes (the
// class is not passed to the style parser) and an explicit ALIGN becomes the
// paragraph adjustment.
4004 {
// SvxAdjust::End acts as the "no ALIGN given" marker (see the check below).
4005  m_eParaAdjust = SvxAdjust::End;
4006 
4007  OUString aId, aStyle, aClass, aLang, aDir;
4008 
// Options are visited from the highest index down to 0.
4009  const HTMLOptions& rHTMLOptions = GetOptions();
4010  for (size_t i = rHTMLOptions.size(); i; )
4011  {
4012  const HTMLOption& rOption = rHTMLOptions[--i];
4013  switch( rOption.GetToken() )
4014  {
4015  case HtmlOptionId::ID:
4016  aId = rOption.GetString();
4017  break;
4018  case HtmlOptionId::ALIGN:
4019  m_eParaAdjust = rOption.GetEnum( aHTMLPAlignTable, m_eParaAdjust );
4020  break;
4021  case HtmlOptionId::STYLE:
4022  aStyle = rOption.GetString();
4023  break;
4024  case HtmlOptionId::CLASS:
4025  aClass = rOption.GetString();
4026  break;
4027  case HtmlOptionId::LANG:
4028  aLang = rOption.GetString();
4029  break;
4030  case HtmlOptionId::DIR:
4031  aDir = rOption.GetString();
4032  break;
4033  default: break;
4034  }
4035  }
4036 
4037  // open a new paragraph
// NOTE(review): listing line 4039 - the statement executed when the insert
// position is not at content index 0 - is missing from this extract.
4038  if( m_pPam->GetPoint()->nContent.GetIndex() )
4040  else
4041  AddParSpace();
4042 
4043  // search for the matching style
4044  sal_uInt16 nTextColl;
4045  switch( nToken )
4046  {
4047  case HtmlTokenId::HEAD1_ON: nTextColl = RES_POOLCOLL_HEADLINE1; break;
4048  case HtmlTokenId::HEAD2_ON: nTextColl = RES_POOLCOLL_HEADLINE2; break;
4049  case HtmlTokenId::HEAD3_ON: nTextColl = RES_POOLCOLL_HEADLINE3; break;
4050  case HtmlTokenId::HEAD4_ON: nTextColl = RES_POOLCOLL_HEADLINE4; break;
4051  case HtmlTokenId::HEAD5_ON: nTextColl = RES_POOLCOLL_HEADLINE5; break;
4052  case HtmlTokenId::HEAD6_ON: nTextColl = RES_POOLCOLL_HEADLINE6; break;
4053  default: nTextColl = RES_POOLCOLL_STANDARD; break;
4054  }
4055 
4056  // create the context
4057  std::unique_ptr<HTMLAttrContext> xCntxt(new HTMLAttrContext(nToken, nTextColl, aClass));
4058 
4059  // parse styles (regarding class see also NewPara)
4060  if (HasStyleOptions(aStyle, aId, OUString(), &aLang, &aDir))
4061  {
4062  SfxItemSet aItemSet( m_xDoc->GetAttrPool(), m_pCSS1Parser->GetWhichMap() );
4063  SvxCSS1PropertyInfo aPropInfo;
4064 
4065  if (ParseStyleOptions(aStyle, aId, OUString(), aItemSet, aPropInfo, &aLang, &aDir))
4066  {
4067  OSL_ENSURE( aClass.isEmpty() || !m_pCSS1Parser->GetClass( aClass ),
4068  "Class is not considered" );
4069  DoPositioning( aItemSet, aPropInfo, xCntxt.get() );
4070  InsertAttrs( aItemSet, aPropInfo, xCntxt.get() );
4071  }
4072  }
4073 
// Only an explicitly given alignment becomes a hard paragraph attribute.
4074  if( SvxAdjust::End != m_eParaAdjust )
4075  InsertAttr( &m_xAttrTab->pAdjust, SvxAdjustItem(m_eParaAdjust, RES_PARATR_ADJUST), xCntxt.get() );
4076 
4077  // and push on stack
4078  PushContext(xCntxt);
4079 
4080  // set the current style or its attributes
4081  SetTextCollAttrs(m_aContexts.back().get());
4082 
// NOTE(review): listing line 4083 is missing from this extract.
4084 
4085  // progress bar
4086  ShowStatline();
4087 }
4088 
// NOTE(review): the signature line of this function (listing line 4089) is
// missing from this extract.
//
// Closes the innermost open heading: removes the topmost HEAD1_ON..HEAD6_ON
// context above m_nContextStMin from m_aContexts, ends its attributes and
// resets the text collection attributes.
4090 {
4091  // open a new paragraph
// NOTE(review): listing line 4093 - the statement executed when the insert
// position is not at content index 0 - is missing from this extract.
4092  if( m_pPam->GetPoint()->nContent.GetIndex() )
4094  else
4095  AddParSpace();
4096 
4097  // search context matching the token and fetch it from stack
// The search runs from the top of the stack downwards and stops at the first
// heading context of any level, or at m_nContextStMin.
4098  std::unique_ptr<HTMLAttrContext> xCntxt;
4099  auto nPos = m_aContexts.size();
4100  while( !xCntxt && nPos>m_nContextStMin )
4101  {
4102  switch( m_aContexts[--nPos]->GetToken() )
4103  {
4104  case HtmlTokenId::HEAD1_ON:
4105  case HtmlTokenId::HEAD2_ON:
4106  case HtmlTokenId::HEAD3_ON:
4107  case HtmlTokenId::HEAD4_ON:
4108  case HtmlTokenId::HEAD5_ON:
4109  case HtmlTokenId::HEAD6_ON:
// Take ownership first, then remove the emptied entry from the stack.
4110  xCntxt = std::move(m_aContexts[nPos]);
4111  m_aContexts.erase( m_aContexts.begin() + nPos );
4112  break;
4113  default: break;
4114  }
4115  }
4116 
4117  // and now end attributes
4118  if (xCntxt)
4119  {
4120  EndContext(xCntxt.get());
4121  SetAttr(); // because of JavaScript set paragraph attributes as fast as possible
4122  xCntxt.reset();
4123  }
4124 
4125  // reset existing style
4126  SetTextCollAttrs();
4127 
// NOTE(review): listing line 4128 is missing from this extract.
4129 }
4130 
// Opens a context for a block-level element that maps to a paragraph style.
//
// nToken - the "on" token of the element; it selects how a new paragraph is
//          started (see the switch below).
// nColl  - id of the paragraph style stored in the context.
//
// ID, STYLE, CLASS, LANG and DIR are evaluated; style options are inserted as
// hard attributes (the class is not passed to the style parser).
4131 void SwHTMLParser::NewTextFormatColl( HtmlTokenId nToken, sal_uInt16 nColl )
4132 {
4133  OUString aId, aStyle, aClass, aLang, aDir;
4134 
// Options are visited from the highest index down to 0.
4135  const HTMLOptions& rHTMLOptions = GetOptions();
4136  for (size_t i = rHTMLOptions.size(); i; )
4137  {
4138  const HTMLOption& rOption = rHTMLOptions[--i];
4139  switch( rOption.GetToken() )
4140  {
4141  case HtmlOptionId::ID:
4142  aId = rOption.GetString();
4143  break;
4144  case HtmlOptionId::STYLE:
4145  aStyle = rOption.GetString();
4146  break;
4147  case HtmlOptionId::CLASS:
4148  aClass = rOption.GetString();
4149  break;
4150  case HtmlOptionId::LANG:
4151  aLang = rOption.GetString();
4152  break;
4153  case HtmlOptionId::DIR:
4154  aDir = rOption.GetString();
4155  break;
4156  default: break;
4157  }
4158  }
4159 
4160  // open a new paragraph
4161  SwHTMLAppendMode eMode = AM_NORMAL;
4162  switch( nToken )
4163  {
4164  case HtmlTokenId::LISTING_ON:
4165  case HtmlTokenId::XMP_ON:
4166  // Both of these tags are mapped to the PRE style. If a CLASS
4167  // exists we delete it so that we don't get the CLASS of
4168  // the PRE style.
4169  aClass.clear();
4170  [[fallthrough]];
4171  case HtmlTokenId::BLOCKQUOTE_ON:
4172  case HtmlTokenId::BLOCKQUOTE30_ON:
4173  case HtmlTokenId::PREFORMTXT_ON:
4174  eMode = AM_SPACE;
4175  break;
4176  case HtmlTokenId::ADDRESS_ON:
4177  eMode = AM_NOSPACE; // ADDRESS can follow on a <P> without </P>
4178  break;
4179  case HtmlTokenId::DT_ON:
4180  case HtmlTokenId::DD_ON:
4181  eMode = AM_SOFTNOSPACE;
4182  break;
4183  default:
4184  OSL_ENSURE( false, "unknown style" );
4185  break;
4186  }
// A new text node is appended only if the current one already has content;
// at content index 0 paragraph spacing is added for AM_SPACE only.
4187  if( m_pPam->GetPoint()->nContent.GetIndex() )
4188  AppendTextNode( eMode );
4189  else if( AM_SPACE==eMode )
4190  AddParSpace();
4191 
4192  // ... and save in a context
4193  std::unique_ptr<HTMLAttrContext> xCntxt(new HTMLAttrContext(nToken, nColl, aClass));
4194 
4195  // parse styles (regarding class see also NewPara)
4196  if (HasStyleOptions(aStyle, aId, OUString(), &aLang, &aDir))
4197  {
4198  SfxItemSet aItemSet( m_xDoc->GetAttrPool(), m_pCSS1Parser->GetWhichMap() );
4199  SvxCSS1PropertyInfo aPropInfo;
4200 
4201  if (ParseStyleOptions(aStyle, aId, OUString(), aItemSet, aPropInfo, &aLang, &aDir))
4202  {
4203  OSL_ENSURE( aClass.isEmpty() || !m_pCSS1Parser->GetClass( aClass ),
4204  "Class is not considered" );
4205  DoPositioning( aItemSet, aPropInfo, xCntxt.get() );
4206  InsertAttrs( aItemSet, aPropInfo, xCntxt.get() );
4207  }
4208  }
4209 
4210  PushContext(xCntxt);
4211 
4212  // set the new style
4213  SetTextCollAttrs(m_aContexts.back().get());
4214 
4215  // update progress bar
4216  ShowStatline();
4217 }
4218 
// NOTE(review): the signature line of this function (listing line 4219) is
// missing from this extract; the body uses a token parameter named nToken.
//
// Closes a block-level element that maps to a paragraph style: starts a new
// paragraph in the mode that matches the element, pops the context of the
// "on" form of nToken, ends its attributes and resets the text collection
// attributes.
4220 {
4221  SwHTMLAppendMode eMode = AM_NORMAL;
4222  switch( getOnToken(nToken) )
4223  {
4224  case HtmlTokenId::BLOCKQUOTE_ON:
4225  case HtmlTokenId::BLOCKQUOTE30_ON:
4226  case HtmlTokenId::PREFORMTXT_ON:
4227  case HtmlTokenId::LISTING_ON:
4228  case HtmlTokenId::XMP_ON:
4229  eMode = AM_SPACE;
4230  break;
4231  case HtmlTokenId::ADDRESS_ON:
4232  case HtmlTokenId::DT_ON:
4233  case HtmlTokenId::DD_ON:
4234  eMode = AM_SOFTNOSPACE;
4235  break;
4236  default:
4237  OSL_ENSURE( false, "unknown style" );
4238  break;
4239  }
// A new text node is appended only if the current one already has content;
// at content index 0 paragraph spacing is added for AM_SPACE only.
4240  if( m_pPam->GetPoint()->nContent.GetIndex() )
4241  AppendTextNode( eMode );
4242  else if( AM_SPACE==eMode )
4243  AddParSpace();
4244 
4245  // pop current context of stack
4246  std::unique_ptr<HTMLAttrContext> xCntxt(PopContext(getOnToken(nToken)));
4247 
4248  // and now end attributes
4249  if (xCntxt)
4250  {
4251  EndContext(xCntxt.get());
4252  SetAttr(); // because of JavaScript set paragraph attributes as fast as possible
4253  xCntxt.reset();
4254  }
4255 
4256  // reset existing style
4257  SetTextCollAttrs();
4258 }
4259 
// NOTE(review): the signature line of this function (listing line 4260) is
// missing from this extract.
//
// Opens a DEFLIST_ON context: starts a new paragraph, increments
// m_nDefListDeep, stores the margins inherited from the enclosing contexts
// (plus the left text margin of the DD style for a nested list that is not
// inside a DD) in the context, applies style options and pushes the context.
4261 {
4262  OUString aId, aStyle, aClass, aLang, aDir;
4263 
// Options are visited from the highest index down to 0.
4264  const HTMLOptions& rHTMLOptions = GetOptions();
4265  for (size_t i = rHTMLOptions.size(); i; )
4266  {
4267  const HTMLOption& rOption = rHTMLOptions[--i];
4268  switch( rOption.GetToken() )
4269  {
4270  case HtmlOptionId::ID:
4271  aId = rOption.GetString();
4272  break;
4273  case HtmlOptionId::STYLE:
4274  aStyle = rOption.GetString();
4275  break;
4276  case HtmlOptionId::CLASS:
4277  aClass = rOption.GetString();
4278  break;
4279  case HtmlOptionId::LANG:
4280  aLang = rOption.GetString();
4281  break;
4282  case HtmlOptionId::DIR:
4283  aDir = rOption.GetString();
4284  break;
4285  default: break;
4286  }
4287  }
4288 
4289  // open a new paragraph
// Spacing is added only when neither a numbered list nor a definition list
// is currently open (combined depth 0).
4290  bool bSpace = (GetNumInfo().GetDepth() + m_nDefListDeep) == 0;
4291  if( m_pPam->GetPoint()->nContent.GetIndex() )
4292  AppendTextNode( bSpace ? AM_SPACE : AM_SOFTNOSPACE );
4293  else if( bSpace )
4294  AddParSpace();
4295 
4296  // one level more
4297  m_nDefListDeep++;
4298 
// Search the context stack from the top: a DD found before any list context
// means this list is nested inside a DD; a list context found first means it
// is not.
4299  bool bInDD = false, bNotInDD = false;
4300  auto nPos = m_aContexts.size();
4301  while( !bInDD && !bNotInDD && nPos>m_nContextStMin )
4302  {
4303  HtmlTokenId nCntxtToken = m_aContexts[--nPos]->GetToken();
4304  switch( nCntxtToken )
4305  {
4306  case HtmlTokenId::DEFLIST_ON:
4307  case HtmlTokenId::DIRLIST_ON:
4308  case HtmlTokenId::MENULIST_ON:
4309  case HtmlTokenId::ORDERLIST_ON:
4310  case HtmlTokenId::UNORDERLIST_ON:
4311  bNotInDD = true;
4312  break;
4313  case HtmlTokenId::DD_ON:
4314  bInDD = true;
4315  break;
4316  default: break;
4317  }
4318  }
4319 
4320  // ... and save in a context
4321  std::unique_ptr<HTMLAttrContext> xCntxt(new HTMLAttrContext(HtmlTokenId::DEFLIST_ON));
4322 
4323  // in it save also the margins
4324  sal_uInt16 nLeft=0, nRight=0;
4325  short nIndent=0;
4326  GetMarginsFromContext( nLeft, nRight, nIndent );
4327 
4328  // The indentation, which already results from a DL, correlates with a DT
4329  // on the current level and this correlates to a DD from the previous level.
4330  // For a level >=2 we must add DD distance.
4331  if( !bInDD && m_nDefListDeep > 1 )
4332  {
4333 
4334  // add the left text margin of the DD style (RES_POOLCOLL_HTML_DD)
4335  SvxLRSpaceItem rLRSpace =
4336  m_pCSS1Parser->GetTextFormatColl(RES_POOLCOLL_HTML_DD, OUString())
4337  ->GetLRSpace();
4338  nLeft = nLeft + static_cast< sal_uInt16 >(rLRSpace.GetTextLeft());
4339  }
4340 
4341  xCntxt->SetMargins( nLeft, nRight, nIndent );
4342 
4343  // parse styles
4344  if( HasStyleOptions( aStyle, aId, aClass, &aLang, &aDir ) )
4345  {
4346  SfxItemSet aItemSet( m_xDoc->GetAttrPool(), m_pCSS1Parser->GetWhichMap() );
4347  SvxCSS1PropertyInfo aPropInfo;
4348 
4349  if( ParseStyleOptions( aStyle, aId, aClass, aItemSet, aPropInfo, &aLang, &aDir ) )
4350  {
4351  DoPositioning( aItemSet, aPropInfo, xCntxt.get() );
4352  InsertAttrs( aItemSet, aPropInfo, xCntxt.get() );
4353  }
4354  }
4355 
4356  PushContext(xCntxt);
4357 
4358  // set the attributes of the new style
4359  if( m_nDefListDeep > 1 )
4360  SetTextCollAttrs(m_aContexts.back().get());
4361 }
4362 
4364 {
      // Closes the current definition list level: finishes the running
      // paragraph, lowers m_nDefListDeep, pops the DEFLIST_ON context, ends its
      // attributes and re-applies the paragraph style.
      // NOTE(review): the signature line (4363) is not part of this listing.

      // Extra spacing is requested only when GetNumInfo().GetDepth() plus
      // m_nDefListDeep equals exactly 1.
4365  bool bSpace = (GetNumInfo().GetDepth() + m_nDefListDeep) == 1;
      // Cursor not at the start of the paragraph: start a new text node.
      // Otherwise only add paragraph spacing, and only if bSpace is set.
4366  if( m_pPam->GetPoint()->nContent.GetIndex() )
4367  AppendTextNode( bSpace ? AM_SPACE : AM_SOFTNOSPACE );
4368  else if( bSpace )
4369  AddParSpace();
4370 
4371  // one level less
      // (guarded, so the counter is never decremented when it is already 0)
4372  if( m_nDefListDeep > 0 )
4373  m_nDefListDeep--;
4374 
4375  // pop current context of stack
4376  std::unique_ptr<HTMLAttrContext> xCntxt(PopContext(HtmlTokenId::DEFLIST_ON));
4377 
4378  // and now end attributes
      // PopContext() may yield no context; in that case nothing is ended.
4379  if (xCntxt)
4380  {
4381  EndContext(xCntxt.get());
4382  SetAttr(); // because of JavaScript set paragraph attributes as fast as possible
      // release the popped context explicitly before the style is re-applied
4383  xCntxt.reset();
4384  }
4385 
4386  // and set style
4387  SetTextCollAttrs();
4388 }
4389 
4391 {
      // Starts a definition-list item for nToken (DD_ON selects the DD pool
      // style, every other token the DT pool style).
      // NOTE(review): the signature line (4390) is not part of this listing.

4392  // determine if the DD/DT exist in a DL
4393  bool bInDefList = false, bNotInDefList = false;
      // Walk the context stack from the top down to m_nContextStMin. The search
      // stops at the first list context: a DEFLIST_ON means "inside a DL", any
      // other list type means "not inside a DL".
4394  auto nPos = m_aContexts.size();
4395  while( !bInDefList && !bNotInDefList && nPos>m_nContextStMin )
4396  {
4397  HtmlTokenId nCntxtToken = m_aContexts[--nPos]->GetToken();
4398  switch( nCntxtToken )
4399  {
4400  case HtmlTokenId::DEFLIST_ON:
4401  bInDefList = true;
4402  break;
4403  case HtmlTokenId::DIRLIST_ON:
4404  case HtmlTokenId::MENULIST_ON:
4405  case HtmlTokenId::ORDERLIST_ON:
4406  case HtmlTokenId::UNORDERLIST_ON:
4407  bNotInDefList = true;
4408  break;
4409  default: break;
4410  }
4411  }
4412 
4413  // if not, then implicitly open a new DL
      // Only the depth counter and m_nOpenParaToken are updated here; no
      // DEFLIST_ON context is pushed by this block.
4414  if( !bInDefList )
4415  {
4416  m_nDefListDeep++;
4417  OSL_ENSURE( m_nOpenParaToken == HtmlTokenId::NONE,
4418  "Now an open paragraph element will be lost." );
4419  m_nOpenParaToken = nToken;
4420  }
4421 
      // Open the paragraph style matching the item kind.
4422  NewTextFormatColl( nToken, static_cast< sal_uInt16 >(nToken==HtmlTokenId::DD_ON ? RES_POOLCOLL_HTML_DD
4423  : RES_POOLCOLL_HTML_DT) );
4424 }
4425 
4427 {
      // Ends a DD/DT item: removes the matching DD_ON/DT_ON context from the
      // context stack and ends its attributes.
      // NOTE(review): the signature line (4426) and the source lines 4430 and
      // 4456 are missing from this listing; see the notes at those places.

4428  // open a new paragraph
4429  if( nToken == HtmlTokenId::NONE && m_pPam->GetPoint()->nContent.GetIndex() )
      // NOTE(review): source line 4430, the statement controlled by the `if`
      // above, is not visible here. The assignment on 4433 is NOT the body of
      // that `if` in the original file — confirm against the full source.
4431 
4432  // search context matching the token and fetch it from stack
4433  nToken = getOnToken(nToken);
4434  std::unique_ptr<HTMLAttrContext> xCntxt;
      // Scan from the top of the stack down to m_nContextStMin until a context
      // has been taken over into xCntxt.
4435  auto nPos = m_aContexts.size();
4436  while( !xCntxt && nPos>m_nContextStMin )
4437  {
4438  HtmlTokenId nCntxtToken = m_aContexts[--nPos]->GetToken();
4439  switch( nCntxtToken )
4440  {
4441  case HtmlTokenId::DD_ON:
4442  case HtmlTokenId::DT_ON:
      // NONE matches any DD/DT item, otherwise the kinds must be equal.
4443  if( nToken == HtmlTokenId::NONE || nToken == nCntxtToken )
4444  {
      // take ownership of the context, then drop the emptied slot
4445  xCntxt = std::move(m_aContexts[nPos]);
4446  m_aContexts.erase( m_aContexts.begin() + nPos );
4447  }
4448  break;
4449  case HtmlTokenId::DEFLIST_ON:
4450  // don't look at DD/DT outside the current DefList
4451  case HtmlTokenId::DIRLIST_ON:
4452  case HtmlTokenId::MENULIST_ON:
4453  case HtmlTokenId::ORDERLIST_ON:
4454  case HtmlTokenId::UNORDERLIST_ON:
4455  // and also not outside another list
      // NOTE(review): source line 4456 is not visible here; presumably it
      // terminates the search at the list boundary — confirm.
4457  break;
4458  default: break;
4459  }
4460  }
4461 
4462  // and now end attributes
4463  if (xCntxt)
4464  {
4465  EndContext(xCntxt.get());
4466  SetAttr(); // because of JavaScript set paragraph attributes as fast as possible
4467  }
4468 }
4469 
// Reports whether a fly frame anchored at paragraph or at character is
// attached to the node the cursor (m_pPam point) currently sits in.
//
// bNoSurroundOnly: only frames whose wrap mode is WrapTextMode_NONE count.
// bSurroundOnly:   frames with a wrap mode other than NONE and THROUGH count;
//                  a frame with WrapTextMode_NONE ends the search with `false`.
// With both flags false, the first matching frame yields `true`.
4479 bool SwHTMLParser::HasCurrentParaFlys( bool bNoSurroundOnly,
4480  bool bSurroundOnly ) const
4481 {
4482  SwNodeIndex& rNodeIdx = m_pPam->GetPoint()->nNode;
4483 
4484  const SwFrameFormats& rFrameFormatTable = *m_xDoc->GetSpzFrameFormats();
4485 
4486  bool bFound = false;
      // Linear scan over all frame formats returned by GetSpzFrameFormats().
4487  for ( size_t i=0; i<rFrameFormatTable.size(); i++ )
4488  {
4489  const SwFrameFormat *const pFormat = rFrameFormatTable[i];
4490  SwFormatAnchor const*const pAnchor = &pFormat->GetAnchor();
4491  // A frame was found, when
4492  // - it is paragraph-bound, and
4493  // - is anchored in current paragraph, and
4494  // - every paragraph-bound frame counts, or
4495  // - (only frames without wrapping count and) the frame doesn't have
4496  // a wrapping
4497  SwPosition const*const pAPos = pAnchor->GetContentAnchor();
4498  if (pAPos &&
4499  ((RndStdIds::FLY_AT_PARA == pAnchor->GetAnchorId()) ||
4500  (RndStdIds::FLY_AT_CHAR == pAnchor->GetAnchorId())) &&
4501  pAPos->nNode == rNodeIdx )
4502  {
      // No filter requested: any anchored frame is a hit.
4503  if( !(bNoSurroundOnly || bSurroundOnly) )
4504  {
4505  bFound = true;
4506  break;
4507  }
4508  else
4509  {
4510  // When looking for frames with wrapping, also disregard
4511  // ones with wrap-through. In this case it's (still) HIDDEN-Controls,
4512  // and you don't want to evade those when positioning.
4513  css::text::WrapTextMode eSurround = pFormat->GetSurround().GetSurround();
4514  if( bNoSurroundOnly )
4515  {
4516  if( css::text::WrapTextMode_NONE==eSurround )
4517  {
4518  bFound = true;
4519  break;
4520  }
4521  }
4522  if( bSurroundOnly )
4523  {
      // A frame without wrapping overrides any earlier hit and ends the search.
4524  if( css::text::WrapTextMode_NONE==eSurround )
4525  {
4526  bFound = false;
4527  break;
4528  }
4529  else if( css::text::WrapTextMode_THROUGH!=eSurround )
4530  {
4531  bFound = true;
4532  // Continue searching: It's possible that some without
4533  // wrapping will follow...
4534  }
4535  }
4536  }
4537  }
4538  }
4539 
4540  return bFound;
4541 }
4542 
4543 // the special methods for inserting of objects
4544 
4546 {
      // Returns the format collection of the content node the cursor is in
      // (via GetAnyFormatColl()), or nullptr when m_pPam has no content node.
      // NOTE(review): the signature line (4545) is not part of this listing.
4547  const SwContentNode* pCNd = m_pPam->GetContentNode();
4548  return pCNd ? &pCNd->GetAnyFormatColl() : nullptr;
4549 }
4550 
4552 {
      // Derives the paragraph style, the hard attributes and the paragraph
      // margins from the context stack (entries from m_nContextStAttrMin
      // upwards) and applies them to the paragraph at m_pPam.
      //
      // pContext may be null. If it is non-null and carries a text format
      // collection, the resulting margins and the upper/lower spacing of that
      // collection are written back into it.
      // NOTE(review): the signature line (4551) is not part of this listing.
4553  SwTextFormatColl *pCollToSet = nullptr; // the style to set
      // pItemSet is an owning raw pointer: created lazily with `new` below and
      // released by the `delete` at the end of this function. Any early exit
      // added in between would leak it.
4554  SfxItemSet *pItemSet = nullptr; // set of hard attributes
4555  sal_uInt16 nTopColl = pContext ? pContext->GetTextFormatColl() : 0;
      // note: despite the `r` prefix this is a copy, not a reference
4556  const OUString rTopClass = pContext ? pContext->GetClass() : OUString();
4557  sal_uInt16 nDfltColl = RES_POOLCOLL_TEXT;
4558 
4559  bool bInPRE=false; // some context info
4560 
4561  sal_uInt16 nLeftMargin = 0, nRightMargin = 0; // the margins and
4562  short nFirstLineIndent = 0; // indentations
4563 
      // Pass over the contexts from the bottom (oldest) to the top (newest), so
      // that later contexts override or refine earlier ones.
4564  for( auto i = m_nContextStAttrMin; i < m_aContexts.size(); ++i )
4565  {
4566  const HTMLAttrContext *pCntxt = m_aContexts[i].get();
4567 
4568  sal_uInt16 nColl = pCntxt->GetTextFormatColl();
4569  if( nColl )
4570  {
4571  // There is a style to set. Then at first we must decide,
4572  // if the style can be set.
4573  bool bSetThis = true;
4574  switch( nColl )
4575  {
4576  case RES_POOLCOLL_HTML_PRE:
4577  bInPRE = true;
4578  break;
4579  case RES_POOLCOLL_TEXT:
4580  // <TD><P CLASS=xxx> must become TD.xxx
4581  if( nDfltColl==RES_POOLCOLL_TABLE ||
4582  nDfltColl==RES_POOLCOLL_TABLE_HDLN )
4583  nColl = nDfltColl;
4584  break;
4585  case RES_POOLCOLL_HTML_HR:
4586  // also <HR> in <PRE> set as style, otherwise it can't
4587  // be exported anymore
4588  break;
4589  default:
      // Once a PRE context has been seen, all other styles are applied as hard
      // attributes only (see the else-branch of `if( bSetThis )`).
4590  if( bInPRE )
4591  bSetThis = false;
4592  break;
4593  }
4594 
4595  SwTextFormatColl *pNewColl =
4596  m_pCSS1Parser->GetTextFormatColl( nColl, pCntxt->GetClass() );
4597 
4598  if( bSetThis )
4599  {
4600  // If now a different style should be set as previously, the
4601  // previous style must be replaced by hard attribution.
4602 
4603  if( pCollToSet )
4604  {
4605  // insert the attributes hard, which previous style sets
4606  if( !pItemSet )
4607  pItemSet = new SfxItemSet( pCollToSet->GetAttrSet() );
4608  else
4609  {
4610  const SfxItemSet& rCollSet = pCollToSet->GetAttrSet();
4611  SfxItemSet aItemSet( *rCollSet.GetPool(),
4612  rCollSet.GetRanges() );
4613  aItemSet.Set( rCollSet );
4614  pItemSet->Put( aItemSet );
4615  }
4616  // but remove the attributes, which the current style sets,
4617  // because otherwise they will be overwritten later
4618  pItemSet->Differentiate( pNewColl->GetAttrSet() );
4619  }
4620 
4621  pCollToSet = pNewColl;
4622  }
4623  else
4624  {
4625  // hard attribution
4626  if( !pItemSet )
4627  pItemSet = new SfxItemSet( pNewColl->GetAttrSet() );
4628  else
4629  {
4630  const SfxItemSet& rCollSet = pNewColl->GetAttrSet();
4631  SfxItemSet aItemSet( *rCollSet.GetPool(),
4632  rCollSet.GetRanges() );
4633  aItemSet.Set( rCollSet );
4634  pItemSet->Put( aItemSet );
4635  }
4636  }
4637  }
4638  else
4639  {
4640  // Maybe a default style exists?
4641  nColl = pCntxt->GetDfltTextFormatColl();
4642  if( nColl )
4643  nDfltColl = nColl;
4644  }
4645 
4646  // if applicable fetch new paragraph indents
      // The most recent context with changed LR space wins for all three values.
4647  if( pCntxt->IsLRSpaceChanged() )
4648  {
4649  sal_uInt16 nLeft=0, nRight=0;
4650 
4651  pCntxt->GetMargins( nLeft, nRight, nFirstLineIndent );
4652  nLeftMargin = nLeft;
4653  nRightMargin = nRight;
4654  }
4655  }
4656 
4657  // If in current context a new style should be set,
4658  // its paragraph margins must be inserted in the context.
4659  if( pContext && nTopColl )
4660  {
4661  // <TD><P CLASS=xxx> must become TD.xxx
4662  if( nTopColl==RES_POOLCOLL_TEXT &&
4663  (nDfltColl==RES_POOLCOLL_TABLE ||
4664  nDfltColl==RES_POOLCOLL_TABLE_HDLN) )
4665  nTopColl = nDfltColl;
4666 
4667  const SwTextFormatColl *pTopColl =
4668  m_pCSS1Parser->GetTextFormatColl( nTopColl, rTopClass );
4669  const SfxItemSet& rItemSet = pTopColl->GetAttrSet();
4670  const SfxPoolItem *pItem;
4671  if( SfxItemState::SET == rItemSet.GetItemState(RES_LR_SPACE,true, &pItem) )
4672  {
4673  const SvxLRSpaceItem *pLRItem =
4674  static_cast<const SvxLRSpaceItem *>(pItem);
4675 
4676  sal_Int32 nLeft = pLRItem->GetTextLeft();
4677  sal_Int32 nRight = pLRItem->GetRight();
4678  nFirstLineIndent = pLRItem->GetTextFirstLineOfst();
4679 
4680  // In Definition lists the margins also contain the margins from the previous levels
4681  if( RES_POOLCOLL_HTML_DD == nTopColl )
4682  {
4683  const SvxLRSpaceItem& rDTLRSpace = m_pCSS1Parser
4684  ->GetTextFormatColl(RES_POOLCOLL_HTML_DT, OUString())
4685  ->GetLRSpace();
4686  nLeft -= rDTLRSpace.GetTextLeft();
4687  nRight -= rDTLRSpace.GetRight();
4688  }
4689  else if( RES_POOLCOLL_HTML_DT == nTopColl )
4690  {
4691  nLeft = 0;
4692  nRight = 0;
4693  }
4694 
4695  // the paragraph margins add up
      // NOTE(review): nLeft/nRight are sal_Int32 and may have been reduced by
      // the DT margins above; a negative value would wrap in the sal_uInt16
      // cast — confirm this cannot occur.
4696  nLeftMargin = nLeftMargin + static_cast< sal_uInt16 >(nLeft);
4697  nRightMargin = nRightMargin + static_cast< sal_uInt16 >(nRight);
4698 
4699  pContext->SetMargins( nLeftMargin, nRightMargin,
4700  nFirstLineIndent );
4701  }
4702  if( SfxItemState::SET == rItemSet.GetItemState(RES_UL_SPACE,true, &pItem) )
4703  {
4704  const SvxULSpaceItem *pULItem =
4705  static_cast<const SvxULSpaceItem *>(pItem);
4706  pContext->SetULSpace( pULItem->GetUpper(), pULItem->GetLower() );
4707  }
4708  }
4709 
4710  // If no style is set in the context use the text body.
      // Margins that are still 0 are then taken from that default collection.
4711  if( !pCollToSet )
4712  {
4713  pCollToSet = m_pCSS1Parser->GetTextCollFromPool( nDfltColl );
4714  const SvxLRSpaceItem& rLRItem = pCollToSet->GetLRSpace();
4715  if( !nLeftMargin )
4716  nLeftMargin = static_cast< sal_uInt16 >(rLRItem.GetTextLeft());
4717  if( !nRightMargin )
4718  nRightMargin = static_cast< sal_uInt16 >(rLRItem.GetRight());
4719  if( !nFirstLineIndent )
4720  nFirstLineIndent = rLRItem.GetTextFirstLineOfst();
4721  }
4722 
4723  // remove previous hard attribution of paragraph
4724  for( auto pParaAttr : m_aParaAttrs )
4725  pParaAttr->Invalidate();
4726  m_aParaAttrs.clear();
4727 
4728  // set the style
4729  m_xDoc->SetTextFormatColl( *m_pPam, pCollToSet );
4730 
4731  // if applicable correct the paragraph indent
      // An LR-space item is only needed when the computed margins differ from
      // what the chosen collection already provides.
4732  const SvxLRSpaceItem& rLRItem = pCollToSet->GetLRSpace();
4733  bool bSetLRSpace = nLeftMargin != rLRItem.GetTextLeft() ||
4734  nFirstLineIndent != rLRItem.GetTextFirstLineOfst() ||
4735  nRightMargin != rLRItem.GetRight();
4736 
4737  if( bSetLRSpace )
4738  {
4739  SvxLRSpaceItem aLRItem( rLRItem );
4740  aLRItem.SetTextLeft( nLeftMargin );
4741  aLRItem.SetRight( nRightMargin );
4742  aLRItem.SetTextFirstLineOfst( nFirstLineIndent );
      // With a hard-attribute set present the item travels with it; otherwise
      // it is opened and closed as a paragraph attribute of its own and
      // remembered in m_aParaAttrs.
4743  if( pItemSet )
4744  pItemSet->Put( aLRItem );
4745  else
4746  {
4747  NewAttr(m_xAttrTab, &m_xAttrTab->pLRSpace, aLRItem);
4748  m_xAttrTab->pLRSpace->SetLikePara();
4749  m_aParaAttrs.push_back( m_xAttrTab->pLRSpace );
4750  EndAttr( m_xAttrTab->pLRSpace, false );
4751  }
4752  }
4753 
4754  // and now set the attributes
4755  if( pItemSet )
4756  {
4757  InsertParaAttrs( *pItemSet );
4758  delete pItemSet;
4759  }
4760 }
4761 
4763 {
      // Opens a character-format context for nToken: reads the ID, STYLE,
      // CLASS, LANG and DIR options, looks up the character format for
      // (nToken, class), applies inline style options and pushes the new
      // context.
      // NOTE(review): the signature line (4762) is not part of this listing.
4764  OUString aId, aStyle, aLang, aDir;
4765  OUString aClass;
4766 
4767  const HTMLOptions& rHTMLOptions = GetOptions();
      // Options are visited from the last to the first, so for a repeated
      // option the first occurrence is assigned last and therefore wins.
4768  for (size_t i = rHTMLOptions.size(); i; )
4769  {
4770  const HTMLOption& rOption = rHTMLOptions[--i];
4771  switch( rOption.GetToken() )
4772  {
4773  case HtmlOptionId::ID:
4774  aId = rOption.GetString();
4775  break;
4776  case HtmlOptionId::STYLE:
4777  aStyle = rOption.GetString();
4778  break;
4779  case HtmlOptionId::CLASS:
4780  aClass = rOption.GetString();
4781  break;
4782  case HtmlOptionId::LANG:
4783  aLang = rOption.GetString();
4784  break;
4785  case HtmlOptionId::DIR:
4786  aDir = rOption.GetString();
4787  break;
4788  default: break;
4789  }
4790  }
4791 
4792  // create a new context
4793  std::unique_ptr<HTMLAttrContext> xCntxt(new HTMLAttrContext(nToken));
4794 
4795  // set the style and save it in the context
4796  SwCharFormat* pCFormat = m_pCSS1Parser->GetChrFormat( nToken, aClass );
4797  OSL_ENSURE( pCFormat, "No character format found for token" );
4798 
4799  // parse styles (regarding class see also NewPara)
      // The class is deliberately left out here (empty OUString): it has
      // already been used for the character-format lookup above.
4800  if (HasStyleOptions(aStyle, aId, OUString(), &aLang, &aDir))
4801  {
4802  SfxItemSet aItemSet( m_xDoc->GetAttrPool(), m_pCSS1Parser->GetWhichMap() );
4803  SvxCSS1PropertyInfo aPropInfo;
4804 
4805  if (ParseStyleOptions(aStyle, aId, OUString(), aItemSet, aPropInfo, &aLang, &aDir))
4806  {
4807  OSL_ENSURE( aClass.isEmpty() || !m_pCSS1Parser->GetClass( aClass ),
4808  "Class is not considered" );
4809  DoPositioning( aItemSet, aPropInfo, xCntxt.get() );
4810  InsertAttrs( aItemSet, aPropInfo, xCntxt.get(), true );
4811  }
4812  }
4813 
4814  // Character formats are stored in their own stack and can never be inserted
4815  // by styles. Therefore the attribute doesn't exist in CSS1-Which-Range.
4816  if( pCFormat )
4817  InsertAttr( &m_xAttrTab->pCharFormats, SwFormatCharFormat( pCFormat ), xCntxt.get() );
4818 
4819  // save the context
4820  PushContext(xCntxt);
4821 }
4822 
4824 {
      // Handles a spacer element. Reads TYPE, ALIGN, WIDTH, HEIGHT and SIZE and
      // then, depending on nType:
      //  - HTML_SPTYPE_BLOCK: inserts an empty, content-protected fly frame,
      //  - HTML_SPTYPE_VERT:  adds vertical space via a lower paragraph margin,
      //  - HTML_SPTYPE_HORI:  adds horizontal space via first-line indent or
      //                       kerning on an inserted blank.
      // NOTE(review): the signature line (4823) and the source lines 4877,
      // 4912, 4914, 4941, 4960 and 4962 are missing from this listing; the
      // affected statements below are incomplete as shown.
4825  // and if applicable change it via the options
4826  sal_Int16 eVertOri = text::VertOrientation::TOP;
4827  sal_Int16 eHoriOri = text::HoriOrientation::NONE;
4828  Size aSize( 0, 0);
4829  long nSize = 0;
4830  bool bPrcWidth = false;
4831  bool bPrcHeight = false;
      // default spacer type when no TYPE option is given
4832  sal_uInt16 nType = HTML_SPTYPE_HORI;
4833 
4834  const HTMLOptions& rHTMLOptions = GetOptions();
4835  for (size_t i = rHTMLOptions.size(); i; )
4836  {
4837  const HTMLOption& rOption = rHTMLOptions[--i];
4838  switch( rOption.GetToken() )
4839  {
4840  case HtmlOptionId::TYPE:
4841  rOption.GetEnum( nType, aHTMLSpacerTypeTable );
4842  break;
4843  case HtmlOptionId::ALIGN:
      // the same ALIGN value is looked up in both the vertical and the
      // horizontal table; the previous value is passed as fallback
4844  eVertOri =
4845  rOption.GetEnum( aHTMLImgVAlignTable,
4846  eVertOri );
4847  eHoriOri =
4848  rOption.GetEnum( aHTMLImgHAlignTable,
4849  eHoriOri );
4850  break;
4851  case HtmlOptionId::WIDTH:
4852  // First only save as pixel value!
      // a '%' anywhere in the option string marks the value as a percentage
4853  bPrcWidth = (rOption.GetString().indexOf('%') != -1);
4854  aSize.setWidth( static_cast<long>(rOption.GetNumber()) );
4855  break;
4856  case HtmlOptionId::HEIGHT:
4857  // First only save as pixel value!
4858  bPrcHeight = (rOption.GetString().indexOf('%') != -1);
4859  aSize.setHeight( static_cast<long>(rOption.GetNumber()) );
4860  break;
4861  case HtmlOptionId::SIZE:
4862  // First only save as pixel value!
4863  nSize = rOption.GetNumber();
4864  break;
4865  default: break;
4866  }
4867  }
4868 
4869  switch( nType )
4870  {
4871  case HTML_SPTYPE_BLOCK:
4872  {
4873  // create an empty text frame
4874 
4875  // fetch the ItemSet
      // NOTE(review): source line 4877 (the rest of this constructor call) is
      // not visible here.
4876  SfxItemSet aFrameSet( m_xDoc->GetAttrPool(),
4878  if( !IsNewDoc() )
4879  Reader::ResetFrameFormatAttrs( aFrameSet );
4880 
4881  // set the anchor and the adjustment
4882  SetAnchorAndAdjustment( eVertOri, eHoriOri, aFrameSet );
4883 
4884  // and the size of the frame
4885  Size aDfltSz( MINFLY, MINFLY );
4886  Size aSpace( 0, 0 );
      // empty item set / property info, only needed to satisfy the helper
      // signatures below
4887  SfxItemSet aDummyItemSet( m_xDoc->GetAttrPool(),
4888  m_pCSS1Parser->GetWhichMap() );
4889  SvxCSS1PropertyInfo aDummyPropInfo;
4890 
4891  SetFixSize( aSize, aDfltSz, bPrcWidth, bPrcHeight,
4892  aDummyPropInfo, aFrameSet );
4893  SetSpace( aSpace, aDummyItemSet, aDummyPropInfo, aFrameSet );
4894 
4895  // protect the content
4896  SvxProtectItem aProtectItem( RES_PROTECT) ;
4897  aProtectItem.SetContentProtect( true );
4898  aFrameSet.Put( aProtectItem );
4899 
4900  // create the frame
4901  RndStdIds eAnchorId =
4902  aFrameSet.Get(RES_ANCHOR).GetAnchorId();
4903  SwFrameFormat *pFlyFormat = m_xDoc->MakeFlySection( eAnchorId,
4904  m_pPam->GetPoint(), &aFrameSet );
4905  // Possibly create frames and register auto-bound frames.
4906  RegisterFlyFrame( pFlyFormat );
4907  }
4908  break;
4909  case HTML_SPTYPE_VERT:
4910  if( nSize > 0 )
4911  {
      // NOTE(review): source lines 4912 and 4914 are not visible here; the
      // remaining fragment converts Size(0,nSize) with PixelToLogic() to twips
      // and takes the height.
4913  {
4915  ->PixelToLogic( Size(0,nSize),
4916  MapMode(MapUnit::MapTwip) ).Height();
4917  }
4918 
4919  // set a paragraph margin
4920  SwTextNode *pTextNode = nullptr;
4921  if( !m_pPam->GetPoint()->nContent.GetIndex() )
4922  {
4923  // if possible change the bottom paragraph margin
4924  // of previous node
4925 
4926  SetAttr(); // set still open paragraph attributes
4927 
      // node directly in front of the current one; null if it is no text node
4928  pTextNode = m_xDoc->GetNodes()[m_pPam->GetPoint()->nNode.GetIndex()-1]
4929  ->GetTextNode();
4930 
4931  // If the previous paragraph isn't a text node, then now an
4932  // empty paragraph is created, which already generates a single
4933  // line of spacing.
4934  if( !pTextNode )
4935  nSize = nSize>HTML_PARSPACE ? nSize-HTML_PARSPACE : 0;
4936  }
4937 
4938  if( pTextNode )
4939  {
      // NOTE(review): source line 4941 (the rest of this initializer) is not
      // visible here.
4940  SvxULSpaceItem aULSpace( static_cast<const SvxULSpaceItem&>(pTextNode
4942  aULSpace.SetLower( aULSpace.GetLower() + static_cast<sal_uInt16>(nSize) );
4943  pTextNode->SetAttr( aULSpace );
4944  }
4945  else
4946  {
      // no usable previous text node (or cursor not at paragraph start): give
      // the current paragraph the lower spacing and start a new one
4947  NewAttr(m_xAttrTab, &m_xAttrTab->pULSpace, SvxULSpaceItem(0, static_cast<sal_uInt16>(nSize), RES_UL_SPACE));
4948  EndAttr( m_xAttrTab->pULSpace, false );
4949 
4950  AppendTextNode(); // Don't change spacing!
4951  }
4952  }
4953  break;
4954  case HTML_SPTYPE_HORI:
4955  if( nSize > 0 )
4956  {
4957  // If the paragraph is still empty, set first line
4958  // indentation, otherwise apply letter spacing over a space.
4959 
      // NOTE(review): source lines 4960 and 4962 are not visible here; the
      // remaining fragment converts Size(nSize,0) with PixelToLogic() to twips
      // and takes the width.
4961  {
4963  ->PixelToLogic( Size(nSize,0),
4964  MapMode(MapUnit::MapTwip) ).Width();
4965  }
4966 
4967  if( !m_pPam->GetPoint()->nContent.GetIndex() )
4968  {
4969  sal_uInt16 nLeft=0, nRight=0;
4970  short nIndent = 0;
4971 
      // keep the current margins, only enlarge the first-line indent
4972  GetMarginsFromContextWithNumBul( nLeft, nRight, nIndent );
4973  nIndent = nIndent + static_cast<short>(nSize);
4974 
4975  SvxLRSpaceItem aLRItem( RES_LR_SPACE );
4976  aLRItem.SetTextLeft( nLeft );
4977  aLRItem.SetRight( nRight );
4978  aLRItem.SetTextFirstLineOfst( nIndent );
4979 
4980  NewAttr(m_xAttrTab, &m_xAttrTab->pLRSpace, aLRItem);
4981  EndAttr( m_xAttrTab->pLRSpace, false );
4982  }
4983  else
4984  {
      // insert one blank and put the requested width on it as kerning
4985  NewAttr(m_xAttrTab, &m_xAttrTab->pKerning, SvxKerningItem( static_cast<short>(nSize), RES_CHRATR_KERNING ));
4986  OUString aTmp( ' ' );
4987  m_xDoc->getIDocumentContentOperations().InsertString( *m_pPam, aTmp );
4988  EndAttr( m_xAttrTab->pKerning );
4989  }
4990  }
4991  }
4992 }
4993 
// Converts a pixel value to twips using the application's default output
// device. The result is clamped to SAL_MAX_UINT16. If nPixel is 0 or no
// default device exists, nPixel is returned unchanged.
4994 sal_uInt16 SwHTMLParser::ToTwips( sal_uInt16 nPixel )
4995 {
4996  if( nPixel && Application::GetDefaultDevice() )
4997  {
      // NOTE(review): source line 4998 (the declaration of nTwips and the start
      // of the conversion call) is not visible in this listing.
4999  Size( nPixel, nPixel ), MapMode( MapUnit::MapTwip ) ).Width();
      // clamp before narrowing so large results do not wrap around
5000  return static_cast<sal_uInt16>(std::min(nTwips, SwTwips(SAL_MAX_UINT16)));
5001  }
5002  else
5003  return nPixel;
5004 }
5005 
5007 {
      // Returns the width available for the current position. A non-zero
      // nWidth is returned directly; otherwise the width of the HTML page
      // (m_aHTMLPageSize) is used and computed on first use.
      // NOTE(review): the signature line (5006) and the source lines 5008
      // (where nWidth is obtained) and 5025 are missing from this listing.
5009  if( nWidth )
5010  return nWidth;
5011 
      // m_aHTMLPageSize acts as a cache: a width of 0 means "not computed yet".
5012  if( !m_aHTMLPageSize.Width() )
5013  {
5014  const SwFrameFormat& rPgFormat = m_pCSS1Parser->GetMasterPageDesc()->GetMaster();
5015 
5016  const SwFormatFrameSize& rSz = rPgFormat.GetFrameSize();
5017  const SvxLRSpaceItem& rLR = rPgFormat.GetLRSpace();
5018  const SvxULSpaceItem& rUL = rPgFormat.GetULSpace();
5019  const SwFormatCol& rCol = rPgFormat.GetCol();
5020 
      // page size minus the page margins
5021  m_aHTMLPageSize.setWidth( rSz.GetWidth() - rLR.GetLeft() - rLR.GetRight() );
5022  m_aHTMLPageSize.setHeight( rSz.GetHeight() - rUL.GetUpper() - rUL.GetLower() );
5023 
      // NOTE(review): source line 5025, the statement executed for multi-column
      // pages, is not visible here.
5024  if( 1 < rCol.GetNumCols() )
5026  }
5027 
5028  return m_aHTMLPageSize.Width();
5029 }
5030 
5032 {
      // Looks for an ID option on the current token and, if it is non-empty,
      // inserts a bookmark with that name.
      // NOTE(review): the signature line (5031) is not part of this listing.
5033  OUString aId;
5034  const HTMLOptions& rHTMLOptions = GetOptions();
      // Scan from the last option backwards and stop at the first ID found,
      // i.e. the last ID option in document order is the one used.
5035  for (size_t i = rHTMLOptions.size(); i; )
5036  {
5037  const HTMLOption& rOption = rHTMLOptions[--i];
5038  if( HtmlOptionId::ID==rOption.GetToken() )
5039  {
5040  aId = rOption.GetString();
5041  break;
5042  }
5043  }
5044 
5045  if( !aId.isEmpty() )
5046  InsertBookmark( aId );
5047 }
5048 
5050 {
      // Handles a line break element including its CLEAR, ID, STYLE and CLASS
      // options. Depending on the options it changes the wrapping of frames
      // anchored in the current paragraph, applies a CSS page break, and/or
      // inserts a hard line break character.
      // NOTE(review): the signature line (5049) and the source lines 5122 and
      // 5191 are missing from this listing.
5051  // <BR CLEAR=xxx> is handled as:
5052  // 1.) Only regard the paragraph-bound frames anchored in current paragraph.
5053  // 2.) For left-justified aligned frames, CLEAR=LEFT or ALL, and for right-
5054  // justified aligned frames, CLEAR=RIGHT or ALL, the wrap-through is
5055  // changed as following:
5056  // 3.) If the paragraph contains no text, then the frames don't get a wrapping
5057  // 4.) otherwise a left aligned frame gets a right "only anchor" wrapping
5058  // and a right aligned frame gets a left "only anchor" wrapping.
5059  // 5.) if in a non-empty paragraph the wrapping of a frame is changed,
5060  // then a new paragraph is opened
5061  // 6.) If no wrappings of frames are changed, a hard line break is inserted.
5062 
5063  OUString aId, aStyle, aClass; // the id of bookmark
5064  bool bClearLeft = false, bClearRight = false;
5065  bool bCleared = false; // Was a CLEAR executed?
5066 
5067  // then we fetch the options
5068  const HTMLOptions& rHTMLOptions = GetOptions();
5069  for (size_t i = rHTMLOptions.size(); i; )
5070  {
5071  const HTMLOption& rOption = rHTMLOptions[--i];
5072  switch( rOption.GetToken() )
5073  {
5074  case HtmlOptionId::CLEAR:
5075  {
      // the CLEAR value is compared case-insensitively; unknown values leave
      // both flags untouched
5076  const OUString &rClear = rOption.GetString();
5077  if( rClear.equalsIgnoreAsciiCase( OOO_STRING_SVTOOLS_HTML_AL_all ) )
5078  {
5079  bClearLeft = true;
5080  bClearRight = true;
5081  }
5082  else if( rClear.equalsIgnoreAsciiCase( OOO_STRING_SVTOOLS_HTML_AL_left ) )
5083  bClearLeft = true;
5084  else if( rClear.equalsIgnoreAsciiCase( OOO_STRING_SVTOOLS_HTML_AL_right ) )
5085  bClearRight = true;
5086  }
5087  break;
5088  case HtmlOptionId::ID:
5089  aId = rOption.GetString();
5090  break;
5091  case HtmlOptionId::STYLE:
5092  aStyle = rOption.GetString();
5093  break;
5094  case HtmlOptionId::CLASS:
5095  aClass = rOption.GetString();
5096  break;
5097  default: break;
5098  }
5099  }
5100 
5101  // CLEAR is only supported for the current paragraph
5102  if( bClearLeft || bClearRight )
5103  {
5104  SwNodeIndex& rNodeIdx = m_pPam->GetPoint()->nNode;
5105  SwTextNode* pTextNd = rNodeIdx.GetNode().GetTextNode();
5106  if( pTextNd )
5107  {
5108  const SwFrameFormats& rFrameFormatTable = *m_xDoc->GetSpzFrameFormats();
5109 
5110  for( size_t i=0; i<rFrameFormatTable.size(); i++ )
5111  {
5112  SwFrameFormat *const pFormat = rFrameFormatTable[i];
5113  SwFormatAnchor const*const pAnchor = &pFormat->GetAnchor();
5114  SwPosition const*const pAPos = pAnchor->GetContentAnchor();
      // Only frames anchored at paragraph/character in the current node that
      // currently have some kind of wrapping are candidates.
5115  if (pAPos &&
5116  ((RndStdIds::FLY_AT_PARA == pAnchor->GetAnchorId()) ||
5117  (RndStdIds::FLY_AT_CHAR == pAnchor->GetAnchorId())) &&
5118  pAPos->nNode == rNodeIdx &&
5119  pFormat->GetSurround().GetSurround() != css::text::WrapTextMode_NONE )
5120  {
      // NOTE(review): source line 5122, the value used for drawing formats
      // (RES_DRAWFRMFMT), is not visible here.
5121  sal_Int16 eHori = RES_DRAWFRMFMT == pFormat->Which()
5123  : pFormat->GetHoriOrient().GetHoriOrient();
5124 
      // WrapTextMode_PARALLEL serves as the "leave this frame unchanged" marker.
5125  css::text::WrapTextMode eSurround = css::text::WrapTextMode_PARALLEL;
5126  if( m_pPam->GetPoint()->nContent.GetIndex() )
5127  {
5128  if( bClearLeft && text::HoriOrientation::LEFT==eHori )
5129  eSurround = css::text::WrapTextMode_RIGHT;
5130  else if( bClearRight && text::HoriOrientation::RIGHT==eHori )
5131  eSurround = css::text::WrapTextMode_LEFT;
5132  }
5133  else if( (bClearLeft && text::HoriOrientation::LEFT==eHori) ||
5134  (bClearRight && text::HoriOrientation::RIGHT==eHori) )
5135  {
5136  eSurround = css::text::WrapTextMode_NONE;
5137  }
5138 
5139  if( css::text::WrapTextMode_PARALLEL != eSurround )
5140  {
5141  SwFormatSurround aSurround( eSurround );
5142  if( css::text::WrapTextMode_NONE != eSurround )
5143  aSurround.SetAnchorOnly( true );
5144  pFormat->SetFormatAttr( aSurround );
5145  bCleared = true;
5146  }
5147  }
5148  }
5149  }
5150  }
5151 
5152  // parse styles
      // aBreakItem starts out as SvxBreak::NONE and is replaced by a clone of
      // the parsed item when the style options define a break.
5153  std::shared_ptr<SvxFormatBreakItem> aBreakItem(std::make_shared<SvxFormatBreakItem>(SvxBreak::NONE, RES_BREAK));
5154  bool bBreakItem = false;
5155  if( HasStyleOptions( aStyle, aId, aClass ) )
5156  {
5157  SfxItemSet aItemSet( m_xDoc->GetAttrPool(), m_pCSS1Parser->GetWhichMap() );
5158  SvxCSS1PropertyInfo aPropInfo;
5159 
5160  if( ParseStyleOptions( aStyle, aId, aClass, aItemSet, aPropInfo ) )
5161  {
5162  if( m_pCSS1Parser->SetFormatBreak( aItemSet, aPropInfo ) )
5163  {
      // the shared_ptr takes ownership of the object returned by Clone()
5164  aBreakItem.reset(static_cast<SvxFormatBreakItem*>(aItemSet.Get(RES_BREAK).Clone()));
5165  bBreakItem = true;
5166  }
5167  if( !aPropInfo.m_aId.isEmpty() )
5168  InsertBookmark( aPropInfo.m_aId );
5169  }
5170  }
5171 
      // A "page after" break is set before the paragraph handling below, a
      // "page before" break after it (see the end of the function).
5172  if( bBreakItem && SvxBreak::PageAfter == aBreakItem->GetBreak() )
5173  {
5174  NewAttr(m_xAttrTab, &m_xAttrTab->pBreak, *aBreakItem);
5175  EndAttr( m_xAttrTab->pBreak, false );
5176  }
5177 
5178  if( !bCleared && !bBreakItem )
5179  {
5180  // If no CLEAR could or should be executed, a line break will be inserted
5181  OUString sTmp( u'\x000a' ); // make the Mac happy :-)
5182  m_xDoc->getIDocumentContentOperations().InsertString( *m_pPam, sTmp );
5183  }
5184  else if( m_pPam->GetPoint()->nContent.GetIndex() )
5185  {
5186  // If a CLEAR is executed in a non-empty paragraph, then after it
5187  // a new paragraph has to be opened.
5188  // MIB 21.02.97: Here actually we should change the bottom paragraph
5189  // margin to zero. This will fail for something like this <BR ..><P>
5190  // (>Netscape). That's why we don't do it.
      // NOTE(review): source line 5191, the statement that opens the new
      // paragraph, is not visible here.
5192  }
5193  if( bBreakItem && SvxBreak::PageBefore == aBreakItem->GetBreak() )
5194  {
5195  NewAttr(m_xAttrTab, &m_xAttrTab->pBreak, *aBreakItem);
5196  EndAttr( m_xAttrTab->pBreak, false );
5197  }
5198 }
5199 
5201 {
      // Inserts a horizontal rule as a paragraph of its own using the pool
      // style RES_POOLCOLL_HTML_HR. SIZE, COLOR and NOSHADE are mapped to a
      // bottom border line, WIDTH and ALIGN to paragraph indents (outside of
      // tables only), and ID to a bookmark.
      // NOTE(review): the signature line (5200) and the source lines 5252,
      // 5256 and 5354 are missing from this listing.
5202  sal_uInt16 nSize = 0;
5203  sal_uInt16 nWidth = 0;
5204 
      // SvxAdjust::End is not handled explicitly in the switch further down, so
      // without an ALIGN option the `default` (centered) branch is taken.
5205  SvxAdjust eAdjust = SvxAdjust::End;
5206 
5207  bool bPrcWidth = false;
5208  bool bNoShade = false;
5209  bool bColor = false;
5210 
5211  Color aColor;
5212  OUString aId;
5213 
5214  // let's fetch the options
5215  const HTMLOptions& rHTMLOptions = GetOptions();
5216  for (size_t i = rHTMLOptions.size(); i; )
5217  {
5218  const HTMLOption& rOption = rHTMLOptions[--i];
5219  switch( rOption.GetToken() )
5220  {
5221  case HtmlOptionId::ID:
5222  aId = rOption.GetString();
5223  break;
5224  case HtmlOptionId::SIZE:
      // NOTE(review): GetNumber() is narrowed to 16 bit without a range check.
5225  nSize = static_cast<sal_uInt16>(rOption.GetNumber());
5226  break;
5227  case HtmlOptionId::WIDTH:
5228  bPrcWidth = (rOption.GetString().indexOf('%') != -1);
5229  nWidth = static_cast<sal_uInt16>(rOption.GetNumber());
5230  if( bPrcWidth && nWidth>=100 )
5231  {
5232  // the default case are 100% lines (no attributes necessary)
5233  nWidth = 0;
5234  bPrcWidth = false;
5235  }
5236  break;
5237  case HtmlOptionId::ALIGN:
5238  eAdjust = rOption.GetEnum( aHTMLPAlignTable, eAdjust );
5239  break;
5240  case HtmlOptionId::NOSHADE:
5241  bNoShade = true;
5242  break;
5243  case HtmlOptionId::COLOR:
5244  rOption.GetColor( aColor );
5245  bColor = true;
5246  break;
5247  default: break;
5248  }
5249  }
5250 
      // NOTE(review): source lines 5252 and 5256 are not visible here, so the
      // nesting of the following statements cannot be read off this listing.
      // Visible effect: an open paragraph element is ended via EndPara() and a
      // new text node is appended.
5251  if( m_pPam->GetPoint()->nContent.GetIndex() )
5253  if( m_nOpenParaToken != HtmlTokenId::NONE )
5254  EndPara();
5255  AppendTextNode();
5257 
5258  // ...and save in a context
5259  std::unique_ptr<HTMLAttrContext> xCntxt(
5260  new HTMLAttrContext(HtmlTokenId::HORZRULE, RES_POOLCOLL_HTML_HR, OUString()));
5261 
5262  PushContext(xCntxt);
5263 
5264  // set the new style
5265  SetTextCollAttrs(m_aContexts.back().get());
5266 
5267  // the hard attributes of the current paragraph will never become invalid
5268  m_aParaAttrs.clear();
5269 
5270  if( nSize>0 || bColor || bNoShade )
5271  {
5272  // set line colour and/or width
5273  if( !bColor )
5274  aColor = COL_GRAY;
5275 
5276  SvxBorderLine aBorderLine( &aColor );
5277  if( nSize )
5278  {
      // only the height is of interest; the width argument stays 0
5279  long nPWidth = 0;
5280  long nPHeight = static_cast<long>(nSize);
5281  SvxCSS1Parser::PixelToTwip( nPWidth, nPHeight );
      // a shaded rule is rendered as a double line
5282  if ( !bNoShade )
5283  {
5284  aBorderLine.SetBorderLineStyle(SvxBorderLineStyle::DOUBLE);
5285  }
5286  aBorderLine.SetWidth( nPHeight );
5287  }
5288  else if( bNoShade )
5289  {
5290  aBorderLine.SetWidth( DEF_LINE_WIDTH_2 );
5291  }
5292  else
5293  {
5294  aBorderLine.SetBorderLineStyle(SvxBorderLineStyle::DOUBLE);
5295  aBorderLine.SetWidth( DEF_LINE_WIDTH_0 );
5296  }
5297 
5298  SvxBoxItem aBoxItem(RES_BOX);
5299  aBoxItem.SetLine( &aBorderLine, SvxBoxItemLine::BOTTOM );
      // The attribute is created with `new` and only its raw pointer is stored
      // in m_aSetAttrTab; presumably that table's consumer frees it — confirm.
5300  HTMLAttr* pTmp = new HTMLAttr(*m_pPam->GetPoint(), aBoxItem, nullptr, std::shared_ptr<HTMLAttrTable>());
5301  m_aSetAttrTab.push_back( pTmp );
5302  }
5303  if( nWidth )
5304  {
5305  // If we aren't in a table, then the width value will be "faked" with
5306  // paragraph indents. That makes little sense in a table. In order to
5307  // avoid that the line is considered during the width calculation, it
5308  // still gets an appropriate LRSpace-Item.
5309  if (!m_xTable)
5310  {
5311  // fake length and alignment of line above paragraph indents
5312  long nBrowseWidth = GetCurrentBrowseWidth();
      // NOTE(review): for a non-percent WIDTH this passes nBrowseWidth, not the
      // parsed nWidth, to ToTwips(). That looks unintended (the absolute width
      // from the option is discarded) — confirm against the intended behavior.
5313  nWidth = bPrcWidth ? static_cast<sal_uInt16>((nWidth*nBrowseWidth) / 100)
5314  : ToTwips( static_cast<sal_uInt16>(nBrowseWidth) );
5315  if( nWidth < MINLAY )
5316  nWidth = MINLAY;
5317 
      // indents are only needed when the rule is narrower than the available
      // width; otherwise pColl stays null and nothing is set
5318  const SwFormatColl *pColl = (static_cast<long>(nWidth) < nBrowseWidth) ? GetCurrFormatColl() : nullptr;
5319  if (pColl)
5320  {
5321  SvxLRSpaceItem aLRItem( pColl->GetLRSpace() );
5322  long nDist = nBrowseWidth - nWidth;
5323 
      // distribute the free space according to the alignment
5324  switch( eAdjust )
5325  {
5326  case SvxAdjust::Right:
5327  aLRItem.SetTextLeft( static_cast<sal_uInt16>(nDist) );
5328  break;
5329  case SvxAdjust::Left:
5330  aLRItem.SetRight( static_cast<sal_uInt16>(nDist) );
5331  break;
5332  case SvxAdjust::Center:
5333  default:
5334  nDist /= 2;
5335  aLRItem.SetTextLeft( static_cast<sal_uInt16>(nDist) );
5336  aLRItem.SetRight( static_cast<sal_uInt16>(nDist) );
5337  break;
5338  }
5339 
5340  HTMLAttr* pTmp = new HTMLAttr(*m_pPam->GetPoint(), aLRItem, nullptr, std::shared_ptr<HTMLAttrTable>());
5341  m_aSetAttrTab.push_back( pTmp );
5342  }
5343  }
5344  }
5345 
5346  // it's not possible to insert bookmarks in links
5347  if( !aId.isEmpty() )
5348  InsertBookmark( aId );
5349 
5350  // pop current context of stack
      // the popped context is not needed any more and is destroyed right away
5351  std::unique_ptr<HTMLAttrContext> xPoppedContext(PopContext(HtmlTokenId::HORZRULE));
5352  xPoppedContext.reset();
5353 
      // NOTE(review): source line 5354 is not visible here.
5355 
5356  // and set the current style in the next paragraph
5357  SetTextCollAttrs();
5358 }
5359 
5361 {
5362  OUString aName, aContent;
5363  bool bHTTPEquiv = false;
5364 
5365  const HTMLOptions& rHTMLOptions = GetOptions();
5366  for (size_t i = rHTMLOptions.size(); i; )
5367  {
5368  const HTMLOption& rOption = rHTMLOptions[--i];
5369  switch( rOption.GetToken() )
5370  {
5371  case HtmlOptionId::NAME:
5372  aName = rOption.GetString();
5373  bHTTPEquiv = false;
5374  break;
5375  case HtmlOptionId::HTTPEQUIV:
5376  aName = rOption.GetString();
5377  bHTTPEquiv = true;
5378  break;
5379  case HtmlOptionId::CONTENT:
5380  aContent = rOption.GetString();
5381  break;
5382  default: break;
5383  }
5384  }
5385 
5386  // Here things get a little tricky: We know for sure, that the Doc-Info
5387  // wasn't changed. Therefore it's enough to query for Generator and Refresh
5388  // to find a not processed Token. These are the only ones which won't change
5389  // the Doc-Info.
5390  if( aName.equalsIgnoreAsciiCase( OOO_STRING_SVTOOLS_HTML_META_generator ) ||
5391  aName.equalsIgnoreAsciiCase( OOO_STRING_SVTOOLS_HTML_META_refresh ) ||
5392  aName.equalsIgnoreAsciiCase( OOO_STRING_SVTOOLS_HTML_META_content_type ) ||
5393  aName.equalsIgnoreAsciiCase( OOO_STRING_SVTOOLS_HTML_META_content_script_type ) )
5394  return;
5395 
5396  aContent = aContent.replaceAll("\r", "").replaceAll("\n", "");
5397 
5398  if( aName.equalsIgnoreAsciiCase( OOO_STRING_SVTOOLS_HTML_META_sdendnote ) )
5399  {
5400  FillEndNoteInfo( aContent );
5401  return;
5402  }
5403 
5404  if( aName.equalsIgnoreAsciiCase( OOO_STRING_SVTOOLS_HTML_META_sdfootnote ) )
5405  {
5406  FillFootNoteInfo( aContent );
5407  return;
5408  }
5409 
5410  OUStringBuffer sText;
5411  sText.append("HTML: <");
5412  sText.append(OOO_STRING_SVTOOLS_HTML_meta);
5413  sText.append(' ');
5414  if( bHTTPEquiv )
5415  sText.append(