LibreOffice Module sw (master)  1
swhtml.cxx
Go to the documentation of this file.
1 /* -*- Mode: C++; tab-width: 4; indent-tabs-mode: nil; c-basic-offset: 4 -*- */
2 /*
3  * This file is part of the LibreOffice project.
4  *
5  * This Source Code Form is subject to the terms of the Mozilla Public
6  * License, v. 2.0. If a copy of the MPL was not distributed with this
7  * file, You can obtain one at http://mozilla.org/MPL/2.0/.
8  *
9  * This file incorporates work covered by the following license notice:
10  *
11  * Licensed to the Apache Software Foundation (ASF) under one or more
12  * contributor license agreements. See the NOTICE file distributed
13  * with this work for additional information regarding copyright
14  * ownership. The ASF licenses this file to you under the Apache
15  * License, Version 2.0 (the "License"); you may not use this file
16  * except in compliance with the License. You may obtain a copy of
17  * the License at http://www.apache.org/licenses/LICENSE-2.0 .
18  */
19 
20 #include <sal/config.h>
21 
22 #include <algorithm>
23 #include <memory>
24 #include <config_java.h>
25 
26 #include <com/sun/star/document/XDocumentPropertiesSupplier.hpp>
27 #include <com/sun/star/document/XDocumentProperties.hpp>
28 #include <com/sun/star/i18n/ScriptType.hpp>
29 #include <com/sun/star/i18n/XBreakIterator.hpp>
30 #include <comphelper/string.hxx>
31 #include <o3tl/safeint.hxx>
32 #include <rtl/ustrbuf.hxx>
33 #include <svx/svxids.hrc>
34 #if OSL_DEBUG_LEVEL > 0
35 #include <stdlib.h>
36 #endif
37 #include <hintids.hxx>
38 
39 #include <vcl/errinf.hxx>
40 #include <svl/stritem.hxx>
41 #include <vcl/imap.hxx>
42 #include <svtools/htmltokn.h>
43 #include <svtools/htmlkywd.hxx>
44 #include <svtools/ctrltool.hxx>
45 #include <unotools/configmgr.hxx>
46 #include <unotools/pathoptions.hxx>
47 #include <vcl/svapp.hxx>
48 #include <sfx2/event.hxx>
49 #include <sfx2/fcontnr.hxx>
50 #include <sfx2/docfile.hxx>
51 
52 #include <svtools/htmlcfg.hxx>
53 #include <sfx2/linkmgr.hxx>
54 #include <editeng/kernitem.hxx>
55 #include <editeng/boxitem.hxx>
56 #include <editeng/fhgtitem.hxx>
58 #include <editeng/postitem.hxx>
59 #include <editeng/wghtitem.hxx>
61 #include <editeng/udlnitem.hxx>
63 #include <editeng/blinkitem.hxx>
64 #include <editeng/ulspitem.hxx>
65 #include <editeng/colritem.hxx>
66 #include <editeng/fontitem.hxx>
67 #include <editeng/adjustitem.hxx>
68 #include <editeng/lrspitem.hxx>
69 #include <editeng/protitem.hxx>
70 #include <editeng/flstitem.hxx>
72 
73 #include <frmatr.hxx>
74 #include <charatr.hxx>
75 #include <fmtfld.hxx>
76 #include <fmtpdsc.hxx>
77 #include <txtfld.hxx>
78 #include <fmtanchr.hxx>
79 #include <fmtsrnd.hxx>
80 #include <fmtfsize.hxx>
81 #include <fmtclds.hxx>
82 #include <fchrfmt.hxx>
83 #include <fmtinfmt.hxx>
84 #include <fmtfollowtextflow.hxx>
85 #include <fmtornt.hxx>
86 #include <docary.hxx>
87 #include <docstat.hxx>
88 #include <doc.hxx>
89 #include <IDocumentUndoRedo.hxx>
96 #include <IDocumentStatistics.hxx>
97 #include <IDocumentState.hxx>
98 #include <pam.hxx>
99 #include <ndtxt.hxx>
100 #include <mdiexp.hxx>
101 #include <expfld.hxx>
102 #include <poolfmt.hxx>
103 #include <pagedesc.hxx>
104 #include <IMark.hxx>
105 #include <docsh.hxx>
106 #include <editsh.hxx>
107 #include <docufld.hxx>
108 #include "swcss1.hxx"
109 #include <fltini.hxx>
110 #include <htmltbl.hxx>
111 #include "htmlnum.hxx"
112 #include "swhtml.hxx"
113 #include <linkenum.hxx>
114 #include <breakit.hxx>
115 #include <SwAppletImpl.hxx>
116 #include <swdll.hxx>
117 
118 #include <sfx2/viewfrm.hxx>
119 #include <svx/svdobj.hxx>
120 #include <officecfg/Office/Writer.hxx>
121 
122 #include <swerror.h>
123 #include <hints.hxx>
124 #include <ndole.hxx>
125 #include <unoframe.hxx>
126 #include "css1atr.hxx"
127 
128 #define FONTSIZE_MASK 7
129 
130 #define HTML_ESC_PROP 80
131 #define HTML_ESC_SUPER DFLT_ESC_SUPER
132 #define HTML_ESC_SUB DFLT_ESC_SUB
133 
134 #define HTML_SPTYPE_BLOCK 1
135 #define HTML_SPTYPE_HORI 2
136 #define HTML_SPTYPE_VERT 3
137 
139 using namespace ::com::sun::star;
140 
141 // <P ALIGN=xxx>, <Hn ALIGN=xxx>, <TD ALIGN=xxx> etc.
143 {
144  { OOO_STRING_SVTOOLS_HTML_AL_left, SvxAdjust::Left },
145  { OOO_STRING_SVTOOLS_HTML_AL_center, SvxAdjust::Center },
146  { OOO_STRING_SVTOOLS_HTML_AL_middle, SvxAdjust::Center }, // Netscape
147  { OOO_STRING_SVTOOLS_HTML_AL_right, SvxAdjust::Right },
148  { OOO_STRING_SVTOOLS_HTML_AL_justify, SvxAdjust::Block },
149  { OOO_STRING_SVTOOLS_HTML_AL_char, SvxAdjust::Left },
150  { nullptr, SvxAdjust(0) }
151 };
152 
153 // <SPACER TYPE=...>
155 {
159  { nullptr, 0 }
160 };
161 
163 {
164  m_bTemplateBrowseMode = true;
165 }
166 
167 OUString HTMLReader::GetTemplateName(SwDoc& rDoc) const
168 {
170  // HTML import into Writer, avoid loading the Writer/Web template.
171  return OUString();
172 
173  const OUString sTemplateWithoutExt("internal/html");
174  SvtPathOptions aPathOpt;
175 
176  // first search for OpenDocument Writer/Web template
177  // OpenDocument Writer/Web template (extension .oth)
178  OUString sTemplate( sTemplateWithoutExt + ".oth" );
179  if (aPathOpt.SearchFile( sTemplate, SvtPathOptions::PATH_TEMPLATE ))
180  return sTemplate;
181 
182  // no OpenDocument Writer/Web template found.
183  // search for OpenOffice.org Writer/Web template
184  sTemplate = sTemplateWithoutExt + ".stw";
185  if (aPathOpt.SearchFile( sTemplate, SvtPathOptions::PATH_TEMPLATE ))
186  return sTemplate;
187 
188  OSL_ENSURE( false, "The default HTML template cannot be found in the defined template directories!");
189 
190  return OUString();
191 }
192 
194 {
195  OSL_ENSURE( m_pMedium, "Where is the medium??" );
196 
197  if( m_pMedium->IsRemote() || !m_pMedium->IsStorage() )
198  {
200  return true;
201  }
202  return false;
203 
204 }
205 
206 // Call for the general Reader-Interface
207 ErrCode HTMLReader::Read( SwDoc &rDoc, const OUString& rBaseURL, SwPaM &rPam, const OUString & rName )
208 {
210 
211  if( !m_pStream )
212  {
213  OSL_ENSURE( m_pStream, "HTML-Read without stream" );
214  return ERR_SWG_READ_ERROR;
215  }
216 
217  if( !m_bInsertMode )
218  {
220 
221  // Set the HTML page style, when it isn't a HTML document,
222  // otherwise it's already set.
224  {
227  }
228  }
229 
230  // so nobody steals the document!
231  rtl::Reference<SwDoc> aHoldRef(&rDoc);
232  ErrCode nRet = ERRCODE_NONE;
233  tools::SvRef<SwHTMLParser> xParser = new SwHTMLParser( &rDoc, rPam, *m_pStream,
234  rName, rBaseURL, !m_bInsertMode, m_pMedium,
235  IsReadUTF8(),
237 
238  SvParserState eState = xParser->CallParser();
239 
240  if( SvParserState::Pending == eState )
242  else if( SvParserState::Accepted != eState )
243  {
244  const OUString sErr(OUString::number(static_cast<sal_Int32>(xParser->GetLineNr()))
245  + "," + OUString::number(static_cast<sal_Int32>(xParser->GetLinePos())));
246 
247  // use the stream as transport for error number
248  nRet = *new StringErrorInfo( ERR_FORMAT_ROWCOL, sErr,
249  DialogMask::ButtonsOk | DialogMask::MessageError );
250  }
251 
252  return nRet;
253 }
254 
256  const OUString& rPath,
257  const OUString& rBaseURL,
258  bool bReadNewDoc,
259  SfxMedium* pMed, bool bReadUTF8,
260  bool bNoHTMLComments,
261  const OUString& rNamespace )
262  : SfxHTMLParser( rIn, bReadNewDoc, pMed ),
263  m_aPathToFile( rPath ),
264  m_sBaseURL( rBaseURL ),
265  m_xAttrTab(new HTMLAttrTable),
266  m_pNumRuleInfo( new SwHTMLNumRuleInfo ),
267  m_xDoc( pD ),
268  m_pActionViewShell( nullptr ),
269  m_pSttNdIdx( nullptr ),
270  m_pFormImpl( nullptr ),
271  m_pMarquee( nullptr ),
272  m_pImageMap( nullptr ),
273  m_nBaseFontStMin( 0 ),
274  m_nFontStMin( 0 ),
275  m_nDefListDeep( 0 ),
276  m_nFontStHeadStart( 0 ),
277  m_nSBModuleCnt( 0 ),
278  m_nMissingImgMaps( 0 ),
279  m_nParaCnt( 5 ),
280  // #i83625#
281  m_nContextStMin( 0 ),
282  m_nContextStAttrMin( 0 ),
283  m_nSelectEntryCnt( 0 ),
284  m_nOpenParaToken( HtmlTokenId::NONE ),
285  m_eJumpTo( JumpToMarks::NONE ),
286 #ifdef DBG_UTIL
287  m_nContinue( 0 ),
288 #endif
289  m_eParaAdjust( SvxAdjust::End ),
290  m_bDocInitalized( false ),
291  m_bSetModEnabled( false ),
292  m_bInFloatingFrame( false ),
293  m_bInField( false ),
294  m_bCallNextToken( false ),
295  m_bIgnoreRawData( false ),
296  m_bLBEntrySelected ( false ),
297  m_bTAIgnoreNewPara ( false ),
298  m_bFixMarqueeWidth ( false ),
299  m_bNoParSpace( false ),
300  m_bInNoEmbed( false ),
301  m_bInTitle( false ),
302  m_bUpdateDocStat( false ),
303  m_bFixSelectWidth( false ),
304  m_bTextArea( false ),
305  m_bSelect( false ),
306  m_bInFootEndNoteAnchor( false ),
307  m_bInFootEndNoteSymbol( false ),
308  m_bIgnoreHTMLComments( bNoHTMLComments ),
309  m_bRemoveHidden( false ),
310  m_bBodySeen( false ),
311  m_bReadingHeaderOrFooter( false ),
312  m_bNotifyMacroEventRead( false ),
313  m_isInTableStructure(false),
314  m_nTableDepth( 0 ),
315  m_pTempViewFrame(nullptr)
316 {
317  // If requested explicitly, then force ignoring of comments (don't create postits for them).
319  m_bIgnoreHTMLComments = true;
320 
321  m_nEventId = nullptr;
323 
324  m_eScriptLang = HTMLScriptLanguage::Unknown;
325 
326  rCursor.DeleteMark();
327  m_pPam = &rCursor; // re-use existing cursor: avoids spurious ~SwIndexReg assert
328  memset(m_xAttrTab.get(), 0, sizeof(HTMLAttrTable));
329 
330  // Read the font sizes 1-7 from the INI file
331  SvxHtmlOptions& rHtmlOptions = SvxHtmlOptions::Get();
332  m_aFontHeights[0] = rHtmlOptions.GetFontSize( 0 ) * 20;
333  m_aFontHeights[1] = rHtmlOptions.GetFontSize( 1 ) * 20;
334  m_aFontHeights[2] = rHtmlOptions.GetFontSize( 2 ) * 20;
335  m_aFontHeights[3] = rHtmlOptions.GetFontSize( 3 ) * 20;
336  m_aFontHeights[4] = rHtmlOptions.GetFontSize( 4 ) * 20;
337  m_aFontHeights[5] = rHtmlOptions.GetFontSize( 5 ) * 20;
338  m_aFontHeights[6] = rHtmlOptions.GetFontSize( 6 ) * 20;
339 
340  m_bKeepUnknown = rHtmlOptions.IsImportUnknown();
341 
342  if(bReadNewDoc)
343  {
344  //CJK has different defaults, so a different object should be used for this
345  //RES_CHARTR_CJK_FONTSIZE is a valid value
347  m_xDoc->SetDefault( aFontHeight );
349  m_xDoc->SetDefault( aFontHeightCJK );
351  m_xDoc->SetDefault( aFontHeightCTL );
352 
353  // #i18732# - adjust default of option 'FollowTextFlow'
354  // TODO: not sure what the appropriate default for HTML should be?
355  m_xDoc->SetDefault( SwFormatFollowTextFlow(true) );
356  }
357 
358  // Change to HTML mode during the import, so that the right styles are created
359  m_bOldIsHTMLMode = m_xDoc->getIDocumentSettingAccess().get(DocumentSettingId::HTML_MODE);
360  m_xDoc->getIDocumentSettingAccess().set(DocumentSettingId::HTML_MODE, true);
361 
362  m_pCSS1Parser.reset( new SwCSS1Parser( m_xDoc.get(), m_aFontHeights, m_sBaseURL, IsNewDoc() ) );
363  m_pCSS1Parser->SetIgnoreFontFamily( rHtmlOptions.IsIgnoreFontFamily() );
364 
365  if( bReadUTF8 )
366  {
367  SetSrcEncoding( RTL_TEXTENCODING_UTF8 );
368  }
369  else
370  {
371  SwDocShell *pDocSh = m_xDoc->GetDocShell();
372  SvKeyValueIterator *pHeaderAttrs =
373  pDocSh->GetHeaderAttributes();
374  if( pHeaderAttrs )
375  SetEncodingByHTTPHeader( pHeaderAttrs );
376  }
377  m_pCSS1Parser->SetDfltEncoding( osl_getThreadTextEncoding() );
378 
379  SwDocShell* pDocSh = m_xDoc->GetDocShell();
380  if( pDocSh )
381  {
382  m_bViewCreated = true; // not, load synchronous
383 
384  // a jump mark is present
385 
386  if( pMed )
387  {
388  m_sJmpMark = pMed->GetURLObject().GetMark();
389  if( !m_sJmpMark.isEmpty() )
390  {
392  sal_Int32 nLastPos = m_sJmpMark.lastIndexOf( cMarkSeparator );
393  sal_Int32 nPos = nLastPos != -1 ? nLastPos : 0;
394 
395  OUString sCmp;
396  if (nPos)
397  {
398  sCmp = m_sJmpMark.copy(nPos + 1).replaceAll(" ", "");
399  }
400 
401  if( !sCmp.isEmpty() )
402  {
403  sCmp = sCmp.toAsciiLowerCase();
404  if( sCmp == "region" )
406  else if( sCmp == "table" )
408  else if( sCmp == "graphic" )
410  else if( sCmp == "outline" ||
411  sCmp == "text" ||
412  sCmp == "frame" )
413  m_eJumpTo = JumpToMarks::NONE; // this is nothing valid!
414  else
415  // otherwise this is a normal (book)mark
416  nPos = -1;
417  }
418  else
419  nPos = -1;
420 
421  if( nPos != -1 )
422  m_sJmpMark = m_sJmpMark.copy( 0, nPos );
423  if( m_sJmpMark.isEmpty() )
425  }
426  }
427  }
428 
429  if (!rNamespace.isEmpty())
430  {
431  SetNamespace(rNamespace);
432  m_bXHTML = true;
433  if (rNamespace == "reqif-xhtml")
434  m_bReqIF = true;
435  }
436 }
437 
439 {
// Destructor body of SwHTMLParser (the name is taken from the assertion text
// further down; the signature line itself is not part of this listing).
// It asserts that parsing finished cleanly, force-releases anything that is
// still pending, restores document settings changed for the import and then
// drops the parser's reference to the document.
440 #ifdef DBG_UTIL
441  OSL_ENSURE( !m_nContinue, "DTOR in continue!" );
442 #endif
443 
444  OSL_ENSURE(m_aContexts.empty(), "There are still contexts on the stack");
445  OSL_ENSURE(!m_nContextStMin, "There are protected contexts");
// Drop the protection limit so the loop below is allowed to pop every context.
446  m_nContextStMin = 0;
447  while (!m_aContexts.empty())
448  {
449  std::unique_ptr<HTMLAttrContext> xCntxt(PopContext());
450  ClearContext(xCntxt.get());
451  }
452 
// Remember whether the document was flagged as loading asynchronously before
// resetting that flag, and put back the HTML_MODE value saved in
// m_bOldIsHTMLMode.
453  bool bAsync = m_xDoc->IsInLoadAsynchron();
454  m_xDoc->SetInLoadAsynchron( false );
455  m_xDoc->getIDocumentSettingAccess().set(DocumentSettingId::HTML_MODE, m_bOldIsHTMLMode);
456 
457  if( m_xDoc->GetDocShell() && m_nEventId )
459 
460  // the DocumentDetected maybe can delete the DocShells, therefore fetch again
461  if( m_xDoc->GetDocShell() )
462  {
463  // update linked sections
464  sal_uInt16 nLinkMode = m_xDoc->getIDocumentSettingAccess().getLinkUpdateMode( true );
465  if( nLinkMode != NEVER && bAsync &&
466  SfxObjectCreateMode::INTERNAL!=m_xDoc->GetDocShell()->GetCreateMode() )
467  m_xDoc->getIDocumentLinksAdministration().GetLinkManager().UpdateAllLinks( nLinkMode == MANUAL, false, nullptr );
468 
469  if ( m_xDoc->GetDocShell()->IsLoading() )
470  {
471  // #i59688#
472  m_xDoc->GetDocShell()->LoadingFinished();
473  }
474  }
475 
476  delete m_pSttNdIdx;
477 
// Attributes that were never applied are owned through raw pointers in
// m_aSetAttrTab, so they are deleted by hand here.
478  if( !m_aSetAttrTab.empty() )
479  {
480  OSL_ENSURE( m_aSetAttrTab.empty(),"There are still attributes on the stack" );
481  for ( const auto& rpAttr : m_aSetAttrTab )
482  delete rpAttr;
483  m_aSetAttrTab.clear();
484  }
485 
486  m_pCSS1Parser.reset();
487  m_pNumRuleInfo.reset();
488  DeleteFormImpl();
490 
491  OSL_ENSURE(!m_xTable.get(), "It exists still an open table");
492  m_pImageMaps.reset();
493 
494  OSL_ENSURE( m_vPendingStack.empty(),
495  "SwHTMLParser::~SwHTMLParser: Here should not be Pending-Stack anymore" );
496  m_vPendingStack.clear();
497 
// NOTE(review): m_xDoc is cleared here, yet the block below still tests
// m_xDoc.is(). If clear() empties the reference (as the name suggests), that
// test can never succeed and the SID_HIDDEN item is never removed -- confirm,
// and consider clearing m_xDoc only after the temporary view frame handling.
498  m_xDoc.clear();
499 
500  if ( m_pTempViewFrame )
501  {
503 
504  // the temporary view frame is hidden, so the hidden flag might need to be removed
505  if ( m_bRemoveHidden && m_xDoc.is() && m_xDoc->GetDocShell() && m_xDoc->GetDocShell()->GetMedium() )
506  m_xDoc->GetDocShell()->GetMedium()->GetItemSet()->ClearItem( SID_HIDDEN );
507  }
508 }
509 
510 IMPL_LINK_NOARG( SwHTMLParser, AsyncCallback, void*, void )
511 {
512  m_nEventId=nullptr;
513 
514  // #i47907# - If the document has already been destructed,
515  // the parser should be aware of this:
516  if( ( m_xDoc->GetDocShell() && m_xDoc->GetDocShell()->IsAbortingImport() )
517  || 1 == m_xDoc->getReferenceCount() )
518  {
519  // was the import aborted by SFX?
520  eState = SvParserState::Error;
521  }
522 
523  GetAsynchCallLink().Call(nullptr);
524 }
525 
527 {
528  // create temporary index on position 0, so it won't be moved!
529  m_pSttNdIdx = new SwNodeIndex( m_xDoc->GetNodes() );
530  if( !IsNewDoc() ) // insert into existing document ?
531  {
532  const SwPosition* pPos = m_pPam->GetPoint();
533 
534  m_xDoc->getIDocumentContentOperations().SplitNode( *pPos, false );
535 
536  *m_pSttNdIdx = pPos->nNode.GetIndex()-1;
537  m_xDoc->getIDocumentContentOperations().SplitNode( *pPos, false );
538 
539  SwPaM aInsertionRangePam( *pPos );
540 
542 
543  // split any redline over the insertion point
544  aInsertionRangePam.SetMark();
545  *aInsertionRangePam.GetPoint() = *m_pPam->GetPoint();
546  aInsertionRangePam.Move( fnMoveBackward );
547  m_xDoc->getIDocumentRedlineAccess().SplitRedline( aInsertionRangePam );
548 
549  m_xDoc->SetTextFormatColl( *m_pPam,
550  m_pCSS1Parser->GetTextCollFromPool( RES_POOLCOLL_STANDARD ));
551  }
552 
553  if( GetMedium() )
554  {
555  if( !m_bViewCreated )
556  {
557  m_nEventId = Application::PostUserEvent( LINK( this, SwHTMLParser, AsyncCallback ) );
558  }
559  else
560  {
561  m_bViewCreated = true;
562  m_nEventId = nullptr;
563  }
564  }
565  else // show progress bar
566  {
567  rInput.Seek(STREAM_SEEK_TO_END);
568  rInput.ResetError();
569 
570  m_xProgress.reset(new ImportProgress(m_xDoc->GetDocShell(), 0, rInput.Tell()));
571 
572  rInput.Seek(STREAM_SEEK_TO_BEGIN);
573  rInput.ResetError();
574  }
575 
576  StartListening(m_xDoc->GetPageDesc( 0 ).GetNotifier());
577 
579  return eRet;
580 }
581 
583 {
584  const SwNode *pPrev = m_xDoc->GetNodes()[nNodeIdx - 1];
585  return pPrev->IsContentNode() || (pPrev->IsEndNode() && pPrev->StartOfSectionNode()->IsSectionNode());
586 }
587 
589 {
590 #ifdef DBG_UTIL
591  OSL_ENSURE(!m_nContinue, "Continue in Continue - not supposed to happen");
592  m_nContinue++;
593 #endif
594 
595  // When the import (of SFX) is aborted, an error will be set but
596  // we still continue, so that we clean up properly.
597  OSL_ENSURE( SvParserState::Error!=eState,
598  "SwHTMLParser::Continue: already set an error" );
599  if( m_xDoc->GetDocShell() && m_xDoc->GetDocShell()->IsAbortingImport() )
600  eState = SvParserState::Error;
601 
602  // Fetch SwViewShell from document, save it and set as current.
603  SwViewShell *pInitVSh = CallStartAction();
604 
605  if( SvParserState::Error != eState && GetMedium() && !m_bViewCreated )
606  {
607  // At first call first return, show document and wait for callback
608  // time.
609  // At this point in CallParser only one digit was read and
610  // a SaveState(0) was called.
611  eState = SvParserState::Pending;
612  m_bViewCreated = true;
613  m_xDoc->SetInLoadAsynchron( true );
614 
615 #ifdef DBG_UTIL
616  m_nContinue--;
617 #endif
618 
619  return;
620  }
621 
622  m_bSetModEnabled = false;
623  if( m_xDoc->GetDocShell() )
624  {
625  m_bSetModEnabled = m_xDoc->GetDocShell()->IsEnableSetModified();
626  if( m_bSetModEnabled )
627  {
628  m_xDoc->GetDocShell()->EnableSetModified( false );
629  }
630  }
631 
632  // during import don't call OLE-Modified
633  Link<bool,void> aOLELink( m_xDoc->GetOle2Link() );
634  m_xDoc->SetOle2Link( Link<bool,void>() );
635 
636  bool bModified = m_xDoc->getIDocumentState().IsModified();
637  bool const bWasUndo = m_xDoc->GetIDocumentUndoRedo().DoesUndo();
638  m_xDoc->GetIDocumentUndoRedo().DoUndo(false);
639 
640  // When the import will be aborted, don't call Continue anymore.
641  // If a Pending-Stack exists make sure the stack is ended with a call
642  // of NextToken.
643  if( SvParserState::Error == eState )
644  {
645  OSL_ENSURE( m_vPendingStack.empty() || m_vPendingStack.back().nToken != HtmlTokenId::NONE,
646  "SwHTMLParser::Continue: Pending-Stack without Token" );
647  if( !m_vPendingStack.empty() && m_vPendingStack.back().nToken != HtmlTokenId::NONE )
648  NextToken( m_vPendingStack.back().nToken );
649  OSL_ENSURE( m_vPendingStack.empty(),
650  "SwHTMLParser::Continue: There is again a Pending-Stack" );
651  }
652  else
653  {
654  HTMLParser::Continue( !m_vPendingStack.empty() ? m_vPendingStack.back().nToken : nToken );
655  }
656 
657  // disable progress bar again
658  m_xProgress.reset();
659 
660  bool bLFStripped = false;
661  if( SvParserState::Pending != GetStatus() )
662  {
663  // set the last attributes yet
664  {
665  if( !m_aScriptSource.isEmpty() )
666  {
667  SwScriptFieldType *pType =
668  static_cast<SwScriptFieldType*>(m_xDoc->getIDocumentFieldsAccess().GetSysFieldType( SwFieldIds::Script ));
669 
671  false );
672  InsertAttr( SwFormatField( aField ), false );
673  }
674 
675  if( m_pAppletImpl )
676  {
677  if( m_pAppletImpl->GetApplet().is() )
678  EndApplet();
679  else
680  EndObject();
681  }
682 
683  // maybe remove an existing LF after the last paragraph
684  if( IsNewDoc() )
685  bLFStripped = StripTrailingLF() > 0;
686 
687  // close still open numbering
688  while( GetNumInfo().GetNumRule() )
689  EndNumBulList();
690 
691  OSL_ENSURE( !m_nContextStMin, "There are protected contexts" );
692  // try this twice, first normally to let m_nContextStMin decrease
693  // naturally and get contexts popped in desired order, and if that
694  // fails force it
695  for (int i = 0; i < 2; ++i)
696  {
697  while (m_aContexts.size() > m_nContextStMin)
698  {
699  std::unique_ptr<HTMLAttrContext> xCntxt(PopContext());
700  if (xCntxt)
701  EndContext(xCntxt.get());
702  }
703  if (!m_nContextStMin)
704  break;
705  OSL_ENSURE(!m_nContextStMin, "There are still protected contexts");
706  m_nContextStMin = 0;
707  }
708 
709  m_aParaAttrs.clear();
710 
711  SetAttr( false );
712 
713  // set the first delayed styles
714  m_pCSS1Parser->SetDelayedStyles();
715  }
716 
717  // again correct the start
718  if( !IsNewDoc() && m_pSttNdIdx->GetIndex() )
719  {
720  SwTextNode* pTextNode = m_pSttNdIdx->GetNode().GetTextNode();
721  SwNodeIndex aNxtIdx( *m_pSttNdIdx );
722  if( pTextNode && pTextNode->CanJoinNext( &aNxtIdx ))
723  {
724  const sal_Int32 nStt = pTextNode->GetText().getLength();
725  // when the cursor is still in the node, then set him at the end
726  if( m_pPam->GetPoint()->nNode == aNxtIdx )
727  {
729  m_pPam->GetPoint()->nContent.Assign( pTextNode, nStt );
730  }
731 
732 #if OSL_DEBUG_LEVEL > 0
733 // !!! shouldn't be possible, or ??
734  OSL_ENSURE( m_pSttNdIdx->GetIndex()+1 != m_pPam->GetBound().nNode.GetIndex(),
735  "Pam.Bound1 is still in the node" );
736  OSL_ENSURE( m_pSttNdIdx->GetIndex()+1 != m_pPam->GetBound( false ).nNode.GetIndex(),
737  "Pam.Bound2 is still in the node" );
738 
739  if( m_pSttNdIdx->GetIndex()+1 == m_pPam->GetBound().nNode.GetIndex() )
740  {
741  const sal_Int32 nCntPos = m_pPam->GetBound().nContent.GetIndex();
742  m_pPam->GetBound().nContent.Assign( pTextNode,
743  pTextNode->GetText().getLength() + nCntPos );
744  }
745  if( m_pSttNdIdx->GetIndex()+1 == m_pPam->GetBound( false ).nNode.GetIndex() )
746  {
747  const sal_Int32 nCntPos = m_pPam->GetBound( false ).nContent.GetIndex();
748  m_pPam->GetBound( false ).nContent.Assign( pTextNode,
749  pTextNode->GetText().getLength() + nCntPos );
750  }
751 #endif
752  // Keep character attribute!
753  SwTextNode* pDelNd = aNxtIdx.GetNode().GetTextNode();
754  if (pTextNode->GetText().getLength())
755  pDelNd->FormatToTextAttr( pTextNode );
756  else
757  pTextNode->ChgFormatColl( pDelNd->GetTextColl() );
758  pTextNode->JoinNext();
759  }
760  }
761  }
762 
763  if( SvParserState::Accepted == eState )
764  {
765  if( m_nMissingImgMaps )
766  {
767  // Some Image-Map relations are still missing.
768  // Maybe now the Image-Maps are there?
770  }
771 
772  // now remove the last useless paragraph
773  SwPosition* pPos = m_pPam->GetPoint();
774  if( !pPos->nContent.GetIndex() && !bLFStripped )
775  {
776  SwTextNode* pCurrentNd;
777  sal_uLong nNodeIdx = pPos->nNode.GetIndex();
778 
779  bool bHasFlysOrMarks =
781 
782  if( IsNewDoc() )
783  {
784  if (!m_pPam->GetPoint()->nContent.GetIndex() && CanRemoveNode(nNodeIdx))
785  {
787  if( pCNd && pCNd->StartOfSectionIndex()+2 <
788  pCNd->EndOfSectionIndex() && !bHasFlysOrMarks )
789  {
791  SwCursorShell *pCursorSh = dynamic_cast<SwCursorShell *>( pVSh );
792  if( pCursorSh &&
793  pCursorSh->GetCursor()->GetPoint()
794  ->nNode.GetIndex() == nNodeIdx )
795  {
796  pCursorSh->MovePara(GoPrevPara, fnParaEnd );
797  pCursorSh->SetMark();
798  pCursorSh->ClearMark();
799  }
800  m_pPam->GetBound().nContent.Assign( nullptr, 0 );
801  m_pPam->GetBound(false).nContent.Assign( nullptr, 0 );
802  m_xDoc->GetNodes().Delete( m_pPam->GetPoint()->nNode );
803  }
804  }
805  }
806  else if( nullptr != ( pCurrentNd = m_xDoc->GetNodes()[ nNodeIdx ]->GetTextNode()) && !bHasFlysOrMarks )
807  {
808  if( pCurrentNd->CanJoinNext( &pPos->nNode ))
809  {
810  SwTextNode* pNextNd = pPos->nNode.GetNode().GetTextNode();
811  pPos->nContent.Assign( pNextNd, 0 );
813  pNextNd->JoinPrev();
814  }
815  else if (pCurrentNd->GetText().isEmpty())
816  {
817  pPos->nContent.Assign( nullptr, 0 );
819  m_xDoc->GetNodes().Delete( pPos->nNode );
821  }
822  }
823  }
824 
825  // annul the SplitNode from the beginning
826  else if( !IsNewDoc() )
827  {
828  if( pPos->nContent.GetIndex() ) // then there was no <p> at the end
829  m_pPam->Move( fnMoveForward, GoInNode ); // therefore to the next
830  SwTextNode* pTextNode = pPos->nNode.GetNode().GetTextNode();
831  SwNodeIndex aPrvIdx( pPos->nNode );
832  if( pTextNode && pTextNode->CanJoinPrev( &aPrvIdx ) &&
833  *m_pSttNdIdx <= aPrvIdx )
834  {
835  // Normally here should take place a JoinNext, but all cursors and
836  // so are registered in pTextNode, so that it MUST remain.
837 
838  // Convert paragraph to character attribute, from Prev adopt
839  // the paragraph attribute and the template!
840  SwTextNode* pPrev = aPrvIdx.GetNode().GetTextNode();
841  pTextNode->ChgFormatColl( pPrev->GetTextColl() );
842  pTextNode->FormatToTextAttr( pPrev );
843  pTextNode->ResetAllAttr();
844 
845  if( pPrev->HasSwAttrSet() )
846  pTextNode->SetAttr( *pPrev->GetpSwAttrSet() );
847 
848  if( &m_pPam->GetBound().nNode.GetNode() == pPrev )
849  m_pPam->GetBound().nContent.Assign( pTextNode, 0 );
850  if( &m_pPam->GetBound(false).nNode.GetNode() == pPrev )
851  m_pPam->GetBound(false).nContent.Assign( pTextNode, 0 );
852 
853  pTextNode->JoinPrev();
854  }
855  }
856 
857  // adjust AutoLoad in DocumentProperties
858  if (!utl::ConfigManager::IsFuzzing() && IsNewDoc())
859  {
860  SwDocShell *pDocShell(m_xDoc->GetDocShell());
861  OSL_ENSURE(pDocShell, "no SwDocShell");
862  if (pDocShell) {
863  uno::Reference<document::XDocumentPropertiesSupplier> xDPS(
864  pDocShell->GetModel(), uno::UNO_QUERY_THROW);
865  uno::Reference<document::XDocumentProperties> xDocProps(
866  xDPS->getDocumentProperties());
867  OSL_ENSURE(xDocProps.is(), "DocumentProperties is null");
868  if ( xDocProps.is() && (xDocProps->getAutoloadSecs() > 0) &&
869  (xDocProps->getAutoloadURL().isEmpty()) )
870  {
871  xDocProps->setAutoloadURL(m_aPathToFile);
872  }
873  }
874  }
875 
876  if( m_bUpdateDocStat )
877  {
878  m_xDoc->getIDocumentStatistics().UpdateDocStat( false, true );
879  }
880  }
881 
882  if( SvParserState::Pending != GetStatus() )
883  {
884  delete m_pSttNdIdx;
885  m_pSttNdIdx = nullptr;
886  }
887 
888  // should the parser be the last one who hold the document, then nothing
889  // has to be done anymore, document will be destroyed shortly!
890  if( 1 < m_xDoc->getReferenceCount() )
891  {
892  if( bWasUndo )
893  {
894  m_xDoc->GetIDocumentUndoRedo().DelAllUndoObj();
895  m_xDoc->GetIDocumentUndoRedo().DoUndo(true);
896  }
897  else if( !pInitVSh )
898  {
899  // When at the beginning of Continue no Shell was available,
900  // it's possible in the meantime one was created.
901  // In that case the bWasUndo flag is wrong and we must
902  // enable Undo.
903  SwViewShell *pTmpVSh = CheckActionViewShell();
904  if( pTmpVSh )
905  {
906  m_xDoc->GetIDocumentUndoRedo().DoUndo(true);
907  }
908  }
909 
910  m_xDoc->SetOle2Link( aOLELink );
911  if( !bModified )
912  m_xDoc->getIDocumentState().ResetModified();
913  if( m_bSetModEnabled && m_xDoc->GetDocShell() )
914  {
915  m_xDoc->GetDocShell()->EnableSetModified();
916  m_bSetModEnabled = false; // this is unnecessary here
917  }
918  }
919 
920  // When the Document-SwVievShell still exists and an Action is open
921  // (doesn't have to be by abort), end the Action, disconnect from Shell
922  // and finally reconstruct the old Shell.
923  CallEndAction( true );
924 
925 #ifdef DBG_UTIL
926  m_nContinue--;
927 #endif
928 }
929 
930 void SwHTMLParser::Notify(const SfxHint& rHint)
931 {
932  if(rHint.GetId() == SfxHintId::Dying)
933  {
934  EndListeningAll();
935  ReleaseRef();
936  }
937 }
938 
940 {
941  OSL_ENSURE( !m_bDocInitalized, "DocumentDetected called multiple times" );
942  m_bDocInitalized = true;
943  if( IsNewDoc() )
944  {
945  if( IsInHeader() )
946  FinishHeader();
947 
948  CallEndAction( true );
949 
950  m_xDoc->GetIDocumentUndoRedo().DoUndo(false);
951  // For DocumentDetected in general a SwViewShell is created.
952  // But it also can be created later, in case the UI is captured.
953  CallStartAction();
954  }
955 }
956 
957 // is called for every token that is recognised in CallParser
959 {
960  if( ( m_xDoc->GetDocShell() && m_xDoc->GetDocShell()->IsAbortingImport() )
961  || 1 == m_xDoc->getReferenceCount() )
962  {
963  // Was the import cancelled by SFX? If a pending stack
964  // exists, clean it.
965  eState = SvParserState::Error;
966  OSL_ENSURE( m_vPendingStack.empty() || m_vPendingStack.back().nToken != HtmlTokenId::NONE,
967  "SwHTMLParser::NextToken: Pending-Stack without token" );
968  if( 1 == m_xDoc->getReferenceCount() || m_vPendingStack.empty() )
969  return ;
970  }
971 
972 #if OSL_DEBUG_LEVEL > 0
973  if( !m_vPendingStack.empty() )
974  {
975  switch( nToken )
976  {
977  // tables are read by recursive method calls
978  case HtmlTokenId::TABLE_ON:
979  // For CSS declarations we might have to wait
980  // for a file download to finish
981  case HtmlTokenId::LINK:
982  // For controls we might have to set the size.
983  case HtmlTokenId::INPUT:
984  case HtmlTokenId::TEXTAREA_ON:
985  case HtmlTokenId::SELECT_ON:
986  case HtmlTokenId::SELECT_OFF:
987  break;
988  default:
989  OSL_ENSURE( m_vPendingStack.empty(), "Unknown token for Pending-Stack" );
990  break;
991  }
992  }
993 #endif
994 
995  // The following special cases have to be treated before the
996  // filter detection, because Netscape doesn't reference the content
997  // of the title for filter detection either.
998  if( m_vPendingStack.empty() )
999  {
1000  if( m_bInTitle )
1001  {
1002  switch( nToken )
1003  {
1004  case HtmlTokenId::TITLE_OFF:
1005  {
1006  OUString sTitle = m_sTitle.makeStringAndClear();
1007  if( IsNewDoc() && !sTitle.isEmpty() )
1008  {
1009  if( m_xDoc->GetDocShell() ) {
1010  uno::Reference<document::XDocumentPropertiesSupplier>
1011  xDPS(m_xDoc->GetDocShell()->GetModel(),
1012  uno::UNO_QUERY_THROW);
1013  uno::Reference<document::XDocumentProperties> xDocProps(
1014  xDPS->getDocumentProperties());
1015  OSL_ENSURE(xDocProps.is(), "no DocumentProperties");
1016  if (xDocProps.is()) {
1017  xDocProps->setTitle(sTitle);
1018  }
1019 
1020  m_xDoc->GetDocShell()->SetTitle(sTitle);
1021  }
1022  }
1023  m_bInTitle = false;
1024  break;
1025  }
1026 
1027  case HtmlTokenId::NONBREAKSPACE:
1028  m_sTitle.append(" ");
1029  break;
1030 
1031  case HtmlTokenId::SOFTHYPH:
1032  m_sTitle.append("-");
1033  break;
1034 
1035  case HtmlTokenId::TEXTTOKEN:
1036  m_sTitle.append(aToken);
1037  break;
1038 
1039  default:
1040  m_sTitle.append("<");
1041  if( (nToken >= HtmlTokenId::ONOFF_START) && isOffToken(nToken) )
1042  m_sTitle.append("/");
1043  m_sTitle.append(sSaveToken);
1044  if( !aToken.isEmpty() )
1045  {
1046  m_sTitle.append(" ");
1047  m_sTitle.append(aToken);
1048  }
1049  m_sTitle.append(">");
1050  break;
1051  }
1052 
1053  return;
1054  }
1055  }
1056 
1057  // Find out what type of document it is if we don't know already.
1058  // For Controls this has to be finished before the control is inserted
1059  // because for inserting a View is needed.
1060  if( !m_bDocInitalized )
1061  DocumentDetected();
1062 
1063  bool bGetIDOption = false, bInsertUnknown = false;
1064  bool bUpperSpaceSave = m_bUpperSpace;
1065  m_bUpperSpace = false;
1066 
1067  // The following special cases may or have to be treated after the
1068  // filter detection
1069  if( m_vPendingStack.empty() )
1070  {
1071  if( m_bInFloatingFrame )
1072  {
1073  // <SCRIPT> is ignored here (from us), because it is ignored in
1074  // Applets as well
1075  if( HtmlTokenId::IFRAME_OFF == nToken )
1076  {
1077  m_bCallNextToken = false;
1078  m_bInFloatingFrame = false;
1079  }
1080 
1081  return;
1082  }
1083  else if( m_bInNoEmbed )
1084  {
1085  switch( nToken )
1086  {
1087  case HtmlTokenId::NOEMBED_OFF:
1090  m_aContents.clear();
1091  m_bCallNextToken = false;
1092  m_bInNoEmbed = false;
1093  break;
1094 
1095  case HtmlTokenId::RAWDATA:
1097  break;
1098 
1099  default:
1100  OSL_ENSURE( false, "SwHTMLParser::NextToken: invalid tag" );
1101  break;
1102  }
1103 
1104  return;
1105  }
1106  else if( m_pAppletImpl )
1107  {
1108  // in an applet only <PARAM> tags and the </APPLET> tag
1109  // are of interest for us (for the moment)
1110  // <SCRIPT> is ignored here (from Netscape)!
1111 
1112  switch( nToken )
1113  {
1114  case HtmlTokenId::APPLET_OFF:
1115  m_bCallNextToken = false;
1116  EndApplet();
1117  break;
1118  case HtmlTokenId::OBJECT_OFF:
1119  m_bCallNextToken = false;
1120  EndObject();
1121  break;
1122  case HtmlTokenId::PARAM:
1123  InsertParam();
1124  break;
1125  default: break;
1126  }
1127 
1128  return;
1129  }
1130  else if( m_bTextArea )
1131  {
1132  // in a TextArea everything up to </TEXTAREA> is inserted as text.
1133  // <SCRIPT> is ignored here (from Netscape)!
1134 
1135  switch( nToken )
1136  {
1137  case HtmlTokenId::TEXTAREA_OFF:
1138  m_bCallNextToken = false;
1139  EndTextArea();
1140  break;
1141 
1142  default:
1143  InsertTextAreaText( nToken );
1144  break;
1145  }
1146 
1147  return;
1148  }
1149  else if( m_bSelect )
1150  {
1151  // HAS to be treated after bNoScript!
1152  switch( nToken )
1153  {
1154  case HtmlTokenId::SELECT_OFF:
1155  m_bCallNextToken = false;
1156  EndSelect();
1157  return;
1158 
1159  case HtmlTokenId::OPTION:
1161  return;
1162 
1163  case HtmlTokenId::TEXTTOKEN:
1164  InsertSelectText();
1165  return;
1166 
1167  case HtmlTokenId::INPUT:
1168  case HtmlTokenId::SCRIPT_ON:
1169  case HtmlTokenId::SCRIPT_OFF:
1170  case HtmlTokenId::NOSCRIPT_ON:
1171  case HtmlTokenId::NOSCRIPT_OFF:
1172  case HtmlTokenId::RAWDATA:
1173  // treat in normal switch
1174  break;
1175 
1176  default:
1177  // ignore
1178  return;
1179  }
1180  }
1181  else if( m_pMarquee )
1182  {
1183  // in a TextArea everything up to </TEXTAREA> is inserted as text.
1184  // The <SCRIPT> tags are ignored from MS-IE, we ignore the whole
1185  // script.
1186  switch( nToken )
1187  {
1188  case HtmlTokenId::MARQUEE_OFF:
1189  m_bCallNextToken = false;
1190  EndMarquee();
1191  break;
1192 
1193  case HtmlTokenId::TEXTTOKEN:
1195  break;
1196  default: break;
1197  }
1198 
1199  return;
1200  }
1201  else if( m_bInField )
1202  {
1203  switch( nToken )
1204  {
1205  case HtmlTokenId::SDFIELD_OFF:
1206  m_bCallNextToken = false;
1207  EndField();
1208  break;
1209 
1210  case HtmlTokenId::TEXTTOKEN:
1211  InsertFieldText();
1212  break;
1213  default: break;
1214  }
1215 
1216  return;
1217  }
1219  {
1220  switch( nToken )
1221  {
1222  case HtmlTokenId::ANCHOR_OFF:
1223  EndAnchor();
1224  m_bCallNextToken = false;
1225  break;
1226 
1227  case HtmlTokenId::TEXTTOKEN:
1229  break;
1230  default: break;
1231  }
1232  return;
1233  }
1234  else if( !m_aUnknownToken.isEmpty() )
1235  {
1236  // Paste content of unknown tags.
1237  // (but surely if we are not in the header section) fdo#36080 fdo#34666
1238  if (!aToken.isEmpty() && !IsInHeader() )
1239  {
1240  if( !m_bDocInitalized )
1241  DocumentDetected();
1242  m_xDoc->getIDocumentContentOperations().InsertString( *m_pPam, aToken );
1243 
1244  // if there are temporary paragraph attributes and the
1245  // paragraph isn't empty then the paragraph attributes
1246  // are final.
1247  m_aParaAttrs.clear();
1248 
1249  SetAttr();
1250  }
1251 
1252  // Unknown token in the header are only closed by a matching
1253  // end-token, </HEAD> or <BODY>. Text inside is ignored.
1254  switch( nToken )
1255  {
1256  case HtmlTokenId::UNKNOWNCONTROL_OFF:
1257  if( m_aUnknownToken != sSaveToken )
1258  return;
1259  [[fallthrough]];
1260  case HtmlTokenId::FRAMESET_ON:
1261  case HtmlTokenId::HEAD_OFF:
1262  case HtmlTokenId::BODY_ON:
1263  case HtmlTokenId::IMAGE: // Don't know why Netscape acts this way.
1264  m_aUnknownToken.clear();
1265  break;
1266  case HtmlTokenId::TEXTTOKEN:
1267  return;
1268  default:
1269  m_aUnknownToken.clear();
1270  break;
1271  }
1272  }
1273  }
1274 
1275  switch( nToken )
1276  {
1277  case HtmlTokenId::BODY_ON:
1278  if (!m_bBodySeen)
1279  {
1280  m_bBodySeen = true;
1281  if( !m_aStyleSource.isEmpty() )
1282  {
1283  m_pCSS1Parser->ParseStyleSheet( m_aStyleSource );
1284  m_aStyleSource.clear();
1285  }
1286  if( IsNewDoc() )
1287  {
1289  // If there is a template for the first or the right page,
1290  // it is set here.
1291  const SwPageDesc *pPageDesc = nullptr;
1292  if( m_pCSS1Parser->IsSetFirstPageDesc() )
1293  pPageDesc = m_pCSS1Parser->GetFirstPageDesc();
1294  else if( m_pCSS1Parser->IsSetRightPageDesc() )
1295  pPageDesc = m_pCSS1Parser->GetRightPageDesc();
1296 
1297  if( pPageDesc )
1298  {
1299  m_xDoc->getIDocumentContentOperations().InsertPoolItem( *m_pPam, SwFormatPageDesc( pPageDesc ) );
1300  }
1301  }
1302  }
1303  break;
1304 
1305  case HtmlTokenId::LINK:
1306  InsertLink();
1307  break;
1308 
1309  case HtmlTokenId::BASE:
1310  {
1311  const HTMLOptions& rHTMLOptions = GetOptions();
1312  for (size_t i = rHTMLOptions.size(); i; )
1313  {
1314  const HTMLOption& rOption = rHTMLOptions[--i];
1315  switch( rOption.GetToken() )
1316  {
1317  case HtmlOptionId::HREF:
1318  m_sBaseURL = rOption.GetString();
1319  break;
1320  case HtmlOptionId::TARGET:
1321  if( IsNewDoc() )
1322  {
1323  SwDocShell *pDocShell(m_xDoc->GetDocShell());
1324  OSL_ENSURE(pDocShell, "no SwDocShell");
1325  if (pDocShell) {
1326  uno::Reference<document::XDocumentPropertiesSupplier> xDPS(
1327  pDocShell->GetModel(), uno::UNO_QUERY_THROW);
1328  uno::Reference<document::XDocumentProperties>
1329  xDocProps(xDPS->getDocumentProperties());
1330  OSL_ENSURE(xDocProps.is(),"no DocumentProperties");
1331  if (xDocProps.is()) {
1332  xDocProps->setDefaultTarget(
1333  rOption.GetString());
1334  }
1335  }
1336  }
1337  break;
1338  default: break;
1339  }
1340  }
1341  }
1342  break;
1343 
1344  case HtmlTokenId::META:
1345  {
1346  SvKeyValueIterator *pHTTPHeader = nullptr;
1347  if( IsNewDoc() )
1348  {
1349  SwDocShell *pDocSh = m_xDoc->GetDocShell();
1350  if( pDocSh )
1351  pHTTPHeader = pDocSh->GetHeaderAttributes();
1352  }
1353  SwDocShell *pDocShell(m_xDoc->GetDocShell());
1354  OSL_ENSURE(pDocShell, "no SwDocShell");
1355  if (pDocShell)
1356  {
1357  uno::Reference<document::XDocumentProperties> xDocProps;
1358  if (IsNewDoc())
1359  {
1360  const uno::Reference<document::XDocumentPropertiesSupplier>
1361  xDPS( pDocShell->GetModel(), uno::UNO_QUERY_THROW );
1362  xDocProps = xDPS->getDocumentProperties();
1363  OSL_ENSURE(xDocProps.is(), "DocumentProperties is null");
1364  }
1365  ParseMetaOptions( xDocProps, pHTTPHeader );
1366  }
1367  }
1368  break;
1369 
1370  case HtmlTokenId::TITLE_ON:
1371  m_bInTitle = true;
1372  break;
1373 
1374  case HtmlTokenId::SCRIPT_ON:
1375  NewScript();
1376  break;
1377 
1378  case HtmlTokenId::SCRIPT_OFF:
1379  EndScript();
1380  break;
1381 
1382  case HtmlTokenId::NOSCRIPT_ON:
1383  case HtmlTokenId::NOSCRIPT_OFF:
1384  bInsertUnknown = true;
1385  break;
1386 
1387  case HtmlTokenId::STYLE_ON:
1388  NewStyle();
1389  break;
1390 
1391  case HtmlTokenId::STYLE_OFF:
1392  EndStyle();
1393  break;
1394 
1395  case HtmlTokenId::RAWDATA:
1396  if( !m_bIgnoreRawData )
1397  {
1398  if( IsReadScript() )
1399  {
1400  AddScriptSource();
1401  }
1402  else if( IsReadStyle() )
1403  {
1404  if( !m_aStyleSource.isEmpty() )
1405  m_aStyleSource += "\n";
1406  m_aStyleSource += aToken;
1407  }
1408  }
1409  break;
1410 
1411  case HtmlTokenId::OBJECT_ON:
1412  if (m_bXHTML)
1413  {
1414  if (!InsertEmbed())
1415  InsertImage();
1416  break;
1417  }
1418 #if HAVE_FEATURE_JAVA
1419  NewObject();
1420  m_bCallNextToken = m_pAppletImpl!=nullptr && m_xTable;
1421 #endif
1422  break;
1423 
1424  case HtmlTokenId::OBJECT_OFF:
1425  if (!m_aEmbeds.empty())
1426  m_aEmbeds.pop();
1427  break;
1428 
1429  case HtmlTokenId::APPLET_ON:
1430 #if HAVE_FEATURE_JAVA
1431  InsertApplet();
1432  m_bCallNextToken = m_pAppletImpl!=nullptr && m_xTable;
1433 #endif
1434  break;
1435 
1436  case HtmlTokenId::IFRAME_ON:
1439  break;
1440 
1441  case HtmlTokenId::LINEBREAK:
1442  if( !IsReadPRE() )
1443  {
1444  InsertLineBreak();
1445  break;
1446  }
1447  else
1448  bGetIDOption = true;
1449  // <BR>s in <PRE> resemble true LFs, hence no break
1450  [[fallthrough]];
1451 
1452  case HtmlTokenId::NEWPARA:
1453  // CR in PRE/LISTING/XMP
1454  {
1455  if( HtmlTokenId::NEWPARA==nToken ||
1457  {
1458  AppendTextNode(); // there is no LF at this place
1459  // therefore it will cause no problems
1460  SetTextCollAttrs();
1461  }
1462  // progress bar
1463  if (m_xProgress)
1464  m_xProgress->Update(rInput.Tell());
1465  }
1466  break;
1467 
1468  case HtmlTokenId::NONBREAKSPACE:
1469  m_xDoc->getIDocumentContentOperations().InsertString( *m_pPam, OUString(CHAR_HARDBLANK) );
1470  break;
1471 
1472  case HtmlTokenId::SOFTHYPH:
1473  m_xDoc->getIDocumentContentOperations().InsertString( *m_pPam, OUString(CHAR_SOFTHYPHEN) );
1474  break;
1475 
1476  case HtmlTokenId::LINEFEEDCHAR:
1477  if( m_pPam->GetPoint()->nContent.GetIndex() )
1478  AppendTextNode();
1479  if (!m_xTable && !m_xDoc->IsInHeaderFooter(m_pPam->GetPoint()->nNode))
1480  {
1481  NewAttr(m_xAttrTab, &m_xAttrTab->pBreak, SvxFormatBreakItem(SvxBreak::PageBefore, RES_BREAK));
1482  EndAttr( m_xAttrTab->pBreak, false );
1483  }
1484  break;
1485 
1486  case HtmlTokenId::TEXTTOKEN:
1487  // insert string without spanning attributes at the end.
1488  if( !aToken.isEmpty() && ' '==aToken[0] && !IsReadPRE() )
1489  {
1490  sal_Int32 nPos = m_pPam->GetPoint()->nContent.GetIndex();
1491  const SwTextNode* pTextNode = nPos ? m_pPam->GetPoint()->nNode.GetNode().GetTextNode() : nullptr;
1492  if (pTextNode)
1493  {
1494  const OUString& rText = pTextNode->GetText();
1495  sal_Unicode cLast = rText[--nPos];
1496  if( ' ' == cLast || '\x0a' == cLast)
1497  aToken = aToken.copy(1);
1498  }
1499  else
1500  aToken = aToken.copy(1);
1501 
1502  if( aToken.isEmpty() )
1503  {
1504  m_bUpperSpace = bUpperSpaceSave;
1505  break;
1506  }
1507  }
1508 
1509  if( !aToken.isEmpty() )
1510  {
1511  if( !m_bDocInitalized )
1512  DocumentDetected();
1513 
1514  if (!m_aEmbeds.empty())
1515  {
1516  // The text token is inside an OLE object, which means
1517  // alternate text.
1518  SwOLENode* pOLENode = m_aEmbeds.top();
1519  if (SwFlyFrameFormat* pFormat
1520  = dynamic_cast<SwFlyFrameFormat*>(pOLENode->GetFlyFormat()))
1521  {
1522  if (SdrObject* pObject = SwXFrame::GetOrCreateSdrObject(*pFormat))
1523  {
1524  pObject->SetTitle(pObject->GetTitle() + aToken);
1525  break;
1526  }
1527  }
1528  }
1529 
1530  m_xDoc->getIDocumentContentOperations().InsertString( *m_pPam, aToken );
1531 
1532  // if there are temporary paragraph attributes and the
1533  // paragraph isn't empty then the paragraph attributes
1534  // are final.
1535  m_aParaAttrs.clear();
1536 
1537  SetAttr();
1538  }
1539  break;
1540 
1541  case HtmlTokenId::HORZRULE:
1542  InsertHorzRule();
1543  break;
1544 
1545  case HtmlTokenId::IMAGE:
1546  InsertImage();
1547  // if only the parser references the doc, we can break and set
1548  // an error code
1549  if( 1 == m_xDoc->getReferenceCount() )
1550  {
1551  eState = SvParserState::Error;
1552  }
1553  break;
1554 
1555  case HtmlTokenId::SPACER:
1556  InsertSpacer();
1557  break;
1558 
1559  case HtmlTokenId::EMBED:
1560  InsertEmbed();
1561  break;
1562 
1563  case HtmlTokenId::NOEMBED_ON:
1564  m_bInNoEmbed = true;
1565  m_bCallNextToken = bool(m_xTable);
1566  ReadRawData( OOO_STRING_SVTOOLS_HTML_noembed );
1567  break;
1568 
1569  case HtmlTokenId::DEFLIST_ON:
1570  if( m_nOpenParaToken != HtmlTokenId::NONE )
1571  EndPara();
1572  NewDefList();
1573  break;
1574  case HtmlTokenId::DEFLIST_OFF:
1575  if( m_nOpenParaToken != HtmlTokenId::NONE )
1576  EndPara();
1577  EndDefListItem( HtmlTokenId::NONE );
1578  EndDefList();
1579  break;
1580 
1581  case HtmlTokenId::DD_ON:
1582  case HtmlTokenId::DT_ON:
1583  if( m_nOpenParaToken != HtmlTokenId::NONE )
1584  EndPara();
1585  EndDefListItem();// close <DD>/<DT> and set no template
1586  NewDefListItem( nToken );
1587  break;
1588 
1589  case HtmlTokenId::DD_OFF:
1590  case HtmlTokenId::DT_OFF:
1591  // c.f. HtmlTokenId::LI_OFF
1592  // Actually we should close a DD/DT now.
1593  // But neither Netscape nor Microsoft do this and so don't we.
1594  EndDefListItem( nToken );
1595  break;
1596 
1597  // divisions
1598  case HtmlTokenId::DIVISION_ON:
1599  case HtmlTokenId::CENTER_ON:
1600  if (!m_isInTableStructure)
1601  {
1602  if (m_nOpenParaToken != HtmlTokenId::NONE)
1603  {
1604  if (IsReadPRE())
1605  m_nOpenParaToken = HtmlTokenId::NONE;
1606  else
1607  EndPara();
1608  }
1609  NewDivision( nToken );
1610  }
1611  break;
1612 
1613  case HtmlTokenId::DIVISION_OFF:
1614  case HtmlTokenId::CENTER_OFF:
1615  if (!m_isInTableStructure)
1616  {
1617  if (m_nOpenParaToken != HtmlTokenId::NONE)
1618  {
1619  if (IsReadPRE())
1620  m_nOpenParaToken = HtmlTokenId::NONE;
1621  else
1622  EndPara();
1623  }
1624  EndDivision();
1625  }
1626  break;
1627 
1628  case HtmlTokenId::MULTICOL_ON:
1629  if( m_nOpenParaToken != HtmlTokenId::NONE )
1630  EndPara();
1631  NewMultiCol();
1632  break;
1633 
1634  case HtmlTokenId::MULTICOL_OFF:
1635  if( m_nOpenParaToken != HtmlTokenId::NONE )
1636  EndPara();
1637  EndTag( HtmlTokenId::MULTICOL_ON );
1638  break;
1639 
1640  case HtmlTokenId::MARQUEE_ON:
1641  NewMarquee();
1642  m_bCallNextToken = m_pMarquee!=nullptr && m_xTable;
1643  break;
1644 
1645  case HtmlTokenId::FORM_ON:
1646  NewForm();
1647  break;
1648  case HtmlTokenId::FORM_OFF:
1649  EndForm();
1650  break;
1651 
1652  // templates
1653  case HtmlTokenId::PARABREAK_ON:
1654  if( m_nOpenParaToken != HtmlTokenId::NONE )
1655  EndPara( true );
1656  NewPara();
1657  break;
1658 
1659  case HtmlTokenId::PARABREAK_OFF:
1660  EndPara( true );
1661  break;
1662 
1663  case HtmlTokenId::ADDRESS_ON:
1664  if( m_nOpenParaToken != HtmlTokenId::NONE )
1665  EndPara();
1666  NewTextFormatColl( HtmlTokenId::ADDRESS_ON, RES_POOLCOLL_SENDADRESS );
1667  break;
1668 
1669  case HtmlTokenId::ADDRESS_OFF:
1670  if( m_nOpenParaToken != HtmlTokenId::NONE )
1671  EndPara();
1672  EndTextFormatColl( HtmlTokenId::ADDRESS_OFF );
1673  break;
1674 
1675  case HtmlTokenId::BLOCKQUOTE_ON:
1676  case HtmlTokenId::BLOCKQUOTE30_ON:
1677  if( m_nOpenParaToken != HtmlTokenId::NONE )
1678  EndPara();
1679  NewTextFormatColl( HtmlTokenId::BLOCKQUOTE_ON, RES_POOLCOLL_HTML_BLOCKQUOTE );
1680  break;
1681 
1682  case HtmlTokenId::BLOCKQUOTE_OFF:
1683  case HtmlTokenId::BLOCKQUOTE30_OFF:
1684  if( m_nOpenParaToken != HtmlTokenId::NONE )
1685  EndPara();
1686  EndTextFormatColl( HtmlTokenId::BLOCKQUOTE_ON );
1687  break;
1688 
1689  case HtmlTokenId::PREFORMTXT_ON:
1690  case HtmlTokenId::LISTING_ON:
1691  case HtmlTokenId::XMP_ON:
1692  if( m_nOpenParaToken != HtmlTokenId::NONE )
1693  EndPara();
1695  break;
1696 
1697  case HtmlTokenId::PREFORMTXT_OFF:
1698  m_bNoParSpace = true; // the last PRE-paragraph gets a spacing
1699  EndTextFormatColl( HtmlTokenId::PREFORMTXT_OFF );
1700  break;
1701 
1702  case HtmlTokenId::LISTING_OFF:
1703  case HtmlTokenId::XMP_OFF:
1704  EndTextFormatColl( nToken );
1705  break;
1706 
1707  case HtmlTokenId::HEAD1_ON:
1708  case HtmlTokenId::HEAD2_ON:
1709  case HtmlTokenId::HEAD3_ON:
1710  case HtmlTokenId::HEAD4_ON:
1711  case HtmlTokenId::HEAD5_ON:
1712  case HtmlTokenId::HEAD6_ON:
1713  if( m_nOpenParaToken != HtmlTokenId::NONE )
1714  {
1715  if( IsReadPRE() )
1716  m_nOpenParaToken = HtmlTokenId::NONE;
1717  else
1718  EndPara();
1719  }
1720  NewHeading( nToken );
1721  break;
1722 
1723  case HtmlTokenId::HEAD1_OFF:
1724  case HtmlTokenId::HEAD2_OFF:
1725  case HtmlTokenId::HEAD3_OFF:
1726  case HtmlTokenId::HEAD4_OFF:
1727  case HtmlTokenId::HEAD5_OFF:
1728  case HtmlTokenId::HEAD6_OFF:
1729  EndHeading();
1730  break;
1731 
1732  case HtmlTokenId::TABLE_ON:
1733  if( !m_vPendingStack.empty() )
1734  BuildTable( SvxAdjust::End );
1735  else
1736  {
1737  if( m_nOpenParaToken != HtmlTokenId::NONE )
1738  EndPara();
1739  OSL_ENSURE(!m_xTable.get(), "table in table not allowed here");
1740  if( !m_xTable && (IsNewDoc() || !m_pPam->GetNode().FindTableNode()) &&
1741  (m_pPam->GetPoint()->nNode.GetIndex() >
1742  m_xDoc->GetNodes().GetEndOfExtras().GetIndex() ||
1744  {
1745  if ( m_nParaCnt < 5 )
1746  Show(); // show what we have up to here
1747 
1748  SvxAdjust eAdjust = m_xAttrTab->pAdjust
1749  ? static_cast<const SvxAdjustItem&>(m_xAttrTab->pAdjust->GetItem()).
1750  GetAdjust()
1751  : SvxAdjust::End;
1752  BuildTable( eAdjust );
1753  }
1754  else
1755  bInsertUnknown = m_bKeepUnknown;
1756  }
1757  break;
1758 
1759  // lists
1760  case HtmlTokenId::DIRLIST_ON:
1761  case HtmlTokenId::MENULIST_ON:
1762  case HtmlTokenId::ORDERLIST_ON:
1763  case HtmlTokenId::UNORDERLIST_ON:
1764  if( m_nOpenParaToken != HtmlTokenId::NONE )
1765  EndPara();
1766  NewNumBulList( nToken );
1767  break;
1768 
1769  case HtmlTokenId::DIRLIST_OFF:
1770  case HtmlTokenId::MENULIST_OFF:
1771  case HtmlTokenId::ORDERLIST_OFF:
1772  case HtmlTokenId::UNORDERLIST_OFF:
1773  if( m_nOpenParaToken != HtmlTokenId::NONE )
1774  EndPara();
1775  EndNumBulListItem( HtmlTokenId::NONE, true );
1776  EndNumBulList( nToken );
1777  break;
1778 
1779  case HtmlTokenId::LI_ON:
1780  case HtmlTokenId::LISTHEADER_ON:
1781  if( m_nOpenParaToken != HtmlTokenId::NONE &&
1783  || HtmlTokenId::PARABREAK_ON==m_nOpenParaToken) )
1784  {
1785  // only finish paragraph for <P><LI>, not for <DD><LI>
1786  EndPara();
1787  }
1788 
1789  EndNumBulListItem( HtmlTokenId::NONE, false );// close <LI>/<LH> and don't set a template
1790  NewNumBulListItem( nToken );
1791  break;
1792 
1793  case HtmlTokenId::LI_OFF:
1794  case HtmlTokenId::LISTHEADER_OFF:
1795  EndNumBulListItem( nToken, false );
1796  break;
1797 
1798  // Attribute :
1799  case HtmlTokenId::ITALIC_ON:
1800  {
1804  NewStdAttr( HtmlTokenId::ITALIC_ON,
1805  &m_xAttrTab->pItalic, aPosture,
1806  &m_xAttrTab->pItalicCJK, &aPostureCJK,
1807  &m_xAttrTab->pItalicCTL, &aPostureCTL );
1808  }
1809  break;
1810 
1811  case HtmlTokenId::BOLD_ON:
1812  {
1816  NewStdAttr( HtmlTokenId::BOLD_ON,
1817  &m_xAttrTab->pBold, aWeight,
1818  &m_xAttrTab->pBoldCJK, &aWeightCJK,
1819  &m_xAttrTab->pBoldCTL, &aWeightCTL );
1820  }
1821  break;
1822 
1823  case HtmlTokenId::STRIKE_ON:
1824  case HtmlTokenId::STRIKETHROUGH_ON:
1825  {
1826  NewStdAttr( HtmlTokenId::STRIKE_ON, &m_xAttrTab->pStrike,
1828  }
1829  break;
1830 
1831  case HtmlTokenId::UNDERLINE_ON:
1832  {
1833  NewStdAttr( HtmlTokenId::UNDERLINE_ON, &m_xAttrTab->pUnderline,
1835  }
1836  break;
1837 
1838  case HtmlTokenId::SUPERSCRIPT_ON:
1839  {
1840  NewStdAttr( HtmlTokenId::SUPERSCRIPT_ON, &m_xAttrTab->pEscapement,
1842  }
1843  break;
1844 
1845  case HtmlTokenId::SUBSCRIPT_ON:
1846  {
1847  NewStdAttr( HtmlTokenId::SUBSCRIPT_ON, &m_xAttrTab->pEscapement,
1849  }
1850  break;
1851 
1852  case HtmlTokenId::BLINK_ON:
1853  {
1854  NewStdAttr( HtmlTokenId::BLINK_ON, &m_xAttrTab->pBlink,
1855  SvxBlinkItem( true, RES_CHRATR_BLINK ) );
1856  }
1857  break;
1858 
1859  case HtmlTokenId::SPAN_ON:
1860  NewStdAttr( HtmlTokenId::SPAN_ON );
1861  break;
1862 
1863  case HtmlTokenId::ITALIC_OFF:
1864  case HtmlTokenId::BOLD_OFF:
1865  case HtmlTokenId::STRIKE_OFF:
1866  case HtmlTokenId::UNDERLINE_OFF:
1867  case HtmlTokenId::SUPERSCRIPT_OFF:
1868  case HtmlTokenId::SUBSCRIPT_OFF:
1869  case HtmlTokenId::BLINK_OFF:
1870  case HtmlTokenId::SPAN_OFF:
1871  EndTag( nToken );
1872  break;
1873 
1874  case HtmlTokenId::STRIKETHROUGH_OFF:
1875  EndTag( HtmlTokenId::STRIKE_OFF );
1876  break;
1877 
1878  case HtmlTokenId::BASEFONT_ON:
1879  NewBasefontAttr();
1880  break;
1881  case HtmlTokenId::BASEFONT_OFF:
1882  EndBasefontAttr();
1883  break;
1884  case HtmlTokenId::FONT_ON:
1885  case HtmlTokenId::BIGPRINT_ON:
1886  case HtmlTokenId::SMALLPRINT_ON:
1887  NewFontAttr( nToken );
1888  break;
1889  case HtmlTokenId::FONT_OFF:
1890  case HtmlTokenId::BIGPRINT_OFF:
1891  case HtmlTokenId::SMALLPRINT_OFF:
1892  EndFontAttr( nToken );
1893  break;
1894 
1895  case HtmlTokenId::EMPHASIS_ON:
1896  case HtmlTokenId::CITIATION_ON:
1897  case HtmlTokenId::STRONG_ON:
1898  case HtmlTokenId::CODE_ON:
1899  case HtmlTokenId::SAMPLE_ON:
1900  case HtmlTokenId::KEYBOARD_ON:
1901  case HtmlTokenId::VARIABLE_ON:
1902  case HtmlTokenId::DEFINSTANCE_ON:
1903  case HtmlTokenId::SHORTQUOTE_ON:
1904  case HtmlTokenId::LANGUAGE_ON:
1905  case HtmlTokenId::AUTHOR_ON:
1906  case HtmlTokenId::PERSON_ON:
1907  case HtmlTokenId::ACRONYM_ON:
1908  case HtmlTokenId::ABBREVIATION_ON:
1909  case HtmlTokenId::INSERTEDTEXT_ON:
1910  case HtmlTokenId::DELETEDTEXT_ON:
1911 
1912  case HtmlTokenId::TELETYPE_ON:
1913  NewCharFormat( nToken );
1914  break;
1915 
1916  case HtmlTokenId::SDFIELD_ON:
1917  NewField();
1919  break;
1920 
1921  case HtmlTokenId::EMPHASIS_OFF:
1922  case HtmlTokenId::CITIATION_OFF:
1923  case HtmlTokenId::STRONG_OFF:
1924  case HtmlTokenId::CODE_OFF:
1925  case HtmlTokenId::SAMPLE_OFF:
1926  case HtmlTokenId::KEYBOARD_OFF:
1927  case HtmlTokenId::VARIABLE_OFF:
1928  case HtmlTokenId::DEFINSTANCE_OFF:
1929  case HtmlTokenId::SHORTQUOTE_OFF:
1930  case HtmlTokenId::LANGUAGE_OFF:
1931  case HtmlTokenId::AUTHOR_OFF:
1932  case HtmlTokenId::PERSON_OFF:
1933  case HtmlTokenId::ACRONYM_OFF:
1934  case HtmlTokenId::ABBREVIATION_OFF:
1935  case HtmlTokenId::INSERTEDTEXT_OFF:
1936  case HtmlTokenId::DELETEDTEXT_OFF:
1937 
1938  case HtmlTokenId::TELETYPE_OFF:
1939  EndTag( nToken );
1940  break;
1941 
1942  case HtmlTokenId::HEAD_OFF:
1943  if( !m_aStyleSource.isEmpty() )
1944  {
1945  m_pCSS1Parser->ParseStyleSheet( m_aStyleSource );
1946  m_aStyleSource.clear();
1947  }
1948  break;
1949 
1950  case HtmlTokenId::DOCTYPE:
1951  case HtmlTokenId::BODY_OFF:
1952  case HtmlTokenId::HTML_OFF:
1953  case HtmlTokenId::HEAD_ON:
1954  case HtmlTokenId::TITLE_OFF:
1955  break; // don't evaluate further???
1956  case HtmlTokenId::HTML_ON:
1957  {
1958  const HTMLOptions& rHTMLOptions = GetOptions();
1959  for (size_t i = rHTMLOptions.size(); i; )
1960  {
1961  const HTMLOption& rOption = rHTMLOptions[--i];
1962  if( HtmlOptionId::DIR == rOption.GetToken() )
1963  {
1964  const OUString& rDir = rOption.GetString();
1965  SfxItemSet aItemSet( m_xDoc->GetAttrPool(),
1966  m_pCSS1Parser->GetWhichMap() );
1967  SvxCSS1PropertyInfo aPropInfo;
1968  OUString aDummy;
1969  ParseStyleOptions( aDummy, aDummy, aDummy, aItemSet,
1970  aPropInfo, nullptr, &rDir );
1971 
1972  m_pCSS1Parser->SetPageDescAttrs( nullptr, &aItemSet );
1973  break;
1974  }
1975  }
1976  }
1977  break;
1978 
1979  case HtmlTokenId::INPUT:
1980  InsertInput();
1981  break;
1982 
1983  case HtmlTokenId::TEXTAREA_ON:
1984  NewTextArea();
1986  break;
1987 
1988  case HtmlTokenId::SELECT_ON:
1989  NewSelect();
1991  break;
1992 
1993  case HtmlTokenId::ANCHOR_ON:
1994  NewAnchor();
1995  break;
1996 
1997  case HtmlTokenId::ANCHOR_OFF:
1998  EndAnchor();
1999  break;
2000 
2001  case HtmlTokenId::COMMENT:
2002  if( ( aToken.getLength() > 5 ) && ( ! m_bIgnoreHTMLComments ) )
2003  {
2004  // insert as Post-It
2005  // If there are no space characters right behind
2006  // the <!-- and on front of the -->, leave the comment untouched.
2007  if( ' ' == aToken[ 3 ] &&
2008  ' ' == aToken[ aToken.getLength()-3 ] )
2009  {
2010  OUString aComment( aToken.copy( 3, aToken.getLength()-5 ) );
2011  InsertComment(comphelper::string::strip(aComment, ' '));
2012  }
2013  else
2014  {
2015  OUString aComment = "<" + aToken + ">";
2016  InsertComment( aComment );
2017  }
2018  }
2019  break;
2020 
2021  case HtmlTokenId::MAP_ON:
2022  // Image Maps are read asynchronously: At first only an image map is created
2023  // Areas are processed later. Nevertheless the
2024  // ImageMap is inserted into the IMap-Array, because it might be used
2025  // already.
2026  m_pImageMap = new ImageMap;
2028  {
2029  if (!m_pImageMaps)
2030  m_pImageMaps.reset( new ImageMaps );
2031  m_pImageMaps->push_back(std::unique_ptr<ImageMap>(m_pImageMap));
2032  }
2033  else
2034  {
2035  delete m_pImageMap;
2036  m_pImageMap = nullptr;
2037  }
2038  break;
2039 
2040  case HtmlTokenId::MAP_OFF:
2041  // there is no ImageMap anymore (don't delete IMap, because it's
2042  // already contained in the array!)
2043  m_pImageMap = nullptr;
2044  break;
2045 
2046  case HtmlTokenId::AREA:
2047  if( m_pImageMap )
2048  ParseAreaOptions( m_pImageMap, m_sBaseURL, SvMacroItemId::OnMouseOver,
2049  SvMacroItemId::OnMouseOut );
2050  break;
2051 
2052  case HtmlTokenId::FRAMESET_ON:
2053  bInsertUnknown = m_bKeepUnknown;
2054  break;
2055 
2056  case HtmlTokenId::NOFRAMES_ON:
2057  if( IsInHeader() )
2058  FinishHeader();
2059  bInsertUnknown = m_bKeepUnknown;
2060  break;
2061 
2062  case HtmlTokenId::UNKNOWNCONTROL_ON:
2063  // Ignore content of unknown token in the header, if the token
2064  // does not start with a '!'.
2065  // (but judging from the code, also if does not start with a '%')
2066  // (and also if we're not somewhere we consider PRE)
2067  if( IsInHeader() && !IsReadPRE() && m_aUnknownToken.isEmpty() &&
2068  !sSaveToken.isEmpty() && '!' != sSaveToken[0] &&
2069  '%' != sSaveToken[0] )
2070  m_aUnknownToken = sSaveToken;
2071  [[fallthrough]];
2072 
2073  default:
2074  bInsertUnknown = m_bKeepUnknown;
2075  break;
2076  }
2077 
2078  if( bGetIDOption )
2079  InsertIDOption();
2080 
2081  if( bInsertUnknown )
2082  {
2083  OUStringBuffer aComment("HTML: <");
2084  if( (nToken >= HtmlTokenId::ONOFF_START) && isOffToken(nToken) )
2085  aComment.append("/");
2086  aComment.append(sSaveToken);
2087  if( !aToken.isEmpty() )
2088  {
2089  UnescapeToken();
2090  aComment.append(" ").append(aToken);
2091  }
2092  aComment.append(">");
2093  InsertComment( aComment.makeStringAndClear() );
2094  }
2095 
2096  // if there are temporary paragraph attributes and the
2097  // paragraph isn't empty then the paragraph attributes are final.
2098  if( !m_aParaAttrs.empty() && m_pPam->GetPoint()->nContent.GetIndex() )
2099  m_aParaAttrs.clear();
2100 }
2101 
2102 static void lcl_swhtml_getItemInfo( const HTMLAttr& rAttr,
2103  bool& rScriptDependent,
2104  sal_uInt16& rScriptType )
2105 {
2106  switch( rAttr.GetItem().Which() )
2107  {
2108  case RES_CHRATR_FONT:
2109  case RES_CHRATR_FONTSIZE:
2110  case RES_CHRATR_LANGUAGE:
2111  case RES_CHRATR_POSTURE:
2112  case RES_CHRATR_WEIGHT:
2113  rScriptType = i18n::ScriptType::LATIN;
2114  rScriptDependent = true;
2115  break;
2116  case RES_CHRATR_CJK_FONT:
2120  case RES_CHRATR_CJK_WEIGHT:
2121  rScriptType = i18n::ScriptType::ASIAN;
2122  rScriptDependent = true;
2123  break;
2124  case RES_CHRATR_CTL_FONT:
2128  case RES_CHRATR_CTL_WEIGHT:
2129  rScriptType = i18n::ScriptType::COMPLEX;
2130  rScriptDependent = true;
2131  break;
2132  default:
2133  rScriptDependent = false;
2134  break;
2135  }
2136 }
2137 
2138 bool SwHTMLParser::AppendTextNode( SwHTMLAppendMode eMode, bool bUpdateNum )
2139 {
2140  // A hard line break at the end always must be removed.
2141  // A second one we replace with paragraph spacing.
2142  sal_Int32 nLFStripped = StripTrailingLF();
2143  if( (AM_NOSPACE==eMode || AM_SOFTNOSPACE==eMode) && nLFStripped > 1 )
2144  eMode = AM_SPACE;
2145 
2146  // the hard attributes of this paragraph will never be invalid again
2147  m_aParaAttrs.clear();
2148 
2149  SwTextNode *pTextNode = (AM_SPACE==eMode || AM_NOSPACE==eMode) ?
2150  m_pPam->GetPoint()->nNode.GetNode().GetTextNode() : nullptr;
2151 
2152  if (pTextNode)
2153  {
2154  const SvxULSpaceItem& rULSpace =
2155  static_cast<const SvxULSpaceItem&>(pTextNode->SwContentNode::GetAttr( RES_UL_SPACE ));
2156 
2157  bool bChange = AM_NOSPACE==eMode ? rULSpace.GetLower() > 0
2158  : rULSpace.GetLower() == 0;
2159 
2160  if( bChange )
2161  {
2162  const SvxULSpaceItem& rCollULSpace =
2163  pTextNode->GetAnyFormatColl().GetULSpace();
2164 
2165  bool bMayReset = AM_NOSPACE==eMode ? rCollULSpace.GetLower() == 0
2166  : rCollULSpace.GetLower() > 0;
2167 
2168  if( bMayReset &&
2169  rCollULSpace.GetUpper() == rULSpace.GetUpper() )
2170  {
2171  pTextNode->ResetAttr( RES_UL_SPACE );
2172  }
2173  else
2174  {
2175  pTextNode->SetAttr(
2176  SvxULSpaceItem( rULSpace.GetUpper(),
2177  AM_NOSPACE==eMode ? 0 : HTML_PARSPACE, RES_UL_SPACE ) );
2178  }
2179  }
2180  }
2181  m_bNoParSpace = AM_NOSPACE==eMode || AM_SOFTNOSPACE==eMode;
2182 
2183  SwPosition aOldPos( *m_pPam->GetPoint() );
2184 
2185  bool bRet = m_xDoc->getIDocumentContentOperations().AppendTextNode( *m_pPam->GetPoint() );
2186 
2187  // split character attributes and maybe set none,
2188  // which are set for the whole paragraph
2189  const SwNodeIndex& rEndIdx = aOldPos.nNode;
2190  const sal_Int32 nEndCnt = aOldPos.nContent.GetIndex();
2191  const SwPosition& rPos = *m_pPam->GetPoint();
2192 
2193  HTMLAttr** pHTMLAttributes = reinterpret_cast<HTMLAttr**>(m_xAttrTab.get());
2194  for (auto nCnt = sizeof(HTMLAttrTable) / sizeof(HTMLAttr*); nCnt--; ++pHTMLAttributes)
2195  {
2196  HTMLAttr *pAttr = *pHTMLAttributes;
2197  if( pAttr && pAttr->GetItem().Which() < RES_PARATR_BEGIN )
2198  {
2199  bool bWholePara = false;
2200 
2201  while( pAttr )
2202  {
2203  HTMLAttr *pNext = pAttr->GetNext();
2204  if( pAttr->GetSttParaIdx() < rEndIdx.GetIndex() ||
2205  (!bWholePara &&
2206  pAttr->GetSttPara() == rEndIdx &&
2207  pAttr->GetSttCnt() != nEndCnt) )
2208  {
2209  bWholePara =
2210  pAttr->GetSttPara() == rEndIdx &&
2211  pAttr->GetSttCnt() == 0;
2212 
2213  sal_Int32 nStt = pAttr->m_nStartContent;
2214  bool bScript = false;
2215  sal_uInt16 nScriptItem;
2216  bool bInsert = true;
2217  lcl_swhtml_getItemInfo( *pAttr, bScript,
2218  nScriptItem );
2219  // set previous part
2220  if( bScript )
2221  {
2222  const SwTextNode *pTextNd =
2223  pAttr->GetSttPara().GetNode().GetTextNode();
2224  OSL_ENSURE( pTextNd, "No text node" );
2225  if( pTextNd )
2226  {
2227  const OUString& rText = pTextNd->GetText();
2228  sal_uInt16 nScriptText =
2229  g_pBreakIt->GetBreakIter()->getScriptType(
2230  rText, pAttr->GetSttCnt() );
2231  sal_Int32 nScriptEnd = g_pBreakIt->GetBreakIter()
2232  ->endOfScript( rText, nStt, nScriptText );
2233  while (nScriptEnd < nEndCnt && nScriptEnd != -1)
2234  {
2235  if( nScriptItem == nScriptText )
2236  {
2237  HTMLAttr *pSetAttr =
2238  pAttr->Clone( rEndIdx, nScriptEnd );
2239  pSetAttr->m_nStartContent = nStt;
2240  pSetAttr->ClearPrev();
2241  if( !pNext || bWholePara )
2242  {
2243  if (pSetAttr->m_bInsAtStart)
2244  m_aSetAttrTab.push_front( pSetAttr );
2245  else
2246  m_aSetAttrTab.push_back( pSetAttr );
2247  }
2248  else
2249  pNext->InsertPrev( pSetAttr );
2250  }
2251  nStt = nScriptEnd;
2252  nScriptText = g_pBreakIt->GetBreakIter()->getScriptType(
2253  rText, nStt );
2254  nScriptEnd = g_pBreakIt->GetBreakIter()
2255  ->endOfScript( rText, nStt, nScriptText );
2256  }
2257  bInsert = nScriptItem == nScriptText;
2258  }
2259  }
2260  if( bInsert )
2261  {
2262  HTMLAttr *pSetAttr =
2263  pAttr->Clone( rEndIdx, nEndCnt );
2264  pSetAttr->m_nStartContent = nStt;
2265 
2266  // When the attribute is for the whole paragraph, the outer
2267  // attributes aren't effective anymore. Hence it may not be inserted
2268  // in the Prev-List of an outer attribute, because that won't be
2269  // set. That leads to shifting when fields are used.
2270  if( !pNext || bWholePara )
2271  {
2272  if (pSetAttr->m_bInsAtStart)
2273  m_aSetAttrTab.push_front( pSetAttr );
2274  else
2275  m_aSetAttrTab.push_back( pSetAttr );
2276  }
2277  else
2278  pNext->InsertPrev( pSetAttr );
2279  }
2280  else
2281  {
2282  HTMLAttr *pPrev = pAttr->GetPrev();
2283  if( pPrev )
2284  {
2285  // the previous attributes must be set anyway
2286  if( !pNext || bWholePara )
2287  {
2288  if (pPrev->m_bInsAtStart)
2289  m_aSetAttrTab.push_front( pPrev );
2290  else
2291  m_aSetAttrTab.push_back( pPrev );
2292  }
2293  else
2294  pNext->InsertPrev( pPrev );
2295  }
2296  }
2297  pAttr->ClearPrev();
2298  }
2299 
2300  pAttr->SetStart( rPos );
2301  pAttr = pNext;
2302  }
2303  }
2304  }
2305 
2306  if( bUpdateNum )
2307  {
2308  if( GetNumInfo().GetDepth() )
2309  {
2310  sal_uInt8 nLvl = GetNumInfo().GetLevel();
2311  SetNodeNum( nLvl );
2312  }
2313  else
2315  }
2316 
2317  // We must set the attribute of the paragraph before now (because of JavaScript)
2318  SetAttr();
2319 
2320  // Now it is time to get rid of all script dependent hints that are
2321  // equal to the settings in the style
2322  SwTextNode *pTextNd = rEndIdx.GetNode().GetTextNode();
2323  OSL_ENSURE( pTextNd, "There is the txt node" );
2324  size_t nCntAttr = (pTextNd && pTextNd->GetpSwpHints())
2325  ? pTextNd->GetSwpHints().Count() : 0;
2326  if( nCntAttr )
2327  {
2328  // These are the end position of all script dependent hints.
2329  // If we find a hint that starts before the current end position,
2330  // we have to set it. If we find a hint that start behind or at
2331  // that position, we have to take the hint value into account.
2332  // If it is equal to the style, or in fact the paragraph value
2333  // for that hint, the hint is removed. Otherwise its end position
2334  // is remembered.
2335  sal_Int32 aEndPos[15] =
2336  { 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0 };
2337  SwpHints& rHints = pTextNd->GetSwpHints();
2338  for( size_t i=0; i < nCntAttr; i++ )
2339  {
2340  SwTextAttr *pHt = rHints.Get( i );
2341  sal_uInt16 nWhich = pHt->Which();
2342  sal_Int16 nIdx = 0;
2343  bool bFont = false;
2344  switch( nWhich )
2345  {
2346  case RES_CHRATR_FONT:
2347  nIdx = 0;
2348  bFont = true;
2349  break;
2350  case RES_CHRATR_FONTSIZE:
2351  nIdx = 1;
2352  break;
2353  case RES_CHRATR_LANGUAGE:
2354  nIdx = 2;
2355  break;
2356  case RES_CHRATR_POSTURE:
2357  nIdx = 3;
2358  break;
2359  case RES_CHRATR_WEIGHT:
2360  nIdx = 4;
2361  break;
2362  case RES_CHRATR_CJK_FONT:
2363  nIdx = 5;
2364  bFont = true;
2365  break;
2367  nIdx = 6;
2368  break;
2370  nIdx = 7;
2371  break;
2373  nIdx = 8;
2374  break;
2375  case RES_CHRATR_CJK_WEIGHT:
2376  nIdx = 9;
2377  break;
2378  case RES_CHRATR_CTL_FONT:
2379  nIdx = 10;
2380  bFont = true;
2381  break;
2383  nIdx = 11;
2384  break;
2386  nIdx = 12;
2387  break;
2389  nIdx = 13;
2390  break;
2391  case RES_CHRATR_CTL_WEIGHT:
2392  nIdx = 14;
2393  break;
2394  default:
2395  // Skip to next attribute
2396  continue;
2397  }
2398  const sal_Int32 nStt = pHt->GetStart();
2399  if( nStt >= aEndPos[nIdx] )
2400  {
2401  const SfxPoolItem& rItem =
2402  static_cast<const SwContentNode *>(pTextNd)->GetAttr( nWhich );
2403  if( bFont ? swhtml_css1atr_equalFontItems(rItem,pHt->GetAttr())
2404  : rItem == pHt->GetAttr() )
2405  {
2406  // The hint is the same as set in the paragraph and
2407  // therefore, it can be deleted
2408  // CAUTION!!! This WILL delete the hint and it MAY
2409  // also delete the SwpHints!!! To avoid any trouble
2410  // we leave the loop immediately if this is the last
2411  // hint.
2412  pTextNd->DeleteAttribute( pHt );
2413  if( 1 == nCntAttr )
2414  break;
2415  i--;
2416  nCntAttr--;
2417  }
2418  else
2419  {
2420  // The hint is different. Therefore all hints within that
2421  // hint have to be ignored.
2422  aEndPos[nIdx] = pHt->GetEnd() ? *pHt->GetEnd() : nStt;
2423  }
2424  }
2425  else
2426  {
2427  // The hint starts before another one ends.
2428  // The hint in this case is not deleted
2429  OSL_ENSURE( pHt->GetEnd() && *pHt->GetEnd() <= aEndPos[nIdx],
2430  "hints aren't nested properly!" );
2431  }
2432  }
2433  }
2434 
2435  if (!m_xTable && !--m_nParaCnt)
2436  Show();
2437 
2438  return bRet;
2439 }
2440 
// NOTE(review): the signature line of this function is missing from this
// listing (embedded line 2441 is absent); presumably this is
// SwHTMLParser::AddParSpace() - confirm against the original file.
//
// Re-establishes a lower paragraph spacing on the text node located directly
// before the node of the current PaM point. Nothing happens unless
// m_bNoParSpace is set; the flag is cleared on the way. Only a node whose
// effective RES_UL_SPACE item has a lower spacing of 0 is touched: the item
// is either reset (the paragraph style then applies again) or replaced by a
// SetAttr() call that depends on whether CJK or CTL character hints exist.
2442 {
2443  //If it already has ParSpace, return
2444  if( !m_bNoParSpace )
2445  return;
2446 
2447  m_bNoParSpace = false;
2448 
      // Index of the node immediately before the one the PaM point is in.
2449  sal_uLong nNdIdx = m_pPam->GetPoint()->nNode.GetIndex() - 1;
2450 
2451  SwTextNode *pTextNode = m_xDoc->GetNodes()[nNdIdx]->GetTextNode();
2452  if( !pTextNode )
2453  return;
2454 
      // Despite the r-prefix this is a copy of the item, not a reference.
2455  SvxULSpaceItem rULSpace =
2456  static_cast<const SvxULSpaceItem&>(pTextNode->SwContentNode::GetAttr( RES_UL_SPACE ));
2457  if( !rULSpace.GetLower() )
2458  {
2459  const SvxULSpaceItem& rCollULSpace =
2460  pTextNode->GetAnyFormatColl().GetULSpace();
      // The style already provides a lower spacing and agrees on the upper
      // spacing: dropping the hard item is sufficient.
2461  if( rCollULSpace.GetLower() &&
2462  rCollULSpace.GetUpper() == rULSpace.GetUpper() )
2463  {
2464  pTextNode->ResetAttr( RES_UL_SPACE );
2465  }
2466  else
2467  {
2468  //What I do here, is that I examine the attributes, and if
2469  //I find out, that it's CJK/CTL, then I set the paragraph space
2470  //to the value set in HTML_CJK_PARSPACE/HTML_CTL_PARSPACE.
2471 
2472  bool bIsCJK = false;
2473  bool bIsCTL = false;
2474 
2475  const size_t nCntAttr = pTextNode->GetpSwpHints()
2476  ? pTextNode->GetSwpHints().Count() : 0;
2477 
      // The scan stops at the first CJK or CTL character hint, so at most
      // one of the two flags ends up set.
2478  for(size_t i = 0; i < nCntAttr; ++i)
2479  {
2480  SwTextAttr *const pHt = pTextNode->GetSwpHints().Get(i);
2481  sal_uInt16 const nWhich = pHt->Which();
2482  if (RES_CHRATR_CJK_FONT == nWhich ||
2483  RES_CHRATR_CJK_FONTSIZE == nWhich ||
2484  RES_CHRATR_CJK_LANGUAGE == nWhich ||
2485  RES_CHRATR_CJK_POSTURE == nWhich ||
2486  RES_CHRATR_CJK_WEIGHT == nWhich)
2487  {
2488  bIsCJK = true;
2489  break;
2490  }
2491  if (RES_CHRATR_CTL_FONT == nWhich ||
2492  RES_CHRATR_CTL_FONTSIZE == nWhich ||
2493  RES_CHRATR_CTL_LANGUAGE == nWhich ||
2494  RES_CHRATR_CTL_POSTURE == nWhich ||
2495  RES_CHRATR_CTL_WEIGHT == nWhich)
2496  {
2497  bIsCTL = true;
2498  break;
2499  }
2500  }
2501 
      // NOTE(review): the argument lines of the three SetAttr() calls below
      // (embedded lines 2505, 2510 and 2513) are missing from this listing;
      // the calls cannot be read as printed.
2502  if( bIsCTL )
2503  {
2504  pTextNode->SetAttr(
2506  }
2507  else if( bIsCJK )
2508  {
2509  pTextNode->SetAttr(
2511  } else {
2512  pTextNode->SetAttr(
2514  }
2515  }
2516  }
2517 }
2518 
// NOTE(review): the signature line is missing from this listing (embedded
// line 2519); the assertion text below suggests this is SwHTMLParser::Show()
// - confirm against the original file.
//
// Ends the running action, flags the import as failed (eState =
// SvParserState::Error) when the doc shell reports an aborting import or the
// document's reference count is 1, and then starts a new action on the view
// shell delivered by CallStartAction().
2520 {
2521  // Here
2522  // - a EndAction is called, so the document is formatted
2523  // - a Reschedule is called,
2524  // - the own View-Shell is set again
2525  // - and a StartAction is called
2526 
2527  OSL_ENSURE( SvParserState::Working==eState, "Show not in working state - That can go wrong" );
2528  SwViewShell *pOldVSh = CallEndAction();
2529 
      // NOTE(review): embedded line 2530 is missing here; going by the step
      // list above it presumably performed the Reschedule - confirm.
2531 
2532  if( ( m_xDoc->GetDocShell() && m_xDoc->GetDocShell()->IsAbortingImport() )
2533  || 1 == m_xDoc->getReferenceCount() )
2534  {
2535  // was the import aborted by SFX?
2536  eState = SvParserState::Error;
2537  }
2538 
2539  // Fetch the SwViewShell again, as it could be destroyed in Reschedule.
2540  SwViewShell *pVSh = CallStartAction( pOldVSh );
2541 
2542  // is the current node not visible anymore, then we use a bigger increment
2543  if( pVSh )
2544  {
      // NOTE(review): embedded line 2545 is missing; it held the assignment
      // target and the condition of the conditional expression whose tail
      // ("? 5 : 50") is all that remains below.
2546  ? 5 : 50;
2547  }
2548 }
2549 
// NOTE(review): the signature line is missing from this listing (embedded
// line 2550); the assertion text below suggests this is
// SwHTMLParser::ShowStatLine() - confirm against the original file.
//
// Progress/refresh hook used while parsing. With a progress object
// (m_xProgress) the current read position of the input stream is passed to
// it. Without one, the abort conditions are checked (eState is set to
// SvParserState::Error when the doc shell reports an aborting import or the
// document's reference count is 1) and, if the view shell has an invalid
// rectangle, the action is ended and started again.
2551 {
2552  // Here
2553  // - a Reschedule is called, so it can be scrolled
2554  // - the own View-Shell is set again
2555  // - a StartAction/EndAction is called, when there was scrolling.
2556 
2557  OSL_ENSURE( SvParserState::Working==eState, "ShowStatLine not in working state - That can go wrong" );
2558 
2559  // scroll bar
2560  if (m_xProgress)
2561  {
2562  m_xProgress->Update(rInput.Tell());
      // NOTE(review): embedded line 2563 is missing from this listing.
2564  }
2565  else
2566  {
      // NOTE(review): embedded line 2567 is missing here; per the step list
      // above it is presumably the Reschedule - confirm.
2568 
2569  if( ( m_xDoc->GetDocShell() && m_xDoc->GetDocShell()->IsAbortingImport() )
2570  || 1 == m_xDoc->getReferenceCount() )
2571  // was the import aborted by SFX?
2572  eState = SvParserState::Error;
2573 
      // NOTE(review): embedded line 2574, which must have declared pVSh, is
      // missing from this listing.
2575  if( pVSh && pVSh->HasInvalidRect() )
2576  {
      // End and restart the action without the pointer/action checks
      // (both flags false) so the invalid area gets handled.
2577  CallEndAction( false, false );
2578  CallStartAction( pVSh, false );
2579  }
2580  }
2581 }
2582 
// NOTE(review): the signature line is missing from this listing (embedded
// line 2583); the assertion text below suggests this is
// SwHTMLParser::CallStartAction(), taking a view shell pointer (pVSh) and a
// flag (bChkPtr) as used in the body - confirm against the original file.
//
// Selects the view shell to work on, remembers it in m_pActionViewShell and
// starts an action on it. When pVSh is null or bChkPtr is set, the shell is
// (re)fetched from the document's layout access instead of trusting the
// passed pointer. Returns the remembered shell, which may be nullptr.
2584 {
2585  OSL_ENSURE( !m_pActionViewShell, "CallStartAction: SwViewShell already set" );
2586 
2587  if( !pVSh || bChkPtr )
2588  {
2589 #if OSL_DEBUG_LEVEL > 0
2590  SwViewShell *pOldVSh = pVSh;
2591 #endif
2592  pVSh = m_xDoc->getIDocumentLayoutAccess().GetCurrentViewShell();
2593 #if OSL_DEBUG_LEVEL > 0
2594  OSL_ENSURE( !pVSh || !pOldVSh || pOldVSh == pVSh, "CallStartAction: Who swapped the SwViewShell?" );
      // pVSh is already null when this condition holds, so the assignment
      // does not change its value.
2595  if( pOldVSh && !pVSh )
2596  pVSh = nullptr;
2597 #endif
2598  }
2599  m_pActionViewShell = pVSh;
2600 
2601  if( m_pActionViewShell )
2602  {
2603  if( dynamic_cast< const SwEditShell *>( m_pActionViewShell ) != nullptr )
2604  static_cast<SwEditShell*>(m_pActionViewShell)->StartAction();
2605  else
      // NOTE(review): the statement of this else branch (embedded line 2606)
      // is missing from this listing.
2607  }
2608 
2609  return m_pActionViewShell;
2610 }
2611 
// Ends the action on m_pActionViewShell and forgets the shell afterwards.
//
// bChkAction: when set, nothing is done (and m_pActionViewShell is returned
//             as is) unless the shell reports ActionPend().
// bChkPtr:    when set, the document's current view shell is fetched first;
//             if it differs from m_pActionViewShell the member is reset to
//             nullptr, which makes the early return below fire.
//
// For a SwEditShell the view lock and the EndActionByVirDev flag are saved
// before EndAction() and restored afterwards, and a pending jump-mark check
// (m_bChkJumpMark) is resolved. If the document's reference count is 1,
// eState is set to SvParserState::Error.
//
// NOTE(review): embedded lines 2636, 2646, 2652 and 2661 are missing from
// this listing, among them the statement of the final else branch and the
// declaration of the returned pVSh.
2612 SwViewShell *SwHTMLParser::CallEndAction( bool bChkAction, bool bChkPtr )
2613 {
2614  if( bChkPtr )
2615  {
2616  SwViewShell *pVSh = m_xDoc->getIDocumentLayoutAccess().GetCurrentViewShell();
2617  OSL_ENSURE( !pVSh || m_pActionViewShell == pVSh,
2618  "CallEndAction: Who swapped the SwViewShell?" );
2619 #if OSL_DEBUG_LEVEL > 0
      // pVSh is already null when this condition holds, so the assignment
      // does not change its value.
2620  if( m_pActionViewShell && !pVSh )
2621  pVSh = nullptr;
2622 #endif
2623  if( pVSh != m_pActionViewShell )
2624  m_pActionViewShell = nullptr;
2625  }
2626 
2627  if( !m_pActionViewShell || (bChkAction && !m_pActionViewShell->ActionPend()) )
2628  return m_pActionViewShell;
2629 
2630  if( dynamic_cast< const SwEditShell *>( m_pActionViewShell ) != nullptr )
2631  {
2632  // Already scrolled?, then make sure that the view doesn't move!
2633  const bool bOldLock = m_pActionViewShell->IsViewLocked();
2634  m_pActionViewShell->LockView( true );
2635  const bool bOldEndActionByVirDev = m_pActionViewShell->IsEndActionByVirDev();
      // NOTE(review): embedded line 2636 is missing here.
2637  static_cast<SwEditShell*>(m_pActionViewShell)->EndAction();
2638  m_pActionViewShell->SetEndActionByVirDev( bOldEndActionByVirDev );
2639  m_pActionViewShell->LockView( bOldLock );
2640 
2641  // bChkJumpMark is only set when the object was also found
2642  if( m_bChkJumpMark )
2643  {
2644  const Point aVisSttPos( DOCUMENTBORDER, DOCUMENTBORDER );
2645  if( GetMedium() && aVisSttPos == m_pActionViewShell->VisArea().Pos() )
      // NOTE(review): embedded line 2646 (the start of the call whose last
      // argument follows) is missing from this listing.
2647  GetMedium()->GetURLObject().GetMark() );
2648  m_bChkJumpMark = false;
2649  }
2650  }
2651  else
      // NOTE(review): the statement of this else branch (embedded line 2652)
      // is missing from this listing.
2653 
2654  // if the parser holds the last reference to the document, then we can
2655  // abort here and set an error.
2656  if( 1 == m_xDoc->getReferenceCount() )
2657  {
2658  eState = SvParserState::Error;
2659  }
2660 
      // NOTE(review): embedded line 2661, which must have declared pVSh, is
      // missing from this listing.
2662  m_pActionViewShell = nullptr;
2663 
2664  return pVSh;
2665 }
2666 
// NOTE(review): the signature line is missing from this listing (embedded
// line 2667); the assertion text below suggests this is
// SwHTMLParser::CheckActionViewShell() - confirm against the original file.
//
// Verifies that the remembered action view shell is still the document's
// current view shell. If the two differ, m_pActionViewShell is reset to
// nullptr. Returns m_pActionViewShell (possibly nullptr).
2668 {
2669  SwViewShell *pVSh = m_xDoc->getIDocumentLayoutAccess().GetCurrentViewShell();
2670  OSL_ENSURE( !pVSh || m_pActionViewShell == pVSh,
2671  "CheckActionViewShell: Who has swapped SwViewShell?" );
2672 #if OSL_DEBUG_LEVEL > 0
      // pVSh is already null when this condition holds, so the assignment
      // does not change its value.
2673  if( m_pActionViewShell && !pVSh )
2674  pVSh = nullptr;
2675 #endif
2676  if( pVSh != m_pActionViewShell )
2677  m_pActionViewShell = nullptr;
2678 
2679  return m_pActionViewShell;
2680 }
2681 
// Applies the attributes queued in m_aSetAttrTab to the document, then
// re-anchors the fly frames queued in m_aMoveFlyFrames and finally inserts
// the field attributes that were collected while walking the queue.
//
// bChkEnd:      when set, only entries that end before the current PaM
//               position (see the conditions below) are processed; otherwise
//               the decision is based on the nodes-array sections.
// bBeforeTable: when set, attributes ending in the current node are ended
//               in the previous node or discarded (see below).
// pPostIts:     optional deque; when given, postit and script field
//               attributes are moved to its front instead of being inserted.
//
// Each queue entry is the head of a Prev-chain; the whole chain is walked
// and every HTMLAttr in it is either deleted here or handed over to a
// unique_ptr (aFields / *pPostIts).
//
// NOTE(review): several lines are missing from this listing (embedded lines
// 2860-2861, 2867, 2878, 2912, 2914, 2922 and 2926); the affected spots are
// marked below.
2682 void SwHTMLParser::SetAttr_( bool bChkEnd, bool bBeforeTable,
2683  std::deque<std::unique_ptr<HTMLAttr>> *pPostIts )
2684 {
2685  std::unique_ptr<SwPaM> pAttrPam( new SwPaM( *m_pPam->GetPoint() ) );
2686  const SwNodeIndex& rEndIdx = m_pPam->GetPoint()->nNode;
2687  const sal_Int32 nEndCnt = m_pPam->GetPoint()->nContent.GetIndex();
2688  HTMLAttr* pAttr;
2689  SwContentNode* pCNd;
2690 
2691  std::vector<std::unique_ptr<HTMLAttr>> aFields;
2692 
      // Walk the queue from back to front so erasing entry n does not
      // disturb the entries that are still to be visited.
2693  for( auto n = m_aSetAttrTab.size(); n; )
2694  {
2695  pAttr = m_aSetAttrTab[ --n ];
2696  sal_uInt16 nWhich = pAttr->m_pItem->Which();
2697 
2698  sal_uLong nEndParaIdx = pAttr->GetEndParaIdx();
2699  bool bSetAttr;
2700  if( bChkEnd )
2701  {
2702  // Set character attribute with end early on, so set them still in
2703  // the current paragraph (because of JavaScript and various "chats"(?)).
2704  // This shouldn't be done for attributes which are used for
2705  // the whole paragraph, because they could be from a paragraph style
2706  // which can't be set. Because the attributes are inserted with
2707  // SETATTR_DONTREPLACE, they should be able to be set later.
2708  bSetAttr = ( nEndParaIdx < rEndIdx.GetIndex() &&
2709  (RES_LR_SPACE != nWhich || !GetNumInfo().GetNumRule()) ) ||
2710  ( !pAttr->IsLikePara() &&
2711  nEndParaIdx == rEndIdx.GetIndex() &&
2712  pAttr->GetEndCnt() < nEndCnt &&
2713  (isCHRATR(nWhich) || isTXTATR_WITHEND(nWhich)) ) ||
2714  ( bBeforeTable &&
2715  nEndParaIdx == rEndIdx.GetIndex() &&
2716  !pAttr->GetEndCnt() );
2717  }
2718  else
2719  {
2720  // Attributes in body nodes array section shouldn't be set if we are in a
2721  // special nodes array section, but vice versa it's possible.
2722  sal_uLong nEndOfIcons = m_xDoc->GetNodes().GetEndOfExtras().GetIndex();
2723  bSetAttr = nEndParaIdx < rEndIdx.GetIndex() ||
2724  rEndIdx.GetIndex() > nEndOfIcons ||
2725  nEndParaIdx <= nEndOfIcons;
2726  }
2727 
2728  if( bSetAttr )
2729  {
2730  // The attribute shouldn't be in the list of temporary paragraph
2731  // attributes, because then it would be deleted.
2732  while( !m_aParaAttrs.empty() )
2733  {
2734  OSL_ENSURE( pAttr != m_aParaAttrs.back(),
2735  "SetAttr: Attribute must not yet be set" );
2736  m_aParaAttrs.pop_back();
2737  }
2738 
2739  // then set it
2740  m_aSetAttrTab.erase( m_aSetAttrTab.begin() + n );
2741 
      // Process the entry and everything hanging in its Prev-chain.
      // Every "continue" below first advances pAttr to pPrev.
2742  while( pAttr )
2743  {
2744  HTMLAttr *pPrev = pAttr->GetPrev();
2745  if( !pAttr->m_bValid )
2746  {
2747  // invalid attributes can be deleted
2748  delete pAttr;
2749  pAttr = pPrev;
2750  continue;
2751  }
2752 
2753  pCNd = pAttr->m_nStartPara.GetNode().GetContentNode();
2754  if( !pCNd )
2755  {
2756  // because of the awful deleting of nodes an index can also
2757  // point to an end node :-(
2758  if ( (pAttr->GetSttPara() == pAttr->GetEndPara()) &&
2759  !isTXTATR_NOEND(nWhich) )
2760  {
2761  // when the end index also points to the node, we don't
2762  // need to set attributes anymore, except if it's a text attribute.
2763  delete pAttr;
2764  pAttr = pPrev;
2765  continue;
2766  }
2767  pCNd = m_xDoc->GetNodes().GoNext( &(pAttr->m_nStartPara) );
2768  if( pCNd )
2769  pAttr->m_nStartContent = 0;
2770  else
2771  {
2772  OSL_ENSURE( false, "SetAttr: GoNext() failed!" );
2773  delete pAttr;
2774  pAttr = pPrev;
2775  continue;
2776  }
2777  }
2778  pAttrPam->GetPoint()->nNode = pAttr->m_nStartPara;
2779 
2780  // because of the deleting of BRs the start index can also
2781  // point behind the end the text
2782  if( pAttr->m_nStartContent > pCNd->Len() )
2783  pAttr->m_nStartContent = pCNd->Len();
2784  pAttrPam->GetPoint()->nContent.Assign( pCNd, pAttr->m_nStartContent );
2785 
      // Mark stays at the start position; Point is moved to the end below.
2786  pAttrPam->SetMark();
2787  if ( (pAttr->GetSttPara() != pAttr->GetEndPara()) &&
2788  !isTXTATR_NOEND(nWhich) )
2789  {
2790  pCNd = pAttr->m_nEndPara.GetNode().GetContentNode();
2791  if( !pCNd )
2792  {
2793  pCNd = SwNodes::GoPrevious( &(pAttr->m_nEndPara) );
2794  if( pCNd )
2795  pAttr->m_nEndContent = pCNd->Len();
2796  else
2797  {
2798  OSL_ENSURE( false, "SetAttr: GoPrevious() failed!" );
2799  pAttrPam->DeleteMark();
2800  delete pAttr;
2801  pAttr = pPrev;
2802  continue;
2803  }
2804  }
2805 
2806  pAttrPam->GetPoint()->nNode = pAttr->m_nEndPara;
2807  }
2808  else if( pAttr->IsLikePara() )
2809  {
2810  pAttr->m_nEndContent = pCNd->Len();
2811  }
2812 
2813  // because of the deleting of BRs the start index can also
2814  // point behind the end the text
2815  if( pAttr->m_nEndContent > pCNd->Len() )
2816  pAttr->m_nEndContent = pCNd->Len();
2817 
2818  pAttrPam->GetPoint()->nContent.Assign( pCNd, pAttr->m_nEndContent );
2819  if( bBeforeTable &&
2820  pAttrPam->GetPoint()->nNode.GetIndex() ==
2821  rEndIdx.GetIndex() )
2822  {
2823  // If we're before inserting a table and the attribute ends
2824  // in the current node, then we must end it in the previous
2825  // node or discard it, if it starts in that node.
2826  if( nWhich != RES_BREAK && nWhich != RES_PAGEDESC &&
2827  !isTXTATR_NOEND(nWhich) )
2828  {
2829  if( pAttrPam->GetMark()->nNode.GetIndex() !=
2830  rEndIdx.GetIndex() )
2831  {
2832  OSL_ENSURE( !pAttrPam->GetPoint()->nContent.GetIndex(),
2833  "Content-Position before table not 0???" );
2834  pAttrPam->Move( fnMoveBackward );
2835  }
2836  else
2837  {
2838  pAttrPam->DeleteMark();
2839  delete pAttr;
2840  pAttr = pPrev;
2841  continue;
2842  }
2843  }
2844  }
2845 
2846  switch( nWhich )
2847  {
2848  case RES_FLTR_BOOKMARK: // insert bookmark
2849  {
2850  const OUString sName( static_cast<SfxStringItem*>(pAttr->m_pItem.get())->GetValue() );
2851  IDocumentMarkAccess* const pMarkAccess = m_xDoc->getIDocumentMarkAccess();
2852  IDocumentMarkAccess::const_iterator_t ppBkmk = pMarkAccess->findMark( sName );
2853  if( ppBkmk != pMarkAccess->getAllMarksEnd() &&
2854  (*ppBkmk)->GetMarkStart() == *pAttrPam->GetPoint() )
2855  break; // do not generate duplicates on this position
2856  pAttrPam->DeleteMark();
2857  const ::sw::mark::IMark* const pNewMark = pMarkAccess->makeMark(
2858  *pAttrPam,
2859  sName,
      // NOTE(review): the remaining makeMark() arguments (embedded lines
      // 2860-2861) are missing from this listing.
2862 
2863  // jump to bookmark
2864  if( JumpToMarks::Mark == m_eJumpTo && pNewMark->GetName() == m_sJmpMark )
2865  {
2866  m_bChkJumpMark = true;
      // NOTE(review): embedded line 2867 is missing from this listing.
2868  }
2869  }
2870  break;
2871  case RES_TXTATR_FIELD:
2872  case RES_TXTATR_ANNOTATION:
2873  case RES_TXTATR_INPUTFIELD:
2874  {
2875  SwFieldIds nFieldWhich =
2876  pPostIts
2877  ? static_cast<const SwFormatField *>(pAttr->m_pItem.get())->GetField()->GetTyp()->Which()
      // NOTE(review): the else operand of this conditional expression
      // (embedded line 2878) is missing from this listing.
2879  if( pPostIts && (SwFieldIds::Postit == nFieldWhich ||
2880  SwFieldIds::Script == nFieldWhich) )
2881  {
2882  pPostIts->emplace_front( pAttr );
2883  }
2884  else
2885  {
2886  aFields.emplace_back( pAttr);
2887  }
2888  }
      // Ownership of pAttr went to a unique_ptr above, so it is not deleted
      // here; the switch is left via continue instead of break.
2889  pAttrPam->DeleteMark();
2890  pAttr = pPrev;
2891  continue;
2892 
2893  case RES_LR_SPACE:
2894  if( pAttrPam->GetPoint()->nNode.GetIndex() ==
2895  pAttrPam->GetMark()->nNode.GetIndex())
2896  {
2897  // because of numbering set this attribute directly at node
2898  pCNd->SetAttr( *pAttr->m_pItem );
2899  break;
2900  }
2901  OSL_ENSURE( false,
2902  "LRSpace set over multiple paragraphs!" );
2903  [[fallthrough]]; // (shouldn't reach this point anyway)
2904 
2905  // tdf#94088 expand RES_BACKGROUND to the new fill attribute
2906  // definitions in the range [XATTR_FILL_FIRST .. XATTR_FILL_LAST].
2907  // This is the right place in the future if the adapted fill attributes
2908  // may be handled more directly in HTML import to handle them.
2909  case RES_BACKGROUND:
2910  {
2911  const SvxBrushItem& rBrush = static_cast< SvxBrushItem& >(*pAttr->m_pItem);
      // NOTE(review): embedded lines 2912 and 2914 are missing; they must
      // have declared and filled aNewSet, which is used below.
2913 
2915  m_xDoc->getIDocumentContentOperations().InsertItemSet(*pAttrPam, aNewSet, SetAttrMode::DONTREPLACE);
2916  break;
2917  }
2918  default:
2919 
2920  // maybe jump to a bookmark
2921  if( RES_TXTATR_INETFMT == nWhich &&
      // NOTE(review): one operand of this condition (embedded line 2922) is
      // missing from this listing.
2923  m_sJmpMark == static_cast<SwFormatINetFormat*>(pAttr->m_pItem.get())->GetName() )
2924  {
2925  m_bChkJumpMark = true;
      // NOTE(review): embedded line 2926 is missing from this listing.
2927  }
2928 
2929  m_xDoc->getIDocumentContentOperations().InsertPoolItem( *pAttrPam, *pAttr->m_pItem, SetAttrMode::DONTREPLACE );
2930  }
2931  pAttrPam->DeleteMark();
2932 
2933  delete pAttr;
2934  pAttr = pPrev;
2935  }
2936  }
2937  }
2938 
      // Second pass: turn queued at-paragraph fly frames into at-character
      // anchored ones. m_aMoveFlyCnts[n] holds the content position that
      // belongs to m_aMoveFlyFrames[n]; both are erased together.
2939  for( auto n = m_aMoveFlyFrames.size(); n; )
2940  {
2941  SwFrameFormat *pFrameFormat = m_aMoveFlyFrames[ --n ];
2942 
2943  const SwFormatAnchor& rAnchor = pFrameFormat->GetAnchor();
2944  OSL_ENSURE( RndStdIds::FLY_AT_PARA == rAnchor.GetAnchorId(),
2945  "Only At-Para flys need special handling" );
2946  const SwPosition *pFlyPos = rAnchor.GetContentAnchor();
2947  sal_uLong nFlyParaIdx = pFlyPos->nNode.GetIndex();
2948  bool bMoveFly;
2949  if( bChkEnd )
2950  {
2951  bMoveFly = nFlyParaIdx < rEndIdx.GetIndex() ||
2952  ( nFlyParaIdx == rEndIdx.GetIndex() &&
2953  m_aMoveFlyCnts[n] < nEndCnt );
2954  }
2955  else
2956  {
2957  sal_uLong nEndOfIcons = m_xDoc->GetNodes().GetEndOfExtras().GetIndex();
2958  bMoveFly = nFlyParaIdx < rEndIdx.GetIndex() ||
2959  rEndIdx.GetIndex() > nEndOfIcons ||
2960  nFlyParaIdx <= nEndOfIcons;
2961  }
2962  if( bMoveFly )
2963  {
      // Frames are removed before the anchor changes and rebuilt by
      // MakeFrames() once anchor and orientation are updated.
2964  pFrameFormat->DelFrames();
2965  *pAttrPam->GetPoint() = *pFlyPos;
2966  pAttrPam->GetPoint()->nContent.Assign( pAttrPam->GetContentNode(),
2967  m_aMoveFlyCnts[n] );
2968  SwFormatAnchor aAnchor( rAnchor );
2969  aAnchor.SetType( RndStdIds::FLY_AT_CHAR );
2970  aAnchor.SetAnchor( pAttrPam->GetPoint() );
2971  pFrameFormat->SetFormatAttr( aAnchor );
2972 
2973  const SwFormatHoriOrient& rHoriOri = pFrameFormat->GetHoriOrient();
2974  if( text::HoriOrientation::LEFT == rHoriOri.GetHoriOrient() )
2975  {
2976  SwFormatHoriOrient aHoriOri( rHoriOri );
2977  aHoriOri.SetRelationOrient( text::RelOrientation::CHAR );
2978  pFrameFormat->SetFormatAttr( aHoriOri );
2979  }
2980  const SwFormatVertOrient& rVertOri = pFrameFormat->GetVertOrient();
2981  if( text::VertOrientation::TOP == rVertOri.GetVertOrient() )
2982  {
2983  SwFormatVertOrient aVertOri( rVertOri );
2984  aVertOri.SetRelationOrient( text::RelOrientation::CHAR );
2985  pFrameFormat->SetFormatAttr( aVertOri );
2986  }
2987 
2988  pFrameFormat->MakeFrames();
2989  m_aMoveFlyFrames.erase( m_aMoveFlyFrames.begin() + n );
2990  m_aMoveFlyCnts.erase( m_aMoveFlyCnts.begin() + n );
2991  }
2992  }
      // Third pass: insert the field attributes collected above; each one is
      // released (reset) right after its item has been inserted.
2993  for (auto & field : aFields)
2994  {
2995  pCNd = field->m_nStartPara.GetNode().GetContentNode();
2996  pAttrPam->GetPoint()->nNode = field->m_nStartPara;
2997  pAttrPam->GetPoint()->nContent.Assign( pCNd, field->m_nStartContent );
2998 
2999  if( bBeforeTable &&
3000  pAttrPam->GetPoint()->nNode.GetIndex() == rEndIdx.GetIndex() )
3001  {
3002  OSL_ENSURE( !bBeforeTable, "Aha, the case does occur" );
3003  OSL_ENSURE( !pAttrPam->GetPoint()->nContent.GetIndex(),
3004  "Content-Position before table not 0???" );
3005  // !!!
3006  pAttrPam->Move( fnMoveBackward );
3007  }
3008 
3009  m_xDoc->getIDocumentContentOperations().InsertPoolItem( *pAttrPam, *field->m_pItem );
3010 
3011  field.reset();
3012  }
3013  aFields.clear();
3014 }
3015 
// Opens a new attribute for rItem at the current PaM point.
//
// rAttrTable: attribute table handed on to the HTMLAttr constructor.
// ppAttr:     address of the list head for this attribute; on return
//             *ppAttr points to the newly created HTMLAttr.
// rItem:      pool item handed on to the HTMLAttr constructor.
//
// When a list already exists, the previous head is attached to the new
// attribute via InsertNext() and the new attribute becomes the head. The
// HTMLAttr is allocated with new and only stored in *ppAttr here.
3016 void SwHTMLParser::NewAttr(const std::shared_ptr<HTMLAttrTable>& rAttrTable, HTMLAttr **ppAttr, const SfxPoolItem& rItem )
3017 {
3018  // Font height and font colour as well as escape attributes may not be
3019  // combined. Therefore they're saved in a list and in it the last opened
3020  // attribute is at the beginning and count is always one. For all other
3021  // attributes count is just incremented.
3022  if( *ppAttr )
3023  {
3024  HTMLAttr *pAttr = new HTMLAttr(*m_pPam->GetPoint(), rItem, ppAttr, rAttrTable);
3025  pAttr->InsertNext( *ppAttr );
3026  (*ppAttr) = pAttr;
3027  }
3028  else
3029  (*ppAttr) = new HTMLAttr(*m_pPam->GetPoint(), rItem, ppAttr, rAttrTable);
3030 }
3031 
// Ends the open attribute pAttr at the current PaM position and removes it
// from its list of open attributes.
//
// pAttr:     the attribute to end; it is either queued for setting
//            (m_aSetAttrTab or the Prev-list of the next open attribute) or
//            deleted, so callers must not rely on it afterwards.
// bChkEmpty: when set, an attribute with an empty range is discarded
//            instead of being queued (subject to the exceptions in the
//            condition below).
//
// Returns false when the attribute was deleted instead of queued, true
// otherwise.
//
// For script dependent items that start and end in the same paragraph, the
// range is split at script boundaries and only the parts whose script type
// matches the item are kept.
3032 bool SwHTMLParser::EndAttr( HTMLAttr* pAttr, bool bChkEmpty )
3033 {
3034  bool bRet = true;
3035 
3036  // The list header is saved in the attribute.
3037  HTMLAttr **ppHead = pAttr->m_ppHead;
3038 
3039  OSL_ENSURE( ppHead, "No list header attribute found!" );
3040 
3041  // save the current position as end position
3042  const SwNodeIndex* pEndIdx = &m_pPam->GetPoint()->nNode;
3043  sal_Int32 nEndCnt = m_pPam->GetPoint()->nContent.GetIndex();
3044 
3045  // Is the last started or an earlier started attribute being ended?
3046  HTMLAttr *pLast = nullptr;
3047  if( ppHead && pAttr != *ppHead )
3048  {
3049  // The last started attribute isn't being ended
3050 
3051  // Then we look for attribute which was started immediately afterwards,
3052  // which has also not yet been ended (otherwise it would no longer be
3053  // in the list).
3054  pLast = *ppHead;
3055  while( pLast && pLast->GetNext() != pAttr )
3056  pLast = pLast->GetNext();
3057 
3058  OSL_ENSURE( pLast, "Attribute not found in own list!" );
3059  }
3060 
3061  bool bMoveBack = false;
3062  sal_uInt16 nWhich = pAttr->m_pItem->Which();
      // pEndIdx points into m_pPam, so after a successful backward move it
      // refers to the new node; only nEndCnt has to be re-read.
3063  if( !nEndCnt && RES_PARATR_BEGIN <= nWhich &&
3064  *pEndIdx != pAttr->GetSttPara() )
3065  {
3066  // Then move back one position in the content!
3067  bMoveBack = m_pPam->Move( fnMoveBackward );
3068  nEndCnt = m_pPam->GetPoint()->nContent.GetIndex();
3069  }
3070 
3071  // now end the attribute
3072  HTMLAttr *pNext = pAttr->GetNext();
3073 
3074  bool bInsert;
3075  sal_uInt16 nScriptItem = 0;
3076  bool bScript = false;
3077  // does it have a non-empty range?
3078  if( !bChkEmpty || (RES_PARATR_BEGIN <= nWhich && bMoveBack) ||
3079  RES_PAGEDESC == nWhich || RES_BREAK == nWhich ||
3080  *pEndIdx != pAttr->GetSttPara() ||
3081  nEndCnt != pAttr->GetSttCnt() )
3082  {
3083  bInsert = true;
3084  // We do some optimization for script dependent attributes here.
3085  if( *pEndIdx == pAttr->GetSttPara() )
3086  {
3087  lcl_swhtml_getItemInfo( *pAttr, bScript, nScriptItem );
3088  }
3089  }
3090  else
3091  {
3092  bInsert = false;
3093  }
3094 
      // Only set when the attribute is kept and its item is script dependent.
3095  const SwTextNode *pTextNd = (bInsert && bScript) ?
3096  pAttr->GetSttPara().GetNode().GetTextNode() :
3097  nullptr;
3098 
3099  if (pTextNd)
3100  {
3101  const OUString& rText = pTextNd->GetText();
3102  sal_uInt16 nScriptText = g_pBreakIt->GetBreakIter()->getScriptType(
3103  rText, pAttr->GetSttCnt() );
3104  sal_Int32 nScriptEnd = g_pBreakIt->GetBreakIter()
3105  ->endOfScript( rText, pAttr->GetSttCnt(), nScriptText );
      // Walk the script runs that end before nEndCnt. A run whose script
      // type matches the item gets its own clone ending at the run's end;
      // pAttr's start is then advanced past the run in either case.
3106  while (nScriptEnd < nEndCnt && nScriptEnd != -1)
3107  {
3108  if( nScriptItem == nScriptText )
3109  {
3110  HTMLAttr *pSetAttr = pAttr->Clone( *pEndIdx, nScriptEnd );
3111  pSetAttr->ClearPrev();
3112  if( pNext )
3113  pNext->InsertPrev( pSetAttr );
3114  else
3115  {
3116  if (pSetAttr->m_bInsAtStart)
3117  m_aSetAttrTab.push_front( pSetAttr );
3118  else
3119  m_aSetAttrTab.push_back( pSetAttr );
3120  }
3121  }
3122  pAttr->m_nStartContent = nScriptEnd;
3123  nScriptText = g_pBreakIt->GetBreakIter()->getScriptType(
3124  rText, nScriptEnd );
3125  nScriptEnd = g_pBreakIt->GetBreakIter()
3126  ->endOfScript( rText, nScriptEnd, nScriptText );
3127  }
      // The remaining tail is kept only if its script type matches the item.
3128  bInsert = nScriptItem == nScriptText;
3129  }
3130  if( bInsert )
3131  {
3132  pAttr->m_nEndPara = *pEndIdx;
3133  pAttr->m_nEndContent = nEndCnt;
3134  pAttr->m_bInsAtStart = RES_TXTATR_INETFMT != nWhich &&
3135  RES_TXTATR_CHARFMT != nWhich;
3136 
3137  if( !pNext )
3138  {
3139  // No open attributes of that type exists any longer, so all
3140  // can be set. Except they depend on another attribute, then
3141  // they're appended there.
3142  if (pAttr->m_bInsAtStart)
3143  m_aSetAttrTab.push_front( pAttr );
3144  else
3145  m_aSetAttrTab.push_back( pAttr );
3146  }
3147  else
3148  {
3149  // There are other open attributes of that type,
3150  // therefore the setting must be postponed.
3151  // Hence the current attribute is added at the end
3152  // of the Prev-List of the successor.
3153  pNext->InsertPrev( pAttr );
3154  }
3155  }
3156  else
3157  {
3158  // Then don't insert, but delete. Because of the "faking" of styles
3159  // by hard attributing there can be also other empty attributes in the
3160  // Prev-List, which must be set anyway.
3161  HTMLAttr *pPrev = pAttr->GetPrev();
3162  bRet = false;
3163  delete pAttr;
3164 
3165  if( pPrev )
3166  {
3167  // The previous attributes must be set anyway.
3168  if( pNext )
3169  pNext->InsertPrev( pPrev );
3170  else
3171  {
3172  if (pPrev->m_bInsAtStart)
3173  m_aSetAttrTab.push_front( pPrev );
3174  else
3175  m_aSetAttrTab.push_back( pPrev );
3176  }
3177  }
3178 
3179  }
3180 
3181  // If the first attribute of the list was set, then the list header
3182  // must be corrected as well.
3183  if( pLast )
3184  pLast->m_pNext = pNext;
3185  else if( ppHead )
3186  *ppHead = pNext;
3187 
      // NOTE(review): the statement controlled by the `if( bMoveBack )` below
      // (embedded line 3189) is missing from this listing; as printed, the
      // `if` would guard `return bRet;`. Presumably the PaM is moved forward
      // again here to undo the backward move above - confirm against the
      // original file.
3188  if( bMoveBack )
3190 
3191  return bRet;
3192 }
3193 
// NOTE(review): the signature line of this function is missing from this
// listing (embedded line 3194); presumably this is
// SwHTMLParser::DeleteAttr( HTMLAttr* pAttr ) - confirm against the original
// file.
//
// Removes the open attribute pAttr from its list and deletes it without
// setting it. Attributes hanging in its Prev-list are not dropped: they are
// handed to the next open attribute or, if there is none, queued in
// m_aSetAttrTab. m_aParaAttrs is cleared on entry.
3195 {
3196  // preliminary paragraph attributes are not allowed here, they could
3197  // be set here and then the pointers become invalid!
3198  OSL_ENSURE(m_aParaAttrs.empty(),
3199  "Danger: there are non-final paragraph attributes");
3200  m_aParaAttrs.clear();
3201 
3202  // The list header is saved in the attribute
3203  HTMLAttr **ppHead = pAttr->m_ppHead;
3204 
3205  OSL_ENSURE( ppHead, "no list header attribute found!" );
3206 
3207  // Is the last started or an earlier started attribute being removed?
3208  HTMLAttr *pLast = nullptr;
3209  if( ppHead && pAttr != *ppHead )
3210  {
3211  // The last started attribute isn't being ended
3212 
3213  // Then we look for attribute which was started immediately afterwards,
3214  // which has also not yet been ended (otherwise it would no longer be
3215  // in the list).
3216  pLast = *ppHead;
3217  while( pLast && pLast->GetNext() != pAttr )
3218  pLast = pLast->GetNext();
3219 
3220  OSL_ENSURE( pLast, "Attribute not found in own list!" );
3221  }
3222 
3223  // now delete the attribute
      // Everything still needed from pAttr is read before the delete.
3224  HTMLAttr *pNext = pAttr->GetNext();
3225  HTMLAttr *pPrev = pAttr->GetPrev();
3226  //hold ref to xAttrTab until end of scope to ensure *ppHead validity
3227  std::shared_ptr<HTMLAttrTable> xAttrTab(pAttr->m_xAttrTab);
3228  delete pAttr;
3229 
3230  if( pPrev )
3231  {
3232  // The previous attributes must be set anyway.
3233  if( pNext )
3234  pNext->InsertPrev( pPrev );
3235  else
3236  {
3237  if (pPrev->m_bInsAtStart)
3238  m_aSetAttrTab.push_front( pPrev );
3239  else
3240  m_aSetAttrTab.push_back( pPrev );
3241  }
3242  }
3243 
3244  // If the first attribute of the list was deleted, then the list header
3245  // must be corrected as well.
3246  if( pLast )
3247  pLast->m_pNext = pNext;
3248  else if( ppHead )
3249  *ppHead = pNext;
3250 }
3251 
// Moves every list of open attributes from the parser's current table
// (m_xAttrTab) into rNewAttrTab and leaves the current table empty.
//
// rNewAttrTab: destination table; every one of its slots is overwritten.
//
// Both tables are walked as plain arrays of HTMLAttr* with
// sizeof(HTMLAttrTable) / sizeof(HTMLAttr*) entries.
// NOTE(review): this assumes HTMLAttrTable consists solely of HTMLAttr*
// members - confirm against its declaration.
3252 void SwHTMLParser::SaveAttrTab(std::shared_ptr<HTMLAttrTable> const & rNewAttrTab)
3253 {
3254  // preliminary paragraph attributes are not allowed here, they could
3255  // be set here and then the pointers become invalid!
3256  OSL_ENSURE(m_aParaAttrs.empty(),
3257  "Danger: there are non-final paragraph attributes");
3258  m_aParaAttrs.clear();
3259 
3260  HTMLAttr** pHTMLAttributes = reinterpret_cast<HTMLAttr**>(m_xAttrTab.get());
3261  HTMLAttr** pSaveAttributes = reinterpret_cast<HTMLAttr**>(rNewAttrTab.get());
3262 
3263  for (auto nCnt = sizeof(HTMLAttrTable) / sizeof(HTMLAttr*); nCnt--; ++pHTMLAttributes, ++pSaveAttributes)
3264  {
3265  *pSaveAttributes = *pHTMLAttributes;
3266 
      // Every attribute in the moved list has to learn its new list head
      // and owning table.
3267  HTMLAttr *pAttr = *pSaveAttributes;
3268  while (pAttr)
3269  {
3270  pAttr->SetHead(pSaveAttributes, rNewAttrTab);
3271  pAttr = pAttr->GetNext();
3272  }
3273 
3274  *pHTMLAttributes = nullptr;
3275  }
3276 }
3277 
// Closes all open attributes at the current position and re-opens them in
// rNewAttrTab, leaving the parser's current table (m_xAttrTab) empty.
//
// rNewAttrTab:  destination table; every slot is reset and then filled with
//               the re-started attributes of the matching source slot.
// bMoveEndBack: when set, the end position used for the closed parts is the
//               end of the previous content node instead of the current
//               position; if that lies outside the content area nothing is
//               set at all.
//
// For each open attribute that covers a non-empty range before the end
// position, a clone ending there is queued for setting; otherwise only its
// Prev-list is passed on. The original attribute object is kept (other
// contexts hold pointers to it) and restarted at the current position.
//
// Both tables are walked as plain arrays of HTMLAttr* with
// sizeof(HTMLAttrTable) / sizeof(HTMLAttr*) entries.
// NOTE(review): this assumes HTMLAttrTable consists solely of HTMLAttr*
// members - confirm against its declaration.
3278 void SwHTMLParser::SplitAttrTab( std::shared_ptr<HTMLAttrTable> const & rNewAttrTab,
3279  bool bMoveEndBack )
3280 {
3281  // preliminary paragraph attributes are not allowed here, they could
3282  // be set here and then the pointers become invalid!
3283  OSL_ENSURE(m_aParaAttrs.empty(),
3284  "Danger: there are non-final paragraph attributes");
3285  m_aParaAttrs.clear();
3286 
3287  const SwNodeIndex& nSttIdx = m_pPam->GetPoint()->nNode;
3288  SwNodeIndex nEndIdx( nSttIdx );
3289 
3290  // close all still open attributes and re-open them after the table
3291  HTMLAttr** pHTMLAttributes = reinterpret_cast<HTMLAttr**>(m_xAttrTab.get());
3292  HTMLAttr** pSaveAttributes = reinterpret_cast<HTMLAttr**>(rNewAttrTab.get());
3293  bool bSetAttr = true;
3294  const sal_Int32 nSttCnt = m_pPam->GetPoint()->nContent.GetIndex();
3295  sal_Int32 nEndCnt = nSttCnt;
3296 
3297  if( bMoveEndBack )
3298  {
3299  sal_uLong nOldEnd = nEndIdx.GetIndex();
3300  sal_uLong nTmpIdx;
      // nTmpIdx ends up as the lower bound the moved end index must stay
      // above: EndOfInserts if either EndOfExtras or EndOfAutotext is
      // >= nOldEnd, otherwise the EndOfAutotext index assigned in the
      // second operand.
3301  if( ( nTmpIdx = m_xDoc->GetNodes().GetEndOfExtras().GetIndex()) >= nOldEnd ||
3302  ( nTmpIdx = m_xDoc->GetNodes().GetEndOfAutotext().GetIndex()) >= nOldEnd )
3303  {
3304  nTmpIdx = m_xDoc->GetNodes().GetEndOfInserts().GetIndex();
3305  }
3306  SwContentNode* pCNd = SwNodes::GoPrevious(&nEndIdx);
3307 
3308  // Don't set attributes, when the PaM was moved outside of the content area.
3309  bSetAttr = pCNd && nTmpIdx < nEndIdx.GetIndex();
3310 
      // bSetAttr implies pCNd is non-null, so the dereference is guarded.
3311  nEndCnt = (bSetAttr ? pCNd->Len() : 0);
3312  }
3313  for (auto nCnt = sizeof(HTMLAttrTable) / sizeof(HTMLAttr*); nCnt--; (++pHTMLAttributes, ++pSaveAttributes))
3314  {
3315  HTMLAttr *pAttr = *pHTMLAttributes;
3316  *pSaveAttributes = nullptr;
3317  while( pAttr )
3318  {
      // Read both links up front; Reset() below is commented as breaking
      // the link.
3319  HTMLAttr *pNext = pAttr->GetNext();
3320  HTMLAttr *pPrev = pAttr->GetPrev();
3321 
3322  if( bSetAttr &&
3323  ( pAttr->GetSttParaIdx() < nEndIdx.GetIndex() ||
3324  (pAttr->GetSttPara() == nEndIdx &&
3325  pAttr->GetSttCnt() != nEndCnt) ) )
3326  {
3327  // The attribute must be set before the list. We need the
3328  // original and therefore we clone it, because pointer to the
3329  // attribute exist in the other contexts. The Next-List is lost
3330  // in doing so, but the Previous-List is preserved.
3331  HTMLAttr *pSetAttr = pAttr->Clone( nEndIdx, nEndCnt );
3332 
3333  if( pNext )
3334  pNext->InsertPrev( pSetAttr );
3335  else
3336  {
3337  if (pSetAttr->m_bInsAtStart)
3338  m_aSetAttrTab.push_front( pSetAttr );
3339  else
3340  m_aSetAttrTab.push_back( pSetAttr );
3341  }
3342  }
3343  else if( pPrev )
3344  {
3345  // If the attribute doesn't need to be set before the table, then
3346  // the previous attributes must still be set.
3347  if( pNext )
3348  pNext->InsertPrev( pPrev );
3349  else
3350  {
3351  if (pPrev->m_bInsAtStart)
3352  m_aSetAttrTab.push_front( pPrev );
3353  else
3354  m_aSetAttrTab.push_back( pPrev );
3355  }
3356  }
3357 
3358  // set the start of the attribute anew and break link
3359  pAttr->Reset(nSttIdx, nSttCnt, pSaveAttributes, rNewAttrTab);
3360 
      // Append to the tail of the destination list so the original order
      // of the open attributes is kept.
3361  if (*pSaveAttributes)
3362  {
3363  HTMLAttr *pSAttr = *pSaveAttributes;
3364  while( pSAttr->GetNext() )
3365  pSAttr = pSAttr->GetNext();
3366  pSAttr->InsertNext( pAttr );
3367  }
3368  else
3369  *pSaveAttributes = pAttr;
3370 
3371  pAttr = pNext;
3372  }
3373 
3374  *pHTMLAttributes = nullptr;
3375  }
3376 }
3377 
3378 void SwHTMLParser::RestoreAttrTab(std::shared_ptr<HTMLAttrTable> const & rNewAttrTab)
3379 {
3380  // preliminary paragraph attributes are not allowed here, they could
3381  // be set here and then the pointers become invalid!
3382  OSL_ENSURE(m_aParaAttrs.empty(),
3383  "Danger: there are non-final paragraph attributes");
3384  m_aParaAttrs.clear();
3385 
3386  HTMLAttr** pHTMLAttributes = reinterpret_cast<HTMLAttr**>(m_xAttrTab.get());
3387  HTMLAttr** pSaveAttributes = reinterpret_cast<HTMLAttr**>(rNewAttrTab.get());
3388 
3389  for (auto nCnt = sizeof(HTMLAttrTable) / sizeof(HTMLAttr*); nCnt--; ++pHTMLAttributes, ++pSaveAttributes)
3390  {
3391  OSL_ENSURE(!*pHTMLAttributes, "The attribute table is not empty!");
3392 
3393  *pHTMLAttributes = *pSaveAttributes;
3394 
3395  HTMLAttr *pAttr = *pHTMLAttributes;
3396  while (pAttr)
3397  {
3398  OSL_ENSURE( !pAttr->GetPrev() || !pAttr->GetPrev()->m_ppHead,
3399  "Previous attribute has still a header" );
3400  pAttr->SetHead(pHTMLAttributes, m_xAttrTab);
3401  pAttr = pAttr->GetNext();
3402  }
3403 
3404  *pSaveAttributes = nullptr;
3405  }
3406 }
3407 
3408 void SwHTMLParser::InsertAttr( const SfxPoolItem& rItem, bool bInsAtStart )
3409 {
3410  HTMLAttr* pTmp = new HTMLAttr(*m_pPam->GetPoint(), rItem, nullptr, std::shared_ptr<HTMLAttrTable>());
3411  if (bInsAtStart)
3412  m_aSetAttrTab.push_front( pTmp );
3413  else
3414  m_aSetAttrTab.push_back( pTmp );
3415 }
3416 
3417 void SwHTMLParser::InsertAttrs( std::deque<std::unique_ptr<HTMLAttr>> rAttrs )
3418 {
3419  while( !rAttrs.empty() )
3420  {
3421  std::unique_ptr<HTMLAttr> pAttr = std::move(rAttrs.front());
3422  InsertAttr( pAttr->GetItem(), false );
3423  rAttrs.pop_front();
3424  }
3425 }
3426 
3428 {
3429  OUString aId, aStyle, aLang, aDir;
3430  OUString aClass;
3431 
3432  const HTMLOptions& rHTMLOptions = GetOptions();
3433  for (size_t i = rHTMLOptions.size(); i; )
3434  {
3435  const HTMLOption& rOption = rHTMLOptions[--i];
3436  switch( rOption.GetToken() )
3437  {
3438  case HtmlOptionId::ID:
3439  aId = rOption.GetString();
3440  break;
3441  case HtmlOptionId::STYLE:
3442  aStyle = rOption.GetString();
3443  break;
3444  case HtmlOptionId::CLASS:
3445  aClass = rOption.GetString();
3446  break;
3447  case HtmlOptionId::LANG:
3448  aLang = rOption.GetString();
3449  break;
3450  case HtmlOptionId::DIR:
3451  aDir = rOption.GetString();
3452  break;
3453  default: break;
3454  }
3455  }
3456 
3457  // create a new context
3458  std::unique_ptr<HTMLAttrContext> xCntxt(new HTMLAttrContext(nToken));
3459 
3460  // parse styles
3461  if( HasStyleOptions( aStyle, aId, aClass, &aLang, &aDir ) )
3462  {
3463  SfxItemSet aItemSet( m_xDoc->GetAttrPool(), m_pCSS1Parser->GetWhichMap() );
3464  SvxCSS1PropertyInfo aPropInfo;
3465 
3466  if( ParseStyleOptions( aStyle, aId, aClass, aItemSet, aPropInfo, &aLang, &aDir ) )
3467  {
3468  if( HtmlTokenId::SPAN_ON != nToken || aClass.isEmpty() ||
3469  !CreateContainer( aClass, aItemSet, aPropInfo, xCntxt.get() ) )
3470  DoPositioning( aItemSet, aPropInfo, xCntxt.get() );
3471  InsertAttrs( aItemSet, aPropInfo, xCntxt.get(), true );
3472  }
3473  }
3474 
3475  // save the context
3476  PushContext(xCntxt);
3477 }
3478 
3480  HTMLAttr **ppAttr, const SfxPoolItem & rItem,
3481  HTMLAttr **ppAttr2, const SfxPoolItem *pItem2,
3482  HTMLAttr **ppAttr3, const SfxPoolItem *pItem3 )
3483 {
3484  OUString aId, aStyle, aClass, aLang, aDir;
3485 
3486  const HTMLOptions& rHTMLOptions = GetOptions();
3487  for (size_t i = rHTMLOptions.size(); i; )
3488  {
3489  const HTMLOption& rOption = rHTMLOptions[--i];
3490  switch( rOption.GetToken() )
3491  {
3492  case HtmlOptionId::ID:
3493  aId = rOption.GetString();
3494  break;
3495  case HtmlOptionId::STYLE:
3496  aStyle = rOption.GetString();
3497  break;
3498  case HtmlOptionId::CLASS:
3499  aClass = rOption.GetString();
3500  break;
3501  case HtmlOptionId::LANG:
3502  aLang = rOption.GetString();
3503  break;
3504  case HtmlOptionId::DIR:
3505  aDir = rOption.GetString();
3506  break;
3507  default: break;
3508  }
3509  }
3510 
3511  // create a new context
3512  std::unique_ptr<HTMLAttrContext> xCntxt(new HTMLAttrContext(nToken));
3513 
3514  // parse styles
3515  if( HasStyleOptions( aStyle, aId, aClass, &aLang, &aDir ) )
3516  {
3517  SfxItemSet aItemSet( m_xDoc->GetAttrPool(), m_pCSS1Parser->GetWhichMap() );
3518  SvxCSS1PropertyInfo aPropInfo;
3519 
3520  aItemSet.Put( rItem );
3521  if( pItem2 )
3522  aItemSet.Put( *pItem2 );
3523  if( pItem3 )
3524  aItemSet.Put( *pItem3 );
3525 
3526  if( ParseStyleOptions( aStyle, aId, aClass, aItemSet, aPropInfo, &aLang, &aDir ) )
3527  DoPositioning( aItemSet, aPropInfo, xCntxt.get() );
3528 
3529  InsertAttrs( aItemSet, aPropInfo, xCntxt.get(), true );
3530  }
3531  else
3532  {
3533  InsertAttr( ppAttr ,rItem, xCntxt.get() );
3534  if( pItem2 )
3535  {
3536  OSL_ENSURE( ppAttr2, "missing table entry for item2" );
3537  InsertAttr( ppAttr2, *pItem2, xCntxt.get() );
3538  }
3539  if( pItem3 )
3540  {
3541  OSL_ENSURE( ppAttr3, "missing table entry for item3" );
3542  InsertAttr( ppAttr3, *pItem3, xCntxt.get() );
3543  }
3544  }
3545 
3546  // save the context
3547  PushContext(xCntxt);
3548 }
3549 
3551 {
3552  // fetch context
3553  std::unique_ptr<HTMLAttrContext> xCntxt(PopContext(getOnToken(nToken)));
3554  if (xCntxt)
3555  {
3556  // and maybe end the attributes
3557  EndContext(xCntxt.get());
3558  }
3559 }
3560 
3562 {
3563  OUString aId, aStyle, aClass, aLang, aDir;
3564  sal_uInt16 nSize = 3;
3565 
3566  const HTMLOptions& rHTMLOptions = GetOptions();
3567  for (size_t i = rHTMLOptions.size(); i; )
3568  {
3569  const HTMLOption& rOption = rHTMLOptions[--i];
3570  switch( rOption.GetToken() )
3571  {
3572  case HtmlOptionId::SIZE:
3573  nSize = static_cast<sal_uInt16>(rOption.GetNumber());
3574  break;
3575  case HtmlOptionId::ID:
3576  aId = rOption.GetString();
3577  break;
3578  case HtmlOptionId::STYLE:
3579  aStyle = rOption.GetString();
3580  break;
3581  case HtmlOptionId::CLASS:
3582  aClass = rOption.GetString();
3583  break;
3584  case HtmlOptionId::LANG:
3585  aLang = rOption.GetString();
3586  break;
3587  case HtmlOptionId::DIR:
3588  aDir = rOption.GetString();
3589  break;
3590  default: break;
3591  }
3592  }
3593 
3594  if( nSize < 1 )
3595  nSize = 1;
3596 
3597  if( nSize > 7 )
3598  nSize = 7;
3599 
3600  // create a new context
3601  std::unique_ptr<HTMLAttrContext> xCntxt(new HTMLAttrContext(HtmlTokenId::BASEFONT_ON));
3602 
3603  // parse styles
3604  if( HasStyleOptions( aStyle, aId, aClass, &aLang, &aDir ) )
3605  {
3606  SfxItemSet aItemSet( m_xDoc->GetAttrPool(), m_pCSS1Parser->GetWhichMap() );
3607  SvxCSS1PropertyInfo aPropInfo;
3608 
3609  //CJK has different defaults
3610  SvxFontHeightItem aFontHeight( m_aFontHeights[nSize-1], 100, RES_CHRATR_FONTSIZE );
3611  aItemSet.Put( aFontHeight );
3612  SvxFontHeightItem aFontHeightCJK( m_aFontHeights[nSize-1], 100, RES_CHRATR_CJK_FONTSIZE );
3613  aItemSet.Put( aFontHeightCJK );
3614  //Complex type can contain so many types of letters,
3615  //that it's not really worthy to bother, IMO.
3616  //Still, I have set a default.
3617  SvxFontHeightItem aFontHeightCTL( m_aFontHeights[nSize-1], 100, RES_CHRATR_CTL_FONTSIZE );
3618  aItemSet.Put( aFontHeightCTL );
3619 
3620  if( ParseStyleOptions( aStyle, aId, aClass, aItemSet, aPropInfo, &aLang, &aDir ) )
3621  DoPositioning( aItemSet, aPropInfo, xCntxt.get() );
3622 
3623  InsertAttrs( aItemSet, aPropInfo, xCntxt.get(), true );
3624  }
3625  else
3626  {
3627  SvxFontHeightItem aFontHeight( m_aFontHeights[nSize-1], 100, RES_CHRATR_FONTSIZE );
3628  InsertAttr( &m_xAttrTab->pFontHeight, aFontHeight, xCntxt.get() );
3629  SvxFontHeightItem aFontHeightCJK( m_aFontHeights[nSize-1], 100, RES_CHRATR_CJK_FONTSIZE );
3630  InsertAttr( &m_xAttrTab->pFontHeightCJK, aFontHeightCJK, xCntxt.get() );
3631  SvxFontHeightItem aFontHeightCTL( m_aFontHeights[nSize-1], 100, RES_CHRATR_CTL_FONTSIZE );
3632  InsertAttr( &m_xAttrTab->pFontHeightCTL, aFontHeightCTL, xCntxt.get() );
3633  }
3634 
3635  // save the context
3636  PushContext(xCntxt);
3637 
3638  // save the font size
3639  m_aBaseFontStack.push_back( nSize );
3640 }
3641 
3643 {
3644  EndTag( HtmlTokenId::BASEFONT_ON );
3645 
3646  // avoid stack underflow in tables
3647  if( m_aBaseFontStack.size() > m_nBaseFontStMin )
3648  m_aBaseFontStack.erase( m_aBaseFontStack.begin() + m_aBaseFontStack.size() - 1 );
3649 }
3650 
3652 {
3653  sal_uInt16 nBaseSize =
3656  : 3 );
3657  sal_uInt16 nFontSize =
3658  ( m_aFontStack.size() > m_nFontStMin
3659  ? (m_aFontStack[m_aFontStack.size()-1] & FONTSIZE_MASK)
3660  : nBaseSize );
3661 
3662  OUString aFace, aId, aStyle, aClass, aLang, aDir;
3663  Color aColor;
3664  sal_uLong nFontHeight = 0; // actual font height to set
3665  sal_uInt16 nSize = 0; // font height in Netscape notation (1-7)
3666  bool bColor = false;
3667 
3668  const HTMLOptions& rHTMLOptions = GetOptions();
3669  for (size_t i = rHTMLOptions.size(); i; )
3670  {
3671  const HTMLOption& rOption = rHTMLOptions[--i];
3672  switch( rOption.GetToken() )
3673  {
3674  case HtmlOptionId::SIZE:
3675  if( HtmlTokenId::FONT_ON==nToken && !rOption.GetString().isEmpty() )
3676  {
3677  sal_Int32 nSSize;
3678  if( '+' == rOption.GetString()[0] ||
3679  '-' == rOption.GetString()[0] )
3680  nSSize = o3tl::saturating_add<sal_Int32>(nBaseSize, rOption.GetSNumber());
3681  else
3682  nSSize = static_cast<sal_Int32>(rOption.GetNumber());
3683 
3684  if( nSSize < 1 )
3685  nSSize = 1;
3686  else if( nSSize > 7 )
3687  nSSize = 7;
3688 
3689  nSize = static_cast<sal_uInt16>(nSSize);
3690  nFontHeight = m_aFontHeights[nSize-1];
3691  }
3692  break;
3693  case HtmlOptionId::COLOR:
3694  if( HtmlTokenId::FONT_ON==nToken )
3695  {
3696  rOption.GetColor( aColor );
3697  bColor = true;
3698  }
3699  break;
3700  case HtmlOptionId::FACE:
3701  if( HtmlTokenId::FONT_ON==nToken )
3702  aFace = rOption.GetString();
3703  break;
3704  case HtmlOptionId::ID:
3705  aId = rOption.GetString();
3706  break;
3707  case HtmlOptionId::STYLE:
3708  aStyle = rOption.GetString();
3709  break;
3710  case HtmlOptionId::CLASS:
3711  aClass = rOption.GetString();
3712  break;
3713  case HtmlOptionId::LANG:
3714  aLang = rOption.GetString();
3715  break;
3716  case HtmlOptionId::DIR:
3717  aDir = rOption.GetString();
3718  break;
3719  default: break;
3720  }
3721  }
3722 
3723  if( HtmlTokenId::FONT_ON != nToken )
3724  {
3725  // HTML_BIGPRINT_ON or HTML_SMALLPRINT_ON
3726 
3727  // In headings the current heading sets the font height
3728  // and not BASEFONT.
3729  const SwFormatColl *pColl = GetCurrFormatColl();
3730  sal_uInt16 nPoolId = pColl ? pColl->GetPoolFormatId() : 0;
3731  if( nPoolId>=RES_POOLCOLL_HEADLINE1 &&
3732  nPoolId<=RES_POOLCOLL_HEADLINE6 )
3733  {
3734  // If the font height in the heading wasn't changed yet,
3735  // then take the one from the style.
3736  if( m_nFontStHeadStart==m_aFontStack.size() )
3737  nFontSize = static_cast< sal_uInt16 >(6 - (nPoolId - RES_POOLCOLL_HEADLINE1));
3738  }
3739  else
3740  nPoolId = 0;
3741 
3742  if( HtmlTokenId::BIGPRINT_ON == nToken )
3743  nSize = ( nFontSize<7 ? nFontSize+1 : 7 );
3744  else
3745  nSize = ( nFontSize>1 ? nFontSize-1 : 1 );
3746 
3747  // If possible in headlines we fetch the new font height
3748  // from the style.
3749  if( nPoolId && nSize>=1 && nSize <=6 )
3750  nFontHeight =
3751  m_pCSS1Parser->GetTextCollFromPool(
3752  RES_POOLCOLL_HEADLINE1+6-nSize )->GetSize().GetHeight();
3753  else
3754  nFontHeight = m_aFontHeights[nSize-1];
3755  }
3756 
3757  OSL_ENSURE( !nSize == !nFontHeight, "HTML-Font-Size != Font-Height" );
3758 
3759  OUString aFontName, aStyleName;
3760  FontFamily eFamily = FAMILY_DONTKNOW; // family and pitch,
3761  FontPitch ePitch = PITCH_DONTKNOW; // if not found
3762  rtl_TextEncoding eEnc = osl_getThreadTextEncoding();
3763 
3764  if( !aFace.isEmpty() && !m_pCSS1Parser->IsIgnoreFontFamily() )
3765  {
3766  const FontList *pFList = nullptr;
3767  SwDocShell *pDocSh = m_xDoc->GetDocShell();
3768  if( pDocSh )
3769  {
3770  const SvxFontListItem *pFListItem =
3771  static_cast<const SvxFontListItem *>(pDocSh->GetItem(SID_ATTR_CHAR_FONTLIST));
3772  if( pFListItem )
3773  pFList = pFListItem->GetFontList();
3774  }
3775 
3776  bool bFound = false;
3777  sal_Int32 nStrPos = 0;
3778  while( nStrPos!= -1 )
3779  {
3780  OUString aFName = aFace.getToken( 0, ',', nStrPos );
3781  aFName = comphelper::string::strip(aFName, ' ');
3782  if( !aFName.isEmpty() )
3783  {
3784  if( !bFound && pFList )
3785  {
3786  sal_Handle hFont = pFList->GetFirstFontMetric( aFName );
3787  if( nullptr != hFont )
3788  {
3789  const FontMetric& rFMetric = FontList::GetFontMetric( hFont );
3790  if( RTL_TEXTENCODING_DONTKNOW != rFMetric.GetCharSet() )
3791  {
3792  bFound = true;
3793  if( RTL_TEXTENCODING_SYMBOL == rFMetric.GetCharSet() )
3794  eEnc = RTL_TEXTENCODING_SYMBOL;
3795  }
3796  }
3797  }
3798  if( !aFontName.isEmpty() )
3799  aFontName += ";";
3800  aFontName += aFName;
3801  }
3802  }
3803  }
3804 
3805  // create a new context
3806  std::unique_ptr<HTMLAttrContext> xCntxt(new HTMLAttrContext(nToken));
3807 
3808  // parse styles
3809  if( HasStyleOptions( aStyle, aId, aClass, &aLang, &aDir ) )
3810  {
3811  SfxItemSet aItemSet( m_xDoc->GetAttrPool(), m_pCSS1Parser->GetWhichMap() );
3812  SvxCSS1PropertyInfo aPropInfo;
3813 
3814  if( nFontHeight )
3815  {
3816  SvxFontHeightItem aFontHeight( nFontHeight, 100, RES_CHRATR_FONTSIZE );
3817  aItemSet.Put( aFontHeight );
3818  SvxFontHeightItem aFontHeightCJK( nFontHeight, 100, RES_CHRATR_CJK_FONTSIZE );
3819  aItemSet.Put( aFontHeightCJK );
3820  SvxFontHeightItem aFontHeightCTL( nFontHeight, 100, RES_CHRATR_CTL_FONTSIZE );
3821  aItemSet.Put( aFontHeightCTL );
3822  }
3823  if( bColor )
3824  aItemSet.Put( SvxColorItem(aColor, RES_CHRATR_COLOR) );
3825  if( !aFontName.isEmpty() )
3826  {
3827  SvxFontItem aFont( eFamily, aFontName, aStyleName, ePitch, eEnc, RES_CHRATR_FONT );
3828  aItemSet.Put( aFont );
3829  SvxFontItem aFontCJK( eFamily, aFontName, aStyleName, ePitch, eEnc, RES_CHRATR_CJK_FONT );
3830  aItemSet.Put( aFontCJK );
3831  SvxFontItem aFontCTL( eFamily, aFontName, aStyleName, ePitch, eEnc, RES_CHRATR_CTL_FONT );
3832  aItemSet.Put( aFontCTL );
3833  }
3834 
3835  if( ParseStyleOptions( aStyle, aId, aClass, aItemSet, aPropInfo, &aLang, &aDir ) )
3836  DoPositioning( aItemSet, aPropInfo, xCntxt.get() );
3837 
3838  InsertAttrs( aItemSet, aPropInfo, xCntxt.get(), true );
3839  }
3840  else
3841  {
3842  if( nFontHeight )
3843  {
3844  SvxFontHeightItem aFontHeight( nFontHeight, 100, RES_CHRATR_FONTSIZE );
3845  InsertAttr( &m_xAttrTab->pFontHeight, aFontHeight, xCntxt.get() );
3846  SvxFontHeightItem aFontHeightCJK( nFontHeight, 100, RES_CHRATR_CJK_FONTSIZE );
3847  InsertAttr( &m_xAttrTab->pFontHeight, aFontHeightCJK, xCntxt.get() );
3848  SvxFontHeightItem aFontHeightCTL( nFontHeight, 100, RES_CHRATR_CTL_FONTSIZE );
3849  InsertAttr( &m_xAttrTab->pFontHeight, aFontHeightCTL, xCntxt.get() );
3850  }
3851  if( bColor )
3852  InsertAttr( &m_xAttrTab->pFontColor, SvxColorItem(aColor, RES_CHRATR_COLOR), xCntxt.get() );
3853  if( !aFontName.isEmpty() )
3854  {
3855  SvxFontItem aFont( eFamily, aFontName, aStyleName, ePitch, eEnc, RES_CHRATR_FONT );
3856  InsertAttr( &m_xAttrTab->pFont, aFont, xCntxt.get() );
3857  SvxFontItem aFontCJK( eFamily, aFontName, aStyleName, ePitch, eEnc, RES_CHRATR_CJK_FONT );
3858  InsertAttr( &m_xAttrTab->pFont, aFontCJK, xCntxt.get() );
3859  SvxFontItem aFontCTL( eFamily, aFontName, aStyleName, ePitch, eEnc, RES_CHRATR_CTL_FONT );
3860  InsertAttr( &m_xAttrTab->pFont, aFontCTL, xCntxt.get() );
3861  }
3862  }
3863 
3864  // save the context
3865  PushContext(xCntxt);
3866 
3867  m_aFontStack.push_back( nSize );
3868 }
3869 
3871 {
3872  EndTag( nToken );
3873 
3874  // avoid stack underflow in tables
3875  if( m_aFontStack.size() > m_nFontStMin )
3876  m_aFontStack.erase( m_aFontStack.begin() + m_aFontStack.size() - 1 );
3877 }
3878 
3880 {
3881  if( m_pPam->GetPoint()->nContent.GetIndex() )
3883  else
3884  AddParSpace();
3885 
3886  m_eParaAdjust = SvxAdjust::End;
3887  OUString aId, aStyle, aClass, aLang, aDir;
3888 
3889  const HTMLOptions& rHTMLOptions = GetOptions();
3890  for (size_t i = rHTMLOptions.size(); i; )
3891  {
3892  const HTMLOption& rOption = rHTMLOptions[--i];
3893  switch( rOption.GetToken() )
3894  {
3895  case HtmlOptionId::ID:
3896  aId = rOption.GetString();
3897  break;
3898  case HtmlOptionId::ALIGN:
3899  m_eParaAdjust = rOption.GetEnum( aHTMLPAlignTable, m_eParaAdjust );
3900  break;
3901  case HtmlOptionId::STYLE:
3902  aStyle = rOption.GetString();
3903  break;
3904  case HtmlOptionId::CLASS:
3905  aClass = rOption.GetString();
3906  break;
3907  case HtmlOptionId::LANG:
3908  aLang = rOption.GetString();
3909  break;
3910  case HtmlOptionId::DIR:
3911  aDir = rOption.GetString();
3912  break;
3913  default: break;
3914  }
3915  }
3916 
3917  // create a new context
3918  std::unique_ptr<HTMLAttrContext> xCntxt(
3919  !aClass.isEmpty() ? new HTMLAttrContext( HtmlTokenId::PARABREAK_ON,
3920  RES_POOLCOLL_TEXT, aClass )
3921  : new HTMLAttrContext( HtmlTokenId::PARABREAK_ON ));
3922 
3923  // parse styles (Don't consider class. This is only possible as long as none of
3924  // the CSS1 properties of the class must be formatted hard!!!)
3925  if (HasStyleOptions(aStyle, aId, OUString(), &aLang, &aDir))
3926  {
3927  SfxItemSet aItemSet( m_xDoc->GetAttrPool(), m_pCSS1Parser->GetWhichMap() );
3928  SvxCSS1PropertyInfo aPropInfo;
3929 
3930  if (ParseStyleOptions(aStyle, aId, OUString(), aItemSet, aPropInfo, &aLang, &aDir))
3931  {
3932  OSL_ENSURE( aClass.isEmpty() || !m_pCSS1Parser->GetClass( aClass ),
3933  "Class is not considered" );
3934  DoPositioning( aItemSet, aPropInfo, xCntxt.get() );
3935  InsertAttrs( aItemSet, aPropInfo, xCntxt.get() );
3936  }
3937  }
3938 
3939  if( SvxAdjust::End != m_eParaAdjust )
3940  InsertAttr( &m_xAttrTab->pAdjust, SvxAdjustItem(m_eParaAdjust, RES_PARATR_ADJUST), xCntxt.get() );
3941 
3942  // and push on stack
3943  PushContext( xCntxt );
3944 
3945  // set the current style or its attributes
3946  SetTextCollAttrs( !aClass.isEmpty() ? m_aContexts.back().get() : nullptr );
3947 
3948  // progress bar
3949  ShowStatline();
3950 
3951  OSL_ENSURE( m_nOpenParaToken == HtmlTokenId::NONE, "Now an open paragraph element will be lost." );
3952  m_nOpenParaToken = HtmlTokenId::PARABREAK_ON;
3953 }
3954 
3955 void SwHTMLParser::EndPara( bool bReal )
3956 {
3957  if (HtmlTokenId::LI_ON==m_nOpenParaToken && m_xTable)
3958  {
3959 #if OSL_DEBUG_LEVEL > 0
3960  const SwNumRule *pNumRule = m_pPam->GetNode().GetTextNode()->GetNumRule();
3961  OSL_ENSURE( pNumRule, "Where is the NumRule" );
3962 #endif
3963  }
3964 
3965  // Netscape skips empty paragraphs, we do the same.
3966  if( bReal )
3967  {
3968  if( m_pPam->GetPoint()->nContent.GetIndex() )
3970  else
3971  AddParSpace();
3972  }
3973 
3974  // If a DD or DT was open, it's an implied definition list,
3975  // which must be closed now.
3976  if( (m_nOpenParaToken == HtmlTokenId::DT_ON || m_nOpenParaToken == HtmlTokenId::DD_ON) &&
3978  {
3979  m_nDefListDeep--;
3980  }
3981 
3982  // Pop the context of the stack. It can also be from an
3983  // implied opened definition list.
3984  std::unique_ptr<HTMLAttrContext> xCntxt(
3985  PopContext( m_nOpenParaToken != HtmlTokenId::NONE ? getOnToken(m_nOpenParaToken) : HtmlTokenId::PARABREAK_ON ));
3986 
3987  // close attribute
3988  if (xCntxt)
3989  {
3990  EndContext(xCntxt.get());
3991  SetAttr(); // because of JavaScript set paragraph attributes as fast as possible
3992  xCntxt.reset();
3993  }
3994 
3995  // reset the existing style
3996  if( bReal )
3997  SetTextCollAttrs();
3998 
3999  m_nOpenParaToken = HtmlTokenId::NONE;
4000 }
4001 
4003 {
4004  m_eParaAdjust = SvxAdjust::End;
4005 
4006  OUString aId, aStyle, aClass, aLang, aDir;
4007 
4008  const HTMLOptions& rHTMLOptions = GetOptions();
4009  for (size_t i = rHTMLOptions.size(); i; )
4010  {
4011  const HTMLOption& rOption = rHTMLOptions[--i];
4012  switch( rOption.GetToken() )
4013  {
4014  case HtmlOptionId::ID:
4015  aId = rOption.GetString();
4016  break;
4017  case HtmlOptionId::ALIGN:
4018  m_eParaAdjust = rOption.GetEnum( aHTMLPAlignTable, m_eParaAdjust );
4019  break;
4020  case HtmlOptionId::STYLE:
4021  aStyle = rOption.GetString();
4022  break;
4023  case HtmlOptionId::CLASS:
4024  aClass = rOption.GetString();
4025  break;
4026  case HtmlOptionId::LANG:
4027  aLang = rOption.GetString();
4028  break;
4029  case HtmlOptionId::DIR:
4030  aDir = rOption.GetString();
4031  break;
4032  default: break;
4033  }
4034  }
4035 
4036  // open a new paragraph
4037  if( m_pPam->GetPoint()->nContent.GetIndex() )
4039  else
4040  AddParSpace();
4041 
4042  // search for the matching style
4043  sal_uInt16 nTextColl;
4044  switch( nToken )
4045  {
4046  case HtmlTokenId::HEAD1_ON: nTextColl = RES_POOLCOLL_HEADLINE1; break;
4047  case HtmlTokenId::HEAD2_ON: nTextColl = RES_POOLCOLL_HEADLINE2; break;
4048  case HtmlTokenId::HEAD3_ON: nTextColl = RES_POOLCOLL_HEADLINE3; break;
4049  case HtmlTokenId::HEAD4_ON: nTextColl = RES_POOLCOLL_HEADLINE4; break;
4050  case HtmlTokenId::HEAD5_ON: nTextColl = RES_POOLCOLL_HEADLINE5; break;
4051  case HtmlTokenId::HEAD6_ON: nTextColl = RES_POOLCOLL_HEADLINE6; break;
4052  default: nTextColl = RES_POOLCOLL_STANDARD; break;
4053  }
4054 
4055  // create the context
4056  std::unique_ptr<HTMLAttrContext> xCntxt(new HTMLAttrContext(nToken, nTextColl, aClass));
4057 
4058  // parse styles (regarding class see also NewPara)
4059  if (HasStyleOptions(aStyle, aId, OUString(), &aLang, &aDir))
4060  {
4061  SfxItemSet aItemSet( m_xDoc->GetAttrPool(), m_pCSS1Parser->GetWhichMap() );
4062  SvxCSS1PropertyInfo aPropInfo;
4063 
4064  if (ParseStyleOptions(aStyle, aId, OUString(), aItemSet, aPropInfo, &aLang, &aDir))
4065  {
4066  OSL_ENSURE( aClass.isEmpty() || !m_pCSS1Parser->GetClass( aClass ),
4067  "Class is not considered" );
4068  DoPositioning( aItemSet, aPropInfo, xCntxt.get() );
4069  InsertAttrs( aItemSet, aPropInfo, xCntxt.get() );
4070  }
4071  }
4072 
4073  if( SvxAdjust::End != m_eParaAdjust )
4074  InsertAttr( &m_xAttrTab->pAdjust, SvxAdjustItem(m_eParaAdjust, RES_PARATR_ADJUST), xCntxt.get() );
4075 
4076  // and push on stack
4077  PushContext(xCntxt);
4078 
4079  // set the current style or its attributes
4080  SetTextCollAttrs(m_aContexts.back().get());
4081 
4083 
4084  // progress bar
4085  ShowStatline();
4086 }
4087 
4089 {
4090  // open a new paragraph
4091  if( m_pPam->GetPoint()->nContent.GetIndex() )
4093  else
4094  AddParSpace();
4095 
4096  // search context matching the token and fetch it from stack
4097  std::unique_ptr<HTMLAttrContext> xCntxt;
4098  auto nPos = m_aContexts.size();
4099  while( !xCntxt && nPos>m_nContextStMin )
4100  {
4101  switch( m_aContexts[--nPos]->GetToken() )
4102  {
4103  case HtmlTokenId::HEAD1_ON:
4104  case HtmlTokenId::HEAD2_ON:
4105  case HtmlTokenId::HEAD3_ON:
4106  case HtmlTokenId::HEAD4_ON:
4107  case HtmlTokenId::HEAD5_ON:
4108  case HtmlTokenId::HEAD6_ON:
4109  xCntxt = std::move(m_aContexts[nPos]);
4110  m_aContexts.erase( m_aContexts.begin() + nPos );
4111  break;
4112  default: break;
4113  }
4114  }
4115 
4116  // and now end attributes
4117  if (xCntxt)
4118  {
4119  EndContext(xCntxt.get());
4120  SetAttr(); // because of JavaScript set paragraph attributes as fast as possible
4121  xCntxt.reset();
4122  }
4123 
4124  // reset existing style
4125  SetTextCollAttrs();
4126 
4128 }
4129 
4130 void SwHTMLParser::NewTextFormatColl( HtmlTokenId nToken, sal_uInt16 nColl )
4131 {
4132  OUString aId, aStyle, aClass, aLang, aDir;
4133 
4134  const HTMLOptions& rHTMLOptions = GetOptions();
4135  for (size_t i = rHTMLOptions.size(); i; )
4136  {
4137  const HTMLOption& rOption = rHTMLOptions[--i];
4138  switch( rOption.GetToken() )
4139  {
4140  case HtmlOptionId::ID:
4141  aId = rOption.GetString();
4142  break;
4143  case HtmlOptionId::STYLE:
4144  aStyle = rOption.GetString();
4145  break;
4146  case HtmlOptionId::CLASS:
4147  aClass = rOption.GetString();
4148  break;
4149  case HtmlOptionId::LANG:
4150  aLang = rOption.GetString();
4151  break;
4152  case HtmlOptionId::DIR:
4153  aDir = rOption.GetString();
4154  break;
4155  default: break;
4156  }
4157  }
4158 
4159  // open a new paragraph
4160  SwHTMLAppendMode eMode = AM_NORMAL;
4161  switch( nToken )
4162  {
4163  case HtmlTokenId::LISTING_ON:
4164  case HtmlTokenId::XMP_ON:
4165  // These both tags will be mapped to the PRE style. For the case that a
4166  // a CLASS exists we will delete it so that we don't get the CLASS of
4167  // the PRE style.
4168  aClass.clear();
4169  [[fallthrough]];
4170  case HtmlTokenId::BLOCKQUOTE_ON:
4171  case HtmlTokenId::BLOCKQUOTE30_ON:
4172  case HtmlTokenId::PREFORMTXT_ON:
4173  eMode = AM_SPACE;
4174  break;
4175  case HtmlTokenId::ADDRESS_ON:
4176  eMode = AM_NOSPACE; // ADDRESS can follow on a <P> without </P>
4177  break;
4178  case HtmlTokenId::DT_ON:
4179  case HtmlTokenId::DD_ON:
4180  eMode = AM_SOFTNOSPACE;
4181  break;
4182  default:
4183  OSL_ENSURE( false, "unknown style" );
4184  break;
4185  }
4186  if( m_pPam->GetPoint()->nContent.GetIndex() )
4187  AppendTextNode( eMode );
4188  else if( AM_SPACE==eMode )
4189  AddParSpace();
4190 
4191  // ... and save in a context
4192  std::unique_ptr<HTMLAttrContext> xCntxt(new HTMLAttrContext(nToken, nColl, aClass));
4193 
4194  // parse styles (regarding class see also NewPara)
4195  if (HasStyleOptions(aStyle, aId, OUString(), &aLang, &aDir))
4196  {
4197  SfxItemSet aItemSet( m_xDoc->GetAttrPool(), m_pCSS1Parser->GetWhichMap() );
4198  SvxCSS1PropertyInfo aPropInfo;
4199 
4200  if (ParseStyleOptions(aStyle, aId, OUString(), aItemSet, aPropInfo, &aLang, &aDir))
4201  {
4202  OSL_ENSURE( aClass.isEmpty() || !m_pCSS1Parser->GetClass( aClass ),
4203  "Class is not considered" );
4204  DoPositioning( aItemSet, aPropInfo, xCntxt.get() );
4205  InsertAttrs( aItemSet, aPropInfo, xCntxt.get() );
4206  }
4207  }
4208 
4209  PushContext(xCntxt);
4210 
4211  // set the new style
4212  SetTextCollAttrs(m_aContexts.back().get());
4213 
4214  // update progress bar
4215  ShowStatline();
4216 }
4217 
4219 {
4220  SwHTMLAppendMode eMode = AM_NORMAL;
4221  switch( getOnToken(nToken) )
4222  {
4223  case HtmlTokenId::BLOCKQUOTE_ON:
4224  case HtmlTokenId::BLOCKQUOTE30_ON:
4225  case HtmlTokenId::PREFORMTXT_ON:
4226  case HtmlTokenId::LISTING_ON:
4227  case HtmlTokenId::XMP_ON:
4228  eMode = AM_SPACE;
4229  break;
4230  case HtmlTokenId::ADDRESS_ON:
4231  case HtmlTokenId::DT_ON:
4232  case HtmlTokenId::DD_ON:
4233  eMode = AM_SOFTNOSPACE;
4234  break;
4235  default:
4236  OSL_ENSURE( false, "unknown style" );
4237  break;
4238  }
4239  if( m_pPam->GetPoint()->nContent.GetIndex() )
4240  AppendTextNode( eMode );
4241  else if( AM_SPACE==eMode )
4242  AddParSpace();
4243 
4244  // pop current context of stack
4245  std::unique_ptr<HTMLAttrContext> xCntxt(PopContext(getOnToken(nToken)));
4246 
4247  // and now end attributes
4248  if (xCntxt)
4249  {
4250  EndContext(xCntxt.get());
4251  SetAttr(); // because of JavaScript set paragraph attributes as fast as possible
4252  xCntxt.reset();
4253  }
4254 
4255  // reset existing style
4256  SetTextCollAttrs();
4257 }
4258 
4260 {
4261  OUString aId, aStyle, aClass, aLang, aDir;
4262 
4263  const HTMLOptions& rHTMLOptions = GetOptions();
4264  for (size_t i = rHTMLOptions.size(); i; )
4265  {
4266  const HTMLOption& rOption = rHTMLOptions[--i];
4267  switch( rOption.GetToken() )
4268  {
4269  case HtmlOptionId::ID:
4270  aId = rOption.GetString();
4271  break;
4272  case HtmlOptionId::STYLE:
4273  aStyle = rOption.GetString();
4274  break;
4275  case HtmlOptionId::CLASS:
4276  aClass = rOption.GetString();
4277  break;
4278  case HtmlOptionId::LANG:
4279  aLang = rOption.GetString();
4280  break;
4281  case HtmlOptionId::DIR:
4282  aDir = rOption.GetString();
4283  break;
4284  default: break;
4285  }
4286  }
4287 
4288  // open a new paragraph
4289  bool bSpace = (GetNumInfo().GetDepth() + m_nDefListDeep) == 0;
4290  if( m_pPam->GetPoint()->nContent.GetIndex() )
4291  AppendTextNode( bSpace ? AM_SPACE : AM_SOFTNOSPACE );
4292  else if( bSpace )
4293  AddParSpace();
4294 
4295  // one level more
4296  m_nDefListDeep++;
4297 
4298  bool bInDD = false, bNotInDD = false;
4299  auto nPos = m_aContexts.size();
4300  while( !bInDD && !bNotInDD && nPos>m_nContextStMin )
4301  {
4302  HtmlTokenId nCntxtToken = m_aContexts[--nPos]->GetToken();
4303  switch( nCntxtToken )
4304  {
4305  case HtmlTokenId::DEFLIST_ON:
4306  case HtmlTokenId::DIRLIST_ON:
4307  case HtmlTokenId::MENULIST_ON:
4308  case HtmlTokenId::ORDERLIST_ON:
4309  case HtmlTokenId::UNORDERLIST_ON:
4310  bNotInDD = true;
4311  break;
4312  case HtmlTokenId::DD_ON:
4313  bInDD = true;
4314  break;
4315  default: break;
4316  }
4317  }
4318 
4319  // ... and save in a context
4320  std::unique_ptr<HTMLAttrContext> xCntxt(new HTMLAttrContext(HtmlTokenId::DEFLIST_ON));
4321 
4322  // in it save also the margins
4323  sal_uInt16 nLeft=0, nRight=0;
4324  short nIndent=0;
4325  GetMarginsFromContext( nLeft, nRight, nIndent );
4326 
4327  // The indentation, which already results from a DL, correlates with a DT
4328  // on the current level and this correlates to a DD from the previous level.
4329  // For a level >=2 we must add DD distance.
4330  if( !bInDD && m_nDefListDeep > 1 )
4331  {
4332 
4333  // and the one of the DT-style of the current level
4334  SvxLRSpaceItem rLRSpace =
4335  m_pCSS1Parser->GetTextFormatColl(RES_POOLCOLL_HTML_DD, OUString())
4336  ->GetLRSpace();
4337  nLeft = nLeft + static_cast< sal_uInt16 >(rLRSpace.GetTextLeft());
4338  }
4339 
4340  xCntxt->SetMargins( nLeft, nRight, nIndent );
4341 
4342  // parse styles
4343  if( HasStyleOptions( aStyle, aId, aClass, &aLang, &aDir ) )
4344  {
4345  SfxItemSet aItemSet( m_xDoc->GetAttrPool(), m_pCSS1Parser->GetWhichMap() );
4346  SvxCSS1PropertyInfo aPropInfo;
4347 
4348  if( ParseStyleOptions( aStyle, aId, aClass, aItemSet, aPropInfo, &aLang, &aDir ) )
4349  {
4350  DoPositioning( aItemSet, aPropInfo, xCntxt.get() );
4351  InsertAttrs( aItemSet, aPropInfo, xCntxt.get() );
4352  }
4353  }
4354 
4355  PushContext(xCntxt);
4356 
4357  // set the attributes of the new style
4358  if( m_nDefListDeep > 1 )
4359  SetTextCollAttrs(m_aContexts.back().get());
4360 }
4361 
4363 {
4364  bool bSpace = (GetNumInfo().GetDepth() + m_nDefListDeep) == 1;
4365  if( m_pPam->GetPoint()->nContent.GetIndex() )
4366  AppendTextNode( bSpace ? AM_SPACE : AM_SOFTNOSPACE );
4367  else if( bSpace )
4368  AddParSpace();
4369 
4370  // one level less
4371  if( m_nDefListDeep > 0 )
4372  m_nDefListDeep--;
4373 
4374  // pop current context of stack
4375  std::unique_ptr<HTMLAttrContext> xCntxt(PopContext(HtmlTokenId::DEFLIST_ON));
4376 
4377  // and now end attributes
4378  if (xCntxt)
4379  {
4380  EndContext(xCntxt.get());
4381  SetAttr(); // because of JavaScript set paragraph attributes as fast as possible
4382  xCntxt.reset();
4383  }
4384 
4385  // and set style
4386  SetTextCollAttrs();
4387 }
4388 
4390 {
4391  // determine if the DD/DT exist in a DL
4392  bool bInDefList = false, bNotInDefList = false;
4393  auto nPos = m_aContexts.size();
4394  while( !bInDefList && !bNotInDefList && nPos>m_nContextStMin )
4395  {
4396  HtmlTokenId nCntxtToken = m_aContexts[--nPos]->GetToken();
4397  switch( nCntxtToken )
4398  {
4399  case HtmlTokenId::DEFLIST_ON:
4400  bInDefList = true;
4401  break;
4402  case HtmlTokenId::DIRLIST_ON:
4403  case HtmlTokenId::MENULIST_ON:
4404  case HtmlTokenId::ORDERLIST_ON:
4405  case HtmlTokenId::UNORDERLIST_ON:
4406  bNotInDefList = true;
4407  break;
4408  default: break;
4409  }
4410  }
4411 
4412  // if not, then implicitly open a new DL
4413  if( !bInDefList )
4414  {
4415  m_nDefListDeep++;
4416  OSL_ENSURE( m_nOpenParaToken == HtmlTokenId::NONE,
4417  "Now an open paragraph element will be lost." );
4418  m_nOpenParaToken = nToken;
4419  }
4420 
4421  NewTextFormatColl( nToken, static_cast< sal_uInt16 >(nToken==HtmlTokenId::DD_ON ? RES_POOLCOLL_HTML_DD
4422  : RES_POOLCOLL_HTML_DT) );
4423 }
4424 
4426 {
4427  // open a new paragraph
4428  if( nToken == HtmlTokenId::NONE && m_pPam->GetPoint()->nContent.GetIndex() )
4430 
4431  // search context matching the token and fetch it from stack
4432  nToken = getOnToken(nToken);
4433  std::unique_ptr<HTMLAttrContext> xCntxt;
4434  auto nPos = m_aContexts.size();
4435  while( !xCntxt && nPos>m_nContextStMin )
4436  {
4437  HtmlTokenId nCntxtToken = m_aContexts[--nPos]->GetToken();
4438  switch( nCntxtToken )
4439  {
4440  case HtmlTokenId::DD_ON:
4441  case HtmlTokenId::DT_ON:
4442  if( nToken == HtmlTokenId::NONE || nToken == nCntxtToken )
4443  {
4444  xCntxt = std::move(m_aContexts[nPos]);
4445  m_aContexts.erase( m_aContexts.begin() + nPos );
4446  }
4447  break;
4448  case HtmlTokenId::DEFLIST_ON:
4449  // don't look at DD/DT outside the current DefList
4450  case HtmlTokenId::DIRLIST_ON:
4451  case HtmlTokenId::MENULIST_ON:
4452  case HtmlTokenId::ORDERLIST_ON:
4453  case HtmlTokenId::UNORDERLIST_ON:
4454  // and also not outside another list
4456  break;
4457  default: break;
4458  }
4459  }
4460 
4461  // and now end attributes
4462  if (xCntxt)
4463  {
4464  EndContext(xCntxt.get());
4465  SetAttr(); // because of JavaScript set paragraph attributes as fast as possible
4466  }
4467 }
4468 
4478 bool SwHTMLParser::HasCurrentParaFlys( bool bNoSurroundOnly,
4479  bool bSurroundOnly ) const
4480 {
4481  SwNodeIndex& rNodeIdx = m_pPam->GetPoint()->nNode;
4482 
4483  const SwFrameFormats& rFrameFormatTable = *m_xDoc->GetSpzFrameFormats();
4484 
4485  bool bFound = false;
4486  for ( size_t i=0; i<rFrameFormatTable.size(); i++ )
4487  {
4488  const SwFrameFormat *const pFormat = rFrameFormatTable[i];
4489  SwFormatAnchor const*const pAnchor = &pFormat->GetAnchor();
4490  // A frame was found, when
4491  // - it is paragraph-bound, and
4492  // - is anchored in current paragraph, and
4493  // - every paragraph-bound frame counts, or
4494  // - (only frames without wrapping count and) the frame doesn't have
4495  // a wrapping
4496  SwPosition const*const pAPos = pAnchor->GetContentAnchor();
4497  if (pAPos &&
4498  ((RndStdIds::FLY_AT_PARA == pAnchor->GetAnchorId()) ||
4499  (RndStdIds::FLY_AT_CHAR == pAnchor->GetAnchorId())) &&
4500  pAPos->nNode == rNodeIdx )
4501  {
4502  if( !(bNoSurroundOnly || bSurroundOnly) )
4503  {
4504  bFound = true;
4505  break;
4506  }
4507  else
4508  {
4509  // When looking for frames with wrapping, also disregard
4510  // ones with wrap-through. In this case it's (still) HIDDEN-Controls,
4511  // and you don't want to evade those when positioning.
4512  css::text::WrapTextMode eSurround = pFormat->GetSurround().GetSurround();
4513  if( bNoSurroundOnly )
4514  {
4515  if( css::text::WrapTextMode_NONE==eSurround )
4516  {
4517  bFound = true;
4518  break;
4519  }
4520  }
4521  if( bSurroundOnly )
4522  {
4523  if( css::text::WrapTextMode_NONE==eSurround )
4524  {
4525  bFound = false;
4526  break;
4527  }
4528  else if( css::text::WrapTextMode_THROUGH!=eSurround )
4529  {
4530  bFound = true;
4531  // Continue searching: It's possible that some without
4532  // wrapping will follow...
4533  }
4534  }
4535  }
4536  }
4537  }
4538 
4539  return bFound;
4540 }
4541 
4542 // the special methods for inserting objects
4543 
4545 {
4546  const SwContentNode* pCNd = m_pPam->GetContentNode();
4547  return pCNd ? &pCNd->GetAnyFormatColl() : nullptr;
4548 }
4549 
// NOTE(review): the declarator for this body is not part of this listing
// (presumably SwHTMLParser::SetTextCollAttrs( HTMLAttrContext *pContext ))
// - confirm against the header.
//
// Determines and sets the paragraph style for the current PaM from the
// context stack. All contexts from m_nContextStAttrMin upwards are visited:
// the last style that may be set becomes the paragraph style, all other
// styles are collected into an item set that is inserted as hard paragraph
// attributes. Paragraph margins are taken from the contexts and, where they
// differ from the chosen style, set as a hard LR-space attribute.
//
// pContext may be null. If it is non-null and names a style, the resulting
// margins and the style's upper/lower spacing are stored back into it.
4551 {
4552  SwTextFormatColl *pCollToSet = nullptr; // the style to set
// pItemSet is allocated with new on demand and deleted at the end of this body
4553  SfxItemSet *pItemSet = nullptr; // set of hard attributes
4554  sal_uInt16 nTopColl = pContext ? pContext->GetTextFormatColl() : 0;
4555  const OUString rTopClass = pContext ? pContext->GetClass() : OUString();
4556  sal_uInt16 nDfltColl = RES_POOLCOLL_TEXT;
4557 
4558  bool bInPRE=false; // some context info
4559 
4560  sal_uInt16 nLeftMargin = 0, nRightMargin = 0; // the margins and
4561  short nFirstLineIndent = 0; // indentations
4562 
// Pass over the context stack, bottom to top.
4563  for( auto i = m_nContextStAttrMin; i < m_aContexts.size(); ++i )
4564  {
4565  const HTMLAttrContext *pCntxt = m_aContexts[i].get();
4566 
4567  sal_uInt16 nColl = pCntxt->GetTextFormatColl();
4568  if( nColl )
4569  {
4570  // There is a style to set. Then at first we must decide,
4571  // if the style can be set.
4572  bool bSetThis = true;
4573  switch( nColl )
4574  {
4575  case RES_POOLCOLL_HTML_PRE:
4576  bInPRE = true;
4577  break;
4578  case RES_POOLCOLL_TEXT:
4579  // <TD><P CLASS=xxx> must become TD.xxx
4580  if( nDfltColl==RES_POOLCOLL_TABLE ||
4581  nDfltColl==RES_POOLCOLL_TABLE_HDLN )
4582  nColl = nDfltColl;
4583  break;
4584  case RES_POOLCOLL_HTML_HR:
4585  // also <HR> in <PRE> set as style, otherwise it can't
4586  // be exported anymore
4587  break;
4588  default:
// once a PRE style was seen, any other style is not set as the
// paragraph style; its attributes are applied hard instead (see below)
4589  if( bInPRE )
4590  bSetThis = false;
4591  break;
4592  }
4593 
4594  SwTextFormatColl *pNewColl =
4595  m_pCSS1Parser->GetTextFormatColl( nColl, pCntxt->GetClass() );
4596 
4597  if( bSetThis )
4598  {
4599  // If now a different style should be set as previously, the
4600  // previous style must be replaced by hard attribution.
4601 
4602  if( pCollToSet )
4603  {
4604  // insert the attributes hard, which previous style sets
4605  if( !pItemSet )
4606  pItemSet = new SfxItemSet( pCollToSet->GetAttrSet() );
4607  else
4608  {
4609  const SfxItemSet& rCollSet = pCollToSet->GetAttrSet();
4610  SfxItemSet aItemSet( *rCollSet.GetPool(),
4611  rCollSet.GetRanges() );
4612  aItemSet.Set( rCollSet );
4613  pItemSet->Put( aItemSet );
4614  }
4615  // but remove the attributes, which the current style sets,
4616  // because otherwise they will be overwritten later
4617  pItemSet->Differentiate( pNewColl->GetAttrSet() );
4618  }
4619 
4620  pCollToSet = pNewColl;
4621  }
4622  else
4623  {
4624  // hard attribution
4625  if( !pItemSet )
4626  pItemSet = new SfxItemSet( pNewColl->GetAttrSet() );
4627  else
4628  {
4629  const SfxItemSet& rCollSet = pNewColl->GetAttrSet();
4630  SfxItemSet aItemSet( *rCollSet.GetPool(),
4631  rCollSet.GetRanges() );
4632  aItemSet.Set( rCollSet );
4633  pItemSet->Put( aItemSet );
4634  }
4635  }
4636  }
4637  else
4638  {
4639  // Maybe a default style exists?
4640  nColl = pCntxt->GetDfltTextFormatColl();
4641  if( nColl )
4642  nDfltColl = nColl;
4643  }
4644 
4645  // if applicable fetch new paragraph indents
// (each context with changed LR space overwrites the values of the
// previous ones, so the topmost such context wins)
4646  if( pCntxt->IsLRSpaceChanged() )
4647  {
4648  sal_uInt16 nLeft=0, nRight=0;
4649 
4650  pCntxt->GetMargins( nLeft, nRight, nFirstLineIndent );
4651  nLeftMargin = nLeft;
4652  nRightMargin = nRight;
4653  }
4654  }
4655 
4656  // If in current context a new style should be set,
4657  // its paragraph margins must be inserted in the context.
4658  if( pContext && nTopColl )
4659  {
4660  // <TD><P CLASS=xxx> must become TD.xxx
4661  if( nTopColl==RES_POOLCOLL_TEXT &&
4662  (nDfltColl==RES_POOLCOLL_TABLE ||
4663  nDfltColl==RES_POOLCOLL_TABLE_HDLN) )
4664  nTopColl = nDfltColl;
4665 
4666  const SwTextFormatColl *pTopColl =
4667  m_pCSS1Parser->GetTextFormatColl( nTopColl, rTopClass );
4668  const SfxItemSet& rItemSet = pTopColl->GetAttrSet();
4669  const SfxPoolItem *pItem;
4670  if( SfxItemState::SET == rItemSet.GetItemState(RES_LR_SPACE,true, &pItem) )
4671  {
4672  const SvxLRSpaceItem *pLRItem =
4673  static_cast<const SvxLRSpaceItem *>(pItem);
4674 
4675  sal_Int32 nLeft = pLRItem->GetTextLeft();
4676  sal_Int32 nRight = pLRItem->GetRight();
4677  nFirstLineIndent = pLRItem->GetTextFirstLineOfst();
4678 
4679  // In Definition lists the margins also contain the margins from the previous levels
4680  if( RES_POOLCOLL_HTML_DD == nTopColl )
4681  {
4682  const SvxLRSpaceItem& rDTLRSpace = m_pCSS1Parser
4683  ->GetTextFormatColl(RES_POOLCOLL_HTML_DT, OUString())
4684  ->GetLRSpace();
4685  nLeft -= rDTLRSpace.GetTextLeft();
4686  nRight -= rDTLRSpace.GetRight();
4687  }
4688  else if( RES_POOLCOLL_HTML_DT == nTopColl )
4689  {
4690  nLeft = 0;
4691  nRight = 0;
4692  }
4693 
4694  // the paragraph margins add up
// NOTE(review): nLeft/nRight are sal_Int32 and may have become negative
// through the DT subtraction above; the casts to sal_uInt16 would then
// wrap around - confirm this cannot happen.
4695  nLeftMargin = nLeftMargin + static_cast< sal_uInt16 >(nLeft);
4696  nRightMargin = nRightMargin + static_cast< sal_uInt16 >(nRight);
4697 
4698  pContext->SetMargins( nLeftMargin, nRightMargin,
4699  nFirstLineIndent );
4700  }
4701  if( SfxItemState::SET == rItemSet.GetItemState(RES_UL_SPACE,true, &pItem) )
4702  {
4703  const SvxULSpaceItem *pULItem =
4704  static_cast<const SvxULSpaceItem *>(pItem);
4705  pContext->SetULSpace( pULItem->GetUpper(), pULItem->GetLower() );
4706  }
4707  }
4708 
4709  // If no style is set in the context use the text body.
// (more precisely: the pool style named by nDfltColl; margins that are
// still zero are taken over from that style)
4710  if( !pCollToSet )
4711  {
4712  pCollToSet = m_pCSS1Parser->GetTextCollFromPool( nDfltColl );
4713  const SvxLRSpaceItem& rLRItem = pCollToSet->GetLRSpace();
4714  if( !nLeftMargin )
4715  nLeftMargin = static_cast< sal_uInt16 >(rLRItem.GetTextLeft());
4716  if( !nRightMargin )
4717  nRightMargin = static_cast< sal_uInt16 >(rLRItem.GetRight());
4718  if( !nFirstLineIndent )
4719  nFirstLineIndent = rLRItem.GetTextFirstLineOfst();
4720  }
4721 
4722  // remove previous hard attribution of paragraph
4723  for( auto pParaAttr : m_aParaAttrs )
4724  pParaAttr->Invalidate();
4725  m_aParaAttrs.clear();
4726 
4727  // set the style
4728  m_xDoc->SetTextFormatColl( *m_pPam, pCollToSet );
4729 
4730  // if applicable correct the paragraph indent
// (only needed when the computed margins differ from those of the style)
4731  const SvxLRSpaceItem& rLRItem = pCollToSet->GetLRSpace();
4732  bool bSetLRSpace = nLeftMargin != rLRItem.GetTextLeft() ||
4733  nFirstLineIndent != rLRItem.GetTextFirstLineOfst() ||
4734  nRightMargin != rLRItem.GetRight();
4735 
4736  if( bSetLRSpace )
4737  {
4738  SvxLRSpaceItem aLRItem( rLRItem );
4739  aLRItem.SetTextLeft( nLeftMargin );
4740  aLRItem.SetRight( nRightMargin );
4741  aLRItem.SetTextFirstLineOfst( nFirstLineIndent );
// with a pending hard-attribute set the item simply joins it; otherwise
// it is opened and closed as a paragraph attribute of its own and
// remembered in m_aParaAttrs
4742  if( pItemSet )
4743  pItemSet->Put( aLRItem );
4744  else
4745  {
4746  NewAttr(m_xAttrTab, &m_xAttrTab->pLRSpace, aLRItem);
4747  m_xAttrTab->pLRSpace->SetLikePara();
4748  m_aParaAttrs.push_back( m_xAttrTab->pLRSpace );
4749  EndAttr( m_xAttrTab->pLRSpace, false );
4750  }
4751  }
4752 
4753  // and now set the attributes
4754  if( pItemSet )
4755  {
4756  InsertParaAttrs( *pItemSet );
4757  delete pItemSet;
4758  }
4759 }
4760 
4762 {
4763  OUString aId, aStyle, aLang, aDir;
4764  OUString aClass;
4765 
4766  const HTMLOptions& rHTMLOptions = GetOptions();
4767  for (size_t i = rHTMLOptions.size(); i; )
4768  {
4769  const HTMLOption& rOption = rHTMLOptions[--i];
4770  switch( rOption.GetToken() )
4771  {
4772  case HtmlOptionId::ID:
4773  aId = rOption.GetString();
4774  break;
4775  case HtmlOptionId::STYLE:
4776  aStyle = rOption.GetString();
4777  break;
4778  case HtmlOptionId::CLASS:
4779  aClass = rOption.GetString();
4780  break;
4781  case HtmlOptionId::LANG:
4782  aLang = rOption.GetString();
4783  break;
4784  case HtmlOptionId::DIR:
4785  aDir = rOption.GetString();
4786  break;
4787  default: break;
4788  }
4789  }
4790 
4791  // create a new context
4792  std::unique_ptr<HTMLAttrContext> xCntxt(new HTMLAttrContext(nToken));
4793 
4794  // set the style and save it in the context
4795  SwCharFormat* pCFormat = m_pCSS1Parser->GetChrFormat( nToken, aClass );
4796  OSL_ENSURE( pCFormat, "No character format found for token" );
4797 
4798  // parse styles (regarding class see also NewPara)
4799  if (HasStyleOptions(aStyle, aId, OUString(), &aLang, &aDir))
4800  {
4801  SfxItemSet aItemSet( m_xDoc->GetAttrPool(), m_pCSS1Parser->GetWhichMap() );
4802  SvxCSS1PropertyInfo aPropInfo;
4803 
4804  if (ParseStyleOptions(aStyle, aId, OUString(), aItemSet, aPropInfo, &aLang, &aDir))
4805  {
4806  OSL_ENSURE( aClass.isEmpty() || !m_pCSS1Parser->GetClass( aClass ),
4807  "Class is not considered" );
4808  DoPositioning( aItemSet, aPropInfo, xCntxt.get() );
4809  InsertAttrs( aItemSet, aPropInfo, xCntxt.get(), true );
4810  }
4811  }
4812 
4813  // Character formats are stored in their own stack and can never be inserted
4814  // by styles. Therefore the attribute doesn't exist in CSS1-Which-Range.
4815  if( pCFormat )
4816  InsertAttr( &m_xAttrTab->pCharFormats, SwFormatCharFormat( pCFormat ), xCntxt.get() );
4817 
4818  // save the context
4819  PushContext(xCntxt);
4820 }
4821 
// NOTE(review): the declarator for this body is not part of this listing
// (presumably SwHTMLParser::InsertSpacer()), and several interior lines are
// missing too; each gap is marked below. Confirm against the upstream file.
//
// Handles a spacer element. The TYPE, ALIGN, WIDTH, HEIGHT and SIZE options
// are read first; depending on the type the spacer becomes
//  - HTML_SPTYPE_BLOCK: an empty, content-protected fly frame of the given
//    size,
//  - HTML_SPTYPE_VERT:  additional lower paragraph spacing,
//  - HTML_SPTYPE_HORI:  a first-line indent (empty paragraph) or a kerned
//    space character (paragraph already has text).
4823 {
4824  // and if applicable change it via the options
4825  sal_Int16 eVertOri = text::VertOrientation::TOP;
4826  sal_Int16 eHoriOri = text::HoriOrientation::NONE;
4827  Size aSize( 0, 0);
4828  long nSize = 0;
4829  bool bPrcWidth = false;
4830  bool bPrcHeight = false;
4831  sal_uInt16 nType = HTML_SPTYPE_HORI;
4832 
// options are visited back to front
4833  const HTMLOptions& rHTMLOptions = GetOptions();
4834  for (size_t i = rHTMLOptions.size(); i; )
4835  {
4836  const HTMLOption& rOption = rHTMLOptions[--i];
4837  switch( rOption.GetToken() )
4838  {
4839  case HtmlOptionId::TYPE:
4840  rOption.GetEnum( nType, aHTMLSpacerTypeTable );
4841  break;
4842  case HtmlOptionId::ALIGN:
// the same ALIGN value is looked up in both the vertical and the
// horizontal alignment table
4843  eVertOri =
4844  rOption.GetEnum( aHTMLImgVAlignTable,
4845  eVertOri );
4846  eHoriOri =
4847  rOption.GetEnum( aHTMLImgHAlignTable,
4848  eHoriOri );
4849  break;
4850  case HtmlOptionId::WIDTH:
4851  // First only save as pixel value!
4852  bPrcWidth = (rOption.GetString().indexOf('%') != -1);
4853  aSize.setWidth( static_cast<long>(rOption.GetNumber()) );
4854  break;
4855  case HtmlOptionId::HEIGHT:
4856  // First only save as pixel value!
4857  bPrcHeight = (rOption.GetString().indexOf('%') != -1);
4858  aSize.setHeight( static_cast<long>(rOption.GetNumber()) );
4859  break;
4860  case HtmlOptionId::SIZE:
4861  // First only save as pixel value!
4862  nSize = rOption.GetNumber();
4863  break;
4864  default: break;
4865  }
4866  }
4867 
4868  switch( nType )
4869  {
4870  case HTML_SPTYPE_BLOCK:
4871  {
4872  // create an empty text frame
4873 
4874  // fetch the ItemSet
4875  SfxItemSet aFrameSet( m_xDoc->GetAttrPool(),
// NOTE(review): listing line 4876 is missing here - the remaining
// constructor argument(s) and the end of this declaration are not visible.
4877  if( !IsNewDoc() )
4878  Reader::ResetFrameFormatAttrs( aFrameSet );
4879 
4880  // set the anchor and the adjustment
4881  SetAnchorAndAdjustment( eVertOri, eHoriOri, aFrameSet );
4882 
4883  // and the size of the frame
4884  Size aDfltSz( MINFLY, MINFLY );
4885  Size aSpace( 0, 0 );
4886  SfxItemSet aDummyItemSet( m_xDoc->GetAttrPool(),
4887  m_pCSS1Parser->GetWhichMap() );
4888  SvxCSS1PropertyInfo aDummyPropInfo;
4889 
4890  SetFixSize( aSize, aDfltSz, bPrcWidth, bPrcHeight,
4891  aDummyPropInfo, aFrameSet );
4892  SetSpace( aSpace, aDummyItemSet, aDummyPropInfo, aFrameSet );
4893 
4894  // protect the content
4895  SvxProtectItem aProtectItem( RES_PROTECT) ;
4896  aProtectItem.SetContentProtect( true );
4897  aFrameSet.Put( aProtectItem );
4898 
4899  // create the frame
4900  RndStdIds eAnchorId =
4901  aFrameSet.Get(RES_ANCHOR).GetAnchorId();
4902  SwFrameFormat *pFlyFormat = m_xDoc->MakeFlySection( eAnchorId,
4903  m_pPam->GetPoint(), &aFrameSet );
4904  // Possibly create frames and register auto-bound frames.
4905  RegisterFlyFrame( pFlyFormat );
4906  }
4907  break;
4908  case HTML_SPTYPE_VERT:
4909  if( nSize > 0 )
4910  {
// NOTE(review): listing lines 4911 and 4913 are missing around the brace
// below - presumably a check for a default output device and the start of
// the "nSize = ...->PixelToLogic(...)" pixel-to-twip conversion; confirm.
4912  {
4914  ->PixelToLogic( Size(0,nSize),
4915  MapMode(MapUnit::MapTwip) ).Height();
4916  }
4917 
4918  // set a paragraph margin
4919  SwTextNode *pTextNode = nullptr;
4920  if( !m_pPam->GetPoint()->nContent.GetIndex() )
4921  {
4922  // if possible change the bottom paragraph margin
4923  // of previous node
4924 
4925  SetAttr(); // set still open paragraph attributes
4926 
4927  pTextNode = m_xDoc->GetNodes()[m_pPam->GetPoint()->nNode.GetIndex()-1]
4928  ->GetTextNode();
4929 
4930  // If the previous paragraph isn't a text node, then now an
4931  // empty paragraph is created, which already generates a single
4932  // line of spacing.
4933  if( !pTextNode )
4934  nSize = nSize>HTML_PARSPACE ? nSize-HTML_PARSPACE : 0;
4935  }
4936 
4937  if( pTextNode )
4938  {
4939  SvxULSpaceItem aULSpace( static_cast<const SvxULSpaceItem&>(pTextNode
// NOTE(review): listing line 4940 is missing here - the rest of the
// expression that fetches the node's current UL-space item is not visible.
4941  aULSpace.SetLower( aULSpace.GetLower() + static_cast<sal_uInt16>(nSize) );
4942  pTextNode->SetAttr( aULSpace );
4943  }
4944  else
4945  {
4946  NewAttr(m_xAttrTab, &m_xAttrTab->pULSpace, SvxULSpaceItem(0, static_cast<sal_uInt16>(nSize), RES_UL_SPACE));
4947  EndAttr( m_xAttrTab->pULSpace, false );
4948 
4949  AppendTextNode(); // Don't change spacing!
4950  }
4951  }
4952  break;
4953  case HTML_SPTYPE_HORI:
4954  if( nSize > 0 )
4955  {
4956  // If the paragraph is still empty, set first line
4957  // indentation, otherwise apply letter spacing over a space.
4958 
// NOTE(review): listing lines 4959 and 4961 are missing around the brace
// below - presumably the same default-device check and conversion start as
// in the HTML_SPTYPE_VERT case, here using the width; confirm.
4960  {
4962  ->PixelToLogic( Size(nSize,0),
4963  MapMode(MapUnit::MapTwip) ).Width();
4964  }
4965 
4966  if( !m_pPam->GetPoint()->nContent.GetIndex() )
4967  {
4968  sal_uInt16 nLeft=0, nRight=0;
4969  short nIndent = 0;
4970 
4971  GetMarginsFromContextWithNumBul( nLeft, nRight, nIndent );
4972  nIndent = nIndent + static_cast<short>(nSize);
4973 
4974  SvxLRSpaceItem aLRItem( RES_LR_SPACE );
4975  aLRItem.SetTextLeft( nLeft );
4976  aLRItem.SetRight( nRight );
4977  aLRItem.SetTextFirstLineOfst( nIndent );
4978 
4979  NewAttr(m_xAttrTab, &m_xAttrTab->pLRSpace, aLRItem);
4980  EndAttr( m_xAttrTab->pLRSpace, false );
4981  }
4982  else
4983  {
// a single space is inserted while a kerning attribute of nSize is open
4984  NewAttr(m_xAttrTab, &m_xAttrTab->pKerning, SvxKerningItem( static_cast<short>(nSize), RES_CHRATR_KERNING ));
4985  OUString aTmp( ' ' );
4986  m_xDoc->getIDocumentContentOperations().InsertString( *m_pPam, aTmp );
4987  EndAttr( m_xAttrTab->pKerning );
4988  }
4989  }
4990  }
4991 }
4992 
4993 sal_uInt16 SwHTMLParser::ToTwips( sal_uInt16 nPixel )
4994 {
4995  if( nPixel && Application::GetDefaultDevice() )
4996  {
4998  Size( nPixel, nPixel ), MapMode( MapUnit::MapTwip ) ).Width();
4999  return static_cast<sal_uInt16>(std::min(nTwips, SwTwips(SAL_MAX_UINT16)));
5000  }
5001  else
5002  return nPixel;
5003 }
5004 
5006 {
5008  if( nWidth )
5009  return nWidth;
5010 
5011  if( !m_aHTMLPageSize.Width() )
5012  {
5013  const SwFrameFormat& rPgFormat = m_pCSS1Parser->GetMasterPageDesc()->GetMaster();
5014 
5015  const SwFormatFrameSize& rSz = rPgFormat.GetFrameSize();
5016  const SvxLRSpaceItem& rLR = rPgFormat.GetLRSpace();
5017  const SvxULSpaceItem& rUL = rPgFormat.GetULSpace();
5018  const SwFormatCol& rCol = rPgFormat.GetCol();
5019 
5020  m_aHTMLPageSize.setWidth( rSz.GetWidth() - rLR.GetLeft() - rLR.GetRight() );
5021  m_aHTMLPageSize.setHeight( rSz.GetHeight() - rUL.GetUpper() - rUL.GetLower() );
5022 
5023  if( 1 < rCol.GetNumCols() )
5025  }
5026 
5027  return m_aHTMLPageSize.Width();
5028 }
5029 
5031 {
5032  OUString aId;
5033  const HTMLOptions& rHTMLOptions = GetOptions();
5034  for (size_t i = rHTMLOptions.size(); i; )
5035  {
5036  const HTMLOption& rOption = rHTMLOptions[--i];
5037  if( HtmlOptionId::ID==rOption.GetToken() )
5038  {
5039  aId = rOption.GetString();
5040  break;
5041  }
5042  }
5043 
5044  if( !aId.isEmpty() )
5045  InsertBookmark( aId );
5046 }
5047 
// NOTE(review): the declarator for this body is not part of this listing
// (presumably SwHTMLParser::InsertLineBreak()), and one interior statement
// is missing too (marked below). Confirm against the upstream file.
//
// Handles a line break element including its CLEAR, ID, STYLE and CLASS
// options. Depending on CLEAR the wrapping of frames anchored in the
// current paragraph is changed; style options may produce a page break
// and/or a bookmark. If neither a CLEAR took effect nor a break item was
// found, a plain line-feed character is inserted.
5049 {
5050  // <BR CLEAR=xxx> is handled as:
5051  // 1.) Only regard the paragraph-bound frames anchored in current paragraph.
5052  // 2.) For left-justified aligned frames, CLEAR=LEFT or ALL, and for right-
5053  // justified aligned frames, CLEAR=RIGHT or ALL, the wrap-through is
5054  // changed as following:
5055  // 3.) If the paragraph contains no text, then the frames don't get a wrapping
5056  // 4.) otherwise a left aligned frame gets a right "only anchor" wrapping
5057  // and a right aligned frame gets a left "only anchor" wrapping.
5058  // 5.) if in a non-empty paragraph the wrapping of a frame is changed,
5059  // then a new paragraph is opened
5060  // 6.) If no wrappings of frames are changed, a hard line break is inserted.
5061 
5062  OUString aId, aStyle, aClass; // the id of bookmark
5063  bool bClearLeft = false, bClearRight = false;
5064  bool bCleared = false; // Was a CLEAR executed?
5065 
5066  // then we fetch the options
5067  const HTMLOptions& rHTMLOptions = GetOptions();
5068  for (size_t i = rHTMLOptions.size(); i; )
5069  {
5070  const HTMLOption& rOption = rHTMLOptions[--i];
5071  switch( rOption.GetToken() )
5072  {
5073  case HtmlOptionId::CLEAR:
5074  {
// the value is compared case-insensitively; unknown values set no flag
5075  const OUString &rClear = rOption.GetString();
5076  if( rClear.equalsIgnoreAsciiCase( OOO_STRING_SVTOOLS_HTML_AL_all ) )
5077  {
5078  bClearLeft = true;
5079  bClearRight = true;
5080  }
5081  else if( rClear.equalsIgnoreAsciiCase( OOO_STRING_SVTOOLS_HTML_AL_left ) )
5082  bClearLeft = true;
5083  else if( rClear.equalsIgnoreAsciiCase( OOO_STRING_SVTOOLS_HTML_AL_right ) )
5084  bClearRight = true;
5085  }
5086  break;
5087  case HtmlOptionId::ID:
5088  aId = rOption.GetString();
5089  break;
5090  case HtmlOptionId::STYLE:
5091  aStyle = rOption.GetString();
5092  break;
5093  case HtmlOptionId::CLASS:
5094  aClass = rOption.GetString();
5095  break;
5096  default: break;
5097  }
5098  }
5099 
5100  // CLEAR is only supported for the current paragraph
5101  if( bClearLeft || bClearRight )
5102  {
5103  SwNodeIndex& rNodeIdx = m_pPam->GetPoint()->nNode;
5104  SwTextNode* pTextNd = rNodeIdx.GetNode().GetTextNode();
5105  if( pTextNd )
5106  {
5107  const SwFrameFormats& rFrameFormatTable = *m_xDoc->GetSpzFrameFormats();
5108 
5109  for( size_t i=0; i<rFrameFormatTable.size(); i++ )
5110  {
5111  SwFrameFormat *const pFormat = rFrameFormatTable[i];
5112  SwFormatAnchor const*const pAnchor = &pFormat->GetAnchor();
5113  SwPosition const*const pAPos = pAnchor->GetContentAnchor();
// only frames anchored to this paragraph (at-para or at-char) that
// currently have some wrapping are candidates
5114  if (pAPos &&
5115  ((RndStdIds::FLY_AT_PARA == pAnchor->GetAnchorId()) ||
5116  (RndStdIds::FLY_AT_CHAR == pAnchor->GetAnchorId())) &&
5117  pAPos->nNode == rNodeIdx &&
5118  pFormat->GetSurround().GetSurround() != css::text::WrapTextMode_NONE )
5119  {
// draw formats are always treated as left-aligned here
5120  sal_Int16 eHori = RES_DRAWFRMFMT == pFormat->Which()
5121  ? text::HoriOrientation::LEFT
5122  : pFormat->GetHoriOrient().GetHoriOrient();
5123 
// WrapTextMode_PARALLEL serves as the "leave this frame unchanged" marker
5124  css::text::WrapTextMode eSurround = css::text::WrapTextMode_PARALLEL;
5125  if( m_pPam->GetPoint()->nContent.GetIndex() )
5126  {
5127  if( bClearLeft && text::HoriOrientation::LEFT==eHori )
5128  eSurround = css::text::WrapTextMode_RIGHT;
5129  else if( bClearRight && text::HoriOrientation::RIGHT==eHori )
5130  eSurround = css::text::WrapTextMode_LEFT;
5131  }
5132  else if( (bClearLeft && text::HoriOrientation::LEFT==eHori) ||
5133  (bClearRight && text::HoriOrientation::RIGHT==eHori) )
5134  {
5135  eSurround = css::text::WrapTextMode_NONE;
5136  }
5137 
5138  if( css::text::WrapTextMode_PARALLEL != eSurround )
5139  {
5140  SwFormatSurround aSurround( eSurround );
5141  if( css::text::WrapTextMode_NONE != eSurround )
5142  aSurround.SetAnchorOnly( true );
5143  pFormat->SetFormatAttr( aSurround );
5144  bCleared = true;
5145  }
5146  }
5147  }
5148  }
5149  }
5150 
5151  // parse styles
// aBreakItem starts out as a "no break" item and is replaced by a clone of
// the parsed item when the style options yield a break
5152  std::shared_ptr<SvxFormatBreakItem> aBreakItem(std::make_shared<SvxFormatBreakItem>(SvxBreak::NONE, RES_BREAK));
5153  bool bBreakItem = false;
5154  if( HasStyleOptions( aStyle, aId, aClass ) )
5155  {
5156  SfxItemSet aItemSet( m_xDoc->GetAttrPool(), m_pCSS1Parser->GetWhichMap() );
5157  SvxCSS1PropertyInfo aPropInfo;
5158 
5159  if( ParseStyleOptions( aStyle, aId, aClass, aItemSet, aPropInfo ) )
5160  {
5161  if( m_pCSS1Parser->SetFormatBreak( aItemSet, aPropInfo ) )
5162  {
5163  aBreakItem.reset(static_cast<SvxFormatBreakItem*>(aItemSet.Get(RES_BREAK).Clone()));
5164  bBreakItem = true;
5165  }
5166  if( !aPropInfo.m_aId.isEmpty() )
5167  InsertBookmark( aPropInfo.m_aId );
5168  }
5169  }
5170 
// a "page after" break is attached before the line break / new paragraph ...
5171  if( bBreakItem && SvxBreak::PageAfter == aBreakItem->GetBreak() )
5172  {
5173  NewAttr(m_xAttrTab, &m_xAttrTab->pBreak, *aBreakItem);
5174  EndAttr( m_xAttrTab->pBreak, false );
5175  }
5176 
5177  if( !bCleared && !bBreakItem )
5178  {
5179  // If no CLEAR could or should be executed, a line break will be inserted
5180  OUString sTmp( u'\x000a' ); // make the Mac happy :-)
5181  m_xDoc->getIDocumentContentOperations().InsertString( *m_pPam, sTmp );
5182  }
5183  else if( m_pPam->GetPoint()->nContent.GetIndex() )
5184  {
5185  // If a CLEAR is executed in a non-empty paragraph, then after it
5186  // a new paragraph has to be opened.
5187  // MIB 21.02.97: Here actually we should change the bottom paragraph
5188  // margin to zero. This will fail for something like this <BR ..><P>
5189  // (>Netscape). That's why we don't do it.
// NOTE(review): listing line 5190 is missing here - presumably the call
// that opens the new paragraph described above; confirm.
5191  }
// ... whereas a "page before" break is attached after it
5192  if( bBreakItem && SvxBreak::PageBefore == aBreakItem->GetBreak() )
5193  {
5194  NewAttr(m_xAttrTab, &m_xAttrTab->pBreak, *aBreakItem);
5195  EndAttr( m_xAttrTab->pBreak, false );
5196  }
5197 }
5198 
// NOTE(review): the declarator for this body is not part of this listing
// (presumably SwHTMLParser::InsertHorzRule()), and three interior
// statements are missing too (marked below). Confirm against the upstream
// file.
//
// Handles a horizontal rule with its ID, SIZE, WIDTH, ALIGN, NOSHADE and
// COLOR options. The rule is a paragraph with the RES_POOLCOLL_HTML_HR
// style; line thickness/colour become a bottom border, and (outside of
// tables) a width below the available width is emulated with paragraph
// indents.
5200 {
5201  sal_uInt16 nSize = 0;
5202  sal_uInt16 nWidth = 0;
5203 
// SvxAdjust::End is the "no ALIGN given" start value; in the switch further
// down it ends up in the default branch together with Center
5204  SvxAdjust eAdjust = SvxAdjust::End;
5205 
5206  bool bPrcWidth = false;
5207  bool bNoShade = false;
5208  bool bColor = false;
5209 
5210  Color aColor;
5211  OUString aId;
5212 
5213  // let's fetch the options
5214  const HTMLOptions& rHTMLOptions = GetOptions();
5215  for (size_t i = rHTMLOptions.size(); i; )
5216  {
5217  const HTMLOption& rOption = rHTMLOptions[--i];
5218  switch( rOption.GetToken() )
5219  {
5220  case HtmlOptionId::ID:
5221  aId = rOption.GetString();
5222  break;
5223  case HtmlOptionId::SIZE:
5224  nSize = static_cast<sal_uInt16>(rOption.GetNumber());
5225  break;
5226  case HtmlOptionId::WIDTH:
5227  bPrcWidth = (rOption.GetString().indexOf('%') != -1);
5228  nWidth = static_cast<sal_uInt16>(rOption.GetNumber());
5229  if( bPrcWidth && nWidth>=100 )
5230  {
5231  // the default case are 100% lines (no attributes necessary)
5232  nWidth = 0;
5233  bPrcWidth = false;
5234  }
5235  break;
5236  case HtmlOptionId::ALIGN:
5237  eAdjust = rOption.GetEnum( aHTMLPAlignTable, eAdjust );
5238  break;
5239  case HtmlOptionId::NOSHADE:
5240  bNoShade = true;
5241  break;
5242  case HtmlOptionId::COLOR:
5243  rOption.GetColor( aColor );
5244  bColor = true;
5245  break;
5246  default: break;
5247  }
5248  }
5249 
5250  if( m_pPam->GetPoint()->nContent.GetIndex() )
// NOTE(review): listing line 5251 is missing here - presumably the
// statement guarded by the condition above (closing the paragraph that
// already has text). As listed, the condition would guard the following
// "if" instead; confirm.
5252  if( m_nOpenParaToken != HtmlTokenId::NONE )
5253  EndPara();
5254  AppendTextNode();
// NOTE(review): listing line 5255 is missing here - presumably the PaM is
// moved back into the paragraph that carries the rule; confirm.
5256 
5257  // ...and save in a context
5258  std::unique_ptr<HTMLAttrContext> xCntxt(
5259  new HTMLAttrContext(HtmlTokenId::HORZRULE, RES_POOLCOLL_HTML_HR, OUString()));
5260 
5261  PushContext(xCntxt);
5262 
5263  // set the new style
5264  SetTextCollAttrs(m_aContexts.back().get());
5265 
5266  // the hard attributes of the current paragraph will never become invalid
5267  m_aParaAttrs.clear();
5268 
5269  if( nSize>0 || bColor || bNoShade )
5270  {
5271  // set line colour and/or width
5272  if( !bColor )
5273  aColor = COL_GRAY;
5274 
5275  SvxBorderLine aBorderLine( &aColor );
5276  if( nSize )
5277  {
// SIZE is given in pixels; only the height component of the conversion
// is used as line width
5278  long nPWidth = 0;
5279  long nPHeight = static_cast<long>(nSize);
5280  SvxCSS1Parser::PixelToTwip( nPWidth, nPHeight );
5281  if ( !bNoShade )
5282  {
5283  aBorderLine.SetBorderLineStyle(SvxBorderLineStyle::DOUBLE);
5284  }
5285  aBorderLine.SetWidth( nPHeight );
5286  }
5287  else if( bNoShade )
5288  {
5289  aBorderLine.SetWidth( DEF_LINE_WIDTH_2 );
5290  }
5291  else
5292  {
5293  aBorderLine.SetBorderLineStyle(SvxBorderLineStyle::DOUBLE);
5294  aBorderLine.SetWidth( DEF_LINE_WIDTH_0 );
5295  }
5296 
// the line becomes the bottom border of the paragraph; the attribute is
// queued directly in m_aSetAttrTab
5297  SvxBoxItem aBoxItem(RES_BOX);
5298  aBoxItem.SetLine( &aBorderLine, SvxBoxItemLine::BOTTOM );
5299  HTMLAttr* pTmp = new HTMLAttr(*m_pPam->GetPoint(), aBoxItem, nullptr, std::shared_ptr<HTMLAttrTable>());
5300  m_aSetAttrTab.push_back( pTmp );
5301  }
5302  if( nWidth )
5303  {
5304  // If we aren't in a table, then the width value will be "faked" with
5305  // paragraph indents. That makes little sense in a table. In order to
5306  // avoid that the line is considered during the width calculation, it
5307  // still gets an appropriate LRSpace-Item.
5308  if (!m_xTable)
5309  {
5310  // fake length and alignment of line above paragraph indents
5311  long nBrowseWidth = GetCurrentBrowseWidth();
// NOTE(review): for a non-percentage WIDTH the pixel value held in nWidth
// is discarded and ToTwips() is applied to nBrowseWidth instead; it looks
// as if ToTwips( nWidth ) may have been intended - confirm before changing.
5312  nWidth = bPrcWidth ? static_cast<sal_uInt16>((nWidth*nBrowseWidth) / 100)
5313  : ToTwips( static_cast<sal_uInt16>(nBrowseWidth) );
5314  if( nWidth < MINLAY )
5315  nWidth = MINLAY;
5316 
// indents are only needed when the rule is narrower than the available width
5317  const SwFormatColl *pColl = (static_cast<long>(nWidth) < nBrowseWidth) ? GetCurrFormatColl() : nullptr;
5318  if (pColl)
5319  {
5320  SvxLRSpaceItem aLRItem( pColl->GetLRSpace() );
5321  long nDist = nBrowseWidth - nWidth;
5322 
5323  switch( eAdjust )
5324  {
5325  case SvxAdjust::Right:
5326  aLRItem.SetTextLeft( static_cast<sal_uInt16>(nDist) );
5327  break;
5328  case SvxAdjust::Left:
5329  aLRItem.SetRight( static_cast<sal_uInt16>(nDist) );
5330  break;
5331  case SvxAdjust::Center:
5332  default:
5333  nDist /= 2;
5334  aLRItem.SetTextLeft( static_cast<sal_uInt16>(nDist) );
5335  aLRItem.SetRight( static_cast<sal_uInt16>(nDist) );
5336  break;
5337  }
5338 
5339  HTMLAttr* pTmp = new HTMLAttr(*m_pPam->GetPoint(), aLRItem, nullptr, std::shared_ptr<HTMLAttrTable>());
5340  m_aSetAttrTab.push_back( pTmp );
5341  }
5342  }
5343  }
5344 
5345  // it's not possible to insert bookmarks in links
5346  if( !aId.isEmpty() )
5347  InsertBookmark( aId );
5348 
5349  // pop current context of stack
5350  std::unique_ptr<HTMLAttrContext> xPoppedContext(PopContext(HtmlTokenId::HORZRULE));
5351  xPoppedContext.reset();
5352 
// NOTE(review): listing line 5353 is missing here - presumably the PaM is
// moved forward again into the paragraph following the rule; confirm.
5354 
5355  // and set the current style in the next paragraph
5356  SetTextCollAttrs();
5357 }
5358 
5360 {
5361  OUString aName, aContent;
5362  bool bHTTPEquiv = false;
5363 
5364  const HTMLOptions& rHTMLOptions = GetOptions();
5365  for (size_t i = rHTMLOptions.size(); i; )
5366  {
5367  const HTMLOption& rOption = rHTMLOptions[--i];
5368  switch( rOption.GetToken() )
5369  {
5370  case HtmlOptionId::NAME:
5371  aName = rOption.GetString();
5372  bHTTPEquiv = false;
5373  break;
5374  case HtmlOptionId::HTTPEQUIV:
5375  aName = rOption.GetString();
5376  bHTTPEquiv = true;
5377  break;
5378  case HtmlOptionId::CONTENT:
5379  aContent = rOption.GetString();
5380  break;
5381  default: break;
5382  }
5383  }
5384 
5385  // Here things get a little tricky: We know for sure, that the Doc-Info
5386  // wasn't changed. Therefore it's enough to query for Generator and Refresh
5387  // to find a not processed Token. These are the only ones which won't change
5388  // the Doc-Info.
5389  if( aName.equalsIgnoreAsciiCase( OOO_STRING_SVTOOLS_HTML_META_generator ) ||
5390  aName.equalsIgnoreAsciiCase( OOO_STRING_SVTOOLS_HTML_META_refresh ) ||
5391  aName.equalsIgnoreAsciiCase( OOO_STRING_SVTOOLS_HTML_META_content_type ) ||
5392  aName.equalsIgnoreAsciiCase( OOO_STRING_SVTOOLS_HTML_META_content_script_type ) )
5393  return;
5394 
5395  aContent = aContent.replaceAll("\r", "").replaceAll("\n", "");
5396 
5397  if( aName.equalsIgnoreAsciiCase( OOO_STRING_SVTOOLS_HTML_META_sdendnote ) )
5398  {
5399  FillEndNoteInfo( aContent );
5400  return;
5401  }
5402 
5403  if( aName.equalsIgnoreAsciiCase( OOO_STRING_SVTOOLS_HTML_META_sdfootnote ) )
5404  {
5405  FillFootNoteInfo( aContent );
5406  return;
5407  }
5408 
5409  OUStringBuffer sText;
5410  sText.append("HTML: <");
5411  sText.append(OOO_STRING_SVTOOLS_HTML_meta);
5412  sText.append(' ');
5413  if( bHTTPEquiv )
5415  else
5416  sText.append(OOO_STRING_SVTOOLS_HTML_O_name);