LibreOffice Module sw (master)  1
swhtml.cxx
Go to the documentation of this file.
1 /* -*- Mode: C++; tab-width: 4; indent-tabs-mode: nil; c-basic-offset: 4 -*- */
2 /*
3  * This file is part of the LibreOffice project.
4  *
5  * This Source Code Form is subject to the terms of the Mozilla Public
6  * License, v. 2.0. If a copy of the MPL was not distributed with this
7  * file, You can obtain one at http://mozilla.org/MPL/2.0/.
8  *
9  * This file incorporates work covered by the following license notice:
10  *
11  * Licensed to the Apache Software Foundation (ASF) under one or more
12  * contributor license agreements. See the NOTICE file distributed
13  * with this work for additional information regarding copyright
14  * ownership. The ASF licenses this file to you under the Apache
15  * License, Version 2.0 (the "License"); you may not use this file
16  * except in compliance with the License. You may obtain a copy of
17  * the License at http://www.apache.org/licenses/LICENSE-2.0 .
18  */
19 
20 #include <sal/config.h>
21 
22 #include <algorithm>
23 #include <memory>
24 #include <config_java.h>
25 
26 #include <com/sun/star/document/XDocumentPropertiesSupplier.hpp>
27 #include <com/sun/star/document/XDocumentProperties.hpp>
28 #include <com/sun/star/i18n/ScriptType.hpp>
29 #include <com/sun/star/i18n/XBreakIterator.hpp>
30 #include <comphelper/string.hxx>
31 #include <o3tl/safeint.hxx>
32 #include <rtl/ustrbuf.hxx>
33 #include <svx/svxids.hrc>
34 #if OSL_DEBUG_LEVEL > 0
35 #include <stdlib.h>
36 #endif
37 #include <hintids.hxx>
38 
39 #include <vcl/errinf.hxx>
40 #include <svl/stritem.hxx>
41 #include <vcl/imap.hxx>
42 #include <svtools/htmltokn.h>
43 #include <svtools/htmlkywd.hxx>
44 #include <svtools/ctrltool.hxx>
45 #include <unotools/configmgr.hxx>
46 #include <unotools/pathoptions.hxx>
47 #include <vcl/svapp.hxx>
48 #include <vcl/wrkwin.hxx>
49 #include <sfx2/event.hxx>
50 #include <sfx2/fcontnr.hxx>
51 #include <sfx2/docfile.hxx>
52 
53 #include <svtools/htmlcfg.hxx>
54 #include <sfx2/linkmgr.hxx>
55 #include <editeng/kernitem.hxx>
56 #include <editeng/boxitem.hxx>
57 #include <editeng/fhgtitem.hxx>
59 #include <editeng/postitem.hxx>
60 #include <editeng/wghtitem.hxx>
62 #include <editeng/udlnitem.hxx>
64 #include <editeng/blinkitem.hxx>
65 #include <editeng/ulspitem.hxx>
66 #include <editeng/colritem.hxx>
67 #include <editeng/fontitem.hxx>
68 #include <editeng/adjustitem.hxx>
69 #include <editeng/lrspitem.hxx>
70 #include <editeng/protitem.hxx>
71 #include <editeng/flstitem.hxx>
73 
74 #include <frmatr.hxx>
75 #include <charatr.hxx>
76 #include <fmtfld.hxx>
77 #include <fmtpdsc.hxx>
78 #include <txtfld.hxx>
79 #include <fmtanchr.hxx>
80 #include <fmtsrnd.hxx>
81 #include <fmtfsize.hxx>
82 #include <fmtclds.hxx>
83 #include <fchrfmt.hxx>
84 #include <fmtinfmt.hxx>
85 #include <fmtfollowtextflow.hxx>
86 #include <fmtornt.hxx>
87 #include <docary.hxx>
88 #include <docstat.hxx>
89 #include <doc.hxx>
90 #include <IDocumentUndoRedo.hxx>
97 #include <IDocumentStatistics.hxx>
98 #include <IDocumentState.hxx>
99 #include <pam.hxx>
100 #include <ndtxt.hxx>
101 #include <mdiexp.hxx>
102 #include <expfld.hxx>
103 #include <poolfmt.hxx>
104 #include <pagedesc.hxx>
105 #include <IMark.hxx>
106 #include <docsh.hxx>
107 #include <editsh.hxx>
108 #include <docufld.hxx>
109 #include "swcss1.hxx"
110 #include <fltini.hxx>
111 #include <htmltbl.hxx>
112 #include "htmlnum.hxx"
113 #include "swhtml.hxx"
114 #include <linkenum.hxx>
115 #include <breakit.hxx>
116 #include <SwAppletImpl.hxx>
117 #include <swdll.hxx>
118 
119 #include <sfx2/viewfrm.hxx>
120 #include <svx/svdobj.hxx>
121 #include <officecfg/Office/Writer.hxx>
122 
123 #include <swerror.h>
124 #include <hints.hxx>
125 #include <ndole.hxx>
126 #include <unoframe.hxx>
127 #include "css1atr.hxx"
128 
// NOTE(review): no use of FONTSIZE_MASK is visible in this part of the file;
// presumably related to the seven HTML font sizes - TODO confirm.
129 #define FONTSIZE_MASK 7
130 
// Escapement settings: the super-/subscript values are aliases of the
// DFLT_ESC_* defaults.
// NOTE(review): HTML_ESC_PROP is presumably a percentage - TODO confirm.
131 #define HTML_ESC_PROP 80
132 #define HTML_ESC_SUPER DFLT_ESC_SUPER
133 #define HTML_ESC_SUB DFLT_ESC_SUB
134 
// NOTE(review): presumably the values used for <SPACER TYPE=...>; the table
// entries that would reference them are not visible in this listing.
135 #define HTML_SPTYPE_BLOCK 1
136 #define HTML_SPTYPE_HORI 2
137 #define HTML_SPTYPE_VERT 3
138 
140 using namespace ::com::sun::star;
141 
142 // <P ALIGN=xxx>, <Hn ALIGN=xxx>, <TD ALIGN=xxx> etc.
// Maps ALIGN keywords to SvxAdjust values. "middle" (Netscape) is handled
// like "center", and "char" like "left".
// NOTE(review): the declaration line of this table is missing from this
// listing; the { nullptr, ... } entry is presumably the end marker.
144 {
145  { OOO_STRING_SVTOOLS_HTML_AL_left, SvxAdjust::Left },
146  { OOO_STRING_SVTOOLS_HTML_AL_center, SvxAdjust::Center },
147  { OOO_STRING_SVTOOLS_HTML_AL_middle, SvxAdjust::Center }, // Netscape
148  { OOO_STRING_SVTOOLS_HTML_AL_right, SvxAdjust::Right },
149  { OOO_STRING_SVTOOLS_HTML_AL_justify, SvxAdjust::Block },
150  { OOO_STRING_SVTOOLS_HTML_AL_char, SvxAdjust::Left },
151  { nullptr, SvxAdjust(0) }
152 };
153 
154 // <SPACER TYPE=...>
// NOTE(review): the declaration line and the keyword entries of this table are
// missing from this listing; only the { nullptr, 0 } entry (presumably the end
// marker) is visible.
156 {
160  { nullptr, 0 }
161 };
162 
// NOTE(review): the signature line of this definition is missing from this
// listing (presumably the HTMLReader constructor - TODO confirm).
// The body only switches m_bTemplateBrowseMode on.
164 {
165  m_bTemplateBrowseMode = true;
166 }
167 
// Returns the name of the Writer/Web template to load for an HTML import, or
// an empty string if no template is to be used / none can be found.
//
// The search looks for "internal/html.oth" first and then for
// "internal/html.stw" in the template path (SvtPathOptions::PATH_TEMPLATE).
//
// NOTE(review): in this listing the first "return OUString();" is not guarded
// by any condition, which would make everything after it unreachable. A line
// (presumably an "if" testing rDoc) seems to be missing between the opening
// brace and the comment below - verify against the real source.
168 OUString HTMLReader::GetTemplateName(SwDoc& rDoc) const
169 {
171  // HTML import into Writer, avoid loading the Writer/Web template.
172  return OUString();
173 
174  const OUString sTemplateWithoutExt("internal/html");
175  SvtPathOptions aPathOpt;
176 
177  // first search for OpenDocument Writer/Web template
178  // OpenDocument Writer/Web template (extension .oth)
179  OUString sTemplate( sTemplateWithoutExt + ".oth" );
180  if (aPathOpt.SearchFile( sTemplate, SvtPathOptions::PATH_TEMPLATE ))
181  return sTemplate;
182 
183  // no OpenDocument Writer/Web template found.
184  // search for OpenOffice.org Writer/Web template
185  sTemplate = sTemplateWithoutExt + ".stw";
186  if (aPathOpt.SearchFile( sTemplate, SvtPathOptions::PATH_TEMPLATE ))
187  return sTemplate;
188 
189  // neither template exists: report it in debug builds and fall back to "no template"
190  OSL_ENSURE( false, "The default HTML template cannot be found in the defined template directories!");
190 
191  return OUString();
192 }
193 
// NOTE(review): the signature line of this definition is missing from this
// listing (presumably bool HTMLReader::SetStrmStgPtr() - TODO confirm).
//
// Returns true when the medium is remote or is not a storage, false otherwise.
// A statement inside the "if" block is missing from this listing.
195 {
    // diagnostic only: m_pMedium is dereferenced below regardless of the outcome
196  OSL_ENSURE( m_pMedium, "Where is the medium??" );
197 
198  if( m_pMedium->IsRemote() || !m_pMedium->IsStorage() )
199  {
201  return true;
202  }
203  return false;
204 
205 }
206 
207 // Call for the general Reader-Interface
// Reads HTML from m_pStream into rDoc at the position described by rPam.
//
// rDoc      document that receives the content
// rBaseURL  base URL handed on to the parser
// rPam      cursor handed on to the parser
// rName     name/path handed on to the parser
//
// Returns ERR_SWG_READ_ERROR when there is no stream. Otherwise returns
// ERRCODE_NONE, unless the parser ends in a state that is neither Pending nor
// Accepted; in that case an error built from ERR_FORMAT_ROWCOL and the string
// "<line>,<position>" taken from the parser is returned.
//
// NOTE(review): several lines of this function are missing from this listing
// (the statements inside "if( !m_bInsertMode )", the tail of the SwHTMLParser
// constructor call, and the statement of the Pending branch).
208 ErrCode HTMLReader::Read( SwDoc &rDoc, const OUString& rBaseURL, SwPaM &rPam, const OUString & rName )
209 {
211 
212  if( !m_pStream )
213  {
214  OSL_ENSURE( m_pStream, "HTML-Read without stream" );
215  return ERR_SWG_READ_ERROR;
216  }
217 
218  if( !m_bInsertMode )
219  {
221 
222  // Set the HTML page style when it isn't an HTML document,
223  // otherwise it's already set.
225  {
228  }
229  }
230 
231  // so nobody steals the document!
232  rtl::Reference<SwDoc> aHoldRef(&rDoc);
233  ErrCode nRet = ERRCODE_NONE;
    // "!m_bInsertMode" is passed in the position of the parser's bReadNewDoc parameter
234  tools::SvRef<SwHTMLParser> xParser = new SwHTMLParser( &rDoc, rPam, *m_pStream,
235  rName, rBaseURL, !m_bInsertMode, m_pMedium,
236  IsReadUTF8(),
238 
239  SvParserState eState = xParser->CallParser();
240 
241  if( SvParserState::Pending == eState )
243  else if( SvParserState::Accepted != eState )
244  {
    // error text is "<line number>,<position in line>" as reported by the parser
245  const OUString sErr(OUString::number(static_cast<sal_Int32>(xParser->GetLineNr()))
246  + "," + OUString::number(static_cast<sal_Int32>(xParser->GetLinePos())));
247 
248  // use the stream as transport for error number
249  nRet = *new StringErrorInfo( ERR_FORMAT_ROWCOL, sErr,
250  DialogMask::ButtonsOk | DialogMask::MessageError );
251  }
252 
253  return nRet;
254 }
255 
// SwHTMLParser constructor.
//
// NOTE(review): the first line of the signature (class name and the leading
// parameters, which the body uses as pD, rCursor and rIn) is missing from this
// listing, as are a few statements inside the body.
//
// rPath            stored in m_aPathToFile
// rBaseURL         stored in m_sBaseURL and passed on to the CSS1 parser
// bReadNewDoc      true when a new document is read; enables the document
//                  defaults set below
// pMed             medium; used here to obtain a jump mark from its URL
// bReadUTF8        forces UTF-8 as source encoding
// bNoHTMLComments  initial value of m_bIgnoreHTMLComments
// rNamespace       when not empty, switches the parser to XHTML mode
//                  ("reqif-xhtml" additionally sets m_bReqIF)
257  const OUString& rPath,
258  const OUString& rBaseURL,
259  bool bReadNewDoc,
260  SfxMedium* pMed, bool bReadUTF8,
261  bool bNoHTMLComments,
262  const OUString& rNamespace )
263  : SfxHTMLParser( rIn, bReadNewDoc, pMed ),
264  SwClient( nullptr ),
265  m_aPathToFile( rPath ),
266  m_sBaseURL( rBaseURL ),
267  m_xAttrTab(new HTMLAttrTable),
268  m_pNumRuleInfo( new SwHTMLNumRuleInfo ),
269  m_xDoc( pD ),
270  m_pActionViewShell( nullptr ),
271  m_pSttNdIdx( nullptr ),
272  m_pFormImpl( nullptr ),
273  m_pMarquee( nullptr ),
274  m_pImageMap( nullptr ),
275  m_nBaseFontStMin( 0 ),
276  m_nFontStMin( 0 ),
277  m_nDefListDeep( 0 ),
278  m_nFontStHeadStart( 0 ),
279  m_nSBModuleCnt( 0 ),
280  m_nMissingImgMaps( 0 ),
281  m_nParaCnt( 5 ),
282  // #i83625#
283  m_nContextStMin( 0 ),
284  m_nContextStAttrMin( 0 ),
285  m_nSelectEntryCnt( 0 ),
286  m_nOpenParaToken( HtmlTokenId::NONE ),
287  m_eJumpTo( JumpToMarks::NONE ),
288 #ifdef DBG_UTIL
289  m_nContinue( 0 ),
290 #endif
291  m_eParaAdjust( SvxAdjust::End ),
292  m_bDocInitalized( false ),
293  m_bSetModEnabled( false ),
294  m_bInFloatingFrame( false ),
295  m_bInField( false ),
296  m_bCallNextToken( false ),
297  m_bIgnoreRawData( false ),
298  m_bLBEntrySelected ( false ),
299  m_bTAIgnoreNewPara ( false ),
300  m_bFixMarqueeWidth ( false ),
301  m_bNoParSpace( false ),
302  m_bInNoEmbed( false ),
303  m_bInTitle( false ),
304  m_bUpdateDocStat( false ),
305  m_bFixSelectWidth( false ),
306  m_bTextArea( false ),
307  m_bSelect( false ),
308  m_bInFootEndNoteAnchor( false ),
309  m_bInFootEndNoteSymbol( false ),
310  m_bIgnoreHTMLComments( bNoHTMLComments ),
311  m_bRemoveHidden( false ),
312  m_bBodySeen( false ),
313  m_bReadingHeaderOrFooter( false ),
314  m_isInTableStructure(false),
315  m_nTableDepth( 0 ),
316  m_pTempViewFrame(nullptr)
317 {
318  // If requested explicitly, then force ignoring of comments (don't create postits for them).
    // NOTE(review): the condition guarding the next assignment is missing from this listing.
320  m_bIgnoreHTMLComments = true;
321 
322  m_nEventId = nullptr;
324 
325  m_eScriptLang = HTMLScriptLanguage::Unknown;
326 
327  rCursor.DeleteMark();
328  m_pPam = &rCursor; // re-use existing cursor: avoids spurious ~SwIndexReg assert
    // zero-fill the freshly allocated attribute table
329  memset(m_xAttrTab.get(), 0, sizeof(HTMLAttrTable));
330 
331  // Read the font sizes 1-7 from the INI file
    // NOTE(review): the factor 20 presumably converts points to twips - TODO confirm
332  SvxHtmlOptions& rHtmlOptions = SvxHtmlOptions::Get();
333  m_aFontHeights[0] = rHtmlOptions.GetFontSize( 0 ) * 20;
334  m_aFontHeights[1] = rHtmlOptions.GetFontSize( 1 ) * 20;
335  m_aFontHeights[2] = rHtmlOptions.GetFontSize( 2 ) * 20;
336  m_aFontHeights[3] = rHtmlOptions.GetFontSize( 3 ) * 20;
337  m_aFontHeights[4] = rHtmlOptions.GetFontSize( 4 ) * 20;
338  m_aFontHeights[5] = rHtmlOptions.GetFontSize( 5 ) * 20;
339  m_aFontHeights[6] = rHtmlOptions.GetFontSize( 6 ) * 20;
340 
341  m_bKeepUnknown = rHtmlOptions.IsImportUnknown();
342 
343  if(bReadNewDoc)
344  {
345  //CJK has different defaults, so a different object should be used for this
346  //RES_CHARTR_CJK_FONTSIZE is a valid value
    // NOTE(review): the declarations of aFontHeight, aFontHeightCJK and
    // aFontHeightCTL are missing from this listing.
348  m_xDoc->SetDefault( aFontHeight );
350  m_xDoc->SetDefault( aFontHeightCJK );
352  m_xDoc->SetDefault( aFontHeightCTL );
353 
354  // #i18732# - adjust default of option 'FollowTextFlow'
355  // TODO: not sure what the appropriate default for HTML should be?
356  m_xDoc->SetDefault( SwFormatFollowTextFlow(true) );
357  }
358 
359  // Change to HTML mode during the import, so that the right styles are created
    // (the previous value is remembered in m_bOldIsHTMLMode)
360  m_bOldIsHTMLMode = m_xDoc->getIDocumentSettingAccess().get(DocumentSettingId::HTML_MODE);
361  m_xDoc->getIDocumentSettingAccess().set(DocumentSettingId::HTML_MODE, true);
362 
363  m_pCSS1Parser.reset( new SwCSS1Parser( m_xDoc.get(), m_aFontHeights, m_sBaseURL, IsNewDoc() ) );
364  m_pCSS1Parser->SetIgnoreFontFamily( rHtmlOptions.IsIgnoreFontFamily() );
365 
366  if( bReadUTF8 )
367  {
368  SetSrcEncoding( RTL_TEXTENCODING_UTF8 );
369  }
370  else
371  {
    // NOTE(review): pDocSh is dereferenced here without a null check, whereas the
    // result of the same GetDocShell() call is null-checked a few lines below -
    // confirm that a doc shell always exists on this path.
372  SwDocShell *pDocSh = m_xDoc->GetDocShell();
373  SvKeyValueIterator *pHeaderAttrs =
374  pDocSh->GetHeaderAttributes();
375  if( pHeaderAttrs )
376  SetEncodingByHTTPHeader( pHeaderAttrs );
377  }
378  m_pCSS1Parser->SetDfltEncoding( osl_getThreadTextEncoding() );
379 
380  SwDocShell* pDocSh = m_xDoc->GetDocShell();
381  if( pDocSh )
382  {
383  m_bViewCreated = true; // not, load synchronous
384 
385  // a jump mark is present
386 
387  if( pMed )
388  {
389  m_sJmpMark = pMed->GetURLObject().GetMark();
390  if( !m_sJmpMark.isEmpty() )
391  {
    // Split the mark at the last cMarkSeparator: the text behind it (spaces
    // removed, lower-cased) is compared with the known type keywords. When the
    // separator is not found or sits at index 0, nPos is 0 and the whole
    // string is treated as a normal mark.
    // NOTE(review): the statements that follow the "region", "table" and
    // "graphic" comparisons and the final isEmpty() test are missing from
    // this listing.
393  sal_Int32 nLastPos = m_sJmpMark.lastIndexOf( cMarkSeparator );
394  sal_Int32 nPos = nLastPos != -1 ? nLastPos : 0;
395 
396  OUString sCmp;
397  if (nPos)
398  {
399  sCmp = m_sJmpMark.copy(nPos + 1).replaceAll(" ", "");
400  }
401 
402  if( !sCmp.isEmpty() )
403  {
404  sCmp = sCmp.toAsciiLowerCase();
405  if( sCmp == "region" )
407  else if( sCmp == "table" )
409  else if( sCmp == "graphic" )
411  else if( sCmp == "outline" ||
412  sCmp == "text" ||
413  sCmp == "frame" )
414  m_eJumpTo = JumpToMarks::NONE; // this is nothing valid!
415  else
416  // otherwise this is a normal (book)mark
417  nPos = -1;
418  }
419  else
420  nPos = -1;
421 
    // a recognised type suffix is cut off the mark name
422  if( nPos != -1 )
423  m_sJmpMark = m_sJmpMark.copy( 0, nPos );
424  if( m_sJmpMark.isEmpty() )
426  }
427  }
428  }
429 
430  if (!rNamespace.isEmpty())
431  {
432  SetNamespace(rNamespace);
433  m_bXHTML = true;
434  if (rNamespace == "reqif-xhtml")
435  m_bReqIF = true;
436  }
437 }
438 
// SwHTMLParser destructor (the signature line is missing from this listing).
//
// Pops and clears any contexts that are still on the stack, restores the
// document's HTML_MODE setting saved by the constructor, updates links and
// signals LoadingFinished() on the doc shell where applicable, and frees the
// helper objects owned by the parser.
440 {
441 #ifdef DBG_UTIL
442  OSL_ENSURE( !m_nContinue, "DTOR in continue!" );
443 #endif
444 
445  OSL_ENSURE(m_aContexts.empty(), "There are still contexts on the stack");
446  OSL_ENSURE(!m_nContextStMin, "There are protected contexts");
    // drop the protection limit so that really every context gets popped
447  m_nContextStMin = 0;
448  while (!m_aContexts.empty())
449  {
450  std::unique_ptr<HTMLAttrContext> xCntxt(PopContext());
451  ClearContext(xCntxt.get());
452  }
453 
    // remember whether the document was loading asynchronously before resetting the flag
454  bool bAsync = m_xDoc->IsInLoadAsynchron();
455  m_xDoc->SetInLoadAsynchron( false );
456  m_xDoc->getIDocumentSettingAccess().set(DocumentSettingId::HTML_MODE, m_bOldIsHTMLMode);
457 
    // NOTE(review): the statement controlled by this "if" is missing from this
    // listing; as shown, the "if" would govern the next visible statement.
458  if( m_xDoc->GetDocShell() && m_nEventId )
460 
461  // the DocumentDetected maybe can delete the DocShells, therefore fetch again
462  if( m_xDoc->GetDocShell() )
463  {
464  // update linked sections
465  sal_uInt16 nLinkMode = m_xDoc->getIDocumentSettingAccess().getLinkUpdateMode( true );
466  if( nLinkMode != NEVER && bAsync &&
467  SfxObjectCreateMode::INTERNAL!=m_xDoc->GetDocShell()->GetCreateMode() )
468  m_xDoc->getIDocumentLinksAdministration().GetLinkManager().UpdateAllLinks( nLinkMode == MANUAL, false, nullptr );
469 
470  if ( m_xDoc->GetDocShell()->IsLoading() )
471  {
472  // #i59688#
473  m_xDoc->GetDocShell()->LoadingFinished();
474  }
475  }
476 
477  delete m_pSttNdIdx;
478 
    // attributes left in m_aSetAttrTab are owned by the parser and deleted here
479  if( !m_aSetAttrTab.empty() )
480  {
481  OSL_ENSURE( m_aSetAttrTab.empty(),"There are still attributes on the stack" );
482  for ( auto& rpAttr : m_aSetAttrTab )
483  delete rpAttr;
484  m_aSetAttrTab.clear();
485  }
486 
487  m_pCSS1Parser.reset();
488  m_pNumRuleInfo.reset();
489  DeleteFormImpl();
491 
492  OSL_ENSURE(!m_xTable.get(), "It exists still a open table");
493  m_pImageMaps.reset();
494 
495  OSL_ENSURE( m_vPendingStack.empty(),
496  "SwHTMLParser::~SwHTMLParser: Here should not be Pending-Stack anymore" );
497  m_vPendingStack.clear();
498 
499  m_xDoc.clear();
500 
501  if ( m_pTempViewFrame )
502  {
504 
505  // the temporary view frame is hidden, so the hidden flag might need to be removed
    // NOTE(review): m_xDoc was cleared just above, so m_xDoc.is() is false here
    // and the ClearItem( SID_HIDDEN ) call below looks unreachable - confirm
    // whether the clear() was meant to happen after this block.
506  if ( m_bRemoveHidden && m_xDoc.is() && m_xDoc->GetDocShell() && m_xDoc->GetDocShell()->GetMedium() )
507  m_xDoc->GetDocShell()->GetMedium()->GetItemSet()->ClearItem( SID_HIDDEN );
508  }
509 }
510 
511 IMPL_LINK_NOARG( SwHTMLParser, AsyncCallback, void*, void )
512 {
513  m_nEventId=nullptr;
514 
515  // #i47907# - If the document has already been destructed,
516  // the parser should be aware of this:
517  if( ( m_xDoc->GetDocShell() && m_xDoc->GetDocShell()->IsAbortingImport() )
518  || 1 == m_xDoc->getReferenceCount() )
519  {
520  // was the import aborted by SFX?
521  eState = SvParserState::Error;
522  }
523 
524  GetAsynchCallLink().Call(nullptr);
525 }
526 
// NOTE(review): the signature line of this definition is missing from this
// listing (presumably SvParserState SwHTMLParser::CallParser() - TODO confirm),
// as is the declaration of eRet that is returned at the end.
//
// Prepares the import: creates the start node index, splits the paragraph at
// the cursor when inserting into an existing document, and either arranges
// the asynchronous callback (medium present) or sets up a progress bar sized
// by the stream length (no medium).
528 {
529  // create temporary index on position 0, so it won't be moved!
530  m_pSttNdIdx = new SwNodeIndex( m_xDoc->GetNodes() );
531  if( !IsNewDoc() ) // insert into existing document ?
532  {
533  const SwPosition* pPos = m_pPam->GetPoint();
534 
    // two splits around the cursor position; m_pSttNdIdx is set to the node
    // in front of the insertion position after the first split
535  m_xDoc->getIDocumentContentOperations().SplitNode( *pPos, false );
536 
537  *m_pSttNdIdx = pPos->nNode.GetIndex()-1;
538  m_xDoc->getIDocumentContentOperations().SplitNode( *pPos, false );
539 
540  SwPaM aInsertionRangePam( *pPos );
541 
543 
544  // split any redline over the insertion point
545  aInsertionRangePam.SetMark();
546  *aInsertionRangePam.GetPoint() = *m_pPam->GetPoint();
547  aInsertionRangePam.Move( fnMoveBackward );
548  m_xDoc->getIDocumentRedlineAccess().SplitRedline( aInsertionRangePam );
549 
550  m_xDoc->SetTextFormatColl( *m_pPam,
551  m_pCSS1Parser->GetTextCollFromPool( RES_POOLCOLL_STANDARD ));
552  }
553 
554  if( GetMedium() )
555  {
556  if( !m_bViewCreated )
557  {
    // AsyncCallback will be invoked once the posted event is dispatched
558  m_nEventId = Application::PostUserEvent( LINK( this, SwHTMLParser, AsyncCallback ) );
559  }
560  else
561  {
    // m_bViewCreated is already true in this branch; the assignment is a no-op
562  m_bViewCreated = true;
563  m_nEventId = nullptr;
564  }
565  }
566  else // show progress bar
567  {
    // seek to the end to learn the stream length for the progress range,
    // then rewind; stream errors caused by seeking are reset
568  rInput.Seek(STREAM_SEEK_TO_END);
569  rInput.ResetError();
570 
571  m_xProgress.reset(new ImportProgress(m_xDoc->GetDocShell(), 0, rInput.Tell()));
572 
573  rInput.Seek(STREAM_SEEK_TO_BEGIN);
574  rInput.ResetError();
575  }
576 
    // NOTE(review): presumably registers the parser as a client of the first
    // page descriptor, so that Modify() is told when it dies - TODO confirm
577  m_xDoc->GetPageDesc( 0 ).Add( this );
578 
580  return eRet;
581 }
582 
// NOTE(review): the signature line of this definition is missing from this
// listing (called elsewhere in this file as CanRemoveNode(nNodeIdx)).
//
// Looks at the node directly in front of index nNodeIdx and returns true if
// it is a content node, or an end node whose start node is a section node.
584 {
585  const SwNode *pPrev = m_xDoc->GetNodes()[nNodeIdx - 1];
586  return pPrev->IsContentNode() || (pPrev->IsEndNode() && pPrev->StartOfSectionNode()->IsSectionNode());
587 }
588 
// SwHTMLParser::Continue (the signature line is missing from this listing; the
// body uses a token value named nToken, presumably its parameter).
//
// Runs, or resumes, the actual parsing and performs the clean-up once the
// parser is no longer pending: open contexts and numberings are closed, the
// node splits done at the start of an insert are undone again, a trailing
// empty paragraph is removed, and document properties/statistics are updated.
// While it runs, Undo, the OLE link and (if enabled) SetModified are switched
// off and restored at the end.
//
// NOTE(review): several statements of this function are missing from this
// listing; the comments added here only describe what is visible.
590 {
591 #ifdef DBG_UTIL
592  OSL_ENSURE(!m_nContinue, "Continue in Continue - not supposed to happen");
593  m_nContinue++;
594 #endif
595 
596  // When the import (of SFX) is aborted, an error will be set but
597  // we still continue, so that we clean up properly.
598  OSL_ENSURE( SvParserState::Error!=eState,
599  "SwHTMLParser::Continue: already set an error" );
600  if( m_xDoc->GetDocShell() && m_xDoc->GetDocShell()->IsAbortingImport() )
601  eState = SvParserState::Error;
602 
603  // Fetch SwViewShell from document, save it and set as current.
604  SwViewShell *pInitVSh = CallStartAction();
605 
606  if( SvParserState::Error != eState && GetMedium() && !m_bViewCreated )
607  {
608  // At first call first return, show document and wait for callback
609  // time.
610  // At this point in CallParser only one digit was read and
611  // a SaveState(0) was called.
612  eState = SvParserState::Pending;
613  m_bViewCreated = true;
614  m_xDoc->SetInLoadAsynchron( true );
615 
616 #ifdef DBG_UTIL
617  m_nContinue--;
618 #endif
619 
620  return;
621  }
622 
    // switch SetModified off for the duration of the import, remembering
    // whether it has to be switched on again at the end
623  m_bSetModEnabled = false;
624  if( m_xDoc->GetDocShell() )
625  {
626  m_bSetModEnabled = m_xDoc->GetDocShell()->IsEnableSetModified();
627  if( m_bSetModEnabled )
628  {
629  m_xDoc->GetDocShell()->EnableSetModified( false );
630  }
631  }
632 
633  // during import don't call OLE-Modified
634  Link<bool,void> aOLELink( m_xDoc->GetOle2Link() );
635  m_xDoc->SetOle2Link( Link<bool,void>() );
636 
    // remember the modified and Undo state, then disable Undo for the import
637  bool bModified = m_xDoc->getIDocumentState().IsModified();
638  bool const bWasUndo = m_xDoc->GetIDocumentUndoRedo().DoesUndo();
639  m_xDoc->GetIDocumentUndoRedo().DoUndo(false);
640 
641  // When the import will be aborted, don't call Continue anymore.
642  // If a Pending-Stack exists make sure the stack is ended with a call
643  // of NextToken.
644  if( SvParserState::Error == eState )
645  {
646  OSL_ENSURE( m_vPendingStack.empty() || m_vPendingStack.back().nToken != HtmlTokenId::NONE,
647  "SwHTMLParser::Continue: Pending-Stack without Token" );
648  if( !m_vPendingStack.empty() && m_vPendingStack.back().nToken != HtmlTokenId::NONE )
649  NextToken( m_vPendingStack.back().nToken );
650  OSL_ENSURE( m_vPendingStack.empty(),
651  "SwHTMLParser::Continue: There is again a Pending-Stack" );
652  }
653  else
654  {
    // resume with the token saved on the pending stack, if there is one
655  HTMLParser::Continue( !m_vPendingStack.empty() ? m_vPendingStack.back().nToken : nToken );
656  }
657 
658  // disable progress bar again
659  m_xProgress.reset();
660 
661  bool bLFStripped = false;
662  if( SvParserState::Pending != GetStatus() )
663  {
664  // set the last attributes yet
665  {
666  if( !m_aScriptSource.isEmpty() )
667  {
668  SwScriptFieldType *pType =
669  static_cast<SwScriptFieldType*>(m_xDoc->getIDocumentFieldsAccess().GetSysFieldType( SwFieldIds::Script ));
670 
    // NOTE(review): the start of the aField declaration is missing from this listing
672  false );
673  InsertAttr( SwFormatField( aField ), false );
674  }
675 
676  if( m_pAppletImpl )
677  {
678  if( m_pAppletImpl->GetApplet().is() )
679  EndApplet();
680  else
681  EndObject();
682  }
683 
684  // maybe remove an existing LF after the last paragraph
685  if( IsNewDoc() )
686  bLFStripped = StripTrailingLF() > 0;
687 
688  // close still open numbering
689  while( GetNumInfo().GetNumRule() )
690  EndNumBulList();
691 
692  OSL_ENSURE( !m_nContextStMin, "There are protected contexts" );
693  // try this twice, first normally to let m_nContextStMin decrease
694  // naturally and get contexts popped in desired order, and if that
695  // fails force it
696  for (int i = 0; i < 2; ++i)
697  {
698  while (m_aContexts.size() > m_nContextStMin)
699  {
700  std::unique_ptr<HTMLAttrContext> xCntxt(PopContext());
701  if (xCntxt)
702  EndContext(xCntxt.get());
703  }
704  if (!m_nContextStMin)
705  break;
706  OSL_ENSURE(!m_nContextStMin, "There are still protected contexts");
707  m_nContextStMin = 0;
708  }
709 
710  m_aParaAttrs.clear();
711 
712  SetAttr( false );
713 
714  // set the first delayed styles
715  m_pCSS1Parser->SetDelayedStyles();
716  }
717 
718  // again correct the start
    // (insert mode only: try to join the node remembered in m_pSttNdIdx with
    // the node following it)
719  if( !IsNewDoc() && m_pSttNdIdx->GetIndex() )
720  {
721  SwTextNode* pTextNode = m_pSttNdIdx->GetNode().GetTextNode();
722  SwNodeIndex aNxtIdx( *m_pSttNdIdx );
723  if( pTextNode && pTextNode->CanJoinNext( &aNxtIdx ))
724  {
725  const sal_Int32 nStt = pTextNode->GetText().getLength();
726  // when the cursor is still in the node, then set him at the end
727  if( m_pPam->GetPoint()->nNode == aNxtIdx )
728  {
730  m_pPam->GetPoint()->nContent.Assign( pTextNode, nStt );
731  }
732 
733 #if OSL_DEBUG_LEVEL > 0
734 // !!! shouldn't be possible, or ??
735  OSL_ENSURE( m_pSttNdIdx->GetIndex()+1 != m_pPam->GetBound().nNode.GetIndex(),
736  "Pam.Bound1 is still in the node" );
737  OSL_ENSURE( m_pSttNdIdx->GetIndex()+1 != m_pPam->GetBound( false ).nNode.GetIndex(),
738  "Pam.Bound2 is still in the node" );
739 
740  if( m_pSttNdIdx->GetIndex()+1 == m_pPam->GetBound().nNode.GetIndex() )
741  {
742  const sal_Int32 nCntPos = m_pPam->GetBound().nContent.GetIndex();
743  m_pPam->GetBound().nContent.Assign( pTextNode,
744  pTextNode->GetText().getLength() + nCntPos );
745  }
746  if( m_pSttNdIdx->GetIndex()+1 == m_pPam->GetBound( false ).nNode.GetIndex() )
747  {
748  const sal_Int32 nCntPos = m_pPam->GetBound( false ).nContent.GetIndex();
749  m_pPam->GetBound( false ).nContent.Assign( pTextNode,
750  pTextNode->GetText().getLength() + nCntPos );
751  }
752 #endif
753  // Keep character attribute!
754  SwTextNode* pDelNd = aNxtIdx.GetNode().GetTextNode();
755  if (pTextNode->GetText().getLength())
756  pDelNd->FormatToTextAttr( pTextNode );
757  else
758  pTextNode->ChgFormatColl( pDelNd->GetTextColl() );
759  pTextNode->JoinNext();
760  }
761  }
762  }
763 
764  if( SvParserState::Accepted == eState )
765  {
766  if( m_nMissingImgMaps )
767  {
768  // Some Image-Map relations are still missing.
769  // Maybe now the Image-Maps are there?
771  }
772 
773  // now remove the last useless paragraph
    // (only when the cursor sits at content index 0 and no trailing LF was
    // stripped above)
774  SwPosition* pPos = m_pPam->GetPoint();
775  if( !pPos->nContent.GetIndex() && !bLFStripped )
776  {
777  SwTextNode* pCurrentNd;
778  sal_uLong nNodeIdx = pPos->nNode.GetIndex();
779 
    // NOTE(review): the initialiser of bHasFlysOrMarks is missing from this listing
780  bool bHasFlysOrMarks =
782 
783  if( IsNewDoc() )
784  {
785  if (!m_pPam->GetPoint()->nContent.GetIndex() && CanRemoveNode(nNodeIdx))
786  {
    // NOTE(review): the declarations of pCNd and pVSh are missing from this listing
788  if( pCNd && pCNd->StartOfSectionIndex()+2 <
789  pCNd->EndOfSectionIndex() && !bHasFlysOrMarks )
790  {
792  SwCursorShell *pCursorSh = dynamic_cast<SwCursorShell *>( pVSh );
    // move the shell cursor to the end of the previous paragraph if it
    // stands in the node that is about to be deleted
793  if( pCursorSh &&
794  pCursorSh->GetCursor()->GetPoint()
795  ->nNode.GetIndex() == nNodeIdx )
796  {
797  pCursorSh->MovePara(GoPrevPara, fnParaEnd );
798  pCursorSh->SetMark();
799  pCursorSh->ClearMark();
800  }
801  m_pPam->GetBound().nContent.Assign( nullptr, 0 );
802  m_pPam->GetBound(false).nContent.Assign( nullptr, 0 );
803  m_xDoc->GetNodes().Delete( m_pPam->GetPoint()->nNode );
804  }
805  }
806  }
807  else if( nullptr != ( pCurrentNd = m_xDoc->GetNodes()[ nNodeIdx ]->GetTextNode()) && !bHasFlysOrMarks )
808  {
809  if( pCurrentNd->CanJoinNext( &pPos->nNode ))
810  {
811  SwTextNode* pNextNd = pPos->nNode.GetNode().GetTextNode();
812  pPos->nContent.Assign( pNextNd, 0 );
814  pNextNd->JoinPrev();
815  }
816  else if (pCurrentNd->GetText().isEmpty())
817  {
818  pPos->nContent.Assign( nullptr, 0 );
820  m_xDoc->GetNodes().Delete( pPos->nNode );
822  }
823  }
824  }
825 
826  // annul the SplitNode from the beginning
827  else if( !IsNewDoc() )
828  {
829  if( pPos->nContent.GetIndex() ) // then there was no <p> at the end
830  m_pPam->Move( fnMoveForward, GoInNode ); // therefore to the next
831  SwTextNode* pTextNode = pPos->nNode.GetNode().GetTextNode();
832  SwNodeIndex aPrvIdx( pPos->nNode );
833  if( pTextNode && pTextNode->CanJoinPrev( &aPrvIdx ) &&
834  *m_pSttNdIdx <= aPrvIdx )
835  {
836  // Normally here should take place a JoinNext, but all cursors and
837  // so are registered in pTextNode, so that it MUST remain.
838 
839  // Convert paragraph to character attribute, from Prev adopt
840  // the paragraph attribute and the template!
841  SwTextNode* pPrev = aPrvIdx.GetNode().GetTextNode();
842  pTextNode->ChgFormatColl( pPrev->GetTextColl() );
843  pTextNode->FormatToTextAttr( pPrev );
844  pTextNode->ResetAllAttr();
845 
846  if( pPrev->HasSwAttrSet() )
847  pTextNode->SetAttr( *pPrev->GetpSwAttrSet() );
848 
849  if( &m_pPam->GetBound().nNode.GetNode() == pPrev )
850  m_pPam->GetBound().nContent.Assign( pTextNode, 0 );
851  if( &m_pPam->GetBound(false).nNode.GetNode() == pPrev )
852  m_pPam->GetBound(false).nContent.Assign( pTextNode, 0 );
853 
854  pTextNode->JoinPrev();
855  }
856  }
857 
858  // adjust AutoLoad in DocumentProperties
    // (an autoload interval without a URL gets the path of this file as URL)
859  if( IsNewDoc() )
860  {
861  SwDocShell *pDocShell(m_xDoc->GetDocShell());
862  OSL_ENSURE(pDocShell, "no SwDocShell");
863  if (pDocShell) {
864  uno::Reference<document::XDocumentPropertiesSupplier> xDPS(
865  pDocShell->GetModel(), uno::UNO_QUERY_THROW);
866  uno::Reference<document::XDocumentProperties> xDocProps(
867  xDPS->getDocumentProperties());
868  OSL_ENSURE(xDocProps.is(), "DocumentProperties is null");
869  if ( xDocProps.is() && (xDocProps->getAutoloadSecs() > 0) &&
870  (xDocProps->getAutoloadURL().isEmpty()) )
871  {
872  xDocProps->setAutoloadURL(m_aPathToFile);
873  }
874  }
875  }
876 
877  if( m_bUpdateDocStat )
878  {
879  m_xDoc->getIDocumentStatistics().UpdateDocStat( false, true );
880  }
881  }
882 
    // the start index is only needed as long as parsing may be resumed
883  if( SvParserState::Pending != GetStatus() )
884  {
885  delete m_pSttNdIdx;
886  m_pSttNdIdx = nullptr;
887  }
888 
889  // should the parser be the last one who hold the document, then nothing
890  // has to be done anymore, document will be destroyed shortly!
891  if( 1 < m_xDoc->getReferenceCount() )
892  {
893  if( bWasUndo )
894  {
895  m_xDoc->GetIDocumentUndoRedo().DelAllUndoObj();
896  m_xDoc->GetIDocumentUndoRedo().DoUndo(true);
897  }
898  else if( !pInitVSh )
899  {
900  // When at the beginning of Continue no Shell was available,
901  // it's possible in the meantime one was created.
902  // In that case the bWasUndo flag is wrong and we must
903  // enable Undo.
904  SwViewShell *pTmpVSh = CheckActionViewShell();
905  if( pTmpVSh )
906  {
907  m_xDoc->GetIDocumentUndoRedo().DoUndo(true);
908  }
909  }
910 
    // restore the OLE link, the modified state and SetModified handling
911  m_xDoc->SetOle2Link( aOLELink );
912  if( !bModified )
913  m_xDoc->getIDocumentState().ResetModified();
914  if( m_bSetModEnabled && m_xDoc->GetDocShell() )
915  {
916  m_xDoc->GetDocShell()->EnableSetModified();
917  m_bSetModEnabled = false; // this is unnecessary here
918  }
919  }
920 
921  // When the Document-SwViewShell still exists and an Action is open
922  // (doesn't have to be by abort), end the Action, disconnect from Shell
923  // and finally reconstruct the old Shell.
924  CallEndAction( true );
925 
926 #ifdef DBG_UTIL
927  m_nContinue--;
928 #endif
929 }
930 
931 void SwHTMLParser::Modify( const SfxPoolItem* pOld, const SfxPoolItem *pNew )
932 {
933  switch( pOld ? pOld->Which() : pNew ? pNew->Which() : 0 )
934  {
935  case RES_OBJECTDYING:
936  if (pOld && static_cast<const SwPtrMsgPoolItem *>(pOld)->pObject == GetRegisteredIn())
937  {
938  // then we kill ourself
939  EndListeningAll();
940  ReleaseRef(); // otherwise we're done!
941  }
942  break;
943  }
944 }
945 
// NOTE(review): the signature line of this definition is missing from this
// listing (the assertion text and the call site in this file name it
// DocumentDetected).
//
// Marks the document as initialised. For a new document it additionally
// finishes a still open header, ends the current action, disables Undo and
// starts a new action.
947 {
948  OSL_ENSURE( !m_bDocInitalized, "DocumentDetected called multiple times" );
949  m_bDocInitalized = true;
950  if( IsNewDoc() )
951  {
952  if( IsInHeader() )
953  FinishHeader();
954 
955  CallEndAction( true );
956 
957  m_xDoc->GetIDocumentUndoRedo().DoUndo(false);
958  // For DocumentDetected in general a SwViewShell is created.
959  // But it also can be created later, in case the UI is captured.
960  CallStartAction();
961  }
962 }
963 
964 // is called for every token that is recognised in CallParser
966 {
967  if( ( m_xDoc->GetDocShell() && m_xDoc->GetDocShell()->IsAbortingImport() )
968  || 1 == m_xDoc->getReferenceCount() )
969  {
970  // Was the import cancelled by SFX? If a pending stack
971  // exists, clean it.
972  eState = SvParserState::Error;
973  OSL_ENSURE( m_vPendingStack.empty() || m_vPendingStack.back().nToken != HtmlTokenId::NONE,
974  "SwHTMLParser::NextToken: Pending-Stack without token" );
975  if( 1 == m_xDoc->getReferenceCount() || m_vPendingStack.empty() )
976  return ;
977  }
978 
979 #if OSL_DEBUG_LEVEL > 0
980  if( !m_vPendingStack.empty() )
981  {
982  switch( nToken )
983  {
984  // tables are read by recursive method calls
985  case HtmlTokenId::TABLE_ON:
986  // For CSS declarations we might have to wait
987  // for a file download to finish
988  case HtmlTokenId::LINK:
989  // For controls we might have to set the size.
990  case HtmlTokenId::INPUT:
991  case HtmlTokenId::TEXTAREA_ON:
992  case HtmlTokenId::SELECT_ON:
993  case HtmlTokenId::SELECT_OFF:
994  break;
995  default:
996  OSL_ENSURE( m_vPendingStack.empty(), "Unknown token for Pending-Stack" );
997  break;
998  }
999  }
1000 #endif
1001 
1002  // The following special cases have to be treated before the
1003  // filter detection, because Netscape doesn't reference the content
1004  // of the title for filter detection either.
1005  if( m_vPendingStack.empty() )
1006  {
1007  if( m_bInTitle )
1008  {
1009  switch( nToken )
1010  {
1011  case HtmlTokenId::TITLE_OFF:
1012  {
1013  OUString sTitle = m_sTitle.makeStringAndClear();
1014  if( IsNewDoc() && !sTitle.isEmpty() )
1015  {
1016  if( m_xDoc->GetDocShell() ) {
1017  uno::Reference<document::XDocumentPropertiesSupplier>
1018  xDPS(m_xDoc->GetDocShell()->GetModel(),
1019  uno::UNO_QUERY_THROW);
1020  uno::Reference<document::XDocumentProperties> xDocProps(
1021  xDPS->getDocumentProperties());
1022  OSL_ENSURE(xDocProps.is(), "no DocumentProperties");
1023  if (xDocProps.is()) {
1024  xDocProps->setTitle(sTitle);
1025  }
1026 
1027  m_xDoc->GetDocShell()->SetTitle(sTitle);
1028  }
1029  }
1030  m_bInTitle = false;
1031  break;
1032  }
1033 
1034  case HtmlTokenId::NONBREAKSPACE:
1035  m_sTitle.append(" ");
1036  break;
1037 
1038  case HtmlTokenId::SOFTHYPH:
1039  m_sTitle.append("-");
1040  break;
1041 
1042  case HtmlTokenId::TEXTTOKEN:
1043  m_sTitle.append(aToken);
1044  break;
1045 
1046  default:
1047  m_sTitle.append("<");
1048  if( (nToken >= HtmlTokenId::ONOFF_START) && isOffToken(nToken) )
1049  m_sTitle.append("/");
1050  m_sTitle.append(sSaveToken);
1051  if( !aToken.isEmpty() )
1052  {
1053  m_sTitle.append(" ");
1054  m_sTitle.append(aToken);
1055  }
1056  m_sTitle.append(">");
1057  break;
1058  }
1059 
1060  return;
1061  }
1062  }
1063 
1064  // Find out what type of document it is if we don't know already.
1065  // For Controls this has to be finished before the control is inserted
1066  // because for inserting a View is needed.
1067  if( !m_bDocInitalized )
1068  DocumentDetected();
1069 
1070  bool bGetIDOption = false, bInsertUnknown = false;
1071  bool bUpperSpaceSave = m_bUpperSpace;
1072  m_bUpperSpace = false;
1073 
1074  // The following special cases may or have to be treated after the
1075  // filter detection
1076  if( m_vPendingStack.empty() )
1077  {
1078  if( m_bInFloatingFrame )
1079  {
1080  // <SCRIPT> is ignored here (from us), because it is ignored in
1081  // Applets as well
1082  if( HtmlTokenId::IFRAME_OFF == nToken )
1083  {
1084  m_bCallNextToken = false;
1085  m_bInFloatingFrame = false;
1086  }
1087 
1088  return;
1089  }
1090  else if( m_bInNoEmbed )
1091  {
1092  switch( nToken )
1093  {
1094  case HtmlTokenId::NOEMBED_OFF:
1097  m_aContents.clear();
1098  m_bCallNextToken = false;
1099  m_bInNoEmbed = false;
1100  break;
1101 
1102  case HtmlTokenId::RAWDATA:
1104  break;
1105 
1106  default:
1107  OSL_ENSURE( false, "SwHTMLParser::NextToken: invalid tag" );
1108  break;
1109  }
1110 
1111  return;
1112  }
1113  else if( m_pAppletImpl )
1114  {
1115  // in an applet only <PARAM> tags and the </APPLET> tag
1116  // are of interest for us (for the moment)
1117  // <SCRIPT> is ignored here (from Netscape)!
1118 
1119  switch( nToken )
1120  {
1121  case HtmlTokenId::APPLET_OFF:
1122  m_bCallNextToken = false;
1123  EndApplet();
1124  break;
1125  case HtmlTokenId::OBJECT_OFF:
1126  m_bCallNextToken = false;
1127  EndObject();
1128  break;
1129  case HtmlTokenId::PARAM:
1130  InsertParam();
1131  break;
1132  default: break;
1133  }
1134 
1135  return;
1136  }
1137  else if( m_bTextArea )
1138  {
1139  // in a TextArea everything up to </TEXTAREA> is inserted as text.
1140  // <SCRIPT> is ignored here (from Netscape)!
1141 
1142  switch( nToken )
1143  {
1144  case HtmlTokenId::TEXTAREA_OFF:
1145  m_bCallNextToken = false;
1146  EndTextArea();
1147  break;
1148 
1149  default:
1150  InsertTextAreaText( nToken );
1151  break;
1152  }
1153 
1154  return;
1155  }
1156  else if( m_bSelect )
1157  {
1158  // HAS to be treated after bNoScript!
1159  switch( nToken )
1160  {
1161  case HtmlTokenId::SELECT_OFF:
1162  m_bCallNextToken = false;
1163  EndSelect();
1164  return;
1165 
1166  case HtmlTokenId::OPTION:
1168  return;
1169 
1170  case HtmlTokenId::TEXTTOKEN:
1171  InsertSelectText();
1172  return;
1173 
1174  case HtmlTokenId::INPUT:
1175  case HtmlTokenId::SCRIPT_ON:
1176  case HtmlTokenId::SCRIPT_OFF:
1177  case HtmlTokenId::NOSCRIPT_ON:
1178  case HtmlTokenId::NOSCRIPT_OFF:
1179  case HtmlTokenId::RAWDATA:
1180  // treat in normal switch
1181  break;
1182 
1183  default:
1184  // ignore
1185  return;
1186  }
1187  }
1188  else if( m_pMarquee )
1189  {
1190  // in a TextArea everything up to </TEXTAREA> is inserted as text.
1191  // The <SCRIPT> tags are ignored from MS-IE, we ignore the whole
1192  // script.
1193  switch( nToken )
1194  {
1195  case HtmlTokenId::MARQUEE_OFF:
1196  m_bCallNextToken = false;
1197  EndMarquee();
1198  break;
1199 
1200  case HtmlTokenId::TEXTTOKEN:
1202  break;
1203  default: break;
1204  }
1205 
1206  return;
1207  }
1208  else if( m_bInField )
1209  {
1210  switch( nToken )
1211  {
1212  case HtmlTokenId::SDFIELD_OFF:
1213  m_bCallNextToken = false;
1214  EndField();
1215  break;
1216 
1217  case HtmlTokenId::TEXTTOKEN:
1218  InsertFieldText();
1219  break;
1220  default: break;
1221  }
1222 
1223  return;
1224  }
1226  {
1227  switch( nToken )
1228  {
1229  case HtmlTokenId::ANCHOR_OFF:
1230  EndAnchor();
1231  m_bCallNextToken = false;
1232  break;
1233 
1234  case HtmlTokenId::TEXTTOKEN:
1236  break;
1237  default: break;
1238  }
1239  return;
1240  }
1241  else if( !m_aUnknownToken.isEmpty() )
1242  {
1243  // Paste content of unknown tags.
1244  // (but only if we are not in the header section) fdo#36080 fdo#34666
1245  if (!aToken.isEmpty() && !IsInHeader() )
1246  {
1247  if( !m_bDocInitalized )
1248  DocumentDetected();
1249  m_xDoc->getIDocumentContentOperations().InsertString( *m_pPam, aToken );
1250 
1251  // if there are temporary paragraph attributes and the
1252  // paragraph isn't empty then the paragraph attributes
1253  // are final.
1254  m_aParaAttrs.clear();
1255 
1256  SetAttr();
1257  }
1258 
1259  // Unknown tokens in the header are only closed by a matching
1260  // end-token, </HEAD> or <BODY>. Text inside is ignored.
1261  switch( nToken )
1262  {
1263  case HtmlTokenId::UNKNOWNCONTROL_OFF:
1264  if( m_aUnknownToken != sSaveToken )
1265  return;
1266  [[fallthrough]];
1267  case HtmlTokenId::FRAMESET_ON:
1268  case HtmlTokenId::HEAD_OFF:
1269  case HtmlTokenId::BODY_ON:
1270  case HtmlTokenId::IMAGE: // Don't know why Netscape acts this way.
1271  m_aUnknownToken.clear();
1272  break;
1273  case HtmlTokenId::TEXTTOKEN:
1274  return;
1275  default:
1276  m_aUnknownToken.clear();
1277  break;
1278  }
1279  }
1280  }
1281 
1282  switch( nToken )
1283  {
1284  case HtmlTokenId::BODY_ON:
1285  if (!m_bBodySeen)
1286  {
1287  m_bBodySeen = true;
1288  if( !m_aStyleSource.isEmpty() )
1289  {
1290  m_pCSS1Parser->ParseStyleSheet( m_aStyleSource );
1291  m_aStyleSource.clear();
1292  }
1293  if( IsNewDoc() )
1294  {
1296  // If there is a template for the first or the right page,
1297  // it is set here.
1298  const SwPageDesc *pPageDesc = nullptr;
1299  if( m_pCSS1Parser->IsSetFirstPageDesc() )
1300  pPageDesc = m_pCSS1Parser->GetFirstPageDesc();
1301  else if( m_pCSS1Parser->IsSetRightPageDesc() )
1302  pPageDesc = m_pCSS1Parser->GetRightPageDesc();
1303 
1304  if( pPageDesc )
1305  {
1306  m_xDoc->getIDocumentContentOperations().InsertPoolItem( *m_pPam, SwFormatPageDesc( pPageDesc ) );
1307  }
1308  }
1309  }
1310  break;
1311 
1312  case HtmlTokenId::LINK:
1313  InsertLink();
1314  break;
1315 
1316  case HtmlTokenId::BASE:
1317  {
1318  const HTMLOptions& rHTMLOptions = GetOptions();
1319  for (size_t i = rHTMLOptions.size(); i; )
1320  {
1321  const HTMLOption& rOption = rHTMLOptions[--i];
1322  switch( rOption.GetToken() )
1323  {
1324  case HtmlOptionId::HREF:
1325  m_sBaseURL = rOption.GetString();
1326  break;
1327  case HtmlOptionId::TARGET:
1328  if( IsNewDoc() )
1329  {
1330  SwDocShell *pDocShell(m_xDoc->GetDocShell());
1331  OSL_ENSURE(pDocShell, "no SwDocShell");
1332  if (pDocShell) {
1333  uno::Reference<document::XDocumentPropertiesSupplier> xDPS(
1334  pDocShell->GetModel(), uno::UNO_QUERY_THROW);
1335  uno::Reference<document::XDocumentProperties>
1336  xDocProps(xDPS->getDocumentProperties());
1337  OSL_ENSURE(xDocProps.is(),"no DocumentProperties");
1338  if (xDocProps.is()) {
1339  xDocProps->setDefaultTarget(
1340  rOption.GetString());
1341  }
1342  }
1343  }
1344  break;
1345  default: break;
1346  }
1347  }
1348  }
1349  break;
1350 
1351  case HtmlTokenId::META:
1352  {
1353  SvKeyValueIterator *pHTTPHeader = nullptr;
1354  if( IsNewDoc() )
1355  {
1356  SwDocShell *pDocSh = m_xDoc->GetDocShell();
1357  if( pDocSh )
1358  pHTTPHeader = pDocSh->GetHeaderAttributes();
1359  }
1360  SwDocShell *pDocShell(m_xDoc->GetDocShell());
1361  OSL_ENSURE(pDocShell, "no SwDocShell");
1362  if (pDocShell)
1363  {
1364  uno::Reference<document::XDocumentProperties> xDocProps;
1365  if (IsNewDoc())
1366  {
1367  const uno::Reference<document::XDocumentPropertiesSupplier>
1368  xDPS( pDocShell->GetModel(), uno::UNO_QUERY_THROW );
1369  xDocProps = xDPS->getDocumentProperties();
1370  OSL_ENSURE(xDocProps.is(), "DocumentProperties is null");
1371  }
1372  ParseMetaOptions( xDocProps, pHTTPHeader );
1373  }
1374  }
1375  break;
1376 
1377  case HtmlTokenId::TITLE_ON:
1378  m_bInTitle = true;
1379  break;
1380 
1381  case HtmlTokenId::SCRIPT_ON:
1382  NewScript();
1383  break;
1384 
1385  case HtmlTokenId::SCRIPT_OFF:
1386  EndScript();
1387  break;
1388 
1389  case HtmlTokenId::NOSCRIPT_ON:
1390  case HtmlTokenId::NOSCRIPT_OFF:
1391  bInsertUnknown = true;
1392  break;
1393 
1394  case HtmlTokenId::STYLE_ON:
1395  NewStyle();
1396  break;
1397 
1398  case HtmlTokenId::STYLE_OFF:
1399  EndStyle();
1400  break;
1401 
1402  case HtmlTokenId::RAWDATA:
1403  if( !m_bIgnoreRawData )
1404  {
1405  if( IsReadScript() )
1406  {
1407  AddScriptSource();
1408  }
1409  else if( IsReadStyle() )
1410  {
1411  if( !m_aStyleSource.isEmpty() )
1412  m_aStyleSource += "\n";
1413  m_aStyleSource += aToken;
1414  }
1415  }
1416  break;
1417 
1418  case HtmlTokenId::OBJECT_ON:
1419  if (m_bXHTML)
1420  {
1421  if (!InsertEmbed())
1422  InsertImage();
1423  break;
1424  }
1425 #if HAVE_FEATURE_JAVA
1426  NewObject();
1427  m_bCallNextToken = m_pAppletImpl!=nullptr && m_xTable;
1428 #endif
1429  break;
1430 
1431  case HtmlTokenId::OBJECT_OFF:
1432  if (!m_aEmbeds.empty())
1433  m_aEmbeds.pop();
1434  break;
1435 
1436  case HtmlTokenId::APPLET_ON:
1437 #if HAVE_FEATURE_JAVA
1438  InsertApplet();
1439  m_bCallNextToken = m_pAppletImpl!=nullptr && m_xTable;
1440 #endif
1441  break;
1442 
1443  case HtmlTokenId::IFRAME_ON:
1446  break;
1447 
1448  case HtmlTokenId::LINEBREAK:
1449  if( !IsReadPRE() )
1450  {
1451  InsertLineBreak();
1452  break;
1453  }
1454  else
1455  bGetIDOption = true;
1456  // <BR>s in <PRE> resemble true LFs, hence no break
1457  [[fallthrough]];
1458 
1459  case HtmlTokenId::NEWPARA:
1460  // CR in PRE/LISTING/XMP
1461  {
1462  if( HtmlTokenId::NEWPARA==nToken ||
1464  {
1465  AppendTextNode(); // there is no LF at this place
1466  // therefore it will cause no problems
1467  SetTextCollAttrs();
1468  }
1469  // progress bar
1470  if (m_xProgress)
1471  m_xProgress->Update(rInput.Tell());
1472  }
1473  break;
1474 
1475  case HtmlTokenId::NONBREAKSPACE:
1476  m_xDoc->getIDocumentContentOperations().InsertString( *m_pPam, OUString(CHAR_HARDBLANK) );
1477  break;
1478 
1479  case HtmlTokenId::SOFTHYPH:
1480  m_xDoc->getIDocumentContentOperations().InsertString( *m_pPam, OUString(CHAR_SOFTHYPHEN) );
1481  break;
1482 
1483  case HtmlTokenId::LINEFEEDCHAR:
1484  if( m_pPam->GetPoint()->nContent.GetIndex() )
1485  AppendTextNode();
1486  if (!m_xTable && !m_xDoc->IsInHeaderFooter(m_pPam->GetPoint()->nNode))
1487  {
1488  NewAttr(m_xAttrTab, &m_xAttrTab->pBreak, SvxFormatBreakItem(SvxBreak::PageBefore, RES_BREAK));
1489  EndAttr( m_xAttrTab->pBreak, false );
1490  }
1491  break;
1492 
1493  case HtmlTokenId::TEXTTOKEN:
1494  // insert string without spanning attributes at the end.
1495  if( !aToken.isEmpty() && ' '==aToken[0] && !IsReadPRE() )
1496  {
1497  sal_Int32 nPos = m_pPam->GetPoint()->nContent.GetIndex();
1498  const SwTextNode* pTextNode = nPos ? m_pPam->GetPoint()->nNode.GetNode().GetTextNode() : nullptr;
1499  if (pTextNode)
1500  {
1501  const OUString& rText = pTextNode->GetText();
1502  sal_Unicode cLast = rText[--nPos];
1503  if( ' ' == cLast || '\x0a' == cLast)
1504  aToken = aToken.copy(1);
1505  }
1506  else
1507  aToken = aToken.copy(1);
1508 
1509  if( aToken.isEmpty() )
1510  {
1511  m_bUpperSpace = bUpperSpaceSave;
1512  break;
1513  }
1514  }
1515 
1516  if( !aToken.isEmpty() )
1517  {
1518  if( !m_bDocInitalized )
1519  DocumentDetected();
1520 
1521  if (!m_aEmbeds.empty())
1522  {
1523  // The text token is inside an OLE object, which means
1524  // alternate text.
1525  SwOLENode* pOLENode = m_aEmbeds.top();
1526  if (SwFlyFrameFormat* pFormat
1527  = dynamic_cast<SwFlyFrameFormat*>(pOLENode->GetFlyFormat()))
1528  {
1529  if (SdrObject* pObject = SwXFrame::GetOrCreateSdrObject(*pFormat))
1530  {
1531  pObject->SetTitle(pObject->GetTitle() + aToken);
1532  break;
1533  }
1534  }
1535  }
1536 
1537  m_xDoc->getIDocumentContentOperations().InsertString( *m_pPam, aToken );
1538 
1539  // if there are temporary paragraph attributes and the
1540  // paragraph isn't empty then the paragraph attributes
1541  // are final.
1542  m_aParaAttrs.clear();
1543 
1544  SetAttr();
1545  }
1546  break;
1547 
1548  case HtmlTokenId::HORZRULE:
1549  InsertHorzRule();
1550  break;
1551 
1552  case HtmlTokenId::IMAGE:
1553  InsertImage();
1554  // if only the parser references the doc, we can break and set
1555  // an error code
1556  if( 1 == m_xDoc->getReferenceCount() )
1557  {
1558  eState = SvParserState::Error;
1559  }
1560  break;
1561 
1562  case HtmlTokenId::SPACER:
1563  InsertSpacer();
1564  break;
1565 
1566  case HtmlTokenId::EMBED:
1567  InsertEmbed();
1568  break;
1569 
1570  case HtmlTokenId::NOEMBED_ON:
1571  m_bInNoEmbed = true;
1572  m_bCallNextToken = bool(m_xTable);
1573  ReadRawData( OOO_STRING_SVTOOLS_HTML_noembed );
1574  break;
1575 
1576  case HtmlTokenId::DEFLIST_ON:
1577  if( m_nOpenParaToken != HtmlTokenId::NONE )
1578  EndPara();
1579  NewDefList();
1580  break;
1581  case HtmlTokenId::DEFLIST_OFF:
1582  if( m_nOpenParaToken != HtmlTokenId::NONE )
1583  EndPara();
1584  EndDefListItem( HtmlTokenId::NONE );
1585  EndDefList();
1586  break;
1587 
1588  case HtmlTokenId::DD_ON:
1589  case HtmlTokenId::DT_ON:
1590  if( m_nOpenParaToken != HtmlTokenId::NONE )
1591  EndPara();
1592  EndDefListItem();// close <DD>/<DT> and set no template
1593  NewDefListItem( nToken );
1594  break;
1595 
1596  case HtmlTokenId::DD_OFF:
1597  case HtmlTokenId::DT_OFF:
1598  // c.f. HtmlTokenId::LI_OFF
1599  // Actually we should close a DD/DT now.
1600  // But neither Netscape nor Microsoft do this and so don't we.
1601  EndDefListItem( nToken );
1602  break;
1603 
1604  // divisions
1605  case HtmlTokenId::DIVISION_ON:
1606  case HtmlTokenId::CENTER_ON:
1607  if (!m_isInTableStructure)
1608  {
1609  if (m_nOpenParaToken != HtmlTokenId::NONE)
1610  {
1611  if (IsReadPRE())
1612  m_nOpenParaToken = HtmlTokenId::NONE;
1613  else
1614  EndPara();
1615  }
1616  NewDivision( nToken );
1617  }
1618  break;
1619 
1620  case HtmlTokenId::DIVISION_OFF:
1621  case HtmlTokenId::CENTER_OFF:
1622  if (!m_isInTableStructure)
1623  {
1624  if (m_nOpenParaToken != HtmlTokenId::NONE)
1625  {
1626  if (IsReadPRE())
1627  m_nOpenParaToken = HtmlTokenId::NONE;
1628  else
1629  EndPara();
1630  }
1631  EndDivision();
1632  }
1633  break;
1634 
1635  case HtmlTokenId::MULTICOL_ON:
1636  if( m_nOpenParaToken != HtmlTokenId::NONE )
1637  EndPara();
1638  NewMultiCol();
1639  break;
1640 
1641  case HtmlTokenId::MULTICOL_OFF:
1642  if( m_nOpenParaToken != HtmlTokenId::NONE )
1643  EndPara();
1644  EndTag( HtmlTokenId::MULTICOL_ON );
1645  break;
1646 
1647  case HtmlTokenId::MARQUEE_ON:
1648  NewMarquee();
1649  m_bCallNextToken = m_pMarquee!=nullptr && m_xTable;
1650  break;
1651 
1652  case HtmlTokenId::FORM_ON:
1653  NewForm();
1654  break;
1655  case HtmlTokenId::FORM_OFF:
1656  EndForm();
1657  break;
1658 
1659  // templates
1660  case HtmlTokenId::PARABREAK_ON:
1661  if( m_nOpenParaToken != HtmlTokenId::NONE )
1662  EndPara( true );
1663  NewPara();
1664  break;
1665 
1666  case HtmlTokenId::PARABREAK_OFF:
1667  EndPara( true );
1668  break;
1669 
1670  case HtmlTokenId::ADDRESS_ON:
1671  if( m_nOpenParaToken != HtmlTokenId::NONE )
1672  EndPara();
1673  NewTextFormatColl( HtmlTokenId::ADDRESS_ON, RES_POOLCOLL_SENDADRESS );
1674  break;
1675 
1676  case HtmlTokenId::ADDRESS_OFF:
1677  if( m_nOpenParaToken != HtmlTokenId::NONE )
1678  EndPara();
1679  EndTextFormatColl( HtmlTokenId::ADDRESS_OFF );
1680  break;
1681 
1682  case HtmlTokenId::BLOCKQUOTE_ON:
1683  case HtmlTokenId::BLOCKQUOTE30_ON:
1684  if( m_nOpenParaToken != HtmlTokenId::NONE )
1685  EndPara();
1686  NewTextFormatColl( HtmlTokenId::BLOCKQUOTE_ON, RES_POOLCOLL_HTML_BLOCKQUOTE );
1687  break;
1688 
1689  case HtmlTokenId::BLOCKQUOTE_OFF:
1690  case HtmlTokenId::BLOCKQUOTE30_OFF:
1691  if( m_nOpenParaToken != HtmlTokenId::NONE )
1692  EndPara();
1693  EndTextFormatColl( HtmlTokenId::BLOCKQUOTE_ON );
1694  break;
1695 
1696  case HtmlTokenId::PREFORMTXT_ON:
1697  case HtmlTokenId::LISTING_ON:
1698  case HtmlTokenId::XMP_ON:
1699  if( m_nOpenParaToken != HtmlTokenId::NONE )
1700  EndPara();
1702  break;
1703 
1704  case HtmlTokenId::PREFORMTXT_OFF:
1705  m_bNoParSpace = true; // the last PRE-paragraph gets a spacing
1706  EndTextFormatColl( HtmlTokenId::PREFORMTXT_OFF );
1707  break;
1708 
1709  case HtmlTokenId::LISTING_OFF:
1710  case HtmlTokenId::XMP_OFF:
1711  EndTextFormatColl( nToken );
1712  break;
1713 
1714  case HtmlTokenId::HEAD1_ON:
1715  case HtmlTokenId::HEAD2_ON:
1716  case HtmlTokenId::HEAD3_ON:
1717  case HtmlTokenId::HEAD4_ON:
1718  case HtmlTokenId::HEAD5_ON:
1719  case HtmlTokenId::HEAD6_ON:
1720  if( m_nOpenParaToken != HtmlTokenId::NONE )
1721  {
1722  if( IsReadPRE() )
1723  m_nOpenParaToken = HtmlTokenId::NONE;
1724  else
1725  EndPara();
1726  }
1727  NewHeading( nToken );
1728  break;
1729 
1730  case HtmlTokenId::HEAD1_OFF:
1731  case HtmlTokenId::HEAD2_OFF:
1732  case HtmlTokenId::HEAD3_OFF:
1733  case HtmlTokenId::HEAD4_OFF:
1734  case HtmlTokenId::HEAD5_OFF:
1735  case HtmlTokenId::HEAD6_OFF:
1736  EndHeading();
1737  break;
1738 
1739  case HtmlTokenId::TABLE_ON:
1740  if( !m_vPendingStack.empty() )
1741  BuildTable( SvxAdjust::End );
1742  else
1743  {
1744  if( m_nOpenParaToken != HtmlTokenId::NONE )
1745  EndPara();
1746  OSL_ENSURE(!m_xTable.get(), "table in table not allowed here");
1747  if( !m_xTable && (IsNewDoc() || !m_pPam->GetNode().FindTableNode()) &&
1748  (m_pPam->GetPoint()->nNode.GetIndex() >
1749  m_xDoc->GetNodes().GetEndOfExtras().GetIndex() ||
1751  {
1752  if ( m_nParaCnt < 5 )
1753  Show(); // show what we have up to here
1754 
1755  SvxAdjust eAdjust = m_xAttrTab->pAdjust
1756  ? static_cast<const SvxAdjustItem&>(m_xAttrTab->pAdjust->GetItem()).
1757  GetAdjust()
1758  : SvxAdjust::End;
1759  BuildTable( eAdjust );
1760  }
1761  else
1762  bInsertUnknown = m_bKeepUnknown;
1763  }
1764  break;
1765 
1766  // lists
1767  case HtmlTokenId::DIRLIST_ON:
1768  case HtmlTokenId::MENULIST_ON:
1769  case HtmlTokenId::ORDERLIST_ON:
1770  case HtmlTokenId::UNORDERLIST_ON:
1771  if( m_nOpenParaToken != HtmlTokenId::NONE )
1772  EndPara();
1773  NewNumBulList( nToken );
1774  break;
1775 
1776  case HtmlTokenId::DIRLIST_OFF:
1777  case HtmlTokenId::MENULIST_OFF:
1778  case HtmlTokenId::ORDERLIST_OFF:
1779  case HtmlTokenId::UNORDERLIST_OFF:
1780  if( m_nOpenParaToken != HtmlTokenId::NONE )
1781  EndPara();
1782  EndNumBulListItem( HtmlTokenId::NONE, true );
1783  EndNumBulList( nToken );
1784  break;
1785 
1786  case HtmlTokenId::LI_ON:
1787  case HtmlTokenId::LISTHEADER_ON:
1788  if( m_nOpenParaToken != HtmlTokenId::NONE &&
1790  || HtmlTokenId::PARABREAK_ON==m_nOpenParaToken) )
1791  {
1792  // only finish paragraph for <P><LI>, not for <DD><LI>
1793  EndPara();
1794  }
1795 
1796  EndNumBulListItem( HtmlTokenId::NONE, false );// close <LI>/<LH> and don't set a template
1797  NewNumBulListItem( nToken );
1798  break;
1799 
1800  case HtmlTokenId::LI_OFF:
1801  case HtmlTokenId::LISTHEADER_OFF:
1802  EndNumBulListItem( nToken, false );
1803  break;
1804 
1805  // Attribute :
1806  case HtmlTokenId::ITALIC_ON:
1807  {
1811  NewStdAttr( HtmlTokenId::ITALIC_ON,
1812  &m_xAttrTab->pItalic, aPosture,
1813  &m_xAttrTab->pItalicCJK, &aPostureCJK,
1814  &m_xAttrTab->pItalicCTL, &aPostureCTL );
1815  }
1816  break;
1817 
1818  case HtmlTokenId::BOLD_ON:
1819  {
1823  NewStdAttr( HtmlTokenId::BOLD_ON,
1824  &m_xAttrTab->pBold, aWeight,
1825  &m_xAttrTab->pBoldCJK, &aWeightCJK,
1826  &m_xAttrTab->pBoldCTL, &aWeightCTL );
1827  }
1828  break;
1829 
1830  case HtmlTokenId::STRIKE_ON:
1831  case HtmlTokenId::STRIKETHROUGH_ON:
1832  {
1833  NewStdAttr( HtmlTokenId::STRIKE_ON, &m_xAttrTab->pStrike,
1835  }
1836  break;
1837 
1838  case HtmlTokenId::UNDERLINE_ON:
1839  {
1840  NewStdAttr( HtmlTokenId::UNDERLINE_ON, &m_xAttrTab->pUnderline,
1842  }
1843  break;
1844 
1845  case HtmlTokenId::SUPERSCRIPT_ON:
1846  {
1847  NewStdAttr( HtmlTokenId::SUPERSCRIPT_ON, &m_xAttrTab->pEscapement,
1849  }
1850  break;
1851 
1852  case HtmlTokenId::SUBSCRIPT_ON:
1853  {
1854  NewStdAttr( HtmlTokenId::SUBSCRIPT_ON, &m_xAttrTab->pEscapement,
1856  }
1857  break;
1858 
1859  case HtmlTokenId::BLINK_ON:
1860  {
1861  NewStdAttr( HtmlTokenId::BLINK_ON, &m_xAttrTab->pBlink,
1862  SvxBlinkItem( true, RES_CHRATR_BLINK ) );
1863  }
1864  break;
1865 
1866  case HtmlTokenId::SPAN_ON:
1867  NewStdAttr( HtmlTokenId::SPAN_ON );
1868  break;
1869 
1870  case HtmlTokenId::ITALIC_OFF:
1871  case HtmlTokenId::BOLD_OFF:
1872  case HtmlTokenId::STRIKE_OFF:
1873  case HtmlTokenId::UNDERLINE_OFF:
1874  case HtmlTokenId::SUPERSCRIPT_OFF:
1875  case HtmlTokenId::SUBSCRIPT_OFF:
1876  case HtmlTokenId::BLINK_OFF:
1877  case HtmlTokenId::SPAN_OFF:
1878  EndTag( nToken );
1879  break;
1880 
1881  case HtmlTokenId::STRIKETHROUGH_OFF:
1882  EndTag( HtmlTokenId::STRIKE_OFF );
1883  break;
1884 
1885  case HtmlTokenId::BASEFONT_ON:
1886  NewBasefontAttr();
1887  break;
1888  case HtmlTokenId::BASEFONT_OFF:
1889  EndBasefontAttr();
1890  break;
1891  case HtmlTokenId::FONT_ON:
1892  case HtmlTokenId::BIGPRINT_ON:
1893  case HtmlTokenId::SMALLPRINT_ON:
1894  NewFontAttr( nToken );
1895  break;
1896  case HtmlTokenId::FONT_OFF:
1897  case HtmlTokenId::BIGPRINT_OFF:
1898  case HtmlTokenId::SMALLPRINT_OFF:
1899  EndFontAttr( nToken );
1900  break;
1901 
1902  case HtmlTokenId::EMPHASIS_ON:
1903  case HtmlTokenId::CITIATION_ON:
1904  case HtmlTokenId::STRONG_ON:
1905  case HtmlTokenId::CODE_ON:
1906  case HtmlTokenId::SAMPLE_ON:
1907  case HtmlTokenId::KEYBOARD_ON:
1908  case HtmlTokenId::VARIABLE_ON:
1909  case HtmlTokenId::DEFINSTANCE_ON:
1910  case HtmlTokenId::SHORTQUOTE_ON:
1911  case HtmlTokenId::LANGUAGE_ON:
1912  case HtmlTokenId::AUTHOR_ON:
1913  case HtmlTokenId::PERSON_ON:
1914  case HtmlTokenId::ACRONYM_ON:
1915  case HtmlTokenId::ABBREVIATION_ON:
1916  case HtmlTokenId::INSERTEDTEXT_ON:
1917  case HtmlTokenId::DELETEDTEXT_ON:
1918 
1919  case HtmlTokenId::TELETYPE_ON:
1920  NewCharFormat( nToken );
1921  break;
1922 
1923  case HtmlTokenId::SDFIELD_ON:
1924  NewField();
1926  break;
1927 
1928  case HtmlTokenId::EMPHASIS_OFF:
1929  case HtmlTokenId::CITIATION_OFF:
1930  case HtmlTokenId::STRONG_OFF:
1931  case HtmlTokenId::CODE_OFF:
1932  case HtmlTokenId::SAMPLE_OFF:
1933  case HtmlTokenId::KEYBOARD_OFF:
1934  case HtmlTokenId::VARIABLE_OFF:
1935  case HtmlTokenId::DEFINSTANCE_OFF:
1936  case HtmlTokenId::SHORTQUOTE_OFF:
1937  case HtmlTokenId::LANGUAGE_OFF:
1938  case HtmlTokenId::AUTHOR_OFF:
1939  case HtmlTokenId::PERSON_OFF:
1940  case HtmlTokenId::ACRONYM_OFF:
1941  case HtmlTokenId::ABBREVIATION_OFF:
1942  case HtmlTokenId::INSERTEDTEXT_OFF:
1943  case HtmlTokenId::DELETEDTEXT_OFF:
1944 
1945  case HtmlTokenId::TELETYPE_OFF:
1946  EndTag( nToken );
1947  break;
1948 
1949  case HtmlTokenId::HEAD_OFF:
1950  if( !m_aStyleSource.isEmpty() )
1951  {
1952  m_pCSS1Parser->ParseStyleSheet( m_aStyleSource );
1953  m_aStyleSource.clear();
1954  }
1955  break;
1956 
1957  case HtmlTokenId::DOCTYPE:
1958  case HtmlTokenId::BODY_OFF:
1959  case HtmlTokenId::HTML_OFF:
1960  case HtmlTokenId::HEAD_ON:
1961  case HtmlTokenId::TITLE_OFF:
1962  break; // don't evaluate further???
1963  case HtmlTokenId::HTML_ON:
1964  {
1965  const HTMLOptions& rHTMLOptions = GetOptions();
1966  for (size_t i = rHTMLOptions.size(); i; )
1967  {
1968  const HTMLOption& rOption = rHTMLOptions[--i];
1969  if( HtmlOptionId::DIR == rOption.GetToken() )
1970  {
1971  const OUString& rDir = rOption.GetString();
1972  SfxItemSet aItemSet( m_xDoc->GetAttrPool(),
1973  m_pCSS1Parser->GetWhichMap() );
1974  SvxCSS1PropertyInfo aPropInfo;
1975  OUString aDummy;
1976  ParseStyleOptions( aDummy, aDummy, aDummy, aItemSet,
1977  aPropInfo, nullptr, &rDir );
1978 
1979  m_pCSS1Parser->SetPageDescAttrs( nullptr, &aItemSet );
1980  break;
1981  }
1982  }
1983  }
1984  break;
1985 
1986  case HtmlTokenId::INPUT:
1987  InsertInput();
1988  break;
1989 
1990  case HtmlTokenId::TEXTAREA_ON:
1991  NewTextArea();
1993  break;
1994 
1995  case HtmlTokenId::SELECT_ON:
1996  NewSelect();
1998  break;
1999 
2000  case HtmlTokenId::ANCHOR_ON:
2001  NewAnchor();
2002  break;
2003 
2004  case HtmlTokenId::ANCHOR_OFF:
2005  EndAnchor();
2006  break;
2007 
2008  case HtmlTokenId::COMMENT:
2009  if( ( aToken.getLength() > 5 ) && ( ! m_bIgnoreHTMLComments ) )
2010  {
2011  // insert as Post-It
2012  // If there are no space characters right behind
2013  // the <!-- and on front of the -->, leave the comment untouched.
2014  if( ' ' == aToken[ 3 ] &&
2015  ' ' == aToken[ aToken.getLength()-3 ] )
2016  {
2017  OUString aComment( aToken.copy( 3, aToken.getLength()-5 ) );
2018  InsertComment(comphelper::string::strip(aComment, ' '));
2019  }
2020  else
2021  {
2022  OUStringBuffer aComment;
2023  aComment.append('<').append(aToken).append('>');
2024  InsertComment( aComment.makeStringAndClear() );
2025  }
2026  }
2027  break;
2028 
2029  case HtmlTokenId::MAP_ON:
2030  // Image Maps are read asynchronously: At first only an image map is created
2031  // Areas are processed later. Nevertheless the
2032  // ImageMap is inserted into the IMap-Array, because it might be used
2033  // already.
2034  m_pImageMap = new ImageMap;
2036  {
2037  if (!m_pImageMaps)
2038  m_pImageMaps.reset( new ImageMaps );
2039  m_pImageMaps->push_back(std::unique_ptr<ImageMap>(m_pImageMap));
2040  }
2041  else
2042  {
2043  delete m_pImageMap;
2044  m_pImageMap = nullptr;
2045  }
2046  break;
2047 
2048  case HtmlTokenId::MAP_OFF:
2049  // there is no ImageMap anymore (don't delete IMap, because it's
2050  // already contained in the array!)
2051  m_pImageMap = nullptr;
2052  break;
2053 
2054  case HtmlTokenId::AREA:
2055  if( m_pImageMap )
2056  ParseAreaOptions( m_pImageMap, m_sBaseURL, SvMacroItemId::OnMouseOver,
2057  SvMacroItemId::OnMouseOut );
2058  break;
2059 
2060  case HtmlTokenId::FRAMESET_ON:
2061  bInsertUnknown = m_bKeepUnknown;
2062  break;
2063 
2064  case HtmlTokenId::NOFRAMES_ON:
2065  if( IsInHeader() )
2066  FinishHeader();
2067  bInsertUnknown = m_bKeepUnknown;
2068  break;
2069 
2070  case HtmlTokenId::UNKNOWNCONTROL_ON:
2071  // Ignore content of unknown token in the header, if the token
2072  // does not start with a '!'.
2073  // (but judging from the code, also if does not start with a '%')
2074  // (and also if we're not somewhere we consider PRE)
2075  if( IsInHeader() && !IsReadPRE() && m_aUnknownToken.isEmpty() &&
2076  !sSaveToken.isEmpty() && '!' != sSaveToken[0] &&
2077  '%' != sSaveToken[0] )
2078  m_aUnknownToken = sSaveToken;
2079  [[fallthrough]];
2080 
2081  default:
2082  bInsertUnknown = m_bKeepUnknown;
2083  break;
2084  }
2085 
2086  if( bGetIDOption )
2087  InsertIDOption();
2088 
2089  if( bInsertUnknown )
2090  {
2091  OUStringBuffer aComment("HTML: <");
2092  if( (nToken >= HtmlTokenId::ONOFF_START) && isOffToken(nToken) )
2093  aComment.append("/");
2094  aComment.append(sSaveToken);
2095  if( !aToken.isEmpty() )
2096  {
2097  UnescapeToken();
2098  aComment.append(" ").append(aToken);
2099  }
2100  aComment.append(">");
2101  InsertComment( aComment.makeStringAndClear() );
2102  }
2103 
2104  // if there are temporary paragraph attributes and the
2105  // paragraph isn't empty then the paragraph attributes are final.
2106  if( !m_aParaAttrs.empty() && m_pPam->GetPoint()->nContent.GetIndex() )
2107  m_aParaAttrs.clear();
2108 }
2109 
2110 static void lcl_swhtml_getItemInfo( const HTMLAttr& rAttr,
2111  bool& rScriptDependent,
2112  sal_uInt16& rScriptType )
2113 {
2114  switch( rAttr.GetItem().Which() )
2115  {
2116  case RES_CHRATR_FONT:
2117  case RES_CHRATR_FONTSIZE:
2118  case RES_CHRATR_LANGUAGE:
2119  case RES_CHRATR_POSTURE:
2120  case RES_CHRATR_WEIGHT:
2121  rScriptType = i18n::ScriptType::LATIN;
2122  rScriptDependent = true;
2123  break;
2124  case RES_CHRATR_CJK_FONT:
2128  case RES_CHRATR_CJK_WEIGHT:
2129  rScriptType = i18n::ScriptType::ASIAN;
2130  rScriptDependent = true;
2131  break;
2132  case RES_CHRATR_CTL_FONT:
2136  case RES_CHRATR_CTL_WEIGHT:
2137  rScriptType = i18n::ScriptType::COMPLEX;
2138  rScriptDependent = true;
2139  break;
2140  default:
2141  rScriptDependent = false;
2142  break;
2143  }
2144 }
2145 
2146 bool SwHTMLParser::AppendTextNode( SwHTMLAppendMode eMode, bool bUpdateNum )
2147 {
2148  // A hard line break at the end always must be removed.
2149  // A second one we replace with paragraph spacing.
2150  sal_Int32 nLFStripped = StripTrailingLF();
2151  if( (AM_NOSPACE==eMode || AM_SOFTNOSPACE==eMode) && nLFStripped > 1 )
2152  eMode = AM_SPACE;
2153 
2154  // the hard attributes of this paragraph will never be invalid again
2155  m_aParaAttrs.clear();
2156 
2157  SwTextNode *pTextNode = (AM_SPACE==eMode || AM_NOSPACE==eMode) ?
2158  m_pPam->GetPoint()->nNode.GetNode().GetTextNode() : nullptr;
2159 
2160  if (pTextNode)
2161  {
2162  const SvxULSpaceItem& rULSpace =
2163  static_cast<const SvxULSpaceItem&>(pTextNode->SwContentNode::GetAttr( RES_UL_SPACE ));
2164 
2165  bool bChange = AM_NOSPACE==eMode ? rULSpace.GetLower() > 0
2166  : rULSpace.GetLower() == 0;
2167 
2168  if( bChange )
2169  {
2170  const SvxULSpaceItem& rCollULSpace =
2171  pTextNode->GetAnyFormatColl().GetULSpace();
2172 
2173  bool bMayReset = AM_NOSPACE==eMode ? rCollULSpace.GetLower() == 0
2174  : rCollULSpace.GetLower() > 0;
2175 
2176  if( bMayReset &&
2177  rCollULSpace.GetUpper() == rULSpace.GetUpper() )
2178  {
2179  pTextNode->ResetAttr( RES_UL_SPACE );
2180  }
2181  else
2182  {
2183  pTextNode->SetAttr(
2184  SvxULSpaceItem( rULSpace.GetUpper(),
2185  AM_NOSPACE==eMode ? 0 : HTML_PARSPACE, RES_UL_SPACE ) );
2186  }
2187  }
2188  }
2189  m_bNoParSpace = AM_NOSPACE==eMode || AM_SOFTNOSPACE==eMode;
2190 
2191  SwPosition aOldPos( *m_pPam->GetPoint() );
2192 
2193  bool bRet = m_xDoc->getIDocumentContentOperations().AppendTextNode( *m_pPam->GetPoint() );
2194 
2195  // split character attributes and maybe set none,
2196  // which are set for the whole paragraph
2197  const SwNodeIndex& rEndIdx = aOldPos.nNode;
2198  const sal_Int32 nEndCnt = aOldPos.nContent.GetIndex();
2199  const SwPosition& rPos = *m_pPam->GetPoint();
2200 
2201  HTMLAttr** pHTMLAttributes = reinterpret_cast<HTMLAttr**>(m_xAttrTab.get());
2202  for (auto nCnt = sizeof(HTMLAttrTable) / sizeof(HTMLAttr*); nCnt--; ++pHTMLAttributes)
2203  {
2204  HTMLAttr *pAttr = *pHTMLAttributes;
2205  if( pAttr && pAttr->GetItem().Which() < RES_PARATR_BEGIN )
2206  {
2207  bool bWholePara = false;
2208 
2209  while( pAttr )
2210  {
2211  HTMLAttr *pNext = pAttr->GetNext();
2212  if( pAttr->GetSttParaIdx() < rEndIdx.GetIndex() ||
2213  (!bWholePara &&
2214  pAttr->GetSttPara() == rEndIdx &&
2215  pAttr->GetSttCnt() != nEndCnt) )
2216  {
2217  bWholePara =
2218  pAttr->GetSttPara() == rEndIdx &&
2219  pAttr->GetSttCnt() == 0;
2220 
2221  sal_Int32 nStt = pAttr->m_nStartContent;
2222  bool bScript = false;
2223  sal_uInt16 nScriptItem;
2224  bool bInsert = true;
2225  lcl_swhtml_getItemInfo( *pAttr, bScript,
2226  nScriptItem );
2227  // set previous part
2228  if( bScript )
2229  {
2230  const SwTextNode *pTextNd =
2231  pAttr->GetSttPara().GetNode().GetTextNode();
2232  OSL_ENSURE( pTextNd, "No text node" );
2233  if( pTextNd )
2234  {
2235  const OUString& rText = pTextNd->GetText();
2236  sal_uInt16 nScriptText =
2237  g_pBreakIt->GetBreakIter()->getScriptType(
2238  rText, pAttr->GetSttCnt() );
2239  sal_Int32 nScriptEnd = g_pBreakIt->GetBreakIter()
2240  ->endOfScript( rText, nStt, nScriptText );
2241  while (nScriptEnd < nEndCnt && nScriptEnd != -1)
2242  {
2243  if( nScriptItem == nScriptText )
2244  {
2245  HTMLAttr *pSetAttr =
2246  pAttr->Clone( rEndIdx, nScriptEnd );
2247  pSetAttr->m_nStartContent = nStt;
2248  pSetAttr->ClearPrev();
2249  if( !pNext || bWholePara )
2250  {
2251  if (pSetAttr->m_bInsAtStart)
2252  m_aSetAttrTab.push_front( pSetAttr );
2253  else
2254  m_aSetAttrTab.push_back( pSetAttr );
2255  }
2256  else
2257  pNext->InsertPrev( pSetAttr );
2258  }
2259  nStt = nScriptEnd;
2260  nScriptText = g_pBreakIt->GetBreakIter()->getScriptType(
2261  rText, nStt );
2262  nScriptEnd = g_pBreakIt->GetBreakIter()
2263  ->endOfScript( rText, nStt, nScriptText );
2264  }
2265  bInsert = nScriptItem == nScriptText;
2266  }
2267  }
2268  if( bInsert )
2269  {
2270  HTMLAttr *pSetAttr =
2271  pAttr->Clone( rEndIdx, nEndCnt );
2272  pSetAttr->m_nStartContent = nStt;
2273 
2274  // When the attribute is for the whole paragraph, the outer
2275  // attributes aren't effective anymore. Hence it may not be inserted
2276  // in the Prev-List of an outer attribute, because that won't be
2277  // set. That leads to shifting when fields are used.
2278  if( !pNext || bWholePara )
2279  {
2280  if (pSetAttr->m_bInsAtStart)
2281  m_aSetAttrTab.push_front( pSetAttr );
2282  else
2283  m_aSetAttrTab.push_back( pSetAttr );
2284  }
2285  else
2286  pNext->InsertPrev( pSetAttr );
2287  }
2288  else
2289  {
2290  HTMLAttr *pPrev = pAttr->GetPrev();
2291  if( pPrev )
2292  {
2293  // the previous attributes must be set anyway
2294  if( !pNext || bWholePara )
2295  {
2296  if (pPrev->m_bInsAtStart)
2297  m_aSetAttrTab.push_front( pPrev );
2298  else
2299  m_aSetAttrTab.push_back( pPrev );
2300  }
2301  else
2302  pNext->InsertPrev( pPrev );
2303  }
2304  }
2305  pAttr->ClearPrev();
2306  }
2307 
2308  pAttr->SetStart( rPos );
2309  pAttr = pNext;
2310  }
2311  }
2312  }
2313 
2314  if( bUpdateNum )
2315  {
2316  if( GetNumInfo().GetDepth() )
2317  {
2318  sal_uInt8 nLvl = GetNumInfo().GetLevel();
2319  SetNodeNum( nLvl );
2320  }
2321  else
2323  }
2324 
2325  // We must set the attribute of the paragraph before now (because of JavaScript)
2326  SetAttr();
2327 
2328  // Now it is time to get rid of all script dependent hints that are
2329  // equal to the settings in the style
2330  SwTextNode *pTextNd = rEndIdx.GetNode().GetTextNode();
2331  OSL_ENSURE( pTextNd, "There is the txt node" );
2332  size_t nCntAttr = (pTextNd && pTextNd->GetpSwpHints())
2333  ? pTextNd->GetSwpHints().Count() : 0;
2334  if( nCntAttr )
2335  {
2336  // These are the end position of all script dependent hints.
2337  // If we find a hint that starts before the current end position,
2338  // we have to set it. If we find a hint that start behind or at
2339  // that position, we have to take the hint value into account.
2340  // If it is equal to the style, or in fact the paragraph value
2341  // for that hint, the hint is removed. Otherwise its end position
2342  // is remembered.
2343  sal_Int32 aEndPos[15] =
2344  { 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0 };
2345  SwpHints& rHints = pTextNd->GetSwpHints();
2346  for( size_t i=0; i < nCntAttr; i++ )
2347  {
2348  SwTextAttr *pHt = rHints.Get( i );
2349  sal_uInt16 nWhich = pHt->Which();
2350  sal_Int16 nIdx = 0;
2351  bool bFont = false;
2352  switch( nWhich )
2353  {
2354  case RES_CHRATR_FONT:
2355  nIdx = 0;
2356  bFont = true;
2357  break;
2358  case RES_CHRATR_FONTSIZE:
2359  nIdx = 1;
2360  break;
2361  case RES_CHRATR_LANGUAGE:
2362  nIdx = 2;
2363  break;
2364  case RES_CHRATR_POSTURE:
2365  nIdx = 3;
2366  break;
2367  case RES_CHRATR_WEIGHT:
2368  nIdx = 4;
2369  break;
2370  case RES_CHRATR_CJK_FONT:
2371  nIdx = 5;
2372  bFont = true;
2373  break;
2375  nIdx = 6;
2376  break;
2378  nIdx = 7;
2379  break;
2381  nIdx = 8;
2382  break;
2383  case RES_CHRATR_CJK_WEIGHT:
2384  nIdx = 9;
2385  break;
2386  case RES_CHRATR_CTL_FONT:
2387  nIdx = 10;
2388  bFont = true;
2389  break;
2391  nIdx = 11;
2392  break;
2394  nIdx = 12;
2395  break;
2397  nIdx = 13;
2398  break;
2399  case RES_CHRATR_CTL_WEIGHT:
2400  nIdx = 14;
2401  break;
2402  default:
2403  // Skip to next attribute
2404  continue;
2405  }
2406  const sal_Int32 nStt = pHt->GetStart();
2407  if( nStt >= aEndPos[nIdx] )
2408  {
2409  const SfxPoolItem& rItem =
2410  static_cast<const SwContentNode *>(pTextNd)->GetAttr( nWhich );
2411  if( bFont ? swhtml_css1atr_equalFontItems(rItem,pHt->GetAttr())
2412  : rItem == pHt->GetAttr() )
2413  {
2414  // The hint is the same as set in the paragraph and
2415  // therefore, it can be deleted
2416  // CAUTION!!! This WILL delete the hint and it MAY
2417  // also delete the SwpHints!!! To avoid any trouble
2418  // we leave the loop immediately if this is the last
2419  // hint.
2420  pTextNd->DeleteAttribute( pHt );
2421  if( 1 == nCntAttr )
2422  break;
2423  i--;
2424  nCntAttr--;
2425  }
2426  else
2427  {
2428  // The hint is different. Therefore all hints within that
2429  // hint have to be ignored.
2430  aEndPos[nIdx] = pHt->GetEnd() ? *pHt->GetEnd() : nStt;
2431  }
2432  }
2433  else
2434  {
2435  // The hint starts before another one ends.
2436  // The hint in this case is not deleted
2437  OSL_ENSURE( pHt->GetEnd() && *pHt->GetEnd() <= aEndPos[nIdx],
2438  "hints aren't nested properly!" );
2439  }
2440  }
2441  }
2442 
2443  if (!m_xTable && !--m_nParaCnt)
2444  Show();
2445 
2446  return bRet;
2447 }
2448 
// NOTE(review): the line carrying this function's signature is absent from
// this listing; only the body is visible. The three SetAttr() calls near the
// end are also truncated (their argument lines are missing).
//
// Purpose (as far as the visible body shows): while m_bNoParSpace is set,
// clear the flag and look at the text node directly in front of the current
// PaM node. If that node's RES_UL_SPACE item has no lower spacing, either
// drop the hard attribute (when the paragraph style already supplies lower
// spacing with the same upper value) or set a new spacing item whose value is
// selected by whether the node carries CJK or CTL character hints.
2450 {
2451  //If it already has ParSpace, return
2452  if( !m_bNoParSpace )
2453  return;
2454 
2455  m_bNoParSpace = false;
2456 
 // Index of the node immediately before the node the PaM points to.
2457  sal_uLong nNdIdx = m_pPam->GetPoint()->nNode.GetIndex() - 1;
2458 
2459  SwTextNode *pTextNode = m_xDoc->GetNodes()[nNdIdx]->GetTextNode();
 // Nothing to do when the preceding node is not a text node.
2460  if( !pTextNode )
2461  return;
2462 
 // Note: despite the "r" prefix this is a copy of the item, not a reference.
2463  SvxULSpaceItem rULSpace =
2464  static_cast<const SvxULSpaceItem&>(pTextNode->SwContentNode::GetAttr( RES_UL_SPACE ));
2465  if( !rULSpace.GetLower() )
2466  {
2467  const SvxULSpaceItem& rCollULSpace =
2468  pTextNode->GetAnyFormatColl().GetULSpace();
 // The style already has lower spacing and the same upper spacing:
 // removing the hard attribute is enough.
2469  if( rCollULSpace.GetLower() &&
2470  rCollULSpace.GetUpper() == rULSpace.GetUpper() )
2471  {
2472  pTextNode->ResetAttr( RES_UL_SPACE );
2473  }
2474  else
2475  {
2476  //What I do here, is that I examine the attributes, and if
2477  //I find out, that it's CJK/CTL, then I set the paragraph space
2478  //to the value set in HTML_CJK_PARSPACE/HTML_CTL_PARSPACE.
2479 
2480  bool bIsCJK = false;
2481  bool bIsCTL = false;
2482 
2483  const size_t nCntAttr = pTextNode->GetpSwpHints()
2484  ? pTextNode->GetSwpHints().Count() : 0;
2485 
 // The first CJK or CTL character hint found decides; the scan stops there.
2486  for(size_t i = 0; i < nCntAttr; ++i)
2487  {
2488  SwTextAttr *const pHt = pTextNode->GetSwpHints().Get(i);
2489  sal_uInt16 const nWhich = pHt->Which();
2490  if (RES_CHRATR_CJK_FONT == nWhich ||
2491  RES_CHRATR_CJK_FONTSIZE == nWhich ||
2492  RES_CHRATR_CJK_LANGUAGE == nWhich ||
2493  RES_CHRATR_CJK_POSTURE == nWhich ||
2494  RES_CHRATR_CJK_WEIGHT == nWhich)
2495  {
2496  bIsCJK = true;
2497  break;
2498  }
2499  if (RES_CHRATR_CTL_FONT == nWhich ||
2500  RES_CHRATR_CTL_FONTSIZE == nWhich ||
2501  RES_CHRATR_CTL_LANGUAGE == nWhich ||
2502  RES_CHRATR_CTL_POSTURE == nWhich ||
2503  RES_CHRATR_CTL_WEIGHT == nWhich)
2504  {
2505  bIsCTL = true;
2506  break;
2507  }
2508  }
2509 
 // NOTE(review): listing lines 2513, 2518 and 2521 (the SetAttr arguments)
 // are missing; presumably each builds an SvxULSpaceItem with a
 // script-specific lower spacing - confirm against the real file.
2510  if( bIsCTL )
2511  {
2512  pTextNode->SetAttr(
2514  }
2515  else if( bIsCJK )
2516  {
2517  pTextNode->SetAttr(
2519  } else {
2520  pTextNode->SetAttr(
2522  }
2523  }
2524  }
2525 }
2526 
// NOTE(review): the signature line of this function is absent from this
// listing; the name "Show" is taken from the assertion text below.
//
// Ends the pending layout action, checks whether the import was aborted (or
// whether the parser holds the only reference to the document) and starts a
// new action on the view shell.
2528 {
2529  // Here
2530  // - a EndAction is called, so the document is formatted
2531  // - a Reschedule is called,
2532  // - the own View-Shell is set again
2533  // - and a StartAction is called
2534 
2535  OSL_ENSURE( SvParserState::Working==eState, "Show not in working state - That can go wrong" );
2536  SwViewShell *pOldVSh = CallEndAction();
2537 
 // NOTE(review): listing line 2538 is missing here; going by the comment
 // block above it is presumably the Reschedule call - confirm.
2539 
 // A reference count of 1 means nobody but the parser holds the document.
2540  if( ( m_xDoc->GetDocShell() && m_xDoc->GetDocShell()->IsAbortingImport() )
2541  || 1 == m_xDoc->getReferenceCount() )
2542  {
2543  // was the import aborted by SFX?
2544  eState = SvParserState::Error;
2545  }
2546 
2547  // Fetch the SwViewShell again, as it could be destroyed in Reschedule.
2548  SwViewShell *pVSh = CallStartAction( pOldVSh );
2549 
2550  // is the current node not visible anymore, then we use a bigger increment
2551  if( pVSh )
2552  {
 // NOTE(review): listing line 2553 (the left-hand side and condition of
 // this conditional expression) is missing; only the 5/50 alternatives remain.
2554  ? 5 : 50;
2555  }
2556 }
2557 
// NOTE(review): the signature line of this function is absent from this
// listing; the name "ShowStatLine" appears in the assertion text below.
//
// Reports the current read position of the input stream to the progress
// object when there is one; otherwise checks for an aborted import and, if the
// view shell has an invalid rectangle, cycles EndAction/StartAction.
2559 {
2560  // Here
2561  // - a Reschedule is called, so it can be scrolled
2562  // - the own View-Shell is set again
2563  // - a StartAction/EndAction is called, when there was scrolling.
2564 
2565  OSL_ENSURE( SvParserState::Working==eState, "ShowStatLine not in working state - That can go wrong" );
2566 
2567  // scroll bar
2568  if (m_xProgress)
2569  {
2570  m_xProgress->Update(rInput.Tell());
 // NOTE(review): listing line 2571 is missing here.
2572  }
2573  else
2574  {
 // NOTE(review): listing line 2575 is missing here; presumably the
 // Reschedule call mentioned in the comment block above - confirm.
2576 
 // A reference count of 1 means nobody but the parser holds the document.
2577  if( ( m_xDoc->GetDocShell() && m_xDoc->GetDocShell()->IsAbortingImport() )
2578  || 1 == m_xDoc->getReferenceCount() )
2579  // was the import aborted by SFX?
2580  eState = SvParserState::Error;
2581 
 // NOTE(review): listing line 2582, which declares pVSh, is missing here.
2583  if( pVSh && pVSh->HasInvalidRect() )
2584  {
 // Both calls pass false for their pointer check (and CallEndAction also
 // for its pending-action check).
2585  CallEndAction( false, false );
2586  CallStartAction( pVSh, false );
2587  }
2588  }
2589 }
2590 
// NOTE(review): the signature line of this function is absent from this
// listing; the body uses the names pVSh and bChkPtr, and the assertion text
// names it "CallStartAction".
//
// Remembers the view shell to work on in m_pActionViewShell and starts an
// action on it. Returns the remembered shell (may be null).
2592 {
2593  OSL_ENSURE( !m_pActionViewShell, "CallStartAction: SwViewShell already set" );
2594 
 // Look the shell up from the document when none was passed in or when the
 // caller asked for the pointer to be checked.
2595  if( !pVSh || bChkPtr )
2596  {
2597 #if OSL_DEBUG_LEVEL > 0
2598  SwViewShell *pOldVSh = pVSh;
2599 #endif
2600  pVSh = m_xDoc->getIDocumentLayoutAccess().GetCurrentViewShell();
2601 #if OSL_DEBUG_LEVEL > 0
2602  OSL_ENSURE( !pVSh || !pOldVSh || pOldVSh == pVSh, "CallStartAction: Who swapped the SwViewShell?" );
 // pVSh is already null when this condition holds, so the assignment has no
 // effect on the value; presumably kept as a debugging anchor.
2603  if( pOldVSh && !pVSh )
2604  pVSh = nullptr;
2605 #endif
2606  }
2607  m_pActionViewShell = pVSh;
2608 
2609  if( m_pActionViewShell )
2610  {
 // Edit shells get SwEditShell::StartAction().
2611  if( dynamic_cast< const SwEditShell *>( m_pActionViewShell ) != nullptr )
2612  static_cast<SwEditShell*>(m_pActionViewShell)->StartAction();
2613  else
 // NOTE(review): listing line 2614 (the statement of this else branch) is missing.
2615  }
2616 
2617  return m_pActionViewShell;
2618 }
2619 
// Ends the action that was started on m_pActionViewShell and forgets that
// shell.
//
// bChkAction: when true, nothing is ended unless the shell reports a pending
//             action (ActionPend()).
// bChkPtr:    when true, the remembered shell is first compared with the
//             document's current view shell and dropped if they differ.
//
// Returns m_pActionViewShell unchanged on the early-out path, otherwise pVSh.
// NOTE(review): listing lines 2644, 2654, 2660 and 2669 are missing from this
// function; in particular the declaration of the returned pVSh is not visible.
2620 SwViewShell *SwHTMLParser::CallEndAction( bool bChkAction, bool bChkPtr )
2621 {
2622  if( bChkPtr )
2623  {
2624  SwViewShell *pVSh = m_xDoc->getIDocumentLayoutAccess().GetCurrentViewShell();
2625  OSL_ENSURE( !pVSh || m_pActionViewShell == pVSh,
2626  "CallEndAction: Who swapped the SwViewShell?" );
2627 #if OSL_DEBUG_LEVEL > 0
 // pVSh is already null when this condition holds; the assignment does not
 // change its value.
2628  if( m_pActionViewShell && !pVSh )
2629  pVSh = nullptr;
2630 #endif
 // The remembered shell is no longer the document's current one: forget it.
2631  if( pVSh != m_pActionViewShell )
2632  m_pActionViewShell = nullptr;
2633  }
2634 
 // Early out: no shell, or (if requested) no action pending on it.
2635  if( !m_pActionViewShell || (bChkAction && !m_pActionViewShell->ActionPend()) )
2636  return m_pActionViewShell;
2637 
2638  if( dynamic_cast< const SwEditShell *>( m_pActionViewShell ) != nullptr )
2639  {
2640  // Already scrolled?, then make sure that the view doesn't move!
 // Lock state and EndActionByVirDev flag are saved here and restored after
 // EndAction().
2641  const bool bOldLock = m_pActionViewShell->IsViewLocked();
2642  m_pActionViewShell->LockView( true );
2643  const bool bOldEndActionByVirDev = m_pActionViewShell->IsEndActionByVirDev();
 // NOTE(review): listing line 2644 is missing here.
2645  static_cast<SwEditShell*>(m_pActionViewShell)->EndAction();
2646  m_pActionViewShell->SetEndActionByVirDev( bOldEndActionByVirDev );
2647  m_pActionViewShell->LockView( bOldLock );
2648 
2649  // bChkJumpMark is only set when the object was also found
2650  if( m_bChkJumpMark )
2651  {
2652  const Point aVisSttPos( DOCUMENTBORDER, DOCUMENTBORDER );
 // Only act when there is a medium and the visible area still starts at the
 // document border position.
2653  if( GetMedium() && aVisSttPos == m_pActionViewShell->VisArea().Pos() )
 // NOTE(review): listing line 2654 (start of the call that receives the
 // URL mark below) is missing.
2655  GetMedium()->GetURLObject().GetMark() );
2656  m_bChkJumpMark = false;
2657  }
2658  }
2659  else
 // NOTE(review): listing line 2660 (the statement of this else branch) is missing.
2661 
2662  // if the parser holds the last reference to the document, then we can
2663  // abort here and set an error.
2664  if( 1 == m_xDoc->getReferenceCount() )
2665  {
2666  eState = SvParserState::Error;
2667  }
2668 
 // NOTE(review): listing line 2669 is missing here; it presumably saves
 // m_pActionViewShell into pVSh before the member is cleared - confirm.
2670  m_pActionViewShell = nullptr;
2671 
2672  return pVSh;
2673 }
2674 
// NOTE(review): the signature line of this function is absent from this
// listing; the assertion text names it "CheckActionViewShell".
//
// Compares the remembered action view shell with the document's current view
// shell and forgets the remembered one if they differ. Returns the (possibly
// cleared) m_pActionViewShell.
2676 {
2677  SwViewShell *pVSh = m_xDoc->getIDocumentLayoutAccess().GetCurrentViewShell();
2678  OSL_ENSURE( !pVSh || m_pActionViewShell == pVSh,
2679  "CheckActionViewShell: Who has swapped SwViewShell?" );
2680 #if OSL_DEBUG_LEVEL > 0
 // pVSh is already null when this condition holds; the assignment does not
 // change its value.
2681  if( m_pActionViewShell && !pVSh )
2682  pVSh = nullptr;
2683 #endif
2684  if( pVSh != m_pActionViewShell )
2685  m_pActionViewShell = nullptr;
2686 
2687  return m_pActionViewShell;
2688 }
2689 
// Applies queued attributes to the document.
//
// The function works in three passes:
//  1. It walks m_aSetAttrTab from back to front. Every entry selected by the
//     bSetAttr test is removed from the table and applied together with its
//     chain of previous attributes (GetPrev()). Field-like attributes are not
//     inserted right away but collected.
//  2. It walks m_aMoveFlyFrames from back to front and re-anchors the selected
//     fly frame formats at character position.
//  3. It inserts the collected field attributes.
//
// bChkEnd:      selects the stricter of the two bSetAttr/bMoveFly tests, which
//               compares end positions with the current PaM position.
// bBeforeTable: the attributes are being set right before a table is
//               inserted; attributes ending in the current node are moved
//               back or discarded.
// pPostIts:     optional; when given, Postit and Script fields are handed over
//               to the front of this deque instead of being inserted.
//
// NOTE(review): listing lines 2868-2869, 2875, 2886, 2920, 2922, 2930 and 2934
// are missing from this function.
2690 void SwHTMLParser::SetAttr_( bool bChkEnd, bool bBeforeTable,
2691  std::deque<std::unique_ptr<HTMLAttr>> *pPostIts )
2692 {
 // Scratch PaM used to mark the range of each attribute that gets applied.
2693  std::unique_ptr<SwPaM> pAttrPam( new SwPaM( *m_pPam->GetPoint() ) );
2694  const SwNodeIndex& rEndIdx = m_pPam->GetPoint()->nNode;
2695  const sal_Int32 nEndCnt = m_pPam->GetPoint()->nContent.GetIndex();
2696  HTMLAttr* pAttr;
2697  SwContentNode* pCNd;
2698 
 // Field attributes collected in pass 1; the vector owns them until pass 3.
2699  std::vector<std::unique_ptr<HTMLAttr>> aFields;
2700 
 // Back-to-front iteration, so erasing entry n leaves the entries still to be
 // visited at their indices.
2701  for( auto n = m_aSetAttrTab.size(); n; )
2702  {
2703  pAttr = m_aSetAttrTab[ --n ];
 // nWhich is taken from the table entry and reused for its whole Prev chain.
2704  sal_uInt16 nWhich = pAttr->m_pItem->Which();
2705 
2706  sal_uLong nEndParaIdx = pAttr->GetEndParaIdx();
2707  bool bSetAttr;
2708  if( bChkEnd )
2709  {
2710  // Set character attribute with end early on, so set them still in
2711  // the current paragraph (because of JavaScript and various "chats"(?)).
2712  // This shouldn't be done for attributes which are used for
2713  // the whole paragraph, because they could be from a paragraph style
2714  // which can't be set. Because the attributes are inserted with
2715  // SETATTR_DONTREPLACE, they should be able to be set later.
2716  bSetAttr = ( nEndParaIdx < rEndIdx.GetIndex() &&
2717  (RES_LR_SPACE != nWhich || !GetNumInfo().GetNumRule()) ) ||
2718  ( !pAttr->IsLikePara() &&
2719  nEndParaIdx == rEndIdx.GetIndex() &&
2720  pAttr->GetEndCnt() < nEndCnt &&
2721  (isCHRATR(nWhich) || isTXTATR_WITHEND(nWhich)) ) ||
2722  ( bBeforeTable &&
2723  nEndParaIdx == rEndIdx.GetIndex() &&
2724  !pAttr->GetEndCnt() );
2725  }
2726  else
2727  {
2728  // Attributes in body nodes array section shouldn't be set if we are in a
2729  // special nodes array section, but vice versa it's possible.
2730  sal_uLong nEndOfIcons = m_xDoc->GetNodes().GetEndOfExtras().GetIndex();
2731  bSetAttr = nEndParaIdx < rEndIdx.GetIndex() ||
2732  rEndIdx.GetIndex() > nEndOfIcons ||
2733  nEndParaIdx <= nEndOfIcons;
2734  }
2735 
2736  if( bSetAttr )
2737  {
2738  // The attribute shouldn't be in the list of temporary paragraph
2739  // attributes, because then it would be deleted.
 // Note: this loop empties m_aParaAttrs completely.
2740  while( !m_aParaAttrs.empty() )
2741  {
2742  OSL_ENSURE( pAttr != m_aParaAttrs.back(),
2743  "SetAttr: Attribute must not yet be set" );
2744  m_aParaAttrs.pop_back();
2745  }
2746 
2747  // then set it
2748  m_aSetAttrTab.erase( m_aSetAttrTab.begin() + n );
2749 
 // Apply the attribute and then each of its previous attributes; every
 // iteration either deletes pAttr or hands it over to a container.
2750  while( pAttr )
2751  {
2752  HTMLAttr *pPrev = pAttr->GetPrev();
2753  if( !pAttr->m_bValid )
2754  {
2755  // invalid attributes can be deleted
2756  delete pAttr;
2757  pAttr = pPrev;
2758  continue;
2759  }
2760 
2761  pCNd = pAttr->m_nStartPara.GetNode().GetContentNode();
2762  if( !pCNd )
2763  {
2764  // because of the awful deleting of nodes an index can also
2765  // point to an end node :-(
2766  if ( (pAttr->GetSttPara() == pAttr->GetEndPara()) &&
2767  !isTXTATR_NOEND(nWhich) )
2768  {
2769  // when the end index also points to the node, we don't
2770  // need to set attributes anymore, except if it's a text attribute.
2771  delete pAttr;
2772  pAttr = pPrev;
2773  continue;
2774  }
 // Advance the start to the next content node and begin at its first character.
2775  pCNd = m_xDoc->GetNodes().GoNext( &(pAttr->m_nStartPara) );
2776  if( pCNd )
2777  pAttr->m_nStartContent = 0;
2778  else
2779  {
2780  OSL_ENSURE( false, "SetAttr: GoNext() failed!" );
2781  delete pAttr;
2782  pAttr = pPrev;
2783  continue;
2784  }
2785  }
2786  pAttrPam->GetPoint()->nNode = pAttr->m_nStartPara;
2787 
2788  // because of the deleting of BRs the start index can also
2789  // point behind the end the text
2790  if( pAttr->m_nStartContent > pCNd->Len() )
2791  pAttr->m_nStartContent = pCNd->Len();
2792  pAttrPam->GetPoint()->nContent.Assign( pCNd, pAttr->m_nStartContent );
2793 
 // Mark stays at the start position; Point is moved to the end below.
2794  pAttrPam->SetMark();
2795  if ( (pAttr->GetSttPara() != pAttr->GetEndPara()) &&
2796  !isTXTATR_NOEND(nWhich) )
2797  {
2798  pCNd = pAttr->m_nEndPara.GetNode().GetContentNode();
2799  if( !pCNd )
2800  {
 // Fall back to the previous content node and end at its last character.
2801  pCNd = SwNodes::GoPrevious( &(pAttr->m_nEndPara) );
2802  if( pCNd )
2803  pAttr->m_nEndContent = pCNd->Len();
2804  else
2805  {
2806  OSL_ENSURE( false, "SetAttr: GoPrevious() failed!" );
2807  pAttrPam->DeleteMark();
2808  delete pAttr;
2809  pAttr = pPrev;
2810  continue;
2811  }
2812  }
2813 
2814  pAttrPam->GetPoint()->nNode = pAttr->m_nEndPara;
2815  }
2816  else if( pAttr->IsLikePara() )
2817  {
 // Paragraph-like attributes cover the node up to its end.
2818  pAttr->m_nEndContent = pCNd->Len();
2819  }
2820 
2821  // because of the deleting of BRs the start index can also
2822  // point behind the end the text
2823  if( pAttr->m_nEndContent > pCNd->Len() )
2824  pAttr->m_nEndContent = pCNd->Len();
2825 
2826  pAttrPam->GetPoint()->nContent.Assign( pCNd, pAttr->m_nEndContent );
2827  if( bBeforeTable &&
2828  pAttrPam->GetPoint()->nNode.GetIndex() ==
2829  rEndIdx.GetIndex() )
2830  {
2831  // If we're before inserting a table and the attribute ends
2832  // in the current node, then we must end it in the previous
2833  // node or discard it, if it starts in that node.
2834  if( nWhich != RES_BREAK && nWhich != RES_PAGEDESC &&
2835  !isTXTATR_NOEND(nWhich) )
2836  {
2837  if( pAttrPam->GetMark()->nNode.GetIndex() !=
2838  rEndIdx.GetIndex() )
2839  {
2840  OSL_ENSURE( !pAttrPam->GetPoint()->nContent.GetIndex(),
2841  "Content-Position before table not 0???" );
2842  pAttrPam->Move( fnMoveBackward );
2843  }
2844  else
2845  {
2846  pAttrPam->DeleteMark();
2847  delete pAttr;
2848  pAttr = pPrev;
2849  continue;
2850  }
2851  }
2852  }
2853 
2854  switch( nWhich )
2855  {
2856  case RES_FLTR_BOOKMARK: // insert bookmark
2857  {
2858  const OUString sName( static_cast<SfxStringItem*>(pAttr->m_pItem.get())->GetValue() );
2859  IDocumentMarkAccess* const pMarkAccess = m_xDoc->getIDocumentMarkAccess();
2860  IDocumentMarkAccess::const_iterator_t ppBkmk = pMarkAccess->findMark( sName );
2861  if( ppBkmk != pMarkAccess->getAllMarksEnd() &&
2862  (*ppBkmk)->GetMarkStart() == *pAttrPam->GetPoint() )
2863  break; // do not generate duplicates on this position
2864  pAttrPam->DeleteMark();
2865  const ::sw::mark::IMark* const pNewMark = pMarkAccess->makeMark(
2866  *pAttrPam,
2867  sName,
 // NOTE(review): listing lines 2868-2869 (remaining makeMark arguments) are missing.
2870 
2871  // jump to bookmark
2872  if( JumpToMarks::Mark == m_eJumpTo && pNewMark->GetName() == m_sJmpMark )
2873  {
2874  m_bChkJumpMark = true;
 // NOTE(review): listing line 2875 is missing here.
2876  }
2877  }
2878  break;
2879  case RES_TXTATR_FIELD:
2880  case RES_TXTATR_ANNOTATION:
2881  case RES_TXTATR_INPUTFIELD:
2882  {
 // The field type is only looked up when a post-it container was supplied.
2883  SwFieldIds nFieldWhich =
2884  pPostIts
2885  ? static_cast<const SwFormatField *>(pAttr->m_pItem.get())->GetField()->GetTyp()->Which()
 // NOTE(review): listing line 2886 (the alternative of this conditional) is missing.
2887  if( pPostIts && (SwFieldIds::Postit == nFieldWhich ||
2888  SwFieldIds::Script == nFieldWhich) )
2889  {
 // Ownership of pAttr passes to the caller's deque.
2890  pPostIts->emplace_front( pAttr );
2891  }
2892  else
2893  {
 // Ownership of pAttr passes to aFields; it is inserted in pass 3.
2894  aFields.emplace_back( pAttr);
2895  }
2896  }
 // pAttr must not be deleted here: a container owns it now.
2897  pAttrPam->DeleteMark();
2898  pAttr = pPrev;
2899  continue;
2900 
2901  case RES_LR_SPACE:
2902  if( pAttrPam->GetPoint()->nNode.GetIndex() ==
2903  pAttrPam->GetMark()->nNode.GetIndex())
2904  {
2905  // because of numbering set this attribute directly at node
2906  pCNd->SetAttr( *pAttr->m_pItem );
2907  break;
2908  }
2909  OSL_ENSURE( false,
2910  "LRSpace set over multiple paragraphs!" );
2911  [[fallthrough]]; // (shouldn't reach this point anyway)
2912 
2913  // tdf#94088 expand RES_BACKGROUND to the new fill attribute
2914  // definitions in the range [XATTR_FILL_FIRST .. XATTR_FILL_LAST].
2915  // This is the right place in the future if the adapted fill attributes
2916  // may be handled more directly in HTML import to handle them.
2917  case RES_BACKGROUND:
2918  {
2919  const SvxBrushItem& rBrush = static_cast< SvxBrushItem& >(*pAttr->m_pItem);
 // NOTE(review): listing lines 2920 and 2922 are missing; the declaration
 // and filling of aNewSet are therefore not visible.
2921 
2923  m_xDoc->getIDocumentContentOperations().InsertItemSet(*pAttrPam, aNewSet, SetAttrMode::DONTREPLACE);
2924  break;
2925  }
2926  default:
2927 
2928  // maybe jump to a bookmark
2929  if( RES_TXTATR_INETFMT == nWhich &&
 // NOTE(review): listing line 2930 (one operand of this condition) is missing.
2931  m_sJmpMark == static_cast<SwFormatINetFormat*>(pAttr->m_pItem.get())->GetName() )
2932  {
2933  m_bChkJumpMark = true;
 // NOTE(review): listing line 2934 is missing here.
2935  }
2936 
2937  m_xDoc->getIDocumentContentOperations().InsertPoolItem( *pAttrPam, *pAttr->m_pItem, SetAttrMode::DONTREPLACE );
2938  }
 // Common tail for every case that left the switch with break.
2939  pAttrPam->DeleteMark();
2940 
2941  delete pAttr;
2942  pAttr = pPrev;
2943  }
2944  }
2945  }
2946 
 // Pass 2: m_aMoveFlyFrames and m_aMoveFlyCnts are parallel containers; entry
 // n of one belongs to entry n of the other.
2947  for( auto n = m_aMoveFlyFrames.size(); n; )
2948  {
2949  SwFrameFormat *pFrameFormat = m_aMoveFlyFrames[ --n ];
2950 
2951  const SwFormatAnchor& rAnchor = pFrameFormat->GetAnchor();
2952  OSL_ENSURE( RndStdIds::FLY_AT_PARA == rAnchor.GetAnchorId(),
2953  "Only At-Para flys need special handling" );
2954  const SwPosition *pFlyPos = rAnchor.GetContentAnchor();
2955  sal_uLong nFlyParaIdx = pFlyPos->nNode.GetIndex();
2956  bool bMoveFly;
2957  if( bChkEnd )
2958  {
 // Move only flys anchored before the current PaM position.
2959  bMoveFly = nFlyParaIdx < rEndIdx.GetIndex() ||
2960  ( nFlyParaIdx == rEndIdx.GetIndex() &&
2961  m_aMoveFlyCnts[n] < nEndCnt );
2962  }
2963  else
2964  {
2965  sal_uLong nEndOfIcons = m_xDoc->GetNodes().GetEndOfExtras().GetIndex();
2966  bMoveFly = nFlyParaIdx < rEndIdx.GetIndex() ||
2967  rEndIdx.GetIndex() > nEndOfIcons ||
2968  nFlyParaIdx <= nEndOfIcons;
2969  }
2970  if( bMoveFly )
2971  {
 // Frames are removed, the anchor is switched to at-character at the stored
 // content index, and the frames are created again.
2972  pFrameFormat->DelFrames();
2973  *pAttrPam->GetPoint() = *pFlyPos;
2974  pAttrPam->GetPoint()->nContent.Assign( pAttrPam->GetContentNode(),
2975  m_aMoveFlyCnts[n] );
2976  SwFormatAnchor aAnchor( rAnchor );
2977  aAnchor.SetType( RndStdIds::FLY_AT_CHAR );
2978  aAnchor.SetAnchor( pAttrPam->GetPoint() );
2979  pFrameFormat->SetFormatAttr( aAnchor );
2980 
 // LEFT / TOP orientations are made relative to the character.
2981  const SwFormatHoriOrient& rHoriOri = pFrameFormat->GetHoriOrient();
2982  if( text::HoriOrientation::LEFT == rHoriOri.GetHoriOrient() )
2983  {
2984  SwFormatHoriOrient aHoriOri( rHoriOri );
2985  aHoriOri.SetRelationOrient( text::RelOrientation::CHAR );
2986  pFrameFormat->SetFormatAttr( aHoriOri );
2987  }
2988  const SwFormatVertOrient& rVertOri = pFrameFormat->GetVertOrient();
2989  if( text::VertOrientation::TOP == rVertOri.GetVertOrient() )
2990  {
2991  SwFormatVertOrient aVertOri( rVertOri );
2992  aVertOri.SetRelationOrient( text::RelOrientation::CHAR );
2993  pFrameFormat->SetFormatAttr( aVertOri );
2994  }
2995 
2996  pFrameFormat->MakeFrames();
2997  m_aMoveFlyFrames.erase( m_aMoveFlyFrames.begin() + n );
2998  m_aMoveFlyCnts.erase( m_aMoveFlyCnts.begin() + n );
2999  }
3000  }
 // Pass 3: insert the field attributes collected in pass 1.
3001  for (auto & field : aFields)
3002  {
 // NOTE(review): pCNd is not checked for null before being passed to Assign().
3003  pCNd = field->m_nStartPara.GetNode().GetContentNode();
3004  pAttrPam->GetPoint()->nNode = field->m_nStartPara;
3005  pAttrPam->GetPoint()->nContent.Assign( pCNd, field->m_nStartContent );
3006 
3007  if( bBeforeTable &&
3008  pAttrPam->GetPoint()->nNode.GetIndex() == rEndIdx.GetIndex() )
3009  {
 // bBeforeTable is true inside this branch, so the first assertion fires
 // whenever the branch is entered; it acts as a tripwire for this case.
3010  OSL_ENSURE( !bBeforeTable, "Aha, the case does occur" );
3011  OSL_ENSURE( !pAttrPam->GetPoint()->nContent.GetIndex(),
3012  "Content-Position before table not 0???" );
3013  // !!!
3014  pAttrPam->Move( fnMoveBackward );
3015  }
3016 
3017  m_xDoc->getIDocumentContentOperations().InsertPoolItem( *pAttrPam, *field->m_pItem );
3018 
 // Releases the attribute right after it has been inserted.
3019  field.reset();
3020  }
3021  aFields.clear();
3022 }
3023 
3024 void SwHTMLParser::NewAttr(const std::shared_ptr<HTMLAttrTable>& rAttrTable, HTMLAttr **ppAttr, const SfxPoolItem& rItem )
3025 {
3026  // Font height and font colour as well as escape attributes may not be
3027  // combined. Therefore they're saved in a list and in it the last opened
3028  // attribute is at the beginning and count is always one. For all other
3029  // attributes count is just incremented.
3030  if( *ppAttr )
3031  {
3032  HTMLAttr *pAttr = new HTMLAttr(*m_pPam->GetPoint(), rItem, ppAttr, rAttrTable);
3033  pAttr->InsertNext( *ppAttr );
3034  (*ppAttr) = pAttr;
3035  }
3036  else
3037  (*ppAttr) = new HTMLAttr(*m_pPam->GetPoint(), rItem, ppAttr, rAttrTable);
3038 }
3039 
// Ends the open attribute pAttr at the current PaM position and unlinks it
// from its list of open attributes.
//
// The attribute is either queued for setting (in m_aSetAttrTab, or in the
// Prev-list of the next open attribute of the same list) or deleted. For
// script-dependent items that start in the end paragraph, the range is first
// cut along script boundaries and only the parts whose script matches the
// item are queued.
//
// pAttr:     the attribute to end; it is deleted when nothing is inserted.
// bChkEmpty: when true, an attribute with an empty range is not inserted
//            (paragraph attributes that were moved back, RES_PAGEDESC and
//            RES_BREAK are exempt).
//
// Returns true when pAttr was queued, false when it was deleted.
// NOTE(review): listing line 3197 (the statement guarded by the final
// "if( bMoveBack )") is missing from this function.
3040 bool SwHTMLParser::EndAttr( HTMLAttr* pAttr, bool bChkEmpty )
3041 {
3042  bool bRet = true;
3043 
3044  // The list header is saved in the attribute.
3045  HTMLAttr **ppHead = pAttr->m_ppHead;
3046 
3047  OSL_ENSURE( ppHead, "No list header attribute found!" );
3048 
3049  // save the current position as end position
3050  const SwNodeIndex* pEndIdx = &m_pPam->GetPoint()->nNode;
3051  sal_Int32 nEndCnt = m_pPam->GetPoint()->nContent.GetIndex();
3052 
3053  // Is the last started or an earlier started attribute being ended?
3054  HTMLAttr *pLast = nullptr;
3055  if( ppHead && pAttr != *ppHead )
3056  {
3057  // The last started attribute isn't being ended
3058 
3059  // Then we look for attribute which was started immediately afterwards,
3060  // which has also not yet been ended (otherwise it would no longer be
3061  // in the list).
3062  pLast = *ppHead;
3063  while( pLast && pLast->GetNext() != pAttr )
3064  pLast = pLast->GetNext();
3065 
3066  OSL_ENSURE( pLast, "Attribute not found in own list!" );
3067  }
3068 
3069  bool bMoveBack = false;
3070  sal_uInt16 nWhich = pAttr->m_pItem->Which();
 // An attribute with an id of at least RES_PARATR_BEGIN that ends at content
 // position 0 of a later paragraph is ended one position earlier; pEndIdx
 // points into the PaM and so follows the move.
3071  if( !nEndCnt && RES_PARATR_BEGIN <= nWhich &&
3072  *pEndIdx != pAttr->GetSttPara() )
3073  {
3074  // Then move back one position in the content!
3075  bMoveBack = m_pPam->Move( fnMoveBackward );
3076  nEndCnt = m_pPam->GetPoint()->nContent.GetIndex();
3077  }
3078 
3079  // now end the attribute
3080  HTMLAttr *pNext = pAttr->GetNext();
3081 
3082  bool bInsert;
3083  sal_uInt16 nScriptItem = 0;
3084  bool bScript = false;
3085  // does it have a non-empty range?
3086  if( !bChkEmpty || (RES_PARATR_BEGIN <= nWhich && bMoveBack) ||
3087  RES_PAGEDESC == nWhich || RES_BREAK == nWhich ||
3088  *pEndIdx != pAttr->GetSttPara() ||
3089  nEndCnt != pAttr->GetSttCnt() )
3090  {
3091  bInsert = true;
3092  // We do some optimization for script dependent attributes here.
3093  if( *pEndIdx == pAttr->GetSttPara() )
3094  {
3095  lcl_swhtml_getItemInfo( *pAttr, bScript, nScriptItem );
3096  }
3097  }
3098  else
3099  {
3100  bInsert = false;
3101  }
3102 
 // The text node is only needed for the script-dependent splitting below.
3103  const SwTextNode *pTextNd = (bInsert && bScript) ?
3104  pAttr->GetSttPara().GetNode().GetTextNode() :
3105  nullptr;
3106 
3107  if (pTextNd)
3108  {
3109  const OUString& rText = pTextNd->GetText();
3110  sal_uInt16 nScriptText = g_pBreakIt->GetBreakIter()->getScriptType(
3111  rText, pAttr->GetSttCnt() );
3112  sal_Int32 nScriptEnd = g_pBreakIt->GetBreakIter()
3113  ->endOfScript( rText, pAttr->GetSttCnt(), nScriptText );
 // Walk the script runs that end before nEndCnt; a result of -1 from
 // endOfScript() stops the loop.
3114  while (nScriptEnd < nEndCnt && nScriptEnd != -1)
3115  {
3116  if( nScriptItem == nScriptText )
3117  {
 // Queue a clone that ends at the script boundary; the clone's Prev-list
 // is cleared.
3118  HTMLAttr *pSetAttr = pAttr->Clone( *pEndIdx, nScriptEnd );
3119  pSetAttr->ClearPrev();
3120  if( pNext )
3121  pNext->InsertPrev( pSetAttr );
3122  else
3123  {
3124  if (pSetAttr->m_bInsAtStart)
3125  m_aSetAttrTab.push_front( pSetAttr );
3126  else
3127  m_aSetAttrTab.push_back( pSetAttr );
3128  }
3129  }
 // The remaining attribute now starts at the script boundary.
3130  pAttr->m_nStartContent = nScriptEnd;
3131  nScriptText = g_pBreakIt->GetBreakIter()->getScriptType(
3132  rText, nScriptEnd );
3133  nScriptEnd = g_pBreakIt->GetBreakIter()
3134  ->endOfScript( rText, nScriptEnd, nScriptText );
3135  }
 // The last run is only inserted when its script matches the item's script.
3136  bInsert = nScriptItem == nScriptText;
3137  }
3138  if( bInsert )
3139  {
3140  pAttr->m_nEndPara = *pEndIdx;
3141  pAttr->m_nEndContent = nEndCnt;
 // Everything except INetFormat and CharFormat text attributes is inserted
 // at the start of m_aSetAttrTab.
3142  pAttr->m_bInsAtStart = RES_TXTATR_INETFMT != nWhich &&
3143  RES_TXTATR_CHARFMT != nWhich;
3144 
3145  if( !pNext )
3146  {
3147  // No open attributes of that type exists any longer, so all
3148  // can be set. Except they depend on another attribute, then
3149  // they're appended there.
3150  if (pAttr->m_bInsAtStart)
3151  m_aSetAttrTab.push_front( pAttr );
3152  else
3153  m_aSetAttrTab.push_back( pAttr );
3154  }
3155  else
3156  {
3157  // There are other open attributes of that type,
3158  // therefore the setting must be postponed.
3159  // Hence the current attribute is added at the end
3160  // of the Prev-List of the successor.
3161  pNext->InsertPrev( pAttr );
3162  }
3163  }
3164  else
3165  {
3166  // Then don't insert, but delete. Because of the "faking" of styles
3167  // by hard attributing there can be also other empty attributes in the
3168  // Prev-List, which must be set anyway.
 // pPrev is fetched before pAttr is deleted.
3169  HTMLAttr *pPrev = pAttr->GetPrev();
3170  bRet = false;
3171  delete pAttr;
3172 
3173  if( pPrev )
3174  {
3175  // The previous attributes must be set anyway.
3176  if( pNext )
3177  pNext->InsertPrev( pPrev );
3178  else
3179  {
3180  if (pPrev->m_bInsAtStart)
3181  m_aSetAttrTab.push_front( pPrev );
3182  else
3183  m_aSetAttrTab.push_back( pPrev );
3184  }
3185  }
3186 
3187  }
3188 
3189  // If the first attribute of the list was set, then the list header
3190  // must be corrected as well.
3191  if( pLast )
3192  pLast->m_pNext = pNext;
3193  else if( ppHead )
3194  *ppHead = pNext;
3195 
3196  if( bMoveBack )
 // NOTE(review): listing line 3197 is missing here; presumably it moves the
 // PaM forward again to undo the backward move above - confirm.
3198 
3199  return bRet;
3200 }
3201 
// NOTE(review): the signature line of this function is absent from this
// listing; the body uses a parameter named pAttr.
//
// Removes pAttr from its list of open attributes and deletes it without
// setting it. The attribute's Prev-list is preserved: it is handed to the
// next open attribute, or queued in m_aSetAttrTab when there is none.
3203 {
3204  // preliminary paragraph attributes are not allowed here, they could
3205  // be set here and then the pointers become invalid!
3206  OSL_ENSURE(m_aParaAttrs.empty(),
3207  "Danger: there are non-final paragraph attributes");
3208  m_aParaAttrs.clear();
3209 
3210  // The list header is saved in the attribute
3211  HTMLAttr **ppHead = pAttr->m_ppHead;
3212 
3213  OSL_ENSURE( ppHead, "no list header attribute found!" );
3214 
3215  // Is the last started or an earlier started attribute being removed?
3216  HTMLAttr *pLast = nullptr;
3217  if( ppHead && pAttr != *ppHead )
3218  {
3219  // The last started attribute isn't being ended
3220 
3221  // Then we look for attribute which was started immediately afterwards,
3222  // which has also not yet been ended (otherwise it would no longer be
3223  // in the list).
3224  pLast = *ppHead;
3225  while( pLast && pLast->GetNext() != pAttr )
3226  pLast = pLast->GetNext();
3227 
3228  OSL_ENSURE( pLast, "Attribute not found in own list!" );
3229  }
3230 
3231  // now delete the attribute
 // Neighbours are fetched before pAttr is deleted.
3232  HTMLAttr *pNext = pAttr->GetNext();
3233  HTMLAttr *pPrev = pAttr->GetPrev();
3234  //hold ref to xAttrTab until end of scope to ensure *ppHead validity
3235  std::shared_ptr<HTMLAttrTable> xAttrTab(pAttr->m_xAttrTab);
3236  delete pAttr;
3237 
3238  if( pPrev )
3239  {
3240  // The previous attributes must be set anyway.
3241  if( pNext )
3242  pNext->InsertPrev( pPrev );
3243  else
3244  {
3245  if (pPrev->m_bInsAtStart)
3246  m_aSetAttrTab.push_front( pPrev );
3247  else
3248  m_aSetAttrTab.push_back( pPrev );
3249  }
3250  }
3251 
3252  // If the first attribute of the list was deleted, then the list header
3253  // must be corrected as well.
3254  if( pLast )
3255  pLast->m_pNext = pNext;
3256  else if( ppHead )
3257  *ppHead = pNext;
3258 }
3259 
3260 void SwHTMLParser::SaveAttrTab(std::shared_ptr<HTMLAttrTable> const & rNewAttrTab)
3261 {
3262  // preliminary paragraph attributes are not allowed here, they could
3263  // be set here and then the pointers become invalid!
3264  OSL_ENSURE(m_aParaAttrs.empty(),
3265  "Danger: there are non-final paragraph attributes");
3266  m_aParaAttrs.clear();
3267 
3268  HTMLAttr** pHTMLAttributes = reinterpret_cast<HTMLAttr**>(m_xAttrTab.get());
3269  HTMLAttr** pSaveAttributes = reinterpret_cast<HTMLAttr**>(rNewAttrTab.get());
3270 
3271  for (auto nCnt = sizeof(HTMLAttrTable) / sizeof(HTMLAttr*); nCnt--; ++pHTMLAttributes, ++pSaveAttributes)
3272  {
3273  *pSaveAttributes = *pHTMLAttributes;
3274 
3275  HTMLAttr *pAttr = *pSaveAttributes;
3276  while (pAttr)
3277  {
3278  pAttr->SetHead(pSaveAttributes, rNewAttrTab);
3279  pAttr = pAttr->GetNext();
3280  }
3281 
3282  *pHTMLAttributes = nullptr;
3283  }
3284 }
3285 
// Splits every attribute that is currently open in m_xAttrTab at the PaM
// point: a clone covering the part up to the end position (nEndIdx/nEndCnt)
// is handed on for setting, and the original attribute is restarted at the
// PaM point and moved into the matching slot of rNewAttrTab.
// On return every slot of m_xAttrTab is null.
//
// rNewAttrTab   table that receives the restarted attributes, slot by slot.
// bMoveEndBack  when true, the end position is taken from the content node
//               found by SwNodes::GoPrevious() instead of the PaM point.
3286 void SwHTMLParser::SplitAttrTab( std::shared_ptr<HTMLAttrTable> const & rNewAttrTab,
3287  bool bMoveEndBack )
3288 {
3289  // preliminary paragraph attributes are not allowed here, they could
3290  // be set here and then the pointers become invalid!
3291  OSL_ENSURE(m_aParaAttrs.empty(),
3292  "Danger: there are non-final paragraph attributes");
3293  m_aParaAttrs.clear();
3294 
 // nSttIdx/nSttCnt: where the restarted attributes begin (the PaM point).
 // nEndIdx/nEndCnt: where the cloned, closed parts end.
3295  const SwNodeIndex& nSttIdx = m_pPam->GetPoint()->nNode;
3296  SwNodeIndex nEndIdx( nSttIdx );
3297 
3298  // close all still open attributes and re-open them after the table
 // Both tables are walked in parallel as flat arrays of HTMLAttr* slots.
3299  HTMLAttr** pHTMLAttributes = reinterpret_cast<HTMLAttr**>(m_xAttrTab.get());
3300  HTMLAttr** pSaveAttributes = reinterpret_cast<HTMLAttr**>(rNewAttrTab.get());
3301  bool bSetAttr = true;
3302  const sal_Int32 nSttCnt = m_pPam->GetPoint()->nContent.GetIndex();
3303  sal_Int32 nEndCnt = nSttCnt;
3304 
3305  if( bMoveEndBack )
3306  {
3307  sal_uLong nOldEnd = nEndIdx.GetIndex();
3308  sal_uLong nTmpIdx;
 // The assignments inside the condition are intentional: if either the
 // EndOfExtras or the EndOfAutotext index is >= nOldEnd, nTmpIdx becomes
 // the EndOfInserts index; otherwise it keeps the EndOfAutotext index.
3309  if( ( nTmpIdx = m_xDoc->GetNodes().GetEndOfExtras().GetIndex()) >= nOldEnd ||
3310  ( nTmpIdx = m_xDoc->GetNodes().GetEndOfAutotext().GetIndex()) >= nOldEnd )
3311  {
3312  nTmpIdx = m_xDoc->GetNodes().GetEndOfInserts().GetIndex();
3313  }
3314  SwContentNode* pCNd = SwNodes::GoPrevious(&nEndIdx);
3315 
3316  // Don't set attributes, when the PaM was moved outside of the content area.
3317  bSetAttr = pCNd && nTmpIdx < nEndIdx.GetIndex();
3318 
 // pCNd is only dereferenced when bSetAttr (and therefore pCNd) is non-null.
3319  nEndCnt = (bSetAttr ? pCNd->Len() : 0);
3320  }
 // One iteration per HTMLAttr* slot of the table.
3321  for (auto nCnt = sizeof(HTMLAttrTable) / sizeof(HTMLAttr*); nCnt--; (++pHTMLAttributes, ++pSaveAttributes))
3322  {
3323  HTMLAttr *pAttr = *pHTMLAttributes;
3324  *pSaveAttributes = nullptr;
3325  while( pAttr )
3326  {
 // Fetch both neighbours first; pAttr->Reset() below is documented
 // by the existing comment as breaking the link.
3327  HTMLAttr *pNext = pAttr->GetNext();
3328  HTMLAttr *pPrev = pAttr->GetPrev();
3329 
 // A closed clone is only produced when the attribute starts in a
 // node before nEndIdx, or in the same node at a content index that
 // differs from nEndCnt.
3330  if( bSetAttr &&
3331  ( pAttr->GetSttParaIdx() < nEndIdx.GetIndex() ||
3332  (pAttr->GetSttPara() == nEndIdx &&
3333  pAttr->GetSttCnt() != nEndCnt) ) )
3334  {
3335  // The attribute must be set before the list. We need the
3336  // original and therefore we clone it, because pointer to the
3337  // attribute exist in the other contexts. The Next-List is lost
3338  // in doing so, but the Previous-List is preserved.
3339  HTMLAttr *pSetAttr = pAttr->Clone( nEndIdx, nEndCnt );
3340 
 // With a successor the clone is attached to it as "previous";
 // otherwise it is queued directly in m_aSetAttrTab.
3341  if( pNext )
3342  pNext->InsertPrev( pSetAttr );
3343  else
3344  {
3345  if (pSetAttr->m_bInsAtStart)
3346  m_aSetAttrTab.push_front( pSetAttr );
3347  else
3348  m_aSetAttrTab.push_back( pSetAttr );
3349  }
3350  }
3351  else if( pPrev )
3352  {
3353  // If the attribute doesn't need to be set before the table, then
3354  // the previous attributes must still be set.
3355  if( pNext )
3356  pNext->InsertPrev( pPrev );
3357  else
3358  {
3359  if (pPrev->m_bInsAtStart)
3360  m_aSetAttrTab.push_front( pPrev );
3361  else
3362  m_aSetAttrTab.push_back( pPrev );
3363  }
3364  }
3365 
3366  // set the start of the attribute anew and break link
3367  pAttr->Reset(nSttIdx, nSttCnt, pSaveAttributes, rNewAttrTab);
3368 
 // Append the restarted attribute to the end of the list that is
 // being built in the corresponding slot of rNewAttrTab.
3369  if (*pSaveAttributes)
3370  {
3371  HTMLAttr *pSAttr = *pSaveAttributes;
3372  while( pSAttr->GetNext() )
3373  pSAttr = pSAttr->GetNext();
3374  pSAttr->InsertNext( pAttr );
3375  }
3376  else
3377  *pSaveAttributes = pAttr;
3378 
3379  pAttr = pNext;
3380  }
3381 
 // The slot of the current table is left empty.
3382  *pHTMLAttributes = nullptr;
3383  }
3384 }
3385 
3386 void SwHTMLParser::RestoreAttrTab(std::shared_ptr<HTMLAttrTable> const & rNewAttrTab)
3387 {
3388  // preliminary paragraph attributes are not allowed here, they could
3389  // be set here and then the pointers become invalid!
3390  OSL_ENSURE(m_aParaAttrs.empty(),
3391  "Danger: there are non-final paragraph attributes");
3392  m_aParaAttrs.clear();
3393 
3394  HTMLAttr** pHTMLAttributes = reinterpret_cast<HTMLAttr**>(m_xAttrTab.get());
3395  HTMLAttr** pSaveAttributes = reinterpret_cast<HTMLAttr**>(rNewAttrTab.get());
3396 
3397  for (auto nCnt = sizeof(HTMLAttrTable) / sizeof(HTMLAttr*); nCnt--; ++pHTMLAttributes, ++pSaveAttributes)
3398  {
3399  OSL_ENSURE(!*pHTMLAttributes, "The attribute table is not empty!");
3400 
3401  *pHTMLAttributes = *pSaveAttributes;
3402 
3403  HTMLAttr *pAttr = *pHTMLAttributes;
3404  while (pAttr)
3405  {
3406  OSL_ENSURE( !pAttr->GetPrev() || !pAttr->GetPrev()->m_ppHead,
3407  "Previous attribute has still a header" );
3408  pAttr->SetHead(pHTMLAttributes, m_xAttrTab);
3409  pAttr = pAttr->GetNext();
3410  }
3411 
3412  *pSaveAttributes = nullptr;
3413  }
3414 }
3415 
3416 void SwHTMLParser::InsertAttr( const SfxPoolItem& rItem, bool bInsAtStart )
3417 {
3418  HTMLAttr* pTmp = new HTMLAttr(*m_pPam->GetPoint(), rItem, nullptr, std::shared_ptr<HTMLAttrTable>());
3419  if (bInsAtStart)
3420  m_aSetAttrTab.push_front( pTmp );
3421  else
3422  m_aSetAttrTab.push_back( pTmp );
3423 }
3424 
3425 void SwHTMLParser::InsertAttrs( std::deque<std::unique_ptr<HTMLAttr>> rAttrs )
3426 {
3427  while( !rAttrs.empty() )
3428  {
3429  std::unique_ptr<HTMLAttr> pAttr = std::move(rAttrs.front());
3430  InsertAttr( pAttr->GetItem(), false );
3431  rAttrs.pop_front();
3432  }
3433 }
3434 
// NOTE(review): the signature line (listing line 3435) is missing from this
// listing. The body reads a token named nToken; presumably this is
// SwHTMLParser::NewStdAttr( HtmlTokenId nToken ) - confirm against the header.
//
// Opens a context for nToken: collects ID/STYLE/CLASS/LANG/DIR from the
// current tag's options, inserts the attributes resulting from those style
// options (if any), and pushes the context.
3436 {
3437  OUString aId, aStyle, aLang, aDir;
3438  OUString aClass;
3439 
 // Options are visited from last to first, so for a repeated option the
 // occurrence nearest the front of the list is the one that is kept.
3440  const HTMLOptions& rHTMLOptions = GetOptions();
3441  for (size_t i = rHTMLOptions.size(); i; )
3442  {
3443  const HTMLOption& rOption = rHTMLOptions[--i];
3444  switch( rOption.GetToken() )
3445  {
3446  case HtmlOptionId::ID:
3447  aId = rOption.GetString();
3448  break;
3449  case HtmlOptionId::STYLE:
3450  aStyle = rOption.GetString();
3451  break;
3452  case HtmlOptionId::CLASS:
3453  aClass = rOption.GetString();
3454  break;
3455  case HtmlOptionId::LANG:
3456  aLang = rOption.GetString();
3457  break;
3458  case HtmlOptionId::DIR:
3459  aDir = rOption.GetString();
3460  break;
3461  default: break;
3462  }
3463  }
3464 
3465  // create a new context
3466  std::unique_ptr<HTMLAttrContext> xCntxt(new HTMLAttrContext(nToken));
3467 
3468  // parse styles
3469  if( HasStyleOptions( aStyle, aId, aClass, &aLang, &aDir ) )
3470  {
3471  SfxItemSet aItemSet( m_xDoc->GetAttrPool(), m_pCSS1Parser->GetWhichMap() );
3472  SvxCSS1PropertyInfo aPropInfo;
3473 
3474  if( ParseStyleOptions( aStyle, aId, aClass, aItemSet, aPropInfo, &aLang, &aDir ) )
3475  {
 // CreateContainer() is only tried for SPAN_ON with a non-empty
 // class; DoPositioning() runs in every other case and also when
 // CreateContainer() returns false.
3476  if( HtmlTokenId::SPAN_ON != nToken || aClass.isEmpty() ||
3477  !CreateContainer( aClass, aItemSet, aPropInfo, xCntxt.get() ) )
3478  DoPositioning( aItemSet, aPropInfo, xCntxt.get() );
3479  InsertAttrs( aItemSet, aPropInfo, xCntxt.get(), true );
3480  }
3481  }
3482 
3483  // save the context
3484  PushContext(xCntxt);
3485 }
3486 
// NOTE(review): the first signature line (listing line 3487) is missing from
// this listing; only the trailing parameters are visible. Presumably this is
// the multi-item overload of SwHTMLParser::NewStdAttr taking a leading
// HtmlTokenId nToken - confirm against the header.
//
// Opens a context for nToken carrying up to three items.
// ppAttr/rItem    attribute-table slot and item that are always inserted.
// ppAttr2/pItem2  optional second slot/item; skipped when pItem2 is null.
// ppAttr3/pItem3  optional third slot/item; skipped when pItem3 is null.
3488  HTMLAttr **ppAttr, const SfxPoolItem & rItem,
3489  HTMLAttr **ppAttr2, const SfxPoolItem *pItem2,
3490  HTMLAttr **ppAttr3, const SfxPoolItem *pItem3 )
3491 {
3492  OUString aId, aStyle, aClass, aLang, aDir;
3493 
 // Options are visited from last to first, so for a repeated option the
 // occurrence nearest the front of the list is the one that is kept.
3494  const HTMLOptions& rHTMLOptions = GetOptions();
3495  for (size_t i = rHTMLOptions.size(); i; )
3496  {
3497  const HTMLOption& rOption = rHTMLOptions[--i];
3498  switch( rOption.GetToken() )
3499  {
3500  case HtmlOptionId::ID:
3501  aId = rOption.GetString();
3502  break;
3503  case HtmlOptionId::STYLE:
3504  aStyle = rOption.GetString();
3505  break;
3506  case HtmlOptionId::CLASS:
3507  aClass = rOption.GetString();
3508  break;
3509  case HtmlOptionId::LANG:
3510  aLang = rOption.GetString();
3511  break;
3512  case HtmlOptionId::DIR:
3513  aDir = rOption.GetString();
3514  break;
3515  default: break;
3516  }
3517  }
3518 
3519  // create a new context
3520  std::unique_ptr<HTMLAttrContext> xCntxt(new HTMLAttrContext(nToken));
3521 
3522  // parse styles
3523  if( HasStyleOptions( aStyle, aId, aClass, &aLang, &aDir ) )
3524  {
3525  SfxItemSet aItemSet( m_xDoc->GetAttrPool(), m_pCSS1Parser->GetWhichMap() );
3526  SvxCSS1PropertyInfo aPropInfo;
3527 
 // The passed items go into the set before the style options are
 // parsed into the same set.
3528  aItemSet.Put( rItem );
3529  if( pItem2 )
3530  aItemSet.Put( *pItem2 );
3531  if( pItem3 )
3532  aItemSet.Put( *pItem3 );
3533 
3534  if( ParseStyleOptions( aStyle, aId, aClass, aItemSet, aPropInfo, &aLang, &aDir ) )
3535  DoPositioning( aItemSet, aPropInfo, xCntxt.get() );
3536 
 // Inserted regardless of the ParseStyleOptions() result, because the
 // set already holds the passed items.
3537  InsertAttrs( aItemSet, aPropInfo, xCntxt.get(), true );
3538  }
3539  else
3540  {
 // No style options: insert each item into its own table slot.
3541  InsertAttr( ppAttr ,rItem, xCntxt.get() );
3542  if( pItem2 )
3543  {
3544  OSL_ENSURE( ppAttr2, "missing table entry for item2" );
3545  InsertAttr( ppAttr2, *pItem2, xCntxt.get() );
3546  }
3547  if( pItem3 )
3548  {
3549  OSL_ENSURE( ppAttr3, "missing table entry for item3" );
3550  InsertAttr( ppAttr3, *pItem3, xCntxt.get() );
3551  }
3552  }
3553 
3554  // save the context
3555  PushContext(xCntxt);
3556 }
3557 
// NOTE(review): the signature line (listing line 3558) is missing from this
// listing. Other code in this file calls EndTag( HtmlTokenId::BASEFONT_ON )
// and EndTag( nToken ); presumably this is
// SwHTMLParser::EndTag( HtmlTokenId nToken ) - confirm against the header.
//
// Pops the context that belongs to the "on" form of nToken and, if one was
// found, ends it via EndContext().
3559 {
3560  // fetch context
3561  std::unique_ptr<HTMLAttrContext> xCntxt(PopContext(getOnToken(nToken)));
3562  if (xCntxt)
3563  {
3564  // and maybe end the attributes
3565  EndContext(xCntxt.get());
3566  }
3567 }
3568 
// NOTE(review): the signature line (listing line 3569) is missing from this
// listing; the body creates a BASEFONT_ON context, so this is presumably
// SwHTMLParser::NewBasefontAttr() - confirm against the header.
//
// Opens a BASEFONT_ON context: reads the SIZE option (default 3, limited to
// 1..7), inserts the matching font height for the Western, CJK and CTL font
// size attributes, and records the size on m_aBaseFontStack.
3570 {
3571  OUString aId, aStyle, aClass, aLang, aDir;
3572  sal_uInt16 nSize = 3;
3573 
 // Options are visited from last to first, so for a repeated option the
 // occurrence nearest the front of the list is the one that is kept.
3574  const HTMLOptions& rHTMLOptions = GetOptions();
3575  for (size_t i = rHTMLOptions.size(); i; )
3576  {
3577  const HTMLOption& rOption = rHTMLOptions[--i];
3578  switch( rOption.GetToken() )
3579  {
3580  case HtmlOptionId::SIZE:
3581  nSize = static_cast<sal_uInt16>(rOption.GetNumber());
3582  break;
3583  case HtmlOptionId::ID:
3584  aId = rOption.GetString();
3585  break;
3586  case HtmlOptionId::STYLE:
3587  aStyle = rOption.GetString();
3588  break;
3589  case HtmlOptionId::CLASS:
3590  aClass = rOption.GetString();
3591  break;
3592  case HtmlOptionId::LANG:
3593  aLang = rOption.GetString();
3594  break;
3595  case HtmlOptionId::DIR:
3596  aDir = rOption.GetString();
3597  break;
3598  default: break;
3599  }
3600  }
3601 
 // Limit to 1..7 so that nSize-1 below is an index in the range 0..6.
3602  if( nSize < 1 )
3603  nSize = 1;
3604 
3605  if( nSize > 7 )
3606  nSize = 7;
3607 
3608  // create a new context
3609  std::unique_ptr<HTMLAttrContext> xCntxt(new HTMLAttrContext(HtmlTokenId::BASEFONT_ON));
3610 
3611  // parse styles
3612  if( HasStyleOptions( aStyle, aId, aClass, &aLang, &aDir ) )
3613  {
3614  SfxItemSet aItemSet( m_xDoc->GetAttrPool(), m_pCSS1Parser->GetWhichMap() );
3615  SvxCSS1PropertyInfo aPropInfo;
3616 
3617  //CJK has different defaults
3618  SvxFontHeightItem aFontHeight( m_aFontHeights[nSize-1], 100, RES_CHRATR_FONTSIZE );
3619  aItemSet.Put( aFontHeight );
3620  SvxFontHeightItem aFontHeightCJK( m_aFontHeights[nSize-1], 100, RES_CHRATR_CJK_FONTSIZE );
3621  aItemSet.Put( aFontHeightCJK );
3622  //Complex type can contain so many types of letters,
3623  //that it's not really worthy to bother, IMO.
3624  //Still, I have set a default.
3625  SvxFontHeightItem aFontHeightCTL( m_aFontHeights[nSize-1], 100, RES_CHRATR_CTL_FONTSIZE );
3626  aItemSet.Put( aFontHeightCTL );
3627 
3628  if( ParseStyleOptions( aStyle, aId, aClass, aItemSet, aPropInfo, &aLang, &aDir ) )
3629  DoPositioning( aItemSet, aPropInfo, xCntxt.get() );
3630 
 // Inserted regardless of the ParseStyleOptions() result, because the
 // set already holds the three font heights.
3631  InsertAttrs( aItemSet, aPropInfo, xCntxt.get(), true );
3632  }
3633  else
3634  {
 // No style options: each height goes into its own table slot
 // (pFontHeight, pFontHeightCJK, pFontHeightCTL).
3635  SvxFontHeightItem aFontHeight( m_aFontHeights[nSize-1], 100, RES_CHRATR_FONTSIZE );
3636  InsertAttr( &m_xAttrTab->pFontHeight, aFontHeight, xCntxt.get() );
3637  SvxFontHeightItem aFontHeightCJK( m_aFontHeights[nSize-1], 100, RES_CHRATR_CJK_FONTSIZE );
3638  InsertAttr( &m_xAttrTab->pFontHeightCJK, aFontHeightCJK, xCntxt.get() );
3639  SvxFontHeightItem aFontHeightCTL( m_aFontHeights[nSize-1], 100, RES_CHRATR_CTL_FONTSIZE );
3640  InsertAttr( &m_xAttrTab->pFontHeightCTL, aFontHeightCTL, xCntxt.get() );
3641  }
3642 
3643  // save the context
3644  PushContext(xCntxt);
3645 
3646  // save the font size
3647  m_aBaseFontStack.push_back( nSize );
3648 }
3649 
// NOTE(review): the signature line (listing line 3650) is missing from this
// listing; presumably SwHTMLParser::EndBasefontAttr() - confirm against the
// header.
//
// Ends the BASEFONT_ON context and removes the last entry of
// m_aBaseFontStack, provided the stack holds more than m_nBaseFontStMin
// entries.
3651 {
3652  EndTag( HtmlTokenId::BASEFONT_ON );
3653 
3654  // avoid stack underflow in tables
 // begin() + size() - 1 addresses the last element of the stack.
3655  if( m_aBaseFontStack.size() > m_nBaseFontStMin )
3656  m_aBaseFontStack.erase( m_aBaseFontStack.begin() + m_aBaseFontStack.size() - 1 );
3657 }
3658 
// NOTE(review): the signature line (listing line 3659) is missing from this
// listing. The body reads a token named nToken and compares it with FONT_ON
// and BIGPRINT_ON; presumably this is
// SwHTMLParser::NewFontAttr( HtmlTokenId nToken ) - confirm against the header.
//
// Opens a font context for nToken. For FONT_ON the SIZE, COLOR and FACE
// options are evaluated; for any other token the size is derived by stepping
// the current size up (BIGPRINT_ON) or down by one. The resulting font
// height, color and font name are inserted as attributes, the context is
// pushed and the size is recorded on m_aFontStack.
3660 {
 // NOTE(review): listing lines 3662-3663 (the condition and first branch of
 // this conditional expression) are missing from this listing; only the
 // fallback value 3 is visible.
3661  sal_uInt16 nBaseSize =
3664  : 3 );
 // Current size: top of m_aFontStack (masked with FONTSIZE_MASK) if the
 // stack holds more than m_nFontStMin entries, else the base size.
3665  sal_uInt16 nFontSize =
3666  ( m_aFontStack.size() > m_nFontStMin
3667  ? (m_aFontStack[m_aFontStack.size()-1] & FONTSIZE_MASK)
3668  : nBaseSize );
3669 
3670  OUString aFace, aId, aStyle, aClass, aLang, aDir;
3671  Color aColor;
3672  sal_uLong nFontHeight = 0; // actual font height to set
3673  sal_uInt16 nSize = 0; // font height in Netscape notation (1-7)
3674  bool bColor = false;
3675 
 // Options are visited from last to first, so for a repeated option the
 // occurrence nearest the front of the list is the one that is kept.
3676  const HTMLOptions& rHTMLOptions = GetOptions();
3677  for (size_t i = rHTMLOptions.size(); i; )
3678  {
3679  const HTMLOption& rOption = rHTMLOptions[--i];
3680  switch( rOption.GetToken() )
3681  {
3682  case HtmlOptionId::SIZE:
3683  if( HtmlTokenId::FONT_ON==nToken && !rOption.GetString().isEmpty() )
3684  {
3685  sal_Int32 nSSize;
 // A leading sign makes the value relative to the base size;
 // the addition saturates instead of overflowing.
3686  if( '+' == rOption.GetString()[0] ||
3687  '-' == rOption.GetString()[0] )
3688  nSSize = o3tl::saturating_add<sal_Int32>(nBaseSize, rOption.GetSNumber());
3689  else
3690  nSSize = static_cast<sal_Int32>(rOption.GetNumber());
3691 
 // Limit to 1..7 so that nSize-1 is an index in the range 0..6.
3692  if( nSSize < 1 )
3693  nSSize = 1;
3694  else if( nSSize > 7 )
3695  nSSize = 7;
3696 
3697  nSize = static_cast<sal_uInt16>(nSSize);
3698  nFontHeight = m_aFontHeights[nSize-1];
3699  }
3700  break;
3701  case HtmlOptionId::COLOR:
3702  if( HtmlTokenId::FONT_ON==nToken )
3703  {
3704  rOption.GetColor( aColor );
3705  bColor = true;
3706  }
3707  break;
3708  case HtmlOptionId::FACE:
3709  if( HtmlTokenId::FONT_ON==nToken )
3710  aFace = rOption.GetString();
3711  break;
3712  case HtmlOptionId::ID:
3713  aId = rOption.GetString();
3714  break;
3715  case HtmlOptionId::STYLE:
3716  aStyle = rOption.GetString();
3717  break;
3718  case HtmlOptionId::CLASS:
3719  aClass = rOption.GetString();
3720  break;
3721  case HtmlOptionId::LANG:
3722  aLang = rOption.GetString();
3723  break;
3724  case HtmlOptionId::DIR:
3725  aDir = rOption.GetString();
3726  break;
3727  default: break;
3728  }
3729  }
3730 
3731  if( HtmlTokenId::FONT_ON != nToken )
3732  {
3733  // HTML_BIGPRINT_ON or HTML_SMALLPRINT_ON
3734 
3735  // In headings the current heading sets the font height
3736  // and not BASEFONT.
3737  const SwFormatColl *pColl = GetCurrFormatColl();
3738  sal_uInt16 nPoolId = pColl ? pColl->GetPoolFormatId() : 0;
3739  if( nPoolId>=RES_POOLCOLL_HEADLINE1 &&
3740  nPoolId<=RES_POOLCOLL_HEADLINE6 )
3741  {
3742  // If the font height in the heading wasn't changed yet,
3743  // then take the one from the style.
 // Maps HEADLINE1 to size 6 and each further level to one less.
3744  if( m_nFontStHeadStart==m_aFontStack.size() )
3745  nFontSize = static_cast< sal_uInt16 >(6 - (nPoolId - RES_POOLCOLL_HEADLINE1));
3746  }
3747  else
 // Not a heading: nPoolId == 0 disables the heading lookup below.
3748  nPoolId = 0;
3749 
 // One step up for BIGPRINT_ON, one step down otherwise, kept in 1..7.
3750  if( HtmlTokenId::BIGPRINT_ON == nToken )
3751  nSize = ( nFontSize<7 ? nFontSize+1 : 7 );
3752  else
3753  nSize = ( nFontSize>1 ? nFontSize-1 : 1 );
3754 
3755  // If possible in headlines we fetch the new font height
3756  // from the style.
3757  if( nPoolId && nSize>=1 && nSize <=6 )
3758  nFontHeight =
3759  m_pCSS1Parser->GetTextCollFromPool(
3760  RES_POOLCOLL_HEADLINE1+6-nSize )->GetSize().GetHeight();
3761  else
3762  nFontHeight = m_aFontHeights[nSize-1];
3763  }
3764 
 // nSize and nFontHeight must be either both zero or both non-zero.
3765  OSL_ENSURE( !nSize == !nFontHeight, "HTML-Font-Size != Font-Height" );
3766 
3767  OUString aFontName, aStyleName;
3768  FontFamily eFamily = FAMILY_DONTKNOW; // family and pitch,
3769  FontPitch ePitch = PITCH_DONTKNOW; // if not found
3770  rtl_TextEncoding eEnc = osl_getThreadTextEncoding();
3771 
3772  if( !aFace.isEmpty() && !m_pCSS1Parser->IsIgnoreFontFamily() )
3773  {
3774  const FontList *pFList = nullptr;
3775  SwDocShell *pDocSh = m_xDoc->GetDocShell();
3776  if( pDocSh )
3777  {
3778  const SvxFontListItem *pFListItem =
3779  static_cast<const SvxFontListItem *>(pDocSh->GetItem(SID_ATTR_CHAR_FONTLIST));
3780  if( pFListItem )
3781  pFList = pFListItem->GetFontList();
3782  }
3783 
 // FACE is a comma separated list. Every non-empty, space-stripped name
 // is appended to aFontName separated by ';'. The font list is only
 // consulted until the first name with a known character set is found;
 // if that one is a symbol font, the symbol encoding is used.
3784  bool bFound = false;
3785  sal_Int32 nStrPos = 0;
3786  while( nStrPos!= -1 )
3787  {
3788  OUString aFName = aFace.getToken( 0, ',', nStrPos );
3789  aFName = comphelper::string::strip(aFName, ' ');
3790  if( !aFName.isEmpty() )
3791  {
3792  if( !bFound && pFList )
3793  {
3794  sal_Handle hFont = pFList->GetFirstFontMetric( aFName );
3795  if( nullptr != hFont )
3796  {
3797  const FontMetric& rFMetric = FontList::GetFontMetric( hFont );
3798  if( RTL_TEXTENCODING_DONTKNOW != rFMetric.GetCharSet() )
3799  {
3800  bFound = true;
3801  if( RTL_TEXTENCODING_SYMBOL == rFMetric.GetCharSet() )
3802  eEnc = RTL_TEXTENCODING_SYMBOL;
3803  }
3804  }
3805  }
3806  if( !aFontName.isEmpty() )
3807  aFontName += ";";
3808  aFontName += aFName;
3809  }
3810  }
3811  }
3812 
3813  // create a new context
3814  std::unique_ptr<HTMLAttrContext> xCntxt(new HTMLAttrContext(nToken));
3815 
3816  // parse styles
3817  if( HasStyleOptions( aStyle, aId, aClass, &aLang, &aDir ) )
3818  {
3819  SfxItemSet aItemSet( m_xDoc->GetAttrPool(), m_pCSS1Parser->GetWhichMap() );
3820  SvxCSS1PropertyInfo aPropInfo;
3821 
3822  if( nFontHeight )
3823  {
3824  SvxFontHeightItem aFontHeight( nFontHeight, 100, RES_CHRATR_FONTSIZE );
3825  aItemSet.Put( aFontHeight );
3826  SvxFontHeightItem aFontHeightCJK( nFontHeight, 100, RES_CHRATR_CJK_FONTSIZE );
3827  aItemSet.Put( aFontHeightCJK );
3828  SvxFontHeightItem aFontHeightCTL( nFontHeight, 100, RES_CHRATR_CTL_FONTSIZE );
3829  aItemSet.Put( aFontHeightCTL );
3830  }
3831  if( bColor )
3832  aItemSet.Put( SvxColorItem(aColor, RES_CHRATR_COLOR) );
3833  if( !aFontName.isEmpty() )
3834  {
3835  SvxFontItem aFont( eFamily, aFontName, aStyleName, ePitch, eEnc, RES_CHRATR_FONT );
3836  aItemSet.Put( aFont );
3837  SvxFontItem aFontCJK( eFamily, aFontName, aStyleName, ePitch, eEnc, RES_CHRATR_CJK_FONT );
3838  aItemSet.Put( aFontCJK );
3839  SvxFontItem aFontCTL( eFamily, aFontName, aStyleName, ePitch, eEnc, RES_CHRATR_CTL_FONT );
3840  aItemSet.Put( aFontCTL );
3841  }
3842 
3843  if( ParseStyleOptions( aStyle, aId, aClass, aItemSet, aPropInfo, &aLang, &aDir ) )
3844  DoPositioning( aItemSet, aPropInfo, xCntxt.get() );
3845 
3846  InsertAttrs( aItemSet, aPropInfo, xCntxt.get(), true );
3847  }
3848  else
3849  {
3850  if( nFontHeight )
3851  {
 // NOTE(review): the CJK and CTL heights are chained into the
 // pFontHeight slot here, whereas the BASEFONT handling in this
 // file uses m_xAttrTab->pFontHeightCJK / pFontHeightCTL for the
 // same items - confirm whether sharing the slot is intended.
3852  SvxFontHeightItem aFontHeight( nFontHeight, 100, RES_CHRATR_FONTSIZE );
3853  InsertAttr( &m_xAttrTab->pFontHeight, aFontHeight, xCntxt.get() );
3854  SvxFontHeightItem aFontHeightCJK( nFontHeight, 100, RES_CHRATR_CJK_FONTSIZE );
3855  InsertAttr( &m_xAttrTab->pFontHeight, aFontHeightCJK, xCntxt.get() );
3856  SvxFontHeightItem aFontHeightCTL( nFontHeight, 100, RES_CHRATR_CTL_FONTSIZE );
3857  InsertAttr( &m_xAttrTab->pFontHeight, aFontHeightCTL, xCntxt.get() );
3858  }
3859  if( bColor )
3860  InsertAttr( &m_xAttrTab->pFontColor, SvxColorItem(aColor, RES_CHRATR_COLOR), xCntxt.get() );
3861  if( !aFontName.isEmpty() )
3862  {
 // NOTE(review): likewise the CJK and CTL font items all go into
 // the pFont slot - confirm whether script-specific slots exist
 // in HTMLAttrTable and should be used instead.
3863  SvxFontItem aFont( eFamily, aFontName, aStyleName, ePitch, eEnc, RES_CHRATR_FONT );
3864  InsertAttr( &m_xAttrTab->pFont, aFont, xCntxt.get() );
3865  SvxFontItem aFontCJK( eFamily, aFontName, aStyleName, ePitch, eEnc, RES_CHRATR_CJK_FONT );
3866  InsertAttr( &m_xAttrTab->pFont, aFontCJK, xCntxt.get() );
3867  SvxFontItem aFontCTL( eFamily, aFontName, aStyleName, ePitch, eEnc, RES_CHRATR_CTL_FONT );
3868  InsertAttr( &m_xAttrTab->pFont, aFontCTL, xCntxt.get() );
3869  }
3870  }
3871 
3872  // save the context
3873  PushContext(xCntxt);
3874 
 // Record the size; it is 0 when no size was determined for FONT_ON.
3875  m_aFontStack.push_back( nSize );
3876 }
3877 
// NOTE(review): the signature line (listing line 3878) is missing from this
// listing; the body reads nToken, so this is presumably
// SwHTMLParser::EndFontAttr( HtmlTokenId nToken ) - confirm against the header.
//
// Ends the context for nToken and removes the last entry of m_aFontStack,
// provided the stack holds more than m_nFontStMin entries.
3879 {
3880  EndTag( nToken );
3881 
3882  // avoid stack underflow in tables
 // begin() + size() - 1 addresses the last element of the stack.
3883  if( m_aFontStack.size() > m_nFontStMin )
3884  m_aFontStack.erase( m_aFontStack.begin() + m_aFontStack.size() - 1 );
3885 }
3886 
// NOTE(review): the signature line (listing line 3887) is missing from this
// listing; the body creates a PARABREAK_ON context, so this is presumably
// SwHTMLParser::NewPara() - confirm against the header.
//
// Opens a PARABREAK_ON context: reads ID/ALIGN/STYLE/CLASS/LANG/DIR,
// inserts the style and adjustment attributes, pushes the context and marks
// PARABREAK_ON as the open paragraph token.
3888 {
 // NOTE(review): listing line 3890 (the statement executed when the content
 // index is non-zero) is missing from this listing.
3889  if( m_pPam->GetPoint()->nContent.GetIndex() )
3891  else
3892  AddParSpace();
3893 
 // SvxAdjust::End serves as "no ALIGN option seen", see the check below.
3894  m_eParaAdjust = SvxAdjust::End;
3895  OUString aId, aStyle, aClass, aLang, aDir;
3896 
 // Options are visited from last to first, so for a repeated option the
 // occurrence nearest the front of the list is the one that is kept.
3897  const HTMLOptions& rHTMLOptions = GetOptions();
3898  for (size_t i = rHTMLOptions.size(); i; )
3899  {
3900  const HTMLOption& rOption = rHTMLOptions[--i];
3901  switch( rOption.GetToken() )
3902  {
3903  case HtmlOptionId::ID:
3904  aId = rOption.GetString();
3905  break;
3906  case HtmlOptionId::ALIGN:
3907  m_eParaAdjust = rOption.GetEnum( aHTMLPAlignTable, m_eParaAdjust );
3908  break;
3909  case HtmlOptionId::STYLE:
3910  aStyle = rOption.GetString();
3911  break;
3912  case HtmlOptionId::CLASS:
3913  aClass = rOption.GetString();
3914  break;
3915  case HtmlOptionId::LANG:
3916  aLang = rOption.GetString();
3917  break;
3918  case HtmlOptionId::DIR:
3919  aDir = rOption.GetString();
3920  break;
3921  default: break;
3922  }
3923  }
3924 
3925  // create a new context
 // Only a context created with a class carries RES_POOLCOLL_TEXT and the
 // class name.
3926  std::unique_ptr<HTMLAttrContext> xCntxt(
3927  !aClass.isEmpty() ? new HTMLAttrContext( HtmlTokenId::PARABREAK_ON,
3928  RES_POOLCOLL_TEXT, aClass )
3929  : new HTMLAttrContext( HtmlTokenId::PARABREAK_ON ));
3930 
3931  // parse styles (Don't consider class. This is only possible as long as none of
3932  // the CSS1 properties of the class must be formatted hard!!!)
3933  if (HasStyleOptions(aStyle, aId, OUString(), &aLang, &aDir))
3934  {
3935  SfxItemSet aItemSet( m_xDoc->GetAttrPool(), m_pCSS1Parser->GetWhichMap() );
3936  SvxCSS1PropertyInfo aPropInfo;
3937 
3938  if (ParseStyleOptions(aStyle, aId, OUString(), aItemSet, aPropInfo, &aLang, &aDir))
3939  {
3940  OSL_ENSURE( aClass.isEmpty() || !m_pCSS1Parser->GetClass( aClass ),
3941  "Class is not considered" );
3942  DoPositioning( aItemSet, aPropInfo, xCntxt.get() );
3943  InsertAttrs( aItemSet, aPropInfo, xCntxt.get() );
3944  }
3945  }
3946 
 // An adjustment attribute is only inserted when ALIGN changed the value.
3947  if( SvxAdjust::End != m_eParaAdjust )
3948  InsertAttr( &m_xAttrTab->pAdjust, SvxAdjustItem(m_eParaAdjust, RES_PARATR_ADJUST), xCntxt.get() );
3949 
3950  // and push on stack
3951  PushContext( xCntxt );
3952 
3953  // set the current style or its attributes
 // The just-pushed context is only passed on when a class was given.
3954  SetTextCollAttrs( !aClass.isEmpty() ? m_aContexts.back().get() : nullptr );
3955 
3956  // progress bar
3957  ShowStatline();
3958 
3959  OSL_ENSURE( m_nOpenParaToken == HtmlTokenId::NONE, "Now a open paragraph element will be lost." );
3960  m_nOpenParaToken = HtmlTokenId::PARABREAK_ON;
3961 }
3962 
// Ends the currently open paragraph element: pops the context that belongs
// to m_nOpenParaToken (or PARABREAK_ON if none is recorded), ends its
// attributes and resets m_nOpenParaToken to NONE.
//
// bReal  when true, the paragraph handling at the top and the final
//        SetTextCollAttrs() call are performed; when false both are skipped.
3963 void SwHTMLParser::EndPara( bool bReal )
3964 {
 // Debug-only sanity check for list items inside tables; no effect in
 // builds where OSL_DEBUG_LEVEL is 0.
3965  if (HtmlTokenId::LI_ON==m_nOpenParaToken && m_xTable)
3966  {
3967 #if OSL_DEBUG_LEVEL > 0
3968  const SwNumRule *pNumRule = m_pPam->GetNode().GetTextNode()->GetNumRule();
3969  OSL_ENSURE( pNumRule, "Where is the NumRule" );
3970 #endif
3971  }
3972 
3973  // Netscape skips empty paragraphs, we do the same.
3974  if( bReal )
3975  {
 // NOTE(review): listing line 3977 (the statement executed when the
 // content index is non-zero) is missing from this listing.
3976  if( m_pPam->GetPoint()->nContent.GetIndex() )
3978  else
3979  AddParSpace();
3980  }
3981 
3982  // If a DD or DT was open, it's an implied definition list,
3983  // which must be closed now.
 // NOTE(review): listing line 3985 (the rest of this condition) is missing
 // from this listing.
3984  if( (m_nOpenParaToken == HtmlTokenId::DT_ON || m_nOpenParaToken == HtmlTokenId::DD_ON) &&
3986  {
3987  m_nDefListDeep--;
3988  }
3989 
3990  // Pop the context of the stack. It can also be from an
3991  // implied opened definition list.
3992  std::unique_ptr<HTMLAttrContext> xCntxt(
3993  PopContext( m_nOpenParaToken != HtmlTokenId::NONE ? getOnToken(m_nOpenParaToken) : HtmlTokenId::PARABREAK_ON ));
3994 
3995  // close attribute
3996  if (xCntxt)
3997  {
3998  EndContext(xCntxt.get());
3999  SetAttr(); // because of JavaScript set paragraph attributes as fast as possible
4000  xCntxt.reset();
4001  }
4002 
4003  // reset the existing style
4004  if( bReal )
4005  SetTextCollAttrs();
4006 
4007  m_nOpenParaToken = HtmlTokenId::NONE;
4008 }
4009 
// NOTE(review): the signature line (listing line 4010) is missing from this
// listing. The body maps nToken HEAD1_ON..HEAD6_ON to heading collections;
// presumably this is SwHTMLParser::NewHeading( HtmlTokenId nToken ) -
// confirm against the header.
//
// Opens a heading context: reads ID/ALIGN/STYLE/CLASS/LANG/DIR, selects the
// text collection that matches the heading level, inserts the style and
// adjustment attributes and pushes the context.
4011 {
 // SvxAdjust::End serves as "no ALIGN option seen", see the check below.
4012  m_eParaAdjust = SvxAdjust::End;
4013 
4014  OUString aId, aStyle, aClass, aLang, aDir;
4015 
 // Options are visited from last to first, so for a repeated option the
 // occurrence nearest the front of the list is the one that is kept.
4016  const HTMLOptions& rHTMLOptions = GetOptions();
4017  for (size_t i = rHTMLOptions.size(); i; )
4018  {
4019  const HTMLOption& rOption = rHTMLOptions[--i];
4020  switch( rOption.GetToken() )
4021  {
4022  case HtmlOptionId::ID:
4023  aId = rOption.GetString();
4024  break;
4025  case HtmlOptionId::ALIGN:
4026  m_eParaAdjust = rOption.GetEnum( aHTMLPAlignTable, m_eParaAdjust );
4027  break;
4028  case HtmlOptionId::STYLE:
4029  aStyle = rOption.GetString();
4030  break;
4031  case HtmlOptionId::CLASS:
4032  aClass = rOption.GetString();
4033  break;
4034  case HtmlOptionId::LANG:
4035  aLang = rOption.GetString();
4036  break;
4037  case HtmlOptionId::DIR:
4038  aDir = rOption.GetString();
4039  break;
4040  default: break;
4041  }
4042  }
4043 
4044  // open a new paragraph
 // NOTE(review): listing line 4046 (the statement executed when the content
 // index is non-zero) is missing from this listing.
4045  if( m_pPam->GetPoint()->nContent.GetIndex() )
4047  else
4048  AddParSpace();
4049 
4050  // search for the matching style
 // Any token other than HEAD1_ON..HEAD6_ON falls back to the standard
 // collection.
4051  sal_uInt16 nTextColl;
4052  switch( nToken )
4053  {
4054  case HtmlTokenId::HEAD1_ON: nTextColl = RES_POOLCOLL_HEADLINE1; break;
4055  case HtmlTokenId::HEAD2_ON: nTextColl = RES_POOLCOLL_HEADLINE2; break;
4056  case HtmlTokenId::HEAD3_ON: nTextColl = RES_POOLCOLL_HEADLINE3; break;
4057  case HtmlTokenId::HEAD4_ON: nTextColl = RES_POOLCOLL_HEADLINE4; break;
4058  case HtmlTokenId::HEAD5_ON: nTextColl = RES_POOLCOLL_HEADLINE5; break;
4059  case HtmlTokenId::HEAD6_ON: nTextColl = RES_POOLCOLL_HEADLINE6; break;
4060  default: nTextColl = RES_POOLCOLL_STANDARD; break;
4061  }
4062 
4063  // create the context
4064  std::unique_ptr<HTMLAttrContext> xCntxt(new HTMLAttrContext(nToken, nTextColl, aClass));
4065 
4066  // parse styles (regarding class see also NewPara)
 // The class is deliberately left out here: an empty string is passed.
4067  if (HasStyleOptions(aStyle, aId, OUString(), &aLang, &aDir))
4068  {
4069  SfxItemSet aItemSet( m_xDoc->GetAttrPool(), m_pCSS1Parser->GetWhichMap() );
4070  SvxCSS1PropertyInfo aPropInfo;
4071 
4072  if (ParseStyleOptions(aStyle, aId, OUString(), aItemSet, aPropInfo, &aLang, &aDir))
4073  {
4074  OSL_ENSURE( aClass.isEmpty() || !m_pCSS1Parser->GetClass( aClass ),
4075  "Class is not considered" );
4076  DoPositioning( aItemSet, aPropInfo, xCntxt.get() );
4077  InsertAttrs( aItemSet, aPropInfo, xCntxt.get() );
4078  }
4079  }
4080 
 // An adjustment attribute is only inserted when ALIGN changed the value.
4081  if( SvxAdjust::End != m_eParaAdjust )
4082  InsertAttr( &m_xAttrTab->pAdjust, SvxAdjustItem(m_eParaAdjust, RES_PARATR_ADJUST), xCntxt.get() );
4083 
4084  // and push on stack
4085  PushContext(xCntxt);
4086 
4087  // set the current style or its attributes
4088  SetTextCollAttrs(m_aContexts.back().get());
4089 
 // NOTE(review): listing line 4090 is missing from this listing at this
 // point.
4091 
4092  // progress bar
4093  ShowStatline();
4094 }
4095 
// NOTE(review): the signature line (listing line 4096) is missing from this
// listing; the body removes a HEAD1_ON..HEAD6_ON context, so this is
// presumably SwHTMLParser::EndHeading() - confirm against the header.
//
// Ends the innermost open heading: removes the topmost heading context
// (of any level) from m_aContexts, ends its attributes and resets the
// paragraph style.
4097 {
4098  // open a new paragraph
 // NOTE(review): listing line 4100 (the statement executed when the content
 // index is non-zero) is missing from this listing.
4099  if( m_pPam->GetPoint()->nContent.GetIndex() )
4101  else
4102  AddParSpace();
4103 
4104  // search context matching the token and fetch it from stack
 // Searches from the top of the stack downwards, but never below
 // m_nContextStMin; stops at the first heading context of any level.
4105  std::unique_ptr<HTMLAttrContext> xCntxt;
4106  auto nPos = m_aContexts.size();
4107  while( !xCntxt && nPos>m_nContextStMin )
4108  {
4109  switch( m_aContexts[--nPos]->GetToken() )
4110  {
4111  case HtmlTokenId::HEAD1_ON:
4112  case HtmlTokenId::HEAD2_ON:
4113  case HtmlTokenId::HEAD3_ON:
4114  case HtmlTokenId::HEAD4_ON:
4115  case HtmlTokenId::HEAD5_ON:
4116  case HtmlTokenId::HEAD6_ON:
 // Take ownership first, then drop the emptied stack entry.
4117  xCntxt = std::move(m_aContexts[nPos]);
4118  m_aContexts.erase( m_aContexts.begin() + nPos );
4119  break;
4120  default: break;
4121  }
4122  }
4123 
4124  // and now end attributes
4125  if (xCntxt)
4126  {
4127  EndContext(xCntxt.get());
4128  SetAttr(); // because of JavaScript set paragraph attributes as fast as possible
4129  xCntxt.reset();
4130  }
4131 
4132  // reset existing style
4133  SetTextCollAttrs();
4134 
 // NOTE(review): listing line 4135 is missing from this listing at this
 // point.
4136 }
4137 
4138 void SwHTMLParser::NewTextFormatColl( HtmlTokenId nToken, sal_uInt16 nColl )
4139 {
4140  OUString aId, aStyle, aClass, aLang, aDir;
4141 
4142  const HTMLOptions& rHTMLOptions = GetOptions();
4143  for (size_t i = rHTMLOptions.size(); i; )
4144  {
4145  const HTMLOption& rOption = rHTMLOptions[--i];
4146  switch( rOption.GetToken() )
4147  {
4148  case HtmlOptionId::ID:
4149  aId = rOption.GetString();
4150  break;
4151  case HtmlOptionId::STYLE:
4152  aStyle = rOption.GetString();
4153  break;
4154  case HtmlOptionId::CLASS:
4155  aClass = rOption.GetString();
4156  break;
4157  case HtmlOptionId::LANG:
4158  aLang = rOption.GetString();
4159  break;
4160  case HtmlOptionId::DIR:
4161  aDir = rOption.GetString();
4162  break;
4163  default: break;
4164  }
4165  }
4166 
4167  // open a new paragraph
4168  SwHTMLAppendMode eMode = AM_NORMAL;
4169  switch( nToken )
4170  {
4171  case HtmlTokenId::LISTING_ON:
4172  case HtmlTokenId::XMP_ON:
4173  // These both tags will be mapped to the PRE style. For the case that a
4174  // a CLASS exists we will delete it so that we don't get the CLASS of
4175  // the PRE style.
4176  aClass.clear();
4177  [[fallthrough]];
4178  case HtmlTokenId::BLOCKQUOTE_ON:
4179  case HtmlTokenId::BLOCKQUOTE30_ON:
4180  case HtmlTokenId::PREFORMTXT_ON:
4181  eMode = AM_SPACE;
4182  break;
4183  case HtmlTokenId::ADDRESS_ON:
4184  eMode = AM_NOSPACE; // ADDRESS can follow on a <P> without </P>
4185  break;
4186  case HtmlTokenId::DT_ON:
4187  case HtmlTokenId::DD_ON:
4188  eMode = AM_SOFTNOSPACE;
4189  break;
4190  default:
4191  OSL_ENSURE( false, "unknown style" );
4192  break;
4193  }
4194  if( m_pPam->GetPoint()->nContent.GetIndex() )
4195  AppendTextNode( eMode );
4196  else if( AM_SPACE==eMode )
4197  AddParSpace();
4198 
4199  // ... and save in a context
4200  std::unique_ptr<HTMLAttrContext> xCntxt(new HTMLAttrContext(nToken, nColl, aClass));
4201 
4202  // parse styles (regarding class see also NewPara)
4203  if (HasStyleOptions(aStyle, aId, OUString(), &aLang, &aDir))
4204  {
4205  SfxItemSet aItemSet( m_xDoc->GetAttrPool(), m_pCSS1Parser->GetWhichMap() );
4206  SvxCSS1PropertyInfo aPropInfo;
4207 
4208  if (ParseStyleOptions(aStyle, aId, OUString(), aItemSet, aPropInfo, &aLang, &aDir))
4209  {
4210  OSL_ENSURE( aClass.isEmpty() || !m_pCSS1Parser->GetClass( aClass ),
4211  "Class is not considered" );
4212  DoPositioning( aItemSet, aPropInfo, xCntxt.get() );
4213  InsertAttrs( aItemSet, aPropInfo, xCntxt.get() );
4214  }
4215  }
4216 
4217  PushContext(xCntxt);
4218 
4219  // set the new style
4220  SetTextCollAttrs(m_aContexts.back().get());
4221 
4222  // update progress bar
4223  ShowStatline();
4224 }
4225 
// NOTE(review): the signature line (listing line 4226) is missing from this
// listing; the body reads nToken and handles the same tokens as
// NewTextFormatColl, so this is presumably
// SwHTMLParser::EndTextFormatColl( HtmlTokenId nToken ) - confirm against the
// header.
//
// Ends a block element that was mapped to a paragraph style: starts a new
// paragraph in the append mode that belongs to the token, pops the matching
// context, ends its attributes and resets the paragraph style.
4227 {
4228  SwHTMLAppendMode eMode = AM_NORMAL;
 // nToken is converted to its "on" form before it is compared.
4229  switch( getOnToken(nToken) )
4230  {
4231  case HtmlTokenId::BLOCKQUOTE_ON:
4232  case HtmlTokenId::BLOCKQUOTE30_ON:
4233  case HtmlTokenId::PREFORMTXT_ON:
4234  case HtmlTokenId::LISTING_ON:
4235  case HtmlTokenId::XMP_ON:
4236  eMode = AM_SPACE;
4237  break;
 // Unlike when opening (AM_NOSPACE), ADDRESS is closed with
 // AM_SOFTNOSPACE, the same as DT and DD.
4238  case HtmlTokenId::ADDRESS_ON:
4239  case HtmlTokenId::DT_ON:
4240  case HtmlTokenId::DD_ON:
4241  eMode = AM_SOFTNOSPACE;
4242  break;
4243  default:
4244  OSL_ENSURE( false, "unknown style" );
4245  break;
4246  }
 // A paragraph is only appended when the current one has content; an
 // empty one merely gets paragraph spacing, and only in AM_SPACE mode.
4247  if( m_pPam->GetPoint()->nContent.GetIndex() )
4248  AppendTextNode( eMode );
4249  else if( AM_SPACE==eMode )
4250  AddParSpace();
4251 
4252  // pop current context of stack
4253  std::unique_ptr<HTMLAttrContext> xCntxt(PopContext(getOnToken(nToken)));
4254 
4255  // and now end attributes
4256  if (xCntxt)
4257  {
4258  EndContext(xCntxt.get());
4259  SetAttr(); // because of JavaScript set paragraph attributes as fast as possible
4260  xCntxt.reset();
4261  }
4262 
4263  // reset existing style
4264  SetTextCollAttrs();
4265 }
4266 
// NOTE(review): the signature line (listing line 4267) is missing from this
// listing; the body creates a DEFLIST_ON context, so this is presumably
// SwHTMLParser::NewDefList() - confirm against the header.
//
// Opens a definition list: reads ID/STYLE/CLASS/LANG/DIR, starts a new
// paragraph, increments m_nDefListDeep, creates a DEFLIST_ON context with
// the margins taken from the surrounding context (plus the DD indent for
// nested lists that are not inside a DD) and pushes it.
4268 {
4269  OUString aId, aStyle, aClass, aLang, aDir;
4270 
 // Options are visited from last to first, so for a repeated option the
 // occurrence nearest the front of the list is the one that is kept.
4271  const HTMLOptions& rHTMLOptions = GetOptions();
4272  for (size_t i = rHTMLOptions.size(); i; )
4273  {
4274  const HTMLOption& rOption = rHTMLOptions[--i];
4275  switch( rOption.GetToken() )
4276  {
4277  case HtmlOptionId::ID:
4278  aId = rOption.GetString();
4279  break;
4280  case HtmlOptionId::STYLE:
4281  aStyle = rOption.GetString();
4282  break;
4283  case HtmlOptionId::CLASS:
4284  aClass = rOption.GetString();
4285  break;
4286  case HtmlOptionId::LANG:
4287  aLang = rOption.GetString();
4288  break;
4289  case HtmlOptionId::DIR:
4290  aDir = rOption.GetString();
4291  break;
4292  default: break;
4293  }
4294  }
4295 
4296  // open a new paragraph
 // Paragraph spacing is only wanted for an outermost list, i.e. when
 // neither a numbered list nor a definition list is open yet.
4297  bool bSpace = (GetNumInfo().GetDepth() + m_nDefListDeep) == 0;
4298  if( m_pPam->GetPoint()->nContent.GetIndex() )
4299  AppendTextNode( bSpace ? AM_SPACE : AM_SOFTNOSPACE );
4300  else if( bSpace )
4301  AddParSpace();
4302 
4303  // one level more
4304  m_nDefListDeep++;
4305 
 // Search the context stack from the top (never below m_nContextStMin)
 // for the nearest enclosing list or DD: bInDD is set when a DD context is
 // found first, bNotInDD when any list context is found first.
4306  bool bInDD = false, bNotInDD = false;
4307  auto nPos = m_aContexts.size();
4308  while( !bInDD && !bNotInDD && nPos>m_nContextStMin )
4309  {
4310  HtmlTokenId nCntxtToken = m_aContexts[--nPos]->GetToken();
4311  switch( nCntxtToken )
4312  {
4313  case HtmlTokenId::DEFLIST_ON:
4314  case HtmlTokenId::DIRLIST_ON:
4315  case HtmlTokenId::MENULIST_ON:
4316  case HtmlTokenId::ORDERLIST_ON:
4317  case HtmlTokenId::UNORDERLIST_ON:
4318  bNotInDD = true;
4319  break;
4320  case HtmlTokenId::DD_ON:
4321  bInDD = true;
4322  break;
4323  default: break;
4324  }
4325  }
4326 
4327  // ... and save in a context
4328  std::unique_ptr<HTMLAttrContext> xCntxt(new HTMLAttrContext(HtmlTokenId::DEFLIST_ON));
4329 
4330  // in it save also the margins
4331  sal_uInt16 nLeft=0, nRight=0;
4332  short nIndent=0;
4333  GetMarginsFromContext( nLeft, nRight, nIndent );
4334 
4335  // The indentation, which already results from a DL, correlates with a DT
4336  // on the current level and this correlates to a DD from the previous level.
4337  // For a level >=2 we must add DD distance.
4338  if( !bInDD && m_nDefListDeep > 1 )
4339  {
4340 
4341  // and the one of the DT-style of the current level
 // NOTE(review): the comment above mentions the DT style, but the
 // collection queried below is RES_POOLCOLL_HTML_DD - confirm which
 // one is meant.
4342  SvxLRSpaceItem rLRSpace =
4343  m_pCSS1Parser->GetTextFormatColl(RES_POOLCOLL_HTML_DD, OUString())
4344  ->GetLRSpace();
4345  nLeft = nLeft + static_cast< sal_uInt16 >(rLRSpace.GetTextLeft());
4346  }
4347 
4348  xCntxt->SetMargins( nLeft, nRight, nIndent );
4349 
4350  // parse styles
4351  if( HasStyleOptions( aStyle, aId, aClass, &aLang, &aDir ) )
4352  {
4353  SfxItemSet aItemSet( m_xDoc->GetAttrPool(), m_pCSS1Parser->GetWhichMap() );
4354  SvxCSS1PropertyInfo aPropInfo;
4355 
4356  if( ParseStyleOptions( aStyle, aId, aClass, aItemSet, aPropInfo, &aLang, &aDir ) )
4357  {
4358  DoPositioning( aItemSet, aPropInfo, xCntxt.get() );
4359  InsertAttrs( aItemSet, aPropInfo, xCntxt.get() );
4360  }
4361  }
4362 
4363  PushContext(xCntxt);
4364 
4365  // set the attributes of the new style
 // Only done for nested definition lists (depth greater than one).
4366  if( m_nDefListDeep > 1 )
4367  SetTextCollAttrs(m_aContexts.back().get());
4368 }
4369 
// NOTE(review): the function header (listing line 4370) is missing from this
// extract. The body pops a DEFLIST_ON context, so this is presumably the
// handler for the end of a definition list - confirm against the full file.
//
// Closes the current definition-list level:
//  - appends a text node when the cursor is not at the start of the
//    paragraph, otherwise adds paragraph spacing if bSpace is set,
//  - decrements m_nDefListDeep (never below zero),
//  - pops the DEFLIST_ON context and, if one was found, ends its attributes,
//  - re-applies the paragraph style through SetTextCollAttrs().
4371 {
      // true when numbering depth plus definition-list depth is exactly 1
4372  bool bSpace = (GetNumInfo().GetDepth() + m_nDefListDeep) == 1;
4373  if( m_pPam->GetPoint()->nContent.GetIndex() )
4374  AppendTextNode( bSpace ? AM_SPACE : AM_SOFTNOSPACE );
4375  else if( bSpace )
4376  AddParSpace();
4377 
4378  // one level less
4379  if( m_nDefListDeep > 0 )
4380  m_nDefListDeep--;
4381 
4382  // pop current context of stack
4383  std::unique_ptr<HTMLAttrContext> xCntxt(PopContext(HtmlTokenId::DEFLIST_ON));
4384 
4385  // and now end attributes
4386  if (xCntxt)
4387  {
4388  EndContext(xCntxt.get());
4389  SetAttr(); // because of JavaScript set paragraph attributes as fast as possible
4390  xCntxt.reset();
4391  }
4392 
4393  // and set style
4394  SetTextCollAttrs();
4395 }
4396 
// NOTE(review): the function header (listing line 4397) is missing from this
// extract. The body reads a parameter `nToken` that is compared with
// HtmlTokenId::DD_ON, so this presumably starts a DD/DT item - confirm
// against the full file.
//
// Walks the context stack from the top down (not below m_nContextStMin) and
// stops at the first list context: DEFLIST_ON means the item is inside a
// definition list, any other list token means it is not. When no enclosing
// definition list is found, m_nDefListDeep is incremented and nToken is
// remembered in m_nOpenParaToken. Finally a new text format collection is
// started: RES_POOLCOLL_HTML_DD for DD_ON, RES_POOLCOLL_HTML_DT otherwise.
4398 {
4399  // determine if the DD/DT exist in a DL
4400  bool bInDefList = false, bNotInDefList = false;
4401  auto nPos = m_aContexts.size();
4402  while( !bInDefList && !bNotInDefList && nPos>m_nContextStMin )
4403  {
4404  HtmlTokenId nCntxtToken = m_aContexts[--nPos]->GetToken();
4405  switch( nCntxtToken )
4406  {
4407  case HtmlTokenId::DEFLIST_ON:
4408  bInDefList = true;
4409  break;
4410  case HtmlTokenId::DIRLIST_ON:
4411  case HtmlTokenId::MENULIST_ON:
4412  case HtmlTokenId::ORDERLIST_ON:
4413  case HtmlTokenId::UNORDERLIST_ON:
4414  bNotInDefList = true;
4415  break;
4416  default: break;
4417  }
4418  }
4419 
4420  // if not, then implicitly open a new DL
4421  if( !bInDefList )
4422  {
4423  m_nDefListDeep++;
4424  OSL_ENSURE( m_nOpenParaToken == HtmlTokenId::NONE,
4425  "Now an open paragraph element will be lost." );
4426  m_nOpenParaToken = nToken;
4427  }
4428 
4429  NewTextFormatColl( nToken, static_cast< sal_uInt16 >(nToken==HtmlTokenId::DD_ON ? RES_POOLCOLL_HTML_DD
4430  : RES_POOLCOLL_HTML_DT) );
4431 }
4432 
// NOTE(review): the function header (listing line 4433) is missing from this
// extract. The body reads and reassigns a parameter `nToken` and removes
// DD_ON/DT_ON contexts, so this presumably ends a DD/DT item - confirm
// against the full file.
//
// Maps nToken through getOnToken(), then searches the context stack from the
// top down (not below m_nContextStMin) for a DD_ON or DT_ON context. A match
// is either any DD/DT context (nToken == NONE) or the one equal to nToken; it
// is moved out of m_aContexts and erased from the stack. If a context was
// found, its attributes are ended and SetAttr() is called.
4434 {
4435  // open a new paragraph
4436  if( nToken == HtmlTokenId::NONE && m_pPam->GetPoint()->nContent.GetIndex() )
      // NOTE(review): listing line 4437 (the statement controlled by the `if`
      // above) is missing from this extract.
4438 
4439  // search context matching the token and fetch it from stack
4440  nToken = getOnToken(nToken);
4441  std::unique_ptr<HTMLAttrContext> xCntxt;
4442  auto nPos = m_aContexts.size();
4443  while( !xCntxt && nPos>m_nContextStMin )
4444  {
4445  HtmlTokenId nCntxtToken = m_aContexts[--nPos]->GetToken();
4446  switch( nCntxtToken )
4447  {
4448  case HtmlTokenId::DD_ON:
4449  case HtmlTokenId::DT_ON:
4450  if( nToken == HtmlTokenId::NONE || nToken == nCntxtToken )
4451  {
4452  xCntxt = std::move(m_aContexts[nPos]);
4453  m_aContexts.erase( m_aContexts.begin() + nPos );
4454  }
4455  break;
4456  case HtmlTokenId::DEFLIST_ON:
4457  // don't look at DD/DT outside the current DefList
4458  case HtmlTokenId::DIRLIST_ON:
4459  case HtmlTokenId::MENULIST_ON:
4460  case HtmlTokenId::ORDERLIST_ON:
4461  case HtmlTokenId::UNORDERLIST_ON:
4462  // and also not outside another list
      // NOTE(review): listing line 4463 is missing here; presumably it is the
      // statement that stops the search for these list tokens - confirm.
4464  break;
4465  default: break;
4466  }
4467  }
4468 
4469  // and now end attributes
4470  if (xCntxt)
4471  {
4472  EndContext(xCntxt.get());
4473  SetAttr(); // because of JavaScript set paragraph attributes as fast as possible
4474  }
4475 }
4476 
// Reports whether a frame format from m_xDoc->GetSpzFrameFormats() is
// anchored in the node the current PaM point refers to.
//
// Only formats that have a content anchor, are anchored at-paragraph or
// at-character, and whose anchor node equals the current node are considered.
//
// bNoSurroundOnly  if true, a considered frame with WrapTextMode_NONE makes
//                  the function return true immediately.
// bSurroundOnly    if true, a considered frame with WrapTextMode_NONE makes
//                  the function return false immediately; a frame whose wrap
//                  mode is neither NONE nor THROUGH sets the result to true,
//                  but the search continues.
// If both flags are false, the first considered frame yields true.
// Returns false when no frame matched.
4486 bool SwHTMLParser::HasCurrentParaFlys( bool bNoSurroundOnly,
4487  bool bSurroundOnly ) const
4488 {
4489  SwNodeIndex& rNodeIdx = m_pPam->GetPoint()->nNode;
4490 
4491  const SwFrameFormats& rFrameFormatTable = *m_xDoc->GetSpzFrameFormats();
4492 
4493  bool bFound = false;
4494  for ( size_t i=0; i<rFrameFormatTable.size(); i++ )
4495  {
4496  const SwFrameFormat *const pFormat = rFrameFormatTable[i];
4497  SwFormatAnchor const*const pAnchor = &pFormat->GetAnchor();
4498  // A frame was found, when
4499  // - it is paragraph-bound, and
4500  // - is anchored in current paragraph, and
4501  // - every paragraph-bound frame counts, or
4502  // - (only frames without wrapping count and) the frame doesn't have
4503  // a wrapping
4504  SwPosition const*const pAPos = pAnchor->GetContentAnchor();
4505  if (pAPos &&
4506  ((RndStdIds::FLY_AT_PARA == pAnchor->GetAnchorId()) ||
4507  (RndStdIds::FLY_AT_CHAR == pAnchor->GetAnchorId())) &&
4508  pAPos->nNode == rNodeIdx )
4509  {
4510  if( !(bNoSurroundOnly || bSurroundOnly) )
4511  {
4512  bFound = true;
4513  break;
4514  }
4515  else
4516  {
4517  // When looking for frames with wrapping, also disregard
4518  // ones with wrap-through. In this case it's (still) HIDDEN-Controls,
4519  // and you don't want to evade those when positioning.
4520  css::text::WrapTextMode eSurround = pFormat->GetSurround().GetSurround();
4521  if( bNoSurroundOnly )
4522  {
4523  if( css::text::WrapTextMode_NONE==eSurround )
4524  {
4525  bFound = true;
4526  break;
4527  }
4528  }
4529  if( bSurroundOnly )
4530  {
4531  if( css::text::WrapTextMode_NONE==eSurround )
4532  {
      // a frame without wrapping overrides any earlier match
4533  bFound = false;
4534  break;
4535  }
4536  else if( css::text::WrapTextMode_THROUGH!=eSurround )
4537  {
4538  bFound = true;
4539  // Continue searching: It's possible that some without
4540  // wrapping will follow...
4541  }
4542  }
4543  }
4544  }
4545  }
4546 
4547  return bFound;
4548 }
4549 
4550 // the special methods for inserting of objects
4551 
// NOTE(review): the function header (listing line 4552) is missing from this
// extract; presumably this is GetCurrFormatColl(), which other visible code
// calls to obtain a `const SwFormatColl*` - confirm against the full file.
//
// Returns the address of the format collection reported by
// GetAnyFormatColl() for the content node of m_pPam, or nullptr when m_pPam
// has no content node.
4553 {
4554  const SwContentNode* pCNd = m_pPam->GetContentNode();
4555  return pCNd ? &pCNd->GetAnyFormatColl() : nullptr;
4556 }
4557 
// NOTE(review): the function header (listing line 4558) is missing from this
// extract. The body reads a pointer parameter `pContext` that may be null;
// presumably this is SetTextCollAttrs(), which other visible code calls both
// with a context and without arguments - confirm against the full file.
//
// Determines the paragraph style (text format collection) and the paragraph
// margins for the current paragraph from the context stack and applies them:
//  1. Iterates the contexts from m_nContextStAttrMin to the top. The last
//     collection that may be set becomes pCollToSet; the attributes of
//     collections that were superseded, or that may not be set inside PRE,
//     are collected in pItemSet as hard attributes. Margins of contexts with
//     changed LR space are picked up on the way.
//  2. If pContext carries a collection, the accumulated margins and the
//     collection's upper/lower spacing are stored in pContext.
//  3. If no collection was found, the pool collection nDfltColl is used.
//  4. Invalidates and clears m_aParaAttrs, sets the collection at m_pPam,
//     adds an LR-space item when the margins differ from the collection's,
//     and inserts the hard attributes from pItemSet.
// pItemSet is allocated with `new` inside the loop and released with
// `delete` at the end of the function.
4559 {
4560  SwTextFormatColl *pCollToSet = nullptr; // the style to set
4561  SfxItemSet *pItemSet = nullptr; // set of hard attributes
4562  sal_uInt16 nTopColl = pContext ? pContext->GetTextFormatColl() : 0;
4563  const OUString rTopClass = pContext ? pContext->GetClass() : OUString();
4564  sal_uInt16 nDfltColl = RES_POOLCOLL_TEXT;
4565 
4566  bool bInPRE=false; // some context info
4567 
4568  sal_uInt16 nLeftMargin = 0, nRightMargin = 0; // the margins and
4569  short nFirstLineIndent = 0; // indentations
4570 
4571  for( auto i = m_nContextStAttrMin; i < m_aContexts.size(); ++i )
4572  {
4573  const HTMLAttrContext *pCntxt = m_aContexts[i].get();
4574 
4575  sal_uInt16 nColl = pCntxt->GetTextFormatColl();
4576  if( nColl )
4577  {
4578  // There is a style to set. Then at first we must decide,
4579  // if the style can be set.
4580  bool bSetThis = true;
4581  switch( nColl )
4582  {
4583  case RES_POOLCOLL_HTML_PRE:
4584  bInPRE = true;
4585  break;
4586  case RES_POOLCOLL_TEXT:
4587  // <TD><P CLASS=xxx> must become TD.xxx
4588  if( nDfltColl==RES_POOLCOLL_TABLE ||
4589  nDfltColl==RES_POOLCOLL_TABLE_HDLN )
4590  nColl = nDfltColl;
4591  break;
4592  case RES_POOLCOLL_HTML_HR:
4593  // also <HR> in <PRE> set as style, otherwise it can't
4594  // be exported anymore
4595  break;
4596  default:
4597  if( bInPRE )
4598  bSetThis = false;
4599  break;
4600  }
4601 
4602  SwTextFormatColl *pNewColl =
4603  m_pCSS1Parser->GetTextFormatColl( nColl, pCntxt->GetClass() );
4604 
4605  if( bSetThis )
4606  {
4607  // If now a different style should be set as previously, the
4608  // previous style must be replaced by hard attribution.
4609 
4610  if( pCollToSet )
4611  {
4612  // insert the attributes hard, which previous style sets
4613  if( !pItemSet )
4614  pItemSet = new SfxItemSet( pCollToSet->GetAttrSet() );
4615  else
4616  {
4617  const SfxItemSet& rCollSet = pCollToSet->GetAttrSet();
4618  SfxItemSet aItemSet( *rCollSet.GetPool(),
4619  rCollSet.GetRanges() );
4620  aItemSet.Set( rCollSet );
4621  pItemSet->Put( aItemSet );
4622  }
4623  // but remove the attributes, which the current style sets,
4624  // because otherwise they will be overwritten later
4625  pItemSet->Differentiate( pNewColl->GetAttrSet() );
4626  }
4627 
4628  pCollToSet = pNewColl;
4629  }
4630  else
4631  {
4632  // hard attribution
4633  if( !pItemSet )
4634  pItemSet = new SfxItemSet( pNewColl->GetAttrSet() );
4635  else
4636  {
4637  const SfxItemSet& rCollSet = pNewColl->GetAttrSet();
4638  SfxItemSet aItemSet( *rCollSet.GetPool(),
4639  rCollSet.GetRanges() );
4640  aItemSet.Set( rCollSet );
4641  pItemSet->Put( aItemSet );
4642  }
4643  }
4644  }
4645  else
4646  {
4647  // Maybe a default style exists?
4648  nColl = pCntxt->GetDfltTextFormatColl();
4649  if( nColl )
4650  nDfltColl = nColl;
4651  }
4652 
4653  // if applicable fetch new paragraph indents
4654  if( pCntxt->IsLRSpaceChanged() )
4655  {
4656  sal_uInt16 nLeft=0, nRight=0;
4657 
4658  pCntxt->GetMargins( nLeft, nRight, nFirstLineIndent );
4659  nLeftMargin = nLeft;
4660  nRightMargin = nRight;
4661  }
4662  }
4663 
4664  // If in current context a new style should be set,
4665  // its paragraph margins must be inserted in the context.
4666  if( pContext && nTopColl )
4667  {
4668  // <TD><P CLASS=xxx> must become TD.xxx
4669  if( nTopColl==RES_POOLCOLL_TEXT &&
4670  (nDfltColl==RES_POOLCOLL_TABLE ||
4671  nDfltColl==RES_POOLCOLL_TABLE_HDLN) )
4672  nTopColl = nDfltColl;
4673 
4674  const SwTextFormatColl *pTopColl =
4675  m_pCSS1Parser->GetTextFormatColl( nTopColl, rTopClass );
4676  const SfxItemSet& rItemSet = pTopColl->GetAttrSet();
4677  const SfxPoolItem *pItem;
4678  if( SfxItemState::SET == rItemSet.GetItemState(RES_LR_SPACE,true, &pItem) )
4679  {
4680  const SvxLRSpaceItem *pLRItem =
4681  static_cast<const SvxLRSpaceItem *>(pItem);
4682 
4683  sal_Int32 nLeft = pLRItem->GetTextLeft();
4684  sal_Int32 nRight = pLRItem->GetRight();
4685  nFirstLineIndent = pLRItem->GetTextFirstLineOfst();
4686 
4687  // In Definition lists the margins also contain the margins from the previous levels
4688  if( RES_POOLCOLL_HTML_DD == nTopColl )
4689  {
4690  const SvxLRSpaceItem& rDTLRSpace = m_pCSS1Parser
4691  ->GetTextFormatColl(RES_POOLCOLL_HTML_DT, OUString())
4692  ->GetLRSpace();
4693  nLeft -= rDTLRSpace.GetTextLeft();
4694  nRight -= rDTLRSpace.GetRight();
4695  }
4696  else if( RES_POOLCOLL_HTML_DT == nTopColl )
4697  {
4698  nLeft = 0;
4699  nRight = 0;
4700  }
4701 
4702  // the paragraph margins add up
4703  nLeftMargin = nLeftMargin + static_cast< sal_uInt16 >(nLeft);
4704  nRightMargin = nRightMargin + static_cast< sal_uInt16 >(nRight);
4705 
4706  pContext->SetMargins( nLeftMargin, nRightMargin,
4707  nFirstLineIndent );
4708  }
4709  if( SfxItemState::SET == rItemSet.GetItemState(RES_UL_SPACE,true, &pItem) )
4710  {
4711  const SvxULSpaceItem *pULItem =
4712  static_cast<const SvxULSpaceItem *>(pItem);
4713  pContext->SetULSpace( pULItem->GetUpper(), pULItem->GetLower() );
4714  }
4715  }
4716 
4717  // If no style is set in the context use the text body.
4718  if( !pCollToSet )
4719  {
4720  pCollToSet = m_pCSS1Parser->GetTextCollFromPool( nDfltColl );
4721  const SvxLRSpaceItem& rLRItem = pCollToSet->GetLRSpace();
      // only margins that are still zero are taken from the collection
4722  if( !nLeftMargin )
4723  nLeftMargin = static_cast< sal_uInt16 >(rLRItem.GetTextLeft());
4724  if( !nRightMargin )
4725  nRightMargin = static_cast< sal_uInt16 >(rLRItem.GetRight());
4726  if( !nFirstLineIndent )
4727  nFirstLineIndent = rLRItem.GetTextFirstLineOfst();
4728  }
4729 
4730  // remove previous hard attribution of paragraph
4731  for( auto pParaAttr : m_aParaAttrs )
4732  pParaAttr->Invalidate();
4733  m_aParaAttrs.clear();
4734 
4735  // set the style
4736  m_xDoc->SetTextFormatColl( *m_pPam, pCollToSet );
4737 
4738  // if applicable correct the paragraph indent
4739  const SvxLRSpaceItem& rLRItem = pCollToSet->GetLRSpace();
4740  bool bSetLRSpace = nLeftMargin != rLRItem.GetTextLeft() ||
4741  nFirstLineIndent != rLRItem.GetTextFirstLineOfst() ||
4742  nRightMargin != rLRItem.GetRight();
4743 
4744  if( bSetLRSpace )
4745  {
4746  SvxLRSpaceItem aLRItem( rLRItem );
4747  aLRItem.SetTextLeft( nLeftMargin );
4748  aLRItem.SetRight( nRightMargin );
4749  aLRItem.SetTextFirstLineOfst( nFirstLineIndent );
4750  if( pItemSet )
4751  pItemSet->Put( aLRItem );
4752  else
4753  {
4754  NewAttr(m_xAttrTab, &m_xAttrTab->pLRSpace, aLRItem);
4755  m_xAttrTab->pLRSpace->SetLikePara();
4756  m_aParaAttrs.push_back( m_xAttrTab->pLRSpace );
4757  EndAttr( m_xAttrTab->pLRSpace, false );
4758  }
4759  }
4760 
4761  // and now set the attributes
4762  if( pItemSet )
4763  {
4764  InsertParaAttrs( *pItemSet );
      // pItemSet was allocated with new in the loop above
4765  delete pItemSet;
4766  }
4767 }
4768 
// NOTE(review): the function header (listing line 4769) is missing from this
// extract. The body reads a parameter `nToken` and inserts a character
// format, so this is presumably NewCharFormat( HtmlTokenId nToken ) - confirm
// against the full file.
//
// Opens a character-format context for nToken:
//  - reads the ID, STYLE, CLASS, LANG and DIR options of the current tag,
//  - creates a new HTMLAttrContext for nToken,
//  - asks the CSS1 parser for the character format matching nToken and the
//    class,
//  - parses the style options (the class is passed as an empty string here)
//    and inserts the resulting attributes,
//  - inserts the character format itself, if one was found, and pushes the
//    context.
4770 {
4771  OUString aId, aStyle, aLang, aDir;
4772  OUString aClass;
4773 
4774  const HTMLOptions& rHTMLOptions = GetOptions();
      // options are visited from the last one to the first one
4775  for (size_t i = rHTMLOptions.size(); i; )
4776  {
4777  const HTMLOption& rOption = rHTMLOptions[--i];
4778  switch( rOption.GetToken() )
4779  {
4780  case HtmlOptionId::ID:
4781  aId = rOption.GetString();
4782  break;
4783  case HtmlOptionId::STYLE:
4784  aStyle = rOption.GetString();
4785  break;
4786  case HtmlOptionId::CLASS:
4787  aClass = rOption.GetString();
4788  break;
4789  case HtmlOptionId::LANG:
4790  aLang = rOption.GetString();
4791  break;
4792  case HtmlOptionId::DIR:
4793  aDir = rOption.GetString();
4794  break;
4795  default: break;
4796  }
4797  }
4798 
4799  // create a new context
4800  std::unique_ptr<HTMLAttrContext> xCntxt(new HTMLAttrContext(nToken));
4801 
4802  // set the style and save it in the context
4803  SwCharFormat* pCFormat = m_pCSS1Parser->GetChrFormat( nToken, aClass );
4804  OSL_ENSURE( pCFormat, "No character format found for token" );
4805 
4806  // parse styles (regarding class see also NewPara)
4807  if (HasStyleOptions(aStyle, aId, OUString(), &aLang, &aDir))
4808  {
4809  SfxItemSet aItemSet( m_xDoc->GetAttrPool(), m_pCSS1Parser->GetWhichMap() );
4810  SvxCSS1PropertyInfo aPropInfo;
4811 
4812  if (ParseStyleOptions(aStyle, aId, OUString(), aItemSet, aPropInfo, &aLang, &aDir))
4813  {
4814  OSL_ENSURE( aClass.isEmpty() || !m_pCSS1Parser->GetClass( aClass ),
4815  "Class is not considered" );
4816  DoPositioning( aItemSet, aPropInfo, xCntxt.get() );
4817  InsertAttrs( aItemSet, aPropInfo, xCntxt.get(), true );
4818  }
4819  }
4820 
4821  // Character formats are stored in their own stack and can never be inserted
4822  // by styles. Therefore the attribute doesn't exist in CSS1-Which-Range.
4823  if( pCFormat )
4824  InsertAttr( &m_xAttrTab->pCharFormats, SwFormatCharFormat( pCFormat ), xCntxt.get() );
4825 
4826  // save the context
4827  PushContext(xCntxt);
4828 }
4829 
// NOTE(review): the function header (listing line 4830) is missing from this
// extract. The body evaluates aHTMLSpacerTypeTable and the HTML_SPTYPE_*
// values, so this is presumably InsertSpacer() - confirm against the full
// file. Listing lines 4884, 4919, 4921, 4948, 4967 and 4969 are missing as
// well, so several statements below are incomplete as shown.
//
// Reads the TYPE, ALIGN, WIDTH, HEIGHT and SIZE options of the current tag
// and then acts on the spacer type (default HTML_SPTYPE_HORI):
//  - HTML_SPTYPE_BLOCK: creates an empty, content-protected fly section with
//    the requested anchor, adjustment and size.
//  - HTML_SPTYPE_VERT (only if nSize > 0): at the start of a paragraph the
//    lower spacing of the preceding text node is enlarged; otherwise, or if
//    the preceding node is not a text node, a UL-space attribute is set and a
//    text node is appended.
//  - HTML_SPTYPE_HORI (only if nSize > 0): in an empty paragraph the first
//    line indent is enlarged; otherwise a space character with kerning is
//    inserted.
4831 {
4832  // and if applicable change it via the options
4833  sal_Int16 eVertOri = text::VertOrientation::TOP;
4834  sal_Int16 eHoriOri = text::HoriOrientation::NONE;
4835  Size aSize( 0, 0);
4836  long nSize = 0;
4837  bool bPrcWidth = false;
4838  bool bPrcHeight = false;
4839  sal_uInt16 nType = HTML_SPTYPE_HORI;
4840 
4841  const HTMLOptions& rHTMLOptions = GetOptions();
4842  for (size_t i = rHTMLOptions.size(); i; )
4843  {
4844  const HTMLOption& rOption = rHTMLOptions[--i];
4845  switch( rOption.GetToken() )
4846  {
4847  case HtmlOptionId::TYPE:
4848  rOption.GetEnum( nType, aHTMLSpacerTypeTable );
4849  break;
4850  case HtmlOptionId::ALIGN:
4851  eVertOri =
4852  rOption.GetEnum( aHTMLImgVAlignTable,
4853  eVertOri );
4854  eHoriOri =
4855  rOption.GetEnum( aHTMLImgHAlignTable,
4856  eHoriOri );
4857  break;
4858  case HtmlOptionId::WIDTH:
4859  // First only save as pixel value!
      // a '%' anywhere in the option string marks the value as a percentage
4860  bPrcWidth = (rOption.GetString().indexOf('%') != -1);
4861  aSize.setWidth( static_cast<long>(rOption.GetNumber()) );
4862  break;
4863  case HtmlOptionId::HEIGHT:
4864  // First only save as pixel value!
4865  bPrcHeight = (rOption.GetString().indexOf('%') != -1);
4866  aSize.setHeight( static_cast<long>(rOption.GetNumber()) );
4867  break;
4868  case HtmlOptionId::SIZE:
4869  // First only save as pixel value!
4870  nSize = rOption.GetNumber();
4871  break;
4872  default: break;
4873  }
4874  }
4875 
4876  switch( nType )
4877  {
4878  case HTML_SPTYPE_BLOCK:
4879  {
4880  // create an empty text frame
4881 
4882  // fetch the ItemSet
4883  SfxItemSet aFrameSet( m_xDoc->GetAttrPool(),
4885  if( !IsNewDoc() )
4886  Reader::ResetFrameFormatAttrs( aFrameSet );
4887 
4888  // set the anchor and the adjustment
4889  SetAnchorAndAdjustment( eVertOri, eHoriOri, aFrameSet );
4890 
4891  // and the size of the frame
4892  Size aDfltSz( MINFLY, MINFLY );
4893  Size aSpace( 0, 0 );
4894  SfxItemSet aDummyItemSet( m_xDoc->GetAttrPool(),
4895  m_pCSS1Parser->GetWhichMap() );
4896  SvxCSS1PropertyInfo aDummyPropInfo;
4897 
4898  SetFixSize( aSize, aDfltSz, bPrcWidth, bPrcHeight,
4899  aDummyPropInfo, aFrameSet );
4900  SetSpace( aSpace, aDummyItemSet, aDummyPropInfo, aFrameSet );
4901 
4902  // protect the content
4903  SvxProtectItem aProtectItem( RES_PROTECT) ;
4904  aProtectItem.SetContentProtect( true );
4905  aFrameSet.Put( aProtectItem );
4906 
4907  // create the frame
4908  RndStdIds eAnchorId =
4909  aFrameSet.Get(RES_ANCHOR).GetAnchorId();
4910  SwFrameFormat *pFlyFormat = m_xDoc->MakeFlySection( eAnchorId,
4911  m_pPam->GetPoint(), &aFrameSet );
4912  // Possibly create frames and register auto-bound frames.
4913  RegisterFlyFrame( pFlyFormat );
4914  }
4915  break;
4916  case HTML_SPTYPE_VERT:
4917  if( nSize > 0 )
4918  {
4920  {
4922  ->PixelToLogic( Size(0,nSize),
4923  MapMode(MapUnit::MapTwip) ).Height();
4924  }
4925 
4926  // set a paragraph margin
4927  SwTextNode *pTextNode = nullptr;
4928  if( !m_pPam->GetPoint()->nContent.GetIndex() )
4929  {
4930  // if possible change the bottom paragraph margin
4931  // of previous node
4932 
4933  SetAttr(); // set still open paragraph attributes
4934 
4935  pTextNode = m_xDoc->GetNodes()[m_pPam->GetPoint()->nNode.GetIndex()-1]
4936  ->GetTextNode();
4937 
4938  // If the previous paragraph isn't a text node, then now an
4939  // empty paragraph is created, which already generates a single
4940  // line of spacing.
4941  if( !pTextNode )
4942  nSize = nSize>HTML_PARSPACE ? nSize-HTML_PARSPACE : 0;
4943  }
4944 
4945  if( pTextNode )
4946  {
4947  SvxULSpaceItem aULSpace( static_cast<const SvxULSpaceItem&>(pTextNode
4949  aULSpace.SetLower( aULSpace.GetLower() + static_cast<sal_uInt16>(nSize) );
4950  pTextNode->SetAttr( aULSpace );
4951  }
4952  else
4953  {
4954  NewAttr(m_xAttrTab, &m_xAttrTab->pULSpace, SvxULSpaceItem(0, static_cast<sal_uInt16>(nSize), RES_UL_SPACE));
4955  EndAttr( m_xAttrTab->pULSpace, false );
4956 
4957  AppendTextNode(); // Don't change spacing!
4958  }
4959  }
4960  break;
4961  case HTML_SPTYPE_HORI:
4962  if( nSize > 0 )
4963  {
4964  // If the paragraph is still empty, set first line
4965  // indentation, otherwise apply letter spacing over a space.
4966 
4968  {
4970  ->PixelToLogic( Size(nSize,0),
4971  MapMode(MapUnit::MapTwip) ).Width();
4972  }
4973 
4974  if( !m_pPam->GetPoint()->nContent.GetIndex() )
4975  {
4976  sal_uInt16 nLeft=0, nRight=0;
4977  short nIndent = 0;
4978 
4979  GetMarginsFromContextWithNumBul( nLeft, nRight, nIndent );
4980  nIndent = nIndent + static_cast<short>(nSize);
4981 
4982  SvxLRSpaceItem aLRItem( RES_LR_SPACE );
4983  aLRItem.SetTextLeft( nLeft );
4984  aLRItem.SetRight( nRight );
4985  aLRItem.SetTextFirstLineOfst( nIndent );
4986 
4987  NewAttr(m_xAttrTab, &m_xAttrTab->pLRSpace, aLRItem);
4988  EndAttr( m_xAttrTab->pLRSpace, false );
4989  }
4990  else
4991  {
4992  NewAttr(m_xAttrTab, &m_xAttrTab->pKerning, SvxKerningItem( static_cast<short>(nSize), RES_CHRATR_KERNING ));
4993  OUString aTmp( ' ' );
4994  m_xDoc->getIDocumentContentOperations().InsertString( *m_pPam, aTmp );
4995  EndAttr( m_xAttrTab->pKerning );
4996  }
4997  }
4998  }
4999 }
5000 
// Converts a pixel value to twips (MapUnit::MapTwip).
//
// nPixel  value in pixels.
// Returns the converted width, limited to SAL_MAX_UINT16. When nPixel is 0
// or Application::GetDefaultDevice() yields no device, nPixel is returned
// unchanged.
//
// NOTE(review): listing line 5005, which introduces nTwips, is missing from
// this extract; presumably it holds the start of the pixel-to-logic
// conversion call whose arguments follow on line 5006 - confirm.
5001 sal_uInt16 SwHTMLParser::ToTwips( sal_uInt16 nPixel )
5002 {
5003  if( nPixel && Application::GetDefaultDevice() )
5004  {
5006  Size( nPixel, nPixel ), MapMode( MapUnit::MapTwip ) ).Width();
      // clamp so the cast to sal_uInt16 cannot receive a larger value
5007  return static_cast<sal_uInt16>(std::min(nTwips, SwTwips(SAL_MAX_UINT16)));
5008  }
5009  else
5010  return nPixel;
5011 }
5012 
// NOTE(review): the function header (listing line 5013) is missing from this
// extract; presumably this is GetCurrentBrowseWidth(), which other visible
// code calls to obtain a width - confirm against the full file. Listing
// lines 5015 (where nWidth is obtained) and 5032 (the statement for the
// multi-column case) are missing as well.
//
// Returns nWidth when it is non-zero. Otherwise returns the cached
// m_aHTMLPageSize width; if that is still zero, m_aHTMLPageSize is first
// filled from the master page format: frame width minus left and right
// margins, frame height minus upper and lower margins.
5014 {
5016  if( nWidth )
5017  return nWidth;
5018 
5019  if( !m_aHTMLPageSize.Width() )
5020  {
5021  const SwFrameFormat& rPgFormat = m_pCSS1Parser->GetMasterPageDesc()->GetMaster();
5022 
5023  const SwFormatFrameSize& rSz = rPgFormat.GetFrameSize();
5024  const SvxLRSpaceItem& rLR = rPgFormat.GetLRSpace();
5025  const SvxULSpaceItem& rUL = rPgFormat.GetULSpace();
5026  const SwFormatCol& rCol = rPgFormat.GetCol();
5027 
5028  m_aHTMLPageSize.setWidth( rSz.GetWidth() - rLR.GetLeft() - rLR.GetRight() );
5029  m_aHTMLPageSize.setHeight( rSz.GetHeight() - rUL.GetUpper() - rUL.GetLower() );
5030 
5031  if( 1 < rCol.GetNumCols() )
5033  }
5034 
5035  return m_aHTMLPageSize.Width();
5036 }
5037 
// NOTE(review): the function header (listing line 5038) is missing from this
// extract; the body only evaluates the ID option, so this is presumably
// InsertIDOption() - confirm against the full file.
//
// Looks for an ID option on the current tag and, if its value is not empty,
// inserts a bookmark with that name. The options are scanned from the last
// one to the first one and the scan stops at the first ID found, so with
// several ID options the last one in the list is used.
5039 {
5040  OUString aId;
5041  const HTMLOptions& rHTMLOptions = GetOptions();
5042  for (size_t i = rHTMLOptions.size(); i; )
5043  {
5044  const HTMLOption& rOption = rHTMLOptions[--i];
5045  if( HtmlOptionId::ID==rOption.GetToken() )
5046  {
5047  aId = rOption.GetString();
5048  break;
5049  }
5050  }
5051 
5052  if( !aId.isEmpty() )
5053  InsertBookmark( aId );
5054 }
5055 
// NOTE(review): the function header (listing line 5056) is missing from this
// extract; the leading comment describes <BR CLEAR=xxx>, so this is
// presumably InsertLineBreak() - confirm against the full file. Listing
// lines 5129 and 5198 are missing as well, so the eHori initialisation and
// the body of the `else if` near the end are incomplete as shown.
//
// Handles a line break tag:
//  - reads the CLEAR, ID, STYLE and CLASS options,
//  - for CLEAR=left/right/all changes the wrapping of matching frames
//    anchored in the current text node (see the numbered rules below),
//  - parses the style options; a format break found there is applied as
//    "page after" before, or "page before" after, the line/paragraph break,
//  - inserts a line feed character when neither a CLEAR was executed nor a
//    break item was found.
5057 {
5058  // <BR CLEAR=xxx> is handled as:
5059  // 1.) Only regard the paragraph-bound frames anchored in current paragraph.
5060  // 2.) For left-justified aligned frames, CLEAR=LEFT or ALL, and for right-
5061  // justified aligned frames, CLEAR=RIGHT or ALL, the wrap-through is
5062  // changed as following:
5063  // 3.) If the paragraph contains no text, then the frames don't get a wrapping
5064  // 4.) otherwise a left aligned frame gets a right "only anchor" wrapping
5065  // and a right aligned frame gets a left "only anchor" wrapping.
5066  // 5.) if in a non-empty paragraph the wrapping of a frame is changed,
5067  // then a new paragraph is opened
5068  // 6.) If no wrappings of frames are changed, a hard line break is inserted.
5069 
5070  OUString aId, aStyle, aClass; // the id of bookmark
5071  bool bClearLeft = false, bClearRight = false;
5072  bool bCleared = false; // Was a CLEAR executed?
5073 
5074  // then we fetch the options
5075  const HTMLOptions& rHTMLOptions = GetOptions();
5076  for (size_t i = rHTMLOptions.size(); i; )
5077  {
5078  const HTMLOption& rOption = rHTMLOptions[--i];
5079  switch( rOption.GetToken() )
5080  {
5081  case HtmlOptionId::CLEAR:
5082  {
5083  const OUString &rClear = rOption.GetString();
5084  if( rClear.equalsIgnoreAsciiCase( OOO_STRING_SVTOOLS_HTML_AL_all ) )
5085  {
5086  bClearLeft = true;
5087  bClearRight = true;
5088  }
5089  else if( rClear.equalsIgnoreAsciiCase( OOO_STRING_SVTOOLS_HTML_AL_left ) )
5090  bClearLeft = true;
5091  else if( rClear.equalsIgnoreAsciiCase( OOO_STRING_SVTOOLS_HTML_AL_right ) )
5092  bClearRight = true;
5093  }
5094  break;
5095  case HtmlOptionId::ID:
5096  aId = rOption.GetString();
5097  break;
5098  case HtmlOptionId::STYLE:
5099  aStyle = rOption.GetString();
5100  break;
5101  case HtmlOptionId::CLASS:
5102  aClass = rOption.GetString();
5103  break;
5104  default: break;
5105  }
5106  }
5107 
5108  // CLEAR is only supported for the current paragraph
5109  if( bClearLeft || bClearRight )
5110  {
5111  SwNodeIndex& rNodeIdx = m_pPam->GetPoint()->nNode;
5112  SwTextNode* pTextNd = rNodeIdx.GetNode().GetTextNode();
5113  if( pTextNd )
5114  {
5115  const SwFrameFormats& rFrameFormatTable = *m_xDoc->GetSpzFrameFormats();
5116 
5117  for( size_t i=0; i<rFrameFormatTable.size(); i++ )
5118  {
5119  SwFrameFormat *const pFormat = rFrameFormatTable[i];
5120  SwFormatAnchor const*const pAnchor = &pFormat->GetAnchor();
5121  SwPosition const*const pAPos = pAnchor->GetContentAnchor();
5122  if (pAPos &&
5123  ((RndStdIds::FLY_AT_PARA == pAnchor->GetAnchorId()) ||
5124  (RndStdIds::FLY_AT_CHAR == pAnchor->GetAnchorId())) &&
5125  pAPos->nNode == rNodeIdx &&
5126  pFormat->GetSurround().GetSurround() != css::text::WrapTextMode_NONE )
5127  {
5128  sal_Int16 eHori = RES_DRAWFRMFMT == pFormat->Which()
5130  : pFormat->GetHoriOrient().GetHoriOrient();
5131 
      // WrapTextMode_PARALLEL serves as the "leave this frame unchanged" marker
5132  css::text::WrapTextMode eSurround = css::text::WrapTextMode_PARALLEL;
5133  if( m_pPam->GetPoint()->nContent.GetIndex() )
5134  {
5135  if( bClearLeft && text::HoriOrientation::LEFT==eHori )
5136  eSurround = css::text::WrapTextMode_RIGHT;
5137  else if( bClearRight && text::HoriOrientation::RIGHT==eHori )
5138  eSurround = css::text::WrapTextMode_LEFT;
5139  }
5140  else if( (bClearLeft && text::HoriOrientation::LEFT==eHori) ||
5141  (bClearRight && text::HoriOrientation::RIGHT==eHori) )
5142  {
5143  eSurround = css::text::WrapTextMode_NONE;
5144  }
5145 
5146  if( css::text::WrapTextMode_PARALLEL != eSurround )
5147  {
5148  SwFormatSurround aSurround( eSurround );
5149  if( css::text::WrapTextMode_NONE != eSurround )
5150  aSurround.SetAnchorOnly( true );
5151  pFormat->SetFormatAttr( aSurround );
5152  bCleared = true;
5153  }
5154  }
5155  }
5156  }
5157  }
5158 
5159  // parse styles
5160  std::shared_ptr<SvxFormatBreakItem> aBreakItem(std::make_shared<SvxFormatBreakItem>(SvxBreak::NONE, RES_BREAK));
5161  bool bBreakItem = false;
5162  if( HasStyleOptions( aStyle, aId, aClass ) )
5163  {
5164  SfxItemSet aItemSet( m_xDoc->GetAttrPool(), m_pCSS1Parser->GetWhichMap() );
5165  SvxCSS1PropertyInfo aPropInfo;
5166 
5167  if( ParseStyleOptions( aStyle, aId, aClass, aItemSet, aPropInfo ) )
5168  {
5169  if( m_pCSS1Parser->SetFormatBreak( aItemSet, aPropInfo ) )
5170  {
5171  aBreakItem.reset(static_cast<SvxFormatBreakItem*>(aItemSet.Get(RES_BREAK).Clone()));
5172  bBreakItem = true;
5173  }
5174  if( !aPropInfo.m_aId.isEmpty() )
5175  InsertBookmark( aPropInfo.m_aId );
5176  }
5177  }
5178 
      // a "page after" break is set before the line/paragraph break below
5179  if( bBreakItem && SvxBreak::PageAfter == aBreakItem->GetBreak() )
5180  {
5181  NewAttr(m_xAttrTab, &m_xAttrTab->pBreak, *aBreakItem);
5182  EndAttr( m_xAttrTab->pBreak, false );
5183  }
5184 
5185  if( !bCleared && !bBreakItem )
5186  {
5187  // If no CLEAR could or should be executed, a line break will be inserted
5188  OUString sTmp( u'\x000a' ); // make the Mac happy :-)
5189  m_xDoc->getIDocumentContentOperations().InsertString( *m_pPam, sTmp );
5190  }
5191  else if( m_pPam->GetPoint()->nContent.GetIndex() )
5192  {
5193  // If a CLEAR is executed in a non-empty paragraph, then after it
5194  // a new paragraph has to be opened.
5195  // MIB 21.02.97: Here actually we should change the bottom paragraph
5196  // margin to zero. This will fail for something like this <BR ..><P>
5197  // (>Netscape). That's why we don't do it.
5199  }
      // a "page before" break is set after the line/paragraph break above
5200  if( bBreakItem && SvxBreak::PageBefore == aBreakItem->GetBreak() )
5201  {
5202  NewAttr(m_xAttrTab, &m_xAttrTab->pBreak, *aBreakItem);
5203  EndAttr( m_xAttrTab->pBreak, false );
5204  }
5205 }
5206 
// NOTE(review): the function header (listing line 5207) is missing from this
// extract; the body works with HtmlTokenId::HORZRULE and
// RES_POOLCOLL_HTML_HR, so this is presumably InsertHorzRule() - confirm
// against the full file. Listing lines 5259, 5263 and 5361 are missing as
// well, so the paragraph handling before and after the rule is incomplete as
// shown.
//
// Inserts a horizontal rule:
//  - reads the ID, SIZE, WIDTH, ALIGN, NOSHADE and COLOR options (a
//    percentage width of 100 or more is treated like no width at all),
//  - pushes a HORZRULE context using RES_POOLCOLL_HTML_HR and applies the
//    paragraph style for it,
//  - if SIZE, COLOR or NOSHADE was given, queues a box item with a bottom
//    border line in m_aSetAttrTab,
//  - if a width was given and there is no current table (m_xTable is empty),
//    queues an LR-space item that shortens/aligns the line via paragraph
//    indents,
//  - inserts a bookmark for a non-empty ID, pops the HORZRULE context and
//    re-applies the current paragraph style.
5208 {
5209  sal_uInt16 nSize = 0;
5210  sal_uInt16 nWidth = 0;
5211 
5212  SvxAdjust eAdjust = SvxAdjust::End;
5213 
5214  bool bPrcWidth = false;
5215  bool bNoShade = false;
5216  bool bColor = false;
5217 
5218  Color aColor;
5219  OUString aId;
5220 
5221  // let's fetch the options
5222  const HTMLOptions& rHTMLOptions = GetOptions();
5223  for (size_t i = rHTMLOptions.size(); i; )
5224  {
5225  const HTMLOption& rOption = rHTMLOptions[--i];
5226  switch( rOption.GetToken() )
5227  {
5228  case HtmlOptionId::ID:
5229  aId = rOption.GetString();
5230  break;
5231  case HtmlOptionId::SIZE:
5232  nSize = static_cast<sal_uInt16>(rOption.GetNumber());
5233  break;
5234  case HtmlOptionId::WIDTH:
5235  bPrcWidth = (rOption.GetString().indexOf('%') != -1);
5236  nWidth = static_cast<sal_uInt16>(rOption.GetNumber());
5237  if( bPrcWidth && nWidth>=100 )
5238  {
5239  // the default case are 100% lines (no attributes necessary)
5240  nWidth = 0;
5241  bPrcWidth = false;
5242  }
5243  break;
5244  case HtmlOptionId::ALIGN:
5245  eAdjust = rOption.GetEnum( aHTMLPAlignTable, eAdjust );
5246  break;
5247  case HtmlOptionId::NOSHADE:
5248  bNoShade = true;
5249  break;
5250  case HtmlOptionId::COLOR:
5251  rOption.GetColor( aColor );
5252  bColor = true;
5253  break;
5254  default: break;
5255  }
5256  }
5257 
5258  if( m_pPam->GetPoint()->nContent.GetIndex() )
5260  if( m_nOpenParaToken != HtmlTokenId::NONE )
5261  EndPara();
5262  AppendTextNode();
5264 
5265  // ...and save in a context
5266  std::unique_ptr<HTMLAttrContext> xCntxt(
5267  new HTMLAttrContext(HtmlTokenId::HORZRULE, RES_POOLCOLL_HTML_HR, OUString()));
5268 
5269  PushContext(xCntxt);
5270 
5271  // set the new style
5272  SetTextCollAttrs(m_aContexts.back().get());
5273 
5274  // the hard attributes of the current paragraph will never become invalid
5275  m_aParaAttrs.clear();
5276 
5277  if( nSize>0 || bColor || bNoShade )
5278  {
5279  // set line colour and/or width
5280  if( !bColor )
5281  aColor = COL_GRAY;
5282 
5283  SvxBorderLine aBorderLine( &aColor );
5284  if( nSize )
5285  {
5286  long nPWidth = 0;
5287  long nPHeight = static_cast<long>(nSize);
5288  SvxCSS1Parser::PixelToTwip( nPWidth, nPHeight );
5289  if ( !bNoShade )
5290  {
5291  aBorderLine.SetBorderLineStyle(SvxBorderLineStyle::DOUBLE);
5292  }
5293  aBorderLine.SetWidth( nPHeight );
5294  }
5295  else if( bNoShade )
5296  {
5297  aBorderLine.SetWidth( DEF_LINE_WIDTH_2 );
5298  }
5299  else
5300  {
5301  aBorderLine.SetBorderLineStyle(SvxBorderLineStyle::DOUBLE);
5302  aBorderLine.SetWidth( DEF_LINE_WIDTH_0 );
5303  }
5304 
5305  SvxBoxItem aBoxItem(RES_BOX);
5306  aBoxItem.SetLine( &aBorderLine, SvxBoxItemLine::BOTTOM );
5307  HTMLAttr* pTmp = new HTMLAttr(*m_pPam->GetPoint(), aBoxItem, nullptr, std::shared_ptr<HTMLAttrTable>());
      // the raw pointer is handed over to m_aSetAttrTab
      // NOTE(review): presumably m_aSetAttrTab's consumer frees it - confirm
5308  m_aSetAttrTab.push_back( pTmp );
5309  }
5310  if( nWidth )
5311  {
5312  // If we aren't in a table, then the width value will be "faked" with
5313  // paragraph indents. That makes little sense in a table. In order to
5314  // avoid that the line is considered during the width calculation, it
5315  // still gets an appropriate LRSpace-Item.
5316  if (!m_xTable)
5317  {
5318  // fake length and alignment of line above paragraph indents
5319  long nBrowseWidth = GetCurrentBrowseWidth();
      // NOTE(review): the non-percent branch passes nBrowseWidth, not the
      // parsed nWidth, to ToTwips() - confirm that this is intended.
5320  nWidth = bPrcWidth ? static_cast<sal_uInt16>((nWidth*nBrowseWidth) / 100)
5321  : ToTwips( static_cast<sal_uInt16>(nBrowseWidth) );
5322  if( nWidth < MINLAY )
5323  nWidth = MINLAY;
5324 
      // indents are only needed when the line is narrower than the browse width
5325  const SwFormatColl *pColl = (static_cast<long>(nWidth) < nBrowseWidth) ? GetCurrFormatColl() : nullptr;
5326  if (pColl)
5327  {
5328  SvxLRSpaceItem aLRItem( pColl->GetLRSpace() );
5329  long nDist = nBrowseWidth - nWidth;
5330 
5331  switch( eAdjust )
5332  {
5333  case SvxAdjust::Right:
5334  aLRItem.SetTextLeft( static_cast<sal_uInt16>(nDist) );
5335  break;
5336  case SvxAdjust::Left:
5337  aLRItem.SetRight( static_cast<sal_uInt16>(nDist) );
5338  break;
5339  case SvxAdjust::Center:
5340  default:
5341  nDist /= 2;
5342  aLRItem.SetTextLeft( static_cast<sal_uInt16>(nDist) );
5343  aLRItem.SetRight( static_cast<sal_uInt16>(nDist) );
5344  break;
5345  }
5346 
5347  HTMLAttr* pTmp = new HTMLAttr(*m_pPam->GetPoint(), aLRItem, nullptr, std::shared_ptr<HTMLAttrTable>());
5348  m_aSetAttrTab.push_back( pTmp );
5349  }
5350  }
5351  }
5352 
5353  // it's not possible to insert bookmarks in links
5354  if( !aId.isEmpty() )
5355  InsertBookmark( aId );
5356 
5357  // pop current context of stack
5358  std::unique_ptr<HTMLAttrContext> xPoppedContext(PopContext(HtmlTokenId::HORZRULE));
5359  xPoppedContext.reset();
5360 
5362 
5363  // and set the current style in the next paragraph
5364  SetTextCollAttrs();
5365 }
5366 
5368 {
5369  OUString aName, aContent;
5370  bool bHTTPEquiv = false;
5371 
5372  const HTMLOptions& rHTMLOptions = GetOptions();
5373  for (size_t i = rHTMLOptions.size(); i; )
5374  {
5375  const HTMLOption& rOption = rHTMLOptions[--i];
5376  switch( rOption.GetToken() )
5377  {
5378  case HtmlOptionId::NAME:
5379  aName = rOption.GetString();
5380  bHTTPEquiv = false;
5381  break;
5382  case HtmlOptionId::HTTPEQUIV:
5383  aName = rOption.GetString();
5384  bHTTPEquiv = true;
5385  break;
5386  case HtmlOptionId::CONTENT:
5387  aContent = rOption.GetString();
5388  break;
5389  default: break;
5390  }
5391  }
5392 
5393  // Here things get a little tricky: We know for sure, that the Doc-Info
5394  // wasn't changed. Therefore it's enough to query for Generator and Refresh
5395  // to find a not processed Token. These are the only ones which won't change
5396  // the Doc-Info.
5397  if( aName.equalsIgnoreAsciiCase( OOO_STRING_SVTOOLS_HTML_META_generator ) ||
5398  aName.equalsIgnoreAsciiCase( OOO_STRING_SVTOOLS_HTML_META_refresh ) ||
5399  aName.equalsIgnoreAsciiCase( OOO_STRING_SVTOOLS_HTML_META_content_type ) ||
5400  aName.equalsIgnoreAsciiCase( OOO_STRING_SVTOOLS_HTML_META_content_script_type ) )
5401  return;
5402 
5403  aContent = aContent.replaceAll("\r", "").replaceAll("\n", "");
5404 
5405  if( aName.equalsIgnoreAsciiCase( OOO_STRING_SVTOOLS_HTML_META_sdendnote ) )
5406  {
5407  FillEndNoteInfo( aContent );
5408  return;
5409  }
5410 
5411  if( aName.equalsIgnoreAsciiCase( OOO_STRING_SVTOOLS_HTML_META_sdfootnote ) )