LibreOffice Module dbaccess (master) 1
HtmlReader.cxx
Go to the documentation of this file.
1/* -*- Mode: C++; tab-width: 4; indent-tabs-mode: nil; c-basic-offset: 4 -*- */
2/*
3 * This file is part of the LibreOffice project.
4 *
5 * This Source Code Form is subject to the terms of the Mozilla Public
6 * License, v. 2.0. If a copy of the MPL was not distributed with this
7 * file, You can obtain one at http://mozilla.org/MPL/2.0/.
8 *
9 * This file incorporates work covered by the following license notice:
10 *
11 * Licensed to the Apache Software Foundation (ASF) under one or more
12 * contributor license agreements. See the NOTICE file distributed
13 * with this work for additional information regarding copyright
14 * ownership. The ASF licenses this file to you under the Apache
15 * License, Version 2.0 (the "License"); you may not use this file
16 * except in compliance with the License. You may obtain a copy of
17 * the License at http://www.apache.org/licenses/LICENSE-2.0 .
18 */
19
20#include <HtmlReader.hxx>
23#include <tools/stream.hxx>
24#include <tools/tenccvt.hxx>
25#include <comphelper/string.hxx>
26#include <strings.hrc>
27#include <osl/diagnose.h>
28#include <core_resource.hxx>
29#include <com/sun/star/sdbcx/XColumnsSupplier.hpp>
30#include <com/sun/star/awt/FontWeight.hpp>
31#include <com/sun/star/awt/FontStrikeout.hpp>
32#include <com/sun/star/awt/FontSlant.hpp>
33#include <com/sun/star/awt/FontUnderline.hpp>
34#include <svtools/htmltokn.h>
35#include <svtools/htmlkywd.hxx>
36#include <tools/color.hxx>
37#include <WExtendPages.hxx>
38#include <vcl/svapp.hxx>
39#include <vcl/settings.hxx>
40
41using namespace dbaui;
42using namespace ::com::sun::star::uno;
43using namespace ::com::sun::star::beans;
44using namespace ::com::sun::star::container;
45using namespace ::com::sun::star::sdbc;
46using namespace ::com::sun::star::sdbcx;
47using namespace ::com::sun::star::awt;
48
49#define DBAUI_HTML_FONTSIZES 8 // like export, HTML-Options
50
51// OHTMLReader
52OHTMLReader::OHTMLReader(SvStream& rIn,const SharedConnection& _rxConnection,
54 const css::uno::Reference< css::uno::XComponentContext >& _rxContext)
55 : HTMLParser(rIn)
56 , ODatabaseExport( _rxConnection, _rxNumberF, _rxContext, rIn )
57 , m_nTableCount(0)
58 , m_nColumnWidth(87)
59{
60 SetSrcEncoding( GetExtendedCompatibilityTextEncoding( RTL_TEXTENCODING_ISO_8859_1 ) );
61 // If the file starts with a BOM, switch to UCS2.
62 SetSwitchToUCS2( true );
63}
64
65OHTMLReader::OHTMLReader(SvStream& rIn,
66 sal_Int32 nRows,
67 TPositions&& _rColumnPositions,
69 const css::uno::Reference< css::uno::XComponentContext >& _rxContext,
70 const TColumnVector* pList,
71 const OTypeInfoMap* _pInfoMap,
72 bool _bAutoIncrementEnabled)
73 : HTMLParser(rIn)
74 , ODatabaseExport( nRows, std::move(_rColumnPositions), _rxNumberF, _rxContext, pList, _pInfoMap, _bAutoIncrementEnabled, rIn )
75 , m_nTableCount(0)
76 , m_nColumnWidth(87)
77{
78 SetSrcEncoding( GetExtendedCompatibilityTextEncoding( RTL_TEXTENCODING_ISO_8859_1 ) );
79 // If the file starts with a BOM, switch to UCS2.
80 SetSwitchToUCS2( true );
81}
82
83OHTMLReader::~OHTMLReader()
84{
85}
86
88{
93 return m_bFoundTable ? eParseState : SvParserState::Error;
94}
95
96#if defined _MSC_VER
97#pragma warning(disable: 4702) // unreachable code, bug in MSVC2015
98#endif
100{
101 if(m_bError || !m_nRows) // if there is an error or no more rows to check, return immediately
102 return;
103 if ( nToken == HtmlTokenId::META )
105
106 if(m_xConnection.is()) // names, which CTOR was called and hence, if a table should be created
107 {
108 switch(nToken)
109 {
110 case HtmlTokenId::TABLE_ON:
112 { // can also be TD or TH, if there was no TABLE before
113 const HTMLOptions& rHtmlOptions = GetOptions();
114 for (const auto & rOption : rHtmlOptions)
115 {
116 if( rOption.GetToken() == HtmlOptionId::WIDTH )
117 { // percentage: of document width respectively outer cell
118 m_nColumnWidth = GetWidthPixel( rOption );
119 }
120 }
121 }
122 [[fallthrough]];
123 case HtmlTokenId::THEAD_ON:
124 case HtmlTokenId::TBODY_ON:
125 {
126 sal_uInt64 const nTell = rInput.Tell(); // perhaps alters position of the stream
127 if ( !m_xTable.is() )
128 {// use first line as header
130 if ( m_bAppendFirstLine )
131 rInput.Seek(nTell);
132 }
133 }
134 break;
135 case HtmlTokenId::TABLE_OFF:
136 if(!--m_nTableCount)
137 {
138 m_xTable = nullptr;
139 }
140 break;
141 case HtmlTokenId::TABLEROW_ON:
142 if ( !m_pUpdateHelper )
143 m_bError = true;
144 break;
145 case HtmlTokenId::TEXTTOKEN:
146 case HtmlTokenId::SINGLECHAR:
147 if ( m_bInTbl ) //&& !m_bSDNum ) // important, as otherwise we also get the names of the fonts
149 break;
150 case HtmlTokenId::PARABREAK_OFF:
152 break;
153 case HtmlTokenId::PARABREAK_ON:
154 m_sTextToken.clear();
155 break;
156 case HtmlTokenId::TABLEDATA_ON:
157 fetchOptions();
158 break;
159 case HtmlTokenId::TABLEDATA_OFF:
160 {
161 if ( !m_sCurrent.isEmpty() )
163 try
164 {
166 }
167 catch(SQLException& e)
168 // handling update failure
169 {
171 }
172 m_sCurrent.clear();
173 m_nColumnPos++;
174 eraseTokens();
175 m_bInTbl = false;
176 }
177 break;
178 case HtmlTokenId::TABLEROW_OFF:
179 if ( !m_pUpdateHelper )
180 {
181 m_bError = true;
182 break;
183 }
184 try
185 {
186 m_nRowCount++;
187 if (m_bIsAutoIncrement) // if bSetAutoIncrement then I have to set the autoincrement
188 m_pUpdateHelper->updateInt(1,m_nRowCount);
189 m_pUpdateHelper->insertRow();
190 }
191 catch(SQLException& e)
192 // handling update failure
193 {
195 }
196 m_nColumnPos = 0;
197 break;
198 default: break;
199 }
200 }
201 else // branch only valid for type checking
202 {
203 switch(nToken)
204 {
205 case HtmlTokenId::THEAD_ON:
206 case HtmlTokenId::TBODY_ON:
207 // The head of the column is not included
208 if(m_bHead)
209 {
210 do
211 {}
212 while(GetNextToken() != HtmlTokenId::TABLEROW_OFF);
213 m_bHead = false;
214 }
215 break;
216 case HtmlTokenId::TABLEDATA_ON:
217 case HtmlTokenId::TABLEHEADER_ON:
218 fetchOptions();
219 break;
220 case HtmlTokenId::TEXTTOKEN:
221 case HtmlTokenId::SINGLECHAR:
222 if ( m_bInTbl ) // && !m_bSDNum ) // important, as otherwise we also get the names of the fonts
224 break;
225 case HtmlTokenId::PARABREAK_OFF:
227 break;
228 case HtmlTokenId::PARABREAK_ON:
229 m_sTextToken.clear();
230 break;
231 case HtmlTokenId::TABLEDATA_OFF:
232 if ( !m_sCurrent.isEmpty() )
234 adjustFormat();
235 m_nColumnPos++;
236 m_bInTbl = false;
237 m_sCurrent.clear();
238 break;
239 case HtmlTokenId::TABLEROW_OFF:
240 if ( !m_sCurrent.isEmpty() )
242 adjustFormat();
243 m_nColumnPos = 0;
244 m_nRows--;
245 m_sCurrent.clear();
246 break;
247 default: break;
248 }
249 }
250}
251
253{
254 m_bInTbl = true;
255 const HTMLOptions& options = GetOptions();
256 for (const auto & rOption : options)
257 {
258 switch( rOption.GetToken() )
259 {
260 case HtmlOptionId::SDNUM:
261 m_sNumToken = rOption.GetString();
262 break;
263 default: break;
264 }
265 }
266}
267
269{
270 const HTMLOptions& rHtmlOptions = GetOptions();
271 for (const auto & rOption : rHtmlOptions)
272 {
273 switch( rOption.GetToken() )
274 {
275 case HtmlOptionId::ALIGN:
276 {
277 const OUString& rOptVal = rOption.GetString();
278 if (rOptVal.equalsIgnoreAsciiCase( OOO_STRING_SVTOOLS_HTML_AL_right ))
279 eVal = SvxCellHorJustify::Right;
280 else if (rOptVal.equalsIgnoreAsciiCase( OOO_STRING_SVTOOLS_HTML_AL_center ))
281 eVal = SvxCellHorJustify::Center;
282 else if (rOptVal.equalsIgnoreAsciiCase( OOO_STRING_SVTOOLS_HTML_AL_left ))
283 eVal = SvxCellHorJustify::Left;
284 else
285 eVal = SvxCellHorJustify::Standard;
286 }
287 break;
288 default: break;
289 }
290 }
291}
292
293void OHTMLReader::TableFontOn(FontDescriptor& _rFont, Color &_rTextColor)
294{
295 const HTMLOptions& rHtmlOptions = GetOptions();
296 for (const auto & rOption : rHtmlOptions)
297 {
298 switch( rOption.GetToken() )
299 {
300 case HtmlOptionId::COLOR:
301 {
302 Color aColor;
303 rOption.GetColor( aColor );
304 _rTextColor = aColor.GetRGBColor();
305 }
306 break;
307 case HtmlOptionId::FACE :
308 {
309 const OUString& rFace = rOption.GetString();
310 OUStringBuffer aFontName;
311 sal_Int32 nPos = 0;
312 while( nPos != -1 )
313 {
314 // list of fonts, VCL: semicolon as separator, HTML: comma
315 std::u16string_view aFName = o3tl::getToken(rFace, 0, ',', nPos );
316 aFName = comphelper::string::strip(aFName, ' ');
317 if( !aFontName.isEmpty() )
318 aFontName.append(";");
319 aFontName.append(aFName);
320 }
321 if ( !aFontName.isEmpty() )
322 _rFont.Name = aFontName.makeStringAndClear();
323 }
324 break;
325 case HtmlOptionId::SIZE :
326 {
327 sal_Int16 nSize = static_cast<sal_Int16>(rOption.GetNumber());
328 if ( nSize == 0 )
329 nSize = 1;
330 else if ( nSize < DBAUI_HTML_FONTSIZES )
331 nSize = DBAUI_HTML_FONTSIZES;
332
333 _rFont.Height = nSize;
334 }
335 break;
336 default: break;
337 }
338 }
339}
340
341sal_Int16 OHTMLReader::GetWidthPixel( const HTMLOption& rOption )
342{
343 const OUString& rOptVal = rOption.GetString();
344 if ( rOptVal.indexOf('%') != -1 )
345 { // percentage
346 OSL_ENSURE( m_nColumnWidth, "WIDTH Option: m_nColumnWidth==0 and Width%" );
347 return static_cast<sal_Int16>((rOption.GetNumber() * m_nColumnWidth) / 100);
348 }
349 else
350 {
351 if ( rOptVal.indexOf('*') != -1 )
352 { // relative to what?!?
353//TODO: collect ColArray of all relevant values and then MakeCol
354 return 0;
355 }
356 else
357 return static_cast<sal_Int16>(rOption.GetNumber()); // pixel
358 }
359}
360
362{
363 OUString aTempName(DBA_RES(STR_TBL_TITLE));
364 aTempName = aTempName.getToken(0,' ');
365 aTempName = ::dbtools::createUniqueName(m_xTables, aTempName);
366
367 bool bCaption = false;
368 bool bTableHeader = false;
369 OUString aColumnName;
371
372 OUString aTableName;
373 FontDescriptor aFont = VCLUnoHelper::CreateFontDescriptor(Application::GetSettings().GetStyleSettings().GetAppFont());
374 Color nTextColor;
375 do
376 {
377 switch (nToken)
378 {
379 case HtmlTokenId::TEXTTOKEN:
380 case HtmlTokenId::SINGLECHAR:
381 if(bTableHeader)
382 aColumnName += aToken;
383 if(bCaption)
384 aTableName += aToken;
385 break;
386 case HtmlTokenId::PARABREAK_OFF:
387 m_sCurrent += aColumnName;
388 break;
389 case HtmlTokenId::PARABREAK_ON:
390 m_sTextToken.clear();
391 break;
392 case HtmlTokenId::TABLEDATA_ON:
393 case HtmlTokenId::TABLEHEADER_ON:
394 TableDataOn(eVal);
395 bTableHeader = true;
396 break;
397 case HtmlTokenId::TABLEDATA_OFF:
398 case HtmlTokenId::TABLEHEADER_OFF:
399 {
400 aColumnName = comphelper::string::strip(aColumnName, ' ' );
401 if (aColumnName.isEmpty() || m_bAppendFirstLine )
402 aColumnName = DBA_RES(STR_COLUMN_NAME);
403 else if ( !m_sCurrent.isEmpty() )
404 aColumnName = m_sCurrent;
405
406 aColumnName = comphelper::string::strip(aColumnName, ' ');
407 CreateDefaultColumn(aColumnName);
408 aColumnName.clear();
409 m_sCurrent.clear();
410
411 eVal = SvxCellHorJustify::Standard;
412 bTableHeader = false;
413 }
414 break;
415
416 case HtmlTokenId::TITLE_ON:
417 case HtmlTokenId::CAPTION_ON:
418 bCaption = true;
419 break;
420 case HtmlTokenId::TITLE_OFF:
421 case HtmlTokenId::CAPTION_OFF:
422 aTableName = comphelper::string::strip(aTableName, ' ');
423 if(aTableName.isEmpty())
424 aTableName = ::dbtools::createUniqueName(m_xTables, aTableName);
425 else
426 aTableName = aTempName;
427 bCaption = false;
428 break;
429 case HtmlTokenId::FONT_ON:
430 TableFontOn(aFont,nTextColor);
431 break;
432 case HtmlTokenId::BOLD_ON:
433 aFont.Weight = css::awt::FontWeight::BOLD;
434 break;
435 case HtmlTokenId::ITALIC_ON:
436 aFont.Slant = css::awt::FontSlant_ITALIC;
437 break;
438 case HtmlTokenId::UNDERLINE_ON:
439 aFont.Underline = css::awt::FontUnderline::SINGLE;
440 break;
441 case HtmlTokenId::STRIKE_ON:
442 aFont.Strikeout = css::awt::FontStrikeout::SINGLE;
443 break;
444 default: break;
445 }
447 }
448 while (nToken != HtmlTokenId::TABLEROW_OFF);
449
450 if ( !m_sCurrent.isEmpty() )
451 aColumnName = m_sCurrent;
452 aColumnName = comphelper::string::strip(aColumnName, ' ');
453 if(!aColumnName.isEmpty())
454 CreateDefaultColumn(aColumnName);
455
456 if ( m_vDestVector.empty() )
457 return false;
458
459 if(aTableName.isEmpty())
460 aTableName = aTempName;
461
462 m_bInTbl = false;
463 m_bFoundTable = true;
464
465 if ( isCheckEnabled() )
466 return true;
467
468 return !executeWizard(aTableName,Any(nTextColor),aFont) && m_xTable.is();
469}
470
472{
473 ParseMetaOptions(nullptr, nullptr);
474}
475
477{
479}
480
481/* vim:set shiftwidth=4 softtabstop=4 expandtab: */
#define DBAUI_HTML_FONTSIZES
Definition: HtmlReader.cxx:49
static const AllSettings & GetSettings()
Color GetRGBColor() const
const OUString & GetString() const
sal_uInt32 GetNumber() const
virtual SvParserState CallParser() override
virtual bool ParseMetaOptions(const css::uno::Reference< css::document::XDocumentProperties > &, SvKeyValueIterator *)
const HTMLOptions & GetOptions(HtmlOptionId const *pNoConvertToken=nullptr)
OUStringBuffer aToken
virtual void ResetError()
sal_uInt64 Tell() const
sal_uInt64 Seek(sal_uInt64 nPos)
static css::awt::FontDescriptor CreateFontDescriptor(const vcl::Font &rFont)
sal_Int32 m_nRows
number of rows to be searched
Definition: DExport.hxx:91
bool isCheckEnabled() const
Definition: DExport.hxx:154
OUString m_sTextToken
cell content
Definition: DExport.hxx:85
void CreateDefaultColumn(const OUString &_rColumnName)
Definition: DExport.cxx:605
css::uno::Reference< css::container::XNameAccess > m_xTables
container
Definition: DExport.hxx:72
TColumnVector m_vDestVector
Definition: DExport.hxx:69
bool m_bError
error and termination code
Definition: DExport.hxx:93
std::shared_ptr< IUpdateHelper > m_pUpdateHelper
Definition: DExport.hxx:75
sal_Int32 m_nColumnPos
current column position
Definition: DExport.hxx:90
const OTypeInfoMap * m_pInfoMap
Definition: DExport.hxx:89
bool m_bInTbl
true, if parser is in RTF table
Definition: DExport.hxx:94
OUString m_sNumToken
SDNUM value.
Definition: DExport.hxx:86
sal_Int32 m_nRowCount
current count of rows
Definition: DExport.hxx:92
css::uno::Reference< css::beans::XPropertySet > m_xTable
dest table
Definition: DExport.hxx:71
bool executeWizard(const OUString &_sTableName, const css::uno::Any &_aTextColor, const css::awt::FontDescriptor &_rFont)
executeWizard calls a wizard to create/append data
Definition: DExport.cxx:661
bool m_bFoundTable
set to true when a table was found
Definition: DExport.hxx:98
bool m_bIsAutoIncrement
if PKey is set by user
Definition: DExport.hxx:97
void showErrorDialog(const css::sdbc::SQLException &e)
Definition: DExport.cxx:725
void SetColumnTypes(const TColumnVector *rList, const OTypeInfoMap *_pInfoMap)
Definition: DExport.cxx:530
void insertValueIntoColumn()
Definition: DExport.cxx:272
bool m_bHead
true, if the header hasn't been read yet
Definition: DExport.hxx:95
const TColumnVector * m_pColumnList
Definition: DExport.hxx:88
SharedConnection m_xConnection
dest conn
Definition: DExport.hxx:73
virtual TypeSelectionPageFactory getTypeSelectionPageFactory() override
Definition: HtmlReader.cxx:476
sal_Int32 m_nTableCount
Definition: HtmlReader.hxx:34
sal_Int16 m_nColumnWidth
maximum column width
Definition: HtmlReader.hxx:35
void TableDataOn(SvxCellHorJustify &eVal)
Definition: HtmlReader.cxx:268
bool CreateTable(HtmlTokenId nToken)
Definition: HtmlReader.cxx:361
sal_Int16 GetWidthPixel(const HTMLOption &rOption)
Definition: HtmlReader.cxx:341
virtual SvParserState CallParser() override
Definition: HtmlReader.cxx:87
void TableFontOn(css::awt::FontDescriptor &_rFont, Color &_rTextColor)
Definition: HtmlReader.cxx:293
virtual void NextToken(HtmlTokenId nToken) override
Definition: HtmlReader.cxx:99
static std::unique_ptr< OWizTypeSelect > Create(weld::Container *pPage, OCopyTableWizard *pWizard, SvStream &rInput)
#define DBA_RES(id)
#define OOO_STRING_SVTOOLS_HTML_AL_left
#define OOO_STRING_SVTOOLS_HTML_AL_center
#define OOO_STRING_SVTOOLS_HTML_AL_right
HtmlTokenId
sal_uInt16 nPos
OString strip(const OString &rIn, char c)
std::multimap< DataTypeEnum, OExtendedTypeInfo * > OTypeInfoMap
std::unique_ptr< OWizTypeSelect >(* TypeSelectionPageFactory)(weld::Container *, OCopyTableWizard *, SvStream &)
std::basic_string_view< charT, traits > getToken(std::basic_string_view< charT, traits > sv, charT delimiter, std::size_t &position)
::std::vector< HTMLOption > HTMLOptions
DefTokenId nToken
#define STREAM_SEEK_TO_BEGIN
SvParserState
SvxCellHorJustify
TOOLS_DLLPUBLIC rtl_TextEncoding GetExtendedCompatibilityTextEncoding(rtl_TextEncoding eEncoding)