18#include <libxml/HTMLparser.h>
20#include <libxml/xpath.h>
41 std::vector<std::shared_ptr<sc::DataTransformation>>&& rTransformations);
43 virtual void execute()
override;
47 ScDocument& rDoc,
const OUString& rURL,
const OUString& rID,
48 std::function<
void()> aImportFinishedHdl,
49 std::vector<std::shared_ptr<sc::DataTransformation>>&& rTransformations)
54 , maDataTransformations(
std::move(rTransformations))
55 , maImportFinishedHdl(
std::move(aImportFinishedHdl))
63 return OString(
reinterpret_cast<const char*
>(pStr), xmlStrlen(pStr));
66OUString trim_string(
const OUString& aStr)
69 OUString aString =
aStr;
78 while (aOldString != aString);
83OUString get_node_str(xmlNodePtr pNode)
86 for (xmlNodePtr cur_node = pNode->children; cur_node; cur_node = cur_node->next)
88 if (cur_node->type == XML_TEXT_NODE)
90 OUString aString = OStringToOUString(
toString(cur_node->content), RTL_TEXTENCODING_UTF8);
91 aStr.append(trim_string(aString));
93 else if (cur_node->type == XML_ELEMENT_NODE)
95 aStr.append(get_node_str(cur_node));
99 return aStr.makeStringAndClear();
107 for (xmlNodePtr cur_node = pCellNode->children; cur_node; cur_node = cur_node->next)
109 if (cur_node->type == XML_TEXT_NODE)
111 OUString aString = OStringToOUString(
toString(cur_node->content), RTL_TEXTENCODING_UTF8);
112 aStr.append(trim_string(aString));
114 else if (cur_node->type == XML_ELEMENT_NODE)
116 aStr.append(get_node_str(cur_node));
122 OUString aCellStr =
aStr.makeStringAndClear();
130 for (xmlNodePtr cur_node = pRowNode->children; cur_node; cur_node = cur_node->next)
132 if (cur_node->type == XML_ELEMENT_NODE)
134 OString aNodeName =
toString(cur_node->name);
135 if (aNodeName ==
"td" || aNodeName ==
"th")
146 for (xmlNodePtr cur_node = pSkipElement->children; cur_node; cur_node = cur_node->next)
148 if (cur_node->type == XML_ELEMENT_NODE)
150 OString aNodeName =
toString(cur_node->name);
151 if (aNodeName ==
"tr")
164 for (xmlNodePtr cur_node = pTable->children; cur_node; cur_node = cur_node->next)
166 if (cur_node->type == XML_ELEMENT_NODE)
168 OString aNodeName =
toString(cur_node->name);
169 if (aNodeName ==
"tr")
174 else if (aNodeName ==
"thead" || aNodeName ==
"tbody")
190 htmlDocPtr pHtmlPtr = htmlParseDoc(
reinterpret_cast<xmlChar*
>(
const_cast<char*
>(
aBuffer.getStr())),
nullptr);
193 xmlXPathContextPtr pXmlXpathCtx = xmlXPathNewContext(pHtmlPtr);
194 xmlXPathObjectPtr pXmlXpathObj = xmlXPathEvalExpression(BAD_CAST(aID.getStr()), pXmlXpathCtx);
198 xmlXPathFreeContext(pXmlXpathCtx);
201 xmlNodeSetPtr pXmlNodes = pXmlXpathObj->nodesetval;
205 xmlXPathFreeNodeSetList(pXmlXpathObj);
206 xmlXPathFreeContext(pXmlXpathCtx);
210 if (pXmlNodes->nodeNr == 0)
212 xmlXPathFreeNodeSet(pXmlNodes);
213 xmlXPathFreeNodeSetList(pXmlXpathObj);
214 xmlXPathFreeContext(pXmlXpathCtx);
218 xmlNodePtr pNode = pXmlNodes->nodeTab[0];
221 xmlXPathFreeNodeSet(pXmlNodes);
222 xmlXPathFreeNodeSetList(pXmlXpathObj);
223 xmlXPathFreeContext(pXmlXpathCtx);
SC_DLLPUBLIC bool SetString(SCCOL nCol, SCROW nRow, SCTAB nTab, const OUString &rString, const ScSetStringParam *pParam=nullptr)
Abstract class for all data providers.
static std::unique_ptr< SvStream > FetchStreamFromURL(const OUString &, OStringBuffer &rBuffer)
bool mbDeterministic
If true, make the threaded import deterministic for the tests.
sc::ExternalDataSource & mrDataSource
const OUString & getID() const
const std::vector< std::shared_ptr< sc::DataTransformation > > & getDataTransformation() const
const OUString & getURL() const
ScDBDataManager * getDBManager()
ScDocumentUniquePtr mpDoc
HTMLDataProvider(ScDocument *pDoc, sc::ExternalDataSource &rDataSource)
virtual const OUString & GetURL() const override
virtual void Import() override
rtl::Reference< HTMLFetchThread > mxHTMLFetchThread
virtual ~HTMLDataProvider() override
void handleCell(xmlNodePtr pCell, SCROW nRow, SCCOL nCol)
void skipHeadBody(xmlNodePtr pSkip, SCROW &rRow)
HTMLFetchThread(ScDocument &rDoc, const OUString &, const OUString &rID, std::function< void()> aImportFinishedHdl, std::vector< std::shared_ptr< sc::DataTransformation > > &&rTransformations)
virtual void execute() override
const std::vector< std::shared_ptr< sc::DataTransformation > > maDataTransformations
std::function< void()> maImportFinishedHdl
void handleRow(xmlNodePtr pRow, SCROW nRow)
void handleTable(xmlNodePtr pTable)
void WriteToDoc(ScDocument &rDoc)
OString strip(const OString &rIn, char c)
OString OUStringToOString(std::u16string_view str, ConnectionSettings const *settings)
CAUTION! The following defines must be in the same namespace as the respective type.
OUString toString(OptionInfo const *info)
std::unique_ptr< char[]> aBuffer