17 #include <libxml/HTMLparser.h>
19 #include <libxml/xpath.h>
41 const std::vector<std::shared_ptr<sc::DataTransformation>>& rTransformations);
43 virtual void execute()
override;
47 ScDocument& rDoc,
const OUString& rURL,
const OUString& rID,
48 std::function<
void()> aImportFinishedHdl,
49 const std::vector<std::shared_ptr<sc::DataTransformation>>& rTransformations)
54 , maDataTransformations(rTransformations)
55 , maImportFinishedHdl(
std::move(aImportFinishedHdl))
61 OString
toString(
const xmlChar* pStr)
63 return OString(reinterpret_cast<const char*>(pStr), xmlStrlen(pStr));
66 OUString trim_string(
const OUString& aStr)
69 OUString aString = aStr;
78 while (aOldString != aString);
83 OUString get_node_str(xmlNodePtr pNode)
86 for (xmlNodePtr cur_node = pNode->children; cur_node; cur_node = cur_node->next)
88 if (cur_node->type == XML_TEXT_NODE)
90 OUString aString = OStringToOUString(
toString(cur_node->content), RTL_TEXTENCODING_UTF8);
91 aStr.append(trim_string(aString));
93 else if (cur_node->type == XML_ELEMENT_NODE)
95 aStr.append(get_node_str(cur_node));
99 return aStr.makeStringAndClear();
107 for (xmlNodePtr cur_node = pCellNode->children; cur_node; cur_node = cur_node->next)
109 if (cur_node->type == XML_TEXT_NODE)
111 OUString aString = OStringToOUString(
toString(cur_node->content), RTL_TEXTENCODING_UTF8);
112 aStr.append(trim_string(aString));
114 else if (cur_node->type == XML_ELEMENT_NODE)
116 aStr.append(get_node_str(cur_node));
122 OUString aCellStr = aStr.makeStringAndClear();
123 mrDocument.
SetString(nCol, nRow, 0, aCellStr);
130 for (xmlNodePtr cur_node = pRowNode->children; cur_node; cur_node = cur_node->next)
132 if (cur_node->type == XML_ELEMENT_NODE)
134 OString aNodeName =
toString(cur_node->name);
135 if (aNodeName ==
"td" || aNodeName ==
"th")
146 for (xmlNodePtr cur_node = pSkipElement->children; cur_node; cur_node = cur_node->next)
148 if (cur_node->type == XML_ELEMENT_NODE)
150 OString aNodeName =
toString(cur_node->name);
151 if (aNodeName ==
"tr")
164 for (xmlNodePtr cur_node = pTable->children; cur_node; cur_node = cur_node->next)
166 if (cur_node->type == XML_ELEMENT_NODE)
168 OString aNodeName =
toString(cur_node->name);
169 if (aNodeName ==
"tr")
174 else if (aNodeName ==
"thead" || aNodeName ==
"tbody")
187 if (aBuffer.isEmpty())
190 htmlDocPtr pHtmlPtr = htmlParseDoc(reinterpret_cast<xmlChar*>(const_cast<char*>(aBuffer.getStr())),
nullptr);
193 xmlXPathContextPtr pXmlXpathCtx = xmlXPathNewContext(pHtmlPtr);
194 xmlXPathObjectPtr pXmlXpathObj = xmlXPathEvalExpression(BAD_CAST(aID.getStr()), pXmlXpathCtx);
198 xmlXPathFreeContext(pXmlXpathCtx);
201 xmlNodeSetPtr pXmlNodes = pXmlXpathObj->nodesetval;
205 xmlXPathFreeNodeSetList(pXmlXpathObj);
206 xmlXPathFreeContext(pXmlXpathCtx);
210 if (pXmlNodes->nodeNr == 0)
212 xmlXPathFreeNodeSet(pXmlNodes);
213 xmlXPathFreeNodeSetList(pXmlXpathObj);
214 xmlXPathFreeContext(pXmlXpathCtx);
218 xmlNodePtr pNode = pXmlNodes->nodeTab[0];
221 xmlXPathFreeNodeSet(pXmlNodes);
222 xmlXPathFreeNodeSetList(pXmlXpathObj);
223 xmlXPathFreeContext(pXmlXpathCtx);
225 for (
auto& itr : maDataTransformations)
227 itr->Transform(mrDocument);
sc::ExternalDataSource & mrDataSource
Abstract class for all data provider.
virtual ~HTMLDataProvider() override
std::function< void()> maImportFinishedHdl
bool mbDeterministic
If true make the threaded import deterministic for the tests.
OString OUStringToOString(std::u16string_view str, ConnectionSettings const *settings)
HTMLFetchThread(ScDocument &rDoc, const OUString &, const OUString &rID, std::function< void()> aImportFinishedHdl, const std::vector< std::shared_ptr< sc::DataTransformation >> &rTransformations)
static std::unique_ptr< SvStream > FetchStreamFromURL(const OUString &, OStringBuffer &rBuffer)
const std::vector< std::shared_ptr< sc::DataTransformation > > maDataTransformations
void WriteToDoc(ScDocument &rDoc)
SC_DLLPUBLIC bool SetString(SCCOL nCol, SCROW nRow, SCTAB nTab, const OUString &rString, const ScSetStringParam *pParam=nullptr)
virtual const OUString & GetURL() const override
ScDocumentUniquePtr mpDoc
void skipHeadBody(xmlNodePtr pSkip, SCROW &rRow)
void handleRow(xmlNodePtr pRow, SCROW nRow)
const OUString & getURL() const
rtl::Reference< HTMLFetchThread > mxHTMLFetchThread
HTMLDataProvider(ScDocument *pDoc, sc::ExternalDataSource &rDataSource)
void handleCell(xmlNodePtr pCell, SCROW nRow, SCCOL nCol)
ScDBDataManager * getDBManager()
std::unique_ptr< char[]> aBuffer
OString strip(const OString &rIn, char c)
virtual void Import() override
void handleTable(xmlNodePtr pTable)
virtual void execute() override
const std::vector< std::shared_ptr< sc::DataTransformation > > & getDataTransformation() const
const OUString & getID() const
OUString toString(OptionInfo const *info)