LibreOffice Module unoxml (master) 1
documentbuilder.cxx
Go to the documentation of this file.
1/* -*- Mode: C++; tab-width: 4; indent-tabs-mode: nil; c-basic-offset: 4 -*- */
2/*
3 * This file is part of the LibreOffice project.
4 *
5 * This Source Code Form is subject to the terms of the Mozilla Public
6 * License, v. 2.0. If a copy of the MPL was not distributed with this
7 * file, You can obtain one at http://mozilla.org/MPL/2.0/.
8 *
9 * This file incorporates work covered by the following license notice:
10 *
11 * Licensed to the Apache Software Foundation (ASF) under one or more
12 * contributor license agreements. See the NOTICE file distributed
13 * with this work for additional information regarding copyright
14 * ownership. The ASF licenses this file to you under the Apache
15 * License, Version 2.0 (the "License"); you may not use this file
16 * except in compliance with the License. You may obtain a copy of
17 * the License at http://www.apache.org/licenses/LICENSE-2.0 .
18 */
19
20#include "documentbuilder.hxx"
21
22#include <string.h>
23
24#include <libxml/xmlerror.h>
25
26#include <memory>
27
28#include <sal/log.hxx>
30
34
35#include <com/sun/star/xml/sax/SAXParseException.hpp>
36#include <com/sun/star/ucb/XCommandEnvironment.hpp>
37#include <com/sun/star/task/XInteractionHandler.hpp>
38#include <com/sun/star/ucb/SimpleFileAccess.hpp>
39
40#include <ucbhelper/content.hxx>
42
43#include "document.hxx"
44
45using namespace css::io;
46using namespace css::lang;
47using namespace css::ucb;
48using namespace css::uno;
49using namespace css::xml::dom;
50using namespace css::xml::sax;
51using namespace ucbhelper;
52using css::task::XInteractionHandler;
53using css::xml::sax::InputSource;
54
55
56namespace DOM
57{
58 namespace {
59
60 class CDefaultEntityResolver : public cppu::WeakImplHelper< XEntityResolver >
61 {
62 public:
63 virtual InputSource SAL_CALL resolveEntity( const OUString& sPublicId, const OUString& sSystemId ) override
64 {
65 InputSource is;
66 is.sPublicId = sPublicId;
67 is.sSystemId = sSystemId;
68 is.sEncoding.clear();
69
70 try {
71 Reference< XCommandEnvironment > aEnvironment(
72 new CommandEnvironment(Reference< XInteractionHandler >(),
73 Reference< XProgressHandler >() ));
74 Content aContent(sSystemId, aEnvironment, comphelper::getProcessComponentContext());
75
76 is.aInputStream = aContent.openStream();
77 } catch (const css::uno::Exception&) {
78 TOOLS_WARN_EXCEPTION( "unoxml", "exception in default entity resolver");
79 is.aInputStream.clear();
80 }
81 return is;
82 }
83
84 };
85
86 }
87
89 : m_xEntityResolver(new CDefaultEntityResolver)
90 {
91 // init libxml. libxml will protect itself against multiple
92 // initializations so there is no problem here if this gets
93 // called multiple times.
94 xmlInitParser();
95 }
96
97 Sequence< OUString > SAL_CALL CDocumentBuilder::getSupportedServiceNames()
98 {
99 return { "com.sun.star.xml.dom.DocumentBuilder" };
100 }
101
103 {
104 return "com.sun.star.comp.xml.dom.DocumentBuilder";
105 }
106
107 sal_Bool SAL_CALL CDocumentBuilder::supportsService(const OUString& aServiceName)
108 {
109 return cppu::supportsService(this, aServiceName);
110 }
111
112 Reference< XDOMImplementation > SAL_CALL CDocumentBuilder::getDOMImplementation()
113 {
114
115 return Reference< XDOMImplementation >();
116 }
117
119 {
120 return true;
121 }
122
124 {
125 return false;
126 }
127
128 Reference< XDocument > SAL_CALL CDocumentBuilder::newDocument()
129 {
130 std::scoped_lock const g(m_Mutex);
131
132 // create a new document
133 xmlDocPtr pDocument = xmlNewDoc(reinterpret_cast<const xmlChar*>("1.0"));
134 return CDocument::CreateCDocument(pDocument);
135 }
136
137 static OUString make_error_message(xmlParserCtxtPtr ctxt)
138 {
139 return OUString(ctxt->lastError.message, strlen(ctxt->lastError.message), RTL_TEXTENCODING_ASCII_US) +
140 "Line: " +
141 OUString::number(static_cast<sal_Int32>(ctxt->lastError.line)) +
142 "\nColumn: " +
143 OUString::number(static_cast<sal_Int32>(ctxt->lastError.int2));
144 }
145
146 // -- callbacks and context struct for parsing from stream
147 // -- c-linkage, so the callbacks can be used by libxml
148 extern "C" {
149
150 namespace {
151
152 // context struct passed to IO functions
153 typedef struct context {
154 Reference< XInputStream > rInputStream;
155 bool close;
157 } context_t;
158
159 }
160
161 static int xmlIO_read_func( void *context, char *buffer, int len)
162 {
163 // get the context...
164 context_t *pctx = static_cast<context_t*>(context);
165 if (!pctx->rInputStream.is())
166 return -1;
167 try {
168 // try to read the requested number of bytes
169 Sequence< sal_Int8 > chunk(len);
170 int nread = pctx->rInputStream->readBytes(chunk, len);
171
172 // copy bytes to the provided buffer
173 memcpy(buffer, chunk.getConstArray(), nread);
174 return nread;
175 } catch (const css::uno::Exception&) {
176 TOOLS_WARN_EXCEPTION( "unoxml", "");
177 return -1;
178 }
179 }
180
181 static int xmlIO_close_func(void* context)
182 {
183 // get the context...
184 context_t *pctx = static_cast<context_t*>(context);
185 if (!pctx->rInputStream.is())
186 return 0;
187 try
188 {
189 if (pctx->close)
190 pctx->rInputStream->closeInput();
191 if (pctx->freeOnClose)
192 delete pctx;
193 return 0;
194 } catch (const css::uno::Exception&) {
195 TOOLS_WARN_EXCEPTION( "unoxml", "");
196 return -1;
197 }
198 }
199
200 static xmlParserInputPtr resolve_func(void *ctx,
201 const xmlChar *publicId,
202 const xmlChar *systemId)
203 {
204 // get the CDocumentBuilder object
205 xmlParserCtxtPtr ctxt = static_cast<xmlParserCtxtPtr>(ctx);
206 CDocumentBuilder *builder = static_cast< CDocumentBuilder* >(ctxt->_private);
207 Reference< XEntityResolver > resolver = builder->getEntityResolver();
208 OUString sysid;
209 if (systemId != nullptr)
210 sysid = OUString(reinterpret_cast<char const *>(systemId), strlen(reinterpret_cast<char const *>(systemId)), RTL_TEXTENCODING_UTF8);
211 OUString pubid;
212 if (publicId != nullptr)
213 pubid = OUString(reinterpret_cast<char const *>(publicId), strlen(reinterpret_cast<char const *>(publicId)), RTL_TEXTENCODING_UTF8);
214
215 // resolve the entity
216 InputSource src = resolver->resolveEntity(pubid, sysid);
217
218 // create IO context on heap because this call will no longer be on the stack
219 // when IO is actually performed through the callbacks. The close function must
220 // free the memory which is indicated by the freeOnClose field in the context struct
221 context_t *c = new context_t;
222 c->rInputStream = src.aInputStream;
223 c->close = true;
224 c->freeOnClose = true;
225
226 // set up the inputBuffer and inputPtr for libxml
227 xmlParserInputBufferPtr pBuffer =
228 xmlParserInputBufferCreateIO(xmlIO_read_func, xmlIO_close_func, c, XML_CHAR_ENCODING_NONE);
229 xmlParserInputPtr pInput =
230 xmlNewIOInputStream(ctxt, pBuffer, XML_CHAR_ENCODING_NONE);
231 return pInput;
232 }
233
234#if 0
235 static xmlParserInputPtr external_entity_loader(const char *URL, const char * /*ID*/, xmlParserCtxtPtr ctxt)
236 {
237 // just call our resolver function using the URL as systemId
238 return resolve_func(ctxt, 0, (const xmlChar*)URL);
239 }
240#endif
241
242 // default warning handler does not trigger assertion
243 static void warning_func(void * ctx, const char * /*msg*/, ...)
244 {
245 try
246 {
247 xmlParserCtxtPtr const pctx = static_cast<xmlParserCtxtPtr>(ctx);
248
249 SAL_INFO(
250 "unoxml",
251 "libxml2 warning: "
252 << make_error_message(pctx));
253
254 CDocumentBuilder * const pDocBuilder = static_cast<CDocumentBuilder*>(pctx->_private);
255
256 if (pDocBuilder->getErrorHandler().is()) // if custom error handler is set (using setErrorHandler ())
257 {
258 // Prepare SAXParseException to be passed to custom XErrorHandler::warning function
259 css::xml::sax::SAXParseException saxex;
260 saxex.Message = make_error_message(pctx);
261 saxex.LineNumber = static_cast<sal_Int32>(pctx->lastError.line);
262 saxex.ColumnNumber = static_cast<sal_Int32>(pctx->lastError.int2);
263
264 // Call custom warning function
265 pDocBuilder->getErrorHandler()->warning(::css::uno::Any(saxex));
266 }
267 }
268 catch (const css::uno::Exception &)
269 {
270 // Protect lib2xml from UNO Exception
271 TOOLS_WARN_EXCEPTION("unoxml", "DOM::warning_func");
272 }
273 }
274
275 // default error handler triggers assertion
276 static void error_func(void * ctx, const char * /*msg*/, ...)
277 {
278 try
279 {
280 xmlParserCtxtPtr const pctx = static_cast<xmlParserCtxtPtr>(ctx);
281 SAL_WARN(
282 "unoxml",
283 "libxml2 error: "
284 << make_error_message(pctx));
285
286 CDocumentBuilder * const pDocBuilder = static_cast<CDocumentBuilder*>(pctx->_private);
287
288 if (pDocBuilder->getErrorHandler().is()) // if custom error handler is set (using setErrorHandler ())
289 {
290 // Prepare SAXParseException to be passed to custom XErrorHandler::error function
291 css::xml::sax::SAXParseException saxex;
292 saxex.Message = make_error_message(pctx);
293 saxex.LineNumber = static_cast<sal_Int32>(pctx->lastError.line);
294 saxex.ColumnNumber = static_cast<sal_Int32>(pctx->lastError.int2);
295
296 // Call custom warning function
297 pDocBuilder->getErrorHandler()->error(::css::uno::Any(saxex));
298 }
299 }
300 catch (const css::uno::Exception &)
301 {
302 // Protect lib2xml from UNO Exception
303 TOOLS_WARN_EXCEPTION("unoxml", "DOM::error_func");
304 }
305 }
306 } // extern "C"
307
308 static void throwEx(xmlParserCtxtPtr ctxt)
309 {
310 css::xml::sax::SAXParseException saxex;
311 saxex.Message = make_error_message(ctxt);
312 saxex.LineNumber = static_cast<sal_Int32>(ctxt->lastError.line);
313 saxex.ColumnNumber = static_cast<sal_Int32>(ctxt->lastError.int2);
314 throw saxex;
315 }
316
317 namespace {
318
319 struct XmlFreeParserCtxt {
320 void operator ()(xmlParserCtxt * p) const { xmlFreeParserCtxt(p); }
321 };
322
323 }
324
325 Reference< XDocument > SAL_CALL CDocumentBuilder::parse(const Reference< XInputStream >& is)
326 {
327 if (!is.is()) {
328 throw RuntimeException();
329 }
330
331 std::scoped_lock const g(m_Mutex);
332
333 // IO context struct. Must outlive pContext, as destroying that via
334 // xmlFreeParserCtxt may still access this context_t
335 context_t c;
336 c.rInputStream = is;
337 // we did not open the stream, thus we do not close it.
338 c.close = false;
339 c.freeOnClose = false;
340
341 std::unique_ptr<xmlParserCtxt, XmlFreeParserCtxt> const pContext(
342 xmlNewParserCtxt());
343
344 // register error functions to prevent errors being printed
345 // on the console
346 pContext->_private = this;
347 pContext->sax->error = error_func;
348 pContext->sax->warning = warning_func;
349 pContext->sax->resolveEntity = resolve_func;
350
351 xmlDocPtr const pDoc = xmlCtxtReadIO(pContext.get(),
352 xmlIO_read_func, xmlIO_close_func, &c, nullptr, nullptr, 0);
353
354 if (pDoc == nullptr) {
355 throwEx(pContext.get());
356 }
357 return CDocument::CreateCDocument(pDoc);
358 }
359
360 Reference< XDocument > SAL_CALL CDocumentBuilder::parseURI(const OUString& sUri)
361 {
362 std::scoped_lock const g(m_Mutex);
363
364 std::unique_ptr<xmlParserCtxt, XmlFreeParserCtxt> const pContext(
365 xmlNewParserCtxt());
366 pContext->_private = this;
367 pContext->sax->error = error_func;
368 pContext->sax->warning = warning_func;
369 pContext->sax->resolveEntity = resolve_func;
370 // xmlSetExternalEntityLoader(external_entity_loader);
371 OString oUri = OUStringToOString(sUri, RTL_TEXTENCODING_UTF8);
372 char *uri = const_cast<char*>(oUri.getStr());
373 xmlDocPtr pDoc = xmlCtxtReadFile(pContext.get(), uri, nullptr, 0);
374
375 Reference< XDocument > xRet;
376
377 // if we failed to parse the URI as a simple file, lets try via a ucb stream.
378 // For Android file:///assets/ URLs which must go via the osl/ file API.
379 if (pDoc == nullptr) {
380 Reference < XSimpleFileAccess3 > xStreamAccess(
381 SimpleFileAccess::create( comphelper::getProcessComponentContext() ) );
382 Reference< XInputStream > xInStream = xStreamAccess->openFileRead( sUri );
383 if (!xInStream.is())
384 throwEx(pContext.get());
385
386 // loop over every layout entry in current file
387 xRet = parse( xInStream );
388
389 xInStream->closeInput();
390 xInStream.clear();
391
392 } else
393 xRet = CDocument::CreateCDocument(pDoc).get();
394
395 return xRet;
396 }
397
398 void SAL_CALL
399 CDocumentBuilder::setEntityResolver(Reference< XEntityResolver > const& xER)
400 {
401 std::scoped_lock const g(m_Mutex);
402
403 m_xEntityResolver = xER;
404 }
405
406 Reference< XEntityResolver > CDocumentBuilder::getEntityResolver()
407 {
408 std::scoped_lock const g(m_Mutex);
409
410 return m_xEntityResolver;
411 }
412
413 void SAL_CALL
414 CDocumentBuilder::setErrorHandler(Reference< XErrorHandler > const& xEH)
415 {
416 std::scoped_lock const g(m_Mutex);
417
418 m_xErrorHandler = xEH;
419 }
420}
421
422extern "C" SAL_DLLPUBLIC_EXPORT css::uno::XInterface*
424 css::uno::XComponentContext* , css::uno::Sequence<css::uno::Any> const&)
425{
426 return cppu::acquire(new DOM::CDocumentBuilder());
427}
428
429/* vim:set shiftwidth=4 softtabstop=4 expandtab: */
const css::uno::Reference< css::xml::sax::XErrorHandler > & getErrorHandler() const
virtual OUString SAL_CALL getImplementationName() override
virtual sal_Bool SAL_CALL isNamespaceAware() override
Indicates whether or not this parser is configured to understand namespaces.
virtual void SAL_CALL setErrorHandler(const css::uno::Reference< css::xml::sax::XErrorHandler > &eh) override
Specify the ErrorHandler to be used to report errors present in the XML document to be parsed.
std::recursive_mutex m_Mutex
css::uno::Reference< css::xml::sax::XEntityResolver > getEntityResolver()
virtual css::uno::Reference< css::xml::dom::XDocument > SAL_CALL parseURI(const OUString &uri) override
Parse the content of the given URI as an XML document and return a new DOM Document object.
virtual sal_Bool SAL_CALL supportsService(const OUString &ServiceName) override
virtual void SAL_CALL setEntityResolver(const css::uno::Reference< css::xml::sax::XEntityResolver > &er) override
Specify the EntityResolver to be used to resolve entities present in the XML document to be parsed.
virtual sal_Bool SAL_CALL isValidating() override
Indicates whether or not this parser is configured to validate XML documents.
virtual css::uno::Reference< css::xml::dom::XDocument > SAL_CALL newDocument() override
Obtain a new instance of a DOM Document object to build a DOM tree with.
css::uno::Reference< css::xml::sax::XErrorHandler > m_xErrorHandler
virtual css::uno::Reference< css::xml::dom::XDocument > SAL_CALL parse(const css::uno::Reference< css::io::XInputStream > &is) override
Parse the content of the given InputStream as an XML document and return a new DOM Document object.
virtual css::uno::Sequence< OUString > SAL_CALL getSupportedServiceNames() override
css::uno::Reference< css::xml::sax::XEntityResolver > m_xEntityResolver
virtual css::uno::Reference< css::xml::dom::XDOMImplementation > SAL_CALL getDOMImplementation() override
Obtain an instance of a DOMImplementation object.
static ::rtl::Reference< CDocument > CreateCDocument(xmlDocPtr const pDoc)
factory: only way to create instance!
Definition: document.cxx:94
#define TOOLS_WARN_EXCEPTION(area, stream)
Reference< XInputStream > rInputStream
bool close
bool freeOnClose
SAL_DLLPUBLIC_EXPORT css::uno::XInterface * unoxml_CDocumentBuilder_get_implementation(css::uno::XComponentContext *, css::uno::Sequence< css::uno::Any > const &)
#define SAL_WARN(area, stream)
#define SAL_INFO(area, stream)
Definition: attr.cxx:38
static void throwEx(xmlParserCtxtPtr ctxt)
static int xmlIO_close_func(void *context)
static OUString make_error_message(xmlParserCtxtPtr ctxt)
static int xmlIO_read_func(void *context, char *buffer, int len)
static xmlParserInputPtr resolve_func(void *ctx, const xmlChar *publicId, const xmlChar *systemId)
static void warning_func(void *ctx, const char *,...)
static void error_func(void *ctx, const char *,...)
Reference< XComponentContext > getProcessComponentContext()
bool CPPUHELPER_DLLPUBLIC supportsService(css::lang::XServiceInfo *implementation, rtl::OUString const &name)
ctx
resolver
OString OUStringToOString(std::u16string_view str, ConnectionSettings const *settings)
unsigned char sal_Bool