LibreOffice Module filter (master) 1
LibXSLTTransformer.cxx
Go to the documentation of this file.
1/* -*- Mode: C++; tab-width: 4; indent-tabs-mode: nil; c-basic-offset: 4 -*- */
2
3/*
4 * This file is part of the LibreOffice project.
5 *
6 * This Source Code Form is subject to the terms of the Mozilla Public
7 * License, v. 2.0. If a copy of the MPL was not distributed with this
8 * file, You can obtain one at http://mozilla.org/MPL/2.0/.
9 */
10
11#include <algorithm>
12#include <cstring>
13#include <map>
14#include <optional>
15#include <string_view>
16#include <utility>
17#include <vector>
18#include <libxml/parser.h>
19#include <libxml/xmlIO.h>
20#include <libxml/xpath.h>
21#include <libxml/xpathInternals.h>
22#include <libxml/xmlstring.h>
23#include <libxslt/transform.h>
24#include <libxslt/xsltutils.h>
25#include <libxslt/variables.h>
26#include <libxslt/extensions.h>
27#include <libexslt/exslt.h>
28
31
32#include <osl/file.hxx>
33#include <com/sun/star/uno/Any.hxx>
34#include <com/sun/star/beans/NamedValue.hpp>
35#include <com/sun/star/io/XInputStream.hpp>
36#include <com/sun/star/io/XOutputStream.hpp>
37#include <com/sun/star/io/XStreamListener.hpp>
38
40#include "OleHandler.hxx"
41
42using namespace ::cppu;
43using namespace ::osl;
44using namespace ::com::sun::star::beans;
45using namespace ::com::sun::star::io;
46using namespace ::com::sun::star::uno;
47using namespace ::com::sun::star::lang;
48using namespace ::com::sun::star::registry;
49using ::std::pair;
50
51namespace XSLT
52{
53 const char* const LibXSLTTransformer::PARAM_SOURCE_URL = "sourceURL";
55 "sourceBaseURL";
56 const char* const LibXSLTTransformer::PARAM_TARGET_URL = "targetURL";
58 "targetBaseURL";
59 const char* const LibXSLTTransformer::PARAM_DOCTYPE_PUBLIC = "publicType";
60
61 const sal_Int32 Reader::OUTPUT_BUFFER_SIZE = 4096;
62
63 const sal_Int32 Reader::INPUT_BUFFER_SIZE = 4096;
64
65 namespace {
66
70 struct ParserInputBufferCallback
71 {
72 static int
73 on_read(void * context, char * buffer, int len)
74 {
75 Reader * tmp = static_cast<Reader*> (context);
76 return tmp->read(buffer, len);
77 }
78 static int
79 on_close(void * )
80 {
81 return 0;
82 }
83 };
87 struct ParserOutputBufferCallback
88 {
89 static int
90 on_write(void * context, const char * buffer, int len)
91 {
92 Reader * tmp = static_cast<Reader*> (context);
93 return tmp->write(buffer, len);
94 }
95 static int
96 on_close(void * context)
97 {
98 Reader * tmp = static_cast<Reader*> (context);
99 tmp->closeOutput();
100 return 0;
101 }
102 };
111 struct ExtFuncOleCB
112 {
113 static void *
114 init(xsltTransformContextPtr, const xmlChar*)
115 {
116 return nullptr;
117 }
118 static void
119 insertByName(xmlXPathParserContextPtr ctxt, int nargs)
120 {
121 xsltTransformContextPtr tctxt;
122 void *data;
123 if (nargs != 2) {
124 xsltGenericError(xsltGenericErrorContext,
125 "insertByName: requires exactly 2 arguments\n");
126 return;
127 }
128 tctxt = xsltXPathGetTransformContext(ctxt);
129 if (tctxt == nullptr) {
130 xsltGenericError(xsltGenericErrorContext,
131 "xsltExtFunctionTest: failed to get the transformation context\n");
132 return;
133 }
134 // XXX: someone with better knowledge of libxslt might come up with a better
135 // idea to pass the OleHandler than by attaching it to tctxt->_private. See also
136 // below.
137 data = tctxt->_private;
138 if (data == nullptr) {
139 xsltGenericError(xsltGenericErrorContext,
140 "xsltExtFunctionTest: failed to get module data\n");
141 return;
142 }
143 OleHandler * oh = static_cast<OleHandler*> (data);
144
145 xmlXPathObjectPtr value = valuePop(ctxt);
146 value = ensureStringValue(value, ctxt);
147 xmlXPathObjectPtr streamName = valuePop(ctxt);
148 streamName = ensureStringValue(streamName, ctxt);
149
150 oh->insertByName(OStringToOUString(reinterpret_cast<char*>(streamName->stringval), RTL_TEXTENCODING_UTF8),
151 std::string_view(reinterpret_cast<char*>(value->stringval)));
152 valuePush(ctxt, xmlXPathNewCString(""));
153 }
154
155 static xmlXPathObjectPtr ensureStringValue(xmlXPathObjectPtr obj, const xmlXPathParserContextPtr ctxt)
156 {
157 if (obj->type != XPATH_STRING) {
158 valuePush(ctxt, obj);
159 xmlXPathStringFunction(ctxt, 1);
160 obj = valuePop(ctxt);
161 }
162 return obj;
163 }
164
165 static void getByName(xmlXPathParserContextPtr ctxt, int nargs)
166 {
167 xsltTransformContextPtr tctxt;
168 void *data;
169 if (nargs != 1) {
170 xsltGenericError(xsltGenericErrorContext,
171 "getByName: requires exactly 1 argument\n");
172 return;
173 }
174
175 tctxt = xsltXPathGetTransformContext(ctxt);
176 if (tctxt == nullptr) {
177 xsltGenericError(xsltGenericErrorContext,
178 "xsltExtFunctionTest: failed to get the transformation context\n");
179 return;
180 }
181 // XXX: someone with better knowledge of libxslt might come up with a better
182 // idea to pass the OleHandler than by attaching it to tctxt->_private
183 data = tctxt->_private;
184 if (data == nullptr) {
185 xsltGenericError(xsltGenericErrorContext,
186 "xsltExtFunctionTest: failed to get module data\n");
187 return;
188 }
189 OleHandler * oh = static_cast<OleHandler*> (data);
190 xmlXPathObjectPtr streamName = valuePop(ctxt);
191 streamName = ensureStringValue(streamName, ctxt);
192 const OString content = oh->getByName(OStringToOUString(reinterpret_cast<char*>(streamName->stringval), RTL_TEXTENCODING_UTF8));
193 valuePush(ctxt, xmlXPathNewCString(content.getStr()));
194 xmlXPathFreeObject(streamName);
195 }
196 };
197
198 }
199
201 Thread("LibXSLTTransformer"), m_transformer(transformer),
202 m_readBuf(INPUT_BUFFER_SIZE), m_writeBuf(OUTPUT_BUFFER_SIZE),
203 m_tcontext(nullptr)
204 {
205 LIBXML_TEST_VERSION;
206 }
207 ;
208
209 int
210 Reader::read(char * buffer, int len)
211 {
212 // const char *ptr = (const char *) context;
213 if (buffer == nullptr || len < 0)
214 return -1;
215 sal_Int32 n;
216 css::uno::Reference<XInputStream> xis = m_transformer->getInputStream();
217 n = xis->readBytes(m_readBuf, len);
218 if (n > 0)
219 {
220 memcpy(buffer, m_readBuf.getArray(), n);
221 }
222 return n;
223 }
224
225 int
226 Reader::write(const char * buffer, int len)
227 {
228 if (buffer == nullptr || len < 0)
229 return -1;
230 if (len > 0)
231 {
232 css::uno::Reference<XOutputStream> xos = m_transformer->getOutputStream();
233 sal_Int32 writeLen = len;
234 sal_Int32 bufLen = ::std::min(writeLen, OUTPUT_BUFFER_SIZE);
235 const sal_uInt8* memPtr =
236 reinterpret_cast<const sal_uInt8*> (buffer);
237 while (writeLen > 0)
238 {
239 sal_Int32 n = ::std::min(writeLen, bufLen);
240 m_writeBuf.realloc(n);
241 memcpy(m_writeBuf.getArray(), memPtr,
242 static_cast<size_t> (n));
243 xos->writeBytes(m_writeBuf);
244 memPtr += n;
245 writeLen -= n;
246 }
247 }
248 return len;
249 }
250
251 void
253 {
254 css::uno::Reference<XOutputStream> xos = m_transformer->getOutputStream();
255 if (xos.is())
256 {
257 xos->flush();
258 xos->closeOutput();
259 }
260 m_transformer->done();
261 }
262
263 void
265 {
266 OSL_ASSERT(m_transformer != nullptr);
267 OSL_ASSERT(m_transformer->getInputStream().is());
268 OSL_ASSERT(m_transformer->getOutputStream().is());
269 OSL_ASSERT(!m_transformer->getStyleSheetURL().isEmpty() || !m_transformer->getStyleSheetText().isEmpty());
270 ::std::map<const char*, OString> pmap = m_transformer->getParameters();
271 ::std::vector< const char* > params( pmap.size() * 2 + 1 ); // build parameters
272 int paramIndex = 0;
273 for (auto const& elem : pmap)
274 {
275 params[paramIndex++] = elem.first;
276 params[paramIndex++] = elem.second.getStr();
277 }
278 params[paramIndex] = nullptr;
279 xmlDocPtr doc = xmlReadIO(&ParserInputBufferCallback::on_read,
280 &ParserInputBufferCallback::on_close,
281 static_cast<void*> (this), nullptr, nullptr, 0);
282 xsltStylesheetPtr styleSheet = nullptr;
283 if (m_transformer->getStyleSheetURL().getLength())
284 styleSheet = xsltParseStylesheetFile(
285 reinterpret_cast<const xmlChar *>(m_transformer->getStyleSheetURL().getStr()));
286 else if (m_transformer->getStyleSheetText().getLength())
287 {
288 xmlDocPtr styleSheetDoc = xmlReadMemory(
289 m_transformer->getStyleSheetText().getStr(),
290 m_transformer->getStyleSheetText().getLength(),
291 "noname.xml", nullptr, 0);
292
293 styleSheet = xsltParseStylesheetDoc(styleSheetDoc);
294 }
295
296 if (!styleSheet)
297 {
298 m_transformer->error("No stylesheet was created");
299 }
300
301 xmlDocPtr result = nullptr;
302 exsltRegisterAll();
304#ifdef DEBUG_FILTER_LIBXSLTTRANSFORMER
305 xsltSetGenericDebugFunc(stderr, NULL);
306 xsltDebugDumpExtensions(NULL);
307#endif
308 std::optional<OleHandler> oh(std::in_place, m_transformer->getComponentContext());
309 if (styleSheet)
310 {
311 xsltTransformContextPtr tcontext = xsltNewTransformContext(
312 styleSheet, doc);
313 {
314 std::scoped_lock<std::mutex> g(m_mutex);
315 m_tcontext = tcontext;
316 }
317 oh->registercontext(m_tcontext);
318 xsltQuoteUserParams(m_tcontext, params.data());
319 result = xsltApplyStylesheetUser(styleSheet, doc, nullptr, nullptr, nullptr,
320 m_tcontext);
321 }
322
323 if (result)
324 {
325 xmlCharEncodingHandlerPtr encoder = xmlGetCharEncodingHandler(
326 XML_CHAR_ENCODING_UTF8);
327 xmlOutputBufferPtr outBuf = xmlAllocOutputBuffer(encoder);
328 outBuf->context = static_cast<void *> (this);
329 outBuf->writecallback = &ParserOutputBufferCallback::on_write;
330 outBuf->closecallback = &ParserOutputBufferCallback::on_close;
331 xsltSaveResultTo(outBuf, result, styleSheet);
332 (void)xmlOutputBufferClose(outBuf);
333 }
334 else
335 {
336 xmlErrorPtr lastErr = xmlGetLastError();
337 OUString msg;
338 if (lastErr)
339 msg = OStringToOUString(lastErr->message, RTL_TEXTENCODING_UTF8);
340 else
341 msg = "Unknown XSLT transformation error";
342
343 m_transformer->error(msg);
344 }
345 oh.reset();
346 xsltFreeStylesheet(styleSheet);
347 xsltTransformContextPtr tcontext = nullptr;
348 {
349 std::scoped_lock<std::mutex> g(m_mutex);
350 std::swap(m_tcontext, tcontext);
351 }
352 xsltFreeTransformContext(tcontext);
353 xmlFreeDoc(doc);
354 xmlFreeDoc(result);
355 }
356
357 void
359 {
360 const xmlChar* oleModuleURI = reinterpret_cast<const xmlChar *>(EXT_MODULE_OLE_URI);
361 xsltRegisterExtModule(oleModuleURI, &ExtFuncOleCB::init, nullptr);
362 xsltRegisterExtModuleFunction(
363 reinterpret_cast<const xmlChar*>("insertByName"),
364 oleModuleURI,
365 &ExtFuncOleCB::insertByName);
366 xsltRegisterExtModuleFunction(
367 reinterpret_cast<const xmlChar*>("getByName"),
368 oleModuleURI,
369 &ExtFuncOleCB::getByName);
370
371 }
372
374 {
375 std::scoped_lock<std::mutex> g(m_mutex);
376 if (!m_tcontext)
377 return;
378 //tdf#100057 If we force a cancel, libxslt will of course just keep on going unless something
379 //tells it to stop. Here we force the stopped state so that libxslt will stop processing
380 //and so Reader::execute will complete and we can join cleanly
381 m_tcontext->state = XSLT_STATE_STOPPED;
382 }
383
385 {
386 }
387
389 css::uno::Reference<XComponentContext> xContext) :
390 m_xContext(std::move(xContext))
391 {
392 }
393
394 // XServiceInfo
395 sal_Bool LibXSLTTransformer::supportsService(const OUString& sServiceName)
396 {
398 }
400 {
401 return "com.sun.star.comp.documentconversion.XSLTFilter";
402 }
403 css::uno::Sequence< OUString > LibXSLTTransformer::getSupportedServiceNames()
404 {
405 return { "com.sun.star.documentconversion.XSLTFilter" };
406 }
407
408 void
410 const css::uno::Reference<XInputStream>& inputStream)
411 {
412 m_rInputStream = inputStream;
413 }
414
415 css::uno::Reference<XInputStream>
417 {
418 return m_rInputStream;
419 }
420
421 void
423 const css::uno::Reference<XOutputStream>& outputStream)
424 {
425 m_rOutputStream = outputStream;
426 }
427
428 css::uno::Reference<XOutputStream>
430 {
431 return m_rOutputStream;
432 }
433
434 void
435 LibXSLTTransformer::addListener(const css::uno::Reference<XStreamListener>& listener)
436 {
437 m_listeners.push_front(listener);
438 }
439
440 void
442 const css::uno::Reference<XStreamListener>& listener)
443 {
444 m_listeners.erase( std::remove(m_listeners.begin(), m_listeners.end(), listener ), m_listeners.end() );
445 }
446
447 void
449 {
450 for (const css::uno::Reference<XStreamListener>& xl : m_listeners)
451 {
452 xl->started();
453 }
454 OSL_ENSURE(!m_Reader.is(), "Somebody forgot to call terminate *and* holds a reference to this LibXSLTTransformer instance");
455 m_Reader = new Reader(this);
456 m_Reader->launch();
457 }
458
459 void
460 LibXSLTTransformer::error(const OUString& msg)
461 {
462 Any arg;
463 arg <<= Exception(msg, *this);
464 for (const css::uno::Reference<XStreamListener>& xl : m_listeners)
465 {
466 if (xl.is())
467 {
468 xl->error(arg);
469 }
470 }
471 }
472
473 void
475 {
476 for (const css::uno::Reference<XStreamListener>& xl : m_listeners)
477 {
478 if (xl.is())
479 {
480 xl->closed();
481 }
482 }
483 }
484
485 void
487 {
488 if (m_Reader.is())
489 {
490 m_Reader->terminate();
491 m_Reader->forceStateStopped();
492 m_Reader->join();
493 }
494 m_Reader.clear();
495 m_parameters.clear();
496 }
497
498 void
499 LibXSLTTransformer::initialize(const Sequence<Any>& args)
500 {
501 Sequence<Any> params;
502 if (!(args[0] >>= params))
503 { // backward compatibility for old clients using createInstance
504 params = args;
505 }
506 xmlSubstituteEntitiesDefault(0);
507 m_parameters.clear();
508 for (const Any& p : std::as_const(params))
509 {
510 NamedValue nv;
511 p >>= nv;
512 OString nameUTF8 = OUStringToOString(nv.Name,
513 RTL_TEXTENCODING_UTF8);
514 OUString value;
515 OString valueUTF8;
516 if (nv.Value >>= value)
517 {
518 valueUTF8 = OUStringToOString(value,
519 RTL_TEXTENCODING_UTF8);
520 }
521 else
522 {
523 // ignore non-string parameters
524 continue;
525 }
526 if (nameUTF8 == "StylesheetURL")
527 {
528 m_styleSheetURL = valueUTF8;
529 }
530 if (nameUTF8 == "StylesheetText")
531 {
532 m_styleSheetText = valueUTF8;
533 }
534 else if (nameUTF8 == "SourceURL")
535 {
536 m_parameters.insert(pair<const char*, OString> (
537 PARAM_SOURCE_URL, valueUTF8));
538 }
539 else if (nameUTF8 == "SourceBaseURL")
540 {
541 m_parameters.insert(pair<const char*, OString> (
542 PARAM_SOURCE_BASE_URL, valueUTF8));
543 }
544 else if (nameUTF8 == "TargetURL")
545 {
546 m_parameters.insert(pair<const char*, OString> (
547 PARAM_TARGET_URL, valueUTF8));
548 }
549 else if (nameUTF8 == "TargetBaseURL")
550 {
551 m_parameters.insert(pair<const char*, OString> (
552 PARAM_TARGET_BASE_URL, valueUTF8));
553 }
554 else if (nameUTF8 == "DoctypePublic")
555 {
556 m_parameters.insert(pair<const char*, OString> (
557 PARAM_DOCTYPE_PUBLIC, valueUTF8));
558 }
559 }
560 }
561}
562
563extern "C" SAL_DLLPUBLIC_EXPORT css::uno::XInterface*
565 css::uno::XComponentContext* context, css::uno::Sequence<css::uno::Any> const&)
566{
567 return cppu::acquire(new XSLT::LibXSLTTransformer(context));
568}
569
570/* vim:set shiftwidth=4 softtabstop=4 expandtab: */
571
SAL_DLLPUBLIC_EXPORT css::uno::XInterface * filter_LibXSLTTransformer_get_implementation(css::uno::XComponentContext *context, css::uno::Sequence< css::uno::Any > const &)
#define EXT_MODULE_OLE_URI
Reference< XComponentContext > m_xContext
Definition: OdfFlatXml.cxx:62
#define OUTPUT_BUFFER_SIZE
#define INPUT_BUFFER_SIZE
constexpr OUStringLiteral sServiceName
FILE * init(int, char **)
virtual void SAL_CALL start() override
static const char *const PARAM_SOURCE_URL
virtual css::uno::Sequence< OUString > SAL_CALL getSupportedServiceNames() override
static const char *const PARAM_DOCTYPE_PUBLIC
void error(const OUString &msg)
virtual css::uno::Reference< XOutputStream > SAL_CALL getOutputStream() override
::std::map< const char *, OString > m_parameters
virtual OUString SAL_CALL getImplementationName() override
static const char *const PARAM_TARGET_BASE_URL
static const char *const PARAM_TARGET_URL
css::uno::Reference< XInputStream > m_rInputStream
virtual void SAL_CALL addListener(const css::uno::Reference< XStreamListener > &listener) override
virtual void SAL_CALL terminate() override
virtual void SAL_CALL setInputStream(const css::uno::Reference< XInputStream > &inputStream) override
css::uno::Reference< XOutputStream > m_rOutputStream
virtual sal_Bool SAL_CALL supportsService(const OUString &sServiceName) override
LibXSLTTransformer(css::uno::Reference< css::uno::XComponentContext > x)
rtl::Reference< Reader > m_Reader
virtual void SAL_CALL removeListener(const css::uno::Reference< XStreamListener > &listener) override
static const char *const PARAM_SOURCE_BASE_URL
virtual void SAL_CALL setOutputStream(const css::uno::Reference< XOutputStream > &outputStream) override
virtual css::uno::Reference< XInputStream > SAL_CALL getInputStream() override
virtual void SAL_CALL initialize(const Sequence< Any > &params) override
OString getByName(const OUString &streamName)
Definition: OleHandler.cxx:147
void insertByName(const OUString &streamName, std::string_view content)
Definition: OleHandler.cxx:133
int read(char *buffer, int len)
static void registerExtensionModule()
virtual void execute() override
virtual ~Reader() override
xsltTransformContextPtr m_tcontext
Sequence< sal_Int8 > m_readBuf
int write(const char *buffer, int len)
Sequence< sal_Int8 > m_writeBuf
Reader(LibXSLTTransformer *transformer)
rtl::Reference< LibXSLTTransformer > m_transformer
static const sal_Int32 OUTPUT_BUFFER_SIZE
static const sal_Int32 INPUT_BUFFER_SIZE
Any value
void * p
sal_Int64 n
return NULL
@ Exception
bool CPPUHELPER_DLLPUBLIC supportsService(css::lang::XServiceInfo *implementation, rtl::OUString const &name)
args
OString OUStringToOString(std::u16string_view str, ConnectionSettings const *settings)
unsigned char sal_uInt8
unsigned char sal_Bool
Any result