LibreOffice Module xmloff (master) 1
RDFaImportHelper.cxx
Go to the documentation of this file.
1/* -*- Mode: C++; tab-width: 4; indent-tabs-mode: nil; c-basic-offset: 4 -*- */
2/*
3 * This file is part of the LibreOffice project.
4 *
5 * This Source Code Form is subject to the terms of the Mozilla Public
6 * License, v. 2.0. If a copy of the MPL was not distributed with this
7 * file, You can obtain one at http://mozilla.org/MPL/2.0/.
8 *
9 * This file incorporates work covered by the following license notice:
10 *
11 * Licensed to the Apache Software Foundation (ASF) under one or more
12 * contributor license agreements. See the NOTICE file distributed
13 * with this work for additional information regarding copyright
14 * ownership. The ASF licenses this file to you under the Apache
15 * License, Version 2.0 (the "License"); you may not use this file
16 * except in compliance with the License. You may obtain a copy of
17 * the License at http://www.apache.org/licenses/LICENSE-2.0 .
18 */
19
20#include <RDFaImportHelper.hxx>
21
22#include <utility>
23#include <xmloff/xmlimp.hxx>
25
27
28#include <com/sun/star/rdf/URI.hpp>
29#include <com/sun/star/rdf/XDocumentRepository.hpp>
30#include <com/sun/star/rdf/XRepositorySupplier.hpp>
31
32#include <rtl/ustring.hxx>
33#include <sal/log.hxx>
34
35#include <map>
36
37using namespace ::com::sun::star;
38
39namespace xmloff {
40
41namespace {
42
44class RDFaReader
45{
46 const SvXMLImport & m_rImport;
47
48 const SvXMLImport & GetImport() const { return m_rImport; }
49
50 //FIXME: this is an ugly hack to workaround buggy SvXMLImport::GetAbsolute
51 OUString GetAbsoluteReference(OUString const & i_rURI) const
52 {
53 if (i_rURI.isEmpty() || i_rURI[0] == '#')
54 {
55 return GetImport().GetBaseURL() + i_rURI;
56 }
57 else
58 {
59 return GetImport().GetAbsoluteReference(i_rURI);
60 }
61 }
62
63public:
64 explicit RDFaReader(SvXMLImport const & i_rImport)
65 : m_rImport(i_rImport)
66 { }
67
68 // returns URI or blank node!
69 OUString ReadCURIE(OUString const & i_rCURIE) const;
70
71 std::vector< OUString >
72 ReadCURIEs(OUString const & i_rCURIEs) const;
73
74 OUString
75 ReadURIOrSafeCURIE( OUString const & i_rURIOrSafeCURIE) const;
76};
77
79class RDFaInserter
80{
81 const uno::Reference<uno::XComponentContext> m_xContext;
82 uno::Reference< rdf::XDocumentRepository > m_xRepository;
83
84 typedef ::std::map< OUString, uno::Reference< rdf::XBlankNode > >
85 BlankNodeMap_t;
86
87 BlankNodeMap_t m_BlankNodeMap;
88
89public:
90 RDFaInserter(uno::Reference<uno::XComponentContext> i_xContext,
91 uno::Reference< rdf::XDocumentRepository > i_xRepository)
92 : m_xContext(std::move(i_xContext))
93 , m_xRepository(std::move(i_xRepository))
94 {}
95
96 uno::Reference< rdf::XBlankNode >
97 LookupBlankNode(OUString const & i_rNodeId );
98
99 uno::Reference< rdf::XURI >
100 MakeURI( OUString const & i_rURI) const;
101
102 uno::Reference< rdf::XResource>
103 MakeResource( OUString const & i_rResource);
104
105 void InsertRDFaEntry(struct RDFaEntry const & i_rEntry);
106};
107
108}
109
112{
113 OUString m_About;
114 ::std::vector< OUString > m_Properties;
115 OUString m_Content;
116 OUString m_Datatype;
117
119 OUString i_sAbout,
120 ::std::vector< OUString >&& i_rProperties,
121 OUString i_sContent,
122 OUString i_sDatatype)
123 : m_About(std::move(i_sAbout))
124 , m_Properties(std::move(i_rProperties))
125 , m_Content(std::move(i_sContent))
126 , m_Datatype(std::move(i_sDatatype))
127 { }
128};
129
132{
133 uno::Reference<rdf::XMetadatable> m_xObject;
134 std::shared_ptr<ParsedRDFaAttributes> m_xRDFaAttributes;
135
136 RDFaEntry(uno::Reference<rdf::XMetadatable> i_xObject,
137 std::shared_ptr<ParsedRDFaAttributes> i_pRDFaAttributes)
138 : m_xObject(std::move(i_xObject))
139 , m_xRDFaAttributes(std::move(i_pRDFaAttributes))
140 { }
141};
142
143static bool isWS(const sal_Unicode i_Char)
144{
145 return ('\t' == i_Char) || ('\n' == i_Char) || ('\r' == i_Char)
146 || (' ' == i_Char);
147}
148
149static OUString splitAtWS(OUString & io_rString)
150{
151 const sal_Int32 len( io_rString.getLength() );
152 sal_Int32 idxstt(0);
153 while ((idxstt < len) && ( isWS(io_rString[idxstt])))
154 ++idxstt; // skip leading ws
155 sal_Int32 idxend(idxstt);
156 while ((idxend < len) && (!isWS(io_rString[idxend])))
157 ++idxend; // the CURIE
158 const OUString ret(io_rString.copy(idxstt, idxend - idxstt));
159 io_rString = io_rString.copy(idxend); // rest
160 return ret;
161}
162
163OUString
164RDFaReader::ReadCURIE(OUString const & i_rCURIE) const
165{
166 // the RDFa spec says that a prefix is required (it may be empty: ":foo")
167 const sal_Int32 idx( i_rCURIE.indexOf(':') );
168 if (idx >= 0)
169 {
170 OUString Prefix;
171 OUString LocalName;
172 OUString Namespace;
173 // LocalName may contain ':', see "ipchar" in RFC 3987
174 sal_uInt16 nKey( GetImport().GetNamespaceMap().GetKeyByQName(
175 i_rCURIE, &Prefix, &LocalName, &Namespace, SvXMLNamespaceMap::QNameMode::AttrValue) );
176 if ( Prefix == "_" )
177 {
178 // eeek, it's a bnode!
179 // "_" is not a valid URI scheme => we can identify bnodes
180 return i_rCURIE;
181 }
182 else
183 {
184 SAL_WARN_IF(XML_NAMESPACE_NONE == nKey, "xmloff.core", "no namespace?");
185 if ((XML_NAMESPACE_UNKNOWN != nKey) &&
186 (XML_NAMESPACE_XMLNS != nKey))
187 {
188 // N.B.: empty LocalName is valid!
189 const OUString URI(Namespace + LocalName);
190 return GetAbsoluteReference(URI);
191 }
192 else
193 {
194 SAL_INFO("xmloff.core", "ReadCURIE: invalid CURIE: invalid prefix" );
195 return OUString();
196 }
197 }
198 }
199 SAL_INFO("xmloff.core", "ReadCURIE: invalid CURIE: no prefix" );
200 return OUString();
201}
202
203::std::vector< OUString >
204RDFaReader::ReadCURIEs(OUString const & i_rCURIEs) const
205{
206 std::vector< OUString > vec;
207 OUString CURIEs(i_rCURIEs);
208 do {
209 OUString curie( splitAtWS(CURIEs) );
210 if (!curie.isEmpty())
211 {
212 const OUString uri(ReadCURIE(curie));
213 if (!uri.isEmpty())
214 {
215 vec.push_back(uri);
216 }
217 }
218 }
219 while (!CURIEs.isEmpty());
220 if (vec.empty())
221 {
222 SAL_INFO("xmloff.core", "ReadCURIEs: invalid CURIEs" );
223 }
224 return vec;
225}
226
227OUString
228RDFaReader::ReadURIOrSafeCURIE(OUString const & i_rURIOrSafeCURIE) const
229{
230 const sal_Int32 len(i_rURIOrSafeCURIE.getLength());
231 if (len && (i_rURIOrSafeCURIE[0] == '['))
232 {
233 if ((len >= 2) && (i_rURIOrSafeCURIE[len - 1] == ']'))
234 {
235 return ReadCURIE(i_rURIOrSafeCURIE.copy(1, len - 2));
236 }
237 else
238 {
239 SAL_INFO("xmloff.core", "ReadURIOrSafeCURIE: invalid SafeCURIE" );
240 return OUString();
241 }
242 }
243 else
244 {
245 if (i_rURIOrSafeCURIE.startsWith("_:")) // blank node
246 {
247 SAL_INFO("xmloff.core", "ReadURIOrSafeCURIE: invalid URI: scheme is _" );
248 return OUString();
249 }
250 else
251 {
252 return GetAbsoluteReference(i_rURIOrSafeCURIE);
253 }
254 }
255}
256
257uno::Reference< rdf::XBlankNode >
258RDFaInserter::LookupBlankNode(OUString const & i_rNodeId )
259{
260 uno::Reference< rdf::XBlankNode > & rEntry( m_BlankNodeMap[ i_rNodeId ] );
261 if (!rEntry.is())
262 {
263 rEntry = m_xRepository->createBlankNode();
264 }
265 return rEntry;
266}
267
268uno::Reference< rdf::XURI >
269RDFaInserter::MakeURI( OUString const & i_rURI) const
270{
271 if (i_rURI.startsWith("_:")) // blank node
272 {
273 SAL_INFO("xmloff.core", "MakeURI: cannot create URI for blank node");
274 return nullptr;
275 }
276 else
277 {
278 try
279 {
280 return rdf::URI::create( m_xContext, i_rURI );
281 }
282 catch (uno::Exception &)
283 {
284 SAL_WARN("xmloff.core", "MakeURI: cannot create URI");
285 return nullptr;
286 }
287 }
288}
289
290uno::Reference<rdf::XResource>
291RDFaInserter::MakeResource( OUString const & i_rResource)
292{
293 if (i_rResource.startsWith("_:")) // blank node
294 {
295 // we cannot use the blank node label as-is: it must be distinct
296 // from labels in other graphs, so create fresh ones per XML stream
297 // N.B.: content.xml and styles.xml are distinct graphs
298 OUString name( i_rResource.copy(2) );
299 const uno::Reference< rdf::XBlankNode > xBNode( LookupBlankNode(name) );
300 SAL_WARN_IF(!xBNode.is(), "xmloff.core", "no blank node?");
301 return xBNode;
302 }
303 else
304 {
305 return MakeURI( i_rResource );
306 }
307}
308
309void RDFaInserter::InsertRDFaEntry(
310 struct RDFaEntry const & i_rEntry)
311{
312 SAL_WARN_IF(!i_rEntry.m_xObject.is(), "xmloff.core", "InsertRDFaEntry: invalid arg: null object");
313 if (!i_rEntry.m_xObject.is()) return;
314
315 const uno::Reference< rdf::XResource > xSubject(
316 MakeResource( i_rEntry.m_xRDFaAttributes->m_About ) );
317 if (!xSubject.is())
318 {
319 return; // invalid
320 }
321
322 ::std::vector< uno::Reference< rdf::XURI > > predicates;
323
324 predicates.reserve(i_rEntry.m_xRDFaAttributes->m_Properties.size());
325
326 for (OUString const& prop : i_rEntry.m_xRDFaAttributes->m_Properties)
327 {
328 auto const xURI(MakeURI(prop));
329 if (xURI.is())
330 {
331 predicates.push_back(xURI);
332 }
333 }
334
335 if (predicates.empty())
336 {
337 return; // invalid
338 }
339
340 uno::Reference<rdf::XURI> xDatatype;
341 if (!i_rEntry.m_xRDFaAttributes->m_Datatype.isEmpty())
342 {
343 xDatatype = MakeURI( i_rEntry.m_xRDFaAttributes->m_Datatype );
344 }
345
346 try
347 {
348 // N.B.: this will call xMeta->ensureMetadataReference, which is why
349 // this must be done _after_ importing the whole XML file,
350 // to prevent collision between generated ids and ids in the file
351 m_xRepository->setStatementRDFa(xSubject, comphelper::containerToSequence(predicates),
352 i_rEntry.m_xObject,
353 i_rEntry.m_xRDFaAttributes->m_Content, xDatatype);
354 }
355 catch (uno::Exception &)
356 {
357 SAL_WARN("xmloff.core", "InsertRDFaEntry: setStatementRDFa failed?");
358 }
359}
360
361RDFaImportHelper::RDFaImportHelper(const SvXMLImport & i_rImport)
362 : m_rImport(i_rImport)
363{
364}
365
367{
368}
369
370std::shared_ptr<ParsedRDFaAttributes>
372 OUString const & i_rAbout,
373 OUString const & i_rProperty,
374 OUString const & i_rContent,
375 OUString const & i_rDatatype)
376{
377 if (i_rProperty.isEmpty())
378 {
379 SAL_INFO("xmloff.core", "AddRDFa: invalid input: xhtml:property empty");
380 return std::shared_ptr<ParsedRDFaAttributes>();
381 }
382 // must parse CURIEs here: need namespace declaration context
383 RDFaReader reader(GetImport());
384 const OUString about( reader.ReadURIOrSafeCURIE(i_rAbout) );
385 if (about.isEmpty()) {
386 return std::shared_ptr<ParsedRDFaAttributes>();
387 }
388 ::std::vector< OUString > properties(
389 reader.ReadCURIEs(i_rProperty) );
390 if (properties.empty()) {
391 return std::shared_ptr<ParsedRDFaAttributes>();
392 }
393 const OUString datatype( !i_rDatatype.isEmpty()
394 ? reader.ReadCURIE(i_rDatatype)
395 : OUString() );
396 return std::make_shared<ParsedRDFaAttributes>(
397 about, std::move(properties), i_rContent, datatype);
398}
399
400void
402 uno::Reference<rdf::XMetadatable> const & i_xObject,
403 std::shared_ptr<ParsedRDFaAttributes> const & i_pRDFaAttributes)
404{
405 if (!i_xObject.is())
406 {
407 SAL_WARN("xmloff.core", "AddRDFa: invalid arg: null textcontent");
408 return;
409 }
410 if (!i_pRDFaAttributes)
411 {
412 SAL_WARN("xmloff.core", "AddRDFa: invalid arg: null RDFa attributes");
413 return;
414 }
415 m_RDFaEntries.emplace_back(i_xObject, i_pRDFaAttributes);
416}
417
418void
420 uno::Reference<rdf::XMetadatable> const & i_xObject,
421 OUString const & i_rAbout,
422 OUString const & i_rProperty,
423 OUString const & i_rContent,
424 OUString const & i_rDatatype)
425{
426 std::shared_ptr<ParsedRDFaAttributes> pAttributes(
427 ParseRDFa(i_rAbout, i_rProperty, i_rContent, i_rDatatype) );
428 if (pAttributes)
429 {
430 AddRDFa(i_xObject, pAttributes);
431 }
432}
433
435 uno::Reference< rdf::XRepositorySupplier> const & i_xModel)
436{
437 SAL_WARN_IF(!i_xModel.is(), "xmloff.core", "InsertRDFa: invalid arg: model null");
438 if (!i_xModel.is()) return;
439 const uno::Reference< rdf::XDocumentRepository > xRepository(
440 i_xModel->getRDFRepository(), uno::UNO_QUERY);
441 SAL_WARN_IF(!xRepository.is(), "xmloff.core", "InsertRDFa: no DocumentRepository?");
442 if (!xRepository.is()) return;
443 RDFaInserter inserter(GetImport().GetComponentContext(), xRepository);
444 for (const auto& RDFaEntry : m_RDFaEntries)
445 inserter.InsertRDFaEntry(RDFaEntry);
446}
447
448} // namespace xmloff
449
450/* vim:set shiftwidth=4 softtabstop=4 expandtab: */
BlankNodeMap_t m_BlankNodeMap
uno::Reference< rdf::XDocumentRepository > m_xRepository
const uno::Reference< uno::XComponentContext > m_xContext
const SvXMLImport & m_rImport
void InsertRDFa(css::uno::Reference< css::rdf::XRepositorySupplier > const &i_xModel)
Insert all added statements into the RDF repository.
::std::vector< RDFaEntry > m_RDFaEntries
RDFaImportHelper(const SvXMLImport &i_rImport)
void AddRDFa(css::uno::Reference< css::rdf::XMetadatable > const &i_xObject, std::shared_ptr< ParsedRDFaAttributes > const &i_pRDFaAttributes)
Add a RDFa statement; must have been parsed with ParseRDFa.
const SvXMLImport & GetImport() const
std::shared_ptr< ParsedRDFaAttributes > ParseRDFa(OUString const &i_rAbout, OUString const &i_rProperty, OUString const &i_rContent, OUString const &i_rDatatype)
Parse RDFa attributes.
void ParseAndAddRDFa(css::uno::Reference< css::rdf::XMetadatable > const &i_xObject, OUString const &i_rAbout, OUString const &i_rProperty, OUString const &i_rContent, OUString const &i_rDatatype)
Parse and add a RDFa statement; parameters are XML attribute values.
const char * name
const sal_uInt16 idx[]
#define SAL_WARN_IF(condition, area, stream)
#define SAL_WARN(area, stream)
#define SAL_INFO(area, stream)
css::uno::Sequence< DstElementType > containerToSequence(const SrcType &i_Container)
const sal_uInt16 XML_NAMESPACE_NONE
const sal_uInt16 XML_NAMESPACE_UNKNOWN
const sal_uInt16 XML_NAMESPACE_XMLNS
static bool isWS(const sal_Unicode i_Char)
static OUString splitAtWS(OUString &io_rString)
store parsed RDFa attributes
::std::vector< OUString > m_Properties
ParsedRDFaAttributes(OUString i_sAbout, ::std::vector< OUString > &&i_rProperties, OUString i_sContent, OUString i_sDatatype)
store metadatable object and its RDFa attributes
uno::Reference< rdf::XMetadatable > m_xObject
RDFaEntry(uno::Reference< rdf::XMetadatable > i_xObject, std::shared_ptr< ParsedRDFaAttributes > i_pRDFaAttributes)
std::shared_ptr< ParsedRDFaAttributes > m_xRDFaAttributes
sal_uInt16 sal_Unicode