LibreOffice Module xmloff (master) 1
RDFaImportHelper.cxx
Go to the documentation of this file.
1/* -*- Mode: C++; tab-width: 4; indent-tabs-mode: nil; c-basic-offset: 4 -*- */
2/*
3 * This file is part of the LibreOffice project.
4 *
5 * This Source Code Form is subject to the terms of the Mozilla Public
6 * License, v. 2.0. If a copy of the MPL was not distributed with this
7 * file, You can obtain one at http://mozilla.org/MPL/2.0/.
8 *
9 * This file incorporates work covered by the following license notice:
10 *
11 * Licensed to the Apache Software Foundation (ASF) under one or more
12 * contributor license agreements. See the NOTICE file distributed
13 * with this work for additional information regarding copyright
14 * ownership. The ASF licenses this file to you under the Apache
15 * License, Version 2.0 (the "License"); you may not use this file
16 * except in compliance with the License. You may obtain a copy of
17 * the License at http://www.apache.org/licenses/LICENSE-2.0 .
18 */
19
20#include <RDFaImportHelper.hxx>
21
22#include <xmloff/xmlimp.hxx>
24
26
27#include <com/sun/star/rdf/URI.hpp>
28#include <com/sun/star/rdf/XDocumentRepository.hpp>
29#include <com/sun/star/rdf/XRepositorySupplier.hpp>
30
31#include <rtl/ustring.hxx>
32#include <sal/log.hxx>
33
34#include <map>
35
36using namespace ::com::sun::star;
37
38namespace xmloff {
39
40namespace {
41
43class RDFaReader
44{
46
47 const SvXMLImport & GetImport() const { return m_rImport; }
48
49 //FIXME: this is an ugly hack to workaround buggy SvXMLImport::GetAbsolute
50 OUString GetAbsoluteReference(OUString const & i_rURI) const
51 {
52 if (i_rURI.isEmpty() || i_rURI[0] == '#')
53 {
54 return GetImport().GetBaseURL() + i_rURI;
55 }
56 else
57 {
58 return GetImport().GetAbsoluteReference(i_rURI);
59 }
60 }
61
62public:
63 explicit RDFaReader(SvXMLImport const & i_rImport)
64 : m_rImport(i_rImport)
65 { }
66
67 // returns URI or blank node!
68 OUString ReadCURIE(OUString const & i_rCURIE) const;
69
70 std::vector< OUString >
71 ReadCURIEs(OUString const & i_rCURIEs) const;
72
73 OUString
74 ReadURIOrSafeCURIE( OUString const & i_rURIOrSafeCURIE) const;
75};
76
78class RDFaInserter
79{
80 const uno::Reference<uno::XComponentContext> m_xContext;
81 uno::Reference< rdf::XDocumentRepository > m_xRepository;
82
83 typedef ::std::map< OUString, uno::Reference< rdf::XBlankNode > >
84 BlankNodeMap_t;
85
86 BlankNodeMap_t m_BlankNodeMap;
87
88public:
89 RDFaInserter(uno::Reference<uno::XComponentContext> const & i_xContext,
90 uno::Reference< rdf::XDocumentRepository > const & i_xRepository)
91 : m_xContext(i_xContext)
92 , m_xRepository(i_xRepository)
93 {}
94
95 uno::Reference< rdf::XBlankNode >
96 LookupBlankNode(OUString const & i_rNodeId );
97
98 uno::Reference< rdf::XURI >
99 MakeURI( OUString const & i_rURI) const;
100
101 uno::Reference< rdf::XResource>
102 MakeResource( OUString const & i_rResource);
103
104 void InsertRDFaEntry(struct RDFaEntry const & i_rEntry);
105};
106
107}
108
111{
112 OUString m_About;
113 ::std::vector< OUString > m_Properties;
114 OUString m_Content;
115 OUString m_Datatype;
116
118 OUString const & i_rAbout,
119 ::std::vector< OUString >&& i_rProperties,
120 OUString const & i_rContent,
121 OUString const & i_rDatatype)
122 : m_About(i_rAbout)
123 , m_Properties(std::move(i_rProperties))
124 , m_Content(i_rContent)
125 , m_Datatype(i_rDatatype)
126 { }
127};
128
131{
132 uno::Reference<rdf::XMetadatable> m_xObject;
133 std::shared_ptr<ParsedRDFaAttributes> m_xRDFaAttributes;
134
135 RDFaEntry(uno::Reference<rdf::XMetadatable> const & i_xObject,
136 std::shared_ptr<ParsedRDFaAttributes> const& i_pRDFaAttributes)
137 : m_xObject(i_xObject)
138 , m_xRDFaAttributes(i_pRDFaAttributes)
139 { }
140};
141
142static bool isWS(const sal_Unicode i_Char)
143{
144 return ('\t' == i_Char) || ('\n' == i_Char) || ('\r' == i_Char)
145 || (' ' == i_Char);
146}
147
148static OUString splitAtWS(OUString & io_rString)
149{
150 const sal_Int32 len( io_rString.getLength() );
151 sal_Int32 idxstt(0);
152 while ((idxstt < len) && ( isWS(io_rString[idxstt])))
153 ++idxstt; // skip leading ws
154 sal_Int32 idxend(idxstt);
155 while ((idxend < len) && (!isWS(io_rString[idxend])))
156 ++idxend; // the CURIE
157 const OUString ret(io_rString.copy(idxstt, idxend - idxstt));
158 io_rString = io_rString.copy(idxend); // rest
159 return ret;
160}
161
162OUString
163RDFaReader::ReadCURIE(OUString const & i_rCURIE) const
164{
165 // the RDFa spec says that a prefix is required (it may be empty: ":foo")
166 const sal_Int32 idx( i_rCURIE.indexOf(':') );
167 if (idx >= 0)
168 {
169 OUString Prefix;
170 OUString LocalName;
171 OUString Namespace;
172 // LocalName may contain ':', see "ipchar" in RFC 3987
173 sal_uInt16 nKey( GetImport().GetNamespaceMap().GetKeyByQName(
174 i_rCURIE, &Prefix, &LocalName, &Namespace, SvXMLNamespaceMap::QNameMode::AttrValue) );
175 if ( Prefix == "_" )
176 {
177 // eeek, it's a bnode!
178 // "_" is not a valid URI scheme => we can identify bnodes
179 return i_rCURIE;
180 }
181 else
182 {
183 SAL_WARN_IF(XML_NAMESPACE_NONE == nKey, "xmloff.core", "no namespace?");
184 if ((XML_NAMESPACE_UNKNOWN != nKey) &&
185 (XML_NAMESPACE_XMLNS != nKey))
186 {
187 // N.B.: empty LocalName is valid!
188 const OUString URI(Namespace + LocalName);
189 return GetAbsoluteReference(URI);
190 }
191 else
192 {
193 SAL_INFO("xmloff.core", "ReadCURIE: invalid CURIE: invalid prefix" );
194 return OUString();
195 }
196 }
197 }
198 SAL_INFO("xmloff.core", "ReadCURIE: invalid CURIE: no prefix" );
199 return OUString();
200}
201
202::std::vector< OUString >
203RDFaReader::ReadCURIEs(OUString const & i_rCURIEs) const
204{
205 std::vector< OUString > vec;
206 OUString CURIEs(i_rCURIEs);
207 do {
208 OUString curie( splitAtWS(CURIEs) );
209 if (!curie.isEmpty())
210 {
211 const OUString uri(ReadCURIE(curie));
212 if (!uri.isEmpty())
213 {
214 vec.push_back(uri);
215 }
216 }
217 }
218 while (!CURIEs.isEmpty());
219 if (vec.empty())
220 {
221 SAL_INFO("xmloff.core", "ReadCURIEs: invalid CURIEs" );
222 }
223 return vec;
224}
225
226OUString
227RDFaReader::ReadURIOrSafeCURIE(OUString const & i_rURIOrSafeCURIE) const
228{
229 const sal_Int32 len(i_rURIOrSafeCURIE.getLength());
230 if (len && (i_rURIOrSafeCURIE[0] == '['))
231 {
232 if ((len >= 2) && (i_rURIOrSafeCURIE[len - 1] == ']'))
233 {
234 return ReadCURIE(i_rURIOrSafeCURIE.copy(1, len - 2));
235 }
236 else
237 {
238 SAL_INFO("xmloff.core", "ReadURIOrSafeCURIE: invalid SafeCURIE" );
239 return OUString();
240 }
241 }
242 else
243 {
244 if (i_rURIOrSafeCURIE.startsWith("_:")) // blank node
245 {
246 SAL_INFO("xmloff.core", "ReadURIOrSafeCURIE: invalid URI: scheme is _" );
247 return OUString();
248 }
249 else
250 {
251 return GetAbsoluteReference(i_rURIOrSafeCURIE);
252 }
253 }
254}
255
256uno::Reference< rdf::XBlankNode >
257RDFaInserter::LookupBlankNode(OUString const & i_rNodeId )
258{
259 uno::Reference< rdf::XBlankNode > & rEntry( m_BlankNodeMap[ i_rNodeId ] );
260 if (!rEntry.is())
261 {
262 rEntry = m_xRepository->createBlankNode();
263 }
264 return rEntry;
265}
266
267uno::Reference< rdf::XURI >
268RDFaInserter::MakeURI( OUString const & i_rURI) const
269{
270 if (i_rURI.startsWith("_:")) // blank node
271 {
272 SAL_INFO("xmloff.core", "MakeURI: cannot create URI for blank node");
273 return nullptr;
274 }
275 else
276 {
277 try
278 {
279 return rdf::URI::create( m_xContext, i_rURI );
280 }
281 catch (uno::Exception &)
282 {
283 SAL_WARN("xmloff.core", "MakeURI: cannot create URI");
284 return nullptr;
285 }
286 }
287}
288
289uno::Reference<rdf::XResource>
290RDFaInserter::MakeResource( OUString const & i_rResource)
291{
292 if (i_rResource.startsWith("_:")) // blank node
293 {
294 // we cannot use the blank node label as-is: it must be distinct
295 // from labels in other graphs, so create fresh ones per XML stream
296 // N.B.: content.xml and styles.xml are distinct graphs
297 OUString name( i_rResource.copy(2) );
298 const uno::Reference< rdf::XBlankNode > xBNode( LookupBlankNode(name) );
299 SAL_WARN_IF(!xBNode.is(), "xmloff.core", "no blank node?");
300 return xBNode;
301 }
302 else
303 {
304 return MakeURI( i_rResource );
305 }
306}
307
308void RDFaInserter::InsertRDFaEntry(
309 struct RDFaEntry const & i_rEntry)
310{
311 SAL_WARN_IF(!i_rEntry.m_xObject.is(), "xmloff.core", "InsertRDFaEntry: invalid arg: null object");
312 if (!i_rEntry.m_xObject.is()) return;
313
314 const uno::Reference< rdf::XResource > xSubject(
315 MakeResource( i_rEntry.m_xRDFaAttributes->m_About ) );
316 if (!xSubject.is())
317 {
318 return; // invalid
319 }
320
321 ::std::vector< uno::Reference< rdf::XURI > > predicates;
322
323 predicates.reserve(i_rEntry.m_xRDFaAttributes->m_Properties.size());
324
325 for (OUString const& prop : i_rEntry.m_xRDFaAttributes->m_Properties)
326 {
327 auto const xURI(MakeURI(prop));
328 if (xURI.is())
329 {
330 predicates.push_back(xURI);
331 }
332 }
333
334 if (predicates.empty())
335 {
336 return; // invalid
337 }
338
339 uno::Reference<rdf::XURI> xDatatype;
340 if (!i_rEntry.m_xRDFaAttributes->m_Datatype.isEmpty())
341 {
342 xDatatype = MakeURI( i_rEntry.m_xRDFaAttributes->m_Datatype );
343 }
344
345 try
346 {
347 // N.B.: this will call xMeta->ensureMetadataReference, which is why
348 // this must be done _after_ importing the whole XML file,
349 // to prevent collision between generated ids and ids in the file
350 m_xRepository->setStatementRDFa(xSubject, comphelper::containerToSequence(predicates),
351 i_rEntry.m_xObject,
352 i_rEntry.m_xRDFaAttributes->m_Content, xDatatype);
353 }
354 catch (uno::Exception &)
355 {
356 SAL_WARN("xmloff.core", "InsertRDFaEntry: setStatementRDFa failed?");
357 }
358}
359
361 : m_rImport(i_rImport)
362{
363}
364
366{
367}
368
369std::shared_ptr<ParsedRDFaAttributes>
371 OUString const & i_rAbout,
372 OUString const & i_rProperty,
373 OUString const & i_rContent,
374 OUString const & i_rDatatype)
375{
376 if (i_rProperty.isEmpty())
377 {
378 SAL_INFO("xmloff.core", "AddRDFa: invalid input: xhtml:property empty");
379 return std::shared_ptr<ParsedRDFaAttributes>();
380 }
381 // must parse CURIEs here: need namespace declaration context
382 RDFaReader reader(GetImport());
383 const OUString about( reader.ReadURIOrSafeCURIE(i_rAbout) );
384 if (about.isEmpty()) {
385 return std::shared_ptr<ParsedRDFaAttributes>();
386 }
387 ::std::vector< OUString > properties(
388 reader.ReadCURIEs(i_rProperty) );
389 if (properties.empty()) {
390 return std::shared_ptr<ParsedRDFaAttributes>();
391 }
392 const OUString datatype( !i_rDatatype.isEmpty()
393 ? reader.ReadCURIE(i_rDatatype)
394 : OUString() );
395 return std::make_shared<ParsedRDFaAttributes>(
396 about, std::move(properties), i_rContent, datatype);
397}
398
399void
401 uno::Reference<rdf::XMetadatable> const & i_xObject,
402 std::shared_ptr<ParsedRDFaAttributes> const & i_pRDFaAttributes)
403{
404 if (!i_xObject.is())
405 {
406 SAL_WARN("xmloff.core", "AddRDFa: invalid arg: null textcontent");
407 return;
408 }
409 if (!i_pRDFaAttributes)
410 {
411 SAL_WARN("xmloff.core", "AddRDFa: invalid arg: null RDFa attributes");
412 return;
413 }
414 m_RDFaEntries.emplace_back(i_xObject, i_pRDFaAttributes);
415}
416
417void
419 uno::Reference<rdf::XMetadatable> const & i_xObject,
420 OUString const & i_rAbout,
421 OUString const & i_rProperty,
422 OUString const & i_rContent,
423 OUString const & i_rDatatype)
424{
425 std::shared_ptr<ParsedRDFaAttributes> pAttributes(
426 ParseRDFa(i_rAbout, i_rProperty, i_rContent, i_rDatatype) );
427 if (pAttributes)
428 {
429 AddRDFa(i_xObject, pAttributes);
430 }
431}
432
434 uno::Reference< rdf::XRepositorySupplier> const & i_xModel)
435{
436 SAL_WARN_IF(!i_xModel.is(), "xmloff.core", "InsertRDFa: invalid arg: model null");
437 if (!i_xModel.is()) return;
438 const uno::Reference< rdf::XDocumentRepository > xRepository(
439 i_xModel->getRDFRepository(), uno::UNO_QUERY);
440 SAL_WARN_IF(!xRepository.is(), "xmloff.core", "InsertRDFa: no DocumentRepository?");
441 if (!xRepository.is()) return;
442 RDFaInserter inserter(GetImport().GetComponentContext(), xRepository);
443 for (const auto& RDFaEntry : m_RDFaEntries)
444 inserter.InsertRDFaEntry(RDFaEntry);
445}
446
447} // namespace xmloff
448
449/* vim:set shiftwidth=4 softtabstop=4 expandtab: */
BlankNodeMap_t m_BlankNodeMap
uno::Reference< rdf::XDocumentRepository > m_xRepository
const uno::Reference< uno::XComponentContext > m_xContext
const SvXMLImport & m_rImport
OUString GetBaseURL() const
Definition: xmlimp.cxx:1799
void InsertRDFa(css::uno::Reference< css::rdf::XRepositorySupplier > const &i_xModel)
Insert all added statements into the RDF repository.
::std::vector< RDFaEntry > m_RDFaEntries
RDFaImportHelper(const SvXMLImport &i_rImport)
void AddRDFa(css::uno::Reference< css::rdf::XMetadatable > const &i_xObject, std::shared_ptr< ParsedRDFaAttributes > const &i_pRDFaAttributes)
Add a RDFa statement; must have been parsed with ParseRDFa.
const SvXMLImport & GetImport() const
std::shared_ptr< ParsedRDFaAttributes > ParseRDFa(OUString const &i_rAbout, OUString const &i_rProperty, OUString const &i_rContent, OUString const &i_rDatatype)
Parse RDFa attributes.
void ParseAndAddRDFa(css::uno::Reference< css::rdf::XMetadatable > const &i_xObject, OUString const &i_rAbout, OUString const &i_rProperty, OUString const &i_rContent, OUString const &i_rDatatype)
Parse and add a RDFa statement; parameters are XML attribute values.
const char * name
const sal_uInt16 idx[]
#define SAL_WARN_IF(condition, area, stream)
#define SAL_WARN(area, stream)
#define SAL_INFO(area, stream)
css::uno::Sequence< DstElementType > containerToSequence(const SrcType &i_Container)
const sal_uInt16 XML_NAMESPACE_NONE
const sal_uInt16 XML_NAMESPACE_UNKNOWN
const sal_uInt16 XML_NAMESPACE_XMLNS
static bool isWS(const sal_Unicode i_Char)
static OUString splitAtWS(OUString &io_rString)
store parsed RDFa attributes
ParsedRDFaAttributes(OUString const &i_rAbout, ::std::vector< OUString > &&i_rProperties, OUString const &i_rContent, OUString const &i_rDatatype)
::std::vector< OUString > m_Properties
store metadatable object and its RDFa attributes
uno::Reference< rdf::XMetadatable > m_xObject
RDFaEntry(uno::Reference< rdf::XMetadatable > const &i_xObject, std::shared_ptr< ParsedRDFaAttributes > const &i_pRDFaAttributes)
std::shared_ptr< ParsedRDFaAttributes > m_xRDFaAttributes
sal_uInt16 sal_Unicode