LibreOffice Module xmloff (master)  1
RDFaImportHelper.cxx
Go to the documentation of this file.
1 /* -*- Mode: C++; tab-width: 4; indent-tabs-mode: nil; c-basic-offset: 4 -*- */
2 /*
3  * This file is part of the LibreOffice project.
4  *
5  * This Source Code Form is subject to the terms of the Mozilla Public
6  * License, v. 2.0. If a copy of the MPL was not distributed with this
7  * file, You can obtain one at http://mozilla.org/MPL/2.0/.
8  *
9  * This file incorporates work covered by the following license notice:
10  *
11  * Licensed to the Apache Software Foundation (ASF) under one or more
12  * contributor license agreements. See the NOTICE file distributed
13  * with this work for additional information regarding copyright
14  * ownership. The ASF licenses this file to you under the Apache
15  * License, Version 2.0 (the "License"); you may not use this file
16  * except in compliance with the License. You may obtain a copy of
17  * the License at http://www.apache.org/licenses/LICENSE-2.0 .
18  */
19 
20 #include <RDFaImportHelper.hxx>
21 
22 #include <xmloff/xmlimp.hxx>
23 #include <xmloff/namespacemap.hxx>
24 
25 #include <comphelper/sequence.hxx>
26 
27 #include <com/sun/star/rdf/URI.hpp>
28 #include <com/sun/star/rdf/XDocumentRepository.hpp>
29 #include <com/sun/star/rdf/XRepositorySupplier.hpp>
30 
31 #include <rtl/ustring.hxx>
32 #include <sal/log.hxx>
33 
34 #include <map>
35 
36 using namespace ::com::sun::star;
37 
38 namespace xmloff {
39 
40 namespace {
41 
43 class RDFaReader
44 {
46 
47  const SvXMLImport & GetImport() const { return m_rImport; }
48 
49  //FIXME: this is an ugly hack to workaround buggy SvXMLImport::GetAbsolute
50  OUString GetAbsoluteReference(OUString const & i_rURI) const
51  {
52  if (i_rURI.isEmpty() || i_rURI[0] == '#')
53  {
54  return GetImport().GetBaseURL() + i_rURI;
55  }
56  else
57  {
58  return GetImport().GetAbsoluteReference(i_rURI);
59  }
60  }
61 
62 public:
63  explicit RDFaReader(SvXMLImport const & i_rImport)
64  : m_rImport(i_rImport)
65  { }
66 
67  // returns URI or blank node!
68  OUString ReadCURIE(OUString const & i_rCURIE) const;
69 
70  std::vector< OUString >
71  ReadCURIEs(OUString const & i_rCURIEs) const;
72 
73  OUString
74  ReadURIOrSafeCURIE( OUString const & i_rURIOrSafeCURIE) const;
75 };
76 
78 class RDFaInserter
79 {
80  const uno::Reference<uno::XComponentContext> m_xContext;
81  uno::Reference< rdf::XDocumentRepository > m_xRepository;
82 
83  typedef ::std::map< OUString, uno::Reference< rdf::XBlankNode > >
84  BlankNodeMap_t;
85 
86  BlankNodeMap_t m_BlankNodeMap;
87 
88 public:
89  RDFaInserter(uno::Reference<uno::XComponentContext> const & i_xContext,
90  uno::Reference< rdf::XDocumentRepository > const & i_xRepository)
91  : m_xContext(i_xContext)
92  , m_xRepository(i_xRepository)
93  {}
94 
95  uno::Reference< rdf::XBlankNode >
96  LookupBlankNode(OUString const & i_rNodeId );
97 
98  uno::Reference< rdf::XURI >
99  MakeURI( OUString const & i_rURI) const;
100 
101  uno::Reference< rdf::XResource>
102  MakeResource( OUString const & i_rResource);
103 
104  void InsertRDFaEntry(struct RDFaEntry const & i_rEntry);
105 };
106 
107 }
108 
111 {
112  OUString m_About;
113  ::std::vector< OUString > m_Properties;
114  OUString m_Content;
115  OUString m_Datatype;
116 
118  OUString const & i_rAbout,
119  ::std::vector< OUString > const & i_rProperties,
120  OUString const & i_rContent,
121  OUString const & i_rDatatype)
122  : m_About(i_rAbout)
123  , m_Properties(i_rProperties)
124  , m_Content(i_rContent)
125  , m_Datatype(i_rDatatype)
126  { }
127 };
128 
130 struct RDFaEntry
131 {
132  uno::Reference<rdf::XMetadatable> m_xObject;
133  std::shared_ptr<ParsedRDFaAttributes> m_xRDFaAttributes;
134 
135  RDFaEntry(uno::Reference<rdf::XMetadatable> const & i_xObject,
136  std::shared_ptr<ParsedRDFaAttributes> const& i_pRDFaAttributes)
137  : m_xObject(i_xObject)
138  , m_xRDFaAttributes(i_pRDFaAttributes)
139  { }
140 };
141 
142 static bool isWS(const sal_Unicode i_Char)
143 {
144  return ('\t' == i_Char) || ('\n' == i_Char) || ('\r' == i_Char)
145  || (' ' == i_Char);
146 }
147 
148 static OUString splitAtWS(OUString & io_rString)
149 {
150  const sal_Int32 len( io_rString.getLength() );
151  sal_Int32 idxstt(0);
152  while ((idxstt < len) && ( isWS(io_rString[idxstt])))
153  ++idxstt; // skip leading ws
154  sal_Int32 idxend(idxstt);
155  while ((idxend < len) && (!isWS(io_rString[idxend])))
156  ++idxend; // the CURIE
157  const OUString ret(io_rString.copy(idxstt, idxend - idxstt));
158  io_rString = io_rString.copy(idxend); // rest
159  return ret;
160 }
161 
162 OUString
163 RDFaReader::ReadCURIE(OUString const & i_rCURIE) const
164 {
165  // the RDFa spec says that a prefix is required (it may be empty: ":foo")
166  const sal_Int32 idx( i_rCURIE.indexOf(':') );
167  if (idx >= 0)
168  {
169  OUString Prefix;
170  OUString LocalName;
171  OUString Namespace;
172  // LocalName may contain ':', see "ipchar" in RFC 3987
173  sal_uInt16 nKey( GetImport().GetNamespaceMap().GetKeyByQName(
174  i_rCURIE, &Prefix, &LocalName, &Namespace, SvXMLNamespaceMap::QNameMode::AttrValue) );
175  if ( Prefix == "_" )
176  {
177  // eeek, it's a bnode!
178  // "_" is not a valid URI scheme => we can identify bnodes
179  return i_rCURIE;
180  }
181  else
182  {
183  SAL_WARN_IF(XML_NAMESPACE_NONE == nKey, "xmloff.core", "no namespace?");
184  if ((XML_NAMESPACE_UNKNOWN != nKey) &&
185  (XML_NAMESPACE_XMLNS != nKey))
186  {
187  // N.B.: empty LocalName is valid!
188  const OUString URI(Namespace + LocalName);
189  return GetAbsoluteReference(URI);
190  }
191  else
192  {
193  SAL_INFO("xmloff.core", "ReadCURIE: invalid CURIE: invalid prefix" );
194  return OUString();
195  }
196  }
197  }
198  SAL_INFO("xmloff.core", "ReadCURIE: invalid CURIE: no prefix" );
199  return OUString();
200 }
201 
202 ::std::vector< OUString >
203 RDFaReader::ReadCURIEs(OUString const & i_rCURIEs) const
204 {
205  std::vector< OUString > vec;
206  OUString CURIEs(i_rCURIEs);
207  do {
208  OUString curie( splitAtWS(CURIEs) );
209  if (!curie.isEmpty())
210  {
211  const OUString uri(ReadCURIE(curie));
212  if (!uri.isEmpty())
213  {
214  vec.push_back(uri);
215  }
216  }
217  }
218  while (!CURIEs.isEmpty());
219  if (vec.empty())
220  {
221  SAL_INFO("xmloff.core", "ReadCURIEs: invalid CURIEs" );
222  }
223  return vec;
224 }
225 
226 OUString
227 RDFaReader::ReadURIOrSafeCURIE(OUString const & i_rURIOrSafeCURIE) const
228 {
229  const sal_Int32 len(i_rURIOrSafeCURIE.getLength());
230  if (len && (i_rURIOrSafeCURIE[0] == '['))
231  {
232  if ((len >= 2) && (i_rURIOrSafeCURIE[len - 1] == ']'))
233  {
234  return ReadCURIE(i_rURIOrSafeCURIE.copy(1, len - 2));
235  }
236  else
237  {
238  SAL_INFO("xmloff.core", "ReadURIOrSafeCURIE: invalid SafeCURIE" );
239  return OUString();
240  }
241  }
242  else
243  {
244  if (i_rURIOrSafeCURIE.startsWith("_:")) // blank node
245  {
246  SAL_INFO("xmloff.core", "ReadURIOrSafeCURIE: invalid URI: scheme is _" );
247  return OUString();
248  }
249  else
250  {
251  return GetAbsoluteReference(i_rURIOrSafeCURIE);
252  }
253  }
254 }
255 
256 uno::Reference< rdf::XBlankNode >
257 RDFaInserter::LookupBlankNode(OUString const & i_rNodeId )
258 {
259  uno::Reference< rdf::XBlankNode > & rEntry( m_BlankNodeMap[ i_rNodeId ] );
260  if (!rEntry.is())
261  {
262  rEntry = m_xRepository->createBlankNode();
263  }
264  return rEntry;
265 }
266 
267 uno::Reference< rdf::XURI >
268 RDFaInserter::MakeURI( OUString const & i_rURI) const
269 {
270  if (i_rURI.startsWith("_:")) // blank node
271  {
272  SAL_INFO("xmloff.core", "MakeURI: cannot create URI for blank node");
273  return nullptr;
274  }
275  else
276  {
277  try
278  {
279  return rdf::URI::create( m_xContext, i_rURI );
280  }
281  catch (uno::Exception &)
282  {
283  SAL_WARN("xmloff.core", "MakeURI: cannot create URI");
284  return nullptr;
285  }
286  }
287 }
288 
289 uno::Reference<rdf::XResource>
290 RDFaInserter::MakeResource( OUString const & i_rResource)
291 {
292  if (i_rResource.startsWith("_:")) // blank node
293  {
294  // we cannot use the blank node label as-is: it must be distinct
295  // from labels in other graphs, so create fresh ones per XML stream
296  // N.B.: content.xml and styles.xml are distinct graphs
297  OUString name( i_rResource.copy(2) );
298  const uno::Reference< rdf::XBlankNode > xBNode( LookupBlankNode(name) );
299  SAL_WARN_IF(!xBNode.is(), "xmloff.core", "no blank node?");
300  return xBNode;
301  }
302  else
303  {
304  return MakeURI( i_rResource );
305  }
306 }
307 
308 void RDFaInserter::InsertRDFaEntry(
309  struct RDFaEntry const & i_rEntry)
310 {
311  SAL_WARN_IF(!i_rEntry.m_xObject.is(), "xmloff.core", "InsertRDFaEntry: invalid arg: null object");
312  if (!i_rEntry.m_xObject.is()) return;
313 
314  const uno::Reference< rdf::XResource > xSubject(
315  MakeResource( i_rEntry.m_xRDFaAttributes->m_About ) );
316  if (!xSubject.is())
317  {
318  return; // invalid
319  }
320 
321  ::std::vector< uno::Reference< rdf::XURI > > predicates;
322 
323  predicates.reserve(i_rEntry.m_xRDFaAttributes->m_Properties.size());
324 
325  for (OUString const& prop : i_rEntry.m_xRDFaAttributes->m_Properties)
326  {
327  auto const xURI(MakeURI(prop));
328  if (xURI.is())
329  {
330  predicates.push_back(xURI);
331  }
332  }
333 
334  if (predicates.empty())
335  {
336  return; // invalid
337  }
338 
339  uno::Reference<rdf::XURI> xDatatype;
340  if (!i_rEntry.m_xRDFaAttributes->m_Datatype.isEmpty())
341  {
342  xDatatype = MakeURI( i_rEntry.m_xRDFaAttributes->m_Datatype );
343  }
344 
345  try
346  {
347  // N.B.: this will call xMeta->ensureMetadataReference, which is why
348  // this must be done _after_ importing the whole XML file,
349  // to prevent collision between generated ids and ids in the file
350  m_xRepository->setStatementRDFa(xSubject, comphelper::containerToSequence(predicates),
351  i_rEntry.m_xObject,
352  i_rEntry.m_xRDFaAttributes->m_Content, xDatatype);
353  }
354  catch (uno::Exception &)
355  {
356  SAL_WARN("xmloff.core", "InsertRDFaEntry: setStatementRDFa failed?");
357  }
358 }
359 
361  : m_rImport(i_rImport)
362 {
363 }
364 
366 {
367 }
368 
369 std::shared_ptr<ParsedRDFaAttributes>
371  OUString const & i_rAbout,
372  OUString const & i_rProperty,
373  OUString const & i_rContent,
374  OUString const & i_rDatatype)
375 {
376  if (i_rProperty.isEmpty())
377  {
378  SAL_INFO("xmloff.core", "AddRDFa: invalid input: xhtml:property empty");
379  return std::shared_ptr<ParsedRDFaAttributes>();
380  }
381  // must parse CURIEs here: need namespace declaration context
382  RDFaReader reader(GetImport());
383  const OUString about( reader.ReadURIOrSafeCURIE(i_rAbout) );
384  if (about.isEmpty()) {
385  return std::shared_ptr<ParsedRDFaAttributes>();
386  }
387  const ::std::vector< OUString > properties(
388  reader.ReadCURIEs(i_rProperty) );
389  if (properties.empty()) {
390  return std::shared_ptr<ParsedRDFaAttributes>();
391  }
392  const OUString datatype( !i_rDatatype.isEmpty()
393  ? reader.ReadCURIE(i_rDatatype)
394  : OUString() );
395  return std::make_shared<ParsedRDFaAttributes>(
396  about, properties, i_rContent, datatype);
397 }
398 
399 void
401  uno::Reference<rdf::XMetadatable> const & i_xObject,
402  std::shared_ptr<ParsedRDFaAttributes> const & i_pRDFaAttributes)
403 {
404  if (!i_xObject.is())
405  {
406  SAL_WARN("xmloff.core", "AddRDFa: invalid arg: null textcontent");
407  return;
408  }
409  if (!i_pRDFaAttributes)
410  {
411  SAL_WARN("xmloff.core", "AddRDFa: invalid arg: null RDFa attributes");
412  return;
413  }
414  m_RDFaEntries.emplace_back(i_xObject, i_pRDFaAttributes);
415 }
416 
417 void
419  uno::Reference<rdf::XMetadatable> const & i_xObject,
420  OUString const & i_rAbout,
421  OUString const & i_rProperty,
422  OUString const & i_rContent,
423  OUString const & i_rDatatype)
424 {
425  std::shared_ptr<ParsedRDFaAttributes> pAttributes(
426  ParseRDFa(i_rAbout, i_rProperty, i_rContent, i_rDatatype) );
427  if (pAttributes)
428  {
429  AddRDFa(i_xObject, pAttributes);
430  }
431 }
432 
434  uno::Reference< rdf::XRepositorySupplier> const & i_xModel)
435 {
436  SAL_WARN_IF(!i_xModel.is(), "xmloff.core", "InsertRDFa: invalid arg: model null");
437  if (!i_xModel.is()) return;
438  const uno::Reference< rdf::XDocumentRepository > xRepository(
439  i_xModel->getRDFRepository(), uno::UNO_QUERY);
440  SAL_WARN_IF(!xRepository.is(), "xmloff.core", "InsertRDFa: no DocumentRepository?");
441  if (!xRepository.is()) return;
442  RDFaInserter inserter(GetImport().GetComponentContext(), xRepository);
443  for (const auto& RDFaEntry : m_RDFaEntries)
444  inserter.InsertRDFaEntry(RDFaEntry);
445 }
446 
447 } // namespace xmloff
448 
449 /* vim:set shiftwidth=4 softtabstop=4 expandtab: */
OUString GetBaseURL() const
Definition: xmlimp.cxx:1857
::std::vector< RDFaEntry > m_RDFaEntries
RDFaEntry(uno::Reference< rdf::XMetadatable > const &i_xObject, std::shared_ptr< ParsedRDFaAttributes > const &i_pRDFaAttributes)
const SvXMLImport & GetImport() const
BlankNodeMap_t m_BlankNodeMap
std::shared_ptr< ParsedRDFaAttributes > m_xRDFaAttributes
const sal_uInt16 XML_NAMESPACE_UNKNOWN
const sal_uInt16 XML_NAMESPACE_XMLNS
sal_uInt16 sal_Unicode
std::shared_ptr< ParsedRDFaAttributes > ParseRDFa(OUString const &i_rAbout, OUString const &i_rProperty, OUString const &i_rContent, OUString const &i_rDatatype)
Parse RDFa attributes.
store metadatable object and its RDFa attributes
static bool isWS(const sal_Unicode i_Char)
RDFaImportHelper(const SvXMLImport &i_rImport)
const sal_uInt16 XML_NAMESPACE_NONE
static OUString splitAtWS(OUString &io_rString)
const uno::Reference< uno::XComponentContext > m_xContext
ParsedRDFaAttributes(OUString const &i_rAbout,::std::vector< OUString > const &i_rProperties, OUString const &i_rContent, OUString const &i_rDatatype)
const sal_uInt16 idx[]
uno::Reference< rdf::XDocumentRepository > m_xRepository
void ParseAndAddRDFa(css::uno::Reference< css::rdf::XMetadatable > const &i_xObject, OUString const &i_rAbout, OUString const &i_rProperty, OUString const &i_rContent, OUString const &i_rDatatype)
Parse and add a RDFa statement; parameters are XML attribute values.
#define SAL_WARN_IF(condition, area, stream)
css::uno::Sequence< DstElementType > containerToSequence(const SrcType &i_Container)
const SvXMLImport & m_rImport
#define SAL_INFO(area, stream)
uno::Reference< rdf::XMetadatable > m_xObject
const char * name
store parsed RDFa attributes
#define SAL_WARN(area, stream)
void InsertRDFa(css::uno::Reference< css::rdf::XRepositorySupplier > const &i_xModel)
Insert all added statements into the RDF repository.
void AddRDFa(css::uno::Reference< css::rdf::XMetadatable > const &i_xObject, std::shared_ptr< ParsedRDFaAttributes > const &i_pRDFaAttributes)
Add a RDFa statement; must have been parsed with ParseRDFa.
::std::vector< OUString > m_Properties
exports com.sun.star. uri