LibreOffice Module xmlreader (master) 1
xmlreader.hxx
Go to the documentation of this file.
1/* -*- Mode: C++; tab-width: 4; indent-tabs-mode: nil; c-basic-offset: 4 -*- */
2/*
3 * This file is part of the LibreOffice project.
4 *
5 * This Source Code Form is subject to the terms of the Mozilla Public
6 * License, v. 2.0. If a copy of the MPL was not distributed with this
7 * file, You can obtain one at http://mozilla.org/MPL/2.0/.
8 *
9 * This file incorporates work covered by the following license notice:
10 *
11 * Licensed to the Apache Software Foundation (ASF) under one or more
12 * contributor license agreements. See the NOTICE file distributed
13 * with this work for additional information regarding copyright
14 * ownership. The ASF licenses this file to you under the Apache
15 * License, Version 2.0 (the "License"); you may not use this file
16 * except in compliance with the License. You may obtain a copy of
17 * the License at http://www.apache.org/licenses/LICENSE-2.0 .
18 */
19
20#pragma once
21
22#include <sal/config.h>
23
24#include <stack>
25#include <vector>
26
27#include <osl/file.h>
28#include <rtl/ustring.hxx>
29#include <sal/types.h>
31#include <xmlreader/pad.hxx>
32#include <xmlreader/span.hxx>
33
34namespace xmlreader {
35
37public:
38 explicit XmlReader(OUString fileUrl);
39
40 ~XmlReader();
41
// Reserved namespace ID values (plain ints, like the int nsId values passed
// through this class's interface). NOTE(review): the exact meaning of each
// value is presumably what its name says -- confirm against the implementation.
42 enum { NAMESPACE_NONE = -2, NAMESPACE_UNKNOWN = -1, NAMESPACE_XML = 0 };
43
// Type of the reportText argument of nextItem.
44 enum class Text { NONE, Raw, Normalized };
45
// Return type of nextItem.
46 enum class Result { Begin, End, Text, Done };
47
48 int registerNamespaceIri(Span const & iri);
49
50 // Result::Begin: data = localName, nsId = namespace ID
51 // Result::End: data, nsId unused
52 // Result::Text: data = text, nsId unused
53 Result nextItem(Text reportText, Span * data, int * nsId);
54
55 bool nextAttribute(int * nsId, Span * localName);
56
57 // the span returned by getAttributeValue is only valid until the next call
58 // to nextItem or getAttributeValue
59 Span getAttributeValue(bool fullyNormalize);
60
61 int getNamespaceId(Span const & prefix) const;
62
// Returns a reference to the fileUrl_ member.
63 const OUString& getUrl() const { return fileUrl_;}
64
65private:
66 XmlReader(const XmlReader&) = delete;
67 XmlReader& operator=(const XmlReader&) = delete;
68
69 typedef std::vector< Span > NamespaceIris;
70
71 // If NamespaceData (and similarly ElementData and AttributeData) is made
72 // SAL_DLLPRIVATE, at least gcc 4.2.3 erroneously warns about
73 // "'xmlreader::XmlReader' declared with greater visibility than the type of
74 // its field 'xmlreader::XmlReader::namespaces_'" (and similarly for
75 // elements_ and attributes_):
76
79 int nsId;
80
82 nsId(-1) {}
83
84 NamespaceData(Span const & thePrefix, int theNsId):
85 prefix(thePrefix), nsId(theNsId) {}
86 };
87
88 typedef std::vector< NamespaceData > NamespaceList;
89
90 struct ElementData {
92 NamespaceList::size_type inheritedNamespaces;
94
96 Span const & theName,
97 NamespaceList::size_type theInheritedNamespaces,
98 int theDefaultNamespaceId):
99 name(theName), inheritedNamespaces(theInheritedNamespaces),
100 defaultNamespaceId(theDefaultNamespaceId)
101 {}
102 };
103
104 typedef std::stack< ElementData > ElementStack;
105
107 char const * nameBegin;
108 char const * nameEnd;
109 char const * nameColon;
110 char const * valueBegin;
111 char const * valueEnd;
112
114 char const * theNameBegin, char const * theNameEnd,
115 char const * theNameColon, char const * theValueBegin,
116 char const * theValueEnd):
117 nameBegin(theNameBegin), nameEnd(theNameEnd),
118 nameColon(theNameColon), valueBegin(theValueBegin),
119 valueEnd(theValueEnd)
120 {}
121 };
122
123 typedef std::vector< AttributeData > Attributes;
124
125 enum class State { Content, StartTag, EndTag, EmptyElementTag, Done };
126
// Consumes one char: returns the char at pos_ and advances pos_ past it, or
// returns '\0' (leaving pos_ untouched) once pos_ has reached end_.
127 SAL_DLLPRIVATE char read() { return pos_ != end_ ? *pos_++ : '\0'; }
128
// Looks at the char at pos_ without consuming it; yields '\0' once pos_ has
// reached end_.
129 SAL_DLLPRIVATE char peek() const { return pos_ != end_ ? *pos_ : '\0'; }
130
131 SAL_DLLPRIVATE void normalizeLineEnds(Span const & text);
132
133 SAL_DLLPRIVATE void skipSpace();
134
135 SAL_DLLPRIVATE bool skipComment();
136
137 SAL_DLLPRIVATE void skipProcessingInstruction();
138
139 SAL_DLLPRIVATE void skipDocumentTypeDeclaration();
140
141 SAL_DLLPRIVATE Span scanCdataSection();
142
143 SAL_DLLPRIVATE bool scanName(char const ** nameColon);
144
145 SAL_DLLPRIVATE int scanNamespaceIri(
146 char const * begin, char const * end);
147
148 SAL_DLLPRIVATE char const * handleReference(
149 char const * position, char const * end);
150
151 SAL_DLLPRIVATE Span handleAttributeValue(
152 char const * begin, char const * end, bool fullyNormalize);
153
154 SAL_DLLPRIVATE Result handleStartTag(int * nsId, Span * localName);
155
156 SAL_DLLPRIVATE Result handleEndTag();
157
158 SAL_DLLPRIVATE void handleElementEnd();
159
160 SAL_DLLPRIVATE Result handleSkippedText(Span * data, int * nsId);
161
162 SAL_DLLPRIVATE Result handleRawText(Span * text);
163
164 SAL_DLLPRIVATE Result handleNormalizedText(Span * text);
165
166 SAL_DLLPRIVATE static int toNamespaceId(NamespaceIris::size_type pos);
167
168 OUString const fileUrl_;
169 oslFileHandle fileHandle_;
170 sal_uInt64 fileSize_;
175 char const * pos_;
176 char const * end_;
179 Attributes::iterator currentAttribute_;
182};
183
184}
185
186/* vim:set shiftwidth=4 softtabstop=4 expandtab: */
std::stack< ElementData > ElementStack
Definition: xmlreader.hxx:104
XmlReader & operator=(const XmlReader &)=delete
XmlReader(const XmlReader &)=delete
const OUString & getUrl() const
Definition: xmlreader.hxx:63
std::vector< Span > NamespaceIris
Definition: xmlreader.hxx:69
Attributes attributes_
Definition: xmlreader.hxx:178
ElementStack elements_
Definition: xmlreader.hxx:174
OUString const fileUrl_
Definition: xmlreader.hxx:168
oslFileHandle fileHandle_
Definition: xmlreader.hxx:169
char const * pos_
Definition: xmlreader.hxx:175
std::vector< NamespaceData > NamespaceList
Definition: xmlreader.hxx:88
char const * end_
Definition: xmlreader.hxx:176
sal_uInt64 fileSize_
Definition: xmlreader.hxx:170
Attributes::iterator currentAttribute_
Definition: xmlreader.hxx:179
SAL_DLLPRIVATE char peek() const
Definition: xmlreader.hxx:129
SAL_DLLPRIVATE char read()
Definition: xmlreader.hxx:127
NamespaceList namespaces_
Definition: xmlreader.hxx:173
NamespaceIris namespaceIris_
Definition: xmlreader.hxx:172
std::vector< AttributeData > Attributes
Definition: xmlreader.hxx:123
const char * name
NONE
AttributeData(char const *theNameBegin, char const *theNameEnd, char const *theNameColon, char const *theValueBegin, char const *theValueEnd)
Definition: xmlreader.hxx:113
ElementData(Span const &theName, NamespaceList::size_type theInheritedNamespaces, int theDefaultNamespaceId)
Definition: xmlreader.hxx:95
NamespaceList::size_type inheritedNamespaces
Definition: xmlreader.hxx:92
NamespaceData(Span const &thePrefix, int theNsId)
Definition: xmlreader.hxx:84
#define OOO_DLLPUBLIC_XMLREADER