25#include <com/sun/star/container/NoSuchElementException.hpp>
26#include <com/sun/star/uno/RuntimeException.hpp>
28#include <rtl/character.hxx>
29#include <rtl/string.h>
30#include <rtl/ustring.hxx>
57 : fileUrl_(
std::move(fileUrl))
58 , fileHandle_(nullptr)
60 oslFileError e = osl_openFile(
66 case osl_File_E_NOENT:
67 throw css::container::NoSuchElementException(
fileUrl_ );
69 throw css::uno::RuntimeException(
70 "cannot open " +
fileUrl_ +
": " + OUString::number(e));
73 if (e == osl_File_E_None) {
76 osl_File_MapFlag_WillNeed);
78 if (e != osl_File_E_None) {
80 if (e2 != osl_File_E_None) {
83 "osl_closeFile of \"" <<
fileUrl_ <<
"\" failed with " << +e2);
85 throw css::uno::RuntimeException(
86 "cannot mmap " +
fileUrl_ +
" (" + OUString::number(e) +
")" );
88 namespaceIris_.emplace_back(
"http://www.w3.org/XML/1998/namespace");
100 if (e != osl_File_E_None) {
103 "osl_unmapMappedFile of \"" <<
fileUrl_ <<
"\" failed with " << +e);
106 if (e != osl_File_E_None) {
109 "osl_closeFile of \"" <<
fileUrl_ <<
"\" failed with " << +e);
116 if (iri ==
"http://www.w3.org/2001/XMLSchema-instance") {
131 switch (reportText) {
152 assert(nsId !=
nullptr && localName !=
nullptr);
187 [&
prefix](
const NamespaceData& rNamespaceData) { return prefix == rNamespaceData.prefix; });
197 char const *
p =
text.begin;
198 sal_Int32
n =
text.length;
200 sal_Int32
i = rtl_str_indexOfChar_WithLength(
p,
n,
'\x0D');
207 if (
n == 0 || *
p !=
'\x0A') {
215 while (isSpace(
peek())) {
221 if (rtl_str_shortenedCompare_WithLength(
223 RTL_CONSTASCII_LENGTH(
"--")) !=
228 pos_ += RTL_CONSTASCII_LENGTH(
"--");
229 sal_Int32
i = rtl_str_indexOfStr_WithLength(
232 throw css::uno::RuntimeException(
233 "premature end (within comment) of " +
fileUrl_ );
235 pos_ +=
i + RTL_CONSTASCII_LENGTH(
"--");
237 throw css::uno::RuntimeException(
238 "illegal \"--\" within comment in " +
fileUrl_ );
244 sal_Int32
i = rtl_str_indexOfStr_WithLength(
247 throw css::uno::RuntimeException(
250 pos_ +=
i + RTL_CONSTASCII_LENGTH(
"?>");
260 throw css::uno::RuntimeException(
261 "premature end (within DTD) of " +
fileUrl_ );
265 sal_Int32
i = rtl_str_indexOfChar_WithLength(
268 throw css::uno::RuntimeException(
269 "premature end (within DTD) of " +
fileUrl_ );
281 throw css::uno::RuntimeException(
282 "premature end (within DTD) of " +
fileUrl_ );
286 sal_Int32
i = rtl_str_indexOfChar_WithLength(
289 throw css::uno::RuntimeException(
290 "premature end (within DTD) of " +
fileUrl_ );
298 throw css::uno::RuntimeException(
299 "premature end (within DTD) of " +
fileUrl_ );
313 throw css::uno::RuntimeException(
314 "missing \">\" of DTD in " +
fileUrl_ );
328 if (rtl_str_shortenedCompare_WithLength(
329 pos_,
end_ -
pos_, RTL_CONSTASCII_STRINGPARAM(
"[CDATA["),
330 RTL_CONSTASCII_LENGTH(
"[CDATA[")) !=
335 pos_ += RTL_CONSTASCII_LENGTH(
"[CDATA[");
337 sal_Int32
i = rtl_str_indexOfStr_WithLength(
340 throw css::uno::RuntimeException(
341 "premature end (within CDATA section) of " +
fileUrl_ );
343 pos_ +=
i + RTL_CONSTASCII_LENGTH(
"]]>");
348 assert(nameColon !=
nullptr && *nameColon ==
nullptr);
393 if (c >=
'0' && c <=
'9') {
394 val = 16 * val + (c -
'0');
395 }
else if (c >=
'A' && c <=
'F') {
396 val = 16 * val + (c -
'A') + 10;
397 }
else if (c >=
'a' && c <=
'f') {
398 val = 16 * val + (c -
'a') + 10;
402 if (!rtl::isUnicodeCodePoint(val)) {
403 throw css::uno::RuntimeException(
404 "'&#x...' too large in " +
fileUrl_ );
411 if (c >=
'0' && c <=
'9') {
412 val = 10 * val + (c -
'0');
416 if (!rtl::isUnicodeCodePoint(val)) {
417 throw css::uno::RuntimeException(
418 "'&#...' too large in " +
fileUrl_ );
423 throw css::uno::RuntimeException(
424 "'&#...' missing ';' in " +
fileUrl_ );
426 assert(rtl::isUnicodeCodePoint(val));
427 if ((val < 0x20 && val != 0x9 && val != 0xA && val != 0xD) ||
428 (val >= 0xD800 && val <= 0xDFFF) || val == 0xFFFE || val == 0xFFFF)
430 throw css::uno::RuntimeException(
431 "character reference denoting invalid character in " +
fileUrl_ );
436 buf[0] =
static_cast< char >(val);
438 }
else if (val < 0x800) {
439 buf[0] =
static_cast< char >((val >> 6) | 0xC0);
440 buf[1] =
static_cast< char >((val & 0x3F) | 0x80);
442 }
else if (val < 0x10000) {
443 buf[0] =
static_cast< char >((val >> 12) | 0xE0);
444 buf[1] =
static_cast< char >(((val >> 6) & 0x3F) | 0x80);
445 buf[2] =
static_cast< char >((val & 0x3F) | 0x80);
448 buf[0] =
static_cast< char >((val >> 18) | 0xF0);
449 buf[1] =
static_cast< char >(((val >> 12) & 0x3F) | 0x80);
450 buf[2] =
static_cast< char >(((val >> 6) & 0x3F) | 0x80);
451 buf[3] =
static_cast< char >((val & 0x3F) | 0x80);
458 char const * inBegin;
459 sal_Int32
const inLength;
460 char const * outBegin;
461 sal_Int32
const outLength;
463 static EntityRef
const refs[] = {
464 { RTL_CONSTASCII_STRINGPARAM(
"amp;"),
465 RTL_CONSTASCII_STRINGPARAM(
"&") },
466 { RTL_CONSTASCII_STRINGPARAM(
"lt;"),
467 RTL_CONSTASCII_STRINGPARAM(
"<") },
468 { RTL_CONSTASCII_STRINGPARAM(
"gt;"),
469 RTL_CONSTASCII_STRINGPARAM(
">") },
470 { RTL_CONSTASCII_STRINGPARAM(
"apos;"),
471 RTL_CONSTASCII_STRINGPARAM(
"'") },
472 { RTL_CONSTASCII_STRINGPARAM(
"quot;"),
473 RTL_CONSTASCII_STRINGPARAM(
"\"") } };
474 for (
const auto & ref : refs) {
475 if (rtl_str_shortenedCompare_WithLength(
481 pad_.
add(ref.outBegin, ref.outLength);
485 throw css::uno::RuntimeException(
486 "unknown entity reference in " +
fileUrl_ );
491 char const * begin,
char const * end,
bool fullyNormalize)
494 if (fullyNormalize) {
502 enum Space { SPACE_NONE, SPACE_SPAN, SPACE_BREAK };
505 Space space = SPACE_NONE;
568 if (
peek() ==
'\x0A') {
590 assert(nsId !=
nullptr && localName);
591 char const * nameBegin =
pos_;
592 char const * nameColon =
nullptr;
594 throw css::uno::RuntimeException(
597 char const * nameEnd =
pos_;
598 NamespaceList::size_type inheritedNamespaces =
namespaces_.size();
599 bool hasDefaultNs =
false;
603 char const *
p =
pos_;
605 if (
peek() ==
'/' ||
peek() ==
'>') {
609 throw css::uno::RuntimeException(
610 "missing whitespace before attribute in " +
fileUrl_ );
612 char const * attrNameBegin =
pos_;
613 char const * attrNameColon =
nullptr;
615 throw css::uno::RuntimeException(
616 "bad attribute name in " +
fileUrl_ );
618 char const * attrNameEnd =
pos_;
621 throw css::uno::RuntimeException(
626 if (del !=
'\'' && del !=
'"') {
627 throw css::uno::RuntimeException(
628 "bad attribute value in " +
fileUrl_ );
630 char const * valueBegin =
pos_;
631 sal_Int32
i = rtl_str_indexOfChar_WithLength(
pos_,
end_ -
pos_, del);
633 throw css::uno::RuntimeException(
634 "unterminated attribute value in " +
fileUrl_ );
636 char const * valueEnd =
pos_ +
i;
638 if (attrNameColon ==
nullptr &&
639 Span(attrNameBegin, attrNameEnd - attrNameBegin) ==
"xmlns")
643 }
else if (attrNameColon !=
nullptr &&
644 Span(attrNameBegin, attrNameColon - attrNameBegin) ==
648 Span(attrNameColon + 1, attrNameEnd - (attrNameColon + 1)),
652 attrNameBegin, attrNameEnd, attrNameColon, valueBegin,
656 if (!hasDefaultNs && !
elements_.empty()) {
657 defaultNsId =
elements_.top().defaultNamespaceId;
667 throw css::uno::RuntimeException(
673 Span(nameBegin, nameEnd - nameBegin), inheritedNamespaces,
675 if (nameColon ==
nullptr) {
677 *localName =
Span(nameBegin, nameEnd - nameBegin);
680 *localName =
Span(nameColon + 1, nameEnd - (nameColon + 1));
687 throw css::uno::RuntimeException(
688 "spurious end tag in " +
fileUrl_ );
690 char const * nameBegin =
pos_;
691 char const * nameColon =
nullptr;
695 throw css::uno::RuntimeException(
701 throw css::uno::RuntimeException(
718 auto i =
static_cast<const char*
>(std::memchr(
pos_,
'<',
end_ -
pos_));
720 throw css::uno::RuntimeException(
749 throw css::uno::RuntimeException(
754 if (
peek() !=
'\x0A') {
805 char const * flowBegin =
pos_;
806 char const * flowEnd =
pos_;
807 enum Space { SPACE_START, SPACE_NONE, SPACE_SPAN, SPACE_BREAK };
810 Space space = SPACE_START;
814 throw css::uno::RuntimeException(
853 pad_.
add(flowBegin, flowEnd - flowBegin);
883 pad_.
add(flowBegin, flowEnd - flowBegin);
898 pad_.
add(flowBegin, flowEnd - flowBegin);
908 pad_.
add(flowBegin, flowEnd - flowBegin);
923 pad_.
add(flowBegin, flowEnd - flowBegin);
936 assert(
pos <= INT_MAX);
937 return static_cast< int >(
pos);
SAL_DLLPRIVATE void addEphemeral(char const *begin, sal_Int32 length)
void add(char const *begin, sal_Int32 length)
SAL_DLLPRIVATE int scanNamespaceIri(char const *begin, char const *end)
int getNamespaceId(Span const &prefix) const
int registerNamespaceIri(Span const &iri)
SAL_DLLPRIVATE Result handleStartTag(int *nsId, Span *localName)
SAL_DLLPRIVATE Result handleRawText(Span *text)
Result nextItem(Text reportText, Span *data, int *nsId)
oslFileHandle fileHandle_
SAL_DLLPRIVATE Result handleNormalizedText(Span *text)
SAL_DLLPRIVATE void skipSpace()
static SAL_DLLPRIVATE int toNamespaceId(NamespaceIris::size_type pos)
SAL_DLLPRIVATE void handleElementEnd()
bool nextAttribute(int *nsId, Span *localName)
Attributes::iterator currentAttribute_
SAL_DLLPRIVATE char peek() const
SAL_DLLPRIVATE void skipDocumentTypeDeclaration()
SAL_DLLPRIVATE bool skipComment()
SAL_DLLPRIVATE char read()
SAL_DLLPRIVATE void skipProcessingInstruction()
SAL_DLLPRIVATE Result handleSkippedText(Span *data, int *nsId)
NamespaceList namespaces_
Span getAttributeValue(bool fullyNormalize)
SAL_DLLPRIVATE char const * handleReference(char const *position, char const *end)
NamespaceIris namespaceIris_
SAL_DLLPRIVATE void normalizeLineEnds(Span const &text)
SAL_DLLPRIVATE Span handleAttributeValue(char const *begin, char const *end, bool fullyNormalize)
SAL_DLLPRIVATE Span scanCdataSection()
SAL_DLLPRIVATE Result handleEndTag()
SAL_DLLPRIVATE bool scanName(char const **nameColon)
XmlReader(OUString fileUrl)
#define SAL_WARN(area, stream)
enumrange< T >::Iterator begin(enumrange< T >)