22#include <com/sun/star/xml/sax/FastTokenHandler.hpp>
34#if OSL_DEBUG_LEVEL > 0
40using ::com::sun::star::uno::Sequence;
41using ::com::sun::star::io::XOutputStream;
// Helpers for 32-bit element/attribute identifiers: the masks and shift below
// treat the upper 16 bits as the namespace part and the lower 16 bits as the
// token part.
// Every use of the macro argument is parenthesized so that a compound
// argument such as `a | b` is evaluated as a unit before the mask or shift is
// applied (otherwise operator precedence would split it).

// True when any of the upper 16 bits of x is set.
#define HAS_NAMESPACE(x) (((x) & 0xffff0000) != 0)
// The upper 16 bits of x, shifted down into the low half.
#define NAMESPACE(x) ((x) >> 16)
// The lower 16 bits of x.
#define TOKEN(x) ((x) & 0xffff)
// Element count of the array `string` minus one.
// NOTE(review): presumably the length of a string literal without its
// terminating NUL -- SAL_N_ELEMENTS is defined outside this view; confirm.
#define N_CHARS(string) (SAL_N_ELEMENTS(string) - 1)
// The single character ':' as an OStringLiteral.
// NOTE(review): presumably the separator written between a namespace prefix
// and a local name -- its use is outside this view; confirm.
51constexpr OStringLiteral
sColon =
":";
// XML declaration (version 1.0, UTF-8 encoding, standalone="yes") terminated
// by a newline.
57const char sXmlHeader[] =
"<?xml version=\"1.0\" encoding=\"UTF-8\" standalone=\"yes\"?>\n";
61 : mbMarkStackEmpty(true)
62 , mpDoubleStr(nullptr)
63 , mnDoubleStrCapacity(RTL_STR_MAX_VALUEOFDOUBLE)
68 ::comphelper::getProcessComponentContext());
69 assert(xOutputStream.is());
85 rtl_math_doubleToString(
87 RTL_STR_MAX_VALUEOFDOUBLE - RTL_CONSTASCII_LENGTH(
"-x.E-xxx"),
'.',
nullptr,
104 write( sOutput.data(), sOutput.length(), bEscape );
121 template<
typename Int>
static std::optional<std::pair<unsigned, Int>>
invalidChar(
122 char const *
string, Int length, Int index )
125 auto const c =
string[
index];
127 if (
static_cast<unsigned char>(c) >= 0x20 && c !=
'\xEF')
138 switch (
string[
index + 2]) {
140 return std::pair(0xFFFE, 3);
142 return std::pair(0xFFFF, 3);
147 return std::pair(
static_cast<unsigned char>(c), 1);
152 return (
'0' <= c && c <=
'9') || (
'A' <= c && c <=
'F') || (
'a' <= c && c <=
'f');
158 nLen = pStr ? strlen(pStr) : 0;
167 const sal_Int32 kXescapeLen = 7;
168 char bufXescape[kXescapeLen+1];
169 sal_Int32 nNextXescape = 0;
170 for (sal_Int32
i = 0;
i < nLen;)
188 snprintf( bufXescape, kXescapeLen+1,
"_x%04x_",
189 static_cast<unsigned int>(
static_cast<unsigned char>(c)));
202 snprintf( bufXescape, kXescapeLen+1,
"_x%04x_",
203 static_cast<unsigned int>(
static_cast<unsigned char>(c)));
216 snprintf( bufXescape, kXescapeLen+1,
"_x%04x_",
217 static_cast<unsigned int>(
static_cast<unsigned char>(c)));
237 if (c ==
'_' &&
i >= nNextXescape &&
i <= nLen - kXescapeLen &&
239 ((pStr[
i+1] | 0x20) ==
'x') &&
249 if (!(c1 ==
'0' && c2 ==
'0' && c3 ==
'2' && c4 ==
'0'))
257 if (c1 ==
'0' && c2 ==
'0' && c3 ==
'5' && (c4 | 0x20) ==
'f' &&
258 i + kXescapeLen <= nLen - 6 &&
259 pStr[
i+kXescapeLen+5] ==
'_' &&
260 ((pStr[
i+kXescapeLen+0] | 0x20) ==
'x') &&
268 nNextXescape =
i + kXescapeLen + 6;
276 nNextXescape =
i + kXescapeLen;
283 snprintf( bufXescape, kXescapeLen+1,
"_x%04x_",
290#if OSL_DEBUG_LEVEL > 0
307 SAL_WARN_IF( !bGood && nLen > 1,
"sax",
"in '" << OString(pStr,std::min<sal_Int32>(nLen,42)) <<
"'");
320 assert(Namespace.hasElements());
324 assert(Element.hasElements());
328 assert(Element.hasElements());
337 Sequence<sal_Int8>
const ns(
339 Sequence<sal_Int8>
const name(
341 return std::string_view(
342 reinterpret_cast<char const*
>(
ns.getConstArray()),
ns.getLength())
345 reinterpret_cast<char const*
>(
name.getConstArray()),
name.getLength());
347 Sequence<sal_Int8>
const name(
349 return OString(
reinterpret_cast<char const*
>(
name.getConstArray()),
name.getLength());
366 maMarkStack.top()->m_DebugStartedElements.push_back(Element);
395 assert(!
maMarkStack.top()->m_DebugStartedElements.empty());
397 if (
maMarkStack.top()->m_DebugStartedElements.empty())
399 maMarkStack.top()->m_DebugEndedElements.push_back(Element);
403 assert(Element ==
maMarkStack.top()->m_DebugStartedElements.back());
404 maMarkStack.top()->m_DebugStartedElements.pop_back();
443 ::std::set<OString> DebugAttributes;
449 sal_Int32
nToken = rTokenValue.nToken;
455 assert(DebugAttributes.find(nameId) == DebugAttributes.end());
456 DebugAttributes.insert(nameId);
461 write(rTokenValue.pValue, -1,
true);
471 ::std::set<OString> DebugAttributes;
474 for (
size_t j = 0; j < Tokens.size(); j++)
478 sal_Int32
nToken = Tokens[j];
484 SAL_WARN_IF(DebugAttributes.find(nameId) != DebugAttributes.end(),
"sax",
"Duplicate attribute: " << nameId );
485 assert(DebugAttributes.find(nameId) == DebugAttributes.end());
486 DebugAttributes.insert(nameId);
494 bool bEscape = !(pAttributeValue
495 && *pAttributeValue !=
'\0'
496 && (*pAttributeValue ==
'#'
497 ? strncmp(pAttributeValue,
"#_x0000_t", 9) == 0
498 : strncmp(pAttributeValue,
"_x0000_t", 8) == 0));
508 if (rOrder.hasElements())
510 auto pSort = std::make_shared<ForSort>(nTag, rOrder);
516 auto pMerge = std::make_shared<ForMerge>(nTag);
525 std::deque<sal_Int32> & rLeftEndedElements,
526 std::deque<sal_Int32> & rLeftStartedElements,
527 std::deque<sal_Int32> & rRightEndedElements,
528 std::deque<sal_Int32> & rRightStartedElements)
530 while (!rRightEndedElements.empty())
532 if (rLeftStartedElements.empty())
534 rLeftEndedElements.push_back(rRightEndedElements.front());
538 assert(rLeftStartedElements.back() == rRightEndedElements.front());
539 rLeftStartedElements.pop_back();
541 rRightEndedElements.pop_front();
543 while (!rRightStartedElements.empty())
545 rLeftStartedElements.push_back(rRightStartedElements.front());
546 rRightStartedElements.pop_front();
551 std::deque<sal_Int32> & rLeftEndedElements,
552 std::deque<sal_Int32> & rLeftStartedElements,
553 std::deque<sal_Int32> & rRightEndedElements,
554 std::deque<sal_Int32> & rRightStartedElements)
556 while (!rLeftStartedElements.empty())
558 if (rRightEndedElements.empty())
560 rRightStartedElements.push_front(rLeftStartedElements.back());
564 assert(rRightEndedElements.front() == rLeftStartedElements.back());
565 rRightEndedElements.pop_front();
567 rLeftStartedElements.pop_back();
569 while (!rLeftEndedElements.empty())
571 rRightEndedElements.push_front(rLeftEndedElements.back());
572 rLeftEndedElements.pop_back();
585 assert(
maMarkStack.top()->m_Tag == nTag &&
"mark/merge tag mismatch!");
591 assert(
maMarkStack.top()->m_DebugStartedElements.empty());
592 assert(
maMarkStack.top()->m_DebugEndedElements.empty());
598 maMarkStack.top()->m_DebugPostponedStartedElements);
607 while (!
maMarkStack.top()->m_DebugEndedElements.empty())
610 maMarkStack.top()->m_DebugEndedElements.pop_front();
613 while (!
maMarkStack.top()->m_DebugStartedElements.empty())
616 maMarkStack.top()->m_DebugStartedElements.pop_front();
628 ::std::deque<sal_Int32> topDebugStartedElements(
maMarkStack.top()->m_DebugStartedElements);
629 ::std::deque<sal_Int32> topDebugEndedElements(
maMarkStack.top()->m_DebugEndedElements);
640 topDebugEndedElements,
641 topDebugStartedElements);
649 topDebugEndedElements,
650 topDebugStartedElements);
655 topDebugEndedElements,
656 topDebugStartedElements,
664 maMarkStack.top()->m_DebugPostponedStartedElements,
665 topDebugEndedElements,
666 topDebugStartedElements);
680 switch ( eMergeType )
695#if OSL_DEBUG_LEVEL > 0
698 for (
size_t i=0;
i < nLen;)
703 SAL_WARN(
"sax",
"FastSaxSerializer::writeBytes - illegal XML character 0x" <<
704 std::hex << inv->first);
710 SAL_WARN_IF( !bGood && nLen > 1,
"sax",
"in '" << OString(pStr,std::min<sal_Int32>(nLen,42)) <<
"'");
724#if OSL_DEBUG_LEVEL > 0
727 std::cerr <<
"Data: ";
728 for ( sal_Int32
i=0, len=
maData.getLength();
i < len;
i++ )
733 std::cerr <<
"\nPostponed: ";
734 for ( sal_Int32
i=0, len=maPostponed.getLength();
i < len;
i++ )
736 std::cerr << maPostponed[
i];
745 merge(
maData, rWhat,
false );
750 merge(
maData, rWhat,
true );
755 merge( maPostponed, rWhat,
true );
760 sal_Int32 nMergeLen = rMerge.getLength();
761 if ( nMergeLen <= 0 )
764 sal_Int32 nTopLen = rTop.getLength();
766 rTop.realloc( nTopLen + nMergeLen );
770 memcpy( rTop.getArray() + nTopLen, rMerge.getConstArray(), nMergeLen );
775 memmove( rTop.getArray() + nMergeLen, rTop.getConstArray(), nTopLen );
776 memcpy( rTop.getArray(), rMerge.getConstArray(), nMergeLen );
787 const auto & rOrder = maOrder;
788 if( std::find( rOrder.begin(), rOrder.end(), nElement ) != rOrder.end() )
790 mnCurrentElement = nElement;
803 merge(
maData[mnCurrentElement], rWhat,
true );
812 std::map< sal_Int32, Int8Sequence >::iterator iter;
813 for (
const auto nIndex : std::as_const(maOrder) )
816 if ( iter !=
maData.end() )
827#if OSL_DEBUG_LEVEL > 0
830 for (
const auto& [rElement, rData] :
maData )
832 std::cerr <<
"pair: " << rElement;
833 for ( sal_Int32
i=0, len=rData.getLength();
i < len; ++
i )
834 std::cerr << rData[
i];
void setOutput(std::shared_ptr< ForMergeBase > pForMerge)
void writeBytes(const sal_Int8 *pStr, sal_Int32 nLen)
Cache the string and, if the limit is hit, flush.
const css::uno::Reference< css::io::XOutputStream > & getOutputStream() const
void resetOutputToStream()
void flush()
Immediately write the buffer into mxOutputStream and clear it.
void setOutputStream(const css::uno::Reference< css::io::XOutputStream > &xOutputStream)
sal_Int32 AttributeValueLength(size_t i) const
const std::vector< sal_Int32 > & getFastAttributeTokens() const
const char * getFastAttributeValue(size_t nIndex) const
virtual Int8Sequence & getData() override
virtual void prepend(const Int8Sequence &rWhat) override
void setCurrentElement(::sal_Int32 nToken) override
virtual void append(const css::uno::Sequence< sal_Int8 > &rWhat) override
virtual void print() override
std::stack< std::shared_ptr< ForMerge > > maMarkStack
OString getId(::sal_Int32 Element)
std::stack< sal_Int32 > m_DebugStartedElements
void endDocument()
called by the parser after the last XML element of a stream is processed.
void writeFastAttributeList(FastAttributeList const &rAttrList)
void writeTokenValueList()
void startFastElement(::sal_Int32 Element, FastAttributeList const *pAttrList=nullptr)
receives notification of the beginning of an element.
void mergeTopMarks(sal_Int32 nTag, sax_fastparser::MergeMarks eMergeType)
Merge 2 topmost marks.
FastSaxSerializer(const css::uno::Reference< css::io::XOutputStream > &xOutputStream)
void singleFastElement(::sal_Int32 Element, FastAttributeList const *pAttrList=nullptr)
receives notification of the beginning of a single element.
css::uno::Sequence< ::sal_Int8 > Int8Sequence
void endFastElement(::sal_Int32 Element)
receives notification of the end of a known element.
CachedOutputStream maCachedOutputStream
Helper class to cache data and write in chunks to XOutputStream or ForMerge::append.
css::uno::Reference< css::xml::sax::XFastTokenHandler > mxFastTokenHandler
TokenValueList maTokenValues
bool mbXescape
Whether to escape invalid XML characters as _xHHHH_ in write(const char*,sal_Int32,true).
css::uno::Sequence< ::sal_Int32 > Int32Sequence
css::uno::Reference< css::io::XOutputStream > const & getOutputStream() const
void writeBytes(const css::uno::Sequence< ::sal_Int8 > &aData)
Forward the call to the output stream, or write to the stack.
void writeId(::sal_Int32 Element)
void mark(sal_Int32 nTag, const Int32Sequence &rOrder)
From now on, don't write directly to the stream, but to top of a stack.
void startDocument()
called by the parser when parsing of an XML stream is started.
sal_Int32 mnDoubleStrCapacity
const char sClosingBracket[]
const char sSlashAndClosingBracket[]
const char sOpeningBracket[]
const char sOpeningBracketAndSlash[]
constexpr OStringLiteral sColon
const char sEqualSignAndQuote[]
Sequence< sal_Int8 > aSeq
#define SAL_WARN_IF(condition, area, stream)
#define SAL_WARN(area, stream)
std::vector< sal_Int8, boost::noinit_adaptor< std::allocator< sal_Int8 > > > maData
OString OUStringToOString(std::u16string_view str, ConnectionSettings const *settings)
static bool isHexDigit(char c)
static void lcl_DebugMergeAppend(std::deque< sal_Int32 > &rLeftEndedElements, std::deque< sal_Int32 > &rLeftStartedElements, std::deque< sal_Int32 > &rRightEndedElements, std::deque< sal_Int32 > &rRightStartedElements)
static void lcl_DebugMergePrepend(std::deque< sal_Int32 > &rLeftEndedElements, std::deque< sal_Int32 > &rLeftStartedElements, std::deque< sal_Int32 > &rRightEndedElements, std::deque< sal_Int32 > &rRightStartedElements)
static std::optional< std::pair< unsigned, Int > > invalidChar(char const *string, Int length, Int index)
Characters not allowed in XML 1.0; XML 1.1 would exclude only U+0000.