LibreOffice Module sax (master) 1
fastserializer.cxx
Go to the documentation of this file.
1/* -*- Mode: C++; tab-width: 4; indent-tabs-mode: nil; c-basic-offset: 4 -*- */
2/*
3 * This file is part of the LibreOffice project.
4 *
5 * This Source Code Form is subject to the terms of the Mozilla Public
6 * License, v. 2.0. If a copy of the MPL was not distributed with this
7 * file, You can obtain one at http://mozilla.org/MPL/2.0/.
8 *
9 * This file incorporates work covered by the following license notice:
10 *
11 * Licensed to the Apache Software Foundation (ASF) under one or more
12 * contributor license agreements. See the NOTICE file distributed
13 * with this work for additional information regarding copyright
14 * ownership. The ASF licenses this file to you under the Apache
15 * License, Version 2.0 (the "License"); you may not use this file
16 * except in compliance with the License. You may obtain a copy of
17 * the License at http://www.apache.org/licenses/LICENSE-2.0 .
18 */
19
20#include "fastserializer.hxx"
21
22#include <com/sun/star/xml/sax/FastTokenHandler.hpp>
23#include <rtl/math.h>
24#include <sal/log.hxx>
27
28#include <cassert>
29#include <optional>
30#include <string.h>
31#include <string_view>
32#include <utility>
33
34#if OSL_DEBUG_LEVEL > 0
35#include <iostream>
36#include <set>
37#endif
38
39using ::std::vector;
40using ::com::sun::star::uno::Sequence;
41using ::com::sun::star::io::XOutputStream;
42
// A combined token stores a namespace id in its upper 16 bits and the local
// token in its lower 16 bits. Every macro argument is parenthesized so that a
// compound expression (e.g. `a | b`) is evaluated as a whole before the mask
// or shift is applied.
#define HAS_NAMESPACE(x) (((x) & 0xffff0000) != 0)
#define NAMESPACE(x) ((x) >> 16)
#define TOKEN(x) ((x) & 0xffff)
// number of characters without terminating 0
#define N_CHARS(string) (SAL_N_ELEMENTS(string) - 1)
48
49const char sClosingBracket[] = ">";
50const char sSlashAndClosingBracket[] = "/>";
51constexpr OStringLiteral sColon = ":";
52const char sOpeningBracket[] = "<";
53const char sOpeningBracketAndSlash[] = "</";
54const char sQuote[] = "\"";
55const char sEqualSignAndQuote[] = "=\"";
56const char sSpace[] = " ";
57const char sXmlHeader[] = "<?xml version=\"1.0\" encoding=\"UTF-8\" standalone=\"yes\"?>\n";
58
59namespace sax_fastparser {
60 FastSaxSerializer::FastSaxSerializer( const css::uno::Reference< css::io::XOutputStream >& xOutputStream )
61 : mbMarkStackEmpty(true)
62 , mpDoubleStr(nullptr)
63 , mnDoubleStrCapacity(RTL_STR_MAX_VALUEOFDOUBLE)
64 , mbXescape(true)
65 {
66 rtl_string_new_WithLength(&mpDoubleStr, mnDoubleStrCapacity);
67 mxFastTokenHandler = css::xml::sax::FastTokenHandler::create(
68 ::comphelper::getProcessComponentContext());
69 assert(xOutputStream.is()); // cannot do anything without that
71 }
72
74 {
75 rtl_string_release(mpDoubleStr);
76 }
77
79 {
81 }
82
83 void FastSaxSerializer::write( double value )
84 {
85 rtl_math_doubleToString(
86 &mpDoubleStr, &mnDoubleStrCapacity, 0, value, rtl_math_StringFormat_G,
87 RTL_STR_MAX_VALUEOFDOUBLE - RTL_CONSTASCII_LENGTH("-x.E-xxx"), '.', nullptr,
88 0, true);
89
90 write(mpDoubleStr->buffer, mpDoubleStr->length);
91 // and "clear" the string
92 mpDoubleStr->length = 0;
93 mnDoubleStrCapacity = RTL_STR_MAX_VALUEOFDOUBLE;
94 }
95
96 void FastSaxSerializer::write( std::u16string_view sOutput, bool bEscape )
97 {
98 write( OUStringToOString(sOutput, RTL_TEXTENCODING_UTF8), bEscape );
99
100 }
101
102 void FastSaxSerializer::write( std::string_view sOutput, bool bEscape )
103 {
104 write( sOutput.data(), sOutput.length(), bEscape );
105 }
106
/// Check whether the UTF-8 sequence starting at string[index] encodes a
/// character that this serializer treats as invalid.
///
/// Flagged are control characters below 0x20 other than TAB, LF and CR, and
/// the code points U+FFFE / U+FFFF (encoded as EF BF BE / EF BF BF).
///
/// @param string  the bytes to inspect
/// @param length  number of bytes available in string
/// @param index   position to inspect; must be less than length
/// @return empty if the character is acceptable, otherwise the pair
///         (code point, number of bytes the character occupies)
template<typename Int> static std::optional<std::pair<unsigned, Int>> invalidChar(
    char const * string, Int length, Int index )
{
    assert(index < length);
    auto const lead = static_cast<unsigned char>(string[index]);

    // TAB, LF and CR are the only accepted bytes below 0x20.
    if (lead == 0x09 || lead == 0x0a || lead == 0x0d)
        return {};

    // Any other control character is reported as a one-byte offender.
    if (lead < 0x20)
        return std::pair<unsigned, Int>(lead, 1);

    // From here on only a lead byte of 0xEF can start an invalid character.
    if (lead != 0xEF)
        return {};

    // U+FFFE, U+FFFF need three bytes: EF BF BE / EF BF BF.
    if (length - index < 3 || static_cast<unsigned char>(string[index + 1]) != 0xBF)
        return {};

    auto const last = static_cast<unsigned char>(string[index + 2]);
    if (last == 0xBE)
        return std::pair<unsigned, Int>(0xFFFE, 3);
    if (last == 0xBF)
        return std::pair<unsigned, Int>(0xFFFF, 3);
    return {};
}
149
/// Return true if c is an ASCII hexadecimal digit (0-9, A-F, a-f).
static bool isHexDigit( char c )
{
    if (c >= '0' && c <= '9')
        return true;

    // Setting bit 0x20 maps 'A'..'F' onto 'a'..'f' and leaves lower-case
    // letters unchanged, so a single range check covers both cases.
    const char cLower = static_cast<char>(c | 0x20);
    return cLower >= 'a' && cLower <= 'f';
}
154
155 void FastSaxSerializer::write( const char* pStr, sal_Int32 nLen, bool bEscape )
156 {
157 if (nLen == -1)
158 nLen = pStr ? strlen(pStr) : 0;
159
160 if (!bEscape)
161 {
162 writeBytes( pStr, nLen );
163 return;
164 }
165
166 bool bGood = true;
167 const sal_Int32 kXescapeLen = 7;
168 char bufXescape[kXescapeLen+1];
169 sal_Int32 nNextXescape = 0;
170 for (sal_Int32 i = 0; i < nLen;)
171 {
172 char c = pStr[ i ];
173 switch( c )
174 {
175 case '<': writeBytes( "&lt;", 4 ); break;
176 case '>': writeBytes( "&gt;", 4 ); break;
177 case '&': writeBytes( "&amp;", 5 ); break;
178 case '\'': writeBytes( "&apos;", 6 ); break;
179 case '"': writeBytes( "&quot;", 6 ); break;
180 case '\t':
181#if 0
182 // Seems OOXML prefers the _xHHHH_ escape over the
183 // entity in *some* cases, apparently in attribute
184 // values but not in element data.
185 // Would need to distinguish at a higher level.
186 if (mbXescape)
187 {
188 snprintf( bufXescape, kXescapeLen+1, "_x%04x_",
189 static_cast<unsigned int>(static_cast<unsigned char>(c)));
190 writeBytes( bufXescape, kXescapeLen);
191 }
192 else
193#endif
194 {
195 writeBytes( "&#9;", 4 );
196 }
197 break;
198 case '\n':
199#if 0
200 if (mbXescape)
201 {
202 snprintf( bufXescape, kXescapeLen+1, "_x%04x_",
203 static_cast<unsigned int>(static_cast<unsigned char>(c)));
204 writeBytes( bufXescape, kXescapeLen);
205 }
206 else
207#endif
208 {
209 writeBytes( "&#10;", 5 );
210 }
211 break;
212 case '\r':
213#if 0
214 if (mbXescape)
215 {
216 snprintf( bufXescape, kXescapeLen+1, "_x%04x_",
217 static_cast<unsigned int>(static_cast<unsigned char>(c)));
218 writeBytes( bufXescape, kXescapeLen);
219 }
220 else
221#endif
222 {
223 writeBytes( "&#13;", 5 );
224 }
225 break;
226 default:
227 if (mbXescape)
228 {
229 char c1, c2, c3, c4;
230 // Escape characters not valid in XML 1.0 as
231 // _xHHHH_. A literal "_xHHHH_" has to be
232 // escaped as _x005F_xHHHH_ (effectively
233 // escaping the leading '_').
234 // See ECMA-376-1:2016 page 3736,
235 // 22.4.2.4 bstr (Basic String)
236 // for reference.
237 if (c == '_' && i >= nNextXescape && i <= nLen - kXescapeLen &&
238 pStr[i+6] == '_' &&
239 ((pStr[i+1] | 0x20) == 'x') &&
240 isHexDigit( c1 = pStr[i+2] ) &&
241 isHexDigit( c2 = pStr[i+3] ) &&
242 isHexDigit( c3 = pStr[i+4] ) &&
243 isHexDigit( c4 = pStr[i+5] ))
244 {
245 // OOXML has the odd habit to write some
246 // names using this that when re-saving
247 // should *not* be escaped, specifically
248 // _x0020_ for blanks in w:xpath values.
249 if (!(c1 == '0' && c2 == '0' && c3 == '2' && c4 == '0'))
250 {
251 // When encountering "_x005F_xHHHH_"
252 // assume that is an already escaped
253 // sequence that was not unescaped and
254 // shall be written as is, to not end
255 // up with "_x005F_x005F_xHHHH_" and
256 // repeated...
257 if (c1 == '0' && c2 == '0' && c3 == '5' && (c4 | 0x20) == 'f' &&
258 i + kXescapeLen <= nLen - 6 &&
259 pStr[i+kXescapeLen+5] == '_' &&
260 ((pStr[i+kXescapeLen+0] | 0x20) == 'x') &&
261 isHexDigit( pStr[i+kXescapeLen+1] ) &&
262 isHexDigit( pStr[i+kXescapeLen+2] ) &&
263 isHexDigit( pStr[i+kXescapeLen+3] ) &&
264 isHexDigit( pStr[i+kXescapeLen+4] ))
265 {
266 writeBytes( &c, 1 );
267 // Remember this fake escapement.
268 nNextXescape = i + kXescapeLen + 6;
269 }
270 else
271 {
272 writeBytes( "_x005F_", kXescapeLen);
273 // Remember this escapement so in
274 // _xHHHH_xHHHH_ only the first '_'
275 // is escaped.
276 nNextXescape = i + kXescapeLen;
277 }
278 break;
279 }
280 }
281 if (auto const inv = invalidChar(pStr, nLen, i))
282 {
283 snprintf( bufXescape, kXescapeLen+1, "_x%04x_",
284 inv->first);
285 writeBytes( bufXescape, kXescapeLen);
286 i += inv->second;
287 continue;
288 }
289 }
290#if OSL_DEBUG_LEVEL > 0
291 else
292 {
293 if (bGood && invalidChar(pStr, nLen, i))
294 {
295 bGood = false;
296 // The SAL_WARN() for the single character is
297 // issued in writeBytes(), just gather for the
298 // SAL_WARN_IF() below.
299 }
300 }
301#endif
302 writeBytes( &c, 1 );
303 break;
304 }
305 ++i;
306 }
307 SAL_WARN_IF( !bGood && nLen > 1, "sax", "in '" << OString(pStr,std::min<sal_Int32>(nLen,42)) << "'");
308 }
309
311 {
312 assert(mbMarkStackEmpty && maMarkStack.empty());
314 }
315
316 void FastSaxSerializer::writeId( ::sal_Int32 nElement )
317 {
318 if( HAS_NAMESPACE( nElement ) ) {
319 auto const Namespace(mxFastTokenHandler->getUTF8Identifier(NAMESPACE(nElement)));
320 assert(Namespace.hasElements());
321 writeBytes(Namespace);
322 writeBytes(sColon.getStr(), sColon.getLength());
323 auto const Element(mxFastTokenHandler->getUTF8Identifier(TOKEN(nElement)));
324 assert(Element.hasElements());
325 writeBytes(Element);
326 } else {
327 auto const Element(mxFastTokenHandler->getUTF8Identifier(nElement));
328 assert(Element.hasElements());
329 writeBytes(Element);
330 }
331 }
332
333#ifdef DBG_UTIL
334 OString FastSaxSerializer::getId( ::sal_Int32 nElement )
335 {
336 if (HAS_NAMESPACE(nElement)) {
337 Sequence<sal_Int8> const ns(
338 mxFastTokenHandler->getUTF8Identifier(NAMESPACE(nElement)));
339 Sequence<sal_Int8> const name(
340 mxFastTokenHandler->getUTF8Identifier(TOKEN(nElement)));
341 return std::string_view(
342 reinterpret_cast<char const*>(ns.getConstArray()), ns.getLength())
343 + sColon
344 + std::string_view(
345 reinterpret_cast<char const*>(name.getConstArray()), name.getLength());
346 } else {
347 Sequence<sal_Int8> const name(
348 mxFastTokenHandler->getUTF8Identifier(nElement));
349 return OString(reinterpret_cast<char const*>(name.getConstArray()), name.getLength());
350 }
351 }
352#endif
353
354 void FastSaxSerializer::startFastElement( ::sal_Int32 Element, FastAttributeList const * pAttrList )
355 {
356 if ( !mbMarkStackEmpty )
357 {
359 maMarkStack.top()->setCurrentElement( Element );
360 }
361
362#ifdef DBG_UTIL
364 m_DebugStartedElements.push(Element);
365 else
366 maMarkStack.top()->m_DebugStartedElements.push_back(Element);
367#endif
368
370
371 writeId(Element);
372 if (pAttrList)
373 writeFastAttributeList(*pAttrList);
374 else
376
378 }
379
380 void FastSaxSerializer::endFastElement( ::sal_Int32 Element )
381 {
382#ifdef DBG_UTIL
383 // Well-formedness constraint: Element Type Match
385 {
386 assert(!m_DebugStartedElements.empty());
387 assert(Element == m_DebugStartedElements.top());
389 }
390 else
391 {
392 if (dynamic_cast<ForSort*>(maMarkStack.top().get()))
393 {
394 // Sort is always well-formed fragment
395 assert(!maMarkStack.top()->m_DebugStartedElements.empty());
396 }
397 if (maMarkStack.top()->m_DebugStartedElements.empty())
398 {
399 maMarkStack.top()->m_DebugEndedElements.push_back(Element);
400 }
401 else
402 {
403 assert(Element == maMarkStack.top()->m_DebugStartedElements.back());
404 maMarkStack.top()->m_DebugStartedElements.pop_back();
405 }
406 }
407#endif
408
410
411 writeId(Element);
412
414 }
415
416 void FastSaxSerializer::singleFastElement( ::sal_Int32 Element, FastAttributeList const * pAttrList )
417 {
418 if ( !mbMarkStackEmpty )
419 {
421 maMarkStack.top()->setCurrentElement( Element );
422 }
423
425
426 writeId(Element);
427 if (pAttrList)
428 writeFastAttributeList(*pAttrList);
429 else
431
433 }
434
435 css::uno::Reference< css::io::XOutputStream > const & FastSaxSerializer::getOutputStream() const
436 {
438 }
439
441 {
442#ifdef DBG_UTIL
443 ::std::set<OString> DebugAttributes;
444#endif
445 for (const TokenValue & rTokenValue : maTokenValues)
446 {
448
449 sal_Int32 nToken = rTokenValue.nToken;
451
452#ifdef DBG_UTIL
453 // Well-formedness constraint: Unique Att Spec
454 OString const nameId(getId(nToken));
455 assert(DebugAttributes.find(nameId) == DebugAttributes.end());
456 DebugAttributes.insert(nameId);
457#endif
458
460
461 write(rTokenValue.pValue, -1, true);
462
464 }
465 maTokenValues.clear();
466 }
467
469 {
470#ifdef DBG_UTIL
471 ::std::set<OString> DebugAttributes;
472#endif
473 const std::vector< sal_Int32 >& Tokens = rAttrList.getFastAttributeTokens();
474 for (size_t j = 0; j < Tokens.size(); j++)
475 {
477
478 sal_Int32 nToken = Tokens[j];
480
481#ifdef DBG_UTIL
482 // Well-formedness constraint: Unique Att Spec
483 OString const nameId(getId(nToken));
484 SAL_WARN_IF(DebugAttributes.find(nameId) != DebugAttributes.end(), "sax", "Duplicate attribute: " << nameId );
485 assert(DebugAttributes.find(nameId) == DebugAttributes.end());
486 DebugAttributes.insert(nameId);
487#endif
488
490
491 const char* pAttributeValue = rAttrList.getFastAttributeValue(j);
492
493 // tdf#117274 don't escape the special VML shape type id "#_x0000_t202"
494 bool bEscape = !(pAttributeValue
495 && *pAttributeValue != '\0'
496 && (*pAttributeValue == '#'
497 ? strncmp(pAttributeValue, "#_x0000_t", 9) == 0
498 : strncmp(pAttributeValue, "_x0000_t", 8) == 0));
499
500 write(pAttributeValue, rAttrList.AttributeValueLength(j), bEscape);
501
503 }
504 }
505
506 void FastSaxSerializer::mark(sal_Int32 const nTag, const Int32Sequence& rOrder)
507 {
508 if (rOrder.hasElements())
509 {
510 auto pSort = std::make_shared<ForSort>(nTag, rOrder);
511 maMarkStack.push( pSort );
513 }
514 else
515 {
516 auto pMerge = std::make_shared<ForMerge>(nTag);
517 maMarkStack.push( pMerge );
519 }
520 mbMarkStackEmpty = false;
521 }
522
523#ifdef DBG_UTIL
525 std::deque<sal_Int32> & rLeftEndedElements,
526 std::deque<sal_Int32> & rLeftStartedElements,
527 std::deque<sal_Int32> & rRightEndedElements,
528 std::deque<sal_Int32> & rRightStartedElements)
529 {
530 while (!rRightEndedElements.empty())
531 {
532 if (rLeftStartedElements.empty())
533 {
534 rLeftEndedElements.push_back(rRightEndedElements.front());
535 }
536 else
537 {
538 assert(rLeftStartedElements.back() == rRightEndedElements.front());
539 rLeftStartedElements.pop_back();
540 }
541 rRightEndedElements.pop_front();
542 }
543 while (!rRightStartedElements.empty())
544 {
545 rLeftStartedElements.push_back(rRightStartedElements.front());
546 rRightStartedElements.pop_front();
547 }
548 }
549
551 std::deque<sal_Int32> & rLeftEndedElements,
552 std::deque<sal_Int32> & rLeftStartedElements,
553 std::deque<sal_Int32> & rRightEndedElements,
554 std::deque<sal_Int32> & rRightStartedElements)
555 {
556 while (!rLeftStartedElements.empty())
557 {
558 if (rRightEndedElements.empty())
559 {
560 rRightStartedElements.push_front(rLeftStartedElements.back());
561 }
562 else
563 {
564 assert(rRightEndedElements.front() == rLeftStartedElements.back());
565 rRightEndedElements.pop_front();
566 }
567 rLeftStartedElements.pop_back();
568 }
569 while (!rLeftEndedElements.empty())
570 {
571 rRightEndedElements.push_front(rLeftEndedElements.back());
572 rLeftEndedElements.pop_back();
573 }
574 }
575#endif
576
578 sal_Int32 const nTag, sax_fastparser::MergeMarks const eMergeType)
579 {
580 SAL_WARN_IF(mbMarkStackEmpty, "sax", "Empty mark stack - nothing to merge");
581 assert(!mbMarkStackEmpty); // should never happen
582 if ( mbMarkStackEmpty )
583 return;
584
585 assert(maMarkStack.top()->m_Tag == nTag && "mark/merge tag mismatch!");
586 (void) nTag;
587#ifdef DBG_UTIL
588 if (dynamic_cast<ForSort*>(maMarkStack.top().get()))
589 {
590 // Sort is always well-formed fragment
591 assert(maMarkStack.top()->m_DebugStartedElements.empty());
592 assert(maMarkStack.top()->m_DebugEndedElements.empty());
593 }
595 maMarkStack.top()->m_DebugEndedElements,
596 maMarkStack.top()->m_DebugStartedElements,
597 maMarkStack.top()->m_DebugPostponedEndedElements,
598 maMarkStack.top()->m_DebugPostponedStartedElements);
599#endif
600
601 // flush, so that we get everything in getData()
603
604 if (maMarkStack.size() == 1)
605 {
606#ifdef DBG_UTIL
607 while (!maMarkStack.top()->m_DebugEndedElements.empty())
608 {
609 assert(maMarkStack.top()->m_DebugEndedElements.front() == m_DebugStartedElements.top());
610 maMarkStack.top()->m_DebugEndedElements.pop_front();
612 }
613 while (!maMarkStack.top()->m_DebugStartedElements.empty())
614 {
615 m_DebugStartedElements.push(maMarkStack.top()->m_DebugStartedElements.front());
616 maMarkStack.top()->m_DebugStartedElements.pop_front();
617 }
618#endif
619 Sequence<sal_Int8> aSeq( maMarkStack.top()->getData() );
620 maMarkStack.pop();
621 mbMarkStackEmpty = true;
623 maCachedOutputStream.writeBytes( aSeq.getConstArray(), aSeq.getLength() );
624 return;
625 }
626
627#ifdef DBG_UTIL
628 ::std::deque<sal_Int32> topDebugStartedElements(maMarkStack.top()->m_DebugStartedElements);
629 ::std::deque<sal_Int32> topDebugEndedElements(maMarkStack.top()->m_DebugEndedElements);
630#endif
631 const Int8Sequence aMerge( maMarkStack.top()->getData() );
632 maMarkStack.pop();
633#ifdef DBG_UTIL
634 switch (eMergeType)
635 {
638 maMarkStack.top()->m_DebugEndedElements,
639 maMarkStack.top()->m_DebugStartedElements,
640 topDebugEndedElements,
641 topDebugStartedElements);
642 break;
644 if (dynamic_cast<ForSort*>(maMarkStack.top().get())) // argh...
645 {
647 maMarkStack.top()->m_DebugEndedElements,
648 maMarkStack.top()->m_DebugStartedElements,
649 topDebugEndedElements,
650 topDebugStartedElements);
651 }
652 else
653 {
655 topDebugEndedElements,
656 topDebugStartedElements,
657 maMarkStack.top()->m_DebugEndedElements,
658 maMarkStack.top()->m_DebugStartedElements);
659 }
660 break;
663 maMarkStack.top()->m_DebugPostponedEndedElements,
664 maMarkStack.top()->m_DebugPostponedStartedElements,
665 topDebugEndedElements,
666 topDebugStartedElements);
667 break;
668 }
669#endif
670 if (maMarkStack.empty())
671 {
672 mbMarkStackEmpty = true;
674 }
675 else
676 {
678 }
679
680 switch ( eMergeType )
681 {
682 case MergeMarks::APPEND: maMarkStack.top()->append( aMerge ); break;
683 case MergeMarks::PREPEND: maMarkStack.top()->prepend( aMerge ); break;
684 case MergeMarks::POSTPONE: maMarkStack.top()->postpone( aMerge ); break;
685 }
686 }
687
689 {
690 maCachedOutputStream.writeBytes( rData.getConstArray(), rData.getLength() );
691 }
692
693 void FastSaxSerializer::writeBytes( const char* pStr, size_t nLen )
694 {
695#if OSL_DEBUG_LEVEL > 0
696 {
697 bool bGood = true;
698 for (size_t i=0; i < nLen;)
699 {
700 if (auto const inv = invalidChar(pStr, nLen, i))
701 {
702 bGood = false;
703 SAL_WARN("sax", "FastSaxSerializer::writeBytes - illegal XML character 0x" <<
704 std::hex << inv->first);
705 i += inv->second;
706 continue;
707 }
708 ++i;
709 }
710 SAL_WARN_IF( !bGood && nLen > 1, "sax", "in '" << OString(pStr,std::min<sal_Int32>(nLen,42)) << "'");
711 }
712#endif
713 maCachedOutputStream.writeBytes( reinterpret_cast<const sal_Int8*>(pStr), nLen );
714 }
715
717 {
718 merge( maData, maPostponed, true );
719 maPostponed.realloc( 0 );
720
721 return maData;
722 }
723
724#if OSL_DEBUG_LEVEL > 0
726 {
727 std::cerr << "Data: ";
728 for ( sal_Int32 i=0, len=maData.getLength(); i < len; i++ )
729 {
730 std::cerr << maData[i];
731 }
732
733 std::cerr << "\nPostponed: ";
734 for ( sal_Int32 i=0, len=maPostponed.getLength(); i < len; i++ )
735 {
736 std::cerr << maPostponed[i];
737 }
738
739 std::cerr << "\n";
740 }
741#endif
742
744 {
745 merge( maData, rWhat, false );
746 }
747
748 void FastSaxSerializer::ForMerge::append( const css::uno::Sequence<sal_Int8> &rWhat )
749 {
750 merge( maData, rWhat, true );
751 }
752
754 {
755 merge( maPostponed, rWhat, true );
756 }
757
758 void FastSaxSerializer::ForMerge::merge( Int8Sequence &rTop, const Int8Sequence &rMerge, bool bAppend )
759 {
760 sal_Int32 nMergeLen = rMerge.getLength();
761 if ( nMergeLen <= 0 )
762 return;
763
764 sal_Int32 nTopLen = rTop.getLength();
765
766 rTop.realloc( nTopLen + nMergeLen );
767 if ( bAppend )
768 {
769 // append the rMerge to the rTop
770 memcpy( rTop.getArray() + nTopLen, rMerge.getConstArray(), nMergeLen );
771 }
772 else
773 {
774 // prepend the rMerge to the rTop
775 memmove( rTop.getArray() + nMergeLen, rTop.getConstArray(), nTopLen );
776 memcpy( rTop.getArray(), rMerge.getConstArray(), nMergeLen );
777 }
778 }
779
781 {
783 }
784
786 {
787 const auto & rOrder = maOrder;
788 if( std::find( rOrder.begin(), rOrder.end(), nElement ) != rOrder.end() )
789 {
790 mnCurrentElement = nElement;
791 if ( maData.find( nElement ) == maData.end() )
792 maData[ nElement ] = Int8Sequence();
793 }
794 }
795
797 {
798 append( rWhat );
799 }
800
801 void FastSaxSerializer::ForSort::append( const css::uno::Sequence<sal_Int8> &rWhat )
802 {
803 merge( maData[mnCurrentElement], rWhat, true );
804 }
805
807 {
808 // Clear the ForMerge data to avoid duplicate items
809 resetData();
810
811 // Sort it all
812 std::map< sal_Int32, Int8Sequence >::iterator iter;
813 for ( const auto nIndex : std::as_const(maOrder) )
814 {
815 iter = maData.find( nIndex );
816 if ( iter != maData.end() )
817 ForMerge::append( iter->second );
818 }
819 }
820
822 {
823 sort( );
824 return ForMerge::getData();
825 }
826
827#if OSL_DEBUG_LEVEL > 0
829 {
830 for ( const auto& [rElement, rData] : maData )
831 {
832 std::cerr << "pair: " << rElement;
833 for ( sal_Int32 i=0, len=rData.getLength(); i < len; ++i )
834 std::cerr << rData[i];
835 std::cerr << "\n";
836 }
837
838 sort( );
840 }
841#endif
842
843} // namespace sax_fastparser
844
845/* vim:set shiftwidth=4 softtabstop=4 expandtab: */
void setOutput(std::shared_ptr< ForMergeBase > pForMerge)
void writeBytes(const sal_Int8 *pStr, sal_Int32 nLen)
cache string and if limit is hit, flush
const css::uno::Reference< css::io::XOutputStream > & getOutputStream() const
void flush()
immediately write buffer into mxOutputStream and clear
void setOutputStream(const css::uno::Reference< css::io::XOutputStream > &xOutputStream)
sal_Int32 AttributeValueLength(size_t i) const
const std::vector< sal_Int32 > & getFastAttributeTokens() const
const char * getFastAttributeValue(size_t nIndex) const
virtual void append(const css::uno::Sequence< sal_Int8 > &rWhat) override
void postpone(const Int8Sequence &rWhat)
virtual void prepend(const Int8Sequence &rWhat)
static void merge(Int8Sequence &rTop, const Int8Sequence &rMerge, bool bAppend)
virtual Int8Sequence & getData() override
virtual void prepend(const Int8Sequence &rWhat) override
void setCurrentElement(::sal_Int32 nToken) override
virtual void append(const css::uno::Sequence< sal_Int8 > &rWhat) override
std::stack< std::shared_ptr< ForMerge > > maMarkStack
OString getId(::sal_Int32 Element)
std::stack< sal_Int32 > m_DebugStartedElements
void endDocument()
called by the parser after the last XML element of a stream is processed.
void writeFastAttributeList(FastAttributeList const &rAttrList)
void startFastElement(::sal_Int32 Element, FastAttributeList const *pAttrList=nullptr)
receives notification of the beginning of an element.
void mergeTopMarks(sal_Int32 nTag, sax_fastparser::MergeMarks eMergeType)
Merge 2 topmost marks.
FastSaxSerializer(const css::uno::Reference< css::io::XOutputStream > &xOutputStream)
void singleFastElement(::sal_Int32 Element, FastAttributeList const *pAttrList=nullptr)
receives notification of the beginning of a single element.
css::uno::Sequence< ::sal_Int8 > Int8Sequence
void endFastElement(::sal_Int32 Element)
receives notification of the end of a known element.
CachedOutputStream maCachedOutputStream
Helper class to cache data and write in chunks to XOutputStream or ForMerge::append.
css::uno::Reference< css::xml::sax::XFastTokenHandler > mxFastTokenHandler
bool mbXescape
whether to escape invalid XML characters as _xHHHH_ in write(const char*,sal_Int32,true)
css::uno::Sequence< ::sal_Int32 > Int32Sequence
css::uno::Reference< css::io::XOutputStream > const & getOutputStream() const
void writeBytes(const css::uno::Sequence< ::sal_Int8 > &aData)
Forward the call to the output stream, or write to the stack.
void writeId(::sal_Int32 Element)
void mark(sal_Int32 nTag, const Int32Sequence &rOrder)
From now on, don't write directly to the stream, but to top of a stack.
void startDocument()
called by the parser when parsing of an XML stream is started.
Any value
const char sClosingBracket[]
const char sSlashAndClosingBracket[]
#define NAMESPACE(x)
const char sOpeningBracket[]
#define TOKEN(x)
const char sOpeningBracketAndSlash[]
const char sXmlHeader[]
#define N_CHARS(string)
#define HAS_NAMESPACE(x)
const char sQuote[]
constexpr OStringLiteral sColon
const char sSpace[]
const char sEqualSignAndQuote[]
const char * name
sal_Int32 nIndex
Sequence< sal_Int8 > aSeq
#define SAL_WARN_IF(condition, area, stream)
#define SAL_WARN(area, stream)
std::vector< sal_Int8, boost::noinit_adaptor< std::allocator< sal_Int8 > > > maData
ns
int i
index
OString OUStringToOString(std::u16string_view str, ConnectionSettings const *settings)
static bool isHexDigit(char c)
static void lcl_DebugMergeAppend(std::deque< sal_Int32 > &rLeftEndedElements, std::deque< sal_Int32 > &rLeftStartedElements, std::deque< sal_Int32 > &rRightEndedElements, std::deque< sal_Int32 > &rRightStartedElements)
static void lcl_DebugMergePrepend(std::deque< sal_Int32 > &rLeftEndedElements, std::deque< sal_Int32 > &rLeftStartedElements, std::deque< sal_Int32 > &rRightEndedElements, std::deque< sal_Int32 > &rRightStartedElements)
static std::optional< std::pair< unsigned, Int > > invalidChar(char const *string, Int length, Int index)
Characters not allowed in XML 1.0; XML 1.1 would exclude only U+0000.
DefTokenId nToken
signed char sal_Int8