LibreOffice Module l10ntools (master) 1
xmlparse.cxx
Go to the documentation of this file.
1/* -*- Mode: C++; tab-width: 4; indent-tabs-mode: nil; c-basic-offset: 4 -*- */
2/*
3 * This file is part of the LibreOffice project.
4 *
5 * This Source Code Form is subject to the terms of the Mozilla Public
6 * License, v. 2.0. If a copy of the MPL was not distributed with this
7 * file, You can obtain one at http://mozilla.org/MPL/2.0/.
8 *
9 * This file incorporates work covered by the following license notice:
10 *
11 * Licensed to the Apache Software Foundation (ASF) under one or more
12 * contributor license agreements. See the NOTICE file distributed
13 * with this work for additional information regarding copyright
14 * ownership. The ASF licenses this file to you under the Apache
15 * License, Version 2.0 (the "License"); you may not use this file
16 * except in compliance with the License. You may obtain a copy of
17 * the License at http://www.apache.org/licenses/LICENSE-2.0 .
18 */
19#include <sal/config.h>
20
21#include <cassert>
22#include <stdio.h>
23#include <string_view>
24
25#include <helper.hxx>
26#include <utility>
27#include <xmlparse.hxx>
28#include <fstream>
29#include <iostream>
30#include <osl/file.hxx>
31#include <osl/process.h>
32#include <o3tl/string_view.hxx>
33#include <rtl/ustring.hxx>
34#include <rtl/strbuf.hxx>
35#include <unicode/regex.h>
36
37using namespace osl;
38
39constexpr OStringLiteral XML_LANG = "xml-lang";
40
41
42
43
45 : m_pParent( pPar )
46{
47 if ( m_pParent )
48 m_pParent->AddChild( this );
49}
50
51
53 : XMLNode(rObj),
54 m_pParent(rObj.m_pParent)
55{
56}
57
59{
60 if(this != &rObj)
61 {
63 }
64 return *this;
65}
66
67
68
69
71{
72 if( m_pChildList )
73 {
75 }
76}
77
79: XMLChildNode( rObj )
80{
81 if( !rObj.m_pChildList )
82 return;
83
84 m_pChildList.reset( new XMLChildNodeList );
85 for ( size_t i = 0; i < rObj.m_pChildList->size(); i++ )
86 {
87 XMLChildNode* pNode = (*rObj.m_pChildList)[ i ];
88 if( pNode != nullptr)
89 {
90 switch(pNode->GetNodeType())
91 {
93 AddChild( new XMLElement( *static_cast<XMLElement* >(pNode) ) ); break;
95 AddChild( new XMLData ( *static_cast<XMLData* > (pNode) ) ); break;
97 AddChild( new XMLComment( *static_cast<XMLComment* >(pNode) ) ); break;
99 AddChild( new XMLDefault( *static_cast<XMLDefault* >(pNode) ) ); break;
100 default: fprintf(stdout,"XMLParentNode::XMLParentNode( const XMLParentNode& rObj) strange obj");
101 }
102 }
103 }
104}
105
107{
108 if(this!=&rObj)
109 {
111 if( m_pChildList )
112 {
114 }
115 if( rObj.m_pChildList )
116 {
117 m_pChildList.reset( new XMLChildNodeList );
118 for ( size_t i = 0; i < rObj.m_pChildList->size(); i++ )
119 AddChild( (*rObj.m_pChildList)[ i ] );
120 }
121 else
122 m_pChildList.reset();
123
124 }
125 return *this;
126}
128{
129 if ( !m_pChildList )
130 m_pChildList.reset( new XMLChildNodeList );
131 m_pChildList->push_back( pChild );
132}
133
135{
136 if ( m_pChildList )
137 {
138 for ( size_t i = 0; i < m_pChildList->size(); i++ )
139 delete (*m_pChildList)[ i ];
140 m_pChildList->clear();
141 }
142}
143
144
145
146
147void XMLFile::Write( OString const &aFilename )
148{
149 std::ofstream s(
150 aFilename.getStr(), std::ios_base::out | std::ios_base::trunc);
151 if (!s.is_open())
152 {
153 std::cerr
154 << "Error: helpex cannot create file " << aFilename
155 << '\n';
156 std::exit(EXIT_FAILURE);
157 }
158 Write(s);
159 s.close();
160}
161
162void XMLFile::Write( std::ofstream &rStream , XMLNode *pCur )
163{
164 if ( !pCur )
165 Write( rStream, this );
166 else {
167 switch( pCur->GetNodeType())
168 {
170 {
171 if( GetChildList())
172 for ( size_t i = 0; i < GetChildList()->size(); i++ )
173 Write( rStream, (*GetChildList())[ i ] );
174 }
175 break;
177 {
178 XMLElement *pElement = static_cast<XMLElement*>(pCur);
179 rStream << "<";
180 rStream << pElement->GetName();
181 if ( pElement->GetAttributeList())
182 for ( size_t j = 0; j < pElement->GetAttributeList()->size(); j++ )
183 {
184 rStream << " ";
185 OString sData( (*pElement->GetAttributeList())[ j ]->GetName() );
186 rStream << XMLUtil::QuotHTML( sData );
187 rStream << "=\"";
188 sData = (*pElement->GetAttributeList())[ j ]->GetValue();
189 rStream << XMLUtil::QuotHTML( sData );
190 rStream << "\"";
191 }
192 if ( !pElement->GetChildList())
193 rStream << "/>";
194 else
195 {
196 rStream << ">";
197 for ( size_t k = 0; k < pElement->GetChildList()->size(); k++ )
198 Write( rStream, (*pElement->GetChildList())[ k ] );
199 rStream << "</";
200 rStream << pElement->GetName();
201 rStream << ">";
202 }
203 }
204 break;
206 {
207 OString sData( static_cast<const XMLData*>(pCur)->GetData());
208 rStream << XMLUtil::QuotHTML( sData );
209 }
210 break;
212 {
213 const XMLComment *pComment = static_cast<const XMLComment*>(pCur);
214 rStream << "<!--";
215 rStream << pComment->GetComment();
216 rStream << "-->";
217 }
218 break;
220 {
221 const XMLDefault *pDefault = static_cast<const XMLDefault*>(pCur);
222 rStream << pDefault->GetDefault();
223 }
224 break;
225 }
226 }
227}
228
229void XMLFile::Print( XMLNode *pCur, sal_uInt16 nLevel )
230{
231 if ( !pCur )
232 Print( this );
233 else
234 {
235 switch( pCur->GetNodeType())
236 {
238 {
239 if( GetChildList())
240 for ( size_t i = 0; i < GetChildList()->size(); i++ )
241 Print( (*GetChildList())[ i ] );
242 }
243 break;
245 {
246 XMLElement *pElement = static_cast<XMLElement*>(pCur);
247
248 fprintf( stdout, "<%s", pElement->GetName().getStr());
249 if ( pElement->GetAttributeList())
250 {
251 for (size_t j = 0; j < pElement->GetAttributeList()->size(); ++j)
252 {
253 const OString aAttrName((*pElement->GetAttributeList())[j]->GetName());
254 if (aAttrName != XML_LANG)
255 {
256 fprintf( stdout, " %s=\"%s\"",
257 aAttrName.getStr(),
258 (*pElement->GetAttributeList())[ j ]->GetValue().getStr());
259 }
260 }
261 }
262 if ( !pElement->GetChildList())
263 fprintf( stdout, "/>" );
264 else
265 {
266 fprintf( stdout, ">" );
267 for ( size_t k = 0; k < pElement->GetChildList()->size(); k++ )
268 Print( (*pElement->GetChildList())[ k ], nLevel + 1 );
269 fprintf( stdout, "</%s>", pElement->GetName().getStr());
270 }
271 }
272 break;
274 {
275 const XMLData *pData = static_cast<const XMLData*>(pCur);
276 fprintf( stdout, "%s", pData->GetData().getStr());
277 }
278 break;
280 {
281 const XMLComment *pComment = static_cast<const XMLComment*>(pCur);
282 fprintf( stdout, "<!--%s-->", pComment->GetComment().getStr());
283 }
284 break;
286 {
287 const XMLDefault *pDefault = static_cast<const XMLDefault*>(pCur);
288 fprintf( stdout, "%s", pDefault->GetDefault().getStr());
289 }
290 break;
291 }
292 }
293}
295{
296 if( m_pXMLStrings )
297 {
298 for (auto const& pos : *m_pXMLStrings)
299 {
300 delete pos.second; // Check and delete content also ?
301 }
302 }
303}
304
305XMLFile::XMLFile( OString _sFileName ) // the file name, empty if created from memory stream
306 : XMLParentNode( nullptr )
307 , m_sFileName(std::move( _sFileName ))
308{
309 m_aNodes_localize.emplace( OString("bookmark") , true );
310 m_aNodes_localize.emplace( OString("variable") , true );
311 m_aNodes_localize.emplace( OString("paragraph") , true );
312 m_aNodes_localize.emplace( OString("h1") , true );
313 m_aNodes_localize.emplace( OString("h2") , true );
314 m_aNodes_localize.emplace( OString("h3") , true );
315 m_aNodes_localize.emplace( OString("h4") , true );
316 m_aNodes_localize.emplace( OString("h5") , true );
317 m_aNodes_localize.emplace( OString("h6") , true );
318 m_aNodes_localize.emplace( OString("note") , true );
319 m_aNodes_localize.emplace( OString("tip") , true );
320 m_aNodes_localize.emplace( OString("warning") , true );
321 m_aNodes_localize.emplace( OString("alt") , true );
322 m_aNodes_localize.emplace( OString("caption") , true );
323 m_aNodes_localize.emplace( OString("title") , true );
324 m_aNodes_localize.emplace( OString("link") , true );
325}
326
328{
329 m_pXMLStrings.reset( new XMLHashMap );
330 SearchL10NElements( this );
331}
332
334{
335 OString sId, sLanguage("en-US");
336 LangHashMap* pElem;
337
338 if( pElement->GetAttributeList() != nullptr )
339 {
340 for ( size_t j = 0; j < pElement->GetAttributeList()->size(); j++ )
341 {
342 const OString sTempStr((*pElement->GetAttributeList())[ j ]->GetName());
343 // Get the "id" Attribute
344 if (sTempStr == "id")
345 {
346 sId = (*pElement->GetAttributeList())[ j ]->GetValue();
347 }
348 // Get the "xml-lang" Attribute
349 if (sTempStr == XML_LANG)
350 {
351 sLanguage = (*pElement->GetAttributeList())[j]->GetValue();
352 }
353
354 }
355 }
356 else
357 {
358 fprintf(stdout,"XMLFile::InsertL10NElement: No AttributeList found");
359 fprintf(stdout,"++++++++++++++++++++++++++++++++++++++++++++++++++");
360 Print( pElement );
361 fprintf(stdout,"++++++++++++++++++++++++++++++++++++++++++++++++++");
362 }
363
364 XMLHashMap::iterator pos = m_pXMLStrings->find( sId );
365 if( pos == m_pXMLStrings->end() ) // No instance, create new one
366 {
367 pElem = new LangHashMap;
368 (*pElem)[ sLanguage ]=pElement;
369 m_pXMLStrings->emplace( sId , pElem );
370 m_vOrder.push_back( sId );
371 }
372 else // Already there
373 {
374 pElem=pos->second;
375 if ( pElem->count(sLanguage) )
376 {
377 fprintf(stdout,"Error: Duplicated entry. ID = %s LANG = %s in File %s\n", sId.getStr(), sLanguage.getStr(), m_sFileName.getStr() );
378 exit( -1 );
379 }
380 (*pElem)[ sLanguage ]=pElement;
381 }
382}
383
385 : XMLParentNode( rObj )
386 , m_sFileName( rObj.m_sFileName )
387{
388 if( this != &rObj )
389 {
391 m_vOrder = rObj.m_vOrder;
392 }
393}
394
396{
397 if( this == &rObj )
398 return *this;
399
401
403 m_vOrder = rObj.m_vOrder;
404
405 m_pXMLStrings.reset();
406
407 if( rObj.m_pXMLStrings )
408 {
409 m_pXMLStrings.reset( new XMLHashMap );
410 for (auto const& pos : *rObj.m_pXMLStrings)
411 {
412 LangHashMap* pElem=pos.second;
413 LangHashMap* pNewelem = new LangHashMap;
414 for (auto const& pos2 : *pElem)
415 {
416 (*pNewelem)[ pos2.first ] = new XMLElement( *pos2.second );
417 }
418 (*m_pXMLStrings)[ pos.first ] = pNewelem;
419 }
420 }
421 return *this;
422}
423
425{
426 if ( !pCur )
427 SearchL10NElements( this );
428 else
429 {
430 switch( pCur->GetNodeType())
431 {
433 {
434 if( GetChildList())
435 {
436 for ( size_t i = 0; i < GetChildList()->size(); i++ )
437 {
438 XMLChildNode* pElement = (*GetChildList())[ i ];
439 if( pElement->GetNodeType() == XMLNodeType::ELEMENT )
440 SearchL10NElements( pElement );
441 }
442 }
443 }
444 break;
446 {
447 bool bInsert = true;
448 XMLElement *pElement = static_cast<XMLElement*>(pCur);
449 const OString sName(pElement->GetName().toAsciiLowerCase());
450 if ( pElement->GetAttributeList())
451 {
452 for ( size_t j = 0 , cnt = pElement->GetAttributeList()->size(); j < cnt && bInsert; ++j )
453 {
454 if ((*pElement->GetAttributeList())[j]->GetName() == "localize")
455 {
456 bInsert=false;
457 break;
458 }
459 }
460 }
461
462 if ( bInsert && ( m_aNodes_localize.find( sName ) != m_aNodes_localize.end() ) )
463 InsertL10NElement(pElement);
464 else if ( bInsert && pElement->GetChildList() )
465 {
466 for ( size_t k = 0; k < pElement->GetChildList()->size(); k++ )
467 SearchL10NElements( (*pElement->GetChildList())[ k ] );
468 }
469 }
470 break;
471 default:
472 break;
473 }
474 }
475}
476
478{
479 static bool bStatusExport = true;
480
481 if ( !pCur )
482 CheckExportStatus( this );
483 else {
484 switch( pCur->GetNodeType())
485 {
487 {
488 if( GetChildList())
489 {
490 for ( size_t i = 0; i < GetChildList()->size(); i++ )
491 {
492 XMLChildNode* pElement = (*GetChildList())[ i ];
493 if( pElement->GetNodeType() == XMLNodeType::ELEMENT ) CheckExportStatus( pElement );//, i);
494 }
495 }
496 }
497 break;
499 {
500 XMLElement *pElement = static_cast<XMLElement*>(pCur);
501 if (pElement->GetName().equalsIgnoreAsciiCase("TOPIC"))
502 {
503 if ( pElement->GetAttributeList())
504 {
505 for (size_t j = 0 , cnt = pElement->GetAttributeList()->size(); j < cnt; ++j)
506 {
507 const OString tmpStr((*pElement->GetAttributeList())[j]->GetName());
508 if (tmpStr.equalsIgnoreAsciiCase("STATUS"))
509 {
510 const OString tmpStrVal((*pElement->GetAttributeList())[j]->GetValue());
511 if (!tmpStrVal.equalsIgnoreAsciiCase("PUBLISH") &&
512 !tmpStrVal.equalsIgnoreAsciiCase("DEPRECATED"))
513 {
514 bStatusExport = false;
515 }
516 }
517
518 }
519 }
520 }
521 else if ( pElement->GetChildList() )
522 {
523 for (size_t k = 0; k < pElement->GetChildList()->size(); ++k)
524 CheckExportStatus( (*pElement->GetChildList())[k] );
525 }
526 }
527 break;
528 default:
529 break;
530 }
531 }
532 return bStatusExport;
533}
534
536 OString _sName, // the element name
537 XMLParentNode *pParent // parent node of this element
538)
539 : XMLParentNode( pParent )
540 , m_sElementName(std::move( _sName ))
541{
542}
543
545 : XMLParentNode( rObj )
546 , m_sElementName( rObj.m_sElementName )
547{
548 if ( rObj.m_pAttributes )
549 {
550 m_pAttributes.reset( new XMLAttributeList );
551 for ( size_t i = 0; i < rObj.m_pAttributes->size(); i++ )
552 AddAttribute( (*rObj.m_pAttributes)[ i ]->GetName(), (*rObj.m_pAttributes)[ i ]->GetValue() );
553 }
554}
555
557{
558 if( this !=& rObj )
559 {
562
563 if ( m_pAttributes )
564 {
565 for ( size_t i = 0; i < m_pAttributes->size(); i++ )
566 delete (*m_pAttributes)[ i ];
567 m_pAttributes.reset();
568 }
569 if ( rObj.m_pAttributes )
570 {
571 m_pAttributes.reset( new XMLAttributeList );
572 for ( size_t i = 0; i < rObj.m_pAttributes->size(); i++ )
573 AddAttribute( (*rObj.m_pAttributes)[ i ]->GetName(), (*rObj.m_pAttributes)[ i ]->GetValue() );
574 }
575 }
576 return *this;
577}
578
579void XMLElement::AddAttribute( const OString &rAttribute, const OString &rValue )
580{
581 if ( !m_pAttributes )
582 m_pAttributes.reset( new XMLAttributeList );
583 m_pAttributes->push_back( new XMLAttribute( rAttribute, rValue ) );
584}
585
586void XMLElement::ChangeLanguageTag( const OString &rValue )
587{
588 if ( m_pAttributes )
589 {
590 bool bWasSet = false;
591 for (size_t i = 0; i < m_pAttributes->size(); ++i)
592 {
593 if ((*m_pAttributes)[ i ]->GetName() == XML_LANG)
594 {
595 (*m_pAttributes)[ i ]->setValue(rValue);
596 bWasSet = true;
597 }
598 }
599
600 if (!bWasSet)
601 AddAttribute(XML_LANG, rValue);
602 }
603 XMLChildNodeList* pCList = GetChildList();
604
605 if( !pCList )
606 return;
607
608 for ( size_t i = 0; i < pCList->size(); i++ )
609 {
610 XMLChildNode* pNode = (*pCList)[ i ];
611 if( pNode && pNode->GetNodeType() == XMLNodeType::ELEMENT )
612 {
613 XMLElement* pElem = static_cast< XMLElement* >(pNode);
614 pElem->ChangeLanguageTag( rValue );
615 pElem = nullptr;
616 pNode = nullptr;
617 }
618 }
619 pCList = nullptr;
620}
621
623{
624 if ( m_pAttributes )
625 {
626 for ( size_t i = 0; i < m_pAttributes->size(); i++ )
627 delete (*m_pAttributes)[ i ];
628 }
629}
630
632{
633 OStringBuffer sBuffer;
634 Print(this,sBuffer,true);
635 return sBuffer.makeStringAndClear();
636}
637
638void XMLElement::Print(XMLNode *pCur, OStringBuffer& rBuffer, bool bRootelement ) const
639{
640 if( pCur )
641 {
642 if( bRootelement )
643 {
644 XMLElement *pElement = static_cast<XMLElement*>(pCur);
645 if ( pElement->GetAttributeList())
646 {
647 if ( pElement->GetChildList())
648 {
649 for ( size_t k = 0; k < pElement->GetChildList()->size(); k++ )
650 {
651 XMLChildNode* pTmp = (*pElement->GetChildList())[ k ];
652 Print( pTmp, rBuffer , false);
653 }
654 }
655 }
656 }
657 else
658 {
659 switch( pCur->GetNodeType())
660 {
662 {
663 XMLElement *pElement = static_cast<XMLElement*>(pCur);
664
665 if( !pElement->GetName().equalsIgnoreAsciiCase("comment") )
666 {
667 rBuffer.append( "<" );
668 rBuffer.append( pElement->GetName() );
669 if ( pElement->GetAttributeList())
670 {
671 for ( size_t j = 0; j < pElement->GetAttributeList()->size(); j++ )
672 {
673 const OString aAttrName( (*pElement->GetAttributeList())[ j ]->GetName() );
674 if (aAttrName != XML_LANG)
675 {
676 rBuffer.append(
677 " " + aAttrName + "=\"" +
678 (*pElement->GetAttributeList())[ j ]->GetValue() + "\"" );
679 }
680 }
681 }
682 if ( !pElement->GetChildList())
683 rBuffer.append( "/>" );
684 else
685 {
686 rBuffer.append( ">" );
687 for ( size_t k = 0; k < pElement->GetChildList()->size(); k++ )
688 {
689 XMLChildNode* pTmp = (*pElement->GetChildList())[ k ];
690 Print( pTmp, rBuffer , false);
691 }
692 rBuffer.append( "</" + pElement->GetName() + ">" );
693 }
694 }
695 }
696 break;
698 {
699 const XMLData *pData = static_cast<const XMLData*>(pCur);
700 rBuffer.append( pData->GetData() );
701 }
702 break;
704 {
705 const XMLComment *pComment = static_cast<const XMLComment*>(pCur);
706 rBuffer.append( "<!--" + pComment->GetComment() + "-->" );
707 }
708 break;
710 {
711 const XMLDefault *pDefault = static_cast<const XMLDefault*>(pCur);
712 rBuffer.append( pDefault->GetDefault() );
713 }
714 break;
715 default:
716 break;
717 }
718 }
719 }
720 else
721 {
722 fprintf(stdout,"\n#+------Error: NULL Pointer in XMLELement::Print------+#\n");
723 return;
724 }
725}
726
727
728
729
730namespace
731{
732
733OUString lcl_pathnameToAbsoluteUrl(std::string_view rPathname)
734{
735 OUString sPath = OStringToOUString(rPathname, RTL_TEXTENCODING_UTF8 );
736 OUString sUrl;
737 if (osl::FileBase::getFileURLFromSystemPath(sPath, sUrl)
738 != osl::FileBase::E_None)
739 {
740 std::cerr << "Error: Cannot convert input pathname to URL\n";
741 std::exit(EXIT_FAILURE);
742 }
743 OUString sCwd;
744 if (osl_getProcessWorkingDir(&sCwd.pData) != osl_Process_E_None)
745 {
746 std::cerr << "Error: Cannot determine cwd\n";
747 std::exit(EXIT_FAILURE);
748 }
749 if (osl::FileBase::getAbsoluteFileURL(sCwd, sUrl, sUrl)
750 != osl::FileBase::E_None)
751 {
752 std::cerr << "Error: Cannot convert input URL to absolute URL\n";
753 std::exit(EXIT_FAILURE);
754 }
755 return sUrl;
756}
757}
758
759
761 : m_pCurNode(nullptr)
762 , m_pCurData(nullptr)
763{
764 m_aParser = XML_ParserCreate( nullptr );
765 XML_SetUserData( m_aParser, this );
766 XML_SetElementHandler( m_aParser, StartElementHandler, EndElementHandler );
767 XML_SetCharacterDataHandler( m_aParser, CharacterDataHandler );
768 XML_SetCommentHandler( m_aParser, CommentHandler );
769 XML_SetDefaultHandler( m_aParser, DefaultHandler );
770}
771
773{
774 XML_ParserFree( m_aParser );
775}
776
778 void *userData, const XML_Char *name, const XML_Char **atts )
779{
780 static_cast<SimpleXMLParser *>(userData)->StartElement( name, atts );
781}
782
784 void *userData, const XML_Char * /*name*/ )
785{
786 static_cast<SimpleXMLParser *>(userData)->EndElement();
787}
788
790 void *userData, const XML_Char *s, int len )
791{
792 static_cast<SimpleXMLParser *>(userData)->CharacterData( s, len );
793}
794
796 void *userData, const XML_Char *data )
797{
798 static_cast<SimpleXMLParser *>(userData)->Comment( data );
799}
800
802 void *userData, const XML_Char *s, int len )
803{
804 static_cast<SimpleXMLParser *>(userData)->Default( s, len );
805}
806
808 const XML_Char *name, const XML_Char **atts )
809{
810 XMLElement *pElement = new XMLElement( OString(name), m_pCurNode );
811 m_pCurNode = pElement;
812 m_pCurData = nullptr;
813
814 int i = 0;
815 while( atts[i] )
816 {
817 pElement->AddAttribute( atts[ i ], atts[ i + 1 ] );
818 i += 2;
819 }
820}
821
823{
825 m_pCurData = nullptr;
826}
827
828void SimpleXMLParser::CharacterData( const XML_Char *s, int len )
829{
830 if ( !m_pCurData )
831 {
832 OString x( s, len );
834 }
835 else
836 {
837 OString x( s, len );
839
840 }
841}
842
843void SimpleXMLParser::Comment( const XML_Char *data )
844{
845 m_pCurData = nullptr;
846 new XMLComment( OString( data ), m_pCurNode );
847}
848
849void SimpleXMLParser::Default( const XML_Char *s, int len )
850{
851 m_pCurData = nullptr;
852 new XMLDefault(OString( s, len ), m_pCurNode );
853}
854
855bool SimpleXMLParser::Execute( const OString &rFileName, XMLFile* pXMLFile )
856{
857 m_aErrorInformation.m_eCode = XML_ERROR_NONE;
860 m_aErrorInformation.m_sMessage = "ERROR: Unable to open file ";
861 m_aErrorInformation.m_sMessage += rFileName;
862
863 OUString aFileURL(lcl_pathnameToAbsoluteUrl(rFileName));
864
865 oslFileHandle h;
866 if (osl_openFile(aFileURL.pData, &h, osl_File_OpenFlag_Read)
867 != osl_File_E_None)
868 {
869 return false;
870 }
871
872 sal_uInt64 s;
873 oslFileError e = osl_getFileSize(h, &s);
874 void * p = nullptr;
875 if (e == osl_File_E_None)
876 {
877 e = osl_mapFile(h, &p, s, 0, 0);
878 }
879 if (e != osl_File_E_None)
880 {
881 osl_closeFile(h);
882 return false;
883 }
884
885 pXMLFile->SetName( rFileName );
886
887 m_pCurNode = pXMLFile;
888 m_pCurData = nullptr;
889
890 m_aErrorInformation.m_eCode = XML_ERROR_NONE;
893 if ( !pXMLFile->GetName().isEmpty())
894 {
895 m_aErrorInformation.m_sMessage = "File " + pXMLFile->GetName() + " parsed successfully";
896 }
897 else
898 m_aErrorInformation.m_sMessage = "XML-File parsed successfully";
899
900 bool result = XML_Parse(m_aParser, static_cast< char * >(p), s, true);
901 if (!result)
902 {
903 m_aErrorInformation.m_eCode = XML_GetErrorCode( m_aParser );
904 m_aErrorInformation.m_nLine = XML_GetErrorLineNumber( m_aParser );
905 m_aErrorInformation.m_nColumn = XML_GetErrorColumnNumber( m_aParser );
906
908 if ( !pXMLFile->GetName().isEmpty())
910 else
911 m_aErrorInformation.m_sMessage += "XML-File (";
912
914 OString::number(sal::static_int_cast< sal_Int64 >(m_aErrorInformation.m_nLine)) + "," +
915 OString::number(sal::static_int_cast< sal_Int64 >(m_aErrorInformation.m_nColumn)) + "): ";
916
918 {
919 case XML_ERROR_NO_MEMORY:
920 m_aErrorInformation.m_sMessage += "No memory";
921 break;
922 case XML_ERROR_SYNTAX:
924 break;
925 case XML_ERROR_NO_ELEMENTS:
926 m_aErrorInformation.m_sMessage += "No elements";
927 break;
928 case XML_ERROR_INVALID_TOKEN:
929 m_aErrorInformation.m_sMessage += "Invalid token";
930 break;
931 case XML_ERROR_UNCLOSED_TOKEN:
932 m_aErrorInformation.m_sMessage += "Unclosed token";
933 break;
934 case XML_ERROR_PARTIAL_CHAR:
935 m_aErrorInformation.m_sMessage += "Partial char";
936 break;
937 case XML_ERROR_TAG_MISMATCH:
938 m_aErrorInformation.m_sMessage += "Tag mismatch";
939 break;
940 case XML_ERROR_DUPLICATE_ATTRIBUTE:
941 m_aErrorInformation.m_sMessage += "Duplicated attribute";
942 break;
943 case XML_ERROR_JUNK_AFTER_DOC_ELEMENT:
944 m_aErrorInformation.m_sMessage += "Junk after doc element";
945 break;
946 case XML_ERROR_PARAM_ENTITY_REF:
947 m_aErrorInformation.m_sMessage += "Param entity ref";
948 break;
949 case XML_ERROR_UNDEFINED_ENTITY:
950 m_aErrorInformation.m_sMessage += "Undefined entity";
951 break;
952 case XML_ERROR_RECURSIVE_ENTITY_REF:
953 m_aErrorInformation.m_sMessage += "Recursive entity ref";
954 break;
955 case XML_ERROR_ASYNC_ENTITY:
956 m_aErrorInformation.m_sMessage += "Async_entity";
957 break;
958 case XML_ERROR_BAD_CHAR_REF:
959 m_aErrorInformation.m_sMessage += "Bad char ref";
960 break;
961 case XML_ERROR_BINARY_ENTITY_REF:
962 m_aErrorInformation.m_sMessage += "Binary entity";
963 break;
964 case XML_ERROR_ATTRIBUTE_EXTERNAL_ENTITY_REF:
965 m_aErrorInformation.m_sMessage += "Attribute external entity ref";
966 break;
967 case XML_ERROR_MISPLACED_XML_PI:
968 m_aErrorInformation.m_sMessage += "Misplaced xml pi";
969 break;
970 case XML_ERROR_UNKNOWN_ENCODING:
971 m_aErrorInformation.m_sMessage += "Unknown encoding";
972 break;
973 case XML_ERROR_INCORRECT_ENCODING:
974 m_aErrorInformation.m_sMessage += "Incorrect encoding";
975 break;
976 case XML_ERROR_UNCLOSED_CDATA_SECTION:
977 m_aErrorInformation.m_sMessage += "Unclosed cdata section";
978 break;
979 case XML_ERROR_EXTERNAL_ENTITY_HANDLING:
980 m_aErrorInformation.m_sMessage += "External entity handling";
981 break;
982 case XML_ERROR_NOT_STANDALONE:
983 m_aErrorInformation.m_sMessage += "Not standalone";
984 break;
985 case XML_ERROR_NONE:
986 break;
987 default:
988 break;
989 }
990 }
991
992 osl_unmapMappedFile(h, p, s);
993 osl_closeFile(h);
994
995 return result;
996}
997
998namespace
999{
1000
1001icu::UnicodeString lcl_QuotRange(
1002 const icu::UnicodeString& rString, const sal_Int32 nStart,
1003 const sal_Int32 nEnd, bool bInsideTag = false )
1004{
1005 icu::UnicodeString sReturn;
1006 assert( nStart < nEnd );
1007 assert( nStart >= 0 );
1008 assert( nEnd <= rString.length() );
1009 for (sal_Int32 i = nStart; i < nEnd; ++i)
1010 {
1011 switch (rString[i])
1012 {
1013 case '<':
1014 sReturn.append("&lt;");
1015 break;
1016 case '>':
1017 sReturn.append("&gt;");
1018 break;
1019 case '"':
1020 if( !bInsideTag )
1021 sReturn.append("&quot;");
1022 else
1023 sReturn.append(rString[i]);
1024 break;
1025 case '&':
1026 if (rString.startsWith("&amp;", i, 5))
1027 sReturn.append('&');
1028 else
1029 sReturn.append("&amp;");
1030 break;
1031 default:
1032 sReturn.append(rString[i]);
1033 break;
1034 }
1035 }
1036 return sReturn;
1037}
1038
1039bool lcl_isTag( const icu::UnicodeString& rString )
1040{
1041 static const int nSize = 20;
1042 static const icu::UnicodeString vTags[nSize] = {
1043 "ahelp", "link", "item", "emph", "defaultinline",
1044 "switchinline", "caseinline", "variable",
1045 "bookmark_value", "image", "object",
1046 "embedvar", "alt", "sup", "sub",
1047 "menuitem", "keycode", "input", "literal", "widget"
1048 };
1049
1050 for( int nIndex = 0; nIndex < nSize; ++nIndex )
1051 {
1052 if( rString.startsWith("<" + vTags[nIndex]) ||
1053 rString == "</" + vTags[nIndex] + ">" )
1054 return true;
1055 }
1056
1057 return rString == "<br/>" || rString =="<help-id-missing/>";
1058}
1059
1060}
1061
1062OString XMLUtil::QuotHTML( const OString &rString )
1063{
1064 if( o3tl::trim(rString).empty() )
1065 return rString;
1066 UErrorCode nIcuErr = U_ZERO_ERROR;
1067 static const sal_uInt32 nSearchFlags =
1068 UREGEX_DOTALL | UREGEX_CASE_INSENSITIVE;
1069 static const icu::UnicodeString sSearchPat( "<[/]\?\?[a-z_-]+?(?:| +[a-z]+?=\".*?\") *[/]\?\?>" );
1070
1071 const OUString sOUSource = OStringToOUString(rString, RTL_TEXTENCODING_UTF8);
1072 icu::UnicodeString sSource(
1073 reinterpret_cast<const UChar*>(
1074 sOUSource.getStr()), sOUSource.getLength() );
1075
1076 icu::RegexMatcher aRegexMatcher( sSearchPat, nSearchFlags, nIcuErr );
1077 aRegexMatcher.reset( sSource );
1078
1079 icu::UnicodeString sReturn;
1080 int32_t nEndPos = 0;
1081 int32_t nStartPos = 0;
1082 while( aRegexMatcher.find(nStartPos, nIcuErr) && U_SUCCESS(nIcuErr) )
1083 {
1084 nStartPos = aRegexMatcher.start(nIcuErr);
1085 if ( nEndPos < nStartPos )
1086 sReturn.append(lcl_QuotRange(sSource, nEndPos, nStartPos));
1087 nEndPos = aRegexMatcher.end(nIcuErr);
1088 icu::UnicodeString sMatch = aRegexMatcher.group(nIcuErr);
1089 if( lcl_isTag(sMatch) )
1090 {
1091 sReturn.append("<");
1092 sReturn.append(lcl_QuotRange(sSource, nStartPos+1, nEndPos-1, true));
1093 sReturn.append(">");
1094 }
1095 else
1096 sReturn.append(lcl_QuotRange(sSource, nStartPos, nEndPos));
1097 nStartPos = nEndPos;
1098 }
1099 if( nEndPos < sSource.length() )
1100 sReturn.append(lcl_QuotRange(sSource, nEndPos, sSource.length()));
1101 sReturn.append('\0');
1102 return
1104 reinterpret_cast<const sal_Unicode*>(sReturn.getBuffer()),
1105 RTL_TEXTENCODING_UTF8);
1106}
1107
1108/* vim:set shiftwidth=4 softtabstop=4 expandtab: */
validating xml parser, creates a document tree with xml nodes
Definition: xmlparse.hxx:321
void Default(const XML_Char *s, int len)
Definition: xmlparse.cxx:849
XMLData * m_pCurData
Definition: xmlparse.hxx:327
XML_Parser m_aParser
Definition: xmlparse.hxx:323
static void StartElementHandler(void *userData, const XML_Char *name, const XML_Char **atts)
Definition: xmlparse.cxx:777
static void CommentHandler(void *userData, const XML_Char *data)
Definition: xmlparse.cxx:795
SimpleXMLParser()
creates a new parser
Definition: xmlparse.cxx:760
XMLParentNode * m_pCurNode
Definition: xmlparse.hxx:326
static void CharacterDataHandler(void *userData, const XML_Char *s, int len)
Definition: xmlparse.cxx:789
bool Execute(const OString &rFileName, XMLFile *pXMLFile)
parse a file, return false on critical errors
Definition: xmlparse.cxx:855
static void DefaultHandler(void *userData, const XML_Char *s, int len)
Definition: xmlparse.cxx:801
void CharacterData(const XML_Char *s, int len)
Definition: xmlparse.cxx:828
static void EndElementHandler(void *userData, const XML_Char *name)
Definition: xmlparse.cxx:783
void EndElement()
Definition: xmlparse.cxx:822
void Comment(const XML_Char *data)
Definition: xmlparse.cxx:843
void StartElement(const XML_Char *name, const XML_Char **atts)
Definition: xmlparse.cxx:807
XMLError m_aErrorInformation
Definition: xmlparse.hxx:324
Holds data of Attributes.
Definition: xmlparse.hxx:50
Virtual base to handle different kinds of child nodes.
Definition: xmlparse.hxx:93
XMLChildNode & operator=(const XMLChildNode &rObj)
Definition: xmlparse.cxx:58
XMLParentNode * m_pParent
Definition: xmlparse.hxx:95
XMLChildNode(XMLParentNode *pPar)
Definition: xmlparse.cxx:44
XMLParentNode * GetParent()
returns the parent of this node
Definition: xmlparse.hxx:103
Holds comments.
Definition: xmlparse.hxx:265
const OString & GetComment() const
returns the comment
Definition: xmlparse.hxx:282
Holds character data.
Definition: xmlparse.hxx:240
void AddData(const OString &rData)
adds new character data to the existing one
Definition: xmlparse.hxx:259
Holds additional file content like those for which no handler exists.
Definition: xmlparse.hxx:288
const OString & GetDefault() const
returns the comment
Definition: xmlparse.hxx:305
Hold information of an element node.
Definition: xmlparse.hxx:202
void Print(XMLNode *pCur, OStringBuffer &rBuffer, bool bRootelement) const
Definition: xmlparse.cxx:638
OString m_sElementName
Definition: xmlparse.hxx:204
const OString & GetName() const
returns element name
Definition: xmlparse.hxx:223
void AddAttribute(const OString &rAttribute, const OString &rValue)
adds a new attribute to this element, typically used by parser
Definition: xmlparse.cxx:579
XMLAttributeList * GetAttributeList()
returns list of attributes of this element
Definition: xmlparse.hxx:226
XMLElement & operator=(const XMLElement &rObj)
Definition: xmlparse.cxx:556
void ChangeLanguageTag(const OString &rValue)
Definition: xmlparse.cxx:586
std::unique_ptr< XMLAttributeList > m_pAttributes
Definition: xmlparse.hxx:205
OString ToOString()
Return a Unicode String representation of this object.
Definition: xmlparse.cxx:631
virtual ~XMLElement() override
Definition: xmlparse.cxx:622
XMLElement(OString sName, XMLParentNode *pParent)
create an element node
Definition: xmlparse.cxx:535
Holds information of a XML file, is root node of tree.
Definition: xmlparse.hxx:148
std::unique_ptr< XMLHashMap > m_pXMLStrings
Definition: xmlparse.hxx:185
void Write(OString const &rFilename)
Definition: xmlparse.cxx:147
OString m_sFileName
Definition: xmlparse.hxx:180
const OString & GetName() const
returns file name
Definition: xmlparse.hxx:171
std::vector< OString > m_vOrder
Definition: xmlparse.hxx:187
std::unordered_map< OString, bool > m_aNodes_localize
Mapping XML tag names <-> have localizable strings.
Definition: xmlparse.hxx:183
virtual ~XMLFile() override
Definition: xmlparse.cxx:294
bool CheckExportStatus(XMLChildNode *pCur=nullptr)
Definition: xmlparse.cxx:477
void Extract()
Definition: xmlparse.cxx:327
void SearchL10NElements(XMLChildNode *pCur)
Definition: xmlparse.cxx:424
void SetName(const OString &rFilename)
Definition: xmlparse.hxx:172
XMLFile & operator=(const XMLFile &rObj)
Definition: xmlparse.cxx:395
void InsertL10NElement(XMLElement *pElement)
Definition: xmlparse.cxx:333
void Print(XMLNode *pCur, sal_uInt16 nLevel=0)
Definition: xmlparse.cxx:229
XMLFile(OString sFileName)
Definition: xmlparse.cxx:305
Virtual base to handle different kinds of XML nodes.
Definition: xmlparse.hxx:75
virtual XMLNodeType GetNodeType() const =0
Virtual base to handle different kinds of parent nodes.
Definition: xmlparse.hxx:114
void RemoveAndDeleteAllChildren()
Definition: xmlparse.cxx:134
void AddChild(XMLChildNode *pChild)
adds a new child
Definition: xmlparse.cxx:127
XMLParentNode(XMLParentNode *pPar)
Definition: xmlparse.hxx:119
virtual ~XMLParentNode() override
Definition: xmlparse.cxx:70
XMLChildNodeList * GetChildList()
returns child list of this node
Definition: xmlparse.hxx:129
std::unique_ptr< XMLChildNodeList > m_pChildList
Definition: xmlparse.hxx:116
XMLParentNode & operator=(const XMLParentNode &rObj)
Definition: xmlparse.cxx:106
static OString QuotHTML(const OString &rString)
Quot the XML characters.
Definition: xmlparse.cxx:1062
float x
OUString sName
const char * name
sal_Int32 nIndex
void * p
std::unique_ptr< sal_Int32[]> pData
size
int i
OString UnQuotHTML(std::string_view rString)
Convert XML entity references to single characters.
Definition: helper.cxx:102
std::basic_string_view< charT, traits > trim(std::basic_string_view< charT, traits > str)
OString OUStringToOString(std::u16string_view str, ConnectionSettings const *settings)
sal_Int32 h
const char GetValue[]
XML_Error m_eCode
the error code
Definition: xmlparse.hxx:311
std::size_t m_nLine
error line number
Definition: xmlparse.hxx:312
OString m_sMessage
readable error message
Definition: xmlparse.hxx:314
std::size_t m_nColumn
error column number
Definition: xmlparse.hxx:313
sal_uInt16 sal_Unicode
Any result
OUString sId
size_t pos
constexpr OStringLiteral XML_LANG
Definition: xmlparse.cxx:39
std::unordered_map< OString, LangHashMap * > XMLHashMap
Mapping XML Element string identifier <-> Language Map.
Definition: xmlparse.hxx:143
std::vector< XMLChildNode * > XMLChildNodeList
Definition: xmlparse.hxx:106
std::vector< XMLAttribute * > XMLAttributeList
Definition: xmlparse.hxx:70
std::unordered_map< OString, XMLElement * > LangHashMap
Mapping numeric Language code <-> XML Element.
Definition: xmlparse.hxx:140
static OString sLanguage
Definition: xrmmerge.cxx:40