LibreOffice Module i18nlangtag (master) 1
languagetag.cxx
Go to the documentation of this file.
1/* -*- Mode: C++; tab-width: 4; indent-tabs-mode: nil; c-basic-offset: 4 -*- */
2/*
3 * This file is part of the LibreOffice project.
4 *
5 * This Source Code Form is subject to the terms of the Mozilla Public
6 * License, v. 2.0. If a copy of the MPL was not distributed with this
7 * file, You can obtain one at http://mozilla.org/MPL/2.0/.
8 */
9
10#include <config_folders.h>
11#include <config_liblangtag.h>
12
16#include <rtl/ustrbuf.hxx>
17#include <rtl/bootstrap.hxx>
18#include <sal/log.hxx>
19#include <osl/file.hxx>
20#include <rtl/locale.h>
21#include <o3tl/string_view.hxx>
22#include <algorithm>
23#include <map>
24#include <mutex>
25#include <string_view>
26#include <unordered_set>
27
28//#define erDEBUG
29
30#if LIBLANGTAG_INLINE_FIX
31#define LT_HAVE_INLINE
32#endif
33#include <liblangtag/langtag.h>
34
35#ifdef ANDROID
36#include <osl/detail/android-bootstrap.h>
37#endif
38
39#ifdef EMSCRIPTEN
40#include <osl/detail/emscripten-bootstrap.h>
41#endif
42
43using namespace com::sun::star;
44
45namespace {
46
47// Helper to ensure lt_error_t is free'd
48struct myLtError
49{
50 lt_error_t* p;
51 myLtError() : p(nullptr) {}
52 ~myLtError() { if (p) lt_error_unref( p); }
53};
54
55}
56
57namespace {
// Returns the one recursive mutex shared by all callers; it is created on
// first use and the same object is returned on every call.
std::recursive_mutex& theMutex()
{
    static std::recursive_mutex aTheOnlyMutex;
    return aTheOnlyMutex;
}
63}
64
65typedef std::unordered_set< OUString > KnownTagSet;
66static const KnownTagSet & getKnowns()
67{
68 static KnownTagSet theKnowns = []()
69 {
70 KnownTagSet tmpSet;
71 ::std::vector< MsLangId::LanguagetagMapping > aDefined( MsLangId::getDefinedLanguagetags());
72 for (auto const& elemDefined : aDefined)
73 {
74 // Do not use the BCP47 string here to initialize the
75 // LanguageTag because then canonicalize() would call this
76 // getKnowns() again...
77 ::std::vector< OUString > aFallbacks( LanguageTag( elemDefined.mnLang).getFallbackStrings( true));
78 for (auto const& fallback : aFallbacks)
79 {
80 tmpSet.insert(fallback);
81 }
82 }
83 return tmpSet;
84 }();
85 return theKnowns;
86}
87
88
89namespace {
90struct compareIgnoreAsciiCaseLess
91{
92 bool operator()( std::u16string_view r1, std::u16string_view r2 ) const
93 {
94 return o3tl::compareToIgnoreAsciiCase(r1, r2) < 0;
95 }
96};
97typedef ::std::map< OUString, LanguageTag::ImplPtr, compareIgnoreAsciiCaseLess > MapBcp47;
98typedef ::std::map< LanguageType, LanguageTag::ImplPtr > MapLangID;
99MapBcp47& theMapBcp47()
100{
101 static MapBcp47 SINGLETON;
102 return SINGLETON;
103}
104MapLangID& theMapLangID()
105{
106 static MapLangID SINGLETON;
107 return SINGLETON;
108}
109LanguageTag::ImplPtr& theSystemLocale()
110{
111 static LanguageTag::ImplPtr SINGLETON;
112 return SINGLETON;
113}
114}
115
116
118{
119 static LanguageType nOnTheFlyLanguage(0);
120 std::unique_lock aGuard( theMutex());
121 if (!nOnTheFlyLanguage)
123 else
124 {
125 if (MsLangId::getPrimaryLanguage( nOnTheFlyLanguage) != LANGUAGE_ON_THE_FLY_END)
126 ++nOnTheFlyLanguage;
127 else
128 {
129 LanguageType nSub = MsLangId::getSubLanguage( nOnTheFlyLanguage);
130 if (nSub != LANGUAGE_ON_THE_FLY_SUB_END)
131 nOnTheFlyLanguage = MsLangId::makeLangID( ++nSub, LANGUAGE_ON_THE_FLY_START);
132 else
133 {
134 SAL_WARN( "i18nlangtag", "getNextOnTheFlyLanguage: none left! ("
135 << ((sal_uInt16(LANGUAGE_ON_THE_FLY_END) - sal_uInt16(LANGUAGE_ON_THE_FLY_START) + 1)
136 * (sal_uInt16(LANGUAGE_ON_THE_FLY_SUB_END) - sal_uInt16(LANGUAGE_ON_THE_FLY_SUB_START) + 1))
137 << " consumed?!?)");
138 return LanguageType(0);
139 }
140 }
141 }
142#if OSL_DEBUG_LEVEL > 0
143 static size_t nOnTheFlies = 0;
144 ++nOnTheFlies;
145 SAL_INFO( "i18nlangtag", "getNextOnTheFlyLanguage: number " << nOnTheFlies);
146#endif
147 return nOnTheFlyLanguage;
148}
149
150
151// static
153{
156 return
159}
160
161namespace {
162
167class LiblangtagDataRef
168{
169public:
170 LiblangtagDataRef();
171 ~LiblangtagDataRef();
172 void init()
173 {
174 if (!mbInitialized)
175 setup();
176 }
177private:
178 OString maDataPath; // path to liblangtag data, "|" if system
179 bool mbInitialized;
180
181 void setupDataPath();
182 void setup();
183 static void teardown();
184};
185
186LiblangtagDataRef& theDataRef()
187{
188 static LiblangtagDataRef SINGLETON;
189 return SINGLETON;
190}
191}
192
193LiblangtagDataRef::LiblangtagDataRef()
194 :
195 mbInitialized(false)
196{
197}
198
199LiblangtagDataRef::~LiblangtagDataRef()
200{
201 if (mbInitialized)
202 teardown();
203}
204
205void LiblangtagDataRef::setup()
206{
207 SAL_INFO( "i18nlangtag", "LiblangtagDataRef::setup: initializing database");
208 if (maDataPath.isEmpty())
209 setupDataPath();
210 lt_db_initialize();
211 mbInitialized = true;
212}
213
214void LiblangtagDataRef::teardown()
215{
216 SAL_INFO( "i18nlangtag", "LiblangtagDataRef::teardown: finalizing database");
217 lt_db_finalize();
218}
219
220void LiblangtagDataRef::setupDataPath()
221{
222#if defined(ANDROID) || defined(EMSCRIPTEN)
223 maDataPath = OString(lo_get_app_data_dir()) + "/share/liblangtag";
224#else
225 // maDataPath is assumed to be empty here.
226 OUString aURL("$BRAND_BASE_DIR/" LIBO_SHARE_FOLDER "/liblangtag");
227 rtl::Bootstrap::expandMacros(aURL); //TODO: detect failure
228
229 // Check if data is in our own installation, else assume system
230 // installation.
231 OUString aData = aURL + "/language-subtag-registry.xml";
232 osl::DirectoryItem aDirItem;
233 if (osl::DirectoryItem::get( aData, aDirItem) == osl::DirectoryItem::E_None)
234 {
235 OUString aPath;
236 if (osl::FileBase::getSystemPathFromFileURL( aURL, aPath) == osl::FileBase::E_None)
237 maDataPath = OUStringToOString( aPath, RTL_TEXTENCODING_UTF8);
238 }
239#endif
240 if (maDataPath.isEmpty())
241 maDataPath = "|"; // assume system
242 else
243 lt_db_set_datadir( maDataPath.getStr());
244}
245
246
247/* TODO: we could transform known vendor and browser-specific variants to known
248 * BCP 47 if available. For now just remove them to not confuse any later
249 * treatments that check for empty variants. This vendor stuff was never
250 * supported anyway. */
251static void handleVendorVariant( css::lang::Locale & rLocale )
252{
253 if (!rLocale.Variant.isEmpty() && rLocale.Language != I18NLANGTAG_QLT)
254 rLocale.Variant.clear();
255}
256
257
259{
260public:
261
262 explicit LanguageTagImpl( const LanguageTag & rLanguageTag );
263 explicit LanguageTagImpl( const LanguageTagImpl & rLanguageTagImpl );
265 LanguageTagImpl& operator=( const LanguageTagImpl & rLanguageTagImpl );
266
267private:
268
269 friend class LanguageTag;
270
272 {
276 };
277
278 mutable css::lang::Locale maLocale;
279 mutable OUString maBcp47;
280 mutable OUString maCachedLanguage;
281 mutable OUString maCachedScript;
282 mutable OUString maCachedCountry;
283 mutable OUString maCachedVariants;
284 mutable OUString maCachedGlibcString;
285 mutable lt_tag_t* mpImplLangtag;
293 mutable bool mbInitializedBcp47 : 1;
294 mutable bool mbInitializedLocale : 1;
295 mutable bool mbInitializedLangID : 1;
296 mutable bool mbCachedLanguage : 1;
297 mutable bool mbCachedScript : 1;
298 mutable bool mbCachedCountry : 1;
299 mutable bool mbCachedVariants : 1;
300 mutable bool mbCachedGlibcString : 1;
301
302 OUString const & getBcp47() const;
303 OUString const & getLanguage() const;
304 OUString const & getScript() const;
305 OUString const & getCountry() const;
306 OUString getRegion() const;
307 OUString const & getVariants() const;
308 bool hasScript() const;
309 OUString const & getGlibcLocaleString() const;
310
313
314 bool isIsoLocale() const;
315 bool isIsoODF() const;
316 bool isValidBcp47() const;
317
319 bool convertLocaleToLang( bool bAllowOnTheFlyID );
321 void convertBcp47ToLang();
322 void convertLangToLocale();
323 void convertLangToBcp47();
324
326 bool canonicalize();
327
332 bool synCanonicalize();
333
334 OUString getLanguageFromLangtag();
335 OUString getScriptFromLangtag();
336 OUString getRegionFromLangtag();
337 OUString getVariantsFromLangtag();
338
350
356 bool cacheSimpleLSCV();
357
359 {
368 };
369
385 static Extraction simpleExtract( const OUString& rBcp47,
386 OUString& rLanguage,
387 OUString& rScript,
388 OUString& rCountry,
389 OUString& rRegion,
390 OUString& rVariants );
391
394 static OUString convertToBcp47( const css::lang::Locale& rLocale );
395
396};
397
398
400 :
401 maLocale( rLanguageTag.maLocale),
402 maBcp47( rLanguageTag.maBcp47),
403 mpImplLangtag( nullptr),
404 mnLangID( rLanguageTag.mnLangID),
405 meScriptType( LanguageTag::ScriptType::UNKNOWN),
406 meIsValid( DECISION_DONTKNOW),
407 meIsIsoLocale( DECISION_DONTKNOW),
408 meIsIsoODF( DECISION_DONTKNOW),
409 meIsLiblangtagNeeded( DECISION_DONTKNOW),
410 mbSystemLocale( rLanguageTag.mbSystemLocale),
414 mbCachedLanguage( false),
415 mbCachedScript( false),
416 mbCachedCountry( false),
417 mbCachedVariants( false),
418 mbCachedGlibcString( false)
419{
420}
421
422
424 :
425 maLocale( rLanguageTagImpl.maLocale),
426 maBcp47( rLanguageTagImpl.maBcp47),
427 maCachedLanguage( rLanguageTagImpl.maCachedLanguage),
428 maCachedScript( rLanguageTagImpl.maCachedScript),
429 maCachedCountry( rLanguageTagImpl.maCachedCountry),
430 maCachedVariants( rLanguageTagImpl.maCachedVariants),
431 maCachedGlibcString( rLanguageTagImpl.maCachedGlibcString),
432 mpImplLangtag( rLanguageTagImpl.mpImplLangtag ?
433 lt_tag_copy( rLanguageTagImpl.mpImplLangtag) : nullptr),
434 mnLangID( rLanguageTagImpl.mnLangID),
435 meScriptType( rLanguageTagImpl.meScriptType),
436 meIsValid( rLanguageTagImpl.meIsValid),
437 meIsIsoLocale( rLanguageTagImpl.meIsIsoLocale),
438 meIsIsoODF( rLanguageTagImpl.meIsIsoODF),
439 meIsLiblangtagNeeded( rLanguageTagImpl.meIsLiblangtagNeeded),
440 mbSystemLocale( rLanguageTagImpl.mbSystemLocale),
441 mbInitializedBcp47( rLanguageTagImpl.mbInitializedBcp47),
442 mbInitializedLocale( rLanguageTagImpl.mbInitializedLocale),
443 mbInitializedLangID( rLanguageTagImpl.mbInitializedLangID),
444 mbCachedLanguage( rLanguageTagImpl.mbCachedLanguage),
445 mbCachedScript( rLanguageTagImpl.mbCachedScript),
446 mbCachedCountry( rLanguageTagImpl.mbCachedCountry),
447 mbCachedVariants( rLanguageTagImpl.mbCachedVariants),
448 mbCachedGlibcString( rLanguageTagImpl.mbCachedGlibcString)
449{
450 if (mpImplLangtag)
451 theDataRef().init();
452}
453
454
456{
457 if (&rLanguageTagImpl == this)
458 return *this;
459
460 maLocale = rLanguageTagImpl.maLocale;
461 maBcp47 = rLanguageTagImpl.maBcp47;
462 maCachedLanguage = rLanguageTagImpl.maCachedLanguage;
463 maCachedScript = rLanguageTagImpl.maCachedScript;
464 maCachedCountry = rLanguageTagImpl.maCachedCountry;
465 maCachedVariants = rLanguageTagImpl.maCachedVariants;
466 maCachedGlibcString = rLanguageTagImpl.maCachedGlibcString;
467 lt_tag_t * oldTag = mpImplLangtag;
468 mpImplLangtag = rLanguageTagImpl.mpImplLangtag ?
469 lt_tag_copy( rLanguageTagImpl.mpImplLangtag) : nullptr;
470 lt_tag_unref(oldTag);
471 mnLangID = rLanguageTagImpl.mnLangID;
472 meScriptType = rLanguageTagImpl.meScriptType;
473 meIsValid = rLanguageTagImpl.meIsValid;
474 meIsIsoLocale = rLanguageTagImpl.meIsIsoLocale;
475 meIsIsoODF = rLanguageTagImpl.meIsIsoODF;
477 mbSystemLocale = rLanguageTagImpl.mbSystemLocale;
478 mbInitializedBcp47 = rLanguageTagImpl.mbInitializedBcp47;
479 mbInitializedLocale = rLanguageTagImpl.mbInitializedLocale;
480 mbInitializedLangID = rLanguageTagImpl.mbInitializedLangID;
481 mbCachedLanguage = rLanguageTagImpl.mbCachedLanguage;
482 mbCachedScript = rLanguageTagImpl.mbCachedScript;
483 mbCachedCountry = rLanguageTagImpl.mbCachedCountry;
484 mbCachedVariants = rLanguageTagImpl.mbCachedVariants;
485 mbCachedGlibcString = rLanguageTagImpl.mbCachedGlibcString;
486 if (mpImplLangtag && !oldTag)
487 theDataRef().init();
488 return *this;
489}
490
491
493{
494 if (mpImplLangtag)
495 {
496 lt_tag_unref( mpImplLangtag);
497 }
498}
499
500
501LanguageTag::LanguageTag( const OUString & rBcp47LanguageTag, bool bCanonicalize )
502 :
503 maBcp47( rBcp47LanguageTag),
505 mbSystemLocale( rBcp47LanguageTag.isEmpty()),
507 mbInitializedLocale( false),
508 mbInitializedLangID( false),
509 mbIsFallback( false)
510{
511 if (bCanonicalize)
512 {
514 // Registration itself may already have canonicalized, so do an
515 // unconditional sync.
516 syncFromImpl();
517 }
518
519}
520
521
522LanguageTag::LanguageTag( const css::lang::Locale & rLocale )
523 :
524 maLocale( rLocale),
526 mbSystemLocale( rLocale.Language.isEmpty()),
527 mbInitializedBcp47( false),
528 mbInitializedLocale( false), // we do not know which mess we got passed in
529 mbInitializedLangID( false),
530 mbIsFallback( false)
531{
533}
534
535
537 :
538 mnLangID( nLanguage),
539 mbSystemLocale( nLanguage == LANGUAGE_SYSTEM),
540 mbInitializedBcp47( false),
541 mbInitializedLocale( false),
543 mbIsFallback( false)
544{
545}
546
547
548LanguageTag::LanguageTag( const OUString& rBcp47, const OUString& rLanguage,
549 std::u16string_view rScript, const OUString& rCountry )
550 :
551 maBcp47( rBcp47),
553 mbSystemLocale( rBcp47.isEmpty() && rLanguage.isEmpty()),
554 mbInitializedBcp47( !rBcp47.isEmpty()),
555 mbInitializedLocale( false),
556 mbInitializedLangID( false),
557 mbIsFallback( false)
558{
560 return;
561
562 if (rScript.empty())
563 {
564 maBcp47 = rLanguage + "-" + rCountry;
565 mbInitializedBcp47 = true;
566 maLocale.Language = rLanguage;
567 maLocale.Country = rCountry;
568 mbInitializedLocale = true;
569 }
570 else
571 {
572 if (rCountry.isEmpty())
573 maBcp47 = rLanguage + "-" + rScript;
574 else
575 maBcp47 = rLanguage + "-" + rScript + "-" + rCountry;
576 mbInitializedBcp47 = true;
577 maLocale.Language = I18NLANGTAG_QLT;
578 maLocale.Country = rCountry;
579 maLocale.Variant = maBcp47;
580 mbInitializedLocale = true;
581 }
582}
583
584
585LanguageTag::LanguageTag( const rtl_Locale & rLocale )
586 :
587 maLocale( rLocale.Language, rLocale.Country, rLocale.Variant),
589 mbSystemLocale( maLocale.Language.isEmpty()),
590 mbInitializedBcp47( false),
592 mbInitializedLangID( false),
593 mbIsFallback( false)
594{
596}
597
599
601{
603
605 {
607 {
609 mbInitializedBcp47 = !maBcp47.isEmpty();
610 }
611 }
612 if (maBcp47.isEmpty())
613 {
614 SAL_WARN( "i18nlangtag", "LanguageTagImpl::registerOnTheFly: no Bcp47 string, no registering");
615 return pImpl;
616 }
617
618 std::unique_lock aGuard( theMutex());
619
620 MapBcp47& rMapBcp47 = theMapBcp47();
621 MapBcp47::const_iterator it( rMapBcp47.find( maBcp47));
622 bool bOtherImpl = false;
623 if (it != rMapBcp47.end())
624 {
625 SAL_INFO( "i18nlangtag", "LanguageTag::registerOnTheFly: found impl for '" << maBcp47 << "'");
626 pImpl = (*it).second;
627 if (pImpl.get() != this)
628 {
629 // Could happen for example if during registerImpl() the tag was
630 // changed via canonicalize() and the result was already present in
631 // the map before, for example 'bn-Beng' => 'bn'. This specific
632 // case is now taken care of in registerImpl() and doesn't reach
633 // here. However, use the already existing impl if it matches.
634 SAL_WARN( "i18nlangtag", "LanguageTag::registerOnTheFly: using other impl for this '" << maBcp47 << "'");
635 *this = *pImpl; // ensure consistency
636 bOtherImpl = true;
637 }
638 }
639 else
640 {
641 SAL_INFO( "i18nlangtag", "LanguageTag::registerOnTheFly: new impl for '" << maBcp47 << "'");
642 pImpl = std::make_shared<LanguageTagImpl>( *this);
643 rMapBcp47.insert( ::std::make_pair( maBcp47, pImpl));
644 }
645
646 if (!bOtherImpl || !pImpl->mbInitializedLangID)
647 {
648 if (nRegisterID == LanguageType(0) || nRegisterID == LANGUAGE_DONTKNOW)
649 nRegisterID = getNextOnTheFlyLanguage();
650 else
651 {
652 // Accept a suggested ID only if it is not mapped yet to something
653 // different, otherwise we would end up with ambiguous assignments
654 // of different language tags, for example for the same primary
655 // LangID with "no", "nb" and "nn".
656 const MapLangID& rMapLangID = theMapLangID();
657 MapLangID::const_iterator itID( rMapLangID.find( nRegisterID));
658 if (itID != rMapLangID.end())
659 {
660 if ((*itID).second->maBcp47 != maBcp47)
661 {
662 SAL_INFO( "i18nlangtag", "LanguageTag::registerOnTheFly: not using suggested 0x"
663 << ::std::hex << nRegisterID << " for '" << maBcp47 << "' have '"
664 << (*itID).second->maBcp47 << "'");
665 nRegisterID = getNextOnTheFlyLanguage();
666 }
667 else
668 {
669 SAL_WARN( "i18nlangtag", "LanguageTag::registerOnTheFly: suggested 0x"
670 << ::std::hex << nRegisterID << " for '" << maBcp47 << "' already registered");
671 }
672 }
673 }
674 if (!nRegisterID)
675 {
676 // out of IDs, nothing to register
677 return pImpl;
678 }
679 pImpl->mnLangID = nRegisterID;
680 pImpl->mbInitializedLangID = true;
681 if (pImpl.get() != this)
682 {
683 mnLangID = nRegisterID;
684 mbInitializedLangID = true;
685 }
686 }
687
688 ::std::pair< MapLangID::const_iterator, bool > res(
689 theMapLangID().insert( ::std::make_pair( pImpl->mnLangID, pImpl)));
690 if (res.second)
691 {
692 SAL_INFO( "i18nlangtag", "LanguageTag::registerOnTheFly: cross-inserted 0x"
693 << ::std::hex << pImpl->mnLangID << " for '" << maBcp47 << "'");
694 }
695 else
696 {
697 SAL_WARN( "i18nlangtag", "LanguageTag::registerOnTheFly: not cross-inserted 0x"
698 << ::std::hex << pImpl->mnLangID << " for '" << maBcp47 << "' have '"
699 << (*res.first).second->maBcp47 << "'");
700 }
701
702 return pImpl;
703}
704
705
707{
708 const MapLangID& rMapLangID = theMapLangID();
709 MapLangID::const_iterator itID( rMapLangID.find( nRegisterID));
710 if (itID != rMapLangID.end())
711 return (*itID).second->getScriptType();
712 else
713 return ScriptType::UNKNOWN;
714}
715
716
717// static
719{
720 if (nLang == LANGUAGE_DONTKNOW || nLang == LANGUAGE_SYSTEM)
721 {
722 SAL_WARN( "i18nlangtag",
723 "LanguageTag::setConfiguredSystemLanguage: refusing to set unresolved system locale 0x" <<
724 ::std::hex << nLang);
725 return;
726 }
727 SAL_INFO( "i18nlangtag", "LanguageTag::setConfiguredSystemLanguage: setting to 0x" << ::std::hex << nLang);
729 // Reset system locale to none and let registerImpl() do the rest to
730 // initialize a new one.
731 theSystemLocale().reset();
732 LanguageTag aLanguageTag( LANGUAGE_SYSTEM);
733 aLanguageTag.registerImpl();
734}
735
736static bool lt_tag_parse_disabled = false;
737
738// static
740{
742}
743
745{
746 return nLang != LANGUAGE_DONTKNOW && nLang != LANGUAGE_SYSTEM &&
747 (LanguageTag::isOnTheFlyID( nLang) || (nLang == MsLangId::getPrimaryLanguage( nLang)));
748}
749
750
752{
753 // XXX NOTE: Do not use non-static LanguageTag::convert...() member methods
754 // here as they access getImpl() and syncFromImpl() and would lead to
755 // recursion. Also do not use the static LanguageTag::convertTo...()
756 // methods as they may create temporary LanguageTag instances. Only
757 // LanguageTagImpl::convertToBcp47(Locale) is ok.
758
759 ImplPtr pImpl;
760
761#if OSL_DEBUG_LEVEL > 0
762 static size_t nCalls = 0;
763 ++nCalls;
764 SAL_INFO( "i18nlangtag", "LanguageTag::registerImpl: " << nCalls << " calls");
765#endif
766
767 // Do not register unresolved system locale, also force LangID if system
768 // and take the system locale shortcut if possible.
769 if (mbSystemLocale)
770 {
771 pImpl = theSystemLocale();
772 if (pImpl)
773 {
774#if OSL_DEBUG_LEVEL > 0
775 static size_t nCallsSystem = 0;
776 ++nCallsSystem;
777 SAL_INFO( "i18nlangtag", "LanguageTag::registerImpl: " << nCallsSystem << " system calls");
778#endif
779 return pImpl;
780 }
782 {
785 SAL_WARN_IF( !mbInitializedLangID, "i18nlangtag", "LanguageTag::registerImpl: can't resolve system!");
786 }
787 }
788
790 {
792 {
793 static LanguageTag::ImplPtr theDontKnow;
794 // Heavy usage of LANGUAGE_DONTKNOW, make it an own Impl for all the
795 // conversion attempts. At the same time provide a central breakpoint
796 // to inspect such places.
797 if (!theDontKnow)
798 theDontKnow = std::make_shared<LanguageTagImpl>( *this);
799 pImpl = theDontKnow;
800#if OSL_DEBUG_LEVEL > 0
801 static size_t nCallsDontKnow = 0;
802 ++nCallsDontKnow;
803 SAL_INFO( "i18nlangtag", "LanguageTag::registerImpl: " << nCallsDontKnow << " DontKnow calls");
804#endif
805 return pImpl;
806 }
807 else
808 {
809 // A great share are calls for a system equal locale.
810 pImpl = theSystemLocale();
811 if (pImpl && pImpl->mnLangID == mnLangID)
812 {
813#if OSL_DEBUG_LEVEL > 0
814 static size_t nCallsSystemEqual = 0;
815 ++nCallsSystemEqual;
816 SAL_INFO( "i18nlangtag", "LanguageTag::registerImpl: " << nCallsSystemEqual
817 << " system equal LangID calls");
818#endif
819 return pImpl;
820 }
821 }
822 }
823
824 // Force Bcp47 if not LangID.
826 {
827 // The one central point to set mbInitializedLocale=true if a
828 // LanguageTag was initialized with a Locale. We will now convert and
829 // possibly later resolve it.
830 if (!mbInitializedLocale && (mbSystemLocale || !maLocale.Language.isEmpty()))
831 mbInitializedLocale = true;
832 SAL_WARN_IF( !mbInitializedLocale, "i18nlangtag", "LanguageTag::registerImpl: still not mbInitializedLocale");
833
835 mbInitializedBcp47 = !maBcp47.isEmpty();
836 }
837
839 {
840 // A great share are calls for a system equal locale.
841 pImpl = theSystemLocale();
842 if (pImpl && pImpl->maBcp47 == maBcp47)
843 {
844#if OSL_DEBUG_LEVEL > 0
845 static size_t nCallsSystemEqual = 0;
846 ++nCallsSystemEqual;
847 SAL_INFO( "i18nlangtag", "LanguageTag::registerImpl: " << nCallsSystemEqual << " system equal BCP47 calls");
848#endif
849 return pImpl;
850 }
851 }
852
853#if OSL_DEBUG_LEVEL > 0
854 static size_t nCallsNonSystem = 0;
855 ++nCallsNonSystem;
856 SAL_INFO( "i18nlangtag", "LanguageTag::registerImpl: " << nCallsNonSystem << " non-system calls");
857#endif
858
859 std::unique_lock aGuard( theMutex());
860
861#if OSL_DEBUG_LEVEL > 0
862 static long nRunning = 0;
863 // Entering twice here is ok, which is needed for fallback init in
864 // getKnowns() in canonicalize() via pImpl->convertBcp47ToLocale() below,
865 // everything else is suspicious.
866 SAL_WARN_IF( nRunning > 1, "i18nlangtag", "LanguageTag::registerImpl: re-entered for '"
867 << maBcp47 << "' 0x" << ::std::hex << mnLangID );
868 struct Runner { Runner() { ++nRunning; } ~Runner() { --nRunning; } } aRunner;
869#endif
870
871 // Prefer LangID map as find+insert needs less comparison work.
873 {
874 MapLangID& rMap = theMapLangID();
875 MapLangID::const_iterator it( rMap.find( mnLangID));
876 if (it != rMap.end())
877 {
878 SAL_INFO( "i18nlangtag", "LanguageTag::registerImpl: found impl for 0x" << ::std::hex << mnLangID);
879 pImpl = (*it).second;
880 }
881 else
882 {
883 SAL_INFO( "i18nlangtag", "LanguageTag::registerImpl: new impl for 0x" << ::std::hex << mnLangID);
884 pImpl = std::make_shared<LanguageTagImpl>( *this);
885 rMap.insert( ::std::make_pair( mnLangID, pImpl));
886 // Try round-trip.
887 if (!pImpl->mbInitializedLocale)
888 pImpl->convertLangToLocale();
890 // If round-trip is identical cross-insert to Bcp47 map.
891 if (nLang == pImpl->mnLangID)
892 {
893 if (!pImpl->mbInitializedBcp47)
894 pImpl->convertLocaleToBcp47();
895 ::std::pair< MapBcp47::const_iterator, bool > res(
896 theMapBcp47().insert( ::std::make_pair( pImpl->maBcp47, pImpl)));
897 if (res.second)
898 {
899 SAL_INFO( "i18nlangtag", "LanguageTag::registerImpl: cross-inserted '" << pImpl->maBcp47 << "' for 0x" << ::std::hex << mnLangID);
900 }
901 else
902 {
903 SAL_INFO( "i18nlangtag", "LanguageTag::registerImpl: not cross-inserted '" << pImpl->maBcp47 << "' for 0x" << ::std::hex << mnLangID << " have 0x"
904 << ::std::hex << (*res.first).second->mnLangID);
905 }
906 }
907 else
908 {
909 if (!pImpl->mbInitializedBcp47)
910 pImpl->convertLocaleToBcp47();
911 SAL_INFO( "i18nlangtag", "LanguageTag::registerImpl: not cross-inserted '" << pImpl->maBcp47 << "' for 0x" << ::std::hex << mnLangID << " round-trip to 0x" << ::std::hex << nLang);
912 }
913 }
914 }
915 else if (!maBcp47.isEmpty())
916 {
917 MapBcp47& rMap = theMapBcp47();
918 MapBcp47::const_iterator it( rMap.find( maBcp47));
919 if (it != rMap.end())
920 {
921 SAL_INFO( "i18nlangtag", "LanguageTag::registerImpl: found impl for '" << maBcp47 << "'");
922 pImpl = (*it).second;
923 }
924 else
925 {
926 SAL_INFO( "i18nlangtag", "LanguageTag::registerImpl: new impl for '" << maBcp47 << "'");
927 pImpl = std::make_shared<LanguageTagImpl>( *this);
928 ::std::pair< MapBcp47::iterator, bool > insOrig( rMap.insert( ::std::make_pair( maBcp47, pImpl)));
929 // If changed after canonicalize() also add the resulting tag to
930 // the map.
931 if (pImpl->synCanonicalize())
932 {
933 SAL_INFO( "i18nlangtag", "LanguageTag::registerImpl: canonicalized to '" << pImpl->maBcp47 << "'");
934 ::std::pair< MapBcp47::const_iterator, bool > insCanon(
935 rMap.insert( ::std::make_pair( pImpl->maBcp47, pImpl)));
936 SAL_INFO( "i18nlangtag", "LanguageTag::registerImpl: " << (insCanon.second ? "" : "not ")
937 << "inserted '" << pImpl->maBcp47 << "'");
938 // If the canonicalized tag already existed (was not inserted)
939 // and impls are different, make this impl that impl and skip
940 // the rest if that LangID is present as well. The existing
941 // entry may or may not be different, it may even be strictly
942 // identical to this if it differs only in case (e.g. ko-kr =>
943 // ko-KR) which was corrected in canonicalize() hence also in
944 // the map entry but comparison is case insensitive and found
945 // it again.
946 if (!insCanon.second && (*insCanon.first).second != pImpl)
947 {
948 (*insOrig.first).second = pImpl = (*insCanon.first).second;
949 SAL_INFO( "i18nlangtag", "LanguageTag::registerImpl: share impl with 0x"
950 << ::std::hex << pImpl->mnLangID);
951 }
952 }
953 if (!pImpl->mbInitializedLangID)
954 {
955 // Try round-trip Bcp47->Locale->LangID->Locale->Bcp47.
956 if (!pImpl->mbInitializedLocale)
957 pImpl->convertBcp47ToLocale();
958 if (!pImpl->mbInitializedLangID)
959 pImpl->convertLocaleToLang( true);
960 // Unconditionally insert (round-trip is possible) for
961 // on-the-fly IDs and (generated or not) suggested IDs.
962 bool bInsert = lcl_isKnownOnTheFlyID( pImpl->mnLangID);
963 OUString aBcp47;
964 if (!bInsert)
965 {
966 if (pImpl->mnLangID != LANGUAGE_DONTKNOW)
967 {
968 // May have involved canonicalize(), so compare with
969 // pImpl->maBcp47 instead of maBcp47!
971 MsLangId::Conversion::convertLanguageToLocale( pImpl->mnLangID, true));
972 bInsert = (aBcp47 == pImpl->maBcp47);
973 }
974 }
975 // If round-trip is identical cross-insert to Bcp47 map.
976 if (bInsert)
977 {
978 ::std::pair< MapLangID::const_iterator, bool > res(
979 theMapLangID().insert( ::std::make_pair( pImpl->mnLangID, pImpl)));
980 if (res.second)
981 {
982 SAL_INFO( "i18nlangtag", "LanguageTag::registerImpl: cross-inserted 0x"
983 << ::std::hex << pImpl->mnLangID << " for '" << maBcp47 << "'");
984 }
985 else
986 {
987 SAL_INFO( "i18nlangtag", "LanguageTag::registerImpl: not cross-inserted 0x"
988 << ::std::hex << pImpl->mnLangID << " for '" << maBcp47 << "' have '"
989 << (*res.first).second->maBcp47 << "'");
990 }
991 }
992 else
993 {
994 SAL_INFO( "i18nlangtag", "LanguageTag::registerImpl: not cross-inserted 0x"
995 << ::std::hex << pImpl->mnLangID << " for '" << maBcp47 << "' round-trip to '"
996 << aBcp47 << "'");
997 }
998 }
999 }
1000 }
1001 else
1002 {
1003 SAL_WARN( "i18nlangtag", "LanguageTag::registerImpl: can't register for 0x" << ::std::hex << mnLangID );
1004 pImpl = std::make_shared<LanguageTagImpl>( *this);
1005 }
1006
1007 // If we reach here for mbSystemLocale we didn't have theSystemLocale
1008 // above, so add it.
1010 {
1011 theSystemLocale() = pImpl;
1012 SAL_INFO( "i18nlangtag", "LanguageTag::registerImpl: added system locale 0x"
1013 << ::std::hex << pImpl->mnLangID << " '" << pImpl->maBcp47 << "'");
1014 }
1015
1016 return pImpl;
1017}
1018
1019
1021{
1022 if (!mpImpl)
1023 {
1024 mpImpl = registerImpl();
1026 }
1027 return mpImpl.get();
1028}
1029
1031{
1032 if (!mpImpl)
1033 {
1034 mpImpl = registerImpl();
1036 }
1037 return mpImpl.get();
1038}
1039
1041{
1042 mpImpl.reset();
1043 maLocale = lang::Locale();
1044 maBcp47.clear();
1046 mbSystemLocale = true;
1047 mbInitializedBcp47 = false;
1048 mbInitializedLocale = false;
1049 mbInitializedLangID = false;
1050 mbIsFallback = false;
1051}
1052
1053
1054LanguageTag & LanguageTag::reset( const OUString & rBcp47LanguageTag )
1055{
1056 resetVars();
1057 maBcp47 = rBcp47LanguageTag;
1058 mbSystemLocale = rBcp47LanguageTag.isEmpty();
1060
1061 return *this;
1062}
1063
1064
1065LanguageTag & LanguageTag::reset( const css::lang::Locale & rLocale )
1066{
1067 resetVars();
1068 maLocale = rLocale;
1069 mbSystemLocale = rLocale.Language.isEmpty();
1072 return *this;
1073}
1074
1075
1077{
1078 resetVars();
1079 mnLangID = nLanguage;
1080 mbSystemLocale = nLanguage == LANGUAGE_SYSTEM;
1082 return *this;
1083}
1084
1085
1087{
1088#ifdef erDEBUG
1089 // dump once
1090 struct dumper
1091 {
1092 lt_tag_t** mpp;
1093 explicit dumper( lt_tag_t** pp ) : mpp( *pp ? NULL : pp) {}
1094 ~dumper() { if (mpp && *mpp) lt_tag_dump( *mpp); }
1095 };
1096 dumper aDumper( &mpImplLangtag);
1097#endif
1098
1099 bool bChanged = false;
1100
1101 // Side effect: have maBcp47 in any case, resolved system.
1102 // Some methods calling canonicalize() (or not calling it due to
1103 // meIsLiblangtagNeeded==DECISION_NO) rely on this! Hence do not set
1104 // meIsLiblangtagNeeded anywhere else than hereafter.
1105 getBcp47();
1106
1107 // The simple cases and known locales don't need liblangtag processing,
1108 // which also avoids loading liblangtag data on startup.
1110 {
1111 bool bTemporaryLocale = false;
1112 bool bTemporaryLangID = false;
1114 {
1115 if (mbSystemLocale)
1116 {
1118 mbInitializedLangID = true;
1119 }
1120 else
1121 {
1122 // Now this is getting funny... we only have some BCP47 string
1123 // and want to determine if parsing it would be possible
1124 // without using liblangtag just to see if it is a simple known
1125 // locale or could fall back to one.
1126 OUString aLanguage, aScript, aCountry, aRegion, aVariants;
1127 Extraction eExt = simpleExtract( maBcp47, aLanguage, aScript, aCountry, aRegion, aVariants);
1128 if (eExt != EXTRACTED_NONE)
1129 {
1130 if (eExt == EXTRACTED_LSC || eExt == EXTRACTED_LV || eExt == EXTRACTED_LR)
1131 {
1132 // Rebuild bcp47 with proper casing of tags.
1133 OUStringBuffer aBuf( aLanguage.getLength() + 1 + aScript.getLength() +
1134 1 + aCountry.getLength() + 1 + aRegion.getLength() + 1 + aVariants.getLength());
1135 aBuf.append( aLanguage);
1136 if (!aScript.isEmpty())
1137 aBuf.append("-" + aScript);
1138 if (!aCountry.isEmpty())
1139 aBuf.append("-" + aCountry);
1140 if (!aRegion.isEmpty())
1141 aBuf.append("-" + aRegion);
1142 if (!aVariants.isEmpty())
1143 aBuf.append("-" + aVariants);
1144 OUString aStr( aBuf.makeStringAndClear());
1145
1146 if (maBcp47 != aStr)
1147 {
1148 maBcp47 = aStr;
1149 bChanged = true;
1150 }
1151 }
1152 if (eExt == EXTRACTED_LSC && aScript.isEmpty())
1153 {
1154 maLocale.Language = aLanguage;
1155 maLocale.Country = aCountry;
1156 }
1157 else if (eExt == EXTRACTED_C_LOCALE)
1158 {
1159 maLocale.Language = aLanguage;
1160 maLocale.Country = aCountry;
1161 }
1162 else
1163 {
1164 maLocale.Language = I18NLANGTAG_QLT;
1165 maLocale.Country = aCountry;
1166 maLocale.Variant = maBcp47;
1167 }
1168 bTemporaryLocale = mbInitializedLocale = true;
1169 }
1170 }
1171 }
1173 {
1174 // Do not call getLocale() here because that prefers
1175 // convertBcp47ToLocale() which would end up in recursion via
1176 // isIsoLocale()!
1177
1178 // Prepare to verify that we have a known locale, not just an
1179 // arbitrary MS-LangID.
1181 }
1183 {
1185 {
1186 if (convertLocaleToLang( false))
1187 bChanged = true;
1188 if (bTemporaryLocale || mnLangID == LANGUAGE_DONTKNOW)
1189 bTemporaryLangID = true;
1190 }
1192 meIsLiblangtagNeeded = DECISION_NO; // known locale
1193 else
1194 {
1195 const KnownTagSet& rKnowns = getKnowns();
1196 if (rKnowns.find( maBcp47) != rKnowns.end())
1197 meIsLiblangtagNeeded = DECISION_NO; // known fallback
1198 }
1199 // We may have an internal override "canonicalization".
1200 lang::Locale aNew( MsLangId::Conversion::getOverride( maLocale));
1201 if (!aNew.Language.isEmpty() &&
1202 (aNew.Language != maLocale.Language ||
1203 aNew.Country != maLocale.Country ||
1204 aNew.Variant != maLocale.Variant))
1205 {
1207 bChanged = true;
1210 meIsLiblangtagNeeded = DECISION_NO; // known locale
1211 }
1212 }
1213 if (bTemporaryLocale)
1214 {
1215 mbInitializedLocale = false;
1216 maLocale = lang::Locale();
1217 }
1218 if (bTemporaryLangID)
1219 {
1220 mbInitializedLangID = false;
1222 }
1223 }
1225 {
1226 meIsValid = DECISION_YES; // really, known must be valid ...
1227 return bChanged; // that's it
1228 }
1229
1231 SAL_INFO( "i18nlangtag", "LanguageTagImpl::canonicalize: using liblangtag for '" << maBcp47 << "'");
1232
1233 if (!mpImplLangtag)
1234 {
1235 theDataRef().init();
1236 mpImplLangtag = lt_tag_new();
1237 }
1238
1239 myLtError aError;
1240
1241 if (!lt_tag_parse_disabled && lt_tag_parse(mpImplLangtag, OUStringToOString(maBcp47, RTL_TEXTENCODING_UTF8).getStr(), &aError.p))
1242 {
1243 if (aError.p)
1244 {
1245 SAL_WARN("i18nlangtag", "LanguageTagImpl::canonicalize: could not parse '" << maBcp47 << "'");
1246 }
1247 else
1248 {
1249 char* pTag = lt_tag_canonicalize(mpImplLangtag, &aError.p);
1250 SAL_WARN_IF(!pTag, "i18nlangtag", "LanguageTagImpl::canonicalize: could not canonicalize '" << maBcp47 << "'");
1251 if (pTag)
1252 {
1253 OUString aNew(OUString::createFromAscii(pTag));
1254 // Make the lt_tag_t follow the new string if different, which
1255 // removes default script and such.
1256 if (maBcp47 != aNew)
1257 {
1258 maBcp47 = aNew;
1259 bChanged = true;
1262 if (!lt_tag_parse(mpImplLangtag, pTag, &aError.p))
1263 {
1264 SAL_WARN("i18nlangtag", "LanguageTagImpl::canonicalize: could not reparse '"
1265 << maBcp47 << "'");
1266 free(pTag);
1268 return bChanged;
1269 }
1270 }
1271 free(pTag);
1273 return bChanged;
1274 }
1275 }
1276 }
1277 else
1278 {
1279 SAL_INFO( "i18nlangtag", "LanguageTagImpl::canonicalize: could not parse '" << maBcp47 << "'");
1280 }
1282 return bChanged;
1283}
1284
1285
1287{
1288 bool bChanged = false;
1290 {
1291 bChanged = canonicalize();
1292 if (bChanged)
1293 {
1298 }
1299 }
1300 return bChanged;
1301}
1302
1303
1305{
1306 LanguageTagImpl* pImpl = getImpl();
1307 bool bRegister = ((mbInitializedBcp47 && maBcp47 != pImpl->maBcp47) ||
1308 (mbInitializedLangID && mnLangID != pImpl->mnLangID));
1309 SAL_INFO_IF( bRegister, "i18nlangtag",
1310 "LanguageTag::syncFromImpl: re-registering, '" << pImpl->maBcp47 << "' vs '" << maBcp47 <<
1311 " and 0x" << ::std::hex << pImpl->mnLangID << " vs 0x" << ::std::hex << mnLangID);
1313 if (bRegister)
1314 mpImpl = registerImpl();
1315}
1316
1317
1319{
1320 if (!mpImpl)
1321 getImpl(); // with side effect syncVarsFromRawImpl()
1322 else
1324}
1325
1326
1328{
1329 // Do not use getImpl() here.
1330 LanguageTagImpl* pImpl = mpImpl.get();
1331 if (!pImpl)
1332 return;
1333
1334 // Obviously only mutable variables.
1336 maBcp47 = pImpl->maBcp47;
1338 maLocale = pImpl->maLocale;
1340 mnLangID = pImpl->mnLangID;
1341}
1342
1343
1345{
1346 bool bChanged = getImpl()->synCanonicalize();
1347 if (bChanged)
1348 syncFromImpl();
1349 return bChanged;
1350}
1351
1352
1354{
1357
1358 if (maLocale.Language.isEmpty())
1359 {
1360 // Do not call LanguageTag::convertToBcp47(Locale) that for an empty
1361 // locale via LanguageTag::convertToBcp47(LanguageType) and
1362 // LanguageTag::convertToLocale(LanguageType) would instantiate another
1363 // LanguageTag.
1365 }
1366 if (maLocale.Language.isEmpty())
1367 {
1368 maBcp47.clear(); // bad luck
1369 }
1370 else if (maLocale.Language == I18NLANGTAG_QLT)
1371 {
1372 maBcp47 = maLocale.Variant;
1374 }
1375 else
1376 {
1378 }
1379 mbInitializedBcp47 = true;
1380}
1381
1382
1383bool LanguageTagImpl::convertLocaleToLang( bool bAllowOnTheFlyID )
1384{
1385 bool bRemapped = false;
1386 if (mbSystemLocale)
1387 {
1389 }
1390 else
1391 {
1394 {
1395 // convertLocaleToLanguage() only searches in ISO and private
1396 // definitions, search in remaining definitions, i.e. for the "C"
1397 // locale and non-standard things like "sr-latin" or "german" to
1398 // resolve to a known locale, skipping ISO lll-CC that were already
1399 // searched.
1402 {
1403 // If one found, convert back and adapt Locale and Bcp47
1404 // strings so we have a matching entry.
1405 OUString aOrgBcp47( maBcp47);
1408 bRemapped = (maBcp47 != aOrgBcp47);
1409 }
1410 }
1411 if (mnLangID == LANGUAGE_DONTKNOW && bAllowOnTheFlyID)
1412 {
1413 if (isValidBcp47())
1414 {
1415 // For language-only (including script) look if we know some
1416 // locale of that language and if so try to use the primary
1417 // language ID of that instead of generating an on-the-fly ID.
1418 if (getCountry().isEmpty() && isIsoODF())
1419 {
1421 // 'en-US' is last resort, do not use except when looking
1422 // for 'en'.
1423 if (aLoc.Language != "en" || getLanguage() == "en")
1424 {
1428 }
1429 }
1431 }
1432 else
1433 {
1434 SAL_WARN( "i18nlangtag", "LanguageTagImpl::convertLocaleToLang: with bAllowOnTheFlyID invalid '"
1435 << maBcp47 << "'");
1436 }
1437 }
1438 }
1439 mbInitializedLangID = true;
1440 return bRemapped;
1441}
1442
1443
1445{
1446 getImpl()->convertLocaleToLang( true);
1447 syncFromImpl();
1448}
1449
1450
1452{
1453 bool bIso = isIsoLocale();
1454 if (bIso)
1455 {
1456 maLocale.Language = getLanguageFromLangtag();
1457 maLocale.Country = getRegionFromLangtag();
1458 maLocale.Variant.clear();
1459 }
1460 else
1461 {
1462 maLocale.Language = I18NLANGTAG_QLT;
1463 maLocale.Country = getCountry();
1464 maLocale.Variant = maBcp47;
1465 }
1466 mbInitializedLocale = true;
1467}
1468
1469
1471{
1473 syncFromImpl();
1474}
1475
1476
1478{
1479 if (mbSystemLocale)
1480 {
1482 }
1483 else
1484 {
1487 convertLocaleToLang( true);
1488 }
1489 mbInitializedLangID = true;
1490}
1491
1492
1494{
1496 syncFromImpl();
1497}
1498
1499
1501{
1503 {
1505 mbInitializedLangID = true;
1506 }
1507 // Resolve system here! The original is remembered as mbSystemLocale.
1509 mbInitializedLocale = true;
1510}
1511
1512
1514{
1516 syncFromImpl();
1517}
1518
1519
1521{
1525 mbInitializedBcp47 = true;
1526}
1527
1528
1530{
1531 // The rtl_Locale follows the Open Group Base Specification,
1532 // 8.2 Internationalization Variables
1533 // language[_territory][.codeset][@modifier]
1534 // On GNU/Linux systems usually being glibc locales.
1535 // sal/osl/unx/nlsupport.c _parse_locale() parses them into
1536 // Language: language 2 or 3 alpha code
1537 // Country: [territory] 2 alpha code
1538 // Variant: [.codeset][@modifier]
1539 // Variant effectively contains anything that follows the territory, not
1540 // looking for '.' dot delimiter or '@' modifier content.
1541 if (maLocale.Variant.isEmpty())
1542 return;
1543
1544 OString aStr = OUStringToOString(maLocale.Language, RTL_TEXTENCODING_UTF8) + "_" + OUStringToOString(Concat2View(maLocale.Country + maLocale.Variant),
1545 RTL_TEXTENCODING_UTF8);
1546 /* FIXME: let liblangtag parse this entirely with
1547 * lt_tag_convert_from_locale() but that needs a patch to pass the
1548 * string. */
1549#if 0
1550 myLtError aError;
1551 theDataRef::get().init();
1552 mpImplLangtag = lt_tag_convert_from_locale( aStr.getStr(), &aError.p);
1553 maBcp47 = OStringToOUString( lt_tag_get_string( mpImplLangtag), RTL_TEXTENCODING_UTF8);
1554 mbInitializedBcp47 = true;
1555#else
1558 {
1559 SAL_WARN( "i18nlangtag", "LanguageTag(rtl_Locale) - unknown: " << aStr);
1560 mnLangID = LANGUAGE_ENGLISH_US; // we need _something_ here
1561 }
1562 mbInitializedLangID = true;
1563#endif
1564 maLocale = lang::Locale();
1565 mbInitializedLocale = false;
1566}
1567
1568
1569const OUString & LanguageTagImpl::getBcp47() const
1570{
1571 if (!mbInitializedBcp47)
1572 {
1574 const_cast<LanguageTagImpl*>(this)->convertLocaleToBcp47();
1575 else
1576 const_cast<LanguageTagImpl*>(this)->convertLangToBcp47();
1577 }
1578 return maBcp47;
1579}
1580
1581
1582const OUString & LanguageTag::getBcp47( bool bResolveSystem ) const
1583{
1584 static const OUString theEmptyBcp47 = u"";
1585
1586 if (!bResolveSystem && mbSystemLocale)
1587 return theEmptyBcp47;
1588 if (!mbInitializedBcp47)
1590 if (!mbInitializedBcp47)
1591 {
1592 getImpl()->getBcp47();
1593 const_cast<LanguageTag*>(this)->syncFromImpl();
1594 }
1595 return maBcp47;
1596}
1597
1598
1600{
1601 OUString aLanguage;
1603 if (maBcp47.isEmpty())
1604 return aLanguage;
1605 if (mpImplLangtag)
1606 {
1607 const lt_lang_t* pLangT = lt_tag_get_language( mpImplLangtag);
1608 SAL_WARN_IF( !pLangT, "i18nlangtag",
1609 "LanguageTag::getLanguageFromLangtag: pLangT==NULL for '" << maBcp47 << "'");
1610 if (!pLangT)
1611 return aLanguage;
1612 const char* pLang = lt_lang_get_tag( pLangT);
1613 SAL_WARN_IF( !pLang, "i18nlangtag",
1614 "LanguageTag::getLanguageFromLangtag: pLang==NULL for '" << maBcp47 << "'");
1615 if (pLang)
1616 aLanguage = OUString::createFromAscii( pLang);
1617 }
1618 else
1619 {
1621 aLanguage = maCachedLanguage;
1622 }
1623 return aLanguage;
1624}
1625
1626
1628{
1629 OUString aScript;
1631 if (maBcp47.isEmpty())
1632 return aScript;
1633 if (mpImplLangtag)
1634 {
1635 const lt_script_t* pScriptT = lt_tag_get_script( mpImplLangtag);
1636 // pScriptT==NULL is valid for default scripts
1637 if (!pScriptT)
1638 return aScript;
1639 const char* pScript = lt_script_get_tag( pScriptT);
1640 SAL_WARN_IF( !pScript, "i18nlangtag", "LanguageTag::getScriptFromLangtag: pScript==NULL");
1641 if (pScript)
1642 aScript = OUString::createFromAscii( pScript);
1643 }
1644 else
1645 {
1647 aScript = maCachedScript;
1648 }
1649 return aScript;
1650}
1651
1652
1654{
1655 OUString aRegion;
1657 if (maBcp47.isEmpty())
1658 return aRegion;
1659 if (mpImplLangtag)
1660 {
1661 const lt_region_t* pRegionT = lt_tag_get_region( mpImplLangtag);
1662 // pRegionT==NULL is valid for language only tags, rough check here
1663 // that does not take sophisticated tags into account that actually
1664 // should have a region, check for ll, lll, ll-Ssss and lll-Ssss so
1665 // that ll-CC and lll-CC actually fail.
1666 SAL_WARN_IF( !pRegionT &&
1667 maBcp47.getLength() != 2 && maBcp47.getLength() != 3 &&
1668 maBcp47.getLength() != 7 && maBcp47.getLength() != 8,
1669 "i18nlangtag", "LanguageTag::getRegionFromLangtag: pRegionT==NULL for '" << maBcp47 << "'");
1670 if (!pRegionT)
1671 return aRegion;
1672 const char* pRegion = lt_region_get_tag( pRegionT);
1673 SAL_WARN_IF( !pRegion, "i18nlangtag",
1674 "LanguageTag::getRegionFromLangtag: pRegion==NULL for'" << maBcp47 << "'");
1675 if (pRegion)
1676 aRegion = OUString::createFromAscii( pRegion);
1677 }
1678 else
1679 {
1681 aRegion = maCachedCountry;
1682 }
1683 return aRegion;
1684}
1685
1686
1688{
1689 OUStringBuffer aVariants;
1691 if (maBcp47.isEmpty())
1692 return OUString();
1693 if (mpImplLangtag)
1694 {
1695 const lt_list_t* pVariantsT = lt_tag_get_variants( mpImplLangtag);
1696 for (const lt_list_t* pE = pVariantsT; pE; pE = lt_list_next( pE))
1697 {
1698 const lt_variant_t* pVariantT = static_cast<const lt_variant_t*>(lt_list_value( pE));
1699 if (pVariantT)
1700 {
1701 const char* p = lt_variant_get_tag( pVariantT);
1702 if (p)
1703 {
1704 if (!aVariants.isEmpty())
1705 aVariants.append("-");
1706 aVariants.appendAscii(p);
1707 }
1708 }
1709 }
1710 }
1711 else
1712 {
1714 aVariants = maCachedVariants;
1715 }
1716 return aVariants.makeStringAndClear();
1717}
1718
1719
1720const css::lang::Locale & LanguageTag::getLocale( bool bResolveSystem ) const
1721{
1722 // "static" to be returned as const reference to an empty locale.
1723 static lang::Locale theEmptyLocale;
1724
1725 if (!bResolveSystem && mbSystemLocale)
1726 return theEmptyLocale;
1730 {
1732 const_cast<LanguageTag*>(this)->convertBcp47ToLocale();
1733 else
1734 const_cast<LanguageTag*>(this)->convertLangToLocale();
1735 }
1736 return maLocale;
1737}
1738
1739
1740LanguageType LanguageTag::getLanguageType( bool bResolveSystem ) const
1741{
1742 if (!bResolveSystem && mbSystemLocale)
1743 return LANGUAGE_SYSTEM;
1747 {
1749 const_cast<LanguageTag*>(this)->convertBcp47ToLang();
1750 else
1751 {
1752 const_cast<LanguageTag*>(this)->convertLocaleToLang();
1753
1754 /* Resolve a locale only unknown due to some redundant information,
1755 * like 'de-Latn-DE' with script tag. Never call canonicalize()
1756 * from within convert...() methods due to possible recursion, so
1757 * do it here. */
1759 const_cast<LanguageTag*>(this)->synCanonicalize();
1760 }
1761 }
1762 return mnLangID;
1763}
1764
1765
1766void LanguageTag::getIsoLanguageScriptCountry( OUString& rLanguage, OUString& rScript, OUString& rCountry ) const
1767{
1768 // Calling isIsoODF() first is a predicate for getLanguage(), getScript()
1769 // and getCountry() to work correctly in this context.
1770 if (isIsoODF())
1771 {
1772 rLanguage = getLanguage();
1773 rScript = getScript();
1774 rCountry = getCountry();
1775 }
1776 else
1777 {
1778 rLanguage = (LanguageTag::isIsoLanguage( getLanguage()) ? getLanguage() : OUString());
1779 rScript = (LanguageTag::isIsoScript( getScript()) ? getScript() : OUString());
1780 rCountry = (LanguageTag::isIsoCountry( getCountry()) ? getCountry() : OUString());
1781 }
1782}
1783
1784
1785namespace
1786{
1787
1788bool isLowerAscii( sal_Unicode c )
1789{
1790 return 'a' <= c && c <= 'z';
1791}
1792
1793bool isUpperAscii( sal_Unicode c )
1794{
1795 return 'A' <= c && c <= 'Z';
1796}
1797
1798}
1799
1800
1801// static
1802bool LanguageTag::isIsoLanguage( const OUString& rLanguage )
1803{
1804 /* TODO: ignore case? For now let's see where rubbish is used. */
1805 bool b2chars = rLanguage.getLength() == 2;
1806 if ((b2chars || rLanguage.getLength() == 3) &&
1807 isLowerAscii( rLanguage[0]) && isLowerAscii( rLanguage[1]) &&
1808 (b2chars || isLowerAscii( rLanguage[2])))
1809 return true;
1810 SAL_WARN_IF( ((rLanguage.getLength() == 2 || rLanguage.getLength() == 3) &&
1811 (isUpperAscii( rLanguage[0]) || isUpperAscii( rLanguage[1]))) ||
1812 (rLanguage.getLength() == 3 && isUpperAscii( rLanguage[2])), "i18nlangtag",
1813 "LanguageTag::isIsoLanguage: rejecting upper case " << rLanguage);
1814 return false;
1815}
1816
1817
1818// static
1819bool LanguageTag::isIsoCountry( const OUString& rRegion )
1820{
1821 /* TODO: ignore case? For now let's see where rubbish is used. */
1822 if (rRegion.isEmpty() ||
1823 (rRegion.getLength() == 2 && isUpperAscii( rRegion[0]) && isUpperAscii( rRegion[1])))
1824 return true;
1825 SAL_WARN_IF( rRegion.getLength() == 2 && (isLowerAscii( rRegion[0]) || isLowerAscii( rRegion[1])),
1826 "i18nlangtag", "LanguageTag::isIsoCountry: rejecting lower case " << rRegion);
1827 return false;
1828}
1829
1830
1831// static
1832bool LanguageTag::isIsoScript( const OUString& rScript )
1833{
1834 /* TODO: ignore case? For now let's see where rubbish is used. */
1835 if (rScript.isEmpty() ||
1836 (rScript.getLength() == 4 &&
1837 isUpperAscii( rScript[0]) && isLowerAscii( rScript[1]) &&
1838 isLowerAscii( rScript[2]) && isLowerAscii( rScript[3])))
1839 return true;
1840 SAL_WARN_IF( rScript.getLength() == 4 &&
1841 (isLowerAscii( rScript[0]) || isUpperAscii( rScript[1]) ||
1842 isUpperAscii( rScript[2]) || isUpperAscii( rScript[3])),
1843 "i18nlangtag", "LanguageTag::isIsoScript: rejecting case mismatch " << rScript);
1844 return false;
1845}
1846
1847
1848OUString const & LanguageTagImpl::getLanguage() const
1849{
1850 if (!mbCachedLanguage)
1851 {
1853 mbCachedLanguage = true;
1854 }
1855 return maCachedLanguage;
1856}
1857
1858
1860{
1861 LanguageTagImpl const* pImpl = getImpl();
1862 if (pImpl->mbCachedLanguage)
1863 return pImpl->maCachedLanguage;
1864 OUString aRet( pImpl->getLanguage());
1865 const_cast<LanguageTag*>(this)->syncFromImpl();
1866 return aRet;
1867}
1868
1869
1870OUString const & LanguageTagImpl::getScript() const
1871{
1872 if (!mbCachedScript)
1873 {
1874 maCachedScript = const_cast<LanguageTagImpl*>(this)->getScriptFromLangtag();
1875 mbCachedScript = true;
1876 }
1877 return maCachedScript;
1878}
1879
1880
1882{
1883 LanguageTagImpl const* pImpl = getImpl();
1884 if (pImpl->mbCachedScript)
1885 return pImpl->maCachedScript;
1886 OUString aRet( pImpl->getScript());
1887 const_cast<LanguageTag*>(this)->syncFromImpl();
1888 return aRet;
1889}
1890
1891
1893{
1894 OUString aLanguageScript( getLanguage());
1895 OUString aScript( getScript());
1896 if (!aScript.isEmpty())
1897 {
1898 aLanguageScript += "-" + aScript;
1899 }
1900 return aLanguageScript;
1901}
1902
1903
1904OUString const & LanguageTagImpl::getCountry() const
1905{
1906 if (!mbCachedCountry)
1907 {
1910 maCachedCountry.clear();
1911 mbCachedCountry = true;
1912 }
1913 return maCachedCountry;
1914}
1915
1916
1918{
1919 LanguageTagImpl const* pImpl = getImpl();
1920 if (pImpl->mbCachedCountry)
1921 return pImpl->maCachedCountry;
1922 OUString aRet( pImpl->getCountry());
1923 const_cast<LanguageTag*>(this)->syncFromImpl();
1924 return aRet;
1925}
1926
1927
1929{
1930 return const_cast<LanguageTagImpl*>(this)->getRegionFromLangtag();
1931}
1932
1933
1934OUString const & LanguageTagImpl::getVariants() const
1935{
1936 if (!mbCachedVariants)
1937 {
1939 mbCachedVariants = true;
1940 }
1941 return maCachedVariants;
1942}
1943
1944
1946{
1947 LanguageTagImpl const * pImpl = getImpl();
1948 if (pImpl->mbCachedVariants)
1949 return pImpl->maCachedVariants;
1950 OUString aRet( pImpl->getVariants());
1951 const_cast<LanguageTag*>(this)->syncFromImpl();
1952 return aRet;
1953}
1954
1956{
1958 return maCachedGlibcString;
1959
1960 if (!mpImplLangtag)
1961 {
1963 const_cast<LanguageTagImpl*>(this)->synCanonicalize();
1964 }
1965 if (mpImplLangtag)
1966 {
1967 char* pLang = lt_tag_convert_to_locale(mpImplLangtag, nullptr);
1968 if (pLang)
1969 {
1970 maCachedGlibcString = OUString::createFromAscii( pLang);
1971 mbCachedGlibcString = true;
1972 free(pLang);
1973 }
1974 }
1975 return maCachedGlibcString;
1976}
1977
1978OUString LanguageTag::getGlibcLocaleString( std::u16string_view rEncoding ) const
1979{
1980 OUString aRet;
1981 if (isIsoLocale())
1982 {
1983 OUString aCountry( getCountry());
1984 if (aCountry.isEmpty())
1985 aRet = getLanguage() + rEncoding;
1986 else
1987 aRet = getLanguage() + "_" + aCountry + rEncoding;
1988 }
1989 else
1990 {
1991 aRet = getImpl()->getGlibcLocaleString();
1992 sal_Int32 nAt = aRet.indexOf('@');
1993 if (nAt != -1)
1994 aRet = OUString::Concat(aRet.subView(0, nAt)) + rEncoding + aRet.subView(nAt);
1995 else
1996 aRet += rEncoding;
1997 }
1998 return aRet;
1999}
2000
2002{
2003 if (!mbCachedScript)
2004 getScript();
2005 return !maCachedScript.isEmpty();
2006}
2007
2008
2010{
2011 bool bRet = getImpl()->hasScript();
2012 const_cast<LanguageTag*>(this)->syncFromImpl();
2013 return bRet;
2014}
2015
2016
2018{
2019 return meScriptType;
2020}
2021
2022
2024{
2025 return getImpl()->getScriptType();
2026}
2027
2028
2030{
2031 if (meScriptType == LanguageTag::ScriptType::UNKNOWN) // poor man's clash resolution
2032 meScriptType = st;
2033}
2034
2035
2037{
2038 getImpl()->setScriptType(st);
2039}
2040
2041
2043{
2044 OUString aLanguage, aScript, aCountry, aRegion, aVariants;
2045 Extraction eExt = simpleExtract( maBcp47, aLanguage, aScript, aCountry, aRegion, aVariants);
2046 bool bRet = (eExt == EXTRACTED_LSC || eExt == EXTRACTED_LV || eExt == EXTRACTED_LR);
2047 if (bRet)
2048 {
2049 maCachedLanguage = aLanguage;
2050 maCachedScript = aScript;
2051 maCachedCountry = aCountry;
2052 maCachedVariants = aVariants;
2054 }
2055 return bRet;
2056}
2057
2058
2060{
2062 {
2063 const_cast<LanguageTagImpl*>(this)->synCanonicalize();
2064 // It must be at most ll-CC or lll-CC
2065 // Do not use getCountry() here, use getRegion() instead.
2066 meIsIsoLocale = ((maBcp47.isEmpty() ||
2067 (maBcp47.getLength() <= 6 && LanguageTag::isIsoLanguage( getLanguage()) &&
2069 }
2070 return meIsIsoLocale == DECISION_YES;
2071}
2072
2073
2075{
2076 bool bRet = getImpl()->isIsoLocale();
2077 const_cast<LanguageTag*>(this)->syncFromImpl();
2078 return bRet;
2079}
2080
2081
2083{
2085 {
2086 const_cast<LanguageTagImpl*>(this)->synCanonicalize();
2088 {
2090 return false;
2091 }
2092 // The usual case is lll-CC so simply check that first.
2093 if (isIsoLocale())
2094 {
2096 return true;
2097 }
2098 // If this is not ISO locale for which script must not exist it can
2099 // still be ISO locale plus ISO script lll-Ssss-CC, but not ll-vvvv ...
2100 // ll-vvvvvvvv
2101 meIsIsoODF = ((maBcp47.getLength() <= 11 && LanguageTag::isIsoLanguage( getLanguage()) &&
2103 getVariants().isEmpty()) ? DECISION_YES : DECISION_NO);
2104 }
2105 return meIsIsoODF == DECISION_YES;
2106}
2107
2108
2110{
2111 bool bRet = getImpl()->isIsoODF();
2112 const_cast<LanguageTag*>(this)->syncFromImpl();
2113 return bRet;
2114}
2115
2116
2118{
2120 {
2121 const_cast<LanguageTagImpl*>(this)->synCanonicalize();
2122 SAL_WARN_IF( meIsValid == DECISION_DONTKNOW, "i18nlangtag",
2123 "LanguageTag::isValidBcp47: canonicalize() didn't set meIsValid");
2124 }
2125 return meIsValid == DECISION_YES;
2126}
2127
2128
2130{
2131 bool bRet = getImpl()->isValidBcp47();
2132 const_cast<LanguageTag*>(this)->syncFromImpl();
2133 return bRet;
2134}
2135
2136
2138{
2139 if (!mbIsFallback)
2140 {
2141 const lang::Locale& rLocale1 = getLocale();
2142 lang::Locale aLocale2( MsLangId::Conversion::lookupFallbackLocale( rLocale1));
2143 if ( rLocale1.Language != aLocale2.Language ||
2144 rLocale1.Country != aLocale2.Country ||
2145 rLocale1.Variant != aLocale2.Variant)
2146 {
2147 if (rLocale1.Language != "en" && aLocale2.Language == "en" && aLocale2.Country == "US")
2148 {
2149 // "en-US" is the last resort fallback, try if we get a better
2150 // one for the fallback hierarchy of a non-"en" locale.
2151 ::std::vector< OUString > aFallbacks( getFallbackStrings( false));
2152 for (auto const& fallback : aFallbacks)
2153 {
2154 lang::Locale aLocale3( LanguageTag(fallback).getLocale());
2155 aLocale2 = MsLangId::Conversion::lookupFallbackLocale( aLocale3);
2156 if (aLocale2.Language != "en" || aLocale2.Country != "US")
2157 break; // for, success
2158 }
2159 }
2160 SAL_INFO( "i18nlangtag", "LanguageTag::makeFallback - for (" <<
2161 rLocale1.Language << "," << rLocale1.Country << "," << rLocale1.Variant << ") to (" <<
2162 aLocale2.Language << "," << aLocale2.Country << "," << aLocale2.Variant << ")");
2163 reset( aLocale2);
2164 }
2165 mbIsFallback = true;
2166 }
2167 return *this;
2168}
2169
2170
2171/* TODO: maybe this now could take advantage of the mnOverride field in
2172 * isolang.cxx entries and search for kSAME instead of hardcoded special
2173 * fallbacks. Though iterating through those tables would be slower and even
2174 * then there would be some special cases, but we wouldn't lack entries that
2175 * were missed out. */
2176::std::vector< OUString > LanguageTag::getFallbackStrings( bool bIncludeFullBcp47 ) const
2177{
2178 ::std::vector< OUString > aVec;
2179 OUString aLanguage( getLanguage());
2180 OUString aCountry( getCountry());
2181 if (isIsoLocale())
2182 {
2183 if (!aCountry.isEmpty())
2184 {
2185 if (bIncludeFullBcp47)
2186 aVec.emplace_back(aLanguage + "-" + aCountry);
2187 if (aLanguage == "zh")
2188 {
2189 // For zh-HK or zh-MO also list zh-TW to get zh-Hant, for all
2190 // other zh-XX also list zh-CN to get zh-Hans; both of which we
2191 // use the legacy forms instead of the more correct script
2192 // tags that unfortunately most pieces don't understand.
2193 if (aCountry == "HK" || aCountry == "MO")
2194 aVec.emplace_back(aLanguage + "-TW");
2195 else if (aCountry != "CN")
2196 aVec.emplace_back(aLanguage + "-CN");
2197 aVec.push_back( aLanguage);
2198 }
2199 else if (aLanguage == "sh")
2200 {
2201 // Manual list instead of calling
2202 // LanguageTag( "sr-Latn-" + aCountry).getFallbackStrings( true)
2203 // that would also include "sh-*" again.
2204 aVec.emplace_back("sr-Latn-" + aCountry);
2205 aVec.emplace_back("sr-Latn");
2206 aVec.emplace_back("sh"); // legacy with script, before default script with country
2207 aVec.emplace_back("sr-" + aCountry);
2208 aVec.emplace_back("sr");
2209 }
2210 else if (aLanguage == "ca" && aCountry == "XV")
2211 {
2212 ::std::vector< OUString > aRep( LanguageTag( "ca-ES-valencia").getFallbackStrings( true));
2213 aVec.insert( aVec.end(), aRep.begin(), aRep.end());
2214 // Already includes 'ca' language fallback.
2215 }
2216 else if (aLanguage == "ku")
2217 {
2218 if (aCountry == "TR" || aCountry == "SY")
2219 {
2220 aVec.emplace_back("kmr-Latn-" + aCountry);
2221 aVec.emplace_back("kmr-" + aCountry);
2222 aVec.emplace_back("kmr-Latn");
2223 aVec.emplace_back("kmr");
2224 aVec.push_back( aLanguage);
2225 }
2226 else if (aCountry == "IQ" || aCountry == "IR")
2227 {
2228 aVec.emplace_back("ckb-" + aCountry);
2229 aVec.emplace_back("ckb");
2230 }
2231 }
2232 else if (aLanguage == "kmr" && (aCountry == "TR" || aCountry == "SY"))
2233 {
2234 aVec.emplace_back("ku-Latn-" + aCountry);
2235 aVec.emplace_back("ku-" + aCountry);
2236 aVec.push_back( aLanguage);
2237 aVec.emplace_back("ku");
2238 }
2239 else if (aLanguage == "ckb" && (aCountry == "IQ" || aCountry == "IR"))
2240 {
2241 aVec.emplace_back("ku-Arab-" + aCountry);
2242 aVec.emplace_back("ku-" + aCountry);
2243 aVec.push_back( aLanguage);
2244 // not 'ku' only, that was used for Latin script
2245 }
2246 else
2247 aVec.push_back( aLanguage);
2248 }
2249 else
2250 {
2251 if (bIncludeFullBcp47)
2252 aVec.push_back( aLanguage);
2253 if (aLanguage == "sh")
2254 {
2255 aVec.emplace_back("sr-Latn");
2256 aVec.emplace_back("sr");
2257 }
2258 else if (aLanguage == "pli")
2259 {
2260 // a special case for Pali dictionary, see fdo#41599
2261 aVec.emplace_back("pi-Latn");
2262 aVec.emplace_back("pi");
2263 }
2264 }
2265 return aVec;
2266 }
2267
2268 getBcp47(); // have maBcp47 now
2269 if (bIncludeFullBcp47)
2270 aVec.push_back( maBcp47);
2271
2272 // Special cases for deprecated tags and their replacements, include both
2273 // in fallbacks in a sensible order.
2274 /* TODO: could such things be generalized and automated with liblangtag? */
2275 if (maBcp47 == "en-GB-oed")
2276 aVec.emplace_back("en-GB-oxendict");
2277 else if (maBcp47 == "en-GB-oxendict")
2278 aVec.emplace_back("en-GB-oed");
2279
2280 OUString aVariants( getVariants());
2281 OUString aTmp;
2282 if (hasScript())
2283 {
2284 OUString aScript = getScript();
2285 bool bHaveLanguageScriptVariant = false;
2286 if (!aCountry.isEmpty())
2287 {
2288 if (!aVariants.isEmpty())
2289 {
2290 aTmp = aLanguage + "-" + aScript + "-" + aCountry + "-" + aVariants;
2291 if (aTmp != maBcp47)
2292 aVec.push_back( aTmp);
2293 // Language with variant but without country before language
2294 // without variant but with country.
2295 aTmp = aLanguage + "-" + aScript + "-" + aVariants;
2296 if (aTmp != maBcp47)
2297 aVec.push_back( aTmp);
2298 bHaveLanguageScriptVariant = true;
2299 }
2300 aTmp = aLanguage + "-" + aScript + "-" + aCountry;
2301 if (aTmp != maBcp47)
2302 aVec.push_back( aTmp);
2303 if (aLanguage == "sr" && aScript == "Latn")
2304 {
2305 // sr-Latn-CS => sr-Latn-YU, sh-CS, sh-YU
2306 if (aCountry == "CS")
2307 {
2308 aVec.emplace_back("sr-Latn-YU");
2309 aVec.emplace_back("sh-CS");
2310 aVec.emplace_back("sh-YU");
2311 }
2312 else
2313 aVec.emplace_back("sh-" + aCountry);
2314 }
2315 else if (aLanguage == "pi" && aScript == "Latn")
2316 aVec.emplace_back("pli"); // a special case for Pali dictionary, see fdo#41599
2317 else if (aLanguage == "krm" && aScript == "Latn" && (aCountry == "TR" || aCountry == "SY"))
2318 aVec.emplace_back("ku-" + aCountry);
2319 }
2320 if (!aVariants.isEmpty() && !bHaveLanguageScriptVariant)
2321 {
2322 aTmp = aLanguage + "-" + aScript + "-" + aVariants;
2323 if (aTmp != maBcp47)
2324 aVec.push_back( aTmp);
2325 }
2326 aTmp = aLanguage + "-" + aScript;
2327 if (aTmp != maBcp47)
2328 aVec.push_back( aTmp);
2329
2330 // 'sh' actually denoted a script, so have it here instead of appended
2331 // at the end as language-only.
2332 if (aLanguage == "sr" && aScript == "Latn")
2333 aVec.emplace_back("sh");
2334 else if (aLanguage == "ku" && aScript == "Arab")
2335 aVec.emplace_back("ckb");
2336 // 'ku' only denoted Latin script
2337 else if (aLanguage == "krm" && aScript == "Latn" && aCountry.isEmpty())
2338 aVec.emplace_back("ku");
2339 }
2340 bool bHaveLanguageVariant = false;
2341 if (!aCountry.isEmpty())
2342 {
2343 if (!aVariants.isEmpty())
2344 {
2345 aTmp = aLanguage + "-" + aCountry + "-" + aVariants;
2346 if (aTmp != maBcp47)
2347 aVec.push_back( aTmp);
2348 if (maBcp47 == "ca-ES-valencia")
2349 aVec.emplace_back("ca-XV");
2350 // Language with variant but without country before language
2351 // without variant but with country.
2352 // But only if variant is not from a grandfathered tag that
2353 // wouldn't match the rules, i.e. "de-1901" is fine but "en-oed" is
2354 // not.
2355 if (aVariants.getLength() >= 5 ||
2356 (aVariants.getLength() == 4 && '0' <= aVariants[0] && aVariants[0] <= '9'))
2357 {
2358 aTmp = aLanguage + "-" + aVariants;
2359 if (aTmp != maBcp47)
2360 aVec.push_back( aTmp);
2361 bHaveLanguageVariant = true;
2362 }
2363 }
2364 aTmp = aLanguage + "-" + aCountry;
2365 if (aTmp != maBcp47)
2366 aVec.push_back( aTmp);
2367 }
2368 if (!aVariants.isEmpty() && !bHaveLanguageVariant)
2369 {
2370 // Only if variant is not from a grandfathered tag that wouldn't match
2371 // the rules, i.e. "de-1901" is fine but "en-oed" is not.
2372 if (aVariants.getLength() >= 5 ||
2373 (aVariants.getLength() == 4 && '0' <= aVariants[0] && aVariants[0] <= '9'))
2374 {
2375 aTmp = aLanguage + "-" + aVariants;
2376 if (aTmp != maBcp47)
2377 aVec.push_back( aTmp);
2378 }
2379 }
2380
2381 // Insert legacy fallbacks with country before language-only, but only
2382 // default script, script was handled already above.
2383 if (!aCountry.isEmpty())
2384 {
2385 if (aLanguage == "sr" && aCountry == "CS")
2386 aVec.emplace_back("sr-YU");
2387 }
2388
2389 // Original language-only.
2390 if (!aLanguage.isEmpty() && aLanguage != maBcp47)
2391 aVec.push_back( aLanguage);
2392
2393 return aVec;
2394}
2395
2396
2398{
2400 return "es-ES_tradnl";
2401 return getBcp47();
2402}
2403
2404
2405bool LanguageTag::equals( const LanguageTag & rLanguageTag ) const
2406{
2407 // If SYSTEM is not to be resolved or either both are SYSTEM or none, we
2408 // can use the operator==() optimization.
2409 if (isSystemLocale() == rLanguageTag.isSystemLocale())
2410 return operator==( rLanguageTag);
2411
2412 // Compare full language tag strings.
2413 return getBcp47() == rLanguageTag.getBcp47();
2414}
2415
2416
2417bool LanguageTag::operator==( const LanguageTag & rLanguageTag ) const
2418{
2419 if (isSystemLocale() && rLanguageTag.isSystemLocale())
2420 return true; // both SYSTEM
2421
2422 // No need to convert to BCP47 if both Lang-IDs are available.
2423 if (mbInitializedLangID && rLanguageTag.mbInitializedLangID)
2424 {
2425 // Equal if same ID and no SYSTEM is involved or both are SYSTEM.
2426 return mnLangID == rLanguageTag.mnLangID && isSystemLocale() == rLanguageTag.isSystemLocale();
2427 }
2428
2429 // Compare full language tag strings but SYSTEM unresolved.
2430 return getBcp47( false) == rLanguageTag.getBcp47( false);
2431}
2432
2433
2434bool LanguageTag::operator!=( const LanguageTag & rLanguageTag ) const
2435{
2436 return !operator==( rLanguageTag);
2437}
2438
2439
2440bool LanguageTag::operator<( const LanguageTag & rLanguageTag ) const
2441{
2442 return getBcp47( false).compareToIgnoreAsciiCase( rLanguageTag.getBcp47( false)) < 0;
2443}
2444
2445
2446// static
2448 OUString& rLanguage, OUString& rScript, OUString& rCountry, OUString& rRegion, OUString& rVariants )
2449{
2451 const sal_Int32 nLen = rBcp47.getLength();
2452 const sal_Int32 nHyph1 = rBcp47.indexOf( '-');
2453 sal_Int32 nHyph2 = (nHyph1 < 0 ? -1 : rBcp47.indexOf( '-', nHyph1 + 1));
2454 sal_Int32 nHyph3 = (nHyph2 < 0 ? -1 : rBcp47.indexOf( '-', nHyph2 + 1));
2455 sal_Int32 nHyph4 = (nHyph3 < 0 ? -1 : rBcp47.indexOf( '-', nHyph3 + 1));
2456 if (nLen == 1 && rBcp47[0] == '*') // * the dreaded jolly joker
2457 {
2458 // It's f*d up but we need to recognize this.
2459 eRet = EXTRACTED_X_JOKER;
2460 }
2461 else if (nHyph1 == 1 && rBcp47[0] == 'x') // x-... privateuse
2462 {
2463 // x-... privateuse tags MUST be known to us by definition.
2464 eRet = EXTRACTED_X;
2465 }
2466 else if (nLen == 1 && rBcp47[0] == 'C') // the 'C' locale
2467 {
2468 eRet = EXTRACTED_C_LOCALE;
2469 rLanguage = "C";
2470 rScript.clear();
2471 rCountry.clear();
2472 rRegion.clear();
2473 rVariants.clear();
2474 }
2475 else if (nLen == 2 || nLen == 3) // ll or lll
2476 {
2477 if (nHyph1 < 0)
2478 {
2479 rLanguage = rBcp47.toAsciiLowerCase();
2480 rScript.clear();
2481 rCountry.clear();
2482 rRegion.clear();
2483 rVariants.clear();
2484 eRet = EXTRACTED_LSC;
2485 }
2486 }
2487 else if ( (nHyph1 == 2 && nLen == 5) // ll-CC
2488 || (nHyph1 == 3 && nLen == 6)) // lll-CC
2489 {
2490 if (nHyph2 < 0)
2491 {
2492 rLanguage = rBcp47.copy( 0, nHyph1).toAsciiLowerCase();
2493 rCountry = rBcp47.copy( nHyph1 + 1, 2).toAsciiUpperCase();
2494 rRegion.clear();
2495 rScript.clear();
2496 rVariants.clear();
2497 eRet = EXTRACTED_LSC;
2498 }
2499 }
2500 else if ( (nHyph1 == 2 && nLen == 6) // ll-rrr
2501 || (nHyph1 == 3 && nLen == 7)) // lll-rrr
2502 {
2503 if (nHyph2 < 0)
2504 {
2505 rLanguage = rBcp47.copy( 0, nHyph1).toAsciiLowerCase();
2506 rCountry.clear();
2507 rRegion = rBcp47.copy( nHyph1 + 1, 3);
2508 rScript.clear();
2509 rVariants.clear();
2510 eRet = EXTRACTED_LR;
2511 }
2512 }
2513 else if ( (nHyph1 == 2 && nLen == 7) // ll-Ssss or ll-vvvv
2514 || (nHyph1 == 3 && nLen == 8)) // lll-Ssss or lll-vvvv
2515 {
2516 if (nHyph2 < 0)
2517 {
2518 sal_Unicode c = rBcp47[nHyph1+1];
2519 if ('0' <= c && c <= '9')
2520 {
2521 // (DIGIT 3ALNUM) vvvv variant instead of Ssss script
2522 rLanguage = rBcp47.copy( 0, nHyph1).toAsciiLowerCase();
2523 rScript.clear();
2524 rCountry.clear();
2525 rRegion.clear();
2526 rVariants = rBcp47.copy( nHyph1 + 1);
2527 eRet = EXTRACTED_LV;
2528 }
2529 else
2530 {
2531 rLanguage = rBcp47.copy( 0, nHyph1).toAsciiLowerCase();
2532 rScript = rBcp47.copy( nHyph1 + 1, 1).toAsciiUpperCase() +
2533 rBcp47.copy( nHyph1 + 2, 3).toAsciiLowerCase();
2534 rCountry.clear();
2535 rRegion.clear();
2536 rVariants.clear();
2537 eRet = EXTRACTED_LSC;
2538 }
2539 }
2540 }
2541 else if ( (nHyph1 == 2 && nHyph2 == 7 && nLen == 10) // ll-Ssss-CC
2542 || (nHyph1 == 3 && nHyph2 == 8 && nLen == 11)) // lll-Ssss-CC
2543 {
2544 if (nHyph3 < 0)
2545 {
2546 rLanguage = rBcp47.copy( 0, nHyph1).toAsciiLowerCase();
2547 rScript = rBcp47.copy( nHyph1 + 1, 1).toAsciiUpperCase() + rBcp47.copy( nHyph1 + 2, 3).toAsciiLowerCase();
2548 rCountry = rBcp47.copy( nHyph2 + 1, 2).toAsciiUpperCase();
2549 rRegion.clear();
2550 rVariants.clear();
2551 eRet = EXTRACTED_LSC;
2552 }
2553 }
2554 else if ( (nHyph1 == 2 && nHyph2 == 7 && nLen == 11) // ll-Ssss-rrr
2555 || (nHyph1 == 3 && nHyph2 == 8 && nLen == 12)) // lll-Ssss-rrr
2556 {
2557 if (nHyph3 < 0)
2558 {
2559 rLanguage = rBcp47.copy( 0, nHyph1).toAsciiLowerCase();
2560 rScript = rBcp47.copy( nHyph1 + 1, 1).toAsciiUpperCase() + rBcp47.copy( nHyph1 + 2, 3).toAsciiLowerCase();
2561 rCountry.clear();
2562 rRegion = rBcp47.copy( nHyph2 + 1, 3);
2563 rVariants.clear();
2564 eRet = EXTRACTED_LR;
2565 }
2566 }
2567 else if ( (nHyph1 == 2 && nHyph2 == 7 && nHyph3 == 10 && nLen >= 15) // ll-Ssss-CC-vvvv[vvvv][-...]
2568 || (nHyph1 == 3 && nHyph2 == 8 && nHyph3 == 11 && nLen >= 16)) // lll-Ssss-CC-vvvv[vvvv][-...]
2569 {
2570 if (nHyph4 < 0)
2571 nHyph4 = rBcp47.getLength();
2572 if (nHyph4 - nHyph3 > 4 && nHyph4 - nHyph3 <= 9)
2573 {
2574 rLanguage = rBcp47.copy( 0, nHyph1).toAsciiLowerCase();
2575 rScript = rBcp47.copy( nHyph1 + 1, 1).toAsciiUpperCase() + rBcp47.copy( nHyph1 + 2, 3).toAsciiLowerCase();
2576 rCountry = rBcp47.copy( nHyph2 + 1, 2).toAsciiUpperCase();
2577 rRegion.clear();
2578 rVariants = rBcp47.copy( nHyph3 + 1);
2579 eRet = EXTRACTED_LV;
2580 }
2581 }
2582 else if ( (nHyph1 == 2 && nHyph2 == 7 && nHyph3 == 11 && nLen >= 16) // ll-Ssss-rrr-vvvv[vvvv][-...]
2583 || (nHyph1 == 3 && nHyph2 == 8 && nHyph3 == 12 && nLen >= 17)) // lll-Ssss-rrr-vvvv[vvvv][-...]
2584 {
2585 if (nHyph4 < 0)
2586 nHyph4 = rBcp47.getLength();
2587 if (nHyph4 - nHyph3 > 4 && nHyph4 - nHyph3 <= 9)
2588 {
2589 rLanguage = rBcp47.copy( 0, nHyph1).toAsciiLowerCase();
2590 rScript = rBcp47.copy( nHyph1 + 1, 1).toAsciiUpperCase() + rBcp47.copy( nHyph1 + 2, 3).toAsciiLowerCase();
2591 rCountry.clear();
2592 rRegion = rBcp47.copy( nHyph2 + 1, 3);
2593 rVariants = rBcp47.copy( nHyph3 + 1);
2594 eRet = EXTRACTED_LR;
2595 }
2596 }
2597 else if ( (nHyph1 == 2 && nHyph2 == 5 && nHyph3 == 7) // ll-CC-u-...
2598 || (nHyph1 == 3 && nHyph2 == 6 && nHyph3 == 8)) // lll-CC-u-...
2599 {
2600 if (rBcp47[nHyph3-1] == 'u')
2601 {
2602 // Need to recognize as known, otherwise getLanguage() and
2603 // getCountry() return empty string because mpImplLangtag is not
2604 // used with a known mapping.
2605 /* TODO: if there were more this would get ugly and needed some
2606 * table driven approach via isolang.cxx instead. */
2607 if (rBcp47.equalsIgnoreAsciiCase( "es-ES-u-co-trad"))
2608 {
2609 rLanguage = "es";
2610 rScript.clear();
2611 rCountry = "ES";
2612 rRegion.clear();
2613 rVariants = "u-co-trad"; // not strictly a variant, but used to reconstruct the tag.
2614 eRet = EXTRACTED_LV;
2615 }
2616 }
2617 }
2618 else if ( (nHyph1 == 2 && nHyph2 == 5 && nLen >= 10) // ll-CC-vvvv[vvvv][-...]
2619 || (nHyph1 == 3 && nHyph2 == 6 && nLen >= 11)) // lll-CC-vvvv[vvvv][-...]
2620 {
2621 if (nHyph3 < 0)
2622 nHyph3 = rBcp47.getLength();
2623 if (nHyph3 - nHyph2 > 4 && nHyph3 - nHyph2 <= 9)
2624 {
2625 rLanguage = rBcp47.copy( 0, nHyph1).toAsciiLowerCase();
2626 rScript.clear();
2627 rCountry = rBcp47.copy( nHyph1 + 1, 2).toAsciiUpperCase();
2628 rRegion.clear();
2629 rVariants = rBcp47.copy( nHyph2 + 1);
2630 eRet = EXTRACTED_LV;
2631 }
2632 }
2633 else if ( (nHyph1 == 2 && nHyph2 == 6 && nLen >= 11) // ll-rrr-vvvv[vvvv][-...]
2634 || (nHyph1 == 3 && nHyph2 == 7 && nLen >= 12)) // lll-rrr-vvvv[vvvv][-...]
2635 {
2636 if (nHyph3 < 0)
2637 nHyph3 = rBcp47.getLength();
2638 if (nHyph3 - nHyph2 > 4 && nHyph3 - nHyph2 <= 9)
2639 {
2640 rLanguage = rBcp47.copy( 0, nHyph1).toAsciiLowerCase();
2641 rScript.clear();
2642 rCountry.clear();
2643 rRegion = rBcp47.copy( nHyph1 + 1, 3);
2644 rVariants = rBcp47.copy( nHyph2 + 1);
2645 eRet = EXTRACTED_LR;
2646 }
2647 }
2648 else if ( (nHyph1 == 2 && nLen >= 8) // ll-vvvvv[vvv][-...]
2649 || (nHyph1 == 3 && nLen >= 9)) // lll-vvvvv[vvv][-...]
2650 {
2651 if (nHyph2 < 0)
2652 nHyph2 = rBcp47.getLength();
2653 if (nHyph2 - nHyph1 > 5 && nHyph2 - nHyph1 <= 9)
2654 {
2655 rLanguage = rBcp47.copy( 0, nHyph1).toAsciiLowerCase();
2656 rScript.clear();
2657 rCountry.clear();
2658 rRegion.clear();
2659 rVariants = rBcp47.copy( nHyph1 + 1);
2660 eRet = EXTRACTED_LV;
2661 }
2662 else
2663 {
2664 // Known and handled grandfathered; ugly but effective ...
2665 // Note that nLen must have matched above.
2666 // Strictly not a variant, but so far we treat it as such.
2667 if (rBcp47.equalsIgnoreAsciiCase( "en-GB-oed"))
2668 {
2669 rLanguage = "en";
2670 rScript.clear();
2671 rCountry = "GB";
2672 rRegion.clear();
2673 rVariants = "oed";
2674 eRet = EXTRACTED_LV;
2675 }
2676 // Other known and handled odd cases.
2677 else if (rBcp47.equalsIgnoreAsciiCase( "es-ES_tradnl"))
2678 {
2679 // Will get overridden, but needs to be recognized as known.
2680 rLanguage = "es";
2681 rScript.clear();
2682 rCountry = "ES";
2683 rRegion.clear();
2684 rVariants = "tradnl"; // this is nonsense, but... ignored.
2685 eRet = EXTRACTED_KNOWN_BAD;
2686 }
2687 }
2688 }
2689 if (eRet == EXTRACTED_NONE)
2690 {
2691 SAL_INFO( "i18nlangtag", "LanguageTagImpl::simpleExtract: did not extract '" << rBcp47 << "'");
2692 rLanguage.clear();
2693 rScript.clear();
2694 rCountry.clear();
2695 rRegion.clear();
2696 rVariants.clear();
2697 }
2698 else
2699 {
2700 assert(rLanguage.getLength() == 2 || rLanguage.getLength() == 3
2701 || eRet == EXTRACTED_X_JOKER || eRet == EXTRACTED_X || eRet == EXTRACTED_C_LOCALE);
2702 assert(rScript.isEmpty() || rScript.getLength() == 4);
2703 assert(rCountry.isEmpty() || rRegion.isEmpty()); // [2ALPHA / 3DIGIT]
2704 assert(rCountry.isEmpty() || rCountry.getLength() == 2);
2705 assert(rRegion.isEmpty() || rRegion.getLength() == 3);
2706 assert(rVariants.isEmpty() || rVariants.getLength() >= 4 || rVariants == "oed");
2707 }
2708 return eRet;
2709}
2710
2711
2712// static
2713::std::vector< OUString >::const_iterator LanguageTag::getFallback(
2714 const ::std::vector< OUString > & rList, const OUString & rReference )
2715{
2716 if (rList.empty())
2717 return rList.end();
2718
2719 // Try the simple case first without constructing fallbacks.
2720 ::std::vector< OUString >::const_iterator it = std::find(rList.begin(), rList.end(), rReference);
2721 if (it != rList.end())
2722 return it; // exact match
2723
2724 ::std::vector< OUString > aFallbacks( LanguageTag( rReference).getFallbackStrings( false));
2725 if (rReference != "en-US")
2726 {
2727 aFallbacks.emplace_back("en-US");
2728 if (rReference != "en")
2729 aFallbacks.emplace_back("en");
2730 }
2731 if (rReference != "x-default")
2732 aFallbacks.emplace_back("x-default");
2733 if (rReference != "x-no-translate")
2734 aFallbacks.emplace_back("x-no-translate");
2735 /* TODO: the original comphelper::Locale::getFallback() code had
2736 * "x-notranslate" instead of "x-no-translate", but all .xcu files use
2737 * "x-no-translate" and "x-notranslate" apparently was never used anywhere.
2738 * Did that ever work? Was it supposed to work at all like this? */
2739
2740 for (const auto& fb : aFallbacks)
2741 {
2742 it = std::find(rList.begin(), rList.end(), fb);
2743 if (it != rList.end())
2744 return it; // fallback found
2745 }
2746
2747 // Did not find anything so return something of the list, the first value
2748 // will do as well as any other as none did match any of the possible
2749 // fallbacks.
2750 return rList.begin();
2751}
2752
2753
2754// static
2755::std::vector< css::lang::Locale >::const_iterator LanguageTag::getMatchingFallback(
2756 const ::std::vector< css::lang::Locale > & rList,
2757 const css::lang::Locale & rReference )
2758{
2759 if (rList.empty())
2760 return rList.end();
2761
2762 // Try the simple case first without constructing fallbacks.
2763 ::std::vector< lang::Locale >::const_iterator it = std::find_if(rList.begin(), rList.end(),
2764 [&rReference](const lang::Locale& rLocale) {
2765 return rLocale.Language == rReference.Language
2766 && rLocale.Country == rReference.Country
2767 && rLocale.Variant == rReference.Variant; });
2768 if (it != rList.end())
2769 return it; // exact match
2770
2771 // Now for each reference fallback test the fallbacks of the list in order.
2772 ::std::vector< OUString > aFallbacks( LanguageTag( rReference).getFallbackStrings( false));
2773 ::std::vector< ::std::vector< OUString > > aListFallbacks( rList.size());
2774 size_t i = 0;
2775 for (auto const& elem : rList)
2776 {
2777 ::std::vector< OUString > aTmp( LanguageTag(elem).getFallbackStrings( true));
2778 aListFallbacks[i++] = aTmp;
2779 }
2780 for (auto const& rfb : aFallbacks)
2781 {
2782 size_t nPosFb = 0;
2783 for (auto const& lfb : aListFallbacks)
2784 {
2785 for (auto const& fb : lfb)
2786 {
2787 if (rfb == fb)
2788 return rList.begin() + nPosFb;
2789 }
2790 ++nPosFb;
2791 }
2792 }
2793
2794 // No match found.
2795 return rList.end();
2796}
2797
2798
2799static bool lcl_isSystem( LanguageType nLangID )
2800{
2801 if (nLangID == LANGUAGE_SYSTEM)
2802 return true;
2803 // There are some special values that simplify to SYSTEM,
2804 // getRealLanguage() catches and resolves them.
2805 LanguageType nNewLangID = MsLangId::getRealLanguage( nLangID);
2806 return nNewLangID != nLangID;
2807}
2808
2809
2810// static
2811css::lang::Locale LanguageTag::convertToLocale( LanguageType nLangID, bool bResolveSystem )
2812{
2813 if (!bResolveSystem && lcl_isSystem( nLangID))
2814 return lang::Locale();
2815
2816 return LanguageTag( nLangID).getLocale( bResolveSystem);
2817}
2818
2819
2820// static
2821LanguageType LanguageTag::convertToLanguageType( const css::lang::Locale& rLocale, bool bResolveSystem )
2822{
2823 if (rLocale.Language.isEmpty() && !bResolveSystem)
2824 return LANGUAGE_SYSTEM;
2825
2826 return LanguageTag( rLocale).getLanguageType( bResolveSystem);
2827}
2828
2829
2830// static
2831OUString LanguageTagImpl::convertToBcp47( const css::lang::Locale& rLocale )
2832{
2833 OUString aBcp47;
2834 if (rLocale.Language.isEmpty())
2835 {
2836 // aBcp47 stays empty
2837 }
2838 else if (rLocale.Language == I18NLANGTAG_QLT)
2839 {
2840 aBcp47 = rLocale.Variant;
2841 }
2842 else
2843 {
2844 /* XXX NOTE: most legacy code never evaluated the Variant field, so for
2845 * now just concatenate language and country. In case we stumbled over
2846 * variant aware code we'd have to take care of that. */
2847 if (rLocale.Country.isEmpty())
2848 aBcp47 = rLocale.Language;
2849 else
2850 {
2851 aBcp47 = rLocale.Language + "-" + rLocale.Country;
2852 }
2853 }
2854 return aBcp47;
2855}
2856
2857
2858// static
2859OUString LanguageTag::convertToBcp47( const css::lang::Locale& rLocale, bool bResolveSystem )
2860{
2861 OUString aBcp47;
2862 if (rLocale.Language.isEmpty())
2863 {
2864 if (bResolveSystem)
2866 // else aBcp47 stays empty
2867 }
2868 else
2869 {
2870 aBcp47 = LanguageTagImpl::convertToBcp47( rLocale);
2871 }
2872 return aBcp47;
2873}
2874
2875
2876// static
2878{
2879 lang::Locale aLocale( LanguageTag::convertToLocale( nLangID ));
2880 // If system for some reason (should not happen... haha) could not be
2881 // resolved DO NOT CALL LanguageTag::convertToBcp47(Locale) because that
2882 // would recurse into this method here!
2883 if (aLocale.Language.isEmpty())
2884 return OUString(); // bad luck, bail out
2885 return LanguageTagImpl::convertToBcp47( aLocale);
2886}
2887
2888
2889// static
2890css::lang::Locale LanguageTag::convertToLocale( const OUString& rBcp47, bool bResolveSystem )
2891{
2892 if (rBcp47.isEmpty() && !bResolveSystem)
2893 return lang::Locale();
2894
2895 return LanguageTag( rBcp47).getLocale( bResolveSystem);
2896}
2897
2898
2899// static
2901{
2902 return LanguageTag( rBcp47).getLanguageType();
2903}
2904
2905
2906// static
2908{
2909 return LanguageTag( rBcp47).makeFallback().getLanguageType();
2910}
2911
2912
2913// static
2914css::lang::Locale LanguageTag::convertToLocaleWithFallback( const OUString& rBcp47 )
2915{
2916 return LanguageTag( rBcp47).makeFallback().getLocale();
2917}
2918
2919
2920// static
2922{
2923 if (rLocale.Language.isEmpty())
2924 return LANGUAGE_SYSTEM;
2925
2926 return LanguageTag( rLocale).makeFallback().getLanguageType();
2927}
2928
2929
2930// static
2931bool LanguageTag::isValidBcp47( const OUString& rString, OUString* o_pCanonicalized,
2932 LanguageTag::PrivateUse ePrivateUse )
2933{
2934 bool bValid = false;
2935
2936 struct guard
2937 {
2938 lt_tag_t* mpLangtag;
2939 guard()
2940 {
2941 theDataRef().init();
2942 mpLangtag = lt_tag_new();
2943 }
2944 ~guard()
2945 {
2946 lt_tag_unref( mpLangtag);
2947 }
2948 } aVar;
2949
2950 myLtError aError;
2951
2952 if (!lt_tag_parse_disabled && lt_tag_parse(aVar.mpLangtag, OUStringToOString(rString, RTL_TEXTENCODING_UTF8).getStr(), &aError.p))
2953 {
2954 char* pTag = lt_tag_canonicalize( aVar.mpLangtag, &aError.p);
2955 SAL_WARN_IF( !pTag, "i18nlangtag", "LanguageTag:isValidBcp47: could not canonicalize '" << rString << "'");
2956 if (pTag)
2957 {
2958 bValid = true;
2959 if (ePrivateUse != PrivateUse::ALLOW)
2960 {
2961 do
2962 {
2963 const char* pLang = nullptr;
2964 const lt_lang_t* pLangT = lt_tag_get_language( aVar.mpLangtag);
2965 if (pLangT)
2966 {
2967 pLang = lt_lang_get_tag( pLangT);
2968 if (pLang && strcmp( pLang, I18NLANGTAG_QLT_ASCII) == 0)
2969 {
2970 // Disallow 'qlt' localuse code to prevent
2971 // confusion with our internal usage.
2972 bValid = false;
2973 break;
2974 }
2975 }
2976 if (ePrivateUse == PrivateUse::ALLOW_ART_X && pLang && strcmp( pLang, "art") == 0)
2977 {
2978 // Allow anything 'art' which includes 'art-x-...' and 'art-Latn-x-...'.
2979 break;
2980 }
2981 const lt_string_t* pPrivate = lt_tag_get_privateuse( aVar.mpLangtag);
2982 if (pPrivate && lt_string_length( pPrivate) > 0)
2983 bValid = false;
2984 }
2985 while (false);
2986 }
2987 if (o_pCanonicalized)
2988 *o_pCanonicalized = OUString::createFromAscii( pTag);
2989 free( pTag);
2990 }
2991 }
2992 else
2993 {
2994 SAL_INFO( "i18nlangtag", "LanguageTag:isValidBcp47: could not parse '" << rString << "'");
2995 }
2996 return bValid;
2997}
2998
3000{
3001 //map the simple ones via LanguageTypes, and the hard ones explicitly
3003
3004 switch (nLanguage)
3005 {
3007 nLang = LANGUAGE_ENGLISH_US;
3008 break;
3010 nLang = LANGUAGE_FRENCH;
3011 break;
3013 nLang = LANGUAGE_GERMAN;
3014 break;
3016 nLang = LANGUAGE_ITALIAN;
3017 break;
3019 nLang = LANGUAGE_DUTCH;
3020 break;
3022 nLang = LANGUAGE_SWEDISH;
3023 break;
3025 nLang = LANGUAGE_SPANISH;
3026 break;
3028 nLang = LANGUAGE_DANISH;
3029 break;
3031 nLang = LANGUAGE_PORTUGUESE;
3032 break;
3034 nLang = LANGUAGE_NORWEGIAN;
3035 break;
3037 nLang = LANGUAGE_HEBREW;
3038 break;
3040 nLang = LANGUAGE_JAPANESE;
3041 break;
3044 break;
3046 nLang = LANGUAGE_FINNISH;
3047 break;
3049 nLang = LANGUAGE_GREEK;
3050 break;
3052 nLang = LANGUAGE_ICELANDIC;
3053 break;
3055 nLang = LANGUAGE_MALTESE;
3056 break;
3058 nLang = LANGUAGE_TURKISH;
3059 break;
3061 nLang = LANGUAGE_CROATIAN;
3062 break;
3065 break;
3067 nLang = LANGUAGE_URDU_PAKISTAN; //probably, otherwise we need a LANGUAGE_URDU_PRIMARY_ONLY
3068 break;
3070 nLang = LANGUAGE_HINDI;
3071 break;
3073 nLang = LANGUAGE_THAI;
3074 break;
3076 nLang = LANGUAGE_KOREAN;
3077 break;
3079 nLang = LANGUAGE_LITHUANIAN;
3080 break;
3082 nLang = LANGUAGE_POLISH;
3083 break;
3085 nLang = LANGUAGE_HUNGARIAN;
3086 break;
3088 nLang = LANGUAGE_ESTONIAN;
3089 break;
3091 nLang = LANGUAGE_LATVIAN;
3092 break;
3094 nLang = LANGUAGE_SAMI_NORTHERN_NORWAY; //maybe
3095 break;
3097 nLang = LANGUAGE_FAEROESE;
3098 break;
3100 nLang = LANGUAGE_FARSI;
3101 break;
3103 nLang = LANGUAGE_RUSSIAN;
3104 break;
3107 break;
3109 nLang = LANGUAGE_DUTCH_BELGIAN;
3110 break;
3113 break;
3115 nLang = LANGUAGE_ALBANIAN;
3116 break;
3118 nLang = LANGUAGE_ROMANIAN;
3119 break;
3121 nLang = LANGUAGE_CZECH;
3122 break;
3124 nLang = LANGUAGE_SLOVAK;
3125 break;
3127 nLang = LANGUAGE_SLOVENIAN;
3128 break;
3130 nLang = LANGUAGE_YIDDISH;
3131 break;
3133 nLang = LANGUAGE_SERBIAN_CYRILLIC_SERBIA; //maybe
3134 break;
3136 nLang = LANGUAGE_MACEDONIAN;
3137 break;
3139 nLang = LANGUAGE_BULGARIAN;
3140 break;
3142 nLang = LANGUAGE_UKRAINIAN;
3143 break;
3145 nLang = LANGUAGE_BELARUSIAN;
3146 break;
3148 nLang = LANGUAGE_UZBEK_CYRILLIC; //maybe
3149 break;
3151 nLang = LANGUAGE_KAZAKH;
3152 break;
3155 break;
3157 return LanguageTag("az-Arab");
3159 nLang = LANGUAGE_ARMENIAN;
3160 break;
3162 nLang = LANGUAGE_GEORGIAN;
3163 break;
3166 break;
3168 nLang = LANGUAGE_KIRGHIZ;
3169 break;
3171 nLang = LANGUAGE_TAJIK;
3172 break;
3174 nLang = LANGUAGE_TURKMEN;
3175 break;
3178 break;
3181 break;
3183 nLang = LANGUAGE_PASHTO;
3184 break;
3186 nLang = LANGUAGE_USER_KURDISH_TURKEY; //maybe
3187 break;
3189 nLang = LANGUAGE_KASHMIRI;
3190 break;
3192 nLang = LANGUAGE_SINDHI;
3193 break;
3195 nLang = LANGUAGE_TIBETAN;
3196 break;
3198 nLang = LANGUAGE_NEPALI;
3199 break;
3201 nLang = LANGUAGE_SANSKRIT;
3202 break;
3204 nLang = LANGUAGE_MARATHI;
3205 break;
3207 nLang = LANGUAGE_BENGALI;
3208 break;
3210 nLang = LANGUAGE_ASSAMESE;
3211 break;
3213 nLang = LANGUAGE_GUJARATI;
3214 break;
3216 nLang = LANGUAGE_PUNJABI;
3217 break;
3219 nLang = LANGUAGE_ODIA;
3220 break;
3222 nLang = LANGUAGE_MALAYALAM;
3223 break;
3225 nLang = LANGUAGE_KANNADA;
3226 break;
3228 nLang = LANGUAGE_TAMIL;
3229 break;
3231 nLang = LANGUAGE_TELUGU;
3232 break;
3235 break;
3237 nLang = LANGUAGE_BURMESE;
3238 break;
3240 nLang = LANGUAGE_KHMER;
3241 break;
3243 nLang = LANGUAGE_LAO;
3244 break;
3246 nLang = LANGUAGE_VIETNAMESE;
3247 break;
3249 nLang = LANGUAGE_INDONESIAN;
3250 break;
3252 nLang = LANGUAGE_USER_TAGALOG;
3253 break;
3256 break;
3259 break;
3262 break;
3265 break;
3267 nLang = LANGUAGE_OROMO;
3268 break;
3270 nLang = LANGUAGE_SOMALI;
3271 break;
3273 nLang = LANGUAGE_SWAHILI;
3274 break;
3277 break;
3279 return LanguageTag("rn");
3281 nLang = LANGUAGE_USER_NYANJA;
3282 break;
3285 break;
3288 break;
3290 nLang = LANGUAGE_WELSH;
3291 break;
3293 nLang = LANGUAGE_BASQUE;
3294 break;
3296 nLang = LANGUAGE_CATALAN;
3297 break;
3299 nLang = LANGUAGE_LATIN;
3300 break;
3302 nLang = LANGUAGE_QUECHUA_BOLIVIA; //maybe
3303 break;
3306 break;
3308 return LanguageTag("ay");
3310 nLang = LANGUAGE_TATAR;
3311 break;
3313 nLang = LANGUAGE_UIGHUR_CHINA;
3314 break;
3317 break;
3319 return LanguageTag("jv-Latn");
3321 return LanguageTag("su-Latn");
3323 nLang = LANGUAGE_GALICIAN;
3324 break;
3326 nLang = LANGUAGE_AFRIKAANS;
3327 break;
3329 nLang = LANGUAGE_BRETON_FRANCE;
3330 break;
3332 nLang = LANGUAGE_INUKTITUT_LATIN_CANADA; //probably
3333 break;
3336 break;
3338 nLang = LANGUAGE_USER_MANX;
3339 break;
3341 return LanguageTag("ga-Latg");
3343 return LanguageTag("to");
3346 break;
3349 break;
3351 nLang = LANGUAGE_AZERI_LATIN;
3352 break;
3353 }
3354
3355 return LanguageTag(nLang);
3356}
3357
3358/* vim:set shiftwidth=4 softtabstop=4 expandtab: */
bool mbInitialized
AppleLanguageId
Definition: applelangid.hxx:18
FILE * init(int, char **)
bool cacheSimpleLSCV()
Obtain Language, Script, Country and Variants via simpleExtract() and assign them to the cached varia...
OUString getScriptFromLangtag()
bool hasScript() const
bool convertLocaleToLang(bool bAllowOnTheFlyID)
void convertLangToLocale()
bool synCanonicalize()
Canonicalize if not yet done and synchronize initialized conversions.
bool isIsoODF() const
OUString const & getGlibcLocaleString() const
static OUString convertToBcp47(const css::lang::Locale &rLocale)
Convert Locale to BCP 47 string without resolving system and creating temporary LanguageTag instances...
css::lang::Locale maLocale
LanguageTagImpl(const LanguageTag &rLanguageTag)
OUString getLanguageFromLangtag()
lt_tag_t * mpImplLangtag
liblangtag pointer
OUString const & getLanguage() const
Decision meIsIsoODF
LanguageTag::ImplPtr registerOnTheFly(LanguageType nRegisterID)
Generates on-the-fly LangID and registers the maBcp47,mnLangID pair.
LanguageTagImpl & operator=(const LanguageTagImpl &rLanguageTagImpl)
OUString const & getScript() const
OUString maCachedLanguage
cache getLanguage()
LanguageTag::ScriptType meScriptType
OUString const & getCountry() const
LanguageTag::ScriptType getScriptType() const
LanguageType mnLangID
static Extraction simpleExtract(const OUString &rBcp47, OUString &rLanguage, OUString &rScript, OUString &rCountry, OUString &rRegion, OUString &rVariants)
Of a language tag of the form lll[-Ssss][-CC][-vvvvvvvv] extract the portions.
OUString maCachedScript
cache getScript()
void convertLocaleToBcp47()
OUString const & getVariants() const
OUString getRegionFromLangtag()
OUString getVariantsFromLangtag()
OUString getRegion() const
OUString maCachedCountry
cache getCountry()
OUString const & getBcp47() const
Decision meIsLiblangtagNeeded
whether processing with liblangtag needed
bool isValidBcp47() const
OUString maCachedVariants
cache getVariants()
void convertBcp47ToLocale()
void setScriptType(LanguageTag::ScriptType st)
void convertBcp47ToLang()
Decision meIsIsoLocale
OUString maCachedGlibcString
cache getGlibcLocaleString()
Decision meIsValid
void convertLangToBcp47()
bool isIsoLocale() const
Wrapper for liblangtag BCP 47 language tags, MS-LangIDs, locales and conversions in between.
Definition: languagetag.hxx:53
void convertFromRtlLocale()
void syncVarsFromImpl() const
::std::vector< css::lang::Locale >::const_iterator getMatchingFallback(const ::std::vector< css::lang::Locale > &rList, const css::lang::Locale &rReference)
Search for an equal or for a similar locale in a list of possible ones where at least the language ma...
bool operator<(const LanguageTag &rLanguageTag) const
Test this LanguageTag less than that LanguageTag.
LanguageType getLanguageType(bool bResolveSystem=true) const
Obtain mapping to MS-LangID.
void syncFromImpl()
void convertLocaleToLang()
OUString getLanguageAndScript() const
Get combined language and script code, separated by '-' if non-default script, if default script only...
bool isSystemLocale() const
If this tag was constructed as an empty tag denoting the system locale.
void syncVarsFromRawImpl() const
static bool isIsoScript(const OUString &rScript)
const css::lang::Locale & getLocale(bool bResolveSystem=true) const
Obtain language tag as Locale.
void resetVars()
bool synCanonicalize()
Canonicalize if not yet done and synchronize initialized conversions.
LanguageTag(const OUString &rBcp47LanguageTag, bool bCanonicalize=false)
Init LanguageTag with existing BCP 47 language tag string.
bool operator!=(const LanguageTag &rLanguageTag) const
Test inequality of two LanguageTag.
LanguageTagImpl * getImpl()
OUString getLanguage() const
Get ISO 639 language code, or BCP 47 language.
static void setConfiguredSystemLanguage(LanguageType nLang)
@ATTENTION: ONLY to be called by the application's configuration!
static css::lang::Locale convertToLocaleWithFallback(const OUString &rBcp47)
Convert BCP 47 string to Locale with fallback, convenience method.
OUString getScript() const
Get ISO 15924 script code, if not the default script according to BCP 47.
static css::lang::Locale convertToLocale(LanguageType nLangID, bool bResolveSystem=true)
Convert MS-LangID to Locale.
void convertLangToLocale()
static bool isIsoLanguage(const OUString &rLanguage)
const OUString & getBcp47(bool bResolveSystem=true) const
Obtain BCP 47 language tag.
LanguageTag & makeFallback()
Fall back to a known locale.
void convertBcp47ToLocale()
css::lang::Locale maLocale
bool isValidBcp47() const
If this is a valid BCP 47 language tag.
bool hasScript() const
If language tag has a non-default script specified.
ImplPtr mpImpl
bool isIsoLocale() const
If language tag is a locale that can be expressed using only ISO 639 language codes and ISO 3166 coun...
static void disable_lt_tag_parse()
@ATTENTION: ONLY to be called by fuzzing setup
static bool isOnTheFlyID(LanguageType nLang)
If nLang is a generated on-the-fly LangID.
static OUString convertToBcp47(LanguageType nLangID)
Convert MS-LangID to BCP 47 string.
void convertBcp47ToLang()
bool mbSystemLocale
OUString getGlibcLocaleString(std::u16string_view rEncoding) const
Get a GLIBC locale string.
OUString getBcp47MS() const
Obtain BCP 47 language tag, but with MS malformed exceptions.
PrivateUse
Enums to be used with isValidBcp47().
bool mbInitializedBcp47
OUString maBcp47
ScriptType getScriptType() const
Returns the script type for this language, UNKNOWN if not set.
ScriptType
ScriptType for a language.
Definition: languagetag.hxx:68
bool mbInitializedLangID
bool equals(const LanguageTag &rLanguageTag) const
Test equality of two LanguageTag, possibly resolving system locale.
OUString getCountry() const
Get ISO 3166 country alpha code.
bool mbInitializedLocale
static LanguageType convertToLanguageType(const css::lang::Locale &rLocale, bool bResolveSystem=true)
Convert Locale to MS-LangID.
bool isIsoODF() const
If language tag is a locale that can be expressed using only ISO 639 language codes and ISO 15924 scr...
ImplPtr registerImpl() const
::std::vector< OUString >::const_iterator getFallback(const ::std::vector< OUString > &rList, const OUString &rReference)
Search for an equal or at least for a similar locale in a list of possible ones.
static ScriptType getOnTheFlyScriptType(LanguageType nLang)
static LanguageType convertToLanguageTypeWithFallback(const OUString &rBcp47)
Convert BCP 47 string to MS-LangID with fallback, convenience method.
LanguageType mnLangID
static bool isIsoCountry(const OUString &rRegion)
LanguageTag & reset(const OUString &rBcp47LanguageTag)
Reset with existing BCP 47 language tag string.
::std::vector< OUString > getFallbackStrings(bool bIncludeFullBcp47) const
Return a vector of fall-back strings.
void setScriptType(ScriptType st)
Sets the script type for this language.
std::shared_ptr< LanguageTagImpl > ImplPtr
OUString getVariants() const
Get BCP 47 variant subtags, of the IANA Language Subtag Registry.
void getIsoLanguageScriptCountry(OUString &rLanguage, OUString &rScript, OUString &rCountry) const
Obtain ISO strings for language, script and country.
bool operator==(const LanguageTag &rLanguageTag) const
Test equality of two LanguageTag.
static I18NLANGTAG_DLLPRIVATE css::lang::Locale convertLanguageToLocale(LanguageType nLang, bool bIgnoreOverride)
Convert a LanguageType to a Locale.
Definition: mslangid.cxx:193
static I18NLANGTAG_DLLPRIVATE LanguageType convertLocaleToLanguage(const css::lang::Locale &rLocale)
Convert a Locale to a LanguageType with handling of an empty language name designating LANGUAGE_SYSTE...
Definition: mslangid.cxx:214
static I18NLANGTAG_DLLPRIVATE css::lang::Locale lookupFallbackLocale(const css::lang::Locale &rLocale)
Definition: isolang.cxx:1107
static I18NLANGTAG_DLLPRIVATE css::lang::Locale getOverride(const css::lang::Locale &rLocale)
Used by LanguageTag::canonicalize()
Definition: isolang.cxx:1271
static I18NLANGTAG_DLLPRIVATE LanguageType convertIsoNamesToLanguage(const OUString &rLang, const OUString &rCountry, bool bSkipIsoTable)
Used by convertLocaleToLanguageImpl(Locale) and LanguageTagImpl::convertLocaleToLang()
Definition: isolang.cxx:1315
static I18NLANGTAG_DLLPRIVATE void setConfiguredSystemLanguage(LanguageType nLang)
Configured system locale needs always be synchronized with LanguageTag's system locale.
Definition: mslangid.cxx:40
static LanguageType getPrimaryLanguage(LanguageType nLangID)
Get the primary language of a LangID.
Definition: mslangid.hxx:50
static ::std::vector< LanguagetagMapping > getDefinedLanguagetags()
Definition: isolang.cxx:1516
static LanguageType getSubLanguage(LanguageType nLangID)
Get the sublanguage of a LangID.
Definition: mslangid.hxx:56
static LanguageType convertUnxByteStringToLanguage(std::string_view rString)
Definition: isolang.cxx:1458
static LanguageType getRealLanguage(LanguageType nLang)
: A proper language/locale if the nLang parameter designates some special value.
Definition: mslangid.cxx:81
static LanguageType makeLangID(LanguageType nSubLangId, LanguageType nPriLangId)
Create a LangID from a primary and a sublanguage.
Definition: mslangid.hxx:44
FormulaCommand pE
URL aURL
float u
UNKNOWN
void * p
o3tl::strong_int< sal_uInt16, struct LanguageTypeTag > LanguageType
These are MS LANGIDs, the lower 10 bits (mask 0x03ff, values below 0x0400 aren't real locale IDs) rep...
Definition: lang.h:87
#define LANGUAGE_USER_KURDISH_TURKEY
Definition: lang.h:555
#define LANGUAGE_KIRGHIZ
Definition: lang.h:284
#define LANGUAGE_ARMENIAN
Definition: lang.h:147
#define LANGUAGE_SANSKRIT
Definition: lang.h:354
#define LANGUAGE_UIGHUR_CHINA
Definition: lang.h:431
#define LANGUAGE_KINYARWANDA_RWANDA
Definition: lang.h:283
#define LANGUAGE_YIDDISH
Definition: lang.h:446
#define LANGUAGE_SYSTEM
Definition: lang.h:104
#define LANGUAGE_ARABIC_PRIMARY_ONLY
Definition: lang.h:140
#define LANGUAGE_QUECHUA_BOLIVIA
Definition: lang.h:331
#define LANGUAGE_MALAY_MALAYSIA
Definition: lang.h:301
#define LANGUAGE_LITHUANIAN
Definition: lang.h:293
#define LANGUAGE_MONGOLIAN_MONGOLIAN_MONGOLIA
Definition: lang.h:310
#define LANGUAGE_ROMANIAN
Definition: lang.h:336
#define LANGUAGE_CHINESE_TRADITIONAL
Definition: lang.h:176
#define LANGUAGE_PORTUGUESE
Definition: lang.h:326
#define LANGUAGE_GAELIC_IRELAND
Definition: lang.h:242
#define LANGUAGE_HUNGARIAN
Definition: lang.h:260
#define LANGUAGE_TIBETAN
Definition: lang.h:421
#define LANGUAGE_THAI
Definition: lang.h:420
#define LANGUAGE_PUNJABI
Definition: lang.h:328
#define LANGUAGE_BELARUSIAN
Definition: lang.h:155
#define LANGUAGE_AZERI_CYRILLIC
Definition: lang.h:149
#define LANGUAGE_MACEDONIAN
Definition: lang.h:297
#define LANGUAGE_FINNISH
Definition: lang.h:220
#define LANGUAGE_BENGALI
Definition: lang.h:156
#define LANGUAGE_SERBIAN_CYRILLIC_SERBIA
Definition: lang.h:361
#define LANGUAGE_NEPALI
Definition: lang.h:313
#define LANGUAGE_SAMI_NORTHERN_NORWAY
Definition: lang.h:340
#define LANGUAGE_SWAHILI
Definition: lang.h:403
#define LANGUAGE_CZECH
Definition: lang.h:183
#define LANGUAGE_FRENCH
Definition: lang.h:221
#define LANGUAGE_DZONGKHA_BHUTAN
Definition: lang.h:423
#define LANGUAGE_ITALIAN
Definition: lang.h:269
#define LANGUAGE_USER_MALAY_ARABIC_MALAYSIA
Definition: lang.h:752
#define LANGUAGE_LATIN
Definition: lang.h:291
#define LANGUAGE_DUTCH
Definition: lang.h:187
#define LANGUAGE_KALAALLISUT_GREENLAND
Definition: lang.h:272
#define LANGUAGE_GALICIAN
Definition: lang.h:245
#define LANGUAGE_ON_THE_FLY_SUB_START
Definition: lang.h:798
#define LANGUAGE_TURKMEN
Definition: lang.h:430
#define LANGUAGE_GUARANI_PARAGUAY
Definition: lang.h:253
#define LANGUAGE_PASHTO
Definition: lang.h:324
#define LANGUAGE_ICELANDIC
Definition: lang.h:262
#define LANGUAGE_LAO
Definition: lang.h:290
#define LANGUAGE_KAZAKH
Definition: lang.h:277
#define LANGUAGE_SPANISH
Definition: lang.h:402
#define LANGUAGE_CHINESE_SIMPLIFIED
Definition: lang.h:174
#define LANGUAGE_SLOVAK
Definition: lang.h:373
#define LANGUAGE_CATALAN
Definition: lang.h:167
#define LANGUAGE_ODIA
Definition: lang.h:321
#define LANGUAGE_TAMIL
Definition: lang.h:416
#define LANGUAGE_JAPANESE
Definition: lang.h:271
#define LANGUAGE_SINHALESE_SRI_LANKA
Definition: lang.h:372
#define LANGUAGE_GREEK
Definition: lang.h:252
#define LANGUAGE_USER_NYANJA
Definition: lang.h:599
#define LANGUAGE_GAELIC_SCOTLAND
Definition: lang.h:243
#define LANGUAGE_FAEROESE
Definition: lang.h:217
#define LANGUAGE_RUSSIAN
Definition: lang.h:338
#define LANGUAGE_KOREAN
Definition: lang.h:286
#define LANGUAGE_SOMALI
Definition: lang.h:375
#define LANGUAGE_OROMO
Definition: lang.h:322
#define LANGUAGE_UZBEK_CYRILLIC
Definition: lang.h:435
#define LANGUAGE_USER_TAGALOG
Definition: lang.h:593
#define LANGUAGE_ALBANIAN
Definition: lang.h:127
#define LANGUAGE_SWEDISH
Definition: lang.h:404
#define LANGUAGE_BRETON_FRANCE
Definition: lang.h:164
#define LANGUAGE_INUKTITUT_LATIN_CANADA
Definition: lang.h:267
#define LANGUAGE_POLISH
Definition: lang.h:325
#define LANGUAGE_ROMANIAN_MOLDOVA
Definition: lang.h:337
#define LANGUAGE_BURMESE
Definition: lang.h:166
#define LANGUAGE_ASSAMESE
Definition: lang.h:148
#define LANGUAGE_UKRAINIAN
Definition: lang.h:432
#define LANGUAGE_USER_MANX
Definition: lang.h:662
#define LANGUAGE_KHMER
Definition: lang.h:281
#define LANGUAGE_SPANISH_DATED
Definition: lang.h:380
#define LANGUAGE_VIETNAMESE
Definition: lang.h:440
#define LANGUAGE_DONTKNOW
Definition: lang.h:101
#define LANGUAGE_HEBREW
Definition: lang.h:258
#define LANGUAGE_DANISH
Definition: lang.h:184
#define LANGUAGE_GERMAN
Definition: lang.h:247
#define LANGUAGE_USER_ANCIENT_GREEK
Definition: lang.h:631
#define LANGUAGE_AFRIKAANS
Definition: lang.h:126
#define LANGUAGE_NORWEGIAN
Definition: lang.h:315
#define LANGUAGE_HINDI
Definition: lang.h:259
#define LANGUAGE_SINDHI
Definition: lang.h:369
#define LANGUAGE_ON_THE_FLY_START
Definition: lang.h:795
#define LANGUAGE_AZERI_LATIN
Definition: lang.h:151
#define LANGUAGE_CROATIAN
Definition: lang.h:181
#define LANGUAGE_KANNADA
Definition: lang.h:273
#define LANGUAGE_MARATHI
Definition: lang.h:306
#define LANGUAGE_MALAYALAM
Definition: lang.h:299
#define LANGUAGE_GEORGIAN
Definition: lang.h:246
#define LANGUAGE_BULGARIAN
Definition: lang.h:165
#define LANGUAGE_URDU_PAKISTAN
Definition: lang.h:434
#define LANGUAGE_KASHMIRI
Definition: lang.h:275
#define LANGUAGE_FARSI
Definition: lang.h:218
#define LANGUAGE_TAJIK
Definition: lang.h:408
#define LANGUAGE_AMHARIC_ETHIOPIA
Definition: lang.h:129
#define LANGUAGE_DUTCH_BELGIAN
Definition: lang.h:188
#define LANGUAGE_ESTONIAN
Definition: lang.h:216
#define LANGUAGE_GUJARATI
Definition: lang.h:254
#define LANGUAGE_SLOVENIAN
Definition: lang.h:374
#define LANGUAGE_WELSH
Definition: lang.h:441
#define LANGUAGE_MALAGASY_PLATEAU
Definition: lang.h:298
#define LANGUAGE_TURKISH
Definition: lang.h:429
#define LANGUAGE_USER_ESPERANTO
Definition: lang.h:540
#define LANGUAGE_TELUGU
Definition: lang.h:419
#define LANGUAGE_MONGOLIAN_CYRILLIC_MONGOLIA
Definition: lang.h:308
#define LANGUAGE_ENGLISH_US
Definition: lang.h:213
#define LANGUAGE_TIGRIGNA_ETHIOPIA
Definition: lang.h:425
#define LANGUAGE_ON_THE_FLY_SUB_END
Definition: lang.h:799
#define LANGUAGE_LATVIAN
Definition: lang.h:292
#define LANGUAGE_MALTESE
Definition: lang.h:302
#define LANGUAGE_BASQUE
Definition: lang.h:154
#define LANGUAGE_INDONESIAN
Definition: lang.h:264
#define LANGUAGE_TATAR
Definition: lang.h:418
#define LANGUAGE_ON_THE_FLY_END
Definition: lang.h:796
LanguageTag makeLanguageTagFromAppleLanguageId(AppleLanguageId nLanguage)
Init LanguageTag with Apple LanguageId.
static bool lcl_isSystem(LanguageType nLangID)
static bool lcl_isKnownOnTheFlyID(LanguageType nLang)
static void handleVendorVariant(css::lang::Locale &rLocale)
static bool lt_tag_parse_disabled
std::unordered_set< OUString > KnownTagSet
Definition: languagetag.cxx:65
static LanguageType getNextOnTheFlyLanguage()
static const KnownTagSet & getKnowns()
Definition: languagetag.cxx:66
#define I18NLANGTAG_QLT_ASCII
The ISO 639-2 code reserved for local use used to indicate that a css::Locale contains a BCP 47 string in its Variant field.
Definition: languagetag.hxx:35
constexpr OUStringLiteral I18NLANGTAG_QLT
Definition: languagetag.hxx:36
#define SAL_INFO_IF(condition, area, stream)
#define SAL_WARN_IF(condition, area, stream)
#define SAL_WARN(area, stream)
#define SAL_INFO(area, stream)
ScriptType
return NULL
aStr
aBuf
constexpr OUStringLiteral aData
int i
int compareToIgnoreAsciiCase(std::u16string_view s1, std::u16string_view s2)
OString OUStringToOString(std::u16string_view str, ConnectionSettings const *settings)
sal_uInt16 sal_Unicode