LibreOffice Module unotools (master) 1
wincodepage.cxx
Go to the documentation of this file.
1/* -*- Mode: C++; tab-width: 4; indent-tabs-mode: nil; c-basic-offset: 4 -*- */
2/*
3 * This file is part of the LibreOffice project.
4 *
5 * This Source Code Form is subject to the terms of the Mozilla Public
6 * License, v. 2.0. If a copy of the MPL was not distributed with this
7 * file, You can obtain one at http://mozilla.org/MPL/2.0/.
8 */
9
10#include <sal/config.h>
11
12#include <string_view>
13
15#include <rtl/textenc.h>
16
17namespace{
18
19struct LangEncodingDef
20{
21 const std::u16string_view msLangStr;
22 rtl_TextEncoding meTextEncoding;
23};
24
25// See https://msdn.microsoft.com/en-us/library/windows/desktop/dd317756
26rtl_TextEncoding impl_getWinTextEncodingFromLangStrANSI(const OUString& sLanguage)
27{
28 static constexpr LangEncodingDef aLanguageTab[] =
29 {
30 { u"en", RTL_TEXTENCODING_MS_1252 }, // Most used -> first in list
31 { u"th", RTL_TEXTENCODING_MS_874 },
32 { u"ja", RTL_TEXTENCODING_MS_932 },
33 { u"zh-cn", RTL_TEXTENCODING_MS_936 }, // Chinese (simplified) - must go before "zh"
34 { u"ko", RTL_TEXTENCODING_MS_949 },
35 { u"zh", RTL_TEXTENCODING_MS_950 }, // Chinese (traditional)
36 { u"bs", RTL_TEXTENCODING_MS_1250 },
37 { u"cs", RTL_TEXTENCODING_MS_1250 },
38 { u"hr", RTL_TEXTENCODING_MS_1250 },
39 { u"hu", RTL_TEXTENCODING_MS_1250 },
40 { u"pl", RTL_TEXTENCODING_MS_1250 },
41 { u"ro", RTL_TEXTENCODING_MS_1250 },
42 { u"sk", RTL_TEXTENCODING_MS_1250 },
43 { u"sl", RTL_TEXTENCODING_MS_1250 },
44// { "sr", RTL_TEXTENCODING_MS_1250 },
45 { u"sq", RTL_TEXTENCODING_MS_1250 },
46 { u"be", RTL_TEXTENCODING_MS_1251 },
47 { u"bg", RTL_TEXTENCODING_MS_1251 },
48 { u"mk", RTL_TEXTENCODING_MS_1251 },
49 { u"ru", RTL_TEXTENCODING_MS_1251 },
50 { u"sr", RTL_TEXTENCODING_MS_1251 },
51 { u"uk", RTL_TEXTENCODING_MS_1251 },
52 { u"es", RTL_TEXTENCODING_MS_1252 },
53 { u"el", RTL_TEXTENCODING_MS_1253 },
54 { u"tr", RTL_TEXTENCODING_MS_1254 },
55 { u"he", RTL_TEXTENCODING_MS_1255 },
56 { u"ar", RTL_TEXTENCODING_MS_1256 },
57 { u"et", RTL_TEXTENCODING_MS_1257 },
58 { u"lt", RTL_TEXTENCODING_MS_1257 },
59 { u"lv", RTL_TEXTENCODING_MS_1257 },
60 { u"vi", RTL_TEXTENCODING_MS_1258 },
61 };
62
63 for (auto& def : aLanguageTab)
64 {
65 if (sLanguage.startsWithIgnoreAsciiCase(def.msLangStr))
66 return def.meTextEncoding;
67 }
68
69 return RTL_TEXTENCODING_MS_1252;
70}
71
72/* ----------------------------------------------------------------------- */
73
74// See https://msdn.microsoft.com/en-us/library/windows/desktop/dd317756
75// See http://shapelib.maptools.org/codepage.html
76rtl_TextEncoding impl_getWinTextEncodingFromLangStrOEM(const OUString& sLanguage)
77{
78 static constexpr LangEncodingDef aLanguageTab[] =
79 {
80 { u"de", RTL_TEXTENCODING_IBM_437 }, // OEM United States
81 { u"en-us", RTL_TEXTENCODING_IBM_437 }, // OEM United States
82 { u"fi", RTL_TEXTENCODING_IBM_437 }, // OEM United States
83 { u"fr-ca", RTL_TEXTENCODING_IBM_863 }, // OEM French Canadian; French Canadian (DOS)
84 { u"fr", RTL_TEXTENCODING_IBM_437 }, // OEM United States
85 { u"it", RTL_TEXTENCODING_IBM_437 }, // OEM United States
86 { u"nl", RTL_TEXTENCODING_IBM_437 }, // OEM United States
87 { u"sv", RTL_TEXTENCODING_IBM_437 }, // OEM United States
88 { u"el", RTL_TEXTENCODING_IBM_737 }, // OEM Greek (formerly 437G); Greek (DOS)
89 { u"et", RTL_TEXTENCODING_IBM_775 }, // OEM Baltic; Baltic (DOS)
90 { u"lt", RTL_TEXTENCODING_IBM_775 }, // OEM Baltic; Baltic (DOS)
91 { u"lv", RTL_TEXTENCODING_IBM_775 }, // OEM Baltic; Baltic (DOS)
92 { u"en", RTL_TEXTENCODING_IBM_850 }, // OEM Multilingual Latin 1; Western European (DOS)
93 { u"bs", RTL_TEXTENCODING_IBM_852 }, // OEM Latin 2; Central European (DOS)
94 { u"cs", RTL_TEXTENCODING_IBM_852 }, // OEM Latin 2; Central European (DOS)
95 { u"hr", RTL_TEXTENCODING_IBM_852 }, // OEM Latin 2; Central European (DOS)
96 { u"hu", RTL_TEXTENCODING_IBM_852 }, // OEM Latin 2; Central European (DOS)
97 { u"pl", RTL_TEXTENCODING_IBM_852 }, // OEM Latin 2; Central European (DOS)
98 { u"ro", RTL_TEXTENCODING_IBM_852 }, // OEM Latin 2; Central European (DOS)
99 { u"sk", RTL_TEXTENCODING_IBM_852 }, // OEM Latin 2; Central European (DOS)
100 { u"sl", RTL_TEXTENCODING_IBM_852 }, // OEM Latin 2; Central European (DOS)
101// { "sr", RTL_TEXTENCODING_IBM_852 }, // OEM Latin 2; Central European (DOS)
102 { u"bg", RTL_TEXTENCODING_IBM_855 }, // OEM Cyrillic (primarily Russian)
103 { u"mk", RTL_TEXTENCODING_IBM_855 }, // OEM Cyrillic (primarily Russian)
104 { u"sr", RTL_TEXTENCODING_IBM_855 }, // OEM Cyrillic (primarily Russian)
105 { u"tr", RTL_TEXTENCODING_IBM_857 }, // OEM Turkish; Turkish (DOS)
106 { u"pt", RTL_TEXTENCODING_IBM_860 }, // OEM Portuguese; Portuguese (DOS)
107 { u"is", RTL_TEXTENCODING_IBM_861 }, // OEM Icelandic; Icelandic (DOS)
108 { u"he", RTL_TEXTENCODING_IBM_862 }, // OEM Hebrew; Hebrew (DOS)
109 { u"ar", RTL_TEXTENCODING_IBM_864 }, // OEM Arabic; Arabic (864)
110 { u"da", RTL_TEXTENCODING_IBM_865 }, // OEM Nordic; Nordic (DOS)
111 { u"nn", RTL_TEXTENCODING_IBM_865 }, // OEM Nordic; Nordic (DOS)
112 { u"be", RTL_TEXTENCODING_IBM_866 }, // OEM Russian; Cyrillic (DOS)
113 { u"ru", RTL_TEXTENCODING_IBM_866 }, // OEM Russian; Cyrillic (DOS)
114 { u"uk", RTL_TEXTENCODING_IBM_866 }, // OEM Russian; Cyrillic (DOS)
115 { u"th", RTL_TEXTENCODING_MS_874 }, // ANSI/OEM Thai (ISO 8859-11); Thai (Windows)
116 { u"ja", RTL_TEXTENCODING_MS_932 }, // ANSI/OEM Japanese; Japanese (Shift-JIS)
117 { u"zh-cn", RTL_TEXTENCODING_MS_936 }, // ANSI/OEM Simplified Chinese (PRC, Singapore); Chinese Simplified (GB2312)
118 { u"ko", RTL_TEXTENCODING_MS_949 }, // ANSI/OEM Korean (Unified Hangul Code)
119 { u"zh", RTL_TEXTENCODING_MS_950 }, // ANSI/OEM Traditional Chinese (Taiwan; Hong Kong SAR, PRC); Chinese Traditional (Big5)
120 { u"vi", RTL_TEXTENCODING_MS_1258 }, // ANSI/OEM Vietnamese; Vietnamese (Windows)
121 };
122
123 for (auto& def : aLanguageTab)
124 {
125 if (sLanguage.startsWithIgnoreAsciiCase(def.msLangStr))
126 return def.meTextEncoding;
127 }
128
129 return RTL_TEXTENCODING_IBM_850;
130}
131
132} // namespace
133
134rtl_TextEncoding utl_getWinTextEncodingFromLangStr(const OUString& sLanguage, bool bOEM)
135{
136 return bOEM ?
137 impl_getWinTextEncodingFromLangStrOEM(sLanguage) :
138 impl_getWinTextEncodingFromLangStrANSI(sLanguage);
139}
140
141/* vim:set shiftwidth=4 softtabstop=4 expandtab: */
float u
rtl_TextEncoding utl_getWinTextEncodingFromLangStr(const OUString &sLanguage, bool bOEM)
Map from an ISO-639 language code (and optionally ISO-3166 country/region code) to a text encoding of the corresponding Windows ANSI or OEM code page.