LibreOffice Module i18npool (master) 1
textconversion_zh.cxx
Go to the documentation of this file.
1/* -*- Mode: C++; tab-width: 4; indent-tabs-mode: nil; c-basic-offset: 4 -*- */
2/*
3 * This file is part of the LibreOffice project.
4 *
5 * This Source Code Form is subject to the terms of the Mozilla Public
6 * License, v. 2.0. If a copy of the MPL was not distributed with this
7 * file, You can obtain one at http://mozilla.org/MPL/2.0/.
8 *
9 * This file incorporates work covered by the following license notice:
10 *
11 * Licensed to the Apache Software Foundation (ASF) under one or more
12 * contributor license agreements. See the NOTICE file distributed
13 * with this work for additional information regarding copyright
14 * ownership. The ASF licenses this file to you under the Apache
15 * License, Version 2.0 (the "License"); you may not use this file
16 * except in compliance with the License. You may obtain a copy of
17 * the License at http://www.apache.org/licenses/LICENSE-2.0 .
18 */
19
20
#include <textconversion.hxx>
#include <com/sun/star/i18n/TextConversionType.hpp>
#include <com/sun/star/i18n/TextConversionOption.hpp>
#include <com/sun/star/lang/NoSupportException.hpp>
#include <com/sun/star/linguistic2/ConversionDirection.hpp>
#include <com/sun/star/linguistic2/ConversionDictionaryType.hpp>
#include <com/sun/star/linguistic2/ConversionDictionaryList.hpp>
#include <memory>
#include <vector>
29
30using namespace com::sun::star::lang;
31using namespace com::sun::star::i18n;
32using namespace com::sun::star::linguistic2;
33using namespace com::sun::star::uno;
34
35
36namespace i18npool {
37
39 : TextConversionService("com.sun.star.i18n.TextConversion_zh")
40{
41 xCDL = ConversionDictionaryList::create(xContext);
42}
43
44static sal_Unicode getOneCharConversion(sal_Unicode ch, const sal_Unicode* Data, const sal_uInt16* Index)
45{
46 if (Data && Index) {
47 sal_Unicode address = Index[ch>>8];
48 if (address != 0xFFFF)
49 address = Data[address + (ch & 0xFF)];
50 return (address != 0xFFFF) ? address : ch;
51 } else {
52 return ch;
53 }
54}
55
#ifdef DISABLE_DYNLOADING

// With dynamic loading disabled the table accessors are not resolved through
// getFunctionBySymbol(); they are declared here and called directly instead.
extern "C" {

// Character tables: one data/index pair per conversion variant.
const sal_Unicode* getSTC_CharData_T2S();
const sal_uInt16*  getSTC_CharIndex_T2S();
const sal_Unicode* getSTC_CharData_S2V();
const sal_uInt16*  getSTC_CharIndex_S2V();
const sal_Unicode* getSTC_CharData_S2T();
const sal_uInt16*  getSTC_CharIndex_S2T();

// Word dictionary data shared by both directions; fills the passed reference.
const sal_Unicode* getSTC_WordData(sal_Int32&);

// Per-direction word index (fills the passed reference) and entry tables.
const sal_uInt16* getSTC_WordIndex_T2S(sal_Int32&);
const sal_uInt16* getSTC_WordEntry_T2S();
const sal_uInt16* getSTC_WordIndex_S2T(sal_Int32&);
const sal_uInt16* getSTC_WordEntry_S2T();

}

#endif
77
78OUString
79TextConversion_zh::getCharConversion(const OUString& aText, sal_Int32 nStartPos, sal_Int32 nLength, bool toSChinese, sal_Int32 nConversionOptions)
80{
81 const sal_Unicode *Data;
82 const sal_uInt16 *Index;
83
84#ifndef DISABLE_DYNLOADING
85 if (toSChinese) {
86 Data = reinterpret_cast<const sal_Unicode* (*)()>(getFunctionBySymbol("getSTC_CharData_T2S"))();
87 Index = reinterpret_cast<const sal_uInt16* (*)()>(getFunctionBySymbol("getSTC_CharIndex_T2S"))();
88 } else if (nConversionOptions & TextConversionOption::USE_CHARACTER_VARIANTS) {
89 Data = reinterpret_cast<const sal_Unicode* (*)()>(getFunctionBySymbol("getSTC_CharData_S2V"))();
90 Index = reinterpret_cast<const sal_uInt16* (*)()>(getFunctionBySymbol("getSTC_CharIndex_S2V"))();
91 } else {
92 Data = reinterpret_cast<const sal_Unicode* (*)()>(getFunctionBySymbol("getSTC_CharData_S2T"))();
93 Index = reinterpret_cast<const sal_uInt16* (*)()>(getFunctionBySymbol("getSTC_CharIndex_S2T"))();
94 }
95#else
96 if (toSChinese) {
97 Data = getSTC_CharData_T2S();
98 Index = getSTC_CharIndex_T2S();
99 } else if (nConversionOptions & TextConversionOption::USE_CHARACTER_VARIANTS) {
100 Data = getSTC_CharData_S2V();
101 Index = getSTC_CharIndex_S2V();
102 } else {
103 Data = getSTC_CharData_S2T();
104 Index = getSTC_CharIndex_S2T();
105 }
106#endif
107
108 rtl_uString * newStr = rtl_uString_alloc(nLength);
109 for (sal_Int32 i = 0; i < nLength; i++)
110 newStr->buffer[i] =
111 getOneCharConversion(aText[nStartPos+i], Data, Index);
112 return OUString(newStr, SAL_NO_ACQUIRE); //take ownership
113}
114
115OUString
116TextConversion_zh::getWordConversion(const OUString& aText, sal_Int32 nStartPos, sal_Int32 nLength, bool toSChinese, sal_Int32 nConversionOptions, Sequence<sal_Int32>& offset)
117{
118 sal_Int32 dictLen = 0;
119 sal_Int32 maxLen = 0;
120 const sal_uInt16 *index;
121 const sal_uInt16 *entry;
122 const sal_Unicode *charData;
123 const sal_uInt16 *charIndex;
124 bool one2one=true;
125
126#ifndef DISABLE_DYNLOADING
127 const sal_Unicode *wordData = reinterpret_cast<const sal_Unicode* (*)(sal_Int32&)>(getFunctionBySymbol("getSTC_WordData"))(dictLen);
128 if (toSChinese) {
129 index = reinterpret_cast<const sal_uInt16* (*)(sal_Int32&)>(getFunctionBySymbol("getSTC_WordIndex_T2S"))(maxLen);
130 entry = reinterpret_cast<const sal_uInt16* (*)()>(getFunctionBySymbol("getSTC_WordEntry_T2S"))();
131 charData = reinterpret_cast<const sal_Unicode* (*)()>(getFunctionBySymbol("getSTC_CharData_T2S"))();
132 charIndex = reinterpret_cast<const sal_uInt16* (*)()>(getFunctionBySymbol("getSTC_CharIndex_T2S"))();
133 } else {
134 index = reinterpret_cast<const sal_uInt16* (*)(sal_Int32&)>(getFunctionBySymbol("getSTC_WordIndex_S2T"))(maxLen);
135 entry = reinterpret_cast<const sal_uInt16* (*)()>(getFunctionBySymbol("getSTC_WordEntry_S2T"))();
136 if (nConversionOptions & TextConversionOption::USE_CHARACTER_VARIANTS) {
137 charData = reinterpret_cast<const sal_Unicode* (*)()>(getFunctionBySymbol("getSTC_CharData_S2V"))();
138 charIndex = reinterpret_cast<const sal_uInt16* (*)()>(getFunctionBySymbol("getSTC_CharIndex_S2V"))();
139 } else {
140 charData = reinterpret_cast<const sal_Unicode* (*)()>(getFunctionBySymbol("getSTC_CharData_S2T"))();
141 charIndex = reinterpret_cast<const sal_uInt16* (*)()>(getFunctionBySymbol("getSTC_CharIndex_S2T"))();
142 }
143 }
144#else
145 const sal_Unicode *wordData = getSTC_WordData(dictLen);
146 if (toSChinese) {
147 index = getSTC_WordIndex_T2S(maxLen);
148 entry = getSTC_WordEntry_T2S();
149 charData = getSTC_CharData_T2S();
150 charIndex = getSTC_CharIndex_T2S();
151 } else {
152 index = getSTC_WordIndex_S2T(maxLen);
153 entry = getSTC_WordEntry_S2T();
154 if (nConversionOptions & TextConversionOption::USE_CHARACTER_VARIANTS) {
155 charData = getSTC_CharData_S2V();
156 charIndex = getSTC_CharIndex_S2V();
157 } else {
158 charData = getSTC_CharData_S2T();
159 charIndex = getSTC_CharIndex_S2T();
160 }
161 }
162#endif
163
164 if ((!wordData || !index || !entry) && !xCDL.is()) // no word mapping defined, do char2char conversion.
165 return getCharConversion(aText, nStartPos, nLength, toSChinese, nConversionOptions);
166
167 std::unique_ptr<sal_Unicode[]> newStr(new sal_Unicode[nLength * 2 + 1]);
168 sal_Int32 currPos = 0, count = 0;
169 auto offsetRange = asNonConstRange(offset);
170 while (currPos < nLength) {
171 sal_Int32 len = nLength - currPos;
172 bool found = false;
173 if (len > maxLen)
174 len = maxLen;
175 for (; len > 0 && ! found; len--) {
176 OUString word = aText.copy(nStartPos + currPos, len);
177 sal_Int32 current = 0;
178 // user dictionary
179 if (xCDL.is()) {
180 Sequence < OUString > conversions;
181 try {
182 conversions = xCDL->queryConversions(word, 0, len,
183 aLocale, ConversionDictionaryType::SCHINESE_TCHINESE,
184 /*toSChinese ?*/ ConversionDirection_FROM_LEFT /*: ConversionDirection_FROM_RIGHT*/,
185 nConversionOptions);
186 }
187 catch ( NoSupportException & ) {
188 // clear reference (when there is no user dictionary) in order
189 // to not always have to catch this exception again
190 // in further calls. (save time)
191 xCDL = nullptr;
192 }
193 catch (...) {
194 // catch all other exceptions to allow
195 // querying the system dictionary in the next line
196 }
197 if (conversions.hasElements()) {
198 if (offset.hasElements()) {
199 if (word.getLength() != conversions[0].getLength())
200 one2one=false;
201 while (current < conversions[0].getLength()) {
202 offsetRange[count] = nStartPos + currPos + (current *
203 word.getLength() / conversions[0].getLength());
204 newStr[count++] = conversions[0][current++];
205 }
206 // offset[count-1] = nStartPos + currPos + word.getLength() - 1;
207 } else {
208 while (current < conversions[0].getLength())
209 newStr[count++] = conversions[0][current++];
210 }
211 currPos += word.getLength();
212 found = true;
213 }
214 }
215
216 if (wordData && !found && index[len+1] - index[len] > 0) {
217 sal_Int32 bottom = static_cast<sal_Int32>(index[len]);
218 sal_Int32 top = static_cast<sal_Int32>(index[len+1]) - 1;
219
220 while (bottom <= top && !found) {
221 current = (top + bottom) / 2;
222 const sal_Int32 result = rtl_ustr_compare(
223 word.getStr(), wordData + entry[current]);
224 if (result < 0)
225 top = current - 1;
226 else if (result > 0)
227 bottom = current + 1;
228 else {
229 if (toSChinese) // Traditionary/Simplified conversion,
230 for (current = entry[current]-1; current > 0 && wordData[current-1]; current--) ;
231 else // Simplified/Traditionary conversion, forwards search for next word
232 current = entry[current] + word.getLength() + 1;
233 sal_Int32 start=current;
234 if (offset.hasElements()) {
235 if (word.getLength() != static_cast<sal_Int32>(std::u16string_view(&wordData[current]).size()))
236 one2one=false;
237 sal_Int32 convertedLength=std::u16string_view(&wordData[current]).size();
238 while (wordData[current]) {
239 offsetRange[count]=nStartPos + currPos + ((current-start) *
240 word.getLength() / convertedLength);
241 newStr[count++] = wordData[current++];
242 }
243 // offset[count-1]=nStartPos + currPos + word.getLength() - 1;
244 } else {
245 while (wordData[current])
246 newStr[count++] = wordData[current++];
247 }
248 currPos += word.getLength();
249 found = true;
250 }
251 }
252 }
253 }
254 if (!found) {
255 if (offset.hasElements())
256 offsetRange[count]=nStartPos+currPos;
257 newStr[count++] =
258 getOneCharConversion(aText[nStartPos+currPos], charData, charIndex);
259 currPos++;
260 }
261 }
262 if (offset.hasElements())
263 offset.realloc(one2one ? 0 : count);
264 OUString aRet(newStr.get(), count);
265 return aRet;
266}
267
268TextConversionResult SAL_CALL
269TextConversion_zh::getConversions( const OUString& aText, sal_Int32 nStartPos, sal_Int32 nLength,
270 const Locale& rLocale, sal_Int16 nConversionType, sal_Int32 nConversionOptions)
271{
272 TextConversionResult result;
273
274 result.Candidates =
275 { getConversion( aText, nStartPos, nLength, rLocale, nConversionType, nConversionOptions) };
276 result.Boundary.startPos = nStartPos;
277 result.Boundary.endPos = nStartPos + nLength;
278
279 return result;
280}
281
282OUString SAL_CALL
283TextConversion_zh::getConversion( const OUString& aText, sal_Int32 nStartPos, sal_Int32 nLength,
284 const Locale& rLocale, sal_Int16 nConversionType, sal_Int32 nConversionOptions)
285{
286 if (rLocale.Language != "zh" || ( nConversionType != TextConversionType::TO_SCHINESE && nConversionType != TextConversionType::TO_TCHINESE) )
287 throw NoSupportException(); // Conversion type is not supported in this service.
288
289 aLocale=rLocale;
290 bool toSChinese = nConversionType == TextConversionType::TO_SCHINESE;
291
292 if (nConversionOptions & TextConversionOption::CHARACTER_BY_CHARACTER)
293 // char to char dictionary
294 return getCharConversion(aText, nStartPos, nLength, toSChinese, nConversionOptions);
295 else {
297 // word to word dictionary
298 return getWordConversion(aText, nStartPos, nLength, toSChinese, nConversionOptions, offset);
299 }
300}
301
302OUString SAL_CALL
303TextConversion_zh::getConversionWithOffset( const OUString& aText, sal_Int32 nStartPos, sal_Int32 nLength,
304 const Locale& rLocale, sal_Int16 nConversionType, sal_Int32 nConversionOptions, Sequence<sal_Int32>& offset)
305{
306 if (rLocale.Language != "zh" || ( nConversionType != TextConversionType::TO_SCHINESE && nConversionType != TextConversionType::TO_TCHINESE) )
307 throw NoSupportException(); // Conversion type is not supported in this service.
308
309 aLocale=rLocale;
310 bool toSChinese = nConversionType == TextConversionType::TO_SCHINESE;
311
312 if (nConversionOptions & TextConversionOption::CHARACTER_BY_CHARACTER) {
313 offset.realloc(0);
314 // char to char dictionary
315 return getCharConversion(aText, nStartPos, nLength, toSChinese, nConversionOptions);
316 } else {
317 if (offset.getLength() < 2*nLength)
318 offset.realloc(2*nLength);
319 // word to word dictionary
320 return getWordConversion(aText, nStartPos, nLength, toSChinese, nConversionOptions, offset);
321 }
322}
323
324sal_Bool SAL_CALL
325TextConversion_zh::interactiveConversion( const Locale& /*rLocale*/, sal_Int16 /*nTextConversionType*/, sal_Int32 /*nTextConversionOptions*/ )
326{
327 return false;
328}
329
330}
331
332/* vim:set shiftwidth=4 softtabstop=4 expandtab: */
oslGenericFunction getFunctionBySymbol(const char *func)
TextConversion_zh(const css::uno::Reference< css::uno::XComponentContext > &rxContext)
OUString SAL_CALL getConversion(const OUString &aText, sal_Int32 nStartPos, sal_Int32 nLength, const css::lang::Locale &aLocale, sal_Int16 nTextConversionType, sal_Int32 nTextConversionOptions) override
OUString getWordConversion(const OUString &aText, sal_Int32 nStartPos, sal_Int32 nLength, bool toSChinese, sal_Int32 nConversionOptions, css::uno::Sequence< sal_Int32 > &offset)
OUString SAL_CALL getConversionWithOffset(const OUString &aText, sal_Int32 nStartPos, sal_Int32 nLength, const css::lang::Locale &aLocale, sal_Int16 nTextConversionType, sal_Int32 nTextConversionOptions, css::uno::Sequence< sal_Int32 > &offset) override
css::uno::Reference< css::linguistic2::XConversionDictionaryList > xCDL
css::i18n::TextConversionResult SAL_CALL getConversions(const OUString &aText, sal_Int32 nStartPos, sal_Int32 nLength, const css::lang::Locale &aLocale, sal_Int16 nTextConversionType, sal_Int32 nTextConversionOptions) override
rtl::OUString getCharConversion(const rtl::OUString &aText, sal_Int32 nStartPos, sal_Int32 nLength, bool toSChinese, sal_Int32 nConversionOptions)
sal_Bool SAL_CALL interactiveConversion(const css::lang::Locale &aLocale, sal_Int16 nTextConversionType, sal_Int32 nTextConversionOptions) override
OString top
OString bottom
double getLength(const B2DPolygon &rCandidate)
int i
Constant values shared between i18npool and, for example, the number formatter.
static sal_Unicode getOneCharConversion(sal_Unicode ch, const sal_Unicode *Data, const sal_uInt16 *Index)
index
unsigned char sal_Bool
sal_uInt16 sal_Unicode
Any result
sal_Int32 nLength