LibreOffice Module i18npool (master) 1
transliteration_body.cxx
Go to the documentation of this file.
1/* -*- Mode: C++; tab-width: 4; indent-tabs-mode: nil; c-basic-offset: 4 -*- */
2/*
3 * This file is part of the LibreOffice project.
4 *
5 * This Source Code Form is subject to the terms of the Mozilla Public
6 * License, v. 2.0. If a copy of the MPL was not distributed with this
7 * file, You can obtain one at http://mozilla.org/MPL/2.0/.
8 *
9 * This file incorporates work covered by the following license notice:
10 *
11 * Licensed to the Apache Software Foundation (ASF) under one or more
12 * contributor license agreements. See the NOTICE file distributed
13 * with this work for additional information regarding copyright
14 * ownership. The ASF licenses this file to you under the Apache
15 * License, Version 2.0 (the "License"); you may not use this file
16 * except in compliance with the License. You may obtain a copy of
17 * the License at http://www.apache.org/licenses/LICENSE-2.0 .
18 */
19// Silence spurious Werror=maybe-uninitialized in transliterateImpl emitted at least by GCC 11.2.0
20#if defined __GNUC__ && !defined __clang__
21#pragma GCC diagnostic ignored "-Wmaybe-uninitialized"
22#endif
23
24#include <rtl/ref.hxx>
26#include <i18nutil/unicode.hxx>
27#include <com/sun/star/i18n/MultipleCharsOutputException.hpp>
28#include <com/sun/star/i18n/TransliterationType.hpp>
31#include <o3tl/temporary.hxx>
32
34
36#include <memory>
37#include <numeric>
38
39using namespace ::com::sun::star::uno;
40using namespace ::com::sun::star::i18n;
41using namespace ::com::sun::star::lang;
42
43namespace i18npool {
44
46{
    // Sets up the generic "body" transliteration: no case mapping is selected
    // (MappingType::NONE) and the two name members identify this implementation.
    // NOTE(review): the signature line (listing line 45) is missing from this
    // extract; presumably this is the Transliteration_body constructor - confirm.
47 nMappingType = MappingType::NONE;
48 transliterationName = "Transliteration_body";
49 implementationName = "com.sun.star.i18n.Transliteration.Transliteration_body";
50}
51
53{
    // Always reports TransliterationType::ONE_TO_ONE.
    // NOTE(review): the signature line (listing line 52) is missing from this extract.
54 return TransliterationType::ONE_TO_ONE;
55}
56
58 const OUString& /*str1*/, sal_Int32 /*pos1*/, sal_Int32 /*nCount1*/, sal_Int32& /*nMatch1*/,
59 const OUString& /*str2*/, sal_Int32 /*pos2*/, sal_Int32 /*nCount2*/, sal_Int32& /*nMatch2*/)
60{
    // Comparison is not implemented here: all parameters are ignored and every
    // call throws RuntimeException.
    // NOTE(review): the line carrying the function name (listing line 57) is
    // missing from this extract.
61 throw RuntimeException();
62}
63
64Sequence< OUString > SAL_CALL
65Transliteration_body::transliterateRange( const OUString& str1, const OUString& str2 )
66{
67 return { str1, str2 };
68}
69
71{
    // Picks the mapping type to apply to a single character.
    // nMappingType is returned unchanged unless it is exactly the toggle-case
    // combination LowerToUpper | UpperToLower; in that case the Unicode type of
    // cChar decides which of the two directions is used.
    // NOTE(review): the signature line (listing line 70) is missing from this extract.
72 MappingType nRes = nMappingType;
73
74 // take care of TOGGLE_CASE transliteration:
75 // nMappingType should not be a combination of flags, thus we decide now
76 // which one to use.
77 if (nMappingType == (MappingType::LowerToUpper | MappingType::UpperToLower))
78 {
79 const sal_Int16 nType = unicode::getUnicodeType( cChar );
80 if (nType & 0x02 /* lower case*/)
81 nRes = MappingType::LowerToUpper;
82 else
83 {
84 // should also work properly for non-upper characters like white spaces, numbers, ...
85 nRes = MappingType::UpperToLower;
86 }
87 }
88
89 return nRes;
90}
91
92OUString
94 const OUString& inStr, sal_Int32 startPos, sal_Int32 nCount,
95 Sequence< sal_Int32 >* pOffset)
96{
    // Maps nCount UTF-16 units of inStr, starting at startPos, through
    // i18nutil::casefolding::getValue() and returns the concatenated results.
    // When pOffset is non-null it is overwritten with one entry per output unit,
    // holding the input index (i + startPos) of the unit that produced it.
    // NOTE(review): the line carrying the function name (listing line 93) is
    // missing from this extract.
97 const sal_Unicode *in = inStr.getStr() + startPos;
98
99 // We could assume that most calls result in identical string lengths,
100 // thus using a preallocated OUStringBuffer could be an easy way
101 // to assemble the return string without too much hassle. However,
102 // for single characters the OUStringBuffer::append() method is quite
103 // expensive compared to a simple array operation, so it pays here
104 // to copy the final result instead.
105
106 // Allocate the max possible buffer. Try to use stack instead of heap,
107 // which would have to be reallocated most times anyways.
    // The buffer holds nCount * NMAPPINGMAX units; this presumes a single
    // mapping never yields more than NMAPPINGMAX units - TODO confirm.
108 constexpr sal_Int32 nLocalBuf = 2048;
109 sal_Unicode* out;
110 std::unique_ptr<sal_Unicode[]> pHeapBuf;
111 if (nCount <= nLocalBuf)
112 out = static_cast<sal_Unicode*>(alloca(nCount * NMAPPINGMAX * sizeof(sal_Unicode)));
113 else
114 {
115 pHeapBuf.reset(new sal_Unicode[ nCount * NMAPPINGMAX ]);
116 out = pHeapBuf.get();
117 }
118
    // j is the number of units written to out so far.
119 sal_Int32 j = 0;
120 // Two different blocks to eliminate the if(useOffset) condition inside the loop.
121 // Yes, on massive use even such small things do count.
122 if ( pOffset )
123 {
124 sal_Int32* offsetData;
125 std::unique_ptr<sal_Int32[]> pOffsetHeapBuf;
    // Because of the max(), nOffsetCount is never below nLocalBuf: the stack
    // branch below therefore always reserves nLocalBuf * NMAPPINGMAX entries,
    // and the heap branch is taken only when nCount exceeds nLocalBuf.
126 sal_Int32 nOffsetCount = std::max<sal_Int32>(nLocalBuf, nCount);
127 if (nOffsetCount <= nLocalBuf)
128 offsetData = static_cast<sal_Int32*>(alloca(nOffsetCount * NMAPPINGMAX * sizeof(sal_Int32)));
129 else
130 {
131 pOffsetHeapBuf.reset(new sal_Int32[ nOffsetCount * NMAPPINGMAX ]);
132 offsetData = pOffsetHeapBuf.get();
133 }
134 sal_Int32* offsetDataEnd = offsetData;
135
136 for (sal_Int32 i = 0; i < nCount; i++)
137 {
138 // take care of TOGGLE_CASE transliteration:
    // NOTE(review): listing line 139, which declares nTmpMappingType, is
    // missing from this extract; presumably it is obtained from
    // lcl_getMappingTypeForToggleCase( nMappingType, in[i] ) - confirm.
140
141 const i18nutil::Mapping &map = i18nutil::casefolding::getValue( in, i, nCount, aLocale, nTmpMappingType );
    // every unit produced for input position i points back to i + startPos
142 std::fill_n(offsetDataEnd, map.nmap, i + startPos);
143 offsetDataEnd += map.nmap;
144 std::copy_n(map.map, map.nmap, out + j);
145 j += map.nmap;
146 }
147
    // hand the collected offsets to the caller, replacing the previous content
148 *pOffset = css::uno::Sequence< sal_Int32 >(offsetData, offsetDataEnd - offsetData);
149 }
150 else
151 {
152 for ( sal_Int32 i = 0; i < nCount; i++)
153 {
154 // take care of TOGGLE_CASE transliteration:
    // NOTE(review): listing line 155, which declares nTmpMappingType, is
    // missing from this extract (see the matching loop above).
156
157 const i18nutil::Mapping &map = i18nutil::casefolding::getValue( in, i, nCount, aLocale, nTmpMappingType );
158 std::copy_n(map.map, map.nmap, out + j);
159 j += map.nmap;
160 }
161 }
162
    // copy the j units assembled in the scratch buffer into the result string
163 return OUString(out, j);
164}
165
166OUString SAL_CALL
168{
    // Maps the single unit inChar with the member nMappingType and returns the
    // map.nmap resulting units as a string.
    // NOTE(review): the line carrying the function name (listing line 167) is
    // missing from this extract.
169 const i18nutil::Mapping &map = i18nutil::casefolding::getValue(&inChar, 0, 1, aLocale, nMappingType);
    // presumably rtl_uString_alloc(n) reserves room for n units plus the
    // terminating 0 written below - confirm against the rtl/ustring.h documentation
170 rtl_uString* pStr = rtl_uString_alloc(map.nmap);
171 sal_Unicode* out = pStr->buffer;
172 sal_Int32 i;
173
174 for (i = 0; i < map.nmap; i++)
175 out[i] = map.map[i];
    // i == map.nmap here: terminate the buffer directly after the copied units
176 out[i] = 0;
177
    // SAL_NO_ACQUIRE: presumably the OUString adopts the reference obtained from
    // rtl_uString_alloc instead of acquiring a second one - confirm.
178 return OUString( pStr, SAL_NO_ACQUIRE );
179}
180
181sal_Unicode SAL_CALL
183{
    // Maps the single unit inChar with the member nMappingType and returns the
    // first mapped unit; throws MultipleCharsOutputException when the mapping
    // yields more than one unit.
    // NOTE(review): nMappingType is passed as is, whereas the loops in
    // transliterateImpl pass a per-character nTmpMappingType - check whether the
    // toggle-case flag combination needs the same treatment here.
    // NOTE(review): the line carrying the function name (listing line 182) is
    // missing from this extract.
184 const i18nutil::Mapping &map = i18nutil::casefolding::getValue(&inChar, 0, 1, aLocale, nMappingType);
185 if (map.nmap > 1)
186 throw MultipleCharsOutputException();
187 return map.map[0];
188}
189
190OUString
191Transliteration_body::foldingImpl( const OUString& inStr, sal_Int32 startPos, sal_Int32 nCount,
192 Sequence< sal_Int32 >* pOffset)
193{
194 return transliterateImpl(inStr, startPos, nCount, pOffset);
195}
196
198{
    // Starts without a mapping direction (MappingType::NONE) and sets the names
    // of the generic case-mapping transliteration.
    // NOTE(review): the signature line (listing line 197) is missing from this
    // extract; presumably the Transliteration_casemapping constructor - confirm.
199 nMappingType = MappingType::NONE;
200 transliterationName = "casemapping(generic)";
201 implementationName = "com.sun.star.i18n.Transliteration.Transliteration_casemapping";
202}
203
205{
    // Selects the upper-to-lower mapping and sets the matching names.
    // NOTE(review): the signature line (listing line 204) is missing from this
    // extract; presumably the Transliteration_u2l constructor - confirm.
206 nMappingType = MappingType::UpperToLower;
207 transliterationName = "upper_to_lower(generic)";
208 implementationName = "com.sun.star.i18n.Transliteration.Transliteration_u2l";
209}
210
212{
    // Selects the lower-to-upper mapping and sets the matching names.
    // NOTE(review): the signature line (listing line 211) is missing from this
    // extract; presumably the Transliteration_l2u constructor - confirm.
213 nMappingType = MappingType::LowerToUpper;
214 transliterationName = "lower_to_upper(generic)";
215 implementationName = "com.sun.star.i18n.Transliteration.Transliteration_l2u";
216}
217
219{
    // Stores both direction flags at once; this combination is the marker that
    // lcl_getMappingTypeForToggleCase resolves to a single direction per character.
    // NOTE(review): the signature line (listing line 218) is missing from this
    // extract; presumably the Transliteration_togglecase constructor - confirm.
220 // usually nMappingType must NOT be a combination of different flags here,
221 // but we take care of that problem in Transliteration_body::transliterate above
222 // before that value is used. There we will decide which of both is to be used on
223 // a per character basis.
224 nMappingType = MappingType::LowerToUpper | MappingType::UpperToLower;
225 transliterationName = "toggle(generic)";
226 implementationName = "com.sun.star.i18n.Transliteration.Transliteration_togglecase";
227}
228
230{
    // Selects the to-title mapping and sets the matching names.
    // NOTE(review): the signature line (listing line 229) is missing from this
    // extract; presumably the Transliteration_titlecase constructor - confirm.
231 nMappingType = MappingType::ToTitle;
232 transliterationName = "title(generic)";
233 implementationName = "com.sun.star.i18n.Transliteration.Transliteration_titlecase";
234}
235
238 std::u16string_view inStr, sal_Int32 startPos, sal_Int32 nCount,
239 const Locale &rLocale,
240 Sequence< sal_Int32 >* pOffset )
241{
242 const OUString aText( inStr.substr( startPos, nCount ) );
243
244 OUString aRes;
245 if (!aText.isEmpty())
246 {
247 Reference< XComponentContext > xContext = ::comphelper::getProcessComponentContext();
249
250 // because xCharClassImpl.toTitle does not handle ligatures or Beta but will raise
251 // an exception we need to handle the first chara manually...
252
253 // we don't want to change surrogates by accident, thuse we use proper code point iteration
254 sal_uInt32 cFirstChar = aText.iterateCodePoints( &o3tl::temporary(sal_Int32(0)) );
255 OUString aResolvedLigature( &cFirstChar, 1 );
256 // toUpper can be used to properly resolve ligatures and characters like Beta
257 aResolvedLigature = xCharClassImpl->toUpper( aResolvedLigature, 0, aResolvedLigature.getLength(), rLocale );
258 // since toTitle will leave all-uppercase text unchanged we first need to
259 // use toLower to bring possible 2nd and following chars in lowercase
260 aResolvedLigature = xCharClassImpl->toLower( aResolvedLigature, 0, aResolvedLigature.getLength(), rLocale );
261 sal_Int32 nResolvedLen = aResolvedLigature.getLength();
262
263 // now we can properly use toTitle to get the expected result for the resolved string.
264 // The rest of the text should just become lowercase.
265 aRes = xCharClassImpl->toTitle( aResolvedLigature, 0, nResolvedLen, rLocale ) +
266 xCharClassImpl->toLower( aText, 1, aText.getLength() - 1, rLocale );
267 pOffset->realloc( aRes.getLength() );
268
269 auto [begin, end] = asNonConstRange(*pOffset);
270 sal_Int32* pOffsetInt = std::fill_n(begin, nResolvedLen, 0);
271 std::iota(pOffsetInt, end, 1);
272 }
273 return aRes;
274}
275
276// this function expects to be called on a word-by-word basis,
277// namely that startPos points to the first char of the word
279 const OUString& inStr, sal_Int32 startPos, sal_Int32 nCount,
280 Sequence< sal_Int32 >* pOffset )
281{
    // Delegates to transliterate_titlecase_Impl, adding the member aLocale.
    // NOTE(review): the line carrying the function name (listing line 278) is
    // missing from this extract.
282 return transliterate_titlecase_Impl( inStr, startPos, nCount, aLocale, pOffset );
283}
284
286{
    // Uses the same to-title mapping as the title-case variant, with the names
    // of the sentence-case transliteration.
    // NOTE(review): the signature line (listing line 285) is missing from this
    // extract; presumably the Transliteration_sentencecase constructor - confirm.
287 nMappingType = MappingType::ToTitle; // though only to be applied to the first word...
288 transliterationName = "sentence(generic)";
289 implementationName = "com.sun.star.i18n.Transliteration.Transliteration_sentencecase";
290}
291
292// this function expects to be called on a sentence-by-sentence basis,
293// namely that startPos points to the first word (NOT first char!) in the sentence
295 const OUString& inStr, sal_Int32 startPos, sal_Int32 nCount,
296 Sequence< sal_Int32 >* pOffset )
297{
    // Delegates to transliterate_titlecase_Impl, adding the member aLocale; the
    // call is identical to the one made by the title-case variant.
    // NOTE(review): the line carrying the function name (listing line 294) is
    // missing from this extract.
298 return transliterate_titlecase_Impl( inStr, startPos, nCount, aLocale, pOffset );
299}
300
301}
302
303/* vim:set shiftwidth=4 softtabstop=4 expandtab: */
#define NMAPPINGMAX
MappingType
virtual sal_Unicode SAL_CALL transliterateChar2Char(sal_Unicode inChar) override
sal_Int16 SAL_CALL getType() override
sal_Bool SAL_CALL equals(const OUString &str1, sal_Int32 pos1, sal_Int32 nCount1, sal_Int32 &nMatch1, const OUString &str2, sal_Int32 pos2, sal_Int32 nCount2, sal_Int32 &nMatch2) override
OUString SAL_CALL transliterateChar2String(sal_Unicode inChar) override
OUString foldingImpl(const OUString &inStr, sal_Int32 startPos, sal_Int32 nCount, css::uno::Sequence< sal_Int32 > *pOffset) override
css::uno::Sequence< OUString > SAL_CALL transliterateRange(const OUString &str1, const OUString &str2) override
OUString transliterateImpl(const OUString &inStr, sal_Int32 startPos, sal_Int32 nCount, css::uno::Sequence< sal_Int32 > *pOffset) override
virtual OUString transliterateImpl(const OUString &inStr, sal_Int32 startPos, sal_Int32 nCount, css::uno::Sequence< sal_Int32 > *pOffset) override
virtual OUString transliterateImpl(const OUString &inStr, sal_Int32 startPos, sal_Int32 nCount, css::uno::Sequence< sal_Int32 > *pOffset) override
static sal_Int16 getUnicodeType(const sal_Unicode ch)
int nCount
int i
Constant values shared between i18npool and, for example, the number formatter.
static MappingType lcl_getMappingTypeForToggleCase(MappingType nMappingType, sal_Unicode cChar)
static OUString transliterate_titlecase_Impl(std::u16string_view inStr, sal_Int32 startPos, sal_Int32 nCount, const Locale &rLocale, Sequence< sal_Int32 > *pOffset)
enumrange< T >::Iterator begin(enumrange< T >)
constexpr T & temporary(T &&x)
end
QPRO_FUNC_TYPE nType
std::map< OUString, rtl::Reference< Entity > > map
unsigned char sal_Bool
sal_uInt16 sal_Unicode