LibreOffice Module i18npool (master)  1
transliteration_body.cxx
Go to the documentation of this file.
1 /* -*- Mode: C++; tab-width: 4; indent-tabs-mode: nil; c-basic-offset: 4 -*- */
2 /*
3  * This file is part of the LibreOffice project.
4  *
5  * This Source Code Form is subject to the terms of the Mozilla Public
6  * License, v. 2.0. If a copy of the MPL was not distributed with this
7  * file, You can obtain one at http://mozilla.org/MPL/2.0/.
8  *
9  * This file incorporates work covered by the following license notice:
10  *
11  * Licensed to the Apache Software Foundation (ASF) under one or more
12  * contributor license agreements. See the NOTICE file distributed
13  * with this work for additional information regarding copyright
14  * ownership. The ASF licenses this file to you under the Apache
15  * License, Version 2.0 (the "License"); you may not use this file
16  * except in compliance with the License. You may obtain a copy of
17  * the License at http://www.apache.org/licenses/LICENSE-2.0 .
18  */
19 // Silence spurious Werror=maybe-uninitialized in transliterateImpl emitted at least by GCC 11.2.0
20 #if defined __GNUC__ && !defined __clang__
21 #pragma GCC diagnostic ignored "-Wmaybe-uninitialized"
22 #endif
23 
24 #include <rtl/ref.hxx>
25 #include <i18nutil/casefolding.hxx>
26 #include <i18nutil/unicode.hxx>
27 #include <com/sun/star/i18n/MultipleCharsOutputException.hpp>
28 #include <com/sun/star/i18n/TransliterationType.hpp>
30 #include <comphelper/sequence.hxx>
31 #include <o3tl/temporary.hxx>
32 
34 
35 #include <transliteration_body.hxx>
36 #include <memory>
37 #include <numeric>
38 
39 using namespace ::com::sun::star::uno;
40 using namespace ::com::sun::star::i18n;
41 using namespace ::com::sun::star::lang;
42 
43 namespace i18npool {
44 
46 {
47  nMappingType = MappingType::NONE;
48  transliterationName = "Transliteration_body";
49  implementationName = "com.sun.star.i18n.Transliteration.Transliteration_body";
50 }
51 
52 sal_Int16 SAL_CALL Transliteration_body::getType()
53 {
54  return TransliterationType::ONE_TO_ONE;
55 }
56 
58  const OUString& /*str1*/, sal_Int32 /*pos1*/, sal_Int32 /*nCount1*/, sal_Int32& /*nMatch1*/,
59  const OUString& /*str2*/, sal_Int32 /*pos2*/, sal_Int32 /*nCount2*/, sal_Int32& /*nMatch2*/)
60 {
61  throw RuntimeException();
62 }
63 
64 Sequence< OUString > SAL_CALL
65 Transliteration_body::transliterateRange( const OUString& str1, const OUString& str2 )
66 {
67  return { str1, str2 };
68 }
69 
71 {
72  MappingType nRes = nMappingType;
73 
74  // take care of TOGGLE_CASE transliteration:
75  // nMappingType should not be a combination of flags, thuse we decide now
76  // which one to use.
77  if (nMappingType == (MappingType::LowerToUpper | MappingType::UpperToLower))
78  {
79  const sal_Int16 nType = unicode::getUnicodeType( cChar );
80  if (nType & 0x02 /* lower case*/)
81  nRes = MappingType::LowerToUpper;
82  else
83  {
84  // should also work properly for non-upper characters like white spaces, numbers, ...
85  nRes = MappingType::UpperToLower;
86  }
87  }
88 
89  return nRes;
90 }
91 
92 OUString
94  const OUString& inStr, sal_Int32 startPos, sal_Int32 nCount,
95  Sequence< sal_Int32 >* pOffset)
96 {
97  const sal_Unicode *in = inStr.getStr() + startPos;
98 
99  // We could assume that most calls result in identical string lengths,
100  // thus using a preallocated OUStringBuffer could be an easy way
101  // to assemble the return string without too much hassle. However,
102  // for single characters the OUStringBuffer::append() method is quite
103  // expensive compared to a simple array operation, so it pays here
104  // to copy the final result instead.
105 
106  // Allocate the max possible buffer. Try to use stack instead of heap,
107  // which would have to be reallocated most times anyways.
108  constexpr sal_Int32 nLocalBuf = 2048;
109  sal_Unicode* out;
110  std::unique_ptr<sal_Unicode[]> pHeapBuf;
111  if (nCount <= nLocalBuf)
112  out = static_cast<sal_Unicode*>(alloca(nCount * NMAPPINGMAX * sizeof(sal_Unicode)));
113  else
114  {
115  pHeapBuf.reset(new sal_Unicode[ nCount * NMAPPINGMAX ]);
116  out = pHeapBuf.get();
117  }
118 
119  sal_Int32 j = 0;
120  // Two different blocks to eliminate the if(useOffset) condition inside the loop.
121  // Yes, on massive use even such small things do count.
122  if ( pOffset )
123  {
124  std::vector<sal_Int32> aVec;
125  aVec.reserve(std::max<sal_Int32>(nLocalBuf, nCount) * NMAPPINGMAX);
126 
127  for (sal_Int32 i = 0; i < nCount; i++)
128  {
129  // take care of TOGGLE_CASE transliteration:
130  MappingType nTmpMappingType = lcl_getMappingTypeForToggleCase( nMappingType, in[i] );
131 
132  const i18nutil::Mapping &map = i18nutil::casefolding::getValue( in, i, nCount, aLocale, nTmpMappingType );
133  std::fill_n(std::back_inserter(aVec), map.nmap, i + startPos);
134  std::copy_n(map.map, map.nmap, out + j);
135  j += map.nmap;
136  }
137 
138  *pOffset = comphelper::containerToSequence(aVec);
139  }
140  else
141  {
142  for ( sal_Int32 i = 0; i < nCount; i++)
143  {
144  // take care of TOGGLE_CASE transliteration:
145  MappingType nTmpMappingType = lcl_getMappingTypeForToggleCase( nMappingType, in[i] );
146 
147  const i18nutil::Mapping &map = i18nutil::casefolding::getValue( in, i, nCount, aLocale, nTmpMappingType );
148  std::copy_n(map.map, map.nmap, out + j);
149  j += map.nmap;
150  }
151  }
152 
153  return OUString(out, j);
154 }
155 
156 OUString SAL_CALL
158 {
159  const i18nutil::Mapping &map = i18nutil::casefolding::getValue(&inChar, 0, 1, aLocale, nMappingType);
160  rtl_uString* pStr = rtl_uString_alloc(map.nmap);
161  sal_Unicode* out = pStr->buffer;
162  sal_Int32 i;
163 
164  for (i = 0; i < map.nmap; i++)
165  out[i] = map.map[i];
166  out[i] = 0;
167 
168  return OUString( pStr, SAL_NO_ACQUIRE );
169 }
170 
171 sal_Unicode SAL_CALL
173 {
174  const i18nutil::Mapping &map = i18nutil::casefolding::getValue(&inChar, 0, 1, aLocale, nMappingType);
175  if (map.nmap > 1)
176  throw MultipleCharsOutputException();
177  return map.map[0];
178 }
179 
180 OUString
181 Transliteration_body::foldingImpl( const OUString& inStr, sal_Int32 startPos, sal_Int32 nCount,
182  Sequence< sal_Int32 >* pOffset)
183 {
184  return transliterateImpl(inStr, startPos, nCount, pOffset);
185 }
186 
188 {
189  nMappingType = MappingType::NONE;
190  transliterationName = "casemapping(generic)";
191  implementationName = "com.sun.star.i18n.Transliteration.Transliteration_casemapping";
192 }
193 
194 void
195 Transliteration_casemapping::setMappingType( const MappingType rMappingType, const Locale& rLocale )
196 {
197  nMappingType = rMappingType;
198  if (aLocale != rLocale)
199  aLocale = rLocale;
200 }
201 
203 {
204  nMappingType = MappingType::UpperToLower;
205  transliterationName = "upper_to_lower(generic)";
206  implementationName = "com.sun.star.i18n.Transliteration.Transliteration_u2l";
207 }
208 
210 {
211  nMappingType = MappingType::LowerToUpper;
212  transliterationName = "lower_to_upper(generic)";
213  implementationName = "com.sun.star.i18n.Transliteration.Transliteration_l2u";
214 }
215 
217 {
218  // usually nMappingType must NOT be a combination of different flags here,
219  // but we take care of that problem in Transliteration_body::transliterate above
220  // before that value is used. There we will decide which of both is to be used on
221  // a per character basis.
222  nMappingType = MappingType::LowerToUpper | MappingType::UpperToLower;
223  transliterationName = "toggle(generic)";
224  implementationName = "com.sun.star.i18n.Transliteration.Transliteration_togglecase";
225 }
226 
228 {
229  nMappingType = MappingType::ToTitle;
230  transliterationName = "title(generic)";
231  implementationName = "com.sun.star.i18n.Transliteration.Transliteration_titlecase";
232 }
233 
236  const OUString& inStr, sal_Int32 startPos, sal_Int32 nCount,
237  const Locale &rLocale,
238  Sequence< sal_Int32 >* pOffset )
239 {
240  const OUString aText( inStr.copy( startPos, nCount ) );
241 
242  OUString aRes;
243  if (!aText.isEmpty())
244  {
245  Reference< XComponentContext > xContext = ::comphelper::getProcessComponentContext();
247 
248  // because xCharClassImpl.toTitle does not handle ligatures or Beta but will raise
249  // an exception we need to handle the first chara manually...
250 
251  // we don't want to change surrogates by accident, thuse we use proper code point iteration
252  sal_uInt32 cFirstChar = aText.iterateCodePoints( &o3tl::temporary(sal_Int32(0)) );
253  OUString aResolvedLigature( &cFirstChar, 1 );
254  // toUpper can be used to properly resolve ligatures and characters like Beta
255  aResolvedLigature = xCharClassImpl->toUpper( aResolvedLigature, 0, aResolvedLigature.getLength(), rLocale );
256  // since toTitle will leave all-uppercase text unchanged we first need to
257  // use toLower to bring possible 2nd and following chars in lowercase
258  aResolvedLigature = xCharClassImpl->toLower( aResolvedLigature, 0, aResolvedLigature.getLength(), rLocale );
259  sal_Int32 nResolvedLen = aResolvedLigature.getLength();
260 
261  // now we can properly use toTitle to get the expected result for the resolved string.
262  // The rest of the text should just become lowercase.
263  aRes = xCharClassImpl->toTitle( aResolvedLigature, 0, nResolvedLen, rLocale ) +
264  xCharClassImpl->toLower( aText, 1, aText.getLength() - 1, rLocale );
265  pOffset->realloc( aRes.getLength() );
266 
267  auto [begin, end] = asNonConstRange(*pOffset);
268  sal_Int32* pOffsetInt = std::fill_n(begin, nResolvedLen, 0);
269  std::iota(pOffsetInt, end, 1);
270  }
271  return aRes;
272 }
273 
274 // this function expects to be called on a word-by-word basis,
275 // namely that startPos points to the first char of the word
277  const OUString& inStr, sal_Int32 startPos, sal_Int32 nCount,
278  Sequence< sal_Int32 >* pOffset )
279 {
280  return transliterate_titlecase_Impl( inStr, startPos, nCount, aLocale, pOffset );
281 }
282 
284 {
285  nMappingType = MappingType::ToTitle; // though only to be applied to the first word...
286  transliterationName = "sentence(generic)";
287  implementationName = "com.sun.star.i18n.Transliteration.Transliteration_sentencecase";
288 }
289 
290 // this function expects to be called on a sentence-by-sentence basis,
291 // namely that startPos points to the first word (NOT first char!) in the sentence
293  const OUString& inStr, sal_Int32 startPos, sal_Int32 nCount,
294  Sequence< sal_Int32 >* pOffset )
295 {
296  return transliterate_titlecase_Impl( inStr, startPos, nCount, aLocale, pOffset );
297 }
298 
299 }
300 
301 /* vim:set shiftwidth=4 softtabstop=4 expandtab: */
static OUString transliterate_titlecase_Impl(const OUString &inStr, sal_Int32 startPos, sal_Int32 nCount, const Locale &rLocale, Sequence< sal_Int32 > *pOffset)
static MappingType lcl_getMappingTypeForToggleCase(MappingType nMappingType, sal_Unicode cChar)
css::uno::Sequence< OUString > SAL_CALL transliterateRange(const OUString &str1, const OUString &str2) override
virtual OUString transliterateImpl(const OUString &inStr, sal_Int32 startPos, sal_Int32 nCount, css::uno::Sequence< sal_Int32 > *pOffset) override
MappingType
sal_uInt16 sal_Unicode
#define NMAPPINGMAX
enumrange< T >::Iterator begin(enumrange< T >)
sal_Unicode map[NMAPPINGMAX]
void setMappingType(const MappingType rMappingType, const css::lang::Locale &rLocale)
int i
unsigned char sal_Bool
OUString SAL_CALL transliterateChar2String(sal_Unicode inChar) override
OUString transliterateImpl(const OUString &inStr, sal_Int32 startPos, sal_Int32 nCount, css::uno::Sequence< sal_Int32 > *pOffset) override
Constant values shared between i18npool and, for example, the number formatter.
enumrange< T >::Iterator end(enumrange< T >)
constexpr T & temporary(T &&x)
sal_Bool SAL_CALL equals(const OUString &str1, sal_Int32 pos1, sal_Int32 nCount1, sal_Int32 &nMatch1, const OUString &str2, sal_Int32 pos2, sal_Int32 nCount2, sal_Int32 &nMatch2) override
virtual OUString transliterateImpl(const OUString &inStr, sal_Int32 startPos, sal_Int32 nCount, css::uno::Sequence< sal_Int32 > *pOffset) override
css::uno::Sequence< DstElementType > containerToSequence(const SrcType &i_Container)
std::map< OUString, rtl::Reference< Entity > > map
sal_Int16 SAL_CALL getType() override
QPRO_FUNC_TYPE nType
static sal_Int16 getUnicodeType(const sal_Unicode ch)
OUString foldingImpl(const OUString &inStr, sal_Int32 startPos, sal_Int32 nCount, css::uno::Sequence< sal_Int32 > *pOffset) override
virtual sal_Unicode SAL_CALL transliterateChar2Char(sal_Unicode inChar) override