LibreOffice Module i18npool (master)  1
cclass_unicode.cxx
Go to the documentation of this file.
1 /* -*- Mode: C++; tab-width: 4; indent-tabs-mode: nil; c-basic-offset: 4 -*- */
2 /*
3  * This file is part of the LibreOffice project.
4  *
5  * This Source Code Form is subject to the terms of the Mozilla Public
6  * License, v. 2.0. If a copy of the MPL was not distributed with this
7  * file, You can obtain one at http://mozilla.org/MPL/2.0/.
8  *
9  * This file incorporates work covered by the following license notice:
10  *
11  * Licensed to the Apache Software Foundation (ASF) under one or more
12  * contributor license agreements. See the NOTICE file distributed
13  * with this work for additional information regarding copyright
14  * ownership. The ASF licenses this file to you under the Apache
15  * License, Version 2.0 (the "License"); you may not use this file
16  * except in compliance with the License. You may obtain a copy of
17  * the License at http://www.apache.org/licenses/LICENSE-2.0 .
18  */
19 
20 #include <cclass_unicode.hxx>
21 #include <com/sun/star/i18n/KCharacterType.hpp>
22 #include <com/sun/star/i18n/WordType.hpp>
23 #include <com/sun/star/lang/WrappedTargetRuntimeException.hpp>
24 #include <unicode/uchar.h>
25 #include <cppuhelper/exc_hlp.hxx>
27 #include <breakiteratorImpl.hxx>
28 #include <transliteration_body.hxx>
29 #include <rtl/ref.hxx>
30 
31 using namespace ::com::sun::star;
32 using namespace ::com::sun::star::uno;
33 using namespace ::com::sun::star::i18n;
34 using namespace ::com::sun::star::lang;
35 
36 namespace i18npool {
37 
38 // class cclass_Unicode
39 // ----------------------------------------------------;
40 
41 cclass_Unicode::cclass_Unicode( const uno::Reference < XComponentContext >& rxContext ) :
42  transToUpper( new Transliteration_casemapping() ),
43  transToLower( new Transliteration_casemapping() ),
44  transToTitle( new Transliteration_casemapping() ),
45  m_xContext( rxContext ),
46  nStartTypes( 0 ),
47  nContTypes( 0 ),
48  cGroupSep( ',' ),
49  cDecimalSep( '.' ),
50  cDecimalSepAlt( 0 )
51 {
52  transToUpper->setMappingType(MappingType::ToUpper);
53  transToLower->setMappingType(MappingType::ToLower);
54  transToTitle->setMappingType(MappingType::ToTitle);
55 }
56 
59 }
60 
61 
62 OUString SAL_CALL
63 cclass_Unicode::toUpper( const OUString& Text, sal_Int32 nPos, sal_Int32 nCount, const Locale& rLocale ) {
64  sal_Int32 len = Text.getLength();
65  if (nPos >= len)
66  return OUString();
67  if (nCount + nPos > len)
68  nCount = len - nPos;
69 
70  transToUpper->setLocale(rLocale);
71  return transToUpper->transliterateString2String(Text, nPos, nCount);
72 }
73 
74 OUString SAL_CALL
75 cclass_Unicode::toLower( const OUString& Text, sal_Int32 nPos, sal_Int32 nCount, const Locale& rLocale ) {
76  sal_Int32 len = Text.getLength();
77  if (nPos >= len)
78  return OUString();
79  if (nCount + nPos > len)
80  nCount = len - nPos;
81 
82  transToLower->setLocale(rLocale);
83  return transToLower->transliterateString2String(Text, nPos, nCount);
84 }
85 
86 OUString SAL_CALL
87 cclass_Unicode::toTitle( const OUString& Text, sal_Int32 nPos, sal_Int32 nCount, const Locale& rLocale ) {
88  try
89  {
90  sal_Int32 len = Text.getLength();
91  if (nPos >= len)
92  return OUString();
93  if (nCount + nPos > len)
94  nCount = len - nPos;
95 
96  transToTitle->setLocale(rLocale);
97  rtl_uString* pStr = rtl_uString_alloc(nCount);
98  sal_Unicode* out = pStr->buffer;
100  Boundary bdy = xBrk->getWordBoundary(Text, nPos, rLocale,
101  WordType::ANYWORD_IGNOREWHITESPACES, true);
102  for (sal_Int32 i = nPos; i < nCount + nPos; i++, out++) {
103  if (i >= bdy.endPos)
104  bdy = xBrk->nextWord(Text, bdy.endPos, rLocale,
105  WordType::ANYWORD_IGNOREWHITESPACES);
106  *out = (i == bdy.startPos) ?
107  transToTitle->transliterateChar2Char(Text[i]) : Text[i];
108  }
109  *out = 0;
110  return OUString( pStr, SAL_NO_ACQUIRE );
111  }
112  catch (const RuntimeException&)
113  {
114  throw;
115  }
116  catch (const Exception& e)
117  {
119  throw lang::WrappedTargetRuntimeException(
120  "wrapped " + a.getValueTypeName() + ": " + e.Message,
121  uno::Reference<uno::XInterface>(), a);
122  }
123 }
124 
125 sal_Int16 SAL_CALL
126 cclass_Unicode::getType( const OUString& Text, sal_Int32 nPos ) {
127  if ( nPos < 0 || Text.getLength() <= nPos ) return 0;
128  return static_cast<sal_Int16>(u_charType(Text.iterateCodePoints(&nPos, 0)));
129 }
130 
131 sal_Int16 SAL_CALL
132 cclass_Unicode::getCharacterDirection( const OUString& Text, sal_Int32 nPos ) {
133  if ( nPos < 0 || Text.getLength() <= nPos ) return 0;
134  return static_cast<sal_Int16>(u_charDirection(Text.iterateCodePoints(&nPos, 0)));
135 }
136 
137 
138 sal_Int16 SAL_CALL
139 cclass_Unicode::getScript( const OUString& Text, sal_Int32 nPos ) {
140  if ( nPos < 0 || Text.getLength() <= nPos ) return 0;
141  // ICU Unicode script type UBlockCode starts from 1 for Basic Latin,
142  // while OO.o enum UnicideScript starts from 0.
143  // To map ICU UBlockCode to OO.o UnicodeScript, it needs to shift 1.
144  return static_cast<sal_Int16>(ublock_getCode(Text.iterateCodePoints(&nPos, 0)))-1;
145 }
146 
147 
148 sal_Int32
149 cclass_Unicode::getCharType( const OUString& Text, sal_Int32* nPos, sal_Int32 increment) {
150  using namespace ::com::sun::star::i18n::KCharacterType;
151 
152  sal_uInt32 ch = Text.iterateCodePoints(nPos, increment);
153  switch ( u_charType(ch) ) {
154  // Upper
155  case U_UPPERCASE_LETTER :
156  return UPPER|LETTER|PRINTABLE|BASE_FORM;
157 
158  // Lower
159  case U_LOWERCASE_LETTER :
160  return LOWER|LETTER|PRINTABLE|BASE_FORM;
161 
162  // Title
163  case U_TITLECASE_LETTER :
164  return TITLE_CASE|LETTER|PRINTABLE|BASE_FORM;
165 
166  // Letter
167  case U_MODIFIER_LETTER :
168  case U_OTHER_LETTER :
169  return LETTER|PRINTABLE|BASE_FORM;
170 
171  // Digit
172  case U_DECIMAL_DIGIT_NUMBER:
173  case U_LETTER_NUMBER:
174  case U_OTHER_NUMBER:
175  return DIGIT|PRINTABLE|BASE_FORM;
176 
177  // Base
178  case U_NON_SPACING_MARK:
179  case U_ENCLOSING_MARK:
180  case U_COMBINING_SPACING_MARK:
181  return BASE_FORM|PRINTABLE;
182 
183  // Print
184  case U_SPACE_SEPARATOR:
185 
186  case U_DASH_PUNCTUATION:
187  case U_INITIAL_PUNCTUATION:
188  case U_FINAL_PUNCTUATION:
189  case U_CONNECTOR_PUNCTUATION:
190  case U_OTHER_PUNCTUATION:
191 
192  case U_MATH_SYMBOL:
193  case U_CURRENCY_SYMBOL:
194  case U_MODIFIER_SYMBOL:
195  case U_OTHER_SYMBOL:
196  return PRINTABLE;
197 
198  // Control
199  case U_CONTROL_CHAR:
200  case U_FORMAT_CHAR:
201  return CONTROL;
202 
203  case U_LINE_SEPARATOR:
204  case U_PARAGRAPH_SEPARATOR:
205  return CONTROL|PRINTABLE;
206 
207  // for all others
208  default:
209  return U_GENERAL_OTHER_TYPES;
210  }
211 }
212 
213 sal_Int32 SAL_CALL
214 cclass_Unicode::getCharacterType( const OUString& Text, sal_Int32 nPos, const Locale& /*rLocale*/ ) {
215  if ( nPos < 0 || Text.getLength() <= nPos ) return 0;
216  return getCharType(Text, &nPos, 0);
217 
218 }
219 
220 sal_Int32 SAL_CALL
221 cclass_Unicode::getStringType( const OUString& Text, sal_Int32 nPos, sal_Int32 nCount, const Locale& /*rLocale*/ ) {
222  if ( nPos < 0 || Text.getLength() <= nPos ) return 0;
223 
224  sal_Int32 result = 0;
225 
226  while (nCount > 0 && nPos < Text.getLength())
227  {
228  sal_Int32 nOrigPos = nPos;
229  result |= getCharType(Text, &nPos, 1);
230  sal_Int32 nUtf16Units = nPos - nOrigPos;
231  nCount -= nUtf16Units;
232  }
233 
234  return result;
235 }
236 
237 ParseResult SAL_CALL cclass_Unicode::parseAnyToken(
238  const OUString& Text,
239  sal_Int32 nPos,
240  const Locale& rLocale,
241  sal_Int32 startCharTokenType,
242  const OUString& userDefinedCharactersStart,
243  sal_Int32 contCharTokenType,
244  const OUString& userDefinedCharactersCont )
245 {
246  ParseResult r;
247  if ( Text.getLength() <= nPos )
248  return r;
249 
250  setupParserTable( rLocale,
251  startCharTokenType, userDefinedCharactersStart,
252  contCharTokenType, userDefinedCharactersCont );
253  parseText( r, Text, nPos );
254 
255  return r;
256 }
257 
258 
260  sal_Int32 nTokenType,
261  const OUString& Text,
262  sal_Int32 nPos,
263  const Locale& rLocale,
264  sal_Int32 startCharTokenType,
265  const OUString& userDefinedCharactersStart,
266  sal_Int32 contCharTokenType,
267  const OUString& userDefinedCharactersCont )
268 {
269  ParseResult r;
270  if ( Text.getLength() <= nPos )
271  return r;
272 
273  setupParserTable( rLocale,
274  startCharTokenType, userDefinedCharactersStart,
275  contCharTokenType, userDefinedCharactersCont );
276  parseText( r, Text, nPos, nTokenType );
277 
278  return r;
279 }
280 
282 {
283  return "com.sun.star.i18n.CharacterClassification_Unicode";
284 }
285 
286 sal_Bool SAL_CALL cclass_Unicode::supportsService(const OUString& rServiceName)
287 {
288  return cppu::supportsService(this, rServiceName);
289 }
290 
292 {
293  return { "com.sun.star.i18n.CharacterClassification_Unicode" };
294 }
295 
296 }
297 
298 extern "C" SAL_DLLPUBLIC_EXPORT css::uno::XInterface *
300  css::uno::XComponentContext *context,
301  css::uno::Sequence<css::uno::Any> const &)
302 {
303  return cppu::acquire(new i18npool::cclass_Unicode(context));
304 }
305 
306 /* vim:set shiftwidth=4 softtabstop=4 expandtab: */
void destroyParserTable()
Destroy parser table.
virtual OUString SAL_CALL toLower(const OUString &Text, sal_Int32 nPos, sal_Int32 nCount, const css::lang::Locale &rLocale) override
PRINTABLE
void parseText(css::i18n::ParseResult &r, const OUString &rText, sal_Int32 nPos, sal_Int32 nTokenType=0xffffffff)
Parse a text.
virtual sal_Int32 SAL_CALL getCharacterType(const OUString &text, sal_Int32 nPos, const css::lang::Locale &rLocale) override
virtual ~cclass_Unicode() override
cclass_Unicode(const css::uno::Reference< css::uno::XComponentContext > &rxContext)
sal_uInt16 sal_Unicode
Any SAL_CALL getCaughtException()
rtl::Reference< Transliteration_casemapping > transToUpper
bool CPPUHELPER_DLLPUBLIC supportsService(css::lang::XServiceInfo *implementation, rtl::OUString const &name)
virtual sal_Bool SAL_CALL supportsService(const OUString &ServiceName) override
virtual OUString SAL_CALL toTitle(const OUString &Text, sal_Int32 nPos, sal_Int32 nCount, const css::lang::Locale &rLocale) override
virtual sal_Int32 SAL_CALL getStringType(const OUString &text, sal_Int32 nPos, sal_Int32 nCount, const css::lang::Locale &rLocale) override
#define DIGIT
virtual css::i18n::ParseResult SAL_CALL parseAnyToken(const OUString &Text, sal_Int32 nPos, const css::lang::Locale &rLocale, sal_Int32 nStartCharFlags, const OUString &userDefinedCharactersStart, sal_Int32 nContCharFlags, const OUString &userDefinedCharactersCont) override
int i
uno_Any a
virtual sal_Int16 SAL_CALL getType(const OUString &Text, sal_Int32 nPos) override
virtual sal_Int16 SAL_CALL getCharacterDirection(const OUString &Text, sal_Int32 nPos) override
virtual css::i18n::ParseResult SAL_CALL parsePredefinedToken(sal_Int32 nTokenType, const OUString &Text, sal_Int32 nPos, const css::lang::Locale &rLocale, sal_Int32 nStartCharFlags, const OUString &userDefinedCharactersStart, sal_Int32 nContCharFlags, const OUString &userDefinedCharactersCont) override
unsigned char sal_Bool
Constant values shared between i18npool and, for example, the number formatter.
virtual sal_Int16 SAL_CALL getScript(const OUString &Text, sal_Int32 nPos) override
static sal_Int32 getCharType(const OUString &Text, sal_Int32 *nPos, sal_Int32 increment)
Implementation of getCharacterType() for one single character.
virtual css::uno::Sequence< OUString > SAL_CALL getSupportedServiceNames() override
rtl::Reference< Transliteration_casemapping > transToLower
SAL_DLLPUBLIC_EXPORT css::uno::XInterface * com_sun_star_i18n_CharacterClassification_Unicode_get_implementation(css::uno::XComponentContext *context, css::uno::Sequence< css::uno::Any > const &)
virtual OUString SAL_CALL getImplementationName() override
Any result
void setupParserTable(const css::lang::Locale &rLocale, sal_Int32 startCharTokenType, const OUString &userDefinedCharactersStart, sal_Int32 contCharTokenType, const OUString &userDefinedCharactersCont)
Setup parser table. Calls initParserTable() only if needed.
Reference< XComponentContext > m_xContext
rtl::Reference< Transliteration_casemapping > transToTitle
css::uno::Reference< css::uno::XComponentContext > m_xContext
virtual OUString SAL_CALL toUpper(const OUString &Text, sal_Int32 nPos, sal_Int32 nCount, const css::lang::Locale &rLocale) override