LibreOffice Module i18npool (master) 1
cclass_unicode.cxx
Go to the documentation of this file.
1/* -*- Mode: C++; tab-width: 4; indent-tabs-mode: nil; c-basic-offset: 4 -*- */
2/*
3 * This file is part of the LibreOffice project.
4 *
5 * This Source Code Form is subject to the terms of the Mozilla Public
6 * License, v. 2.0. If a copy of the MPL was not distributed with this
7 * file, You can obtain one at http://mozilla.org/MPL/2.0/.
8 *
9 * This file incorporates work covered by the following license notice:
10 *
11 * Licensed to the Apache Software Foundation (ASF) under one or more
12 * contributor license agreements. See the NOTICE file distributed
13 * with this work for additional information regarding copyright
14 * ownership. The ASF licenses this file to you under the Apache
15 * License, Version 2.0 (the "License"); you may not use this file
16 * except in compliance with the License. You may obtain a copy of
17 * the License at http://www.apache.org/licenses/LICENSE-2.0 .
18 */
19
#include <cclass_unicode.hxx>
#include <com/sun/star/i18n/KCharacterType.hpp>
#include <com/sun/star/i18n/WordType.hpp>
#include <com/sun/star/lang/WrappedTargetRuntimeException.hpp>
#include <unicode/uchar.h>
#include <cppuhelper/exc_hlp.hxx>
#include <cppuhelper/supportsservice.hxx>
#include <breakiteratorImpl.hxx>
#include <rtl/ref.hxx>
#include <utility>

using namespace ::com::sun::star;
using namespace ::com::sun::star::uno;
using namespace ::com::sun::star::i18n;
using namespace ::com::sun::star::lang;
36
37namespace i18npool {
38
39// class cclass_Unicode
40// ----------------------------------------------------;
41
42cclass_Unicode::cclass_Unicode( uno::Reference < XComponentContext > xContext ) :
43 transToUpper( new Transliteration_casemapping() ),
44 transToLower( new Transliteration_casemapping() ),
45 transToTitle( new Transliteration_casemapping() ),
46 m_xContext(std::move( xContext )),
47 nStartTypes( 0 ),
48 nContTypes( 0 ),
49 cGroupSep( ',' ),
50 cDecimalSep( '.' ),
51 cDecimalSepAlt( 0 )
52{
53 transToUpper->setMappingType(MappingType::ToUpper);
54 transToLower->setMappingType(MappingType::ToLower);
55 transToTitle->setMappingType(MappingType::ToTitle);
56}
57
60}
61
62
63OUString SAL_CALL
64cclass_Unicode::toUpper( const OUString& Text, sal_Int32 nPos, sal_Int32 nCount, const Locale& rLocale ) {
65 sal_Int32 len = Text.getLength();
66 if (nPos >= len)
67 return OUString();
68 if (nCount + nPos > len)
69 nCount = len - nPos;
70
71 transToUpper->setLocale(rLocale);
72 return transToUpper->transliterateString2String(Text, nPos, nCount);
73}
74
75OUString SAL_CALL
76cclass_Unicode::toLower( const OUString& Text, sal_Int32 nPos, sal_Int32 nCount, const Locale& rLocale ) {
77 sal_Int32 len = Text.getLength();
78 if (nPos >= len)
79 return OUString();
80 if (nCount + nPos > len)
81 nCount = len - nPos;
82
83 transToLower->setLocale(rLocale);
84 return transToLower->transliterateString2String(Text, nPos, nCount);
85}
86
87OUString SAL_CALL
88cclass_Unicode::toTitle( const OUString& Text, sal_Int32 nPos, sal_Int32 nCount, const Locale& rLocale ) {
89 try
90 {
91 sal_Int32 len = Text.getLength();
92 if (nPos >= len)
93 return OUString();
94 if (nCount + nPos > len)
95 nCount = len - nPos;
96
97 transToTitle->setLocale(rLocale);
98 rtl_uString* pStr = rtl_uString_alloc(nCount);
99 sal_Unicode* out = pStr->buffer;
101 Boundary bdy = xBrk->getWordBoundary(Text, nPos, rLocale,
102 WordType::ANYWORD_IGNOREWHITESPACES, true);
103 for (sal_Int32 i = nPos; i < nCount + nPos; i++, out++) {
104 if (i >= bdy.endPos)
105 bdy = xBrk->nextWord(Text, bdy.endPos, rLocale,
106 WordType::ANYWORD_IGNOREWHITESPACES);
107 *out = (i == bdy.startPos) ?
108 transToTitle->transliterateChar2Char(Text[i]) : Text[i];
109 }
110 *out = 0;
111 return OUString( pStr, SAL_NO_ACQUIRE );
112 }
113 catch (const RuntimeException&)
114 {
115 throw;
116 }
117 catch (const Exception& e)
118 {
120 throw lang::WrappedTargetRuntimeException(
121 "wrapped " + a.getValueTypeName() + ": " + e.Message,
122 uno::Reference<uno::XInterface>(), a);
123 }
124}
125
126sal_Int16 SAL_CALL
127cclass_Unicode::getType( const OUString& Text, sal_Int32 nPos ) {
128 if ( nPos < 0 || Text.getLength() <= nPos ) return 0;
129 return static_cast<sal_Int16>(u_charType(Text.iterateCodePoints(&nPos, 0)));
130}
131
132sal_Int16 SAL_CALL
133cclass_Unicode::getCharacterDirection( const OUString& Text, sal_Int32 nPos ) {
134 if ( nPos < 0 || Text.getLength() <= nPos ) return 0;
135 return static_cast<sal_Int16>(u_charDirection(Text.iterateCodePoints(&nPos, 0)));
136}
137
138
139sal_Int16 SAL_CALL
140cclass_Unicode::getScript( const OUString& Text, sal_Int32 nPos ) {
141 if ( nPos < 0 || Text.getLength() <= nPos ) return 0;
142 // ICU Unicode script type UBlockCode starts from 1 for Basic Latin,
143 // while OO.o enum UnicideScript starts from 0.
144 // To map ICU UBlockCode to OO.o UnicodeScript, it needs to shift 1.
145 return static_cast<sal_Int16>(ublock_getCode(Text.iterateCodePoints(&nPos, 0)))-1;
146}
147
148
149sal_Int32
150cclass_Unicode::getCharType( const OUString& Text, sal_Int32* nPos, sal_Int32 increment) {
151 using namespace ::com::sun::star::i18n::KCharacterType;
152
153 sal_uInt32 ch = Text.iterateCodePoints(nPos, increment);
154 switch ( u_charType(ch) ) {
155 // Upper
156 case U_UPPERCASE_LETTER :
157 return UPPER|LETTER|PRINTABLE|BASE_FORM;
158
159 // Lower
160 case U_LOWERCASE_LETTER :
161 return LOWER|LETTER|PRINTABLE|BASE_FORM;
162
163 // Title
164 case U_TITLECASE_LETTER :
165 return TITLE_CASE|LETTER|PRINTABLE|BASE_FORM;
166
167 // Letter
168 case U_MODIFIER_LETTER :
169 case U_OTHER_LETTER :
170 return LETTER|PRINTABLE|BASE_FORM;
171
172 // Digit
173 case U_DECIMAL_DIGIT_NUMBER:
174 case U_LETTER_NUMBER:
175 case U_OTHER_NUMBER:
176 return DIGIT|PRINTABLE|BASE_FORM;
177
178 // Base
179 case U_NON_SPACING_MARK:
180 case U_ENCLOSING_MARK:
181 case U_COMBINING_SPACING_MARK:
182 return BASE_FORM|PRINTABLE;
183
184 // Print
185 case U_SPACE_SEPARATOR:
186
187 case U_DASH_PUNCTUATION:
188 case U_INITIAL_PUNCTUATION:
189 case U_FINAL_PUNCTUATION:
190 case U_CONNECTOR_PUNCTUATION:
191 case U_OTHER_PUNCTUATION:
192
193 case U_MATH_SYMBOL:
194 case U_CURRENCY_SYMBOL:
195 case U_MODIFIER_SYMBOL:
196 case U_OTHER_SYMBOL:
197 return PRINTABLE;
198
199 // Control
200 case U_CONTROL_CHAR:
201 case U_FORMAT_CHAR:
202 return CONTROL;
203
204 case U_LINE_SEPARATOR:
205 case U_PARAGRAPH_SEPARATOR:
206 return CONTROL|PRINTABLE;
207
208 // for all others
209 default:
210 return U_GENERAL_OTHER_TYPES;
211 }
212}
213
214sal_Int32 SAL_CALL
215cclass_Unicode::getCharacterType( const OUString& Text, sal_Int32 nPos, const Locale& /*rLocale*/ ) {
216 if ( nPos < 0 || Text.getLength() <= nPos ) return 0;
217 return getCharType(Text, &nPos, 0);
218
219}
220
221sal_Int32 SAL_CALL
222cclass_Unicode::getStringType( const OUString& Text, sal_Int32 nPos, sal_Int32 nCount, const Locale& /*rLocale*/ ) {
223 if ( nPos < 0 || Text.getLength() <= nPos ) return 0;
224
225 sal_Int32 result = 0;
226
227 while (nCount > 0 && nPos < Text.getLength())
228 {
229 sal_Int32 nOrigPos = nPos;
230 result |= getCharType(Text, &nPos, 1);
231 sal_Int32 nUtf16Units = nPos - nOrigPos;
232 nCount -= nUtf16Units;
233 }
234
235 return result;
236}
237
238ParseResult SAL_CALL cclass_Unicode::parseAnyToken(
239 const OUString& Text,
240 sal_Int32 nPos,
241 const Locale& rLocale,
242 sal_Int32 startCharTokenType,
243 const OUString& userDefinedCharactersStart,
244 sal_Int32 contCharTokenType,
245 const OUString& userDefinedCharactersCont )
246{
247 ParseResult r;
248 if ( Text.getLength() <= nPos )
249 return r;
250
251 setupParserTable( rLocale,
252 startCharTokenType, userDefinedCharactersStart,
253 contCharTokenType, userDefinedCharactersCont );
254 parseText( r, Text, nPos );
255
256 return r;
257}
258
259
261 sal_Int32 nTokenType,
262 const OUString& Text,
263 sal_Int32 nPos,
264 const Locale& rLocale,
265 sal_Int32 startCharTokenType,
266 const OUString& userDefinedCharactersStart,
267 sal_Int32 contCharTokenType,
268 const OUString& userDefinedCharactersCont )
269{
270 ParseResult r;
271 if ( Text.getLength() <= nPos )
272 return r;
273
274 setupParserTable( rLocale,
275 startCharTokenType, userDefinedCharactersStart,
276 contCharTokenType, userDefinedCharactersCont );
277 parseText( r, Text, nPos, nTokenType );
278
279 return r;
280}
281
283{
284 return "com.sun.star.i18n.CharacterClassification_Unicode";
285}
286
287sal_Bool SAL_CALL cclass_Unicode::supportsService(const OUString& rServiceName)
288{
289 return cppu::supportsService(this, rServiceName);
290}
291
292Sequence< OUString > SAL_CALL cclass_Unicode::getSupportedServiceNames()
293{
294 return { "com.sun.star.i18n.CharacterClassification_Unicode" };
295}
296
297}
298
299extern "C" SAL_DLLPUBLIC_EXPORT css::uno::XInterface *
301 css::uno::XComponentContext *context,
302 css::uno::Sequence<css::uno::Any> const &)
303{
304 return cppu::acquire(new i18npool::cclass_Unicode(context));
305}
306
307/* vim:set shiftwidth=4 softtabstop=4 expandtab: */
Reference< XComponentContext > m_xContext
SAL_DLLPUBLIC_EXPORT css::uno::XInterface * com_sun_star_i18n_CharacterClassification_Unicode_get_implementation(css::uno::XComponentContext *context, css::uno::Sequence< css::uno::Any > const &)
#define DIGIT
virtual css::uno::Sequence< OUString > SAL_CALL getSupportedServiceNames() override
cclass_Unicode(css::uno::Reference< css::uno::XComponentContext > xContext)
virtual OUString SAL_CALL getImplementationName() override
virtual OUString SAL_CALL toUpper(const OUString &Text, sal_Int32 nPos, sal_Int32 nCount, const css::lang::Locale &rLocale) override
virtual OUString SAL_CALL toLower(const OUString &Text, sal_Int32 nPos, sal_Int32 nCount, const css::lang::Locale &rLocale) override
virtual css::i18n::ParseResult SAL_CALL parseAnyToken(const OUString &Text, sal_Int32 nPos, const css::lang::Locale &rLocale, sal_Int32 nStartCharFlags, const OUString &userDefinedCharactersStart, sal_Int32 nContCharFlags, const OUString &userDefinedCharactersCont) override
virtual sal_Bool SAL_CALL supportsService(const OUString &ServiceName) override
virtual OUString SAL_CALL toTitle(const OUString &Text, sal_Int32 nPos, sal_Int32 nCount, const css::lang::Locale &rLocale) override
static sal_Int32 getCharType(const OUString &Text, sal_Int32 *nPos, sal_Int32 increment)
Implementation of getCharacterType() for one single character.
void parseText(css::i18n::ParseResult &r, const OUString &rText, sal_Int32 nPos, sal_Int32 nTokenType=0xffffffff)
Parse a text.
virtual sal_Int32 SAL_CALL getStringType(const OUString &text, sal_Int32 nPos, sal_Int32 nCount, const css::lang::Locale &rLocale) override
void setupParserTable(const css::lang::Locale &rLocale, sal_Int32 startCharTokenType, const OUString &userDefinedCharactersStart, sal_Int32 contCharTokenType, const OUString &userDefinedCharactersCont)
Setup parser table. Calls initParserTable() only if needed.
rtl::Reference< Transliteration_casemapping > transToTitle
rtl::Reference< Transliteration_casemapping > transToLower
virtual sal_Int16 SAL_CALL getCharacterDirection(const OUString &Text, sal_Int32 nPos) override
virtual css::i18n::ParseResult SAL_CALL parsePredefinedToken(sal_Int32 nTokenType, const OUString &Text, sal_Int32 nPos, const css::lang::Locale &rLocale, sal_Int32 nStartCharFlags, const OUString &userDefinedCharactersStart, sal_Int32 nContCharFlags, const OUString &userDefinedCharactersCont) override
virtual sal_Int16 SAL_CALL getType(const OUString &Text, sal_Int32 nPos) override
void destroyParserTable()
Destroy parser table.
virtual sal_Int16 SAL_CALL getScript(const OUString &Text, sal_Int32 nPos) override
css::uno::Reference< css::uno::XComponentContext > m_xContext
virtual sal_Int32 SAL_CALL getCharacterType(const OUString &text, sal_Int32 nPos, const css::lang::Locale &rLocale) override
rtl::Reference< Transliteration_casemapping > transToUpper
virtual ~cclass_Unicode() override
int nCount
uno_Any a
sal_uInt16 nPos
@ Exception
bool CPPUHELPER_DLLPUBLIC supportsService(css::lang::XServiceInfo *implementation, rtl::OUString const &name)
Any SAL_CALL getCaughtException()
int i
Constant values shared between i18npool and, for example, the number formatter.
unsigned char sal_Bool
sal_uInt16 sal_Unicode
Any result
PRINTABLE