LibreOffice Module unotools (master) 1
charclass.cxx
Go to the documentation of this file.
1/* -*- Mode: C++; tab-width: 4; indent-tabs-mode: nil; c-basic-offset: 4 -*- */
2/*
3 * This file is part of the LibreOffice project.
4 *
5 * This Source Code Form is subject to the terms of the Mozilla Public
6 * License, v. 2.0. If a copy of the MPL was not distributed with this
7 * file, You can obtain one at http://mozilla.org/MPL/2.0/.
8 *
9 * This file incorporates work covered by the following license notice:
10 *
11 * Licensed to the Apache Software Foundation (ASF) under one or more
12 * contributor license agreements. See the NOTICE file distributed
13 * with this work for additional information regarding copyright
14 * ownership. The ASF licenses this file to you under the Apache
15 * License, Version 2.0 (the "License"); you may not use this file
16 * except in compliance with the License. You may obtain a copy of
17 * the License at http://www.apache.org/licenses/LICENSE-2.0 .
18 */
19
22#include <rtl/character.hxx>
24
25#include <com/sun/star/i18n/CharacterClassification.hpp>
26#include <utility>
27
28using namespace ::com::sun::star;
29using namespace ::com::sun::star::i18n;
30using namespace ::com::sun::star::uno;
31
33 const Reference< uno::XComponentContext > & rxContext,
34 LanguageTag aLanguageTag
35 )
36 : maLanguageTag(std::move( aLanguageTag))
37{
38 xCC = CharacterClassification::create( rxContext );
39}
40
42 : maLanguageTag(std::move( aLanguageTag))
43{
44 xCC = CharacterClassification::create( comphelper::getProcessComponentContext() );
45}
46
48{
49}
50
52{
53 return maLanguageTag;
54}
55
56const css::lang::Locale& CharClass::getMyLocale() const
57{
58 return maLanguageTag.getLocale();
59}
60
61// static
62bool CharClass::isAsciiNumeric( std::u16string_view rStr )
63{
64 if ( rStr.empty() )
65 return false;
66 const sal_Unicode* p = rStr.data();
67 const sal_Unicode* const pStop = p + rStr.size();
68
69 do
70 {
71 if ( !rtl::isAsciiDigit( *p ) )
72 return false;
73 }
74 while ( ++p < pStop );
75
76 return true;
77}
78
79// static
80bool CharClass::isAsciiAlpha( std::u16string_view rStr )
81{
82 if ( rStr.empty() )
83 return false;
84 const sal_Unicode* p = rStr.data();
85 const sal_Unicode* const pStop = p + rStr.size();
86
87 do
88 {
89 if ( !rtl::isAsciiAlpha( *p ) )
90 return false;
91 }
92 while ( ++p < pStop );
93
94 return true;
95}
96
97bool CharClass::isAlpha( const OUString& rStr, sal_Int32 nPos ) const
98{
99 sal_Unicode c = rStr[nPos];
100 if ( c < 128 )
101 return rtl::isAsciiAlpha( c );
102
103 try
104 {
105 return (xCC->getCharacterType( rStr, nPos, getMyLocale() ) &
107 }
108 catch ( const Exception& )
109 {
110 TOOLS_WARN_EXCEPTION("unotools.i18n", "" );
111 }
112 return false;
113}
114
115bool CharClass::isLetter( const OUString& rStr, sal_Int32 nPos ) const
116{
117 sal_Unicode c = rStr[nPos];
118 if ( c < 128 )
119 return rtl::isAsciiAlpha( c );
120
121 try
122 {
123 return (xCC->getCharacterType( rStr, nPos, getMyLocale() ) &
125 }
126 catch ( const Exception& )
127 {
128 TOOLS_WARN_EXCEPTION("unotools.i18n", "" );
129 }
130 return false;
131}
132
133bool CharClass::isLetter( const OUString& rStr ) const
134{
135 if (rStr.isEmpty())
136 return false;
137
138 try
139 {
140 sal_Int32 nPos = 0;
141 while (nPos < rStr.getLength())
142 {
143 if (!isLetter( rStr, nPos))
144 return false;
145 rStr.iterateCodePoints( &nPos);
146 }
147 return true;
148 }
149 catch ( const Exception& )
150 {
151 TOOLS_WARN_EXCEPTION("unotools.i18n", "" );
152 }
153 return false;
154}
155
156bool CharClass::isDigit( const OUString& rStr, sal_Int32 nPos ) const
157{
158 sal_Unicode c = rStr[ nPos ];
159 if ( c < 128 )
160 return rtl::isAsciiDigit( c );
161
162 try
163 {
164 return (xCC->getCharacterType( rStr, nPos, getMyLocale() ) &
165 KCharacterType::DIGIT) != 0;
166 }
167 catch ( const Exception& )
168 {
169 TOOLS_WARN_EXCEPTION("unotools.i18n", "" );
170 }
171 return false;
172}
173
174bool CharClass::isNumeric( const OUString& rStr ) const
175{
176 if (rStr.isEmpty())
177 return false;
178
179 try
180 {
181 sal_Int32 nPos = 0;
182 while (nPos < rStr.getLength())
183 {
184 if (!isDigit( rStr, nPos))
185 return false;
186 rStr.iterateCodePoints( &nPos);
187 }
188 return true;
189 }
190 catch ( const Exception& )
191 {
192 TOOLS_WARN_EXCEPTION("unotools.i18n", "" );
193 }
194 return false;
195}
196
197bool CharClass::isAlphaNumeric( const OUString& rStr, sal_Int32 nPos ) const
198{
199 sal_Unicode c = rStr[nPos];
200 if ( c < 128 )
201 return rtl::isAsciiAlphanumeric( c );
202
203 try
204 {
205 return (xCC->getCharacterType( rStr, nPos, getMyLocale() ) &
207 }
208 catch ( const Exception& )
209 {
210 TOOLS_WARN_EXCEPTION("unotools.i18n", "" );
211 }
212 return false;
213}
214
215bool CharClass::isLetterNumeric( const OUString& rStr, sal_Int32 nPos ) const
216{
217 sal_Unicode c = rStr[nPos];
218 if ( c < 128 )
219 return rtl::isAsciiAlphanumeric( c );
220
221 try
222 {
223 return (xCC->getCharacterType( rStr, nPos, getMyLocale() ) &
225 }
226 catch ( const Exception& )
227 {
228 TOOLS_WARN_EXCEPTION("unotools.i18n", "" );
229 }
230 return false;
231}
232
233bool CharClass::isLetterNumeric( const OUString& rStr ) const
234{
235 if (rStr.isEmpty())
236 return false;
237
238 try
239 {
240 sal_Int32 nPos = 0;
241 while (nPos < rStr.getLength())
242 {
243 if (!isLetterNumeric( rStr, nPos))
244 return false;
245 rStr.iterateCodePoints( &nPos);
246 }
247 return true;
248 }
249 catch ( const Exception& )
250 {
251 TOOLS_WARN_EXCEPTION("unotools.i18n", "" );
252 }
253 return false;
254}
255
256bool CharClass::isBase( const OUString& rStr, sal_Int32 nPos ) const
257{
258 sal_Unicode c = rStr[nPos];
259 if ( c < 128 )
260 return rtl::isAsciiAlphanumeric( c );
261
262 try
263 {
264 return (xCC->getCharacterType( rStr, nPos, getMyLocale() ) & nCharClassBaseType ) != 0;
265 }
266 catch ( const Exception& )
267 {
268 TOOLS_WARN_EXCEPTION("unotools.i18n", "" );
269 }
270 return false;
271}
272
273bool CharClass::isUpper( const OUString& rStr, sal_Int32 nPos ) const
274{
275 sal_Unicode c = rStr[nPos];
276 if ( c < 128 )
277 return rtl::isAsciiUpperCase(c);
278
279 try
280 {
281 return (xCC->getCharacterType( rStr, nPos, getMyLocale()) &
282 KCharacterType::UPPER) != 0;
283 }
284 catch ( const Exception& )
285 {
286 TOOLS_WARN_EXCEPTION("unotools.i18n", "" );
287 }
288 return false;
289}
290
291bool CharClass::isUpper( const OUString& rStr, sal_Int32 nPos, sal_Int32 nCount ) const
292{
293 if (rStr.isEmpty())
294 return false;
295
296 assert(nPos >= 0 && nPos < rStr.getLength() && nCount > 0);
297 if (nPos < 0 || nPos >= rStr.getLength() || nCount == 0)
298 return false;
299
300 try
301 {
302 const sal_Int32 nLen = std::min( nPos + nCount, rStr.getLength());
303 while (nPos < nLen)
304 {
305 if (!isUpper( rStr, nPos))
306 return false;
307 rStr.iterateCodePoints( &nPos);
308 }
309 return true;
310 }
311 catch ( const Exception& )
312 {
313 TOOLS_WARN_EXCEPTION("unotools.i18n", "" );
314 }
315 return false;
316}
317
318OUString CharClass::titlecase(const OUString& rStr, sal_Int32 nPos, sal_Int32 nCount) const
319{
320 try
321 {
322 return xCC->toTitle( rStr, nPos, nCount, getMyLocale() );
323 }
324 catch ( const Exception& )
325 {
326 TOOLS_WARN_EXCEPTION("unotools.i18n", "" );
327 }
328 return rStr.copy( nPos, nCount );
329}
330
331OUString CharClass::uppercase( const OUString& rStr, sal_Int32 nPos, sal_Int32 nCount ) const
332{
333 try
334 {
335 return xCC->toUpper( rStr, nPos, nCount, getMyLocale() );
336 }
337 catch ( const Exception& )
338 {
339 TOOLS_WARN_EXCEPTION("unotools.i18n", "" );
340 }
341 return rStr.copy( nPos, nCount );
342}
343
344OUString CharClass::lowercase( const OUString& rStr, sal_Int32 nPos, sal_Int32 nCount ) const
345{
346 try
347 {
348 return xCC->toLower( rStr, nPos, nCount, getMyLocale() );
349 }
350 catch ( const Exception& )
351 {
352 TOOLS_WARN_EXCEPTION("unotools.i18n", "" );
353 }
354 return rStr.copy( nPos, nCount );
355}
356
357sal_Int16 CharClass::getType( const OUString& rStr, sal_Int32 nPos ) const
358{
359 try
360 {
361 return xCC->getType( rStr, nPos );
362 }
363 catch ( const Exception& )
364 {
365 TOOLS_WARN_EXCEPTION("unotools.i18n", "" );
366 }
367 return 0;
368}
369
370css::i18n::DirectionProperty CharClass::getCharacterDirection( const OUString& rStr, sal_Int32 nPos ) const
371{
372 try
373 {
374 return static_cast<css::i18n::DirectionProperty>(xCC->getCharacterDirection( rStr, nPos ));
375 }
376 catch ( const Exception& )
377 {
378 TOOLS_WARN_EXCEPTION("unotools.i18n", "" );
379 }
380 return css::i18n::DirectionProperty_LEFT_TO_RIGHT;
381}
382
383css::i18n::UnicodeScript CharClass::getScript( const OUString& rStr, sal_Int32 nPos ) const
384{
385 try
386 {
387 return static_cast<css::i18n::UnicodeScript>(xCC->getScript( rStr, nPos ));
388 }
389 catch ( const Exception& )
390 {
391 TOOLS_WARN_EXCEPTION("unotools.i18n", "" );
392 }
393 return UnicodeScript_kBasicLatin;
394}
395
396sal_Int32 CharClass::getCharacterType( const OUString& rStr, sal_Int32 nPos ) const
397{
398 try
399 {
400 return xCC->getCharacterType( rStr, nPos, getMyLocale() );
401 }
402 catch ( const Exception& )
403 {
404 TOOLS_WARN_EXCEPTION("unotools.i18n", "" );
405 }
406 return 0;
407}
408
409css::i18n::ParseResult CharClass::parseAnyToken(
410 const OUString& rStr,
411 sal_Int32 nPos,
412 sal_Int32 nStartCharFlags,
413 const OUString& userDefinedCharactersStart,
414 sal_Int32 nContCharFlags,
415 const OUString& userDefinedCharactersCont ) const
416{
417 try
418 {
419 return xCC->parseAnyToken( rStr, nPos, getMyLocale(),
420 nStartCharFlags, userDefinedCharactersStart,
421 nContCharFlags, userDefinedCharactersCont );
422 }
423 catch ( const Exception& )
424 {
425 TOOLS_WARN_EXCEPTION( "unotools.i18n", "parseAnyToken" );
426 }
427 return ParseResult();
428}
429
430css::i18n::ParseResult CharClass::parsePredefinedToken(
431 sal_Int32 nTokenType,
432 const OUString& rStr,
433 sal_Int32 nPos,
434 sal_Int32 nStartCharFlags,
435 const OUString& userDefinedCharactersStart,
436 sal_Int32 nContCharFlags,
437 const OUString& userDefinedCharactersCont ) const
438{
439 try
440 {
441 return xCC->parsePredefinedToken( nTokenType, rStr, nPos, getMyLocale(),
442 nStartCharFlags, userDefinedCharactersStart,
443 nContCharFlags, userDefinedCharactersCont );
444 }
445 catch ( const Exception& )
446 {
447 TOOLS_WARN_EXCEPTION( "unotools.i18n", "parsePredefinedToken" );
448 }
449 return ParseResult();
450}
451
452/* vim:set shiftwidth=4 softtabstop=4 expandtab: */
constexpr sal_Int32 nCharClassBaseType
Definition: charclass.hxx:61
constexpr sal_Int32 nCharClassNumericType
Definition: charclass.hxx:53
constexpr sal_Int32 nCharClassAlphaType
Definition: charclass.hxx:34
constexpr sal_Int32 nCharClassLetterType
Definition: charclass.hxx:45
bool isNumeric(const OUString &rStr) const
Definition: charclass.cxx:174
static bool isAsciiNumeric(std::u16string_view rStr)
isdigit() on ascii values of entire string
Definition: charclass.cxx:62
static bool isAsciiAlpha(std::u16string_view rStr)
isalpha() on ascii values of entire string
Definition: charclass.cxx:80
OUString titlecase(const OUString &rStr, sal_Int32 nPos, sal_Int32 nCount) const
Definition: charclass.cxx:318
OUString uppercase(const OUString &rStr, sal_Int32 nPos, sal_Int32 nCount) const
Definition: charclass.cxx:331
css::i18n::DirectionProperty getCharacterDirection(const OUString &rStr, sal_Int32 nPos) const
Definition: charclass.cxx:370
sal_Int32 getCharacterType(const OUString &rStr, sal_Int32 nPos) const
Definition: charclass.cxx:396
bool isAlpha(const OUString &rStr, sal_Int32 nPos) const
Definition: charclass.cxx:97
css::i18n::ParseResult parseAnyToken(const OUString &rStr, sal_Int32 nPos, sal_Int32 nStartCharFlags, const OUString &userDefinedCharactersStart, sal_Int32 nContCharFlags, const OUString &userDefinedCharactersCont) const
Definition: charclass.cxx:409
OUString lowercase(const OUString &rStr, sal_Int32 nPos, sal_Int32 nCount) const
Definition: charclass.cxx:344
const css::lang::Locale & getMyLocale() const
Definition: charclass.cxx:56
const LanguageTag & getLanguageTag() const
get current Locale
Definition: charclass.cxx:51
bool isAlphaNumeric(const OUString &rStr, sal_Int32 nPos) const
Definition: charclass.cxx:197
bool isUpper(const OUString &rStr, sal_Int32 nPos) const
Definition: charclass.cxx:273
bool isLetter(const OUString &rStr, sal_Int32 nPos) const
Definition: charclass.cxx:115
css::uno::Reference< css::i18n::XCharacterClassification > xCC
Definition: charclass.hxx:67
bool isLetterNumeric(const OUString &rStr, sal_Int32 nPos) const
Definition: charclass.cxx:215
bool isBase(const OUString &rStr, sal_Int32 nPos) const
Definition: charclass.cxx:256
css::i18n::ParseResult parsePredefinedToken(sal_Int32 nTokenType, const OUString &rStr, sal_Int32 nPos, sal_Int32 nStartCharFlags, const OUString &userDefinedCharactersStart, sal_Int32 nContCharFlags, const OUString &userDefinedCharactersCont) const
Definition: charclass.cxx:430
sal_Int16 getType(const OUString &rStr, sal_Int32 nPos) const
Definition: charclass.cxx:357
LanguageTag maLanguageTag
Definition: charclass.hxx:66
CharClass(const CharClass &)=delete
css::i18n::UnicodeScript getScript(const OUString &rStr, sal_Int32 nPos) const
Definition: charclass.cxx:383
bool isDigit(const OUString &rStr, sal_Int32 nPos) const
Definition: charclass.cxx:156
const css::lang::Locale & getLocale(bool bResolveSystem=true) const
int nCount
#define TOOLS_WARN_EXCEPTION(area, stream)
void * p
sal_uInt16 nPos
LanguageTag maLanguageTag
@ Exception
Reference< XComponentContext > getProcessComponentContext()
sal_uInt16 sal_Unicode