LibreOffice Module i18npool (master) 1
cclass_unicode.hxx
Go to the documentation of this file.
1/* -*- Mode: C++; tab-width: 4; indent-tabs-mode: nil; c-basic-offset: 4 -*- */
2/*
3 * This file is part of the LibreOffice project.
4 *
5 * This Source Code Form is subject to the terms of the Mozilla Public
6 * License, v. 2.0. If a copy of the MPL was not distributed with this
7 * file, You can obtain one at http://mozilla.org/MPL/2.0/.
8 *
9 * This file incorporates work covered by the following license notice:
10 *
11 * Licensed to the Apache Software Foundation (ASF) under one or more
12 * contributor license agreements. See the NOTICE file distributed
13 * with this work for additional information regarding copyright
14 * ownership. The ASF licenses this file to you under the Apache
15 * License, Version 2.0 (the "License"); you may not use this file
16 * except in compliance with the License. You may obtain a copy of
17 * the License at http://www.apache.org/licenses/LICENSE-2.0 .
18 */
19#pragma once
20
21#include <com/sun/star/i18n/XCharacterClassification.hpp>
23#include <com/sun/star/lang/XServiceInfo.hpp>
24#include <rtl/ref.hxx>
25
27#include <memory>
28
29namespace com::sun::star::uno { class XComponentContext; }
30namespace com::sun::star::i18n { class XNativeNumberSupplier; }
31namespace com::sun::star::i18n { class XLocaleData5; }
32namespace i18npool { class Transliteration_casemapping; }
33
34
// Flag values of the parser table.
// Every enumerator except ILLEGAL (which is zero) occupies one distinct bit of
// the sal_uInt32 underlying type, so values can be combined as a bit set.
// Bits 0x00008000 through 0x10000000 are not assigned to any enumerator.
36enum class ParserFlags : sal_uInt32 {
37 ILLEGAL = 0x00000000,
// Bits 0x01..0x20: the CHAR* group.
38 CHAR = 0x00000001,
39 CHAR_BOOL = 0x00000002,
40 CHAR_WORD = 0x00000004,
41 CHAR_VALUE = 0x00000008,
42 CHAR_STRING = 0x00000010,
43 CHAR_DONTCARE = 0x00000020,
// Bits 0x40..0x4000: BOOL, WORD* and VALUE* group.
44 BOOL = 0x00000040,
45 WORD = 0x00000080,
46 WORD_SEP = 0x00000100,
47 VALUE = 0x00000200,
48 VALUE_SEP = 0x00000400,
49 VALUE_EXP = 0x00000800,
50 VALUE_SIGN = 0x00001000,
51 VALUE_EXP_VALUE = 0x00002000,
52 VALUE_DIGIT = 0x00004000,
// The three highest bits of the 32-bit value.
53 NAME_SEP = 0x20000000,
54 STRING_SEP = 0x40000000,
55 EXCLUDED = 0x80000000,
56};
// Specialises o3tl::typed_flags for ParserFlags.
// The mask 0xe0007fff is exactly the bitwise OR of every ParserFlags
// enumerator: 0x00007fff (CHAR through VALUE_DIGIT) plus 0xe0000000
// (NAME_SEP | STRING_SEP | EXCLUDED). Keep it in sync when adding a flag.
// NOTE(review): presumably this specialisation is what enables the type-safe
// bitwise operators on the enum; confirm against o3tl/typed_flags_set.hxx,
// whose #include is not visible in this listing.
57namespace o3tl {
58 template<> struct typed_flags<ParserFlags> : is_typed_flags<ParserFlags, 0xe0007fff> {};
59}
60
61
62namespace i18npool {
63
// Implementation class deriving from cppu::WeakImplHelper for the
// css::i18n::XCharacterClassification and css::lang::XServiceInfo interfaces.
// Besides the interface overrides it declares private parser state and helpers
// that are implemented in cclass_unicode_parser.cxx.
64class cclass_Unicode final : public cppu::WeakImplHelper < css::i18n::XCharacterClassification, css::lang::XServiceInfo >
65{
66public:
67 cclass_Unicode(css::uno::Reference < css::uno::XComponentContext > xContext );
68 virtual ~cclass_Unicode() override;
69
    // Overrides declared before the XServiceInfo marker below
    // (presumably all from XCharacterClassification).
70 virtual OUString SAL_CALL toUpper( const OUString& Text, sal_Int32 nPos, sal_Int32 nCount,
71 const css::lang::Locale& rLocale ) override;
72 virtual OUString SAL_CALL toLower( const OUString& Text, sal_Int32 nPos, sal_Int32 nCount,
73 const css::lang::Locale& rLocale ) override;
74 virtual OUString SAL_CALL toTitle( const OUString& Text, sal_Int32 nPos, sal_Int32 nCount,
75 const css::lang::Locale& rLocale ) override;
76 virtual sal_Int16 SAL_CALL getType( const OUString& Text, sal_Int32 nPos ) override;
77 virtual sal_Int16 SAL_CALL getCharacterDirection( const OUString& Text, sal_Int32 nPos ) override;
78 virtual sal_Int16 SAL_CALL getScript( const OUString& Text, sal_Int32 nPos ) override;
79 virtual sal_Int32 SAL_CALL getCharacterType( const OUString& text, sal_Int32 nPos,
80 const css::lang::Locale& rLocale ) override;
81 virtual sal_Int32 SAL_CALL getStringType( const OUString& text, sal_Int32 nPos, sal_Int32 nCount,
82 const css::lang::Locale& rLocale ) override;
83 virtual css::i18n::ParseResult SAL_CALL parseAnyToken( const OUString& Text, sal_Int32 nPos,
84 const css::lang::Locale& rLocale, sal_Int32 nStartCharFlags, const OUString& userDefinedCharactersStart,
85 sal_Int32 nContCharFlags, const OUString& userDefinedCharactersCont ) override;
86 virtual css::i18n::ParseResult SAL_CALL parsePredefinedToken( sal_Int32 nTokenType, const OUString& Text,
87 sal_Int32 nPos, const css::lang::Locale& rLocale, sal_Int32 nStartCharFlags,
88 const OUString& userDefinedCharactersStart, sal_Int32 nContCharFlags,
89 const OUString& userDefinedCharactersCont ) override;
90
91 //XServiceInfo
92 virtual OUString SAL_CALL getImplementationName() override;
93 virtual sal_Bool SAL_CALL supportsService(const OUString& ServiceName) override;
94 virtual css::uno::Sequence< OUString > SAL_CALL getSupportedServiceNames() override;
95
96private:
97 // These are performance sensitive, so we don't want to use locking and switch their state, so just
98 // have multiple copies.
    // NOTE(review): the members this comment describes (listing lines 99-101)
    // are missing from this listing. The member index of the page lists
    // rtl::Reference< Transliteration_casemapping > transToUpper, transToLower
    // and transToTitle, which are presumably the "multiple copies" - confirm
    // against the real header.
102
103// --- parser specific (implemented in cclass_unicode_parser.cxx) ---
104
    // NOTE(review): listing line 105 is missing. The brace below presumably
    // opens the ScanState enumeration that getFlags()/getFlagsExtended() take
    // as a parameter - confirm against the real header.
    // Each enumerator's trailing comment lists the states reachable from it.
106 {
107 ssGetChar, // initial state; -> ssBounce, ssGetValue, ssRewindFromValue, ssGetWord, ssGetWordFirstChar, ssGetString, ssGetBool, ssStop
108 ssGetValue, // -> ssBounce, ssRewindFromValue, ssStopBack, ssGetWord
109 ssGetWord, // -> ssBounce, ssStop, ssStopBack
110 ssGetWordFirstChar, // -> ssBounce, ssGetWord, ssStop, ssStopBack
111 ssGetString, // -> ssBounce, ssStop
112 ssGetBool, // -> ssBounce, ssStop, ssStopBack
113 ssRewindFromValue, // -> ssBounce, ssGetValue, ssGetWord, ssGetWordFirstChar, ssGetString, ssGetBool, ssStop, ssIgnoreLeadingInRewind
114 ssIgnoreLeadingInRewind, // -> ssBounce, ssGetValue, ssRewindFromValue, ssGetWord, ssGetWordFirstChar, ssGetString, ssGetBool, ssStop
115 ssStopBack, // -> ssStop
116 ssBounce, // -> ssStopBack
117 ssStop
118 };
119
    // Static tables; only declared here, defined elsewhere.
    // NOTE(review): listing line 121 is missing; the member index lists
    // "static const ParserFlags pDefaultParserTable[]", presumably declared there.
120 static const sal_uInt8 nDefCnt;
122 static const sal_Int32 pParseTokensType[];
123
    /// If and where c occurs in pStr.
125 static const sal_Unicode* StrChr( const sal_Unicode* pStr, sal_uInt32 c );
126
127
128 css::uno::Reference < css::uno::XComponentContext > m_xContext;
129
    /// used for parser only
131 css::lang::Locale aParserLocale;
132 css::uno::Reference < css::i18n::XLocaleData5 > mxLocaleData;
133 css::uno::Reference < css::i18n::XNativeNumberSupplier > xNatNumSup;
134 OUString aStartChars;
135 OUString aContChars;
    // Owned arrays of ParserFlags; released automatically by std::unique_ptr.
136 std::unique_ptr<ParserFlags[]> pTable;
137 std::unique_ptr<ParserFlags[]> pStart;
138 std::unique_ptr<ParserFlags[]> pCont;
139 sal_Int32 nStartTypes;
140 sal_Int32 nContTypes;
144
    /// Get corresponding KParseTokens flag for a character.
146 static sal_Int32 getParseTokensType(sal_uInt32 c, bool isFirst);
147
    /// Access parser table flags.
149 ParserFlags getFlags(sal_uInt32 c, ScanState eState);
150
    /// Access parser flags via International and special definitions.
152 ParserFlags getFlagsExtended(sal_uInt32 c, ScanState eState) const;
153
    /// Access parser table flags for user defined start characters.
155 ParserFlags getStartCharsFlags( sal_uInt32 c );
156
    // NOTE(review): listing line 157 is missing; the member index lists
    // "ParserFlags getContCharsFlags(sal_Unicode c)" (access parser table flags
    // for user defined continuation characters), presumably declared there.
159
    /// Setup parser table. Calls initParserTable() only if needed.
161 void setupParserTable( const css::lang::Locale& rLocale, sal_Int32 startCharTokenType,
162 const OUString& userDefinedCharactersStart, sal_Int32 contCharTokenType,
163 const OUString& userDefinedCharactersCont );
164
    /// Init parser table.
166 void initParserTable( const css::lang::Locale& rLocale, sal_Int32 startCharTokenType,
167 const OUString& userDefinedCharactersStart, sal_Int32 contCharTokenType,
168 const OUString& userDefinedCharactersCont );
169
    /// Destroy parser table.
171 void destroyParserTable();
172
    /// Parse a text.
    // nTokenType defaults to 0xffffffff, i.e. every bit of a 32-bit mask set.
174 void parseText( css::i18n::ParseResult& r, const OUString& rText, sal_Int32 nPos,
175 sal_Int32 nTokenType = 0xffffffff );
176
    /// Setup International class, new'ed only if different from existing.
178 void setupInternational( const css::lang::Locale& rLocale );
179
    /// Implementation of getCharacterType() for one single character.
    // nPos is passed by pointer - presumably advanced by `increment`; confirm
    // against the definition.
181 static sal_Int32 getCharType( const OUString& Text, sal_Int32 *nPos, sal_Int32 increment);
182
183};
184
185}
186
187/* vim:set shiftwidth=4 softtabstop=4 expandtab: */
ParserFlags
Flag values of table.
virtual css::uno::Sequence< OUString > SAL_CALL getSupportedServiceNames() override
cclass_Unicode(css::uno::Reference< css::uno::XComponentContext > xContext)
virtual OUString SAL_CALL getImplementationName() override
virtual OUString SAL_CALL toUpper(const OUString &Text, sal_Int32 nPos, sal_Int32 nCount, const css::lang::Locale &rLocale) override
virtual OUString SAL_CALL toLower(const OUString &Text, sal_Int32 nPos, sal_Int32 nCount, const css::lang::Locale &rLocale) override
virtual css::i18n::ParseResult SAL_CALL parseAnyToken(const OUString &Text, sal_Int32 nPos, const css::lang::Locale &rLocale, sal_Int32 nStartCharFlags, const OUString &userDefinedCharactersStart, sal_Int32 nContCharFlags, const OUString &userDefinedCharactersCont) override
css::lang::Locale aParserLocale
used for parser only
virtual sal_Bool SAL_CALL supportsService(const OUString &ServiceName) override
virtual OUString SAL_CALL toTitle(const OUString &Text, sal_Int32 nPos, sal_Int32 nCount, const css::lang::Locale &rLocale) override
static const ParserFlags pDefaultParserTable[]
static sal_Int32 getCharType(const OUString &Text, sal_Int32 *nPos, sal_Int32 increment)
Implementation of getCharacterType() for one single character.
void parseText(css::i18n::ParseResult &r, const OUString &rText, sal_Int32 nPos, sal_Int32 nTokenType=0xffffffff)
Parse a text.
void setupInternational(const css::lang::Locale &rLocale)
Set up the International class; it is newly allocated only if it differs from the existing one.
virtual sal_Int32 SAL_CALL getStringType(const OUString &text, sal_Int32 nPos, sal_Int32 nCount, const css::lang::Locale &rLocale) override
static const sal_uInt8 nDefCnt
std::unique_ptr< ParserFlags[]> pTable
void setupParserTable(const css::lang::Locale &rLocale, sal_Int32 startCharTokenType, const OUString &userDefinedCharactersStart, sal_Int32 contCharTokenType, const OUString &userDefinedCharactersCont)
Set up the parser table. Calls initParserTable() only if needed.
rtl::Reference< Transliteration_casemapping > transToTitle
ParserFlags getStartCharsFlags(sal_uInt32 c)
Access parser table flags for user defined start characters.
rtl::Reference< Transliteration_casemapping > transToLower
virtual sal_Int16 SAL_CALL getCharacterDirection(const OUString &Text, sal_Int32 nPos) override
virtual css::i18n::ParseResult SAL_CALL parsePredefinedToken(sal_Int32 nTokenType, const OUString &Text, sal_Int32 nPos, const css::lang::Locale &rLocale, sal_Int32 nStartCharFlags, const OUString &userDefinedCharactersStart, sal_Int32 nContCharFlags, const OUString &userDefinedCharactersCont) override
css::uno::Reference< css::i18n::XNativeNumberSupplier > xNatNumSup
virtual sal_Int16 SAL_CALL getType(const OUString &Text, sal_Int32 nPos) override
std::unique_ptr< ParserFlags[]> pCont
ParserFlags getFlagsExtended(sal_uInt32 c, ScanState eState) const
Access parser flags via International and special definitions.
void destroyParserTable()
Destroy parser table.
virtual sal_Int16 SAL_CALL getScript(const OUString &Text, sal_Int32 nPos) override
css::uno::Reference< css::uno::XComponentContext > m_xContext
ParserFlags getFlags(sal_uInt32 c, ScanState eState)
Access parser table flags.
std::unique_ptr< ParserFlags[]> pStart
static const sal_Unicode * StrChr(const sal_Unicode *pStr, sal_uInt32 c)
If and where c occurs in pStr.
virtual sal_Int32 SAL_CALL getCharacterType(const OUString &text, sal_Int32 nPos, const css::lang::Locale &rLocale) override
static sal_Int32 getParseTokensType(sal_uInt32 c, bool isFirst)
Get corresponding KParseTokens flag for a character.
rtl::Reference< Transliteration_casemapping > transToUpper
virtual ~cclass_Unicode() override
static const sal_Int32 pParseTokensType[]
css::uno::Reference< css::i18n::XLocaleData5 > mxLocaleData
void initParserTable(const css::lang::Locale &rLocale, sal_Int32 startCharTokenType, const OUString &userDefinedCharactersStart, sal_Int32 contCharTokenType, const OUString &userDefinedCharactersCont)
Init parser table.
ParserFlags getContCharsFlags(sal_Unicode c)
Access parser table flags for user defined continuation characters.
int nCount
sal_uInt16 nPos
unsigned short WORD
def text(shape, orig_st)
Constant values shared between i18npool and, for example, the number formatter.
const wchar_t *typedef BOOL
unsigned char sal_uInt8
unsigned char sal_Bool
sal_uInt16 sal_Unicode