LibreOffice Module i18nutil (master)  1
unicode.hxx
Go to the documentation of this file.
1 /* -*- Mode: C++; tab-width: 4; indent-tabs-mode: nil; c-basic-offset: 4 -*- */
2 /*
3  * This file is part of the LibreOffice project.
4  *
5  * This Source Code Form is subject to the terms of the Mozilla Public
6  * License, v. 2.0. If a copy of the MPL was not distributed with this
7  * file, You can obtain one at http://mozilla.org/MPL/2.0/.
8  *
9  * This file incorporates work covered by the following license notice:
10  *
11  * Licensed to the Apache Software Foundation (ASF) under one or more
12  * contributor license agreements. See the NOTICE file distributed
13  * with this work for additional information regarding copyright
14  * ownership. The ASF licenses this file to you under the Apache
15  * License, Version 2.0 (the "License"); you may not use this file
16  * except in compliance with the License. You may obtain a copy of
17  * the License at http://www.apache.org/licenses/LICENSE-2.0 .
18  */
19 #ifndef INCLUDED_I18NUTIL_UNICODE_HXX
20 #define INCLUDED_I18NUTIL_UNICODE_HXX
21 
22 #include <com/sun/star/i18n/UnicodeScript.hpp>
23 #include <sal/types.h>
24 #include <rtl/ustrbuf.hxx>
25 #include <unicode/uscript.h>
27 
28 class LanguageTag;
29 
31 {
32  css::i18n::UnicodeScript from;
33  css::i18n::UnicodeScript to;
34  sal_Int16 value;
35 };
36 
38 {
39 public:
40  static sal_Int16 getUnicodeType(const sal_Unicode ch);
41  static sal_Int16 getUnicodeScriptType(const sal_Unicode ch, const ScriptTypeList* typeList,
42  sal_Int16 unknownType = 0);
43  static sal_Unicode getUnicodeScriptStart(css::i18n::UnicodeScript type);
44  static sal_Unicode getUnicodeScriptEnd(css::i18n::UnicodeScript type);
45  static sal_uInt8 getUnicodeDirection(const sal_Unicode ch);
46  static bool isControl(const sal_Unicode ch);
47  static bool isAlpha(const sal_Unicode ch);
48  static bool isSpace(const sal_Unicode ch);
49  static bool isWhiteSpace(const sal_Unicode ch);
50 
57  static bool isIVSSelector(sal_uInt32 nCode)
58  {
59  return (nCode >= 0xFE00 && nCode <= 0xFE0F) // Variation Selectors block
60  || (nCode >= 0xE0100 && nCode <= 0xE01EF); // Variation Selectors Supplement block
61  }
62 
69  static bool isCJKIVSCharacter(sal_uInt32 nCode)
70  {
71  return (nCode >= 0x4E00 && nCode <= 0x9FFF) // CJK Unified Ideographs
72  || (nCode >= 0x3400 && nCode <= 0x4DBF) // CJK Unified Ideographs Extension A
73  || (nCode >= 0x20000 && nCode <= 0x2A6DF); // CJK Unified Ideographs Extension B
74  }
75 
76  //Map an ISO 15924 script code to Latin/Asian/Complex/Weak
77  static sal_Int16 getScriptClassFromUScriptCode(UScriptCode eScript);
78 
79  //Return a language that can be written in a given ISO 15924 script code
80  static OString getExemplarLanguageForUScriptCode(UScriptCode eScript);
81 
82  //Format a number as a percentage according to the rules of the given
83  //language, e.g. 100 -> "100%" for en-US vs "100 %" for de-DE
84  static OUString formatPercent(double dNumber, const LanguageTag& rLangTag);
85 };
86 
87 /*
88  Toggle between a character and its Unicode Notation.
89  -implements the concept found in Microsoft Word's Alt-X
90  -accepts sequences of up to 8 hex characters and converts into the corresponding Unicode Character
91  -example: 0000A78c or 2bc
92  -accepts sequences of up to 256 characters in Unicode notation
93  -example: U+00000065u+0331u+308
94  -handles complex characters (with combining elements) and all of the Unicode planes.
95 */
97 {
98 private:
99  OUStringBuffer maInput;
100  OUStringBuffer maUtf16;
101  OUStringBuffer maCombining;
102  bool mbAllowMoreChars = true;
103  bool mbRequiresU = false;
104  bool mbIsHexString = false;
105 
106 public:
112  bool AllowMoreInput(sal_Unicode uChar);
113 
119  OUString StringToReplace();
120  OUString ReplacementString();
121 
126  sal_uInt32 CharsToDelete();
127 };
128 
129 #endif
130 
131 /* vim:set shiftwidth=4 softtabstop=4 expandtab: */
sal_Int16 value
Definition: unicode.hxx:34
#define I18NUTIL_DLLPUBLIC
sal_uInt16 sal_Unicode
static bool isCJKIVSCharacter(sal_uInt32 nCode)
Check for base characters of a CJK ideographic variation sequence (IVS)
Definition: unicode.hxx:69
css::i18n::UnicodeScript from
Definition: unicode.hxx:32
OUStringBuffer maInput
Definition: unicode.hxx:99
static bool isIVSSelector(sal_uInt32 nCode)
Check for Unicode variation sequence selectors.
Definition: unicode.hxx:57
css::i18n::UnicodeScript to
Definition: unicode.hxx:33
bool isAlpha(sal_Unicode c, bool bCompatible)
unsigned char sal_uInt8
OUStringBuffer maCombining
Definition: unicode.hxx:101
ResultType type
ScriptTypeList const typeList[]
OUStringBuffer maUtf16
Definition: unicode.hxx:100