LibreOffice Module i18nutil (master) 1
scripttypedetector.cxx
Go to the documentation of this file.
1/* -*- Mode: C++; tab-width: 4; indent-tabs-mode: nil; c-basic-offset: 4 -*- */
2/*
3 * This file is part of the LibreOffice project.
4 *
5 * This Source Code Form is subject to the terms of the Mozilla Public
6 * License, v. 2.0. If a copy of the MPL was not distributed with this
7 * file, You can obtain one at http://mozilla.org/MPL/2.0/.
8 *
9 * This file incorporates work covered by the following license notice:
10 *
11 * Licensed to the Apache Software Foundation (ASF) under one or more
12 * contributor license agreements. See the NOTICE file distributed
13 * with this work for additional information regarding copyright
14 * ownership. The ASF licenses this file to you under the Apache
15 * License, Version 2.0 (the "License"); you may not use this file
16 * except in compliance with the License. You may obtain a copy of
17 * the License at http://www.apache.org/licenses/LICENSE-2.0 .
18 */
19
20#include <com/sun/star/i18n/CTLScriptType.hpp>
21#include <com/sun/star/i18n/ScriptDirection.hpp>
22#include <com/sun/star/i18n/UnicodeScript.hpp>
24#include <i18nutil/unicode.hxx>
25#include <o3tl/safeint.hxx>
26
27using namespace com::sun::star::i18n;
28
29const sal_Int16 scriptDirection[] = {
30 ScriptDirection::LEFT_TO_RIGHT, // DirectionProperty_LEFT_TO_RIGHT = 0,
31 ScriptDirection::RIGHT_TO_LEFT, // DirectionProperty_RIGHT_TO_LEFT = 1,
32 ScriptDirection::LEFT_TO_RIGHT, // DirectionProperty_EUROPEAN_NUMBER = 2,
33 ScriptDirection::LEFT_TO_RIGHT, // DirectionProperty_EUROPEAN_NUMBER_SEPARATOR = 3,
34 ScriptDirection::LEFT_TO_RIGHT, // DirectionProperty_EUROPEAN_NUMBER_TERMINATOR = 4,
35 ScriptDirection::RIGHT_TO_LEFT, // DirectionProperty_ARABIC_NUMBER = 5,
36 ScriptDirection::NEUTRAL, // DirectionProperty_COMMON_NUMBER_SEPARATOR = 6,
37 ScriptDirection::NEUTRAL, // DirectionProperty_BLOCK_SEPARATOR = 7,
38 ScriptDirection::NEUTRAL, // DirectionProperty_SEGMENT_SEPARATOR = 8,
39 ScriptDirection::NEUTRAL, // DirectionProperty_WHITE_SPACE_NEUTRAL = 9,
40 ScriptDirection::NEUTRAL, // DirectionProperty_OTHER_NEUTRAL = 10,
41 ScriptDirection::LEFT_TO_RIGHT, // DirectionProperty_LEFT_TO_RIGHT_EMBEDDING = 11,
42 ScriptDirection::LEFT_TO_RIGHT, // DirectionProperty_LEFT_TO_RIGHT_OVERRIDE = 12,
43 ScriptDirection::RIGHT_TO_LEFT, // DirectionProperty_RIGHT_TO_LEFT_ARABIC = 13,
44 ScriptDirection::RIGHT_TO_LEFT, // DirectionProperty_RIGHT_TO_LEFT_EMBEDDING = 14,
45 ScriptDirection::RIGHT_TO_LEFT, // DirectionProperty_RIGHT_TO_LEFT_OVERRIDE = 15,
46 ScriptDirection::NEUTRAL, // DirectionProperty_POP_DIRECTIONAL_FORMAT = 16,
47 ScriptDirection::NEUTRAL, // DirectionProperty_DIR_NON_SPACING_MARK = 17,
48 ScriptDirection::NEUTRAL, // DirectionProperty_BOUNDARY_NEUTRAL = 18,
49};
50
51sal_Int16 ScriptTypeDetector::getScriptDirection( std::u16string_view Text, sal_Int32 nPos, sal_Int16 defaultScriptDirection )
52{
54 return (dir == ScriptDirection::NEUTRAL) ? defaultScriptDirection : dir;
55}
56
57// return value '-1' means either the direction on nPos is not same as scriptDirection or nPos is out of range.
58sal_Int32 ScriptTypeDetector::beginOfScriptDirection( std::u16string_view Text, sal_Int32 nPos, sal_Int16 direction )
59{
60 sal_Int32 cPos = nPos;
61
62 if (cPos < static_cast<sal_Int32>(Text.size())) {
63 for (; cPos >= 0; cPos--) {
64 if (direction != getScriptDirection(Text, cPos, direction))
65 break;
66 }
67 }
68 return cPos == nPos ? -1 : cPos + 1;
69}
70
71sal_Int32 ScriptTypeDetector::endOfScriptDirection( std::u16string_view Text, sal_Int32 nPos, sal_Int16 direction )
72{
73 sal_Int32 cPos = nPos;
74 sal_Int32 len = Text.size();
75
76 if (cPos >=0) {
77 for (; cPos < len; cPos++) {
78 if (direction != getScriptDirection(Text, cPos, direction))
79 break;
80 }
81 }
82 return cPos == nPos ? -1 : cPos;
83}
84
85sal_Int16 ScriptTypeDetector::getCTLScriptType( std::u16string_view Text, sal_Int32 nPos )
86{
87 static const ScriptTypeList typeList[] = {
88 { UnicodeScript_kHebrew, UnicodeScript_kHebrew, CTLScriptType::CTL_HEBREW }, // 10
89 { UnicodeScript_kArabic, UnicodeScript_kArabic, CTLScriptType::CTL_ARABIC }, // 11
90 { UnicodeScript_kDevanagari, UnicodeScript_kDevanagari, CTLScriptType::CTL_INDIC }, // 14
91 { UnicodeScript_kThai, UnicodeScript_kThai, CTLScriptType::CTL_THAI }, // 24
92 { UnicodeScript_kScriptCount, UnicodeScript_kScriptCount, CTLScriptType::CTL_UNKNOWN } // 88
93 };
94
96}
97
98// Begin of Script Type is inclusive.
99sal_Int32 ScriptTypeDetector::beginOfCTLScriptType( std::u16string_view Text, sal_Int32 nPos )
100{
101 if (nPos < 0)
102 return 0;
103 else if (o3tl::make_unsigned(nPos) >= Text.size())
104 return Text.size();
105 else {
106 sal_Int16 cType = getCTLScriptType(Text, nPos);
107 for (nPos--; nPos >= 0; nPos--) {
108 if (cType != getCTLScriptType(Text, nPos))
109 break;
110 }
111 return nPos + 1;
112 }
113}
114
115// End of the Script Type is exclusive, the return value pointing to the begin of next script type
116sal_Int32 ScriptTypeDetector::endOfCTLScriptType( std::u16string_view Text, sal_Int32 nPos )
117{
118 if (nPos < 0)
119 return 0;
120 else if (o3tl::make_unsigned(nPos) >= Text.size())
121 return Text.size();
122 else {
123 sal_Int16 cType = getCTLScriptType(Text, nPos);
124 sal_Int32 len = Text.size();
125 for (nPos++; nPos < len; nPos++) {
126 if (cType != getCTLScriptType(Text, nPos))
127 break;
128 }
129 return nPos;
130 }
131}
132
133/* vim:set shiftwidth=4 softtabstop=4 expandtab: */
static sal_Int32 beginOfCTLScriptType(std::u16string_view Text, sal_Int32 nPos)
static sal_Int32 beginOfScriptDirection(std::u16string_view Text, sal_Int32 nPos, sal_Int16 scriptDirection)
static sal_Int16 getScriptDirection(std::u16string_view Text, sal_Int32 nPos, sal_Int16 defaultScriptDirection)
static sal_Int16 getCTLScriptType(std::u16string_view Text, sal_Int32 nPos)
static sal_Int32 endOfScriptDirection(std::u16string_view Text, sal_Int32 nPos, sal_Int16 scriptDirection)
static sal_Int32 endOfCTLScriptType(std::u16string_view Text, sal_Int32 nPos)
static sal_uInt8 getUnicodeDirection(const sal_Unicode ch)
Definition: unicode.cxx:86
static sal_Int16 getUnicodeScriptType(const sal_Unicode ch, const ScriptTypeList *typeList, sal_Int16 unknownType=0)
Definition: unicode.cxx:55
sal_uInt16 nPos
ScriptTypeList const typeList[]
constexpr std::enable_if_t< std::is_signed_v< T >, std::make_unsigned_t< T > > make_unsigned(T value)
const sal_Int16 scriptDirection[]