LibreOffice Module i18nutil (master)  1
scripttypedetector.cxx
Go to the documentation of this file.
1 /* -*- Mode: C++; tab-width: 4; indent-tabs-mode: nil; c-basic-offset: 4 -*- */
2 /*
3  * This file is part of the LibreOffice project.
4  *
5  * This Source Code Form is subject to the terms of the Mozilla Public
6  * License, v. 2.0. If a copy of the MPL was not distributed with this
7  * file, You can obtain one at http://mozilla.org/MPL/2.0/.
8  *
9  * This file incorporates work covered by the following license notice:
10  *
11  * Licensed to the Apache Software Foundation (ASF) under one or more
12  * contributor license agreements. See the NOTICE file distributed
13  * with this work for additional information regarding copyright
14  * ownership. The ASF licenses this file to you under the Apache
15  * License, Version 2.0 (the "License"); you may not use this file
16  * except in compliance with the License. You may obtain a copy of
17  * the License at http://www.apache.org/licenses/LICENSE-2.0 .
18  */
19 
20 #include <com/sun/star/i18n/CTLScriptType.hpp>
21 #include <com/sun/star/i18n/ScriptDirection.hpp>
22 #include <com/sun/star/i18n/UnicodeScript.hpp>
24 #include <i18nutil/unicode.hxx>
25 
26 using namespace com::sun::star::i18n;
27 
28 const sal_Int16 scriptDirection[] = {
29  ScriptDirection::LEFT_TO_RIGHT, // DirectionProperty_LEFT_TO_RIGHT = 0,
30  ScriptDirection::RIGHT_TO_LEFT, // DirectionProperty_RIGHT_TO_LEFT = 1,
31  ScriptDirection::LEFT_TO_RIGHT, // DirectionProperty_EUROPEAN_NUMBER = 2,
32  ScriptDirection::LEFT_TO_RIGHT, // DirectionProperty_EUROPEAN_NUMBER_SEPARATOR = 3,
33  ScriptDirection::LEFT_TO_RIGHT, // DirectionProperty_EUROPEAN_NUMBER_TERMINATOR = 4,
34  ScriptDirection::RIGHT_TO_LEFT, // DirectionProperty_ARABIC_NUMBER = 5,
35  ScriptDirection::NEUTRAL, // DirectionProperty_COMMON_NUMBER_SEPARATOR = 6,
36  ScriptDirection::NEUTRAL, // DirectionProperty_BLOCK_SEPARATOR = 7,
37  ScriptDirection::NEUTRAL, // DirectionProperty_SEGMENT_SEPARATOR = 8,
38  ScriptDirection::NEUTRAL, // DirectionProperty_WHITE_SPACE_NEUTRAL = 9,
39  ScriptDirection::NEUTRAL, // DirectionProperty_OTHER_NEUTRAL = 10,
40  ScriptDirection::LEFT_TO_RIGHT, // DirectionProperty_LEFT_TO_RIGHT_EMBEDDING = 11,
41  ScriptDirection::LEFT_TO_RIGHT, // DirectionProperty_LEFT_TO_RIGHT_OVERRIDE = 12,
42  ScriptDirection::RIGHT_TO_LEFT, // DirectionProperty_RIGHT_TO_LEFT_ARABIC = 13,
43  ScriptDirection::RIGHT_TO_LEFT, // DirectionProperty_RIGHT_TO_LEFT_EMBEDDING = 14,
44  ScriptDirection::RIGHT_TO_LEFT, // DirectionProperty_RIGHT_TO_LEFT_OVERRIDE = 15,
45  ScriptDirection::NEUTRAL, // DirectionProperty_POP_DIRECTIONAL_FORMAT = 16,
46  ScriptDirection::NEUTRAL, // DirectionProperty_DIR_NON_SPACING_MARK = 17,
47  ScriptDirection::NEUTRAL, // DirectionProperty_BOUNDARY_NEUTRAL = 18,
48 };
49 
50 sal_Int16 ScriptTypeDetector::getScriptDirection( std::u16string_view Text, sal_Int32 nPos, sal_Int16 defaultScriptDirection )
51 {
52  sal_Int16 dir = scriptDirection[unicode::getUnicodeDirection(Text[nPos])];
53  return (dir == ScriptDirection::NEUTRAL) ? defaultScriptDirection : dir;
54 }
55 
56 // return value '-1' means either the direction on nPos is not same as scriptDirection or nPos is out of range.
57 sal_Int32 ScriptTypeDetector::beginOfScriptDirection( const OUString& Text, sal_Int32 nPos, sal_Int16 direction )
58 {
59  sal_Int32 cPos = nPos;
60 
61  if (cPos < Text.getLength()) {
62  for (; cPos >= 0; cPos--) {
63  if (direction != getScriptDirection(Text, cPos, direction))
64  break;
65  }
66  }
67  return cPos == nPos ? -1 : cPos + 1;
68 }
69 
70 sal_Int32 ScriptTypeDetector::endOfScriptDirection( const OUString& Text, sal_Int32 nPos, sal_Int16 direction )
71 {
72  sal_Int32 cPos = nPos;
73  sal_Int32 len = Text.getLength();
74 
75  if (cPos >=0) {
76  for (; cPos < len; cPos++) {
77  if (direction != getScriptDirection(Text, cPos, direction))
78  break;
79  }
80  }
81  return cPos == nPos ? -1 : cPos;
82 }
83 
84 sal_Int16 ScriptTypeDetector::getCTLScriptType( std::u16string_view Text, sal_Int32 nPos )
85 {
86  static const ScriptTypeList typeList[] = {
87  { UnicodeScript_kHebrew, UnicodeScript_kHebrew, CTLScriptType::CTL_HEBREW }, // 10
88  { UnicodeScript_kArabic, UnicodeScript_kArabic, CTLScriptType::CTL_ARABIC }, // 11
89  { UnicodeScript_kDevanagari, UnicodeScript_kDevanagari, CTLScriptType::CTL_INDIC }, // 14
90  { UnicodeScript_kThai, UnicodeScript_kThai, CTLScriptType::CTL_THAI }, // 24
91  { UnicodeScript_kScriptCount, UnicodeScript_kScriptCount, CTLScriptType::CTL_UNKNOWN } // 88
92  };
93 
94  return unicode::getUnicodeScriptType(Text[nPos], typeList);
95 }
96 
97 // Begin of Script Type is inclusive.
98 sal_Int32 ScriptTypeDetector::beginOfCTLScriptType( const OUString& Text, sal_Int32 nPos )
99 {
100  if (nPos < 0)
101  return 0;
102  else if (nPos >= Text.getLength())
103  return Text.getLength();
104  else {
105  sal_Int16 cType = getCTLScriptType(Text, nPos);
106  for (nPos--; nPos >= 0; nPos--) {
107  if (cType != getCTLScriptType(Text, nPos))
108  break;
109  }
110  return nPos + 1;
111  }
112 }
113 
114 // End of the Script Type is exclusive, the return value pointing to the begin of next script type
115 sal_Int32 ScriptTypeDetector::endOfCTLScriptType( const OUString& Text, sal_Int32 nPos )
116 {
117  if (nPos < 0)
118  return 0;
119  else if (nPos >= Text.getLength())
120  return Text.getLength();
121  else {
122  sal_Int16 cType = getCTLScriptType(Text, nPos);
123  sal_Int32 len = Text.getLength();
124  for (nPos++; nPos < len; nPos++) {
125  if (cType != getCTLScriptType(Text, nPos))
126  break;
127  }
128  return nPos;
129  }
130 }
131 
132 /* vim:set shiftwidth=4 softtabstop=4 expandtab: */
static sal_uInt8 getUnicodeDirection(const sal_Unicode ch)
Definition: unicode.cxx:84
static sal_Int16 getUnicodeScriptType(const sal_Unicode ch, const ScriptTypeList *typeList, sal_Int16 unknownType=0)
Definition: unicode.cxx:53
static sal_Int16 getScriptDirection(std::u16string_view Text, sal_Int32 nPos, sal_Int16 defaultScriptDirection)
static sal_Int16 getCTLScriptType(std::u16string_view Text, sal_Int32 nPos)
static sal_Int32 endOfCTLScriptType(const OUString &Text, sal_Int32 nPos)
static sal_Int32 beginOfScriptDirection(const OUString &Text, sal_Int32 nPos, sal_Int16 scriptDirection)
static sal_Int32 endOfScriptDirection(const OUString &Text, sal_Int32 nPos, sal_Int16 scriptDirection)
const sal_Int16 scriptDirection[]
static sal_Int32 beginOfCTLScriptType(const OUString &Text, sal_Int32 nPos)
ScriptTypeList const typeList[]