LibreOffice Module vcl (master) 1
scrptrun.cxx
Go to the documentation of this file.
1/*
2 *******************************************************************************
3 *
4 * Copyright (c) 1995-2013 International Business Machines Corporation and others
5 *
6 * All rights reserved.
7 *
8 * Permission is hereby granted, free of charge, to any person obtaining a copy of
9 * this software and associated documentation files (the "Software"), to deal in
10 * the Software without restriction, including without limitation the rights to
11 * use, copy, modify, merge, publish, distribute, and/or sell copies of the
12 * Software, and to permit persons to whom the Software is furnished to do so,
13 * provided that the above copyright notice(s) and this permission notice appear
14 * in all copies of the Software and that both the above copyright notice(s) and
15 * this permission notice appear in supporting documentation.
16 *
17 * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
18 * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
19 * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT OF THIRD PARTY RIGHTS. IN
20 * NO EVENT SHALL THE COPYRIGHT HOLDER OR HOLDERS INCLUDED IN THIS NOTICE BE
21 * LIABLE FOR ANY CLAIM, OR ANY SPECIAL INDIRECT OR CONSEQUENTIAL DAMAGES, OR ANY
22 * DAMAGES WHATSOEVER RESULTING FROM LOSS OF USE, DATA OR PROFITS, WHETHER IN AN
23 * ACTION OF CONTRACT, NEGLIGENCE OR OTHER TORTIOUS ACTION, ARISING OUT OF OR IN
24 * CONNECTION WITH THE USE OR PERFORMANCE OF THIS SOFTWARE.
25 *
26 * Except as contained in this notice, the name of a copyright holder shall not be
27 * used in advertising or otherwise to promote the sale, use or other dealings in
28 * this Software without prior written authorization of the copyright holder.
29 *
30 *******************************************************************************
31 * file name: scrptrun.cpp
32 *
33 * created on: 10/17/2001
34 * created by: Eric R. Mader
35 */
41#include <sal/config.h>
42
43#include <rtl/character.hxx>
44#include <unicode/uchar.h>
45#include <unicode/utypes.h>
46#include <unicode/uscript.h>
47
48#include <scrptrun.h>
49#include <algorithm>
50
51namespace {
52
/**
 * Lookup tables that map paired punctuation characters to a "pair index".
 *
 * Each pair occupies two consecutive indices: the even index is the opening
 * character and the following odd index is its closing counterpart (e.g.
 * '(' -> 0, ')' -> 1). Characters that are not paired punctuation map to -1.
 *
 * Three small direct-indexed tables cover the code point ranges that contain
 * the supported characters:
 *   - ma00: U+0000 .. U+00FE
 *   - ma20: U+2000 .. U+207E
 *   - ma30: U+3000 .. U+307E
 */
struct PairIndices
{
    int8_t ma00[0xff];
    int8_t ma20[0x7f];
    int8_t ma30[0x7f];

    /// Marks every slot as "not a paired character" and then registers the pairs.
    PairIndices()
    {
        std::fill_n(ma00, 0xff, -1);
        std::fill_n(ma20, 0x7f, -1);
        std::fill_n(ma30, 0x7f, -1);

        // characters in the range 0x0000 - 0x00fe (inclusive)
        // ascii paired punctuation
        ma00[0x28] = 0;
        ma00[0x29] = 1;
        ma00[0x3c] = 2;
        ma00[0x3e] = 3;
        ma00[0x5b] = 4;
        ma00[0x5d] = 5;
        ma00[0x7b] = 6;
        ma00[0x7d] = 7;
        // guillemets
        ma00[0xab] = 8;
        ma00[0xbb] = 9;

        // characters in the range 0x2000 - 0x207e (inclusive)
        // general punctuation
        ma20[0x18] = 10;
        ma20[0x19] = 11;
        ma20[0x1c] = 12;
        ma20[0x1d] = 13;
        ma20[0x39] = 14;
        ma20[0x3a] = 15;

        // characters in the range 0x3000 - 0x307e (inclusive)
        // chinese paired punctuation
        ma30[0x08] = 16;
        ma30[0x09] = 17;
        ma30[0x0a] = 18;
        ma30[0x0b] = 19;
        ma30[0x0c] = 20;
        ma30[0x0d] = 21;
        ma30[0x0e] = 22;
        ma30[0x0f] = 23;
        ma30[0x10] = 24;
        ma30[0x11] = 25;
        ma30[0x14] = 26;
        ma30[0x15] = 27;
        ma30[0x16] = 28;
        ma30[0x17] = 29;
        ma30[0x18] = 30;
        ma30[0x19] = 31;
        ma30[0x1a] = 32;
        ma30[0x1b] = 33;
    }

    /**
     * Returns the pair index registered for @p ch.
     *
     * @param ch  code point to look up
     * @return    an even index for an opening character, the following odd
     *            index for its closing counterpart, or -1 when @p ch is not
     *            a known paired character (including any value outside the
     *            three covered ranges)
     */
    int32_t getPairIndex(UChar32 ch) const
    {
        // UChar32 is signed: reject negative values up front so they can
        // never be used as an index in front of ma00.
        if (ch < 0)
            return -1;
        if (ch < 0xff)
            return ma00[ch];
        if (ch >= 0x2000 && ch < 0x207f)
            return ma20[ch - 0x2000];
        if (ch >= 0x3000 && ch < 0x307f)
            return ma30[ch - 0x3000];
        return -1;
    }

};
122
123UScriptCode getScript(UChar32 ch, UErrorCode* status)
124{
125 // tdf#154549
126 // Make combining marks inherit the script of their bases, regardless of
127 // their own script.
128 if (u_getIntPropertyValue(ch, UCHAR_GENERAL_CATEGORY) == U_NON_SPACING_MARK)
129 return USCRIPT_INHERITED;
130
131 UScriptCode script = uscript_getScript(ch, status);
132 if (U_FAILURE(*status))
133 return script;
134
135 // There are three Unicode script codes for Japanese text, but only one
136 // OpenType script tag, so we want to keep them in one run as splitting is
137 // pointless for the purpose of OpenType shaping.
138 if (script == USCRIPT_KATAKANA || script == USCRIPT_KATAKANA_OR_HIRAGANA)
139 return USCRIPT_HIRAGANA;
140 return script;
141}
142
143}
144
// Single immutable instance of the paired-punctuation lookup tables; its
// constructor fills the tables, and the script-run scanner in this file
// queries it through getPairIndex().
145const PairIndices gPairIndices;
146
147
148namespace vcl {
149
// The address of this static class variable serves as this class's ID for
// ICU "poor man's RTTI"; the stored value itself is just 0.
150const char ScriptRun::fgClassID=0;
151
/**
 * Tells whether two script codes may share one script run.
 *
 * A code that is not greater than USCRIPT_INHERITED is compatible with any
 * other code (in ICU's UScriptCode numbering these are the invalid, common
 * and inherited codes); otherwise the two codes have to be equal.
 *
 * @param scriptOne  first script code
 * @param scriptTwo  second script code
 * @return           true when both codes can be part of the same run
 */
static bool sameScript(int32_t scriptOne, int32_t scriptTwo)
{
    // Either side being a "wildcard" code is enough for a match.
    if (scriptOne <= USCRIPT_INHERITED)
        return true;
    if (scriptTwo <= USCRIPT_INHERITED)
        return true;

    // Two specific scripts: they must be the very same one.
    return scriptOne == scriptTwo;
}
156
158{
159 int32_t startSP = parenSP; // used to find the first new open character
160 UErrorCode error = U_ZERO_ERROR;
161
162 // if we've fallen off the end of the text, we're done
163 if (scriptEnd >= charLimit) {
164 return false;
165 }
166
167 scriptCode = USCRIPT_COMMON;
168
170 UChar high = charArray[scriptEnd];
171 UChar32 ch = high;
172
173 // if the character is a high surrogate and it's not the last one
174 // in the text, see if it's followed by a low surrogate
175 if (rtl::isHighSurrogate(high) && scriptEnd < charLimit - 1)
176 {
177 UChar low = charArray[scriptEnd + 1];
178
179 // if it is followed by a low surrogate,
180 // consume it and form the full character
181 if (rtl::isLowSurrogate(low)) {
182 ch = rtl::combineSurrogates(high, low);
183 scriptEnd += 1;
184 }
185 }
186
187 UScriptCode sc = getScript(ch, &error);
188 int32_t pairIndex = gPairIndices.getPairIndex(ch);
189
190 // Paired character handling:
191
192 // if it's an open character, push it onto the stack.
193 // if it's a close character, find the matching open on the
194 // stack, and use that script code. Any non-matching open
195 // characters above it on the stack will be popped.
196 if (pairIndex >= 0) {
197 if ((pairIndex & 1) == 0) {
198 ++parenSP;
199 int32_t nVecSize = parenStack.size();
200 if (parenSP == nVecSize)
201 parenStack.resize(nVecSize + 128);
202 parenStack[parenSP].pairIndex = pairIndex;
203 parenStack[parenSP].scriptCode = scriptCode;
204 } else if (parenSP >= 0) {
205 int32_t pi = pairIndex & ~1;
206
207 while (parenSP >= 0 && parenStack[parenSP].pairIndex != pi) {
208 parenSP -= 1;
209 }
210
211 if (parenSP < startSP) {
212 startSP = parenSP;
213 }
214
215 if (parenSP >= 0) {
216 sc = parenStack[parenSP].scriptCode;
217 }
218 }
219 }
220
221 if (sameScript(scriptCode, sc)) {
222 if (scriptCode <= USCRIPT_INHERITED && sc > USCRIPT_INHERITED) {
223 scriptCode = sc;
224
225 // now that we have a final script code, fix any open
226 // characters we pushed before we knew the script code.
227 while (startSP < parenSP) {
228 parenStack[++startSP].scriptCode = scriptCode;
229 }
230 }
231
232 // if this character is a close paired character,
233 // pop it from the stack
234 if (pairIndex >= 0 && (pairIndex & 1) != 0 && parenSP >= 0) {
235 parenSP -= 1;
236 /* decrement startSP only if it is >= 0,
237 decrementing it unnecessarily will lead to memory corruption
238 while processing the above while block.
239 e.g. startSP = -4 , parenSP = -1
240 */
241 if (startSP >= 0) {
242 startSP -= 1;
243 }
244 }
245 } else {
246 // if the run broke on a surrogate pair,
247 // end it before the high surrogate
248 if (ch >= 0x10000) {
249 scriptEnd -= 1;
250 }
251
252 break;
253 }
254 }
255
256 return true;
257}
258
259}
sal_Int16 script
UScriptCode scriptCode
Definition: scrptrun.h:104
const UChar * charArray
Definition: scrptrun.h:100
int32_t scriptEnd
Definition: scrptrun.h:103
static const char fgClassID
The address of this static class variable serves as this class's ID for ICU "poor man's RTTI".
Definition: scrptrun.h:113
std::vector< ParenStackEntry > parenStack
Definition: scrptrun.h:106
int32_t charLimit
Definition: scrptrun.h:99
UBool next()
Definition: scrptrun.cxx:157
int32_t scriptStart
Definition: scrptrun.h:102
int32_t parenSP
Definition: scrptrun.h:107
static bool sameScript(int32_t scriptOne, int32_t scriptTwo)
Definition: scrptrun.cxx:152
const PairIndices gPairIndices
Definition: scrptrun.cxx:145