LibreOffice Module vcl (master)  1
scrptrun.cxx
Go to the documentation of this file.
1 /*
2  *******************************************************************************
3  *
4  * Copyright (c) 1995-2013 International Business Machines Corporation and others
5  *
6  * All rights reserved.
7  *
8  * Permission is hereby granted, free of charge, to any person obtaining a copy of
9  * this software and associated documentation files (the "Software"), to deal in
10  * the Software without restriction, including without limitation the rights to
11  * use, copy, modify, merge, publish, distribute, and/or sell copies of the
12  * Software, and to permit persons to whom the Software is furnished to do so,
13  * provided that the above copyright notice(s) and this permission notice appear
14  * in all copies of the Software and that both the above copyright notice(s) and
15  * this permission notice appear in supporting documentation.
16  *
17  * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
18  * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
19  * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT OF THIRD PARTY RIGHTS. IN
20  * NO EVENT SHALL THE COPYRIGHT HOLDER OR HOLDERS INCLUDED IN THIS NOTICE BE
21  * LIABLE FOR ANY CLAIM, OR ANY SPECIAL INDIRECT OR CONSEQUENTIAL DAMAGES, OR ANY
22  * DAMAGES WHATSOEVER RESULTING FROM LOSS OF USE, DATA OR PROFITS, WHETHER IN AN
23  * ACTION OF CONTRACT, NEGLIGENCE OR OTHER TORTIOUS ACTION, ARISING OUT OF OR IN
24  * CONNECTION WITH THE USE OR PERFORMANCE OF THIS SOFTWARE.
25  *
26  * Except as contained in this notice, the name of a copyright holder shall not be
27  * used in advertising or otherwise to promote the sale, use or other dealings in
28  * this Software without prior written authorization of the copyright holder.
29  *
30  *******************************************************************************
31  * file name: scrptrun.cpp
32  *
33  * created on: 10/17/2001
34  * created by: Eric R. Mader
35  */
40 #include <unicode/utypes.h>
41 #include <unicode/uscript.h>
42 
43 #include <scrptrun.h>
44 #include <algorithm>
45 
46 namespace {
47 
/// Lookup tables mapping paired punctuation code points to a "pair index".
///
/// Every open/close pair occupies two consecutive indices: the opening
/// character gets the even index and its closing partner the following odd
/// index. Entries for code points that are not paired punctuation hold -1.
struct PairIndices
{
    // Number of code points covered by each table, starting at its base
    // (0x0000, 0x2000 and 0x3000 respectively).
    static constexpr int32_t kLatinCount = 0xff;       // 0x0000 - 0x00fe
    static constexpr int32_t kPunctuationCount = 0x7f; // 0x2000 - 0x207e
    static constexpr int32_t kCjkCount = 0x7f;         // 0x3000 - 0x307e

    static constexpr int32_t kPunctuationBase = 0x2000;
    static constexpr int32_t kCjkBase = 0x3000;

    int8_t ma00[kLatinCount];
    int8_t ma20[kPunctuationCount];
    int8_t ma30[kCjkCount];

    PairIndices()
    {
        // Start with "not a paired character" everywhere.
        std::fill_n(ma00, kLatinCount, -1);
        std::fill_n(ma20, kPunctuationCount, -1);
        std::fill_n(ma30, kCjkCount, -1);

        // characters in the range 0x0000 - 0x00fe (inclusive)
        // ascii paired punctuation
        ma00[0x28] = 0;
        ma00[0x29] = 1;
        ma00[0x3c] = 2;
        ma00[0x3e] = 3;
        ma00[0x5b] = 4;
        ma00[0x5d] = 5;
        ma00[0x7b] = 6;
        ma00[0x7d] = 7;
        // guillemets
        ma00[0xab] = 8;
        ma00[0xbb] = 9;

        // characters in the range 0x2000 - 0x207e (inclusive)
        // general punctuation
        ma20[0x18] = 10;
        ma20[0x19] = 11;
        ma20[0x1c] = 12;
        ma20[0x1d] = 13;
        ma20[0x39] = 14;
        ma20[0x3a] = 15;

        // characters in the range 0x3000 - 0x307e (inclusive)
        // chinese paired punctuation
        ma30[0x08] = 16;
        ma30[0x09] = 17;
        ma30[0x0a] = 18;
        ma30[0x0b] = 19;
        ma30[0x0c] = 20;
        ma30[0x0d] = 21;
        ma30[0x0e] = 22;
        ma30[0x0f] = 23;
        ma30[0x10] = 24;
        ma30[0x11] = 25;
        ma30[0x14] = 26;
        ma30[0x15] = 27;
        ma30[0x16] = 28;
        ma30[0x17] = 29;
        ma30[0x18] = 30;
        ma30[0x19] = 31;
        ma30[0x1a] = 32;
        ma30[0x1b] = 33;
    }

    /// Returns the pair index stored for @p ch, or -1 when @p ch is not one
    /// of the paired punctuation characters registered in the constructor
    /// (including any code point outside the three covered ranges).
    int32_t getPairIndex(UChar32 ch) const
    {
        // UChar32 is signed: reject negative values before they can be used
        // as an array index.
        if (ch < 0)
            return -1;
        if (ch < kLatinCount)
            return ma00[ch];
        if (ch >= kPunctuationBase && ch < kPunctuationBase + kPunctuationCount)
            return ma20[ch - kPunctuationBase];
        if (ch >= kCjkBase && ch < kCjkBase + kCjkCount)
            return ma30[ch - kCjkBase];
        return -1;
    }

};
117 
118 // There are three Unicode script codes for Japanese text, but only one
119 // OpenType script tag, so we want to keep them in one run as splitting is
120 // pointless for the purpose of OpenType shaping.
121 UScriptCode getScript(UChar32 ch, UErrorCode* status)
122 {
123  UScriptCode script = uscript_getScript(ch, status);
124  if (U_FAILURE(*status))
125  return script;
126  if (script == USCRIPT_KATAKANA || script == USCRIPT_KATAKANA_OR_HIRAGANA)
127  return USCRIPT_HIRAGANA;
128  return script;
129 }
130 
131 }
132 
// File-scope, read-only instance of the paired-punctuation lookup tables.
// Its constructor fills the tables once; the run-scanning code further down
// queries it through getPairIndex().
133 static const PairIndices gPairIndices;
134 
135 
136 namespace vcl {
137 
// Out-of-class definition of the static member ScriptRun::fgClassID.
// Per the class documentation, it is the address of this variable (not its
// value) that serves as the class's ID for ICU "poor man's RTTI".
138 const char ScriptRun::fgClassID=0;
139 
140 static UBool sameScript(int32_t scriptOne, int32_t scriptTwo)
141 {
142  return scriptOne <= USCRIPT_INHERITED || scriptTwo <= USCRIPT_INHERITED || scriptOne == scriptTwo;
143 }
144 
// Body of the run scanner. The cross-reference index at the end of this
// listing names listing line 145 as `UBool next()`; that signature line is
// missing from this extract.
//
// Visible behaviour: returns false when scriptEnd has already reached
// charLimit. Otherwise it resets scriptCode to USCRIPT_COMMON, examines
// characters starting at charArray[scriptEnd] (combining surrogate pairs into
// one code point), resolves paired punctuation through parenStack, stops at
// the first character whose script is not compatible with the run's script
// (see sameScript), and returns true.
//
// NOTE(review): listing line 157 is also missing. The `break` and the extra
// closing brace near the end imply it is the header of the loop that advances
// scriptEnd (and presumably sets scriptStart) - confirm against the original
// file before editing this code.
146 {
147  int32_t startSP = parenSP; // used to find the first new open character
148  UErrorCode error = U_ZERO_ERROR;
149 
150  // if we've fallen off the end of the text, we're done
151  if (scriptEnd >= charLimit) {
152  return false;
153  }
154 
155  scriptCode = USCRIPT_COMMON;
156 
158  UChar high = charArray[scriptEnd];
159  UChar32 ch = high;
160 
161  // if the character is a high surrogate and it's not the last one
162  // in the text, see if it's followed by a low surrogate
163  if (high >= 0xD800 && high <= 0xDBFF && scriptEnd < charLimit - 1)
164  {
165  UChar low = charArray[scriptEnd + 1];
166 
167  // if it is followed by a low surrogate,
168  // consume it and form the full character
169  if (low >= 0xDC00 && low <= 0xDFFF) {
170  ch = (high - 0xD800) * 0x0400 + low - 0xDC00 + 0x10000;
171  scriptEnd += 1;
172  }
173  }
174 
// NOTE(review): `error` is handed to getScript() but is never inspected
// afterwards in the visible code, so a failed lookup is not handled here.
175  UScriptCode sc = getScript(ch, &error);
176  int32_t pairIndex = gPairIndices.getPairIndex(ch);
177 
178  // Paired character handling:
179 
180  // if it's an open character, push it onto the stack.
181  // if it's a close character, find the matching open on the
182  // stack, and use that script code. Any non-matching open
183  // characters above it on the stack will be popped.
184  if (pairIndex >= 0) {
// even pair index = opening character, odd = its closing partner
185  if ((pairIndex & 1) == 0) {
186  ++parenSP;
// grow the stack by 128 entries when the new top index would be one past the end
187  int32_t nVecSize = parenStack.size();
188  if (parenSP == nVecSize)
189  parenStack.resize(nVecSize + 128);
190  parenStack[parenSP].pairIndex = pairIndex;
191  parenStack[parenSP].scriptCode = scriptCode;
192  } else if (parenSP >= 0) {
// clearing the low bit yields the index of the matching opening character
193  int32_t pi = pairIndex & ~1;
194 
195  while (parenSP >= 0 && parenStack[parenSP].pairIndex != pi) {
196  parenSP -= 1;
197  }
198 
199  if (parenSP < startSP) {
200  startSP = parenSP;
201  }
202 
203  if (parenSP >= 0) {
204  sc = parenStack[parenSP].scriptCode;
205  }
206  }
207  }
208 
209  if (sameScript(scriptCode, sc)) {
210  if (scriptCode <= USCRIPT_INHERITED && sc > USCRIPT_INHERITED) {
211  scriptCode = sc;
212 
213  // now that we have a final script code, fix any open
214  // characters we pushed before we knew the script code.
215  while (startSP < parenSP) {
216  parenStack[++startSP].scriptCode = scriptCode;
217  }
218  }
219 
220  // if this character is a close paired character,
221  // pop it from the stack
222  if (pairIndex >= 0 && (pairIndex & 1) != 0 && parenSP >= 0) {
223  parenSP -= 1;
224  /* decrement startSP only if it is >= 0,
225  decrementing it unnecessarily will lead to memory corruption
226  while processing the above while block.
227  e.g. startSP = -4 , parenSP = -1
228  */
229  if (startSP >= 0) {
230  startSP -= 1;
231  }
232  }
233  } else {
234  // if the run broke on a surrogate pair,
235  // end it before the high surrogate
236  if (ch >= 0x10000) {
237  scriptEnd -= 1;
238  }
239 
240  break;
241  }
242  }
243 
244  return true;
245 }
246 
247 }
UBool next()
Definition: scrptrun.cxx:145
int32_t scriptEnd
Definition: scrptrun.h:99
sal_Int16 script
const UChar * charArray
Definition: scrptrun.h:96
int32_t scriptStart
Definition: scrptrun.h:98
std::vector< ParenStackEntry > parenStack
Definition: scrptrun.h:102
int32_t parenSP
Definition: scrptrun.h:103
int32_t charLimit
Definition: scrptrun.h:95
static const PairIndices gPairIndices
Definition: scrptrun.cxx:133
static UBool sameScript(int32_t scriptOne, int32_t scriptTwo)
Definition: scrptrun.cxx:140
UScriptCode scriptCode
Definition: scrptrun.h:100
static const char fgClassID
The address of this static class variable serves as this class's ID for ICU "poor man's RTTI"...
Definition: scrptrun.h:109