LibreOffice Module i18npool (master)  1
breakiterator_th.cxx
Go to the documentation of this file.
1 /* -*- Mode: C++; tab-width: 4; indent-tabs-mode: nil; c-basic-offset: 4 -*- */
2 /*
3  * This file is part of the LibreOffice project.
4  *
5  * This Source Code Form is subject to the terms of the Mozilla Public
6  * License, v. 2.0. If a copy of the MPL was not distributed with this
7  * file, You can obtain one at http://mozilla.org/MPL/2.0/.
8  *
9  * This file incorporates work covered by the following license notice:
10  *
11  * Licensed to the Apache Software Foundation (ASF) under one or more
12  * contributor license agreements. See the NOTICE file distributed
13  * with this work for additional information regarding copyright
14  * ownership. The ASF licenses this file to you under the Apache
15  * License, Version 2.0 (the "License"); you may not use this file
16  * except in compliance with the License. You may obtain a copy of
17  * the License at http://www.apache.org/licenses/LICENSE-2.0 .
18  */
19 
20 
21 #include <com/sun/star/i18n/CharacterIteratorMode.hpp>
22 #include <o3tl/safeint.hxx>
23 #include <breakiterator_th.hxx>
24 #include <wtt.h>
25 
26 using namespace ::com::sun::star;
27 using namespace ::com::sun::star::uno;
28 using namespace ::com::sun::star::i18n;
29 using namespace ::com::sun::star::lang;
30 
31 namespace i18npool {
32 
37 {
38  cBreakIterator = "com.sun.star.i18n.BreakIterator_th";
39  // to improve performance, alloc big enough memory in construct.
40  m_aNextCellIndex.assign(512, 0);
41  m_aPreviousCellIndex.assign(512, 0);
42  lineRule=nullptr;
43 }
44 
49 {
50 }
51 
52 sal_Int32 SAL_CALL BreakIterator_th::previousCharacters( const OUString& Text,
53  sal_Int32 nStartPos, const lang::Locale& rLocale,
54  sal_Int16 nCharacterIteratorMode, sal_Int32 nCount, sal_Int32& nDone )
55 {
56  if (nCharacterIteratorMode == CharacterIteratorMode::SKIPCELL ) {
57  nDone = 0;
58  if (nStartPos > 0) { // for others to skip cell.
59  makeIndex(Text, nStartPos);
60 
61  if (m_aNextCellIndex[nStartPos-1] == 0) // not a CTL character
62  return BreakIterator_Unicode::previousCharacters(Text, nStartPos, rLocale,
63  nCharacterIteratorMode, nCount, nDone);
64  else
65  {
66  while (nCount > 0 && m_aNextCellIndex[nStartPos - 1] > 0)
67  {
68  nCount--; nDone++;
69  nStartPos = m_aPreviousCellIndex[nStartPos - 1];
70  }
71  }
72  } else
73  nStartPos = 0;
74  } else { // for BS to delete one char.
75  for (nDone = 0; nDone < nCount && nStartPos > 0; nDone++)
76  Text.iterateCodePoints(&nStartPos, -1);
77  }
78 
79  return nStartPos;
80 }
81 
82 sal_Int32 SAL_CALL BreakIterator_th::nextCharacters(const OUString& Text,
83  sal_Int32 nStartPos, const lang::Locale& rLocale,
84  sal_Int16 nCharacterIteratorMode, sal_Int32 nCount, sal_Int32& nDone)
85 {
86  sal_Int32 len = Text.getLength();
87  if (nCharacterIteratorMode == CharacterIteratorMode::SKIPCELL ) {
88  nDone = 0;
89  if (nStartPos < len) {
90  makeIndex(Text, nStartPos);
91 
92  if (m_aNextCellIndex[nStartPos] == 0) // not a CTL character
93  return BreakIterator_Unicode::nextCharacters(Text, nStartPos, rLocale,
94  nCharacterIteratorMode, nCount, nDone);
95  else
96  {
97  while (nCount > 0 && m_aNextCellIndex[nStartPos] > 0)
98  {
99  nCount--; nDone++;
100  nStartPos = m_aNextCellIndex[nStartPos];
101  }
102  }
103  } else
104  nStartPos = len;
105  } else {
106  for (nDone = 0; nDone < nCount && nStartPos < Text.getLength(); nDone++)
107  Text.iterateCodePoints(&nStartPos);
108  }
109 
110  return nStartPos;
111 }
112 
113 // Make sure line is broken on cell boundary if we implement cell iterator.
114 LineBreakResults SAL_CALL BreakIterator_th::getLineBreak(
115  const OUString& Text, sal_Int32 nStartPos,
116  const lang::Locale& rLocale, sal_Int32 nMinBreakPos,
117  const LineBreakHyphenationOptions& hOptions,
118  const LineBreakUserOptions& bOptions )
119 {
120  LineBreakResults lbr = BreakIterator_Unicode::getLineBreak(Text, nStartPos,
121  rLocale, nMinBreakPos, hOptions, bOptions );
122  if (lbr.breakIndex < Text.getLength()) {
123  makeIndex(Text, lbr.breakIndex);
124  lbr.breakIndex = m_aPreviousCellIndex[ lbr.breakIndex ];
125  }
126  return lbr;
127 }
128 
// Code point U+0E33; getCombState() treats a following SARA_AM specially.
#define SARA_AM 0x0E33

/*
 * Cell composition states.
 *
 * getACell() extends the current cell only while getCombState() reports
 * ST_COM for a pair of adjacent characters.
 */

// Compose the following character with the leading character and display
// both in the same cell.
#define ST_COM 1
// Display the following character in the next cell.
#define ST_NXT 2
// Non-display.
#define ST_NDP 3
138 
139 const sal_Int16 thaiCompRel[MAX_CT][MAX_CT] = {
140  // C N C L F F F B B B T A A A A A A
141  // T O O V V V V V V D O D D D V V V
142  // R N N 1 2 3 1 2 N 1 2 3 1 2 3
143  // L S E
144  // 0 1 2 3 4 5 6 7 8 9 10 11 12 13 14 15 16
162 
163 };
164 
165 const sal_uInt32 is_ST_COM = (1<<CT_CTRL)|(1<<CT_NON)|(1<<CT_CONS)|(1<<CT_TONE);
166 
167 static sal_uInt16 getCombState(const sal_Unicode *text, sal_Int32 pos)
168 {
169  sal_uInt16 ch1 = getCharType(text[pos]);
170  sal_uInt16 ch2 = getCharType(text[pos+1]);
171 
172  if (text[pos+1] == SARA_AM) {
173  if ((1 << ch1) & is_ST_COM)
174  return ST_COM;
175  else
176  ch2 = CT_AD1;
177  }
178 
179  return thaiCompRel[ch1][ch2];
180 }
181 
182 
183 static sal_Int32 getACell(const sal_Unicode *text, sal_Int32 pos, sal_Int32 len)
184 {
185  sal_uInt32 curr = 1;
186  for (; pos + 1 < len && getCombState(text, pos) == ST_COM; curr++, pos++) {}
187  return curr;
188 }
189 
// True when c lies in the range 0x0E00..0x0E7F (Unicode definition for Thai).
// The argument is parenthesized so that expressions containing operators of
// lower precedence than <= (e.g. ?: or |) are tested as a whole.
// Note that c is evaluated twice; do not pass expressions with side effects.
#define is_Thai(c) (0x0e00 <= (c) && (c) <= 0x0e7f)
191 
192 void BreakIterator_th::makeIndex(const OUString& Text, sal_Int32 const nStartPos)
193 {
194  if (Text != cachedText) {
195  cachedText = Text;
196  if (m_aNextCellIndex.size() < o3tl::make_unsigned(cachedText.getLength())) {
197  m_aNextCellIndex.resize(cachedText.getLength());
198  m_aPreviousCellIndex.resize(cachedText.getLength());
199  }
200  // reset nextCell for new Text
201  m_aNextCellIndex.assign(cachedText.getLength(), 0);
202  }
203  else if (nStartPos >= Text.getLength() || m_aNextCellIndex[nStartPos] > 0
204  || !is_Thai(Text[nStartPos]))
205  return;
206 
207  const sal_Unicode* str = cachedText.getStr();
208  sal_Int32 const len = cachedText.getLength();
209 
210  sal_Int32 startPos = nStartPos;
211  while (startPos > 0 && is_Thai(str[startPos-1])) startPos--;
212  sal_Int32 endPos = nStartPos;
213  while (endPos < len && is_Thai(str[endPos])) endPos++;
214 
215  sal_Int32 start, end, pos;
216  pos = start = end = startPos;
217 
218  assert(endPos >= 0 && o3tl::make_unsigned(endPos) <= m_aNextCellIndex.size());
219  while (pos < endPos) {
220  end += getACell(str, start, endPos);
221  assert(end >= 0 && o3tl::make_unsigned(end) <= m_aNextCellIndex.size());
222  while (pos < end) {
223  m_aNextCellIndex[pos] = end;
224  m_aPreviousCellIndex[pos] = start;
225  pos++;
226  }
227  start = end;
228  }
229 }
230 
231 }
232 
233 /* vim:set shiftwidth=4 softtabstop=4 expandtab: */
const sal_uInt32 is_ST_COM
#define ST_NDP
virtual sal_Int32 SAL_CALL nextCharacters(const OUString &Text, sal_Int32 nStartPos, const css::lang::Locale &rLocale, sal_Int16 nCharacterIteratorMode, sal_Int32 nCount, sal_Int32 &nDone) override
std::vector< sal_Int32 > m_aPreviousCellIndex
#define CT_TONE
Definition: wtt.h:39
static sal_Int32 getACell(const sal_Unicode *text, sal_Int32 pos, sal_Int32 len)
#define MAX_CT
Definition: wtt.h:47
#define is_Thai(c)
void makeIndex(const OUString &text, sal_Int32 pos)
sal_uInt16 sal_Unicode
size_t pos
static constexpr sal_uInt16 getCharType(sal_Unicode x)
constexpr std::enable_if_t< std::is_signed_v< T >, std::make_unsigned_t< T > > make_unsigned(T value)
Constant values shared between i18npool and, for example, the number formatter.
#define CT_CONS
Definition: wtt.h:31
#define SARA_AM
virtual css::i18n::LineBreakResults SAL_CALL getLineBreak(const OUString &Text, sal_Int32 nStartPos, const css::lang::Locale &nLocale, sal_Int32 nMinBreakPos, const css::i18n::LineBreakHyphenationOptions &hOptions, const css::i18n::LineBreakUserOptions &bOptions) override
enumrange< T >::Iterator end(enumrange< T >)
virtual ~BreakIterator_th() override
Deconstructor.
virtual css::i18n::LineBreakResults SAL_CALL getLineBreak(const OUString &Text, sal_Int32 nStartPos, const css::lang::Locale &nLocale, sal_Int32 nMinBreakPos, const css::i18n::LineBreakHyphenationOptions &hOptions, const css::i18n::LineBreakUserOptions &bOptions) override
#define ST_NXT
virtual sal_Int32 SAL_CALL previousCharacters(const OUString &Text, sal_Int32 nStartPos, const css::lang::Locale &nLocale, sal_Int16 nCharacterIteratorMode, sal_Int32 nCount, sal_Int32 &nDone) override
#define ST_COM
const sal_Int16 thaiCompRel[MAX_CT][MAX_CT]
std::vector< sal_Int32 > m_aNextCellIndex
virtual sal_Int32 SAL_CALL previousCharacters(const OUString &text, sal_Int32 start, const css::lang::Locale &nLocale, sal_Int16 nCharacterIteratorMode, sal_Int32 count, sal_Int32 &nDone) override
virtual sal_Int32 SAL_CALL nextCharacters(const OUString &text, sal_Int32 start, const css::lang::Locale &rLocale, sal_Int16 nCharacterIteratorMode, sal_Int32 count, sal_Int32 &nDone) override
#define CT_NON
Definition: wtt.h:30
static sal_uInt16 getCombState(const sal_Unicode *text, sal_Int32 pos)
#define CT_CTRL
Definition: wtt.h:29
#define CT_AD1
Definition: wtt.h:40