LibreOffice Module i18npool (master) 1
breakiterator_th.cxx
Go to the documentation of this file.
1/* -*- Mode: C++; tab-width: 4; indent-tabs-mode: nil; c-basic-offset: 4 -*- */
2/*
3 * This file is part of the LibreOffice project.
4 *
5 * This Source Code Form is subject to the terms of the Mozilla Public
6 * License, v. 2.0. If a copy of the MPL was not distributed with this
7 * file, You can obtain one at http://mozilla.org/MPL/2.0/.
8 *
9 * This file incorporates work covered by the following license notice:
10 *
11 * Licensed to the Apache Software Foundation (ASF) under one or more
12 * contributor license agreements. See the NOTICE file distributed
13 * with this work for additional information regarding copyright
14 * ownership. The ASF licenses this file to you under the Apache
15 * License, Version 2.0 (the "License"); you may not use this file
16 * except in compliance with the License. You may obtain a copy of
17 * the License at http://www.apache.org/licenses/LICENSE-2.0 .
18 */
19
20
21#include <com/sun/star/i18n/CharacterIteratorMode.hpp>
22#include <o3tl/safeint.hxx>
23#include <breakiterator_th.hxx>
24#include <wtt.h>
25
26using namespace ::com::sun::star;
27using namespace ::com::sun::star::uno;
28using namespace ::com::sun::star::i18n;
29using namespace ::com::sun::star::lang;
30
31namespace i18npool {
32
37{
38 cBreakIterator = "com.sun.star.i18n.BreakIterator_th";
39 // to improve performance, alloc big enough memory in construct.
40 m_aNextCellIndex.assign(512, 0);
41 m_aPreviousCellIndex.assign(512, 0);
42 lineRule=nullptr;
43}
44
49{
50}
51
52sal_Int32 SAL_CALL BreakIterator_th::previousCharacters( const OUString& Text,
53 sal_Int32 nStartPos, const lang::Locale& rLocale,
54 sal_Int16 nCharacterIteratorMode, sal_Int32 nCount, sal_Int32& nDone )
55{
56 if (nCharacterIteratorMode == CharacterIteratorMode::SKIPCELL ) {
57 nDone = 0;
58 if (nStartPos > 0) { // for others to skip cell.
59 makeIndex(Text, nStartPos);
60
61 if (m_aNextCellIndex[nStartPos-1] == 0) // not a CTL character
62 return BreakIterator_Unicode::previousCharacters(Text, nStartPos, rLocale,
63 nCharacterIteratorMode, nCount, nDone);
64 else
65 {
66 while (nCount > 0 && m_aNextCellIndex[nStartPos - 1] > 0)
67 {
68 nCount--; nDone++;
69 nStartPos = m_aPreviousCellIndex[nStartPos - 1];
70 }
71 }
72 } else
73 nStartPos = 0;
74 } else { // for BS to delete one char.
75 for (nDone = 0; nDone < nCount && nStartPos > 0; nDone++)
76 Text.iterateCodePoints(&nStartPos, -1);
77 }
78
79 return nStartPos;
80}
81
82sal_Int32 SAL_CALL BreakIterator_th::nextCharacters(const OUString& Text,
83 sal_Int32 nStartPos, const lang::Locale& rLocale,
84 sal_Int16 nCharacterIteratorMode, sal_Int32 nCount, sal_Int32& nDone)
85{
86 sal_Int32 len = Text.getLength();
87 if (nCharacterIteratorMode == CharacterIteratorMode::SKIPCELL ) {
88 nDone = 0;
89 if (nStartPos < len) {
90 makeIndex(Text, nStartPos);
91
92 if (m_aNextCellIndex[nStartPos] == 0) // not a CTL character
93 return BreakIterator_Unicode::nextCharacters(Text, nStartPos, rLocale,
94 nCharacterIteratorMode, nCount, nDone);
95 else
96 {
97 while (nCount > 0 && m_aNextCellIndex[nStartPos] > 0)
98 {
99 nCount--; nDone++;
100 nStartPos = m_aNextCellIndex[nStartPos];
101 }
102 }
103 } else
104 nStartPos = len;
105 } else {
106 for (nDone = 0; nDone < nCount && nStartPos < Text.getLength(); nDone++)
107 Text.iterateCodePoints(&nStartPos);
108 }
109
110 return nStartPos;
111}
112
113// Make sure line is broken on cell boundary if we implement cell iterator.
114LineBreakResults SAL_CALL BreakIterator_th::getLineBreak(
115 const OUString& Text, sal_Int32 nStartPos,
116 const lang::Locale& rLocale, sal_Int32 nMinBreakPos,
117 const LineBreakHyphenationOptions& hOptions,
118 const LineBreakUserOptions& bOptions )
119{
120 LineBreakResults lbr = BreakIterator_Unicode::getLineBreak(Text, nStartPos,
121 rLocale, nMinBreakPos, hOptions, bOptions );
122 if (lbr.breakIndex < Text.getLength()) {
123 makeIndex(Text, lbr.breakIndex);
124 lbr.breakIndex = m_aPreviousCellIndex[ lbr.breakIndex ];
125 }
126 return lbr;
127}
128
// Code point U+0E33 (Thai SARA AM). getCombState() special-cases it when it is
// the following character of a pair.
129#define SARA_AM 0x0E33
130
131/*
132 * cell composition states
133 */
134
// Values returned by getCombState(); getACell() keeps extending the current
// cell for as long as the state is ST_COM.
135#define ST_COM 1 // Compose the following character with leading char and display in the same cell
136#define ST_NXT 2 // display the following character in the next cell
137#define ST_NDP 3 // non-display
138
// Composition table, looked up by getCombState() as
// thaiCompRel[type of leading char][type of following char]; both indices are
// character types as returned by getCharType(). Entries are presumably the
// ST_* composition states above (getACell() compares the result to ST_COM).
// The vertically written header below labels the columns 0..16.
// NOTE(review): the table rows are absent from this listing (the embedded
// line numbers jump from 144 to 162) - restore them from the original source.
139const sal_Int16 thaiCompRel[MAX_CT][MAX_CT] = {
140 // C N C L F F F B B B T A A A A A A
141 // T O O V V V V V V D O D D D V V V
142 // R N N 1 2 3 1 2 N 1 2 3 1 2 3
143 // L S E
144 // 0 1 2 3 4 5 6 7 8 9 10 11 12 13 14 15 16
162
163};
164
165const sal_uInt32 is_ST_COM = (1<<CT_CTRL)|(1<<CT_NON)|(1<<CT_CONS)|(1<<CT_TONE);
166
167static sal_uInt16 getCombState(const sal_Unicode *text, sal_Int32 pos)
168{
169 sal_uInt16 ch1 = getCharType(text[pos]);
170 sal_uInt16 ch2 = getCharType(text[pos+1]);
171
172 if (text[pos+1] == SARA_AM) {
173 if ((1 << ch1) & is_ST_COM)
174 return ST_COM;
175 else
176 ch2 = CT_AD1;
177 }
178
179 return thaiCompRel[ch1][ch2];
180}
181
182
183static sal_Int32 getACell(const sal_Unicode *text, sal_Int32 pos, sal_Int32 len)
184{
185 sal_uInt32 curr = 1;
186 for (; pos + 1 < len && getCombState(text, pos) == ST_COM; curr++, pos++) {}
187 return curr;
188}
189
190#define is_Thai(c) (0x0e00 <= c && c <= 0x0e7f) // Unicode definition for Thai
191
192void BreakIterator_th::makeIndex(const OUString& Text, sal_Int32 const nStartPos)
193{
194 if (Text != cachedText) {
196 if (m_aNextCellIndex.size() < o3tl::make_unsigned(cachedText.getLength())) {
197 m_aNextCellIndex.resize(cachedText.getLength());
198 m_aPreviousCellIndex.resize(cachedText.getLength());
199 }
200 // reset nextCell for new Text
201 m_aNextCellIndex.assign(cachedText.getLength(), 0);
202 }
203 else if (nStartPos >= Text.getLength() || m_aNextCellIndex[nStartPos] > 0
204 || !is_Thai(Text[nStartPos]))
205 return;
206
207 const sal_Unicode* str = cachedText.getStr();
208 sal_Int32 const len = cachedText.getLength();
209
210 sal_Int32 startPos = nStartPos;
211 while (startPos > 0 && is_Thai(str[startPos-1])) startPos--;
212 sal_Int32 endPos = nStartPos;
213 while (endPos < len && is_Thai(str[endPos])) endPos++;
214
215 sal_Int32 start, end, pos;
216 pos = start = end = startPos;
217
218 assert(endPos >= 0 && o3tl::make_unsigned(endPos) <= m_aNextCellIndex.size());
219 while (pos < endPos) {
220 end += getACell(str, start, endPos);
221 assert(end >= 0 && o3tl::make_unsigned(end) <= m_aNextCellIndex.size());
222 while (pos < end) {
225 pos++;
226 }
227 start = end;
228 }
229}
230
231}
232
233/* vim:set shiftwidth=4 softtabstop=4 expandtab: */
#define ST_COM
#define SARA_AM
#define is_Thai(c)
#define ST_NDP
#define ST_NXT
virtual sal_Int32 SAL_CALL previousCharacters(const OUString &Text, sal_Int32 nStartPos, const css::lang::Locale &nLocale, sal_Int16 nCharacterIteratorMode, sal_Int32 nCount, sal_Int32 &nDone) override
virtual sal_Int32 SAL_CALL nextCharacters(const OUString &Text, sal_Int32 nStartPos, const css::lang::Locale &rLocale, sal_Int16 nCharacterIteratorMode, sal_Int32 nCount, sal_Int32 &nDone) override
virtual css::i18n::LineBreakResults SAL_CALL getLineBreak(const OUString &Text, sal_Int32 nStartPos, const css::lang::Locale &nLocale, sal_Int32 nMinBreakPos, const css::i18n::LineBreakHyphenationOptions &hOptions, const css::i18n::LineBreakUserOptions &bOptions) override
virtual sal_Int32 SAL_CALL previousCharacters(const OUString &text, sal_Int32 start, const css::lang::Locale &nLocale, sal_Int16 nCharacterIteratorMode, sal_Int32 count, sal_Int32 &nDone) override
virtual ~BreakIterator_th() override
Deconstructor.
virtual sal_Int32 SAL_CALL nextCharacters(const OUString &text, sal_Int32 start, const css::lang::Locale &rLocale, sal_Int16 nCharacterIteratorMode, sal_Int32 count, sal_Int32 &nDone) override
void makeIndex(const OUString &text, sal_Int32 pos)
std::vector< sal_Int32 > m_aPreviousCellIndex
std::vector< sal_Int32 > m_aNextCellIndex
virtual css::i18n::LineBreakResults SAL_CALL getLineBreak(const OUString &Text, sal_Int32 nStartPos, const css::lang::Locale &nLocale, sal_Int32 nMinBreakPos, const css::i18n::LineBreakHyphenationOptions &hOptions, const css::i18n::LineBreakUserOptions &bOptions) override
int nCount
def text(shape, orig_st)
Constant values shared between i18npool and, for example, the number formatter.
static sal_uInt16 getCombState(const sal_Unicode *text, sal_Int32 pos)
const sal_Int16 thaiCompRel[MAX_CT][MAX_CT]
static constexpr sal_uInt16 getCharType(sal_Unicode x)
static sal_Int32 getACell(const sal_Unicode *text, sal_Int32 pos, sal_Int32 len)
const sal_uInt32 is_ST_COM
constexpr std::enable_if_t< std::is_signed_v< T >, std::make_unsigned_t< T > > make_unsigned(T value)
end
sal_uInt16 sal_Unicode
size_t pos
#define CT_NON
Definition: wtt.h:30
#define CT_CONS
Definition: wtt.h:31
#define MAX_CT
Definition: wtt.h:47
#define CT_AD1
Definition: wtt.h:40
#define CT_CTRL
Definition: wtt.h:29
#define CT_TONE
Definition: wtt.h:39