LibreOffice Module i18npool (master)  1
indexentrysupplier_default.cxx
Go to the documentation of this file.
1 /* -*- Mode: C++; tab-width: 4; indent-tabs-mode: nil; c-basic-offset: 4 -*- */
2 /*
3  * This file is part of the LibreOffice project.
4  *
5  * This Source Code Form is subject to the terms of the Mozilla Public
6  * License, v. 2.0. If a copy of the MPL was not distributed with this
7  * file, You can obtain one at http://mozilla.org/MPL/2.0/.
8  *
9  * This file incorporates work covered by the following license notice:
10  *
11  * Licensed to the Apache Software Foundation (ASF) under one or more
12  * contributor license agreements. See the NOTICE file distributed
13  * with this work for additional information regarding copyright
14  * ownership. The ASF licenses this file to you under the Apache
15  * License, Version 2.0 (the "License"); you may not use this file
16  * except in compliance with the License. You may obtain a copy of
17  * the License at http://www.apache.org/licenses/LICENSE-2.0 .
18  */
19 
21 #include <collatorImpl.hxx>
22 #include <localedata.hxx>
23 #include <i18nutil/unicode.hxx>
24 #include <com/sun/star/i18n/CollatorOptions.hpp>
25 #include <o3tl/temporary.hxx>
26 
27 using namespace ::com::sun::star;
28 using namespace ::com::sun::star::uno;
29 using namespace ::com::sun::star::i18n;
30 using namespace ::com::sun::star::lang;
31 
32 namespace i18npool {
33 
35  const css::uno::Reference < css::uno::XComponentContext >& rxContext ) :
36  IndexEntrySupplier_Common(rxContext)
37 {
38  implementationName = "com.sun.star.i18n.IndexEntrySupplier_Unicode";
39  index.reset( new Index(rxContext) );
40 }
41 
43 {
44 }
45 
46 sal_Bool SAL_CALL IndexEntrySupplier_Unicode::loadAlgorithm( const lang::Locale& rLocale,
47  const OUString& rAlgorithm, sal_Int32 collatorOptions )
48 {
49  index->init(rLocale, rAlgorithm);
50  return IndexEntrySupplier_Common::loadAlgorithm(rLocale, rAlgorithm, collatorOptions);
51 }
52 
53 OUString SAL_CALL IndexEntrySupplier_Unicode::getIndexKey( const OUString& rIndexEntry,
54  const OUString& rPhoneticEntry, const lang::Locale& rLocale )
55 {
56  return index->getIndexDescription(getEntry(rIndexEntry, rPhoneticEntry, rLocale));
57 }
58 
60  const OUString& rIndexEntry1, const OUString& rPhoneticEntry1, const lang::Locale& rLocale1,
61  const OUString& rIndexEntry2, const OUString& rPhoneticEntry2, const lang::Locale& rLocale2 )
62 {
63  sal_Int16 result =
64  index->getIndexWeight(getEntry(rIndexEntry1, rPhoneticEntry1, rLocale1)) -
65  index->getIndexWeight(getEntry(rIndexEntry2, rPhoneticEntry2, rLocale2));
66  if (result == 0)
68  rIndexEntry1, rPhoneticEntry1, rLocale1,
69  rIndexEntry2, rPhoneticEntry2, rLocale2);
70  return result > 0 ? 1 : -1;
71 }
72 
73 OUString SAL_CALL IndexEntrySupplier_Unicode::getIndexCharacter( const OUString& rIndexEntry,
74  const lang::Locale& rLocale, const OUString& rAlgorithm ) {
75 
76  if (loadAlgorithm( rLocale, rAlgorithm, CollatorOptions::CollatorOptions_IGNORE_CASE_ACCENT))
77  return index->getIndexDescription(rIndexEntry);
78  else
79  return IndexEntrySupplier_Common::getIndexCharacter(rIndexEntry, rLocale, rAlgorithm);
80 }
81 
83  : start(0)
84  , end(0)
85  , table(nullptr)
86 {
87 }
88 
90 {
91  if (table) free(table);
92 }
93 
94 void IndexTable::init(sal_Unicode start_, sal_Unicode end_, IndexKey const *keys, sal_Int16 key_count, Index *index)
95 {
96  start=start_;
97  end=end_;
98  table = static_cast<sal_uInt8*>(malloc((end-start+1)*sizeof(sal_uInt8)));
99  for (sal_Unicode i = start; i <= end; i++) {
100  sal_Int16 j;
101  for (j = 0; j < key_count; j++) {
102  if (keys[j].key > 0 && (i == keys[j].key || index->compare(i, keys[j].key) == 0)) {
103  table[i-start] = sal::static_int_cast<sal_uInt8>(j);
104  break;
105  }
106  }
107  if (j == key_count)
108  table[i-start] = 0xFF;
109  }
110 }
111 
112 Index::Index(const css::uno::Reference < css::uno::XComponentContext >& rxContext)
113  : table_count(0)
114  , key_count(0)
115  , mkey_count(0)
116  , collator( new CollatorImpl(rxContext) )
117 {
118 }
119 
121 {
122 }
123 
125 {
126  return sal::static_int_cast<sal_Int16>( collator->compareString(OUString(&c1, 1), OUString(&c2, 1)) );
127 }
128 
129 sal_Int16 Index::getIndexWeight(const OUString& rIndexEntry)
130 {
131  sal_Int32 startPos=0;
132  if (!skipping_chars.isEmpty())
133  while (skipping_chars.indexOf(rIndexEntry[startPos]) >= 0)
134  startPos++;
135  if (mkey_count > 0) {
136  for (sal_Int16 i = 0; i < mkey_count; i++) {
137  sal_Int32 len = keys[mkeys[i]].mkey.getLength();
138  if (collator->compareSubstring(rIndexEntry, startPos, len,
139  keys[mkeys[i]].mkey, 0, len) == 0)
140  return mkeys[i];
141  }
142  }
143  sal_Unicode code = startPos < rIndexEntry.getLength() ? rIndexEntry[startPos] : 0;
144  for (sal_Int16 i = 0; i < table_count; i++) {
145  if (tables[i].start <= code && code <= tables[i].end)
146  return tables[i].table[code-tables[i].start];
147  }
148  return 0xFF;
149 }
150 
151 OUString Index::getIndexDescription(const OUString& rIndexEntry)
152 {
153  sal_Int16 wgt = getIndexWeight(rIndexEntry);
154  if (wgt < MAX_KEYS) {
155  if (!keys[wgt].desc.isEmpty())
156  return keys[wgt].desc;
157  else if (keys[wgt].key > 0)
158  return OUString(&keys[wgt].key, 1);
159  else
160  return keys[wgt].mkey;
161  }
162  sal_uInt32 indexChar=rIndexEntry.iterateCodePoints(&o3tl::temporary(sal_Int32(0)), 0);
163  return OUString(&indexChar, 1);
164 }
165 
// Locale "en" with empty second and third constructor arguments. Used in this
// file as the fallback when the requested locale yields no index keys
// (Index::makeIndexKeys) or no Unicode script list (Index::init).
166 #define LOCALE_EN lang::Locale("en", OUString(), OUString())
167 
// Parses the index key string for the given locale/algorithm and fills
// keys[], mkeys[] and skipping_chars from it.
//
// The key string is obtained from LocaleDataImpl; if it is empty, the default
// index algorithm of LOCALE_EN is used instead.
//
// Syntax handled by the parser below (whitespace between items is ignored):
//   x        a single-character key
//   -x       range continuation up to character x (needs a preceding key)
//   [ ... ]  characters to be skipped; "a_b" inside adds a range of characters
//   { ... }  a multi-character key (stored in mkey, key set to 0)
//   ( ... )  description text for the preceding key
//
// Throws RuntimeException when no key string is available, when '-', '(' or
// '{' appear without a preceding key, when '-' is the last character, or
// when a closing ')' / '}' is missing.
168 void Index::makeIndexKeys(const lang::Locale &rLocale, std::u16string_view algorithm)
169 {
170  OUString keyStr = LocaleDataImpl::get()->getIndexKeysByAlgorithm(rLocale, algorithm);
171 
// Nothing for this locale/algorithm: fall back to the English default.
172  if (keyStr.isEmpty()) {
173  keyStr = LocaleDataImpl::get()->getIndexKeysByAlgorithm(LOCALE_EN,
174  LocaleDataImpl::get()->getDefaultIndexAlgorithm(LOCALE_EN));
175  if (keyStr.isEmpty())
176  throw RuntimeException(
177  "Index::makeIndexKeys: No index keys returned by algorithm");
178  }
179 
180  sal_Int16 len = sal::static_int_cast<sal_Int16>( keyStr.getLength() );
182  skipping_chars=OUString();
183  sal_Int16 i, j;
184 
// Parsing stops at the end of the string or once MAX_KEYS keys exist.
185  for (i = 0; i < len && key_count < MAX_KEYS; i++)
186  {
187  sal_Unicode curr = keyStr[i];
// Closing delimiter for the bracket cases; switched to '}' for '{'.
188  sal_Unicode close = ')';
189 
190  if (unicode::isWhiteSpace(curr))
191  continue;
192 
193  switch(curr) {
194  case u'-': {
195  if (key_count <= 0 || i + 1 >= len)
196  throw RuntimeException("Index::makeIndexKeys: key_count<=0||"
197  "'-' is the last char of KeyString");
// curr becomes the range end; entries are appended while the previous
// key is still below it and there is room left in keys[].
198  for (curr = keyStr[++i]; key_count < MAX_KEYS && keys[key_count-1].key < curr; key_count++) {
200  keys[key_count].desc.clear();
201  }
202  break;
203  }
204  case u'[':
// Collect skipping characters up to the closing ']'.
205  for (i++; i < len && keyStr[i] != ']'; i++) {
206  if (unicode::isWhiteSpace(keyStr[i])) {
207  continue;
208  } else if (keyStr[i] == '_') {
// "a_b": the character before '_' was appended by the else branch on
// the previous iteration; add everything after it up to and including
// the character following '_'.
// NOTE(review): keyStr[i+1] is read without checking i+1 < len — confirm
// the key strings never end in '_'.
209  for (curr=keyStr[i-1]+1; curr <= keyStr[i+1]; curr++)
210  skipping_chars+=OUStringChar(curr);
211  i+=2;
212  } else {
213  skipping_chars+=OUStringChar(keyStr[i]);
214  }
215  }
216  break;
217  case u'{':
218  close = '}';
219  [[fallthrough]];
220  case u'(': {
221  if (key_count <= 0)
222  throw RuntimeException("Index::makeIndexKeys: key_count<=0");
223 
// Locate the matching closing delimiter.
224  sal_Int16 end = i+1;
225  for (; end < len && keyStr[end] != close; end++) ;
226 
227  if (end >= len) // no found
228  throw RuntimeException("Index::makeIndexKeys: Closing bracket not found");
229  if (close == ')')
// "(text)": description for the key added last.
230  keys[key_count-1].desc = keyStr.copy(i+1, end-i-1);
231  else {
// "{text}": new entry holding a multi-character key; its position in
// keys[] is recorded in mkeys[].
232  mkeys[mkey_count++]=key_count;
233  keys[key_count].key = 0;
234  keys[key_count].mkey = keyStr.copy(i+1, end-i-1);
235  keys[key_count++].desc.clear();
236  }
// NOTE(review): the enclosing for-loop increments i once more after this
// assignment, so the character directly after the closing delimiter is
// not examined — confirm this is intended (e.g. a separator is expected).
237  i=end+1;
238  break;
239  }
240  default:
// Any other character is a plain single-character key.
241  keys[key_count].key = curr;
242  keys[key_count++].desc.clear();
243  break;
244  }
245  }
// Reorder mkeys[] so that entries with longer mkey strings come first.
246  for (i = 0; i < mkey_count; i++) {
247  for (j=i+1; j < mkey_count; j++) {
248  if (keys[mkeys[i]].mkey.getLength() < keys[mkeys[j]].mkey.getLength()) {
249  sal_Int16 k = mkeys[i];
250  mkeys[i] = mkeys[j];
251  mkeys[j] = k;
252  }
253  }
254  }
255 }
256 
// Initialises the index for a locale and algorithm: parses the index keys,
// loads the collator with CollatorOptions_IGNORE_CASE_ACCENT and builds one
// IndexTable per run of adjacent Unicode script ranges.
//
// The script list comes from LocaleDataImpl for rLocale, falling back to
// LOCALE_EN when it is empty.
//
// Throws RuntimeException when no script list is available or when the list
// has more than MAX_TABLES elements.
257 void Index::init(const lang::Locale &rLocale, const OUString& algorithm)
258 {
259  makeIndexKeys(rLocale, algorithm);
260 
261  Sequence< UnicodeScript > scriptList = LocaleDataImpl::get()->getUnicodeScripts( rLocale );
262 
263  if (!scriptList.hasElements()) {
264  scriptList = LocaleDataImpl::get()->getUnicodeScripts(LOCALE_EN);
265  if (!scriptList.hasElements())
266  throw RuntimeException("Index::init: scriptList is empty");
267  }
268 
// table_count temporarily holds the script count for the bound check; it is
// overwritten with the real number of tables at the end.
269  table_count = sal::static_int_cast<sal_Int16>( scriptList.getLength() );
270  if (table_count > MAX_TABLES)
271  throw RuntimeException("Index::init: Length of scriptList is too big");
272 
273  collator->loadCollatorAlgorithm(algorithm, rLocale, CollatorOptions::CollatorOptions_IGNORE_CASE_ACCENT);
// j counts the tables initialised so far.
274  sal_Int16 j=0;
// The current range initially ends where UnicodeScript(0) ends; if the list
// itself starts with UnicodeScript(0), that element is skipped in the loop.
276  sal_Unicode end = unicode::getUnicodeScriptEnd(UnicodeScript(0));
277  for (sal_Int32 i= (scriptList[0] == UnicodeScript(0)) ? 1 : 0; i< scriptList.getLength(); i++) {
// A script that does not start right after the current range closes that
// range (one table is emitted) and opens a new one; an adjacent script just
// extends the current range.
// NOTE(review): when the list does not begin with UnicodeScript(0), up to
// scriptList.getLength()+1 tables can be written, while the check above only
// limits the list length to MAX_TABLES — confirm tables[] cannot overflow.
278  if (unicode::getUnicodeScriptStart(scriptList[i]) != end+1) {
279  tables[j++].init(start, end, keys, key_count, this);
280  start = unicode::getUnicodeScriptStart(scriptList[i]);
281  }
282  end = unicode::getUnicodeScriptEnd(scriptList[i]);
283  }
// Emit the table for the last open range.
284  tables[j++].init(start, end, keys, key_count, this);
285  table_count = j;
286 }
287 
288 }
289 
290 /* vim:set shiftwidth=4 softtabstop=4 expandtab: */
const OUString & getEntry(const OUString &IndexEntry, const OUString &PhoneticEntry, const css::lang::Locale &rLocale)
sal_Int16 getIndexWeight(const OUString &rIndexEntry)
static sal_Unicode getUnicodeScriptStart(css::i18n::UnicodeScript type)
exports com.sun.star. table
static bool isWhiteSpace(const sal_Unicode ch)
Index(const css::uno::Reference< css::uno::XComponentContext > &rxContext)
void makeIndexKeys(const css::lang::Locale &rLocale, std::u16string_view algorithm)
sal_Unicode code
OUString getIndexDescription(const OUString &rIndexEntry)
virtual OUString SAL_CALL getIndexKey(const OUString &IndexEntry, const OUString &PhoneticEntry, const css::lang::Locale &rLocale) override
sal_uInt16 sal_Unicode
rtl::Reference< CollatorImpl > collator
sal_Int16 compare(sal_Unicode c1, sal_Unicode c2)
virtual OUString SAL_CALL getIndexCharacter(const OUString &rIndexEntry, const css::lang::Locale &rLocale, const OUString &rSortAlgorithm) override
static rtl::Reference< LocaleDataImpl > get()
Definition: localedata.hxx:62
const UBlockScript scriptList[]
int i
virtual sal_Int16 SAL_CALL compareIndexEntry(const OUString &IndexEntry1, const OUString &PhoneticEntry1, const css::lang::Locale &rLocale1, const OUString &IndexEntry2, const OUString &PhoneticEntry2, const css::lang::Locale &rLocale2) override
#define MAX_KEYS
float u
unsigned char sal_Bool
IndexEntrySupplier_Unicode(const css::uno::Reference< css::uno::XComponentContext > &rxContext)
void init(const css::lang::Locale &rLocale, const OUString &algorithm)
Constant values shared between i18npool and, for example, the number formatter.
enumrange< T >::Iterator end(enumrange< T >)
virtual OUString SAL_CALL getIndexCharacter(const OUString &rIndexEntry, const css::lang::Locale &rLocale, const OUString &rSortAlgorithm) override
constexpr T & temporary(T &&x)
bool close
static sal_Unicode getUnicodeScriptEnd(css::i18n::UnicodeScript type)
IndexTable tables[MAX_TABLES]
unsigned char sal_uInt8
virtual sal_Bool SAL_CALL loadAlgorithm(const css::lang::Locale &rLocale, const OUString &SortAlgorithm, sal_Int32 collatorOptions) override
virtual sal_Int16 SAL_CALL compareIndexEntry(const OUString &IndexEntry1, const OUString &PhoneticEntry1, const css::lang::Locale &rLocale1, const OUString &IndexEntry2, const OUString &PhoneticEntry2, const css::lang::Locale &rLocale2) override
Any result
sal_Int16 mkeys[MAX_KEYS]
#define MAX_TABLES
void init(sal_Unicode start_, sal_Unicode end_, IndexKey const *keys, sal_Int16 key_count, Index *index)
#define LOCALE_EN
virtual sal_Bool SAL_CALL loadAlgorithm(const css::lang::Locale &rLocale, const OUString &SortAlgorithm, sal_Int32 collatorOptions) override