20#include <config_folders.h>
23#include <osl/module.h>
24#include <osl/mutex.hxx>
25#include <rtl/bootstrap.hxx>
26#include <com/sun/star/i18n/ScriptType.hpp>
27#include <com/sun/star/i18n/WordType.hpp>
29#include <unicode/uchar.h>
37#ifdef DICT_JA_ZH_IN_DATAFILE
39#elif !defined DISABLE_DYNLOADING
48sal_Int16* getIndex1_ja();
49sal_Int32* getIndex2_ja();
50sal_Int32* getLenArray_ja();
54sal_Int16* getIndex1_zh();
55sal_Int32* getIndex2_zh();
56sal_Int32* getLenArray_zh();
64 japaneseWordBreak( false )
65#ifdef DICT_JA_ZH_IN_DATAFILE
66 , m_aFileHandle(nullptr),
72#ifdef DICT_JA_ZH_IN_DATAFILE
74 if( strcmp( lang,
"ja" ) == 0 || strcmp( lang,
"zh" ) == 0 )
76 OUString sUrl(
"$BRAND_BASE_DIR/" LIBO_SHARE_FOLDER
"/dict_" );
77 rtl::Bootstrap::expandMacros(sUrl);
79 if( strcmp( lang,
"ja" ) == 0 )
81 else if( strcmp( lang,
"zh" ) == 0 )
84 if( osl_openFile( sUrl.pData, &m_aFileHandle, osl_File_OpenFlag_Read ) == osl_File_E_None &&
85 osl_getFileSize( m_aFileHandle, &m_nFileSize) == osl_File_E_None &&
86 osl_mapFile( m_aFileHandle, (
void **) &m_pMapping, m_nFileSize, 0, osl_File_MapFlag_RandomAccess ) == osl_File_E_None )
89 sal_Int64 *pEOF = (sal_Int64*)(m_pMapping + m_nFileSize);
92 data.
index2 = (sal_Int32*) (m_pMapping + pEOF[-2]);
93 data.
index1 = (sal_Int16*) (m_pMapping + pEOF[-3]);
99#elif !defined DISABLE_DYNLOADING
105 if( strcmp( lang,
"ja" ) == 0 ) {
112 else if( strcmp( lang,
"zh" ) == 0 ) {
132 delete []
i.contents;
133 delete []
i.wordboundary;
136#ifdef DICT_JA_ZH_IN_DATAFILE
139 osl_unmapMappedFile(m_aFileHandle, m_pMapping, m_nFileSize);
141 osl_closeFile(m_aFileHandle);
154#if !defined(DICT_JA_ZH_IN_DATAFILE) && !defined(DISABLE_DYNLOADING)
159 static std::vector< datacache > aLoadedCache;
161 osl::MutexGuard aGuard( osl::Mutex::getGlobalMutex() );
162 for(
const datacache &
i : aLoadedCache)
164 if(
i.maLang == pLang )
173 aEntry.maLang = OString( pLang, strlen( pLang ) );
176 OString sModuleName =
179 OString sModuleName =
182 aEntry.mhModule = osl_loadModuleRelativeAscii( &
thisModule, sModuleName.getStr(), SAL_LOADMODULE_DEFAULT );
183 if( aEntry.mhModule ) {
184 oslGenericFunction func;
185 func = osl_getAsciiFunctionSymbol( aEntry.mhModule,
"getExistMark" );
186 aEntry.maData.existMark =
reinterpret_cast<sal_uInt8 const * (*)()
>(func)();
187 func = osl_getAsciiFunctionSymbol( aEntry.mhModule,
"getIndex1" );
188 aEntry.maData.index1 =
reinterpret_cast<sal_Int16
const * (*)()
>(func)();
189 func = osl_getAsciiFunctionSymbol( aEntry.mhModule,
"getIndex2" );
190 aEntry.maData.index2 =
reinterpret_cast<sal_Int32
const * (*)()
>(func)();
191 func = osl_getAsciiFunctionSymbol( aEntry.mhModule,
"getLenArray" );
192 aEntry.maData.lenArray =
reinterpret_cast<sal_Int32
const * (*)()
>(func)();
193 func = osl_getAsciiFunctionSymbol( aEntry.mhModule,
"getDataArea" );
194 aEntry.maData.dataArea =
reinterpret_cast<sal_Unicode const * (*)()
>(func)();
197 data = aEntry.maData;
198 aLoadedCache.push_back( aEntry );
224 if (
idx == 0xFF)
return 0;
226 idx = (
idx<<8) | (str[0]&0xff);
230 if (
begin == 0)
return 0;
255 wordboundary( nullptr ),
268 if (
length != boundary.endPos - boundary.startPos)
return false;
271 if (
contents[
i] != str[
i + boundary.startPos])
return false;
283 Boundary& segBoundary)
285 sal_Int32 indexUtf16;
298 rText.iterateCodePoints(&indexUtf16);
303 segBoundary.endPos = segBoundary.startPos =
pos;
306 while (indexUtf16 > 0)
308 sal_uInt32
ch = rText.iterateCodePoints(&indexUtf16, -1);
310 segBoundary.startPos = indexUtf16;
316 while (indexUtf16 < rText.getLength())
318 sal_uInt32
ch = rText.iterateCodePoints(&indexUtf16);
320 segBoundary.endPos = indexUtf16;
329 indexUtf16 = segBoundary.startPos;
330 rText.iterateCodePoints(&indexUtf16);
331 return segBoundary.endPos > indexUtf16;
340 if (0x3041 <= c && c <= 0x309e)
342 if ((0x30a1 <= c && c <= 0x30fe) || (0xff65 <= c && c <= 0xff9f))
354 sal_Int32 len = wordBoundary.endPos - wordBoundary.startPos;
356 if (rCache.
size == 0 || len > rCache.
size) {
357 if (rCache.
size != 0) {
371 memset(rCache.
wordboundary,
'\0',
sizeof(sal_Int32)*(len + 2));
377 while (u_isWhitespace(
static_cast<sal_uInt32
>(
text[wordBoundary.startPos + rCache.
wordboundary[
i] + len])))
384 for (;len == 0 && slen > 0; str++, slen--) {
420 rText.iterateCodePoints(&anyPos, -1);
422 while (anyPos > 0 && u_isWhitespace(
ch))
ch = rText.iterateCodePoints(&anyPos, -1);
431 const sal_Int32 nLen = rText.getLength();
434 sal_uInt32
ch = rText.iterateCodePoints(&anyPos);
435 while (u_isWhitespace(
ch) && (anyPos < nLen))
ch=rText.iterateCodePoints(&anyPos);
437 rText.iterateCodePoints(&anyPos, -1);
446 sal_Int32 len=rText.getLength();
447 if (anyPos >= len || anyPos < 0) {
457 if (!bDirection && startPos > 0 && startPos == (anyPos -
boundary.startPos))
460 if (u_isWhitespace(
ch))
470 if (anyPos < len) rText.iterateCodePoints(&anyPos);
471 boundary.endPos = std::min(anyPos, len);
473 if (wordType == WordType::WORD_COUNT) {
477 sal_Int32 indexUtf16 =
boundary.endPos;
478 if (u_ispunct(rText.iterateCodePoints(&indexUtf16)))
static sal_Int16 getScriptClass(sal_uInt32 currentChar)
css::i18n::Boundary nextWord(const OUString &rText, sal_Int32 nPos, sal_Int16 wordType)
css::i18n::Boundary boundary
sal_Int32 getLongestMatch(const sal_Unicode *text, sal_Int32 len) const
void initDictionaryData(const char *lang)
css::i18n::Boundary previousWord(const OUString &rText, sal_Int32 nPos, sal_Int16 wordType)
WordBreakCache cache[CACHE_MAX]
css::i18n::Boundary const & getWordBoundary(const OUString &rText, sal_Int32 nPos, sal_Int16 wordType, bool bDirection)
bool seekSegment(const OUString &rText, sal_Int32 pos, css::i18n::Boundary &boundary)
WordBreakCache & getCache(const sal_Unicode *text, css::i18n::Boundary const &boundary)
css::i18n::Boundary segmentCachedBoundary
void setJapaneseWordBreak()
OUString segmentCachedString
bool exists(const sal_uInt32 u) const
xdictionary(const char *lang)
Constant values shared between i18npool and, for example, the number formatter.
static sal_Int16 JapaneseCharType(sal_Unicode c)
enumrange< T >::Iterator begin(enumrange< T >)
constexpr T & temporary(T &&x)
bool equals(const sal_Unicode *str, css::i18n::Boundary const &boundary) const
const sal_Unicode * dataArea
const sal_Int32 * lenArray
const sal_uInt8 * existMark