LibreOffice Module sw (master) 1
breakit.cxx
Go to the documentation of this file.
1/* -*- Mode: C++; tab-width: 4; indent-tabs-mode: nil; c-basic-offset: 4 -*- */
2/*
3 * This file is part of the LibreOffice project.
4 *
5 * This Source Code Form is subject to the terms of the Mozilla Public
6 * License, v. 2.0. If a copy of the MPL was not distributed with this
7 * file, You can obtain one at http://mozilla.org/MPL/2.0/.
8 *
9 * This file incorporates work covered by the following license notice:
10 *
11 * Licensed to the Apache Software Foundation (ASF) under one or more
12 * contributor license agreements. See the NOTICE file distributed
13 * with this work for additional information regarding copyright
14 * ownership. The ASF licenses this file to you under the Apache
15 * License, Version 2.0 (the "License"); you may not use this file
16 * except in compliance with the License. You may obtain a copy of
17 * the License at http://www.apache.org/licenses/LICENSE-2.0 .
18 */
19
20#include <breakit.hxx>
21#include <swtypes.hxx>
22
23#include <com/sun/star/i18n/ScriptType.hpp>
24#include <com/sun/star/i18n/CharacterIteratorMode.hpp>
25#include <com/sun/star/i18n/BreakIterator.hpp>
27#include <unicode/uchar.h>
29#include <algorithm>
30#include <utility>
31
32using namespace com::sun::star;
33
35
36void SwBreakIt::Create_( const uno::Reference<uno::XComponentContext> & rxContext )
37{
38 delete g_pBreakIt;
39 g_pBreakIt = new SwBreakIt( rxContext );
40}
41
43{
44 delete g_pBreakIt;
45 g_pBreakIt = nullptr;
46}
47
49{
50 return g_pBreakIt;
51}
52
53SwBreakIt::SwBreakIt( uno::Reference<uno::XComponentContext> xContext )
54 : m_xContext(std::move(xContext))
55 , m_xBreak(i18n::BreakIterator::create(m_xContext))
56 , m_aForbiddenLang(LANGUAGE_DONTKNOW)
57{
58}
59
61{
63 m_xLanguageTag->reset(aLang);
64 else
65 m_xLanguageTag.reset(new LanguageTag(aLang));
66}
67
68void SwBreakIt::GetLocale_( const LanguageTag& rLanguageTag )
69{
71 *m_xLanguageTag = rLanguageTag;
72 else
73 m_xLanguageTag.reset(new LanguageTag(rLanguageTag));
74}
75
77{
79
80 m_aForbiddenLang = aLang;
81 m_oForbidden.emplace(aWrap.getForbiddenCharacters());
82}
83
84sal_uInt16 SwBreakIt::GetRealScriptOfText( const OUString& rText, sal_Int32 nPos ) const
85{
86 sal_uInt16 nScript = i18n::ScriptType::WEAK;
87 if (!rText.isEmpty())
88 {
89 if( nPos && nPos == rText.getLength() )
90 --nPos;
91 else if( nPos < 0)
92 nPos = 0;
93
94 nScript = m_xBreak->getScriptType(rText, nPos);
95 sal_Int32 nChgPos = 0;
96 if (i18n::ScriptType::WEAK == nScript && nPos >= 0 && nPos + 1 < rText.getLength())
97 {
98 // A weak character followed by a mark may be meant to combine with
99 // the mark, so prefer the following character's script
100 switch (u_charType(rText[nPos + 1]))
101 {
102 case U_NON_SPACING_MARK:
103 case U_ENCLOSING_MARK:
104 case U_COMBINING_SPACING_MARK:
105 nScript = m_xBreak->getScriptType(rText, nPos+1);
106 break;
107 }
108 }
109 if( i18n::ScriptType::WEAK == nScript && nPos )
110 {
111 nChgPos = m_xBreak->beginOfScript(rText, nPos, nScript);
112 if( 0 < nChgPos )
113 nScript = m_xBreak->getScriptType(rText, nChgPos-1);
114 }
115
116 if( i18n::ScriptType::WEAK == nScript )
117 {
118 nChgPos = m_xBreak->endOfScript(rText, nPos, nScript);
119 if( rText.getLength() > nChgPos && 0 <= nChgPos )
120 nScript = m_xBreak->getScriptType(rText, nChgPos);
121 }
122 }
123 if( i18n::ScriptType::WEAK == nScript )
125 return nScript;
126}
127
128SvtScriptType SwBreakIt::GetAllScriptsOfText( const OUString& rText ) const
129{
130 const SvtScriptType coAllScripts = SvtScriptType::LATIN |
131 SvtScriptType::ASIAN |
132 SvtScriptType::COMPLEX;
133 SvtScriptType nRet = SvtScriptType::NONE;
134 sal_uInt16 nScript = 0;
135 if (!rText.isEmpty())
136 {
137 for( sal_Int32 n = 0, nEnd = rText.getLength(); n < nEnd;
138 n = m_xBreak->endOfScript(rText, n, nScript) )
139 {
140 nScript = m_xBreak->getScriptType(rText, n);
141 switch( nScript )
142 {
143 case i18n::ScriptType::LATIN: nRet |= SvtScriptType::LATIN; break;
144 case i18n::ScriptType::ASIAN: nRet |= SvtScriptType::ASIAN; break;
145 case i18n::ScriptType::COMPLEX: nRet |= SvtScriptType::COMPLEX; break;
146 case i18n::ScriptType::WEAK:
147 if( nRet == SvtScriptType::NONE )
148 nRet |= coAllScripts;
149 break;
150 }
151 if( coAllScripts == nRet )
152 break;
153 }
154 }
155 return nRet;
156}
157
158sal_Int32 SwBreakIt::getGraphemeCount(const OUString& rText,
159 sal_Int32 nStart, sal_Int32 nEnd) const
160{
161 sal_Int32 nGraphemeCount = 0;
162
163 sal_Int32 nCurPos = std::max(static_cast<sal_Int32>(0), nStart);
164 while (nCurPos < nEnd)
165 {
166 // fdo#49208 cheat and assume that nothing can combine with a space
167 // to form a single grapheme
168 if (rText[nCurPos] == ' ')
169 {
170 ++nCurPos;
171 }
172 else
173 {
174 sal_Int32 nCount2 = 1;
175 nCurPos = m_xBreak->nextCharacters(rText, nCurPos, lang::Locale(),
176 i18n::CharacterIteratorMode::SKIPCELL, nCount2, nCount2);
177 }
178 ++nGraphemeCount;
179 }
180
181 return nGraphemeCount;
182}
183
184/* vim:set shiftwidth=4 softtabstop=4 expandtab: */
Reference< XComponentContext > m_xContext
SwBreakIt * g_pBreakIt
Definition: breakit.cxx:34
css::i18n::ForbiddenCharacters getForbiddenCharacters() const
static void Delete_()
Definition: breakit.cxx:42
static SwBreakIt * Get()
Definition: breakit.cxx:48
void GetForbidden_(const LanguageType aLang)
Definition: breakit.cxx:76
static void Create_(const css::uno::Reference< css::uno::XComponentContext > &rxContext)
Definition: breakit.cxx:36
sal_Int32 getGraphemeCount(const OUString &rStr, sal_Int32 nStart, sal_Int32 nEnd) const
Definition: breakit.cxx:158
std::optional< css::i18n::ForbiddenCharacters > m_oForbidden
Definition: breakit.hxx:40
SwBreakIt(SwBreakIt const &)=delete
SvtScriptType GetAllScriptsOfText(const OUString &rText) const
Definition: breakit.cxx:128
void GetLocale_(const LanguageType aLang)
Definition: breakit.cxx:60
LanguageType m_aForbiddenLang
language of the current forbiddenChar struct
Definition: breakit.hxx:42
sal_uInt16 GetRealScriptOfText(const OUString &rText, sal_Int32 nPos) const
Definition: breakit.cxx:84
std::unique_ptr< LanguageTag > m_xLanguageTag
language tag of the current locale
Definition: breakit.hxx:39
css::uno::Reference< css::uno::XComponentContext > m_xContext
Definition: breakit.hxx:36
css::uno::Reference< css::i18n::XBreakIterator > m_xBreak
Definition: breakit.hxx:37
const LanguageTag & GetLanguageTag(const LanguageType aLang)
Definition: breakit.hxx:86
LanguageType GetAppLanguage()
Definition: init.cxx:741
sal_Int64 n
#define LANGUAGE_DONTKNOW
SvtScriptType
sal_uInt16 nPos
sal_Int16 GetI18NScriptTypeOfLanguage(LanguageType nLang)
css::uno::Reference< css::deployment::XPackageRegistry > create(css::uno::Reference< css::deployment::XPackageRegistry > const &xRootRegistry, OUString const &context, OUString const &cachePath, css::uno::Reference< css::uno::XComponentContext > const &xComponentContext)