LibreOffice Module sw (master)  1
breakit.cxx
Go to the documentation of this file.
1 /* -*- Mode: C++; tab-width: 4; indent-tabs-mode: nil; c-basic-offset: 4 -*- */
2 /*
3  * This file is part of the LibreOffice project.
4  *
5  * This Source Code Form is subject to the terms of the Mozilla Public
6  * License, v. 2.0. If a copy of the MPL was not distributed with this
7  * file, You can obtain one at http://mozilla.org/MPL/2.0/.
8  *
9  * This file incorporates work covered by the following license notice:
10  *
11  * Licensed to the Apache Software Foundation (ASF) under one or more
12  * contributor license agreements. See the NOTICE file distributed
13  * with this work for additional information regarding copyright
14  * ownership. The ASF licenses this file to you under the Apache
15  * License, Version 2.0 (the "License"); you may not use this file
16  * except in compliance with the License. You may obtain a copy of
17  * the License at http://www.apache.org/licenses/LICENSE-2.0 .
18  */
19 
20 #include <breakit.hxx>
21 #include <swtypes.hxx>
22 
23 #include <com/sun/star/i18n/ScriptType.hpp>
24 #include <com/sun/star/i18n/CharacterIteratorMode.hpp>
25 #include <com/sun/star/i18n/BreakIterator.hpp>
26 #include <svl/languageoptions.hxx>
27 #include <unicode/uchar.h>
29 #include <algorithm>
30 
31 using namespace com::sun::star;
32 
33 SwBreakIt* g_pBreakIt = nullptr;
34 
35 void SwBreakIt::Create_( const uno::Reference<uno::XComponentContext> & rxContext )
36 {
37  delete g_pBreakIt;
38  g_pBreakIt = new SwBreakIt( rxContext );
39 }
40 
42 {
43  delete g_pBreakIt;
44  g_pBreakIt = nullptr;
45 }
46 
48 {
49  return g_pBreakIt;
50 }
51 
52 SwBreakIt::SwBreakIt( const uno::Reference<uno::XComponentContext> & rxContext )
53  : m_xContext(rxContext)
54  , m_xBreak(i18n::BreakIterator::create(m_xContext))
55  , m_aForbiddenLang(LANGUAGE_DONTKNOW)
56 {
57 }
58 
60 {
61  if (m_xLanguageTag)
62  m_xLanguageTag->reset(aLang);
63  else
64  m_xLanguageTag.reset(new LanguageTag(aLang));
65 }
66 
67 void SwBreakIt::GetLocale_( const LanguageTag& rLanguageTag )
68 {
69  if (m_xLanguageTag)
70  *m_xLanguageTag = rLanguageTag;
71  else
72  m_xLanguageTag.reset(new LanguageTag(rLanguageTag));
73 }
74 
76 {
78 
79  m_aForbiddenLang = aLang;
80  m_xForbidden.reset(new i18n::ForbiddenCharacters(aWrap.getForbiddenCharacters()));
81 }
82 
83 sal_uInt16 SwBreakIt::GetRealScriptOfText( const OUString& rText, sal_Int32 nPos ) const
84 {
85  sal_uInt16 nScript = i18n::ScriptType::WEAK;
86  if (!rText.isEmpty())
87  {
88  if( nPos && nPos == rText.getLength() )
89  --nPos;
90  else if( nPos < 0)
91  nPos = 0;
92 
93  nScript = m_xBreak->getScriptType(rText, nPos);
94  sal_Int32 nChgPos = 0;
95  if (i18n::ScriptType::WEAK == nScript && nPos >= 0 && nPos + 1 < rText.getLength())
96  {
97  // A weak character followed by a mark may be meant to combine with
98  // the mark, so prefer the following character's script
99  switch (u_charType(rText[nPos + 1]))
100  {
101  case U_NON_SPACING_MARK:
102  case U_ENCLOSING_MARK:
103  case U_COMBINING_SPACING_MARK:
104  nScript = m_xBreak->getScriptType(rText, nPos+1);
105  break;
106  }
107  }
108  if( i18n::ScriptType::WEAK == nScript &&
109  nPos &&
110  0 < ( nChgPos = m_xBreak->beginOfScript(rText, nPos, nScript) ) )
111  {
112  nScript = m_xBreak->getScriptType(rText, nChgPos-1);
113  }
114 
115  if( i18n::ScriptType::WEAK == nScript &&
116  rText.getLength() > ( nChgPos = m_xBreak->endOfScript(rText, nPos, nScript) ) &&
117  0 <= nChgPos )
118  {
119  nScript = m_xBreak->getScriptType(rText, nChgPos);
120  }
121  }
122  if( i18n::ScriptType::WEAK == nScript )
124  return nScript;
125 }
126 
127 SvtScriptType SwBreakIt::GetAllScriptsOfText( const OUString& rText ) const
128 {
129  const SvtScriptType coAllScripts = SvtScriptType::LATIN |
130  SvtScriptType::ASIAN |
131  SvtScriptType::COMPLEX;
132  SvtScriptType nRet = SvtScriptType::NONE;
133  sal_uInt16 nScript = 0;
134  if (!rText.isEmpty())
135  {
136  for( sal_Int32 n = 0, nEnd = rText.getLength(); n < nEnd;
137  n = m_xBreak->endOfScript(rText, n, nScript) )
138  {
139  nScript = m_xBreak->getScriptType(rText, n);
140  switch( nScript )
141  {
142  case i18n::ScriptType::LATIN: nRet |= SvtScriptType::LATIN; break;
143  case i18n::ScriptType::ASIAN: nRet |= SvtScriptType::ASIAN; break;
144  case i18n::ScriptType::COMPLEX: nRet |= SvtScriptType::COMPLEX; break;
145  case i18n::ScriptType::WEAK:
146  if( nRet == SvtScriptType::NONE )
147  nRet |= coAllScripts;
148  break;
149  }
150  if( coAllScripts == nRet )
151  break;
152  }
153  }
154  return nRet;
155 }
156 
157 sal_Int32 SwBreakIt::getGraphemeCount(const OUString& rText,
158  sal_Int32 nStart, sal_Int32 nEnd) const
159 {
160  sal_Int32 nGraphemeCount = 0;
161 
162  sal_Int32 nCurPos = std::max(static_cast<sal_Int32>(0), nStart);
163  while (nCurPos < nEnd)
164  {
165  // fdo#49208 cheat and assume that nothing can combine with a space
166  // to form a single grapheme
167  if (rText[nCurPos] == ' ')
168  {
169  ++nCurPos;
170  }
171  else
172  {
173  sal_Int32 nCount2 = 1;
174  nCurPos = m_xBreak->nextCharacters(rText, nCurPos, lang::Locale(),
175  i18n::CharacterIteratorMode::SKIPCELL, nCount2, nCount2);
176  }
177  ++nGraphemeCount;
178  }
179 
180  return nGraphemeCount;
181 }
182 
183 /* vim:set shiftwidth=4 softtabstop=4 expandtab: */
LanguageType m_aForbiddenLang
language of the current forbiddenChar struct
Definition: breakit.hxx:41
css::uno::Reference< css::uno::XComponentContext > m_xContext
Definition: breakit.hxx:35
void GetLocale_(const LanguageType aLang)
Definition: breakit.cxx:59
SvtScriptType GetAllScriptsOfText(const OUString &rText) const
Definition: breakit.cxx:127
css::i18n::ForbiddenCharacters getForbiddenCharacters() const
std::unique_ptr< LanguageTag > m_xLanguageTag
language tag of the current locale
Definition: breakit.hxx:38
static void Delete_()
Definition: breakit.cxx:41
sal_uInt16 GetRealScriptOfText(const OUString &rText, sal_Int32 nPos) const
Definition: breakit.cxx:83
const LanguageTag & GetLanguageTag(const LanguageType aLang)
Definition: breakit.hxx:85
sal_Int32 getGraphemeCount(const OUString &rStr, sal_Int32 nStart, sal_Int32 nEnd) const
Definition: breakit.cxx:157
SwBreakIt * g_pBreakIt
Definition: breakit.cxx:33
SwBreakIt(SwBreakIt const &)=delete
Reference< deployment::XPackageRegistry > create(Reference< deployment::XPackageRegistry > const &xRootRegistry, OUString const &context, OUString const &cachePath, Reference< XComponentContext > const &xComponentContext)
SvtScriptType
static sal_Int16 GetI18NScriptTypeOfLanguage(LanguageType nLang)
#define LANGUAGE_DONTKNOW
void GetForbidden_(const LanguageType aLang)
Definition: breakit.cxx:75
LanguageType GetAppLanguage()
Definition: init.cxx:729
css::uno::Reference< css::i18n::XBreakIterator > m_xBreak
Definition: breakit.hxx:36
std::unique_ptr< css::i18n::ForbiddenCharacters > m_xForbidden
Definition: breakit.hxx:39
static void Create_(const css::uno::Reference< css::uno::XComponentContext > &rxContext)
Definition: breakit.cxx:35
static SwBreakIt * Get()
Definition: breakit.cxx:47
const uno::Reference< uno::XComponentContext > m_xContext