LibreOffice Module lingucomponent (master) 1
guesslang.cxx
Go to the documentation of this file.
1/* -*- Mode: C++; tab-width: 4; indent-tabs-mode: nil; c-basic-offset: 4 -*- */
2/*
3 * This file is part of the LibreOffice project.
4 *
5 * This Source Code Form is subject to the terms of the Mozilla Public
6 * License, v. 2.0. If a copy of the MPL was not distributed with this
7 * file, You can obtain one at http://mozilla.org/MPL/2.0/.
8 *
9 * This file incorporates work covered by the following license notice:
10 *
11 * Licensed to the Apache Software Foundation (ASF) under one or more
12 * contributor license agreements. See the NOTICE file distributed
13 * with this work for additional information regarding copyright
14 * ownership. The ASF licenses this file to you under the Apache
15 * License, Version 2.0 (the "License"); you may not use this file
16 * except in compliance with the License. You may obtain a copy of
17 * the License at http://www.apache.org/licenses/LICENSE-2.0 .
18 */
19
20#include <iostream>
21#include <mutex>
22#include <string_view>
23
24#include <osl/file.hxx>
25#include <tools/debug.hxx>
26
27#include <sal/config.h>
31
32#include "simpleguesser.hxx"
33#include "guess.hxx"
34
35#include <com/sun/star/lang/IllegalArgumentException.hpp>
36#include <com/sun/star/lang/XServiceInfo.hpp>
37#include <com/sun/star/linguistic2/XLanguageGuessing.hpp>
39#include <osl/thread.h>
40
41#include <sal/macros.h>
42
43#ifdef SYSTEM_LIBEXTTEXTCAT
44#include <libexttextcat/textcat.h>
45#else
46#include <textcat.h>
47#endif
48
49using namespace ::osl;
50using namespace ::cppu;
51using namespace ::com::sun::star;
52using namespace ::com::sun::star::uno;
53using namespace ::com::sun::star::lang;
54using namespace ::com::sun::star::linguistic2;
55
/// Returns the single mutex that serialises access to the language guesser.
/// The mutex is a function-local static, so every call yields the same object.
static std::mutex & GetLangGuessMutex()
{
    static std::mutex s_aLangGuessMutex;
    std::mutex & rMutex = s_aLangGuessMutex;
    return rMutex;
}
61
62namespace {
63
64class LangGuess_Impl :
65 public ::cppu::WeakImplHelper<
66 XLanguageGuessing,
67 XServiceInfo >
68{
69 SimpleGuesser m_aGuesser;
70 bool m_bInitialized;
71
72 virtual ~LangGuess_Impl() override {}
73 void EnsureInitialized();
74
75public:
76 LangGuess_Impl();
77 LangGuess_Impl(const LangGuess_Impl&) = delete;
78 LangGuess_Impl& operator=(const LangGuess_Impl&) = delete;
79
80 // XServiceInfo implementation
81 virtual OUString SAL_CALL getImplementationName( ) override;
82 virtual sal_Bool SAL_CALL supportsService( const OUString& ServiceName ) override;
83 virtual Sequence< OUString > SAL_CALL getSupportedServiceNames( ) override;
84
85 // XLanguageGuessing implementation
86 virtual css::lang::Locale SAL_CALL guessPrimaryLanguage( const OUString& aText, ::sal_Int32 nStartPos, ::sal_Int32 nLen ) override;
87 virtual void SAL_CALL disableLanguages( const css::uno::Sequence< css::lang::Locale >& aLanguages ) override;
88 virtual void SAL_CALL enableLanguages( const css::uno::Sequence< css::lang::Locale >& aLanguages ) override;
89 virtual css::uno::Sequence< css::lang::Locale > SAL_CALL getAvailableLanguages( ) override;
90 virtual css::uno::Sequence< css::lang::Locale > SAL_CALL getEnabledLanguages( ) override;
91 virtual css::uno::Sequence< css::lang::Locale > SAL_CALL getDisabledLanguages( ) override;
92
93 // implementation specific
95 void SetFingerPrintsDB( std::u16string_view fileName );
96};
97
98}
99
100LangGuess_Impl::LangGuess_Impl() :
101 m_bInitialized( false )
102{
103}
104
105void LangGuess_Impl::EnsureInitialized()
106{
107 if (m_bInitialized)
108 return;
109
110 // set this to true at the very start to prevent loops because of
111 // implicitly called functions below
112 m_bInitialized = true;
113
114 // set default fingerprint path to where those get installed
115 OUString aPhysPath;
116 OUString aURL( SvtPathOptions().GetFingerprintPath() );
117 osl::FileBase::getSystemPathFromFileURL( aURL, aPhysPath );
118#ifdef _WIN32
119 aPhysPath += "\\";
120#else
121 aPhysPath += "/";
122#endif
123
124 SetFingerPrintsDB( aPhysPath );
125
126#if !defined(EXTTEXTCAT_VERSION_MAJOR)
127
128 // disable currently not functional languages...
129 struct LangCountry
130 {
131 const char *pLang;
132 const char *pCountry;
133 };
134 LangCountry aDisable[] =
135 {
136 // not functional in modified libtextcat, but fixed in >= libexttextcat 3.1.0
137 // which is the first with EXTTEXTCAT_VERSION_MAJOR defined
138 {"sco", ""}, {"zh", "CN"}, {"zh", "TW"}, {"ja", ""}, {"ko", ""},
139 {"ka", ""}, {"hi", ""}, {"mr", ""}, {"ne", ""}, {"sa", ""},
140 {"ta", ""}, {"th", ""}, {"qu", ""}, {"yi", ""}
141 };
142 sal_Int32 nNum = SAL_N_ELEMENTS(aDisable);
143 Sequence< Locale > aDisableSeq( nNum );
144 Locale *pDisableSeq = aDisableSeq.getArray();
145 for (sal_Int32 i = 0; i < nNum; ++i)
146 {
147 Locale aLocale;
148 aLocale.Language = OUString::createFromAscii( aDisable[i].pLang );
149 aLocale.Country = OUString::createFromAscii( aDisable[i].pCountry );
150 pDisableSeq[i] = aLocale;
151 }
152 disableLanguages( aDisableSeq );
153 DBG_ASSERT( nNum == getDisabledLanguages().getLength(), "size mismatch" );
154#endif
155}
156
157Locale SAL_CALL LangGuess_Impl::guessPrimaryLanguage(
158 const OUString& rText,
159 ::sal_Int32 nStartPos,
160 ::sal_Int32 nLen )
161{
162 std::scoped_lock aGuard( GetLangGuessMutex() );
163
164 EnsureInitialized();
165
166 if (nStartPos < 0 || nLen < 0 || nStartPos + nLen > rText.getLength())
167 throw lang::IllegalArgumentException();
168
169 OString o( OUStringToOString( rText.subView(nStartPos, nLen), RTL_TEXTENCODING_UTF8 ) );
170 Guess g = m_aGuesser.GuessPrimaryLanguage(o.getStr());
171 lang::Locale aRes;
172 aRes.Language = OUString::createFromAscii( g.GetLanguage() );
173 aRes.Country = OUString::createFromAscii( g.GetCountry() );
174 return aRes;
175}
176
177#define DEFAULT_CONF_FILE_NAME "fpdb.conf"
178
179void LangGuess_Impl::SetFingerPrintsDB(
180 std::u16string_view filePath )
181{
183 OString path = OUStringToOString( filePath, osl_getThreadTextEncoding() );
184 OString conf_file_path = path + DEFAULT_CONF_FILE_NAME;
185
186 m_aGuesser.SetDBPath(conf_file_path.getStr(), path.getStr());
187}
188
189uno::Sequence< Locale > SAL_CALL LangGuess_Impl::getAvailableLanguages( )
190{
191 std::scoped_lock aGuard( GetLangGuessMutex() );
192
193 EnsureInitialized();
194
195 Sequence< css::lang::Locale > aRes;
196 std::vector<Guess> gs = m_aGuesser.GetAllManagedLanguages();
197 aRes.realloc(gs.size());
198
199 css::lang::Locale *pRes = aRes.getArray();
200
201 for(size_t i = 0; i < gs.size() ; i++ ){
202 css::lang::Locale current_aRes;
203 current_aRes.Language = OUString::createFromAscii( gs[i].GetLanguage() );
204 current_aRes.Country = OUString::createFromAscii( gs[i].GetCountry() );
205 pRes[i] = current_aRes;
206 }
207
208 return aRes;
209}
210
211uno::Sequence< Locale > SAL_CALL LangGuess_Impl::getEnabledLanguages( )
212{
213 std::scoped_lock aGuard( GetLangGuessMutex() );
214
215 EnsureInitialized();
216
217 Sequence< css::lang::Locale > aRes;
218 std::vector<Guess> gs = m_aGuesser.GetAvailableLanguages();
219 aRes.realloc(gs.size());
220
221 css::lang::Locale *pRes = aRes.getArray();
222
223 for(size_t i = 0; i < gs.size() ; i++ ){
224 css::lang::Locale current_aRes;
225 current_aRes.Language = OUString::createFromAscii( gs[i].GetLanguage() );
226 current_aRes.Country = OUString::createFromAscii( gs[i].GetCountry() );
227 pRes[i] = current_aRes;
228 }
229
230 return aRes;
231}
232
233uno::Sequence< Locale > SAL_CALL LangGuess_Impl::getDisabledLanguages( )
234{
235 std::scoped_lock aGuard( GetLangGuessMutex() );
236
237 EnsureInitialized();
238
239 Sequence< css::lang::Locale > aRes;
240 std::vector<Guess> gs = m_aGuesser.GetUnavailableLanguages();
241 aRes.realloc(gs.size());
242
243 css::lang::Locale *pRes = aRes.getArray();
244
245 for(size_t i = 0; i < gs.size() ; i++ ){
246 css::lang::Locale current_aRes;
247 current_aRes.Language = OUString::createFromAscii( gs[i].GetLanguage() );
248 current_aRes.Country = OUString::createFromAscii( gs[i].GetCountry() );
249 pRes[i] = current_aRes;
250 }
251
252 return aRes;
253}
254
255void SAL_CALL LangGuess_Impl::disableLanguages(
256 const uno::Sequence< Locale >& rLanguages )
257{
258 std::scoped_lock aGuard( GetLangGuessMutex() );
259
260 EnsureInitialized();
261
262 for (const Locale& rLanguage : rLanguages)
263 {
264 std::string language;
265
266 OString l = OUStringToOString( rLanguage.Language, RTL_TEXTENCODING_ASCII_US );
267 OString c = OUStringToOString( rLanguage.Country, RTL_TEXTENCODING_ASCII_US );
268
269 language += l.getStr();
270 language += "-";
271 language += c.getStr();
272 m_aGuesser.DisableLanguage(language);
273 }
274}
275
276void SAL_CALL LangGuess_Impl::enableLanguages(
277 const uno::Sequence< Locale >& rLanguages )
278{
279 std::scoped_lock aGuard( GetLangGuessMutex() );
280
281 EnsureInitialized();
282
283 for (const Locale& rLanguage : rLanguages)
284 {
285 std::string language;
286
287 OString l = OUStringToOString( rLanguage.Language, RTL_TEXTENCODING_ASCII_US );
288 OString c = OUStringToOString( rLanguage.Country, RTL_TEXTENCODING_ASCII_US );
289
290 language += l.getStr();
291 language += "-";
292 language += c.getStr();
293 m_aGuesser.EnableLanguage(language);
294 }
295}
296
297OUString SAL_CALL LangGuess_Impl::getImplementationName( )
298{
299 return "com.sun.star.lingu2.LanguageGuessing";
300}
301
302sal_Bool SAL_CALL LangGuess_Impl::supportsService( const OUString& ServiceName )
303{
304 return cppu::supportsService(this, ServiceName);
305}
306
307Sequence<OUString> SAL_CALL LangGuess_Impl::getSupportedServiceNames( )
308{
309 return { "com.sun.star.linguistic2.LanguageGuessing" };
310}
311
312extern "C" SAL_DLLPUBLIC_EXPORT css::uno::XInterface*
314 css::uno::XComponentContext* , css::uno::Sequence<css::uno::Any> const&)
315{
316 return cppu::acquire(new LangGuess_Impl());
317}
318
319
320/* vim:set shiftwidth=4 softtabstop=4 expandtab: */
Definition: guess.hxx:31
const std::string & GetCountry() const
Definition: guess.hxx:45
const std::string & GetLanguage() const
Definition: guess.hxx:44
#define DBG_ASSERT(sCon, aError)
URL aURL
#define DEFAULT_CONF_FILE_NAME
Definition: guesslang.cxx:177
SAL_DLLPUBLIC_EXPORT css::uno::XInterface * lingucomponent_LangGuess_get_implementation(css::uno::XComponentContext *, css::uno::Sequence< css::uno::Any > const &)
Definition: guesslang.cxx:313
static std::mutex & GetLangGuessMutex()
Definition: guesslang.cxx:56
#define SAL_N_ELEMENTS(arr)
LanguageType GetLanguage(SfxItemSet const &aSet, sal_uInt16 nLangWhichId)
double getLength(const B2DPolygon &rCandidate)
css::uno::Sequence< OUString > getSupportedServiceNames()
OUString getImplementationName()
bool CPPUHELPER_DLLPUBLIC supportsService(css::lang::XServiceInfo *implementation, rtl::OUString const &name)
filePath
int i
OString OUStringToOString(std::u16string_view str, ConnectionSettings const *settings)
std::mutex aMutex
unsigned char sal_Bool