LibreOffice Module lingucomponent (master)  1
guesslang.cxx
Go to the documentation of this file.
1 /* -*- Mode: C++; tab-width: 4; indent-tabs-mode: nil; c-basic-offset: 4 -*- */
2 /*
3  * This file is part of the LibreOffice project.
4  *
5  * This Source Code Form is subject to the terms of the Mozilla Public
6  * License, v. 2.0. If a copy of the MPL was not distributed with this
7  * file, You can obtain one at http://mozilla.org/MPL/2.0/.
8  *
9  * This file incorporates work covered by the following license notice:
10  *
11  * Licensed to the Apache Software Foundation (ASF) under one or more
12  * contributor license agreements. See the NOTICE file distributed
13  * with this work for additional information regarding copyright
14  * ownership. The ASF licenses this file to you under the Apache
15  * License, Version 2.0 (the "License"); you may not use this file
16  * except in compliance with the License. You may obtain a copy of
17  * the License at http://www.apache.org/licenses/LICENSE-2.0 .
18  */
19 
20 #include <iostream>
21 
22 #include <osl/file.hxx>
23 #include <tools/debug.hxx>
24 
25 #include <sal/config.h>
26 #include <cppuhelper/factory.hxx>
28 #include <cppuhelper/implbase.hxx>
30 
31 #include "simpleguesser.hxx"
32 #include "guess.hxx"
33 
34 #include <com/sun/star/lang/IllegalArgumentException.hpp>
35 #include <com/sun/star/lang/XServiceInfo.hpp>
36 #include <com/sun/star/linguistic2/XLanguageGuessing.hpp>
37 #include <unotools/pathoptions.hxx>
38 #include <osl/thread.h>
39 
40 #include <sal/macros.h>
41 
42 #ifdef SYSTEM_LIBEXTTEXTCAT
43 #include <libexttextcat/textcat.h>
44 #else
45 #include <textcat.h>
46 #endif
47 
48 using namespace ::osl;
49 using namespace ::cppu;
50 using namespace ::com::sun::star;
51 using namespace ::com::sun::star::uno;
52 using namespace ::com::sun::star::lang;
53 using namespace ::com::sun::star::linguistic2;
54 
55 #define SERVICENAME "com.sun.star.linguistic2.LanguageGuessing"
56 #define IMPLNAME "com.sun.star.lingu2.LanguageGuessing"
57 
// Body of getSupportedServiceNames_LangGuess_Impl(), which the index at the
// end of this listing declares as `static Sequence< OUString >`; it hands
// `names` back to the caller.
// NOTE(review): listing lines 58 (signature) and 60 (presumably where `names`
// is built) are absent from this rendering - confirm against the real file.
59 {
61  return names;
62 }
63 
// Body of getImplementationName_LangGuess_Impl() (static, returns OUString
// per the index at the end of this listing): yields the IMPLNAME macro string.
// NOTE(review): the signature on listing line 64 is absent from this rendering.
65 {
66  return IMPLNAME;
67 }
68 
// Returns the single osl::Mutex that the LangGuess_Impl methods in this file
// lock (through osl::MutexGuard) before touching the guesser.
69 static osl::Mutex & GetLangGuessMutex()
70 {
    // function-local static: every caller receives a reference to the same object
71  static osl::Mutex aMutex;
72  return aMutex;
73 }
74 
// Declaration of LangGuess_Impl: a cppu::WeakImplHelper-based component that
// implements XLanguageGuessing and XServiceInfo.
// NOTE(review): this rendering omits listing lines 75 (class header), 80-81
// (the index at the end of the listing places `SimpleGuesser m_aGuesser` and
// `bool m_bInitialized` there), 87, 95 and 106 - confirm against the real file.
76  public ::cppu::WeakImplHelper<
77  XLanguageGuessing,
78  XServiceInfo >
79 {
82 
83  virtual ~LangGuess_Impl() override {}
    // performs the one-time set-up of the guesser on first use
84  void EnsureInitialized();
85 
86 public:
    // instances are neither copy-constructible nor copy-assignable
88  LangGuess_Impl(const LangGuess_Impl&) = delete;
89  LangGuess_Impl& operator=(const LangGuess_Impl&) = delete;
90 
91  // XServiceInfo implementation
92  virtual OUString SAL_CALL getImplementationName( ) override;
93  virtual sal_Bool SAL_CALL supportsService( const OUString& ServiceName ) override;
94  virtual Sequence< OUString > SAL_CALL getSupportedServiceNames( ) override;
96 
97  // XLanguageGuessing implementation
98  virtual css::lang::Locale SAL_CALL guessPrimaryLanguage( const OUString& aText, ::sal_Int32 nStartPos, ::sal_Int32 nLen ) override;
99  virtual void SAL_CALL disableLanguages( const css::uno::Sequence< css::lang::Locale >& aLanguages ) override;
100  virtual void SAL_CALL enableLanguages( const css::uno::Sequence< css::lang::Locale >& aLanguages ) override;
101  virtual css::uno::Sequence< css::lang::Locale > SAL_CALL getAvailableLanguages( ) override;
102  virtual css::uno::Sequence< css::lang::Locale > SAL_CALL getEnabledLanguages( ) override;
103  virtual css::uno::Sequence< css::lang::Locale > SAL_CALL getDisabledLanguages( ) override;
104 
105  // implementation specific
    // points the guesser at a fingerprint directory (see the definition below)
107  void SetFingerPrintsDB( const OUString &fileName );
108 };
109 
// Constructor fragment: m_bInitialized starts out false, which is the state
// EnsureInitialized() tests before doing its one-time set-up.
// NOTE(review): the constructor's signature (listing line 110) is absent from
// this rendering; presumably LangGuess_Impl::LangGuess_Impl() - confirm.
111  m_bInitialized( false )
112 {
113 }
114 
// Body of EnsureInitialized() (the index at the end of this listing places its
// `void EnsureInitialized()` signature on listing line 115, which is absent
// from this rendering).
//
// On the first call (m_bInitialized still false) it:
//   1. flips m_bInitialized,
//   2. turns SvtPathOptions().GetFingerprintPath() into a system path with a
//      trailing path separator and passes it to SetFingerPrintsDB(),
//   3. when EXTTEXTCAT_VERSION_MAJOR is not defined, disables a fixed list of
//      languages through disableLanguages().
// Later calls do nothing.
116 {
117  if (!m_bInitialized)
118  {
119  // set this to true at the very start to prevent loops because of
120  // implicitly called functions below
121  m_bInitialized = true;
122 
123  // set default fingerprint path to where those get installed
124  OUString aPhysPath;
125  OUString aURL( SvtPathOptions().GetFingerprintPath() );
    // NOTE(review): the result of getSystemPathFromFileURL() is not checked
    // here; presumably aPhysPath is left empty on failure - confirm.
126  osl::FileBase::getSystemPathFromFileURL( aURL, aPhysPath );
    // append the platform's directory separator so that a file name can be
    // concatenated directly (SetFingerPrintsDB appends DEFAULT_CONF_FILE_NAME)
127 #ifdef _WIN32
128  aPhysPath += "\\";
129 #else
130  aPhysPath += "/";
131 #endif
132 
133  SetFingerPrintsDB( aPhysPath );
134 
135 #if !defined(EXTTEXTCAT_VERSION_MAJOR)
136 
137  // disable currently not functional languages...
138  struct LangCountry
139  {
140  const char *pLang;
141  const char *pCountry;
142  };
143  LangCountry aDisable[] =
144  {
145  // not functional in modified libtextcat, but fixed in >= libexttextcat 3.1.0
146  // which is the first with EXTTEXTCAT_VERSION_MAJOR defined
147  {"sco", ""}, {"zh", "CN"}, {"zh", "TW"}, {"ja", ""}, {"ko", ""},
148  {"ka", ""}, {"hi", ""}, {"mr", ""}, {"ne", ""}, {"sa", ""},
149  {"ta", ""}, {"th", ""}, {"qu", ""}, {"yi", ""}
150  };
    // copy the table into a Sequence< Locale >, one Locale per table row
151  sal_Int32 nNum = SAL_N_ELEMENTS(aDisable);
152  Sequence< Locale > aDisableSeq( nNum );
153  Locale *pDisableSeq = aDisableSeq.getArray();
154  for (sal_Int32 i = 0; i < nNum; ++i)
155  {
156  Locale aLocale;
157  aLocale.Language = OUString::createFromAscii( aDisable[i].pLang );
158  aLocale.Country = OUString::createFromAscii( aDisable[i].pCountry );
159  pDisableSeq[i] = aLocale;
160  }
161  disableLanguages( aDisableSeq );
    // sanity check: the guesser should now report exactly nNum disabled languages
162  DBG_ASSERT( nNum == getDisabledLanguages().getLength(), "size mismatch" );
163 #endif
164  }
165 }
166 
// Parameter list and body of guessPrimaryLanguage() (the index at the end of
// this listing places its signature on listing line 167, which is absent from
// this rendering, as is listing line 174 inside the body).
//
// Guesses the language of rText[nStartPos, nStartPos + nLen):
//   - holds GetLangGuessMutex() for the whole call,
//   - throws lang::IllegalArgumentException when the range is negative or runs
//     past the end of rText,
//   - converts the selected range to UTF-8 and hands it to
//     m_aGuesser.GuessPrimaryLanguage(),
//   - returns a Locale filled from the guess's language and country strings.
168  const OUString& rText,
169  ::sal_Int32 nStartPos,
170  ::sal_Int32 nLen )
171 {
172  osl::MutexGuard aGuard( GetLangGuessMutex() );
173 
175 
    // NOTE(review): nStartPos + nLen is computed in ::sal_Int32 and could
    // overflow for very large arguments before the comparison happens;
    // `nLen > rText.getLength() - nStartPos` expresses the same bound without
    // the addition - confirm and fix in the real source.
176  if (nStartPos < 0 || nLen < 0 || nStartPos + nLen > rText.getLength())
177  throw lang::IllegalArgumentException();
178 
179  OString o( OUStringToOString( rText.copy(nStartPos, nLen), RTL_TEXTENCODING_UTF8 ) );
180  Guess g = m_aGuesser.GuessPrimaryLanguage(o.getStr());
181  lang::Locale aRes;
182  aRes.Language = OUString::createFromAscii( g.GetLanguage().c_str() );
183  aRes.Country = OUString::createFromAscii( g.GetCountry().c_str() );
184  return aRes;
185 }
186 
187 #define DEFAULT_CONF_FILE_NAME "fpdb.conf"
188 
// Parameter list and body of SetFingerPrintsDB() (the index at the end of this
// listing places its signature on listing line 189; that line and listing
// line 192 are absent from this rendering).
//
// filePath is used as a prefix: DEFAULT_CONF_FILE_NAME is appended to it
// verbatim to form the configuration file path, so filePath is expected to end
// with a path separator (EnsureInitialized() appends one before calling).
190  const OUString &filePath )
191 {
    // the path is converted with the calling thread's text encoding
193  OString path = OUStringToOString( filePath, osl_getThreadTextEncoding() );
194  OString conf_file_path(path);
195  conf_file_path += DEFAULT_CONF_FILE_NAME;
196 
    // first argument: configuration file; second argument: the directory prefix
197  m_aGuesser.SetDBPath(conf_file_path.getStr(), path.getStr());
198 }
199 
// Returns one Locale per entry of m_aGuesser.GetAllManagedLanguages(), i.e.
// every language the guesser manages, whether or not it may appear in guesses.
// The whole call runs under GetLangGuessMutex().
// NOTE(review): listing line 204 is absent from this rendering; presumably an
// EnsureInitialized() call - confirm against the real file.
200 uno::Sequence< Locale > SAL_CALL LangGuess_Impl::getAvailableLanguages( )
201 {
202  osl::MutexGuard aGuard( GetLangGuessMutex() );
203 
205 
206  Sequence< css::lang::Locale > aRes;
207  vector<Guess> gs = m_aGuesser.GetAllManagedLanguages();
    // size the result to match, then fill it through the raw array pointer
208  aRes.realloc(gs.size());
209 
210  css::lang::Locale *pRes = aRes.getArray();
211 
212  for(size_t i = 0; i < gs.size() ; i++ ){
213  css::lang::Locale current_aRes;
214  current_aRes.Language = OUString::createFromAscii( gs[i].GetLanguage().c_str() );
215  current_aRes.Country = OUString::createFromAscii( gs[i].GetCountry().c_str() );
216  pRes[i] = current_aRes;
217  }
218 
219  return aRes;
220 }
221 
// Returns one Locale per entry of m_aGuesser.GetAvailableLanguages(): what
// this interface calls "enabled" is the guesser's "available" set.
// The whole call runs under GetLangGuessMutex().
// NOTE(review): listing line 226 is absent from this rendering; presumably an
// EnsureInitialized() call - confirm against the real file.
222 uno::Sequence< Locale > SAL_CALL LangGuess_Impl::getEnabledLanguages( )
223 {
224  osl::MutexGuard aGuard( GetLangGuessMutex() );
225 
227 
228  Sequence< css::lang::Locale > aRes;
229  vector<Guess> gs = m_aGuesser.GetAvailableLanguages();
    // size the result to match, then fill it through the raw array pointer
230  aRes.realloc(gs.size());
231 
232  css::lang::Locale *pRes = aRes.getArray();
233 
234  for(size_t i = 0; i < gs.size() ; i++ ){
235  css::lang::Locale current_aRes;
236  current_aRes.Language = OUString::createFromAscii( gs[i].GetLanguage().c_str() );
237  current_aRes.Country = OUString::createFromAscii( gs[i].GetCountry().c_str() );
238  pRes[i] = current_aRes;
239  }
240 
241  return aRes;
242 }
243 
// Returns one Locale per entry of m_aGuesser.GetUnavailableLanguages(): what
// this interface calls "disabled" is the guesser's "unavailable" set.
// The whole call runs under GetLangGuessMutex().
// NOTE(review): listing line 248 is absent from this rendering; presumably an
// EnsureInitialized() call - confirm against the real file.
244 uno::Sequence< Locale > SAL_CALL LangGuess_Impl::getDisabledLanguages( )
245 {
246  osl::MutexGuard aGuard( GetLangGuessMutex() );
247 
249 
250  Sequence< css::lang::Locale > aRes;
251  vector<Guess> gs = m_aGuesser.GetUnavailableLanguages();
    // size the result to match, then fill it through the raw array pointer
252  aRes.realloc(gs.size());
253 
254  css::lang::Locale *pRes = aRes.getArray();
255 
256  for(size_t i = 0; i < gs.size() ; i++ ){
257  css::lang::Locale current_aRes;
258  current_aRes.Language = OUString::createFromAscii( gs[i].GetLanguage().c_str() );
259  current_aRes.Country = OUString::createFromAscii( gs[i].GetCountry().c_str() );
260  pRes[i] = current_aRes;
261  }
262 
263  return aRes;
264 }
265 
// Parameter list and body of disableLanguages() (the index at the end of this
// listing places its signature on listing line 266; that line and listing
// line 271 are absent from this rendering).
//
// For every Locale in rLanguages, builds the key "<Language>-<Country>" and
// passes it to m_aGuesser.DisableLanguage(). Runs under GetLangGuessMutex().
267  const uno::Sequence< Locale >& rLanguages )
268 {
269  osl::MutexGuard aGuard( GetLangGuessMutex() );
270 
272 
273  for (const Locale& rLanguage : rLanguages)
274  {
275  string language;
276 
277  OString l = OUStringToOString( rLanguage.Language, RTL_TEXTENCODING_ASCII_US );
278  OString c = OUStringToOString( rLanguage.Country, RTL_TEXTENCODING_ASCII_US );
279 
    // the "-" is appended unconditionally, so an empty Country yields a key
    // that ends in "-" (e.g. "ja-")
280  language += l.getStr();
281  language += "-";
282  language += c.getStr();
283  m_aGuesser.DisableLanguage(language);
284  }
285 }
286 
// Parameter list and body of enableLanguages() (the index at the end of this
// listing places its signature on listing line 287; that line and listing
// line 292 are absent from this rendering).
//
// For every Locale in rLanguages, builds the key "<Language>-<Country>" and
// passes it to m_aGuesser.EnableLanguage(). Runs under GetLangGuessMutex().
288  const uno::Sequence< Locale >& rLanguages )
289 {
290  osl::MutexGuard aGuard( GetLangGuessMutex() );
291 
293 
294  for (const Locale& rLanguage : rLanguages)
295  {
296  string language;
297 
298  OString l = OUStringToOString( rLanguage.Language, RTL_TEXTENCODING_ASCII_US );
299  OString c = OUStringToOString( rLanguage.Country, RTL_TEXTENCODING_ASCII_US );
300 
    // the "-" is appended unconditionally, so an empty Country yields a key
    // that ends in "-" (e.g. "ja-")
301  language += l.getStr();
302  language += "-";
303  language += c.getStr();
304  m_aGuesser.EnableLanguage(language);
305  }
306 }
307 
// Body of the XServiceInfo method getImplementationName() (the index at the
// end of this listing places its signature on listing line 308, which is
// absent from this rendering): yields the IMPLNAME macro string.
309 {
310  return IMPLNAME;
311 }
312 
// XServiceInfo: reports whether ServiceName is supported by delegating to the
// cppu::supportsService() helper, passing this object as the XServiceInfo
// implementation to query.
313 sal_Bool SAL_CALL LangGuess_Impl::supportsService( const OUString& ServiceName )
314 {
315  return cppu::supportsService(this, ServiceName);
316 }
317 
319 {
321 }
322 
// Body of getSupportedServiceNames_Static(), which the index at the end of
// this listing declares as `static Sequence< OUString >` on listing line 323
// (absent from this rendering): returns a one-element list holding SERVICENAME.
324 {
325  return { SERVICENAME };
326 }
327 
// Body of LangGuess_Impl_create(), described in the index at the end of this
// listing as the function that creates a new component instance for the
// factory helper (its signature, listing lines 332-333, is absent from this
// rendering): allocates a LangGuess_Impl and returns it as its
// ::cppu::OWeakObject base.
334 {
335  return static_cast< ::cppu::OWeakObject * >( new LangGuess_Impl );
336 }
337 
338 //#### EXPORTED ### functions to allow for registration and creation of the UNO component
// Table handed to ::cppu::component_getFactoryHelper() by
// guesslang_component_getFactory(): one component entry followed by an
// all-null terminator entry.
// NOTE(review): listing lines 342-343 (the leading fields of the first entry)
// are absent from this rendering - confirm against the real file.
339 static const struct ::cppu::ImplementationEntry s_component_entries [] =
340 {
341  {
344  ::cppu::createSingleComponentFactory,
345  nullptr, 0
346  },
347  { nullptr, nullptr, nullptr, nullptr, nullptr, 0 }
348 };
349 
// C-linkage entry point exported from the library (SAL_DLLPUBLIC_EXPORT).
350 extern "C"
351 {
352 
// Forwards implName, xMgr and xRegistry, together with this file's
// s_component_entries table, to ::cppu::component_getFactoryHelper() and
// returns whatever that helper returns.
353 SAL_DLLPUBLIC_EXPORT void * guesslang_component_getFactory(
354  sal_Char const * implName, void * xMgr,
355  void * xRegistry )
356 {
357  return ::cppu::component_getFactoryHelper(
358  implName, xMgr, xRegistry, s_component_entries );
359 }
360 
361 }
362 
363 /* vim:set shiftwidth=4 softtabstop=4 expandtab: */
virtual Sequence< OUString > SAL_CALL getSupportedServiceNames() override
Definition: guesslang.cxx:318
#define SERVICENAME
Definition: guesslang.cxx:55
const string & GetLanguage()
Definition: guess.hxx:46
virtual OUString SAL_CALL getImplementationName() override
Definition: guesslang.cxx:308
virtual css::uno::Sequence< css::lang::Locale > SAL_CALL getDisabledLanguages() override
Definition: guesslang.cxx:244
void DisableLanguage(const string &lang)
Mark a language disabled.
static const struct::cppu::ImplementationEntry s_component_entries[]
Definition: guesslang.cxx:339
void SetFingerPrintsDB(const OUString &fileName)
Definition: guesslang.cxx:189
static OUString getImplementationName_LangGuess_Impl()
Definition: guesslang.cxx:64
static osl::Mutex & GetLangGuessMutex()
Definition: guesslang.cxx:69
static Sequence< OUString > getSupportedServiceNames_LangGuess_Impl()
Definition: guesslang.cxx:58
vector< Guess > GetAvailableLanguages()
List all available languages (possibly to be in guesses)
LangGuess_Impl & operator=(const LangGuess_Impl &)=delete
bool m_bInitialized
Definition: guesslang.cxx:81
char sal_Char
bool CPPUHELPER_DLLPUBLIC supportsService(css::lang::XServiceInfo *implementation, rtl::OUString const &name)
#define SAL_N_ELEMENTS(arr)
#define DBG_ASSERT(sCon, aError)
static Sequence< OUString > getSupportedServiceNames_Static()
Definition: guesslang.cxx:323
LanguageType GetLanguage(SwWrtShell &rSh, sal_uInt16 nLangWhichId)
int i
virtual css::uno::Sequence< css::lang::Locale > SAL_CALL getEnabledLanguages() override
Definition: guesslang.cxx:222
unsigned char sal_Bool
vector< Guess > GetAllManagedLanguages()
List all languages (possibly in guesses or not)
SimpleGuesser m_aGuesser
Definition: guesslang.cxx:80
static Reference< XInterface > LangGuess_Impl_create(Reference< XComponentContext > const &)
Function to create a new component instance; is needed by factory helper implementation.
Definition: guesslang.cxx:332
OString OUStringToOString(const OUString &str, ConnectionSettings const *settings)
void EnableLanguage(const string &lang)
Mark a language enabled.
virtual css::uno::Sequence< css::lang::Locale > SAL_CALL getAvailableLanguages() override
Definition: guesslang.cxx:200
void EnsureInitialized()
Definition: guesslang.cxx:115
virtual void SAL_CALL disableLanguages(const css::uno::Sequence< css::lang::Locale > &aLanguages) override
Definition: guesslang.cxx:266
vector< Guess > GetUnavailableLanguages()
List all Unavailable languages (disable for any reason)
SAL_DLLPUBLIC_EXPORT void * guesslang_component_getFactory(sal_Char const *implName, void *xMgr, void *xRegistry)
Definition: guesslang.cxx:353
Definition: guess.hxx:33
#define DEFAULT_CONF_FILE_NAME
Definition: guesslang.cxx:187
virtual css::lang::Locale SAL_CALL guessPrimaryLanguage(const OUString &aText,::sal_Int32 nStartPos,::sal_Int32 nLen) override
Definition: guesslang.cxx:167
double getLength(const B2DPolygon &rCandidate)
virtual sal_Bool SAL_CALL supportsService(const OUString &ServiceName) override
Definition: guesslang.cxx:313
const string & GetCountry()
Definition: guess.hxx:47
void SetDBPath(const char *thePathOfConfFile, const char *prefix)
Load a new DB of fingerprints.
#define IMPLNAME
Definition: guesslang.cxx:56
virtual ~LangGuess_Impl() override
Definition: guesslang.cxx:83
Guess GuessPrimaryLanguage(const char *text)
Analyze a text and return the most probable language of the text.
virtual void SAL_CALL enableLanguages(const css::uno::Sequence< css::lang::Locale > &aLanguages) override
Definition: guesslang.cxx:287