34 #ifdef SYSTEM_LIBEXTTEXTCAT
35 #include <libexttextcat/textcat.h>
36 #include <libexttextcat/common.h>
37 #include <libexttextcat/constants.h>
38 #include <libexttextcat/fingerprint.h>
39 #include <libexttextcat/utf8misc.h>
43 #include <constants.h>
44 #include <fingerprint.h>
50 #include<rtl/character.hxx>
59 size_t min = s1.length();
60 if (min > s2.length())
63 for(i = 0; i < min && s2[i] && s1[i] && !ret; i++){
64 ret = rtl::toAsciiUpperCase(static_cast<unsigned char>(s1[i]))
65 - rtl::toAsciiUpperCase(static_cast<unsigned char>(s2[i]));
66 if(s1[i] ==
'.' || s2[i] ==
'.') {ret = 0;}
76 typedef struct textcat_t{
83 char output[MAXOUTPUTSIZE];
100 if(
h){textcat_Done(
h);}
107 if(
h){textcat_Done(
h);}
115 vector<Guess> guesses;
120 int len = strlen(text);
125 const char *guess_list = textcat_Classify(
h, text, len);
130 int current_pointer = 0;
132 for(
int i = 0; guess_list[current_pointer] !=
'\0';
i++)
134 while (guess_list[current_pointer] !=
GUESS_SEPARATOR_OPEN && guess_list[current_pointer] !=
'\0')
136 if(guess_list[current_pointer] !=
'\0')
138 Guess g(guess_list + current_pointer);
140 guesses.push_back(g);
151 vector<Guess> ret = GuessLanguage(text);
152 return ret.empty() ?
Guess() : ret[0];
162 textcat_t *tables =
static_cast<textcat_t*
>(
h);
167 for (
size_t i=0;
i<tables->size; ++
i)
169 if (tables->fprint_disable[
i] & mask)
171 string langStr =
"[";
172 langStr += fp_Name(tables->fprint[
i]);
173 Guess g(langStr.c_str());
183 return GetManagedLanguages( sal::static_int_cast< char >( 0xF0 ) );
188 return GetManagedLanguages( sal::static_int_cast< char >( 0x0F ));
193 return GetManagedLanguages( sal::static_int_cast< char >( 0xFF ));
198 textcat_t *tables =
static_cast<textcat_t*
>(
h);
202 for (
size_t i=0;
i<tables->size;
i++)
204 string language(fp_Name(tables->fprint[
i]));
206 tables->fprint_disable[
i] = mask;
212 XableLanguage(lang, sal::static_int_cast< char >( 0xF0 ));
217 XableLanguage(lang, sal::static_int_cast< char >( 0x0F ));
224 h = special_textcat_Init(path, prefix);
void XableLanguage(const string &lang, char mask)
#define GUESS_SEPARATOR_OPEN
void DisableLanguage(const string &lang)
Mark a language disabled.
#define MAX_STRING_LENGTH_TO_ANALYSE
SimpleGuesser()
Initializes the object with the configuration file "./conf.txt".
vector< Guess > GetAvailableLanguages()
List all available languages (those that may appear in guesses).
#define TEXTCAT_RESULT_SHORT_STR
~SimpleGuesser()
destroy the object
vector< Guess > GuessLanguage(const char *text)
Analyze a text and return the most probable languages of the text.
vector< Guess > GetAllManagedLanguages()
List all managed languages, whether or not they may appear in guesses.
static int startsAsciiCaseInsensitive(const std::string &s1, const std::string &s2)
void EnableLanguage(const string &lang)
Mark a language enabled.
vector< Guess > GetUnavailableLanguages()
List all unavailable languages (disabled for any reason).
vector< Guess > GetManagedLanguages(const char mask)
Is used to know which languages are available, unavailable, or both. When mask = 0xF0, return only available languages; when mask = 0x0F, return only unavailable languages; when mask = 0xFF, return both available and unavailable languages.
SimpleGuesser & operator=(const SimpleGuesser &sg)
void SetDBPath(const char *thePathOfConfFile, const char *prefix)
Load a new DB of fingerprints.
Guess GuessPrimaryLanguage(const char *text)
Analyze a text and return the most probable language of the text.