32#ifdef SYSTEM_LIBEXTTEXTCAT
33#include <libexttextcat/textcat.h>
34#include <libexttextcat/common.h>
35#include <libexttextcat/constants.h>
36#include <libexttextcat/fingerprint.h>
41#include <fingerprint.h>
46#include<rtl/character.hxx>
53 size_t min = s1.length();
54 if (
min > s2.length())
57 for(
i = 0;
i <
min && s2[
i] && s1[
i] && !ret;
i++){
58 ret = rtl::toAsciiUpperCase(
static_cast<unsigned char>(s1[
i]))
59 - rtl::toAsciiUpperCase(
static_cast<unsigned char>(s2[
i]));
60 if(s1[
i] ==
'.' || s2[
i] ==
'.') {ret = 0;}
70typedef struct textcat_t{
77 char output[MAXOUTPUTSIZE];
94 if(
h){textcat_Done(
h);}
101 if(
h){textcat_Done(
h);}
109 std::vector<Guess> guesses;
114 int len = strlen(
text);
119 const char *guess_list = textcat_Classify(
h,
text, len);
124 int current_pointer = 0;
126 while(guess_list[current_pointer] !=
'\0')
128 while (guess_list[current_pointer] !=
GUESS_SEPARATOR_OPEN && guess_list[current_pointer] !=
'\0')
130 if(guess_list[current_pointer] !=
'\0')
132 Guess g(guess_list + current_pointer);
134 guesses.push_back(g);
146 return ret.empty() ?
Guess() : ret[0];
156 textcat_t *tables =
static_cast<textcat_t*
>(
h);
158 std::vector<Guess> lang;
161 for (
size_t i=0;
i<tables->size; ++
i)
163 if (tables->fprint_disable[
i] & mask)
165 std::string langStr =
"[";
166 langStr += fp_Name(tables->fprint[
i]);
167 Guess g(langStr.c_str());
192 textcat_t *tables =
static_cast<textcat_t*
>(
h);
196 for (
size_t i=0;
i<tables->size;
i++)
198 std::string language(fp_Name(tables->fprint[
i]));
200 tables->fprint_disable[
i] = mask;
218 h = special_textcat_Init(path,
prefix);
std::vector< Guess > GetManagedLanguages(const char mask)
Used to find out which languages are available, unavailable, or both; when mask = 0xF0,...
std::vector< Guess > GetAllManagedLanguages()
List all languages (whether or not they may appear in guesses)
SimpleGuesser()
Initializes the object with the configuration file "./conf.txt"
void XableLanguage(const std::string &lang, char mask)
void DisableLanguage(const std::string &lang)
Mark a language disabled.
void SetDBPath(const char *thePathOfConfFile, const char *prefix)
Load a new DB of fingerprints.
~SimpleGuesser()
Destroys the object.
SimpleGuesser & operator=(const SimpleGuesser &sg)
Guess GuessPrimaryLanguage(const char *text)
Analyze a text and return the most probable language of the text.
std::vector< Guess > GetAvailableLanguages()
List all available languages (those that may appear in guesses)
std::vector< Guess > GuessLanguage(const char *text)
Analyze a text and return the most probable languages of the text.
std::vector< Guess > GetUnavailableLanguages()
List all unavailable languages (disabled for any reason)
void EnableLanguage(const std::string &lang)
Mark a language enabled.
#define TEXTCAT_RESULT_SHORT_STR
#define GUESS_SEPARATOR_OPEN
SwNodeOffset min(const SwNodeOffset &a, const SwNodeOffset &b)
static int startsAsciiCaseInsensitive(const std::string &s1, const std::string &s2)
TODO.
#define MAX_STRING_LENGTH_TO_ANALYSE