12#include <rtl/string.hxx>
15#include <osl/file.hxx>
16#include <osl/thread.h>
23#include <CLucene/analysis/LanguageBasedAnalyzer.h>
34 std::u16string_view srcDir, std::u16string_view outDir)
45template <
class Constructor>
46auto TryWithUnicodePathWorkaround(
const OUString& ustrPath,
const Constructor& constructor)
48 const rtl_TextEncoding eThreadEncoding = osl_getThreadTextEncoding();
53 return constructor(sPath);
55 catch (
const CLuceneError&)
65 if (GetShortPathNameW(o3tl::toW(ustrPath.getStr()), buf, std::size(buf)) == 0)
68 return constructor(sPath);
82 bool bUseCJK = sLang ==
u"ja" || sLang ==
u"ko" || sLang ==
u"zh";
85 std::unique_ptr<lucene::analysis::Analyzer> analyzer;
87 analyzer.reset(
new lucene::analysis::LanguageBasedAnalyzer(L
"cjk"));
89 analyzer.reset(
new lucene::analysis::standard::StandardAnalyzer());
91 OUString ustrSystemPath;
92 osl::File::getSystemPathFromFileURL(
d_indexDir, ustrSystemPath);
97 auto writer = TryWithUnicodePathWorkaround(ustrSystemPath, [&analyzer](
const OString& s) {
98 return std::make_unique<lucene::index::IndexWriter>(s.getStr(), analyzer.get(),
true);
101 OString indexDirStr =
OUStringToOString(ustrSystemPath, osl_getThreadTextEncoding());
102 auto writer = std::make_unique<lucene::index::IndexWriter>(indexDirStr.getStr(),
103 analyzer.get(),
true);
109 writer->setMaxFieldLength(lucene::index::IndexWriter::DEFAULT_MAX_FIELD_LENGTH*2);
113 for (
auto const& elem :
d_files)
116 writer->addDocument(&doc);
123 catch (CLuceneError &e)
145 osl::Directory dir(path);
146 if (osl::FileBase::E_None != dir.open()) {
147 d_error =
"Error reading directory " + path;
151 osl::DirectoryItem item;
152 osl::FileStatus fileStatus(osl_FileStatus_Mask_FileName | osl_FileStatus_Mask_Type);
153 while (dir.getNextItem(item) == osl::FileBase::E_None) {
154 item.getFileStatus(fileStatus);
155 if (fileStatus.getFileType() == osl::FileStatus::Regular) {
156 d_files.insert(fileStatus.getFileName());
166 OUString path =
"#HLP#" +
d_module +
"/" + fileName;
168 doc->add(*_CLNEW
Field(_T(
"path"), aPath.data(),
int(Field::STORE_YES) |
int(Field::INDEX_UNTOKENIZED)));
170 OUString sEscapedFileName =
172 rtl_UriCharClassUric, rtl_UriEncodeIgnoreEscapes, RTL_TEXTENCODING_UTF8);
175 OUString captionPath =
d_captionDir +
"/" + sEscapedFileName;
176 doc->add(*_CLNEW
Field(_T(
"caption"),
helpFileReader(captionPath),
int(Field::STORE_NO) |
int(Field::INDEX_TOKENIZED)));
179 OUString contentPath =
d_contentDir +
"/" + sEscapedFileName;
180 doc->add(*_CLNEW
Field(_T(
"content"),
helpFileReader(contentPath),
int(Field::STORE_NO) |
int(Field::INDEX_TOKENIZED)));
184 osl::File file(path);
185 if (osl::FileBase::E_None == file.open(osl_File_OpenFlag_Read)) {
187 OUString ustrSystemPath;
188 osl::File::getSystemPathFromFileURL(path, ustrSystemPath);
190 return TryWithUnicodePathWorkaround(ustrSystemPath, [](
const OString& s) {
191 return _CLNEW lucene::util::FileReader(s.getStr(),
"UTF-8");
194 OString pathStr =
OUStringToOString(ustrSystemPath, osl_getThreadTextEncoding());
195 return _CLNEW lucene::util::FileReader(pathStr.getStr(),
"UTF-8");
198 return _CLNEW lucene::util::StringReader(L
"");
std::vector< TCHAR > OUStringToTCHARVec(OUString const &rStr)
bool indexDocuments()
Run the indexer.
bool scanForFiles()
Scan the caption & contents directories for help files.
std::set< OUString > d_files
static lucene::util::Reader * helpFileReader(OUString const &path)
Create a reader for the given file, and create an "empty" reader in case the file doesn't exist.
HelpIndexer(OUString lang, OUString module, std::u16string_view srcDir, std::u16string_view outDir)
void helpDocument(OUString const &fileName, lucene::document::Document *doc) const
Fill the Document with information on the given help file.
static std::string encode(const std::string &rIn)
OUString runtimeToOUString(char const *runtimeString)
std::basic_string_view< charT, traits > getToken(std::basic_string_view< charT, traits > sv, charT delimiter, std::size_t &position)
OString OUStringToOString(std::u16string_view str, ConnectionSettings const *settings)