LibreOffice Module lingucomponent (master) 1
languagetoolimp.cxx
Go to the documentation of this file.
1/* -*- Mode: C++; tab-width: 4; indent-tabs-mode: nil; c-basic-offset: 4 -*- */
2/*
3 * This file is part of the LibreOffice project.
4 *
5 * This Source Code Form is subject to the terms of the Mozilla Public
6 * License, v. 2.0. If a copy of the MPL was not distributed with this
7 * file, You can obtain one at http://mozilla.org/MPL/2.0/.
8 *
9 * This file incorporates work covered by the following license notice:
10 *
11 * Licensed to the Apache Software Foundation (ASF) under one or more
12 * contributor license agreements. See the NOTICE file distributed
13 * with this work for additional information regarding copyright
14 * ownership. The ASF licenses this file to you under the Apache
15 * License, Version 2.0 (the "License"); you may not use this file
16 * except in compliance with the License. You may obtain a copy of
17 * the License at http://www.apache.org/licenses/LICENSE-2.0 .
18 */
19
20#include <sal/config.h>
21
22#include <config_version.h>
23
26#include <cppuhelper/weak.hxx>
27#include "languagetoolimp.hxx"
28
30#include <svtools/strings.hrc>
31#include <unotools/resmgr.hxx>
32
33#include <vector>
34#include <set>
35#include <string.h>
36
37#include <officecfg/Office/Linguistic.hxx>
38
39#include <curl/curl.h>
40#include <boost/property_tree/ptree.hpp>
41#include <boost/property_tree/json_parser.hpp>
42#include <algorithm>
43#include <string_view>
44#include <sal/log.hxx>
45#include <tools/color.hxx>
46#include <tools/long.hxx>
47#include <com/sun/star/text/TextMarkupType.hpp>
48#include <com/sun/star/uno/Any.hxx>
50#include <unotools/lingucfg.hxx>
51#include <osl/mutex.hxx>
52#include <rtl/uri.hxx>
53
54using namespace com::sun::star;
55using namespace com::sun::star::beans;
56using namespace com::sun::star::lang;
57using namespace com::sun::star::linguistic2;
58
59namespace
60{
61constexpr size_t MAX_SUGGESTIONS_SIZE = 10;
62using LanguageToolCfg = officecfg::Office::Linguistic::GrammarChecking::LanguageTool;
63
64PropertyValue lcl_GetLineColorPropertyFromErrorId(const std::string& rErrorId)
65{
66 Color aColor;
67 if (rErrorId == "TYPOS" || rErrorId == "orth")
68 {
69 aColor = COL_LIGHTRED;
70 }
71 else if (rErrorId == "STYLE")
72 {
73 aColor = COL_LIGHTBLUE;
74 }
75 else
76 {
77 // Same color is used for other errorId's such as GRAMMAR, TYPOGRAPHY..
78 constexpr Color COL_ORANGE(0xD1, 0x68, 0x20);
79 aColor = COL_ORANGE;
80 }
81 return comphelper::makePropertyValue("LineColor", aColor);
82}
83
84OString encodeTextForLanguageTool(const OUString& text)
85{
86 // Let's be a bit conservative. I don't find a good description what needs encoding (and in
87 // which way) at https://languagetool.org/http-api/; the "Try it out!" function shows that
88 // different cases are handled differently by the demo; some percent-encode the UTF-8
89 // representation, like %D0%90 (for cyrillic А); some turn into entities like &#33; (for
90 // exclamation mark !); some other to things like \u0027 (for apostrophe ').
91 static constexpr auto myCharClass = rtl::createUriCharClass(
92 u8"0123456789ABCDEFGHIJKLMNOPQRSTUVWXYZabcdefghijklmnopqrstuvwxyz");
93 return OUStringToOString(
94 rtl::Uri::encode(text, myCharClass.data(), rtl_UriEncodeStrict, RTL_TEXTENCODING_UTF8),
95 RTL_TEXTENCODING_ASCII_US);
96}
97
// Receives a chunk of the server response and appends it to the std::string
// passed as user data. Returns the number of bytes consumed, or 0 when no
// string was supplied.
size_t WriteCallback(void* ptr, size_t size, size_t nmemb, void* userp)
{
    auto* const pSink = static_cast<std::string*>(userp);
    if (pSink == nullptr)
        return 0;

    const size_t nChunkBytes = size * nmemb;
    pSink->append(static_cast<const char*>(ptr), nChunkBytes);
    return nChunkBytes;
}
109
// HTTP request methods understood by the request helpers in this file.
enum class HTTP_METHOD
{
    HTTP_GET, // plain request, no post data is attached
    HTTP_POST // request that sends the supplied post data
};
115
116std::string makeHttpRequest_impl(std::u16string_view aURL, HTTP_METHOD method,
117 const OString& aPostData, curl_slist* pHttpHeader,
118 tools::Long& nStatusCode)
119{
120 struct curl_cleanup_t
121 {
122 void operator()(CURL* p) const { curl_easy_cleanup(p); }
123 };
124 std::unique_ptr<CURL, curl_cleanup_t> curl(curl_easy_init());
125 if (!curl)
126 {
127 SAL_WARN("languagetool", "CURL initialization failed");
128 return {}; // empty string
129 }
130
131 // Same useragent string as in CurlSession (ucp/webdav-curl/CurlSession.cxx)
132 curl_version_info_data const* const pVersion(curl_version_info(CURLVERSION_NOW));
133 assert(pVersion);
134 OString const useragent(
135 OString::Concat("LibreOffice " LIBO_VERSION_DOTTED " denylistedbackend/")
136 + pVersion->version + " " + pVersion->ssl_version);
137 (void)curl_easy_setopt(curl.get(), CURLOPT_USERAGENT, useragent.getStr());
138
139 OString aURL8 = OUStringToOString(aURL, RTL_TEXTENCODING_UTF8);
140 (void)curl_easy_setopt(curl.get(), CURLOPT_HTTPHEADER, pHttpHeader);
141 (void)curl_easy_setopt(curl.get(), CURLOPT_FAILONERROR, 1L);
142 (void)curl_easy_setopt(curl.get(), CURLOPT_URL, aURL8.getStr());
143 (void)curl_easy_setopt(curl.get(), CURLOPT_TIMEOUT, 10L);
144 // (void)curl_easy_setopt(curl.get(), CURLOPT_VERBOSE, 1L);
145
146 std::string response_body;
147 (void)curl_easy_setopt(curl.get(), CURLOPT_WRITEFUNCTION, WriteCallback);
148 (void)curl_easy_setopt(curl.get(), CURLOPT_WRITEDATA, &response_body);
149
150 // allow unknown or self-signed certificates
151 if (!LanguageToolCfg::SSLCertVerify::get())
152 {
153 (void)curl_easy_setopt(curl.get(), CURLOPT_SSL_VERIFYPEER, false);
154 (void)curl_easy_setopt(curl.get(), CURLOPT_SSL_VERIFYHOST, false);
155 }
156
157 if (method == HTTP_METHOD::HTTP_POST)
158 {
159 (void)curl_easy_setopt(curl.get(), CURLOPT_POST, 1L);
160 (void)curl_easy_setopt(curl.get(), CURLOPT_POSTFIELDS, aPostData.getStr());
161 }
162
163 CURLcode cc = curl_easy_perform(curl.get());
164 if (cc != CURLE_OK)
165 {
166 SAL_WARN("languagetool",
167 "CURL request returned with error: " << static_cast<sal_Int32>(cc));
168 }
169
170 curl_easy_getinfo(curl.get(), CURLINFO_RESPONSE_CODE, &nStatusCode);
171 return response_body;
172}
173
174std::string makeDudenHttpRequest(std::u16string_view aURL, const OString& aPostData,
175 tools::Long& nStatusCode)
176{
177 struct curl_slist* pList = nullptr;
178 OString sAccessToken
179 = OUStringToOString(LanguageToolCfg::ApiKey::get().value_or(""), RTL_TEXTENCODING_UTF8);
180
181 pList = curl_slist_append(pList, "Cache-Control: no-cache");
182 pList = curl_slist_append(pList, "Content-Type: application/json");
183 if (!sAccessToken.isEmpty())
184 {
185 sAccessToken = "access_token: " + sAccessToken;
186 pList = curl_slist_append(pList, sAccessToken.getStr());
187 }
188
189 return makeHttpRequest_impl(aURL, HTTP_METHOD::HTTP_POST, aPostData, pList, nStatusCode);
190}
191
192std::string makeHttpRequest(std::u16string_view aURL, HTTP_METHOD method, const OString& aPostData,
193 tools::Long& nStatusCode)
194{
195 OString realPostData(aPostData);
196 if (method == HTTP_METHOD::HTTP_POST)
197 {
198 OString apiKey
199 = OUStringToOString(LanguageToolCfg::ApiKey::get().value_or(""), RTL_TEXTENCODING_UTF8);
200 OString username = OUStringToOString(LanguageToolCfg::Username::get().value_or(""),
201 RTL_TEXTENCODING_UTF8);
202 if (!apiKey.isEmpty() && !username.isEmpty())
203 realPostData += "&username=" + username + "&apiKey=" + apiKey;
204 }
205
206 return makeHttpRequest_impl(aURL, method, realPostData, nullptr, nStatusCode);
207}
208
209template <typename Func>
210uno::Sequence<SingleProofreadingError> parseJson(std::string&& json, std::string path, Func f)
211{
212 std::stringstream aStream(std::move(json)); // Optimized in C++20
213 boost::property_tree::ptree aRoot;
214 boost::property_tree::read_json(aStream, aRoot);
215
216 if (auto tree = aRoot.get_child_optional(path))
217 {
218 uno::Sequence<SingleProofreadingError> aErrors(tree->size());
219 auto it = tree->begin();
220 for (auto& rError : asNonConstRange(aErrors))
221 {
222 f(it->second, rError);
223 it++;
224 }
225 return aErrors;
226 }
227 return {};
228}
229
230void parseDudenResponse(ProofreadingResult& rResult, std::string&& aJSONBody)
231{
232 rResult.aErrors = parseJson(
233 std::move(aJSONBody), "check-positions",
234 [](const boost::property_tree::ptree& rPos, SingleProofreadingError& rError) {
235 rError.nErrorStart = rPos.get<int>("offset", 0);
236 rError.nErrorLength = rPos.get<int>("length", 0);
237 rError.nErrorType = text::TextMarkupType::PROOFREADING;
238 //rError.aShortComment = ??
239 //rError.aFullComment = ??
240 const std::string sType = rPos.get<std::string>("type", {});
241 rError.aProperties = { lcl_GetLineColorPropertyFromErrorId(sType) };
242
243 const auto proposals = rPos.get_child_optional("proposals");
244 if (!proposals)
245 return;
246 rError.aSuggestions.realloc(std::min(proposals->size(), MAX_SUGGESTIONS_SIZE));
247 auto itProp = proposals->begin();
248 for (auto& rSuggestion : asNonConstRange(rError.aSuggestions))
249 {
250 rSuggestion = OStringToOUString(itProp->second.data(), RTL_TEXTENCODING_UTF8);
251 itProp++;
252 }
253 });
254}
255
256/*
257 rResult is both input and output
258 aJSONBody is the response body from the HTTP Request to LanguageTool API
259*/
260void parseProofreadingJSONResponse(ProofreadingResult& rResult, std::string&& aJSONBody)
261{
262 rResult.aErrors = parseJson(
263 std::move(aJSONBody), "matches",
264 [](const boost::property_tree::ptree& match, SingleProofreadingError& rError) {
265 rError.nErrorStart = match.get<int>("offset", 0);
266 rError.nErrorLength = match.get<int>("length", 0);
267 rError.nErrorType = text::TextMarkupType::PROOFREADING;
268 const std::string shortMessage = match.get<std::string>("message", {});
269 const std::string message = match.get<std::string>("shortMessage", {});
270
271 rError.aShortComment = OStringToOUString(shortMessage, RTL_TEXTENCODING_UTF8);
272 rError.aFullComment = OStringToOUString(message, RTL_TEXTENCODING_UTF8);
273
274 // Parse the error category for Line Color
275 std::string errorCategoryId;
276 if (auto rule = match.get_child_optional("rule"))
277 if (auto ruleCategory = rule->get_child_optional("category"))
278 errorCategoryId = ruleCategory->get<std::string>("id", {});
279 rError.aProperties = { lcl_GetLineColorPropertyFromErrorId(errorCategoryId) };
280
281 const auto replacements = match.get_child_optional("replacements");
282 if (!replacements)
283 return;
284 // Limit suggestions to avoid crash on context menu popup:
285 // (soffice:17251): Gdk-CRITICAL **: 17:00:21.277: ../../../../../gdk/wayland/gdkdisplay-wayland.c:1399: Unable to create Cairo image
286 // surface: invalid value (typically too big) for the size of the input (surface, pattern, etc.)
287 rError.aSuggestions.realloc(std::min(replacements->size(), MAX_SUGGESTIONS_SIZE));
288 auto itRep = replacements->begin();
289 for (auto& rSuggestion : asNonConstRange(rError.aSuggestions))
290 {
291 std::string replacementStr = itRep->second.get<std::string>("value", {});
292 rSuggestion = OStringToOUString(replacementStr, RTL_TEXTENCODING_UTF8);
293 itRep++;
294 }
295 });
296}
297
298OUString getLocaleListURL()
299{
300 if (auto oURL = LanguageToolCfg::BaseURL::get())
301 if (!oURL->isEmpty())
302 return *oURL + "/languages";
303 return {};
304}
305
306OUString getCheckerURL()
307{
308 if (auto oURL = LanguageToolCfg::BaseURL::get())
309 if (!oURL->isEmpty())
310 return *oURL + "/check";
311 return {};
312}
313}
314
316 : mCachedResults(10)
317{
318}
319
321
323
324sal_Bool SAL_CALL LanguageToolGrammarChecker::hasLocale(const Locale& rLocale)
325{
326 if (!m_aSuppLocales.hasElements())
327 getLocales();
328
329 for (auto const& suppLocale : std::as_const(m_aSuppLocales))
330 if (rLocale == suppLocale)
331 return true;
332
333 SAL_INFO("languagetool", "No locale \"" << LanguageTag::convertToBcp47(rLocale, false) << "\"");
334 return false;
335}
336
337uno::Sequence<Locale> SAL_CALL LanguageToolGrammarChecker::getLocales()
338{
339 if (m_aSuppLocales.hasElements())
340 return m_aSuppLocales;
341 if (!LanguageToolCfg::IsEnabled::get())
342 {
343 return m_aSuppLocales;
344 }
345
346 OUString localeUrl = getLocaleListURL();
347 if (localeUrl.isEmpty())
348 {
349 return m_aSuppLocales;
350 }
351 tools::Long statusCode = 0;
352 std::string response = makeHttpRequest(localeUrl, HTTP_METHOD::HTTP_GET, OString(), statusCode);
353 if (statusCode != 200)
354 {
355 return m_aSuppLocales;
356 }
357 if (response.empty())
358 {
359 return m_aSuppLocales;
360 }
361 boost::property_tree::ptree root;
362 std::stringstream aStream(response);
363 boost::property_tree::read_json(aStream, root);
364
365 size_t length = root.size();
366 m_aSuppLocales.realloc(length);
367 auto pArray = m_aSuppLocales.getArray();
368 int i = 0;
369 for (auto it = root.begin(); it != root.end(); it++, i++)
370 {
371 boost::property_tree::ptree& localeItem = it->second;
372 const std::string longCode = localeItem.get<std::string>("longCode");
374 OUString(longCode.c_str(), longCode.length(), RTL_TEXTENCODING_UTF8));
375 pArray[i] = aLocale;
376 }
377 return m_aSuppLocales;
378}
379
381 const OUString& aDocumentIdentifier, const OUString& aText, const Locale& aLocale,
382 sal_Int32 nStartOfSentencePosition, sal_Int32 nSuggestedBehindEndOfSentencePosition,
383 const uno::Sequence<PropertyValue>& aProperties)
384{
385 // ProofreadingResult declared here instead of parseHttpJSONResponse because of the early exists.
386 ProofreadingResult xRes;
387 xRes.aDocumentIdentifier = aDocumentIdentifier;
388 xRes.aText = aText;
389 xRes.aLocale = aLocale;
390 xRes.nStartOfSentencePosition = nStartOfSentencePosition;
391 xRes.nBehindEndOfSentencePosition = nSuggestedBehindEndOfSentencePosition;
392 xRes.aProperties = {};
393 xRes.xProofreader = this;
394 xRes.aErrors = {};
395
396 if (aText.isEmpty())
397 {
398 return xRes;
399 }
400
401 if (nStartOfSentencePosition != 0)
402 {
403 return xRes;
404 }
405
406 xRes.nStartOfNextSentencePosition = aText.getLength();
407
408 if (!LanguageToolCfg::IsEnabled::get())
409 {
410 return xRes;
411 }
412
413 OUString checkerURL = getCheckerURL();
414 if (checkerURL.isEmpty())
415 {
416 return xRes;
417 }
418
419 if (aProperties.getLength() > 0 && aProperties[0].Name == "Update")
420 {
421 // locale changed
422 xRes.aText = "";
423 return xRes;
424 }
425
426 sal_Int32 spaceIndex = std::min(xRes.nStartOfNextSentencePosition, aText.getLength() - 1);
427 while (spaceIndex < aText.getLength() && aText[spaceIndex] == ' ')
428 {
429 xRes.nStartOfNextSentencePosition += 1;
430 spaceIndex = xRes.nStartOfNextSentencePosition;
431 }
432 if (xRes.nStartOfNextSentencePosition == nSuggestedBehindEndOfSentencePosition
433 && spaceIndex < aText.getLength())
434 {
435 xRes.nStartOfNextSentencePosition
436 = std::min(nSuggestedBehindEndOfSentencePosition + 1, aText.getLength());
437 }
438 xRes.nBehindEndOfSentencePosition
439 = std::min(xRes.nStartOfNextSentencePosition, aText.getLength());
440
441 OString langTag(LanguageTag::convertToBcp47(aLocale, false).toUtf8());
442 OString postData = encodeTextForLanguageTool(aText);
443 const bool bDudenProtocol = LanguageToolCfg::RestProtocol::get().value_or("") == "duden";
444 if (bDudenProtocol)
445 {
446 std::stringstream aStream;
447 boost::property_tree::ptree aTree;
448 aTree.put("text-language", langTag.getStr());
449 aTree.put("text", postData.getStr());
450 aTree.put("hyphenation", false);
451 aTree.put("spellchecking-level", 3);
452 aTree.put("correction-proposals", true);
453 boost::property_tree::write_json(aStream, aTree);
454 postData = OString(aStream.str());
455 }
456 else
457 {
458 postData = "text=" + postData + "&language=" + langTag;
459 }
460
461 if (auto cachedResult = mCachedResults.find(postData); cachedResult != mCachedResults.end())
462 {
463 xRes.aErrors = cachedResult->second;
464 return xRes;
465 }
466
467 tools::Long http_code = 0;
468 std::string response_body;
469 if (bDudenProtocol)
470 response_body = makeDudenHttpRequest(checkerURL, postData, http_code);
471 else
472 response_body = makeHttpRequest(checkerURL, HTTP_METHOD::HTTP_POST, postData, http_code);
473
474 if (http_code != 200)
475 {
476 return xRes;
477 }
478
479 if (response_body.length() <= 0)
480 {
481 return xRes;
482 }
483
484 if (bDudenProtocol)
485 {
486 parseDudenResponse(xRes, std::move(response_body));
487 }
488 else
489 {
490 parseProofreadingJSONResponse(xRes, std::move(response_body));
491 }
492 // cache the result
493 mCachedResults.insert(std::make_pair(postData, xRes.aErrors));
494 return xRes;
495}
496
497void SAL_CALL LanguageToolGrammarChecker::ignoreRule(const OUString& /*aRuleIdentifier*/,
498 const Locale& /*aLocale*/
499)
500{
501}
503
504OUString SAL_CALL LanguageToolGrammarChecker::getServiceDisplayName(const Locale& rLocale)
505{
506 std::locale loc(Translate::Create("svt", LanguageTag(rLocale)));
507 return Translate::get(STR_DESCRIPTION_LANGUAGETOOL, loc);
508}
509
511{
512 return "org.openoffice.lingu.LanguageToolGrammarChecker";
513}
514
515sal_Bool SAL_CALL LanguageToolGrammarChecker::supportsService(const OUString& ServiceName)
516{
518}
519
521{
522 return { SN_GRAMMARCHECKER };
523}
524
525void SAL_CALL LanguageToolGrammarChecker::initialize(const uno::Sequence<uno::Any>&) {}
526
527extern "C" SAL_DLLPUBLIC_EXPORT css::uno::XInterface*
529 css::uno::XComponentContext*, css::uno::Sequence<css::uno::Any> const&)
530{
531 return cppu::acquire(new LanguageToolGrammarChecker());
532}
533
534/* vim:set shiftwidth=4 softtabstop=4 expandtab cinoptions=b1,g0,N-s cinkeys+=0=break: */
OptionalString sType
PropertiesInfo aProperties
static css::lang::Locale convertToLocale(LanguageType nLangID, bool bResolveSystem=true)
static OUString convertToBcp47(LanguageType nLangID)
virtual sal_Bool SAL_CALL supportsService(const OUString &rServiceName) override
virtual sal_Bool SAL_CALL hasLocale(const css::lang::Locale &rLocale) override
o3tl::lru_map< OString, css::uno::Sequence< css::linguistic2::SingleProofreadingError > > mCachedResults
virtual OUString SAL_CALL getImplementationName() override
virtual sal_Bool SAL_CALL isSpellChecker() override
virtual css::linguistic2::ProofreadingResult SAL_CALL doProofreading(const OUString &aDocumentIdentifier, const OUString &aText, const css::lang::Locale &aLocale, sal_Int32 nStartOfSentencePosition, sal_Int32 nSuggestedBehindEndOfSentencePosition, const css::uno::Sequence< css::beans::PropertyValue > &aProperties) override
virtual css::uno::Sequence< css::lang::Locale > SAL_CALL getLocales() override
virtual void SAL_CALL initialize(const css::uno::Sequence< css::uno::Any > &rArguments) override
virtual OUString SAL_CALL getServiceDisplayName(const css::lang::Locale &rLocale) override
virtual void SAL_CALL resetIgnoreRules() override
virtual css::uno::Sequence< OUString > SAL_CALL getSupportedServiceNames() override
virtual ~LanguageToolGrammarChecker() override
virtual void SAL_CALL ignoreRule(const OUString &aRuleIdentifier, const css::lang::Locale &aLocale) override
css::uno::Sequence< css::lang::Locale > m_aSuppLocales
void insert(key_value_pair_t &rPair)
list_const_iterator_t end() const
list_const_iterator_t find(const Key &key)
constexpr ::Color COL_LIGHTRED(0xFF, 0x00, 0x00)
constexpr ::Color COL_LIGHTBLUE(0x00, 0x00, 0xFF)
SAL_DLLPUBLIC_EXPORT css::uno::XInterface * lingucomponent_LanguageToolGrammarChecker_get_implementation(css::uno::XComponentContext *, css::uno::Sequence< css::uno::Any > const &)
#define SAL_WARN(area, stream)
#define SAL_INFO(area, stream)
constexpr OUStringLiteral SN_GRAMMARCHECKER
std::locale Create(std::string_view aPrefixName, const LanguageTag &rLocale)
OUString get(TranslateId sContextAndId, const std::locale &loc)
size
css::beans::PropertyValue makePropertyValue(const OUString &rName, T &&rValue)
bool match(const sal_Unicode *pWild, const sal_Unicode *pStr, const sal_Unicode cEscape)
bool CPPUHELPER_DLLPUBLIC supportsService(css::lang::XServiceInfo *implementation, rtl::OUString const &name)
int i
OString OUStringToOString(std::u16string_view str, ConnectionSettings const *settings)
long Long
officecfg::Office::Linguistic::GrammarChecking::LanguageTool LanguageToolCfg
unsigned char sal_Bool