LibreOffice Module unotools (master) 1
fontdefs.cxx
Go to the documentation of this file.
1/* -*- Mode: C++; tab-width: 4; indent-tabs-mode: nil; c-basic-offset: 4 -*- */
2/*
3 * This file is part of the LibreOffice project.
4 *
5 * This Source Code Form is subject to the terms of the Mozilla Public
6 * License, v. 2.0. If a copy of the MPL was not distributed with this
7 * file, You can obtain one at http://mozilla.org/MPL/2.0/.
8 *
9 * This file incorporates work covered by the following license notice:
10 *
11 * Licensed to the Apache Software Foundation (ASF) under one or more
12 * contributor license agreements. See the NOTICE file distributed
13 * with this work for additional information regarding copyright
14 * ownership. The ASF licenses this file to you under the Apache
15 * License, Version 2.0 (the "License"); you may not use this file
16 * except in compliance with the License. You may obtain a copy of
17 * the License at http://www.apache.org/licenses/LICENSE-2.0 .
18 */
19
20#include <o3tl/safeint.hxx>
21#include <o3tl/string_view.hxx>
22#include <unotools/fontdefs.hxx>
23#include <unotools/fontcfg.hxx>
24#include <rtl/ustrbuf.hxx>
25
26#include <string_view>
27#include <unordered_map>
28
29OUString StripScriptFromName(const OUString& _aName)
30{
31 // I worry that someone will have a font which *does* have
32 // e.g. "Greek" legitimately at the end of its name :-(
33 const char*const suffixes[] = { " baltic",
34 " ce",
35 " cyr",
36 " greek",
37 " tur",
38 " (arabic)",
39 " (hebrew)",
40 " (thai)",
41 " (vietnamese)"
42 };
43
44 OUString aName = _aName;
45 // These can be crazily piled up, e.g. Times New Roman CYR Greek
46 bool bFinished = false;
47 while (!bFinished)
48 {
49 bFinished = true;
50 for (const char* suffix : suffixes)
51 {
52 size_t nLen = strlen(suffix);
53 if (aName.endsWithIgnoreAsciiCaseAsciiL(suffix, nLen))
54 {
55 bFinished = false;
56 aName = aName.copy(0, aName.getLength() - nLen);
57 }
58 }
59 }
60 return aName;
61}
62
63//return true if the character is stripped from the string
64static bool toOnlyLowerAsciiOrStrip(sal_Unicode c, OUStringBuffer &rName, sal_Int32 nIndex, sal_Int32& rLen)
65{
66 // not lowercase Ascii
67 if (c < 'a' || c > 'z')
68 {
69 // To Lowercase-Ascii
70 if ( (c >= 'A') && (c <= 'Z') )
71 {
72 c += 'a' - 'A';
73 rName[nIndex] = c;
74 }
75 else if( ((c < '0') || (c > '9')) && (c != ';') && (c != '(') && (c != ')') ) // not 0-9, semicolon, or brackets
76 {
77 // Remove white spaces and special characters
78 rName.remove(nIndex, 1);
79 rLen--;
80 return true;
81 }
82 }
83 return false;
84}
85
86OUString GetEnglishSearchFontName(std::u16string_view rInName)
87{
88 OUStringBuffer rName(rInName);
89 bool bNeedTranslation = false;
90 sal_Int32 nLen = rName.getLength();
91
92 // Remove trailing whitespaces
93 sal_Int32 i = nLen;
94 while ( i && (rName[ i-1 ] < 32) )
95 i--;
96 if ( i != nLen )
97 rName.truncate(i);
98
99 nLen = rName.getLength();
100
101 // remove all whitespaces and converts to lower case ASCII
102 // TODO: better transliteration to ASCII e.g. all digits
103 i = 0;
104 while ( i < nLen )
105 {
106 sal_Unicode c = rName[ i ];
107 if ( c > 127 )
108 {
109 // Translate to Lowercase-ASCII
110 // FullWidth-ASCII to half ASCII
111 if ( (c >= 0xFF00) && (c <= 0xFF5E) )
112 {
113 c -= 0xFF00-0x0020;
114 rName[ i ] = c;
115 if (toOnlyLowerAsciiOrStrip(c, rName, i, nLen))
116 continue;
117 }
118 else
119 {
120 // Only Fontnames with None-Ascii-Characters must be translated
121 bNeedTranslation = true;
122 }
123 }
124 else if (toOnlyLowerAsciiOrStrip(c, rName, i, nLen))
125 continue;
126
127 i++;
128 }
129 OUString rNameStr = rName.makeStringAndClear();
130 // translate normalized localized name to its normalized English ASCII name
131 if( bNeedTranslation )
132 {
133 typedef std::unordered_map<OUString, OUString> FontNameDictionary;
134 static FontNameDictionary const aDictionary = {
135 {u"\uBC14\uD0D5", "batang"},
136 {u"\uBC14\uD0D5\uCCB4", "batangche"},
137 {u"\uAD81\uC11C", "gungshu"},
138 {u"\uAD81\uC11C\uCCB4", "gungshuche"},
139 {u"\uAD74\uB9BC", "gulim"},
140 {u"\uAD74\uB9BC\uCCB4", "gulimche"},
141 {u"\uB3CB\uC6C0", "dotum"},
142 {u"\uB3CB\uC6C0\uCCB4", "dotumche"},
143 {u"\u5B8B\u4F53", "simsun"},
144 {u"\u65B0\u5B8B\u4F53", "nsimsun"},
145 {u"\u9ED1\u4F53", "simhei"},
146 {u"\u6977\u4F53", "simkai"},
147 {u"\u4E2D\u6613\u5B8B\u4F53", "zycjksun"},
148 {u"\u4E2D\u6613\u9ED1\u4F53", "zycjkhei"},
149 {u"\u4E2D\u6613\u6977\u4F53", "zycjkkai"},
150 {u"\u65B9\u6B63\u9ED1\u4F53", "fzhei"},
151 {u"\u65B9\u6B63\u6977\u4F53", "fzkai"},
152 {u"\u65B9\u6B63\u5B8B\u4E00", "fzsong"},
153 {u"\u65B9\u6B63\u4E66\u5B8B", "fzshusong"},
154 {u"\u65B9\u6B63\u4EFF\u5B8B", "fzfangsong"},
155 // Attention: this fonts includes the wrong encoding vector - so we double the names with correct and wrong encoding
156 // First one is the GB-Encoding (we think the correct one), second is the big5 encoded name
157 {u"m\u7B80\u9ED1", "mhei"},
158 {u"m\u6F60\u7AAA", "mhei"},
159 {u"m\u7B80\u6977\u566C", "mkai"},
160 {u"m\u6F60\u7FF1\u628E", "mkai"},
161 {u"m\u7B80\u5B8B", "msong"},
162 {u"m\u6F60\u51BC", "msong"},
163 {u"m\u7B80\u592B\u5B8B", "cfangsong"},
164 {u"m\u6F60\u6E98\u51BC", "cfangsong"},
165 {u"\u7D30\u660E\u9AD4", "mingliu"},
166 {u"\u65B0\u7D30\u660E\u9AD4", "pmingliu"},
167 {u"\u6865", "hei"},
168 {u"\u6B61", "kai"},
169 {u"\u6D69\u6E67", "ming"},
170 {u"ms\u30B4\u30B7\u30C3\u30AF", "msgothic"},
171 {u"msp\u30B4\u30B7\u30C3\u30AF", "mspgothic"},
172 {u"ms\u660E\u671D", "msmincho"},
173 {u"msp\u660E\u671D", "mspmincho"},
174 {u"\u5FAE\u8EDF\u6B63\u9ED1\u9AD4", "microsoftjhenghei"},
175 {u"\u5FAE\u8F6F\u96C5\u9ED1", "microsoftyahei"},
176 {u"\u30e1\u30a4\u30ea\u30aa", "meiryo"},
177 {u"hg\u660E\u671Dl", "hgminchol"},
178 {u"hg\u30B4\u30B7\u30C3\u30AFb", "hggothicb"},
179 {u"hgp\u660E\u671Dl", "hgpminchol"},
180 {u"hgp\u30B4\u30B7\u30C3\u30AFb", "hgpgothicb"},
181 {u"hg\u660E\u671Dlsun", "hgmincholsun"},
182 {u"hg\u30B4\u30B7\u30C3\u30AFbsun", "hggothicbsun"},
183 {u"hgp\u660E\u671Dlsun", "hgpmincholsun"},
184 {u"hgp\u30B4\u30B7\u30C3\u30AFbsun", "hgpgothicbsun"},
185 {u"hg\u5E73\u6210\u660E\u671D\u4F53", "hgheiseimin"},
186 {u"hg\u5E73\u6210\u660E\u671D\u4F53w3x12", "hgheiseimin"},
187 {u"ipa\u660E\u671D", "ipamincho"},
188 {u"ipap\u660E\u671D", "ipapmincho"},
189 {u"ipa\u30B4\u30B7\u30C3\u30AF", "ipagothic"},
190 {u"ipap\u30B4\u30B7\u30C3\u30AF", "ipapgothic"},
191 {u"ipaui\u30B4\u30B7\u30C3\u30AF", "ipauigothic"},
192 {u"takao\u660E\u671D", "takaomincho"},
193 {u"takaop\u660E\u671D", "takaopmincho"},
194 {u"takao\u30B4\u30B7\u30C3\u30AF", "takaogothic"},
195 {u"takaop\u30B4\u30B7\u30C3\u30AF", "takaopgothic"},
196 {u"\u3055\u3056\u306A\u307F\u660E\u671D", "sazanamimincho"},
197 {u"\u3055\u3056\u306A\u307F\u30B4\u30B7\u30C3\u30AF", "sazanamigothic"},
198 {u"\u6771\u98A8\u660E\u671D", "kochimincho"},
199 {u"\u6771\u98A8\u30B4\u30B7\u30C3\u30AF", "kochigothic"},
200 {u"\uC36C\uB3CB\uC6C0", "sundotum"},
201 {u"\uC36C\uAD74\uB9BC", "sungulim"},
202 {u"\uC36C\uBC14\uD0D5", "sunbatang"},
203 {u"\uBC31\uBB35\uB3CB\uC6C0", "baekmukdotum"},
204 {u"\uBC31\uBB35\uAD74\uB9BC", "baekmukgulim"},
205 {u"\uBC31\uBB35\uBC14\uD0D5", "baekmukbatang"},
206 {u"\u65B9\u6B63\u9ED1\u4F53", "fzheiti"},
207 {u"\u65B9\u6B63\u9ED1\u9AD4", "fzheiti"},
208 {u"\u65B9\u6B63\u6977\u4F53", "fzkaiti"},
209 {u"\u65B9\u6B63\u6977\u9AD4", "fzkaitib"},
210 {u"\u65B9\u6B63\u660E\u9AD4", "fzmingtib"},
211 {u"\u65B9\u6B63\u5B8B\u4F53", "fzsongti"},
212 {u"hy\uACAC\uBA85\uC870", "hymyeongjoextra"},
213 {u"hy\uC2E0\uBA85\uC870", "hysinmyeongjomedium"},
214 {u"hy\uC911\uACE0\uB515", "hygothicmedium"},
215 {u"hy\uADF8\uB798\uD53Dm", "hygraphicmedium"},
216 {u"hy\uADF8\uB798\uD53D", "hygraphic"},
217 {u"\uC0C8\uAD74\uB9BC", "newgulim"},
218 {u"\uC36C\uAD81\uC11C", "sungungseo"},
219 {u"hy\uAD81\uC11Cb", "hygungsobold"},
220 {u"hy\uAD81\uC11C", "hygungso"},
221 {u"\uC36C\uD5E4\uB4DC\uB77C\uC778", "sunheadline"},
222 {u"hy\uD5E4\uB4DC\uB77C\uC778m", "hyheadlinemedium"},
223 {u"hy\uD5E4\uB4DC\uB77C\uC778", "hyheadline"},
224 {u"\uD734\uBA3C\uC61B\uCCB4", "yetr"},
225 {u"hy\uACAC\uACE0\uB515", "hygothicextra"},
226 {u"\uC36C\uBAA9\uD310", "sunmokpan"},
227 {u"\uC36C\uC5FD\uC11C", "sunyeopseo"},
228 {u"\uC36C\uBC31\uC1A1", "sunbaeksong"},
229 {u"hy\uC5FD\uC11Cl", "hypostlight"},
230 {u"hy\uC5FD\uC11C", "hypost"},
231 {u"\uD734\uBA3C\uB9E4\uC9C1\uCCB4", "magicr"},
232 {u"\uC36C\uD06C\uB9AC\uC2A4\uD0C8", "suncrystal"},
233 {u"\uC36C\uC0D8\uBB3C", "sunsaemmul"},
234 {u"hy\uC595\uC740\uC0D8\uBB3Cm", "hyshortsamulmedium"},
235 {u"hy\uC595\uC740\uC0D8\uBB3C", "hyshortsamul"},
236 {u"\uD55C\uCEF4\uBC14\uD0D5", "haansoftbatang"},
237 {u"\uD55C\uCEF4\uB3CB\uC6C0", "haansoftdotum"},
238 {u"\uD55C\uC591\uD574\uC11C", "hyhaeseo"},
239 {u"md\uC194\uCCB4", "mdsol"},
240 {u"md\uAC1C\uC131\uCCB4", "mdgaesung"},
241 {u"md\uC544\uD2B8\uCCB4", "mdart"},
242 {u"md\uC544\uB871\uCCB4", "mdalong"},
243 {u"md\uC774\uC19D\uCCB4", "mdeasop"},
244 {u"hg\uFF7A\uFF9E\uFF7C\uFF6F\uFF78e", "hggothice"},
245 {u"hgp\uFF7A\uFF9E\uFF7C\uFF6F\uFF78e", "hgpgothice"},
246 {u"hgs\uFF7A\uFF9E\uFF7C\uFF6F\uFF78e", "hgsgothice"},
247 {u"hg\uFF7A\uFF9E\uFF7C\uFF6F\uFF78m", "hggothicm"},
248 {u"hgp\uFF7A\uFF9E\uFF7C\uFF6F\uFF78m", "hgpgothicm"},
249 {u"hgs\uFF7A\uFF9E\uFF7C\uFF6F\uFF78m", "hgsgothicm"},
250 {u"hg\u884C\u66F8\u4F53", "hggyoshotai"},
251 {u"hgp\u884C\u66F8\u4F53", "hgpgyoshotai"},
252 {u"hgs\u884C\u66F8\u4F53", "hgsgyoshotai"},
253 {u"hg\u6559\u79D1\u66F8\u4F53", "hgkyokashotai"},
254 {u"hgp\u6559\u79D1\u66F8\u4F53", "hgpkyokashotai"},
255 {u"hgs\u6559\u79D1\u66F8\u4F53", "hgskyokashotai"},
256 {u"hg\u660E\u671Db", "hgminchob"},
257 {u"hgp\u660E\u671Db", "hgpminchob"},
258 {u"hgs\u660E\u671Db", "hgsminchob"},
259 {u"hg\u660E\u671De", "hgminchoe"},
260 {u"hgp\u660E\u671De", "hgpminchoe"},
261 {u"hgs\u660E\u671De", "hgsminchoe"},
262 {u"hg\u5275\u82F1\u89D2\uFF8E\uFF9F\uFF6F\uFF8C\uFF9F\u4F53", "hgsoeikakupoptai"},
263 {u"hgp\u5275\u82F1\u89D2\uFF8E\uFF9F\uFF6F\uFF8C\uFF9F\u4F53", "hgpsoeikakupopta"},
264 {u"hgs\u5275\u82F1\u89D2\uFF8E\uFF9F\uFF6F\uFF8C\uFF9F\u4F53", "hgssoeikakupopta"},
265 {u"hg\u5275\u82F1\uFF8C\uFF9F\uFF9A\uFF7E\uFF9E\uFF9D\uFF7Deb", "hgsoeipresenceeb"},
266 {u"hgp\u5275\u82F1\uFF8C\uFF9F\uFF9A\uFF7E\uFF9E\uFF9D\uFF7Deb", "hgpsoeipresenceeb"},
267 {u"hgs\u5275\u82F1\uFF8C\uFF9F\uFF9A\uFF7E\uFF9E\uFF9D\uFF7Deb", "hgssoeipresenceeb"},
268 {u"hg\u5275\u82F1\u89D2\uFF7A\uFF9E\uFF7C\uFF6F\uFF78ub", "hgsoeikakugothicub"},
269 {u"hgp\u5275\u82F1\u89D2\uFF7A\uFF9E\uFF7C\uFF6F\uFF78ub", "hgpsoeikakugothicub"},
270 {u"hgs\u5275\u82F1\u89D2\uFF7A\uFF9E\uFF7C\uFF6F\uFF78ub", "hgssoeikakugothicub"},
271 {u"hg\u6B63\u6977\u66F8\u4F53-pro", "hgseikaishotaipro"},
272 {u"hg\u4E38\uFF7A\uFF9E\uFF7C\uFF6F\uFF78-pro", "hgmarugothicmpro"},
273 {u"\u30D2\u30E9\u30AE\u30CE\u660E\u671Dpro", "hiraginominchopro"},
274 {u"\u30D2\u30E9\u30AE\u30CE\u660E\u671Dpron", "hiraginominchopron"},
275 {u"\u30D2\u30E9\u30AE\u30CE\u89D2\u30B4\u30B7\u30C3\u30AF", "hiraginosans"},
276 {u"\u30D2\u30E9\u30AE\u30CE\u89D2\u30B4pro", "hiraginokakugothicpro"},
277 {u"\u30D2\u30E9\u30AE\u30CE\u89D2\u30B4pron", "hiraginokakugothicpron"},
278 {u"\u30D2\u30E9\u30AE\u30CE\u4E38\u30B4pro", "hiraginomarugothicpro"},
279 {u"\u30D2\u30E9\u30AE\u30CE\u4E38\u30B4pron", "hiraginomarugothicpron"},
280 {u"\u6E38\u30B4\u30B7\u30C3\u30AF", "yugothic"},
281 {u"\u6E38\u30B4\u30B7\u30C3\u30AF\u4F53", "yugothictai"},
282 {u"\u6E38\u660E\u671D", "yumincho"},
283 {u"\u6E38\u660E\u671D\u4F53", "yuminchotai"},
284 {u"\u6E90\u30CE\u89D2\u30B4\u30B7\u30C3\u30AF", "sourcehansans"},
285 {u"\u6E90\u30CE\u89D2\u30B4\u30B7\u30C3\u30AFjp", "sourcehansansjp"},
286 {u"\u6E90\u30CE\u89D2\u30B4\u30B7\u30C3\u30AFhw", "sourcehansanshw"},
287 {u"\u6E90\u30CE\u660E\u671D", "sourcehanserif"},
288 {u"\u6E90\u30CE\u660E\u671Djp", "sourcehanserifjp"},
289 {u"ipamj\u660E\u671D", "ipamjmincho"},
290 {u"ipaex\u30B4\u30B7\u30C3\u30AF", "ipaexgothic"},
291 {u"ipaex\u660E\u671D", "ipaexmimcho"}};
292
293 FontNameDictionary::const_iterator it = aDictionary.find( rNameStr );
294 if( it != aDictionary.end() )
295 rNameStr = it->second;
296 }
297
298 return rNameStr;
299}
300
301std::u16string_view GetNextFontToken( std::u16string_view rTokenStr, sal_Int32& rIndex )
302{
303 // check for valid start index
304 size_t nStringLen = rTokenStr.size();
305 if( o3tl::make_unsigned(rIndex) >= nStringLen )
306 {
307 rIndex = -1;
308 return {};
309 }
310
311 // find the next token delimiter and return the token substring
312 const sal_Unicode* pStr = rTokenStr.data() + rIndex;
313 const sal_Unicode* pEnd = rTokenStr.data() + nStringLen;
314 for(; pStr < pEnd; ++pStr )
315 if( (*pStr == ';') || (*pStr == ',') )
316 break;
317
318 sal_Int32 nTokenStart = rIndex;
319 sal_Int32 nTokenLen;
320 if( pStr < pEnd )
321 {
322 rIndex = sal::static_int_cast<sal_Int32>(pStr - rTokenStr.data());
323 nTokenLen = rIndex - nTokenStart;
324 ++rIndex; // skip over token separator
325 }
326 else
327 {
328 // no token delimiter found => handle last token
329 rIndex = -1;
330
331 // optimize if the token string consists of just one token
332 if( !nTokenStart )
333 {
334 return rTokenStr;
335 }
336 else
337 {
338 nTokenLen = nStringLen - nTokenStart;
339 }
340 }
341
342 return rTokenStr.substr( nTokenStart, nTokenLen );
343}
344
345static bool ImplIsFontToken( std::u16string_view rName, std::u16string_view rToken )
346{
347 sal_Int32 nIndex = 0;
348 do
349 {
350 std::u16string_view aTempName = GetNextFontToken( rName, nIndex );
351 if ( rToken == aTempName )
352 return true;
353 }
354 while ( nIndex != -1 );
355
356 return false;
357}
358
359static void ImplAppendFontToken( OUString& rName, std::u16string_view rNewToken )
360{
361 if ( !rName.isEmpty() )
362 {
363 rName += ";";
364 }
365 rName += rNewToken;
366}
367
368void AddTokenFontName( OUString& rName, std::u16string_view rNewToken )
369{
370 if ( !ImplIsFontToken( rName, rNewToken ) )
372}
373
374OUString GetSubsFontName( std::u16string_view rName, SubsFontFlags nFlags )
375{
376 OUString aName;
377
378 sal_Int32 nIndex = 0;
379 OUString aOrgName = GetEnglishSearchFontName(
380 GetNextFontToken( rName, nIndex ) );
381
382 // #93662# do not try to replace StarSymbol with MS only font
384 && ( aOrgName == "starsymbol"
385 || aOrgName == "opensymbol" ) )
386 return aName;
387
388 if (nFlags & SubsFontFlags::MS)
389 {
390 const utl::FontNameAttr* pAttr = utl::FontSubstConfiguration::get().getSubstInfo( aOrgName );
391 if (pAttr)
392 for( const auto& rSubstitution : pAttr->MSSubstitutions )
393 if( ! ImplIsFontToken( rName, rSubstitution ) )
394 {
395 ImplAppendFontToken( aName, rSubstitution );
396 if( nFlags & SubsFontFlags::ONLYONE )
397 {
398 break;
399 }
400 }
401 }
402
403 return aName;
404}
405
406bool IsOpenSymbol(std::u16string_view rFontName)
407{
408 sal_Int32 nIndex = 0;
409 std::u16string_view sFamilyNm(GetNextFontToken(rFontName, nIndex));
410 return (o3tl::equalsIgnoreAsciiCase(sFamilyNm, "starsymbol") ||
411 o3tl::equalsIgnoreAsciiCase(sFamilyNm, "opensymbol"));
412}
413
414/* vim:set shiftwidth=4 softtabstop=4 expandtab: */
float u
std::u16string_view GetNextFontToken(std::u16string_view rTokenStr, sal_Int32 &rIndex)
Definition: fontdefs.cxx:301
static bool ImplIsFontToken(std::u16string_view rName, std::u16string_view rToken)
Definition: fontdefs.cxx:345
void AddTokenFontName(OUString &rName, std::u16string_view rNewToken)
Definition: fontdefs.cxx:368
static bool toOnlyLowerAsciiOrStrip(sal_Unicode c, OUStringBuffer &rName, sal_Int32 nIndex, sal_Int32 &rLen)
Definition: fontdefs.cxx:64
OUString GetEnglishSearchFontName(std::u16string_view rInName)
Definition: fontdefs.cxx:86
bool IsOpenSymbol(std::u16string_view rFontName)
Determine if the font is the special Open|Star Symbol font.
Definition: fontdefs.cxx:406
OUString StripScriptFromName(const OUString &_aName)
Strip any "script font suffix" from the font name.
Definition: fontdefs.cxx:29
static void ImplAppendFontToken(OUString &rName, std::u16string_view rNewToken)
Definition: fontdefs.cxx:359
OUString GetSubsFontName(std::u16string_view rName, SubsFontFlags nFlags)
Definition: fontdefs.cxx:374
SubsFontFlags
Definition: fontdefs.hxx:33
std::u16string_view rNewToken
Definition: fontdefs.hxx:45
sal_Int32 nIndex
OUString aName
OUString get(TranslateId sContextAndId, const std::locale &loc)
Definition: resmgr.cxx:211
int i
constexpr std::enable_if_t< std::is_signed_v< T >, std::make_unsigned_t< T > > make_unsigned(T value)
bool equalsIgnoreAsciiCase(std::u16string_view s1, std::u16string_view s2)
::std::vector< OUString > MSSubstitutions
Definition: fontcfg.hxx:130
sal_uInt16 sal_Unicode