LibreOffice Module svtools (master) 1
sampletext.cxx
Go to the documentation of this file.
1/* -*- Mode: C++; tab-width: 4; indent-tabs-mode: nil; c-basic-offset: 4 -*- */
2/*
3 * This file is part of the LibreOffice project.
4 *
5 * This Source Code Form is subject to the terms of the Mozilla Public
6 * License, v. 2.0. If a copy of the MPL was not distributed with this
7 * file, You can obtain one at http://mozilla.org/MPL/2.0/.
8 */
10#include <vcl/font.hxx>
11#include <vcl/outdev.hxx>
12#include <vcl/virdev.hxx>
13#include <vcl/fontcharmap.hxx>
14#include <i18nutil/unicode.hxx>
15#include <sal/log.hxx>
16#include <com/sun/star/i18n/ScriptType.hpp>
17#include <vector>
18#include <map>
19
20// This should only be used when a commonly used font incorrectly declares its
21// coverage. If you add a font here, please leave a note explaining the issue
22// that caused it to be added
23static UScriptCode lcl_getHardCodedScriptNameForFont (const OutputDevice &rDevice)
24{
25 const OUString &rName = rDevice.GetFont().GetFamilyName();
26
27 if (rName == "GB18030 Bitmap")
28 {
29 // As of OSX 10.9, the system font "GB18030 Bitmap" incorrectly declares
30 // that it only covers "Phoenician" when in fact it's a Chinese font.
31 return USCRIPT_HAN;
32 }
33 else if (rName == "BiauKai")
34 {
35 // "BiauKai" makes crazy claims to cover BUGINESE, SUNDANESE, etc
36 // but in fact it's a Traditional Chinese font.
37 return USCRIPT_TRADITIONAL_HAN;
38 }
39 else if (rName == "GungSeo" || rName == "PCMyungjo" || rName == "PilGi")
40 {
41 // These have no OS/2 tables, but we know they are Korean fonts.
42 return USCRIPT_KOREAN;
43 }
44 else if (rName == "Hei" || rName == "Kai")
45 {
46 // These have no OS/2 tables, but we know they are Chinese fonts.
47 return USCRIPT_HAN;
48 }
49 else if (rName.startsWith("Bangla "))
50 {
51 // "Bangla Sangam MN" claims it supports MALAYALAM, but it doesn't
52 // "Bangla MN" claims just DEVANAGARI and not an additional BENGALI
53 return USCRIPT_BENGALI;
54 }
55 else if (rName.startsWith("Gurmukhi "))
56 {
57 // "Gurmukhi MN" claims it supports TAMIL, but it doesn't
58 return USCRIPT_GURMUKHI;
59 }
60 else if (rName.startsWith("Kannada "))
61 {
62 // "Kannada MN" claims it supports TAMIL, but it doesn't
63 return USCRIPT_KANNADA;
64 }
65 else if (rName.startsWith("Lao "))
66 {
67 // "Lao Sangam MN" claims it supports TAMIL, but it doesn't
68 return USCRIPT_LAO;
69 }
70 else if (rName.startsWith("Malayalam "))
71 {
72 // "Malayalam MN" claims it supports TAMIL, but it doesn't
73 return USCRIPT_MALAYALAM;
74 }
75 else if (rName.startsWith("Sinhala "))
76 {
77 // "Sinhala MN" claims it supports CYRILLIC
78 return USCRIPT_SINHALA;
79 }
80 else if (rName.startsWith("Telugu "))
81 {
82 // "Telugu MN" claims it supports TAMIL, but it doesn't
83 return USCRIPT_TELUGU;
84 }
85 else if (rName.startsWith("Myanmar "))
86 {
87 return USCRIPT_MYANMAR;
88 }
89 else if (rName == "InaiMathi")
90 {
91 // "InaiMathi" claims it supports GOTHIC and CJK_UNIFIED_IDEOGRAPHS as well as
92 // TAMIL, but it doesn't
93 return USCRIPT_TAMIL;
94 }
95 else if (rName == "Hannotate TC" || rName == "HanziPen TC" || rName == "Heiti TC" || rName == "Weibei TC")
96 {
97 // These fonts claim support for ARMENIAN and a bunch of other stuff they don't support
98 return USCRIPT_TRADITIONAL_HAN;
99 }
100 else if (rName == "Hannotate SC" || rName == "HanziPen SC" || rName == "Heiti SC" || rName == "Weibei SC")
101 {
102 // These fonts claim support for ARMENIAN and a bunch of other stuff they don't support
103 return USCRIPT_SIMPLIFIED_HAN;
104 }
105 return USCRIPT_INVALID_CODE;
106}
107
108bool isSymbolFont(const vcl::Font &rFont)
109{
110 return (rFont.GetCharSet() == RTL_TEXTENCODING_SYMBOL) ||
111 rFont.GetFamilyName().equalsIgnoreAsciiCase("Apple Color Emoji") ||
112 rFont.GetFamilyName().equalsIgnoreAsciiCase("cmsy10") ||
113 rFont.GetFamilyName().equalsIgnoreAsciiCase("cmex10") ||
114 rFont.GetFamilyName().equalsIgnoreAsciiCase("esint10") ||
115 rFont.GetFamilyName().equalsIgnoreAsciiCase("feta26") ||
116 rFont.GetFamilyName().equalsIgnoreAsciiCase("jsMath-cmsy10") ||
117 rFont.GetFamilyName().equalsIgnoreAsciiCase("jsMath-cmex10") ||
118 rFont.GetFamilyName().equalsIgnoreAsciiCase("msam10") ||
119 rFont.GetFamilyName().equalsIgnoreAsciiCase("msbm10") ||
120 rFont.GetFamilyName().equalsIgnoreAsciiCase("wasy10") ||
121 rFont.GetFamilyName().equalsIgnoreAsciiCase("Denemo") ||
122 rFont.GetFamilyName().equalsIgnoreAsciiCase("GlyphBasic1") ||
123 rFont.GetFamilyName().equalsIgnoreAsciiCase("GlyphBasic2") ||
124 rFont.GetFamilyName().equalsIgnoreAsciiCase("GlyphBasic3") ||
125 rFont.GetFamilyName().equalsIgnoreAsciiCase("GlyphBasic4") ||
126 rFont.GetFamilyName().equalsIgnoreAsciiCase("Letters Laughing") ||
127 rFont.GetFamilyName().equalsIgnoreAsciiCase("MusiQwik") ||
128 rFont.GetFamilyName().equalsIgnoreAsciiCase("MusiSync") ||
129 rFont.GetFamilyName().equalsIgnoreAsciiCase("stmary10") ||
130 rFont.GetFamilyName().equalsIgnoreAsciiCase("Symbol") ||
131 rFont.GetFamilyName().equalsIgnoreAsciiCase("Webdings") ||
132 rFont.GetFamilyName().equalsIgnoreAsciiCase("Wingdings") ||
133 rFont.GetFamilyName().equalsIgnoreAsciiCase("Wingdings 2") ||
134 rFont.GetFamilyName().equalsIgnoreAsciiCase("Wingdings 3") ||
135 rFont.GetFamilyName().equalsIgnoreAsciiCase("Bookshelf Symbol 7") ||
136 rFont.GetFamilyName().startsWith("STIXIntegrals") ||
137 rFont.GetFamilyName().startsWith("STIXNonUnicode") ||
138 rFont.GetFamilyName().startsWith("STIXSize") ||
139 rFont.GetFamilyName().startsWith("STIXVariants") ||
141}
142
144{
145 const vcl::Font &rFont = rDevice.GetFont();
146 return !isSymbolFont(rFont) && ( -1 == rDevice.HasGlyphs(rFont, rFont.GetFamilyName()) );
147}
148
150{
151 if (rDevice.GetFont().GetFamilyName() == "Symbol")
152 {
153 static constexpr OUStringLiteral aImplAppleSymbolText =
154 u"\u03BC\u2202\u2211\u220F\u03C0\u222B\u03A9\u221A";
155 bool bHasSampleTextGlyphs
156 = (-1 == rDevice.HasGlyphs(rDevice.GetFont(), aImplAppleSymbolText));
157 //It's the Apple version
158 if (bHasSampleTextGlyphs)
159 return aImplAppleSymbolText;
160 static constexpr OUStringLiteral aImplAdobeSymbolText =
161 u"\uF06D\uF0B6\uF0E5\uF0D5\uF070\uF0F2\uF057\uF0D6";
162 return aImplAdobeSymbolText;
163 }
164
165 const bool bOpenSymbol = IsOpenSymbol(rDevice.GetFont().GetFamilyName());
166
167 if (!bOpenSymbol)
168 {
169 FontCharMapRef xFontCharMap;
170 bool bHasCharMap = rDevice.GetFontCharMap(xFontCharMap);
171 if( bHasCharMap )
172 {
173 // use some sample characters available in the font
174 sal_Unicode aText[8];
175
176 // start just above the PUA used by most symbol fonts
177 sal_uInt32 cNewChar = 0xFF00;
178
179 const int nMaxCount = SAL_N_ELEMENTS(aText) - 1;
180 int nSkip = xFontCharMap->GetCharCount() / nMaxCount;
181 if( nSkip > 10 )
182 nSkip = 10;
183 else if( nSkip <= 0 )
184 nSkip = 1;
185 for( int i = 0; i < nMaxCount; ++i )
186 {
187 sal_uInt32 cOldChar = cNewChar;
188 for( int j = nSkip; --j >= 0; )
189 cNewChar = xFontCharMap->GetPrevChar( cNewChar );
190 if( cOldChar == cNewChar )
191 break;
192 aText[ i ] = static_cast<sal_Unicode>(cNewChar); // TODO: support UCS4 samples
193 aText[ i+1 ] = 0;
194 }
195
196 return OUString(aText);
197 }
198 }
199
200 static const sal_Unicode aImplSymbolFontText[] = {
201 0xF021,0xF032,0xF043,0xF054,0xF065,0xF076,0xF0B7,0xF0C8,0};
202 static const sal_Unicode aImplStarSymbolText[] = {
203 0x2702,0x2708,0x270D,0xE033,0x2211,0x2288,0};
204 const sal_Unicode* pText = bOpenSymbol ? aImplStarSymbolText : aImplSymbolFontText;
205 OUString sSampleText(pText);
206 bool bHasSampleTextGlyphs = (-1 == rDevice.HasGlyphs(rDevice.GetFont(), sSampleText));
207 return bHasSampleTextGlyphs ? sSampleText : OUString();
208}
209
210//These ones are typically for use in the font dropdown box beside the
211//fontname, so say things roughly like "Script/Alphabet/Name-Of-Major-Language"
212
//Here we don't always know the language, of course, only the script that can
//be written with the font. Often that's one single language written in that
//script, or a handful of related languages where the name of the script is the
//same between languages, or the name in the major language is known by most
//readers of the minor languages. For example, Yiddish is also written with the
//HEBREW script, and the vast majority of Yiddish readers can read Hebrew too.
220OUString makeShortRepresentativeTextForScript(UScriptCode eScript)
221{
222 OUString sSampleText;
223 switch (eScript)
224 {
225 case USCRIPT_GREEK:
226 {
227 static constexpr OUStringLiteral aGrek =
228 u"\u0391\u03BB\u03C6\u03AC\u03B2\u03B7\u03C4\u03BF";
229 sSampleText = aGrek;
230 break;
231 }
232 case USCRIPT_HEBREW:
233 {
234 static constexpr OUStringLiteral aHebr =
235 u"\u05D0\u05DC\u05E3\u05BE\u05D1\u05D9\u05EA "
236 "\u05E2\u05D1\u05E8\u05D9";
237 sSampleText = aHebr;
238 break;
239 }
240 case USCRIPT_ARABIC:
241 {
242 static constexpr OUStringLiteral aArab =
243 u"\u0623\u0628\u062C\u062F\u064A\u0629 \u0639"
244 "\u0631\u0628\u064A\u0629";
245 sSampleText = aArab;
246 break;
247 }
248 case USCRIPT_ARMENIAN:
249 {
250 static constexpr OUStringLiteral aArmenian =
251 u"\u0561\u0575\u0562\u0578\u0582\u0562\u0565"
252 "\u0576";
253 sSampleText = aArmenian;
254 break;
255 }
256 case USCRIPT_DEVANAGARI:
257 {
258 static constexpr OUStringLiteral aDeva =
259 u"\u0926\u0947\u0935\u0928\u093E\u0917\u0930\u0940";
260 sSampleText = aDeva;
261 break;
262 }
263 case USCRIPT_BENGALI:
264 {
265 static constexpr OUStringLiteral aBeng =
266 u"\u09AC\u09BE\u0982\u09B2\u09BE \u09B2\u09BF"
267 "\u09AA\u09BF";
268 sSampleText = aBeng;
269 break;
270 }
271 case USCRIPT_GURMUKHI:
272 {
273 static constexpr OUStringLiteral aGuru =
274 u"\u0A17\u0A41\u0A30\u0A2E\u0A41\u0A16\u0A40";
275 sSampleText = aGuru;
276 break;
277 }
278 case USCRIPT_GUJARATI:
279 {
280 static constexpr OUStringLiteral aGujr =
281 u"\u0A97\u0AC1\u0A9C\u0AB0\u0ABE\u0AA4\u0aC0 "
282 "\u0AB2\u0ABF\u0AAA\u0ABF";
283 sSampleText = aGujr;
284 break;
285 }
286 case USCRIPT_ORIYA:
287 {
288 static constexpr OUStringLiteral aOrya =
289 u"\u0B09\u0B24\u0B4D\u0B15\u0B33 \u0B32\u0B3F"
290 "\u0B2A\u0B3F";
291 sSampleText = aOrya;
292 break;
293 }
294 case USCRIPT_TAMIL:
295 {
296 static constexpr OUStringLiteral aTaml =
297 u"\u0B85\u0BB0\u0BBF\u0B9A\u0BCD\u0B9A\u0BC1\u0BB5"
298 "\u0B9F\u0BBF";
299 sSampleText = aTaml;
300 break;
301 }
302 case USCRIPT_TELUGU:
303 {
304 static constexpr OUStringLiteral aTelu =
305 u"\u0C24\u0C46\u0C32\u0C41\u0C17\u0C41";
306 sSampleText = aTelu;
307 break;
308 }
309 case USCRIPT_KANNADA:
310 {
311 static constexpr OUStringLiteral aKnda =
312 u"\u0C95\u0CA8\u0CCD\u0CA8\u0CA1 \u0CB2\u0CBF"
313 "\u0CAA\u0CBF";
314 sSampleText = aKnda;
315 break;
316 }
317 case USCRIPT_MALAYALAM:
318 {
319 static constexpr OUStringLiteral aMlym =
320 u"\u0D2E\u0D32\u0D2F\u0D3E\u0D33\u0D32\u0D3F\u0D2A"
321 "\u0D3F";
322 sSampleText = aMlym;
323 break;
324 }
325 case USCRIPT_THAI:
326 {
327 static constexpr OUStringLiteral aThai =
328 u"\u0E2D\u0E31\u0E01\u0E29\u0E23\u0E44\u0E17\u0E22";
329 sSampleText = aThai;
330 break;
331 }
332 case USCRIPT_LAO:
333 {
334 static constexpr OUStringLiteral aLao =
335 u"\u0EAD\u0EB1\u0E81\u0EAA\u0EAD\u0E99\u0EA5\u0EB2"
336 "\u0EA7";
337 sSampleText = aLao;
338 break;
339 }
340 case USCRIPT_GEORGIAN:
341 {
342 static constexpr OUStringLiteral aGeorgian =
343 u"\u10D3\u10D0\u10DB\u10EC\u10D4\u10E0\u10DA\u10DD"
344 "\u10D1\u10D0";
345 sSampleText = aGeorgian;
346 break;
347 }
348 case USCRIPT_JAMO:
349 case USCRIPT_HANGUL:
350 case USCRIPT_KOREAN:
351 {
352 static constexpr OUStringLiteral aHang =
353 u"\uD55C\uAE00";
354 sSampleText = aHang;
355 break;
356 }
357 case USCRIPT_TIBETAN:
358 {
359 static constexpr OUStringLiteral aTibt =
360 u"\u0F51\u0F56\u0F74\u0F0B\u0F45\u0F53\u0F0B";
361 sSampleText = aTibt;
362 break;
363 }
364 case USCRIPT_SYRIAC:
365 {
366 static constexpr OUStringLiteral aSyri =
367 u"\u0723\u071B\u072A\u0722\u0713\u0720\u0710";
368 sSampleText = aSyri;
369 break;
370 }
371 case USCRIPT_THAANA:
372 {
373 static constexpr OUStringLiteral aThaa =
374 u"\u078C\u07A7\u0782\u07A6";
375 sSampleText = aThaa;
376 break;
377 }
378 case USCRIPT_SINHALA:
379 {
380 static constexpr OUStringLiteral aSinh =
381 u"\u0DC1\u0DD4\u0DAF\u0DCA\u0DB0 \u0DC3\u0DD2"
382 "\u0D82\u0DC4\u0DBD";
383 sSampleText = aSinh;
384 break;
385 }
386 case USCRIPT_MYANMAR:
387 {
388 static constexpr OUStringLiteral aMymr =
389 u"\u1019\u103C\u1014\u103A\u1019\u102C\u1021\u1000"
390 "\u1039\u1001\u101B\u102C";
391 sSampleText = aMymr;
392 break;
393 }
394 case USCRIPT_ETHIOPIC:
395 {
396 static constexpr OUStringLiteral aEthi =
397 u"\u130D\u12D5\u12DD";
398 sSampleText = aEthi;
399 break;
400 }
401 case USCRIPT_CHEROKEE:
402 {
403 static constexpr OUStringLiteral aCher =
404 u"\u13D7\u13AA\u13EA\u13B6\u13D9\u13D7";
405 sSampleText = aCher;
406 break;
407 }
408 case USCRIPT_KHMER:
409 {
410 static constexpr OUStringLiteral aKhmr =
411 u"\u17A2\u1780\u17D2\u1781\u179A\u1780\u17D2\u179A"
412 "\u1798\u1781\u17C1\u1798\u179A\u1797\u17B6\u179F"
413 "\u17B6";
414 sSampleText = aKhmr;
415 break;
416 }
417 case USCRIPT_MONGOLIAN:
418 {
419 static constexpr OUStringLiteral aMongolian =
420 u"\u182A\u1822\u1834\u1822\u182D\u180C";
421 sSampleText = aMongolian;
422 break;
423 }
424 case USCRIPT_TAGALOG:
425 {
426 static constexpr OUStringLiteral aTagalog =
427 u"\u170A\u170A\u170C\u1712";
428 sSampleText = aTagalog;
429 break;
430 }
431 case USCRIPT_NEW_TAI_LUE:
432 {
433 static constexpr OUStringLiteral aTalu =
434 u"\u1991\u19BA\u199F\u19B9\u19C9";
435 sSampleText = aTalu;
436 break;
437 }
438 case USCRIPT_TRADITIONAL_HAN:
439 {
440 static constexpr OUStringLiteral aHant =
441 u"\u7E41";
442 sSampleText = aHant;
443 break;
444 }
445 case USCRIPT_SIMPLIFIED_HAN:
446 {
447 static constexpr OUStringLiteral aHans =
448 u"\u7B80";
449 sSampleText = aHans;
450 break;
451 }
452 case USCRIPT_HAN:
453 {
454 static constexpr OUStringLiteral aSimplifiedAndTraditionalChinese =
455 u"\u7B80\u7E41";
456 sSampleText = aSimplifiedAndTraditionalChinese;
457 break;
458 }
459 case USCRIPT_JAPANESE:
460 {
461 static constexpr OUStringLiteral aJpan =
462 u"\u65E5\u672C\u8A9E";
463 sSampleText = aJpan;
464 break;
465 }
466 case USCRIPT_YI:
467 {
468 static constexpr OUStringLiteral aYiii =
469 u"\uA188\uA320\uA071\uA0B7";
470 sSampleText = aYiii;
471 break;
472 }
473 case USCRIPT_PHAGS_PA:
474 {
475 static constexpr OUStringLiteral aPhag =
476 u"\uA84F\uA861\uA843 \uA863\uA861\uA859 "
477 u"\uA850\uA85C\uA85E";
478 sSampleText = aPhag;
479 break;
480 }
481 case USCRIPT_TAI_LE:
482 {
483 static constexpr OUStringLiteral aTale =
484 u"\u1956\u196D\u1970\u1956\u196C\u1973\u1951\u1968"
485 "\u1952\u1970";
486 sSampleText = aTale;
487 break;
488 }
489 case USCRIPT_LATIN:
490 sSampleText = "Lorem ipsum";
491 break;
492 default:
493 break;
494 }
495 return sSampleText;
496}
497
498static OUString makeRepresentativeTextForScript(UScriptCode eScript)
499{
500 OUString sSampleText;
501 switch (eScript)
502 {
503 case USCRIPT_TRADITIONAL_HAN:
504 case USCRIPT_SIMPLIFIED_HAN:
505 case USCRIPT_HAN:
506 {
507 //Three Character Classic
508 static constexpr OUStringLiteral aZh =
509 u"\u4EBA\u4E4B\u521D \u6027\u672C\u5584";
510 sSampleText = aZh;
511 break;
512 }
513 case USCRIPT_JAPANESE:
514 {
515 //'Beautiful Japanese'
516 static constexpr OUStringLiteral aJa =
517 u"\u7F8E\u3057\u3044\u65E5\u672C\u8A9E";
518 sSampleText = aJa;
519 break;
520 }
521 case USCRIPT_JAMO:
522 case USCRIPT_KOREAN:
523 case USCRIPT_HANGUL:
524 {
525 //The essential condition for...
526 static constexpr OUStringLiteral aKo =
527 u"\uD0A4\uC2A4\uC758 \uACE0\uC720\uC870"
528 "\uAC74\uC740";
529 sSampleText = aKo;
530 break;
531 }
532 default:
533 break;
534 }
535
536 if (sSampleText.isEmpty())
537 sSampleText = makeShortRepresentativeTextForScript(eScript);
538 return sSampleText;
539}
540
541OUString makeShortMinimalTextForScript(UScriptCode eScript)
542{
543 OUString sSampleText;
544 switch (eScript)
545 {
546 case USCRIPT_GREEK:
547 {
548 static constexpr OUStringLiteral aGrek =
549 u"\u0391\u0392";
550 sSampleText = aGrek;
551 break;
552 }
553 case USCRIPT_HEBREW:
554 {
555 static constexpr OUStringLiteral aHebr =
556 u"\u05D0\u05D1";
557 sSampleText = aHebr;
558 break;
559 }
560 default:
561 break;
562 }
563 return sSampleText;
564}
565
566static OUString makeMinimalTextForScript(UScriptCode eScript)
567{
568 return makeShortMinimalTextForScript(eScript);
569}
570
571//These ones are typically for use in the font preview window in format
572//character
573
574//There we generally know the language. Though it's possible for the language to
575//be "none".
576
577//Currently we fall back to makeShortRepresentativeTextForScript when we don't
578//have suitable strings
580{
581 OUString sRet;
582 LanguageType pri = primary(eLang);
583 if( pri == primary(LANGUAGE_ARMENIAN) )
584 sRet = makeRepresentativeTextForScript(USCRIPT_ARMENIAN);
585 else if( pri == primary(LANGUAGE_CHINESE) )
586 sRet = makeRepresentativeTextForScript(USCRIPT_HAN);
587 else if( pri == primary(LANGUAGE_GREEK) )
588 sRet = makeRepresentativeTextForScript(USCRIPT_GREEK);
589 else if( pri.anyOf(
592 sRet = makeRepresentativeTextForScript(USCRIPT_HEBREW);
593 else if( pri == primary(LANGUAGE_ARABIC_SAUDI_ARABIA) )
594 sRet = makeRepresentativeTextForScript(USCRIPT_ARABIC);
595 else if( pri == primary(LANGUAGE_HINDI) )
596 sRet = makeRepresentativeTextForScript(USCRIPT_DEVANAGARI);
597 else if( pri == primary(LANGUAGE_ASSAMESE) )
598 {
599 static constexpr OUStringLiteral aAs =
600 u"\u0985\u09B8\u09AE\u09C0\u09AF\u09BC\u09BE"
601 " \u0986\u0996\u09F0";
602 sRet = aAs;
603 }
604 else if( pri == primary(LANGUAGE_BENGALI) )
605 sRet = makeRepresentativeTextForScript(USCRIPT_BENGALI);
606 else if( pri == primary(LANGUAGE_PUNJABI) )
607 sRet = makeRepresentativeTextForScript(USCRIPT_GURMUKHI);
608 else if( pri == primary(LANGUAGE_GUJARATI) )
609 sRet = makeRepresentativeTextForScript(USCRIPT_GUJARATI);
610 else if( pri == primary(LANGUAGE_ODIA) )
611 sRet = makeRepresentativeTextForScript(USCRIPT_ORIYA);
612 else if( pri == primary(LANGUAGE_TAMIL) )
613 sRet = makeRepresentativeTextForScript(USCRIPT_TAMIL);
614 else if( pri == primary(LANGUAGE_TELUGU) )
615 sRet = makeRepresentativeTextForScript(USCRIPT_TELUGU);
616 else if( pri == primary(LANGUAGE_KANNADA) )
617 sRet = makeRepresentativeTextForScript(USCRIPT_KANNADA);
618 else if( pri == primary(LANGUAGE_MALAYALAM) )
619 sRet = makeRepresentativeTextForScript(USCRIPT_MALAYALAM);
620 else if( pri == primary(LANGUAGE_THAI) )
621 sRet = makeRepresentativeTextForScript(USCRIPT_THAI);
622 else if( pri == primary(LANGUAGE_LAO) )
623 sRet = makeRepresentativeTextForScript(USCRIPT_LAO);
624 else if( pri == primary(LANGUAGE_GEORGIAN) )
625 sRet = makeRepresentativeTextForScript(USCRIPT_GEORGIAN);
626 else if( pri == primary(LANGUAGE_KOREAN) )
627 sRet = makeRepresentativeTextForScript(USCRIPT_KOREAN);
628 else if( pri == primary(LANGUAGE_TIBETAN) )
629 sRet = makeRepresentativeTextForScript(USCRIPT_TIBETAN);
630 else if( pri == primary(LANGUAGE_SYRIAC) )
631 sRet = makeRepresentativeTextForScript(USCRIPT_SYRIAC);
632 else if( pri == primary(LANGUAGE_SINHALESE_SRI_LANKA) )
633 sRet = makeRepresentativeTextForScript(USCRIPT_SINHALA);
634 else if( pri == primary(LANGUAGE_BURMESE) )
635 sRet = makeRepresentativeTextForScript(USCRIPT_MYANMAR);
636 else if( pri == primary(LANGUAGE_AMHARIC_ETHIOPIA) )
637 sRet = makeRepresentativeTextForScript(USCRIPT_ETHIOPIC);
639 sRet = makeRepresentativeTextForScript(USCRIPT_CHEROKEE);
640 else if( pri == primary(LANGUAGE_KHMER) )
641 sRet = makeRepresentativeTextForScript(USCRIPT_KHMER);
643 {
644 if (eLang.anyOf(
648 sRet = makeRepresentativeTextForScript(USCRIPT_MONGOLIAN);
649 }
650 else if( pri == primary(LANGUAGE_JAPANESE) )
651 sRet = makeRepresentativeTextForScript(USCRIPT_JAPANESE);
652 else if( pri == primary(LANGUAGE_YI) )
653 sRet = makeRepresentativeTextForScript(USCRIPT_YI);
654 else if( pri == primary(LANGUAGE_GAELIC_IRELAND) )
655 {
656 static constexpr OUStringLiteral aGa =
657 u"T\u00E9acs Samplach";
658 sRet = aGa;
659 }
660
661 return sRet;
662}
663
664namespace
665{
666#if OSL_DEBUG_LEVEL > 0
667 void lcl_dump_unicode_coverage(const std::optional<std::bitset<vcl::UnicodeCoverage::MAX_UC_ENUM>> &roIn)
668 {
669 if (!roIn)
670 {
671 SAL_INFO("svtools", "<NOTHING>");
672 return;
673 }
674 auto & rIn(*roIn);
675 if (rIn.none())
676 {
677 SAL_INFO("svtools", "<NONE>");
678 return;
679 }
681 SAL_INFO("svtools", "BASIC_LATIN");
683 SAL_INFO("svtools", "LATIN_1_SUPPLEMENT");
685 SAL_INFO("svtools", "LATIN_EXTENDED_A");
687 SAL_INFO("svtools", "LATIN_EXTENDED_B");
689 SAL_INFO("svtools", "IPA_EXTENSIONS");
691 SAL_INFO("svtools", "SPACING_MODIFIER_LETTERS");
693 SAL_INFO("svtools", "COMBINING_DIACRITICAL_MARKS");
695 SAL_INFO("svtools", "GREEK_AND_COPTIC");
697 SAL_INFO("svtools", "COPTIC");
699 SAL_INFO("svtools", "CYRILLIC");
701 SAL_INFO("svtools", "ARMENIAN");
703 SAL_INFO("svtools", "HEBREW");
705 SAL_INFO("svtools", "VAI");
707 SAL_INFO("svtools", "ARABIC");
709 SAL_INFO("svtools", "NKO");
711 SAL_INFO("svtools", "DEVANAGARI");
713 SAL_INFO("svtools", "BENGALI");
715 SAL_INFO("svtools", "GURMUKHI");
717 SAL_INFO("svtools", "GUJARATI");
719 SAL_INFO("svtools", "ODIA");
721 SAL_INFO("svtools", "TAMIL");
723 SAL_INFO("svtools", "TELUGU");
725 SAL_INFO("svtools", "KANNADA");
727 SAL_INFO("svtools", "MALAYALAM");
729 SAL_INFO("svtools", "THAI");
731 SAL_INFO("svtools", "LAO");
733 SAL_INFO("svtools", "GEORGIAN");
735 SAL_INFO("svtools", "BALINESE");
737 SAL_INFO("svtools", "HANGUL_JAMO");
739 SAL_INFO("svtools", "LATIN_EXTENDED_ADDITIONAL");
741 SAL_INFO("svtools", "GREEK_EXTENDED");
743 SAL_INFO("svtools", "GENERAL_PUNCTUATION");
745 SAL_INFO("svtools", "SUPERSCRIPTS_AND_SUBSCRIPTS");
747 SAL_INFO("svtools", "CURRENCY_SYMBOLS");
749 SAL_INFO("svtools", "COMBINING_DIACRITICAL_MARKS_FOR_SYMBOLS");
751 SAL_INFO("svtools", "LETTERLIKE_SYMBOLS");
753 SAL_INFO("svtools", "NUMBER_FORMS");
755 SAL_INFO("svtools", "ARROWS");
757 SAL_INFO("svtools", "MATHEMATICAL_OPERATORS");
759 SAL_INFO("svtools", "MISCELLANEOUS_TECHNICAL");
761 SAL_INFO("svtools", "CONTROL_PICTURES");
763 SAL_INFO("svtools", "OPTICAL_CHARACTER_RECOGNITION");
765 SAL_INFO("svtools", "ENCLOSED_ALPHANUMERICS");
767 SAL_INFO("svtools", "BOX_DRAWING");
769 SAL_INFO("svtools", "BLOCK_ELEMENTS");
771 SAL_INFO("svtools", "GEOMETRIC_SHAPES");
773 SAL_INFO("svtools", "MISCELLANEOUS_SYMBOLS");
775 SAL_INFO("svtools", "DINGBATS");
777 SAL_INFO("svtools", "CJK_SYMBOLS_AND_PUNCTUATION");
779 SAL_INFO("svtools", "HIRAGANA");
781 SAL_INFO("svtools", "KATAKANA");
783 SAL_INFO("svtools", "BOPOMOFO");
785 SAL_INFO("svtools", "HANGUL_COMPATIBILITY_JAMO");
787 SAL_INFO("svtools", "PHAGS_PA");
789 SAL_INFO("svtools", "ENCLOSED_CJK_LETTERS_AND_MONTHS");
791 SAL_INFO("svtools", "CJK_COMPATIBILITY");
793 SAL_INFO("svtools", "HANGUL_SYLLABLES");
795 SAL_INFO("svtools", "NONPLANE_0");
797 SAL_INFO("svtools", "PHOENICIAN");
799 SAL_INFO("svtools", "CJK_UNIFIED_IDEOGRAPHS");
801 SAL_INFO("svtools", "PRIVATE_USE_AREA_PLANE_0");
803 SAL_INFO("svtools", "CJK_STROKES");
805 SAL_INFO("svtools", "ALPHABETIC_PRESENTATION_FORMS");
807 SAL_INFO("svtools", "ARABIC_PRESENTATION_FORMS_A");
809 SAL_INFO("svtools", "COMBINING_HALF_MARKS");
811 SAL_INFO("svtools", "VERTICAL_FORMS");
813 SAL_INFO("svtools", "SMALL_FORM_VARIANTS");
815 SAL_INFO("svtools", "ARABIC_PRESENTATION_FORMS_B");
817 SAL_INFO("svtools", "HALFWIDTH_AND_FULLWIDTH_FORMS");
819 SAL_INFO("svtools", "SPECIALS");
821 SAL_INFO("svtools", "TIBETAN");
823 SAL_INFO("svtools", "SYRIAC");
825 SAL_INFO("svtools", "THAANA");
827 SAL_INFO("svtools", "SINHALA");
829 SAL_INFO("svtools", "MYANMAR");
831 SAL_INFO("svtools", "ETHIOPIC");
833 SAL_INFO("svtools", "CHEROKEE");
835 SAL_INFO("svtools", "UNIFIED_CANADIAN_ABORIGINAL_SYLLABICS");
837 SAL_INFO("svtools", "OGHAM");
839 SAL_INFO("svtools", "RUNIC");
841 SAL_INFO("svtools", "KHMER");
843 SAL_INFO("svtools", "MONGOLIAN");
845 SAL_INFO("svtools", "BRAILLE_PATTERNS");
847 SAL_INFO("svtools", "YI_SYLLABLES");
849 SAL_INFO("svtools", "TAGALOG");
851 SAL_INFO("svtools", "OLD_ITALIC");
853 SAL_INFO("svtools", "GOTHIC");
855 SAL_INFO("svtools", "DESERET");
857 SAL_INFO("svtools", "BYZANTINE_MUSICAL_SYMBOLS");
859 SAL_INFO("svtools", "MATHEMATICAL_ALPHANUMERIC_SYMBOLS");
861 SAL_INFO("svtools", "PRIVATE_USE_PLANE_15");
863 SAL_INFO("svtools", "VARIATION_SELECTORS");
865 SAL_INFO("svtools", "TAGS");
867 SAL_INFO("svtools", "LIMBU");
869 SAL_INFO("svtools", "TAI_LE");
871 SAL_INFO("svtools", "NEW_TAI_LUE");
873 SAL_INFO("svtools", "BUGINESE");
875 SAL_INFO("svtools", "GLAGOLITIC");
877 SAL_INFO("svtools", "TIFINAGH");
879 SAL_INFO("svtools", "YIJING_HEXAGRAM_SYMBOLS");
881 SAL_INFO("svtools", "SYLOTI_NAGRI");
883 SAL_INFO("svtools", "LINEAR_B_SYLLABARY");
885 SAL_INFO("svtools", "ANCIENT_GREEK_NUMBERS");
887 SAL_INFO("svtools", "UGARITIC");
889 SAL_INFO("svtools", "OLD_PERSIAN");
891 SAL_INFO("svtools", "SHAVIAN");
893 SAL_INFO("svtools", "OSMANYA");
895 SAL_INFO("svtools", "CYPRIOT_SYLLABARY");
897 SAL_INFO("svtools", "KHAROSHTHI");
899 SAL_INFO("svtools", "TAI_XUAN_JING_SYMBOLS");
901 SAL_INFO("svtools", "CUNEIFORM");
903 SAL_INFO("svtools", "COUNTING_ROD_NUMERALS");
905 SAL_INFO("svtools", "SUNDANESE");
907 SAL_INFO("svtools", "LEPCHA");
909 SAL_INFO("svtools", "OL_CHIKI");
911 SAL_INFO("svtools", "SAURASHTRA");
913 SAL_INFO("svtools", "KAYAH_LI");
915 SAL_INFO("svtools", "REJANG");
917 SAL_INFO("svtools", "CHAM");
919 SAL_INFO("svtools", "ANCIENT_SYMBOLS");
921 SAL_INFO("svtools", "PHAISTOS_DISC");
923 SAL_INFO("svtools", "CARIAN");
925 SAL_INFO("svtools", "DOMINO_TILES");
927 SAL_INFO("svtools", "RESERVED1");
929 SAL_INFO("svtools", "RESERVED2");
931 SAL_INFO("svtools", "RESERVED3");
933 SAL_INFO("svtools", "RESERVED4");
935 return;
936
937 SAL_INFO("svtools", "RESERVED5");
938 }
939
940 void lcl_dump_codepage_coverage(const std::optional<std::bitset<vcl::CodePageCoverage::MAX_CP_ENUM>> &roIn)
941 {
942 if (!roIn)
943 {
944 SAL_INFO("svtools", "<NOTHING>");
945 return;
946 }
947 auto & rIn(*roIn);
948 if (rIn.none())
949 {
950 SAL_INFO("svtools", "<NONE>");
951 return;
952 }
954 SAL_INFO("svtools", "CP1252");
956 SAL_INFO("svtools", "CP1250");
958 SAL_INFO("svtools", "CP1251");
960 SAL_INFO("svtools", "CP1253");
962 SAL_INFO("svtools", "CP1254");
964 SAL_INFO("svtools", "CP1255");
966 SAL_INFO("svtools", "CP1256");
968 SAL_INFO("svtools", "CP1257");
970 SAL_INFO("svtools", "CP1258");
972 SAL_INFO("svtools", "CP874");
974 SAL_INFO("svtools", "CP932");
976 SAL_INFO("svtools", "CP936");
978 SAL_INFO("svtools", "CP949");
980 SAL_INFO("svtools", "CP950");
982 SAL_INFO("svtools", "CP1361");
984 SAL_INFO("svtools", "CP869");
986 SAL_INFO("svtools", "CP866");
988 SAL_INFO("svtools", "CP865");
990 SAL_INFO("svtools", "CP864");
992 SAL_INFO("svtools", "CP863");
994 SAL_INFO("svtools", "CP862");
996 SAL_INFO("svtools", "CP861");
998 SAL_INFO("svtools", "CP860");
1000 SAL_INFO("svtools", "CP857");
1002 SAL_INFO("svtools", "CP855");
1004 SAL_INFO("svtools", "CP852");
1006 SAL_INFO("svtools", "CP775");
1008 SAL_INFO("svtools", "CP737");
1010 SAL_INFO("svtools", "CP780");
1012 SAL_INFO("svtools", "CP850");
1013 if (!(rIn[vcl::CodePageCoverage::CP437]))
1014 return;
1015
1016 SAL_INFO("svtools", "CP437");
1017 }
1018#endif
1019
1020 std::bitset<vcl::UnicodeCoverage::MAX_UC_ENUM> getMaskByScriptType(sal_Int16 nScriptType)
1021 {
1022 std::bitset<vcl::UnicodeCoverage::MAX_UC_ENUM> aMask;
1023 aMask.set();
1024
1025 for (size_t i = 0; i < vcl::UnicodeCoverage::MAX_UC_ENUM; ++i)
1026 {
1028 UScriptCode eScriptCode = otCoverageToScript(static_cast<UnicodeCoverageEnum>(i));
1029 if (unicode::getScriptClassFromUScriptCode(eScriptCode) == nScriptType)
1030 aMask.set(i, false);
1031 }
1032
1033 return aMask;
1034 }
1035
1036 //false for all bits considered "Latin" by LibreOffice
1037 std::bitset<vcl::UnicodeCoverage::MAX_UC_ENUM> const & getLatinMask()
1038 {
1039 static std::bitset<vcl::UnicodeCoverage::MAX_UC_ENUM> s_Mask(getMaskByScriptType(css::i18n::ScriptType::LATIN));
1040 return s_Mask;
1041 }
1042
1043 //false for all bits considered "Asian" by LibreOffice
1044 std::bitset<vcl::UnicodeCoverage::MAX_UC_ENUM> const & getCJKMask()
1045 {
1046 static std::bitset<vcl::UnicodeCoverage::MAX_UC_ENUM> s_Mask(getMaskByScriptType(css::i18n::ScriptType::ASIAN));
1047 return s_Mask;
1048 }
1049
1050 //false for all bits considered "Complex" by LibreOffice
1051 std::bitset<vcl::UnicodeCoverage::MAX_UC_ENUM> const & getCTLMask()
1052 {
1053 static std::bitset<vcl::UnicodeCoverage::MAX_UC_ENUM> s_Mask(getMaskByScriptType(css::i18n::ScriptType::COMPLEX));
1054 return s_Mask;
1055 }
1056
1057 //false for all bits considered "WEAK" by LibreOffice
1058 std::bitset<vcl::UnicodeCoverage::MAX_UC_ENUM> const & getWeakMask()
1059 {
1060 static std::bitset<vcl::UnicodeCoverage::MAX_UC_ENUM> s_Mask(getMaskByScriptType(css::i18n::ScriptType::WEAK));
1061 return s_Mask;
1062 }
1063
1064 //Nearly every font supports some basic Latin
1065 std::bitset<vcl::UnicodeCoverage::MAX_UC_ENUM> getCommonLatnSubsetMask()
1066 {
1067 std::bitset<vcl::UnicodeCoverage::MAX_UC_ENUM> aMask;
1068 aMask.set();
1069 aMask.set(vcl::UnicodeCoverage::BASIC_LATIN, false);
1074 return aMask;
1075 }
1076
// Returns the index of the lowest set bit in rSet. Calling this with no bit
// set is a programming error: it asserts, and returns N when asserts are
// compiled out.
template<size_t N>
size_t find_first(std::bitset<N> const& rSet)
{
    size_t nPos = 0;
    while (nPos < N)
    {
        if (rSet.test(nPos))
            return nPos;
        ++nPos;
    }
    assert(false); // see current usage
    return N;
}
1088
1089 UScriptCode getScript(const vcl::FontCapabilities &rFontCapabilities)
1090 {
1092
1093 std::bitset<vcl::UnicodeCoverage::MAX_UC_ENUM> aMasked;
1094 if (rFontCapabilities.oUnicodeRange)
1095 {
1096 aMasked = *rFontCapabilities.oUnicodeRange & getWeakMask();
1097 }
1098
1099 if (aMasked.count() == 1)
1100 return otCoverageToScript(static_cast<UnicodeCoverageEnum>(find_first(aMasked)));
1101
1102 if (aMasked[vcl::UnicodeCoverage::ARABIC])
1103 {
1106 aMasked.set(vcl::UnicodeCoverage::NKO, false);
1107 //Probably strongly tuned for Arabic
1108 if (aMasked.count() == 1)
1109 return USCRIPT_ARABIC;
1110 if (aMasked.count() == 2 && aMasked[vcl::UnicodeCoverage::SYRIAC])
1111 return USCRIPT_SYRIAC;
1112 }
1113
1115 {
1116 aMasked.set(vcl::UnicodeCoverage::DEVANAGARI, false);
1117 //Probably strongly tuned for a single Indic script
1118 if (aMasked.count() == 1)
1119 return otCoverageToScript(static_cast<UnicodeCoverageEnum>(find_first(aMasked)));
1120 }
1121
1122 aMasked.set(vcl::UnicodeCoverage::GREEK_EXTENDED, false);
1123 aMasked.set(vcl::UnicodeCoverage::GREEK_AND_COPTIC, false);
1124 // tdf#88484
1125 // Some fonts set the Arabic Presentation Forms-B bit because they
1126 // support U+FEFF (Zero Width Space) which happens to be in that block
1127 // but it isn’t an Arabic code point. By the time we reach here we
1128 // decided this isn’t an Arabic font, so it should be safe.
1130 if (aMasked.count() == 1)
1131 return otCoverageToScript(static_cast<UnicodeCoverageEnum>(find_first(aMasked)));
1132
1133 if (aMasked[vcl::UnicodeCoverage::CYRILLIC])
1134 {
1135 //Probably strongly tuned for Georgian
1136 if (aMasked.count() == 2 && aMasked[vcl::UnicodeCoverage::GEORGIAN])
1137 return USCRIPT_GEORGIAN;
1138 }
1139
1140 aMasked &= getCJKMask();
1141
1142 aMasked.set(vcl::UnicodeCoverage::CYRILLIC, false);
1143 aMasked.set(vcl::UnicodeCoverage::THAI, false);
1144 aMasked.set(vcl::UnicodeCoverage::DESERET, false);
1145 aMasked.set(vcl::UnicodeCoverage::PHAGS_PA, false);
1146
1147 //So, possibly a CJK font
1148 if (!aMasked.count() && rFontCapabilities.oCodePageRange)
1149 {
1150 std::bitset<vcl::CodePageCoverage::MAX_CP_ENUM> aCJKCodePageMask;
1151 aCJKCodePageMask.set(vcl::CodePageCoverage::CP932);
1152 aCJKCodePageMask.set(vcl::CodePageCoverage::CP936);
1153 aCJKCodePageMask.set(vcl::CodePageCoverage::CP949);
1154 aCJKCodePageMask.set(vcl::CodePageCoverage::CP950);
1155 aCJKCodePageMask.set(vcl::CodePageCoverage::CP1361);
1156 std::bitset<vcl::CodePageCoverage::MAX_CP_ENUM> aMaskedCodePage =
1157 *rFontCapabilities.oCodePageRange & aCJKCodePageMask;
1158 //fold Korean
1159 if (aMaskedCodePage[vcl::CodePageCoverage::CP1361])
1160 {
1161 aMaskedCodePage.set(vcl::CodePageCoverage::CP949);
1162 aMaskedCodePage.set(vcl::CodePageCoverage::CP1361, false);
1163 }
1164
1165 if (aMaskedCodePage.count() == 1)
1166 {
1167 if (aMaskedCodePage[vcl::CodePageCoverage::CP932])
1168 return USCRIPT_JAPANESE;
1169 if (aMaskedCodePage[vcl::CodePageCoverage::CP949])
1170 return USCRIPT_KOREAN;
1171 if (aMaskedCodePage[vcl::CodePageCoverage::CP936])
1172 return USCRIPT_SIMPLIFIED_HAN;
1173 if (aMaskedCodePage[vcl::CodePageCoverage::CP950])
1174 return USCRIPT_TRADITIONAL_HAN;
1175 }
1176
1177 if (aMaskedCodePage.count())
1178 return USCRIPT_HAN;
1179 }
1180
1181 return USCRIPT_COMMON;
1182 }
1183}
1184
1185const std::map<UScriptCode, std::vector<OUString>> distCjkMap =
1186{
1187 { USCRIPT_KOREAN, { " KR", "Korean"} }, // Korean
1188 { USCRIPT_JAPANESE, {" JP", "Japanese"} } , // Japanese
1189 { USCRIPT_SIMPLIFIED_HAN, {" SC", " GB", "S Chinese"} }, // Simplified Chinese Family
1190 { USCRIPT_TRADITIONAL_HAN, {" TC", " HC", " TW", " HK", " MO", "T Chinese"} }// Traditional Chinese Family
1191};
1192namespace
1193{
1194 UScriptCode attemptToDisambiguateHan(UScriptCode eScript, OutputDevice const &rDevice)
1195 {
1196 //If we're a CJK font, see if we seem to be tuned for C, J or K
1197 if (eScript == USCRIPT_HAN)
1198 {
1199 const vcl::Font &rFont = rDevice.GetFont();
1200
1201 bool bKore = false, bJpan = false, bHant = false, bHans = false;
1202
1203 static constexpr OUStringLiteral sKorean = u"\u4E6D\u4E76\u596C";
1204 if (-1 == rDevice.HasGlyphs(rFont, sKorean))
1205 bKore = true;
1206
1207 static constexpr OUStringLiteral sJapanese = u"\u5968\u67A0\u9D8F";
1208 if (-1 == rDevice.HasGlyphs(rFont, sJapanese))
1209 bJpan = true;
1210
1211 static constexpr OUStringLiteral sTraditionalChinese = u"\u555F\u96DE";
1212 if (-1 == rDevice.HasGlyphs(rFont, sTraditionalChinese))
1213 bHant = true;
1214
1215 static constexpr OUStringLiteral sSimplifiedChinese = u"\u4E61\u542F\u5956";
1216 if (-1 == rDevice.HasGlyphs(rFont, sSimplifiedChinese))
1217 bHans = true;
1218
1219 if (bKore && !bJpan && !bHans && !bHant) {
1220 eScript = USCRIPT_KOREAN;
1221 return eScript;
1222 }
1223 else if (bJpan && !bKore && !bHans && !bHant) {
1224 eScript = USCRIPT_JAPANESE;
1225 return eScript;
1226 }
1227 else if (bHans && !bHant && !bKore && !bJpan) {
1228 eScript = USCRIPT_SIMPLIFIED_HAN;
1229 return eScript;
1230 }
1231 else if (bHant && !bHans && !bKore && !bJpan) {
1232 eScript = USCRIPT_TRADITIONAL_HAN;
1233 return eScript;
1234 }
1235
1236 // for the last time, Check the ISO code strings or font specific strings
1237 const OUString &rName = rDevice.GetFont().GetFamilyName();
1238 std::map<UScriptCode, std::vector<OUString>>::const_iterator distCjkMapIt;
1239 for (distCjkMapIt = distCjkMap.begin(); distCjkMapIt != distCjkMap.end(); ++distCjkMapIt) {
1240 std::vector<OUString> cjkCodeList = distCjkMapIt->second;
1241 std::vector<OUString>::const_iterator cjkPtr;
1242 for (cjkPtr = cjkCodeList.begin(); cjkPtr != cjkCodeList.end(); ++cjkPtr) {
1243 if (rName.indexOf(*cjkPtr) > 0) {
1244 return distCjkMapIt->first;
1245 }
1246 }
1247 }
1248 //otherwise fall-through as USCRIPT_HAN and expect a combined Hant/Hans preview
1249 }
1250 return eScript;
1251 }
1252}
1253
// Function body whose signature line is missing from this listing; presumably
// OUString makeShortRepresentativeTextForSelectedFont(OutputDevice const &rDevice)
// -- TODO confirm against the full file.
//
// Picks a script for the font currently set on rDevice and returns the short
// representative text for that script, or an empty string when the script
// cannot be determined or the font does not cover the sample text.
1255{
// A hard-coded per-font override, if any, takes precedence over detection
1256 UScriptCode eScript = lcl_getHardCodedScriptNameForFont(rDevice);
// USCRIPT_INVALID_CODE is treated as "no override": detect from capabilities
1257 if (eScript == USCRIPT_INVALID_CODE)
1258 {
1259 vcl::FontCapabilities aFontCapabilities;
// Without capability data there is nothing to base a guess on
1260 if (!rDevice.GetFontCapabilities(aFontCapabilities))
1261 return OUString();
1262
1263#if OSL_DEBUG_LEVEL > 0
1264 lcl_dump_unicode_coverage(aFontCapabilities.oUnicodeRange);
1265 lcl_dump_codepage_coverage(aFontCapabilities.oCodePageRange);
1266#endif
1267
// The Unicode coverage is optional; it is masked in place only when present.
// NOTE(review): presumably the mask strips ranges that Latin fonts commonly
// declare - confirm against getCommonLatnSubsetMask().
1268 if (aFontCapabilities.oUnicodeRange)
1269 *aFontCapabilities.oUnicodeRange &= getCommonLatnSubsetMask();
1270
1271 //If this font is probably tuned to display a single non-Latin
1272 //script and the font name is itself in Latin, then show a small
1273 //chunk of representative text for that script
1274 eScript = getScript(aFontCapabilities);
// USCRIPT_COMMON means no single script stood out: no sample text
1275 if (eScript == USCRIPT_COMMON)
1276 return OUString();
1277
// Refine a generic Han result to a specific CJK variant where possible
1278 eScript = attemptToDisambiguateHan(eScript, rDevice);
1279 }
1280
1281 OUString sSampleText = makeShortRepresentativeTextForScript(eScript);
// Only hand the sample out when HasGlyphs() reports -1 for it, which this
// code treats as "the font has glyphs for the whole text"
1282 bool bHasSampleTextGlyphs = (-1 == rDevice.HasGlyphs(rDevice.GetFont(), sSampleText));
1283 return bHasSampleTextGlyphs ? sSampleText : OUString();
1284}
1285
// Function body whose signature line is missing from this listing; presumably
// UScriptCode otCoverageToScript(vcl::UnicodeCoverage::UnicodeCoverageEnum eOTCoverage)
// -- TODO confirm against the full file.
//
// Translates eOTCoverage into a UScriptCode through one large switch. The
// result starts out as USCRIPT_COMMON and is returned unchanged by any switch
// arm that does not assign a script.
1287{
1288 UScriptCode eRet = USCRIPT_COMMON;
// NOTE(review): the case labels of this switch are absent from this listing,
// so which coverage value selects which script cannot be read off here; only
// the assignments of the individual arms are visible.
1289 switch (eOTCoverage)
1290 {
1295 eRet = USCRIPT_LATIN;
1296 break;
1298 eRet = USCRIPT_INHERITED;
1299 break;
1301 eRet = USCRIPT_GREEK;
1302 break;
1304 eRet = USCRIPT_COPTIC;
1305 break;
1307 eRet = USCRIPT_CYRILLIC;
1308 break;
1310 eRet = USCRIPT_ARMENIAN;
1311 break;
1313 eRet = USCRIPT_HEBREW;
1314 break;
1316 eRet = USCRIPT_VAI;
1317 break;
1319 eRet = USCRIPT_ARABIC;
1320 break;
1322 eRet = USCRIPT_NKO;
1323 break;
1325 eRet = USCRIPT_DEVANAGARI;
1326 break;
1328 eRet = USCRIPT_BENGALI;
1329 break;
1331 eRet = USCRIPT_GURMUKHI;
1332 break;
1334 eRet = USCRIPT_GUJARATI;
1335 break;
1337 eRet = USCRIPT_ORIYA;
1338 break;
1340 eRet = USCRIPT_TAMIL;
1341 break;
1343 eRet = USCRIPT_TELUGU;
1344 break;
1346 eRet = USCRIPT_KANNADA;
1347 break;
1349 eRet = USCRIPT_MALAYALAM;
1350 break;
1352 eRet = USCRIPT_THAI;
1353 break;
1355 eRet = USCRIPT_LAO;
1356 break;
1358 eRet = USCRIPT_GEORGIAN;
1359 break;
1361 eRet = USCRIPT_BALINESE;
1362 break;
1364 eRet = USCRIPT_HANGUL;
1365 break;
1367 eRet = USCRIPT_LATIN;
1368 break;
1370 eRet = USCRIPT_GREEK;
1371 break;
1373 eRet = USCRIPT_SYMBOLS;
1374 break;
1376 eRet = USCRIPT_INHERITED;
1377 break;
1381 eRet = USCRIPT_SYMBOLS;
1382 break;
1384 eRet = USCRIPT_MATHEMATICAL_NOTATION;
1385 break;
1394 eRet = USCRIPT_SYMBOLS;
1395 break;
1397 eRet = USCRIPT_HIRAGANA;
1398 break;
1400 eRet = USCRIPT_KATAKANA;
1401 break;
1403 eRet = USCRIPT_BOPOMOFO;
1404 break;
1406 eRet = USCRIPT_HANGUL;
1407 break;
1409 eRet = USCRIPT_PHAGS_PA;
1410 break;
1412 eRet = USCRIPT_HANGUL;
1413 break;
1415 eRet = USCRIPT_HAN;
1416 break;
1418 eRet = USCRIPT_HANGUL;
1419 break;
1421 eRet = USCRIPT_PHOENICIAN;
1422 break;
1425 eRet = USCRIPT_HAN;
1426 break;
1428 eRet = USCRIPT_ARABIC;
1429 break;
1431 eRet = USCRIPT_INHERITED;
1432 break;
1434 eRet = USCRIPT_ARABIC;
1435 break;
1437 eRet = USCRIPT_TIBETAN;
1438 break;
1440 eRet = USCRIPT_SYRIAC;
1441 break;
1443 eRet = USCRIPT_THAANA;
1444 break;
1446 eRet = USCRIPT_SINHALA;
1447 break;
1449 eRet = USCRIPT_MYANMAR;
1450 break;
1452 eRet = USCRIPT_ETHIOPIC;
1453 break;
1455 eRet = USCRIPT_CHEROKEE;
1456 break;
1458 eRet = USCRIPT_CANADIAN_ABORIGINAL;
1459 break;
1461 eRet = USCRIPT_OGHAM;
1462 break;
1464 eRet = USCRIPT_RUNIC;
1465 break;
1467 eRet = USCRIPT_KHMER;
1468 break;
1470 eRet = USCRIPT_MONGOLIAN;
1471 break;
1473 eRet = USCRIPT_BRAILLE;
1474 break;
1476 eRet = USCRIPT_YI;
1477 break;
1479 eRet = USCRIPT_TAGALOG;
1480 break;
1482 eRet = USCRIPT_OLD_ITALIC;
1483 break;
1485 eRet = USCRIPT_GOTHIC;
1486 break;
1488 eRet = USCRIPT_DESERET;
1489 break;
1493 eRet = USCRIPT_SYMBOLS;
1494 break;
1496 eRet = USCRIPT_INHERITED;
1497 break;
1499 eRet = USCRIPT_SYMBOLS;
1500 break;
1502 eRet = USCRIPT_LIMBU;
1503 break;
1505 eRet = USCRIPT_TAI_LE;
1506 break;
1508 eRet = USCRIPT_NEW_TAI_LUE;
1509 break;
1511 eRet = USCRIPT_BUGINESE;
1512 break;
1514 eRet = USCRIPT_GLAGOLITIC;
1515 break;
1517 eRet = USCRIPT_TIFINAGH;
1518 break;
1520 eRet = USCRIPT_SYMBOLS;
1521 break;
1523 eRet = USCRIPT_SYLOTI_NAGRI;
1524 break;
1526 eRet = USCRIPT_LINEAR_B;
1527 break;
1529 eRet = USCRIPT_GREEK;
1530 break;
1532 eRet = USCRIPT_UGARITIC;
1533 break;
1535 eRet = USCRIPT_OLD_PERSIAN;
1536 break;
1538 eRet = USCRIPT_SHAVIAN;
1539 break;
1541 eRet = USCRIPT_OSMANYA;
1542 break;
1544 eRet = USCRIPT_CYPRIOT;
1545 break;
1547 eRet = USCRIPT_KHAROSHTHI;
1548 break;
1550 eRet = USCRIPT_CUNEIFORM;
1551 break;
1553 eRet = USCRIPT_SUNDANESE;
1554 break;
1556 eRet = USCRIPT_LEPCHA;
1557 break;
1559 eRet = USCRIPT_OL_CHIKI;
1560 break;
1562 eRet = USCRIPT_SAURASHTRA;
1563 break;
1565 eRet = USCRIPT_KAYAH_LI;
1566 break;
1568 eRet = USCRIPT_REJANG;
1569 break;
1571 eRet = USCRIPT_CHAM;
1572 break;
1574 eRet = USCRIPT_CARIAN;
1575 break;
1581 eRet = USCRIPT_SYMBOLS;
1582 break;
// Final arm assigns nothing, so eRet keeps its USCRIPT_COMMON default there
1602 break;
1603 }
1604 return eRet;
1605}
1606
// Build a representative sample text for rFont.
//
// nScriptType: a css::i18n::ScriptType value (compared against ASIAN, LATIN
//              and COMPLEX below); it decides which coverage bits are kept
//              and which fallback text is used.
// rFont:       font to produce the sample for.
//
// Starts from the text for the font's language. When that text is empty or
// the font does not cover it, the script is derived from the font's declared
// capabilities instead, with fixed per-script-type fallbacks if that still
// yields nothing. May return an empty string.
1607OUString makeRepresentativeTextForFont(sal_Int16 nScriptType, const vcl::Font &rFont)
1608{
1609 OUString sRet(makeRepresentativeTextForLanguage(rFont.GetLanguage()));
1610
// NOTE(review): the declaration of aDevice is not part of this listing;
// it is used through operator-> and dereferenced with * below.
// A HasGlyphs() result other than -1 is treated as "text not fully covered".
1612 if (sRet.isEmpty() || (-1 != aDevice->HasGlyphs(rFont, sRet)))
1613 {
1614 aDevice->SetFont(rFont);
1615 vcl::FontCapabilities aFontCapabilities;
1616 if (aDevice->GetFontCapabilities(aFontCapabilities))
1617 {
1618#if OSL_DEBUG_LEVEL > 0
1619 lcl_dump_unicode_coverage(aFontCapabilities.oUnicodeRange);
1620#endif
1621
// The Unicode coverage is optional; it is masked in place only when present
1622 if (aFontCapabilities.oUnicodeRange)
1623 {
1624 *aFontCapabilities.oUnicodeRange &= getWeakMask();
1625
// Each mask below is applied when the requested script type is NOT the one
// named in the condition. NOTE(review): presumably each mask clears the bits
// of the script class it is named after - confirm against the mask helpers.
1626 if (nScriptType != css::i18n::ScriptType::ASIAN)
1627 {
1628 *aFontCapabilities.oUnicodeRange &= getCJKMask();
// The code page coverage is discarded as well for non-Asian requests
1629 aFontCapabilities.oCodePageRange.reset();
1630 }
1631 if (nScriptType != css::i18n::ScriptType::LATIN)
1632 *aFontCapabilities.oUnicodeRange &= getLatinMask();
1633 if (nScriptType != css::i18n::ScriptType::COMPLEX)
1634 *aFontCapabilities.oUnicodeRange &= getCTLMask();
1635 }
1636
1637#if OSL_DEBUG_LEVEL > 0
1638 SAL_INFO("svtools", "minimal");
1639 lcl_dump_unicode_coverage(aFontCapabilities.oUnicodeRange);
1640 lcl_dump_codepage_coverage(aFontCapabilities.oCodePageRange);
1641#endif
1642
1643 UScriptCode eScript = getScript(aFontCapabilities);
1644
// Only Asian requests try to refine a generic Han result
1645 if (nScriptType == css::i18n::ScriptType::ASIAN)
1646 eScript = attemptToDisambiguateHan(eScript, *aDevice);
1647
1648 sRet = makeRepresentativeTextForScript(eScript);
1649 }
1650
// Fallbacks for when neither language nor capabilities produced a text
1651 if (sRet.isEmpty())
1652 {
1653 if (nScriptType == css::i18n::ScriptType::COMPLEX)
1654 {
// Try the Hebrew representative text, then the Hebrew minimal text, and
// settle on the Arabic representative text if the font covers neither
1655 sRet = makeRepresentativeTextForScript(USCRIPT_HEBREW);
1656 if (-1 != aDevice->HasGlyphs(rFont, sRet))
1657 {
1658 sRet = makeMinimalTextForScript(USCRIPT_HEBREW);
1659 if (-1 != aDevice->HasGlyphs(rFont, sRet))
1660 sRet = makeRepresentativeTextForScript(USCRIPT_ARABIC);
1661 }
1662 }
1663 else if (nScriptType == css::i18n::ScriptType::LATIN)
1664 sRet = makeRepresentativeTextForScript(USCRIPT_LATIN);
1665 }
1666 }
1667
1668 return sRet;
1669}
1670
1671/* vim:set shiftwidth=4 softtabstop=4 expandtab: */
const vcl::Font & GetFont() const
bool GetFontCharMap(FontCharMapRef &rxFontCharMap) const
bool GetFontCapabilities(vcl::FontCapabilities &rFontCapabilities) const
sal_Int32 HasGlyphs(const vcl::Font &rFont, std::u16string_view rStr, sal_Int32 nIndex=0, sal_Int32 nLen=-1) const
static sal_Int16 getScriptClassFromUScriptCode(UScriptCode eScript)
const OUString & GetFamilyName() const
LanguageType GetLanguage() const
rtl_TextEncoding GetCharSet() const
float u
UNOTOOLS_DLLPUBLIC bool IsOpenSymbol(std::u16string_view rFontName)
#define LANGUAGE_ARMENIAN
#define LANGUAGE_YIDDISH
#define LANGUAGE_MONGOLIAN_MONGOLIAN_CHINA
#define LANGUAGE_MONGOLIAN_MONGOLIAN_MONGOLIA
#define LANGUAGE_YI
#define LANGUAGE_GAELIC_IRELAND
#define LANGUAGE_TIBETAN
#define LANGUAGE_THAI
#define LANGUAGE_PUNJABI
#define LANGUAGE_BENGALI
#define LANGUAGE_CHINESE
#define LANGUAGE_SYRIAC
#define LANGUAGE_LAO
#define LANGUAGE_ODIA
#define LANGUAGE_TAMIL
#define LANGUAGE_JAPANESE
#define LANGUAGE_SINHALESE_SRI_LANKA
#define LANGUAGE_GREEK
#define LANGUAGE_KOREAN
#define LANGUAGE_BURMESE
#define LANGUAGE_ASSAMESE
#define LANGUAGE_KHMER
#define LANGUAGE_ARABIC_SAUDI_ARABIA
#define LANGUAGE_HEBREW
#define LANGUAGE_HINDI
#define LANGUAGE_CHEROKEE_UNITED_STATES
#define LANGUAGE_KANNADA
#define LANGUAGE_MALAYALAM
constexpr LanguageType primary(LanguageType lt)
#define LANGUAGE_GEORGIAN
#define LANGUAGE_AMHARIC_ETHIOPIA
#define LANGUAGE_GUJARATI
#define LANGUAGE_TELUGU
#define LANGUAGE_MONGOLIAN_MONGOLIAN_LSO
#define SAL_INFO(area, stream)
#define SAL_N_ELEMENTS(arr)
int i
#define N
OUString makeShortRepresentativeTextForSelectedFont(OutputDevice const &rDevice)
static UScriptCode lcl_getHardCodedScriptNameForFont(const OutputDevice &rDevice)
Definition: sampletext.cxx:23
bool canRenderNameOfSelectedFont(OutputDevice const &rDevice)
Definition: sampletext.cxx:143
OUString makeShortMinimalTextForScript(UScriptCode eScript)
Definition: sampletext.cxx:541
static OUString makeMinimalTextForScript(UScriptCode eScript)
Definition: sampletext.cxx:566
static OUString makeRepresentativeTextForScript(UScriptCode eScript)
Definition: sampletext.cxx:498
bool isSymbolFont(const vcl::Font &rFont)
Definition: sampletext.cxx:108
static OUString makeRepresentativeTextForLanguage(LanguageType eLang)
Definition: sampletext.cxx:579
OUString makeShortRepresentativeTextForScript(UScriptCode eScript)
Definition: sampletext.cxx:220
UScriptCode otCoverageToScript(vcl::UnicodeCoverage::UnicodeCoverageEnum eOTCoverage)
const std::map< UScriptCode, std::vector< OUString > > distCjkMap
OUString makeRepresentativeTextForFont(sal_Int16 nScriptType, const vcl::Font &rFont)
OUString makeShortRepresentativeSymbolTextForSelectedFont(OutputDevice const &rDevice)
Definition: sampletext.cxx:149
static SfxItemSet & rSet
bool anyOf(strong_int v) const
std::optional< std::bitset< CodePageCoverage::MAX_CP_ENUM > > oCodePageRange
std::optional< std::bitset< UnicodeCoverage::MAX_UC_ENUM > > oUnicodeRange
sal_uInt16 sal_Unicode