LibreOffice Module i18npool (master)  1
genconv_dict.cxx
Go to the documentation of this file.
1 /* -*- Mode: C++; tab-width: 4; indent-tabs-mode: nil; c-basic-offset: 4 -*- */
2 /*
3  * This file is part of the LibreOffice project.
4  *
5  * This Source Code Form is subject to the terms of the Mozilla Public
6  * License, v. 2.0. If a copy of the MPL was not distributed with this
7  * file, You can obtain one at http://mozilla.org/MPL/2.0/.
8  *
9  * This file incorporates work covered by the following license notice:
10  *
11  * Licensed to the Apache Software Foundation (ASF) under one or more
12  * contributor license agreements. See the NOTICE file distributed
13  * with this work for additional information regarding copyright
14  * ownership. The ASF licenses this file to you under the Apache
15  * License, Version 2.0 (the "License"); you may not use this file
16  * except in compliance with the License. You may obtain a copy of
17  * the License at http://www.apache.org/licenses/LICENSE-2.0 .
18  */
19 
20 
21 #include <stdio.h>
22 #include <string.h>
23 #include <stdlib.h>
24 #include <errno.h>
25 #include <sal/main.h>
26 #include <sal/types.h>
27 #include <rtl/ustring.hxx>
28 
29 #include <vector>
30 
31 static void make_hhc_char(FILE *sfp, FILE *cfp); // Hangul/Hanja character conversion tables
32 static void make_stc_char(FILE *sfp, FILE *cfp); // Simplified/Traditional Chinese character conversion tables
33 static void make_stc_word(FILE *sfp, FILE *cfp); // Simplified/Traditional Chinese word conversion tables
34 
35 /* Main Procedure */
36 
38 {
39  FILE *sfp, *cfp;
40 
41  if (argc < 4) exit(-1);
42 
43 
44  sfp = fopen(argv[2], "rb"); // open the source file for read;
45  if (sfp == nullptr)
46  {
47  fprintf(stderr, "Opening the dictionary source file %s for reading failed: %s\n", argv[1], strerror(errno));
48  exit(1);
49  }
50 
51  // create the C source file to write
52  cfp = fopen(argv[3], "wb");
53  if (cfp == nullptr) {
54  fclose(sfp);
55  fprintf(stderr, "Opening %s for writing failed: %s\n", argv[3], strerror(errno));
56  exit(1);
57  }
58 
59  fprintf(cfp, "/*\n");
60  fprintf(cfp, " * Copyright(c) 1999 - 2000, Sun Microsystems, Inc.\n");
61  fprintf(cfp, " * All Rights Reserved.\n");
62  fprintf(cfp, " */\n\n");
63  fprintf(cfp, "/* !!!The file is generated automatically. DO NOT edit the file manually!!! */\n\n");
64  fprintf(cfp, "#include <sal/types.h>\n");
65  fprintf(cfp, "#include <textconversion.hxx>\n");
66  fprintf(cfp, "\nextern \"C\" {\n");
67 
68  if (strcmp(argv[1], "hhc_char") == 0)
69  make_hhc_char(sfp, cfp);
70  else if (strcmp(argv[1], "stc_char") == 0)
71  make_stc_char(sfp, cfp);
72  else if (strcmp(argv[1], "stc_word") == 0)
73  make_stc_word(sfp, cfp);
74 
75  fprintf (cfp, "}\n");
76 
77  fclose(sfp);
78  fclose(cfp);
79 
80  return 0;
81 } // end of main
82 
83 // Hangul/Hanja character conversion
84 void make_hhc_char(FILE *sfp, FILE *cfp)
85 {
86  sal_Int32 count, address, i, j, k;
87  sal_Unicode Hanja2HangulData[0x10000];
88  for (i = 0; i < 0x10000; i++) {
89  Hanja2HangulData[i] = 0;
90  }
91  sal_uInt16 Hangul2HanjaData[10000][3];
92 
93  // generate main dict. data array
94  fprintf(cfp, "\nstatic const sal_Unicode Hangul2HanjaData[] = {");
95 
96  char Cstr[1024];
97  count = 0;
98  address = 0;
99  while (fgets(Cstr, 1024, sfp)) {
100  // input file is in UTF-8 encoding (Hangul:Hanja)
101  // don't convert last new line character to Ostr.
102  OUString Ostr(Cstr, strlen(Cstr) - 1, RTL_TEXTENCODING_UTF8);
103  sal_Int32 len = Ostr.getLength();
104 
105  Hangul2HanjaData[count][0] = Ostr[0];
106  Hangul2HanjaData[count][1] = sal::static_int_cast<sal_uInt16>( address );
107  Hangul2HanjaData[count][2] = sal::static_int_cast<sal_uInt16>( len - 2 );
108  count++;
109 
110  for (i = 2; i < len; i++) {
111  Hanja2HangulData[Ostr[i]] = Ostr[0];
112  if (address++ % 16 == 0)
113  fprintf(cfp, "\n\t");
114  fprintf(cfp, "0x%04x, ", Ostr[i]);
115  }
116  }
117  fprintf(cfp, "\n};\n");
118 
119  fprintf(cfp, "\nstatic const i18npool::Hangul_Index Hangul2HanjaIndex[] = {\n");
120  for (i = 0; i < count; i++)
121  fprintf(cfp, "\t{ 0x%04x, 0x%04x, 0x%02x },\n",
122  Hangul2HanjaData[i][0],
123  Hangul2HanjaData[i][1],
124  Hangul2HanjaData[i][2]);
125  fprintf(cfp, "};\n");
126 
127  fprintf(cfp, "\nstatic const sal_uInt16 Hanja2HangulIndex[] = {");
128 
129  address=0;
130  for (i = 0; i < 0x10; i++) {
131  fprintf(cfp, "\n\t");
132  for (j = 0; j < 0x10; j++) {
133  for (k = 0; k < 0x100; k++) {
134  if (Hanja2HangulData[((i*0x10)+j)*0x100+k] != 0)
135  break;
136  }
137  fprintf(
138  cfp, "0x%04lx, ",
139  sal::static_int_cast< unsigned long >(
140  k < 0x100 ? (address++)*0x100 : 0xFFFF));
141  }
142  }
143  fprintf(cfp, "\n};\n");
144 
145  fprintf(cfp, "\nstatic const sal_Unicode Hanja2HangulData[] = {");
146 
147  for (i = 0; i < 0x100; i++) {
148  for (j = 0; j < 0x100; j++) {
149  if (Hanja2HangulData[i*0x100+j] != 0)
150  break;
151  }
152  if (j < 0x100) {
153  for (j = 0; j < 0x10; j++) {
154  fprintf(cfp, "\n\t");
155  for (k = 0; k < 0x10; k++) {
156  sal_Unicode c = Hanja2HangulData[((i*0x10+j)*0x10)+k];
157  fprintf(cfp, "0x%04x, ", c ? c : 0xFFFF);
158  }
159  }
160  }
161  }
162  fprintf(cfp, "\n};\n");
163 
164  // create function to return arrays
165  fprintf (cfp, "\tconst sal_Unicode* getHangul2HanjaData() { return Hangul2HanjaData; }\n");
166  fprintf (cfp, "\tconst i18npool::Hangul_Index* getHangul2HanjaIndex() { return Hangul2HanjaIndex; }\n");
167  fprintf (cfp, "\tsal_Int16 getHangul2HanjaIndexCount() { return sizeof(Hangul2HanjaIndex) / sizeof(i18npool::Hangul_Index); }\n");
168  fprintf (cfp, "\tconst sal_uInt16* getHanja2HangulIndex() { return Hanja2HangulIndex; }\n");
169  fprintf (cfp, "\tconst sal_Unicode* getHanja2HangulData() { return Hanja2HangulData; }\n");
170 }
171 
172 // Simplified/Traditional Chinese character conversion
173 void make_stc_char(FILE *sfp, FILE *cfp)
174 {
175  sal_Int32 address, i, j, k;
176  sal_Unicode SChinese2TChineseData[0x10000];
177  sal_Unicode SChinese2VChineseData[0x10000];
178  sal_Unicode TChinese2SChineseData[0x10000];
179  for (i = 0; i < 0x10000; i++) {
180  SChinese2TChineseData[i] = 0;
181  SChinese2VChineseData[i] = 0;
182  TChinese2SChineseData[i] = 0;
183  }
184 
185  char Cstr[1024];
186  while (fgets(Cstr, 1024, sfp)) {
187  // input file is in UTF-8 encoding (SChinese:TChinese)
188  // don't convert last new line character to Ostr.
189  OUString Ostr(Cstr, strlen(Cstr) - 1, RTL_TEXTENCODING_UTF8);
190  sal_Int32 len = Ostr.getLength();
191  if (Ostr[1] == 'v')
192  SChinese2VChineseData[Ostr[0]] = Ostr[2];
193  else {
194  SChinese2TChineseData[Ostr[0]] = Ostr[2];
195  if (SChinese2VChineseData[Ostr[0]] == 0)
196  SChinese2VChineseData[Ostr[0]] = Ostr[2];
197  }
198  for (i = 2; i < len; i++)
199  TChinese2SChineseData[Ostr[i]] = Ostr[0];
200  }
201 
202  fprintf(cfp, "\nstatic const sal_uInt16 STC_CharIndex_S2T[] = {");
203 
204  address=0;
205  for (i = 0; i < 0x10; i++) {
206  fprintf(cfp, "\n\t");
207  for (j = 0; j < 0x10; j++) {
208  for (k = 0; k < 0x100; k++) {
209  if (SChinese2TChineseData[((i*0x10)+j)*0x100+k] != 0)
210  break;
211  }
212  fprintf(
213  cfp, "0x%04lx, ",
214  sal::static_int_cast< unsigned long >(
215  k < 0x100 ? (address++)*0x100 : 0xFFFF));
216  }
217  }
218  fprintf(cfp, "\n};\n");
219 
220  fprintf(cfp, "\nstatic const sal_Unicode STC_CharData_S2T[] = {");
221 
222  for (i = 0; i < 0x100; i++) {
223  for (j = 0; j < 0x100; j++) {
224  if (SChinese2TChineseData[i*0x100+j] != 0)
225  break;
226  }
227  if (j < 0x100) {
228  for (j = 0; j < 0x10; j++) {
229  fprintf(cfp, "\n\t");
230  for (k = 0; k < 0x10; k++) {
231  sal_Unicode c = SChinese2TChineseData[((i*0x10+j)*0x10)+k];
232  fprintf(cfp, "0x%04x, ", c ? c : 0xFFFF);
233  }
234  }
235  }
236  }
237  fprintf(cfp, "\n};\n");
238 
239  fprintf(cfp, "\nstatic const sal_uInt16 STC_CharIndex_S2V[] = {");
240 
241  address=0;
242  for (i = 0; i < 0x10; i++) {
243  fprintf(cfp, "\n\t");
244  for (j = 0; j < 0x10; j++) {
245  for (k = 0; k < 0x100; k++) {
246  if (SChinese2VChineseData[((i*0x10)+j)*0x100+k] != 0)
247  break;
248  }
249  fprintf(
250  cfp, "0x%04lx, ",
251  sal::static_int_cast< unsigned long >(
252  k < 0x100 ? (address++)*0x100 : 0xFFFF));
253  }
254  }
255  fprintf(cfp, "\n};\n");
256 
257  fprintf(cfp, "\nstatic const sal_Unicode STC_CharData_S2V[] = {");
258 
259  for (i = 0; i < 0x100; i++) {
260  for (j = 0; j < 0x100; j++) {
261  if (SChinese2VChineseData[i*0x100+j] != 0)
262  break;
263  }
264  if (j < 0x100) {
265  for (j = 0; j < 0x10; j++) {
266  fprintf(cfp, "\n\t");
267  for (k = 0; k < 0x10; k++) {
268  sal_Unicode c = SChinese2VChineseData[((i*0x10+j)*0x10)+k];
269  fprintf(cfp, "0x%04x, ", c ? c : 0xFFFF);
270  }
271  }
272  }
273  }
274  fprintf(cfp, "\n};\n");
275 
276  fprintf(cfp, "\nstatic const sal_uInt16 STC_CharIndex_T2S[] = {");
277 
278  address=0;
279  for (i = 0; i < 0x10; i++) {
280  fprintf(cfp, "\n\t");
281  for (j = 0; j < 0x10; j++) {
282  for (k = 0; k < 0x100; k++) {
283  if (TChinese2SChineseData[((i*0x10)+j)*0x100+k] != 0)
284  break;
285  }
286  fprintf(
287  cfp, "0x%04lx, ",
288  sal::static_int_cast< unsigned long >(
289  k < 0x100 ? (address++)*0x100 : 0xFFFF));
290  }
291  }
292  fprintf(cfp, "\n};\n");
293 
294  fprintf(cfp, "\nstatic const sal_Unicode STC_CharData_T2S[] = {");
295 
296  for (i = 0; i < 0x100; i++) {
297  for (j = 0; j < 0x100; j++) {
298  if (TChinese2SChineseData[i*0x100+j] != 0)
299  break;
300  }
301  if (j < 0x100) {
302  for (j = 0; j < 0x10; j++) {
303  fprintf(cfp, "\n\t");
304  for (k = 0; k < 0x10; k++) {
305  sal_Unicode c = TChinese2SChineseData[((i*0x10+j)*0x10)+k];
306  fprintf(cfp, "0x%04x, ", c ? c : 0xFFFF);
307  }
308  }
309  }
310  }
311  fprintf(cfp, "\n};\n");
312 
313  // create function to return arrays
314  fprintf (cfp, "\tconst sal_uInt16* getSTC_CharIndex_S2T() { return STC_CharIndex_S2T; }\n");
315  fprintf (cfp, "\tconst sal_Unicode* getSTC_CharData_S2T() { return STC_CharData_S2T; }\n");
316  fprintf (cfp, "\tconst sal_uInt16* getSTC_CharIndex_S2V() { return STC_CharIndex_S2V; }\n");
317  fprintf (cfp, "\tconst sal_Unicode* getSTC_CharData_S2V() { return STC_CharData_S2V; }\n");
318  fprintf (cfp, "\tconst sal_uInt16* getSTC_CharIndex_T2S() { return STC_CharIndex_T2S; }\n");
319  fprintf (cfp, "\tconst sal_Unicode* getSTC_CharData_T2S() { return STC_CharData_T2S; }\n");
320 }
321 
322 namespace {
323 
324 struct Index { // one word of the stc_word dictionary; arrays of these are sorted with Index_comp
325  sal_uInt16 address; // offset of the word's first character in STC_WordData
326  sal_Int32 len; // length of the word in sal_Unicode units
327  sal_Unicode *data; // pointer to the word's first character inside STC_WordData
328 };
329 
330 }
331 
332 extern "C" {
333 static int Index_comp(const void* s1, const void* s2)
334 {
335  Index const *p1 = static_cast<Index const *>(s1), *p2 = static_cast<Index const *>(s2);
336  int result = p1->len - p2->len;
337  for (int i = 0; result == 0 && i < p1->len; i++)
338  result = *(p1->data+i) - *(p2->data+i);
339  return result;
340 }
341 }
342 
343 // Simplified/Traditional Chinese word conversion
344 void make_stc_word(FILE *sfp, FILE *cfp)
345 {
346  sal_Int32 count, i, length;
347  sal_Unicode STC_WordData[0x10000];
348  std::vector<Index> STC_WordEntry_S2T(0x10000);
349  std::vector<Index> STC_WordEntry_T2S(0x10000);
350  sal_Int32 count_S2T = 0, count_T2S = 0;
351  sal_Int32 line = 0, char_total = 0;
352  char Cstr[1024];
353 
354  while (fgets(Cstr, 1024, sfp)) {
355  // input file is in UTF-8 encoding (SChinese:TChinese)
356  // don't convert last new line character to Ostr.
357  OUString Ostr(Cstr, strlen(Cstr) - 1, RTL_TEXTENCODING_UTF8);
358  sal_Int32 len = Ostr.getLength();
359  if (char_total + len + 1 > 0xFFFF) {
360  fprintf(stderr, "Word Dictionary stc_word.dic is too big (line %" SAL_PRIdINT32 ")", line);
361  return;
362  }
363  sal_Int32 sep=-1, eq=-1, gt=-1, lt=-1;
364  if (((sep = eq = Ostr.indexOf('=')) > 0) ||
365  ((sep = gt = Ostr.indexOf('>')) > 0) ||
366  ((sep = lt = Ostr.indexOf('<')) > 0)) {
367 
368  if (eq > 0 || gt > 0) {
369  STC_WordEntry_S2T[count_S2T].address = sal::static_int_cast<sal_uInt16>( char_total );
370  STC_WordEntry_S2T[count_S2T].len = sep;
371  STC_WordEntry_S2T[count_S2T++].data = &STC_WordData[char_total];
372  }
373  if (eq > 0 || lt > 0) {
374  STC_WordEntry_T2S[count_T2S].address = sal::static_int_cast<sal_uInt16>( char_total + sep + 1 );
375  STC_WordEntry_T2S[count_T2S].len = len - sep - 1;
376  STC_WordEntry_T2S[count_T2S++].data = &STC_WordData[char_total + sep + 1];
377  }
378  for (i = 0; i < len; i++)
379  STC_WordData[char_total++] = (i == sep) ? 0 : Ostr[i];
380  STC_WordData[char_total++] = 0;
381  } else {
382  fprintf(stderr, "Invalid entry in stc_word.dic (line %" SAL_PRIdINT64 ")", sal_Int64(line));
383  return;
384  }
385  line++;
386  }
387 
388  if (char_total > 0) {
389  fprintf(cfp, "\nstatic const sal_Unicode STC_WordData[] = {");
390  for (i = 0; i < char_total; i++) {
391  if (i % 32 == 0) fprintf(cfp, "\n\t");
392  fprintf(cfp, "0x%04x, ", STC_WordData[i]);
393  }
394  fprintf(cfp, "\n};\n");
395 
396  fprintf(cfp, "\nstatic sal_Int32 STC_WordData_Count = %" SAL_PRIdINT32 ";\n", sal_Int32(char_total));
397 
398  // create function to return arrays
399  fprintf (cfp, "\tconst sal_Unicode* getSTC_WordData(sal_Int32& count) { count = STC_WordData_Count; return STC_WordData; }\n");
400  } else {
401  fprintf (cfp, "\tconst sal_Unicode* getSTC_WordData(sal_Int32& count) { count = 0; return NULL; }\n");
402  }
403 
404  sal_uInt16 STC_WordIndex[0x100];
405 
406  if (count_S2T > 0) {
407  qsort(STC_WordEntry_S2T.data(), count_S2T, sizeof(Index), Index_comp);
408 
409  fprintf(cfp, "\nstatic const sal_uInt16 STC_WordEntry_S2T[] = {");
410  count = 0;
411  length = 0;
412  for (i = 0; i < count_S2T; i++) {
413  if (i % 32 == 0) fprintf(cfp, "\n\t");
414  fprintf(cfp, "0x%04x, ", STC_WordEntry_S2T[i].address);
415  if (STC_WordEntry_S2T[i].len != length) {
416  length = STC_WordEntry_S2T[i].len;
417  while (count <= length)
418  STC_WordIndex[count++] = sal::static_int_cast<sal_uInt16>(i);
419  }
420  }
421  fprintf(cfp, "\n};\n");
422  STC_WordIndex[count++] = sal::static_int_cast<sal_uInt16>(i);
423 
424  fprintf(cfp, "\nstatic const sal_uInt16 STC_WordIndex_S2T[] = {");
425  for (i = 0; i < count; i++) {
426  if (i % 16 == 0) fprintf(cfp, "\n\t");
427  fprintf(cfp, "0x%04x, ", STC_WordIndex[i]);
428  }
429  fprintf(cfp, "\n};\n");
430 
431  fprintf(cfp, "\nstatic sal_Int32 STC_WordIndex_S2T_Count = %" SAL_PRIdINT64 ";\n", sal_Int64(length));
432  fprintf (cfp, "\tconst sal_uInt16* getSTC_WordEntry_S2T() { return STC_WordEntry_S2T; }\n");
433  fprintf (cfp, "\tconst sal_uInt16* getSTC_WordIndex_S2T(sal_Int32& count) { count = STC_WordIndex_S2T_Count; return STC_WordIndex_S2T; }\n");
434  } else {
435  fprintf (cfp, "\tconst sal_uInt16* getSTC_WordEntry_S2T() { return NULL; }\n");
436  fprintf (cfp, "\tconst sal_uInt16* getSTC_WordIndex_S2T(sal_Int32& count) { count = 0; return NULL; }\n");
437  }
438 
439  if (count_T2S > 0) {
440  qsort(STC_WordEntry_T2S.data(), count_T2S, sizeof(Index), Index_comp);
441 
442  fprintf(cfp, "\nstatic const sal_uInt16 STC_WordEntry_T2S[] = {");
443  count = 0;
444  length = 0;
445  for (i = 0; i < count_T2S; i++) {
446  if (i % 32 == 0) fprintf(cfp, "\n\t");
447  fprintf(cfp, "0x%04x, ", STC_WordEntry_T2S[i].address);
448  if (STC_WordEntry_T2S[i].len != length) {
449  length = STC_WordEntry_T2S[i].len;
450  while (count <= length)
451  STC_WordIndex[count++] = sal::static_int_cast<sal_uInt16>(i);
452  }
453  }
454  STC_WordIndex[count++] = sal::static_int_cast<sal_uInt16>(i);
455  fprintf(cfp, "\n};\n");
456 
457  fprintf(cfp, "\nstatic const sal_uInt16 STC_WordIndex_T2S[] = {");
458  for (i = 0; i < count; i++) {
459  if (i % 16 == 0) fprintf(cfp, "\n\t");
460  fprintf(cfp, "0x%04x, ", STC_WordIndex[i]);
461  }
462  fprintf(cfp, "\n};\n");
463 
464  fprintf(cfp, "\nstatic sal_Int32 STC_WordIndex_T2S_Count = %" SAL_PRIdINT64 ";\n\n", sal_Int64(length));
465  fprintf (cfp, "\tconst sal_uInt16* getSTC_WordEntry_T2S() { return STC_WordEntry_T2S; }\n");
466  fprintf (cfp, "\tconst sal_uInt16* getSTC_WordIndex_T2S(sal_Int32& count) { count = STC_WordIndex_T2S_Count; return STC_WordIndex_T2S; }\n");
467  } else {
468  fprintf (cfp, "\tconst sal_uInt16* getSTC_WordEntry_T2S() { return NULL; }\n");
469  fprintf (cfp, "\tconst sal_uInt16* getSTC_WordIndex_T2S(sal_Int32& count) { count = 0; return NULL; }\n");
470  }
471 }
472 
473 /* vim:set shiftwidth=4 softtabstop=4 expandtab: */
tuple line
SAL_IMPLEMENT_MAIN_WITH_ARGS(argc, argv)
static void make_stc_char(FILE *sfp, FILE *cfp)
sal_uInt16 sal_Unicode
static int Index_comp(const void *s1, const void *s2)
int i
exports com.sun.star.chart2. data
static void make_hhc_char(FILE *sfp, FILE *cfp)
Any result
struct _ADOIndex Index
static void make_stc_word(FILE *sfp, FILE *cfp)