GNU Unifont  15.1.01
Pan-Unicode font with complete Unicode Plane 0 coverage and partial coverage of higher planes
unigen-hangul.c
Go to the documentation of this file.
1 /**
2  @file unigen-hangul.c
3 
4  @brief Generate arbitrary hangul syllables.
5 
6  Input is a Unifont .hex file such as the "hangul-base.hex" file that
7  is included in the Unifont package.
8 
9  The default program parameters will generate the Unicode
10  Hangul Syllables range of U+AC00..U+D7A3. The syllables
11  will appear in this order:
12 
13  For each modern choseong {
14  For each modern jungseong {
15  Output syllable of choseong and jungseong
16  For each modern jongseong {
17  Output syllable of choseong + jungseong + jongseong
18  }
19  }
20  }
21 
22  By starting the jongseong code point at one before the first
23  valid jongseong, the first inner loop iteration will add a
24  blank glyph for the jongseong portion of the syllable, so
25  only the current choseong and jungseong will be output first.
26 
27  @author Paul Hardy
28 
29  @copyright Copyright © 2023 Paul Hardy
30 */
31 /*
32  LICENSE:
33 
34  This program is free software: you can redistribute it and/or modify
35  it under the terms of the GNU General Public License as published by
36  the Free Software Foundation, either version 2 of the License, or
37  (at your option) any later version.
38 
39  This program is distributed in the hope that it will be useful,
40  but WITHOUT ANY WARRANTY; without even the implied warranty of
41  MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
42  GNU General Public License for more details.
43 
44  You should have received a copy of the GNU General Public License
45  along with this program. If not, see <http://www.gnu.org/licenses/>.
46 */
47 
48 #include <stdio.h>
49 #include <stdlib.h>
50 #include "hangul.h"
51 
52 // #define DEBUG
53 
54 
/**
   @brief Run-time settings for one syllable-generation pass.

   The member order is significant: main() fills this structure
   with a positional (non-designated) initializer.
*/
struct PARAMS {
   unsigned starting_codept;  /* First output Unicode code point.  */

   unsigned cho_start;        /* Choseong start code point.        */
   unsigned cho_end;          /* Choseong end code point.          */

   unsigned jung_start;       /* Jungseong start code point.       */
   unsigned jung_end;         /* Jungseong end code point.         */

   unsigned jong_start;       /* Jongseong start code point.       */
   unsigned jong_end;         /* Jongseong end code point.         */

   FILE *infp;                /* Stream the base glyphs are read from. */
   FILE *outfp;               /* Stream the syllables are written to.  */
};
63 
64 
65 /**
66  @brief Program entry point.
67 */
68 int
69 main (int argc, char *argv[]) {
70 
71  int i; /* loop variable */
72  unsigned codept;
73  unsigned max_codept;
74  unsigned glyph[MAX_GLYPHS][16];
75  unsigned tmp_glyph [16]; /* To build one combined glyph at a time. */
76  int cho, jung, jong; /* The 3 components in a Hangul syllable. */
77 
78  /// Default parameters for Hangul syllable generation.
79  struct PARAMS params = { 0xAC00, /* Starting output Unicode code point */
80  0x1100, /* First modern choseong */
81  0x1112, /* Last modern choseong */
82  0x1161, /* First modern jungseong */
83  0x1175, /* Last modern jungseong */
84  0x11A7, /* One before first modern jongseong */
85  0x11C2, /* Last modern jongseong */
86  stdin, /* Default input file pointer */
87  stdout /* Default output file pointer */
88  };
89 
90  void parse_args (int argc, char *argv[], struct PARAMS *params);
91 
92  unsigned hangul_read_base16 (FILE *infp, unsigned glyph[][16]);
93 
94  void print_glyph_hex (FILE *fp, unsigned codept, unsigned *this_glyph);
95 
96  void combined_jamo (unsigned glyph [MAX_GLYPHS][16],
97  unsigned cho, unsigned jung, unsigned jong,
98  unsigned *combined_glyph);
99 
100 
101  if (argc > 1) {
102  parse_args (argc, argv, &params);
103 
104 #ifdef DEBUG
105  fprintf (stderr,
106  "Range: (U+%04X, U+%04X, U+%04X) to (U+%04X, U+%04X, U+%04X)\n",
107  params.cho_start, params.jung_start, params.jong_start,
108  params.cho_end, params.jung_end, params.jong_end);
109 #endif
110  }
111 
112  /*
113  Initialize glyph array to all zeroes.
114  */
115  for (codept = 0; codept < MAX_GLYPHS; codept++) {
116  for (i = 0; i < 16; i++) glyph[codept][i] = 0x0000;
117  }
118 
119  /*
120  Read Hangul base glyph file.
121  */
122  max_codept = hangul_read_base16 (params.infp, glyph);
123  if (max_codept > 0x8FF) {
124  fprintf (stderr, "\nWARNING: Hangul glyph range exceeds PUA space.\n\n");
125  }
126 
127  codept = params.starting_codept; /* First code point to output */
128 
129  for (cho = params.cho_start; cho <= params.cho_end; cho++) {
130  for (jung = params.jung_start; jung <= params.jung_end; jung++) {
131  for (jong = params.jong_start; jong <= params.jong_end; jong++) {
132 
133 #ifdef DEBUG
134  fprintf (params.outfp,
135  "(U+%04X, U+%04X, U+%04X)\n",
136  cho, jung, jong);
137 #endif
138  combined_jamo (glyph, cho, jung, jong, tmp_glyph);
139  print_glyph_hex (params.outfp, codept, tmp_glyph);
140  codept++;
141  if (jong == JONG_UNICODE_END)
142  jong = JONG_EXTB_UNICODE_START - 1; /* Start Extended-B range */
143  }
144  if (jung == JUNG_UNICODE_END)
145  jung = JUNG_EXTB_UNICODE_START - 1; /* Start Extended-B range */
146  }
147  if (cho == CHO_UNICODE_END)
148  cho = CHO_EXTA_UNICODE_START - 1; /* Start Extended-A range */
149  }
150 
151  if (params.infp != stdin) fclose (params.infp);
152  if (params.outfp != stdout) fclose (params.outfp);
153 
154  exit (EXIT_SUCCESS);
155 }
156 
157 
158 /**
159  @brief Parse command line arguments.
160 
161 */
162 void
163 parse_args (int argc, char *argv[], struct PARAMS *params) {
164  int arg_count; /* Current index into argv[]. */
165 
166  void get_hex_range (char *instring, unsigned *start, unsigned *end);
167 
168  int strncmp (const char *s1, const char *s2, size_t n);
169 
170 
171  arg_count = 1;
172 
173  while (arg_count < argc) {
174  /* If all 600,000+ Hangul syllables are requested. */
175  if (strncmp (argv [arg_count], "-all", 4) == 0) {
176  params->starting_codept = 0x0001;
177  params->cho_start = CHO_UNICODE_START; /* First modern choseong */
178  params->cho_end = CHO_EXTA_UNICODE_END; /* Last ancient choseong */
179  params->jung_start = JUNG_UNICODE_START; /* First modern jungseong */
180  params->jung_end = JUNG_EXTB_UNICODE_END; /* Last ancient jungseong */
181  params->jong_start = JONG_UNICODE_START - 1; /* One before first modern jongseong */
182  params->jong_end = JONG_EXTB_UNICODE_END; /* Last andient jongseong */
183  }
184  /* If starting code point for output Unifont hex file is specified. */
185  else if (strncmp (argv [arg_count], "-c", 2) == 0) {
186  arg_count++;
187  if (arg_count < argc) {
188  sscanf (argv [arg_count], "%X", &params->starting_codept);
189  }
190  }
191  /* If initial consonant (choseong) range, "jamo 1", get range. */
192  else if (strncmp (argv [arg_count], "-j1", 3) == 0) {
193  arg_count++;
194  if (arg_count < argc) {
195  get_hex_range (argv [arg_count],
196  &params->cho_start, &params->cho_end);
197  /*
198  Allow one initial blank glyph at start of a loop, none at end.
199  */
200  if (params->cho_start < CHO_UNICODE_START) {
201  params->cho_start = CHO_UNICODE_START - 1;
202  }
203  else if (params->cho_start > CHO_UNICODE_END &&
204  params->cho_start < CHO_EXTA_UNICODE_START) {
205  params->cho_start = CHO_EXTA_UNICODE_START - 1;
206  }
207  /*
208  Do not go past desired Hangul choseong range,
209  Hangul Jamo or Hangul Jamo Extended-A choseong.
210  */
211  if (params->cho_end > CHO_EXTA_UNICODE_END) {
212  params->cho_end = CHO_EXTA_UNICODE_END;
213  }
214  else if (params->cho_end > CHO_UNICODE_END &&
215  params->cho_end < CHO_EXTA_UNICODE_START) {
216  params->cho_end = CHO_UNICODE_END;
217  }
218  }
219  }
220  /* If medial vowel (jungseong) range, "jamo 2", get range. */
221  else if (strncmp (argv [arg_count], "-j2", 3) == 0) {
222  arg_count++;
223  if (arg_count < argc) {
224  get_hex_range (argv [arg_count],
225  &params->jung_start, &params->jung_end);
226  /*
227  Allow one initial blank glyph at start of a loop, none at end.
228  */
229  if (params->jung_start < JUNG_UNICODE_START) {
230  params->jung_start = JUNG_UNICODE_START - 1;
231  }
232  else if (params->jung_start > JUNG_UNICODE_END &&
233  params->jung_start < JUNG_EXTB_UNICODE_START) {
234  params->jung_start = JUNG_EXTB_UNICODE_START - 1;
235  }
236  /*
237  Do not go past desired Hangul jungseong range,
238  Hangul Jamo or Hangul Jamo Extended-B jungseong.
239  */
240  if (params->jung_end > JUNG_EXTB_UNICODE_END) {
241  params->jung_end = JUNG_EXTB_UNICODE_END;
242  }
243  else if (params->jung_end > JUNG_UNICODE_END &&
244  params->jung_end < JUNG_EXTB_UNICODE_START) {
245  params->jung_end = JUNG_UNICODE_END;
246  }
247  }
248  }
249  /* If final consonant (jongseong) range, "jamo 3", get range. */
250  else if (strncmp (argv [arg_count], "-j3", 3) == 0) {
251  arg_count++;
252  if (arg_count < argc) {
253  get_hex_range (argv [arg_count],
254  &params->jong_start, &params->jong_end);
255  /*
256  Allow one initial blank glyph at start of a loop, none at end.
257  */
258  if (params->jong_start < JONG_UNICODE_START) {
259  params->jong_start = JONG_UNICODE_START - 1;
260  }
261  else if (params->jong_start > JONG_UNICODE_END &&
262  params->jong_start < JONG_EXTB_UNICODE_START) {
263  params->jong_start = JONG_EXTB_UNICODE_START - 1;
264  }
265  /*
266  Do not go past desired Hangul jongseong range,
267  Hangul Jamo or Hangul Jamo Extended-B jongseong.
268  */
269  if (params->jong_end > JONG_EXTB_UNICODE_END) {
270  params->jong_end = JONG_EXTB_UNICODE_END;
271  }
272  else if (params->jong_end > JONG_UNICODE_END &&
273  params->jong_end < JONG_EXTB_UNICODE_START) {
274  params->jong_end = JONG_UNICODE_END;
275  }
276  }
277  }
278  /* If input file is specified, open it for read access. */
279  else if (strncmp (argv [arg_count], "-i", 2) == 0) {
280  arg_count++;
281  if (arg_count < argc) {
282  params->infp = fopen (argv [arg_count], "r");
283  if (params->infp == NULL) {
284  fprintf (stderr, "\n*** ERROR: Cannot open %s for input.\n\n",
285  argv [arg_count]);
286  exit (EXIT_FAILURE);
287  }
288  }
289  }
290  /* If output file is specified, open it for write access. */
291  else if (strncmp (argv [arg_count], "-o", 2) == 0) {
292  arg_count++;
293  if (arg_count < argc) {
294  params->outfp = fopen (argv [arg_count], "w");
295  if (params->outfp == NULL) {
296  fprintf (stderr, "\n*** ERROR: Cannot open %s for output.\n\n",
297  argv [arg_count]);
298  exit (EXIT_FAILURE);
299  }
300  }
301  }
302  /* If help is requested, print help message and exit. */
303  else if (strncmp (argv [arg_count], "-h", 2) == 0 ||
304  strncmp (argv [arg_count], "--help", 6) == 0) {
305  printf ("\nunigen-hangul [options]\n\n");
306  printf (" Generates Hangul syllables from an input Unifont .hex file encoded\n");
307  printf (" in Johab 6/3/1 format. By default, the output is the Unicode Hangul\n");
308  printf (" Syllables range, U+AC00..U+D7A3. Options allow the user to specify\n");
309  printf (" a starting code point for the output Unifont .hex file, and ranges\n");
310  printf (" in hexadecimal of the starting and ending Hangul Jamo code points:\n\n");
311 
312  printf (" * 1100-115E Initial consonants (choseong)\n");
313  printf (" * 1161-11A7 Medial vowels (jungseong)\n");
314  printf (" * 11A8-11FF Final consonants (jongseong).\n\n");
315 
316  printf (" A single code point or 0 to omit can be specified instead of a range.\n\n");
317 
318  printf (" Option Parameters Function\n");
319  printf (" ------ ---------- --------\n");
320  printf (" -h, --help Print this message and exit.\n\n");
321  printf (" -all Generate all Hangul syllables, using all modern and\n");
322  printf (" ancient Hangul in the Unicode range U+1100..U+11FF,\n");
323  printf (" U+A960..U+A97C, and U+D7B0..U+D7FB.\n");
324  printf (" WARNING: this will generate over 1,600,000 syllables\n");
325  printf (" in a 115 megabyte Unifont .hex format file. The\n");
326  printf (" default is to only output modern Hangul syllables.\n\n");
327  printf (" -c code_point Starting code point in hexadecimal for output file.\n\n");
328  printf (" -j1 start-end Choseong (jamo 1) start-end range in hexadecimal.\n\n");
329  printf (" -j2 start-end Jungseong (jamo 2) start-end range in hexadecimal.\n\n");
330  printf (" -j3 start-end Jongseong (jamo 3) start-end range in hexadecimal.\n\n");
331  printf (" -i input_file Unifont hangul-base.hex formatted input file.\n\n");
332  printf (" -o output_file Unifont .hex format output file.\n\n");
333  printf (" Example:\n\n");
334  printf (" unigen-hangul -c 1 -j3 11AB-11AB -i hangul-base.hex -o nieun-only.hex\n\n");
335  printf (" Generates Hangul syllables using all modern choseong and jungseong,\n");
336  printf (" and only the jongseong nieun (Unicode code point U+11AB). The output\n");
337  printf (" Unifont .hex file will contain code points starting at 1. Instead of\n");
338  printf (" specifying \"-j3 11AB-11AB\", simply using \"-j3 11AB\" will also suffice.\n\n");
339 
340  exit (EXIT_SUCCESS);
341  }
342 
343  arg_count++;
344  }
345 
346  return;
347 }
348 
349 
/**
   @brief Scan a hexadecimal range from a character string.

   Accepts either "start-end" or a single value "start"; both numbers
   are read as hexadecimal.  For a single value the end is set equal
   to the start.

   If no hexadecimal number can be read at the beginning of the string,
   a warning is printed on stderr and neither *start nor *end is
   modified, so the caller's existing range stays in effect.  If a '-'
   is present but no number can be read after it, *end keeps the value
   it had on entry.

   @param[in]     instring  Text of the form "start-end" or "start".
   @param[in,out] start     Receives the first number of the range.
   @param[in,out] end       Receives the last number of the range.
*/
void
get_hex_range (char *instring, unsigned *start, unsigned *end) {

   unsigned first;      /* First number, held until it is known to be valid. */
   const char *cursor;  /* Walks the string looking for the '-' separator.   */

   /* Get first number in range; reject the argument if there is none. */
   if (sscanf (instring, "%X", &first) != 1) {
      fprintf (stderr,
               "\nWARNING: Ignoring invalid hexadecimal range %s.\n\n",
               instring);
      return;
   }
   *start = first;

   /* Find the separator, or the end of the string if there is none. */
   cursor = instring;
   while (*cursor != '\0' && *cursor != '-') {
      cursor++;
   }

   /* Get last number in range. */
   if (*cursor == '-') {
      unsigned last;

      /* Only overwrite *end when a number really follows the '-'. */
      if (sscanf (cursor + 1, "%X", &last) == 1) {
         *end = last;
      }
   }
   else {
      *end = *start;
   }

   return;
}
Define constants and function prototypes for using Hangul glyphs.
void print_glyph_hex(FILE *fp, unsigned codept, unsigned *this_glyph)
Print one glyph in Unifont hexdraw hexadecimal string style.
void combined_jamo(unsigned glyph_table[MAX_GLYPHS][16], unsigned cho, unsigned jung, unsigned jong, unsigned *combined_glyph)
Convert Hangul Jamo choseong, jungseong, and jongseong into a glyph.
#define CHO_UNICODE_START
Modern Hangul choseong start.
Definition: hangul.h:50
#define JONG_UNICODE_END
Modern Hangul jongseong end.
Definition: hangul.h:61
#define JUNG_EXTB_UNICODE_START
Hangul Extended-B jungseong start.
Definition: hangul.h:57
#define JONG_EXTB_UNICODE_END
Hangul Extended-B jongseong end.
Definition: hangul.h:63
#define CHO_EXTA_UNICODE_START
Hangul Extended-A choseong start.
Definition: hangul.h:52
#define JONG_UNICODE_START
Modern Hangul jongseong start.
Definition: hangul.h:60
#define CHO_UNICODE_END
Hangul Jamo choseong end.
Definition: hangul.h:51
#define CHO_EXTA_UNICODE_END
Hangul Extended-A choseong end.
Definition: hangul.h:53
#define JUNG_UNICODE_START
Modern Hangul jungseong start.
Definition: hangul.h:55
#define JONG_EXTB_UNICODE_START
Hangul Extended-B jongseong start.
Definition: hangul.h:62
#define JUNG_UNICODE_END
Modern Hangul jungseong end.
Definition: hangul.h:56
unsigned hangul_read_base16(FILE *infp, unsigned base[][16])
Read hangul-base.hex file into an unsigned array.
#define JUNG_EXTB_UNICODE_END
Hangul Extended-B jungseong end.
Definition: hangul.h:58
#define MAX_GLYPHS
An OpenType font has at most 65536 glyphs.
Definition: hex2otf.c:85
int main(int argc, char *argv[])
Program entry point.
Definition: unigen-hangul.c:69
void get_hex_range(char *instring, unsigned *start, unsigned *end)
Scan a hexadecimal range from a character string.
void parse_args(int argc, char *argv[], struct PARAMS *params)
Parse command line arguments.