Commit 05f99527 authored by Noah Levitt, committed by Noah Levitt

Update Unicode data to 4.0. (#107974)

2003-07-30  Noah Levitt  <nlevitt@columbia.edu>

	* glib/gen-unicode-tables.pl:
	* glib/gunibreak.c:
	* glib/gunibreak.h:
	* glib/gunichartables.h:
	* glib/gunicode.h:
	* glib/gunicomp.h:
	* glib/gunidecomp.c:
	* glib/gunidecomp.h:
	* glib/guniprop.c:
	* tests/casefold.txt:
	* tests/casemap.txt:
	* tests/gen-casefold-txt.pl:
	* tests/gen-casemap-txt.pl: Update Unicode data to 4.0. (#107974)
parent cdf72b09
2003-07-30 Noah Levitt <nlevitt@columbia.edu>
* glib/gen-unicode-tables.pl:
* glib/gunibreak.c:
* glib/gunibreak.h:
* glib/gunichartables.h:
* glib/gunicode.h:
* glib/gunicomp.h:
* glib/gunidecomp.c:
* glib/gunidecomp.h:
* glib/guniprop.c:
* tests/casefold.txt:
* tests/casemap.txt:
* tests/gen-casefold-txt.pl:
* tests/gen-casemap-txt.pl: Update Unicode data to 4.0. (#107974)
2003-07-31 Tor Lillqvist <tml@iki.fi>
* glib/gspawn-win32.c: When possible, manage without the helper
......
2003-07-30 Noah Levitt <nlevitt@columbia.edu>
* glib/gen-unicode-tables.pl:
* glib/gunibreak.c:
* glib/gunibreak.h:
* glib/gunichartables.h:
* glib/gunicode.h:
* glib/gunicomp.h:
* glib/gunidecomp.c:
* glib/gunidecomp.h:
* glib/guniprop.c:
* tests/casefold.txt:
* tests/casemap.txt:
* tests/gen-casefold-txt.pl:
* tests/gen-casemap-txt.pl: Update Unicode data to 4.0. (#107974)
2003-07-31 Tor Lillqvist <tml@iki.fi>
* glib/gspawn-win32.c: When possible, manage without the helper
......
2003-07-30 Noah Levitt <nlevitt@columbia.edu>
* glib/gen-unicode-tables.pl:
* glib/gunibreak.c:
* glib/gunibreak.h:
* glib/gunichartables.h:
* glib/gunicode.h:
* glib/gunicomp.h:
* glib/gunidecomp.c:
* glib/gunidecomp.h:
* glib/guniprop.c:
* tests/casefold.txt:
* tests/casemap.txt:
* tests/gen-casefold-txt.pl:
* tests/gen-casemap-txt.pl: Update Unicode data to 4.0. (#107974)
2003-07-31 Tor Lillqvist <tml@iki.fi>
* glib/gspawn-win32.c: When possible, manage without the helper
......
2003-07-30 Noah Levitt <nlevitt@columbia.edu>
* glib/gen-unicode-tables.pl:
* glib/gunibreak.c:
* glib/gunibreak.h:
* glib/gunichartables.h:
* glib/gunicode.h:
* glib/gunicomp.h:
* glib/gunidecomp.c:
* glib/gunidecomp.h:
* glib/guniprop.c:
* tests/casefold.txt:
* tests/casemap.txt:
* tests/gen-casefold-txt.pl:
* tests/gen-casemap-txt.pl: Update Unicode data to 4.0. (#107974)
2003-07-31 Tor Lillqvist <tml@iki.fi>
* glib/gspawn-win32.c: When possible, manage without the helper
......
2003-07-30 Noah Levitt <nlevitt@columbia.edu>
* glib/gen-unicode-tables.pl:
* glib/gunibreak.c:
* glib/gunibreak.h:
* glib/gunichartables.h:
* glib/gunicode.h:
* glib/gunicomp.h:
* glib/gunidecomp.c:
* glib/gunidecomp.h:
* glib/guniprop.c:
* tests/casefold.txt:
* tests/casemap.txt:
* tests/gen-casefold-txt.pl:
* tests/gen-casemap-txt.pl: Update Unicode data to 4.0. (#107974)
2003-07-31 Tor Lillqvist <tml@iki.fi>
* glib/gspawn-win32.c: When possible, manage without the helper
......
2003-07-30 Noah Levitt <nlevitt@columbia.edu>
* glib/gen-unicode-tables.pl:
* glib/gunibreak.c:
* glib/gunibreak.h:
* glib/gunichartables.h:
* glib/gunicode.h:
* glib/gunicomp.h:
* glib/gunidecomp.c:
* glib/gunidecomp.h:
* glib/guniprop.c:
* tests/casefold.txt:
* tests/casemap.txt:
* tests/gen-casefold-txt.pl:
* tests/gen-casemap-txt.pl: Update Unicode data to 4.0. (#107974)
2003-07-31 Tor Lillqvist <tml@iki.fi>
* glib/gspawn-win32.c: When possible, manage without the helper
......
This diff is collapsed.
......@@ -25,13 +25,22 @@
#include "glib.h"
#include "gunibreak.h"
#define TPROP_PART1(Page, Char) \
((break_property_table_part1[Page] >= G_UNICODE_MAX_TABLE_INDEX) \
? (break_property_table_part1[Page] - G_UNICODE_MAX_TABLE_INDEX) \
: (break_property_data[break_property_table_part1[Page]][Char]))
#define TPROP(Page, Char) \
((break_property_table[Page] >= G_UNICODE_MAX_TABLE_INDEX) \
? (break_property_table[Page] - G_UNICODE_MAX_TABLE_INDEX) \
: (break_property_data[break_property_table[Page]][Char]))
#define TPROP_PART2(Page, Char) \
((break_property_table_part2[Page] >= G_UNICODE_MAX_TABLE_INDEX) \
? (break_property_table_part2[Page] - G_UNICODE_MAX_TABLE_INDEX) \
: (break_property_data[break_property_table_part2[Page]][Char]))
#define PROP(Char) (((Char) > (G_UNICODE_LAST_CHAR)) ? G_UNICODE_BREAK_UNKNOWN : TPROP ((Char) >> 8, (Char) & 0xff))
#define PROP(Char) \
(((Char) <= G_UNICODE_LAST_CHAR_PART1) \
? TPROP_PART1 ((Char) >> 8, (Char) & 0xff) \
: (((Char) >= 0xe0000 && (Char) <= G_UNICODE_LAST_CHAR) \
? TPROP_PART2 (((Char) - 0xe0000) >> 8, (Char) & 0xff) \
: G_UNICODE_BREAK_UNKNOWN))
/**
* g_unichar_break_type:
......
This diff is collapsed.
This diff is collapsed.
......@@ -100,7 +100,9 @@ typedef enum
G_UNICODE_BREAK_POSTFIX,
G_UNICODE_BREAK_COMPLEX_CONTEXT,
G_UNICODE_BREAK_AMBIGUOUS,
G_UNICODE_BREAK_UNKNOWN
G_UNICODE_BREAK_UNKNOWN,
G_UNICODE_BREAK_NEXT_LINE,
G_UNICODE_BREAK_WORD_JOINER
} GUnicodeBreakType;
/* Returns TRUE if current locale uses UTF-8 charset. If CHARSET is
......
......@@ -3,7 +3,7 @@
#define COMPOSE_SECOND_START 357
#define COMPOSE_SECOND_SINGLE_START 388
static const gushort compose_data[][256] = {
static const guint16 compose_data[][256] = {
{ /* page 0, index 0 */
0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0,
0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0,
......@@ -222,7 +222,7 @@ static const gushort compose_data[][256] = {
}
};
static const short compose_table[256] = {
static const gint16 compose_table[256] = {
0 /* page 0 */,
1 /* page 1 */,
2 /* page 2 */,
......@@ -274,7 +274,7 @@ static const short compose_table[256] = {
15 /* page 48 */
};
static const gushort compose_first_single[][2] = {
static const guint16 compose_first_single[][2] = {
{ 0x0338, 0x226e },
{ 0x0338, 0x2260 },
{ 0x0338, 0x226f },
......@@ -486,7 +486,7 @@ static const gushort compose_first_single[][2] = {
{ 0x3099, 0x30fa },
{ 0x3099, 0x30fe }
};
static const gushort compose_second_single[][2] = {
static const guint16 compose_second_single[][2] = {
{ 0x0627, 0x0622 },
{ 0x0627, 0x0623 },
{ 0x0627, 0x0625 },
......@@ -506,7 +506,7 @@ static const gushort compose_second_single[][2] = {
{ 0x0dd9, 0x0ddc },
{ 0x0dd9, 0x0dde }
};
static const gushort compose_array[146][31] = {
static const guint16 compose_array[146][31] = {
{ 0x00c0, 0x00c1, 0x00c2, 0x00c3, 0x0100, 0x0102, 0x0226, 0x00c4, 0x1ea2, 0x00c5, 0, 0x01cd, 0x0200, 0x0202, 0, 0, 0, 0x1ea0, 0, 0x1e00, 0, 0, 0x0104, 0, 0, 0, 0, 0, 0, 0, 0 },
{ 0, 0, 0, 0, 0, 0, 0x1e02, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0x1e04, 0, 0, 0, 0, 0, 0, 0, 0, 0x1e06, 0, 0, 0, 0 },
{ 0, 0x0106, 0x0108, 0, 0, 0, 0x010a, 0, 0, 0, 0, 0x010c, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0x00c7, 0, 0, 0, 0, 0, 0, 0, 0, 0 },
......
......@@ -28,13 +28,22 @@
#include "gunicomp.h"
#define CC(Page, Char) \
((combining_class_table[Page] >= G_UNICODE_MAX_TABLE_INDEX) \
? (combining_class_table[Page] - G_UNICODE_MAX_TABLE_INDEX) \
: (cclass_data[combining_class_table[Page]][Char]))
#define CC_PART1(Page, Char) \
((combining_class_table_part1[Page] >= G_UNICODE_MAX_TABLE_INDEX) \
? (combining_class_table_part1[Page] - G_UNICODE_MAX_TABLE_INDEX) \
: (cclass_data[combining_class_table_part1[Page]][Char]))
#define CC_PART2(Page, Char) \
((combining_class_table_part2[Page] >= G_UNICODE_MAX_TABLE_INDEX) \
? (combining_class_table_part2[Page] - G_UNICODE_MAX_TABLE_INDEX) \
: (cclass_data[combining_class_table_part2[Page]][Char]))
#define COMBINING_CLASS(Char) \
(((Char) > (G_UNICODE_LAST_CHAR)) ? 0 : CC((Char) >> 8, (Char) & 0xff))
(((Char) <= G_UNICODE_LAST_CHAR_PART1) \
? CC_PART1 ((Char) >> 8, (Char) & 0xff) \
: (((Char) >= 0xe0000 && (Char) <= G_UNICODE_LAST_CHAR) \
? CC_PART2 (((Char) - 0xe0000) >> 8, (Char) & 0xff) \
: 0))
/**
* g_unicode_canonical_ordering:
......@@ -84,7 +93,8 @@ g_unicode_canonical_ordering (gunichar *string,
}
}
static const guchar *
/* returns a pointer to a null-terminated UTF-8 string */
static const gchar *
find_decomposition (gunichar ch,
gboolean compat)
{
......@@ -104,17 +114,17 @@ find_decomposition (gunichar ch,
if (compat)
{
offset = decomp_table[half].compat_offset;
if (offset == 0xff)
if (offset == G_UNICODE_NOT_PRESENT_OFFSET)
offset = decomp_table[half].canon_offset;
}
else
{
offset = decomp_table[half].canon_offset;
if (offset == 0xff)
if (offset == G_UNICODE_NOT_PRESENT_OFFSET)
return NULL;
}
return &(decomp_expansion_string[decomp_table[half].expansion_offset + offset]);
return &(decomp_expansion_string[offset]);
}
else if (half == start)
break;
......@@ -142,27 +152,20 @@ gunichar *
g_unicode_canonical_decomposition (gunichar ch,
gsize *result_len)
{
const guchar *decomp = find_decomposition (ch, FALSE);
const gchar *decomp = find_decomposition (ch, FALSE);
const gchar *p;
gunichar *r;
if (decomp)
{
/* Found it. */
int i, len;
/* We store as a double-nul terminated string. */
for (len = 0; (decomp[len] || decomp[len + 1]);
len += 2)
;
int i;
/* We've counted twice as many bytes as there are
characters. */
*result_len = len / 2;
r = g_malloc (len / 2 * sizeof (gunichar));
*result_len = g_utf8_strlen (decomp, -1);
r = g_malloc (*result_len * sizeof (gunichar));
for (i = 0; i < len; i += 2)
{
r[i / 2] = (decomp[i] << 8 | decomp[i + 1]);
}
for (p = decomp, i = 0; *p != '\0'; p = g_utf8_next_char (p), i++)
r[i] = g_utf8_get_char (p);
}
else
{
......@@ -194,6 +197,7 @@ combine (gunichar a,
gushort index_a, index_b;
index_a = COMPOSE_INDEX(a);
if (index_a >= COMPOSE_FIRST_SINGLE_START && index_a < COMPOSE_SECOND_START)
{
if (b == compose_first_single[index_a - COMPOSE_FIRST_SINGLE_START][0])
......@@ -202,10 +206,11 @@ combine (gunichar a,
return TRUE;
}
else
return FALSE;
return FALSE;
}
index_b = COMPOSE_INDEX(b);
if (index_b >= COMPOSE_SECOND_SINGLE_START)
{
if (a == compose_second_single[index_b - COMPOSE_SECOND_SINGLE_START][0])
......@@ -214,7 +219,7 @@ combine (gunichar a,
return TRUE;
}
else
return FALSE;
return FALSE;
}
if (index_a >= COMPOSE_FIRST_START && index_a < COMPOSE_FIRST_SINGLE_START &&
......@@ -252,17 +257,10 @@ _g_utf8_normalize_wc (const gchar *str,
{
gunichar wc = g_utf8_get_char (p);
const guchar *decomp = find_decomposition (wc, do_compat);
const gchar *decomp = find_decomposition (wc, do_compat);
if (decomp)
{
int len;
/* We store as a double-nul terminated string. */
for (len = 0; (decomp[len] || decomp[len + 1]);
len += 2)
;
n_wc += len / 2;
}
n_wc += g_utf8_strlen (decomp, -1);
else
n_wc++;
......@@ -277,7 +275,7 @@ _g_utf8_normalize_wc (const gchar *str,
while ((max_len < 0 || p < str + max_len) && *p)
{
gunichar wc = g_utf8_get_char (p);
const guchar *decomp;
const gchar *decomp;
int cc;
gsize old_n_wc = n_wc;
......@@ -285,11 +283,9 @@ _g_utf8_normalize_wc (const gchar *str,
if (decomp)
{
int len;
/* We store as a double-nul terminated string. */
for (len = 0; (decomp[len] || decomp[len + 1]);
len += 2)
wc_buffer[n_wc++] = (decomp[len] << 8 | decomp[len + 1]);
const char *pd;
for (pd = decomp; *pd != '\0'; pd = g_utf8_next_char (pd))
wc_buffer[n_wc++] = g_utf8_get_char (pd);
}
else
wc_buffer[n_wc++] = wc;
......@@ -318,7 +314,6 @@ _g_utf8_normalize_wc (const gchar *str,
/* All decomposed and reordered */
if (do_compose && n_wc > 0)
{
gsize i, j;
......@@ -402,7 +397,7 @@ g_utf8_normalize (const gchar *str,
{
gunichar *result_wc = _g_utf8_normalize_wc (str, len, mode);
gchar *result;
result = g_ucs4_to_utf8 (result_wc, -1, NULL, NULL, NULL);
g_free (result_wc);
......
This diff is collapsed.
......@@ -28,17 +28,30 @@
#include "glib.h"
#include "gunichartables.h"
#define ATTR_TABLE(Page) (((Page) <= G_UNICODE_LAST_PAGE_PART1) \
? attr_table_part1[Page] \
: attr_table_part2[(Page) - 0xe00])
#define ATTTABLE(Page, Char) \
((attr_table[Page] == G_UNICODE_MAX_TABLE_INDEX) ? 0 : (attr_data[attr_table[Page]][Char]))
((ATTR_TABLE(Page) == G_UNICODE_MAX_TABLE_INDEX) ? 0 : (attr_data[ATTR_TABLE(Page)][Char]))
#define TTYPE(Page, Char) \
((type_table[Page] >= G_UNICODE_MAX_TABLE_INDEX) \
? (type_table[Page] - G_UNICODE_MAX_TABLE_INDEX) \
: (type_data[type_table[Page]][Char]))
#define TTYPE_PART1(Page, Char) \
((type_table_part1[Page] >= G_UNICODE_MAX_TABLE_INDEX) \
? (type_table_part1[Page] - G_UNICODE_MAX_TABLE_INDEX) \
: (type_data[type_table_part1[Page]][Char]))
#define TTYPE_PART2(Page, Char) \
((type_table_part2[Page] >= G_UNICODE_MAX_TABLE_INDEX) \
? (type_table_part2[Page] - G_UNICODE_MAX_TABLE_INDEX) \
: (type_data[type_table_part2[Page]][Char]))
#define TYPE(Char) \
(((Char) <= G_UNICODE_LAST_CHAR_PART1) \
? TTYPE_PART1 ((Char) >> 8, (Char) & 0xff) \
: (((Char) >= 0xe0000 && (Char) <= G_UNICODE_LAST_CHAR) \
? TTYPE_PART2 (((Char) - 0xe0000) >> 8, (Char) & 0xff) \
: G_UNICODE_UNASSIGNED))
#define TYPE(Char) (((Char) > (G_UNICODE_LAST_CHAR)) ? G_UNICODE_UNASSIGNED : TTYPE ((Char) >> 8, (Char) & 0xff))
#define ISDIGIT(Type) ((Type) == G_UNICODE_DECIMAL_NUMBER \
|| (Type) == G_UNICODE_LETTER_NUMBER \
......@@ -361,10 +374,10 @@ g_unichar_toupper (gunichar c)
if (t == G_UNICODE_LOWERCASE_LETTER)
{
gunichar val = ATTTABLE (c >> 8, c & 0xff);
if (val >= 0xd800 && val < 0xdc00)
if (val >= 0x1000000)
{
const guchar *p = special_case_table[val - 0xd800];
return p[0] * 256 + p[1];
const guchar *p = special_case_table + val - 0x1000000;
return g_utf8_get_char (p);
}
else
return val ? val : c;
......@@ -398,10 +411,10 @@ g_unichar_tolower (gunichar c)
if (t == G_UNICODE_UPPERCASE_LETTER)
{
gunichar val = ATTTABLE (c >> 8, c & 0xff);
if (val >= 0xd800 && val < 0xdc00)
if (val >= 0x1000000)
{
const guchar *p = special_case_table[val - 0xd800];
return p[0] * 256 + p[1];
const guchar *p = special_case_table + val - 0x1000000;
return g_utf8_get_char (p);
}
else
return val ? val : c;
......@@ -561,31 +574,22 @@ output_marks (const char **p_inout,
static gsize
output_special_case (gchar *out_buffer,
gsize len,
int index,
int offset,
int type,
int which)
{
const guchar *p = special_case_table[index];
const guchar *p = special_case_table + offset;
gint len;
if (type != G_UNICODE_TITLECASE_LETTER)
p += 2; /* +2 to skip over "best single match" */
p = g_utf8_next_char (p);
if (which == 1)
{
while (p[0] || p[1])
p += 2;
p += 2;
}
p += strlen (p) + 1;
while (TRUE)
{
gunichar ch = p[0] * 256 + p[1];
if (!ch)
break;
len += g_unichar_to_utf8 (ch, out_buffer ? out_buffer + len : NULL);
p += 2;
}
len = strlen (p);
if (out_buffer)
memcpy (out_buffer, p, len);
return len;
}
......@@ -662,9 +666,9 @@ real_toupper (const gchar *str,
{
val = ATTTABLE (c >> 8, c & 0xff);
if (val >= 0xd800 && val < 0xdc00)
if (val >= 0x1000000)
{
len += output_special_case (out_buffer, len, val - 0xd800, t,
len += output_special_case (out_buffer, len, val - 0x1000000, t,
t == G_UNICODE_LOWERCASE_LETTER ? 0 : 1);
}
else
......@@ -785,9 +789,9 @@ real_tolower (const gchar *str,
{
val = ATTTABLE (c >> 8, c & 0xff);
if (val >= 0xd800 && val < 0xdc00)
if (val >= 0x1000000)
{
len += output_special_case (out_buffer, len, val - 0xd800, t, 0);
len += output_special_case (out_buffer, len, val - 0x1000000, t, 0);
}
else
{
......@@ -891,7 +895,7 @@ g_utf8_casefold (const gchar *str,
int end = G_N_ELEMENTS (casefold_table);
if (ch >= casefold_table[start].ch &&
ch <= casefold_table[end - 1].ch)
ch <= casefold_table[end - 1].ch)
{
while (TRUE)
{
......
# Test cases generated from Unicode 3.1 data
# Test cases generated from Unicode 4.0 data
# by gen-casefold-test.pl. Do not edit.
#
# Some special hand crafted tests
......@@ -89,8 +89,7 @@ Z z
Ī ī
Ĭ ĭ
Į į
İ i
ı i
İ i̇
IJ ij
Ĵ ĵ
Ķ ķ
......@@ -216,6 +215,7 @@ Z z
Ț ț
Ȝ ȝ
Ȟ ȟ
Ƞ ƞ
Ȣ ȣ
Ȥ ȥ
Ȧ ȧ
......@@ -266,6 +266,7 @@ Z z
ϑ θ
ϕ φ
ϖ π
Ϙ ϙ
Ϛ ϛ
Ϝ ϝ
Ϟ ϟ
......@@ -279,9 +280,11 @@ Z z
Ϯ ϯ
ϰ κ
ϱ ρ
ϲ σ
ϴ θ
ϵ ε
Ϸ ϸ
Ϲ ϲ
Ϻ ϻ
Ѐ ѐ
Ё ё
Ђ ђ
......@@ -347,6 +350,7 @@ Z z
Ѽ ѽ
Ѿ ѿ
Ҁ ҁ
Ҋ ҋ
Ҍ ҍ
Ҏ ҏ
Ґ ґ
......@@ -375,8 +379,11 @@ Z z
Ҿ ҿ
Ӂ ӂ
Ӄ ӄ
Ӆ ӆ
Ӈ ӈ
Ӊ ӊ
Ӌ ӌ
Ӎ ӎ
Ӑ ӑ
Ӓ ӓ
Ӕ ӕ
......@@ -397,6 +404,14 @@ Z z
Ӳ ӳ
Ӵ ӵ
Ӹ ӹ
Ԁ ԁ
Ԃ ԃ
Ԅ ԅ
Ԇ ԇ
Ԉ ԉ
Ԋ ԋ
Ԍ ԍ
Ԏ ԏ
Ա ա
Բ բ
Գ գ
......@@ -794,3 +809,43 @@ Z z
X x
Y y
Z z
𐐀 𐐨
𐐁 𐐩
𐐂 𐐪
𐐃 𐐫
𐐄 𐐬
𐐅 𐐭
𐐆 𐐮
𐐇 𐐯
𐐈 𐐰
𐐉 𐐱
𐐊 𐐲
𐐋 𐐳
𐐌 𐐴
𐐍 𐐵
𐐎 𐐶
𐐏 𐐷
𐐐 𐐸
𐐑 𐐹
𐐒 𐐺
𐐓 𐐻
𐐔 𐐼
𐐕 𐐽
𐐖 𐐾
𐐗 𐐿
𐐘 𐑀
𐐙 𐑁
𐐚 𐑂
𐐛 𐑃
𐐜 𐑄
𐐝 𐑅
𐐞 𐑆
𐐟 𐑇
𐐠 𐑈
𐐡 𐑉
𐐢 𐑊
𐐣 𐑋
𐐤 𐑌
𐐥 𐑍
𐐦 𐑎
𐐧 𐑏
This diff is collapsed.
......@@ -24,6 +24,8 @@
# I consider the output of this program to be unrestricted. Use it as
# you will.
require 5.006;
# Names of fields in the CaseFolding table
$FOLDING_CODE = 0;
$FOLDING_STATUS = 1;
......@@ -49,6 +51,7 @@ AaBbCc@@\taabbcc@@
#
EOT
binmode STDOUT, ":utf8";
open (INPUT, "< $ARGV[1]") || exit 1;
while (<INPUT>)
......@@ -65,15 +68,14 @@ while (<INPUT>)
my $raw_code = $fields[$FOLDING_CODE];
my $code = hex ($raw_code);
next if $code > 0xffff; # FIXME!
if ($#fields != 3)
{
printf STDERR ("Entry for $raw_code has wrong number of fields (%d)\n", $#fields);
next;
}
next if ($fields[$FOLDING_STATUS] eq 'S');
# skip simple and Turkic mappings
next if ($fields[$FOLDING_STATUS] =~ /^[ST]$/);
@values = map { hex ($_) } split /\s+/, $fields[$FOLDING_MAPPING];
printf ("%s\t%s\n", pack ("U", $code), pack ("U*", @values));
......
......@@ -23,6 +23,7 @@
# I consider the output of this program to be unrestricted. Use it as
# you will.
require 5.006;
use utf8;
if (@ARGV != 3) {
......@@ -60,6 +61,7 @@ my @upper;
my @title;
my @lower;
binmode STDOUT, ":utf8";
open (INPUT, "< $ARGV[1]") || exit 1;
$last_code = -1;
......@@ -74,8 +76,6 @@ while (<INPUT>)
$code = hex ($fields[$CODE]);
last if ($code > 0xFFFF); # ignore characters out of the basic plane
if ($code > $last_code + 1)
{
# Found a gap.
......@@ -196,7 +196,7 @@ sub process_one
sub print_tests
{
for ($i = 0; $i < 0xffff; $i++) {
for ($i = 0; $i < 0x10ffff; $i++) {
if ($i == 0x3A3) {
# Greek sigma needs special tests
next;
......
Markdown is supported
0% or
You are about to add 0 people to the discussion. Proceed with caution.
Finish editing this message first!
Please register or sign in to comment