Commit f59d2ae8 authored by Matthias Clasen
Browse files

Merge branch 'input-tweaks' into 'master'

imcontext: Tweak Compose sequence preedit

Closes #10, #4127, and #4124

See merge request !3799
parents 7c3a53a1 c4dbb8f0
Pipeline #303312 passed with stages
in 38 minutes and 4 seconds
No preview for this file type
......@@ -3,7 +3,7 @@
#define MAX_SEQ_LEN 5
#define N_INDEX_SIZE 30
#define DATA_SIZE 16521
#define N_CHARS 1572
#define DATA_SIZE 16447
#define N_CHARS 1241
#endif
No preview for this file type
......@@ -486,6 +486,9 @@ parser_remove_duplicates (GtkComposeParser *parser)
GHashTableIter iter;
gunichar *sequence;
char *value;
GString *output;
output = g_string_new ("");
g_hash_table_iter_init (&iter, parser->sequences);
while (g_hash_table_iter_next (&iter, (gpointer *)&sequence, (gpointer *)&value))
......@@ -493,8 +496,6 @@ parser_remove_duplicates (GtkComposeParser *parser)
static guint16 keysyms[MAX_COMPOSE_LEN + 1];
int i;
int n_compose = 0;
gunichar output_char;
char buf[8] = { 0, };
gboolean remove_sequence = FALSE;
if (value[0] == '\0')
......@@ -529,10 +530,9 @@ parser_remove_duplicates (GtkComposeParser *parser)
n_compose++;
}
if (gtk_check_algorithmically (keysyms, n_compose, &output_char))
if (gtk_check_algorithmically (keysyms, n_compose, output))
{
g_unichar_to_utf8 (output_char, buf);
if (strcmp (value, buf) == 0)
if (strcmp (value, output->str) == 0)
remove_sequence = TRUE;
}
......@@ -540,6 +540,8 @@ next:
if (remove_sequence)
g_hash_table_iter_remove (&iter);
}
g_string_free (output, TRUE);
}
static void
......@@ -1069,22 +1071,22 @@ gtk_compose_table_parse (const char *compose_file,
return compose_table;
}
static const char *prefix =
"# GTK has rewritten this file to add the line:\n"
"\n"
"include \"%L\"\n"
"\n"
"# This is necessary to add your own Compose sequences\n"
"# in addition to the builtin sequences of GTK. If this\n"
"# is not what you want, just remove that line.\n"
"#\n"
"# A backup of the previous file contents has been made.\n"
"\n"
"\n";
static gboolean
rewrite_compose_file (const char *compose_file)
{
static const char *prefix =
"# GTK has rewritten this file to add the line:\n"
"\n"
"include \"%L\"\n"
"\n"
"# This is necessary to add your own Compose sequences\n"
"# in addition to the builtin sequences of GTK. If this\n"
"# is not what you want, just remove that line.\n"
"#\n"
"# A backup of the previous file contents has been made.\n"
"\n"
"\n";
char *path = NULL;
char *content = NULL;
gsize content_len;
......@@ -1352,6 +1354,47 @@ gtk_compose_table_check (const GtkComposeTable *table,
return FALSE;
}
void
gtk_compose_table_get_prefix (const GtkComposeTable *table,
const guint16 *compose_buffer,
int n_compose,
int *prefix)
{
int index_stride = table->max_seq_len + 1;
int p = 0;
for (int idx = 0; idx < table->n_index_size; idx++)
{
const guint16 *seq_index = table->data + (idx * index_stride);
if (seq_index[0] == compose_buffer[0])
{
p = 1;
for (int i = 1; i < table->max_seq_len; i++)
{
int len = i + 1;
for (int j = seq_index[i]; j < seq_index[i + 1]; j += len)
{
int k;
for (k = 0; k < MIN (len, n_compose) - 1; k++)
{
if (compose_buffer[k + 1] != (gunichar) table->data[j + k])
break;
}
p = MAX (p, k + 1);
}
}
break;
}
}
*prefix = p;
}
void
gtk_compose_table_foreach (const GtkComposeTable *table,
GtkComposeSequenceCallback callback,
......@@ -1419,119 +1462,56 @@ gtk_compose_table_foreach (const GtkComposeTable *table,
#define IS_DEAD_KEY(k) \
((k) >= GDK_KEY_dead_grave && (k) <= GDK_KEY_dead_greek)
/* Look for an ordering of a Unicode character sequence whose NFC
 * normalization is exactly one character long.
 *
 * combination_buffer holds n_compose characters; element 0 is treated as
 * the base character and the remaining elements as the marks applied to
 * it. Canonical ordering already rearranges marks, except for marks that
 * share a Canonical Combining Class, so the marks are additionally
 * shuffled by swapping neighbouring positions, and every arrangement is
 * normalized in turn. (n_compose - 1)! arrangements are tried at most.
 *
 * On success the arrangement that normalized to a single character is
 * copied back into combination_buffer and TRUE is returned. Otherwise
 * FALSE is returned. In both cases a tilde (0x303) following a base in
 * the range 0x390..0x3FF may have been replaced by perispomeni (0x342).
 */
static gboolean
check_normalize_nfc (gunichar *combination_buffer,
                     int       n_compose)
{
  gunichar *scratch = g_alloca (n_compose * sizeof (gunichar));
  int n_attempts = 1;
  int attempt;
  int pos;

  /* Number of arrangements to try: (n_compose - 1)! */
  for (pos = 1; pos < n_compose; pos++)
    n_attempts *= pos;

  /* Xorg reuses dead_tilde for the perispomeni diacritic mark, so for a
   * base character in the Greek Unicode block every tilde is replaced
   * with perispomeni.
   */
  if (combination_buffer[0] >= 0x390 && combination_buffer[0] <= 0x3FF)
    {
      for (pos = 1; pos < n_compose; pos++)
        {
          if (combination_buffer[pos] == 0x303)
            combination_buffer[pos] = 0x342;
        }
    }

  memcpy (scratch, combination_buffer, n_compose * sizeof (gunichar));

  for (attempt = 0; attempt < n_attempts; attempt++)
    {
      char *utf8;
      char *normalized;
      gboolean is_single;
      int first;
      int second;
      gunichar held;

      g_unicode_canonical_ordering (scratch, n_compose);

      utf8 = g_ucs4_to_utf8 (scratch, n_compose, NULL, NULL, NULL);
      normalized = g_utf8_normalize (utf8, -1, G_NORMALIZE_NFC);
      is_single = (g_utf8_strlen (normalized, -1) == 1);

      g_free (utf8);
      g_free (normalized);

      if (is_single)
        {
          memcpy (combination_buffer, scratch, n_compose * sizeof (gunichar));
          return TRUE;
        }

      /* With at most one mark there is nothing left to rearrange. */
      if (n_compose <= 2)
        break;

      /* Swap two neighbouring marks (wrapping around) for the next try. */
      first = attempt % (n_compose - 1) + 1;
      second = (attempt + 1) % (n_compose - 1) + 1;

      held = scratch[first];
      scratch[first] = scratch[second];
      scratch[second] = held;
    }

  return FALSE;
}
gboolean
gtk_check_algorithmically (const guint16 *compose_buffer,
int n_compose,
gunichar *output_char)
GString *output)
{
int i;
gunichar *combination_buffer;
char *combination_utf8, *nfc;
combination_buffer = alloca (sizeof (gunichar) * (n_compose + 1));
if (output_char)
*output_char = 0;
g_string_set_size (output, 0);
for (i = 0; i < n_compose && IS_DEAD_KEY (compose_buffer[i]); i++)
;
/* Allow at most 2 dead keys */
if (i > 2)
return FALSE;
/* Can't combine if there's no base character */
/* Can't combine if there's no base character: incomplete sequence */
if (i == n_compose)
return TRUE;
if (i > 0 && i == n_compose - 1)
{
combination_buffer[0] = gdk_keyval_to_unicode (compose_buffer[i]);
combination_buffer[n_compose] = 0;
GString *input;
char *nfc;
gunichar ch;
ch = gdk_keyval_to_unicode (compose_buffer[i]);
/* We don't allow combining with non-letters */
if (!g_unichar_isalpha (ch))
return FALSE;
input = g_string_sized_new (4 * n_compose);
g_string_append_unichar (input, ch);
i--;
while (i >= 0)
{
switch (compose_buffer[i])
{
#define CASE(keysym, unicode) \
case GDK_KEY_dead_##keysym: combination_buffer[i+1] = unicode; break
case GDK_KEY_dead_##keysym: g_string_append_unichar (input, unicode); break
CASE (grave, 0x0300);
CASE (acute, 0x0301);
CASE (circumflex, 0x0302);
CASE (tilde, 0x0303); /* Also used with perispomeni, 0x342. */
case GDK_KEY_dead_tilde:
if (g_unichar_get_script (ch) == G_UNICODE_SCRIPT_GREEK)
g_string_append_unichar (input, 0x342); /* combining perispomeni */
else
g_string_append_unichar (input, 0x303); /* combining tilde */
break;
CASE (macron, 0x0304);
CASE (breve, 0x0306);
CASE (abovedot, 0x0307);
......@@ -1549,7 +1529,7 @@ gtk_check_algorithmically (const guint16 *compose_buffer,
CASE (horn, 0x031B); /* Legacy use for psili, 0x313 (or 0x343). */
CASE (stroke, 0x335);
CASE (abovecomma, 0x0313); /* Equivalent to psili */
CASE (abovereversedcomma, 0x0314); /* Equivalent to dasia */
CASE (abovereversedcomma, 0x0314); /* Equivalent to dasia */
CASE (doublegrave, 0x30F);
CASE (belowring, 0x325);
CASE (belowmacron, 0x331);
......@@ -1577,27 +1557,20 @@ gtk_check_algorithmically (const guint16 *compose_buffer,
CASE (capital_schwa, 0x1DEA);
#undef CASE
default:
combination_buffer[i+1] = gdk_keyval_to_unicode (compose_buffer[i]);
g_string_append_unichar (input, gdk_keyval_to_unicode (compose_buffer[i]));
}
i--;
}
/* If the buffer normalizes to a single character, then modify the order
* of combination_buffer accordingly, if necessary, and return TRUE.
*/
if (check_normalize_nfc (combination_buffer, n_compose))
{
combination_utf8 = g_ucs4_to_utf8 (combination_buffer, -1, NULL, NULL, NULL);
nfc = g_utf8_normalize (combination_utf8, -1, G_NORMALIZE_NFC);
nfc = g_utf8_normalize (input->str, input->len, G_NORMALIZE_NFC);
if (output_char)
*output_char = g_utf8_get_char (nfc);
g_string_assign (output, nfc);
g_free (combination_utf8);
g_free (nfc);
g_free (nfc);
return TRUE;
}
g_string_free (input, TRUE);
return TRUE;
}
return FALSE;
......
......@@ -80,9 +80,14 @@ gboolean gtk_compose_table_check (const GtkComposeTable *table,
gboolean *compose_match,
GString *output);
void gtk_compose_table_get_prefix (const GtkComposeTable *table,
const guint16 *compose_buffer,
int n_compose,
int *prefix);
gboolean gtk_check_algorithmically (const guint16 *compose_buffer,
int n_compose,
gunichar *output);
GString *output);
guint32 gtk_compose_table_data_hash (const guint16 *data,
int max_seq_len,
......
......@@ -544,18 +544,22 @@ is_dead_key (guint keysym)
return GDK_KEY_dead_grave <= keysym && keysym <= GDK_KEY_dead_greek;
}
static gunichar
dead_key_to_unicode (guint keysym,
gboolean *need_space)
static void
append_dead_key (GString *string,
guint keysym)
{
/* Sadly, not all the dead keysyms have spacing mark equivalents
* in Unicode. For those that don't, we use space + the non-spacing
* mark as an approximation
* in Unicode. For those that don't, we use NBSP + the non-spacing
* mark as an approximation.
*/
switch (keysym)
{
#define CASE(keysym, unicode, sp) \
case GDK_KEY_dead_##keysym: *need_space = sp; return unicode;
#define CASE(keysym, unicode, sp) \
case GDK_KEY_dead_##keysym: \
if (sp) \
g_string_append_unichar (string, 0xA0); \
g_string_append_unichar (string, unicode); \
break;
CASE (grave, 0x60, 0);
CASE (acute, 0xb4, 0);
......@@ -606,8 +610,7 @@ dead_key_to_unicode (guint keysym,
CASE (capital_schwa, 0x1dea, 1);
#undef CASE
default:
*need_space = FALSE;
return gdk_keyval_to_unicode (keysym);
g_string_append_unichar (string, gdk_keyval_to_unicode (keysym));
}
}
......@@ -622,7 +625,7 @@ no_sequence_matches (GtkIMContextSimple *context_simple,
guint keyval;
context = GTK_IM_CONTEXT (context_simple);
priv->in_compose_sequence = FALSE;
/* No compose sequences found, check first if we have a partial
......@@ -675,7 +678,6 @@ no_sequence_matches (GtkIMContextSimple *context_simple,
if (n_compose > 1 && i >= n_compose - 1)
{
gboolean need_space;
GString *s;
s = g_string_new ("");
......@@ -684,15 +686,7 @@ no_sequence_matches (GtkIMContextSimple *context_simple,
{
/* dead keys are never *really* dead */
for (int j = 0; j < i; j++)
{
ch = dead_key_to_unicode (priv->compose_buffer[j], &need_space);
if (ch)
{
if (need_space)
g_string_append_c (s, ' ');
g_string_append_unichar (s, ch);
}
}
append_dead_key (s, priv->compose_buffer[j]);
ch = gdk_keyval_to_unicode (priv->compose_buffer[i]);
if (ch != 0 && ch != ' ' && !g_unichar_iscntrl (ch))
......@@ -702,14 +696,7 @@ no_sequence_matches (GtkIMContextSimple *context_simple,
}
else
{
ch = dead_key_to_unicode (priv->compose_buffer[0], &need_space);
if (ch)
{
if (need_space)
g_string_append_c (s, ' ');
g_string_append_unichar (s, ch);
}
append_dead_key (s, priv->compose_buffer[0]);
gtk_im_context_simple_commit_string (context_simple, s->str);
for (i = 1; i < n_compose; i++)
......@@ -821,7 +808,6 @@ gtk_im_context_simple_filter_keypress (GtkIMContext *context,
int i;
gboolean compose_finish;
gboolean compose_match;
gunichar output_char;
guint keyval, state;
while (priv->compose_buffer[n_compose] != 0 && n_compose < priv->compose_buffer_len)
......@@ -1001,16 +987,22 @@ gtk_im_context_simple_filter_keypress (GtkIMContext *context,
return TRUE;
}
if (is_escape)
{
if (priv->in_hex_sequence || priv->in_compose_sequence)
{
gtk_im_context_simple_reset (context);
return TRUE;
}
return FALSE;
}
if (priv->in_hex_sequence)
{
if (hex_keyval && n_compose < 6)
priv->compose_buffer[n_compose++] = hex_keyval;
else if (is_escape)
{
gtk_im_context_simple_reset (context);
return TRUE;
}
else if (!is_hex_end)
{
/* non-hex character in hex sequence, or sequence too long */
......@@ -1071,6 +1063,7 @@ gtk_im_context_simple_filter_keypress (GtkIMContext *context,
else /* Then, check for compose sequences */
{
gboolean success = FALSE;
int prefix = 0;
GString *output;
output = g_string_new ("");
......@@ -1109,18 +1102,29 @@ gtk_im_context_simple_filter_keypress (GtkIMContext *context,
success = TRUE;
break;
}
else
{
int table_prefix;
gtk_compose_table_get_prefix ((GtkComposeTable *)tmp_list->data,
priv->compose_buffer, n_compose,
&table_prefix);
prefix = MAX (prefix, table_prefix);
}
tmp_list = tmp_list->next;
}
G_UNLOCK (global_tables);
g_string_free (output, TRUE);
if (success)
return TRUE;
{
g_string_free (output, TRUE);
return TRUE;
}
if (gtk_check_algorithmically (priv->compose_buffer, n_compose, &output_char))
if (gtk_check_algorithmically (priv->compose_buffer, n_compose, output))
{
if (!priv->in_compose_sequence)
{
......@@ -1128,11 +1132,30 @@ gtk_im_context_simple_filter_keypress (GtkIMContext *context,
g_signal_emit_by_name (context_simple, "preedit-start");
}
if (output_char)
gtk_im_context_simple_commit_char (context_simple, output_char);
if (output->len > 0)
gtk_im_context_simple_commit_string (context_simple, output->str);
else
g_signal_emit_by_name (context_simple, "preedit-changed");
g_string_free (output, TRUE);
return TRUE;
}
g_string_free (output, TRUE);
/* If we get here, no Compose sequence matched.
* Only beep if we were in a sequence before.
*/
if (prefix > 0)
{
for (i = prefix; i < n_compose; i++)
priv->compose_buffer[i] = 0;
beep_surface (gdk_event_get_surface (event));
g_signal_emit_by_name (context_simple, "preedit-changed");
return TRUE;
}
}
......@@ -1195,28 +1218,23 @@ gtk_im_context_simple_get_preedit_string (GtkIMContext *context,
if (priv->compose_buffer[i] == GDK_KEY_Multi_key)
{
/* We only show the Compose key visibly when it is the
* only glyph in the preedit, or when it occurs in the
* only glyph in the preedit, or when the sequence contains
* multiple Compose keys, or when it occurs in the
* middle of the sequence. Sadly, the official character,
* U+2384, COMPOSITION SYMBOL, is a bit too distracting, so
* we use U+00B7, MIDDLE DOT.
*/
if (priv->compose_buffer[1] == 0 || i > 0)
if (priv->compose_buffer[1] == 0 || i > 0 ||
priv->compose_buffer[i + 1] == GDK_KEY_Multi_key)
g_string_append (s, "·");
}
else
{
gunichar ch;
gboolean need_space;
if (is_dead_key (priv->compose_buffer[i]))
{
ch = dead_key_to_unicode (priv->compose_buffer[i], &need_space);
if (ch)
{
if (need_space)
g_string_append_c (s, ' ');
g_string_append_unichar (s, ch);
}
append_dead_key (s, priv->compose_buffer[i]);
}
else
{
......
# n_sequences: 4909
# n_sequences: 4874
# max_seq_len: 5
# n_index_size: 30
# data_size: 16521
# n_chars: 1572
# data_size: 16447
# n_chars: 1241
<U7ae> <U7e9> : "ΐ" # U390
<U7ae> <U7f5> : "ΰ" # U3b0
<Ufe50> <U20> : "`" # U60
<Ufe50> <U4d> : "M̀"
<Ufe50> <U56> : "Ǜ" # U1db
<Ufe50> <U6d> : "m̀"
<Ufe50> <U76> : "ǜ" # U1dc
<Ufe50> <Ua0> : "̀" # U300
<Ufe50> <U186> : "Ɔ̀"
......@@ -23,16 +21,6 @@
<Ufe50> <U269> : "ɩ̀"
<Ufe50> <U28a> : "ʊ̀"
<Ufe50> <U28b> : "ʋ̀"
<Ufe50> <U3bd> : "Ŋ̀"
<Ufe50> <U3bf> : "ŋ̀"
<Ufe50> <U6c1> : "а̀"
<Ufe50> <U6cf> : "о̀"
<Ufe50> <U6d2> : "р̀"
<Ufe50> <U6d5> : "у̀"
<Ufe50> <U6e1> : "А̀"
<Ufe50> <U6ef> : "О̀"
<Ufe50> <U6f2> : "Р̀"
<Ufe50> <U6f5> : "У̀"
<Ufe50> <U1f00> : "ἂ" # U1f02
<Ufe50> <U1f01> : "ἃ" # U1f03
<Ufe50> <U1f08> : "Ἂ" # U1f0a
......@@ -131,9 +119,7 @@
<Ufe50> <Uff20> <Uaf> <U65> : "ḕ" # U1e15
<Ufe50> <Uff20> <Uaf> <U6f> : "ṑ" # U1e51
<Ufe51> <U20> : "'" # U27
<Ufe51> <U4a> : "J́"
<Ufe51> <U56> : "Ǘ" # U1d7
<Ufe51> <U6a> : "j́"
<Ufe51> <U76> : "ǘ" # U1d8
<Ufe51> <Ua0> : "́" # U301
<Ufe51> <U186> : "Ɔ́"
......@@ -148,28 +134,7 @@
<Ufe51> <U269> : "ɩ́"
<Ufe51> <U28a> : "ʊ́"
<Ufe51> <U28b> : "ʋ́"
<Ufe51> <U3bd> : "Ŋ́"
<Ufe51> <U3bf> : "ŋ́"
<Ufe51> <U6c0> : "ю́"
<Ufe51> <U6c1> : "а́"
<Ufe51> <U6c5> : "е́"