Commit 6655ceab authored by Matthias Clasen
Browse files

break: Fix hyphen condition

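When looking at scripts, we want to look
at the script of the *previous* character.
We then also need to special-case SHY (soft
hyphen, U+00AD) within the Common script: it is
the one Common-script character after which a
hyphen should still be inserted.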
parent 9d114095
......@@ -170,6 +170,8 @@ default_break (const char *text,
GUnicodeBreakType prev_break_type;
GUnicodeBreakType prev_prev_break_type;
PangoScript prev_script;
/* See Grapheme_Cluster_Break Property Values table of UAX#29 */
typedef enum
{
......@@ -262,6 +264,7 @@ default_break (const char *text,
prev_break_type = G_UNICODE_BREAK_UNKNOWN;
prev_prev_break_type = G_UNICODE_BREAK_UNKNOWN;
prev_wc = 0;
prev_script = PANGO_SCRIPT_COMMON;
prev_jamo = NO_JAMO;
prev_space_or_hyphen = FALSE;
......@@ -539,7 +542,6 @@ default_break (const char *text,
}
script = (PangoScript)g_unichar_get_script (wc);
/* ---- UAX#29 Word Boundaries ---- */
{
is_word_boundary = FALSE;
......@@ -1571,9 +1573,11 @@ default_break (const char *text,
attrs[i].break_inserts_hyphen = FALSE;
attrs[i].break_removes_preceding = FALSE;
switch ((int)script)
switch ((int)prev_script)
{
case PANGO_SCRIPT_COMMON:
insert_hyphens = prev_wc == 0x00ad;
break;
case PANGO_SCRIPT_HAN:
case PANGO_SCRIPT_HANGUL:
case PANGO_SCRIPT_HIRAGANA:
......@@ -1634,6 +1638,7 @@ default_break (const char *text,
}
prev_wc = wc;
prev_script = script;
/* wc might not be a valid Unicode base character, but really all we
* need to know is the last non-combining character */
......
......@@ -4,4 +4,4 @@ Whitespace: x x x x
Sentences: bs e b
Words: bs be bs e s be bs be b bs be bs be bs be bs be bs be b
Graphemes: b b b b b b b b b b b b b b b b b b b b b b b b b b b b b b b b b b b b b b b b b b b b b b
Hyphens: i i i i i i i i i i i i i i i i
Hyphens: i i i i i i i i i i i i i i i i
Text: 👨 [0x200d]🦰 👨🏿 [0x200d]🦱 0 🏴[0xe0075][0xe0073][0xe0063][0xe0061][0xe007f] 🇩🇪 [0x0a]
Breaks: c lc lc lc lc lc c lc
Whitespace: w w
Sentences: bs e b
Words: b b b bs be b b b
Graphemes: b b b b b b b b
Hyphens: i i i i i i i i
Text: 👨[0x200d] 🦰 👨🏿[0x200d] 🦱 0 🏴[0xe0075][0xe0073][0xe0063][0xe0061][0xe007f] 🇩🇪 [0x0a]
Breaks: c lc lc lc lc lc c Lc
Whitespace: w w
Sentences: bs e b
Words: b b b bs be b b b
Graphemes: b b b b b b b b
Hyphens: i i i i i
Text: o n e [ ] t w o - t h r e e [ ] f o [0xad] u r [0x0a]
Breaks: c c c c lc c c c lc c c c c c lc c c lc c c lc
Breaks: c c c c lc c c c lc c c c c c lc c c lc c c Lc
Whitespace: x x w w
Sentences: bs e b
Words: bs be bs be bs be bs be b
Graphemes: b b b b b b b b b b b b b b b b b b b b b
Hyphens: i i i i i i i i i
Hyphens: i i i i i i i i i i
Text: a b c / d e f [ ] g h i [0xad] j k l . [ ] B l a [0x0a]
Breaks: c c c c lc c c c lc c c c lc c c c c lc c c c lc
Breaks: c c c c lc c c c lc c c c lc c c c c lc c c c Lc
Whitespace: x x w w
Sentences: bs e bs e b
Words: bs be bs be bs be b bs be b
Graphemes: b b b b b b b b b b b b b b b b b b b b b b
Hyphens: i i i i i i i i i i i
Hyphens: i i i i i i i i i i i i
......@@ -4,4 +4,4 @@ Whitespace: x x x w
Sentences: bs e bs e bs e b
Words: bs be bs be bs be bs e s be bs be bs be bs be bs e s be bs be bs be bs be bs e s e s be b
Graphemes: b b b b b b b b b b b b b b b b b b b b b b b b b b b b b b b b b b b b b b b b b b b b b b b b b b b b b b b b b b b b b b b b b b b b b b b b b b b b b b b b b b b b b
Hyphens: i i i i i i i i i i i i i i i i i i i i i i i i i i i i i i i i i i i i i i i i i i i i i i i i i i i i i i
Hyphens: i i i i i i i i i i i i i i i i i i i i i i i i i i i i i i i i i i i i i i i i i i i i i i i i
Text: h y p h e n | a t i o n [ ] o v e r [0xad] l o a d [0x0a]
Breaks: c c c lc c c c c lc c c c c c lc c c c c lc c c c c lc
Breaks: c c c lc c c c c lc c c c c c lc c c c c lc c c c c Lc
Whitespace: x w w
Sentences: bs e b
Words: bs e s be bs be bs be b
Graphemes: b b b b b b b b b b b b b b b b b b b b b b b b b
Hyphens: i ri i i i i i i i i i i i i i i
Hyphens: i ri i i i i i i i i i i i i i i i
Text: a [ ] a b [0x200b] s p [0x200b] [ ] [ ] d e [0xad] f g [ ] b [0x0a]
Breaks: c c lc c c lc c c c c lc c c lc c c lc c lc
Breaks: c c lc c c lc c c c c lc c c lc c c lc c Lc
Whitespace: x x x x w w
Sentences: bs e b
Words: bs be bs be bs be b
Graphemes: b b b b b b b b b b b b b b b b b b b
Hyphens: i i i i i i
Hyphens: i i i i i i i i
# Example from https://gitlab.gnome.org/GNOME/pango/issues/218
# This shows difference between word start/end and boundary
goril·les
gorilles
Text: g o r i l · l e s [0x0a]
Breaks: c c c c c c c c c c lc
Whitespace: w w
Sentences: bs e b
Words: bs e s be b
Graphemes: b b b b b b b b b b b
Hyphens: i i i i i i i
Text: g o r i l l e s [0x0a]
Breaks: c c c c c c lc c c c Lc
Whitespace: w w
Sentences: bs e b
Words: bs e s be b
Graphemes: b b b b b b b b b b b
Hyphens: i i i i ri i i
Supports Markdown
0% or .
You are about to add 0 people to the discussion. Proceed with caution.
Finish editing this message first!
Please register or sign in to comment