Commit 55efd092 authored by Matthias Clasen
Browse files

Merge branch 'line-breaking-fixes4' into 'main'

Fix a case of unintended hyphenation

See merge request !519
parents a31741fa 0a1218eb
Pipeline #335762 passed with stages
in 7 minutes and 3 seconds
......@@ -170,6 +170,8 @@ default_break (const char *text,
GUnicodeBreakType prev_break_type;
GUnicodeBreakType prev_prev_break_type;
PangoScript prev_script;
/* See Grapheme_Cluster_Break Property Values table of UAX#29 */
typedef enum
{
......@@ -262,6 +264,7 @@ default_break (const char *text,
prev_break_type = G_UNICODE_BREAK_UNKNOWN;
prev_prev_break_type = G_UNICODE_BREAK_UNKNOWN;
prev_wc = 0;
prev_script = PANGO_SCRIPT_COMMON;
prev_jamo = NO_JAMO;
prev_space_or_hyphen = FALSE;
......@@ -539,7 +542,6 @@ default_break (const char *text,
}
script = (PangoScript)g_unichar_get_script (wc);
/* ---- UAX#29 Word Boundaries ---- */
{
is_word_boundary = FALSE;
......@@ -1571,9 +1573,11 @@ default_break (const char *text,
attrs[i].break_inserts_hyphen = FALSE;
attrs[i].break_removes_preceding = FALSE;
switch ((int)script)
switch ((int)prev_script)
{
case PANGO_SCRIPT_COMMON:
insert_hyphens = prev_wc == 0x00ad;
break;
case PANGO_SCRIPT_HAN:
case PANGO_SCRIPT_HANGUL:
case PANGO_SCRIPT_HIRAGANA:
......@@ -1634,6 +1638,7 @@ default_break (const char *text,
}
prev_wc = wc;
prev_script = script;
/* wc might not be a valid Unicode base character, but really all we
* need to know is the last non-combining character */
......
......@@ -739,6 +739,10 @@ void pango_attr_iterator_get_font (PangoAttrIterator *
PANGO_AVAILABLE_IN_1_2
GSList * pango_attr_iterator_get_attrs (PangoAttrIterator *iterator);
G_DEFINE_AUTOPTR_CLEANUP_FUNC(PangoAttribute, pango_attribute_destroy)
G_DEFINE_AUTOPTR_CLEANUP_FUNC(PangoAttrList, pango_attr_list_unref)
G_DEFINE_AUTOPTR_CLEANUP_FUNC(PangoAttrIterator, pango_attr_iterator_destroy)
G_END_DECLS
#endif /* __PANGO_ATTRIBUTES_H__ */
......@@ -101,6 +101,8 @@ PANGO_DEPRECATED_IN_1_44
PangoCoverage *pango_coverage_from_bytes (guchar *bytes,
int n_bytes);
G_DEFINE_AUTOPTR_CLEANUP_FUNC(PangoCoverage, pango_coverage_unref)
G_END_DECLS
#endif /* __PANGO_COVERAGE_H__ */
......@@ -689,6 +689,7 @@ PangoLanguage ** pango_font_get_languages (PangoFont *font);
G_DEFINE_AUTOPTR_CLEANUP_FUNC(PangoFontFamily, g_object_unref)
G_DEFINE_AUTOPTR_CLEANUP_FUNC(PangoFontFace, g_object_unref)
G_DEFINE_AUTOPTR_CLEANUP_FUNC(PangoFont, g_object_unref)
G_DEFINE_AUTOPTR_CLEANUP_FUNC(PangoFontDescription, pango_font_description_free)
G_END_DECLS
......
......@@ -3930,15 +3930,29 @@ process_item (PangoLayout *layout,
width += state->log_widths[state->log_widths_offset + i];
}
if ((width <= state->remaining_width || (item->num_chars == 1 && !line->runs)) &&
if (!no_break_at_end &&
can_break_at (layout, state->start_offset + item->num_chars, wrap))
{
if (processing_new_item)
{
compute_log_widths (layout, state);
processing_new_item = FALSE;
}
extra_width = find_break_extra_width (layout, state, item->num_chars);
}
else
extra_width = 0;
if ((width + extra_width <= state->remaining_width || (item->num_chars == 1 && !line->runs)) &&
!no_break_at_end)
{
DEBUG1 ("%d <= %d", width, state->remaining_width);
DEBUG1 ("%d + %d <= %d", width, extra_width, state->remaining_width);
insert_run (line, state, item, NULL, FALSE);
width = pango_glyph_string_get_width (((PangoGlyphItem *)(line->runs->data))->glyphs);
if (width <= state->remaining_width || (item->num_chars == 1 && !line->runs))
if (width + extra_width <= state->remaining_width || (item->num_chars == 1 && !line->runs))
{
state->remaining_width -= width;
state->remaining_width = MAX (state->remaining_width, 0);
......@@ -4007,7 +4021,7 @@ retry_break:
if (can_break_at (layout, state->start_offset + num_chars, wrap) &&
(num_chars > 0 || line->runs))
{
DEBUG1 ("possible breakpoint: %d", num_chars);
DEBUG1 ("possible breakpoint: %d, extra_width %d", num_chars, extra_width);
if (num_chars == 0 ||
width + extra_width < state->remaining_width - safe_distance)
{
......@@ -4044,10 +4058,13 @@ retry_break:
if (num_chars > 0 &&
layout->log_attrs[state->start_offset + num_chars - 1].is_white)
extra_width = - state->log_widths[state->log_widths_offset + num_chars - 1];
else if (item == new_item &&
break_needs_hyphen (layout, state, num_chars))
extra_width = state->hyphen_width;
else
extra_width = 0;
DEBUG1 ("measured breakpoint %d: %d", num_chars, new_break_width);
DEBUG1 ("measured breakpoint %d: %d, extra %d", num_chars, new_break_width, extra_width);
if (new_item != item)
{
......@@ -4107,7 +4124,8 @@ retry_break:
if (break_num_chars == item->num_chars)
{
if (break_needs_hyphen (layout, state, break_num_chars))
if (can_break_at (layout, state->start_offset + break_num_chars, wrap) &&
break_needs_hyphen (layout, state, break_num_chars))
item->analysis.flags |= PANGO_ANALYSIS_FLAG_NEED_HYPHEN;
insert_run (line, state, item, NULL, TRUE);
......@@ -5946,6 +5964,7 @@ add_missing_hyphen (PangoLayoutLine *line,
int width;
int start_offset;
DEBUG1("add a missing hyphen");
/* The last run fit onto the line without breaking it, but it still needs a hyphen */
width = pango_glyph_string_get_width (run->glyphs);
......
......@@ -494,6 +494,10 @@ int pango_layout_iter_get_baseline (PangoLayoutIter *iter);
PANGO_AVAILABLE_IN_1_50
int pango_layout_iter_get_run_baseline (PangoLayoutIter *iter);
G_DEFINE_AUTOPTR_CLEANUP_FUNC(PangoLayout, g_object_unref)
G_DEFINE_AUTOPTR_CLEANUP_FUNC(PangoLayoutIter, pango_layout_iter_free)
G_END_DECLS
#endif /* __PANGO_LAYOUT_H__ */
......
......@@ -92,6 +92,9 @@ char * pango_tab_array_to_string (PangoTabArray *tab_array);
PANGO_AVAILABLE_IN_1_50
PangoTabArray * pango_tab_array_from_string (const char *text);
G_DEFINE_AUTOPTR_CLEANUP_FUNC(PangoTabArray, pango_tab_array_free)
G_END_DECLS
#endif /* __PANGO_TABS_H__ */
......@@ -4,4 +4,4 @@ Whitespace: x x x x
Sentences: bs e b
Words: bs be bs e s be bs be b bs be bs be bs be bs be bs be b
Graphemes: b b b b b b b b b b b b b b b b b b b b b b b b b b b b b b b b b b b b b b b b b b b b b b
Hyphens: i i i i i i i i i i i i i i i i
Hyphens: i i i i i i i i i i i i i i i i
Text: 👨 [0x200d]🦰 👨🏿 [0x200d]🦱 0 🏴[0xe0075][0xe0073][0xe0063][0xe0061][0xe007f] 🇩🇪 [0x0a]
Breaks: c lc lc lc lc lc c lc
Whitespace: w w
Sentences: bs e b
Words: b b b bs be b b b
Graphemes: b b b b b b b b
Hyphens: i i i i i i i i
Text: 👨[0x200d] 🦰 👨🏿[0x200d] 🦱 0 🏴[0xe0075][0xe0073][0xe0063][0xe0061][0xe007f] 🇩🇪 [0x0a]
Breaks: c lc lc lc lc lc c Lc
Whitespace: w w
Sentences: bs e b
Words: b b b bs be b b b
Graphemes: b b b b b b b b
Hyphens: i i i i i
Text: o n e [ ] t w o - t h r e e [ ] f o [0xad] u r [0x0a]
Breaks: c c c c lc c c c lc c c c c c lc c c lc c c lc
Breaks: c c c c lc c c c lc c c c c c lc c c lc c c Lc
Whitespace: x x w w
Sentences: bs e b
Words: bs be bs be bs be bs be b
Graphemes: b b b b b b b b b b b b b b b b b b b b b
Hyphens: i i i i i i i i i
Hyphens: i i i i i i i i i i
Text: a b c / d e f [ ] g h i [0xad] j k l . [ ] B l a [0x0a]
Breaks: c c c c lc c c c lc c c c lc c c c c lc c c c lc
Breaks: c c c c lc c c c lc c c c lc c c c c lc c c c Lc
Whitespace: x x w w
Sentences: bs e bs e b
Words: bs be bs be bs be b bs be b
Graphemes: b b b b b b b b b b b b b b b b b b b b b b
Hyphens: i i i i i i i i i i i
Hyphens: i i i i i i i i i i i i
......@@ -4,4 +4,4 @@ Whitespace: x x x w
Sentences: bs e bs e bs e b
Words: bs be bs be bs be bs e s be bs be bs be bs be bs e s be bs be bs be bs be bs e s e s be b
Graphemes: b b b b b b b b b b b b b b b b b b b b b b b b b b b b b b b b b b b b b b b b b b b b b b b b b b b b b b b b b b b b b b b b b b b b b b b b b b b b b b b b b b b b b
Hyphens: i i i i i i i i i i i i i i i i i i i i i i i i i i i i i i i i i i i i i i i i i i i i i i i i i i i i i i
Hyphens: i i i i i i i i i i i i i i i i i i i i i i i i i i i i i i i i i i i i i i i i i i i i i i i i
Text: h y p h e n | a t i o n [ ] o v e r [0xad] l o a d [0x0a]
Breaks: c c c lc c c c c lc c c c c c lc c c c c lc c c c c lc
Breaks: c c c lc c c c c lc c c c c c lc c c c c lc c c c c Lc
Whitespace: x w w
Sentences: bs e b
Words: bs e s be bs be bs be b
Graphemes: b b b b b b b b b b b b b b b b b b b b b b b b b
Hyphens: i ri i i i i i i i i i i i i i i
Hyphens: i ri i i i i i i i i i i i i i i i
Text: a [ ] a b [0x200b] s p [0x200b] [ ] [ ] d e [0xad] f g [ ] b [0x0a]
Breaks: c c lc c c lc c c c c lc c c lc c c lc c lc
Breaks: c c lc c c lc c c c c lc c c lc c c lc c Lc
Whitespace: x x x x w w
Sentences: bs e b
Words: bs be bs be bs be b
Graphemes: b b b b b b b b b b b b b b b b b b b
Hyphens: i i i i i i
Hyphens: i i i i i i i i
# Example from https://gitlab.gnome.org/GNOME/pango/issues/218
# This shows difference between word start/end and boundary
goril·les
gorilles
Text: g o r i l · l e s [0x0a]
Breaks: c c c c c c c c c c lc
Whitespace: w w
Sentences: bs e b
Words: bs e s be b
Graphemes: b b b b b b b b b b b
Hyphens: i i i i i i i
Text: g o r i l l e s [0x0a]
Breaks: c c c c c c lc c c c Lc
Whitespace: w w
Sentences: bs e b
Words: bs e s be b
Graphemes: b b b b b b b b b b b
Hyphens: i i i i ri i i
Bratwurst
--- parameters
wrapped: 0
ellipsized: 0
lines: 1
width: 102400
--- attributes
range 0 4
range 4 2147483647
4 4294967295 style italic
range 2147483647 2147483647
--- directions
0 0 0 0 0 0 0 0 0
--- cursor positions
0(0) 1(0) 2(0) 3(0) 4(0) 5(0) 6(0) 7(0) 8(0) 8(1)
--- lines
i=1, index=0, paragraph-start=1, dir=ltr 'Bratwurst'
--- runs
i=1, index=0, chars=4, level=0, gravity=south, flags=0, font=OMITTED, script=latin, language=en-us, 'Brat'
i=2, index=4, chars=5, level=0, gravity=south, flags=0, font=OMITTED, script=latin, language=en-us, 'wurst'
i=3, index=9, no run, line end
{
"text" : "Bratwurst",
"attributes" : [
{
"start" : 4,
"type" : "style",
"value" : "italic"
}
],
"font" : "Sans Bold 32",
"tabs" : {
"positions-in-pixels" : true,
"positions" : [
0,
50,
100
]
},
"width" : 102400,
"line-spacing" : 1.2999999523162842
}
\ No newline at end of file
Supports Markdown
0% or .
You are about to add 0 people to the discussion. Proceed with caution.
Finish editing this message first!
Please register or sign in to comment