From 5aa69c107828fc549cb90cbfb85445085f8f7bb1 Mon Sep 17 00:00:00 2001 From: Mike Dalessio Date: Mon, 5 Oct 2020 13:10:41 -0400 Subject: [PATCH 1/2] parser.c: xmlParseCharData peek behavior fixed wrt newlines Previously, xmlParseCharData and xmlParseComment would consider 0xA and 0xD to be unhandleable when seen as the first byte of an input chunk, and fall back to xmlParseCharDataComplex and xmlParseCommentComplex, which have different memory and performance characteristics. Fixes GNOME/libxml2#192 --- parser.c | 4 ++-- 1 file changed, 2 insertions(+), 2 deletions(-) diff --git a/parser.c b/parser.c index f779eb6a..4f92e9f6 100644 --- a/parser.c +++ b/parser.c @@ -4506,7 +4506,7 @@ get_more: if (ctxt->instate == XML_PARSER_EOF) return; in = ctxt->input->cur; - } while (((*in >= 0x20) && (*in <= 0x7F)) || (*in == 0x09)); + } while (((*in >= 0x20) && (*in <= 0x7F)) || IS_BLANK_CH(*in)); nbchar = 0; } ctxt->input->line = line; @@ -4987,7 +4987,7 @@ get_more: ctxt->input->col++; goto get_more; } - } while (((*in >= 0x20) && (*in <= 0x7F)) || (*in == 0x09)); + } while (((*in >= 0x20) && (*in <= 0x7F)) || IS_BLANK_CH(*in)); xmlParseCommentComplex(ctxt, buf, len, size); ctxt->instate = state; return; -- GitLab From 8098c75af6cf5b397e025886017dadceca9f81f9 Mon Sep 17 00:00:00 2001 From: Mike Dalessio Date: Mon, 5 Oct 2020 13:13:39 -0400 Subject: [PATCH 2/2] parser.c: Use IS_BLANK_CH instead of in-line hardcoded byte value tests There is no behavioral change here; this simply makes the code more readable and better expresses the intent. 
--- parser.c | 17 +++++------------ 1 file changed, 5 insertions(+), 12 deletions(-) diff --git a/parser.c b/parser.c index 4f92e9f6..1266678e 100644 --- a/parser.c +++ b/parser.c @@ -3972,8 +3972,7 @@ xmlParseAttValueComplex(xmlParserCtxtPtr ctxt, int *attlen, int normalize) { if (rep != NULL) { current = rep; while (*current != 0) { /* non input consuming */ - if ((*current == 0xD) || (*current == 0xA) || - (*current == 0x9)) { + if (IS_BLANK_CH(*current)) { buf[len++] = 0x20; current++; } else @@ -4036,7 +4035,7 @@ xmlParseAttValueComplex(xmlParserCtxtPtr ctxt, int *attlen, int normalize) { } } } else { - if ((c == 0x20) || (c == 0xD) || (c == 0xA) || (c == 0x9)) { + if (IS_BLANK_CH(c)) { if ((len != 0) || (!normalize)) { if ((!normalize) || (!in_space)) { COPY_BUF(l,buf,len,0x20); @@ -8919,9 +8918,7 @@ xmlParseAttValueInternal(xmlParserCtxtPtr ctxt, int *len, int *alloc, /* * Skip any leading spaces */ - while ((in < end) && (*in != limit) && - ((*in == 0x20) || (*in == 0x9) || - (*in == 0xA) || (*in == 0xD))) { + while ((in < end) && (*in != limit) && IS_BLANK_CH(*in)) { if (*in == 0xA) { line++; col = 1; } else { @@ -8958,9 +8955,7 @@ xmlParseAttValueInternal(xmlParserCtxtPtr ctxt, int *len, int *alloc, * skip the trailing blanks */ while ((last[-1] == 0x20) && (last > start)) last--; - while ((in < end) && (*in != limit) && - ((*in == 0x20) || (*in == 0x9) || - (*in == 0xA) || (*in == 0xD))) { + while ((in < end) && (*in != limit) && IS_BLANK_CH(*in)) { if (*in == 0xA) { line++, col = 1; } else { @@ -11068,10 +11063,8 @@ xmlCheckCdataPush(const xmlChar *utf, int len, int complete) { for (ix = 0; ix < len;) { /* string is 0-terminated */ c = utf[ix]; if ((c & 0x80) == 0x00) { /* 1-byte code, starts with 10 */ - if (c >= 0x20) + if (c >= 0x20 || IS_BLANK_CH(c)) ix++; - else if ((c == 0xA) || (c == 0xD) || (c == 0x9)) - ix++; else return(-ix); } else if ((c & 0xe0) == 0xc0) {/* 2-byte code, starts with 110 */ -- GitLab