Missing bounds checks on variable cur in function xmlEncodeEntitiesInternal of entities.c.
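Several checks on variable cur are missing at lines 684, 689 and 696, which can lead to an out-of-bounds read when encoding malformed strings, e.g. when a multi-byte UTF-8 lead byte is followed by fewer continuation bytes than it announces before the terminating NUL.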
673 if (*cur < 0xC0) {
674 xmlEntitiesErr(XML_CHECK_NOT_UTF8,
675 "xmlEncodeEntities: input not UTF-8");
676 if (doc != NULL)
677 doc->encoding = xmlStrdup(BAD_CAST "ISO-8859-1");
678 snprintf(buf, sizeof(buf), "&#%d;", *cur);
679 buf[sizeof(buf) - 1] = 0;
680 ptr = buf;
681 while (*ptr != 0) *out++ = *ptr++;
682 cur++;
683 continue;
684 } else if (*cur < 0xE0) {
685 val = (cur[0]) & 0x1F;
686 val <<= 6;
687 val |= (cur[1]) & 0x3F;
688 l = 2;
689 } else if (*cur < 0xF0) {
690 val = (cur[0]) & 0x0F;
691 val <<= 6;
692 val |= (cur[1]) & 0x3F;
693 val <<= 6;
694 val |= (cur[2]) & 0x3F;
695 l = 3;
696 } else if (*cur < 0xF8) {
697 val = (cur[0]) & 0x07;
698 val <<= 6;
699 val |= (cur[1]) & 0x3F;
700 val <<= 6;
701 val |= (cur[2]) & 0x3F;
702 val <<= 6;
703 val |= (cur[3]) & 0x3F;
704 l = 4;
705 }
We should check that cur[1], cur[2] and cur[3] are not the terminating NUL before reading past them.
Below is the proposed patch.
diff --git a/entities.c b/entities.c
index 43549bc..4101ebd 100644
--- a/entities.c
+++ b/entities.c
@@ -681,19 +681,19 @@ xmlEncodeEntitiesInternal(xmlDocPtr doc, const xmlChar *input, int attr) {
while (*ptr != 0) *out++ = *ptr++;
cur++;
continue;
- } else if (*cur < 0xE0) {
+ } else if (*cur < 0xE0 && (cur[1] != 0)) {
val = (cur[0]) & 0x1F;
val <<= 6;
val |= (cur[1]) & 0x3F;
l = 2;
- } else if (*cur < 0xF0) {
+ } else if (*cur < 0xF0 && (cur[1] != 0) && (cur[2] != 0)) {
val = (cur[0]) & 0x0F;
val <<= 6;
val |= (cur[1]) & 0x3F;
val <<= 6;
val |= (cur[2]) & 0x3F;
l = 3;
- } else if (*cur < 0xF8) {
+ } else if (*cur < 0xF8 && (cur[1] != 0) && (cur[2] != 0) && cur[3] != 0) {
val = (cur[0]) & 0x07;
val <<= 6;
val |= (cur[1]) & 0x3F;