Commit 79c8a6b1 authored by Nick Wellnhofer
Browse files

Print error messages for truncated UTF-8 sequences

Before, truncated UTF-8 sequences at the end of a file were treated as
EOF. Create an error message containing the offending bytes.

xmlStringCurrentChar would also print characters from the input stream,
not the string it's working on.
parent fb2f518c
......@@ -709,16 +709,6 @@ xmlCurrentChar(xmlParserCtxtPtr ctxt, int *len) {
}
return((int) *ctxt->input->cur);
encoding_error:
/*
* An encoding problem may arise from a truncated input buffer
* splitting a character in the middle. In that case do not raise
* an error but return 0 to endicate an end of stream problem
*/
if (ctxt->input->end - ctxt->input->cur < 4) {
*len = 0;
return(0);
}
/*
* If we detect an UTF8 error that probably mean that the
* input encoding didn't get properly advertised in the
......@@ -729,9 +719,21 @@ encoding_error:
{
char buffer[150];
snprintf(&buffer[0], 149, "Bytes: 0x%02X 0x%02X 0x%02X 0x%02X\n",
ctxt->input->cur[0], ctxt->input->cur[1],
ctxt->input->cur[2], ctxt->input->cur[3]);
if (ctxt->input->cur[1] == 0) {
snprintf(&buffer[0], 149, "Bytes: 0x%02X EOF\n",
ctxt->input->cur[0]);
} else if (ctxt->input->cur[2] == 0) {
snprintf(&buffer[0], 149, "Bytes: 0x%02X 0x%02X EOF\n",
ctxt->input->cur[0], ctxt->input->cur[1]);
} else if (ctxt->input->cur[3] == 0) {
snprintf(&buffer[0], 149, "Bytes: 0x%02X 0x%02X 0x%02X EOF\n",
ctxt->input->cur[0], ctxt->input->cur[1],
ctxt->input->cur[2]);
} else {
snprintf(&buffer[0], 149, "Bytes: 0x%02X 0x%02X 0x%02X 0x%02X\n",
ctxt->input->cur[0], ctxt->input->cur[1],
ctxt->input->cur[2], ctxt->input->cur[3]);
}
__xmlErrEncoding(ctxt, XML_ERR_INVALID_CHAR,
"Input is not proper UTF-8, indicate encoding !\n%s",
BAD_CAST buffer, NULL);
......@@ -821,17 +823,6 @@ xmlStringCurrentChar(xmlParserCtxtPtr ctxt, const xmlChar * cur, int *len)
*len = 1;
return ((int) *cur);
encoding_error:
/*
* An encoding problem may arise from a truncated input buffer
* splitting a character in the middle. In that case do not raise
* an error but return 0 to endicate an end of stream problem
*/
if ((ctxt == NULL) || (ctxt->input == NULL) ||
(ctxt->input->end - ctxt->input->cur < 4)) {
*len = 0;
return(0);
}
/*
* If we detect an UTF8 error that probably mean that the
* input encoding didn't get properly advertised in the
......@@ -842,9 +833,19 @@ encoding_error:
{
char buffer[150];
snprintf(buffer, 149, "Bytes: 0x%02X 0x%02X 0x%02X 0x%02X\n",
ctxt->input->cur[0], ctxt->input->cur[1],
ctxt->input->cur[2], ctxt->input->cur[3]);
if (cur[1] == 0) {
snprintf(&buffer[0], 149, "Bytes: 0x%02X EOF\n",
cur[0]);
} else if (cur[2] == 0) {
snprintf(&buffer[0], 149, "Bytes: 0x%02X 0x%02X EOF\n",
cur[0], cur[1]);
} else if (cur[3] == 0) {
snprintf(&buffer[0], 149, "Bytes: 0x%02X 0x%02X 0x%02X EOF\n",
cur[0], cur[1], cur[2]);
} else {
snprintf(&buffer[0], 149, "Bytes: 0x%02X 0x%02X 0x%02X 0x%02X\n",
cur[0], cur[1], cur[2], cur[3]);
}
__xmlErrEncoding(ctxt, XML_ERR_INVALID_CHAR,
"Input is not proper UTF-8, indicate encoding !\n%s",
BAD_CAST buffer, NULL);
......
./test/errors/partial_utf8_1.xml:1: parser error : Input is not proper UTF-8, indicate encoding !
Bytes: 0xC2 EOF
<a>Â
^
./test/errors/partial_utf8_1.xml:1: parser error : Premature end of data in tag a line 1
<a>Â
^
./test/errors/partial_utf8_1.xml:1: parser error : Extra content at the end of the document
<a>Â
^
./test/errors/partial_utf8_1.xml : failed to parse
./test/errors/partial_utf8_2.xml:1: parser error : Input is not proper UTF-8, indicate encoding !
Bytes: 0xE3 0xA0 EOF
<a>ã 
^
./test/errors/partial_utf8_2.xml:1: parser error : Premature end of data in tag a line 1
<a>ã 
^
./test/errors/partial_utf8_2.xml:1: parser error : Input is not proper UTF-8, indicate encoding !
Bytes: 0xE3 0xA0 EOF
<a>ã 
^
./test/errors/partial_utf8_2.xml : failed to parse
./test/errors/partial_utf8_3.xml:1: parser error : Input is not proper UTF-8, indicate encoding !
Bytes: 0xF2 0xA0 0xA0 EOF
<a>
^
./test/errors/partial_utf8_3.xml:1: parser error : Premature end of data in tag a line 1
<a>
^
./test/errors/partial_utf8_3.xml:1: parser error : Input is not proper UTF-8, indicate encoding !
Bytes: 0xF2 0xA0 0xA0 EOF
<a>ò  
^
./test/errors/partial_utf8_3.xml : failed to parse
<a>Â
\ No newline at end of file
<a>
\ No newline at end of file
<a>
\ No newline at end of file
Markdown is supported
0% Try again or attach a new file.
You are about to add 0 people to the discussion. Proceed with caution.
Finish editing this message first!
Please register or sign in to comment