Commit beca86e8, authored by Hugh Davenport, committed by Daniel Veillard

Detect change of encoding when parsing HTML names

From https://bugzilla.gnome.org/show_bug.cgi?id=758518

This happens when a name is being parsed in a file that has no valid
encoding set, so libxml has to guess what the encoding is. This patch
detects when the input buffer location changes and, if it does, restarts
the parsing of the name.

This slightly changes the output of a couple of regression tests.
parent b1d34de4
......@@ -2492,6 +2492,7 @@ htmlParseNameComplex(xmlParserCtxtPtr ctxt) {
int len = 0, l;
int c;
int count = 0;
const xmlChar *base = ctxt->input->base;
/*
* Handler for more complex cases
......@@ -2517,6 +2518,13 @@ htmlParseNameComplex(xmlParserCtxtPtr ctxt) {
len += l;
NEXTL(l);
c = CUR_CHAR(l);
if (ctxt->input->base != base) {
/*
* We changed encoding from an unknown encoding
* Input buffer changed location, so we better start again
*/
return(htmlParseNameComplex(ctxt));
}
}
if (ctxt->input->base > ctxt->input->cur - len)
......
<!DOCTYPE html PUBLIC "-//W3C//DTD HTML 4.0 Transitional//EN" "http://www.w3.org/TR/REC-html40/loose.dtd">
<html><body><p>&amp;
<html><body><p>&amp;&ecirc;
</p></body></html>
./test/HTML/758605.html:1: HTML parser error : htmlParseEntityRef: no name
./test/HTML/758605.html:1: HTML parser error : htmlParseEntityRef: expecting ';'
ê
^
SAX.setDocumentLocator()
SAX.startDocument()
SAX.error: htmlParseEntityRef: no name
SAX.error: htmlParseEntityRef: expecting ';'
SAX.startElement(html)
SAX.startElement(body)
SAX.startElement(p)
SAX.characters(&amp;, 1)
SAX.characters(&ecirc;, 2)
SAX.ignorableWhitespace(
, 1)
SAX.endElement(p)
......
Markdown is supported
0% or
You are about to add 0 people to the discussion. Proceed with caution.
Finish editing this message first!
Please register or sign in to comment