Commit bf058dce authored by Daniel Veillard's avatar Daniel Veillard

Fix the flushing out of raw buffers on encoding conversions

https://bugzilla.gnome.org/show_bug.cgi?id=692915

the new set of converting functions tried to limit the encoding
conversion of the raw buffer to the consumption one to work in
a more progressive fashion. Unfortunately this was bad for
performances and led to errors on progressive parsing when
a very large chunk was close to the end of the document. Fix
the new internal function and switch back to the old way of
converting. Fix another bug in the process.
parent de0cc20c
......@@ -3561,7 +3561,7 @@ htmlCheckEncodingDirect(htmlParserCtxtPtr ctxt, const xmlChar *encoding) {
*/
processed = ctxt->input->cur - ctxt->input->base;
xmlBufShrink(ctxt->input->buf->buffer, processed);
nbchars = xmlCharEncInput(ctxt->input->buf);
nbchars = xmlCharEncInput(ctxt->input->buf, 1);
if (nbchars < 0) {
htmlParseErr(ctxt, XML_ERR_INVALID_ENCODING,
"htmlCheckEncoding: encoder error\n",
......@@ -6057,7 +6057,7 @@ htmlParseChunk(htmlParserCtxtPtr ctxt, const char *chunk, int size,
size_t base = xmlBufGetInputBase(in->buffer, ctxt->input);
size_t current = ctxt->input->cur - ctxt->input->base;
nbchars = xmlCharEncInput(in);
nbchars = xmlCharEncInput(in, terminate);
if (nbchars < 0) {
htmlParseErr(ctxt, XML_ERR_INVALID_ENCODING,
"encoder error\n", NULL, NULL);
......
......@@ -21,7 +21,7 @@ extern "C" {
int xmlCharEncFirstLineInt(xmlCharEncodingHandler *handler, xmlBufferPtr out,
xmlBufferPtr in, int len);
int xmlCharEncFirstLineInput(xmlParserInputBufferPtr input, int len);
int xmlCharEncInput(xmlParserInputBufferPtr input);
int xmlCharEncInput(xmlParserInputBufferPtr input, int flush);
int xmlCharEncOutput(xmlOutputBufferPtr output, int init);
#ifdef __cplusplus
......
......@@ -2163,6 +2163,7 @@ xmlCharEncFirstLineInput(xmlParserInputBufferPtr input, int len)
/**
* xmlCharEncInput:
* @input: a parser input buffer
* @flush: try to flush all the raw buffer
*
* Generic front-end for the encoding handler on parser input
*
......@@ -2172,7 +2173,7 @@ xmlCharEncFirstLineInput(xmlParserInputBufferPtr input, int len)
* the result of transformation can't fit into the encoding we want), or
*/
int
xmlCharEncInput(xmlParserInputBufferPtr input)
xmlCharEncInput(xmlParserInputBufferPtr input, int flush)
{
int ret = -2;
size_t written;
......@@ -2191,7 +2192,7 @@ xmlCharEncInput(xmlParserInputBufferPtr input)
toconv = xmlBufUse(in);
if (toconv == 0)
return (0);
if (toconv > 64 * 1024)
if ((toconv > 64 * 1024) && (flush == 0))
toconv = 64 * 1024;
written = xmlBufAvail(out);
if (written > 0)
......@@ -2202,7 +2203,7 @@ xmlCharEncInput(xmlParserInputBufferPtr input)
if (written > 0)
written--; /* count '\0' */
}
if (written > 128 * 1024)
if ((written > 128 * 1024) && (flush == 0))
written = 128 * 1024;
c_in = toconv;
......
......@@ -11122,9 +11122,13 @@ xmlParseTryOrFinish(xmlParserCtxtPtr ctxt, int terminate) {
/*
* If we are operating on converted input, try to flush
* remainng chars to avoid them stalling in the non-converted
* buffer.
* buffer. But do not do this in document start where
* encoding="..." may not have been read and we work on a
* guessed encoding.
*/
if (xmlBufIsEmpty(ctxt->input->buf->buffer) == 0) {
if ((ctxt->instate != XML_PARSER_START) &&
(ctxt->input->buf->raw != NULL) &&
(xmlBufIsEmpty(ctxt->input->buf->raw) == 0)) {
size_t base = xmlBufGetInputBase(ctxt->input->buf->buffer,
ctxt->input);
size_t current = ctxt->input->cur - ctxt->input->base;
......@@ -12146,7 +12150,7 @@ xmldecl_done:
size_t base = xmlBufGetInputBase(in->buffer, ctxt->input);
size_t current = ctxt->input->cur - ctxt->input->base;
nbchars = xmlCharEncInput(in);
nbchars = xmlCharEncInput(in, terminate);
if (nbchars < 0) {
/* TODO 2.6.0 */
xmlGenericError(xmlGenericErrorContext,
......
......@@ -1201,7 +1201,7 @@ xmlSwitchInputEncodingInt(xmlParserCtxtPtr ctxt, xmlParserInputPtr input,
/*
* convert as much as possible of the buffer
*/
nbchars = xmlCharEncInput(input->buf);
nbchars = xmlCharEncInput(input->buf, 1);
} else {
/*
* convert just enough to get
......
......@@ -3238,7 +3238,7 @@ xmlParserInputBufferPush(xmlParserInputBufferPtr in,
* convert as much as possible to the parser reading buffer.
*/
use = xmlBufUse(in->raw);
nbchars = xmlCharEncInput(in);
nbchars = xmlCharEncInput(in, 1);
if (nbchars < 0) {
xmlIOErr(XML_IO_ENCODER, NULL);
in->error = XML_IO_ENCODER;
......@@ -3343,7 +3343,7 @@ xmlParserInputBufferGrow(xmlParserInputBufferPtr in, int len) {
* convert as much as possible to the parser reading buffer.
*/
use = xmlBufUse(in->raw);
nbchars = xmlCharEncInput(in);
nbchars = xmlCharEncInput(in, 1);
if (nbchars < 0) {
xmlIOErr(XML_IO_ENCODER, NULL);
in->error = XML_IO_ENCODER;
......
Markdown is supported
0% or
You are about to add 0 people to the discussion. Proceed with caution.
Finish editing this message first!
Please register or to comment