Commit 4c778d8b authored by Daniel Veillard's avatar Daniel Veillard
Browse files

boosting common comment parsing code, it was really slow. added specific

* parser.c: boosting common comment parsing code, it was really
  slow.
* test/comment[3-5].xml result//comment[3-5].xml*: added specific
  regression tests
Daniel
parent 0714c5bf
Sun Jan 23 18:35:00 CET 2005 Daniel Veillard <daniel@veillard.com>
* parser.c: boosting common comment parsing code, it was really
slow.
* test/comment[3-5].xml result//comment[3-5].xml*: added specific
regression tests
Sun Jan 23 01:00:09 CET 2005 Daniel Veillard <daniel@veillard.com>
 
* parser.c: small optimization back.
......
......@@ -3475,42 +3475,35 @@ xmlParseExternalID(xmlParserCtxtPtr ctxt, xmlChar **publicID, int strict) {
}
/**
* xmlParseComment:
* xmlParseCommentComplex:
* @ctxt: an XML parser context
* @buf: the already parsed part of the buffer
* @len: number of bytes filled in the buffer
* @size: allocated size of the buffer
*
* Skip an XML (SGML) comment <!-- .... -->
* The spec says that "For compatibility, the string "--" (double-hyphen)
* must not occur within comments. "
* This is the slow routine in case the accelerator for ascii didn't work
*
* [15] Comment ::= '<!--' ((Char - '-') | ('-' (Char - '-')))* '-->'
*/
void
xmlParseComment(xmlParserCtxtPtr ctxt) {
xmlChar *buf = NULL;
int len;
int size = XML_PARSER_BUFFER_SIZE;
static void
xmlParseCommentComplex(xmlParserCtxtPtr ctxt, xmlChar *buf, int len, int size) {
int q, ql;
int r, rl;
int cur, l;
xmlParserInputState state;
xmlParserInputPtr input = ctxt->input;
int count = 0;
/*
* Check that there is a comment right here.
*/
if ((RAW != '<') || (NXT(1) != '!') ||
(NXT(2) != '-') || (NXT(3) != '-')) return;
state = ctxt->instate;
ctxt->instate = XML_PARSER_COMMENT;
SHRINK;
SKIP(4);
buf = (xmlChar *) xmlMallocAtomic(size * sizeof(xmlChar));
if (buf == NULL) {
xmlErrMemory(ctxt, NULL);
ctxt->instate = state;
return;
len = 0;
size = XML_PARSER_BUFFER_SIZE;
buf = (xmlChar *) xmlMallocAtomic(size * sizeof(xmlChar));
if (buf == NULL) {
xmlErrMemory(ctxt, NULL);
return;
}
}
q = CUR_CHAR(ql);
if (q == 0)
......@@ -3523,7 +3516,6 @@ xmlParseComment(xmlParserCtxtPtr ctxt) {
cur = CUR_CHAR(l);
if (cur == 0)
goto not_terminated;
len = 0;
while (IS_CHAR(cur) && /* checked */
((cur != '>') ||
(r != '-') || (q != '-'))) {
......@@ -3537,7 +3529,6 @@ xmlParseComment(xmlParserCtxtPtr ctxt) {
if (new_buf == NULL) {
xmlFree (buf);
xmlErrMemory(ctxt, NULL);
ctxt->instate = state;
return;
}
buf = new_buf;
......@@ -3577,13 +3568,164 @@ xmlParseComment(xmlParserCtxtPtr ctxt) {
ctxt->sax->comment(ctxt->userData, buf);
xmlFree(buf);
}
ctxt->instate = state;
return;
not_terminated:
xmlFatalErrMsgStr(ctxt, XML_ERR_COMMENT_NOT_FINISHED,
"Comment not terminated\n", NULL);
xmlFree(buf);
}
/**
 * xmlParseComment:
 * @ctxt:  an XML parser context
 *
 * Skip an XML (SGML) comment <!-- .... -->
 *  The spec says that "For compatibility, the string "--" (double-hyphen)
 *  must not occur within comments. "
 *
 * Returns immediately if the input is not positioned on "<!--".
 * Otherwise the parser state is switched to XML_PARSER_COMMENT for the
 * duration of the call and restored before returning.
 *
 * Runs made only of TAB, LF and bytes in the 0x20..0x7F range are scanned
 * directly in the input buffer and, when a SAX comment handler is
 * installed, appended to a locally allocated buffer.  On "-->" the
 * handler is invoked (unless SAX is disabled) with the accumulated text,
 * or with an empty string if nothing was accumulated.  A "--" not
 * followed by '>' raises XML_ERR_COMMENT_NOT_FINISHED and scanning goes
 * on.  Any other byte ends the fast path: the buffer gathered so far is
 * handed over to xmlParseCommentComplex(), which finishes the comment.
 *
 * [15] Comment ::= '<!--' ((Char - '-') | ('-' (Char - '-')))* '-->'
 */
void
xmlParseComment(xmlParserCtxtPtr ctxt) {
    xmlChar *buf = NULL;
    int size = XML_PARSER_BUFFER_SIZE;
    /* initialised so that no indeterminate value is ever passed on to
     * xmlParseCommentComplex() when no buffer has been allocated yet */
    int len = 0;
    xmlParserInputState state;
    const xmlChar *in;
    int nbchar = 0, ccol;

    /*
     * Check that there is a comment right here.
     */
    if ((RAW != '<') || (NXT(1) != '!') ||
        (NXT(2) != '-') || (NXT(3) != '-')) return;

    state = ctxt->instate;
    ctxt->instate = XML_PARSER_COMMENT;
    SKIP(4);
    SHRINK;
    GROW;

    /*
     * Accelerated common case where the input doesn't need to be
     * modified before passing it to the handler.
     */
    in = ctxt->input->cur;
    do {
        if (*in == 0xA) {
            ctxt->input->line++; ctxt->input->col = 1;
            in++;
            while (*in == 0xA) {
                ctxt->input->line++; ctxt->input->col = 1;
                in++;
            }
        }
get_more:
        /* scan a run of plain characters: everything in 0x20..0x7F
         * except '-', plus TAB */
        ccol = ctxt->input->col;
        while (((*in > '-') && (*in <= 0x7F)) ||
               ((*in >= 0x20) && (*in < '-')) ||
               (*in == 0x09)) {
            in++;
            ccol++;
        }
        ctxt->input->col = ccol;
        if (*in == 0xA) {
            ctxt->input->line++; ctxt->input->col = 1;
            in++;
            while (*in == 0xA) {
                ctxt->input->line++; ctxt->input->col = 1;
                in++;
            }
            goto get_more;
        }
        nbchar = in - ctxt->input->cur;
        /*
         * save current set of data
         */
        if (nbchar > 0) {
            if ((ctxt->sax != NULL) &&
                (ctxt->sax->comment != NULL)) {
                if (buf == NULL) {
                    /* if the terminator seems to follow, allocate just
                     * what is needed, otherwise leave room to grow */
                    if ((*in == '-') && (in[1] == '-'))
                        size = nbchar + 1;
                    else
                        size = XML_PARSER_BUFFER_SIZE + nbchar;
                    buf = (xmlChar *) xmlMallocAtomic(size * sizeof(xmlChar));
                    if (buf == NULL) {
                        xmlErrMemory(ctxt, NULL);
                        ctxt->instate = state;
                        return;
                    }
                    len = 0;
                } else if (len + nbchar + 1 >= size) {
                    xmlChar *new_buf;

                    size += len + nbchar + XML_PARSER_BUFFER_SIZE;
                    new_buf = (xmlChar *) xmlRealloc(buf,
                                                     size * sizeof(xmlChar));
                    if (new_buf == NULL) {
                        xmlFree (buf);
                        xmlErrMemory(ctxt, NULL);
                        ctxt->instate = state;
                        return;
                    }
                    buf = new_buf;
                }
                memcpy(&buf[len], ctxt->input->cur, nbchar);
                len += nbchar;
                buf[len] = 0;
            }
        }
        ctxt->input->cur = in;
        /*
         * CR immediately followed by LF: restart the run on the LF so
         * that the CR is not copied to the buffer.  This test used to be
         * nested under a stray "if (*in == 0xA)", which made it
         * unreachable since a byte cannot be both LF and CR.
         */
        if (*in == 0xD) {
            in++;
            if (*in == 0xA) {
                ctxt->input->cur = in;
                in++;
                ctxt->input->line++; ctxt->input->col = 1;
                continue; /* while */
            }
            in--;
        }
        SHRINK;
        GROW;
        /* SHRINK/GROW may have changed the input buffer, reload */
        in = ctxt->input->cur;
        if (*in == '-') {
            if (in[1] == '-') {
                if (in[2] == '>') {
                    SKIP(3);
                    if ((ctxt->sax != NULL) && (ctxt->sax->comment != NULL) &&
                        (!ctxt->disableSAX)) {
                        if (buf != NULL)
                            ctxt->sax->comment(ctxt->userData, buf);
                        else
                            ctxt->sax->comment(ctxt->userData, BAD_CAST "");
                    }
                    if (buf != NULL)
                        xmlFree(buf);
                    ctxt->instate = state;
                    return;
                }
                /* "--" inside the comment: report it and keep going */
                if (buf != NULL)
                    xmlFatalErrMsgStr(ctxt, XML_ERR_COMMENT_NOT_FINISHED,
                                      "Comment not terminated \n<!--%.50s\n",
                                      buf);
                else
                    xmlFatalErrMsgStr(ctxt, XML_ERR_COMMENT_NOT_FINISHED,
                                      "Comment not terminated \n", NULL);
                in++;
                ctxt->input->col++;
            }
            in++;
            ctxt->input->col++;
            goto get_more;
        }
    } while (((*in >= 0x20) && (*in <= 0x7F)) || (*in == 0x09));

    /* a byte outside the accelerated set was met: use the slow routine */
    xmlParseCommentComplex(ctxt, buf, len, size);
    ctxt->instate = state;
    return;
}
/**
* xmlParsePITarget:
......@@ -3924,7 +4066,7 @@ xmlParseEntityDecl(xmlParserCtxtPtr ctxt) {
xmlChar *orig = NULL;
int skipped;
GROW;
/* GROW; done in the caller */
if (CMP8(CUR_PTR, '<', '!', 'E', 'N', 'T', 'I', 'T', 'Y')) {
xmlParserInputPtr input = ctxt->input;
SHRINK;
......@@ -5008,7 +5150,7 @@ xmlParseElementDecl(xmlParserCtxtPtr ctxt) {
int ret = -1;
xmlElementContentPtr content = NULL;
GROW;
/* GROW; done in the caller */
if (CMP9(CUR_PTR, '<', '!', 'E', 'L', 'E', 'M', 'E', 'N', 'T')) {
xmlParserInputPtr input = ctxt->input;
......@@ -5251,12 +5393,32 @@ xmlParseConditionalSections(xmlParserCtxtPtr ctxt) {
void
xmlParseMarkupDecl(xmlParserCtxtPtr ctxt) {
GROW;
xmlParseElementDecl(ctxt);
xmlParseAttributeListDecl(ctxt);
xmlParseEntityDecl(ctxt);
xmlParseNotationDecl(ctxt);
xmlParsePI(ctxt);
xmlParseComment(ctxt);
if (CUR == '<') {
if (NXT(1) == '!') {
switch (NXT(2)) {
case 'E':
if (NXT(3) == 'L')
xmlParseElementDecl(ctxt);
else if (NXT(3) == 'N')
xmlParseEntityDecl(ctxt);
break;
case 'A':
xmlParseAttributeListDecl(ctxt);
break;
case 'N':
xmlParseNotationDecl(ctxt);
break;
case '-':
xmlParseComment(ctxt);
break;
default:
/* there is an error but it will be detected later */
break;
}
} else if (NXT(1) == '?') {
xmlParsePI(ctxt);
}
}
/*
* This is only for internal subset. On external entities,
* the replacement is done before parsing stage
......
<?xml version="1.0" encoding="ISO-8859-1"?>
<!-- test of very very long comments and buffer limits
01234567890123456789012345678901234567890123456789
01234567890123456789012345678901234567890123456789
01234567890123456789012345678901234567890123456789
01234567890123456789012345678901234567890123456789
01234567890123456789012345678901234567890123456789
01234567890123456789012345678901234567890123456789
01234567890123456789012345678901234567890123456789
01234567890123456789012345678901234567890123456789
01234567890123456789012345678901234567890123456789
01234567890123456789012345678901234567890123456789
01234567890123456789012345678901234567890123456789
01234567890123456789012345678901234567890123456789
01234567890123456789012345678901234567890123456789
01234567890123456789012345678901234567890123456789
01234567890123456789012345678901234567890123456789
01234567890123456789012345678901234567890123456789
01234567890123456789012345678901234567890123456789
01234567890123456789012345678901234567890123456789
01234567890123456789012345678901234567890123456789
01234567890123456789012345678901234567890123456789
01234567890123456789012345678901234567890123456789
01234567890123456789012345678901234567890123456789
01234567890123456789012345678901234567890123456789
01234567890123456789012345678901234567890123456789
01234567890123456789012345678901234567890123456789
01234567890123456789012345678901234567890123456789
01234567890123456789012345678901234567890123456789
01234567890123456789012345678901234567890123456789
01234567890123456789012345678901234567890123456789
01234567890123456789012345678901234567890123456789
01234567890123456789012345678901234567890123456789
01234567890123456789012345678901234567890123456789
01234567890123456789012345678901234567890123456789
01234567890123456789012345678901234567890123456789
01234567890123456789012345678901234567890123456789
01234567890123456789012345678901234567890123456789
01234567890123456789012345678901234567890123456789
01234567890123456789012345678901234567890123456789
01234567890123456789012345678901234567890123456789
01234567890123456789012345678901234567890123456789
01234567890123456789012345678901234567890123456789
01234567890123456789012345678901234567890123456789
01234567890123456789012345678901234567890123456789
01234567890123456789012345678901234567890123456789
01234567890123456789012345678901234567890123456789
01234567890123456789012345678901234567890123456789
01234567890123456789012345678901234567890123456789
01234567890123456789012345678901234567890123456789
01234567890123456789012345678901234567890123456789
01234567890123456789012345678901234567890123456789
01234567890123456789012345678901234567890123456789
01234567890123456789012345678901234567890123456789
01234567890123456789012345678901234567890123456789
01234567890123456789012345678901234567890123456789
01234567890123456789012345678901234567890123456789
01234567890123456789012345678901234567890123456789
01234567890123456789012345678901234567890123456789
01234567890123456789012345678901234567890123456789
01234567890123456789012345678901234567890123456789
01234567890123456789012345678901234567890123456789
01234567890123456789012345678901234567890123456789
01234567890123456789012345678901234567890123456789
01234567890123456789012345678901234567890123456789
01234567890123456789012345678901234567890123456789
01234567890123456789012345678901234567890123456789
01234567890123456789012345678901234567890123456789
01234567890123456789012345678901234567890123456789
01234567890123456789012345678901234567890123456789
01234567890123456789012345678901234567890123456789
01234567890123456789012345678901234567890123456789
01234567890123456789012345678901234567890123456789
01234567890123456789012345678901234567890123456789
01234567890123456789012345678901234567890123456789
01234567890123456789012345678901234567890123456789
01234567890123456789012345678901234567890123456789
01234567890123456789012345678901234567890123456789
01234567890123456789012345678901234567890123456789
01234567890123456789012345678901234567890123456789
01234567890123456789012345678901234567890123456789
01234567890123456789012345678901234567890123456789
01234567890123456789012345678901234567890123456789
01234567890123456789012345678901234567890123456789
01234567890123456789012345678901234567890123456789
01234567890123456789012345678901234567890123456789
01234567890123456789012345678901234567890123456789
01234567890123456789012345678901234567890123456789
01234567890123456789012345678901234567890123456789
01234567890123456789012345678901234567890123456789
01234567890123456789012345678901234567890123456789
01234567890123456789012345678901234567890123456789
01234567890123456789012345678901234567890123456789
01234567890123456789012345678901234567890123456789
01234567890123456789012345678901234567890123456789
01234567890123456789012345678901234567890123456789
01234567890123456789012345678901234567890123456789
01234567890123456789012345678901234567890123456789
01234567890123456789012345678901234567890123456789
01234567890123456789012345678901234567890123456789
01234567890123456789012345678901234567890123456789
01234567890123456789012345678901234567890123456789
01234567890123456789012345678901234567890123456789
01234567890123456789012345678901234567890123456789
01234567890123456789012345678901234567890123456789
01234567890123456789012345678901234567890123456789
01234567890123456789012345678901234567890123456789
01234567890123456789012345678901234567890123456789
01234567890123456789012345678901234567890123456789
01234567890123456789012345678901234567890123456789
01234567890123456789012345678901234567890123456789
01234567890123456789012345678901234567890123456789
01234567890123456789012345678901234567890123456789
01234567890123456789012345678901234567890123456789
01234567890123456789012345678901234567890123456789
01234567890123456789012345678901234567890123456789
01234567890123456789012345678901234567890123456789
01234567890123456789012345678901234567890123456789
01234567890123456789012345678901234567890123456789
01234567890123456789012345678901234567890123456789
01234567890123456789012345678901234567890123456789
01234567890123456789012345678901234567890123456789
01234567890123456789012345678901234567890123456789
01234567890123456789012345678901234567890123456789
01234567890123456789012345678901234567890123456789
01234567890123456789012345678901234567890123456789
01234567890123456789012345678901234567890123456789
01234567890123456789012345678901234567890123456789
01234567890123456789012345678901234567890123456789
01234567890123456789012345678901234567890123456789
01234567890123456789012345678901234567890123456789
01234567890123456789012345678901234567890123456789
01234567890123456789012345678901234567890123456789
01234567890123456789012345678901234567890123456789
01234567890123456789012345678901234567890123456789
01234567890123456789012345678901234567890123456789
01234567890123456789012345678901234567890123456789
01234567890123456789012345678901234567890123456789
01234567890123456789012345678901234567890123456789
01234567890123456789012345678901234567890123456789
01234567890123456789012345678901234567890123456789
01234567890123456789012345678901234567890123456789
01234567890123456789012345678901234567890123456789
01234567890123456789012345678901234567890123456789
01234567890123456789012345678901234567890123456789
01234567890123456789012345678901234567890123456789
01234567890123456789012345678901234567890123456789
01234567890123456789012345678901234567890123456789
01234567890123456789012345678901234567890123456789
01234567890123456789012345678901234567890123456789
01234567890123456789012345678901234567890123456789
01234567890123456789012345678901234567890123456789
01234567890123456789012345678901234567890123456789
01234567890123456789012345678901234567890123456789
01234567890123456789012345678901234567890123456789
01234567890123456789012345678901234567890123456789
01234567890123456789012345678901234567890123456789
01234567890123456789012345678901234567890123456789
01234567890123456789012345678901234567890123456789
01234567890123456789012345678901234567890123456789
01234567890123456789012345678901234567890123456789
01234567890123456789012345678901234567890123456789
-->
<doc/>
0 8 #comment 0 1 test of very very long comments and buffer limits
01234567890123456789012345678901234567890123456789
01234567890123456789012345678901234567890123456789
01234567890123456789012345678901234567890123456789
01234567890123456789012345678901234567890123456789
01234567890123456789012345678901234567890123456789
01234567890123456789012345678901234567890123456789
01234567890123456789012345678901234567890123456789
01234567890123456789012345678901234567890123456789
01234567890123456789012345678901234567890123456789
01234567890123456789012345678901234567890123456789
01234567890123456789012345678901234567890123456789
01234567890123456789012345678901234567890123456789
01234567890123456789012345678901234567890123456789
01234567890123456789012345678901234567890123456789
01234567890123456789012345678901234567890123456789
01234567890123456789012345678901234567890123456789
01234567890123456789012345678901234567890123456789
01234567890123456789012345678901234567890123456789
01234567890123456789012345678901234567890123456789
01234567890123456789012345678901234567890123456789
01234567890123456789012345678901234567890123456789
01234567890123456789012345678901234567890123456789
01234567890123456789012345678901234567890123456789
01234567890123456789012345678901234567890123456789
01234567890123456789012345678901234567890123456789
01234567890123456789012345678901234567890123456789
01234567890123456789012345678901234567890123456789
01234567890123456789012345678901234567890123456789
01234567890123456789012345678901234567890123456789
01234567890123456789012345678901234567890123456789
01234567890123456789012345678901234567890123456789
01234567890123456789012345678901234567890123456789
01234567890123456789012345678901234567890123456789
01234567890123456789012345678901234567890123456789
01234567890123456789012345678901234567890123456789
01234567890123456789012345678901234567890123456789
01234567890123456789012345678901234567890123456789
01234567890123456789012345678901234567890123456789
01234567890123456789012345678901234567890123456789
01234567890123456789012345678901234567890123456789
01234567890123456789012345678901234567890123456789
01234567890123456789012345678901234567890123456789
01234567890123456789012345678901234567890123456789
01234567890123456789012345678901234567890123456789
01234567890123456789012345678901234567890123456789
01234567890123456789012345678901234567890123456789
01234567890123456789012345678901234567890123456789
01234567890123456789012345678901234567890123456789
01234567890123456789012345678901234567890123456789
01234567890123456789012345678901234567890123456789
01234567890123456789012345678901234567890123456789
01234567890123456789012345678901234567890123456789
01234567890123456789012345678901234567890123456789
01234567890123456789012345678901234567890123456789
01234567890123456789012345678901234567890123456789
01234567890123456789012345678901234567890123456789
01234567890123456789012345678901234567890123456789
01234567890123456789012345678901234567890123456789
01234567890123456789012345678901234567890123456789
01234567890123456789012345678901234567890123456789
01234567890123456789012345678901234567890123456789
01234567890123456789012345678901234567890123456789
01234567890123456789012345678901234567890123456789
01234567890123456789012345678901234567890123456789
01234567890123456789012345678901234567890123456789
01234567890123456789012345678901234567890123456789
01234567890123456789012345678901234567890123456789
01234567890123456789012345678901234567890123456789
01234567890123456789012345678901234567890123456789
01234567890123456789012345678901234567890123456789
01234567890123456789012345678901234567890123456789
01234567890123456789012345678901234567890123456789
01234567890123456789012345678901234567890123456789
01234567890123456789012345678901234567890123456789
01234567890123456789012345678901234567890123456789
01234567890123456789012345678901234567890123456789
01234567890123456789012345678901234567890123456789
01234567890123456789012345678901234567890123456789
01234567890123456789012345678901234567890123456789
01234567890123456789012345678901234567890123456789
01234567890123456789012345678901234567890123456789
01234567890123456789012345678901234567890123456789
01234567890123456789012345678901234567890123456789
01234567890123456789012345678901234567890123456789
01234567890123456789012345678901234567890123456789
01234567890123456789012345678901234567890123456789
01234567890123456789012345678901234567890123456789
01234567890123456789012345678901234567890123456789
01234567890123456789012345678901234567890123456789
01234567890123456789012345678901234567890123456789
01234567890123456789012345678901234567890123456789
01234567890123456789012345678901234567890123456789
01234567890123456789012345678901234567890123456789
01234567890123456789012345678901234567890123456789
01234567890123456789012345678901234567890123456789
01234567890123456789012345678901234567890123456789
01234567890123456789012345678901234567890123456789
01234567890123456789012345678901234567890123456789
01234567890123456789012345678901234567890123456789
01234567890123456789012345678901234567890123456789
01234567890123456789012345678901234567890123456789
01234567890123456789012345678901234567890123456789
01234567890123456789012345678901234567890123456789
01234567890123456789012345678901234567890123456789
01234567890123456789012345678901234567890123456789
01234567890123456789012345678901234567890123456789
01234567890123456789012345678901234567890123456789
01234567890123456789012345678901234567890123456789
01234567890123456789012345678901234567890123456789
01234567890123456789012345678901234567890123456789
01234567890123456789012345678901234567890123456789
01234567890123456789012345678901234567890123456789
01234567890123456789012345678901234567890123456789
01234567890123456789012345678901234567890123456789
01234567890123456789012345678901234567890123456789
01234567890123456789012345678901234567890123456789
01234567890123456789012345678901234567890123456789
01234567890123456789012345678901234567890123456789
01234567890123456789012345678901234567890123456789
01234567890123456789012345678901234567890123456789
01234567890123456789012345678901234567890123456789
01234567890123456789012345678901234567890123456789
01234567890123456789012345678901234567890123456789
01234567890123456789012345678901234567890123456789
01234567890123456789012345678901234567890123456789
01234567890123456789012345678901234567890123456789
01234567890123456789012345678901234567890123456789
01234567890123456789012345678901234567890123456789
01234567890123456789012345678901234567890123456789
01234567890123456789012345678901234567890123456789
01234567890123456789012345678901234567890123456789
01234567890123456789012345678901234567890123456789
01234567890123456789012345678901234567890123456789
01234567890123456789012345678901234567890123456789
01234567890123456789012345678901234567890123456789
01234567890123456789012345678901234567890123456789
01234567890123456789012345678901234567890123456789
01234567890123456789012345678901234567890123456789
01234567890123456789012345678901234567890123456789
01234567890123456789012345678901234567890123456789
01234567890123456789012345678901234567890123456789
01234567890123456789012345678901234567890123456789
01234567890123456789012345678901234567890123456789
01234567890123456789012345678901234567890123456789
01234567890123456789012345678901234567890123456789
01234567890123456789012345678901234567890123456789
01234567890123456789012345678901234567890123456789
01234567890123456789012345678901234567890123456789
01234567890123456789012345678901234567890123456789
01234567890123456789012345678901234567890123456789
01234567890123456789012345678901234567890123456789
01234567890123456789012345678901234567890123456789
01234567890123456789012345678901234567890123456789
01234567890123456789012345678901234567890123456789
01234567890123456789012345678901234567890123456789
01234567890123456789012345678901234567890123456789
01234567890123456789012345678901234567890123456789
01234567890123456789012345678901234567890123456789
01234567890123456789012345678901234567890123456789
01234567890123456789012345678901234567890123456789
0 1 doc 1 0