Commit c82c57e6 authored by Daniel Veillard's avatar Daniel Veillard
Browse files

some parser optimizations, xmllint --memory --timing --repeat --stream

* dict.c parser.c xmlstring.c: some parser optimizations,
  xmllint --memory --timing --repeat --stream ./db10000.xml
  went down from 16.5 secs to 15.5 secs.
Daniel
parent 65765284
Mon Jan 12 17:22:57 CET 2004 Daniel Veillard <daniel@veillard.com>
* dict.c parser.c xmlstring.c: some parser optimizations,
xmllint --memory --timing --repeat --stream ./db10000.xml
went down from 16.5 secs to 15.5 secs.
Thu Jan 8 17:57:50 CET 2004 Daniel Veillard <daniel@veillard.com>
 
* xmlschemas.c: removed a memory leak remaining from the switch
......
......@@ -484,14 +484,30 @@ xmlDictLookup(xmlDictPtr dict, const xmlChar *name, int len) {
} else {
for (insert = &(dict->dict[key]); insert->next != NULL;
insert = insert->next) {
#ifdef __GNUC__
if (insert->len == len) {
register int tmp = memcmp(insert->name, name, len);
if (!tmp)
return(insert->name);
}
#else
if ((insert->len == len) &&
(!xmlStrncmp(insert->name, name, len)))
return(insert->name);
#endif
nbi++;
}
#ifdef __GNUC__
if (insert->len == len) {
register int tmp = memcmp(insert->name, name, len);
if (!tmp)
return(insert->name);
}
#else
if ((insert->len == len) &&
(!xmlStrncmp(insert->name, name, len)))
return(insert->name);
#endif
}
ret = xmlDictAddString(dict, name, len);
......
......@@ -2038,13 +2038,15 @@ xmlStringDecodeEntities(xmlParserCtxtPtr ctxt, const xmlChar *str, int what,
* @ctxt: an XML parser context
* @str: a xmlChar *
* @len: the size of @str
* @blank_chars: we know the chars are blanks
*
* Is this a sequence of blank chars that one can ignore ?
*
* Returns 1 if ignorable 0 otherwise.
*/
static int areBlanks(xmlParserCtxtPtr ctxt, const xmlChar *str, int len) {
static int areBlanks(xmlParserCtxtPtr ctxt, const xmlChar *str, int len,
int blank_chars) {
int i, ret;
xmlNodePtr lastChild;
......@@ -2064,8 +2066,10 @@ static int areBlanks(xmlParserCtxtPtr ctxt, const xmlChar *str, int len) {
/*
* Check that the string is made of blanks
*/
for (i = 0;i < len;i++)
if (!(IS_BLANK_CH(str[i]))) return(0);
if (blank_chars == 0) {
for (i = 0;i < len;i++)
if (!(IS_BLANK_CH(str[i]))) return(0);
}
/*
* Look if the element is mixed content in the DTD if available
......@@ -2339,8 +2343,8 @@ xmlParseName(xmlParserCtxtPtr ctxt) {
static const xmlChar *
xmlParseNameAndCompare(xmlParserCtxtPtr ctxt, xmlChar const *other) {
const xmlChar *cmp = other;
const xmlChar *in;
register const xmlChar *cmp = other;
register const xmlChar *in;
const xmlChar *ret;
GROW;
......@@ -3106,10 +3110,50 @@ xmlParseCharData(xmlParserCtxtPtr ctxt, int cdata) {
if (!cdata) {
in = ctxt->input->cur;
do {
get_more_space:
while (*in == 0x20) in++;
if (*in == 0xA) {
ctxt->input->line++;
in++;
while (*in == 0xA) {
ctxt->input->line++;
in++;
}
goto get_more_space;
}
if (*in == '<') {
nbchar = in - ctxt->input->cur;
if (nbchar > 0) {
const xmlChar *tmp = ctxt->input->cur;
ctxt->input->cur = in;
if (ctxt->sax->ignorableWhitespace !=
ctxt->sax->characters) {
if (areBlanks(ctxt, tmp, nbchar, 1)) {
ctxt->sax->ignorableWhitespace(ctxt->userData,
tmp, nbchar);
} else if (ctxt->sax->characters != NULL)
ctxt->sax->characters(ctxt->userData,
tmp, nbchar);
} else if (ctxt->sax->characters != NULL) {
ctxt->sax->characters(ctxt->userData,
tmp, nbchar);
}
}
return;
}
get_more:
#if 0
while (((*in >= 0x20) && (*in != '<') && (*in != ']') &&
(*in != '&') && (*in <= 0x7F)) || (*in == 0x09))
in++;
#endif
while (((*in > ']') && (*in <= 0x7F)) ||
((*in > '&') && (*in < '<')) ||
((*in > '<') && (*in < ']')) ||
((*in >= 0x20) && (*in < '&')) ||
(*in == 0x09))
in++;
if (*in == 0xA) {
ctxt->input->line++;
in++;
......@@ -3136,7 +3180,7 @@ get_more:
const xmlChar *tmp = ctxt->input->cur;
ctxt->input->cur = in;
if (areBlanks(ctxt, tmp, nbchar)) {
if (areBlanks(ctxt, tmp, nbchar, 0)) {
ctxt->sax->ignorableWhitespace(ctxt->userData,
tmp, nbchar);
} else if (ctxt->sax->characters != NULL)
......@@ -3217,7 +3261,7 @@ xmlParseCharDataComplex(xmlParserCtxtPtr ctxt, int cdata) {
* OK the segment is to be consumed as chars.
*/
if ((ctxt->sax != NULL) && (!ctxt->disableSAX)) {
if (areBlanks(ctxt, buf, nbchar)) {
if (areBlanks(ctxt, buf, nbchar, 0)) {
if (ctxt->sax->ignorableWhitespace != NULL)
ctxt->sax->ignorableWhitespace(ctxt->userData,
buf, nbchar);
......@@ -3242,7 +3286,7 @@ xmlParseCharDataComplex(xmlParserCtxtPtr ctxt, int cdata) {
* OK the segment is to be consumed as chars.
*/
if ((ctxt->sax != NULL) && (!ctxt->disableSAX)) {
if (areBlanks(ctxt, buf, nbchar)) {
if (areBlanks(ctxt, buf, nbchar, 0)) {
if (ctxt->sax->ignorableWhitespace != NULL)
ctxt->sax->ignorableWhitespace(ctxt->userData, buf, nbchar);
} else {
......@@ -6689,6 +6733,7 @@ xmlGetNamespace(xmlParserCtxtPtr ctxt, const xmlChar *prefix) {
/**
* xmlParseNCName:
* @ctxt: an XML parser context
* @len: lenght of the string parsed
*
* parse an XML name.
*
......@@ -7142,7 +7187,7 @@ xmlParseAttribute2(xmlParserCtxtPtr ctxt,
static const xmlChar *
xmlParseStartTag2(xmlParserCtxtPtr ctxt, const xmlChar **pref,
const xmlChar **URI) {
const xmlChar **URI, int *tlen) {
const xmlChar *localname;
const xmlChar *prefix;
const xmlChar *attname;
......@@ -7182,6 +7227,7 @@ reparse:
"StartTag: invalid element name\n");
return(NULL);
}
*tlen = ctxt->input->cur - ctxt->input->base - cur;
/*
* Now parse the attributes, it ends up with the ending
......@@ -7507,7 +7553,7 @@ base_changed:
static void
xmlParseEndTag2(xmlParserCtxtPtr ctxt, const xmlChar *prefix,
const xmlChar *URI, int line, int nsNr) {
const xmlChar *URI, int line, int nsNr, int tlen) {
const xmlChar *name;
GROW;
......@@ -7517,7 +7563,19 @@ xmlParseEndTag2(xmlParserCtxtPtr ctxt, const xmlChar *prefix,
}
SKIP(2);
name = xmlParseQNameAndCompare(ctxt, ctxt->name, prefix);
if ((tlen > 0) && (memcmp(ctxt->input->cur, ctxt->name, tlen) == 0)) {
if (ctxt->input->cur[tlen] == '>') {
ctxt->input->cur += tlen + 1;
goto done;
}
ctxt->input->cur += tlen;
name = (xmlChar*)1;
} else {
if (prefix == NULL)
name = xmlParseNameAndCompare(ctxt, ctxt->name);
else
name = xmlParseQNameAndCompare(ctxt, ctxt->name, prefix);
}
/*
* We should definitely be at the ending "S? '>'" part
......@@ -7545,6 +7603,7 @@ xmlParseEndTag2(xmlParserCtxtPtr ctxt, const xmlChar *prefix,
/*
* SAX: End of Tag
*/
done:
if ((ctxt->sax != NULL) && (ctxt->sax->endElementNs != NULL) &&
(!ctxt->disableSAX))
ctxt->sax->endElementNs(ctxt->userData, ctxt->name, prefix, URI);
......@@ -7753,7 +7812,7 @@ xmlParseElement(xmlParserCtxtPtr ctxt) {
const xmlChar *prefix;
const xmlChar *URI;
xmlParserNodeInfo node_info;
int line;
int line, tlen;
xmlNodePtr ret;
int nsNr = ctxt->nsNr;
......@@ -7773,7 +7832,7 @@ xmlParseElement(xmlParserCtxtPtr ctxt) {
#ifdef LIBXML_SAX1_ENABLED
if (ctxt->sax2)
#endif /* LIBXML_SAX1_ENABLED */
name = xmlParseStartTag2(ctxt, &prefix, &URI);
name = xmlParseStartTag2(ctxt, &prefix, &URI, &tlen);
#ifdef LIBXML_SAX1_ENABLED
else
name = xmlParseStartTag(ctxt);
......@@ -7878,7 +7937,7 @@ xmlParseElement(xmlParserCtxtPtr ctxt) {
* parse the end of tag: '</' should be here.
*/
if (ctxt->sax2) {
xmlParseEndTag2(ctxt, prefix, URI, line, ctxt->nsNr - nsNr);
xmlParseEndTag2(ctxt, prefix, URI, line, ctxt->nsNr - nsNr, tlen);
namePop(ctxt);
}
#ifdef LIBXML_SAX1_ENABLED
......@@ -8734,7 +8793,7 @@ xmlParseGetLasts(xmlParserCtxtPtr ctxt, const xmlChar **lastlt,
static int
xmlParseTryOrFinish(xmlParserCtxtPtr ctxt, int terminate) {
int ret = 0;
int avail;
int avail, tlen;
xmlChar cur, next;
const xmlChar *lastlt, *lastgt;
......@@ -8984,7 +9043,7 @@ xmlParseTryOrFinish(xmlParserCtxtPtr ctxt, int terminate) {
#ifdef LIBXML_SAX1_ENABLED
if (ctxt->sax2)
#endif /* LIBXML_SAX1_ENABLED */
name = xmlParseStartTag2(ctxt, &prefix, &URI);
name = xmlParseStartTag2(ctxt, &prefix, &URI, &tlen);
#ifdef LIBXML_SAX1_ENABLED
else
name = xmlParseStartTag(ctxt);
......@@ -9159,7 +9218,7 @@ xmlParseTryOrFinish(xmlParserCtxtPtr ctxt, int terminate) {
xmlParseEndTag2(ctxt,
(void *) ctxt->pushTab[ctxt->nameNr * 3 - 3],
(void *) ctxt->pushTab[ctxt->nameNr * 3 - 2], 0,
(int) (long) ctxt->pushTab[ctxt->nameNr * 3 - 1]);
(int) (long) ctxt->pushTab[ctxt->nameNr * 3 - 1], 0);
nameNsPop(ctxt);
}
#ifdef LIBXML_SAX1_ENABLED
......
......@@ -208,11 +208,16 @@ xmlStrncmp(const xmlChar *str1, const xmlChar *str2, int len) {
if (str1 == str2) return(0);
if (str1 == NULL) return(-1);
if (str2 == NULL) return(1);
#ifdef __GNUC__
tmp = strncmp(str1, str2, len);
return tmp;
#else
do {
tmp = *str1++ - *str2;
if (tmp != 0 || --len == 0) return(tmp);
} while (*str2++ != 0);
return 0;
#endif
}
static const xmlChar casemap[256] = {
......
Markdown is supported
0% or .
You are about to add 0 people to the discussion. Proceed with caution.
Finish editing this message first!
Please register or to comment