Commit e96a2a4b authored by Daniel Veillard

adding repeated parsing and validating tests make the new DOM tree

* Makefile.am: adding repeated parsing and validating tests
* SAX2.c parser.c tree.c include/libxml/parser.h: make the new
  DOM tree building interfaces use the dictionary from the
  parsing context to build the element and attributes names
  as well as formatting spaces and short text nodes
* include/libxml/dict.h dict.c: added some reference counting
  for xmlDictPtr because they can be shared by documents and
  a parser context.
* xmlreader.c: a bit of cleanup, remove the specific tree freeing
  functions and use the standard ones now.
* xmllint.c: add --nodict
* python/libxml.c: fix a stupid bug so that ns() works on
  attribute nodes.
Daniel
parent 16fa96c5
Wed Sep 24 23:17:59 CEST 2003 Daniel Veillard <daniel@veillard.com>
* Makefile.am: adding repeated parsing and validating tests
* SAX2.c parser.c tree.c include/libxml/parser.h: make the new
DOM tree building interfaces use the dictionary from the
parsing context to build the element and attributes names
as well as formatting spaces and short text nodes
* include/libxml/dict.h dict.c: added some reference counting
for xmlDictPtr because they can be shared by documents and
a parser context.
* xmlreader.c: a bit of cleanup, remove the specific tree freeing
functions and use the standard ones now.
* xmllint.c: add --nodict
* python/libxml.c: fix a stupid bug so that ns() works on
attribute nodes.
Tue Sep 23 23:07:45 CEST 2003 Daniel Veillard <daniel@veillard.com>
* parser.c include/libxml/parser.h: adding a new set of
......
......@@ -724,8 +724,10 @@ Timingtests: $(srcdir)/dba100000.xml
@echo "##"
@echo "## Timing tests to try to detect performance"
@echo "## as well a memory usage breakage when streaming"
@echo "## first when using the file interface"
@echo "## then when using the memory interface"
@echo "## 1/ using the file interface"
@echo "## 2/ using the memory interface"
@echo "## 3/ repeated DOM parsing"
@echo "## 4/ repeated DOM validation"
@echo "##"
-@(xmllint --stream --timing $(srcdir)/dba100000.xml; \
MEM=`cat .memdump | grep "MEMORY ALLOCATED" | awk '{ print $$7}'`;\
......@@ -737,6 +739,16 @@ Timingtests: $(srcdir)/dba100000.xml
if [ "$$MEM" != "" ] ; then echo Using $$MEM bytes ; fi ; \
grep "MORY ALLO" .memdump | grep -v "MEMORY ALLOCATED : 0";\
exit 0)
-@(xmllint --noout --timing --repeat $(srcdir)/test/valid/REC-xml-19980210.xml; \
MEM=`cat .memdump | grep "MEMORY ALLOCATED" | awk '{ print $$7}'`;\
if [ "$$MEM" != "" ] ; then echo Using $$MEM bytes ; fi ; \
grep "MORY ALLO" .memdump | grep -v "MEMORY ALLOCATED : 0";\
exit 0)
-@(xmllint --noout --timing --valid --repeat $(srcdir)/test/valid/REC-xml-19980210.xml; \
MEM=`cat .memdump | grep "MEMORY ALLOCATED" | awk '{ print $$7}'`;\
if [ "$$MEM" != "" ] ; then echo Using $$MEM bytes ; fi ; \
grep "MORY ALLO" .memdump | grep -v "MEMORY ALLOCATED : 0";\
exit 0)
C14Ntests : testC14N$(EXEEXT)
@echo "##"
......
......@@ -802,6 +802,8 @@ xmlSAX2StartDocument(void *ctx)
ctxt->disableSAX = 1;
return;
}
if ((ctxt->dictNames) && (doc != NULL))
doc->dict = ctxt->dict;
}
if ((ctxt->myDoc != NULL) && (ctxt->myDoc->URL == NULL) &&
(ctxt->input != NULL) && (ctxt->input->filename != NULL)) {
......@@ -1564,6 +1566,7 @@ xmlSAX2EndElement(void *ctx, const xmlChar *name ATTRIBUTE_UNUSED)
nodePop(ctxt);
}
int nb_interned = 0;
/*
* xmlSAX2TextNode:
* @ctxt: the parser context
......@@ -1577,21 +1580,17 @@ xmlSAX2EndElement(void *ctx, const xmlChar *name ATTRIBUTE_UNUSED)
static xmlNodePtr
xmlSAX2TextNode(xmlParserCtxtPtr ctxt, const xmlChar *str, int len) {
xmlNodePtr ret;
const xmlChar *intern = NULL;
/*
* Allocate
*/
if (ctxt->freeElems != NULL) {
ret = ctxt->freeElems;
ctxt->freeElems = ret->next;
ctxt->freeElemsNr--;
memset(ret, 0, sizeof(xmlNode));
ret->type = XML_TEXT_NODE;
ret->name = xmlStringText;
ret->content = xmlStrndup(str, len);
if ((__xmlRegisterCallbacks) && (xmlRegisterNodeDefaultValue))
xmlRegisterNodeDefaultValue(ret);
} else {
ret = xmlNewTextLen(str, len);
ret = (xmlNodePtr) xmlMalloc(sizeof(xmlNode));
}
if (ret == NULL) {
if ((ctxt->sax != NULL) && (ctxt->sax->error != NULL))
......@@ -1602,6 +1601,36 @@ xmlSAX2TextNode(xmlParserCtxtPtr ctxt, const xmlChar *str, int len) {
ctxt->disableSAX = 1;
return(NULL);
}
/*
 * Intern the formatting blanks found between tags, or the
 * very short strings, by looking them up in the context dictionary.
 * When nothing is interned, "intern" stays NULL and the code after
 * the "skip" label falls back to a private copy of the text.
 */
if (ctxt->dictNames) {
    /*
     * Character immediately following the text run.
     * NOTE(review): this reads str[len], one past the run; presumably
     * the parser buffer always holds that byte -- confirm with callers.
     */
    xmlChar cur = str[len];

    if ((len <= 3) && ((cur == '"') || (cur == '\'') || (cur == '<'))) {
        /* very short string directly followed by a quote or a tag */
        intern = xmlDictLookup(ctxt->dict, str, len);
    } else if (IS_BLANK(*str) && (len < 60) && (cur == '<')) {
        int i;

        /*
         * Only intern runs made entirely of blanks: each character
         * has to be tested, so index with i instead of re-testing
         * the first character on every iteration.
         */
        for (i = 1; i < len; i++) {
            if (!IS_BLANK(str[i])) goto skip;
        }
        intern = xmlDictLookup(ctxt->dict, str, len);
    }
}
skip:
memset(ret, 0, sizeof(xmlNode));
ret->type = XML_TEXT_NODE;
ret->name = xmlStringText;
if (intern == NULL)
ret->content = xmlStrndup(str, len);
else
ret->content = (xmlChar *) intern;
if ((__xmlRegisterCallbacks) && (xmlRegisterNodeDefaultValue))
xmlRegisterNodeDefaultValue(ret);
return(ret);
}
......
......@@ -59,6 +59,8 @@ struct _xmlDictStrings {
* The entire dictionary
*/
struct _xmlDict {
int ref_counter;
struct _xmlDictEntry *dict;
int size;
int nbElems;
......@@ -277,6 +279,8 @@ xmlDictCreate(void) {
dict = xmlMalloc(sizeof(xmlDict));
if (dict) {
dict->ref_counter = 1;
dict->size = MIN_DICT_SIZE;
dict->nbElems = 0;
dict->dict = xmlMalloc(MIN_DICT_SIZE * sizeof(xmlDictEntry));
......@@ -290,6 +294,21 @@ xmlDictCreate(void) {
return(NULL);
}
/**
 * xmlDictReference:
 * @dict: the dictionary
 *
 * Take one more reference on a dictionary by bumping its
 * reference counter.
 *
 * Returns 0 on success, -1 if @dict is NULL
 */
int
xmlDictReference(xmlDictPtr dict) {
    if (dict != NULL) {
        dict->ref_counter += 1;
        return(0);
    }
    return(-1);
}
/**
* xmlDictGrow:
* @dict: the dictionary
......@@ -401,6 +420,11 @@ xmlDictFree(xmlDictPtr dict) {
if (dict == NULL)
return;
/* decrement the counter, it may be shared by a parser and docs */
dict->ref_counter--;
if (dict->ref_counter > 0) return;
if (dict->dict) {
for(i = 0; ((i < dict->size) && (dict->nbElems > 0)); i++) {
iter = &(dict->dict[i]);
......@@ -595,6 +619,7 @@ xmlDictOwns(xmlDictPtr dict, const xmlChar *str) {
}
return(0);
}
/**
* xmlDictSize:
* @dict: the dictionary
......
......@@ -37,6 +37,8 @@ typedef xmlDict *xmlDictPtr;
*/
XMLPUBFUN xmlDictPtr XMLCALL
xmlDictCreate (void);
XMLPUBFUN int XMLCALL
xmlDictReference(xmlDictPtr dict);
XMLPUBFUN void XMLCALL
xmlDictFree (xmlDictPtr dict);
......
......@@ -1044,7 +1044,8 @@ typedef enum {
XML_PARSE_NOBLANKS = 1<<8, /* remove blank nodes */
XML_PARSE_SAX1 = 1<<9, /* use the SAX1 interface internally */
XML_PARSE_XINCLUDE = 1<<10,/* Implement XInclude substitition */
XML_PARSE_NONET = 1<<11 /* Forbid network access */
XML_PARSE_NONET = 1<<11,/* Forbid network access */
XML_PARSE_NODICT = 1<<12 /* Do not reuse the context dictionnary */
} xmlParserOption;
XMLPUBFUN void XMLCALL
......
......@@ -11925,6 +11925,18 @@ xmlCleanupParser(void) {
* *
************************************************************************/
/**
 * DICT_FREE:
 * @str: a string
 *
 * Free a string if it is not owned by the "dict" dictionary in the
 * current scope.
 *
 * Nothing is done when @str is NULL. When "dict" is NULL the string is
 * always released with xmlFree(); otherwise it is released only if
 * xmlDictOwns() returns 0 for it.
 *
 * The macro refers to a variable named "dict" that must be visible
 * where it is used, and @str is evaluated more than once, so the
 * argument should have no side effects. The expansion is a bare "if"
 * statement that already ends with ';': do not use it as the unbraced
 * body of an if/else.
 */
#define DICT_FREE(str) \
if ((str) && ((!dict) || \
(xmlDictOwns(dict, (const xmlChar *)(str)) == 0))) \
xmlFree((char *)(str));
/**
* xmlCtxtReset:
* @ctxt: an XML parser context
......@@ -11935,6 +11947,7 @@ void
xmlCtxtReset(xmlParserCtxtPtr ctxt)
{
xmlParserInputPtr input;
xmlDictPtr dict = ctxt->dict;
while ((input = inputPop(ctxt)) != NULL) { /* Non consuming */
xmlFreeInputStream(input);
......@@ -11953,8 +11966,20 @@ xmlCtxtReset(xmlParserCtxtPtr ctxt)
ctxt->nameNr = 0;
ctxt->name = NULL;
DICT_FREE(ctxt->version);
ctxt->version = NULL;
DICT_FREE(ctxt->encoding);
ctxt->encoding = NULL;
DICT_FREE(ctxt->directory);
ctxt->directory = NULL;
DICT_FREE(ctxt->extSubURI);
ctxt->extSubURI = NULL;
DICT_FREE(ctxt->extSubSystem);
ctxt->extSubSystem = NULL;
if (ctxt->myDoc != NULL)
xmlFreeDoc(ctxt->myDoc);
ctxt->myDoc = NULL;
ctxt->standalone = -1;
ctxt->hasExternalSubset = 0;
ctxt->hasPErefs = 0;
......@@ -11962,9 +11987,7 @@ xmlCtxtReset(xmlParserCtxtPtr ctxt)
ctxt->external = 0;
ctxt->instate = XML_PARSER_START;
ctxt->token = 0;
ctxt->directory = NULL;
ctxt->myDoc = NULL;
ctxt->wellFormed = 1;
ctxt->nsWellFormed = 1;
ctxt->valid = 1;
......@@ -12064,6 +12087,12 @@ xmlCtxtUseOptions(xmlParserCtxtPtr ctxt, int options)
ctxt->sax->initialized = 1;
options -= XML_PARSE_SAX1;
}
if (options & XML_PARSE_NODICT) {
ctxt->dictNames = 0;
options -= XML_PARSE_NODICT;
} else {
ctxt->dictNames = 1;
}
return (options);
}
......@@ -12096,11 +12125,25 @@ xmlDoRead(xmlParserCtxtPtr ctxt, const char *encoding, int options, int reuse)
ret = ctxt->myDoc;
else {
ret = NULL;
xmlFreeDoc(ctxt->myDoc);
ctxt->myDoc = NULL;
if (ctxt->myDoc != NULL) {
ctxt->myDoc->dict = NULL;
xmlFreeDoc(ctxt->myDoc);
}
}
if (!reuse)
ctxt->myDoc = NULL;
if (!reuse) {
if ((ctxt->dictNames) &&
(ret != NULL) &&
(ret->dict == ctxt->dict))
ctxt->dict = NULL;
xmlFreeParserCtxt(ctxt);
} else {
/* Must duplicate the reference to the dictionary */
if ((ctxt->dictNames) &&
(ret != NULL) &&
(ret->dict == ctxt->dict))
xmlDictReference(ctxt->dict);
}
return (ret);
}
......
......@@ -2288,7 +2288,9 @@ libxml_xmlNodeGetNs(ATTRIBUTE_UNUSED PyObject * self, PyObject * args)
return (NULL);
node = (xmlNodePtr) PyxmlNode_Get(pyobj_node);
if ((node == NULL) || (node->type != XML_ELEMENT_NODE)) {
if ((node == NULL) ||
((node->type != XML_ELEMENT_NODE) &&
(node->type != XML_ATTRIBUTE_NODE))) {
Py_INCREF(Py_None);
return (Py_None);
}
......
......@@ -913,6 +913,18 @@ xmlCreateIntSubset(xmlDocPtr doc, const xmlChar *name,
return(cur);
}
/**
 * DICT_FREE:
 * @str: a string
 *
 * Free a string if it is not owned by the "dict" dictionary in the
 * current scope.
 *
 * Nothing is done when @str is NULL. When "dict" is NULL the string is
 * always released with xmlFree(); otherwise it is released only if
 * xmlDictOwns() returns 0 for it.
 *
 * The macro refers to a variable named "dict" that must be visible
 * where it is used, and @str is evaluated more than once, so the
 * argument should have no side effects. The expansion is a bare "if"
 * statement that already ends with ';', which is why it can be written
 * without a trailing semicolon; do not use it as the unbraced body of
 * an if/else.
 */
#define DICT_FREE(str) \
if ((str) && ((!dict) || \
(xmlDictOwns(dict, (const xmlChar *)(str)) == 0))) \
xmlFree((char *)(str));
/**
* xmlFreeDtd:
* @cur: the DTD structure to free up
......@@ -921,13 +933,12 @@ xmlCreateIntSubset(xmlDocPtr doc, const xmlChar *name,
*/
void
xmlFreeDtd(xmlDtdPtr cur) {
xmlDictPtr dict = NULL;
if (cur == NULL) {
#ifdef DEBUG_TREE
xmlGenericError(xmlGenericErrorContext,
"xmlFreeDtd : DTD == NULL\n");
#endif
return;
}
if (cur->doc != NULL) dict = cur->doc->dict;
if ((__xmlRegisterCallbacks) && (xmlDeregisterNodeDefaultValue))
xmlDeregisterNodeDefaultValue((xmlNodePtr)cur);
......@@ -948,9 +959,9 @@ xmlFreeDtd(xmlDtdPtr cur) {
c = next;
}
}
if (cur->name != NULL) xmlFree((char *) cur->name);
if (cur->SystemID != NULL) xmlFree((char *) cur->SystemID);
if (cur->ExternalID != NULL) xmlFree((char *) cur->ExternalID);
DICT_FREE(cur->name)
DICT_FREE(cur->SystemID)
DICT_FREE(cur->ExternalID)
/* TODO !!! */
if (cur->notations != NULL)
xmlFreeNotationTable((xmlNotationTablePtr) cur->notations);
......@@ -1019,6 +1030,7 @@ xmlNewDoc(const xmlChar *version) {
void
xmlFreeDoc(xmlDocPtr cur) {
xmlDtdPtr extSubset, intSubset;
xmlDictPtr dict = NULL;
if (cur == NULL) {
#ifdef DEBUG_TREE
......@@ -1027,6 +1039,7 @@ xmlFreeDoc(xmlDocPtr cur) {
#endif
return;
}
if (cur != NULL) dict = cur->dict;
if ((__xmlRegisterCallbacks) && (xmlDeregisterNodeDefaultValue))
xmlDeregisterNodeDefaultValue((xmlNodePtr)cur);
......@@ -1054,13 +1067,14 @@ xmlFreeDoc(xmlDocPtr cur) {
}
if (cur->children != NULL) xmlFreeNodeList(cur->children);
if (cur->version != NULL) xmlFree((char *) cur->version);
if (cur->name != NULL) xmlFree((char *) cur->name);
if (cur->encoding != NULL) xmlFree((char *) cur->encoding);
if (cur->oldNs != NULL) xmlFreeNsList(cur->oldNs);
if (cur->URL != NULL) xmlFree((char *) cur->URL);
DICT_FREE(cur->version)
DICT_FREE(cur->name)
DICT_FREE(cur->encoding)
DICT_FREE(cur->URL)
xmlFree(cur);
if (dict) xmlDictFree(dict);
}
/**
......@@ -1915,13 +1929,7 @@ xmlNewDocProp(xmlDocPtr doc, const xmlChar *name, const xmlChar *value) {
void
xmlFreePropList(xmlAttrPtr cur) {
xmlAttrPtr next;
if (cur == NULL) {
#ifdef DEBUG_TREE
xmlGenericError(xmlGenericErrorContext,
"xmlFreePropList : property == NULL\n");
#endif
return;
}
if (cur == NULL) return;
while (cur != NULL) {
next = cur->next;
xmlFreeProp(cur);
......@@ -1937,13 +1945,10 @@ xmlFreePropList(xmlAttrPtr cur) {
*/
void
xmlFreeProp(xmlAttrPtr cur) {
if (cur == NULL) {
#ifdef DEBUG_TREE
xmlGenericError(xmlGenericErrorContext,
"xmlFreeProp : property == NULL\n");
#endif
return;
}
xmlDictPtr dict = NULL;
if (cur == NULL) return;
if (cur->doc != NULL) dict = cur->doc->dict;
if ((__xmlRegisterCallbacks) && (xmlDeregisterNodeDefaultValue))
xmlDeregisterNodeDefaultValue((xmlNodePtr)cur);
......@@ -1955,8 +1960,8 @@ xmlFreeProp(xmlAttrPtr cur) {
if (xmlIsID(cur->parent->doc, cur->parent, cur))
xmlRemoveID(cur->parent->doc, cur);
}
if (cur->name != NULL) xmlFree((char *) cur->name);
if (cur->children != NULL) xmlFreeNodeList(cur->children);
DICT_FREE(cur->name)
xmlFree(cur);
}
......@@ -3187,13 +3192,9 @@ xmlGetLastChild(xmlNodePtr parent) {
void
xmlFreeNodeList(xmlNodePtr cur) {
xmlNodePtr next;
if (cur == NULL) {
#ifdef DEBUG_TREE
xmlGenericError(xmlGenericErrorContext,
"xmlFreeNodeList : node == NULL\n");
#endif
return;
}
xmlDictPtr dict = NULL;
if (cur == NULL) return;
if (cur->type == XML_NAMESPACE_DECL) {
xmlFreeNsList((xmlNsPtr) cur);
return;
......@@ -3206,9 +3207,9 @@ xmlFreeNodeList(xmlNodePtr cur) {
xmlFreeDoc((xmlDocPtr) cur);
return;
}
if (cur->doc != NULL) dict = cur->doc->dict;
while (cur != NULL) {
next = cur->next;
/* unroll to speed up freeing the document */
if (cur->type != XML_DTD_NODE) {
if ((__xmlRegisterCallbacks) && (xmlDeregisterNodeDefaultValue))
......@@ -3226,7 +3227,7 @@ xmlFreeNodeList(xmlNodePtr cur) {
(cur->type != XML_XINCLUDE_START) &&
(cur->type != XML_XINCLUDE_END) &&
(cur->type != XML_ENTITY_REF_NODE)) {
if (cur->content != NULL) xmlFree(cur->content);
DICT_FREE(cur->content)
}
if (((cur->type == XML_ELEMENT_NODE) ||
(cur->type == XML_XINCLUDE_START) ||
......@@ -3237,28 +3238,13 @@ xmlFreeNodeList(xmlNodePtr cur) {
/*
* When a node is a text node or a comment, it uses a global static
* variable for the name of the node.
*
* The xmlStrEqual comparisons need to be done when (happened with
* XML::libXML and XML::libXSLT) the library is included twice
* statically in the binary and a tree allocated by one occurrence
* of the lib gets freed by the other occurrence, in this case
* the string addresses compare are not sufficient.
* Otherwise the node name might come from the document's
* dictionnary
*/
if ((cur->name != NULL) &&
(cur->name != xmlStringText) &&
(cur->name != xmlStringTextNoenc) &&
(cur->name != xmlStringComment)) {
if (cur->type == XML_TEXT_NODE) {
if ((!xmlStrEqual(cur->name, xmlStringText)) &&
(!xmlStrEqual(cur->name, xmlStringTextNoenc)))
xmlFree((char *) cur->name);
} else if (cur->type == XML_COMMENT_NODE) {
if (!xmlStrEqual(cur->name, xmlStringComment))
xmlFree((char *) cur->name);
} else
xmlFree((char *) cur->name);
}
/* TODO : derecursivate this function */
(cur->type != XML_TEXT_NODE) &&
(cur->type != XML_COMMENT_NODE))
DICT_FREE(cur->name)
xmlFree(cur);
}
cur = next;
......@@ -3274,13 +3260,9 @@ xmlFreeNodeList(xmlNodePtr cur) {
*/
void
xmlFreeNode(xmlNodePtr cur) {
if (cur == NULL) {
#ifdef DEBUG_TREE
xmlGenericError(xmlGenericErrorContext,
"xmlFreeNode : node == NULL\n");
#endif
return;
}
xmlDictPtr dict = NULL;
if (cur == NULL) return;
/* use xmlFreeDtd for DTD nodes */
if (cur->type == XML_DTD_NODE) {
......@@ -3299,6 +3281,8 @@ xmlFreeNode(xmlNodePtr cur) {
if ((__xmlRegisterCallbacks) && (xmlDeregisterNodeDefaultValue))
xmlDeregisterNodeDefaultValue(cur);
if (cur->doc != NULL) dict = cur->doc->dict;
if ((cur->children != NULL) &&
(cur->type != XML_ENTITY_REF_NODE))
xmlFreeNodeList(cur->children);
......@@ -3312,33 +3296,18 @@ xmlFreeNode(xmlNodePtr cur) {
(cur->type != XML_ENTITY_REF_NODE) &&
(cur->type != XML_XINCLUDE_END) &&
(cur->type != XML_XINCLUDE_START)) {
xmlFree(cur->content);
DICT_FREE(cur->content)
}
/*
* When a node is a text node or a comment, it uses a global static
* variable for the name of the node.
*
* The xmlStrEqual comparisons need to be done when (happened with
* XML::libXML and XML::libXSLT) the library is included twice statically
* in the binary and a tree allocated by one occurence of the lib gets
* freed by the other occurrence, in this case the string addresses compare
* are not sufficient.
* Otherwise the node name might come from the document's dictionnary
*/
if ((cur->name != NULL) &&
(cur->name != xmlStringText) &&
(cur->name != xmlStringTextNoenc) &&
(cur->name != xmlStringComment)) {
if (cur->type == XML_TEXT_NODE) {
if ((!xmlStrEqual(cur->name, xmlStringText)) &&
(!xmlStrEqual(cur->name, xmlStringTextNoenc)))
xmlFree((char *) cur->name);
} else if (cur->type == XML_COMMENT_NODE) {
if (!xmlStrEqual(cur->name, xmlStringComment))
xmlFree((char *) cur->name);
} else
xmlFree((char *) cur->name);
}
(cur->type != XML_TEXT_NODE) &&
(cur->type != XML_COMMENT_NODE))
DICT_FREE(cur->name)
if (((cur->type == XML_ELEMENT_NODE) ||
(cur->type == XML_XINCLUDE_START) ||
......
......@@ -720,7 +720,7 @@ static void streamFile(char *filename) {
* Tree Test processing *
* *
************************************************************************/
static void parseAndPrintFile(char *filename) {
static void parseAndPrintFile(char *filename, xmlParserCtxtPtr rectxt) {
xmlDocPtr doc = NULL, tmp;
if ((timing) && (!repeat))
......@@ -811,17 +811,26 @@ static void parseAndPrintFile(char *filename) {
FILE *f;
f = fopen(filename, "r");
if (f != NULL)
doc = xmlReadIO((xmlInputReadCallback) myRead,
(xmlInputCloseCallback) myClose, f,
NULL, options);
else
if (f != NULL) {
if (rectxt == NULL)
doc = xmlReadIO((xmlInputReadCallback) myRead,
(xmlInputCloseCallback) myClose, f,
NULL, options);
else
doc = xmlCtxtReadIO(rectxt,
(xmlInputReadCallback) myRead,
(xmlInputCloseCallback) myClose, f,
NULL, options);
} else
doc = NULL;
}
} else if (htmlout) {
xmlParserCtxtPtr ctxt;
ctxt = xmlNewParserCtxt();
if (rectxt == NULL)
ctxt = xmlNewParserCtxt();
else
ctxt = rectxt;
if (ctxt == NULL) {
doc = NULL;
} else {
......@@ -832,7 +841,8 @@ static void parseAndPrintFile(char *filename) {
doc = xmlCtxtReadFile(ctxt, filename, NULL, options);
xmlFreeParserCtxt(ctxt);
if (rectxt == NULL)
xmlFreeParserCtxt(ctxt);
}
#ifdef HAVE_SYS_MMAN_H
} else if (memory) {
......@@ -847,13 +857,21 @@ static void parseAndPrintFile(char *filename) {
if (base == (void *) MAP_FAILED)
return;