xmlTextReaderNext() parent traversal broken with preparsed documents
I noticed a bug in the behavior of `xmlTextReaderNext(reader)`:
When the reader is built on a preparsed document using `xmlReaderWalker(doc)`, it does not properly move back up to the parent nodes, but instead stops at the last sibling (similar to `xmlTextReaderNextSibling(reader)`).
Here's a test case:
`test.xml`:
<?xml version="1.0" encoding="UTF-8"?>
<root>
<child id="1">
<subchild id="1.1"/>
<subchild id="1.2">
<subsubchild id="1.2.1"/>
</subchild>
<subchild id="1.3"/>
</child>
<child id="2">
<subchild id="2.1"/>
<subchild id="2.2"/>
</child>
<child id="3">
<subchild id="3.1"/>
<subchild id="3.2"/>
</child>
</root>
`test.c`:
#include <stdio.h>
#include <libxml/tree.h>
#include <libxml/parser.h>
#include <libxml/xmlreader.h>
/*
 * Print the element the reader is currently positioned on.
 *
 * The line is indented by one space per level of depth and shows the
 * element's local name, followed by its "id" attribute when it has one.
 * Nodes that are not element start tags produce no output.
 */
static void processNode(xmlTextReaderPtr reader) {
    int type;
    int depth;
    int i;
    xmlChar *id;

    /* The reader reports xmlReaderTypes values, not tree node types. */
    type = xmlTextReaderNodeType(reader);
    if (type != XML_READER_TYPE_ELEMENT) {
        return;
    }

    /*
     * A bounded loop prints nothing when the depth query fails with a
     * negative value, instead of counting down through negative numbers.
     */
    depth = xmlTextReaderDepth(reader);
    for (i = 0; i < depth; i++) {
        printf(" ");
    }

    id = xmlTextReaderGetAttribute(reader, (xmlChar *)"id");
    if (id != NULL) {
        printf("<%s id=\"%s\">\n", xmlTextReaderConstLocalName(reader), id);
        /* The attribute value is a copy owned by the caller. */
        xmlFree(id);
    } else {
        printf("<%s>\n", xmlTextReaderConstLocalName(reader));
    }
}
/*
 * Drive the reader through the reproduction scenario.
 *
 * Phase 1 advances with xmlTextReaderRead() until the first node at depth 3
 * is reached (or reading stops returning 1). Phase 2 continues from there
 * with xmlTextReaderNext() and prints every element it lands on, which is
 * where the walker-based and file-based readers are compared.
 */
static void check(xmlTextReaderPtr reader) {
    /* Phase 1: descend node by node to the first node at depth 3. */
    while (xmlTextReaderRead(reader) == 1) {
        if (xmlTextReaderDepth(reader) == 3) {
            break;
        }
    }

    /* Phase 2: step with Next() and report each element visited. */
    while (xmlTextReaderNext(reader) == 1) {
        processNode(reader);
    }
}
/*
 * Run the same traversal twice over test.xml: once with a reader walking a
 * preparsed document and once with a reader parsing the file directly, so
 * the two outputs can be compared.
 *
 * Returns 0 when both readers could be created, 1 otherwise.
 */
int main(void) {
    const char filename[] = "test.xml";
    xmlTextReaderPtr reader;
    xmlDocPtr doc;
    int status = 0;

    doc = xmlReadFile(filename, NULL, 0);
    if (doc == NULL) {
        fprintf(stderr, "Unable to open %s\n", filename);
        return 1;
    }

    printf("doc reader\n");
    reader = xmlReaderWalker(doc);
    if (reader != NULL) {
        check(reader);
        xmlFreeTextReader(reader);
    } else {
        fprintf(stderr, "Unable to create a document reader for %s\n",
                filename);
        status = 1;
    }
    /* The document was parsed by this program, so it is released here,
       after the reader that walked it is gone. */
    xmlFreeDoc(doc);

    printf("file reader\n");
    reader = xmlNewTextReaderFilename(filename);
    if (reader != NULL) {
        check(reader);
        xmlFreeTextReader(reader);
    } else {
        fprintf(stderr, "Unable to open %s\n", filename);
        status = 1;
    }

    return status;
}
Output:
doc reader
<subsubchild id="1.2.1">
file reader
<subsubchild id="1.2.1">
<subchild id="1.3">
<child id="2">
<child id="3">
As can be seen, the document-based reader does not move back up to the parent nodes.
The bug occurs because `xmlTextReaderNextTree(reader)` is positioned on an `XML_READER_TYPE_TEXT` node at the point where it should start backtracking, but that code path is only taken for `XML_ELEMENT_NODE` or `XML_ATTRIBUTE_NODE`.
This can be fixed by applying the following patch:
diff --git a/xmlreader.c b/xmlreader.c
index 4053269b..05176c56 100644
--- a/xmlreader.c
+++ b/xmlreader.c
@@ -1917,12 +1917,9 @@ xmlTextReaderNextTree(xmlTextReaderPtr reader)
/* if reader->node->next is NULL mean no subtree for current node,
so need to move to sibling of parent node if present */
- if ((reader->node->type == XML_ELEMENT_NODE) ||
- (reader->node->type == XML_ATTRIBUTE_NODE)) {
- reader->state = XML_TEXTREADER_BACKTRACK;
- /* This will move to parent if present */
- xmlTextReaderRead(reader);
- }
+ reader->state = XML_TEXTREADER_BACKTRACK;
+ /* This will move to parent if present */
+ xmlTextReaderRead(reader);
}
if (reader->node->next != 0) {