From 5aa56e7397668874f3a0948859c9b5a7602a6472 Mon Sep 17 00:00:00 2001 From: Nick Wellnhofer Date: Thu, 18 Apr 2024 14:21:19 +0200 Subject: [PATCH 1/7] reader: Add tests for content accessors --- testparser.c | 36 ++++++++++++++++++++++++++++++++++++ 1 file changed, 36 insertions(+) diff --git a/testparser.c b/testparser.c index cabc5f448..923a0f2b6 100644 --- a/testparser.c +++ b/testparser.c @@ -209,6 +209,41 @@ testReaderEncoding(void) { return err; } +static int +testReaderContent(void) { + xmlTextReader *reader; + const xmlChar *xml = BAD_CAST "<d>x<e>y</e><f>z</f></d>"; + xmlChar *string; + int err = 0; + + reader = xmlReaderForDoc(xml, NULL, NULL, 0); + xmlTextReaderRead(reader); + + string = xmlTextReaderReadOuterXml(reader); + if (!xmlStrEqual(string, xml)) { + fprintf(stderr, "xmlTextReaderReadOuterXml failed\n"); + err = 1; + } + xmlFree(string); + + string = xmlTextReaderReadInnerXml(reader); + if (!xmlStrEqual(string, BAD_CAST "x<e>y</e><f>z</f>")) { + fprintf(stderr, "xmlTextReaderReadInnerXml failed\n"); + err = 1; + } + xmlFree(string); + + string = xmlTextReaderReadString(reader); + if (!xmlStrEqual(string, BAD_CAST "xyz")) { + fprintf(stderr, "xmlTextReaderReadString failed\n"); + err = 1; + } + xmlFree(string); + + xmlFreeTextReader(reader); + return err; +} + #ifdef LIBXML_XINCLUDE_ENABLED typedef struct { char *message; @@ -350,6 +385,7 @@ main(void) { #endif #ifdef LIBXML_READER_ENABLED err |= testReaderEncoding(); + err |= testReaderContent(); #ifdef LIBXML_XINCLUDE_ENABLED err |= testReaderXIncludeError(); #endif -- GitLab From d2daf33e33e534db3dbd609c4cee5d2b9becafb9 Mon Sep 17 00:00:00 2001 From: Nick Wellnhofer Date: Tue, 16 Apr 2024 14:53:07 +0200 Subject: [PATCH 2/7] reader: Fix preservation of attributes Don't use 'curnode' which might be an attribute or namespace node which doesn't have an 'extra' member. 
--- xmlreader.c | 5 +---- 1 file changed, 1 insertion(+), 4 deletions(-) diff --git a/xmlreader.c b/xmlreader.c index eecca841d..1b3e49ec1 100644 --- a/xmlreader.c +++ b/xmlreader.c @@ -3892,10 +3892,7 @@ xmlTextReaderPreserve(xmlTextReaderPtr reader) { if (reader == NULL) return(NULL); - if (reader->curnode != NULL) - cur = reader->curnode; - else - cur = reader->node; + cur = reader->node; if (cur == NULL) return(NULL); -- GitLab From cdb3103ba8283cd9a9f520374881ce9574dd17c1 Mon Sep 17 00:00:00 2001 From: Nick Wellnhofer Date: Mon, 22 Apr 2024 12:23:06 +0200 Subject: [PATCH 3/7] reader: Report malloc failures --- xmlreader.c | 381 +++++++++++++++++++++++++++++++--------------------- 1 file changed, 227 insertions(+), 154 deletions(-) diff --git a/xmlreader.c b/xmlreader.c index 1b3e49ec1..d12d42344 100644 --- a/xmlreader.c +++ b/xmlreader.c @@ -161,22 +161,9 @@ struct _xmlTextReader { #define NODE_IS_PRESERVED 0x2 #define NODE_IS_SPRESERVED 0x4 -/** - * CONSTSTR: - * - * Macro used to return an interned string - */ -#define CONSTSTR(str) xmlDictLookup(reader->dict, (str), -1) -#define CONSTQSTR(p, str) xmlDictQLookup(reader->dict, (p), (str)) - static int xmlTextReaderReadTree(xmlTextReaderPtr reader); static int xmlTextReaderNextTree(xmlTextReaderPtr reader); -/************************************************************************ - * * - * Our own version of the freeing routines as we recycle nodes * - * * - ************************************************************************/ /** * DICT_FREE: * @str: a string @@ -199,6 +186,55 @@ xmlTextReaderErrMemory(xmlTextReaderPtr reader) { reader->state = XML_TEXTREADER_ERROR; } +static xmlChar * +readerStrdup(xmlTextReaderPtr reader, const xmlChar *string) { + xmlChar *copy; + + if (string == NULL) + return(NULL); + + copy = xmlStrdup(string); + if (copy == NULL) + xmlTextReaderErrMemory(reader); + + return(copy); +} + +static const xmlChar * +constString(xmlTextReaderPtr reader, const xmlChar *string) { + const 
xmlChar *dictString; + + if (string == NULL) + return(NULL); + + dictString = xmlDictLookup(reader->dict, string, -1); + if (dictString == NULL) + xmlTextReaderErrMemory(reader); + + return(dictString); +} + +static const xmlChar * +constQString(xmlTextReaderPtr reader, const xmlChar *prefix, + const xmlChar *name) { + const xmlChar *dictString; + + if (name == NULL) + return(NULL); + + dictString = xmlDictQLookup(reader->dict, prefix, name); + if (dictString == NULL) + xmlTextReaderErrMemory(reader); + + return(dictString); +} + +/************************************************************************ + * * + * Our own version of the freeing routines as we recycle nodes * + * * + ************************************************************************/ + /** * xmlTextReaderFreeProp: * @reader: the xmlTextReaderPtr used @@ -850,27 +886,18 @@ xmlTextReaderValidatePush(xmlTextReaderPtr reader) { reader->ctxt->valid &= xmlValidatePushElement(&reader->ctxt->vctxt, reader->ctxt->myDoc, node, node->name); } else { - /* TODO use the BuildQName interface */ + xmlChar buf[50]; xmlChar *qname; - qname = xmlStrdup(node->ns->prefix); - if (qname == NULL) { - xmlTextReaderErrMemory(reader); - return(-1); - } - qname = xmlStrcat(qname, BAD_CAST ":"); - if (qname == NULL) { - xmlTextReaderErrMemory(reader); - return(-1); - } - qname = xmlStrcat(qname, node->name); + qname = xmlBuildQName(node->name, node->ns->prefix, buf, 50); if (qname == NULL) { xmlTextReaderErrMemory(reader); return(-1); } reader->ctxt->valid &= xmlValidatePushElement(&reader->ctxt->vctxt, reader->ctxt->myDoc, node, qname); - xmlFree(qname); + if (qname != buf) + xmlFree(qname); } /*if (reader->ctxt->errNo == XML_ERR_NO_MEMORY) { reader->mode = XML_TEXTREADER_MODE_ERROR; @@ -958,27 +985,18 @@ xmlTextReaderValidatePop(xmlTextReaderPtr reader) { reader->ctxt->valid &= xmlValidatePopElement(&reader->ctxt->vctxt, reader->ctxt->myDoc, node, node->name); } else { - /* TODO use the BuildQName interface */ + xmlChar 
buf[50]; xmlChar *qname; - qname = xmlStrdup(node->ns->prefix); - if (qname == NULL) { - xmlTextReaderErrMemory(reader); - return(-1); - } - qname = xmlStrcat(qname, BAD_CAST ":"); - if (qname == NULL) { - xmlTextReaderErrMemory(reader); - return(-1); - } - qname = xmlStrcat(qname, node->name); + qname = xmlBuildQName(node->name, node->ns->prefix, buf, 50); if (qname == NULL) { xmlTextReaderErrMemory(reader); return(-1); } reader->ctxt->valid &= xmlValidatePopElement(&reader->ctxt->vctxt, reader->ctxt->myDoc, node, qname); - xmlFree(qname); + if (qname != buf) + xmlFree(qname); } /*if (reader->ctxt->errNo == XML_ERR_NO_MEMORY) { reader->mode = XML_TEXTREADER_MODE_ERROR; @@ -1757,7 +1775,7 @@ xmlTextReaderReadString(xmlTextReaderPtr reader) switch (node->type) { case XML_TEXT_NODE: if (node->content != NULL) - return(xmlStrdup(node->content)); + return(readerStrdup(reader, node->content)); break; case XML_ELEMENT_NODE: if (xmlTextReaderDoExpand(reader) != -1) { @@ -2214,6 +2232,7 @@ xmlFreeTextReader(xmlTextReaderPtr reader) { * Methods for XmlTextReader * * * ************************************************************************/ + /** * xmlTextReaderClose: * @reader: the xmlTextReaderPtr used @@ -2294,7 +2313,7 @@ xmlTextReaderGetAttributeNo(xmlTextReaderPtr reader, int no) { ns = ns->next; } if (ns != NULL) - return(xmlStrdup(ns->href)); + return(readerStrdup(reader, ns->href)); cur = reader->node->properties; if (cur == NULL) @@ -2307,7 +2326,8 @@ xmlTextReaderGetAttributeNo(xmlTextReaderPtr reader, int no) { /* TODO walk the DTD if present */ ret = xmlNodeListGetString(reader->node->doc, cur->children, 1); - if (ret == NULL) return(xmlStrdup((xmlChar *)"")); + if (ret == NULL) + xmlTextReaderErrMemory(reader); return(ret); } @@ -2324,9 +2344,10 @@ xmlTextReaderGetAttributeNo(xmlTextReaderPtr reader, int no) { xmlChar * xmlTextReaderGetAttribute(xmlTextReaderPtr reader, const xmlChar *name) { xmlChar *prefix = NULL; - xmlChar *localname; + const xmlChar 
*localname; xmlNsPtr ns; xmlChar *ret = NULL; + int result; if ((reader == NULL) || (name == NULL)) return(NULL); @@ -2339,43 +2360,56 @@ xmlTextReaderGetAttribute(xmlTextReaderPtr reader, const xmlChar *name) { if (reader->node->type != XML_ELEMENT_NODE) return(NULL); - localname = xmlSplitQName2(name, &prefix); + localname = xmlSplitQName4(name, &prefix); if (localname == NULL) { - /* - * Namespace default decl - */ - if (xmlStrEqual(name, BAD_CAST "xmlns")) { - ns = reader->node->nsDef; - while (ns != NULL) { - if (ns->prefix == NULL) { - return(xmlStrdup(ns->href)); - } - ns = ns->next; - } - return NULL; - } - return(xmlGetNoNsProp(reader->node, name)); - } + xmlTextReaderErrMemory(reader); + return(NULL); + } + if (prefix == NULL) { + /* + * Namespace default decl + */ + if (xmlStrEqual(name, BAD_CAST "xmlns")) { + ns = reader->node->nsDef; + while (ns != NULL) { + if (ns->prefix == NULL) { + return(readerStrdup(reader, ns->href)); + } + ns = ns->next; + } + return NULL; + } + + result = xmlNodeGetAttrValue(reader->node, name, NULL, &ret); + if (result < 0) + xmlTextReaderErrMemory(reader); + return(ret); + } /* * Namespace default decl */ if (xmlStrEqual(prefix, BAD_CAST "xmlns")) { - ns = reader->node->nsDef; - while (ns != NULL) { - if ((ns->prefix != NULL) && (xmlStrEqual(ns->prefix, localname))) { - ret = xmlStrdup(ns->href); - break; - } - ns = ns->next; - } + ns = reader->node->nsDef; + while (ns != NULL) { + if ((ns->prefix != NULL) && (xmlStrEqual(ns->prefix, localname))) { + ret = readerStrdup(reader, ns->href); + break; + } + ns = ns->next; + } } else { - ns = xmlSearchNs(reader->node->doc, reader->node, prefix); - if (ns != NULL) - ret = xmlGetNsProp(reader->node, localname, ns->href); - } + result = xmlSearchNsSafe(reader->node, prefix, &ns); + if (result < 0) + xmlTextReaderErrMemory(reader); + if (ns != NULL) { + result = xmlNodeGetAttrValue(reader->node, localname, ns->href, + &ret); + if (result < 0) + xmlTextReaderErrMemory(reader); + } + } 
- xmlFree(localname); if (prefix != NULL) xmlFree(prefix); return(ret); @@ -2396,8 +2430,10 @@ xmlTextReaderGetAttribute(xmlTextReaderPtr reader, const xmlChar *name) { xmlChar * xmlTextReaderGetAttributeNs(xmlTextReaderPtr reader, const xmlChar *localName, const xmlChar *namespaceURI) { + xmlChar *ret = NULL; xmlChar *prefix = NULL; xmlNsPtr ns; + int result; if ((reader == NULL) || (localName == NULL)) return(NULL); @@ -2411,21 +2447,25 @@ xmlTextReaderGetAttributeNs(xmlTextReaderPtr reader, const xmlChar *localName, return(NULL); if (xmlStrEqual(namespaceURI, BAD_CAST "http://www.w3.org/2000/xmlns/")) { - if (! xmlStrEqual(localName, BAD_CAST "xmlns")) { - prefix = BAD_CAST localName; - } - ns = reader->node->nsDef; - while (ns != NULL) { - if ((prefix == NULL && ns->prefix == NULL) || - ((ns->prefix != NULL) && (xmlStrEqual(ns->prefix, localName)))) { - return xmlStrdup(ns->href); - } - ns = ns->next; - } - return NULL; + if (! xmlStrEqual(localName, BAD_CAST "xmlns")) { + prefix = BAD_CAST localName; + } + ns = reader->node->nsDef; + while (ns != NULL) { + if ((prefix == NULL && ns->prefix == NULL) || + ((ns->prefix != NULL) && (xmlStrEqual(ns->prefix, localName)))) { + return readerStrdup(reader, ns->href); + } + ns = ns->next; + } + return NULL; } - return(xmlGetNsProp(reader->node, localName, namespaceURI)); + result = xmlNodeGetAttrValue(reader->node, localName, namespaceURI, &ret); + if (result < 0) + xmlTextReaderErrMemory(reader); + + return(ret); } /** @@ -2493,16 +2533,21 @@ xmlTextReaderGetRemainder(xmlTextReaderPtr reader) { xmlChar * xmlTextReaderLookupNamespace(xmlTextReaderPtr reader, const xmlChar *prefix) { xmlNsPtr ns; + int result; if (reader == NULL) return(NULL); if (reader->node == NULL) return(NULL); - ns = xmlSearchNs(reader->node->doc, reader->node, prefix); + result = xmlSearchNsSafe(reader->node, prefix, &ns); + if (result < 0) { + xmlTextReaderErrMemory(reader); + return(NULL); + } if (ns == NULL) return(NULL); - 
return(xmlStrdup(ns->href)); + return(readerStrdup(reader, ns->href)); } /** @@ -2568,7 +2613,7 @@ xmlTextReaderMoveToAttributeNo(xmlTextReaderPtr reader, int no) { int xmlTextReaderMoveToAttribute(xmlTextReaderPtr reader, const xmlChar *name) { xmlChar *prefix = NULL; - xmlChar *localname; + const xmlChar *localname; xmlNsPtr ns; xmlAttrPtr prop; @@ -2581,8 +2626,12 @@ xmlTextReaderMoveToAttribute(xmlTextReaderPtr reader, const xmlChar *name) { if (reader->node->type != XML_ELEMENT_NODE) return(0); - localname = xmlSplitQName2(name, &prefix); + localname = xmlSplitQName4(name, &prefix); if (localname == NULL) { + xmlTextReaderErrMemory(reader); + return(-1); + } + if (prefix == NULL) { /* * Namespace default decl */ @@ -2644,15 +2693,11 @@ xmlTextReaderMoveToAttribute(xmlTextReaderPtr reader, const xmlChar *name) { prop = prop->next; } not_found: - if (localname != NULL) - xmlFree(localname); if (prefix != NULL) xmlFree(prefix); return(0); found: - if (localname != NULL) - xmlFree(localname); if (prefix != NULL) xmlFree(prefix); return(1); @@ -2888,7 +2933,7 @@ xmlTextReaderConstEncoding(xmlTextReaderPtr reader) { else if (reader->doc != NULL) encoding = reader->doc->encoding; - return(CONSTSTR(encoding)); + return(constString(reader, encoding)); } @@ -3062,14 +3107,14 @@ xmlTextReaderLocalName(xmlTextReaderPtr reader) { if (node->type == XML_NAMESPACE_DECL) { xmlNsPtr ns = (xmlNsPtr) node; if (ns->prefix == NULL) - return(xmlStrdup(BAD_CAST "xmlns")); + return(readerStrdup(reader, BAD_CAST "xmlns")); else - return(xmlStrdup(ns->prefix)); + return(readerStrdup(reader, ns->prefix)); } if ((node->type != XML_ELEMENT_NODE) && (node->type != XML_ATTRIBUTE_NODE)) return(xmlTextReaderName(reader)); - return(xmlStrdup(node->name)); + return(readerStrdup(reader, node->name)); } /** @@ -3093,7 +3138,7 @@ xmlTextReaderConstLocalName(xmlTextReaderPtr reader) { if (node->type == XML_NAMESPACE_DECL) { xmlNsPtr ns = (xmlNsPtr) node; if (ns->prefix == NULL) - 
return(CONSTSTR(BAD_CAST "xmlns")); + return(constString(reader, BAD_CAST "xmlns")); else return(ns->prefix); } @@ -3128,41 +3173,41 @@ xmlTextReaderName(xmlTextReaderPtr reader) { case XML_ATTRIBUTE_NODE: if ((node->ns == NULL) || (node->ns->prefix == NULL)) - return(xmlStrdup(node->name)); + return(readerStrdup(reader, node->name)); - ret = xmlStrdup(node->ns->prefix); - ret = xmlStrcat(ret, BAD_CAST ":"); - ret = xmlStrcat(ret, node->name); + ret = xmlBuildQName(node->name, node->ns->prefix, NULL, 0); + if (ret == NULL) + xmlTextReaderErrMemory(reader); return(ret); case XML_TEXT_NODE: - return(xmlStrdup(BAD_CAST "#text")); + return(readerStrdup(reader, BAD_CAST "#text")); case XML_CDATA_SECTION_NODE: - return(xmlStrdup(BAD_CAST "#cdata-section")); + return(readerStrdup(reader, BAD_CAST "#cdata-section")); case XML_ENTITY_NODE: case XML_ENTITY_REF_NODE: - return(xmlStrdup(node->name)); + return(readerStrdup(reader, node->name)); case XML_PI_NODE: - return(xmlStrdup(node->name)); + return(readerStrdup(reader, node->name)); case XML_COMMENT_NODE: - return(xmlStrdup(BAD_CAST "#comment")); + return(readerStrdup(reader, BAD_CAST "#comment")); case XML_DOCUMENT_NODE: case XML_HTML_DOCUMENT_NODE: - return(xmlStrdup(BAD_CAST "#document")); + return(readerStrdup(reader, BAD_CAST "#document")); case XML_DOCUMENT_FRAG_NODE: - return(xmlStrdup(BAD_CAST "#document-fragment")); + return(readerStrdup(reader, BAD_CAST "#document-fragment")); case XML_NOTATION_NODE: - return(xmlStrdup(node->name)); + return(readerStrdup(reader, node->name)); case XML_DOCUMENT_TYPE_NODE: case XML_DTD_NODE: - return(xmlStrdup(node->name)); + return(readerStrdup(reader, node->name)); case XML_NAMESPACE_DECL: { xmlNsPtr ns = (xmlNsPtr) node; - ret = xmlStrdup(BAD_CAST "xmlns"); if (ns->prefix == NULL) - return(ret); - ret = xmlStrcat(ret, BAD_CAST ":"); - ret = xmlStrcat(ret, ns->prefix); + return(readerStrdup(reader, BAD_CAST "xmlns")); + ret = xmlBuildQName(ns->prefix, BAD_CAST "xmlns", NULL, 0); 
+ if (ret == NULL) + xmlTextReaderErrMemory(reader); return(ret); } @@ -3201,34 +3246,34 @@ xmlTextReaderConstName(xmlTextReaderPtr reader) { if ((node->ns == NULL) || (node->ns->prefix == NULL)) return(node->name); - return(CONSTQSTR(node->ns->prefix, node->name)); + return(constQString(reader, node->ns->prefix, node->name)); case XML_TEXT_NODE: - return(CONSTSTR(BAD_CAST "#text")); + return(constString(reader, BAD_CAST "#text")); case XML_CDATA_SECTION_NODE: - return(CONSTSTR(BAD_CAST "#cdata-section")); + return(constString(reader, BAD_CAST "#cdata-section")); case XML_ENTITY_NODE: case XML_ENTITY_REF_NODE: - return(CONSTSTR(node->name)); + return(constString(reader, node->name)); case XML_PI_NODE: - return(CONSTSTR(node->name)); + return(constString(reader, node->name)); case XML_COMMENT_NODE: - return(CONSTSTR(BAD_CAST "#comment")); + return(constString(reader, BAD_CAST "#comment")); case XML_DOCUMENT_NODE: case XML_HTML_DOCUMENT_NODE: - return(CONSTSTR(BAD_CAST "#document")); + return(constString(reader, BAD_CAST "#document")); case XML_DOCUMENT_FRAG_NODE: - return(CONSTSTR(BAD_CAST "#document-fragment")); + return(constString(reader, BAD_CAST "#document-fragment")); case XML_NOTATION_NODE: - return(CONSTSTR(node->name)); + return(constString(reader, node->name)); case XML_DOCUMENT_TYPE_NODE: case XML_DTD_NODE: - return(CONSTSTR(node->name)); + return(constString(reader, node->name)); case XML_NAMESPACE_DECL: { xmlNsPtr ns = (xmlNsPtr) node; if (ns->prefix == NULL) - return(CONSTSTR(BAD_CAST "xmlns")); - return(CONSTQSTR(BAD_CAST "xmlns", ns->prefix)); + return(constString(reader, BAD_CAST "xmlns")); + return(constQString(reader, BAD_CAST "xmlns", ns->prefix)); } case XML_ELEMENT_DECL: @@ -3263,13 +3308,13 @@ xmlTextReaderPrefix(xmlTextReaderPtr reader) { xmlNsPtr ns = (xmlNsPtr) node; if (ns->prefix == NULL) return(NULL); - return(xmlStrdup(BAD_CAST "xmlns")); + return(readerStrdup(reader, BAD_CAST "xmlns")); } if ((node->type != XML_ELEMENT_NODE) && 
(node->type != XML_ATTRIBUTE_NODE)) return(NULL); if ((node->ns != NULL) && (node->ns->prefix != NULL)) - return(xmlStrdup(node->ns->prefix)); + return(readerStrdup(reader, node->ns->prefix)); return(NULL); } @@ -3295,13 +3340,13 @@ xmlTextReaderConstPrefix(xmlTextReaderPtr reader) { xmlNsPtr ns = (xmlNsPtr) node; if (ns->prefix == NULL) return(NULL); - return(CONSTSTR(BAD_CAST "xmlns")); + return(constString(reader, BAD_CAST "xmlns")); } if ((node->type != XML_ELEMENT_NODE) && (node->type != XML_ATTRIBUTE_NODE)) return(NULL); if ((node->ns != NULL) && (node->ns->prefix != NULL)) - return(CONSTSTR(node->ns->prefix)); + return(constString(reader, node->ns->prefix)); return(NULL); } @@ -3324,12 +3369,12 @@ xmlTextReaderNamespaceUri(xmlTextReaderPtr reader) { else node = reader->node; if (node->type == XML_NAMESPACE_DECL) - return(xmlStrdup(BAD_CAST "http://www.w3.org/2000/xmlns/")); + return(readerStrdup(reader, BAD_CAST "http://www.w3.org/2000/xmlns/")); if ((node->type != XML_ELEMENT_NODE) && (node->type != XML_ATTRIBUTE_NODE)) return(NULL); if (node->ns != NULL) - return(xmlStrdup(node->ns->href)); + return(readerStrdup(reader, node->ns->href)); return(NULL); } @@ -3352,12 +3397,12 @@ xmlTextReaderConstNamespaceUri(xmlTextReaderPtr reader) { else node = reader->node; if (node->type == XML_NAMESPACE_DECL) - return(CONSTSTR(BAD_CAST "http://www.w3.org/2000/xmlns/")); + return(constString(reader, BAD_CAST "http://www.w3.org/2000/xmlns/")); if ((node->type != XML_ELEMENT_NODE) && (node->type != XML_ATTRIBUTE_NODE)) return(NULL); if (node->ns != NULL) - return(CONSTSTR(node->ns->href)); + return(constString(reader, node->ns->href)); return(NULL); } @@ -3372,9 +3417,16 @@ xmlTextReaderConstNamespaceUri(xmlTextReaderPtr reader) { */ xmlChar * xmlTextReaderBaseUri(xmlTextReaderPtr reader) { + xmlChar *ret = NULL; + int result; + if ((reader == NULL) || (reader->node == NULL)) return(NULL); - return(xmlNodeGetBase(NULL, reader->node)); + result = xmlNodeGetBaseSafe(NULL, 
reader->node, &ret); + if (result < 0) + xmlTextReaderErrMemory(reader); + + return(ret); } /** @@ -3390,13 +3442,16 @@ const xmlChar * xmlTextReaderConstBaseUri(xmlTextReaderPtr reader) { xmlChar *tmp; const xmlChar *ret; + int result; if ((reader == NULL) || (reader->node == NULL)) return(NULL); - tmp = xmlNodeGetBase(NULL, reader->node); + result = xmlNodeGetBaseSafe(NULL, reader->node, &tmp); + if (result < 0) + xmlTextReaderErrMemory(reader); if (tmp == NULL) return(NULL); - ret = CONSTSTR(tmp); + ret = constString(reader, tmp); xmlFree(tmp); return(ret); } @@ -3509,23 +3564,24 @@ xmlTextReaderValue(xmlTextReaderPtr reader) { switch (node->type) { case XML_NAMESPACE_DECL: - return(xmlStrdup(((xmlNsPtr) node)->href)); + return(readerStrdup(reader, ((xmlNsPtr) node)->href)); case XML_ATTRIBUTE_NODE:{ xmlAttrPtr attr = (xmlAttrPtr) node; + xmlDocPtr doc = NULL; + xmlChar *ret; if (attr->parent != NULL) - return (xmlNodeListGetString - (attr->parent->doc, attr->children, 1)); - else - return (xmlNodeListGetString(NULL, attr->children, 1)); - break; + doc = attr->parent->doc; + ret = xmlNodeListGetString(doc, attr->children, 1); + if (ret == NULL) + xmlTextReaderErrMemory(reader); + return(ret); } case XML_TEXT_NODE: case XML_CDATA_SECTION_NODE: case XML_PI_NODE: case XML_COMMENT_NODE: - if (node->content != NULL) - return (xmlStrdup(node->content)); + return(readerStrdup(reader, node->content)); default: break; } @@ -3668,7 +3724,7 @@ xmlTextReaderConstXmlLang(xmlTextReaderPtr reader) { tmp = xmlNodeGetLang(reader->node); if (tmp == NULL) return(NULL); - ret = CONSTSTR(tmp); + ret = constString(reader, tmp); xmlFree(tmp); return(ret); } @@ -3688,7 +3744,7 @@ const xmlChar * xmlTextReaderConstString(xmlTextReaderPtr reader, const xmlChar *str) { if (reader == NULL) return(NULL); - return(CONSTSTR(str)); + return(constString(reader, str)); } /** @@ -4508,7 +4564,7 @@ xmlTextReaderConstXmlVersion(xmlTextReaderPtr reader) { if (doc->version == NULL) return(NULL); else 
- return(CONSTSTR(doc->version)); + return(constString(reader, doc->version)); } /** @@ -4921,6 +4977,8 @@ xmlTextReaderSetup(xmlTextReaderPtr reader, if (options & XML_PARSE_XINCLUDE) { reader->xinclude = 1; reader->xinclude_name = xmlDictLookup(reader->dict, XINCLUDE_NODE, -1); + if (reader->xinclude_name == NULL) + return(-1); options -= XML_PARSE_XINCLUDE; } else reader->xinclude = 0; @@ -4947,9 +5005,12 @@ xmlTextReaderSetup(xmlTextReaderPtr reader, if (encoding != NULL) xmlSwitchEncodingName(reader->ctxt, encoding); if ((URL != NULL) && (reader->ctxt->input != NULL) && - (reader->ctxt->input->filename == NULL)) + (reader->ctxt->input->filename == NULL)) { reader->ctxt->input->filename = (char *) xmlStrdup((const xmlChar *) URL); + if (reader->ctxt->input->filename == NULL) + return(-1); + } reader->doc = NULL; @@ -5078,7 +5139,10 @@ xmlReaderForFile(const char *filename, const char *encoding, int options) reader = xmlNewTextReaderFilename(filename); if (reader == NULL) return (NULL); - xmlTextReaderSetup(reader, NULL, NULL, encoding, options); + if (xmlTextReaderSetup(reader, NULL, NULL, encoding, options) < 0) { + xmlFreeTextReader(reader); + return (NULL); + } return (reader); } @@ -5112,7 +5176,10 @@ xmlReaderForMemory(const char *buffer, int size, const char *URL, return (NULL); } reader->allocs |= XML_TEXTREADER_INPUT; - xmlTextReaderSetup(reader, NULL, URL, encoding, options); + if (xmlTextReaderSetup(reader, NULL, URL, encoding, options) < 0) { + xmlFreeTextReader(reader); + return (NULL); + } return (reader); } @@ -5149,7 +5216,10 @@ xmlReaderForFd(int fd, const char *URL, const char *encoding, int options) return (NULL); } reader->allocs |= XML_TEXTREADER_INPUT; - xmlTextReaderSetup(reader, NULL, URL, encoding, options); + if (xmlTextReaderSetup(reader, NULL, URL, encoding, options) < 0) { + xmlFreeTextReader(reader); + return (NULL); + } return (reader); } @@ -5191,7 +5261,10 @@ xmlReaderForIO(xmlInputReadCallback ioread, xmlInputCloseCallback 
ioclose, return (NULL); } reader->allocs |= XML_TEXTREADER_INPUT; - xmlTextReaderSetup(reader, NULL, URL, encoding, options); + if (xmlTextReaderSetup(reader, NULL, URL, encoding, options) < 0) { + xmlFreeTextReader(reader); + return (NULL); + } return (reader); } -- GitLab From f69647811c6c86c42bd9cb036fc76db5ce5fc6b0 Mon Sep 17 00:00:00 2001 From: Nick Wellnhofer Date: Mon, 22 Apr 2024 12:23:39 +0200 Subject: [PATCH 4/7] reader: Rework xmlTextReaderRead{Inner,Outer}Xml Use an xmlOutputBuffer. Report malloc failures. --- xmlreader.c | 135 +++++++++++++++++++++++++++++----------------------- 1 file changed, 75 insertions(+), 60 deletions(-) diff --git a/xmlreader.c b/xmlreader.c index d12d42344..c0d3c3f86 100644 --- a/xmlreader.c +++ b/xmlreader.c @@ -1655,6 +1655,37 @@ xmlTextReaderNext(xmlTextReaderPtr reader) { } #ifdef LIBXML_WRITER_ENABLED +static void +xmlTextReaderDumpCopy(xmlTextReaderPtr reader, xmlOutputBufferPtr output, + xmlNodePtr node) { + if ((node->type == XML_DTD_NODE) || + (node->type == XML_ELEMENT_DECL) || + (node->type == XML_ATTRIBUTE_DECL) || + (node->type == XML_ENTITY_DECL)) + return; + + if ((node->type == XML_DOCUMENT_NODE) || + (node->type == XML_HTML_DOCUMENT_NODE)) { + xmlNodeDumpOutput(output, node->doc, node, 0, 0, NULL); + } else { + xmlNodePtr copy; + + /* + * Create a copy to make sure that namespace declarations from + * ancestors are added. + */ + copy = xmlDocCopyNode(node, node->doc, 1); + if (copy == NULL) { + xmlTextReaderErrMemory(reader); + return; + } + + xmlNodeDumpOutput(output, copy->doc, copy, 0, 0, NULL); + + xmlFreeNode(copy); + } +} + /** * xmlTextReaderReadInnerXml: * @reader: the xmlTextReaderPtr used @@ -1666,47 +1697,36 @@ xmlTextReaderNext(xmlTextReaderPtr reader) { * string must be deallocated by the caller. 
*/ xmlChar * -xmlTextReaderReadInnerXml(xmlTextReaderPtr reader ATTRIBUTE_UNUSED) +xmlTextReaderReadInnerXml(xmlTextReaderPtr reader) { - xmlChar *resbuf; - xmlNodePtr node, cur_node; - xmlBufferPtr buff, buff2; - xmlDocPtr doc; + xmlOutputBufferPtr output; + xmlNodePtr cur; + xmlChar *ret; - if (xmlTextReaderExpand(reader) == NULL) { - return NULL; - } - doc = reader->node->doc; - buff = xmlBufferCreate(); - if (buff == NULL) - return NULL; - xmlBufferSetAllocationScheme(buff, XML_BUFFER_ALLOC_DOUBLEIT); - for (cur_node = reader->node->children; cur_node != NULL; - cur_node = cur_node->next) { - /* XXX: Why is the node copied? */ - node = xmlDocCopyNode(cur_node, doc, 1); - /* XXX: Why do we need a second buffer? */ - buff2 = xmlBufferCreate(); - xmlBufferSetAllocationScheme(buff2, XML_BUFFER_ALLOC_DOUBLEIT); - if (xmlNodeDump(buff2, doc, node, 0, 0) == -1) { - xmlFreeNode(node); - xmlBufferFree(buff2); - xmlBufferFree(buff); - return NULL; - } - xmlBufferCat(buff, buff2->content); - xmlFreeNode(node); - xmlBufferFree(buff2); + if (xmlTextReaderExpand(reader) == NULL) + return(NULL); + + if (reader->node == NULL) + return(NULL); + + output = xmlAllocOutputBuffer(NULL); + if (output == NULL) { + xmlTextReaderErrMemory(reader); + return(NULL); } - resbuf = buff->content; - buff->content = NULL; - xmlBufferFree(buff); - return resbuf; + for (cur = reader->node->children; cur != NULL; cur = cur->next) + xmlTextReaderDumpCopy(reader, output, cur); + + if (output->error) + xmlCtxtErrIO(reader->ctxt, output->error, NULL); + + ret = xmlBufDetach(output->buffer); + xmlOutputBufferClose(output); + + return(ret); } -#endif -#ifdef LIBXML_WRITER_ENABLED /** * xmlTextReaderReadOuterXml: * @reader: the xmlTextReaderPtr used @@ -1718,38 +1738,33 @@ xmlTextReaderReadInnerXml(xmlTextReaderPtr reader ATTRIBUTE_UNUSED) * by the caller. 
*/ xmlChar * -xmlTextReaderReadOuterXml(xmlTextReaderPtr reader ATTRIBUTE_UNUSED) +xmlTextReaderReadOuterXml(xmlTextReaderPtr reader) { - xmlChar *resbuf; + xmlOutputBufferPtr output; xmlNodePtr node; - xmlBufferPtr buff; - xmlDocPtr doc; + xmlChar *ret; + + if (xmlTextReaderExpand(reader) == NULL) + return(NULL); - if (xmlTextReaderExpand(reader) == NULL) { - return NULL; - } node = reader->node; - doc = node->doc; - /* XXX: Why is the node copied? */ - if (node->type == XML_DTD_NODE) { - node = (xmlNodePtr) xmlCopyDtd((xmlDtdPtr) node); - } else { - node = xmlDocCopyNode(node, doc, 1); - } - buff = xmlBufferCreate(); - xmlBufferSetAllocationScheme(buff, XML_BUFFER_ALLOC_DOUBLEIT); - if (xmlNodeDump(buff, doc, node, 0, 0) == -1) { - xmlFreeNode(node); - xmlBufferFree(buff); - return NULL; + if (node == NULL) + return(NULL); + + output = xmlAllocOutputBuffer(NULL); + if (output == NULL) { + xmlTextReaderErrMemory(reader); + return(NULL); } - resbuf = buff->content; - buff->content = NULL; + xmlTextReaderDumpCopy(reader, output, node); + if (output->error) + xmlCtxtErrIO(reader->ctxt, output->error, NULL); + + ret = xmlBufDetach(output->buffer); + xmlOutputBufferClose(output); - xmlFreeNode(node); - xmlBufferFree(buff); - return resbuf; + return(ret); } #endif -- GitLab From 7cbf609ae842f58c9d58ae69ab066b557c434435 Mon Sep 17 00:00:00 2001 From: Nick Wellnhofer Date: Thu, 18 Apr 2024 12:57:15 +0200 Subject: [PATCH 5/7] reader: Make xmlTextReaderReadString non-recursive Also report malloc failures. Fixes #607. 
--- xmlreader.c | 121 +++++++++++++++++++++++++--------------------------- 1 file changed, 57 insertions(+), 64 deletions(-) diff --git a/xmlreader.c b/xmlreader.c index c0d3c3f86..31ff0caa9 100644 --- a/xmlreader.c +++ b/xmlreader.c @@ -1179,54 +1179,6 @@ xmlTextReaderDoExpand(xmlTextReaderPtr reader) { return(1); } -/** - * xmlTextReaderCollectSiblings: - * @node: the first child - * - * Traverse depth-first through all sibling nodes and their children - * nodes and concatenate their content. This is an auxiliary function - * to xmlTextReaderReadString. - * - * Returns a string containing the content, or NULL in case of error. - */ -static xmlChar * -xmlTextReaderCollectSiblings(xmlNodePtr node) -{ - xmlBufferPtr buffer; - xmlChar *ret; - - if ((node == NULL) || (node->type == XML_NAMESPACE_DECL)) - return(NULL); - - buffer = xmlBufferCreate(); - if (buffer == NULL) - return NULL; - xmlBufferSetAllocationScheme(buffer, XML_BUFFER_ALLOC_DOUBLEIT); - - for ( ; node != NULL; node = node->next) { - switch (node->type) { - case XML_TEXT_NODE: - case XML_CDATA_SECTION_NODE: - xmlBufferCat(buffer, node->content); - break; - case XML_ELEMENT_NODE: { - xmlChar *tmp; - - tmp = xmlTextReaderCollectSiblings(node->children); - xmlBufferCat(buffer, tmp); - xmlFree(tmp); - break; - } - default: - break; - } - } - ret = buffer->content; - buffer->content = NULL; - xmlBufferFree(buffer); - return(ret); -} - /** * xmlTextReaderRead: * @reader: the xmlTextReaderPtr used @@ -1781,29 +1733,70 @@ xmlTextReaderReadOuterXml(xmlTextReaderPtr reader) xmlChar * xmlTextReaderReadString(xmlTextReaderPtr reader) { - xmlNodePtr node; + xmlNodePtr node, cur; + xmlBufPtr buf; + xmlChar *ret; if ((reader == NULL) || (reader->node == NULL)) return(NULL); node = (reader->curnode != NULL) ? 
reader->curnode : reader->node; switch (node->type) { - case XML_TEXT_NODE: - if (node->content != NULL) - return(readerStrdup(reader, node->content)); - break; - case XML_ELEMENT_NODE: - if (xmlTextReaderDoExpand(reader) != -1) { - return xmlTextReaderCollectSiblings(node->children); - } - break; - case XML_ATTRIBUTE_NODE: - /* TODO */ - break; - default: - break; + case XML_TEXT_NODE: + case XML_CDATA_SECTION_NODE: + case XML_ELEMENT_NODE: + break; + case XML_ATTRIBUTE_NODE: + /* TODO */ + break; + default: + break; } - return(NULL); + + buf = xmlBufCreateSize(30); + if (buf == NULL) { + xmlTextReaderErrMemory(reader); + return(NULL); + } + xmlBufSetAllocationScheme(buf, XML_BUFFER_ALLOC_DOUBLEIT); + + cur = node; + while (cur != NULL) { + switch (cur->type) { + case XML_TEXT_NODE: + case XML_CDATA_SECTION_NODE: + xmlBufCat(buf, cur->content); + break; + + case XML_ELEMENT_NODE: + if (cur->children != NULL) { + cur = cur->children; + continue; + } + break; + + default: + break; + } + + if (cur == node) + goto done; + + while (cur->next == NULL) { + cur = cur->parent; + if (cur == node) + goto done; + } + cur = cur->next; + } + +done: + ret = xmlBufDetach(buf); + if (ret == NULL) + xmlTextReaderErrMemory(reader); + + xmlBufFree(buf); + return(ret); } #if 0 -- GitLab From 087a346448a313eb7b6fbb688e51ee6ffc73513f Mon Sep 17 00:00:00 2001 From: Nick Wellnhofer Date: Thu, 18 Apr 2024 11:57:46 +0200 Subject: [PATCH 6/7] xinclude: Increase error count in xmlXIncludeErrMemory --- xinclude.c | 1 + 1 file changed, 1 insertion(+) diff --git a/xinclude.c b/xinclude.c index 1bce7494a..1c7c8438a 100644 --- a/xinclude.c +++ b/xinclude.c @@ -134,6 +134,7 @@ xmlXIncludeErrMemory(xmlXIncludeCtxtPtr ctxt) { ctxt->errNo = XML_ERR_NO_MEMORY; ctxt->fatalErr = 1; + ctxt->nbErrors++; xmlRaiseMemoryError(ctxt->errorHandler, NULL, ctxt->errorCtxt, XML_FROM_XINCLUDE, NULL); -- GitLab From b62ccf7f10db52d5341fe1e01d9bc3fc5e309e99 Mon Sep 17 00:00:00 2001 From: Nick Wellnhofer Date: Tue, 16 
Apr 2024 13:24:12 +0200 Subject: [PATCH 7/7] fuzz: Add fuzzer for XML reader API --- fuzz/.gitignore | 1 + fuzz/Makefile.am | 23 +- fuzz/fuzz.h | 3 + fuzz/genSeed.c | 14 ++ fuzz/reader.c | 545 ++++++++++++++++++++++++++++++++++++++++++++++ fuzz/testFuzzer.c | 14 ++ 6 files changed, 599 insertions(+), 1 deletion(-) create mode 100644 fuzz/reader.c diff --git a/fuzz/.gitignore b/fuzz/.gitignore index 537d26044..40d11bf9f 100644 --- a/fuzz/.gitignore +++ b/fuzz/.gitignore @@ -2,6 +2,7 @@ api corpus/ genSeed html +reader regexp schema seed/ diff --git a/fuzz/Makefile.am b/fuzz/Makefile.am index 155a0293e..7fc216a13 100644 --- a/fuzz/Makefile.am +++ b/fuzz/Makefile.am @@ -1,5 +1,6 @@ AUTOMAKE_OPTIONS = -Wno-syntax -EXTRA_PROGRAMS = api genSeed html regexp schema uri valid xinclude xml xpath +EXTRA_PROGRAMS = genSeed \ + api html reader regexp schema uri valid xinclude xml xpath check_PROGRAMS = testFuzzer EXTRA_DIST = html.dict regexp.dict schema.dict xml.dict xpath.dict \ static_seed/uri static_seed/regexp fuzz.h @@ -115,6 +116,26 @@ fuzz-html: html$(EXEEXT) seed/html.stamp $$XML_FUZZ_OPTIONS \ corpus/html seed/html +# Reader fuzzer + +seed/reader.stamp: genSeed$(EXEEXT) + @mkdir -p seed/reader + ./genSeed$(EXEEXT) reader \ + $(XML_SEED_CORPUS_SRC) \ + '$(top_srcdir)/test/XInclude/docs/*' + @touch seed/reader.stamp + +reader_SOURCES = reader.c fuzz.c +reader_LDFLAGS = $(AM_LDFLAGS) -fsanitize=fuzzer + +fuzz-reader: reader$(EXEEXT) seed/reader.stamp + @mkdir -p corpus/reader + ./reader$(EXEEXT) \ + -dict=xml.dict \ + -max_len=$(XML_MAX_LEN) \ + $$XML_FUZZ_OPTIONS \ + corpus/reader seed/reader + # API fuzzer api_SOURCES = api.c fuzz.c diff --git a/fuzz/fuzz.h b/fuzz/fuzz.h index c187d8b0f..c92366887 100644 --- a/fuzz/fuzz.h +++ b/fuzz/fuzz.h @@ -18,6 +18,9 @@ extern "C" { #if defined(LIBXML_HTML_ENABLED) #define HAVE_HTML_FUZZER #endif +#if defined(LIBXML_READER_ENABLED) + #define HAVE_READER_FUZZER +#endif #if defined(LIBXML_REGEXP_ENABLED) #define HAVE_REGEXP_FUZZER 
#endif diff --git a/fuzz/genSeed.c b/fuzz/genSeed.c index a31547245..38e2f7500 100644 --- a/fuzz/genSeed.c +++ b/fuzz/genSeed.c @@ -27,6 +27,8 @@ #define SEED_BUF_SIZE 16384 #define EXPR_SIZE 4500 +#define FLAG_READER (1 << 0) + typedef int (*fileFunc)(const char *base, FILE *out); @@ -41,6 +43,7 @@ static struct { const char *fuzzer; int counter; char cwd[PATH_SIZE]; + int flags; } globalData; #if defined(HAVE_SCHEMA_FUZZER) || \ @@ -117,6 +120,11 @@ processXml(const char *docFile, FILE *out) { /* Max allocations. */ xmlFuzzWriteInt(out, 0, 4); + if (globalData.flags & FLAG_READER) { + /* Initial reader program with a couple of OP_READs */ + xmlFuzzWriteString(out, "\x01\x01\x01\x01\x01\x01\x01\x01"); + } + fuzzRecorderInit(out); doc = xmlReadFile(docFile, NULL, opts); @@ -415,6 +423,12 @@ main(int argc, const char **argv) { #ifdef HAVE_HTML_FUZZER processArg = processPattern; globalData.processFile = processHtml; +#endif + } else if (strcmp(fuzzer, "reader") == 0) { +#ifdef HAVE_READER_FUZZER + processArg = processPattern; + globalData.flags |= FLAG_READER; + globalData.processFile = processXml; #endif } else if (strcmp(fuzzer, "schema") == 0) { #ifdef HAVE_SCHEMA_FUZZER diff --git a/fuzz/reader.c b/fuzz/reader.c new file mode 100644 index 000000000..5721d356a --- /dev/null +++ b/fuzz/reader.c @@ -0,0 +1,545 @@ +/* + * reader.c: a libFuzzer target to test the XML reader API. + * + * See Copyright for the status of this software. 
+ */ + +#include <libxml/catalog.h> +#include <libxml/parser.h> +#include <libxml/tree.h> +#include <libxml/xmlerror.h> +#include <libxml/xmlreader.h> +#include <libxml/xmlsave.h> +#include "fuzz.h" + +#include <string.h> + +#if 0 + #define DEBUG printf +#else + #define DEBUG noop +#endif + +typedef enum { + OP_READ = 1, + OP_READ_INNER_XML, + OP_READ_OUTER_XML, + OP_READ_STRING, + OP_READ_ATTRIBUTE_VALUE, + OP_ATTRIBUTE_COUNT, + OP_DEPTH, + OP_HAS_ATTRIBUTES, + OP_HAS_VALUE, + OP_IS_DEFAULT, + OP_IS_EMPTY_ELEMENT, + OP_NODE_TYPE, + OP_QUOTE_CHAR, + OP_READ_STATE, + OP_IS_NAMESPACE_DECL, + OP_CONST_BASE_URI, + OP_CONST_LOCAL_NAME, + OP_CONST_NAME, + OP_CONST_NAMESPACE_URI, + OP_CONST_PREFIX, + OP_CONST_XML_LANG, + OP_CONST_VALUE, + OP_BASE_URI, + OP_LOCAL_NAME, + OP_NAME, + OP_NAMESPACE_URI, + OP_PREFIX, + OP_XML_LANG, + OP_VALUE, + OP_CLOSE, + OP_GET_ATTRIBUTE_NO, + OP_GET_ATTRIBUTE, + OP_GET_ATTRIBUTE_NS, + OP_GET_REMAINDER, + OP_LOOKUP_NAMESPACE, + OP_MOVE_TO_ATTRIBUTE_NO, + OP_MOVE_TO_ATTRIBUTE, + OP_MOVE_TO_ATTRIBUTE_NS, + OP_MOVE_TO_FIRST_ATTRIBUTE, + OP_MOVE_TO_NEXT_ATTRIBUTE, + OP_MOVE_TO_ELEMENT, + OP_NORMALIZATION, + OP_CONST_ENCODING, + OP_GET_PARSER_PROP, + OP_CURRENT_NODE, + OP_GET_PARSER_LINE_NUMBER, + OP_GET_PARSER_COLUMN_NUMBER, + OP_PRESERVE, + OP_CURRENT_DOC, + OP_EXPAND, + OP_NEXT, + OP_NEXT_SIBLING, + OP_IS_VALID, + OP_CONST_XML_VERSION, + OP_STANDALONE, + OP_BYTE_CONSUMED, + + OP_MAX +} opType; + +static void +noop(const char *fmt, ...) 
{ + (void) fmt; +} + +static void +startOp(const char *name) { + (void) name; + DEBUG("%s\n", name); +} + +int +LLVMFuzzerInitialize(int *argc ATTRIBUTE_UNUSED, + char ***argv ATTRIBUTE_UNUSED) { + xmlFuzzMemSetup(); + xmlInitParser(); +#ifdef LIBXML_CATALOG_ENABLED + xmlInitializeCatalog(); + xmlCatalogSetDefaults(XML_CATA_ALLOW_NONE); +#endif + xmlSetGenericErrorFunc(NULL, xmlFuzzErrorFunc); + xmlSetExternalEntityLoader(xmlFuzzEntityLoader); + + return 0; +} + +int +LLVMFuzzerTestOneInput(const char *data, size_t size) { + xmlTextReaderPtr reader; + xmlDocPtr doc = NULL; + const xmlError *error; + const char *docBuffer; + const unsigned char *program; + size_t maxAlloc, docSize, programSize, i; + size_t totalStringSize = 0; + int opts; + int oomReport = 0; + + xmlFuzzDataInit(data, size); + opts = (int) xmlFuzzReadInt(4); + maxAlloc = xmlFuzzReadInt(4) % (size + 100); + + program = (const unsigned char *) xmlFuzzReadString(&programSize); + if (programSize > 1000) + programSize = 1000; + + xmlFuzzReadEntities(); + docBuffer = xmlFuzzMainEntity(&docSize); + if (docBuffer == NULL) + goto exit; + + xmlFuzzMemSetLimit(maxAlloc); + reader = xmlReaderForMemory(docBuffer, docSize, NULL, NULL, opts); + if (reader == NULL) + goto exit; + + i = 0; + while (i < programSize) { + int op = program[i++]; + +#define READ_BYTE() (i < programSize ? 
program[i++] : 0) +#define FREE_STRING(str) \ + do { \ + if (str != NULL) { \ + totalStringSize += strlen((char *) str); \ + xmlFree(str); \ + } \ + } while (0) + + switch (op & 0x3F) { + case OP_READ: + default: + startOp("Read"); + xmlTextReaderRead(reader); + break; + + case OP_READ_INNER_XML: { + xmlChar *result; + + startOp("ReadInnerXml"); + result = xmlTextReaderReadInnerXml(reader); + FREE_STRING(result); + break; + } + + case OP_READ_OUTER_XML: { + xmlChar *result; + + startOp("ReadOuterXml"); + result = xmlTextReaderReadOuterXml(reader); + FREE_STRING(result); + break; + } + + case OP_READ_STRING: { + xmlChar *result; + + startOp("ReadString"); + result = xmlTextReaderReadString(reader); + FREE_STRING(result); + break; + } + + case OP_READ_ATTRIBUTE_VALUE: + startOp("ReadAttributeValue"); + xmlTextReaderReadAttributeValue(reader); + break; + + case OP_ATTRIBUTE_COUNT: + startOp("AttributeCount"); + xmlTextReaderAttributeCount(reader); + break; + + case OP_DEPTH: + startOp("Depth"); + xmlTextReaderDepth(reader); + break; + + case OP_HAS_ATTRIBUTES: + startOp("HasAttributes"); + xmlTextReaderHasAttributes(reader); + break; + + case OP_HAS_VALUE: + startOp("HasValue"); + xmlTextReaderHasValue(reader); + break; + + case OP_IS_DEFAULT: + startOp("IsDefault"); + xmlTextReaderIsDefault(reader); + break; + + case OP_IS_EMPTY_ELEMENT: + startOp("IsEmptyElement"); + xmlTextReaderIsEmptyElement(reader); + break; + + case OP_NODE_TYPE: + startOp("NodeType"); + xmlTextReaderNodeType(reader); + break; + + case OP_QUOTE_CHAR: + startOp("QuoteChar"); + xmlTextReaderQuoteChar(reader); + break; + + case OP_READ_STATE: + startOp("ReadState"); + xmlTextReaderReadState(reader); + break; + + case OP_IS_NAMESPACE_DECL: + startOp("IsNamespaceDecl"); + xmlTextReaderIsNamespaceDecl(reader); + break; + + case OP_CONST_BASE_URI: + startOp("ConstBaseUri"); + xmlTextReaderConstBaseUri(reader); + break; + + case OP_CONST_LOCAL_NAME: + startOp("ConstLocalName"); + 
xmlTextReaderConstLocalName(reader); + break; + + case OP_CONST_NAME: + startOp("ConstName"); + xmlTextReaderConstName(reader); + break; + + case OP_CONST_NAMESPACE_URI: + startOp("ConstNamespaceUri"); + xmlTextReaderConstNamespaceUri(reader); + break; + + case OP_CONST_PREFIX: + startOp("ConstPrefix"); + xmlTextReaderConstPrefix(reader); + break; + + case OP_CONST_XML_LANG: + startOp("ConstXmlLang"); + xmlTextReaderConstXmlLang(reader); + oomReport = -1; + break; + + case OP_CONST_VALUE: + startOp("ConstValue"); + xmlTextReaderConstValue(reader); + break; + + case OP_BASE_URI: { + xmlChar *result; + + startOp("BaseUri"); + result = xmlTextReaderBaseUri(reader); + FREE_STRING(result); + break; + } + + case OP_LOCAL_NAME: { + xmlChar *result; + + startOp("LocalName"); + result = xmlTextReaderLocalName(reader); + FREE_STRING(result); + break; + } + + case OP_NAME: { + xmlChar *result; + + startOp("Name"); + result = xmlTextReaderName(reader); + FREE_STRING(result); + break; + } + + case OP_NAMESPACE_URI: { + xmlChar *result; + + startOp("NamespaceUri"); + result = xmlTextReaderNamespaceUri(reader); + FREE_STRING(result); + break; + } + + case OP_PREFIX: { + xmlChar *result; + + startOp("Prefix"); + result = xmlTextReaderPrefix(reader); + FREE_STRING(result); + break; + } + + case OP_XML_LANG: { + xmlChar *result; + + startOp("XmlLang"); + result = xmlTextReaderXmlLang(reader); + oomReport = -1; + FREE_STRING(result); + break; + } + + case OP_VALUE: { + xmlChar *result; + + startOp("Value"); + result = xmlTextReaderValue(reader); + FREE_STRING(result); + break; + } + + case OP_CLOSE: + startOp("Close"); + if (doc == NULL) + doc = xmlTextReaderCurrentDoc(reader); + xmlTextReaderClose(reader); + break; + + case OP_GET_ATTRIBUTE_NO: { + xmlChar *result; + int no = READ_BYTE(); + + startOp("GetAttributeNo"); + result = xmlTextReaderGetAttributeNo(reader, no); + FREE_STRING(result); + break; + } + + case OP_GET_ATTRIBUTE: { + const xmlChar *name = 
xmlTextReaderConstName(reader); + xmlChar *result; + + startOp("GetAttribute"); + result = xmlTextReaderGetAttribute(reader, name); + FREE_STRING(result); + break; + } + + case OP_GET_ATTRIBUTE_NS: { + const xmlChar *localName, *namespaceUri; + xmlChar *result; + + startOp("GetAttributeNs"); + localName = xmlTextReaderConstLocalName(reader); + namespaceUri = xmlTextReaderConstNamespaceUri(reader); + result = xmlTextReaderGetAttributeNs(reader, localName, + namespaceUri); + FREE_STRING(result); + break; + } + + case OP_GET_REMAINDER: + startOp("GetRemainder"); + if (doc == NULL) + doc = xmlTextReaderCurrentDoc(reader); + xmlFreeParserInputBuffer(xmlTextReaderGetRemainder(reader)); + break; + + case OP_LOOKUP_NAMESPACE: { + const xmlChar *prefix = xmlTextReaderConstPrefix(reader); + xmlChar *result; + + startOp("LookupNamespace"); + result = xmlTextReaderLookupNamespace(reader, prefix); + FREE_STRING(result); + break; + } + + case OP_MOVE_TO_ATTRIBUTE_NO: { + int no = READ_BYTE(); + + startOp("MoveToAttributeNo"); + xmlTextReaderMoveToAttributeNo(reader, no); + break; + } + + case OP_MOVE_TO_ATTRIBUTE: { + const xmlChar *name = xmlTextReaderConstName(reader); + + startOp("MoveToAttribute"); + xmlTextReaderMoveToAttribute(reader, name); + break; + } + + case OP_MOVE_TO_ATTRIBUTE_NS: { + const xmlChar *localName, *namespaceUri; + + startOp("MoveToAttributeNs"); + localName = xmlTextReaderConstLocalName(reader); + namespaceUri = xmlTextReaderConstNamespaceUri(reader); + xmlTextReaderMoveToAttributeNs(reader, localName, + namespaceUri); + break; + } + + case OP_MOVE_TO_FIRST_ATTRIBUTE: + startOp("MoveToFirstAttribute"); + xmlTextReaderMoveToFirstAttribute(reader); + break; + + case OP_MOVE_TO_NEXT_ATTRIBUTE: + startOp("MoveToNextAttribute"); + xmlTextReaderMoveToNextAttribute(reader); + break; + + case OP_MOVE_TO_ELEMENT: + startOp("MoveToElement"); + xmlTextReaderMoveToElement(reader); + break; + + case OP_NORMALIZATION: + startOp("Normalization"); + 
xmlTextReaderNormalization(reader); + break; + + case OP_CONST_ENCODING: + startOp("ConstEncoding"); + xmlTextReaderConstEncoding(reader); + break; + + case OP_GET_PARSER_PROP: { + int prop = READ_BYTE(); + + startOp("GetParserProp"); + xmlTextReaderGetParserProp(reader, prop); + break; + } + + case OP_CURRENT_NODE: + startOp("CurrentNode"); + xmlTextReaderCurrentNode(reader); + break; + + case OP_GET_PARSER_LINE_NUMBER: + startOp("GetParserLineNumber"); + xmlTextReaderGetParserLineNumber(reader); + break; + + case OP_GET_PARSER_COLUMN_NUMBER: + startOp("GetParserColumnNumber"); + xmlTextReaderGetParserColumnNumber(reader); + break; + + case OP_PRESERVE: + startOp("Preserve"); + xmlTextReaderPreserve(reader); + break; + + case OP_CURRENT_DOC: { + xmlDocPtr result; + + startOp("CurrentDoc"); + result = xmlTextReaderCurrentDoc(reader); + if (doc == NULL) + doc = result; + break; + } + + case OP_EXPAND: + startOp("Expand"); + xmlTextReaderExpand(reader); + break; + + case OP_NEXT: + startOp("Next"); + xmlTextReaderNext(reader); + break; + + case OP_NEXT_SIBLING: + startOp("NextSibling"); + xmlTextReaderNextSibling(reader); + break; + + case OP_IS_VALID: + startOp("IsValid"); + xmlTextReaderIsValid(reader); + break; + + case OP_CONST_XML_VERSION: + startOp("ConstXmlVersion"); + xmlTextReaderConstXmlVersion(reader); + break; + + case OP_STANDALONE: + startOp("Standalone"); + xmlTextReaderStandalone(reader); + break; + + case OP_BYTE_CONSUMED: + startOp("ByteConsumed"); + xmlTextReaderByteConsumed(reader); + break; + } + + if (totalStringSize > docSize * 2) + break; + } + + error = xmlTextReaderGetLastError(reader); + if (error->code == XML_ERR_NO_MEMORY) + oomReport = 1; + xmlFuzzCheckMallocFailure("reader", oomReport); + + xmlFreeTextReader(reader); + + if (doc != NULL) + xmlFreeDoc(doc); + +exit: + xmlFuzzMemSetLimit(0); + xmlFuzzDataCleanup(); + xmlResetLastError(); + return(0); +} + diff --git a/fuzz/testFuzzer.c b/fuzz/testFuzzer.c index 119379d44..302964d70 100644 
--- a/fuzz/testFuzzer.c +++ b/fuzz/testFuzzer.c @@ -22,6 +22,16 @@ int fuzzHtml(const char *data, size_t size); #undef LLVMFuzzerTestOneInput #endif +#ifdef HAVE_READER_FUZZER +int fuzzReaderInit(int *argc, char ***argv); +int fuzzReader(const char *data, size_t size); +#define LLVMFuzzerInitialize fuzzReaderInit +#define LLVMFuzzerTestOneInput fuzzReader +#include "reader.c" +#undef LLVMFuzzerInitialize +#undef LLVMFuzzerTestOneInput +#endif + #ifdef HAVE_REGEXP_FUZZER int fuzzRegexpInit(int *argc, char ***argv); int fuzzRegexp(const char *data, size_t size); @@ -196,6 +206,10 @@ main(void) { if (testFuzzer(fuzzHtmlInit, fuzzHtml, "seed/html/*") != 0) ret = 1; #endif +#ifdef HAVE_READER_FUZZER + if (testFuzzer(fuzzReaderInit, fuzzReader, "seed/xml/*") != 0) + ret = 1; +#endif #ifdef HAVE_REGEXP_FUZZER if (testFuzzer(fuzzRegexpInit, fuzzRegexp, "seed/regexp/*") != 0) ret = 1; -- GitLab