Commit 3b7840cd authored by Daniel Veillard's avatar Daniel Veillard

adding namespace checkings while making sure they still parse as wellformed documents

* parser.c parserInternals.c tree.c include/libxml/parser.h
  include/libxml/xmlerror.h: adding namespace checkings
  while making sure they still parse as wellformed documents.
  Add an nsWellFormed status report to the context, and
  provide new appropriate error codes.
* Makefile.am result/namespaces/* test/namespaces/*: add
  specific regression testing for the new namespace support
* test/att5 result/noent/att5 result/att5 result/att5.sax:
  add more coverage for the attribute parsing and normalization
  code.
Daniel
parent 5f1e1f8a
Fri Sep 12 01:36:20 CEST 2003 Daniel Veillard <daniel@veillard.com>
* parser.c parserInternals.c tree.c include/libxml/parser.h
include/libxml/xmlerror.h: adding namespace checkings
while making sure they still parse as wellformed documents.
Add an nsWellFormed status report to the context, and
provide new appropriate error codes.
* Makefile.am result/namespaces/* test/namespaces/*: add
specific regression testing for the new namespace support
* test/att5 result/noent/att5 result/att5 result/att5.sax:
add more coverage for the attribute parsing and normalization
code.
Fri Sep 12 01:34:19 CEST 2003 Daniel Veillard <daniel@veillard.com>
* threads.c: backport of a thread bugfix from 2_5_X branch
......
......@@ -125,7 +125,7 @@ check-local: tests
testall : tests SVGtests SAXtests
tests: XMLtests XMLenttests SAXtests HTMLtests Validtests URItests XPathtests XPtrtests XIncludetests C14Ntests Scripttests Catatests @TEST_REGEXPS@ @TEST_SCHEMAS@ @TEST_THREADS@
tests: XMLtests XMLenttests NStests SAXtests HTMLtests Validtests URItests XPathtests XPtrtests XIncludetests C14Ntests Scripttests Catatests @TEST_REGEXPS@ @TEST_SCHEMAS@ @TEST_THREADS@
@(if [ "@PYTHON_SUBDIR@" != "" ] ; then cd python ; $(MAKE) tests ; fi)
valgrind:
......@@ -279,6 +279,29 @@ XMLtests : xmllint$(EXEEXT)
rm result.$$name result2.$$name ; \
fi ; fi ; done)
# NStests: XML Namespaces regression tests.
#
# Runs xmllint on every non-directory entry of $(srcdir)/test/namespaces.
#  - When no reference file exists in $(srcdir)/result/namespaces for the
#    entry, the run's stdout and stderr are stored there as <name> and
#    <name>.err, creating the reference output for a new test.
#  - Otherwise stdout and stderr are captured in the temporary files
#    result.<name> and error.<name>, diffed against the stored references,
#    and then removed.
# After each xmllint run, .memdump is searched for "MORY ALLO" lines other
# than "MEMORY ALLOCATED : 0", so any such line is printed.
# The leading '-' on the loop lets make continue even if the loop returns
# a non-zero status.
NStests : xmllint$(EXEEXT)
@(echo > .memdump)
@echo "##"
@echo "## XML Namespaces regression tests"
@echo "##"
-@(for i in $(srcdir)/test/namespaces/* ; do \
name=`basename $$i`; \
if [ ! -d $$i ] ; then \
if [ ! -f $(srcdir)/result/namespaces/$$name ] ; then \
echo New test file $$name ; \
$(CHECKER) $(top_builddir)/xmllint $$i \
2> $(srcdir)/result/namespaces/$$name.err \
> $(srcdir)/result/namespaces/$$name ; \
grep "MORY ALLO" .memdump | grep -v "MEMORY ALLOCATED : 0"; \
else \
echo Testing $$name ; \
$(CHECKER) $(top_builddir)/xmllint $$i 2> error.$$name > result.$$name ; \
grep "MORY ALLO" .memdump | grep -v "MEMORY ALLOCATED : 0"; \
diff $(srcdir)/result/namespaces/$$name result.$$name ; \
diff $(srcdir)/result/namespaces/$$name.err error.$$name ; \
rm result.$$name error.$$name ; \
fi ; fi ; done)
Docbtests : xmllint$(EXEEXT)
@(echo > .memdump)
@echo "##"
......
......@@ -260,6 +260,7 @@ struct _xmlParserCtxt {
void * *pushTab; /* array of data for push */
xmlHashTablePtr attsDefault; /* defaulted attributes if any */
xmlHashTablePtr attsSpecial; /* non-CDATA attributes if any */
int nsWellFormed; /* is the document XML Namespace okay */
};
/**
......
......@@ -135,8 +135,12 @@ typedef enum {
XML_ERR_INVALID_URI, /* 91 */
XML_ERR_URI_FRAGMENT, /* 92 */
XML_WAR_CATALOG_PI, /* 93 */
XML_ERR_NO_DTD /* 94 */
}xmlParserErrors;
XML_ERR_NO_DTD, /* 94 */
XML_NS_ERR_XML_NAMESPACE,
XML_NS_ERR_UNDEFINED_NAMESPACE,
XML_NS_ERR_QNAME,
XML_NS_ERR_ATTRIBUTE_REDEFINED
} xmlParserErrors;
/**
* xmlGenericErrorFunc:
......
......@@ -7302,15 +7302,13 @@ xmlParseNCNameComplex(xmlParserCtxtPtr ctxt) {
GROW;
c = CUR_CHAR(l);
if ((c == ' ') || (c == '>') || (c == '/') || /* accelerators */
(!IS_LETTER(c) && (c != '_') &&
(c != ':'))) {
(!IS_LETTER(c) && (c != '_'))) {
return(NULL);
}
while ((c != ' ') && (c != '>') && (c != '/') && /* test bigname.xml */
((IS_LETTER(c)) || (IS_DIGIT(c)) ||
(c == '.') || (c == '-') ||
(c == '_') || (c == ':') ||
(c == '.') || (c == '-') || (c == '_') ||
(IS_COMBINING(c)) ||
(IS_EXTENDER(c)))) {
if (count++ > 100) {
......@@ -7423,24 +7421,64 @@ xmlParseQName(xmlParserCtxtPtr ctxt, const xmlChar **prefix) {
GROW;
l = xmlParseNCName(ctxt);
if (l == NULL) return(NULL);
if (l == NULL) {
if (CUR == ':') {
l = xmlParseName(ctxt);
if (l != NULL) {
if ((ctxt->sax != NULL) && (ctxt->sax->error != NULL)) {
ctxt->sax->error(ctxt->userData,
"Failed to parse QName '%s'\n", l);
}
ctxt->nsWellFormed = 0;
ctxt->errNo = XML_NS_ERR_QNAME;
*prefix = NULL;
return(l);
}
}
return(NULL);
}
if (CUR == ':') {
NEXT;
p = l;
l = xmlParseNCName(ctxt);
if (l == NULL) {
xmlChar *tmp;
if ((ctxt->sax != NULL) && (ctxt->sax->error != NULL)) {
ctxt->sax->error(ctxt->userData,
"Failed to parse QName '%s:'\n", p);
}
return(NULL);
ctxt->nsWellFormed = 0;
ctxt->errNo = XML_NS_ERR_QNAME;
tmp = xmlBuildQName(BAD_CAST "", p, NULL, 0);
p = xmlDictLookup(ctxt->dict, tmp, -1);
if (tmp != NULL) xmlFree(tmp);
*prefix = NULL;
return(p);
}
if (CUR == ':') {
xmlChar *tmp;
if ((ctxt->sax != NULL) && (ctxt->sax->error != NULL)) {
ctxt->sax->error(ctxt->userData,
"Failed to parse QName '%s:%s:'\n", p, l);
}
return(NULL);
ctxt->nsWellFormed = 0;
ctxt->errNo = XML_NS_ERR_QNAME;
NEXT;
tmp = (xmlChar *) xmlParseName(ctxt);
if (tmp != NULL) {
tmp = xmlBuildQName(tmp, l, NULL, 0);
l = xmlDictLookup(ctxt->dict, tmp, -1);
if (tmp != NULL) xmlFree(tmp);
*prefix = p;
return(l);
}
tmp = xmlBuildQName(BAD_CAST "", l, NULL, 0);
l = xmlDictLookup(ctxt->dict, tmp, -1);
if (tmp != NULL) xmlFree(tmp);
*prefix = p;
return(l);
}
*prefix = p;
} else
......@@ -7895,13 +7933,29 @@ reparse:
const xmlChar *URL = xmlDictLookup(ctxt->dict, attvalue, len);
xmlURIPtr uri;
if (attname == ctxt->str_xml) {
if (URL != ctxt->str_xml_ns) {
if ((ctxt->sax != NULL) && (ctxt->sax->error != NULL))
ctxt->sax->error(ctxt->userData,
"xml namespace prefix mapped to wrong URI\n");
ctxt->nsWellFormed = 0;
}
ctxt->errNo = XML_NS_ERR_XML_NAMESPACE;
/*
* Do not keep a namespace definition node
*/
if (alloc != 0) xmlFree(attvalue);
SKIP_BLANKS;
continue;
}
uri = xmlParseURI((const char *) URL);
if (uri == NULL) {
if ((ctxt->sax != NULL) && (ctxt->sax->warning != NULL))
ctxt->sax->warning(ctxt->userData,
"xmlns:%s: %s not a valid URI\n", attname, URL);
"xmlns:%s: '%s' is not a valid URI\n",
attname, URL);
} else {
if (uri->scheme == NULL) {
if ((ctxt->pedantic) && (uri->scheme == NULL)) {
if ((ctxt->sax != NULL) &&
(ctxt->sax->warning != NULL))
ctxt->sax->warning(ctxt->userData,
......@@ -7985,8 +8039,10 @@ failed:
if ((atts[i + 1] != NULL) && (nsname == NULL)) {
if ((ctxt->sax != NULL) && (ctxt->sax->error != NULL))
ctxt->sax->error(ctxt->userData,
"Namespace prefix %s for %s on %d is not defined\n",
"Namespace prefix %s for %s on %s is not defined\n",
atts[i + 1], atts[i], localname);
ctxt->nsWellFormed = 0;
ctxt->errNo = XML_NS_ERR_UNDEFINED_NAMESPACE;
}
atts[i + 2] = nsname;
/*
......@@ -8015,8 +8071,10 @@ failed:
}
if ((nsname != NULL) && (atts[j + 2] == nsname)) {
ctxt->sax->error(ctxt->userData,
"Attribute %s in %s redefined\n",
"Namespaced Attribute %s in '%s' redefined\n",
atts[i], nsname);
ctxt->nsWellFormed = 0;
ctxt->errNo = XML_NS_ERR_ATTRIBUTE_REDEFINED;
break;
}
}
......@@ -8103,7 +8161,9 @@ failed:
if ((prefix != NULL) && (nsname == NULL)) {
if ((ctxt->sax != NULL) && (ctxt->sax->error != NULL))
ctxt->sax->error(ctxt->userData,
"Namespace prefix %s on %d is not defined\n", prefix, localname);
"Namespace prefix %s on %s is not defined\n", prefix, localname);
ctxt->nsWellFormed = 0;
ctxt->errNo = XML_NS_ERR_UNDEFINED_NAMESPACE;
}
*pref = prefix;
*URI = nsname;
......
......@@ -2309,6 +2309,7 @@ xmlInitParserCtxt(xmlParserCtxtPtr ctxt)
ctxt->userData = ctxt;
ctxt->myDoc = NULL;
ctxt->wellFormed = 1;
ctxt->nsWellFormed = 1;
ctxt->valid = 1;
ctxt->loadsubset = xmlLoadExtDtdDefaultValue;
ctxt->validate = xmlDoValidityCheckingDefaultValue;
......
<?xml version="1.0"?>
<!DOCTYPE doc [
<!ATTLIST normId attr NMTOKENS #IMPLIED>
]>
<doc>
<!-- no normalization -->
<norm attr=" "/>
<norm attr=" foo bar "/>
<norm attr=" foobar"/>
<norm attr=" foo bar "/>
<norm attr="foobar "/>
<norm attr=" &amp; "/>
<norm attr=" foo&amp;bar "/>
<norm attr=" foobar&amp;"/>
<norm attr="&amp;foo bar "/>
<norm attr="foobar &amp;"/>
<norm attr=" &lt; "/>
<norm attr=" foo&lt;bar "/>
<norm attr=" foobar&lt;"/>
<norm attr="&lt;foo bar "/>
<norm attr="foobar &lt;"/>
<norm attr=" &#13;&#10;&#9; "/>
<!-- normalization -->
<normId attr=""/>
<normId attr="foo bar"/>
<normId attr="foobar"/>
<normId attr="foo bar"/>
<normId attr="foobar"/>
<normId attr="&amp;"/>
<normId attr="foo&amp;bar"/>
<normId attr="foobar&amp;"/>
<normId attr="&amp;foo bar"/>
<normId attr="foobar &amp;"/>
<normId attr="&lt;"/>
<normId attr="foo&lt;bar"/>
<normId attr="foobar&lt;"/>
<normId attr="&lt;foo bar"/>
<normId attr="foobar &lt;"/>
<normId attr="&#13;&#10;&#9;"/> <!-- PBM serializing back -->
</doc>
SAX.setDocumentLocator()
SAX.startDocument()
SAX.internalSubset(doc, , )
SAX.attributeDecl(normId, attr, 8, 3, NULL, ...)
SAX.externalSubset(doc, , )
SAX.startElement(doc)
SAX.characters(
, 3)
SAX.comment( no normalization )
SAX.characters(
, 3)
SAX.startElement(norm, attr=' ')
SAX.endElement(norm)
SAX.characters(
, 3)
SAX.startElement(norm, attr=' foo bar ')
SAX.endElement(norm)
SAX.characters(
, 3)
SAX.startElement(norm, attr=' foobar')
SAX.endElement(norm)
SAX.characters(
, 3)
SAX.startElement(norm, attr=' foo bar ')
SAX.endElement(norm)
SAX.characters(
, 3)
SAX.startElement(norm, attr='foobar ')
SAX.endElement(norm)
SAX.characters(
, 3)
SAX.getEntity(amp)
SAX.startElement(norm, attr=' &#38; ')
SAX.endElement(norm)
SAX.characters(
, 3)
SAX.getEntity(amp)
SAX.startElement(norm, attr=' foo&#38;bar ')
SAX.endElement(norm)
SAX.characters(
, 3)
SAX.getEntity(amp)
SAX.startElement(norm, attr=' foobar&#38;')
SAX.endElement(norm)
SAX.characters(
, 3)
SAX.getEntity(amp)
SAX.startElement(norm, attr='&#38;foo bar ')
SAX.endElement(norm)
SAX.characters(
, 3)
SAX.getEntity(amp)
SAX.startElement(norm, attr='foobar &#38;')
SAX.endElement(norm)
SAX.characters(
, 3)
SAX.getEntity(lt)
SAX.startElement(norm, attr=' < ')
SAX.endElement(norm)
SAX.characters(
, 3)
SAX.getEntity(lt)
SAX.startElement(norm, attr=' foo<bar ')
SAX.endElement(norm)
SAX.characters(
, 3)
SAX.getEntity(lt)
SAX.startElement(norm, attr=' foobar<')
SAX.endElement(norm)
SAX.characters(
, 3)
SAX.getEntity(lt)
SAX.startElement(norm, attr='<foo bar ')
SAX.endElement(norm)
SAX.characters(
, 3)
SAX.getEntity(lt)
SAX.startElement(norm, attr='foobar <')
SAX.endElement(norm)
SAX.characters(
, 3)
SAX.startElement(norm, attr='
')
SAX.endElement(norm)
SAX.characters(
, 3)
SAX.comment( normalization )
SAX.characters(
, 3)
SAX.startElement(normId, attr=' ')
SAX.endElement(normId)
SAX.characters(
, 3)
SAX.startElement(normId, attr=' foo bar ')
SAX.endElement(normId)
SAX.characters(
, 3)
SAX.startElement(normId, attr=' foobar')
SAX.endElement(normId)
SAX.characters(
, 3)
SAX.startElement(normId, attr=' foo bar ')
SAX.endElement(normId)
SAX.characters(
, 3)
SAX.startElement(normId, attr='foobar ')
SAX.endElement(normId)
SAX.characters(
, 3)
SAX.getEntity(amp)
SAX.startElement(normId, attr=' &#38; ')
SAX.endElement(normId)
SAX.characters(
, 3)
SAX.getEntity(amp)
SAX.startElement(normId, attr=' foo&#38;bar ')
SAX.endElement(normId)
SAX.characters(
, 3)
SAX.getEntity(amp)
SAX.startElement(normId, attr=' foobar&#38;')
SAX.endElement(normId)
SAX.characters(
, 3)
SAX.getEntity(amp)
SAX.startElement(normId, attr='&#38;foo bar ')
SAX.endElement(normId)
SAX.characters(
, 3)
SAX.getEntity(amp)
SAX.startElement(normId, attr='foobar &#38;')
SAX.endElement(normId)
SAX.characters(
, 3)
SAX.getEntity(lt)
SAX.startElement(normId, attr=' < ')
SAX.endElement(normId)
SAX.characters(
, 3)
SAX.getEntity(lt)
SAX.startElement(normId, attr=' foo<bar ')
SAX.endElement(normId)
SAX.characters(
, 3)
SAX.getEntity(lt)
SAX.startElement(normId, attr=' foobar<')
SAX.endElement(normId)
SAX.characters(
, 3)
SAX.getEntity(lt)
SAX.startElement(normId, attr='<foo bar ')
SAX.endElement(normId)
SAX.characters(
, 3)
SAX.getEntity(lt)
SAX.startElement(normId, attr='foobar <')
SAX.endElement(normId)
SAX.characters(
, 3)
SAX.startElement(normId, attr='
')
SAX.endElement(normId)
SAX.characters( , 1)
SAX.comment( PBM serializing back )
SAX.characters(
, 1)
SAX.endElement(doc)
SAX.endDocument()
<?xml version="1.0"?>
<foo xmlnsbar="1"/>
<?xml version="1.0"?>
<foo xmlns:="http://example.com/"/>
./test/namespaces/err_1.xml:1: error: Failed to parse QName 'xmlns:'
<foo xmlns:="http://example.com/"/>
^
<?xml version="1.0"?>
<:/>
./test/namespaces/err_2.xml:1: error: Failed to parse QName ':'
<:/>
^
<?xml version="1.0"?>
<:foo/>
./test/namespaces/err_3.xml:1: error: Failed to parse QName ':foo'
<:foo/>
^
<?xml version="1.0"?>
<f: xmlns:f="http://example.com/foo"/>
./test/namespaces/err_4.xml:1: error: Failed to parse QName 'f:'
<f: xmlns:f="http://example.com/foo"/>
^
<?xml version="1.0"?>
<f:a:b xmlns:f="http://example.com/foo"/>
./test/namespaces/err_6.xml:1: error: Failed to parse QName 'f:a:'
<f:a:b xmlns:f="http://example.com/foo"/>
^
<?xml version="1.0"?>
<foo/>
./test/namespaces/err_7.xml:1: error: Namespace prefix f on foo is not defined
<f:foo/>
^
<?xml version="1.0"?>
<tst/>
./test/namespaces/err_8.xml:1: error: xml namespace prefix mapped to wrong URI
<tst xmlns:xml="http://example.com/"/>
^
<?xml version="1.0"?>
<tst xmlns:a="http://example.com/" xmlns:b="http://example.com/" a:err="1" b:err="2"/>
./test/namespaces/err_9.xml:2: error: Namespaced Attribute err in 'http://example.com/' redefined
a:err="1" b:err="2"/>
^
<?xml version="1.0"?>
<!DOCTYPE doc [
<!ATTLIST normId attr NMTOKENS #IMPLIED>
]>
<doc>
<!-- no normalization -->
<norm attr=" "/>
<norm attr=" foo bar "/>
<norm attr=" foobar"/>
<norm attr=" foo bar "/>
<norm attr="foobar "/>
<norm attr=" &amp; "/>
<norm attr=" foo&amp;bar "/>
<norm attr=" foobar&amp;"/>
<norm attr="&amp;foo bar "/>
<norm attr="foobar &amp;"/>
<norm attr=" &lt; "/>
<norm attr=" foo&lt;bar "/>
<norm attr=" foobar&lt;"/>
<norm attr="&lt;foo bar "/>
<norm attr="foobar &lt;"/>
<norm attr=" &#13;&#10;&#9; "/>
<!-- normalization -->
<normId attr=""/>
<normId attr="foo bar"/>
<normId attr="foobar"/>
<normId attr="foo bar"/>
<normId attr="foobar"/>
<normId attr="&amp;"/>
<normId attr="foo&amp;bar"/>
<normId attr="foobar&amp;"/>
<normId attr="&amp;foo bar"/>
<normId attr="foobar &amp;"/>
<normId attr="&lt;"/>
<normId attr="foo&lt;bar"/>
<normId attr="foobar&lt;"/>
<normId attr="&lt;foo bar"/>
<normId attr="foobar &lt;"/>
<normId attr="&#13;&#10;&#9;"/> <!-- PBM serializing back -->
</doc>
<!DOCTYPE doc [<!ATTLIST normId attr NMTOKENS #IMPLIED>]>
<doc>
<!-- no normalization -->
<norm attr=' '/>
<norm attr='
foo bar '/>
<norm attr='
foobar'/>
<norm attr=' foo bar
'/>
<norm attr='foobar
'/>
<norm attr=' &amp; '/>
<norm attr='
foo&amp;bar '/>
<norm attr='
foobar&amp;'/>
<norm attr='&amp;foo bar
'/>
<norm attr='foobar
&amp;'/>
<norm attr=' &lt; '/>
<norm attr='
foo&lt;bar '/>
<norm attr='
foobar&lt;'/>
<norm attr='&lt;foo bar
'/>
<norm attr='foobar
&lt;'/>
<norm attr=' &#x20;&#13;&#xa;&#9; '/>
<!-- normalization -->
<normId attr=' '/>
<normId attr='
foo bar '/>
<normId attr='
foobar'/>
<normId attr=' foo bar
'/>
<normId attr='foobar
'/>
<normId attr=' &amp; '/>
<normId attr='
foo&amp;bar '/>
<normId attr='
foobar&amp;'/>
<normId attr='&amp;foo bar