Commit be803967 authored by Daniel Veillard's avatar Daniel Veillard
Browse files

- Large resync between W3C and Gnome tree

- configure.in: 2.1.0 prerelease
- example/Makefile.am example/gjobread.c tree.h: work on
  libxml1 libxml2 convergence.
- nanoftp, nanohttp.c: fixed stalled connections probs
- HTMLtree.c SAX.c : support for attribute without values in
  HTML for andersca
- valid.c: Fixed most validation + namespace problems
- HTMLparser.c: start document callback for andersca
- debugXML.c xpath.c: lots of XPath fixups from Picdar Technology
- parser.h, SAX.c: serious speed improvement for large
  CDATA blocks
- encoding.[ch] xmlIO.[ch]: Improved seriously saving to
  different encoding
- config.h.in parser.c xmllint.c: added xmlCheckVersion()
  and the LIBXML_TEST_VERSION macro
Daniel
parent c310d564
Wed Jun 28 23:10:26 MEST 2000 Daniel Veillard <Daniel.Veillard@w3.org>
* configure.in: 2.1.0 prerelease
* Large resync between W3C and Gnome tree
* nanoftp, nanohttp.c: fixed stalled connections probs
* HTMLtree.c SAX.c : support for attribute without values in
HTML for andersca
* valid.c: Fixed most validation + namespace problems
* HTMLparser.c: start document callback for andersca
* debugXML.c xpath.c: lots of XPath fixups from Picdar Technology
* parser.h, SAX.c: serious speed improvement for large
CDATA blocks
* encoding.[ch] xmlIO.[ch]: Improved seriously saving to
different encoding
* example/Makefile.am example/gjobread.c tree.h: work on
libxml1 libxml2 convergence.
* config.h.in parser.c xmllint.c: added xmlCheckVersion()
and the LIBXML_TEST_VERSION macro
Fri Jun 23 22:26:07 CEST 2000 Daniel Veillard <Daniel.Veillard@w3.org>
* doc/xml.html: various patches and improvements typo fixed by
......
......@@ -388,6 +388,7 @@ char *htmlStartClose[] = {
NULL
};
static char** htmlStartCloseIndex[100];
static int htmlStartCloseIndexinitialized = 0;
......@@ -604,6 +605,54 @@ htmlAutoCloseOnClose(htmlParserCtxtPtr ctxt, const xmlChar *new) {
}
}
/**
 * htmlCheckImplied:
 * @ctxt:  an HTML parser context
 * @new:  the name of the tag that has just been detected
 *
 * The HTML DTD allows some elements to exist only implicitly. This is
 * called when a new tag has been detected and generates the appropriate
 * implied html, head or body elements if they are missing: each one is
 * pushed with htmlnamePush() and reported through the SAX startElement
 * callback when such a callback is set.
 */
void
htmlCheckImplied(htmlParserCtxtPtr ctxt, const xmlChar *new) {
    /*
     * Tags which imply a head element when they show up first.
     * OBJECT is deliberately not listed: if it comes first, body is
     * assumed instead.
     */
    static const char *const headTags[] = {
        "script", "style", "meta", "link", "title", "base"
    };
    const char *tag = (const char *) new;
    int isHeadTag = 0;
    unsigned int i;

    /* An explicit html tag implies nothing. */
    if (strcmp(tag, "html") == 0)
        return;

    /* No name pushed so far: the html element is implied. */
    if (ctxt->nameNr <= 0) {
#ifdef DEBUG
        fprintf(stderr,"Implied element html: pushed html\n");
#endif
        htmlnamePush(ctxt, xmlStrdup(BAD_CAST"html"));
        if ((ctxt->sax != NULL) && (ctxt->sax->startElement != NULL))
            ctxt->sax->startElement(ctxt->userData, BAD_CAST"html", NULL);
    }

    /* Explicit body or head tags need no implied container. */
    if ((strcmp(tag, "body") == 0) || (strcmp(tag, "head") == 0))
        return;

    /* More than one name is already pushed: nothing left to imply. */
    if (ctxt->nameNr > 1)
        return;

    /* Decide which container, head or body, the new tag implies. */
    for (i = 0; i < sizeof(headTags) / sizeof(headTags[0]); i++) {
        if (strcmp(tag, headTags[i]) == 0) {
            isHeadTag = 1;
            break;
        }
    }

    if (isHeadTag) {
#ifdef DEBUG
        fprintf(stderr,"Implied element head: pushed head\n");
#endif
        htmlnamePush(ctxt, xmlStrdup(BAD_CAST"head"));
        if ((ctxt->sax != NULL) && (ctxt->sax->startElement != NULL))
            ctxt->sax->startElement(ctxt->userData, BAD_CAST"head", NULL);
    } else {
#ifdef DEBUG
        fprintf(stderr,"Implied element body: pushed body\n");
#endif
        htmlnamePush(ctxt, xmlStrdup(BAD_CAST"body"));
        if ((ctxt->sax != NULL) && (ctxt->sax->startElement != NULL))
            ctxt->sax->startElement(ctxt->userData, BAD_CAST"body", NULL);
    }
}
/************************************************************************
* *
* The list of HTML predefined entities *
......@@ -1322,6 +1371,7 @@ htmlNewDoc(const xmlChar *URI, const xmlChar *ExternalID) {
BAD_CAST "http://www.w3.org/TR/REC-html40/loose.dtd");
else
xmlCreateIntSubset(cur, BAD_CAST "HTML", ExternalID, URI);
cur->doc = cur;
cur->name = NULL;
cur->children = NULL;
cur->extSubset = NULL;
......@@ -2161,11 +2211,12 @@ htmlParseAttribute(htmlParserCtxtPtr ctxt, xmlChar **value) {
NEXT;
SKIP_BLANKS;
val = htmlParseAttValue(ctxt);
/******
} else {
/* TODO : some attribute must have values, some may not */
* TODO : some attribute must have values, some may not
if ((ctxt->sax != NULL) && (ctxt->sax->error != NULL))
ctxt->sax->warning(ctxt->userData,
"No value for attribute %s\n", name);
"No value for attribute %s\n", name); */
}
*value = val;
......@@ -2219,6 +2270,11 @@ htmlParseStartTag(htmlParserCtxtPtr ctxt) {
*/
htmlAutoClose(ctxt, name);
/*
* Check for implied HTML elements.
*/
htmlCheckImplied(ctxt, name);
/*
* Now parse the attributes, it ends up with the ending
*
......@@ -2759,6 +2815,10 @@ htmlParseDocument(htmlParserCtxtPtr ctxt) {
ctxt->wellFormed = 0;
}
if ((ctxt->sax) && (ctxt->sax->startDocument) && (!ctxt->disableSAX))
ctxt->sax->startDocument(ctxt->userData);
/*
* Parse possible comments before any content
*/
......
......@@ -84,13 +84,15 @@ htmlAttrDump(xmlBufferPtr buf, xmlDocPtr doc, xmlAttrPtr cur) {
}
xmlBufferWriteChar(buf, " ");
xmlBufferWriteCHAR(buf, cur->name);
value = xmlNodeListGetString(doc, cur->children, 0);
if (value) {
xmlBufferWriteChar(buf, "=");
xmlBufferWriteQuotedString(buf, value);
xmlFree(value);
} else {
xmlBufferWriteChar(buf, "=\"\"");
if (cur->children != NULL) {
value = xmlNodeListGetString(doc, cur->children, 0);
if (value) {
xmlBufferWriteChar(buf, "=");
xmlBufferWriteQuotedString(buf, value);
xmlFree(value);
} else {
xmlBufferWriteChar(buf, "=\"\"");
}
}
}
......
......@@ -86,12 +86,14 @@ testURI_LDADD= $(LDADDS)
check-local: tests
install-data: $(srcdir)/libxml
$(srcdir)/libxml:
-$(RM) $(srcdir)/libxml
ln -s $(srcdir)/. $(srcdir)/libxml
install-data: $(srcdir)/libxml
$(libxml_la_SOURCES): $(srcdir)/libxml
testall : tests SVGtests SAXtests XPathtests XMLenttests
tests: XMLtests HTMLtests Validtests
......
......@@ -4,6 +4,8 @@
Full documentation is available on-line at
http://xmlsoft.org/
This code is released under the LGPL and the W3C IPR
A mailing-list is available, to subscribe:
echo "subscribe xml" | mail majordomo@rufus.w3.org
......
......@@ -24,6 +24,7 @@
#include <libxml/debugXML.h>
#include <libxml/xmlIO.h>
#include <libxml/SAX.h>
#include <libxml/uri.h>
/* #define DEBUG_SAX */
/* #define DEBUG_SAX_TREE */
......@@ -193,6 +194,7 @@ externalSubset(void *ctx, const xmlChar *name,
int oldwellFormed;
xmlParserInputPtr input = NULL;
xmlCharEncoding enc;
xmlCharEncoding oldcharset;
/*
* Ask the Entity resolver to load the damn thing
......@@ -214,6 +216,7 @@ externalSubset(void *ctx, const xmlChar *name,
oldinputMax = ctxt->inputMax;
oldinputTab = ctxt->inputTab;
oldwellFormed = ctxt->wellFormed;
oldcharset = ctxt->charset;
ctxt->inputTab = (xmlParserInputPtr *)
xmlMalloc(5 * sizeof(xmlParserInputPtr));
......@@ -227,6 +230,7 @@ externalSubset(void *ctx, const xmlChar *name,
ctxt->inputNr = oldinputNr;
ctxt->inputMax = oldinputMax;
ctxt->inputTab = oldinputTab;
ctxt->charset = oldcharset;
return;
}
ctxt->inputNr = 0;
......@@ -269,6 +273,7 @@ externalSubset(void *ctx, const xmlChar *name,
ctxt->inputNr = oldinputNr;
ctxt->inputMax = oldinputMax;
ctxt->inputTab = oldinputTab;
ctxt->charset = oldcharset;
/* ctxt->wellFormed = oldwellFormed; */
}
}
......@@ -604,6 +609,14 @@ endDocument(void *ctx)
ctxt->myDoc->encoding = ctxt->encoding;
ctxt->encoding = NULL;
}
if ((ctxt->inputTab[0]->encoding != NULL) && (ctxt->myDoc != NULL) &&
(ctxt->myDoc->encoding == NULL)) {
ctxt->myDoc->encoding = xmlStrdup(ctxt->inputTab[0]->encoding);
}
if ((ctxt->charset != XML_CHAR_ENCODING_NONE) && (ctxt->myDoc != NULL) &&
(ctxt->myDoc->charset == XML_CHAR_ENCODING_NONE)) {
ctxt->myDoc->charset = ctxt->charset;
}
}
/**
......@@ -640,7 +653,10 @@ attribute(void *ctx, const xmlChar *fullname, const xmlChar *value)
/*
* Do the last stage of the attribute normalization
*/
nval = xmlValidNormalizeAttributeValue(ctxt->myDoc,
if (ctxt->html)
nval = NULL;
else
nval = xmlValidNormalizeAttributeValue(ctxt->myDoc,
ctxt->node, fullname, value);
if (nval != NULL)
value = nval;
......@@ -648,9 +664,25 @@ attribute(void *ctx, const xmlChar *fullname, const xmlChar *value)
/*
* Check whether it's a namespace definition
*/
if ((ns == NULL) &&
if ((!ctxt->html) && (ns == NULL) &&
(name[0] == 'x') && (name[1] == 'm') && (name[2] == 'l') &&
(name[3] == 'n') && (name[4] == 's') && (name[5] == 0)) {
xmlURIPtr uri;
uri = xmlParseURI((const char *)value);
if (uri == NULL) {
if ((ctxt->sax != NULL) && (ctxt->sax->warning != NULL))
ctxt->sax->warning(ctxt->userData,
"nmlns: %s not a valid URI\n", value);
} else {
if (uri->scheme == NULL) {
if ((ctxt->sax != NULL) && (ctxt->sax->warning != NULL))
ctxt->sax->warning(ctxt->userData,
"nmlns: URI %s is not absolute\n", value);
}
xmlFreeURI(uri);
}
/* a default namespace definition */
xmlNewNs(ctxt->node, value, NULL);
if (name != NULL)
......@@ -659,7 +691,8 @@ attribute(void *ctx, const xmlChar *fullname, const xmlChar *value)
xmlFree(nval);
return;
}
if ((ns != NULL) && (ns[0] == 'x') && (ns[1] == 'm') && (ns[2] == 'l') &&
if ((!ctxt->html) &&
(ns != NULL) && (ns[0] == 'x') && (ns[1] == 'm') && (ns[2] == 'l') &&
(ns[3] == 'n') && (ns[4] == 's') && (ns[5] == 0)) {
/*
* Validate also for namespace decls, they are attributes from
......@@ -701,7 +734,7 @@ attribute(void *ctx, const xmlChar *fullname, const xmlChar *value)
ret->last = tmp;
tmp = tmp->next;
}
} else {
} else if (value != NULL) {
ret->children = xmlNewDocText(ctxt->myDoc, value);
ret->last = ret->children;
if (ret->children != NULL)
......@@ -709,7 +742,7 @@ attribute(void *ctx, const xmlChar *fullname, const xmlChar *value)
}
}
if (ctxt->validate && ctxt->wellFormed &&
if ((!ctxt->html) && ctxt->validate && ctxt->wellFormed &&
ctxt->myDoc && ctxt->myDoc->intSubset) {
/*
......@@ -817,6 +850,7 @@ startElement(void *ctx, const xmlChar *fullname, const xmlChar **atts)
} else if (parent == NULL) {
parent = ctxt->myDoc->children;
}
ctxt->nodemem = -1;
/*
* We are parsing a new node.
......@@ -844,15 +878,6 @@ startElement(void *ctx, const xmlChar *fullname, const xmlChar **atts)
}
}
/*
* If it's the Document root, finish the Dtd validation and
* check the document root element for validity
*/
if ((ctxt->validate) && (ctxt->vctxt.finishDtd == 0)) {
ctxt->valid &= xmlValidateDtdFinal(&ctxt->vctxt, ctxt->myDoc);
ctxt->valid &= xmlValidateRoot(&ctxt->vctxt, ctxt->myDoc);
ctxt->vctxt.finishDtd = 1;
}
/*
* process all the attributes whose name start with "xml"
*/
......@@ -860,15 +885,26 @@ startElement(void *ctx, const xmlChar *fullname, const xmlChar **atts)
i = 0;
att = atts[i++];
value = atts[i++];
while ((att != NULL) && (value != NULL)) {
if ((att[0] == 'x') && (att[1] == 'm') && (att[2] == 'l'))
attribute(ctxt, att, value);
if (!ctxt->html) {
while ((att != NULL) && (value != NULL)) {
if ((att[0] == 'x') && (att[1] == 'm') && (att[2] == 'l'))
attribute(ctxt, att, value);
att = atts[i++];
value = atts[i++];
att = atts[i++];
value = atts[i++];
}
}
}
/*
* Search the namespace, note that since the attributes have been
* processed, the local namespaces are available.
*/
ns = xmlSearchNs(ctxt->myDoc, ret, prefix);
if ((ns == NULL) && (parent != NULL))
ns = xmlSearchNs(ctxt->myDoc, parent, prefix);
xmlSetNs(ret, ns);
/*
* process all the other attributes
*/
......@@ -876,26 +912,35 @@ startElement(void *ctx, const xmlChar *fullname, const xmlChar **atts)
i = 0;
att = atts[i++];
value = atts[i++];
while ((att != NULL) && (value != NULL)) {
if ((att[0] != 'x') || (att[1] != 'm') || (att[2] != 'l'))
if (ctxt->html) {
while (att != NULL) {
attribute(ctxt, att, value);
/*
* Next ones
*/
att = atts[i++];
value = atts[i++];
att = atts[i++];
value = atts[i++];
}
} else {
while ((att != NULL) && (value != NULL)) {
if ((att[0] != 'x') || (att[1] != 'm') || (att[2] != 'l'))
attribute(ctxt, att, value);
/*
* Next ones
*/
att = atts[i++];
value = atts[i++];
}
}
}
/*
* Search the namespace, note that since the attributes have been
* processed, the local namespaces are available.
* If it's the Document root, finish the Dtd validation and
* check the document root element for validity
*/
ns = xmlSearchNs(ctxt->myDoc, ret, prefix);
if ((ns == NULL) && (parent != NULL))
ns = xmlSearchNs(ctxt->myDoc, parent, prefix);
xmlSetNs(ret, ns);
if ((ctxt->validate) && (ctxt->vctxt.finishDtd == 0)) {
ctxt->valid &= xmlValidateDtdFinal(&ctxt->vctxt, ctxt->myDoc);
ctxt->valid &= xmlValidateRoot(&ctxt->vctxt, ctxt->myDoc);
ctxt->vctxt.finishDtd = 1;
}
if (prefix != NULL)
xmlFree(prefix);
......@@ -932,6 +977,7 @@ endElement(void *ctx, const xmlChar *name)
node_info.node = cur;
xmlParserAddNodeInfo(ctxt, &node_info);
}
ctxt->nodemem = -1;
if (ctxt->validate && ctxt->wellFormed &&
ctxt->myDoc && ctxt->myDoc->intSubset)
......@@ -1008,14 +1054,62 @@ characters(void *ctx, const xmlChar *ch, int len)
#ifdef DEBUG_SAX_TREE
fprintf(stderr, "add chars to %s \n", ctxt->node->name);
#endif
if (lastChild == NULL)
/*
* Here we needed an accelerator mechanism in case of very large
* elements. Use an attribute in the structure !!!
*/
if (lastChild == NULL) {
/* first node, first time */
xmlNodeAddContentLen(ctxt->node, ch, len);
else {
if (xmlNodeIsText(lastChild))
#ifndef XML_USE_BUFFER_CONTENT
if (ctxt->node->children != NULL) {
ctxt->nodelen = len;
ctxt->nodemem = len + 1;
}
#endif
} else {
if (xmlNodeIsText(lastChild)) {
#ifndef XML_USE_BUFFER_CONTENT
/*
* The whole point of maintaining nodelen and nodemem:
* xmlTextConcat is too costly, i.e. it computes the length,
* reallocates a new buffer, moves the data, then appends ch. Here
* we try to minimize realloc() use and avoid copying
* and recomputing the length over and over.
*/
if (ctxt->nodelen + len >= ctxt->nodemem) {
xmlChar *newbuf;
int size;
size = ctxt->nodemem + len;
size *= 2;
newbuf = (xmlChar *) xmlRealloc(lastChild->content,size);
if (newbuf == NULL) {
if ((ctxt->sax != NULL) && (ctxt->sax->error != NULL))
ctxt->sax->error(ctxt->userData,
"SAX.characters(): out of memory\n");
return;
}
ctxt->nodemem = size;
lastChild->content = newbuf;
}
memcpy(&lastChild->content[ctxt->nodelen], ch, len);
ctxt->nodelen += len;
lastChild->content[ctxt->nodelen] = 0;
#else
xmlTextConcat(lastChild, ch, len);
else {
#endif
} else {
/* Mixed content, first time */
lastChild = xmlNewTextLen(ch, len);
xmlAddChild(ctxt->node, lastChild);
#ifndef XML_USE_BUFFER_CONTENT
if (ctxt->node->children != NULL) {
ctxt->nodelen = len;
ctxt->nodemem = len + 1;
}
#endif
}
}
}
......
......@@ -6,14 +6,17 @@
TODO:
=====
- xmlSwitchToEncoding() need a rewrite for correct handling of conversion
error code conditions.
- If the internal encoding is not UTF8 saving to a given encoding doesn't
work
- problem when parsing hrefs with & with the HTML parser (IRC ac)
- DOM needs
xmlAttrPtr xmlNewDocProp(xmlDocPtr doc, const xmlChar *name, const xmlChar *value)
int xmlPruneProp(xmlNodePtr node, xmlAttrPtr attr);
- General checking of DTD validation in presence of namespaces ... hairy
mostly done
- Fix DTD + namespace validity problem
"Not valid: root and DtD name do not match 'ROOT' and 'prefix:ROOT'"
- add support for the trick from Henry conf/sun/valid/empty.xml
mostly done
- Correct standalone checking/emitting (hard)
2.9 Standalone Document Declaration
- Better checking of external parsed entities TAG 1234
......@@ -24,9 +27,10 @@ TODO:
- Handle undefined namespaces in entity contents better ... at least
issue a warning
- Issue warning when using non-absolute namespaces URI.
- General checking of DTD validation in presence of namespaces ... hairy
- fix --disable-corba configure switch handling, and use XML_WITHOUT_CORBA
not WITHOUT_CORBA flag
- the html parser should add <head> and <body> if they don't exist
- Command to force the parser to stop parsing and ignore the rest of the file.
TODO:
=====
......@@ -92,6 +96,9 @@ EXTENSIONS:
Done:
=====
- support for HTML empty attributes like <hr noshade>
- plugged iconv() in for support of a large set of encodings.
- xmlSwitchToEncoding() rewrite done
- URI checkings (no fragments) rfc2396.txt
- Added a clean mechanism for overload or added input methods:
xmlRegisterInputCallbacks()
......
......@@ -1326,7 +1326,7 @@ xmlShellDu(xmlShellCtxtPtr ctxt, char *arg, xmlNodePtr tree,
if ((node->type == XML_DOCUMENT_NODE) ||
(node->type == XML_HTML_DOCUMENT_NODE)) {
node = ((xmlDocPtr) node)->children;
} else if (node->children != NULL) {
} else if ((node->children != NULL) && (node->type != XML_ENTITY_REF_NODE)) {
/* deep first */
node = node->children;
indent++;
......
<
......@@ -8137,1314 +8137,4 @@ HREF="gnome-xml-tree.html#XMLCHAR"
>xmlChar</A
>* xmlNodeGetContent (<A
HREF="gnome-xml-tree.html#XMLNODEPTR"
>xmlNodePtr</A
> cur);</PRE
></TD
></TR
></TABLE
><P
>Read the value of a node, this can be either the text carried
directly by this node if it's a TEXT node or the aggregate string
of the values carried by this node's children (TEXT and ENTITY_REF).
Entity references are substituted.</P
><P
></P
><DIV
CLASS="INFORMALTABLE"
><P
></P
><TABLE
BORDER="0"
WIDTH="100%"
BGCOLOR="#FFD0D0"
CELLSPACING="0"
CELLPADDING="4"
CLASS="CALSTABLE"
><TR
><TD
WIDTH="20%"
ALIGN="RIGHT"
VALIGN="TOP"
><TT
CLASS="PARAMETER"
><I
>cur</I
></TT
>&nbsp;:</TD
><TD
WIDTH="80%"
ALIGN="LEFT"
VALIGN="TOP"
>&nbsp;</TD
></TR
><TR
><TD
WIDTH="20%"
ALIGN="RIGHT"
VALIGN="TOP"
><I
CLASS="EMPHASIS"
>Returns</I
> :</TD
><TD
WIDTH="80%"
ALIGN="LEFT"
VALIGN="TOP"
>&nbsp;</TD
></TR
></TABLE
><P
></P
></DIV
></DIV
><HR><DIV
CLASS="REFSECT2"
><A
NAME="AEN4780"
></A
><H3
><A
NAME="XMLNODEGETLANG"
></A
>xmlNodeGetLang ()</H3
><TABLE
BORDER="0"
BGCOLOR="#D6E8FF"
WIDTH="100%"
CELLPADDING="6"
><TR
><TD
><PRE
CLASS="PROGRAMLISTING"
><A
HREF="gnome-xml-tree.html#XMLCHAR"
>xmlChar</A
>* xmlNodeGetLang (<A
HREF="gnome-xml-tree.html#XMLNODEPTR"
>xmlNodePtr</A
> cur);</PRE
></TD
></TR
></TABLE
><P
>Searches the language of a node, i.e. the values of the xml:lang
attribute or the one carried by the nearest ancestor.</P
><P
></P