Commit dbfd641b authored by Daniel Veillard's avatar Daniel Veillard
Browse files

- Lots of improvements, too long to list here

- Push mode for the XML parser (HTML to come)
- XML shell-like interface for debugging
- improvements on XPath and validation
Daniel
parent fef854d2
Tue Dec 28 18:44:22 CET 1999 Daniel Veillard <Daniel.Veillard@w3.org>
* parser.[ch] parserInternals.h: Push parser for XML,
seems to work fine now
* tester.c debugXML.[ch]: Added an XML shell debug facility and
--push for push testing
* xpath.[ch] : cleaned up for Shell usage, added missing APIs
* testSAX.c: added --push
* HTMLtree.[ch] tree.[ch]: new functions for dumping parts of the
subtree
* xmlIO.[ch] : enriched API + fixes for push mode
* entities.[ch]: added the entity content length to the struct.
* xmlmemory.[ch]: new API to show the last entries for the shell
* valid.c: added required attribute testing
* SAX.c: the cdata callback now merges contiguous fragments
* HTMLparser.c: cleanup of some macros
Wed Dec 22 12:20:53 CET 1999 Daniel Veillard <Daniel.Veillard@w3.org>
* parser.c: fix for PI names starting with xml
......
......@@ -57,8 +57,8 @@
* Generic function for accessing stacks in the Parser Context
*/
#define PUSH_AND_POP(type, name) \
int html##name##Push(htmlParserCtxtPtr ctxt, type value) { \
#define PUSH_AND_POP(scope, type, name) \
scope int html##name##Push(htmlParserCtxtPtr ctxt, type value) { \
if (ctxt->name##Nr >= ctxt->name##Max) { \
ctxt->name##Max *= 2; \
ctxt->name##Tab = (void *) xmlRealloc(ctxt->name##Tab, \
......@@ -72,7 +72,7 @@ int html##name##Push(htmlParserCtxtPtr ctxt, type value) { \
ctxt->name = value; \
return(ctxt->name##Nr++); \
} \
type html##name##Pop(htmlParserCtxtPtr ctxt) { \
scope type html##name##Pop(htmlParserCtxtPtr ctxt) { \
type ret; \
if (ctxt->name##Nr < 0) return(0); \
ctxt->name##Nr--; \
......@@ -86,8 +86,8 @@ type html##name##Pop(htmlParserCtxtPtr ctxt) { \
return(ret); \
} \
PUSH_AND_POP(xmlNodePtr, node)
PUSH_AND_POP(xmlChar*, name)
PUSH_AND_POP(extern, xmlNodePtr, node)
PUSH_AND_POP(extern, xmlChar*, name)
/*
* Macros for accessing the content. Those should be used only by the parser,
......@@ -2626,11 +2626,11 @@ htmlParseDocument(htmlParserCtxtPtr ctxt) {
}
/********************************************************************************
* *
* Parser contexts handling *
* *
********************************************************************************/
/************************************************************************
* *
* Parser contexts handling *
* *
************************************************************************/
/**
* xmlInitParserCtxt:
......@@ -2665,6 +2665,7 @@ htmlInitParserCtxt(htmlParserCtxtPtr ctxt)
ctxt->version = NULL;
ctxt->encoding = NULL;
ctxt->standalone = -1;
ctxt->instate = XML_PARSER_START;
/* Allocate the Node stack */
ctxt->nodeTab = (htmlNodePtr *) xmlMalloc(10 * sizeof(htmlNodePtr));
......@@ -2691,6 +2692,7 @@ htmlInitParserCtxt(htmlParserCtxtPtr ctxt)
ctxt->record_info = 0;
ctxt->validate = 0;
ctxt->nbChars = 0;
ctxt->checkIndex = 0;
xmlInitNodeInfoSeq(&ctxt->node_seq);
}
......
......@@ -28,6 +28,9 @@
#include "entities.h"
#include "valid.h"
static void
htmlDocContentDump(xmlBufferPtr buf, xmlDocPtr cur);
/**
* htmlDtdDump:
* @buf: the HTML buffer output
......@@ -108,7 +111,7 @@ htmlAttrListDump(xmlBufferPtr buf, xmlDocPtr doc, xmlAttrPtr cur) {
}
static void
void
htmlNodeDump(xmlBufferPtr buf, xmlDocPtr doc, xmlNodePtr cur);
/**
* htmlNodeListDump:
......@@ -138,7 +141,7 @@ htmlNodeListDump(xmlBufferPtr buf, xmlDocPtr doc, xmlNodePtr cur) {
*
* Dump an HTML node; the dump is recursive, so children are printed too.
*/
static void
void
htmlNodeDump(xmlBufferPtr buf, xmlDocPtr doc, xmlNodePtr cur) {
htmlElemDescPtr info;
......@@ -149,6 +152,10 @@ htmlNodeDump(xmlBufferPtr buf, xmlDocPtr doc, xmlNodePtr cur) {
/*
* Special cases.
*/
if (cur->type == XML_HTML_DOCUMENT_NODE) {
htmlDocContentDump(buf, (xmlDocPtr) cur);
return;
}
if (cur->type == HTML_TEXT_NODE) {
if (cur->content != NULL) {
xmlChar *buffer;
......
......@@ -1101,14 +1101,22 @@ void
cdataBlock(void *ctx, const xmlChar *value, int len)
{
xmlParserCtxtPtr ctxt = (xmlParserCtxtPtr) ctx;
xmlNodePtr ret;
xmlNodePtr ret, lastChild;
#ifdef DEBUG_SAX
fprintf(stderr, "SAX.pcdata(%.10s, %d)\n", value, len);
#endif
ret = xmlNewCDataBlock(ctxt->myDoc, value, len);
xmlAddChild(ctxt->node, ret);
/* !!!!! merges */
lastChild = xmlGetLastChild(ctxt->node);
#ifdef DEBUG_SAX_TREE
fprintf(stderr, "add chars to %s \n", ctxt->node->name);
#endif
if ((lastChild != NULL) &&
(lastChild->type == XML_CDATA_SECTION_NODE)) {
xmlTextConcat(lastChild, value, len);
} else {
ret = xmlNewCDataBlock(ctxt->myDoc, value, len);
xmlAddChild(ctxt->node, ret);
}
}
/*
......
This diff is collapsed.
......@@ -7,19 +7,97 @@
#ifndef __DEBUG_XML__
#define __DEBUG_XML__
#include <stdio.h>
#include "tree.h"
#include "xpath.h"
#ifdef __cplusplus
extern "C" {
#endif
extern void xmlDebugDumpString(FILE *output, const xmlChar *str);
extern void xmlDebugDumpAttr(FILE *output, xmlAttrPtr attr, int depth);
extern void xmlDebugDumpAttrList(FILE *output, xmlAttrPtr attr, int depth);
extern void xmlDebugDumpOneNode(FILE *output, xmlNodePtr node, int depth);
extern void xmlDebugDumpNode(FILE *output, xmlNodePtr node, int depth);
extern void xmlDebugDumpNodeList(FILE *output, xmlNodePtr node, int depth);
extern void xmlDebugDumpDocument(FILE *output, xmlDocPtr doc);
extern void xmlDebugDumpEntities(FILE *output, xmlDocPtr doc);
/*
* The standard Dump routines
*/
void xmlDebugDumpString (FILE *output,
const xmlChar *str);
void xmlDebugDumpAttr (FILE *output,
xmlAttrPtr attr,
int depth);
void xmlDebugDumpAttrList (FILE *output,
xmlAttrPtr attr,
int depth);
void xmlDebugDumpOneNode (FILE *output,
xmlNodePtr node,
int depth);
void xmlDebugDumpNode (FILE *output,
xmlNodePtr node,
int depth);
void xmlDebugDumpNodeList (FILE *output,
xmlNodePtr node,
int depth);
void xmlDebugDumpDocumentHead(FILE *output,
xmlDocPtr doc);
void xmlDebugDumpDocument (FILE *output,
xmlDocPtr doc);
void xmlDebugDumpEntities (FILE *output,
xmlDocPtr doc);
void xmlLsOneNode (FILE *output,
xmlNodePtr node);
/****************************************************************
* *
* The XML shell related structures and functions *
* *
****************************************************************/
/**
* xmlShellReadlineFunc:
* @prompt: a string prompt
*
* This is a generic signature for the XML shell input function
*
* Returns a string which will be freed by the Shell
*/
typedef char * (* xmlShellReadlineFunc)(char *prompt);
/*
* The shell context itself: the state handed to the shell commands
* (an xmlShellCtxtPtr is the first argument of the xmlShellCmd signature).
* TODO: add the defined function tables.
*/
typedef struct xmlShellCtxt {
char *filename; /* file name; presumably the one passed to xmlShell() -- confirm */
xmlDocPtr doc; /* document; presumably the one passed to xmlShell() -- confirm */
xmlNodePtr node; /* a node of the tree; presumably the shell's current node -- confirm */
xmlXPathContextPtr pctxt; /* XPath context used by the shell */
int loaded; /* NOTE(review): meaning not visible in this header -- check debugXML.c */
FILE *output; /* output stream */
xmlShellReadlineFunc input; /* line input callback, see xmlShellReadlineFunc */
} xmlShellCtxt, *xmlShellCtxtPtr;
/**
* xmlShellCmd:
* @ctxt: a shell context
* @arg: a string argument
* @node: a first node
* @node2: a second node
*
* This is a generic signature for the XML shell functions
*
* Returns an int, negative returns indicating errors
*/
typedef int (* xmlShellCmd) (xmlShellCtxtPtr ctxt,
char *arg,
xmlNodePtr node,
xmlNodePtr node2);
/*
* The Shell interface.
*
* xmlShell() receives the document, a file name, the callback used to
* read input lines (see xmlShellReadlineFunc) and the output FILE.
* NOTE(review): presumably runs the interactive debug shell on @doc
* until the user quits -- confirm against the implementation.
*/
void xmlShell (xmlDocPtr doc,
char *filename,
xmlShellReadlineFunc input,
FILE *output);
#ifdef __cplusplus
}
#endif
......
......@@ -128,6 +128,9 @@ for really accurate description</h3>
<ul>
<li>working on HTML and XML links recognition layers, get in touch with me
if you want to test those.</li>
<li>a Push interface for the XML parser</li>
<li>a shell-like interface to the document tree (try tester --shell :-)</li>
<li>lots of bug fixes and improvements added over the Xmas holidays</li>
</ul>
<h3>1.8.2: Dec 21 1999</h3>
......@@ -901,6 +904,6 @@ base under gnome-xml/example</p>
<p><a href="mailto:Daniel.Veillard@w3.org">Daniel Veillard</a></p>
<p>$Id: xml.html,v 1.16 1997/01/04 02:49:42 veillard Exp $</p>
<p>$Id: xml.html,v 1.17 1999/12/21 15:35:27 veillard Exp $</p>
</body>
</html>
......@@ -114,10 +114,13 @@ xmlAddEntity(xmlEntitiesTablePtr table, const xmlChar *name, int type,
cur->SystemID = xmlStrdup(SystemID);
else
cur->SystemID = NULL;
if (content != NULL)
cur->content = xmlStrdup(content);
else
if (content != NULL) {
cur->length = xmlStrlen(content);
cur->content = xmlStrndup(content, cur->length);
} else {
cur->length = 0;
cur->content = NULL;
}
cur->orig = NULL;
table->nb_entities++;
}
......
......@@ -34,6 +34,7 @@ typedef struct xmlEntity {
const xmlChar *ExternalID; /* External identifier for PUBLIC Entity */
const xmlChar *SystemID; /* URI for a SYSTEM or PUBLIC Entity */
xmlChar *content; /* The entity content or ndata if unparsed */
int length; /* the content length */
xmlChar *orig; /* The entity cont without ref substitution */
} xmlEntity;
typedef xmlEntity *xmlEntityPtr;
......
......@@ -7,19 +7,97 @@
#ifndef __DEBUG_XML__
#define __DEBUG_XML__
#include <stdio.h>
#include "tree.h"
#include "xpath.h"
#ifdef __cplusplus
extern "C" {
#endif
extern void xmlDebugDumpString(FILE *output, const xmlChar *str);
extern void xmlDebugDumpAttr(FILE *output, xmlAttrPtr attr, int depth);
extern void xmlDebugDumpAttrList(FILE *output, xmlAttrPtr attr, int depth);
extern void xmlDebugDumpOneNode(FILE *output, xmlNodePtr node, int depth);
extern void xmlDebugDumpNode(FILE *output, xmlNodePtr node, int depth);
extern void xmlDebugDumpNodeList(FILE *output, xmlNodePtr node, int depth);
extern void xmlDebugDumpDocument(FILE *output, xmlDocPtr doc);
extern void xmlDebugDumpEntities(FILE *output, xmlDocPtr doc);
/*
* The standard Dump routines
*/
void xmlDebugDumpString (FILE *output,
const xmlChar *str);
void xmlDebugDumpAttr (FILE *output,
xmlAttrPtr attr,
int depth);
void xmlDebugDumpAttrList (FILE *output,
xmlAttrPtr attr,
int depth);
void xmlDebugDumpOneNode (FILE *output,
xmlNodePtr node,
int depth);
void xmlDebugDumpNode (FILE *output,
xmlNodePtr node,
int depth);
void xmlDebugDumpNodeList (FILE *output,
xmlNodePtr node,
int depth);
void xmlDebugDumpDocumentHead(FILE *output,
xmlDocPtr doc);
void xmlDebugDumpDocument (FILE *output,
xmlDocPtr doc);
void xmlDebugDumpEntities (FILE *output,
xmlDocPtr doc);
void xmlLsOneNode (FILE *output,
xmlNodePtr node);
/****************************************************************
* *
* The XML shell related structures and functions *
* *
****************************************************************/
/**
* xmlShellReadlineFunc:
* @prompt: a string prompt
*
* This is a generic signature for the XML shell input function
*
* Returns a string which will be freed by the Shell
*/
typedef char * (* xmlShellReadlineFunc)(char *prompt);
/*
* The shell context itself: the state handed to the shell commands
* (an xmlShellCtxtPtr is the first argument of the xmlShellCmd signature).
* TODO: add the defined function tables.
*/
typedef struct xmlShellCtxt {
char *filename; /* file name; presumably the one passed to xmlShell() -- confirm */
xmlDocPtr doc; /* document; presumably the one passed to xmlShell() -- confirm */
xmlNodePtr node; /* a node of the tree; presumably the shell's current node -- confirm */
xmlXPathContextPtr pctxt; /* XPath context used by the shell */
int loaded; /* NOTE(review): meaning not visible in this header -- check debugXML.c */
FILE *output; /* output stream */
xmlShellReadlineFunc input; /* line input callback, see xmlShellReadlineFunc */
} xmlShellCtxt, *xmlShellCtxtPtr;
/**
* xmlShellCmd:
* @ctxt: a shell context
* @arg: a string argument
* @node: a first node
* @node2: a second node
*
* This is a generic signature for the XML shell functions
*
* Returns an int, negative returns indicating errors
*/
typedef int (* xmlShellCmd) (xmlShellCtxtPtr ctxt,
char *arg,
xmlNodePtr node,
xmlNodePtr node2);
/*
* The Shell interface.
*
* xmlShell() receives the document, a file name, the callback used to
* read input lines (see xmlShellReadlineFunc) and the output FILE.
* NOTE(review): presumably runs the interactive debug shell on @doc
* until the user quits -- confirm against the implementation.
*/
void xmlShell (xmlDocPtr doc,
char *filename,
xmlShellReadlineFunc input,
FILE *output);
#ifdef __cplusplus
}
#endif
......
......@@ -34,6 +34,7 @@ typedef struct xmlEntity {
const xmlChar *ExternalID; /* External identifier for PUBLIC Entity */
const xmlChar *SystemID; /* URI for a SYSTEM or PUBLIC Entity */
xmlChar *content; /* The entity content or ndata if unparsed */
int length; /* the content length */
xmlChar *orig; /* The entity cont without ref substitution */
} xmlEntity;
typedef xmlEntity *xmlEntityPtr;
......
......@@ -40,11 +40,12 @@ typedef struct xmlParserInput {
const char *filename; /* The file analyzed, if any */
const char *directory; /* the directory/base of teh file */
const xmlChar *base; /* Base of the array to parse */
const xmlChar *cur; /* Current char being parsed */
const xmlChar *base; /* Base of the array to parse */
const xmlChar *cur; /* Current char being parsed */
int length; /* length if known */
int line; /* Current line */
int col; /* Current column */
int consumed; /* How many xmlChars were already consumed */
int consumed; /* How many xmlChars already consumed */
xmlParserInputDeallocate free; /* function to deallocate the base */
} xmlParserInput;
typedef xmlParserInput *xmlParserInputPtr;
......@@ -77,20 +78,25 @@ typedef _xmlParserNodeInfoSeq xmlParserNodeInfoSeq;
typedef xmlParserNodeInfoSeq *xmlParserNodeInfoSeqPtr;
/**
* The parser is not a state based parser, but we need to maintain
* The parser is not (yet) a state based parser, but we need to maintain
* minimal state information, especially for entity processing.
*/
typedef enum {
XML_PARSER_EOF = 0,
XML_PARSER_PROLOG,
XML_PARSER_CONTENT,
XML_PARSER_ENTITY_DECL,
XML_PARSER_ENTITY_VALUE,
XML_PARSER_ATTRIBUTE_VALUE,
XML_PARSER_DTD,
XML_PARSER_EPILOG,
XML_PARSER_COMMENT,
XML_PARSER_CDATA_SECTION
XML_PARSER_EOF = -1, /* nothing is to be parsed */
XML_PARSER_START = 0, /* nothing has been parsed */
XML_PARSER_MISC, /* Misc* before the internal subset */
XML_PARSER_PI, /* within a processing instruction */
XML_PARSER_DTD, /* within some DTD content */
XML_PARSER_PROLOG, /* Misc* after internal subset */
XML_PARSER_COMMENT, /* within a comment */
XML_PARSER_START_TAG, /* within a start tag */
XML_PARSER_CONTENT, /* within the content */
XML_PARSER_CDATA_SECTION, /* within a CDATA section */
XML_PARSER_END_TAG, /* within a closing tag */
XML_PARSER_ENTITY_DECL, /* within an entity declaration */
XML_PARSER_ENTITY_VALUE, /* within an entity value in a decl */
XML_PARSER_ATTRIBUTE_VALUE, /* within an attribute value */
XML_PARSER_EPILOG /* the Misc* after the last end tag */
} xmlParserInputState;
/**
......@@ -151,6 +157,7 @@ typedef struct _xmlParserCtxt {
xmlChar * *nameTab; /* array of nodes */
long nbChars; /* number of xmlChar processed */
long checkIndex; /* used by progressive parsing lookup */
} _xmlParserCtxt;
typedef _xmlParserCtxt xmlParserCtxt;
typedef xmlParserCtxt *xmlParserCtxtPtr;
......@@ -347,13 +354,35 @@ xmlDtdPtr xmlParseDTD (const xmlChar *ExternalID,
xmlDtdPtr xmlSAXParseDTD (xmlSAXHandlerPtr sax,
const xmlChar *ExternalID,
const xmlChar *SystemID);
/**
* SAX initialization routines
*/
void xmlDefaultSAXHandlerInit(void);
void htmlDefaultSAXHandlerInit(void);
/**
* Parser contexts handling.
*/
void xmlInitParserCtxt (xmlParserCtxtPtr ctxt);
void xmlClearParserCtxt (xmlParserCtxtPtr ctxt);
void xmlFreeParserCtxt (xmlParserCtxtPtr ctxt);
void xmlSetupParserForBuffer (xmlParserCtxtPtr ctxt,
const xmlChar* buffer,
const char* filename);
void xmlDefaultSAXHandlerInit(void);
void htmlDefaultSAXHandlerInit(void);
xmlParserCtxtPtr xmlCreateDocParserCtxt (xmlChar *cur);
/**
* Interfaces for the Push mode
*/
xmlParserCtxtPtr xmlCreatePushParserCtxt(xmlSAXHandlerPtr sax,
void *user_data,
const char *chunk,
int size,
const char *filename);
int xmlParseChunk (xmlParserCtxtPtr ctxt,
const char *chunk,
int size,
int terminate);
/**
* Node infos
......
......@@ -435,9 +435,10 @@ typedef unsigned char CHARVAL;
* any Unicode character, excluding the surrogate blocks, FFFE, and FFFF.
*/
#define IS_CHAR(c) \
((((c) == 0x09) || ((c) == 0x0a) || ((c) == 0x0d) || \
(((c) >= 0x20) && ((c) != 0xFFFE) && ((c) != 0xFFFF))) && \
(((c) <= 0xD7FF) || ((c) >= 0xE000)) && ((c) <= 0x10FFFF))
((((c) >= 0x20) && ((c) <= 0xD7FF)) || \
((c) == 0x09) || ((c) == 0x0a) || ((c) == 0x0d) || \
(((c) >= 0xE000) && ((c) <= 0xFFFD)) || \
(((c) >= 0x10000) && ((c) <= 0x10FFFF)))
/*
* [85] BaseChar ::= ... long list see REC ...
......@@ -595,8 +596,7 @@ void xmlParseDocTypeDecl (xmlParserCtxtPtr ctxt);
xmlChar * xmlParseAttribute (xmlParserCtxtPtr ctxt,
xmlChar **value);
xmlChar * xmlParseStartTag (xmlParserCtxtPtr ctxt);
void xmlParseEndTag (xmlParserCtxtPtr ctxt,
xmlChar *tagname);
void xmlParseEndTag (xmlParserCtxtPtr ctxt);
void xmlParseCDSect (xmlParserCtxtPtr ctxt);
void xmlParseContent (xmlParserCtxtPtr ctxt);
void xmlParseElement (xmlParserCtxtPtr ctxt);
......
......@@ -526,6 +526,9 @@ void xmlDocDumpMemory (xmlDocPtr cur,
int *size);
void xmlDocDump (FILE *f,
xmlDocPtr cur);
void xmlElemDump (FILE *f,
xmlDocPtr cur,
xmlNodePtr elem);
int xmlSaveFile (const char *filename,
xmlDocPtr cur);
......
......@@ -37,6 +37,9 @@ typedef xmlParserInputBuffer *xmlParserInputBufferPtr;
* Interfaces
*/
xmlParserInputBufferPtr
xmlAllocParserInputBuffer (xmlCharEncoding enc);
xmlParserInputBufferPtr
xmlParserInputBufferCreateFilename (const char *filename,
xmlCharEncoding enc);
......
/*
* memory.h: interface for the memory allocation debug.
* xmlmemory.h: interface for the memory allocation debug.
*
* Daniel.Veillard@w3.org
*/
......@@ -24,6 +24,7 @@
#define xmlInitMemory()
#define xmlMemoryDump()
#define xmlMemDisplay(x)
#define xmlMemShow(x, d)
#else /* ! NO_DEBUG_MEMORY */
#include <stdio.h>
......@@ -51,6 +52,7 @@ void xmlFree (void *ptr);
char * xmlMemStrdup (const char *str);
int xmlMemUsed (void);
void xmlMemDisplay (FILE *fp);
void xmlMemShow (FILE *fp, int nr);
void xmlMemoryDump (void);
int xmlInitMemory (void);
......
......@@ -205,6 +205,9 @@ xmlXPathObjectPtr xmlXPathEval (const xmlChar *str,
void xmlXPathFreeObject (xmlXPathObjectPtr obj);
xmlXPathObjectPtr xmlXPathEvalExpression (const xmlChar *str,
xmlXPathContextPtr ctxt);
xmlNodeSetPtr xmlXPathNodeSetCreate (xmlNodePtr val);
void xmlXPathFreeNodeSetList (xmlXPathObjectPtr obj);
void xmlXPathFreeNodeSet (xmlNodeSetPtr obj);
#ifdef __cplusplus
}
......
This diff is collapsed.
......@@ -40,11 +40,12 @@ typedef struct xmlParserInput {
const char *filename; /* The file analyzed, if any */
const char *directory; /* the directory/base of teh file */
const xmlChar *base; /* Base of the array to parse */
const xmlChar *cur; /* Current char being parsed */
const xmlChar *base; /* Base of the array to parse */
const xmlChar *cur; /* Current char being parsed */
int length; /* length if known */
int line; /* Current line */
int col; /* Current column */
int consumed; /* How many xmlChars were already consumed */
int consumed; /* How many xmlChars already consumed */
xmlParserInputDeallocate free; /* function to deallocate the base */
} xmlParserInput;
typedef xmlParserInput *xmlParserInputPtr;
......@@ -77,20 +78,25 @@ typedef _xmlParserNodeInfoSeq xmlParserNodeInfoSeq;
typedef xmlParserNodeInfoSeq *xmlParserNodeInfoSeqPtr;
/**
* The parser is not a state based parser, but we need to maintain
* The parser is not (yet) a state based parser, but we need to maintain
* minimal state information, especially for entity processing.
*/
typedef enum {
XML_PARSER_EOF = 0,
XML_PARSER_PROLOG,
XML_PARSER_CONTENT,
XML_PARSER_ENTITY_DECL,
XML_PARSER_ENTITY_VALUE,
XML_PARSER_ATTRIBUTE_VALUE,
XML_PARSER_DTD,
XML_PARSER_EPILOG,
XML_PARSER_COMMENT,