Commit 7646b18d authored by Daniel Veillard
Browse files

another entity processing update from Markus Henke Daniel

* tree.c: another entity processing update from Markus Henke
Daniel
parent 54d02fb3
Fri Apr 19 18:26:04 CEST 2002 Daniel Veillard <daniel@veillard.com>
* tree.c: another entity processing update from Markus Henke
Fri Apr 19 17:14:24 CEST 2002 Bjorn Reese <breese@users.sourceforge.net>
* trionan.c: fixed crash on OSF/1
......
......@@ -60,6 +60,16 @@ xmlAutomataStatePtr xmlAutomataNewCountTrans(xmlAutomataPtr am,
int min,
int max,
void *data);
/*
 * Adds a transition from @from to @to that is activated by between @min and
 * @max successive occurrences of @token and can be crossed only once.
 * When @to is NULL a new target state is created first. @data is stored
 * with the transition. Returns the target state, or NULL on error.
 */
xmlAutomataStatePtr xmlAutomataNewOnceTrans (xmlAutomataPtr am,
xmlAutomataStatePtr from,
xmlAutomataStatePtr to,
const xmlChar *token,
int min,
int max,
void *data);
/*
 * Adds an "all" transition between @from and @to in the automata @am.
 * NOTE(review): the definition is not visible from here; presumably a NULL
 * @to requests a freshly created target state and the target state is
 * returned, as with the sibling constructors -- confirm against the
 * implementation.
 */
xmlAutomataStatePtr xmlAutomataNewAllTrans (xmlAutomataPtr am,
xmlAutomataStatePtr from,
xmlAutomataStatePtr to);
xmlAutomataStatePtr xmlAutomataNewEpsilon (xmlAutomataPtr am,
xmlAutomataStatePtr from,
xmlAutomataStatePtr to);
......
......@@ -902,56 +902,73 @@ xmlStringGetNodeList(xmlDocPtr doc, const xmlChar *value) {
* Returns a pointer to the string copy, the caller must free it.
*/
xmlChar *
xmlNodeListGetString(xmlDocPtr doc, xmlNodePtr list, int inLine) {
xmlNodeListGetString(xmlDocPtr doc, xmlNodePtr list, int inLine)
{
xmlNodePtr node = list;
xmlChar *ret = NULL;
xmlEntityPtr ent;
if (list == NULL) return(NULL);
if (list == NULL)
return (NULL);
while (node != NULL) {
if ((node->type == XML_TEXT_NODE) ||
(node->type == XML_CDATA_SECTION_NODE)) {
if (inLine) {
ret = xmlStrcat(ret, node->content);
} else {
xmlChar *buffer;
(node->type == XML_CDATA_SECTION_NODE)) {
if (inLine) {
ret = xmlStrcat(ret, node->content);
} else {
xmlChar *buffer;
buffer = xmlEncodeEntitiesReentrant(doc, node->content);
if (buffer != NULL) {
ret = xmlStrcat(ret, buffer);
xmlFree(buffer);
}
buffer = xmlEncodeEntitiesReentrant(doc, node->content);
if (buffer != NULL) {
ret = xmlStrcat(ret, buffer);
xmlFree(buffer);
}
}
} else if (node->type == XML_ENTITY_REF_NODE) {
if (inLine) {
ent = xmlGetDocEntity(doc, node->name);
if (ent != NULL)
ret = xmlStrcat(ret, ent->content);
else {
ret = xmlStrcat(ret, node->content);
}
} else if (node->type == XML_ENTITY_REF_NODE) {
if (inLine) {
ent = xmlGetDocEntity(doc, node->name);
if (ent != NULL) {
xmlChar *buffer;
/* an entity content can be any "well balanced chunk",
* i.e. the result of the content [43] production:
* http://www.w3.org/TR/REC-xml#NT-content.
* So it can contain text, CDATA section or nested
* entity reference nodes (among others).
* -> we recursive call xmlNodeListGetString()
* which handles these types */
buffer = xmlNodeListGetString(doc, ent->children, 1);
if (buffer != NULL) {
ret = xmlStrcat(ret, buffer);
xmlFree(buffer);
}
} else {
ret = xmlStrcat(ret, node->content);
}
} else {
xmlChar buf[2];
buf[0] = '&'; buf[1] = 0;
ret = xmlStrncat(ret, buf, 1);
ret = xmlStrcat(ret, node->name);
buf[0] = ';'; buf[1] = 0;
ret = xmlStrncat(ret, buf, 1);
}
}
xmlChar buf[2];
buf[0] = '&';
buf[1] = 0;
ret = xmlStrncat(ret, buf, 1);
ret = xmlStrcat(ret, node->name);
buf[0] = ';';
buf[1] = 0;
ret = xmlStrncat(ret, buf, 1);
}
}
#if 0
else {
xmlGenericError(xmlGenericErrorContext,
"xmlGetNodeListString : invalid node type %d\n",
node->type);
}
else {
xmlGenericError(xmlGenericErrorContext,
"xmlGetNodeListString : invalid node type %d\n",
node->type);
}
#endif
node = node->next;
node = node->next;
}
return(ret);
return (ret);
}
/**
* xmlNodeListGetRawString:
* @doc: the document
......@@ -965,54 +982,73 @@ xmlNodeListGetString(xmlDocPtr doc, xmlNodePtr list, int inLine) {
* Returns a pointer to the string copy, the caller must free it.
*/
xmlChar *
xmlNodeListGetRawString(xmlDocPtr doc, xmlNodePtr list, int inLine) {
xmlNodeListGetRawString(xmlDocPtr doc, xmlNodePtr list, int inLine)
{
xmlNodePtr node = list;
xmlChar *ret = NULL;
xmlEntityPtr ent;
if (list == NULL) return(NULL);
if (list == NULL)
return (NULL);
while (node != NULL) {
if ((node->type == XML_TEXT_NODE) ||
(node->type == XML_CDATA_SECTION_NODE)) {
if (inLine) {
ret = xmlStrcat(ret, node->content);
} else {
xmlChar *buffer;
(node->type == XML_CDATA_SECTION_NODE)) {
if (inLine) {
ret = xmlStrcat(ret, node->content);
} else {
xmlChar *buffer;
buffer = xmlEncodeSpecialChars(doc, node->content);
if (buffer != NULL) {
ret = xmlStrcat(ret, buffer);
xmlFree(buffer);
}
buffer = xmlEncodeSpecialChars(doc, node->content);
if (buffer != NULL) {
ret = xmlStrcat(ret, buffer);
xmlFree(buffer);
}
}
} else if (node->type == XML_ENTITY_REF_NODE) {
if (inLine) {
ent = xmlGetDocEntity(doc, node->name);
if (ent != NULL)
ret = xmlStrcat(ret, ent->content);
else {
ret = xmlStrcat(ret, node->content);
}
} else if (node->type == XML_ENTITY_REF_NODE) {
if (inLine) {
ent = xmlGetDocEntity(doc, node->name);
if (ent != NULL) {
xmlChar *buffer;
/* an entity content can be any "well balanced chunk",
* i.e. the result of the content [43] production:
* http://www.w3.org/TR/REC-xml#NT-content.
* So it can contain text, CDATA section or nested
* entity reference nodes (among others).
* -> we recursive call xmlNodeListGetRawString()
* which handles these types */
buffer =
xmlNodeListGetRawString(doc, ent->children, 1);
if (buffer != NULL) {
ret = xmlStrcat(ret, buffer);
xmlFree(buffer);
}
} else {
ret = xmlStrcat(ret, node->content);
}
} else {
xmlChar buf[2];
buf[0] = '&'; buf[1] = 0;
ret = xmlStrncat(ret, buf, 1);
ret = xmlStrcat(ret, node->name);
buf[0] = ';'; buf[1] = 0;
ret = xmlStrncat(ret, buf, 1);
}
}
xmlChar buf[2];
buf[0] = '&';
buf[1] = 0;
ret = xmlStrncat(ret, buf, 1);
ret = xmlStrcat(ret, node->name);
buf[0] = ';';
buf[1] = 0;
ret = xmlStrncat(ret, buf, 1);
}
}
#if 0
else {
xmlGenericError(xmlGenericErrorContext,
"xmlGetNodeListString : invalid node type %d\n",
node->type);
}
else {
xmlGenericError(xmlGenericErrorContext,
"xmlGetNodeListString : invalid node type %d\n",
node->type);
}
#endif
node = node->next;
node = node->next;
}
return(ret);
return (ret);
}
/**
......@@ -3763,122 +3799,159 @@ xmlNodeGetBase(xmlDocPtr doc, xmlNodePtr cur) {
* It's up to the caller to free the memory.
*/
xmlChar *
xmlNodeGetContent(xmlNodePtr cur) {
if (cur == NULL) return(NULL);
xmlNodeGetContent(xmlNodePtr cur)
{
if (cur == NULL)
return (NULL);
switch (cur->type) {
case XML_DOCUMENT_FRAG_NODE:
case XML_ELEMENT_NODE: {
xmlNodePtr tmp = cur;
xmlBufferPtr buffer;
xmlChar *ret;
buffer = xmlBufferCreate();
if (buffer == NULL)
return(NULL);
while (tmp != NULL) {
switch (tmp->type) {
case XML_CDATA_SECTION_NODE:
case XML_TEXT_NODE:
if (tmp->content != NULL)
xmlBufferCat(buffer, tmp->content);
break;
case XML_ENTITY_REF_NODE: {
xmlEntityPtr ent;
case XML_ELEMENT_NODE:{
xmlNodePtr tmp = cur;
xmlBufferPtr buffer;
xmlChar *ret;
buffer = xmlBufferCreate();
if (buffer == NULL)
return (NULL);
while (tmp != NULL) {
switch (tmp->type) {
case XML_CDATA_SECTION_NODE:
case XML_TEXT_NODE:
if (tmp->content != NULL)
xmlBufferCat(buffer, tmp->content);
break;
case XML_ENTITY_REF_NODE:{
/* recursive substitution of entity references */
xmlChar *cont = xmlNodeGetContent(tmp);
if (cont) {
xmlBufferCat(buffer,
(const xmlChar *) cont);
xmlFree(cont);
}
break;
}
default:
break;
}
/*
* Skip to next node
*/
if (tmp->children != NULL) {
if (tmp->children->type != XML_ENTITY_DECL) {
tmp = tmp->children;
continue;
}
}
if (tmp == cur)
break;
ent = xmlGetDocEntity(cur->doc, tmp->name);
if (ent != NULL)
xmlBufferCat(buffer, ent->content);
}
default:
break;
}
/*
* Skip to next node
*/
if (tmp->children != NULL) {
if (tmp->children->type != XML_ENTITY_DECL) {
tmp = tmp->children;
continue;
}
}
if (tmp == cur)
break;
if (tmp->next != NULL) {
tmp = tmp->next;
continue;
}
if (tmp->next != NULL) {
tmp = tmp->next;
continue;
}
do {
tmp = tmp->parent;
if (tmp == NULL)
break;
if (tmp == cur) {
tmp = NULL;
break;
}
if (tmp->next != NULL) {
tmp = tmp->next;
break;
}
} while (tmp != NULL);
}
ret = buffer->content;
buffer->content = NULL;
xmlBufferFree(buffer);
return(ret);
}
case XML_ATTRIBUTE_NODE: {
xmlAttrPtr attr = (xmlAttrPtr) cur;
if (attr->parent != NULL)
return(xmlNodeListGetString(attr->parent->doc, attr->children, 1));
else
return(xmlNodeListGetString(NULL, attr->children, 1));
break;
}
do {
tmp = tmp->parent;
if (tmp == NULL)
break;
if (tmp == cur) {
tmp = NULL;
break;
}
if (tmp->next != NULL) {
tmp = tmp->next;
break;
}
} while (tmp != NULL);
}
ret = buffer->content;
buffer->content = NULL;
xmlBufferFree(buffer);
return (ret);
}
case XML_ATTRIBUTE_NODE:{
xmlAttrPtr attr = (xmlAttrPtr) cur;
if (attr->parent != NULL)
return (xmlNodeListGetString
(attr->parent->doc, attr->children, 1));
else
return (xmlNodeListGetString(NULL, attr->children, 1));
break;
}
case XML_COMMENT_NODE:
case XML_PI_NODE:
if (cur->content != NULL)
return(xmlStrdup(cur->content));
return(NULL);
case XML_ENTITY_REF_NODE:
/*
* Locate the entity, and get it's content
* @@@
*/
return(NULL);
if (cur->content != NULL)
return (xmlStrdup(cur->content));
return (NULL);
case XML_ENTITY_REF_NODE:{
xmlEntityPtr ent;
xmlNodePtr tmp;
xmlBufferPtr buffer;
xmlChar *ret;
/* lookup entity declaration */
ent = xmlGetDocEntity(cur->doc, cur->name);
if (ent == NULL)
return (NULL);
buffer = xmlBufferCreate();
if (buffer == NULL)
return (NULL);
/* an entity content can be any "well balanced chunk",
* i.e. the result of the content [43] production:
* http://www.w3.org/TR/REC-xml#NT-content
* -> we iterate through child nodes and recursive call
* xmlNodeGetContent() which handles all possible node types */
tmp = ent->children;
while (tmp) {
xmlChar *cont = xmlNodeGetContent(tmp);
if (cont) {
xmlBufferCat(buffer, (const xmlChar *) cont);
xmlFree(cont);
}
tmp = tmp->next;
}
ret = buffer->content;
buffer->content = NULL;
xmlBufferFree(buffer);
return (ret);
}
case XML_ENTITY_NODE:
case XML_DOCUMENT_NODE:
case XML_HTML_DOCUMENT_NODE:
case XML_DOCUMENT_TYPE_NODE:
case XML_NOTATION_NODE:
case XML_DTD_NODE:
case XML_XINCLUDE_START:
case XML_XINCLUDE_END:
case XML_XINCLUDE_START:
case XML_XINCLUDE_END:
#ifdef LIBXML_DOCB_ENABLED
case XML_DOCB_DOCUMENT_NODE:
case XML_DOCB_DOCUMENT_NODE:
#endif
return(NULL);
case XML_NAMESPACE_DECL:
return(xmlStrdup(((xmlNsPtr)cur)->href));
return (NULL);
case XML_NAMESPACE_DECL:
return (xmlStrdup(((xmlNsPtr) cur)->href));
case XML_ELEMENT_DECL:
/* TODO !!! */
return(NULL);
/* TODO !!! */
return (NULL);
case XML_ATTRIBUTE_DECL:
/* TODO !!! */
return(NULL);
/* TODO !!! */
return (NULL);
case XML_ENTITY_DECL:
/* TODO !!! */
return(NULL);
/* TODO !!! */
return (NULL);
case XML_CDATA_SECTION_NODE:
case XML_TEXT_NODE:
if (cur->content != NULL)
return(xmlStrdup(cur->content));
return(NULL);
if (cur->content != NULL)
return (xmlStrdup(cur->content));
return (NULL);
}
return(NULL);
return (NULL);
}
/**
* xmlNodeSetContent:
* @cur: the node being modified
......
......@@ -109,6 +109,8 @@ typedef enum {
XML_REGEXP_QUANT_OPT,
XML_REGEXP_QUANT_MULT,
XML_REGEXP_QUANT_PLUS,
XML_REGEXP_QUANT_ONCEONLY,
XML_REGEXP_QUANT_ALL,
XML_REGEXP_QUANT_RANGE
} xmlRegQuantType;
......@@ -279,6 +281,8 @@ struct _xmlRegExecCtxt {
};
#define REGEXP_ALL_COUNTER 0x123456
static void xmlFAParseRegExp(xmlRegParserCtxtPtr ctxt, int top);
/************************************************************************
......@@ -630,6 +634,10 @@ xmlRegPrintQuantType(FILE *output, xmlRegQuantType type) {
fprintf(output, "+ "); break;
case XML_REGEXP_QUANT_RANGE:
fprintf(output, "range "); break;
case XML_REGEXP_QUANT_ONCEONLY:
fprintf(output, "onceonly "); break;
case XML_REGEXP_QUANT_ALL:
fprintf(output, "all "); break;
}
}
static void
......@@ -942,6 +950,24 @@ xmlRegStatePush(xmlRegParserCtxtPtr ctxt, xmlRegStatePtr state) {
ctxt->states[ctxt->nbStates++] = state;
}
/**
 * xmlFAGenerateAllTransition:
 * ctxt:  a regexp parser context
 * from:  the state the transition leaves
 * to:  the state the transition reaches, or NULL to have one built
 *
 * Links @from to the target state with an atom-less transition tagged
 * with REGEXP_ALL_COUNTER.  When no target is supplied, a new state is
 * allocated, registered in the context and made the current state
 * before the transition is added.
 */
static void
xmlFAGenerateAllTransition(xmlRegParserCtxtPtr ctxt,
                           xmlRegStatePtr from, xmlRegStatePtr to) {
    xmlRegStatePtr target = to;

    if (target == NULL) {
        /* no destination given: build one and make it current */
        target = xmlRegNewState(ctxt);
        xmlRegStatePush(ctxt, target);
        ctxt->state = target;
    }

    /* NULL atom, -1 in the counter slot, REGEXP_ALL_COUNTER as marker */
    xmlRegStateAddTrans(ctxt, from, NULL, target, -1, REGEXP_ALL_COUNTER);
}
/**
* xmlFAGenerateEpsilonTransition:
* ctxt: a regexp parser context
......@@ -3423,6 +3449,69 @@ xmlAutomataNewCountTrans(xmlAutomataPtr am, xmlAutomataStatePtr from,
return(to);
}
/**
 * xmlAutomataNewOnceTrans:
 * @am: an automata
 * @from: the starting point of the transition
 * @to: the target point of the transition or NULL
 * @token: the input string associated to that transition
 * @min: the minimum successive occurrences of token
 * @max: the maximum successive occurrences of token
 * @data: data associated to the transition
 *
 * If @to is NULL, this creates first a new target state in the automata
 * and then adds a transition from the @from state to the target state
 * activated by a succession of input of value @token and whose number
 * is between @min and @max, moreover that transition can only be crossed
 * once.
 *
 * @min must be at least 1 and @max must not be smaller than @min,
 * otherwise nothing is added and NULL is returned.  On success the
 * target state also becomes the current state of the automata.
 *
 * Returns the target state or NULL in case of error
 */
xmlAutomataStatePtr
xmlAutomataNewOnceTrans(xmlAutomataPtr am, xmlAutomataStatePtr from,
                        xmlAutomataStatePtr to, const xmlChar *token,
                        int min, int max, void *data) {
    xmlRegAtomPtr atom;
    int counter;

    if ((am == NULL) || (from == NULL) || (token == NULL))
        return(NULL);
    /* with min >= 1, requiring max >= min also guarantees max >= 1 */
    if ((min < 1) || (max < min))
        return(NULL);

    /*
     * associate a counter to the transition; min == max == 1 bounds it
     * to a single crossing.  The counter is reserved before the atom is
     * allocated so that the failure path below has nothing to release.
     * A negative index must never reach am->counters[]; presumably
     * xmlRegGetCounter() reports failure that way -- TODO confirm.
     */
    counter = xmlRegGetCounter(am);
    if (counter < 0)
        return(NULL);
    am->counters[counter].min = 1;
    am->counters[counter].max = 1;

    atom = xmlRegNewAtom(am, XML_REGEXP_STRING);
    if (atom == NULL)
        return(NULL);
    /* NOTE(review): the result of xmlStrdup() is not checked here */
    atom->valuep = xmlStrdup(token);
    atom->data = data;
    atom->quant = XML_REGEXP_QUANT_ONCEONLY;
    atom->min = min;
    atom->max = max;

    /* xmlFAGenerateTransitions(am, from, to, atom); */
    if (to == NULL) {
        /* NOTE(review): the result of xmlRegNewState() is not checked */
        to = xmlRegNewState(am);
        xmlRegStatePush(am, to);
    }
    xmlRegStateAddTrans(am, from, atom, to, counter, -1);
    xmlRegAtomPush(am, atom);
    am->state = to;
    return(to);
}
/**
* xmlAutomataNewState:
* @am: an automata
......@@ -3465,6 +3554,30 @@ xmlAutomataNewEpsilon(xmlAutomataPtr am, xmlAutomataStatePtr from,
return(to);
}
/**
* xmlAutomataNewAllTrans: