parser.c 336 KB
Newer Older
1 2 3 4 5 6 7 8 9 10 11 12 13 14
/*
 * parser.c : an XML 1.0 parser, namespaces and validity support are mostly
 *            implemented on top of the SAX interfaces
 *
 * References:
 *   The XML specification:
 *     http://www.w3.org/TR/REC-xml
 *   Original 1.0 version:
 *     http://www.w3.org/TR/1998/REC-xml-19980210
 *   XML second edition working draft
 *     http://www.w3.org/TR/2000/WD-xml-2e-20000814
 *
 * Okay this is a big file, the parser core is around 7000 lines, then it
 * is followed by the progressive parser top routines, then the various
15
 * high level APIs to call the parser and a few miscellaneous functions.
16 17 18 19 20 21 22 23 24
 * A number of helper functions and deprecated ones have been moved to
 * parserInternals.c to reduce this file size.
 * As much as possible the functions are associated with their relative
 * production in the XML specification. A few productions defining the
 * different ranges of character are actually implemented either in
 * parserInternals.h or parserInternals.c
 * The DOM tree build is realized from the default SAX callbacks in
 * the module SAX.c.
 * The routines doing the validation checks are in valid.c and called either
25
 * from the SAX callbacks or as standalone functions using a preparsed
26 27 28 29
 * document.
 *
 * See Copyright for the status of this software.
 *
30
 * daniel@veillard.com
31 32
 */

33
#define IN_LIBXML
Bjorn Reese's avatar
Bjorn Reese committed
34 35
#include "libxml.h"

36
#if defined(WIN32) && !defined (__CYGWIN__)
37 38 39 40 41 42 43
#define XML_DIR_SEP '\\'
#else
#define XML_DIR_SEP '/'
#endif

#include <stdlib.h>
#include <string.h>
44
#include <stdarg.h>
45
#include <libxml/xmlmemory.h>
46 47
#include <libxml/threads.h>
#include <libxml/globals.h>
48 49 50 51 52 53 54 55 56
#include <libxml/tree.h>
#include <libxml/parser.h>
#include <libxml/parserInternals.h>
#include <libxml/valid.h>
#include <libxml/entities.h>
#include <libxml/xmlerror.h>
#include <libxml/encoding.h>
#include <libxml/xmlIO.h>
#include <libxml/uri.h>
57 58 59
#ifdef LIBXML_CATALOG_ENABLED
#include <libxml/catalog.h>
#endif
60 61 62 63 64 65 66 67 68 69 70 71 72 73 74 75 76 77 78 79

#ifdef HAVE_CTYPE_H
#include <ctype.h>
#endif
#ifdef HAVE_STDLIB_H
#include <stdlib.h>
#endif
#ifdef HAVE_SYS_STAT_H
#include <sys/stat.h>
#endif
#ifdef HAVE_FCNTL_H
#include <fcntl.h>
#endif
#ifdef HAVE_UNISTD_H
#include <unistd.h>
#endif
#ifdef HAVE_ZLIB_H
#include <zlib.h>
#endif

80 81 82 83 84 85 86 87
/**
 * MAX_DEPTH:
 *
 * arbitrary depth limit for the XML documents that we allow to 
 * process. This is not a limitation of the parser but a safety 
 * boundary feature.
 */
#define MAX_DEPTH 1024
88

89 90
#define SAX2 1

91
#define XML_PARSER_BIG_BUFFER_SIZE 300
92 93
#define XML_PARSER_BUFFER_SIZE 100

94 95
#define SAX_COMPAT_MODE BAD_CAST "SAX compatibility mode document"

96 97 98 99
/*
 * Targets of "xml"-prefixed processing instructions that the W3C
 * specifications allow. The list is terminated by a NULL entry.
 */
static const char *xmlW3CPIs[] = { "xml-stylesheet", NULL };

/* DEPR void xmlParserHandleReference(xmlParserCtxtPtr ctxt); */
xmlEntityPtr xmlParseStringPEReference(xmlParserCtxtPtr ctxt,
                                       const xmlChar **str);

109
static xmlParserErrors
110 111
xmlParseExternalEntityPrivate(xmlDocPtr doc, xmlParserCtxtPtr oldctxt,
	              xmlSAXHandlerPtr sax,
112
		      void *user_data, int depth, const xmlChar *URL,
113
		      const xmlChar *ID, xmlNodePtr *list);
114

115
#ifdef LIBXML_LEGACY_ENABLED
116 117 118
static void
xmlAddEntityReference(xmlEntityPtr ent, xmlNodePtr firstNode,
                      xmlNodePtr lastNode);
119
#endif /* LIBXML_LEGACY_ENABLED */
120

121
static xmlParserErrors
122 123
xmlParseBalancedChunkMemoryInternal(xmlParserCtxtPtr oldctxt,
		      const xmlChar *string, void *user_data, xmlNodePtr *lst);
124

125 126 127 128 129 130
/************************************************************************
 *									*
 * 		Some factorized error routines				*
 *									*
 ************************************************************************/

131

132 133 134 135 136 137 138 139 140 141 142 143 144 145 146
/**
 * xmlErrMemory:
 * @ctxt:  an XML parser context, may be NULL
 * @extra:  optional detail appended to the message, may be NULL
 *
 * Report a memory allocation failure. When a context is given it is
 * marked with XML_ERR_NO_MEMORY, switched to the XML_PARSER_EOF state
 * and its SAX callbacks are disabled before the error is raised.
 */
static void
xmlErrMemory(xmlParserCtxtPtr ctxt, const char *extra)
{
    if (ctxt != NULL) {
        ctxt->errNo = XML_ERR_NO_MEMORY;
        ctxt->instate = XML_PARSER_EOF;
        ctxt->disableSAX = 1;
    }

    /* Plain message when the caller has no extra detail to offer. */
    if (extra == NULL) {
        __xmlRaiseError(NULL, NULL, ctxt, NULL, XML_FROM_PARSER,
                        XML_ERR_NO_MEMORY, XML_ERR_FATAL, NULL, 0, NULL,
                        NULL, NULL, 0, 0, "Memory allocation failed\n");
        return;
    }

    __xmlRaiseError(NULL, NULL, ctxt, NULL, XML_FROM_PARSER,
                    XML_ERR_NO_MEMORY, XML_ERR_FATAL, NULL, 0, extra,
                    NULL, NULL, 0, 0,
                    "Memory allocation failed : %s\n", extra);
}

/**
 * xmlErrAttributeDup:
 * @ctxt:  an XML parser context (dereferenced, must not be NULL)
 * @prefix:  the attribute prefix, or NULL for an unprefixed attribute
 * @localname:  the attribute localname
 *
 * Report that an attribute was specified more than once. The context
 * is marked as not well-formed and, unless it is in recovery mode, its
 * SAX callbacks are disabled.
 */
static void
xmlErrAttributeDup(xmlParserCtxtPtr ctxt, const xmlChar * prefix,
                   const xmlChar * localname)
{
    ctxt->errNo = XML_ERR_ATTRIBUTE_REDEFINED;

    if (prefix != NULL) {
        /* Qualified attribute: report it as prefix:localname. */
        __xmlRaiseError(NULL, NULL, ctxt, NULL, XML_FROM_PARSER,
                        ctxt->errNo, XML_ERR_FATAL, NULL, 0,
                        (const char *) prefix, (const char *) localname,
                        NULL, 0, 0, "Attribute %s:%s redefined\n", prefix,
                        localname);
    } else {
        __xmlRaiseError(NULL, NULL, ctxt, NULL, XML_FROM_PARSER,
                        ctxt->errNo, XML_ERR_FATAL, NULL, 0,
                        (const char *) localname, NULL, NULL, 0, 0,
                        "Attribute %s redefined\n", localname);
    }

    ctxt->wellFormed = 0;
    if (!ctxt->recovery) {
        ctxt->disableSAX = 1;
    }
}

/**
 * xmlFatalErr:
 * @ctxt:  an XML parser context
 * @error:  the error number
 * @extra:  extra information string
 *
 * Handle a fatal parser error, i.e. violating Well-Formedness constraints
 */
static void
196
xmlFatalErr(xmlParserCtxtPtr ctxt, xmlParserErrors error, const char *info)
197 198 199 200 201
{
    const char *errmsg;

    switch (error) {
        case XML_ERR_INVALID_HEX_CHARREF:
202 203
            errmsg = "CharRef: invalid hexadecimal value\n";
            break;
204
        case XML_ERR_INVALID_DEC_CHARREF:
205 206
            errmsg = "CharRef: invalid decimal value\n";
            break;
207
        case XML_ERR_INVALID_CHARREF:
208 209
            errmsg = "CharRef: invalid value\n";
            break;
210
        case XML_ERR_INTERNAL_ERROR:
211 212
            errmsg = "internal error";
            break;
213
        case XML_ERR_PEREF_AT_EOF:
214 215
            errmsg = "PEReference at end of document\n";
            break;
216
        case XML_ERR_PEREF_IN_PROLOG:
217 218
            errmsg = "PEReference in prolog\n";
            break;
219
        case XML_ERR_PEREF_IN_EPILOG:
220 221
            errmsg = "PEReference in epilog\n";
            break;
222
        case XML_ERR_PEREF_NO_NAME:
223 224
            errmsg = "PEReference: no name\n";
            break;
225
        case XML_ERR_PEREF_SEMICOL_MISSING:
226 227
            errmsg = "PEReference: expecting ';'\n";
            break;
228
        case XML_ERR_ENTITY_LOOP:
229 230
            errmsg = "Detected an entity reference loop\n";
            break;
231
        case XML_ERR_ENTITY_NOT_STARTED:
232 233
            errmsg = "EntityValue: \" or ' expected\n";
            break;
234
        case XML_ERR_ENTITY_PE_INTERNAL:
235 236
            errmsg = "PEReferences forbidden in internal subset\n";
            break;
237
        case XML_ERR_ENTITY_NOT_FINISHED:
238 239
            errmsg = "EntityValue: \" or ' expected\n";
            break;
240
        case XML_ERR_ATTRIBUTE_NOT_STARTED:
241 242
            errmsg = "AttValue: \" or ' expected\n";
            break;
243
        case XML_ERR_LT_IN_ATTRIBUTE:
244 245
            errmsg = "Unescaped '<' not allowed in attributes values\n";
            break;
246
        case XML_ERR_LITERAL_NOT_STARTED:
247 248
            errmsg = "SystemLiteral \" or ' expected\n";
            break;
249
        case XML_ERR_LITERAL_NOT_FINISHED:
250 251
            errmsg = "Unfinished System or Public ID \" or ' expected\n";
            break;
252
        case XML_ERR_MISPLACED_CDATA_END:
253 254
            errmsg = "Sequence ']]>' not allowed in content\n";
            break;
255
        case XML_ERR_URI_REQUIRED:
256 257
            errmsg = "SYSTEM or PUBLIC, the URI is missing\n";
            break;
258
        case XML_ERR_PUBID_REQUIRED:
259 260
            errmsg = "PUBLIC, the Public Identifier is missing\n";
            break;
261
        case XML_ERR_HYPHEN_IN_COMMENT:
262 263
            errmsg = "Comment must not contain '--' (double-hyphen)\n";
            break;
264
        case XML_ERR_PI_NOT_STARTED:
265 266
            errmsg = "xmlParsePI : no target name\n";
            break;
267
        case XML_ERR_RESERVED_XML_NAME:
268 269
            errmsg = "Invalid PI name\n";
            break;
270
        case XML_ERR_NOTATION_NOT_STARTED:
271 272
            errmsg = "NOTATION: Name expected here\n";
            break;
273
        case XML_ERR_NOTATION_NOT_FINISHED:
274 275
            errmsg = "'>' required to close NOTATION declaration\n";
            break;
276
        case XML_ERR_VALUE_REQUIRED:
277 278
            errmsg = "Entity value required\n";
            break;
279
        case XML_ERR_URI_FRAGMENT:
280 281
            errmsg = "Fragment not allowed";
            break;
282
        case XML_ERR_ATTLIST_NOT_STARTED:
283 284
            errmsg = "'(' required to start ATTLIST enumeration\n";
            break;
285
        case XML_ERR_NMTOKEN_REQUIRED:
286 287
            errmsg = "NmToken expected in ATTLIST enumeration\n";
            break;
288
        case XML_ERR_ATTLIST_NOT_FINISHED:
289 290
            errmsg = "')' required to finish ATTLIST enumeration\n";
            break;
291
        case XML_ERR_MIXED_NOT_STARTED:
292 293
            errmsg = "MixedContentDecl : '|' or ')*' expected\n";
            break;
294
        case XML_ERR_PCDATA_REQUIRED:
295 296
            errmsg = "MixedContentDecl : '#PCDATA' expected\n";
            break;
297
        case XML_ERR_ELEMCONTENT_NOT_STARTED:
298 299
            errmsg = "ContentDecl : Name or '(' expected\n";
            break;
300
        case XML_ERR_ELEMCONTENT_NOT_FINISHED:
301 302
            errmsg = "ContentDecl : ',' '|' or ')' expected\n";
            break;
303
        case XML_ERR_PEREF_IN_INT_SUBSET:
304 305 306
            errmsg =
                "PEReference: forbidden within markup decl in internal subset\n";
            break;
307
        case XML_ERR_GT_REQUIRED:
308 309
            errmsg = "expected '>'\n";
            break;
310
        case XML_ERR_CONDSEC_INVALID:
311 312
            errmsg = "XML conditional section '[' expected\n";
            break;
313
        case XML_ERR_EXT_SUBSET_NOT_FINISHED:
314 315 316 317 318 319
            errmsg = "Content error in the external subset\n";
            break;
        case XML_ERR_CONDSEC_INVALID_KEYWORD:
            errmsg =
                "conditional section INCLUDE or IGNORE keyword expected\n";
            break;
320
        case XML_ERR_CONDSEC_NOT_FINISHED:
321 322
            errmsg = "XML conditional section not closed\n";
            break;
323
        case XML_ERR_XMLDECL_NOT_STARTED:
324 325
            errmsg = "Text declaration '<?xml' required\n";
            break;
326
        case XML_ERR_XMLDECL_NOT_FINISHED:
327 328
            errmsg = "parsing XML declaration: '?>' expected\n";
            break;
329
        case XML_ERR_EXT_ENTITY_STANDALONE:
330 331
            errmsg = "external parsed entities cannot be standalone\n";
            break;
332
        case XML_ERR_ENTITYREF_SEMICOL_MISSING:
333 334
            errmsg = "EntityRef: expecting ';'\n";
            break;
335
        case XML_ERR_DOCTYPE_NOT_FINISHED:
336 337
            errmsg = "DOCTYPE improperly terminated\n";
            break;
338
        case XML_ERR_LTSLASH_REQUIRED:
339 340
            errmsg = "EndTag: '</' not found\n";
            break;
341
        case XML_ERR_EQUAL_REQUIRED:
342 343
            errmsg = "expected '='\n";
            break;
344
        case XML_ERR_STRING_NOT_CLOSED:
345 346
            errmsg = "String not closed expecting \" or '\n";
            break;
347
        case XML_ERR_STRING_NOT_STARTED:
348 349
            errmsg = "String not started expecting ' or \"\n";
            break;
350
        case XML_ERR_ENCODING_NAME:
351 352
            errmsg = "Invalid XML encoding name\n";
            break;
353
        case XML_ERR_STANDALONE_VALUE:
354 355
            errmsg = "standalone accepts only 'yes' or 'no'\n";
            break;
356
        case XML_ERR_DOCUMENT_EMPTY:
357 358
            errmsg = "Document is empty\n";
            break;
359
        case XML_ERR_DOCUMENT_END:
360 361
            errmsg = "Extra content at the end of the document\n";
            break;
362
        case XML_ERR_NOT_WELL_BALANCED:
363 364
            errmsg = "chunk is not well balanced\n";
            break;
365
        case XML_ERR_EXTRA_CONTENT:
366 367
            errmsg = "extra content at the end of well balanced chunk\n";
            break;
368
        case XML_ERR_VERSION_MISSING:
369 370
            errmsg = "Malformed declaration expecting version\n";
            break;
371
#if 0
372 373 374
        case:
            errmsg = "\n";
            break;
375
#endif
376 377
        default:
            errmsg = "Unregistered error message\n";
378 379
    }
    ctxt->errNo = error;
380 381 382
    __xmlRaiseError(NULL, NULL, ctxt, NULL, XML_FROM_PARSER, error,
                    XML_ERR_FATAL, NULL, 0, info, NULL, NULL, 0, 0, errmsg,
                    info);
383 384 385 386 387
    ctxt->wellFormed = 0;
    if (ctxt->recovery == 0)
        ctxt->disableSAX = 1;
}

388 389 390 391 392 393 394 395 396
/**
 * xmlFatalErrMsg:
 * @ctxt:  an XML parser context
 * @error:  the error number
 * @msg:  the error message
 *
 * Handle a fatal parser error, i.e. violating Well-Formedness constraints
 */
static void
397 398
xmlFatalErrMsg(xmlParserCtxtPtr ctxt, xmlParserErrors error,
               const char *msg)
399 400
{
    ctxt->errNo = error;
401 402
    __xmlRaiseError(NULL, NULL, ctxt, NULL, XML_FROM_PARSER, error,
                    XML_ERR_FATAL, NULL, 0, NULL, NULL, NULL, 0, 0, msg);
403 404 405 406 407
    ctxt->wellFormed = 0;
    if (ctxt->recovery == 0)
        ctxt->disableSAX = 1;
}

408 409 410 411 412 413 414 415 416 417 418 419 420 421 422 423 424 425 426 427 428 429 430 431 432 433 434 435 436 437 438 439 440 441 442 443 444 445 446 447 448 449 450 451
/**
 * xmlWarningMsg:
 * @ctxt:  an XML parser context (dereferenced, must not be NULL)
 * @error:  the error number
 * @msg:  the warning message, used as format string
 * @str1:  first extra string, also passed as format argument
 * @str2:  second extra string, also passed as format argument
 *
 * Emit a warning. The SAX warning callback of the context is used as
 * channel when a SAX handler block is attached, otherwise no channel is
 * given to the error layer. The well-formedness state is left untouched.
 */
static void
xmlWarningMsg(xmlParserCtxtPtr ctxt, xmlParserErrors error,
              const char *msg, const xmlChar *str1, const xmlChar *str2)
{
    const char *first = (const char *) str1;
    const char *second = (const char *) str2;

    ctxt->errNo = error;

    if (ctxt->sax) {
        __xmlRaiseError(ctxt->sax->warning, ctxt->userData,
                        ctxt, NULL, XML_FROM_PARSER, error,
                        XML_ERR_WARNING, NULL, 0,
                        first, second, NULL, 0, 0,
                        msg, first, second);
    } else {
        __xmlRaiseError(NULL, ctxt->userData,
                        ctxt, NULL, XML_FROM_PARSER, error,
                        XML_ERR_WARNING, NULL, 0,
                        first, second, NULL, 0, 0,
                        msg, first, second);
    }
}

/**
 * xmlValidityError:
 * @ctxt:  an XML parser context (dereferenced, must not be NULL)
 * @error:  the error number
 * @msg:  the error message, used as format string
 * @str1:  extra string, also passed as format argument
 *
 * Report a validity error (domain XML_FROM_DTD, level XML_ERR_ERROR)
 * through the error callback and user data of the embedded validation
 * context, then mark the document as not valid.
 */
static void
xmlValidityError(xmlParserCtxtPtr ctxt, xmlParserErrors error,
              const char *msg, const xmlChar *str1)
{
    const char *detail = (const char *) str1;

    ctxt->errNo = error;
    __xmlRaiseError(ctxt->vctxt.error, ctxt->vctxt.userData,
                    ctxt, NULL,
                    XML_FROM_DTD, error, XML_ERR_ERROR,
                    NULL, 0,
                    detail, NULL, NULL,
                    0, 0,
                    msg, detail);
    ctxt->valid = 0;
}

452 453 454 455 456 457 458 459 460 461 462
/**
 * xmlFatalErrMsgInt:
 * @ctxt:  an XML parser context (dereferenced, must not be NULL)
 * @error:  the error number
 * @msg:  the error message, used as format string
 * @val:  an integer value, recorded in the error and passed as the
 *        format argument
 *
 * Handle a fatal parser error, i.e. violating Well-Formedness constraints,
 * whose message carries one integer.
 */
static void
xmlFatalErrMsgInt(xmlParserCtxtPtr ctxt, xmlParserErrors error,
                  const char *msg, int val)
{
    ctxt->errNo = error;

    __xmlRaiseError(NULL, NULL,
                    ctxt, NULL,
                    XML_FROM_PARSER, error, XML_ERR_FATAL,
                    NULL, 0,
                    NULL, NULL, NULL,
                    val, 0,
                    msg, val);

    ctxt->wellFormed = 0;
    if (!ctxt->recovery) {
        ctxt->disableSAX = 1;
    }
}

474 475 476 477 478 479 480 481 482 483 484
/**
 * xmlFatalErrMsgStr:
 * @ctxt:  an XML parser context (dereferenced, must not be NULL)
 * @error:  the error number
 * @msg:  the error message, used as format string
 * @val:  a string value, recorded in the error and passed as the
 *        format argument
 *
 * Handle a fatal parser error, i.e. violating Well-Formedness constraints,
 * whose message carries one string.
 */
static void
xmlFatalErrMsgStr(xmlParserCtxtPtr ctxt, xmlParserErrors error,
                  const char *msg, const xmlChar * val)
{
    ctxt->errNo = error;

    __xmlRaiseError(NULL, NULL,
                    ctxt, NULL,
                    XML_FROM_PARSER, error, XML_ERR_FATAL,
                    NULL, 0,
                    (const char *) val, NULL, NULL,
                    0, 0,
                    msg, val);

    ctxt->wellFormed = 0;
    if (!ctxt->recovery) {
        ctxt->disableSAX = 1;
    }
}

497 498 499 500 501 502 503 504 505 506 507 508 509
/**
 * xmlNsErr:
 * @ctxt:  an XML parser context (dereferenced, must not be NULL)
 * @error:  the error number
 * @msg:  the message, used as format string
 * @info1:  first extra information string
 * @info2:  second extra information string
 * @info3:  third extra information string
 *
 * Report a namespace error (domain XML_FROM_NAMESPACE, level
 * XML_ERR_ERROR). Only the namespace well-formedness flag of the
 * context is cleared; SAX processing is not disabled here.
 */
static void
xmlNsErr(xmlParserCtxtPtr ctxt, xmlParserErrors error,
         const char *msg,
         const xmlChar * info1, const xmlChar * info2,
         const xmlChar * info3)
{
    ctxt->errNo = error;

    __xmlRaiseError(NULL, NULL,
                    ctxt, NULL,
                    XML_FROM_NAMESPACE, error, XML_ERR_ERROR,
                    NULL, 0,
                    (const char *) info1,
                    (const char *) info2,
                    (const char *) info3,
                    0, 0,
                    msg, info1, info2, info3);

    ctxt->nsWellFormed = 0;
}

521 522 523 524 525 526 527 528 529 530 531 532 533 534 535
/************************************************************************
 *									*
 * 		SAX2 defaulted attributes handling			*
 *									*
 ************************************************************************/

/**
 * xmlDetectSAX2:
 * @ctxt:  an XML parser context, may be NULL (then nothing is done)
 *
 * Do the SAX2 detection and the related initialization. When SAX1
 * support is compiled in, SAX2 mode is only switched on for a handler
 * block carrying the SAX2 magic value and providing at least one of the
 * namespace-aware element callbacks; without SAX1 support it is always
 * on. The "xml", "xmlns" and XML namespace strings are also interned in
 * the context dictionary.
 */
static void
xmlDetectSAX2(xmlParserCtxtPtr ctxt) {
    if (ctxt == NULL) {
        return;
    }

#ifdef LIBXML_SAX1_ENABLED
    if ((ctxt->sax) && (ctxt->sax->initialized == XML_SAX2_MAGIC)) {
        if ((ctxt->sax->startElementNs != NULL) ||
            (ctxt->sax->endElementNs != NULL)) {
            ctxt->sax2 = 1;
        }
    }
#else
    ctxt->sax2 = 1;
#endif /* LIBXML_SAX1_ENABLED */

    ctxt->str_xml = xmlDictLookup(ctxt->dict, BAD_CAST "xml", 3);
    ctxt->str_xmlns = xmlDictLookup(ctxt->dict, BAD_CAST "xmlns", 5);
    ctxt->str_xml_ns = xmlDictLookup(ctxt->dict, XML_XML_NAMESPACE, 36);
}

/*
 * Per-element record of defaulted attributes, filled by xmlAddDefAttrs().
 * The record is allocated with extra room after the structure, so
 * values[] is indexed well beyond its declared 4 entries: attribute i
 * occupies values[4*i .. 4*i+3], holding in that order the local name,
 * the prefix (or NULL), the default value and a pointer to the end of
 * that value.
 */
typedef struct _xmlDefAttrs xmlDefAttrs;
typedef xmlDefAttrs *xmlDefAttrsPtr;
struct _xmlDefAttrs {
    int nbAttrs;	/* number of defaulted attributes on that element */
    int maxAttrs;       /* capacity, counted in attributes (not in slots) */
    const xmlChar *values[4]; /* 4 slots per attribute, see above */
};

/**
 * xmlAddDefAttrs:
 * @ctxt:  an XML parser context
 * @fullname:  the element fullname
 * @fullattr:  the attribute fullname
 * @value:  the attribute value
 *
 * Add a defaulted attribute for an element. The record for the element
 * is looked up (or created) in ctxt->attsDefault under the element's
 * localname/prefix pair, grown when full, and the attribute's localname,
 * prefix, interned value and end-of-value pointer are appended to it.
 * Any allocation or interning failure is reported through xmlErrMemory()
 * and leaves the record without the new attribute.
 */
static void
xmlAddDefAttrs(xmlParserCtxtPtr ctxt,
               const xmlChar *fullname,
               const xmlChar *fullattr,
               const xmlChar *value) {
    xmlDefAttrsPtr defaults;
    int len;
    const xmlChar *name;
    const xmlChar *prefix;

    if (ctxt->attsDefault == NULL) {
        ctxt->attsDefault = xmlHashCreate(10);
        if (ctxt->attsDefault == NULL)
            goto mem_error;
    }

    /*
     * Split the element name into prefix:localname; the strings found
     * are within the DTD and hence not associated to namespace names.
     */
    name = xmlSplitQName3(fullname, &len);
    if (name == NULL) {
        name = xmlDictLookup(ctxt->dict, fullname, -1);
        prefix = NULL;
    } else {
        name = xmlDictLookup(ctxt->dict, name, -1);
        prefix = xmlDictLookup(ctxt->dict, fullname, len);
        if (prefix == NULL)
            goto mem_error;
    }
    /* A NULL key cannot be stored in the hash table. */
    if (name == NULL)
        goto mem_error;

    /*
     * make sure there is some storage
     */
    defaults = xmlHashLookup2(ctxt->attsDefault, name, prefix);
    if (defaults == NULL) {
        /* values[4] plus 12 extra slots: room for 4 attributes. */
        defaults = (xmlDefAttrsPtr) xmlMalloc(sizeof(xmlDefAttrs) +
                                       12 * sizeof(const xmlChar *));
        if (defaults == NULL)
            goto mem_error;
        defaults->maxAttrs = 4;
        defaults->nbAttrs = 0;
        if (xmlHashUpdateEntry2(ctxt->attsDefault, name, prefix,
                                defaults, NULL) < 0) {
            /* Not registered anywhere: release it instead of leaking. */
            xmlFree(defaults);
            goto mem_error;
        }
    } else if (defaults->nbAttrs >= defaults->maxAttrs) {
        xmlDefAttrsPtr grown;

        grown = (xmlDefAttrsPtr) xmlRealloc(defaults, sizeof(xmlDefAttrs) +
                       (2 * defaults->maxAttrs * 4) * sizeof(const xmlChar *));
        if (grown == NULL)
            goto mem_error;
        defaults = grown;
        defaults->maxAttrs *= 2;
        /* The block may have moved: refresh the pointer kept in the table. */
        xmlHashUpdateEntry2(ctxt->attsDefault, name, prefix, defaults, NULL);
    }

    /*
     * Split the attribute name into prefix:localname; the strings found
     * are within the DTD and hence not associated to namespace names.
     */
    name = xmlSplitQName3(fullattr, &len);
    if (name == NULL) {
        name = xmlDictLookup(ctxt->dict, fullattr, -1);
        prefix = NULL;
    } else {
        name = xmlDictLookup(ctxt->dict, name, -1);
        prefix = xmlDictLookup(ctxt->dict, fullattr, len);
        if (prefix == NULL)
            goto mem_error;
    }
    if (name == NULL)
        goto mem_error;

    /* intern the string and precompute the end */
    len = xmlStrlen(value);
    value = xmlDictLookup(ctxt->dict, value, len);
    if (value == NULL)
        goto mem_error;

    defaults->values[4 * defaults->nbAttrs] = name;
    defaults->values[4 * defaults->nbAttrs + 1] = prefix;
    defaults->values[4 * defaults->nbAttrs + 2] = value;
    defaults->values[4 * defaults->nbAttrs + 3] = value + len;
    defaults->nbAttrs++;

    return;

mem_error:
    xmlErrMemory(ctxt, NULL);
    return;
}

645 646 647 648 649 650 651 652 653 654 655 656 657 658 659 660 661 662 663 664 665
/**
 * xmlAddSpecialAttr:
 * @ctxt:  an XML parser context
 * @fullname:  the element fullname
 * @fullattr:  the attribute fullname
 * @type:  the attribute type
 *
 * Register that this attribute is not CDATA. The type is stored, cast
 * to a pointer, in ctxt->attsSpecial under the element/attribute name
 * pair; the table is created on first use. The result of the insertion
 * itself is not checked.
 */
static void
xmlAddSpecialAttr(xmlParserCtxtPtr ctxt,
		  const xmlChar *fullname,
		  const xmlChar *fullattr,
		  int type)
{
    if (ctxt->attsSpecial == NULL) {
        ctxt->attsSpecial = xmlHashCreate(10);
        if (ctxt->attsSpecial == NULL) {
            xmlErrMemory(ctxt, NULL);
            return;
        }
    }

    xmlHashAddEntry2(ctxt->attsSpecial, fullname, fullattr,
                     (void *) (long) type);
}

675 676 677 678 679 680 681 682 683 684 685 686 687 688 689 690 691 692 693 694 695 696 697 698 699 700 701 702 703 704 705 706 707 708 709 710 711 712 713 714 715 716 717 718 719 720 721 722 723 724 725 726 727 728 729 730 731 732 733 734 735 736 737 738 739 740 741 742 743 744 745 746
/*
 * xmlLangIDIsLetter:
 * @c:  a character code
 *
 * Returns 1 if @c is an ASCII letter ([a-z] | [A-Z]), 0 otherwise.
 */
static int
xmlLangIDIsLetter(int c)
{
    return (((c >= 'A') && (c <= 'Z')) || ((c >= 'a') && (c <= 'z')));
}

/**
 * xmlCheckLanguageID:
 * @lang:  pointer to the string value
 *
 * Checks that the value conforms to the LanguageID production:
 *
 * NOTE: this is somewhat deprecated, those productions were removed from
 *       the XML Second edition.
 *
 * [33] LanguageID ::= Langcode ('-' Subcode)*
 * [34] Langcode ::= ISO639Code |  IanaCode |  UserCode
 * [35] ISO639Code ::= ([a-z] | [A-Z]) ([a-z] | [A-Z])
 * [36] IanaCode ::= ('i' | 'I') '-' ([a-z] | [A-Z])+
 * [37] UserCode ::= ('x' | 'X') '-' ([a-z] | [A-Z])+
 * [38] Subcode ::= ([a-z] | [A-Z])+
 *
 * An IANA or user code must carry at least one letter after its "i-" or
 * "x-" introducer, as required by the '+' in productions [36] and [37].
 *
 * Returns 1 if correct 0 otherwise (including for a NULL @lang)
 **/
int
xmlCheckLanguageID(const xmlChar * lang)
{
    const xmlChar *cur = lang;

    if (cur == NULL)
        return (0);

    if (((cur[0] == 'i') || (cur[0] == 'I') ||
         (cur[0] == 'x') || (cur[0] == 'X')) && (cur[1] == '-')) {
        /*
         * IANA code or User code: one or more letters are mandatory
         */
        cur += 2;
        if (!xmlLangIDIsLetter(cur[0]))
            return (0);
        while (xmlLangIDIsLetter(cur[0]))  /* non input consuming */
            cur++;
    } else if (xmlLangIDIsLetter(cur[0])) {
        /*
         * ISO639: exactly two letters
         */
        cur++;
        if (!xmlLangIDIsLetter(cur[0]))
            return (0);
        cur++;
    } else {
        return (0);
    }

    /*
     * Zero or more '-' Subcode, each Subcode being one or more letters
     */
    while (cur[0] != 0) {       /* non input consuming */
        if (cur[0] != '-')
            return (0);
        cur++;
        if (!xmlLangIDIsLetter(cur[0]))
            return (0);
        while (xmlLangIDIsLetter(cur[0]))  /* non input consuming */
            cur++;
    }
    return (1);
}

747 748 749 750 751 752 753 754 755
/************************************************************************
 *									*
 * 		Parser stacks related functions and macros		*
 *									*
 ************************************************************************/

xmlEntityPtr xmlParseStringEntityRef(xmlParserCtxtPtr ctxt,
                                     const xmlChar ** str);

756 757 758 759 760 761 762 763 764
#ifdef SAX2
/**
 * nsPush:
 * @ctxt:  an XML parser context
 * @prefix:  the namespace prefix or NULL
 * @URL:  the namespace name
 *
 * Pushes a new parser namespace on top of the ns stack. The stack holds
 * prefix/URL pairs in consecutive slots. With XML_PARSE_NSCLEAN set, a
 * declaration identical to the binding currently in scope for @prefix is
 * discarded; prefix and URL are compared by pointer, so both are
 * presumably interned strings (verify against callers).
 *
 * Returns -1 in case of error, -2 if the namespace should be discarded
 *	   and the number of slots in use after the push otherwise.
 */
static int
nsPush(xmlParserCtxtPtr ctxt, const xmlChar *prefix, const xmlChar *URL)
{
    if (ctxt->options & XML_PARSE_NSCLEAN) {
        int i;

        /*
         * Scan from the top of the stack: the innermost binding of the
         * prefix is the one in scope. Scanning from the bottom would
         * match an outer binding that an inner one may have overridden.
         */
        for (i = ctxt->nsNr - 2; i >= 0; i -= 2) {
            if (ctxt->nsTab[i] == prefix) {
                /* in scope */
                if (ctxt->nsTab[i + 1] == URL)
                    return(-2);
                /* bound to another URL, keep the new declaration */
                break;
            }
        }
    }
    if ((ctxt->nsMax == 0) || (ctxt->nsTab == NULL)) {
        ctxt->nsMax = 10;
        ctxt->nsNr = 0;
        ctxt->nsTab = (const xmlChar **)
                      xmlMalloc(ctxt->nsMax * sizeof(xmlChar *));
        if (ctxt->nsTab == NULL) {
            xmlErrMemory(ctxt, NULL);
            ctxt->nsMax = 0;
            return (-1);
        }
    } else if (ctxt->nsNr >= ctxt->nsMax) {
        const xmlChar **tmp;

        /*
         * Grow through a temporary so that the existing stack (and its
         * recorded size) survive an allocation failure.
         */
        tmp = (const xmlChar **)
              xmlRealloc((void *) ctxt->nsTab,
                         2 * ctxt->nsMax * sizeof(ctxt->nsTab[0]));
        if (tmp == NULL) {
            xmlErrMemory(ctxt, NULL);
            return (-1);
        }
        ctxt->nsTab = tmp;
        ctxt->nsMax *= 2;
    }
    ctxt->nsTab[ctxt->nsNr++] = prefix;
    ctxt->nsTab[ctxt->nsNr++] = URL;
    return (ctxt->nsNr);
}
/**
 * nsPop:
 * @ctxt: an XML parser context
 * @nr:  the number of slots to pop
 *
 * Pops the top @nr slots from the ns stack, clearing each one. A request
 * larger than the current stack size is reported through the generic
 * error channel and clamped to that size.
 *
 * Returns the number of slots removed, 0 if the stack is missing or
 *         empty
 */
static int
nsPop(xmlParserCtxtPtr ctxt, int nr)
{
    int remaining;

    if (ctxt->nsTab == NULL)
        return (0);

    if (nr > ctxt->nsNr) {
        xmlGenericError(xmlGenericErrorContext, "Pbm popping %d NS\n", nr);
        nr = ctxt->nsNr;
    }
    if (ctxt->nsNr <= 0)
        return (0);

    remaining = nr;
    while (remaining > 0) {
        ctxt->nsNr--;
        ctxt->nsTab[ctxt->nsNr] = NULL;
        remaining--;
    }
    return (nr);
}
#endif

/*
 * xmlCtxtGrowAttrs:
 * @ctxt:  an XML parser context
 * @nr:  the number of attribute slots currently needed
 *
 * Make sure ctxt->atts can hold @nr + 5 entries, together with the
 * matching ctxt->attallocs array (one int per 5 entries). The first
 * call allocates 55 entries; later calls grow to twice (@nr + 5).
 *
 * Returns the capacity recorded in ctxt->maxatts, or -1 after reporting
 *         a memory error
 */
static int
xmlCtxtGrowAttrs(xmlParserCtxtPtr ctxt, int nr) {
    const int fresh = (ctxt->atts == NULL);
    const xmlChar **names;
    int *flags;
    int capacity;

    /* Already allocated and large enough: nothing to do. */
    if (!fresh && !(nr + 5 > ctxt->maxatts))
        return(ctxt->maxatts);

    if (fresh) {
        capacity = 55; /* allow for 10 attrs by default */
        names = (const xmlChar **)
                xmlMalloc(capacity * sizeof(xmlChar *));
    } else {
        capacity = (nr + 5) * 2;
        names = (const xmlChar **) xmlRealloc((void *) ctxt->atts,
                                     capacity * sizeof(const xmlChar *));
    }
    if (names == NULL)
        goto mem_error;
    ctxt->atts = names;

    if (fresh) {
        flags = (int *) xmlMalloc((capacity / 5) * sizeof(int));
    } else {
        flags = (int *) xmlRealloc((void *) ctxt->attallocs,
                                   (capacity / 5) * sizeof(int));
    }
    if (flags == NULL)
        goto mem_error;
    ctxt->attallocs = flags;

    /* Record the new capacity only once both arrays are in place. */
    ctxt->maxatts = capacity;
    return(ctxt->maxatts);

mem_error:
    xmlErrMemory(ctxt, NULL);
    return(-1);
}

872 873 874
/**
 * inputPush:
 * @ctxt:  an XML parser context
875
 * @value:  the parser input
876 877
 *
 * Pushes a new parser input on top of the input stack
878 879
 *
 * Returns 0 in case of error, the index in the stack otherwise
880
 */
Daniel Veillard's avatar
Daniel Veillard committed
881 882 883 884 885 886 887 888 889 890
extern int
inputPush(xmlParserCtxtPtr ctxt, xmlParserInputPtr value)
{
    if (ctxt->inputNr >= ctxt->inputMax) {
        ctxt->inputMax *= 2;
        ctxt->inputTab =
            (xmlParserInputPtr *) xmlRealloc(ctxt->inputTab,
                                             ctxt->inputMax *
                                             sizeof(ctxt->inputTab[0]));
        if (ctxt->inputTab == NULL) {
891
            xmlErrMemory(ctxt, NULL);
Daniel Veillard's avatar
Daniel Veillard committed
892 893 894 895 896 897 898
            return (0);
        }
    }
    ctxt->inputTab[ctxt->inputNr] = value;
    ctxt->input = value;
    return (ctxt->inputNr++);
}
899
/**
Daniel Veillard's avatar
Daniel Veillard committed
900
 * inputPop:
901 902
 * @ctxt: an XML parser context
 *
Daniel Veillard's avatar
Daniel Veillard committed
903
 * Pops the top parser input from the input stack
904
 *
Daniel Veillard's avatar
Daniel Veillard committed
905
 * Returns the input just removed
906
 */
Daniel Veillard's avatar
Daniel Veillard committed
907 908 909 910 911 912 913 914 915 916 917 918 919 920 921 922
extern xmlParserInputPtr
inputPop(xmlParserCtxtPtr ctxt)
{
    xmlParserInputPtr ret;

    if (ctxt->inputNr <= 0)
        return (0);
    ctxt->inputNr--;
    if (ctxt->inputNr > 0)
        ctxt->input = ctxt->inputTab[ctxt->inputNr - 1];
    else
        ctxt->input = NULL;
    ret = ctxt->inputTab[ctxt->inputNr];
    ctxt->inputTab[ctxt->inputNr] = 0;
    return (ret);
}
923
/**
Daniel Veillard's avatar
Daniel Veillard committed
924
 * nodePush:
925
 * @ctxt:  an XML parser context
Daniel Veillard's avatar
Daniel Veillard committed
926
 * @value:  the element node
927
 *
Daniel Veillard's avatar
Daniel Veillard committed
928
 * Pushes a new element node on top of the node stack
929 930
 *
 * Returns 0 in case of error, the index in the stack otherwise
931
 */
Daniel Veillard's avatar
Daniel Veillard committed
932 933 934 935 936 937 938 939 940 941
extern int
nodePush(xmlParserCtxtPtr ctxt, xmlNodePtr value)
{
    if (ctxt->nodeNr >= ctxt->nodeMax) {
        ctxt->nodeMax *= 2;
        ctxt->nodeTab =
            (xmlNodePtr *) xmlRealloc(ctxt->nodeTab,
                                      ctxt->nodeMax *
                                      sizeof(ctxt->nodeTab[0]));
        if (ctxt->nodeTab == NULL) {
942
            xmlErrMemory(ctxt, NULL);
Daniel Veillard's avatar
Daniel Veillard committed
943 944 945
            return (0);
        }
    }
946 947
#ifdef MAX_DEPTH
    if (ctxt->nodeNr > MAX_DEPTH) {
948
	xmlFatalErrMsgInt(ctxt, XML_ERR_INTERNAL_ERROR,
949
		 "Excessive depth in document: change MAX_DEPTH = %d\n",
950
			  MAX_DEPTH);
951 952 953 954
	ctxt->instate = XML_PARSER_EOF;
	return(0);
    }
#endif
Daniel Veillard's avatar
Daniel Veillard committed
955 956 957 958
    ctxt->nodeTab[ctxt->nodeNr] = value;
    ctxt->node = value;
    return (ctxt->nodeNr++);
}
959 960 961 962 963 964 965 966
/**
 * nodePop:
 * @ctxt: an XML parser context
 *
 * Pops the top element node from the node stack
 *
 * Returns the node just removed
 */
Daniel Veillard's avatar
Daniel Veillard committed
967 968 969 970 971 972 973 974 975 976 977 978 979 980 981 982
extern xmlNodePtr
nodePop(xmlParserCtxtPtr ctxt)
{
    xmlNodePtr ret;

    if (ctxt->nodeNr <= 0)
        return (0);
    ctxt->nodeNr--;
    if (ctxt->nodeNr > 0)
        ctxt->node = ctxt->nodeTab[ctxt->nodeNr - 1];
    else
        ctxt->node = NULL;
    ret = ctxt->nodeTab[ctxt->nodeNr];
    ctxt->nodeTab[ctxt->nodeNr] = 0;
    return (ret);
}
983 984 985 986 987 988 989 990 991 992 993 994 995 996 997 998 999 1000 1001 1002 1003 1004 1005 1006 1007 1008 1009 1010 1011 1012 1013 1014 1015 1016 1017 1018 1019 1020 1021 1022
/**
 * nameNsPush:
 * @ctxt:  an XML parser context
 * @value:  the element name
 * @prefix:  the element prefix
 * @URI:  the element namespace name
 * @nsNr:  an integer stored alongside the prefix and URI for this entry
 *
 * Pushes a new element name on top of the name stack and records the
 * matching prefix, namespace name and @nsNr in three consecutive slots
 * of the push table. Both tables are doubled when the name stack is full.
 *
 * If the push table cannot be grown after the name table already was,
 * the name table keeps its larger allocation but the recorded capacity
 * is left at its previous value.
 *
 * Returns -1 in case of error, the index in the stack otherwise
 */
static int
nameNsPush(xmlParserCtxtPtr ctxt, const xmlChar * value,
           const xmlChar *prefix, const xmlChar *URI, int nsNr)
{
    void **slot;

    if (ctxt->nameNr >= ctxt->nameMax) {
        int newMax = ctxt->nameMax * 2;
        const xmlChar **names;
        void **pushes;

        names = (const xmlChar * *) xmlRealloc((xmlChar * *)ctxt->nameTab,
                                    newMax * sizeof(ctxt->nameTab[0]));
        if (names == NULL)
            goto mem_error;
        ctxt->nameTab = names;

        /* Three push-table slots are kept per name-stack entry. */
        pushes = (void **) xmlRealloc((void * *)ctxt->pushTab,
                                      newMax * 3 * sizeof(ctxt->pushTab[0]));
        if (pushes == NULL)
            goto mem_error;
        ctxt->pushTab = pushes;

        ctxt->nameMax = newMax;
    }

    ctxt->name = value;
    ctxt->nameTab[ctxt->nameNr] = value;

    slot = &ctxt->pushTab[ctxt->nameNr * 3];
    slot[0] = (void *) prefix;
    slot[1] = (void *) URI;
    slot[2] = (void *) (long) nsNr;
    return (ctxt->nameNr++);

mem_error:
    xmlErrMemory(ctxt, NULL);
    return (-1);
}
/**
 * nameNsPop:
 * @ctxt: an XML parser context
 *
 * Removes the element name on top of the name stack and makes the name
 * below it, if any, the current name of the context. The push table
 * entries associated with the removed name are not modified here.
 *
 * Returns the name just removed, or NULL when the stack was empty
 */
static const xmlChar *
nameNsPop(xmlParserCtxtPtr ctxt)
{
    const xmlChar *top;

    if (ctxt->nameNr <= 0)
        return (NULL);

    ctxt->nameNr--;
    top = ctxt->nameTab[ctxt->nameNr];
    ctxt->nameTab[ctxt->nameNr] = NULL;

    ctxt->name = (ctxt->nameNr > 0) ?
                 ctxt->nameTab[ctxt->nameNr - 1] : NULL;
    return (top);
}

1054
/**
Daniel Veillard's avatar
Daniel Veillard committed
1055
 * namePush:
1056
 * @ctxt:  an XML parser context
Daniel Veillard's avatar
Daniel Veillard committed
1057
 * @value:  the element name
1058
 *
Daniel Veillard's avatar
Daniel Veillard committed
1059
 * Pushes a new element name on top of the name stack
1060
 *
1061
 * Returns -1 in case of error, the index in the stack otherwise
1062
 */
Daniel Veillard's avatar
Daniel Veillard committed
1063
extern int
1064
namePush(xmlParserCtxtPtr ctxt, const xmlChar * value)
Daniel Veillard's avatar
Daniel Veillard committed
1065 1066
{
    if (ctxt->nameNr >= ctxt->nameMax) {
1067
        const xmlChar * *tmp;
Daniel Veillard's avatar
Daniel Veillard committed
1068
        ctxt->nameMax *= 2;
1069
        tmp = (const xmlChar * *) xmlRealloc((xmlChar * *)ctxt->nameTab,
Daniel Veillard's avatar
Daniel Veillard committed
1070 1071
                                    ctxt->nameMax *
                                    sizeof(ctxt->nameTab[0]));
1072 1073 1074
        if (tmp == NULL) {
	    ctxt->nameMax /= 2;
	    goto mem_error;
Daniel Veillard's avatar
Daniel Veillard committed
1075
        }
1076
	ctxt->nameTab = tmp;
Daniel Veillard's avatar
Daniel Veillard committed
1077 1078 1079 1080
    }
    ctxt->nameTab[ctxt->nameNr] = value;
    ctxt->name = value;
    return (ctxt->nameNr++);
1081
mem_error:
1082
    xmlErrMemory(ctxt, NULL);
1083
    return (-1);
Daniel Veillard's avatar
Daniel Veillard committed
1084 1085 1086 1087 1088 1089 1090 1091
}
/**
 * namePop:
 * @ctxt: an XML parser context
 *
 * Pops the top element name from the name stack
 *
 * Returns the name just removed
1092
 */
1093
extern const xmlChar *
Daniel Veillard's avatar
Daniel Veillard committed
1094 1095
namePop(xmlParserCtxtPtr ctxt)
{
1096
    const xmlChar *ret;
Daniel Veillard's avatar
Daniel Veillard committed
1097 1098 1099 1100 1101 1102 1103 1104 1105 1106 1107 1108

    if (ctxt->nameNr <= 0)
        return (0);
    ctxt->nameNr--;
    if (ctxt->nameNr > 0)
        ctxt->name = ctxt->nameTab[ctxt->nameNr - 1];
    else
        ctxt->name = NULL;
    ret = ctxt->nameTab[ctxt->nameNr];
    ctxt->nameTab[ctxt->nameNr] = 0;
    return (ret);
}
1109

1110
static int spacePush(xmlParserCtxtPtr ctxt, int val) {
1111 1112 1113 1114 1115
    if (ctxt->spaceNr >= ctxt->spaceMax) {
	ctxt->spaceMax *= 2;
        ctxt->spaceTab = (int *) xmlRealloc(ctxt->spaceTab,
	             ctxt->spaceMax * sizeof(ctxt->spaceTab[0]));
        if (ctxt->spaceTab == NULL) {
1116
	    xmlErrMemory(ctxt, NULL);
1117 1118 1119 1120 1121 1122 1123 1124
	    return(0);
	}
    }
    ctxt->spaceTab[ctxt->spaceNr] = val;
    ctxt->space = &ctxt->spaceTab[ctxt->spaceNr];
    return(ctxt->spaceNr