/*
 * parser.c : an XML 1.0 parser, namespaces and validity support are mostly
 *            implemented on top of the SAX interfaces
 *
 * References:
 *   The XML specification:
 *     http://www.w3.org/TR/REC-xml
 *   Original 1.0 version:
 *     http://www.w3.org/TR/1998/REC-xml-19980210
 *   XML second edition working draft
 *     http://www.w3.org/TR/2000/WD-xml-2e-20000814
 *
 * Okay this is a big file, the parser core is around 7000 lines, then it
 * is followed by the progressive parser top routines, then the various
 * high level APIs to call the parser and a few miscellaneous functions.
 * A number of helper functions and deprecated ones have been moved to
 * parserInternals.c to reduce this file size.
 * As much as possible the functions are associated with their relative
 * production in the XML specification. A few productions defining the
 * different ranges of character are actually implemented either in
 * parserInternals.h or parserInternals.c
 * The DOM tree build is realized from the default SAX callbacks in
 * the module SAX.c.
 * The routines doing the validation checks are in valid.c and called either
 * from the SAX callbacks or as standalone functions using a preparsed
 * document.
 *
 * See Copyright for the status of this software.
 *
 * daniel@veillard.com
 */

33
#define IN_LIBXML
Bjorn Reese's avatar
Bjorn Reese committed
34 35
#include "libxml.h"

36
/* Path separator character: backslash on native Win32, slash elsewhere. */
#if defined(WIN32) && !defined (__CYGWIN__)
#define XML_DIR_SEP '\\'
#else
#define XML_DIR_SEP '/'
#endif

#include <stdlib.h>
#include <string.h>
44
#include <stdarg.h>
45
#include <libxml/xmlmemory.h>
46 47
#include <libxml/threads.h>
#include <libxml/globals.h>
48 49 50 51 52 53 54 55 56
#include <libxml/tree.h>
#include <libxml/parser.h>
#include <libxml/parserInternals.h>
#include <libxml/valid.h>
#include <libxml/entities.h>
#include <libxml/xmlerror.h>
#include <libxml/encoding.h>
#include <libxml/xmlIO.h>
#include <libxml/uri.h>
57 58 59
#ifdef LIBXML_CATALOG_ENABLED
#include <libxml/catalog.h>
#endif
60 61 62 63 64 65 66 67 68 69 70 71 72 73 74 75 76 77 78 79

#ifdef HAVE_CTYPE_H
#include <ctype.h>
#endif
#ifdef HAVE_STDLIB_H
#include <stdlib.h>
#endif
#ifdef HAVE_SYS_STAT_H
#include <sys/stat.h>
#endif
#ifdef HAVE_FCNTL_H
#include <fcntl.h>
#endif
#ifdef HAVE_UNISTD_H
#include <unistd.h>
#endif
#ifdef HAVE_ZLIB_H
#include <zlib.h>
#endif

80
/**
 * xmlParserMaxDepth:
 *
 * Arbitrary depth limit for the XML documents that we allow to
 * process. This is not a limitation of the parser but a safety
 * boundary feature.
 */
unsigned int xmlParserMaxDepth = 1024;
88

89 90
/* Enables the sections of this file guarded by "#ifdef SAX2". */
#define SAX2 1

/* NOTE(review): buffer sizes; their users are outside this part of the file. */
#define XML_PARSER_BIG_BUFFER_SIZE 300
#define XML_PARSER_BUFFER_SIZE 100

#define SAX_COMPAT_MODE BAD_CAST "SAX compatibility mode document"

96 97 98 99
/*
 * List of XML prefixed PI allowed by W3C specs.
 * The array is terminated by a NULL entry.
 */
static const char *xmlW3CPIs[] = {
    "xml-stylesheet",
    NULL
};

105

106 107 108 109
/* DEPR void xmlParserHandleReference(xmlParserCtxtPtr ctxt); */
xmlEntityPtr xmlParseStringPEReference(xmlParserCtxtPtr ctxt,
                                       const xmlChar **str);

110
static xmlParserErrors
111 112
xmlParseExternalEntityPrivate(xmlDocPtr doc, xmlParserCtxtPtr oldctxt,
	              xmlSAXHandlerPtr sax,
113
		      void *user_data, int depth, const xmlChar *URL,
114
		      const xmlChar *ID, xmlNodePtr *list);
115

116
#ifdef LIBXML_LEGACY_ENABLED
117 118 119
static void
xmlAddEntityReference(xmlEntityPtr ent, xmlNodePtr firstNode,
                      xmlNodePtr lastNode);
120
#endif /* LIBXML_LEGACY_ENABLED */
121

122
static xmlParserErrors
123 124
xmlParseBalancedChunkMemoryInternal(xmlParserCtxtPtr oldctxt,
		      const xmlChar *string, void *user_data, xmlNodePtr *lst);
125

126 127 128 129 130 131 132 133 134 135 136 137 138 139 140 141 142 143 144
/************************************************************************
 *									*
 * 		Some factorized error routines				*
 *									*
 ************************************************************************/

/**
 * xmlErrAttributeDup:
 * @ctxt:  an XML parser context
 * @prefix:  the attribute prefix, or NULL for an unprefixed attribute
 * @localname:  the attribute localname
 *
 * Handle a redefinition of attribute error.
 * Records XML_ERR_ATTRIBUTE_REDEFINED in the context, raises a fatal
 * error naming the attribute, clears the well-formedness flag and, unless
 * the context is in recovery mode, disables further SAX callbacks.
 */
static void
xmlErrAttributeDup(xmlParserCtxtPtr ctxt, const xmlChar * prefix,
                   const xmlChar * localname)
{
    ctxt->errNo = XML_ERR_ATTRIBUTE_REDEFINED;
    if (prefix == NULL)
        __xmlRaiseError(NULL, NULL, NULL, ctxt, NULL, XML_FROM_PARSER,
                        ctxt->errNo, XML_ERR_FATAL, NULL, 0,
                        (const char *) localname, NULL, NULL, 0, 0,
                        "Attribute %s redefined\n", localname);
    else
        __xmlRaiseError(NULL, NULL, NULL, ctxt, NULL, XML_FROM_PARSER,
                        ctxt->errNo, XML_ERR_FATAL, NULL, 0,
                        (const char *) prefix, (const char *) localname,
                        NULL, 0, 0, "Attribute %s:%s redefined\n", prefix,
                        localname);
    ctxt->wellFormed = 0;
    if (ctxt->recovery == 0)
        ctxt->disableSAX = 1;
}

/**
 * xmlFatalErr:
 * @ctxt:  an XML parser context
 * @error:  the error number
 * @extra:  extra information string
 *
 * Handle a fatal parser error, i.e. violating Well-Formedness constraints
 */
static void
170
xmlFatalErr(xmlParserCtxtPtr ctxt, xmlParserErrors error, const char *info)
171 172 173 174 175
{
    const char *errmsg;

    switch (error) {
        case XML_ERR_INVALID_HEX_CHARREF:
176 177
            errmsg = "CharRef: invalid hexadecimal value\n";
            break;
178
        case XML_ERR_INVALID_DEC_CHARREF:
179 180
            errmsg = "CharRef: invalid decimal value\n";
            break;
181
        case XML_ERR_INVALID_CHARREF:
182 183
            errmsg = "CharRef: invalid value\n";
            break;
184
        case XML_ERR_INTERNAL_ERROR:
185 186
            errmsg = "internal error";
            break;
187
        case XML_ERR_PEREF_AT_EOF:
188 189
            errmsg = "PEReference at end of document\n";
            break;
190
        case XML_ERR_PEREF_IN_PROLOG:
191 192
            errmsg = "PEReference in prolog\n";
            break;
193
        case XML_ERR_PEREF_IN_EPILOG:
194 195
            errmsg = "PEReference in epilog\n";
            break;
196
        case XML_ERR_PEREF_NO_NAME:
197 198
            errmsg = "PEReference: no name\n";
            break;
199
        case XML_ERR_PEREF_SEMICOL_MISSING:
200 201
            errmsg = "PEReference: expecting ';'\n";
            break;
202
        case XML_ERR_ENTITY_LOOP:
203 204
            errmsg = "Detected an entity reference loop\n";
            break;
205
        case XML_ERR_ENTITY_NOT_STARTED:
206 207
            errmsg = "EntityValue: \" or ' expected\n";
            break;
208
        case XML_ERR_ENTITY_PE_INTERNAL:
209 210
            errmsg = "PEReferences forbidden in internal subset\n";
            break;
211
        case XML_ERR_ENTITY_NOT_FINISHED:
212 213
            errmsg = "EntityValue: \" or ' expected\n";
            break;
214
        case XML_ERR_ATTRIBUTE_NOT_STARTED:
215 216
            errmsg = "AttValue: \" or ' expected\n";
            break;
217
        case XML_ERR_LT_IN_ATTRIBUTE:
218 219
            errmsg = "Unescaped '<' not allowed in attributes values\n";
            break;
220
        case XML_ERR_LITERAL_NOT_STARTED:
221 222
            errmsg = "SystemLiteral \" or ' expected\n";
            break;
223
        case XML_ERR_LITERAL_NOT_FINISHED:
224 225
            errmsg = "Unfinished System or Public ID \" or ' expected\n";
            break;
226
        case XML_ERR_MISPLACED_CDATA_END:
227 228
            errmsg = "Sequence ']]>' not allowed in content\n";
            break;
229
        case XML_ERR_URI_REQUIRED:
230 231
            errmsg = "SYSTEM or PUBLIC, the URI is missing\n";
            break;
232
        case XML_ERR_PUBID_REQUIRED:
233 234
            errmsg = "PUBLIC, the Public Identifier is missing\n";
            break;
235
        case XML_ERR_HYPHEN_IN_COMMENT:
236 237
            errmsg = "Comment must not contain '--' (double-hyphen)\n";
            break;
238
        case XML_ERR_PI_NOT_STARTED:
239 240
            errmsg = "xmlParsePI : no target name\n";
            break;
241
        case XML_ERR_RESERVED_XML_NAME:
242 243
            errmsg = "Invalid PI name\n";
            break;
244
        case XML_ERR_NOTATION_NOT_STARTED:
245 246
            errmsg = "NOTATION: Name expected here\n";
            break;
247
        case XML_ERR_NOTATION_NOT_FINISHED:
248 249
            errmsg = "'>' required to close NOTATION declaration\n";
            break;
250
        case XML_ERR_VALUE_REQUIRED:
251 252
            errmsg = "Entity value required\n";
            break;
253
        case XML_ERR_URI_FRAGMENT:
254 255
            errmsg = "Fragment not allowed";
            break;
256
        case XML_ERR_ATTLIST_NOT_STARTED:
257 258
            errmsg = "'(' required to start ATTLIST enumeration\n";
            break;
259
        case XML_ERR_NMTOKEN_REQUIRED:
260 261
            errmsg = "NmToken expected in ATTLIST enumeration\n";
            break;
262
        case XML_ERR_ATTLIST_NOT_FINISHED:
263 264
            errmsg = "')' required to finish ATTLIST enumeration\n";
            break;
265
        case XML_ERR_MIXED_NOT_STARTED:
266 267
            errmsg = "MixedContentDecl : '|' or ')*' expected\n";
            break;
268
        case XML_ERR_PCDATA_REQUIRED:
269 270
            errmsg = "MixedContentDecl : '#PCDATA' expected\n";
            break;
271
        case XML_ERR_ELEMCONTENT_NOT_STARTED:
272 273
            errmsg = "ContentDecl : Name or '(' expected\n";
            break;
274
        case XML_ERR_ELEMCONTENT_NOT_FINISHED:
275 276
            errmsg = "ContentDecl : ',' '|' or ')' expected\n";
            break;
277
        case XML_ERR_PEREF_IN_INT_SUBSET:
278 279 280
            errmsg =
                "PEReference: forbidden within markup decl in internal subset\n";
            break;
281
        case XML_ERR_GT_REQUIRED:
282 283
            errmsg = "expected '>'\n";
            break;
284
        case XML_ERR_CONDSEC_INVALID:
285 286
            errmsg = "XML conditional section '[' expected\n";
            break;
287
        case XML_ERR_EXT_SUBSET_NOT_FINISHED:
288 289 290 291 292 293
            errmsg = "Content error in the external subset\n";
            break;
        case XML_ERR_CONDSEC_INVALID_KEYWORD:
            errmsg =
                "conditional section INCLUDE or IGNORE keyword expected\n";
            break;
294
        case XML_ERR_CONDSEC_NOT_FINISHED:
295 296
            errmsg = "XML conditional section not closed\n";
            break;
297
        case XML_ERR_XMLDECL_NOT_STARTED:
298 299
            errmsg = "Text declaration '<?xml' required\n";
            break;
300
        case XML_ERR_XMLDECL_NOT_FINISHED:
301 302
            errmsg = "parsing XML declaration: '?>' expected\n";
            break;
303
        case XML_ERR_EXT_ENTITY_STANDALONE:
304 305
            errmsg = "external parsed entities cannot be standalone\n";
            break;
306
        case XML_ERR_ENTITYREF_SEMICOL_MISSING:
307 308
            errmsg = "EntityRef: expecting ';'\n";
            break;
309
        case XML_ERR_DOCTYPE_NOT_FINISHED:
310 311
            errmsg = "DOCTYPE improperly terminated\n";
            break;
312
        case XML_ERR_LTSLASH_REQUIRED:
313 314
            errmsg = "EndTag: '</' not found\n";
            break;
315
        case XML_ERR_EQUAL_REQUIRED:
316 317
            errmsg = "expected '='\n";
            break;
318
        case XML_ERR_STRING_NOT_CLOSED:
319 320
            errmsg = "String not closed expecting \" or '\n";
            break;
321
        case XML_ERR_STRING_NOT_STARTED:
322 323
            errmsg = "String not started expecting ' or \"\n";
            break;
324
        case XML_ERR_ENCODING_NAME:
325 326
            errmsg = "Invalid XML encoding name\n";
            break;
327
        case XML_ERR_STANDALONE_VALUE:
328 329
            errmsg = "standalone accepts only 'yes' or 'no'\n";
            break;
330
        case XML_ERR_DOCUMENT_EMPTY:
331 332
            errmsg = "Document is empty\n";
            break;
333
        case XML_ERR_DOCUMENT_END:
334 335
            errmsg = "Extra content at the end of the document\n";
            break;
336
        case XML_ERR_NOT_WELL_BALANCED:
337 338
            errmsg = "chunk is not well balanced\n";
            break;
339
        case XML_ERR_EXTRA_CONTENT:
340 341
            errmsg = "extra content at the end of well balanced chunk\n";
            break;
342
        case XML_ERR_VERSION_MISSING:
343 344
            errmsg = "Malformed declaration expecting version\n";
            break;
345
#if 0
346 347 348
        case:
            errmsg = "\n";
            break;
349
#endif
350 351
        default:
            errmsg = "Unregistered error message\n";
352 353
    }
    ctxt->errNo = error;
354
    __xmlRaiseError(NULL, NULL, NULL, ctxt, NULL, XML_FROM_PARSER, error,
355 356
                    XML_ERR_FATAL, NULL, 0, info, NULL, NULL, 0, 0, errmsg,
                    info);
357 358 359 360 361
    ctxt->wellFormed = 0;
    if (ctxt->recovery == 0)
        ctxt->disableSAX = 1;
}

362 363 364 365 366 367 368 369 370
/**
 * xmlFatalErrMsg:
 * @ctxt:  an XML parser context
 * @error:  the error number
 * @msg:  the error message
 *
 * Handle a fatal parser error, i.e. violating Well-Formedness constraints
 */
static void
371 372
xmlFatalErrMsg(xmlParserCtxtPtr ctxt, xmlParserErrors error,
               const char *msg)
373 374
{
    ctxt->errNo = error;
375
    __xmlRaiseError(NULL, NULL, NULL, ctxt, NULL, XML_FROM_PARSER, error,
376
                    XML_ERR_FATAL, NULL, 0, NULL, NULL, NULL, 0, 0, msg);
377 378 379 380 381
    ctxt->wellFormed = 0;
    if (ctxt->recovery == 0)
        ctxt->disableSAX = 1;
}

382 383 384 385 386 387 388 389 390 391 392 393 394 395
/**
 * xmlWarningMsg:
 * @ctxt:  an XML parser context
 * @error:  the error number
 * @msg:  the error message
 * @str1:  extra data
 * @str2:  extra data
 *
 * Handle a warning.
 * The warning is routed to the SAX warning callback of the context when a
 * SAX handler block is present, and additionally to its structured error
 * callback when that block was initialized with XML_SAX2_MAGIC.
 * Only ctxt->errNo is updated; the well-formedness state is untouched.
 */
static void
xmlWarningMsg(xmlParserCtxtPtr ctxt, xmlParserErrors error,
              const char *msg, const xmlChar *str1, const xmlChar *str2)
{
    xmlStructuredErrorFunc schannel = NULL;

    ctxt->errNo = error;
    if ((ctxt->sax != NULL) && (ctxt->sax->initialized == XML_SAX2_MAGIC))
        schannel = ctxt->sax->serror;
    __xmlRaiseError(schannel,
                    (ctxt->sax) ? ctxt->sax->warning : NULL,
                    ctxt->userData,
                    ctxt, NULL, XML_FROM_PARSER, error,
                    XML_ERR_WARNING, NULL, 0,
		    (const char *) str1, (const char *) str2, NULL, 0, 0,
		    msg, (const char *) str1, (const char *) str2);
}

/**
 * xmlValidityError:
 * @ctxt:  an XML parser context
 * @error:  the error number
 * @msg:  the error message
 * @str1:  extra data
 *
 * Handle a validity error.
 * The error is raised in the XML_FROM_DTD domain at XML_ERR_ERROR level
 * through the validation context callback (ctxt->vctxt.error), and through
 * the SAX structured error callback when the handler block was initialized
 * with XML_SAX2_MAGIC. The document is then flagged as not valid.
 */
static void
xmlValidityError(xmlParserCtxtPtr ctxt, xmlParserErrors error,
              const char *msg, const xmlChar *str1)
{
    xmlStructuredErrorFunc schannel = NULL;

    ctxt->errNo = error;
    if ((ctxt->sax != NULL) && (ctxt->sax->initialized == XML_SAX2_MAGIC))
        schannel = ctxt->sax->serror;
    __xmlRaiseError(schannel,
                    ctxt->vctxt.error, ctxt->vctxt.userData,
                    ctxt, NULL, XML_FROM_DTD, error,
                    XML_ERR_ERROR, NULL, 0, (const char *) str1,
		    NULL, NULL, 0, 0,
		    msg, (const char *) str1);
    ctxt->valid = 0;
}

436 437 438 439 440 441 442 443 444 445 446
/**
 * xmlFatalErrMsgInt:
 * @ctxt:  an XML parser context
 * @error:  the error number
 * @msg:  the error message, formatted with @val as its only argument
 * @val:  an integer value
 *
 * Handle a fatal parser error, i.e. violating Well-Formedness constraints.
 * The error number is stored in the context, the well-formedness flag is
 * cleared and, unless the context is in recovery mode, further SAX
 * callbacks are disabled.
 */
static void
xmlFatalErrMsgInt(xmlParserCtxtPtr ctxt, xmlParserErrors error,
                  const char *msg, int val)
{
    ctxt->errNo = error;
    __xmlRaiseError(NULL, NULL, NULL,
                    ctxt, NULL, XML_FROM_PARSER, error, XML_ERR_FATAL,
                    NULL, 0, NULL, NULL, NULL, val, 0, msg, val);
    ctxt->wellFormed = 0;
    if (ctxt->recovery == 0)
        ctxt->disableSAX = 1;
}

458 459 460 461 462 463 464 465 466 467 468 469 470 471 472 473 474
/**
 * xmlFatalErrMsgStrIntStr:
 * @ctxt:  an XML parser context
 * @error:  the error number
 * @msg:  the error message, formatted with @str1, @val, @str2 in that order
 * @str1:  a string info
 * @val:  an integer value
 * @str2:  a string info
 *
 * Handle a fatal parser error, i.e. violating Well-Formedness constraints.
 * The error number is stored in the context, the well-formedness flag is
 * cleared and, unless the context is in recovery mode, further SAX
 * callbacks are disabled.
 */
static void
xmlFatalErrMsgStrIntStr(xmlParserCtxtPtr ctxt, xmlParserErrors error,
                  const char *msg, const xmlChar *str1, int val,
		  const xmlChar *str2)
{
    ctxt->errNo = error;
    __xmlRaiseError(NULL, NULL, NULL,
                    ctxt, NULL, XML_FROM_PARSER, error, XML_ERR_FATAL,
                    NULL, 0, (const char *) str1, (const char *) str2,
		    NULL, val, 0, msg, str1, val, str2);
    ctxt->wellFormed = 0;
    if (ctxt->recovery == 0)
        ctxt->disableSAX = 1;
}

484 485 486 487 488 489 490 491 492 493 494
/**
 * xmlFatalErrMsgStr:
 * @ctxt:  an XML parser context
 * @error:  the error number
 * @msg:  the error message, formatted with @val as its only argument
 * @val:  a string value
 *
 * Handle a fatal parser error, i.e. violating Well-Formedness constraints.
 * The error number is stored in the context, the well-formedness flag is
 * cleared and, unless the context is in recovery mode, further SAX
 * callbacks are disabled.
 */
static void
xmlFatalErrMsgStr(xmlParserCtxtPtr ctxt, xmlParserErrors error,
                  const char *msg, const xmlChar * val)
{
    ctxt->errNo = error;
    __xmlRaiseError(NULL, NULL, NULL, ctxt, NULL,
                    XML_FROM_PARSER, error, XML_ERR_FATAL,
                    NULL, 0, (const char *) val, NULL, NULL, 0, 0, msg,
                    val);
    ctxt->wellFormed = 0;
    if (ctxt->recovery == 0)
        ctxt->disableSAX = 1;
}

507 508 509 510 511 512 513 514 515 516 517 518 519 520
/**
 * xmlErrMsgStr:
 * @ctxt:  an XML parser context
 * @error:  the error number
 * @msg:  the error message, formatted with @val as its only argument
 * @val:  a string value
 *
 * Handle a non fatal parser error.
 * Only ctxt->errNo is updated: unlike the xmlFatalErr* helpers, the
 * well-formedness flag and the SAX callbacks are left untouched.
 */
static void
xmlErrMsgStr(xmlParserCtxtPtr ctxt, xmlParserErrors error,
                  const char *msg, const xmlChar * val)
{
    ctxt->errNo = error;
    __xmlRaiseError(NULL, NULL, NULL, ctxt, NULL,
                    XML_FROM_PARSER, error, XML_ERR_ERROR,
                    NULL, 0, (const char *) val, NULL, NULL, 0, 0, msg,
                    val);
}

527 528 529 530 531 532 533 534 535 536 537 538 539
/**
 * xmlNsErr:
 * @ctxt:  an XML parser context
 * @error:  the error number
 * @msg:  the message, formatted with @info1, @info2, @info3 in that order
 * @info1:  extra information string
 * @info2:  extra information string
 * @info3:  extra information string
 *
 * Handle a namespace error.
 * The error is raised in the XML_FROM_NAMESPACE domain at XML_ERR_ERROR
 * level and clears ctxt->nsWellFormed; the general well-formedness flag
 * and the SAX callbacks are left untouched.
 */
static void
xmlNsErr(xmlParserCtxtPtr ctxt, xmlParserErrors error,
         const char *msg,
         const xmlChar * info1, const xmlChar * info2,
         const xmlChar * info3)
{
    ctxt->errNo = error;
    __xmlRaiseError(NULL, NULL, NULL, ctxt, NULL, XML_FROM_NAMESPACE, error,
                    XML_ERR_ERROR, NULL, 0, (const char *) info1,
                    (const char *) info2, (const char *) info3, 0, 0, msg,
                    info1, info2, info3);
    ctxt->nsWellFormed = 0;
}

551 552 553 554 555 556 557 558 559 560 561 562 563 564 565
/************************************************************************
 *									*
 * 		SAX2 defaulted attributes handling			*
 *									*
 ************************************************************************/

/**
 * xmlDetectSAX2:
 * @ctxt:  an XML parser context
 *
 * Do the SAX2 detection and specific initialization.
 * When SAX1 support is compiled in, ctxt->sax2 is set only if the handler
 * block was initialized with XML_SAX2_MAGIC and provides a startElementNs
 * or an endElementNs callback; otherwise it is set unconditionally.
 * The strings "xml", "xmlns" and XML_XML_NAMESPACE are then interned in
 * the context dictionary and cached in the context.
 * Does nothing if @ctxt is NULL.
 */
static void
xmlDetectSAX2(xmlParserCtxtPtr ctxt) {
    if (ctxt == NULL) return;
#ifdef LIBXML_SAX1_ENABLED
    if ((ctxt->sax) &&  (ctxt->sax->initialized == XML_SAX2_MAGIC) &&
        ((ctxt->sax->startElementNs != NULL) ||
         (ctxt->sax->endElementNs != NULL))) ctxt->sax2 = 1;
#else
    ctxt->sax2 = 1;
#endif /* LIBXML_SAX1_ENABLED */

    ctxt->str_xml = xmlDictLookup(ctxt->dict, BAD_CAST "xml", 3);
    ctxt->str_xmlns = xmlDictLookup(ctxt->dict, BAD_CAST "xmlns", 5);
    ctxt->str_xml_ns = xmlDictLookup(ctxt->dict, XML_XML_NAMESPACE, 36);
}

/*
 * Per-element table of defaulted attributes, filled by xmlAddDefAttrs().
 * Each attribute occupies 4 consecutive slots of @values: localname,
 * prefix, value and a pointer to the end of the value.
 * The structure is allocated with extra room after the declared array so
 * that @values can hold at least 4 * maxAttrs entries.
 */
typedef struct _xmlDefAttrs xmlDefAttrs;
typedef xmlDefAttrs *xmlDefAttrsPtr;
struct _xmlDefAttrs {
    int nbAttrs;	/* number of defaulted attributes on that element */
    int maxAttrs;       /* capacity of the array, counted in attributes */
    const xmlChar *values[4]; /* localname/prefix/value/value-end slots */
};

/**
 * xmlAddDefAttrs:
 * @ctxt:  an XML parser context
 * @fullname:  the element fullname
 * @fullattr:  the attribute fullname
 * @value:  the attribute value
 *
 * Add a defaulted attribute for an element.
 * The per-element table is kept in the ctxt->attsDefault hash, keyed by
 * the element localname and prefix, and is created or doubled in size as
 * needed. All stored strings are interned in the context dictionary.
 * Allocation failures are reported through xmlErrMemory().
 */
static void
xmlAddDefAttrs(xmlParserCtxtPtr ctxt,
               const xmlChar *fullname,
               const xmlChar *fullattr,
               const xmlChar *value) {
    xmlDefAttrsPtr defaults;
    int len;
    const xmlChar *name;
    const xmlChar *prefix;

    if (ctxt->attsDefault == NULL) {
        ctxt->attsDefault = xmlHashCreate(10);
	if (ctxt->attsDefault == NULL)
	    goto mem_error;
    }

    /*
     * Split the element name into prefix:localname, the strings found
     * are within the DTD and then not associated to namespace names.
     */
    name = xmlSplitQName3(fullname, &len);
    if (name == NULL) {
        name = xmlDictLookup(ctxt->dict, fullname, -1);
	prefix = NULL;
    } else {
        name = xmlDictLookup(ctxt->dict, name, -1);
	prefix = xmlDictLookup(ctxt->dict, fullname, len);
    }

    /*
     * make sure there is some storage
     */
    defaults = xmlHashLookup2(ctxt->attsDefault, name, prefix);
    if (defaults == NULL) {
        /* values[4] plus 12 extra slots: room for 4 attributes */
        defaults = (xmlDefAttrsPtr) xmlMalloc(sizeof(xmlDefAttrs) +
	                               12 * sizeof(const xmlChar *));
	if (defaults == NULL)
	    goto mem_error;
	defaults->maxAttrs = 4;
	defaults->nbAttrs = 0;
	/* the table is not referenced anywhere else yet: free it on failure */
	if (xmlHashUpdateEntry2(ctxt->attsDefault, name, prefix,
	                        defaults, NULL) < 0) {
	    xmlFree(defaults);
	    goto mem_error;
	}
    } else if (defaults->nbAttrs >= defaults->maxAttrs) {
        xmlDefAttrsPtr grown;

        /* on failure the hash keeps pointing at the still valid old table */
        grown = (xmlDefAttrsPtr) xmlRealloc(defaults, sizeof(xmlDefAttrs) +
		       (2 * defaults->maxAttrs * 4) * sizeof(const xmlChar *));
	if (grown == NULL)
	    goto mem_error;
	defaults = grown;
	defaults->maxAttrs *= 2;
	/* the block may have moved: refresh the pointer held by the hash */
	xmlHashUpdateEntry2(ctxt->attsDefault, name, prefix, defaults, NULL);
    }

    /*
     * Split the attribute name into prefix:localname, the strings found
     * are within the DTD and then not associated to namespace names.
     */
    name = xmlSplitQName3(fullattr, &len);
    if (name == NULL) {
        name = xmlDictLookup(ctxt->dict, fullattr, -1);
	prefix = NULL;
    } else {
        name = xmlDictLookup(ctxt->dict, name, -1);
	prefix = xmlDictLookup(ctxt->dict, fullattr, len);
    }

    defaults->values[4 * defaults->nbAttrs] = name;
    defaults->values[4 * defaults->nbAttrs + 1] = prefix;
    /* intern the string and precompute the end */
    len = xmlStrlen(value);
    value = xmlDictLookup(ctxt->dict, value, len);
    defaults->values[4 * defaults->nbAttrs + 2] = value;
    defaults->values[4 * defaults->nbAttrs + 3] = value + len;
    defaults->nbAttrs++;

    return;

mem_error:
    xmlErrMemory(ctxt, NULL);
    return;
}

675 676 677 678 679 680 681 682 683 684 685 686 687 688 689 690 691 692 693 694 695
/**
 * xmlAddSpecialAttr:
 * @ctxt:  an XML parser context
 * @fullname:  the element fullname
 * @fullattr:  the attribute fullname
 * @type:  the attribute type
 *
 * Register that this attribute is not CDATA.
 * The type is stored in the ctxt->attsSpecial hash, created on first use,
 * as an integer cast to the payload pointer. The result of
 * xmlHashAddEntry2() is not checked.
 */
static void
xmlAddSpecialAttr(xmlParserCtxtPtr ctxt,
		  const xmlChar *fullname,
		  const xmlChar *fullattr,
		  int type)
{
    if (ctxt->attsSpecial == NULL) {
        ctxt->attsSpecial = xmlHashCreate(10);
	if (ctxt->attsSpecial == NULL)
	    goto mem_error;
    }

    xmlHashAddEntry2(ctxt->attsSpecial, fullname, fullattr,
                     (void *) (long) type);
    return;

mem_error:
    xmlErrMemory(ctxt, NULL);
    return;
}

705 706 707 708 709 710 711 712 713 714 715 716 717 718 719 720 721 722 723 724 725 726 727 728 729 730 731 732 733 734 735 736 737 738 739 740 741 742 743 744 745 746 747 748 749 750 751 752 753 754 755 756 757 758 759 760 761 762 763 764 765 766 767 768 769 770 771 772 773 774 775 776
/*
 * Tell whether @c is one of the ASCII letters [a-z] or [A-Z].
 */
static int
xmlLangIsAsciiLetter(int c)
{
    return (((c >= 'A') && (c <= 'Z')) || ((c >= 'a') && (c <= 'z')));
}

/**
 * xmlCheckLanguageID:
 * @lang:  pointer to the string value
 *
 * Checks that the value conforms to the LanguageID production:
 *
 * NOTE: this is somewhat deprecated, those productions were removed from
 *       the XML Second edition.
 *
 * [33] LanguageID ::= Langcode ('-' Subcode)*
 * [34] Langcode ::= ISO639Code |  IanaCode |  UserCode
 * [35] ISO639Code ::= ([a-z] | [A-Z]) ([a-z] | [A-Z])
 * [36] IanaCode ::= ('i' | 'I') '-' ([a-z] | [A-Z])+
 * [37] UserCode ::= ('x' | 'X') '-' ([a-z] | [A-Z])+
 * [38] Subcode ::= ([a-z] | [A-Z])+
 *
 * A NULL @lang is rejected. As required by [36] and [37], at least one
 * letter must follow the "i-" / "x-" introducer.
 *
 * Returns 1 if correct 0 otherwise
 **/
int
xmlCheckLanguageID(const xmlChar * lang)
{
    const xmlChar *cur = lang;

    if (cur == NULL)
        return (0);
    if (((cur[0] == 'i') || (cur[0] == 'I') ||
         (cur[0] == 'x') || (cur[0] == 'X')) && (cur[1] == '-')) {
        /*
         * IANA code or User code: the '+' of the production makes the
         * first letter mandatory
         */
        cur += 2;
        if (!xmlLangIsAsciiLetter(cur[0]))
            return (0);
        while (xmlLangIsAsciiLetter(cur[0]))    /* non input consuming */
            cur++;
    } else if (xmlLangIsAsciiLetter(cur[0])) {
        /*
         * ISO639: exactly two letters
         */
        cur++;
        if (!xmlLangIsAsciiLetter(cur[0]))
            return (0);
        cur++;
    } else
        return (0);
    /*
     * Any number of '-' Subcode, each Subcode being one or more letters
     */
    while (cur[0] != 0) {       /* non input consuming */
        if (cur[0] != '-')
            return (0);
        cur++;
        if (!xmlLangIsAsciiLetter(cur[0]))
            return (0);
        while (xmlLangIsAsciiLetter(cur[0]))    /* non input consuming */
            cur++;
    }
    return (1);
}

777 778 779 780 781 782 783 784 785
/************************************************************************
 *									*
 * 		Parser stacks related functions and macros		*
 *									*
 ************************************************************************/

xmlEntityPtr xmlParseStringEntityRef(xmlParserCtxtPtr ctxt,
                                     const xmlChar ** str);

786 787 788 789 790 791 792 793 794
#ifdef SAX2
/**
 * nsPush:
 * @ctxt:  an XML parser context
 * @prefix:  the namespace prefix or NULL
 * @URL:  the namespace name
 *
 * Pushes a new parser namespace on top of the ns stack.
 * The stack stores prefix/URL pairs in consecutive slots. With the
 * XML_PARSE_NSCLEAN option, a declaration identical to the binding
 * currently in scope for @prefix is discarded; strings are compared by
 * pointer.
 *
 * Returns -1 in case of error, -2 if the namespace should be discarded
 *	   and the new number of entries in the stack otherwise.
 */
static int
nsPush(xmlParserCtxtPtr ctxt, const xmlChar *prefix, const xmlChar *URL)
{
    if (ctxt->options & XML_PARSE_NSCLEAN) {
        int i;
	/*
	 * Walk down from the top of the stack so that the innermost
	 * binding of the prefix, the one actually in scope, is checked.
	 */
	for (i = ctxt->nsNr - 2;i >= 0;i -= 2) {
	    if (ctxt->nsTab[i] == prefix) {
		/* in scope */
	        if (ctxt->nsTab[i + 1] == URL)
		    return(-2);
		/* out of scope keep it */
		break;
	    }
	}
    }
    if ((ctxt->nsMax == 0) || (ctxt->nsTab == NULL)) {
	ctxt->nsMax = 10;
	ctxt->nsNr = 0;
	ctxt->nsTab = (const xmlChar **)
	              xmlMalloc(ctxt->nsMax * sizeof(xmlChar *));
	if (ctxt->nsTab == NULL) {
	    xmlErrMemory(ctxt, NULL);
	    ctxt->nsMax = 0;
            return (-1);
	}
    } else if (ctxt->nsNr >= ctxt->nsMax) {
        const xmlChar **tmp;

        ctxt->nsMax *= 2;
	/* keep the old table reachable if the reallocation fails */
        tmp = (const xmlChar **)
	      xmlRealloc((void *) ctxt->nsTab,
			 ctxt->nsMax * sizeof(ctxt->nsTab[0]));
        if (tmp == NULL) {
            xmlErrMemory(ctxt, NULL);
	    ctxt->nsMax /= 2;
            return (-1);
        }
	ctxt->nsTab = tmp;
    }
    ctxt->nsTab[ctxt->nsNr++] = prefix;
    ctxt->nsTab[ctxt->nsNr++] = URL;
    return (ctxt->nsNr);
}
/**
 * nsPop:
 * @ctxt: an XML parser context
 * @nr:  the number to pop
 *
 * Removes the @nr topmost entries from the ns stack, clearing each slot.
 * If @nr exceeds the current stack size a message is sent to the generic
 * error handler and only the available entries are removed.
 *
 * Returns the number of entries removed, 0 if the stack is missing or empty
 */
static int
nsPop(xmlParserCtxtPtr ctxt, int nr)
{
    int remaining;

    if (ctxt->nsTab == NULL)
        return(0);
    if (nr > ctxt->nsNr) {
        xmlGenericError(xmlGenericErrorContext, "Pbm popping %d NS\n", nr);
        nr = ctxt->nsNr;
    }
    if (ctxt->nsNr <= 0)
        return(0);

    remaining = nr;
    while (remaining > 0) {
        ctxt->nsNr -= 1;
        ctxt->nsTab[ctxt->nsNr] = NULL;
        remaining -= 1;
    }
    return(nr);
}
#endif

/*
 * xmlCtxtGrowAttrs:
 * @ctxt:  an XML parser context
 * @nr:  the number of atts slots currently needed
 *
 * Makes sure ctxt->atts can hold at least @nr + 5 entries, allocating the
 * array on first use and enlarging it otherwise. ctxt->attallocs is kept
 * at one int per five atts slots.
 *
 * Returns the resulting ctxt->maxatts, or -1 on allocation failure
 */
static int
xmlCtxtGrowAttrs(xmlParserCtxtPtr ctxt, int nr) {
    const xmlChar **newAtts;
    int *newAllocs;
    int newMax;

    if (ctxt->atts != NULL) {
        /* Already allocated: only grow when @nr + 5 no longer fits. */
        if (nr + 5 <= ctxt->maxatts)
            return(ctxt->maxatts);

        newMax = (nr + 5) * 2;
        newAtts = (const xmlChar **) xmlRealloc((void *) ctxt->atts,
                                     newMax * sizeof(const xmlChar *));
        if (newAtts == NULL)
            goto alloc_failed;
        ctxt->atts = newAtts;

        newAllocs = (int *) xmlRealloc((void *) ctxt->attallocs,
                                       (newMax / 5) * sizeof(int));
        if (newAllocs == NULL)
            goto alloc_failed;
        ctxt->attallocs = newAllocs;
    } else {
        /* First use: 55 slots, i.e. 11 groups of five. */
        newMax = 55;
        newAtts = (const xmlChar **)
                  xmlMalloc(newMax * sizeof(xmlChar *));
        if (newAtts == NULL)
            goto alloc_failed;
        ctxt->atts = newAtts;

        newAllocs = (int *) xmlMalloc((newMax / 5) * sizeof(int));
        if (newAllocs == NULL)
            goto alloc_failed;
        ctxt->attallocs = newAllocs;
    }

    /* Publish the new capacity only once both arrays are in place. */
    ctxt->maxatts = newMax;
    return(ctxt->maxatts);

alloc_failed:
    xmlErrMemory(ctxt, NULL);
    return(-1);
}

902 903 904
/**
 * inputPush:
 * @ctxt:  an XML parser context
905
 * @value:  the parser input
906 907
 *
 * Pushes a new parser input on top of the input stack
908 909
 *
 * Returns 0 in case of error, the index in the stack otherwise
910
 */
Daniel Veillard's avatar
Daniel Veillard committed
911 912 913 914 915 916 917 918 919 920
extern int
inputPush(xmlParserCtxtPtr ctxt, xmlParserInputPtr value)
{
    if (ctxt->inputNr >= ctxt->inputMax) {
        ctxt->inputMax *= 2;
        ctxt->inputTab =
            (xmlParserInputPtr *) xmlRealloc(ctxt->inputTab,
                                             ctxt->inputMax *
                                             sizeof(ctxt->inputTab[0]));
        if (ctxt->inputTab == NULL) {
921
            xmlErrMemory(ctxt, NULL);
Daniel Veillard's avatar
Daniel Veillard committed
922 923 924 925 926 927 928
            return (0);
        }
    }
    ctxt->inputTab[ctxt->inputNr] = value;
    ctxt->input = value;
    return (ctxt->inputNr++);
}
929
/**
Daniel Veillard's avatar
Daniel Veillard committed
930
 * inputPop:
931 932
 * @ctxt: an XML parser context
 *
Daniel Veillard's avatar
Daniel Veillard committed
933
 * Pops the top parser input from the input stack
934
 *
Daniel Veillard's avatar
Daniel Veillard committed
935
 * Returns the input just removed
936
 */
Daniel Veillard's avatar
Daniel Veillard committed
937 938 939 940 941 942 943 944 945 946 947 948 949 950 951 952
extern xmlParserInputPtr
inputPop(xmlParserCtxtPtr ctxt)
{
    xmlParserInputPtr ret;

    if (ctxt->inputNr <= 0)
        return (0);
    ctxt->inputNr--;
    if (ctxt->inputNr > 0)
        ctxt->input = ctxt->inputTab[ctxt->inputNr - 1];
    else
        ctxt->input = NULL;
    ret = ctxt->inputTab[ctxt->inputNr];
    ctxt->inputTab[ctxt->inputNr] = 0;
    return (ret);
}
953
/**
Daniel Veillard's avatar
Daniel Veillard committed
954
 * nodePush:
955
 * @ctxt:  an XML parser context
Daniel Veillard's avatar
Daniel Veillard committed
956
 * @value:  the element node
957
 *
Daniel Veillard's avatar
Daniel Veillard committed
958
 * Pushes a new element node on top of the node stack
959 960
 *
 * Returns 0 in case of error, the index in the stack otherwise
961
 */
Daniel Veillard's avatar
Daniel Veillard committed
962 963 964 965 966 967 968 969 970 971
extern int
nodePush(xmlParserCtxtPtr ctxt, xmlNodePtr value)
{
    if (ctxt->nodeNr >= ctxt->nodeMax) {
        ctxt->nodeMax *= 2;
        ctxt->nodeTab =
            (xmlNodePtr *) xmlRealloc(ctxt->nodeTab,
                                      ctxt->nodeMax *
                                      sizeof(ctxt->nodeTab[0]));
        if (ctxt->nodeTab == NULL) {
972
            xmlErrMemory(ctxt, NULL);
Daniel Veillard's avatar
Daniel Veillard committed
973 974 975
            return (0);
        }
    }
976
    if (((unsigned int) ctxt->nodeNr) > xmlParserMaxDepth) {
977
	xmlFatalErrMsgInt(ctxt, XML_ERR_INTERNAL_ERROR,
978 979
		 "Excessive depth in document: change xmlParserMaxDepth = %d\n",
			  xmlParserMaxDepth);
980 981 982
	ctxt->instate = XML_PARSER_EOF;
	return(0);
    }
Daniel Veillard's avatar
Daniel Veillard committed
983 984 985 986
    ctxt->nodeTab[ctxt->nodeNr] = value;
    ctxt->node = value;
    return (ctxt->nodeNr++);
}
987 988 989 990 991 992 993 994
/**
 * nodePop:
 * @ctxt: an XML parser context
 *
 * Pops the top element node from the node stack
 *
 * Returns the node just removed
 */
Daniel Veillard's avatar
Daniel Veillard committed
995 996 997 998 999 1000 1001 1002 1003 1004 1005 1006 1007 1008 1009 1010
extern xmlNodePtr
nodePop(xmlParserCtxtPtr ctxt)
{
    xmlNodePtr ret;

    if (ctxt->nodeNr <= 0)
        return (0);
    ctxt->nodeNr--;
    if (ctxt->nodeNr > 0)
        ctxt->node = ctxt->nodeTab[ctxt->nodeNr - 1];
    else
        ctxt->node = NULL;
    ret = ctxt->nodeTab[ctxt->nodeNr];
    ctxt->nodeTab[ctxt->nodeNr] = 0;
    return (ret);
}
1011 1012 1013 1014 1015 1016 1017 1018 1019 1020 1021 1022 1023 1024 1025 1026 1027 1028 1029 1030 1031 1032 1033 1034 1035 1036 1037 1038 1039 1040 1041 1042 1043 1044 1045 1046 1047 1048 1049 1050
/**
 * nameNsPush:
 * @ctxt:  an XML parser context
 * @value:  the element name
 * @prefix:  the element prefix
 * @URI:  the element namespace name
 * @nsNr:  an integer stored alongside the name (presumably the number of
 *         namespace entries pushed for this element -- confirm against
 *         the callers)
 *
 * Pushes a new element name on top of the name stack and records
 * @prefix, @URI and @nsNr in the three matching slots of ctxt->pushTab.
 * Both tables are doubled when the name stack is full; if either
 * reallocation fails the recorded capacity is restored and a memory
 * error is raised.
 *
 * Returns -1 in case of error, the index in the stack otherwise
 */
static int
nameNsPush(xmlParserCtxtPtr ctxt, const xmlChar * value,
           const xmlChar *prefix, const xmlChar *URI, int nsNr)
{
    int base;

    if (ctxt->nameNr >= ctxt->nameMax) {
        const xmlChar **names;
        void **triples;

        ctxt->nameMax *= 2;

        names = (const xmlChar * *) xmlRealloc((xmlChar * *)ctxt->nameTab,
                                      ctxt->nameMax *
                                      sizeof(ctxt->nameTab[0]));
        if (names == NULL) {
            ctxt->nameMax /= 2;
            xmlErrMemory(ctxt, NULL);
            return (-1);
        }
        ctxt->nameTab = names;

        /* pushTab keeps three entries per name: prefix, URI, nsNr. */
        triples = (void **) xmlRealloc((void * *)ctxt->pushTab,
                                       ctxt->nameMax * 3 *
                                       sizeof(ctxt->pushTab[0]));
        if (triples == NULL) {
            ctxt->nameMax /= 2;
            xmlErrMemory(ctxt, NULL);
            return (-1);
        }
        ctxt->pushTab = triples;
    }

    base = ctxt->nameNr * 3;
    ctxt->pushTab[base] = (void *) prefix;
    ctxt->pushTab[base + 1] = (void *) URI;
    /* The integer is stored in a pointer-sized slot. */
    ctxt->pushTab[base + 2] = (void *) (long) nsNr;

    ctxt->nameTab[ctxt->nameNr] = value;
    ctxt->name = value;
    return (ctxt->nameNr++);
}
/**
 * nameNsPop:
 * @ctxt: an XML parser context
 *
 * Removes the topmost element name from the name stack. The entry below
 * it, if any, becomes the current name (ctxt->name); otherwise the
 * current name is set to NULL. The vacated nameTab slot is cleared; the
 * matching ctxt->pushTab entries are not modified.
 *
 * Returns the name just removed, or NULL if the stack was empty
 */
static const xmlChar *
nameNsPop(xmlParserCtxtPtr ctxt)
{
    const xmlChar *popped;
    int top;

    if (ctxt->nameNr <= 0)
        return (NULL);

    top = --ctxt->nameNr;
    ctxt->name = (top > 0) ? ctxt->nameTab[top - 1] : NULL;

    popped = ctxt->nameTab[top];
    ctxt->nameTab[top] = NULL;
    return (popped);
}

1082
/**
Daniel Veillard's avatar
Daniel Veillard committed
1083
 * namePush:
1084
 * @ctxt:  an XML parser context
Daniel Veillard's avatar
Daniel Veillard committed
1085
 * @value:  the element name
1086
 *
Daniel Veillard's avatar
Daniel Veillard committed
1087
 * Pushes a new element name on top of the name stack
1088
 *
1089
 * Returns -1 in case of error, the index in the stack otherwise
1090
 */
Daniel Veillard's avatar
Daniel Veillard committed
1091
extern int
1092
namePush(xmlParserCtxtPtr ctxt, const xmlChar * value)
Daniel Veillard's avatar
Daniel Veillard committed
1093 1094
{
    if (ctxt->nameNr >= ctxt->nameMax) {
1095
        const xmlChar * *tmp;
Daniel Veillard's avatar
Daniel Veillard committed
1096
        ctxt->nameMax *= 2;
1097
        tmp = (const xmlChar * *) xmlRealloc((xmlChar * *)ctxt->nameTab,
Daniel Veillard's avatar
Daniel Veillard committed
1098 1099
                                    ctxt->nameMax *
                                    sizeof(ctxt->nameTab[0]));