/*
 * parser.c : an XML 1.0 parser, namespaces and validity support are mostly
 *            implemented on top of the SAX interfaces
 *
 * References:
 *   The XML specification:
 *     http://www.w3.org/TR/REC-xml
 *   Original 1.0 version:
 *     http://www.w3.org/TR/1998/REC-xml-19980210
 *   XML second edition working draft
 *     http://www.w3.org/TR/2000/WD-xml-2e-20000814
 *
 * Okay this is a big file, the parser core is around 7000 lines, then it
 * is followed by the progressive parser top routines, then the various
 * high level APIs to call the parser and a few miscellaneous functions.
 * A number of helper functions and deprecated ones have been moved to
 * parserInternals.c to reduce this file size.
 * As much as possible the functions are associated with their relative
 * production in the XML specification. A few productions defining the
 * different ranges of character are actually implemented either in
 * parserInternals.h or parserInternals.c
 * The DOM tree build is realized from the default SAX callbacks in
 * the module SAX.c.
 * The routines doing the validation checks are in valid.c and called either
 * from the SAX callbacks or as standalone functions using a preparsed
 * document.
 *
 * See Copyright for the status of this software.
 *
 * daniel@veillard.com
 */

33
#define IN_LIBXML
Bjorn Reese's avatar
Bjorn Reese committed
34 35
#include "libxml.h"

36
#if defined(WIN32) && !defined (__CYGWIN__)
37 38 39 40 41 42 43
#define XML_DIR_SEP '\\'
#else
#define XML_DIR_SEP '/'
#endif

#include <stdlib.h>
#include <string.h>
44
#include <stdarg.h>
45
#include <libxml/xmlmemory.h>
46 47
#include <libxml/threads.h>
#include <libxml/globals.h>
48 49 50 51 52 53 54 55 56
#include <libxml/tree.h>
#include <libxml/parser.h>
#include <libxml/parserInternals.h>
#include <libxml/valid.h>
#include <libxml/entities.h>
#include <libxml/xmlerror.h>
#include <libxml/encoding.h>
#include <libxml/xmlIO.h>
#include <libxml/uri.h>
57 58 59
#ifdef LIBXML_CATALOG_ENABLED
#include <libxml/catalog.h>
#endif
60 61 62 63
#ifdef LIBXML_SCHEMAS_ENABLED
#include <libxml/xmlschemastypes.h>
#include <libxml/relaxng.h>
#endif
64 65 66 67 68 69 70 71 72 73 74 75 76 77 78 79 80 81 82
#ifdef HAVE_CTYPE_H
#include <ctype.h>
#endif
#ifdef HAVE_STDLIB_H
#include <stdlib.h>
#endif
#ifdef HAVE_SYS_STAT_H
#include <sys/stat.h>
#endif
#ifdef HAVE_FCNTL_H
#include <fcntl.h>
#endif
#ifdef HAVE_UNISTD_H
#include <unistd.h>
#endif
#ifdef HAVE_ZLIB_H
#include <zlib.h>
#endif

83 84 85
static void
xmlFatalErr(xmlParserCtxtPtr ctxt, xmlParserErrors error, const char *info);

86 87 88 89
static xmlParserCtxtPtr
xmlCreateEntityParserCtxtInternal(const xmlChar *URL, const xmlChar *ID,
	                  const xmlChar *base, xmlParserCtxtPtr pctx);

90 91 92 93 94 95 96 97 98 99 100 101 102 103 104 105 106 107 108 109 110 111 112 113 114 115 116 117 118 119
/************************************************************************
 *									*
 *	Arbitrary limits set in the parser. See XML_PARSE_HUGE		*
 *									*
 ************************************************************************/

#define XML_PARSER_BIG_ENTITY 1000
#define XML_PARSER_LOT_ENTITY 5000

/*
 * XML_PARSER_NON_LINEAR is the threshold where the ratio of parsed entity
 *    replacement over the size in bytes of the input indicates that you have
 *    an exponential behaviour. A value of 10 corresponds to at least 3 entity
 *    replacements per byte of input.
 */
#define XML_PARSER_NON_LINEAR 10

/*
 * xmlParserEntityCheck:
 * @ctxt:  an XML parser context, may be NULL
 * @size:  replacement size of the entity, or 0 to rely on @ent instead
 * @ent:  the entity being checked, only consulted when @size is 0
 *
 * Guard against non-linear (exponential) entity expansion by comparing
 * the amount of entity replacement with the amount of data consumed so
 * far. This is not a limitation of the parser but a safety boundary
 * feature; it is skipped when the XML_PARSE_HUGE parser option is set.
 *
 * Returns 0 when the check passes or does not apply, 1 when an entity
 *         loop has already been recorded on @ctxt or is reported now.
 */
static int
xmlParserEntityCheck(xmlParserCtxtPtr ctxt, unsigned long size,
                     xmlEntityPtr ent)
{
    unsigned long consumed;

    if (ctxt == NULL)
        return (0);
    if (ctxt->options & XML_PARSE_HUGE)
        return (0);
    if (ctxt->lastError.code == XML_ERR_ENTITY_LOOP)
        return (1);

    /* Nothing to measure against: neither a size nor an entity. */
    if ((size == 0) && (ent == NULL))
        return (0);

    /* Small replacements are always accepted. */
    if ((size != 0) && (size < XML_PARSER_BIG_ENTITY))
        return (0);

    /*
     * Amount of data parsed so far, entity sizes being counted only once.
     */
    consumed = ctxt->sizeentities;
    if (ctxt->input != NULL) {
        consumed += ctxt->input->consumed +
            (ctxt->input->cur - ctxt->input->base);
    }

    if (size != 0) {
        /*
         * Check based on the replacement size of the entity, together
         * with a limit on the number of entities reasonably used.
         */
        if ((size < XML_PARSER_NON_LINEAR * consumed) &&
            (ctxt->nbentities * 3 < XML_PARSER_NON_LINEAR * consumed))
            return (0);
    } else {
        /*
         * Use the number of parsed entities in the replacement and check
         * their density, knowing an entity reference takes at least
         * 3 bytes.
         */
        size = ent->checked;
        if (size * 3 < consumed * XML_PARSER_NON_LINEAR)
            return (0);
    }

    xmlFatalErr(ctxt, XML_ERR_ENTITY_LOOP, NULL);
    return (1);
}

177
/**
 * xmlParserMaxDepth:
 *
 * Arbitrary depth limit for the XML documents that we allow to
 * process. This is not a limitation of the parser but a safety
 * boundary feature. It can be disabled with the XML_PARSE_HUGE
 * parser option.
 */
unsigned int xmlParserMaxDepth = 256;
186

187

188 189

#define SAX2 1
190
#define XML_PARSER_BIG_BUFFER_SIZE 300
191
#define XML_PARSER_BUFFER_SIZE 100
192 193
#define SAX_COMPAT_MODE BAD_CAST "SAX compatibility mode document"

194 195 196 197
/*
 * List of XML prefixed PI targets allowed by the W3C specs.
 * The table is read-only and terminated by a NULL entry.
 */
static const char *const xmlW3CPIs[] = {
    "xml-stylesheet",
    NULL
};

203

204 205 206 207
/* DEPR void xmlParserHandleReference(xmlParserCtxtPtr ctxt); */
xmlEntityPtr xmlParseStringPEReference(xmlParserCtxtPtr ctxt,
                                       const xmlChar **str);

208
static xmlParserErrors
209 210
xmlParseExternalEntityPrivate(xmlDocPtr doc, xmlParserCtxtPtr oldctxt,
	              xmlSAXHandlerPtr sax,
211
		      void *user_data, int depth, const xmlChar *URL,
212
		      const xmlChar *ID, xmlNodePtr *list);
213

214 215 216
static int
xmlCtxtUseOptionsInternal(xmlParserCtxtPtr ctxt, int options,
                          const char *encoding);
217
#ifdef LIBXML_LEGACY_ENABLED
218 219 220
static void
xmlAddEntityReference(xmlEntityPtr ent, xmlNodePtr firstNode,
                      xmlNodePtr lastNode);
221
#endif /* LIBXML_LEGACY_ENABLED */
222

223
static xmlParserErrors
224 225
xmlParseBalancedChunkMemoryInternal(xmlParserCtxtPtr oldctxt,
		      const xmlChar *string, void *user_data, xmlNodePtr *lst);
226

227 228 229
static int
xmlLoadEntityContent(xmlParserCtxtPtr ctxt, xmlEntityPtr entity);

230 231 232 233 234 235 236 237 238 239 240 241 242 243 244 245 246 247
/************************************************************************
 *									*
 * 		Some factorized error routines				*
 *									*
 ************************************************************************/

/**
 * xmlErrAttributeDup:
 * @ctxt:  an XML parser context, may be NULL
 * @prefix:  the attribute prefix, or NULL for an unprefixed attribute
 * @localname:  the attribute localname
 *
 * Handle a redefinition of attribute error. The error is raised as
 * XML_ERR_ATTRIBUTE_REDEFINED at the XML_ERR_FATAL level; when a context
 * is given it is marked as not well formed and, unless it is in recovery
 * mode, its SAX callbacks are disabled.
 */
static void
xmlErrAttributeDup(xmlParserCtxtPtr ctxt, const xmlChar * prefix,
                   const xmlChar * localname)
{
    /* Stay silent once parsing was stopped and SAX output disabled. */
    if ((ctxt != NULL) && (ctxt->disableSAX != 0) &&
        (ctxt->instate == XML_PARSER_EOF))
        return;
    if (ctxt != NULL)
        ctxt->errNo = XML_ERR_ATTRIBUTE_REDEFINED;

    /*
     * Pass the error code as a constant rather than reading it back from
     * ctxt->errNo: ctxt is allowed to be NULL here.
     */
    if (prefix == NULL)
        __xmlRaiseError(NULL, NULL, NULL, ctxt, NULL, XML_FROM_PARSER,
                        XML_ERR_ATTRIBUTE_REDEFINED, XML_ERR_FATAL, NULL, 0,
                        (const char *) localname, NULL, NULL, 0, 0,
                        "Attribute %s redefined\n", localname);
    else
        __xmlRaiseError(NULL, NULL, NULL, ctxt, NULL, XML_FROM_PARSER,
                        XML_ERR_ATTRIBUTE_REDEFINED, XML_ERR_FATAL, NULL, 0,
                        (const char *) prefix, (const char *) localname,
                        NULL, 0, 0, "Attribute %s:%s redefined\n", prefix,
                        localname);
    if (ctxt != NULL) {
        ctxt->wellFormed = 0;
        if (ctxt->recovery == 0)
            ctxt->disableSAX = 1;
    }
}

/**
 * xmlFatalErr:
 * @ctxt:  an XML parser context
 * @error:  the error number
 * @extra:  extra information string
 *
 * Handle a fatal parser error, i.e. violating Well-Formedness constraints
 */
static void
280
xmlFatalErr(xmlParserCtxtPtr ctxt, xmlParserErrors error, const char *info)
281 282 283
{
    const char *errmsg;

284 285 286
    if ((ctxt != NULL) && (ctxt->disableSAX != 0) &&
        (ctxt->instate == XML_PARSER_EOF))
	return;
287 288
    switch (error) {
        case XML_ERR_INVALID_HEX_CHARREF:
289 290
            errmsg = "CharRef: invalid hexadecimal value\n";
            break;
291
        case XML_ERR_INVALID_DEC_CHARREF:
292 293
            errmsg = "CharRef: invalid decimal value\n";
            break;
294
        case XML_ERR_INVALID_CHARREF:
295 296
            errmsg = "CharRef: invalid value\n";
            break;
297
        case XML_ERR_INTERNAL_ERROR:
298 299
            errmsg = "internal error";
            break;
300
        case XML_ERR_PEREF_AT_EOF:
301 302
            errmsg = "PEReference at end of document\n";
            break;
303
        case XML_ERR_PEREF_IN_PROLOG:
304 305
            errmsg = "PEReference in prolog\n";
            break;
306
        case XML_ERR_PEREF_IN_EPILOG:
307 308
            errmsg = "PEReference in epilog\n";
            break;
309
        case XML_ERR_PEREF_NO_NAME:
310 311
            errmsg = "PEReference: no name\n";
            break;
312
        case XML_ERR_PEREF_SEMICOL_MISSING:
313 314
            errmsg = "PEReference: expecting ';'\n";
            break;
315
        case XML_ERR_ENTITY_LOOP:
316 317
            errmsg = "Detected an entity reference loop\n";
            break;
318
        case XML_ERR_ENTITY_NOT_STARTED:
319 320
            errmsg = "EntityValue: \" or ' expected\n";
            break;
321
        case XML_ERR_ENTITY_PE_INTERNAL:
322 323
            errmsg = "PEReferences forbidden in internal subset\n";
            break;
324
        case XML_ERR_ENTITY_NOT_FINISHED:
325 326
            errmsg = "EntityValue: \" or ' expected\n";
            break;
327
        case XML_ERR_ATTRIBUTE_NOT_STARTED:
328 329
            errmsg = "AttValue: \" or ' expected\n";
            break;
330
        case XML_ERR_LT_IN_ATTRIBUTE:
331 332
            errmsg = "Unescaped '<' not allowed in attributes values\n";
            break;
333
        case XML_ERR_LITERAL_NOT_STARTED:
334 335
            errmsg = "SystemLiteral \" or ' expected\n";
            break;
336
        case XML_ERR_LITERAL_NOT_FINISHED:
337 338
            errmsg = "Unfinished System or Public ID \" or ' expected\n";
            break;
339
        case XML_ERR_MISPLACED_CDATA_END:
340 341
            errmsg = "Sequence ']]>' not allowed in content\n";
            break;
342
        case XML_ERR_URI_REQUIRED:
343 344
            errmsg = "SYSTEM or PUBLIC, the URI is missing\n";
            break;
345
        case XML_ERR_PUBID_REQUIRED:
346 347
            errmsg = "PUBLIC, the Public Identifier is missing\n";
            break;
348
        case XML_ERR_HYPHEN_IN_COMMENT:
349 350
            errmsg = "Comment must not contain '--' (double-hyphen)\n";
            break;
351
        case XML_ERR_PI_NOT_STARTED:
352 353
            errmsg = "xmlParsePI : no target name\n";
            break;
354
        case XML_ERR_RESERVED_XML_NAME:
355 356
            errmsg = "Invalid PI name\n";
            break;
357
        case XML_ERR_NOTATION_NOT_STARTED:
358 359
            errmsg = "NOTATION: Name expected here\n";
            break;
360
        case XML_ERR_NOTATION_NOT_FINISHED:
361 362
            errmsg = "'>' required to close NOTATION declaration\n";
            break;
363
        case XML_ERR_VALUE_REQUIRED:
364 365
            errmsg = "Entity value required\n";
            break;
366
        case XML_ERR_URI_FRAGMENT:
367 368
            errmsg = "Fragment not allowed";
            break;
369
        case XML_ERR_ATTLIST_NOT_STARTED:
370 371
            errmsg = "'(' required to start ATTLIST enumeration\n";
            break;
372
        case XML_ERR_NMTOKEN_REQUIRED:
373 374
            errmsg = "NmToken expected in ATTLIST enumeration\n";
            break;
375
        case XML_ERR_ATTLIST_NOT_FINISHED:
376 377
            errmsg = "')' required to finish ATTLIST enumeration\n";
            break;
378
        case XML_ERR_MIXED_NOT_STARTED:
379 380
            errmsg = "MixedContentDecl : '|' or ')*' expected\n";
            break;
381
        case XML_ERR_PCDATA_REQUIRED:
382 383
            errmsg = "MixedContentDecl : '#PCDATA' expected\n";
            break;
384
        case XML_ERR_ELEMCONTENT_NOT_STARTED:
385 386
            errmsg = "ContentDecl : Name or '(' expected\n";
            break;
387
        case XML_ERR_ELEMCONTENT_NOT_FINISHED:
388 389
            errmsg = "ContentDecl : ',' '|' or ')' expected\n";
            break;
390
        case XML_ERR_PEREF_IN_INT_SUBSET:
391 392 393
            errmsg =
                "PEReference: forbidden within markup decl in internal subset\n";
            break;
394
        case XML_ERR_GT_REQUIRED:
395 396
            errmsg = "expected '>'\n";
            break;
397
        case XML_ERR_CONDSEC_INVALID:
398 399
            errmsg = "XML conditional section '[' expected\n";
            break;
400
        case XML_ERR_EXT_SUBSET_NOT_FINISHED:
401 402 403 404 405 406
            errmsg = "Content error in the external subset\n";
            break;
        case XML_ERR_CONDSEC_INVALID_KEYWORD:
            errmsg =
                "conditional section INCLUDE or IGNORE keyword expected\n";
            break;
407
        case XML_ERR_CONDSEC_NOT_FINISHED:
408 409
            errmsg = "XML conditional section not closed\n";
            break;
410
        case XML_ERR_XMLDECL_NOT_STARTED:
411 412
            errmsg = "Text declaration '<?xml' required\n";
            break;
413
        case XML_ERR_XMLDECL_NOT_FINISHED:
414 415
            errmsg = "parsing XML declaration: '?>' expected\n";
            break;
416
        case XML_ERR_EXT_ENTITY_STANDALONE:
417 418
            errmsg = "external parsed entities cannot be standalone\n";
            break;
419
        case XML_ERR_ENTITYREF_SEMICOL_MISSING:
420 421
            errmsg = "EntityRef: expecting ';'\n";
            break;
422
        case XML_ERR_DOCTYPE_NOT_FINISHED:
423 424
            errmsg = "DOCTYPE improperly terminated\n";
            break;
425
        case XML_ERR_LTSLASH_REQUIRED:
426 427
            errmsg = "EndTag: '</' not found\n";
            break;
428
        case XML_ERR_EQUAL_REQUIRED:
429 430
            errmsg = "expected '='\n";
            break;
431
        case XML_ERR_STRING_NOT_CLOSED:
432 433
            errmsg = "String not closed expecting \" or '\n";
            break;
434
        case XML_ERR_STRING_NOT_STARTED:
435 436
            errmsg = "String not started expecting ' or \"\n";
            break;
437
        case XML_ERR_ENCODING_NAME:
438 439
            errmsg = "Invalid XML encoding name\n";
            break;
440
        case XML_ERR_STANDALONE_VALUE:
441 442
            errmsg = "standalone accepts only 'yes' or 'no'\n";
            break;
443
        case XML_ERR_DOCUMENT_EMPTY:
444 445
            errmsg = "Document is empty\n";
            break;
446
        case XML_ERR_DOCUMENT_END:
447 448
            errmsg = "Extra content at the end of the document\n";
            break;
449
        case XML_ERR_NOT_WELL_BALANCED:
450 451
            errmsg = "chunk is not well balanced\n";
            break;
452
        case XML_ERR_EXTRA_CONTENT:
453 454
            errmsg = "extra content at the end of well balanced chunk\n";
            break;
455
        case XML_ERR_VERSION_MISSING:
456 457
            errmsg = "Malformed declaration expecting version\n";
            break;
458
#if 0
459 460 461
        case:
            errmsg = "\n";
            break;
462
#endif
463 464
        default:
            errmsg = "Unregistered error message\n";
465
    }
466 467
    if (ctxt != NULL)
	ctxt->errNo = error;
468
    __xmlRaiseError(NULL, NULL, NULL, ctxt, NULL, XML_FROM_PARSER, error,
469 470
                    XML_ERR_FATAL, NULL, 0, info, NULL, NULL, 0, 0, errmsg,
                    info);
471 472 473 474 475
    if (ctxt != NULL) {
	ctxt->wellFormed = 0;
	if (ctxt->recovery == 0)
	    ctxt->disableSAX = 1;
    }
476 477
}

478 479 480 481 482 483 484 485 486
/**
 * xmlFatalErrMsg:
 * @ctxt:  an XML parser context
 * @error:  the error number
 * @msg:  the error message
 *
 * Handle a fatal parser error, i.e. violating Well-Formedness constraints
 */
static void
487 488
xmlFatalErrMsg(xmlParserCtxtPtr ctxt, xmlParserErrors error,
               const char *msg)
489
{
490 491 492
    if ((ctxt != NULL) && (ctxt->disableSAX != 0) &&
        (ctxt->instate == XML_PARSER_EOF))
	return;
493 494
    if (ctxt != NULL)
	ctxt->errNo = error;
495
    __xmlRaiseError(NULL, NULL, NULL, ctxt, NULL, XML_FROM_PARSER, error,
496
                    XML_ERR_FATAL, NULL, 0, NULL, NULL, NULL, 0, 0, "%s", msg);
497 498 499 500 501
    if (ctxt != NULL) {
	ctxt->wellFormed = 0;
	if (ctxt->recovery == 0)
	    ctxt->disableSAX = 1;
    }
502 503
}

504 505 506 507 508 509 510 511 512 513 514 515 516 517
/**
 * xmlWarningMsg:
 * @ctxt:  an XML parser context, may be NULL
 * @error:  the error number
 * @msg:  the warning message, used as a format for @str1 and @str2
 * @str1:  extra data
 * @str2:  extra data
 *
 * Handle a warning. With a context, the warning goes to the SAX warning
 * callback and user data of that context, plus the structured error
 * callback when the SAX handler is initialized for SAX2. Without a
 * context no callback is passed to the error machinery.
 */
static void
xmlWarningMsg(xmlParserCtxtPtr ctxt, xmlParserErrors error,
              const char *msg, const xmlChar *str1, const xmlChar *str2)
{
    xmlStructuredErrorFunc schannel = NULL;

    /* Stay silent once parsing was stopped and SAX output disabled. */
    if ((ctxt != NULL) && (ctxt->disableSAX != 0) &&
        (ctxt->instate == XML_PARSER_EOF))
        return;
    if ((ctxt != NULL) && (ctxt->sax != NULL) &&
        (ctxt->sax->initialized == XML_SAX2_MAGIC))
        schannel = ctxt->sax->serror;

    /* ctxt may be NULL: only read its callbacks when it is present. */
    if (ctxt != NULL) {
        __xmlRaiseError(schannel,
                        (ctxt->sax) ? ctxt->sax->warning : NULL,
                        ctxt->userData,
                        ctxt, NULL, XML_FROM_PARSER, error,
                        XML_ERR_WARNING, NULL, 0,
                        (const char *) str1, (const char *) str2, NULL, 0, 0,
                        msg, (const char *) str1, (const char *) str2);
    } else {
        __xmlRaiseError(schannel, NULL, NULL,
                        ctxt, NULL, XML_FROM_PARSER, error,
                        XML_ERR_WARNING, NULL, 0,
                        (const char *) str1, (const char *) str2, NULL, 0, 0,
                        msg, (const char *) str1, (const char *) str2);
    }
}

/**
 * xmlValidityError:
 * @ctxt:  an XML parser context, may be NULL
 * @error:  the error number
 * @msg:  the error message, used as a format for @str1 and @str2
 * @str1:  extra data
 * @str2:  extra data
 *
 * Handle a validity error. With a context, the error goes to the
 * validation context callback and user data, plus the structured error
 * callback when the SAX handler is initialized for SAX2, and the context
 * is marked as not valid. Without a context no callback is passed to the
 * error machinery.
 */
static void
xmlValidityError(xmlParserCtxtPtr ctxt, xmlParserErrors error,
              const char *msg, const xmlChar *str1, const xmlChar *str2)
{
    xmlStructuredErrorFunc schannel = NULL;

    /* Stay silent once parsing was stopped and SAX output disabled. */
    if ((ctxt != NULL) && (ctxt->disableSAX != 0) &&
        (ctxt->instate == XML_PARSER_EOF))
        return;
    if (ctxt != NULL) {
        ctxt->errNo = error;
        if ((ctxt->sax != NULL) && (ctxt->sax->initialized == XML_SAX2_MAGIC))
            schannel = ctxt->sax->serror;
    }

    /* ctxt may be NULL: only read ctxt->vctxt when it is present. */
    if (ctxt != NULL) {
        __xmlRaiseError(schannel,
                        ctxt->vctxt.error, ctxt->vctxt.userData,
                        ctxt, NULL, XML_FROM_DTD, error,
                        XML_ERR_ERROR, NULL, 0, (const char *) str1,
                        (const char *) str2, NULL, 0, 0,
                        msg, (const char *) str1, (const char *) str2);
        ctxt->valid = 0;
    } else {
        __xmlRaiseError(schannel, NULL, NULL,
                        ctxt, NULL, XML_FROM_DTD, error,
                        XML_ERR_ERROR, NULL, 0, (const char *) str1,
                        (const char *) str2, NULL, 0, 0,
                        msg, (const char *) str1, (const char *) str2);
    }
}

569 570 571 572 573 574 575 576 577 578 579
/**
 * xmlFatalErrMsgInt:
 * @ctxt:  an XML parser context, may be NULL
 * @error:  the error number
 * @msg:  the error message, used as a format for @val
 * @val:  an integer value
 *
 * Report a fatal parser error, i.e. a violation of a Well-Formedness
 * constraint, whose message carries one integer. When a context is given
 * it is flagged as not well formed and, outside of recovery mode, its SAX
 * callbacks are disabled.
 */
static void
xmlFatalErrMsgInt(xmlParserCtxtPtr ctxt, xmlParserErrors error,
                  const char *msg, int val)
{
    if (ctxt != NULL) {
        /* Nothing is reported once parsing stopped with SAX disabled. */
        if ((ctxt->instate == XML_PARSER_EOF) && (ctxt->disableSAX != 0))
            return;
        ctxt->errNo = error;
    }

    __xmlRaiseError(NULL, NULL, NULL,
                    ctxt, NULL, XML_FROM_PARSER, error, XML_ERR_FATAL,
                    NULL, 0, NULL, NULL, NULL, val, 0, msg, val);

    if (ctxt == NULL)
        return;
    ctxt->wellFormed = 0;
    if (ctxt->recovery == 0)
        ctxt->disableSAX = 1;
}

597 598 599 600 601 602 603 604 605 606 607 608 609 610 611 612
/**
 * xmlFatalErrMsgStrIntStr:
 * @ctxt:  an XML parser context, may be NULL
 * @error:  the error number
 * @msg:  the error message, used as a format for @str1, @val and @str2
 * @str1:  a string info
 * @val:  an integer value
 * @str2:  a string info
 *
 * Report a fatal parser error, i.e. a violation of a Well-Formedness
 * constraint, whose message carries a string, an integer and a second
 * string, in that order. When a context is given it is flagged as not
 * well formed and, outside of recovery mode, its SAX callbacks are
 * disabled.
 */
static void
xmlFatalErrMsgStrIntStr(xmlParserCtxtPtr ctxt, xmlParserErrors error,
                  const char *msg, const xmlChar *str1, int val,
                  const xmlChar *str2)
{
    if (ctxt != NULL) {
        /* Nothing is reported once parsing stopped with SAX disabled. */
        if ((ctxt->instate == XML_PARSER_EOF) && (ctxt->disableSAX != 0))
            return;
        ctxt->errNo = error;
    }

    __xmlRaiseError(NULL, NULL, NULL,
                    ctxt, NULL, XML_FROM_PARSER, error, XML_ERR_FATAL,
                    NULL, 0, (const char *) str1, (const char *) str2,
                    NULL, val, 0, msg, str1, val, str2);

    if (ctxt == NULL)
        return;
    ctxt->wellFormed = 0;
    if (ctxt->recovery == 0)
        ctxt->disableSAX = 1;
}

629 630 631 632 633 634 635 636 637 638 639
/**
 * xmlFatalErrMsgStr:
 * @ctxt:  an XML parser context, may be NULL
 * @error:  the error number
 * @msg:  the error message, used as a format for @val
 * @val:  a string value
 *
 * Report a fatal parser error, i.e. a violation of a Well-Formedness
 * constraint, whose message carries one string. When a context is given
 * it is flagged as not well formed and, outside of recovery mode, its SAX
 * callbacks are disabled.
 */
static void
xmlFatalErrMsgStr(xmlParserCtxtPtr ctxt, xmlParserErrors error,
                  const char *msg, const xmlChar * val)
{
    if (ctxt != NULL) {
        /* Nothing is reported once parsing stopped with SAX disabled. */
        if ((ctxt->instate == XML_PARSER_EOF) && (ctxt->disableSAX != 0))
            return;
        ctxt->errNo = error;
    }

    __xmlRaiseError(NULL, NULL, NULL, ctxt, NULL,
                    XML_FROM_PARSER, error, XML_ERR_FATAL,
                    NULL, 0, (const char *) val, NULL, NULL, 0, 0, msg,
                    val);

    if (ctxt == NULL)
        return;
    ctxt->wellFormed = 0;
    if (ctxt->recovery == 0)
        ctxt->disableSAX = 1;
}

658 659 660 661 662 663 664 665 666 667 668 669 670
/**
 * xmlErrMsgStr:
 * @ctxt:  an XML parser context, may be NULL
 * @error:  the error number
 * @msg:  the error message, used as a format for @val
 * @val:  a string value
 *
 * Report a non fatal parser error whose message carries one string.
 * Apart from recording the error number, the context state is left
 * untouched.
 */
static void
xmlErrMsgStr(xmlParserCtxtPtr ctxt, xmlParserErrors error,
                  const char *msg, const xmlChar * val)
{
    if (ctxt != NULL) {
        /* Nothing is reported once parsing stopped with SAX disabled. */
        if ((ctxt->instate == XML_PARSER_EOF) && (ctxt->disableSAX != 0))
            return;
        ctxt->errNo = error;
    }

    __xmlRaiseError(NULL, NULL, NULL, ctxt, NULL,
                    XML_FROM_PARSER, error, XML_ERR_ERROR,
                    NULL, 0, (const char *) val, NULL, NULL, 0, 0, msg,
                    val);
}

682 683 684 685 686 687 688 689 690 691 692 693 694
/**
 * xmlNsErr:
 * @ctxt:  an XML parser context, may be NULL
 * @error:  the error number
 * @msg:  the message, used as a format for @info1, @info2 and @info3
 * @info1:  extra information string
 * @info2:  extra information string
 * @info3:  extra information string
 *
 * Report a namespace error. It is raised from the XML_FROM_NAMESPACE
 * domain at the XML_ERR_ERROR level; when a context is given it is
 * flagged as not namespace well formed.
 */
static void
xmlNsErr(xmlParserCtxtPtr ctxt, xmlParserErrors error,
         const char *msg,
         const xmlChar * info1, const xmlChar * info2,
         const xmlChar * info3)
{
    if (ctxt != NULL) {
        /* Nothing is reported once parsing stopped with SAX disabled. */
        if ((ctxt->instate == XML_PARSER_EOF) && (ctxt->disableSAX != 0))
            return;
        ctxt->errNo = error;
    }

    __xmlRaiseError(NULL, NULL, NULL, ctxt, NULL, XML_FROM_NAMESPACE, error,
                    XML_ERR_ERROR, NULL, 0, (const char *) info1,
                    (const char *) info2, (const char *) info3, 0, 0, msg,
                    info1, info2, info3);

    if (ctxt == NULL)
        return;
    ctxt->nsWellFormed = 0;
}

711 712 713 714 715 716 717 718 719 720 721 722 723 724 725 726 727 728 729 730 731 732 733 734 735
/**
 * xmlNsWarn:
 * @ctxt:  an XML parser context, may be NULL
 * @error:  the error number
 * @msg:  the message, used as a format for @info1, @info2 and @info3
 * @info1:  extra information string
 * @info2:  extra information string
 * @info3:  extra information string
 *
 * Report a namespace warning. It is raised from the XML_FROM_NAMESPACE
 * domain at the XML_ERR_WARNING level and leaves the context state
 * untouched.
 */
static void
xmlNsWarn(xmlParserCtxtPtr ctxt, xmlParserErrors error,
         const char *msg,
         const xmlChar * info1, const xmlChar * info2,
         const xmlChar * info3)
{
    if (ctxt != NULL) {
        /* Nothing is reported once parsing stopped with SAX disabled. */
        if ((ctxt->instate == XML_PARSER_EOF) && (ctxt->disableSAX != 0))
            return;
    }

    __xmlRaiseError(NULL, NULL, NULL, ctxt, NULL, XML_FROM_NAMESPACE, error,
                    XML_ERR_WARNING, NULL, 0, (const char *) info1,
                    (const char *) info2, (const char *) info3, 0, 0, msg,
                    info1, info2, info3);
}

736 737 738 739 740 741 742 743 744 745 746 747 748 749 750 751 752 753 754 755
/************************************************************************
 *									*
 * 		Library wide options					*
 *									*
 ************************************************************************/

/**
  * xmlHasFeature:
  * @feature: the feature to be examined
  *
  * Examines if the library has been compiled with a given feature.
  * Each feature is tied to one compile-time macro; the answer is fixed
  * when the library is built.
  *
  * Returns zero (0) if the feature does not exist or an unknown
  * feature is requested, non-zero otherwise.
  */
int
xmlHasFeature(xmlFeature feature)
{
    int has = 0;

    switch (feature) {
        case XML_WITH_THREAD:
#ifdef LIBXML_THREAD_ENABLED
            has = 1;
#endif
            break;
        case XML_WITH_TREE:
#ifdef LIBXML_TREE_ENABLED
            has = 1;
#endif
            break;
        case XML_WITH_OUTPUT:
#ifdef LIBXML_OUTPUT_ENABLED
            has = 1;
#endif
            break;
        case XML_WITH_PUSH:
#ifdef LIBXML_PUSH_ENABLED
            has = 1;
#endif
            break;
        case XML_WITH_READER:
#ifdef LIBXML_READER_ENABLED
            has = 1;
#endif
            break;
        case XML_WITH_PATTERN:
#ifdef LIBXML_PATTERN_ENABLED
            has = 1;
#endif
            break;
        case XML_WITH_WRITER:
#ifdef LIBXML_WRITER_ENABLED
            has = 1;
#endif
            break;
        case XML_WITH_SAX1:
#ifdef LIBXML_SAX1_ENABLED
            has = 1;
#endif
            break;
        case XML_WITH_FTP:
#ifdef LIBXML_FTP_ENABLED
            has = 1;
#endif
            break;
        case XML_WITH_HTTP:
#ifdef LIBXML_HTTP_ENABLED
            has = 1;
#endif
            break;
        case XML_WITH_VALID:
#ifdef LIBXML_VALID_ENABLED
            has = 1;
#endif
            break;
        case XML_WITH_HTML:
#ifdef LIBXML_HTML_ENABLED
            has = 1;
#endif
            break;
        case XML_WITH_LEGACY:
#ifdef LIBXML_LEGACY_ENABLED
            has = 1;
#endif
            break;
        case XML_WITH_C14N:
#ifdef LIBXML_C14N_ENABLED
            has = 1;
#endif
            break;
        case XML_WITH_CATALOG:
#ifdef LIBXML_CATALOG_ENABLED
            has = 1;
#endif
            break;
        case XML_WITH_XPATH:
#ifdef LIBXML_XPATH_ENABLED
            has = 1;
#endif
            break;
        case XML_WITH_XPTR:
#ifdef LIBXML_XPTR_ENABLED
            has = 1;
#endif
            break;
        case XML_WITH_XINCLUDE:
#ifdef LIBXML_XINCLUDE_ENABLED
            has = 1;
#endif
            break;
        case XML_WITH_ICONV:
#ifdef LIBXML_ICONV_ENABLED
            has = 1;
#endif
            break;
        case XML_WITH_ISO8859X:
#ifdef LIBXML_ISO8859X_ENABLED
            has = 1;
#endif
            break;
        case XML_WITH_UNICODE:
#ifdef LIBXML_UNICODE_ENABLED
            has = 1;
#endif
            break;
        case XML_WITH_REGEXP:
#ifdef LIBXML_REGEXP_ENABLED
            has = 1;
#endif
            break;
        case XML_WITH_AUTOMATA:
#ifdef LIBXML_AUTOMATA_ENABLED
            has = 1;
#endif
            break;
        case XML_WITH_EXPR:
#ifdef LIBXML_EXPR_ENABLED
            has = 1;
#endif
            break;
        case XML_WITH_SCHEMAS:
#ifdef LIBXML_SCHEMAS_ENABLED
            has = 1;
#endif
            break;
        case XML_WITH_SCHEMATRON:
#ifdef LIBXML_SCHEMATRON_ENABLED
            has = 1;
#endif
            break;
        case XML_WITH_MODULES:
#ifdef LIBXML_MODULES_ENABLED
            has = 1;
#endif
            break;
        case XML_WITH_DEBUG:
#ifdef LIBXML_DEBUG_ENABLED
            has = 1;
#endif
            break;
        case XML_WITH_DEBUG_MEM:
#ifdef DEBUG_MEMORY_LOCATION
            has = 1;
#endif
            break;
        case XML_WITH_DEBUG_RUN:
#ifdef LIBXML_DEBUG_RUNTIME
            has = 1;
#endif
            break;
        case XML_WITH_ZLIB:
#ifdef LIBXML_ZLIB_ENABLED
            has = 1;
#endif
            break;
        default:
            /* unknown feature: reported as absent */
            break;
    }
    return(has);
}

948 949 950 951 952 953 954 955 956 957 958 959 960 961 962
/************************************************************************
 *									*
 * 		SAX2 defaulted attributes handling			*
 *									*
 ************************************************************************/

/**
 * xmlDetectSAX2:
 * @ctxt:  an XML parser context, ignored when NULL
 *
 * Do the SAX2 detection and specific initialization: flag the context as
 * SAX2 when applicable and cache the dictionary entries for "xml",
 * "xmlns" and the XML namespace name. A memory error is raised when any
 * of those lookups returns NULL.
 */
static void
xmlDetectSAX2(xmlParserCtxtPtr ctxt) {
    if (ctxt == NULL)
        return;

#ifdef LIBXML_SAX1_ENABLED
    /* SAX2 needs an initialized handler with a namespace aware callback. */
    if ((ctxt->sax != NULL) &&
        (ctxt->sax->initialized == XML_SAX2_MAGIC)) {
        if ((ctxt->sax->startElementNs != NULL) ||
            (ctxt->sax->endElementNs != NULL))
            ctxt->sax2 = 1;
    }
#else
    ctxt->sax2 = 1;
#endif /* LIBXML_SAX1_ENABLED */

    ctxt->str_xml = xmlDictLookup(ctxt->dict, BAD_CAST "xml", 3);
    ctxt->str_xmlns = xmlDictLookup(ctxt->dict, BAD_CAST "xmlns", 5);
    ctxt->str_xml_ns = xmlDictLookup(ctxt->dict, XML_XML_NAMESPACE, 36);

    if (ctxt->str_xml == NULL)
        xmlErrMemory(ctxt, NULL);
    else if (ctxt->str_xmlns == NULL)
        xmlErrMemory(ctxt, NULL);
    else if (ctxt->str_xml_ns == NULL)
        xmlErrMemory(ctxt, NULL);
}

typedef struct _xmlDefAttrs xmlDefAttrs;
typedef xmlDefAttrs *xmlDefAttrsPtr;
struct _xmlDefAttrs {
    int nbAttrs;	/* number of defaulted attributes on that element */
    int maxAttrs;       /* the size of the array */
985
    const xmlChar *values[5]; /* array of localname/prefix/values/external */
986 987
};

988 989 990 991 992 993 994 995 996 997 998 999 1000 1001 1002 1003 1004 1005 1006 1007 1008 1009 1010 1011 1012 1013 1014 1015 1016 1017 1018 1019 1020 1021 1022 1023 1024 1025 1026 1027 1028 1029 1030 1031 1032 1033 1034 1035 1036 1037 1038
/**
 * xmlAttrNormalizeSpace:
 * @src: the source string
 * @dst: the target string
 *
 * Normalize the space in non CDATA attribute values:
 * If the attribute type is not CDATA, then the XML processor MUST further
 * process the normalized attribute value by discarding any leading and
 * trailing space (#x20) characters, and by replacing sequences of space
 * (#x20) characters by a single space (#x20) character.
 * Note that the size of dst need to be at least src, and if one doesn't need
 * to preserve dst (and it doesn't come from a dictionary or read-only) then
 * passing src as dst is just fine.
 *
 * Returns a pointer to the normalized value (the start of @dst), or NULL
 *         if @src or @dst is NULL or if no conversion is needed. The
 *         "no conversion" case is only detected for in-place use
 *         (@src == @dst), where the read and write cursors end up equal
 *         exactly when nothing was dropped.
 */
static xmlChar *
xmlAttrNormalizeSpace(const xmlChar *src, xmlChar *dst)
{
    xmlChar *start = dst;

    if ((src == NULL) || (dst == NULL))
        return(NULL);

    /* discard leading spaces */
    while (*src == 0x20) src++;
    while (*src != 0) {
        if (*src == 0x20) {
            /* collapse the run; drop it entirely when it is trailing */
            while (*src == 0x20) src++;
            if (*src != 0)
                *dst++ = 0x20;
        } else {
            *dst++ = *src++;
        }
    }
    *dst = 0;
    if (dst == src)
        return(NULL);
    /* hand back the beginning of the output, not the write cursor */
    return(start);
}

/**
 * xmlAttrNormalizeSpace2:
 * @src: the source string
 *
 * Normalize the space in non CDATA attribute values, a slightly more complex
 * front end to avoid allocation problems when running on attribute values
 * coming from the input.
 *
 * Returns a pointer to the normalized value (dst) or NULL if no conversion
 *         is needed.
 */
static const xmlChar *
1039
xmlAttrNormalizeSpace2(xmlParserCtxtPtr ctxt, xmlChar *src, int *len)
1040 1041 1042 1043 1044 1045 1046 1047 1048 1049 1050 1051 1052 1053 1054 1055 1056 1057 1058 1059 1060 1061 1062 1063 1064 1065 1066 1067 1068 1069 1070 1071 1072 1073 1074 1075 1076