/*
 * parser.c : an XML 1.0 parser, namespaces and validity support are mostly
 *            implemented on top of the SAX interfaces
 *
 * References:
 *   The XML specification:
 *     http://www.w3.org/TR/REC-xml
 *   Original 1.0 version:
 *     http://www.w3.org/TR/1998/REC-xml-19980210
 *   XML second edition working draft
 *     http://www.w3.org/TR/2000/WD-xml-2e-20000814
 *
 * Okay this is a big file, the parser core is around 7000 lines, then it
 * is followed by the progressive parser top routines, then the various
 * high level APIs to call the parser and a few miscellaneous functions.
 * A number of helper functions and deprecated ones have been moved to
 * parserInternals.c to reduce this file size.
 * As much as possible the functions are associated with their relative
 * production in the XML specification. A few productions defining the
 * different ranges of characters are actually implemented either in
 * parserInternals.h or parserInternals.c
 * The DOM tree build is realized from the default SAX callbacks in
 * the module SAX.c.
 * The routines doing the validation checks are in valid.c and called either
 * from the SAX callbacks or as standalone functions using a preparsed
 * document.
 *
 * See Copyright for the status of this software.
 *
 * daniel@veillard.com
 */

33
#define IN_LIBXML
Bjorn Reese's avatar
Bjorn Reese committed
34 35
#include "libxml.h"

36
#if defined(WIN32) && !defined (__CYGWIN__)
37 38 39 40 41 42 43
#define XML_DIR_SEP '\\'
#else
#define XML_DIR_SEP '/'
#endif

#include <stdlib.h>
#include <string.h>
44
#include <stdarg.h>
45
#include <libxml/xmlmemory.h>
46 47
#include <libxml/threads.h>
#include <libxml/globals.h>
48 49 50 51 52 53 54 55 56
#include <libxml/tree.h>
#include <libxml/parser.h>
#include <libxml/parserInternals.h>
#include <libxml/valid.h>
#include <libxml/entities.h>
#include <libxml/xmlerror.h>
#include <libxml/encoding.h>
#include <libxml/xmlIO.h>
#include <libxml/uri.h>
57 58 59
#ifdef LIBXML_CATALOG_ENABLED
#include <libxml/catalog.h>
#endif
60 61 62 63 64 65 66 67 68 69 70 71 72 73 74 75 76 77 78 79

#ifdef HAVE_CTYPE_H
#include <ctype.h>
#endif
#ifdef HAVE_STDLIB_H
#include <stdlib.h>
#endif
#ifdef HAVE_SYS_STAT_H
#include <sys/stat.h>
#endif
#ifdef HAVE_FCNTL_H
#include <fcntl.h>
#endif
#ifdef HAVE_UNISTD_H
#include <unistd.h>
#endif
#ifdef HAVE_ZLIB_H
#include <zlib.h>
#endif

80
/**
81
 * xmlParserMaxDepth:
82 83 84 85 86
 *
 * arbitrary depth limit for the XML documents that we allow to 
 * process. This is not a limitation of the parser but a safety 
 * boundary feature.
 */
87
unsigned int xmlParserMaxDepth = 1024;
88

89 90
#define SAX2 1

91
#define XML_PARSER_BIG_BUFFER_SIZE 300
92 93
#define XML_PARSER_BUFFER_SIZE 100

94 95
#define SAX_COMPAT_MODE BAD_CAST "SAX compatibility mode document"

96 97 98 99
/*
 * List of XML prefixed PI allowed by W3C specs
 */

100
static const char *xmlW3CPIs[] = {
101 102 103 104
    "xml-stylesheet",
    NULL
};

105

106 107 108 109
/* DEPR void xmlParserHandleReference(xmlParserCtxtPtr ctxt); */
xmlEntityPtr xmlParseStringPEReference(xmlParserCtxtPtr ctxt,
                                       const xmlChar **str);

110
static xmlParserErrors
111 112
xmlParseExternalEntityPrivate(xmlDocPtr doc, xmlParserCtxtPtr oldctxt,
	              xmlSAXHandlerPtr sax,
113
		      void *user_data, int depth, const xmlChar *URL,
114
		      const xmlChar *ID, xmlNodePtr *list);
115

116
#ifdef LIBXML_LEGACY_ENABLED
117 118 119
static void
xmlAddEntityReference(xmlEntityPtr ent, xmlNodePtr firstNode,
                      xmlNodePtr lastNode);
120
#endif /* LIBXML_LEGACY_ENABLED */
121

122
static xmlParserErrors
123 124
xmlParseBalancedChunkMemoryInternal(xmlParserCtxtPtr oldctxt,
		      const xmlChar *string, void *user_data, xmlNodePtr *lst);
125

126 127 128 129 130 131 132 133 134 135 136 137 138 139 140 141 142 143
/************************************************************************
 *									*
 * 		Some factorized error routines				*
 *									*
 ************************************************************************/

/**
 * xmlErrAttributeDup:
 * @ctxt:  an XML parser context, may be NULL
 * @prefix:  the attribute prefix, or NULL if the attribute has none
 * @localname:  the attribute localname
 *
 * Handle a redefinition of attribute error.
 * Nothing is reported when SAX processing is already disabled and the
 * parser state is XML_PARSER_EOF. Otherwise the error is raised with the
 * XML_ERR_FATAL level and, when a context is available, the context is
 * flagged as not well formed; SAX is disabled unless the context is in
 * recovery mode.
 */
static void
xmlErrAttributeDup(xmlParserCtxtPtr ctxt, const xmlChar * prefix,
                   const xmlChar * localname)
{
    if ((ctxt != NULL) && (ctxt->disableSAX != 0) &&
        (ctxt->instate == XML_PARSER_EOF))
        return;
    /* the test above accepts a NULL context, so must everything below */
    if (ctxt != NULL)
        ctxt->errNo = XML_ERR_ATTRIBUTE_REDEFINED;
    if (prefix == NULL)
        __xmlRaiseError(NULL, NULL, NULL, ctxt, NULL, XML_FROM_PARSER,
                        XML_ERR_ATTRIBUTE_REDEFINED, XML_ERR_FATAL, NULL, 0,
                        (const char *) localname, NULL, NULL, 0, 0,
                        "Attribute %s redefined\n", localname);
    else
        __xmlRaiseError(NULL, NULL, NULL, ctxt, NULL, XML_FROM_PARSER,
                        XML_ERR_ATTRIBUTE_REDEFINED, XML_ERR_FATAL, NULL, 0,
                        (const char *) prefix, (const char *) localname,
                        NULL, 0, 0, "Attribute %s:%s redefined\n", prefix,
                        localname);
    if (ctxt != NULL) {
        ctxt->wellFormed = 0;
        if (ctxt->recovery == 0)
            ctxt->disableSAX = 1;
    }
}

/**
 * xmlFatalErr:
 * @ctxt:  an XML parser context
 * @error:  the error number
 * @extra:  extra information string
 *
 * Handle a fatal parser error, i.e. violating Well-Formedness constraints
 */
static void
173
xmlFatalErr(xmlParserCtxtPtr ctxt, xmlParserErrors error, const char *info)
174 175 176
{
    const char *errmsg;

177 178 179
    if ((ctxt != NULL) && (ctxt->disableSAX != 0) &&
        (ctxt->instate == XML_PARSER_EOF))
	return;
180 181
    switch (error) {
        case XML_ERR_INVALID_HEX_CHARREF:
182 183
            errmsg = "CharRef: invalid hexadecimal value\n";
            break;
184
        case XML_ERR_INVALID_DEC_CHARREF:
185 186
            errmsg = "CharRef: invalid decimal value\n";
            break;
187
        case XML_ERR_INVALID_CHARREF:
188 189
            errmsg = "CharRef: invalid value\n";
            break;
190
        case XML_ERR_INTERNAL_ERROR:
191 192
            errmsg = "internal error";
            break;
193
        case XML_ERR_PEREF_AT_EOF:
194 195
            errmsg = "PEReference at end of document\n";
            break;
196
        case XML_ERR_PEREF_IN_PROLOG:
197 198
            errmsg = "PEReference in prolog\n";
            break;
199
        case XML_ERR_PEREF_IN_EPILOG:
200 201
            errmsg = "PEReference in epilog\n";
            break;
202
        case XML_ERR_PEREF_NO_NAME:
203 204
            errmsg = "PEReference: no name\n";
            break;
205
        case XML_ERR_PEREF_SEMICOL_MISSING:
206 207
            errmsg = "PEReference: expecting ';'\n";
            break;
208
        case XML_ERR_ENTITY_LOOP:
209 210
            errmsg = "Detected an entity reference loop\n";
            break;
211
        case XML_ERR_ENTITY_NOT_STARTED:
212 213
            errmsg = "EntityValue: \" or ' expected\n";
            break;
214
        case XML_ERR_ENTITY_PE_INTERNAL:
215 216
            errmsg = "PEReferences forbidden in internal subset\n";
            break;
217
        case XML_ERR_ENTITY_NOT_FINISHED:
218 219
            errmsg = "EntityValue: \" or ' expected\n";
            break;
220
        case XML_ERR_ATTRIBUTE_NOT_STARTED:
221 222
            errmsg = "AttValue: \" or ' expected\n";
            break;
223
        case XML_ERR_LT_IN_ATTRIBUTE:
224 225
            errmsg = "Unescaped '<' not allowed in attributes values\n";
            break;
226
        case XML_ERR_LITERAL_NOT_STARTED:
227 228
            errmsg = "SystemLiteral \" or ' expected\n";
            break;
229
        case XML_ERR_LITERAL_NOT_FINISHED:
230 231
            errmsg = "Unfinished System or Public ID \" or ' expected\n";
            break;
232
        case XML_ERR_MISPLACED_CDATA_END:
233 234
            errmsg = "Sequence ']]>' not allowed in content\n";
            break;
235
        case XML_ERR_URI_REQUIRED:
236 237
            errmsg = "SYSTEM or PUBLIC, the URI is missing\n";
            break;
238
        case XML_ERR_PUBID_REQUIRED:
239 240
            errmsg = "PUBLIC, the Public Identifier is missing\n";
            break;
241
        case XML_ERR_HYPHEN_IN_COMMENT:
242 243
            errmsg = "Comment must not contain '--' (double-hyphen)\n";
            break;
244
        case XML_ERR_PI_NOT_STARTED:
245 246
            errmsg = "xmlParsePI : no target name\n";
            break;
247
        case XML_ERR_RESERVED_XML_NAME:
248 249
            errmsg = "Invalid PI name\n";
            break;
250
        case XML_ERR_NOTATION_NOT_STARTED:
251 252
            errmsg = "NOTATION: Name expected here\n";
            break;
253
        case XML_ERR_NOTATION_NOT_FINISHED:
254 255
            errmsg = "'>' required to close NOTATION declaration\n";
            break;
256
        case XML_ERR_VALUE_REQUIRED:
257 258
            errmsg = "Entity value required\n";
            break;
259
        case XML_ERR_URI_FRAGMENT:
260 261
            errmsg = "Fragment not allowed";
            break;
262
        case XML_ERR_ATTLIST_NOT_STARTED:
263 264
            errmsg = "'(' required to start ATTLIST enumeration\n";
            break;
265
        case XML_ERR_NMTOKEN_REQUIRED:
266 267
            errmsg = "NmToken expected in ATTLIST enumeration\n";
            break;
268
        case XML_ERR_ATTLIST_NOT_FINISHED:
269 270
            errmsg = "')' required to finish ATTLIST enumeration\n";
            break;
271
        case XML_ERR_MIXED_NOT_STARTED:
272 273
            errmsg = "MixedContentDecl : '|' or ')*' expected\n";
            break;
274
        case XML_ERR_PCDATA_REQUIRED:
275 276
            errmsg = "MixedContentDecl : '#PCDATA' expected\n";
            break;
277
        case XML_ERR_ELEMCONTENT_NOT_STARTED:
278 279
            errmsg = "ContentDecl : Name or '(' expected\n";
            break;
280
        case XML_ERR_ELEMCONTENT_NOT_FINISHED:
281 282
            errmsg = "ContentDecl : ',' '|' or ')' expected\n";
            break;
283
        case XML_ERR_PEREF_IN_INT_SUBSET:
284 285 286
            errmsg =
                "PEReference: forbidden within markup decl in internal subset\n";
            break;
287
        case XML_ERR_GT_REQUIRED:
288 289
            errmsg = "expected '>'\n";
            break;
290
        case XML_ERR_CONDSEC_INVALID:
291 292
            errmsg = "XML conditional section '[' expected\n";
            break;
293
        case XML_ERR_EXT_SUBSET_NOT_FINISHED:
294 295 296 297 298 299
            errmsg = "Content error in the external subset\n";
            break;
        case XML_ERR_CONDSEC_INVALID_KEYWORD:
            errmsg =
                "conditional section INCLUDE or IGNORE keyword expected\n";
            break;
300
        case XML_ERR_CONDSEC_NOT_FINISHED:
301 302
            errmsg = "XML conditional section not closed\n";
            break;
303
        case XML_ERR_XMLDECL_NOT_STARTED:
304 305
            errmsg = "Text declaration '<?xml' required\n";
            break;
306
        case XML_ERR_XMLDECL_NOT_FINISHED:
307 308
            errmsg = "parsing XML declaration: '?>' expected\n";
            break;
309
        case XML_ERR_EXT_ENTITY_STANDALONE:
310 311
            errmsg = "external parsed entities cannot be standalone\n";
            break;
312
        case XML_ERR_ENTITYREF_SEMICOL_MISSING:
313 314
            errmsg = "EntityRef: expecting ';'\n";
            break;
315
        case XML_ERR_DOCTYPE_NOT_FINISHED:
316 317
            errmsg = "DOCTYPE improperly terminated\n";
            break;
318
        case XML_ERR_LTSLASH_REQUIRED:
319 320
            errmsg = "EndTag: '</' not found\n";
            break;
321
        case XML_ERR_EQUAL_REQUIRED:
322 323
            errmsg = "expected '='\n";
            break;
324
        case XML_ERR_STRING_NOT_CLOSED:
325 326
            errmsg = "String not closed expecting \" or '\n";
            break;
327
        case XML_ERR_STRING_NOT_STARTED:
328 329
            errmsg = "String not started expecting ' or \"\n";
            break;
330
        case XML_ERR_ENCODING_NAME:
331 332
            errmsg = "Invalid XML encoding name\n";
            break;
333
        case XML_ERR_STANDALONE_VALUE:
334 335
            errmsg = "standalone accepts only 'yes' or 'no'\n";
            break;
336
        case XML_ERR_DOCUMENT_EMPTY:
337 338
            errmsg = "Document is empty\n";
            break;
339
        case XML_ERR_DOCUMENT_END:
340 341
            errmsg = "Extra content at the end of the document\n";
            break;
342
        case XML_ERR_NOT_WELL_BALANCED:
343 344
            errmsg = "chunk is not well balanced\n";
            break;
345
        case XML_ERR_EXTRA_CONTENT:
346 347
            errmsg = "extra content at the end of well balanced chunk\n";
            break;
348
        case XML_ERR_VERSION_MISSING:
349 350
            errmsg = "Malformed declaration expecting version\n";
            break;
351
#if 0
352 353 354
        case:
            errmsg = "\n";
            break;
355
#endif
356 357
        default:
            errmsg = "Unregistered error message\n";
358 359
    }
    ctxt->errNo = error;
360
    __xmlRaiseError(NULL, NULL, NULL, ctxt, NULL, XML_FROM_PARSER, error,
361 362
                    XML_ERR_FATAL, NULL, 0, info, NULL, NULL, 0, 0, errmsg,
                    info);
363 364 365 366 367
    ctxt->wellFormed = 0;
    if (ctxt->recovery == 0)
        ctxt->disableSAX = 1;
}

368 369 370 371 372 373 374 375 376
/**
 * xmlFatalErrMsg:
 * @ctxt:  an XML parser context
 * @error:  the error number
 * @msg:  the error message
 *
 * Handle a fatal parser error, i.e. violating Well-Formedness constraints
 */
static void
377 378
xmlFatalErrMsg(xmlParserCtxtPtr ctxt, xmlParserErrors error,
               const char *msg)
379
{
380 381 382
    if ((ctxt != NULL) && (ctxt->disableSAX != 0) &&
        (ctxt->instate == XML_PARSER_EOF))
	return;
383
    ctxt->errNo = error;
384
    __xmlRaiseError(NULL, NULL, NULL, ctxt, NULL, XML_FROM_PARSER, error,
385
                    XML_ERR_FATAL, NULL, 0, NULL, NULL, NULL, 0, 0, msg);
386 387 388 389 390
    ctxt->wellFormed = 0;
    if (ctxt->recovery == 0)
        ctxt->disableSAX = 1;
}

391 392 393 394 395 396 397 398 399 400 401 402 403 404
/**
 * xmlWarningMsg:
 * @ctxt:  an XML parser context, may be NULL
 * @error:  the error number
 * @msg:  the error message, used as a format string
 * @str1:  extra data
 * @str2:  extra data
 *
 * Handle a warning.
 * With a context, the warning goes to the SAX warning callback and the
 * context user data; the structured error callback is used as well when
 * the SAX block carries the XML_SAX2_MAGIC marker. Nothing is reported
 * when SAX processing is already disabled and the parser state is
 * XML_PARSER_EOF. Without a context the warning is raised with no
 * callback and no user data.
 */
static void
xmlWarningMsg(xmlParserCtxtPtr ctxt, xmlParserErrors error,
              const char *msg, const xmlChar *str1, const xmlChar *str2)
{
    xmlStructuredErrorFunc schannel = NULL;

    if (ctxt == NULL) {
        /* no context: nothing to update and no handler to look up */
        __xmlRaiseError(NULL, NULL, NULL,
                        NULL, NULL, XML_FROM_PARSER, error,
                        XML_ERR_WARNING, NULL, 0,
                        (const char *) str1, (const char *) str2, NULL, 0, 0,
                        msg, (const char *) str1, (const char *) str2);
        return;
    }
    if ((ctxt->disableSAX != 0) && (ctxt->instate == XML_PARSER_EOF))
        return;
    ctxt->errNo = error;
    if ((ctxt->sax != NULL) && (ctxt->sax->initialized == XML_SAX2_MAGIC))
        schannel = ctxt->sax->serror;
    __xmlRaiseError(schannel,
                    (ctxt->sax) ? ctxt->sax->warning : NULL,
                    ctxt->userData,
                    ctxt, NULL, XML_FROM_PARSER, error,
                    XML_ERR_WARNING, NULL, 0,
                    (const char *) str1, (const char *) str2, NULL, 0, 0,
                    msg, (const char *) str1, (const char *) str2);
}

/**
 * xmlValidityError:
 * @ctxt:  an XML parser context, may be NULL
 * @error:  the error number
 * @msg:  the error message, used as a format string
 * @str1:  extra data
 *
 * Handle a validity error.
 * With a context, the error goes to the error callback and user data of
 * the validation context; the structured error callback is used as well
 * when the SAX block carries the XML_SAX2_MAGIC marker, and the context
 * is flagged as not valid. Nothing is reported when SAX processing is
 * already disabled and the parser state is XML_PARSER_EOF. Without a
 * context the error is raised with no callback and no user data.
 */
static void
xmlValidityError(xmlParserCtxtPtr ctxt, xmlParserErrors error,
              const char *msg, const xmlChar *str1)
{
    xmlStructuredErrorFunc schannel = NULL;

    if (ctxt == NULL) {
        /* no context: nothing to update and no handler to look up */
        __xmlRaiseError(NULL, NULL, NULL,
                        NULL, NULL, XML_FROM_DTD, error,
                        XML_ERR_ERROR, NULL, 0, (const char *) str1,
                        NULL, NULL, 0, 0,
                        msg, (const char *) str1);
        return;
    }
    if ((ctxt->disableSAX != 0) && (ctxt->instate == XML_PARSER_EOF))
        return;
    ctxt->errNo = error;
    if ((ctxt->sax != NULL) && (ctxt->sax->initialized == XML_SAX2_MAGIC))
        schannel = ctxt->sax->serror;
    __xmlRaiseError(schannel,
                    ctxt->vctxt.error, ctxt->vctxt.userData,
                    ctxt, NULL, XML_FROM_DTD, error,
                    XML_ERR_ERROR, NULL, 0, (const char *) str1,
                    NULL, NULL, 0, 0,
                    msg, (const char *) str1);
    ctxt->valid = 0;
}

452 453 454 455 456 457 458 459 460 461 462
/**
 * xmlFatalErrMsgInt:
 * @ctxt:  an XML parser context, may be NULL
 * @error:  the error number
 * @msg:  the error message, a format string consuming one int
 * @val:  an integer value
 *
 * Handle a fatal parser error, i.e. violating Well-Formedness constraints.
 * Nothing is reported when SAX processing is already disabled and the
 * parser state is XML_PARSER_EOF. When a context is available it is
 * flagged as not well formed and SAX is disabled unless the context is in
 * recovery mode.
 */
static void
xmlFatalErrMsgInt(xmlParserCtxtPtr ctxt, xmlParserErrors error,
                  const char *msg, int val)
{
    if ((ctxt != NULL) && (ctxt->disableSAX != 0) &&
        (ctxt->instate == XML_PARSER_EOF))
        return;
    /* the test above accepts a NULL context, so must everything below */
    if (ctxt != NULL)
        ctxt->errNo = error;
    __xmlRaiseError(NULL, NULL, NULL,
                    ctxt, NULL, XML_FROM_PARSER, error, XML_ERR_FATAL,
                    NULL, 0, NULL, NULL, NULL, val, 0, msg, val);
    if (ctxt != NULL) {
        ctxt->wellFormed = 0;
        if (ctxt->recovery == 0)
            ctxt->disableSAX = 1;
    }
}

477 478 479 480 481 482 483 484 485 486 487 488 489 490 491 492
/**
 * xmlFatalErrMsgStrIntStr:
 * @ctxt:  an XML parser context, may be NULL
 * @error:  the error number
 * @msg:  the error message, a format string consuming a string, an int
 *        and a string, in that order
 * @str1:  a string info
 * @val:  an integer value
 * @str2:  a string info
 *
 * Handle a fatal parser error, i.e. violating Well-Formedness constraints.
 * Nothing is reported when SAX processing is already disabled and the
 * parser state is XML_PARSER_EOF. When a context is available it is
 * flagged as not well formed and SAX is disabled unless the context is in
 * recovery mode.
 */
static void
xmlFatalErrMsgStrIntStr(xmlParserCtxtPtr ctxt, xmlParserErrors error,
                  const char *msg, const xmlChar *str1, int val,
                  const xmlChar *str2)
{
    if ((ctxt != NULL) && (ctxt->disableSAX != 0) &&
        (ctxt->instate == XML_PARSER_EOF))
        return;
    /* the test above accepts a NULL context, so must everything below */
    if (ctxt != NULL)
        ctxt->errNo = error;
    __xmlRaiseError(NULL, NULL, NULL,
                    ctxt, NULL, XML_FROM_PARSER, error, XML_ERR_FATAL,
                    NULL, 0, (const char *) str1, (const char *) str2,
                    NULL, val, 0, msg, str1, val, str2);
    if (ctxt != NULL) {
        ctxt->wellFormed = 0;
        if (ctxt->recovery == 0)
            ctxt->disableSAX = 1;
    }
}

506 507 508 509 510 511 512 513 514 515 516
/**
 * xmlFatalErrMsgStr:
 * @ctxt:  an XML parser context, may be NULL
 * @error:  the error number
 * @msg:  the error message, a format string consuming one string
 * @val:  a string value
 *
 * Handle a fatal parser error, i.e. violating Well-Formedness constraints.
 * Nothing is reported when SAX processing is already disabled and the
 * parser state is XML_PARSER_EOF. When a context is available it is
 * flagged as not well formed and SAX is disabled unless the context is in
 * recovery mode.
 */
static void
xmlFatalErrMsgStr(xmlParserCtxtPtr ctxt, xmlParserErrors error,
                  const char *msg, const xmlChar * val)
{
    if ((ctxt != NULL) && (ctxt->disableSAX != 0) &&
        (ctxt->instate == XML_PARSER_EOF))
        return;
    /* the test above accepts a NULL context, so must everything below */
    if (ctxt != NULL)
        ctxt->errNo = error;
    __xmlRaiseError(NULL, NULL, NULL, ctxt, NULL,
                    XML_FROM_PARSER, error, XML_ERR_FATAL,
                    NULL, 0, (const char *) val, NULL, NULL, 0, 0, msg,
                    val);
    if (ctxt != NULL) {
        ctxt->wellFormed = 0;
        if (ctxt->recovery == 0)
            ctxt->disableSAX = 1;
    }
}

532 533 534 535 536 537 538 539 540 541 542 543 544
/**
 * xmlErrMsgStr:
 * @ctxt:  an XML parser context, may be NULL
 * @error:  the error number
 * @msg:  the error message, a format string consuming one string
 * @val:  a string value
 *
 * Handle a non fatal parser error: the error is raised with the
 * XML_ERR_ERROR level and the well-formedness state of the context is
 * left untouched. Nothing is reported when SAX processing is already
 * disabled and the parser state is XML_PARSER_EOF.
 */
static void
xmlErrMsgStr(xmlParserCtxtPtr ctxt, xmlParserErrors error,
                  const char *msg, const xmlChar * val)
{
    if ((ctxt != NULL) && (ctxt->disableSAX != 0) &&
        (ctxt->instate == XML_PARSER_EOF))
        return;
    /* the test above accepts a NULL context, so must the store below */
    if (ctxt != NULL)
        ctxt->errNo = error;
    __xmlRaiseError(NULL, NULL, NULL, ctxt, NULL,
                    XML_FROM_PARSER, error, XML_ERR_ERROR,
                    NULL, 0, (const char *) val, NULL, NULL, 0, 0, msg,
                    val);
}

555 556 557 558 559 560 561 562 563 564 565 566 567
/**
 * xmlNsErr:
 * @ctxt:  an XML parser context, may be NULL
 * @error:  the error number
 * @msg:  the message, a format string consuming up to three strings
 * @info1:  extra information string
 * @info2:  extra information string
 * @info3:  extra information string
 *
 * Handle a namespace error. It is raised in the XML_FROM_NAMESPACE domain
 * with the XML_ERR_ERROR level; when a context is available it is flagged
 * as not namespace well formed. Nothing is reported when SAX processing
 * is already disabled and the parser state is XML_PARSER_EOF.
 */
static void
xmlNsErr(xmlParserCtxtPtr ctxt, xmlParserErrors error,
         const char *msg,
         const xmlChar * info1, const xmlChar * info2,
         const xmlChar * info3)
{
    if ((ctxt != NULL) && (ctxt->disableSAX != 0) &&
        (ctxt->instate == XML_PARSER_EOF))
        return;
    /* the test above accepts a NULL context, so must everything below */
    if (ctxt != NULL)
        ctxt->errNo = error;
    __xmlRaiseError(NULL, NULL, NULL, ctxt, NULL, XML_FROM_NAMESPACE, error,
                    XML_ERR_ERROR, NULL, 0, (const char *) info1,
                    (const char *) info2, (const char *) info3, 0, 0, msg,
                    info1, info2, info3);
    if (ctxt != NULL)
        ctxt->nsWellFormed = 0;
}

582 583 584 585 586 587 588 589 590 591 592 593 594 595 596
/************************************************************************
 *									*
 * 		SAX2 defaulted attributes handling			*
 *									*
 ************************************************************************/

/**
 * xmlDetectSAX2:
 * @ctxt:  an XML parser context
 *
 * Do the SAX2 detection and the related initialization: set the sax2
 * flag of the context and intern the "xml", "xmlns" and XML namespace
 * strings in the context dictionary. A NULL @ctxt is ignored.
 */
static void
xmlDetectSAX2(xmlParserCtxtPtr ctxt) {
    if (ctxt == NULL)
        return;

#ifdef LIBXML_SAX1_ENABLED
    /* SAX2 needs an initialized SAX2 block with an element Ns callback */
    if ((ctxt->sax != NULL) && (ctxt->sax->initialized == XML_SAX2_MAGIC)) {
        if ((ctxt->sax->startElementNs != NULL) ||
            (ctxt->sax->endElementNs != NULL))
            ctxt->sax2 = 1;
    }
#else
    ctxt->sax2 = 1;
#endif /* LIBXML_SAX1_ENABLED */

    ctxt->str_xml = xmlDictLookup(ctxt->dict, BAD_CAST "xml", 3);
    ctxt->str_xmlns = xmlDictLookup(ctxt->dict, BAD_CAST "xmlns", 5);
    ctxt->str_xml_ns = xmlDictLookup(ctxt->dict, XML_XML_NAMESPACE, 36);

    /* a failed lookup is reported as a memory error */
    if (!((ctxt->str_xml != NULL) && (ctxt->str_xmlns != NULL) &&
          (ctxt->str_xml_ns != NULL)))
        xmlErrMemory(ctxt, NULL);
}

/*
 * Set of defaulted attributes registered for one element.
 * xmlAddDefAttrs() allocates the structure with extra room behind it and
 * uses four consecutive slots of @values per attribute: local name,
 * prefix, start of the value and end of the value.
 */
struct _xmlDefAttrs {
    int nbAttrs;              /* number of defaulted attributes stored */
    int maxAttrs;             /* number of attributes the array can hold */
    const xmlChar *values[4]; /* localname/prefix/value/value end slots */
};
typedef struct _xmlDefAttrs xmlDefAttrs;
typedef xmlDefAttrs *xmlDefAttrsPtr;

/**
 * xmlAddDefAttrs:
 * @ctxt:  an XML parser context
 * @fullname:  the element fullname
 * @fullattr:  the attribute fullname
 * @value:  the attribute value
 *
 * Add a defaulted attribute for an element.
 * The per-element record lives in the ctxt->attsDefault hash table, keyed
 * by the element local name and prefix; the table and the record are
 * created on demand and the record is doubled when full. Allocation
 * failures are reported through xmlErrMemory() and the attribute is then
 * not registered.
 */
static void
xmlAddDefAttrs(xmlParserCtxtPtr ctxt,
               const xmlChar *fullname,
               const xmlChar *fullattr,
               const xmlChar *value) {
    xmlDefAttrsPtr defaults;
    int len;
    const xmlChar *name;
    const xmlChar *prefix;
    const xmlChar *interned;

    if (ctxt->attsDefault == NULL) {
        ctxt->attsDefault = xmlHashCreate(10);
        if (ctxt->attsDefault == NULL)
            goto mem_error;
    }

    /*
     * Split the element name into prefix:localname, the strings found
     * are within the DTD and hence not associated to namespace names.
     */
    name = xmlSplitQName3(fullname, &len);
    if (name == NULL) {
        name = xmlDictLookup(ctxt->dict, fullname, -1);
        prefix = NULL;
    } else {
        name = xmlDictLookup(ctxt->dict, name, -1);
        prefix = xmlDictLookup(ctxt->dict, fullname, len);
    }

    /*
     * make sure there is some storage
     */
    defaults = xmlHashLookup2(ctxt->attsDefault, name, prefix);
    if (defaults == NULL) {
        /* the 4 slots of the structure plus 12 more: room for 4 attributes */
        defaults = (xmlDefAttrsPtr) xmlMalloc(sizeof(xmlDefAttrs) +
                                       12 * sizeof(const xmlChar *));
        if (defaults == NULL)
            goto mem_error;
        defaults->maxAttrs = 4;
        defaults->nbAttrs = 0;
        if (xmlHashUpdateEntry2(ctxt->attsDefault, name, prefix,
                                defaults, NULL) < 0) {
            /* not registered: release it instead of filling an orphan */
            xmlFree(defaults);
            goto mem_error;
        }
    } else if (defaults->nbAttrs >= defaults->maxAttrs) {
        xmlDefAttrsPtr grown;

        grown = (xmlDefAttrsPtr) xmlRealloc(defaults, sizeof(xmlDefAttrs) +
                       (2 * defaults->maxAttrs * 4) * sizeof(const xmlChar *));
        if (grown == NULL)
            goto mem_error;
        defaults = grown;
        defaults->maxAttrs *= 2;
        /* the record may have moved, refresh the pointer kept in the table */
        xmlHashUpdateEntry2(ctxt->attsDefault, name, prefix, defaults, NULL);
    }

    /*
     * Split the attribute name into prefix:localname, the strings found
     * are within the DTD and hence not associated to namespace names.
     */
    name = xmlSplitQName3(fullattr, &len);
    if (name == NULL) {
        name = xmlDictLookup(ctxt->dict, fullattr, -1);
        prefix = NULL;
    } else {
        name = xmlDictLookup(ctxt->dict, name, -1);
        prefix = xmlDictLookup(ctxt->dict, fullattr, len);
    }

    /* intern the string and precompute the end */
    len = xmlStrlen(value);
    interned = xmlDictLookup(ctxt->dict, value, len);
    /* a failed interning must not end up as a bogus start/end pair */
    if ((interned == NULL) && (value != NULL))
        goto mem_error;

    defaults->values[4 * defaults->nbAttrs] = name;
    defaults->values[4 * defaults->nbAttrs + 1] = prefix;
    defaults->values[4 * defaults->nbAttrs + 2] = interned;
    defaults->values[4 * defaults->nbAttrs + 3] = interned + len;
    defaults->nbAttrs++;

    return;

mem_error:
    xmlErrMemory(ctxt, NULL);
    return;
}

710 711 712 713 714 715 716 717 718 719 720 721 722 723 724 725 726 727 728 729 730
/**
 * xmlAddSpecialAttr:
 * @ctxt:  an XML parser context
 * @fullname:  the element fullname
 * @fullattr:  the attribute fullname
 * @type:  the attribute type
 *
 * Register that this attribute is not CDATA.
 * The type is stored in the ctxt->attsSpecial hash table under the
 * element and attribute names; the table is created on first use and a
 * failure to create it is reported as a memory error.
 */
static void
xmlAddSpecialAttr(xmlParserCtxtPtr ctxt,
                  const xmlChar *fullname,
                  const xmlChar *fullattr,
                  int type)
{
    if (ctxt->attsSpecial == NULL) {
        ctxt->attsSpecial = xmlHashCreate(10);
        if (ctxt->attsSpecial == NULL) {
            xmlErrMemory(ctxt, NULL);
            return;
        }
    }

    /* the integer type travels as the payload pointer itself */
    xmlHashAddEntry2(ctxt->attsSpecial, fullname, fullattr,
                     (void *) (long) type);
}

740 741 742 743 744 745 746 747 748 749 750 751 752 753 754 755 756 757 758 759 760 761 762 763 764 765 766 767 768 769 770 771 772 773 774 775 776 777 778 779 780 781 782 783 784 785 786 787 788 789 790 791 792 793 794 795 796 797 798 799 800 801 802 803 804 805 806 807 808 809 810 811
/**
 * xmlLangIsLetter:
 * @c:  the character to test
 *
 * Returns 1 if @c is in [a-z] or [A-Z], 0 otherwise
 */
static int
xmlLangIsLetter(int c)
{
    return (((c >= 'A') && (c <= 'Z')) || ((c >= 'a') && (c <= 'z')));
}

/**
 * xmlCheckLanguageID:
 * @lang:  pointer to the string value
 *
 * Checks that the value conforms to the LanguageID production:
 *
 * NOTE: this is somewhat deprecated, those productions were removed from
 *       the XML Second edition.
 *
 * [33] LanguageID ::= Langcode ('-' Subcode)*
 * [34] Langcode ::= ISO639Code |  IanaCode |  UserCode
 * [35] ISO639Code ::= ([a-z] | [A-Z]) ([a-z] | [A-Z])
 * [36] IanaCode ::= ('i' | 'I') '-' ([a-z] | [A-Z])+
 * [37] UserCode ::= ('x' | 'X') '-' ([a-z] | [A-Z])+
 * [38] Subcode ::= ([a-z] | [A-Z])+
 *
 * An IanaCode or UserCode without any letter after the '-' ("i-", "x-")
 * is rejected, as required by the '+' in productions [36] and [37].
 *
 * Returns 1 if correct 0 otherwise (including when @lang is NULL)
 **/
int
xmlCheckLanguageID(const xmlChar * lang)
{
    const xmlChar *cur = lang;

    if (cur == NULL)
        return (0);
    if (((cur[0] == 'i') || (cur[0] == 'I') ||
         (cur[0] == 'x') || (cur[0] == 'X')) && (cur[1] == '-')) {
        /*
         * IANA code or User code: one letter at least is mandatory
         */
        cur += 2;
        if (!xmlLangIsLetter(cur[0]))
            return (0);
        while (xmlLangIsLetter(cur[0]))         /* non input consuming */
            cur++;
    } else if (xmlLangIsLetter(cur[0]) && xmlLangIsLetter(cur[1])) {
        /*
         * ISO639: exactly two letters
         */
        cur += 2;
    } else
        return (0);
    while (cur[0] != 0) {       /* non input consuming */
        /* each Subcode is a '-' followed by one letter at least */
        if (cur[0] != '-')
            return (0);
        cur++;
        if (!xmlLangIsLetter(cur[0]))
            return (0);
        while (xmlLangIsLetter(cur[0]))         /* non input consuming */
            cur++;
    }
    return (1);
}

812 813 814 815 816 817 818 819 820
/************************************************************************
 *									*
 * 		Parser stacks related functions and macros		*
 *									*
 ************************************************************************/

/* Forward declaration. */
xmlEntityPtr
xmlParseStringEntityRef(xmlParserCtxtPtr ctxt, const xmlChar **str);

821 822 823 824 825 826 827 828 829
#ifdef SAX2
/**
 * nsPush:
 * @ctxt:  an XML parser context
 * @prefix:  the namespace prefix or NULL
 * @URL:  the namespace name
 *
 * Pushes a new parser namespace on top of the ns stack. The stack holds
 * prefix/URL pairs in consecutive slots and is grown on demand.
 * With the XML_PARSE_NSCLEAN option, a declaration repeating the binding
 * currently in scope for @prefix is discarded; prefixes and URLs are
 * compared by pointer.
 *
 * Returns -1 in case of error, -2 if the namespace should be discarded
 *	   and the number of slots in use in the stack otherwise.
 */
static int
nsPush(xmlParserCtxtPtr ctxt, const xmlChar *prefix, const xmlChar *URL)
{
    if (ctxt->options & XML_PARSE_NSCLEAN) {
        int i;

        /*
         * Walk from the top of the stack: the binding in scope is the
         * most recently pushed one for that prefix, not the oldest.
         */
        for (i = ctxt->nsNr - 2;i >= 0;i -= 2) {
            if (ctxt->nsTab[i] == prefix) {
                /* in scope */
                if (ctxt->nsTab[i + 1] == URL)
                    return(-2);
                /* out of scope keep it */
                break;
            }
        }
    }
    if ((ctxt->nsMax == 0) || (ctxt->nsTab == NULL)) {
        ctxt->nsMax = 10;
        ctxt->nsNr = 0;
        ctxt->nsTab = (const xmlChar **)
                      xmlMalloc(ctxt->nsMax * sizeof(xmlChar *));
        if (ctxt->nsTab == NULL) {
            xmlErrMemory(ctxt, NULL);
            ctxt->nsMax = 0;
            return (-1);
        }
    } else if (ctxt->nsNr >= ctxt->nsMax) {
        const xmlChar **tmp;

        /*
         * Keep the old table until the new one is known to exist,
         * otherwise a failure would lose it while nsNr still counts
         * its entries.
         */
        tmp = (const xmlChar **)
              xmlRealloc((char *) ctxt->nsTab,
                         ctxt->nsMax * 2 * sizeof(ctxt->nsTab[0]));
        if (tmp == NULL) {
            xmlErrMemory(ctxt, NULL);
            return (-1);
        }
        ctxt->nsTab = tmp;
        ctxt->nsMax *= 2;
    }
    ctxt->nsTab[ctxt->nsNr++] = prefix;
    ctxt->nsTab[ctxt->nsNr++] = URL;
    return (ctxt->nsNr);
}
/**
 * nsPop:
 * @ctxt: an XML parser context
 * @nr:  the number of slots to pop
 *
 * Pops the top @nr slots from the ns stack, clearing each of them.
 * A request for more slots than the stack holds is reported through the
 * generic error handler and limited to what is available.
 *
 * Returns the number of slots removed
 */
static int
nsPop(xmlParserCtxtPtr ctxt, int nr)
{
    int left;

    if (ctxt->nsTab == NULL)
        return(0);
    if (nr > ctxt->nsNr) {
        xmlGenericError(xmlGenericErrorContext, "Pbm popping %d NS\n", nr);
        nr = ctxt->nsNr;
    }
    if (ctxt->nsNr <= 0)
        return (0);

    for (left = nr; left > 0; left--)
        ctxt->nsTab[--ctxt->nsNr] = NULL;
    return(nr);
}
#endif

/**
 * xmlCtxtGrowAttrs:
 * @ctxt:  an XML parser context
 * @nr:  the index up to which the attribute array must be usable
 *
 * Makes sure ctxt->atts can hold at least @nr + 5 entries, allocating it
 * on first use (55 entries) or growing it to (@nr + 5) * 2 entries.
 * ctxt->attallocs is kept at one int per 5 entries of ctxt->atts.
 * ctxt->maxatts is only updated once both arrays have been resized.
 *
 * Returns the (possibly unchanged) value of ctxt->maxatts, or -1 in case
 *         of memory allocation failure
 */
static int
xmlCtxtGrowAttrs(xmlParserCtxtPtr ctxt, int nr) {
    const xmlChar **newAtts;
    int *newAllocs;
    int newMax;

    if (ctxt->atts != NULL) {
        /* already allocated: nothing to do while there is enough room */
        if (nr + 5 <= ctxt->maxatts)
            return(ctxt->maxatts);

        newMax = (nr + 5) * 2;
        newAtts = (const xmlChar **) xmlRealloc((void *) ctxt->atts,
                                     newMax * sizeof(const xmlChar *));
        if (newAtts == NULL)
            goto alloc_failed;
        ctxt->atts = newAtts;

        newAllocs = (int *) xmlRealloc((void *) ctxt->attallocs,
                                       (newMax / 5) * sizeof(int));
    } else {
        /* first use: 55 entries, i.e. 11 groups of 5 */
        newMax = 55;
        newAtts = (const xmlChar **)
                  xmlMalloc(newMax * sizeof(xmlChar *));
        if (newAtts == NULL)
            goto alloc_failed;
        ctxt->atts = newAtts;

        newAllocs = (int *) xmlMalloc((newMax / 5) * sizeof(int));
    }

    if (newAllocs == NULL)
        goto alloc_failed;
    ctxt->attallocs = newAllocs;
    ctxt->maxatts = newMax;
    return(ctxt->maxatts);

alloc_failed:
    xmlErrMemory(ctxt, NULL);
    return(-1);
}

937 938 939
/**
 * inputPush:
 * @ctxt:  an XML parser context
940
 * @value:  the parser input
941 942
 *
 * Pushes a new parser input on top of the input stack
943 944
 *
 * Returns 0 in case of error, the index in the stack otherwise
945
 */
Daniel Veillard's avatar
Daniel Veillard committed
946 947 948 949 950 951 952 953 954 955
extern int
inputPush(xmlParserCtxtPtr ctxt, xmlParserInputPtr value)
{
    if (ctxt->inputNr >= ctxt->inputMax) {
        ctxt->inputMax *= 2;
        ctxt->inputTab =
            (xmlParserInputPtr *) xmlRealloc(ctxt->inputTab,
                                             ctxt->inputMax *
                                             sizeof(ctxt->inputTab[0]));
        if (ctxt->inputTab == NULL) {
956
            xmlErrMemory(ctxt, NULL);
Daniel Veillard's avatar
Daniel Veillard committed
957 958 959 960 961 962 963
            return (0);
        }
    }
    ctxt->inputTab[ctxt->inputNr] = value;
    ctxt->input = value;
    return (ctxt->inputNr++);
}
964
/**
Daniel Veillard's avatar
Daniel Veillard committed
965
 * inputPop:
966 967
 * @ctxt: an XML parser context
 *
Daniel Veillard's avatar
Daniel Veillard committed
968
 * Pops the top parser input from the input stack
969
 *
Daniel Veillard's avatar
Daniel Veillard committed
970
 * Returns the input just removed
971
 */
Daniel Veillard's avatar
Daniel Veillard committed
972 973 974 975 976 977 978 979 980 981 982 983 984 985 986 987
extern xmlParserInputPtr
inputPop(xmlParserCtxtPtr ctxt)
{
    xmlParserInputPtr ret;

    if (ctxt->inputNr <= 0)
        return (0);
    ctxt->inputNr--;
    if (ctxt->inputNr > 0)
        ctxt->input = ctxt->inputTab[ctxt->inputNr - 1];
    else
        ctxt->input = NULL;
    ret = ctxt->inputTab[ctxt->inputNr];
    ctxt->inputTab[ctxt->inputNr] = 0;
    return (ret);
}
988
/**
Daniel Veillard's avatar
Daniel Veillard committed
989
 * nodePush:
990
 * @ctxt:  an XML parser context
Daniel Veillard's avatar
Daniel Veillard committed
991
 * @value:  the element node
992
 *
Daniel Veillard's avatar
Daniel Veillard committed
993
 * Pushes a new element node on top of the node stack
994 995
 *
 * Returns 0 in case of error, the index in the stack otherwise
996
 */
Daniel Veillard's avatar
Daniel Veillard committed
997 998 999 1000 1001 1002 1003 1004 1005 1006
extern int
nodePush(xmlParserCtxtPtr ctxt, xmlNodePtr value)
{
    if (ctxt->nodeNr >= ctxt->nodeMax) {
        ctxt->nodeMax *= 2;
        ctxt->nodeTab =
            (xmlNodePtr *) xmlRealloc(ctxt->nodeTab,
                                      ctxt->nodeMax *
                                      sizeof(ctxt->nodeTab[0]));
        if (ctxt->nodeTab == NULL) {
1007
            xmlErrMemory(ctxt, NULL);
Daniel Veillard's avatar
Daniel Veillard committed
1008 1009 1010
            return (0);
        }
    }
1011
    if (((unsigned int) ctxt->nodeNr) > xmlParserMaxDepth) {
1012
	xmlFatalErrMsgInt(ctxt, XML_ERR_INTERNAL_ERROR,
1013 1014
		 "Excessive depth in document: change xmlParserMaxDepth = %d\n",
			  xmlParserMaxDepth);
1015 1016 1017
	ctxt->instate = XML_PARSER_EOF;
	return(0);
    }
Daniel Veillard's avatar
Daniel Veillard committed
1018 1019 1020 1021
    ctxt->nodeTab[ctxt->nodeNr] = value;
    ctxt->node = value;
    return (ctxt->nodeNr++);
}
1022 1023 1024 1025 1026 1027 1028 1029
/**
 * nodePop:
 * @ctxt: an XML parser context
 *
 * Pops the top element node from the node stack
 *
 * Returns the node just removed
 */
Daniel Veillard's avatar
Daniel Veillard committed
1030 1031 1032 1033 1034 1035 1036 1037 1038 1039 1040 1041 1042 1043 1044 1045
extern xmlNodePtr
nodePop(xmlParserCtxtPtr ctxt)
{
    xmlNodePtr ret;

    if (ctxt->nodeNr <= 0)
        return (0);
    ctxt->nodeNr--;
    if (ctxt->nodeNr > 0)
        ctxt->node = ctxt->nodeTab[ctxt->nodeNr - 1];
    else
        ctxt->node = NULL;
    ret = ctxt->nodeTab[ctxt->nodeNr];
    ctxt->nodeTab[ctxt->nodeNr] = 0;
    return (ret);
}
1046 1047

#ifdef LIBXML_PUSH_ENABLED
1048 1049 1050 1051 1052 1053 1054 1055 1056 1057 1058 1059 1060 1061 1062 1063 1064 1065 1066 1067 1068 1069 1070 1071 1072 1073 1074 1075 1076 1077 1078 1079 1080 1081 1082 1083 1084 1085 1086 1087
/**
 * nameNsPush:
 * @ctxt:  an XML parser context
 * @value:  the element name
 * @prefix:  the element prefix
 * @URI:  the element namespace name
 *
 * Pushes a new element name/prefix/URL on top of the name stack
 *
 * Returns -1 in case of error, the index in the stack otherwise
 */
static int
nameNsPush(xmlParserCtxtPtr ctxt, const xmlChar * value,
           const xmlChar *prefix, const xmlChar *URI, int nsNr)
{
    if (ctxt->nameNr >= ctxt->nameMax) {
        const xmlChar * *tmp;
        void **tmp2;
        ctxt->nameMax *= 2;
        tmp = (const xmlChar * *) xmlRealloc((xmlChar * *)ctxt->nameTab,
                                    ctxt->nameMax *