/*
 * parser.c : an XML 1.0 parser, namespaces and validity support are mostly
 *            implemented on top of the SAX interfaces
 *
 * References:
 *   The XML specification:
 *     http://www.w3.org/TR/REC-xml
 *   Original 1.0 version:
 *     http://www.w3.org/TR/1998/REC-xml-19980210
 *   XML second edition working draft
 *     http://www.w3.org/TR/2000/WD-xml-2e-20000814
 *
 * Okay this is a big file, the parser core is around 7000 lines, then it
 * is followed by the progressive parser top routines, then the various
 * high level APIs to call the parser and a few miscellaneous functions.
 * A number of helper functions and deprecated ones have been moved to
 * parserInternals.c to reduce this file size.
 * As much as possible the functions are associated with their relative
 * production in the XML specification. A few productions defining the
 * different ranges of character are actually implemented either in
 * parserInternals.h or parserInternals.c
 * The DOM tree build is realized from the default SAX callbacks in
 * the module SAX.c.
 * The routines doing the validation checks are in valid.c and called either
 * from the SAX callbacks or as standalone functions using a preparsed
 * document.
 *
 * See Copyright for the status of this software.
 *
 * daniel@veillard.com
 */

33
#define IN_LIBXML
Bjorn Reese's avatar
Bjorn Reese committed
34 35
#include "libxml.h"

36
#if defined(WIN32) && !defined (__CYGWIN__)
37 38 39 40 41 42 43
#define XML_DIR_SEP '\\'
#else
#define XML_DIR_SEP '/'
#endif

#include <stdlib.h>
#include <string.h>
44
#include <stdarg.h>
45
#include <libxml/xmlmemory.h>
46 47
#include <libxml/threads.h>
#include <libxml/globals.h>
48 49 50 51 52 53 54 55 56
#include <libxml/tree.h>
#include <libxml/parser.h>
#include <libxml/parserInternals.h>
#include <libxml/valid.h>
#include <libxml/entities.h>
#include <libxml/xmlerror.h>
#include <libxml/encoding.h>
#include <libxml/xmlIO.h>
#include <libxml/uri.h>
57 58 59
#ifdef LIBXML_CATALOG_ENABLED
#include <libxml/catalog.h>
#endif
60 61 62 63
#ifdef LIBXML_SCHEMAS_ENABLED
#include <libxml/xmlschemastypes.h>
#include <libxml/relaxng.h>
#endif
64 65 66 67 68 69 70 71 72 73 74 75 76 77 78 79 80 81 82
#ifdef HAVE_CTYPE_H
#include <ctype.h>
#endif
#ifdef HAVE_STDLIB_H
#include <stdlib.h>
#endif
#ifdef HAVE_SYS_STAT_H
#include <sys/stat.h>
#endif
#ifdef HAVE_FCNTL_H
#include <fcntl.h>
#endif
#ifdef HAVE_UNISTD_H
#include <unistd.h>
#endif
#ifdef HAVE_ZLIB_H
#include <zlib.h>
#endif

83
/**
 * xmlParserMaxDepth:
 *
 * arbitrary depth limit for the XML documents that we allow to
 * process. This is not a limitation of the parser but a safety
 * boundary feature.
 */
90
unsigned int xmlParserMaxDepth = 1024;
91

92 93
#define SAX2 1

94
#define XML_PARSER_BIG_BUFFER_SIZE 300
95 96
#define XML_PARSER_BUFFER_SIZE 100

97 98
#define SAX_COMPAT_MODE BAD_CAST "SAX compatibility mode document"

99 100 101 102
/*
 * List of XML prefixed PI allowed by W3C specs
 */

103
static const char *xmlW3CPIs[] = {
104 105 106 107
    "xml-stylesheet",
    NULL
};

108

109 110 111 112
/* DEPR void xmlParserHandleReference(xmlParserCtxtPtr ctxt); */
xmlEntityPtr xmlParseStringPEReference(xmlParserCtxtPtr ctxt,
                                       const xmlChar **str);

113
static xmlParserErrors
114 115
xmlParseExternalEntityPrivate(xmlDocPtr doc, xmlParserCtxtPtr oldctxt,
	              xmlSAXHandlerPtr sax,
116
		      void *user_data, int depth, const xmlChar *URL,
117
		      const xmlChar *ID, xmlNodePtr *list);
118

119
#ifdef LIBXML_LEGACY_ENABLED
120 121 122
static void
xmlAddEntityReference(xmlEntityPtr ent, xmlNodePtr firstNode,
                      xmlNodePtr lastNode);
123
#endif /* LIBXML_LEGACY_ENABLED */
124

125
static xmlParserErrors
126 127
xmlParseBalancedChunkMemoryInternal(xmlParserCtxtPtr oldctxt,
		      const xmlChar *string, void *user_data, xmlNodePtr *lst);
128

129 130 131 132 133 134 135 136 137 138 139 140 141 142 143 144 145 146
/************************************************************************
 *									*
 * 		Some factorized error routines				*
 *									*
 ************************************************************************/

/**
 * xmlErrAttributeDup:
 * @ctxt:  an XML parser context (a NULL context is not dereferenced)
 * @prefix:  the attribute prefix, or NULL for an unprefixed attribute
 * @localname:  the attribute localname
 *
 * Handle a redefinition of attribute error: raise a fatal
 * XML_ERR_ATTRIBUTE_REDEFINED error and, when a context is supplied,
 * record the error code, clear its wellFormed flag and disable the SAX
 * callbacks unless the context is in recovery mode.
 */
static void
xmlErrAttributeDup(xmlParserCtxtPtr ctxt, const xmlChar * prefix,
                   const xmlChar * localname)
{
    /* parsing already stopped with SAX disabled: drop the report */
    if ((ctxt != NULL) && (ctxt->disableSAX != 0) &&
        (ctxt->instate == XML_PARSER_EOF))
        return;
    if (ctxt != NULL)
        ctxt->errNo = XML_ERR_ATTRIBUTE_REDEFINED;
    if (prefix == NULL)
        __xmlRaiseError(NULL, NULL, NULL, ctxt, NULL, XML_FROM_PARSER,
                        XML_ERR_ATTRIBUTE_REDEFINED, XML_ERR_FATAL, NULL, 0,
                        (const char *) localname, NULL, NULL, 0, 0,
                        "Attribute %s redefined\n", localname);
    else
        __xmlRaiseError(NULL, NULL, NULL, ctxt, NULL, XML_FROM_PARSER,
                        XML_ERR_ATTRIBUTE_REDEFINED, XML_ERR_FATAL, NULL, 0,
                        (const char *) prefix, (const char *) localname,
                        NULL, 0, 0, "Attribute %s:%s redefined\n", prefix,
                        localname);
    if (ctxt != NULL) {
        ctxt->wellFormed = 0;
        if (ctxt->recovery == 0)
            ctxt->disableSAX = 1;
    }
}

/**
 * xmlFatalErr:
 * @ctxt:  an XML parser context
 * @error:  the error number
 * @extra:  extra information string
 *
 * Handle a fatal parser error, i.e. violating Well-Formedness constraints
 */
static void
176
xmlFatalErr(xmlParserCtxtPtr ctxt, xmlParserErrors error, const char *info)
177 178 179
{
    const char *errmsg;

180 181 182
    if ((ctxt != NULL) && (ctxt->disableSAX != 0) &&
        (ctxt->instate == XML_PARSER_EOF))
	return;
183 184
    switch (error) {
        case XML_ERR_INVALID_HEX_CHARREF:
185 186
            errmsg = "CharRef: invalid hexadecimal value\n";
            break;
187
        case XML_ERR_INVALID_DEC_CHARREF:
188 189
            errmsg = "CharRef: invalid decimal value\n";
            break;
190
        case XML_ERR_INVALID_CHARREF:
191 192
            errmsg = "CharRef: invalid value\n";
            break;
193
        case XML_ERR_INTERNAL_ERROR:
194 195
            errmsg = "internal error";
            break;
196
        case XML_ERR_PEREF_AT_EOF:
197 198
            errmsg = "PEReference at end of document\n";
            break;
199
        case XML_ERR_PEREF_IN_PROLOG:
200 201
            errmsg = "PEReference in prolog\n";
            break;
202
        case XML_ERR_PEREF_IN_EPILOG:
203 204
            errmsg = "PEReference in epilog\n";
            break;
205
        case XML_ERR_PEREF_NO_NAME:
206 207
            errmsg = "PEReference: no name\n";
            break;
208
        case XML_ERR_PEREF_SEMICOL_MISSING:
209 210
            errmsg = "PEReference: expecting ';'\n";
            break;
211
        case XML_ERR_ENTITY_LOOP:
212 213
            errmsg = "Detected an entity reference loop\n";
            break;
214
        case XML_ERR_ENTITY_NOT_STARTED:
215 216
            errmsg = "EntityValue: \" or ' expected\n";
            break;
217
        case XML_ERR_ENTITY_PE_INTERNAL:
218 219
            errmsg = "PEReferences forbidden in internal subset\n";
            break;
220
        case XML_ERR_ENTITY_NOT_FINISHED:
221 222
            errmsg = "EntityValue: \" or ' expected\n";
            break;
223
        case XML_ERR_ATTRIBUTE_NOT_STARTED:
224 225
            errmsg = "AttValue: \" or ' expected\n";
            break;
226
        case XML_ERR_LT_IN_ATTRIBUTE:
227 228
            errmsg = "Unescaped '<' not allowed in attributes values\n";
            break;
229
        case XML_ERR_LITERAL_NOT_STARTED:
230 231
            errmsg = "SystemLiteral \" or ' expected\n";
            break;
232
        case XML_ERR_LITERAL_NOT_FINISHED:
233 234
            errmsg = "Unfinished System or Public ID \" or ' expected\n";
            break;
235
        case XML_ERR_MISPLACED_CDATA_END:
236 237
            errmsg = "Sequence ']]>' not allowed in content\n";
            break;
238
        case XML_ERR_URI_REQUIRED:
239 240
            errmsg = "SYSTEM or PUBLIC, the URI is missing\n";
            break;
241
        case XML_ERR_PUBID_REQUIRED:
242 243
            errmsg = "PUBLIC, the Public Identifier is missing\n";
            break;
244
        case XML_ERR_HYPHEN_IN_COMMENT:
245 246
            errmsg = "Comment must not contain '--' (double-hyphen)\n";
            break;
247
        case XML_ERR_PI_NOT_STARTED:
248 249
            errmsg = "xmlParsePI : no target name\n";
            break;
250
        case XML_ERR_RESERVED_XML_NAME:
251 252
            errmsg = "Invalid PI name\n";
            break;
253
        case XML_ERR_NOTATION_NOT_STARTED:
254 255
            errmsg = "NOTATION: Name expected here\n";
            break;
256
        case XML_ERR_NOTATION_NOT_FINISHED:
257 258
            errmsg = "'>' required to close NOTATION declaration\n";
            break;
259
        case XML_ERR_VALUE_REQUIRED:
260 261
            errmsg = "Entity value required\n";
            break;
262
        case XML_ERR_URI_FRAGMENT:
263 264
            errmsg = "Fragment not allowed";
            break;
265
        case XML_ERR_ATTLIST_NOT_STARTED:
266 267
            errmsg = "'(' required to start ATTLIST enumeration\n";
            break;
268
        case XML_ERR_NMTOKEN_REQUIRED:
269 270
            errmsg = "NmToken expected in ATTLIST enumeration\n";
            break;
271
        case XML_ERR_ATTLIST_NOT_FINISHED:
272 273
            errmsg = "')' required to finish ATTLIST enumeration\n";
            break;
274
        case XML_ERR_MIXED_NOT_STARTED:
275 276
            errmsg = "MixedContentDecl : '|' or ')*' expected\n";
            break;
277
        case XML_ERR_PCDATA_REQUIRED:
278 279
            errmsg = "MixedContentDecl : '#PCDATA' expected\n";
            break;
280
        case XML_ERR_ELEMCONTENT_NOT_STARTED:
281 282
            errmsg = "ContentDecl : Name or '(' expected\n";
            break;
283
        case XML_ERR_ELEMCONTENT_NOT_FINISHED:
284 285
            errmsg = "ContentDecl : ',' '|' or ')' expected\n";
            break;
286
        case XML_ERR_PEREF_IN_INT_SUBSET:
287 288 289
            errmsg =
                "PEReference: forbidden within markup decl in internal subset\n";
            break;
290
        case XML_ERR_GT_REQUIRED:
291 292
            errmsg = "expected '>'\n";
            break;
293
        case XML_ERR_CONDSEC_INVALID:
294 295
            errmsg = "XML conditional section '[' expected\n";
            break;
296
        case XML_ERR_EXT_SUBSET_NOT_FINISHED:
297 298 299 300 301 302
            errmsg = "Content error in the external subset\n";
            break;
        case XML_ERR_CONDSEC_INVALID_KEYWORD:
            errmsg =
                "conditional section INCLUDE or IGNORE keyword expected\n";
            break;
303
        case XML_ERR_CONDSEC_NOT_FINISHED:
304 305
            errmsg = "XML conditional section not closed\n";
            break;
306
        case XML_ERR_XMLDECL_NOT_STARTED:
307 308
            errmsg = "Text declaration '<?xml' required\n";
            break;
309
        case XML_ERR_XMLDECL_NOT_FINISHED:
310 311
            errmsg = "parsing XML declaration: '?>' expected\n";
            break;
312
        case XML_ERR_EXT_ENTITY_STANDALONE:
313 314
            errmsg = "external parsed entities cannot be standalone\n";
            break;
315
        case XML_ERR_ENTITYREF_SEMICOL_MISSING:
316 317
            errmsg = "EntityRef: expecting ';'\n";
            break;
318
        case XML_ERR_DOCTYPE_NOT_FINISHED:
319 320
            errmsg = "DOCTYPE improperly terminated\n";
            break;
321
        case XML_ERR_LTSLASH_REQUIRED:
322 323
            errmsg = "EndTag: '</' not found\n";
            break;
324
        case XML_ERR_EQUAL_REQUIRED:
325 326
            errmsg = "expected '='\n";
            break;
327
        case XML_ERR_STRING_NOT_CLOSED:
328 329
            errmsg = "String not closed expecting \" or '\n";
            break;
330
        case XML_ERR_STRING_NOT_STARTED:
331 332
            errmsg = "String not started expecting ' or \"\n";
            break;
333
        case XML_ERR_ENCODING_NAME:
334 335
            errmsg = "Invalid XML encoding name\n";
            break;
336
        case XML_ERR_STANDALONE_VALUE:
337 338
            errmsg = "standalone accepts only 'yes' or 'no'\n";
            break;
339
        case XML_ERR_DOCUMENT_EMPTY:
340 341
            errmsg = "Document is empty\n";
            break;
342
        case XML_ERR_DOCUMENT_END:
343 344
            errmsg = "Extra content at the end of the document\n";
            break;
345
        case XML_ERR_NOT_WELL_BALANCED:
346 347
            errmsg = "chunk is not well balanced\n";
            break;
348
        case XML_ERR_EXTRA_CONTENT:
349 350
            errmsg = "extra content at the end of well balanced chunk\n";
            break;
351
        case XML_ERR_VERSION_MISSING:
352 353
            errmsg = "Malformed declaration expecting version\n";
            break;
354
#if 0
355 356 357
        case:
            errmsg = "\n";
            break;
358
#endif
359 360
        default:
            errmsg = "Unregistered error message\n";
361 362
    }
    ctxt->errNo = error;
363
    __xmlRaiseError(NULL, NULL, NULL, ctxt, NULL, XML_FROM_PARSER, error,
364 365
                    XML_ERR_FATAL, NULL, 0, info, NULL, NULL, 0, 0, errmsg,
                    info);
366 367 368 369 370
    ctxt->wellFormed = 0;
    if (ctxt->recovery == 0)
        ctxt->disableSAX = 1;
}

371 372 373 374 375 376 377 378 379
/**
 * xmlFatalErrMsg:
 * @ctxt:  an XML parser context
 * @error:  the error number
 * @msg:  the error message
 *
 * Handle a fatal parser error, i.e. violating Well-Formedness constraints
 */
static void
380 381
xmlFatalErrMsg(xmlParserCtxtPtr ctxt, xmlParserErrors error,
               const char *msg)
382
{
383 384 385
    if ((ctxt != NULL) && (ctxt->disableSAX != 0) &&
        (ctxt->instate == XML_PARSER_EOF))
	return;
386
    ctxt->errNo = error;
387
    __xmlRaiseError(NULL, NULL, NULL, ctxt, NULL, XML_FROM_PARSER, error,
388
                    XML_ERR_FATAL, NULL, 0, NULL, NULL, NULL, 0, 0, msg);
389 390 391 392 393
    ctxt->wellFormed = 0;
    if (ctxt->recovery == 0)
        ctxt->disableSAX = 1;
}

394 395 396 397 398 399 400 401 402 403 404 405 406 407
/**
 * xmlWarningMsg:
 * @ctxt:  an XML parser context (a NULL context is not dereferenced)
 * @error:  the error number
 * @msg:  the error message (format string consuming @str1 and @str2)
 * @str1:  extra data
 * @str2:  extra data
 *
 * Handle a warning. With a context, the warning is routed to the SAX
 * warning callback (and to the structured error callback when the SAX
 * handler block is flagged XML_SAX2_MAGIC) and the context errNo is
 * updated. Without a context it is raised with no specific channel.
 */
static void
xmlWarningMsg(xmlParserCtxtPtr ctxt, xmlParserErrors error,
              const char *msg, const xmlChar *str1, const xmlChar *str2)
{
    xmlStructuredErrorFunc schannel = NULL;

    /* parsing already stopped with SAX disabled: drop the report */
    if ((ctxt != NULL) && (ctxt->disableSAX != 0) &&
        (ctxt->instate == XML_PARSER_EOF))
        return;

    if (ctxt == NULL) {
        /* nothing to record and no SAX handler to pick channels from */
        __xmlRaiseError(NULL, NULL, NULL,
                        ctxt, NULL, XML_FROM_PARSER, error,
                        XML_ERR_WARNING, NULL, 0,
                        (const char *) str1, (const char *) str2, NULL, 0, 0,
                        msg, (const char *) str1, (const char *) str2);
        return;
    }

    ctxt->errNo = error;
    if ((ctxt->sax != NULL) && (ctxt->sax->initialized == XML_SAX2_MAGIC))
        schannel = ctxt->sax->serror;
    __xmlRaiseError(schannel,
                    (ctxt->sax) ? ctxt->sax->warning : NULL,
                    ctxt->userData,
                    ctxt, NULL, XML_FROM_PARSER, error,
                    XML_ERR_WARNING, NULL, 0,
                    (const char *) str1, (const char *) str2, NULL, 0, 0,
                    msg, (const char *) str1, (const char *) str2);
}

/**
 * xmlValidityError:
 * @ctxt:  an XML parser context (a NULL context is not dereferenced)
 * @error:  the error number
 * @msg:  the error message (format string consuming @str1)
 * @str1:  extra data
 *
 * Handle a validity error. With a context, the error is routed to the
 * validation context callback (ctxt->vctxt.error) and to the structured
 * error callback when the SAX handler block is flagged XML_SAX2_MAGIC;
 * errNo is updated and the valid flag is cleared. Without a context the
 * error is raised with no specific channel.
 */
static void
xmlValidityError(xmlParserCtxtPtr ctxt, xmlParserErrors error,
              const char *msg, const xmlChar *str1)
{
    xmlStructuredErrorFunc schannel = NULL;

    /* parsing already stopped with SAX disabled: drop the report */
    if ((ctxt != NULL) && (ctxt->disableSAX != 0) &&
        (ctxt->instate == XML_PARSER_EOF))
        return;

    if (ctxt == NULL) {
        /* no context: no validation channel and no flag to clear */
        __xmlRaiseError(NULL, NULL, NULL,
                        ctxt, NULL, XML_FROM_DTD, error,
                        XML_ERR_ERROR, NULL, 0, (const char *) str1,
                        NULL, NULL, 0, 0,
                        msg, (const char *) str1);
        return;
    }

    ctxt->errNo = error;
    if ((ctxt->sax != NULL) && (ctxt->sax->initialized == XML_SAX2_MAGIC))
        schannel = ctxt->sax->serror;
    __xmlRaiseError(schannel,
                    ctxt->vctxt.error, ctxt->vctxt.userData,
                    ctxt, NULL, XML_FROM_DTD, error,
                    XML_ERR_ERROR, NULL, 0, (const char *) str1,
                    NULL, NULL, 0, 0,
                    msg, (const char *) str1);
    ctxt->valid = 0;
}

455 456 457 458 459 460 461 462 463 464 465
/**
 * xmlFatalErrMsgInt:
 * @ctxt:  an XML parser context (a NULL context is not dereferenced)
 * @error:  the error number
 * @msg:  the error message (format string consuming @val)
 * @val:  an integer value
 *
 * Handle a fatal parser error, i.e. violating Well-Formedness constraints.
 * @val is stored as the first integer of the error record and also given
 * to the format. When a context is supplied its errNo is set, wellFormed
 * is cleared and SAX is disabled unless recovery is on.
 */
static void
xmlFatalErrMsgInt(xmlParserCtxtPtr ctxt, xmlParserErrors error,
                  const char *msg, int val)
{
    /* parsing already stopped with SAX disabled: drop the report */
    if ((ctxt != NULL) && (ctxt->disableSAX != 0) &&
        (ctxt->instate == XML_PARSER_EOF))
        return;
    if (ctxt != NULL)
        ctxt->errNo = error;
    __xmlRaiseError(NULL, NULL, NULL,
                    ctxt, NULL, XML_FROM_PARSER, error, XML_ERR_FATAL,
                    NULL, 0, NULL, NULL, NULL, val, 0, msg, val);
    if (ctxt != NULL) {
        ctxt->wellFormed = 0;
        if (ctxt->recovery == 0)
            ctxt->disableSAX = 1;
    }
}

480 481 482 483 484 485 486 487 488 489 490 491 492 493 494 495
/**
 * xmlFatalErrMsgStrIntStr:
 * @ctxt:  an XML parser context (a NULL context is not dereferenced)
 * @error:  the error number
 * @msg:  the error message (format consuming @str1, @val, @str2 in order)
 * @str1:  a string info
 * @val:  an integer value
 * @str2:  a string info
 *
 * Handle a fatal parser error, i.e. violating Well-Formedness constraints.
 * When a context is supplied its errNo is set, wellFormed is cleared and
 * SAX is disabled unless recovery is on.
 */
static void
xmlFatalErrMsgStrIntStr(xmlParserCtxtPtr ctxt, xmlParserErrors error,
                  const char *msg, const xmlChar *str1, int val,
                  const xmlChar *str2)
{
    /* parsing already stopped with SAX disabled: drop the report */
    if ((ctxt != NULL) && (ctxt->disableSAX != 0) &&
        (ctxt->instate == XML_PARSER_EOF))
        return;
    if (ctxt != NULL)
        ctxt->errNo = error;
    __xmlRaiseError(NULL, NULL, NULL,
                    ctxt, NULL, XML_FROM_PARSER, error, XML_ERR_FATAL,
                    NULL, 0, (const char *) str1, (const char *) str2,
                    NULL, val, 0, msg, str1, val, str2);
    if (ctxt != NULL) {
        ctxt->wellFormed = 0;
        if (ctxt->recovery == 0)
            ctxt->disableSAX = 1;
    }
}

509 510 511 512 513 514 515 516 517 518 519
/**
 * xmlFatalErrMsgStr:
 * @ctxt:  an XML parser context (a NULL context is not dereferenced)
 * @error:  the error number
 * @msg:  the error message (format string consuming @val)
 * @val:  a string value
 *
 * Handle a fatal parser error, i.e. violating Well-Formedness constraints.
 * When a context is supplied its errNo is set, wellFormed is cleared and
 * SAX is disabled unless recovery is on.
 */
static void
xmlFatalErrMsgStr(xmlParserCtxtPtr ctxt, xmlParserErrors error,
                  const char *msg, const xmlChar * val)
{
    /* parsing already stopped with SAX disabled: drop the report */
    if ((ctxt != NULL) && (ctxt->disableSAX != 0) &&
        (ctxt->instate == XML_PARSER_EOF))
        return;
    if (ctxt != NULL)
        ctxt->errNo = error;
    __xmlRaiseError(NULL, NULL, NULL, ctxt, NULL,
                    XML_FROM_PARSER, error, XML_ERR_FATAL,
                    NULL, 0, (const char *) val, NULL, NULL, 0, 0, msg,
                    val);
    if (ctxt != NULL) {
        ctxt->wellFormed = 0;
        if (ctxt->recovery == 0)
            ctxt->disableSAX = 1;
    }
}

535 536 537 538 539 540 541 542 543 544 545 546 547
/**
 * xmlErrMsgStr:
 * @ctxt:  an XML parser context (a NULL context is not dereferenced)
 * @error:  the error number
 * @msg:  the error message (format string consuming @val)
 * @val:  a string value
 *
 * Handle a non fatal parser error: the error is raised at XML_ERR_ERROR
 * level and only the context errNo is updated; the wellFormed and
 * disableSAX flags are left untouched.
 */
static void
xmlErrMsgStr(xmlParserCtxtPtr ctxt, xmlParserErrors error,
                  const char *msg, const xmlChar * val)
{
    /* parsing already stopped with SAX disabled: drop the report */
    if ((ctxt != NULL) && (ctxt->disableSAX != 0) &&
        (ctxt->instate == XML_PARSER_EOF))
        return;
    if (ctxt != NULL)
        ctxt->errNo = error;
    __xmlRaiseError(NULL, NULL, NULL, ctxt, NULL,
                    XML_FROM_PARSER, error, XML_ERR_ERROR,
                    NULL, 0, (const char *) val, NULL, NULL, 0, 0, msg,
                    val);
}

558 559 560 561 562 563 564 565 566 567 568 569 570
/**
 * xmlNsErr:
 * @ctxt:  an XML parser context (a NULL context is not dereferenced)
 * @error:  the error number
 * @msg:  the message (format consuming @info1, @info2, @info3 in order)
 * @info1:  extra information string
 * @info2:  extra information string
 * @info3:  extra information string
 *
 * Handle a namespace error: raised in the XML_FROM_NAMESPACE domain at
 * XML_ERR_ERROR level. When a context is supplied its errNo is set and
 * its nsWellFormed flag is cleared; wellFormed is not modified here.
 */
static void
xmlNsErr(xmlParserCtxtPtr ctxt, xmlParserErrors error,
         const char *msg,
         const xmlChar * info1, const xmlChar * info2,
         const xmlChar * info3)
{
    /* parsing already stopped with SAX disabled: drop the report */
    if ((ctxt != NULL) && (ctxt->disableSAX != 0) &&
        (ctxt->instate == XML_PARSER_EOF))
        return;
    if (ctxt != NULL)
        ctxt->errNo = error;
    __xmlRaiseError(NULL, NULL, NULL, ctxt, NULL, XML_FROM_NAMESPACE, error,
                    XML_ERR_ERROR, NULL, 0, (const char *) info1,
                    (const char *) info2, (const char *) info3, 0, 0, msg,
                    info1, info2, info3);
    if (ctxt != NULL)
        ctxt->nsWellFormed = 0;
}

585 586 587 588 589 590 591 592 593 594 595 596 597 598 599
/************************************************************************
 *									*
 * 		SAX2 defaulted attributes handling			*
 *									*
 ************************************************************************/

/**
 * xmlDetectSAX2:
 * @ctxt:  an XML parser context
 *
 * Do the SAX2 detection and specific initialization: flag the context as
 * SAX2 when applicable and intern the "xml", "xmlns" and XML namespace
 * strings in the context dictionary. A memory error is reported if any
 * of the three lookups returns NULL. Does nothing when @ctxt is NULL.
 */
static void
xmlDetectSAX2(xmlParserCtxtPtr ctxt) {
    if (ctxt == NULL)
        return;

#ifdef LIBXML_SAX1_ENABLED
    /* SAX2 only if the handler block is SAX2-initialized and provides at
     * least one of the namespace-aware element callbacks */
    if ((ctxt->sax) && (ctxt->sax->initialized == XML_SAX2_MAGIC)) {
        if ((ctxt->sax->startElementNs != NULL) ||
            (ctxt->sax->endElementNs != NULL))
            ctxt->sax2 = 1;
    }
#else
    ctxt->sax2 = 1;
#endif /* LIBXML_SAX1_ENABLED */

    ctxt->str_xml = xmlDictLookup(ctxt->dict, BAD_CAST "xml", 3);
    ctxt->str_xmlns = xmlDictLookup(ctxt->dict, BAD_CAST "xmlns", 5);
    ctxt->str_xml_ns = xmlDictLookup(ctxt->dict, XML_XML_NAMESPACE, 36);

    if (!((ctxt->str_xml != NULL) && (ctxt->str_xmlns != NULL) &&
          (ctxt->str_xml_ns != NULL)))
        xmlErrMemory(ctxt, NULL);
}

/*
 * Table of the defaulted attributes declared for one element.
 * xmlAddDefAttrs() allocates it with extra room after the structure and
 * indexes values[] past its declared bound: each attribute uses four
 * consecutive slots (localname, prefix, value, end of value).
 */
typedef struct _xmlDefAttrs xmlDefAttrs;
typedef xmlDefAttrs *xmlDefAttrsPtr;
struct _xmlDefAttrs {
    int nbAttrs;	/* number of defaulted attributes on that element */
    int maxAttrs;       /* capacity of values[], counted in attributes */
    const xmlChar *values[4]; /* localname/prefix/value/value-end, 4 slots per attribute */
};

/**
 * xmlAddDefAttrs:
 * @ctxt:  an XML parser context
 * @fullname:  the element fullname
 * @fullattr:  the attribute fullname
 * @value:  the attribute value
 *
 * Add a defaulted attribute for an element. The per-element table is
 * looked up (or created) in ctxt->attsDefault under the element's
 * localname/prefix, grown when full, and the attribute is appended as
 * four entries: localname, prefix, interned value and end of value.
 * Allocation failures are reported through xmlErrMemory().
 */
static void
xmlAddDefAttrs(xmlParserCtxtPtr ctxt,
               const xmlChar *fullname,
               const xmlChar *fullattr,
               const xmlChar *value) {
    xmlDefAttrsPtr defaults;
    int len;
    const xmlChar *name;
    const xmlChar *prefix;

    if (ctxt->attsDefault == NULL) {
        ctxt->attsDefault = xmlHashCreate(10);
        if (ctxt->attsDefault == NULL)
            goto mem_error;
    }

    /*
     * split the element name into prefix:localname , the string found
     * are within the DTD and then not associated to namespace names.
     */
    name = xmlSplitQName3(fullname, &len);
    if (name == NULL) {
        name = xmlDictLookup(ctxt->dict, fullname, -1);
        prefix = NULL;
    } else {
        name = xmlDictLookup(ctxt->dict, name, -1);
        prefix = xmlDictLookup(ctxt->dict, fullname, len);
    }

    /*
     * make sure there is some storage
     */
    defaults = xmlHashLookup2(ctxt->attsDefault, name, prefix);
    if (defaults == NULL) {
        defaults = (xmlDefAttrsPtr) xmlMalloc(sizeof(xmlDefAttrs) +
                           (4 * 4) * sizeof(const xmlChar *));
        if (defaults == NULL)
            goto mem_error;
        defaults->nbAttrs = 0;
        defaults->maxAttrs = 4;
        /*
         * If the table cannot take ownership of the new block, release
         * it instead of leaking it and writing into unowned memory.
         */
        if (xmlHashUpdateEntry2(ctxt->attsDefault, name, prefix,
                                defaults, NULL) < 0) {
            xmlFree(defaults);
            goto mem_error;
        }
    } else if (defaults->nbAttrs >= defaults->maxAttrs) {
        xmlDefAttrsPtr temp;

        temp = (xmlDefAttrsPtr) xmlRealloc(defaults, sizeof(xmlDefAttrs) +
                       (2 * defaults->maxAttrs * 4) * sizeof(const xmlChar *));
        if (temp == NULL)
            goto mem_error;
        defaults = temp;
        defaults->maxAttrs *= 2;
        /* the block may have moved: re-register it under the same key */
        xmlHashUpdateEntry2(ctxt->attsDefault, name, prefix, defaults, NULL);
    }

    /*
     * split the attribute name into prefix:localname , the string found
     * are within the DTD and then not associated to namespace names.
     */
    name = xmlSplitQName3(fullattr, &len);
    if (name == NULL) {
        name = xmlDictLookup(ctxt->dict, fullattr, -1);
        prefix = NULL;
    } else {
        name = xmlDictLookup(ctxt->dict, name, -1);
        prefix = xmlDictLookup(ctxt->dict, fullattr, len);
    }

    defaults->values[4 * defaults->nbAttrs] = name;
    defaults->values[4 * defaults->nbAttrs + 1] = prefix;
    /* intern the string and precompute the end */
    len = xmlStrlen(value);
    value = xmlDictLookup(ctxt->dict, value, len);
    defaults->values[4 * defaults->nbAttrs + 2] = value;
    defaults->values[4 * defaults->nbAttrs + 3] = value + len;
    defaults->nbAttrs++;

    return;

mem_error:
    xmlErrMemory(ctxt, NULL);
    return;
}

716 717 718 719 720 721 722 723 724 725 726 727 728 729 730 731 732 733 734 735 736
/**
 * xmlAddSpecialAttr:
 * @ctxt:  an XML parser context
 * @fullname:  the element fullname
 * @fullattr:  the attribute fullname
 * @type:  the attribute type
 *
 * Register that this attribute is not CDATA: @type is stored, cast to a
 * pointer, in ctxt->attsSpecial under the (@fullname, @fullattr) key.
 * The table is created on first use; a failure to create it is reported
 * through xmlErrMemory().
 */
static void
xmlAddSpecialAttr(xmlParserCtxtPtr ctxt,
		  const xmlChar *fullname,
		  const xmlChar *fullattr,
		  int type)
{
    if (ctxt->attsSpecial == NULL) {
        ctxt->attsSpecial = xmlHashCreate(10);
        if (ctxt->attsSpecial == NULL) {
            xmlErrMemory(ctxt, NULL);
            return;
        }
    }

    /* the integer type travels as the hash payload itself */
    xmlHashAddEntry2(ctxt->attsSpecial, fullname, fullattr,
                     (void *) (long) type);
}

746 747 748 749 750 751 752 753 754 755 756 757 758 759 760 761 762 763 764 765 766 767 768 769 770 771 772 773 774 775 776 777 778 779 780 781 782 783 784 785 786 787 788 789 790 791 792 793 794 795 796 797 798 799 800 801 802 803 804 805 806 807 808 809 810 811 812 813 814 815 816 817
/*
 * Advance past a run of ASCII letters ([a-z] | [A-Z]) and return the
 * position of the first character that is not one.
 */
static const xmlChar *
xmlLangSkipLetters(const xmlChar * cur)
{
    while (((cur[0] >= 'A') && (cur[0] <= 'Z')) ||
           ((cur[0] >= 'a') && (cur[0] <= 'z')))
        cur++;
    return (cur);
}

/**
 * xmlCheckLanguageID:
 * @lang:  pointer to the string value
 *
 * Checks that the value conforms to the LanguageID production:
 *
 * NOTE: this is somewhat deprecated, those productions were removed from
 *       the XML Second edition.
 *
 * [33] LanguageID ::= Langcode ('-' Subcode)*
 * [34] Langcode ::= ISO639Code |  IanaCode |  UserCode
 * [35] ISO639Code ::= ([a-z] | [A-Z]) ([a-z] | [A-Z])
 * [36] IanaCode ::= ('i' | 'I') '-' ([a-z] | [A-Z])+
 * [37] UserCode ::= ('x' | 'X') '-' ([a-z] | [A-Z])+
 * [38] Subcode ::= ([a-z] | [A-Z])+
 *
 * An IanaCode or UserCode with no letter after the '-' (e.g. "i-" or
 * "x-") is rejected, as the '+' in productions [36] and [37] requires.
 *
 * Returns 1 if correct 0 otherwise (including when @lang is NULL)
 **/
int
xmlCheckLanguageID(const xmlChar * lang)
{
    const xmlChar *cur = lang;
    const xmlChar *end;

    if (cur == NULL)
        return (0);

    if (((cur[0] == 'i') || (cur[0] == 'I') ||
         (cur[0] == 'x') || (cur[0] == 'X')) && (cur[1] == '-')) {
        /*
         * IANA code or User code: at least one letter after the dash
         */
        cur += 2;
        end = xmlLangSkipLetters(cur);
        if (end == cur)
            return (0);
    } else {
        /*
         * ISO639: exactly two letters
         */
        end = xmlLangSkipLetters(cur);
        if ((end - cur) != 2)
            return (0);
    }
    cur = end;

    /*
     * Subcodes: each one is a dash followed by at least one letter
     */
    while (cur[0] != 0) {
        if (cur[0] != '-')
            return (0);
        cur++;
        end = xmlLangSkipLetters(cur);
        if (end == cur)
            return (0);
        cur = end;
    }
    return (1);
}

818 819 820 821 822 823 824 825 826
/************************************************************************
 *									*
 * 		Parser stacks related functions and macros		*
 *									*
 ************************************************************************/

xmlEntityPtr xmlParseStringEntityRef(xmlParserCtxtPtr ctxt,
                                     const xmlChar ** str);

827 828 829 830 831 832 833 834 835
#ifdef SAX2
/**
 * nsPush:
 * @ctxt:  an XML parser context
 * @prefix:  the namespace prefix or NULL
 * @URL:  the namespace name
 *
 * Pushes a new parser namespace on top of the ns stack. The stack holds
 * prefix/URL pairs in consecutive slots. With XML_PARSE_NSCLEAN set, a
 * declaration identical (same pointers) to the innermost binding already
 * in scope for @prefix is discarded.
 *
 * Returns -1 in case of error, -2 if the namespace should be discarded
 *	   and the number of slots in use on the stack otherwise.
 */
static int
nsPush(xmlParserCtxtPtr ctxt, const xmlChar *prefix, const xmlChar *URL)
{
    if (ctxt->options & XML_PARSE_NSCLEAN) {
        int i;

        /*
         * Scan from the top of the stack: the binding in scope for a
         * prefix is the most recently pushed one, an older binding of
         * the same prefix may be shadowed.
         */
        for (i = ctxt->nsNr - 2; i >= 0; i -= 2) {
            if (ctxt->nsTab[i] == prefix) {
                /* in scope */
                if (ctxt->nsTab[i + 1] == URL)
                    return(-2);
                /* out of scope keep it */
                break;
            }
        }
    }
    if ((ctxt->nsMax == 0) || (ctxt->nsTab == NULL)) {
        ctxt->nsMax = 10;
        ctxt->nsNr = 0;
        ctxt->nsTab = (const xmlChar **)
                      xmlMalloc(ctxt->nsMax * sizeof(xmlChar *));
        if (ctxt->nsTab == NULL) {
            xmlErrMemory(ctxt, NULL);
            ctxt->nsMax = 0;
            return (-1);
        }
    } else if (ctxt->nsNr >= ctxt->nsMax) {
        const xmlChar **tmp;

        /*
         * Grow through a temporary so that a failed reallocation keeps
         * the existing table (and nsMax) intact instead of leaking it.
         */
        tmp = (const xmlChar **)
              xmlRealloc((char *) ctxt->nsTab,
                         ctxt->nsMax * 2 * sizeof(ctxt->nsTab[0]));
        if (tmp == NULL) {
            xmlErrMemory(ctxt, NULL);
            return (-1);
        }
        ctxt->nsTab = tmp;
        ctxt->nsMax *= 2;
    }
    ctxt->nsTab[ctxt->nsNr++] = prefix;
    ctxt->nsTab[ctxt->nsNr++] = URL;
    return (ctxt->nsNr);
}
/**
 * nsPop:
 * @ctxt: an XML parser context
 * @nr:  the number of stack slots to pop
 *
 * Pops the top @nr entries from the ns stack, resetting each freed slot
 * to NULL. A request larger than the current stack size is reported on
 * the generic error channel and clamped to that size.
 *
 * Returns the number of entries removed, 0 if the stack is unallocated
 *         or empty
 */
static int
nsPop(xmlParserCtxtPtr ctxt, int nr)
{
    int left;

    if (ctxt->nsTab == NULL)
        return(0);
    if (nr > ctxt->nsNr) {
        xmlGenericError(xmlGenericErrorContext, "Pbm popping %d NS\n", nr);
        nr = ctxt->nsNr;
    }
    if (ctxt->nsNr <= 0)
        return (0);

    left = nr;
    while (left > 0) {
        ctxt->nsNr -= 1;
        ctxt->nsTab[ctxt->nsNr] = NULL;
        left--;
    }
    return(nr);
}
#endif

/**
 * xmlCtxtGrowAttrs:
 * @ctxt: an XML parser context
 * @nr:  the number of slots currently needed
 *
 * Makes sure the ctxt->atts and ctxt->attallocs arrays exist and are large
 * enough. On first use 55 pointer slots are allocated; afterwards the
 * arrays are enlarged to (@nr + 5) * 2 slots whenever @nr + 5 exceeds
 * ctxt->maxatts. ctxt->attallocs always receives one int per 5 slots of
 * ctxt->atts.
 *
 * Returns the capacity recorded in ctxt->maxatts, or -1 in case of
 *         memory allocation failure
 */
static int
xmlCtxtGrowAttrs(xmlParserCtxtPtr ctxt, int nr) {
    const xmlChar **table;
    int *allocs;
    int size;

    if (ctxt->atts == NULL) {
        /* initial allocation, the requested size is not looked at here */
        size = 55;
        table = (const xmlChar **) xmlMalloc(size * sizeof(xmlChar *));
        if (table == NULL)
            goto oom;
        ctxt->atts = table;
        allocs = (int *) xmlMalloc((size / 5) * sizeof(int));
    } else {
        /* already big enough: nothing to do */
        if (nr + 5 <= ctxt->maxatts)
            return(ctxt->maxatts);

        size = (nr + 5) * 2;
        table = (const xmlChar **) xmlRealloc((void *) ctxt->atts,
                                              size * sizeof(const xmlChar *));
        if (table == NULL)
            goto oom;
        ctxt->atts = table;
        allocs = (int *) xmlRealloc((void *) ctxt->attallocs,
                                    (size / 5) * sizeof(int));
    }

    /* the new capacity is only recorded once both arrays are in place */
    if (allocs == NULL)
        goto oom;
    ctxt->attallocs = allocs;
    ctxt->maxatts = size;
    return(ctxt->maxatts);

oom:
    xmlErrMemory(ctxt, NULL);
    return(-1);
}

943 944 945
/**
 * inputPush:
 * @ctxt:  an XML parser context
946
 * @value:  the parser input
947 948
 *
 * Pushes a new parser input on top of the input stack
949 950
 *
 * Returns 0 in case of error, the index in the stack otherwise
951
 */
952
int
Daniel Veillard's avatar
Daniel Veillard committed
953 954
inputPush(xmlParserCtxtPtr ctxt, xmlParserInputPtr value)
{
955 956
    if ((ctxt == NULL) || (value == NULL))
        return(0);
Daniel Veillard's avatar
Daniel Veillard committed
957 958 959 960 961 962 963
    if (ctxt->inputNr >= ctxt->inputMax) {
        ctxt->inputMax *= 2;
        ctxt->inputTab =
            (xmlParserInputPtr *) xmlRealloc(ctxt->inputTab,
                                             ctxt->inputMax *
                                             sizeof(ctxt->inputTab[0]));
        if (ctxt->inputTab == NULL) {
964
            xmlErrMemory(ctxt, NULL);
Daniel Veillard's avatar
Daniel Veillard committed
965 966 967 968 969 970 971
            return (0);
        }
    }
    ctxt->inputTab[ctxt->inputNr] = value;
    ctxt->input = value;
    return (ctxt->inputNr++);
}
972
/**
Daniel Veillard's avatar
Daniel Veillard committed
973
 * inputPop:
974 975
 * @ctxt: an XML parser context
 *
Daniel Veillard's avatar
Daniel Veillard committed
976
 * Pops the top parser input from the input stack
977
 *
Daniel Veillard's avatar
Daniel Veillard committed
978
 * Returns the input just removed
979
 */
980
xmlParserInputPtr
Daniel Veillard's avatar
Daniel Veillard committed
981 982 983 984
inputPop(xmlParserCtxtPtr ctxt)
{
    xmlParserInputPtr ret;

985 986
    if (ctxt == NULL)
        return(NULL);
Daniel Veillard's avatar
Daniel Veillard committed
987 988 989 990 991 992 993 994 995 996 997
    if (ctxt->inputNr <= 0)
        return (0);
    ctxt->inputNr--;
    if (ctxt->inputNr > 0)
        ctxt->input = ctxt->inputTab[ctxt->inputNr - 1];
    else
        ctxt->input = NULL;
    ret = ctxt->inputTab[ctxt->inputNr];
    ctxt->inputTab[ctxt->inputNr] = 0;
    return (ret);
}
998
/**
Daniel Veillard's avatar
Daniel Veillard committed
999
 * nodePush:
1000
 * @ctxt:  an XML parser context
Daniel Veillard's avatar
Daniel Veillard committed
1001
 * @value:  the element node
1002
 *
Daniel Veillard's avatar
Daniel Veillard committed
1003
 * Pushes a new element node on top of the node stack
1004 1005
 *
 * Returns 0 in case of error, the index in the stack otherwise
1006
 */
1007
int
Daniel Veillard's avatar
Daniel Veillard committed
1008 1009
nodePush(xmlParserCtxtPtr ctxt, xmlNodePtr value)
{
1010
    if (ctxt == NULL) return(0);
Daniel Veillard's avatar
Daniel Veillard committed
1011
    if (ctxt->nodeNr >= ctxt->nodeMax) {
1012 1013 1014 1015
        xmlNodePtr *tmp;

	tmp = (xmlNodePtr *) xmlRealloc(ctxt->nodeTab,
                                      ctxt->nodeMax * 2 *
Daniel Veillard's avatar
Daniel Veillard committed
1016
                                      sizeof(ctxt->nodeTab[0]));
1017
        if (tmp == NULL) {
1018
            xmlErrMemory(ctxt, NULL);
Daniel Veillard's avatar
Daniel Veillard committed
1019 1020
            return (0);
        }
1021 1022
        ctxt->nodeTab = tmp;
	ctxt->nodeMax *= 2;