Annotation of XML/parser.c, revision 1.124
1.1 veillard 1: /*
1.3 veillard 2: * parser.c : an XML 1.0 non-verifying parser
1.15 veillard 3: *
4: * See Copyright for the status of this software.
5: *
1.60 daniel 6: * Daniel.Veillard@w3.org
1.1 veillard 7: */
8:
1.26 daniel 9: #ifdef WIN32
10: #define HAVE_FCNTL_H
11: #include <io.h>
12: #else
1.121 daniel 13: #include "config.h"
1.26 daniel 14: #endif
1.121 daniel 15:
1.1 veillard 16: #include <stdio.h>
1.121 daniel 17: #include <string.h> /* for memset() only */
18: #ifdef HAVE_CTYPE_H
1.1 veillard 19: #include <ctype.h>
1.121 daniel 20: #endif
21: #ifdef HAVE_STDLIB_H
1.50 daniel 22: #include <stdlib.h>
1.121 daniel 23: #endif
24: #ifdef HAVE_SYS_STAT_H
1.9 httpng 25: #include <sys/stat.h>
1.121 daniel 26: #endif
1.9 httpng 27: #ifdef HAVE_FCNTL_H
28: #include <fcntl.h>
29: #endif
1.10 httpng 30: #ifdef HAVE_UNISTD_H
31: #include <unistd.h>
32: #endif
1.20 daniel 33: #ifdef HAVE_ZLIB_H
34: #include <zlib.h>
35: #endif
1.1 veillard 36:
1.119 daniel 37: #include "xmlmemory.h"
1.14 veillard 38: #include "tree.h"
1.1 veillard 39: #include "parser.h"
1.14 veillard 40: #include "entities.h"
1.75 daniel 41: #include "encoding.h"
1.61 daniel 42: #include "valid.h"
1.69 daniel 43: #include "parserInternals.h"
1.91 daniel 44: #include "xmlIO.h"
1.122 daniel 45: #include "xml-error.h"
1.1 veillard 46:
/* Version string of the library, taken from the LIBXML_VERSION macro. */
const char *xmlParserVersion = LIBXML_VERSION;
48:
1.91 daniel 49:
50: /************************************************************************
51: * *
52: * Input handling functions for progressive parsing *
53: * *
54: ************************************************************************/
55:
56: /* #define DEBUG_INPUT */
57:
/*
 * Chunk size used by the input helpers below: it is the amount of data
 * requested when growing an input and the threshold compared against the
 * consumed / available byte counts when deciding to shrink or refill.
 */
#define INPUT_CHUNK	250
/* we need to keep enough input to show errors in context */
#define LINE_LEN        80
1.91 daniel 61:
62: #ifdef DEBUG_INPUT
63: #define CHECK_BUFFER(in) check_buffer(in)
64:
65: void check_buffer(xmlParserInputPtr in) {
66: if (in->base != in->buf->buffer->content) {
67: fprintf(stderr, "xmlParserInput: base mismatch problem\n");
68: }
69: if (in->cur < in->base) {
70: fprintf(stderr, "xmlParserInput: cur < base problem\n");
71: }
72: if (in->cur > in->base + in->buf->buffer->use) {
73: fprintf(stderr, "xmlParserInput: cur > base + use problem\n");
74: }
75: fprintf(stderr,"buffer %x : content %x, cur %d, use %d, size %d\n",
76: (int) in, (int) in->buf->buffer->content, in->cur - in->base,
77: in->buf->buffer->use, in->buf->buffer->size);
78: }
79:
1.110 daniel 80: #else
81: #define CHECK_BUFFER(in)
82: #endif
83:
1.91 daniel 84:
85: /**
86: * xmlParserInputRead:
87: * @in: an XML parser input
88: * @len: an indicative size for the lookahead
89: *
90: * This function refresh the input for the parser. It doesn't try to
91: * preserve pointers to the input buffer, and discard already read data
92: *
1.123 daniel 93: * Returns the number of xmlChars read, or -1 in case of error, 0 indicate the
1.91 daniel 94: * end of this entity
95: */
96: int
97: xmlParserInputRead(xmlParserInputPtr in, int len) {
98: int ret;
99: int used;
100: int index;
101:
102: #ifdef DEBUG_INPUT
103: fprintf(stderr, "Read\n");
104: #endif
105: if (in->buf == NULL) return(-1);
106: if (in->base == NULL) return(-1);
107: if (in->cur == NULL) return(-1);
108: if (in->buf->buffer == NULL) return(-1);
109:
110: CHECK_BUFFER(in);
111:
112: used = in->cur - in->buf->buffer->content;
113: ret = xmlBufferShrink(in->buf->buffer, used);
114: if (ret > 0) {
115: in->cur -= ret;
116: in->consumed += ret;
117: }
118: ret = xmlParserInputBufferRead(in->buf, len);
119: if (in->base != in->buf->buffer->content) {
120: /*
121: * the buffer has been realloced
122: */
123: index = in->cur - in->base;
124: in->base = in->buf->buffer->content;
125: in->cur = &in->buf->buffer->content[index];
126: }
127:
128: CHECK_BUFFER(in);
129:
130: return(ret);
131: }
132:
133: /**
134: * xmlParserInputGrow:
135: * @in: an XML parser input
136: * @len: an indicative size for the lookahead
137: *
138: * This function increase the input for the parser. It tries to
139: * preserve pointers to the input buffer, and keep already read data
140: *
1.123 daniel 141: * Returns the number of xmlChars read, or -1 in case of error, 0 indicate the
1.91 daniel 142: * end of this entity
143: */
144: int
145: xmlParserInputGrow(xmlParserInputPtr in, int len) {
146: int ret;
147: int index;
148:
149: #ifdef DEBUG_INPUT
150: fprintf(stderr, "Grow\n");
151: #endif
152: if (in->buf == NULL) return(-1);
153: if (in->base == NULL) return(-1);
154: if (in->cur == NULL) return(-1);
155: if (in->buf->buffer == NULL) return(-1);
156:
157: CHECK_BUFFER(in);
158:
159: index = in->cur - in->base;
160: if (in->buf->buffer->use > index + INPUT_CHUNK) {
161:
162: CHECK_BUFFER(in);
163:
164: return(0);
165: }
166: ret = xmlParserInputBufferGrow(in->buf, len);
167: if (in->base != in->buf->buffer->content) {
168: /*
169: * the buffer has been realloced
170: */
171: index = in->cur - in->base;
172: in->base = in->buf->buffer->content;
173: in->cur = &in->buf->buffer->content[index];
174: }
175:
176: CHECK_BUFFER(in);
177:
178: return(ret);
179: }
180:
181: /**
182: * xmlParserInputShrink:
183: * @in: an XML parser input
184: *
185: * This function removes used input for the parser.
186: */
187: void
188: xmlParserInputShrink(xmlParserInputPtr in) {
189: int used;
190: int ret;
191: int index;
192:
193: #ifdef DEBUG_INPUT
194: fprintf(stderr, "Shrink\n");
195: #endif
196: if (in->buf == NULL) return;
197: if (in->base == NULL) return;
198: if (in->cur == NULL) return;
199: if (in->buf->buffer == NULL) return;
200:
201: CHECK_BUFFER(in);
202:
203: used = in->cur - in->buf->buffer->content;
204: if (used > INPUT_CHUNK) {
1.110 daniel 205: ret = xmlBufferShrink(in->buf->buffer, used - LINE_LEN);
1.91 daniel 206: if (ret > 0) {
207: in->cur -= ret;
208: in->consumed += ret;
209: }
210: }
211:
212: CHECK_BUFFER(in);
213:
214: if (in->buf->buffer->use > INPUT_CHUNK) {
215: return;
216: }
217: xmlParserInputBufferRead(in->buf, 2 * INPUT_CHUNK);
218: if (in->base != in->buf->buffer->content) {
219: /*
220: * the buffer has been realloced
221: */
222: index = in->cur - in->base;
223: in->base = in->buf->buffer->content;
224: in->cur = &in->buf->buffer->content[index];
225: }
226:
227: CHECK_BUFFER(in);
228: }
229:
1.45 daniel 230: /************************************************************************
231: * *
232: * Parser stacks related functions and macros *
233: * *
234: ************************************************************************/
1.79 daniel 235:
/* Default copied into ctxt->replaceEntities by xmlInitParserCtxt(). */
int xmlSubstituteEntitiesDefaultValue = 0;
/* Default copied into ctxt->validate by xmlInitParserCtxt(). */
int xmlDoValidityCheckingDefaultValue = 0;
1.79 daniel 238:
1.1 veillard 239: /*
1.40 daniel 240: * Generic function for accessing stacks in the Parser Context
1.1 veillard 241: */
242:
1.31 daniel 243: #define PUSH_AND_POP(type, name) \
1.72 daniel 244: extern int name##Push(xmlParserCtxtPtr ctxt, type value) { \
1.31 daniel 245: if (ctxt->name##Nr >= ctxt->name##Max) { \
246: ctxt->name##Max *= 2; \
1.119 daniel 247: ctxt->name##Tab = (void *) xmlRealloc(ctxt->name##Tab, \
1.40 daniel 248: ctxt->name##Max * sizeof(ctxt->name##Tab[0])); \
249: if (ctxt->name##Tab == NULL) { \
1.31 daniel 250: fprintf(stderr, "realloc failed !\n"); \
251: exit(1); \
252: } \
253: } \
1.40 daniel 254: ctxt->name##Tab[ctxt->name##Nr] = value; \
255: ctxt->name = value; \
256: return(ctxt->name##Nr++); \
1.31 daniel 257: } \
1.72 daniel 258: extern type name##Pop(xmlParserCtxtPtr ctxt) { \
1.69 daniel 259: type ret; \
1.40 daniel 260: if (ctxt->name##Nr <= 0) return(0); \
261: ctxt->name##Nr--; \
1.50 daniel 262: if (ctxt->name##Nr > 0) \
263: ctxt->name = ctxt->name##Tab[ctxt->name##Nr - 1]; \
264: else \
265: ctxt->name = NULL; \
1.69 daniel 266: ret = ctxt->name##Tab[ctxt->name##Nr]; \
267: ctxt->name##Tab[ctxt->name##Nr] = 0; \
268: return(ret); \
1.31 daniel 269: } \
270:
1.40 daniel 271: PUSH_AND_POP(xmlParserInputPtr, input)
1.41 daniel 272: PUSH_AND_POP(xmlNodePtr, node)
1.40 daniel 273:
/*
 * Macros for accessing the content. Those should be used only by the parser,
 * and not exported. They all expect a variable named ctxt, holding the
 * parser context, to be in scope.
 *
 * Dirty macros, i.e. one needs to make assumptions on the context to use them
 *
 *   CUR_PTR the current pointer to the xmlChar to be parsed.
 *   CUR     the pending ctxt->token if there is one, otherwise the current
 *           xmlChar value. It should be used internally by the parser only
 *           to compare to ASCII values, otherwise it would break when
 *           running with UTF-8 encoding.
 *   NXT(n)  the n'th next xmlChar. Same as CUR, it should be used only
 *           to compare on ASCII based substrings.
 *   SKIP(n) skip n xmlChars, and must also be used only to skip ASCII
 *           defined strings within the parser.
 *   SHRINK  discard consumed input (xmlParserInputShrink), then pop the
 *           current input if it is exhausted and cannot be grown.
 *   GROW    ask for more lookahead (xmlParserInputGrow), then pop the
 *           current input if it is exhausted and cannot be grown.
 *           SHRINK and GROW are wrapped in do { } while (0) so that they
 *           behave as a single statement, e.g. after an unbraced if.
 *
 * Other macros
 *
 *   CURRENT     the raw xmlChar at the current position (a pending
 *               ctxt->token is ignored).
 *   NEXT        drop the pending token if there is one; otherwise advance
 *               by one xmlChar, maintaining the line/col counters, growing
 *               or popping the input when it is exhausted, and then hand
 *               over to the reference handlers if the new current char is
 *               '%' or '&'.
 *   SKIP_BLANKS skip blank characters, letting the reference handlers run
 *               when a '%' or '&' is found, until no blank is left.
 *
 * NOTE: NEXT expands to a brace block and SKIP_BLANKS carries its own
 *       trailing semicolon; both are kept in that form so existing uses
 *       keep compiling.
 */

#define CUR (ctxt->token ? ctxt->token : (*ctxt->input->cur))
#define SKIP(val) ctxt->input->cur += (val)
#define NXT(val) ctxt->input->cur[(val)]
#define CUR_PTR ctxt->input->cur

#define SHRINK do {							\
    xmlParserInputShrink(ctxt->input);					\
    if ((*ctxt->input->cur == 0) &&					\
        (xmlParserInputGrow(ctxt->input, INPUT_CHUNK) <= 0))		\
        xmlPopInput(ctxt);						\
  } while (0)

#define GROW do {							\
    xmlParserInputGrow(ctxt->input, INPUT_CHUNK);			\
    if ((*ctxt->input->cur == 0) &&					\
        (xmlParserInputGrow(ctxt->input, INPUT_CHUNK) <= 0))		\
        xmlPopInput(ctxt);						\
  } while (0)

#define SKIP_BLANKS							\
    do {								\
        while (IS_BLANK(CUR)) NEXT;					\
        if (*ctxt->input->cur == '%') xmlParserHandlePEReference(ctxt);	\
        if (*ctxt->input->cur == '&') xmlParserHandleReference(ctxt);	\
    } while (IS_BLANK(CUR));

#define CURRENT (*ctxt->input->cur)
#define NEXT {								\
    if (ctxt->token != 0) ctxt->token = 0;				\
    else {								\
    if ((*ctxt->input->cur == 0) &&					\
        (xmlParserInputGrow(ctxt->input, INPUT_CHUNK) <= 0)) {		\
            xmlPopInput(ctxt);						\
    } else {								\
        if (*(ctxt->input->cur) == '\n') {				\
            ctxt->input->line++; ctxt->input->col = 1;			\
        } else ctxt->input->col++;					\
        ctxt->input->cur++;						\
        if (*ctxt->input->cur == 0)					\
            xmlParserInputGrow(ctxt->input, INPUT_CHUNK);		\
    }									\
    if (*ctxt->input->cur == '%') xmlParserHandlePEReference(ctxt);	\
    if (*ctxt->input->cur == '&') xmlParserHandleReference(ctxt);	\
}}
1.91 daniel 339:
1.42 daniel 340:
1.97 daniel 341: /************************************************************************
342: * *
343: * Commodity functions to handle entities processing *
344: * *
345: ************************************************************************/
1.40 daniel 346:
1.50 daniel 347: /**
348: * xmlPopInput:
349: * @ctxt: an XML parser context
350: *
1.40 daniel 351: * xmlPopInput: the current input pointed by ctxt->input came to an end
352: * pop it and return the next char.
1.45 daniel 353: *
1.123 daniel 354: * Returns the current xmlChar in the parser context
1.40 daniel 355: */
1.123 daniel 356: xmlChar
1.55 daniel 357: xmlPopInput(xmlParserCtxtPtr ctxt) {
1.40 daniel 358: if (ctxt->inputNr == 1) return(0); /* End of main Input */
1.69 daniel 359: xmlFreeInputStream(inputPop(ctxt));
1.97 daniel 360: if ((*ctxt->input->cur == 0) &&
361: (xmlParserInputGrow(ctxt->input, INPUT_CHUNK) <= 0))
362: return(xmlPopInput(ctxt));
1.40 daniel 363: return(CUR);
364: }
365:
1.50 daniel 366: /**
367: * xmlPushInput:
368: * @ctxt: an XML parser context
369: * @input: an XML parser input fragment (entity, XML fragment ...).
370: *
1.40 daniel 371: * xmlPushInput: switch to a new input stream which is stacked on top
372: * of the previous one(s).
373: */
1.55 daniel 374: void
375: xmlPushInput(xmlParserCtxtPtr ctxt, xmlParserInputPtr input) {
1.40 daniel 376: if (input == NULL) return;
377: inputPush(ctxt, input);
378: }
379:
1.50 daniel 380: /**
1.69 daniel 381: * xmlFreeInputStream:
1.101 daniel 382: * @input: an xmlP arserInputPtr
1.69 daniel 383: *
384: * Free up an input stream.
385: */
386: void
387: xmlFreeInputStream(xmlParserInputPtr input) {
388: if (input == NULL) return;
389:
1.119 daniel 390: if (input->filename != NULL) xmlFree((char *) input->filename);
391: if (input->directory != NULL) xmlFree((char *) input->directory);
1.69 daniel 392: if ((input->free != NULL) && (input->base != NULL))
1.123 daniel 393: input->free((xmlChar *) input->base);
1.93 veillard 394: if (input->buf != NULL)
395: xmlFreeParserInputBuffer(input->buf);
1.69 daniel 396: memset(input, -1, sizeof(xmlParserInput));
1.119 daniel 397: xmlFree(input);
1.69 daniel 398: }
399:
400: /**
1.96 daniel 401: * xmlNewInputStream:
402: * @ctxt: an XML parser context
403: *
404: * Create a new input stream structure
405: * Returns the new input stream or NULL
406: */
407: xmlParserInputPtr
408: xmlNewInputStream(xmlParserCtxtPtr ctxt) {
409: xmlParserInputPtr input;
410:
1.119 daniel 411: input = (xmlParserInputPtr) xmlMalloc(sizeof(xmlParserInput));
1.96 daniel 412: if (input == NULL) {
1.123 daniel 413: ctxt->errNo = XML_ERR_NO_MEMORY;
1.96 daniel 414: if ((ctxt->sax != NULL) && (ctxt->sax->error != NULL))
1.122 daniel 415: ctxt->sax->error(ctxt->userData,
416: "malloc: couldn't allocate a new input stream\n");
1.123 daniel 417: ctxt->errNo = XML_ERR_NO_MEMORY;
1.96 daniel 418: return(NULL);
419: }
420: input->filename = NULL;
421: input->directory = NULL;
422: input->base = NULL;
423: input->cur = NULL;
424: input->buf = NULL;
425: input->line = 1;
426: input->col = 1;
427: input->buf = NULL;
428: input->free = NULL;
429: input->consumed = 0;
430: return(input);
431: }
432:
433: /**
1.50 daniel 434: * xmlNewEntityInputStream:
435: * @ctxt: an XML parser context
436: * @entity: an Entity pointer
437: *
1.82 daniel 438: * Create a new input stream based on an xmlEntityPtr
1.113 daniel 439: *
440: * Returns the new input stream or NULL
1.45 daniel 441: */
1.50 daniel 442: xmlParserInputPtr
443: xmlNewEntityInputStream(xmlParserCtxtPtr ctxt, xmlEntityPtr entity) {
1.45 daniel 444: xmlParserInputPtr input;
445:
446: if (entity == NULL) {
1.123 daniel 447: ctxt->errNo = XML_ERR_INTERNAL_ERROR;
1.55 daniel 448: if ((ctxt->sax != NULL) && (ctxt->sax->error != NULL))
1.74 daniel 449: ctxt->sax->error(ctxt->userData,
1.45 daniel 450: "internal: xmlNewEntityInputStream entity = NULL\n");
1.123 daniel 451: ctxt->errNo = XML_ERR_INTERNAL_ERROR;
1.50 daniel 452: return(NULL);
1.45 daniel 453: }
454: if (entity->content == NULL) {
1.113 daniel 455: switch (entity->type) {
456: case XML_EXTERNAL_GENERAL_UNPARSED_ENTITY:
1.123 daniel 457: ctxt->errNo = XML_ERR_UNPARSED_ENTITY;
1.113 daniel 458: if ((ctxt->sax != NULL) && (ctxt->sax->error != NULL))
459: ctxt->sax->error(ctxt->userData,
460: "xmlNewEntityInputStream unparsed entity !\n");
461: break;
462: case XML_EXTERNAL_GENERAL_PARSED_ENTITY:
463: case XML_EXTERNAL_PARAMETER_ENTITY:
1.116 daniel 464: return(xmlLoadExternalEntity((char *) entity->SystemID,
465: (char *) entity->ExternalID, ctxt->input));
1.113 daniel 466: case XML_INTERNAL_GENERAL_ENTITY:
467: if ((ctxt->sax != NULL) && (ctxt->sax->error != NULL))
468: ctxt->sax->error(ctxt->userData,
469: "Internal entity %s without content !\n", entity->name);
470: break;
471: case XML_INTERNAL_PARAMETER_ENTITY:
1.123 daniel 472: ctxt->errNo = XML_ERR_INTERNAL_ERROR;
1.113 daniel 473: if ((ctxt->sax != NULL) && (ctxt->sax->error != NULL))
474: ctxt->sax->error(ctxt->userData,
475: "Internal parameter entity %s without content !\n", entity->name);
476: break;
477: case XML_INTERNAL_PREDEFINED_ENTITY:
1.123 daniel 478: ctxt->errNo = XML_ERR_INTERNAL_ERROR;
1.113 daniel 479: if ((ctxt->sax != NULL) && (ctxt->sax->error != NULL))
480: ctxt->sax->error(ctxt->userData,
481: "Predefined entity %s without content !\n", entity->name);
482: break;
483: }
1.50 daniel 484: return(NULL);
1.45 daniel 485: }
1.96 daniel 486: input = xmlNewInputStream(ctxt);
1.45 daniel 487: if (input == NULL) {
1.50 daniel 488: return(NULL);
1.45 daniel 489: }
1.123 daniel 490: input->filename = (char *) entity->SystemID; /* TODO !!! char <- xmlChar */
1.45 daniel 491: input->base = entity->content;
492: input->cur = entity->content;
1.50 daniel 493: return(input);
1.45 daniel 494: }
495:
1.59 daniel 496: /**
497: * xmlNewStringInputStream:
498: * @ctxt: an XML parser context
1.96 daniel 499: * @buffer: an memory buffer
1.59 daniel 500: *
501: * Create a new input stream based on a memory buffer.
1.68 daniel 502: * Returns the new input stream
1.59 daniel 503: */
504: xmlParserInputPtr
1.123 daniel 505: xmlNewStringInputStream(xmlParserCtxtPtr ctxt, const xmlChar *buffer) {
1.59 daniel 506: xmlParserInputPtr input;
507:
1.96 daniel 508: if (buffer == NULL) {
1.123 daniel 509: ctxt->errNo = XML_ERR_INTERNAL_ERROR;
1.59 daniel 510: if ((ctxt->sax != NULL) && (ctxt->sax->error != NULL))
1.74 daniel 511: ctxt->sax->error(ctxt->userData,
1.59 daniel 512: "internal: xmlNewStringInputStream string = NULL\n");
513: return(NULL);
514: }
1.96 daniel 515: input = xmlNewInputStream(ctxt);
1.59 daniel 516: if (input == NULL) {
517: return(NULL);
518: }
1.96 daniel 519: input->base = buffer;
520: input->cur = buffer;
1.59 daniel 521: return(input);
522: }
523:
1.76 daniel 524: /**
525: * xmlNewInputFromFile:
526: * @ctxt: an XML parser context
527: * @filename: the filename to use as entity
528: *
529: * Create a new input stream based on a file.
530: *
531: * Returns the new input stream or NULL in case of error
532: */
533: xmlParserInputPtr
1.79 daniel 534: xmlNewInputFromFile(xmlParserCtxtPtr ctxt, const char *filename) {
1.91 daniel 535: xmlParserInputBufferPtr buf;
1.76 daniel 536: xmlParserInputPtr inputStream;
1.111 daniel 537: char *directory = NULL;
1.76 daniel 538:
1.96 daniel 539: if (ctxt == NULL) return(NULL);
1.91 daniel 540: buf = xmlParserInputBufferCreateFilename(filename, XML_CHAR_ENCODING_NONE);
1.94 daniel 541: if (buf == NULL) {
1.106 daniel 542: char name[1024];
543:
1.94 daniel 544: if ((ctxt->input != NULL) && (ctxt->input->directory != NULL)) {
545: #ifdef WIN32
546: sprintf(name, "%s\\%s", ctxt->input->directory, filename);
547: #else
548: sprintf(name, "%s/%s", ctxt->input->directory, filename);
549: #endif
550: buf = xmlParserInputBufferCreateFilename(name,
551: XML_CHAR_ENCODING_NONE);
1.106 daniel 552: if (buf != NULL)
1.119 daniel 553: directory = xmlMemStrdup(ctxt->input->directory);
1.106 daniel 554: }
555: if ((buf == NULL) && (ctxt->directory != NULL)) {
556: #ifdef WIN32
557: sprintf(name, "%s\\%s", ctxt->directory, filename);
558: #else
559: sprintf(name, "%s/%s", ctxt->directory, filename);
560: #endif
561: buf = xmlParserInputBufferCreateFilename(name,
562: XML_CHAR_ENCODING_NONE);
563: if (buf != NULL)
1.119 daniel 564: directory = xmlMemStrdup(ctxt->directory);
1.106 daniel 565: }
566: if (buf == NULL)
1.94 daniel 567: return(NULL);
568: }
569: if (directory == NULL)
570: directory = xmlParserGetDirectory(filename);
1.76 daniel 571:
1.96 daniel 572: inputStream = xmlNewInputStream(ctxt);
1.76 daniel 573: if (inputStream == NULL) {
1.119 daniel 574: if (directory != NULL) xmlFree((char *) directory);
1.76 daniel 575: return(NULL);
576: }
577:
1.119 daniel 578: inputStream->filename = xmlMemStrdup(filename);
1.94 daniel 579: inputStream->directory = directory;
1.91 daniel 580: inputStream->buf = buf;
1.76 daniel 581:
1.91 daniel 582: inputStream->base = inputStream->buf->buffer->content;
583: inputStream->cur = inputStream->buf->buffer->content;
1.106 daniel 584: if ((ctxt->directory == NULL) && (directory != NULL))
1.110 daniel 585: ctxt->directory = directory;
1.76 daniel 586: return(inputStream);
587: }
588:
1.77 daniel 589: /************************************************************************
590: * *
1.97 daniel 591: * Commodity functions to handle parser contexts *
592: * *
593: ************************************************************************/
594:
595: /**
596: * xmlInitParserCtxt:
597: * @ctxt: an XML parser context
598: *
599: * Initialize a parser context
600: */
601:
602: void
603: xmlInitParserCtxt(xmlParserCtxtPtr ctxt)
604: {
605: xmlSAXHandler *sax;
606:
1.119 daniel 607: sax = (xmlSAXHandler *) xmlMalloc(sizeof(xmlSAXHandler));
1.97 daniel 608: if (sax == NULL) {
609: fprintf(stderr, "xmlInitParserCtxt: out of memory\n");
610: }
611:
612: /* Allocate the Input stack */
1.119 daniel 613: ctxt->inputTab = (xmlParserInputPtr *) xmlMalloc(5 * sizeof(xmlParserInputPtr));
1.97 daniel 614: ctxt->inputNr = 0;
615: ctxt->inputMax = 5;
616: ctxt->input = NULL;
617: ctxt->version = NULL;
618: ctxt->encoding = NULL;
619: ctxt->standalone = -1;
1.98 daniel 620: ctxt->hasExternalSubset = 0;
621: ctxt->hasPErefs = 0;
1.97 daniel 622: ctxt->html = 0;
1.98 daniel 623: ctxt->external = 0;
1.97 daniel 624: ctxt->instate = XML_PARSER_PROLOG;
625: ctxt->token = 0;
1.106 daniel 626: ctxt->directory = NULL;
1.97 daniel 627:
628: /* Allocate the Node stack */
1.119 daniel 629: ctxt->nodeTab = (xmlNodePtr *) xmlMalloc(10 * sizeof(xmlNodePtr));
1.97 daniel 630: ctxt->nodeNr = 0;
631: ctxt->nodeMax = 10;
632: ctxt->node = NULL;
633:
634: if (sax == NULL) ctxt->sax = &xmlDefaultSAXHandler;
635: else {
636: ctxt->sax = sax;
637: memcpy(sax, &xmlDefaultSAXHandler, sizeof(xmlSAXHandler));
638: }
639: ctxt->userData = ctxt;
640: ctxt->myDoc = NULL;
641: ctxt->wellFormed = 1;
1.99 daniel 642: ctxt->valid = 1;
1.100 daniel 643: ctxt->validate = xmlDoValidityCheckingDefaultValue;
644: ctxt->vctxt.userData = ctxt;
645: ctxt->vctxt.error = xmlParserValidityError;
646: ctxt->vctxt.warning = xmlParserValidityWarning;
1.97 daniel 647: ctxt->replaceEntities = xmlSubstituteEntitiesDefaultValue;
648: ctxt->record_info = 0;
649: xmlInitNodeInfoSeq(&ctxt->node_seq);
650: }
651:
652: /**
653: * xmlFreeParserCtxt:
654: * @ctxt: an XML parser context
655: *
656: * Free all the memory used by a parser context. However the parsed
657: * document in ctxt->myDoc is not freed.
658: */
659:
660: void
661: xmlFreeParserCtxt(xmlParserCtxtPtr ctxt)
662: {
663: xmlParserInputPtr input;
664:
665: if (ctxt == NULL) return;
666:
667: while ((input = inputPop(ctxt)) != NULL) {
668: xmlFreeInputStream(input);
669: }
670:
1.119 daniel 671: if (ctxt->nodeTab != NULL) xmlFree(ctxt->nodeTab);
672: if (ctxt->inputTab != NULL) xmlFree(ctxt->inputTab);
673: if (ctxt->version != NULL) xmlFree((char *) ctxt->version);
674: if (ctxt->encoding != NULL) xmlFree((char *) ctxt->encoding);
1.97 daniel 675: if ((ctxt->sax != NULL) && (ctxt->sax != &xmlDefaultSAXHandler))
1.119 daniel 676: xmlFree(ctxt->sax);
677: if (ctxt->directory != NULL) xmlFree((char *) ctxt->directory);
678: xmlFree(ctxt);
1.97 daniel 679: }
680:
681: /**
682: * xmlNewParserCtxt:
683: *
684: * Allocate and initialize a new parser context.
685: *
686: * Returns the xmlParserCtxtPtr or NULL
687: */
688:
689: xmlParserCtxtPtr
690: xmlNewParserCtxt()
691: {
692: xmlParserCtxtPtr ctxt;
693:
1.119 daniel 694: ctxt = (xmlParserCtxtPtr) xmlMalloc(sizeof(xmlParserCtxt));
1.97 daniel 695: if (ctxt == NULL) {
696: fprintf(stderr, "xmlNewParserCtxt : cannot allocate context\n");
697: perror("malloc");
698: return(NULL);
699: }
700: xmlInitParserCtxt(ctxt);
701: return(ctxt);
702: }
703:
704: /**
705: * xmlClearParserCtxt:
706: * @ctxt: an XML parser context
707: *
708: * Clear (release owned resources) and reinitialize a parser context
709: */
710:
711: void
712: xmlClearParserCtxt(xmlParserCtxtPtr ctxt)
713: {
714: xmlClearNodeInfoSeq(&ctxt->node_seq);
715: xmlInitParserCtxt(ctxt);
716: }
717:
718: /************************************************************************
719: * *
1.77 daniel 720: * Commodity functions to handle entities *
721: * *
722: ************************************************************************/
723:
/*
 * Forward declarations of the two reference handlers, which are invoked
 * from the NEXT and SKIP_BLANKS macros.
 */
void xmlParserHandleReference(xmlParserCtxtPtr ctxt);
void xmlParserHandlePEReference(xmlParserCtxtPtr ctxt);
726:
727: /**
728: * xmlParseCharRef:
729: * @ctxt: an XML parser context
730: *
731: * parse Reference declarations
732: *
733: * [66] CharRef ::= '&#' [0-9]+ ';' |
734: * '&#x' [0-9a-fA-F]+ ';'
735: *
1.98 daniel 736: * [ WFC: Legal Character ]
737: * Characters referred to using character references must match the
738: * production for Char.
739: *
1.97 daniel 740: * Returns the value parsed (as an int)
1.77 daniel 741: */
1.97 daniel 742: int
743: xmlParseCharRef(xmlParserCtxtPtr ctxt) {
744: int val = 0;
745:
1.111 daniel 746: if (ctxt->token != 0) {
747: val = ctxt->token;
748: ctxt->token = 0;
749: return(val);
750: }
1.97 daniel 751: if ((CUR == '&') && (NXT(1) == '#') &&
752: (NXT(2) == 'x')) {
753: SKIP(3);
754: while (CUR != ';') {
755: if ((CUR >= '0') && (CUR <= '9'))
756: val = val * 16 + (CUR - '0');
757: else if ((CUR >= 'a') && (CUR <= 'f'))
758: val = val * 16 + (CUR - 'a') + 10;
759: else if ((CUR >= 'A') && (CUR <= 'F'))
760: val = val * 16 + (CUR - 'A') + 10;
761: else {
1.123 daniel 762: ctxt->errNo = XML_ERR_INVALID_HEX_CHARREF;
1.97 daniel 763: if ((ctxt->sax != NULL) && (ctxt->sax->error != NULL))
764: ctxt->sax->error(ctxt->userData,
765: "xmlParseCharRef: invalid hexadecimal value\n");
766: ctxt->wellFormed = 0;
767: val = 0;
768: break;
769: }
770: NEXT;
771: }
772: if (CUR == ';')
773: NEXT;
774: } else if ((CUR == '&') && (NXT(1) == '#')) {
775: SKIP(2);
776: while (CUR != ';') {
777: if ((CUR >= '0') && (CUR <= '9'))
778: val = val * 10 + (CUR - '0');
779: else {
1.123 daniel 780: ctxt->errNo = XML_ERR_INVALID_DEC_CHARREF;
1.97 daniel 781: if ((ctxt->sax != NULL) && (ctxt->sax->error != NULL))
782: ctxt->sax->error(ctxt->userData,
783: "xmlParseCharRef: invalid decimal value\n");
784: ctxt->wellFormed = 0;
785: val = 0;
786: break;
787: }
788: NEXT;
789: }
790: if (CUR == ';')
791: NEXT;
792: } else {
1.123 daniel 793: ctxt->errNo = XML_ERR_INVALID_CHARREF;
1.97 daniel 794: if ((ctxt->sax != NULL) && (ctxt->sax->error != NULL))
1.98 daniel 795: ctxt->sax->error(ctxt->userData,
796: "xmlParseCharRef: invalid value\n");
1.97 daniel 797: ctxt->wellFormed = 0;
798: }
1.98 daniel 799:
1.97 daniel 800: /*
1.98 daniel 801: * [ WFC: Legal Character ]
802: * Characters referred to using character references must match the
803: * production for Char.
1.97 daniel 804: */
805: if (IS_CHAR(val)) {
806: return(val);
807: } else {
1.123 daniel 808: ctxt->errNo = XML_ERR_INVALID_CHAR;
1.97 daniel 809: if ((ctxt->sax != NULL) && (ctxt->sax->error != NULL))
1.123 daniel 810: ctxt->sax->error(ctxt->userData, "CharRef: invalid xmlChar value %d\n",
1.97 daniel 811: val);
812: ctxt->wellFormed = 0;
813: }
814: return(0);
1.77 daniel 815: }
816:
1.96 daniel 817: /**
818: * xmlParserHandleReference:
819: * @ctxt: the parser context
820: *
1.97 daniel 821: * [67] Reference ::= EntityRef | CharRef
822: *
1.96 daniel 823: * [68] EntityRef ::= '&' Name ';'
824: *
1.98 daniel 825: * [ WFC: Entity Declared ]
826: * the Name given in the entity reference must match that in an entity
827: * declaration, except that well-formed documents need not declare any
828: * of the following entities: amp, lt, gt, apos, quot.
829: *
830: * [ WFC: Parsed Entity ]
831: * An entity reference must not contain the name of an unparsed entity
832: *
1.97 daniel 833: * [66] CharRef ::= '&#' [0-9]+ ';' |
834: * '&#x' [0-9a-fA-F]+ ';'
835: *
1.96 daniel 836: * A PEReference may have been detectect in the current input stream
837: * the handling is done accordingly to
838: * http://www.w3.org/TR/REC-xml#entproc
839: */
840: void
841: xmlParserHandleReference(xmlParserCtxtPtr ctxt) {
1.97 daniel 842: xmlParserInputPtr input;
1.123 daniel 843: xmlChar *name;
1.97 daniel 844: xmlEntityPtr ent = NULL;
845:
1.111 daniel 846: if (ctxt->token != 0) return;
1.97 daniel 847: if (CUR != '&') return;
848: GROW;
849: if ((CUR == '&') && (NXT(1) == '#')) {
850: switch(ctxt->instate) {
1.109 daniel 851: case XML_PARSER_CDATA_SECTION:
852: return;
1.97 daniel 853: case XML_PARSER_COMMENT:
854: return;
855: case XML_PARSER_EOF:
1.123 daniel 856: ctxt->errNo = XML_ERR_CHARREF_AT_EOF;
1.97 daniel 857: if ((ctxt->sax != NULL) && (ctxt->sax->error != NULL))
858: ctxt->sax->error(ctxt->userData, "CharRef at EOF\n");
859: ctxt->wellFormed = 0;
860: return;
861: case XML_PARSER_PROLOG:
1.123 daniel 862: ctxt->errNo = XML_ERR_CHARREF_IN_PROLOG;
1.97 daniel 863: if ((ctxt->sax != NULL) && (ctxt->sax->error != NULL))
864: ctxt->sax->error(ctxt->userData, "CharRef in prolog!\n");
865: ctxt->wellFormed = 0;
866: return;
867: case XML_PARSER_EPILOG:
1.123 daniel 868: ctxt->errNo = XML_ERR_CHARREF_IN_EPILOG;
1.97 daniel 869: if ((ctxt->sax != NULL) && (ctxt->sax->error != NULL))
870: ctxt->sax->error(ctxt->userData, "CharRef in epilog!\n");
871: ctxt->wellFormed = 0;
872: return;
873: case XML_PARSER_DTD:
1.123 daniel 874: ctxt->errNo = XML_ERR_CHARREF_IN_DTD;
1.97 daniel 875: if ((ctxt->sax != NULL) && (ctxt->sax->error != NULL))
876: ctxt->sax->error(ctxt->userData,
877: "CharRef are forbiden in DTDs!\n");
878: ctxt->wellFormed = 0;
879: return;
880: case XML_PARSER_ENTITY_DECL:
881: /* we just ignore it there */
882: return;
883: case XML_PARSER_ENTITY_VALUE:
884: /*
885: * NOTE: in the case of entity values, we don't do the
886: * substitution here since we need the litteral
887: * entity value to be able to save the internal
888: * subset of the document.
889: * This will be handled by xmlDecodeEntities
890: */
891: return;
892: case XML_PARSER_CONTENT:
893: case XML_PARSER_ATTRIBUTE_VALUE:
1.116 daniel 894: /* !!! this may not be Ok for UTF-8, multibyte sequence */
1.97 daniel 895: ctxt->token = xmlParseCharRef(ctxt);
896: return;
897: }
898: return;
899: }
900:
901: switch(ctxt->instate) {
1.109 daniel 902: case XML_PARSER_CDATA_SECTION:
903: return;
1.97 daniel 904: case XML_PARSER_COMMENT:
905: return;
906: case XML_PARSER_EOF:
1.123 daniel 907: ctxt->errNo = XML_ERR_ENTITYREF_AT_EOF;
1.97 daniel 908: if ((ctxt->sax != NULL) && (ctxt->sax->error != NULL))
909: ctxt->sax->error(ctxt->userData, "Reference at EOF\n");
910: ctxt->wellFormed = 0;
911: return;
912: case XML_PARSER_PROLOG:
1.123 daniel 913: ctxt->errNo = XML_ERR_ENTITYREF_IN_PROLOG;
1.97 daniel 914: if ((ctxt->sax != NULL) && (ctxt->sax->error != NULL))
915: ctxt->sax->error(ctxt->userData, "Reference in prolog!\n");
916: ctxt->wellFormed = 0;
917: return;
918: case XML_PARSER_EPILOG:
1.123 daniel 919: ctxt->errNo = XML_ERR_ENTITYREF_IN_EPILOG;
1.97 daniel 920: if ((ctxt->sax != NULL) && (ctxt->sax->error != NULL))
921: ctxt->sax->error(ctxt->userData, "Reference in epilog!\n");
922: ctxt->wellFormed = 0;
923: return;
924: case XML_PARSER_ENTITY_VALUE:
925: /*
926: * NOTE: in the case of entity values, we don't do the
927: * substitution here since we need the litteral
928: * entity value to be able to save the internal
929: * subset of the document.
930: * This will be handled by xmlDecodeEntities
931: */
932: return;
933: case XML_PARSER_ATTRIBUTE_VALUE:
934: /*
935: * NOTE: in the case of attributes values, we don't do the
936: * substitution here unless we are in a mode where
937: * the parser is explicitely asked to substitute
938: * entities. The SAX callback is called with values
939: * without entity substitution.
940: * This will then be handled by xmlDecodeEntities
941: */
1.113 daniel 942: return;
1.97 daniel 943: case XML_PARSER_ENTITY_DECL:
944: /*
945: * we just ignore it there
946: * the substitution will be done once the entity is referenced
947: */
948: return;
949: case XML_PARSER_DTD:
1.123 daniel 950: ctxt->errNo = XML_ERR_ENTITYREF_IN_DTD;
1.97 daniel 951: if ((ctxt->sax != NULL) && (ctxt->sax->error != NULL))
952: ctxt->sax->error(ctxt->userData,
953: "Entity references are forbiden in DTDs!\n");
954: ctxt->wellFormed = 0;
955: return;
956: case XML_PARSER_CONTENT:
1.113 daniel 957: return;
1.97 daniel 958: }
959:
960: NEXT;
961: name = xmlScanName(ctxt);
962: if (name == NULL) {
1.123 daniel 963: ctxt->errNo = XML_ERR_ENTITYREF_NO_NAME;
1.97 daniel 964: if ((ctxt->sax != NULL) && (ctxt->sax->error != NULL))
965: ctxt->sax->error(ctxt->userData, "Entity reference: no name\n");
966: ctxt->wellFormed = 0;
967: ctxt->token = '&';
968: return;
969: }
970: if (NXT(xmlStrlen(name)) != ';') {
1.123 daniel 971: ctxt->errNo = XML_ERR_ENTITYREF_SEMICOL_MISSING;
1.97 daniel 972: if ((ctxt->sax != NULL) && (ctxt->sax->error != NULL))
973: ctxt->sax->error(ctxt->userData,
974: "Entity reference: ';' expected\n");
975: ctxt->wellFormed = 0;
976: ctxt->token = '&';
1.119 daniel 977: xmlFree(name);
1.97 daniel 978: return;
979: }
980: SKIP(xmlStrlen(name) + 1);
981: if (ctxt->sax != NULL) {
982: if (ctxt->sax->getEntity != NULL)
983: ent = ctxt->sax->getEntity(ctxt->userData, name);
984: }
1.98 daniel 985:
986: /*
987: * [ WFC: Entity Declared ]
988: * the Name given in the entity reference must match that in an entity
989: * declaration, except that well-formed documents need not declare any
990: * of the following entities: amp, lt, gt, apos, quot.
991: */
1.97 daniel 992: if (ent == NULL)
993: ent = xmlGetPredefinedEntity(name);
994: if (ent == NULL) {
1.123 daniel 995: ctxt->errNo = XML_ERR_UNDECLARED_ENTITY;
1.97 daniel 996: if ((ctxt->sax != NULL) && (ctxt->sax->error != NULL))
997: ctxt->sax->error(ctxt->userData,
1.98 daniel 998: "Entity reference: entity %s not declared\n",
999: name);
1.97 daniel 1000: ctxt->wellFormed = 0;
1.119 daniel 1001: xmlFree(name);
1.97 daniel 1002: return;
1003: }
1.98 daniel 1004:
1005: /*
1006: * [ WFC: Parsed Entity ]
1007: * An entity reference must not contain the name of an unparsed entity
1008: */
1009: if (ent->type == XML_EXTERNAL_GENERAL_UNPARSED_ENTITY) {
1.123 daniel 1010: ctxt->errNo = XML_ERR_UNPARSED_ENTITY;
1.98 daniel 1011: if ((ctxt->sax != NULL) && (ctxt->sax->error != NULL))
1012: ctxt->sax->error(ctxt->userData,
1013: "Entity reference to unparsed entity %s\n", name);
1014: ctxt->wellFormed = 0;
1015: }
1016:
1.97 daniel 1017: if (ent->type == XML_INTERNAL_PREDEFINED_ENTITY) {
1018: ctxt->token = ent->content[0];
1.119 daniel 1019: xmlFree(name);
1.97 daniel 1020: return;
1021: }
1022: input = xmlNewEntityInputStream(ctxt, ent);
1023: xmlPushInput(ctxt, input);
1.119 daniel 1024: xmlFree(name);
1.96 daniel 1025: return;
1026: }
1027:
1028: /**
1029: * xmlParserHandlePEReference:
1030: * @ctxt: the parser context
1031: *
1032: * [69] PEReference ::= '%' Name ';'
1033: *
1.98 daniel 1034: * [ WFC: No Recursion ]
1035: * TODO A parsed entity must not contain a recursive
1036: * reference to itself, either directly or indirectly.
1037: *
1038: * [ WFC: Entity Declared ]
1039: * In a document without any DTD, a document with only an internal DTD
1040: * subset which contains no parameter entity references, or a document
1041: * with "standalone='yes'", ... ... The declaration of a parameter
1042: * entity must precede any reference to it...
1043: *
1044: * [ VC: Entity Declared ]
1045: * In a document with an external subset or external parameter entities
1046: * with "standalone='no'", ... ... The declaration of a parameter entity
1047: * must precede any reference to it...
1048: *
1049: * [ WFC: In DTD ]
1050: * Parameter-entity references may only appear in the DTD.
1051: * NOTE: misleading but this is handled.
1052: *
1053: * A PEReference may have been detected in the current input stream
1.96 daniel 1054: * the handling is done accordingly to
1055: * http://www.w3.org/TR/REC-xml#entproc
1056: * i.e.
1057: * - Included in literal in entity values
1058: * - Included as Paraemeter Entity reference within DTDs
1059: */
1060: void
1061: xmlParserHandlePEReference(xmlParserCtxtPtr ctxt) {
1.123 daniel 1062: xmlChar *name;
1.96 daniel 1063: xmlEntityPtr entity = NULL;
1064: xmlParserInputPtr input;
1065:
1.111 daniel 1066: if (ctxt->token != 0) return;
1067: if (CUR != '%') return;
1.96 daniel 1068: switch(ctxt->instate) {
1.109 daniel 1069: case XML_PARSER_CDATA_SECTION:
1070: return;
1.97 daniel 1071: case XML_PARSER_COMMENT:
1072: return;
1.96 daniel 1073: case XML_PARSER_EOF:
1.123 daniel 1074: ctxt->errNo = XML_ERR_PEREF_AT_EOF;
1.96 daniel 1075: if ((ctxt->sax != NULL) && (ctxt->sax->error != NULL))
1076: ctxt->sax->error(ctxt->userData, "PEReference at EOF\n");
1077: ctxt->wellFormed = 0;
1078: return;
1079: case XML_PARSER_PROLOG:
1.123 daniel 1080: ctxt->errNo = XML_ERR_PEREF_IN_PROLOG;
1.96 daniel 1081: if ((ctxt->sax != NULL) && (ctxt->sax->error != NULL))
1082: ctxt->sax->error(ctxt->userData, "PEReference in prolog!\n");
1083: ctxt->wellFormed = 0;
1084: return;
1.97 daniel 1085: case XML_PARSER_ENTITY_DECL:
1.96 daniel 1086: case XML_PARSER_CONTENT:
1087: case XML_PARSER_ATTRIBUTE_VALUE:
1088: /* we just ignore it there */
1089: return;
1090: case XML_PARSER_EPILOG:
1.123 daniel 1091: ctxt->errNo = XML_ERR_PEREF_IN_EPILOG;
1.96 daniel 1092: if ((ctxt->sax != NULL) && (ctxt->sax->error != NULL))
1.97 daniel 1093: ctxt->sax->error(ctxt->userData, "PEReference in epilog!\n");
1.96 daniel 1094: ctxt->wellFormed = 0;
1095: return;
1.97 daniel 1096: case XML_PARSER_ENTITY_VALUE:
1097: /*
1098: * NOTE: in the case of entity values, we don't do the
1099: * substitution here since we need the litteral
1100: * entity value to be able to save the internal
1101: * subset of the document.
1102: * This will be handled by xmlDecodeEntities
1103: */
1104: return;
1.96 daniel 1105: case XML_PARSER_DTD:
1.98 daniel 1106: /*
1107: * [WFC: Well-Formedness Constraint: PEs in Internal Subset]
1108: * In the internal DTD subset, parameter-entity references
1109: * can occur only where markup declarations can occur, not
1110: * within markup declarations.
1111: * In that case this is handled in xmlParseMarkupDecl
1112: */
1113: if ((ctxt->external == 0) && (ctxt->inputNr == 1))
1114: return;
1.96 daniel 1115: }
1116:
1117: NEXT;
1118: name = xmlParseName(ctxt);
1119: if (name == NULL) {
1.123 daniel 1120: ctxt->errNo = XML_ERR_PEREF_NO_NAME;
1.96 daniel 1121: if ((ctxt->sax != NULL) && (ctxt->sax->error != NULL))
1122: ctxt->sax->error(ctxt->userData, "xmlHandlePEReference: no name\n");
1123: ctxt->wellFormed = 0;
1124: } else {
1125: if (CUR == ';') {
1126: NEXT;
1.98 daniel 1127: if ((ctxt->sax != NULL) && (ctxt->sax->getParameterEntity != NULL))
1128: entity = ctxt->sax->getParameterEntity(ctxt->userData, name);
1.96 daniel 1129: if (entity == NULL) {
1.98 daniel 1130:
1131: /*
1132: * [ WFC: Entity Declared ]
1133: * In a document without any DTD, a document with only an
1134: * internal DTD subset which contains no parameter entity
1135: * references, or a document with "standalone='yes'", ...
1136: * ... The declaration of a parameter entity must precede
1137: * any reference to it...
1138: */
1139: if ((ctxt->standalone == 1) ||
1140: ((ctxt->hasExternalSubset == 0) &&
1141: (ctxt->hasPErefs == 0))) {
1142: if ((ctxt->sax != NULL) && (ctxt->sax->error != NULL))
1143: ctxt->sax->error(ctxt->userData,
1144: "PEReference: %%%s; not found\n", name);
1145: ctxt->wellFormed = 0;
1146: } else {
1147: /*
1148: * [ VC: Entity Declared ]
1149: * In a document with an external subset or external
1150: * parameter entities with "standalone='no'", ...
1151: * ... The declaration of a parameter entity must precede
1152: * any reference to it...
1153: */
1154: if ((ctxt->sax != NULL) && (ctxt->sax->warning != NULL))
1155: ctxt->sax->warning(ctxt->userData,
1156: "PEReference: %%%s; not found\n", name);
1157: ctxt->valid = 0;
1158: }
1.96 daniel 1159: } else {
1160: if ((entity->type == XML_INTERNAL_PARAMETER_ENTITY) ||
1161: (entity->type == XML_EXTERNAL_PARAMETER_ENTITY)) {
1162: /*
1163: * TODO !!!! handle the extra spaces added before and after
1164: * c.f. http://www.w3.org/TR/REC-xml#as-PE
1165: * TODO !!!! Avoid quote processing in parameters value
1166: * c.f. http://www.w3.org/TR/REC-xml#inliteral
1167: */
1168: input = xmlNewEntityInputStream(ctxt, entity);
1169: xmlPushInput(ctxt, input);
1170: } else {
1171: if ((ctxt->sax != NULL) && (ctxt->sax->error != NULL))
1172: ctxt->sax->error(ctxt->userData,
1173: "xmlHandlePEReference: %s is not a parameter entity\n",
1174: name);
1175: ctxt->wellFormed = 0;
1176: }
1177: }
1178: } else {
1.123 daniel 1179: ctxt->errNo = XML_ERR_PEREF_SEMICOL_MISSING;
1.96 daniel 1180: if ((ctxt->sax != NULL) && (ctxt->sax->error != NULL))
1181: ctxt->sax->error(ctxt->userData,
1182: "xmlHandlePEReference: expecting ';'\n");
1183: ctxt->wellFormed = 0;
1184: }
1.119 daniel 1185: xmlFree(name);
1.97 daniel 1186: }
1187: }
1188:
/*
 * Macro used to grow the current buffer.
 *
 * growBuffer(name) expects two variables to be in scope at the call site:
 * the xmlChar pointer `name' and its capacity `name_size' (the second
 * identifier is built by token pasting). The capacity is doubled and the
 * storage is resized with xmlRealloc().
 *
 * The buffer pointer is overwritten with the xmlRealloc() result, so any
 * pointer into the old storage has to be recomputed by the caller after
 * the macro ran (callers save an index beforehand and rebuild the pointer
 * from it).
 *
 * On allocation failure the macro reports through perror() and terminates
 * the whole process with exit(1).
 *
 * NOTE: the expansion is a plain brace block, not a do { } while (0)
 * statement.
 */
#define growBuffer(buffer) {						\
    buffer##_size *= 2;							\
    buffer = (xmlChar *) xmlRealloc(buffer, buffer##_size * sizeof(xmlChar));	\
    if (buffer == NULL) {						\
	perror("realloc failed");					\
	exit(1);							\
    }									\
}
1.77 daniel 1200:
1201: /**
1202: * xmlDecodeEntities:
1203: * @ctxt: the parser context
1204: * @what: combination of XML_SUBSTITUTE_REF and XML_SUBSTITUTE_PEREF
1205: * @len: the len to decode (in bytes !), -1 for no size limit
1.123 daniel 1206: * @end: an end marker xmlChar, 0 if none
1207: * @end2: an end marker xmlChar, 0 if none
1208: * @end3: an end marker xmlChar, 0 if none
1.77 daniel 1209: *
1210: * [67] Reference ::= EntityRef | CharRef
1211: *
1212: * [69] PEReference ::= '%' Name ';'
1213: *
1214: * Returns A newly allocated string with the substitution done. The caller
1215: * must deallocate it !
1216: */
1.123 daniel 1217: xmlChar *
1.77 daniel 1218: xmlDecodeEntities(xmlParserCtxtPtr ctxt, int len, int what,
1.123 daniel 1219: xmlChar end, xmlChar end2, xmlChar end3) {
1220: xmlChar *buffer = NULL;
1.78 daniel 1221: int buffer_size = 0;
1.123 daniel 1222: xmlChar *out = NULL;
1.78 daniel 1223:
1.123 daniel 1224: xmlChar *current = NULL;
1.77 daniel 1225: xmlEntityPtr ent;
1.91 daniel 1226: int nbchars = 0;
1.77 daniel 1227: unsigned int max = (unsigned int) len;
1.123 daniel 1228: xmlChar cur;
1.77 daniel 1229:
1230: /*
1231: * allocate a translation buffer.
1232: */
1233: buffer_size = 1000;
1.123 daniel 1234: buffer = (xmlChar *) xmlMalloc(buffer_size * sizeof(xmlChar));
1.77 daniel 1235: if (buffer == NULL) {
1236: perror("xmlDecodeEntities: malloc failed");
1237: return(NULL);
1238: }
1239: out = buffer;
1240:
1.78 daniel 1241: /*
1242: * Ok loop until we reach one of the ending char or a size limit.
1243: */
1.97 daniel 1244: cur = CUR;
1245: while ((nbchars < max) && (cur != end) &&
1246: (cur != end2) && (cur != end3)) {
1.77 daniel 1247:
1.98 daniel 1248: if (cur == 0) break;
1249: if ((cur == '&') && (NXT(1) == '#')) {
1250: int val = xmlParseCharRef(ctxt);
1251: *out++ = val;
1252: nbchars += 3;
1253: } else if ((cur == '&') && (what & XML_SUBSTITUTE_REF)) {
1254: ent = xmlParseEntityRef(ctxt);
1255: if ((ent != NULL) &&
1256: (ctxt->replaceEntities != 0)) {
1257: current = ent->content;
1258: while (*current != 0) {
1259: *out++ = *current++;
1260: if (out - buffer > buffer_size - 100) {
1261: int index = out - buffer;
1262:
1263: growBuffer(buffer);
1264: out = &buffer[index];
1.77 daniel 1265: }
1266: }
1.98 daniel 1267: nbchars += 3 + xmlStrlen(ent->name);
1268: } else if (ent != NULL) {
1269: int i = xmlStrlen(ent->name);
1.123 daniel 1270: const xmlChar *cur = ent->name;
1.98 daniel 1271:
1272: nbchars += i + 2;
1273: *out++ = '&';
1274: if (out - buffer > buffer_size - i - 100) {
1275: int index = out - buffer;
1276:
1277: growBuffer(buffer);
1278: out = &buffer[index];
1279: }
1280: for (;i > 0;i--)
1281: *out++ = *cur++;
1282: *out++ = ';';
1.77 daniel 1283: }
1.97 daniel 1284: } else if (cur == '%' && (what & XML_SUBSTITUTE_PEREF)) {
1285: /*
1.77 daniel 1286: * a PEReference induce to switch the entity flow,
1287: * we break here to flush the current set of chars
1288: * parsed if any. We will be called back later.
1.97 daniel 1289: */
1.91 daniel 1290: if (nbchars != 0) break;
1.77 daniel 1291:
1292: xmlParsePEReference(ctxt);
1.79 daniel 1293:
1.97 daniel 1294: /*
1.79 daniel 1295: * Pop-up of finished entities.
1.97 daniel 1296: */
1.79 daniel 1297: while ((CUR == 0) && (ctxt->inputNr > 1))
1298: xmlPopInput(ctxt);
1299:
1.98 daniel 1300: break;
1.77 daniel 1301: } else {
1.116 daniel 1302: /* invalid for UTF-8 , use COPY(out); !!!!!! */
1.97 daniel 1303: *out++ = cur;
1.91 daniel 1304: nbchars++;
1.86 daniel 1305: if (out - buffer > buffer_size - 100) {
1306: int index = out - buffer;
1307:
1308: growBuffer(buffer);
1309: out = &buffer[index];
1310: }
1.77 daniel 1311: NEXT;
1312: }
1.97 daniel 1313: cur = CUR;
1.77 daniel 1314: }
1315: *out++ = 0;
1316: return(buffer);
1317: }
1318:
1.1 veillard 1319:
1.28 daniel 1320: /************************************************************************
1321: * *
1.75 daniel 1322: * Commodity functions to handle encodings *
1323: * *
1324: ************************************************************************/
1325:
1326: /**
1327: * xmlSwitchEncoding:
1328: * @ctxt: the parser context
1.124 ! daniel 1329: * @enc: the encoding value (number)
1.75 daniel 1330: *
1331: * change the input functions when discovering the character encoding
1332: * of a given entity.
1333: */
1334: void
1335: xmlSwitchEncoding(xmlParserCtxtPtr ctxt, xmlCharEncoding enc)
1336: {
1337: switch (enc) {
1338: case XML_CHAR_ENCODING_ERROR:
1.123 daniel 1339: ctxt->errNo = XML_ERR_UNKNOWN_ENCODING;
1.75 daniel 1340: if ((ctxt->sax != NULL) && (ctxt->sax->error != NULL))
1341: ctxt->sax->error(ctxt->userData, "encoding unknown\n");
1342: ctxt->wellFormed = 0;
1343: break;
1344: case XML_CHAR_ENCODING_NONE:
1345: /* let's assume it's UTF-8 without the XML decl */
1346: return;
1347: case XML_CHAR_ENCODING_UTF8:
1348: /* default encoding, no conversion should be needed */
1349: return;
1350: case XML_CHAR_ENCODING_UTF16LE:
1.123 daniel 1351: ctxt->errNo = XML_ERR_UNSUPPORTED_ENCODING;
1.75 daniel 1352: if ((ctxt->sax != NULL) && (ctxt->sax->error != NULL))
1353: ctxt->sax->error(ctxt->userData,
1354: "char encoding UTF16 little endian not supported\n");
1355: break;
1356: case XML_CHAR_ENCODING_UTF16BE:
1.123 daniel 1357: ctxt->errNo = XML_ERR_UNSUPPORTED_ENCODING;
1.75 daniel 1358: if ((ctxt->sax != NULL) && (ctxt->sax->error != NULL))
1359: ctxt->sax->error(ctxt->userData,
1360: "char encoding UTF16 big endian not supported\n");
1361: break;
1362: case XML_CHAR_ENCODING_UCS4LE:
1.123 daniel 1363: ctxt->errNo = XML_ERR_UNSUPPORTED_ENCODING;
1.75 daniel 1364: if ((ctxt->sax != NULL) && (ctxt->sax->error != NULL))
1365: ctxt->sax->error(ctxt->userData,
1366: "char encoding USC4 little endian not supported\n");
1367: break;
1368: case XML_CHAR_ENCODING_UCS4BE:
1.123 daniel 1369: ctxt->errNo = XML_ERR_UNSUPPORTED_ENCODING;
1.75 daniel 1370: if ((ctxt->sax != NULL) && (ctxt->sax->error != NULL))
1371: ctxt->sax->error(ctxt->userData,
1372: "char encoding USC4 big endian not supported\n");
1373: break;
1374: case XML_CHAR_ENCODING_EBCDIC:
1.123 daniel 1375: ctxt->errNo = XML_ERR_UNSUPPORTED_ENCODING;
1.75 daniel 1376: if ((ctxt->sax != NULL) && (ctxt->sax->error != NULL))
1377: ctxt->sax->error(ctxt->userData,
1378: "char encoding EBCDIC not supported\n");
1379: break;
1380: case XML_CHAR_ENCODING_UCS4_2143:
1.123 daniel 1381: ctxt->errNo = XML_ERR_UNSUPPORTED_ENCODING;
1.75 daniel 1382: if ((ctxt->sax != NULL) && (ctxt->sax->error != NULL))
1383: ctxt->sax->error(ctxt->userData,
1384: "char encoding UCS4 2143 not supported\n");
1385: break;
1386: case XML_CHAR_ENCODING_UCS4_3412:
1.123 daniel 1387: ctxt->errNo = XML_ERR_UNSUPPORTED_ENCODING;
1.75 daniel 1388: if ((ctxt->sax != NULL) && (ctxt->sax->error != NULL))
1389: ctxt->sax->error(ctxt->userData,
1390: "char encoding UCS4 3412 not supported\n");
1391: break;
1392: case XML_CHAR_ENCODING_UCS2:
1.123 daniel 1393: ctxt->errNo = XML_ERR_UNSUPPORTED_ENCODING;
1.75 daniel 1394: if ((ctxt->sax != NULL) && (ctxt->sax->error != NULL))
1395: ctxt->sax->error(ctxt->userData,
1396: "char encoding UCS2 not supported\n");
1397: break;
1398: case XML_CHAR_ENCODING_8859_1:
1.123 daniel 1399: ctxt->errNo = XML_ERR_UNSUPPORTED_ENCODING;
1.75 daniel 1400: if ((ctxt->sax != NULL) && (ctxt->sax->error != NULL))
1401: ctxt->sax->error(ctxt->userData,
1402: "char encoding ISO_8859_1 ISO Latin 1 not supported\n");
1403: break;
1404: case XML_CHAR_ENCODING_8859_2:
1.123 daniel 1405: ctxt->errNo = XML_ERR_UNSUPPORTED_ENCODING;
1.75 daniel 1406: if ((ctxt->sax != NULL) && (ctxt->sax->error != NULL))
1407: ctxt->sax->error(ctxt->userData,
1408: "char encoding ISO_8859_2 ISO Latin 2 not supported\n");
1409: break;
1410: case XML_CHAR_ENCODING_8859_3:
1.123 daniel 1411: ctxt->errNo = XML_ERR_UNSUPPORTED_ENCODING;
1.75 daniel 1412: if ((ctxt->sax != NULL) && (ctxt->sax->error != NULL))
1413: ctxt->sax->error(ctxt->userData,
1414: "char encoding ISO_8859_3 not supported\n");
1415: break;
1416: case XML_CHAR_ENCODING_8859_4:
1.123 daniel 1417: ctxt->errNo = XML_ERR_UNSUPPORTED_ENCODING;
1.75 daniel 1418: if ((ctxt->sax != NULL) && (ctxt->sax->error != NULL))
1419: ctxt->sax->error(ctxt->userData,
1420: "char encoding ISO_8859_4 not supported\n");
1421: break;
1422: case XML_CHAR_ENCODING_8859_5:
1.123 daniel 1423: ctxt->errNo = XML_ERR_UNSUPPORTED_ENCODING;
1.75 daniel 1424: if ((ctxt->sax != NULL) && (ctxt->sax->error != NULL))
1425: ctxt->sax->error(ctxt->userData,
1426: "char encoding ISO_8859_5 not supported\n");
1427: break;
1428: case XML_CHAR_ENCODING_8859_6:
1.123 daniel 1429: ctxt->errNo = XML_ERR_UNSUPPORTED_ENCODING;
1.75 daniel 1430: if ((ctxt->sax != NULL) && (ctxt->sax->error != NULL))
1431: ctxt->sax->error(ctxt->userData,
1432: "char encoding ISO_8859_6 not supported\n");
1433: break;
1434: case XML_CHAR_ENCODING_8859_7:
1.123 daniel 1435: ctxt->errNo = XML_ERR_UNSUPPORTED_ENCODING;
1.75 daniel 1436: if ((ctxt->sax != NULL) && (ctxt->sax->error != NULL))
1437: ctxt->sax->error(ctxt->userData,
1438: "char encoding ISO_8859_7 not supported\n");
1439: break;
1440: case XML_CHAR_ENCODING_8859_8:
1.123 daniel 1441: ctxt->errNo = XML_ERR_UNSUPPORTED_ENCODING;
1.75 daniel 1442: if ((ctxt->sax != NULL) && (ctxt->sax->error != NULL))
1443: ctxt->sax->error(ctxt->userData,
1444: "char encoding ISO_8859_8 not supported\n");
1445: break;
1446: case XML_CHAR_ENCODING_8859_9:
1.123 daniel 1447: ctxt->errNo = XML_ERR_UNSUPPORTED_ENCODING;
1.75 daniel 1448: if ((ctxt->sax != NULL) && (ctxt->sax->error != NULL))
1449: ctxt->sax->error(ctxt->userData,
1450: "char encoding ISO_8859_9 not supported\n");
1451: break;
1452: case XML_CHAR_ENCODING_2022_JP:
1.123 daniel 1453: ctxt->errNo = XML_ERR_UNSUPPORTED_ENCODING;
1.75 daniel 1454: if ((ctxt->sax != NULL) && (ctxt->sax->error != NULL))
1455: ctxt->sax->error(ctxt->userData,
1456: "char encoding ISO-2022-JPnot supported\n");
1457: break;
1458: case XML_CHAR_ENCODING_SHIFT_JIS:
1.123 daniel 1459: ctxt->errNo = XML_ERR_UNSUPPORTED_ENCODING;
1.75 daniel 1460: if ((ctxt->sax != NULL) && (ctxt->sax->error != NULL))
1461: ctxt->sax->error(ctxt->userData,
1462: "char encoding Shift_JISnot supported\n");
1463: break;
1464: case XML_CHAR_ENCODING_EUC_JP:
1.123 daniel 1465: ctxt->errNo = XML_ERR_UNSUPPORTED_ENCODING;
1.75 daniel 1466: if ((ctxt->sax != NULL) && (ctxt->sax->error != NULL))
1467: ctxt->sax->error(ctxt->userData,
1468: "char encoding EUC-JPnot supported\n");
1469: break;
1470: }
1471: }
1472:
1473: /************************************************************************
1474: * *
1.123 daniel 1475: * Commodity functions to handle xmlChars *
1.28 daniel 1476: * *
1477: ************************************************************************/
1478:
1.50 daniel 1479: /**
1480: * xmlStrndup:
1.123 daniel 1481: * @cur: the input xmlChar *
1.50 daniel 1482: * @len: the len of @cur
1483: *
1.123 daniel 1484: * a strndup for array of xmlChar's
1.68 daniel 1485: *
1.123 daniel 1486: * Returns a new xmlChar * or NULL
1.1 veillard 1487: */
1.123 daniel 1488: xmlChar *
1489: xmlStrndup(const xmlChar *cur, int len) {
1490: xmlChar *ret = xmlMalloc((len + 1) * sizeof(xmlChar));
1.1 veillard 1491:
1492: if (ret == NULL) {
1.86 daniel 1493: fprintf(stderr, "malloc of %ld byte failed\n",
1.123 daniel 1494: (len + 1) * (long)sizeof(xmlChar));
1.1 veillard 1495: return(NULL);
1496: }
1.123 daniel 1497: memcpy(ret, cur, len * sizeof(xmlChar));
1.1 veillard 1498: ret[len] = 0;
1499: return(ret);
1500: }
1501:
1.50 daniel 1502: /**
1503: * xmlStrdup:
1.123 daniel 1504: * @cur: the input xmlChar *
1.50 daniel 1505: *
1.123 daniel 1506: * a strdup for array of xmlChar's
1.68 daniel 1507: *
1.123 daniel 1508: * Returns a new xmlChar * or NULL
1.1 veillard 1509: */
1.123 daniel 1510: xmlChar *
1511: xmlStrdup(const xmlChar *cur) {
1512: const xmlChar *p = cur;
1.1 veillard 1513:
1514: while (IS_CHAR(*p)) p++;
1515: return(xmlStrndup(cur, p - cur));
1516: }
1517:
1.50 daniel 1518: /**
1519: * xmlCharStrndup:
1520: * @cur: the input char *
1521: * @len: the len of @cur
1522: *
1.123 daniel 1523: * a strndup for char's to xmlChar's
1.68 daniel 1524: *
1.123 daniel 1525: * Returns a new xmlChar * or NULL
1.45 daniel 1526: */
1527:
1.123 daniel 1528: xmlChar *
1.55 daniel 1529: xmlCharStrndup(const char *cur, int len) {
1.45 daniel 1530: int i;
1.123 daniel 1531: xmlChar *ret = xmlMalloc((len + 1) * sizeof(xmlChar));
1.45 daniel 1532:
1533: if (ret == NULL) {
1.86 daniel 1534: fprintf(stderr, "malloc of %ld byte failed\n",
1.123 daniel 1535: (len + 1) * (long)sizeof(xmlChar));
1.45 daniel 1536: return(NULL);
1537: }
1538: for (i = 0;i < len;i++)
1.123 daniel 1539: ret[i] = (xmlChar) cur[i];
1.45 daniel 1540: ret[len] = 0;
1541: return(ret);
1542: }
1543:
1.50 daniel 1544: /**
1545: * xmlCharStrdup:
1546: * @cur: the input char *
1547: * @len: the len of @cur
1548: *
1.123 daniel 1549: * a strdup for char's to xmlChar's
1.68 daniel 1550: *
1.123 daniel 1551: * Returns a new xmlChar * or NULL
1.45 daniel 1552: */
1553:
1.123 daniel 1554: xmlChar *
1.55 daniel 1555: xmlCharStrdup(const char *cur) {
1.45 daniel 1556: const char *p = cur;
1557:
1558: while (*p != '\0') p++;
1559: return(xmlCharStrndup(cur, p - cur));
1560: }
1561:
1.50 daniel 1562: /**
1563: * xmlStrcmp:
1.123 daniel 1564: * @str1: the first xmlChar *
1565: * @str2: the second xmlChar *
1.50 daniel 1566: *
1.123 daniel 1567: * a strcmp for xmlChar's
1.68 daniel 1568: *
1569: * Returns the integer result of the comparison
1.14 veillard 1570: */
1571:
1.55 daniel 1572: int
1.123 daniel 1573: xmlStrcmp(const xmlChar *str1, const xmlChar *str2) {
1.14 veillard 1574: register int tmp;
1575:
1576: do {
1577: tmp = *str1++ - *str2++;
1578: if (tmp != 0) return(tmp);
1579: } while ((*str1 != 0) && (*str2 != 0));
1580: return (*str1 - *str2);
1581: }
1582:
1.50 daniel 1583: /**
1584: * xmlStrncmp:
1.123 daniel 1585: * @str1: the first xmlChar *
1586: * @str2: the second xmlChar *
1.50 daniel 1587: * @len: the max comparison length
1588: *
1.123 daniel 1589: * a strncmp for xmlChar's
1.68 daniel 1590: *
1591: * Returns the integer result of the comparison
1.14 veillard 1592: */
1593:
1.55 daniel 1594: int
1.123 daniel 1595: xmlStrncmp(const xmlChar *str1, const xmlChar *str2, int len) {
1.14 veillard 1596: register int tmp;
1597:
1598: if (len <= 0) return(0);
1599: do {
1600: tmp = *str1++ - *str2++;
1601: if (tmp != 0) return(tmp);
1602: len--;
1603: if (len <= 0) return(0);
1604: } while ((*str1 != 0) && (*str2 != 0));
1605: return (*str1 - *str2);
1606: }
1607:
1.50 daniel 1608: /**
1609: * xmlStrchr:
1.123 daniel 1610: * @str: the xmlChar * array
1611: * @val: the xmlChar to search
1.50 daniel 1612: *
1.123 daniel 1613: * a strchr for xmlChar's
1.68 daniel 1614: *
1.123 daniel 1615: * Returns the xmlChar * for the first occurence or NULL.
1.14 veillard 1616: */
1617:
1.123 daniel 1618: const xmlChar *
1619: xmlStrchr(const xmlChar *str, xmlChar val) {
1.14 veillard 1620: while (*str != 0) {
1.123 daniel 1621: if (*str == val) return((xmlChar *) str);
1.14 veillard 1622: str++;
1623: }
1624: return(NULL);
1.89 daniel 1625: }
1626:
1627: /**
1628: * xmlStrstr:
1.123 daniel 1629: * @str: the xmlChar * array (haystack)
1630: * @val: the xmlChar to search (needle)
1.89 daniel 1631: *
1.123 daniel 1632: * a strstr for xmlChar's
1.89 daniel 1633: *
1.123 daniel 1634: * Returns the xmlChar * for the first occurence or NULL.
1.89 daniel 1635: */
1636:
1.123 daniel 1637: const xmlChar *
1638: xmlStrstr(const xmlChar *str, xmlChar *val) {
1.89 daniel 1639: int n;
1640:
1641: if (str == NULL) return(NULL);
1642: if (val == NULL) return(NULL);
1643: n = xmlStrlen(val);
1644:
1645: if (n == 0) return(str);
1646: while (*str != 0) {
1647: if (*str == *val) {
1.123 daniel 1648: if (!xmlStrncmp(str, val, n)) return((const xmlChar *) str);
1.89 daniel 1649: }
1650: str++;
1651: }
1652: return(NULL);
1653: }
1654:
1655: /**
1656: * xmlStrsub:
1.123 daniel 1657: * @str: the xmlChar * array (haystack)
1.89 daniel 1658: * @start: the index of the first char (zero based)
1659: * @len: the length of the substring
1660: *
1661: * Extract a substring of a given string
1662: *
1.123 daniel 1663: * Returns the xmlChar * for the first occurence or NULL.
1.89 daniel 1664: */
1665:
1.123 daniel 1666: xmlChar *
1667: xmlStrsub(const xmlChar *str, int start, int len) {
1.89 daniel 1668: int i;
1669:
1670: if (str == NULL) return(NULL);
1671: if (start < 0) return(NULL);
1.90 daniel 1672: if (len < 0) return(NULL);
1.89 daniel 1673:
1674: for (i = 0;i < start;i++) {
1675: if (*str == 0) return(NULL);
1676: str++;
1677: }
1678: if (*str == 0) return(NULL);
1679: return(xmlStrndup(str, len));
1.14 veillard 1680: }
1.28 daniel 1681:
1.50 daniel 1682: /**
1683: * xmlStrlen:
1.123 daniel 1684: * @str: the xmlChar * array
1.50 daniel 1685: *
1.123 daniel 1686: * lenght of a xmlChar's string
1.68 daniel 1687: *
1.123 daniel 1688: * Returns the number of xmlChar contained in the ARRAY.
1.45 daniel 1689: */
1690:
1.55 daniel 1691: int
1.123 daniel 1692: xmlStrlen(const xmlChar *str) {
1.45 daniel 1693: int len = 0;
1694:
1695: if (str == NULL) return(0);
1696: while (*str != 0) {
1697: str++;
1698: len++;
1699: }
1700: return(len);
1701: }
1702:
1.50 daniel 1703: /**
1704: * xmlStrncat:
1.123 daniel 1705: * @cur: the original xmlChar * array
1706: * @add: the xmlChar * array added
1.50 daniel 1707: * @len: the length of @add
1708: *
1.123 daniel 1709: * a strncat for array of xmlChar's
1.68 daniel 1710: *
1.123 daniel 1711: * Returns a new xmlChar * containing the concatenated string.
1.45 daniel 1712: */
1713:
1.123 daniel 1714: xmlChar *
1715: xmlStrncat(xmlChar *cur, const xmlChar *add, int len) {
1.45 daniel 1716: int size;
1.123 daniel 1717: xmlChar *ret;
1.45 daniel 1718:
1719: if ((add == NULL) || (len == 0))
1720: return(cur);
1721: if (cur == NULL)
1722: return(xmlStrndup(add, len));
1723:
1724: size = xmlStrlen(cur);
1.123 daniel 1725: ret = xmlRealloc(cur, (size + len + 1) * sizeof(xmlChar));
1.45 daniel 1726: if (ret == NULL) {
1.86 daniel 1727: fprintf(stderr, "xmlStrncat: realloc of %ld byte failed\n",
1.123 daniel 1728: (size + len + 1) * (long)sizeof(xmlChar));
1.45 daniel 1729: return(cur);
1730: }
1.123 daniel 1731: memcpy(&ret[size], add, len * sizeof(xmlChar));
1.45 daniel 1732: ret[size + len] = 0;
1733: return(ret);
1734: }
1735:
1.50 daniel 1736: /**
1737: * xmlStrcat:
1.123 daniel 1738: * @cur: the original xmlChar * array
1739: * @add: the xmlChar * array added
1.50 daniel 1740: *
1.123 daniel 1741: * a strcat for array of xmlChar's
1.68 daniel 1742: *
1.123 daniel 1743: * Returns a new xmlChar * containing the concatenated string.
1.45 daniel 1744: */
1.123 daniel 1745: xmlChar *
1746: xmlStrcat(xmlChar *cur, const xmlChar *add) {
1747: const xmlChar *p = add;
1.45 daniel 1748:
1749: if (add == NULL) return(cur);
1750: if (cur == NULL)
1751: return(xmlStrdup(add));
1752:
1753: while (IS_CHAR(*p)) p++;
1754: return(xmlStrncat(cur, add, p - add));
1755: }
1756:
1757: /************************************************************************
1758: * *
1759: * Commodity functions, cleanup needed ? *
1760: * *
1761: ************************************************************************/
1762:
1.50 daniel 1763: /**
1764: * areBlanks:
1765: * @ctxt: an XML parser context
1.123 daniel 1766: * @str: a xmlChar *
1.50 daniel 1767: * @len: the size of @str
1768: *
1.45 daniel 1769: * Is this a sequence of blank chars that one can ignore ?
1.50 daniel 1770: *
1.99 daniel 1771: * TODO: Whether white space are significant has to be checked accordingly
1772: * to DTD informations if available
1.68 daniel 1773: *
1774: * Returns 1 if ignorable 0 otherwise.
1.45 daniel 1775: */
1776:
1.123 daniel 1777: static int areBlanks(xmlParserCtxtPtr ctxt, const xmlChar *str, int len) {
1.104 daniel 1778: int i, ret;
1.45 daniel 1779: xmlNodePtr lastChild;
1780:
1781: for (i = 0;i < len;i++)
1782: if (!(IS_BLANK(str[i]))) return(0);
1783:
1784: if (CUR != '<') return(0);
1.72 daniel 1785: if (ctxt->node == NULL) return(0);
1.104 daniel 1786: if (ctxt->myDoc != NULL) {
1787: ret = xmlIsMixedElement(ctxt->myDoc, ctxt->node->name);
1788: if (ret == 0) return(1);
1789: if (ret == 1) return(0);
1790: }
1791: /*
1792: * heuristic
1793: */
1.45 daniel 1794: lastChild = xmlGetLastChild(ctxt->node);
1795: if (lastChild == NULL) {
1796: if (ctxt->node->content != NULL) return(0);
1797: } else if (xmlNodeIsText(lastChild))
1798: return(0);
1.104 daniel 1799: else if ((ctxt->node->childs != NULL) &&
1800: (xmlNodeIsText(ctxt->node->childs)))
1801: return(0);
1.45 daniel 1802: return(1);
1803: }
1804:
1.50 daniel 1805: /**
1806: * xmlHandleEntity:
1807: * @ctxt: an XML parser context
1808: * @entity: an XML entity pointer.
1809: *
1810: * Default handling of defined entities, when should we define a new input
1.45 daniel 1811: * stream ? When do we just handle that as a set of chars ?
1.99 daniel 1812: *
1813: * OBSOLETE: to be removed at some point.
1.45 daniel 1814: */
1815:
1.55 daniel 1816: void
1817: xmlHandleEntity(xmlParserCtxtPtr ctxt, xmlEntityPtr entity) {
1.45 daniel 1818: int len;
1.50 daniel 1819: xmlParserInputPtr input;
1.45 daniel 1820:
1821: if (entity->content == NULL) {
1.123 daniel 1822: ctxt->errNo = XML_ERR_INTERNAL_ERROR;
1.55 daniel 1823: if ((ctxt->sax != NULL) && (ctxt->sax->error != NULL))
1.74 daniel 1824: ctxt->sax->error(ctxt->userData, "xmlHandleEntity %s: content == NULL\n",
1.45 daniel 1825: entity->name);
1.59 daniel 1826: ctxt->wellFormed = 0;
1.45 daniel 1827: return;
1828: }
1829: len = xmlStrlen(entity->content);
1830: if (len <= 2) goto handle_as_char;
1831:
1832: /*
1833: * Redefine its content as an input stream.
1834: */
1.50 daniel 1835: input = xmlNewEntityInputStream(ctxt, entity);
1836: xmlPushInput(ctxt, input);
1.45 daniel 1837: return;
1838:
1839: handle_as_char:
1840: /*
1841: * Just handle the content as a set of chars.
1842: */
1.72 daniel 1843: if ((ctxt->sax != NULL) && (ctxt->sax->characters != NULL))
1.74 daniel 1844: ctxt->sax->characters(ctxt->userData, entity->content, len);
1.45 daniel 1845:
1846: }
1847:
1848: /*
 * Forward declarations for recursive behaviour.
1850: */
1.77 daniel 1851: void xmlParsePEReference(xmlParserCtxtPtr ctxt);
1852: void xmlParseReference(xmlParserCtxtPtr ctxt);
1.45 daniel 1853:
1.28 daniel 1854: /************************************************************************
1855: * *
1856: * Extra stuff for namespace support *
1857: * Relates to http://www.w3.org/TR/WD-xml-names *
1858: * *
1859: ************************************************************************/
1860:
1.50 daniel 1861: /**
1862: * xmlNamespaceParseNCName:
1863: * @ctxt: an XML parser context
1864: *
1865: * parse an XML namespace name.
1.28 daniel 1866: *
1867: * [NS 3] NCName ::= (Letter | '_') (NCNameChar)*
1868: *
1869: * [NS 4] NCNameChar ::= Letter | Digit | '.' | '-' | '_' |
1870: * CombiningChar | Extender
1.68 daniel 1871: *
1872: * Returns the namespace name or NULL
1.28 daniel 1873: */
1874:
1.123 daniel 1875: xmlChar *
1.55 daniel 1876: xmlNamespaceParseNCName(xmlParserCtxtPtr ctxt) {
1.123 daniel 1877: xmlChar buf[XML_MAX_NAMELEN];
1.91 daniel 1878: int len = 0;
1.28 daniel 1879:
1.40 daniel 1880: if (!IS_LETTER(CUR) && (CUR != '_')) return(NULL);
1.28 daniel 1881:
1.40 daniel 1882: while ((IS_LETTER(CUR)) || (IS_DIGIT(CUR)) ||
1883: (CUR == '.') || (CUR == '-') ||
1884: (CUR == '_') ||
1885: (IS_COMBINING(CUR)) ||
1.91 daniel 1886: (IS_EXTENDER(CUR))) {
1887: buf[len++] = CUR;
1.40 daniel 1888: NEXT;
1.91 daniel 1889: if (len >= XML_MAX_NAMELEN) {
1890: fprintf(stderr,
1891: "xmlNamespaceParseNCName: reached XML_MAX_NAMELEN limit\n");
1892: while ((IS_LETTER(CUR)) || (IS_DIGIT(CUR)) ||
1893: (CUR == '.') || (CUR == '-') ||
1894: (CUR == '_') ||
1895: (IS_COMBINING(CUR)) ||
1896: (IS_EXTENDER(CUR)))
1897: NEXT;
1898: break;
1899: }
1900: }
1901: return(xmlStrndup(buf, len));
1.28 daniel 1902: }
1903:
1.50 daniel 1904: /**
1905: * xmlNamespaceParseQName:
1906: * @ctxt: an XML parser context
1.123 daniel 1907: * @prefix: a xmlChar **
1.50 daniel 1908: *
1909: * parse an XML qualified name
1.28 daniel 1910: *
1911: * [NS 5] QName ::= (Prefix ':')? LocalPart
1912: *
1913: * [NS 6] Prefix ::= NCName
1914: *
1915: * [NS 7] LocalPart ::= NCName
1.68 daniel 1916: *
1917: * Returns the function returns the local part, and prefix is updated
1.50 daniel 1918: * to get the Prefix if any.
1.28 daniel 1919: */
1920:
1.123 daniel 1921: xmlChar *
1922: xmlNamespaceParseQName(xmlParserCtxtPtr ctxt, xmlChar **prefix) {
1923: xmlChar *ret = NULL;
1.28 daniel 1924:
1925: *prefix = NULL;
1926: ret = xmlNamespaceParseNCName(ctxt);
1.40 daniel 1927: if (CUR == ':') {
1.28 daniel 1928: *prefix = ret;
1.40 daniel 1929: NEXT;
1.28 daniel 1930: ret = xmlNamespaceParseNCName(ctxt);
1931: }
1932:
1933: return(ret);
1934: }
1935:
1.50 daniel 1936: /**
1.72 daniel 1937: * xmlSplitQName:
1938: * @name: an XML parser context
1.123 daniel 1939: * @prefix: a xmlChar **
1.72 daniel 1940: *
1941: * parse an XML qualified name string
1942: *
1943: * [NS 5] QName ::= (Prefix ':')? LocalPart
1944: *
1945: * [NS 6] Prefix ::= NCName
1946: *
1947: * [NS 7] LocalPart ::= NCName
1948: *
1949: * Returns the function returns the local part, and prefix is updated
1950: * to get the Prefix if any.
1951: */
1952:
1.123 daniel 1953: xmlChar *
1954: xmlSplitQName(const xmlChar *name, xmlChar **prefix) {
1955: xmlChar *ret = NULL;
1956: const xmlChar *q;
1957: const xmlChar *cur = name;
1.72 daniel 1958:
1959: *prefix = NULL;
1.113 daniel 1960:
1961: /* xml: prefix is not really a namespace */
1962: if ((cur[0] == 'x') && (cur[1] == 'm') &&
1963: (cur[2] == 'l') && (cur[3] == ':'))
1964: return(xmlStrdup(name));
1965:
1.72 daniel 1966: if (!IS_LETTER(*cur) && (*cur != '_')) return(NULL);
1967: q = cur++;
1968:
1969: while ((IS_LETTER(*cur)) || (IS_DIGIT(*cur)) ||
1970: (*cur == '.') || (*cur == '-') ||
1971: (*cur == '_') ||
1972: (IS_COMBINING(*cur)) ||
1973: (IS_EXTENDER(*cur)))
1974: cur++;
1975:
1976: ret = xmlStrndup(q, cur - q);
1977:
1978: if (*cur == ':') {
1979: cur++;
1980: if (!IS_LETTER(*cur) && (*cur != '_')) return(ret);
1981: *prefix = ret;
1982:
1983: q = cur++;
1984:
1985: while ((IS_LETTER(*cur)) || (IS_DIGIT(*cur)) ||
1986: (*cur == '.') || (*cur == '-') ||
1987: (*cur == '_') ||
1988: (IS_COMBINING(*cur)) ||
1989: (IS_EXTENDER(*cur)))
1990: cur++;
1991:
1992: ret = xmlStrndup(q, cur - q);
1993: }
1994:
1995: return(ret);
1996: }
1997: /**
1.50 daniel 1998: * xmlNamespaceParseNSDef:
1999: * @ctxt: an XML parser context
2000: *
2001: * parse a namespace prefix declaration
1.28 daniel 2002: *
2003: * [NS 1] NSDef ::= PrefixDef Eq SystemLiteral
2004: *
2005: * [NS 2] PrefixDef ::= 'xmlns' (':' NCName)?
1.68 daniel 2006: *
2007: * Returns the namespace name
1.28 daniel 2008: */
2009:
1.123 daniel 2010: xmlChar *
1.55 daniel 2011: xmlNamespaceParseNSDef(xmlParserCtxtPtr ctxt) {
1.123 daniel 2012: xmlChar *name = NULL;
1.28 daniel 2013:
1.40 daniel 2014: if ((CUR == 'x') && (NXT(1) == 'm') &&
2015: (NXT(2) == 'l') && (NXT(3) == 'n') &&
2016: (NXT(4) == 's')) {
2017: SKIP(5);
2018: if (CUR == ':') {
2019: NEXT;
1.28 daniel 2020: name = xmlNamespaceParseNCName(ctxt);
2021: }
2022: }
1.39 daniel 2023: return(name);
1.28 daniel 2024: }
2025:
1.50 daniel 2026: /**
2027: * xmlParseQuotedString:
2028: * @ctxt: an XML parser context
2029: *
1.45 daniel 2030: * [OLD] Parse and return a string between quotes or doublequotes
1.110 daniel 2031: * To be removed at next drop of binary compatibility
1.68 daniel 2032: *
2033: * Returns the string parser or NULL.
1.45 daniel 2034: */
1.123 daniel 2035: xmlChar *
1.55 daniel 2036: xmlParseQuotedString(xmlParserCtxtPtr ctxt) {
1.123 daniel 2037: xmlChar *ret = NULL;
2038: const xmlChar *q;
1.45 daniel 2039:
2040: if (CUR == '"') {
2041: NEXT;
2042: q = CUR_PTR;
2043: while (IS_CHAR(CUR) && (CUR != '"')) NEXT;
1.55 daniel 2044: if (CUR != '"') {
1.123 daniel 2045: ctxt->errNo = XML_ERR_STRING_NOT_CLOSED;
1.55 daniel 2046: if ((ctxt->sax != NULL) && (ctxt->sax->error != NULL))
1.74 daniel 2047: ctxt->sax->error(ctxt->userData, "String not closed \"%.50s\"\n", q);
1.59 daniel 2048: ctxt->wellFormed = 0;
1.55 daniel 2049: } else {
1.45 daniel 2050: ret = xmlStrndup(q, CUR_PTR - q);
2051: NEXT;
2052: }
2053: } else if (CUR == '\''){
2054: NEXT;
2055: q = CUR_PTR;
2056: while (IS_CHAR(CUR) && (CUR != '\'')) NEXT;
1.55 daniel 2057: if (CUR != '\'') {
1.123 daniel 2058: ctxt->errNo = XML_ERR_STRING_NOT_CLOSED;
1.55 daniel 2059: if ((ctxt->sax != NULL) && (ctxt->sax->error != NULL))
1.74 daniel 2060: ctxt->sax->error(ctxt->userData, "String not closed \"%.50s\"\n", q);
1.59 daniel 2061: ctxt->wellFormed = 0;
1.55 daniel 2062: } else {
1.45 daniel 2063: ret = xmlStrndup(q, CUR_PTR - q);
2064: NEXT;
2065: }
2066: }
2067: return(ret);
2068: }
2069:
1.50 daniel 2070: /**
2071: * xmlParseNamespace:
2072: * @ctxt: an XML parser context
2073: *
1.45 daniel 2074: * [OLD] xmlParseNamespace: parse specific PI '<?namespace ...' constructs.
2075: *
2076: * This is what the older xml-name Working Draft specified, a bunch of
2077: * other stuff may still rely on it, so support is still here as
2078: * if ot was declared on the root of the Tree:-(
1.110 daniel 2079: *
2080: * To be removed at next drop of binary compatibility
1.45 daniel 2081: */
2082:
1.55 daniel 2083: void
2084: xmlParseNamespace(xmlParserCtxtPtr ctxt) {
1.123 daniel 2085: xmlChar *href = NULL;
2086: xmlChar *prefix = NULL;
1.45 daniel 2087: int garbage = 0;
2088:
2089: /*
2090: * We just skipped "namespace" or "xml:namespace"
2091: */
2092: SKIP_BLANKS;
2093:
2094: while (IS_CHAR(CUR) && (CUR != '>')) {
2095: /*
2096: * We can have "ns" or "prefix" attributes
2097: * Old encoding as 'href' or 'AS' attributes is still supported
2098: */
2099: if ((CUR == 'n') && (NXT(1) == 's')) {
2100: garbage = 0;
2101: SKIP(2);
2102: SKIP_BLANKS;
2103:
2104: if (CUR != '=') continue;
2105: NEXT;
2106: SKIP_BLANKS;
2107:
2108: href = xmlParseQuotedString(ctxt);
2109: SKIP_BLANKS;
2110: } else if ((CUR == 'h') && (NXT(1) == 'r') &&
2111: (NXT(2) == 'e') && (NXT(3) == 'f')) {
2112: garbage = 0;
2113: SKIP(4);
2114: SKIP_BLANKS;
2115:
2116: if (CUR != '=') continue;
2117: NEXT;
2118: SKIP_BLANKS;
2119:
2120: href = xmlParseQuotedString(ctxt);
2121: SKIP_BLANKS;
2122: } else if ((CUR == 'p') && (NXT(1) == 'r') &&
2123: (NXT(2) == 'e') && (NXT(3) == 'f') &&
2124: (NXT(4) == 'i') && (NXT(5) == 'x')) {
2125: garbage = 0;
2126: SKIP(6);
2127: SKIP_BLANKS;
2128:
2129: if (CUR != '=') continue;
2130: NEXT;
2131: SKIP_BLANKS;
2132:
2133: prefix = xmlParseQuotedString(ctxt);
2134: SKIP_BLANKS;
2135: } else if ((CUR == 'A') && (NXT(1) == 'S')) {
2136: garbage = 0;
2137: SKIP(2);
2138: SKIP_BLANKS;
2139:
2140: if (CUR != '=') continue;
2141: NEXT;
2142: SKIP_BLANKS;
2143:
2144: prefix = xmlParseQuotedString(ctxt);
2145: SKIP_BLANKS;
2146: } else if ((CUR == '?') && (NXT(1) == '>')) {
2147: garbage = 0;
1.91 daniel 2148: NEXT;
1.45 daniel 2149: } else {
2150: /*
2151: * Found garbage when parsing the namespace
2152: */
1.122 daniel 2153: if (!garbage) {
1.55 daniel 2154: if ((ctxt->sax != NULL) && (ctxt->sax->error != NULL))
1.122 daniel 2155: ctxt->sax->error(ctxt->userData,
2156: "xmlParseNamespace found garbage\n");
2157: }
1.123 daniel 2158: ctxt->errNo = XML_ERR_NS_DECL_ERROR;
1.59 daniel 2159: ctxt->wellFormed = 0;
1.45 daniel 2160: NEXT;
2161: }
2162: }
2163:
2164: MOVETO_ENDTAG(CUR_PTR);
2165: NEXT;
2166:
2167: /*
2168: * Register the DTD.
1.72 daniel 2169: if (href != NULL)
2170: if ((ctxt->sax != NULL) && (ctxt->sax->globalNamespace != NULL))
1.74 daniel 2171: ctxt->sax->globalNamespace(ctxt->userData, href, prefix);
1.45 daniel 2172: */
2173:
1.119 daniel 2174: if (prefix != NULL) xmlFree(prefix);
2175: if (href != NULL) xmlFree(href);
1.45 daniel 2176: }
2177:
1.28 daniel 2178: /************************************************************************
2179: * *
2180: * The parser itself *
2181: * Relates to http://www.w3.org/TR/REC-xml *
2182: * *
2183: ************************************************************************/
1.14 veillard 2184:
1.50 daniel 2185: /**
1.97 daniel 2186: * xmlScanName:
2187: * @ctxt: an XML parser context
2188: *
2189: * Trickery: parse an XML name but without consuming the input flow
2190: * Needed for rollback cases.
2191: *
2192: * [4] NameChar ::= Letter | Digit | '.' | '-' | '_' | ':' |
2193: * CombiningChar | Extender
2194: *
2195: * [5] Name ::= (Letter | '_' | ':') (NameChar)*
2196: *
2197: * [6] Names ::= Name (S Name)*
2198: *
2199: * Returns the Name parsed or NULL
2200: */
2201:
1.123 daniel 2202: xmlChar *
1.97 daniel 2203: xmlScanName(xmlParserCtxtPtr ctxt) {
1.123 daniel 2204: xmlChar buf[XML_MAX_NAMELEN];
1.97 daniel 2205: int len = 0;
2206:
2207: GROW;
2208: if (!IS_LETTER(CUR) && (CUR != '_') &&
2209: (CUR != ':')) {
2210: return(NULL);
2211: }
2212:
2213: while ((IS_LETTER(NXT(len))) || (IS_DIGIT(NXT(len))) ||
2214: (NXT(len) == '.') || (NXT(len) == '-') ||
2215: (NXT(len) == '_') || (NXT(len) == ':') ||
2216: (IS_COMBINING(NXT(len))) ||
2217: (IS_EXTENDER(NXT(len)))) {
2218: buf[len] = NXT(len);
2219: len++;
2220: if (len >= XML_MAX_NAMELEN) {
2221: fprintf(stderr,
2222: "xmlScanName: reached XML_MAX_NAMELEN limit\n");
2223: while ((IS_LETTER(NXT(len))) || (IS_DIGIT(NXT(len))) ||
2224: (NXT(len) == '.') || (NXT(len) == '-') ||
2225: (NXT(len) == '_') || (NXT(len) == ':') ||
2226: (IS_COMBINING(NXT(len))) ||
2227: (IS_EXTENDER(NXT(len))))
2228: len++;
2229: break;
2230: }
2231: }
2232: return(xmlStrndup(buf, len));
2233: }
2234:
2235: /**
1.50 daniel 2236: * xmlParseName:
2237: * @ctxt: an XML parser context
2238: *
2239: * parse an XML name.
1.22 daniel 2240: *
2241: * [4] NameChar ::= Letter | Digit | '.' | '-' | '_' | ':' |
2242: * CombiningChar | Extender
2243: *
2244: * [5] Name ::= (Letter | '_' | ':') (NameChar)*
2245: *
2246: * [6] Names ::= Name (S Name)*
1.68 daniel 2247: *
2248: * Returns the Name parsed or NULL
1.1 veillard 2249: */
2250:
1.123 daniel 2251: xmlChar *
1.55 daniel 2252: xmlParseName(xmlParserCtxtPtr ctxt) {
1.123 daniel 2253: xmlChar buf[XML_MAX_NAMELEN];
1.91 daniel 2254: int len = 0;
1.123 daniel 2255: xmlChar cur;
1.1 veillard 2256:
1.91 daniel 2257: GROW;
1.97 daniel 2258: cur = CUR;
2259: if (!IS_LETTER(cur) && (cur != '_') &&
2260: (cur != ':')) {
1.91 daniel 2261: return(NULL);
2262: }
1.40 daniel 2263:
1.97 daniel 2264: while ((IS_LETTER(cur)) || (IS_DIGIT(cur)) ||
2265: (cur == '.') || (cur == '-') ||
2266: (cur == '_') || (cur == ':') ||
2267: (IS_COMBINING(cur)) ||
2268: (IS_EXTENDER(cur))) {
2269: buf[len++] = cur;
1.40 daniel 2270: NEXT;
1.97 daniel 2271: cur = CUR;
1.91 daniel 2272: if (len >= XML_MAX_NAMELEN) {
2273: fprintf(stderr,
2274: "xmlParseName: reached XML_MAX_NAMELEN limit\n");
1.97 daniel 2275: while ((IS_LETTER(cur)) || (IS_DIGIT(cur)) ||
2276: (cur == '.') || (cur == '-') ||
2277: (cur == '_') || (cur == ':') ||
2278: (IS_COMBINING(cur)) ||
2279: (IS_EXTENDER(cur))) {
2280: NEXT;
2281: cur = CUR;
2282: }
1.91 daniel 2283: break;
2284: }
2285: }
2286: return(xmlStrndup(buf, len));
1.22 daniel 2287: }
2288:
1.50 daniel 2289: /**
2290: * xmlParseNmtoken:
2291: * @ctxt: an XML parser context
2292: *
2293: * parse an XML Nmtoken.
1.22 daniel 2294: *
2295: * [7] Nmtoken ::= (NameChar)+
2296: *
2297: * [8] Nmtokens ::= Nmtoken (S Nmtoken)*
1.68 daniel 2298: *
2299: * Returns the Nmtoken parsed or NULL
1.22 daniel 2300: */
2301:
1.123 daniel 2302: xmlChar *
1.55 daniel 2303: xmlParseNmtoken(xmlParserCtxtPtr ctxt) {
1.123 daniel 2304: xmlChar buf[XML_MAX_NAMELEN];
1.91 daniel 2305: int len = 0;
1.22 daniel 2306:
1.91 daniel 2307: GROW;
1.40 daniel 2308: while ((IS_LETTER(CUR)) || (IS_DIGIT(CUR)) ||
2309: (CUR == '.') || (CUR == '-') ||
2310: (CUR == '_') || (CUR == ':') ||
2311: (IS_COMBINING(CUR)) ||
1.91 daniel 2312: (IS_EXTENDER(CUR))) {
2313: buf[len++] = CUR;
1.40 daniel 2314: NEXT;
1.91 daniel 2315: if (len >= XML_MAX_NAMELEN) {
2316: fprintf(stderr,
2317: "xmlParseNmtoken: reached XML_MAX_NAMELEN limit\n");
2318: while ((IS_LETTER(CUR)) || (IS_DIGIT(CUR)) ||
2319: (CUR == '.') || (CUR == '-') ||
2320: (CUR == '_') || (CUR == ':') ||
2321: (IS_COMBINING(CUR)) ||
2322: (IS_EXTENDER(CUR)))
2323: NEXT;
2324: break;
2325: }
2326: }
2327: return(xmlStrndup(buf, len));
1.1 veillard 2328: }
2329:
1.50 daniel 2330: /**
2331: * xmlParseEntityValue:
2332: * @ctxt: an XML parser context
1.78 daniel 2333: * @orig: if non-NULL store a copy of the original entity value
1.50 daniel 2334: *
2335: * parse a value for ENTITY decl.
1.24 daniel 2336: *
2337: * [9] EntityValue ::= '"' ([^%&"] | PEReference | Reference)* '"' |
2338: * "'" ([^%&'] | PEReference | Reference)* "'"
1.68 daniel 2339: *
1.78 daniel 2340: * Returns the EntityValue parsed with reference substitued or NULL
1.24 daniel 2341: */
2342:
1.123 daniel 2343: xmlChar *
2344: xmlParseEntityValue(xmlParserCtxtPtr ctxt, xmlChar **orig) {
2345: xmlChar *ret = NULL;
2346: const xmlChar *org = NULL;
2347: const xmlChar *tst = NULL;
2348: const xmlChar *temp = NULL;
1.98 daniel 2349: xmlParserInputPtr input;
1.24 daniel 2350:
1.91 daniel 2351: SHRINK;
1.40 daniel 2352: if (CUR == '"') {
1.96 daniel 2353: ctxt->instate = XML_PARSER_ENTITY_VALUE;
1.98 daniel 2354: input = ctxt->input;
1.40 daniel 2355: NEXT;
1.78 daniel 2356: org = CUR_PTR;
1.98 daniel 2357: /*
2358: * NOTE: 4.4.5 Included in Literal
2359: * When a parameter entity reference appears in a literal entity
2360: * value, ... a single or double quote character in the replacement
2361: * text is always treated as a normal data character and will not
2362: * terminate the literal.
2363: * In practice it means we stop the loop only when back at parsing
2364: * the initial entity and the quote is found
2365: */
2366: while ((CUR != '"') || (ctxt->input != input)) {
1.79 daniel 2367: tst = CUR_PTR;
1.98 daniel 2368: /*
2369: * NOTE: 4.4.7 Bypassed
2370: * When a general entity reference appears in the EntityValue in
2371: * an entity declaration, it is bypassed and left as is.
2372: * so XML_SUBSTITUTE_REF is not set.
2373: */
2374: if (ctxt->input != input)
2375: temp = xmlDecodeEntities(ctxt, -1, XML_SUBSTITUTE_PEREF,
2376: 0, 0, 0);
2377: else
2378: temp = xmlDecodeEntities(ctxt, -1, XML_SUBSTITUTE_PEREF,
2379: '"', 0, 0);
1.94 daniel 2380:
2381: /*
2382: * Pop-up of finished entities.
2383: */
2384: while ((CUR == 0) && (ctxt->inputNr > 1))
2385: xmlPopInput(ctxt);
2386:
2387: if ((temp == NULL) && (tst == CUR_PTR)) {
1.123 daniel 2388: ret = xmlStrndup((xmlChar *) "", 0);
1.94 daniel 2389: break;
2390: }
2391: if ((temp[0] == 0) && (tst == CUR_PTR)) {
1.119 daniel 2392: xmlFree((char *)temp);
1.123 daniel 2393: ret = xmlStrndup((xmlChar *) "", 0);
1.94 daniel 2394: break;
2395: }
1.79 daniel 2396: ret = xmlStrcat(ret, temp);
1.119 daniel 2397: if (temp != NULL) xmlFree((char *)temp);
1.94 daniel 2398: GROW;
1.79 daniel 2399: }
1.77 daniel 2400: if (CUR != '"') {
1.123 daniel 2401: ctxt->errNo = XML_ERR_ENTITY_NOT_FINISHED;
1.55 daniel 2402: if ((ctxt->sax != NULL) && (ctxt->sax->error != NULL))
1.79 daniel 2403: ctxt->sax->error(ctxt->userData, "EntityValue: \" expected\n");
1.59 daniel 2404: ctxt->wellFormed = 0;
1.78 daniel 2405: } else {
1.99 daniel 2406: if (orig != NULL) /* !!!!!!!!! */
1.78 daniel 2407: *orig = xmlStrndup(org, CUR_PTR - org);
1.94 daniel 2408: if (ret == NULL)
1.123 daniel 2409: ret = xmlStrndup((xmlChar *) "", 0);
1.40 daniel 2410: NEXT;
1.78 daniel 2411: }
1.40 daniel 2412: } else if (CUR == '\'') {
1.96 daniel 2413: ctxt->instate = XML_PARSER_ENTITY_VALUE;
1.98 daniel 2414: input = ctxt->input;
1.40 daniel 2415: NEXT;
1.78 daniel 2416: org = CUR_PTR;
1.98 daniel 2417: /*
2418: * NOTE: 4.4.5 Included in Literal
2419: * When a parameter entity reference appears in a literal entity
2420: * value, ... a single or double quote character in the replacement
2421: * text is always treated as a normal data character and will not
2422: * terminate the literal.
2423: * In practice it means we stop the loop only when back at parsing
2424: * the initial entity and the quote is found
2425: */
2426: while ((CUR != '\'') || (ctxt->input != input)) {
1.79 daniel 2427: tst = CUR_PTR;
1.98 daniel 2428: /*
2429: * NOTE: 4.4.7 Bypassed
2430: * When a general entity reference appears in the EntityValue in
2431: * an entity declaration, it is bypassed and left as is.
2432: * so XML_SUBSTITUTE_REF is not set.
2433: */
2434: if (ctxt->input != input)
2435: temp = xmlDecodeEntities(ctxt, -1, XML_SUBSTITUTE_PEREF,
2436: 0, 0, 0);
2437: else
2438: temp = xmlDecodeEntities(ctxt, -1, XML_SUBSTITUTE_PEREF,
2439: '\'', 0, 0);
1.94 daniel 2440:
2441: /*
2442: * Pop-up of finished entities.
2443: */
2444: while ((CUR == 0) && (ctxt->inputNr > 1))
2445: xmlPopInput(ctxt);
2446:
2447: if ((temp == NULL) && (tst == CUR_PTR)) {
1.123 daniel 2448: ret = xmlStrndup((xmlChar *) "", 0);
1.94 daniel 2449: break;
2450: }
2451: if ((temp[0] == 0) && (tst == CUR_PTR)) {
1.119 daniel 2452: xmlFree((char *)temp);
1.123 daniel 2453: ret = xmlStrndup((xmlChar *) "", 0);
1.94 daniel 2454: break;
2455: }
1.79 daniel 2456: ret = xmlStrcat(ret, temp);
1.119 daniel 2457: if (temp != NULL) xmlFree((char *)temp);
1.94 daniel 2458: GROW;
1.79 daniel 2459: }
1.77 daniel 2460: if (CUR != '\'') {
1.123 daniel 2461: ctxt->errNo = XML_ERR_ENTITY_NOT_FINISHED;
1.55 daniel 2462: if ((ctxt->sax != NULL) && (ctxt->sax->error != NULL))
1.77 daniel 2463: ctxt->sax->error(ctxt->userData, "EntityValue: ' expected\n");
1.59 daniel 2464: ctxt->wellFormed = 0;
1.78 daniel 2465: } else {
1.99 daniel 2466: if (orig != NULL) /* !!!!!!!!! */
1.78 daniel 2467: *orig = xmlStrndup(org, CUR_PTR - org);
1.94 daniel 2468: if (ret == NULL)
1.123 daniel 2469: ret = xmlStrndup((xmlChar *) "", 0);
1.40 daniel 2470: NEXT;
1.78 daniel 2471: }
1.24 daniel 2472: } else {
1.123 daniel 2473: ctxt->errNo = XML_ERR_ENTITY_NOT_STARTED;
1.55 daniel 2474: if ((ctxt->sax != NULL) && (ctxt->sax->error != NULL))
1.77 daniel 2475: ctxt->sax->error(ctxt->userData, "EntityValue: \" or ' expected\n");
1.59 daniel 2476: ctxt->wellFormed = 0;
1.24 daniel 2477: }
2478:
2479: return(ret);
2480: }
2481:
1.50 daniel 2482: /**
2483: * xmlParseAttValue:
2484: * @ctxt: an XML parser context
2485: *
2486: * parse a value for an attribute
1.78 daniel 2487: * Note: the parser won't do substitution of entities here, this
1.113 daniel 2488: * will be handled later in xmlStringGetNodeList
1.29 daniel 2489: *
2490: * [10] AttValue ::= '"' ([^<&"] | Reference)* '"' |
2491: * "'" ([^<&'] | Reference)* "'"
1.68 daniel 2492: *
2493: * Returns the AttValue parsed or NULL.
1.29 daniel 2494: */
2495:
1.123 daniel 2496: xmlChar *
1.55 daniel 2497: xmlParseAttValue(xmlParserCtxtPtr ctxt) {
1.123 daniel 2498: xmlChar *ret = NULL;
1.29 daniel 2499:
1.91 daniel 2500: SHRINK;
1.40 daniel 2501: if (CUR == '"') {
1.96 daniel 2502: ctxt->instate = XML_PARSER_ATTRIBUTE_VALUE;
1.40 daniel 2503: NEXT;
1.98 daniel 2504: ret = xmlDecodeEntities(ctxt, -1, XML_SUBSTITUTE_REF, '"', '<', 0);
1.77 daniel 2505: if (CUR == '<') {
2506: if ((ctxt->sax != NULL) && (ctxt->sax->error != NULL))
2507: ctxt->sax->error(ctxt->userData,
2508: "Unescaped '<' not allowed in attributes values\n");
1.123 daniel 2509: ctxt->errNo = XML_ERR_LT_IN_ATTRIBUTE;
1.77 daniel 2510: ctxt->wellFormed = 0;
1.29 daniel 2511: }
1.77 daniel 2512: if (CUR != '"') {
1.55 daniel 2513: if ((ctxt->sax != NULL) && (ctxt->sax->error != NULL))
1.77 daniel 2514: ctxt->sax->error(ctxt->userData, "AttValue: ' expected\n");
1.123 daniel 2515: ctxt->errNo = XML_ERR_ATTRIBUTE_NOT_FINISHED;
1.59 daniel 2516: ctxt->wellFormed = 0;
1.77 daniel 2517: } else
1.40 daniel 2518: NEXT;
2519: } else if (CUR == '\'') {
1.96 daniel 2520: ctxt->instate = XML_PARSER_ATTRIBUTE_VALUE;
1.40 daniel 2521: NEXT;
1.98 daniel 2522: ret = xmlDecodeEntities(ctxt, -1, XML_SUBSTITUTE_REF, '\'', '<', 0);
1.77 daniel 2523: if (CUR == '<') {
2524: if ((ctxt->sax != NULL) && (ctxt->sax->error != NULL))
2525: ctxt->sax->error(ctxt->userData,
2526: "Unescaped '<' not allowed in attributes values\n");
1.123 daniel 2527: ctxt->errNo = XML_ERR_LT_IN_ATTRIBUTE;
1.77 daniel 2528: ctxt->wellFormed = 0;
1.29 daniel 2529: }
1.77 daniel 2530: if (CUR != '\'') {
1.55 daniel 2531: if ((ctxt->sax != NULL) && (ctxt->sax->error != NULL))
1.77 daniel 2532: ctxt->sax->error(ctxt->userData, "AttValue: ' expected\n");
1.123 daniel 2533: ctxt->errNo = XML_ERR_ATTRIBUTE_NOT_FINISHED;
1.59 daniel 2534: ctxt->wellFormed = 0;
1.77 daniel 2535: } else
1.40 daniel 2536: NEXT;
1.29 daniel 2537: } else {
1.123 daniel 2538: ctxt->errNo = XML_ERR_ATTRIBUTE_NOT_STARTED;
1.55 daniel 2539: if ((ctxt->sax != NULL) && (ctxt->sax->error != NULL))
1.74 daniel 2540: ctxt->sax->error(ctxt->userData, "AttValue: \" or ' expected\n");
1.59 daniel 2541: ctxt->wellFormed = 0;
1.29 daniel 2542: }
2543:
2544: return(ret);
2545: }
2546:
1.50 daniel 2547: /**
2548: * xmlParseSystemLiteral:
2549: * @ctxt: an XML parser context
2550: *
2551: * parse an XML Literal
1.21 daniel 2552: *
1.22 daniel 2553: * [11] SystemLiteral ::= ('"' [^"]* '"') | ("'" [^']* "'")
1.68 daniel 2554: *
2555: * Returns the SystemLiteral parsed or NULL
1.21 daniel 2556: */
2557:
1.123 daniel 2558: xmlChar *
1.55 daniel 2559: xmlParseSystemLiteral(xmlParserCtxtPtr ctxt) {
1.123 daniel 2560: const xmlChar *q;
2561: xmlChar *ret = NULL;
1.21 daniel 2562:
1.91 daniel 2563: SHRINK;
1.40 daniel 2564: if (CUR == '"') {
2565: NEXT;
2566: q = CUR_PTR;
2567: while ((IS_CHAR(CUR)) && (CUR != '"'))
2568: NEXT;
2569: if (!IS_CHAR(CUR)) {
1.55 daniel 2570: if ((ctxt->sax != NULL) && (ctxt->sax->error != NULL))
1.74 daniel 2571: ctxt->sax->error(ctxt->userData, "Unfinished SystemLiteral\n");
1.123 daniel 2572: ctxt->errNo = XML_ERR_LITERAL_NOT_FINISHED;
1.59 daniel 2573: ctxt->wellFormed = 0;
1.21 daniel 2574: } else {
1.40 daniel 2575: ret = xmlStrndup(q, CUR_PTR - q);
2576: NEXT;
1.21 daniel 2577: }
1.40 daniel 2578: } else if (CUR == '\'') {
2579: NEXT;
2580: q = CUR_PTR;
2581: while ((IS_CHAR(CUR)) && (CUR != '\''))
2582: NEXT;
2583: if (!IS_CHAR(CUR)) {
1.55 daniel 2584: if ((ctxt->sax != NULL) && (ctxt->sax->error != NULL))
1.74 daniel 2585: ctxt->sax->error(ctxt->userData, "Unfinished SystemLiteral\n");
1.123 daniel 2586: ctxt->errNo = XML_ERR_LITERAL_NOT_FINISHED;
1.59 daniel 2587: ctxt->wellFormed = 0;
1.21 daniel 2588: } else {
1.40 daniel 2589: ret = xmlStrndup(q, CUR_PTR - q);
2590: NEXT;
1.21 daniel 2591: }
2592: } else {
1.55 daniel 2593: if ((ctxt->sax != NULL) && (ctxt->sax->error != NULL))
1.122 daniel 2594: ctxt->sax->error(ctxt->userData,
2595: "SystemLiteral \" or ' expected\n");
1.123 daniel 2596: ctxt->errNo = XML_ERR_LITERAL_NOT_STARTED;
1.59 daniel 2597: ctxt->wellFormed = 0;
1.21 daniel 2598: }
2599:
2600: return(ret);
2601: }
2602:
1.50 daniel 2603: /**
2604: * xmlParsePubidLiteral:
2605: * @ctxt: an XML parser context
1.21 daniel 2606: *
1.50 daniel 2607: * parse an XML public literal
1.68 daniel 2608: *
2609: * [12] PubidLiteral ::= '"' PubidChar* '"' | "'" (PubidChar - "'")* "'"
2610: *
2611: * Returns the PubidLiteral parsed or NULL.
1.21 daniel 2612: */
2613:
1.123 daniel 2614: xmlChar *
1.55 daniel 2615: xmlParsePubidLiteral(xmlParserCtxtPtr ctxt) {
1.123 daniel 2616: const xmlChar *q;
2617: xmlChar *ret = NULL;
1.21 daniel 2618: /*
2619: * Name ::= (Letter | '_') (NameChar)*
2620: */
1.91 daniel 2621: SHRINK;
1.40 daniel 2622: if (CUR == '"') {
2623: NEXT;
2624: q = CUR_PTR;
2625: while (IS_PUBIDCHAR(CUR)) NEXT;
2626: if (CUR != '"') {
1.55 daniel 2627: if ((ctxt->sax != NULL) && (ctxt->sax->error != NULL))
1.74 daniel 2628: ctxt->sax->error(ctxt->userData, "Unfinished PubidLiteral\n");
1.123 daniel 2629: ctxt->errNo = XML_ERR_LITERAL_NOT_FINISHED;
1.59 daniel 2630: ctxt->wellFormed = 0;
1.21 daniel 2631: } else {
1.40 daniel 2632: ret = xmlStrndup(q, CUR_PTR - q);
2633: NEXT;
1.21 daniel 2634: }
1.40 daniel 2635: } else if (CUR == '\'') {
2636: NEXT;
2637: q = CUR_PTR;
2638: while ((IS_LETTER(CUR)) && (CUR != '\''))
2639: NEXT;
2640: if (!IS_LETTER(CUR)) {
1.55 daniel 2641: if ((ctxt->sax != NULL) && (ctxt->sax->error != NULL))
1.74 daniel 2642: ctxt->sax->error(ctxt->userData, "Unfinished PubidLiteral\n");
1.123 daniel 2643: ctxt->errNo = XML_ERR_LITERAL_NOT_FINISHED;
1.59 daniel 2644: ctxt->wellFormed = 0;
1.21 daniel 2645: } else {
1.40 daniel 2646: ret = xmlStrndup(q, CUR_PTR - q);
2647: NEXT;
1.21 daniel 2648: }
2649: } else {
1.55 daniel 2650: if ((ctxt->sax != NULL) && (ctxt->sax->error != NULL))
1.122 daniel 2651: ctxt->sax->error(ctxt->userData,
2652: "SystemLiteral \" or ' expected\n");
1.123 daniel 2653: ctxt->errNo = XML_ERR_LITERAL_NOT_STARTED;
1.59 daniel 2654: ctxt->wellFormed = 0;
1.21 daniel 2655: }
2656:
2657: return(ret);
2658: }
2659:
1.50 daniel 2660: /**
2661: * xmlParseCharData:
2662: * @ctxt: an XML parser context
2663: * @cdata: int indicating whether we are within a CDATA section
2664: *
2665: * parse a CharData section.
2666: * if we are within a CDATA section ']]>' marks an end of section.
1.27 daniel 2667: *
2668: * [14] CharData ::= [^<&]* - ([^<&]* ']]>' [^<&]*)
2669: */
2670:
1.55 daniel 2671: void
2672: xmlParseCharData(xmlParserCtxtPtr ctxt, int cdata) {
1.123 daniel 2673: xmlChar buf[1000];
1.91 daniel 2674: int nbchar = 0;
1.123 daniel 2675: xmlChar cur;
1.27 daniel 2676:
1.91 daniel 2677: SHRINK;
1.97 daniel 2678: /*
2679: * !!!!!!!!!!!!
2680: * NOTE: NXT(0) is used here to avoid breaking on < or &
2681: * entities substitutions.
2682: */
2683: cur = CUR;
2684: while ((IS_CHAR(cur)) && (cur != '<') &&
2685: (cur != '&')) {
2686: if ((cur == ']') && (NXT(1) == ']') &&
1.59 daniel 2687: (NXT(2) == '>')) {
2688: if (cdata) break;
2689: else {
2690: if ((ctxt->sax != NULL) && (ctxt->sax->error != NULL))
1.74 daniel 2691: ctxt->sax->error(ctxt->userData,
1.59 daniel 2692: "Sequence ']]>' not allowed in content\n");
1.123 daniel 2693: ctxt->errNo = XML_ERR_MISPLACED_CDATA_END;
1.59 daniel 2694: ctxt->wellFormed = 0;
2695: }
2696: }
1.91 daniel 2697: buf[nbchar++] = CUR;
2698: if (nbchar == 1000) {
2699: /*
2700: * Ok the segment is to be consumed as chars.
2701: */
2702: if (ctxt->sax != NULL) {
2703: if (areBlanks(ctxt, buf, nbchar)) {
2704: if (ctxt->sax->ignorableWhitespace != NULL)
2705: ctxt->sax->ignorableWhitespace(ctxt->userData,
2706: buf, nbchar);
2707: } else {
2708: if (ctxt->sax->characters != NULL)
2709: ctxt->sax->characters(ctxt->userData, buf, nbchar);
2710: }
2711: }
2712: nbchar = 0;
2713: }
1.40 daniel 2714: NEXT;
1.97 daniel 2715: cur = CUR;
1.27 daniel 2716: }
1.91 daniel 2717: if (nbchar != 0) {
2718: /*
2719: * Ok the segment is to be consumed as chars.
2720: */
2721: if (ctxt->sax != NULL) {
2722: if (areBlanks(ctxt, buf, nbchar)) {
2723: if (ctxt->sax->ignorableWhitespace != NULL)
2724: ctxt->sax->ignorableWhitespace(ctxt->userData, buf, nbchar);
2725: } else {
2726: if (ctxt->sax->characters != NULL)
2727: ctxt->sax->characters(ctxt->userData, buf, nbchar);
2728: }
2729: }
1.45 daniel 2730: }
1.27 daniel 2731: }
2732:
1.50 daniel 2733: /**
2734: * xmlParseExternalID:
2735: * @ctxt: an XML parser context
1.123 daniel 2736: * @publicID: a xmlChar** receiving PubidLiteral
1.67 daniel 2737: * @strict: indicate whether we should restrict parsing to only
2738: * production [75], see NOTE below
1.50 daniel 2739: *
1.67 daniel 2740: * Parse an External ID or a Public ID
2741: *
2742: * NOTE: Productions [75] and [83] interract badly since [75] can generate
2743: * 'PUBLIC' S PubidLiteral S SystemLiteral
1.22 daniel 2744: *
2745: * [75] ExternalID ::= 'SYSTEM' S SystemLiteral
2746: * | 'PUBLIC' S PubidLiteral S SystemLiteral
1.67 daniel 2747: *
2748: * [83] PublicID ::= 'PUBLIC' S PubidLiteral
2749: *
1.68 daniel 2750: * Returns the function returns SystemLiteral and in the second
1.67 daniel 2751: * case publicID receives PubidLiteral, is strict is off
2752: * it is possible to return NULL and have publicID set.
1.22 daniel 2753: */
2754:
1.123 daniel 2755: xmlChar *
2756: xmlParseExternalID(xmlParserCtxtPtr ctxt, xmlChar **publicID, int strict) {
2757: xmlChar *URI = NULL;
1.22 daniel 2758:
1.91 daniel 2759: SHRINK;
1.40 daniel 2760: if ((CUR == 'S') && (NXT(1) == 'Y') &&
2761: (NXT(2) == 'S') && (NXT(3) == 'T') &&
2762: (NXT(4) == 'E') && (NXT(5) == 'M')) {
2763: SKIP(6);
1.59 daniel 2764: if (!IS_BLANK(CUR)) {
2765: if ((ctxt->sax != NULL) && (ctxt->sax->error != NULL))
1.74 daniel 2766: ctxt->sax->error(ctxt->userData,
1.59 daniel 2767: "Space required after 'SYSTEM'\n");
1.123 daniel 2768: ctxt->errNo = XML_ERR_SPACE_REQUIRED;
1.59 daniel 2769: ctxt->wellFormed = 0;
2770: }
1.42 daniel 2771: SKIP_BLANKS;
1.39 daniel 2772: URI = xmlParseSystemLiteral(ctxt);
1.59 daniel 2773: if (URI == NULL) {
1.55 daniel 2774: if ((ctxt->sax != NULL) && (ctxt->sax->error != NULL))
1.74 daniel 2775: ctxt->sax->error(ctxt->userData,
1.39 daniel 2776: "xmlParseExternalID: SYSTEM, no URI\n");
1.123 daniel 2777: ctxt->errNo = XML_ERR_URI_REQUIRED;
1.59 daniel 2778: ctxt->wellFormed = 0;
2779: }
1.40 daniel 2780: } else if ((CUR == 'P') && (NXT(1) == 'U') &&
2781: (NXT(2) == 'B') && (NXT(3) == 'L') &&
2782: (NXT(4) == 'I') && (NXT(5) == 'C')) {
2783: SKIP(6);
1.59 daniel 2784: if (!IS_BLANK(CUR)) {
2785: if ((ctxt->sax != NULL) && (ctxt->sax->error != NULL))
1.74 daniel 2786: ctxt->sax->error(ctxt->userData,
1.59 daniel 2787: "Space required after 'PUBLIC'\n");
1.123 daniel 2788: ctxt->errNo = XML_ERR_SPACE_REQUIRED;
1.59 daniel 2789: ctxt->wellFormed = 0;
2790: }
1.42 daniel 2791: SKIP_BLANKS;
1.39 daniel 2792: *publicID = xmlParsePubidLiteral(ctxt);
1.59 daniel 2793: if (*publicID == NULL) {
1.55 daniel 2794: if ((ctxt->sax != NULL) && (ctxt->sax->error != NULL))
1.74 daniel 2795: ctxt->sax->error(ctxt->userData,
1.39 daniel 2796: "xmlParseExternalID: PUBLIC, no Public Identifier\n");
1.123 daniel 2797: ctxt->errNo = XML_ERR_PUBID_REQUIRED;
1.59 daniel 2798: ctxt->wellFormed = 0;
2799: }
1.67 daniel 2800: if (strict) {
2801: /*
2802: * We don't handle [83] so "S SystemLiteral" is required.
2803: */
2804: if (!IS_BLANK(CUR)) {
2805: if ((ctxt->sax != NULL) && (ctxt->sax->error != NULL))
1.74 daniel 2806: ctxt->sax->error(ctxt->userData,
1.67 daniel 2807: "Space required after the Public Identifier\n");
1.123 daniel 2808: ctxt->errNo = XML_ERR_SPACE_REQUIRED;
1.67 daniel 2809: ctxt->wellFormed = 0;
2810: }
2811: } else {
2812: /*
2813: * We handle [83] so we return immediately, if
2814: * "S SystemLiteral" is not detected. From a purely parsing
2815: * point of view that's a nice mess.
2816: */
1.123 daniel 2817: const xmlChar *ptr = CUR_PTR;
1.67 daniel 2818: if (!IS_BLANK(*ptr)) return(NULL);
2819:
2820: while (IS_BLANK(*ptr)) ptr++;
2821: if ((*ptr != '\'') || (*ptr != '"')) return(NULL);
1.59 daniel 2822: }
1.42 daniel 2823: SKIP_BLANKS;
1.39 daniel 2824: URI = xmlParseSystemLiteral(ctxt);
1.59 daniel 2825: if (URI == NULL) {
1.55 daniel 2826: if ((ctxt->sax != NULL) && (ctxt->sax->error != NULL))
1.74 daniel 2827: ctxt->sax->error(ctxt->userData,
1.39 daniel 2828: "xmlParseExternalID: PUBLIC, no URI\n");
1.123 daniel 2829: ctxt->errNo = XML_ERR_URI_REQUIRED;
1.59 daniel 2830: ctxt->wellFormed = 0;
2831: }
1.22 daniel 2832: }
1.39 daniel 2833: return(URI);
1.22 daniel 2834: }
2835:
1.50 daniel 2836: /**
2837: * xmlParseComment:
1.69 daniel 2838: * @ctxt: an XML parser context
1.50 daniel 2839: *
1.3 veillard 2840: * Skip an XML (SGML) comment <!-- .... -->
1.38 daniel 2841: * The spec says that "For compatibility, the string "--" (double-hyphen)
2842: * must not occur within comments. "
1.22 daniel 2843: *
2844: * [15] Comment ::= '<!--' ((Char - '-') | ('-' (Char - '-')))* '-->'
1.3 veillard 2845: */
1.72 daniel 2846: void
1.114 daniel 2847: xmlParseComment(xmlParserCtxtPtr ctxt) {
1.123 daniel 2848: const xmlChar *q, *start;
2849: const xmlChar *r;
2850: xmlChar *val;
1.3 veillard 2851:
2852: /*
1.22 daniel 2853: * Check that there is a comment right here.
1.3 veillard 2854: */
1.40 daniel 2855: if ((CUR != '<') || (NXT(1) != '!') ||
1.72 daniel 2856: (NXT(2) != '-') || (NXT(3) != '-')) return;
1.3 veillard 2857:
1.97 daniel 2858: ctxt->instate = XML_PARSER_COMMENT;
1.91 daniel 2859: SHRINK;
1.40 daniel 2860: SKIP(4);
2861: start = q = CUR_PTR;
2862: NEXT;
2863: r = CUR_PTR;
2864: NEXT;
2865: while (IS_CHAR(CUR) &&
2866: ((CUR == ':') || (CUR != '>') ||
1.16 daniel 2867: (*r != '-') || (*q != '-'))) {
1.59 daniel 2868: if ((*r == '-') && (*q == '-')) {
1.55 daniel 2869: if ((ctxt->sax != NULL) && (ctxt->sax->error != NULL))
1.74 daniel 2870: ctxt->sax->error(ctxt->userData,
1.38 daniel 2871: "Comment must not contain '--' (double-hyphen)`\n");
1.123 daniel 2872: ctxt->errNo = XML_ERR_HYPHEN_IN_COMMENT;
1.59 daniel 2873: ctxt->wellFormed = 0;
2874: }
1.40 daniel 2875: NEXT;r++;q++;
1.3 veillard 2876: }
1.40 daniel 2877: if (!IS_CHAR(CUR)) {
1.55 daniel 2878: if ((ctxt->sax != NULL) && (ctxt->sax->error != NULL))
1.122 daniel 2879: ctxt->sax->error(ctxt->userData,
2880: "Comment not terminated \n<!--%.50s\n", start);
1.123 daniel 2881: ctxt->errNo = XML_ERR_COMMENT_NOT_FINISHED;
1.59 daniel 2882: ctxt->wellFormed = 0;
1.3 veillard 2883: } else {
1.40 daniel 2884: NEXT;
1.114 daniel 2885: val = xmlStrndup(start, q - start);
2886: if ((ctxt->sax != NULL) && (ctxt->sax->comment != NULL))
2887: ctxt->sax->comment(ctxt->userData, val);
1.119 daniel 2888: xmlFree(val);
1.3 veillard 2889: }
2890: }
2891:
1.50 daniel 2892: /**
2893: * xmlParsePITarget:
2894: * @ctxt: an XML parser context
2895: *
2896: * parse the name of a PI
1.22 daniel 2897: *
2898: * [17] PITarget ::= Name - (('X' | 'x') ('M' | 'm') ('L' | 'l'))
1.68 daniel 2899: *
2900: * Returns the PITarget name or NULL
1.22 daniel 2901: */
2902:
1.123 daniel 2903: xmlChar *
1.55 daniel 2904: xmlParsePITarget(xmlParserCtxtPtr ctxt) {
1.123 daniel 2905: xmlChar *name;
1.22 daniel 2906:
2907: name = xmlParseName(ctxt);
2908: if ((name != NULL) && (name[3] == 0) &&
2909: ((name[0] == 'x') || (name[0] == 'X')) &&
1.31 daniel 2910: ((name[1] == 'm') || (name[1] == 'M')) &&
2911: ((name[2] == 'l') || (name[2] == 'L'))) {
1.122 daniel 2912: if ((ctxt->sax != NULL) && (ctxt->sax->error != NULL)) {
2913: ctxt->sax->error(ctxt->userData,
2914: "xmlParsePItarget: invalid name prefix 'xml'\n");
1.123 daniel 2915: ctxt->errNo = XML_ERR_RESERVED_XML_NAME;
1.122 daniel 2916: /* ctxt->wellFormed = 0; !!! ? */
2917: }
1.22 daniel 2918: return(NULL);
2919: }
2920: return(name);
2921: }
2922:
1.50 daniel 2923: /**
2924: * xmlParsePI:
2925: * @ctxt: an XML parser context
2926: *
2927: * parse an XML Processing Instruction.
1.22 daniel 2928: *
2929: * [16] PI ::= '<?' PITarget (S (Char* - (Char* '?>' Char*)))? '?>'
1.68 daniel 2930: *
1.69 daniel 2931: * The processing is transfered to SAX once parsed.
1.3 veillard 2932: */
2933:
1.55 daniel 2934: void
2935: xmlParsePI(xmlParserCtxtPtr ctxt) {
1.123 daniel 2936: xmlChar *target;
1.22 daniel 2937:
1.40 daniel 2938: if ((CUR == '<') && (NXT(1) == '?')) {
1.3 veillard 2939: /*
2940: * this is a Processing Instruction.
2941: */
1.40 daniel 2942: SKIP(2);
1.91 daniel 2943: SHRINK;
1.3 veillard 2944:
2945: /*
1.22 daniel 2946: * Parse the target name and check for special support like
2947: * namespace.
1.3 veillard 2948: */
1.22 daniel 2949: target = xmlParsePITarget(ctxt);
2950: if (target != NULL) {
1.123 daniel 2951: const xmlChar *q;
1.72 daniel 2952:
1.114 daniel 2953: if (!IS_BLANK(CUR)) {
2954: if ((ctxt->sax != NULL) && (ctxt->sax->error != NULL))
2955: ctxt->sax->error(ctxt->userData,
2956: "xmlParsePI: PI %s space expected\n", target);
1.123 daniel 2957: ctxt->errNo = XML_ERR_SPACE_REQUIRED;
1.114 daniel 2958: ctxt->wellFormed = 0;
2959: }
2960: SKIP_BLANKS;
2961: q = CUR_PTR;
1.72 daniel 2962: while (IS_CHAR(CUR) &&
2963: ((CUR != '?') || (NXT(1) != '>')))
2964: NEXT;
2965: if (!IS_CHAR(CUR)) {
2966: if ((ctxt->sax != NULL) && (ctxt->sax->error != NULL))
1.74 daniel 2967: ctxt->sax->error(ctxt->userData,
1.72 daniel 2968: "xmlParsePI: PI %s never end ...\n", target);
1.123 daniel 2969: ctxt->errNo = XML_ERR_PI_NOT_FINISHED;
1.72 daniel 2970: ctxt->wellFormed = 0;
1.22 daniel 2971: } else {
1.123 daniel 2972: xmlChar *data;
1.44 daniel 2973:
1.72 daniel 2974: data = xmlStrndup(q, CUR_PTR - q);
2975: SKIP(2);
1.44 daniel 2976:
1.72 daniel 2977: /*
2978: * SAX: PI detected.
2979: */
2980: if ((ctxt->sax) &&
2981: (ctxt->sax->processingInstruction != NULL))
1.99 daniel 2982: ctxt->sax->processingInstruction(ctxt->userData,
2983: target, data);
1.119 daniel 2984: xmlFree(data);
1.22 daniel 2985: }
1.119 daniel 2986: xmlFree(target);
1.3 veillard 2987: } else {
1.55 daniel 2988: if ((ctxt->sax != NULL) && (ctxt->sax->error != NULL))
1.99 daniel 2989: ctxt->sax->error(ctxt->userData,
2990: "xmlParsePI : no target name\n");
1.123 daniel 2991: ctxt->errNo = XML_ERR_PI_NOT_STARTED;
1.59 daniel 2992: ctxt->wellFormed = 0;
1.22 daniel 2993: }
2994: }
2995: }
2996:
1.50 daniel 2997: /**
2998: * xmlParseNotationDecl:
2999: * @ctxt: an XML parser context
3000: *
3001: * parse a notation declaration
1.22 daniel 3002: *
3003: * [82] NotationDecl ::= '<!NOTATION' S Name S (ExternalID | PublicID) S? '>'
3004: *
3005: * Hence there is actually 3 choices:
3006: * 'PUBLIC' S PubidLiteral
3007: * 'PUBLIC' S PubidLiteral S SystemLiteral
3008: * and 'SYSTEM' S SystemLiteral
1.50 daniel 3009: *
1.67 daniel 3010: * See the NOTE on xmlParseExternalID().
1.22 daniel 3011: */
3012:
1.55 daniel 3013: void
3014: xmlParseNotationDecl(xmlParserCtxtPtr ctxt) {
1.123 daniel 3015: xmlChar *name;
3016: xmlChar *Pubid;
3017: xmlChar *Systemid;
1.22 daniel 3018:
1.40 daniel 3019: if ((CUR == '<') && (NXT(1) == '!') &&
3020: (NXT(2) == 'N') && (NXT(3) == 'O') &&
3021: (NXT(4) == 'T') && (NXT(5) == 'A') &&
3022: (NXT(6) == 'T') && (NXT(7) == 'I') &&
1.67 daniel 3023: (NXT(8) == 'O') && (NXT(9) == 'N')) {
1.91 daniel 3024: SHRINK;
1.40 daniel 3025: SKIP(10);
1.67 daniel 3026: if (!IS_BLANK(CUR)) {
3027: if ((ctxt->sax != NULL) && (ctxt->sax->error != NULL))
1.122 daniel 3028: ctxt->sax->error(ctxt->userData,
3029: "Space required after '<!NOTATION'\n");
1.123 daniel 3030: ctxt->errNo = XML_ERR_SPACE_REQUIRED;
1.67 daniel 3031: ctxt->wellFormed = 0;
3032: return;
3033: }
3034: SKIP_BLANKS;
1.22 daniel 3035:
3036: name = xmlParseName(ctxt);
3037: if (name == NULL) {
1.55 daniel 3038: if ((ctxt->sax != NULL) && (ctxt->sax->error != NULL))
1.122 daniel 3039: ctxt->sax->error(ctxt->userData,
3040: "NOTATION: Name expected here\n");
1.123 daniel 3041: ctxt->errNo = XML_ERR_NOTATION_NOT_STARTED;
1.67 daniel 3042: ctxt->wellFormed = 0;
3043: return;
3044: }
3045: if (!IS_BLANK(CUR)) {
3046: if ((ctxt->sax != NULL) && (ctxt->sax->error != NULL))
1.74 daniel 3047: ctxt->sax->error(ctxt->userData,
1.67 daniel 3048: "Space required after the NOTATION name'\n");
1.123 daniel 3049: ctxt->errNo = XML_ERR_SPACE_REQUIRED;
1.59 daniel 3050: ctxt->wellFormed = 0;
1.22 daniel 3051: return;
3052: }
1.42 daniel 3053: SKIP_BLANKS;
1.67 daniel 3054:
1.22 daniel 3055: /*
1.67 daniel 3056: * Parse the IDs.
1.22 daniel 3057: */
1.67 daniel 3058: Systemid = xmlParseExternalID(ctxt, &Pubid, 1);
3059: SKIP_BLANKS;
3060:
3061: if (CUR == '>') {
1.40 daniel 3062: NEXT;
1.72 daniel 3063: if ((ctxt->sax != NULL) && (ctxt->sax->notationDecl != NULL))
1.74 daniel 3064: ctxt->sax->notationDecl(ctxt->userData, name, Pubid, Systemid);
1.67 daniel 3065: } else {
3066: if ((ctxt->sax != NULL) && (ctxt->sax->error != NULL))
1.74 daniel 3067: ctxt->sax->error(ctxt->userData,
1.67 daniel 3068: "'>' required to close NOTATION declaration\n");
1.123 daniel 3069: ctxt->errNo = XML_ERR_NOTATION_NOT_FINISHED;
1.67 daniel 3070: ctxt->wellFormed = 0;
3071: }
1.119 daniel 3072: xmlFree(name);
3073: if (Systemid != NULL) xmlFree(Systemid);
3074: if (Pubid != NULL) xmlFree(Pubid);
1.22 daniel 3075: }
3076: }
3077:
1.50 daniel 3078: /**
3079: * xmlParseEntityDecl:
3080: * @ctxt: an XML parser context
3081: *
3082: * parse <!ENTITY declarations
1.22 daniel 3083: *
3084: * [70] EntityDecl ::= GEDecl | PEDecl
3085: *
3086: * [71] GEDecl ::= '<!ENTITY' S Name S EntityDef S? '>'
3087: *
3088: * [72] PEDecl ::= '<!ENTITY' S '%' S Name S PEDef S? '>'
3089: *
3090: * [73] EntityDef ::= EntityValue | (ExternalID NDataDecl?)
3091: *
3092: * [74] PEDef ::= EntityValue | ExternalID
1.24 daniel 3093: *
3094: * [76] NDataDecl ::= S 'NDATA' S Name
1.99 daniel 3095: *
3096: * [ VC: Notation Declared ]
1.116 daniel 3097: * The Name must match the declared name of a notation.
1.22 daniel 3098: */
3099:
1.55 daniel 3100: void
3101: xmlParseEntityDecl(xmlParserCtxtPtr ctxt) {
1.123 daniel 3102: xmlChar *name = NULL;
3103: xmlChar *value = NULL;
3104: xmlChar *URI = NULL, *literal = NULL;
3105: xmlChar *ndata = NULL;
1.39 daniel 3106: int isParameter = 0;
1.123 daniel 3107: xmlChar *orig = NULL;
1.22 daniel 3108:
1.94 daniel 3109: GROW;
1.40 daniel 3110: if ((CUR == '<') && (NXT(1) == '!') &&
3111: (NXT(2) == 'E') && (NXT(3) == 'N') &&
3112: (NXT(4) == 'T') && (NXT(5) == 'I') &&
1.59 daniel 3113: (NXT(6) == 'T') && (NXT(7) == 'Y')) {
1.96 daniel 3114: ctxt->instate = XML_PARSER_ENTITY_DECL;
1.91 daniel 3115: SHRINK;
1.40 daniel 3116: SKIP(8);
1.59 daniel 3117: if (!IS_BLANK(CUR)) {
3118: if ((ctxt->sax != NULL) && (ctxt->sax->error != NULL))
1.122 daniel 3119: ctxt->sax->error(ctxt->userData,
3120: "Space required after '<!ENTITY'\n");
1.123 daniel 3121: ctxt->errNo = XML_ERR_SPACE_REQUIRED;
1.59 daniel 3122: ctxt->wellFormed = 0;
3123: }
3124: SKIP_BLANKS;
1.40 daniel 3125:
3126: if (CUR == '%') {
3127: NEXT;
1.59 daniel 3128: if (!IS_BLANK(CUR)) {
3129: if ((ctxt->sax != NULL) && (ctxt->sax->error != NULL))
1.122 daniel 3130: ctxt->sax->error(ctxt->userData,
3131: "Space required after '%'\n");
1.123 daniel 3132: ctxt->errNo = XML_ERR_SPACE_REQUIRED;
1.59 daniel 3133: ctxt->wellFormed = 0;
3134: }
1.42 daniel 3135: SKIP_BLANKS;
1.39 daniel 3136: isParameter = 1;
1.22 daniel 3137: }
3138:
3139: name = xmlParseName(ctxt);
1.24 daniel 3140: if (name == NULL) {
1.55 daniel 3141: if ((ctxt->sax != NULL) && (ctxt->sax->error != NULL))
1.74 daniel 3142: ctxt->sax->error(ctxt->userData, "xmlParseEntityDecl: no name\n");
1.123 daniel 3143: ctxt->errNo = XML_ERR_NAME_REQUIRED;
1.59 daniel 3144: ctxt->wellFormed = 0;
1.24 daniel 3145: return;
3146: }
1.59 daniel 3147: if (!IS_BLANK(CUR)) {
3148: if ((ctxt->sax != NULL) && (ctxt->sax->error != NULL))
1.74 daniel 3149: ctxt->sax->error(ctxt->userData,
1.59 daniel 3150: "Space required after the entity name\n");
1.123 daniel 3151: ctxt->errNo = XML_ERR_SPACE_REQUIRED;
1.59 daniel 3152: ctxt->wellFormed = 0;
3153: }
1.42 daniel 3154: SKIP_BLANKS;
1.24 daniel 3155:
1.22 daniel 3156: /*
1.68 daniel 3157: * handle the various case of definitions...
1.22 daniel 3158: */
1.39 daniel 3159: if (isParameter) {
1.40 daniel 3160: if ((CUR == '"') || (CUR == '\''))
1.78 daniel 3161: value = xmlParseEntityValue(ctxt, &orig);
1.39 daniel 3162: if (value) {
1.72 daniel 3163: if ((ctxt->sax != NULL) && (ctxt->sax->entityDecl != NULL))
1.74 daniel 3164: ctxt->sax->entityDecl(ctxt->userData, name,
1.39 daniel 3165: XML_INTERNAL_PARAMETER_ENTITY,
3166: NULL, NULL, value);
3167: }
1.24 daniel 3168: else {
1.67 daniel 3169: URI = xmlParseExternalID(ctxt, &literal, 1);
1.39 daniel 3170: if (URI) {
1.72 daniel 3171: if ((ctxt->sax != NULL) && (ctxt->sax->entityDecl != NULL))
1.74 daniel 3172: ctxt->sax->entityDecl(ctxt->userData, name,
1.39 daniel 3173: XML_EXTERNAL_PARAMETER_ENTITY,
3174: literal, URI, NULL);
3175: }
1.24 daniel 3176: }
3177: } else {
1.40 daniel 3178: if ((CUR == '"') || (CUR == '\'')) {
1.78 daniel 3179: value = xmlParseEntityValue(ctxt, &orig);
1.72 daniel 3180: if ((ctxt->sax != NULL) && (ctxt->sax->entityDecl != NULL))
1.74 daniel 3181: ctxt->sax->entityDecl(ctxt->userData, name,
1.39 daniel 3182: XML_INTERNAL_GENERAL_ENTITY,
3183: NULL, NULL, value);
3184: } else {
1.67 daniel 3185: URI = xmlParseExternalID(ctxt, &literal, 1);
1.59 daniel 3186: if ((CUR != '>') && (!IS_BLANK(CUR))) {
3187: if ((ctxt->sax != NULL) && (ctxt->sax->error != NULL))
1.74 daniel 3188: ctxt->sax->error(ctxt->userData,
1.59 daniel 3189: "Space required before 'NDATA'\n");
1.123 daniel 3190: ctxt->errNo = XML_ERR_SPACE_REQUIRED;
1.59 daniel 3191: ctxt->wellFormed = 0;
3192: }
1.42 daniel 3193: SKIP_BLANKS;
1.40 daniel 3194: if ((CUR == 'N') && (NXT(1) == 'D') &&
3195: (NXT(2) == 'A') && (NXT(3) == 'T') &&
3196: (NXT(4) == 'A')) {
3197: SKIP(5);
1.59 daniel 3198: if (!IS_BLANK(CUR)) {
3199: if ((ctxt->sax != NULL) && (ctxt->sax->error != NULL))
1.74 daniel 3200: ctxt->sax->error(ctxt->userData,
1.59 daniel 3201: "Space required after 'NDATA'\n");
1.123 daniel 3202: ctxt->errNo = XML_ERR_SPACE_REQUIRED;
1.59 daniel 3203: ctxt->wellFormed = 0;
3204: }
1.42 daniel 3205: SKIP_BLANKS;
1.24 daniel 3206: ndata = xmlParseName(ctxt);
1.116 daniel 3207: if ((ctxt->sax != NULL) &&
3208: (ctxt->sax->unparsedEntityDecl != NULL))
3209: ctxt->sax->unparsedEntityDecl(ctxt->userData, name,
1.39 daniel 3210: literal, URI, ndata);
3211: } else {
1.72 daniel 3212: if ((ctxt->sax != NULL) && (ctxt->sax->entityDecl != NULL))
1.74 daniel 3213: ctxt->sax->entityDecl(ctxt->userData, name,
1.39 daniel 3214: XML_EXTERNAL_GENERAL_PARSED_ENTITY,
3215: literal, URI, NULL);
1.24 daniel 3216: }
3217: }
3218: }
1.42 daniel 3219: SKIP_BLANKS;
1.40 daniel 3220: if (CUR != '>') {
1.55 daniel 3221: if ((ctxt->sax != NULL) && (ctxt->sax->error != NULL))
1.74 daniel 3222: ctxt->sax->error(ctxt->userData,
1.31 daniel 3223: "xmlParseEntityDecl: entity %s not terminated\n", name);
1.123 daniel 3224: ctxt->errNo = XML_ERR_ENTITY_NOT_FINISHED;
1.59 daniel 3225: ctxt->wellFormed = 0;
1.24 daniel 3226: } else
1.40 daniel 3227: NEXT;
1.78 daniel 3228: if (orig != NULL) {
3229: /*
1.98 daniel 3230: * Ugly mechanism to save the raw entity value.
1.78 daniel 3231: */
3232: xmlEntityPtr cur = NULL;
3233:
1.98 daniel 3234: if (isParameter) {
3235: if ((ctxt->sax != NULL) &&
3236: (ctxt->sax->getParameterEntity != NULL))
1.120 daniel 3237: cur = ctxt->sax->getParameterEntity(ctxt->userData, name);
1.98 daniel 3238: } else {
3239: if ((ctxt->sax != NULL) &&
3240: (ctxt->sax->getEntity != NULL))
1.120 daniel 3241: cur = ctxt->sax->getEntity(ctxt->userData, name);
1.98 daniel 3242: }
3243: if (cur != NULL) {
3244: if (cur->orig != NULL)
1.119 daniel 3245: xmlFree(orig);
1.98 daniel 3246: else
3247: cur->orig = orig;
3248: } else
1.119 daniel 3249: xmlFree(orig);
1.78 daniel 3250: }
1.119 daniel 3251: if (name != NULL) xmlFree(name);
3252: if (value != NULL) xmlFree(value);
3253: if (URI != NULL) xmlFree(URI);
3254: if (literal != NULL) xmlFree(literal);
3255: if (ndata != NULL) xmlFree(ndata);
1.22 daniel 3256: }
3257: }
3258:
1.50 daniel 3259: /**
1.59 daniel 3260: * xmlParseDefaultDecl:
3261: * @ctxt: an XML parser context
3262: * @value: Receive a possible fixed default value for the attribute
3263: *
3264: * Parse an attribute default declaration
3265: *
3266: * [60] DefaultDecl ::= '#REQUIRED' | '#IMPLIED' | (('#FIXED' S)? AttValue)
3267: *
1.99 daniel 3268: * [ VC: Required Attribute ]
1.117 daniel 3269: * if the default declaration is the keyword #REQUIRED, then the
3270: * attribute must be specified for all elements of the type in the
3271: * attribute-list declaration.
1.99 daniel 3272: *
3273: * [ VC: Attribute Default Legal ]
1.102 daniel 3274: * The declared default value must meet the lexical constraints of
3275: * the declared attribute type c.f. xmlValidateAttributeDecl()
1.99 daniel 3276: *
3277: * [ VC: Fixed Attribute Default ]
1.117 daniel 3278: * if an attribute has a default value declared with the #FIXED
3279: * keyword, instances of that attribute must match the default value.
1.99 daniel 3280: *
3281: * [ WFC: No < in Attribute Values ]
3282: * handled in xmlParseAttValue()
3283: *
1.59 daniel 3284: * returns: XML_ATTRIBUTE_NONE, XML_ATTRIBUTE_REQUIRED, XML_ATTRIBUTE_IMPLIED
3285: * or XML_ATTRIBUTE_FIXED.
3286: */
3287:
3288: int
1.123 daniel 3289: xmlParseDefaultDecl(xmlParserCtxtPtr ctxt, xmlChar **value) {
1.59 daniel 3290: int val;
1.123 daniel 3291: xmlChar *ret;
1.59 daniel 3292:
3293: *value = NULL;
3294: if ((CUR == '#') && (NXT(1) == 'R') &&
3295: (NXT(2) == 'E') && (NXT(3) == 'Q') &&
3296: (NXT(4) == 'U') && (NXT(5) == 'I') &&
3297: (NXT(6) == 'R') && (NXT(7) == 'E') &&
3298: (NXT(8) == 'D')) {
3299: SKIP(9);
3300: return(XML_ATTRIBUTE_REQUIRED);
3301: }
3302: if ((CUR == '#') && (NXT(1) == 'I') &&
3303: (NXT(2) == 'M') && (NXT(3) == 'P') &&
3304: (NXT(4) == 'L') && (NXT(5) == 'I') &&
3305: (NXT(6) == 'E') && (NXT(7) == 'D')) {
3306: SKIP(8);
3307: return(XML_ATTRIBUTE_IMPLIED);
3308: }
3309: val = XML_ATTRIBUTE_NONE;
3310: if ((CUR == '#') && (NXT(1) == 'F') &&
3311: (NXT(2) == 'I') && (NXT(3) == 'X') &&
3312: (NXT(4) == 'E') && (NXT(5) == 'D')) {
3313: SKIP(6);
3314: val = XML_ATTRIBUTE_FIXED;
3315: if (!IS_BLANK(CUR)) {
3316: if ((ctxt->sax != NULL) && (ctxt->sax->error != NULL))
1.122 daniel 3317: ctxt->sax->error(ctxt->userData,
3318: "Space required after '#FIXED'\n");
1.123 daniel 3319: ctxt->errNo = XML_ERR_SPACE_REQUIRED;
1.59 daniel 3320: ctxt->wellFormed = 0;
3321: }
3322: SKIP_BLANKS;
3323: }
3324: ret = xmlParseAttValue(ctxt);
1.96 daniel 3325: ctxt->instate = XML_PARSER_DTD;
1.59 daniel 3326: if (ret == NULL) {
3327: if ((ctxt->sax != NULL) && (ctxt->sax->error != NULL))
1.74 daniel 3328: ctxt->sax->error(ctxt->userData,
1.59 daniel 3329: "Attribute default value declaration error\n");
3330: ctxt->wellFormed = 0;
3331: } else
3332: *value = ret;
3333: return(val);
3334: }
3335:
3336: /**
1.66 daniel 3337: * xmlParseNotationType:
3338: * @ctxt: an XML parser context
3339: *
3340: * parse an Notation attribute type.
3341: *
1.99 daniel 3342: * Note: the leading 'NOTATION' S part has already being parsed...
3343: *
1.66 daniel 3344: * [58] NotationType ::= 'NOTATION' S '(' S? Name (S? '|' S? Name)* S? ')'
3345: *
1.99 daniel 3346: * [ VC: Notation Attributes ]
1.117 daniel 3347: * Values of this type must match one of the notation names included
1.99 daniel 3348: * in the declaration; all notation names in the declaration must be declared.
1.66 daniel 3349: *
3350: * Returns: the notation attribute tree built while parsing
3351: */
3352:
3353: xmlEnumerationPtr
3354: xmlParseNotationType(xmlParserCtxtPtr ctxt) {
1.123 daniel 3355: xmlChar *name;
1.66 daniel 3356: xmlEnumerationPtr ret = NULL, last = NULL, cur;
3357:
3358: if (CUR != '(') {
3359: if ((ctxt->sax != NULL) && (ctxt->sax->error != NULL))
1.122 daniel 3360: ctxt->sax->error(ctxt->userData,
3361: "'(' required to start 'NOTATION'\n");
1.123 daniel 3362: ctxt->errNo = XML_ERR_NOTATION_NOT_STARTED;
1.66 daniel 3363: ctxt->wellFormed = 0;
3364: return(NULL);
3365: }
1.91 daniel 3366: SHRINK;
1.66 daniel 3367: do {
3368: NEXT;
3369: SKIP_BLANKS;
3370: name = xmlParseName(ctxt);
3371: if (name == NULL) {
3372: if ((ctxt->sax != NULL) && (ctxt->sax->error != NULL))
1.74 daniel 3373: ctxt->sax->error(ctxt->userData,
1.66 daniel 3374: "Name expected in NOTATION declaration\n");
1.123 daniel 3375: ctxt->errNo = XML_ERR_NAME_REQUIRED;
1.66 daniel 3376: ctxt->wellFormed = 0;
3377: return(ret);
3378: }
3379: cur = xmlCreateEnumeration(name);
1.119 daniel 3380: xmlFree(name);
1.66 daniel 3381: if (cur == NULL) return(ret);
3382: if (last == NULL) ret = last = cur;
3383: else {
3384: last->next = cur;
3385: last = cur;
3386: }
3387: SKIP_BLANKS;
3388: } while (CUR == '|');
3389: if (CUR != ')') {
3390: if ((ctxt->sax != NULL) && (ctxt->sax->error != NULL))
1.74 daniel 3391: ctxt->sax->error(ctxt->userData,
1.66 daniel 3392: "')' required to finish NOTATION declaration\n");
1.123 daniel 3393: ctxt->errNo = XML_ERR_NOTATION_NOT_FINISHED;
1.66 daniel 3394: ctxt->wellFormed = 0;
3395: return(ret);
3396: }
3397: NEXT;
3398: return(ret);
3399: }
3400:
3401: /**
3402: * xmlParseEnumerationType:
3403: * @ctxt: an XML parser context
3404: *
3405: * parse an Enumeration attribute type.
3406: *
3407: * [59] Enumeration ::= '(' S? Nmtoken (S? '|' S? Nmtoken)* S? ')'
3408: *
1.99 daniel 3409: * [ VC: Enumeration ]
1.117 daniel 3410: * Values of this type must match one of the Nmtoken tokens in
1.99 daniel 3411: * the declaration
3412: *
1.66 daniel 3413: * Returns: the enumeration attribute tree built while parsing
3414: */
3415:
3416: xmlEnumerationPtr
3417: xmlParseEnumerationType(xmlParserCtxtPtr ctxt) {
1.123 daniel 3418: xmlChar *name;
1.66 daniel 3419: xmlEnumerationPtr ret = NULL, last = NULL, cur;
3420:
3421: if (CUR != '(') {
3422: if ((ctxt->sax != NULL) && (ctxt->sax->error != NULL))
1.74 daniel 3423: ctxt->sax->error(ctxt->userData,
1.66 daniel 3424: "'(' required to start ATTLIST enumeration\n");
1.123 daniel 3425: ctxt->errNo = XML_ERR_ATTLIST_NOT_STARTED;
1.66 daniel 3426: ctxt->wellFormed = 0;
3427: return(NULL);
3428: }
1.91 daniel 3429: SHRINK;
1.66 daniel 3430: do {
3431: NEXT;
3432: SKIP_BLANKS;
3433: name = xmlParseNmtoken(ctxt);
3434: if (name == NULL) {
3435: if ((ctxt->sax != NULL) && (ctxt->sax->error != NULL))
1.74 daniel 3436: ctxt->sax->error(ctxt->userData,
1.66 daniel 3437: "NmToken expected in ATTLIST enumeration\n");
1.123 daniel 3438: ctxt->errNo = XML_ERR_NMTOKEN_REQUIRED;
1.66 daniel 3439: ctxt->wellFormed = 0;
3440: return(ret);
3441: }
3442: cur = xmlCreateEnumeration(name);
1.119 daniel 3443: xmlFree(name);
1.66 daniel 3444: if (cur == NULL) return(ret);
3445: if (last == NULL) ret = last = cur;
3446: else {
3447: last->next = cur;
3448: last = cur;
3449: }
3450: SKIP_BLANKS;
3451: } while (CUR == '|');
3452: if (CUR != ')') {
3453: if ((ctxt->sax != NULL) && (ctxt->sax->error != NULL))
1.74 daniel 3454: ctxt->sax->error(ctxt->userData,
1.66 daniel 3455: "')' required to finish ATTLIST enumeration\n");
1.123 daniel 3456: ctxt->errNo = XML_ERR_ATTLIST_NOT_FINISHED;
1.66 daniel 3457: ctxt->wellFormed = 0;
3458: return(ret);
3459: }
3460: NEXT;
3461: return(ret);
3462: }
3463:
3464: /**
1.50 daniel 3465: * xmlParseEnumeratedType:
3466: * @ctxt: an XML parser context
1.66 daniel 3467: * @tree: the enumeration tree built while parsing
1.50 daniel 3468: *
1.66 daniel 3469: * parse an Enumerated attribute type.
1.22 daniel 3470: *
3471: * [57] EnumeratedType ::= NotationType | Enumeration
3472: *
3473: * [58] NotationType ::= 'NOTATION' S '(' S? Name (S? '|' S? Name)* S? ')'
3474: *
1.50 daniel 3475: *
1.66 daniel 3476: * Returns: XML_ATTRIBUTE_ENUMERATION or XML_ATTRIBUTE_NOTATION
1.22 daniel 3477: */
3478:
1.66 daniel 3479: int
3480: xmlParseEnumeratedType(xmlParserCtxtPtr ctxt, xmlEnumerationPtr *tree) {
3481: if ((CUR == 'N') && (NXT(1) == 'O') &&
3482: (NXT(2) == 'T') && (NXT(3) == 'A') &&
3483: (NXT(4) == 'T') && (NXT(5) == 'I') &&
3484: (NXT(6) == 'O') && (NXT(7) == 'N')) {
3485: SKIP(8);
3486: if (!IS_BLANK(CUR)) {
3487: if ((ctxt->sax != NULL) && (ctxt->sax->error != NULL))
1.122 daniel 3488: ctxt->sax->error(ctxt->userData,
3489: "Space required after 'NOTATION'\n");
1.123 daniel 3490: ctxt->errNo = XML_ERR_SPACE_REQUIRED;
1.66 daniel 3491: ctxt->wellFormed = 0;
3492: return(0);
3493: }
3494: SKIP_BLANKS;
3495: *tree = xmlParseNotationType(ctxt);
3496: if (*tree == NULL) return(0);
3497: return(XML_ATTRIBUTE_NOTATION);
3498: }
3499: *tree = xmlParseEnumerationType(ctxt);
3500: if (*tree == NULL) return(0);
3501: return(XML_ATTRIBUTE_ENUMERATION);
1.22 daniel 3502: }
3503:
1.50 daniel 3504: /**
3505: * xmlParseAttributeType:
3506: * @ctxt: an XML parser context
1.66 daniel 3507: * @tree: the enumeration tree built while parsing
1.50 daniel 3508: *
1.59 daniel 3509: * parse the Attribute list def for an element
1.22 daniel 3510: *
3511: * [54] AttType ::= StringType | TokenizedType | EnumeratedType
3512: *
3513: * [55] StringType ::= 'CDATA'
3514: *
3515: * [56] TokenizedType ::= 'ID' | 'IDREF' | 'IDREFS' | 'ENTITY' |
3516: * 'ENTITIES' | 'NMTOKEN' | 'NMTOKENS'
1.50 daniel 3517: *
1.102 daniel 3518: * Validity constraints for attribute values syntax are checked in
3519: * xmlValidateAttributeValue()
3520: *
1.99 daniel 3521: * [ VC: ID ]
1.117 daniel 3522: * Values of type ID must match the Name production. A name must not
1.99 daniel 3523: * appear more than once in an XML document as a value of this type;
3524: * i.e., ID values must uniquely identify the elements which bear them.
3525: *
3526: * [ VC: One ID per Element Type ]
1.117 daniel 3527: * No element type may have more than one ID attribute specified.
1.99 daniel 3528: *
3529: * [ VC: ID Attribute Default ]
1.117 daniel 3530: * An ID attribute must have a declared default of #IMPLIED or #REQUIRED.
1.99 daniel 3531: *
3532: * [ VC: IDREF ]
1.102 daniel 3533: * Values of type IDREF must match the Name production, and values
1.117 daniel 3534: * of type IDREFS must match Names; TODO each IDREF Name must match the value
3535: * of an ID attribute on some element in the XML document; i.e. IDREF
1.99 daniel 3536: * values must match the value of some ID attribute.
3537: *
3538: * [ VC: Entity Name ]
1.102 daniel 3539: * Values of type ENTITY must match the Name production, values
1.117 daniel 3540: * of type ENTITIES must match Names; TODO each Entity Name must match the
3541: * name of an unparsed entity declared in the DTD.
1.99 daniel 3542: *
3543: * [ VC: Name Token ]
1.102 daniel 3544: * Values of type NMTOKEN must match the Nmtoken production; values
1.99 daniel 3545: * of type NMTOKENS must match Nmtokens.
3546: *
1.69 daniel 3547: * Returns the attribute type
1.22 daniel 3548: */
1.59 daniel 3549: int
1.66 daniel 3550: xmlParseAttributeType(xmlParserCtxtPtr ctxt, xmlEnumerationPtr *tree) {
1.91 daniel 3551: SHRINK;
1.40 daniel 3552: if ((CUR == 'C') && (NXT(1) == 'D') &&
3553: (NXT(2) == 'A') && (NXT(3) == 'T') &&
3554: (NXT(4) == 'A')) {
3555: SKIP(5);
1.66 daniel 3556: return(XML_ATTRIBUTE_CDATA);
1.40 daniel 3557: } else if ((CUR == 'I') && (NXT(1) == 'D') &&
3558: (NXT(2) == 'R') && (NXT(3) == 'E') &&
1.97 daniel 3559: (NXT(4) == 'F') && (NXT(5) == 'S')) {
3560: SKIP(6);
3561: return(XML_ATTRIBUTE_IDREFS);
3562: } else if ((CUR == 'I') && (NXT(1) == 'D') &&
3563: (NXT(2) == 'R') && (NXT(3) == 'E') &&
1.40 daniel 3564: (NXT(4) == 'F')) {
3565: SKIP(5);
1.59 daniel 3566: return(XML_ATTRIBUTE_IDREF);
1.66 daniel 3567: } else if ((CUR == 'I') && (NXT(1) == 'D')) {
3568: SKIP(2);
3569: return(XML_ATTRIBUTE_ID);
1.40 daniel 3570: } else if ((CUR == 'E') && (NXT(1) == 'N') &&
3571: (NXT(2) == 'T') && (NXT(3) == 'I') &&
3572: (NXT(4) == 'T') && (NXT(5) == 'Y')) {
3573: SKIP(6);
1.59 daniel 3574: return(XML_ATTRIBUTE_ENTITY);
1.40 daniel 3575: } else if ((CUR == 'E') && (NXT(1) == 'N') &&
3576: (NXT(2) == 'T') && (NXT(3) == 'I') &&
3577: (NXT(4) == 'T') && (NXT(5) == 'I') &&
3578: (NXT(6) == 'E') && (NXT(7) == 'S')) {
3579: SKIP(8);
1.59 daniel 3580: return(XML_ATTRIBUTE_ENTITIES);
1.40 daniel 3581: } else if ((CUR == 'N') && (NXT(1) == 'M') &&
3582: (NXT(2) == 'T') && (NXT(3) == 'O') &&
3583: (NXT(4) == 'K') && (NXT(5) == 'E') &&
1.66 daniel 3584: (NXT(6) == 'N') && (NXT(7) == 'S')) {
3585: SKIP(8);
3586: return(XML_ATTRIBUTE_NMTOKENS);
3587: } else if ((CUR == 'N') && (NXT(1) == 'M') &&
3588: (NXT(2) == 'T') && (NXT(3) == 'O') &&
3589: (NXT(4) == 'K') && (NXT(5) == 'E') &&
1.40 daniel 3590: (NXT(6) == 'N')) {
3591: SKIP(7);
1.59 daniel 3592: return(XML_ATTRIBUTE_NMTOKEN);
1.22 daniel 3593: }
1.66 daniel 3594: return(xmlParseEnumeratedType(ctxt, tree));
1.22 daniel 3595: }
3596:
1.50 daniel 3597: /**
3598: * xmlParseAttributeListDecl:
3599: * @ctxt: an XML parser context
3600: *
3601: * : parse the Attribute list def for an element
1.22 daniel 3602: *
3603: * [52] AttlistDecl ::= '<!ATTLIST' S Name AttDef* S? '>'
3604: *
3605: * [53] AttDef ::= S Name S AttType S DefaultDecl
1.50 daniel 3606: *
1.22 daniel 3607: */
1.55 daniel 3608: void
3609: xmlParseAttributeListDecl(xmlParserCtxtPtr ctxt) {
1.123 daniel 3610: xmlChar *elemName;
3611: xmlChar *attrName;
1.103 daniel 3612: xmlEnumerationPtr tree;
1.22 daniel 3613:
1.40 daniel 3614: if ((CUR == '<') && (NXT(1) == '!') &&
3615: (NXT(2) == 'A') && (NXT(3) == 'T') &&
3616: (NXT(4) == 'T') && (NXT(5) == 'L') &&
3617: (NXT(6) == 'I') && (NXT(7) == 'S') &&
1.59 daniel 3618: (NXT(8) == 'T')) {
1.40 daniel 3619: SKIP(9);
1.59 daniel 3620: if (!IS_BLANK(CUR)) {
3621: if ((ctxt->sax != NULL) && (ctxt->sax->error != NULL))
1.122 daniel 3622: ctxt->sax->error(ctxt->userData,
3623: "Space required after '<!ATTLIST'\n");
1.123 daniel 3624: ctxt->errNo = XML_ERR_SPACE_REQUIRED;
1.59 daniel 3625: ctxt->wellFormed = 0;
3626: }
1.42 daniel 3627: SKIP_BLANKS;
1.59 daniel 3628: elemName = xmlParseName(ctxt);
3629: if (elemName == NULL) {
1.55 daniel 3630: if ((ctxt->sax != NULL) && (ctxt->sax->error != NULL))
1.122 daniel 3631: ctxt->sax->error(ctxt->userData,
3632: "ATTLIST: no name for Element\n");
1.123 daniel 3633: ctxt->errNo = XML_ERR_NAME_REQUIRED;
1.59 daniel 3634: ctxt->wellFormed = 0;
1.22 daniel 3635: return;
3636: }
1.42 daniel 3637: SKIP_BLANKS;
1.40 daniel 3638: while (CUR != '>') {
1.123 daniel 3639: const xmlChar *check = CUR_PTR;
1.59 daniel 3640: int type;
3641: int def;
1.123 daniel 3642: xmlChar *defaultValue = NULL;
1.59 daniel 3643:
1.103 daniel 3644: tree = NULL;
1.59 daniel 3645: attrName = xmlParseName(ctxt);
3646: if (attrName == NULL) {
3647: if ((ctxt->sax != NULL) && (ctxt->sax->error != NULL))
1.122 daniel 3648: ctxt->sax->error(ctxt->userData,
3649: "ATTLIST: no name for Attribute\n");
1.123 daniel 3650: ctxt->errNo = XML_ERR_NAME_REQUIRED;
1.59 daniel 3651: ctxt->wellFormed = 0;
3652: break;
3653: }
1.97 daniel 3654: GROW;
1.59 daniel 3655: if (!IS_BLANK(CUR)) {
3656: if ((ctxt->sax != NULL) && (ctxt->sax->error != NULL))
1.74 daniel 3657: ctxt->sax->error(ctxt->userData,
1.59 daniel 3658: "Space required after the attribute name\n");
1.123 daniel 3659: ctxt->errNo = XML_ERR_SPACE_REQUIRED;
1.59 daniel 3660: ctxt->wellFormed = 0;
3661: break;
3662: }
3663: SKIP_BLANKS;
3664:
1.66 daniel 3665: type = xmlParseAttributeType(ctxt, &tree);
1.59 daniel 3666: if (type <= 0) break;
1.22 daniel 3667:
1.97 daniel 3668: GROW;
1.59 daniel 3669: if (!IS_BLANK(CUR)) {
3670: if ((ctxt->sax != NULL) && (ctxt->sax->error != NULL))
1.74 daniel 3671: ctxt->sax->error(ctxt->userData,
1.59 daniel 3672: "Space required after the attribute type\n");
1.123 daniel 3673: ctxt->errNo = XML_ERR_SPACE_REQUIRED;
1.59 daniel 3674: ctxt->wellFormed = 0;
3675: break;
3676: }
1.42 daniel 3677: SKIP_BLANKS;
1.59 daniel 3678:
3679: def = xmlParseDefaultDecl(ctxt, &defaultValue);
3680: if (def <= 0) break;
3681:
1.97 daniel 3682: GROW;
1.59 daniel 3683: if (CUR != '>') {
3684: if (!IS_BLANK(CUR)) {
3685: if ((ctxt->sax != NULL) && (ctxt->sax->error != NULL))
1.74 daniel 3686: ctxt->sax->error(ctxt->userData,
1.59 daniel 3687: "Space required after the attribute default value\n");
1.123 daniel 3688: ctxt->errNo = XML_ERR_SPACE_REQUIRED;
1.59 daniel 3689: ctxt->wellFormed = 0;
3690: break;
3691: }
3692: SKIP_BLANKS;
3693: }
1.40 daniel 3694: if (check == CUR_PTR) {
1.55 daniel 3695: if ((ctxt->sax != NULL) && (ctxt->sax->error != NULL))
1.74 daniel 3696: ctxt->sax->error(ctxt->userData,
1.59 daniel 3697: "xmlParseAttributeListDecl: detected internal error\n");
1.123 daniel 3698: ctxt->errNo = XML_ERR_INTERNAL_ERROR;
1.22 daniel 3699: break;
3700: }
1.72 daniel 3701: if ((ctxt->sax != NULL) && (ctxt->sax->attributeDecl != NULL))
1.74 daniel 3702: ctxt->sax->attributeDecl(ctxt->userData, elemName, attrName,
1.66 daniel 3703: type, def, defaultValue, tree);
1.59 daniel 3704: if (attrName != NULL)
1.119 daniel 3705: xmlFree(attrName);
1.59 daniel 3706: if (defaultValue != NULL)
1.119 daniel 3707: xmlFree(defaultValue);
1.97 daniel 3708: GROW;
1.22 daniel 3709: }
1.40 daniel 3710: if (CUR == '>')
3711: NEXT;
1.22 daniel 3712:
1.119 daniel 3713: xmlFree(elemName);
1.22 daniel 3714: }
3715: }
3716:
1.50 daniel 3717: /**
1.61 daniel 3718: * xmlParseElementMixedContentDecl:
3719: * @ctxt: an XML parser context
3720: *
3721: * parse the declaration for a Mixed Element content
3722: * The leading '(' and spaces have been skipped in xmlParseElementContentDecl
3723: *
3724: * [51] Mixed ::= '(' S? '#PCDATA' (S? '|' S? Name)* S? ')*' |
3725: * '(' S? '#PCDATA' S? ')'
3726: *
1.99 daniel 3727: * [ VC: Proper Group/PE Nesting ] applies to [51] too (see [49])
3728: *
3729: * [ VC: No Duplicate Types ]
1.117 daniel 3730: * The same name must not appear more than once in a single
3731: * mixed-content declaration.
1.99 daniel 3732: *
1.61 daniel 3733: * returns: the list of the xmlElementContentPtr describing the element choices
3734: */
3735: xmlElementContentPtr
1.62 daniel 3736: xmlParseElementMixedContentDecl(xmlParserCtxtPtr ctxt) {
1.64 daniel 3737: xmlElementContentPtr ret = NULL, cur = NULL, n;
1.123 daniel 3738: xmlChar *elem = NULL;
1.61 daniel 3739:
1.97 daniel 3740: GROW;
1.61 daniel 3741: if ((CUR == '#') && (NXT(1) == 'P') &&
3742: (NXT(2) == 'C') && (NXT(3) == 'D') &&
3743: (NXT(4) == 'A') && (NXT(5) == 'T') &&
3744: (NXT(6) == 'A')) {
3745: SKIP(7);
3746: SKIP_BLANKS;
1.91 daniel 3747: SHRINK;
1.63 daniel 3748: if (CUR == ')') {
3749: NEXT;
3750: ret = xmlNewElementContent(NULL, XML_ELEMENT_CONTENT_PCDATA);
3751: return(ret);
3752: }
1.61 daniel 3753: if ((CUR == '(') || (CUR == '|')) {
3754: ret = cur = xmlNewElementContent(NULL, XML_ELEMENT_CONTENT_PCDATA);
3755: if (ret == NULL) return(NULL);
1.99 daniel 3756: }
1.61 daniel 3757: while (CUR == '|') {
1.64 daniel 3758: NEXT;
1.61 daniel 3759: if (elem == NULL) {
3760: ret = xmlNewElementContent(NULL, XML_ELEMENT_CONTENT_OR);
3761: if (ret == NULL) return(NULL);
3762: ret->c1 = cur;
1.64 daniel 3763: cur = ret;
1.61 daniel 3764: } else {
1.64 daniel 3765: n = xmlNewElementContent(NULL, XML_ELEMENT_CONTENT_OR);
3766: if (n == NULL) return(NULL);
3767: n->c1 = xmlNewElementContent(elem, XML_ELEMENT_CONTENT_ELEMENT);
3768: cur->c2 = n;
3769: cur = n;
1.119 daniel 3770: xmlFree(elem);
1.61 daniel 3771: }
3772: SKIP_BLANKS;
3773: elem = xmlParseName(ctxt);
3774: if (elem == NULL) {
3775: if ((ctxt->sax != NULL) && (ctxt->sax->error != NULL))
1.74 daniel 3776: ctxt->sax->error(ctxt->userData,
1.61 daniel 3777: "xmlParseElementMixedContentDecl : Name expected\n");
1.123 daniel 3778: ctxt->errNo = XML_ERR_NAME_REQUIRED;
1.61 daniel 3779: ctxt->wellFormed = 0;
3780: xmlFreeElementContent(cur);
3781: return(NULL);
3782: }
3783: SKIP_BLANKS;
1.97 daniel 3784: GROW;
1.61 daniel 3785: }
1.63 daniel 3786: if ((CUR == ')') && (NXT(1) == '*')) {
1.66 daniel 3787: if (elem != NULL) {
1.61 daniel 3788: cur->c2 = xmlNewElementContent(elem,
3789: XML_ELEMENT_CONTENT_ELEMENT);
1.119 daniel 3790: xmlFree(elem);
1.66 daniel 3791: }
1.65 daniel 3792: ret->ocur = XML_ELEMENT_CONTENT_MULT;
1.64 daniel 3793: SKIP(2);
1.61 daniel 3794: } else {
1.119 daniel 3795: if (elem != NULL) xmlFree(elem);
1.61 daniel 3796: if ((ctxt->sax != NULL) && (ctxt->sax->error != NULL))
1.74 daniel 3797: ctxt->sax->error(ctxt->userData,
1.63 daniel 3798: "xmlParseElementMixedContentDecl : '|' or ')*' expected\n");
1.123 daniel 3799: ctxt->errNo = XML_ERR_MIXED_NOT_STARTED;
1.61 daniel 3800: ctxt->wellFormed = 0;
3801: xmlFreeElementContent(ret);
3802: return(NULL);
3803: }
3804:
3805: } else {
3806: if ((ctxt->sax != NULL) && (ctxt->sax->error != NULL))
1.74 daniel 3807: ctxt->sax->error(ctxt->userData,
1.61 daniel 3808: "xmlParseElementMixedContentDecl : '#PCDATA' expected\n");
1.123 daniel 3809: ctxt->errNo = XML_ERR_PCDATA_REQUIRED;
1.61 daniel 3810: ctxt->wellFormed = 0;
3811: }
3812: return(ret);
3813: }
3814:
3815: /**
3816: * xmlParseElementChildrenContentDecl:
1.50 daniel 3817: * @ctxt: an XML parser context
3818: *
1.61 daniel 3819: * parse the declaration for a Mixed Element content
3820: * The leading '(' and spaces have been skipped in xmlParseElementContentDecl
1.22 daniel 3821: *
1.61 daniel 3822: *
1.22 daniel 3823: * [47] children ::= (choice | seq) ('?' | '*' | '+')?
3824: *
3825: * [48] cp ::= (Name | choice | seq) ('?' | '*' | '+')?
3826: *
3827: * [49] choice ::= '(' S? cp ( S? '|' S? cp )* S? ')'
3828: *
3829: * [50] seq ::= '(' S? cp ( S? ',' S? cp )* S? ')'
3830: *
1.99 daniel 3831: * [ VC: Proper Group/PE Nesting ] applies to [49] and [50]
3832: * TODO Parameter-entity replacement text must be properly nested
3833: * with parenthetized groups. That is to say, if either of the
3834: * opening or closing parentheses in a choice, seq, or Mixed
3835: * construct is contained in the replacement text for a parameter
3836: * entity, both must be contained in the same replacement text. For
3837: * interoperability, if a parameter-entity reference appears in a
3838: * choice, seq, or Mixed construct, its replacement text should not
3839: * be empty, and neither the first nor last non-blank character of
3840: * the replacement text should be a connector (| or ,).
3841: *
1.62 daniel 3842: * returns: the tree of xmlElementContentPtr describing the element
1.61 daniel 3843: * hierarchy.
3844: */
3845: xmlElementContentPtr
1.62 daniel 3846: xmlParseElementChildrenContentDecl(xmlParserCtxtPtr ctxt) {
1.63 daniel 3847: xmlElementContentPtr ret = NULL, cur = NULL, last = NULL, op = NULL;
1.123 daniel 3848: xmlChar *elem;
3849: xmlChar type = 0;
1.62 daniel 3850:
3851: SKIP_BLANKS;
1.94 daniel 3852: GROW;
1.62 daniel 3853: if (CUR == '(') {
1.63 daniel 3854: /* Recurse on first child */
1.62 daniel 3855: NEXT;
3856: SKIP_BLANKS;
3857: cur = ret = xmlParseElementChildrenContentDecl(ctxt);
3858: SKIP_BLANKS;
1.101 daniel 3859: GROW;
1.62 daniel 3860: } else {
3861: elem = xmlParseName(ctxt);
3862: if (elem == NULL) {
3863: if ((ctxt->sax != NULL) && (ctxt->sax->error != NULL))
1.74 daniel 3864: ctxt->sax->error(ctxt->userData,
1.62 daniel 3865: "xmlParseElementChildrenContentDecl : Name or '(' expected\n");
1.123 daniel 3866: ctxt->errNo = XML_ERR_ELEMCONTENT_NOT_STARTED;
1.62 daniel 3867: ctxt->wellFormed = 0;
3868: return(NULL);
3869: }
3870: cur = ret = xmlNewElementContent(elem, XML_ELEMENT_CONTENT_ELEMENT);
1.101 daniel 3871: GROW;
1.62 daniel 3872: if (CUR == '?') {
1.104 daniel 3873: cur->ocur = XML_ELEMENT_CONTENT_OPT;
1.62 daniel 3874: NEXT;
3875: } else if (CUR == '*') {
1.104 daniel 3876: cur->ocur = XML_ELEMENT_CONTENT_MULT;
1.62 daniel 3877: NEXT;
3878: } else if (CUR == '+') {
1.104 daniel 3879: cur->ocur = XML_ELEMENT_CONTENT_PLUS;
1.62 daniel 3880: NEXT;
3881: } else {
1.104 daniel 3882: cur->ocur = XML_ELEMENT_CONTENT_ONCE;
1.62 daniel 3883: }
1.119 daniel 3884: xmlFree(elem);
1.101 daniel 3885: GROW;
1.62 daniel 3886: }
3887: SKIP_BLANKS;
1.91 daniel 3888: SHRINK;
1.62 daniel 3889: while (CUR != ')') {
1.63 daniel 3890: /*
3891: * Each loop we parse one separator and one element.
3892: */
1.62 daniel 3893: if (CUR == ',') {
3894: if (type == 0) type = CUR;
3895:
3896: /*
3897: * Detect "Name | Name , Name" error
3898: */
3899: else if (type != CUR) {
3900: if ((ctxt->sax != NULL) && (ctxt->sax->error != NULL))
1.74 daniel 3901: ctxt->sax->error(ctxt->userData,
1.62 daniel 3902: "xmlParseElementChildrenContentDecl : '%c' expected\n",
3903: type);
1.123 daniel 3904: ctxt->errNo = XML_ERR_SEPARATOR_REQUIRED;
1.62 daniel 3905: ctxt->wellFormed = 0;
3906: xmlFreeElementContent(ret);
3907: return(NULL);
3908: }
1.64 daniel 3909: NEXT;
1.62 daniel 3910:
1.63 daniel 3911: op = xmlNewElementContent(NULL, XML_ELEMENT_CONTENT_SEQ);
3912: if (op == NULL) {
3913: xmlFreeElementContent(ret);
3914: return(NULL);
3915: }
3916: if (last == NULL) {
3917: op->c1 = ret;
1.65 daniel 3918: ret = cur = op;
1.63 daniel 3919: } else {
3920: cur->c2 = op;
3921: op->c1 = last;
3922: cur =op;
1.65 daniel 3923: last = NULL;
1.63 daniel 3924: }
1.62 daniel 3925: } else if (CUR == '|') {
3926: if (type == 0) type = CUR;
3927:
3928: /*
1.63 daniel 3929: * Detect "Name , Name | Name" error
1.62 daniel 3930: */
3931: else if (type != CUR) {
3932: if ((ctxt->sax != NULL) && (ctxt->sax->error != NULL))
1.74 daniel 3933: ctxt->sax->error(ctxt->userData,
1.62 daniel 3934: "xmlParseElementChildrenContentDecl : '%c' expected\n",
3935: type);
1.123 daniel 3936: ctxt->errNo = XML_ERR_SEPARATOR_REQUIRED;
1.62 daniel 3937: ctxt->wellFormed = 0;
3938: xmlFreeElementContent(ret);
3939: return(NULL);
3940: }
1.64 daniel 3941: NEXT;
1.62 daniel 3942:
1.63 daniel 3943: op = xmlNewElementContent(NULL, XML_ELEMENT_CONTENT_OR);
3944: if (op == NULL) {
3945: xmlFreeElementContent(ret);
3946: return(NULL);
3947: }
3948: if (last == NULL) {
3949: op->c1 = ret;
1.65 daniel 3950: ret = cur = op;
1.63 daniel 3951: } else {
3952: cur->c2 = op;
3953: op->c1 = last;
3954: cur =op;
1.65 daniel 3955: last = NULL;
1.63 daniel 3956: }
1.62 daniel 3957: } else {
3958: if ((ctxt->sax != NULL) && (ctxt->sax->error != NULL))
1.74 daniel 3959: ctxt->sax->error(ctxt->userData,
1.62 daniel 3960: "xmlParseElementChildrenContentDecl : ',' '|' or ')' expected\n");
3961: ctxt->wellFormed = 0;
1.123 daniel 3962: ctxt->errNo = XML_ERR_ELEMCONTENT_NOT_FINISHED;
1.62 daniel 3963: xmlFreeElementContent(ret);
3964: return(NULL);
3965: }
1.101 daniel 3966: GROW;
1.62 daniel 3967: SKIP_BLANKS;
1.101 daniel 3968: GROW;
1.62 daniel 3969: if (CUR == '(') {
1.63 daniel 3970: /* Recurse on second child */
1.62 daniel 3971: NEXT;
3972: SKIP_BLANKS;
1.65 daniel 3973: last = xmlParseElementChildrenContentDecl(ctxt);
1.62 daniel 3974: SKIP_BLANKS;
3975: } else {
3976: elem = xmlParseName(ctxt);
3977: if (elem == NULL) {
3978: if ((ctxt->sax != NULL) && (ctxt->sax->error != NULL))
1.74 daniel 3979: ctxt->sax->error(ctxt->userData,
1.122 daniel 3980: "xmlParseElementChildrenContentDecl : Name or '(' expected\n");
1.123 daniel 3981: ctxt->errNo = XML_ERR_ELEMCONTENT_NOT_STARTED;
1.62 daniel 3982: ctxt->wellFormed = 0;
3983: return(NULL);
3984: }
1.65 daniel 3985: last = xmlNewElementContent(elem, XML_ELEMENT_CONTENT_ELEMENT);
1.119 daniel 3986: xmlFree(elem);
1.105 daniel 3987: if (CUR == '?') {
3988: last->ocur = XML_ELEMENT_CONTENT_OPT;
3989: NEXT;
3990: } else if (CUR == '*') {
3991: last->ocur = XML_ELEMENT_CONTENT_MULT;
3992: NEXT;
3993: } else if (CUR == '+') {
3994: last->ocur = XML_ELEMENT_CONTENT_PLUS;
3995: NEXT;
3996: } else {
3997: last->ocur = XML_ELEMENT_CONTENT_ONCE;
3998: }
1.63 daniel 3999: }
4000: SKIP_BLANKS;
1.97 daniel 4001: GROW;
1.64 daniel 4002: }
1.65 daniel 4003: if ((cur != NULL) && (last != NULL)) {
4004: cur->c2 = last;
1.62 daniel 4005: }
4006: NEXT;
4007: if (CUR == '?') {
4008: ret->ocur = XML_ELEMENT_CONTENT_OPT;
4009: NEXT;
4010: } else if (CUR == '*') {
4011: ret->ocur = XML_ELEMENT_CONTENT_MULT;
4012: NEXT;
4013: } else if (CUR == '+') {
4014: ret->ocur = XML_ELEMENT_CONTENT_PLUS;
4015: NEXT;
4016: }
4017: return(ret);
1.61 daniel 4018: }
4019:
4020: /**
4021: * xmlParseElementContentDecl:
4022: * @ctxt: an XML parser context
4023: * @name: the name of the element being defined.
4024: * @result: the Element Content pointer will be stored here if any
1.22 daniel 4025: *
1.61 daniel 4026: * parse the declaration for an Element content either Mixed or Children,
4027: * the cases EMPTY and ANY are handled directly in xmlParseElementDecl
4028: *
4029: * [46] contentspec ::= 'EMPTY' | 'ANY' | Mixed | children
1.50 daniel 4030: *
1.61 daniel 4031: * returns: the type of element content XML_ELEMENT_TYPE_xxx
1.22 daniel 4032: */
4033:
1.61 daniel 4034: int
1.123 daniel 4035: xmlParseElementContentDecl(xmlParserCtxtPtr ctxt, xmlChar *name,
1.61 daniel 4036: xmlElementContentPtr *result) {
4037:
4038: xmlElementContentPtr tree = NULL;
4039: int res;
4040:
4041: *result = NULL;
4042:
4043: if (CUR != '(') {
4044: if ((ctxt->sax != NULL) && (ctxt->sax->error != NULL))
1.74 daniel 4045: ctxt->sax->error(ctxt->userData,
1.61 daniel 4046: "xmlParseElementContentDecl : '(' expected\n");
1.123 daniel 4047: ctxt->errNo = XML_ERR_ELEMCONTENT_NOT_STARTED;
1.61 daniel 4048: ctxt->wellFormed = 0;
4049: return(-1);
4050: }
4051: NEXT;
1.97 daniel 4052: GROW;
1.61 daniel 4053: SKIP_BLANKS;
4054: if ((CUR == '#') && (NXT(1) == 'P') &&
4055: (NXT(2) == 'C') && (NXT(3) == 'D') &&
4056: (NXT(4) == 'A') && (NXT(5) == 'T') &&
4057: (NXT(6) == 'A')) {
1.62 daniel 4058: tree = xmlParseElementMixedContentDecl(ctxt);
1.61 daniel 4059: res = XML_ELEMENT_TYPE_MIXED;
4060: } else {
1.62 daniel 4061: tree = xmlParseElementChildrenContentDecl(ctxt);
1.61 daniel 4062: res = XML_ELEMENT_TYPE_ELEMENT;
4063: }
4064: SKIP_BLANKS;
1.63 daniel 4065: /****************************
1.61 daniel 4066: if (CUR != ')') {
4067: if ((ctxt->sax != NULL) && (ctxt->sax->error != NULL))
1.74 daniel 4068: ctxt->sax->error(ctxt->userData,
1.61 daniel 4069: "xmlParseElementContentDecl : ')' expected\n");
4070: ctxt->wellFormed = 0;
4071: return(-1);
4072: }
1.63 daniel 4073: ****************************/
4074: *result = tree;
1.61 daniel 4075: return(res);
1.22 daniel 4076: }
4077:
1.50 daniel 4078: /**
4079: * xmlParseElementDecl:
4080: * @ctxt: an XML parser context
4081: *
4082: * parse an Element declaration.
1.22 daniel 4083: *
4084: * [45] elementdecl ::= '<!ELEMENT' S Name S contentspec S? '>'
4085: *
1.99 daniel 4086: * [ VC: Unique Element Type Declaration ]
1.117 daniel 4087: * No element type may be declared more than once
1.69 daniel 4088: *
4089: * Returns the type of the element, or -1 in case of error
1.22 daniel 4090: */
1.59 daniel 4091: int
1.55 daniel 4092: xmlParseElementDecl(xmlParserCtxtPtr ctxt) {
1.123 daniel 4093: xmlChar *name;
1.59 daniel 4094: int ret = -1;
1.61 daniel 4095: xmlElementContentPtr content = NULL;
1.22 daniel 4096:
1.97 daniel 4097: GROW;
1.40 daniel 4098: if ((CUR == '<') && (NXT(1) == '!') &&
4099: (NXT(2) == 'E') && (NXT(3) == 'L') &&
4100: (NXT(4) == 'E') && (NXT(5) == 'M') &&
4101: (NXT(6) == 'E') && (NXT(7) == 'N') &&
1.59 daniel 4102: (NXT(8) == 'T')) {
1.40 daniel 4103: SKIP(9);
1.59 daniel 4104: if (!IS_BLANK(CUR)) {
4105: if ((ctxt->sax != NULL) && (ctxt->sax->error != NULL))
1.74 daniel 4106: ctxt->sax->error(ctxt->userData,
1.59 daniel 4107: "Space required after 'ELEMENT'\n");
1.123 daniel 4108: ctxt->errNo = XML_ERR_SPACE_REQUIRED;
1.59 daniel 4109: ctxt->wellFormed = 0;
4110: }
1.42 daniel 4111: SKIP_BLANKS;
1.22 daniel 4112: name = xmlParseName(ctxt);
4113: if (name == NULL) {
1.55 daniel 4114: if ((ctxt->sax != NULL) && (ctxt->sax->error != NULL))
1.74 daniel 4115: ctxt->sax->error(ctxt->userData,
1.59 daniel 4116: "xmlParseElementDecl: no name for Element\n");
1.123 daniel 4117: ctxt->errNo = XML_ERR_NAME_REQUIRED;
1.59 daniel 4118: ctxt->wellFormed = 0;
4119: return(-1);
4120: }
4121: if (!IS_BLANK(CUR)) {
4122: if ((ctxt->sax != NULL) && (ctxt->sax->error != NULL))
1.74 daniel 4123: ctxt->sax->error(ctxt->userData,
1.59 daniel 4124: "Space required after the element name\n");
1.123 daniel 4125: ctxt->errNo = XML_ERR_SPACE_REQUIRED;
1.59 daniel 4126: ctxt->wellFormed = 0;
1.22 daniel 4127: }
1.42 daniel 4128: SKIP_BLANKS;
1.40 daniel 4129: if ((CUR == 'E') && (NXT(1) == 'M') &&
4130: (NXT(2) == 'P') && (NXT(3) == 'T') &&
4131: (NXT(4) == 'Y')) {
4132: SKIP(5);
1.22 daniel 4133: /*
4134: * Element must always be empty.
4135: */
1.59 daniel 4136: ret = XML_ELEMENT_TYPE_EMPTY;
1.40 daniel 4137: } else if ((CUR == 'A') && (NXT(1) == 'N') &&
4138: (NXT(2) == 'Y')) {
4139: SKIP(3);
1.22 daniel 4140: /*
4141: * Element is a generic container.
4142: */
1.59 daniel 4143: ret = XML_ELEMENT_TYPE_ANY;
1.61 daniel 4144: } else if (CUR == '(') {
4145: ret = xmlParseElementContentDecl(ctxt, name, &content);
1.22 daniel 4146: } else {
1.98 daniel 4147: /*
4148: * [ WFC: PEs in Internal Subset ] error handling.
4149: */
4150: if ((CUR == '%') && (ctxt->external == 0) &&
4151: (ctxt->inputNr == 1)) {
4152: if ((ctxt->sax != NULL) && (ctxt->sax->error != NULL))
4153: ctxt->sax->error(ctxt->userData,
4154: "PEReference: forbidden within markup decl in internal subset\n");
1.123 daniel 4155: ctxt->errNo = XML_ERR_PEREF_IN_INT_SUBSET;
1.98 daniel 4156: } else {
4157: if ((ctxt->sax != NULL) && (ctxt->sax->error != NULL))
4158: ctxt->sax->error(ctxt->userData,
4159: "xmlParseElementDecl: 'EMPTY', 'ANY' or '(' expected\n");
1.123 daniel 4160: ctxt->errNo = XML_ERR_ELEMCONTENT_NOT_STARTED;
1.98 daniel 4161: }
1.61 daniel 4162: ctxt->wellFormed = 0;
1.119 daniel 4163: if (name != NULL) xmlFree(name);
1.61 daniel 4164: return(-1);
1.22 daniel 4165: }
1.42 daniel 4166: SKIP_BLANKS;
1.40 daniel 4167: if (CUR != '>') {
1.55 daniel 4168: if ((ctxt->sax != NULL) && (ctxt->sax->error != NULL))
1.74 daniel 4169: ctxt->sax->error(ctxt->userData,
1.31 daniel 4170: "xmlParseElementDecl: expected '>' at the end\n");
1.123 daniel 4171: ctxt->errNo = XML_ERR_GT_REQUIRED;
1.59 daniel 4172: ctxt->wellFormed = 0;
1.61 daniel 4173: } else {
1.40 daniel 4174: NEXT;
1.72 daniel 4175: if ((ctxt->sax != NULL) && (ctxt->sax->elementDecl != NULL))
1.76 daniel 4176: ctxt->sax->elementDecl(ctxt->userData, name, ret,
4177: content);
1.61 daniel 4178: }
1.84 daniel 4179: if (content != NULL) {
4180: xmlFreeElementContent(content);
4181: }
1.61 daniel 4182: if (name != NULL) {
1.119 daniel 4183: xmlFree(name);
1.61 daniel 4184: }
1.22 daniel 4185: }
1.59 daniel 4186: return(ret);
1.22 daniel 4187: }
4188:
1.50 daniel 4189: /**
4190: * xmlParseMarkupDecl:
4191: * @ctxt: an XML parser context
4192: *
4193: * parse Markup declarations
1.22 daniel 4194: *
4195: * [29] markupdecl ::= elementdecl | AttlistDecl | EntityDecl |
4196: * NotationDecl | PI | Comment
4197: *
1.98 daniel 4198: * [ VC: Proper Declaration/PE Nesting ]
4199: * TODO Parameter-entity replacement text must be properly nested with
4200: * markup declarations. That is to say, if either the first character
4201: * or the last character of a markup declaration (markupdecl above) is
4202: * contained in the replacement text for a parameter-entity reference,
4203: * both must be contained in the same replacement text.
4204: *
4205: * [ WFC: PEs in Internal Subset ]
4206: * In the internal DTD subset, parameter-entity references can occur
4207: * only where markup declarations can occur, not within markup declarations.
4208: * (This does not apply to references that occur in external parameter
4209: * entities or to the external subset.)
1.22 daniel 4210: */
1.55 daniel 4211: void
4212: xmlParseMarkupDecl(xmlParserCtxtPtr ctxt) {
1.94 daniel 4213: GROW;
1.22 daniel 4214: xmlParseElementDecl(ctxt);
4215: xmlParseAttributeListDecl(ctxt);
4216: xmlParseEntityDecl(ctxt);
4217: xmlParseNotationDecl(ctxt);
4218: xmlParsePI(ctxt);
1.114 daniel 4219: xmlParseComment(ctxt);
1.98 daniel 4220: /*
4221: * This is only for internal subset. On external entities,
4222: * the replacement is done before parsing stage
4223: */
4224: if ((ctxt->external == 0) && (ctxt->inputNr == 1))
4225: xmlParsePEReference(ctxt);
1.97 daniel 4226: ctxt->instate = XML_PARSER_DTD;
1.22 daniel 4227: }
4228:
1.50 daniel 4229: /**
1.76 daniel 4230: * xmlParseTextDecl:
4231: * @ctxt: an XML parser context
4232: *
4233: * parse an XML declaration header for external entities
4234: *
4235: * [77] TextDecl ::= '<?xml' VersionInfo? EncodingDecl S? '?>'
4236: *
4237: * Returns the only valuable info for an external parsed entity, the encoding
4238: */
4239:
1.123 daniel 4240: xmlChar *
1.76 daniel 4241: xmlParseTextDecl(xmlParserCtxtPtr ctxt) {
1.123 daniel 4242: xmlChar *version;
4243: xmlChar *encoding = NULL;
1.76 daniel 4244:
4245: /*
4246: * We know that '<?xml' is here.
4247: */
4248: SKIP(5);
4249:
4250: if (!IS_BLANK(CUR)) {
4251: if ((ctxt->sax != NULL) && (ctxt->sax->error != NULL))
1.122 daniel 4252: ctxt->sax->error(ctxt->userData,
4253: "Space needed after '<?xml'\n");
1.123 daniel 4254: ctxt->errNo = XML_ERR_SPACE_REQUIRED;
1.76 daniel 4255: ctxt->wellFormed = 0;
4256: }
4257: SKIP_BLANKS;
4258:
4259: /*
4260: * We may have the VersionInfo here.
4261: */
4262: version = xmlParseVersionInfo(ctxt);
4263: if (version == NULL)
4264: version = xmlCharStrdup(XML_DEFAULT_VERSION);
4265: ctxt->version = xmlStrdup(version);
1.119 daniel 4266: xmlFree(version);
1.76 daniel 4267:
4268: /*
4269: * We must have the encoding declaration
4270: */
4271: if (!IS_BLANK(CUR)) {
4272: if ((ctxt->sax != NULL) && (ctxt->sax->error != NULL))
1.122 daniel 4273: ctxt->sax->error(ctxt->userData, "Space needed here\n");
1.123 daniel 4274: ctxt->errNo = XML_ERR_SPACE_REQUIRED;
1.76 daniel 4275: ctxt->wellFormed = 0;
4276: }
4277: encoding = xmlParseEncodingDecl(ctxt);
4278:
4279: SKIP_BLANKS;
4280: if ((CUR == '?') && (NXT(1) == '>')) {
4281: SKIP(2);
4282: } else if (CUR == '>') {
4283: /* Deprecated old WD ... */
4284: if ((ctxt->sax != NULL) && (ctxt->sax->error != NULL))
1.122 daniel 4285: ctxt->sax->error(ctxt->userData,
4286: "XML declaration must end-up with '?>'\n");
1.123 daniel 4287: ctxt->errNo = XML_ERR_XMLDECL_NOT_FINISHED;
1.76 daniel 4288: ctxt->wellFormed = 0;
4289: NEXT;
4290: } else {
4291: if ((ctxt->sax != NULL) && (ctxt->sax->error != NULL))
1.122 daniel 4292: ctxt->sax->error(ctxt->userData,
4293: "parsing XML declaration: '?>' expected\n");
1.123 daniel 4294: ctxt->errNo = XML_ERR_XMLDECL_NOT_FINISHED;
1.76 daniel 4295: ctxt->wellFormed = 0;
4296: MOVETO_ENDTAG(CUR_PTR);
4297: NEXT;
4298: }
4299: return(encoding);
4300: }
4301:
4302: /*
4303: * xmlParseConditionalSections
4304: * @ctxt: an XML parser context
4305: *
4306: * TODO : Conditionnal section are not yet supported !
4307: *
4308: * [61] conditionalSect ::= includeSect | ignoreSect
4309: * [62] includeSect ::= '<![' S? 'INCLUDE' S? '[' extSubsetDecl ']]>'
4310: * [63] ignoreSect ::= '<![' S? 'IGNORE' S? '[' ignoreSectContents* ']]>'
4311: * [64] ignoreSectContents ::= Ignore ('<![' ignoreSectContents ']]>' Ignore)*
4312: * [65] Ignore ::= Char* - (Char* ('<![' | ']]>') Char*)
4313: */
4314:
4315: void
4316: xmlParseConditionalSections(xmlParserCtxtPtr ctxt) {
4317: if ((ctxt->sax != NULL) && (ctxt->sax->warning != NULL))
4318: ctxt->sax->warning(ctxt->userData,
4319: "XML conditional section not supported\n");
4320: /*
4321: * Skip up to the end of the conditionnal section.
4322: */
4323: while ((CUR != 0) && ((CUR != ']') || (NXT(1) != ']') || (NXT(2) != '>')))
4324: NEXT;
4325: if (CUR == 0) {
4326: if ((ctxt->sax != NULL) && (ctxt->sax->error != NULL))
4327: ctxt->sax->error(ctxt->userData,
4328: "XML conditional section not closed\n");
1.123 daniel 4329: ctxt->errNo = XML_ERR_CONDSEC_NOT_FINISHED;
1.76 daniel 4330: ctxt->wellFormed = 0;
4331: }
4332: }
4333:
4334: /**
1.124 ! daniel 4335: * xmlParseExternalSubset:
1.76 daniel 4336: * @ctxt: an XML parser context
1.124 ! daniel 4337: * @ExternalID: the external identifier
! 4338: * @SystemID: the system identifier (or URL)
1.76 daniel 4339: *
4340: * parse Markup declarations from an external subset
4341: *
4342: * [30] extSubset ::= textDecl? extSubsetDecl
4343: *
4344: * [31] extSubsetDecl ::= (markupdecl | conditionalSect | PEReference | S) *
4345: */
4346: void
1.123 daniel 4347: xmlParseExternalSubset(xmlParserCtxtPtr ctxt, const xmlChar *ExternalID,
4348: const xmlChar *SystemID) {
1.76 daniel 4349: if ((CUR == '<') && (NXT(1) == '?') &&
4350: (NXT(2) == 'x') && (NXT(3) == 'm') &&
4351: (NXT(4) == 'l')) {
4352: xmlParseTextDecl(ctxt);
4353: }
1.79 daniel 4354: if (ctxt->myDoc == NULL) {
1.116 daniel 4355: ctxt->myDoc = xmlNewDoc(BAD_CAST "1.0");
1.79 daniel 4356: }
4357: if ((ctxt->myDoc != NULL) && (ctxt->myDoc->intSubset == NULL))
4358: xmlCreateIntSubset(ctxt->myDoc, NULL, ExternalID, SystemID);
4359:
1.96 daniel 4360: ctxt->instate = XML_PARSER_DTD;
1.101 daniel 4361: ctxt->external = 1;
1.76 daniel 4362: while (((CUR == '<') && (NXT(1) == '?')) ||
4363: ((CUR == '<') && (NXT(1) == '!')) ||
4364: IS_BLANK(CUR)) {
1.123 daniel 4365: const xmlChar *check = CUR_PTR;
1.115 daniel 4366: int cons = ctxt->input->consumed;
4367:
1.76 daniel 4368: if ((CUR == '<') && (NXT(1) == '!') && (NXT(2) == '[')) {
4369: xmlParseConditionalSections(ctxt);
4370: } else if (IS_BLANK(CUR)) {
4371: NEXT;
4372: } else if (CUR == '%') {
4373: xmlParsePEReference(ctxt);
4374: } else
4375: xmlParseMarkupDecl(ctxt);
1.77 daniel 4376:
4377: /*
4378: * Pop-up of finished entities.
4379: */
4380: while ((CUR == 0) && (ctxt->inputNr > 1))
4381: xmlPopInput(ctxt);
4382:
1.115 daniel 4383: if ((CUR_PTR == check) && (cons == ctxt->input->consumed)) {
4384: if ((ctxt->sax != NULL) && (ctxt->sax->error != NULL))
4385: ctxt->sax->error(ctxt->userData,
4386: "Content error in the external subset\n");
4387: ctxt->wellFormed = 0;
1.123 daniel 4388: ctxt->errNo = XML_ERR_EXT_SUBSET_NOT_FINISHED;
1.115 daniel 4389: break;
4390: }
1.76 daniel 4391: }
4392:
4393: if (CUR != 0) {
4394: if ((ctxt->sax != NULL) && (ctxt->sax->error != NULL))
4395: ctxt->sax->error(ctxt->userData,
4396: "Extra content at the end of the document\n");
1.123 daniel 4397: ctxt->errNo = XML_ERR_EXT_SUBSET_NOT_FINISHED;
1.76 daniel 4398: ctxt->wellFormed = 0;
4399: }
4400:
4401: }
4402:
4403: /**
1.77 daniel 4404: * xmlParseReference:
4405: * @ctxt: an XML parser context
4406: *
4407: * parse and handle entity references in content, depending on the SAX
4408: * interface, this may end-up in a call to character() if this is a
1.79 daniel 4409: * CharRef, a predefined entity, if there is no reference() callback.
4410: * or if the parser was asked to switch to that mode.
1.77 daniel 4411: *
4412: * [67] Reference ::= EntityRef | CharRef
4413: */
4414: void
4415: xmlParseReference(xmlParserCtxtPtr ctxt) {
4416: xmlEntityPtr ent;
1.123 daniel 4417: xmlChar *val;
1.77 daniel 4418: if (CUR != '&') return;
4419:
1.113 daniel 4420: if (ctxt->inputNr > 1) {
1.123 daniel 4421: xmlChar cur[2] = { '&' , 0 } ;
1.113 daniel 4422:
4423: if ((ctxt->sax != NULL) && (ctxt->sax->characters != NULL))
4424: ctxt->sax->characters(ctxt->userData, cur, 1);
4425: if (ctxt->token == '&')
4426: ctxt->token = 0;
4427: else {
4428: SKIP(1);
4429: }
4430: return;
4431: }
1.77 daniel 4432: if (NXT(1) == '#') {
1.123 daniel 4433: xmlChar out[2];
1.77 daniel 4434: int val = xmlParseCharRef(ctxt);
1.117 daniel 4435: /* invalid for UTF-8 variable encoding !!!!! */
1.77 daniel 4436: out[0] = val;
4437: out[1] = 0;
4438: if ((ctxt->sax != NULL) && (ctxt->sax->characters != NULL))
4439: ctxt->sax->characters(ctxt->userData, out, 1);
4440: } else {
4441: ent = xmlParseEntityRef(ctxt);
4442: if (ent == NULL) return;
4443: if ((ent->name != NULL) &&
1.113 daniel 4444: (ent->type != XML_INTERNAL_PREDEFINED_ENTITY)) {
4445: if ((ctxt->sax != NULL) && (ctxt->sax->reference != NULL) &&
4446: (ctxt->replaceEntities == 0)) {
4447: /*
4448: * Create a node.
4449: */
4450: ctxt->sax->reference(ctxt->userData, ent->name);
4451: return;
4452: } else if (ctxt->replaceEntities) {
4453: xmlParserInputPtr input;
1.79 daniel 4454:
1.113 daniel 4455: input = xmlNewEntityInputStream(ctxt, ent);
4456: xmlPushInput(ctxt, input);
4457: return;
4458: }
1.77 daniel 4459: }
4460: val = ent->content;
4461: if (val == NULL) return;
4462: /*
4463: * inline the entity.
4464: */
4465: if ((ctxt->sax != NULL) && (ctxt->sax->characters != NULL))
4466: ctxt->sax->characters(ctxt->userData, val, xmlStrlen(val));
4467: }
1.24 daniel 4468: }
4469:
1.50 daniel 4470: /**
4471: * xmlParseEntityRef:
4472: * @ctxt: an XML parser context
4473: *
4474: * parse ENTITY references declarations
1.24 daniel 4475: *
4476: * [68] EntityRef ::= '&' Name ';'
1.68 daniel 4477: *
1.98 daniel 4478: * [ WFC: Entity Declared ]
4479: * In a document without any DTD, a document with only an internal DTD
4480: * subset which contains no parameter entity references, or a document
4481: * with "standalone='yes'", the Name given in the entity reference
4482: * must match that in an entity declaration, except that well-formed
4483: * documents need not declare any of the following entities: amp, lt,
4484: * gt, apos, quot. The declaration of a parameter entity must precede
4485: * any reference to it. Similarly, the declaration of a general entity
4486: * must precede any reference to it which appears in a default value in an
4487: * attribute-list declaration. Note that if entities are declared in the
4488: * external subset or in external parameter entities, a non-validating
4489: * processor is not obligated to read and process their declarations;
4490: * for such documents, the rule that an entity must be declared is a
4491: * well-formedness constraint only if standalone='yes'.
4492: *
4493: * [ WFC: Parsed Entity ]
4494: * An entity reference must not contain the name of an unparsed entity
4495: *
1.77 daniel 4496: * Returns the xmlEntityPtr if found, or NULL otherwise.
1.24 daniel 4497: */
1.77 daniel 4498: xmlEntityPtr
1.55 daniel 4499: xmlParseEntityRef(xmlParserCtxtPtr ctxt) {
1.123 daniel 4500: xmlChar *name;
1.72 daniel 4501: xmlEntityPtr ent = NULL;
1.24 daniel 4502:
1.91 daniel 4503: GROW;
1.111 daniel 4504:
1.40 daniel 4505: if (CUR == '&') {
4506: NEXT;
1.24 daniel 4507: name = xmlParseName(ctxt);
4508: if (name == NULL) {
1.55 daniel 4509: if ((ctxt->sax != NULL) && (ctxt->sax->error != NULL))
1.98 daniel 4510: ctxt->sax->error(ctxt->userData,
4511: "xmlParseEntityRef: no name\n");
1.123 daniel 4512: ctxt->errNo = XML_ERR_NAME_REQUIRED;
1.59 daniel 4513: ctxt->wellFormed = 0;
1.24 daniel 4514: } else {
1.40 daniel 4515: if (CUR == ';') {
4516: NEXT;
1.24 daniel 4517: /*
1.77 daniel 4518: * Ask first SAX for entity resolution, otherwise try the
4519: * predefined set.
4520: */
4521: if (ctxt->sax != NULL) {
4522: if (ctxt->sax->getEntity != NULL)
4523: ent = ctxt->sax->getEntity(ctxt->userData, name);
4524: if (ent == NULL)
4525: ent = xmlGetPredefinedEntity(name);
4526: }
4527: /*
1.98 daniel 4528: * [ WFC: Entity Declared ]
4529: * In a document without any DTD, a document with only an
4530: * internal DTD subset which contains no parameter entity
4531: * references, or a document with "standalone='yes'", the
4532: * Name given in the entity reference must match that in an
4533: * entity declaration, except that well-formed documents
4534: * need not declare any of the following entities: amp, lt,
4535: * gt, apos, quot.
4536: * The declaration of a parameter entity must precede any
4537: * reference to it.
4538: * Similarly, the declaration of a general entity must
4539: * precede any reference to it which appears in a default
4540: * value in an attribute-list declaration. Note that if
4541: * entities are declared in the external subset or in
4542: * external parameter entities, a non-validating processor
4543: * is not obligated to read and process their declarations;
4544: * for such documents, the rule that an entity must be
4545: * declared is a well-formedness constraint only if
4546: * standalone='yes'.
1.59 daniel 4547: */
1.77 daniel 4548: if (ent == NULL) {
1.98 daniel 4549: if ((ctxt->standalone == 1) ||
4550: ((ctxt->hasExternalSubset == 0) &&
4551: (ctxt->hasPErefs == 0))) {
4552: if ((ctxt->sax != NULL) && (ctxt->sax->error != NULL))
1.77 daniel 4553: ctxt->sax->error(ctxt->userData,
4554: "Entity '%s' not defined\n", name);
1.123 daniel 4555: ctxt->errNo = XML_ERR_UNDECLARED_ENTITY;
1.77 daniel 4556: ctxt->wellFormed = 0;
4557: } else {
1.98 daniel 4558: if ((ctxt->sax != NULL) && (ctxt->sax->warning != NULL))
4559: ctxt->sax->warning(ctxt->userData,
4560: "Entity '%s' not defined\n", name);
1.123 daniel 4561: ctxt->errNo = XML_WAR_UNDECLARED_ENTITY;
1.59 daniel 4562: }
1.77 daniel 4563: }
1.59 daniel 4564:
4565: /*
1.98 daniel 4566: * [ WFC: Parsed Entity ]
4567: * An entity reference must not contain the name of an
4568: * unparsed entity
4569: */
4570: else if (ent->type == XML_EXTERNAL_GENERAL_UNPARSED_ENTITY) {
4571: if ((ctxt->sax != NULL) && (ctxt->sax->error != NULL))
4572: ctxt->sax->error(ctxt->userData,
4573: "Entity reference to unparsed entity %s\n", name);
1.123 daniel 4574: ctxt->errNo = XML_ERR_UNPARSED_ENTITY;
1.98 daniel 4575: ctxt->wellFormed = 0;
4576: }
4577:
4578: /*
4579: * [ WFC: No External Entity References ]
4580: * Attribute values cannot contain direct or indirect
4581: * entity references to external entities.
4582: */
4583: else if ((ctxt->instate == XML_PARSER_ATTRIBUTE_VALUE) &&
4584: (ent->type == XML_EXTERNAL_GENERAL_PARSED_ENTITY)) {
4585: if ((ctxt->sax != NULL) && (ctxt->sax->error != NULL))
4586: ctxt->sax->error(ctxt->userData,
4587: "Attribute references external entity '%s'\n", name);
1.123 daniel 4588: ctxt->errNo = XML_ERR_ENTITY_IS_EXTERNAL;
1.98 daniel 4589: ctxt->wellFormed = 0;
4590: }
4591: /*
4592: * [ WFC: No < in Attribute Values ]
4593: * The replacement text of any entity referred to directly or
4594: * indirectly in an attribute value (other than "<") must
4595: * not contain a <.
1.59 daniel 4596: */
1.98 daniel 4597: else if ((ctxt->instate == XML_PARSER_ATTRIBUTE_VALUE) &&
1.116 daniel 4598: (ent != NULL) &&
4599: (xmlStrcmp(ent->name, BAD_CAST "lt")) &&
1.98 daniel 4600: (ent->content != NULL) &&
4601: (xmlStrchr(ent->content, '<'))) {
4602: if ((ctxt->sax != NULL) && (ctxt->sax->error != NULL))
4603: ctxt->sax->error(ctxt->userData,
4604: "'<' in entity '%s' is not allowed in attributes values\n", name);
1.123 daniel 4605: ctxt->errNo = XML_ERR_LT_IN_ATTRIBUTE;
1.98 daniel 4606: ctxt->wellFormed = 0;
4607: }
4608:
4609: /*
4610: * Internal check, no parameter entities here ...
4611: */
4612: else {
1.59 daniel 4613: switch (ent->type) {
4614: case XML_INTERNAL_PARAMETER_ENTITY:
4615: case XML_EXTERNAL_PARAMETER_ENTITY:
4616: if ((ctxt->sax != NULL) && (ctxt->sax->error != NULL))
1.74 daniel 4617: ctxt->sax->error(ctxt->userData,
1.59 daniel 4618: "Attempt to reference the parameter entity '%s'\n", name);
1.123 daniel 4619: ctxt->errNo = XML_ERR_ENTITY_IS_PARAMETER;
1.59 daniel 4620: ctxt->wellFormed = 0;
4621: break;
4622: }
4623: }
4624:
4625: /*
1.98 daniel 4626: * [ WFC: No Recursion ]
1.117 daniel 4627: * TODO A parsed entity must not contain a recursive reference
4628: * to itself, either directly or indirectly.
1.59 daniel 4629: */
1.77 daniel 4630:
1.24 daniel 4631: } else {
1.55 daniel 4632: if ((ctxt->sax != NULL) && (ctxt->sax->error != NULL))
1.74 daniel 4633: ctxt->sax->error(ctxt->userData,
1.59 daniel 4634: "xmlParseEntityRef: expecting ';'\n");
1.123 daniel 4635: ctxt->errNo = XML_ERR_ENTITYREF_SEMICOL_MISSING;
1.59 daniel 4636: ctxt->wellFormed = 0;
1.24 daniel 4637: }
1.119 daniel 4638: xmlFree(name);
1.24 daniel 4639: }
4640: }
1.77 daniel 4641: return(ent);
1.24 daniel 4642: }
4643:
1.50 daniel 4644: /**
4645: * xmlParsePEReference:
4646: * @ctxt: an XML parser context
4647: *
4648: * parse PEReference declarations
1.77 daniel 4649: * The entity content is handled directly by pushing it's content as
4650: * a new input stream.
1.22 daniel 4651: *
4652: * [69] PEReference ::= '%' Name ';'
1.68 daniel 4653: *
1.98 daniel 4654: * [ WFC: No Recursion ]
4655: * TODO A parsed entity must not contain a recursive
4656: * reference to itself, either directly or indirectly.
4657: *
4658: * [ WFC: Entity Declared ]
4659: * In a document without any DTD, a document with only an internal DTD
4660: * subset which contains no parameter entity references, or a document
4661: * with "standalone='yes'", ... ... The declaration of a parameter
4662: * entity must precede any reference to it...
4663: *
4664: * [ VC: Entity Declared ]
4665: * In a document with an external subset or external parameter entities
4666: * with "standalone='no'", ... ... The declaration of a parameter entity
4667: * must precede any reference to it...
4668: *
4669: * [ WFC: In DTD ]
4670: * Parameter-entity references may only appear in the DTD.
4671: * NOTE: misleading but this is handled.
1.22 daniel 4672: */
1.77 daniel 4673: void
1.55 daniel 4674: xmlParsePEReference(xmlParserCtxtPtr ctxt) {
1.123 daniel 4675: xmlChar *name;
1.72 daniel 4676: xmlEntityPtr entity = NULL;
1.50 daniel 4677: xmlParserInputPtr input;
1.22 daniel 4678:
1.40 daniel 4679: if (CUR == '%') {
4680: NEXT;
1.22 daniel 4681: name = xmlParseName(ctxt);
4682: if (name == NULL) {
1.55 daniel 4683: if ((ctxt->sax != NULL) && (ctxt->sax->error != NULL))
1.122 daniel 4684: ctxt->sax->error(ctxt->userData,
4685: "xmlParsePEReference: no name\n");
1.123 daniel 4686: ctxt->errNo = XML_ERR_NAME_REQUIRED;
1.59 daniel 4687: ctxt->wellFormed = 0;
1.22 daniel 4688: } else {
1.40 daniel 4689: if (CUR == ';') {
4690: NEXT;
1.98 daniel 4691: if ((ctxt->sax != NULL) &&
4692: (ctxt->sax->getParameterEntity != NULL))
4693: entity = ctxt->sax->getParameterEntity(ctxt->userData,
4694: name);
1.45 daniel 4695: if (entity == NULL) {
1.98 daniel 4696: /*
4697: * [ WFC: Entity Declared ]
4698: * In a document without any DTD, a document with only an
4699: * internal DTD subset which contains no parameter entity
4700: * references, or a document with "standalone='yes'", ...
4701: * ... The declaration of a parameter entity must precede
4702: * any reference to it...
4703: */
4704: if ((ctxt->standalone == 1) ||
4705: ((ctxt->hasExternalSubset == 0) &&
4706: (ctxt->hasPErefs == 0))) {
4707: if ((ctxt->sax != NULL) && (ctxt->sax->error != NULL))
4708: ctxt->sax->error(ctxt->userData,
4709: "PEReference: %%%s; not found\n", name);
1.123 daniel 4710: ctxt->errNo = XML_ERR_UNDECLARED_ENTITY;
1.98 daniel 4711: ctxt->wellFormed = 0;
4712: } else {
4713: /*
4714: * [ VC: Entity Declared ]
4715: * In a document with an external subset or external
4716: * parameter entities with "standalone='no'", ...
4717: * ... The declaration of a parameter entity must precede
4718: * any reference to it...
4719: */
4720: if ((ctxt->sax != NULL) && (ctxt->sax->warning != NULL))
4721: ctxt->sax->warning(ctxt->userData,
4722: "PEReference: %%%s; not found\n", name);
4723: ctxt->valid = 0;
4724: }
1.50 daniel 4725: } else {
1.98 daniel 4726: /*
4727: * Internal checking in case the entity quest barfed
4728: */
4729: if ((entity->type != XML_INTERNAL_PARAMETER_ENTITY) &&
4730: (entity->type != XML_EXTERNAL_PARAMETER_ENTITY)) {
4731: if ((ctxt->sax != NULL) && (ctxt->sax->warning != NULL))
4732: ctxt->sax->warning(ctxt->userData,
4733: "Internal: %%%s; is not a parameter entity\n", name);
4734: } else {
4735: input = xmlNewEntityInputStream(ctxt, entity);
4736: xmlPushInput(ctxt, input);
4737: }
1.45 daniel 4738: }
1.98 daniel 4739: ctxt->hasPErefs = 1;
1.22 daniel 4740: } else {
1.55 daniel 4741: if ((ctxt->sax != NULL) && (ctxt->sax->error != NULL))
1.74 daniel 4742: ctxt->sax->error(ctxt->userData,
1.59 daniel 4743: "xmlParsePEReference: expecting ';'\n");
1.123 daniel 4744: ctxt->errNo = XML_ERR_ENTITYREF_SEMICOL_MISSING;
1.59 daniel 4745: ctxt->wellFormed = 0;
1.22 daniel 4746: }
1.119 daniel 4747: xmlFree(name);
1.3 veillard 4748: }
4749: }
4750: }
4751:
1.50 daniel 4752: /**
4753: * xmlParseDocTypeDecl :
4754: * @ctxt: an XML parser context
4755: *
4756: * parse a DOCTYPE declaration
1.21 daniel 4757: *
1.22 daniel 4758: * [28] doctypedecl ::= '<!DOCTYPE' S Name (S ExternalID)? S?
4759: * ('[' (markupdecl | PEReference | S)* ']' S?)? '>'
1.98 daniel 4760: *
4761: * [ VC: Root Element Type ]
1.99 daniel 4762: * The Name in the document type declaration must match the element
1.98 daniel 4763: * type of the root element.
1.21 daniel 4764: */
4765:
1.55 daniel 4766: void
4767: xmlParseDocTypeDecl(xmlParserCtxtPtr ctxt) {
1.123 daniel 4768: xmlChar *name;
4769: xmlChar *ExternalID = NULL;
4770: xmlChar *URI = NULL;
1.21 daniel 4771:
4772: /*
4773: * We know that '<!DOCTYPE' has been detected.
4774: */
1.40 daniel 4775: SKIP(9);
1.21 daniel 4776:
1.42 daniel 4777: SKIP_BLANKS;
1.21 daniel 4778:
4779: /*
4780: * Parse the DOCTYPE name.
4781: */
4782: name = xmlParseName(ctxt);
4783: if (name == NULL) {
1.55 daniel 4784: if ((ctxt->sax != NULL) && (ctxt->sax->error != NULL))
1.122 daniel 4785: ctxt->sax->error(ctxt->userData,
4786: "xmlParseDocTypeDecl : no DOCTYPE name !\n");
1.59 daniel 4787: ctxt->wellFormed = 0;
1.123 daniel 4788: ctxt->errNo = XML_ERR_NAME_REQUIRED;
1.21 daniel 4789: }
4790:
1.42 daniel 4791: SKIP_BLANKS;
1.21 daniel 4792:
4793: /*
1.22 daniel 4794: * Check for SystemID and ExternalID
4795: */
1.67 daniel 4796: URI = xmlParseExternalID(ctxt, &ExternalID, 1);
1.98 daniel 4797:
4798: if ((URI != NULL) || (ExternalID != NULL)) {
4799: ctxt->hasExternalSubset = 1;
4800: }
4801:
1.42 daniel 4802: SKIP_BLANKS;
1.36 daniel 4803:
1.76 daniel 4804: /*
4805: * NOTE: the SAX callback may try to fetch the external subset
4806: * entity and fill it up !
4807: */
1.72 daniel 4808: if ((ctxt->sax != NULL) && (ctxt->sax->internalSubset != NULL))
1.74 daniel 4809: ctxt->sax->internalSubset(ctxt->userData, name, ExternalID, URI);
1.22 daniel 4810:
4811: /*
4812: * Is there any DTD definition ?
4813: */
1.40 daniel 4814: if (CUR == '[') {
1.96 daniel 4815: ctxt->instate = XML_PARSER_DTD;
1.40 daniel 4816: NEXT;
1.22 daniel 4817: /*
4818: * Parse the succession of Markup declarations and
4819: * PEReferences.
4820: * Subsequence (markupdecl | PEReference | S)*
4821: */
1.40 daniel 4822: while (CUR != ']') {
1.123 daniel 4823: const xmlChar *check = CUR_PTR;
1.115 daniel 4824: int cons = ctxt->input->consumed;
1.22 daniel 4825:
1.42 daniel 4826: SKIP_BLANKS;
1.22 daniel 4827: xmlParseMarkupDecl(ctxt);
1.50 daniel 4828: xmlParsePEReference(ctxt);
1.22 daniel 4829:
1.115 daniel 4830: /*
4831: * Pop-up of finished entities.
4832: */
4833: while ((CUR == 0) && (ctxt->inputNr > 1))
4834: xmlPopInput(ctxt);
4835:
1.118 daniel 4836: if ((CUR_PTR == check) && (cons == ctxt->input->consumed)) {
1.55 daniel 4837: if ((ctxt->sax != NULL) && (ctxt->sax->error != NULL))
1.74 daniel 4838: ctxt->sax->error(ctxt->userData,
1.31 daniel 4839: "xmlParseDocTypeDecl: error detected in Markup declaration\n");
1.59 daniel 4840: ctxt->wellFormed = 0;
1.123 daniel 4841: ctxt->errNo = XML_ERR_INTERNAL_ERROR;
1.22 daniel 4842: break;
4843: }
4844: }
1.40 daniel 4845: if (CUR == ']') NEXT;
1.22 daniel 4846: }
4847:
4848: /*
4849: * We should be at the end of the DOCTYPE declaration.
1.21 daniel 4850: */
1.40 daniel 4851: if (CUR != '>') {
1.55 daniel 4852: if ((ctxt->sax != NULL) && (ctxt->sax->error != NULL))
1.74 daniel 4853: ctxt->sax->error(ctxt->userData, "DOCTYPE unproperly terminated\n");
1.59 daniel 4854: ctxt->wellFormed = 0;
1.123 daniel 4855: ctxt->errNo = XML_ERR_DOCTYPE_NOT_FINISHED;
1.21 daniel 4856: }
1.40 daniel 4857: NEXT;
1.22 daniel 4858:
4859: /*
1.99 daniel 4860: * Cleanup
1.22 daniel 4861: */
1.119 daniel 4862: if (URI != NULL) xmlFree(URI);
4863: if (ExternalID != NULL) xmlFree(ExternalID);
4864: if (name != NULL) xmlFree(name);
1.21 daniel 4865: }
4866:
1.50 daniel 4867: /**
4868: * xmlParseAttribute:
4869: * @ctxt: an XML parser context
1.123 daniel 4870: * @value: a xmlChar ** used to store the value of the attribute
1.50 daniel 4871: *
4872: * parse an attribute
1.3 veillard 4873: *
1.22 daniel 4874: * [41] Attribute ::= Name Eq AttValue
4875: *
1.98 daniel 4876: * [ WFC: No External Entity References ]
4877: * Attribute values cannot contain direct or indirect entity references
4878: * to external entities.
4879: *
4880: * [ WFC: No < in Attribute Values ]
4881: * The replacement text of any entity referred to directly or indirectly in
4882: * an attribute value (other than "<") must not contain a <.
4883: *
4884: * [ VC: Attribute Value Type ]
1.117 daniel 4885: * The attribute must have been declared; the value must be of the type
1.99 daniel 4886: * declared for it.
1.98 daniel 4887: *
1.22 daniel 4888: * [25] Eq ::= S? '=' S?
4889: *
1.29 daniel 4890: * With namespace:
4891: *
4892: * [NS 11] Attribute ::= QName Eq AttValue
1.43 daniel 4893: *
4894: * Also the case QName == xmlns:??? is handled independently as a namespace
4895: * definition.
1.69 daniel 4896: *
1.72 daniel 4897: * Returns the attribute name, and the value in *value.
1.3 veillard 4898: */
4899:
1.123 daniel 4900: xmlChar *
4901: xmlParseAttribute(xmlParserCtxtPtr ctxt, xmlChar **value) {
4902: xmlChar *name, *val;
1.3 veillard 4903:
1.72 daniel 4904: *value = NULL;
4905: name = xmlParseName(ctxt);
1.22 daniel 4906: if (name == NULL) {
1.55 daniel 4907: if ((ctxt->sax != NULL) && (ctxt->sax->error != NULL))
1.74 daniel 4908: ctxt->sax->error(ctxt->userData, "error parsing attribute name\n");
1.59 daniel 4909: ctxt->wellFormed = 0;
1.123 daniel 4910: ctxt->errNo = XML_ERR_NAME_REQUIRED;
1.52 daniel 4911: return(NULL);
1.3 veillard 4912: }
4913:
4914: /*
1.29 daniel 4915: * read the value
1.3 veillard 4916: */
1.42 daniel 4917: SKIP_BLANKS;
1.40 daniel 4918: if (CUR == '=') {
4919: NEXT;
1.42 daniel 4920: SKIP_BLANKS;
1.72 daniel 4921: val = xmlParseAttValue(ctxt);
1.96 daniel 4922: ctxt->instate = XML_PARSER_CONTENT;
1.29 daniel 4923: } else {
1.55 daniel 4924: if ((ctxt->sax != NULL) && (ctxt->sax->error != NULL))
1.74 daniel 4925: ctxt->sax->error(ctxt->userData,
1.59 daniel 4926: "Specification mandate value for attribute %s\n", name);
1.123 daniel 4927: ctxt->errNo = XML_ERR_ATTRIBUTE_WITHOUT_VALUE;
1.59 daniel 4928: ctxt->wellFormed = 0;
1.52 daniel 4929: return(NULL);
1.43 daniel 4930: }
4931:
1.72 daniel 4932: *value = val;
4933: return(name);
1.3 veillard 4934: }
4935:
1.50 daniel 4936: /**
4937: * xmlParseStartTag:
4938: * @ctxt: an XML parser context
4939: *
4940: * parse a start of tag either for rule element or
4941: * EmptyElement. In both case we don't parse the tag closing chars.
1.27 daniel 4942: *
4943: * [40] STag ::= '<' Name (S Attribute)* S? '>'
4944: *
1.98 daniel 4945: * [ WFC: Unique Att Spec ]
4946: * No attribute name may appear more than once in the same start-tag or
4947: * empty-element tag.
4948: *
1.29 daniel 4949: * [44] EmptyElemTag ::= '<' Name (S Attribute)* S? '/>'
4950: *
1.98 daniel 4951: * [ WFC: Unique Att Spec ]
4952: * No attribute name may appear more than once in the same start-tag or
4953: * empty-element tag.
4954: *
1.29 daniel 4955: * With namespace:
4956: *
4957: * [NS 8] STag ::= '<' QName (S Attribute)* S? '>'
4958: *
4959: * [NS 10] EmptyElement ::= '<' QName (S Attribute)* S? '/>'
1.83 daniel 4960: *
4961: * Returns the element name parsed
1.2 veillard 4962: */
4963:
1.123 daniel 4964: xmlChar *
1.69 daniel 4965: xmlParseStartTag(xmlParserCtxtPtr ctxt) {
1.123 daniel 4966: xmlChar *name;
4967: xmlChar *attname;
4968: xmlChar *attvalue;
4969: const xmlChar **atts = NULL;
1.72 daniel 4970: int nbatts = 0;
4971: int maxatts = 0;
4972: int i;
1.2 veillard 4973:
1.83 daniel 4974: if (CUR != '<') return(NULL);
1.40 daniel 4975: NEXT;
1.3 veillard 4976:
1.72 daniel 4977: name = xmlParseName(ctxt);
1.59 daniel 4978: if (name == NULL) {
4979: if ((ctxt->sax != NULL) && (ctxt->sax->error != NULL))
1.74 daniel 4980: ctxt->sax->error(ctxt->userData,
1.59 daniel 4981: "xmlParseStartTag: invalid element name\n");
1.123 daniel 4982: ctxt->errNo = XML_ERR_NAME_REQUIRED;
1.59 daniel 4983: ctxt->wellFormed = 0;
1.83 daniel 4984: return(NULL);
1.50 daniel 4985: }
4986:
4987: /*
1.3 veillard 4988: * Now parse the attributes, it ends up with the ending
4989: *
4990: * (S Attribute)* S?
4991: */
1.42 daniel 4992: SKIP_BLANKS;
1.91 daniel 4993: GROW;
1.40 daniel 4994: while ((IS_CHAR(CUR)) &&
4995: (CUR != '>') &&
4996: ((CUR != '/') || (NXT(1) != '>'))) {
1.123 daniel 4997: const xmlChar *q = CUR_PTR;
1.91 daniel 4998: int cons = ctxt->input->consumed;
1.29 daniel 4999:
1.72 daniel 5000: attname = xmlParseAttribute(ctxt, &attvalue);
5001: if ((attname != NULL) && (attvalue != NULL)) {
5002: /*
1.98 daniel 5003: * [ WFC: Unique Att Spec ]
5004: * No attribute name may appear more than once in the same
5005: * start-tag or empty-element tag.
1.72 daniel 5006: */
5007: for (i = 0; i < nbatts;i += 2) {
5008: if (!xmlStrcmp(atts[i], attname)) {
5009: if ((ctxt->sax != NULL) && (ctxt->sax->error != NULL))
1.98 daniel 5010: ctxt->sax->error(ctxt->userData,
5011: "Attribute %s redefined\n",
5012: attname);
1.72 daniel 5013: ctxt->wellFormed = 0;
1.123 daniel 5014: ctxt->errNo = XML_ERR_ATTRIBUTE_REDEFINED;
1.119 daniel 5015: xmlFree(attname);
5016: xmlFree(attvalue);
1.98 daniel 5017: goto failed;
1.72 daniel 5018: }
5019: }
5020:
5021: /*
5022: * Add the pair to atts
5023: */
5024: if (atts == NULL) {
5025: maxatts = 10;
1.123 daniel 5026: atts = (const xmlChar **) xmlMalloc(maxatts * sizeof(xmlChar *));
1.72 daniel 5027: if (atts == NULL) {
1.86 daniel 5028: fprintf(stderr, "malloc of %ld byte failed\n",
1.123 daniel 5029: maxatts * (long)sizeof(xmlChar *));
1.83 daniel 5030: return(NULL);
1.72 daniel 5031: }
5032: } else if (nbatts + 2 < maxatts) {
5033: maxatts *= 2;
1.123 daniel 5034: atts = (const xmlChar **) xmlRealloc(atts,
5035: maxatts * sizeof(xmlChar *));
1.72 daniel 5036: if (atts == NULL) {
1.86 daniel 5037: fprintf(stderr, "realloc of %ld byte failed\n",
1.123 daniel 5038: maxatts * (long)sizeof(xmlChar *));
1.83 daniel 5039: return(NULL);
1.72 daniel 5040: }
5041: }
5042: atts[nbatts++] = attname;
5043: atts[nbatts++] = attvalue;
5044: atts[nbatts] = NULL;
5045: atts[nbatts + 1] = NULL;
5046: }
5047:
1.116 daniel 5048: failed:
1.42 daniel 5049: SKIP_BLANKS;
1.91 daniel 5050: if ((cons == ctxt->input->consumed) && (q == CUR_PTR)) {
1.55 daniel 5051: if ((ctxt->sax != NULL) && (ctxt->sax->error != NULL))
1.74 daniel 5052: ctxt->sax->error(ctxt->userData,
1.31 daniel 5053: "xmlParseStartTag: problem parsing attributes\n");
1.123 daniel 5054: ctxt->errNo = XML_ERR_INTERNAL_ERROR;
1.59 daniel 5055: ctxt->wellFormed = 0;
1.29 daniel 5056: break;
1.3 veillard 5057: }
1.91 daniel 5058: GROW;
1.3 veillard 5059: }
5060:
1.43 daniel 5061: /*
1.72 daniel 5062: * SAX: Start of Element !
1.43 daniel 5063: */
1.72 daniel 5064: if ((ctxt->sax != NULL) && (ctxt->sax->startElement != NULL))
1.74 daniel 5065: ctxt->sax->startElement(ctxt->userData, name, atts);
1.43 daniel 5066:
1.72 daniel 5067: if (atts != NULL) {
1.123 daniel 5068: for (i = 0;i < nbatts;i++) xmlFree((xmlChar *) atts[i]);
1.119 daniel 5069: xmlFree(atts);
1.72 daniel 5070: }
1.83 daniel 5071: return(name);
1.3 veillard 5072: }
5073:
1.50 daniel 5074: /**
5075: * xmlParseEndTag:
5076: * @ctxt: an XML parser context
1.83 daniel 5077: * @tagname: the tag name as parsed in the opening tag.
1.50 daniel 5078: *
5079: * parse an end of tag
1.27 daniel 5080: *
5081: * [42] ETag ::= '</' Name S? '>'
1.29 daniel 5082: *
5083: * With namespace
5084: *
1.72 daniel 5085: * [NS 9] ETag ::= '</' QName S? '>'
1.7 veillard 5086: */
5087:
1.55 daniel 5088: void
1.123 daniel 5089: xmlParseEndTag(xmlParserCtxtPtr ctxt, xmlChar *tagname) {
5090: xmlChar *name;
1.7 veillard 5091:
1.91 daniel 5092: GROW;
1.40 daniel 5093: if ((CUR != '<') || (NXT(1) != '/')) {
1.55 daniel 5094: if ((ctxt->sax != NULL) && (ctxt->sax->error != NULL))
1.74 daniel 5095: ctxt->sax->error(ctxt->userData, "xmlParseEndTag: '</' not found\n");
1.59 daniel 5096: ctxt->wellFormed = 0;
1.123 daniel 5097: ctxt->errNo = XML_ERR_LTSLASH_REQUIRED;
1.27 daniel 5098: return;
5099: }
1.40 daniel 5100: SKIP(2);
1.7 veillard 5101:
1.72 daniel 5102: name = xmlParseName(ctxt);
1.7 veillard 5103:
5104: /*
5105: * We should definitely be at the ending "S? '>'" part
5106: */
1.91 daniel 5107: GROW;
1.42 daniel 5108: SKIP_BLANKS;
1.40 daniel 5109: if ((!IS_CHAR(CUR)) || (CUR != '>')) {
1.55 daniel 5110: if ((ctxt->sax != NULL) && (ctxt->sax->error != NULL))
1.74 daniel 5111: ctxt->sax->error(ctxt->userData, "End tag : expected '>'\n");
1.123 daniel 5112: ctxt->errNo = XML_ERR_GT_REQUIRED;
1.59 daniel 5113: ctxt->wellFormed = 0;
1.7 veillard 5114: } else
1.40 daniel 5115: NEXT;
1.7 veillard 5116:
1.72 daniel 5117: /*
1.98 daniel 5118: * [ WFC: Element Type Match ]
5119: * The Name in an element's end-tag must match the element type in the
5120: * start-tag.
5121: *
1.83 daniel 5122: */
5123: if (xmlStrcmp(name, tagname)) {
5124: if ((ctxt->sax != NULL) && (ctxt->sax->error != NULL))
5125: ctxt->sax->error(ctxt->userData,
5126: "Opening and ending tag mismatch: %s and %s\n", tagname, name);
1.122 daniel 5127:
1.123 daniel 5128: ctxt->errNo = XML_ERR_TAG_NAME_MISMATCH;
1.83 daniel 5129: ctxt->wellFormed = 0;
5130: }
5131:
5132: /*
1.72 daniel 5133: * SAX: End of Tag
5134: */
5135: if ((ctxt->sax != NULL) && (ctxt->sax->endElement != NULL))
1.74 daniel 5136: ctxt->sax->endElement(ctxt->userData, name);
1.72 daniel 5137:
5138: if (name != NULL)
1.119 daniel 5139: xmlFree(name);
1.72 daniel 5140:
1.7 veillard 5141: return;
5142: }
5143:
1.50 daniel 5144: /**
5145: * xmlParseCDSect:
5146: * @ctxt: an XML parser context
5147: *
5148: * Parse escaped pure raw content.
1.29 daniel 5149: *
5150: * [18] CDSect ::= CDStart CData CDEnd
5151: *
5152: * [19] CDStart ::= '<![CDATA['
5153: *
5154: * [20] Data ::= (Char* - (Char* ']]>' Char*))
5155: *
5156: * [21] CDEnd ::= ']]>'
1.3 veillard 5157: */
1.55 daniel 5158: void
5159: xmlParseCDSect(xmlParserCtxtPtr ctxt) {
1.123 daniel 5160: const xmlChar *base;
5161: xmlChar r, s;
5162: xmlChar cur;
1.3 veillard 5163:
1.106 daniel 5164: if ((NXT(0) == '<') && (NXT(1) == '!') &&
1.40 daniel 5165: (NXT(2) == '[') && (NXT(3) == 'C') &&
5166: (NXT(4) == 'D') && (NXT(5) == 'A') &&
5167: (NXT(6) == 'T') && (NXT(7) == 'A') &&
5168: (NXT(8) == '[')) {
5169: SKIP(9);
1.29 daniel 5170: } else
1.45 daniel 5171: return;
1.109 daniel 5172:
5173: ctxt->instate = XML_PARSER_CDATA_SECTION;
1.40 daniel 5174: base = CUR_PTR;
5175: if (!IS_CHAR(CUR)) {
1.55 daniel 5176: if ((ctxt->sax != NULL) && (ctxt->sax->error != NULL))
1.122 daniel 5177: ctxt->sax->error(ctxt->userData,
5178: "CData section not finished\n%.50s\n", base);
1.59 daniel 5179: ctxt->wellFormed = 0;
1.123 daniel 5180: ctxt->errNo = XML_ERR_CDATA_NOT_FINISHED;
1.109 daniel 5181: ctxt->instate = XML_PARSER_CONTENT;
1.45 daniel 5182: return;
1.3 veillard 5183: }
1.110 daniel 5184: r = CUR;
1.91 daniel 5185: NEXT;
1.40 daniel 5186: if (!IS_CHAR(CUR)) {
1.55 daniel 5187: if ((ctxt->sax != NULL) && (ctxt->sax->error != NULL))
1.122 daniel 5188: ctxt->sax->error(ctxt->userData,
5189: "CData section not finished\n%.50s\n", base);
1.123 daniel 5190: ctxt->errNo = XML_ERR_CDATA_NOT_FINISHED;
1.59 daniel 5191: ctxt->wellFormed = 0;
1.109 daniel 5192: ctxt->instate = XML_PARSER_CONTENT;
1.45 daniel 5193: return;
1.3 veillard 5194: }
1.110 daniel 5195: s = CUR;
1.91 daniel 5196: NEXT;
1.108 veillard 5197: cur = CUR;
5198: while (IS_CHAR(cur) &&
1.110 daniel 5199: ((r != ']') || (s != ']') || (cur != '>'))) {
5200: r = s;
5201: s = cur;
5202: NEXT;
1.108 veillard 5203: cur = CUR;
1.3 veillard 5204: }
1.109 daniel 5205: ctxt->instate = XML_PARSER_CONTENT;
1.40 daniel 5206: if (!IS_CHAR(CUR)) {
1.55 daniel 5207: if ((ctxt->sax != NULL) && (ctxt->sax->error != NULL))
1.122 daniel 5208: ctxt->sax->error(ctxt->userData,
5209: "CData section not finished\n%.50s\n", base);
1.123 daniel 5210: ctxt->errNo = XML_ERR_CDATA_NOT_FINISHED;
1.59 daniel 5211: ctxt->wellFormed = 0;
1.45 daniel 5212: return;
1.3 veillard 5213: }
1.107 daniel 5214: NEXT;
1.16 daniel 5215:
1.45 daniel 5216: /*
5217: * Ok the segment [base CUR_PTR] is to be consumed as chars.
5218: */
5219: if (ctxt->sax != NULL) {
1.107 daniel 5220: if (ctxt->sax->cdataBlock != NULL)
1.110 daniel 5221: ctxt->sax->cdataBlock(ctxt->userData, base, (CUR_PTR - base) - 3);
1.45 daniel 5222: }
1.2 veillard 5223: }
5224:
1.50 daniel 5225: /**
5226: * xmlParseContent:
5227: * @ctxt: an XML parser context
5228: *
5229: * Parse a content:
1.2 veillard 5230: *
1.27 daniel 5231: * [43] content ::= (element | CharData | Reference | CDSect | PI | Comment)*
1.2 veillard 5232: */
5233:
1.55 daniel 5234: void
5235: xmlParseContent(xmlParserCtxtPtr ctxt) {
1.97 daniel 5236: GROW;
1.40 daniel 5237: while ((CUR != '<') || (NXT(1) != '/')) {
1.123 daniel 5238: const xmlChar *test = CUR_PTR;
1.91 daniel 5239: int cons = ctxt->input->consumed;
1.123 daniel 5240: xmlChar tok = ctxt->token;
1.27 daniel 5241:
5242: /*
5243: * First case : a Processing Instruction.
5244: */
1.40 daniel 5245: if ((CUR == '<') && (NXT(1) == '?')) {
1.27 daniel 5246: xmlParsePI(ctxt);
5247: }
1.72 daniel 5248:
1.27 daniel 5249: /*
5250: * Second case : a CDSection
5251: */
1.40 daniel 5252: else if ((CUR == '<') && (NXT(1) == '!') &&
5253: (NXT(2) == '[') && (NXT(3) == 'C') &&
5254: (NXT(4) == 'D') && (NXT(5) == 'A') &&
5255: (NXT(6) == 'T') && (NXT(7) == 'A') &&
5256: (NXT(8) == '[')) {
1.45 daniel 5257: xmlParseCDSect(ctxt);
1.27 daniel 5258: }
1.72 daniel 5259:
1.27 daniel 5260: /*
5261: * Third case : a comment
5262: */
1.40 daniel 5263: else if ((CUR == '<') && (NXT(1) == '!') &&
5264: (NXT(2) == '-') && (NXT(3) == '-')) {
1.114 daniel 5265: xmlParseComment(ctxt);
1.97 daniel 5266: ctxt->instate = XML_PARSER_CONTENT;
1.27 daniel 5267: }
1.72 daniel 5268:
1.27 daniel 5269: /*
5270: * Fourth case : a sub-element.
5271: */
1.40 daniel 5272: else if (CUR == '<') {
1.72 daniel 5273: xmlParseElement(ctxt);
1.45 daniel 5274: }
1.72 daniel 5275:
1.45 daniel 5276: /*
1.50 daniel 5277: * Fifth case : a reference. If if has not been resolved,
5278: * parsing returns it's Name, create the node
1.45 daniel 5279: */
1.97 daniel 5280:
1.45 daniel 5281: else if (CUR == '&') {
1.77 daniel 5282: xmlParseReference(ctxt);
1.27 daniel 5283: }
1.72 daniel 5284:
1.27 daniel 5285: /*
5286: * Last case, text. Note that References are handled directly.
5287: */
5288: else {
1.45 daniel 5289: xmlParseCharData(ctxt, 0);
1.3 veillard 5290: }
1.14 veillard 5291:
1.91 daniel 5292: GROW;
1.14 veillard 5293: /*
1.45 daniel 5294: * Pop-up of finished entities.
1.14 veillard 5295: */
1.69 daniel 5296: while ((CUR == 0) && (ctxt->inputNr > 1))
5297: xmlPopInput(ctxt);
1.45 daniel 5298:
1.113 daniel 5299: if ((cons == ctxt->input->consumed) && (test == CUR_PTR) &&
5300: (tok == ctxt->token)) {
1.55 daniel 5301: if ((ctxt->sax != NULL) && (ctxt->sax->error != NULL))
1.74 daniel 5302: ctxt->sax->error(ctxt->userData,
1.59 daniel 5303: "detected an error in element content\n");
1.123 daniel 5304: ctxt->errNo = XML_ERR_INTERNAL_ERROR;
1.59 daniel 5305: ctxt->wellFormed = 0;
1.29 daniel 5306: break;
5307: }
1.3 veillard 5308: }
1.2 veillard 5309: }
5310:
1.50 daniel 5311: /**
5312: * xmlParseElement:
5313: * @ctxt: an XML parser context
5314: *
5315: * parse an XML element, this is highly recursive
1.26 daniel 5316: *
5317: * [39] element ::= EmptyElemTag | STag content ETag
5318: *
1.98 daniel 5319: * [ WFC: Element Type Match ]
5320: * The Name in an element's end-tag must match the element type in the
5321: * start-tag.
5322: *
5323: * [ VC: Element Valid ]
1.117 daniel 5324: * An element is valid if there is a declaration matching elementdecl
1.99 daniel 5325: * where the Name matches the element type and one of the following holds:
5326: * - The declaration matches EMPTY and the element has no content.
5327: * - The declaration matches children and the sequence of child elements
5328: * belongs to the language generated by the regular expression in the
5329: * content model, with optional white space (characters matching the
5330: * nonterminal S) between each pair of child elements.
5331: * - The declaration matches Mixed and the content consists of character
5332: * data and child elements whose types match names in the content model.
5333: * - The declaration matches ANY, and the types of any child elements have
5334: * been declared.
1.2 veillard 5335: */
1.26 daniel 5336:
1.72 daniel 5337: void
1.69 daniel 5338: xmlParseElement(xmlParserCtxtPtr ctxt) {
1.123 daniel 5339: const xmlChar *openTag = CUR_PTR;
5340: xmlChar *name;
1.32 daniel 5341: xmlParserNodeInfo node_info;
1.118 daniel 5342: xmlNodePtr ret;
1.2 veillard 5343:
1.32 daniel 5344: /* Capture start position */
1.118 daniel 5345: if (ctxt->record_info) {
5346: node_info.begin_pos = ctxt->input->consumed +
5347: (CUR_PTR - ctxt->input->base);
5348: node_info.begin_line = ctxt->input->line;
5349: }
1.32 daniel 5350:
1.83 daniel 5351: name = xmlParseStartTag(ctxt);
5352: if (name == NULL) {
5353: return;
5354: }
1.118 daniel 5355: ret = ctxt->node;
1.2 veillard 5356:
5357: /*
1.99 daniel 5358: * [ VC: Root Element Type ]
5359: * The Name in the document type declaration must match the element
5360: * type of the root element.
5361: */
1.105 daniel 5362: if (ctxt->validate && ctxt->wellFormed && ctxt->myDoc &&
5363: ctxt->node && (ctxt->node == ctxt->myDoc->root))
1.102 daniel 5364: ctxt->valid &= xmlValidateRoot(&ctxt->vctxt, ctxt->myDoc);
1.99 daniel 5365:
5366: /*
1.2 veillard 5367: * Check for an Empty Element.
5368: */
1.40 daniel 5369: if ((CUR == '/') && (NXT(1) == '>')) {
5370: SKIP(2);
1.72 daniel 5371: if ((ctxt->sax != NULL) && (ctxt->sax->endElement != NULL))
1.83 daniel 5372: ctxt->sax->endElement(ctxt->userData, name);
1.119 daniel 5373: xmlFree(name);
1.72 daniel 5374: return;
1.2 veillard 5375: }
1.91 daniel 5376: if (CUR == '>') {
5377: NEXT;
5378: } else {
1.55 daniel 5379: if ((ctxt->sax != NULL) && (ctxt->sax->error != NULL))
1.122 daniel 5380: ctxt->sax->error(ctxt->userData,
5381: "Couldn't find end of Start Tag\n%.30s\n",
1.57 daniel 5382: openTag);
1.59 daniel 5383: ctxt->wellFormed = 0;
1.123 daniel 5384: ctxt->errNo = XML_ERR_GT_REQUIRED;
1.45 daniel 5385:
5386: /*
5387: * end of parsing of this node.
5388: */
5389: nodePop(ctxt);
1.119 daniel 5390: xmlFree(name);
1.118 daniel 5391:
5392: /*
5393: * Capture end position and add node
5394: */
5395: if ( ret != NULL && ctxt->record_info ) {
5396: node_info.end_pos = ctxt->input->consumed +
5397: (CUR_PTR - ctxt->input->base);
5398: node_info.end_line = ctxt->input->line;
5399: node_info.node = ret;
5400: xmlParserAddNodeInfo(ctxt, &node_info);
5401: }
1.72 daniel 5402: return;
1.2 veillard 5403: }
5404:
5405: /*
5406: * Parse the content of the element:
5407: */
1.45 daniel 5408: xmlParseContent(ctxt);
1.40 daniel 5409: if (!IS_CHAR(CUR)) {
1.55 daniel 5410: if ((ctxt->sax != NULL) && (ctxt->sax->error != NULL))
1.74 daniel 5411: ctxt->sax->error(ctxt->userData,
1.57 daniel 5412: "Premature end of data in tag %.30s\n", openTag);
1.59 daniel 5413: ctxt->wellFormed = 0;
1.123 daniel 5414: ctxt->errNo = XML_ERR_TAG_NOT_FINISED;
1.45 daniel 5415:
5416: /*
5417: * end of parsing of this node.
5418: */
5419: nodePop(ctxt);
1.119 daniel 5420: xmlFree(name);
1.72 daniel 5421: return;
1.2 veillard 5422: }
5423:
5424: /*
1.27 daniel 5425: * parse the end of tag: '</' should be here.
1.2 veillard 5426: */
1.83 daniel 5427: xmlParseEndTag(ctxt, name);
1.119 daniel 5428: xmlFree(name);
1.118 daniel 5429:
5430: /*
5431: * Capture end position and add node
5432: */
5433: if ( ret != NULL && ctxt->record_info ) {
5434: node_info.end_pos = ctxt->input->consumed +
5435: (CUR_PTR - ctxt->input->base);
5436: node_info.end_line = ctxt->input->line;
5437: node_info.node = ret;
5438: xmlParserAddNodeInfo(ctxt, &node_info);
5439: }
1.2 veillard 5440: }
5441:
1.50 daniel 5442: /**
5443: * xmlParseVersionNum:
5444: * @ctxt: an XML parser context
5445: *
5446: * parse the XML version value.
1.29 daniel 5447: *
5448: * [26] VersionNum ::= ([a-zA-Z0-9_.:] | '-')+
1.68 daniel 5449: *
5450: * Returns the string giving the XML version number, or NULL
1.29 daniel 5451: */
1.123 daniel 5452: xmlChar *
1.55 daniel 5453: xmlParseVersionNum(xmlParserCtxtPtr ctxt) {
1.123 daniel 5454: const xmlChar *q = CUR_PTR;
5455: xmlChar *ret;
1.29 daniel 5456:
1.40 daniel 5457: while (IS_CHAR(CUR) &&
5458: (((CUR >= 'a') && (CUR <= 'z')) ||
5459: ((CUR >= 'A') && (CUR <= 'Z')) ||
5460: ((CUR >= '0') && (CUR <= '9')) ||
5461: (CUR == '_') || (CUR == '.') ||
5462: (CUR == ':') || (CUR == '-'))) NEXT;
5463: ret = xmlStrndup(q, CUR_PTR - q);
1.29 daniel 5464: return(ret);
5465: }
5466:
1.50 daniel 5467: /**
5468: * xmlParseVersionInfo:
5469: * @ctxt: an XML parser context
5470: *
5471: * parse the XML version.
1.29 daniel 5472: *
5473: * [24] VersionInfo ::= S 'version' Eq (' VersionNum ' | " VersionNum ")
5474: *
5475: * [25] Eq ::= S? '=' S?
1.50 daniel 5476: *
1.68 daniel 5477: * Returns the version string, e.g. "1.0"
1.29 daniel 5478: */
5479:
1.123 daniel 5480: xmlChar *
1.55 daniel 5481: xmlParseVersionInfo(xmlParserCtxtPtr ctxt) {
1.123 daniel 5482: xmlChar *version = NULL;
5483: const xmlChar *q;
1.29 daniel 5484:
1.40 daniel 5485: if ((CUR == 'v') && (NXT(1) == 'e') &&
5486: (NXT(2) == 'r') && (NXT(3) == 's') &&
5487: (NXT(4) == 'i') && (NXT(5) == 'o') &&
5488: (NXT(6) == 'n')) {
5489: SKIP(7);
1.42 daniel 5490: SKIP_BLANKS;
1.40 daniel 5491: if (CUR != '=') {
1.55 daniel 5492: if ((ctxt->sax != NULL) && (ctxt->sax->error != NULL))
1.122 daniel 5493: ctxt->sax->error(ctxt->userData,
5494: "xmlParseVersionInfo : expected '='\n");
1.59 daniel 5495: ctxt->wellFormed = 0;
1.123 daniel 5496: ctxt->errNo = XML_ERR_EQUAL_REQUIRED;
1.31 daniel 5497: return(NULL);
5498: }
1.40 daniel 5499: NEXT;
1.42 daniel 5500: SKIP_BLANKS;
1.40 daniel 5501: if (CUR == '"') {
5502: NEXT;
5503: q = CUR_PTR;
1.29 daniel 5504: version = xmlParseVersionNum(ctxt);
1.55 daniel 5505: if (CUR != '"') {
5506: if ((ctxt->sax != NULL) && (ctxt->sax->error != NULL))
1.122 daniel 5507: ctxt->sax->error(ctxt->userData,
5508: "String not closed\n%.50s\n", q);
1.59 daniel 5509: ctxt->wellFormed = 0;
1.123 daniel 5510: ctxt->errNo = XML_ERR_STRING_NOT_CLOSED;
1.55 daniel 5511: } else
1.40 daniel 5512: NEXT;
5513: } else if (CUR == '\''){
5514: NEXT;
5515: q = CUR_PTR;
1.29 daniel 5516: version = xmlParseVersionNum(ctxt);
1.55 daniel 5517: if (CUR != '\'') {
5518: if ((ctxt->sax != NULL) && (ctxt->sax->error != NULL))
1.122 daniel 5519: ctxt->sax->error(ctxt->userData,
5520: "String not closed\n%.50s\n", q);
1.123 daniel 5521: ctxt->errNo = XML_ERR_STRING_NOT_CLOSED;
1.59 daniel 5522: ctxt->wellFormed = 0;
1.55 daniel 5523: } else
1.40 daniel 5524: NEXT;
1.31 daniel 5525: } else {
1.55 daniel 5526: if ((ctxt->sax != NULL) && (ctxt->sax->error != NULL))
1.74 daniel 5527: ctxt->sax->error(ctxt->userData,
1.59 daniel 5528: "xmlParseVersionInfo : expected ' or \"\n");
1.122 daniel 5529: ctxt->wellFormed = 0;
1.123 daniel 5530: ctxt->errNo = XML_ERR_STRING_NOT_STARTED;
1.29 daniel 5531: }
5532: }
5533: return(version);
5534: }
5535:
1.50 daniel 5536: /**
5537: * xmlParseEncName:
5538: * @ctxt: an XML parser context
5539: *
5540: * parse the XML encoding name
1.29 daniel 5541: *
5542: * [81] EncName ::= [A-Za-z] ([A-Za-z0-9._] | '-')*
1.50 daniel 5543: *
1.68 daniel 5544: * Returns the encoding name value or NULL
1.29 daniel 5545: */
1.123 daniel 5546: xmlChar *
1.55 daniel 5547: xmlParseEncName(xmlParserCtxtPtr ctxt) {
1.123 daniel 5548: const xmlChar *q = CUR_PTR;
5549: xmlChar *ret = NULL;
1.29 daniel 5550:
1.40 daniel 5551: if (((CUR >= 'a') && (CUR <= 'z')) ||
5552: ((CUR >= 'A') && (CUR <= 'Z'))) {
5553: NEXT;
5554: while (IS_CHAR(CUR) &&
5555: (((CUR >= 'a') && (CUR <= 'z')) ||
5556: ((CUR >= 'A') && (CUR <= 'Z')) ||
5557: ((CUR >= '0') && (CUR <= '9')) ||
5558: (CUR == '-'))) NEXT;
5559: ret = xmlStrndup(q, CUR_PTR - q);
1.29 daniel 5560: } else {
1.55 daniel 5561: if ((ctxt->sax != NULL) && (ctxt->sax->error != NULL))
1.74 daniel 5562: ctxt->sax->error(ctxt->userData, "Invalid XML encoding name\n");
1.59 daniel 5563: ctxt->wellFormed = 0;
1.123 daniel 5564: ctxt->errNo = XML_ERR_ENCODING_NAME;
1.29 daniel 5565: }
5566: return(ret);
5567: }
5568:
1.50 daniel 5569: /**
5570: * xmlParseEncodingDecl:
5571: * @ctxt: an XML parser context
5572: *
5573: * parse the XML encoding declaration
1.29 daniel 5574: *
5575: * [80] EncodingDecl ::= S 'encoding' Eq ('"' EncName '"' | "'" EncName "'")
1.50 daniel 5576: *
5577: * TODO: this should setup the conversion filters.
5578: *
1.68 daniel 5579: * Returns the encoding value or NULL
1.29 daniel 5580: */
5581:
1.123 daniel 5582: xmlChar *
1.55 daniel 5583: xmlParseEncodingDecl(xmlParserCtxtPtr ctxt) {
1.123 daniel 5584: xmlChar *encoding = NULL;
5585: const xmlChar *q;
1.29 daniel 5586:
1.42 daniel 5587: SKIP_BLANKS;
1.40 daniel 5588: if ((CUR == 'e') && (NXT(1) == 'n') &&
5589: (NXT(2) == 'c') && (NXT(3) == 'o') &&
5590: (NXT(4) == 'd') && (NXT(5) == 'i') &&
5591: (NXT(6) == 'n') && (NXT(7) == 'g')) {
5592: SKIP(8);
1.42 daniel 5593: SKIP_BLANKS;
1.40 daniel 5594: if (CUR != '=') {
1.55 daniel 5595: if ((ctxt->sax != NULL) && (ctxt->sax->error != NULL))
1.122 daniel 5596: ctxt->sax->error(ctxt->userData,
5597: "xmlParseEncodingDecl : expected '='\n");
1.59 daniel 5598: ctxt->wellFormed = 0;
1.123 daniel 5599: ctxt->errNo = XML_ERR_EQUAL_REQUIRED;
1.31 daniel 5600: return(NULL);
5601: }
1.40 daniel 5602: NEXT;
1.42 daniel 5603: SKIP_BLANKS;
1.40 daniel 5604: if (CUR == '"') {
5605: NEXT;
5606: q = CUR_PTR;
1.29 daniel 5607: encoding = xmlParseEncName(ctxt);
1.55 daniel 5608: if (CUR != '"') {
5609: if ((ctxt->sax != NULL) && (ctxt->sax->error != NULL))
1.122 daniel 5610: ctxt->sax->error(ctxt->userData,
5611: "String not closed\n%.50s\n", q);
1.59 daniel 5612: ctxt->wellFormed = 0;
1.123 daniel 5613: ctxt->errNo = XML_ERR_STRING_NOT_CLOSED;
1.55 daniel 5614: } else
1.40 daniel 5615: NEXT;
5616: } else if (CUR == '\''){
5617: NEXT;
5618: q = CUR_PTR;
1.29 daniel 5619: encoding = xmlParseEncName(ctxt);
1.55 daniel 5620: if (CUR != '\'') {
5621: if ((ctxt->sax != NULL) && (ctxt->sax->error != NULL))
1.122 daniel 5622: ctxt->sax->error(ctxt->userData,
5623: "String not closed\n%.50s\n", q);
1.59 daniel 5624: ctxt->wellFormed = 0;
1.123 daniel 5625: ctxt->errNo = XML_ERR_STRING_NOT_CLOSED;
1.55 daniel 5626: } else
1.40 daniel 5627: NEXT;
5628: } else if (CUR == '"'){
1.55 daniel 5629: if ((ctxt->sax != NULL) && (ctxt->sax->error != NULL))
1.74 daniel 5630: ctxt->sax->error(ctxt->userData,
1.59 daniel 5631: "xmlParseEncodingDecl : expected ' or \"\n");
5632: ctxt->wellFormed = 0;
1.123 daniel 5633: ctxt->errNo = XML_ERR_STRING_NOT_STARTED;
1.29 daniel 5634: }
5635: }
5636: return(encoding);
5637: }
5638:
1.50 daniel 5639: /**
5640: * xmlParseSDDecl:
5641: * @ctxt: an XML parser context
5642: *
5643: * parse the XML standalone declaration
1.29 daniel 5644: *
5645: * [32] SDDecl ::= S 'standalone' Eq
5646: * (("'" ('yes' | 'no') "'") | ('"' ('yes' | 'no')'"'))
1.99 daniel 5647: *
5648: * [ VC: Standalone Document Declaration ]
5649: * TODO The standalone document declaration must have the value "no"
5650: * if any external markup declarations contain declarations of:
5651: * - attributes with default values, if elements to which these
5652: * attributes apply appear in the document without specifications
5653: * of values for these attributes, or
5654: * - entities (other than amp, lt, gt, apos, quot), if references
5655: * to those entities appear in the document, or
5656: * - attributes with values subject to normalization, where the
5657: * attribute appears in the document with a value which will change
5658: * as a result of normalization, or
5659: * - element types with element content, if white space occurs directly
5660: * within any instance of those types.
1.68 daniel 5661: *
5662: * Returns 1 if standalone, 0 otherwise
1.29 daniel 5663: */
5664:
1.55 daniel 5665: int
5666: xmlParseSDDecl(xmlParserCtxtPtr ctxt) {
1.29 daniel 5667: int standalone = -1;
5668:
1.42 daniel 5669: SKIP_BLANKS;
1.40 daniel 5670: if ((CUR == 's') && (NXT(1) == 't') &&
5671: (NXT(2) == 'a') && (NXT(3) == 'n') &&
5672: (NXT(4) == 'd') && (NXT(5) == 'a') &&
5673: (NXT(6) == 'l') && (NXT(7) == 'o') &&
5674: (NXT(8) == 'n') && (NXT(9) == 'e')) {
5675: SKIP(10);
1.81 daniel 5676: SKIP_BLANKS;
1.40 daniel 5677: if (CUR != '=') {
1.55 daniel 5678: if ((ctxt->sax != NULL) && (ctxt->sax->error != NULL))
1.74 daniel 5679: ctxt->sax->error(ctxt->userData,
1.59 daniel 5680: "XML standalone declaration : expected '='\n");
1.123 daniel 5681: ctxt->errNo = XML_ERR_EQUAL_REQUIRED;
1.59 daniel 5682: ctxt->wellFormed = 0;
1.32 daniel 5683: return(standalone);
5684: }
1.40 daniel 5685: NEXT;
1.42 daniel 5686: SKIP_BLANKS;
1.40 daniel 5687: if (CUR == '\''){
5688: NEXT;
5689: if ((CUR == 'n') && (NXT(1) == 'o')) {
1.29 daniel 5690: standalone = 0;
1.40 daniel 5691: SKIP(2);
5692: } else if ((CUR == 'y') && (NXT(1) == 'e') &&
5693: (NXT(2) == 's')) {
1.29 daniel 5694: standalone = 1;
1.40 daniel 5695: SKIP(3);
1.29 daniel 5696: } else {
1.55 daniel 5697: if ((ctxt->sax != NULL) && (ctxt->sax->error != NULL))
1.122 daniel 5698: ctxt->sax->error(ctxt->userData,
5699: "standalone accepts only 'yes' or 'no'\n");
1.123 daniel 5700: ctxt->errNo = XML_ERR_STANDALONE_VALUE;
1.59 daniel 5701: ctxt->wellFormed = 0;
1.29 daniel 5702: }
1.55 daniel 5703: if (CUR != '\'') {
5704: if ((ctxt->sax != NULL) && (ctxt->sax->error != NULL))
1.74 daniel 5705: ctxt->sax->error(ctxt->userData, "String not closed\n");
1.123 daniel 5706: ctxt->errNo = XML_ERR_STRING_NOT_CLOSED;
1.59 daniel 5707: ctxt->wellFormed = 0;
1.55 daniel 5708: } else
1.40 daniel 5709: NEXT;
5710: } else if (CUR == '"'){
5711: NEXT;
5712: if ((CUR == 'n') && (NXT(1) == 'o')) {
1.29 daniel 5713: standalone = 0;
1.40 daniel 5714: SKIP(2);
5715: } else if ((CUR == 'y') && (NXT(1) == 'e') &&
5716: (NXT(2) == 's')) {
1.29 daniel 5717: standalone = 1;
1.40 daniel 5718: SKIP(3);
1.29 daniel 5719: } else {
1.55 daniel 5720: if ((ctxt->sax != NULL) && (ctxt->sax->error != NULL))
1.74 daniel 5721: ctxt->sax->error(ctxt->userData,
1.59 daniel 5722: "standalone accepts only 'yes' or 'no'\n");
1.123 daniel 5723: ctxt->errNo = XML_ERR_STANDALONE_VALUE;
1.59 daniel 5724: ctxt->wellFormed = 0;
1.29 daniel 5725: }
1.55 daniel 5726: if (CUR != '"') {
5727: if ((ctxt->sax != NULL) && (ctxt->sax->error != NULL))
1.74 daniel 5728: ctxt->sax->error(ctxt->userData, "String not closed\n");
1.59 daniel 5729: ctxt->wellFormed = 0;
1.123 daniel 5730: ctxt->errNo = XML_ERR_STRING_NOT_CLOSED;
1.55 daniel 5731: } else
1.40 daniel 5732: NEXT;
1.37 daniel 5733: } else {
1.55 daniel 5734: if ((ctxt->sax != NULL) && (ctxt->sax->error != NULL))
1.122 daniel 5735: ctxt->sax->error(ctxt->userData,
5736: "Standalone value not found\n");
1.59 daniel 5737: ctxt->wellFormed = 0;
1.123 daniel 5738: ctxt->errNo = XML_ERR_STRING_NOT_STARTED;
1.37 daniel 5739: }
1.29 daniel 5740: }
5741: return(standalone);
5742: }
5743:
1.50 daniel 5744: /**
5745: * xmlParseXMLDecl:
5746: * @ctxt: an XML parser context
5747: *
5748: * parse an XML declaration header
1.29 daniel 5749: *
5750: * [23] XMLDecl ::= '<?xml' VersionInfo EncodingDecl? SDDecl? S? '?>'
1.1 veillard 5751: */
5752:
1.55 daniel 5753: void
5754: xmlParseXMLDecl(xmlParserCtxtPtr ctxt) {
1.123 daniel 5755: xmlChar *version;
1.1 veillard 5756:
5757: /*
1.19 daniel 5758: * We know that '<?xml' is here.
1.1 veillard 5759: */
1.40 daniel 5760: SKIP(5);
1.1 veillard 5761:
1.59 daniel 5762: if (!IS_BLANK(CUR)) {
5763: if ((ctxt->sax != NULL) && (ctxt->sax->error != NULL))
1.74 daniel 5764: ctxt->sax->error(ctxt->userData, "Blank needed after '<?xml'\n");
1.123 daniel 5765: ctxt->errNo = XML_ERR_SPACE_REQUIRED;
1.59 daniel 5766: ctxt->wellFormed = 0;
5767: }
1.42 daniel 5768: SKIP_BLANKS;
1.1 veillard 5769:
5770: /*
1.29 daniel 5771: * We should have the VersionInfo here.
1.1 veillard 5772: */
1.29 daniel 5773: version = xmlParseVersionInfo(ctxt);
5774: if (version == NULL)
1.45 daniel 5775: version = xmlCharStrdup(XML_DEFAULT_VERSION);
1.72 daniel 5776: ctxt->version = xmlStrdup(version);
1.119 daniel 5777: xmlFree(version);
1.29 daniel 5778:
5779: /*
5780: * We may have the encoding declaration
5781: */
1.59 daniel 5782: if (!IS_BLANK(CUR)) {
5783: if ((CUR == '?') && (NXT(1) == '>')) {
5784: SKIP(2);
5785: return;
5786: }
5787: if ((ctxt->sax != NULL) && (ctxt->sax->error != NULL))
1.74 daniel 5788: ctxt->sax->error(ctxt->userData, "Blank needed here\n");
1.123 daniel 5789: ctxt->errNo = XML_ERR_SPACE_REQUIRED;
1.59 daniel 5790: ctxt->wellFormed = 0;
5791: }
1.72 daniel 5792: ctxt->encoding = xmlParseEncodingDecl(ctxt);
1.1 veillard 5793:
5794: /*
1.29 daniel 5795: * We may have the standalone status.
1.1 veillard 5796: */
1.72 daniel 5797: if ((ctxt->encoding != NULL) && (!IS_BLANK(CUR))) {
1.59 daniel 5798: if ((CUR == '?') && (NXT(1) == '>')) {
5799: SKIP(2);
5800: return;
5801: }
5802: if ((ctxt->sax != NULL) && (ctxt->sax->error != NULL))
1.74 daniel 5803: ctxt->sax->error(ctxt->userData, "Blank needed here\n");
1.59 daniel 5804: ctxt->wellFormed = 0;
1.123 daniel 5805: ctxt->errNo = XML_ERR_SPACE_REQUIRED;
1.59 daniel 5806: }
5807: SKIP_BLANKS;
1.72 daniel 5808: ctxt->standalone = xmlParseSDDecl(ctxt);
1.1 veillard 5809:
1.42 daniel 5810: SKIP_BLANKS;
1.40 daniel 5811: if ((CUR == '?') && (NXT(1) == '>')) {
5812: SKIP(2);
5813: } else if (CUR == '>') {
1.31 daniel 5814: /* Deprecated old WD ... */
1.55 daniel 5815: if ((ctxt->sax != NULL) && (ctxt->sax->error != NULL))
1.122 daniel 5816: ctxt->sax->error(ctxt->userData,
5817: "XML declaration must end-up with '?>'\n");
1.59 daniel 5818: ctxt->wellFormed = 0;
1.123 daniel 5819: ctxt->errNo = XML_ERR_XMLDECL_NOT_FINISHED;
1.40 daniel 5820: NEXT;
1.29 daniel 5821: } else {
1.55 daniel 5822: if ((ctxt->sax != NULL) && (ctxt->sax->error != NULL))
1.122 daniel 5823: ctxt->sax->error(ctxt->userData,
5824: "parsing XML declaration: '?>' expected\n");
1.59 daniel 5825: ctxt->wellFormed = 0;
1.123 daniel 5826: ctxt->errNo = XML_ERR_XMLDECL_NOT_FINISHED;
1.40 daniel 5827: MOVETO_ENDTAG(CUR_PTR);
5828: NEXT;
1.29 daniel 5829: }
1.1 veillard 5830: }
5831:
1.50 daniel 5832: /**
5833: * xmlParseMisc:
5834: * @ctxt: an XML parser context
5835: *
5836: * parse an XML Misc* optionnal field.
1.21 daniel 5837: *
1.22 daniel 5838: * [27] Misc ::= Comment | PI | S
1.1 veillard 5839: */
5840:
1.55 daniel 5841: void
5842: xmlParseMisc(xmlParserCtxtPtr ctxt) {
1.40 daniel 5843: while (((CUR == '<') && (NXT(1) == '?')) ||
5844: ((CUR == '<') && (NXT(1) == '!') &&
5845: (NXT(2) == '-') && (NXT(3) == '-')) ||
5846: IS_BLANK(CUR)) {
5847: if ((CUR == '<') && (NXT(1) == '?')) {
1.16 daniel 5848: xmlParsePI(ctxt);
1.40 daniel 5849: } else if (IS_BLANK(CUR)) {
5850: NEXT;
1.1 veillard 5851: } else
1.114 daniel 5852: xmlParseComment(ctxt);
1.1 veillard 5853: }
5854: }
5855:
1.50 daniel 5856: /**
5857: * xmlParseDocument :
5858: * @ctxt: an XML parser context
5859: *
5860: * parse an XML document (and build a tree if using the standard SAX
5861: * interface).
1.21 daniel 5862: *
1.22 daniel 5863: * [1] document ::= prolog element Misc*
1.29 daniel 5864: *
5865: * [22] prolog ::= XMLDecl? Misc* (doctypedecl Misc*)?
1.50 daniel 5866: *
1.68 daniel 5867: * Returns 0, -1 in case of error. the parser context is augmented
1.50 daniel 5868: * as a result of the parsing.
1.1 veillard 5869: */
5870:
1.55 daniel 5871: int
5872: xmlParseDocument(xmlParserCtxtPtr ctxt) {
1.45 daniel 5873: xmlDefaultSAXHandlerInit();
5874:
1.91 daniel 5875: GROW;
5876:
1.14 veillard 5877: /*
1.44 daniel 5878: * SAX: beginning of the document processing.
5879: */
1.72 daniel 5880: if ((ctxt->sax) && (ctxt->sax->setDocumentLocator))
1.74 daniel 5881: ctxt->sax->setDocumentLocator(ctxt->userData, &xmlDefaultSAXLocator);
1.44 daniel 5882:
5883: /*
1.117 daniel 5884: * TODO We should check for encoding here and plug-in some
5885: * conversion code !!!!
1.14 veillard 5886: */
1.1 veillard 5887:
5888: /*
5889: * Wipe out everything which is before the first '<'
5890: */
1.59 daniel 5891: if (IS_BLANK(CUR)) {
5892: if ((ctxt->sax != NULL) && (ctxt->sax->error != NULL))
1.74 daniel 5893: ctxt->sax->error(ctxt->userData,
1.59 daniel 5894: "Extra spaces at the beginning of the document are not allowed\n");
1.123 daniel 5895: ctxt->errNo = XML_ERR_DOCUMENT_START;
1.59 daniel 5896: ctxt->wellFormed = 0;
5897: SKIP_BLANKS;
5898: }
5899:
5900: if (CUR == 0) {
5901: if ((ctxt->sax != NULL) && (ctxt->sax->error != NULL))
1.74 daniel 5902: ctxt->sax->error(ctxt->userData, "Document is empty\n");
1.123 daniel 5903: ctxt->errNo = XML_ERR_DOCUMENT_EMPTY;
1.59 daniel 5904: ctxt->wellFormed = 0;
5905: }
1.1 veillard 5906:
5907: /*
5908: * Check for the XMLDecl in the Prolog.
5909: */
1.91 daniel 5910: GROW;
1.40 daniel 5911: if ((CUR == '<') && (NXT(1) == '?') &&
5912: (NXT(2) == 'x') && (NXT(3) == 'm') &&
5913: (NXT(4) == 'l')) {
1.19 daniel 5914: xmlParseXMLDecl(ctxt);
5915: /* SKIP_EOL(cur); */
1.42 daniel 5916: SKIP_BLANKS;
1.40 daniel 5917: } else if ((CUR == '<') && (NXT(1) == '?') &&
5918: (NXT(2) == 'X') && (NXT(3) == 'M') &&
5919: (NXT(4) == 'L')) {
1.19 daniel 5920: /*
5921: * The first drafts were using <?XML and the final W3C REC
5922: * now use <?xml ...
5923: */
1.16 daniel 5924: xmlParseXMLDecl(ctxt);
1.1 veillard 5925: /* SKIP_EOL(cur); */
1.42 daniel 5926: SKIP_BLANKS;
1.1 veillard 5927: } else {
1.72 daniel 5928: ctxt->version = xmlCharStrdup(XML_DEFAULT_VERSION);
1.1 veillard 5929: }
1.72 daniel 5930: if ((ctxt->sax) && (ctxt->sax->startDocument))
1.74 daniel 5931: ctxt->sax->startDocument(ctxt->userData);
1.1 veillard 5932:
5933: /*
5934: * The Misc part of the Prolog
5935: */
1.91 daniel 5936: GROW;
1.16 daniel 5937: xmlParseMisc(ctxt);
1.1 veillard 5938:
5939: /*
1.29 daniel 5940: * Then possibly doc type declaration(s) and more Misc
1.21 daniel 5941: * (doctypedecl Misc*)?
5942: */
1.91 daniel 5943: GROW;
1.40 daniel 5944: if ((CUR == '<') && (NXT(1) == '!') &&
5945: (NXT(2) == 'D') && (NXT(3) == 'O') &&
5946: (NXT(4) == 'C') && (NXT(5) == 'T') &&
5947: (NXT(6) == 'Y') && (NXT(7) == 'P') &&
5948: (NXT(8) == 'E')) {
1.22 daniel 5949: xmlParseDocTypeDecl(ctxt);
1.96 daniel 5950: ctxt->instate = XML_PARSER_PROLOG;
1.22 daniel 5951: xmlParseMisc(ctxt);
1.21 daniel 5952: }
5953:
5954: /*
5955: * Time to start parsing the tree itself
1.1 veillard 5956: */
1.91 daniel 5957: GROW;
1.96 daniel 5958: ctxt->instate = XML_PARSER_CONTENT;
1.72 daniel 5959: xmlParseElement(ctxt);
1.96 daniel 5960: ctxt->instate = XML_PARSER_EPILOG;
1.33 daniel 5961:
5962: /*
5963: * The Misc part at the end
5964: */
5965: xmlParseMisc(ctxt);
1.16 daniel 5966:
1.59 daniel 5967: if (CUR != 0) {
5968: if ((ctxt->sax != NULL) && (ctxt->sax->error != NULL))
1.74 daniel 5969: ctxt->sax->error(ctxt->userData,
1.59 daniel 5970: "Extra content at the end of the document\n");
5971: ctxt->wellFormed = 0;
1.123 daniel 5972: ctxt->errNo = XML_ERR_DOCUMENT_END;
1.59 daniel 5973: }
1.96 daniel 5974: ctxt->instate = XML_PARSER_EOF;
1.59 daniel 5975:
1.44 daniel 5976: /*
5977: * SAX: end of the document processing.
5978: */
1.72 daniel 5979: if ((ctxt->sax) && (ctxt->sax->endDocument != NULL))
1.74 daniel 5980: ctxt->sax->endDocument(ctxt->userData);
1.59 daniel 5981: if (! ctxt->wellFormed) return(-1);
1.16 daniel 5982: return(0);
5983: }
5984:
1.98 daniel 5985: /************************************************************************
5986: * *
5987: * I/O front end functions to the parser *
5988: * *
5989: ************************************************************************/
5990:
1.50 daniel 5991: /**
1.86 daniel 5992: * xmlCreateDocParserCtxt :
1.123 daniel 5993: * @cur: a pointer to an array of xmlChar
1.50 daniel 5994: *
1.69 daniel 5995: * Create a parser context for an XML in-memory document.
5996: *
5997: * Returns the new parser context or NULL
1.16 daniel 5998: */
1.69 daniel 5999: xmlParserCtxtPtr
1.123 daniel 6000: xmlCreateDocParserCtxt(xmlChar *cur) {
1.16 daniel 6001: xmlParserCtxtPtr ctxt;
1.40 daniel 6002: xmlParserInputPtr input;
1.75 daniel 6003: xmlCharEncoding enc;
1.16 daniel 6004:
1.97 daniel 6005: ctxt = xmlNewParserCtxt();
1.16 daniel 6006: if (ctxt == NULL) {
6007: return(NULL);
6008: }
1.96 daniel 6009: input = xmlNewInputStream(ctxt);
1.40 daniel 6010: if (input == NULL) {
1.97 daniel 6011: xmlFreeParserCtxt(ctxt);
1.40 daniel 6012: return(NULL);
6013: }
6014:
1.75 daniel 6015: /*
6016: * plug some encoding conversion routines here. !!!
6017: */
6018: enc = xmlDetectCharEncoding(cur);
6019: xmlSwitchEncoding(ctxt, enc);
6020:
1.40 daniel 6021: input->base = cur;
6022: input->cur = cur;
6023:
6024: inputPush(ctxt, input);
1.69 daniel 6025: return(ctxt);
6026: }
6027:
6028: /**
6029: * xmlSAXParseDoc :
6030: * @sax: the SAX handler block
1.123 daniel 6031: * @cur: a pointer to an array of xmlChar
1.69 daniel 6032: * @recovery: work in recovery mode, i.e. tries to read no Well Formed
6033: * documents
6034: *
6035: * parse an XML in-memory document and build a tree.
6036: * It use the given SAX function block to handle the parsing callback.
6037: * If sax is NULL, fallback to the default DOM tree building routines.
6038: *
6039: * Returns the resulting document tree
6040: */
6041:
6042: xmlDocPtr
1.123 daniel 6043: xmlSAXParseDoc(xmlSAXHandlerPtr sax, xmlChar *cur, int recovery) {
1.69 daniel 6044: xmlDocPtr ret;
6045: xmlParserCtxtPtr ctxt;
6046:
6047: if (cur == NULL) return(NULL);
1.16 daniel 6048:
6049:
1.69 daniel 6050: ctxt = xmlCreateDocParserCtxt(cur);
6051: if (ctxt == NULL) return(NULL);
1.74 daniel 6052: if (sax != NULL) {
6053: ctxt->sax = sax;
6054: ctxt->userData = NULL;
6055: }
1.69 daniel 6056:
1.16 daniel 6057: xmlParseDocument(ctxt);
1.72 daniel 6058: if ((ctxt->wellFormed) || recovery) ret = ctxt->myDoc;
1.59 daniel 6059: else {
6060: ret = NULL;
1.72 daniel 6061: xmlFreeDoc(ctxt->myDoc);
6062: ctxt->myDoc = NULL;
1.59 daniel 6063: }
1.86 daniel 6064: if (sax != NULL)
6065: ctxt->sax = NULL;
1.69 daniel 6066: xmlFreeParserCtxt(ctxt);
1.16 daniel 6067:
1.1 veillard 6068: return(ret);
6069: }
6070:
1.50 daniel 6071: /**
1.55 daniel 6072: * xmlParseDoc :
1.123 daniel 6073: * @cur: a pointer to an array of xmlChar
1.55 daniel 6074: *
6075: * parse an XML in-memory document and build a tree.
6076: *
1.68 daniel 6077: * Returns the resulting document tree
1.55 daniel 6078: */
6079:
1.69 daniel 6080: xmlDocPtr
1.123 daniel 6081: xmlParseDoc(xmlChar *cur) {
1.59 daniel 6082: return(xmlSAXParseDoc(NULL, cur, 0));
1.76 daniel 6083: }
6084:
6085: /**
6086: * xmlSAXParseDTD :
6087: * @sax: the SAX handler block
6088: * @ExternalID: a NAME* containing the External ID of the DTD
6089: * @SystemID: a NAME* containing the URL to the DTD
6090: *
6091: * Load and parse an external subset.
6092: *
6093: * Returns the resulting xmlDtdPtr or NULL in case of error.
6094: */
6095:
6096: xmlDtdPtr
1.123 daniel 6097: xmlSAXParseDTD(xmlSAXHandlerPtr sax, const xmlChar *ExternalID,
6098: const xmlChar *SystemID) {
1.76 daniel 6099: xmlDtdPtr ret = NULL;
6100: xmlParserCtxtPtr ctxt;
1.83 daniel 6101: xmlParserInputPtr input = NULL;
1.76 daniel 6102: xmlCharEncoding enc;
6103:
6104: if ((ExternalID == NULL) && (SystemID == NULL)) return(NULL);
6105:
1.97 daniel 6106: ctxt = xmlNewParserCtxt();
1.76 daniel 6107: if (ctxt == NULL) {
6108: return(NULL);
6109: }
6110:
6111: /*
6112: * Set-up the SAX context
6113: */
6114: if (ctxt == NULL) return(NULL);
6115: if (sax != NULL) {
1.93 veillard 6116: if (ctxt->sax != NULL)
1.119 daniel 6117: xmlFree(ctxt->sax);
1.76 daniel 6118: ctxt->sax = sax;
6119: ctxt->userData = NULL;
6120: }
6121:
6122: /*
6123: * Ask the Entity resolver to load the damn thing
6124: */
6125:
6126: if ((ctxt->sax != NULL) && (ctxt->sax->resolveEntity != NULL))
6127: input = ctxt->sax->resolveEntity(ctxt->userData, ExternalID, SystemID);
6128: if (input == NULL) {
1.86 daniel 6129: if (sax != NULL) ctxt->sax = NULL;
1.76 daniel 6130: xmlFreeParserCtxt(ctxt);
6131: return(NULL);
6132: }
6133:
6134: /*
6135: * plug some encoding conversion routines here. !!!
6136: */
6137: xmlPushInput(ctxt, input);
6138: enc = xmlDetectCharEncoding(ctxt->input->cur);
6139: xmlSwitchEncoding(ctxt, enc);
6140:
1.95 veillard 6141: if (input->filename == NULL)
1.116 daniel 6142: input->filename = (char *) xmlStrdup(SystemID); /* !!!!!!! */
1.76 daniel 6143: input->line = 1;
6144: input->col = 1;
6145: input->base = ctxt->input->cur;
6146: input->cur = ctxt->input->cur;
6147: input->free = NULL;
6148:
6149: /*
6150: * let's parse that entity knowing it's an external subset.
6151: */
1.79 daniel 6152: xmlParseExternalSubset(ctxt, ExternalID, SystemID);
1.76 daniel 6153:
6154: if (ctxt->myDoc != NULL) {
6155: if (ctxt->wellFormed) {
6156: ret = ctxt->myDoc->intSubset;
6157: ctxt->myDoc->intSubset = NULL;
6158: } else {
6159: ret = NULL;
6160: }
6161: xmlFreeDoc(ctxt->myDoc);
6162: ctxt->myDoc = NULL;
6163: }
1.86 daniel 6164: if (sax != NULL) ctxt->sax = NULL;
1.76 daniel 6165: xmlFreeParserCtxt(ctxt);
6166:
6167: return(ret);
6168: }
6169:
6170: /**
6171: * xmlParseDTD :
6172: * @ExternalID: a NAME* containing the External ID of the DTD
6173: * @SystemID: a NAME* containing the URL to the DTD
6174: *
6175: * Load and parse an external subset.
6176: *
6177: * Returns the resulting xmlDtdPtr or NULL in case of error.
6178: */
6179:
6180: xmlDtdPtr
1.123 daniel 6181: xmlParseDTD(const xmlChar *ExternalID, const xmlChar *SystemID) {
1.76 daniel 6182: return(xmlSAXParseDTD(NULL, ExternalID, SystemID));
1.59 daniel 6183: }
6184:
6185: /**
6186: * xmlRecoverDoc :
1.123 daniel 6187: * @cur: a pointer to an array of xmlChar
1.59 daniel 6188: *
6189: * parse an XML in-memory document and build a tree.
6190: * In the case the document is not Well Formed, a tree is built anyway
6191: *
1.68 daniel 6192: * Returns the resulting document tree
1.59 daniel 6193: */
6194:
1.69 daniel 6195: xmlDocPtr
1.123 daniel 6196: xmlRecoverDoc(xmlChar *cur) {
1.59 daniel 6197: return(xmlSAXParseDoc(NULL, cur, 1));
1.55 daniel 6198: }
6199:
6200: /**
1.69 daniel 6201: * xmlCreateFileParserCtxt :
1.50 daniel 6202: * @filename: the filename
6203: *
1.69 daniel 6204: * Create a parser context for a file content.
6205: * Automatic support for ZLIB/Compress compressed document is provided
6206: * by default if found at compile-time.
1.50 daniel 6207: *
1.69 daniel 6208: * Returns the new parser context or NULL
1.9 httpng 6209: */
1.69 daniel 6210: xmlParserCtxtPtr
6211: xmlCreateFileParserCtxt(const char *filename)
6212: {
6213: xmlParserCtxtPtr ctxt;
1.40 daniel 6214: xmlParserInputPtr inputStream;
1.91 daniel 6215: xmlParserInputBufferPtr buf;
1.111 daniel 6216: char *directory = NULL;
1.9 httpng 6217:
1.91 daniel 6218: buf = xmlParserInputBufferCreateFilename(filename, XML_CHAR_ENCODING_NONE);
6219: if (buf == NULL) return(NULL);
1.9 httpng 6220:
1.97 daniel 6221: ctxt = xmlNewParserCtxt();
1.16 daniel 6222: if (ctxt == NULL) {
6223: return(NULL);
6224: }
1.97 daniel 6225:
1.96 daniel 6226: inputStream = xmlNewInputStream(ctxt);
1.40 daniel 6227: if (inputStream == NULL) {
1.97 daniel 6228: xmlFreeParserCtxt(ctxt);
1.40 daniel 6229: return(NULL);
6230: }
6231:
1.119 daniel 6232: inputStream->filename = xmlMemStrdup(filename);
1.91 daniel 6233: inputStream->buf = buf;
6234: inputStream->base = inputStream->buf->buffer->content;
6235: inputStream->cur = inputStream->buf->buffer->content;
1.16 daniel 6236:
1.40 daniel 6237: inputPush(ctxt, inputStream);
1.110 daniel 6238: if ((ctxt->directory == NULL) && (directory == NULL))
1.106 daniel 6239: directory = xmlParserGetDirectory(filename);
6240: if ((ctxt->directory == NULL) && (directory != NULL))
1.110 daniel 6241: ctxt->directory = directory;
1.106 daniel 6242:
1.69 daniel 6243: return(ctxt);
6244: }
6245:
6246: /**
6247: * xmlSAXParseFile :
6248: * @sax: the SAX handler block
6249: * @filename: the filename
6250: * @recovery: work in recovery mode, i.e. tries to read no Well Formed
6251: * documents
6252: *
6253: * parse an XML file and build a tree. Automatic support for ZLIB/Compress
6254: * compressed document is provided by default if found at compile-time.
6255: * It use the given SAX function block to handle the parsing callback.
6256: * If sax is NULL, fallback to the default DOM tree building routines.
6257: *
6258: * Returns the resulting document tree
6259: */
6260:
1.79 daniel 6261: xmlDocPtr
6262: xmlSAXParseFile(xmlSAXHandlerPtr sax, const char *filename,
1.69 daniel 6263: int recovery) {
6264: xmlDocPtr ret;
6265: xmlParserCtxtPtr ctxt;
1.111 daniel 6266: char *directory = NULL;
1.69 daniel 6267:
6268: ctxt = xmlCreateFileParserCtxt(filename);
6269: if (ctxt == NULL) return(NULL);
1.74 daniel 6270: if (sax != NULL) {
1.93 veillard 6271: if (ctxt->sax != NULL)
1.119 daniel 6272: xmlFree(ctxt->sax);
1.74 daniel 6273: ctxt->sax = sax;
6274: ctxt->userData = NULL;
6275: }
1.106 daniel 6276:
1.110 daniel 6277: if ((ctxt->directory == NULL) && (directory == NULL))
1.106 daniel 6278: directory = xmlParserGetDirectory(filename);
6279: if ((ctxt->directory == NULL) && (directory != NULL))
1.123 daniel 6280: ctxt->directory = (char *) xmlStrdup((xmlChar *) directory); /* !!!!!!! */
1.16 daniel 6281:
6282: xmlParseDocument(ctxt);
1.40 daniel 6283:
1.72 daniel 6284: if ((ctxt->wellFormed) || recovery) ret = ctxt->myDoc;
1.59 daniel 6285: else {
6286: ret = NULL;
1.72 daniel 6287: xmlFreeDoc(ctxt->myDoc);
6288: ctxt->myDoc = NULL;
1.59 daniel 6289: }
1.86 daniel 6290: if (sax != NULL)
6291: ctxt->sax = NULL;
1.69 daniel 6292: xmlFreeParserCtxt(ctxt);
1.20 daniel 6293:
6294: return(ret);
6295: }
6296:
1.55 daniel 6297: /**
6298: * xmlParseFile :
6299: * @filename: the filename
6300: *
6301: * parse an XML file and build a tree. Automatic support for ZLIB/Compress
6302: * compressed document is provided by default if found at compile-time.
6303: *
1.68 daniel 6304: * Returns the resulting document tree
1.55 daniel 6305: */
6306:
1.79 daniel 6307: xmlDocPtr
6308: xmlParseFile(const char *filename) {
1.59 daniel 6309: return(xmlSAXParseFile(NULL, filename, 0));
6310: }
6311:
6312: /**
6313: * xmlRecoverFile :
6314: * @filename: the filename
6315: *
6316: * parse an XML file and build a tree. Automatic support for ZLIB/Compress
6317: * compressed document is provided by default if found at compile-time.
6318: * In the case the document is not Well Formed, a tree is built anyway
6319: *
1.68 daniel 6320: * Returns the resulting document tree
1.59 daniel 6321: */
6322:
1.79 daniel 6323: xmlDocPtr
6324: xmlRecoverFile(const char *filename) {
1.59 daniel 6325: return(xmlSAXParseFile(NULL, filename, 1));
1.55 daniel 6326: }
1.32 daniel 6327:
1.50 daniel 6328: /**
1.69 daniel 6329: * xmlCreateMemoryParserCtxt :
1.68 daniel 6330: * @buffer: an pointer to a char array
1.50 daniel 6331: * @size: the siwe of the array
6332: *
1.69 daniel 6333: * Create a parser context for an XML in-memory document.
1.50 daniel 6334: *
1.69 daniel 6335: * Returns the new parser context or NULL
1.20 daniel 6336: */
1.69 daniel 6337: xmlParserCtxtPtr
6338: xmlCreateMemoryParserCtxt(char *buffer, int size) {
1.20 daniel 6339: xmlParserCtxtPtr ctxt;
1.40 daniel 6340: xmlParserInputPtr input;
1.75 daniel 6341: xmlCharEncoding enc;
1.40 daniel 6342:
6343: buffer[size - 1] = '\0';
6344:
1.97 daniel 6345: ctxt = xmlNewParserCtxt();
1.20 daniel 6346: if (ctxt == NULL) {
6347: return(NULL);
6348: }
1.97 daniel 6349:
1.96 daniel 6350: input = xmlNewInputStream(ctxt);
1.40 daniel 6351: if (input == NULL) {
1.97 daniel 6352: xmlFreeParserCtxt(ctxt);
1.40 daniel 6353: return(NULL);
6354: }
1.20 daniel 6355:
1.40 daniel 6356: input->filename = NULL;
6357: input->line = 1;
6358: input->col = 1;
1.96 daniel 6359: input->buf = NULL;
1.91 daniel 6360: input->consumed = 0;
1.45 daniel 6361:
6362: /*
1.75 daniel 6363: * plug some encoding conversion routines here. !!!
1.45 daniel 6364: */
1.116 daniel 6365: enc = xmlDetectCharEncoding(BAD_CAST buffer);
1.75 daniel 6366: xmlSwitchEncoding(ctxt, enc);
6367:
1.116 daniel 6368: input->base = BAD_CAST buffer;
6369: input->cur = BAD_CAST buffer;
1.69 daniel 6370: input->free = NULL;
1.20 daniel 6371:
1.40 daniel 6372: inputPush(ctxt, input);
1.69 daniel 6373: return(ctxt);
6374: }
6375:
6376: /**
6377: * xmlSAXParseMemory :
6378: * @sax: the SAX handler block
6379: * @buffer: an pointer to a char array
6380: * @size: the siwe of the array
6381: * @recovery: work in recovery mode, i.e. tries to read no Well Formed
6382: * documents
6383: *
6384: * parse an XML in-memory block and use the given SAX function block
6385: * to handle the parsing callback. If sax is NULL, fallback to the default
6386: * DOM tree building routines.
6387: *
6388: * Returns the resulting document tree
6389: */
6390: xmlDocPtr
6391: xmlSAXParseMemory(xmlSAXHandlerPtr sax, char *buffer, int size, int recovery) {
6392: xmlDocPtr ret;
6393: xmlParserCtxtPtr ctxt;
6394:
6395: ctxt = xmlCreateMemoryParserCtxt(buffer, size);
6396: if (ctxt == NULL) return(NULL);
1.74 daniel 6397: if (sax != NULL) {
6398: ctxt->sax = sax;
6399: ctxt->userData = NULL;
6400: }
1.20 daniel 6401:
6402: xmlParseDocument(ctxt);
1.40 daniel 6403:
1.72 daniel 6404: if ((ctxt->wellFormed) || recovery) ret = ctxt->myDoc;
1.59 daniel 6405: else {
6406: ret = NULL;
1.72 daniel 6407: xmlFreeDoc(ctxt->myDoc);
6408: ctxt->myDoc = NULL;
1.59 daniel 6409: }
1.86 daniel 6410: if (sax != NULL)
6411: ctxt->sax = NULL;
1.69 daniel 6412: xmlFreeParserCtxt(ctxt);
1.16 daniel 6413:
1.9 httpng 6414: return(ret);
1.17 daniel 6415: }
6416:
1.55 daniel 6417: /**
6418: * xmlParseMemory :
1.68 daniel 6419: * @buffer: an pointer to a char array
1.55 daniel 6420: * @size: the size of the array
6421: *
6422: * parse an XML in-memory block and build a tree.
6423: *
1.68 daniel 6424: * Returns the resulting document tree
1.55 daniel 6425: */
6426:
6427: xmlDocPtr xmlParseMemory(char *buffer, int size) {
1.59 daniel 6428: return(xmlSAXParseMemory(NULL, buffer, size, 0));
6429: }
6430:
6431: /**
6432: * xmlRecoverMemory :
1.68 daniel 6433: * @buffer: an pointer to a char array
1.59 daniel 6434: * @size: the size of the array
6435: *
6436: * parse an XML in-memory block and build a tree.
6437: * In the case the document is not Well Formed, a tree is built anyway
6438: *
1.68 daniel 6439: * Returns the resulting document tree
1.59 daniel 6440: */
6441:
6442: xmlDocPtr xmlRecoverMemory(char *buffer, int size) {
6443: return(xmlSAXParseMemory(NULL, buffer, size, 1));
1.17 daniel 6444: }
6445:
6446:
1.50 daniel 6447: /**
6448: * xmlSetupParserForBuffer:
6449: * @ctxt: an XML parser context
1.123 daniel 6450: * @buffer: a xmlChar * buffer
1.50 daniel 6451: * @filename: a file name
6452: *
1.19 daniel 6453: * Setup the parser context to parse a new buffer; Clears any prior
6454: * contents from the parser context. The buffer parameter must not be
6455: * NULL, but the filename parameter can be
6456: */
1.55 daniel 6457: void
1.123 daniel 6458: xmlSetupParserForBuffer(xmlParserCtxtPtr ctxt, const xmlChar* buffer,
1.17 daniel 6459: const char* filename)
6460: {
1.96 daniel 6461: xmlParserInputPtr input;
1.40 daniel 6462:
1.96 daniel 6463: input = xmlNewInputStream(ctxt);
6464: if (input == NULL) {
6465: perror("malloc");
1.119 daniel 6466: xmlFree(ctxt);
1.96 daniel 6467: exit(1);
6468: }
6469:
6470: xmlClearParserCtxt(ctxt);
6471: if (filename != NULL)
1.119 daniel 6472: input->filename = xmlMemStrdup(filename);
1.96 daniel 6473: input->base = buffer;
6474: input->cur = buffer;
6475: inputPush(ctxt, input);
1.17 daniel 6476: }
6477:
1.123 daniel 6478: /**
6479: * xmlSAXUserParseFile:
6480: * @sax: a SAX handler
6481: * @user_data: The user data returned on SAX callbacks
6482: * @filename: a file name
6483: *
6484: * parse an XML file and call the given SAX handler routines.
6485: * Automatic support for ZLIB/Compress compressed document is provided
6486: *
6487: * Returns 0 in case of success or a error number otherwise
6488: */
6489: int xmlSAXUserParseFile(xmlSAXHandlerPtr sax, void *user_data,
6490: const char *filename) {
6491: int ret = 0;
6492: xmlParserCtxtPtr ctxt;
6493:
6494: ctxt = xmlCreateFileParserCtxt(filename);
6495: if (ctxt == NULL) return -1;
6496: ctxt->sax = sax;
6497: ctxt->userData = user_data;
6498:
6499: xmlParseDocument(ctxt);
6500:
6501: if (ctxt->wellFormed)
6502: ret = 0;
6503: else {
6504: if (ctxt->errNo != 0)
6505: ret = ctxt->errNo;
6506: else
6507: ret = -1;
6508: }
6509: if (sax != NULL)
6510: ctxt->sax = NULL;
6511: xmlFreeParserCtxt(ctxt);
6512:
6513: return ret;
6514: }
6515:
6516: /**
6517: * xmlSAXUserParseMemory:
6518: * @sax: a SAX handler
6519: * @user_data: The user data returned on SAX callbacks
6520: * @buffer: an in-memory XML document input
6521: * @size: the lenght of the XML document in bytes
6522: *
6523: * A better SAX parsing routine.
6524: * parse an XML in-memory buffer and call the given SAX handler routines.
6525: *
6526: * Returns 0 in case of success or a error number otherwise
6527: */
6528: int xmlSAXUserParseMemory(xmlSAXHandlerPtr sax, void *user_data,
6529: char *buffer, int size) {
6530: int ret = 0;
6531: xmlParserCtxtPtr ctxt;
6532:
6533: ctxt = xmlCreateMemoryParserCtxt(buffer, size);
6534: if (ctxt == NULL) return -1;
6535: ctxt->sax = sax;
6536: ctxt->userData = user_data;
6537:
6538: xmlParseDocument(ctxt);
6539:
6540: if (ctxt->wellFormed)
6541: ret = 0;
6542: else {
6543: if (ctxt->errNo != 0)
6544: ret = ctxt->errNo;
6545: else
6546: ret = -1;
6547: }
6548: if (sax != NULL)
6549: ctxt->sax = NULL;
6550: xmlFreeParserCtxt(ctxt);
6551:
6552: return ret;
6553: }
6554:
1.32 daniel 6555:
/************************************************************************
 *									*
 *				Miscellaneous				*
 *									*
 ************************************************************************/
6561:
6562:
1.50 daniel 6563: /**
6564: * xmlParserFindNodeInfo:
6565: * @ctxt: an XML parser context
6566: * @node: an XML node within the tree
6567: *
6568: * Find the parser node info struct for a given node
6569: *
1.68 daniel 6570: * Returns an xmlParserNodeInfo block pointer or NULL
1.32 daniel 6571: */
6572: const xmlParserNodeInfo* xmlParserFindNodeInfo(const xmlParserCtxt* ctx,
6573: const xmlNode* node)
6574: {
6575: unsigned long pos;
6576:
6577: /* Find position where node should be at */
6578: pos = xmlParserFindNodeInfoIndex(&ctx->node_seq, node);
6579: if ( ctx->node_seq.buffer[pos].node == node )
6580: return &ctx->node_seq.buffer[pos];
6581: else
6582: return NULL;
6583: }
6584:
6585:
1.50 daniel 6586: /**
6587: * xmlInitNodeInfoSeq :
6588: * @seq: a node info sequence pointer
6589: *
6590: * -- Initialize (set to initial state) node info sequence
1.32 daniel 6591: */
1.55 daniel 6592: void
6593: xmlInitNodeInfoSeq(xmlParserNodeInfoSeqPtr seq)
1.32 daniel 6594: {
6595: seq->length = 0;
6596: seq->maximum = 0;
6597: seq->buffer = NULL;
6598: }
6599:
1.50 daniel 6600: /**
6601: * xmlClearNodeInfoSeq :
6602: * @seq: a node info sequence pointer
6603: *
6604: * -- Clear (release memory and reinitialize) node
1.32 daniel 6605: * info sequence
6606: */
1.55 daniel 6607: void
6608: xmlClearNodeInfoSeq(xmlParserNodeInfoSeqPtr seq)
1.32 daniel 6609: {
6610: if ( seq->buffer != NULL )
1.119 daniel 6611: xmlFree(seq->buffer);
1.32 daniel 6612: xmlInitNodeInfoSeq(seq);
6613: }
6614:
6615:
1.50 daniel 6616: /**
6617: * xmlParserFindNodeInfoIndex:
6618: * @seq: a node info sequence pointer
6619: * @node: an XML node pointer
6620: *
6621: *
1.32 daniel 6622: * xmlParserFindNodeInfoIndex : Find the index that the info record for
6623: * the given node is or should be at in a sorted sequence
1.68 daniel 6624: *
6625: * Returns a long indicating the position of the record
1.32 daniel 6626: */
6627: unsigned long xmlParserFindNodeInfoIndex(const xmlParserNodeInfoSeq* seq,
6628: const xmlNode* node)
6629: {
6630: unsigned long upper, lower, middle;
6631: int found = 0;
6632:
6633: /* Do a binary search for the key */
6634: lower = 1;
6635: upper = seq->length;
6636: middle = 0;
6637: while ( lower <= upper && !found) {
6638: middle = lower + (upper - lower) / 2;
6639: if ( node == seq->buffer[middle - 1].node )
6640: found = 1;
6641: else if ( node < seq->buffer[middle - 1].node )
6642: upper = middle - 1;
6643: else
6644: lower = middle + 1;
6645: }
6646:
6647: /* Return position */
6648: if ( middle == 0 || seq->buffer[middle - 1].node < node )
6649: return middle;
6650: else
6651: return middle - 1;
6652: }
6653:
6654:
1.50 daniel 6655: /**
6656: * xmlParserAddNodeInfo:
6657: * @ctxt: an XML parser context
1.68 daniel 6658: * @info: a node info sequence pointer
1.50 daniel 6659: *
6660: * Insert node info record into the sorted sequence
1.32 daniel 6661: */
1.55 daniel 6662: void
6663: xmlParserAddNodeInfo(xmlParserCtxtPtr ctxt,
1.68 daniel 6664: const xmlParserNodeInfo* info)
1.32 daniel 6665: {
6666: unsigned long pos;
6667: static unsigned int block_size = 5;
6668:
6669: /* Find pos and check to see if node is already in the sequence */
1.55 daniel 6670: pos = xmlParserFindNodeInfoIndex(&ctxt->node_seq, info->node);
6671: if ( pos < ctxt->node_seq.length
6672: && ctxt->node_seq.buffer[pos].node == info->node ) {
6673: ctxt->node_seq.buffer[pos] = *info;
1.32 daniel 6674: }
6675:
6676: /* Otherwise, we need to add new node to buffer */
6677: else {
6678: /* Expand buffer by 5 if needed */
1.55 daniel 6679: if ( ctxt->node_seq.length + 1 > ctxt->node_seq.maximum ) {
1.32 daniel 6680: xmlParserNodeInfo* tmp_buffer;
1.55 daniel 6681: unsigned int byte_size = (sizeof(*ctxt->node_seq.buffer)
6682: *(ctxt->node_seq.maximum + block_size));
1.32 daniel 6683:
1.55 daniel 6684: if ( ctxt->node_seq.buffer == NULL )
1.119 daniel 6685: tmp_buffer = (xmlParserNodeInfo*) xmlMalloc(byte_size);
1.32 daniel 6686: else
1.119 daniel 6687: tmp_buffer = (xmlParserNodeInfo*) xmlRealloc(ctxt->node_seq.buffer, byte_size);
1.32 daniel 6688:
6689: if ( tmp_buffer == NULL ) {
1.55 daniel 6690: if ((ctxt->sax != NULL) && (ctxt->sax->error != NULL))
1.74 daniel 6691: ctxt->sax->error(ctxt->userData, "Out of memory\n");
1.123 daniel 6692: ctxt->errNo = XML_ERR_NO_MEMORY;
1.32 daniel 6693: return;
6694: }
1.55 daniel 6695: ctxt->node_seq.buffer = tmp_buffer;
6696: ctxt->node_seq.maximum += block_size;
1.32 daniel 6697: }
6698:
6699: /* If position is not at end, move elements out of the way */
1.55 daniel 6700: if ( pos != ctxt->node_seq.length ) {
1.32 daniel 6701: unsigned long i;
6702:
1.55 daniel 6703: for ( i = ctxt->node_seq.length; i > pos; i-- )
6704: ctxt->node_seq.buffer[i] = ctxt->node_seq.buffer[i - 1];
1.32 daniel 6705: }
6706:
6707: /* Copy element and increase length */
1.55 daniel 6708: ctxt->node_seq.buffer[pos] = *info;
6709: ctxt->node_seq.length++;
1.32 daniel 6710: }
6711: }
1.77 daniel 6712:
1.98 daniel 6713:
6714: /**
6715: * xmlSubstituteEntitiesDefault :
6716: * @val: int 0 or 1
6717: *
6718: * Set and return the previous value for default entity support.
6719: * Initially the parser always keep entity references instead of substituting
6720: * entity values in the output. This function has to be used to change the
6721: * default parser behaviour
6722: * SAX::subtituteEntities() has to be used for changing that on a file by
6723: * file basis.
6724: *
6725: * Returns the last value for 0 for no substitution, 1 for substitution.
6726: */
6727:
6728: int
6729: xmlSubstituteEntitiesDefault(int val) {
6730: int old = xmlSubstituteEntitiesDefaultValue;
6731:
6732: xmlSubstituteEntitiesDefaultValue = val;
6733: return(old);
6734: }
1.77 daniel 6735:
Webmaster