Annotation of XML/parser.c, revision 1.123
1.1 veillard 1: /*
1.3 veillard 2: * parser.c : an XML 1.0 non-verifying parser
1.15 veillard 3: *
4: * See Copyright for the status of this software.
5: *
1.60 daniel 6: * Daniel.Veillard@w3.org
1.1 veillard 7: */
8:
1.26 daniel 9: #ifdef WIN32
10: #define HAVE_FCNTL_H
11: #include <io.h>
12: #else
1.121 daniel 13: #include "config.h"
1.26 daniel 14: #endif
1.121 daniel 15:
1.1 veillard 16: #include <stdio.h>
1.121 daniel 17: #include <string.h> /* for memset() only */
18: #ifdef HAVE_CTYPE_H
1.1 veillard 19: #include <ctype.h>
1.121 daniel 20: #endif
21: #ifdef HAVE_STDLIB_H
1.50 daniel 22: #include <stdlib.h>
1.121 daniel 23: #endif
24: #ifdef HAVE_SYS_STAT_H
1.9 httpng 25: #include <sys/stat.h>
1.121 daniel 26: #endif
1.9 httpng 27: #ifdef HAVE_FCNTL_H
28: #include <fcntl.h>
29: #endif
1.10 httpng 30: #ifdef HAVE_UNISTD_H
31: #include <unistd.h>
32: #endif
1.20 daniel 33: #ifdef HAVE_ZLIB_H
34: #include <zlib.h>
35: #endif
1.1 veillard 36:
1.119 daniel 37: #include "xmlmemory.h"
1.14 veillard 38: #include "tree.h"
1.1 veillard 39: #include "parser.h"
1.14 veillard 40: #include "entities.h"
1.75 daniel 41: #include "encoding.h"
1.61 daniel 42: #include "valid.h"
1.69 daniel 43: #include "parserInternals.h"
1.91 daniel 44: #include "xmlIO.h"
1.122 daniel 45: #include "xml-error.h"
1.1 veillard 46:
1.86 daniel 47: const char *xmlParserVersion = LIBXML_VERSION;
48:
1.91 daniel 49:
50: /************************************************************************
51: * *
52: * Input handling functions for progressive parsing *
53: * *
54: ************************************************************************/
55:
56: /* #define DEBUG_INPUT */
57:
1.110 daniel 58: #define INPUT_CHUNK 250
59: /* we need to keep enough input to show errors in context */
60: #define LINE_LEN 80
1.91 daniel 61:
62: #ifdef DEBUG_INPUT
63: #define CHECK_BUFFER(in) check_buffer(in)
64:
65: void check_buffer(xmlParserInputPtr in) {
66: if (in->base != in->buf->buffer->content) {
67: fprintf(stderr, "xmlParserInput: base mismatch problem\n");
68: }
69: if (in->cur < in->base) {
70: fprintf(stderr, "xmlParserInput: cur < base problem\n");
71: }
72: if (in->cur > in->base + in->buf->buffer->use) {
73: fprintf(stderr, "xmlParserInput: cur > base + use problem\n");
74: }
75: fprintf(stderr,"buffer %x : content %x, cur %d, use %d, size %d\n",
76: (int) in, (int) in->buf->buffer->content, in->cur - in->base,
77: in->buf->buffer->use, in->buf->buffer->size);
78: }
79:
1.110 daniel 80: #else
81: #define CHECK_BUFFER(in)
82: #endif
83:
1.91 daniel 84:
85: /**
86: * xmlParserInputRead:
87: * @in: an XML parser input
88: * @len: an indicative size for the lookahead
89: *
90: * This function refresh the input for the parser. It doesn't try to
91: * preserve pointers to the input buffer, and discard already read data
92: *
1.123 ! daniel 93: * Returns the number of xmlChars read, or -1 in case of error, 0 indicate the
1.91 daniel 94: * end of this entity
95: */
96: int
97: xmlParserInputRead(xmlParserInputPtr in, int len) {
98: int ret;
99: int used;
100: int index;
101:
102: #ifdef DEBUG_INPUT
103: fprintf(stderr, "Read\n");
104: #endif
105: if (in->buf == NULL) return(-1);
106: if (in->base == NULL) return(-1);
107: if (in->cur == NULL) return(-1);
108: if (in->buf->buffer == NULL) return(-1);
109:
110: CHECK_BUFFER(in);
111:
112: used = in->cur - in->buf->buffer->content;
113: ret = xmlBufferShrink(in->buf->buffer, used);
114: if (ret > 0) {
115: in->cur -= ret;
116: in->consumed += ret;
117: }
118: ret = xmlParserInputBufferRead(in->buf, len);
119: if (in->base != in->buf->buffer->content) {
120: /*
121: * the buffer has been realloced
122: */
123: index = in->cur - in->base;
124: in->base = in->buf->buffer->content;
125: in->cur = &in->buf->buffer->content[index];
126: }
127:
128: CHECK_BUFFER(in);
129:
130: return(ret);
131: }
132:
133: /**
134: * xmlParserInputGrow:
135: * @in: an XML parser input
136: * @len: an indicative size for the lookahead
137: *
138: * This function increase the input for the parser. It tries to
139: * preserve pointers to the input buffer, and keep already read data
140: *
1.123 ! daniel 141: * Returns the number of xmlChars read, or -1 in case of error, 0 indicate the
1.91 daniel 142: * end of this entity
143: */
144: int
145: xmlParserInputGrow(xmlParserInputPtr in, int len) {
146: int ret;
147: int index;
148:
149: #ifdef DEBUG_INPUT
150: fprintf(stderr, "Grow\n");
151: #endif
152: if (in->buf == NULL) return(-1);
153: if (in->base == NULL) return(-1);
154: if (in->cur == NULL) return(-1);
155: if (in->buf->buffer == NULL) return(-1);
156:
157: CHECK_BUFFER(in);
158:
159: index = in->cur - in->base;
160: if (in->buf->buffer->use > index + INPUT_CHUNK) {
161:
162: CHECK_BUFFER(in);
163:
164: return(0);
165: }
166: ret = xmlParserInputBufferGrow(in->buf, len);
167: if (in->base != in->buf->buffer->content) {
168: /*
169: * the buffer has been realloced
170: */
171: index = in->cur - in->base;
172: in->base = in->buf->buffer->content;
173: in->cur = &in->buf->buffer->content[index];
174: }
175:
176: CHECK_BUFFER(in);
177:
178: return(ret);
179: }
180:
181: /**
182: * xmlParserInputShrink:
183: * @in: an XML parser input
184: *
185: * This function removes used input for the parser.
186: */
187: void
188: xmlParserInputShrink(xmlParserInputPtr in) {
189: int used;
190: int ret;
191: int index;
192:
193: #ifdef DEBUG_INPUT
194: fprintf(stderr, "Shrink\n");
195: #endif
196: if (in->buf == NULL) return;
197: if (in->base == NULL) return;
198: if (in->cur == NULL) return;
199: if (in->buf->buffer == NULL) return;
200:
201: CHECK_BUFFER(in);
202:
203: used = in->cur - in->buf->buffer->content;
204: if (used > INPUT_CHUNK) {
1.110 daniel 205: ret = xmlBufferShrink(in->buf->buffer, used - LINE_LEN);
1.91 daniel 206: if (ret > 0) {
207: in->cur -= ret;
208: in->consumed += ret;
209: }
210: }
211:
212: CHECK_BUFFER(in);
213:
214: if (in->buf->buffer->use > INPUT_CHUNK) {
215: return;
216: }
217: xmlParserInputBufferRead(in->buf, 2 * INPUT_CHUNK);
218: if (in->base != in->buf->buffer->content) {
219: /*
220: * the buffer has been realloced
221: */
222: index = in->cur - in->base;
223: in->base = in->buf->buffer->content;
224: in->cur = &in->buf->buffer->content[index];
225: }
226:
227: CHECK_BUFFER(in);
228: }
229:
1.45 daniel 230: /************************************************************************
231: * *
232: * Parser stacks related functions and macros *
233: * *
234: ************************************************************************/
1.79 daniel 235:
236: int xmlSubstituteEntitiesDefaultValue = 0;
1.100 daniel 237: int xmlDoValidityCheckingDefaultValue = 0;
1.79 daniel 238:
1.1 veillard 239: /*
1.40 daniel 240: * Generic function for accessing stacks in the Parser Context
1.1 veillard 241: */
242:
1.31 daniel 243: #define PUSH_AND_POP(type, name) \
1.72 daniel 244: extern int name##Push(xmlParserCtxtPtr ctxt, type value) { \
1.31 daniel 245: if (ctxt->name##Nr >= ctxt->name##Max) { \
246: ctxt->name##Max *= 2; \
1.119 daniel 247: ctxt->name##Tab = (void *) xmlRealloc(ctxt->name##Tab, \
1.40 daniel 248: ctxt->name##Max * sizeof(ctxt->name##Tab[0])); \
249: if (ctxt->name##Tab == NULL) { \
1.31 daniel 250: fprintf(stderr, "realloc failed !\n"); \
251: exit(1); \
252: } \
253: } \
1.40 daniel 254: ctxt->name##Tab[ctxt->name##Nr] = value; \
255: ctxt->name = value; \
256: return(ctxt->name##Nr++); \
1.31 daniel 257: } \
1.72 daniel 258: extern type name##Pop(xmlParserCtxtPtr ctxt) { \
1.69 daniel 259: type ret; \
1.40 daniel 260: if (ctxt->name##Nr <= 0) return(0); \
261: ctxt->name##Nr--; \
1.50 daniel 262: if (ctxt->name##Nr > 0) \
263: ctxt->name = ctxt->name##Tab[ctxt->name##Nr - 1]; \
264: else \
265: ctxt->name = NULL; \
1.69 daniel 266: ret = ctxt->name##Tab[ctxt->name##Nr]; \
267: ctxt->name##Tab[ctxt->name##Nr] = 0; \
268: return(ret); \
1.31 daniel 269: } \
270:
1.40 daniel 271: PUSH_AND_POP(xmlParserInputPtr, input)
1.41 daniel 272: PUSH_AND_POP(xmlNodePtr, node)
1.40 daniel 273:
/*
 * Macros for accessing the content. Those should be used only by the parser,
 * and not exported.
 *
 * Dirty macros, i.e. one needs to make assumptions on the context to use them
 *
 *   CUR_PTR return the current pointer to the xmlChar to be parsed.
 *   CUR     returns the current xmlChar value, i.e. a 8 bit value if compiled
 *           in ISO-Latin or UTF-8, and the current 16 bit value if compiled
 *           in UNICODE mode. This should be used internally by the parser
 *           only to compare to ASCII values otherwise it would break when
 *           running with UTF-8 encoding. A pending ctxt->token takes
 *           precedence over the input.
 *   NXT(n)  returns the n'th next xmlChar. Same as CUR, it should be used
 *           only to compare on ASCII based substring.
 *   SKIP(n) Skip n xmlChar, and must also be used only to skip ASCII defined
 *           strings within the parser.
 *   SHRINK  drop consumed input, popping the input if it is exhausted.
 *   GROW    fetch more input, popping the input if it is exhausted.
 *
 * Clean macros, not dependent of an ASCII context, expect UTF-8 encoding
 *
 *   CURRENT Returns the current char value, with the full decoding of
 *           UTF-8 if we are using this mode. It returns an int.
 *   NEXT    Skip to the next character, this does the proper decoding
 *           in UTF-8 mode. It also pop-up unfinished entities on the fly.
 *   COPY(to) copy one char to *to, increment CUR_PTR and to accordingly
 */

#define CUR (ctxt->token ? ctxt->token : (*ctxt->input->cur))
#define SKIP(val) (ctxt->input->cur += (val))
#define NXT(val) (ctxt->input->cur[(val)])
#define CUR_PTR (ctxt->input->cur)

/*
 * SHRINK and GROW are made of two statements; they are wrapped in
 * do { } while (0) so that they behave as a single statement, e.g. when
 * used as the body of an unbraced if. Use them followed by a semicolon.
 */
#define SHRINK do {							\
    xmlParserInputShrink(ctxt->input);					\
    if ((*ctxt->input->cur == 0) &&					\
        (xmlParserInputGrow(ctxt->input, INPUT_CHUNK) <= 0))		\
        xmlPopInput(ctxt);						\
  } while (0)

#define GROW do {							\
    xmlParserInputGrow(ctxt->input, INPUT_CHUNK);			\
    if ((*ctxt->input->cur == 0) &&					\
        (xmlParserInputGrow(ctxt->input, INPUT_CHUNK) <= 0))		\
        xmlPopInput(ctxt);						\
  } while (0)

#define SKIP_BLANKS 							\
    do { 								\
	while (IS_BLANK(CUR)) NEXT;					\
	if (*ctxt->input->cur == '%') xmlParserHandlePEReference(ctxt);	\
	if (*ctxt->input->cur == '&') xmlParserHandleReference(ctxt);	\
    } while (IS_BLANK(CUR));

#define CURRENT (*ctxt->input->cur)
#define NEXT {								\
    if (ctxt->token != 0) ctxt->token = 0;				\
    else {								\
    if ((*ctxt->input->cur == 0) &&					\
        (xmlParserInputGrow(ctxt->input, INPUT_CHUNK) <= 0)) {		\
	    xmlPopInput(ctxt);						\
    } else {								\
        if (*(ctxt->input->cur) == '\n') {				\
	    ctxt->input->line++; ctxt->input->col = 1;			\
	} else ctxt->input->col++;					\
	ctxt->input->cur++;						\
        if (*ctxt->input->cur == 0)					\
	    xmlParserInputGrow(ctxt->input, INPUT_CHUNK);		\
    }									\
    if (*ctxt->input->cur == '%') xmlParserHandlePEReference(ctxt);	\
    if (*ctxt->input->cur == '&') xmlParserHandleReference(ctxt);	\
}}
1.91 daniel 339:
1.42 daniel 340:
1.97 daniel 341: /************************************************************************
342: * *
343: * Commodity functions to handle entities processing *
344: * *
345: ************************************************************************/
1.40 daniel 346:
1.50 daniel 347: /**
348: * xmlPopInput:
349: * @ctxt: an XML parser context
350: *
1.40 daniel 351: * xmlPopInput: the current input pointed by ctxt->input came to an end
352: * pop it and return the next char.
1.45 daniel 353: *
1.123 ! daniel 354: * Returns the current xmlChar in the parser context
1.40 daniel 355: */
1.123 ! daniel 356: xmlChar
1.55 daniel 357: xmlPopInput(xmlParserCtxtPtr ctxt) {
1.40 daniel 358: if (ctxt->inputNr == 1) return(0); /* End of main Input */
1.69 daniel 359: xmlFreeInputStream(inputPop(ctxt));
1.97 daniel 360: if ((*ctxt->input->cur == 0) &&
361: (xmlParserInputGrow(ctxt->input, INPUT_CHUNK) <= 0))
362: return(xmlPopInput(ctxt));
1.40 daniel 363: return(CUR);
364: }
365:
1.50 daniel 366: /**
367: * xmlPushInput:
368: * @ctxt: an XML parser context
369: * @input: an XML parser input fragment (entity, XML fragment ...).
370: *
1.40 daniel 371: * xmlPushInput: switch to a new input stream which is stacked on top
372: * of the previous one(s).
373: */
1.55 daniel 374: void
375: xmlPushInput(xmlParserCtxtPtr ctxt, xmlParserInputPtr input) {
1.40 daniel 376: if (input == NULL) return;
377: inputPush(ctxt, input);
378: }
379:
1.50 daniel 380: /**
1.69 daniel 381: * xmlFreeInputStream:
1.101 daniel 382: * @input: an xmlP arserInputPtr
1.69 daniel 383: *
384: * Free up an input stream.
385: */
386: void
387: xmlFreeInputStream(xmlParserInputPtr input) {
388: if (input == NULL) return;
389:
1.119 daniel 390: if (input->filename != NULL) xmlFree((char *) input->filename);
391: if (input->directory != NULL) xmlFree((char *) input->directory);
1.69 daniel 392: if ((input->free != NULL) && (input->base != NULL))
1.123 ! daniel 393: input->free((xmlChar *) input->base);
1.93 veillard 394: if (input->buf != NULL)
395: xmlFreeParserInputBuffer(input->buf);
1.69 daniel 396: memset(input, -1, sizeof(xmlParserInput));
1.119 daniel 397: xmlFree(input);
1.69 daniel 398: }
399:
400: /**
1.96 daniel 401: * xmlNewInputStream:
402: * @ctxt: an XML parser context
403: *
404: * Create a new input stream structure
405: * Returns the new input stream or NULL
406: */
407: xmlParserInputPtr
408: xmlNewInputStream(xmlParserCtxtPtr ctxt) {
409: xmlParserInputPtr input;
410:
1.119 daniel 411: input = (xmlParserInputPtr) xmlMalloc(sizeof(xmlParserInput));
1.96 daniel 412: if (input == NULL) {
1.123 ! daniel 413: ctxt->errNo = XML_ERR_NO_MEMORY;
1.96 daniel 414: if ((ctxt->sax != NULL) && (ctxt->sax->error != NULL))
1.122 daniel 415: ctxt->sax->error(ctxt->userData,
416: "malloc: couldn't allocate a new input stream\n");
1.123 ! daniel 417: ctxt->errNo = XML_ERR_NO_MEMORY;
1.96 daniel 418: return(NULL);
419: }
420: input->filename = NULL;
421: input->directory = NULL;
422: input->base = NULL;
423: input->cur = NULL;
424: input->buf = NULL;
425: input->line = 1;
426: input->col = 1;
427: input->buf = NULL;
428: input->free = NULL;
429: input->consumed = 0;
430: return(input);
431: }
432:
433: /**
1.50 daniel 434: * xmlNewEntityInputStream:
435: * @ctxt: an XML parser context
436: * @entity: an Entity pointer
437: *
1.82 daniel 438: * Create a new input stream based on an xmlEntityPtr
1.113 daniel 439: *
440: * Returns the new input stream or NULL
1.45 daniel 441: */
1.50 daniel 442: xmlParserInputPtr
443: xmlNewEntityInputStream(xmlParserCtxtPtr ctxt, xmlEntityPtr entity) {
1.45 daniel 444: xmlParserInputPtr input;
445:
446: if (entity == NULL) {
1.123 ! daniel 447: ctxt->errNo = XML_ERR_INTERNAL_ERROR;
1.55 daniel 448: if ((ctxt->sax != NULL) && (ctxt->sax->error != NULL))
1.74 daniel 449: ctxt->sax->error(ctxt->userData,
1.45 daniel 450: "internal: xmlNewEntityInputStream entity = NULL\n");
1.123 ! daniel 451: ctxt->errNo = XML_ERR_INTERNAL_ERROR;
1.50 daniel 452: return(NULL);
1.45 daniel 453: }
454: if (entity->content == NULL) {
1.113 daniel 455: switch (entity->type) {
456: case XML_EXTERNAL_GENERAL_UNPARSED_ENTITY:
1.123 ! daniel 457: ctxt->errNo = XML_ERR_UNPARSED_ENTITY;
1.113 daniel 458: if ((ctxt->sax != NULL) && (ctxt->sax->error != NULL))
459: ctxt->sax->error(ctxt->userData,
460: "xmlNewEntityInputStream unparsed entity !\n");
461: break;
462: case XML_EXTERNAL_GENERAL_PARSED_ENTITY:
463: case XML_EXTERNAL_PARAMETER_ENTITY:
1.116 daniel 464: return(xmlLoadExternalEntity((char *) entity->SystemID,
465: (char *) entity->ExternalID, ctxt->input));
1.113 daniel 466: case XML_INTERNAL_GENERAL_ENTITY:
467: if ((ctxt->sax != NULL) && (ctxt->sax->error != NULL))
468: ctxt->sax->error(ctxt->userData,
469: "Internal entity %s without content !\n", entity->name);
470: break;
471: case XML_INTERNAL_PARAMETER_ENTITY:
1.123 ! daniel 472: ctxt->errNo = XML_ERR_INTERNAL_ERROR;
1.113 daniel 473: if ((ctxt->sax != NULL) && (ctxt->sax->error != NULL))
474: ctxt->sax->error(ctxt->userData,
475: "Internal parameter entity %s without content !\n", entity->name);
476: break;
477: case XML_INTERNAL_PREDEFINED_ENTITY:
1.123 ! daniel 478: ctxt->errNo = XML_ERR_INTERNAL_ERROR;
1.113 daniel 479: if ((ctxt->sax != NULL) && (ctxt->sax->error != NULL))
480: ctxt->sax->error(ctxt->userData,
481: "Predefined entity %s without content !\n", entity->name);
482: break;
483: }
1.50 daniel 484: return(NULL);
1.45 daniel 485: }
1.96 daniel 486: input = xmlNewInputStream(ctxt);
1.45 daniel 487: if (input == NULL) {
1.50 daniel 488: return(NULL);
1.45 daniel 489: }
1.123 ! daniel 490: input->filename = (char *) entity->SystemID; /* TODO !!! char <- xmlChar */
1.45 daniel 491: input->base = entity->content;
492: input->cur = entity->content;
1.50 daniel 493: return(input);
1.45 daniel 494: }
495:
1.59 daniel 496: /**
497: * xmlNewStringInputStream:
498: * @ctxt: an XML parser context
1.96 daniel 499: * @buffer: an memory buffer
1.59 daniel 500: *
501: * Create a new input stream based on a memory buffer.
1.68 daniel 502: * Returns the new input stream
1.59 daniel 503: */
504: xmlParserInputPtr
1.123 ! daniel 505: xmlNewStringInputStream(xmlParserCtxtPtr ctxt, const xmlChar *buffer) {
1.59 daniel 506: xmlParserInputPtr input;
507:
1.96 daniel 508: if (buffer == NULL) {
1.123 ! daniel 509: ctxt->errNo = XML_ERR_INTERNAL_ERROR;
1.59 daniel 510: if ((ctxt->sax != NULL) && (ctxt->sax->error != NULL))
1.74 daniel 511: ctxt->sax->error(ctxt->userData,
1.59 daniel 512: "internal: xmlNewStringInputStream string = NULL\n");
513: return(NULL);
514: }
1.96 daniel 515: input = xmlNewInputStream(ctxt);
1.59 daniel 516: if (input == NULL) {
517: return(NULL);
518: }
1.96 daniel 519: input->base = buffer;
520: input->cur = buffer;
1.59 daniel 521: return(input);
522: }
523:
1.76 daniel 524: /**
525: * xmlNewInputFromFile:
526: * @ctxt: an XML parser context
527: * @filename: the filename to use as entity
528: *
529: * Create a new input stream based on a file.
530: *
531: * Returns the new input stream or NULL in case of error
532: */
533: xmlParserInputPtr
1.79 daniel 534: xmlNewInputFromFile(xmlParserCtxtPtr ctxt, const char *filename) {
1.91 daniel 535: xmlParserInputBufferPtr buf;
1.76 daniel 536: xmlParserInputPtr inputStream;
1.111 daniel 537: char *directory = NULL;
1.76 daniel 538:
1.96 daniel 539: if (ctxt == NULL) return(NULL);
1.91 daniel 540: buf = xmlParserInputBufferCreateFilename(filename, XML_CHAR_ENCODING_NONE);
1.94 daniel 541: if (buf == NULL) {
1.106 daniel 542: char name[1024];
543:
1.94 daniel 544: if ((ctxt->input != NULL) && (ctxt->input->directory != NULL)) {
545: #ifdef WIN32
546: sprintf(name, "%s\\%s", ctxt->input->directory, filename);
547: #else
548: sprintf(name, "%s/%s", ctxt->input->directory, filename);
549: #endif
550: buf = xmlParserInputBufferCreateFilename(name,
551: XML_CHAR_ENCODING_NONE);
1.106 daniel 552: if (buf != NULL)
1.119 daniel 553: directory = xmlMemStrdup(ctxt->input->directory);
1.106 daniel 554: }
555: if ((buf == NULL) && (ctxt->directory != NULL)) {
556: #ifdef WIN32
557: sprintf(name, "%s\\%s", ctxt->directory, filename);
558: #else
559: sprintf(name, "%s/%s", ctxt->directory, filename);
560: #endif
561: buf = xmlParserInputBufferCreateFilename(name,
562: XML_CHAR_ENCODING_NONE);
563: if (buf != NULL)
1.119 daniel 564: directory = xmlMemStrdup(ctxt->directory);
1.106 daniel 565: }
566: if (buf == NULL)
1.94 daniel 567: return(NULL);
568: }
569: if (directory == NULL)
570: directory = xmlParserGetDirectory(filename);
1.76 daniel 571:
1.96 daniel 572: inputStream = xmlNewInputStream(ctxt);
1.76 daniel 573: if (inputStream == NULL) {
1.119 daniel 574: if (directory != NULL) xmlFree((char *) directory);
1.76 daniel 575: return(NULL);
576: }
577:
1.119 daniel 578: inputStream->filename = xmlMemStrdup(filename);
1.94 daniel 579: inputStream->directory = directory;
1.91 daniel 580: inputStream->buf = buf;
1.76 daniel 581:
1.91 daniel 582: inputStream->base = inputStream->buf->buffer->content;
583: inputStream->cur = inputStream->buf->buffer->content;
1.106 daniel 584: if ((ctxt->directory == NULL) && (directory != NULL))
1.110 daniel 585: ctxt->directory = directory;
1.76 daniel 586: return(inputStream);
587: }
588:
1.77 daniel 589: /************************************************************************
590: * *
1.97 daniel 591: * Commodity functions to handle parser contexts *
592: * *
593: ************************************************************************/
594:
595: /**
596: * xmlInitParserCtxt:
597: * @ctxt: an XML parser context
598: *
599: * Initialize a parser context
600: */
601:
602: void
603: xmlInitParserCtxt(xmlParserCtxtPtr ctxt)
604: {
605: xmlSAXHandler *sax;
606:
1.119 daniel 607: sax = (xmlSAXHandler *) xmlMalloc(sizeof(xmlSAXHandler));
1.97 daniel 608: if (sax == NULL) {
609: fprintf(stderr, "xmlInitParserCtxt: out of memory\n");
610: }
611:
612: /* Allocate the Input stack */
1.119 daniel 613: ctxt->inputTab = (xmlParserInputPtr *) xmlMalloc(5 * sizeof(xmlParserInputPtr));
1.97 daniel 614: ctxt->inputNr = 0;
615: ctxt->inputMax = 5;
616: ctxt->input = NULL;
617: ctxt->version = NULL;
618: ctxt->encoding = NULL;
619: ctxt->standalone = -1;
1.98 daniel 620: ctxt->hasExternalSubset = 0;
621: ctxt->hasPErefs = 0;
1.97 daniel 622: ctxt->html = 0;
1.98 daniel 623: ctxt->external = 0;
1.97 daniel 624: ctxt->instate = XML_PARSER_PROLOG;
625: ctxt->token = 0;
1.106 daniel 626: ctxt->directory = NULL;
1.97 daniel 627:
628: /* Allocate the Node stack */
1.119 daniel 629: ctxt->nodeTab = (xmlNodePtr *) xmlMalloc(10 * sizeof(xmlNodePtr));
1.97 daniel 630: ctxt->nodeNr = 0;
631: ctxt->nodeMax = 10;
632: ctxt->node = NULL;
633:
634: if (sax == NULL) ctxt->sax = &xmlDefaultSAXHandler;
635: else {
636: ctxt->sax = sax;
637: memcpy(sax, &xmlDefaultSAXHandler, sizeof(xmlSAXHandler));
638: }
639: ctxt->userData = ctxt;
640: ctxt->myDoc = NULL;
641: ctxt->wellFormed = 1;
1.99 daniel 642: ctxt->valid = 1;
1.100 daniel 643: ctxt->validate = xmlDoValidityCheckingDefaultValue;
644: ctxt->vctxt.userData = ctxt;
645: ctxt->vctxt.error = xmlParserValidityError;
646: ctxt->vctxt.warning = xmlParserValidityWarning;
1.97 daniel 647: ctxt->replaceEntities = xmlSubstituteEntitiesDefaultValue;
648: ctxt->record_info = 0;
649: xmlInitNodeInfoSeq(&ctxt->node_seq);
650: }
651:
652: /**
653: * xmlFreeParserCtxt:
654: * @ctxt: an XML parser context
655: *
656: * Free all the memory used by a parser context. However the parsed
657: * document in ctxt->myDoc is not freed.
658: */
659:
660: void
661: xmlFreeParserCtxt(xmlParserCtxtPtr ctxt)
662: {
663: xmlParserInputPtr input;
664:
665: if (ctxt == NULL) return;
666:
667: while ((input = inputPop(ctxt)) != NULL) {
668: xmlFreeInputStream(input);
669: }
670:
1.119 daniel 671: if (ctxt->nodeTab != NULL) xmlFree(ctxt->nodeTab);
672: if (ctxt->inputTab != NULL) xmlFree(ctxt->inputTab);
673: if (ctxt->version != NULL) xmlFree((char *) ctxt->version);
674: if (ctxt->encoding != NULL) xmlFree((char *) ctxt->encoding);
1.97 daniel 675: if ((ctxt->sax != NULL) && (ctxt->sax != &xmlDefaultSAXHandler))
1.119 daniel 676: xmlFree(ctxt->sax);
677: if (ctxt->directory != NULL) xmlFree((char *) ctxt->directory);
678: xmlFree(ctxt);
1.97 daniel 679: }
680:
681: /**
682: * xmlNewParserCtxt:
683: *
684: * Allocate and initialize a new parser context.
685: *
686: * Returns the xmlParserCtxtPtr or NULL
687: */
688:
689: xmlParserCtxtPtr
690: xmlNewParserCtxt()
691: {
692: xmlParserCtxtPtr ctxt;
693:
1.119 daniel 694: ctxt = (xmlParserCtxtPtr) xmlMalloc(sizeof(xmlParserCtxt));
1.97 daniel 695: if (ctxt == NULL) {
696: fprintf(stderr, "xmlNewParserCtxt : cannot allocate context\n");
697: perror("malloc");
698: return(NULL);
699: }
700: xmlInitParserCtxt(ctxt);
701: return(ctxt);
702: }
703:
704: /**
705: * xmlClearParserCtxt:
706: * @ctxt: an XML parser context
707: *
708: * Clear (release owned resources) and reinitialize a parser context
709: */
710:
711: void
712: xmlClearParserCtxt(xmlParserCtxtPtr ctxt)
713: {
714: xmlClearNodeInfoSeq(&ctxt->node_seq);
715: xmlInitParserCtxt(ctxt);
716: }
717:
718: /************************************************************************
719: * *
1.77 daniel 720: * Commodity functions to handle entities *
721: * *
722: ************************************************************************/
723:
1.97 daniel 724: void xmlParserHandleReference(xmlParserCtxtPtr ctxt);
725: void xmlParserHandlePEReference(xmlParserCtxtPtr ctxt);
726:
727: /**
728: * xmlParseCharRef:
729: * @ctxt: an XML parser context
730: *
731: * parse Reference declarations
732: *
733: * [66] CharRef ::= '&#' [0-9]+ ';' |
734: * '&#x' [0-9a-fA-F]+ ';'
735: *
1.98 daniel 736: * [ WFC: Legal Character ]
737: * Characters referred to using character references must match the
738: * production for Char.
739: *
1.97 daniel 740: * Returns the value parsed (as an int)
1.77 daniel 741: */
1.97 daniel 742: int
743: xmlParseCharRef(xmlParserCtxtPtr ctxt) {
744: int val = 0;
745:
1.111 daniel 746: if (ctxt->token != 0) {
747: val = ctxt->token;
748: ctxt->token = 0;
749: return(val);
750: }
1.97 daniel 751: if ((CUR == '&') && (NXT(1) == '#') &&
752: (NXT(2) == 'x')) {
753: SKIP(3);
754: while (CUR != ';') {
755: if ((CUR >= '0') && (CUR <= '9'))
756: val = val * 16 + (CUR - '0');
757: else if ((CUR >= 'a') && (CUR <= 'f'))
758: val = val * 16 + (CUR - 'a') + 10;
759: else if ((CUR >= 'A') && (CUR <= 'F'))
760: val = val * 16 + (CUR - 'A') + 10;
761: else {
1.123 ! daniel 762: ctxt->errNo = XML_ERR_INVALID_HEX_CHARREF;
1.97 daniel 763: if ((ctxt->sax != NULL) && (ctxt->sax->error != NULL))
764: ctxt->sax->error(ctxt->userData,
765: "xmlParseCharRef: invalid hexadecimal value\n");
766: ctxt->wellFormed = 0;
767: val = 0;
768: break;
769: }
770: NEXT;
771: }
772: if (CUR == ';')
773: NEXT;
774: } else if ((CUR == '&') && (NXT(1) == '#')) {
775: SKIP(2);
776: while (CUR != ';') {
777: if ((CUR >= '0') && (CUR <= '9'))
778: val = val * 10 + (CUR - '0');
779: else {
1.123 ! daniel 780: ctxt->errNo = XML_ERR_INVALID_DEC_CHARREF;
1.97 daniel 781: if ((ctxt->sax != NULL) && (ctxt->sax->error != NULL))
782: ctxt->sax->error(ctxt->userData,
783: "xmlParseCharRef: invalid decimal value\n");
784: ctxt->wellFormed = 0;
785: val = 0;
786: break;
787: }
788: NEXT;
789: }
790: if (CUR == ';')
791: NEXT;
792: } else {
1.123 ! daniel 793: ctxt->errNo = XML_ERR_INVALID_CHARREF;
1.97 daniel 794: if ((ctxt->sax != NULL) && (ctxt->sax->error != NULL))
1.98 daniel 795: ctxt->sax->error(ctxt->userData,
796: "xmlParseCharRef: invalid value\n");
1.97 daniel 797: ctxt->wellFormed = 0;
798: }
1.98 daniel 799:
1.97 daniel 800: /*
1.98 daniel 801: * [ WFC: Legal Character ]
802: * Characters referred to using character references must match the
803: * production for Char.
1.97 daniel 804: */
805: if (IS_CHAR(val)) {
806: return(val);
807: } else {
1.123 ! daniel 808: ctxt->errNo = XML_ERR_INVALID_CHAR;
1.97 daniel 809: if ((ctxt->sax != NULL) && (ctxt->sax->error != NULL))
1.123 ! daniel 810: ctxt->sax->error(ctxt->userData, "CharRef: invalid xmlChar value %d\n",
1.97 daniel 811: val);
812: ctxt->wellFormed = 0;
813: }
814: return(0);
1.77 daniel 815: }
816:
1.96 daniel 817: /**
818: * xmlParserHandleReference:
819: * @ctxt: the parser context
820: *
1.97 daniel 821: * [67] Reference ::= EntityRef | CharRef
822: *
1.96 daniel 823: * [68] EntityRef ::= '&' Name ';'
824: *
1.98 daniel 825: * [ WFC: Entity Declared ]
826: * the Name given in the entity reference must match that in an entity
827: * declaration, except that well-formed documents need not declare any
828: * of the following entities: amp, lt, gt, apos, quot.
829: *
830: * [ WFC: Parsed Entity ]
831: * An entity reference must not contain the name of an unparsed entity
832: *
1.97 daniel 833: * [66] CharRef ::= '&#' [0-9]+ ';' |
834: * '&#x' [0-9a-fA-F]+ ';'
835: *
1.96 daniel 836: * A Reference (entity or character reference) may have been detected
837: * in the current input stream; the handling is done according to
838: * http://www.w3.org/TR/REC-xml#entproc
839: */
840: void
841: xmlParserHandleReference(xmlParserCtxtPtr ctxt) {
1.97 daniel 842: xmlParserInputPtr input;
1.123 ! daniel 843: xmlChar *name;
1.97 daniel 844: xmlEntityPtr ent = NULL;
845:
1.111 daniel 846: if (ctxt->token != 0) return;
1.97 daniel 847: if (CUR != '&') return;
848: GROW;
849: if ((CUR == '&') && (NXT(1) == '#')) {
850: switch(ctxt->instate) {
1.109 daniel 851: case XML_PARSER_CDATA_SECTION:
852: return;
1.97 daniel 853: case XML_PARSER_COMMENT:
854: return;
855: case XML_PARSER_EOF:
1.123 ! daniel 856: ctxt->errNo = XML_ERR_CHARREF_AT_EOF;
1.97 daniel 857: if ((ctxt->sax != NULL) && (ctxt->sax->error != NULL))
858: ctxt->sax->error(ctxt->userData, "CharRef at EOF\n");
859: ctxt->wellFormed = 0;
860: return;
861: case XML_PARSER_PROLOG:
1.123 ! daniel 862: ctxt->errNo = XML_ERR_CHARREF_IN_PROLOG;
1.97 daniel 863: if ((ctxt->sax != NULL) && (ctxt->sax->error != NULL))
864: ctxt->sax->error(ctxt->userData, "CharRef in prolog!\n");
865: ctxt->wellFormed = 0;
866: return;
867: case XML_PARSER_EPILOG:
1.123 ! daniel 868: ctxt->errNo = XML_ERR_CHARREF_IN_EPILOG;
1.97 daniel 869: if ((ctxt->sax != NULL) && (ctxt->sax->error != NULL))
870: ctxt->sax->error(ctxt->userData, "CharRef in epilog!\n");
871: ctxt->wellFormed = 0;
872: return;
873: case XML_PARSER_DTD:
1.123 ! daniel 874: ctxt->errNo = XML_ERR_CHARREF_IN_DTD;
1.97 daniel 875: if ((ctxt->sax != NULL) && (ctxt->sax->error != NULL))
876: ctxt->sax->error(ctxt->userData,
877: "CharRef are forbiden in DTDs!\n");
878: ctxt->wellFormed = 0;
879: return;
880: case XML_PARSER_ENTITY_DECL:
881: /* we just ignore it there */
882: return;
883: case XML_PARSER_ENTITY_VALUE:
884: /*
885: * NOTE: in the case of entity values, we don't do the
886: * substitution here since we need the litteral
887: * entity value to be able to save the internal
888: * subset of the document.
889: * This will be handled by xmlDecodeEntities
890: */
891: return;
892: case XML_PARSER_CONTENT:
893: case XML_PARSER_ATTRIBUTE_VALUE:
1.116 daniel 894: /* !!! this may not be Ok for UTF-8, multibyte sequence */
1.97 daniel 895: ctxt->token = xmlParseCharRef(ctxt);
896: return;
897: }
898: return;
899: }
900:
901: switch(ctxt->instate) {
1.109 daniel 902: case XML_PARSER_CDATA_SECTION:
903: return;
1.97 daniel 904: case XML_PARSER_COMMENT:
905: return;
906: case XML_PARSER_EOF:
1.123 ! daniel 907: ctxt->errNo = XML_ERR_ENTITYREF_AT_EOF;
1.97 daniel 908: if ((ctxt->sax != NULL) && (ctxt->sax->error != NULL))
909: ctxt->sax->error(ctxt->userData, "Reference at EOF\n");
910: ctxt->wellFormed = 0;
911: return;
912: case XML_PARSER_PROLOG:
1.123 ! daniel 913: ctxt->errNo = XML_ERR_ENTITYREF_IN_PROLOG;
1.97 daniel 914: if ((ctxt->sax != NULL) && (ctxt->sax->error != NULL))
915: ctxt->sax->error(ctxt->userData, "Reference in prolog!\n");
916: ctxt->wellFormed = 0;
917: return;
918: case XML_PARSER_EPILOG:
1.123 ! daniel 919: ctxt->errNo = XML_ERR_ENTITYREF_IN_EPILOG;
1.97 daniel 920: if ((ctxt->sax != NULL) && (ctxt->sax->error != NULL))
921: ctxt->sax->error(ctxt->userData, "Reference in epilog!\n");
922: ctxt->wellFormed = 0;
923: return;
924: case XML_PARSER_ENTITY_VALUE:
925: /*
926: * NOTE: in the case of entity values, we don't do the
927: * substitution here since we need the litteral
928: * entity value to be able to save the internal
929: * subset of the document.
930: * This will be handled by xmlDecodeEntities
931: */
932: return;
933: case XML_PARSER_ATTRIBUTE_VALUE:
934: /*
935: * NOTE: in the case of attributes values, we don't do the
936: * substitution here unless we are in a mode where
937: * the parser is explicitely asked to substitute
938: * entities. The SAX callback is called with values
939: * without entity substitution.
940: * This will then be handled by xmlDecodeEntities
941: */
1.113 daniel 942: return;
1.97 daniel 943: case XML_PARSER_ENTITY_DECL:
944: /*
945: * we just ignore it there
946: * the substitution will be done once the entity is referenced
947: */
948: return;
949: case XML_PARSER_DTD:
1.123 ! daniel 950: ctxt->errNo = XML_ERR_ENTITYREF_IN_DTD;
1.97 daniel 951: if ((ctxt->sax != NULL) && (ctxt->sax->error != NULL))
952: ctxt->sax->error(ctxt->userData,
953: "Entity references are forbiden in DTDs!\n");
954: ctxt->wellFormed = 0;
955: return;
956: case XML_PARSER_CONTENT:
1.113 daniel 957: return;
1.97 daniel 958: }
959:
960: NEXT;
961: name = xmlScanName(ctxt);
962: if (name == NULL) {
1.123 ! daniel 963: ctxt->errNo = XML_ERR_ENTITYREF_NO_NAME;
1.97 daniel 964: if ((ctxt->sax != NULL) && (ctxt->sax->error != NULL))
965: ctxt->sax->error(ctxt->userData, "Entity reference: no name\n");
966: ctxt->wellFormed = 0;
967: ctxt->token = '&';
968: return;
969: }
970: if (NXT(xmlStrlen(name)) != ';') {
1.123 ! daniel 971: ctxt->errNo = XML_ERR_ENTITYREF_SEMICOL_MISSING;
1.97 daniel 972: if ((ctxt->sax != NULL) && (ctxt->sax->error != NULL))
973: ctxt->sax->error(ctxt->userData,
974: "Entity reference: ';' expected\n");
975: ctxt->wellFormed = 0;
976: ctxt->token = '&';
1.119 daniel 977: xmlFree(name);
1.97 daniel 978: return;
979: }
980: SKIP(xmlStrlen(name) + 1);
981: if (ctxt->sax != NULL) {
982: if (ctxt->sax->getEntity != NULL)
983: ent = ctxt->sax->getEntity(ctxt->userData, name);
984: }
1.98 daniel 985:
986: /*
987: * [ WFC: Entity Declared ]
988: * the Name given in the entity reference must match that in an entity
989: * declaration, except that well-formed documents need not declare any
990: * of the following entities: amp, lt, gt, apos, quot.
991: */
1.97 daniel 992: if (ent == NULL)
993: ent = xmlGetPredefinedEntity(name);
994: if (ent == NULL) {
1.123 ! daniel 995: ctxt->errNo = XML_ERR_UNDECLARED_ENTITY;
1.97 daniel 996: if ((ctxt->sax != NULL) && (ctxt->sax->error != NULL))
997: ctxt->sax->error(ctxt->userData,
1.98 daniel 998: "Entity reference: entity %s not declared\n",
999: name);
1.97 daniel 1000: ctxt->wellFormed = 0;
1.119 daniel 1001: xmlFree(name);
1.97 daniel 1002: return;
1003: }
1.98 daniel 1004:
1005: /*
1006: * [ WFC: Parsed Entity ]
1007: * An entity reference must not contain the name of an unparsed entity
1008: */
1009: if (ent->type == XML_EXTERNAL_GENERAL_UNPARSED_ENTITY) {
1.123 ! daniel 1010: ctxt->errNo = XML_ERR_UNPARSED_ENTITY;
1.98 daniel 1011: if ((ctxt->sax != NULL) && (ctxt->sax->error != NULL))
1012: ctxt->sax->error(ctxt->userData,
1013: "Entity reference to unparsed entity %s\n", name);
1014: ctxt->wellFormed = 0;
1015: }
1016:
1.97 daniel 1017: if (ent->type == XML_INTERNAL_PREDEFINED_ENTITY) {
1018: ctxt->token = ent->content[0];
1.119 daniel 1019: xmlFree(name);
1.97 daniel 1020: return;
1021: }
1022: input = xmlNewEntityInputStream(ctxt, ent);
1023: xmlPushInput(ctxt, input);
1.119 daniel 1024: xmlFree(name);
1.96 daniel 1025: return;
1026: }
1027:
1028: /**
1029: * xmlParserHandlePEReference:
1030: * @ctxt: the parser context
1031: *
1032: * [69] PEReference ::= '%' Name ';'
1033: *
1.98 daniel 1034: * [ WFC: No Recursion ]
1035: * TODO A parsed entity must not contain a recursive
1036: * reference to itself, either directly or indirectly.
1037: *
1038: * [ WFC: Entity Declared ]
1039: * In a document without any DTD, a document with only an internal DTD
1040: * subset which contains no parameter entity references, or a document
1041: * with "standalone='yes'", ... ... The declaration of a parameter
1042: * entity must precede any reference to it...
1043: *
1044: * [ VC: Entity Declared ]
1045: * In a document with an external subset or external parameter entities
1046: * with "standalone='no'", ... ... The declaration of a parameter entity
1047: * must precede any reference to it...
1048: *
1049: * [ WFC: In DTD ]
1050: * Parameter-entity references may only appear in the DTD.
1051: * NOTE: misleading but this is handled.
1052: *
1053: * A PEReference may have been detected in the current input stream
1.96 daniel 1054: * the handling is done accordingly to
1055: * http://www.w3.org/TR/REC-xml#entproc
1056: * i.e.
1057: * - Included in literal in entity values
1058: * - Included as Paraemeter Entity reference within DTDs
1059: */
1060: void
1061: xmlParserHandlePEReference(xmlParserCtxtPtr ctxt) {
1.123 ! daniel 1062: xmlChar *name;
1.96 daniel 1063: xmlEntityPtr entity = NULL;
1064: xmlParserInputPtr input;
1065:
1.111 daniel 1066: if (ctxt->token != 0) return;
1067: if (CUR != '%') return;
1.96 daniel 1068: switch(ctxt->instate) {
1.109 daniel 1069: case XML_PARSER_CDATA_SECTION:
1070: return;
1.97 daniel 1071: case XML_PARSER_COMMENT:
1072: return;
1.96 daniel 1073: case XML_PARSER_EOF:
1.123 ! daniel 1074: ctxt->errNo = XML_ERR_PEREF_AT_EOF;
1.96 daniel 1075: if ((ctxt->sax != NULL) && (ctxt->sax->error != NULL))
1076: ctxt->sax->error(ctxt->userData, "PEReference at EOF\n");
1077: ctxt->wellFormed = 0;
1078: return;
1079: case XML_PARSER_PROLOG:
1.123 ! daniel 1080: ctxt->errNo = XML_ERR_PEREF_IN_PROLOG;
1.96 daniel 1081: if ((ctxt->sax != NULL) && (ctxt->sax->error != NULL))
1082: ctxt->sax->error(ctxt->userData, "PEReference in prolog!\n");
1083: ctxt->wellFormed = 0;
1084: return;
1.97 daniel 1085: case XML_PARSER_ENTITY_DECL:
1.96 daniel 1086: case XML_PARSER_CONTENT:
1087: case XML_PARSER_ATTRIBUTE_VALUE:
1088: /* we just ignore it there */
1089: return;
1090: case XML_PARSER_EPILOG:
1.123 ! daniel 1091: ctxt->errNo = XML_ERR_PEREF_IN_EPILOG;
1.96 daniel 1092: if ((ctxt->sax != NULL) && (ctxt->sax->error != NULL))
1.97 daniel 1093: ctxt->sax->error(ctxt->userData, "PEReference in epilog!\n");
1.96 daniel 1094: ctxt->wellFormed = 0;
1095: return;
1.97 daniel 1096: case XML_PARSER_ENTITY_VALUE:
1097: /*
1098: * NOTE: in the case of entity values, we don't do the
1099: * substitution here since we need the litteral
1100: * entity value to be able to save the internal
1101: * subset of the document.
1102: * This will be handled by xmlDecodeEntities
1103: */
1104: return;
1.96 daniel 1105: case XML_PARSER_DTD:
1.98 daniel 1106: /*
1107: * [WFC: Well-Formedness Constraint: PEs in Internal Subset]
1108: * In the internal DTD subset, parameter-entity references
1109: * can occur only where markup declarations can occur, not
1110: * within markup declarations.
1111: * In that case this is handled in xmlParseMarkupDecl
1112: */
1113: if ((ctxt->external == 0) && (ctxt->inputNr == 1))
1114: return;
1.96 daniel 1115: }
1116:
1117: NEXT;
1118: name = xmlParseName(ctxt);
1119: if (name == NULL) {
1.123 ! daniel 1120: ctxt->errNo = XML_ERR_PEREF_NO_NAME;
1.96 daniel 1121: if ((ctxt->sax != NULL) && (ctxt->sax->error != NULL))
1122: ctxt->sax->error(ctxt->userData, "xmlHandlePEReference: no name\n");
1123: ctxt->wellFormed = 0;
1124: } else {
1125: if (CUR == ';') {
1126: NEXT;
1.98 daniel 1127: if ((ctxt->sax != NULL) && (ctxt->sax->getParameterEntity != NULL))
1128: entity = ctxt->sax->getParameterEntity(ctxt->userData, name);
1.96 daniel 1129: if (entity == NULL) {
1.98 daniel 1130:
1131: /*
1132: * [ WFC: Entity Declared ]
1133: * In a document without any DTD, a document with only an
1134: * internal DTD subset which contains no parameter entity
1135: * references, or a document with "standalone='yes'", ...
1136: * ... The declaration of a parameter entity must precede
1137: * any reference to it...
1138: */
1139: if ((ctxt->standalone == 1) ||
1140: ((ctxt->hasExternalSubset == 0) &&
1141: (ctxt->hasPErefs == 0))) {
1142: if ((ctxt->sax != NULL) && (ctxt->sax->error != NULL))
1143: ctxt->sax->error(ctxt->userData,
1144: "PEReference: %%%s; not found\n", name);
1145: ctxt->wellFormed = 0;
1146: } else {
1147: /*
1148: * [ VC: Entity Declared ]
1149: * In a document with an external subset or external
1150: * parameter entities with "standalone='no'", ...
1151: * ... The declaration of a parameter entity must precede
1152: * any reference to it...
1153: */
1154: if ((ctxt->sax != NULL) && (ctxt->sax->warning != NULL))
1155: ctxt->sax->warning(ctxt->userData,
1156: "PEReference: %%%s; not found\n", name);
1157: ctxt->valid = 0;
1158: }
1.96 daniel 1159: } else {
1160: if ((entity->type == XML_INTERNAL_PARAMETER_ENTITY) ||
1161: (entity->type == XML_EXTERNAL_PARAMETER_ENTITY)) {
1162: /*
1163: * TODO !!!! handle the extra spaces added before and after
1164: * c.f. http://www.w3.org/TR/REC-xml#as-PE
1165: * TODO !!!! Avoid quote processing in parameters value
1166: * c.f. http://www.w3.org/TR/REC-xml#inliteral
1167: */
1168: input = xmlNewEntityInputStream(ctxt, entity);
1169: xmlPushInput(ctxt, input);
1170: } else {
1171: if ((ctxt->sax != NULL) && (ctxt->sax->error != NULL))
1172: ctxt->sax->error(ctxt->userData,
1173: "xmlHandlePEReference: %s is not a parameter entity\n",
1174: name);
1175: ctxt->wellFormed = 0;
1176: }
1177: }
1178: } else {
1.123 ! daniel 1179: ctxt->errNo = XML_ERR_PEREF_SEMICOL_MISSING;
1.96 daniel 1180: if ((ctxt->sax != NULL) && (ctxt->sax->error != NULL))
1181: ctxt->sax->error(ctxt->userData,
1182: "xmlHandlePEReference: expecting ';'\n");
1183: ctxt->wellFormed = 0;
1184: }
1.119 daniel 1185: xmlFree(name);
1.97 daniel 1186: }
1187: }
1188:
/*
 * growBuffer:
 * Double the capacity of a translation buffer.  The argument must be the
 * name of an xmlChar * variable that has a companion integer variable
 * called <name>_size holding its capacity in xmlChar units; both are
 * updated.  Callers in this file save their write offset before the call
 * and rebuild the write pointer from it afterwards.
 * If the reallocation fails a message is printed and the process exits.
 */
#define growBuffer(buffer) { \
    buffer##_size = buffer##_size * 2; \
    buffer = (xmlChar *) \
        xmlRealloc(buffer, sizeof(xmlChar) * buffer##_size); \
    if (!buffer) { \
        perror("realloc failed"); \
        exit(1); \
    } \
}
1.77 daniel 1200:
1201: /**
1202: * xmlDecodeEntities:
1203: * @ctxt: the parser context
1204: * @what: combination of XML_SUBSTITUTE_REF and XML_SUBSTITUTE_PEREF
1205: * @len: the len to decode (in bytes !), -1 for no size limit
1.123 ! daniel 1206: * @end: an end marker xmlChar, 0 if none
! 1207: * @end2: an end marker xmlChar, 0 if none
! 1208: * @end3: an end marker xmlChar, 0 if none
1.77 daniel 1209: *
1210: * [67] Reference ::= EntityRef | CharRef
1211: *
1212: * [69] PEReference ::= '%' Name ';'
1213: *
1214: * Returns A newly allocated string with the substitution done. The caller
1215: * must deallocate it !
1216: */
1.123 ! daniel 1217: xmlChar *
1.77 daniel 1218: xmlDecodeEntities(xmlParserCtxtPtr ctxt, int len, int what,
1.123 ! daniel 1219: xmlChar end, xmlChar end2, xmlChar end3) {
! 1220: xmlChar *buffer = NULL;
1.78 daniel 1221: int buffer_size = 0;
1.123 ! daniel 1222: xmlChar *out = NULL;
1.78 daniel 1223:
1.123 ! daniel 1224: xmlChar *current = NULL;
1.77 daniel 1225: xmlEntityPtr ent;
1.91 daniel 1226: int nbchars = 0;
1.77 daniel 1227: unsigned int max = (unsigned int) len;
1.123 ! daniel 1228: xmlChar cur;
1.77 daniel 1229:
1230: /*
1231: * allocate a translation buffer.
1232: */
1233: buffer_size = 1000;
1.123 ! daniel 1234: buffer = (xmlChar *) xmlMalloc(buffer_size * sizeof(xmlChar));
1.77 daniel 1235: if (buffer == NULL) {
1236: perror("xmlDecodeEntities: malloc failed");
1237: return(NULL);
1238: }
1239: out = buffer;
1240:
1.78 daniel 1241: /*
1242: * Ok loop until we reach one of the ending char or a size limit.
1243: */
1.97 daniel 1244: cur = CUR;
1245: while ((nbchars < max) && (cur != end) &&
1246: (cur != end2) && (cur != end3)) {
1.77 daniel 1247:
1.98 daniel 1248: if (cur == 0) break;
1249: if ((cur == '&') && (NXT(1) == '#')) {
1250: int val = xmlParseCharRef(ctxt);
1251: *out++ = val;
1252: nbchars += 3;
1253: } else if ((cur == '&') && (what & XML_SUBSTITUTE_REF)) {
1254: ent = xmlParseEntityRef(ctxt);
1255: if ((ent != NULL) &&
1256: (ctxt->replaceEntities != 0)) {
1257: current = ent->content;
1258: while (*current != 0) {
1259: *out++ = *current++;
1260: if (out - buffer > buffer_size - 100) {
1261: int index = out - buffer;
1262:
1263: growBuffer(buffer);
1264: out = &buffer[index];
1.77 daniel 1265: }
1266: }
1.98 daniel 1267: nbchars += 3 + xmlStrlen(ent->name);
1268: } else if (ent != NULL) {
1269: int i = xmlStrlen(ent->name);
1.123 ! daniel 1270: const xmlChar *cur = ent->name;
1.98 daniel 1271:
1272: nbchars += i + 2;
1273: *out++ = '&';
1274: if (out - buffer > buffer_size - i - 100) {
1275: int index = out - buffer;
1276:
1277: growBuffer(buffer);
1278: out = &buffer[index];
1279: }
1280: for (;i > 0;i--)
1281: *out++ = *cur++;
1282: *out++ = ';';
1.77 daniel 1283: }
1.97 daniel 1284: } else if (cur == '%' && (what & XML_SUBSTITUTE_PEREF)) {
1285: /*
1.77 daniel 1286: * a PEReference induce to switch the entity flow,
1287: * we break here to flush the current set of chars
1288: * parsed if any. We will be called back later.
1.97 daniel 1289: */
1.91 daniel 1290: if (nbchars != 0) break;
1.77 daniel 1291:
1292: xmlParsePEReference(ctxt);
1.79 daniel 1293:
1.97 daniel 1294: /*
1.79 daniel 1295: * Pop-up of finished entities.
1.97 daniel 1296: */
1.79 daniel 1297: while ((CUR == 0) && (ctxt->inputNr > 1))
1298: xmlPopInput(ctxt);
1299:
1.98 daniel 1300: break;
1.77 daniel 1301: } else {
1.116 daniel 1302: /* invalid for UTF-8 , use COPY(out); !!!!!! */
1.97 daniel 1303: *out++ = cur;
1.91 daniel 1304: nbchars++;
1.86 daniel 1305: if (out - buffer > buffer_size - 100) {
1306: int index = out - buffer;
1307:
1308: growBuffer(buffer);
1309: out = &buffer[index];
1310: }
1.77 daniel 1311: NEXT;
1312: }
1.97 daniel 1313: cur = CUR;
1.77 daniel 1314: }
1315: *out++ = 0;
1316: return(buffer);
1317: }
1318:
1.1 veillard 1319:
1.28 daniel 1320: /************************************************************************
1321: * *
1.75 daniel 1322: * Commodity functions to handle encodings *
1323: * *
1324: ************************************************************************/
1325:
1326: /**
1327: * xmlSwitchEncoding:
1328: * @ctxt: the parser context
1329: * @len: the len of @cur
1330: *
1331: * change the input functions when discovering the character encoding
1332: * of a given entity.
1333: *
1334: */
1335: void
1336: xmlSwitchEncoding(xmlParserCtxtPtr ctxt, xmlCharEncoding enc)
1337: {
1338: switch (enc) {
1339: case XML_CHAR_ENCODING_ERROR:
1.123 ! daniel 1340: ctxt->errNo = XML_ERR_UNKNOWN_ENCODING;
1.75 daniel 1341: if ((ctxt->sax != NULL) && (ctxt->sax->error != NULL))
1342: ctxt->sax->error(ctxt->userData, "encoding unknown\n");
1343: ctxt->wellFormed = 0;
1344: break;
1345: case XML_CHAR_ENCODING_NONE:
1346: /* let's assume it's UTF-8 without the XML decl */
1347: return;
1348: case XML_CHAR_ENCODING_UTF8:
1349: /* default encoding, no conversion should be needed */
1350: return;
1351: case XML_CHAR_ENCODING_UTF16LE:
1.123 ! daniel 1352: ctxt->errNo = XML_ERR_UNSUPPORTED_ENCODING;
1.75 daniel 1353: if ((ctxt->sax != NULL) && (ctxt->sax->error != NULL))
1354: ctxt->sax->error(ctxt->userData,
1355: "char encoding UTF16 little endian not supported\n");
1356: break;
1357: case XML_CHAR_ENCODING_UTF16BE:
1.123 ! daniel 1358: ctxt->errNo = XML_ERR_UNSUPPORTED_ENCODING;
1.75 daniel 1359: if ((ctxt->sax != NULL) && (ctxt->sax->error != NULL))
1360: ctxt->sax->error(ctxt->userData,
1361: "char encoding UTF16 big endian not supported\n");
1362: break;
1363: case XML_CHAR_ENCODING_UCS4LE:
1.123 ! daniel 1364: ctxt->errNo = XML_ERR_UNSUPPORTED_ENCODING;
1.75 daniel 1365: if ((ctxt->sax != NULL) && (ctxt->sax->error != NULL))
1366: ctxt->sax->error(ctxt->userData,
1367: "char encoding USC4 little endian not supported\n");
1368: break;
1369: case XML_CHAR_ENCODING_UCS4BE:
1.123 ! daniel 1370: ctxt->errNo = XML_ERR_UNSUPPORTED_ENCODING;
1.75 daniel 1371: if ((ctxt->sax != NULL) && (ctxt->sax->error != NULL))
1372: ctxt->sax->error(ctxt->userData,
1373: "char encoding USC4 big endian not supported\n");
1374: break;
1375: case XML_CHAR_ENCODING_EBCDIC:
1.123 ! daniel 1376: ctxt->errNo = XML_ERR_UNSUPPORTED_ENCODING;
1.75 daniel 1377: if ((ctxt->sax != NULL) && (ctxt->sax->error != NULL))
1378: ctxt->sax->error(ctxt->userData,
1379: "char encoding EBCDIC not supported\n");
1380: break;
1381: case XML_CHAR_ENCODING_UCS4_2143:
1.123 ! daniel 1382: ctxt->errNo = XML_ERR_UNSUPPORTED_ENCODING;
1.75 daniel 1383: if ((ctxt->sax != NULL) && (ctxt->sax->error != NULL))
1384: ctxt->sax->error(ctxt->userData,
1385: "char encoding UCS4 2143 not supported\n");
1386: break;
1387: case XML_CHAR_ENCODING_UCS4_3412:
1.123 ! daniel 1388: ctxt->errNo = XML_ERR_UNSUPPORTED_ENCODING;
1.75 daniel 1389: if ((ctxt->sax != NULL) && (ctxt->sax->error != NULL))
1390: ctxt->sax->error(ctxt->userData,
1391: "char encoding UCS4 3412 not supported\n");
1392: break;
1393: case XML_CHAR_ENCODING_UCS2:
1.123 ! daniel 1394: ctxt->errNo = XML_ERR_UNSUPPORTED_ENCODING;
1.75 daniel 1395: if ((ctxt->sax != NULL) && (ctxt->sax->error != NULL))
1396: ctxt->sax->error(ctxt->userData,
1397: "char encoding UCS2 not supported\n");
1398: break;
1399: case XML_CHAR_ENCODING_8859_1:
1.123 ! daniel 1400: ctxt->errNo = XML_ERR_UNSUPPORTED_ENCODING;
1.75 daniel 1401: if ((ctxt->sax != NULL) && (ctxt->sax->error != NULL))
1402: ctxt->sax->error(ctxt->userData,
1403: "char encoding ISO_8859_1 ISO Latin 1 not supported\n");
1404: break;
1405: case XML_CHAR_ENCODING_8859_2:
1.123 ! daniel 1406: ctxt->errNo = XML_ERR_UNSUPPORTED_ENCODING;
1.75 daniel 1407: if ((ctxt->sax != NULL) && (ctxt->sax->error != NULL))
1408: ctxt->sax->error(ctxt->userData,
1409: "char encoding ISO_8859_2 ISO Latin 2 not supported\n");
1410: break;
1411: case XML_CHAR_ENCODING_8859_3:
1.123 ! daniel 1412: ctxt->errNo = XML_ERR_UNSUPPORTED_ENCODING;
1.75 daniel 1413: if ((ctxt->sax != NULL) && (ctxt->sax->error != NULL))
1414: ctxt->sax->error(ctxt->userData,
1415: "char encoding ISO_8859_3 not supported\n");
1416: break;
1417: case XML_CHAR_ENCODING_8859_4:
1.123 ! daniel 1418: ctxt->errNo = XML_ERR_UNSUPPORTED_ENCODING;
1.75 daniel 1419: if ((ctxt->sax != NULL) && (ctxt->sax->error != NULL))
1420: ctxt->sax->error(ctxt->userData,
1421: "char encoding ISO_8859_4 not supported\n");
1422: break;
1423: case XML_CHAR_ENCODING_8859_5:
1.123 ! daniel 1424: ctxt->errNo = XML_ERR_UNSUPPORTED_ENCODING;
1.75 daniel 1425: if ((ctxt->sax != NULL) && (ctxt->sax->error != NULL))
1426: ctxt->sax->error(ctxt->userData,
1427: "char encoding ISO_8859_5 not supported\n");
1428: break;
1429: case XML_CHAR_ENCODING_8859_6:
1.123 ! daniel 1430: ctxt->errNo = XML_ERR_UNSUPPORTED_ENCODING;
1.75 daniel 1431: if ((ctxt->sax != NULL) && (ctxt->sax->error != NULL))
1432: ctxt->sax->error(ctxt->userData,
1433: "char encoding ISO_8859_6 not supported\n");
1434: break;
1435: case XML_CHAR_ENCODING_8859_7:
1.123 ! daniel 1436: ctxt->errNo = XML_ERR_UNSUPPORTED_ENCODING;
1.75 daniel 1437: if ((ctxt->sax != NULL) && (ctxt->sax->error != NULL))
1438: ctxt->sax->error(ctxt->userData,
1439: "char encoding ISO_8859_7 not supported\n");
1440: break;
1441: case XML_CHAR_ENCODING_8859_8:
1.123 ! daniel 1442: ctxt->errNo = XML_ERR_UNSUPPORTED_ENCODING;
1.75 daniel 1443: if ((ctxt->sax != NULL) && (ctxt->sax->error != NULL))
1444: ctxt->sax->error(ctxt->userData,
1445: "char encoding ISO_8859_8 not supported\n");
1446: break;
1447: case XML_CHAR_ENCODING_8859_9:
1.123 ! daniel 1448: ctxt->errNo = XML_ERR_UNSUPPORTED_ENCODING;
1.75 daniel 1449: if ((ctxt->sax != NULL) && (ctxt->sax->error != NULL))
1450: ctxt->sax->error(ctxt->userData,
1451: "char encoding ISO_8859_9 not supported\n");
1452: break;
1453: case XML_CHAR_ENCODING_2022_JP:
1.123 ! daniel 1454: ctxt->errNo = XML_ERR_UNSUPPORTED_ENCODING;
1.75 daniel 1455: if ((ctxt->sax != NULL) && (ctxt->sax->error != NULL))
1456: ctxt->sax->error(ctxt->userData,
1457: "char encoding ISO-2022-JPnot supported\n");
1458: break;
1459: case XML_CHAR_ENCODING_SHIFT_JIS:
1.123 ! daniel 1460: ctxt->errNo = XML_ERR_UNSUPPORTED_ENCODING;
1.75 daniel 1461: if ((ctxt->sax != NULL) && (ctxt->sax->error != NULL))
1462: ctxt->sax->error(ctxt->userData,
1463: "char encoding Shift_JISnot supported\n");
1464: break;
1465: case XML_CHAR_ENCODING_EUC_JP:
1.123 ! daniel 1466: ctxt->errNo = XML_ERR_UNSUPPORTED_ENCODING;
1.75 daniel 1467: if ((ctxt->sax != NULL) && (ctxt->sax->error != NULL))
1468: ctxt->sax->error(ctxt->userData,
1469: "char encoding EUC-JPnot supported\n");
1470: break;
1471: }
1472: }
1473:
1474: /************************************************************************
1475: * *
1.123 ! daniel 1476: * Commodity functions to handle xmlChars *
1.28 daniel 1477: * *
1478: ************************************************************************/
1479:
1.50 daniel 1480: /**
1481: * xmlStrndup:
1.123 ! daniel 1482: * @cur: the input xmlChar *
1.50 daniel 1483: * @len: the len of @cur
1484: *
1.123 ! daniel 1485: * a strndup for array of xmlChar's
1.68 daniel 1486: *
1.123 ! daniel 1487: * Returns a new xmlChar * or NULL
1.1 veillard 1488: */
1.123 ! daniel 1489: xmlChar *
! 1490: xmlStrndup(const xmlChar *cur, int len) {
! 1491: xmlChar *ret = xmlMalloc((len + 1) * sizeof(xmlChar));
1.1 veillard 1492:
1493: if (ret == NULL) {
1.86 daniel 1494: fprintf(stderr, "malloc of %ld byte failed\n",
1.123 ! daniel 1495: (len + 1) * (long)sizeof(xmlChar));
1.1 veillard 1496: return(NULL);
1497: }
1.123 ! daniel 1498: memcpy(ret, cur, len * sizeof(xmlChar));
1.1 veillard 1499: ret[len] = 0;
1500: return(ret);
1501: }
1502:
1.50 daniel 1503: /**
1504: * xmlStrdup:
1.123 ! daniel 1505: * @cur: the input xmlChar *
1.50 daniel 1506: *
1.123 ! daniel 1507: * a strdup for array of xmlChar's
1.68 daniel 1508: *
1.123 ! daniel 1509: * Returns a new xmlChar * or NULL
1.1 veillard 1510: */
1.123 ! daniel 1511: xmlChar *
! 1512: xmlStrdup(const xmlChar *cur) {
! 1513: const xmlChar *p = cur;
1.1 veillard 1514:
1515: while (IS_CHAR(*p)) p++;
1516: return(xmlStrndup(cur, p - cur));
1517: }
1518:
1.50 daniel 1519: /**
1520: * xmlCharStrndup:
1521: * @cur: the input char *
1522: * @len: the len of @cur
1523: *
1.123 ! daniel 1524: * a strndup for char's to xmlChar's
1.68 daniel 1525: *
1.123 ! daniel 1526: * Returns a new xmlChar * or NULL
1.45 daniel 1527: */
1528:
1.123 ! daniel 1529: xmlChar *
1.55 daniel 1530: xmlCharStrndup(const char *cur, int len) {
1.45 daniel 1531: int i;
1.123 ! daniel 1532: xmlChar *ret = xmlMalloc((len + 1) * sizeof(xmlChar));
1.45 daniel 1533:
1534: if (ret == NULL) {
1.86 daniel 1535: fprintf(stderr, "malloc of %ld byte failed\n",
1.123 ! daniel 1536: (len + 1) * (long)sizeof(xmlChar));
1.45 daniel 1537: return(NULL);
1538: }
1539: for (i = 0;i < len;i++)
1.123 ! daniel 1540: ret[i] = (xmlChar) cur[i];
1.45 daniel 1541: ret[len] = 0;
1542: return(ret);
1543: }
1544:
1.50 daniel 1545: /**
1546: * xmlCharStrdup:
1547: * @cur: the input char *
1548: * @len: the len of @cur
1549: *
1.123 ! daniel 1550: * a strdup for char's to xmlChar's
1.68 daniel 1551: *
1.123 ! daniel 1552: * Returns a new xmlChar * or NULL
1.45 daniel 1553: */
1554:
1.123 ! daniel 1555: xmlChar *
1.55 daniel 1556: xmlCharStrdup(const char *cur) {
1.45 daniel 1557: const char *p = cur;
1558:
1559: while (*p != '\0') p++;
1560: return(xmlCharStrndup(cur, p - cur));
1561: }
1562:
1.50 daniel 1563: /**
1564: * xmlStrcmp:
1.123 ! daniel 1565: * @str1: the first xmlChar *
! 1566: * @str2: the second xmlChar *
1.50 daniel 1567: *
1.123 ! daniel 1568: * a strcmp for xmlChar's
1.68 daniel 1569: *
1570: * Returns the integer result of the comparison
1.14 veillard 1571: */
1572:
1.55 daniel 1573: int
1.123 ! daniel 1574: xmlStrcmp(const xmlChar *str1, const xmlChar *str2) {
1.14 veillard 1575: register int tmp;
1576:
1577: do {
1578: tmp = *str1++ - *str2++;
1579: if (tmp != 0) return(tmp);
1580: } while ((*str1 != 0) && (*str2 != 0));
1581: return (*str1 - *str2);
1582: }
1583:
1.50 daniel 1584: /**
1585: * xmlStrncmp:
1.123 ! daniel 1586: * @str1: the first xmlChar *
! 1587: * @str2: the second xmlChar *
1.50 daniel 1588: * @len: the max comparison length
1589: *
1.123 ! daniel 1590: * a strncmp for xmlChar's
1.68 daniel 1591: *
1592: * Returns the integer result of the comparison
1.14 veillard 1593: */
1594:
1.55 daniel 1595: int
1.123 ! daniel 1596: xmlStrncmp(const xmlChar *str1, const xmlChar *str2, int len) {
1.14 veillard 1597: register int tmp;
1598:
1599: if (len <= 0) return(0);
1600: do {
1601: tmp = *str1++ - *str2++;
1602: if (tmp != 0) return(tmp);
1603: len--;
1604: if (len <= 0) return(0);
1605: } while ((*str1 != 0) && (*str2 != 0));
1606: return (*str1 - *str2);
1607: }
1608:
1.50 daniel 1609: /**
1610: * xmlStrchr:
1.123 ! daniel 1611: * @str: the xmlChar * array
! 1612: * @val: the xmlChar to search
1.50 daniel 1613: *
1.123 ! daniel 1614: * a strchr for xmlChar's
1.68 daniel 1615: *
1.123 ! daniel 1616: * Returns the xmlChar * for the first occurence or NULL.
1.14 veillard 1617: */
1618:
1.123 ! daniel 1619: const xmlChar *
! 1620: xmlStrchr(const xmlChar *str, xmlChar val) {
1.14 veillard 1621: while (*str != 0) {
1.123 ! daniel 1622: if (*str == val) return((xmlChar *) str);
1.14 veillard 1623: str++;
1624: }
1625: return(NULL);
1.89 daniel 1626: }
1627:
1628: /**
1629: * xmlStrstr:
1.123 ! daniel 1630: * @str: the xmlChar * array (haystack)
! 1631: * @val: the xmlChar to search (needle)
1.89 daniel 1632: *
1.123 ! daniel 1633: * a strstr for xmlChar's
1.89 daniel 1634: *
1.123 ! daniel 1635: * Returns the xmlChar * for the first occurence or NULL.
1.89 daniel 1636: */
1637:
1.123 ! daniel 1638: const xmlChar *
! 1639: xmlStrstr(const xmlChar *str, xmlChar *val) {
1.89 daniel 1640: int n;
1641:
1642: if (str == NULL) return(NULL);
1643: if (val == NULL) return(NULL);
1644: n = xmlStrlen(val);
1645:
1646: if (n == 0) return(str);
1647: while (*str != 0) {
1648: if (*str == *val) {
1.123 ! daniel 1649: if (!xmlStrncmp(str, val, n)) return((const xmlChar *) str);
1.89 daniel 1650: }
1651: str++;
1652: }
1653: return(NULL);
1654: }
1655:
1656: /**
1657: * xmlStrsub:
1.123 ! daniel 1658: * @str: the xmlChar * array (haystack)
1.89 daniel 1659: * @start: the index of the first char (zero based)
1660: * @len: the length of the substring
1661: *
1662: * Extract a substring of a given string
1663: *
1.123 ! daniel 1664: * Returns the xmlChar * for the first occurence or NULL.
1.89 daniel 1665: */
1666:
1.123 ! daniel 1667: xmlChar *
! 1668: xmlStrsub(const xmlChar *str, int start, int len) {
1.89 daniel 1669: int i;
1670:
1671: if (str == NULL) return(NULL);
1672: if (start < 0) return(NULL);
1.90 daniel 1673: if (len < 0) return(NULL);
1.89 daniel 1674:
1675: for (i = 0;i < start;i++) {
1676: if (*str == 0) return(NULL);
1677: str++;
1678: }
1679: if (*str == 0) return(NULL);
1680: return(xmlStrndup(str, len));
1.14 veillard 1681: }
1.28 daniel 1682:
1.50 daniel 1683: /**
1684: * xmlStrlen:
1.123 ! daniel 1685: * @str: the xmlChar * array
1.50 daniel 1686: *
1.123 ! daniel 1687: * lenght of a xmlChar's string
1.68 daniel 1688: *
1.123 ! daniel 1689: * Returns the number of xmlChar contained in the ARRAY.
1.45 daniel 1690: */
1691:
1.55 daniel 1692: int
1.123 ! daniel 1693: xmlStrlen(const xmlChar *str) {
1.45 daniel 1694: int len = 0;
1695:
1696: if (str == NULL) return(0);
1697: while (*str != 0) {
1698: str++;
1699: len++;
1700: }
1701: return(len);
1702: }
1703:
1.50 daniel 1704: /**
1705: * xmlStrncat:
1.123 ! daniel 1706: * @cur: the original xmlChar * array
! 1707: * @add: the xmlChar * array added
1.50 daniel 1708: * @len: the length of @add
1709: *
1.123 ! daniel 1710: * a strncat for array of xmlChar's
1.68 daniel 1711: *
1.123 ! daniel 1712: * Returns a new xmlChar * containing the concatenated string.
1.45 daniel 1713: */
1714:
1.123 ! daniel 1715: xmlChar *
! 1716: xmlStrncat(xmlChar *cur, const xmlChar *add, int len) {
1.45 daniel 1717: int size;
1.123 ! daniel 1718: xmlChar *ret;
1.45 daniel 1719:
1720: if ((add == NULL) || (len == 0))
1721: return(cur);
1722: if (cur == NULL)
1723: return(xmlStrndup(add, len));
1724:
1725: size = xmlStrlen(cur);
1.123 ! daniel 1726: ret = xmlRealloc(cur, (size + len + 1) * sizeof(xmlChar));
1.45 daniel 1727: if (ret == NULL) {
1.86 daniel 1728: fprintf(stderr, "xmlStrncat: realloc of %ld byte failed\n",
1.123 ! daniel 1729: (size + len + 1) * (long)sizeof(xmlChar));
1.45 daniel 1730: return(cur);
1731: }
1.123 ! daniel 1732: memcpy(&ret[size], add, len * sizeof(xmlChar));
1.45 daniel 1733: ret[size + len] = 0;
1734: return(ret);
1735: }
1736:
1.50 daniel 1737: /**
1738: * xmlStrcat:
1.123 ! daniel 1739: * @cur: the original xmlChar * array
! 1740: * @add: the xmlChar * array added
1.50 daniel 1741: *
1.123 ! daniel 1742: * a strcat for array of xmlChar's
1.68 daniel 1743: *
1.123 ! daniel 1744: * Returns a new xmlChar * containing the concatenated string.
1.45 daniel 1745: */
1.123 ! daniel 1746: xmlChar *
! 1747: xmlStrcat(xmlChar *cur, const xmlChar *add) {
! 1748: const xmlChar *p = add;
1.45 daniel 1749:
1750: if (add == NULL) return(cur);
1751: if (cur == NULL)
1752: return(xmlStrdup(add));
1753:
1754: while (IS_CHAR(*p)) p++;
1755: return(xmlStrncat(cur, add, p - add));
1756: }
1757:
1758: /************************************************************************
1759: * *
1760: * Commodity functions, cleanup needed ? *
1761: * *
1762: ************************************************************************/
1763:
1.50 daniel 1764: /**
1765: * areBlanks:
1766: * @ctxt: an XML parser context
1.123 ! daniel 1767: * @str: a xmlChar *
1.50 daniel 1768: * @len: the size of @str
1769: *
1.45 daniel 1770: * Is this a sequence of blank chars that one can ignore ?
1.50 daniel 1771: *
1.99 daniel 1772: * TODO: Whether white space are significant has to be checked accordingly
1773: * to DTD informations if available
1.68 daniel 1774: *
1775: * Returns 1 if ignorable 0 otherwise.
1.45 daniel 1776: */
1777:
1.123 ! daniel 1778: static int areBlanks(xmlParserCtxtPtr ctxt, const xmlChar *str, int len) {
1.104 daniel 1779: int i, ret;
1.45 daniel 1780: xmlNodePtr lastChild;
1781:
1782: for (i = 0;i < len;i++)
1783: if (!(IS_BLANK(str[i]))) return(0);
1784:
1785: if (CUR != '<') return(0);
1.72 daniel 1786: if (ctxt->node == NULL) return(0);
1.104 daniel 1787: if (ctxt->myDoc != NULL) {
1788: ret = xmlIsMixedElement(ctxt->myDoc, ctxt->node->name);
1789: if (ret == 0) return(1);
1790: if (ret == 1) return(0);
1791: }
1792: /*
1793: * heuristic
1794: */
1.45 daniel 1795: lastChild = xmlGetLastChild(ctxt->node);
1796: if (lastChild == NULL) {
1797: if (ctxt->node->content != NULL) return(0);
1798: } else if (xmlNodeIsText(lastChild))
1799: return(0);
1.104 daniel 1800: else if ((ctxt->node->childs != NULL) &&
1801: (xmlNodeIsText(ctxt->node->childs)))
1802: return(0);
1.45 daniel 1803: return(1);
1804: }
1805:
1.50 daniel 1806: /**
1807: * xmlHandleEntity:
1808: * @ctxt: an XML parser context
1809: * @entity: an XML entity pointer.
1810: *
1811: * Default handling of defined entities, when should we define a new input
1.45 daniel 1812: * stream ? When do we just handle that as a set of chars ?
1.99 daniel 1813: *
1814: * OBSOLETE: to be removed at some point.
1.45 daniel 1815: */
1816:
1.55 daniel 1817: void
1818: xmlHandleEntity(xmlParserCtxtPtr ctxt, xmlEntityPtr entity) {
1.45 daniel 1819: int len;
1.50 daniel 1820: xmlParserInputPtr input;
1.45 daniel 1821:
1822: if (entity->content == NULL) {
1.123 ! daniel 1823: ctxt->errNo = XML_ERR_INTERNAL_ERROR;
1.55 daniel 1824: if ((ctxt->sax != NULL) && (ctxt->sax->error != NULL))
1.74 daniel 1825: ctxt->sax->error(ctxt->userData, "xmlHandleEntity %s: content == NULL\n",
1.45 daniel 1826: entity->name);
1.59 daniel 1827: ctxt->wellFormed = 0;
1.45 daniel 1828: return;
1829: }
1830: len = xmlStrlen(entity->content);
1831: if (len <= 2) goto handle_as_char;
1832:
1833: /*
1834: * Redefine its content as an input stream.
1835: */
1.50 daniel 1836: input = xmlNewEntityInputStream(ctxt, entity);
1837: xmlPushInput(ctxt, input);
1.45 daniel 1838: return;
1839:
1840: handle_as_char:
1841: /*
1842: * Just handle the content as a set of chars.
1843: */
1.72 daniel 1844: if ((ctxt->sax != NULL) && (ctxt->sax->characters != NULL))
1.74 daniel 1845: ctxt->sax->characters(ctxt->userData, entity->content, len);
1.45 daniel 1846:
1847: }
1848:
1849: /*
1850: * Forward definition for recursive behaviour.
1851: */
1.77 daniel 1852: void xmlParsePEReference(xmlParserCtxtPtr ctxt);
1853: void xmlParseReference(xmlParserCtxtPtr ctxt);
1.45 daniel 1854:
1.28 daniel 1855: /************************************************************************
1856: * *
1857: * Extra stuff for namespace support *
1858: * Relates to http://www.w3.org/TR/WD-xml-names *
1859: * *
1860: ************************************************************************/
1861:
1.50 daniel 1862: /**
1863: * xmlNamespaceParseNCName:
1864: * @ctxt: an XML parser context
1865: *
1866: * parse an XML namespace name.
1.28 daniel 1867: *
1868: * [NS 3] NCName ::= (Letter | '_') (NCNameChar)*
1869: *
1870: * [NS 4] NCNameChar ::= Letter | Digit | '.' | '-' | '_' |
1871: * CombiningChar | Extender
1.68 daniel 1872: *
1873: * Returns the namespace name or NULL
1.28 daniel 1874: */
1875:
1.123 ! daniel 1876: xmlChar *
1.55 daniel 1877: xmlNamespaceParseNCName(xmlParserCtxtPtr ctxt) {
1.123 ! daniel 1878: xmlChar buf[XML_MAX_NAMELEN];
1.91 daniel 1879: int len = 0;
1.28 daniel 1880:
1.40 daniel 1881: if (!IS_LETTER(CUR) && (CUR != '_')) return(NULL);
1.28 daniel 1882:
1.40 daniel 1883: while ((IS_LETTER(CUR)) || (IS_DIGIT(CUR)) ||
1884: (CUR == '.') || (CUR == '-') ||
1885: (CUR == '_') ||
1886: (IS_COMBINING(CUR)) ||
1.91 daniel 1887: (IS_EXTENDER(CUR))) {
1888: buf[len++] = CUR;
1.40 daniel 1889: NEXT;
1.91 daniel 1890: if (len >= XML_MAX_NAMELEN) {
1891: fprintf(stderr,
1892: "xmlNamespaceParseNCName: reached XML_MAX_NAMELEN limit\n");
1893: while ((IS_LETTER(CUR)) || (IS_DIGIT(CUR)) ||
1894: (CUR == '.') || (CUR == '-') ||
1895: (CUR == '_') ||
1896: (IS_COMBINING(CUR)) ||
1897: (IS_EXTENDER(CUR)))
1898: NEXT;
1899: break;
1900: }
1901: }
1902: return(xmlStrndup(buf, len));
1.28 daniel 1903: }
1904:
1.50 daniel 1905: /**
1906: * xmlNamespaceParseQName:
1907: * @ctxt: an XML parser context
1.123 ! daniel 1908: * @prefix: a xmlChar **
1.50 daniel 1909: *
1910: * parse an XML qualified name
1.28 daniel 1911: *
1912: * [NS 5] QName ::= (Prefix ':')? LocalPart
1913: *
1914: * [NS 6] Prefix ::= NCName
1915: *
1916: * [NS 7] LocalPart ::= NCName
1.68 daniel 1917: *
1918: * Returns the function returns the local part, and prefix is updated
1.50 daniel 1919: * to get the Prefix if any.
1.28 daniel 1920: */
1921:
1.123 ! daniel 1922: xmlChar *
! 1923: xmlNamespaceParseQName(xmlParserCtxtPtr ctxt, xmlChar **prefix) {
! 1924: xmlChar *ret = NULL;
1.28 daniel 1925:
1926: *prefix = NULL;
1927: ret = xmlNamespaceParseNCName(ctxt);
1.40 daniel 1928: if (CUR == ':') {
1.28 daniel 1929: *prefix = ret;
1.40 daniel 1930: NEXT;
1.28 daniel 1931: ret = xmlNamespaceParseNCName(ctxt);
1932: }
1933:
1934: return(ret);
1935: }
1936:
1.50 daniel 1937: /**
1.72 daniel 1938: * xmlSplitQName:
1939: * @name: an XML parser context
1.123 ! daniel 1940: * @prefix: a xmlChar **
1.72 daniel 1941: *
1942: * parse an XML qualified name string
1943: *
1944: * [NS 5] QName ::= (Prefix ':')? LocalPart
1945: *
1946: * [NS 6] Prefix ::= NCName
1947: *
1948: * [NS 7] LocalPart ::= NCName
1949: *
1950: * Returns the function returns the local part, and prefix is updated
1951: * to get the Prefix if any.
1952: */
1953:
1.123 ! daniel 1954: xmlChar *
! 1955: xmlSplitQName(const xmlChar *name, xmlChar **prefix) {
! 1956: xmlChar *ret = NULL;
! 1957: const xmlChar *q;
! 1958: const xmlChar *cur = name;
1.72 daniel 1959:
1960: *prefix = NULL;
1.113 daniel 1961:
1962: /* xml: prefix is not really a namespace */
1963: if ((cur[0] == 'x') && (cur[1] == 'm') &&
1964: (cur[2] == 'l') && (cur[3] == ':'))
1965: return(xmlStrdup(name));
1966:
1.72 daniel 1967: if (!IS_LETTER(*cur) && (*cur != '_')) return(NULL);
1968: q = cur++;
1969:
1970: while ((IS_LETTER(*cur)) || (IS_DIGIT(*cur)) ||
1971: (*cur == '.') || (*cur == '-') ||
1972: (*cur == '_') ||
1973: (IS_COMBINING(*cur)) ||
1974: (IS_EXTENDER(*cur)))
1975: cur++;
1976:
1977: ret = xmlStrndup(q, cur - q);
1978:
1979: if (*cur == ':') {
1980: cur++;
1981: if (!IS_LETTER(*cur) && (*cur != '_')) return(ret);
1982: *prefix = ret;
1983:
1984: q = cur++;
1985:
1986: while ((IS_LETTER(*cur)) || (IS_DIGIT(*cur)) ||
1987: (*cur == '.') || (*cur == '-') ||
1988: (*cur == '_') ||
1989: (IS_COMBINING(*cur)) ||
1990: (IS_EXTENDER(*cur)))
1991: cur++;
1992:
1993: ret = xmlStrndup(q, cur - q);
1994: }
1995:
1996: return(ret);
1997: }
1998: /**
1.50 daniel 1999: * xmlNamespaceParseNSDef:
2000: * @ctxt: an XML parser context
2001: *
2002: * parse a namespace prefix declaration
1.28 daniel 2003: *
2004: * [NS 1] NSDef ::= PrefixDef Eq SystemLiteral
2005: *
2006: * [NS 2] PrefixDef ::= 'xmlns' (':' NCName)?
1.68 daniel 2007: *
2008: * Returns the namespace name
1.28 daniel 2009: */
2010:
1.123 ! daniel 2011: xmlChar *
1.55 daniel 2012: xmlNamespaceParseNSDef(xmlParserCtxtPtr ctxt) {
1.123 ! daniel 2013: xmlChar *name = NULL;
1.28 daniel 2014:
1.40 daniel 2015: if ((CUR == 'x') && (NXT(1) == 'm') &&
2016: (NXT(2) == 'l') && (NXT(3) == 'n') &&
2017: (NXT(4) == 's')) {
2018: SKIP(5);
2019: if (CUR == ':') {
2020: NEXT;
1.28 daniel 2021: name = xmlNamespaceParseNCName(ctxt);
2022: }
2023: }
1.39 daniel 2024: return(name);
1.28 daniel 2025: }
2026:
1.50 daniel 2027: /**
2028: * xmlParseQuotedString:
2029: * @ctxt: an XML parser context
2030: *
1.45 daniel 2031: * [OLD] Parse and return a string between quotes or doublequotes
1.110 daniel 2032: * To be removed at next drop of binary compatibility
1.68 daniel 2033: *
2034: * Returns the string parser or NULL.
1.45 daniel 2035: */
1.123 ! daniel 2036: xmlChar *
1.55 daniel 2037: xmlParseQuotedString(xmlParserCtxtPtr ctxt) {
1.123 ! daniel 2038: xmlChar *ret = NULL;
! 2039: const xmlChar *q;
1.45 daniel 2040:
2041: if (CUR == '"') {
2042: NEXT;
2043: q = CUR_PTR;
2044: while (IS_CHAR(CUR) && (CUR != '"')) NEXT;
1.55 daniel 2045: if (CUR != '"') {
1.123 ! daniel 2046: ctxt->errNo = XML_ERR_STRING_NOT_CLOSED;
1.55 daniel 2047: if ((ctxt->sax != NULL) && (ctxt->sax->error != NULL))
1.74 daniel 2048: ctxt->sax->error(ctxt->userData, "String not closed \"%.50s\"\n", q);
1.59 daniel 2049: ctxt->wellFormed = 0;
1.55 daniel 2050: } else {
1.45 daniel 2051: ret = xmlStrndup(q, CUR_PTR - q);
2052: NEXT;
2053: }
2054: } else if (CUR == '\''){
2055: NEXT;
2056: q = CUR_PTR;
2057: while (IS_CHAR(CUR) && (CUR != '\'')) NEXT;
1.55 daniel 2058: if (CUR != '\'') {
1.123 ! daniel 2059: ctxt->errNo = XML_ERR_STRING_NOT_CLOSED;
1.55 daniel 2060: if ((ctxt->sax != NULL) && (ctxt->sax->error != NULL))
1.74 daniel 2061: ctxt->sax->error(ctxt->userData, "String not closed \"%.50s\"\n", q);
1.59 daniel 2062: ctxt->wellFormed = 0;
1.55 daniel 2063: } else {
1.45 daniel 2064: ret = xmlStrndup(q, CUR_PTR - q);
2065: NEXT;
2066: }
2067: }
2068: return(ret);
2069: }
2070:
1.50 daniel 2071: /**
2072: * xmlParseNamespace:
2073: * @ctxt: an XML parser context
2074: *
1.45 daniel 2075: * [OLD] xmlParseNamespace: parse specific PI '<?namespace ...' constructs.
2076: *
2077: * This is what the older xml-name Working Draft specified, a bunch of
2078: * other stuff may still rely on it, so support is still here as
2079: * if ot was declared on the root of the Tree:-(
1.110 daniel 2080: *
2081: * To be removed at next drop of binary compatibility
1.45 daniel 2082: */
2083:
1.55 daniel 2084: void
2085: xmlParseNamespace(xmlParserCtxtPtr ctxt) {
1.123 ! daniel 2086: xmlChar *href = NULL;
! 2087: xmlChar *prefix = NULL;
1.45 daniel 2088: int garbage = 0;
2089:
2090: /*
2091: * We just skipped "namespace" or "xml:namespace"
2092: */
2093: SKIP_BLANKS;
2094:
2095: while (IS_CHAR(CUR) && (CUR != '>')) {
2096: /*
2097: * We can have "ns" or "prefix" attributes
2098: * Old encoding as 'href' or 'AS' attributes is still supported
2099: */
2100: if ((CUR == 'n') && (NXT(1) == 's')) {
2101: garbage = 0;
2102: SKIP(2);
2103: SKIP_BLANKS;
2104:
2105: if (CUR != '=') continue;
2106: NEXT;
2107: SKIP_BLANKS;
2108:
2109: href = xmlParseQuotedString(ctxt);
2110: SKIP_BLANKS;
2111: } else if ((CUR == 'h') && (NXT(1) == 'r') &&
2112: (NXT(2) == 'e') && (NXT(3) == 'f')) {
2113: garbage = 0;
2114: SKIP(4);
2115: SKIP_BLANKS;
2116:
2117: if (CUR != '=') continue;
2118: NEXT;
2119: SKIP_BLANKS;
2120:
2121: href = xmlParseQuotedString(ctxt);
2122: SKIP_BLANKS;
2123: } else if ((CUR == 'p') && (NXT(1) == 'r') &&
2124: (NXT(2) == 'e') && (NXT(3) == 'f') &&
2125: (NXT(4) == 'i') && (NXT(5) == 'x')) {
2126: garbage = 0;
2127: SKIP(6);
2128: SKIP_BLANKS;
2129:
2130: if (CUR != '=') continue;
2131: NEXT;
2132: SKIP_BLANKS;
2133:
2134: prefix = xmlParseQuotedString(ctxt);
2135: SKIP_BLANKS;
2136: } else if ((CUR == 'A') && (NXT(1) == 'S')) {
2137: garbage = 0;
2138: SKIP(2);
2139: SKIP_BLANKS;
2140:
2141: if (CUR != '=') continue;
2142: NEXT;
2143: SKIP_BLANKS;
2144:
2145: prefix = xmlParseQuotedString(ctxt);
2146: SKIP_BLANKS;
2147: } else if ((CUR == '?') && (NXT(1) == '>')) {
2148: garbage = 0;
1.91 daniel 2149: NEXT;
1.45 daniel 2150: } else {
2151: /*
2152: * Found garbage when parsing the namespace
2153: */
1.122 daniel 2154: if (!garbage) {
1.55 daniel 2155: if ((ctxt->sax != NULL) && (ctxt->sax->error != NULL))
1.122 daniel 2156: ctxt->sax->error(ctxt->userData,
2157: "xmlParseNamespace found garbage\n");
2158: }
1.123 ! daniel 2159: ctxt->errNo = XML_ERR_NS_DECL_ERROR;
1.59 daniel 2160: ctxt->wellFormed = 0;
1.45 daniel 2161: NEXT;
2162: }
2163: }
2164:
2165: MOVETO_ENDTAG(CUR_PTR);
2166: NEXT;
2167:
2168: /*
2169: * Register the DTD.
1.72 daniel 2170: if (href != NULL)
2171: if ((ctxt->sax != NULL) && (ctxt->sax->globalNamespace != NULL))
1.74 daniel 2172: ctxt->sax->globalNamespace(ctxt->userData, href, prefix);
1.45 daniel 2173: */
2174:
1.119 daniel 2175: if (prefix != NULL) xmlFree(prefix);
2176: if (href != NULL) xmlFree(href);
1.45 daniel 2177: }
2178:
1.28 daniel 2179: /************************************************************************
2180: * *
2181: * The parser itself *
2182: * Relates to http://www.w3.org/TR/REC-xml *
2183: * *
2184: ************************************************************************/
1.14 veillard 2185:
1.50 daniel 2186: /**
1.97 daniel 2187: * xmlScanName:
2188: * @ctxt: an XML parser context
2189: *
2190: * Trickery: parse an XML name but without consuming the input flow
2191: * Needed for rollback cases.
2192: *
2193: * [4] NameChar ::= Letter | Digit | '.' | '-' | '_' | ':' |
2194: * CombiningChar | Extender
2195: *
2196: * [5] Name ::= (Letter | '_' | ':') (NameChar)*
2197: *
2198: * [6] Names ::= Name (S Name)*
2199: *
2200: * Returns the Name parsed or NULL
2201: */
2202:
1.123 ! daniel 2203: xmlChar *
1.97 daniel 2204: xmlScanName(xmlParserCtxtPtr ctxt) {
1.123 ! daniel 2205: xmlChar buf[XML_MAX_NAMELEN];
1.97 daniel 2206: int len = 0;
2207:
2208: GROW;
2209: if (!IS_LETTER(CUR) && (CUR != '_') &&
2210: (CUR != ':')) {
2211: return(NULL);
2212: }
2213:
2214: while ((IS_LETTER(NXT(len))) || (IS_DIGIT(NXT(len))) ||
2215: (NXT(len) == '.') || (NXT(len) == '-') ||
2216: (NXT(len) == '_') || (NXT(len) == ':') ||
2217: (IS_COMBINING(NXT(len))) ||
2218: (IS_EXTENDER(NXT(len)))) {
2219: buf[len] = NXT(len);
2220: len++;
2221: if (len >= XML_MAX_NAMELEN) {
2222: fprintf(stderr,
2223: "xmlScanName: reached XML_MAX_NAMELEN limit\n");
2224: while ((IS_LETTER(NXT(len))) || (IS_DIGIT(NXT(len))) ||
2225: (NXT(len) == '.') || (NXT(len) == '-') ||
2226: (NXT(len) == '_') || (NXT(len) == ':') ||
2227: (IS_COMBINING(NXT(len))) ||
2228: (IS_EXTENDER(NXT(len))))
2229: len++;
2230: break;
2231: }
2232: }
2233: return(xmlStrndup(buf, len));
2234: }
2235:
2236: /**
1.50 daniel 2237: * xmlParseName:
2238: * @ctxt: an XML parser context
2239: *
2240: * parse an XML name.
1.22 daniel 2241: *
2242: * [4] NameChar ::= Letter | Digit | '.' | '-' | '_' | ':' |
2243: * CombiningChar | Extender
2244: *
2245: * [5] Name ::= (Letter | '_' | ':') (NameChar)*
2246: *
2247: * [6] Names ::= Name (S Name)*
1.68 daniel 2248: *
2249: * Returns the Name parsed or NULL
1.1 veillard 2250: */
2251:
1.123 ! daniel 2252: xmlChar *
1.55 daniel 2253: xmlParseName(xmlParserCtxtPtr ctxt) {
1.123 ! daniel 2254: xmlChar buf[XML_MAX_NAMELEN];
1.91 daniel 2255: int len = 0;
1.123 ! daniel 2256: xmlChar cur;
1.1 veillard 2257:
1.91 daniel 2258: GROW;
1.97 daniel 2259: cur = CUR;
2260: if (!IS_LETTER(cur) && (cur != '_') &&
2261: (cur != ':')) {
1.91 daniel 2262: return(NULL);
2263: }
1.40 daniel 2264:
1.97 daniel 2265: while ((IS_LETTER(cur)) || (IS_DIGIT(cur)) ||
2266: (cur == '.') || (cur == '-') ||
2267: (cur == '_') || (cur == ':') ||
2268: (IS_COMBINING(cur)) ||
2269: (IS_EXTENDER(cur))) {
2270: buf[len++] = cur;
1.40 daniel 2271: NEXT;
1.97 daniel 2272: cur = CUR;
1.91 daniel 2273: if (len >= XML_MAX_NAMELEN) {
2274: fprintf(stderr,
2275: "xmlParseName: reached XML_MAX_NAMELEN limit\n");
1.97 daniel 2276: while ((IS_LETTER(cur)) || (IS_DIGIT(cur)) ||
2277: (cur == '.') || (cur == '-') ||
2278: (cur == '_') || (cur == ':') ||
2279: (IS_COMBINING(cur)) ||
2280: (IS_EXTENDER(cur))) {
2281: NEXT;
2282: cur = CUR;
2283: }
1.91 daniel 2284: break;
2285: }
2286: }
2287: return(xmlStrndup(buf, len));
1.22 daniel 2288: }
2289:
1.50 daniel 2290: /**
2291: * xmlParseNmtoken:
2292: * @ctxt: an XML parser context
2293: *
2294: * parse an XML Nmtoken.
1.22 daniel 2295: *
2296: * [7] Nmtoken ::= (NameChar)+
2297: *
2298: * [8] Nmtokens ::= Nmtoken (S Nmtoken)*
1.68 daniel 2299: *
2300: * Returns the Nmtoken parsed or NULL
1.22 daniel 2301: */
2302:
1.123 ! daniel 2303: xmlChar *
1.55 daniel 2304: xmlParseNmtoken(xmlParserCtxtPtr ctxt) {
1.123 ! daniel 2305: xmlChar buf[XML_MAX_NAMELEN];
1.91 daniel 2306: int len = 0;
1.22 daniel 2307:
1.91 daniel 2308: GROW;
1.40 daniel 2309: while ((IS_LETTER(CUR)) || (IS_DIGIT(CUR)) ||
2310: (CUR == '.') || (CUR == '-') ||
2311: (CUR == '_') || (CUR == ':') ||
2312: (IS_COMBINING(CUR)) ||
1.91 daniel 2313: (IS_EXTENDER(CUR))) {
2314: buf[len++] = CUR;
1.40 daniel 2315: NEXT;
1.91 daniel 2316: if (len >= XML_MAX_NAMELEN) {
2317: fprintf(stderr,
2318: "xmlParseNmtoken: reached XML_MAX_NAMELEN limit\n");
2319: while ((IS_LETTER(CUR)) || (IS_DIGIT(CUR)) ||
2320: (CUR == '.') || (CUR == '-') ||
2321: (CUR == '_') || (CUR == ':') ||
2322: (IS_COMBINING(CUR)) ||
2323: (IS_EXTENDER(CUR)))
2324: NEXT;
2325: break;
2326: }
2327: }
2328: return(xmlStrndup(buf, len));
1.1 veillard 2329: }
2330:
1.50 daniel 2331: /**
2332: * xmlParseEntityValue:
2333: * @ctxt: an XML parser context
1.78 daniel 2334: * @orig: if non-NULL store a copy of the original entity value
1.50 daniel 2335: *
2336: * parse a value for ENTITY decl.
1.24 daniel 2337: *
2338: * [9] EntityValue ::= '"' ([^%&"] | PEReference | Reference)* '"' |
2339: * "'" ([^%&'] | PEReference | Reference)* "'"
1.68 daniel 2340: *
1.78 daniel 2341: * Returns the EntityValue parsed with reference substitued or NULL
1.24 daniel 2342: */
2343:
1.123 ! daniel 2344: xmlChar *
! 2345: xmlParseEntityValue(xmlParserCtxtPtr ctxt, xmlChar **orig) {
! 2346: xmlChar *ret = NULL;
! 2347: const xmlChar *org = NULL;
! 2348: const xmlChar *tst = NULL;
! 2349: const xmlChar *temp = NULL;
1.98 daniel 2350: xmlParserInputPtr input;
1.24 daniel 2351:
1.91 daniel 2352: SHRINK;
1.40 daniel 2353: if (CUR == '"') {
1.96 daniel 2354: ctxt->instate = XML_PARSER_ENTITY_VALUE;
1.98 daniel 2355: input = ctxt->input;
1.40 daniel 2356: NEXT;
1.78 daniel 2357: org = CUR_PTR;
1.98 daniel 2358: /*
2359: * NOTE: 4.4.5 Included in Literal
2360: * When a parameter entity reference appears in a literal entity
2361: * value, ... a single or double quote character in the replacement
2362: * text is always treated as a normal data character and will not
2363: * terminate the literal.
2364: * In practice it means we stop the loop only when back at parsing
2365: * the initial entity and the quote is found
2366: */
2367: while ((CUR != '"') || (ctxt->input != input)) {
1.79 daniel 2368: tst = CUR_PTR;
1.98 daniel 2369: /*
2370: * NOTE: 4.4.7 Bypassed
2371: * When a general entity reference appears in the EntityValue in
2372: * an entity declaration, it is bypassed and left as is.
2373: * so XML_SUBSTITUTE_REF is not set.
2374: */
2375: if (ctxt->input != input)
2376: temp = xmlDecodeEntities(ctxt, -1, XML_SUBSTITUTE_PEREF,
2377: 0, 0, 0);
2378: else
2379: temp = xmlDecodeEntities(ctxt, -1, XML_SUBSTITUTE_PEREF,
2380: '"', 0, 0);
1.94 daniel 2381:
2382: /*
2383: * Pop-up of finished entities.
2384: */
2385: while ((CUR == 0) && (ctxt->inputNr > 1))
2386: xmlPopInput(ctxt);
2387:
2388: if ((temp == NULL) && (tst == CUR_PTR)) {
1.123 ! daniel 2389: ret = xmlStrndup((xmlChar *) "", 0);
1.94 daniel 2390: break;
2391: }
2392: if ((temp[0] == 0) && (tst == CUR_PTR)) {
1.119 daniel 2393: xmlFree((char *)temp);
1.123 ! daniel 2394: ret = xmlStrndup((xmlChar *) "", 0);
1.94 daniel 2395: break;
2396: }
1.79 daniel 2397: ret = xmlStrcat(ret, temp);
1.119 daniel 2398: if (temp != NULL) xmlFree((char *)temp);
1.94 daniel 2399: GROW;
1.79 daniel 2400: }
1.77 daniel 2401: if (CUR != '"') {
1.123 ! daniel 2402: ctxt->errNo = XML_ERR_ENTITY_NOT_FINISHED;
1.55 daniel 2403: if ((ctxt->sax != NULL) && (ctxt->sax->error != NULL))
1.79 daniel 2404: ctxt->sax->error(ctxt->userData, "EntityValue: \" expected\n");
1.59 daniel 2405: ctxt->wellFormed = 0;
1.78 daniel 2406: } else {
1.99 daniel 2407: if (orig != NULL) /* !!!!!!!!! */
1.78 daniel 2408: *orig = xmlStrndup(org, CUR_PTR - org);
1.94 daniel 2409: if (ret == NULL)
1.123 ! daniel 2410: ret = xmlStrndup((xmlChar *) "", 0);
1.40 daniel 2411: NEXT;
1.78 daniel 2412: }
1.40 daniel 2413: } else if (CUR == '\'') {
1.96 daniel 2414: ctxt->instate = XML_PARSER_ENTITY_VALUE;
1.98 daniel 2415: input = ctxt->input;
1.40 daniel 2416: NEXT;
1.78 daniel 2417: org = CUR_PTR;
1.98 daniel 2418: /*
2419: * NOTE: 4.4.5 Included in Literal
2420: * When a parameter entity reference appears in a literal entity
2421: * value, ... a single or double quote character in the replacement
2422: * text is always treated as a normal data character and will not
2423: * terminate the literal.
2424: * In practice it means we stop the loop only when back at parsing
2425: * the initial entity and the quote is found
2426: */
2427: while ((CUR != '\'') || (ctxt->input != input)) {
1.79 daniel 2428: tst = CUR_PTR;
1.98 daniel 2429: /*
2430: * NOTE: 4.4.7 Bypassed
2431: * When a general entity reference appears in the EntityValue in
2432: * an entity declaration, it is bypassed and left as is.
2433: * so XML_SUBSTITUTE_REF is not set.
2434: */
2435: if (ctxt->input != input)
2436: temp = xmlDecodeEntities(ctxt, -1, XML_SUBSTITUTE_PEREF,
2437: 0, 0, 0);
2438: else
2439: temp = xmlDecodeEntities(ctxt, -1, XML_SUBSTITUTE_PEREF,
2440: '\'', 0, 0);
1.94 daniel 2441:
2442: /*
2443: * Pop-up of finished entities.
2444: */
2445: while ((CUR == 0) && (ctxt->inputNr > 1))
2446: xmlPopInput(ctxt);
2447:
2448: if ((temp == NULL) && (tst == CUR_PTR)) {
1.123 ! daniel 2449: ret = xmlStrndup((xmlChar *) "", 0);
1.94 daniel 2450: break;
2451: }
2452: if ((temp[0] == 0) && (tst == CUR_PTR)) {
1.119 daniel 2453: xmlFree((char *)temp);
1.123 ! daniel 2454: ret = xmlStrndup((xmlChar *) "", 0);
1.94 daniel 2455: break;
2456: }
1.79 daniel 2457: ret = xmlStrcat(ret, temp);
1.119 daniel 2458: if (temp != NULL) xmlFree((char *)temp);
1.94 daniel 2459: GROW;
1.79 daniel 2460: }
1.77 daniel 2461: if (CUR != '\'') {
1.123 ! daniel 2462: ctxt->errNo = XML_ERR_ENTITY_NOT_FINISHED;
1.55 daniel 2463: if ((ctxt->sax != NULL) && (ctxt->sax->error != NULL))
1.77 daniel 2464: ctxt->sax->error(ctxt->userData, "EntityValue: ' expected\n");
1.59 daniel 2465: ctxt->wellFormed = 0;
1.78 daniel 2466: } else {
1.99 daniel 2467: if (orig != NULL) /* !!!!!!!!! */
1.78 daniel 2468: *orig = xmlStrndup(org, CUR_PTR - org);
1.94 daniel 2469: if (ret == NULL)
1.123 ! daniel 2470: ret = xmlStrndup((xmlChar *) "", 0);
1.40 daniel 2471: NEXT;
1.78 daniel 2472: }
1.24 daniel 2473: } else {
1.123 ! daniel 2474: ctxt->errNo = XML_ERR_ENTITY_NOT_STARTED;
1.55 daniel 2475: if ((ctxt->sax != NULL) && (ctxt->sax->error != NULL))
1.77 daniel 2476: ctxt->sax->error(ctxt->userData, "EntityValue: \" or ' expected\n");
1.59 daniel 2477: ctxt->wellFormed = 0;
1.24 daniel 2478: }
2479:
2480: return(ret);
2481: }
2482:
1.50 daniel 2483: /**
2484: * xmlParseAttValue:
2485: * @ctxt: an XML parser context
2486: *
2487: * parse a value for an attribute
1.78 daniel 2488: * Note: the parser won't do substitution of entities here, this
1.113 daniel 2489: * will be handled later in xmlStringGetNodeList
1.29 daniel 2490: *
2491: * [10] AttValue ::= '"' ([^<&"] | Reference)* '"' |
2492: * "'" ([^<&'] | Reference)* "'"
1.68 daniel 2493: *
2494: * Returns the AttValue parsed or NULL.
1.29 daniel 2495: */
2496:
1.123 ! daniel 2497: xmlChar *
1.55 daniel 2498: xmlParseAttValue(xmlParserCtxtPtr ctxt) {
1.123 ! daniel 2499: xmlChar *ret = NULL;
1.29 daniel 2500:
1.91 daniel 2501: SHRINK;
1.40 daniel 2502: if (CUR == '"') {
1.96 daniel 2503: ctxt->instate = XML_PARSER_ATTRIBUTE_VALUE;
1.40 daniel 2504: NEXT;
1.98 daniel 2505: ret = xmlDecodeEntities(ctxt, -1, XML_SUBSTITUTE_REF, '"', '<', 0);
1.77 daniel 2506: if (CUR == '<') {
2507: if ((ctxt->sax != NULL) && (ctxt->sax->error != NULL))
2508: ctxt->sax->error(ctxt->userData,
2509: "Unescaped '<' not allowed in attributes values\n");
1.123 ! daniel 2510: ctxt->errNo = XML_ERR_LT_IN_ATTRIBUTE;
1.77 daniel 2511: ctxt->wellFormed = 0;
1.29 daniel 2512: }
1.77 daniel 2513: if (CUR != '"') {
1.55 daniel 2514: if ((ctxt->sax != NULL) && (ctxt->sax->error != NULL))
1.77 daniel 2515: ctxt->sax->error(ctxt->userData, "AttValue: ' expected\n");
1.123 ! daniel 2516: ctxt->errNo = XML_ERR_ATTRIBUTE_NOT_FINISHED;
1.59 daniel 2517: ctxt->wellFormed = 0;
1.77 daniel 2518: } else
1.40 daniel 2519: NEXT;
2520: } else if (CUR == '\'') {
1.96 daniel 2521: ctxt->instate = XML_PARSER_ATTRIBUTE_VALUE;
1.40 daniel 2522: NEXT;
1.98 daniel 2523: ret = xmlDecodeEntities(ctxt, -1, XML_SUBSTITUTE_REF, '\'', '<', 0);
1.77 daniel 2524: if (CUR == '<') {
2525: if ((ctxt->sax != NULL) && (ctxt->sax->error != NULL))
2526: ctxt->sax->error(ctxt->userData,
2527: "Unescaped '<' not allowed in attributes values\n");
1.123 ! daniel 2528: ctxt->errNo = XML_ERR_LT_IN_ATTRIBUTE;
1.77 daniel 2529: ctxt->wellFormed = 0;
1.29 daniel 2530: }
1.77 daniel 2531: if (CUR != '\'') {
1.55 daniel 2532: if ((ctxt->sax != NULL) && (ctxt->sax->error != NULL))
1.77 daniel 2533: ctxt->sax->error(ctxt->userData, "AttValue: ' expected\n");
1.123 ! daniel 2534: ctxt->errNo = XML_ERR_ATTRIBUTE_NOT_FINISHED;
1.59 daniel 2535: ctxt->wellFormed = 0;
1.77 daniel 2536: } else
1.40 daniel 2537: NEXT;
1.29 daniel 2538: } else {
1.123 ! daniel 2539: ctxt->errNo = XML_ERR_ATTRIBUTE_NOT_STARTED;
1.55 daniel 2540: if ((ctxt->sax != NULL) && (ctxt->sax->error != NULL))
1.74 daniel 2541: ctxt->sax->error(ctxt->userData, "AttValue: \" or ' expected\n");
1.59 daniel 2542: ctxt->wellFormed = 0;
1.29 daniel 2543: }
2544:
2545: return(ret);
2546: }
2547:
1.50 daniel 2548: /**
2549: * xmlParseSystemLiteral:
2550: * @ctxt: an XML parser context
2551: *
2552: * parse an XML Literal
1.21 daniel 2553: *
1.22 daniel 2554: * [11] SystemLiteral ::= ('"' [^"]* '"') | ("'" [^']* "'")
1.68 daniel 2555: *
2556: * Returns the SystemLiteral parsed or NULL
1.21 daniel 2557: */
2558:
1.123 ! daniel 2559: xmlChar *
1.55 daniel 2560: xmlParseSystemLiteral(xmlParserCtxtPtr ctxt) {
1.123 ! daniel 2561: const xmlChar *q;
! 2562: xmlChar *ret = NULL;
1.21 daniel 2563:
1.91 daniel 2564: SHRINK;
1.40 daniel 2565: if (CUR == '"') {
2566: NEXT;
2567: q = CUR_PTR;
2568: while ((IS_CHAR(CUR)) && (CUR != '"'))
2569: NEXT;
2570: if (!IS_CHAR(CUR)) {
1.55 daniel 2571: if ((ctxt->sax != NULL) && (ctxt->sax->error != NULL))
1.74 daniel 2572: ctxt->sax->error(ctxt->userData, "Unfinished SystemLiteral\n");
1.123 ! daniel 2573: ctxt->errNo = XML_ERR_LITERAL_NOT_FINISHED;
1.59 daniel 2574: ctxt->wellFormed = 0;
1.21 daniel 2575: } else {
1.40 daniel 2576: ret = xmlStrndup(q, CUR_PTR - q);
2577: NEXT;
1.21 daniel 2578: }
1.40 daniel 2579: } else if (CUR == '\'') {
2580: NEXT;
2581: q = CUR_PTR;
2582: while ((IS_CHAR(CUR)) && (CUR != '\''))
2583: NEXT;
2584: if (!IS_CHAR(CUR)) {
1.55 daniel 2585: if ((ctxt->sax != NULL) && (ctxt->sax->error != NULL))
1.74 daniel 2586: ctxt->sax->error(ctxt->userData, "Unfinished SystemLiteral\n");
1.123 ! daniel 2587: ctxt->errNo = XML_ERR_LITERAL_NOT_FINISHED;
1.59 daniel 2588: ctxt->wellFormed = 0;
1.21 daniel 2589: } else {
1.40 daniel 2590: ret = xmlStrndup(q, CUR_PTR - q);
2591: NEXT;
1.21 daniel 2592: }
2593: } else {
1.55 daniel 2594: if ((ctxt->sax != NULL) && (ctxt->sax->error != NULL))
1.122 daniel 2595: ctxt->sax->error(ctxt->userData,
2596: "SystemLiteral \" or ' expected\n");
1.123 ! daniel 2597: ctxt->errNo = XML_ERR_LITERAL_NOT_STARTED;
1.59 daniel 2598: ctxt->wellFormed = 0;
1.21 daniel 2599: }
2600:
2601: return(ret);
2602: }
2603:
1.50 daniel 2604: /**
2605: * xmlParsePubidLiteral:
2606: * @ctxt: an XML parser context
1.21 daniel 2607: *
1.50 daniel 2608: * parse an XML public literal
1.68 daniel 2609: *
2610: * [12] PubidLiteral ::= '"' PubidChar* '"' | "'" (PubidChar - "'")* "'"
2611: *
2612: * Returns the PubidLiteral parsed or NULL.
1.21 daniel 2613: */
2614:
1.123 ! daniel 2615: xmlChar *
1.55 daniel 2616: xmlParsePubidLiteral(xmlParserCtxtPtr ctxt) {
1.123 ! daniel 2617: const xmlChar *q;
! 2618: xmlChar *ret = NULL;
1.21 daniel 2619: /*
2620: * Name ::= (Letter | '_') (NameChar)*
2621: */
1.91 daniel 2622: SHRINK;
1.40 daniel 2623: if (CUR == '"') {
2624: NEXT;
2625: q = CUR_PTR;
2626: while (IS_PUBIDCHAR(CUR)) NEXT;
2627: if (CUR != '"') {
1.55 daniel 2628: if ((ctxt->sax != NULL) && (ctxt->sax->error != NULL))
1.74 daniel 2629: ctxt->sax->error(ctxt->userData, "Unfinished PubidLiteral\n");
1.123 ! daniel 2630: ctxt->errNo = XML_ERR_LITERAL_NOT_FINISHED;
1.59 daniel 2631: ctxt->wellFormed = 0;
1.21 daniel 2632: } else {
1.40 daniel 2633: ret = xmlStrndup(q, CUR_PTR - q);
2634: NEXT;
1.21 daniel 2635: }
1.40 daniel 2636: } else if (CUR == '\'') {
2637: NEXT;
2638: q = CUR_PTR;
2639: while ((IS_LETTER(CUR)) && (CUR != '\''))
2640: NEXT;
2641: if (!IS_LETTER(CUR)) {
1.55 daniel 2642: if ((ctxt->sax != NULL) && (ctxt->sax->error != NULL))
1.74 daniel 2643: ctxt->sax->error(ctxt->userData, "Unfinished PubidLiteral\n");
1.123 ! daniel 2644: ctxt->errNo = XML_ERR_LITERAL_NOT_FINISHED;
1.59 daniel 2645: ctxt->wellFormed = 0;
1.21 daniel 2646: } else {
1.40 daniel 2647: ret = xmlStrndup(q, CUR_PTR - q);
2648: NEXT;
1.21 daniel 2649: }
2650: } else {
1.55 daniel 2651: if ((ctxt->sax != NULL) && (ctxt->sax->error != NULL))
1.122 daniel 2652: ctxt->sax->error(ctxt->userData,
2653: "SystemLiteral \" or ' expected\n");
1.123 ! daniel 2654: ctxt->errNo = XML_ERR_LITERAL_NOT_STARTED;
1.59 daniel 2655: ctxt->wellFormed = 0;
1.21 daniel 2656: }
2657:
2658: return(ret);
2659: }
2660:
1.50 daniel 2661: /**
2662: * xmlParseCharData:
2663: * @ctxt: an XML parser context
2664: * @cdata: int indicating whether we are within a CDATA section
2665: *
2666: * parse a CharData section.
2667: * if we are within a CDATA section ']]>' marks an end of section.
1.27 daniel 2668: *
2669: * [14] CharData ::= [^<&]* - ([^<&]* ']]>' [^<&]*)
2670: */
2671:
1.55 daniel 2672: void
2673: xmlParseCharData(xmlParserCtxtPtr ctxt, int cdata) {
1.123 ! daniel 2674: xmlChar buf[1000];
1.91 daniel 2675: int nbchar = 0;
1.123 ! daniel 2676: xmlChar cur;
1.27 daniel 2677:
1.91 daniel 2678: SHRINK;
1.97 daniel 2679: /*
2680: * !!!!!!!!!!!!
2681: * NOTE: NXT(0) is used here to avoid breaking on < or &
2682: * entities substitutions.
2683: */
2684: cur = CUR;
2685: while ((IS_CHAR(cur)) && (cur != '<') &&
2686: (cur != '&')) {
2687: if ((cur == ']') && (NXT(1) == ']') &&
1.59 daniel 2688: (NXT(2) == '>')) {
2689: if (cdata) break;
2690: else {
2691: if ((ctxt->sax != NULL) && (ctxt->sax->error != NULL))
1.74 daniel 2692: ctxt->sax->error(ctxt->userData,
1.59 daniel 2693: "Sequence ']]>' not allowed in content\n");
1.123 ! daniel 2694: ctxt->errNo = XML_ERR_MISPLACED_CDATA_END;
1.59 daniel 2695: ctxt->wellFormed = 0;
2696: }
2697: }
1.91 daniel 2698: buf[nbchar++] = CUR;
2699: if (nbchar == 1000) {
2700: /*
2701: * Ok the segment is to be consumed as chars.
2702: */
2703: if (ctxt->sax != NULL) {
2704: if (areBlanks(ctxt, buf, nbchar)) {
2705: if (ctxt->sax->ignorableWhitespace != NULL)
2706: ctxt->sax->ignorableWhitespace(ctxt->userData,
2707: buf, nbchar);
2708: } else {
2709: if (ctxt->sax->characters != NULL)
2710: ctxt->sax->characters(ctxt->userData, buf, nbchar);
2711: }
2712: }
2713: nbchar = 0;
2714: }
1.40 daniel 2715: NEXT;
1.97 daniel 2716: cur = CUR;
1.27 daniel 2717: }
1.91 daniel 2718: if (nbchar != 0) {
2719: /*
2720: * Ok the segment is to be consumed as chars.
2721: */
2722: if (ctxt->sax != NULL) {
2723: if (areBlanks(ctxt, buf, nbchar)) {
2724: if (ctxt->sax->ignorableWhitespace != NULL)
2725: ctxt->sax->ignorableWhitespace(ctxt->userData, buf, nbchar);
2726: } else {
2727: if (ctxt->sax->characters != NULL)
2728: ctxt->sax->characters(ctxt->userData, buf, nbchar);
2729: }
2730: }
1.45 daniel 2731: }
1.27 daniel 2732: }
2733:
1.50 daniel 2734: /**
2735: * xmlParseExternalID:
2736: * @ctxt: an XML parser context
1.123 ! daniel 2737: * @publicID: a xmlChar** receiving PubidLiteral
1.67 daniel 2738: * @strict: indicate whether we should restrict parsing to only
2739: * production [75], see NOTE below
1.50 daniel 2740: *
1.67 daniel 2741: * Parse an External ID or a Public ID
2742: *
2743: * NOTE: Productions [75] and [83] interract badly since [75] can generate
2744: * 'PUBLIC' S PubidLiteral S SystemLiteral
1.22 daniel 2745: *
2746: * [75] ExternalID ::= 'SYSTEM' S SystemLiteral
2747: * | 'PUBLIC' S PubidLiteral S SystemLiteral
1.67 daniel 2748: *
2749: * [83] PublicID ::= 'PUBLIC' S PubidLiteral
2750: *
1.68 daniel 2751: * Returns the function returns SystemLiteral and in the second
1.67 daniel 2752: * case publicID receives PubidLiteral, is strict is off
2753: * it is possible to return NULL and have publicID set.
1.22 daniel 2754: */
2755:
1.123 ! daniel 2756: xmlChar *
! 2757: xmlParseExternalID(xmlParserCtxtPtr ctxt, xmlChar **publicID, int strict) {
! 2758: xmlChar *URI = NULL;
1.22 daniel 2759:
1.91 daniel 2760: SHRINK;
1.40 daniel 2761: if ((CUR == 'S') && (NXT(1) == 'Y') &&
2762: (NXT(2) == 'S') && (NXT(3) == 'T') &&
2763: (NXT(4) == 'E') && (NXT(5) == 'M')) {
2764: SKIP(6);
1.59 daniel 2765: if (!IS_BLANK(CUR)) {
2766: if ((ctxt->sax != NULL) && (ctxt->sax->error != NULL))
1.74 daniel 2767: ctxt->sax->error(ctxt->userData,
1.59 daniel 2768: "Space required after 'SYSTEM'\n");
1.123 ! daniel 2769: ctxt->errNo = XML_ERR_SPACE_REQUIRED;
1.59 daniel 2770: ctxt->wellFormed = 0;
2771: }
1.42 daniel 2772: SKIP_BLANKS;
1.39 daniel 2773: URI = xmlParseSystemLiteral(ctxt);
1.59 daniel 2774: if (URI == NULL) {
1.55 daniel 2775: if ((ctxt->sax != NULL) && (ctxt->sax->error != NULL))
1.74 daniel 2776: ctxt->sax->error(ctxt->userData,
1.39 daniel 2777: "xmlParseExternalID: SYSTEM, no URI\n");
1.123 ! daniel 2778: ctxt->errNo = XML_ERR_URI_REQUIRED;
1.59 daniel 2779: ctxt->wellFormed = 0;
2780: }
1.40 daniel 2781: } else if ((CUR == 'P') && (NXT(1) == 'U') &&
2782: (NXT(2) == 'B') && (NXT(3) == 'L') &&
2783: (NXT(4) == 'I') && (NXT(5) == 'C')) {
2784: SKIP(6);
1.59 daniel 2785: if (!IS_BLANK(CUR)) {
2786: if ((ctxt->sax != NULL) && (ctxt->sax->error != NULL))
1.74 daniel 2787: ctxt->sax->error(ctxt->userData,
1.59 daniel 2788: "Space required after 'PUBLIC'\n");
1.123 ! daniel 2789: ctxt->errNo = XML_ERR_SPACE_REQUIRED;
1.59 daniel 2790: ctxt->wellFormed = 0;
2791: }
1.42 daniel 2792: SKIP_BLANKS;
1.39 daniel 2793: *publicID = xmlParsePubidLiteral(ctxt);
1.59 daniel 2794: if (*publicID == NULL) {
1.55 daniel 2795: if ((ctxt->sax != NULL) && (ctxt->sax->error != NULL))
1.74 daniel 2796: ctxt->sax->error(ctxt->userData,
1.39 daniel 2797: "xmlParseExternalID: PUBLIC, no Public Identifier\n");
1.123 ! daniel 2798: ctxt->errNo = XML_ERR_PUBID_REQUIRED;
1.59 daniel 2799: ctxt->wellFormed = 0;
2800: }
1.67 daniel 2801: if (strict) {
2802: /*
2803: * We don't handle [83] so "S SystemLiteral" is required.
2804: */
2805: if (!IS_BLANK(CUR)) {
2806: if ((ctxt->sax != NULL) && (ctxt->sax->error != NULL))
1.74 daniel 2807: ctxt->sax->error(ctxt->userData,
1.67 daniel 2808: "Space required after the Public Identifier\n");
1.123 ! daniel 2809: ctxt->errNo = XML_ERR_SPACE_REQUIRED;
1.67 daniel 2810: ctxt->wellFormed = 0;
2811: }
2812: } else {
2813: /*
2814: * We handle [83] so we return immediately, if
2815: * "S SystemLiteral" is not detected. From a purely parsing
2816: * point of view that's a nice mess.
2817: */
1.123 ! daniel 2818: const xmlChar *ptr = CUR_PTR;
1.67 daniel 2819: if (!IS_BLANK(*ptr)) return(NULL);
2820:
2821: while (IS_BLANK(*ptr)) ptr++;
2822: if ((*ptr != '\'') || (*ptr != '"')) return(NULL);
1.59 daniel 2823: }
1.42 daniel 2824: SKIP_BLANKS;
1.39 daniel 2825: URI = xmlParseSystemLiteral(ctxt);
1.59 daniel 2826: if (URI == NULL) {
1.55 daniel 2827: if ((ctxt->sax != NULL) && (ctxt->sax->error != NULL))
1.74 daniel 2828: ctxt->sax->error(ctxt->userData,
1.39 daniel 2829: "xmlParseExternalID: PUBLIC, no URI\n");
1.123 ! daniel 2830: ctxt->errNo = XML_ERR_URI_REQUIRED;
1.59 daniel 2831: ctxt->wellFormed = 0;
2832: }
1.22 daniel 2833: }
1.39 daniel 2834: return(URI);
1.22 daniel 2835: }
2836:
1.50 daniel 2837: /**
2838: * xmlParseComment:
1.69 daniel 2839: * @ctxt: an XML parser context
1.50 daniel 2840: *
1.3 veillard 2841: * Skip an XML (SGML) comment <!-- .... -->
1.38 daniel 2842: * The spec says that "For compatibility, the string "--" (double-hyphen)
2843: * must not occur within comments. "
1.22 daniel 2844: *
2845: * [15] Comment ::= '<!--' ((Char - '-') | ('-' (Char - '-')))* '-->'
1.3 veillard 2846: */
1.72 daniel 2847: void
1.114 daniel 2848: xmlParseComment(xmlParserCtxtPtr ctxt) {
1.123 ! daniel 2849: const xmlChar *q, *start;
! 2850: const xmlChar *r;
! 2851: xmlChar *val;
1.3 veillard 2852:
2853: /*
1.22 daniel 2854: * Check that there is a comment right here.
1.3 veillard 2855: */
1.40 daniel 2856: if ((CUR != '<') || (NXT(1) != '!') ||
1.72 daniel 2857: (NXT(2) != '-') || (NXT(3) != '-')) return;
1.3 veillard 2858:
1.97 daniel 2859: ctxt->instate = XML_PARSER_COMMENT;
1.91 daniel 2860: SHRINK;
1.40 daniel 2861: SKIP(4);
2862: start = q = CUR_PTR;
2863: NEXT;
2864: r = CUR_PTR;
2865: NEXT;
2866: while (IS_CHAR(CUR) &&
2867: ((CUR == ':') || (CUR != '>') ||
1.16 daniel 2868: (*r != '-') || (*q != '-'))) {
1.59 daniel 2869: if ((*r == '-') && (*q == '-')) {
1.55 daniel 2870: if ((ctxt->sax != NULL) && (ctxt->sax->error != NULL))
1.74 daniel 2871: ctxt->sax->error(ctxt->userData,
1.38 daniel 2872: "Comment must not contain '--' (double-hyphen)`\n");
1.123 ! daniel 2873: ctxt->errNo = XML_ERR_HYPHEN_IN_COMMENT;
1.59 daniel 2874: ctxt->wellFormed = 0;
2875: }
1.40 daniel 2876: NEXT;r++;q++;
1.3 veillard 2877: }
1.40 daniel 2878: if (!IS_CHAR(CUR)) {
1.55 daniel 2879: if ((ctxt->sax != NULL) && (ctxt->sax->error != NULL))
1.122 daniel 2880: ctxt->sax->error(ctxt->userData,
2881: "Comment not terminated \n<!--%.50s\n", start);
1.123 ! daniel 2882: ctxt->errNo = XML_ERR_COMMENT_NOT_FINISHED;
1.59 daniel 2883: ctxt->wellFormed = 0;
1.3 veillard 2884: } else {
1.40 daniel 2885: NEXT;
1.114 daniel 2886: val = xmlStrndup(start, q - start);
2887: if ((ctxt->sax != NULL) && (ctxt->sax->comment != NULL))
2888: ctxt->sax->comment(ctxt->userData, val);
1.119 daniel 2889: xmlFree(val);
1.3 veillard 2890: }
2891: }
2892:
1.50 daniel 2893: /**
2894: * xmlParsePITarget:
2895: * @ctxt: an XML parser context
2896: *
2897: * parse the name of a PI
1.22 daniel 2898: *
2899: * [17] PITarget ::= Name - (('X' | 'x') ('M' | 'm') ('L' | 'l'))
1.68 daniel 2900: *
2901: * Returns the PITarget name or NULL
1.22 daniel 2902: */
2903:
1.123 ! daniel 2904: xmlChar *
1.55 daniel 2905: xmlParsePITarget(xmlParserCtxtPtr ctxt) {
1.123 ! daniel 2906: xmlChar *name;
1.22 daniel 2907:
2908: name = xmlParseName(ctxt);
2909: if ((name != NULL) && (name[3] == 0) &&
2910: ((name[0] == 'x') || (name[0] == 'X')) &&
1.31 daniel 2911: ((name[1] == 'm') || (name[1] == 'M')) &&
2912: ((name[2] == 'l') || (name[2] == 'L'))) {
1.122 daniel 2913: if ((ctxt->sax != NULL) && (ctxt->sax->error != NULL)) {
2914: ctxt->sax->error(ctxt->userData,
2915: "xmlParsePItarget: invalid name prefix 'xml'\n");
1.123 ! daniel 2916: ctxt->errNo = XML_ERR_RESERVED_XML_NAME;
1.122 daniel 2917: /* ctxt->wellFormed = 0; !!! ? */
2918: }
1.22 daniel 2919: return(NULL);
2920: }
2921: return(name);
2922: }
2923:
1.50 daniel 2924: /**
2925: * xmlParsePI:
2926: * @ctxt: an XML parser context
2927: *
2928: * parse an XML Processing Instruction.
1.22 daniel 2929: *
2930: * [16] PI ::= '<?' PITarget (S (Char* - (Char* '?>' Char*)))? '?>'
1.68 daniel 2931: *
1.69 daniel 2932: * The processing is transfered to SAX once parsed.
1.3 veillard 2933: */
2934:
1.55 daniel 2935: void
2936: xmlParsePI(xmlParserCtxtPtr ctxt) {
1.123 ! daniel 2937: xmlChar *target;
1.22 daniel 2938:
1.40 daniel 2939: if ((CUR == '<') && (NXT(1) == '?')) {
1.3 veillard 2940: /*
2941: * this is a Processing Instruction.
2942: */
1.40 daniel 2943: SKIP(2);
1.91 daniel 2944: SHRINK;
1.3 veillard 2945:
2946: /*
1.22 daniel 2947: * Parse the target name and check for special support like
2948: * namespace.
1.3 veillard 2949: */
1.22 daniel 2950: target = xmlParsePITarget(ctxt);
2951: if (target != NULL) {
1.123 ! daniel 2952: const xmlChar *q;
1.72 daniel 2953:
1.114 daniel 2954: if (!IS_BLANK(CUR)) {
2955: if ((ctxt->sax != NULL) && (ctxt->sax->error != NULL))
2956: ctxt->sax->error(ctxt->userData,
2957: "xmlParsePI: PI %s space expected\n", target);
1.123 ! daniel 2958: ctxt->errNo = XML_ERR_SPACE_REQUIRED;
1.114 daniel 2959: ctxt->wellFormed = 0;
2960: }
2961: SKIP_BLANKS;
2962: q = CUR_PTR;
1.72 daniel 2963: while (IS_CHAR(CUR) &&
2964: ((CUR != '?') || (NXT(1) != '>')))
2965: NEXT;
2966: if (!IS_CHAR(CUR)) {
2967: if ((ctxt->sax != NULL) && (ctxt->sax->error != NULL))
1.74 daniel 2968: ctxt->sax->error(ctxt->userData,
1.72 daniel 2969: "xmlParsePI: PI %s never end ...\n", target);
1.123 ! daniel 2970: ctxt->errNo = XML_ERR_PI_NOT_FINISHED;
1.72 daniel 2971: ctxt->wellFormed = 0;
1.22 daniel 2972: } else {
1.123 ! daniel 2973: xmlChar *data;
1.44 daniel 2974:
1.72 daniel 2975: data = xmlStrndup(q, CUR_PTR - q);
2976: SKIP(2);
1.44 daniel 2977:
1.72 daniel 2978: /*
2979: * SAX: PI detected.
2980: */
2981: if ((ctxt->sax) &&
2982: (ctxt->sax->processingInstruction != NULL))
1.99 daniel 2983: ctxt->sax->processingInstruction(ctxt->userData,
2984: target, data);
1.119 daniel 2985: xmlFree(data);
1.22 daniel 2986: }
1.119 daniel 2987: xmlFree(target);
1.3 veillard 2988: } else {
1.55 daniel 2989: if ((ctxt->sax != NULL) && (ctxt->sax->error != NULL))
1.99 daniel 2990: ctxt->sax->error(ctxt->userData,
2991: "xmlParsePI : no target name\n");
1.123 ! daniel 2992: ctxt->errNo = XML_ERR_PI_NOT_STARTED;
1.59 daniel 2993: ctxt->wellFormed = 0;
1.22 daniel 2994: }
2995: }
2996: }
2997:
1.50 daniel 2998: /**
2999: * xmlParseNotationDecl:
3000: * @ctxt: an XML parser context
3001: *
3002: * parse a notation declaration
1.22 daniel 3003: *
3004: * [82] NotationDecl ::= '<!NOTATION' S Name S (ExternalID | PublicID) S? '>'
3005: *
3006: * Hence there is actually 3 choices:
3007: * 'PUBLIC' S PubidLiteral
3008: * 'PUBLIC' S PubidLiteral S SystemLiteral
3009: * and 'SYSTEM' S SystemLiteral
1.50 daniel 3010: *
1.67 daniel 3011: * See the NOTE on xmlParseExternalID().
1.22 daniel 3012: */
3013:
1.55 daniel 3014: void
3015: xmlParseNotationDecl(xmlParserCtxtPtr ctxt) {
1.123 ! daniel 3016: xmlChar *name;
! 3017: xmlChar *Pubid;
! 3018: xmlChar *Systemid;
1.22 daniel 3019:
1.40 daniel 3020: if ((CUR == '<') && (NXT(1) == '!') &&
3021: (NXT(2) == 'N') && (NXT(3) == 'O') &&
3022: (NXT(4) == 'T') && (NXT(5) == 'A') &&
3023: (NXT(6) == 'T') && (NXT(7) == 'I') &&
1.67 daniel 3024: (NXT(8) == 'O') && (NXT(9) == 'N')) {
1.91 daniel 3025: SHRINK;
1.40 daniel 3026: SKIP(10);
1.67 daniel 3027: if (!IS_BLANK(CUR)) {
3028: if ((ctxt->sax != NULL) && (ctxt->sax->error != NULL))
1.122 daniel 3029: ctxt->sax->error(ctxt->userData,
3030: "Space required after '<!NOTATION'\n");
1.123 ! daniel 3031: ctxt->errNo = XML_ERR_SPACE_REQUIRED;
1.67 daniel 3032: ctxt->wellFormed = 0;
3033: return;
3034: }
3035: SKIP_BLANKS;
1.22 daniel 3036:
3037: name = xmlParseName(ctxt);
3038: if (name == NULL) {
1.55 daniel 3039: if ((ctxt->sax != NULL) && (ctxt->sax->error != NULL))
1.122 daniel 3040: ctxt->sax->error(ctxt->userData,
3041: "NOTATION: Name expected here\n");
1.123 ! daniel 3042: ctxt->errNo = XML_ERR_NOTATION_NOT_STARTED;
1.67 daniel 3043: ctxt->wellFormed = 0;
3044: return;
3045: }
3046: if (!IS_BLANK(CUR)) {
3047: if ((ctxt->sax != NULL) && (ctxt->sax->error != NULL))
1.74 daniel 3048: ctxt->sax->error(ctxt->userData,
1.67 daniel 3049: "Space required after the NOTATION name'\n");
1.123 ! daniel 3050: ctxt->errNo = XML_ERR_SPACE_REQUIRED;
1.59 daniel 3051: ctxt->wellFormed = 0;
1.22 daniel 3052: return;
3053: }
1.42 daniel 3054: SKIP_BLANKS;
1.67 daniel 3055:
1.22 daniel 3056: /*
1.67 daniel 3057: * Parse the IDs.
1.22 daniel 3058: */
1.67 daniel 3059: Systemid = xmlParseExternalID(ctxt, &Pubid, 1);
3060: SKIP_BLANKS;
3061:
3062: if (CUR == '>') {
1.40 daniel 3063: NEXT;
1.72 daniel 3064: if ((ctxt->sax != NULL) && (ctxt->sax->notationDecl != NULL))
1.74 daniel 3065: ctxt->sax->notationDecl(ctxt->userData, name, Pubid, Systemid);
1.67 daniel 3066: } else {
3067: if ((ctxt->sax != NULL) && (ctxt->sax->error != NULL))
1.74 daniel 3068: ctxt->sax->error(ctxt->userData,
1.67 daniel 3069: "'>' required to close NOTATION declaration\n");
1.123 ! daniel 3070: ctxt->errNo = XML_ERR_NOTATION_NOT_FINISHED;
1.67 daniel 3071: ctxt->wellFormed = 0;
3072: }
1.119 daniel 3073: xmlFree(name);
3074: if (Systemid != NULL) xmlFree(Systemid);
3075: if (Pubid != NULL) xmlFree(Pubid);
1.22 daniel 3076: }
3077: }
3078:
1.50 daniel 3079: /**
3080: * xmlParseEntityDecl:
3081: * @ctxt: an XML parser context
3082: *
3083: * parse <!ENTITY declarations
1.22 daniel 3084: *
3085: * [70] EntityDecl ::= GEDecl | PEDecl
3086: *
3087: * [71] GEDecl ::= '<!ENTITY' S Name S EntityDef S? '>'
3088: *
3089: * [72] PEDecl ::= '<!ENTITY' S '%' S Name S PEDef S? '>'
3090: *
3091: * [73] EntityDef ::= EntityValue | (ExternalID NDataDecl?)
3092: *
3093: * [74] PEDef ::= EntityValue | ExternalID
1.24 daniel 3094: *
3095: * [76] NDataDecl ::= S 'NDATA' S Name
1.99 daniel 3096: *
3097: * [ VC: Notation Declared ]
1.116 daniel 3098: * The Name must match the declared name of a notation.
1.22 daniel 3099: */
3100:
1.55 daniel 3101: void
3102: xmlParseEntityDecl(xmlParserCtxtPtr ctxt) {
1.123 ! daniel 3103: xmlChar *name = NULL;
! 3104: xmlChar *value = NULL;
! 3105: xmlChar *URI = NULL, *literal = NULL;
! 3106: xmlChar *ndata = NULL;
1.39 daniel 3107: int isParameter = 0;
1.123 ! daniel 3108: xmlChar *orig = NULL;
1.22 daniel 3109:
1.94 daniel 3110: GROW;
1.40 daniel 3111: if ((CUR == '<') && (NXT(1) == '!') &&
3112: (NXT(2) == 'E') && (NXT(3) == 'N') &&
3113: (NXT(4) == 'T') && (NXT(5) == 'I') &&
1.59 daniel 3114: (NXT(6) == 'T') && (NXT(7) == 'Y')) {
1.96 daniel 3115: ctxt->instate = XML_PARSER_ENTITY_DECL;
1.91 daniel 3116: SHRINK;
1.40 daniel 3117: SKIP(8);
1.59 daniel 3118: if (!IS_BLANK(CUR)) {
3119: if ((ctxt->sax != NULL) && (ctxt->sax->error != NULL))
1.122 daniel 3120: ctxt->sax->error(ctxt->userData,
3121: "Space required after '<!ENTITY'\n");
1.123 ! daniel 3122: ctxt->errNo = XML_ERR_SPACE_REQUIRED;
1.59 daniel 3123: ctxt->wellFormed = 0;
3124: }
3125: SKIP_BLANKS;
1.40 daniel 3126:
3127: if (CUR == '%') {
3128: NEXT;
1.59 daniel 3129: if (!IS_BLANK(CUR)) {
3130: if ((ctxt->sax != NULL) && (ctxt->sax->error != NULL))
1.122 daniel 3131: ctxt->sax->error(ctxt->userData,
3132: "Space required after '%'\n");
1.123 ! daniel 3133: ctxt->errNo = XML_ERR_SPACE_REQUIRED;
1.59 daniel 3134: ctxt->wellFormed = 0;
3135: }
1.42 daniel 3136: SKIP_BLANKS;
1.39 daniel 3137: isParameter = 1;
1.22 daniel 3138: }
3139:
3140: name = xmlParseName(ctxt);
1.24 daniel 3141: if (name == NULL) {
1.55 daniel 3142: if ((ctxt->sax != NULL) && (ctxt->sax->error != NULL))
1.74 daniel 3143: ctxt->sax->error(ctxt->userData, "xmlParseEntityDecl: no name\n");
1.123 ! daniel 3144: ctxt->errNo = XML_ERR_NAME_REQUIRED;
1.59 daniel 3145: ctxt->wellFormed = 0;
1.24 daniel 3146: return;
3147: }
1.59 daniel 3148: if (!IS_BLANK(CUR)) {
3149: if ((ctxt->sax != NULL) && (ctxt->sax->error != NULL))
1.74 daniel 3150: ctxt->sax->error(ctxt->userData,
1.59 daniel 3151: "Space required after the entity name\n");
1.123 ! daniel 3152: ctxt->errNo = XML_ERR_SPACE_REQUIRED;
1.59 daniel 3153: ctxt->wellFormed = 0;
3154: }
1.42 daniel 3155: SKIP_BLANKS;
1.24 daniel 3156:
1.22 daniel 3157: /*
1.68 daniel 3158: * handle the various case of definitions...
1.22 daniel 3159: */
1.39 daniel 3160: if (isParameter) {
1.40 daniel 3161: if ((CUR == '"') || (CUR == '\''))
1.78 daniel 3162: value = xmlParseEntityValue(ctxt, &orig);
1.39 daniel 3163: if (value) {
1.72 daniel 3164: if ((ctxt->sax != NULL) && (ctxt->sax->entityDecl != NULL))
1.74 daniel 3165: ctxt->sax->entityDecl(ctxt->userData, name,
1.39 daniel 3166: XML_INTERNAL_PARAMETER_ENTITY,
3167: NULL, NULL, value);
3168: }
1.24 daniel 3169: else {
1.67 daniel 3170: URI = xmlParseExternalID(ctxt, &literal, 1);
1.39 daniel 3171: if (URI) {
1.72 daniel 3172: if ((ctxt->sax != NULL) && (ctxt->sax->entityDecl != NULL))
1.74 daniel 3173: ctxt->sax->entityDecl(ctxt->userData, name,
1.39 daniel 3174: XML_EXTERNAL_PARAMETER_ENTITY,
3175: literal, URI, NULL);
3176: }
1.24 daniel 3177: }
3178: } else {
1.40 daniel 3179: if ((CUR == '"') || (CUR == '\'')) {
1.78 daniel 3180: value = xmlParseEntityValue(ctxt, &orig);
1.72 daniel 3181: if ((ctxt->sax != NULL) && (ctxt->sax->entityDecl != NULL))
1.74 daniel 3182: ctxt->sax->entityDecl(ctxt->userData, name,
1.39 daniel 3183: XML_INTERNAL_GENERAL_ENTITY,
3184: NULL, NULL, value);
3185: } else {
1.67 daniel 3186: URI = xmlParseExternalID(ctxt, &literal, 1);
1.59 daniel 3187: if ((CUR != '>') && (!IS_BLANK(CUR))) {
3188: if ((ctxt->sax != NULL) && (ctxt->sax->error != NULL))
1.74 daniel 3189: ctxt->sax->error(ctxt->userData,
1.59 daniel 3190: "Space required before 'NDATA'\n");
1.123 ! daniel 3191: ctxt->errNo = XML_ERR_SPACE_REQUIRED;
1.59 daniel 3192: ctxt->wellFormed = 0;
3193: }
1.42 daniel 3194: SKIP_BLANKS;
1.40 daniel 3195: if ((CUR == 'N') && (NXT(1) == 'D') &&
3196: (NXT(2) == 'A') && (NXT(3) == 'T') &&
3197: (NXT(4) == 'A')) {
3198: SKIP(5);
1.59 daniel 3199: if (!IS_BLANK(CUR)) {
3200: if ((ctxt->sax != NULL) && (ctxt->sax->error != NULL))
1.74 daniel 3201: ctxt->sax->error(ctxt->userData,
1.59 daniel 3202: "Space required after 'NDATA'\n");
1.123 ! daniel 3203: ctxt->errNo = XML_ERR_SPACE_REQUIRED;
1.59 daniel 3204: ctxt->wellFormed = 0;
3205: }
1.42 daniel 3206: SKIP_BLANKS;
1.24 daniel 3207: ndata = xmlParseName(ctxt);
1.116 daniel 3208: if ((ctxt->sax != NULL) &&
3209: (ctxt->sax->unparsedEntityDecl != NULL))
3210: ctxt->sax->unparsedEntityDecl(ctxt->userData, name,
1.39 daniel 3211: literal, URI, ndata);
3212: } else {
1.72 daniel 3213: if ((ctxt->sax != NULL) && (ctxt->sax->entityDecl != NULL))
1.74 daniel 3214: ctxt->sax->entityDecl(ctxt->userData, name,
1.39 daniel 3215: XML_EXTERNAL_GENERAL_PARSED_ENTITY,
3216: literal, URI, NULL);
1.24 daniel 3217: }
3218: }
3219: }
1.42 daniel 3220: SKIP_BLANKS;
1.40 daniel 3221: if (CUR != '>') {
1.55 daniel 3222: if ((ctxt->sax != NULL) && (ctxt->sax->error != NULL))
1.74 daniel 3223: ctxt->sax->error(ctxt->userData,
1.31 daniel 3224: "xmlParseEntityDecl: entity %s not terminated\n", name);
1.123 ! daniel 3225: ctxt->errNo = XML_ERR_ENTITY_NOT_FINISHED;
1.59 daniel 3226: ctxt->wellFormed = 0;
1.24 daniel 3227: } else
1.40 daniel 3228: NEXT;
1.78 daniel 3229: if (orig != NULL) {
3230: /*
1.98 daniel 3231: * Ugly mechanism to save the raw entity value.
1.78 daniel 3232: */
3233: xmlEntityPtr cur = NULL;
3234:
1.98 daniel 3235: if (isParameter) {
3236: if ((ctxt->sax != NULL) &&
3237: (ctxt->sax->getParameterEntity != NULL))
1.120 daniel 3238: cur = ctxt->sax->getParameterEntity(ctxt->userData, name);
1.98 daniel 3239: } else {
3240: if ((ctxt->sax != NULL) &&
3241: (ctxt->sax->getEntity != NULL))
1.120 daniel 3242: cur = ctxt->sax->getEntity(ctxt->userData, name);
1.98 daniel 3243: }
3244: if (cur != NULL) {
3245: if (cur->orig != NULL)
1.119 daniel 3246: xmlFree(orig);
1.98 daniel 3247: else
3248: cur->orig = orig;
3249: } else
1.119 daniel 3250: xmlFree(orig);
1.78 daniel 3251: }
1.119 daniel 3252: if (name != NULL) xmlFree(name);
3253: if (value != NULL) xmlFree(value);
3254: if (URI != NULL) xmlFree(URI);
3255: if (literal != NULL) xmlFree(literal);
3256: if (ndata != NULL) xmlFree(ndata);
1.22 daniel 3257: }
3258: }
3259:
1.50 daniel 3260: /**
1.59 daniel 3261: * xmlParseDefaultDecl:
3262: * @ctxt: an XML parser context
3263: * @value: Receive a possible fixed default value for the attribute
3264: *
3265: * Parse an attribute default declaration
3266: *
3267: * [60] DefaultDecl ::= '#REQUIRED' | '#IMPLIED' | (('#FIXED' S)? AttValue)
3268: *
1.99 daniel 3269: * [ VC: Required Attribute ]
1.117 daniel 3270: * if the default declaration is the keyword #REQUIRED, then the
3271: * attribute must be specified for all elements of the type in the
3272: * attribute-list declaration.
1.99 daniel 3273: *
3274: * [ VC: Attribute Default Legal ]
1.102 daniel 3275: * The declared default value must meet the lexical constraints of
3276: * the declared attribute type c.f. xmlValidateAttributeDecl()
1.99 daniel 3277: *
3278: * [ VC: Fixed Attribute Default ]
1.117 daniel 3279: * if an attribute has a default value declared with the #FIXED
3280: * keyword, instances of that attribute must match the default value.
1.99 daniel 3281: *
3282: * [ WFC: No < in Attribute Values ]
3283: * handled in xmlParseAttValue()
3284: *
1.59 daniel 3285: * returns: XML_ATTRIBUTE_NONE, XML_ATTRIBUTE_REQUIRED, XML_ATTRIBUTE_IMPLIED
3286: * or XML_ATTRIBUTE_FIXED.
3287: */
3288:
3289: int
1.123 ! daniel 3290: xmlParseDefaultDecl(xmlParserCtxtPtr ctxt, xmlChar **value) {
1.59 daniel 3291: int val;
1.123 ! daniel 3292: xmlChar *ret;
1.59 daniel 3293:
3294: *value = NULL;
3295: if ((CUR == '#') && (NXT(1) == 'R') &&
3296: (NXT(2) == 'E') && (NXT(3) == 'Q') &&
3297: (NXT(4) == 'U') && (NXT(5) == 'I') &&
3298: (NXT(6) == 'R') && (NXT(7) == 'E') &&
3299: (NXT(8) == 'D')) {
3300: SKIP(9);
3301: return(XML_ATTRIBUTE_REQUIRED);
3302: }
3303: if ((CUR == '#') && (NXT(1) == 'I') &&
3304: (NXT(2) == 'M') && (NXT(3) == 'P') &&
3305: (NXT(4) == 'L') && (NXT(5) == 'I') &&
3306: (NXT(6) == 'E') && (NXT(7) == 'D')) {
3307: SKIP(8);
3308: return(XML_ATTRIBUTE_IMPLIED);
3309: }
3310: val = XML_ATTRIBUTE_NONE;
3311: if ((CUR == '#') && (NXT(1) == 'F') &&
3312: (NXT(2) == 'I') && (NXT(3) == 'X') &&
3313: (NXT(4) == 'E') && (NXT(5) == 'D')) {
3314: SKIP(6);
3315: val = XML_ATTRIBUTE_FIXED;
3316: if (!IS_BLANK(CUR)) {
3317: if ((ctxt->sax != NULL) && (ctxt->sax->error != NULL))
1.122 daniel 3318: ctxt->sax->error(ctxt->userData,
3319: "Space required after '#FIXED'\n");
1.123 ! daniel 3320: ctxt->errNo = XML_ERR_SPACE_REQUIRED;
1.59 daniel 3321: ctxt->wellFormed = 0;
3322: }
3323: SKIP_BLANKS;
3324: }
3325: ret = xmlParseAttValue(ctxt);
1.96 daniel 3326: ctxt->instate = XML_PARSER_DTD;
1.59 daniel 3327: if (ret == NULL) {
3328: if ((ctxt->sax != NULL) && (ctxt->sax->error != NULL))
1.74 daniel 3329: ctxt->sax->error(ctxt->userData,
1.59 daniel 3330: "Attribute default value declaration error\n");
3331: ctxt->wellFormed = 0;
3332: } else
3333: *value = ret;
3334: return(val);
3335: }
3336:
3337: /**
1.66 daniel 3338: * xmlParseNotationType:
3339: * @ctxt: an XML parser context
3340: *
3341: * parse an Notation attribute type.
3342: *
1.99 daniel 3343: * Note: the leading 'NOTATION' S part has already being parsed...
3344: *
1.66 daniel 3345: * [58] NotationType ::= 'NOTATION' S '(' S? Name (S? '|' S? Name)* S? ')'
3346: *
1.99 daniel 3347: * [ VC: Notation Attributes ]
1.117 daniel 3348: * Values of this type must match one of the notation names included
1.99 daniel 3349: * in the declaration; all notation names in the declaration must be declared.
1.66 daniel 3350: *
3351: * Returns: the notation attribute tree built while parsing
3352: */
3353:
3354: xmlEnumerationPtr
3355: xmlParseNotationType(xmlParserCtxtPtr ctxt) {
1.123 ! daniel 3356: xmlChar *name;
1.66 daniel 3357: xmlEnumerationPtr ret = NULL, last = NULL, cur;
3358:
3359: if (CUR != '(') {
3360: if ((ctxt->sax != NULL) && (ctxt->sax->error != NULL))
1.122 daniel 3361: ctxt->sax->error(ctxt->userData,
3362: "'(' required to start 'NOTATION'\n");
1.123 ! daniel 3363: ctxt->errNo = XML_ERR_NOTATION_NOT_STARTED;
1.66 daniel 3364: ctxt->wellFormed = 0;
3365: return(NULL);
3366: }
1.91 daniel 3367: SHRINK;
1.66 daniel 3368: do {
3369: NEXT;
3370: SKIP_BLANKS;
3371: name = xmlParseName(ctxt);
3372: if (name == NULL) {
3373: if ((ctxt->sax != NULL) && (ctxt->sax->error != NULL))
1.74 daniel 3374: ctxt->sax->error(ctxt->userData,
1.66 daniel 3375: "Name expected in NOTATION declaration\n");
1.123 ! daniel 3376: ctxt->errNo = XML_ERR_NAME_REQUIRED;
1.66 daniel 3377: ctxt->wellFormed = 0;
3378: return(ret);
3379: }
3380: cur = xmlCreateEnumeration(name);
1.119 daniel 3381: xmlFree(name);
1.66 daniel 3382: if (cur == NULL) return(ret);
3383: if (last == NULL) ret = last = cur;
3384: else {
3385: last->next = cur;
3386: last = cur;
3387: }
3388: SKIP_BLANKS;
3389: } while (CUR == '|');
3390: if (CUR != ')') {
3391: if ((ctxt->sax != NULL) && (ctxt->sax->error != NULL))
1.74 daniel 3392: ctxt->sax->error(ctxt->userData,
1.66 daniel 3393: "')' required to finish NOTATION declaration\n");
1.123 ! daniel 3394: ctxt->errNo = XML_ERR_NOTATION_NOT_FINISHED;
1.66 daniel 3395: ctxt->wellFormed = 0;
3396: return(ret);
3397: }
3398: NEXT;
3399: return(ret);
3400: }
3401:
3402: /**
3403: * xmlParseEnumerationType:
3404: * @ctxt: an XML parser context
3405: *
3406: * parse an Enumeration attribute type.
3407: *
3408: * [59] Enumeration ::= '(' S? Nmtoken (S? '|' S? Nmtoken)* S? ')'
3409: *
1.99 daniel 3410: * [ VC: Enumeration ]
1.117 daniel 3411: * Values of this type must match one of the Nmtoken tokens in
1.99 daniel 3412: * the declaration
3413: *
1.66 daniel 3414: * Returns: the enumeration attribute tree built while parsing
3415: */
3416:
3417: xmlEnumerationPtr
3418: xmlParseEnumerationType(xmlParserCtxtPtr ctxt) {
1.123 ! daniel 3419: xmlChar *name;
1.66 daniel 3420: xmlEnumerationPtr ret = NULL, last = NULL, cur;
3421:
3422: if (CUR != '(') {
3423: if ((ctxt->sax != NULL) && (ctxt->sax->error != NULL))
1.74 daniel 3424: ctxt->sax->error(ctxt->userData,
1.66 daniel 3425: "'(' required to start ATTLIST enumeration\n");
1.123 ! daniel 3426: ctxt->errNo = XML_ERR_ATTLIST_NOT_STARTED;
1.66 daniel 3427: ctxt->wellFormed = 0;
3428: return(NULL);
3429: }
1.91 daniel 3430: SHRINK;
1.66 daniel 3431: do {
3432: NEXT;
3433: SKIP_BLANKS;
3434: name = xmlParseNmtoken(ctxt);
3435: if (name == NULL) {
3436: if ((ctxt->sax != NULL) && (ctxt->sax->error != NULL))
1.74 daniel 3437: ctxt->sax->error(ctxt->userData,
1.66 daniel 3438: "NmToken expected in ATTLIST enumeration\n");
1.123 ! daniel 3439: ctxt->errNo = XML_ERR_NMTOKEN_REQUIRED;
1.66 daniel 3440: ctxt->wellFormed = 0;
3441: return(ret);
3442: }
3443: cur = xmlCreateEnumeration(name);
1.119 daniel 3444: xmlFree(name);
1.66 daniel 3445: if (cur == NULL) return(ret);
3446: if (last == NULL) ret = last = cur;
3447: else {
3448: last->next = cur;
3449: last = cur;
3450: }
3451: SKIP_BLANKS;
3452: } while (CUR == '|');
3453: if (CUR != ')') {
3454: if ((ctxt->sax != NULL) && (ctxt->sax->error != NULL))
1.74 daniel 3455: ctxt->sax->error(ctxt->userData,
1.66 daniel 3456: "')' required to finish ATTLIST enumeration\n");
1.123 ! daniel 3457: ctxt->errNo = XML_ERR_ATTLIST_NOT_FINISHED;
1.66 daniel 3458: ctxt->wellFormed = 0;
3459: return(ret);
3460: }
3461: NEXT;
3462: return(ret);
3463: }
3464:
3465: /**
1.50 daniel 3466: * xmlParseEnumeratedType:
3467: * @ctxt: an XML parser context
1.66 daniel 3468: * @tree: the enumeration tree built while parsing
1.50 daniel 3469: *
1.66 daniel 3470: * parse an Enumerated attribute type.
1.22 daniel 3471: *
3472: * [57] EnumeratedType ::= NotationType | Enumeration
3473: *
3474: * [58] NotationType ::= 'NOTATION' S '(' S? Name (S? '|' S? Name)* S? ')'
3475: *
1.50 daniel 3476: *
1.66 daniel 3477: * Returns: XML_ATTRIBUTE_ENUMERATION or XML_ATTRIBUTE_NOTATION
1.22 daniel 3478: */
3479:
1.66 daniel 3480: int
3481: xmlParseEnumeratedType(xmlParserCtxtPtr ctxt, xmlEnumerationPtr *tree) {
3482: if ((CUR == 'N') && (NXT(1) == 'O') &&
3483: (NXT(2) == 'T') && (NXT(3) == 'A') &&
3484: (NXT(4) == 'T') && (NXT(5) == 'I') &&
3485: (NXT(6) == 'O') && (NXT(7) == 'N')) {
3486: SKIP(8);
3487: if (!IS_BLANK(CUR)) {
3488: if ((ctxt->sax != NULL) && (ctxt->sax->error != NULL))
1.122 daniel 3489: ctxt->sax->error(ctxt->userData,
3490: "Space required after 'NOTATION'\n");
1.123 ! daniel 3491: ctxt->errNo = XML_ERR_SPACE_REQUIRED;
1.66 daniel 3492: ctxt->wellFormed = 0;
3493: return(0);
3494: }
3495: SKIP_BLANKS;
3496: *tree = xmlParseNotationType(ctxt);
3497: if (*tree == NULL) return(0);
3498: return(XML_ATTRIBUTE_NOTATION);
3499: }
3500: *tree = xmlParseEnumerationType(ctxt);
3501: if (*tree == NULL) return(0);
3502: return(XML_ATTRIBUTE_ENUMERATION);
1.22 daniel 3503: }
3504:
1.50 daniel 3505: /**
3506: * xmlParseAttributeType:
3507: * @ctxt: an XML parser context
1.66 daniel 3508: * @tree: the enumeration tree built while parsing
1.50 daniel 3509: *
1.59 daniel 3510: * parse the Attribute list def for an element
1.22 daniel 3511: *
3512: * [54] AttType ::= StringType | TokenizedType | EnumeratedType
3513: *
3514: * [55] StringType ::= 'CDATA'
3515: *
3516: * [56] TokenizedType ::= 'ID' | 'IDREF' | 'IDREFS' | 'ENTITY' |
3517: * 'ENTITIES' | 'NMTOKEN' | 'NMTOKENS'
1.50 daniel 3518: *
1.102 daniel 3519: * Validity constraints for attribute values syntax are checked in
3520: * xmlValidateAttributeValue()
3521: *
1.99 daniel 3522: * [ VC: ID ]
1.117 daniel 3523: * Values of type ID must match the Name production. A name must not
1.99 daniel 3524: * appear more than once in an XML document as a value of this type;
3525: * i.e., ID values must uniquely identify the elements which bear them.
3526: *
3527: * [ VC: One ID per Element Type ]
1.117 daniel 3528: * No element type may have more than one ID attribute specified.
1.99 daniel 3529: *
3530: * [ VC: ID Attribute Default ]
1.117 daniel 3531: * An ID attribute must have a declared default of #IMPLIED or #REQUIRED.
1.99 daniel 3532: *
3533: * [ VC: IDREF ]
1.102 daniel 3534: * Values of type IDREF must match the Name production, and values
1.117 daniel 3535: * of type IDREFS must match Names; TODO each IDREF Name must match the value
3536: * of an ID attribute on some element in the XML document; i.e. IDREF
1.99 daniel 3537: * values must match the value of some ID attribute.
3538: *
3539: * [ VC: Entity Name ]
1.102 daniel 3540: * Values of type ENTITY must match the Name production, values
1.117 daniel 3541: * of type ENTITIES must match Names; TODO each Entity Name must match the
3542: * name of an unparsed entity declared in the DTD.
1.99 daniel 3543: *
3544: * [ VC: Name Token ]
1.102 daniel 3545: * Values of type NMTOKEN must match the Nmtoken production; values
1.99 daniel 3546: * of type NMTOKENS must match Nmtokens.
3547: *
1.69 daniel 3548: * Returns the attribute type
1.22 daniel 3549: */
1.59 daniel 3550: int
1.66 daniel 3551: xmlParseAttributeType(xmlParserCtxtPtr ctxt, xmlEnumerationPtr *tree) {
1.91 daniel 3552: SHRINK;
1.40 daniel 3553: if ((CUR == 'C') && (NXT(1) == 'D') &&
3554: (NXT(2) == 'A') && (NXT(3) == 'T') &&
3555: (NXT(4) == 'A')) {
3556: SKIP(5);
1.66 daniel 3557: return(XML_ATTRIBUTE_CDATA);
1.40 daniel 3558: } else if ((CUR == 'I') && (NXT(1) == 'D') &&
3559: (NXT(2) == 'R') && (NXT(3) == 'E') &&
1.97 daniel 3560: (NXT(4) == 'F') && (NXT(5) == 'S')) {
3561: SKIP(6);
3562: return(XML_ATTRIBUTE_IDREFS);
3563: } else if ((CUR == 'I') && (NXT(1) == 'D') &&
3564: (NXT(2) == 'R') && (NXT(3) == 'E') &&
1.40 daniel 3565: (NXT(4) == 'F')) {
3566: SKIP(5);
1.59 daniel 3567: return(XML_ATTRIBUTE_IDREF);
1.66 daniel 3568: } else if ((CUR == 'I') && (NXT(1) == 'D')) {
3569: SKIP(2);
3570: return(XML_ATTRIBUTE_ID);
1.40 daniel 3571: } else if ((CUR == 'E') && (NXT(1) == 'N') &&
3572: (NXT(2) == 'T') && (NXT(3) == 'I') &&
3573: (NXT(4) == 'T') && (NXT(5) == 'Y')) {
3574: SKIP(6);
1.59 daniel 3575: return(XML_ATTRIBUTE_ENTITY);
1.40 daniel 3576: } else if ((CUR == 'E') && (NXT(1) == 'N') &&
3577: (NXT(2) == 'T') && (NXT(3) == 'I') &&
3578: (NXT(4) == 'T') && (NXT(5) == 'I') &&
3579: (NXT(6) == 'E') && (NXT(7) == 'S')) {
3580: SKIP(8);
1.59 daniel 3581: return(XML_ATTRIBUTE_ENTITIES);
1.40 daniel 3582: } else if ((CUR == 'N') && (NXT(1) == 'M') &&
3583: (NXT(2) == 'T') && (NXT(3) == 'O') &&
3584: (NXT(4) == 'K') && (NXT(5) == 'E') &&
1.66 daniel 3585: (NXT(6) == 'N') && (NXT(7) == 'S')) {
3586: SKIP(8);
3587: return(XML_ATTRIBUTE_NMTOKENS);
3588: } else if ((CUR == 'N') && (NXT(1) == 'M') &&
3589: (NXT(2) == 'T') && (NXT(3) == 'O') &&
3590: (NXT(4) == 'K') && (NXT(5) == 'E') &&
1.40 daniel 3591: (NXT(6) == 'N')) {
3592: SKIP(7);
1.59 daniel 3593: return(XML_ATTRIBUTE_NMTOKEN);
1.22 daniel 3594: }
1.66 daniel 3595: return(xmlParseEnumeratedType(ctxt, tree));
1.22 daniel 3596: }
3597:
1.50 daniel 3598: /**
3599: * xmlParseAttributeListDecl:
3600: * @ctxt: an XML parser context
3601: *
3602: * : parse the Attribute list def for an element
1.22 daniel 3603: *
3604: * [52] AttlistDecl ::= '<!ATTLIST' S Name AttDef* S? '>'
3605: *
3606: * [53] AttDef ::= S Name S AttType S DefaultDecl
1.50 daniel 3607: *
1.22 daniel 3608: */
1.55 daniel 3609: void
3610: xmlParseAttributeListDecl(xmlParserCtxtPtr ctxt) {
1.123 ! daniel 3611: xmlChar *elemName;
! 3612: xmlChar *attrName;
1.103 daniel 3613: xmlEnumerationPtr tree;
1.22 daniel 3614:
1.40 daniel 3615: if ((CUR == '<') && (NXT(1) == '!') &&
3616: (NXT(2) == 'A') && (NXT(3) == 'T') &&
3617: (NXT(4) == 'T') && (NXT(5) == 'L') &&
3618: (NXT(6) == 'I') && (NXT(7) == 'S') &&
1.59 daniel 3619: (NXT(8) == 'T')) {
1.40 daniel 3620: SKIP(9);
1.59 daniel 3621: if (!IS_BLANK(CUR)) {
3622: if ((ctxt->sax != NULL) && (ctxt->sax->error != NULL))
1.122 daniel 3623: ctxt->sax->error(ctxt->userData,
3624: "Space required after '<!ATTLIST'\n");
1.123 ! daniel 3625: ctxt->errNo = XML_ERR_SPACE_REQUIRED;
1.59 daniel 3626: ctxt->wellFormed = 0;
3627: }
1.42 daniel 3628: SKIP_BLANKS;
1.59 daniel 3629: elemName = xmlParseName(ctxt);
3630: if (elemName == NULL) {
1.55 daniel 3631: if ((ctxt->sax != NULL) && (ctxt->sax->error != NULL))
1.122 daniel 3632: ctxt->sax->error(ctxt->userData,
3633: "ATTLIST: no name for Element\n");
1.123 ! daniel 3634: ctxt->errNo = XML_ERR_NAME_REQUIRED;
1.59 daniel 3635: ctxt->wellFormed = 0;
1.22 daniel 3636: return;
3637: }
1.42 daniel 3638: SKIP_BLANKS;
1.40 daniel 3639: while (CUR != '>') {
1.123 ! daniel 3640: const xmlChar *check = CUR_PTR;
1.59 daniel 3641: int type;
3642: int def;
1.123 ! daniel 3643: xmlChar *defaultValue = NULL;
1.59 daniel 3644:
1.103 daniel 3645: tree = NULL;
1.59 daniel 3646: attrName = xmlParseName(ctxt);
3647: if (attrName == NULL) {
3648: if ((ctxt->sax != NULL) && (ctxt->sax->error != NULL))
1.122 daniel 3649: ctxt->sax->error(ctxt->userData,
3650: "ATTLIST: no name for Attribute\n");
1.123 ! daniel 3651: ctxt->errNo = XML_ERR_NAME_REQUIRED;
1.59 daniel 3652: ctxt->wellFormed = 0;
3653: break;
3654: }
1.97 daniel 3655: GROW;
1.59 daniel 3656: if (!IS_BLANK(CUR)) {
3657: if ((ctxt->sax != NULL) && (ctxt->sax->error != NULL))
1.74 daniel 3658: ctxt->sax->error(ctxt->userData,
1.59 daniel 3659: "Space required after the attribute name\n");
1.123 ! daniel 3660: ctxt->errNo = XML_ERR_SPACE_REQUIRED;
1.59 daniel 3661: ctxt->wellFormed = 0;
3662: break;
3663: }
3664: SKIP_BLANKS;
3665:
1.66 daniel 3666: type = xmlParseAttributeType(ctxt, &tree);
1.59 daniel 3667: if (type <= 0) break;
1.22 daniel 3668:
1.97 daniel 3669: GROW;
1.59 daniel 3670: if (!IS_BLANK(CUR)) {
3671: if ((ctxt->sax != NULL) && (ctxt->sax->error != NULL))
1.74 daniel 3672: ctxt->sax->error(ctxt->userData,
1.59 daniel 3673: "Space required after the attribute type\n");
1.123 ! daniel 3674: ctxt->errNo = XML_ERR_SPACE_REQUIRED;
1.59 daniel 3675: ctxt->wellFormed = 0;
3676: break;
3677: }
1.42 daniel 3678: SKIP_BLANKS;
1.59 daniel 3679:
3680: def = xmlParseDefaultDecl(ctxt, &defaultValue);
3681: if (def <= 0) break;
3682:
1.97 daniel 3683: GROW;
1.59 daniel 3684: if (CUR != '>') {
3685: if (!IS_BLANK(CUR)) {
3686: if ((ctxt->sax != NULL) && (ctxt->sax->error != NULL))
1.74 daniel 3687: ctxt->sax->error(ctxt->userData,
1.59 daniel 3688: "Space required after the attribute default value\n");
1.123 ! daniel 3689: ctxt->errNo = XML_ERR_SPACE_REQUIRED;
1.59 daniel 3690: ctxt->wellFormed = 0;
3691: break;
3692: }
3693: SKIP_BLANKS;
3694: }
1.40 daniel 3695: if (check == CUR_PTR) {
1.55 daniel 3696: if ((ctxt->sax != NULL) && (ctxt->sax->error != NULL))
1.74 daniel 3697: ctxt->sax->error(ctxt->userData,
1.59 daniel 3698: "xmlParseAttributeListDecl: detected internal error\n");
1.123 ! daniel 3699: ctxt->errNo = XML_ERR_INTERNAL_ERROR;
1.22 daniel 3700: break;
3701: }
1.72 daniel 3702: if ((ctxt->sax != NULL) && (ctxt->sax->attributeDecl != NULL))
1.74 daniel 3703: ctxt->sax->attributeDecl(ctxt->userData, elemName, attrName,
1.66 daniel 3704: type, def, defaultValue, tree);
1.59 daniel 3705: if (attrName != NULL)
1.119 daniel 3706: xmlFree(attrName);
1.59 daniel 3707: if (defaultValue != NULL)
1.119 daniel 3708: xmlFree(defaultValue);
1.97 daniel 3709: GROW;
1.22 daniel 3710: }
1.40 daniel 3711: if (CUR == '>')
3712: NEXT;
1.22 daniel 3713:
1.119 daniel 3714: xmlFree(elemName);
1.22 daniel 3715: }
3716: }
3717:
1.50 daniel 3718: /**
1.61 daniel 3719: * xmlParseElementMixedContentDecl:
3720: * @ctxt: an XML parser context
3721: *
3722: * parse the declaration for a Mixed Element content
3723: * The leading '(' and spaces have been skipped in xmlParseElementContentDecl
3724: *
3725: * [51] Mixed ::= '(' S? '#PCDATA' (S? '|' S? Name)* S? ')*' |
3726: * '(' S? '#PCDATA' S? ')'
3727: *
1.99 daniel 3728: * [ VC: Proper Group/PE Nesting ] applies to [51] too (see [49])
3729: *
3730: * [ VC: No Duplicate Types ]
1.117 daniel 3731: * The same name must not appear more than once in a single
3732: * mixed-content declaration.
1.99 daniel 3733: *
1.61 daniel 3734: * returns: the list of the xmlElementContentPtr describing the element choices
3735: */
3736: xmlElementContentPtr
1.62 daniel 3737: xmlParseElementMixedContentDecl(xmlParserCtxtPtr ctxt) {
1.64 daniel 3738: xmlElementContentPtr ret = NULL, cur = NULL, n;
1.123 ! daniel 3739: xmlChar *elem = NULL;
1.61 daniel 3740:
1.97 daniel 3741: GROW;
1.61 daniel 3742: if ((CUR == '#') && (NXT(1) == 'P') &&
3743: (NXT(2) == 'C') && (NXT(3) == 'D') &&
3744: (NXT(4) == 'A') && (NXT(5) == 'T') &&
3745: (NXT(6) == 'A')) {
3746: SKIP(7);
3747: SKIP_BLANKS;
1.91 daniel 3748: SHRINK;
1.63 daniel 3749: if (CUR == ')') {
3750: NEXT;
3751: ret = xmlNewElementContent(NULL, XML_ELEMENT_CONTENT_PCDATA);
3752: return(ret);
3753: }
1.61 daniel 3754: if ((CUR == '(') || (CUR == '|')) {
3755: ret = cur = xmlNewElementContent(NULL, XML_ELEMENT_CONTENT_PCDATA);
3756: if (ret == NULL) return(NULL);
1.99 daniel 3757: }
1.61 daniel 3758: while (CUR == '|') {
1.64 daniel 3759: NEXT;
1.61 daniel 3760: if (elem == NULL) {
3761: ret = xmlNewElementContent(NULL, XML_ELEMENT_CONTENT_OR);
3762: if (ret == NULL) return(NULL);
3763: ret->c1 = cur;
1.64 daniel 3764: cur = ret;
1.61 daniel 3765: } else {
1.64 daniel 3766: n = xmlNewElementContent(NULL, XML_ELEMENT_CONTENT_OR);
3767: if (n == NULL) return(NULL);
3768: n->c1 = xmlNewElementContent(elem, XML_ELEMENT_CONTENT_ELEMENT);
3769: cur->c2 = n;
3770: cur = n;
1.119 daniel 3771: xmlFree(elem);
1.61 daniel 3772: }
3773: SKIP_BLANKS;
3774: elem = xmlParseName(ctxt);
3775: if (elem == NULL) {
3776: if ((ctxt->sax != NULL) && (ctxt->sax->error != NULL))
1.74 daniel 3777: ctxt->sax->error(ctxt->userData,
1.61 daniel 3778: "xmlParseElementMixedContentDecl : Name expected\n");
1.123 ! daniel 3779: ctxt->errNo = XML_ERR_NAME_REQUIRED;
1.61 daniel 3780: ctxt->wellFormed = 0;
3781: xmlFreeElementContent(cur);
3782: return(NULL);
3783: }
3784: SKIP_BLANKS;
1.97 daniel 3785: GROW;
1.61 daniel 3786: }
1.63 daniel 3787: if ((CUR == ')') && (NXT(1) == '*')) {
1.66 daniel 3788: if (elem != NULL) {
1.61 daniel 3789: cur->c2 = xmlNewElementContent(elem,
3790: XML_ELEMENT_CONTENT_ELEMENT);
1.119 daniel 3791: xmlFree(elem);
1.66 daniel 3792: }
1.65 daniel 3793: ret->ocur = XML_ELEMENT_CONTENT_MULT;
1.64 daniel 3794: SKIP(2);
1.61 daniel 3795: } else {
1.119 daniel 3796: if (elem != NULL) xmlFree(elem);
1.61 daniel 3797: if ((ctxt->sax != NULL) && (ctxt->sax->error != NULL))
1.74 daniel 3798: ctxt->sax->error(ctxt->userData,
1.63 daniel 3799: "xmlParseElementMixedContentDecl : '|' or ')*' expected\n");
1.123 ! daniel 3800: ctxt->errNo = XML_ERR_MIXED_NOT_STARTED;
1.61 daniel 3801: ctxt->wellFormed = 0;
3802: xmlFreeElementContent(ret);
3803: return(NULL);
3804: }
3805:
3806: } else {
3807: if ((ctxt->sax != NULL) && (ctxt->sax->error != NULL))
1.74 daniel 3808: ctxt->sax->error(ctxt->userData,
1.61 daniel 3809: "xmlParseElementMixedContentDecl : '#PCDATA' expected\n");
1.123 ! daniel 3810: ctxt->errNo = XML_ERR_PCDATA_REQUIRED;
1.61 daniel 3811: ctxt->wellFormed = 0;
3812: }
3813: return(ret);
3814: }
3815:
3816: /**
3817: * xmlParseElementChildrenContentDecl:
1.50 daniel 3818: * @ctxt: an XML parser context
3819: *
1.61 daniel 3820: * parse the declaration for a Mixed Element content
3821: * The leading '(' and spaces have been skipped in xmlParseElementContentDecl
1.22 daniel 3822: *
1.61 daniel 3823: *
1.22 daniel 3824: * [47] children ::= (choice | seq) ('?' | '*' | '+')?
3825: *
3826: * [48] cp ::= (Name | choice | seq) ('?' | '*' | '+')?
3827: *
3828: * [49] choice ::= '(' S? cp ( S? '|' S? cp )* S? ')'
3829: *
3830: * [50] seq ::= '(' S? cp ( S? ',' S? cp )* S? ')'
3831: *
1.99 daniel 3832: * [ VC: Proper Group/PE Nesting ] applies to [49] and [50]
3833: * TODO Parameter-entity replacement text must be properly nested
3834: * with parenthetized groups. That is to say, if either of the
3835: * opening or closing parentheses in a choice, seq, or Mixed
3836: * construct is contained in the replacement text for a parameter
3837: * entity, both must be contained in the same replacement text. For
3838: * interoperability, if a parameter-entity reference appears in a
3839: * choice, seq, or Mixed construct, its replacement text should not
3840: * be empty, and neither the first nor last non-blank character of
3841: * the replacement text should be a connector (| or ,).
3842: *
1.62 daniel 3843: * returns: the tree of xmlElementContentPtr describing the element
1.61 daniel 3844: * hierarchy.
3845: */
3846: xmlElementContentPtr
1.62 daniel 3847: xmlParseElementChildrenContentDecl(xmlParserCtxtPtr ctxt) {
1.63 daniel 3848: xmlElementContentPtr ret = NULL, cur = NULL, last = NULL, op = NULL;
1.123 ! daniel 3849: xmlChar *elem;
! 3850: xmlChar type = 0;
1.62 daniel 3851:
3852: SKIP_BLANKS;
1.94 daniel 3853: GROW;
1.62 daniel 3854: if (CUR == '(') {
1.63 daniel 3855: /* Recurse on first child */
1.62 daniel 3856: NEXT;
3857: SKIP_BLANKS;
3858: cur = ret = xmlParseElementChildrenContentDecl(ctxt);
3859: SKIP_BLANKS;
1.101 daniel 3860: GROW;
1.62 daniel 3861: } else {
3862: elem = xmlParseName(ctxt);
3863: if (elem == NULL) {
3864: if ((ctxt->sax != NULL) && (ctxt->sax->error != NULL))
1.74 daniel 3865: ctxt->sax->error(ctxt->userData,
1.62 daniel 3866: "xmlParseElementChildrenContentDecl : Name or '(' expected\n");
1.123 ! daniel 3867: ctxt->errNo = XML_ERR_ELEMCONTENT_NOT_STARTED;
1.62 daniel 3868: ctxt->wellFormed = 0;
3869: return(NULL);
3870: }
3871: cur = ret = xmlNewElementContent(elem, XML_ELEMENT_CONTENT_ELEMENT);
1.101 daniel 3872: GROW;
1.62 daniel 3873: if (CUR == '?') {
1.104 daniel 3874: cur->ocur = XML_ELEMENT_CONTENT_OPT;
1.62 daniel 3875: NEXT;
3876: } else if (CUR == '*') {
1.104 daniel 3877: cur->ocur = XML_ELEMENT_CONTENT_MULT;
1.62 daniel 3878: NEXT;
3879: } else if (CUR == '+') {
1.104 daniel 3880: cur->ocur = XML_ELEMENT_CONTENT_PLUS;
1.62 daniel 3881: NEXT;
3882: } else {
1.104 daniel 3883: cur->ocur = XML_ELEMENT_CONTENT_ONCE;
1.62 daniel 3884: }
1.119 daniel 3885: xmlFree(elem);
1.101 daniel 3886: GROW;
1.62 daniel 3887: }
3888: SKIP_BLANKS;
1.91 daniel 3889: SHRINK;
1.62 daniel 3890: while (CUR != ')') {
1.63 daniel 3891: /*
3892: * Each loop we parse one separator and one element.
3893: */
1.62 daniel 3894: if (CUR == ',') {
3895: if (type == 0) type = CUR;
3896:
3897: /*
3898: * Detect "Name | Name , Name" error
3899: */
3900: else if (type != CUR) {
3901: if ((ctxt->sax != NULL) && (ctxt->sax->error != NULL))
1.74 daniel 3902: ctxt->sax->error(ctxt->userData,
1.62 daniel 3903: "xmlParseElementChildrenContentDecl : '%c' expected\n",
3904: type);
1.123 ! daniel 3905: ctxt->errNo = XML_ERR_SEPARATOR_REQUIRED;
1.62 daniel 3906: ctxt->wellFormed = 0;
3907: xmlFreeElementContent(ret);
3908: return(NULL);
3909: }
1.64 daniel 3910: NEXT;
1.62 daniel 3911:
1.63 daniel 3912: op = xmlNewElementContent(NULL, XML_ELEMENT_CONTENT_SEQ);
3913: if (op == NULL) {
3914: xmlFreeElementContent(ret);
3915: return(NULL);
3916: }
3917: if (last == NULL) {
3918: op->c1 = ret;
1.65 daniel 3919: ret = cur = op;
1.63 daniel 3920: } else {
3921: cur->c2 = op;
3922: op->c1 = last;
3923: cur =op;
1.65 daniel 3924: last = NULL;
1.63 daniel 3925: }
1.62 daniel 3926: } else if (CUR == '|') {
3927: if (type == 0) type = CUR;
3928:
3929: /*
1.63 daniel 3930: * Detect "Name , Name | Name" error
1.62 daniel 3931: */
3932: else if (type != CUR) {
3933: if ((ctxt->sax != NULL) && (ctxt->sax->error != NULL))
1.74 daniel 3934: ctxt->sax->error(ctxt->userData,
1.62 daniel 3935: "xmlParseElementChildrenContentDecl : '%c' expected\n",
3936: type);
1.123 ! daniel 3937: ctxt->errNo = XML_ERR_SEPARATOR_REQUIRED;
1.62 daniel 3938: ctxt->wellFormed = 0;
3939: xmlFreeElementContent(ret);
3940: return(NULL);
3941: }
1.64 daniel 3942: NEXT;
1.62 daniel 3943:
1.63 daniel 3944: op = xmlNewElementContent(NULL, XML_ELEMENT_CONTENT_OR);
3945: if (op == NULL) {
3946: xmlFreeElementContent(ret);
3947: return(NULL);
3948: }
3949: if (last == NULL) {
3950: op->c1 = ret;
1.65 daniel 3951: ret = cur = op;
1.63 daniel 3952: } else {
3953: cur->c2 = op;
3954: op->c1 = last;
3955: cur =op;
1.65 daniel 3956: last = NULL;
1.63 daniel 3957: }
1.62 daniel 3958: } else {
3959: if ((ctxt->sax != NULL) && (ctxt->sax->error != NULL))
1.74 daniel 3960: ctxt->sax->error(ctxt->userData,
1.62 daniel 3961: "xmlParseElementChildrenContentDecl : ',' '|' or ')' expected\n");
3962: ctxt->wellFormed = 0;
1.123 ! daniel 3963: ctxt->errNo = XML_ERR_ELEMCONTENT_NOT_FINISHED;
1.62 daniel 3964: xmlFreeElementContent(ret);
3965: return(NULL);
3966: }
1.101 daniel 3967: GROW;
1.62 daniel 3968: SKIP_BLANKS;
1.101 daniel 3969: GROW;
1.62 daniel 3970: if (CUR == '(') {
1.63 daniel 3971: /* Recurse on second child */
1.62 daniel 3972: NEXT;
3973: SKIP_BLANKS;
1.65 daniel 3974: last = xmlParseElementChildrenContentDecl(ctxt);
1.62 daniel 3975: SKIP_BLANKS;
3976: } else {
3977: elem = xmlParseName(ctxt);
3978: if (elem == NULL) {
3979: if ((ctxt->sax != NULL) && (ctxt->sax->error != NULL))
1.74 daniel 3980: ctxt->sax->error(ctxt->userData,
1.122 daniel 3981: "xmlParseElementChildrenContentDecl : Name or '(' expected\n");
1.123 ! daniel 3982: ctxt->errNo = XML_ERR_ELEMCONTENT_NOT_STARTED;
1.62 daniel 3983: ctxt->wellFormed = 0;
3984: return(NULL);
3985: }
1.65 daniel 3986: last = xmlNewElementContent(elem, XML_ELEMENT_CONTENT_ELEMENT);
1.119 daniel 3987: xmlFree(elem);
1.105 daniel 3988: if (CUR == '?') {
3989: last->ocur = XML_ELEMENT_CONTENT_OPT;
3990: NEXT;
3991: } else if (CUR == '*') {
3992: last->ocur = XML_ELEMENT_CONTENT_MULT;
3993: NEXT;
3994: } else if (CUR == '+') {
3995: last->ocur = XML_ELEMENT_CONTENT_PLUS;
3996: NEXT;
3997: } else {
3998: last->ocur = XML_ELEMENT_CONTENT_ONCE;
3999: }
1.63 daniel 4000: }
4001: SKIP_BLANKS;
1.97 daniel 4002: GROW;
1.64 daniel 4003: }
1.65 daniel 4004: if ((cur != NULL) && (last != NULL)) {
4005: cur->c2 = last;
1.62 daniel 4006: }
4007: NEXT;
4008: if (CUR == '?') {
4009: ret->ocur = XML_ELEMENT_CONTENT_OPT;
4010: NEXT;
4011: } else if (CUR == '*') {
4012: ret->ocur = XML_ELEMENT_CONTENT_MULT;
4013: NEXT;
4014: } else if (CUR == '+') {
4015: ret->ocur = XML_ELEMENT_CONTENT_PLUS;
4016: NEXT;
4017: }
4018: return(ret);
1.61 daniel 4019: }
4020:
4021: /**
4022: * xmlParseElementContentDecl:
4023: * @ctxt: an XML parser context
4024: * @name: the name of the element being defined.
4025: * @result: the Element Content pointer will be stored here if any
1.22 daniel 4026: *
1.61 daniel 4027: * parse the declaration for an Element content either Mixed or Children,
4028: * the cases EMPTY and ANY are handled directly in xmlParseElementDecl
4029: *
4030: * [46] contentspec ::= 'EMPTY' | 'ANY' | Mixed | children
1.50 daniel 4031: *
1.61 daniel 4032: * returns: the type of element content XML_ELEMENT_TYPE_xxx
1.22 daniel 4033: */
4034:
1.61 daniel 4035: int
1.123 ! daniel 4036: xmlParseElementContentDecl(xmlParserCtxtPtr ctxt, xmlChar *name,
1.61 daniel 4037: xmlElementContentPtr *result) {
4038:
4039: xmlElementContentPtr tree = NULL;
4040: int res;
4041:
4042: *result = NULL;
4043:
4044: if (CUR != '(') {
4045: if ((ctxt->sax != NULL) && (ctxt->sax->error != NULL))
1.74 daniel 4046: ctxt->sax->error(ctxt->userData,
1.61 daniel 4047: "xmlParseElementContentDecl : '(' expected\n");
1.123 ! daniel 4048: ctxt->errNo = XML_ERR_ELEMCONTENT_NOT_STARTED;
1.61 daniel 4049: ctxt->wellFormed = 0;
4050: return(-1);
4051: }
4052: NEXT;
1.97 daniel 4053: GROW;
1.61 daniel 4054: SKIP_BLANKS;
4055: if ((CUR == '#') && (NXT(1) == 'P') &&
4056: (NXT(2) == 'C') && (NXT(3) == 'D') &&
4057: (NXT(4) == 'A') && (NXT(5) == 'T') &&
4058: (NXT(6) == 'A')) {
1.62 daniel 4059: tree = xmlParseElementMixedContentDecl(ctxt);
1.61 daniel 4060: res = XML_ELEMENT_TYPE_MIXED;
4061: } else {
1.62 daniel 4062: tree = xmlParseElementChildrenContentDecl(ctxt);
1.61 daniel 4063: res = XML_ELEMENT_TYPE_ELEMENT;
4064: }
4065: SKIP_BLANKS;
1.63 daniel 4066: /****************************
1.61 daniel 4067: if (CUR != ')') {
4068: if ((ctxt->sax != NULL) && (ctxt->sax->error != NULL))
1.74 daniel 4069: ctxt->sax->error(ctxt->userData,
1.61 daniel 4070: "xmlParseElementContentDecl : ')' expected\n");
4071: ctxt->wellFormed = 0;
4072: return(-1);
4073: }
1.63 daniel 4074: ****************************/
4075: *result = tree;
1.61 daniel 4076: return(res);
1.22 daniel 4077: }
4078:
1.50 daniel 4079: /**
4080: * xmlParseElementDecl:
4081: * @ctxt: an XML parser context
4082: *
4083: * parse an Element declaration.
1.22 daniel 4084: *
4085: * [45] elementdecl ::= '<!ELEMENT' S Name S contentspec S? '>'
4086: *
1.99 daniel 4087: * [ VC: Unique Element Type Declaration ]
1.117 daniel 4088: * No element type may be declared more than once
1.69 daniel 4089: *
4090: * Returns the type of the element, or -1 in case of error
1.22 daniel 4091: */
1.59 daniel 4092: int
1.55 daniel 4093: xmlParseElementDecl(xmlParserCtxtPtr ctxt) {
1.123 ! daniel 4094: xmlChar *name;
1.59 daniel 4095: int ret = -1;
1.61 daniel 4096: xmlElementContentPtr content = NULL;
1.22 daniel 4097:
1.97 daniel 4098: GROW;
1.40 daniel 4099: if ((CUR == '<') && (NXT(1) == '!') &&
4100: (NXT(2) == 'E') && (NXT(3) == 'L') &&
4101: (NXT(4) == 'E') && (NXT(5) == 'M') &&
4102: (NXT(6) == 'E') && (NXT(7) == 'N') &&
1.59 daniel 4103: (NXT(8) == 'T')) {
1.40 daniel 4104: SKIP(9);
1.59 daniel 4105: if (!IS_BLANK(CUR)) {
4106: if ((ctxt->sax != NULL) && (ctxt->sax->error != NULL))
1.74 daniel 4107: ctxt->sax->error(ctxt->userData,
1.59 daniel 4108: "Space required after 'ELEMENT'\n");
1.123 ! daniel 4109: ctxt->errNo = XML_ERR_SPACE_REQUIRED;
1.59 daniel 4110: ctxt->wellFormed = 0;
4111: }
1.42 daniel 4112: SKIP_BLANKS;
1.22 daniel 4113: name = xmlParseName(ctxt);
4114: if (name == NULL) {
1.55 daniel 4115: if ((ctxt->sax != NULL) && (ctxt->sax->error != NULL))
1.74 daniel 4116: ctxt->sax->error(ctxt->userData,
1.59 daniel 4117: "xmlParseElementDecl: no name for Element\n");
1.123 ! daniel 4118: ctxt->errNo = XML_ERR_NAME_REQUIRED;
1.59 daniel 4119: ctxt->wellFormed = 0;
4120: return(-1);
4121: }
4122: if (!IS_BLANK(CUR)) {
4123: if ((ctxt->sax != NULL) && (ctxt->sax->error != NULL))
1.74 daniel 4124: ctxt->sax->error(ctxt->userData,
1.59 daniel 4125: "Space required after the element name\n");
1.123 ! daniel 4126: ctxt->errNo = XML_ERR_SPACE_REQUIRED;
1.59 daniel 4127: ctxt->wellFormed = 0;
1.22 daniel 4128: }
1.42 daniel 4129: SKIP_BLANKS;
1.40 daniel 4130: if ((CUR == 'E') && (NXT(1) == 'M') &&
4131: (NXT(2) == 'P') && (NXT(3) == 'T') &&
4132: (NXT(4) == 'Y')) {
4133: SKIP(5);
1.22 daniel 4134: /*
4135: * Element must always be empty.
4136: */
1.59 daniel 4137: ret = XML_ELEMENT_TYPE_EMPTY;
1.40 daniel 4138: } else if ((CUR == 'A') && (NXT(1) == 'N') &&
4139: (NXT(2) == 'Y')) {
4140: SKIP(3);
1.22 daniel 4141: /*
4142: * Element is a generic container.
4143: */
1.59 daniel 4144: ret = XML_ELEMENT_TYPE_ANY;
1.61 daniel 4145: } else if (CUR == '(') {
4146: ret = xmlParseElementContentDecl(ctxt, name, &content);
1.22 daniel 4147: } else {
1.98 daniel 4148: /*
4149: * [ WFC: PEs in Internal Subset ] error handling.
4150: */
4151: if ((CUR == '%') && (ctxt->external == 0) &&
4152: (ctxt->inputNr == 1)) {
4153: if ((ctxt->sax != NULL) && (ctxt->sax->error != NULL))
4154: ctxt->sax->error(ctxt->userData,
4155: "PEReference: forbidden within markup decl in internal subset\n");
1.123 ! daniel 4156: ctxt->errNo = XML_ERR_PEREF_IN_INT_SUBSET;
1.98 daniel 4157: } else {
4158: if ((ctxt->sax != NULL) && (ctxt->sax->error != NULL))
4159: ctxt->sax->error(ctxt->userData,
4160: "xmlParseElementDecl: 'EMPTY', 'ANY' or '(' expected\n");
1.123 ! daniel 4161: ctxt->errNo = XML_ERR_ELEMCONTENT_NOT_STARTED;
1.98 daniel 4162: }
1.61 daniel 4163: ctxt->wellFormed = 0;
1.119 daniel 4164: if (name != NULL) xmlFree(name);
1.61 daniel 4165: return(-1);
1.22 daniel 4166: }
1.42 daniel 4167: SKIP_BLANKS;
1.40 daniel 4168: if (CUR != '>') {
1.55 daniel 4169: if ((ctxt->sax != NULL) && (ctxt->sax->error != NULL))
1.74 daniel 4170: ctxt->sax->error(ctxt->userData,
1.31 daniel 4171: "xmlParseElementDecl: expected '>' at the end\n");
1.123 ! daniel 4172: ctxt->errNo = XML_ERR_GT_REQUIRED;
1.59 daniel 4173: ctxt->wellFormed = 0;
1.61 daniel 4174: } else {
1.40 daniel 4175: NEXT;
1.72 daniel 4176: if ((ctxt->sax != NULL) && (ctxt->sax->elementDecl != NULL))
1.76 daniel 4177: ctxt->sax->elementDecl(ctxt->userData, name, ret,
4178: content);
1.61 daniel 4179: }
1.84 daniel 4180: if (content != NULL) {
4181: xmlFreeElementContent(content);
4182: }
1.61 daniel 4183: if (name != NULL) {
1.119 daniel 4184: xmlFree(name);
1.61 daniel 4185: }
1.22 daniel 4186: }
1.59 daniel 4187: return(ret);
1.22 daniel 4188: }
4189:
1.50 daniel 4190: /**
4191: * xmlParseMarkupDecl:
4192: * @ctxt: an XML parser context
4193: *
4194: * parse Markup declarations
1.22 daniel 4195: *
4196: * [29] markupdecl ::= elementdecl | AttlistDecl | EntityDecl |
4197: * NotationDecl | PI | Comment
4198: *
1.98 daniel 4199: * [ VC: Proper Declaration/PE Nesting ]
4200: * TODO Parameter-entity replacement text must be properly nested with
4201: * markup declarations. That is to say, if either the first character
4202: * or the last character of a markup declaration (markupdecl above) is
4203: * contained in the replacement text for a parameter-entity reference,
4204: * both must be contained in the same replacement text.
4205: *
4206: * [ WFC: PEs in Internal Subset ]
4207: * In the internal DTD subset, parameter-entity references can occur
4208: * only where markup declarations can occur, not within markup declarations.
4209: * (This does not apply to references that occur in external parameter
4210: * entities or to the external subset.)
1.22 daniel 4211: */
1.55 daniel 4212: void
4213: xmlParseMarkupDecl(xmlParserCtxtPtr ctxt) {
1.94 daniel 4214: GROW;
1.22 daniel 4215: xmlParseElementDecl(ctxt);
4216: xmlParseAttributeListDecl(ctxt);
4217: xmlParseEntityDecl(ctxt);
4218: xmlParseNotationDecl(ctxt);
4219: xmlParsePI(ctxt);
1.114 daniel 4220: xmlParseComment(ctxt);
1.98 daniel 4221: /*
4222: * This is only for internal subset. On external entities,
4223: * the replacement is done before parsing stage
4224: */
4225: if ((ctxt->external == 0) && (ctxt->inputNr == 1))
4226: xmlParsePEReference(ctxt);
1.97 daniel 4227: ctxt->instate = XML_PARSER_DTD;
1.22 daniel 4228: }
4229:
1.50 daniel 4230: /**
1.76 daniel 4231: * xmlParseTextDecl:
4232: * @ctxt: an XML parser context
4233: *
4234: * parse an XML declaration header for external entities
4235: *
4236: * [77] TextDecl ::= '<?xml' VersionInfo? EncodingDecl S? '?>'
4237: *
4238: * Returns the only valuable info for an external parsed entity, the encoding
4239: */
4240:
1.123 ! daniel 4241: xmlChar *
1.76 daniel 4242: xmlParseTextDecl(xmlParserCtxtPtr ctxt) {
1.123 ! daniel 4243: xmlChar *version;
! 4244: xmlChar *encoding = NULL;
1.76 daniel 4245:
4246: /*
4247: * We know that '<?xml' is here.
4248: */
4249: SKIP(5);
4250:
4251: if (!IS_BLANK(CUR)) {
4252: if ((ctxt->sax != NULL) && (ctxt->sax->error != NULL))
1.122 daniel 4253: ctxt->sax->error(ctxt->userData,
4254: "Space needed after '<?xml'\n");
1.123 ! daniel 4255: ctxt->errNo = XML_ERR_SPACE_REQUIRED;
1.76 daniel 4256: ctxt->wellFormed = 0;
4257: }
4258: SKIP_BLANKS;
4259:
4260: /*
4261: * We may have the VersionInfo here.
4262: */
4263: version = xmlParseVersionInfo(ctxt);
4264: if (version == NULL)
4265: version = xmlCharStrdup(XML_DEFAULT_VERSION);
4266: ctxt->version = xmlStrdup(version);
1.119 daniel 4267: xmlFree(version);
1.76 daniel 4268:
4269: /*
4270: * We must have the encoding declaration
4271: */
4272: if (!IS_BLANK(CUR)) {
4273: if ((ctxt->sax != NULL) && (ctxt->sax->error != NULL))
1.122 daniel 4274: ctxt->sax->error(ctxt->userData, "Space needed here\n");
1.123 ! daniel 4275: ctxt->errNo = XML_ERR_SPACE_REQUIRED;
1.76 daniel 4276: ctxt->wellFormed = 0;
4277: }
4278: encoding = xmlParseEncodingDecl(ctxt);
4279:
4280: SKIP_BLANKS;
4281: if ((CUR == '?') && (NXT(1) == '>')) {
4282: SKIP(2);
4283: } else if (CUR == '>') {
4284: /* Deprecated old WD ... */
4285: if ((ctxt->sax != NULL) && (ctxt->sax->error != NULL))
1.122 daniel 4286: ctxt->sax->error(ctxt->userData,
4287: "XML declaration must end-up with '?>'\n");
1.123 ! daniel 4288: ctxt->errNo = XML_ERR_XMLDECL_NOT_FINISHED;
1.76 daniel 4289: ctxt->wellFormed = 0;
4290: NEXT;
4291: } else {
4292: if ((ctxt->sax != NULL) && (ctxt->sax->error != NULL))
1.122 daniel 4293: ctxt->sax->error(ctxt->userData,
4294: "parsing XML declaration: '?>' expected\n");
1.123 ! daniel 4295: ctxt->errNo = XML_ERR_XMLDECL_NOT_FINISHED;
1.76 daniel 4296: ctxt->wellFormed = 0;
4297: MOVETO_ENDTAG(CUR_PTR);
4298: NEXT;
4299: }
4300: return(encoding);
4301: }
4302:
4303: /*
4304: * xmlParseConditionalSections
4305: * @ctxt: an XML parser context
4306: *
4307: * TODO : Conditionnal section are not yet supported !
4308: *
4309: * [61] conditionalSect ::= includeSect | ignoreSect
4310: * [62] includeSect ::= '<![' S? 'INCLUDE' S? '[' extSubsetDecl ']]>'
4311: * [63] ignoreSect ::= '<![' S? 'IGNORE' S? '[' ignoreSectContents* ']]>'
4312: * [64] ignoreSectContents ::= Ignore ('<![' ignoreSectContents ']]>' Ignore)*
4313: * [65] Ignore ::= Char* - (Char* ('<![' | ']]>') Char*)
4314: */
4315:
4316: void
4317: xmlParseConditionalSections(xmlParserCtxtPtr ctxt) {
4318: if ((ctxt->sax != NULL) && (ctxt->sax->warning != NULL))
4319: ctxt->sax->warning(ctxt->userData,
4320: "XML conditional section not supported\n");
4321: /*
4322: * Skip up to the end of the conditionnal section.
4323: */
4324: while ((CUR != 0) && ((CUR != ']') || (NXT(1) != ']') || (NXT(2) != '>')))
4325: NEXT;
4326: if (CUR == 0) {
4327: if ((ctxt->sax != NULL) && (ctxt->sax->error != NULL))
4328: ctxt->sax->error(ctxt->userData,
4329: "XML conditional section not closed\n");
1.123 ! daniel 4330: ctxt->errNo = XML_ERR_CONDSEC_NOT_FINISHED;
1.76 daniel 4331: ctxt->wellFormed = 0;
4332: }
4333: }
4334:
4335: /**
4336: * xmlParseExternalSubset
4337: * @ctxt: an XML parser context
4338: *
4339: * parse Markup declarations from an external subset
4340: *
4341: * [30] extSubset ::= textDecl? extSubsetDecl
4342: *
4343: * [31] extSubsetDecl ::= (markupdecl | conditionalSect | PEReference | S) *
4344: */
4345: void
1.123 ! daniel 4346: xmlParseExternalSubset(xmlParserCtxtPtr ctxt, const xmlChar *ExternalID,
! 4347: const xmlChar *SystemID) {
1.76 daniel 4348: if ((CUR == '<') && (NXT(1) == '?') &&
4349: (NXT(2) == 'x') && (NXT(3) == 'm') &&
4350: (NXT(4) == 'l')) {
4351: xmlParseTextDecl(ctxt);
4352: }
1.79 daniel 4353: if (ctxt->myDoc == NULL) {
1.116 daniel 4354: ctxt->myDoc = xmlNewDoc(BAD_CAST "1.0");
1.79 daniel 4355: }
4356: if ((ctxt->myDoc != NULL) && (ctxt->myDoc->intSubset == NULL))
4357: xmlCreateIntSubset(ctxt->myDoc, NULL, ExternalID, SystemID);
4358:
1.96 daniel 4359: ctxt->instate = XML_PARSER_DTD;
1.101 daniel 4360: ctxt->external = 1;
1.76 daniel 4361: while (((CUR == '<') && (NXT(1) == '?')) ||
4362: ((CUR == '<') && (NXT(1) == '!')) ||
4363: IS_BLANK(CUR)) {
1.123 ! daniel 4364: const xmlChar *check = CUR_PTR;
1.115 daniel 4365: int cons = ctxt->input->consumed;
4366:
1.76 daniel 4367: if ((CUR == '<') && (NXT(1) == '!') && (NXT(2) == '[')) {
4368: xmlParseConditionalSections(ctxt);
4369: } else if (IS_BLANK(CUR)) {
4370: NEXT;
4371: } else if (CUR == '%') {
4372: xmlParsePEReference(ctxt);
4373: } else
4374: xmlParseMarkupDecl(ctxt);
1.77 daniel 4375:
4376: /*
4377: * Pop-up of finished entities.
4378: */
4379: while ((CUR == 0) && (ctxt->inputNr > 1))
4380: xmlPopInput(ctxt);
4381:
1.115 daniel 4382: if ((CUR_PTR == check) && (cons == ctxt->input->consumed)) {
4383: if ((ctxt->sax != NULL) && (ctxt->sax->error != NULL))
4384: ctxt->sax->error(ctxt->userData,
4385: "Content error in the external subset\n");
4386: ctxt->wellFormed = 0;
1.123 ! daniel 4387: ctxt->errNo = XML_ERR_EXT_SUBSET_NOT_FINISHED;
1.115 daniel 4388: break;
4389: }
1.76 daniel 4390: }
4391:
4392: if (CUR != 0) {
4393: if ((ctxt->sax != NULL) && (ctxt->sax->error != NULL))
4394: ctxt->sax->error(ctxt->userData,
4395: "Extra content at the end of the document\n");
1.123 ! daniel 4396: ctxt->errNo = XML_ERR_EXT_SUBSET_NOT_FINISHED;
1.76 daniel 4397: ctxt->wellFormed = 0;
4398: }
4399:
4400: }
4401:
4402: /**
1.77 daniel 4403: * xmlParseReference:
4404: * @ctxt: an XML parser context
4405: *
4406: * parse and handle entity references in content, depending on the SAX
4407: * interface, this may end-up in a call to character() if this is a
1.79 daniel 4408: * CharRef, a predefined entity, if there is no reference() callback.
4409: * or if the parser was asked to switch to that mode.
1.77 daniel 4410: *
4411: * [67] Reference ::= EntityRef | CharRef
4412: */
4413: void
4414: xmlParseReference(xmlParserCtxtPtr ctxt) {
4415: xmlEntityPtr ent;
1.123 ! daniel 4416: xmlChar *val;
1.77 daniel 4417: if (CUR != '&') return;
4418:
1.113 daniel 4419: if (ctxt->inputNr > 1) {
1.123 ! daniel 4420: xmlChar cur[2] = { '&' , 0 } ;
1.113 daniel 4421:
4422: if ((ctxt->sax != NULL) && (ctxt->sax->characters != NULL))
4423: ctxt->sax->characters(ctxt->userData, cur, 1);
4424: if (ctxt->token == '&')
4425: ctxt->token = 0;
4426: else {
4427: SKIP(1);
4428: }
4429: return;
4430: }
1.77 daniel 4431: if (NXT(1) == '#') {
1.123 ! daniel 4432: xmlChar out[2];
1.77 daniel 4433: int val = xmlParseCharRef(ctxt);
1.117 daniel 4434: /* invalid for UTF-8 variable encoding !!!!! */
1.77 daniel 4435: out[0] = val;
4436: out[1] = 0;
4437: if ((ctxt->sax != NULL) && (ctxt->sax->characters != NULL))
4438: ctxt->sax->characters(ctxt->userData, out, 1);
4439: } else {
4440: ent = xmlParseEntityRef(ctxt);
4441: if (ent == NULL) return;
4442: if ((ent->name != NULL) &&
1.113 daniel 4443: (ent->type != XML_INTERNAL_PREDEFINED_ENTITY)) {
4444: if ((ctxt->sax != NULL) && (ctxt->sax->reference != NULL) &&
4445: (ctxt->replaceEntities == 0)) {
4446: /*
4447: * Create a node.
4448: */
4449: ctxt->sax->reference(ctxt->userData, ent->name);
4450: return;
4451: } else if (ctxt->replaceEntities) {
4452: xmlParserInputPtr input;
1.79 daniel 4453:
1.113 daniel 4454: input = xmlNewEntityInputStream(ctxt, ent);
4455: xmlPushInput(ctxt, input);
4456: return;
4457: }
1.77 daniel 4458: }
4459: val = ent->content;
4460: if (val == NULL) return;
4461: /*
4462: * inline the entity.
4463: */
4464: if ((ctxt->sax != NULL) && (ctxt->sax->characters != NULL))
4465: ctxt->sax->characters(ctxt->userData, val, xmlStrlen(val));
4466: }
1.24 daniel 4467: }
4468:
1.50 daniel 4469: /**
4470: * xmlParseEntityRef:
4471: * @ctxt: an XML parser context
4472: *
4473: * parse ENTITY references declarations
1.24 daniel 4474: *
4475: * [68] EntityRef ::= '&' Name ';'
1.68 daniel 4476: *
1.98 daniel 4477: * [ WFC: Entity Declared ]
4478: * In a document without any DTD, a document with only an internal DTD
4479: * subset which contains no parameter entity references, or a document
4480: * with "standalone='yes'", the Name given in the entity reference
4481: * must match that in an entity declaration, except that well-formed
4482: * documents need not declare any of the following entities: amp, lt,
4483: * gt, apos, quot. The declaration of a parameter entity must precede
4484: * any reference to it. Similarly, the declaration of a general entity
4485: * must precede any reference to it which appears in a default value in an
4486: * attribute-list declaration. Note that if entities are declared in the
4487: * external subset or in external parameter entities, a non-validating
4488: * processor is not obligated to read and process their declarations;
4489: * for such documents, the rule that an entity must be declared is a
4490: * well-formedness constraint only if standalone='yes'.
4491: *
4492: * [ WFC: Parsed Entity ]
4493: * An entity reference must not contain the name of an unparsed entity
4494: *
1.77 daniel 4495: * Returns the xmlEntityPtr if found, or NULL otherwise.
1.24 daniel 4496: */
1.77 daniel 4497: xmlEntityPtr
1.55 daniel 4498: xmlParseEntityRef(xmlParserCtxtPtr ctxt) {
1.123 ! daniel 4499: xmlChar *name;
1.72 daniel 4500: xmlEntityPtr ent = NULL;
1.24 daniel 4501:
1.91 daniel 4502: GROW;
1.111 daniel 4503:
1.40 daniel 4504: if (CUR == '&') {
4505: NEXT;
1.24 daniel 4506: name = xmlParseName(ctxt);
4507: if (name == NULL) {
1.55 daniel 4508: if ((ctxt->sax != NULL) && (ctxt->sax->error != NULL))
1.98 daniel 4509: ctxt->sax->error(ctxt->userData,
4510: "xmlParseEntityRef: no name\n");
1.123 ! daniel 4511: ctxt->errNo = XML_ERR_NAME_REQUIRED;
1.59 daniel 4512: ctxt->wellFormed = 0;
1.24 daniel 4513: } else {
1.40 daniel 4514: if (CUR == ';') {
4515: NEXT;
1.24 daniel 4516: /*
1.77 daniel 4517: * Ask first SAX for entity resolution, otherwise try the
4518: * predefined set.
4519: */
4520: if (ctxt->sax != NULL) {
4521: if (ctxt->sax->getEntity != NULL)
4522: ent = ctxt->sax->getEntity(ctxt->userData, name);
4523: if (ent == NULL)
4524: ent = xmlGetPredefinedEntity(name);
4525: }
4526: /*
1.98 daniel 4527: * [ WFC: Entity Declared ]
4528: * In a document without any DTD, a document with only an
4529: * internal DTD subset which contains no parameter entity
4530: * references, or a document with "standalone='yes'", the
4531: * Name given in the entity reference must match that in an
4532: * entity declaration, except that well-formed documents
4533: * need not declare any of the following entities: amp, lt,
4534: * gt, apos, quot.
4535: * The declaration of a parameter entity must precede any
4536: * reference to it.
4537: * Similarly, the declaration of a general entity must
4538: * precede any reference to it which appears in a default
4539: * value in an attribute-list declaration. Note that if
4540: * entities are declared in the external subset or in
4541: * external parameter entities, a non-validating processor
4542: * is not obligated to read and process their declarations;
4543: * for such documents, the rule that an entity must be
4544: * declared is a well-formedness constraint only if
4545: * standalone='yes'.
1.59 daniel 4546: */
1.77 daniel 4547: if (ent == NULL) {
1.98 daniel 4548: if ((ctxt->standalone == 1) ||
4549: ((ctxt->hasExternalSubset == 0) &&
4550: (ctxt->hasPErefs == 0))) {
4551: if ((ctxt->sax != NULL) && (ctxt->sax->error != NULL))
1.77 daniel 4552: ctxt->sax->error(ctxt->userData,
4553: "Entity '%s' not defined\n", name);
1.123 ! daniel 4554: ctxt->errNo = XML_ERR_UNDECLARED_ENTITY;
1.77 daniel 4555: ctxt->wellFormed = 0;
4556: } else {
1.98 daniel 4557: if ((ctxt->sax != NULL) && (ctxt->sax->warning != NULL))
4558: ctxt->sax->warning(ctxt->userData,
4559: "Entity '%s' not defined\n", name);
1.123 ! daniel 4560: ctxt->errNo = XML_WAR_UNDECLARED_ENTITY;
1.59 daniel 4561: }
1.77 daniel 4562: }
1.59 daniel 4563:
4564: /*
1.98 daniel 4565: * [ WFC: Parsed Entity ]
4566: * An entity reference must not contain the name of an
4567: * unparsed entity
4568: */
4569: else if (ent->type == XML_EXTERNAL_GENERAL_UNPARSED_ENTITY) {
4570: if ((ctxt->sax != NULL) && (ctxt->sax->error != NULL))
4571: ctxt->sax->error(ctxt->userData,
4572: "Entity reference to unparsed entity %s\n", name);
1.123 ! daniel 4573: ctxt->errNo = XML_ERR_UNPARSED_ENTITY;
1.98 daniel 4574: ctxt->wellFormed = 0;
4575: }
4576:
4577: /*
4578: * [ WFC: No External Entity References ]
4579: * Attribute values cannot contain direct or indirect
4580: * entity references to external entities.
4581: */
4582: else if ((ctxt->instate == XML_PARSER_ATTRIBUTE_VALUE) &&
4583: (ent->type == XML_EXTERNAL_GENERAL_PARSED_ENTITY)) {
4584: if ((ctxt->sax != NULL) && (ctxt->sax->error != NULL))
4585: ctxt->sax->error(ctxt->userData,
4586: "Attribute references external entity '%s'\n", name);
1.123 ! daniel 4587: ctxt->errNo = XML_ERR_ENTITY_IS_EXTERNAL;
1.98 daniel 4588: ctxt->wellFormed = 0;
4589: }
4590: /*
4591: * [ WFC: No < in Attribute Values ]
4592: * The replacement text of any entity referred to directly or
4593: * indirectly in an attribute value (other than "<") must
4594: * not contain a <.
1.59 daniel 4595: */
1.98 daniel 4596: else if ((ctxt->instate == XML_PARSER_ATTRIBUTE_VALUE) &&
1.116 daniel 4597: (ent != NULL) &&
4598: (xmlStrcmp(ent->name, BAD_CAST "lt")) &&
1.98 daniel 4599: (ent->content != NULL) &&
4600: (xmlStrchr(ent->content, '<'))) {
4601: if ((ctxt->sax != NULL) && (ctxt->sax->error != NULL))
4602: ctxt->sax->error(ctxt->userData,
4603: "'<' in entity '%s' is not allowed in attributes values\n", name);
1.123 ! daniel 4604: ctxt->errNo = XML_ERR_LT_IN_ATTRIBUTE;
1.98 daniel 4605: ctxt->wellFormed = 0;
4606: }
4607:
4608: /*
4609: * Internal check, no parameter entities here ...
4610: */
4611: else {
1.59 daniel 4612: switch (ent->type) {
4613: case XML_INTERNAL_PARAMETER_ENTITY:
4614: case XML_EXTERNAL_PARAMETER_ENTITY:
4615: if ((ctxt->sax != NULL) && (ctxt->sax->error != NULL))
1.74 daniel 4616: ctxt->sax->error(ctxt->userData,
1.59 daniel 4617: "Attempt to reference the parameter entity '%s'\n", name);
1.123 ! daniel 4618: ctxt->errNo = XML_ERR_ENTITY_IS_PARAMETER;
1.59 daniel 4619: ctxt->wellFormed = 0;
4620: break;
4621: }
4622: }
4623:
4624: /*
1.98 daniel 4625: * [ WFC: No Recursion ]
1.117 daniel 4626: * TODO A parsed entity must not contain a recursive reference
4627: * to itself, either directly or indirectly.
1.59 daniel 4628: */
1.77 daniel 4629:
1.24 daniel 4630: } else {
1.55 daniel 4631: if ((ctxt->sax != NULL) && (ctxt->sax->error != NULL))
1.74 daniel 4632: ctxt->sax->error(ctxt->userData,
1.59 daniel 4633: "xmlParseEntityRef: expecting ';'\n");
1.123 ! daniel 4634: ctxt->errNo = XML_ERR_ENTITYREF_SEMICOL_MISSING;
1.59 daniel 4635: ctxt->wellFormed = 0;
1.24 daniel 4636: }
1.119 daniel 4637: xmlFree(name);
1.24 daniel 4638: }
4639: }
1.77 daniel 4640: return(ent);
1.24 daniel 4641: }
4642:
1.50 daniel 4643: /**
4644: * xmlParsePEReference:
4645: * @ctxt: an XML parser context
4646: *
4647: * parse PEReference declarations
1.77 daniel 4648: * The entity content is handled directly by pushing it's content as
4649: * a new input stream.
1.22 daniel 4650: *
4651: * [69] PEReference ::= '%' Name ';'
1.68 daniel 4652: *
1.98 daniel 4653: * [ WFC: No Recursion ]
4654: * TODO A parsed entity must not contain a recursive
4655: * reference to itself, either directly or indirectly.
4656: *
4657: * [ WFC: Entity Declared ]
4658: * In a document without any DTD, a document with only an internal DTD
4659: * subset which contains no parameter entity references, or a document
4660: * with "standalone='yes'", ... ... The declaration of a parameter
4661: * entity must precede any reference to it...
4662: *
4663: * [ VC: Entity Declared ]
4664: * In a document with an external subset or external parameter entities
4665: * with "standalone='no'", ... ... The declaration of a parameter entity
4666: * must precede any reference to it...
4667: *
4668: * [ WFC: In DTD ]
4669: * Parameter-entity references may only appear in the DTD.
4670: * NOTE: misleading but this is handled.
1.22 daniel 4671: */
1.77 daniel 4672: void
1.55 daniel 4673: xmlParsePEReference(xmlParserCtxtPtr ctxt) {
1.123 ! daniel 4674: xmlChar *name;
1.72 daniel 4675: xmlEntityPtr entity = NULL;
1.50 daniel 4676: xmlParserInputPtr input;
1.22 daniel 4677:
1.40 daniel 4678: if (CUR == '%') {
4679: NEXT;
1.22 daniel 4680: name = xmlParseName(ctxt);
4681: if (name == NULL) {
1.55 daniel 4682: if ((ctxt->sax != NULL) && (ctxt->sax->error != NULL))
1.122 daniel 4683: ctxt->sax->error(ctxt->userData,
4684: "xmlParsePEReference: no name\n");
1.123 ! daniel 4685: ctxt->errNo = XML_ERR_NAME_REQUIRED;
1.59 daniel 4686: ctxt->wellFormed = 0;
1.22 daniel 4687: } else {
1.40 daniel 4688: if (CUR == ';') {
4689: NEXT;
1.98 daniel 4690: if ((ctxt->sax != NULL) &&
4691: (ctxt->sax->getParameterEntity != NULL))
4692: entity = ctxt->sax->getParameterEntity(ctxt->userData,
4693: name);
1.45 daniel 4694: if (entity == NULL) {
1.98 daniel 4695: /*
4696: * [ WFC: Entity Declared ]
4697: * In a document without any DTD, a document with only an
4698: * internal DTD subset which contains no parameter entity
4699: * references, or a document with "standalone='yes'", ...
4700: * ... The declaration of a parameter entity must precede
4701: * any reference to it...
4702: */
4703: if ((ctxt->standalone == 1) ||
4704: ((ctxt->hasExternalSubset == 0) &&
4705: (ctxt->hasPErefs == 0))) {
4706: if ((ctxt->sax != NULL) && (ctxt->sax->error != NULL))
4707: ctxt->sax->error(ctxt->userData,
4708: "PEReference: %%%s; not found\n", name);
1.123 ! daniel 4709: ctxt->errNo = XML_ERR_UNDECLARED_ENTITY;
1.98 daniel 4710: ctxt->wellFormed = 0;
4711: } else {
4712: /*
4713: * [ VC: Entity Declared ]
4714: * In a document with an external subset or external
4715: * parameter entities with "standalone='no'", ...
4716: * ... The declaration of a parameter entity must precede
4717: * any reference to it...
4718: */
4719: if ((ctxt->sax != NULL) && (ctxt->sax->warning != NULL))
4720: ctxt->sax->warning(ctxt->userData,
4721: "PEReference: %%%s; not found\n", name);
4722: ctxt->valid = 0;
4723: }
1.50 daniel 4724: } else {
1.98 daniel 4725: /*
4726: * Internal checking in case the entity quest barfed
4727: */
4728: if ((entity->type != XML_INTERNAL_PARAMETER_ENTITY) &&
4729: (entity->type != XML_EXTERNAL_PARAMETER_ENTITY)) {
4730: if ((ctxt->sax != NULL) && (ctxt->sax->warning != NULL))
4731: ctxt->sax->warning(ctxt->userData,
4732: "Internal: %%%s; is not a parameter entity\n", name);
4733: } else {
4734: input = xmlNewEntityInputStream(ctxt, entity);
4735: xmlPushInput(ctxt, input);
4736: }
1.45 daniel 4737: }
1.98 daniel 4738: ctxt->hasPErefs = 1;
1.22 daniel 4739: } else {
1.55 daniel 4740: if ((ctxt->sax != NULL) && (ctxt->sax->error != NULL))
1.74 daniel 4741: ctxt->sax->error(ctxt->userData,
1.59 daniel 4742: "xmlParsePEReference: expecting ';'\n");
1.123 ! daniel 4743: ctxt->errNo = XML_ERR_ENTITYREF_SEMICOL_MISSING;
1.59 daniel 4744: ctxt->wellFormed = 0;
1.22 daniel 4745: }
1.119 daniel 4746: xmlFree(name);
1.3 veillard 4747: }
4748: }
4749: }
4750:
1.50 daniel 4751: /**
4752: * xmlParseDocTypeDecl :
4753: * @ctxt: an XML parser context
4754: *
4755: * parse a DOCTYPE declaration
1.21 daniel 4756: *
1.22 daniel 4757: * [28] doctypedecl ::= '<!DOCTYPE' S Name (S ExternalID)? S?
4758: * ('[' (markupdecl | PEReference | S)* ']' S?)? '>'
1.98 daniel 4759: *
4760: * [ VC: Root Element Type ]
1.99 daniel 4761: * The Name in the document type declaration must match the element
1.98 daniel 4762: * type of the root element.
1.21 daniel 4763: */
4764:
1.55 daniel 4765: void
4766: xmlParseDocTypeDecl(xmlParserCtxtPtr ctxt) {
1.123 ! daniel 4767: xmlChar *name;
! 4768: xmlChar *ExternalID = NULL;
! 4769: xmlChar *URI = NULL;
1.21 daniel 4770:
4771: /*
4772: * We know that '<!DOCTYPE' has been detected.
4773: */
1.40 daniel 4774: SKIP(9);
1.21 daniel 4775:
1.42 daniel 4776: SKIP_BLANKS;
1.21 daniel 4777:
4778: /*
4779: * Parse the DOCTYPE name.
4780: */
4781: name = xmlParseName(ctxt);
4782: if (name == NULL) {
1.55 daniel 4783: if ((ctxt->sax != NULL) && (ctxt->sax->error != NULL))
1.122 daniel 4784: ctxt->sax->error(ctxt->userData,
4785: "xmlParseDocTypeDecl : no DOCTYPE name !\n");
1.59 daniel 4786: ctxt->wellFormed = 0;
1.123 ! daniel 4787: ctxt->errNo = XML_ERR_NAME_REQUIRED;
1.21 daniel 4788: }
4789:
1.42 daniel 4790: SKIP_BLANKS;
1.21 daniel 4791:
4792: /*
1.22 daniel 4793: * Check for SystemID and ExternalID
4794: */
1.67 daniel 4795: URI = xmlParseExternalID(ctxt, &ExternalID, 1);
1.98 daniel 4796:
4797: if ((URI != NULL) || (ExternalID != NULL)) {
4798: ctxt->hasExternalSubset = 1;
4799: }
4800:
1.42 daniel 4801: SKIP_BLANKS;
1.36 daniel 4802:
1.76 daniel 4803: /*
4804: * NOTE: the SAX callback may try to fetch the external subset
4805: * entity and fill it up !
4806: */
1.72 daniel 4807: if ((ctxt->sax != NULL) && (ctxt->sax->internalSubset != NULL))
1.74 daniel 4808: ctxt->sax->internalSubset(ctxt->userData, name, ExternalID, URI);
1.22 daniel 4809:
4810: /*
4811: * Is there any DTD definition ?
4812: */
1.40 daniel 4813: if (CUR == '[') {
1.96 daniel 4814: ctxt->instate = XML_PARSER_DTD;
1.40 daniel 4815: NEXT;
1.22 daniel 4816: /*
4817: * Parse the succession of Markup declarations and
4818: * PEReferences.
4819: * Subsequence (markupdecl | PEReference | S)*
4820: */
1.40 daniel 4821: while (CUR != ']') {
1.123 ! daniel 4822: const xmlChar *check = CUR_PTR;
1.115 daniel 4823: int cons = ctxt->input->consumed;
1.22 daniel 4824:
1.42 daniel 4825: SKIP_BLANKS;
1.22 daniel 4826: xmlParseMarkupDecl(ctxt);
1.50 daniel 4827: xmlParsePEReference(ctxt);
1.22 daniel 4828:
1.115 daniel 4829: /*
4830: * Pop-up of finished entities.
4831: */
4832: while ((CUR == 0) && (ctxt->inputNr > 1))
4833: xmlPopInput(ctxt);
4834:
1.118 daniel 4835: if ((CUR_PTR == check) && (cons == ctxt->input->consumed)) {
1.55 daniel 4836: if ((ctxt->sax != NULL) && (ctxt->sax->error != NULL))
1.74 daniel 4837: ctxt->sax->error(ctxt->userData,
1.31 daniel 4838: "xmlParseDocTypeDecl: error detected in Markup declaration\n");
1.59 daniel 4839: ctxt->wellFormed = 0;
1.123 ! daniel 4840: ctxt->errNo = XML_ERR_INTERNAL_ERROR;
1.22 daniel 4841: break;
4842: }
4843: }
1.40 daniel 4844: if (CUR == ']') NEXT;
1.22 daniel 4845: }
4846:
4847: /*
4848: * We should be at the end of the DOCTYPE declaration.
1.21 daniel 4849: */
1.40 daniel 4850: if (CUR != '>') {
1.55 daniel 4851: if ((ctxt->sax != NULL) && (ctxt->sax->error != NULL))
1.74 daniel 4852: ctxt->sax->error(ctxt->userData, "DOCTYPE unproperly terminated\n");
1.59 daniel 4853: ctxt->wellFormed = 0;
1.123 ! daniel 4854: ctxt->errNo = XML_ERR_DOCTYPE_NOT_FINISHED;
1.21 daniel 4855: }
1.40 daniel 4856: NEXT;
1.22 daniel 4857:
4858: /*
1.99 daniel 4859: * Cleanup
1.22 daniel 4860: */
1.119 daniel 4861: if (URI != NULL) xmlFree(URI);
4862: if (ExternalID != NULL) xmlFree(ExternalID);
4863: if (name != NULL) xmlFree(name);
1.21 daniel 4864: }
4865:
1.50 daniel 4866: /**
4867: * xmlParseAttribute:
4868: * @ctxt: an XML parser context
1.123 ! daniel 4869: * @value: a xmlChar ** used to store the value of the attribute
1.50 daniel 4870: *
4871: * parse an attribute
1.3 veillard 4872: *
1.22 daniel 4873: * [41] Attribute ::= Name Eq AttValue
4874: *
1.98 daniel 4875: * [ WFC: No External Entity References ]
4876: * Attribute values cannot contain direct or indirect entity references
4877: * to external entities.
4878: *
4879: * [ WFC: No < in Attribute Values ]
4880: * The replacement text of any entity referred to directly or indirectly in
4881: * an attribute value (other than "<") must not contain a <.
4882: *
4883: * [ VC: Attribute Value Type ]
1.117 daniel 4884: * The attribute must have been declared; the value must be of the type
1.99 daniel 4885: * declared for it.
1.98 daniel 4886: *
1.22 daniel 4887: * [25] Eq ::= S? '=' S?
4888: *
1.29 daniel 4889: * With namespace:
4890: *
4891: * [NS 11] Attribute ::= QName Eq AttValue
1.43 daniel 4892: *
4893: * Also the case QName == xmlns:??? is handled independently as a namespace
4894: * definition.
1.69 daniel 4895: *
1.72 daniel 4896: * Returns the attribute name, and the value in *value.
1.3 veillard 4897: */
4898:
1.123 ! daniel 4899: xmlChar *
! 4900: xmlParseAttribute(xmlParserCtxtPtr ctxt, xmlChar **value) {
! 4901: xmlChar *name, *val;
1.3 veillard 4902:
1.72 daniel 4903: *value = NULL;
4904: name = xmlParseName(ctxt);
1.22 daniel 4905: if (name == NULL) {
1.55 daniel 4906: if ((ctxt->sax != NULL) && (ctxt->sax->error != NULL))
1.74 daniel 4907: ctxt->sax->error(ctxt->userData, "error parsing attribute name\n");
1.59 daniel 4908: ctxt->wellFormed = 0;
1.123 ! daniel 4909: ctxt->errNo = XML_ERR_NAME_REQUIRED;
1.52 daniel 4910: return(NULL);
1.3 veillard 4911: }
4912:
4913: /*
1.29 daniel 4914: * read the value
1.3 veillard 4915: */
1.42 daniel 4916: SKIP_BLANKS;
1.40 daniel 4917: if (CUR == '=') {
4918: NEXT;
1.42 daniel 4919: SKIP_BLANKS;
1.72 daniel 4920: val = xmlParseAttValue(ctxt);
1.96 daniel 4921: ctxt->instate = XML_PARSER_CONTENT;
1.29 daniel 4922: } else {
1.55 daniel 4923: if ((ctxt->sax != NULL) && (ctxt->sax->error != NULL))
1.74 daniel 4924: ctxt->sax->error(ctxt->userData,
1.59 daniel 4925: "Specification mandate value for attribute %s\n", name);
1.123 ! daniel 4926: ctxt->errNo = XML_ERR_ATTRIBUTE_WITHOUT_VALUE;
1.59 daniel 4927: ctxt->wellFormed = 0;
1.52 daniel 4928: return(NULL);
1.43 daniel 4929: }
4930:
1.72 daniel 4931: *value = val;
4932: return(name);
1.3 veillard 4933: }
4934:
1.50 daniel 4935: /**
4936: * xmlParseStartTag:
4937: * @ctxt: an XML parser context
4938: *
4939: * parse a start of tag either for rule element or
4940: * EmptyElement. In both case we don't parse the tag closing chars.
1.27 daniel 4941: *
4942: * [40] STag ::= '<' Name (S Attribute)* S? '>'
4943: *
1.98 daniel 4944: * [ WFC: Unique Att Spec ]
4945: * No attribute name may appear more than once in the same start-tag or
4946: * empty-element tag.
4947: *
1.29 daniel 4948: * [44] EmptyElemTag ::= '<' Name (S Attribute)* S? '/>'
4949: *
1.98 daniel 4950: * [ WFC: Unique Att Spec ]
4951: * No attribute name may appear more than once in the same start-tag or
4952: * empty-element tag.
4953: *
1.29 daniel 4954: * With namespace:
4955: *
4956: * [NS 8] STag ::= '<' QName (S Attribute)* S? '>'
4957: *
4958: * [NS 10] EmptyElement ::= '<' QName (S Attribute)* S? '/>'
1.83 daniel 4959: *
4960: * Returns the element name parsed
1.2 veillard 4961: */
4962:
1.123 ! daniel 4963: xmlChar *
1.69 daniel 4964: xmlParseStartTag(xmlParserCtxtPtr ctxt) {
1.123 ! daniel 4965: xmlChar *name;
! 4966: xmlChar *attname;
! 4967: xmlChar *attvalue;
! 4968: const xmlChar **atts = NULL;
1.72 daniel 4969: int nbatts = 0;
4970: int maxatts = 0;
4971: int i;
1.2 veillard 4972:
1.83 daniel 4973: if (CUR != '<') return(NULL);
1.40 daniel 4974: NEXT;
1.3 veillard 4975:
1.72 daniel 4976: name = xmlParseName(ctxt);
1.59 daniel 4977: if (name == NULL) {
4978: if ((ctxt->sax != NULL) && (ctxt->sax->error != NULL))
1.74 daniel 4979: ctxt->sax->error(ctxt->userData,
1.59 daniel 4980: "xmlParseStartTag: invalid element name\n");
1.123 ! daniel 4981: ctxt->errNo = XML_ERR_NAME_REQUIRED;
1.59 daniel 4982: ctxt->wellFormed = 0;
1.83 daniel 4983: return(NULL);
1.50 daniel 4984: }
4985:
4986: /*
1.3 veillard 4987: * Now parse the attributes, it ends up with the ending
4988: *
4989: * (S Attribute)* S?
4990: */
1.42 daniel 4991: SKIP_BLANKS;
1.91 daniel 4992: GROW;
1.40 daniel 4993: while ((IS_CHAR(CUR)) &&
4994: (CUR != '>') &&
4995: ((CUR != '/') || (NXT(1) != '>'))) {
1.123 ! daniel 4996: const xmlChar *q = CUR_PTR;
1.91 daniel 4997: int cons = ctxt->input->consumed;
1.29 daniel 4998:
1.72 daniel 4999: attname = xmlParseAttribute(ctxt, &attvalue);
5000: if ((attname != NULL) && (attvalue != NULL)) {
5001: /*
1.98 daniel 5002: * [ WFC: Unique Att Spec ]
5003: * No attribute name may appear more than once in the same
5004: * start-tag or empty-element tag.
1.72 daniel 5005: */
5006: for (i = 0; i < nbatts;i += 2) {
5007: if (!xmlStrcmp(atts[i], attname)) {
5008: if ((ctxt->sax != NULL) && (ctxt->sax->error != NULL))
1.98 daniel 5009: ctxt->sax->error(ctxt->userData,
5010: "Attribute %s redefined\n",
5011: attname);
1.72 daniel 5012: ctxt->wellFormed = 0;
1.123 ! daniel 5013: ctxt->errNo = XML_ERR_ATTRIBUTE_REDEFINED;
1.119 daniel 5014: xmlFree(attname);
5015: xmlFree(attvalue);
1.98 daniel 5016: goto failed;
1.72 daniel 5017: }
5018: }
5019:
5020: /*
5021: * Add the pair to atts
5022: */
5023: if (atts == NULL) {
5024: maxatts = 10;
1.123 ! daniel 5025: atts = (const xmlChar **) xmlMalloc(maxatts * sizeof(xmlChar *));
1.72 daniel 5026: if (atts == NULL) {
1.86 daniel 5027: fprintf(stderr, "malloc of %ld byte failed\n",
1.123 ! daniel 5028: maxatts * (long)sizeof(xmlChar *));
1.83 daniel 5029: return(NULL);
1.72 daniel 5030: }
5031: } else if (nbatts + 2 < maxatts) {
5032: maxatts *= 2;
1.123 ! daniel 5033: atts = (const xmlChar **) xmlRealloc(atts,
! 5034: maxatts * sizeof(xmlChar *));
1.72 daniel 5035: if (atts == NULL) {
1.86 daniel 5036: fprintf(stderr, "realloc of %ld byte failed\n",
1.123 ! daniel 5037: maxatts * (long)sizeof(xmlChar *));
1.83 daniel 5038: return(NULL);
1.72 daniel 5039: }
5040: }
5041: atts[nbatts++] = attname;
5042: atts[nbatts++] = attvalue;
5043: atts[nbatts] = NULL;
5044: atts[nbatts + 1] = NULL;
5045: }
5046:
1.116 daniel 5047: failed:
1.42 daniel 5048: SKIP_BLANKS;
1.91 daniel 5049: if ((cons == ctxt->input->consumed) && (q == CUR_PTR)) {
1.55 daniel 5050: if ((ctxt->sax != NULL) && (ctxt->sax->error != NULL))
1.74 daniel 5051: ctxt->sax->error(ctxt->userData,
1.31 daniel 5052: "xmlParseStartTag: problem parsing attributes\n");
1.123 ! daniel 5053: ctxt->errNo = XML_ERR_INTERNAL_ERROR;
1.59 daniel 5054: ctxt->wellFormed = 0;
1.29 daniel 5055: break;
1.3 veillard 5056: }
1.91 daniel 5057: GROW;
1.3 veillard 5058: }
5059:
1.43 daniel 5060: /*
1.72 daniel 5061: * SAX: Start of Element !
1.43 daniel 5062: */
1.72 daniel 5063: if ((ctxt->sax != NULL) && (ctxt->sax->startElement != NULL))
1.74 daniel 5064: ctxt->sax->startElement(ctxt->userData, name, atts);
1.43 daniel 5065:
1.72 daniel 5066: if (atts != NULL) {
1.123 ! daniel 5067: for (i = 0;i < nbatts;i++) xmlFree((xmlChar *) atts[i]);
1.119 daniel 5068: xmlFree(atts);
1.72 daniel 5069: }
1.83 daniel 5070: return(name);
1.3 veillard 5071: }
5072:
1.50 daniel 5073: /**
5074: * xmlParseEndTag:
5075: * @ctxt: an XML parser context
1.83 daniel 5076: * @tagname: the tag name as parsed in the opening tag.
1.50 daniel 5077: *
5078: * parse an end of tag
1.27 daniel 5079: *
5080: * [42] ETag ::= '</' Name S? '>'
1.29 daniel 5081: *
5082: * With namespace
5083: *
1.72 daniel 5084: * [NS 9] ETag ::= '</' QName S? '>'
1.7 veillard 5085: */
5086:
1.55 daniel 5087: void
1.123 ! daniel 5088: xmlParseEndTag(xmlParserCtxtPtr ctxt, xmlChar *tagname) {
! 5089: xmlChar *name;
1.7 veillard 5090:
1.91 daniel 5091: GROW;
1.40 daniel 5092: if ((CUR != '<') || (NXT(1) != '/')) {
1.55 daniel 5093: if ((ctxt->sax != NULL) && (ctxt->sax->error != NULL))
1.74 daniel 5094: ctxt->sax->error(ctxt->userData, "xmlParseEndTag: '</' not found\n");
1.59 daniel 5095: ctxt->wellFormed = 0;
1.123 ! daniel 5096: ctxt->errNo = XML_ERR_LTSLASH_REQUIRED;
1.27 daniel 5097: return;
5098: }
1.40 daniel 5099: SKIP(2);
1.7 veillard 5100:
1.72 daniel 5101: name = xmlParseName(ctxt);
1.7 veillard 5102:
5103: /*
5104: * We should definitely be at the ending "S? '>'" part
5105: */
1.91 daniel 5106: GROW;
1.42 daniel 5107: SKIP_BLANKS;
1.40 daniel 5108: if ((!IS_CHAR(CUR)) || (CUR != '>')) {
1.55 daniel 5109: if ((ctxt->sax != NULL) && (ctxt->sax->error != NULL))
1.74 daniel 5110: ctxt->sax->error(ctxt->userData, "End tag : expected '>'\n");
1.123 ! daniel 5111: ctxt->errNo = XML_ERR_GT_REQUIRED;
1.59 daniel 5112: ctxt->wellFormed = 0;
1.7 veillard 5113: } else
1.40 daniel 5114: NEXT;
1.7 veillard 5115:
1.72 daniel 5116: /*
1.98 daniel 5117: * [ WFC: Element Type Match ]
5118: * The Name in an element's end-tag must match the element type in the
5119: * start-tag.
5120: *
1.83 daniel 5121: */
5122: if (xmlStrcmp(name, tagname)) {
5123: if ((ctxt->sax != NULL) && (ctxt->sax->error != NULL))
5124: ctxt->sax->error(ctxt->userData,
5125: "Opening and ending tag mismatch: %s and %s\n", tagname, name);
1.122 daniel 5126:
1.123 ! daniel 5127: ctxt->errNo = XML_ERR_TAG_NAME_MISMATCH;
1.83 daniel 5128: ctxt->wellFormed = 0;
5129: }
5130:
5131: /*
1.72 daniel 5132: * SAX: End of Tag
5133: */
5134: if ((ctxt->sax != NULL) && (ctxt->sax->endElement != NULL))
1.74 daniel 5135: ctxt->sax->endElement(ctxt->userData, name);
1.72 daniel 5136:
5137: if (name != NULL)
1.119 daniel 5138: xmlFree(name);
1.72 daniel 5139:
1.7 veillard 5140: return;
5141: }
5142:
1.50 daniel 5143: /**
5144: * xmlParseCDSect:
5145: * @ctxt: an XML parser context
5146: *
5147: * Parse escaped pure raw content.
1.29 daniel 5148: *
5149: * [18] CDSect ::= CDStart CData CDEnd
5150: *
5151: * [19] CDStart ::= '<![CDATA['
5152: *
5153: * [20] Data ::= (Char* - (Char* ']]>' Char*))
5154: *
5155: * [21] CDEnd ::= ']]>'
1.3 veillard 5156: */
1.55 daniel 5157: void
5158: xmlParseCDSect(xmlParserCtxtPtr ctxt) {
1.123 ! daniel 5159: const xmlChar *base;
! 5160: xmlChar r, s;
! 5161: xmlChar cur;
1.3 veillard 5162:
1.106 daniel 5163: if ((NXT(0) == '<') && (NXT(1) == '!') &&
1.40 daniel 5164: (NXT(2) == '[') && (NXT(3) == 'C') &&
5165: (NXT(4) == 'D') && (NXT(5) == 'A') &&
5166: (NXT(6) == 'T') && (NXT(7) == 'A') &&
5167: (NXT(8) == '[')) {
5168: SKIP(9);
1.29 daniel 5169: } else
1.45 daniel 5170: return;
1.109 daniel 5171:
5172: ctxt->instate = XML_PARSER_CDATA_SECTION;
1.40 daniel 5173: base = CUR_PTR;
5174: if (!IS_CHAR(CUR)) {
1.55 daniel 5175: if ((ctxt->sax != NULL) && (ctxt->sax->error != NULL))
1.122 daniel 5176: ctxt->sax->error(ctxt->userData,
5177: "CData section not finished\n%.50s\n", base);
1.59 daniel 5178: ctxt->wellFormed = 0;
1.123 ! daniel 5179: ctxt->errNo = XML_ERR_CDATA_NOT_FINISHED;
1.109 daniel 5180: ctxt->instate = XML_PARSER_CONTENT;
1.45 daniel 5181: return;
1.3 veillard 5182: }
1.110 daniel 5183: r = CUR;
1.91 daniel 5184: NEXT;
1.40 daniel 5185: if (!IS_CHAR(CUR)) {
1.55 daniel 5186: if ((ctxt->sax != NULL) && (ctxt->sax->error != NULL))
1.122 daniel 5187: ctxt->sax->error(ctxt->userData,
5188: "CData section not finished\n%.50s\n", base);
1.123 ! daniel 5189: ctxt->errNo = XML_ERR_CDATA_NOT_FINISHED;
1.59 daniel 5190: ctxt->wellFormed = 0;
1.109 daniel 5191: ctxt->instate = XML_PARSER_CONTENT;
1.45 daniel 5192: return;
1.3 veillard 5193: }
1.110 daniel 5194: s = CUR;
1.91 daniel 5195: NEXT;
1.108 veillard 5196: cur = CUR;
5197: while (IS_CHAR(cur) &&
1.110 daniel 5198: ((r != ']') || (s != ']') || (cur != '>'))) {
5199: r = s;
5200: s = cur;
5201: NEXT;
1.108 veillard 5202: cur = CUR;
1.3 veillard 5203: }
1.109 daniel 5204: ctxt->instate = XML_PARSER_CONTENT;
1.40 daniel 5205: if (!IS_CHAR(CUR)) {
1.55 daniel 5206: if ((ctxt->sax != NULL) && (ctxt->sax->error != NULL))
1.122 daniel 5207: ctxt->sax->error(ctxt->userData,
5208: "CData section not finished\n%.50s\n", base);
1.123 ! daniel 5209: ctxt->errNo = XML_ERR_CDATA_NOT_FINISHED;
1.59 daniel 5210: ctxt->wellFormed = 0;
1.45 daniel 5211: return;
1.3 veillard 5212: }
1.107 daniel 5213: NEXT;
1.16 daniel 5214:
1.45 daniel 5215: /*
5216: * Ok the segment [base CUR_PTR] is to be consumed as chars.
5217: */
5218: if (ctxt->sax != NULL) {
1.107 daniel 5219: if (ctxt->sax->cdataBlock != NULL)
1.110 daniel 5220: ctxt->sax->cdataBlock(ctxt->userData, base, (CUR_PTR - base) - 3);
1.45 daniel 5221: }
1.2 veillard 5222: }
5223:
1.50 daniel 5224: /**
5225: * xmlParseContent:
5226: * @ctxt: an XML parser context
5227: *
5228: * Parse a content:
1.2 veillard 5229: *
1.27 daniel 5230: * [43] content ::= (element | CharData | Reference | CDSect | PI | Comment)*
1.2 veillard 5231: */
5232:
1.55 daniel 5233: void
5234: xmlParseContent(xmlParserCtxtPtr ctxt) {
1.97 daniel 5235: GROW;
1.40 daniel 5236: while ((CUR != '<') || (NXT(1) != '/')) {
1.123 ! daniel 5237: const xmlChar *test = CUR_PTR;
1.91 daniel 5238: int cons = ctxt->input->consumed;
1.123 ! daniel 5239: xmlChar tok = ctxt->token;
1.27 daniel 5240:
5241: /*
5242: * First case : a Processing Instruction.
5243: */
1.40 daniel 5244: if ((CUR == '<') && (NXT(1) == '?')) {
1.27 daniel 5245: xmlParsePI(ctxt);
5246: }
1.72 daniel 5247:
1.27 daniel 5248: /*
5249: * Second case : a CDSection
5250: */
1.40 daniel 5251: else if ((CUR == '<') && (NXT(1) == '!') &&
5252: (NXT(2) == '[') && (NXT(3) == 'C') &&
5253: (NXT(4) == 'D') && (NXT(5) == 'A') &&
5254: (NXT(6) == 'T') && (NXT(7) == 'A') &&
5255: (NXT(8) == '[')) {
1.45 daniel 5256: xmlParseCDSect(ctxt);
1.27 daniel 5257: }
1.72 daniel 5258:
1.27 daniel 5259: /*
5260: * Third case : a comment
5261: */
1.40 daniel 5262: else if ((CUR == '<') && (NXT(1) == '!') &&
5263: (NXT(2) == '-') && (NXT(3) == '-')) {
1.114 daniel 5264: xmlParseComment(ctxt);
1.97 daniel 5265: ctxt->instate = XML_PARSER_CONTENT;
1.27 daniel 5266: }
1.72 daniel 5267:
1.27 daniel 5268: /*
5269: * Fourth case : a sub-element.
5270: */
1.40 daniel 5271: else if (CUR == '<') {
1.72 daniel 5272: xmlParseElement(ctxt);
1.45 daniel 5273: }
1.72 daniel 5274:
1.45 daniel 5275: /*
1.50 daniel 5276: * Fifth case : a reference. If if has not been resolved,
5277: * parsing returns it's Name, create the node
1.45 daniel 5278: */
1.97 daniel 5279:
1.45 daniel 5280: else if (CUR == '&') {
1.77 daniel 5281: xmlParseReference(ctxt);
1.27 daniel 5282: }
1.72 daniel 5283:
1.27 daniel 5284: /*
5285: * Last case, text. Note that References are handled directly.
5286: */
5287: else {
1.45 daniel 5288: xmlParseCharData(ctxt, 0);
1.3 veillard 5289: }
1.14 veillard 5290:
1.91 daniel 5291: GROW;
1.14 veillard 5292: /*
1.45 daniel 5293: * Pop-up of finished entities.
1.14 veillard 5294: */
1.69 daniel 5295: while ((CUR == 0) && (ctxt->inputNr > 1))
5296: xmlPopInput(ctxt);
1.45 daniel 5297:
1.113 daniel 5298: if ((cons == ctxt->input->consumed) && (test == CUR_PTR) &&
5299: (tok == ctxt->token)) {
1.55 daniel 5300: if ((ctxt->sax != NULL) && (ctxt->sax->error != NULL))
1.74 daniel 5301: ctxt->sax->error(ctxt->userData,
1.59 daniel 5302: "detected an error in element content\n");
1.123 ! daniel 5303: ctxt->errNo = XML_ERR_INTERNAL_ERROR;
1.59 daniel 5304: ctxt->wellFormed = 0;
1.29 daniel 5305: break;
5306: }
1.3 veillard 5307: }
1.2 veillard 5308: }
5309:
1.50 daniel 5310: /**
5311: * xmlParseElement:
5312: * @ctxt: an XML parser context
5313: *
5314: * parse an XML element, this is highly recursive
1.26 daniel 5315: *
5316: * [39] element ::= EmptyElemTag | STag content ETag
5317: *
1.98 daniel 5318: * [ WFC: Element Type Match ]
5319: * The Name in an element's end-tag must match the element type in the
5320: * start-tag.
5321: *
5322: * [ VC: Element Valid ]
1.117 daniel 5323: * An element is valid if there is a declaration matching elementdecl
1.99 daniel 5324: * where the Name matches the element type and one of the following holds:
5325: * - The declaration matches EMPTY and the element has no content.
5326: * - The declaration matches children and the sequence of child elements
5327: * belongs to the language generated by the regular expression in the
5328: * content model, with optional white space (characters matching the
5329: * nonterminal S) between each pair of child elements.
5330: * - The declaration matches Mixed and the content consists of character
5331: * data and child elements whose types match names in the content model.
5332: * - The declaration matches ANY, and the types of any child elements have
5333: * been declared.
1.2 veillard 5334: */
1.26 daniel 5335:
1.72 daniel 5336: void
1.69 daniel 5337: xmlParseElement(xmlParserCtxtPtr ctxt) {
1.123 ! daniel 5338: const xmlChar *openTag = CUR_PTR;
! 5339: xmlChar *name;
1.32 daniel 5340: xmlParserNodeInfo node_info;
1.118 daniel 5341: xmlNodePtr ret;
1.2 veillard 5342:
1.32 daniel 5343: /* Capture start position */
1.118 daniel 5344: if (ctxt->record_info) {
5345: node_info.begin_pos = ctxt->input->consumed +
5346: (CUR_PTR - ctxt->input->base);
5347: node_info.begin_line = ctxt->input->line;
5348: }
1.32 daniel 5349:
1.83 daniel 5350: name = xmlParseStartTag(ctxt);
5351: if (name == NULL) {
5352: return;
5353: }
1.118 daniel 5354: ret = ctxt->node;
1.2 veillard 5355:
5356: /*
1.99 daniel 5357: * [ VC: Root Element Type ]
5358: * The Name in the document type declaration must match the element
5359: * type of the root element.
5360: */
1.105 daniel 5361: if (ctxt->validate && ctxt->wellFormed && ctxt->myDoc &&
5362: ctxt->node && (ctxt->node == ctxt->myDoc->root))
1.102 daniel 5363: ctxt->valid &= xmlValidateRoot(&ctxt->vctxt, ctxt->myDoc);
1.99 daniel 5364:
5365: /*
1.2 veillard 5366: * Check for an Empty Element.
5367: */
1.40 daniel 5368: if ((CUR == '/') && (NXT(1) == '>')) {
5369: SKIP(2);
1.72 daniel 5370: if ((ctxt->sax != NULL) && (ctxt->sax->endElement != NULL))
1.83 daniel 5371: ctxt->sax->endElement(ctxt->userData, name);
1.119 daniel 5372: xmlFree(name);
1.72 daniel 5373: return;
1.2 veillard 5374: }
1.91 daniel 5375: if (CUR == '>') {
5376: NEXT;
5377: } else {
1.55 daniel 5378: if ((ctxt->sax != NULL) && (ctxt->sax->error != NULL))
1.122 daniel 5379: ctxt->sax->error(ctxt->userData,
5380: "Couldn't find end of Start Tag\n%.30s\n",
1.57 daniel 5381: openTag);
1.59 daniel 5382: ctxt->wellFormed = 0;
1.123 ! daniel 5383: ctxt->errNo = XML_ERR_GT_REQUIRED;
1.45 daniel 5384:
5385: /*
5386: * end of parsing of this node.
5387: */
5388: nodePop(ctxt);
1.119 daniel 5389: xmlFree(name);
1.118 daniel 5390:
5391: /*
5392: * Capture end position and add node
5393: */
5394: if ( ret != NULL && ctxt->record_info ) {
5395: node_info.end_pos = ctxt->input->consumed +
5396: (CUR_PTR - ctxt->input->base);
5397: node_info.end_line = ctxt->input->line;
5398: node_info.node = ret;
5399: xmlParserAddNodeInfo(ctxt, &node_info);
5400: }
1.72 daniel 5401: return;
1.2 veillard 5402: }
5403:
5404: /*
5405: * Parse the content of the element:
5406: */
1.45 daniel 5407: xmlParseContent(ctxt);
1.40 daniel 5408: if (!IS_CHAR(CUR)) {
1.55 daniel 5409: if ((ctxt->sax != NULL) && (ctxt->sax->error != NULL))
1.74 daniel 5410: ctxt->sax->error(ctxt->userData,
1.57 daniel 5411: "Premature end of data in tag %.30s\n", openTag);
1.59 daniel 5412: ctxt->wellFormed = 0;
1.123 ! daniel 5413: ctxt->errNo = XML_ERR_TAG_NOT_FINISED;
1.45 daniel 5414:
5415: /*
5416: * end of parsing of this node.
5417: */
5418: nodePop(ctxt);
1.119 daniel 5419: xmlFree(name);
1.72 daniel 5420: return;
1.2 veillard 5421: }
5422:
5423: /*
1.27 daniel 5424: * parse the end of tag: '</' should be here.
1.2 veillard 5425: */
1.83 daniel 5426: xmlParseEndTag(ctxt, name);
1.119 daniel 5427: xmlFree(name);
1.118 daniel 5428:
5429: /*
5430: * Capture end position and add node
5431: */
5432: if ( ret != NULL && ctxt->record_info ) {
5433: node_info.end_pos = ctxt->input->consumed +
5434: (CUR_PTR - ctxt->input->base);
5435: node_info.end_line = ctxt->input->line;
5436: node_info.node = ret;
5437: xmlParserAddNodeInfo(ctxt, &node_info);
5438: }
1.2 veillard 5439: }
5440:
1.50 daniel 5441: /**
5442: * xmlParseVersionNum:
5443: * @ctxt: an XML parser context
5444: *
5445: * parse the XML version value.
1.29 daniel 5446: *
5447: * [26] VersionNum ::= ([a-zA-Z0-9_.:] | '-')+
1.68 daniel 5448: *
5449: * Returns the string giving the XML version number, or NULL
1.29 daniel 5450: */
1.123 ! daniel 5451: xmlChar *
1.55 daniel 5452: xmlParseVersionNum(xmlParserCtxtPtr ctxt) {
1.123 ! daniel 5453: const xmlChar *q = CUR_PTR;
! 5454: xmlChar *ret;
1.29 daniel 5455:
1.40 daniel 5456: while (IS_CHAR(CUR) &&
5457: (((CUR >= 'a') && (CUR <= 'z')) ||
5458: ((CUR >= 'A') && (CUR <= 'Z')) ||
5459: ((CUR >= '0') && (CUR <= '9')) ||
5460: (CUR == '_') || (CUR == '.') ||
5461: (CUR == ':') || (CUR == '-'))) NEXT;
5462: ret = xmlStrndup(q, CUR_PTR - q);
1.29 daniel 5463: return(ret);
5464: }
5465:
1.50 daniel 5466: /**
5467: * xmlParseVersionInfo:
5468: * @ctxt: an XML parser context
5469: *
5470: * parse the XML version.
1.29 daniel 5471: *
5472: * [24] VersionInfo ::= S 'version' Eq (' VersionNum ' | " VersionNum ")
5473: *
5474: * [25] Eq ::= S? '=' S?
1.50 daniel 5475: *
1.68 daniel 5476: * Returns the version string, e.g. "1.0"
1.29 daniel 5477: */
5478:
1.123 ! daniel 5479: xmlChar *
1.55 daniel 5480: xmlParseVersionInfo(xmlParserCtxtPtr ctxt) {
1.123 ! daniel 5481: xmlChar *version = NULL;
! 5482: const xmlChar *q;
1.29 daniel 5483:
1.40 daniel 5484: if ((CUR == 'v') && (NXT(1) == 'e') &&
5485: (NXT(2) == 'r') && (NXT(3) == 's') &&
5486: (NXT(4) == 'i') && (NXT(5) == 'o') &&
5487: (NXT(6) == 'n')) {
5488: SKIP(7);
1.42 daniel 5489: SKIP_BLANKS;
1.40 daniel 5490: if (CUR != '=') {
1.55 daniel 5491: if ((ctxt->sax != NULL) && (ctxt->sax->error != NULL))
1.122 daniel 5492: ctxt->sax->error(ctxt->userData,
5493: "xmlParseVersionInfo : expected '='\n");
1.59 daniel 5494: ctxt->wellFormed = 0;
1.123 ! daniel 5495: ctxt->errNo = XML_ERR_EQUAL_REQUIRED;
1.31 daniel 5496: return(NULL);
5497: }
1.40 daniel 5498: NEXT;
1.42 daniel 5499: SKIP_BLANKS;
1.40 daniel 5500: if (CUR == '"') {
5501: NEXT;
5502: q = CUR_PTR;
1.29 daniel 5503: version = xmlParseVersionNum(ctxt);
1.55 daniel 5504: if (CUR != '"') {
5505: if ((ctxt->sax != NULL) && (ctxt->sax->error != NULL))
1.122 daniel 5506: ctxt->sax->error(ctxt->userData,
5507: "String not closed\n%.50s\n", q);
1.59 daniel 5508: ctxt->wellFormed = 0;
1.123 ! daniel 5509: ctxt->errNo = XML_ERR_STRING_NOT_CLOSED;
1.55 daniel 5510: } else
1.40 daniel 5511: NEXT;
5512: } else if (CUR == '\''){
5513: NEXT;
5514: q = CUR_PTR;
1.29 daniel 5515: version = xmlParseVersionNum(ctxt);
1.55 daniel 5516: if (CUR != '\'') {
5517: if ((ctxt->sax != NULL) && (ctxt->sax->error != NULL))
1.122 daniel 5518: ctxt->sax->error(ctxt->userData,
5519: "String not closed\n%.50s\n", q);
1.123 ! daniel 5520: ctxt->errNo = XML_ERR_STRING_NOT_CLOSED;
1.59 daniel 5521: ctxt->wellFormed = 0;
1.55 daniel 5522: } else
1.40 daniel 5523: NEXT;
1.31 daniel 5524: } else {
1.55 daniel 5525: if ((ctxt->sax != NULL) && (ctxt->sax->error != NULL))
1.74 daniel 5526: ctxt->sax->error(ctxt->userData,
1.59 daniel 5527: "xmlParseVersionInfo : expected ' or \"\n");
1.122 daniel 5528: ctxt->wellFormed = 0;
1.123 ! daniel 5529: ctxt->errNo = XML_ERR_STRING_NOT_STARTED;
1.29 daniel 5530: }
5531: }
5532: return(version);
5533: }
5534:
1.50 daniel 5535: /**
5536: * xmlParseEncName:
5537: * @ctxt: an XML parser context
5538: *
5539: * parse the XML encoding name
1.29 daniel 5540: *
5541: * [81] EncName ::= [A-Za-z] ([A-Za-z0-9._] | '-')*
1.50 daniel 5542: *
1.68 daniel 5543: * Returns the encoding name value or NULL
1.29 daniel 5544: */
1.123 ! daniel 5545: xmlChar *
1.55 daniel 5546: xmlParseEncName(xmlParserCtxtPtr ctxt) {
1.123 ! daniel 5547: const xmlChar *q = CUR_PTR;
! 5548: xmlChar *ret = NULL;
1.29 daniel 5549:
1.40 daniel 5550: if (((CUR >= 'a') && (CUR <= 'z')) ||
5551: ((CUR >= 'A') && (CUR <= 'Z'))) {
5552: NEXT;
5553: while (IS_CHAR(CUR) &&
5554: (((CUR >= 'a') && (CUR <= 'z')) ||
5555: ((CUR >= 'A') && (CUR <= 'Z')) ||
5556: ((CUR >= '0') && (CUR <= '9')) ||
5557: (CUR == '-'))) NEXT;
5558: ret = xmlStrndup(q, CUR_PTR - q);
1.29 daniel 5559: } else {
1.55 daniel 5560: if ((ctxt->sax != NULL) && (ctxt->sax->error != NULL))
1.74 daniel 5561: ctxt->sax->error(ctxt->userData, "Invalid XML encoding name\n");
1.59 daniel 5562: ctxt->wellFormed = 0;
1.123 ! daniel 5563: ctxt->errNo = XML_ERR_ENCODING_NAME;
1.29 daniel 5564: }
5565: return(ret);
5566: }
5567:
1.50 daniel 5568: /**
5569: * xmlParseEncodingDecl:
5570: * @ctxt: an XML parser context
5571: *
5572: * parse the XML encoding declaration
1.29 daniel 5573: *
5574: * [80] EncodingDecl ::= S 'encoding' Eq ('"' EncName '"' | "'" EncName "'")
1.50 daniel 5575: *
5576: * TODO: this should setup the conversion filters.
5577: *
1.68 daniel 5578: * Returns the encoding value or NULL
1.29 daniel 5579: */
5580:
1.123 ! daniel 5581: xmlChar *
1.55 daniel 5582: xmlParseEncodingDecl(xmlParserCtxtPtr ctxt) {
1.123 ! daniel 5583: xmlChar *encoding = NULL;
! 5584: const xmlChar *q;
1.29 daniel 5585:
1.42 daniel 5586: SKIP_BLANKS;
1.40 daniel 5587: if ((CUR == 'e') && (NXT(1) == 'n') &&
5588: (NXT(2) == 'c') && (NXT(3) == 'o') &&
5589: (NXT(4) == 'd') && (NXT(5) == 'i') &&
5590: (NXT(6) == 'n') && (NXT(7) == 'g')) {
5591: SKIP(8);
1.42 daniel 5592: SKIP_BLANKS;
1.40 daniel 5593: if (CUR != '=') {
1.55 daniel 5594: if ((ctxt->sax != NULL) && (ctxt->sax->error != NULL))
1.122 daniel 5595: ctxt->sax->error(ctxt->userData,
5596: "xmlParseEncodingDecl : expected '='\n");
1.59 daniel 5597: ctxt->wellFormed = 0;
1.123 ! daniel 5598: ctxt->errNo = XML_ERR_EQUAL_REQUIRED;
1.31 daniel 5599: return(NULL);
5600: }
1.40 daniel 5601: NEXT;
1.42 daniel 5602: SKIP_BLANKS;
1.40 daniel 5603: if (CUR == '"') {
5604: NEXT;
5605: q = CUR_PTR;
1.29 daniel 5606: encoding = xmlParseEncName(ctxt);
1.55 daniel 5607: if (CUR != '"') {
5608: if ((ctxt->sax != NULL) && (ctxt->sax->error != NULL))
1.122 daniel 5609: ctxt->sax->error(ctxt->userData,
5610: "String not closed\n%.50s\n", q);
1.59 daniel 5611: ctxt->wellFormed = 0;
1.123 ! daniel 5612: ctxt->errNo = XML_ERR_STRING_NOT_CLOSED;
1.55 daniel 5613: } else
1.40 daniel 5614: NEXT;
5615: } else if (CUR == '\''){
5616: NEXT;
5617: q = CUR_PTR;
1.29 daniel 5618: encoding = xmlParseEncName(ctxt);
1.55 daniel 5619: if (CUR != '\'') {
5620: if ((ctxt->sax != NULL) && (ctxt->sax->error != NULL))
1.122 daniel 5621: ctxt->sax->error(ctxt->userData,
5622: "String not closed\n%.50s\n", q);
1.59 daniel 5623: ctxt->wellFormed = 0;
1.123 ! daniel 5624: ctxt->errNo = XML_ERR_STRING_NOT_CLOSED;
1.55 daniel 5625: } else
1.40 daniel 5626: NEXT;
5627: } else if (CUR == '"'){
1.55 daniel 5628: if ((ctxt->sax != NULL) && (ctxt->sax->error != NULL))
1.74 daniel 5629: ctxt->sax->error(ctxt->userData,
1.59 daniel 5630: "xmlParseEncodingDecl : expected ' or \"\n");
5631: ctxt->wellFormed = 0;
1.123 ! daniel 5632: ctxt->errNo = XML_ERR_STRING_NOT_STARTED;
1.29 daniel 5633: }
5634: }
5635: return(encoding);
5636: }
5637:
1.50 daniel 5638: /**
5639: * xmlParseSDDecl:
5640: * @ctxt: an XML parser context
5641: *
5642: * parse the XML standalone declaration
1.29 daniel 5643: *
5644: * [32] SDDecl ::= S 'standalone' Eq
5645: * (("'" ('yes' | 'no') "'") | ('"' ('yes' | 'no')'"'))
1.99 daniel 5646: *
5647: * [ VC: Standalone Document Declaration ]
5648: * TODO The standalone document declaration must have the value "no"
5649: * if any external markup declarations contain declarations of:
5650: * - attributes with default values, if elements to which these
5651: * attributes apply appear in the document without specifications
5652: * of values for these attributes, or
5653: * - entities (other than amp, lt, gt, apos, quot), if references
5654: * to those entities appear in the document, or
5655: * - attributes with values subject to normalization, where the
5656: * attribute appears in the document with a value which will change
5657: * as a result of normalization, or
5658: * - element types with element content, if white space occurs directly
5659: * within any instance of those types.
1.68 daniel 5660: *
5661: * Returns 1 if standalone, 0 otherwise
1.29 daniel 5662: */
5663:
1.55 daniel 5664: int
5665: xmlParseSDDecl(xmlParserCtxtPtr ctxt) {
1.29 daniel 5666: int standalone = -1;
5667:
1.42 daniel 5668: SKIP_BLANKS;
1.40 daniel 5669: if ((CUR == 's') && (NXT(1) == 't') &&
5670: (NXT(2) == 'a') && (NXT(3) == 'n') &&
5671: (NXT(4) == 'd') && (NXT(5) == 'a') &&
5672: (NXT(6) == 'l') && (NXT(7) == 'o') &&
5673: (NXT(8) == 'n') && (NXT(9) == 'e')) {
5674: SKIP(10);
1.81 daniel 5675: SKIP_BLANKS;
1.40 daniel 5676: if (CUR != '=') {
1.55 daniel 5677: if ((ctxt->sax != NULL) && (ctxt->sax->error != NULL))
1.74 daniel 5678: ctxt->sax->error(ctxt->userData,
1.59 daniel 5679: "XML standalone declaration : expected '='\n");
1.123 ! daniel 5680: ctxt->errNo = XML_ERR_EQUAL_REQUIRED;
1.59 daniel 5681: ctxt->wellFormed = 0;
1.32 daniel 5682: return(standalone);
5683: }
1.40 daniel 5684: NEXT;
1.42 daniel 5685: SKIP_BLANKS;
1.40 daniel 5686: if (CUR == '\''){
5687: NEXT;
5688: if ((CUR == 'n') && (NXT(1) == 'o')) {
1.29 daniel 5689: standalone = 0;
1.40 daniel 5690: SKIP(2);
5691: } else if ((CUR == 'y') && (NXT(1) == 'e') &&
5692: (NXT(2) == 's')) {
1.29 daniel 5693: standalone = 1;
1.40 daniel 5694: SKIP(3);
1.29 daniel 5695: } else {
1.55 daniel 5696: if ((ctxt->sax != NULL) && (ctxt->sax->error != NULL))
1.122 daniel 5697: ctxt->sax->error(ctxt->userData,
5698: "standalone accepts only 'yes' or 'no'\n");
1.123 ! daniel 5699: ctxt->errNo = XML_ERR_STANDALONE_VALUE;
1.59 daniel 5700: ctxt->wellFormed = 0;
1.29 daniel 5701: }
1.55 daniel 5702: if (CUR != '\'') {
5703: if ((ctxt->sax != NULL) && (ctxt->sax->error != NULL))
1.74 daniel 5704: ctxt->sax->error(ctxt->userData, "String not closed\n");
1.123 ! daniel 5705: ctxt->errNo = XML_ERR_STRING_NOT_CLOSED;
1.59 daniel 5706: ctxt->wellFormed = 0;
1.55 daniel 5707: } else
1.40 daniel 5708: NEXT;
5709: } else if (CUR == '"'){
5710: NEXT;
5711: if ((CUR == 'n') && (NXT(1) == 'o')) {
1.29 daniel 5712: standalone = 0;
1.40 daniel 5713: SKIP(2);
5714: } else if ((CUR == 'y') && (NXT(1) == 'e') &&
5715: (NXT(2) == 's')) {
1.29 daniel 5716: standalone = 1;
1.40 daniel 5717: SKIP(3);
1.29 daniel 5718: } else {
1.55 daniel 5719: if ((ctxt->sax != NULL) && (ctxt->sax->error != NULL))
1.74 daniel 5720: ctxt->sax->error(ctxt->userData,
1.59 daniel 5721: "standalone accepts only 'yes' or 'no'\n");
1.123 ! daniel 5722: ctxt->errNo = XML_ERR_STANDALONE_VALUE;
1.59 daniel 5723: ctxt->wellFormed = 0;
1.29 daniel 5724: }
1.55 daniel 5725: if (CUR != '"') {
5726: if ((ctxt->sax != NULL) && (ctxt->sax->error != NULL))
1.74 daniel 5727: ctxt->sax->error(ctxt->userData, "String not closed\n");
1.59 daniel 5728: ctxt->wellFormed = 0;
1.123 ! daniel 5729: ctxt->errNo = XML_ERR_STRING_NOT_CLOSED;
1.55 daniel 5730: } else
1.40 daniel 5731: NEXT;
1.37 daniel 5732: } else {
1.55 daniel 5733: if ((ctxt->sax != NULL) && (ctxt->sax->error != NULL))
1.122 daniel 5734: ctxt->sax->error(ctxt->userData,
5735: "Standalone value not found\n");
1.59 daniel 5736: ctxt->wellFormed = 0;
1.123 ! daniel 5737: ctxt->errNo = XML_ERR_STRING_NOT_STARTED;
1.37 daniel 5738: }
1.29 daniel 5739: }
5740: return(standalone);
5741: }
5742:
1.50 daniel 5743: /**
5744: * xmlParseXMLDecl:
5745: * @ctxt: an XML parser context
5746: *
5747: * parse an XML declaration header
1.29 daniel 5748: *
5749: * [23] XMLDecl ::= '<?xml' VersionInfo EncodingDecl? SDDecl? S? '?>'
1.1 veillard 5750: */
5751:
1.55 daniel 5752: void
5753: xmlParseXMLDecl(xmlParserCtxtPtr ctxt) {
1.123 ! daniel 5754: xmlChar *version;
1.1 veillard 5755:
5756: /*
1.19 daniel 5757: * We know that '<?xml' is here.
1.1 veillard 5758: */
1.40 daniel 5759: SKIP(5);
1.1 veillard 5760:
1.59 daniel 5761: if (!IS_BLANK(CUR)) {
5762: if ((ctxt->sax != NULL) && (ctxt->sax->error != NULL))
1.74 daniel 5763: ctxt->sax->error(ctxt->userData, "Blank needed after '<?xml'\n");
1.123 ! daniel 5764: ctxt->errNo = XML_ERR_SPACE_REQUIRED;
1.59 daniel 5765: ctxt->wellFormed = 0;
5766: }
1.42 daniel 5767: SKIP_BLANKS;
1.1 veillard 5768:
5769: /*
1.29 daniel 5770: * We should have the VersionInfo here.
1.1 veillard 5771: */
1.29 daniel 5772: version = xmlParseVersionInfo(ctxt);
5773: if (version == NULL)
1.45 daniel 5774: version = xmlCharStrdup(XML_DEFAULT_VERSION);
1.72 daniel 5775: ctxt->version = xmlStrdup(version);
1.119 daniel 5776: xmlFree(version);
1.29 daniel 5777:
5778: /*
5779: * We may have the encoding declaration
5780: */
1.59 daniel 5781: if (!IS_BLANK(CUR)) {
5782: if ((CUR == '?') && (NXT(1) == '>')) {
5783: SKIP(2);
5784: return;
5785: }
5786: if ((ctxt->sax != NULL) && (ctxt->sax->error != NULL))
1.74 daniel 5787: ctxt->sax->error(ctxt->userData, "Blank needed here\n");
1.123 ! daniel 5788: ctxt->errNo = XML_ERR_SPACE_REQUIRED;
1.59 daniel 5789: ctxt->wellFormed = 0;
5790: }
1.72 daniel 5791: ctxt->encoding = xmlParseEncodingDecl(ctxt);
1.1 veillard 5792:
5793: /*
1.29 daniel 5794: * We may have the standalone status.
1.1 veillard 5795: */
1.72 daniel 5796: if ((ctxt->encoding != NULL) && (!IS_BLANK(CUR))) {
1.59 daniel 5797: if ((CUR == '?') && (NXT(1) == '>')) {
5798: SKIP(2);
5799: return;
5800: }
5801: if ((ctxt->sax != NULL) && (ctxt->sax->error != NULL))
1.74 daniel 5802: ctxt->sax->error(ctxt->userData, "Blank needed here\n");
1.59 daniel 5803: ctxt->wellFormed = 0;
1.123 ! daniel 5804: ctxt->errNo = XML_ERR_SPACE_REQUIRED;
1.59 daniel 5805: }
5806: SKIP_BLANKS;
1.72 daniel 5807: ctxt->standalone = xmlParseSDDecl(ctxt);
1.1 veillard 5808:
1.42 daniel 5809: SKIP_BLANKS;
1.40 daniel 5810: if ((CUR == '?') && (NXT(1) == '>')) {
5811: SKIP(2);
5812: } else if (CUR == '>') {
1.31 daniel 5813: /* Deprecated old WD ... */
1.55 daniel 5814: if ((ctxt->sax != NULL) && (ctxt->sax->error != NULL))
1.122 daniel 5815: ctxt->sax->error(ctxt->userData,
5816: "XML declaration must end-up with '?>'\n");
1.59 daniel 5817: ctxt->wellFormed = 0;
1.123 ! daniel 5818: ctxt->errNo = XML_ERR_XMLDECL_NOT_FINISHED;
1.40 daniel 5819: NEXT;
1.29 daniel 5820: } else {
1.55 daniel 5821: if ((ctxt->sax != NULL) && (ctxt->sax->error != NULL))
1.122 daniel 5822: ctxt->sax->error(ctxt->userData,
5823: "parsing XML declaration: '?>' expected\n");
1.59 daniel 5824: ctxt->wellFormed = 0;
1.123 ! daniel 5825: ctxt->errNo = XML_ERR_XMLDECL_NOT_FINISHED;
1.40 daniel 5826: MOVETO_ENDTAG(CUR_PTR);
5827: NEXT;
1.29 daniel 5828: }
1.1 veillard 5829: }
5830:
1.50 daniel 5831: /**
5832: * xmlParseMisc:
5833: * @ctxt: an XML parser context
5834: *
5835: * parse an XML Misc* optionnal field.
1.21 daniel 5836: *
1.22 daniel 5837: * [27] Misc ::= Comment | PI | S
1.1 veillard 5838: */
5839:
1.55 daniel 5840: void
5841: xmlParseMisc(xmlParserCtxtPtr ctxt) {
1.40 daniel 5842: while (((CUR == '<') && (NXT(1) == '?')) ||
5843: ((CUR == '<') && (NXT(1) == '!') &&
5844: (NXT(2) == '-') && (NXT(3) == '-')) ||
5845: IS_BLANK(CUR)) {
5846: if ((CUR == '<') && (NXT(1) == '?')) {
1.16 daniel 5847: xmlParsePI(ctxt);
1.40 daniel 5848: } else if (IS_BLANK(CUR)) {
5849: NEXT;
1.1 veillard 5850: } else
1.114 daniel 5851: xmlParseComment(ctxt);
1.1 veillard 5852: }
5853: }
5854:
1.50 daniel 5855: /**
5856: * xmlParseDocument :
5857: * @ctxt: an XML parser context
5858: *
5859: * parse an XML document (and build a tree if using the standard SAX
5860: * interface).
1.21 daniel 5861: *
1.22 daniel 5862: * [1] document ::= prolog element Misc*
1.29 daniel 5863: *
5864: * [22] prolog ::= XMLDecl? Misc* (doctypedecl Misc*)?
1.50 daniel 5865: *
1.68 daniel 5866: * Returns 0, -1 in case of error. the parser context is augmented
1.50 daniel 5867: * as a result of the parsing.
1.1 veillard 5868: */
5869:
1.55 daniel 5870: int
5871: xmlParseDocument(xmlParserCtxtPtr ctxt) {
1.45 daniel 5872: xmlDefaultSAXHandlerInit();
5873:
1.91 daniel 5874: GROW;
5875:
1.14 veillard 5876: /*
1.44 daniel 5877: * SAX: beginning of the document processing.
5878: */
1.72 daniel 5879: if ((ctxt->sax) && (ctxt->sax->setDocumentLocator))
1.74 daniel 5880: ctxt->sax->setDocumentLocator(ctxt->userData, &xmlDefaultSAXLocator);
1.44 daniel 5881:
5882: /*
1.117 daniel 5883: * TODO We should check for encoding here and plug-in some
5884: * conversion code !!!!
1.14 veillard 5885: */
1.1 veillard 5886:
5887: /*
5888: * Wipe out everything which is before the first '<'
5889: */
1.59 daniel 5890: if (IS_BLANK(CUR)) {
5891: if ((ctxt->sax != NULL) && (ctxt->sax->error != NULL))
1.74 daniel 5892: ctxt->sax->error(ctxt->userData,
1.59 daniel 5893: "Extra spaces at the beginning of the document are not allowed\n");
1.123 ! daniel 5894: ctxt->errNo = XML_ERR_DOCUMENT_START;
1.59 daniel 5895: ctxt->wellFormed = 0;
5896: SKIP_BLANKS;
5897: }
5898:
5899: if (CUR == 0) {
5900: if ((ctxt->sax != NULL) && (ctxt->sax->error != NULL))
1.74 daniel 5901: ctxt->sax->error(ctxt->userData, "Document is empty\n");
1.123 ! daniel 5902: ctxt->errNo = XML_ERR_DOCUMENT_EMPTY;
1.59 daniel 5903: ctxt->wellFormed = 0;
5904: }
1.1 veillard 5905:
5906: /*
5907: * Check for the XMLDecl in the Prolog.
5908: */
1.91 daniel 5909: GROW;
1.40 daniel 5910: if ((CUR == '<') && (NXT(1) == '?') &&
5911: (NXT(2) == 'x') && (NXT(3) == 'm') &&
5912: (NXT(4) == 'l')) {
1.19 daniel 5913: xmlParseXMLDecl(ctxt);
5914: /* SKIP_EOL(cur); */
1.42 daniel 5915: SKIP_BLANKS;
1.40 daniel 5916: } else if ((CUR == '<') && (NXT(1) == '?') &&
5917: (NXT(2) == 'X') && (NXT(3) == 'M') &&
5918: (NXT(4) == 'L')) {
1.19 daniel 5919: /*
5920: * The first drafts were using <?XML and the final W3C REC
5921: * now use <?xml ...
5922: */
1.16 daniel 5923: xmlParseXMLDecl(ctxt);
1.1 veillard 5924: /* SKIP_EOL(cur); */
1.42 daniel 5925: SKIP_BLANKS;
1.1 veillard 5926: } else {
1.72 daniel 5927: ctxt->version = xmlCharStrdup(XML_DEFAULT_VERSION);
1.1 veillard 5928: }
1.72 daniel 5929: if ((ctxt->sax) && (ctxt->sax->startDocument))
1.74 daniel 5930: ctxt->sax->startDocument(ctxt->userData);
1.1 veillard 5931:
5932: /*
5933: * The Misc part of the Prolog
5934: */
1.91 daniel 5935: GROW;
1.16 daniel 5936: xmlParseMisc(ctxt);
1.1 veillard 5937:
5938: /*
1.29 daniel 5939: * Then possibly doc type declaration(s) and more Misc
1.21 daniel 5940: * (doctypedecl Misc*)?
5941: */
1.91 daniel 5942: GROW;
1.40 daniel 5943: if ((CUR == '<') && (NXT(1) == '!') &&
5944: (NXT(2) == 'D') && (NXT(3) == 'O') &&
5945: (NXT(4) == 'C') && (NXT(5) == 'T') &&
5946: (NXT(6) == 'Y') && (NXT(7) == 'P') &&
5947: (NXT(8) == 'E')) {
1.22 daniel 5948: xmlParseDocTypeDecl(ctxt);
1.96 daniel 5949: ctxt->instate = XML_PARSER_PROLOG;
1.22 daniel 5950: xmlParseMisc(ctxt);
1.21 daniel 5951: }
5952:
5953: /*
5954: * Time to start parsing the tree itself
1.1 veillard 5955: */
1.91 daniel 5956: GROW;
1.96 daniel 5957: ctxt->instate = XML_PARSER_CONTENT;
1.72 daniel 5958: xmlParseElement(ctxt);
1.96 daniel 5959: ctxt->instate = XML_PARSER_EPILOG;
1.33 daniel 5960:
5961: /*
5962: * The Misc part at the end
5963: */
5964: xmlParseMisc(ctxt);
1.16 daniel 5965:
1.59 daniel 5966: if (CUR != 0) {
5967: if ((ctxt->sax != NULL) && (ctxt->sax->error != NULL))
1.74 daniel 5968: ctxt->sax->error(ctxt->userData,
1.59 daniel 5969: "Extra content at the end of the document\n");
5970: ctxt->wellFormed = 0;
1.123 ! daniel 5971: ctxt->errNo = XML_ERR_DOCUMENT_END;
1.59 daniel 5972: }
1.96 daniel 5973: ctxt->instate = XML_PARSER_EOF;
1.59 daniel 5974:
1.44 daniel 5975: /*
5976: * SAX: end of the document processing.
5977: */
1.72 daniel 5978: if ((ctxt->sax) && (ctxt->sax->endDocument != NULL))
1.74 daniel 5979: ctxt->sax->endDocument(ctxt->userData);
1.59 daniel 5980: if (! ctxt->wellFormed) return(-1);
1.16 daniel 5981: return(0);
5982: }
5983:
1.98 daniel 5984: /************************************************************************
5985: * *
5986: * I/O front end functions to the parser *
5987: * *
5988: ************************************************************************/
5989:
1.50 daniel 5990: /**
1.86 daniel 5991: * xmlCreateDocParserCtxt :
1.123 ! daniel 5992: * @cur: a pointer to an array of xmlChar
1.50 daniel 5993: *
1.69 daniel 5994: * Create a parser context for an XML in-memory document.
5995: *
5996: * Returns the new parser context or NULL
1.16 daniel 5997: */
1.69 daniel 5998: xmlParserCtxtPtr
1.123 ! daniel 5999: xmlCreateDocParserCtxt(xmlChar *cur) {
1.16 daniel 6000: xmlParserCtxtPtr ctxt;
1.40 daniel 6001: xmlParserInputPtr input;
1.75 daniel 6002: xmlCharEncoding enc;
1.16 daniel 6003:
1.97 daniel 6004: ctxt = xmlNewParserCtxt();
1.16 daniel 6005: if (ctxt == NULL) {
6006: return(NULL);
6007: }
1.96 daniel 6008: input = xmlNewInputStream(ctxt);
1.40 daniel 6009: if (input == NULL) {
1.97 daniel 6010: xmlFreeParserCtxt(ctxt);
1.40 daniel 6011: return(NULL);
6012: }
6013:
1.75 daniel 6014: /*
6015: * plug some encoding conversion routines here. !!!
6016: */
6017: enc = xmlDetectCharEncoding(cur);
6018: xmlSwitchEncoding(ctxt, enc);
6019:
1.40 daniel 6020: input->base = cur;
6021: input->cur = cur;
6022:
6023: inputPush(ctxt, input);
1.69 daniel 6024: return(ctxt);
6025: }
6026:
6027: /**
6028: * xmlSAXParseDoc :
6029: * @sax: the SAX handler block
1.123 ! daniel 6030: * @cur: a pointer to an array of xmlChar
1.69 daniel 6031: * @recovery: work in recovery mode, i.e. tries to read no Well Formed
6032: * documents
6033: *
6034: * parse an XML in-memory document and build a tree.
6035: * It use the given SAX function block to handle the parsing callback.
6036: * If sax is NULL, fallback to the default DOM tree building routines.
6037: *
6038: * Returns the resulting document tree
6039: */
6040:
6041: xmlDocPtr
1.123 ! daniel 6042: xmlSAXParseDoc(xmlSAXHandlerPtr sax, xmlChar *cur, int recovery) {
1.69 daniel 6043: xmlDocPtr ret;
6044: xmlParserCtxtPtr ctxt;
6045:
6046: if (cur == NULL) return(NULL);
1.16 daniel 6047:
6048:
1.69 daniel 6049: ctxt = xmlCreateDocParserCtxt(cur);
6050: if (ctxt == NULL) return(NULL);
1.74 daniel 6051: if (sax != NULL) {
6052: ctxt->sax = sax;
6053: ctxt->userData = NULL;
6054: }
1.69 daniel 6055:
1.16 daniel 6056: xmlParseDocument(ctxt);
1.72 daniel 6057: if ((ctxt->wellFormed) || recovery) ret = ctxt->myDoc;
1.59 daniel 6058: else {
6059: ret = NULL;
1.72 daniel 6060: xmlFreeDoc(ctxt->myDoc);
6061: ctxt->myDoc = NULL;
1.59 daniel 6062: }
1.86 daniel 6063: if (sax != NULL)
6064: ctxt->sax = NULL;
1.69 daniel 6065: xmlFreeParserCtxt(ctxt);
1.16 daniel 6066:
1.1 veillard 6067: return(ret);
6068: }
6069:
1.50 daniel 6070: /**
1.55 daniel 6071: * xmlParseDoc :
1.123 ! daniel 6072: * @cur: a pointer to an array of xmlChar
1.55 daniel 6073: *
6074: * parse an XML in-memory document and build a tree.
6075: *
1.68 daniel 6076: * Returns the resulting document tree
1.55 daniel 6077: */
6078:
1.69 daniel 6079: xmlDocPtr
1.123 ! daniel 6080: xmlParseDoc(xmlChar *cur) {
1.59 daniel 6081: return(xmlSAXParseDoc(NULL, cur, 0));
1.76 daniel 6082: }
6083:
6084: /**
6085: * xmlSAXParseDTD :
6086: * @sax: the SAX handler block
6087: * @ExternalID: a NAME* containing the External ID of the DTD
6088: * @SystemID: a NAME* containing the URL to the DTD
6089: *
6090: * Load and parse an external subset.
6091: *
6092: * Returns the resulting xmlDtdPtr or NULL in case of error.
6093: */
6094:
6095: xmlDtdPtr
1.123 ! daniel 6096: xmlSAXParseDTD(xmlSAXHandlerPtr sax, const xmlChar *ExternalID,
! 6097: const xmlChar *SystemID) {
1.76 daniel 6098: xmlDtdPtr ret = NULL;
6099: xmlParserCtxtPtr ctxt;
1.83 daniel 6100: xmlParserInputPtr input = NULL;
1.76 daniel 6101: xmlCharEncoding enc;
6102:
6103: if ((ExternalID == NULL) && (SystemID == NULL)) return(NULL);
6104:
1.97 daniel 6105: ctxt = xmlNewParserCtxt();
1.76 daniel 6106: if (ctxt == NULL) {
6107: return(NULL);
6108: }
6109:
6110: /*
6111: * Set-up the SAX context
6112: */
6113: if (ctxt == NULL) return(NULL);
6114: if (sax != NULL) {
1.93 veillard 6115: if (ctxt->sax != NULL)
1.119 daniel 6116: xmlFree(ctxt->sax);
1.76 daniel 6117: ctxt->sax = sax;
6118: ctxt->userData = NULL;
6119: }
6120:
6121: /*
6122: * Ask the Entity resolver to load the damn thing
6123: */
6124:
6125: if ((ctxt->sax != NULL) && (ctxt->sax->resolveEntity != NULL))
6126: input = ctxt->sax->resolveEntity(ctxt->userData, ExternalID, SystemID);
6127: if (input == NULL) {
1.86 daniel 6128: if (sax != NULL) ctxt->sax = NULL;
1.76 daniel 6129: xmlFreeParserCtxt(ctxt);
6130: return(NULL);
6131: }
6132:
6133: /*
6134: * plug some encoding conversion routines here. !!!
6135: */
6136: xmlPushInput(ctxt, input);
6137: enc = xmlDetectCharEncoding(ctxt->input->cur);
6138: xmlSwitchEncoding(ctxt, enc);
6139:
1.95 veillard 6140: if (input->filename == NULL)
1.116 daniel 6141: input->filename = (char *) xmlStrdup(SystemID); /* !!!!!!! */
1.76 daniel 6142: input->line = 1;
6143: input->col = 1;
6144: input->base = ctxt->input->cur;
6145: input->cur = ctxt->input->cur;
6146: input->free = NULL;
6147:
6148: /*
6149: * let's parse that entity knowing it's an external subset.
6150: */
1.79 daniel 6151: xmlParseExternalSubset(ctxt, ExternalID, SystemID);
1.76 daniel 6152:
6153: if (ctxt->myDoc != NULL) {
6154: if (ctxt->wellFormed) {
6155: ret = ctxt->myDoc->intSubset;
6156: ctxt->myDoc->intSubset = NULL;
6157: } else {
6158: ret = NULL;
6159: }
6160: xmlFreeDoc(ctxt->myDoc);
6161: ctxt->myDoc = NULL;
6162: }
1.86 daniel 6163: if (sax != NULL) ctxt->sax = NULL;
1.76 daniel 6164: xmlFreeParserCtxt(ctxt);
6165:
6166: return(ret);
6167: }
6168:
6169: /**
6170: * xmlParseDTD :
6171: * @ExternalID: a NAME* containing the External ID of the DTD
6172: * @SystemID: a NAME* containing the URL to the DTD
6173: *
6174: * Load and parse an external subset.
6175: *
6176: * Returns the resulting xmlDtdPtr or NULL in case of error.
6177: */
6178:
6179: xmlDtdPtr
1.123 ! daniel 6180: xmlParseDTD(const xmlChar *ExternalID, const xmlChar *SystemID) {
1.76 daniel 6181: return(xmlSAXParseDTD(NULL, ExternalID, SystemID));
1.59 daniel 6182: }
6183:
6184: /**
6185: * xmlRecoverDoc :
1.123 ! daniel 6186: * @cur: a pointer to an array of xmlChar
1.59 daniel 6187: *
6188: * parse an XML in-memory document and build a tree.
6189: * In the case the document is not Well Formed, a tree is built anyway
6190: *
1.68 daniel 6191: * Returns the resulting document tree
1.59 daniel 6192: */
6193:
1.69 daniel 6194: xmlDocPtr
1.123 ! daniel 6195: xmlRecoverDoc(xmlChar *cur) {
1.59 daniel 6196: return(xmlSAXParseDoc(NULL, cur, 1));
1.55 daniel 6197: }
6198:
6199: /**
1.69 daniel 6200: * xmlCreateFileParserCtxt :
1.50 daniel 6201: * @filename: the filename
6202: *
1.69 daniel 6203: * Create a parser context for a file content.
6204: * Automatic support for ZLIB/Compress compressed document is provided
6205: * by default if found at compile-time.
1.50 daniel 6206: *
1.69 daniel 6207: * Returns the new parser context or NULL
1.9 httpng 6208: */
1.69 daniel 6209: xmlParserCtxtPtr
6210: xmlCreateFileParserCtxt(const char *filename)
6211: {
6212: xmlParserCtxtPtr ctxt;
1.40 daniel 6213: xmlParserInputPtr inputStream;
1.91 daniel 6214: xmlParserInputBufferPtr buf;
1.111 daniel 6215: char *directory = NULL;
1.9 httpng 6216:
1.91 daniel 6217: buf = xmlParserInputBufferCreateFilename(filename, XML_CHAR_ENCODING_NONE);
6218: if (buf == NULL) return(NULL);
1.9 httpng 6219:
1.97 daniel 6220: ctxt = xmlNewParserCtxt();
1.16 daniel 6221: if (ctxt == NULL) {
6222: return(NULL);
6223: }
1.97 daniel 6224:
1.96 daniel 6225: inputStream = xmlNewInputStream(ctxt);
1.40 daniel 6226: if (inputStream == NULL) {
1.97 daniel 6227: xmlFreeParserCtxt(ctxt);
1.40 daniel 6228: return(NULL);
6229: }
6230:
1.119 daniel 6231: inputStream->filename = xmlMemStrdup(filename);
1.91 daniel 6232: inputStream->buf = buf;
6233: inputStream->base = inputStream->buf->buffer->content;
6234: inputStream->cur = inputStream->buf->buffer->content;
1.16 daniel 6235:
1.40 daniel 6236: inputPush(ctxt, inputStream);
1.110 daniel 6237: if ((ctxt->directory == NULL) && (directory == NULL))
1.106 daniel 6238: directory = xmlParserGetDirectory(filename);
6239: if ((ctxt->directory == NULL) && (directory != NULL))
1.110 daniel 6240: ctxt->directory = directory;
1.106 daniel 6241:
1.69 daniel 6242: return(ctxt);
6243: }
6244:
6245: /**
6246: * xmlSAXParseFile :
6247: * @sax: the SAX handler block
6248: * @filename: the filename
6249: * @recovery: work in recovery mode, i.e. tries to read no Well Formed
6250: * documents
6251: *
6252: * parse an XML file and build a tree. Automatic support for ZLIB/Compress
6253: * compressed document is provided by default if found at compile-time.
6254: * It use the given SAX function block to handle the parsing callback.
6255: * If sax is NULL, fallback to the default DOM tree building routines.
6256: *
6257: * Returns the resulting document tree
6258: */
6259:
1.79 daniel 6260: xmlDocPtr
6261: xmlSAXParseFile(xmlSAXHandlerPtr sax, const char *filename,
1.69 daniel 6262: int recovery) {
6263: xmlDocPtr ret;
6264: xmlParserCtxtPtr ctxt;
1.111 daniel 6265: char *directory = NULL;
1.69 daniel 6266:
6267: ctxt = xmlCreateFileParserCtxt(filename);
6268: if (ctxt == NULL) return(NULL);
1.74 daniel 6269: if (sax != NULL) {
1.93 veillard 6270: if (ctxt->sax != NULL)
1.119 daniel 6271: xmlFree(ctxt->sax);
1.74 daniel 6272: ctxt->sax = sax;
6273: ctxt->userData = NULL;
6274: }
1.106 daniel 6275:
1.110 daniel 6276: if ((ctxt->directory == NULL) && (directory == NULL))
1.106 daniel 6277: directory = xmlParserGetDirectory(filename);
6278: if ((ctxt->directory == NULL) && (directory != NULL))
1.123 ! daniel 6279: ctxt->directory = (char *) xmlStrdup((xmlChar *) directory); /* !!!!!!! */
1.16 daniel 6280:
6281: xmlParseDocument(ctxt);
1.40 daniel 6282:
1.72 daniel 6283: if ((ctxt->wellFormed) || recovery) ret = ctxt->myDoc;
1.59 daniel 6284: else {
6285: ret = NULL;
1.72 daniel 6286: xmlFreeDoc(ctxt->myDoc);
6287: ctxt->myDoc = NULL;
1.59 daniel 6288: }
1.86 daniel 6289: if (sax != NULL)
6290: ctxt->sax = NULL;
1.69 daniel 6291: xmlFreeParserCtxt(ctxt);
1.20 daniel 6292:
6293: return(ret);
6294: }
6295:
1.55 daniel 6296: /**
6297: * xmlParseFile :
6298: * @filename: the filename
6299: *
6300: * parse an XML file and build a tree. Automatic support for ZLIB/Compress
6301: * compressed document is provided by default if found at compile-time.
6302: *
1.68 daniel 6303: * Returns the resulting document tree
1.55 daniel 6304: */
6305:
1.79 daniel 6306: xmlDocPtr
6307: xmlParseFile(const char *filename) {
1.59 daniel 6308: return(xmlSAXParseFile(NULL, filename, 0));
6309: }
6310:
6311: /**
6312: * xmlRecoverFile :
6313: * @filename: the filename
6314: *
6315: * parse an XML file and build a tree. Automatic support for ZLIB/Compress
6316: * compressed document is provided by default if found at compile-time.
6317: * In the case the document is not Well Formed, a tree is built anyway
6318: *
1.68 daniel 6319: * Returns the resulting document tree
1.59 daniel 6320: */
6321:
1.79 daniel 6322: xmlDocPtr
6323: xmlRecoverFile(const char *filename) {
1.59 daniel 6324: return(xmlSAXParseFile(NULL, filename, 1));
1.55 daniel 6325: }
1.32 daniel 6326:
1.50 daniel 6327: /**
1.69 daniel 6328: * xmlCreateMemoryParserCtxt :
1.68 daniel 6329: * @buffer: an pointer to a char array
1.50 daniel 6330: * @size: the siwe of the array
6331: *
1.69 daniel 6332: * Create a parser context for an XML in-memory document.
1.50 daniel 6333: *
1.69 daniel 6334: * Returns the new parser context or NULL
1.20 daniel 6335: */
1.69 daniel 6336: xmlParserCtxtPtr
6337: xmlCreateMemoryParserCtxt(char *buffer, int size) {
1.20 daniel 6338: xmlParserCtxtPtr ctxt;
1.40 daniel 6339: xmlParserInputPtr input;
1.75 daniel 6340: xmlCharEncoding enc;
1.40 daniel 6341:
6342: buffer[size - 1] = '\0';
6343:
1.97 daniel 6344: ctxt = xmlNewParserCtxt();
1.20 daniel 6345: if (ctxt == NULL) {
6346: return(NULL);
6347: }
1.97 daniel 6348:
1.96 daniel 6349: input = xmlNewInputStream(ctxt);
1.40 daniel 6350: if (input == NULL) {
1.97 daniel 6351: xmlFreeParserCtxt(ctxt);
1.40 daniel 6352: return(NULL);
6353: }
1.20 daniel 6354:
1.40 daniel 6355: input->filename = NULL;
6356: input->line = 1;
6357: input->col = 1;
1.96 daniel 6358: input->buf = NULL;
1.91 daniel 6359: input->consumed = 0;
1.45 daniel 6360:
6361: /*
1.75 daniel 6362: * plug some encoding conversion routines here. !!!
1.45 daniel 6363: */
1.116 daniel 6364: enc = xmlDetectCharEncoding(BAD_CAST buffer);
1.75 daniel 6365: xmlSwitchEncoding(ctxt, enc);
6366:
1.116 daniel 6367: input->base = BAD_CAST buffer;
6368: input->cur = BAD_CAST buffer;
1.69 daniel 6369: input->free = NULL;
1.20 daniel 6370:
1.40 daniel 6371: inputPush(ctxt, input);
1.69 daniel 6372: return(ctxt);
6373: }
6374:
6375: /**
6376: * xmlSAXParseMemory :
6377: * @sax: the SAX handler block
6378: * @buffer: an pointer to a char array
6379: * @size: the siwe of the array
6380: * @recovery: work in recovery mode, i.e. tries to read no Well Formed
6381: * documents
6382: *
6383: * parse an XML in-memory block and use the given SAX function block
6384: * to handle the parsing callback. If sax is NULL, fallback to the default
6385: * DOM tree building routines.
6386: *
6387: * Returns the resulting document tree
6388: */
6389: xmlDocPtr
6390: xmlSAXParseMemory(xmlSAXHandlerPtr sax, char *buffer, int size, int recovery) {
6391: xmlDocPtr ret;
6392: xmlParserCtxtPtr ctxt;
6393:
6394: ctxt = xmlCreateMemoryParserCtxt(buffer, size);
6395: if (ctxt == NULL) return(NULL);
1.74 daniel 6396: if (sax != NULL) {
6397: ctxt->sax = sax;
6398: ctxt->userData = NULL;
6399: }
1.20 daniel 6400:
6401: xmlParseDocument(ctxt);
1.40 daniel 6402:
1.72 daniel 6403: if ((ctxt->wellFormed) || recovery) ret = ctxt->myDoc;
1.59 daniel 6404: else {
6405: ret = NULL;
1.72 daniel 6406: xmlFreeDoc(ctxt->myDoc);
6407: ctxt->myDoc = NULL;
1.59 daniel 6408: }
1.86 daniel 6409: if (sax != NULL)
6410: ctxt->sax = NULL;
1.69 daniel 6411: xmlFreeParserCtxt(ctxt);
1.16 daniel 6412:
1.9 httpng 6413: return(ret);
1.17 daniel 6414: }
6415:
1.55 daniel 6416: /**
6417: * xmlParseMemory :
1.68 daniel 6418: * @buffer: an pointer to a char array
1.55 daniel 6419: * @size: the size of the array
6420: *
6421: * parse an XML in-memory block and build a tree.
6422: *
1.68 daniel 6423: * Returns the resulting document tree
1.55 daniel 6424: */
6425:
6426: xmlDocPtr xmlParseMemory(char *buffer, int size) {
1.59 daniel 6427: return(xmlSAXParseMemory(NULL, buffer, size, 0));
6428: }
6429:
6430: /**
6431: * xmlRecoverMemory :
1.68 daniel 6432: * @buffer: an pointer to a char array
1.59 daniel 6433: * @size: the size of the array
6434: *
6435: * parse an XML in-memory block and build a tree.
6436: * In the case the document is not Well Formed, a tree is built anyway
6437: *
1.68 daniel 6438: * Returns the resulting document tree
1.59 daniel 6439: */
6440:
6441: xmlDocPtr xmlRecoverMemory(char *buffer, int size) {
6442: return(xmlSAXParseMemory(NULL, buffer, size, 1));
1.17 daniel 6443: }
6444:
6445:
1.50 daniel 6446: /**
6447: * xmlSetupParserForBuffer:
6448: * @ctxt: an XML parser context
1.123 ! daniel 6449: * @buffer: a xmlChar * buffer
1.50 daniel 6450: * @filename: a file name
6451: *
1.19 daniel 6452: * Setup the parser context to parse a new buffer; Clears any prior
6453: * contents from the parser context. The buffer parameter must not be
6454: * NULL, but the filename parameter can be
6455: */
1.55 daniel 6456: void
1.123 ! daniel 6457: xmlSetupParserForBuffer(xmlParserCtxtPtr ctxt, const xmlChar* buffer,
1.17 daniel 6458: const char* filename)
6459: {
1.96 daniel 6460: xmlParserInputPtr input;
1.40 daniel 6461:
1.96 daniel 6462: input = xmlNewInputStream(ctxt);
6463: if (input == NULL) {
6464: perror("malloc");
1.119 daniel 6465: xmlFree(ctxt);
1.96 daniel 6466: exit(1);
6467: }
6468:
6469: xmlClearParserCtxt(ctxt);
6470: if (filename != NULL)
1.119 daniel 6471: input->filename = xmlMemStrdup(filename);
1.96 daniel 6472: input->base = buffer;
6473: input->cur = buffer;
6474: inputPush(ctxt, input);
1.17 daniel 6475: }
6476:
1.123 ! daniel 6477: /**
! 6478: * xmlSAXUserParseFile:
! 6479: * @sax: a SAX handler
! 6480: * @user_data: The user data returned on SAX callbacks
! 6481: * @filename: a file name
! 6482: *
! 6483: * parse an XML file and call the given SAX handler routines.
! 6484: * Automatic support for ZLIB/Compress compressed document is provided
! 6485: *
! 6486: * Returns 0 in case of success or a error number otherwise
! 6487: */
! 6488: int xmlSAXUserParseFile(xmlSAXHandlerPtr sax, void *user_data,
! 6489: const char *filename) {
! 6490: int ret = 0;
! 6491: xmlParserCtxtPtr ctxt;
! 6492:
! 6493: ctxt = xmlCreateFileParserCtxt(filename);
! 6494: if (ctxt == NULL) return -1;
! 6495: ctxt->sax = sax;
! 6496: ctxt->userData = user_data;
! 6497:
! 6498: xmlParseDocument(ctxt);
! 6499:
! 6500: if (ctxt->wellFormed)
! 6501: ret = 0;
! 6502: else {
! 6503: if (ctxt->errNo != 0)
! 6504: ret = ctxt->errNo;
! 6505: else
! 6506: ret = -1;
! 6507: }
! 6508: if (sax != NULL)
! 6509: ctxt->sax = NULL;
! 6510: xmlFreeParserCtxt(ctxt);
! 6511:
! 6512: return ret;
! 6513: }
! 6514:
! 6515: /**
! 6516: * xmlSAXUserParseMemory:
! 6517: * @sax: a SAX handler
! 6518: * @user_data: The user data returned on SAX callbacks
! 6519: * @buffer: an in-memory XML document input
! 6520: * @size: the lenght of the XML document in bytes
! 6521: *
! 6522: * A better SAX parsing routine.
! 6523: * parse an XML in-memory buffer and call the given SAX handler routines.
! 6524: *
! 6525: * Returns 0 in case of success or a error number otherwise
! 6526: */
! 6527: int xmlSAXUserParseMemory(xmlSAXHandlerPtr sax, void *user_data,
! 6528: char *buffer, int size) {
! 6529: int ret = 0;
! 6530: xmlParserCtxtPtr ctxt;
! 6531:
! 6532: ctxt = xmlCreateMemoryParserCtxt(buffer, size);
! 6533: if (ctxt == NULL) return -1;
! 6534: ctxt->sax = sax;
! 6535: ctxt->userData = user_data;
! 6536:
! 6537: xmlParseDocument(ctxt);
! 6538:
! 6539: if (ctxt->wellFormed)
! 6540: ret = 0;
! 6541: else {
! 6542: if (ctxt->errNo != 0)
! 6543: ret = ctxt->errNo;
! 6544: else
! 6545: ret = -1;
! 6546: }
! 6547: if (sax != NULL)
! 6548: ctxt->sax = NULL;
! 6549: xmlFreeParserCtxt(ctxt);
! 6550:
! 6551: return ret;
! 6552: }
! 6553:
1.32 daniel 6554:
1.98 daniel 6555: /************************************************************************
6556: * *
6557: * Miscellaneous *
6558: * *
6559: ************************************************************************/
6560:
6561:
1.50 daniel 6562: /**
6563: * xmlParserFindNodeInfo:
6564: * @ctxt: an XML parser context
6565: * @node: an XML node within the tree
6566: *
6567: * Find the parser node info struct for a given node
6568: *
1.68 daniel 6569: * Returns an xmlParserNodeInfo block pointer or NULL
1.32 daniel 6570: */
6571: const xmlParserNodeInfo* xmlParserFindNodeInfo(const xmlParserCtxt* ctx,
6572: const xmlNode* node)
6573: {
6574: unsigned long pos;
6575:
6576: /* Find position where node should be at */
6577: pos = xmlParserFindNodeInfoIndex(&ctx->node_seq, node);
6578: if ( ctx->node_seq.buffer[pos].node == node )
6579: return &ctx->node_seq.buffer[pos];
6580: else
6581: return NULL;
6582: }
6583:
6584:
1.50 daniel 6585: /**
6586: * xmlInitNodeInfoSeq :
6587: * @seq: a node info sequence pointer
6588: *
6589: * -- Initialize (set to initial state) node info sequence
1.32 daniel 6590: */
1.55 daniel 6591: void
6592: xmlInitNodeInfoSeq(xmlParserNodeInfoSeqPtr seq)
1.32 daniel 6593: {
6594: seq->length = 0;
6595: seq->maximum = 0;
6596: seq->buffer = NULL;
6597: }
6598:
1.50 daniel 6599: /**
6600: * xmlClearNodeInfoSeq :
6601: * @seq: a node info sequence pointer
6602: *
6603: * -- Clear (release memory and reinitialize) node
1.32 daniel 6604: * info sequence
6605: */
1.55 daniel 6606: void
6607: xmlClearNodeInfoSeq(xmlParserNodeInfoSeqPtr seq)
1.32 daniel 6608: {
6609: if ( seq->buffer != NULL )
1.119 daniel 6610: xmlFree(seq->buffer);
1.32 daniel 6611: xmlInitNodeInfoSeq(seq);
6612: }
6613:
6614:
1.50 daniel 6615: /**
6616: * xmlParserFindNodeInfoIndex:
6617: * @seq: a node info sequence pointer
6618: * @node: an XML node pointer
6619: *
6620: *
1.32 daniel 6621: * xmlParserFindNodeInfoIndex : Find the index that the info record for
6622: * the given node is or should be at in a sorted sequence
1.68 daniel 6623: *
6624: * Returns a long indicating the position of the record
1.32 daniel 6625: */
6626: unsigned long xmlParserFindNodeInfoIndex(const xmlParserNodeInfoSeq* seq,
6627: const xmlNode* node)
6628: {
6629: unsigned long upper, lower, middle;
6630: int found = 0;
6631:
6632: /* Do a binary search for the key */
6633: lower = 1;
6634: upper = seq->length;
6635: middle = 0;
6636: while ( lower <= upper && !found) {
6637: middle = lower + (upper - lower) / 2;
6638: if ( node == seq->buffer[middle - 1].node )
6639: found = 1;
6640: else if ( node < seq->buffer[middle - 1].node )
6641: upper = middle - 1;
6642: else
6643: lower = middle + 1;
6644: }
6645:
6646: /* Return position */
6647: if ( middle == 0 || seq->buffer[middle - 1].node < node )
6648: return middle;
6649: else
6650: return middle - 1;
6651: }
6652:
6653:
1.50 daniel 6654: /**
6655: * xmlParserAddNodeInfo:
6656: * @ctxt: an XML parser context
1.68 daniel 6657: * @info: a node info sequence pointer
1.50 daniel 6658: *
6659: * Insert node info record into the sorted sequence
1.32 daniel 6660: */
1.55 daniel 6661: void
6662: xmlParserAddNodeInfo(xmlParserCtxtPtr ctxt,
1.68 daniel 6663: const xmlParserNodeInfo* info)
1.32 daniel 6664: {
6665: unsigned long pos;
6666: static unsigned int block_size = 5;
6667:
6668: /* Find pos and check to see if node is already in the sequence */
1.55 daniel 6669: pos = xmlParserFindNodeInfoIndex(&ctxt->node_seq, info->node);
6670: if ( pos < ctxt->node_seq.length
6671: && ctxt->node_seq.buffer[pos].node == info->node ) {
6672: ctxt->node_seq.buffer[pos] = *info;
1.32 daniel 6673: }
6674:
6675: /* Otherwise, we need to add new node to buffer */
6676: else {
6677: /* Expand buffer by 5 if needed */
1.55 daniel 6678: if ( ctxt->node_seq.length + 1 > ctxt->node_seq.maximum ) {
1.32 daniel 6679: xmlParserNodeInfo* tmp_buffer;
1.55 daniel 6680: unsigned int byte_size = (sizeof(*ctxt->node_seq.buffer)
6681: *(ctxt->node_seq.maximum + block_size));
1.32 daniel 6682:
1.55 daniel 6683: if ( ctxt->node_seq.buffer == NULL )
1.119 daniel 6684: tmp_buffer = (xmlParserNodeInfo*) xmlMalloc(byte_size);
1.32 daniel 6685: else
1.119 daniel 6686: tmp_buffer = (xmlParserNodeInfo*) xmlRealloc(ctxt->node_seq.buffer, byte_size);
1.32 daniel 6687:
6688: if ( tmp_buffer == NULL ) {
1.55 daniel 6689: if ((ctxt->sax != NULL) && (ctxt->sax->error != NULL))
1.74 daniel 6690: ctxt->sax->error(ctxt->userData, "Out of memory\n");
1.123 ! daniel 6691: ctxt->errNo = XML_ERR_NO_MEMORY;
1.32 daniel 6692: return;
6693: }
1.55 daniel 6694: ctxt->node_seq.buffer = tmp_buffer;
6695: ctxt->node_seq.maximum += block_size;
1.32 daniel 6696: }
6697:
6698: /* If position is not at end, move elements out of the way */
1.55 daniel 6699: if ( pos != ctxt->node_seq.length ) {
1.32 daniel 6700: unsigned long i;
6701:
1.55 daniel 6702: for ( i = ctxt->node_seq.length; i > pos; i-- )
6703: ctxt->node_seq.buffer[i] = ctxt->node_seq.buffer[i - 1];
1.32 daniel 6704: }
6705:
6706: /* Copy element and increase length */
1.55 daniel 6707: ctxt->node_seq.buffer[pos] = *info;
6708: ctxt->node_seq.length++;
1.32 daniel 6709: }
6710: }
1.77 daniel 6711:
1.98 daniel 6712:
6713: /**
6714: * xmlSubstituteEntitiesDefault :
6715: * @val: int 0 or 1
6716: *
6717: * Set and return the previous value for default entity support.
6718: * Initially the parser always keep entity references instead of substituting
6719: * entity values in the output. This function has to be used to change the
6720: * default parser behaviour
6721: * SAX::subtituteEntities() has to be used for changing that on a file by
6722: * file basis.
6723: *
6724: * Returns the last value for 0 for no substitution, 1 for substitution.
6725: */
6726:
6727: int
6728: xmlSubstituteEntitiesDefault(int val) {
6729: int old = xmlSubstituteEntitiesDefaultValue;
6730:
6731: xmlSubstituteEntitiesDefaultValue = val;
6732: return(old);
6733: }
1.77 daniel 6734:
Webmaster