Annotation of XML/parser.c, revision 1.103
1.1 veillard 1: /*
1.3 veillard 2: * parser.c : an XML 1.0 non-verifying parser
1.15 veillard 3: *
4: * See Copyright for the status of this software.
5: *
1.60 daniel 6: * Daniel.Veillard@w3.org
1.1 veillard 7: */
8:
1.26 daniel 9: #ifdef WIN32
10: #define HAVE_FCNTL_H
11: #include <io.h>
12: #else
1.9 httpng 13: #include <config.h>
1.26 daniel 14: #endif
1.1 veillard 15: #include <stdio.h>
16: #include <ctype.h>
1.14 veillard 17: #include <string.h> /* for memset() only */
1.50 daniel 18: #include <stdlib.h>
1.9 httpng 19: #include <sys/stat.h>
20: #ifdef HAVE_FCNTL_H
21: #include <fcntl.h>
22: #endif
1.10 httpng 23: #ifdef HAVE_UNISTD_H
24: #include <unistd.h>
25: #endif
1.20 daniel 26: #ifdef HAVE_ZLIB_H
27: #include <zlib.h>
28: #endif
1.1 veillard 29:
1.14 veillard 30: #include "tree.h"
1.1 veillard 31: #include "parser.h"
1.14 veillard 32: #include "entities.h"
1.75 daniel 33: #include "encoding.h"
1.61 daniel 34: #include "valid.h"
1.69 daniel 35: #include "parserInternals.h"
1.91 daniel 36: #include "xmlIO.h"
1.1 veillard 37:
1.86 daniel 38: const char *xmlParserVersion = LIBXML_VERSION;
39:
1.91 daniel 40: #define XML_MAX_NAMELEN 1000
41:
42: /************************************************************************
43: * *
44: * Input handling functions for progressive parsing *
45: * *
46: ************************************************************************/
47:
48: /* #define DEBUG_INPUT */
49:
50: #define INPUT_CHUNK 50
51:
52: #ifdef DEBUG_INPUT
53: #define CHECK_BUFFER(in) check_buffer(in)
54: #else
55: #define CHECK_BUFFER(in)
56: #endif
57:
58: void check_buffer(xmlParserInputPtr in) {
59: if (in->base != in->buf->buffer->content) {
60: fprintf(stderr, "xmlParserInput: base mismatch problem\n");
61: }
62: if (in->cur < in->base) {
63: fprintf(stderr, "xmlParserInput: cur < base problem\n");
64: }
65: if (in->cur > in->base + in->buf->buffer->use) {
66: fprintf(stderr, "xmlParserInput: cur > base + use problem\n");
67: }
68: fprintf(stderr,"buffer %x : content %x, cur %d, use %d, size %d\n",
69: (int) in, (int) in->buf->buffer->content, in->cur - in->base,
70: in->buf->buffer->use, in->buf->buffer->size);
71: }
72:
73:
74: /**
75: * xmlParserInputRead:
76: * @in: an XML parser input
77: * @len: an indicative size for the lookahead
78: *
79: * This function refresh the input for the parser. It doesn't try to
80: * preserve pointers to the input buffer, and discard already read data
81: *
82: * Returns the number of CHARs read, or -1 in case of error, 0 indicate the
83: * end of this entity
84: */
85: int
86: xmlParserInputRead(xmlParserInputPtr in, int len) {
87: int ret;
88: int used;
89: int index;
90:
91: #ifdef DEBUG_INPUT
92: fprintf(stderr, "Read\n");
93: #endif
94: if (in->buf == NULL) return(-1);
95: if (in->base == NULL) return(-1);
96: if (in->cur == NULL) return(-1);
97: if (in->buf->buffer == NULL) return(-1);
98:
99: CHECK_BUFFER(in);
100:
101: used = in->cur - in->buf->buffer->content;
102: ret = xmlBufferShrink(in->buf->buffer, used);
103: if (ret > 0) {
104: in->cur -= ret;
105: in->consumed += ret;
106: }
107: ret = xmlParserInputBufferRead(in->buf, len);
108: if (in->base != in->buf->buffer->content) {
109: /*
110: * the buffer has been realloced
111: */
112: index = in->cur - in->base;
113: in->base = in->buf->buffer->content;
114: in->cur = &in->buf->buffer->content[index];
115: }
116:
117: CHECK_BUFFER(in);
118:
119: return(ret);
120: }
121:
122: /**
123: * xmlParserInputGrow:
124: * @in: an XML parser input
125: * @len: an indicative size for the lookahead
126: *
127: * This function increase the input for the parser. It tries to
128: * preserve pointers to the input buffer, and keep already read data
129: *
130: * Returns the number of CHARs read, or -1 in case of error, 0 indicate the
131: * end of this entity
132: */
133: int
134: xmlParserInputGrow(xmlParserInputPtr in, int len) {
135: int ret;
136: int index;
137:
138: #ifdef DEBUG_INPUT
139: fprintf(stderr, "Grow\n");
140: #endif
141: if (in->buf == NULL) return(-1);
142: if (in->base == NULL) return(-1);
143: if (in->cur == NULL) return(-1);
144: if (in->buf->buffer == NULL) return(-1);
145:
146: CHECK_BUFFER(in);
147:
148: index = in->cur - in->base;
149: if (in->buf->buffer->use > index + INPUT_CHUNK) {
150:
151: CHECK_BUFFER(in);
152:
153: return(0);
154: }
155: ret = xmlParserInputBufferGrow(in->buf, len);
156: if (in->base != in->buf->buffer->content) {
157: /*
158: * the buffer has been realloced
159: */
160: index = in->cur - in->base;
161: in->base = in->buf->buffer->content;
162: in->cur = &in->buf->buffer->content[index];
163: }
164:
165: CHECK_BUFFER(in);
166:
167: return(ret);
168: }
169:
170: /**
171: * xmlParserInputShrink:
172: * @in: an XML parser input
173: *
174: * This function removes used input for the parser.
175: */
176: void
177: xmlParserInputShrink(xmlParserInputPtr in) {
178: int used;
179: int ret;
180: int index;
181:
182: #ifdef DEBUG_INPUT
183: fprintf(stderr, "Shrink\n");
184: #endif
185: if (in->buf == NULL) return;
186: if (in->base == NULL) return;
187: if (in->cur == NULL) return;
188: if (in->buf->buffer == NULL) return;
189:
190: CHECK_BUFFER(in);
191:
192: used = in->cur - in->buf->buffer->content;
193: if (used > INPUT_CHUNK) {
194: ret = xmlBufferShrink(in->buf->buffer, used);
195: if (ret > 0) {
196: in->cur -= ret;
197: in->consumed += ret;
198: }
199: }
200:
201: CHECK_BUFFER(in);
202:
203: if (in->buf->buffer->use > INPUT_CHUNK) {
204: return;
205: }
206: xmlParserInputBufferRead(in->buf, 2 * INPUT_CHUNK);
207: if (in->base != in->buf->buffer->content) {
208: /*
209: * the buffer has been realloced
210: */
211: index = in->cur - in->base;
212: in->base = in->buf->buffer->content;
213: in->cur = &in->buf->buffer->content[index];
214: }
215:
216: CHECK_BUFFER(in);
217: }
218:
1.45 daniel 219: /************************************************************************
220: * *
221: * Parser stacks related functions and macros *
222: * *
223: ************************************************************************/
1.79 daniel 224:
225: int xmlSubstituteEntitiesDefaultValue = 0;
1.100 daniel 226: int xmlDoValidityCheckingDefaultValue = 0;
1.79 daniel 227:
1.1 veillard 228: /*
1.40 daniel 229: * Generic function for accessing stacks in the Parser Context
1.1 veillard 230: */
231:
1.31 daniel 232: #define PUSH_AND_POP(type, name) \
1.72 daniel 233: extern int name##Push(xmlParserCtxtPtr ctxt, type value) { \
1.31 daniel 234: if (ctxt->name##Nr >= ctxt->name##Max) { \
235: ctxt->name##Max *= 2; \
1.40 daniel 236: ctxt->name##Tab = (void *) realloc(ctxt->name##Tab, \
237: ctxt->name##Max * sizeof(ctxt->name##Tab[0])); \
238: if (ctxt->name##Tab == NULL) { \
1.31 daniel 239: fprintf(stderr, "realloc failed !\n"); \
240: exit(1); \
241: } \
242: } \
1.40 daniel 243: ctxt->name##Tab[ctxt->name##Nr] = value; \
244: ctxt->name = value; \
245: return(ctxt->name##Nr++); \
1.31 daniel 246: } \
1.72 daniel 247: extern type name##Pop(xmlParserCtxtPtr ctxt) { \
1.69 daniel 248: type ret; \
1.40 daniel 249: if (ctxt->name##Nr <= 0) return(0); \
250: ctxt->name##Nr--; \
1.50 daniel 251: if (ctxt->name##Nr > 0) \
252: ctxt->name = ctxt->name##Tab[ctxt->name##Nr - 1]; \
253: else \
254: ctxt->name = NULL; \
1.69 daniel 255: ret = ctxt->name##Tab[ctxt->name##Nr]; \
256: ctxt->name##Tab[ctxt->name##Nr] = 0; \
257: return(ret); \
1.31 daniel 258: } \
259:
1.40 daniel 260: PUSH_AND_POP(xmlParserInputPtr, input)
1.41 daniel 261: PUSH_AND_POP(xmlNodePtr, node)
1.40 daniel 262:
/*
 * Macros for accessing the content. Those should be used only by the parser,
 * and not exported. All of them expect a variable named ctxt (the parser
 * context) to be in scope.
 *
 * Dirty macros, i.e. one needs to make assumptions on the context to use them
 *
 *   CUR_PTR return the current pointer to the CHAR to be parsed.
 *   CUR     returns the pending ctxt->token if there is one, otherwise
 *           the current CHAR value, i.e. an 8 bit value if compiled
 *           in ISO-Latin or UTF-8, and the current 16 bit value if compiled
 *           in UNICODE mode. This should be used internally by the parser
 *           only to compare to ASCII values otherwise it would break when
 *           running with UTF-8 encoding.
 *   NXT(n)  returns the n'th next CHAR. Same as CUR it should be used only
 *           to compare on ASCII based substring.
 *   SKIP(n) Skip n CHAR, and must also be used only to skip ASCII defined
 *           strings within the parser.
 *   SHRINK  drops consumed input, then pops the input if it is exhausted.
 *   GROW    asks for more lookahead, then pops the input if it is exhausted.
 *           SHRINK and GROW are single statements (do { } while (0)), so
 *           they are safe as the body of an unbraced if/else; they must be
 *           followed by a semicolon.
 *
 * Clean macros, not dependent of an ASCII context, expect UTF-8 encoding
 *
 *   CURRENT Returns the current char value, with the full decoding of
 *           UTF-8 if we are using this mode. It returns an int.
 *   NEXT    Skip to the next character, this does the proper decoding
 *           in UTF-8 mode. It also pops unfinished entities on the fly and
 *           hands '%' / '&' to the reference handlers.
 *   COPY(to) copy one char to *to, increment CUR_PTR and to accordingly
 *
 * SKIP_BLANKS keeps its trailing semicolon and NEXT stays a plain brace
 * block: code outside this view may use them without a following ';'.
 */

#define CUR (ctxt->token ? ctxt->token : (*ctxt->input->cur))
#define SKIP(val) (ctxt->input->cur += (val))
#define NXT(val) (ctxt->input->cur[(val)])
#define CUR_PTR ctxt->input->cur
#define SHRINK do {                                                     \
    xmlParserInputShrink(ctxt->input);                                  \
    if ((*ctxt->input->cur == 0) &&                                     \
        (xmlParserInputGrow(ctxt->input, INPUT_CHUNK) <= 0))            \
            xmlPopInput(ctxt);                                          \
  } while (0)

#define GROW do {                                                       \
    xmlParserInputGrow(ctxt->input, INPUT_CHUNK);                       \
    if ((*ctxt->input->cur == 0) &&                                     \
        (xmlParserInputGrow(ctxt->input, INPUT_CHUNK) <= 0))            \
            xmlPopInput(ctxt);                                          \
  } while (0)

#define SKIP_BLANKS                                                     \
    do {                                                                \
        while (IS_BLANK(CUR)) NEXT;                                     \
        if (*ctxt->input->cur == '%') xmlParserHandlePEReference(ctxt); \
        if (*ctxt->input->cur == '&') xmlParserHandleReference(ctxt);   \
    } while (IS_BLANK(CUR));

#ifndef USE_UTF_8
#define CURRENT (*ctxt->input->cur)
#define NEXT {                                                          \
    if (ctxt->token != 0) ctxt->token = 0;                              \
    else {                                                              \
    if ((*ctxt->input->cur == 0) &&                                     \
        (xmlParserInputGrow(ctxt->input, INPUT_CHUNK) <= 0)) {          \
            xmlPopInput(ctxt);                                          \
    } else {                                                            \
        if (*(ctxt->input->cur) == '\n') {                              \
            ctxt->input->line++; ctxt->input->col = 1;                  \
        } else ctxt->input->col++;                                      \
        ctxt->input->cur++;                                             \
        if (*ctxt->input->cur == 0)                                     \
            xmlParserInputGrow(ctxt->input, INPUT_CHUNK);               \
    }                                                                   \
    if (*ctxt->input->cur == '%') xmlParserHandlePEReference(ctxt);     \
    if (*ctxt->input->cur == '&') xmlParserHandleReference(ctxt);       \
}}

#else
#endif
1.42 daniel 332:
1.97 daniel 333: /************************************************************************
334: * *
335: * Commodity functions to handle entities processing *
336: * *
337: ************************************************************************/
1.40 daniel 338:
1.50 daniel 339: /**
340: * xmlPopInput:
341: * @ctxt: an XML parser context
342: *
1.40 daniel 343: * xmlPopInput: the current input pointed by ctxt->input came to an end
344: * pop it and return the next char.
1.45 daniel 345: *
1.68 daniel 346: * Returns the current CHAR in the parser context
1.40 daniel 347: */
1.55 daniel 348: CHAR
349: xmlPopInput(xmlParserCtxtPtr ctxt) {
1.40 daniel 350: if (ctxt->inputNr == 1) return(0); /* End of main Input */
1.69 daniel 351: xmlFreeInputStream(inputPop(ctxt));
1.97 daniel 352: if ((*ctxt->input->cur == 0) &&
353: (xmlParserInputGrow(ctxt->input, INPUT_CHUNK) <= 0))
354: return(xmlPopInput(ctxt));
1.40 daniel 355: return(CUR);
356: }
357:
1.50 daniel 358: /**
359: * xmlPushInput:
360: * @ctxt: an XML parser context
361: * @input: an XML parser input fragment (entity, XML fragment ...).
362: *
1.40 daniel 363: * xmlPushInput: switch to a new input stream which is stacked on top
364: * of the previous one(s).
365: */
1.55 daniel 366: void
367: xmlPushInput(xmlParserCtxtPtr ctxt, xmlParserInputPtr input) {
1.40 daniel 368: if (input == NULL) return;
369: inputPush(ctxt, input);
370: }
371:
1.50 daniel 372: /**
1.69 daniel 373: * xmlFreeInputStream:
1.101 daniel 374: * @input: an xmlP arserInputPtr
1.69 daniel 375: *
376: * Free up an input stream.
377: */
378: void
379: xmlFreeInputStream(xmlParserInputPtr input) {
380: if (input == NULL) return;
381:
382: if (input->filename != NULL) free((char *) input->filename);
1.94 daniel 383: if (input->directory != NULL) free((char *) input->directory);
1.69 daniel 384: if ((input->free != NULL) && (input->base != NULL))
385: input->free((char *) input->base);
1.93 veillard 386: if (input->buf != NULL)
387: xmlFreeParserInputBuffer(input->buf);
1.69 daniel 388: memset(input, -1, sizeof(xmlParserInput));
389: free(input);
390: }
391:
392: /**
1.96 daniel 393: * xmlNewInputStream:
394: * @ctxt: an XML parser context
395: *
396: * Create a new input stream structure
397: * Returns the new input stream or NULL
398: */
399: xmlParserInputPtr
400: xmlNewInputStream(xmlParserCtxtPtr ctxt) {
401: xmlParserInputPtr input;
402:
403: input = (xmlParserInputPtr) malloc(sizeof(xmlParserInput));
404: if (input == NULL) {
405: if ((ctxt->sax != NULL) && (ctxt->sax->error != NULL))
406: ctxt->sax->error(ctxt->userData, "malloc: couldn't allocate a new input stream\n");
407: return(NULL);
408: }
409: input->filename = NULL;
410: input->directory = NULL;
411: input->base = NULL;
412: input->cur = NULL;
413: input->buf = NULL;
414: input->line = 1;
415: input->col = 1;
416: input->buf = NULL;
417: input->free = NULL;
418: input->consumed = 0;
419: return(input);
420: }
421:
422: /**
1.50 daniel 423: * xmlNewEntityInputStream:
424: * @ctxt: an XML parser context
425: * @entity: an Entity pointer
426: *
1.82 daniel 427: * Create a new input stream based on an xmlEntityPtr
1.68 daniel 428: * Returns the new input stream
1.45 daniel 429: */
1.50 daniel 430: xmlParserInputPtr
431: xmlNewEntityInputStream(xmlParserCtxtPtr ctxt, xmlEntityPtr entity) {
1.45 daniel 432: xmlParserInputPtr input;
433:
434: if (entity == NULL) {
1.55 daniel 435: if ((ctxt->sax != NULL) && (ctxt->sax->error != NULL))
1.74 daniel 436: ctxt->sax->error(ctxt->userData,
1.45 daniel 437: "internal: xmlNewEntityInputStream entity = NULL\n");
1.50 daniel 438: return(NULL);
1.45 daniel 439: }
440: if (entity->content == NULL) {
1.55 daniel 441: if ((ctxt->sax != NULL) && (ctxt->sax->error != NULL))
1.74 daniel 442: ctxt->sax->error(ctxt->userData,
1.45 daniel 443: "internal: xmlNewEntityInputStream entity->input = NULL\n");
1.50 daniel 444: return(NULL);
1.45 daniel 445: }
1.96 daniel 446: input = xmlNewInputStream(ctxt);
1.45 daniel 447: if (input == NULL) {
1.50 daniel 448: return(NULL);
1.45 daniel 449: }
450: input->filename = entity->SystemID; /* TODO !!! char <- CHAR */
451: input->base = entity->content;
452: input->cur = entity->content;
1.50 daniel 453: return(input);
1.45 daniel 454: }
455:
1.59 daniel 456: /**
457: * xmlNewStringInputStream:
458: * @ctxt: an XML parser context
1.96 daniel 459: * @buffer: an memory buffer
1.59 daniel 460: *
461: * Create a new input stream based on a memory buffer.
1.68 daniel 462: * Returns the new input stream
1.59 daniel 463: */
464: xmlParserInputPtr
1.96 daniel 465: xmlNewStringInputStream(xmlParserCtxtPtr ctxt, const CHAR *buffer) {
1.59 daniel 466: xmlParserInputPtr input;
467:
1.96 daniel 468: if (buffer == NULL) {
1.59 daniel 469: if ((ctxt->sax != NULL) && (ctxt->sax->error != NULL))
1.74 daniel 470: ctxt->sax->error(ctxt->userData,
1.59 daniel 471: "internal: xmlNewStringInputStream string = NULL\n");
472: return(NULL);
473: }
1.96 daniel 474: input = xmlNewInputStream(ctxt);
1.59 daniel 475: if (input == NULL) {
476: return(NULL);
477: }
1.96 daniel 478: input->base = buffer;
479: input->cur = buffer;
1.59 daniel 480: return(input);
481: }
482:
1.76 daniel 483: /**
484: * xmlNewInputFromFile:
485: * @ctxt: an XML parser context
486: * @filename: the filename to use as entity
487: *
488: * Create a new input stream based on a file.
489: *
490: * Returns the new input stream or NULL in case of error
491: */
492: xmlParserInputPtr
1.79 daniel 493: xmlNewInputFromFile(xmlParserCtxtPtr ctxt, const char *filename) {
1.91 daniel 494: xmlParserInputBufferPtr buf;
1.76 daniel 495: xmlParserInputPtr inputStream;
1.94 daniel 496: const char *directory = NULL;
1.76 daniel 497:
1.96 daniel 498: if (ctxt == NULL) return(NULL);
1.91 daniel 499: buf = xmlParserInputBufferCreateFilename(filename, XML_CHAR_ENCODING_NONE);
1.94 daniel 500: if (buf == NULL) {
501: if ((ctxt->input != NULL) && (ctxt->input->directory != NULL)) {
502: char name[1024];
503: #ifdef WIN32
504: sprintf(name, "%s\\%s", ctxt->input->directory, filename);
505: #else
506: sprintf(name, "%s/%s", ctxt->input->directory, filename);
507: #endif
508: buf = xmlParserInputBufferCreateFilename(name,
509: XML_CHAR_ENCODING_NONE);
510: if (buf == NULL)
511: return(NULL);
512: directory = strdup(ctxt->input->directory);
513: } else
514: return(NULL);
515: }
516: if (directory == NULL)
517: directory = xmlParserGetDirectory(filename);
1.76 daniel 518:
1.96 daniel 519: inputStream = xmlNewInputStream(ctxt);
1.76 daniel 520: if (inputStream == NULL) {
1.96 daniel 521: if (directory != NULL) free((char *) directory);
1.76 daniel 522: return(NULL);
523: }
524:
525: inputStream->filename = strdup(filename);
1.94 daniel 526: inputStream->directory = directory;
1.91 daniel 527: inputStream->buf = buf;
1.76 daniel 528:
1.91 daniel 529: inputStream->base = inputStream->buf->buffer->content;
530: inputStream->cur = inputStream->buf->buffer->content;
1.76 daniel 531: return(inputStream);
532: }
533:
1.77 daniel 534: /************************************************************************
535: * *
1.97 daniel 536: * Commodity functions to handle parser contexts *
537: * *
538: ************************************************************************/
539:
540: /**
541: * xmlInitParserCtxt:
542: * @ctxt: an XML parser context
543: *
544: * Initialize a parser context
545: */
546:
547: void
548: xmlInitParserCtxt(xmlParserCtxtPtr ctxt)
549: {
550: xmlSAXHandler *sax;
551:
552: sax = (xmlSAXHandler *) malloc(sizeof(xmlSAXHandler));
553: if (sax == NULL) {
554: fprintf(stderr, "xmlInitParserCtxt: out of memory\n");
555: }
556:
557: /* Allocate the Input stack */
558: ctxt->inputTab = (xmlParserInputPtr *) malloc(5 * sizeof(xmlParserInputPtr));
559: ctxt->inputNr = 0;
560: ctxt->inputMax = 5;
561: ctxt->input = NULL;
562: ctxt->version = NULL;
563: ctxt->encoding = NULL;
564: ctxt->standalone = -1;
1.98 daniel 565: ctxt->hasExternalSubset = 0;
566: ctxt->hasPErefs = 0;
1.97 daniel 567: ctxt->html = 0;
1.98 daniel 568: ctxt->external = 0;
1.97 daniel 569: ctxt->instate = XML_PARSER_PROLOG;
570: ctxt->token = 0;
571:
572: /* Allocate the Node stack */
573: ctxt->nodeTab = (xmlNodePtr *) malloc(10 * sizeof(xmlNodePtr));
574: ctxt->nodeNr = 0;
575: ctxt->nodeMax = 10;
576: ctxt->node = NULL;
577:
578: if (sax == NULL) ctxt->sax = &xmlDefaultSAXHandler;
579: else {
580: ctxt->sax = sax;
581: memcpy(sax, &xmlDefaultSAXHandler, sizeof(xmlSAXHandler));
582: }
583: ctxt->userData = ctxt;
584: ctxt->myDoc = NULL;
585: ctxt->wellFormed = 1;
1.99 daniel 586: ctxt->valid = 1;
1.100 daniel 587: ctxt->validate = xmlDoValidityCheckingDefaultValue;
588: ctxt->vctxt.userData = ctxt;
589: ctxt->vctxt.error = xmlParserValidityError;
590: ctxt->vctxt.warning = xmlParserValidityWarning;
1.97 daniel 591: ctxt->replaceEntities = xmlSubstituteEntitiesDefaultValue;
592: ctxt->record_info = 0;
593: xmlInitNodeInfoSeq(&ctxt->node_seq);
594: }
595:
596: /**
597: * xmlFreeParserCtxt:
598: * @ctxt: an XML parser context
599: *
600: * Free all the memory used by a parser context. However the parsed
601: * document in ctxt->myDoc is not freed.
602: */
603:
604: void
605: xmlFreeParserCtxt(xmlParserCtxtPtr ctxt)
606: {
607: xmlParserInputPtr input;
608:
609: if (ctxt == NULL) return;
610:
611: while ((input = inputPop(ctxt)) != NULL) {
612: xmlFreeInputStream(input);
613: }
614:
615: if (ctxt->nodeTab != NULL) free(ctxt->nodeTab);
616: if (ctxt->inputTab != NULL) free(ctxt->inputTab);
617: if (ctxt->version != NULL) free((char *) ctxt->version);
618: if (ctxt->encoding != NULL) free((char *) ctxt->encoding);
619: if ((ctxt->sax != NULL) && (ctxt->sax != &xmlDefaultSAXHandler))
620: free(ctxt->sax);
621: free(ctxt);
622: }
623:
624: /**
625: * xmlNewParserCtxt:
626: *
627: * Allocate and initialize a new parser context.
628: *
629: * Returns the xmlParserCtxtPtr or NULL
630: */
631:
632: xmlParserCtxtPtr
633: xmlNewParserCtxt()
634: {
635: xmlParserCtxtPtr ctxt;
636:
637: ctxt = (xmlParserCtxtPtr) malloc(sizeof(xmlParserCtxt));
638: if (ctxt == NULL) {
639: fprintf(stderr, "xmlNewParserCtxt : cannot allocate context\n");
640: perror("malloc");
641: return(NULL);
642: }
643: xmlInitParserCtxt(ctxt);
644: return(ctxt);
645: }
646:
647: /**
648: * xmlClearParserCtxt:
649: * @ctxt: an XML parser context
650: *
651: * Clear (release owned resources) and reinitialize a parser context
652: */
653:
654: void
655: xmlClearParserCtxt(xmlParserCtxtPtr ctxt)
656: {
657: xmlClearNodeInfoSeq(&ctxt->node_seq);
658: xmlInitParserCtxt(ctxt);
659: }
660:
661: /************************************************************************
662: * *
1.77 daniel 663: * Commodity functions to handle entities *
664: * *
665: ************************************************************************/
666:
1.97 daniel 667: void xmlParserHandleReference(xmlParserCtxtPtr ctxt);
668: void xmlParserHandlePEReference(xmlParserCtxtPtr ctxt);
669:
670: /**
671: * xmlParseCharRef:
672: * @ctxt: an XML parser context
673: *
674: * parse Reference declarations
675: *
676: * [66] CharRef ::= '&#' [0-9]+ ';' |
677: * '&#x' [0-9a-fA-F]+ ';'
678: *
1.98 daniel 679: * [ WFC: Legal Character ]
680: * Characters referred to using character references must match the
681: * production for Char.
682: *
1.97 daniel 683: * Returns the value parsed (as an int)
1.77 daniel 684: */
1.97 daniel 685: int
686: xmlParseCharRef(xmlParserCtxtPtr ctxt) {
687: int val = 0;
688:
689: if ((CUR == '&') && (NXT(1) == '#') &&
690: (NXT(2) == 'x')) {
691: SKIP(3);
692: while (CUR != ';') {
693: if ((CUR >= '0') && (CUR <= '9'))
694: val = val * 16 + (CUR - '0');
695: else if ((CUR >= 'a') && (CUR <= 'f'))
696: val = val * 16 + (CUR - 'a') + 10;
697: else if ((CUR >= 'A') && (CUR <= 'F'))
698: val = val * 16 + (CUR - 'A') + 10;
699: else {
700: if ((ctxt->sax != NULL) && (ctxt->sax->error != NULL))
701: ctxt->sax->error(ctxt->userData,
702: "xmlParseCharRef: invalid hexadecimal value\n");
703: ctxt->wellFormed = 0;
704: val = 0;
705: break;
706: }
707: NEXT;
708: }
709: if (CUR == ';')
710: NEXT;
711: } else if ((CUR == '&') && (NXT(1) == '#')) {
712: SKIP(2);
713: while (CUR != ';') {
714: if ((CUR >= '0') && (CUR <= '9'))
715: val = val * 10 + (CUR - '0');
716: else {
717: if ((ctxt->sax != NULL) && (ctxt->sax->error != NULL))
718: ctxt->sax->error(ctxt->userData,
719: "xmlParseCharRef: invalid decimal value\n");
720: ctxt->wellFormed = 0;
721: val = 0;
722: break;
723: }
724: NEXT;
725: }
726: if (CUR == ';')
727: NEXT;
728: } else {
729: if ((ctxt->sax != NULL) && (ctxt->sax->error != NULL))
1.98 daniel 730: ctxt->sax->error(ctxt->userData,
731: "xmlParseCharRef: invalid value\n");
1.97 daniel 732: ctxt->wellFormed = 0;
733: }
1.98 daniel 734:
1.97 daniel 735: /*
1.98 daniel 736: * [ WFC: Legal Character ]
737: * Characters referred to using character references must match the
738: * production for Char.
1.97 daniel 739: */
740: if (IS_CHAR(val)) {
741: return(val);
742: } else {
743: if ((ctxt->sax != NULL) && (ctxt->sax->error != NULL))
1.98 daniel 744: ctxt->sax->error(ctxt->userData, "CharRef: invalid CHAR value %d\n",
1.97 daniel 745: val);
746: ctxt->wellFormed = 0;
747: }
748: return(0);
1.77 daniel 749: }
750:
1.96 daniel 751: /**
752: * xmlParserHandleReference:
753: * @ctxt: the parser context
754: *
1.97 daniel 755: * [67] Reference ::= EntityRef | CharRef
756: *
1.96 daniel 757: * [68] EntityRef ::= '&' Name ';'
758: *
1.98 daniel 759: * [ WFC: Entity Declared ]
760: * the Name given in the entity reference must match that in an entity
761: * declaration, except that well-formed documents need not declare any
762: * of the following entities: amp, lt, gt, apos, quot.
763: *
764: * [ WFC: Parsed Entity ]
765: * An entity reference must not contain the name of an unparsed entity
766: *
1.97 daniel 767: * [66] CharRef ::= '&#' [0-9]+ ';' |
768: * '&#x' [0-9a-fA-F]+ ';'
769: *
1.96 daniel 770: * A PEReference may have been detectect in the current input stream
771: * the handling is done accordingly to
772: * http://www.w3.org/TR/REC-xml#entproc
773: */
774: void
775: xmlParserHandleReference(xmlParserCtxtPtr ctxt) {
1.97 daniel 776: xmlParserInputPtr input;
777: CHAR *name;
778: xmlEntityPtr ent = NULL;
779:
780: if (CUR != '&') return;
781: GROW;
782: if ((CUR == '&') && (NXT(1) == '#')) {
783: switch(ctxt->instate) {
784: case XML_PARSER_COMMENT:
785: return;
786: case XML_PARSER_EOF:
787: if ((ctxt->sax != NULL) && (ctxt->sax->error != NULL))
788: ctxt->sax->error(ctxt->userData, "CharRef at EOF\n");
789: ctxt->wellFormed = 0;
790: return;
791: case XML_PARSER_PROLOG:
792: if ((ctxt->sax != NULL) && (ctxt->sax->error != NULL))
793: ctxt->sax->error(ctxt->userData, "CharRef in prolog!\n");
794: ctxt->wellFormed = 0;
795: return;
796: case XML_PARSER_EPILOG:
797: if ((ctxt->sax != NULL) && (ctxt->sax->error != NULL))
798: ctxt->sax->error(ctxt->userData, "CharRef in epilog!\n");
799: ctxt->wellFormed = 0;
800: return;
801: case XML_PARSER_DTD:
802: if ((ctxt->sax != NULL) && (ctxt->sax->error != NULL))
803: ctxt->sax->error(ctxt->userData,
804: "CharRef are forbiden in DTDs!\n");
805: ctxt->wellFormed = 0;
806: return;
807: case XML_PARSER_ENTITY_DECL:
808: /* we just ignore it there */
809: return;
810: case XML_PARSER_ENTITY_VALUE:
811: /*
812: * NOTE: in the case of entity values, we don't do the
813: * substitution here since we need the litteral
814: * entity value to be able to save the internal
815: * subset of the document.
816: * This will be handled by xmlDecodeEntities
817: */
818: return;
819: case XML_PARSER_CONTENT:
820: case XML_PARSER_ATTRIBUTE_VALUE:
821: /* TODO this may not be Ok for UTF-8, multibyte sequence */
822: ctxt->token = xmlParseCharRef(ctxt);
823: return;
824: }
825: return;
826: }
827:
828: switch(ctxt->instate) {
829: case XML_PARSER_COMMENT:
830: return;
831: case XML_PARSER_EOF:
832: if ((ctxt->sax != NULL) && (ctxt->sax->error != NULL))
833: ctxt->sax->error(ctxt->userData, "Reference at EOF\n");
834: ctxt->wellFormed = 0;
835: return;
836: case XML_PARSER_PROLOG:
837: if ((ctxt->sax != NULL) && (ctxt->sax->error != NULL))
838: ctxt->sax->error(ctxt->userData, "Reference in prolog!\n");
839: ctxt->wellFormed = 0;
840: return;
841: case XML_PARSER_EPILOG:
842: if ((ctxt->sax != NULL) && (ctxt->sax->error != NULL))
843: ctxt->sax->error(ctxt->userData, "Reference in epilog!\n");
844: ctxt->wellFormed = 0;
845: return;
846: case XML_PARSER_ENTITY_VALUE:
847: /*
848: * NOTE: in the case of entity values, we don't do the
849: * substitution here since we need the litteral
850: * entity value to be able to save the internal
851: * subset of the document.
852: * This will be handled by xmlDecodeEntities
853: */
854: return;
855: case XML_PARSER_ATTRIBUTE_VALUE:
856: /*
857: * NOTE: in the case of attributes values, we don't do the
858: * substitution here unless we are in a mode where
859: * the parser is explicitely asked to substitute
860: * entities. The SAX callback is called with values
861: * without entity substitution.
862: * This will then be handled by xmlDecodeEntities
863: */
864: if (ctxt->replaceEntities == 0) return;
865: break;
866: case XML_PARSER_ENTITY_DECL:
867: /*
868: * we just ignore it there
869: * the substitution will be done once the entity is referenced
870: */
871: return;
872: case XML_PARSER_DTD:
873: if ((ctxt->sax != NULL) && (ctxt->sax->error != NULL))
874: ctxt->sax->error(ctxt->userData,
875: "Entity references are forbiden in DTDs!\n");
876: ctxt->wellFormed = 0;
877: return;
878: case XML_PARSER_CONTENT:
879: /*
880: * NOTE: in the case of attributes values, we don't do the
881: * substitution here unless we are in a mode where
882: * the parser is explicitely asked to substitute
883: * entities. The SAX callback is called with values
884: * without entity substitution.
885: * This will then be handled by xmlDecodeEntities
886: */
887: if (ctxt->replaceEntities == 0) return;
888: break;
889: }
890:
891: NEXT;
892: name = xmlScanName(ctxt);
893: if (name == NULL) {
894: if ((ctxt->sax != NULL) && (ctxt->sax->error != NULL))
895: ctxt->sax->error(ctxt->userData, "Entity reference: no name\n");
896: ctxt->wellFormed = 0;
897: ctxt->token = '&';
898: return;
899: }
900: if (NXT(xmlStrlen(name)) != ';') {
901: if ((ctxt->sax != NULL) && (ctxt->sax->error != NULL))
902: ctxt->sax->error(ctxt->userData,
903: "Entity reference: ';' expected\n");
904: ctxt->wellFormed = 0;
905: ctxt->token = '&';
906: return;
907: }
908: SKIP(xmlStrlen(name) + 1);
909: if (ctxt->sax != NULL) {
910: if (ctxt->sax->getEntity != NULL)
911: ent = ctxt->sax->getEntity(ctxt->userData, name);
912: }
1.98 daniel 913:
914: /*
915: * [ WFC: Entity Declared ]
916: * the Name given in the entity reference must match that in an entity
917: * declaration, except that well-formed documents need not declare any
918: * of the following entities: amp, lt, gt, apos, quot.
919: */
1.97 daniel 920: if (ent == NULL)
921: ent = xmlGetPredefinedEntity(name);
922: if (ent == NULL) {
923: if ((ctxt->sax != NULL) && (ctxt->sax->error != NULL))
924: ctxt->sax->error(ctxt->userData,
1.98 daniel 925: "Entity reference: entity %s not declared\n",
926: name);
1.97 daniel 927: ctxt->wellFormed = 0;
928: return;
929: }
1.98 daniel 930:
931: /*
932: * [ WFC: Parsed Entity ]
933: * An entity reference must not contain the name of an unparsed entity
934: */
935: if (ent->type == XML_EXTERNAL_GENERAL_UNPARSED_ENTITY) {
936: if ((ctxt->sax != NULL) && (ctxt->sax->error != NULL))
937: ctxt->sax->error(ctxt->userData,
938: "Entity reference to unparsed entity %s\n", name);
939: ctxt->wellFormed = 0;
940: }
941:
1.97 daniel 942: if (ent->type == XML_INTERNAL_PREDEFINED_ENTITY) {
943: ctxt->token = ent->content[0];
944: return;
945: }
946: input = xmlNewEntityInputStream(ctxt, ent);
947: xmlPushInput(ctxt, input);
1.96 daniel 948: return;
949: }
950:
951: /**
952: * xmlParserHandlePEReference:
953: * @ctxt: the parser context
954: *
955: * [69] PEReference ::= '%' Name ';'
956: *
1.98 daniel 957: * [ WFC: No Recursion ]
958: * TODO A parsed entity must not contain a recursive
959: * reference to itself, either directly or indirectly.
960: *
961: * [ WFC: Entity Declared ]
962: * In a document without any DTD, a document with only an internal DTD
963: * subset which contains no parameter entity references, or a document
964: * with "standalone='yes'", ... ... The declaration of a parameter
965: * entity must precede any reference to it...
966: *
967: * [ VC: Entity Declared ]
968: * In a document with an external subset or external parameter entities
969: * with "standalone='no'", ... ... The declaration of a parameter entity
970: * must precede any reference to it...
971: *
972: * [ WFC: In DTD ]
973: * Parameter-entity references may only appear in the DTD.
974: * NOTE: misleading but this is handled.
975: *
976: * A PEReference may have been detected in the current input stream
1.96 daniel 977: * the handling is done accordingly to
978: * http://www.w3.org/TR/REC-xml#entproc
979: * i.e.
980: * - Included in literal in entity values
981: * - Included as Parameter Entity reference within DTDs
982: */
983: void
984: xmlParserHandlePEReference(xmlParserCtxtPtr ctxt) {
985: CHAR *name;
986: xmlEntityPtr entity = NULL;
987: xmlParserInputPtr input;
988:
989: switch(ctxt->instate) {
1.97 daniel 990: case XML_PARSER_COMMENT:
991: return;
1.96 daniel 992: case XML_PARSER_EOF:
993: if ((ctxt->sax != NULL) && (ctxt->sax->error != NULL))
994: ctxt->sax->error(ctxt->userData, "PEReference at EOF\n");
995: ctxt->wellFormed = 0;
996: return;
997: case XML_PARSER_PROLOG:
998: if ((ctxt->sax != NULL) && (ctxt->sax->error != NULL))
999: ctxt->sax->error(ctxt->userData, "PEReference in prolog!\n");
1000: ctxt->wellFormed = 0;
1001: return;
1.97 daniel 1002: case XML_PARSER_ENTITY_DECL:
1.96 daniel 1003: case XML_PARSER_CONTENT:
1004: case XML_PARSER_ATTRIBUTE_VALUE:
1005: /* we just ignore it there */
1006: return;
1007: case XML_PARSER_EPILOG:
1008: if ((ctxt->sax != NULL) && (ctxt->sax->error != NULL))
1.97 daniel 1009: ctxt->sax->error(ctxt->userData, "PEReference in epilog!\n");
1.96 daniel 1010: ctxt->wellFormed = 0;
1011: return;
1.97 daniel 1012: case XML_PARSER_ENTITY_VALUE:
1013: /*
1014: * NOTE: in the case of entity values, we don't do the
1015: * substitution here since we need the litteral
1016: * entity value to be able to save the internal
1017: * subset of the document.
1018: * This will be handled by xmlDecodeEntities
1019: */
1020: return;
1.96 daniel 1021: case XML_PARSER_DTD:
1.98 daniel 1022: /*
1023: * [WFC: Well-Formedness Constraint: PEs in Internal Subset]
1024: * In the internal DTD subset, parameter-entity references
1025: * can occur only where markup declarations can occur, not
1026: * within markup declarations.
1027: * In that case this is handled in xmlParseMarkupDecl
1028: */
1029: if ((ctxt->external == 0) && (ctxt->inputNr == 1))
1030: return;
1.96 daniel 1031: }
1032:
1033: NEXT;
1034: name = xmlParseName(ctxt);
1035: if (name == NULL) {
1036: if ((ctxt->sax != NULL) && (ctxt->sax->error != NULL))
1037: ctxt->sax->error(ctxt->userData, "xmlHandlePEReference: no name\n");
1038: ctxt->wellFormed = 0;
1039: } else {
1040: if (CUR == ';') {
1041: NEXT;
1.98 daniel 1042: if ((ctxt->sax != NULL) && (ctxt->sax->getParameterEntity != NULL))
1043: entity = ctxt->sax->getParameterEntity(ctxt->userData, name);
1.96 daniel 1044: if (entity == NULL) {
1.98 daniel 1045:
1046: /*
1047: * [ WFC: Entity Declared ]
1048: * In a document without any DTD, a document with only an
1049: * internal DTD subset which contains no parameter entity
1050: * references, or a document with "standalone='yes'", ...
1051: * ... The declaration of a parameter entity must precede
1052: * any reference to it...
1053: */
1054: if ((ctxt->standalone == 1) ||
1055: ((ctxt->hasExternalSubset == 0) &&
1056: (ctxt->hasPErefs == 0))) {
1057: if ((ctxt->sax != NULL) && (ctxt->sax->error != NULL))
1058: ctxt->sax->error(ctxt->userData,
1059: "PEReference: %%%s; not found\n", name);
1060: ctxt->wellFormed = 0;
1061: } else {
1062: /*
1063: * [ VC: Entity Declared ]
1064: * In a document with an external subset or external
1065: * parameter entities with "standalone='no'", ...
1066: * ... The declaration of a parameter entity must precede
1067: * any reference to it...
1068: */
1069: if ((ctxt->sax != NULL) && (ctxt->sax->warning != NULL))
1070: ctxt->sax->warning(ctxt->userData,
1071: "PEReference: %%%s; not found\n", name);
1072: ctxt->valid = 0;
1073: }
1.96 daniel 1074: } else {
1075: if ((entity->type == XML_INTERNAL_PARAMETER_ENTITY) ||
1076: (entity->type == XML_EXTERNAL_PARAMETER_ENTITY)) {
1077: /*
1078: * TODO !!!! handle the extra spaces added before and after
1079: * c.f. http://www.w3.org/TR/REC-xml#as-PE
1080: * TODO !!!! Avoid quote processing in parameters value
1081: * c.f. http://www.w3.org/TR/REC-xml#inliteral
1082: */
1083: input = xmlNewEntityInputStream(ctxt, entity);
1084: xmlPushInput(ctxt, input);
1085: } else {
1086: if ((ctxt->sax != NULL) && (ctxt->sax->error != NULL))
1087: ctxt->sax->error(ctxt->userData,
1088: "xmlHandlePEReference: %s is not a parameter entity\n",
1089: name);
1090: ctxt->wellFormed = 0;
1091: }
1092: }
1093: } else {
1094: if ((ctxt->sax != NULL) && (ctxt->sax->error != NULL))
1095: ctxt->sax->error(ctxt->userData,
1096: "xmlHandlePEReference: expecting ';'\n");
1097: ctxt->wellFormed = 0;
1098: }
1.97 daniel 1099: free(name);
1100: }
1101: }
1102:
/*
 * growBuffer:
 *
 * Double the capacity of a translation buffer.  The caller must have a
 * CHAR * variable named @buffer and an integer variable named
 * @buffer_size in scope; both are updated in place.  The block may be
 * moved by realloc(), so any pointer into it has to be recomputed by
 * the caller afterwards.  If realloc() fails the error is printed with
 * perror() and the process exits with status 1.
 */
#define growBuffer(buffer) {						\
    buffer##_size = buffer##_size * 2;					\
    buffer = (CHAR *) realloc(buffer, sizeof(CHAR) * buffer##_size);	\
    if (NULL == buffer) {						\
        perror("realloc failed");					\
        exit(1);							\
    }									\
}
1.77 daniel 1114:
1115: /**
1116: * xmlDecodeEntities:
1117: * @ctxt: the parser context
1118: * @what: combination of XML_SUBSTITUTE_REF and XML_SUBSTITUTE_PEREF
1119: * @len: the len to decode (in bytes !), -1 for no size limit
1120: * @end: an end marker CHAR, 0 if none
1121: * @end2: an end marker CHAR, 0 if none
1122: * @end3: an end marker CHAR, 0 if none
1123: *
1124: * [67] Reference ::= EntityRef | CharRef
1125: *
1126: * [69] PEReference ::= '%' Name ';'
1127: *
1128: * Returns A newly allocated string with the substitution done. The caller
1129: * must deallocate it !
1130: */
1131: CHAR *
1132: xmlDecodeEntities(xmlParserCtxtPtr ctxt, int len, int what,
1133: CHAR end, CHAR end2, CHAR end3) {
1134: CHAR *buffer = NULL;
1.78 daniel 1135: int buffer_size = 0;
1.77 daniel 1136: CHAR *out = NULL;
1.78 daniel 1137:
1.97 daniel 1138: CHAR *current = NULL;
1.77 daniel 1139: xmlEntityPtr ent;
1.91 daniel 1140: int nbchars = 0;
1.77 daniel 1141: unsigned int max = (unsigned int) len;
1.97 daniel 1142: CHAR cur;
1.77 daniel 1143:
1144: /*
1145: * allocate a translation buffer.
1146: */
1147: buffer_size = 1000;
1148: buffer = (CHAR *) malloc(buffer_size * sizeof(CHAR));
1149: if (buffer == NULL) {
1150: perror("xmlDecodeEntities: malloc failed");
1151: return(NULL);
1152: }
1153: out = buffer;
1154:
1.78 daniel 1155: /*
1156: * Ok loop until we reach one of the ending char or a size limit.
1157: */
1.97 daniel 1158: cur = CUR;
1159: while ((nbchars < max) && (cur != end) &&
1160: (cur != end2) && (cur != end3)) {
1.77 daniel 1161:
1.98 daniel 1162: if (cur == 0) break;
1163: if ((cur == '&') && (NXT(1) == '#')) {
1164: int val = xmlParseCharRef(ctxt);
1165: *out++ = val;
1166: nbchars += 3;
1167: } else if ((cur == '&') && (what & XML_SUBSTITUTE_REF)) {
1168: ent = xmlParseEntityRef(ctxt);
1169: if ((ent != NULL) &&
1170: (ctxt->replaceEntities != 0)) {
1171: current = ent->content;
1172: while (*current != 0) {
1173: *out++ = *current++;
1174: if (out - buffer > buffer_size - 100) {
1175: int index = out - buffer;
1176:
1177: growBuffer(buffer);
1178: out = &buffer[index];
1.77 daniel 1179: }
1180: }
1.98 daniel 1181: nbchars += 3 + xmlStrlen(ent->name);
1182: } else if (ent != NULL) {
1183: int i = xmlStrlen(ent->name);
1184: const CHAR *cur = ent->name;
1185:
1186: nbchars += i + 2;
1187: *out++ = '&';
1188: if (out - buffer > buffer_size - i - 100) {
1189: int index = out - buffer;
1190:
1191: growBuffer(buffer);
1192: out = &buffer[index];
1193: }
1194: for (;i > 0;i--)
1195: *out++ = *cur++;
1196: *out++ = ';';
1.77 daniel 1197: }
1.97 daniel 1198: } else if (cur == '%' && (what & XML_SUBSTITUTE_PEREF)) {
1199: /*
1.77 daniel 1200: * a PEReference induce to switch the entity flow,
1201: * we break here to flush the current set of chars
1202: * parsed if any. We will be called back later.
1.97 daniel 1203: */
1.91 daniel 1204: if (nbchars != 0) break;
1.77 daniel 1205:
1206: xmlParsePEReference(ctxt);
1.79 daniel 1207:
1.97 daniel 1208: /*
1.79 daniel 1209: * Pop-up of finished entities.
1.97 daniel 1210: */
1.79 daniel 1211: while ((CUR == 0) && (ctxt->inputNr > 1))
1212: xmlPopInput(ctxt);
1213:
1.98 daniel 1214: break;
1.77 daniel 1215: } else {
1216: /* TODO: invalid for UTF-8 , use COPY(out); */
1.97 daniel 1217: *out++ = cur;
1.91 daniel 1218: nbchars++;
1.86 daniel 1219: if (out - buffer > buffer_size - 100) {
1220: int index = out - buffer;
1221:
1222: growBuffer(buffer);
1223: out = &buffer[index];
1224: }
1.77 daniel 1225: NEXT;
1226: }
1.97 daniel 1227: cur = CUR;
1.77 daniel 1228: }
1229: *out++ = 0;
1230: return(buffer);
1231: }
1232:
1.1 veillard 1233:
1.28 daniel 1234: /************************************************************************
1235: * *
1.75 daniel 1236: * Commodity functions to handle encodings *
1237: * *
1238: ************************************************************************/
1239:
1240: /**
1241: * xmlSwitchEncoding:
1242: * @ctxt: the parser context
1243: * @len: the len of @cur
1244: *
1245: * change the input functions when discovering the character encoding
1246: * of a given entity.
1247: *
1248: */
1249: void
1250: xmlSwitchEncoding(xmlParserCtxtPtr ctxt, xmlCharEncoding enc)
1251: {
1252: switch (enc) {
1253: case XML_CHAR_ENCODING_ERROR:
1254: if ((ctxt->sax != NULL) && (ctxt->sax->error != NULL))
1255: ctxt->sax->error(ctxt->userData, "encoding unknown\n");
1256: ctxt->wellFormed = 0;
1257: break;
1258: case XML_CHAR_ENCODING_NONE:
1259: /* let's assume it's UTF-8 without the XML decl */
1260: return;
1261: case XML_CHAR_ENCODING_UTF8:
1262: /* default encoding, no conversion should be needed */
1263: return;
1264: case XML_CHAR_ENCODING_UTF16LE:
1265: if ((ctxt->sax != NULL) && (ctxt->sax->error != NULL))
1266: ctxt->sax->error(ctxt->userData,
1267: "char encoding UTF16 little endian not supported\n");
1268: break;
1269: case XML_CHAR_ENCODING_UTF16BE:
1270: if ((ctxt->sax != NULL) && (ctxt->sax->error != NULL))
1271: ctxt->sax->error(ctxt->userData,
1272: "char encoding UTF16 big endian not supported\n");
1273: break;
1274: case XML_CHAR_ENCODING_UCS4LE:
1275: if ((ctxt->sax != NULL) && (ctxt->sax->error != NULL))
1276: ctxt->sax->error(ctxt->userData,
1277: "char encoding USC4 little endian not supported\n");
1278: break;
1279: case XML_CHAR_ENCODING_UCS4BE:
1280: if ((ctxt->sax != NULL) && (ctxt->sax->error != NULL))
1281: ctxt->sax->error(ctxt->userData,
1282: "char encoding USC4 big endian not supported\n");
1283: break;
1284: case XML_CHAR_ENCODING_EBCDIC:
1285: if ((ctxt->sax != NULL) && (ctxt->sax->error != NULL))
1286: ctxt->sax->error(ctxt->userData,
1287: "char encoding EBCDIC not supported\n");
1288: break;
1289: case XML_CHAR_ENCODING_UCS4_2143:
1290: if ((ctxt->sax != NULL) && (ctxt->sax->error != NULL))
1291: ctxt->sax->error(ctxt->userData,
1292: "char encoding UCS4 2143 not supported\n");
1293: break;
1294: case XML_CHAR_ENCODING_UCS4_3412:
1295: if ((ctxt->sax != NULL) && (ctxt->sax->error != NULL))
1296: ctxt->sax->error(ctxt->userData,
1297: "char encoding UCS4 3412 not supported\n");
1298: break;
1299: case XML_CHAR_ENCODING_UCS2:
1300: if ((ctxt->sax != NULL) && (ctxt->sax->error != NULL))
1301: ctxt->sax->error(ctxt->userData,
1302: "char encoding UCS2 not supported\n");
1303: break;
1304: case XML_CHAR_ENCODING_8859_1:
1305: if ((ctxt->sax != NULL) && (ctxt->sax->error != NULL))
1306: ctxt->sax->error(ctxt->userData,
1307: "char encoding ISO_8859_1 ISO Latin 1 not supported\n");
1308: break;
1309: case XML_CHAR_ENCODING_8859_2:
1310: if ((ctxt->sax != NULL) && (ctxt->sax->error != NULL))
1311: ctxt->sax->error(ctxt->userData,
1312: "char encoding ISO_8859_2 ISO Latin 2 not supported\n");
1313: break;
1314: case XML_CHAR_ENCODING_8859_3:
1315: if ((ctxt->sax != NULL) && (ctxt->sax->error != NULL))
1316: ctxt->sax->error(ctxt->userData,
1317: "char encoding ISO_8859_3 not supported\n");
1318: break;
1319: case XML_CHAR_ENCODING_8859_4:
1320: if ((ctxt->sax != NULL) && (ctxt->sax->error != NULL))
1321: ctxt->sax->error(ctxt->userData,
1322: "char encoding ISO_8859_4 not supported\n");
1323: break;
1324: case XML_CHAR_ENCODING_8859_5:
1325: if ((ctxt->sax != NULL) && (ctxt->sax->error != NULL))
1326: ctxt->sax->error(ctxt->userData,
1327: "char encoding ISO_8859_5 not supported\n");
1328: break;
1329: case XML_CHAR_ENCODING_8859_6:
1330: if ((ctxt->sax != NULL) && (ctxt->sax->error != NULL))
1331: ctxt->sax->error(ctxt->userData,
1332: "char encoding ISO_8859_6 not supported\n");
1333: break;
1334: case XML_CHAR_ENCODING_8859_7:
1335: if ((ctxt->sax != NULL) && (ctxt->sax->error != NULL))
1336: ctxt->sax->error(ctxt->userData,
1337: "char encoding ISO_8859_7 not supported\n");
1338: break;
1339: case XML_CHAR_ENCODING_8859_8:
1340: if ((ctxt->sax != NULL) && (ctxt->sax->error != NULL))
1341: ctxt->sax->error(ctxt->userData,
1342: "char encoding ISO_8859_8 not supported\n");
1343: break;
1344: case XML_CHAR_ENCODING_8859_9:
1345: if ((ctxt->sax != NULL) && (ctxt->sax->error != NULL))
1346: ctxt->sax->error(ctxt->userData,
1347: "char encoding ISO_8859_9 not supported\n");
1348: break;
1349: case XML_CHAR_ENCODING_2022_JP:
1350: if ((ctxt->sax != NULL) && (ctxt->sax->error != NULL))
1351: ctxt->sax->error(ctxt->userData,
1352: "char encoding ISO-2022-JPnot supported\n");
1353: break;
1354: case XML_CHAR_ENCODING_SHIFT_JIS:
1355: if ((ctxt->sax != NULL) && (ctxt->sax->error != NULL))
1356: ctxt->sax->error(ctxt->userData,
1357: "char encoding Shift_JISnot supported\n");
1358: break;
1359: case XML_CHAR_ENCODING_EUC_JP:
1360: if ((ctxt->sax != NULL) && (ctxt->sax->error != NULL))
1361: ctxt->sax->error(ctxt->userData,
1362: "char encoding EUC-JPnot supported\n");
1363: break;
1364: }
1365: }
1366:
1367: /************************************************************************
1368: * *
1.28 daniel 1369: * Commodity functions to handle CHARs *
1370: * *
1371: ************************************************************************/
1372:
1.50 daniel 1373: /**
1374: * xmlStrndup:
1375: * @cur: the input CHAR *
1376: * @len: the len of @cur
1377: *
1378: * a strndup for array of CHAR's
1.68 daniel 1379: *
1380: * Returns a new CHAR * or NULL
1.1 veillard 1381: */
1.55 daniel 1382: CHAR *
1383: xmlStrndup(const CHAR *cur, int len) {
1.1 veillard 1384: CHAR *ret = malloc((len + 1) * sizeof(CHAR));
1385:
1386: if (ret == NULL) {
1.86 daniel 1387: fprintf(stderr, "malloc of %ld byte failed\n",
1388: (len + 1) * (long)sizeof(CHAR));
1.1 veillard 1389: return(NULL);
1390: }
1391: memcpy(ret, cur, len * sizeof(CHAR));
1392: ret[len] = 0;
1393: return(ret);
1394: }
1395:
1.50 daniel 1396: /**
1397: * xmlStrdup:
1398: * @cur: the input CHAR *
1399: *
1400: * a strdup for array of CHAR's
1.68 daniel 1401: *
1402: * Returns a new CHAR * or NULL
1.1 veillard 1403: */
1.55 daniel 1404: CHAR *
1405: xmlStrdup(const CHAR *cur) {
1.6 httpng 1406: const CHAR *p = cur;
1.1 veillard 1407:
1408: while (IS_CHAR(*p)) p++;
1409: return(xmlStrndup(cur, p - cur));
1410: }
1411:
1.50 daniel 1412: /**
1413: * xmlCharStrndup:
1414: * @cur: the input char *
1415: * @len: the len of @cur
1416: *
1417: * a strndup for char's to CHAR's
1.68 daniel 1418: *
1419: * Returns a new CHAR * or NULL
1.45 daniel 1420: */
1421:
1.55 daniel 1422: CHAR *
1423: xmlCharStrndup(const char *cur, int len) {
1.45 daniel 1424: int i;
1425: CHAR *ret = malloc((len + 1) * sizeof(CHAR));
1426:
1427: if (ret == NULL) {
1.86 daniel 1428: fprintf(stderr, "malloc of %ld byte failed\n",
1429: (len + 1) * (long)sizeof(CHAR));
1.45 daniel 1430: return(NULL);
1431: }
1432: for (i = 0;i < len;i++)
1433: ret[i] = (CHAR) cur[i];
1434: ret[len] = 0;
1435: return(ret);
1436: }
1437:
1.50 daniel 1438: /**
1439: * xmlCharStrdup:
1440: * @cur: the input char *
1441: * @len: the len of @cur
1442: *
1443: * a strdup for char's to CHAR's
1.68 daniel 1444: *
1445: * Returns a new CHAR * or NULL
1.45 daniel 1446: */
1447:
1.55 daniel 1448: CHAR *
1449: xmlCharStrdup(const char *cur) {
1.45 daniel 1450: const char *p = cur;
1451:
1452: while (*p != '\0') p++;
1453: return(xmlCharStrndup(cur, p - cur));
1454: }
1455:
1.50 daniel 1456: /**
1457: * xmlStrcmp:
1458: * @str1: the first CHAR *
1459: * @str2: the second CHAR *
1460: *
1461: * a strcmp for CHAR's
1.68 daniel 1462: *
1463: * Returns the integer result of the comparison
1.14 veillard 1464: */
1465:
1.55 daniel 1466: int
1467: xmlStrcmp(const CHAR *str1, const CHAR *str2) {
1.14 veillard 1468: register int tmp;
1469:
1470: do {
1471: tmp = *str1++ - *str2++;
1472: if (tmp != 0) return(tmp);
1473: } while ((*str1 != 0) && (*str2 != 0));
1474: return (*str1 - *str2);
1475: }
1476:
1.50 daniel 1477: /**
1478: * xmlStrncmp:
1479: * @str1: the first CHAR *
1480: * @str2: the second CHAR *
1481: * @len: the max comparison length
1482: *
1483: * a strncmp for CHAR's
1.68 daniel 1484: *
1485: * Returns the integer result of the comparison
1.14 veillard 1486: */
1487:
1.55 daniel 1488: int
1489: xmlStrncmp(const CHAR *str1, const CHAR *str2, int len) {
1.14 veillard 1490: register int tmp;
1491:
1492: if (len <= 0) return(0);
1493: do {
1494: tmp = *str1++ - *str2++;
1495: if (tmp != 0) return(tmp);
1496: len--;
1497: if (len <= 0) return(0);
1498: } while ((*str1 != 0) && (*str2 != 0));
1499: return (*str1 - *str2);
1500: }
1501:
1.50 daniel 1502: /**
1503: * xmlStrchr:
1504: * @str: the CHAR * array
1505: * @val: the CHAR to search
1506: *
1507: * a strchr for CHAR's
1.68 daniel 1508: *
1509: * Returns the CHAR * for the first occurence or NULL.
1.14 veillard 1510: */
1511:
1.89 daniel 1512: const CHAR *
1.55 daniel 1513: xmlStrchr(const CHAR *str, CHAR val) {
1.14 veillard 1514: while (*str != 0) {
1515: if (*str == val) return((CHAR *) str);
1516: str++;
1517: }
1518: return(NULL);
1.89 daniel 1519: }
1520:
1521: /**
1522: * xmlStrstr:
1523: * @str: the CHAR * array (haystack)
1524: * @val: the CHAR to search (needle)
1525: *
1526: * a strstr for CHAR's
1527: *
1528: * Returns the CHAR * for the first occurence or NULL.
1529: */
1530:
1531: const CHAR *
1532: xmlStrstr(const CHAR *str, CHAR *val) {
1533: int n;
1534:
1535: if (str == NULL) return(NULL);
1536: if (val == NULL) return(NULL);
1537: n = xmlStrlen(val);
1538:
1539: if (n == 0) return(str);
1540: while (*str != 0) {
1541: if (*str == *val) {
1542: if (!xmlStrncmp(str, val, n)) return((const CHAR *) str);
1543: }
1544: str++;
1545: }
1546: return(NULL);
1547: }
1548:
1549: /**
1550: * xmlStrsub:
1551: * @str: the CHAR * array (haystack)
1552: * @start: the index of the first char (zero based)
1553: * @len: the length of the substring
1554: *
1555: * Extract a substring of a given string
1556: *
1557: * Returns the CHAR * for the first occurence or NULL.
1558: */
1559:
1560: CHAR *
1561: xmlStrsub(const CHAR *str, int start, int len) {
1562: int i;
1563:
1564: if (str == NULL) return(NULL);
1565: if (start < 0) return(NULL);
1.90 daniel 1566: if (len < 0) return(NULL);
1.89 daniel 1567:
1568: for (i = 0;i < start;i++) {
1569: if (*str == 0) return(NULL);
1570: str++;
1571: }
1572: if (*str == 0) return(NULL);
1573: return(xmlStrndup(str, len));
1.14 veillard 1574: }
1.28 daniel 1575:
1.50 daniel 1576: /**
1577: * xmlStrlen:
1578: * @str: the CHAR * array
1579: *
1580: * lenght of a CHAR's string
1.68 daniel 1581: *
1582: * Returns the number of CHAR contained in the ARRAY.
1.45 daniel 1583: */
1584:
1.55 daniel 1585: int
1586: xmlStrlen(const CHAR *str) {
1.45 daniel 1587: int len = 0;
1588:
1589: if (str == NULL) return(0);
1590: while (*str != 0) {
1591: str++;
1592: len++;
1593: }
1594: return(len);
1595: }
1596:
1.50 daniel 1597: /**
1598: * xmlStrncat:
1.68 daniel 1599: * @cur: the original CHAR * array
1.50 daniel 1600: * @add: the CHAR * array added
1601: * @len: the length of @add
1602: *
1603: * a strncat for array of CHAR's
1.68 daniel 1604: *
1605: * Returns a new CHAR * containing the concatenated string.
1.45 daniel 1606: */
1607:
1.55 daniel 1608: CHAR *
1609: xmlStrncat(CHAR *cur, const CHAR *add, int len) {
1.45 daniel 1610: int size;
1611: CHAR *ret;
1612:
1613: if ((add == NULL) || (len == 0))
1614: return(cur);
1615: if (cur == NULL)
1616: return(xmlStrndup(add, len));
1617:
1618: size = xmlStrlen(cur);
1619: ret = realloc(cur, (size + len + 1) * sizeof(CHAR));
1620: if (ret == NULL) {
1.86 daniel 1621: fprintf(stderr, "xmlStrncat: realloc of %ld byte failed\n",
1622: (size + len + 1) * (long)sizeof(CHAR));
1.45 daniel 1623: return(cur);
1624: }
1625: memcpy(&ret[size], add, len * sizeof(CHAR));
1626: ret[size + len] = 0;
1627: return(ret);
1628: }
1629:
1.50 daniel 1630: /**
1631: * xmlStrcat:
1.68 daniel 1632: * @cur: the original CHAR * array
1.50 daniel 1633: * @add: the CHAR * array added
1634: *
1635: * a strcat for array of CHAR's
1.68 daniel 1636: *
1637: * Returns a new CHAR * containing the concatenated string.
1.45 daniel 1638: */
1.55 daniel 1639: CHAR *
1640: xmlStrcat(CHAR *cur, const CHAR *add) {
1.45 daniel 1641: const CHAR *p = add;
1642:
1643: if (add == NULL) return(cur);
1644: if (cur == NULL)
1645: return(xmlStrdup(add));
1646:
1647: while (IS_CHAR(*p)) p++;
1648: return(xmlStrncat(cur, add, p - add));
1649: }
1650:
1651: /************************************************************************
1652: * *
1653: * Commodity functions, cleanup needed ? *
1654: * *
1655: ************************************************************************/
1656:
1.50 daniel 1657: /**
1658: * areBlanks:
1659: * @ctxt: an XML parser context
1660: * @str: a CHAR *
1661: * @len: the size of @str
1662: *
1.45 daniel 1663: * Is this a sequence of blank chars that one can ignore ?
1.50 daniel 1664: *
1.99 daniel 1665: * TODO: Whether white space are significant has to be checked accordingly
1666: * to DTD informations if available
1.68 daniel 1667: *
1668: * Returns 1 if ignorable 0 otherwise.
1.45 daniel 1669: */
1670:
1671: static int areBlanks(xmlParserCtxtPtr ctxt, const CHAR *str, int len) {
1672: int i;
1673: xmlNodePtr lastChild;
1674:
1675: for (i = 0;i < len;i++)
1676: if (!(IS_BLANK(str[i]))) return(0);
1677:
1678: if (CUR != '<') return(0);
1.72 daniel 1679: if (ctxt->node == NULL) return(0);
1.45 daniel 1680: lastChild = xmlGetLastChild(ctxt->node);
1681: if (lastChild == NULL) {
1682: if (ctxt->node->content != NULL) return(0);
1683: } else if (xmlNodeIsText(lastChild))
1684: return(0);
1685: return(1);
1686: }
1687:
1.50 daniel 1688: /**
1689: * xmlHandleEntity:
1690: * @ctxt: an XML parser context
1691: * @entity: an XML entity pointer.
1692: *
1693: * Default handling of defined entities, when should we define a new input
1.45 daniel 1694: * stream ? When do we just handle that as a set of chars ?
1.99 daniel 1695: *
1696: * OBSOLETE: to be removed at some point.
1.45 daniel 1697: */
1698:
1.55 daniel 1699: void
1700: xmlHandleEntity(xmlParserCtxtPtr ctxt, xmlEntityPtr entity) {
1.45 daniel 1701: int len;
1.50 daniel 1702: xmlParserInputPtr input;
1.45 daniel 1703:
1704: if (entity->content == NULL) {
1.55 daniel 1705: if ((ctxt->sax != NULL) && (ctxt->sax->error != NULL))
1.74 daniel 1706: ctxt->sax->error(ctxt->userData, "xmlHandleEntity %s: content == NULL\n",
1.45 daniel 1707: entity->name);
1.59 daniel 1708: ctxt->wellFormed = 0;
1.45 daniel 1709: return;
1710: }
1711: len = xmlStrlen(entity->content);
1712: if (len <= 2) goto handle_as_char;
1713:
1714: /*
1715: * Redefine its content as an input stream.
1716: */
1.50 daniel 1717: input = xmlNewEntityInputStream(ctxt, entity);
1718: xmlPushInput(ctxt, input);
1.45 daniel 1719: return;
1720:
1721: handle_as_char:
1722: /*
1723: * Just handle the content as a set of chars.
1724: */
1.72 daniel 1725: if ((ctxt->sax != NULL) && (ctxt->sax->characters != NULL))
1.74 daniel 1726: ctxt->sax->characters(ctxt->userData, entity->content, len);
1.45 daniel 1727:
1728: }
1729:
1730: /*
1731: * Forward declarations for recursive behaviour.
1732: */
1.77 daniel 1733: void xmlParsePEReference(xmlParserCtxtPtr ctxt);
1734: void xmlParseReference(xmlParserCtxtPtr ctxt);
1.45 daniel 1735:
1.28 daniel 1736: /************************************************************************
1737: * *
1738: * Extra stuff for namespace support *
1739: * Relates to http://www.w3.org/TR/WD-xml-names *
1740: * *
1741: ************************************************************************/
1742:
1.50 daniel 1743: /**
1744: * xmlNamespaceParseNCName:
1745: * @ctxt: an XML parser context
1746: *
1747: * parse an XML namespace name.
1.28 daniel 1748: *
1749: * [NS 3] NCName ::= (Letter | '_') (NCNameChar)*
1750: *
1751: * [NS 4] NCNameChar ::= Letter | Digit | '.' | '-' | '_' |
1752: * CombiningChar | Extender
1.68 daniel 1753: *
1754: * Returns the namespace name or NULL
1.28 daniel 1755: */
1756:
1.55 daniel 1757: CHAR *
1758: xmlNamespaceParseNCName(xmlParserCtxtPtr ctxt) {
1.91 daniel 1759: CHAR buf[XML_MAX_NAMELEN];
1760: int len = 0;
1.28 daniel 1761:
1.40 daniel 1762: if (!IS_LETTER(CUR) && (CUR != '_')) return(NULL);
1.28 daniel 1763:
1.40 daniel 1764: while ((IS_LETTER(CUR)) || (IS_DIGIT(CUR)) ||
1765: (CUR == '.') || (CUR == '-') ||
1766: (CUR == '_') ||
1767: (IS_COMBINING(CUR)) ||
1.91 daniel 1768: (IS_EXTENDER(CUR))) {
1769: buf[len++] = CUR;
1.40 daniel 1770: NEXT;
1.91 daniel 1771: if (len >= XML_MAX_NAMELEN) {
1772: fprintf(stderr,
1773: "xmlNamespaceParseNCName: reached XML_MAX_NAMELEN limit\n");
1774: while ((IS_LETTER(CUR)) || (IS_DIGIT(CUR)) ||
1775: (CUR == '.') || (CUR == '-') ||
1776: (CUR == '_') ||
1777: (IS_COMBINING(CUR)) ||
1778: (IS_EXTENDER(CUR)))
1779: NEXT;
1780: break;
1781: }
1782: }
1783: return(xmlStrndup(buf, len));
1.28 daniel 1784: }
1785:
1.50 daniel 1786: /**
1787: * xmlNamespaceParseQName:
1788: * @ctxt: an XML parser context
1789: * @prefix: a CHAR **
1790: *
1791: * parse an XML qualified name
1.28 daniel 1792: *
1793: * [NS 5] QName ::= (Prefix ':')? LocalPart
1794: *
1795: * [NS 6] Prefix ::= NCName
1796: *
1797: * [NS 7] LocalPart ::= NCName
1.68 daniel 1798: *
1799: * Returns the function returns the local part, and prefix is updated
1.50 daniel 1800: * to get the Prefix if any.
1.28 daniel 1801: */
1802:
1.55 daniel 1803: CHAR *
1804: xmlNamespaceParseQName(xmlParserCtxtPtr ctxt, CHAR **prefix) {
1.28 daniel 1805: CHAR *ret = NULL;
1806:
1807: *prefix = NULL;
1808: ret = xmlNamespaceParseNCName(ctxt);
1.40 daniel 1809: if (CUR == ':') {
1.28 daniel 1810: *prefix = ret;
1.40 daniel 1811: NEXT;
1.28 daniel 1812: ret = xmlNamespaceParseNCName(ctxt);
1813: }
1814:
1815: return(ret);
1816: }
1817:
1.50 daniel 1818: /**
1.72 daniel 1819: * xmlSplitQName:
1820: * @name: an XML parser context
1821: * @prefix: a CHAR **
1822: *
1823: * parse an XML qualified name string
1824: *
1825: * [NS 5] QName ::= (Prefix ':')? LocalPart
1826: *
1827: * [NS 6] Prefix ::= NCName
1828: *
1829: * [NS 7] LocalPart ::= NCName
1830: *
1831: * Returns the function returns the local part, and prefix is updated
1832: * to get the Prefix if any.
1833: */
1834:
1835: CHAR *
1836: xmlSplitQName(const CHAR *name, CHAR **prefix) {
1837: CHAR *ret = NULL;
1838: const CHAR *q;
1839: const CHAR *cur = name;
1840:
1841: *prefix = NULL;
1842: if (!IS_LETTER(*cur) && (*cur != '_')) return(NULL);
1843: q = cur++;
1844:
1845: while ((IS_LETTER(*cur)) || (IS_DIGIT(*cur)) ||
1846: (*cur == '.') || (*cur == '-') ||
1847: (*cur == '_') ||
1848: (IS_COMBINING(*cur)) ||
1849: (IS_EXTENDER(*cur)))
1850: cur++;
1851:
1852: ret = xmlStrndup(q, cur - q);
1853:
1854: if (*cur == ':') {
1855: cur++;
1856: if (!IS_LETTER(*cur) && (*cur != '_')) return(ret);
1857: *prefix = ret;
1858:
1859: q = cur++;
1860:
1861: while ((IS_LETTER(*cur)) || (IS_DIGIT(*cur)) ||
1862: (*cur == '.') || (*cur == '-') ||
1863: (*cur == '_') ||
1864: (IS_COMBINING(*cur)) ||
1865: (IS_EXTENDER(*cur)))
1866: cur++;
1867:
1868: ret = xmlStrndup(q, cur - q);
1869: }
1870:
1871: return(ret);
1872: }
1873: /**
1.50 daniel 1874: * xmlNamespaceParseNSDef:
1875: * @ctxt: an XML parser context
1876: *
1877: * parse a namespace prefix declaration
1.28 daniel 1878: *
1879: * [NS 1] NSDef ::= PrefixDef Eq SystemLiteral
1880: *
1881: * [NS 2] PrefixDef ::= 'xmlns' (':' NCName)?
1.68 daniel 1882: *
1883: * Returns the namespace name
1.28 daniel 1884: */
1885:
1.55 daniel 1886: CHAR *
1887: xmlNamespaceParseNSDef(xmlParserCtxtPtr ctxt) {
1.28 daniel 1888: CHAR *name = NULL;
1889:
1.40 daniel 1890: if ((CUR == 'x') && (NXT(1) == 'm') &&
1891: (NXT(2) == 'l') && (NXT(3) == 'n') &&
1892: (NXT(4) == 's')) {
1893: SKIP(5);
1894: if (CUR == ':') {
1895: NEXT;
1.28 daniel 1896: name = xmlNamespaceParseNCName(ctxt);
1897: }
1898: }
1.39 daniel 1899: return(name);
1.28 daniel 1900: }
1901:
1.50 daniel 1902: /**
1903: * xmlParseQuotedString:
1904: * @ctxt: an XML parser context
1905: *
1.45 daniel 1906: * [OLD] Parse and return a string between quotes or doublequotes
1.68 daniel 1907: *
1908: * Returns the string parser or NULL.
1.45 daniel 1909: */
1.55 daniel 1910: CHAR *
1911: xmlParseQuotedString(xmlParserCtxtPtr ctxt) {
1.45 daniel 1912: CHAR *ret = NULL;
1913: const CHAR *q;
1914:
1915: if (CUR == '"') {
1916: NEXT;
1917: q = CUR_PTR;
1918: while (IS_CHAR(CUR) && (CUR != '"')) NEXT;
1.55 daniel 1919: if (CUR != '"') {
1920: if ((ctxt->sax != NULL) && (ctxt->sax->error != NULL))
1.74 daniel 1921: ctxt->sax->error(ctxt->userData, "String not closed \"%.50s\"\n", q);
1.59 daniel 1922: ctxt->wellFormed = 0;
1.55 daniel 1923: } else {
1.45 daniel 1924: ret = xmlStrndup(q, CUR_PTR - q);
1925: NEXT;
1926: }
1927: } else if (CUR == '\''){
1928: NEXT;
1929: q = CUR_PTR;
1930: while (IS_CHAR(CUR) && (CUR != '\'')) NEXT;
1.55 daniel 1931: if (CUR != '\'') {
1932: if ((ctxt->sax != NULL) && (ctxt->sax->error != NULL))
1.74 daniel 1933: ctxt->sax->error(ctxt->userData, "String not closed \"%.50s\"\n", q);
1.59 daniel 1934: ctxt->wellFormed = 0;
1.55 daniel 1935: } else {
1.45 daniel 1936: ret = xmlStrndup(q, CUR_PTR - q);
1937: NEXT;
1938: }
1939: }
1940: return(ret);
1941: }
1942:
1.50 daniel 1943: /**
1944: * xmlParseNamespace:
1945: * @ctxt: an XML parser context
1946: *
1.45 daniel 1947: * [OLD] xmlParseNamespace: parse specific PI '<?namespace ...' constructs.
1948: *
1949: * This is what the older xml-name Working Draft specified, a bunch of
1950: * other stuff may still rely on it, so support is still here as
1951: * if ot was declared on the root of the Tree:-(
1952: */
1953:
1.55 daniel 1954: void
1955: xmlParseNamespace(xmlParserCtxtPtr ctxt) {
1.45 daniel 1956: CHAR *href = NULL;
1957: CHAR *prefix = NULL;
1958: int garbage = 0;
1959:
1960: /*
1961: * We just skipped "namespace" or "xml:namespace"
1962: */
1963: SKIP_BLANKS;
1964:
1965: while (IS_CHAR(CUR) && (CUR != '>')) {
1966: /*
1967: * We can have "ns" or "prefix" attributes
1968: * Old encoding as 'href' or 'AS' attributes is still supported
1969: */
1970: if ((CUR == 'n') && (NXT(1) == 's')) {
1971: garbage = 0;
1972: SKIP(2);
1973: SKIP_BLANKS;
1974:
1975: if (CUR != '=') continue;
1976: NEXT;
1977: SKIP_BLANKS;
1978:
1979: href = xmlParseQuotedString(ctxt);
1980: SKIP_BLANKS;
1981: } else if ((CUR == 'h') && (NXT(1) == 'r') &&
1982: (NXT(2) == 'e') && (NXT(3) == 'f')) {
1983: garbage = 0;
1984: SKIP(4);
1985: SKIP_BLANKS;
1986:
1987: if (CUR != '=') continue;
1988: NEXT;
1989: SKIP_BLANKS;
1990:
1991: href = xmlParseQuotedString(ctxt);
1992: SKIP_BLANKS;
1993: } else if ((CUR == 'p') && (NXT(1) == 'r') &&
1994: (NXT(2) == 'e') && (NXT(3) == 'f') &&
1995: (NXT(4) == 'i') && (NXT(5) == 'x')) {
1996: garbage = 0;
1997: SKIP(6);
1998: SKIP_BLANKS;
1999:
2000: if (CUR != '=') continue;
2001: NEXT;
2002: SKIP_BLANKS;
2003:
2004: prefix = xmlParseQuotedString(ctxt);
2005: SKIP_BLANKS;
2006: } else if ((CUR == 'A') && (NXT(1) == 'S')) {
2007: garbage = 0;
2008: SKIP(2);
2009: SKIP_BLANKS;
2010:
2011: if (CUR != '=') continue;
2012: NEXT;
2013: SKIP_BLANKS;
2014:
2015: prefix = xmlParseQuotedString(ctxt);
2016: SKIP_BLANKS;
2017: } else if ((CUR == '?') && (NXT(1) == '>')) {
2018: garbage = 0;
1.91 daniel 2019: NEXT;
1.45 daniel 2020: } else {
2021: /*
2022: * Found garbage when parsing the namespace
2023: */
2024: if (!garbage)
1.55 daniel 2025: if ((ctxt->sax != NULL) && (ctxt->sax->error != NULL))
1.74 daniel 2026: ctxt->sax->error(ctxt->userData, "xmlParseNamespace found garbage\n");
1.59 daniel 2027: ctxt->wellFormed = 0;
1.45 daniel 2028: NEXT;
2029: }
2030: }
2031:
2032: MOVETO_ENDTAG(CUR_PTR);
2033: NEXT;
2034:
2035: /*
2036: * Register the DTD.
1.72 daniel 2037: if (href != NULL)
2038: if ((ctxt->sax != NULL) && (ctxt->sax->globalNamespace != NULL))
1.74 daniel 2039: ctxt->sax->globalNamespace(ctxt->userData, href, prefix);
1.45 daniel 2040: */
2041:
2042: if (prefix != NULL) free(prefix);
2043: if (href != NULL) free(href);
2044: }
2045:
1.28 daniel 2046: /************************************************************************
2047: * *
2048: * The parser itself *
2049: * Relates to http://www.w3.org/TR/REC-xml *
2050: * *
2051: ************************************************************************/
1.14 veillard 2052:
1.50 daniel 2053: /**
1.97 daniel 2054: * xmlScanName:
2055: * @ctxt: an XML parser context
2056: *
2057: * Trickery: parse an XML name but without consuming the input flow
2058: * Needed for rollback cases.
2059: *
2060: * [4] NameChar ::= Letter | Digit | '.' | '-' | '_' | ':' |
2061: * CombiningChar | Extender
2062: *
2063: * [5] Name ::= (Letter | '_' | ':') (NameChar)*
2064: *
2065: * [6] Names ::= Name (S Name)*
2066: *
2067: * Returns the Name parsed or NULL
2068: */
2069:
2070: CHAR *
2071: xmlScanName(xmlParserCtxtPtr ctxt) {
2072: CHAR buf[XML_MAX_NAMELEN];
2073: int len = 0;
2074:
2075: GROW;
2076: if (!IS_LETTER(CUR) && (CUR != '_') &&
2077: (CUR != ':')) {
2078: return(NULL);
2079: }
2080:
2081: while ((IS_LETTER(NXT(len))) || (IS_DIGIT(NXT(len))) ||
2082: (NXT(len) == '.') || (NXT(len) == '-') ||
2083: (NXT(len) == '_') || (NXT(len) == ':') ||
2084: (IS_COMBINING(NXT(len))) ||
2085: (IS_EXTENDER(NXT(len)))) {
2086: buf[len] = NXT(len);
2087: len++;
2088: if (len >= XML_MAX_NAMELEN) {
2089: fprintf(stderr,
2090: "xmlScanName: reached XML_MAX_NAMELEN limit\n");
2091: while ((IS_LETTER(NXT(len))) || (IS_DIGIT(NXT(len))) ||
2092: (NXT(len) == '.') || (NXT(len) == '-') ||
2093: (NXT(len) == '_') || (NXT(len) == ':') ||
2094: (IS_COMBINING(NXT(len))) ||
2095: (IS_EXTENDER(NXT(len))))
2096: len++;
2097: break;
2098: }
2099: }
2100: return(xmlStrndup(buf, len));
2101: }
2102:
2103: /**
1.50 daniel 2104: * xmlParseName:
2105: * @ctxt: an XML parser context
2106: *
2107: * parse an XML name.
1.22 daniel 2108: *
2109: * [4] NameChar ::= Letter | Digit | '.' | '-' | '_' | ':' |
2110: * CombiningChar | Extender
2111: *
2112: * [5] Name ::= (Letter | '_' | ':') (NameChar)*
2113: *
2114: * [6] Names ::= Name (S Name)*
1.68 daniel 2115: *
2116: * Returns the Name parsed or NULL
1.1 veillard 2117: */
2118:
1.55 daniel 2119: CHAR *
2120: xmlParseName(xmlParserCtxtPtr ctxt) {
1.91 daniel 2121: CHAR buf[XML_MAX_NAMELEN];
2122: int len = 0;
1.97 daniel 2123: CHAR cur;
1.1 veillard 2124:
1.91 daniel 2125: GROW;
1.97 daniel 2126: cur = CUR;
2127: if (!IS_LETTER(cur) && (cur != '_') &&
2128: (cur != ':')) {
1.91 daniel 2129: return(NULL);
2130: }
1.40 daniel 2131:
1.97 daniel 2132: while ((IS_LETTER(cur)) || (IS_DIGIT(cur)) ||
2133: (cur == '.') || (cur == '-') ||
2134: (cur == '_') || (cur == ':') ||
2135: (IS_COMBINING(cur)) ||
2136: (IS_EXTENDER(cur))) {
2137: buf[len++] = cur;
1.40 daniel 2138: NEXT;
1.97 daniel 2139: cur = CUR;
1.91 daniel 2140: if (len >= XML_MAX_NAMELEN) {
2141: fprintf(stderr,
2142: "xmlParseName: reached XML_MAX_NAMELEN limit\n");
1.97 daniel 2143: while ((IS_LETTER(cur)) || (IS_DIGIT(cur)) ||
2144: (cur == '.') || (cur == '-') ||
2145: (cur == '_') || (cur == ':') ||
2146: (IS_COMBINING(cur)) ||
2147: (IS_EXTENDER(cur))) {
2148: NEXT;
2149: cur = CUR;
2150: }
1.91 daniel 2151: break;
2152: }
2153: }
2154: return(xmlStrndup(buf, len));
1.22 daniel 2155: }
2156:
1.50 daniel 2157: /**
2158: * xmlParseNmtoken:
2159: * @ctxt: an XML parser context
2160: *
2161: * parse an XML Nmtoken.
1.22 daniel 2162: *
2163: * [7] Nmtoken ::= (NameChar)+
2164: *
2165: * [8] Nmtokens ::= Nmtoken (S Nmtoken)*
1.68 daniel 2166: *
2167: * Returns the Nmtoken parsed or NULL
1.22 daniel 2168: */
2169:
1.55 daniel 2170: CHAR *
2171: xmlParseNmtoken(xmlParserCtxtPtr ctxt) {
1.91 daniel 2172: CHAR buf[XML_MAX_NAMELEN];
2173: int len = 0;
1.22 daniel 2174:
1.91 daniel 2175: GROW;
1.40 daniel 2176: while ((IS_LETTER(CUR)) || (IS_DIGIT(CUR)) ||
2177: (CUR == '.') || (CUR == '-') ||
2178: (CUR == '_') || (CUR == ':') ||
2179: (IS_COMBINING(CUR)) ||
1.91 daniel 2180: (IS_EXTENDER(CUR))) {
2181: buf[len++] = CUR;
1.40 daniel 2182: NEXT;
1.91 daniel 2183: if (len >= XML_MAX_NAMELEN) {
2184: fprintf(stderr,
2185: "xmlParseNmtoken: reached XML_MAX_NAMELEN limit\n");
2186: while ((IS_LETTER(CUR)) || (IS_DIGIT(CUR)) ||
2187: (CUR == '.') || (CUR == '-') ||
2188: (CUR == '_') || (CUR == ':') ||
2189: (IS_COMBINING(CUR)) ||
2190: (IS_EXTENDER(CUR)))
2191: NEXT;
2192: break;
2193: }
2194: }
2195: return(xmlStrndup(buf, len));
1.1 veillard 2196: }
2197:
1.50 daniel 2198: /**
2199: * xmlParseEntityValue:
2200: * @ctxt: an XML parser context
1.78 daniel 2201: * @orig: if non-NULL store a copy of the original entity value
1.50 daniel 2202: *
2203: * parse a value for ENTITY decl.
1.24 daniel 2204: *
2205: * [9] EntityValue ::= '"' ([^%&"] | PEReference | Reference)* '"' |
2206: * "'" ([^%&'] | PEReference | Reference)* "'"
1.68 daniel 2207: *
1.78 daniel 2208: * Returns the EntityValue parsed with reference substitued or NULL
1.24 daniel 2209: */
2210:
1.55 daniel 2211: CHAR *
1.78 daniel 2212: xmlParseEntityValue(xmlParserCtxtPtr ctxt, CHAR **orig) {
1.77 daniel 2213: CHAR *ret = NULL;
1.78 daniel 2214: const CHAR *org = NULL;
1.79 daniel 2215: const CHAR *tst = NULL;
2216: const CHAR *temp = NULL;
1.98 daniel 2217: xmlParserInputPtr input;
1.24 daniel 2218:
1.91 daniel 2219: SHRINK;
1.40 daniel 2220: if (CUR == '"') {
1.96 daniel 2221: ctxt->instate = XML_PARSER_ENTITY_VALUE;
1.98 daniel 2222: input = ctxt->input;
1.40 daniel 2223: NEXT;
1.78 daniel 2224: org = CUR_PTR;
1.98 daniel 2225: /*
2226: * NOTE: 4.4.5 Included in Literal
2227: * When a parameter entity reference appears in a literal entity
2228: * value, ... a single or double quote character in the replacement
2229: * text is always treated as a normal data character and will not
2230: * terminate the literal.
2231: * In practice it means we stop the loop only when back at parsing
2232: * the initial entity and the quote is found
2233: */
2234: while ((CUR != '"') || (ctxt->input != input)) {
1.79 daniel 2235: tst = CUR_PTR;
1.98 daniel 2236: /*
2237: * NOTE: 4.4.7 Bypassed
2238: * When a general entity reference appears in the EntityValue in
2239: * an entity declaration, it is bypassed and left as is.
2240: * so XML_SUBSTITUTE_REF is not set.
2241: */
2242: if (ctxt->input != input)
2243: temp = xmlDecodeEntities(ctxt, -1, XML_SUBSTITUTE_PEREF,
2244: 0, 0, 0);
2245: else
2246: temp = xmlDecodeEntities(ctxt, -1, XML_SUBSTITUTE_PEREF,
2247: '"', 0, 0);
1.94 daniel 2248:
2249: /*
2250: * Pop-up of finished entities.
2251: */
2252: while ((CUR == 0) && (ctxt->inputNr > 1))
2253: xmlPopInput(ctxt);
2254:
2255: if ((temp == NULL) && (tst == CUR_PTR)) {
2256: ret = xmlStrndup("", 0);
2257: break;
2258: }
2259: if ((temp[0] == 0) && (tst == CUR_PTR)) {
2260: free((char *)temp);
2261: ret = xmlStrndup("", 0);
2262: break;
2263: }
1.79 daniel 2264: ret = xmlStrcat(ret, temp);
1.80 daniel 2265: if (temp != NULL) free((char *)temp);
1.94 daniel 2266: GROW;
1.79 daniel 2267: }
1.77 daniel 2268: if (CUR != '"') {
1.55 daniel 2269: if ((ctxt->sax != NULL) && (ctxt->sax->error != NULL))
1.79 daniel 2270: ctxt->sax->error(ctxt->userData, "EntityValue: \" expected\n");
1.59 daniel 2271: ctxt->wellFormed = 0;
1.78 daniel 2272: } else {
1.99 daniel 2273: if (orig != NULL) /* !!!!!!!!! */
1.78 daniel 2274: *orig = xmlStrndup(org, CUR_PTR - org);
1.94 daniel 2275: if (ret == NULL)
2276: ret = xmlStrndup("", 0);
1.40 daniel 2277: NEXT;
1.78 daniel 2278: }
1.40 daniel 2279: } else if (CUR == '\'') {
1.96 daniel 2280: ctxt->instate = XML_PARSER_ENTITY_VALUE;
1.98 daniel 2281: input = ctxt->input;
1.40 daniel 2282: NEXT;
1.78 daniel 2283: org = CUR_PTR;
1.98 daniel 2284: /*
2285: * NOTE: 4.4.5 Included in Literal
2286: * When a parameter entity reference appears in a literal entity
2287: * value, ... a single or double quote character in the replacement
2288: * text is always treated as a normal data character and will not
2289: * terminate the literal.
2290: * In practice it means we stop the loop only when back at parsing
2291: * the initial entity and the quote is found
2292: */
2293: while ((CUR != '\'') || (ctxt->input != input)) {
1.79 daniel 2294: tst = CUR_PTR;
1.98 daniel 2295: /*
2296: * NOTE: 4.4.7 Bypassed
2297: * When a general entity reference appears in the EntityValue in
2298: * an entity declaration, it is bypassed and left as is.
2299: * so XML_SUBSTITUTE_REF is not set.
2300: */
2301: if (ctxt->input != input)
2302: temp = xmlDecodeEntities(ctxt, -1, XML_SUBSTITUTE_PEREF,
2303: 0, 0, 0);
2304: else
2305: temp = xmlDecodeEntities(ctxt, -1, XML_SUBSTITUTE_PEREF,
2306: '\'', 0, 0);
1.94 daniel 2307:
2308: /*
2309: * Pop-up of finished entities.
2310: */
2311: while ((CUR == 0) && (ctxt->inputNr > 1))
2312: xmlPopInput(ctxt);
2313:
2314: if ((temp == NULL) && (tst == CUR_PTR)) {
2315: ret = xmlStrndup("", 0);
2316: break;
2317: }
2318: if ((temp[0] == 0) && (tst == CUR_PTR)) {
2319: free((char *)temp);
2320: ret = xmlStrndup("", 0);
2321: break;
2322: }
1.79 daniel 2323: ret = xmlStrcat(ret, temp);
1.80 daniel 2324: if (temp != NULL) free((char *)temp);
1.94 daniel 2325: GROW;
1.79 daniel 2326: }
1.77 daniel 2327: if (CUR != '\'') {
1.55 daniel 2328: if ((ctxt->sax != NULL) && (ctxt->sax->error != NULL))
1.77 daniel 2329: ctxt->sax->error(ctxt->userData, "EntityValue: ' expected\n");
1.59 daniel 2330: ctxt->wellFormed = 0;
1.78 daniel 2331: } else {
1.99 daniel 2332: if (orig != NULL) /* !!!!!!!!! */
1.78 daniel 2333: *orig = xmlStrndup(org, CUR_PTR - org);
1.94 daniel 2334: if (ret == NULL)
2335: ret = xmlStrndup("", 0);
1.40 daniel 2336: NEXT;
1.78 daniel 2337: }
1.24 daniel 2338: } else {
1.55 daniel 2339: if ((ctxt->sax != NULL) && (ctxt->sax->error != NULL))
1.77 daniel 2340: ctxt->sax->error(ctxt->userData, "EntityValue: \" or ' expected\n");
1.59 daniel 2341: ctxt->wellFormed = 0;
1.24 daniel 2342: }
2343:
2344: return(ret);
2345: }
2346:
1.50 daniel 2347: /**
2348: * xmlParseAttValue:
2349: * @ctxt: an XML parser context
2350: *
2351: * parse a value for an attribute
1.78 daniel 2352: * Note: the parser won't do substitution of entities here, this
1.79 daniel 2353: * will be handled later in xmlStringGetNodeList, unless it was
2354: * asked for ctxt->replaceEntities != 0
1.29 daniel 2355: *
2356: * [10] AttValue ::= '"' ([^<&"] | Reference)* '"' |
2357: * "'" ([^<&'] | Reference)* "'"
1.68 daniel 2358: *
2359: * Returns the AttValue parsed or NULL.
1.29 daniel 2360: */
2361:
1.55 daniel 2362: CHAR *
2363: xmlParseAttValue(xmlParserCtxtPtr ctxt) {
1.77 daniel 2364: CHAR *ret = NULL;
1.29 daniel 2365:
1.91 daniel 2366: SHRINK;
1.40 daniel 2367: if (CUR == '"') {
1.96 daniel 2368: ctxt->instate = XML_PARSER_ATTRIBUTE_VALUE;
1.40 daniel 2369: NEXT;
1.98 daniel 2370: ret = xmlDecodeEntities(ctxt, -1, XML_SUBSTITUTE_REF, '"', '<', 0);
1.77 daniel 2371: if (CUR == '<') {
2372: if ((ctxt->sax != NULL) && (ctxt->sax->error != NULL))
2373: ctxt->sax->error(ctxt->userData,
2374: "Unescaped '<' not allowed in attributes values\n");
2375: ctxt->wellFormed = 0;
1.29 daniel 2376: }
1.77 daniel 2377: if (CUR != '"') {
1.55 daniel 2378: if ((ctxt->sax != NULL) && (ctxt->sax->error != NULL))
1.77 daniel 2379: ctxt->sax->error(ctxt->userData, "AttValue: ' expected\n");
1.59 daniel 2380: ctxt->wellFormed = 0;
1.77 daniel 2381: } else
1.40 daniel 2382: NEXT;
2383: } else if (CUR == '\'') {
1.96 daniel 2384: ctxt->instate = XML_PARSER_ATTRIBUTE_VALUE;
1.40 daniel 2385: NEXT;
1.98 daniel 2386: ret = xmlDecodeEntities(ctxt, -1, XML_SUBSTITUTE_REF, '\'', '<', 0);
1.77 daniel 2387: if (CUR == '<') {
2388: if ((ctxt->sax != NULL) && (ctxt->sax->error != NULL))
2389: ctxt->sax->error(ctxt->userData,
2390: "Unescaped '<' not allowed in attributes values\n");
2391: ctxt->wellFormed = 0;
1.29 daniel 2392: }
1.77 daniel 2393: if (CUR != '\'') {
1.55 daniel 2394: if ((ctxt->sax != NULL) && (ctxt->sax->error != NULL))
1.77 daniel 2395: ctxt->sax->error(ctxt->userData, "AttValue: ' expected\n");
1.59 daniel 2396: ctxt->wellFormed = 0;
1.77 daniel 2397: } else
1.40 daniel 2398: NEXT;
1.29 daniel 2399: } else {
1.55 daniel 2400: if ((ctxt->sax != NULL) && (ctxt->sax->error != NULL))
1.74 daniel 2401: ctxt->sax->error(ctxt->userData, "AttValue: \" or ' expected\n");
1.59 daniel 2402: ctxt->wellFormed = 0;
1.29 daniel 2403: }
2404:
2405: return(ret);
2406: }
2407:
1.50 daniel 2408: /**
2409: * xmlParseSystemLiteral:
2410: * @ctxt: an XML parser context
2411: *
2412: * parse an XML Literal
1.21 daniel 2413: *
1.22 daniel 2414: * [11] SystemLiteral ::= ('"' [^"]* '"') | ("'" [^']* "'")
1.68 daniel 2415: *
2416: * Returns the SystemLiteral parsed or NULL
1.21 daniel 2417: */
2418:
1.55 daniel 2419: CHAR *
2420: xmlParseSystemLiteral(xmlParserCtxtPtr ctxt) {
1.21 daniel 2421: const CHAR *q;
2422: CHAR *ret = NULL;
2423:
1.91 daniel 2424: SHRINK;
1.40 daniel 2425: if (CUR == '"') {
2426: NEXT;
2427: q = CUR_PTR;
2428: while ((IS_CHAR(CUR)) && (CUR != '"'))
2429: NEXT;
2430: if (!IS_CHAR(CUR)) {
1.55 daniel 2431: if ((ctxt->sax != NULL) && (ctxt->sax->error != NULL))
1.74 daniel 2432: ctxt->sax->error(ctxt->userData, "Unfinished SystemLiteral\n");
1.59 daniel 2433: ctxt->wellFormed = 0;
1.21 daniel 2434: } else {
1.40 daniel 2435: ret = xmlStrndup(q, CUR_PTR - q);
2436: NEXT;
1.21 daniel 2437: }
1.40 daniel 2438: } else if (CUR == '\'') {
2439: NEXT;
2440: q = CUR_PTR;
2441: while ((IS_CHAR(CUR)) && (CUR != '\''))
2442: NEXT;
2443: if (!IS_CHAR(CUR)) {
1.55 daniel 2444: if ((ctxt->sax != NULL) && (ctxt->sax->error != NULL))
1.74 daniel 2445: ctxt->sax->error(ctxt->userData, "Unfinished SystemLiteral\n");
1.59 daniel 2446: ctxt->wellFormed = 0;
1.21 daniel 2447: } else {
1.40 daniel 2448: ret = xmlStrndup(q, CUR_PTR - q);
2449: NEXT;
1.21 daniel 2450: }
2451: } else {
1.55 daniel 2452: if ((ctxt->sax != NULL) && (ctxt->sax->error != NULL))
1.74 daniel 2453: ctxt->sax->error(ctxt->userData, "SystemLiteral \" or ' expected\n");
1.59 daniel 2454: ctxt->wellFormed = 0;
1.21 daniel 2455: }
2456:
2457: return(ret);
2458: }
2459:
1.50 daniel 2460: /**
2461: * xmlParsePubidLiteral:
2462: * @ctxt: an XML parser context
1.21 daniel 2463: *
1.50 daniel 2464: * parse an XML public literal
1.68 daniel 2465: *
2466: * [12] PubidLiteral ::= '"' PubidChar* '"' | "'" (PubidChar - "'")* "'"
2467: *
2468: * Returns the PubidLiteral parsed or NULL.
1.21 daniel 2469: */
2470:
1.55 daniel 2471: CHAR *
2472: xmlParsePubidLiteral(xmlParserCtxtPtr ctxt) {
1.21 daniel 2473: const CHAR *q;
2474: CHAR *ret = NULL;
2475: /*
2476: * Name ::= (Letter | '_') (NameChar)*
2477: */
1.91 daniel 2478: SHRINK;
1.40 daniel 2479: if (CUR == '"') {
2480: NEXT;
2481: q = CUR_PTR;
2482: while (IS_PUBIDCHAR(CUR)) NEXT;
2483: if (CUR != '"') {
1.55 daniel 2484: if ((ctxt->sax != NULL) && (ctxt->sax->error != NULL))
1.74 daniel 2485: ctxt->sax->error(ctxt->userData, "Unfinished PubidLiteral\n");
1.59 daniel 2486: ctxt->wellFormed = 0;
1.21 daniel 2487: } else {
1.40 daniel 2488: ret = xmlStrndup(q, CUR_PTR - q);
2489: NEXT;
1.21 daniel 2490: }
1.40 daniel 2491: } else if (CUR == '\'') {
2492: NEXT;
2493: q = CUR_PTR;
2494: while ((IS_LETTER(CUR)) && (CUR != '\''))
2495: NEXT;
2496: if (!IS_LETTER(CUR)) {
1.55 daniel 2497: if ((ctxt->sax != NULL) && (ctxt->sax->error != NULL))
1.74 daniel 2498: ctxt->sax->error(ctxt->userData, "Unfinished PubidLiteral\n");
1.59 daniel 2499: ctxt->wellFormed = 0;
1.21 daniel 2500: } else {
1.40 daniel 2501: ret = xmlStrndup(q, CUR_PTR - q);
2502: NEXT;
1.21 daniel 2503: }
2504: } else {
1.55 daniel 2505: if ((ctxt->sax != NULL) && (ctxt->sax->error != NULL))
1.74 daniel 2506: ctxt->sax->error(ctxt->userData, "SystemLiteral \" or ' expected\n");
1.59 daniel 2507: ctxt->wellFormed = 0;
1.21 daniel 2508: }
2509:
2510: return(ret);
2511: }
2512:
1.50 daniel 2513: /**
2514: * xmlParseCharData:
2515: * @ctxt: an XML parser context
2516: * @cdata: int indicating whether we are within a CDATA section
2517: *
2518: * parse a CharData section.
2519: * if we are within a CDATA section ']]>' marks an end of section.
1.27 daniel 2520: *
2521: * [14] CharData ::= [^<&]* - ([^<&]* ']]>' [^<&]*)
2522: */
2523:
1.55 daniel 2524: void
2525: xmlParseCharData(xmlParserCtxtPtr ctxt, int cdata) {
1.91 daniel 2526: CHAR buf[1000];
2527: int nbchar = 0;
1.97 daniel 2528: CHAR cur;
1.27 daniel 2529:
1.91 daniel 2530: SHRINK;
1.97 daniel 2531: /*
2532: * !!!!!!!!!!!!
2533: * NOTE: NXT(0) is used here to avoid breaking on < or &
2534: * entities substitutions.
2535: */
2536: cur = CUR;
2537: while ((IS_CHAR(cur)) && (cur != '<') &&
2538: (cur != '&')) {
2539: if ((cur == ']') && (NXT(1) == ']') &&
1.59 daniel 2540: (NXT(2) == '>')) {
2541: if (cdata) break;
2542: else {
2543: if ((ctxt->sax != NULL) && (ctxt->sax->error != NULL))
1.74 daniel 2544: ctxt->sax->error(ctxt->userData,
1.59 daniel 2545: "Sequence ']]>' not allowed in content\n");
2546: ctxt->wellFormed = 0;
2547: }
2548: }
1.91 daniel 2549: buf[nbchar++] = CUR;
2550: if (nbchar == 1000) {
2551: /*
2552: * Ok the segment is to be consumed as chars.
2553: */
2554: if (ctxt->sax != NULL) {
2555: if (areBlanks(ctxt, buf, nbchar)) {
2556: if (ctxt->sax->ignorableWhitespace != NULL)
2557: ctxt->sax->ignorableWhitespace(ctxt->userData,
2558: buf, nbchar);
2559: } else {
2560: if (ctxt->sax->characters != NULL)
2561: ctxt->sax->characters(ctxt->userData, buf, nbchar);
2562: }
2563: }
2564: nbchar = 0;
2565: }
1.40 daniel 2566: NEXT;
1.97 daniel 2567: cur = CUR;
1.27 daniel 2568: }
1.91 daniel 2569: if (nbchar != 0) {
2570: /*
2571: * Ok the segment is to be consumed as chars.
2572: */
2573: if (ctxt->sax != NULL) {
2574: if (areBlanks(ctxt, buf, nbchar)) {
2575: if (ctxt->sax->ignorableWhitespace != NULL)
2576: ctxt->sax->ignorableWhitespace(ctxt->userData, buf, nbchar);
2577: } else {
2578: if (ctxt->sax->characters != NULL)
2579: ctxt->sax->characters(ctxt->userData, buf, nbchar);
2580: }
2581: }
1.45 daniel 2582: }
1.27 daniel 2583: }
2584:
1.50 daniel 2585: /**
2586: * xmlParseExternalID:
2587: * @ctxt: an XML parser context
2588: * @publicID: a CHAR** receiving PubidLiteral
1.67 daniel 2589: * @strict: indicate whether we should restrict parsing to only
2590: * production [75], see NOTE below
1.50 daniel 2591: *
1.67 daniel 2592: * Parse an External ID or a Public ID
2593: *
2594: * NOTE: Productions [75] and [83] interract badly since [75] can generate
2595: * 'PUBLIC' S PubidLiteral S SystemLiteral
1.22 daniel 2596: *
2597: * [75] ExternalID ::= 'SYSTEM' S SystemLiteral
2598: * | 'PUBLIC' S PubidLiteral S SystemLiteral
1.67 daniel 2599: *
2600: * [83] PublicID ::= 'PUBLIC' S PubidLiteral
2601: *
1.68 daniel 2602: * Returns the function returns SystemLiteral and in the second
1.67 daniel 2603: * case publicID receives PubidLiteral, is strict is off
2604: * it is possible to return NULL and have publicID set.
1.22 daniel 2605: */
2606:
1.55 daniel 2607: CHAR *
1.67 daniel 2608: xmlParseExternalID(xmlParserCtxtPtr ctxt, CHAR **publicID, int strict) {
1.39 daniel 2609: CHAR *URI = NULL;
1.22 daniel 2610:
1.91 daniel 2611: SHRINK;
1.40 daniel 2612: if ((CUR == 'S') && (NXT(1) == 'Y') &&
2613: (NXT(2) == 'S') && (NXT(3) == 'T') &&
2614: (NXT(4) == 'E') && (NXT(5) == 'M')) {
2615: SKIP(6);
1.59 daniel 2616: if (!IS_BLANK(CUR)) {
2617: if ((ctxt->sax != NULL) && (ctxt->sax->error != NULL))
1.74 daniel 2618: ctxt->sax->error(ctxt->userData,
1.59 daniel 2619: "Space required after 'SYSTEM'\n");
2620: ctxt->wellFormed = 0;
2621: }
1.42 daniel 2622: SKIP_BLANKS;
1.39 daniel 2623: URI = xmlParseSystemLiteral(ctxt);
1.59 daniel 2624: if (URI == NULL) {
1.55 daniel 2625: if ((ctxt->sax != NULL) && (ctxt->sax->error != NULL))
1.74 daniel 2626: ctxt->sax->error(ctxt->userData,
1.39 daniel 2627: "xmlParseExternalID: SYSTEM, no URI\n");
1.59 daniel 2628: ctxt->wellFormed = 0;
2629: }
1.40 daniel 2630: } else if ((CUR == 'P') && (NXT(1) == 'U') &&
2631: (NXT(2) == 'B') && (NXT(3) == 'L') &&
2632: (NXT(4) == 'I') && (NXT(5) == 'C')) {
2633: SKIP(6);
1.59 daniel 2634: if (!IS_BLANK(CUR)) {
2635: if ((ctxt->sax != NULL) && (ctxt->sax->error != NULL))
1.74 daniel 2636: ctxt->sax->error(ctxt->userData,
1.59 daniel 2637: "Space required after 'PUBLIC'\n");
2638: ctxt->wellFormed = 0;
2639: }
1.42 daniel 2640: SKIP_BLANKS;
1.39 daniel 2641: *publicID = xmlParsePubidLiteral(ctxt);
1.59 daniel 2642: if (*publicID == NULL) {
1.55 daniel 2643: if ((ctxt->sax != NULL) && (ctxt->sax->error != NULL))
1.74 daniel 2644: ctxt->sax->error(ctxt->userData,
1.39 daniel 2645: "xmlParseExternalID: PUBLIC, no Public Identifier\n");
1.59 daniel 2646: ctxt->wellFormed = 0;
2647: }
1.67 daniel 2648: if (strict) {
2649: /*
2650: * We don't handle [83] so "S SystemLiteral" is required.
2651: */
2652: if (!IS_BLANK(CUR)) {
2653: if ((ctxt->sax != NULL) && (ctxt->sax->error != NULL))
1.74 daniel 2654: ctxt->sax->error(ctxt->userData,
1.67 daniel 2655: "Space required after the Public Identifier\n");
2656: ctxt->wellFormed = 0;
2657: }
2658: } else {
2659: /*
2660: * We handle [83] so we return immediately, if
2661: * "S SystemLiteral" is not detected. From a purely parsing
2662: * point of view that's a nice mess.
2663: */
2664: const CHAR *ptr = CUR_PTR;
2665: if (!IS_BLANK(*ptr)) return(NULL);
2666:
2667: while (IS_BLANK(*ptr)) ptr++;
2668: if ((*ptr != '\'') || (*ptr != '"')) return(NULL);
1.59 daniel 2669: }
1.42 daniel 2670: SKIP_BLANKS;
1.39 daniel 2671: URI = xmlParseSystemLiteral(ctxt);
1.59 daniel 2672: if (URI == NULL) {
1.55 daniel 2673: if ((ctxt->sax != NULL) && (ctxt->sax->error != NULL))
1.74 daniel 2674: ctxt->sax->error(ctxt->userData,
1.39 daniel 2675: "xmlParseExternalID: PUBLIC, no URI\n");
1.59 daniel 2676: ctxt->wellFormed = 0;
2677: }
1.22 daniel 2678: }
1.39 daniel 2679: return(URI);
1.22 daniel 2680: }
2681:
1.50 daniel 2682: /**
2683: * xmlParseComment:
1.69 daniel 2684: * @ctxt: an XML parser context
2685: * @create: should we create a node, or just skip the content
1.50 daniel 2686: *
1.3 veillard 2687: * Skip an XML (SGML) comment <!-- .... -->
1.31 daniel 2688: * This may or may not create a node (depending on the context)
1.38 daniel 2689: * The spec says that "For compatibility, the string "--" (double-hyphen)
2690: * must not occur within comments. "
1.22 daniel 2691: *
2692: * [15] Comment ::= '<!--' ((Char - '-') | ('-' (Char - '-')))* '-->'
1.3 veillard 2693: */
1.72 daniel 2694: void
1.69 daniel 2695: xmlParseComment(xmlParserCtxtPtr ctxt, int create) {
1.17 daniel 2696: const CHAR *q, *start;
2697: const CHAR *r;
1.39 daniel 2698: CHAR *val;
1.3 veillard 2699:
2700: /*
1.22 daniel 2701: * Check that there is a comment right here.
1.3 veillard 2702: */
1.40 daniel 2703: if ((CUR != '<') || (NXT(1) != '!') ||
1.72 daniel 2704: (NXT(2) != '-') || (NXT(3) != '-')) return;
1.3 veillard 2705:
1.97 daniel 2706: ctxt->instate = XML_PARSER_COMMENT;
1.91 daniel 2707: SHRINK;
1.40 daniel 2708: SKIP(4);
2709: start = q = CUR_PTR;
2710: NEXT;
2711: r = CUR_PTR;
2712: NEXT;
2713: while (IS_CHAR(CUR) &&
2714: ((CUR == ':') || (CUR != '>') ||
1.16 daniel 2715: (*r != '-') || (*q != '-'))) {
1.59 daniel 2716: if ((*r == '-') && (*q == '-')) {
1.55 daniel 2717: if ((ctxt->sax != NULL) && (ctxt->sax->error != NULL))
1.74 daniel 2718: ctxt->sax->error(ctxt->userData,
1.38 daniel 2719: "Comment must not contain '--' (double-hyphen)`\n");
1.59 daniel 2720: ctxt->wellFormed = 0;
2721: }
1.40 daniel 2722: NEXT;r++;q++;
1.3 veillard 2723: }
1.40 daniel 2724: if (!IS_CHAR(CUR)) {
1.55 daniel 2725: if ((ctxt->sax != NULL) && (ctxt->sax->error != NULL))
1.74 daniel 2726: ctxt->sax->error(ctxt->userData, "Comment not terminated \n<!--%.50s\n", start);
1.59 daniel 2727: ctxt->wellFormed = 0;
1.3 veillard 2728: } else {
1.40 daniel 2729: NEXT;
1.31 daniel 2730: if (create) {
1.39 daniel 2731: val = xmlStrndup(start, q - start);
1.72 daniel 2732: if ((ctxt->sax != NULL) && (ctxt->sax->comment != NULL))
1.74 daniel 2733: ctxt->sax->comment(ctxt->userData, val);
1.39 daniel 2734: free(val);
1.31 daniel 2735: }
1.3 veillard 2736: }
2737: }
2738:
1.50 daniel 2739: /**
2740: * xmlParsePITarget:
2741: * @ctxt: an XML parser context
2742: *
2743: * parse the name of a PI
1.22 daniel 2744: *
2745: * [17] PITarget ::= Name - (('X' | 'x') ('M' | 'm') ('L' | 'l'))
1.68 daniel 2746: *
2747: * Returns the PITarget name or NULL
1.22 daniel 2748: */
2749:
1.55 daniel 2750: CHAR *
2751: xmlParsePITarget(xmlParserCtxtPtr ctxt) {
1.22 daniel 2752: CHAR *name;
2753:
2754: name = xmlParseName(ctxt);
2755: if ((name != NULL) && (name[3] == 0) &&
2756: ((name[0] == 'x') || (name[0] == 'X')) &&
1.31 daniel 2757: ((name[1] == 'm') || (name[1] == 'M')) &&
2758: ((name[2] == 'l') || (name[2] == 'L'))) {
1.55 daniel 2759: if ((ctxt->sax != NULL) && (ctxt->sax->error != NULL))
1.74 daniel 2760: ctxt->sax->error(ctxt->userData, "xmlParsePItarget: invalid name prefix 'xml'\n");
1.22 daniel 2761: return(NULL);
2762: }
2763: return(name);
2764: }
2765:
1.50 daniel 2766: /**
2767: * xmlParsePI:
2768: * @ctxt: an XML parser context
2769: *
2770: * parse an XML Processing Instruction.
1.22 daniel 2771: *
2772: * [16] PI ::= '<?' PITarget (S (Char* - (Char* '?>' Char*)))? '?>'
1.68 daniel 2773: *
1.69 daniel 2774: * The processing is transfered to SAX once parsed.
1.3 veillard 2775: */
2776:
1.55 daniel 2777: void
2778: xmlParsePI(xmlParserCtxtPtr ctxt) {
1.22 daniel 2779: CHAR *target;
2780:
1.40 daniel 2781: if ((CUR == '<') && (NXT(1) == '?')) {
1.3 veillard 2782: /*
2783: * this is a Processing Instruction.
2784: */
1.40 daniel 2785: SKIP(2);
1.91 daniel 2786: SHRINK;
1.3 veillard 2787:
2788: /*
1.22 daniel 2789: * Parse the target name and check for special support like
2790: * namespace.
1.3 veillard 2791: */
1.22 daniel 2792: target = xmlParsePITarget(ctxt);
2793: if (target != NULL) {
1.72 daniel 2794: const CHAR *q = CUR_PTR;
2795:
2796: while (IS_CHAR(CUR) &&
2797: ((CUR != '?') || (NXT(1) != '>')))
2798: NEXT;
2799: if (!IS_CHAR(CUR)) {
2800: if ((ctxt->sax != NULL) && (ctxt->sax->error != NULL))
1.74 daniel 2801: ctxt->sax->error(ctxt->userData,
1.72 daniel 2802: "xmlParsePI: PI %s never end ...\n", target);
2803: ctxt->wellFormed = 0;
1.22 daniel 2804: } else {
1.72 daniel 2805: CHAR *data;
1.44 daniel 2806:
1.72 daniel 2807: data = xmlStrndup(q, CUR_PTR - q);
2808: SKIP(2);
1.44 daniel 2809:
1.72 daniel 2810: /*
2811: * SAX: PI detected.
2812: */
2813: if ((ctxt->sax) &&
2814: (ctxt->sax->processingInstruction != NULL))
1.99 daniel 2815: ctxt->sax->processingInstruction(ctxt->userData,
2816: target, data);
1.72 daniel 2817: free(data);
1.22 daniel 2818: }
1.39 daniel 2819: free(target);
1.3 veillard 2820: } else {
1.55 daniel 2821: if ((ctxt->sax != NULL) && (ctxt->sax->error != NULL))
1.99 daniel 2822: ctxt->sax->error(ctxt->userData,
2823: "xmlParsePI : no target name\n");
1.59 daniel 2824: ctxt->wellFormed = 0;
1.22 daniel 2825: }
2826: }
2827: }
2828:
1.50 daniel 2829: /**
2830: * xmlParseNotationDecl:
2831: * @ctxt: an XML parser context
2832: *
2833: * parse a notation declaration
1.22 daniel 2834: *
2835: * [82] NotationDecl ::= '<!NOTATION' S Name S (ExternalID | PublicID) S? '>'
2836: *
2837: * Hence there is actually 3 choices:
2838: * 'PUBLIC' S PubidLiteral
2839: * 'PUBLIC' S PubidLiteral S SystemLiteral
2840: * and 'SYSTEM' S SystemLiteral
1.50 daniel 2841: *
1.67 daniel 2842: * See the NOTE on xmlParseExternalID().
1.22 daniel 2843: */
2844:
1.55 daniel 2845: void
2846: xmlParseNotationDecl(xmlParserCtxtPtr ctxt) {
1.22 daniel 2847: CHAR *name;
1.67 daniel 2848: CHAR *Pubid;
2849: CHAR *Systemid;
1.22 daniel 2850:
1.40 daniel 2851: if ((CUR == '<') && (NXT(1) == '!') &&
2852: (NXT(2) == 'N') && (NXT(3) == 'O') &&
2853: (NXT(4) == 'T') && (NXT(5) == 'A') &&
2854: (NXT(6) == 'T') && (NXT(7) == 'I') &&
1.67 daniel 2855: (NXT(8) == 'O') && (NXT(9) == 'N')) {
1.91 daniel 2856: SHRINK;
1.40 daniel 2857: SKIP(10);
1.67 daniel 2858: if (!IS_BLANK(CUR)) {
2859: if ((ctxt->sax != NULL) && (ctxt->sax->error != NULL))
1.74 daniel 2860: ctxt->sax->error(ctxt->userData, "Space required after '<!NOTATION'\n");
1.67 daniel 2861: ctxt->wellFormed = 0;
2862: return;
2863: }
2864: SKIP_BLANKS;
1.22 daniel 2865:
2866: name = xmlParseName(ctxt);
2867: if (name == NULL) {
1.55 daniel 2868: if ((ctxt->sax != NULL) && (ctxt->sax->error != NULL))
1.74 daniel 2869: ctxt->sax->error(ctxt->userData, "NOTATION: Name expected here\n");
1.67 daniel 2870: ctxt->wellFormed = 0;
2871: return;
2872: }
2873: if (!IS_BLANK(CUR)) {
2874: if ((ctxt->sax != NULL) && (ctxt->sax->error != NULL))
1.74 daniel 2875: ctxt->sax->error(ctxt->userData,
1.67 daniel 2876: "Space required after the NOTATION name'\n");
1.59 daniel 2877: ctxt->wellFormed = 0;
1.22 daniel 2878: return;
2879: }
1.42 daniel 2880: SKIP_BLANKS;
1.67 daniel 2881:
1.22 daniel 2882: /*
1.67 daniel 2883: * Parse the IDs.
1.22 daniel 2884: */
1.67 daniel 2885: Systemid = xmlParseExternalID(ctxt, &Pubid, 1);
2886: SKIP_BLANKS;
2887:
2888: if (CUR == '>') {
1.40 daniel 2889: NEXT;
1.72 daniel 2890: if ((ctxt->sax != NULL) && (ctxt->sax->notationDecl != NULL))
1.74 daniel 2891: ctxt->sax->notationDecl(ctxt->userData, name, Pubid, Systemid);
1.67 daniel 2892: } else {
2893: if ((ctxt->sax != NULL) && (ctxt->sax->error != NULL))
1.74 daniel 2894: ctxt->sax->error(ctxt->userData,
1.67 daniel 2895: "'>' required to close NOTATION declaration\n");
2896: ctxt->wellFormed = 0;
2897: }
1.22 daniel 2898: free(name);
1.67 daniel 2899: if (Systemid != NULL) free(Systemid);
2900: if (Pubid != NULL) free(Pubid);
1.22 daniel 2901: }
2902: }
2903:
1.50 daniel 2904: /**
2905: * xmlParseEntityDecl:
2906: * @ctxt: an XML parser context
2907: *
2908: * parse <!ENTITY declarations
1.22 daniel 2909: *
2910: * [70] EntityDecl ::= GEDecl | PEDecl
2911: *
2912: * [71] GEDecl ::= '<!ENTITY' S Name S EntityDef S? '>'
2913: *
2914: * [72] PEDecl ::= '<!ENTITY' S '%' S Name S PEDef S? '>'
2915: *
2916: * [73] EntityDef ::= EntityValue | (ExternalID NDataDecl?)
2917: *
2918: * [74] PEDef ::= EntityValue | ExternalID
1.24 daniel 2919: *
2920: * [76] NDataDecl ::= S 'NDATA' S Name
1.99 daniel 2921: *
2922: * [ VC: Notation Declared ]
2923: * TODO The Name must match the declared name of a notation.
1.22 daniel 2924: */
2925:
1.55 daniel 2926: void
2927: xmlParseEntityDecl(xmlParserCtxtPtr ctxt) {
1.39 daniel 2928: CHAR *name = NULL;
1.24 daniel 2929: CHAR *value = NULL;
1.39 daniel 2930: CHAR *URI = NULL, *literal = NULL;
1.24 daniel 2931: CHAR *ndata = NULL;
1.39 daniel 2932: int isParameter = 0;
1.78 daniel 2933: CHAR *orig = NULL;
1.22 daniel 2934:
1.94 daniel 2935: GROW;
1.40 daniel 2936: if ((CUR == '<') && (NXT(1) == '!') &&
2937: (NXT(2) == 'E') && (NXT(3) == 'N') &&
2938: (NXT(4) == 'T') && (NXT(5) == 'I') &&
1.59 daniel 2939: (NXT(6) == 'T') && (NXT(7) == 'Y')) {
1.96 daniel 2940: ctxt->instate = XML_PARSER_ENTITY_DECL;
1.91 daniel 2941: SHRINK;
1.40 daniel 2942: SKIP(8);
1.59 daniel 2943: if (!IS_BLANK(CUR)) {
2944: if ((ctxt->sax != NULL) && (ctxt->sax->error != NULL))
1.74 daniel 2945: ctxt->sax->error(ctxt->userData, "Space required after '<!ENTITY'\n");
1.59 daniel 2946: ctxt->wellFormed = 0;
2947: }
2948: SKIP_BLANKS;
1.40 daniel 2949:
2950: if (CUR == '%') {
2951: NEXT;
1.59 daniel 2952: if (!IS_BLANK(CUR)) {
2953: if ((ctxt->sax != NULL) && (ctxt->sax->error != NULL))
1.74 daniel 2954: ctxt->sax->error(ctxt->userData, "Space required after '%'\n");
1.59 daniel 2955: ctxt->wellFormed = 0;
2956: }
1.42 daniel 2957: SKIP_BLANKS;
1.39 daniel 2958: isParameter = 1;
1.22 daniel 2959: }
2960:
2961: name = xmlParseName(ctxt);
1.24 daniel 2962: if (name == NULL) {
1.55 daniel 2963: if ((ctxt->sax != NULL) && (ctxt->sax->error != NULL))
1.74 daniel 2964: ctxt->sax->error(ctxt->userData, "xmlParseEntityDecl: no name\n");
1.59 daniel 2965: ctxt->wellFormed = 0;
1.24 daniel 2966: return;
2967: }
1.59 daniel 2968: if (!IS_BLANK(CUR)) {
2969: if ((ctxt->sax != NULL) && (ctxt->sax->error != NULL))
1.74 daniel 2970: ctxt->sax->error(ctxt->userData,
1.59 daniel 2971: "Space required after the entity name\n");
2972: ctxt->wellFormed = 0;
2973: }
1.42 daniel 2974: SKIP_BLANKS;
1.24 daniel 2975:
1.22 daniel 2976: /*
1.68 daniel 2977: * handle the various case of definitions...
1.22 daniel 2978: */
1.39 daniel 2979: if (isParameter) {
1.40 daniel 2980: if ((CUR == '"') || (CUR == '\''))
1.78 daniel 2981: value = xmlParseEntityValue(ctxt, &orig);
1.39 daniel 2982: if (value) {
1.72 daniel 2983: if ((ctxt->sax != NULL) && (ctxt->sax->entityDecl != NULL))
1.74 daniel 2984: ctxt->sax->entityDecl(ctxt->userData, name,
1.39 daniel 2985: XML_INTERNAL_PARAMETER_ENTITY,
2986: NULL, NULL, value);
2987: }
1.24 daniel 2988: else {
1.67 daniel 2989: URI = xmlParseExternalID(ctxt, &literal, 1);
1.39 daniel 2990: if (URI) {
1.72 daniel 2991: if ((ctxt->sax != NULL) && (ctxt->sax->entityDecl != NULL))
1.74 daniel 2992: ctxt->sax->entityDecl(ctxt->userData, name,
1.39 daniel 2993: XML_EXTERNAL_PARAMETER_ENTITY,
2994: literal, URI, NULL);
2995: }
1.24 daniel 2996: }
2997: } else {
1.40 daniel 2998: if ((CUR == '"') || (CUR == '\'')) {
1.78 daniel 2999: value = xmlParseEntityValue(ctxt, &orig);
1.72 daniel 3000: if ((ctxt->sax != NULL) && (ctxt->sax->entityDecl != NULL))
1.74 daniel 3001: ctxt->sax->entityDecl(ctxt->userData, name,
1.39 daniel 3002: XML_INTERNAL_GENERAL_ENTITY,
3003: NULL, NULL, value);
3004: } else {
1.67 daniel 3005: URI = xmlParseExternalID(ctxt, &literal, 1);
1.59 daniel 3006: if ((CUR != '>') && (!IS_BLANK(CUR))) {
3007: if ((ctxt->sax != NULL) && (ctxt->sax->error != NULL))
1.74 daniel 3008: ctxt->sax->error(ctxt->userData,
1.59 daniel 3009: "Space required before 'NDATA'\n");
3010: ctxt->wellFormed = 0;
3011: }
1.42 daniel 3012: SKIP_BLANKS;
1.40 daniel 3013: if ((CUR == 'N') && (NXT(1) == 'D') &&
3014: (NXT(2) == 'A') && (NXT(3) == 'T') &&
3015: (NXT(4) == 'A')) {
3016: SKIP(5);
1.59 daniel 3017: if (!IS_BLANK(CUR)) {
3018: if ((ctxt->sax != NULL) && (ctxt->sax->error != NULL))
1.74 daniel 3019: ctxt->sax->error(ctxt->userData,
1.59 daniel 3020: "Space required after 'NDATA'\n");
3021: ctxt->wellFormed = 0;
3022: }
1.42 daniel 3023: SKIP_BLANKS;
1.24 daniel 3024: ndata = xmlParseName(ctxt);
1.72 daniel 3025: if ((ctxt->sax != NULL) && (ctxt->sax->entityDecl != NULL))
1.74 daniel 3026: ctxt->sax->entityDecl(ctxt->userData, name,
1.39 daniel 3027: XML_EXTERNAL_GENERAL_UNPARSED_ENTITY,
3028: literal, URI, ndata);
3029: } else {
1.72 daniel 3030: if ((ctxt->sax != NULL) && (ctxt->sax->entityDecl != NULL))
1.74 daniel 3031: ctxt->sax->entityDecl(ctxt->userData, name,
1.39 daniel 3032: XML_EXTERNAL_GENERAL_PARSED_ENTITY,
3033: literal, URI, NULL);
1.24 daniel 3034: }
3035: }
3036: }
1.42 daniel 3037: SKIP_BLANKS;
1.40 daniel 3038: if (CUR != '>') {
1.55 daniel 3039: if ((ctxt->sax != NULL) && (ctxt->sax->error != NULL))
1.74 daniel 3040: ctxt->sax->error(ctxt->userData,
1.31 daniel 3041: "xmlParseEntityDecl: entity %s not terminated\n", name);
1.59 daniel 3042: ctxt->wellFormed = 0;
1.24 daniel 3043: } else
1.40 daniel 3044: NEXT;
1.78 daniel 3045: if (orig != NULL) {
3046: /*
1.98 daniel 3047: * Ugly mechanism to save the raw entity value.
1.78 daniel 3048: */
3049: xmlEntityPtr cur = NULL;
3050:
1.98 daniel 3051: if (isParameter) {
3052: if ((ctxt->sax != NULL) &&
3053: (ctxt->sax->getParameterEntity != NULL))
3054: cur = ctxt->sax->getParameterEntity(ctxt, name);
3055: } else {
3056: if ((ctxt->sax != NULL) &&
3057: (ctxt->sax->getEntity != NULL))
3058: cur = ctxt->sax->getEntity(ctxt, name);
3059: }
3060: if (cur != NULL) {
3061: if (cur->orig != NULL)
3062: free(orig);
3063: else
3064: cur->orig = orig;
3065: } else
1.78 daniel 3066: free(orig);
3067: }
1.39 daniel 3068: if (name != NULL) free(name);
3069: if (value != NULL) free(value);
3070: if (URI != NULL) free(URI);
3071: if (literal != NULL) free(literal);
3072: if (ndata != NULL) free(ndata);
1.22 daniel 3073: }
3074: }
3075:
1.50 daniel 3076: /**
1.59 daniel 3077: * xmlParseDefaultDecl:
3078: * @ctxt: an XML parser context
3079: * @value: Receive a possible fixed default value for the attribute
3080: *
3081: * Parse an attribute default declaration
3082: *
3083: * [60] DefaultDecl ::= '#REQUIRED' | '#IMPLIED' | (('#FIXED' S)? AttValue)
3084: *
1.99 daniel 3085: * [ VC: Required Attribute ]
3086: * TODO if the default declaration is the keyword #REQUIRED, then the
3087: * attribute must be specified for all elements of the type in the
3088: * attribute-list declaration.
3089: *
3090: * [ VC: Attribute Default Legal ]
1.102 daniel 3091: * The declared default value must meet the lexical constraints of
3092: * the declared attribute type c.f. xmlValidateAttributeDecl()
1.99 daniel 3093: *
3094: * [ VC: Fixed Attribute Default ]
3095: * TODO if an attribute has a default value declared with the #FIXED
3096: * keyword, instances of that attribute must match the default value.
3097: *
3098: * [ WFC: No < in Attribute Values ]
3099: * handled in xmlParseAttValue()
3100: *
1.59 daniel 3101: * returns: XML_ATTRIBUTE_NONE, XML_ATTRIBUTE_REQUIRED, XML_ATTRIBUTE_IMPLIED
3102: * or XML_ATTRIBUTE_FIXED.
3103: */
3104:
3105: int
3106: xmlParseDefaultDecl(xmlParserCtxtPtr ctxt, CHAR **value) {
3107: int val;
3108: CHAR *ret;
3109:
3110: *value = NULL;
3111: if ((CUR == '#') && (NXT(1) == 'R') &&
3112: (NXT(2) == 'E') && (NXT(3) == 'Q') &&
3113: (NXT(4) == 'U') && (NXT(5) == 'I') &&
3114: (NXT(6) == 'R') && (NXT(7) == 'E') &&
3115: (NXT(8) == 'D')) {
3116: SKIP(9);
3117: return(XML_ATTRIBUTE_REQUIRED);
3118: }
3119: if ((CUR == '#') && (NXT(1) == 'I') &&
3120: (NXT(2) == 'M') && (NXT(3) == 'P') &&
3121: (NXT(4) == 'L') && (NXT(5) == 'I') &&
3122: (NXT(6) == 'E') && (NXT(7) == 'D')) {
3123: SKIP(8);
3124: return(XML_ATTRIBUTE_IMPLIED);
3125: }
3126: val = XML_ATTRIBUTE_NONE;
3127: if ((CUR == '#') && (NXT(1) == 'F') &&
3128: (NXT(2) == 'I') && (NXT(3) == 'X') &&
3129: (NXT(4) == 'E') && (NXT(5) == 'D')) {
3130: SKIP(6);
3131: val = XML_ATTRIBUTE_FIXED;
3132: if (!IS_BLANK(CUR)) {
3133: if ((ctxt->sax != NULL) && (ctxt->sax->error != NULL))
1.74 daniel 3134: ctxt->sax->error(ctxt->userData, "Space required after '#FIXED'\n");
1.59 daniel 3135: ctxt->wellFormed = 0;
3136: }
3137: SKIP_BLANKS;
3138: }
3139: ret = xmlParseAttValue(ctxt);
1.96 daniel 3140: ctxt->instate = XML_PARSER_DTD;
1.59 daniel 3141: if (ret == NULL) {
3142: if ((ctxt->sax != NULL) && (ctxt->sax->error != NULL))
1.74 daniel 3143: ctxt->sax->error(ctxt->userData,
1.59 daniel 3144: "Attribute default value declaration error\n");
3145: ctxt->wellFormed = 0;
3146: } else
3147: *value = ret;
3148: return(val);
3149: }
3150:
3151: /**
1.66 daniel 3152: * xmlParseNotationType:
3153: * @ctxt: an XML parser context
3154: *
3155: * parse a Notation attribute type.
3156: *
1.99 daniel 3157: * Note: the leading 'NOTATION' S part has already been parsed...
3158: *
1.66 daniel 3159: * [58] NotationType ::= 'NOTATION' S '(' S? Name (S? '|' S? Name)* S? ')'
3160: *
1.99 daniel 3161: * [ VC: Notation Attributes ]
3162: * TODO Values of this type must match one of the notation names included
3163: * in the declaration; all notation names in the declaration must be declared.
1.66 daniel 3164: *
3165: * Returns: the notation attribute tree built while parsing
3166: */
3167:
3168: xmlEnumerationPtr
3169: xmlParseNotationType(xmlParserCtxtPtr ctxt) {
3170: CHAR *name;
3171: xmlEnumerationPtr ret = NULL, last = NULL, cur;
3172:
3173: if (CUR != '(') {
3174: if ((ctxt->sax != NULL) && (ctxt->sax->error != NULL))
1.74 daniel 3175: ctxt->sax->error(ctxt->userData, "'(' required to start 'NOTATION'\n");
1.66 daniel 3176: ctxt->wellFormed = 0;
3177: return(NULL);
3178: }
1.91 daniel 3179: SHRINK;
1.66 daniel 3180: do {
3181: NEXT;
3182: SKIP_BLANKS;
3183: name = xmlParseName(ctxt);
3184: if (name == NULL) {
3185: if ((ctxt->sax != NULL) && (ctxt->sax->error != NULL))
1.74 daniel 3186: ctxt->sax->error(ctxt->userData,
1.66 daniel 3187: "Name expected in NOTATION declaration\n");
3188: ctxt->wellFormed = 0;
3189: return(ret);
3190: }
3191: cur = xmlCreateEnumeration(name);
1.67 daniel 3192: free(name);
1.66 daniel 3193: if (cur == NULL) return(ret);
3194: if (last == NULL) ret = last = cur;
3195: else {
3196: last->next = cur;
3197: last = cur;
3198: }
3199: SKIP_BLANKS;
3200: } while (CUR == '|');
3201: if (CUR != ')') {
3202: if ((ctxt->sax != NULL) && (ctxt->sax->error != NULL))
1.74 daniel 3203: ctxt->sax->error(ctxt->userData,
1.66 daniel 3204: "')' required to finish NOTATION declaration\n");
3205: ctxt->wellFormed = 0;
3206: return(ret);
3207: }
3208: NEXT;
3209: return(ret);
3210: }
3211:
3212: /**
3213: * xmlParseEnumerationType:
3214: * @ctxt: an XML parser context
3215: *
3216: * parse an Enumeration attribute type.
3217: *
3218: * [59] Enumeration ::= '(' S? Nmtoken (S? '|' S? Nmtoken)* S? ')'
3219: *
1.99 daniel 3220: * [ VC: Enumeration ]
3221: * TODO Values of this type must match one of the Nmtoken tokens in
3222: * the declaration
3223: *
1.66 daniel 3224: * Returns: the enumeration attribute tree built while parsing
3225: */
3226:
3227: xmlEnumerationPtr
3228: xmlParseEnumerationType(xmlParserCtxtPtr ctxt) {
3229: CHAR *name;
3230: xmlEnumerationPtr ret = NULL, last = NULL, cur;
3231:
3232: if (CUR != '(') {
3233: if ((ctxt->sax != NULL) && (ctxt->sax->error != NULL))
1.74 daniel 3234: ctxt->sax->error(ctxt->userData,
1.66 daniel 3235: "'(' required to start ATTLIST enumeration\n");
3236: ctxt->wellFormed = 0;
3237: return(NULL);
3238: }
1.91 daniel 3239: SHRINK;
1.66 daniel 3240: do {
3241: NEXT;
3242: SKIP_BLANKS;
3243: name = xmlParseNmtoken(ctxt);
3244: if (name == NULL) {
3245: if ((ctxt->sax != NULL) && (ctxt->sax->error != NULL))
1.74 daniel 3246: ctxt->sax->error(ctxt->userData,
1.66 daniel 3247: "NmToken expected in ATTLIST enumeration\n");
3248: ctxt->wellFormed = 0;
3249: return(ret);
3250: }
3251: cur = xmlCreateEnumeration(name);
1.67 daniel 3252: free(name);
1.66 daniel 3253: if (cur == NULL) return(ret);
3254: if (last == NULL) ret = last = cur;
3255: else {
3256: last->next = cur;
3257: last = cur;
3258: }
3259: SKIP_BLANKS;
3260: } while (CUR == '|');
3261: if (CUR != ')') {
3262: if ((ctxt->sax != NULL) && (ctxt->sax->error != NULL))
1.74 daniel 3263: ctxt->sax->error(ctxt->userData,
1.66 daniel 3264: "')' required to finish ATTLIST enumeration\n");
3265: ctxt->wellFormed = 0;
3266: return(ret);
3267: }
3268: NEXT;
3269: return(ret);
3270: }
3271:
3272: /**
1.50 daniel 3273: * xmlParseEnumeratedType:
3274: * @ctxt: an XML parser context
1.66 daniel 3275: * @tree: the enumeration tree built while parsing
1.50 daniel 3276: *
1.66 daniel 3277: * parse an Enumerated attribute type.
1.22 daniel 3278: *
3279: * [57] EnumeratedType ::= NotationType | Enumeration
3280: *
3281: * [58] NotationType ::= 'NOTATION' S '(' S? Name (S? '|' S? Name)* S? ')'
3282: *
1.50 daniel 3283: *
1.66 daniel 3284: * Returns: XML_ATTRIBUTE_ENUMERATION or XML_ATTRIBUTE_NOTATION
1.22 daniel 3285: */
3286:
1.66 daniel 3287: int
3288: xmlParseEnumeratedType(xmlParserCtxtPtr ctxt, xmlEnumerationPtr *tree) {
3289: if ((CUR == 'N') && (NXT(1) == 'O') &&
3290: (NXT(2) == 'T') && (NXT(3) == 'A') &&
3291: (NXT(4) == 'T') && (NXT(5) == 'I') &&
3292: (NXT(6) == 'O') && (NXT(7) == 'N')) {
3293: SKIP(8);
3294: if (!IS_BLANK(CUR)) {
3295: if ((ctxt->sax != NULL) && (ctxt->sax->error != NULL))
1.74 daniel 3296: ctxt->sax->error(ctxt->userData, "Space required after 'NOTATION'\n");
1.66 daniel 3297: ctxt->wellFormed = 0;
3298: return(0);
3299: }
3300: SKIP_BLANKS;
3301: *tree = xmlParseNotationType(ctxt);
3302: if (*tree == NULL) return(0);
3303: return(XML_ATTRIBUTE_NOTATION);
3304: }
3305: *tree = xmlParseEnumerationType(ctxt);
3306: if (*tree == NULL) return(0);
3307: return(XML_ATTRIBUTE_ENUMERATION);
1.22 daniel 3308: }
3309:
1.50 daniel 3310: /**
3311: * xmlParseAttributeType:
3312: * @ctxt: an XML parser context
1.66 daniel 3313: * @tree: the enumeration tree built while parsing
1.50 daniel 3314: *
1.59 daniel 3315: * parse the type of an attribute in an Attribute list declaration
1.22 daniel 3316: *
3317: * [54] AttType ::= StringType | TokenizedType | EnumeratedType
3318: *
3319: * [55] StringType ::= 'CDATA'
3320: *
3321: * [56] TokenizedType ::= 'ID' | 'IDREF' | 'IDREFS' | 'ENTITY' |
3322: * 'ENTITIES' | 'NMTOKEN' | 'NMTOKENS'
1.50 daniel 3323: *
1.102 daniel 3324: * Validity constraints for attribute values syntax are checked in
3325: * xmlValidateAttributeValue()
3326: *
1.99 daniel 3327: * [ VC: ID ]
1.102 daniel 3328: * Values of type ID must match the Name production. TODO A name must not
1.99 daniel 3329: * appear more than once in an XML document as a value of this type;
3330: * i.e., ID values must uniquely identify the elements which bear them.
3331: *
3332: * [ VC: One ID per Element Type ]
3333: * TODO No element type may have more than one ID attribute specified.
3334: *
3335: * [ VC: ID Attribute Default ]
3336: * TODO An ID attribute must have a declared default of #IMPLIED or #REQUIRED.
3337: *
3338: * [ VC: IDREF ]
1.102 daniel 3339: * Values of type IDREF must match the Name production, and values
3340: * of type IDREFS must match Names; TODO each Name must match the value of
1.99 daniel 3341: * an ID attribute on some element in the XML document; i.e. IDREF
3342: * values must match the value of some ID attribute.
3343: *
3344: * [ VC: Entity Name ]
1.102 daniel 3345: * Values of type ENTITY must match the Name production, values
3346: * of type ENTITIES must match Names; TODO each Name must match the name of
1.99 daniel 3347: * an unparsed entity declared in the DTD.
3348: *
3349: * [ VC: Name Token ]
1.102 daniel 3350: * Values of type NMTOKEN must match the Nmtoken production; values
1.99 daniel 3351: * of type NMTOKENS must match Nmtokens.
3352: *
1.69 daniel 3353: * Returns the attribute type
1.22 daniel 3354: */
1.59 daniel 3355: int
1.66 daniel 3356: xmlParseAttributeType(xmlParserCtxtPtr ctxt, xmlEnumerationPtr *tree) {
1.91 daniel 3357: SHRINK;
1.40 daniel 3358: if ((CUR == 'C') && (NXT(1) == 'D') &&
3359: (NXT(2) == 'A') && (NXT(3) == 'T') &&
3360: (NXT(4) == 'A')) {
3361: SKIP(5);
1.66 daniel 3362: return(XML_ATTRIBUTE_CDATA);
1.40 daniel 3363: } else if ((CUR == 'I') && (NXT(1) == 'D') &&
3364: (NXT(2) == 'R') && (NXT(3) == 'E') &&
1.97 daniel 3365: (NXT(4) == 'F') && (NXT(5) == 'S')) {
3366: SKIP(6);
3367: return(XML_ATTRIBUTE_IDREFS);
3368: } else if ((CUR == 'I') && (NXT(1) == 'D') &&
3369: (NXT(2) == 'R') && (NXT(3) == 'E') &&
1.40 daniel 3370: (NXT(4) == 'F')) {
3371: SKIP(5);
1.59 daniel 3372: return(XML_ATTRIBUTE_IDREF);
1.66 daniel 3373: } else if ((CUR == 'I') && (NXT(1) == 'D')) {
3374: SKIP(2);
3375: return(XML_ATTRIBUTE_ID);
1.40 daniel 3376: } else if ((CUR == 'E') && (NXT(1) == 'N') &&
3377: (NXT(2) == 'T') && (NXT(3) == 'I') &&
3378: (NXT(4) == 'T') && (NXT(5) == 'Y')) {
3379: SKIP(6);
1.59 daniel 3380: return(XML_ATTRIBUTE_ENTITY);
1.40 daniel 3381: } else if ((CUR == 'E') && (NXT(1) == 'N') &&
3382: (NXT(2) == 'T') && (NXT(3) == 'I') &&
3383: (NXT(4) == 'T') && (NXT(5) == 'I') &&
3384: (NXT(6) == 'E') && (NXT(7) == 'S')) {
3385: SKIP(8);
1.59 daniel 3386: return(XML_ATTRIBUTE_ENTITIES);
1.40 daniel 3387: } else if ((CUR == 'N') && (NXT(1) == 'M') &&
3388: (NXT(2) == 'T') && (NXT(3) == 'O') &&
3389: (NXT(4) == 'K') && (NXT(5) == 'E') &&
1.66 daniel 3390: (NXT(6) == 'N') && (NXT(7) == 'S')) {
3391: SKIP(8);
3392: return(XML_ATTRIBUTE_NMTOKENS);
3393: } else if ((CUR == 'N') && (NXT(1) == 'M') &&
3394: (NXT(2) == 'T') && (NXT(3) == 'O') &&
3395: (NXT(4) == 'K') && (NXT(5) == 'E') &&
1.40 daniel 3396: (NXT(6) == 'N')) {
3397: SKIP(7);
1.59 daniel 3398: return(XML_ATTRIBUTE_NMTOKEN);
1.22 daniel 3399: }
1.66 daniel 3400: return(xmlParseEnumeratedType(ctxt, tree));
1.22 daniel 3401: }
3402:
1.50 daniel 3403: /**
3404: * xmlParseAttributeListDecl:
3405: * @ctxt: an XML parser context
3406: *
3407: * : parse the Attribute list def for an element
1.22 daniel 3408: *
3409: * [52] AttlistDecl ::= '<!ATTLIST' S Name AttDef* S? '>'
3410: *
3411: * [53] AttDef ::= S Name S AttType S DefaultDecl
1.50 daniel 3412: *
1.22 daniel 3413: */
1.55 daniel 3414: void
3415: xmlParseAttributeListDecl(xmlParserCtxtPtr ctxt) {
1.59 daniel 3416: CHAR *elemName;
3417: CHAR *attrName;
1.103 ! daniel 3418: xmlEnumerationPtr tree;
1.22 daniel 3419:
1.40 daniel 3420: if ((CUR == '<') && (NXT(1) == '!') &&
3421: (NXT(2) == 'A') && (NXT(3) == 'T') &&
3422: (NXT(4) == 'T') && (NXT(5) == 'L') &&
3423: (NXT(6) == 'I') && (NXT(7) == 'S') &&
1.59 daniel 3424: (NXT(8) == 'T')) {
1.40 daniel 3425: SKIP(9);
1.59 daniel 3426: if (!IS_BLANK(CUR)) {
3427: if ((ctxt->sax != NULL) && (ctxt->sax->error != NULL))
1.74 daniel 3428: ctxt->sax->error(ctxt->userData, "Space required after '<!ATTLIST'\n");
1.59 daniel 3429: ctxt->wellFormed = 0;
3430: }
1.42 daniel 3431: SKIP_BLANKS;
1.59 daniel 3432: elemName = xmlParseName(ctxt);
3433: if (elemName == NULL) {
1.55 daniel 3434: if ((ctxt->sax != NULL) && (ctxt->sax->error != NULL))
1.74 daniel 3435: ctxt->sax->error(ctxt->userData, "ATTLIST: no name for Element\n");
1.59 daniel 3436: ctxt->wellFormed = 0;
1.22 daniel 3437: return;
3438: }
1.42 daniel 3439: SKIP_BLANKS;
1.40 daniel 3440: while (CUR != '>') {
3441: const CHAR *check = CUR_PTR;
1.59 daniel 3442: int type;
3443: int def;
3444: CHAR *defaultValue = NULL;
3445:
1.103 ! daniel 3446: tree = NULL;
1.59 daniel 3447: attrName = xmlParseName(ctxt);
3448: if (attrName == NULL) {
3449: if ((ctxt->sax != NULL) && (ctxt->sax->error != NULL))
1.74 daniel 3450: ctxt->sax->error(ctxt->userData, "ATTLIST: no name for Attribute\n");
1.59 daniel 3451: ctxt->wellFormed = 0;
3452: break;
3453: }
1.97 daniel 3454: GROW;
1.59 daniel 3455: if (!IS_BLANK(CUR)) {
3456: if ((ctxt->sax != NULL) && (ctxt->sax->error != NULL))
1.74 daniel 3457: ctxt->sax->error(ctxt->userData,
1.59 daniel 3458: "Space required after the attribute name\n");
3459: ctxt->wellFormed = 0;
3460: break;
3461: }
3462: SKIP_BLANKS;
3463:
1.66 daniel 3464: type = xmlParseAttributeType(ctxt, &tree);
1.59 daniel 3465: if (type <= 0) break;
1.22 daniel 3466:
1.97 daniel 3467: GROW;
1.59 daniel 3468: if (!IS_BLANK(CUR)) {
3469: if ((ctxt->sax != NULL) && (ctxt->sax->error != NULL))
1.74 daniel 3470: ctxt->sax->error(ctxt->userData,
1.59 daniel 3471: "Space required after the attribute type\n");
3472: ctxt->wellFormed = 0;
3473: break;
3474: }
1.42 daniel 3475: SKIP_BLANKS;
1.59 daniel 3476:
3477: def = xmlParseDefaultDecl(ctxt, &defaultValue);
3478: if (def <= 0) break;
3479:
1.97 daniel 3480: GROW;
1.59 daniel 3481: if (CUR != '>') {
3482: if (!IS_BLANK(CUR)) {
3483: if ((ctxt->sax != NULL) && (ctxt->sax->error != NULL))
1.74 daniel 3484: ctxt->sax->error(ctxt->userData,
1.59 daniel 3485: "Space required after the attribute default value\n");
3486: ctxt->wellFormed = 0;
3487: break;
3488: }
3489: SKIP_BLANKS;
3490: }
1.40 daniel 3491: if (check == CUR_PTR) {
1.55 daniel 3492: if ((ctxt->sax != NULL) && (ctxt->sax->error != NULL))
1.74 daniel 3493: ctxt->sax->error(ctxt->userData,
1.59 daniel 3494: "xmlParseAttributeListDecl: detected internal error\n");
1.22 daniel 3495: break;
3496: }
1.72 daniel 3497: if ((ctxt->sax != NULL) && (ctxt->sax->attributeDecl != NULL))
1.74 daniel 3498: ctxt->sax->attributeDecl(ctxt->userData, elemName, attrName,
1.66 daniel 3499: type, def, defaultValue, tree);
1.59 daniel 3500: if (attrName != NULL)
3501: free(attrName);
3502: if (defaultValue != NULL)
3503: free(defaultValue);
1.97 daniel 3504: GROW;
1.22 daniel 3505: }
1.40 daniel 3506: if (CUR == '>')
3507: NEXT;
1.22 daniel 3508:
1.59 daniel 3509: free(elemName);
1.22 daniel 3510: }
3511: }
3512:
1.50 daniel 3513: /**
1.61 daniel 3514: * xmlParseElementMixedContentDecl:
3515: * @ctxt: an XML parser context
3516: *
3517: * parse the declaration for a Mixed Element content
3518: * The leading '(' and spaces have been skipped in xmlParseElementContentDecl
3519: *
3520: * [51] Mixed ::= '(' S? '#PCDATA' (S? '|' S? Name)* S? ')*' |
3521: * '(' S? '#PCDATA' S? ')'
3522: *
1.99 daniel 3523: * [ VC: Proper Group/PE Nesting ] applies to [51] too (see [49])
3524: *
3525: * [ VC: No Duplicate Types ]
3526: * TODO The same name must not appear more than once in a single
3527: * mixed-content declaration.
3528: *
1.61 daniel 3529: * returns: the list of the xmlElementContentPtr describing the element choices
3530: */
3531: xmlElementContentPtr
1.62 daniel 3532: xmlParseElementMixedContentDecl(xmlParserCtxtPtr ctxt) {
1.64 daniel 3533: xmlElementContentPtr ret = NULL, cur = NULL, n;
1.61 daniel 3534: CHAR *elem = NULL;
3535:
1.97 daniel 3536: GROW;
1.61 daniel 3537: if ((CUR == '#') && (NXT(1) == 'P') &&
3538: (NXT(2) == 'C') && (NXT(3) == 'D') &&
3539: (NXT(4) == 'A') && (NXT(5) == 'T') &&
3540: (NXT(6) == 'A')) {
3541: SKIP(7);
3542: SKIP_BLANKS;
1.91 daniel 3543: SHRINK;
1.63 daniel 3544: if (CUR == ')') {
3545: NEXT;
3546: ret = xmlNewElementContent(NULL, XML_ELEMENT_CONTENT_PCDATA);
3547: return(ret);
3548: }
1.61 daniel 3549: if ((CUR == '(') || (CUR == '|')) {
3550: ret = cur = xmlNewElementContent(NULL, XML_ELEMENT_CONTENT_PCDATA);
3551: if (ret == NULL) return(NULL);
1.99 daniel 3552: }
1.61 daniel 3553: while (CUR == '|') {
1.64 daniel 3554: NEXT;
1.61 daniel 3555: if (elem == NULL) {
3556: ret = xmlNewElementContent(NULL, XML_ELEMENT_CONTENT_OR);
3557: if (ret == NULL) return(NULL);
3558: ret->c1 = cur;
1.64 daniel 3559: cur = ret;
1.61 daniel 3560: } else {
1.64 daniel 3561: n = xmlNewElementContent(NULL, XML_ELEMENT_CONTENT_OR);
3562: if (n == NULL) return(NULL);
3563: n->c1 = xmlNewElementContent(elem, XML_ELEMENT_CONTENT_ELEMENT);
3564: cur->c2 = n;
3565: cur = n;
1.66 daniel 3566: free(elem);
1.61 daniel 3567: }
3568: SKIP_BLANKS;
3569: elem = xmlParseName(ctxt);
3570: if (elem == NULL) {
3571: if ((ctxt->sax != NULL) && (ctxt->sax->error != NULL))
1.74 daniel 3572: ctxt->sax->error(ctxt->userData,
1.61 daniel 3573: "xmlParseElementMixedContentDecl : Name expected\n");
3574: ctxt->wellFormed = 0;
3575: xmlFreeElementContent(cur);
3576: return(NULL);
3577: }
3578: SKIP_BLANKS;
1.97 daniel 3579: GROW;
1.61 daniel 3580: }
1.63 daniel 3581: if ((CUR == ')') && (NXT(1) == '*')) {
1.66 daniel 3582: if (elem != NULL) {
1.61 daniel 3583: cur->c2 = xmlNewElementContent(elem,
3584: XML_ELEMENT_CONTENT_ELEMENT);
1.66 daniel 3585: free(elem);
3586: }
1.65 daniel 3587: ret->ocur = XML_ELEMENT_CONTENT_MULT;
1.64 daniel 3588: SKIP(2);
1.61 daniel 3589: } else {
1.66 daniel 3590: if (elem != NULL) free(elem);
1.61 daniel 3591: if ((ctxt->sax != NULL) && (ctxt->sax->error != NULL))
1.74 daniel 3592: ctxt->sax->error(ctxt->userData,
1.63 daniel 3593: "xmlParseElementMixedContentDecl : '|' or ')*' expected\n");
1.61 daniel 3594: ctxt->wellFormed = 0;
3595: xmlFreeElementContent(ret);
3596: return(NULL);
3597: }
3598:
3599: } else {
3600: if ((ctxt->sax != NULL) && (ctxt->sax->error != NULL))
1.74 daniel 3601: ctxt->sax->error(ctxt->userData,
1.61 daniel 3602: "xmlParseElementMixedContentDecl : '#PCDATA' expected\n");
3603: ctxt->wellFormed = 0;
3604: }
3605: return(ret);
3606: }
3607:
3608: /**
3609: * xmlParseElementChildrenContentDecl:
1.50 daniel 3610: * @ctxt: an XML parser context
3611: *
1.61 daniel 3612: * parse the declaration for a children Element content
3613: * The leading '(' and spaces have been skipped in xmlParseElementContentDecl
1.22 daniel 3614: *
1.61 daniel 3615: *
1.22 daniel 3616: * [47] children ::= (choice | seq) ('?' | '*' | '+')?
3617: *
3618: * [48] cp ::= (Name | choice | seq) ('?' | '*' | '+')?
3619: *
3620: * [49] choice ::= '(' S? cp ( S? '|' S? cp )* S? ')'
3621: *
3622: * [50] seq ::= '(' S? cp ( S? ',' S? cp )* S? ')'
3623: *
1.99 daniel 3624: * [ VC: Proper Group/PE Nesting ] applies to [49] and [50]
3625: * TODO Parameter-entity replacement text must be properly nested
3626: * with parenthetized groups. That is to say, if either of the
3627: * opening or closing parentheses in a choice, seq, or Mixed
3628: * construct is contained in the replacement text for a parameter
3629: * entity, both must be contained in the same replacement text. For
3630: * interoperability, if a parameter-entity reference appears in a
3631: * choice, seq, or Mixed construct, its replacement text should not
3632: * be empty, and neither the first nor last non-blank character of
3633: * the replacement text should be a connector (| or ,).
3634: *
1.62 daniel 3635: * returns: the tree of xmlElementContentPtr describing the element
1.61 daniel 3636: * hierarchy.
3637: */
3638: xmlElementContentPtr
1.62 daniel 3639: xmlParseElementChildrenContentDecl(xmlParserCtxtPtr ctxt) {
1.63 daniel 3640: xmlElementContentPtr ret = NULL, cur = NULL, last = NULL, op = NULL;
1.62 daniel 3641: CHAR *elem;
3642: CHAR type = 0;
3643:
3644: SKIP_BLANKS;
1.94 daniel 3645: GROW;
1.62 daniel 3646: if (CUR == '(') {
1.63 daniel 3647: /* Recurse on first child */
1.62 daniel 3648: NEXT;
3649: SKIP_BLANKS;
3650: cur = ret = xmlParseElementChildrenContentDecl(ctxt);
3651: SKIP_BLANKS;
1.101 daniel 3652: GROW;
1.62 daniel 3653: } else {
3654: elem = xmlParseName(ctxt);
3655: if (elem == NULL) {
3656: if ((ctxt->sax != NULL) && (ctxt->sax->error != NULL))
1.74 daniel 3657: ctxt->sax->error(ctxt->userData,
1.62 daniel 3658: "xmlParseElementChildrenContentDecl : Name or '(' expected\n");
3659: ctxt->wellFormed = 0;
3660: return(NULL);
3661: }
3662: cur = ret = xmlNewElementContent(elem, XML_ELEMENT_CONTENT_ELEMENT);
1.101 daniel 3663: GROW;
1.62 daniel 3664: if (CUR == '?') {
3665: ret->ocur = XML_ELEMENT_CONTENT_OPT;
3666: NEXT;
3667: } else if (CUR == '*') {
3668: ret->ocur = XML_ELEMENT_CONTENT_MULT;
3669: NEXT;
3670: } else if (CUR == '+') {
3671: ret->ocur = XML_ELEMENT_CONTENT_PLUS;
3672: NEXT;
3673: } else {
3674: ret->ocur = XML_ELEMENT_CONTENT_ONCE;
3675: }
1.66 daniel 3676: free(elem);
1.101 daniel 3677: GROW;
1.62 daniel 3678: }
3679: SKIP_BLANKS;
1.91 daniel 3680: SHRINK;
1.62 daniel 3681: while (CUR != ')') {
1.63 daniel 3682: /*
3683: * Each loop we parse one separator and one element.
3684: */
1.62 daniel 3685: if (CUR == ',') {
3686: if (type == 0) type = CUR;
3687:
3688: /*
3689: * Detect "Name | Name , Name" error
3690: */
3691: else if (type != CUR) {
3692: if ((ctxt->sax != NULL) && (ctxt->sax->error != NULL))
1.74 daniel 3693: ctxt->sax->error(ctxt->userData,
1.62 daniel 3694: "xmlParseElementChildrenContentDecl : '%c' expected\n",
3695: type);
3696: ctxt->wellFormed = 0;
3697: xmlFreeElementContent(ret);
3698: return(NULL);
3699: }
1.64 daniel 3700: NEXT;
1.62 daniel 3701:
1.63 daniel 3702: op = xmlNewElementContent(NULL, XML_ELEMENT_CONTENT_SEQ);
3703: if (op == NULL) {
3704: xmlFreeElementContent(ret);
3705: return(NULL);
3706: }
3707: if (last == NULL) {
3708: op->c1 = ret;
1.65 daniel 3709: ret = cur = op;
1.63 daniel 3710: } else {
3711: cur->c2 = op;
3712: op->c1 = last;
3713: cur =op;
1.65 daniel 3714: last = NULL;
1.63 daniel 3715: }
1.62 daniel 3716: } else if (CUR == '|') {
3717: if (type == 0) type = CUR;
3718:
3719: /*
1.63 daniel 3720: * Detect "Name , Name | Name" error
1.62 daniel 3721: */
3722: else if (type != CUR) {
3723: if ((ctxt->sax != NULL) && (ctxt->sax->error != NULL))
1.74 daniel 3724: ctxt->sax->error(ctxt->userData,
1.62 daniel 3725: "xmlParseElementChildrenContentDecl : '%c' expected\n",
3726: type);
3727: ctxt->wellFormed = 0;
3728: xmlFreeElementContent(ret);
3729: return(NULL);
3730: }
1.64 daniel 3731: NEXT;
1.62 daniel 3732:
1.63 daniel 3733: op = xmlNewElementContent(NULL, XML_ELEMENT_CONTENT_OR);
3734: if (op == NULL) {
3735: xmlFreeElementContent(ret);
3736: return(NULL);
3737: }
3738: if (last == NULL) {
3739: op->c1 = ret;
1.65 daniel 3740: ret = cur = op;
1.63 daniel 3741: } else {
3742: cur->c2 = op;
3743: op->c1 = last;
3744: cur =op;
1.65 daniel 3745: last = NULL;
1.63 daniel 3746: }
1.62 daniel 3747: } else {
3748: if ((ctxt->sax != NULL) && (ctxt->sax->error != NULL))
1.74 daniel 3749: ctxt->sax->error(ctxt->userData,
1.62 daniel 3750: "xmlParseElementChildrenContentDecl : ',' '|' or ')' expected\n");
3751: ctxt->wellFormed = 0;
3752: xmlFreeElementContent(ret);
3753: return(NULL);
3754: }
1.101 daniel 3755: GROW;
1.62 daniel 3756: SKIP_BLANKS;
1.101 daniel 3757: GROW;
1.62 daniel 3758: if (CUR == '(') {
1.63 daniel 3759: /* Recurse on second child */
1.62 daniel 3760: NEXT;
3761: SKIP_BLANKS;
1.65 daniel 3762: last = xmlParseElementChildrenContentDecl(ctxt);
1.62 daniel 3763: SKIP_BLANKS;
3764: } else {
3765: elem = xmlParseName(ctxt);
3766: if (elem == NULL) {
3767: if ((ctxt->sax != NULL) && (ctxt->sax->error != NULL))
1.74 daniel 3768: ctxt->sax->error(ctxt->userData,
1.62 daniel 3769: "xmlParseElementChildrenContentDecl : Name or '(' expected\n");
3770: ctxt->wellFormed = 0;
3771: return(NULL);
3772: }
1.65 daniel 3773: last = xmlNewElementContent(elem, XML_ELEMENT_CONTENT_ELEMENT);
1.66 daniel 3774: free(elem);
1.62 daniel 3775: }
1.63 daniel 3776: if (CUR == '?') {
3777: ret->ocur = XML_ELEMENT_CONTENT_OPT;
3778: NEXT;
3779: } else if (CUR == '*') {
3780: ret->ocur = XML_ELEMENT_CONTENT_MULT;
3781: NEXT;
3782: } else if (CUR == '+') {
3783: ret->ocur = XML_ELEMENT_CONTENT_PLUS;
3784: NEXT;
3785: } else {
3786: ret->ocur = XML_ELEMENT_CONTENT_ONCE;
3787: }
3788: SKIP_BLANKS;
1.97 daniel 3789: GROW;
1.64 daniel 3790: }
1.65 daniel 3791: if ((cur != NULL) && (last != NULL)) {
3792: cur->c2 = last;
1.62 daniel 3793: }
3794: NEXT;
3795: if (CUR == '?') {
3796: ret->ocur = XML_ELEMENT_CONTENT_OPT;
3797: NEXT;
3798: } else if (CUR == '*') {
3799: ret->ocur = XML_ELEMENT_CONTENT_MULT;
3800: NEXT;
3801: } else if (CUR == '+') {
3802: ret->ocur = XML_ELEMENT_CONTENT_PLUS;
3803: NEXT;
3804: } else {
3805: ret->ocur = XML_ELEMENT_CONTENT_ONCE;
3806: }
3807: return(ret);
1.61 daniel 3808: }
3809:
3810: /**
3811: * xmlParseElementContentDecl:
3812: * @ctxt: an XML parser context
3813: * @name: the name of the element being defined.
3814: * @result: the Element Content pointer will be stored here if any
1.22 daniel 3815: *
1.61 daniel 3816: * parse the declaration for an Element content either Mixed or Children,
3817: * the cases EMPTY and ANY are handled directly in xmlParseElementDecl
3818: *
3819: * [46] contentspec ::= 'EMPTY' | 'ANY' | Mixed | children
1.50 daniel 3820: *
1.61 daniel 3821: * returns: the type of element content XML_ELEMENT_TYPE_xxx
1.22 daniel 3822: */
3823:
1.61 daniel 3824: int
3825: xmlParseElementContentDecl(xmlParserCtxtPtr ctxt, CHAR *name,
3826: xmlElementContentPtr *result) {
3827:
3828: xmlElementContentPtr tree = NULL;
3829: int res;
3830:
3831: *result = NULL;
3832:
3833: if (CUR != '(') {
3834: if ((ctxt->sax != NULL) && (ctxt->sax->error != NULL))
1.74 daniel 3835: ctxt->sax->error(ctxt->userData,
1.61 daniel 3836: "xmlParseElementContentDecl : '(' expected\n");
3837: ctxt->wellFormed = 0;
3838: return(-1);
3839: }
3840: NEXT;
1.97 daniel 3841: GROW;
1.61 daniel 3842: SKIP_BLANKS;
3843: if ((CUR == '#') && (NXT(1) == 'P') &&
3844: (NXT(2) == 'C') && (NXT(3) == 'D') &&
3845: (NXT(4) == 'A') && (NXT(5) == 'T') &&
3846: (NXT(6) == 'A')) {
1.62 daniel 3847: tree = xmlParseElementMixedContentDecl(ctxt);
1.61 daniel 3848: res = XML_ELEMENT_TYPE_MIXED;
3849: } else {
1.62 daniel 3850: tree = xmlParseElementChildrenContentDecl(ctxt);
1.61 daniel 3851: res = XML_ELEMENT_TYPE_ELEMENT;
3852: }
3853: SKIP_BLANKS;
1.63 daniel 3854: /****************************
1.61 daniel 3855: if (CUR != ')') {
3856: if ((ctxt->sax != NULL) && (ctxt->sax->error != NULL))
1.74 daniel 3857: ctxt->sax->error(ctxt->userData,
1.61 daniel 3858: "xmlParseElementContentDecl : ')' expected\n");
3859: ctxt->wellFormed = 0;
3860: return(-1);
3861: }
1.63 daniel 3862: ****************************/
3863: *result = tree;
1.61 daniel 3864: return(res);
1.22 daniel 3865: }
3866:
1.50 daniel 3867: /**
3868: * xmlParseElementDecl:
3869: * @ctxt: an XML parser context
3870: *
3871: * parse an Element declaration.
1.22 daniel 3872: *
3873: * [45] elementdecl ::= '<!ELEMENT' S Name S contentspec S? '>'
3874: *
1.99 daniel 3875: * [ VC: Unique Element Type Declaration ]
3876: * TODO No element type may be declared more than once
1.69 daniel 3877: *
3878: * Returns the type of the element, or -1 in case of error
1.22 daniel 3879: */
1.59 daniel 3880: int
1.55 daniel 3881: xmlParseElementDecl(xmlParserCtxtPtr ctxt) {
1.22 daniel 3882: CHAR *name;
1.59 daniel 3883: int ret = -1;
1.61 daniel 3884: xmlElementContentPtr content = NULL;
1.22 daniel 3885:
1.97 daniel 3886: GROW;
1.40 daniel 3887: if ((CUR == '<') && (NXT(1) == '!') &&
3888: (NXT(2) == 'E') && (NXT(3) == 'L') &&
3889: (NXT(4) == 'E') && (NXT(5) == 'M') &&
3890: (NXT(6) == 'E') && (NXT(7) == 'N') &&
1.59 daniel 3891: (NXT(8) == 'T')) {
1.40 daniel 3892: SKIP(9);
1.59 daniel 3893: if (!IS_BLANK(CUR)) {
3894: if ((ctxt->sax != NULL) && (ctxt->sax->error != NULL))
1.74 daniel 3895: ctxt->sax->error(ctxt->userData,
1.59 daniel 3896: "Space required after 'ELEMENT'\n");
3897: ctxt->wellFormed = 0;
3898: }
1.42 daniel 3899: SKIP_BLANKS;
1.22 daniel 3900: name = xmlParseName(ctxt);
3901: if (name == NULL) {
1.55 daniel 3902: if ((ctxt->sax != NULL) && (ctxt->sax->error != NULL))
1.74 daniel 3903: ctxt->sax->error(ctxt->userData,
1.59 daniel 3904: "xmlParseElementDecl: no name for Element\n");
3905: ctxt->wellFormed = 0;
3906: return(-1);
3907: }
3908: if (!IS_BLANK(CUR)) {
3909: if ((ctxt->sax != NULL) && (ctxt->sax->error != NULL))
1.74 daniel 3910: ctxt->sax->error(ctxt->userData,
1.59 daniel 3911: "Space required after the element name\n");
3912: ctxt->wellFormed = 0;
1.22 daniel 3913: }
1.42 daniel 3914: SKIP_BLANKS;
1.40 daniel 3915: if ((CUR == 'E') && (NXT(1) == 'M') &&
3916: (NXT(2) == 'P') && (NXT(3) == 'T') &&
3917: (NXT(4) == 'Y')) {
3918: SKIP(5);
1.22 daniel 3919: /*
3920: * Element must always be empty.
3921: */
1.59 daniel 3922: ret = XML_ELEMENT_TYPE_EMPTY;
1.40 daniel 3923: } else if ((CUR == 'A') && (NXT(1) == 'N') &&
3924: (NXT(2) == 'Y')) {
3925: SKIP(3);
1.22 daniel 3926: /*
3927: * Element is a generic container.
3928: */
1.59 daniel 3929: ret = XML_ELEMENT_TYPE_ANY;
1.61 daniel 3930: } else if (CUR == '(') {
3931: ret = xmlParseElementContentDecl(ctxt, name, &content);
1.22 daniel 3932: } else {
1.98 daniel 3933: /*
3934: * [ WFC: PEs in Internal Subset ] error handling.
3935: */
3936: if ((CUR == '%') && (ctxt->external == 0) &&
3937: (ctxt->inputNr == 1)) {
3938: if ((ctxt->sax != NULL) && (ctxt->sax->error != NULL))
3939: ctxt->sax->error(ctxt->userData,
3940: "PEReference: forbidden within markup decl in internal subset\n");
3941: } else {
3942: if ((ctxt->sax != NULL) && (ctxt->sax->error != NULL))
3943: ctxt->sax->error(ctxt->userData,
3944: "xmlParseElementDecl: 'EMPTY', 'ANY' or '(' expected\n");
3945: }
1.61 daniel 3946: ctxt->wellFormed = 0;
3947: if (name != NULL) free(name);
3948: return(-1);
1.22 daniel 3949: }
1.42 daniel 3950: SKIP_BLANKS;
1.40 daniel 3951: if (CUR != '>') {
1.55 daniel 3952: if ((ctxt->sax != NULL) && (ctxt->sax->error != NULL))
1.74 daniel 3953: ctxt->sax->error(ctxt->userData,
1.31 daniel 3954: "xmlParseElementDecl: expected '>' at the end\n");
1.59 daniel 3955: ctxt->wellFormed = 0;
1.61 daniel 3956: } else {
1.40 daniel 3957: NEXT;
1.72 daniel 3958: if ((ctxt->sax != NULL) && (ctxt->sax->elementDecl != NULL))
1.76 daniel 3959: ctxt->sax->elementDecl(ctxt->userData, name, ret,
3960: content);
1.61 daniel 3961: }
1.84 daniel 3962: if (content != NULL) {
3963: xmlFreeElementContent(content);
3964: }
1.61 daniel 3965: if (name != NULL) {
3966: free(name);
3967: }
1.22 daniel 3968: }
1.59 daniel 3969: return(ret);
1.22 daniel 3970: }
3971:
1.50 daniel 3972: /**
3973: * xmlParseMarkupDecl:
3974: * @ctxt: an XML parser context
3975: *
3976: * parse Markup declarations
1.22 daniel 3977: *
3978: * [29] markupdecl ::= elementdecl | AttlistDecl | EntityDecl |
3979: * NotationDecl | PI | Comment
3980: *
1.98 daniel 3981: * [ VC: Proper Declaration/PE Nesting ]
3982: * TODO Parameter-entity replacement text must be properly nested with
3983: * markup declarations. That is to say, if either the first character
3984: * or the last character of a markup declaration (markupdecl above) is
3985: * contained in the replacement text for a parameter-entity reference,
3986: * both must be contained in the same replacement text.
3987: *
3988: * [ WFC: PEs in Internal Subset ]
3989: * In the internal DTD subset, parameter-entity references can occur
3990: * only where markup declarations can occur, not within markup declarations.
3991: * (This does not apply to references that occur in external parameter
3992: * entities or to the external subset.)
1.22 daniel 3993: */
1.55 daniel 3994: void
3995: xmlParseMarkupDecl(xmlParserCtxtPtr ctxt) {
1.94 daniel 3996: GROW;
1.22 daniel 3997: xmlParseElementDecl(ctxt);
3998: xmlParseAttributeListDecl(ctxt);
3999: xmlParseEntityDecl(ctxt);
4000: xmlParseNotationDecl(ctxt);
4001: xmlParsePI(ctxt);
1.31 daniel 4002: xmlParseComment(ctxt, 0);
1.98 daniel 4003: /*
4004: * This is only for internal subset. On external entities,
4005: * the replacement is done before parsing stage
4006: */
4007: if ((ctxt->external == 0) && (ctxt->inputNr == 1))
4008: xmlParsePEReference(ctxt);
1.97 daniel 4009: ctxt->instate = XML_PARSER_DTD;
1.22 daniel 4010: }
4011:
1.50 daniel 4012: /**
1.76 daniel 4013: * xmlParseTextDecl:
4014: * @ctxt: an XML parser context
4015: *
4016: * parse an XML declaration header for external entities
4017: *
4018: * [77] TextDecl ::= '<?xml' VersionInfo? EncodingDecl S? '?>'
4019: *
4020: * Returns the only valuable info for an external parsed entity, the encoding
4021: */
4022:
4023: CHAR *
4024: xmlParseTextDecl(xmlParserCtxtPtr ctxt) {
4025: CHAR *version;
4026: CHAR *encoding = NULL;
4027:
4028: /*
4029: * We know that '<?xml' is here.
4030: */
4031: SKIP(5);
4032:
4033: if (!IS_BLANK(CUR)) {
4034: if ((ctxt->sax != NULL) && (ctxt->sax->error != NULL))
4035: ctxt->sax->error(ctxt->userData, "Blank needed after '<?xml'\n");
4036: ctxt->wellFormed = 0;
4037: }
4038: SKIP_BLANKS;
4039:
4040: /*
4041: * We may have the VersionInfo here.
4042: */
4043: version = xmlParseVersionInfo(ctxt);
1.99 daniel 4044:
1.76 daniel 4045: /* TODO: we should actually inherit from the referencing doc if absent
4046: if (version == NULL)
4047: version = xmlCharStrdup(XML_DEFAULT_VERSION);
4048: ctxt->version = xmlStrdup(version);
4049: */
1.99 daniel 4050:
1.76 daniel 4051: if (version != NULL)
4052: free(version);
4053:
4054: /*
4055: * We must have the encoding declaration
4056: */
4057: if (!IS_BLANK(CUR)) {
4058: if ((ctxt->sax != NULL) && (ctxt->sax->error != NULL))
4059: ctxt->sax->error(ctxt->userData, "Blank needed here\n");
4060: ctxt->wellFormed = 0;
4061: }
4062: encoding = xmlParseEncodingDecl(ctxt);
4063:
4064: SKIP_BLANKS;
4065: if ((CUR == '?') && (NXT(1) == '>')) {
4066: SKIP(2);
4067: } else if (CUR == '>') {
4068: /* Deprecated old WD ... */
4069: if ((ctxt->sax != NULL) && (ctxt->sax->error != NULL))
4070: ctxt->sax->error(ctxt->userData, "XML declaration must end-up with '?>'\n");
4071: ctxt->wellFormed = 0;
4072: NEXT;
4073: } else {
4074: if ((ctxt->sax != NULL) && (ctxt->sax->error != NULL))
4075: ctxt->sax->error(ctxt->userData, "parsing XML declaration: '?>' expected\n");
4076: ctxt->wellFormed = 0;
4077: MOVETO_ENDTAG(CUR_PTR);
4078: NEXT;
4079: }
4080: return(encoding);
4081: }
4082:
4083: /*
4084: * xmlParseConditionalSections
4085: * @ctxt: an XML parser context
4086: *
4087: * TODO : Conditionnal section are not yet supported !
4088: *
4089: * [61] conditionalSect ::= includeSect | ignoreSect
4090: * [62] includeSect ::= '<![' S? 'INCLUDE' S? '[' extSubsetDecl ']]>'
4091: * [63] ignoreSect ::= '<![' S? 'IGNORE' S? '[' ignoreSectContents* ']]>'
4092: * [64] ignoreSectContents ::= Ignore ('<![' ignoreSectContents ']]>' Ignore)*
4093: * [65] Ignore ::= Char* - (Char* ('<![' | ']]>') Char*)
4094: */
4095:
4096: void
4097: xmlParseConditionalSections(xmlParserCtxtPtr ctxt) {
4098: if ((ctxt->sax != NULL) && (ctxt->sax->warning != NULL))
4099: ctxt->sax->warning(ctxt->userData,
4100: "XML conditional section not supported\n");
4101: /*
4102: * Skip up to the end of the conditionnal section.
4103: */
4104: while ((CUR != 0) && ((CUR != ']') || (NXT(1) != ']') || (NXT(2) != '>')))
4105: NEXT;
4106: if (CUR == 0) {
4107: if ((ctxt->sax != NULL) && (ctxt->sax->error != NULL))
4108: ctxt->sax->error(ctxt->userData,
4109: "XML conditional section not closed\n");
4110: ctxt->wellFormed = 0;
4111: }
4112: }
4113:
4114: /**
4115: * xmlParseExternalSubset
4116: * @ctxt: an XML parser context
4117: *
4118: * parse Markup declarations from an external subset
4119: *
4120: * [30] extSubset ::= textDecl? extSubsetDecl
4121: *
4122: * [31] extSubsetDecl ::= (markupdecl | conditionalSect | PEReference | S) *
4123: *
4124: * TODO There is a check [ VC: Proper Declaration/PE Nesting ]
4125: */
4126: void
1.79 daniel 4127: xmlParseExternalSubset(xmlParserCtxtPtr ctxt, const CHAR *ExternalID,
4128: const CHAR *SystemID) {
1.76 daniel 4129: if ((CUR == '<') && (NXT(1) == '?') &&
4130: (NXT(2) == 'x') && (NXT(3) == 'm') &&
4131: (NXT(4) == 'l')) {
4132: xmlParseTextDecl(ctxt);
4133: }
1.79 daniel 4134: if (ctxt->myDoc == NULL) {
4135: ctxt->myDoc = xmlNewDoc("1.0");
4136: }
4137: if ((ctxt->myDoc != NULL) && (ctxt->myDoc->intSubset == NULL))
4138: xmlCreateIntSubset(ctxt->myDoc, NULL, ExternalID, SystemID);
4139:
1.96 daniel 4140: ctxt->instate = XML_PARSER_DTD;
1.101 daniel 4141: ctxt->external = 1;
1.76 daniel 4142: while (((CUR == '<') && (NXT(1) == '?')) ||
4143: ((CUR == '<') && (NXT(1) == '!')) ||
4144: IS_BLANK(CUR)) {
4145: if ((CUR == '<') && (NXT(1) == '!') && (NXT(2) == '[')) {
4146: xmlParseConditionalSections(ctxt);
4147: } else if (IS_BLANK(CUR)) {
4148: NEXT;
4149: } else if (CUR == '%') {
4150: xmlParsePEReference(ctxt);
4151: } else
4152: xmlParseMarkupDecl(ctxt);
1.77 daniel 4153:
4154: /*
4155: * Pop-up of finished entities.
4156: */
4157: while ((CUR == 0) && (ctxt->inputNr > 1))
4158: xmlPopInput(ctxt);
4159:
1.76 daniel 4160: }
4161:
4162: if (CUR != 0) {
4163: if ((ctxt->sax != NULL) && (ctxt->sax->error != NULL))
4164: ctxt->sax->error(ctxt->userData,
4165: "Extra content at the end of the document\n");
4166: ctxt->wellFormed = 0;
4167: }
4168:
4169: }
4170:
4171: /**
1.77 daniel 4172: * xmlParseReference:
4173: * @ctxt: an XML parser context
4174: *
4175: * parse and handle entity references in content, depending on the SAX
4176: * interface, this may end-up in a call to character() if this is a
1.79 daniel 4177: * CharRef, a predefined entity, if there is no reference() callback.
4178: * or if the parser was asked to switch to that mode.
1.77 daniel 4179: *
4180: * [67] Reference ::= EntityRef | CharRef
4181: */
4182: void
4183: xmlParseReference(xmlParserCtxtPtr ctxt) {
4184: xmlEntityPtr ent;
4185: CHAR *val;
4186: if (CUR != '&') return;
4187:
4188: if (NXT(1) == '#') {
4189: CHAR out[2];
4190: int val = xmlParseCharRef(ctxt);
4191: /* TODO: invalid for UTF-8 variable encoding !!! */
4192: out[0] = val;
4193: out[1] = 0;
4194: if ((ctxt->sax != NULL) && (ctxt->sax->characters != NULL))
4195: ctxt->sax->characters(ctxt->userData, out, 1);
4196: } else {
4197: ent = xmlParseEntityRef(ctxt);
4198: if (ent == NULL) return;
4199: if ((ent->name != NULL) &&
4200: (ent->type != XML_INTERNAL_PREDEFINED_ENTITY) &&
1.79 daniel 4201: (ctxt->sax != NULL) && (ctxt->sax->reference != NULL) &&
4202: (ctxt->replaceEntities == 0)) {
4203:
1.77 daniel 4204: /*
4205: * Create a node.
4206: */
4207: ctxt->sax->reference(ctxt->userData, ent->name);
4208: return;
4209: }
4210: val = ent->content;
4211: if (val == NULL) return;
4212: /*
4213: * inline the entity.
4214: */
4215: if ((ctxt->sax != NULL) && (ctxt->sax->characters != NULL))
4216: ctxt->sax->characters(ctxt->userData, val, xmlStrlen(val));
4217: }
1.24 daniel 4218: }
4219:
1.50 daniel 4220: /**
4221: * xmlParseEntityRef:
4222: * @ctxt: an XML parser context
4223: *
4224: * parse ENTITY references declarations
1.24 daniel 4225: *
4226: * [68] EntityRef ::= '&' Name ';'
1.68 daniel 4227: *
1.98 daniel 4228: * [ WFC: Entity Declared ]
4229: * In a document without any DTD, a document with only an internal DTD
4230: * subset which contains no parameter entity references, or a document
4231: * with "standalone='yes'", the Name given in the entity reference
4232: * must match that in an entity declaration, except that well-formed
4233: * documents need not declare any of the following entities: amp, lt,
4234: * gt, apos, quot. The declaration of a parameter entity must precede
4235: * any reference to it. Similarly, the declaration of a general entity
4236: * must precede any reference to it which appears in a default value in an
4237: * attribute-list declaration. Note that if entities are declared in the
4238: * external subset or in external parameter entities, a non-validating
4239: * processor is not obligated to read and process their declarations;
4240: * for such documents, the rule that an entity must be declared is a
4241: * well-formedness constraint only if standalone='yes'.
4242: *
4243: * [ WFC: Parsed Entity ]
4244: * An entity reference must not contain the name of an unparsed entity
4245: *
1.77 daniel 4246: * Returns the xmlEntityPtr if found, or NULL otherwise.
1.24 daniel 4247: */
1.77 daniel 4248: xmlEntityPtr
1.55 daniel 4249: xmlParseEntityRef(xmlParserCtxtPtr ctxt) {
1.24 daniel 4250: CHAR *name;
1.72 daniel 4251: xmlEntityPtr ent = NULL;
1.24 daniel 4252:
1.91 daniel 4253: GROW;
1.40 daniel 4254: if (CUR == '&') {
4255: NEXT;
1.24 daniel 4256: name = xmlParseName(ctxt);
4257: if (name == NULL) {
1.55 daniel 4258: if ((ctxt->sax != NULL) && (ctxt->sax->error != NULL))
1.98 daniel 4259: ctxt->sax->error(ctxt->userData,
4260: "xmlParseEntityRef: no name\n");
1.59 daniel 4261: ctxt->wellFormed = 0;
1.24 daniel 4262: } else {
1.40 daniel 4263: if (CUR == ';') {
4264: NEXT;
1.24 daniel 4265: /*
1.77 daniel 4266: * Ask first SAX for entity resolution, otherwise try the
4267: * predefined set.
4268: */
4269: if (ctxt->sax != NULL) {
4270: if (ctxt->sax->getEntity != NULL)
4271: ent = ctxt->sax->getEntity(ctxt->userData, name);
4272: if (ent == NULL)
4273: ent = xmlGetPredefinedEntity(name);
4274: }
4275: /*
1.98 daniel 4276: * [ WFC: Entity Declared ]
4277: * In a document without any DTD, a document with only an
4278: * internal DTD subset which contains no parameter entity
4279: * references, or a document with "standalone='yes'", the
4280: * Name given in the entity reference must match that in an
4281: * entity declaration, except that well-formed documents
4282: * need not declare any of the following entities: amp, lt,
4283: * gt, apos, quot.
4284: * The declaration of a parameter entity must precede any
4285: * reference to it.
4286: * Similarly, the declaration of a general entity must
4287: * precede any reference to it which appears in a default
4288: * value in an attribute-list declaration. Note that if
4289: * entities are declared in the external subset or in
4290: * external parameter entities, a non-validating processor
4291: * is not obligated to read and process their declarations;
4292: * for such documents, the rule that an entity must be
4293: * declared is a well-formedness constraint only if
4294: * standalone='yes'.
1.59 daniel 4295: */
1.77 daniel 4296: if (ent == NULL) {
1.98 daniel 4297: if ((ctxt->standalone == 1) ||
4298: ((ctxt->hasExternalSubset == 0) &&
4299: (ctxt->hasPErefs == 0))) {
4300: if ((ctxt->sax != NULL) && (ctxt->sax->error != NULL))
1.77 daniel 4301: ctxt->sax->error(ctxt->userData,
4302: "Entity '%s' not defined\n", name);
4303: ctxt->wellFormed = 0;
4304: } else {
1.98 daniel 4305: if ((ctxt->sax != NULL) && (ctxt->sax->warning != NULL))
4306: ctxt->sax->warning(ctxt->userData,
4307: "Entity '%s' not defined\n", name);
1.59 daniel 4308: }
1.77 daniel 4309: }
1.59 daniel 4310:
4311: /*
1.98 daniel 4312: * [ WFC: Parsed Entity ]
4313: * An entity reference must not contain the name of an
4314: * unparsed entity
4315: */
4316: else if (ent->type == XML_EXTERNAL_GENERAL_UNPARSED_ENTITY) {
4317: if ((ctxt->sax != NULL) && (ctxt->sax->error != NULL))
4318: ctxt->sax->error(ctxt->userData,
4319: "Entity reference to unparsed entity %s\n", name);
4320: ctxt->wellFormed = 0;
4321: }
4322:
4323: /*
4324: * [ WFC: No External Entity References ]
4325: * Attribute values cannot contain direct or indirect
4326: * entity references to external entities.
4327: */
4328: else if ((ctxt->instate == XML_PARSER_ATTRIBUTE_VALUE) &&
4329: (ent->type == XML_EXTERNAL_GENERAL_PARSED_ENTITY)) {
4330: if ((ctxt->sax != NULL) && (ctxt->sax->error != NULL))
4331: ctxt->sax->error(ctxt->userData,
4332: "Attribute references external entity '%s'\n", name);
4333: ctxt->wellFormed = 0;
4334: }
4335: /*
4336: * [ WFC: No < in Attribute Values ]
4337: * The replacement text of any entity referred to directly or
4338: * indirectly in an attribute value (other than "<") must
4339: * not contain a <.
1.59 daniel 4340: */
1.98 daniel 4341: else if ((ctxt->instate == XML_PARSER_ATTRIBUTE_VALUE) &&
4342: (ent != NULL) && (xmlStrcmp(ent->name, "lt")) &&
4343: (ent->content != NULL) &&
4344: (xmlStrchr(ent->content, '<'))) {
4345: if ((ctxt->sax != NULL) && (ctxt->sax->error != NULL))
4346: ctxt->sax->error(ctxt->userData,
4347: "'<' in entity '%s' is not allowed in attributes values\n", name);
4348: ctxt->wellFormed = 0;
4349: }
4350:
4351: /*
4352: * Internal check, no parameter entities here ...
4353: */
4354: else {
1.59 daniel 4355: switch (ent->type) {
4356: case XML_INTERNAL_PARAMETER_ENTITY:
4357: case XML_EXTERNAL_PARAMETER_ENTITY:
4358: if ((ctxt->sax != NULL) && (ctxt->sax->error != NULL))
1.74 daniel 4359: ctxt->sax->error(ctxt->userData,
1.59 daniel 4360: "Attempt to reference the parameter entity '%s'\n", name);
4361: ctxt->wellFormed = 0;
4362: break;
4363: }
4364: }
4365:
4366: /*
1.98 daniel 4367: * [ WFC: No Recursion ]
4368: * TODO A parsed entity must not contain a recursive
4369: * reference to itself, either directly or indirectly.
1.59 daniel 4370: */
1.77 daniel 4371:
1.24 daniel 4372: } else {
1.55 daniel 4373: if ((ctxt->sax != NULL) && (ctxt->sax->error != NULL))
1.74 daniel 4374: ctxt->sax->error(ctxt->userData,
1.59 daniel 4375: "xmlParseEntityRef: expecting ';'\n");
4376: ctxt->wellFormed = 0;
1.24 daniel 4377: }
1.45 daniel 4378: free(name);
1.24 daniel 4379: }
4380: }
1.77 daniel 4381: return(ent);
1.24 daniel 4382: }
4383:
1.50 daniel 4384: /**
4385: * xmlParsePEReference:
4386: * @ctxt: an XML parser context
4387: *
4388: * parse PEReference declarations
1.77 daniel 4389: * The entity content is handled directly by pushing it's content as
4390: * a new input stream.
1.22 daniel 4391: *
4392: * [69] PEReference ::= '%' Name ';'
1.68 daniel 4393: *
1.98 daniel 4394: * [ WFC: No Recursion ]
4395: * TODO A parsed entity must not contain a recursive
4396: * reference to itself, either directly or indirectly.
4397: *
4398: * [ WFC: Entity Declared ]
4399: * In a document without any DTD, a document with only an internal DTD
4400: * subset which contains no parameter entity references, or a document
4401: * with "standalone='yes'", ... ... The declaration of a parameter
4402: * entity must precede any reference to it...
4403: *
4404: * [ VC: Entity Declared ]
4405: * In a document with an external subset or external parameter entities
4406: * with "standalone='no'", ... ... The declaration of a parameter entity
4407: * must precede any reference to it...
4408: *
4409: * [ WFC: In DTD ]
4410: * Parameter-entity references may only appear in the DTD.
4411: * NOTE: misleading but this is handled.
1.22 daniel 4412: */
1.77 daniel 4413: void
1.55 daniel 4414: xmlParsePEReference(xmlParserCtxtPtr ctxt) {
1.22 daniel 4415: CHAR *name;
1.72 daniel 4416: xmlEntityPtr entity = NULL;
1.50 daniel 4417: xmlParserInputPtr input;
1.22 daniel 4418:
1.40 daniel 4419: if (CUR == '%') {
4420: NEXT;
1.22 daniel 4421: name = xmlParseName(ctxt);
4422: if (name == NULL) {
1.55 daniel 4423: if ((ctxt->sax != NULL) && (ctxt->sax->error != NULL))
1.74 daniel 4424: ctxt->sax->error(ctxt->userData, "xmlParsePEReference: no name\n");
1.59 daniel 4425: ctxt->wellFormed = 0;
1.22 daniel 4426: } else {
1.40 daniel 4427: if (CUR == ';') {
4428: NEXT;
1.98 daniel 4429: if ((ctxt->sax != NULL) &&
4430: (ctxt->sax->getParameterEntity != NULL))
4431: entity = ctxt->sax->getParameterEntity(ctxt->userData,
4432: name);
1.45 daniel 4433: if (entity == NULL) {
1.98 daniel 4434: /*
4435: * [ WFC: Entity Declared ]
4436: * In a document without any DTD, a document with only an
4437: * internal DTD subset which contains no parameter entity
4438: * references, or a document with "standalone='yes'", ...
4439: * ... The declaration of a parameter entity must precede
4440: * any reference to it...
4441: */
4442: if ((ctxt->standalone == 1) ||
4443: ((ctxt->hasExternalSubset == 0) &&
4444: (ctxt->hasPErefs == 0))) {
4445: if ((ctxt->sax != NULL) && (ctxt->sax->error != NULL))
4446: ctxt->sax->error(ctxt->userData,
4447: "PEReference: %%%s; not found\n", name);
4448: ctxt->wellFormed = 0;
4449: } else {
4450: /*
4451: * [ VC: Entity Declared ]
4452: * In a document with an external subset or external
4453: * parameter entities with "standalone='no'", ...
4454: * ... The declaration of a parameter entity must precede
4455: * any reference to it...
4456: */
4457: if ((ctxt->sax != NULL) && (ctxt->sax->warning != NULL))
4458: ctxt->sax->warning(ctxt->userData,
4459: "PEReference: %%%s; not found\n", name);
4460: ctxt->valid = 0;
4461: }
1.50 daniel 4462: } else {
1.98 daniel 4463: /*
4464: * Internal checking in case the entity quest barfed
4465: */
4466: if ((entity->type != XML_INTERNAL_PARAMETER_ENTITY) &&
4467: (entity->type != XML_EXTERNAL_PARAMETER_ENTITY)) {
4468: if ((ctxt->sax != NULL) && (ctxt->sax->warning != NULL))
4469: ctxt->sax->warning(ctxt->userData,
4470: "Internal: %%%s; is not a parameter entity\n", name);
4471: } else {
4472: input = xmlNewEntityInputStream(ctxt, entity);
4473: xmlPushInput(ctxt, input);
4474: }
1.45 daniel 4475: }
1.98 daniel 4476: ctxt->hasPErefs = 1;
1.22 daniel 4477: } else {
1.55 daniel 4478: if ((ctxt->sax != NULL) && (ctxt->sax->error != NULL))
1.74 daniel 4479: ctxt->sax->error(ctxt->userData,
1.59 daniel 4480: "xmlParsePEReference: expecting ';'\n");
4481: ctxt->wellFormed = 0;
1.22 daniel 4482: }
1.45 daniel 4483: free(name);
1.3 veillard 4484: }
4485: }
4486: }
4487:
1.50 daniel 4488: /**
4489: * xmlParseDocTypeDecl :
4490: * @ctxt: an XML parser context
4491: *
4492: * parse a DOCTYPE declaration
1.21 daniel 4493: *
1.22 daniel 4494: * [28] doctypedecl ::= '<!DOCTYPE' S Name (S ExternalID)? S?
4495: * ('[' (markupdecl | PEReference | S)* ']' S?)? '>'
1.98 daniel 4496: *
4497: * [ VC: Root Element Type ]
1.99 daniel 4498: * The Name in the document type declaration must match the element
1.98 daniel 4499: * type of the root element.
1.21 daniel 4500: */
4501:
1.55 daniel 4502: void
4503: xmlParseDocTypeDecl(xmlParserCtxtPtr ctxt) {
1.21 daniel 4504: CHAR *name;
4505: CHAR *ExternalID = NULL;
1.39 daniel 4506: CHAR *URI = NULL;
1.21 daniel 4507:
4508: /*
4509: * We know that '<!DOCTYPE' has been detected.
4510: */
1.40 daniel 4511: SKIP(9);
1.21 daniel 4512:
1.42 daniel 4513: SKIP_BLANKS;
1.21 daniel 4514:
4515: /*
4516: * Parse the DOCTYPE name.
4517: */
4518: name = xmlParseName(ctxt);
4519: if (name == NULL) {
1.55 daniel 4520: if ((ctxt->sax != NULL) && (ctxt->sax->error != NULL))
1.74 daniel 4521: ctxt->sax->error(ctxt->userData, "xmlParseDocTypeDecl : no DOCTYPE name !\n");
1.59 daniel 4522: ctxt->wellFormed = 0;
1.21 daniel 4523: }
4524:
1.42 daniel 4525: SKIP_BLANKS;
1.21 daniel 4526:
4527: /*
1.22 daniel 4528: * Check for SystemID and ExternalID
4529: */
1.67 daniel 4530: URI = xmlParseExternalID(ctxt, &ExternalID, 1);
1.98 daniel 4531:
4532: if ((URI != NULL) || (ExternalID != NULL)) {
4533: ctxt->hasExternalSubset = 1;
4534: }
4535:
1.42 daniel 4536: SKIP_BLANKS;
1.36 daniel 4537:
1.76 daniel 4538: /*
4539: * NOTE: the SAX callback may try to fetch the external subset
4540: * entity and fill it up !
4541: */
1.72 daniel 4542: if ((ctxt->sax != NULL) && (ctxt->sax->internalSubset != NULL))
1.74 daniel 4543: ctxt->sax->internalSubset(ctxt->userData, name, ExternalID, URI);
1.22 daniel 4544:
4545: /*
4546: * Is there any DTD definition ?
4547: */
1.40 daniel 4548: if (CUR == '[') {
1.96 daniel 4549: ctxt->instate = XML_PARSER_DTD;
1.40 daniel 4550: NEXT;
1.22 daniel 4551: /*
4552: * Parse the succession of Markup declarations and
4553: * PEReferences.
4554: * Subsequence (markupdecl | PEReference | S)*
4555: */
1.40 daniel 4556: while (CUR != ']') {
4557: const CHAR *check = CUR_PTR;
1.22 daniel 4558:
1.42 daniel 4559: SKIP_BLANKS;
1.22 daniel 4560: xmlParseMarkupDecl(ctxt);
1.50 daniel 4561: xmlParsePEReference(ctxt);
1.22 daniel 4562:
1.40 daniel 4563: if (CUR_PTR == check) {
1.55 daniel 4564: if ((ctxt->sax != NULL) && (ctxt->sax->error != NULL))
1.74 daniel 4565: ctxt->sax->error(ctxt->userData,
1.31 daniel 4566: "xmlParseDocTypeDecl: error detected in Markup declaration\n");
1.59 daniel 4567: ctxt->wellFormed = 0;
1.22 daniel 4568: break;
4569: }
1.77 daniel 4570:
4571: /*
4572: * Pop-up of finished entities.
4573: */
4574: while ((CUR == 0) && (ctxt->inputNr > 1))
4575: xmlPopInput(ctxt);
4576:
1.22 daniel 4577: }
1.40 daniel 4578: if (CUR == ']') NEXT;
1.22 daniel 4579: }
4580:
4581: /*
4582: * We should be at the end of the DOCTYPE declaration.
1.21 daniel 4583: */
1.40 daniel 4584: if (CUR != '>') {
1.55 daniel 4585: if ((ctxt->sax != NULL) && (ctxt->sax->error != NULL))
1.74 daniel 4586: ctxt->sax->error(ctxt->userData, "DOCTYPE unproperly terminated\n");
1.59 daniel 4587: ctxt->wellFormed = 0;
1.22 daniel 4588: /* We shouldn't try to resynchronize ... */
1.21 daniel 4589: }
1.40 daniel 4590: NEXT;
1.22 daniel 4591:
4592: /*
1.99 daniel 4593: * Cleanup
1.22 daniel 4594: */
1.39 daniel 4595: if (URI != NULL) free(URI);
1.22 daniel 4596: if (ExternalID != NULL) free(ExternalID);
4597: if (name != NULL) free(name);
1.21 daniel 4598: }
4599:
1.50 daniel 4600: /**
4601: * xmlParseAttribute:
4602: * @ctxt: an XML parser context
1.72 daniel 4603: * @value: a CHAR ** used to store the value of the attribute
1.50 daniel 4604: *
4605: * parse an attribute
1.3 veillard 4606: *
1.22 daniel 4607: * [41] Attribute ::= Name Eq AttValue
4608: *
1.98 daniel 4609: * [ WFC: No External Entity References ]
4610: * Attribute values cannot contain direct or indirect entity references
4611: * to external entities.
4612: *
4613: * [ WFC: No < in Attribute Values ]
4614: * The replacement text of any entity referred to directly or indirectly in
4615: * an attribute value (other than "<") must not contain a <.
4616: *
4617: * [ VC: Attribute Value Type ]
4618: * TODO The attribute must have been declared; the value must be of the type
1.99 daniel 4619: * declared for it.
1.98 daniel 4620: *
1.22 daniel 4621: * [25] Eq ::= S? '=' S?
4622: *
1.29 daniel 4623: * With namespace:
4624: *
4625: * [NS 11] Attribute ::= QName Eq AttValue
1.43 daniel 4626: *
4627: * Also the case QName == xmlns:??? is handled independently as a namespace
4628: * definition.
1.69 daniel 4629: *
1.72 daniel 4630: * Returns the attribute name, and the value in *value.
1.3 veillard 4631: */
4632:
1.72 daniel 4633: CHAR *
4634: xmlParseAttribute(xmlParserCtxtPtr ctxt, CHAR **value) {
1.59 daniel 4635: CHAR *name, *val;
1.3 veillard 4636:
1.72 daniel 4637: *value = NULL;
4638: name = xmlParseName(ctxt);
1.22 daniel 4639: if (name == NULL) {
1.55 daniel 4640: if ((ctxt->sax != NULL) && (ctxt->sax->error != NULL))
1.74 daniel 4641: ctxt->sax->error(ctxt->userData, "error parsing attribute name\n");
1.59 daniel 4642: ctxt->wellFormed = 0;
1.52 daniel 4643: return(NULL);
1.3 veillard 4644: }
4645:
4646: /*
1.29 daniel 4647: * read the value
1.3 veillard 4648: */
1.42 daniel 4649: SKIP_BLANKS;
1.40 daniel 4650: if (CUR == '=') {
4651: NEXT;
1.42 daniel 4652: SKIP_BLANKS;
1.72 daniel 4653: val = xmlParseAttValue(ctxt);
1.96 daniel 4654: ctxt->instate = XML_PARSER_CONTENT;
1.29 daniel 4655: } else {
1.55 daniel 4656: if ((ctxt->sax != NULL) && (ctxt->sax->error != NULL))
1.74 daniel 4657: ctxt->sax->error(ctxt->userData,
1.59 daniel 4658: "Specification mandate value for attribute %s\n", name);
4659: ctxt->wellFormed = 0;
1.52 daniel 4660: return(NULL);
1.43 daniel 4661: }
4662:
1.72 daniel 4663: *value = val;
4664: return(name);
1.3 veillard 4665: }
4666:
1.50 daniel 4667: /**
4668: * xmlParseStartTag:
4669: * @ctxt: an XML parser context
4670: *
4671: * parse a start of tag either for rule element or
4672: * EmptyElement. In both case we don't parse the tag closing chars.
1.27 daniel 4673: *
4674: * [40] STag ::= '<' Name (S Attribute)* S? '>'
4675: *
1.98 daniel 4676: * [ WFC: Unique Att Spec ]
4677: * No attribute name may appear more than once in the same start-tag or
4678: * empty-element tag.
4679: *
1.29 daniel 4680: * [44] EmptyElemTag ::= '<' Name (S Attribute)* S? '/>'
4681: *
1.98 daniel 4682: * [ WFC: Unique Att Spec ]
4683: * No attribute name may appear more than once in the same start-tag or
4684: * empty-element tag.
4685: *
1.29 daniel 4686: * With namespace:
4687: *
4688: * [NS 8] STag ::= '<' QName (S Attribute)* S? '>'
4689: *
4690: * [NS 10] EmptyElement ::= '<' QName (S Attribute)* S? '/>'
1.83 daniel 4691: *
4692: * Returns the element name parsed
1.2 veillard 4693: */
4694:
1.83 daniel 4695: CHAR *
1.69 daniel 4696: xmlParseStartTag(xmlParserCtxtPtr ctxt) {
1.72 daniel 4697: CHAR *name;
4698: CHAR *attname;
4699: CHAR *attvalue;
4700: const CHAR **atts = NULL;
4701: int nbatts = 0;
4702: int maxatts = 0;
4703: int i;
1.2 veillard 4704:
1.83 daniel 4705: if (CUR != '<') return(NULL);
1.40 daniel 4706: NEXT;
1.3 veillard 4707:
1.72 daniel 4708: name = xmlParseName(ctxt);
1.59 daniel 4709: if (name == NULL) {
4710: if ((ctxt->sax != NULL) && (ctxt->sax->error != NULL))
1.74 daniel 4711: ctxt->sax->error(ctxt->userData,
1.59 daniel 4712: "xmlParseStartTag: invalid element name\n");
4713: ctxt->wellFormed = 0;
1.83 daniel 4714: return(NULL);
1.50 daniel 4715: }
4716:
4717: /*
1.3 veillard 4718: * Now parse the attributes, it ends up with the ending
4719: *
4720: * (S Attribute)* S?
4721: */
1.42 daniel 4722: SKIP_BLANKS;
1.91 daniel 4723: GROW;
1.40 daniel 4724: while ((IS_CHAR(CUR)) &&
4725: (CUR != '>') &&
4726: ((CUR != '/') || (NXT(1) != '>'))) {
4727: const CHAR *q = CUR_PTR;
1.91 daniel 4728: int cons = ctxt->input->consumed;
1.29 daniel 4729:
1.72 daniel 4730: attname = xmlParseAttribute(ctxt, &attvalue);
4731: if ((attname != NULL) && (attvalue != NULL)) {
4732: /*
1.98 daniel 4733: * [ WFC: Unique Att Spec ]
4734: * No attribute name may appear more than once in the same
4735: * start-tag or empty-element tag.
1.72 daniel 4736: */
4737: for (i = 0; i < nbatts;i += 2) {
4738: if (!xmlStrcmp(atts[i], attname)) {
4739: if ((ctxt->sax != NULL) && (ctxt->sax->error != NULL))
1.98 daniel 4740: ctxt->sax->error(ctxt->userData,
4741: "Attribute %s redefined\n",
4742: attname);
1.72 daniel 4743: ctxt->wellFormed = 0;
4744: free(attname);
4745: free(attvalue);
1.98 daniel 4746: goto failed;
1.72 daniel 4747: }
4748: }
4749:
4750: /*
4751: * Add the pair to atts
4752: */
4753: if (atts == NULL) {
4754: maxatts = 10;
4755: atts = (const CHAR **) malloc(maxatts * sizeof(CHAR *));
4756: if (atts == NULL) {
1.86 daniel 4757: fprintf(stderr, "malloc of %ld byte failed\n",
4758: maxatts * (long)sizeof(CHAR *));
1.83 daniel 4759: return(NULL);
1.72 daniel 4760: }
4761: } else if (nbatts + 2 < maxatts) {
4762: maxatts *= 2;
4763: atts = (const CHAR **) realloc(atts, maxatts * sizeof(CHAR *));
4764: if (atts == NULL) {
1.86 daniel 4765: fprintf(stderr, "realloc of %ld byte failed\n",
4766: maxatts * (long)sizeof(CHAR *));
1.83 daniel 4767: return(NULL);
1.72 daniel 4768: }
4769: }
4770: atts[nbatts++] = attname;
4771: atts[nbatts++] = attvalue;
4772: atts[nbatts] = NULL;
4773: atts[nbatts + 1] = NULL;
1.98 daniel 4774: failed:
1.72 daniel 4775: }
4776:
1.42 daniel 4777: SKIP_BLANKS;
1.91 daniel 4778: if ((cons == ctxt->input->consumed) && (q == CUR_PTR)) {
1.55 daniel 4779: if ((ctxt->sax != NULL) && (ctxt->sax->error != NULL))
1.74 daniel 4780: ctxt->sax->error(ctxt->userData,
1.31 daniel 4781: "xmlParseStartTag: problem parsing attributes\n");
1.59 daniel 4782: ctxt->wellFormed = 0;
1.29 daniel 4783: break;
1.3 veillard 4784: }
1.91 daniel 4785: GROW;
1.3 veillard 4786: }
4787:
1.43 daniel 4788: /*
1.72 daniel 4789: * SAX: Start of Element !
1.43 daniel 4790: */
1.72 daniel 4791: if ((ctxt->sax != NULL) && (ctxt->sax->startElement != NULL))
1.74 daniel 4792: ctxt->sax->startElement(ctxt->userData, name, atts);
1.43 daniel 4793:
1.72 daniel 4794: if (atts != NULL) {
4795: for (i = 0;i < nbatts;i++) free((CHAR *) atts[i]);
4796: free(atts);
4797: }
1.83 daniel 4798: return(name);
1.3 veillard 4799: }
4800:
1.50 daniel 4801: /**
4802: * xmlParseEndTag:
4803: * @ctxt: an XML parser context
1.83 daniel 4804: * @tagname: the tag name as parsed in the opening tag.
1.50 daniel 4805: *
4806: * parse an end of tag
1.27 daniel 4807: *
4808: * [42] ETag ::= '</' Name S? '>'
1.29 daniel 4809: *
4810: * With namespace
4811: *
1.72 daniel 4812: * [NS 9] ETag ::= '</' QName S? '>'
1.7 veillard 4813: */
4814:
1.55 daniel 4815: void
1.83 daniel 4816: xmlParseEndTag(xmlParserCtxtPtr ctxt, CHAR *tagname) {
1.72 daniel 4817: CHAR *name;
1.7 veillard 4818:
1.91 daniel 4819: GROW;
1.40 daniel 4820: if ((CUR != '<') || (NXT(1) != '/')) {
1.55 daniel 4821: if ((ctxt->sax != NULL) && (ctxt->sax->error != NULL))
1.74 daniel 4822: ctxt->sax->error(ctxt->userData, "xmlParseEndTag: '</' not found\n");
1.59 daniel 4823: ctxt->wellFormed = 0;
1.27 daniel 4824: return;
4825: }
1.40 daniel 4826: SKIP(2);
1.7 veillard 4827:
1.72 daniel 4828: name = xmlParseName(ctxt);
1.7 veillard 4829:
4830: /*
4831: * We should definitely be at the ending "S? '>'" part
4832: */
1.91 daniel 4833: GROW;
1.42 daniel 4834: SKIP_BLANKS;
1.40 daniel 4835: if ((!IS_CHAR(CUR)) || (CUR != '>')) {
1.55 daniel 4836: if ((ctxt->sax != NULL) && (ctxt->sax->error != NULL))
1.74 daniel 4837: ctxt->sax->error(ctxt->userData, "End tag : expected '>'\n");
1.59 daniel 4838: ctxt->wellFormed = 0;
1.7 veillard 4839: } else
1.40 daniel 4840: NEXT;
1.7 veillard 4841:
1.72 daniel 4842: /*
1.98 daniel 4843: * [ WFC: Element Type Match ]
4844: * The Name in an element's end-tag must match the element type in the
4845: * start-tag.
4846: *
1.83 daniel 4847: */
4848: if (xmlStrcmp(name, tagname)) {
4849: if ((ctxt->sax != NULL) && (ctxt->sax->error != NULL))
4850: ctxt->sax->error(ctxt->userData,
4851: "Opening and ending tag mismatch: %s and %s\n", tagname, name);
4852: ctxt->wellFormed = 0;
4853: }
4854:
4855: /*
1.72 daniel 4856: * SAX: End of Tag
4857: */
4858: if ((ctxt->sax != NULL) && (ctxt->sax->endElement != NULL))
1.74 daniel 4859: ctxt->sax->endElement(ctxt->userData, name);
1.72 daniel 4860:
4861: if (name != NULL)
4862: free(name);
4863:
1.7 veillard 4864: return;
4865: }
4866:
1.50 daniel 4867: /**
4868: * xmlParseCDSect:
4869: * @ctxt: an XML parser context
4870: *
4871: * Parse escaped pure raw content.
1.29 daniel 4872: *
4873: * [18] CDSect ::= CDStart CData CDEnd
4874: *
4875: * [19] CDStart ::= '<![CDATA['
4876: *
4877: * [20] Data ::= (Char* - (Char* ']]>' Char*))
4878: *
4879: * [21] CDEnd ::= ']]>'
1.3 veillard 4880: */
1.55 daniel 4881: void
4882: xmlParseCDSect(xmlParserCtxtPtr ctxt) {
1.17 daniel 4883: const CHAR *r, *s, *base;
1.3 veillard 4884:
1.40 daniel 4885: if ((CUR == '<') && (NXT(1) == '!') &&
4886: (NXT(2) == '[') && (NXT(3) == 'C') &&
4887: (NXT(4) == 'D') && (NXT(5) == 'A') &&
4888: (NXT(6) == 'T') && (NXT(7) == 'A') &&
4889: (NXT(8) == '[')) {
4890: SKIP(9);
1.29 daniel 4891: } else
1.45 daniel 4892: return;
1.40 daniel 4893: base = CUR_PTR;
4894: if (!IS_CHAR(CUR)) {
1.55 daniel 4895: if ((ctxt->sax != NULL) && (ctxt->sax->error != NULL))
1.74 daniel 4896: ctxt->sax->error(ctxt->userData, "CData section not finished\n%.50s\n", base);
1.59 daniel 4897: ctxt->wellFormed = 0;
1.45 daniel 4898: return;
1.3 veillard 4899: }
1.91 daniel 4900: r = CUR_PTR;
4901: NEXT;
1.40 daniel 4902: if (!IS_CHAR(CUR)) {
1.55 daniel 4903: if ((ctxt->sax != NULL) && (ctxt->sax->error != NULL))
1.74 daniel 4904: ctxt->sax->error(ctxt->userData, "CData section not finished\n%.50s\n", base);
1.59 daniel 4905: ctxt->wellFormed = 0;
1.45 daniel 4906: return;
1.3 veillard 4907: }
1.91 daniel 4908: s = CUR_PTR;
4909: NEXT;
1.40 daniel 4910: while (IS_CHAR(CUR) &&
4911: ((*r != ']') || (*s != ']') || (CUR != '>'))) {
4912: r++;s++;NEXT;
1.3 veillard 4913: }
1.40 daniel 4914: if (!IS_CHAR(CUR)) {
1.55 daniel 4915: if ((ctxt->sax != NULL) && (ctxt->sax->error != NULL))
1.74 daniel 4916: ctxt->sax->error(ctxt->userData, "CData section not finished\n%.50s\n", base);
1.59 daniel 4917: ctxt->wellFormed = 0;
1.45 daniel 4918: return;
1.3 veillard 4919: }
1.16 daniel 4920:
1.45 daniel 4921: /*
4922: * Ok the segment [base CUR_PTR] is to be consumed as chars.
4923: */
4924: if (ctxt->sax != NULL) {
1.72 daniel 4925: if (areBlanks(ctxt, base, CUR_PTR - base)) {
4926: if (ctxt->sax->ignorableWhitespace != NULL)
1.74 daniel 4927: ctxt->sax->ignorableWhitespace(ctxt->userData, base,
1.72 daniel 4928: (CUR_PTR - base) - 2);
4929: } else {
4930: if (ctxt->sax->characters != NULL)
1.74 daniel 4931: ctxt->sax->characters(ctxt->userData, base, (CUR_PTR - base) - 2);
1.72 daniel 4932: }
1.45 daniel 4933: }
1.2 veillard 4934: }
4935:
1.50 daniel 4936: /**
4937: * xmlParseContent:
4938: * @ctxt: an XML parser context
4939: *
4940: * Parse a content:
1.2 veillard 4941: *
1.27 daniel 4942: * [43] content ::= (element | CharData | Reference | CDSect | PI | Comment)*
1.2 veillard 4943: */
4944:
1.55 daniel 4945: void
4946: xmlParseContent(xmlParserCtxtPtr ctxt) {
1.97 daniel 4947: GROW;
1.40 daniel 4948: while ((CUR != '<') || (NXT(1) != '/')) {
4949: const CHAR *test = CUR_PTR;
1.91 daniel 4950: int cons = ctxt->input->consumed;
1.27 daniel 4951:
4952: /*
4953: * First case : a Processing Instruction.
4954: */
1.40 daniel 4955: if ((CUR == '<') && (NXT(1) == '?')) {
1.27 daniel 4956: xmlParsePI(ctxt);
4957: }
1.72 daniel 4958:
1.27 daniel 4959: /*
4960: * Second case : a CDSection
4961: */
1.40 daniel 4962: else if ((CUR == '<') && (NXT(1) == '!') &&
4963: (NXT(2) == '[') && (NXT(3) == 'C') &&
4964: (NXT(4) == 'D') && (NXT(5) == 'A') &&
4965: (NXT(6) == 'T') && (NXT(7) == 'A') &&
4966: (NXT(8) == '[')) {
1.45 daniel 4967: xmlParseCDSect(ctxt);
1.27 daniel 4968: }
1.72 daniel 4969:
1.27 daniel 4970: /*
4971: * Third case : a comment
4972: */
1.40 daniel 4973: else if ((CUR == '<') && (NXT(1) == '!') &&
4974: (NXT(2) == '-') && (NXT(3) == '-')) {
1.72 daniel 4975: xmlParseComment(ctxt, 1);
1.97 daniel 4976: ctxt->instate = XML_PARSER_CONTENT;
1.27 daniel 4977: }
1.72 daniel 4978:
1.27 daniel 4979: /*
4980: * Fourth case : a sub-element.
4981: */
1.40 daniel 4982: else if (CUR == '<') {
1.72 daniel 4983: xmlParseElement(ctxt);
1.45 daniel 4984: }
1.72 daniel 4985:
1.45 daniel 4986: /*
1.50 daniel 4987: * Fifth case : a reference. If if has not been resolved,
4988: * parsing returns it's Name, create the node
1.45 daniel 4989: */
1.97 daniel 4990:
1.45 daniel 4991: else if (CUR == '&') {
1.77 daniel 4992: xmlParseReference(ctxt);
1.27 daniel 4993: }
1.72 daniel 4994:
1.27 daniel 4995: /*
4996: * Last case, text. Note that References are handled directly.
4997: */
4998: else {
1.45 daniel 4999: xmlParseCharData(ctxt, 0);
1.3 veillard 5000: }
1.14 veillard 5001:
1.91 daniel 5002: GROW;
1.14 veillard 5003: /*
1.45 daniel 5004: * Pop-up of finished entities.
1.14 veillard 5005: */
1.69 daniel 5006: while ((CUR == 0) && (ctxt->inputNr > 1))
5007: xmlPopInput(ctxt);
1.45 daniel 5008:
1.91 daniel 5009: if ((cons == ctxt->input->consumed) && (test == CUR_PTR)) {
1.55 daniel 5010: if ((ctxt->sax != NULL) && (ctxt->sax->error != NULL))
1.74 daniel 5011: ctxt->sax->error(ctxt->userData,
1.59 daniel 5012: "detected an error in element content\n");
5013: ctxt->wellFormed = 0;
1.29 daniel 5014: break;
5015: }
1.3 veillard 5016: }
1.2 veillard 5017: }
5018:
1.50 daniel 5019: /**
5020: * xmlParseElement:
5021: * @ctxt: an XML parser context
5022: *
5023: * parse an XML element, this is highly recursive
1.26 daniel 5024: *
5025: * [39] element ::= EmptyElemTag | STag content ETag
5026: *
1.98 daniel 5027: * [ WFC: Element Type Match ]
5028: * The Name in an element's end-tag must match the element type in the
5029: * start-tag.
5030: *
5031: * [ VC: Element Valid ]
5032: * TODO An element is valid if there is a declaration matching elementdecl
1.99 daniel 5033: * where the Name matches the element type and one of the following holds:
5034: * - The declaration matches EMPTY and the element has no content.
5035: * - The declaration matches children and the sequence of child elements
5036: * belongs to the language generated by the regular expression in the
5037: * content model, with optional white space (characters matching the
5038: * nonterminal S) between each pair of child elements.
5039: * - The declaration matches Mixed and the content consists of character
5040: * data and child elements whose types match names in the content model.
5041: * - The declaration matches ANY, and the types of any child elements have
5042: * been declared.
1.2 veillard 5043: */
1.26 daniel 5044:
1.72 daniel 5045: void
1.69 daniel 5046: xmlParseElement(xmlParserCtxtPtr ctxt) {
1.40 daniel 5047: const CHAR *openTag = CUR_PTR;
1.83 daniel 5048: CHAR *name;
1.32 daniel 5049: xmlParserNodeInfo node_info;
1.2 veillard 5050:
1.32 daniel 5051: /* Capture start position */
1.40 daniel 5052: node_info.begin_pos = CUR_PTR - ctxt->input->base;
5053: node_info.begin_line = ctxt->input->line;
1.32 daniel 5054:
1.83 daniel 5055: name = xmlParseStartTag(ctxt);
5056: if (name == NULL) {
5057: return;
5058: }
1.2 veillard 5059:
5060: /*
1.99 daniel 5061: * [ VC: Root Element Type ]
5062: * The Name in the document type declaration must match the element
5063: * type of the root element.
5064: */
1.102 daniel 5065: if (ctxt->validate && ctxt->wellFormed && ctxt->myDoc)
5066: ctxt->valid &= xmlValidateRoot(&ctxt->vctxt, ctxt->myDoc);
5067:
1.99 daniel 5068:
5069: /*
1.2 veillard 5070: * Check for an Empty Element.
5071: */
1.40 daniel 5072: if ((CUR == '/') && (NXT(1) == '>')) {
5073: SKIP(2);
1.72 daniel 5074: if ((ctxt->sax != NULL) && (ctxt->sax->endElement != NULL))
1.83 daniel 5075: ctxt->sax->endElement(ctxt->userData, name);
5076: free(name);
1.72 daniel 5077: return;
1.2 veillard 5078: }
1.91 daniel 5079: if (CUR == '>') {
5080: NEXT;
5081: } else {
1.55 daniel 5082: if ((ctxt->sax != NULL) && (ctxt->sax->error != NULL))
1.74 daniel 5083: ctxt->sax->error(ctxt->userData, "Couldn't find end of Start Tag\n%.30s\n",
1.57 daniel 5084: openTag);
1.59 daniel 5085: ctxt->wellFormed = 0;
1.45 daniel 5086:
5087: /*
5088: * end of parsing of this node.
5089: */
5090: nodePop(ctxt);
1.83 daniel 5091: free(name);
1.72 daniel 5092: return;
1.2 veillard 5093: }
5094:
5095: /*
5096: * Parse the content of the element:
5097: */
1.45 daniel 5098: xmlParseContent(ctxt);
1.40 daniel 5099: if (!IS_CHAR(CUR)) {
1.55 daniel 5100: if ((ctxt->sax != NULL) && (ctxt->sax->error != NULL))
1.74 daniel 5101: ctxt->sax->error(ctxt->userData,
1.57 daniel 5102: "Premature end of data in tag %.30s\n", openTag);
1.59 daniel 5103: ctxt->wellFormed = 0;
1.45 daniel 5104:
5105: /*
5106: * end of parsing of this node.
5107: */
5108: nodePop(ctxt);
1.83 daniel 5109: free(name);
1.72 daniel 5110: return;
1.2 veillard 5111: }
5112:
5113: /*
1.27 daniel 5114: * parse the end of tag: '</' should be here.
1.2 veillard 5115: */
1.83 daniel 5116: xmlParseEndTag(ctxt, name);
5117: free(name);
1.2 veillard 5118: }
5119:
1.50 daniel 5120: /**
5121: * xmlParseVersionNum:
5122: * @ctxt: an XML parser context
5123: *
5124: * parse the XML version value.
1.29 daniel 5125: *
5126: * [26] VersionNum ::= ([a-zA-Z0-9_.:] | '-')+
1.68 daniel 5127: *
5128: * Returns the string giving the XML version number, or NULL
1.29 daniel 5129: */
1.55 daniel 5130: CHAR *
5131: xmlParseVersionNum(xmlParserCtxtPtr ctxt) {
1.40 daniel 5132: const CHAR *q = CUR_PTR;
1.29 daniel 5133: CHAR *ret;
5134:
1.40 daniel 5135: while (IS_CHAR(CUR) &&
5136: (((CUR >= 'a') && (CUR <= 'z')) ||
5137: ((CUR >= 'A') && (CUR <= 'Z')) ||
5138: ((CUR >= '0') && (CUR <= '9')) ||
5139: (CUR == '_') || (CUR == '.') ||
5140: (CUR == ':') || (CUR == '-'))) NEXT;
5141: ret = xmlStrndup(q, CUR_PTR - q);
1.29 daniel 5142: return(ret);
5143: }
5144:
1.50 daniel 5145: /**
5146: * xmlParseVersionInfo:
5147: * @ctxt: an XML parser context
5148: *
5149: * parse the XML version.
1.29 daniel 5150: *
5151: * [24] VersionInfo ::= S 'version' Eq (' VersionNum ' | " VersionNum ")
5152: *
5153: * [25] Eq ::= S? '=' S?
1.50 daniel 5154: *
1.68 daniel 5155: * Returns the version string, e.g. "1.0"
1.29 daniel 5156: */
5157:
1.55 daniel 5158: CHAR *
5159: xmlParseVersionInfo(xmlParserCtxtPtr ctxt) {
1.29 daniel 5160: CHAR *version = NULL;
5161: const CHAR *q;
5162:
1.40 daniel 5163: if ((CUR == 'v') && (NXT(1) == 'e') &&
5164: (NXT(2) == 'r') && (NXT(3) == 's') &&
5165: (NXT(4) == 'i') && (NXT(5) == 'o') &&
5166: (NXT(6) == 'n')) {
5167: SKIP(7);
1.42 daniel 5168: SKIP_BLANKS;
1.40 daniel 5169: if (CUR != '=') {
1.55 daniel 5170: if ((ctxt->sax != NULL) && (ctxt->sax->error != NULL))
1.74 daniel 5171: ctxt->sax->error(ctxt->userData, "xmlParseVersionInfo : expected '='\n");
1.59 daniel 5172: ctxt->wellFormed = 0;
1.31 daniel 5173: return(NULL);
5174: }
1.40 daniel 5175: NEXT;
1.42 daniel 5176: SKIP_BLANKS;
1.40 daniel 5177: if (CUR == '"') {
5178: NEXT;
5179: q = CUR_PTR;
1.29 daniel 5180: version = xmlParseVersionNum(ctxt);
1.55 daniel 5181: if (CUR != '"') {
5182: if ((ctxt->sax != NULL) && (ctxt->sax->error != NULL))
1.74 daniel 5183: ctxt->sax->error(ctxt->userData, "String not closed\n%.50s\n", q);
1.59 daniel 5184: ctxt->wellFormed = 0;
1.55 daniel 5185: } else
1.40 daniel 5186: NEXT;
5187: } else if (CUR == '\''){
5188: NEXT;
5189: q = CUR_PTR;
1.29 daniel 5190: version = xmlParseVersionNum(ctxt);
1.55 daniel 5191: if (CUR != '\'') {
5192: if ((ctxt->sax != NULL) && (ctxt->sax->error != NULL))
1.74 daniel 5193: ctxt->sax->error(ctxt->userData, "String not closed\n%.50s\n", q);
1.59 daniel 5194: ctxt->wellFormed = 0;
1.55 daniel 5195: } else
1.40 daniel 5196: NEXT;
1.31 daniel 5197: } else {
1.55 daniel 5198: if ((ctxt->sax != NULL) && (ctxt->sax->error != NULL))
1.74 daniel 5199: ctxt->sax->error(ctxt->userData,
1.59 daniel 5200: "xmlParseVersionInfo : expected ' or \"\n");
5201: ctxt->wellFormed = 0;
1.29 daniel 5202: }
5203: }
5204: return(version);
5205: }
5206:
1.50 daniel 5207: /**
5208: * xmlParseEncName:
5209: * @ctxt: an XML parser context
5210: *
5211: * parse the XML encoding name
1.29 daniel 5212: *
5213: * [81] EncName ::= [A-Za-z] ([A-Za-z0-9._] | '-')*
1.50 daniel 5214: *
1.68 daniel 5215: * Returns the encoding name value or NULL
1.29 daniel 5216: */
1.55 daniel 5217: CHAR *
5218: xmlParseEncName(xmlParserCtxtPtr ctxt) {
1.40 daniel 5219: const CHAR *q = CUR_PTR;
1.29 daniel 5220: CHAR *ret = NULL;
5221:
1.40 daniel 5222: if (((CUR >= 'a') && (CUR <= 'z')) ||
5223: ((CUR >= 'A') && (CUR <= 'Z'))) {
5224: NEXT;
5225: while (IS_CHAR(CUR) &&
5226: (((CUR >= 'a') && (CUR <= 'z')) ||
5227: ((CUR >= 'A') && (CUR <= 'Z')) ||
5228: ((CUR >= '0') && (CUR <= '9')) ||
5229: (CUR == '-'))) NEXT;
5230: ret = xmlStrndup(q, CUR_PTR - q);
1.29 daniel 5231: } else {
1.55 daniel 5232: if ((ctxt->sax != NULL) && (ctxt->sax->error != NULL))
1.74 daniel 5233: ctxt->sax->error(ctxt->userData, "Invalid XML encoding name\n");
1.59 daniel 5234: ctxt->wellFormed = 0;
1.29 daniel 5235: }
5236: return(ret);
5237: }
5238:
1.50 daniel 5239: /**
5240: * xmlParseEncodingDecl:
5241: * @ctxt: an XML parser context
5242: *
5243: * parse the XML encoding declaration
1.29 daniel 5244: *
5245: * [80] EncodingDecl ::= S 'encoding' Eq ('"' EncName '"' | "'" EncName "'")
1.50 daniel 5246: *
5247: * TODO: this should setup the conversion filters.
5248: *
1.68 daniel 5249: * Returns the encoding value or NULL
1.29 daniel 5250: */
5251:
1.55 daniel 5252: CHAR *
5253: xmlParseEncodingDecl(xmlParserCtxtPtr ctxt) {
1.29 daniel 5254: CHAR *encoding = NULL;
5255: const CHAR *q;
5256:
1.42 daniel 5257: SKIP_BLANKS;
1.40 daniel 5258: if ((CUR == 'e') && (NXT(1) == 'n') &&
5259: (NXT(2) == 'c') && (NXT(3) == 'o') &&
5260: (NXT(4) == 'd') && (NXT(5) == 'i') &&
5261: (NXT(6) == 'n') && (NXT(7) == 'g')) {
5262: SKIP(8);
1.42 daniel 5263: SKIP_BLANKS;
1.40 daniel 5264: if (CUR != '=') {
1.55 daniel 5265: if ((ctxt->sax != NULL) && (ctxt->sax->error != NULL))
1.74 daniel 5266: ctxt->sax->error(ctxt->userData, "xmlParseEncodingDecl : expected '='\n");
1.59 daniel 5267: ctxt->wellFormed = 0;
1.31 daniel 5268: return(NULL);
5269: }
1.40 daniel 5270: NEXT;
1.42 daniel 5271: SKIP_BLANKS;
1.40 daniel 5272: if (CUR == '"') {
5273: NEXT;
5274: q = CUR_PTR;
1.29 daniel 5275: encoding = xmlParseEncName(ctxt);
1.55 daniel 5276: if (CUR != '"') {
5277: if ((ctxt->sax != NULL) && (ctxt->sax->error != NULL))
1.74 daniel 5278: ctxt->sax->error(ctxt->userData, "String not closed\n%.50s\n", q);
1.59 daniel 5279: ctxt->wellFormed = 0;
1.55 daniel 5280: } else
1.40 daniel 5281: NEXT;
5282: } else if (CUR == '\''){
5283: NEXT;
5284: q = CUR_PTR;
1.29 daniel 5285: encoding = xmlParseEncName(ctxt);
1.55 daniel 5286: if (CUR != '\'') {
5287: if ((ctxt->sax != NULL) && (ctxt->sax->error != NULL))
1.74 daniel 5288: ctxt->sax->error(ctxt->userData, "String not closed\n%.50s\n", q);
1.59 daniel 5289: ctxt->wellFormed = 0;
1.55 daniel 5290: } else
1.40 daniel 5291: NEXT;
5292: } else if (CUR == '"'){
1.55 daniel 5293: if ((ctxt->sax != NULL) && (ctxt->sax->error != NULL))
1.74 daniel 5294: ctxt->sax->error(ctxt->userData,
1.59 daniel 5295: "xmlParseEncodingDecl : expected ' or \"\n");
5296: ctxt->wellFormed = 0;
1.29 daniel 5297: }
5298: }
5299: return(encoding);
5300: }
5301:
1.50 daniel 5302: /**
5303: * xmlParseSDDecl:
5304: * @ctxt: an XML parser context
5305: *
5306: * parse the XML standalone declaration
1.29 daniel 5307: *
5308: * [32] SDDecl ::= S 'standalone' Eq
5309: * (("'" ('yes' | 'no') "'") | ('"' ('yes' | 'no')'"'))
1.99 daniel 5310: *
5311: * [ VC: Standalone Document Declaration ]
5312: * TODO The standalone document declaration must have the value "no"
5313: * if any external markup declarations contain declarations of:
5314: * - attributes with default values, if elements to which these
5315: * attributes apply appear in the document without specifications
5316: * of values for these attributes, or
5317: * - entities (other than amp, lt, gt, apos, quot), if references
5318: * to those entities appear in the document, or
5319: * - attributes with values subject to normalization, where the
5320: * attribute appears in the document with a value which will change
5321: * as a result of normalization, or
5322: * - element types with element content, if white space occurs directly
5323: * within any instance of those types.
1.68 daniel 5324: *
5325: * Returns 1 if standalone, 0 otherwise
1.29 daniel 5326: */
5327:
1.55 daniel 5328: int
5329: xmlParseSDDecl(xmlParserCtxtPtr ctxt) {
1.29 daniel 5330: int standalone = -1;
5331:
1.42 daniel 5332: SKIP_BLANKS;
1.40 daniel 5333: if ((CUR == 's') && (NXT(1) == 't') &&
5334: (NXT(2) == 'a') && (NXT(3) == 'n') &&
5335: (NXT(4) == 'd') && (NXT(5) == 'a') &&
5336: (NXT(6) == 'l') && (NXT(7) == 'o') &&
5337: (NXT(8) == 'n') && (NXT(9) == 'e')) {
5338: SKIP(10);
1.81 daniel 5339: SKIP_BLANKS;
1.40 daniel 5340: if (CUR != '=') {
1.55 daniel 5341: if ((ctxt->sax != NULL) && (ctxt->sax->error != NULL))
1.74 daniel 5342: ctxt->sax->error(ctxt->userData,
1.59 daniel 5343: "XML standalone declaration : expected '='\n");
5344: ctxt->wellFormed = 0;
1.32 daniel 5345: return(standalone);
5346: }
1.40 daniel 5347: NEXT;
1.42 daniel 5348: SKIP_BLANKS;
1.40 daniel 5349: if (CUR == '\''){
5350: NEXT;
5351: if ((CUR == 'n') && (NXT(1) == 'o')) {
1.29 daniel 5352: standalone = 0;
1.40 daniel 5353: SKIP(2);
5354: } else if ((CUR == 'y') && (NXT(1) == 'e') &&
5355: (NXT(2) == 's')) {
1.29 daniel 5356: standalone = 1;
1.40 daniel 5357: SKIP(3);
1.29 daniel 5358: } else {
1.55 daniel 5359: if ((ctxt->sax != NULL) && (ctxt->sax->error != NULL))
1.74 daniel 5360: ctxt->sax->error(ctxt->userData, "standalone accepts only 'yes' or 'no'\n");
1.59 daniel 5361: ctxt->wellFormed = 0;
1.29 daniel 5362: }
1.55 daniel 5363: if (CUR != '\'') {
5364: if ((ctxt->sax != NULL) && (ctxt->sax->error != NULL))
1.74 daniel 5365: ctxt->sax->error(ctxt->userData, "String not closed\n");
1.59 daniel 5366: ctxt->wellFormed = 0;
1.55 daniel 5367: } else
1.40 daniel 5368: NEXT;
5369: } else if (CUR == '"'){
5370: NEXT;
5371: if ((CUR == 'n') && (NXT(1) == 'o')) {
1.29 daniel 5372: standalone = 0;
1.40 daniel 5373: SKIP(2);
5374: } else if ((CUR == 'y') && (NXT(1) == 'e') &&
5375: (NXT(2) == 's')) {
1.29 daniel 5376: standalone = 1;
1.40 daniel 5377: SKIP(3);
1.29 daniel 5378: } else {
1.55 daniel 5379: if ((ctxt->sax != NULL) && (ctxt->sax->error != NULL))
1.74 daniel 5380: ctxt->sax->error(ctxt->userData,
1.59 daniel 5381: "standalone accepts only 'yes' or 'no'\n");
5382: ctxt->wellFormed = 0;
1.29 daniel 5383: }
1.55 daniel 5384: if (CUR != '"') {
5385: if ((ctxt->sax != NULL) && (ctxt->sax->error != NULL))
1.74 daniel 5386: ctxt->sax->error(ctxt->userData, "String not closed\n");
1.59 daniel 5387: ctxt->wellFormed = 0;
1.55 daniel 5388: } else
1.40 daniel 5389: NEXT;
1.37 daniel 5390: } else {
1.55 daniel 5391: if ((ctxt->sax != NULL) && (ctxt->sax->error != NULL))
1.74 daniel 5392: ctxt->sax->error(ctxt->userData, "Standalone value not found\n");
1.59 daniel 5393: ctxt->wellFormed = 0;
1.37 daniel 5394: }
1.29 daniel 5395: }
5396: return(standalone);
5397: }
5398:
1.50 daniel 5399: /**
5400: * xmlParseXMLDecl:
5401: * @ctxt: an XML parser context
5402: *
5403: * parse an XML declaration header
1.29 daniel 5404: *
5405: * [23] XMLDecl ::= '<?xml' VersionInfo EncodingDecl? SDDecl? S? '?>'
1.1 veillard 5406: */
5407:
1.55 daniel 5408: void
5409: xmlParseXMLDecl(xmlParserCtxtPtr ctxt) {
1.1 veillard 5410: CHAR *version;
5411:
5412: /*
1.19 daniel 5413: * We know that '<?xml' is here.
1.1 veillard 5414: */
1.40 daniel 5415: SKIP(5);
1.1 veillard 5416:
1.59 daniel 5417: if (!IS_BLANK(CUR)) {
5418: if ((ctxt->sax != NULL) && (ctxt->sax->error != NULL))
1.74 daniel 5419: ctxt->sax->error(ctxt->userData, "Blank needed after '<?xml'\n");
1.59 daniel 5420: ctxt->wellFormed = 0;
5421: }
1.42 daniel 5422: SKIP_BLANKS;
1.1 veillard 5423:
5424: /*
1.29 daniel 5425: * We should have the VersionInfo here.
1.1 veillard 5426: */
1.29 daniel 5427: version = xmlParseVersionInfo(ctxt);
5428: if (version == NULL)
1.45 daniel 5429: version = xmlCharStrdup(XML_DEFAULT_VERSION);
1.72 daniel 5430: ctxt->version = xmlStrdup(version);
1.45 daniel 5431: free(version);
1.29 daniel 5432:
5433: /*
5434: * We may have the encoding declaration
5435: */
1.59 daniel 5436: if (!IS_BLANK(CUR)) {
5437: if ((CUR == '?') && (NXT(1) == '>')) {
5438: SKIP(2);
5439: return;
5440: }
5441: if ((ctxt->sax != NULL) && (ctxt->sax->error != NULL))
1.74 daniel 5442: ctxt->sax->error(ctxt->userData, "Blank needed here\n");
1.59 daniel 5443: ctxt->wellFormed = 0;
5444: }
1.72 daniel 5445: ctxt->encoding = xmlParseEncodingDecl(ctxt);
1.1 veillard 5446:
5447: /*
1.29 daniel 5448: * We may have the standalone status.
1.1 veillard 5449: */
1.72 daniel 5450: if ((ctxt->encoding != NULL) && (!IS_BLANK(CUR))) {
1.59 daniel 5451: if ((CUR == '?') && (NXT(1) == '>')) {
5452: SKIP(2);
5453: return;
5454: }
5455: if ((ctxt->sax != NULL) && (ctxt->sax->error != NULL))
1.74 daniel 5456: ctxt->sax->error(ctxt->userData, "Blank needed here\n");
1.59 daniel 5457: ctxt->wellFormed = 0;
5458: }
5459: SKIP_BLANKS;
1.72 daniel 5460: ctxt->standalone = xmlParseSDDecl(ctxt);
1.1 veillard 5461:
1.42 daniel 5462: SKIP_BLANKS;
1.40 daniel 5463: if ((CUR == '?') && (NXT(1) == '>')) {
5464: SKIP(2);
5465: } else if (CUR == '>') {
1.31 daniel 5466: /* Deprecated old WD ... */
1.55 daniel 5467: if ((ctxt->sax != NULL) && (ctxt->sax->error != NULL))
1.74 daniel 5468: ctxt->sax->error(ctxt->userData, "XML declaration must end-up with '?>'\n");
1.59 daniel 5469: ctxt->wellFormed = 0;
1.40 daniel 5470: NEXT;
1.29 daniel 5471: } else {
1.55 daniel 5472: if ((ctxt->sax != NULL) && (ctxt->sax->error != NULL))
1.74 daniel 5473: ctxt->sax->error(ctxt->userData, "parsing XML declaration: '?>' expected\n");
1.59 daniel 5474: ctxt->wellFormed = 0;
1.40 daniel 5475: MOVETO_ENDTAG(CUR_PTR);
5476: NEXT;
1.29 daniel 5477: }
1.1 veillard 5478: }
5479:
1.50 daniel 5480: /**
5481: * xmlParseMisc:
5482: * @ctxt: an XML parser context
5483: *
5484: * parse an XML Misc* optionnal field.
1.21 daniel 5485: *
1.22 daniel 5486: * [27] Misc ::= Comment | PI | S
1.1 veillard 5487: */
5488:
1.55 daniel 5489: void
5490: xmlParseMisc(xmlParserCtxtPtr ctxt) {
1.40 daniel 5491: while (((CUR == '<') && (NXT(1) == '?')) ||
5492: ((CUR == '<') && (NXT(1) == '!') &&
5493: (NXT(2) == '-') && (NXT(3) == '-')) ||
5494: IS_BLANK(CUR)) {
5495: if ((CUR == '<') && (NXT(1) == '?')) {
1.16 daniel 5496: xmlParsePI(ctxt);
1.40 daniel 5497: } else if (IS_BLANK(CUR)) {
5498: NEXT;
1.1 veillard 5499: } else
1.31 daniel 5500: xmlParseComment(ctxt, 0);
1.1 veillard 5501: }
5502: }
5503:
1.50 daniel 5504: /**
5505: * xmlParseDocument :
5506: * @ctxt: an XML parser context
5507: *
5508: * parse an XML document (and build a tree if using the standard SAX
5509: * interface).
1.21 daniel 5510: *
1.22 daniel 5511: * [1] document ::= prolog element Misc*
1.29 daniel 5512: *
5513: * [22] prolog ::= XMLDecl? Misc* (doctypedecl Misc*)?
1.50 daniel 5514: *
1.68 daniel 5515: * Returns 0, -1 in case of error. the parser context is augmented
1.50 daniel 5516: * as a result of the parsing.
1.1 veillard 5517: */
5518:
1.55 daniel 5519: int
5520: xmlParseDocument(xmlParserCtxtPtr ctxt) {
1.45 daniel 5521: xmlDefaultSAXHandlerInit();
5522:
1.91 daniel 5523: GROW;
5524:
1.14 veillard 5525: /*
1.44 daniel 5526: * SAX: beginning of the document processing.
5527: */
1.72 daniel 5528: if ((ctxt->sax) && (ctxt->sax->setDocumentLocator))
1.74 daniel 5529: ctxt->sax->setDocumentLocator(ctxt->userData, &xmlDefaultSAXLocator);
1.44 daniel 5530:
5531: /*
1.14 veillard 5532: * We should check for encoding here and plug-in some
5533: * conversion code TODO !!!!
5534: */
1.1 veillard 5535:
5536: /*
5537: * Wipe out everything which is before the first '<'
5538: */
1.59 daniel 5539: if (IS_BLANK(CUR)) {
5540: if ((ctxt->sax != NULL) && (ctxt->sax->error != NULL))
1.74 daniel 5541: ctxt->sax->error(ctxt->userData,
1.59 daniel 5542: "Extra spaces at the beginning of the document are not allowed\n");
5543: ctxt->wellFormed = 0;
5544: SKIP_BLANKS;
5545: }
5546:
5547: if (CUR == 0) {
5548: if ((ctxt->sax != NULL) && (ctxt->sax->error != NULL))
1.74 daniel 5549: ctxt->sax->error(ctxt->userData, "Document is empty\n");
1.59 daniel 5550: ctxt->wellFormed = 0;
5551: }
1.1 veillard 5552:
5553: /*
5554: * Check for the XMLDecl in the Prolog.
5555: */
1.91 daniel 5556: GROW;
1.40 daniel 5557: if ((CUR == '<') && (NXT(1) == '?') &&
5558: (NXT(2) == 'x') && (NXT(3) == 'm') &&
5559: (NXT(4) == 'l')) {
1.19 daniel 5560: xmlParseXMLDecl(ctxt);
5561: /* SKIP_EOL(cur); */
1.42 daniel 5562: SKIP_BLANKS;
1.40 daniel 5563: } else if ((CUR == '<') && (NXT(1) == '?') &&
5564: (NXT(2) == 'X') && (NXT(3) == 'M') &&
5565: (NXT(4) == 'L')) {
1.19 daniel 5566: /*
5567: * The first drafts were using <?XML and the final W3C REC
5568: * now use <?xml ...
5569: */
1.16 daniel 5570: xmlParseXMLDecl(ctxt);
1.1 veillard 5571: /* SKIP_EOL(cur); */
1.42 daniel 5572: SKIP_BLANKS;
1.1 veillard 5573: } else {
1.72 daniel 5574: ctxt->version = xmlCharStrdup(XML_DEFAULT_VERSION);
1.1 veillard 5575: }
1.72 daniel 5576: if ((ctxt->sax) && (ctxt->sax->startDocument))
1.74 daniel 5577: ctxt->sax->startDocument(ctxt->userData);
1.1 veillard 5578:
5579: /*
5580: * The Misc part of the Prolog
5581: */
1.91 daniel 5582: GROW;
1.16 daniel 5583: xmlParseMisc(ctxt);
1.1 veillard 5584:
5585: /*
1.29 daniel 5586: * Then possibly doc type declaration(s) and more Misc
1.21 daniel 5587: * (doctypedecl Misc*)?
5588: */
1.91 daniel 5589: GROW;
1.40 daniel 5590: if ((CUR == '<') && (NXT(1) == '!') &&
5591: (NXT(2) == 'D') && (NXT(3) == 'O') &&
5592: (NXT(4) == 'C') && (NXT(5) == 'T') &&
5593: (NXT(6) == 'Y') && (NXT(7) == 'P') &&
5594: (NXT(8) == 'E')) {
1.22 daniel 5595: xmlParseDocTypeDecl(ctxt);
1.96 daniel 5596: ctxt->instate = XML_PARSER_PROLOG;
1.22 daniel 5597: xmlParseMisc(ctxt);
1.21 daniel 5598: }
5599:
5600: /*
5601: * Time to start parsing the tree itself
1.1 veillard 5602: */
1.91 daniel 5603: GROW;
1.96 daniel 5604: ctxt->instate = XML_PARSER_CONTENT;
1.72 daniel 5605: xmlParseElement(ctxt);
1.96 daniel 5606: ctxt->instate = XML_PARSER_EPILOG;
1.33 daniel 5607:
5608: /*
5609: * The Misc part at the end
5610: */
5611: xmlParseMisc(ctxt);
1.16 daniel 5612:
1.59 daniel 5613: if (CUR != 0) {
5614: if ((ctxt->sax != NULL) && (ctxt->sax->error != NULL))
1.74 daniel 5615: ctxt->sax->error(ctxt->userData,
1.59 daniel 5616: "Extra content at the end of the document\n");
5617: ctxt->wellFormed = 0;
5618: }
1.96 daniel 5619: ctxt->instate = XML_PARSER_EOF;
1.59 daniel 5620:
1.44 daniel 5621: /*
5622: * SAX: end of the document processing.
5623: */
1.72 daniel 5624: if ((ctxt->sax) && (ctxt->sax->endDocument != NULL))
1.74 daniel 5625: ctxt->sax->endDocument(ctxt->userData);
1.59 daniel 5626: if (! ctxt->wellFormed) return(-1);
1.16 daniel 5627: return(0);
5628: }
5629:
1.98 daniel 5630: /************************************************************************
5631: * *
5632: * I/O front end functions to the parser *
5633: * *
5634: ************************************************************************/
5635:
1.50 daniel 5636: /**
1.86 daniel 5637: * xmlCreateDocParserCtxt :
1.50 daniel 5638: * @cur: a pointer to an array of CHAR
5639: *
1.69 daniel 5640: * Create a parser context for an XML in-memory document.
5641: *
5642: * Returns the new parser context or NULL
1.16 daniel 5643: */
1.69 daniel 5644: xmlParserCtxtPtr
5645: xmlCreateDocParserCtxt(CHAR *cur) {
1.16 daniel 5646: xmlParserCtxtPtr ctxt;
1.40 daniel 5647: xmlParserInputPtr input;
1.75 daniel 5648: xmlCharEncoding enc;
1.16 daniel 5649:
1.97 daniel 5650: ctxt = xmlNewParserCtxt();
1.16 daniel 5651: if (ctxt == NULL) {
5652: return(NULL);
5653: }
1.96 daniel 5654: input = xmlNewInputStream(ctxt);
1.40 daniel 5655: if (input == NULL) {
1.97 daniel 5656: xmlFreeParserCtxt(ctxt);
1.40 daniel 5657: return(NULL);
5658: }
5659:
1.75 daniel 5660: /*
5661: * plug some encoding conversion routines here. !!!
5662: */
5663: enc = xmlDetectCharEncoding(cur);
5664: xmlSwitchEncoding(ctxt, enc);
5665:
1.40 daniel 5666: input->base = cur;
5667: input->cur = cur;
5668:
5669: inputPush(ctxt, input);
1.69 daniel 5670: return(ctxt);
5671: }
5672:
5673: /**
5674: * xmlSAXParseDoc :
5675: * @sax: the SAX handler block
5676: * @cur: a pointer to an array of CHAR
5677: * @recovery: work in recovery mode, i.e. tries to read no Well Formed
5678: * documents
5679: *
5680: * parse an XML in-memory document and build a tree.
5681: * It use the given SAX function block to handle the parsing callback.
5682: * If sax is NULL, fallback to the default DOM tree building routines.
5683: *
5684: * Returns the resulting document tree
5685: */
5686:
5687: xmlDocPtr
5688: xmlSAXParseDoc(xmlSAXHandlerPtr sax, CHAR *cur, int recovery) {
5689: xmlDocPtr ret;
5690: xmlParserCtxtPtr ctxt;
5691:
5692: if (cur == NULL) return(NULL);
1.16 daniel 5693:
5694:
1.69 daniel 5695: ctxt = xmlCreateDocParserCtxt(cur);
5696: if (ctxt == NULL) return(NULL);
1.74 daniel 5697: if (sax != NULL) {
5698: ctxt->sax = sax;
5699: ctxt->userData = NULL;
5700: }
1.69 daniel 5701:
1.16 daniel 5702: xmlParseDocument(ctxt);
1.72 daniel 5703: if ((ctxt->wellFormed) || recovery) ret = ctxt->myDoc;
1.59 daniel 5704: else {
5705: ret = NULL;
1.72 daniel 5706: xmlFreeDoc(ctxt->myDoc);
5707: ctxt->myDoc = NULL;
1.59 daniel 5708: }
1.86 daniel 5709: if (sax != NULL)
5710: ctxt->sax = NULL;
1.69 daniel 5711: xmlFreeParserCtxt(ctxt);
1.16 daniel 5712:
1.1 veillard 5713: return(ret);
5714: }
5715:
1.50 daniel 5716: /**
1.55 daniel 5717: * xmlParseDoc :
5718: * @cur: a pointer to an array of CHAR
5719: *
5720: * parse an XML in-memory document and build a tree.
5721: *
1.68 daniel 5722: * Returns the resulting document tree
1.55 daniel 5723: */
5724:
1.69 daniel 5725: xmlDocPtr
5726: xmlParseDoc(CHAR *cur) {
1.59 daniel 5727: return(xmlSAXParseDoc(NULL, cur, 0));
1.76 daniel 5728: }
5729:
5730: /**
5731: * xmlSAXParseDTD :
5732: * @sax: the SAX handler block
5733: * @ExternalID: a NAME* containing the External ID of the DTD
5734: * @SystemID: a NAME* containing the URL to the DTD
5735: *
5736: * Load and parse an external subset.
5737: *
5738: * Returns the resulting xmlDtdPtr or NULL in case of error.
5739: */
5740:
5741: xmlDtdPtr
5742: xmlSAXParseDTD(xmlSAXHandlerPtr sax, const CHAR *ExternalID,
5743: const CHAR *SystemID) {
5744: xmlDtdPtr ret = NULL;
5745: xmlParserCtxtPtr ctxt;
1.83 daniel 5746: xmlParserInputPtr input = NULL;
1.76 daniel 5747: xmlCharEncoding enc;
5748:
5749: if ((ExternalID == NULL) && (SystemID == NULL)) return(NULL);
5750:
1.97 daniel 5751: ctxt = xmlNewParserCtxt();
1.76 daniel 5752: if (ctxt == NULL) {
5753: return(NULL);
5754: }
5755:
5756: /*
5757: * Set-up the SAX context
5758: */
5759: if (ctxt == NULL) return(NULL);
5760: if (sax != NULL) {
1.93 veillard 5761: if (ctxt->sax != NULL)
5762: free(ctxt->sax);
1.76 daniel 5763: ctxt->sax = sax;
5764: ctxt->userData = NULL;
5765: }
5766:
5767: /*
5768: * Ask the Entity resolver to load the damn thing
5769: */
5770:
5771: if ((ctxt->sax != NULL) && (ctxt->sax->resolveEntity != NULL))
5772: input = ctxt->sax->resolveEntity(ctxt->userData, ExternalID, SystemID);
5773: if (input == NULL) {
1.86 daniel 5774: if (sax != NULL) ctxt->sax = NULL;
1.76 daniel 5775: xmlFreeParserCtxt(ctxt);
5776: return(NULL);
5777: }
5778:
5779: /*
5780: * plug some encoding conversion routines here. !!!
5781: */
5782: xmlPushInput(ctxt, input);
5783: enc = xmlDetectCharEncoding(ctxt->input->cur);
5784: xmlSwitchEncoding(ctxt, enc);
5785:
1.95 veillard 5786: if (input->filename == NULL)
5787: input->filename = xmlStrdup(SystemID);
1.76 daniel 5788: input->line = 1;
5789: input->col = 1;
5790: input->base = ctxt->input->cur;
5791: input->cur = ctxt->input->cur;
5792: input->free = NULL;
5793:
5794: /*
5795: * let's parse that entity knowing it's an external subset.
5796: */
1.79 daniel 5797: xmlParseExternalSubset(ctxt, ExternalID, SystemID);
1.76 daniel 5798:
5799: if (ctxt->myDoc != NULL) {
5800: if (ctxt->wellFormed) {
5801: ret = ctxt->myDoc->intSubset;
5802: ctxt->myDoc->intSubset = NULL;
5803: } else {
5804: ret = NULL;
5805: }
5806: xmlFreeDoc(ctxt->myDoc);
5807: ctxt->myDoc = NULL;
5808: }
1.86 daniel 5809: if (sax != NULL) ctxt->sax = NULL;
1.76 daniel 5810: xmlFreeParserCtxt(ctxt);
5811:
5812: return(ret);
5813: }
5814:
5815: /**
5816: * xmlParseDTD :
5817: * @ExternalID: a NAME* containing the External ID of the DTD
5818: * @SystemID: a NAME* containing the URL to the DTD
5819: *
5820: * Load and parse an external subset.
5821: *
5822: * Returns the resulting xmlDtdPtr or NULL in case of error.
5823: */
5824:
5825: xmlDtdPtr
5826: xmlParseDTD(const CHAR *ExternalID, const CHAR *SystemID) {
5827: return(xmlSAXParseDTD(NULL, ExternalID, SystemID));
1.59 daniel 5828: }
5829:
5830: /**
5831: * xmlRecoverDoc :
5832: * @cur: a pointer to an array of CHAR
5833: *
5834: * parse an XML in-memory document and build a tree.
5835: * In the case the document is not Well Formed, a tree is built anyway
5836: *
1.68 daniel 5837: * Returns the resulting document tree
1.59 daniel 5838: */
5839:
1.69 daniel 5840: xmlDocPtr
5841: xmlRecoverDoc(CHAR *cur) {
1.59 daniel 5842: return(xmlSAXParseDoc(NULL, cur, 1));
1.55 daniel 5843: }
5844:
5845: /**
1.69 daniel 5846: * xmlCreateFileParserCtxt :
1.50 daniel 5847: * @filename: the filename
5848: *
1.69 daniel 5849: * Create a parser context for a file content.
5850: * Automatic support for ZLIB/Compress compressed document is provided
5851: * by default if found at compile-time.
1.50 daniel 5852: *
1.69 daniel 5853: * Returns the new parser context or NULL
1.9 httpng 5854: */
1.69 daniel 5855: xmlParserCtxtPtr
5856: xmlCreateFileParserCtxt(const char *filename)
5857: {
5858: xmlParserCtxtPtr ctxt;
1.40 daniel 5859: xmlParserInputPtr inputStream;
1.91 daniel 5860: xmlParserInputBufferPtr buf;
1.9 httpng 5861:
1.91 daniel 5862: buf = xmlParserInputBufferCreateFilename(filename, XML_CHAR_ENCODING_NONE);
5863: if (buf == NULL) return(NULL);
1.9 httpng 5864:
1.97 daniel 5865: ctxt = xmlNewParserCtxt();
1.16 daniel 5866: if (ctxt == NULL) {
5867: return(NULL);
5868: }
1.97 daniel 5869:
1.96 daniel 5870: inputStream = xmlNewInputStream(ctxt);
1.40 daniel 5871: if (inputStream == NULL) {
1.97 daniel 5872: xmlFreeParserCtxt(ctxt);
1.40 daniel 5873: return(NULL);
5874: }
5875:
5876: inputStream->filename = strdup(filename);
1.91 daniel 5877: inputStream->buf = buf;
5878: inputStream->base = inputStream->buf->buffer->content;
5879: inputStream->cur = inputStream->buf->buffer->content;
1.16 daniel 5880:
1.40 daniel 5881: inputPush(ctxt, inputStream);
1.69 daniel 5882: return(ctxt);
5883: }
5884:
5885: /**
5886: * xmlSAXParseFile :
5887: * @sax: the SAX handler block
5888: * @filename: the filename
5889: * @recovery: work in recovery mode, i.e. tries to read no Well Formed
5890: * documents
5891: *
5892: * parse an XML file and build a tree. Automatic support for ZLIB/Compress
5893: * compressed document is provided by default if found at compile-time.
5894: * It use the given SAX function block to handle the parsing callback.
5895: * If sax is NULL, fallback to the default DOM tree building routines.
5896: *
5897: * Returns the resulting document tree
5898: */
5899:
1.79 daniel 5900: xmlDocPtr
5901: xmlSAXParseFile(xmlSAXHandlerPtr sax, const char *filename,
1.69 daniel 5902: int recovery) {
5903: xmlDocPtr ret;
5904: xmlParserCtxtPtr ctxt;
5905:
5906: ctxt = xmlCreateFileParserCtxt(filename);
5907: if (ctxt == NULL) return(NULL);
1.74 daniel 5908: if (sax != NULL) {
1.93 veillard 5909: if (ctxt->sax != NULL)
5910: free(ctxt->sax);
1.74 daniel 5911: ctxt->sax = sax;
5912: ctxt->userData = NULL;
5913: }
1.16 daniel 5914:
5915: xmlParseDocument(ctxt);
1.40 daniel 5916:
1.72 daniel 5917: if ((ctxt->wellFormed) || recovery) ret = ctxt->myDoc;
1.59 daniel 5918: else {
5919: ret = NULL;
1.72 daniel 5920: xmlFreeDoc(ctxt->myDoc);
5921: ctxt->myDoc = NULL;
1.59 daniel 5922: }
1.86 daniel 5923: if (sax != NULL)
5924: ctxt->sax = NULL;
1.69 daniel 5925: xmlFreeParserCtxt(ctxt);
1.20 daniel 5926:
5927: return(ret);
5928: }
5929:
1.55 daniel 5930: /**
5931: * xmlParseFile :
5932: * @filename: the filename
5933: *
5934: * parse an XML file and build a tree. Automatic support for ZLIB/Compress
5935: * compressed document is provided by default if found at compile-time.
5936: *
1.68 daniel 5937: * Returns the resulting document tree
1.55 daniel 5938: */
5939:
1.79 daniel 5940: xmlDocPtr
5941: xmlParseFile(const char *filename) {
1.59 daniel 5942: return(xmlSAXParseFile(NULL, filename, 0));
5943: }
5944:
5945: /**
5946: * xmlRecoverFile :
5947: * @filename: the filename
5948: *
5949: * parse an XML file and build a tree. Automatic support for ZLIB/Compress
5950: * compressed document is provided by default if found at compile-time.
5951: * In the case the document is not Well Formed, a tree is built anyway
5952: *
1.68 daniel 5953: * Returns the resulting document tree
1.59 daniel 5954: */
5955:
1.79 daniel 5956: xmlDocPtr
5957: xmlRecoverFile(const char *filename) {
1.59 daniel 5958: return(xmlSAXParseFile(NULL, filename, 1));
1.55 daniel 5959: }
1.32 daniel 5960:
1.50 daniel 5961: /**
1.69 daniel 5962: * xmlCreateMemoryParserCtxt :
1.68 daniel 5963: * @buffer: an pointer to a char array
1.50 daniel 5964: * @size: the siwe of the array
5965: *
1.69 daniel 5966: * Create a parser context for an XML in-memory document.
1.50 daniel 5967: *
1.69 daniel 5968: * Returns the new parser context or NULL
1.20 daniel 5969: */
1.69 daniel 5970: xmlParserCtxtPtr
5971: xmlCreateMemoryParserCtxt(char *buffer, int size) {
1.20 daniel 5972: xmlParserCtxtPtr ctxt;
1.40 daniel 5973: xmlParserInputPtr input;
1.75 daniel 5974: xmlCharEncoding enc;
1.40 daniel 5975:
5976: buffer[size - 1] = '\0';
5977:
1.97 daniel 5978: ctxt = xmlNewParserCtxt();
1.20 daniel 5979: if (ctxt == NULL) {
5980: return(NULL);
5981: }
1.97 daniel 5982:
1.96 daniel 5983: input = xmlNewInputStream(ctxt);
1.40 daniel 5984: if (input == NULL) {
1.97 daniel 5985: xmlFreeParserCtxt(ctxt);
1.40 daniel 5986: return(NULL);
5987: }
1.20 daniel 5988:
1.40 daniel 5989: input->filename = NULL;
5990: input->line = 1;
5991: input->col = 1;
1.96 daniel 5992: input->buf = NULL;
1.91 daniel 5993: input->consumed = 0;
1.45 daniel 5994:
5995: /*
1.75 daniel 5996: * plug some encoding conversion routines here. !!!
1.45 daniel 5997: */
1.75 daniel 5998: enc = xmlDetectCharEncoding(buffer);
5999: xmlSwitchEncoding(ctxt, enc);
6000:
1.40 daniel 6001: input->base = buffer;
6002: input->cur = buffer;
1.69 daniel 6003: input->free = NULL;
1.20 daniel 6004:
1.40 daniel 6005: inputPush(ctxt, input);
1.69 daniel 6006: return(ctxt);
6007: }
6008:
6009: /**
6010: * xmlSAXParseMemory :
6011: * @sax: the SAX handler block
6012: * @buffer: an pointer to a char array
6013: * @size: the siwe of the array
6014: * @recovery: work in recovery mode, i.e. tries to read no Well Formed
6015: * documents
6016: *
6017: * parse an XML in-memory block and use the given SAX function block
6018: * to handle the parsing callback. If sax is NULL, fallback to the default
6019: * DOM tree building routines.
6020: *
6021: * Returns the resulting document tree
6022: */
6023: xmlDocPtr
6024: xmlSAXParseMemory(xmlSAXHandlerPtr sax, char *buffer, int size, int recovery) {
6025: xmlDocPtr ret;
6026: xmlParserCtxtPtr ctxt;
6027:
6028: ctxt = xmlCreateMemoryParserCtxt(buffer, size);
6029: if (ctxt == NULL) return(NULL);
1.74 daniel 6030: if (sax != NULL) {
6031: ctxt->sax = sax;
6032: ctxt->userData = NULL;
6033: }
1.20 daniel 6034:
6035: xmlParseDocument(ctxt);
1.40 daniel 6036:
1.72 daniel 6037: if ((ctxt->wellFormed) || recovery) ret = ctxt->myDoc;
1.59 daniel 6038: else {
6039: ret = NULL;
1.72 daniel 6040: xmlFreeDoc(ctxt->myDoc);
6041: ctxt->myDoc = NULL;
1.59 daniel 6042: }
1.86 daniel 6043: if (sax != NULL)
6044: ctxt->sax = NULL;
1.69 daniel 6045: xmlFreeParserCtxt(ctxt);
1.16 daniel 6046:
1.9 httpng 6047: return(ret);
1.17 daniel 6048: }
6049:
1.55 daniel 6050: /**
6051: * xmlParseMemory :
1.68 daniel 6052: * @buffer: an pointer to a char array
1.55 daniel 6053: * @size: the size of the array
6054: *
6055: * parse an XML in-memory block and build a tree.
6056: *
1.68 daniel 6057: * Returns the resulting document tree
1.55 daniel 6058: */
6059:
6060: xmlDocPtr xmlParseMemory(char *buffer, int size) {
1.59 daniel 6061: return(xmlSAXParseMemory(NULL, buffer, size, 0));
6062: }
6063:
6064: /**
6065: * xmlRecoverMemory :
1.68 daniel 6066: * @buffer: an pointer to a char array
1.59 daniel 6067: * @size: the size of the array
6068: *
6069: * parse an XML in-memory block and build a tree.
6070: * In the case the document is not Well Formed, a tree is built anyway
6071: *
1.68 daniel 6072: * Returns the resulting document tree
1.59 daniel 6073: */
6074:
6075: xmlDocPtr xmlRecoverMemory(char *buffer, int size) {
6076: return(xmlSAXParseMemory(NULL, buffer, size, 1));
1.17 daniel 6077: }
6078:
6079:
1.50 daniel 6080: /**
6081: * xmlSetupParserForBuffer:
6082: * @ctxt: an XML parser context
6083: * @buffer: a CHAR * buffer
6084: * @filename: a file name
6085: *
1.19 daniel 6086: * Setup the parser context to parse a new buffer; Clears any prior
6087: * contents from the parser context. The buffer parameter must not be
6088: * NULL, but the filename parameter can be
6089: */
1.55 daniel 6090: void
6091: xmlSetupParserForBuffer(xmlParserCtxtPtr ctxt, const CHAR* buffer,
1.17 daniel 6092: const char* filename)
6093: {
1.96 daniel 6094: xmlParserInputPtr input;
1.40 daniel 6095:
1.96 daniel 6096: input = xmlNewInputStream(ctxt);
6097: if (input == NULL) {
6098: perror("malloc");
6099: free(ctxt);
6100: exit(1);
6101: }
6102:
6103: xmlClearParserCtxt(ctxt);
6104: if (filename != NULL)
6105: input->filename = strdup(filename);
6106: input->base = buffer;
6107: input->cur = buffer;
6108: inputPush(ctxt, input);
1.17 daniel 6109: }
6110:
1.32 daniel 6111:
1.98 daniel 6112: /************************************************************************
6113: * *
6114: * Miscellaneous *
6115: * *
6116: ************************************************************************/
6117:
6118:
1.50 daniel 6119: /**
6120: * xmlParserFindNodeInfo:
6121: * @ctxt: an XML parser context
6122: * @node: an XML node within the tree
6123: *
6124: * Find the parser node info struct for a given node
6125: *
1.68 daniel 6126: * Returns an xmlParserNodeInfo block pointer or NULL
1.32 daniel 6127: */
6128: const xmlParserNodeInfo* xmlParserFindNodeInfo(const xmlParserCtxt* ctx,
6129: const xmlNode* node)
6130: {
6131: unsigned long pos;
6132:
6133: /* Find position where node should be at */
6134: pos = xmlParserFindNodeInfoIndex(&ctx->node_seq, node);
6135: if ( ctx->node_seq.buffer[pos].node == node )
6136: return &ctx->node_seq.buffer[pos];
6137: else
6138: return NULL;
6139: }
6140:
6141:
1.50 daniel 6142: /**
6143: * xmlInitNodeInfoSeq :
6144: * @seq: a node info sequence pointer
6145: *
6146: * -- Initialize (set to initial state) node info sequence
1.32 daniel 6147: */
1.55 daniel 6148: void
6149: xmlInitNodeInfoSeq(xmlParserNodeInfoSeqPtr seq)
1.32 daniel 6150: {
6151: seq->length = 0;
6152: seq->maximum = 0;
6153: seq->buffer = NULL;
6154: }
6155:
1.50 daniel 6156: /**
6157: * xmlClearNodeInfoSeq :
6158: * @seq: a node info sequence pointer
6159: *
6160: * -- Clear (release memory and reinitialize) node
1.32 daniel 6161: * info sequence
6162: */
1.55 daniel 6163: void
6164: xmlClearNodeInfoSeq(xmlParserNodeInfoSeqPtr seq)
1.32 daniel 6165: {
6166: if ( seq->buffer != NULL )
6167: free(seq->buffer);
6168: xmlInitNodeInfoSeq(seq);
6169: }
6170:
6171:
1.50 daniel 6172: /**
6173: * xmlParserFindNodeInfoIndex:
6174: * @seq: a node info sequence pointer
6175: * @node: an XML node pointer
6176: *
6177: *
1.32 daniel 6178: * xmlParserFindNodeInfoIndex : Find the index that the info record for
6179: * the given node is or should be at in a sorted sequence
1.68 daniel 6180: *
6181: * Returns a long indicating the position of the record
1.32 daniel 6182: */
6183: unsigned long xmlParserFindNodeInfoIndex(const xmlParserNodeInfoSeq* seq,
6184: const xmlNode* node)
6185: {
6186: unsigned long upper, lower, middle;
6187: int found = 0;
6188:
6189: /* Do a binary search for the key */
6190: lower = 1;
6191: upper = seq->length;
6192: middle = 0;
6193: while ( lower <= upper && !found) {
6194: middle = lower + (upper - lower) / 2;
6195: if ( node == seq->buffer[middle - 1].node )
6196: found = 1;
6197: else if ( node < seq->buffer[middle - 1].node )
6198: upper = middle - 1;
6199: else
6200: lower = middle + 1;
6201: }
6202:
6203: /* Return position */
6204: if ( middle == 0 || seq->buffer[middle - 1].node < node )
6205: return middle;
6206: else
6207: return middle - 1;
6208: }
6209:
6210:
1.50 daniel 6211: /**
6212: * xmlParserAddNodeInfo:
6213: * @ctxt: an XML parser context
1.68 daniel 6214: * @info: a node info sequence pointer
1.50 daniel 6215: *
6216: * Insert node info record into the sorted sequence
1.32 daniel 6217: */
1.55 daniel 6218: void
6219: xmlParserAddNodeInfo(xmlParserCtxtPtr ctxt,
1.68 daniel 6220: const xmlParserNodeInfo* info)
1.32 daniel 6221: {
6222: unsigned long pos;
6223: static unsigned int block_size = 5;
6224:
6225: /* Find pos and check to see if node is already in the sequence */
1.55 daniel 6226: pos = xmlParserFindNodeInfoIndex(&ctxt->node_seq, info->node);
6227: if ( pos < ctxt->node_seq.length
6228: && ctxt->node_seq.buffer[pos].node == info->node ) {
6229: ctxt->node_seq.buffer[pos] = *info;
1.32 daniel 6230: }
6231:
6232: /* Otherwise, we need to add new node to buffer */
6233: else {
6234: /* Expand buffer by 5 if needed */
1.55 daniel 6235: if ( ctxt->node_seq.length + 1 > ctxt->node_seq.maximum ) {
1.32 daniel 6236: xmlParserNodeInfo* tmp_buffer;
1.55 daniel 6237: unsigned int byte_size = (sizeof(*ctxt->node_seq.buffer)
6238: *(ctxt->node_seq.maximum + block_size));
1.32 daniel 6239:
1.55 daniel 6240: if ( ctxt->node_seq.buffer == NULL )
1.32 daniel 6241: tmp_buffer = (xmlParserNodeInfo*)malloc(byte_size);
6242: else
1.55 daniel 6243: tmp_buffer = (xmlParserNodeInfo*)realloc(ctxt->node_seq.buffer, byte_size);
1.32 daniel 6244:
6245: if ( tmp_buffer == NULL ) {
1.55 daniel 6246: if ((ctxt->sax != NULL) && (ctxt->sax->error != NULL))
1.74 daniel 6247: ctxt->sax->error(ctxt->userData, "Out of memory\n");
1.32 daniel 6248: return;
6249: }
1.55 daniel 6250: ctxt->node_seq.buffer = tmp_buffer;
6251: ctxt->node_seq.maximum += block_size;
1.32 daniel 6252: }
6253:
6254: /* If position is not at end, move elements out of the way */
1.55 daniel 6255: if ( pos != ctxt->node_seq.length ) {
1.32 daniel 6256: unsigned long i;
6257:
1.55 daniel 6258: for ( i = ctxt->node_seq.length; i > pos; i-- )
6259: ctxt->node_seq.buffer[i] = ctxt->node_seq.buffer[i - 1];
1.32 daniel 6260: }
6261:
6262: /* Copy element and increase length */
1.55 daniel 6263: ctxt->node_seq.buffer[pos] = *info;
6264: ctxt->node_seq.length++;
1.32 daniel 6265: }
6266: }
1.77 daniel 6267:
1.98 daniel 6268:
6269: /**
6270: * xmlSubstituteEntitiesDefault :
6271: * @val: int 0 or 1
6272: *
6273: * Set and return the previous value for default entity support.
6274: * Initially the parser always keep entity references instead of substituting
6275: * entity values in the output. This function has to be used to change the
6276: * default parser behaviour
6277: * SAX::subtituteEntities() has to be used for changing that on a file by
6278: * file basis.
6279: *
6280: * Returns the last value for 0 for no substitution, 1 for substitution.
6281: */
6282:
6283: int
6284: xmlSubstituteEntitiesDefault(int val) {
6285: int old = xmlSubstituteEntitiesDefaultValue;
6286:
6287: xmlSubstituteEntitiesDefaultValue = val;
6288: return(old);
6289: }
1.77 daniel 6290:
Webmaster