Annotation of XML/parser.c, revision 1.104
1.1 veillard 1: /*
1.3 veillard 2: * parser.c : an XML 1.0 non-verifying parser
1.15 veillard 3: *
4: * See Copyright for the status of this software.
5: *
1.60 daniel 6: * Daniel.Veillard@w3.org
1.1 veillard 7: */
8:
1.26 daniel 9: #ifdef WIN32
10: #define HAVE_FCNTL_H
11: #include <io.h>
12: #else
1.9 httpng 13: #include <config.h>
1.26 daniel 14: #endif
1.1 veillard 15: #include <stdio.h>
16: #include <ctype.h>
1.14 veillard 17: #include <string.h> /* for memset() only */
1.50 daniel 18: #include <stdlib.h>
1.9 httpng 19: #include <sys/stat.h>
20: #ifdef HAVE_FCNTL_H
21: #include <fcntl.h>
22: #endif
1.10 httpng 23: #ifdef HAVE_UNISTD_H
24: #include <unistd.h>
25: #endif
1.20 daniel 26: #ifdef HAVE_ZLIB_H
27: #include <zlib.h>
28: #endif
1.1 veillard 29:
1.14 veillard 30: #include "tree.h"
1.1 veillard 31: #include "parser.h"
1.14 veillard 32: #include "entities.h"
1.75 daniel 33: #include "encoding.h"
1.61 daniel 34: #include "valid.h"
1.69 daniel 35: #include "parserInternals.h"
1.91 daniel 36: #include "xmlIO.h"
1.1 veillard 37:
1.86 daniel 38: const char *xmlParserVersion = LIBXML_VERSION;
39:
1.91 daniel 40: #define XML_MAX_NAMELEN 1000
41:
42: /************************************************************************
43: * *
44: * Input handling functions for progressive parsing *
45: * *
46: ************************************************************************/
47:
/* Define DEBUG_INPUT to trace and sanity-check the input buffer helpers. */
/* #define DEBUG_INPUT */

/* Amount of lookahead, in CHARs, used by the input helpers and macros below. */
#define INPUT_CHUNK	50

/*
 * CHECK_BUFFER(in) calls check_buffer() on the input when DEBUG_INPUT is
 * defined and expands to nothing otherwise.
 */
#ifndef DEBUG_INPUT
#define CHECK_BUFFER(in)
#else
#define CHECK_BUFFER(in) check_buffer(in)
#endif
57:
58: void check_buffer(xmlParserInputPtr in) {
59: if (in->base != in->buf->buffer->content) {
60: fprintf(stderr, "xmlParserInput: base mismatch problem\n");
61: }
62: if (in->cur < in->base) {
63: fprintf(stderr, "xmlParserInput: cur < base problem\n");
64: }
65: if (in->cur > in->base + in->buf->buffer->use) {
66: fprintf(stderr, "xmlParserInput: cur > base + use problem\n");
67: }
68: fprintf(stderr,"buffer %x : content %x, cur %d, use %d, size %d\n",
69: (int) in, (int) in->buf->buffer->content, in->cur - in->base,
70: in->buf->buffer->use, in->buf->buffer->size);
71: }
72:
73:
74: /**
75: * xmlParserInputRead:
76: * @in: an XML parser input
77: * @len: an indicative size for the lookahead
78: *
79: * This function refresh the input for the parser. It doesn't try to
80: * preserve pointers to the input buffer, and discard already read data
81: *
82: * Returns the number of CHARs read, or -1 in case of error, 0 indicate the
83: * end of this entity
84: */
85: int
86: xmlParserInputRead(xmlParserInputPtr in, int len) {
87: int ret;
88: int used;
89: int index;
90:
91: #ifdef DEBUG_INPUT
92: fprintf(stderr, "Read\n");
93: #endif
94: if (in->buf == NULL) return(-1);
95: if (in->base == NULL) return(-1);
96: if (in->cur == NULL) return(-1);
97: if (in->buf->buffer == NULL) return(-1);
98:
99: CHECK_BUFFER(in);
100:
101: used = in->cur - in->buf->buffer->content;
102: ret = xmlBufferShrink(in->buf->buffer, used);
103: if (ret > 0) {
104: in->cur -= ret;
105: in->consumed += ret;
106: }
107: ret = xmlParserInputBufferRead(in->buf, len);
108: if (in->base != in->buf->buffer->content) {
109: /*
110: * the buffer has been realloced
111: */
112: index = in->cur - in->base;
113: in->base = in->buf->buffer->content;
114: in->cur = &in->buf->buffer->content[index];
115: }
116:
117: CHECK_BUFFER(in);
118:
119: return(ret);
120: }
121:
122: /**
123: * xmlParserInputGrow:
124: * @in: an XML parser input
125: * @len: an indicative size for the lookahead
126: *
127: * This function increase the input for the parser. It tries to
128: * preserve pointers to the input buffer, and keep already read data
129: *
130: * Returns the number of CHARs read, or -1 in case of error, 0 indicate the
131: * end of this entity
132: */
133: int
134: xmlParserInputGrow(xmlParserInputPtr in, int len) {
135: int ret;
136: int index;
137:
138: #ifdef DEBUG_INPUT
139: fprintf(stderr, "Grow\n");
140: #endif
141: if (in->buf == NULL) return(-1);
142: if (in->base == NULL) return(-1);
143: if (in->cur == NULL) return(-1);
144: if (in->buf->buffer == NULL) return(-1);
145:
146: CHECK_BUFFER(in);
147:
148: index = in->cur - in->base;
149: if (in->buf->buffer->use > index + INPUT_CHUNK) {
150:
151: CHECK_BUFFER(in);
152:
153: return(0);
154: }
155: ret = xmlParserInputBufferGrow(in->buf, len);
156: if (in->base != in->buf->buffer->content) {
157: /*
158: * the buffer has been realloced
159: */
160: index = in->cur - in->base;
161: in->base = in->buf->buffer->content;
162: in->cur = &in->buf->buffer->content[index];
163: }
164:
165: CHECK_BUFFER(in);
166:
167: return(ret);
168: }
169:
170: /**
171: * xmlParserInputShrink:
172: * @in: an XML parser input
173: *
174: * This function removes used input for the parser.
175: */
176: void
177: xmlParserInputShrink(xmlParserInputPtr in) {
178: int used;
179: int ret;
180: int index;
181:
182: #ifdef DEBUG_INPUT
183: fprintf(stderr, "Shrink\n");
184: #endif
185: if (in->buf == NULL) return;
186: if (in->base == NULL) return;
187: if (in->cur == NULL) return;
188: if (in->buf->buffer == NULL) return;
189:
190: CHECK_BUFFER(in);
191:
192: used = in->cur - in->buf->buffer->content;
193: if (used > INPUT_CHUNK) {
194: ret = xmlBufferShrink(in->buf->buffer, used);
195: if (ret > 0) {
196: in->cur -= ret;
197: in->consumed += ret;
198: }
199: }
200:
201: CHECK_BUFFER(in);
202:
203: if (in->buf->buffer->use > INPUT_CHUNK) {
204: return;
205: }
206: xmlParserInputBufferRead(in->buf, 2 * INPUT_CHUNK);
207: if (in->base != in->buf->buffer->content) {
208: /*
209: * the buffer has been realloced
210: */
211: index = in->cur - in->base;
212: in->base = in->buf->buffer->content;
213: in->cur = &in->buf->buffer->content[index];
214: }
215:
216: CHECK_BUFFER(in);
217: }
218:
1.45 daniel 219: /************************************************************************
220: * *
221: * Parser stacks related functions and macros *
222: * *
223: ************************************************************************/
1.79 daniel 224:
/* Initial value given to ctxt->replaceEntities by xmlInitParserCtxt(). */
int xmlSubstituteEntitiesDefaultValue = 0;

/* Initial value given to ctxt->validate by xmlInitParserCtxt(). */
int xmlDoValidityCheckingDefaultValue = 0;
1.79 daniel 227:
1.1 veillard 228: /*
1.40 daniel 229: * Generic function for accessing stacks in the Parser Context
1.1 veillard 230: */
231:
1.31 daniel 232: #define PUSH_AND_POP(type, name) \
1.72 daniel 233: extern int name##Push(xmlParserCtxtPtr ctxt, type value) { \
1.31 daniel 234: if (ctxt->name##Nr >= ctxt->name##Max) { \
235: ctxt->name##Max *= 2; \
1.40 daniel 236: ctxt->name##Tab = (void *) realloc(ctxt->name##Tab, \
237: ctxt->name##Max * sizeof(ctxt->name##Tab[0])); \
238: if (ctxt->name##Tab == NULL) { \
1.31 daniel 239: fprintf(stderr, "realloc failed !\n"); \
240: exit(1); \
241: } \
242: } \
1.40 daniel 243: ctxt->name##Tab[ctxt->name##Nr] = value; \
244: ctxt->name = value; \
245: return(ctxt->name##Nr++); \
1.31 daniel 246: } \
1.72 daniel 247: extern type name##Pop(xmlParserCtxtPtr ctxt) { \
1.69 daniel 248: type ret; \
1.40 daniel 249: if (ctxt->name##Nr <= 0) return(0); \
250: ctxt->name##Nr--; \
1.50 daniel 251: if (ctxt->name##Nr > 0) \
252: ctxt->name = ctxt->name##Tab[ctxt->name##Nr - 1]; \
253: else \
254: ctxt->name = NULL; \
1.69 daniel 255: ret = ctxt->name##Tab[ctxt->name##Nr]; \
256: ctxt->name##Tab[ctxt->name##Nr] = 0; \
257: return(ret); \
1.31 daniel 258: } \
259:
1.40 daniel 260: PUSH_AND_POP(xmlParserInputPtr, input)
1.41 daniel 261: PUSH_AND_POP(xmlNodePtr, node)
1.40 daniel 262:
/*
 * Macros for accessing the content. Those should be used only by the parser,
 * and not exported. They all expect a variable named ctxt, holding the
 * parser context, to be in scope.
 *
 * Dirty macros, i.e. one need to make assumption on the context to use them
 *
 *   CUR_PTR  return the current pointer to the CHAR to be parsed.
 *   CUR      returns the current CHAR value, i.e. a 8 bit value if compiled
 *            in ISO-Latin or UTF-8, and the current 16 bit value if compiled
 *            in UNICODE mode. A pending ctxt->token takes precedence over
 *            the input. This should be used internally by the parser
 *            only to compare to ASCII values otherwise it would break when
 *            running with UTF-8 encoding.
 *   NXT(n)   returns the n'th next CHAR. Same as CUR it should be used only
 *            to compare on ASCII based substring.
 *   SKIP(n)  Skip n CHAR, and must also be used only to skip ASCII defined
 *            strings within the parser.
 *   SHRINK   drop the consumed part of the current input, then pop the
 *            input if it is exhausted and cannot be grown.
 *   GROW     try to get more lookahead, then pop the input if it is
 *            exhausted and cannot be grown.
 *
 * SHRINK and GROW are wrapped in do { } while (0) so that they behave as a
 * single statement, e.g. as the body of an unbraced if; they must be
 * followed by a semicolon.
 *
 * Clean macros, not dependent of an ASCII context, expect UTF-8 encoding
 *
 *   CURRENT  Returns the current char value, with the full decoding of
 *            UTF-8 if we are using this mode. It returns an int.
 *   NEXT     Skip to the next character, this does the proper decoding
 *            in UTF-8 mode. It also pop-up unfinished entities on the fly.
 *   COPY(to) copy one char to *to, increment CUR_PTR and to accordingly
 */

#define CUR (ctxt->token ? ctxt->token : (*ctxt->input->cur))
#define SKIP(val) (ctxt->input->cur += (val))
#define NXT(val) (ctxt->input->cur[(val)])
#define CUR_PTR ctxt->input->cur

#define SHRINK do {							\
    xmlParserInputShrink(ctxt->input);					\
    if ((*ctxt->input->cur == 0) &&					\
        (xmlParserInputGrow(ctxt->input, INPUT_CHUNK) <= 0))		\
        xmlPopInput(ctxt);						\
} while (0)

#define GROW do {							\
    xmlParserInputGrow(ctxt->input, INPUT_CHUNK);			\
    if ((*ctxt->input->cur == 0) &&					\
        (xmlParserInputGrow(ctxt->input, INPUT_CHUNK) <= 0))		\
        xmlPopInput(ctxt);						\
} while (0)
1.55 daniel 302:
/*
 * SKIP_BLANKS: skip the blank CHARs at the current position. When the
 * blanks are followed by a '%' or a '&' the matching reference handler is
 * called, and the whole step is repeated as long as this leaves a blank
 * as the current CHAR.
 *
 * The expansion already ends with a semicolon, so "SKIP_BLANKS;" yields an
 * extra empty statement: do not use it as the unbraced body of an if
 * that has an else branch.
 */
#define SKIP_BLANKS 							\
    do { 								\
        while (IS_BLANK(CUR)) { NEXT; }					\
        if (*ctxt->input->cur == '%') {					\
            xmlParserHandlePEReference(ctxt);				\
        }								\
        if (*ctxt->input->cur == '&') {					\
            xmlParserHandleReference(ctxt);				\
        }								\
    } while (IS_BLANK(CUR));
1.55 daniel 309:
/*
 * CURRENT is the CHAR at the current input position, ignoring any pending
 * ctxt->token.
 *
 * NEXT moves one CHAR forward:
 *  - a pending ctxt->token is simply dropped;
 *  - otherwise, if the current input is exhausted and cannot be grown it
 *    is popped, else the line/col counters are updated, the position is
 *    advanced and more data is requested when the end of the buffer is
 *    reached;
 *  - in that second case a '%' or '&' found at the new position is handed
 *    to the matching reference handler.
 *
 * NEXT expands to a brace-enclosed block, not to an expression.
 * Both macros are only provided when USE_UTF_8 is not defined.
 */
#ifndef USE_UTF_8
#define CURRENT (*ctxt->input->cur)
#define NEXT {								\
    if (ctxt->token == 0) {						\
        if ((*ctxt->input->cur != 0) ||					\
            (xmlParserInputGrow(ctxt->input, INPUT_CHUNK) > 0)) {	\
            if (*(ctxt->input->cur) == '\n') {				\
                ctxt->input->line++;					\
                ctxt->input->col = 1;					\
            } else {							\
                ctxt->input->col++;					\
            }								\
            ctxt->input->cur++;						\
            if (*ctxt->input->cur == 0)					\
                xmlParserInputGrow(ctxt->input, INPUT_CHUNK);		\
        } else {							\
            xmlPopInput(ctxt);						\
        }								\
        if (*ctxt->input->cur == '%') xmlParserHandlePEReference(ctxt);	\
        if (*ctxt->input->cur == '&') xmlParserHandleReference(ctxt);	\
    } else {								\
        ctxt->token = 0;						\
    }}

#else
#endif
1.42 daniel 332:
1.97 daniel 333: /************************************************************************
334: * *
335: * Commodity functions to handle entities processing *
336: * *
337: ************************************************************************/
1.40 daniel 338:
1.50 daniel 339: /**
340: * xmlPopInput:
341: * @ctxt: an XML parser context
342: *
1.40 daniel 343: * xmlPopInput: the current input pointed by ctxt->input came to an end
344: * pop it and return the next char.
1.45 daniel 345: *
1.68 daniel 346: * Returns the current CHAR in the parser context
1.40 daniel 347: */
1.55 daniel 348: CHAR
349: xmlPopInput(xmlParserCtxtPtr ctxt) {
1.40 daniel 350: if (ctxt->inputNr == 1) return(0); /* End of main Input */
1.69 daniel 351: xmlFreeInputStream(inputPop(ctxt));
1.97 daniel 352: if ((*ctxt->input->cur == 0) &&
353: (xmlParserInputGrow(ctxt->input, INPUT_CHUNK) <= 0))
354: return(xmlPopInput(ctxt));
1.40 daniel 355: return(CUR);
356: }
357:
1.50 daniel 358: /**
359: * xmlPushInput:
360: * @ctxt: an XML parser context
361: * @input: an XML parser input fragment (entity, XML fragment ...).
362: *
1.40 daniel 363: * xmlPushInput: switch to a new input stream which is stacked on top
364: * of the previous one(s).
365: */
1.55 daniel 366: void
367: xmlPushInput(xmlParserCtxtPtr ctxt, xmlParserInputPtr input) {
1.40 daniel 368: if (input == NULL) return;
369: inputPush(ctxt, input);
370: }
371:
1.50 daniel 372: /**
1.69 daniel 373: * xmlFreeInputStream:
1.101 daniel 374: * @input: an xmlP arserInputPtr
1.69 daniel 375: *
376: * Free up an input stream.
377: */
378: void
379: xmlFreeInputStream(xmlParserInputPtr input) {
380: if (input == NULL) return;
381:
382: if (input->filename != NULL) free((char *) input->filename);
1.94 daniel 383: if (input->directory != NULL) free((char *) input->directory);
1.69 daniel 384: if ((input->free != NULL) && (input->base != NULL))
385: input->free((char *) input->base);
1.93 veillard 386: if (input->buf != NULL)
387: xmlFreeParserInputBuffer(input->buf);
1.69 daniel 388: memset(input, -1, sizeof(xmlParserInput));
389: free(input);
390: }
391:
392: /**
1.96 daniel 393: * xmlNewInputStream:
394: * @ctxt: an XML parser context
395: *
396: * Create a new input stream structure
397: * Returns the new input stream or NULL
398: */
399: xmlParserInputPtr
400: xmlNewInputStream(xmlParserCtxtPtr ctxt) {
401: xmlParserInputPtr input;
402:
403: input = (xmlParserInputPtr) malloc(sizeof(xmlParserInput));
404: if (input == NULL) {
405: if ((ctxt->sax != NULL) && (ctxt->sax->error != NULL))
406: ctxt->sax->error(ctxt->userData, "malloc: couldn't allocate a new input stream\n");
407: return(NULL);
408: }
409: input->filename = NULL;
410: input->directory = NULL;
411: input->base = NULL;
412: input->cur = NULL;
413: input->buf = NULL;
414: input->line = 1;
415: input->col = 1;
416: input->buf = NULL;
417: input->free = NULL;
418: input->consumed = 0;
419: return(input);
420: }
421:
422: /**
1.50 daniel 423: * xmlNewEntityInputStream:
424: * @ctxt: an XML parser context
425: * @entity: an Entity pointer
426: *
1.82 daniel 427: * Create a new input stream based on an xmlEntityPtr
1.68 daniel 428: * Returns the new input stream
1.45 daniel 429: */
1.50 daniel 430: xmlParserInputPtr
431: xmlNewEntityInputStream(xmlParserCtxtPtr ctxt, xmlEntityPtr entity) {
1.45 daniel 432: xmlParserInputPtr input;
433:
434: if (entity == NULL) {
1.55 daniel 435: if ((ctxt->sax != NULL) && (ctxt->sax->error != NULL))
1.74 daniel 436: ctxt->sax->error(ctxt->userData,
1.45 daniel 437: "internal: xmlNewEntityInputStream entity = NULL\n");
1.50 daniel 438: return(NULL);
1.45 daniel 439: }
440: if (entity->content == NULL) {
1.55 daniel 441: if ((ctxt->sax != NULL) && (ctxt->sax->error != NULL))
1.74 daniel 442: ctxt->sax->error(ctxt->userData,
1.45 daniel 443: "internal: xmlNewEntityInputStream entity->input = NULL\n");
1.50 daniel 444: return(NULL);
1.45 daniel 445: }
1.96 daniel 446: input = xmlNewInputStream(ctxt);
1.45 daniel 447: if (input == NULL) {
1.50 daniel 448: return(NULL);
1.45 daniel 449: }
450: input->filename = entity->SystemID; /* TODO !!! char <- CHAR */
451: input->base = entity->content;
452: input->cur = entity->content;
1.50 daniel 453: return(input);
1.45 daniel 454: }
455:
1.59 daniel 456: /**
457: * xmlNewStringInputStream:
458: * @ctxt: an XML parser context
1.96 daniel 459: * @buffer: an memory buffer
1.59 daniel 460: *
461: * Create a new input stream based on a memory buffer.
1.68 daniel 462: * Returns the new input stream
1.59 daniel 463: */
464: xmlParserInputPtr
1.96 daniel 465: xmlNewStringInputStream(xmlParserCtxtPtr ctxt, const CHAR *buffer) {
1.59 daniel 466: xmlParserInputPtr input;
467:
1.96 daniel 468: if (buffer == NULL) {
1.59 daniel 469: if ((ctxt->sax != NULL) && (ctxt->sax->error != NULL))
1.74 daniel 470: ctxt->sax->error(ctxt->userData,
1.59 daniel 471: "internal: xmlNewStringInputStream string = NULL\n");
472: return(NULL);
473: }
1.96 daniel 474: input = xmlNewInputStream(ctxt);
1.59 daniel 475: if (input == NULL) {
476: return(NULL);
477: }
1.96 daniel 478: input->base = buffer;
479: input->cur = buffer;
1.59 daniel 480: return(input);
481: }
482:
1.76 daniel 483: /**
484: * xmlNewInputFromFile:
485: * @ctxt: an XML parser context
486: * @filename: the filename to use as entity
487: *
488: * Create a new input stream based on a file.
489: *
490: * Returns the new input stream or NULL in case of error
491: */
492: xmlParserInputPtr
1.79 daniel 493: xmlNewInputFromFile(xmlParserCtxtPtr ctxt, const char *filename) {
1.91 daniel 494: xmlParserInputBufferPtr buf;
1.76 daniel 495: xmlParserInputPtr inputStream;
1.94 daniel 496: const char *directory = NULL;
1.76 daniel 497:
1.96 daniel 498: if (ctxt == NULL) return(NULL);
1.91 daniel 499: buf = xmlParserInputBufferCreateFilename(filename, XML_CHAR_ENCODING_NONE);
1.94 daniel 500: if (buf == NULL) {
501: if ((ctxt->input != NULL) && (ctxt->input->directory != NULL)) {
502: char name[1024];
503: #ifdef WIN32
504: sprintf(name, "%s\\%s", ctxt->input->directory, filename);
505: #else
506: sprintf(name, "%s/%s", ctxt->input->directory, filename);
507: #endif
508: buf = xmlParserInputBufferCreateFilename(name,
509: XML_CHAR_ENCODING_NONE);
510: if (buf == NULL)
511: return(NULL);
512: directory = strdup(ctxt->input->directory);
513: } else
514: return(NULL);
515: }
516: if (directory == NULL)
517: directory = xmlParserGetDirectory(filename);
1.76 daniel 518:
1.96 daniel 519: inputStream = xmlNewInputStream(ctxt);
1.76 daniel 520: if (inputStream == NULL) {
1.96 daniel 521: if (directory != NULL) free((char *) directory);
1.76 daniel 522: return(NULL);
523: }
524:
525: inputStream->filename = strdup(filename);
1.94 daniel 526: inputStream->directory = directory;
1.91 daniel 527: inputStream->buf = buf;
1.76 daniel 528:
1.91 daniel 529: inputStream->base = inputStream->buf->buffer->content;
530: inputStream->cur = inputStream->buf->buffer->content;
1.76 daniel 531: return(inputStream);
532: }
533:
1.77 daniel 534: /************************************************************************
535: * *
1.97 daniel 536: * Commodity functions to handle parser contexts *
537: * *
538: ************************************************************************/
539:
540: /**
541: * xmlInitParserCtxt:
542: * @ctxt: an XML parser context
543: *
544: * Initialize a parser context
545: */
546:
547: void
548: xmlInitParserCtxt(xmlParserCtxtPtr ctxt)
549: {
550: xmlSAXHandler *sax;
551:
552: sax = (xmlSAXHandler *) malloc(sizeof(xmlSAXHandler));
553: if (sax == NULL) {
554: fprintf(stderr, "xmlInitParserCtxt: out of memory\n");
555: }
556:
557: /* Allocate the Input stack */
558: ctxt->inputTab = (xmlParserInputPtr *) malloc(5 * sizeof(xmlParserInputPtr));
559: ctxt->inputNr = 0;
560: ctxt->inputMax = 5;
561: ctxt->input = NULL;
562: ctxt->version = NULL;
563: ctxt->encoding = NULL;
564: ctxt->standalone = -1;
1.98 daniel 565: ctxt->hasExternalSubset = 0;
566: ctxt->hasPErefs = 0;
1.97 daniel 567: ctxt->html = 0;
1.98 daniel 568: ctxt->external = 0;
1.97 daniel 569: ctxt->instate = XML_PARSER_PROLOG;
570: ctxt->token = 0;
571:
572: /* Allocate the Node stack */
573: ctxt->nodeTab = (xmlNodePtr *) malloc(10 * sizeof(xmlNodePtr));
574: ctxt->nodeNr = 0;
575: ctxt->nodeMax = 10;
576: ctxt->node = NULL;
577:
578: if (sax == NULL) ctxt->sax = &xmlDefaultSAXHandler;
579: else {
580: ctxt->sax = sax;
581: memcpy(sax, &xmlDefaultSAXHandler, sizeof(xmlSAXHandler));
582: }
583: ctxt->userData = ctxt;
584: ctxt->myDoc = NULL;
585: ctxt->wellFormed = 1;
1.99 daniel 586: ctxt->valid = 1;
1.100 daniel 587: ctxt->validate = xmlDoValidityCheckingDefaultValue;
588: ctxt->vctxt.userData = ctxt;
589: ctxt->vctxt.error = xmlParserValidityError;
590: ctxt->vctxt.warning = xmlParserValidityWarning;
1.97 daniel 591: ctxt->replaceEntities = xmlSubstituteEntitiesDefaultValue;
592: ctxt->record_info = 0;
593: xmlInitNodeInfoSeq(&ctxt->node_seq);
594: }
595:
596: /**
597: * xmlFreeParserCtxt:
598: * @ctxt: an XML parser context
599: *
600: * Free all the memory used by a parser context. However the parsed
601: * document in ctxt->myDoc is not freed.
602: */
603:
604: void
605: xmlFreeParserCtxt(xmlParserCtxtPtr ctxt)
606: {
607: xmlParserInputPtr input;
608:
609: if (ctxt == NULL) return;
610:
611: while ((input = inputPop(ctxt)) != NULL) {
612: xmlFreeInputStream(input);
613: }
614:
615: if (ctxt->nodeTab != NULL) free(ctxt->nodeTab);
616: if (ctxt->inputTab != NULL) free(ctxt->inputTab);
617: if (ctxt->version != NULL) free((char *) ctxt->version);
618: if (ctxt->encoding != NULL) free((char *) ctxt->encoding);
619: if ((ctxt->sax != NULL) && (ctxt->sax != &xmlDefaultSAXHandler))
620: free(ctxt->sax);
621: free(ctxt);
622: }
623:
624: /**
625: * xmlNewParserCtxt:
626: *
627: * Allocate and initialize a new parser context.
628: *
629: * Returns the xmlParserCtxtPtr or NULL
630: */
631:
632: xmlParserCtxtPtr
633: xmlNewParserCtxt()
634: {
635: xmlParserCtxtPtr ctxt;
636:
637: ctxt = (xmlParserCtxtPtr) malloc(sizeof(xmlParserCtxt));
638: if (ctxt == NULL) {
639: fprintf(stderr, "xmlNewParserCtxt : cannot allocate context\n");
640: perror("malloc");
641: return(NULL);
642: }
643: xmlInitParserCtxt(ctxt);
644: return(ctxt);
645: }
646:
647: /**
648: * xmlClearParserCtxt:
649: * @ctxt: an XML parser context
650: *
651: * Clear (release owned resources) and reinitialize a parser context
652: */
653:
654: void
655: xmlClearParserCtxt(xmlParserCtxtPtr ctxt)
656: {
657: xmlClearNodeInfoSeq(&ctxt->node_seq);
658: xmlInitParserCtxt(ctxt);
659: }
660:
661: /************************************************************************
662: * *
1.77 daniel 663: * Commodity functions to handle entities *
664: * *
665: ************************************************************************/
666:
1.97 daniel 667: void xmlParserHandleReference(xmlParserCtxtPtr ctxt);
668: void xmlParserHandlePEReference(xmlParserCtxtPtr ctxt);
669:
670: /**
671: * xmlParseCharRef:
672: * @ctxt: an XML parser context
673: *
674: * parse Reference declarations
675: *
676: * [66] CharRef ::= '&#' [0-9]+ ';' |
677: * '&#x' [0-9a-fA-F]+ ';'
678: *
1.98 daniel 679: * [ WFC: Legal Character ]
680: * Characters referred to using character references must match the
681: * production for Char.
682: *
1.97 daniel 683: * Returns the value parsed (as an int)
1.77 daniel 684: */
1.97 daniel 685: int
686: xmlParseCharRef(xmlParserCtxtPtr ctxt) {
687: int val = 0;
688:
689: if ((CUR == '&') && (NXT(1) == '#') &&
690: (NXT(2) == 'x')) {
691: SKIP(3);
692: while (CUR != ';') {
693: if ((CUR >= '0') && (CUR <= '9'))
694: val = val * 16 + (CUR - '0');
695: else if ((CUR >= 'a') && (CUR <= 'f'))
696: val = val * 16 + (CUR - 'a') + 10;
697: else if ((CUR >= 'A') && (CUR <= 'F'))
698: val = val * 16 + (CUR - 'A') + 10;
699: else {
700: if ((ctxt->sax != NULL) && (ctxt->sax->error != NULL))
701: ctxt->sax->error(ctxt->userData,
702: "xmlParseCharRef: invalid hexadecimal value\n");
703: ctxt->wellFormed = 0;
704: val = 0;
705: break;
706: }
707: NEXT;
708: }
709: if (CUR == ';')
710: NEXT;
711: } else if ((CUR == '&') && (NXT(1) == '#')) {
712: SKIP(2);
713: while (CUR != ';') {
714: if ((CUR >= '0') && (CUR <= '9'))
715: val = val * 10 + (CUR - '0');
716: else {
717: if ((ctxt->sax != NULL) && (ctxt->sax->error != NULL))
718: ctxt->sax->error(ctxt->userData,
719: "xmlParseCharRef: invalid decimal value\n");
720: ctxt->wellFormed = 0;
721: val = 0;
722: break;
723: }
724: NEXT;
725: }
726: if (CUR == ';')
727: NEXT;
728: } else {
729: if ((ctxt->sax != NULL) && (ctxt->sax->error != NULL))
1.98 daniel 730: ctxt->sax->error(ctxt->userData,
731: "xmlParseCharRef: invalid value\n");
1.97 daniel 732: ctxt->wellFormed = 0;
733: }
1.98 daniel 734:
1.97 daniel 735: /*
1.98 daniel 736: * [ WFC: Legal Character ]
737: * Characters referred to using character references must match the
738: * production for Char.
1.97 daniel 739: */
740: if (IS_CHAR(val)) {
741: return(val);
742: } else {
743: if ((ctxt->sax != NULL) && (ctxt->sax->error != NULL))
1.98 daniel 744: ctxt->sax->error(ctxt->userData, "CharRef: invalid CHAR value %d\n",
1.97 daniel 745: val);
746: ctxt->wellFormed = 0;
747: }
748: return(0);
1.77 daniel 749: }
750:
1.96 daniel 751: /**
752: * xmlParserHandleReference:
753: * @ctxt: the parser context
754: *
1.97 daniel 755: * [67] Reference ::= EntityRef | CharRef
756: *
1.96 daniel 757: * [68] EntityRef ::= '&' Name ';'
758: *
1.98 daniel 759: * [ WFC: Entity Declared ]
760: * the Name given in the entity reference must match that in an entity
761: * declaration, except that well-formed documents need not declare any
762: * of the following entities: amp, lt, gt, apos, quot.
763: *
764: * [ WFC: Parsed Entity ]
765: * An entity reference must not contain the name of an unparsed entity
766: *
1.97 daniel 767: * [66] CharRef ::= '&#' [0-9]+ ';' |
768: * '&#x' [0-9a-fA-F]+ ';'
769: *
1.96 daniel 770: * A PEReference may have been detectect in the current input stream
771: * the handling is done accordingly to
772: * http://www.w3.org/TR/REC-xml#entproc
773: */
774: void
775: xmlParserHandleReference(xmlParserCtxtPtr ctxt) {
1.97 daniel 776: xmlParserInputPtr input;
777: CHAR *name;
778: xmlEntityPtr ent = NULL;
779:
780: if (CUR != '&') return;
781: GROW;
782: if ((CUR == '&') && (NXT(1) == '#')) {
783: switch(ctxt->instate) {
784: case XML_PARSER_COMMENT:
785: return;
786: case XML_PARSER_EOF:
787: if ((ctxt->sax != NULL) && (ctxt->sax->error != NULL))
788: ctxt->sax->error(ctxt->userData, "CharRef at EOF\n");
789: ctxt->wellFormed = 0;
790: return;
791: case XML_PARSER_PROLOG:
792: if ((ctxt->sax != NULL) && (ctxt->sax->error != NULL))
793: ctxt->sax->error(ctxt->userData, "CharRef in prolog!\n");
794: ctxt->wellFormed = 0;
795: return;
796: case XML_PARSER_EPILOG:
797: if ((ctxt->sax != NULL) && (ctxt->sax->error != NULL))
798: ctxt->sax->error(ctxt->userData, "CharRef in epilog!\n");
799: ctxt->wellFormed = 0;
800: return;
801: case XML_PARSER_DTD:
802: if ((ctxt->sax != NULL) && (ctxt->sax->error != NULL))
803: ctxt->sax->error(ctxt->userData,
804: "CharRef are forbiden in DTDs!\n");
805: ctxt->wellFormed = 0;
806: return;
807: case XML_PARSER_ENTITY_DECL:
808: /* we just ignore it there */
809: return;
810: case XML_PARSER_ENTITY_VALUE:
811: /*
812: * NOTE: in the case of entity values, we don't do the
813: * substitution here since we need the litteral
814: * entity value to be able to save the internal
815: * subset of the document.
816: * This will be handled by xmlDecodeEntities
817: */
818: return;
819: case XML_PARSER_CONTENT:
820: case XML_PARSER_ATTRIBUTE_VALUE:
821: /* TODO this may not be Ok for UTF-8, multibyte sequence */
822: ctxt->token = xmlParseCharRef(ctxt);
823: return;
824: }
825: return;
826: }
827:
828: switch(ctxt->instate) {
829: case XML_PARSER_COMMENT:
830: return;
831: case XML_PARSER_EOF:
832: if ((ctxt->sax != NULL) && (ctxt->sax->error != NULL))
833: ctxt->sax->error(ctxt->userData, "Reference at EOF\n");
834: ctxt->wellFormed = 0;
835: return;
836: case XML_PARSER_PROLOG:
837: if ((ctxt->sax != NULL) && (ctxt->sax->error != NULL))
838: ctxt->sax->error(ctxt->userData, "Reference in prolog!\n");
839: ctxt->wellFormed = 0;
840: return;
841: case XML_PARSER_EPILOG:
842: if ((ctxt->sax != NULL) && (ctxt->sax->error != NULL))
843: ctxt->sax->error(ctxt->userData, "Reference in epilog!\n");
844: ctxt->wellFormed = 0;
845: return;
846: case XML_PARSER_ENTITY_VALUE:
847: /*
848: * NOTE: in the case of entity values, we don't do the
849: * substitution here since we need the litteral
850: * entity value to be able to save the internal
851: * subset of the document.
852: * This will be handled by xmlDecodeEntities
853: */
854: return;
855: case XML_PARSER_ATTRIBUTE_VALUE:
856: /*
857: * NOTE: in the case of attributes values, we don't do the
858: * substitution here unless we are in a mode where
859: * the parser is explicitely asked to substitute
860: * entities. The SAX callback is called with values
861: * without entity substitution.
862: * This will then be handled by xmlDecodeEntities
863: */
864: if (ctxt->replaceEntities == 0) return;
865: break;
866: case XML_PARSER_ENTITY_DECL:
867: /*
868: * we just ignore it there
869: * the substitution will be done once the entity is referenced
870: */
871: return;
872: case XML_PARSER_DTD:
873: if ((ctxt->sax != NULL) && (ctxt->sax->error != NULL))
874: ctxt->sax->error(ctxt->userData,
875: "Entity references are forbiden in DTDs!\n");
876: ctxt->wellFormed = 0;
877: return;
878: case XML_PARSER_CONTENT:
879: /*
880: * NOTE: in the case of attributes values, we don't do the
881: * substitution here unless we are in a mode where
882: * the parser is explicitely asked to substitute
883: * entities. The SAX callback is called with values
884: * without entity substitution.
885: * This will then be handled by xmlDecodeEntities
886: */
887: if (ctxt->replaceEntities == 0) return;
888: break;
889: }
890:
891: NEXT;
892: name = xmlScanName(ctxt);
893: if (name == NULL) {
894: if ((ctxt->sax != NULL) && (ctxt->sax->error != NULL))
895: ctxt->sax->error(ctxt->userData, "Entity reference: no name\n");
896: ctxt->wellFormed = 0;
897: ctxt->token = '&';
898: return;
899: }
900: if (NXT(xmlStrlen(name)) != ';') {
901: if ((ctxt->sax != NULL) && (ctxt->sax->error != NULL))
902: ctxt->sax->error(ctxt->userData,
903: "Entity reference: ';' expected\n");
904: ctxt->wellFormed = 0;
905: ctxt->token = '&';
906: return;
907: }
908: SKIP(xmlStrlen(name) + 1);
909: if (ctxt->sax != NULL) {
910: if (ctxt->sax->getEntity != NULL)
911: ent = ctxt->sax->getEntity(ctxt->userData, name);
912: }
1.98 daniel 913:
914: /*
915: * [ WFC: Entity Declared ]
916: * the Name given in the entity reference must match that in an entity
917: * declaration, except that well-formed documents need not declare any
918: * of the following entities: amp, lt, gt, apos, quot.
919: */
1.97 daniel 920: if (ent == NULL)
921: ent = xmlGetPredefinedEntity(name);
922: if (ent == NULL) {
923: if ((ctxt->sax != NULL) && (ctxt->sax->error != NULL))
924: ctxt->sax->error(ctxt->userData,
1.98 daniel 925: "Entity reference: entity %s not declared\n",
926: name);
1.97 daniel 927: ctxt->wellFormed = 0;
928: return;
929: }
1.98 daniel 930:
931: /*
932: * [ WFC: Parsed Entity ]
933: * An entity reference must not contain the name of an unparsed entity
934: */
935: if (ent->type == XML_EXTERNAL_GENERAL_UNPARSED_ENTITY) {
936: if ((ctxt->sax != NULL) && (ctxt->sax->error != NULL))
937: ctxt->sax->error(ctxt->userData,
938: "Entity reference to unparsed entity %s\n", name);
939: ctxt->wellFormed = 0;
940: }
941:
1.97 daniel 942: if (ent->type == XML_INTERNAL_PREDEFINED_ENTITY) {
943: ctxt->token = ent->content[0];
944: return;
945: }
946: input = xmlNewEntityInputStream(ctxt, ent);
947: xmlPushInput(ctxt, input);
1.96 daniel 948: return;
949: }
950:
951: /**
952: * xmlParserHandlePEReference:
953: * @ctxt: the parser context
954: *
955: * [69] PEReference ::= '%' Name ';'
956: *
1.98 daniel 957: * [ WFC: No Recursion ]
958: * TODO A parsed entity must not contain a recursive
959: * reference to itself, either directly or indirectly.
960: *
961: * [ WFC: Entity Declared ]
962: * In a document without any DTD, a document with only an internal DTD
963: * subset which contains no parameter entity references, or a document
964: * with "standalone='yes'", ... ... The declaration of a parameter
965: * entity must precede any reference to it...
966: *
967: * [ VC: Entity Declared ]
968: * In a document with an external subset or external parameter entities
969: * with "standalone='no'", ... ... The declaration of a parameter entity
970: * must precede any reference to it...
971: *
972: * [ WFC: In DTD ]
973: * Parameter-entity references may only appear in the DTD.
974: * NOTE: misleading but this is handled.
975: *
976: * A PEReference may have been detected in the current input stream
1.96 daniel 977: * the handling is done accordingly to
978: * http://www.w3.org/TR/REC-xml#entproc
979: * i.e.
980: * - Included in literal in entity values
981: * - Included as Parameter Entity reference within DTDs
982: */
983: void
984: xmlParserHandlePEReference(xmlParserCtxtPtr ctxt) {
985: CHAR *name;
986: xmlEntityPtr entity = NULL;
987: xmlParserInputPtr input;
988:
989: switch(ctxt->instate) {
1.97 daniel 990: case XML_PARSER_COMMENT:
991: return;
1.96 daniel 992: case XML_PARSER_EOF:
993: if ((ctxt->sax != NULL) && (ctxt->sax->error != NULL))
994: ctxt->sax->error(ctxt->userData, "PEReference at EOF\n");
995: ctxt->wellFormed = 0;
996: return;
997: case XML_PARSER_PROLOG:
998: if ((ctxt->sax != NULL) && (ctxt->sax->error != NULL))
999: ctxt->sax->error(ctxt->userData, "PEReference in prolog!\n");
1000: ctxt->wellFormed = 0;
1001: return;
1.97 daniel 1002: case XML_PARSER_ENTITY_DECL:
1.96 daniel 1003: case XML_PARSER_CONTENT:
1004: case XML_PARSER_ATTRIBUTE_VALUE:
1005: /* we just ignore it there */
1006: return;
1007: case XML_PARSER_EPILOG:
1008: if ((ctxt->sax != NULL) && (ctxt->sax->error != NULL))
1.97 daniel 1009: ctxt->sax->error(ctxt->userData, "PEReference in epilog!\n");
1.96 daniel 1010: ctxt->wellFormed = 0;
1011: return;
1.97 daniel 1012: case XML_PARSER_ENTITY_VALUE:
1013: /*
1014: * NOTE: in the case of entity values, we don't do the
1015: * substitution here since we need the litteral
1016: * entity value to be able to save the internal
1017: * subset of the document.
1018: * This will be handled by xmlDecodeEntities
1019: */
1020: return;
1.96 daniel 1021: case XML_PARSER_DTD:
1.98 daniel 1022: /*
1023: * [WFC: Well-Formedness Constraint: PEs in Internal Subset]
1024: * In the internal DTD subset, parameter-entity references
1025: * can occur only where markup declarations can occur, not
1026: * within markup declarations.
1027: * In that case this is handled in xmlParseMarkupDecl
1028: */
1029: if ((ctxt->external == 0) && (ctxt->inputNr == 1))
1030: return;
1.96 daniel 1031: }
1032:
1033: NEXT;
1034: name = xmlParseName(ctxt);
1035: if (name == NULL) {
1036: if ((ctxt->sax != NULL) && (ctxt->sax->error != NULL))
1037: ctxt->sax->error(ctxt->userData, "xmlHandlePEReference: no name\n");
1038: ctxt->wellFormed = 0;
1039: } else {
1040: if (CUR == ';') {
1041: NEXT;
1.98 daniel 1042: if ((ctxt->sax != NULL) && (ctxt->sax->getParameterEntity != NULL))
1043: entity = ctxt->sax->getParameterEntity(ctxt->userData, name);
1.96 daniel 1044: if (entity == NULL) {
1.98 daniel 1045:
1046: /*
1047: * [ WFC: Entity Declared ]
1048: * In a document without any DTD, a document with only an
1049: * internal DTD subset which contains no parameter entity
1050: * references, or a document with "standalone='yes'", ...
1051: * ... The declaration of a parameter entity must precede
1052: * any reference to it...
1053: */
1054: if ((ctxt->standalone == 1) ||
1055: ((ctxt->hasExternalSubset == 0) &&
1056: (ctxt->hasPErefs == 0))) {
1057: if ((ctxt->sax != NULL) && (ctxt->sax->error != NULL))
1058: ctxt->sax->error(ctxt->userData,
1059: "PEReference: %%%s; not found\n", name);
1060: ctxt->wellFormed = 0;
1061: } else {
1062: /*
1063: * [ VC: Entity Declared ]
1064: * In a document with an external subset or external
1065: * parameter entities with "standalone='no'", ...
1066: * ... The declaration of a parameter entity must precede
1067: * any reference to it...
1068: */
1069: if ((ctxt->sax != NULL) && (ctxt->sax->warning != NULL))
1070: ctxt->sax->warning(ctxt->userData,
1071: "PEReference: %%%s; not found\n", name);
1072: ctxt->valid = 0;
1073: }
1.96 daniel 1074: } else {
1075: if ((entity->type == XML_INTERNAL_PARAMETER_ENTITY) ||
1076: (entity->type == XML_EXTERNAL_PARAMETER_ENTITY)) {
1077: /*
1078: * TODO !!!! handle the extra spaces added before and after
1079: * c.f. http://www.w3.org/TR/REC-xml#as-PE
1080: * TODO !!!! Avoid quote processing in parameters value
1081: * c.f. http://www.w3.org/TR/REC-xml#inliteral
1082: */
1083: input = xmlNewEntityInputStream(ctxt, entity);
1084: xmlPushInput(ctxt, input);
1085: } else {
1086: if ((ctxt->sax != NULL) && (ctxt->sax->error != NULL))
1087: ctxt->sax->error(ctxt->userData,
1088: "xmlHandlePEReference: %s is not a parameter entity\n",
1089: name);
1090: ctxt->wellFormed = 0;
1091: }
1092: }
1093: } else {
1094: if ((ctxt->sax != NULL) && (ctxt->sax->error != NULL))
1095: ctxt->sax->error(ctxt->userData,
1096: "xmlHandlePEReference: expecting ';'\n");
1097: ctxt->wellFormed = 0;
1098: }
1.97 daniel 1099: free(name);
1100: }
1101: }
1102:
1103: /*
1104: * Macro used to grow the current buffer.
 *
 * Doubles the companion variable <buffer>_size, then realloc()s <buffer>
 * to hold that many CHARs.  The caller must therefore have both the
 * buffer pointer and a matching "<buffer>_size" variable in scope.
 * If realloc() fails the macro calls perror() and terminates the process
 * with exit(1), so it never "returns" a NULL buffer.
 * realloc() may move the block: any pointer into the old buffer has to be
 * recomputed by the caller after the macro has run.
1105: */
1106: #define growBuffer(buffer) { \
1107: buffer##_size *= 2; \
1108: buffer = (CHAR *) realloc(buffer, buffer##_size * sizeof(CHAR)); \
1109: if (buffer == NULL) { \
1110: perror("realloc failed"); \
1111: exit(1); \
1112: } \
1.96 daniel 1113: }
1.77 daniel 1114:
1115: /**
1116: * xmlDecodeEntities:
1117: * @ctxt: the parser context
1118: * @what: combination of XML_SUBSTITUTE_REF and XML_SUBSTITUTE_PEREF
1119: * @len: the len to decode (in bytes !), -1 for no size limit
1120: * @end: an end marker CHAR, 0 if none
1121: * @end2: an end marker CHAR, 0 if none
1122: * @end3: an end marker CHAR, 0 if none
1123: *
1124: * [67] Reference ::= EntityRef | CharRef
1125: *
1126: * [69] PEReference ::= '%' Name ';'
1127: *
1128: * Returns A newly allocated string with the substitution done. The caller
1129: * must deallocate it !
1130: */
1131: CHAR *
1132: xmlDecodeEntities(xmlParserCtxtPtr ctxt, int len, int what,
1133: CHAR end, CHAR end2, CHAR end3) {
1134: CHAR *buffer = NULL;
1.78 daniel 1135: int buffer_size = 0;
1.77 daniel 1136: CHAR *out = NULL;
1.78 daniel 1137:
1.97 daniel 1138: CHAR *current = NULL;
1.77 daniel 1139: xmlEntityPtr ent;
1.91 daniel 1140: int nbchars = 0;
1.77 daniel 1141: unsigned int max = (unsigned int) len;
1.97 daniel 1142: CHAR cur;
1.77 daniel 1143:
1144: /*
1145: * allocate a translation buffer.
1146: */
1147: buffer_size = 1000;
1148: buffer = (CHAR *) malloc(buffer_size * sizeof(CHAR));
1149: if (buffer == NULL) {
1150: perror("xmlDecodeEntities: malloc failed");
1151: return(NULL);
1152: }
1153: out = buffer;
1154:
1.78 daniel 1155: /*
1156: * Ok loop until we reach one of the ending char or a size limit.
1157: */
1.97 daniel 1158: cur = CUR;
1159: while ((nbchars < max) && (cur != end) &&
1160: (cur != end2) && (cur != end3)) {
1.77 daniel 1161:
1.98 daniel 1162: if (cur == 0) break;
1163: if ((cur == '&') && (NXT(1) == '#')) {
1164: int val = xmlParseCharRef(ctxt);
1165: *out++ = val;
1166: nbchars += 3;
1167: } else if ((cur == '&') && (what & XML_SUBSTITUTE_REF)) {
1168: ent = xmlParseEntityRef(ctxt);
1169: if ((ent != NULL) &&
1170: (ctxt->replaceEntities != 0)) {
1171: current = ent->content;
1172: while (*current != 0) {
1173: *out++ = *current++;
1174: if (out - buffer > buffer_size - 100) {
1175: int index = out - buffer;
1176:
1177: growBuffer(buffer);
1178: out = &buffer[index];
1.77 daniel 1179: }
1180: }
1.98 daniel 1181: nbchars += 3 + xmlStrlen(ent->name);
1182: } else if (ent != NULL) {
1183: int i = xmlStrlen(ent->name);
1184: const CHAR *cur = ent->name;
1185:
1186: nbchars += i + 2;
1187: *out++ = '&';
1188: if (out - buffer > buffer_size - i - 100) {
1189: int index = out - buffer;
1190:
1191: growBuffer(buffer);
1192: out = &buffer[index];
1193: }
1194: for (;i > 0;i--)
1195: *out++ = *cur++;
1196: *out++ = ';';
1.77 daniel 1197: }
1.97 daniel 1198: } else if (cur == '%' && (what & XML_SUBSTITUTE_PEREF)) {
1199: /*
1.77 daniel 1200: * a PEReference induce to switch the entity flow,
1201: * we break here to flush the current set of chars
1202: * parsed if any. We will be called back later.
1.97 daniel 1203: */
1.91 daniel 1204: if (nbchars != 0) break;
1.77 daniel 1205:
1206: xmlParsePEReference(ctxt);
1.79 daniel 1207:
1.97 daniel 1208: /*
1.79 daniel 1209: * Pop-up of finished entities.
1.97 daniel 1210: */
1.79 daniel 1211: while ((CUR == 0) && (ctxt->inputNr > 1))
1212: xmlPopInput(ctxt);
1213:
1.98 daniel 1214: break;
1.77 daniel 1215: } else {
1216: /* TODO: invalid for UTF-8 , use COPY(out); */
1.97 daniel 1217: *out++ = cur;
1.91 daniel 1218: nbchars++;
1.86 daniel 1219: if (out - buffer > buffer_size - 100) {
1220: int index = out - buffer;
1221:
1222: growBuffer(buffer);
1223: out = &buffer[index];
1224: }
1.77 daniel 1225: NEXT;
1226: }
1.97 daniel 1227: cur = CUR;
1.77 daniel 1228: }
1229: *out++ = 0;
1230: return(buffer);
1231: }
1232:
1.1 veillard 1233:
1.28 daniel 1234: /************************************************************************
1235: * *
1.75 daniel 1236: * Commodity functions to handle encodings *
1237: * *
1238: ************************************************************************/
1239:
1240: /**
1241: * xmlSwitchEncoding:
1242: * @ctxt: the parser context
1243: * @len: the len of @cur
1244: *
1245: * change the input functions when discovering the character encoding
1246: * of a given entity.
1247: *
1248: */
1249: void
1250: xmlSwitchEncoding(xmlParserCtxtPtr ctxt, xmlCharEncoding enc)
1251: {
1252: switch (enc) {
1253: case XML_CHAR_ENCODING_ERROR:
1254: if ((ctxt->sax != NULL) && (ctxt->sax->error != NULL))
1255: ctxt->sax->error(ctxt->userData, "encoding unknown\n");
1256: ctxt->wellFormed = 0;
1257: break;
1258: case XML_CHAR_ENCODING_NONE:
1259: /* let's assume it's UTF-8 without the XML decl */
1260: return;
1261: case XML_CHAR_ENCODING_UTF8:
1262: /* default encoding, no conversion should be needed */
1263: return;
1264: case XML_CHAR_ENCODING_UTF16LE:
1265: if ((ctxt->sax != NULL) && (ctxt->sax->error != NULL))
1266: ctxt->sax->error(ctxt->userData,
1267: "char encoding UTF16 little endian not supported\n");
1268: break;
1269: case XML_CHAR_ENCODING_UTF16BE:
1270: if ((ctxt->sax != NULL) && (ctxt->sax->error != NULL))
1271: ctxt->sax->error(ctxt->userData,
1272: "char encoding UTF16 big endian not supported\n");
1273: break;
1274: case XML_CHAR_ENCODING_UCS4LE:
1275: if ((ctxt->sax != NULL) && (ctxt->sax->error != NULL))
1276: ctxt->sax->error(ctxt->userData,
1277: "char encoding USC4 little endian not supported\n");
1278: break;
1279: case XML_CHAR_ENCODING_UCS4BE:
1280: if ((ctxt->sax != NULL) && (ctxt->sax->error != NULL))
1281: ctxt->sax->error(ctxt->userData,
1282: "char encoding USC4 big endian not supported\n");
1283: break;
1284: case XML_CHAR_ENCODING_EBCDIC:
1285: if ((ctxt->sax != NULL) && (ctxt->sax->error != NULL))
1286: ctxt->sax->error(ctxt->userData,
1287: "char encoding EBCDIC not supported\n");
1288: break;
1289: case XML_CHAR_ENCODING_UCS4_2143:
1290: if ((ctxt->sax != NULL) && (ctxt->sax->error != NULL))
1291: ctxt->sax->error(ctxt->userData,
1292: "char encoding UCS4 2143 not supported\n");
1293: break;
1294: case XML_CHAR_ENCODING_UCS4_3412:
1295: if ((ctxt->sax != NULL) && (ctxt->sax->error != NULL))
1296: ctxt->sax->error(ctxt->userData,
1297: "char encoding UCS4 3412 not supported\n");
1298: break;
1299: case XML_CHAR_ENCODING_UCS2:
1300: if ((ctxt->sax != NULL) && (ctxt->sax->error != NULL))
1301: ctxt->sax->error(ctxt->userData,
1302: "char encoding UCS2 not supported\n");
1303: break;
1304: case XML_CHAR_ENCODING_8859_1:
1305: if ((ctxt->sax != NULL) && (ctxt->sax->error != NULL))
1306: ctxt->sax->error(ctxt->userData,
1307: "char encoding ISO_8859_1 ISO Latin 1 not supported\n");
1308: break;
1309: case XML_CHAR_ENCODING_8859_2:
1310: if ((ctxt->sax != NULL) && (ctxt->sax->error != NULL))
1311: ctxt->sax->error(ctxt->userData,
1312: "char encoding ISO_8859_2 ISO Latin 2 not supported\n");
1313: break;
1314: case XML_CHAR_ENCODING_8859_3:
1315: if ((ctxt->sax != NULL) && (ctxt->sax->error != NULL))
1316: ctxt->sax->error(ctxt->userData,
1317: "char encoding ISO_8859_3 not supported\n");
1318: break;
1319: case XML_CHAR_ENCODING_8859_4:
1320: if ((ctxt->sax != NULL) && (ctxt->sax->error != NULL))
1321: ctxt->sax->error(ctxt->userData,
1322: "char encoding ISO_8859_4 not supported\n");
1323: break;
1324: case XML_CHAR_ENCODING_8859_5:
1325: if ((ctxt->sax != NULL) && (ctxt->sax->error != NULL))
1326: ctxt->sax->error(ctxt->userData,
1327: "char encoding ISO_8859_5 not supported\n");
1328: break;
1329: case XML_CHAR_ENCODING_8859_6:
1330: if ((ctxt->sax != NULL) && (ctxt->sax->error != NULL))
1331: ctxt->sax->error(ctxt->userData,
1332: "char encoding ISO_8859_6 not supported\n");
1333: break;
1334: case XML_CHAR_ENCODING_8859_7:
1335: if ((ctxt->sax != NULL) && (ctxt->sax->error != NULL))
1336: ctxt->sax->error(ctxt->userData,
1337: "char encoding ISO_8859_7 not supported\n");
1338: break;
1339: case XML_CHAR_ENCODING_8859_8:
1340: if ((ctxt->sax != NULL) && (ctxt->sax->error != NULL))
1341: ctxt->sax->error(ctxt->userData,
1342: "char encoding ISO_8859_8 not supported\n");
1343: break;
1344: case XML_CHAR_ENCODING_8859_9:
1345: if ((ctxt->sax != NULL) && (ctxt->sax->error != NULL))
1346: ctxt->sax->error(ctxt->userData,
1347: "char encoding ISO_8859_9 not supported\n");
1348: break;
1349: case XML_CHAR_ENCODING_2022_JP:
1350: if ((ctxt->sax != NULL) && (ctxt->sax->error != NULL))
1351: ctxt->sax->error(ctxt->userData,
1352: "char encoding ISO-2022-JPnot supported\n");
1353: break;
1354: case XML_CHAR_ENCODING_SHIFT_JIS:
1355: if ((ctxt->sax != NULL) && (ctxt->sax->error != NULL))
1356: ctxt->sax->error(ctxt->userData,
1357: "char encoding Shift_JISnot supported\n");
1358: break;
1359: case XML_CHAR_ENCODING_EUC_JP:
1360: if ((ctxt->sax != NULL) && (ctxt->sax->error != NULL))
1361: ctxt->sax->error(ctxt->userData,
1362: "char encoding EUC-JPnot supported\n");
1363: break;
1364: }
1365: }
1366:
1367: /************************************************************************
1368: * *
1.28 daniel 1369: * Commodity functions to handle CHARs *
1370: * *
1371: ************************************************************************/
1372:
1.50 daniel 1373: /**
1374: * xmlStrndup:
1375: * @cur: the input CHAR *
1376: * @len: the len of @cur
1377: *
1378: * a strndup for array of CHAR's
1.68 daniel 1379: *
1380: * Returns a new CHAR * or NULL
1.1 veillard 1381: */
1.55 daniel 1382: CHAR *
1383: xmlStrndup(const CHAR *cur, int len) {
1.1 veillard 1384: CHAR *ret = malloc((len + 1) * sizeof(CHAR));
1385:
1386: if (ret == NULL) {
1.86 daniel 1387: fprintf(stderr, "malloc of %ld byte failed\n",
1388: (len + 1) * (long)sizeof(CHAR));
1.1 veillard 1389: return(NULL);
1390: }
1391: memcpy(ret, cur, len * sizeof(CHAR));
1392: ret[len] = 0;
1393: return(ret);
1394: }
1395:
1.50 daniel 1396: /**
1397: * xmlStrdup:
1398: * @cur: the input CHAR *
1399: *
1400: * a strdup for array of CHAR's
1.68 daniel 1401: *
1402: * Returns a new CHAR * or NULL
1.1 veillard 1403: */
1.55 daniel 1404: CHAR *
1405: xmlStrdup(const CHAR *cur) {
1.6 httpng 1406: const CHAR *p = cur;
1.1 veillard 1407:
1408: while (IS_CHAR(*p)) p++;
1409: return(xmlStrndup(cur, p - cur));
1410: }
1411:
1.50 daniel 1412: /**
1413: * xmlCharStrndup:
1414: * @cur: the input char *
1415: * @len: the len of @cur
1416: *
1417: * a strndup for char's to CHAR's
1.68 daniel 1418: *
1419: * Returns a new CHAR * or NULL
1.45 daniel 1420: */
1421:
1.55 daniel 1422: CHAR *
1423: xmlCharStrndup(const char *cur, int len) {
1.45 daniel 1424: int i;
1425: CHAR *ret = malloc((len + 1) * sizeof(CHAR));
1426:
1427: if (ret == NULL) {
1.86 daniel 1428: fprintf(stderr, "malloc of %ld byte failed\n",
1429: (len + 1) * (long)sizeof(CHAR));
1.45 daniel 1430: return(NULL);
1431: }
1432: for (i = 0;i < len;i++)
1433: ret[i] = (CHAR) cur[i];
1434: ret[len] = 0;
1435: return(ret);
1436: }
1437:
1.50 daniel 1438: /**
1439: * xmlCharStrdup:
1440: * @cur: the input char *
1441: * @len: the len of @cur
1442: *
1443: * a strdup for char's to CHAR's
1.68 daniel 1444: *
1445: * Returns a new CHAR * or NULL
1.45 daniel 1446: */
1447:
1.55 daniel 1448: CHAR *
1449: xmlCharStrdup(const char *cur) {
1.45 daniel 1450: const char *p = cur;
1451:
1452: while (*p != '\0') p++;
1453: return(xmlCharStrndup(cur, p - cur));
1454: }
1455:
1.50 daniel 1456: /**
1457: * xmlStrcmp:
1458: * @str1: the first CHAR *
1459: * @str2: the second CHAR *
1460: *
1461: * a strcmp for CHAR's
1.68 daniel 1462: *
1463: * Returns the integer result of the comparison
1.14 veillard 1464: */
1465:
1.55 daniel 1466: int
1467: xmlStrcmp(const CHAR *str1, const CHAR *str2) {
1.14 veillard 1468: register int tmp;
1469:
1470: do {
1471: tmp = *str1++ - *str2++;
1472: if (tmp != 0) return(tmp);
1473: } while ((*str1 != 0) && (*str2 != 0));
1474: return (*str1 - *str2);
1475: }
1476:
1.50 daniel 1477: /**
1478: * xmlStrncmp:
1479: * @str1: the first CHAR *
1480: * @str2: the second CHAR *
1481: * @len: the max comparison length
1482: *
1483: * a strncmp for CHAR's
1.68 daniel 1484: *
1485: * Returns the integer result of the comparison
1.14 veillard 1486: */
1487:
1.55 daniel 1488: int
1489: xmlStrncmp(const CHAR *str1, const CHAR *str2, int len) {
1.14 veillard 1490: register int tmp;
1491:
1492: if (len <= 0) return(0);
1493: do {
1494: tmp = *str1++ - *str2++;
1495: if (tmp != 0) return(tmp);
1496: len--;
1497: if (len <= 0) return(0);
1498: } while ((*str1 != 0) && (*str2 != 0));
1499: return (*str1 - *str2);
1500: }
1501:
1.50 daniel 1502: /**
1503: * xmlStrchr:
1504: * @str: the CHAR * array
1505: * @val: the CHAR to search
1506: *
1507: * a strchr for CHAR's
1.68 daniel 1508: *
1509: * Returns the CHAR * for the first occurence or NULL.
1.14 veillard 1510: */
1511:
1.89 daniel 1512: const CHAR *
1.55 daniel 1513: xmlStrchr(const CHAR *str, CHAR val) {
1.14 veillard 1514: while (*str != 0) {
1515: if (*str == val) return((CHAR *) str);
1516: str++;
1517: }
1518: return(NULL);
1.89 daniel 1519: }
1520:
1521: /**
1522: * xmlStrstr:
1523: * @str: the CHAR * array (haystack)
1524: * @val: the CHAR to search (needle)
1525: *
1526: * a strstr for CHAR's
1527: *
1528: * Returns the CHAR * for the first occurence or NULL.
1529: */
1530:
1531: const CHAR *
1532: xmlStrstr(const CHAR *str, CHAR *val) {
1533: int n;
1534:
1535: if (str == NULL) return(NULL);
1536: if (val == NULL) return(NULL);
1537: n = xmlStrlen(val);
1538:
1539: if (n == 0) return(str);
1540: while (*str != 0) {
1541: if (*str == *val) {
1542: if (!xmlStrncmp(str, val, n)) return((const CHAR *) str);
1543: }
1544: str++;
1545: }
1546: return(NULL);
1547: }
1548:
1549: /**
1550: * xmlStrsub:
1551: * @str: the CHAR * array (haystack)
1552: * @start: the index of the first char (zero based)
1553: * @len: the length of the substring
1554: *
1555: * Extract a substring of a given string
1556: *
1557: * Returns the CHAR * for the first occurence or NULL.
1558: */
1559:
1560: CHAR *
1561: xmlStrsub(const CHAR *str, int start, int len) {
1562: int i;
1563:
1564: if (str == NULL) return(NULL);
1565: if (start < 0) return(NULL);
1.90 daniel 1566: if (len < 0) return(NULL);
1.89 daniel 1567:
1568: for (i = 0;i < start;i++) {
1569: if (*str == 0) return(NULL);
1570: str++;
1571: }
1572: if (*str == 0) return(NULL);
1573: return(xmlStrndup(str, len));
1.14 veillard 1574: }
1.28 daniel 1575:
1.50 daniel 1576: /**
1577: * xmlStrlen:
1578: * @str: the CHAR * array
1579: *
1580: * lenght of a CHAR's string
1.68 daniel 1581: *
1582: * Returns the number of CHAR contained in the ARRAY.
1.45 daniel 1583: */
1584:
1.55 daniel 1585: int
1586: xmlStrlen(const CHAR *str) {
1.45 daniel 1587: int len = 0;
1588:
1589: if (str == NULL) return(0);
1590: while (*str != 0) {
1591: str++;
1592: len++;
1593: }
1594: return(len);
1595: }
1596:
1.50 daniel 1597: /**
1598: * xmlStrncat:
1.68 daniel 1599: * @cur: the original CHAR * array
1.50 daniel 1600: * @add: the CHAR * array added
1601: * @len: the length of @add
1602: *
1603: * a strncat for array of CHAR's
1.68 daniel 1604: *
1605: * Returns a new CHAR * containing the concatenated string.
1.45 daniel 1606: */
1607:
1.55 daniel 1608: CHAR *
1609: xmlStrncat(CHAR *cur, const CHAR *add, int len) {
1.45 daniel 1610: int size;
1611: CHAR *ret;
1612:
1613: if ((add == NULL) || (len == 0))
1614: return(cur);
1615: if (cur == NULL)
1616: return(xmlStrndup(add, len));
1617:
1618: size = xmlStrlen(cur);
1619: ret = realloc(cur, (size + len + 1) * sizeof(CHAR));
1620: if (ret == NULL) {
1.86 daniel 1621: fprintf(stderr, "xmlStrncat: realloc of %ld byte failed\n",
1622: (size + len + 1) * (long)sizeof(CHAR));
1.45 daniel 1623: return(cur);
1624: }
1625: memcpy(&ret[size], add, len * sizeof(CHAR));
1626: ret[size + len] = 0;
1627: return(ret);
1628: }
1629:
1.50 daniel 1630: /**
1631: * xmlStrcat:
1.68 daniel 1632: * @cur: the original CHAR * array
1.50 daniel 1633: * @add: the CHAR * array added
1634: *
1635: * a strcat for array of CHAR's
1.68 daniel 1636: *
1637: * Returns a new CHAR * containing the concatenated string.
1.45 daniel 1638: */
1.55 daniel 1639: CHAR *
1640: xmlStrcat(CHAR *cur, const CHAR *add) {
1.45 daniel 1641: const CHAR *p = add;
1642:
1643: if (add == NULL) return(cur);
1644: if (cur == NULL)
1645: return(xmlStrdup(add));
1646:
1647: while (IS_CHAR(*p)) p++;
1648: return(xmlStrncat(cur, add, p - add));
1649: }
1650:
1651: /************************************************************************
1652: * *
1653: * Commodity functions, cleanup needed ? *
1654: * *
1655: ************************************************************************/
1656:
1.50 daniel 1657: /**
1658: * areBlanks:
1659: * @ctxt: an XML parser context
1660: * @str: a CHAR *
1661: * @len: the size of @str
1662: *
1.45 daniel 1663: * Is this a sequence of blank chars that one can ignore ?
1.50 daniel 1664: *
1.99 daniel 1665: * TODO: Whether white space are significant has to be checked accordingly
1666: * to DTD informations if available
1.68 daniel 1667: *
1668: * Returns 1 if ignorable 0 otherwise.
1.45 daniel 1669: */
1670:
1671: static int areBlanks(xmlParserCtxtPtr ctxt, const CHAR *str, int len) {
1.104 ! daniel 1672: int i, ret;
1.45 daniel 1673: xmlNodePtr lastChild;
1674:
1675: for (i = 0;i < len;i++)
1676: if (!(IS_BLANK(str[i]))) return(0);
1677:
1678: if (CUR != '<') return(0);
1.72 daniel 1679: if (ctxt->node == NULL) return(0);
1.104 ! daniel 1680: if (ctxt->myDoc != NULL) {
! 1681: ret = xmlIsMixedElement(ctxt->myDoc, ctxt->node->name);
! 1682: if (ret == 0) return(1);
! 1683: if (ret == 1) return(0);
! 1684: }
! 1685: /*
! 1686: * heuristic
! 1687: */
1.45 daniel 1688: lastChild = xmlGetLastChild(ctxt->node);
1689: if (lastChild == NULL) {
1690: if (ctxt->node->content != NULL) return(0);
1691: } else if (xmlNodeIsText(lastChild))
1692: return(0);
1.104 ! daniel 1693: else if ((ctxt->node->childs != NULL) &&
! 1694: (xmlNodeIsText(ctxt->node->childs)))
! 1695: return(0);
1.45 daniel 1696: return(1);
1697: }
1698:
1.50 daniel 1699: /**
1700: * xmlHandleEntity:
1701: * @ctxt: an XML parser context
1702: * @entity: an XML entity pointer.
1703: *
1704: * Default handling of defined entities, when should we define a new input
1.45 daniel 1705: * stream ? When do we just handle that as a set of chars ?
1.99 daniel 1706: *
1707: * OBSOLETE: to be removed at some point.
1.45 daniel 1708: */
1709:
1.55 daniel 1710: void
1711: xmlHandleEntity(xmlParserCtxtPtr ctxt, xmlEntityPtr entity) {
1.45 daniel 1712: int len;
1.50 daniel 1713: xmlParserInputPtr input;
1.45 daniel 1714:
1715: if (entity->content == NULL) {
1.55 daniel 1716: if ((ctxt->sax != NULL) && (ctxt->sax->error != NULL))
1.74 daniel 1717: ctxt->sax->error(ctxt->userData, "xmlHandleEntity %s: content == NULL\n",
1.45 daniel 1718: entity->name);
1.59 daniel 1719: ctxt->wellFormed = 0;
1.45 daniel 1720: return;
1721: }
1722: len = xmlStrlen(entity->content);
1723: if (len <= 2) goto handle_as_char;
1724:
1725: /*
1726: * Redefine its content as an input stream.
1727: */
1.50 daniel 1728: input = xmlNewEntityInputStream(ctxt, entity);
1729: xmlPushInput(ctxt, input);
1.45 daniel 1730: return;
1731:
1732: handle_as_char:
1733: /*
1734: * Just handle the content as a set of chars.
1735: */
1.72 daniel 1736: if ((ctxt->sax != NULL) && (ctxt->sax->characters != NULL))
1.74 daniel 1737: ctxt->sax->characters(ctxt->userData, entity->content, len);
1.45 daniel 1738:
1739: }
1740:
1741: /*
1742: * Forward declarations for recursive behaviour.
1743: */
1.77 daniel 1744: void xmlParsePEReference(xmlParserCtxtPtr ctxt);
1745: void xmlParseReference(xmlParserCtxtPtr ctxt);
1.45 daniel 1746:
1.28 daniel 1747: /************************************************************************
1748: * *
1749: * Extra stuff for namespace support *
1750: * Relates to http://www.w3.org/TR/WD-xml-names *
1751: * *
1752: ************************************************************************/
1753:
1.50 daniel 1754: /**
1755: * xmlNamespaceParseNCName:
1756: * @ctxt: an XML parser context
1757: *
1758: * parse an XML namespace name.
1.28 daniel 1759: *
1760: * [NS 3] NCName ::= (Letter | '_') (NCNameChar)*
1761: *
1762: * [NS 4] NCNameChar ::= Letter | Digit | '.' | '-' | '_' |
1763: * CombiningChar | Extender
1.68 daniel 1764: *
1765: * Returns the namespace name or NULL
1.28 daniel 1766: */
1767:
1.55 daniel 1768: CHAR *
1769: xmlNamespaceParseNCName(xmlParserCtxtPtr ctxt) {
1.91 daniel 1770: CHAR buf[XML_MAX_NAMELEN];
1771: int len = 0;
1.28 daniel 1772:
1.40 daniel 1773: if (!IS_LETTER(CUR) && (CUR != '_')) return(NULL);
1.28 daniel 1774:
1.40 daniel 1775: while ((IS_LETTER(CUR)) || (IS_DIGIT(CUR)) ||
1776: (CUR == '.') || (CUR == '-') ||
1777: (CUR == '_') ||
1778: (IS_COMBINING(CUR)) ||
1.91 daniel 1779: (IS_EXTENDER(CUR))) {
1780: buf[len++] = CUR;
1.40 daniel 1781: NEXT;
1.91 daniel 1782: if (len >= XML_MAX_NAMELEN) {
1783: fprintf(stderr,
1784: "xmlNamespaceParseNCName: reached XML_MAX_NAMELEN limit\n");
1785: while ((IS_LETTER(CUR)) || (IS_DIGIT(CUR)) ||
1786: (CUR == '.') || (CUR == '-') ||
1787: (CUR == '_') ||
1788: (IS_COMBINING(CUR)) ||
1789: (IS_EXTENDER(CUR)))
1790: NEXT;
1791: break;
1792: }
1793: }
1794: return(xmlStrndup(buf, len));
1.28 daniel 1795: }
1796:
1.50 daniel 1797: /**
1798: * xmlNamespaceParseQName:
1799: * @ctxt: an XML parser context
1800: * @prefix: a CHAR **
1801: *
1802: * parse an XML qualified name
1.28 daniel 1803: *
1804: * [NS 5] QName ::= (Prefix ':')? LocalPart
1805: *
1806: * [NS 6] Prefix ::= NCName
1807: *
1808: * [NS 7] LocalPart ::= NCName
1.68 daniel 1809: *
1810: * Returns the function returns the local part, and prefix is updated
1.50 daniel 1811: * to get the Prefix if any.
1.28 daniel 1812: */
1813:
1.55 daniel 1814: CHAR *
1815: xmlNamespaceParseQName(xmlParserCtxtPtr ctxt, CHAR **prefix) {
1.28 daniel 1816: CHAR *ret = NULL;
1817:
1818: *prefix = NULL;
1819: ret = xmlNamespaceParseNCName(ctxt);
1.40 daniel 1820: if (CUR == ':') {
1.28 daniel 1821: *prefix = ret;
1.40 daniel 1822: NEXT;
1.28 daniel 1823: ret = xmlNamespaceParseNCName(ctxt);
1824: }
1825:
1826: return(ret);
1827: }
1828:
1.50 daniel 1829: /**
1.72 daniel 1830: * xmlSplitQName:
1831: * @name: an XML parser context
1832: * @prefix: a CHAR **
1833: *
1834: * parse an XML qualified name string
1835: *
1836: * [NS 5] QName ::= (Prefix ':')? LocalPart
1837: *
1838: * [NS 6] Prefix ::= NCName
1839: *
1840: * [NS 7] LocalPart ::= NCName
1841: *
1842: * Returns the function returns the local part, and prefix is updated
1843: * to get the Prefix if any.
1844: */
1845:
1846: CHAR *
1847: xmlSplitQName(const CHAR *name, CHAR **prefix) {
1848: CHAR *ret = NULL;
1849: const CHAR *q;
1850: const CHAR *cur = name;
1851:
1852: *prefix = NULL;
1853: if (!IS_LETTER(*cur) && (*cur != '_')) return(NULL);
1854: q = cur++;
1855:
1856: while ((IS_LETTER(*cur)) || (IS_DIGIT(*cur)) ||
1857: (*cur == '.') || (*cur == '-') ||
1858: (*cur == '_') ||
1859: (IS_COMBINING(*cur)) ||
1860: (IS_EXTENDER(*cur)))
1861: cur++;
1862:
1863: ret = xmlStrndup(q, cur - q);
1864:
1865: if (*cur == ':') {
1866: cur++;
1867: if (!IS_LETTER(*cur) && (*cur != '_')) return(ret);
1868: *prefix = ret;
1869:
1870: q = cur++;
1871:
1872: while ((IS_LETTER(*cur)) || (IS_DIGIT(*cur)) ||
1873: (*cur == '.') || (*cur == '-') ||
1874: (*cur == '_') ||
1875: (IS_COMBINING(*cur)) ||
1876: (IS_EXTENDER(*cur)))
1877: cur++;
1878:
1879: ret = xmlStrndup(q, cur - q);
1880: }
1881:
1882: return(ret);
1883: }
1884: /**
1.50 daniel 1885: * xmlNamespaceParseNSDef:
1886: * @ctxt: an XML parser context
1887: *
1888: * parse a namespace prefix declaration
1.28 daniel 1889: *
1890: * [NS 1] NSDef ::= PrefixDef Eq SystemLiteral
1891: *
1892: * [NS 2] PrefixDef ::= 'xmlns' (':' NCName)?
1.68 daniel 1893: *
1894: * Returns the namespace name
1.28 daniel 1895: */
1896:
1.55 daniel 1897: CHAR *
1898: xmlNamespaceParseNSDef(xmlParserCtxtPtr ctxt) {
1.28 daniel 1899: CHAR *name = NULL;
1900:
1.40 daniel 1901: if ((CUR == 'x') && (NXT(1) == 'm') &&
1902: (NXT(2) == 'l') && (NXT(3) == 'n') &&
1903: (NXT(4) == 's')) {
1904: SKIP(5);
1905: if (CUR == ':') {
1906: NEXT;
1.28 daniel 1907: name = xmlNamespaceParseNCName(ctxt);
1908: }
1909: }
1.39 daniel 1910: return(name);
1.28 daniel 1911: }
1912:
1.50 daniel 1913: /**
1914: * xmlParseQuotedString:
1915: * @ctxt: an XML parser context
1916: *
1.45 daniel 1917: * [OLD] Parse and return a string between quotes or doublequotes
1.68 daniel 1918: *
1919: * Returns the string parser or NULL.
1.45 daniel 1920: */
1.55 daniel 1921: CHAR *
1922: xmlParseQuotedString(xmlParserCtxtPtr ctxt) {
1.45 daniel 1923: CHAR *ret = NULL;
1924: const CHAR *q;
1925:
1926: if (CUR == '"') {
1927: NEXT;
1928: q = CUR_PTR;
1929: while (IS_CHAR(CUR) && (CUR != '"')) NEXT;
1.55 daniel 1930: if (CUR != '"') {
1931: if ((ctxt->sax != NULL) && (ctxt->sax->error != NULL))
1.74 daniel 1932: ctxt->sax->error(ctxt->userData, "String not closed \"%.50s\"\n", q);
1.59 daniel 1933: ctxt->wellFormed = 0;
1.55 daniel 1934: } else {
1.45 daniel 1935: ret = xmlStrndup(q, CUR_PTR - q);
1936: NEXT;
1937: }
1938: } else if (CUR == '\''){
1939: NEXT;
1940: q = CUR_PTR;
1941: while (IS_CHAR(CUR) && (CUR != '\'')) NEXT;
1.55 daniel 1942: if (CUR != '\'') {
1943: if ((ctxt->sax != NULL) && (ctxt->sax->error != NULL))
1.74 daniel 1944: ctxt->sax->error(ctxt->userData, "String not closed \"%.50s\"\n", q);
1.59 daniel 1945: ctxt->wellFormed = 0;
1.55 daniel 1946: } else {
1.45 daniel 1947: ret = xmlStrndup(q, CUR_PTR - q);
1948: NEXT;
1949: }
1950: }
1951: return(ret);
1952: }
1953:
1.50 daniel 1954: /**
1955: * xmlParseNamespace:
1956: * @ctxt: an XML parser context
1957: *
1.45 daniel 1958: * [OLD] xmlParseNamespace: parse specific PI '<?namespace ...' constructs.
1959: *
1960: * This is what the older xml-name Working Draft specified, a bunch of
1961: * other stuff may still rely on it, so support is still here as
1962: * if ot was declared on the root of the Tree:-(
1963: */
1964:
1.55 daniel 1965: void
1966: xmlParseNamespace(xmlParserCtxtPtr ctxt) {
1.45 daniel 1967: CHAR *href = NULL;
1968: CHAR *prefix = NULL;
1969: int garbage = 0;
1970:
1971: /*
1972: * We just skipped "namespace" or "xml:namespace"
1973: */
1974: SKIP_BLANKS;
1975:
1976: while (IS_CHAR(CUR) && (CUR != '>')) {
1977: /*
1978: * We can have "ns" or "prefix" attributes
1979: * Old encoding as 'href' or 'AS' attributes is still supported
1980: */
1981: if ((CUR == 'n') && (NXT(1) == 's')) {
1982: garbage = 0;
1983: SKIP(2);
1984: SKIP_BLANKS;
1985:
1986: if (CUR != '=') continue;
1987: NEXT;
1988: SKIP_BLANKS;
1989:
1990: href = xmlParseQuotedString(ctxt);
1991: SKIP_BLANKS;
1992: } else if ((CUR == 'h') && (NXT(1) == 'r') &&
1993: (NXT(2) == 'e') && (NXT(3) == 'f')) {
1994: garbage = 0;
1995: SKIP(4);
1996: SKIP_BLANKS;
1997:
1998: if (CUR != '=') continue;
1999: NEXT;
2000: SKIP_BLANKS;
2001:
2002: href = xmlParseQuotedString(ctxt);
2003: SKIP_BLANKS;
2004: } else if ((CUR == 'p') && (NXT(1) == 'r') &&
2005: (NXT(2) == 'e') && (NXT(3) == 'f') &&
2006: (NXT(4) == 'i') && (NXT(5) == 'x')) {
2007: garbage = 0;
2008: SKIP(6);
2009: SKIP_BLANKS;
2010:
2011: if (CUR != '=') continue;
2012: NEXT;
2013: SKIP_BLANKS;
2014:
2015: prefix = xmlParseQuotedString(ctxt);
2016: SKIP_BLANKS;
2017: } else if ((CUR == 'A') && (NXT(1) == 'S')) {
2018: garbage = 0;
2019: SKIP(2);
2020: SKIP_BLANKS;
2021:
2022: if (CUR != '=') continue;
2023: NEXT;
2024: SKIP_BLANKS;
2025:
2026: prefix = xmlParseQuotedString(ctxt);
2027: SKIP_BLANKS;
2028: } else if ((CUR == '?') && (NXT(1) == '>')) {
2029: garbage = 0;
1.91 daniel 2030: NEXT;
1.45 daniel 2031: } else {
2032: /*
2033: * Found garbage when parsing the namespace
2034: */
2035: if (!garbage)
1.55 daniel 2036: if ((ctxt->sax != NULL) && (ctxt->sax->error != NULL))
1.74 daniel 2037: ctxt->sax->error(ctxt->userData, "xmlParseNamespace found garbage\n");
1.59 daniel 2038: ctxt->wellFormed = 0;
1.45 daniel 2039: NEXT;
2040: }
2041: }
2042:
2043: MOVETO_ENDTAG(CUR_PTR);
2044: NEXT;
2045:
2046: /*
2047: * Register the DTD.
1.72 daniel 2048: if (href != NULL)
2049: if ((ctxt->sax != NULL) && (ctxt->sax->globalNamespace != NULL))
1.74 daniel 2050: ctxt->sax->globalNamespace(ctxt->userData, href, prefix);
1.45 daniel 2051: */
2052:
2053: if (prefix != NULL) free(prefix);
2054: if (href != NULL) free(href);
2055: }
2056:
1.28 daniel 2057: /************************************************************************
2058: * *
2059: * The parser itself *
2060: * Relates to http://www.w3.org/TR/REC-xml *
2061: * *
2062: ************************************************************************/
1.14 veillard 2063:
1.50 daniel 2064: /**
1.97 daniel 2065: * xmlScanName:
2066: * @ctxt: an XML parser context
2067: *
2068: * Trickery: parse an XML name but without consuming the input flow
2069: * Needed for rollback cases.
2070: *
2071: * [4] NameChar ::= Letter | Digit | '.' | '-' | '_' | ':' |
2072: * CombiningChar | Extender
2073: *
2074: * [5] Name ::= (Letter | '_' | ':') (NameChar)*
2075: *
2076: * [6] Names ::= Name (S Name)*
2077: *
2078: * Returns the Name parsed or NULL
2079: */
2080:
2081: CHAR *
2082: xmlScanName(xmlParserCtxtPtr ctxt) {
2083: CHAR buf[XML_MAX_NAMELEN];
2084: int len = 0;
2085:
2086: GROW;
2087: if (!IS_LETTER(CUR) && (CUR != '_') &&
2088: (CUR != ':')) {
2089: return(NULL);
2090: }
2091:
2092: while ((IS_LETTER(NXT(len))) || (IS_DIGIT(NXT(len))) ||
2093: (NXT(len) == '.') || (NXT(len) == '-') ||
2094: (NXT(len) == '_') || (NXT(len) == ':') ||
2095: (IS_COMBINING(NXT(len))) ||
2096: (IS_EXTENDER(NXT(len)))) {
2097: buf[len] = NXT(len);
2098: len++;
2099: if (len >= XML_MAX_NAMELEN) {
2100: fprintf(stderr,
2101: "xmlScanName: reached XML_MAX_NAMELEN limit\n");
2102: while ((IS_LETTER(NXT(len))) || (IS_DIGIT(NXT(len))) ||
2103: (NXT(len) == '.') || (NXT(len) == '-') ||
2104: (NXT(len) == '_') || (NXT(len) == ':') ||
2105: (IS_COMBINING(NXT(len))) ||
2106: (IS_EXTENDER(NXT(len))))
2107: len++;
2108: break;
2109: }
2110: }
2111: return(xmlStrndup(buf, len));
2112: }
2113:
2114: /**
1.50 daniel 2115: * xmlParseName:
2116: * @ctxt: an XML parser context
2117: *
2118: * parse an XML name.
1.22 daniel 2119: *
2120: * [4] NameChar ::= Letter | Digit | '.' | '-' | '_' | ':' |
2121: * CombiningChar | Extender
2122: *
2123: * [5] Name ::= (Letter | '_' | ':') (NameChar)*
2124: *
2125: * [6] Names ::= Name (S Name)*
1.68 daniel 2126: *
2127: * Returns the Name parsed or NULL
1.1 veillard 2128: */
2129:
1.55 daniel 2130: CHAR *
2131: xmlParseName(xmlParserCtxtPtr ctxt) {
1.91 daniel 2132: CHAR buf[XML_MAX_NAMELEN];
2133: int len = 0;
1.97 daniel 2134: CHAR cur;
1.1 veillard 2135:
1.91 daniel 2136: GROW;
1.97 daniel 2137: cur = CUR;
2138: if (!IS_LETTER(cur) && (cur != '_') &&
2139: (cur != ':')) {
1.91 daniel 2140: return(NULL);
2141: }
1.40 daniel 2142:
1.97 daniel 2143: while ((IS_LETTER(cur)) || (IS_DIGIT(cur)) ||
2144: (cur == '.') || (cur == '-') ||
2145: (cur == '_') || (cur == ':') ||
2146: (IS_COMBINING(cur)) ||
2147: (IS_EXTENDER(cur))) {
2148: buf[len++] = cur;
1.40 daniel 2149: NEXT;
1.97 daniel 2150: cur = CUR;
1.91 daniel 2151: if (len >= XML_MAX_NAMELEN) {
2152: fprintf(stderr,
2153: "xmlParseName: reached XML_MAX_NAMELEN limit\n");
1.97 daniel 2154: while ((IS_LETTER(cur)) || (IS_DIGIT(cur)) ||
2155: (cur == '.') || (cur == '-') ||
2156: (cur == '_') || (cur == ':') ||
2157: (IS_COMBINING(cur)) ||
2158: (IS_EXTENDER(cur))) {
2159: NEXT;
2160: cur = CUR;
2161: }
1.91 daniel 2162: break;
2163: }
2164: }
2165: return(xmlStrndup(buf, len));
1.22 daniel 2166: }
2167:
1.50 daniel 2168: /**
2169: * xmlParseNmtoken:
2170: * @ctxt: an XML parser context
2171: *
2172: * parse an XML Nmtoken.
1.22 daniel 2173: *
2174: * [7] Nmtoken ::= (NameChar)+
2175: *
2176: * [8] Nmtokens ::= Nmtoken (S Nmtoken)*
1.68 daniel 2177: *
2178: * Returns the Nmtoken parsed or NULL
1.22 daniel 2179: */
2180:
1.55 daniel 2181: CHAR *
2182: xmlParseNmtoken(xmlParserCtxtPtr ctxt) {
1.91 daniel 2183: CHAR buf[XML_MAX_NAMELEN];
2184: int len = 0;
1.22 daniel 2185:
1.91 daniel 2186: GROW;
1.40 daniel 2187: while ((IS_LETTER(CUR)) || (IS_DIGIT(CUR)) ||
2188: (CUR == '.') || (CUR == '-') ||
2189: (CUR == '_') || (CUR == ':') ||
2190: (IS_COMBINING(CUR)) ||
1.91 daniel 2191: (IS_EXTENDER(CUR))) {
2192: buf[len++] = CUR;
1.40 daniel 2193: NEXT;
1.91 daniel 2194: if (len >= XML_MAX_NAMELEN) {
2195: fprintf(stderr,
2196: "xmlParseNmtoken: reached XML_MAX_NAMELEN limit\n");
2197: while ((IS_LETTER(CUR)) || (IS_DIGIT(CUR)) ||
2198: (CUR == '.') || (CUR == '-') ||
2199: (CUR == '_') || (CUR == ':') ||
2200: (IS_COMBINING(CUR)) ||
2201: (IS_EXTENDER(CUR)))
2202: NEXT;
2203: break;
2204: }
2205: }
2206: return(xmlStrndup(buf, len));
1.1 veillard 2207: }
2208:
1.50 daniel 2209: /**
2210: * xmlParseEntityValue:
2211: * @ctxt: an XML parser context
1.78 daniel 2212: * @orig: if non-NULL store a copy of the original entity value
1.50 daniel 2213: *
2214: * parse a value for ENTITY decl.
1.24 daniel 2215: *
2216: * [9] EntityValue ::= '"' ([^%&"] | PEReference | Reference)* '"' |
2217: * "'" ([^%&'] | PEReference | Reference)* "'"
1.68 daniel 2218: *
1.78 daniel 2219: * Returns the EntityValue parsed with reference substitued or NULL
1.24 daniel 2220: */
2221:
1.55 daniel 2222: CHAR *
1.78 daniel 2223: xmlParseEntityValue(xmlParserCtxtPtr ctxt, CHAR **orig) {
1.77 daniel 2224: CHAR *ret = NULL;
1.78 daniel 2225: const CHAR *org = NULL;
1.79 daniel 2226: const CHAR *tst = NULL;
2227: const CHAR *temp = NULL;
1.98 daniel 2228: xmlParserInputPtr input;
1.24 daniel 2229:
1.91 daniel 2230: SHRINK;
1.40 daniel 2231: if (CUR == '"') {
1.96 daniel 2232: ctxt->instate = XML_PARSER_ENTITY_VALUE;
1.98 daniel 2233: input = ctxt->input;
1.40 daniel 2234: NEXT;
1.78 daniel 2235: org = CUR_PTR;
1.98 daniel 2236: /*
2237: * NOTE: 4.4.5 Included in Literal
2238: * When a parameter entity reference appears in a literal entity
2239: * value, ... a single or double quote character in the replacement
2240: * text is always treated as a normal data character and will not
2241: * terminate the literal.
2242: * In practice it means we stop the loop only when back at parsing
2243: * the initial entity and the quote is found
2244: */
2245: while ((CUR != '"') || (ctxt->input != input)) {
1.79 daniel 2246: tst = CUR_PTR;
1.98 daniel 2247: /*
2248: * NOTE: 4.4.7 Bypassed
2249: * When a general entity reference appears in the EntityValue in
2250: * an entity declaration, it is bypassed and left as is.
2251: * so XML_SUBSTITUTE_REF is not set.
2252: */
2253: if (ctxt->input != input)
2254: temp = xmlDecodeEntities(ctxt, -1, XML_SUBSTITUTE_PEREF,
2255: 0, 0, 0);
2256: else
2257: temp = xmlDecodeEntities(ctxt, -1, XML_SUBSTITUTE_PEREF,
2258: '"', 0, 0);
1.94 daniel 2259:
2260: /*
2261: * Pop-up of finished entities.
2262: */
2263: while ((CUR == 0) && (ctxt->inputNr > 1))
2264: xmlPopInput(ctxt);
2265:
2266: if ((temp == NULL) && (tst == CUR_PTR)) {
2267: ret = xmlStrndup("", 0);
2268: break;
2269: }
2270: if ((temp[0] == 0) && (tst == CUR_PTR)) {
2271: free((char *)temp);
2272: ret = xmlStrndup("", 0);
2273: break;
2274: }
1.79 daniel 2275: ret = xmlStrcat(ret, temp);
1.80 daniel 2276: if (temp != NULL) free((char *)temp);
1.94 daniel 2277: GROW;
1.79 daniel 2278: }
1.77 daniel 2279: if (CUR != '"') {
1.55 daniel 2280: if ((ctxt->sax != NULL) && (ctxt->sax->error != NULL))
1.79 daniel 2281: ctxt->sax->error(ctxt->userData, "EntityValue: \" expected\n");
1.59 daniel 2282: ctxt->wellFormed = 0;
1.78 daniel 2283: } else {
1.99 daniel 2284: if (orig != NULL) /* !!!!!!!!! */
1.78 daniel 2285: *orig = xmlStrndup(org, CUR_PTR - org);
1.94 daniel 2286: if (ret == NULL)
2287: ret = xmlStrndup("", 0);
1.40 daniel 2288: NEXT;
1.78 daniel 2289: }
1.40 daniel 2290: } else if (CUR == '\'') {
1.96 daniel 2291: ctxt->instate = XML_PARSER_ENTITY_VALUE;
1.98 daniel 2292: input = ctxt->input;
1.40 daniel 2293: NEXT;
1.78 daniel 2294: org = CUR_PTR;
1.98 daniel 2295: /*
2296: * NOTE: 4.4.5 Included in Literal
2297: * When a parameter entity reference appears in a literal entity
2298: * value, ... a single or double quote character in the replacement
2299: * text is always treated as a normal data character and will not
2300: * terminate the literal.
2301: * In practice it means we stop the loop only when back at parsing
2302: * the initial entity and the quote is found
2303: */
2304: while ((CUR != '\'') || (ctxt->input != input)) {
1.79 daniel 2305: tst = CUR_PTR;
1.98 daniel 2306: /*
2307: * NOTE: 4.4.7 Bypassed
2308: * When a general entity reference appears in the EntityValue in
2309: * an entity declaration, it is bypassed and left as is.
2310: * so XML_SUBSTITUTE_REF is not set.
2311: */
2312: if (ctxt->input != input)
2313: temp = xmlDecodeEntities(ctxt, -1, XML_SUBSTITUTE_PEREF,
2314: 0, 0, 0);
2315: else
2316: temp = xmlDecodeEntities(ctxt, -1, XML_SUBSTITUTE_PEREF,
2317: '\'', 0, 0);
1.94 daniel 2318:
2319: /*
2320: * Pop-up of finished entities.
2321: */
2322: while ((CUR == 0) && (ctxt->inputNr > 1))
2323: xmlPopInput(ctxt);
2324:
2325: if ((temp == NULL) && (tst == CUR_PTR)) {
2326: ret = xmlStrndup("", 0);
2327: break;
2328: }
2329: if ((temp[0] == 0) && (tst == CUR_PTR)) {
2330: free((char *)temp);
2331: ret = xmlStrndup("", 0);
2332: break;
2333: }
1.79 daniel 2334: ret = xmlStrcat(ret, temp);
1.80 daniel 2335: if (temp != NULL) free((char *)temp);
1.94 daniel 2336: GROW;
1.79 daniel 2337: }
1.77 daniel 2338: if (CUR != '\'') {
1.55 daniel 2339: if ((ctxt->sax != NULL) && (ctxt->sax->error != NULL))
1.77 daniel 2340: ctxt->sax->error(ctxt->userData, "EntityValue: ' expected\n");
1.59 daniel 2341: ctxt->wellFormed = 0;
1.78 daniel 2342: } else {
1.99 daniel 2343: if (orig != NULL) /* !!!!!!!!! */
1.78 daniel 2344: *orig = xmlStrndup(org, CUR_PTR - org);
1.94 daniel 2345: if (ret == NULL)
2346: ret = xmlStrndup("", 0);
1.40 daniel 2347: NEXT;
1.78 daniel 2348: }
1.24 daniel 2349: } else {
1.55 daniel 2350: if ((ctxt->sax != NULL) && (ctxt->sax->error != NULL))
1.77 daniel 2351: ctxt->sax->error(ctxt->userData, "EntityValue: \" or ' expected\n");
1.59 daniel 2352: ctxt->wellFormed = 0;
1.24 daniel 2353: }
2354:
2355: return(ret);
2356: }
2357:
1.50 daniel 2358: /**
2359: * xmlParseAttValue:
2360: * @ctxt: an XML parser context
2361: *
2362: * parse a value for an attribute
1.78 daniel 2363: * Note: the parser won't do substitution of entities here, this
1.79 daniel 2364: * will be handled later in xmlStringGetNodeList, unless it was
2365: * asked for ctxt->replaceEntities != 0
1.29 daniel 2366: *
2367: * [10] AttValue ::= '"' ([^<&"] | Reference)* '"' |
2368: * "'" ([^<&'] | Reference)* "'"
1.68 daniel 2369: *
2370: * Returns the AttValue parsed or NULL.
1.29 daniel 2371: */
2372:
1.55 daniel 2373: CHAR *
2374: xmlParseAttValue(xmlParserCtxtPtr ctxt) {
1.77 daniel 2375: CHAR *ret = NULL;
1.29 daniel 2376:
1.91 daniel 2377: SHRINK;
1.40 daniel 2378: if (CUR == '"') {
1.96 daniel 2379: ctxt->instate = XML_PARSER_ATTRIBUTE_VALUE;
1.40 daniel 2380: NEXT;
1.98 daniel 2381: ret = xmlDecodeEntities(ctxt, -1, XML_SUBSTITUTE_REF, '"', '<', 0);
1.77 daniel 2382: if (CUR == '<') {
2383: if ((ctxt->sax != NULL) && (ctxt->sax->error != NULL))
2384: ctxt->sax->error(ctxt->userData,
2385: "Unescaped '<' not allowed in attributes values\n");
2386: ctxt->wellFormed = 0;
1.29 daniel 2387: }
1.77 daniel 2388: if (CUR != '"') {
1.55 daniel 2389: if ((ctxt->sax != NULL) && (ctxt->sax->error != NULL))
1.77 daniel 2390: ctxt->sax->error(ctxt->userData, "AttValue: ' expected\n");
1.59 daniel 2391: ctxt->wellFormed = 0;
1.77 daniel 2392: } else
1.40 daniel 2393: NEXT;
2394: } else if (CUR == '\'') {
1.96 daniel 2395: ctxt->instate = XML_PARSER_ATTRIBUTE_VALUE;
1.40 daniel 2396: NEXT;
1.98 daniel 2397: ret = xmlDecodeEntities(ctxt, -1, XML_SUBSTITUTE_REF, '\'', '<', 0);
1.77 daniel 2398: if (CUR == '<') {
2399: if ((ctxt->sax != NULL) && (ctxt->sax->error != NULL))
2400: ctxt->sax->error(ctxt->userData,
2401: "Unescaped '<' not allowed in attributes values\n");
2402: ctxt->wellFormed = 0;
1.29 daniel 2403: }
1.77 daniel 2404: if (CUR != '\'') {
1.55 daniel 2405: if ((ctxt->sax != NULL) && (ctxt->sax->error != NULL))
1.77 daniel 2406: ctxt->sax->error(ctxt->userData, "AttValue: ' expected\n");
1.59 daniel 2407: ctxt->wellFormed = 0;
1.77 daniel 2408: } else
1.40 daniel 2409: NEXT;
1.29 daniel 2410: } else {
1.55 daniel 2411: if ((ctxt->sax != NULL) && (ctxt->sax->error != NULL))
1.74 daniel 2412: ctxt->sax->error(ctxt->userData, "AttValue: \" or ' expected\n");
1.59 daniel 2413: ctxt->wellFormed = 0;
1.29 daniel 2414: }
2415:
2416: return(ret);
2417: }
2418:
1.50 daniel 2419: /**
2420: * xmlParseSystemLiteral:
2421: * @ctxt: an XML parser context
2422: *
2423: * parse an XML Literal
1.21 daniel 2424: *
1.22 daniel 2425: * [11] SystemLiteral ::= ('"' [^"]* '"') | ("'" [^']* "'")
1.68 daniel 2426: *
2427: * Returns the SystemLiteral parsed or NULL
1.21 daniel 2428: */
2429:
1.55 daniel 2430: CHAR *
2431: xmlParseSystemLiteral(xmlParserCtxtPtr ctxt) {
1.21 daniel 2432: const CHAR *q;
2433: CHAR *ret = NULL;
2434:
1.91 daniel 2435: SHRINK;
1.40 daniel 2436: if (CUR == '"') {
2437: NEXT;
2438: q = CUR_PTR;
2439: while ((IS_CHAR(CUR)) && (CUR != '"'))
2440: NEXT;
2441: if (!IS_CHAR(CUR)) {
1.55 daniel 2442: if ((ctxt->sax != NULL) && (ctxt->sax->error != NULL))
1.74 daniel 2443: ctxt->sax->error(ctxt->userData, "Unfinished SystemLiteral\n");
1.59 daniel 2444: ctxt->wellFormed = 0;
1.21 daniel 2445: } else {
1.40 daniel 2446: ret = xmlStrndup(q, CUR_PTR - q);
2447: NEXT;
1.21 daniel 2448: }
1.40 daniel 2449: } else if (CUR == '\'') {
2450: NEXT;
2451: q = CUR_PTR;
2452: while ((IS_CHAR(CUR)) && (CUR != '\''))
2453: NEXT;
2454: if (!IS_CHAR(CUR)) {
1.55 daniel 2455: if ((ctxt->sax != NULL) && (ctxt->sax->error != NULL))
1.74 daniel 2456: ctxt->sax->error(ctxt->userData, "Unfinished SystemLiteral\n");
1.59 daniel 2457: ctxt->wellFormed = 0;
1.21 daniel 2458: } else {
1.40 daniel 2459: ret = xmlStrndup(q, CUR_PTR - q);
2460: NEXT;
1.21 daniel 2461: }
2462: } else {
1.55 daniel 2463: if ((ctxt->sax != NULL) && (ctxt->sax->error != NULL))
1.74 daniel 2464: ctxt->sax->error(ctxt->userData, "SystemLiteral \" or ' expected\n");
1.59 daniel 2465: ctxt->wellFormed = 0;
1.21 daniel 2466: }
2467:
2468: return(ret);
2469: }
2470:
1.50 daniel 2471: /**
2472: * xmlParsePubidLiteral:
2473: * @ctxt: an XML parser context
1.21 daniel 2474: *
1.50 daniel 2475: * parse an XML public literal
1.68 daniel 2476: *
2477: * [12] PubidLiteral ::= '"' PubidChar* '"' | "'" (PubidChar - "'")* "'"
2478: *
2479: * Returns the PubidLiteral parsed or NULL.
1.21 daniel 2480: */
2481:
1.55 daniel 2482: CHAR *
2483: xmlParsePubidLiteral(xmlParserCtxtPtr ctxt) {
1.21 daniel 2484: const CHAR *q;
2485: CHAR *ret = NULL;
2486: /*
2487: * Name ::= (Letter | '_') (NameChar)*
2488: */
1.91 daniel 2489: SHRINK;
1.40 daniel 2490: if (CUR == '"') {
2491: NEXT;
2492: q = CUR_PTR;
2493: while (IS_PUBIDCHAR(CUR)) NEXT;
2494: if (CUR != '"') {
1.55 daniel 2495: if ((ctxt->sax != NULL) && (ctxt->sax->error != NULL))
1.74 daniel 2496: ctxt->sax->error(ctxt->userData, "Unfinished PubidLiteral\n");
1.59 daniel 2497: ctxt->wellFormed = 0;
1.21 daniel 2498: } else {
1.40 daniel 2499: ret = xmlStrndup(q, CUR_PTR - q);
2500: NEXT;
1.21 daniel 2501: }
1.40 daniel 2502: } else if (CUR == '\'') {
2503: NEXT;
2504: q = CUR_PTR;
2505: while ((IS_LETTER(CUR)) && (CUR != '\''))
2506: NEXT;
2507: if (!IS_LETTER(CUR)) {
1.55 daniel 2508: if ((ctxt->sax != NULL) && (ctxt->sax->error != NULL))
1.74 daniel 2509: ctxt->sax->error(ctxt->userData, "Unfinished PubidLiteral\n");
1.59 daniel 2510: ctxt->wellFormed = 0;
1.21 daniel 2511: } else {
1.40 daniel 2512: ret = xmlStrndup(q, CUR_PTR - q);
2513: NEXT;
1.21 daniel 2514: }
2515: } else {
1.55 daniel 2516: if ((ctxt->sax != NULL) && (ctxt->sax->error != NULL))
1.74 daniel 2517: ctxt->sax->error(ctxt->userData, "SystemLiteral \" or ' expected\n");
1.59 daniel 2518: ctxt->wellFormed = 0;
1.21 daniel 2519: }
2520:
2521: return(ret);
2522: }
2523:
1.50 daniel 2524: /**
2525: * xmlParseCharData:
2526: * @ctxt: an XML parser context
2527: * @cdata: int indicating whether we are within a CDATA section
2528: *
2529: * parse a CharData section.
2530: * if we are within a CDATA section ']]>' marks an end of section.
1.27 daniel 2531: *
2532: * [14] CharData ::= [^<&]* - ([^<&]* ']]>' [^<&]*)
2533: */
2534:
1.55 daniel 2535: void
2536: xmlParseCharData(xmlParserCtxtPtr ctxt, int cdata) {
1.91 daniel 2537: CHAR buf[1000];
2538: int nbchar = 0;
1.97 daniel 2539: CHAR cur;
1.27 daniel 2540:
1.91 daniel 2541: SHRINK;
1.97 daniel 2542: /*
2543: * !!!!!!!!!!!!
2544: * NOTE: NXT(0) is used here to avoid breaking on < or &
2545: * entities substitutions.
2546: */
2547: cur = CUR;
2548: while ((IS_CHAR(cur)) && (cur != '<') &&
2549: (cur != '&')) {
2550: if ((cur == ']') && (NXT(1) == ']') &&
1.59 daniel 2551: (NXT(2) == '>')) {
2552: if (cdata) break;
2553: else {
2554: if ((ctxt->sax != NULL) && (ctxt->sax->error != NULL))
1.74 daniel 2555: ctxt->sax->error(ctxt->userData,
1.59 daniel 2556: "Sequence ']]>' not allowed in content\n");
2557: ctxt->wellFormed = 0;
2558: }
2559: }
1.91 daniel 2560: buf[nbchar++] = CUR;
2561: if (nbchar == 1000) {
2562: /*
2563: * Ok the segment is to be consumed as chars.
2564: */
2565: if (ctxt->sax != NULL) {
2566: if (areBlanks(ctxt, buf, nbchar)) {
2567: if (ctxt->sax->ignorableWhitespace != NULL)
2568: ctxt->sax->ignorableWhitespace(ctxt->userData,
2569: buf, nbchar);
2570: } else {
2571: if (ctxt->sax->characters != NULL)
2572: ctxt->sax->characters(ctxt->userData, buf, nbchar);
2573: }
2574: }
2575: nbchar = 0;
2576: }
1.40 daniel 2577: NEXT;
1.97 daniel 2578: cur = CUR;
1.27 daniel 2579: }
1.91 daniel 2580: if (nbchar != 0) {
2581: /*
2582: * Ok the segment is to be consumed as chars.
2583: */
2584: if (ctxt->sax != NULL) {
2585: if (areBlanks(ctxt, buf, nbchar)) {
2586: if (ctxt->sax->ignorableWhitespace != NULL)
2587: ctxt->sax->ignorableWhitespace(ctxt->userData, buf, nbchar);
2588: } else {
2589: if (ctxt->sax->characters != NULL)
2590: ctxt->sax->characters(ctxt->userData, buf, nbchar);
2591: }
2592: }
1.45 daniel 2593: }
1.27 daniel 2594: }
2595:
1.50 daniel 2596: /**
2597: * xmlParseExternalID:
2598: * @ctxt: an XML parser context
2599: * @publicID: a CHAR** receiving PubidLiteral
1.67 daniel 2600: * @strict: indicate whether we should restrict parsing to only
2601: * production [75], see NOTE below
1.50 daniel 2602: *
1.67 daniel 2603: * Parse an External ID or a Public ID
2604: *
2605: * NOTE: Productions [75] and [83] interract badly since [75] can generate
2606: * 'PUBLIC' S PubidLiteral S SystemLiteral
1.22 daniel 2607: *
2608: * [75] ExternalID ::= 'SYSTEM' S SystemLiteral
2609: * | 'PUBLIC' S PubidLiteral S SystemLiteral
1.67 daniel 2610: *
2611: * [83] PublicID ::= 'PUBLIC' S PubidLiteral
2612: *
1.68 daniel 2613: * Returns the function returns SystemLiteral and in the second
1.67 daniel 2614: * case publicID receives PubidLiteral, is strict is off
2615: * it is possible to return NULL and have publicID set.
1.22 daniel 2616: */
2617:
1.55 daniel 2618: CHAR *
1.67 daniel 2619: xmlParseExternalID(xmlParserCtxtPtr ctxt, CHAR **publicID, int strict) {
1.39 daniel 2620: CHAR *URI = NULL;
1.22 daniel 2621:
1.91 daniel 2622: SHRINK;
1.40 daniel 2623: if ((CUR == 'S') && (NXT(1) == 'Y') &&
2624: (NXT(2) == 'S') && (NXT(3) == 'T') &&
2625: (NXT(4) == 'E') && (NXT(5) == 'M')) {
2626: SKIP(6);
1.59 daniel 2627: if (!IS_BLANK(CUR)) {
2628: if ((ctxt->sax != NULL) && (ctxt->sax->error != NULL))
1.74 daniel 2629: ctxt->sax->error(ctxt->userData,
1.59 daniel 2630: "Space required after 'SYSTEM'\n");
2631: ctxt->wellFormed = 0;
2632: }
1.42 daniel 2633: SKIP_BLANKS;
1.39 daniel 2634: URI = xmlParseSystemLiteral(ctxt);
1.59 daniel 2635: if (URI == NULL) {
1.55 daniel 2636: if ((ctxt->sax != NULL) && (ctxt->sax->error != NULL))
1.74 daniel 2637: ctxt->sax->error(ctxt->userData,
1.39 daniel 2638: "xmlParseExternalID: SYSTEM, no URI\n");
1.59 daniel 2639: ctxt->wellFormed = 0;
2640: }
1.40 daniel 2641: } else if ((CUR == 'P') && (NXT(1) == 'U') &&
2642: (NXT(2) == 'B') && (NXT(3) == 'L') &&
2643: (NXT(4) == 'I') && (NXT(5) == 'C')) {
2644: SKIP(6);
1.59 daniel 2645: if (!IS_BLANK(CUR)) {
2646: if ((ctxt->sax != NULL) && (ctxt->sax->error != NULL))
1.74 daniel 2647: ctxt->sax->error(ctxt->userData,
1.59 daniel 2648: "Space required after 'PUBLIC'\n");
2649: ctxt->wellFormed = 0;
2650: }
1.42 daniel 2651: SKIP_BLANKS;
1.39 daniel 2652: *publicID = xmlParsePubidLiteral(ctxt);
1.59 daniel 2653: if (*publicID == NULL) {
1.55 daniel 2654: if ((ctxt->sax != NULL) && (ctxt->sax->error != NULL))
1.74 daniel 2655: ctxt->sax->error(ctxt->userData,
1.39 daniel 2656: "xmlParseExternalID: PUBLIC, no Public Identifier\n");
1.59 daniel 2657: ctxt->wellFormed = 0;
2658: }
1.67 daniel 2659: if (strict) {
2660: /*
2661: * We don't handle [83] so "S SystemLiteral" is required.
2662: */
2663: if (!IS_BLANK(CUR)) {
2664: if ((ctxt->sax != NULL) && (ctxt->sax->error != NULL))
1.74 daniel 2665: ctxt->sax->error(ctxt->userData,
1.67 daniel 2666: "Space required after the Public Identifier\n");
2667: ctxt->wellFormed = 0;
2668: }
2669: } else {
2670: /*
2671: * We handle [83] so we return immediately, if
2672: * "S SystemLiteral" is not detected. From a purely parsing
2673: * point of view that's a nice mess.
2674: */
2675: const CHAR *ptr = CUR_PTR;
2676: if (!IS_BLANK(*ptr)) return(NULL);
2677:
2678: while (IS_BLANK(*ptr)) ptr++;
2679: if ((*ptr != '\'') || (*ptr != '"')) return(NULL);
1.59 daniel 2680: }
1.42 daniel 2681: SKIP_BLANKS;
1.39 daniel 2682: URI = xmlParseSystemLiteral(ctxt);
1.59 daniel 2683: if (URI == NULL) {
1.55 daniel 2684: if ((ctxt->sax != NULL) && (ctxt->sax->error != NULL))
1.74 daniel 2685: ctxt->sax->error(ctxt->userData,
1.39 daniel 2686: "xmlParseExternalID: PUBLIC, no URI\n");
1.59 daniel 2687: ctxt->wellFormed = 0;
2688: }
1.22 daniel 2689: }
1.39 daniel 2690: return(URI);
1.22 daniel 2691: }
2692:
1.50 daniel 2693: /**
2694: * xmlParseComment:
1.69 daniel 2695: * @ctxt: an XML parser context
2696: * @create: should we create a node, or just skip the content
1.50 daniel 2697: *
1.3 veillard 2698: * Skip an XML (SGML) comment <!-- .... -->
1.31 daniel 2699: * This may or may not create a node (depending on the context)
1.38 daniel 2700: * The spec says that "For compatibility, the string "--" (double-hyphen)
2701: * must not occur within comments. "
1.22 daniel 2702: *
2703: * [15] Comment ::= '<!--' ((Char - '-') | ('-' (Char - '-')))* '-->'
1.3 veillard 2704: */
1.72 daniel 2705: void
1.69 daniel 2706: xmlParseComment(xmlParserCtxtPtr ctxt, int create) {
1.17 daniel 2707: const CHAR *q, *start;
2708: const CHAR *r;
1.39 daniel 2709: CHAR *val;
1.3 veillard 2710:
2711: /*
1.22 daniel 2712: * Check that there is a comment right here.
1.3 veillard 2713: */
1.40 daniel 2714: if ((CUR != '<') || (NXT(1) != '!') ||
1.72 daniel 2715: (NXT(2) != '-') || (NXT(3) != '-')) return;
1.3 veillard 2716:
1.97 daniel 2717: ctxt->instate = XML_PARSER_COMMENT;
1.91 daniel 2718: SHRINK;
1.40 daniel 2719: SKIP(4);
2720: start = q = CUR_PTR;
2721: NEXT;
2722: r = CUR_PTR;
2723: NEXT;
2724: while (IS_CHAR(CUR) &&
2725: ((CUR == ':') || (CUR != '>') ||
1.16 daniel 2726: (*r != '-') || (*q != '-'))) {
1.59 daniel 2727: if ((*r == '-') && (*q == '-')) {
1.55 daniel 2728: if ((ctxt->sax != NULL) && (ctxt->sax->error != NULL))
1.74 daniel 2729: ctxt->sax->error(ctxt->userData,
1.38 daniel 2730: "Comment must not contain '--' (double-hyphen)`\n");
1.59 daniel 2731: ctxt->wellFormed = 0;
2732: }
1.40 daniel 2733: NEXT;r++;q++;
1.3 veillard 2734: }
1.40 daniel 2735: if (!IS_CHAR(CUR)) {
1.55 daniel 2736: if ((ctxt->sax != NULL) && (ctxt->sax->error != NULL))
1.74 daniel 2737: ctxt->sax->error(ctxt->userData, "Comment not terminated \n<!--%.50s\n", start);
1.59 daniel 2738: ctxt->wellFormed = 0;
1.3 veillard 2739: } else {
1.40 daniel 2740: NEXT;
1.31 daniel 2741: if (create) {
1.39 daniel 2742: val = xmlStrndup(start, q - start);
1.72 daniel 2743: if ((ctxt->sax != NULL) && (ctxt->sax->comment != NULL))
1.74 daniel 2744: ctxt->sax->comment(ctxt->userData, val);
1.39 daniel 2745: free(val);
1.31 daniel 2746: }
1.3 veillard 2747: }
2748: }
2749:
1.50 daniel 2750: /**
2751: * xmlParsePITarget:
2752: * @ctxt: an XML parser context
2753: *
2754: * parse the name of a PI
1.22 daniel 2755: *
2756: * [17] PITarget ::= Name - (('X' | 'x') ('M' | 'm') ('L' | 'l'))
1.68 daniel 2757: *
2758: * Returns the PITarget name or NULL
1.22 daniel 2759: */
2760:
1.55 daniel 2761: CHAR *
2762: xmlParsePITarget(xmlParserCtxtPtr ctxt) {
1.22 daniel 2763: CHAR *name;
2764:
2765: name = xmlParseName(ctxt);
2766: if ((name != NULL) && (name[3] == 0) &&
2767: ((name[0] == 'x') || (name[0] == 'X')) &&
1.31 daniel 2768: ((name[1] == 'm') || (name[1] == 'M')) &&
2769: ((name[2] == 'l') || (name[2] == 'L'))) {
1.55 daniel 2770: if ((ctxt->sax != NULL) && (ctxt->sax->error != NULL))
1.74 daniel 2771: ctxt->sax->error(ctxt->userData, "xmlParsePItarget: invalid name prefix 'xml'\n");
1.22 daniel 2772: return(NULL);
2773: }
2774: return(name);
2775: }
2776:
1.50 daniel 2777: /**
2778: * xmlParsePI:
2779: * @ctxt: an XML parser context
2780: *
2781: * parse an XML Processing Instruction.
1.22 daniel 2782: *
2783: * [16] PI ::= '<?' PITarget (S (Char* - (Char* '?>' Char*)))? '?>'
1.68 daniel 2784: *
1.69 daniel 2785: * The processing is transfered to SAX once parsed.
1.3 veillard 2786: */
2787:
1.55 daniel 2788: void
2789: xmlParsePI(xmlParserCtxtPtr ctxt) {
1.22 daniel 2790: CHAR *target;
2791:
1.40 daniel 2792: if ((CUR == '<') && (NXT(1) == '?')) {
1.3 veillard 2793: /*
2794: * this is a Processing Instruction.
2795: */
1.40 daniel 2796: SKIP(2);
1.91 daniel 2797: SHRINK;
1.3 veillard 2798:
2799: /*
1.22 daniel 2800: * Parse the target name and check for special support like
2801: * namespace.
1.3 veillard 2802: */
1.22 daniel 2803: target = xmlParsePITarget(ctxt);
2804: if (target != NULL) {
1.72 daniel 2805: const CHAR *q = CUR_PTR;
2806:
2807: while (IS_CHAR(CUR) &&
2808: ((CUR != '?') || (NXT(1) != '>')))
2809: NEXT;
2810: if (!IS_CHAR(CUR)) {
2811: if ((ctxt->sax != NULL) && (ctxt->sax->error != NULL))
1.74 daniel 2812: ctxt->sax->error(ctxt->userData,
1.72 daniel 2813: "xmlParsePI: PI %s never end ...\n", target);
2814: ctxt->wellFormed = 0;
1.22 daniel 2815: } else {
1.72 daniel 2816: CHAR *data;
1.44 daniel 2817:
1.72 daniel 2818: data = xmlStrndup(q, CUR_PTR - q);
2819: SKIP(2);
1.44 daniel 2820:
1.72 daniel 2821: /*
2822: * SAX: PI detected.
2823: */
2824: if ((ctxt->sax) &&
2825: (ctxt->sax->processingInstruction != NULL))
1.99 daniel 2826: ctxt->sax->processingInstruction(ctxt->userData,
2827: target, data);
1.72 daniel 2828: free(data);
1.22 daniel 2829: }
1.39 daniel 2830: free(target);
1.3 veillard 2831: } else {
1.55 daniel 2832: if ((ctxt->sax != NULL) && (ctxt->sax->error != NULL))
1.99 daniel 2833: ctxt->sax->error(ctxt->userData,
2834: "xmlParsePI : no target name\n");
1.59 daniel 2835: ctxt->wellFormed = 0;
1.22 daniel 2836: }
2837: }
2838: }
2839:
1.50 daniel 2840: /**
2841: * xmlParseNotationDecl:
2842: * @ctxt: an XML parser context
2843: *
2844: * parse a notation declaration
1.22 daniel 2845: *
2846: * [82] NotationDecl ::= '<!NOTATION' S Name S (ExternalID | PublicID) S? '>'
2847: *
2848: * Hence there is actually 3 choices:
2849: * 'PUBLIC' S PubidLiteral
2850: * 'PUBLIC' S PubidLiteral S SystemLiteral
2851: * and 'SYSTEM' S SystemLiteral
1.50 daniel 2852: *
1.67 daniel 2853: * See the NOTE on xmlParseExternalID().
1.22 daniel 2854: */
2855:
1.55 daniel 2856: void
2857: xmlParseNotationDecl(xmlParserCtxtPtr ctxt) {
1.22 daniel 2858: CHAR *name;
1.67 daniel 2859: CHAR *Pubid;
2860: CHAR *Systemid;
1.22 daniel 2861:
1.40 daniel 2862: if ((CUR == '<') && (NXT(1) == '!') &&
2863: (NXT(2) == 'N') && (NXT(3) == 'O') &&
2864: (NXT(4) == 'T') && (NXT(5) == 'A') &&
2865: (NXT(6) == 'T') && (NXT(7) == 'I') &&
1.67 daniel 2866: (NXT(8) == 'O') && (NXT(9) == 'N')) {
1.91 daniel 2867: SHRINK;
1.40 daniel 2868: SKIP(10);
1.67 daniel 2869: if (!IS_BLANK(CUR)) {
2870: if ((ctxt->sax != NULL) && (ctxt->sax->error != NULL))
1.74 daniel 2871: ctxt->sax->error(ctxt->userData, "Space required after '<!NOTATION'\n");
1.67 daniel 2872: ctxt->wellFormed = 0;
2873: return;
2874: }
2875: SKIP_BLANKS;
1.22 daniel 2876:
2877: name = xmlParseName(ctxt);
2878: if (name == NULL) {
1.55 daniel 2879: if ((ctxt->sax != NULL) && (ctxt->sax->error != NULL))
1.74 daniel 2880: ctxt->sax->error(ctxt->userData, "NOTATION: Name expected here\n");
1.67 daniel 2881: ctxt->wellFormed = 0;
2882: return;
2883: }
2884: if (!IS_BLANK(CUR)) {
2885: if ((ctxt->sax != NULL) && (ctxt->sax->error != NULL))
1.74 daniel 2886: ctxt->sax->error(ctxt->userData,
1.67 daniel 2887: "Space required after the NOTATION name'\n");
1.59 daniel 2888: ctxt->wellFormed = 0;
1.22 daniel 2889: return;
2890: }
1.42 daniel 2891: SKIP_BLANKS;
1.67 daniel 2892:
1.22 daniel 2893: /*
1.67 daniel 2894: * Parse the IDs.
1.22 daniel 2895: */
1.67 daniel 2896: Systemid = xmlParseExternalID(ctxt, &Pubid, 1);
2897: SKIP_BLANKS;
2898:
2899: if (CUR == '>') {
1.40 daniel 2900: NEXT;
1.72 daniel 2901: if ((ctxt->sax != NULL) && (ctxt->sax->notationDecl != NULL))
1.74 daniel 2902: ctxt->sax->notationDecl(ctxt->userData, name, Pubid, Systemid);
1.67 daniel 2903: } else {
2904: if ((ctxt->sax != NULL) && (ctxt->sax->error != NULL))
1.74 daniel 2905: ctxt->sax->error(ctxt->userData,
1.67 daniel 2906: "'>' required to close NOTATION declaration\n");
2907: ctxt->wellFormed = 0;
2908: }
1.22 daniel 2909: free(name);
1.67 daniel 2910: if (Systemid != NULL) free(Systemid);
2911: if (Pubid != NULL) free(Pubid);
1.22 daniel 2912: }
2913: }
2914:
1.50 daniel 2915: /**
2916: * xmlParseEntityDecl:
2917: * @ctxt: an XML parser context
2918: *
2919: * parse <!ENTITY declarations
1.22 daniel 2920: *
2921: * [70] EntityDecl ::= GEDecl | PEDecl
2922: *
2923: * [71] GEDecl ::= '<!ENTITY' S Name S EntityDef S? '>'
2924: *
2925: * [72] PEDecl ::= '<!ENTITY' S '%' S Name S PEDef S? '>'
2926: *
2927: * [73] EntityDef ::= EntityValue | (ExternalID NDataDecl?)
2928: *
2929: * [74] PEDef ::= EntityValue | ExternalID
1.24 daniel 2930: *
2931: * [76] NDataDecl ::= S 'NDATA' S Name
1.99 daniel 2932: *
2933: * [ VC: Notation Declared ]
2934: * TODO The Name must match the declared name of a notation.
1.22 daniel 2935: */
2936:
1.55 daniel 2937: void
2938: xmlParseEntityDecl(xmlParserCtxtPtr ctxt) {
1.39 daniel 2939: CHAR *name = NULL;
1.24 daniel 2940: CHAR *value = NULL;
1.39 daniel 2941: CHAR *URI = NULL, *literal = NULL;
1.24 daniel 2942: CHAR *ndata = NULL;
1.39 daniel 2943: int isParameter = 0;
1.78 daniel 2944: CHAR *orig = NULL;
1.22 daniel 2945:
1.94 daniel 2946: GROW;
1.40 daniel 2947: if ((CUR == '<') && (NXT(1) == '!') &&
2948: (NXT(2) == 'E') && (NXT(3) == 'N') &&
2949: (NXT(4) == 'T') && (NXT(5) == 'I') &&
1.59 daniel 2950: (NXT(6) == 'T') && (NXT(7) == 'Y')) {
1.96 daniel 2951: ctxt->instate = XML_PARSER_ENTITY_DECL;
1.91 daniel 2952: SHRINK;
1.40 daniel 2953: SKIP(8);
1.59 daniel 2954: if (!IS_BLANK(CUR)) {
2955: if ((ctxt->sax != NULL) && (ctxt->sax->error != NULL))
1.74 daniel 2956: ctxt->sax->error(ctxt->userData, "Space required after '<!ENTITY'\n");
1.59 daniel 2957: ctxt->wellFormed = 0;
2958: }
2959: SKIP_BLANKS;
1.40 daniel 2960:
2961: if (CUR == '%') {
2962: NEXT;
1.59 daniel 2963: if (!IS_BLANK(CUR)) {
2964: if ((ctxt->sax != NULL) && (ctxt->sax->error != NULL))
1.74 daniel 2965: ctxt->sax->error(ctxt->userData, "Space required after '%'\n");
1.59 daniel 2966: ctxt->wellFormed = 0;
2967: }
1.42 daniel 2968: SKIP_BLANKS;
1.39 daniel 2969: isParameter = 1;
1.22 daniel 2970: }
2971:
2972: name = xmlParseName(ctxt);
1.24 daniel 2973: if (name == NULL) {
1.55 daniel 2974: if ((ctxt->sax != NULL) && (ctxt->sax->error != NULL))
1.74 daniel 2975: ctxt->sax->error(ctxt->userData, "xmlParseEntityDecl: no name\n");
1.59 daniel 2976: ctxt->wellFormed = 0;
1.24 daniel 2977: return;
2978: }
1.59 daniel 2979: if (!IS_BLANK(CUR)) {
2980: if ((ctxt->sax != NULL) && (ctxt->sax->error != NULL))
1.74 daniel 2981: ctxt->sax->error(ctxt->userData,
1.59 daniel 2982: "Space required after the entity name\n");
2983: ctxt->wellFormed = 0;
2984: }
1.42 daniel 2985: SKIP_BLANKS;
1.24 daniel 2986:
1.22 daniel 2987: /*
1.68 daniel 2988: * handle the various case of definitions...
1.22 daniel 2989: */
1.39 daniel 2990: if (isParameter) {
1.40 daniel 2991: if ((CUR == '"') || (CUR == '\''))
1.78 daniel 2992: value = xmlParseEntityValue(ctxt, &orig);
1.39 daniel 2993: if (value) {
1.72 daniel 2994: if ((ctxt->sax != NULL) && (ctxt->sax->entityDecl != NULL))
1.74 daniel 2995: ctxt->sax->entityDecl(ctxt->userData, name,
1.39 daniel 2996: XML_INTERNAL_PARAMETER_ENTITY,
2997: NULL, NULL, value);
2998: }
1.24 daniel 2999: else {
1.67 daniel 3000: URI = xmlParseExternalID(ctxt, &literal, 1);
1.39 daniel 3001: if (URI) {
1.72 daniel 3002: if ((ctxt->sax != NULL) && (ctxt->sax->entityDecl != NULL))
1.74 daniel 3003: ctxt->sax->entityDecl(ctxt->userData, name,
1.39 daniel 3004: XML_EXTERNAL_PARAMETER_ENTITY,
3005: literal, URI, NULL);
3006: }
1.24 daniel 3007: }
3008: } else {
1.40 daniel 3009: if ((CUR == '"') || (CUR == '\'')) {
1.78 daniel 3010: value = xmlParseEntityValue(ctxt, &orig);
1.72 daniel 3011: if ((ctxt->sax != NULL) && (ctxt->sax->entityDecl != NULL))
1.74 daniel 3012: ctxt->sax->entityDecl(ctxt->userData, name,
1.39 daniel 3013: XML_INTERNAL_GENERAL_ENTITY,
3014: NULL, NULL, value);
3015: } else {
1.67 daniel 3016: URI = xmlParseExternalID(ctxt, &literal, 1);
1.59 daniel 3017: if ((CUR != '>') && (!IS_BLANK(CUR))) {
3018: if ((ctxt->sax != NULL) && (ctxt->sax->error != NULL))
1.74 daniel 3019: ctxt->sax->error(ctxt->userData,
1.59 daniel 3020: "Space required before 'NDATA'\n");
3021: ctxt->wellFormed = 0;
3022: }
1.42 daniel 3023: SKIP_BLANKS;
1.40 daniel 3024: if ((CUR == 'N') && (NXT(1) == 'D') &&
3025: (NXT(2) == 'A') && (NXT(3) == 'T') &&
3026: (NXT(4) == 'A')) {
3027: SKIP(5);
1.59 daniel 3028: if (!IS_BLANK(CUR)) {
3029: if ((ctxt->sax != NULL) && (ctxt->sax->error != NULL))
1.74 daniel 3030: ctxt->sax->error(ctxt->userData,
1.59 daniel 3031: "Space required after 'NDATA'\n");
3032: ctxt->wellFormed = 0;
3033: }
1.42 daniel 3034: SKIP_BLANKS;
1.24 daniel 3035: ndata = xmlParseName(ctxt);
1.72 daniel 3036: if ((ctxt->sax != NULL) && (ctxt->sax->entityDecl != NULL))
1.74 daniel 3037: ctxt->sax->entityDecl(ctxt->userData, name,
1.39 daniel 3038: XML_EXTERNAL_GENERAL_UNPARSED_ENTITY,
3039: literal, URI, ndata);
3040: } else {
1.72 daniel 3041: if ((ctxt->sax != NULL) && (ctxt->sax->entityDecl != NULL))
1.74 daniel 3042: ctxt->sax->entityDecl(ctxt->userData, name,
1.39 daniel 3043: XML_EXTERNAL_GENERAL_PARSED_ENTITY,
3044: literal, URI, NULL);
1.24 daniel 3045: }
3046: }
3047: }
1.42 daniel 3048: SKIP_BLANKS;
1.40 daniel 3049: if (CUR != '>') {
1.55 daniel 3050: if ((ctxt->sax != NULL) && (ctxt->sax->error != NULL))
1.74 daniel 3051: ctxt->sax->error(ctxt->userData,
1.31 daniel 3052: "xmlParseEntityDecl: entity %s not terminated\n", name);
1.59 daniel 3053: ctxt->wellFormed = 0;
1.24 daniel 3054: } else
1.40 daniel 3055: NEXT;
1.78 daniel 3056: if (orig != NULL) {
3057: /*
1.98 daniel 3058: * Ugly mechanism to save the raw entity value.
1.78 daniel 3059: */
3060: xmlEntityPtr cur = NULL;
3061:
1.98 daniel 3062: if (isParameter) {
3063: if ((ctxt->sax != NULL) &&
3064: (ctxt->sax->getParameterEntity != NULL))
3065: cur = ctxt->sax->getParameterEntity(ctxt, name);
3066: } else {
3067: if ((ctxt->sax != NULL) &&
3068: (ctxt->sax->getEntity != NULL))
3069: cur = ctxt->sax->getEntity(ctxt, name);
3070: }
3071: if (cur != NULL) {
3072: if (cur->orig != NULL)
3073: free(orig);
3074: else
3075: cur->orig = orig;
3076: } else
1.78 daniel 3077: free(orig);
3078: }
1.39 daniel 3079: if (name != NULL) free(name);
3080: if (value != NULL) free(value);
3081: if (URI != NULL) free(URI);
3082: if (literal != NULL) free(literal);
3083: if (ndata != NULL) free(ndata);
1.22 daniel 3084: }
3085: }
3086:
1.50 daniel 3087: /**
1.59 daniel 3088: * xmlParseDefaultDecl:
3089: * @ctxt: an XML parser context
3090: * @value: Receive a possible fixed default value for the attribute
3091: *
3092: * Parse an attribute default declaration
3093: *
3094: * [60] DefaultDecl ::= '#REQUIRED' | '#IMPLIED' | (('#FIXED' S)? AttValue)
3095: *
1.99 daniel 3096: * [ VC: Required Attribute ]
3097: * TODO if the default declaration is the keyword #REQUIRED, then the
3098: * attribute must be specified for all elements of the type in the
3099: * attribute-list declaration.
3100: *
3101: * [ VC: Attribute Default Legal ]
1.102 daniel 3102: * The declared default value must meet the lexical constraints of
3103: * the declared attribute type c.f. xmlValidateAttributeDecl()
1.99 daniel 3104: *
3105: * [ VC: Fixed Attribute Default ]
3106: * TODO if an attribute has a default value declared with the #FIXED
3107: * keyword, instances of that attribute must match the default value.
3108: *
3109: * [ WFC: No < in Attribute Values ]
3110: * handled in xmlParseAttValue()
3111: *
1.59 daniel 3112: * returns: XML_ATTRIBUTE_NONE, XML_ATTRIBUTE_REQUIRED, XML_ATTRIBUTE_IMPLIED
3113: * or XML_ATTRIBUTE_FIXED.
3114: */
3115:
3116: int
3117: xmlParseDefaultDecl(xmlParserCtxtPtr ctxt, CHAR **value) {
3118: int val;
3119: CHAR *ret;
3120:
3121: *value = NULL;
3122: if ((CUR == '#') && (NXT(1) == 'R') &&
3123: (NXT(2) == 'E') && (NXT(3) == 'Q') &&
3124: (NXT(4) == 'U') && (NXT(5) == 'I') &&
3125: (NXT(6) == 'R') && (NXT(7) == 'E') &&
3126: (NXT(8) == 'D')) {
3127: SKIP(9);
3128: return(XML_ATTRIBUTE_REQUIRED);
3129: }
3130: if ((CUR == '#') && (NXT(1) == 'I') &&
3131: (NXT(2) == 'M') && (NXT(3) == 'P') &&
3132: (NXT(4) == 'L') && (NXT(5) == 'I') &&
3133: (NXT(6) == 'E') && (NXT(7) == 'D')) {
3134: SKIP(8);
3135: return(XML_ATTRIBUTE_IMPLIED);
3136: }
3137: val = XML_ATTRIBUTE_NONE;
3138: if ((CUR == '#') && (NXT(1) == 'F') &&
3139: (NXT(2) == 'I') && (NXT(3) == 'X') &&
3140: (NXT(4) == 'E') && (NXT(5) == 'D')) {
3141: SKIP(6);
3142: val = XML_ATTRIBUTE_FIXED;
3143: if (!IS_BLANK(CUR)) {
3144: if ((ctxt->sax != NULL) && (ctxt->sax->error != NULL))
1.74 daniel 3145: ctxt->sax->error(ctxt->userData, "Space required after '#FIXED'\n");
1.59 daniel 3146: ctxt->wellFormed = 0;
3147: }
3148: SKIP_BLANKS;
3149: }
3150: ret = xmlParseAttValue(ctxt);
1.96 daniel 3151: ctxt->instate = XML_PARSER_DTD;
1.59 daniel 3152: if (ret == NULL) {
3153: if ((ctxt->sax != NULL) && (ctxt->sax->error != NULL))
1.74 daniel 3154: ctxt->sax->error(ctxt->userData,
1.59 daniel 3155: "Attribute default value declaration error\n");
3156: ctxt->wellFormed = 0;
3157: } else
3158: *value = ret;
3159: return(val);
3160: }
3161:
3162: /**
1.66 daniel 3163: * xmlParseNotationType:
3164: * @ctxt: an XML parser context
3165: *
3166: * parse an Notation attribute type.
3167: *
1.99 daniel 3168: * Note: the leading 'NOTATION' S part has already being parsed...
3169: *
1.66 daniel 3170: * [58] NotationType ::= 'NOTATION' S '(' S? Name (S? '|' S? Name)* S? ')'
3171: *
1.99 daniel 3172: * [ VC: Notation Attributes ]
3173: * TODO Values of this type must match one of the notation names included
3174: * in the declaration; all notation names in the declaration must be declared.
1.66 daniel 3175: *
3176: * Returns: the notation attribute tree built while parsing
3177: */
3178:
3179: xmlEnumerationPtr
3180: xmlParseNotationType(xmlParserCtxtPtr ctxt) {
3181: CHAR *name;
3182: xmlEnumerationPtr ret = NULL, last = NULL, cur;
3183:
3184: if (CUR != '(') {
3185: if ((ctxt->sax != NULL) && (ctxt->sax->error != NULL))
1.74 daniel 3186: ctxt->sax->error(ctxt->userData, "'(' required to start 'NOTATION'\n");
1.66 daniel 3187: ctxt->wellFormed = 0;
3188: return(NULL);
3189: }
1.91 daniel 3190: SHRINK;
1.66 daniel 3191: do {
3192: NEXT;
3193: SKIP_BLANKS;
3194: name = xmlParseName(ctxt);
3195: if (name == NULL) {
3196: if ((ctxt->sax != NULL) && (ctxt->sax->error != NULL))
1.74 daniel 3197: ctxt->sax->error(ctxt->userData,
1.66 daniel 3198: "Name expected in NOTATION declaration\n");
3199: ctxt->wellFormed = 0;
3200: return(ret);
3201: }
3202: cur = xmlCreateEnumeration(name);
1.67 daniel 3203: free(name);
1.66 daniel 3204: if (cur == NULL) return(ret);
3205: if (last == NULL) ret = last = cur;
3206: else {
3207: last->next = cur;
3208: last = cur;
3209: }
3210: SKIP_BLANKS;
3211: } while (CUR == '|');
3212: if (CUR != ')') {
3213: if ((ctxt->sax != NULL) && (ctxt->sax->error != NULL))
1.74 daniel 3214: ctxt->sax->error(ctxt->userData,
1.66 daniel 3215: "')' required to finish NOTATION declaration\n");
3216: ctxt->wellFormed = 0;
3217: return(ret);
3218: }
3219: NEXT;
3220: return(ret);
3221: }
3222:
3223: /**
3224: * xmlParseEnumerationType:
3225: * @ctxt: an XML parser context
3226: *
3227: * parse an Enumeration attribute type.
3228: *
3229: * [59] Enumeration ::= '(' S? Nmtoken (S? '|' S? Nmtoken)* S? ')'
3230: *
1.99 daniel 3231: * [ VC: Enumeration ]
3232: * TODO Values of this type must match one of the Nmtoken tokens in
3233: * the declaration
3234: *
1.66 daniel 3235: * Returns: the enumeration attribute tree built while parsing
3236: */
3237:
3238: xmlEnumerationPtr
3239: xmlParseEnumerationType(xmlParserCtxtPtr ctxt) {
3240: CHAR *name;
3241: xmlEnumerationPtr ret = NULL, last = NULL, cur;
3242:
3243: if (CUR != '(') {
3244: if ((ctxt->sax != NULL) && (ctxt->sax->error != NULL))
1.74 daniel 3245: ctxt->sax->error(ctxt->userData,
1.66 daniel 3246: "'(' required to start ATTLIST enumeration\n");
3247: ctxt->wellFormed = 0;
3248: return(NULL);
3249: }
1.91 daniel 3250: SHRINK;
1.66 daniel 3251: do {
3252: NEXT;
3253: SKIP_BLANKS;
3254: name = xmlParseNmtoken(ctxt);
3255: if (name == NULL) {
3256: if ((ctxt->sax != NULL) && (ctxt->sax->error != NULL))
1.74 daniel 3257: ctxt->sax->error(ctxt->userData,
1.66 daniel 3258: "NmToken expected in ATTLIST enumeration\n");
3259: ctxt->wellFormed = 0;
3260: return(ret);
3261: }
3262: cur = xmlCreateEnumeration(name);
1.67 daniel 3263: free(name);
1.66 daniel 3264: if (cur == NULL) return(ret);
3265: if (last == NULL) ret = last = cur;
3266: else {
3267: last->next = cur;
3268: last = cur;
3269: }
3270: SKIP_BLANKS;
3271: } while (CUR == '|');
3272: if (CUR != ')') {
3273: if ((ctxt->sax != NULL) && (ctxt->sax->error != NULL))
1.74 daniel 3274: ctxt->sax->error(ctxt->userData,
1.66 daniel 3275: "')' required to finish ATTLIST enumeration\n");
3276: ctxt->wellFormed = 0;
3277: return(ret);
3278: }
3279: NEXT;
3280: return(ret);
3281: }
3282:
3283: /**
1.50 daniel 3284: * xmlParseEnumeratedType:
3285: * @ctxt: an XML parser context
1.66 daniel 3286: * @tree: the enumeration tree built while parsing
1.50 daniel 3287: *
1.66 daniel 3288: * parse an Enumerated attribute type.
1.22 daniel 3289: *
3290: * [57] EnumeratedType ::= NotationType | Enumeration
3291: *
3292: * [58] NotationType ::= 'NOTATION' S '(' S? Name (S? '|' S? Name)* S? ')'
3293: *
1.50 daniel 3294: *
1.66 daniel 3295: * Returns: XML_ATTRIBUTE_ENUMERATION or XML_ATTRIBUTE_NOTATION
1.22 daniel 3296: */
3297:
1.66 daniel 3298: int
3299: xmlParseEnumeratedType(xmlParserCtxtPtr ctxt, xmlEnumerationPtr *tree) {
3300: if ((CUR == 'N') && (NXT(1) == 'O') &&
3301: (NXT(2) == 'T') && (NXT(3) == 'A') &&
3302: (NXT(4) == 'T') && (NXT(5) == 'I') &&
3303: (NXT(6) == 'O') && (NXT(7) == 'N')) {
3304: SKIP(8);
3305: if (!IS_BLANK(CUR)) {
3306: if ((ctxt->sax != NULL) && (ctxt->sax->error != NULL))
1.74 daniel 3307: ctxt->sax->error(ctxt->userData, "Space required after 'NOTATION'\n");
1.66 daniel 3308: ctxt->wellFormed = 0;
3309: return(0);
3310: }
3311: SKIP_BLANKS;
3312: *tree = xmlParseNotationType(ctxt);
3313: if (*tree == NULL) return(0);
3314: return(XML_ATTRIBUTE_NOTATION);
3315: }
3316: *tree = xmlParseEnumerationType(ctxt);
3317: if (*tree == NULL) return(0);
3318: return(XML_ATTRIBUTE_ENUMERATION);
1.22 daniel 3319: }
3320:
1.50 daniel 3321: /**
3322: * xmlParseAttributeType:
3323: * @ctxt: an XML parser context
1.66 daniel 3324: * @tree: the enumeration tree built while parsing
1.50 daniel 3325: *
1.59 daniel 3326: * parse the Attribute list def for an element
1.22 daniel 3327: *
3328: * [54] AttType ::= StringType | TokenizedType | EnumeratedType
3329: *
3330: * [55] StringType ::= 'CDATA'
3331: *
3332: * [56] TokenizedType ::= 'ID' | 'IDREF' | 'IDREFS' | 'ENTITY' |
3333: * 'ENTITIES' | 'NMTOKEN' | 'NMTOKENS'
1.50 daniel 3334: *
1.102 daniel 3335: * Validity constraints for attribute values syntax are checked in
3336: * xmlValidateAttributeValue()
3337: *
1.99 daniel 3338: * [ VC: ID ]
1.102 daniel 3339: * Values of type ID must match the Name production. TODO A name must not
1.99 daniel 3340: * appear more than once in an XML document as a value of this type;
3341: * i.e., ID values must uniquely identify the elements which bear them.
3342: *
3343: * [ VC: One ID per Element Type ]
3344: * TODO No element type may have more than one ID attribute specified.
3345: *
3346: * [ VC: ID Attribute Default ]
3347: * TODO An ID attribute must have a declared default of #IMPLIED or #REQUIRED.
3348: *
3349: * [ VC: IDREF ]
1.102 daniel 3350: * Values of type IDREF must match the Name production, and values
3351: * of type IDREFS must match Names; TODO each Name must match the value of
1.99 daniel 3352: * an ID attribute on some element in the XML document; i.e. IDREF
3353: * values must match the value of some ID attribute.
3354: *
3355: * [ VC: Entity Name ]
1.102 daniel 3356: * Values of type ENTITY must match the Name production, values
3357: * of type ENTITIES must match Names; TODO each Name must match the name of
1.99 daniel 3358: * an unparsed entity declared in the DTD.
3359: *
3360: * [ VC: Name Token ]
1.102 daniel 3361: * Values of type NMTOKEN must match the Nmtoken production; values
1.99 daniel 3362: * of type NMTOKENS must match Nmtokens.
3363: *
1.69 daniel 3364: * Returns the attribute type
1.22 daniel 3365: */
1.59 daniel 3366: int
1.66 daniel 3367: xmlParseAttributeType(xmlParserCtxtPtr ctxt, xmlEnumerationPtr *tree) {
1.91 daniel 3368: SHRINK;
1.40 daniel 3369: if ((CUR == 'C') && (NXT(1) == 'D') &&
3370: (NXT(2) == 'A') && (NXT(3) == 'T') &&
3371: (NXT(4) == 'A')) {
3372: SKIP(5);
1.66 daniel 3373: return(XML_ATTRIBUTE_CDATA);
1.40 daniel 3374: } else if ((CUR == 'I') && (NXT(1) == 'D') &&
3375: (NXT(2) == 'R') && (NXT(3) == 'E') &&
1.97 daniel 3376: (NXT(4) == 'F') && (NXT(5) == 'S')) {
3377: SKIP(6);
3378: return(XML_ATTRIBUTE_IDREFS);
3379: } else if ((CUR == 'I') && (NXT(1) == 'D') &&
3380: (NXT(2) == 'R') && (NXT(3) == 'E') &&
1.40 daniel 3381: (NXT(4) == 'F')) {
3382: SKIP(5);
1.59 daniel 3383: return(XML_ATTRIBUTE_IDREF);
1.66 daniel 3384: } else if ((CUR == 'I') && (NXT(1) == 'D')) {
3385: SKIP(2);
3386: return(XML_ATTRIBUTE_ID);
1.40 daniel 3387: } else if ((CUR == 'E') && (NXT(1) == 'N') &&
3388: (NXT(2) == 'T') && (NXT(3) == 'I') &&
3389: (NXT(4) == 'T') && (NXT(5) == 'Y')) {
3390: SKIP(6);
1.59 daniel 3391: return(XML_ATTRIBUTE_ENTITY);
1.40 daniel 3392: } else if ((CUR == 'E') && (NXT(1) == 'N') &&
3393: (NXT(2) == 'T') && (NXT(3) == 'I') &&
3394: (NXT(4) == 'T') && (NXT(5) == 'I') &&
3395: (NXT(6) == 'E') && (NXT(7) == 'S')) {
3396: SKIP(8);
1.59 daniel 3397: return(XML_ATTRIBUTE_ENTITIES);
1.40 daniel 3398: } else if ((CUR == 'N') && (NXT(1) == 'M') &&
3399: (NXT(2) == 'T') && (NXT(3) == 'O') &&
3400: (NXT(4) == 'K') && (NXT(5) == 'E') &&
1.66 daniel 3401: (NXT(6) == 'N') && (NXT(7) == 'S')) {
3402: SKIP(8);
3403: return(XML_ATTRIBUTE_NMTOKENS);
3404: } else if ((CUR == 'N') && (NXT(1) == 'M') &&
3405: (NXT(2) == 'T') && (NXT(3) == 'O') &&
3406: (NXT(4) == 'K') && (NXT(5) == 'E') &&
1.40 daniel 3407: (NXT(6) == 'N')) {
3408: SKIP(7);
1.59 daniel 3409: return(XML_ATTRIBUTE_NMTOKEN);
1.22 daniel 3410: }
1.66 daniel 3411: return(xmlParseEnumeratedType(ctxt, tree));
1.22 daniel 3412: }
3413:
1.50 daniel 3414: /**
3415: * xmlParseAttributeListDecl:
3416: * @ctxt: an XML parser context
3417: *
3418: * : parse the Attribute list def for an element
1.22 daniel 3419: *
3420: * [52] AttlistDecl ::= '<!ATTLIST' S Name AttDef* S? '>'
3421: *
3422: * [53] AttDef ::= S Name S AttType S DefaultDecl
1.50 daniel 3423: *
1.22 daniel 3424: */
1.55 daniel 3425: void
3426: xmlParseAttributeListDecl(xmlParserCtxtPtr ctxt) {
1.59 daniel 3427: CHAR *elemName;
3428: CHAR *attrName;
1.103 daniel 3429: xmlEnumerationPtr tree;
1.22 daniel 3430:
1.40 daniel 3431: if ((CUR == '<') && (NXT(1) == '!') &&
3432: (NXT(2) == 'A') && (NXT(3) == 'T') &&
3433: (NXT(4) == 'T') && (NXT(5) == 'L') &&
3434: (NXT(6) == 'I') && (NXT(7) == 'S') &&
1.59 daniel 3435: (NXT(8) == 'T')) {
1.40 daniel 3436: SKIP(9);
1.59 daniel 3437: if (!IS_BLANK(CUR)) {
3438: if ((ctxt->sax != NULL) && (ctxt->sax->error != NULL))
1.74 daniel 3439: ctxt->sax->error(ctxt->userData, "Space required after '<!ATTLIST'\n");
1.59 daniel 3440: ctxt->wellFormed = 0;
3441: }
1.42 daniel 3442: SKIP_BLANKS;
1.59 daniel 3443: elemName = xmlParseName(ctxt);
3444: if (elemName == NULL) {
1.55 daniel 3445: if ((ctxt->sax != NULL) && (ctxt->sax->error != NULL))
1.74 daniel 3446: ctxt->sax->error(ctxt->userData, "ATTLIST: no name for Element\n");
1.59 daniel 3447: ctxt->wellFormed = 0;
1.22 daniel 3448: return;
3449: }
1.42 daniel 3450: SKIP_BLANKS;
1.40 daniel 3451: while (CUR != '>') {
3452: const CHAR *check = CUR_PTR;
1.59 daniel 3453: int type;
3454: int def;
3455: CHAR *defaultValue = NULL;
3456:
1.103 daniel 3457: tree = NULL;
1.59 daniel 3458: attrName = xmlParseName(ctxt);
3459: if (attrName == NULL) {
3460: if ((ctxt->sax != NULL) && (ctxt->sax->error != NULL))
1.74 daniel 3461: ctxt->sax->error(ctxt->userData, "ATTLIST: no name for Attribute\n");
1.59 daniel 3462: ctxt->wellFormed = 0;
3463: break;
3464: }
1.97 daniel 3465: GROW;
1.59 daniel 3466: if (!IS_BLANK(CUR)) {
3467: if ((ctxt->sax != NULL) && (ctxt->sax->error != NULL))
1.74 daniel 3468: ctxt->sax->error(ctxt->userData,
1.59 daniel 3469: "Space required after the attribute name\n");
3470: ctxt->wellFormed = 0;
3471: break;
3472: }
3473: SKIP_BLANKS;
3474:
1.66 daniel 3475: type = xmlParseAttributeType(ctxt, &tree);
1.59 daniel 3476: if (type <= 0) break;
1.22 daniel 3477:
1.97 daniel 3478: GROW;
1.59 daniel 3479: if (!IS_BLANK(CUR)) {
3480: if ((ctxt->sax != NULL) && (ctxt->sax->error != NULL))
1.74 daniel 3481: ctxt->sax->error(ctxt->userData,
1.59 daniel 3482: "Space required after the attribute type\n");
3483: ctxt->wellFormed = 0;
3484: break;
3485: }
1.42 daniel 3486: SKIP_BLANKS;
1.59 daniel 3487:
3488: def = xmlParseDefaultDecl(ctxt, &defaultValue);
3489: if (def <= 0) break;
3490:
1.97 daniel 3491: GROW;
1.59 daniel 3492: if (CUR != '>') {
3493: if (!IS_BLANK(CUR)) {
3494: if ((ctxt->sax != NULL) && (ctxt->sax->error != NULL))
1.74 daniel 3495: ctxt->sax->error(ctxt->userData,
1.59 daniel 3496: "Space required after the attribute default value\n");
3497: ctxt->wellFormed = 0;
3498: break;
3499: }
3500: SKIP_BLANKS;
3501: }
1.40 daniel 3502: if (check == CUR_PTR) {
1.55 daniel 3503: if ((ctxt->sax != NULL) && (ctxt->sax->error != NULL))
1.74 daniel 3504: ctxt->sax->error(ctxt->userData,
1.59 daniel 3505: "xmlParseAttributeListDecl: detected internal error\n");
1.22 daniel 3506: break;
3507: }
1.72 daniel 3508: if ((ctxt->sax != NULL) && (ctxt->sax->attributeDecl != NULL))
1.74 daniel 3509: ctxt->sax->attributeDecl(ctxt->userData, elemName, attrName,
1.66 daniel 3510: type, def, defaultValue, tree);
1.59 daniel 3511: if (attrName != NULL)
3512: free(attrName);
3513: if (defaultValue != NULL)
3514: free(defaultValue);
1.97 daniel 3515: GROW;
1.22 daniel 3516: }
1.40 daniel 3517: if (CUR == '>')
3518: NEXT;
1.22 daniel 3519:
1.59 daniel 3520: free(elemName);
1.22 daniel 3521: }
3522: }
3523:
1.50 daniel 3524: /**
1.61 daniel 3525: * xmlParseElementMixedContentDecl:
3526: * @ctxt: an XML parser context
3527: *
3528: * parse the declaration for a Mixed Element content
3529: * The leading '(' and spaces have been skipped in xmlParseElementContentDecl
3530: *
3531: * [51] Mixed ::= '(' S? '#PCDATA' (S? '|' S? Name)* S? ')*' |
3532: * '(' S? '#PCDATA' S? ')'
3533: *
1.99 daniel 3534: * [ VC: Proper Group/PE Nesting ] applies to [51] too (see [49])
3535: *
3536: * [ VC: No Duplicate Types ]
3537: * TODO The same name must not appear more than once in a single
3538: * mixed-content declaration.
3539: *
1.61 daniel 3540: * returns: the list of the xmlElementContentPtr describing the element choices
3541: */
3542: xmlElementContentPtr
1.62 daniel 3543: xmlParseElementMixedContentDecl(xmlParserCtxtPtr ctxt) {
1.64 daniel 3544: xmlElementContentPtr ret = NULL, cur = NULL, n;
1.61 daniel 3545: CHAR *elem = NULL;
3546:
1.97 daniel 3547: GROW;
1.61 daniel 3548: if ((CUR == '#') && (NXT(1) == 'P') &&
3549: (NXT(2) == 'C') && (NXT(3) == 'D') &&
3550: (NXT(4) == 'A') && (NXT(5) == 'T') &&
3551: (NXT(6) == 'A')) {
3552: SKIP(7);
3553: SKIP_BLANKS;
1.91 daniel 3554: SHRINK;
1.63 daniel 3555: if (CUR == ')') {
3556: NEXT;
3557: ret = xmlNewElementContent(NULL, XML_ELEMENT_CONTENT_PCDATA);
3558: return(ret);
3559: }
1.61 daniel 3560: if ((CUR == '(') || (CUR == '|')) {
3561: ret = cur = xmlNewElementContent(NULL, XML_ELEMENT_CONTENT_PCDATA);
3562: if (ret == NULL) return(NULL);
1.99 daniel 3563: }
1.61 daniel 3564: while (CUR == '|') {
1.64 daniel 3565: NEXT;
1.61 daniel 3566: if (elem == NULL) {
3567: ret = xmlNewElementContent(NULL, XML_ELEMENT_CONTENT_OR);
3568: if (ret == NULL) return(NULL);
3569: ret->c1 = cur;
1.64 daniel 3570: cur = ret;
1.61 daniel 3571: } else {
1.64 daniel 3572: n = xmlNewElementContent(NULL, XML_ELEMENT_CONTENT_OR);
3573: if (n == NULL) return(NULL);
3574: n->c1 = xmlNewElementContent(elem, XML_ELEMENT_CONTENT_ELEMENT);
3575: cur->c2 = n;
3576: cur = n;
1.66 daniel 3577: free(elem);
1.61 daniel 3578: }
3579: SKIP_BLANKS;
3580: elem = xmlParseName(ctxt);
3581: if (elem == NULL) {
3582: if ((ctxt->sax != NULL) && (ctxt->sax->error != NULL))
1.74 daniel 3583: ctxt->sax->error(ctxt->userData,
1.61 daniel 3584: "xmlParseElementMixedContentDecl : Name expected\n");
3585: ctxt->wellFormed = 0;
3586: xmlFreeElementContent(cur);
3587: return(NULL);
3588: }
3589: SKIP_BLANKS;
1.97 daniel 3590: GROW;
1.61 daniel 3591: }
1.63 daniel 3592: if ((CUR == ')') && (NXT(1) == '*')) {
1.66 daniel 3593: if (elem != NULL) {
1.61 daniel 3594: cur->c2 = xmlNewElementContent(elem,
3595: XML_ELEMENT_CONTENT_ELEMENT);
1.66 daniel 3596: free(elem);
3597: }
1.65 daniel 3598: ret->ocur = XML_ELEMENT_CONTENT_MULT;
1.64 daniel 3599: SKIP(2);
1.61 daniel 3600: } else {
1.66 daniel 3601: if (elem != NULL) free(elem);
1.61 daniel 3602: if ((ctxt->sax != NULL) && (ctxt->sax->error != NULL))
1.74 daniel 3603: ctxt->sax->error(ctxt->userData,
1.63 daniel 3604: "xmlParseElementMixedContentDecl : '|' or ')*' expected\n");
1.61 daniel 3605: ctxt->wellFormed = 0;
3606: xmlFreeElementContent(ret);
3607: return(NULL);
3608: }
3609:
3610: } else {
3611: if ((ctxt->sax != NULL) && (ctxt->sax->error != NULL))
1.74 daniel 3612: ctxt->sax->error(ctxt->userData,
1.61 daniel 3613: "xmlParseElementMixedContentDecl : '#PCDATA' expected\n");
3614: ctxt->wellFormed = 0;
3615: }
3616: return(ret);
3617: }
3618:
3619: /**
3620: * xmlParseElementChildrenContentDecl:
1.50 daniel 3621: * @ctxt: an XML parser context
3622: *
1.61 daniel 3623: * parse the declaration for a Mixed Element content
3624: * The leading '(' and spaces have been skipped in xmlParseElementContentDecl
1.22 daniel 3625: *
1.61 daniel 3626: *
1.22 daniel 3627: * [47] children ::= (choice | seq) ('?' | '*' | '+')?
3628: *
3629: * [48] cp ::= (Name | choice | seq) ('?' | '*' | '+')?
3630: *
3631: * [49] choice ::= '(' S? cp ( S? '|' S? cp )* S? ')'
3632: *
3633: * [50] seq ::= '(' S? cp ( S? ',' S? cp )* S? ')'
3634: *
1.99 daniel 3635: * [ VC: Proper Group/PE Nesting ] applies to [49] and [50]
3636: * TODO Parameter-entity replacement text must be properly nested
3637: * with parenthetized groups. That is to say, if either of the
3638: * opening or closing parentheses in a choice, seq, or Mixed
3639: * construct is contained in the replacement text for a parameter
3640: * entity, both must be contained in the same replacement text. For
3641: * interoperability, if a parameter-entity reference appears in a
3642: * choice, seq, or Mixed construct, its replacement text should not
3643: * be empty, and neither the first nor last non-blank character of
3644: * the replacement text should be a connector (| or ,).
3645: *
1.62 daniel 3646: * returns: the tree of xmlElementContentPtr describing the element
1.61 daniel 3647: * hierarchy.
3648: */
3649: xmlElementContentPtr
1.62 daniel 3650: xmlParseElementChildrenContentDecl(xmlParserCtxtPtr ctxt) {
1.63 daniel 3651: xmlElementContentPtr ret = NULL, cur = NULL, last = NULL, op = NULL;
1.62 daniel 3652: CHAR *elem;
3653: CHAR type = 0;
3654:
3655: SKIP_BLANKS;
1.94 daniel 3656: GROW;
1.62 daniel 3657: if (CUR == '(') {
1.63 daniel 3658: /* Recurse on first child */
1.62 daniel 3659: NEXT;
3660: SKIP_BLANKS;
3661: cur = ret = xmlParseElementChildrenContentDecl(ctxt);
3662: SKIP_BLANKS;
1.101 daniel 3663: GROW;
1.62 daniel 3664: } else {
3665: elem = xmlParseName(ctxt);
3666: if (elem == NULL) {
3667: if ((ctxt->sax != NULL) && (ctxt->sax->error != NULL))
1.74 daniel 3668: ctxt->sax->error(ctxt->userData,
1.62 daniel 3669: "xmlParseElementChildrenContentDecl : Name or '(' expected\n");
3670: ctxt->wellFormed = 0;
3671: return(NULL);
3672: }
3673: cur = ret = xmlNewElementContent(elem, XML_ELEMENT_CONTENT_ELEMENT);
1.101 daniel 3674: GROW;
1.62 daniel 3675: if (CUR == '?') {
1.104 ! daniel 3676: cur->ocur = XML_ELEMENT_CONTENT_OPT;
1.62 daniel 3677: NEXT;
3678: } else if (CUR == '*') {
1.104 ! daniel 3679: cur->ocur = XML_ELEMENT_CONTENT_MULT;
1.62 daniel 3680: NEXT;
3681: } else if (CUR == '+') {
1.104 ! daniel 3682: cur->ocur = XML_ELEMENT_CONTENT_PLUS;
1.62 daniel 3683: NEXT;
3684: } else {
1.104 ! daniel 3685: cur->ocur = XML_ELEMENT_CONTENT_ONCE;
1.62 daniel 3686: }
1.66 daniel 3687: free(elem);
1.101 daniel 3688: GROW;
1.62 daniel 3689: }
3690: SKIP_BLANKS;
1.91 daniel 3691: SHRINK;
1.62 daniel 3692: while (CUR != ')') {
1.63 daniel 3693: /*
3694: * Each loop we parse one separator and one element.
3695: */
1.62 daniel 3696: if (CUR == ',') {
3697: if (type == 0) type = CUR;
3698:
3699: /*
3700: * Detect "Name | Name , Name" error
3701: */
3702: else if (type != CUR) {
3703: if ((ctxt->sax != NULL) && (ctxt->sax->error != NULL))
1.74 daniel 3704: ctxt->sax->error(ctxt->userData,
1.62 daniel 3705: "xmlParseElementChildrenContentDecl : '%c' expected\n",
3706: type);
3707: ctxt->wellFormed = 0;
3708: xmlFreeElementContent(ret);
3709: return(NULL);
3710: }
1.64 daniel 3711: NEXT;
1.62 daniel 3712:
1.63 daniel 3713: op = xmlNewElementContent(NULL, XML_ELEMENT_CONTENT_SEQ);
3714: if (op == NULL) {
3715: xmlFreeElementContent(ret);
3716: return(NULL);
3717: }
3718: if (last == NULL) {
3719: op->c1 = ret;
1.65 daniel 3720: ret = cur = op;
1.63 daniel 3721: } else {
3722: cur->c2 = op;
3723: op->c1 = last;
3724: cur =op;
1.65 daniel 3725: last = NULL;
1.63 daniel 3726: }
1.62 daniel 3727: } else if (CUR == '|') {
3728: if (type == 0) type = CUR;
3729:
3730: /*
1.63 daniel 3731: * Detect "Name , Name | Name" error
1.62 daniel 3732: */
3733: else if (type != CUR) {
3734: if ((ctxt->sax != NULL) && (ctxt->sax->error != NULL))
1.74 daniel 3735: ctxt->sax->error(ctxt->userData,
1.62 daniel 3736: "xmlParseElementChildrenContentDecl : '%c' expected\n",
3737: type);
3738: ctxt->wellFormed = 0;
3739: xmlFreeElementContent(ret);
3740: return(NULL);
3741: }
1.64 daniel 3742: NEXT;
1.62 daniel 3743:
1.63 daniel 3744: op = xmlNewElementContent(NULL, XML_ELEMENT_CONTENT_OR);
3745: if (op == NULL) {
3746: xmlFreeElementContent(ret);
3747: return(NULL);
3748: }
3749: if (last == NULL) {
3750: op->c1 = ret;
1.65 daniel 3751: ret = cur = op;
1.63 daniel 3752: } else {
3753: cur->c2 = op;
3754: op->c1 = last;
3755: cur =op;
1.65 daniel 3756: last = NULL;
1.63 daniel 3757: }
1.62 daniel 3758: } else {
3759: if ((ctxt->sax != NULL) && (ctxt->sax->error != NULL))
1.74 daniel 3760: ctxt->sax->error(ctxt->userData,
1.62 daniel 3761: "xmlParseElementChildrenContentDecl : ',' '|' or ')' expected\n");
3762: ctxt->wellFormed = 0;
3763: xmlFreeElementContent(ret);
3764: return(NULL);
3765: }
1.101 daniel 3766: GROW;
1.62 daniel 3767: SKIP_BLANKS;
1.101 daniel 3768: GROW;
1.62 daniel 3769: if (CUR == '(') {
1.63 daniel 3770: /* Recurse on second child */
1.62 daniel 3771: NEXT;
3772: SKIP_BLANKS;
1.65 daniel 3773: last = xmlParseElementChildrenContentDecl(ctxt);
1.62 daniel 3774: SKIP_BLANKS;
3775: } else {
3776: elem = xmlParseName(ctxt);
3777: if (elem == NULL) {
3778: if ((ctxt->sax != NULL) && (ctxt->sax->error != NULL))
1.74 daniel 3779: ctxt->sax->error(ctxt->userData,
1.62 daniel 3780: "xmlParseElementChildrenContentDecl : Name or '(' expected\n");
3781: ctxt->wellFormed = 0;
3782: return(NULL);
3783: }
1.65 daniel 3784: last = xmlNewElementContent(elem, XML_ELEMENT_CONTENT_ELEMENT);
1.66 daniel 3785: free(elem);
1.62 daniel 3786: }
1.63 daniel 3787: if (CUR == '?') {
1.104 ! daniel 3788: last->ocur = XML_ELEMENT_CONTENT_OPT;
1.63 daniel 3789: NEXT;
3790: } else if (CUR == '*') {
1.104 ! daniel 3791: last->ocur = XML_ELEMENT_CONTENT_MULT;
1.63 daniel 3792: NEXT;
3793: } else if (CUR == '+') {
1.104 ! daniel 3794: last->ocur = XML_ELEMENT_CONTENT_PLUS;
1.63 daniel 3795: NEXT;
3796: } else {
1.104 ! daniel 3797: last->ocur = XML_ELEMENT_CONTENT_ONCE;
1.63 daniel 3798: }
3799: SKIP_BLANKS;
1.97 daniel 3800: GROW;
1.64 daniel 3801: }
1.65 daniel 3802: if ((cur != NULL) && (last != NULL)) {
3803: cur->c2 = last;
1.62 daniel 3804: }
3805: NEXT;
3806: if (CUR == '?') {
3807: ret->ocur = XML_ELEMENT_CONTENT_OPT;
3808: NEXT;
3809: } else if (CUR == '*') {
3810: ret->ocur = XML_ELEMENT_CONTENT_MULT;
3811: NEXT;
3812: } else if (CUR == '+') {
3813: ret->ocur = XML_ELEMENT_CONTENT_PLUS;
3814: NEXT;
3815: } else {
3816: ret->ocur = XML_ELEMENT_CONTENT_ONCE;
3817: }
3818: return(ret);
1.61 daniel 3819: }
3820:
3821: /**
3822: * xmlParseElementContentDecl:
3823: * @ctxt: an XML parser context
3824: * @name: the name of the element being defined.
3825: * @result: the Element Content pointer will be stored here if any
1.22 daniel 3826: *
1.61 daniel 3827: * parse the declaration for an Element content either Mixed or Children,
3828: * the cases EMPTY and ANY are handled directly in xmlParseElementDecl
3829: *
3830: * [46] contentspec ::= 'EMPTY' | 'ANY' | Mixed | children
1.50 daniel 3831: *
1.61 daniel 3832: * returns: the type of element content XML_ELEMENT_TYPE_xxx
1.22 daniel 3833: */
3834:
1.61 daniel 3835: int
3836: xmlParseElementContentDecl(xmlParserCtxtPtr ctxt, CHAR *name,
3837: xmlElementContentPtr *result) {
3838:
3839: xmlElementContentPtr tree = NULL;
3840: int res;
3841:
3842: *result = NULL;
3843:
3844: if (CUR != '(') {
3845: if ((ctxt->sax != NULL) && (ctxt->sax->error != NULL))
1.74 daniel 3846: ctxt->sax->error(ctxt->userData,
1.61 daniel 3847: "xmlParseElementContentDecl : '(' expected\n");
3848: ctxt->wellFormed = 0;
3849: return(-1);
3850: }
3851: NEXT;
1.97 daniel 3852: GROW;
1.61 daniel 3853: SKIP_BLANKS;
3854: if ((CUR == '#') && (NXT(1) == 'P') &&
3855: (NXT(2) == 'C') && (NXT(3) == 'D') &&
3856: (NXT(4) == 'A') && (NXT(5) == 'T') &&
3857: (NXT(6) == 'A')) {
1.62 daniel 3858: tree = xmlParseElementMixedContentDecl(ctxt);
1.61 daniel 3859: res = XML_ELEMENT_TYPE_MIXED;
3860: } else {
1.62 daniel 3861: tree = xmlParseElementChildrenContentDecl(ctxt);
1.61 daniel 3862: res = XML_ELEMENT_TYPE_ELEMENT;
3863: }
3864: SKIP_BLANKS;
1.63 daniel 3865: /****************************
1.61 daniel 3866: if (CUR != ')') {
3867: if ((ctxt->sax != NULL) && (ctxt->sax->error != NULL))
1.74 daniel 3868: ctxt->sax->error(ctxt->userData,
1.61 daniel 3869: "xmlParseElementContentDecl : ')' expected\n");
3870: ctxt->wellFormed = 0;
3871: return(-1);
3872: }
1.63 daniel 3873: ****************************/
3874: *result = tree;
1.61 daniel 3875: return(res);
1.22 daniel 3876: }
3877:
1.50 daniel 3878: /**
3879: * xmlParseElementDecl:
3880: * @ctxt: an XML parser context
3881: *
3882: * parse an Element declaration.
1.22 daniel 3883: *
3884: * [45] elementdecl ::= '<!ELEMENT' S Name S contentspec S? '>'
3885: *
1.99 daniel 3886: * [ VC: Unique Element Type Declaration ]
3887: * TODO No element type may be declared more than once
1.69 daniel 3888: *
3889: * Returns the type of the element, or -1 in case of error
1.22 daniel 3890: */
1.59 daniel 3891: int
1.55 daniel 3892: xmlParseElementDecl(xmlParserCtxtPtr ctxt) {
1.22 daniel 3893: CHAR *name;
1.59 daniel 3894: int ret = -1;
1.61 daniel 3895: xmlElementContentPtr content = NULL;
1.22 daniel 3896:
1.97 daniel 3897: GROW;
1.40 daniel 3898: if ((CUR == '<') && (NXT(1) == '!') &&
3899: (NXT(2) == 'E') && (NXT(3) == 'L') &&
3900: (NXT(4) == 'E') && (NXT(5) == 'M') &&
3901: (NXT(6) == 'E') && (NXT(7) == 'N') &&
1.59 daniel 3902: (NXT(8) == 'T')) {
1.40 daniel 3903: SKIP(9);
1.59 daniel 3904: if (!IS_BLANK(CUR)) {
3905: if ((ctxt->sax != NULL) && (ctxt->sax->error != NULL))
1.74 daniel 3906: ctxt->sax->error(ctxt->userData,
1.59 daniel 3907: "Space required after 'ELEMENT'\n");
3908: ctxt->wellFormed = 0;
3909: }
1.42 daniel 3910: SKIP_BLANKS;
1.22 daniel 3911: name = xmlParseName(ctxt);
3912: if (name == NULL) {
1.55 daniel 3913: if ((ctxt->sax != NULL) && (ctxt->sax->error != NULL))
1.74 daniel 3914: ctxt->sax->error(ctxt->userData,
1.59 daniel 3915: "xmlParseElementDecl: no name for Element\n");
3916: ctxt->wellFormed = 0;
3917: return(-1);
3918: }
3919: if (!IS_BLANK(CUR)) {
3920: if ((ctxt->sax != NULL) && (ctxt->sax->error != NULL))
1.74 daniel 3921: ctxt->sax->error(ctxt->userData,
1.59 daniel 3922: "Space required after the element name\n");
3923: ctxt->wellFormed = 0;
1.22 daniel 3924: }
1.42 daniel 3925: SKIP_BLANKS;
1.40 daniel 3926: if ((CUR == 'E') && (NXT(1) == 'M') &&
3927: (NXT(2) == 'P') && (NXT(3) == 'T') &&
3928: (NXT(4) == 'Y')) {
3929: SKIP(5);
1.22 daniel 3930: /*
3931: * Element must always be empty.
3932: */
1.59 daniel 3933: ret = XML_ELEMENT_TYPE_EMPTY;
1.40 daniel 3934: } else if ((CUR == 'A') && (NXT(1) == 'N') &&
3935: (NXT(2) == 'Y')) {
3936: SKIP(3);
1.22 daniel 3937: /*
3938: * Element is a generic container.
3939: */
1.59 daniel 3940: ret = XML_ELEMENT_TYPE_ANY;
1.61 daniel 3941: } else if (CUR == '(') {
3942: ret = xmlParseElementContentDecl(ctxt, name, &content);
1.22 daniel 3943: } else {
1.98 daniel 3944: /*
3945: * [ WFC: PEs in Internal Subset ] error handling.
3946: */
3947: if ((CUR == '%') && (ctxt->external == 0) &&
3948: (ctxt->inputNr == 1)) {
3949: if ((ctxt->sax != NULL) && (ctxt->sax->error != NULL))
3950: ctxt->sax->error(ctxt->userData,
3951: "PEReference: forbidden within markup decl in internal subset\n");
3952: } else {
3953: if ((ctxt->sax != NULL) && (ctxt->sax->error != NULL))
3954: ctxt->sax->error(ctxt->userData,
3955: "xmlParseElementDecl: 'EMPTY', 'ANY' or '(' expected\n");
3956: }
1.61 daniel 3957: ctxt->wellFormed = 0;
3958: if (name != NULL) free(name);
3959: return(-1);
1.22 daniel 3960: }
1.42 daniel 3961: SKIP_BLANKS;
1.40 daniel 3962: if (CUR != '>') {
1.55 daniel 3963: if ((ctxt->sax != NULL) && (ctxt->sax->error != NULL))
1.74 daniel 3964: ctxt->sax->error(ctxt->userData,
1.31 daniel 3965: "xmlParseElementDecl: expected '>' at the end\n");
1.59 daniel 3966: ctxt->wellFormed = 0;
1.61 daniel 3967: } else {
1.40 daniel 3968: NEXT;
1.72 daniel 3969: if ((ctxt->sax != NULL) && (ctxt->sax->elementDecl != NULL))
1.76 daniel 3970: ctxt->sax->elementDecl(ctxt->userData, name, ret,
3971: content);
1.61 daniel 3972: }
1.84 daniel 3973: if (content != NULL) {
3974: xmlFreeElementContent(content);
3975: }
1.61 daniel 3976: if (name != NULL) {
3977: free(name);
3978: }
1.22 daniel 3979: }
1.59 daniel 3980: return(ret);
1.22 daniel 3981: }
3982:
1.50 daniel 3983: /**
3984: * xmlParseMarkupDecl:
3985: * @ctxt: an XML parser context
3986: *
3987: * parse Markup declarations
1.22 daniel 3988: *
3989: * [29] markupdecl ::= elementdecl | AttlistDecl | EntityDecl |
3990: * NotationDecl | PI | Comment
3991: *
1.98 daniel 3992: * [ VC: Proper Declaration/PE Nesting ]
3993: * TODO Parameter-entity replacement text must be properly nested with
3994: * markup declarations. That is to say, if either the first character
3995: * or the last character of a markup declaration (markupdecl above) is
3996: * contained in the replacement text for a parameter-entity reference,
3997: * both must be contained in the same replacement text.
3998: *
3999: * [ WFC: PEs in Internal Subset ]
4000: * In the internal DTD subset, parameter-entity references can occur
4001: * only where markup declarations can occur, not within markup declarations.
4002: * (This does not apply to references that occur in external parameter
4003: * entities or to the external subset.)
1.22 daniel 4004: */
1.55 daniel 4005: void
4006: xmlParseMarkupDecl(xmlParserCtxtPtr ctxt) {
1.94 daniel 4007: GROW;
1.22 daniel 4008: xmlParseElementDecl(ctxt);
4009: xmlParseAttributeListDecl(ctxt);
4010: xmlParseEntityDecl(ctxt);
4011: xmlParseNotationDecl(ctxt);
4012: xmlParsePI(ctxt);
1.31 daniel 4013: xmlParseComment(ctxt, 0);
1.98 daniel 4014: /*
4015: * This is only for internal subset. On external entities,
4016: * the replacement is done before parsing stage
4017: */
4018: if ((ctxt->external == 0) && (ctxt->inputNr == 1))
4019: xmlParsePEReference(ctxt);
1.97 daniel 4020: ctxt->instate = XML_PARSER_DTD;
1.22 daniel 4021: }
4022:
1.50 daniel 4023: /**
1.76 daniel 4024: * xmlParseTextDecl:
4025: * @ctxt: an XML parser context
4026: *
4027: * parse an XML declaration header for external entities
4028: *
4029: * [77] TextDecl ::= '<?xml' VersionInfo? EncodingDecl S? '?>'
4030: *
4031: * Returns the only valuable info for an external parsed entity, the encoding
4032: */
4033:
4034: CHAR *
4035: xmlParseTextDecl(xmlParserCtxtPtr ctxt) {
4036: CHAR *version;
4037: CHAR *encoding = NULL;
4038:
4039: /*
4040: * We know that '<?xml' is here.
4041: */
4042: SKIP(5);
4043:
4044: if (!IS_BLANK(CUR)) {
4045: if ((ctxt->sax != NULL) && (ctxt->sax->error != NULL))
4046: ctxt->sax->error(ctxt->userData, "Blank needed after '<?xml'\n");
4047: ctxt->wellFormed = 0;
4048: }
4049: SKIP_BLANKS;
4050:
4051: /*
4052: * We may have the VersionInfo here.
4053: */
4054: version = xmlParseVersionInfo(ctxt);
1.99 daniel 4055:
1.76 daniel 4056: /* TODO: we should actually inherit from the referencing doc if absent
4057: if (version == NULL)
4058: version = xmlCharStrdup(XML_DEFAULT_VERSION);
4059: ctxt->version = xmlStrdup(version);
4060: */
1.99 daniel 4061:
1.76 daniel 4062: if (version != NULL)
4063: free(version);
4064:
4065: /*
4066: * We must have the encoding declaration
4067: */
4068: if (!IS_BLANK(CUR)) {
4069: if ((ctxt->sax != NULL) && (ctxt->sax->error != NULL))
4070: ctxt->sax->error(ctxt->userData, "Blank needed here\n");
4071: ctxt->wellFormed = 0;
4072: }
4073: encoding = xmlParseEncodingDecl(ctxt);
4074:
4075: SKIP_BLANKS;
4076: if ((CUR == '?') && (NXT(1) == '>')) {
4077: SKIP(2);
4078: } else if (CUR == '>') {
4079: /* Deprecated old WD ... */
4080: if ((ctxt->sax != NULL) && (ctxt->sax->error != NULL))
4081: ctxt->sax->error(ctxt->userData, "XML declaration must end-up with '?>'\n");
4082: ctxt->wellFormed = 0;
4083: NEXT;
4084: } else {
4085: if ((ctxt->sax != NULL) && (ctxt->sax->error != NULL))
4086: ctxt->sax->error(ctxt->userData, "parsing XML declaration: '?>' expected\n");
4087: ctxt->wellFormed = 0;
4088: MOVETO_ENDTAG(CUR_PTR);
4089: NEXT;
4090: }
4091: return(encoding);
4092: }
4093:
4094: /*
4095: * xmlParseConditionalSections
4096: * @ctxt: an XML parser context
4097: *
4098: * TODO : Conditionnal section are not yet supported !
4099: *
4100: * [61] conditionalSect ::= includeSect | ignoreSect
4101: * [62] includeSect ::= '<![' S? 'INCLUDE' S? '[' extSubsetDecl ']]>'
4102: * [63] ignoreSect ::= '<![' S? 'IGNORE' S? '[' ignoreSectContents* ']]>'
4103: * [64] ignoreSectContents ::= Ignore ('<![' ignoreSectContents ']]>' Ignore)*
4104: * [65] Ignore ::= Char* - (Char* ('<![' | ']]>') Char*)
4105: */
4106:
4107: void
4108: xmlParseConditionalSections(xmlParserCtxtPtr ctxt) {
4109: if ((ctxt->sax != NULL) && (ctxt->sax->warning != NULL))
4110: ctxt->sax->warning(ctxt->userData,
4111: "XML conditional section not supported\n");
4112: /*
4113: * Skip up to the end of the conditionnal section.
4114: */
4115: while ((CUR != 0) && ((CUR != ']') || (NXT(1) != ']') || (NXT(2) != '>')))
4116: NEXT;
4117: if (CUR == 0) {
4118: if ((ctxt->sax != NULL) && (ctxt->sax->error != NULL))
4119: ctxt->sax->error(ctxt->userData,
4120: "XML conditional section not closed\n");
4121: ctxt->wellFormed = 0;
4122: }
4123: }
4124:
4125: /**
4126: * xmlParseExternalSubset
4127: * @ctxt: an XML parser context
4128: *
4129: * parse Markup declarations from an external subset
4130: *
4131: * [30] extSubset ::= textDecl? extSubsetDecl
4132: *
4133: * [31] extSubsetDecl ::= (markupdecl | conditionalSect | PEReference | S) *
4134: *
4135: * TODO There is a check [ VC: Proper Declaration/PE Nesting ]
4136: */
4137: void
1.79 daniel 4138: xmlParseExternalSubset(xmlParserCtxtPtr ctxt, const CHAR *ExternalID,
4139: const CHAR *SystemID) {
1.76 daniel 4140: if ((CUR == '<') && (NXT(1) == '?') &&
4141: (NXT(2) == 'x') && (NXT(3) == 'm') &&
4142: (NXT(4) == 'l')) {
4143: xmlParseTextDecl(ctxt);
4144: }
1.79 daniel 4145: if (ctxt->myDoc == NULL) {
4146: ctxt->myDoc = xmlNewDoc("1.0");
4147: }
4148: if ((ctxt->myDoc != NULL) && (ctxt->myDoc->intSubset == NULL))
4149: xmlCreateIntSubset(ctxt->myDoc, NULL, ExternalID, SystemID);
4150:
1.96 daniel 4151: ctxt->instate = XML_PARSER_DTD;
1.101 daniel 4152: ctxt->external = 1;
1.76 daniel 4153: while (((CUR == '<') && (NXT(1) == '?')) ||
4154: ((CUR == '<') && (NXT(1) == '!')) ||
4155: IS_BLANK(CUR)) {
4156: if ((CUR == '<') && (NXT(1) == '!') && (NXT(2) == '[')) {
4157: xmlParseConditionalSections(ctxt);
4158: } else if (IS_BLANK(CUR)) {
4159: NEXT;
4160: } else if (CUR == '%') {
4161: xmlParsePEReference(ctxt);
4162: } else
4163: xmlParseMarkupDecl(ctxt);
1.77 daniel 4164:
4165: /*
4166: * Pop-up of finished entities.
4167: */
4168: while ((CUR == 0) && (ctxt->inputNr > 1))
4169: xmlPopInput(ctxt);
4170:
1.76 daniel 4171: }
4172:
4173: if (CUR != 0) {
4174: if ((ctxt->sax != NULL) && (ctxt->sax->error != NULL))
4175: ctxt->sax->error(ctxt->userData,
4176: "Extra content at the end of the document\n");
4177: ctxt->wellFormed = 0;
4178: }
4179:
4180: }
4181:
4182: /**
1.77 daniel 4183: * xmlParseReference:
4184: * @ctxt: an XML parser context
4185: *
4186: * parse and handle entity references in content, depending on the SAX
4187: * interface, this may end-up in a call to character() if this is a
1.79 daniel 4188: * CharRef, a predefined entity, if there is no reference() callback.
4189: * or if the parser was asked to switch to that mode.
1.77 daniel 4190: *
4191: * [67] Reference ::= EntityRef | CharRef
4192: */
4193: void
4194: xmlParseReference(xmlParserCtxtPtr ctxt) {
4195: xmlEntityPtr ent;
4196: CHAR *val;
4197: if (CUR != '&') return;
4198:
4199: if (NXT(1) == '#') {
4200: CHAR out[2];
4201: int val = xmlParseCharRef(ctxt);
4202: /* TODO: invalid for UTF-8 variable encoding !!! */
4203: out[0] = val;
4204: out[1] = 0;
4205: if ((ctxt->sax != NULL) && (ctxt->sax->characters != NULL))
4206: ctxt->sax->characters(ctxt->userData, out, 1);
4207: } else {
4208: ent = xmlParseEntityRef(ctxt);
4209: if (ent == NULL) return;
4210: if ((ent->name != NULL) &&
4211: (ent->type != XML_INTERNAL_PREDEFINED_ENTITY) &&
1.79 daniel 4212: (ctxt->sax != NULL) && (ctxt->sax->reference != NULL) &&
4213: (ctxt->replaceEntities == 0)) {
4214:
1.77 daniel 4215: /*
4216: * Create a node.
4217: */
4218: ctxt->sax->reference(ctxt->userData, ent->name);
4219: return;
4220: }
4221: val = ent->content;
4222: if (val == NULL) return;
4223: /*
4224: * inline the entity.
4225: */
4226: if ((ctxt->sax != NULL) && (ctxt->sax->characters != NULL))
4227: ctxt->sax->characters(ctxt->userData, val, xmlStrlen(val));
4228: }
1.24 daniel 4229: }
4230:
1.50 daniel 4231: /**
4232: * xmlParseEntityRef:
4233: * @ctxt: an XML parser context
4234: *
4235: * parse ENTITY references declarations
1.24 daniel 4236: *
4237: * [68] EntityRef ::= '&' Name ';'
1.68 daniel 4238: *
1.98 daniel 4239: * [ WFC: Entity Declared ]
4240: * In a document without any DTD, a document with only an internal DTD
4241: * subset which contains no parameter entity references, or a document
4242: * with "standalone='yes'", the Name given in the entity reference
4243: * must match that in an entity declaration, except that well-formed
4244: * documents need not declare any of the following entities: amp, lt,
4245: * gt, apos, quot. The declaration of a parameter entity must precede
4246: * any reference to it. Similarly, the declaration of a general entity
4247: * must precede any reference to it which appears in a default value in an
4248: * attribute-list declaration. Note that if entities are declared in the
4249: * external subset or in external parameter entities, a non-validating
4250: * processor is not obligated to read and process their declarations;
4251: * for such documents, the rule that an entity must be declared is a
4252: * well-formedness constraint only if standalone='yes'.
4253: *
4254: * [ WFC: Parsed Entity ]
4255: * An entity reference must not contain the name of an unparsed entity
4256: *
1.77 daniel 4257: * Returns the xmlEntityPtr if found, or NULL otherwise.
1.24 daniel 4258: */
1.77 daniel 4259: xmlEntityPtr
1.55 daniel 4260: xmlParseEntityRef(xmlParserCtxtPtr ctxt) {
1.24 daniel 4261: CHAR *name;
1.72 daniel 4262: xmlEntityPtr ent = NULL;
1.24 daniel 4263:
1.91 daniel 4264: GROW;
1.40 daniel 4265: if (CUR == '&') {
4266: NEXT;
1.24 daniel 4267: name = xmlParseName(ctxt);
4268: if (name == NULL) {
1.55 daniel 4269: if ((ctxt->sax != NULL) && (ctxt->sax->error != NULL))
1.98 daniel 4270: ctxt->sax->error(ctxt->userData,
4271: "xmlParseEntityRef: no name\n");
1.59 daniel 4272: ctxt->wellFormed = 0;
1.24 daniel 4273: } else {
1.40 daniel 4274: if (CUR == ';') {
4275: NEXT;
1.24 daniel 4276: /*
1.77 daniel 4277: * Ask first SAX for entity resolution, otherwise try the
4278: * predefined set.
4279: */
4280: if (ctxt->sax != NULL) {
4281: if (ctxt->sax->getEntity != NULL)
4282: ent = ctxt->sax->getEntity(ctxt->userData, name);
4283: if (ent == NULL)
4284: ent = xmlGetPredefinedEntity(name);
4285: }
4286: /*
1.98 daniel 4287: * [ WFC: Entity Declared ]
4288: * In a document without any DTD, a document with only an
4289: * internal DTD subset which contains no parameter entity
4290: * references, or a document with "standalone='yes'", the
4291: * Name given in the entity reference must match that in an
4292: * entity declaration, except that well-formed documents
4293: * need not declare any of the following entities: amp, lt,
4294: * gt, apos, quot.
4295: * The declaration of a parameter entity must precede any
4296: * reference to it.
4297: * Similarly, the declaration of a general entity must
4298: * precede any reference to it which appears in a default
4299: * value in an attribute-list declaration. Note that if
4300: * entities are declared in the external subset or in
4301: * external parameter entities, a non-validating processor
4302: * is not obligated to read and process their declarations;
4303: * for such documents, the rule that an entity must be
4304: * declared is a well-formedness constraint only if
4305: * standalone='yes'.
1.59 daniel 4306: */
1.77 daniel 4307: if (ent == NULL) {
1.98 daniel 4308: if ((ctxt->standalone == 1) ||
4309: ((ctxt->hasExternalSubset == 0) &&
4310: (ctxt->hasPErefs == 0))) {
4311: if ((ctxt->sax != NULL) && (ctxt->sax->error != NULL))
1.77 daniel 4312: ctxt->sax->error(ctxt->userData,
4313: "Entity '%s' not defined\n", name);
4314: ctxt->wellFormed = 0;
4315: } else {
1.98 daniel 4316: if ((ctxt->sax != NULL) && (ctxt->sax->warning != NULL))
4317: ctxt->sax->warning(ctxt->userData,
4318: "Entity '%s' not defined\n", name);
1.59 daniel 4319: }
1.77 daniel 4320: }
1.59 daniel 4321:
4322: /*
1.98 daniel 4323: * [ WFC: Parsed Entity ]
4324: * An entity reference must not contain the name of an
4325: * unparsed entity
4326: */
4327: else if (ent->type == XML_EXTERNAL_GENERAL_UNPARSED_ENTITY) {
4328: if ((ctxt->sax != NULL) && (ctxt->sax->error != NULL))
4329: ctxt->sax->error(ctxt->userData,
4330: "Entity reference to unparsed entity %s\n", name);
4331: ctxt->wellFormed = 0;
4332: }
4333:
4334: /*
4335: * [ WFC: No External Entity References ]
4336: * Attribute values cannot contain direct or indirect
4337: * entity references to external entities.
4338: */
4339: else if ((ctxt->instate == XML_PARSER_ATTRIBUTE_VALUE) &&
4340: (ent->type == XML_EXTERNAL_GENERAL_PARSED_ENTITY)) {
4341: if ((ctxt->sax != NULL) && (ctxt->sax->error != NULL))
4342: ctxt->sax->error(ctxt->userData,
4343: "Attribute references external entity '%s'\n", name);
4344: ctxt->wellFormed = 0;
4345: }
4346: /*
4347: * [ WFC: No < in Attribute Values ]
4348: * The replacement text of any entity referred to directly or
4349: * indirectly in an attribute value (other than "<") must
4350: * not contain a <.
1.59 daniel 4351: */
1.98 daniel 4352: else if ((ctxt->instate == XML_PARSER_ATTRIBUTE_VALUE) &&
4353: (ent != NULL) && (xmlStrcmp(ent->name, "lt")) &&
4354: (ent->content != NULL) &&
4355: (xmlStrchr(ent->content, '<'))) {
4356: if ((ctxt->sax != NULL) && (ctxt->sax->error != NULL))
4357: ctxt->sax->error(ctxt->userData,
4358: "'<' in entity '%s' is not allowed in attributes values\n", name);
4359: ctxt->wellFormed = 0;
4360: }
4361:
4362: /*
4363: * Internal check, no parameter entities here ...
4364: */
4365: else {
1.59 daniel 4366: switch (ent->type) {
4367: case XML_INTERNAL_PARAMETER_ENTITY:
4368: case XML_EXTERNAL_PARAMETER_ENTITY:
4369: if ((ctxt->sax != NULL) && (ctxt->sax->error != NULL))
1.74 daniel 4370: ctxt->sax->error(ctxt->userData,
1.59 daniel 4371: "Attempt to reference the parameter entity '%s'\n", name);
4372: ctxt->wellFormed = 0;
4373: break;
4374: }
4375: }
4376:
4377: /*
1.98 daniel 4378: * [ WFC: No Recursion ]
4379: * TODO A parsed entity must not contain a recursive
4380: * reference to itself, either directly or indirectly.
1.59 daniel 4381: */
1.77 daniel 4382:
1.24 daniel 4383: } else {
1.55 daniel 4384: if ((ctxt->sax != NULL) && (ctxt->sax->error != NULL))
1.74 daniel 4385: ctxt->sax->error(ctxt->userData,
1.59 daniel 4386: "xmlParseEntityRef: expecting ';'\n");
4387: ctxt->wellFormed = 0;
1.24 daniel 4388: }
1.45 daniel 4389: free(name);
1.24 daniel 4390: }
4391: }
1.77 daniel 4392: return(ent);
1.24 daniel 4393: }
4394:
1.50 daniel 4395: /**
4396: * xmlParsePEReference:
4397: * @ctxt: an XML parser context
4398: *
4399: * parse PEReference declarations
1.77 daniel 4400: * The entity content is handled directly by pushing it's content as
4401: * a new input stream.
1.22 daniel 4402: *
4403: * [69] PEReference ::= '%' Name ';'
1.68 daniel 4404: *
1.98 daniel 4405: * [ WFC: No Recursion ]
4406: * TODO A parsed entity must not contain a recursive
4407: * reference to itself, either directly or indirectly.
4408: *
4409: * [ WFC: Entity Declared ]
4410: * In a document without any DTD, a document with only an internal DTD
4411: * subset which contains no parameter entity references, or a document
4412: * with "standalone='yes'", ... ... The declaration of a parameter
4413: * entity must precede any reference to it...
4414: *
4415: * [ VC: Entity Declared ]
4416: * In a document with an external subset or external parameter entities
4417: * with "standalone='no'", ... ... The declaration of a parameter entity
4418: * must precede any reference to it...
4419: *
4420: * [ WFC: In DTD ]
4421: * Parameter-entity references may only appear in the DTD.
4422: * NOTE: misleading but this is handled.
1.22 daniel 4423: */
1.77 daniel 4424: void
1.55 daniel 4425: xmlParsePEReference(xmlParserCtxtPtr ctxt) {
1.22 daniel 4426: CHAR *name;
1.72 daniel 4427: xmlEntityPtr entity = NULL;
1.50 daniel 4428: xmlParserInputPtr input;
1.22 daniel 4429:
1.40 daniel 4430: if (CUR == '%') {
4431: NEXT;
1.22 daniel 4432: name = xmlParseName(ctxt);
4433: if (name == NULL) {
1.55 daniel 4434: if ((ctxt->sax != NULL) && (ctxt->sax->error != NULL))
1.74 daniel 4435: ctxt->sax->error(ctxt->userData, "xmlParsePEReference: no name\n");
1.59 daniel 4436: ctxt->wellFormed = 0;
1.22 daniel 4437: } else {
1.40 daniel 4438: if (CUR == ';') {
4439: NEXT;
1.98 daniel 4440: if ((ctxt->sax != NULL) &&
4441: (ctxt->sax->getParameterEntity != NULL))
4442: entity = ctxt->sax->getParameterEntity(ctxt->userData,
4443: name);
1.45 daniel 4444: if (entity == NULL) {
1.98 daniel 4445: /*
4446: * [ WFC: Entity Declared ]
4447: * In a document without any DTD, a document with only an
4448: * internal DTD subset which contains no parameter entity
4449: * references, or a document with "standalone='yes'", ...
4450: * ... The declaration of a parameter entity must precede
4451: * any reference to it...
4452: */
4453: if ((ctxt->standalone == 1) ||
4454: ((ctxt->hasExternalSubset == 0) &&
4455: (ctxt->hasPErefs == 0))) {
4456: if ((ctxt->sax != NULL) && (ctxt->sax->error != NULL))
4457: ctxt->sax->error(ctxt->userData,
4458: "PEReference: %%%s; not found\n", name);
4459: ctxt->wellFormed = 0;
4460: } else {
4461: /*
4462: * [ VC: Entity Declared ]
4463: * In a document with an external subset or external
4464: * parameter entities with "standalone='no'", ...
4465: * ... The declaration of a parameter entity must precede
4466: * any reference to it...
4467: */
4468: if ((ctxt->sax != NULL) && (ctxt->sax->warning != NULL))
4469: ctxt->sax->warning(ctxt->userData,
4470: "PEReference: %%%s; not found\n", name);
4471: ctxt->valid = 0;
4472: }
1.50 daniel 4473: } else {
1.98 daniel 4474: /*
4475: * Internal checking in case the entity quest barfed
4476: */
4477: if ((entity->type != XML_INTERNAL_PARAMETER_ENTITY) &&
4478: (entity->type != XML_EXTERNAL_PARAMETER_ENTITY)) {
4479: if ((ctxt->sax != NULL) && (ctxt->sax->warning != NULL))
4480: ctxt->sax->warning(ctxt->userData,
4481: "Internal: %%%s; is not a parameter entity\n", name);
4482: } else {
4483: input = xmlNewEntityInputStream(ctxt, entity);
4484: xmlPushInput(ctxt, input);
4485: }
1.45 daniel 4486: }
1.98 daniel 4487: ctxt->hasPErefs = 1;
1.22 daniel 4488: } else {
1.55 daniel 4489: if ((ctxt->sax != NULL) && (ctxt->sax->error != NULL))
1.74 daniel 4490: ctxt->sax->error(ctxt->userData,
1.59 daniel 4491: "xmlParsePEReference: expecting ';'\n");
4492: ctxt->wellFormed = 0;
1.22 daniel 4493: }
1.45 daniel 4494: free(name);
1.3 veillard 4495: }
4496: }
4497: }
4498:
1.50 daniel 4499: /**
4500: * xmlParseDocTypeDecl :
4501: * @ctxt: an XML parser context
4502: *
4503: * parse a DOCTYPE declaration
1.21 daniel 4504: *
1.22 daniel 4505: * [28] doctypedecl ::= '<!DOCTYPE' S Name (S ExternalID)? S?
4506: * ('[' (markupdecl | PEReference | S)* ']' S?)? '>'
1.98 daniel 4507: *
4508: * [ VC: Root Element Type ]
1.99 daniel 4509: * The Name in the document type declaration must match the element
1.98 daniel 4510: * type of the root element.
1.21 daniel 4511: */
4512:
1.55 daniel 4513: void
4514: xmlParseDocTypeDecl(xmlParserCtxtPtr ctxt) {
1.21 daniel 4515: CHAR *name;
4516: CHAR *ExternalID = NULL;
1.39 daniel 4517: CHAR *URI = NULL;
1.21 daniel 4518:
4519: /*
4520: * We know that '<!DOCTYPE' has been detected.
4521: */
1.40 daniel 4522: SKIP(9);
1.21 daniel 4523:
1.42 daniel 4524: SKIP_BLANKS;
1.21 daniel 4525:
4526: /*
4527: * Parse the DOCTYPE name.
4528: */
4529: name = xmlParseName(ctxt);
4530: if (name == NULL) {
1.55 daniel 4531: if ((ctxt->sax != NULL) && (ctxt->sax->error != NULL))
1.74 daniel 4532: ctxt->sax->error(ctxt->userData, "xmlParseDocTypeDecl : no DOCTYPE name !\n");
1.59 daniel 4533: ctxt->wellFormed = 0;
1.21 daniel 4534: }
4535:
1.42 daniel 4536: SKIP_BLANKS;
1.21 daniel 4537:
4538: /*
1.22 daniel 4539: * Check for SystemID and ExternalID
4540: */
1.67 daniel 4541: URI = xmlParseExternalID(ctxt, &ExternalID, 1);
1.98 daniel 4542:
4543: if ((URI != NULL) || (ExternalID != NULL)) {
4544: ctxt->hasExternalSubset = 1;
4545: }
4546:
1.42 daniel 4547: SKIP_BLANKS;
1.36 daniel 4548:
1.76 daniel 4549: /*
4550: * NOTE: the SAX callback may try to fetch the external subset
4551: * entity and fill it up !
4552: */
1.72 daniel 4553: if ((ctxt->sax != NULL) && (ctxt->sax->internalSubset != NULL))
1.74 daniel 4554: ctxt->sax->internalSubset(ctxt->userData, name, ExternalID, URI);
1.22 daniel 4555:
4556: /*
4557: * Is there any DTD definition ?
4558: */
1.40 daniel 4559: if (CUR == '[') {
1.96 daniel 4560: ctxt->instate = XML_PARSER_DTD;
1.40 daniel 4561: NEXT;
1.22 daniel 4562: /*
4563: * Parse the succession of Markup declarations and
4564: * PEReferences.
4565: * Subsequence (markupdecl | PEReference | S)*
4566: */
1.40 daniel 4567: while (CUR != ']') {
4568: const CHAR *check = CUR_PTR;
1.22 daniel 4569:
1.42 daniel 4570: SKIP_BLANKS;
1.22 daniel 4571: xmlParseMarkupDecl(ctxt);
1.50 daniel 4572: xmlParsePEReference(ctxt);
1.22 daniel 4573:
1.40 daniel 4574: if (CUR_PTR == check) {
1.55 daniel 4575: if ((ctxt->sax != NULL) && (ctxt->sax->error != NULL))
1.74 daniel 4576: ctxt->sax->error(ctxt->userData,
1.31 daniel 4577: "xmlParseDocTypeDecl: error detected in Markup declaration\n");
1.59 daniel 4578: ctxt->wellFormed = 0;
1.22 daniel 4579: break;
4580: }
1.77 daniel 4581:
4582: /*
4583: * Pop-up of finished entities.
4584: */
4585: while ((CUR == 0) && (ctxt->inputNr > 1))
4586: xmlPopInput(ctxt);
4587:
1.22 daniel 4588: }
1.40 daniel 4589: if (CUR == ']') NEXT;
1.22 daniel 4590: }
4591:
4592: /*
4593: * We should be at the end of the DOCTYPE declaration.
1.21 daniel 4594: */
1.40 daniel 4595: if (CUR != '>') {
1.55 daniel 4596: if ((ctxt->sax != NULL) && (ctxt->sax->error != NULL))
1.74 daniel 4597: ctxt->sax->error(ctxt->userData, "DOCTYPE unproperly terminated\n");
1.59 daniel 4598: ctxt->wellFormed = 0;
1.22 daniel 4599: /* We shouldn't try to resynchronize ... */
1.21 daniel 4600: }
1.40 daniel 4601: NEXT;
1.22 daniel 4602:
4603: /*
1.99 daniel 4604: * Cleanup
1.22 daniel 4605: */
1.39 daniel 4606: if (URI != NULL) free(URI);
1.22 daniel 4607: if (ExternalID != NULL) free(ExternalID);
4608: if (name != NULL) free(name);
1.21 daniel 4609: }
4610:
1.50 daniel 4611: /**
4612: * xmlParseAttribute:
4613: * @ctxt: an XML parser context
1.72 daniel 4614: * @value: a CHAR ** used to store the value of the attribute
1.50 daniel 4615: *
4616: * parse an attribute
1.3 veillard 4617: *
1.22 daniel 4618: * [41] Attribute ::= Name Eq AttValue
4619: *
1.98 daniel 4620: * [ WFC: No External Entity References ]
4621: * Attribute values cannot contain direct or indirect entity references
4622: * to external entities.
4623: *
4624: * [ WFC: No < in Attribute Values ]
4625: * The replacement text of any entity referred to directly or indirectly in
4626: * an attribute value (other than "<") must not contain a <.
4627: *
4628: * [ VC: Attribute Value Type ]
4629: * TODO The attribute must have been declared; the value must be of the type
1.99 daniel 4630: * declared for it.
1.98 daniel 4631: *
1.22 daniel 4632: * [25] Eq ::= S? '=' S?
4633: *
1.29 daniel 4634: * With namespace:
4635: *
4636: * [NS 11] Attribute ::= QName Eq AttValue
1.43 daniel 4637: *
4638: * Also the case QName == xmlns:??? is handled independently as a namespace
4639: * definition.
1.69 daniel 4640: *
1.72 daniel 4641: * Returns the attribute name, and the value in *value.
1.3 veillard 4642: */
4643:
1.72 daniel 4644: CHAR *
4645: xmlParseAttribute(xmlParserCtxtPtr ctxt, CHAR **value) {
1.59 daniel 4646: CHAR *name, *val;
1.3 veillard 4647:
1.72 daniel 4648: *value = NULL;
4649: name = xmlParseName(ctxt);
1.22 daniel 4650: if (name == NULL) {
1.55 daniel 4651: if ((ctxt->sax != NULL) && (ctxt->sax->error != NULL))
1.74 daniel 4652: ctxt->sax->error(ctxt->userData, "error parsing attribute name\n");
1.59 daniel 4653: ctxt->wellFormed = 0;
1.52 daniel 4654: return(NULL);
1.3 veillard 4655: }
4656:
4657: /*
1.29 daniel 4658: * read the value
1.3 veillard 4659: */
1.42 daniel 4660: SKIP_BLANKS;
1.40 daniel 4661: if (CUR == '=') {
4662: NEXT;
1.42 daniel 4663: SKIP_BLANKS;
1.72 daniel 4664: val = xmlParseAttValue(ctxt);
1.96 daniel 4665: ctxt->instate = XML_PARSER_CONTENT;
1.29 daniel 4666: } else {
1.55 daniel 4667: if ((ctxt->sax != NULL) && (ctxt->sax->error != NULL))
1.74 daniel 4668: ctxt->sax->error(ctxt->userData,
1.59 daniel 4669: "Specification mandate value for attribute %s\n", name);
4670: ctxt->wellFormed = 0;
1.52 daniel 4671: return(NULL);
1.43 daniel 4672: }
4673:
1.72 daniel 4674: *value = val;
4675: return(name);
1.3 veillard 4676: }
4677:
1.50 daniel 4678: /**
4679: * xmlParseStartTag:
4680: * @ctxt: an XML parser context
4681: *
4682: * parse a start of tag either for rule element or
4683: * EmptyElement. In both case we don't parse the tag closing chars.
1.27 daniel 4684: *
4685: * [40] STag ::= '<' Name (S Attribute)* S? '>'
4686: *
1.98 daniel 4687: * [ WFC: Unique Att Spec ]
4688: * No attribute name may appear more than once in the same start-tag or
4689: * empty-element tag.
4690: *
1.29 daniel 4691: * [44] EmptyElemTag ::= '<' Name (S Attribute)* S? '/>'
4692: *
1.98 daniel 4693: * [ WFC: Unique Att Spec ]
4694: * No attribute name may appear more than once in the same start-tag or
4695: * empty-element tag.
4696: *
1.29 daniel 4697: * With namespace:
4698: *
4699: * [NS 8] STag ::= '<' QName (S Attribute)* S? '>'
4700: *
4701: * [NS 10] EmptyElement ::= '<' QName (S Attribute)* S? '/>'
1.83 daniel 4702: *
4703: * Returns the element name parsed
1.2 veillard 4704: */
4705:
1.83 daniel 4706: CHAR *
1.69 daniel 4707: xmlParseStartTag(xmlParserCtxtPtr ctxt) {
1.72 daniel 4708: CHAR *name;
4709: CHAR *attname;
4710: CHAR *attvalue;
4711: const CHAR **atts = NULL;
4712: int nbatts = 0;
4713: int maxatts = 0;
4714: int i;
1.2 veillard 4715:
1.83 daniel 4716: if (CUR != '<') return(NULL);
1.40 daniel 4717: NEXT;
1.3 veillard 4718:
1.72 daniel 4719: name = xmlParseName(ctxt);
1.59 daniel 4720: if (name == NULL) {
4721: if ((ctxt->sax != NULL) && (ctxt->sax->error != NULL))
1.74 daniel 4722: ctxt->sax->error(ctxt->userData,
1.59 daniel 4723: "xmlParseStartTag: invalid element name\n");
4724: ctxt->wellFormed = 0;
1.83 daniel 4725: return(NULL);
1.50 daniel 4726: }
4727:
4728: /*
1.3 veillard 4729: * Now parse the attributes, it ends up with the ending
4730: *
4731: * (S Attribute)* S?
4732: */
1.42 daniel 4733: SKIP_BLANKS;
1.91 daniel 4734: GROW;
1.40 daniel 4735: while ((IS_CHAR(CUR)) &&
4736: (CUR != '>') &&
4737: ((CUR != '/') || (NXT(1) != '>'))) {
4738: const CHAR *q = CUR_PTR;
1.91 daniel 4739: int cons = ctxt->input->consumed;
1.29 daniel 4740:
1.72 daniel 4741: attname = xmlParseAttribute(ctxt, &attvalue);
4742: if ((attname != NULL) && (attvalue != NULL)) {
4743: /*
1.98 daniel 4744: * [ WFC: Unique Att Spec ]
4745: * No attribute name may appear more than once in the same
4746: * start-tag or empty-element tag.
1.72 daniel 4747: */
4748: for (i = 0; i < nbatts;i += 2) {
4749: if (!xmlStrcmp(atts[i], attname)) {
4750: if ((ctxt->sax != NULL) && (ctxt->sax->error != NULL))
1.98 daniel 4751: ctxt->sax->error(ctxt->userData,
4752: "Attribute %s redefined\n",
4753: attname);
1.72 daniel 4754: ctxt->wellFormed = 0;
4755: free(attname);
4756: free(attvalue);
1.98 daniel 4757: goto failed;
1.72 daniel 4758: }
4759: }
4760:
4761: /*
4762: * Add the pair to atts
4763: */
4764: if (atts == NULL) {
4765: maxatts = 10;
4766: atts = (const CHAR **) malloc(maxatts * sizeof(CHAR *));
4767: if (atts == NULL) {
1.86 daniel 4768: fprintf(stderr, "malloc of %ld byte failed\n",
4769: maxatts * (long)sizeof(CHAR *));
1.83 daniel 4770: return(NULL);
1.72 daniel 4771: }
4772: } else if (nbatts + 2 < maxatts) {
4773: maxatts *= 2;
4774: atts = (const CHAR **) realloc(atts, maxatts * sizeof(CHAR *));
4775: if (atts == NULL) {
1.86 daniel 4776: fprintf(stderr, "realloc of %ld byte failed\n",
4777: maxatts * (long)sizeof(CHAR *));
1.83 daniel 4778: return(NULL);
1.72 daniel 4779: }
4780: }
4781: atts[nbatts++] = attname;
4782: atts[nbatts++] = attvalue;
4783: atts[nbatts] = NULL;
4784: atts[nbatts + 1] = NULL;
1.98 daniel 4785: failed:
1.72 daniel 4786: }
4787:
1.42 daniel 4788: SKIP_BLANKS;
1.91 daniel 4789: if ((cons == ctxt->input->consumed) && (q == CUR_PTR)) {
1.55 daniel 4790: if ((ctxt->sax != NULL) && (ctxt->sax->error != NULL))
1.74 daniel 4791: ctxt->sax->error(ctxt->userData,
1.31 daniel 4792: "xmlParseStartTag: problem parsing attributes\n");
1.59 daniel 4793: ctxt->wellFormed = 0;
1.29 daniel 4794: break;
1.3 veillard 4795: }
1.91 daniel 4796: GROW;
1.3 veillard 4797: }
4798:
1.43 daniel 4799: /*
1.72 daniel 4800: * SAX: Start of Element !
1.43 daniel 4801: */
1.72 daniel 4802: if ((ctxt->sax != NULL) && (ctxt->sax->startElement != NULL))
1.74 daniel 4803: ctxt->sax->startElement(ctxt->userData, name, atts);
1.43 daniel 4804:
1.72 daniel 4805: if (atts != NULL) {
4806: for (i = 0;i < nbatts;i++) free((CHAR *) atts[i]);
4807: free(atts);
4808: }
1.83 daniel 4809: return(name);
1.3 veillard 4810: }
4811:
1.50 daniel 4812: /**
4813: * xmlParseEndTag:
4814: * @ctxt: an XML parser context
1.83 daniel 4815: * @tagname: the tag name as parsed in the opening tag.
1.50 daniel 4816: *
4817: * parse an end of tag
1.27 daniel 4818: *
4819: * [42] ETag ::= '</' Name S? '>'
1.29 daniel 4820: *
4821: * With namespace
4822: *
1.72 daniel 4823: * [NS 9] ETag ::= '</' QName S? '>'
1.7 veillard 4824: */
4825:
1.55 daniel 4826: void
1.83 daniel 4827: xmlParseEndTag(xmlParserCtxtPtr ctxt, CHAR *tagname) {
1.72 daniel 4828: CHAR *name;
1.7 veillard 4829:
1.91 daniel 4830: GROW;
1.40 daniel 4831: if ((CUR != '<') || (NXT(1) != '/')) {
1.55 daniel 4832: if ((ctxt->sax != NULL) && (ctxt->sax->error != NULL))
1.74 daniel 4833: ctxt->sax->error(ctxt->userData, "xmlParseEndTag: '</' not found\n");
1.59 daniel 4834: ctxt->wellFormed = 0;
1.27 daniel 4835: return;
4836: }
1.40 daniel 4837: SKIP(2);
1.7 veillard 4838:
1.72 daniel 4839: name = xmlParseName(ctxt);
1.7 veillard 4840:
4841: /*
4842: * We should definitely be at the ending "S? '>'" part
4843: */
1.91 daniel 4844: GROW;
1.42 daniel 4845: SKIP_BLANKS;
1.40 daniel 4846: if ((!IS_CHAR(CUR)) || (CUR != '>')) {
1.55 daniel 4847: if ((ctxt->sax != NULL) && (ctxt->sax->error != NULL))
1.74 daniel 4848: ctxt->sax->error(ctxt->userData, "End tag : expected '>'\n");
1.59 daniel 4849: ctxt->wellFormed = 0;
1.7 veillard 4850: } else
1.40 daniel 4851: NEXT;
1.7 veillard 4852:
1.72 daniel 4853: /*
1.98 daniel 4854: * [ WFC: Element Type Match ]
4855: * The Name in an element's end-tag must match the element type in the
4856: * start-tag.
4857: *
1.83 daniel 4858: */
4859: if (xmlStrcmp(name, tagname)) {
4860: if ((ctxt->sax != NULL) && (ctxt->sax->error != NULL))
4861: ctxt->sax->error(ctxt->userData,
4862: "Opening and ending tag mismatch: %s and %s\n", tagname, name);
4863: ctxt->wellFormed = 0;
4864: }
4865:
4866: /*
1.72 daniel 4867: * SAX: End of Tag
4868: */
4869: if ((ctxt->sax != NULL) && (ctxt->sax->endElement != NULL))
1.74 daniel 4870: ctxt->sax->endElement(ctxt->userData, name);
1.72 daniel 4871:
4872: if (name != NULL)
4873: free(name);
4874:
1.7 veillard 4875: return;
4876: }
4877:
1.50 daniel 4878: /**
4879: * xmlParseCDSect:
4880: * @ctxt: an XML parser context
4881: *
4882: * Parse escaped pure raw content.
1.29 daniel 4883: *
4884: * [18] CDSect ::= CDStart CData CDEnd
4885: *
4886: * [19] CDStart ::= '<![CDATA['
4887: *
4888: * [20] Data ::= (Char* - (Char* ']]>' Char*))
4889: *
4890: * [21] CDEnd ::= ']]>'
1.3 veillard 4891: */
1.55 daniel 4892: void
4893: xmlParseCDSect(xmlParserCtxtPtr ctxt) {
1.17 daniel 4894: const CHAR *r, *s, *base;
1.3 veillard 4895:
1.40 daniel 4896: if ((CUR == '<') && (NXT(1) == '!') &&
4897: (NXT(2) == '[') && (NXT(3) == 'C') &&
4898: (NXT(4) == 'D') && (NXT(5) == 'A') &&
4899: (NXT(6) == 'T') && (NXT(7) == 'A') &&
4900: (NXT(8) == '[')) {
4901: SKIP(9);
1.29 daniel 4902: } else
1.45 daniel 4903: return;
1.40 daniel 4904: base = CUR_PTR;
4905: if (!IS_CHAR(CUR)) {
1.55 daniel 4906: if ((ctxt->sax != NULL) && (ctxt->sax->error != NULL))
1.74 daniel 4907: ctxt->sax->error(ctxt->userData, "CData section not finished\n%.50s\n", base);
1.59 daniel 4908: ctxt->wellFormed = 0;
1.45 daniel 4909: return;
1.3 veillard 4910: }
1.91 daniel 4911: r = CUR_PTR;
4912: NEXT;
1.40 daniel 4913: if (!IS_CHAR(CUR)) {
1.55 daniel 4914: if ((ctxt->sax != NULL) && (ctxt->sax->error != NULL))
1.74 daniel 4915: ctxt->sax->error(ctxt->userData, "CData section not finished\n%.50s\n", base);
1.59 daniel 4916: ctxt->wellFormed = 0;
1.45 daniel 4917: return;
1.3 veillard 4918: }
1.91 daniel 4919: s = CUR_PTR;
4920: NEXT;
1.40 daniel 4921: while (IS_CHAR(CUR) &&
4922: ((*r != ']') || (*s != ']') || (CUR != '>'))) {
4923: r++;s++;NEXT;
1.3 veillard 4924: }
1.40 daniel 4925: if (!IS_CHAR(CUR)) {
1.55 daniel 4926: if ((ctxt->sax != NULL) && (ctxt->sax->error != NULL))
1.74 daniel 4927: ctxt->sax->error(ctxt->userData, "CData section not finished\n%.50s\n", base);
1.59 daniel 4928: ctxt->wellFormed = 0;
1.45 daniel 4929: return;
1.3 veillard 4930: }
1.16 daniel 4931:
1.45 daniel 4932: /*
4933: * Ok the segment [base CUR_PTR] is to be consumed as chars.
4934: */
4935: if (ctxt->sax != NULL) {
1.72 daniel 4936: if (areBlanks(ctxt, base, CUR_PTR - base)) {
4937: if (ctxt->sax->ignorableWhitespace != NULL)
1.74 daniel 4938: ctxt->sax->ignorableWhitespace(ctxt->userData, base,
1.72 daniel 4939: (CUR_PTR - base) - 2);
4940: } else {
4941: if (ctxt->sax->characters != NULL)
1.74 daniel 4942: ctxt->sax->characters(ctxt->userData, base, (CUR_PTR - base) - 2);
1.72 daniel 4943: }
1.45 daniel 4944: }
1.2 veillard 4945: }
4946:
1.50 daniel 4947: /**
4948: * xmlParseContent:
4949: * @ctxt: an XML parser context
4950: *
4951: * Parse a content:
1.2 veillard 4952: *
1.27 daniel 4953: * [43] content ::= (element | CharData | Reference | CDSect | PI | Comment)*
1.2 veillard 4954: */
4955:
1.55 daniel 4956: void
4957: xmlParseContent(xmlParserCtxtPtr ctxt) {
1.97 daniel 4958: GROW;
1.40 daniel 4959: while ((CUR != '<') || (NXT(1) != '/')) {
4960: const CHAR *test = CUR_PTR;
1.91 daniel 4961: int cons = ctxt->input->consumed;
1.27 daniel 4962:
4963: /*
4964: * First case : a Processing Instruction.
4965: */
1.40 daniel 4966: if ((CUR == '<') && (NXT(1) == '?')) {
1.27 daniel 4967: xmlParsePI(ctxt);
4968: }
1.72 daniel 4969:
1.27 daniel 4970: /*
4971: * Second case : a CDSection
4972: */
1.40 daniel 4973: else if ((CUR == '<') && (NXT(1) == '!') &&
4974: (NXT(2) == '[') && (NXT(3) == 'C') &&
4975: (NXT(4) == 'D') && (NXT(5) == 'A') &&
4976: (NXT(6) == 'T') && (NXT(7) == 'A') &&
4977: (NXT(8) == '[')) {
1.45 daniel 4978: xmlParseCDSect(ctxt);
1.27 daniel 4979: }
1.72 daniel 4980:
1.27 daniel 4981: /*
4982: * Third case : a comment
4983: */
1.40 daniel 4984: else if ((CUR == '<') && (NXT(1) == '!') &&
4985: (NXT(2) == '-') && (NXT(3) == '-')) {
1.72 daniel 4986: xmlParseComment(ctxt, 1);
1.97 daniel 4987: ctxt->instate = XML_PARSER_CONTENT;
1.27 daniel 4988: }
1.72 daniel 4989:
1.27 daniel 4990: /*
4991: * Fourth case : a sub-element.
4992: */
1.40 daniel 4993: else if (CUR == '<') {
1.72 daniel 4994: xmlParseElement(ctxt);
1.45 daniel 4995: }
1.72 daniel 4996:
1.45 daniel 4997: /*
1.50 daniel 4998: * Fifth case : a reference. If if has not been resolved,
4999: * parsing returns it's Name, create the node
1.45 daniel 5000: */
1.97 daniel 5001:
1.45 daniel 5002: else if (CUR == '&') {
1.77 daniel 5003: xmlParseReference(ctxt);
1.27 daniel 5004: }
1.72 daniel 5005:
1.27 daniel 5006: /*
5007: * Last case, text. Note that References are handled directly.
5008: */
5009: else {
1.45 daniel 5010: xmlParseCharData(ctxt, 0);
1.3 veillard 5011: }
1.14 veillard 5012:
1.91 daniel 5013: GROW;
1.14 veillard 5014: /*
1.45 daniel 5015: * Pop-up of finished entities.
1.14 veillard 5016: */
1.69 daniel 5017: while ((CUR == 0) && (ctxt->inputNr > 1))
5018: xmlPopInput(ctxt);
1.45 daniel 5019:
1.91 daniel 5020: if ((cons == ctxt->input->consumed) && (test == CUR_PTR)) {
1.55 daniel 5021: if ((ctxt->sax != NULL) && (ctxt->sax->error != NULL))
1.74 daniel 5022: ctxt->sax->error(ctxt->userData,
1.59 daniel 5023: "detected an error in element content\n");
5024: ctxt->wellFormed = 0;
1.29 daniel 5025: break;
5026: }
1.3 veillard 5027: }
1.2 veillard 5028: }
5029:
1.50 daniel 5030: /**
5031: * xmlParseElement:
5032: * @ctxt: an XML parser context
5033: *
5034: * parse an XML element, this is highly recursive
1.26 daniel 5035: *
5036: * [39] element ::= EmptyElemTag | STag content ETag
5037: *
1.98 daniel 5038: * [ WFC: Element Type Match ]
5039: * The Name in an element's end-tag must match the element type in the
5040: * start-tag.
5041: *
5042: * [ VC: Element Valid ]
5043: * TODO An element is valid if there is a declaration matching elementdecl
1.99 daniel 5044: * where the Name matches the element type and one of the following holds:
5045: * - The declaration matches EMPTY and the element has no content.
5046: * - The declaration matches children and the sequence of child elements
5047: * belongs to the language generated by the regular expression in the
5048: * content model, with optional white space (characters matching the
5049: * nonterminal S) between each pair of child elements.
5050: * - The declaration matches Mixed and the content consists of character
5051: * data and child elements whose types match names in the content model.
5052: * - The declaration matches ANY, and the types of any child elements have
5053: * been declared.
1.2 veillard 5054: */
1.26 daniel 5055:
1.72 daniel 5056: void
1.69 daniel 5057: xmlParseElement(xmlParserCtxtPtr ctxt) {
1.40 daniel 5058: const CHAR *openTag = CUR_PTR;
1.83 daniel 5059: CHAR *name;
1.32 daniel 5060: xmlParserNodeInfo node_info;
1.2 veillard 5061:
1.32 daniel 5062: /* Capture start position */
1.40 daniel 5063: node_info.begin_pos = CUR_PTR - ctxt->input->base;
5064: node_info.begin_line = ctxt->input->line;
1.32 daniel 5065:
1.83 daniel 5066: name = xmlParseStartTag(ctxt);
5067: if (name == NULL) {
5068: return;
5069: }
1.2 veillard 5070:
5071: /*
1.99 daniel 5072: * [ VC: Root Element Type ]
5073: * The Name in the document type declaration must match the element
5074: * type of the root element.
5075: */
1.102 daniel 5076: if (ctxt->validate && ctxt->wellFormed && ctxt->myDoc)
5077: ctxt->valid &= xmlValidateRoot(&ctxt->vctxt, ctxt->myDoc);
5078:
1.99 daniel 5079:
5080: /*
1.2 veillard 5081: * Check for an Empty Element.
5082: */
1.40 daniel 5083: if ((CUR == '/') && (NXT(1) == '>')) {
5084: SKIP(2);
1.72 daniel 5085: if ((ctxt->sax != NULL) && (ctxt->sax->endElement != NULL))
1.83 daniel 5086: ctxt->sax->endElement(ctxt->userData, name);
5087: free(name);
1.72 daniel 5088: return;
1.2 veillard 5089: }
1.91 daniel 5090: if (CUR == '>') {
5091: NEXT;
5092: } else {
1.55 daniel 5093: if ((ctxt->sax != NULL) && (ctxt->sax->error != NULL))
1.74 daniel 5094: ctxt->sax->error(ctxt->userData, "Couldn't find end of Start Tag\n%.30s\n",
1.57 daniel 5095: openTag);
1.59 daniel 5096: ctxt->wellFormed = 0;
1.45 daniel 5097:
5098: /*
5099: * end of parsing of this node.
5100: */
5101: nodePop(ctxt);
1.83 daniel 5102: free(name);
1.72 daniel 5103: return;
1.2 veillard 5104: }
5105:
5106: /*
5107: * Parse the content of the element:
5108: */
1.45 daniel 5109: xmlParseContent(ctxt);
1.40 daniel 5110: if (!IS_CHAR(CUR)) {
1.55 daniel 5111: if ((ctxt->sax != NULL) && (ctxt->sax->error != NULL))
1.74 daniel 5112: ctxt->sax->error(ctxt->userData,
1.57 daniel 5113: "Premature end of data in tag %.30s\n", openTag);
1.59 daniel 5114: ctxt->wellFormed = 0;
1.45 daniel 5115:
5116: /*
5117: * end of parsing of this node.
5118: */
5119: nodePop(ctxt);
1.83 daniel 5120: free(name);
1.72 daniel 5121: return;
1.2 veillard 5122: }
5123:
5124: /*
1.27 daniel 5125: * parse the end of tag: '</' should be here.
1.2 veillard 5126: */
1.83 daniel 5127: xmlParseEndTag(ctxt, name);
5128: free(name);
1.2 veillard 5129: }
5130:
1.50 daniel 5131: /**
5132: * xmlParseVersionNum:
5133: * @ctxt: an XML parser context
5134: *
5135: * parse the XML version value.
1.29 daniel 5136: *
5137: * [26] VersionNum ::= ([a-zA-Z0-9_.:] | '-')+
1.68 daniel 5138: *
5139: * Returns the string giving the XML version number, or NULL
1.29 daniel 5140: */
1.55 daniel 5141: CHAR *
5142: xmlParseVersionNum(xmlParserCtxtPtr ctxt) {
1.40 daniel 5143: const CHAR *q = CUR_PTR;
1.29 daniel 5144: CHAR *ret;
5145:
1.40 daniel 5146: while (IS_CHAR(CUR) &&
5147: (((CUR >= 'a') && (CUR <= 'z')) ||
5148: ((CUR >= 'A') && (CUR <= 'Z')) ||
5149: ((CUR >= '0') && (CUR <= '9')) ||
5150: (CUR == '_') || (CUR == '.') ||
5151: (CUR == ':') || (CUR == '-'))) NEXT;
5152: ret = xmlStrndup(q, CUR_PTR - q);
1.29 daniel 5153: return(ret);
5154: }
5155:
1.50 daniel 5156: /**
5157: * xmlParseVersionInfo:
5158: * @ctxt: an XML parser context
5159: *
5160: * parse the XML version.
1.29 daniel 5161: *
5162: * [24] VersionInfo ::= S 'version' Eq (' VersionNum ' | " VersionNum ")
5163: *
5164: * [25] Eq ::= S? '=' S?
1.50 daniel 5165: *
1.68 daniel 5166: * Returns the version string, e.g. "1.0"
1.29 daniel 5167: */
5168:
1.55 daniel 5169: CHAR *
5170: xmlParseVersionInfo(xmlParserCtxtPtr ctxt) {
1.29 daniel 5171: CHAR *version = NULL;
5172: const CHAR *q;
5173:
1.40 daniel 5174: if ((CUR == 'v') && (NXT(1) == 'e') &&
5175: (NXT(2) == 'r') && (NXT(3) == 's') &&
5176: (NXT(4) == 'i') && (NXT(5) == 'o') &&
5177: (NXT(6) == 'n')) {
5178: SKIP(7);
1.42 daniel 5179: SKIP_BLANKS;
1.40 daniel 5180: if (CUR != '=') {
1.55 daniel 5181: if ((ctxt->sax != NULL) && (ctxt->sax->error != NULL))
1.74 daniel 5182: ctxt->sax->error(ctxt->userData, "xmlParseVersionInfo : expected '='\n");
1.59 daniel 5183: ctxt->wellFormed = 0;
1.31 daniel 5184: return(NULL);
5185: }
1.40 daniel 5186: NEXT;
1.42 daniel 5187: SKIP_BLANKS;
1.40 daniel 5188: if (CUR == '"') {
5189: NEXT;
5190: q = CUR_PTR;
1.29 daniel 5191: version = xmlParseVersionNum(ctxt);
1.55 daniel 5192: if (CUR != '"') {
5193: if ((ctxt->sax != NULL) && (ctxt->sax->error != NULL))
1.74 daniel 5194: ctxt->sax->error(ctxt->userData, "String not closed\n%.50s\n", q);
1.59 daniel 5195: ctxt->wellFormed = 0;
1.55 daniel 5196: } else
1.40 daniel 5197: NEXT;
5198: } else if (CUR == '\''){
5199: NEXT;
5200: q = CUR_PTR;
1.29 daniel 5201: version = xmlParseVersionNum(ctxt);
1.55 daniel 5202: if (CUR != '\'') {
5203: if ((ctxt->sax != NULL) && (ctxt->sax->error != NULL))
1.74 daniel 5204: ctxt->sax->error(ctxt->userData, "String not closed\n%.50s\n", q);
1.59 daniel 5205: ctxt->wellFormed = 0;
1.55 daniel 5206: } else
1.40 daniel 5207: NEXT;
1.31 daniel 5208: } else {
1.55 daniel 5209: if ((ctxt->sax != NULL) && (ctxt->sax->error != NULL))
1.74 daniel 5210: ctxt->sax->error(ctxt->userData,
1.59 daniel 5211: "xmlParseVersionInfo : expected ' or \"\n");
5212: ctxt->wellFormed = 0;
1.29 daniel 5213: }
5214: }
5215: return(version);
5216: }
5217:
1.50 daniel 5218: /**
5219: * xmlParseEncName:
5220: * @ctxt: an XML parser context
5221: *
5222: * parse the XML encoding name
1.29 daniel 5223: *
5224: * [81] EncName ::= [A-Za-z] ([A-Za-z0-9._] | '-')*
1.50 daniel 5225: *
1.68 daniel 5226: * Returns the encoding name value or NULL
1.29 daniel 5227: */
1.55 daniel 5228: CHAR *
5229: xmlParseEncName(xmlParserCtxtPtr ctxt) {
1.40 daniel 5230: const CHAR *q = CUR_PTR;
1.29 daniel 5231: CHAR *ret = NULL;
5232:
1.40 daniel 5233: if (((CUR >= 'a') && (CUR <= 'z')) ||
5234: ((CUR >= 'A') && (CUR <= 'Z'))) {
5235: NEXT;
5236: while (IS_CHAR(CUR) &&
5237: (((CUR >= 'a') && (CUR <= 'z')) ||
5238: ((CUR >= 'A') && (CUR <= 'Z')) ||
5239: ((CUR >= '0') && (CUR <= '9')) ||
5240: (CUR == '-'))) NEXT;
5241: ret = xmlStrndup(q, CUR_PTR - q);
1.29 daniel 5242: } else {
1.55 daniel 5243: if ((ctxt->sax != NULL) && (ctxt->sax->error != NULL))
1.74 daniel 5244: ctxt->sax->error(ctxt->userData, "Invalid XML encoding name\n");
1.59 daniel 5245: ctxt->wellFormed = 0;
1.29 daniel 5246: }
5247: return(ret);
5248: }
5249:
1.50 daniel 5250: /**
5251: * xmlParseEncodingDecl:
5252: * @ctxt: an XML parser context
5253: *
5254: * parse the XML encoding declaration
1.29 daniel 5255: *
5256: * [80] EncodingDecl ::= S 'encoding' Eq ('"' EncName '"' | "'" EncName "'")
1.50 daniel 5257: *
5258: * TODO: this should setup the conversion filters.
5259: *
1.68 daniel 5260: * Returns the encoding value or NULL
1.29 daniel 5261: */
5262:
1.55 daniel 5263: CHAR *
5264: xmlParseEncodingDecl(xmlParserCtxtPtr ctxt) {
1.29 daniel 5265: CHAR *encoding = NULL;
5266: const CHAR *q;
5267:
1.42 daniel 5268: SKIP_BLANKS;
1.40 daniel 5269: if ((CUR == 'e') && (NXT(1) == 'n') &&
5270: (NXT(2) == 'c') && (NXT(3) == 'o') &&
5271: (NXT(4) == 'd') && (NXT(5) == 'i') &&
5272: (NXT(6) == 'n') && (NXT(7) == 'g')) {
5273: SKIP(8);
1.42 daniel 5274: SKIP_BLANKS;
1.40 daniel 5275: if (CUR != '=') {
1.55 daniel 5276: if ((ctxt->sax != NULL) && (ctxt->sax->error != NULL))
1.74 daniel 5277: ctxt->sax->error(ctxt->userData, "xmlParseEncodingDecl : expected '='\n");
1.59 daniel 5278: ctxt->wellFormed = 0;
1.31 daniel 5279: return(NULL);
5280: }
1.40 daniel 5281: NEXT;
1.42 daniel 5282: SKIP_BLANKS;
1.40 daniel 5283: if (CUR == '"') {
5284: NEXT;
5285: q = CUR_PTR;
1.29 daniel 5286: encoding = xmlParseEncName(ctxt);
1.55 daniel 5287: if (CUR != '"') {
5288: if ((ctxt->sax != NULL) && (ctxt->sax->error != NULL))
1.74 daniel 5289: ctxt->sax->error(ctxt->userData, "String not closed\n%.50s\n", q);
1.59 daniel 5290: ctxt->wellFormed = 0;
1.55 daniel 5291: } else
1.40 daniel 5292: NEXT;
5293: } else if (CUR == '\''){
5294: NEXT;
5295: q = CUR_PTR;
1.29 daniel 5296: encoding = xmlParseEncName(ctxt);
1.55 daniel 5297: if (CUR != '\'') {
5298: if ((ctxt->sax != NULL) && (ctxt->sax->error != NULL))
1.74 daniel 5299: ctxt->sax->error(ctxt->userData, "String not closed\n%.50s\n", q);
1.59 daniel 5300: ctxt->wellFormed = 0;
1.55 daniel 5301: } else
1.40 daniel 5302: NEXT;
5303: } else if (CUR == '"'){
1.55 daniel 5304: if ((ctxt->sax != NULL) && (ctxt->sax->error != NULL))
1.74 daniel 5305: ctxt->sax->error(ctxt->userData,
1.59 daniel 5306: "xmlParseEncodingDecl : expected ' or \"\n");
5307: ctxt->wellFormed = 0;
1.29 daniel 5308: }
5309: }
5310: return(encoding);
5311: }
5312:
1.50 daniel 5313: /**
5314: * xmlParseSDDecl:
5315: * @ctxt: an XML parser context
5316: *
5317: * parse the XML standalone declaration
1.29 daniel 5318: *
5319: * [32] SDDecl ::= S 'standalone' Eq
5320: * (("'" ('yes' | 'no') "'") | ('"' ('yes' | 'no')'"'))
1.99 daniel 5321: *
5322: * [ VC: Standalone Document Declaration ]
5323: * TODO The standalone document declaration must have the value "no"
5324: * if any external markup declarations contain declarations of:
5325: * - attributes with default values, if elements to which these
5326: * attributes apply appear in the document without specifications
5327: * of values for these attributes, or
5328: * - entities (other than amp, lt, gt, apos, quot), if references
5329: * to those entities appear in the document, or
5330: * - attributes with values subject to normalization, where the
5331: * attribute appears in the document with a value which will change
5332: * as a result of normalization, or
5333: * - element types with element content, if white space occurs directly
5334: * within any instance of those types.
1.68 daniel 5335: *
5336: * Returns 1 if standalone, 0 otherwise
1.29 daniel 5337: */
5338:
1.55 daniel 5339: int
5340: xmlParseSDDecl(xmlParserCtxtPtr ctxt) {
1.29 daniel 5341: int standalone = -1;
5342:
1.42 daniel 5343: SKIP_BLANKS;
1.40 daniel 5344: if ((CUR == 's') && (NXT(1) == 't') &&
5345: (NXT(2) == 'a') && (NXT(3) == 'n') &&
5346: (NXT(4) == 'd') && (NXT(5) == 'a') &&
5347: (NXT(6) == 'l') && (NXT(7) == 'o') &&
5348: (NXT(8) == 'n') && (NXT(9) == 'e')) {
5349: SKIP(10);
1.81 daniel 5350: SKIP_BLANKS;
1.40 daniel 5351: if (CUR != '=') {
1.55 daniel 5352: if ((ctxt->sax != NULL) && (ctxt->sax->error != NULL))
1.74 daniel 5353: ctxt->sax->error(ctxt->userData,
1.59 daniel 5354: "XML standalone declaration : expected '='\n");
5355: ctxt->wellFormed = 0;
1.32 daniel 5356: return(standalone);
5357: }
1.40 daniel 5358: NEXT;
1.42 daniel 5359: SKIP_BLANKS;
1.40 daniel 5360: if (CUR == '\''){
5361: NEXT;
5362: if ((CUR == 'n') && (NXT(1) == 'o')) {
1.29 daniel 5363: standalone = 0;
1.40 daniel 5364: SKIP(2);
5365: } else if ((CUR == 'y') && (NXT(1) == 'e') &&
5366: (NXT(2) == 's')) {
1.29 daniel 5367: standalone = 1;
1.40 daniel 5368: SKIP(3);
1.29 daniel 5369: } else {
1.55 daniel 5370: if ((ctxt->sax != NULL) && (ctxt->sax->error != NULL))
1.74 daniel 5371: ctxt->sax->error(ctxt->userData, "standalone accepts only 'yes' or 'no'\n");
1.59 daniel 5372: ctxt->wellFormed = 0;
1.29 daniel 5373: }
1.55 daniel 5374: if (CUR != '\'') {
5375: if ((ctxt->sax != NULL) && (ctxt->sax->error != NULL))
1.74 daniel 5376: ctxt->sax->error(ctxt->userData, "String not closed\n");
1.59 daniel 5377: ctxt->wellFormed = 0;
1.55 daniel 5378: } else
1.40 daniel 5379: NEXT;
5380: } else if (CUR == '"'){
5381: NEXT;
5382: if ((CUR == 'n') && (NXT(1) == 'o')) {
1.29 daniel 5383: standalone = 0;
1.40 daniel 5384: SKIP(2);
5385: } else if ((CUR == 'y') && (NXT(1) == 'e') &&
5386: (NXT(2) == 's')) {
1.29 daniel 5387: standalone = 1;
1.40 daniel 5388: SKIP(3);
1.29 daniel 5389: } else {
1.55 daniel 5390: if ((ctxt->sax != NULL) && (ctxt->sax->error != NULL))
1.74 daniel 5391: ctxt->sax->error(ctxt->userData,
1.59 daniel 5392: "standalone accepts only 'yes' or 'no'\n");
5393: ctxt->wellFormed = 0;
1.29 daniel 5394: }
1.55 daniel 5395: if (CUR != '"') {
5396: if ((ctxt->sax != NULL) && (ctxt->sax->error != NULL))
1.74 daniel 5397: ctxt->sax->error(ctxt->userData, "String not closed\n");
1.59 daniel 5398: ctxt->wellFormed = 0;
1.55 daniel 5399: } else
1.40 daniel 5400: NEXT;
1.37 daniel 5401: } else {
1.55 daniel 5402: if ((ctxt->sax != NULL) && (ctxt->sax->error != NULL))
1.74 daniel 5403: ctxt->sax->error(ctxt->userData, "Standalone value not found\n");
1.59 daniel 5404: ctxt->wellFormed = 0;
1.37 daniel 5405: }
1.29 daniel 5406: }
5407: return(standalone);
5408: }
5409:
1.50 daniel 5410: /**
5411: * xmlParseXMLDecl:
5412: * @ctxt: an XML parser context
5413: *
5414: * parse an XML declaration header
1.29 daniel 5415: *
5416: * [23] XMLDecl ::= '<?xml' VersionInfo EncodingDecl? SDDecl? S? '?>'
1.1 veillard 5417: */
5418:
1.55 daniel 5419: void
5420: xmlParseXMLDecl(xmlParserCtxtPtr ctxt) {
1.1 veillard 5421: CHAR *version;
5422:
5423: /*
1.19 daniel 5424: * We know that '<?xml' is here.
1.1 veillard 5425: */
1.40 daniel 5426: SKIP(5);
1.1 veillard 5427:
1.59 daniel 5428: if (!IS_BLANK(CUR)) {
5429: if ((ctxt->sax != NULL) && (ctxt->sax->error != NULL))
1.74 daniel 5430: ctxt->sax->error(ctxt->userData, "Blank needed after '<?xml'\n");
1.59 daniel 5431: ctxt->wellFormed = 0;
5432: }
1.42 daniel 5433: SKIP_BLANKS;
1.1 veillard 5434:
5435: /*
1.29 daniel 5436: * We should have the VersionInfo here.
1.1 veillard 5437: */
1.29 daniel 5438: version = xmlParseVersionInfo(ctxt);
5439: if (version == NULL)
1.45 daniel 5440: version = xmlCharStrdup(XML_DEFAULT_VERSION);
1.72 daniel 5441: ctxt->version = xmlStrdup(version);
1.45 daniel 5442: free(version);
1.29 daniel 5443:
5444: /*
5445: * We may have the encoding declaration
5446: */
1.59 daniel 5447: if (!IS_BLANK(CUR)) {
5448: if ((CUR == '?') && (NXT(1) == '>')) {
5449: SKIP(2);
5450: return;
5451: }
5452: if ((ctxt->sax != NULL) && (ctxt->sax->error != NULL))
1.74 daniel 5453: ctxt->sax->error(ctxt->userData, "Blank needed here\n");
1.59 daniel 5454: ctxt->wellFormed = 0;
5455: }
1.72 daniel 5456: ctxt->encoding = xmlParseEncodingDecl(ctxt);
1.1 veillard 5457:
5458: /*
1.29 daniel 5459: * We may have the standalone status.
1.1 veillard 5460: */
1.72 daniel 5461: if ((ctxt->encoding != NULL) && (!IS_BLANK(CUR))) {
1.59 daniel 5462: if ((CUR == '?') && (NXT(1) == '>')) {
5463: SKIP(2);
5464: return;
5465: }
5466: if ((ctxt->sax != NULL) && (ctxt->sax->error != NULL))
1.74 daniel 5467: ctxt->sax->error(ctxt->userData, "Blank needed here\n");
1.59 daniel 5468: ctxt->wellFormed = 0;
5469: }
5470: SKIP_BLANKS;
1.72 daniel 5471: ctxt->standalone = xmlParseSDDecl(ctxt);
1.1 veillard 5472:
1.42 daniel 5473: SKIP_BLANKS;
1.40 daniel 5474: if ((CUR == '?') && (NXT(1) == '>')) {
5475: SKIP(2);
5476: } else if (CUR == '>') {
1.31 daniel 5477: /* Deprecated old WD ... */
1.55 daniel 5478: if ((ctxt->sax != NULL) && (ctxt->sax->error != NULL))
1.74 daniel 5479: ctxt->sax->error(ctxt->userData, "XML declaration must end-up with '?>'\n");
1.59 daniel 5480: ctxt->wellFormed = 0;
1.40 daniel 5481: NEXT;
1.29 daniel 5482: } else {
1.55 daniel 5483: if ((ctxt->sax != NULL) && (ctxt->sax->error != NULL))
1.74 daniel 5484: ctxt->sax->error(ctxt->userData, "parsing XML declaration: '?>' expected\n");
1.59 daniel 5485: ctxt->wellFormed = 0;
1.40 daniel 5486: MOVETO_ENDTAG(CUR_PTR);
5487: NEXT;
1.29 daniel 5488: }
1.1 veillard 5489: }
5490:
1.50 daniel 5491: /**
5492: * xmlParseMisc:
5493: * @ctxt: an XML parser context
5494: *
5495: * parse an XML Misc* optionnal field.
1.21 daniel 5496: *
1.22 daniel 5497: * [27] Misc ::= Comment | PI | S
1.1 veillard 5498: */
5499:
1.55 daniel 5500: void
5501: xmlParseMisc(xmlParserCtxtPtr ctxt) {
1.40 daniel 5502: while (((CUR == '<') && (NXT(1) == '?')) ||
5503: ((CUR == '<') && (NXT(1) == '!') &&
5504: (NXT(2) == '-') && (NXT(3) == '-')) ||
5505: IS_BLANK(CUR)) {
5506: if ((CUR == '<') && (NXT(1) == '?')) {
1.16 daniel 5507: xmlParsePI(ctxt);
1.40 daniel 5508: } else if (IS_BLANK(CUR)) {
5509: NEXT;
1.1 veillard 5510: } else
1.31 daniel 5511: xmlParseComment(ctxt, 0);
1.1 veillard 5512: }
5513: }
5514:
1.50 daniel 5515: /**
5516: * xmlParseDocument :
5517: * @ctxt: an XML parser context
5518: *
5519: * parse an XML document (and build a tree if using the standard SAX
5520: * interface).
1.21 daniel 5521: *
1.22 daniel 5522: * [1] document ::= prolog element Misc*
1.29 daniel 5523: *
5524: * [22] prolog ::= XMLDecl? Misc* (doctypedecl Misc*)?
1.50 daniel 5525: *
1.68 daniel 5526: * Returns 0, -1 in case of error. the parser context is augmented
1.50 daniel 5527: * as a result of the parsing.
1.1 veillard 5528: */
5529:
1.55 daniel 5530: int
5531: xmlParseDocument(xmlParserCtxtPtr ctxt) {
1.45 daniel 5532: xmlDefaultSAXHandlerInit();
5533:
1.91 daniel 5534: GROW;
5535:
1.14 veillard 5536: /*
1.44 daniel 5537: * SAX: beginning of the document processing.
5538: */
1.72 daniel 5539: if ((ctxt->sax) && (ctxt->sax->setDocumentLocator))
1.74 daniel 5540: ctxt->sax->setDocumentLocator(ctxt->userData, &xmlDefaultSAXLocator);
1.44 daniel 5541:
5542: /*
1.14 veillard 5543: * We should check for encoding here and plug-in some
5544: * conversion code TODO !!!!
5545: */
1.1 veillard 5546:
5547: /*
5548: * Wipe out everything which is before the first '<'
5549: */
1.59 daniel 5550: if (IS_BLANK(CUR)) {
5551: if ((ctxt->sax != NULL) && (ctxt->sax->error != NULL))
1.74 daniel 5552: ctxt->sax->error(ctxt->userData,
1.59 daniel 5553: "Extra spaces at the beginning of the document are not allowed\n");
5554: ctxt->wellFormed = 0;
5555: SKIP_BLANKS;
5556: }
5557:
5558: if (CUR == 0) {
5559: if ((ctxt->sax != NULL) && (ctxt->sax->error != NULL))
1.74 daniel 5560: ctxt->sax->error(ctxt->userData, "Document is empty\n");
1.59 daniel 5561: ctxt->wellFormed = 0;
5562: }
1.1 veillard 5563:
5564: /*
5565: * Check for the XMLDecl in the Prolog.
5566: */
1.91 daniel 5567: GROW;
1.40 daniel 5568: if ((CUR == '<') && (NXT(1) == '?') &&
5569: (NXT(2) == 'x') && (NXT(3) == 'm') &&
5570: (NXT(4) == 'l')) {
1.19 daniel 5571: xmlParseXMLDecl(ctxt);
5572: /* SKIP_EOL(cur); */
1.42 daniel 5573: SKIP_BLANKS;
1.40 daniel 5574: } else if ((CUR == '<') && (NXT(1) == '?') &&
5575: (NXT(2) == 'X') && (NXT(3) == 'M') &&
5576: (NXT(4) == 'L')) {
1.19 daniel 5577: /*
5578: * The first drafts were using <?XML and the final W3C REC
5579: * now use <?xml ...
5580: */
1.16 daniel 5581: xmlParseXMLDecl(ctxt);
1.1 veillard 5582: /* SKIP_EOL(cur); */
1.42 daniel 5583: SKIP_BLANKS;
1.1 veillard 5584: } else {
1.72 daniel 5585: ctxt->version = xmlCharStrdup(XML_DEFAULT_VERSION);
1.1 veillard 5586: }
1.72 daniel 5587: if ((ctxt->sax) && (ctxt->sax->startDocument))
1.74 daniel 5588: ctxt->sax->startDocument(ctxt->userData);
1.1 veillard 5589:
5590: /*
5591: * The Misc part of the Prolog
5592: */
1.91 daniel 5593: GROW;
1.16 daniel 5594: xmlParseMisc(ctxt);
1.1 veillard 5595:
5596: /*
1.29 daniel 5597: * Then possibly doc type declaration(s) and more Misc
1.21 daniel 5598: * (doctypedecl Misc*)?
5599: */
1.91 daniel 5600: GROW;
1.40 daniel 5601: if ((CUR == '<') && (NXT(1) == '!') &&
5602: (NXT(2) == 'D') && (NXT(3) == 'O') &&
5603: (NXT(4) == 'C') && (NXT(5) == 'T') &&
5604: (NXT(6) == 'Y') && (NXT(7) == 'P') &&
5605: (NXT(8) == 'E')) {
1.22 daniel 5606: xmlParseDocTypeDecl(ctxt);
1.96 daniel 5607: ctxt->instate = XML_PARSER_PROLOG;
1.22 daniel 5608: xmlParseMisc(ctxt);
1.21 daniel 5609: }
5610:
5611: /*
5612: * Time to start parsing the tree itself
1.1 veillard 5613: */
1.91 daniel 5614: GROW;
1.96 daniel 5615: ctxt->instate = XML_PARSER_CONTENT;
1.72 daniel 5616: xmlParseElement(ctxt);
1.96 daniel 5617: ctxt->instate = XML_PARSER_EPILOG;
1.33 daniel 5618:
5619: /*
5620: * The Misc part at the end
5621: */
5622: xmlParseMisc(ctxt);
1.16 daniel 5623:
1.59 daniel 5624: if (CUR != 0) {
5625: if ((ctxt->sax != NULL) && (ctxt->sax->error != NULL))
1.74 daniel 5626: ctxt->sax->error(ctxt->userData,
1.59 daniel 5627: "Extra content at the end of the document\n");
5628: ctxt->wellFormed = 0;
5629: }
1.96 daniel 5630: ctxt->instate = XML_PARSER_EOF;
1.59 daniel 5631:
1.44 daniel 5632: /*
5633: * SAX: end of the document processing.
5634: */
1.72 daniel 5635: if ((ctxt->sax) && (ctxt->sax->endDocument != NULL))
1.74 daniel 5636: ctxt->sax->endDocument(ctxt->userData);
1.59 daniel 5637: if (! ctxt->wellFormed) return(-1);
1.16 daniel 5638: return(0);
5639: }
5640:
1.98 daniel 5641: /************************************************************************
5642: * *
5643: * I/O front end functions to the parser *
5644: * *
5645: ************************************************************************/
5646:
1.50 daniel 5647: /**
1.86 daniel 5648: * xmlCreateDocParserCtxt :
1.50 daniel 5649: * @cur: a pointer to an array of CHAR
5650: *
1.69 daniel 5651: * Create a parser context for an XML in-memory document.
5652: *
5653: * Returns the new parser context or NULL
1.16 daniel 5654: */
1.69 daniel 5655: xmlParserCtxtPtr
5656: xmlCreateDocParserCtxt(CHAR *cur) {
1.16 daniel 5657: xmlParserCtxtPtr ctxt;
1.40 daniel 5658: xmlParserInputPtr input;
1.75 daniel 5659: xmlCharEncoding enc;
1.16 daniel 5660:
1.97 daniel 5661: ctxt = xmlNewParserCtxt();
1.16 daniel 5662: if (ctxt == NULL) {
5663: return(NULL);
5664: }
1.96 daniel 5665: input = xmlNewInputStream(ctxt);
1.40 daniel 5666: if (input == NULL) {
1.97 daniel 5667: xmlFreeParserCtxt(ctxt);
1.40 daniel 5668: return(NULL);
5669: }
5670:
1.75 daniel 5671: /*
5672: * plug some encoding conversion routines here. !!!
5673: */
5674: enc = xmlDetectCharEncoding(cur);
5675: xmlSwitchEncoding(ctxt, enc);
5676:
1.40 daniel 5677: input->base = cur;
5678: input->cur = cur;
5679:
5680: inputPush(ctxt, input);
1.69 daniel 5681: return(ctxt);
5682: }
5683:
5684: /**
5685: * xmlSAXParseDoc :
5686: * @sax: the SAX handler block
5687: * @cur: a pointer to an array of CHAR
5688: * @recovery: work in recovery mode, i.e. tries to read no Well Formed
5689: * documents
5690: *
5691: * parse an XML in-memory document and build a tree.
5692: * It use the given SAX function block to handle the parsing callback.
5693: * If sax is NULL, fallback to the default DOM tree building routines.
5694: *
5695: * Returns the resulting document tree
5696: */
5697:
5698: xmlDocPtr
5699: xmlSAXParseDoc(xmlSAXHandlerPtr sax, CHAR *cur, int recovery) {
5700: xmlDocPtr ret;
5701: xmlParserCtxtPtr ctxt;
5702:
5703: if (cur == NULL) return(NULL);
1.16 daniel 5704:
5705:
1.69 daniel 5706: ctxt = xmlCreateDocParserCtxt(cur);
5707: if (ctxt == NULL) return(NULL);
1.74 daniel 5708: if (sax != NULL) {
5709: ctxt->sax = sax;
5710: ctxt->userData = NULL;
5711: }
1.69 daniel 5712:
1.16 daniel 5713: xmlParseDocument(ctxt);
1.72 daniel 5714: if ((ctxt->wellFormed) || recovery) ret = ctxt->myDoc;
1.59 daniel 5715: else {
5716: ret = NULL;
1.72 daniel 5717: xmlFreeDoc(ctxt->myDoc);
5718: ctxt->myDoc = NULL;
1.59 daniel 5719: }
1.86 daniel 5720: if (sax != NULL)
5721: ctxt->sax = NULL;
1.69 daniel 5722: xmlFreeParserCtxt(ctxt);
1.16 daniel 5723:
1.1 veillard 5724: return(ret);
5725: }
5726:
1.50 daniel 5727: /**
1.55 daniel 5728: * xmlParseDoc :
5729: * @cur: a pointer to an array of CHAR
5730: *
5731: * parse an XML in-memory document and build a tree.
5732: *
1.68 daniel 5733: * Returns the resulting document tree
1.55 daniel 5734: */
5735:
1.69 daniel 5736: xmlDocPtr
5737: xmlParseDoc(CHAR *cur) {
1.59 daniel 5738: return(xmlSAXParseDoc(NULL, cur, 0));
1.76 daniel 5739: }
5740:
5741: /**
5742: * xmlSAXParseDTD :
5743: * @sax: the SAX handler block
5744: * @ExternalID: a NAME* containing the External ID of the DTD
5745: * @SystemID: a NAME* containing the URL to the DTD
5746: *
5747: * Load and parse an external subset.
5748: *
5749: * Returns the resulting xmlDtdPtr or NULL in case of error.
5750: */
5751:
5752: xmlDtdPtr
5753: xmlSAXParseDTD(xmlSAXHandlerPtr sax, const CHAR *ExternalID,
5754: const CHAR *SystemID) {
5755: xmlDtdPtr ret = NULL;
5756: xmlParserCtxtPtr ctxt;
1.83 daniel 5757: xmlParserInputPtr input = NULL;
1.76 daniel 5758: xmlCharEncoding enc;
5759:
5760: if ((ExternalID == NULL) && (SystemID == NULL)) return(NULL);
5761:
1.97 daniel 5762: ctxt = xmlNewParserCtxt();
1.76 daniel 5763: if (ctxt == NULL) {
5764: return(NULL);
5765: }
5766:
5767: /*
5768: * Set-up the SAX context
5769: */
5770: if (ctxt == NULL) return(NULL);
5771: if (sax != NULL) {
1.93 veillard 5772: if (ctxt->sax != NULL)
5773: free(ctxt->sax);
1.76 daniel 5774: ctxt->sax = sax;
5775: ctxt->userData = NULL;
5776: }
5777:
5778: /*
5779: * Ask the Entity resolver to load the damn thing
5780: */
5781:
5782: if ((ctxt->sax != NULL) && (ctxt->sax->resolveEntity != NULL))
5783: input = ctxt->sax->resolveEntity(ctxt->userData, ExternalID, SystemID);
5784: if (input == NULL) {
1.86 daniel 5785: if (sax != NULL) ctxt->sax = NULL;
1.76 daniel 5786: xmlFreeParserCtxt(ctxt);
5787: return(NULL);
5788: }
5789:
5790: /*
5791: * plug some encoding conversion routines here. !!!
5792: */
5793: xmlPushInput(ctxt, input);
5794: enc = xmlDetectCharEncoding(ctxt->input->cur);
5795: xmlSwitchEncoding(ctxt, enc);
5796:
1.95 veillard 5797: if (input->filename == NULL)
5798: input->filename = xmlStrdup(SystemID);
1.76 daniel 5799: input->line = 1;
5800: input->col = 1;
5801: input->base = ctxt->input->cur;
5802: input->cur = ctxt->input->cur;
5803: input->free = NULL;
5804:
5805: /*
5806: * let's parse that entity knowing it's an external subset.
5807: */
1.79 daniel 5808: xmlParseExternalSubset(ctxt, ExternalID, SystemID);
1.76 daniel 5809:
5810: if (ctxt->myDoc != NULL) {
5811: if (ctxt->wellFormed) {
5812: ret = ctxt->myDoc->intSubset;
5813: ctxt->myDoc->intSubset = NULL;
5814: } else {
5815: ret = NULL;
5816: }
5817: xmlFreeDoc(ctxt->myDoc);
5818: ctxt->myDoc = NULL;
5819: }
1.86 daniel 5820: if (sax != NULL) ctxt->sax = NULL;
1.76 daniel 5821: xmlFreeParserCtxt(ctxt);
5822:
5823: return(ret);
5824: }
5825:
5826: /**
5827: * xmlParseDTD :
5828: * @ExternalID: a NAME* containing the External ID of the DTD
5829: * @SystemID: a NAME* containing the URL to the DTD
5830: *
5831: * Load and parse an external subset.
5832: *
5833: * Returns the resulting xmlDtdPtr or NULL in case of error.
5834: */
5835:
5836: xmlDtdPtr
5837: xmlParseDTD(const CHAR *ExternalID, const CHAR *SystemID) {
5838: return(xmlSAXParseDTD(NULL, ExternalID, SystemID));
1.59 daniel 5839: }
5840:
5841: /**
5842: * xmlRecoverDoc :
5843: * @cur: a pointer to an array of CHAR
5844: *
5845: * parse an XML in-memory document and build a tree.
5846: * In the case the document is not Well Formed, a tree is built anyway
5847: *
1.68 daniel 5848: * Returns the resulting document tree
1.59 daniel 5849: */
5850:
1.69 daniel 5851: xmlDocPtr
5852: xmlRecoverDoc(CHAR *cur) {
1.59 daniel 5853: return(xmlSAXParseDoc(NULL, cur, 1));
1.55 daniel 5854: }
5855:
5856: /**
1.69 daniel 5857: * xmlCreateFileParserCtxt :
1.50 daniel 5858: * @filename: the filename
5859: *
1.69 daniel 5860: * Create a parser context for a file content.
5861: * Automatic support for ZLIB/Compress compressed document is provided
5862: * by default if found at compile-time.
1.50 daniel 5863: *
1.69 daniel 5864: * Returns the new parser context or NULL
1.9 httpng 5865: */
1.69 daniel 5866: xmlParserCtxtPtr
5867: xmlCreateFileParserCtxt(const char *filename)
5868: {
5869: xmlParserCtxtPtr ctxt;
1.40 daniel 5870: xmlParserInputPtr inputStream;
1.91 daniel 5871: xmlParserInputBufferPtr buf;
1.9 httpng 5872:
1.91 daniel 5873: buf = xmlParserInputBufferCreateFilename(filename, XML_CHAR_ENCODING_NONE);
5874: if (buf == NULL) return(NULL);
1.9 httpng 5875:
1.97 daniel 5876: ctxt = xmlNewParserCtxt();
1.16 daniel 5877: if (ctxt == NULL) {
5878: return(NULL);
5879: }
1.97 daniel 5880:
1.96 daniel 5881: inputStream = xmlNewInputStream(ctxt);
1.40 daniel 5882: if (inputStream == NULL) {
1.97 daniel 5883: xmlFreeParserCtxt(ctxt);
1.40 daniel 5884: return(NULL);
5885: }
5886:
5887: inputStream->filename = strdup(filename);
1.91 daniel 5888: inputStream->buf = buf;
5889: inputStream->base = inputStream->buf->buffer->content;
5890: inputStream->cur = inputStream->buf->buffer->content;
1.16 daniel 5891:
1.40 daniel 5892: inputPush(ctxt, inputStream);
1.69 daniel 5893: return(ctxt);
5894: }
5895:
5896: /**
5897: * xmlSAXParseFile :
5898: * @sax: the SAX handler block
5899: * @filename: the filename
5900: * @recovery: work in recovery mode, i.e. tries to read no Well Formed
5901: * documents
5902: *
5903: * parse an XML file and build a tree. Automatic support for ZLIB/Compress
5904: * compressed document is provided by default if found at compile-time.
5905: * It use the given SAX function block to handle the parsing callback.
5906: * If sax is NULL, fallback to the default DOM tree building routines.
5907: *
5908: * Returns the resulting document tree
5909: */
5910:
1.79 daniel 5911: xmlDocPtr
5912: xmlSAXParseFile(xmlSAXHandlerPtr sax, const char *filename,
1.69 daniel 5913: int recovery) {
5914: xmlDocPtr ret;
5915: xmlParserCtxtPtr ctxt;
5916:
5917: ctxt = xmlCreateFileParserCtxt(filename);
5918: if (ctxt == NULL) return(NULL);
1.74 daniel 5919: if (sax != NULL) {
1.93 veillard 5920: if (ctxt->sax != NULL)
5921: free(ctxt->sax);
1.74 daniel 5922: ctxt->sax = sax;
5923: ctxt->userData = NULL;
5924: }
1.16 daniel 5925:
5926: xmlParseDocument(ctxt);
1.40 daniel 5927:
1.72 daniel 5928: if ((ctxt->wellFormed) || recovery) ret = ctxt->myDoc;
1.59 daniel 5929: else {
5930: ret = NULL;
1.72 daniel 5931: xmlFreeDoc(ctxt->myDoc);
5932: ctxt->myDoc = NULL;
1.59 daniel 5933: }
1.86 daniel 5934: if (sax != NULL)
5935: ctxt->sax = NULL;
1.69 daniel 5936: xmlFreeParserCtxt(ctxt);
1.20 daniel 5937:
5938: return(ret);
5939: }
5940:
1.55 daniel 5941: /**
5942: * xmlParseFile :
5943: * @filename: the filename
5944: *
5945: * parse an XML file and build a tree. Automatic support for ZLIB/Compress
5946: * compressed document is provided by default if found at compile-time.
5947: *
1.68 daniel 5948: * Returns the resulting document tree
1.55 daniel 5949: */
5950:
1.79 daniel 5951: xmlDocPtr
5952: xmlParseFile(const char *filename) {
1.59 daniel 5953: return(xmlSAXParseFile(NULL, filename, 0));
5954: }
5955:
5956: /**
5957: * xmlRecoverFile :
5958: * @filename: the filename
5959: *
5960: * parse an XML file and build a tree. Automatic support for ZLIB/Compress
5961: * compressed document is provided by default if found at compile-time.
5962: * In the case the document is not Well Formed, a tree is built anyway
5963: *
1.68 daniel 5964: * Returns the resulting document tree
1.59 daniel 5965: */
5966:
1.79 daniel 5967: xmlDocPtr
5968: xmlRecoverFile(const char *filename) {
1.59 daniel 5969: return(xmlSAXParseFile(NULL, filename, 1));
1.55 daniel 5970: }
1.32 daniel 5971:
1.50 daniel 5972: /**
1.69 daniel 5973: * xmlCreateMemoryParserCtxt :
1.68 daniel 5974: * @buffer: an pointer to a char array
1.50 daniel 5975: * @size: the siwe of the array
5976: *
1.69 daniel 5977: * Create a parser context for an XML in-memory document.
1.50 daniel 5978: *
1.69 daniel 5979: * Returns the new parser context or NULL
1.20 daniel 5980: */
1.69 daniel 5981: xmlParserCtxtPtr
5982: xmlCreateMemoryParserCtxt(char *buffer, int size) {
1.20 daniel 5983: xmlParserCtxtPtr ctxt;
1.40 daniel 5984: xmlParserInputPtr input;
1.75 daniel 5985: xmlCharEncoding enc;
1.40 daniel 5986:
5987: buffer[size - 1] = '\0';
5988:
1.97 daniel 5989: ctxt = xmlNewParserCtxt();
1.20 daniel 5990: if (ctxt == NULL) {
5991: return(NULL);
5992: }
1.97 daniel 5993:
1.96 daniel 5994: input = xmlNewInputStream(ctxt);
1.40 daniel 5995: if (input == NULL) {
1.97 daniel 5996: xmlFreeParserCtxt(ctxt);
1.40 daniel 5997: return(NULL);
5998: }
1.20 daniel 5999:
1.40 daniel 6000: input->filename = NULL;
6001: input->line = 1;
6002: input->col = 1;
1.96 daniel 6003: input->buf = NULL;
1.91 daniel 6004: input->consumed = 0;
1.45 daniel 6005:
6006: /*
1.75 daniel 6007: * plug some encoding conversion routines here. !!!
1.45 daniel 6008: */
1.75 daniel 6009: enc = xmlDetectCharEncoding(buffer);
6010: xmlSwitchEncoding(ctxt, enc);
6011:
1.40 daniel 6012: input->base = buffer;
6013: input->cur = buffer;
1.69 daniel 6014: input->free = NULL;
1.20 daniel 6015:
1.40 daniel 6016: inputPush(ctxt, input);
1.69 daniel 6017: return(ctxt);
6018: }
6019:
6020: /**
6021: * xmlSAXParseMemory :
6022: * @sax: the SAX handler block
6023: * @buffer: an pointer to a char array
6024: * @size: the siwe of the array
6025: * @recovery: work in recovery mode, i.e. tries to read no Well Formed
6026: * documents
6027: *
6028: * parse an XML in-memory block and use the given SAX function block
6029: * to handle the parsing callback. If sax is NULL, fallback to the default
6030: * DOM tree building routines.
6031: *
6032: * Returns the resulting document tree
6033: */
6034: xmlDocPtr
6035: xmlSAXParseMemory(xmlSAXHandlerPtr sax, char *buffer, int size, int recovery) {
6036: xmlDocPtr ret;
6037: xmlParserCtxtPtr ctxt;
6038:
6039: ctxt = xmlCreateMemoryParserCtxt(buffer, size);
6040: if (ctxt == NULL) return(NULL);
1.74 daniel 6041: if (sax != NULL) {
6042: ctxt->sax = sax;
6043: ctxt->userData = NULL;
6044: }
1.20 daniel 6045:
6046: xmlParseDocument(ctxt);
1.40 daniel 6047:
1.72 daniel 6048: if ((ctxt->wellFormed) || recovery) ret = ctxt->myDoc;
1.59 daniel 6049: else {
6050: ret = NULL;
1.72 daniel 6051: xmlFreeDoc(ctxt->myDoc);
6052: ctxt->myDoc = NULL;
1.59 daniel 6053: }
1.86 daniel 6054: if (sax != NULL)
6055: ctxt->sax = NULL;
1.69 daniel 6056: xmlFreeParserCtxt(ctxt);
1.16 daniel 6057:
1.9 httpng 6058: return(ret);
1.17 daniel 6059: }
6060:
1.55 daniel 6061: /**
6062: * xmlParseMemory :
1.68 daniel 6063: * @buffer: an pointer to a char array
1.55 daniel 6064: * @size: the size of the array
6065: *
6066: * parse an XML in-memory block and build a tree.
6067: *
1.68 daniel 6068: * Returns the resulting document tree
1.55 daniel 6069: */
6070:
6071: xmlDocPtr xmlParseMemory(char *buffer, int size) {
1.59 daniel 6072: return(xmlSAXParseMemory(NULL, buffer, size, 0));
6073: }
6074:
6075: /**
6076: * xmlRecoverMemory :
1.68 daniel 6077: * @buffer: an pointer to a char array
1.59 daniel 6078: * @size: the size of the array
6079: *
6080: * parse an XML in-memory block and build a tree.
6081: * In the case the document is not Well Formed, a tree is built anyway
6082: *
1.68 daniel 6083: * Returns the resulting document tree
1.59 daniel 6084: */
6085:
6086: xmlDocPtr xmlRecoverMemory(char *buffer, int size) {
6087: return(xmlSAXParseMemory(NULL, buffer, size, 1));
1.17 daniel 6088: }
6089:
6090:
1.50 daniel 6091: /**
6092: * xmlSetupParserForBuffer:
6093: * @ctxt: an XML parser context
6094: * @buffer: a CHAR * buffer
6095: * @filename: a file name
6096: *
1.19 daniel 6097: * Setup the parser context to parse a new buffer; Clears any prior
6098: * contents from the parser context. The buffer parameter must not be
6099: * NULL, but the filename parameter can be
6100: */
1.55 daniel 6101: void
6102: xmlSetupParserForBuffer(xmlParserCtxtPtr ctxt, const CHAR* buffer,
1.17 daniel 6103: const char* filename)
6104: {
1.96 daniel 6105: xmlParserInputPtr input;
1.40 daniel 6106:
1.96 daniel 6107: input = xmlNewInputStream(ctxt);
6108: if (input == NULL) {
6109: perror("malloc");
6110: free(ctxt);
6111: exit(1);
6112: }
6113:
6114: xmlClearParserCtxt(ctxt);
6115: if (filename != NULL)
6116: input->filename = strdup(filename);
6117: input->base = buffer;
6118: input->cur = buffer;
6119: inputPush(ctxt, input);
1.17 daniel 6120: }
6121:
1.32 daniel 6122:
1.98 daniel 6123: /************************************************************************
6124: * *
6125: * Miscellaneous *
6126: * *
6127: ************************************************************************/
6128:
6129:
1.50 daniel 6130: /**
6131: * xmlParserFindNodeInfo:
6132: * @ctxt: an XML parser context
6133: * @node: an XML node within the tree
6134: *
6135: * Find the parser node info struct for a given node
6136: *
1.68 daniel 6137: * Returns an xmlParserNodeInfo block pointer or NULL
1.32 daniel 6138: */
6139: const xmlParserNodeInfo* xmlParserFindNodeInfo(const xmlParserCtxt* ctx,
6140: const xmlNode* node)
6141: {
6142: unsigned long pos;
6143:
6144: /* Find position where node should be at */
6145: pos = xmlParserFindNodeInfoIndex(&ctx->node_seq, node);
6146: if ( ctx->node_seq.buffer[pos].node == node )
6147: return &ctx->node_seq.buffer[pos];
6148: else
6149: return NULL;
6150: }
6151:
6152:
1.50 daniel 6153: /**
6154: * xmlInitNodeInfoSeq :
6155: * @seq: a node info sequence pointer
6156: *
6157: * -- Initialize (set to initial state) node info sequence
1.32 daniel 6158: */
1.55 daniel 6159: void
6160: xmlInitNodeInfoSeq(xmlParserNodeInfoSeqPtr seq)
1.32 daniel 6161: {
6162: seq->length = 0;
6163: seq->maximum = 0;
6164: seq->buffer = NULL;
6165: }
6166:
1.50 daniel 6167: /**
6168: * xmlClearNodeInfoSeq :
6169: * @seq: a node info sequence pointer
6170: *
6171: * -- Clear (release memory and reinitialize) node
1.32 daniel 6172: * info sequence
6173: */
1.55 daniel 6174: void
6175: xmlClearNodeInfoSeq(xmlParserNodeInfoSeqPtr seq)
1.32 daniel 6176: {
6177: if ( seq->buffer != NULL )
6178: free(seq->buffer);
6179: xmlInitNodeInfoSeq(seq);
6180: }
6181:
6182:
1.50 daniel 6183: /**
6184: * xmlParserFindNodeInfoIndex:
6185: * @seq: a node info sequence pointer
6186: * @node: an XML node pointer
6187: *
6188: *
1.32 daniel 6189: * xmlParserFindNodeInfoIndex : Find the index that the info record for
6190: * the given node is or should be at in a sorted sequence
1.68 daniel 6191: *
6192: * Returns a long indicating the position of the record
1.32 daniel 6193: */
6194: unsigned long xmlParserFindNodeInfoIndex(const xmlParserNodeInfoSeq* seq,
6195: const xmlNode* node)
6196: {
6197: unsigned long upper, lower, middle;
6198: int found = 0;
6199:
6200: /* Do a binary search for the key */
6201: lower = 1;
6202: upper = seq->length;
6203: middle = 0;
6204: while ( lower <= upper && !found) {
6205: middle = lower + (upper - lower) / 2;
6206: if ( node == seq->buffer[middle - 1].node )
6207: found = 1;
6208: else if ( node < seq->buffer[middle - 1].node )
6209: upper = middle - 1;
6210: else
6211: lower = middle + 1;
6212: }
6213:
6214: /* Return position */
6215: if ( middle == 0 || seq->buffer[middle - 1].node < node )
6216: return middle;
6217: else
6218: return middle - 1;
6219: }
6220:
6221:
1.50 daniel 6222: /**
6223: * xmlParserAddNodeInfo:
6224: * @ctxt: an XML parser context
1.68 daniel 6225: * @info: a node info sequence pointer
1.50 daniel 6226: *
6227: * Insert node info record into the sorted sequence
1.32 daniel 6228: */
1.55 daniel 6229: void
6230: xmlParserAddNodeInfo(xmlParserCtxtPtr ctxt,
1.68 daniel 6231: const xmlParserNodeInfo* info)
1.32 daniel 6232: {
6233: unsigned long pos;
6234: static unsigned int block_size = 5;
6235:
6236: /* Find pos and check to see if node is already in the sequence */
1.55 daniel 6237: pos = xmlParserFindNodeInfoIndex(&ctxt->node_seq, info->node);
6238: if ( pos < ctxt->node_seq.length
6239: && ctxt->node_seq.buffer[pos].node == info->node ) {
6240: ctxt->node_seq.buffer[pos] = *info;
1.32 daniel 6241: }
6242:
6243: /* Otherwise, we need to add new node to buffer */
6244: else {
6245: /* Expand buffer by 5 if needed */
1.55 daniel 6246: if ( ctxt->node_seq.length + 1 > ctxt->node_seq.maximum ) {
1.32 daniel 6247: xmlParserNodeInfo* tmp_buffer;
1.55 daniel 6248: unsigned int byte_size = (sizeof(*ctxt->node_seq.buffer)
6249: *(ctxt->node_seq.maximum + block_size));
1.32 daniel 6250:
1.55 daniel 6251: if ( ctxt->node_seq.buffer == NULL )
1.32 daniel 6252: tmp_buffer = (xmlParserNodeInfo*)malloc(byte_size);
6253: else
1.55 daniel 6254: tmp_buffer = (xmlParserNodeInfo*)realloc(ctxt->node_seq.buffer, byte_size);
1.32 daniel 6255:
6256: if ( tmp_buffer == NULL ) {
1.55 daniel 6257: if ((ctxt->sax != NULL) && (ctxt->sax->error != NULL))
1.74 daniel 6258: ctxt->sax->error(ctxt->userData, "Out of memory\n");
1.32 daniel 6259: return;
6260: }
1.55 daniel 6261: ctxt->node_seq.buffer = tmp_buffer;
6262: ctxt->node_seq.maximum += block_size;
1.32 daniel 6263: }
6264:
6265: /* If position is not at end, move elements out of the way */
1.55 daniel 6266: if ( pos != ctxt->node_seq.length ) {
1.32 daniel 6267: unsigned long i;
6268:
1.55 daniel 6269: for ( i = ctxt->node_seq.length; i > pos; i-- )
6270: ctxt->node_seq.buffer[i] = ctxt->node_seq.buffer[i - 1];
1.32 daniel 6271: }
6272:
6273: /* Copy element and increase length */
1.55 daniel 6274: ctxt->node_seq.buffer[pos] = *info;
6275: ctxt->node_seq.length++;
1.32 daniel 6276: }
6277: }
1.77 daniel 6278:
1.98 daniel 6279:
6280: /**
6281: * xmlSubstituteEntitiesDefault :
6282: * @val: int 0 or 1
6283: *
6284: * Set and return the previous value for default entity support.
6285: * Initially the parser always keep entity references instead of substituting
6286: * entity values in the output. This function has to be used to change the
6287: * default parser behaviour
6288: * SAX::subtituteEntities() has to be used for changing that on a file by
6289: * file basis.
6290: *
6291: * Returns the last value for 0 for no substitution, 1 for substitution.
6292: */
6293:
6294: int
6295: xmlSubstituteEntitiesDefault(int val) {
6296: int old = xmlSubstituteEntitiesDefaultValue;
6297:
6298: xmlSubstituteEntitiesDefaultValue = val;
6299: return(old);
6300: }
1.77 daniel 6301:
Webmaster