Annotation of XML/parser.c, revision 1.95
1.1 veillard 1: /*
1.3 veillard 2: * parser.c : an XML 1.0 non-verifying parser
1.15 veillard 3: *
4: * See Copyright for the status of this software.
5: *
1.60 daniel 6: * Daniel.Veillard@w3.org
1.1 veillard 7: */
8:
1.26 daniel 9: #ifdef WIN32
10: #define HAVE_FCNTL_H
11: #include <io.h>
12: #else
1.9 httpng 13: #include <config.h>
1.26 daniel 14: #endif
1.1 veillard 15: #include <stdio.h>
16: #include <ctype.h>
1.14 veillard 17: #include <string.h> /* for memset() only */
1.50 daniel 18: #include <stdlib.h>
1.9 httpng 19: #include <sys/stat.h>
20: #ifdef HAVE_FCNTL_H
21: #include <fcntl.h>
22: #endif
1.10 httpng 23: #ifdef HAVE_UNISTD_H
24: #include <unistd.h>
25: #endif
1.20 daniel 26: #ifdef HAVE_ZLIB_H
27: #include <zlib.h>
28: #endif
1.1 veillard 29:
1.14 veillard 30: #include "tree.h"
1.1 veillard 31: #include "parser.h"
1.14 veillard 32: #include "entities.h"
1.75 daniel 33: #include "encoding.h"
1.61 daniel 34: #include "valid.h"
1.69 daniel 35: #include "parserInternals.h"
1.91 daniel 36: #include "xmlIO.h"
1.1 veillard 37:
1.86 daniel 38: const char *xmlParserVersion = LIBXML_VERSION;
39:
1.91 daniel 40: #define XML_MAX_NAMELEN 1000
41:
42: /************************************************************************
43: * *
44: * Input handling functions for progressive parsing *
45: * *
46: ************************************************************************/
47:
48: /* #define DEBUG_INPUT */
49:
50: #define INPUT_CHUNK 50
51:
52: #ifdef DEBUG_INPUT
53: #define CHECK_BUFFER(in) check_buffer(in)
54: #else
55: #define CHECK_BUFFER(in)
56: #endif
57:
58: void check_buffer(xmlParserInputPtr in) {
59: if (in->base != in->buf->buffer->content) {
60: fprintf(stderr, "xmlParserInput: base mismatch problem\n");
61: }
62: if (in->cur < in->base) {
63: fprintf(stderr, "xmlParserInput: cur < base problem\n");
64: }
65: if (in->cur > in->base + in->buf->buffer->use) {
66: fprintf(stderr, "xmlParserInput: cur > base + use problem\n");
67: }
68: fprintf(stderr,"buffer %x : content %x, cur %d, use %d, size %d\n",
69: (int) in, (int) in->buf->buffer->content, in->cur - in->base,
70: in->buf->buffer->use, in->buf->buffer->size);
71: }
72:
73:
74: /**
75: * xmlParserInputRead:
76: * @in: an XML parser input
77: * @len: an indicative size for the lookahead
78: *
79: * This function refresh the input for the parser. It doesn't try to
80: * preserve pointers to the input buffer, and discard already read data
81: *
82: * Returns the number of CHARs read, or -1 in case of error, 0 indicate the
83: * end of this entity
84: */
85: int
86: xmlParserInputRead(xmlParserInputPtr in, int len) {
87: int ret;
88: int used;
89: int index;
90:
91: #ifdef DEBUG_INPUT
92: fprintf(stderr, "Read\n");
93: #endif
94: if (in->buf == NULL) return(-1);
95: if (in->base == NULL) return(-1);
96: if (in->cur == NULL) return(-1);
97: if (in->buf->buffer == NULL) return(-1);
98:
99: CHECK_BUFFER(in);
100:
101: used = in->cur - in->buf->buffer->content;
102: ret = xmlBufferShrink(in->buf->buffer, used);
103: if (ret > 0) {
104: in->cur -= ret;
105: in->consumed += ret;
106: }
107: ret = xmlParserInputBufferRead(in->buf, len);
108: if (in->base != in->buf->buffer->content) {
109: /*
110: * the buffer has been realloced
111: */
112: index = in->cur - in->base;
113: in->base = in->buf->buffer->content;
114: in->cur = &in->buf->buffer->content[index];
115: }
116:
117: CHECK_BUFFER(in);
118:
119: return(ret);
120: }
121:
122: /**
123: * xmlParserInputGrow:
124: * @in: an XML parser input
125: * @len: an indicative size for the lookahead
126: *
127: * This function increase the input for the parser. It tries to
128: * preserve pointers to the input buffer, and keep already read data
129: *
130: * Returns the number of CHARs read, or -1 in case of error, 0 indicate the
131: * end of this entity
132: */
133: int
134: xmlParserInputGrow(xmlParserInputPtr in, int len) {
135: int ret;
136: int index;
137:
138: #ifdef DEBUG_INPUT
139: fprintf(stderr, "Grow\n");
140: #endif
141: if (in->buf == NULL) return(-1);
142: if (in->base == NULL) return(-1);
143: if (in->cur == NULL) return(-1);
144: if (in->buf->buffer == NULL) return(-1);
145:
146: CHECK_BUFFER(in);
147:
148: index = in->cur - in->base;
149: if (in->buf->buffer->use > index + INPUT_CHUNK) {
150:
151: CHECK_BUFFER(in);
152:
153: return(0);
154: }
155: ret = xmlParserInputBufferGrow(in->buf, len);
156: if (in->base != in->buf->buffer->content) {
157: /*
158: * the buffer has been realloced
159: */
160: index = in->cur - in->base;
161: in->base = in->buf->buffer->content;
162: in->cur = &in->buf->buffer->content[index];
163: }
164:
165: CHECK_BUFFER(in);
166:
167: return(ret);
168: }
169:
170: /**
171: * xmlParserInputShrink:
172: * @in: an XML parser input
173: *
174: * This function removes used input for the parser.
175: */
176: void
177: xmlParserInputShrink(xmlParserInputPtr in) {
178: int used;
179: int ret;
180: int index;
181:
182: #ifdef DEBUG_INPUT
183: fprintf(stderr, "Shrink\n");
184: #endif
185: if (in->buf == NULL) return;
186: if (in->base == NULL) return;
187: if (in->cur == NULL) return;
188: if (in->buf->buffer == NULL) return;
189:
190: CHECK_BUFFER(in);
191:
192: used = in->cur - in->buf->buffer->content;
193: if (used > INPUT_CHUNK) {
194: ret = xmlBufferShrink(in->buf->buffer, used);
195: if (ret > 0) {
196: in->cur -= ret;
197: in->consumed += ret;
198: }
199: }
200:
201: CHECK_BUFFER(in);
202:
203: if (in->buf->buffer->use > INPUT_CHUNK) {
204: return;
205: }
206: xmlParserInputBufferRead(in->buf, 2 * INPUT_CHUNK);
207: if (in->base != in->buf->buffer->content) {
208: /*
209: * the buffer has been realloced
210: */
211: index = in->cur - in->base;
212: in->base = in->buf->buffer->content;
213: in->cur = &in->buf->buffer->content[index];
214: }
215:
216: CHECK_BUFFER(in);
217: }
218:
1.45 daniel 219: /************************************************************************
220: * *
221: * Parser stacks related functions and macros *
222: * *
223: ************************************************************************/
1.79 daniel 224:
225: int xmlSubstituteEntitiesDefaultValue = 0;
226:
1.1 veillard 227: /*
1.40 daniel 228: * Generic function for accessing stacks in the Parser Context
1.1 veillard 229: */
230:
1.31 daniel 231: #define PUSH_AND_POP(type, name) \
1.72 daniel 232: extern int name##Push(xmlParserCtxtPtr ctxt, type value) { \
1.31 daniel 233: if (ctxt->name##Nr >= ctxt->name##Max) { \
234: ctxt->name##Max *= 2; \
1.40 daniel 235: ctxt->name##Tab = (void *) realloc(ctxt->name##Tab, \
236: ctxt->name##Max * sizeof(ctxt->name##Tab[0])); \
237: if (ctxt->name##Tab == NULL) { \
1.31 daniel 238: fprintf(stderr, "realloc failed !\n"); \
239: exit(1); \
240: } \
241: } \
1.40 daniel 242: ctxt->name##Tab[ctxt->name##Nr] = value; \
243: ctxt->name = value; \
244: return(ctxt->name##Nr++); \
1.31 daniel 245: } \
1.72 daniel 246: extern type name##Pop(xmlParserCtxtPtr ctxt) { \
1.69 daniel 247: type ret; \
1.40 daniel 248: if (ctxt->name##Nr <= 0) return(0); \
249: ctxt->name##Nr--; \
1.50 daniel 250: if (ctxt->name##Nr > 0) \
251: ctxt->name = ctxt->name##Tab[ctxt->name##Nr - 1]; \
252: else \
253: ctxt->name = NULL; \
1.69 daniel 254: ret = ctxt->name##Tab[ctxt->name##Nr]; \
255: ctxt->name##Tab[ctxt->name##Nr] = 0; \
256: return(ret); \
1.31 daniel 257: } \
258:
1.40 daniel 259: PUSH_AND_POP(xmlParserInputPtr, input)
1.41 daniel 260: PUSH_AND_POP(xmlNodePtr, node)
1.40 daniel 261:
/*
 * Macros for accessing the content. Those should be used only by the parser,
 * and not exported. They all expect a variable named ctxt to be in scope.
 *
 * Dirty macros, i.e. one needs to make assumptions on the context to use them
 *
 *   CUR_PTR return the current pointer to the CHAR to be parsed.
 *   CUR     returns the current CHAR value, i.e. a 8 bit value if compiled
 *           in ISO-Latin or UTF-8, and the current 16 bit value if compiled
 *           in UNICODE mode. This should be used internally by the parser
 *           only to compare to ASCII values otherwise it would break when
 *           running with UTF-8 encoding.
 *   NXT(n)  returns the n'th next CHAR. Same as CUR, it should be used only
 *           to compare on ASCII based substring.
 *   SKIP(n) Skip n CHAR, and must also be used only to skip ASCII defined
 *           strings within the parser.
 *   SHRINK  drop the already parsed data of the current input.
 *   GROW    ask for INPUT_CHUNK more CHARs on the current input.
 *
 * Clean macros, not dependent of an ASCII context, expect UTF-8 encoding
 *
 *   CURRENT Returns the current char value, with the full decoding of
 *           UTF-8 if we are using this mode. It returns an int.
 *   NEXT    Skip to the next character, this does the proper decoding
 *           in UTF-8 mode. It also pop-up unfinished entities on the fly.
 *           It expands to a brace block, not to an expression.
 *   COPY(to) copy one char to *to, increment CUR_PTR and to accordingly
 *           (documented here but not defined in this part of the file)
 */

#define CUR (*ctxt->input->cur)
/* SKIP and NXT are fully parenthesized so they compose in expressions */
#define SKIP(val) (ctxt->input->cur += (val))
#define NXT(val) (ctxt->input->cur[(val)])
#define CUR_PTR ctxt->input->cur
#define SHRINK xmlParserInputShrink(ctxt->input)
#define GROW xmlParserInputGrow(ctxt->input, INPUT_CHUNK)

#define SKIP_BLANKS 							\
    while (IS_BLANK(*(ctxt->input->cur))) NEXT

#ifndef USE_UTF_8
#define CURRENT (*ctxt->input->cur)
/*
 * At the end of the buffered data try to get more; if nothing more is
 * available pop the current input. Otherwise update line/col, advance
 * by one CHAR and refill if the end of the buffered data was reached.
 */
#define NEXT {								\
    if ((*ctxt->input->cur == 0) &&					\
        (xmlParserInputGrow(ctxt->input, INPUT_CHUNK) <= 0)) {		\
	    xmlPopInput(ctxt);						\
    } else {								\
        if (*(ctxt->input->cur) == '\n') {				\
	    ctxt->input->line++; ctxt->input->col = 1;			\
	} else ctxt->input->col++;					\
	ctxt->input->cur++;						\
        if (*ctxt->input->cur == 0)					\
	    xmlParserInputGrow(ctxt->input, INPUT_CHUNK);		\
    }}

#else
/* no UTF-8 aware variants of CURRENT/NEXT are defined here */
#endif
1.42 daniel 329:
1.40 daniel 330:
1.50 daniel 331: /**
332: * xmlPopInput:
333: * @ctxt: an XML parser context
334: *
1.40 daniel 335: * xmlPopInput: the current input pointed by ctxt->input came to an end
336: * pop it and return the next char.
1.45 daniel 337: *
338: * TODO A deallocation of the popped Input structure is needed
1.68 daniel 339: *
340: * Returns the current CHAR in the parser context
1.40 daniel 341: */
1.55 daniel 342: CHAR
343: xmlPopInput(xmlParserCtxtPtr ctxt) {
1.40 daniel 344: if (ctxt->inputNr == 1) return(0); /* End of main Input */
1.69 daniel 345: xmlFreeInputStream(inputPop(ctxt));
1.40 daniel 346: return(CUR);
347: }
348:
1.50 daniel 349: /**
350: * xmlPushInput:
351: * @ctxt: an XML parser context
352: * @input: an XML parser input fragment (entity, XML fragment ...).
353: *
1.40 daniel 354: * xmlPushInput: switch to a new input stream which is stacked on top
355: * of the previous one(s).
356: */
1.55 daniel 357: void
358: xmlPushInput(xmlParserCtxtPtr ctxt, xmlParserInputPtr input) {
1.40 daniel 359: if (input == NULL) return;
360: inputPush(ctxt, input);
361: }
362:
1.50 daniel 363: /**
1.69 daniel 364: * xmlFreeInputStream:
365: * @input: an xmlParserInputPtr
366: *
367: * Free up an input stream.
368: */
369: void
370: xmlFreeInputStream(xmlParserInputPtr input) {
371: if (input == NULL) return;
372:
373: if (input->filename != NULL) free((char *) input->filename);
1.94 daniel 374: if (input->directory != NULL) free((char *) input->directory);
1.69 daniel 375: if ((input->free != NULL) && (input->base != NULL))
376: input->free((char *) input->base);
1.93 veillard 377: if (input->buf != NULL)
378: xmlFreeParserInputBuffer(input->buf);
1.69 daniel 379: memset(input, -1, sizeof(xmlParserInput));
380: free(input);
381: }
382:
383: /**
1.50 daniel 384: * xmlNewEntityInputStream:
385: * @ctxt: an XML parser context
386: * @entity: an Entity pointer
387: *
1.82 daniel 388: * Create a new input stream based on an xmlEntityPtr
1.68 daniel 389: * Returns the new input stream
1.45 daniel 390: */
1.50 daniel 391: xmlParserInputPtr
392: xmlNewEntityInputStream(xmlParserCtxtPtr ctxt, xmlEntityPtr entity) {
1.45 daniel 393: xmlParserInputPtr input;
394:
395: if (entity == NULL) {
1.55 daniel 396: if ((ctxt->sax != NULL) && (ctxt->sax->error != NULL))
1.74 daniel 397: ctxt->sax->error(ctxt->userData,
1.45 daniel 398: "internal: xmlNewEntityInputStream entity = NULL\n");
1.50 daniel 399: return(NULL);
1.45 daniel 400: }
401: if (entity->content == NULL) {
1.55 daniel 402: if ((ctxt->sax != NULL) && (ctxt->sax->error != NULL))
1.74 daniel 403: ctxt->sax->error(ctxt->userData,
1.45 daniel 404: "internal: xmlNewEntityInputStream entity->input = NULL\n");
1.50 daniel 405: return(NULL);
1.45 daniel 406: }
407: input = (xmlParserInputPtr) malloc(sizeof(xmlParserInput));
408: if (input == NULL) {
1.55 daniel 409: if ((ctxt->sax != NULL) && (ctxt->sax->error != NULL))
1.74 daniel 410: ctxt->sax->error(ctxt->userData, "malloc: couldn't allocate a new input stream\n");
1.50 daniel 411: return(NULL);
1.45 daniel 412: }
413: input->filename = entity->SystemID; /* TODO !!! char <- CHAR */
1.94 daniel 414: input->directory = NULL;
1.45 daniel 415: input->base = entity->content;
416: input->cur = entity->content;
1.91 daniel 417: input->buf = NULL;
1.45 daniel 418: input->line = 1;
419: input->col = 1;
1.92 daniel 420: input->buf = NULL;
1.69 daniel 421: input->free = NULL;
1.91 daniel 422: input->consumed = 0;
1.50 daniel 423: return(input);
1.45 daniel 424: }
425:
1.59 daniel 426: /**
427: * xmlNewStringInputStream:
428: * @ctxt: an XML parser context
1.82 daniel 429: * @entity: an Entity memory buffer
1.59 daniel 430: *
431: * Create a new input stream based on a memory buffer.
1.68 daniel 432: * Returns the new input stream
1.59 daniel 433: */
434: xmlParserInputPtr
1.82 daniel 435: xmlNewStringInputStream(xmlParserCtxtPtr ctxt, CHAR *entity) {
1.59 daniel 436: xmlParserInputPtr input;
437:
1.82 daniel 438: if (entity == NULL) {
1.59 daniel 439: if ((ctxt->sax != NULL) && (ctxt->sax->error != NULL))
1.74 daniel 440: ctxt->sax->error(ctxt->userData,
1.59 daniel 441: "internal: xmlNewStringInputStream string = NULL\n");
442: return(NULL);
443: }
444: input = (xmlParserInputPtr) malloc(sizeof(xmlParserInput));
445: if (input == NULL) {
446: if ((ctxt->sax != NULL) && (ctxt->sax->error != NULL))
1.74 daniel 447: ctxt->sax->error(ctxt->userData, "malloc: couldn't allocate a new input stream\n");
1.59 daniel 448: return(NULL);
449: }
450: input->filename = NULL;
1.94 daniel 451: input->directory = NULL;
1.82 daniel 452: input->base = entity;
453: input->cur = entity;
1.91 daniel 454: input->buf = NULL;
1.59 daniel 455: input->line = 1;
456: input->col = 1;
1.69 daniel 457: input->free = NULL;
1.91 daniel 458: input->consumed = 0;
1.59 daniel 459: return(input);
460: }
461:
1.76 daniel 462: /**
463: * xmlNewInputFromFile:
464: * @ctxt: an XML parser context
465: * @filename: the filename to use as entity
466: *
467: * Create a new input stream based on a file.
468: *
469: * Returns the new input stream or NULL in case of error
470: */
471: xmlParserInputPtr
1.79 daniel 472: xmlNewInputFromFile(xmlParserCtxtPtr ctxt, const char *filename) {
1.91 daniel 473: xmlParserInputBufferPtr buf;
1.76 daniel 474: xmlParserInputPtr inputStream;
1.94 daniel 475: const char *directory = NULL;
1.76 daniel 476:
1.91 daniel 477: buf = xmlParserInputBufferCreateFilename(filename, XML_CHAR_ENCODING_NONE);
1.94 daniel 478: if (buf == NULL) {
479: if ((ctxt->input != NULL) && (ctxt->input->directory != NULL)) {
480: char name[1024];
481: #ifdef WIN32
482: sprintf(name, "%s\\%s", ctxt->input->directory, filename);
483: #else
484: sprintf(name, "%s/%s", ctxt->input->directory, filename);
485: #endif
486: buf = xmlParserInputBufferCreateFilename(name,
487: XML_CHAR_ENCODING_NONE);
488: if (buf == NULL)
489: return(NULL);
490: directory = strdup(ctxt->input->directory);
491: } else
492: return(NULL);
493: }
494: if (directory == NULL)
495: directory = xmlParserGetDirectory(filename);
1.76 daniel 496:
497: inputStream = (xmlParserInputPtr) malloc(sizeof(xmlParserInput));
498: if (inputStream == NULL) {
499: perror("malloc");
500: free(ctxt);
501: return(NULL);
502: }
503:
504: inputStream->filename = strdup(filename);
1.94 daniel 505: inputStream->directory = directory;
1.76 daniel 506: inputStream->line = 1;
507: inputStream->col = 1;
1.91 daniel 508: inputStream->buf = buf;
1.76 daniel 509:
1.91 daniel 510: inputStream->base = inputStream->buf->buffer->content;
511: inputStream->cur = inputStream->buf->buffer->content;
512: inputStream->free = NULL;
513: inputStream->consumed = 0;
1.76 daniel 514:
515: return(inputStream);
516: }
517:
1.77 daniel 518: /************************************************************************
519: * *
520: * Commodity functions to handle entities *
521: * *
522: ************************************************************************/
523:
/*
 * Double the size of a CHAR buffer. A variable named <buffer>_size
 * holding the current size must be in scope. The buffer may move:
 * pointers into it have to be recomputed by the caller. The program
 * exits if the reallocation fails.
 */
#define growBuffer(buffer) {						\
    buffer##_size = buffer##_size * 2;					\
    buffer = (CHAR *) realloc(buffer, buffer##_size * sizeof(CHAR));	\
    if (NULL == buffer) {						\
	perror("realloc failed");					\
	exit(1);							\
    }									\
}
535:
536:
537: /**
538: * xmlDecodeEntities:
539: * @ctxt: the parser context
540: * @what: combination of XML_SUBSTITUTE_REF and XML_SUBSTITUTE_PEREF
541: * @len: the len to decode (in bytes !), -1 for no size limit
542: * @end: an end marker CHAR, 0 if none
543: * @end2: an end marker CHAR, 0 if none
544: * @end3: an end marker CHAR, 0 if none
545: *
546: * [67] Reference ::= EntityRef | CharRef
547: *
548: * [69] PEReference ::= '%' Name ';'
549: *
550: * Returns A newly allocated string with the substitution done. The caller
551: * must deallocate it !
552: */
553: CHAR *
554: xmlDecodeEntities(xmlParserCtxtPtr ctxt, int len, int what,
555: CHAR end, CHAR end2, CHAR end3) {
556: CHAR *buffer = NULL;
1.78 daniel 557: int buffer_size = 0;
1.77 daniel 558: CHAR *out = NULL;
1.78 daniel 559:
1.77 daniel 560: CHAR *cur = NULL;
561: xmlEntityPtr ent;
1.91 daniel 562: int nbchars = 0;
1.77 daniel 563: unsigned int max = (unsigned int) len;
564:
565: /*
566: * allocate a translation buffer.
567: */
568: buffer_size = 1000;
569: buffer = (CHAR *) malloc(buffer_size * sizeof(CHAR));
570: if (buffer == NULL) {
571: perror("xmlDecodeEntities: malloc failed");
572: return(NULL);
573: }
574: out = buffer;
575:
1.78 daniel 576: /*
577: * Ok loop until we reach one of the ending char or a size limit.
578: */
1.91 daniel 579: while ((nbchars < max) && (CUR != end) &&
1.77 daniel 580: (CUR != end2) && (CUR != end3)) {
581:
582: if (CUR == '&' && (what & XML_SUBSTITUTE_REF)) {
583: if (NXT(1) == '#') {
584: int val = xmlParseCharRef(ctxt);
585: /* TODO: invalid for UTF-8 variable encoding !!! */
586: *out++ = val;
1.91 daniel 587: nbchars += 3; /* !!!! */
1.77 daniel 588: } else {
589: ent = xmlParseEntityRef(ctxt);
590: if (ent != NULL) {
591: cur = ent->content;
592: while (*cur != 0) {
593: *out++ = *cur++;
594: if (out - buffer > buffer_size - 100) {
595: int index = out - buffer;
596:
1.78 daniel 597: growBuffer(buffer);
1.77 daniel 598: out = &buffer[index];
599: }
600: }
1.91 daniel 601: nbchars += 3 + xmlStrlen(ent->name);
1.77 daniel 602: }
603: }
604: } else if (CUR == '%' && (what & XML_SUBSTITUTE_PEREF)) {
605: /*
606: * a PEReference induce to switch the entity flow,
607: * we break here to flush the current set of chars
608: * parsed if any. We will be called back later.
609: */
1.91 daniel 610: if (nbchars != 0) break;
1.77 daniel 611:
612: xmlParsePEReference(ctxt);
1.79 daniel 613:
614: /*
615: * Pop-up of finished entities.
616: */
617: while ((CUR == 0) && (ctxt->inputNr > 1))
618: xmlPopInput(ctxt);
619:
1.78 daniel 620: break;
1.77 daniel 621: } else {
622: /* TODO: invalid for UTF-8 , use COPY(out); */
623: *out++ = CUR;
1.91 daniel 624: nbchars++;
1.86 daniel 625: if (out - buffer > buffer_size - 100) {
626: int index = out - buffer;
627:
628: growBuffer(buffer);
629: out = &buffer[index];
630: }
1.77 daniel 631: NEXT;
632: }
633: }
634: *out++ = 0;
635: return(buffer);
636: }
637:
1.1 veillard 638:
1.28 daniel 639: /************************************************************************
640: * *
1.75 daniel 641: * Commodity functions to handle encodings *
642: * *
643: ************************************************************************/
644:
645: /**
646: * xmlSwitchEncoding:
647: * @ctxt: the parser context
648: * @len: the len of @cur
649: *
650: * change the input functions when discovering the character encoding
651: * of a given entity.
652: *
653: */
654: void
655: xmlSwitchEncoding(xmlParserCtxtPtr ctxt, xmlCharEncoding enc)
656: {
657: switch (enc) {
658: case XML_CHAR_ENCODING_ERROR:
659: if ((ctxt->sax != NULL) && (ctxt->sax->error != NULL))
660: ctxt->sax->error(ctxt->userData, "encoding unknown\n");
661: ctxt->wellFormed = 0;
662: break;
663: case XML_CHAR_ENCODING_NONE:
664: /* let's assume it's UTF-8 without the XML decl */
665: return;
666: case XML_CHAR_ENCODING_UTF8:
667: /* default encoding, no conversion should be needed */
668: return;
669: case XML_CHAR_ENCODING_UTF16LE:
670: if ((ctxt->sax != NULL) && (ctxt->sax->error != NULL))
671: ctxt->sax->error(ctxt->userData,
672: "char encoding UTF16 little endian not supported\n");
673: break;
674: case XML_CHAR_ENCODING_UTF16BE:
675: if ((ctxt->sax != NULL) && (ctxt->sax->error != NULL))
676: ctxt->sax->error(ctxt->userData,
677: "char encoding UTF16 big endian not supported\n");
678: break;
679: case XML_CHAR_ENCODING_UCS4LE:
680: if ((ctxt->sax != NULL) && (ctxt->sax->error != NULL))
681: ctxt->sax->error(ctxt->userData,
682: "char encoding USC4 little endian not supported\n");
683: break;
684: case XML_CHAR_ENCODING_UCS4BE:
685: if ((ctxt->sax != NULL) && (ctxt->sax->error != NULL))
686: ctxt->sax->error(ctxt->userData,
687: "char encoding USC4 big endian not supported\n");
688: break;
689: case XML_CHAR_ENCODING_EBCDIC:
690: if ((ctxt->sax != NULL) && (ctxt->sax->error != NULL))
691: ctxt->sax->error(ctxt->userData,
692: "char encoding EBCDIC not supported\n");
693: break;
694: case XML_CHAR_ENCODING_UCS4_2143:
695: if ((ctxt->sax != NULL) && (ctxt->sax->error != NULL))
696: ctxt->sax->error(ctxt->userData,
697: "char encoding UCS4 2143 not supported\n");
698: break;
699: case XML_CHAR_ENCODING_UCS4_3412:
700: if ((ctxt->sax != NULL) && (ctxt->sax->error != NULL))
701: ctxt->sax->error(ctxt->userData,
702: "char encoding UCS4 3412 not supported\n");
703: break;
704: case XML_CHAR_ENCODING_UCS2:
705: if ((ctxt->sax != NULL) && (ctxt->sax->error != NULL))
706: ctxt->sax->error(ctxt->userData,
707: "char encoding UCS2 not supported\n");
708: break;
709: case XML_CHAR_ENCODING_8859_1:
710: if ((ctxt->sax != NULL) && (ctxt->sax->error != NULL))
711: ctxt->sax->error(ctxt->userData,
712: "char encoding ISO_8859_1 ISO Latin 1 not supported\n");
713: break;
714: case XML_CHAR_ENCODING_8859_2:
715: if ((ctxt->sax != NULL) && (ctxt->sax->error != NULL))
716: ctxt->sax->error(ctxt->userData,
717: "char encoding ISO_8859_2 ISO Latin 2 not supported\n");
718: break;
719: case XML_CHAR_ENCODING_8859_3:
720: if ((ctxt->sax != NULL) && (ctxt->sax->error != NULL))
721: ctxt->sax->error(ctxt->userData,
722: "char encoding ISO_8859_3 not supported\n");
723: break;
724: case XML_CHAR_ENCODING_8859_4:
725: if ((ctxt->sax != NULL) && (ctxt->sax->error != NULL))
726: ctxt->sax->error(ctxt->userData,
727: "char encoding ISO_8859_4 not supported\n");
728: break;
729: case XML_CHAR_ENCODING_8859_5:
730: if ((ctxt->sax != NULL) && (ctxt->sax->error != NULL))
731: ctxt->sax->error(ctxt->userData,
732: "char encoding ISO_8859_5 not supported\n");
733: break;
734: case XML_CHAR_ENCODING_8859_6:
735: if ((ctxt->sax != NULL) && (ctxt->sax->error != NULL))
736: ctxt->sax->error(ctxt->userData,
737: "char encoding ISO_8859_6 not supported\n");
738: break;
739: case XML_CHAR_ENCODING_8859_7:
740: if ((ctxt->sax != NULL) && (ctxt->sax->error != NULL))
741: ctxt->sax->error(ctxt->userData,
742: "char encoding ISO_8859_7 not supported\n");
743: break;
744: case XML_CHAR_ENCODING_8859_8:
745: if ((ctxt->sax != NULL) && (ctxt->sax->error != NULL))
746: ctxt->sax->error(ctxt->userData,
747: "char encoding ISO_8859_8 not supported\n");
748: break;
749: case XML_CHAR_ENCODING_8859_9:
750: if ((ctxt->sax != NULL) && (ctxt->sax->error != NULL))
751: ctxt->sax->error(ctxt->userData,
752: "char encoding ISO_8859_9 not supported\n");
753: break;
754: case XML_CHAR_ENCODING_2022_JP:
755: if ((ctxt->sax != NULL) && (ctxt->sax->error != NULL))
756: ctxt->sax->error(ctxt->userData,
757: "char encoding ISO-2022-JPnot supported\n");
758: break;
759: case XML_CHAR_ENCODING_SHIFT_JIS:
760: if ((ctxt->sax != NULL) && (ctxt->sax->error != NULL))
761: ctxt->sax->error(ctxt->userData,
762: "char encoding Shift_JISnot supported\n");
763: break;
764: case XML_CHAR_ENCODING_EUC_JP:
765: if ((ctxt->sax != NULL) && (ctxt->sax->error != NULL))
766: ctxt->sax->error(ctxt->userData,
767: "char encoding EUC-JPnot supported\n");
768: break;
769: }
770: }
771:
772: /************************************************************************
773: * *
1.28 daniel 774: * Commodity functions to handle CHARs *
775: * *
776: ************************************************************************/
777:
1.50 daniel 778: /**
779: * xmlStrndup:
780: * @cur: the input CHAR *
781: * @len: the len of @cur
782: *
783: * a strndup for array of CHAR's
1.68 daniel 784: *
785: * Returns a new CHAR * or NULL
1.1 veillard 786: */
1.55 daniel 787: CHAR *
788: xmlStrndup(const CHAR *cur, int len) {
1.1 veillard 789: CHAR *ret = malloc((len + 1) * sizeof(CHAR));
790:
791: if (ret == NULL) {
1.86 daniel 792: fprintf(stderr, "malloc of %ld byte failed\n",
793: (len + 1) * (long)sizeof(CHAR));
1.1 veillard 794: return(NULL);
795: }
796: memcpy(ret, cur, len * sizeof(CHAR));
797: ret[len] = 0;
798: return(ret);
799: }
800:
1.50 daniel 801: /**
802: * xmlStrdup:
803: * @cur: the input CHAR *
804: *
805: * a strdup for array of CHAR's
1.68 daniel 806: *
807: * Returns a new CHAR * or NULL
1.1 veillard 808: */
1.55 daniel 809: CHAR *
810: xmlStrdup(const CHAR *cur) {
1.6 httpng 811: const CHAR *p = cur;
1.1 veillard 812:
813: while (IS_CHAR(*p)) p++;
814: return(xmlStrndup(cur, p - cur));
815: }
816:
1.50 daniel 817: /**
818: * xmlCharStrndup:
819: * @cur: the input char *
820: * @len: the len of @cur
821: *
822: * a strndup for char's to CHAR's
1.68 daniel 823: *
824: * Returns a new CHAR * or NULL
1.45 daniel 825: */
826:
1.55 daniel 827: CHAR *
828: xmlCharStrndup(const char *cur, int len) {
1.45 daniel 829: int i;
830: CHAR *ret = malloc((len + 1) * sizeof(CHAR));
831:
832: if (ret == NULL) {
1.86 daniel 833: fprintf(stderr, "malloc of %ld byte failed\n",
834: (len + 1) * (long)sizeof(CHAR));
1.45 daniel 835: return(NULL);
836: }
837: for (i = 0;i < len;i++)
838: ret[i] = (CHAR) cur[i];
839: ret[len] = 0;
840: return(ret);
841: }
842:
1.50 daniel 843: /**
844: * xmlCharStrdup:
845: * @cur: the input char *
846: * @len: the len of @cur
847: *
848: * a strdup for char's to CHAR's
1.68 daniel 849: *
850: * Returns a new CHAR * or NULL
1.45 daniel 851: */
852:
1.55 daniel 853: CHAR *
854: xmlCharStrdup(const char *cur) {
1.45 daniel 855: const char *p = cur;
856:
857: while (*p != '\0') p++;
858: return(xmlCharStrndup(cur, p - cur));
859: }
860:
1.50 daniel 861: /**
862: * xmlStrcmp:
863: * @str1: the first CHAR *
864: * @str2: the second CHAR *
865: *
866: * a strcmp for CHAR's
1.68 daniel 867: *
868: * Returns the integer result of the comparison
1.14 veillard 869: */
870:
1.55 daniel 871: int
872: xmlStrcmp(const CHAR *str1, const CHAR *str2) {
1.14 veillard 873: register int tmp;
874:
875: do {
876: tmp = *str1++ - *str2++;
877: if (tmp != 0) return(tmp);
878: } while ((*str1 != 0) && (*str2 != 0));
879: return (*str1 - *str2);
880: }
881:
1.50 daniel 882: /**
883: * xmlStrncmp:
884: * @str1: the first CHAR *
885: * @str2: the second CHAR *
886: * @len: the max comparison length
887: *
888: * a strncmp for CHAR's
1.68 daniel 889: *
890: * Returns the integer result of the comparison
1.14 veillard 891: */
892:
1.55 daniel 893: int
894: xmlStrncmp(const CHAR *str1, const CHAR *str2, int len) {
1.14 veillard 895: register int tmp;
896:
897: if (len <= 0) return(0);
898: do {
899: tmp = *str1++ - *str2++;
900: if (tmp != 0) return(tmp);
901: len--;
902: if (len <= 0) return(0);
903: } while ((*str1 != 0) && (*str2 != 0));
904: return (*str1 - *str2);
905: }
906:
1.50 daniel 907: /**
908: * xmlStrchr:
909: * @str: the CHAR * array
910: * @val: the CHAR to search
911: *
912: * a strchr for CHAR's
1.68 daniel 913: *
914: * Returns the CHAR * for the first occurence or NULL.
1.14 veillard 915: */
916:
1.89 daniel 917: const CHAR *
1.55 daniel 918: xmlStrchr(const CHAR *str, CHAR val) {
1.14 veillard 919: while (*str != 0) {
920: if (*str == val) return((CHAR *) str);
921: str++;
922: }
923: return(NULL);
1.89 daniel 924: }
925:
926: /**
927: * xmlStrstr:
928: * @str: the CHAR * array (haystack)
929: * @val: the CHAR to search (needle)
930: *
931: * a strstr for CHAR's
932: *
933: * Returns the CHAR * for the first occurence or NULL.
934: */
935:
936: const CHAR *
937: xmlStrstr(const CHAR *str, CHAR *val) {
938: int n;
939:
940: if (str == NULL) return(NULL);
941: if (val == NULL) return(NULL);
942: n = xmlStrlen(val);
943:
944: if (n == 0) return(str);
945: while (*str != 0) {
946: if (*str == *val) {
947: if (!xmlStrncmp(str, val, n)) return((const CHAR *) str);
948: }
949: str++;
950: }
951: return(NULL);
952: }
953:
954: /**
955: * xmlStrsub:
956: * @str: the CHAR * array (haystack)
957: * @start: the index of the first char (zero based)
958: * @len: the length of the substring
959: *
960: * Extract a substring of a given string
961: *
962: * Returns the CHAR * for the first occurence or NULL.
963: */
964:
965: CHAR *
966: xmlStrsub(const CHAR *str, int start, int len) {
967: int i;
968:
969: if (str == NULL) return(NULL);
970: if (start < 0) return(NULL);
1.90 daniel 971: if (len < 0) return(NULL);
1.89 daniel 972:
973: for (i = 0;i < start;i++) {
974: if (*str == 0) return(NULL);
975: str++;
976: }
977: if (*str == 0) return(NULL);
978: return(xmlStrndup(str, len));
1.14 veillard 979: }
1.28 daniel 980:
1.50 daniel 981: /**
982: * xmlStrlen:
983: * @str: the CHAR * array
984: *
985: * lenght of a CHAR's string
1.68 daniel 986: *
987: * Returns the number of CHAR contained in the ARRAY.
1.45 daniel 988: */
989:
1.55 daniel 990: int
991: xmlStrlen(const CHAR *str) {
1.45 daniel 992: int len = 0;
993:
994: if (str == NULL) return(0);
995: while (*str != 0) {
996: str++;
997: len++;
998: }
999: return(len);
1000: }
1001:
1.50 daniel 1002: /**
1003: * xmlStrncat:
1.68 daniel 1004: * @cur: the original CHAR * array
1.50 daniel 1005: * @add: the CHAR * array added
1006: * @len: the length of @add
1007: *
1008: * a strncat for array of CHAR's
1.68 daniel 1009: *
1010: * Returns a new CHAR * containing the concatenated string.
1.45 daniel 1011: */
1012:
1.55 daniel 1013: CHAR *
1014: xmlStrncat(CHAR *cur, const CHAR *add, int len) {
1.45 daniel 1015: int size;
1016: CHAR *ret;
1017:
1018: if ((add == NULL) || (len == 0))
1019: return(cur);
1020: if (cur == NULL)
1021: return(xmlStrndup(add, len));
1022:
1023: size = xmlStrlen(cur);
1024: ret = realloc(cur, (size + len + 1) * sizeof(CHAR));
1025: if (ret == NULL) {
1.86 daniel 1026: fprintf(stderr, "xmlStrncat: realloc of %ld byte failed\n",
1027: (size + len + 1) * (long)sizeof(CHAR));
1.45 daniel 1028: return(cur);
1029: }
1030: memcpy(&ret[size], add, len * sizeof(CHAR));
1031: ret[size + len] = 0;
1032: return(ret);
1033: }
1034:
1.50 daniel 1035: /**
1036: * xmlStrcat:
1.68 daniel 1037: * @cur: the original CHAR * array
1.50 daniel 1038: * @add: the CHAR * array added
1039: *
1040: * a strcat for array of CHAR's
1.68 daniel 1041: *
1042: * Returns a new CHAR * containing the concatenated string.
1.45 daniel 1043: */
1.55 daniel 1044: CHAR *
1045: xmlStrcat(CHAR *cur, const CHAR *add) {
1.45 daniel 1046: const CHAR *p = add;
1047:
1048: if (add == NULL) return(cur);
1049: if (cur == NULL)
1050: return(xmlStrdup(add));
1051:
1052: while (IS_CHAR(*p)) p++;
1053: return(xmlStrncat(cur, add, p - add));
1054: }
1055:
1056: /************************************************************************
1057: * *
1058: * Commodity functions, cleanup needed ? *
1059: * *
1060: ************************************************************************/
1061:
1.50 daniel 1062: /**
1063: * areBlanks:
1064: * @ctxt: an XML parser context
1065: * @str: a CHAR *
1066: * @len: the size of @str
1067: *
1.45 daniel 1068: * Is this a sequence of blank chars that one can ignore ?
1.50 daniel 1069: *
1070: * TODO: to be corrected accodingly to DTD information if available
1.68 daniel 1071: *
1072: * Returns 1 if ignorable 0 otherwise.
1.45 daniel 1073: */
1074:
1075: static int areBlanks(xmlParserCtxtPtr ctxt, const CHAR *str, int len) {
1076: int i;
1077: xmlNodePtr lastChild;
1078:
1079: for (i = 0;i < len;i++)
1080: if (!(IS_BLANK(str[i]))) return(0);
1081:
1082: if (CUR != '<') return(0);
1.72 daniel 1083: if (ctxt->node == NULL) return(0);
1.45 daniel 1084: lastChild = xmlGetLastChild(ctxt->node);
1085: if (lastChild == NULL) {
1086: if (ctxt->node->content != NULL) return(0);
1087: } else if (xmlNodeIsText(lastChild))
1088: return(0);
1089: return(1);
1090: }
1091:
1.50 daniel 1092: /**
1093: * xmlHandleEntity:
1094: * @ctxt: an XML parser context
1095: * @entity: an XML entity pointer.
1096: *
1097: * Default handling of defined entities, when should we define a new input
1.45 daniel 1098: * stream ? When do we just handle that as a set of chars ?
1.50 daniel 1099: * TODO: we should call the SAX handler here and have it resolve the issue
1.45 daniel 1100: */
1101:
1.55 daniel 1102: void
1103: xmlHandleEntity(xmlParserCtxtPtr ctxt, xmlEntityPtr entity) {
1.45 daniel 1104: int len;
1.50 daniel 1105: xmlParserInputPtr input;
1.45 daniel 1106:
1107: if (entity->content == NULL) {
1.55 daniel 1108: if ((ctxt->sax != NULL) && (ctxt->sax->error != NULL))
1.74 daniel 1109: ctxt->sax->error(ctxt->userData, "xmlHandleEntity %s: content == NULL\n",
1.45 daniel 1110: entity->name);
1.59 daniel 1111: ctxt->wellFormed = 0;
1.45 daniel 1112: return;
1113: }
1114: len = xmlStrlen(entity->content);
1115: if (len <= 2) goto handle_as_char;
1116:
1117: /*
1118: * Redefine its content as an input stream.
1119: */
1.50 daniel 1120: input = xmlNewEntityInputStream(ctxt, entity);
1121: xmlPushInput(ctxt, input);
1.45 daniel 1122: return;
1123:
1124: handle_as_char:
1125: /*
1126: * Just handle the content as a set of chars.
1127: */
1.72 daniel 1128: if ((ctxt->sax != NULL) && (ctxt->sax->characters != NULL))
1.74 daniel 1129: ctxt->sax->characters(ctxt->userData, entity->content, len);
1.45 daniel 1130:
1131: }
1132:
1133: /*
1134: * Forward declarations for recursive behaviour.
1135: */
1.77 daniel 1136: void xmlParsePEReference(xmlParserCtxtPtr ctxt);
1137: void xmlParseReference(xmlParserCtxtPtr ctxt);
1.45 daniel 1138:
1.28 daniel 1139: /************************************************************************
1140: * *
1141: * Extra stuff for namespace support *
1142: * Relates to http://www.w3.org/TR/WD-xml-names *
1143: * *
1144: ************************************************************************/
1145:
1.50 daniel 1146: /**
1147: * xmlNamespaceParseNCName:
1148: * @ctxt: an XML parser context
1149: *
1150: * parse an XML namespace name.
1.28 daniel 1151: *
1152: * [NS 3] NCName ::= (Letter | '_') (NCNameChar)*
1153: *
1154: * [NS 4] NCNameChar ::= Letter | Digit | '.' | '-' | '_' |
1155: * CombiningChar | Extender
1.68 daniel 1156: *
1157: * Returns the namespace name or NULL
1.28 daniel 1158: */
1159:
1.55 daniel 1160: CHAR *
1161: xmlNamespaceParseNCName(xmlParserCtxtPtr ctxt) {
1.91 daniel 1162: CHAR buf[XML_MAX_NAMELEN];
1163: int len = 0;
1.28 daniel 1164:
1.40 daniel 1165: if (!IS_LETTER(CUR) && (CUR != '_')) return(NULL);
1.28 daniel 1166:
1.40 daniel 1167: while ((IS_LETTER(CUR)) || (IS_DIGIT(CUR)) ||
1168: (CUR == '.') || (CUR == '-') ||
1169: (CUR == '_') ||
1170: (IS_COMBINING(CUR)) ||
1.91 daniel 1171: (IS_EXTENDER(CUR))) {
1172: buf[len++] = CUR;
1.40 daniel 1173: NEXT;
1.91 daniel 1174: if (len >= XML_MAX_NAMELEN) {
1175: fprintf(stderr,
1176: "xmlNamespaceParseNCName: reached XML_MAX_NAMELEN limit\n");
1177: while ((IS_LETTER(CUR)) || (IS_DIGIT(CUR)) ||
1178: (CUR == '.') || (CUR == '-') ||
1179: (CUR == '_') ||
1180: (IS_COMBINING(CUR)) ||
1181: (IS_EXTENDER(CUR)))
1182: NEXT;
1183: break;
1184: }
1185: }
1186: return(xmlStrndup(buf, len));
1.28 daniel 1187: }
1188:
1.50 daniel 1189: /**
1190: * xmlNamespaceParseQName:
1191: * @ctxt: an XML parser context
1192: * @prefix: a CHAR **
1193: *
1194: * parse an XML qualified name
1.28 daniel 1195: *
1196: * [NS 5] QName ::= (Prefix ':')? LocalPart
1197: *
1198: * [NS 6] Prefix ::= NCName
1199: *
1200: * [NS 7] LocalPart ::= NCName
1.68 daniel 1201: *
1202: * Returns the function returns the local part, and prefix is updated
1.50 daniel 1203: * to get the Prefix if any.
1.28 daniel 1204: */
1205:
1.55 daniel 1206: CHAR *
1207: xmlNamespaceParseQName(xmlParserCtxtPtr ctxt, CHAR **prefix) {
1.28 daniel 1208: CHAR *ret = NULL;
1209:
1210: *prefix = NULL;
1211: ret = xmlNamespaceParseNCName(ctxt);
1.40 daniel 1212: if (CUR == ':') {
1.28 daniel 1213: *prefix = ret;
1.40 daniel 1214: NEXT;
1.28 daniel 1215: ret = xmlNamespaceParseNCName(ctxt);
1216: }
1217:
1218: return(ret);
1219: }
1220:
1.50 daniel 1221: /**
1.72 daniel 1222: * xmlSplitQName:
1223: * @name: an XML parser context
1224: * @prefix: a CHAR **
1225: *
1226: * parse an XML qualified name string
1227: *
1228: * [NS 5] QName ::= (Prefix ':')? LocalPart
1229: *
1230: * [NS 6] Prefix ::= NCName
1231: *
1232: * [NS 7] LocalPart ::= NCName
1233: *
1234: * Returns the function returns the local part, and prefix is updated
1235: * to get the Prefix if any.
1236: */
1237:
1238: CHAR *
1239: xmlSplitQName(const CHAR *name, CHAR **prefix) {
1240: CHAR *ret = NULL;
1241: const CHAR *q;
1242: const CHAR *cur = name;
1243:
1244: *prefix = NULL;
1245: if (!IS_LETTER(*cur) && (*cur != '_')) return(NULL);
1246: q = cur++;
1247:
1248: while ((IS_LETTER(*cur)) || (IS_DIGIT(*cur)) ||
1249: (*cur == '.') || (*cur == '-') ||
1250: (*cur == '_') ||
1251: (IS_COMBINING(*cur)) ||
1252: (IS_EXTENDER(*cur)))
1253: cur++;
1254:
1255: ret = xmlStrndup(q, cur - q);
1256:
1257: if (*cur == ':') {
1258: cur++;
1259: if (!IS_LETTER(*cur) && (*cur != '_')) return(ret);
1260: *prefix = ret;
1261:
1262: q = cur++;
1263:
1264: while ((IS_LETTER(*cur)) || (IS_DIGIT(*cur)) ||
1265: (*cur == '.') || (*cur == '-') ||
1266: (*cur == '_') ||
1267: (IS_COMBINING(*cur)) ||
1268: (IS_EXTENDER(*cur)))
1269: cur++;
1270:
1271: ret = xmlStrndup(q, cur - q);
1272: }
1273:
1274: return(ret);
1275: }
1276: /**
1.50 daniel 1277: * xmlNamespaceParseNSDef:
1278: * @ctxt: an XML parser context
1279: *
1280: * parse a namespace prefix declaration
1.28 daniel 1281: *
1282: * [NS 1] NSDef ::= PrefixDef Eq SystemLiteral
1283: *
1284: * [NS 2] PrefixDef ::= 'xmlns' (':' NCName)?
1.68 daniel 1285: *
1286: * Returns the namespace name
1.28 daniel 1287: */
1288:
1.55 daniel 1289: CHAR *
1290: xmlNamespaceParseNSDef(xmlParserCtxtPtr ctxt) {
1.28 daniel 1291: CHAR *name = NULL;
1292:
1.40 daniel 1293: if ((CUR == 'x') && (NXT(1) == 'm') &&
1294: (NXT(2) == 'l') && (NXT(3) == 'n') &&
1295: (NXT(4) == 's')) {
1296: SKIP(5);
1297: if (CUR == ':') {
1298: NEXT;
1.28 daniel 1299: name = xmlNamespaceParseNCName(ctxt);
1300: }
1301: }
1.39 daniel 1302: return(name);
1.28 daniel 1303: }
1304:
1.50 daniel 1305: /**
1306: * xmlParseQuotedString:
1307: * @ctxt: an XML parser context
1308: *
1.45 daniel 1309: * [OLD] Parse and return a string between quotes or doublequotes
1.68 daniel 1310: *
1311: * Returns the string parser or NULL.
1.45 daniel 1312: */
1.55 daniel 1313: CHAR *
1314: xmlParseQuotedString(xmlParserCtxtPtr ctxt) {
1.45 daniel 1315: CHAR *ret = NULL;
1316: const CHAR *q;
1317:
1318: if (CUR == '"') {
1319: NEXT;
1320: q = CUR_PTR;
1321: while (IS_CHAR(CUR) && (CUR != '"')) NEXT;
1.55 daniel 1322: if (CUR != '"') {
1323: if ((ctxt->sax != NULL) && (ctxt->sax->error != NULL))
1.74 daniel 1324: ctxt->sax->error(ctxt->userData, "String not closed \"%.50s\"\n", q);
1.59 daniel 1325: ctxt->wellFormed = 0;
1.55 daniel 1326: } else {
1.45 daniel 1327: ret = xmlStrndup(q, CUR_PTR - q);
1328: NEXT;
1329: }
1330: } else if (CUR == '\''){
1331: NEXT;
1332: q = CUR_PTR;
1333: while (IS_CHAR(CUR) && (CUR != '\'')) NEXT;
1.55 daniel 1334: if (CUR != '\'') {
1335: if ((ctxt->sax != NULL) && (ctxt->sax->error != NULL))
1.74 daniel 1336: ctxt->sax->error(ctxt->userData, "String not closed \"%.50s\"\n", q);
1.59 daniel 1337: ctxt->wellFormed = 0;
1.55 daniel 1338: } else {
1.45 daniel 1339: ret = xmlStrndup(q, CUR_PTR - q);
1340: NEXT;
1341: }
1342: }
1343: return(ret);
1344: }
1345:
1.50 daniel 1346: /**
1347: * xmlParseNamespace:
1348: * @ctxt: an XML parser context
1349: *
1.45 daniel 1350: * [OLD] xmlParseNamespace: parse specific PI '<?namespace ...' constructs.
1351: *
1352: * This is what the older xml-name Working Draft specified, a bunch of
1353: * other stuff may still rely on it, so support is still here as
1354: * if ot was declared on the root of the Tree:-(
1355: */
1356:
1.55 daniel 1357: void
1358: xmlParseNamespace(xmlParserCtxtPtr ctxt) {
1.45 daniel 1359: CHAR *href = NULL;
1360: CHAR *prefix = NULL;
1361: int garbage = 0;
1362:
1363: /*
1364: * We just skipped "namespace" or "xml:namespace"
1365: */
1366: SKIP_BLANKS;
1367:
1368: while (IS_CHAR(CUR) && (CUR != '>')) {
1369: /*
1370: * We can have "ns" or "prefix" attributes
1371: * Old encoding as 'href' or 'AS' attributes is still supported
1372: */
1373: if ((CUR == 'n') && (NXT(1) == 's')) {
1374: garbage = 0;
1375: SKIP(2);
1376: SKIP_BLANKS;
1377:
1378: if (CUR != '=') continue;
1379: NEXT;
1380: SKIP_BLANKS;
1381:
1382: href = xmlParseQuotedString(ctxt);
1383: SKIP_BLANKS;
1384: } else if ((CUR == 'h') && (NXT(1) == 'r') &&
1385: (NXT(2) == 'e') && (NXT(3) == 'f')) {
1386: garbage = 0;
1387: SKIP(4);
1388: SKIP_BLANKS;
1389:
1390: if (CUR != '=') continue;
1391: NEXT;
1392: SKIP_BLANKS;
1393:
1394: href = xmlParseQuotedString(ctxt);
1395: SKIP_BLANKS;
1396: } else if ((CUR == 'p') && (NXT(1) == 'r') &&
1397: (NXT(2) == 'e') && (NXT(3) == 'f') &&
1398: (NXT(4) == 'i') && (NXT(5) == 'x')) {
1399: garbage = 0;
1400: SKIP(6);
1401: SKIP_BLANKS;
1402:
1403: if (CUR != '=') continue;
1404: NEXT;
1405: SKIP_BLANKS;
1406:
1407: prefix = xmlParseQuotedString(ctxt);
1408: SKIP_BLANKS;
1409: } else if ((CUR == 'A') && (NXT(1) == 'S')) {
1410: garbage = 0;
1411: SKIP(2);
1412: SKIP_BLANKS;
1413:
1414: if (CUR != '=') continue;
1415: NEXT;
1416: SKIP_BLANKS;
1417:
1418: prefix = xmlParseQuotedString(ctxt);
1419: SKIP_BLANKS;
1420: } else if ((CUR == '?') && (NXT(1) == '>')) {
1421: garbage = 0;
1.91 daniel 1422: NEXT;
1.45 daniel 1423: } else {
1424: /*
1425: * Found garbage when parsing the namespace
1426: */
1427: if (!garbage)
1.55 daniel 1428: if ((ctxt->sax != NULL) && (ctxt->sax->error != NULL))
1.74 daniel 1429: ctxt->sax->error(ctxt->userData, "xmlParseNamespace found garbage\n");
1.59 daniel 1430: ctxt->wellFormed = 0;
1.45 daniel 1431: NEXT;
1432: }
1433: }
1434:
1435: MOVETO_ENDTAG(CUR_PTR);
1436: NEXT;
1437:
1438: /*
1439: * Register the DTD.
1.72 daniel 1440: if (href != NULL)
1441: if ((ctxt->sax != NULL) && (ctxt->sax->globalNamespace != NULL))
1.74 daniel 1442: ctxt->sax->globalNamespace(ctxt->userData, href, prefix);
1.45 daniel 1443: */
1444:
1445: if (prefix != NULL) free(prefix);
1446: if (href != NULL) free(href);
1447: }
1448:
1.28 daniel 1449: /************************************************************************
1450: * *
1451: * The parser itself *
1452: * Relates to http://www.w3.org/TR/REC-xml *
1453: * *
1454: ************************************************************************/
1.14 veillard 1455:
1.50 daniel 1456: /**
1457: * xmlParseName:
1458: * @ctxt: an XML parser context
1459: *
1460: * parse an XML name.
1.22 daniel 1461: *
1462: * [4] NameChar ::= Letter | Digit | '.' | '-' | '_' | ':' |
1463: * CombiningChar | Extender
1464: *
1465: * [5] Name ::= (Letter | '_' | ':') (NameChar)*
1466: *
1467: * [6] Names ::= Name (S Name)*
1.68 daniel 1468: *
1469: * Returns the Name parsed or NULL
1.1 veillard 1470: */
1471:
1.55 daniel 1472: CHAR *
1473: xmlParseName(xmlParserCtxtPtr ctxt) {
1.91 daniel 1474: CHAR buf[XML_MAX_NAMELEN];
1475: int len = 0;
1.1 veillard 1476:
1.91 daniel 1477: GROW;
1.40 daniel 1478: if (!IS_LETTER(CUR) && (CUR != '_') &&
1.91 daniel 1479: (CUR != ':')) {
1480: return(NULL);
1481: }
1.40 daniel 1482:
1483: while ((IS_LETTER(CUR)) || (IS_DIGIT(CUR)) ||
1484: (CUR == '.') || (CUR == '-') ||
1485: (CUR == '_') || (CUR == ':') ||
1486: (IS_COMBINING(CUR)) ||
1.91 daniel 1487: (IS_EXTENDER(CUR))) {
1488: buf[len++] = CUR;
1.40 daniel 1489: NEXT;
1.91 daniel 1490: if (len >= XML_MAX_NAMELEN) {
1491: fprintf(stderr,
1492: "xmlParseName: reached XML_MAX_NAMELEN limit\n");
1493: while ((IS_LETTER(CUR)) || (IS_DIGIT(CUR)) ||
1494: (CUR == '.') || (CUR == '-') ||
1495: (CUR == '_') || (CUR == ':') ||
1496: (IS_COMBINING(CUR)) ||
1497: (IS_EXTENDER(CUR)))
1498: NEXT;
1499: break;
1500: }
1501: }
1502: return(xmlStrndup(buf, len));
1.22 daniel 1503: }
1504:
1.50 daniel 1505: /**
1506: * xmlParseNmtoken:
1507: * @ctxt: an XML parser context
1508: *
1509: * parse an XML Nmtoken.
1.22 daniel 1510: *
1511: * [7] Nmtoken ::= (NameChar)+
1512: *
1513: * [8] Nmtokens ::= Nmtoken (S Nmtoken)*
1.68 daniel 1514: *
1515: * Returns the Nmtoken parsed or NULL
1.22 daniel 1516: */
1517:
1.55 daniel 1518: CHAR *
1519: xmlParseNmtoken(xmlParserCtxtPtr ctxt) {
1.91 daniel 1520: CHAR buf[XML_MAX_NAMELEN];
1521: int len = 0;
1.22 daniel 1522:
1.91 daniel 1523: GROW;
1.40 daniel 1524: while ((IS_LETTER(CUR)) || (IS_DIGIT(CUR)) ||
1525: (CUR == '.') || (CUR == '-') ||
1526: (CUR == '_') || (CUR == ':') ||
1527: (IS_COMBINING(CUR)) ||
1.91 daniel 1528: (IS_EXTENDER(CUR))) {
1529: buf[len++] = CUR;
1.40 daniel 1530: NEXT;
1.91 daniel 1531: if (len >= XML_MAX_NAMELEN) {
1532: fprintf(stderr,
1533: "xmlParseNmtoken: reached XML_MAX_NAMELEN limit\n");
1534: while ((IS_LETTER(CUR)) || (IS_DIGIT(CUR)) ||
1535: (CUR == '.') || (CUR == '-') ||
1536: (CUR == '_') || (CUR == ':') ||
1537: (IS_COMBINING(CUR)) ||
1538: (IS_EXTENDER(CUR)))
1539: NEXT;
1540: break;
1541: }
1542: }
1543: return(xmlStrndup(buf, len));
1.1 veillard 1544: }
1545:
1.50 daniel 1546: /**
1547: * xmlParseEntityValue:
1548: * @ctxt: an XML parser context
1.78 daniel 1549: * @orig: if non-NULL store a copy of the original entity value
1.50 daniel 1550: *
1551: * parse a value for ENTITY decl.
1.24 daniel 1552: *
1553: * [9] EntityValue ::= '"' ([^%&"] | PEReference | Reference)* '"' |
1554: * "'" ([^%&'] | PEReference | Reference)* "'"
1.68 daniel 1555: *
1.78 daniel 1556: * Returns the EntityValue parsed with reference substitued or NULL
1.24 daniel 1557: */
1558:
1.55 daniel 1559: CHAR *
1.78 daniel 1560: xmlParseEntityValue(xmlParserCtxtPtr ctxt, CHAR **orig) {
1.77 daniel 1561: CHAR *ret = NULL;
1.78 daniel 1562: const CHAR *org = NULL;
1.79 daniel 1563: const CHAR *tst = NULL;
1564: const CHAR *temp = NULL;
1.24 daniel 1565:
1.91 daniel 1566: SHRINK;
1.40 daniel 1567: if (CUR == '"') {
1568: NEXT;
1.78 daniel 1569: org = CUR_PTR;
1.79 daniel 1570: while (CUR != '"') {
1571: tst = CUR_PTR;
1572: temp = xmlDecodeEntities(ctxt, -1, XML_SUBSTITUTE_BOTH, '"', 0, 0);
1.94 daniel 1573:
1574: /*
1575: * Pop-up of finished entities.
1576: */
1577: while ((CUR == 0) && (ctxt->inputNr > 1))
1578: xmlPopInput(ctxt);
1579:
1580: if ((temp == NULL) && (tst == CUR_PTR)) {
1581: ret = xmlStrndup("", 0);
1582: break;
1583: }
1584: if ((temp[0] == 0) && (tst == CUR_PTR)) {
1585: free((char *)temp);
1586: ret = xmlStrndup("", 0);
1587: break;
1588: }
1.79 daniel 1589: ret = xmlStrcat(ret, temp);
1.80 daniel 1590: if (temp != NULL) free((char *)temp);
1.94 daniel 1591: GROW;
1.79 daniel 1592: }
1.77 daniel 1593: if (CUR != '"') {
1.55 daniel 1594: if ((ctxt->sax != NULL) && (ctxt->sax->error != NULL))
1.79 daniel 1595: ctxt->sax->error(ctxt->userData, "EntityValue: \" expected\n");
1.59 daniel 1596: ctxt->wellFormed = 0;
1.78 daniel 1597: } else {
1.94 daniel 1598: if (orig != NULL) /* TODO !!!!!!!!! */
1.78 daniel 1599: *orig = xmlStrndup(org, CUR_PTR - org);
1.94 daniel 1600: if (ret == NULL)
1601: ret = xmlStrndup("", 0);
1.40 daniel 1602: NEXT;
1.78 daniel 1603: }
1.40 daniel 1604: } else if (CUR == '\'') {
1605: NEXT;
1.78 daniel 1606: org = CUR_PTR;
1.80 daniel 1607: while (CUR != '\'') {
1.79 daniel 1608: tst = CUR_PTR;
1609: temp = xmlDecodeEntities(ctxt, -1, XML_SUBSTITUTE_BOTH, '\'', 0, 0);
1.94 daniel 1610:
1611: /*
1612: * Pop-up of finished entities.
1613: */
1614: while ((CUR == 0) && (ctxt->inputNr > 1))
1615: xmlPopInput(ctxt);
1616:
1617: if ((temp == NULL) && (tst == CUR_PTR)) {
1618: ret = xmlStrndup("", 0);
1619: break;
1620: }
1621: if ((temp[0] == 0) && (tst == CUR_PTR)) {
1622: free((char *)temp);
1623: ret = xmlStrndup("", 0);
1624: break;
1625: }
1.79 daniel 1626: ret = xmlStrcat(ret, temp);
1.80 daniel 1627: if (temp != NULL) free((char *)temp);
1.94 daniel 1628: GROW;
1.79 daniel 1629: }
1.77 daniel 1630: if (CUR != '\'') {
1.55 daniel 1631: if ((ctxt->sax != NULL) && (ctxt->sax->error != NULL))
1.77 daniel 1632: ctxt->sax->error(ctxt->userData, "EntityValue: ' expected\n");
1.59 daniel 1633: ctxt->wellFormed = 0;
1.78 daniel 1634: } else {
1.94 daniel 1635: if (orig != NULL) /* TODO !!!!!!!!! */
1.78 daniel 1636: *orig = xmlStrndup(org, CUR_PTR - org);
1.94 daniel 1637: if (ret == NULL)
1638: ret = xmlStrndup("", 0);
1.40 daniel 1639: NEXT;
1.78 daniel 1640: }
1.24 daniel 1641: } else {
1.55 daniel 1642: if ((ctxt->sax != NULL) && (ctxt->sax->error != NULL))
1.77 daniel 1643: ctxt->sax->error(ctxt->userData, "EntityValue: \" or ' expected\n");
1.59 daniel 1644: ctxt->wellFormed = 0;
1.24 daniel 1645: }
1646:
1647: return(ret);
1648: }
1649:
1.50 daniel 1650: /**
1651: * xmlParseAttValue:
1652: * @ctxt: an XML parser context
1653: *
1654: * parse a value for an attribute
1.78 daniel 1655: * Note: the parser won't do substitution of entities here, this
1.79 daniel 1656: * will be handled later in xmlStringGetNodeList, unless it was
1657: * asked for ctxt->replaceEntities != 0
1.29 daniel 1658: *
1659: * [10] AttValue ::= '"' ([^<&"] | Reference)* '"' |
1660: * "'" ([^<&'] | Reference)* "'"
1.68 daniel 1661: *
1662: * Returns the AttValue parsed or NULL.
1.29 daniel 1663: */
1664:
1.55 daniel 1665: CHAR *
1666: xmlParseAttValue(xmlParserCtxtPtr ctxt) {
1.77 daniel 1667: CHAR *ret = NULL;
1.29 daniel 1668:
1.91 daniel 1669: SHRINK;
1.40 daniel 1670: if (CUR == '"') {
1671: NEXT;
1.79 daniel 1672: if (ctxt->replaceEntities != 0)
1673: ret = xmlDecodeEntities(ctxt, -1, XML_SUBSTITUTE_REF, '"', '<', 0);
1674: else
1675: ret = xmlDecodeEntities(ctxt, -1, XML_SUBSTITUTE_NONE, '"', '<', 0);
1.77 daniel 1676: if (CUR == '<') {
1677: if ((ctxt->sax != NULL) && (ctxt->sax->error != NULL))
1678: ctxt->sax->error(ctxt->userData,
1679: "Unescaped '<' not allowed in attributes values\n");
1680: ctxt->wellFormed = 0;
1.29 daniel 1681: }
1.77 daniel 1682: if (CUR != '"') {
1.55 daniel 1683: if ((ctxt->sax != NULL) && (ctxt->sax->error != NULL))
1.77 daniel 1684: ctxt->sax->error(ctxt->userData, "AttValue: ' expected\n");
1.59 daniel 1685: ctxt->wellFormed = 0;
1.77 daniel 1686: } else
1.40 daniel 1687: NEXT;
1688: } else if (CUR == '\'') {
1689: NEXT;
1.79 daniel 1690: if (ctxt->replaceEntities != 0)
1691: ret = xmlDecodeEntities(ctxt, -1, XML_SUBSTITUTE_REF, '\'', '<', 0);
1692: else
1693: ret = xmlDecodeEntities(ctxt, -1, XML_SUBSTITUTE_NONE, '\'', '<', 0);
1.77 daniel 1694: if (CUR == '<') {
1695: if ((ctxt->sax != NULL) && (ctxt->sax->error != NULL))
1696: ctxt->sax->error(ctxt->userData,
1697: "Unescaped '<' not allowed in attributes values\n");
1698: ctxt->wellFormed = 0;
1.29 daniel 1699: }
1.77 daniel 1700: if (CUR != '\'') {
1.55 daniel 1701: if ((ctxt->sax != NULL) && (ctxt->sax->error != NULL))
1.77 daniel 1702: ctxt->sax->error(ctxt->userData, "AttValue: ' expected\n");
1.59 daniel 1703: ctxt->wellFormed = 0;
1.77 daniel 1704: } else
1.40 daniel 1705: NEXT;
1.29 daniel 1706: } else {
1.55 daniel 1707: if ((ctxt->sax != NULL) && (ctxt->sax->error != NULL))
1.74 daniel 1708: ctxt->sax->error(ctxt->userData, "AttValue: \" or ' expected\n");
1.59 daniel 1709: ctxt->wellFormed = 0;
1.29 daniel 1710: }
1711:
1712: return(ret);
1713: }
1714:
1.50 daniel 1715: /**
1716: * xmlParseSystemLiteral:
1717: * @ctxt: an XML parser context
1718: *
1719: * parse an XML Literal
1.21 daniel 1720: *
1.22 daniel 1721: * [11] SystemLiteral ::= ('"' [^"]* '"') | ("'" [^']* "'")
1.68 daniel 1722: *
1723: * Returns the SystemLiteral parsed or NULL
1.21 daniel 1724: */
1725:
1.55 daniel 1726: CHAR *
1727: xmlParseSystemLiteral(xmlParserCtxtPtr ctxt) {
1.21 daniel 1728: const CHAR *q;
1729: CHAR *ret = NULL;
1730:
1.91 daniel 1731: SHRINK;
1.40 daniel 1732: if (CUR == '"') {
1733: NEXT;
1734: q = CUR_PTR;
1735: while ((IS_CHAR(CUR)) && (CUR != '"'))
1736: NEXT;
1737: if (!IS_CHAR(CUR)) {
1.55 daniel 1738: if ((ctxt->sax != NULL) && (ctxt->sax->error != NULL))
1.74 daniel 1739: ctxt->sax->error(ctxt->userData, "Unfinished SystemLiteral\n");
1.59 daniel 1740: ctxt->wellFormed = 0;
1.21 daniel 1741: } else {
1.40 daniel 1742: ret = xmlStrndup(q, CUR_PTR - q);
1743: NEXT;
1.21 daniel 1744: }
1.40 daniel 1745: } else if (CUR == '\'') {
1746: NEXT;
1747: q = CUR_PTR;
1748: while ((IS_CHAR(CUR)) && (CUR != '\''))
1749: NEXT;
1750: if (!IS_CHAR(CUR)) {
1.55 daniel 1751: if ((ctxt->sax != NULL) && (ctxt->sax->error != NULL))
1.74 daniel 1752: ctxt->sax->error(ctxt->userData, "Unfinished SystemLiteral\n");
1.59 daniel 1753: ctxt->wellFormed = 0;
1.21 daniel 1754: } else {
1.40 daniel 1755: ret = xmlStrndup(q, CUR_PTR - q);
1756: NEXT;
1.21 daniel 1757: }
1758: } else {
1.55 daniel 1759: if ((ctxt->sax != NULL) && (ctxt->sax->error != NULL))
1.74 daniel 1760: ctxt->sax->error(ctxt->userData, "SystemLiteral \" or ' expected\n");
1.59 daniel 1761: ctxt->wellFormed = 0;
1.21 daniel 1762: }
1763:
1764: return(ret);
1765: }
1766:
1.50 daniel 1767: /**
1768: * xmlParsePubidLiteral:
1769: * @ctxt: an XML parser context
1.21 daniel 1770: *
1.50 daniel 1771: * parse an XML public literal
1.68 daniel 1772: *
1773: * [12] PubidLiteral ::= '"' PubidChar* '"' | "'" (PubidChar - "'")* "'"
1774: *
1775: * Returns the PubidLiteral parsed or NULL.
1.21 daniel 1776: */
1777:
1.55 daniel 1778: CHAR *
1779: xmlParsePubidLiteral(xmlParserCtxtPtr ctxt) {
1.21 daniel 1780: const CHAR *q;
1781: CHAR *ret = NULL;
1782: /*
1783: * Name ::= (Letter | '_') (NameChar)*
1784: */
1.91 daniel 1785: SHRINK;
1.40 daniel 1786: if (CUR == '"') {
1787: NEXT;
1788: q = CUR_PTR;
1789: while (IS_PUBIDCHAR(CUR)) NEXT;
1790: if (CUR != '"') {
1.55 daniel 1791: if ((ctxt->sax != NULL) && (ctxt->sax->error != NULL))
1.74 daniel 1792: ctxt->sax->error(ctxt->userData, "Unfinished PubidLiteral\n");
1.59 daniel 1793: ctxt->wellFormed = 0;
1.21 daniel 1794: } else {
1.40 daniel 1795: ret = xmlStrndup(q, CUR_PTR - q);
1796: NEXT;
1.21 daniel 1797: }
1.40 daniel 1798: } else if (CUR == '\'') {
1799: NEXT;
1800: q = CUR_PTR;
1801: while ((IS_LETTER(CUR)) && (CUR != '\''))
1802: NEXT;
1803: if (!IS_LETTER(CUR)) {
1.55 daniel 1804: if ((ctxt->sax != NULL) && (ctxt->sax->error != NULL))
1.74 daniel 1805: ctxt->sax->error(ctxt->userData, "Unfinished PubidLiteral\n");
1.59 daniel 1806: ctxt->wellFormed = 0;
1.21 daniel 1807: } else {
1.40 daniel 1808: ret = xmlStrndup(q, CUR_PTR - q);
1809: NEXT;
1.21 daniel 1810: }
1811: } else {
1.55 daniel 1812: if ((ctxt->sax != NULL) && (ctxt->sax->error != NULL))
1.74 daniel 1813: ctxt->sax->error(ctxt->userData, "SystemLiteral \" or ' expected\n");
1.59 daniel 1814: ctxt->wellFormed = 0;
1.21 daniel 1815: }
1816:
1817: return(ret);
1818: }
1819:
1.50 daniel 1820: /**
1821: * xmlParseCharData:
1822: * @ctxt: an XML parser context
1823: * @cdata: int indicating whether we are within a CDATA section
1824: *
1825: * parse a CharData section.
1826: * if we are within a CDATA section ']]>' marks an end of section.
1.27 daniel 1827: *
1828: * [14] CharData ::= [^<&]* - ([^<&]* ']]>' [^<&]*)
1829: */
1830:
1.55 daniel 1831: void
1832: xmlParseCharData(xmlParserCtxtPtr ctxt, int cdata) {
1.91 daniel 1833: CHAR buf[1000];
1834: int nbchar = 0;
1.27 daniel 1835:
1.91 daniel 1836: SHRINK;
1.40 daniel 1837: while ((IS_CHAR(CUR)) && (CUR != '<') &&
1838: (CUR != '&')) {
1.59 daniel 1839: if ((CUR == ']') && (NXT(1) == ']') &&
1840: (NXT(2) == '>')) {
1841: if (cdata) break;
1842: else {
1843: if ((ctxt->sax != NULL) && (ctxt->sax->error != NULL))
1.74 daniel 1844: ctxt->sax->error(ctxt->userData,
1.59 daniel 1845: "Sequence ']]>' not allowed in content\n");
1846: ctxt->wellFormed = 0;
1847: }
1848: }
1.91 daniel 1849: buf[nbchar++] = CUR;
1850: if (nbchar == 1000) {
1851: /*
1852: * Ok the segment is to be consumed as chars.
1853: */
1854: if (ctxt->sax != NULL) {
1855: if (areBlanks(ctxt, buf, nbchar)) {
1856: if (ctxt->sax->ignorableWhitespace != NULL)
1857: ctxt->sax->ignorableWhitespace(ctxt->userData,
1858: buf, nbchar);
1859: } else {
1860: if (ctxt->sax->characters != NULL)
1861: ctxt->sax->characters(ctxt->userData, buf, nbchar);
1862: }
1863: }
1864: nbchar = 0;
1865: }
1.40 daniel 1866: NEXT;
1.27 daniel 1867: }
1.91 daniel 1868: if (nbchar != 0) {
1869: /*
1870: * Ok the segment is to be consumed as chars.
1871: */
1872: if (ctxt->sax != NULL) {
1873: if (areBlanks(ctxt, buf, nbchar)) {
1874: if (ctxt->sax->ignorableWhitespace != NULL)
1875: ctxt->sax->ignorableWhitespace(ctxt->userData, buf, nbchar);
1876: } else {
1877: if (ctxt->sax->characters != NULL)
1878: ctxt->sax->characters(ctxt->userData, buf, nbchar);
1879: }
1880: }
1.45 daniel 1881: }
1.27 daniel 1882: }
1883:
1.50 daniel 1884: /**
1885: * xmlParseExternalID:
1886: * @ctxt: an XML parser context
1887: * @publicID: a CHAR** receiving PubidLiteral
1.67 daniel 1888: * @strict: indicate whether we should restrict parsing to only
1889: * production [75], see NOTE below
1.50 daniel 1890: *
1.67 daniel 1891: * Parse an External ID or a Public ID
1892: *
1893: * NOTE: Productions [75] and [83] interract badly since [75] can generate
1894: * 'PUBLIC' S PubidLiteral S SystemLiteral
1.22 daniel 1895: *
1896: * [75] ExternalID ::= 'SYSTEM' S SystemLiteral
1897: * | 'PUBLIC' S PubidLiteral S SystemLiteral
1.67 daniel 1898: *
1899: * [83] PublicID ::= 'PUBLIC' S PubidLiteral
1900: *
1.68 daniel 1901: * Returns the function returns SystemLiteral and in the second
1.67 daniel 1902: * case publicID receives PubidLiteral, is strict is off
1903: * it is possible to return NULL and have publicID set.
1.22 daniel 1904: */
1905:
1.55 daniel 1906: CHAR *
1.67 daniel 1907: xmlParseExternalID(xmlParserCtxtPtr ctxt, CHAR **publicID, int strict) {
1.39 daniel 1908: CHAR *URI = NULL;
1.22 daniel 1909:
1.91 daniel 1910: SHRINK;
1.40 daniel 1911: if ((CUR == 'S') && (NXT(1) == 'Y') &&
1912: (NXT(2) == 'S') && (NXT(3) == 'T') &&
1913: (NXT(4) == 'E') && (NXT(5) == 'M')) {
1914: SKIP(6);
1.59 daniel 1915: if (!IS_BLANK(CUR)) {
1916: if ((ctxt->sax != NULL) && (ctxt->sax->error != NULL))
1.74 daniel 1917: ctxt->sax->error(ctxt->userData,
1.59 daniel 1918: "Space required after 'SYSTEM'\n");
1919: ctxt->wellFormed = 0;
1920: }
1.42 daniel 1921: SKIP_BLANKS;
1.39 daniel 1922: URI = xmlParseSystemLiteral(ctxt);
1.59 daniel 1923: if (URI == NULL) {
1.55 daniel 1924: if ((ctxt->sax != NULL) && (ctxt->sax->error != NULL))
1.74 daniel 1925: ctxt->sax->error(ctxt->userData,
1.39 daniel 1926: "xmlParseExternalID: SYSTEM, no URI\n");
1.59 daniel 1927: ctxt->wellFormed = 0;
1928: }
1.40 daniel 1929: } else if ((CUR == 'P') && (NXT(1) == 'U') &&
1930: (NXT(2) == 'B') && (NXT(3) == 'L') &&
1931: (NXT(4) == 'I') && (NXT(5) == 'C')) {
1932: SKIP(6);
1.59 daniel 1933: if (!IS_BLANK(CUR)) {
1934: if ((ctxt->sax != NULL) && (ctxt->sax->error != NULL))
1.74 daniel 1935: ctxt->sax->error(ctxt->userData,
1.59 daniel 1936: "Space required after 'PUBLIC'\n");
1937: ctxt->wellFormed = 0;
1938: }
1.42 daniel 1939: SKIP_BLANKS;
1.39 daniel 1940: *publicID = xmlParsePubidLiteral(ctxt);
1.59 daniel 1941: if (*publicID == NULL) {
1.55 daniel 1942: if ((ctxt->sax != NULL) && (ctxt->sax->error != NULL))
1.74 daniel 1943: ctxt->sax->error(ctxt->userData,
1.39 daniel 1944: "xmlParseExternalID: PUBLIC, no Public Identifier\n");
1.59 daniel 1945: ctxt->wellFormed = 0;
1946: }
1.67 daniel 1947: if (strict) {
1948: /*
1949: * We don't handle [83] so "S SystemLiteral" is required.
1950: */
1951: if (!IS_BLANK(CUR)) {
1952: if ((ctxt->sax != NULL) && (ctxt->sax->error != NULL))
1.74 daniel 1953: ctxt->sax->error(ctxt->userData,
1.67 daniel 1954: "Space required after the Public Identifier\n");
1955: ctxt->wellFormed = 0;
1956: }
1957: } else {
1958: /*
1959: * We handle [83] so we return immediately, if
1960: * "S SystemLiteral" is not detected. From a purely parsing
1961: * point of view that's a nice mess.
1962: */
1963: const CHAR *ptr = CUR_PTR;
1964: if (!IS_BLANK(*ptr)) return(NULL);
1965:
1966: while (IS_BLANK(*ptr)) ptr++;
1967: if ((*ptr != '\'') || (*ptr != '"')) return(NULL);
1.59 daniel 1968: }
1.42 daniel 1969: SKIP_BLANKS;
1.39 daniel 1970: URI = xmlParseSystemLiteral(ctxt);
1.59 daniel 1971: if (URI == NULL) {
1.55 daniel 1972: if ((ctxt->sax != NULL) && (ctxt->sax->error != NULL))
1.74 daniel 1973: ctxt->sax->error(ctxt->userData,
1.39 daniel 1974: "xmlParseExternalID: PUBLIC, no URI\n");
1.59 daniel 1975: ctxt->wellFormed = 0;
1976: }
1.22 daniel 1977: }
1.39 daniel 1978: return(URI);
1.22 daniel 1979: }
1980:
1.50 daniel 1981: /**
1982: * xmlParseComment:
1.69 daniel 1983: * @ctxt: an XML parser context
1984: * @create: should we create a node, or just skip the content
1.50 daniel 1985: *
1.3 veillard 1986: * Skip an XML (SGML) comment <!-- .... -->
1.31 daniel 1987: * This may or may not create a node (depending on the context)
1.38 daniel 1988: * The spec says that "For compatibility, the string "--" (double-hyphen)
1989: * must not occur within comments. "
1.22 daniel 1990: *
1991: * [15] Comment ::= '<!--' ((Char - '-') | ('-' (Char - '-')))* '-->'
1.3 veillard 1992: */
1.72 daniel 1993: void
1.69 daniel 1994: xmlParseComment(xmlParserCtxtPtr ctxt, int create) {
1.17 daniel 1995: const CHAR *q, *start;
1996: const CHAR *r;
1.39 daniel 1997: CHAR *val;
1.3 veillard 1998:
1999: /*
1.22 daniel 2000: * Check that there is a comment right here.
1.3 veillard 2001: */
1.40 daniel 2002: if ((CUR != '<') || (NXT(1) != '!') ||
1.72 daniel 2003: (NXT(2) != '-') || (NXT(3) != '-')) return;
1.3 veillard 2004:
1.91 daniel 2005: SHRINK;
1.40 daniel 2006: SKIP(4);
2007: start = q = CUR_PTR;
2008: NEXT;
2009: r = CUR_PTR;
2010: NEXT;
2011: while (IS_CHAR(CUR) &&
2012: ((CUR == ':') || (CUR != '>') ||
1.16 daniel 2013: (*r != '-') || (*q != '-'))) {
1.59 daniel 2014: if ((*r == '-') && (*q == '-')) {
1.55 daniel 2015: if ((ctxt->sax != NULL) && (ctxt->sax->error != NULL))
1.74 daniel 2016: ctxt->sax->error(ctxt->userData,
1.38 daniel 2017: "Comment must not contain '--' (double-hyphen)`\n");
1.59 daniel 2018: ctxt->wellFormed = 0;
2019: }
1.40 daniel 2020: NEXT;r++;q++;
1.3 veillard 2021: }
1.40 daniel 2022: if (!IS_CHAR(CUR)) {
1.55 daniel 2023: if ((ctxt->sax != NULL) && (ctxt->sax->error != NULL))
1.74 daniel 2024: ctxt->sax->error(ctxt->userData, "Comment not terminated \n<!--%.50s\n", start);
1.59 daniel 2025: ctxt->wellFormed = 0;
1.3 veillard 2026: } else {
1.40 daniel 2027: NEXT;
1.31 daniel 2028: if (create) {
1.39 daniel 2029: val = xmlStrndup(start, q - start);
1.72 daniel 2030: if ((ctxt->sax != NULL) && (ctxt->sax->comment != NULL))
1.74 daniel 2031: ctxt->sax->comment(ctxt->userData, val);
1.39 daniel 2032: free(val);
1.31 daniel 2033: }
1.3 veillard 2034: }
2035: }
2036:
1.50 daniel 2037: /**
2038: * xmlParsePITarget:
2039: * @ctxt: an XML parser context
2040: *
2041: * parse the name of a PI
1.22 daniel 2042: *
2043: * [17] PITarget ::= Name - (('X' | 'x') ('M' | 'm') ('L' | 'l'))
1.68 daniel 2044: *
2045: * Returns the PITarget name or NULL
1.22 daniel 2046: */
2047:
1.55 daniel 2048: CHAR *
2049: xmlParsePITarget(xmlParserCtxtPtr ctxt) {
1.22 daniel 2050: CHAR *name;
2051:
2052: name = xmlParseName(ctxt);
2053: if ((name != NULL) && (name[3] == 0) &&
2054: ((name[0] == 'x') || (name[0] == 'X')) &&
1.31 daniel 2055: ((name[1] == 'm') || (name[1] == 'M')) &&
2056: ((name[2] == 'l') || (name[2] == 'L'))) {
1.55 daniel 2057: if ((ctxt->sax != NULL) && (ctxt->sax->error != NULL))
1.74 daniel 2058: ctxt->sax->error(ctxt->userData, "xmlParsePItarget: invalid name prefix 'xml'\n");
1.22 daniel 2059: return(NULL);
2060: }
2061: return(name);
2062: }
2063:
1.50 daniel 2064: /**
2065: * xmlParsePI:
2066: * @ctxt: an XML parser context
2067: *
2068: * parse an XML Processing Instruction.
1.22 daniel 2069: *
2070: * [16] PI ::= '<?' PITarget (S (Char* - (Char* '?>' Char*)))? '?>'
1.68 daniel 2071: *
1.69 daniel 2072: * The processing is transfered to SAX once parsed.
1.3 veillard 2073: */
2074:
1.55 daniel 2075: void
2076: xmlParsePI(xmlParserCtxtPtr ctxt) {
1.22 daniel 2077: CHAR *target;
2078:
1.40 daniel 2079: if ((CUR == '<') && (NXT(1) == '?')) {
1.3 veillard 2080: /*
2081: * this is a Processing Instruction.
2082: */
1.40 daniel 2083: SKIP(2);
1.91 daniel 2084: SHRINK;
1.3 veillard 2085:
2086: /*
1.22 daniel 2087: * Parse the target name and check for special support like
2088: * namespace.
2089: *
2090: * TODO : PI handling should be dynamically redefinable using an
2091: * API. Only namespace should be in the code IMHO ...
1.3 veillard 2092: */
1.22 daniel 2093: target = xmlParsePITarget(ctxt);
2094: if (target != NULL) {
1.72 daniel 2095: const CHAR *q = CUR_PTR;
2096:
2097: while (IS_CHAR(CUR) &&
2098: ((CUR != '?') || (NXT(1) != '>')))
2099: NEXT;
2100: if (!IS_CHAR(CUR)) {
2101: if ((ctxt->sax != NULL) && (ctxt->sax->error != NULL))
1.74 daniel 2102: ctxt->sax->error(ctxt->userData,
1.72 daniel 2103: "xmlParsePI: PI %s never end ...\n", target);
2104: ctxt->wellFormed = 0;
1.22 daniel 2105: } else {
1.72 daniel 2106: CHAR *data;
1.44 daniel 2107:
1.72 daniel 2108: data = xmlStrndup(q, CUR_PTR - q);
2109: SKIP(2);
1.44 daniel 2110:
1.72 daniel 2111: /*
2112: * SAX: PI detected.
2113: */
2114: if ((ctxt->sax) &&
2115: (ctxt->sax->processingInstruction != NULL))
1.74 daniel 2116: ctxt->sax->processingInstruction(ctxt->userData, target, data);
1.72 daniel 2117: free(data);
1.22 daniel 2118: }
1.39 daniel 2119: free(target);
1.3 veillard 2120: } else {
1.55 daniel 2121: if ((ctxt->sax != NULL) && (ctxt->sax->error != NULL))
1.74 daniel 2122: ctxt->sax->error(ctxt->userData, "xmlParsePI : no target name\n");
1.59 daniel 2123: ctxt->wellFormed = 0;
2124:
1.22 daniel 2125: /********* Should we try to complete parsing the PI ???
1.40 daniel 2126: while (IS_CHAR(CUR) &&
2127: (CUR != '?') && (CUR != '>'))
2128: NEXT;
2129: if (!IS_CHAR(CUR)) {
1.22 daniel 2130: fprintf(stderr, "xmlParsePI: PI %s never end ...\n",
2131: target);
2132: }
2133: ********************************************************/
2134: }
2135: }
2136: }
2137:
1.50 daniel 2138: /**
2139: * xmlParseNotationDecl:
2140: * @ctxt: an XML parser context
2141: *
2142: * parse a notation declaration
1.22 daniel 2143: *
2144: * [82] NotationDecl ::= '<!NOTATION' S Name S (ExternalID | PublicID) S? '>'
2145: *
2146: * Hence there is actually 3 choices:
2147: * 'PUBLIC' S PubidLiteral
2148: * 'PUBLIC' S PubidLiteral S SystemLiteral
2149: * and 'SYSTEM' S SystemLiteral
1.50 daniel 2150: *
1.67 daniel 2151: * See the NOTE on xmlParseExternalID().
1.22 daniel 2152: */
2153:
1.55 daniel 2154: void
2155: xmlParseNotationDecl(xmlParserCtxtPtr ctxt) {
1.22 daniel 2156: CHAR *name;
1.67 daniel 2157: CHAR *Pubid;
2158: CHAR *Systemid;
1.22 daniel 2159:
1.40 daniel 2160: if ((CUR == '<') && (NXT(1) == '!') &&
2161: (NXT(2) == 'N') && (NXT(3) == 'O') &&
2162: (NXT(4) == 'T') && (NXT(5) == 'A') &&
2163: (NXT(6) == 'T') && (NXT(7) == 'I') &&
1.67 daniel 2164: (NXT(8) == 'O') && (NXT(9) == 'N')) {
1.91 daniel 2165: SHRINK;
1.40 daniel 2166: SKIP(10);
1.67 daniel 2167: if (!IS_BLANK(CUR)) {
2168: if ((ctxt->sax != NULL) && (ctxt->sax->error != NULL))
1.74 daniel 2169: ctxt->sax->error(ctxt->userData, "Space required after '<!NOTATION'\n");
1.67 daniel 2170: ctxt->wellFormed = 0;
2171: return;
2172: }
2173: SKIP_BLANKS;
1.22 daniel 2174:
2175: name = xmlParseName(ctxt);
2176: if (name == NULL) {
1.55 daniel 2177: if ((ctxt->sax != NULL) && (ctxt->sax->error != NULL))
1.74 daniel 2178: ctxt->sax->error(ctxt->userData, "NOTATION: Name expected here\n");
1.67 daniel 2179: ctxt->wellFormed = 0;
2180: return;
2181: }
2182: if (!IS_BLANK(CUR)) {
2183: if ((ctxt->sax != NULL) && (ctxt->sax->error != NULL))
1.74 daniel 2184: ctxt->sax->error(ctxt->userData,
1.67 daniel 2185: "Space required after the NOTATION name'\n");
1.59 daniel 2186: ctxt->wellFormed = 0;
1.22 daniel 2187: return;
2188: }
1.42 daniel 2189: SKIP_BLANKS;
1.67 daniel 2190:
1.22 daniel 2191: /*
1.67 daniel 2192: * Parse the IDs.
1.22 daniel 2193: */
1.67 daniel 2194: Systemid = xmlParseExternalID(ctxt, &Pubid, 1);
2195: SKIP_BLANKS;
2196:
2197: if (CUR == '>') {
1.40 daniel 2198: NEXT;
1.72 daniel 2199: if ((ctxt->sax != NULL) && (ctxt->sax->notationDecl != NULL))
1.74 daniel 2200: ctxt->sax->notationDecl(ctxt->userData, name, Pubid, Systemid);
1.67 daniel 2201: } else {
2202: if ((ctxt->sax != NULL) && (ctxt->sax->error != NULL))
1.74 daniel 2203: ctxt->sax->error(ctxt->userData,
1.67 daniel 2204: "'>' required to close NOTATION declaration\n");
2205: ctxt->wellFormed = 0;
2206: }
1.22 daniel 2207: free(name);
1.67 daniel 2208: if (Systemid != NULL) free(Systemid);
2209: if (Pubid != NULL) free(Pubid);
1.22 daniel 2210: }
2211: }
2212:
1.50 daniel 2213: /**
2214: * xmlParseEntityDecl:
2215: * @ctxt: an XML parser context
2216: *
2217: * parse <!ENTITY declarations
1.22 daniel 2218: *
2219: * [70] EntityDecl ::= GEDecl | PEDecl
2220: *
2221: * [71] GEDecl ::= '<!ENTITY' S Name S EntityDef S? '>'
2222: *
2223: * [72] PEDecl ::= '<!ENTITY' S '%' S Name S PEDef S? '>'
2224: *
2225: * [73] EntityDef ::= EntityValue | (ExternalID NDataDecl?)
2226: *
2227: * [74] PEDef ::= EntityValue | ExternalID
1.24 daniel 2228: *
2229: * [76] NDataDecl ::= S 'NDATA' S Name
1.22 daniel 2230: */
2231:
1.55 daniel 2232: void
2233: xmlParseEntityDecl(xmlParserCtxtPtr ctxt) {
1.39 daniel 2234: CHAR *name = NULL;
1.24 daniel 2235: CHAR *value = NULL;
1.39 daniel 2236: CHAR *URI = NULL, *literal = NULL;
1.24 daniel 2237: CHAR *ndata = NULL;
1.39 daniel 2238: int isParameter = 0;
1.78 daniel 2239: CHAR *orig = NULL;
1.22 daniel 2240:
1.94 daniel 2241: GROW;
1.40 daniel 2242: if ((CUR == '<') && (NXT(1) == '!') &&
2243: (NXT(2) == 'E') && (NXT(3) == 'N') &&
2244: (NXT(4) == 'T') && (NXT(5) == 'I') &&
1.59 daniel 2245: (NXT(6) == 'T') && (NXT(7) == 'Y')) {
1.91 daniel 2246: SHRINK;
1.40 daniel 2247: SKIP(8);
1.59 daniel 2248: if (!IS_BLANK(CUR)) {
2249: if ((ctxt->sax != NULL) && (ctxt->sax->error != NULL))
1.74 daniel 2250: ctxt->sax->error(ctxt->userData, "Space required after '<!ENTITY'\n");
1.59 daniel 2251: ctxt->wellFormed = 0;
2252: }
2253: SKIP_BLANKS;
1.40 daniel 2254:
2255: if (CUR == '%') {
2256: NEXT;
1.59 daniel 2257: if (!IS_BLANK(CUR)) {
2258: if ((ctxt->sax != NULL) && (ctxt->sax->error != NULL))
1.74 daniel 2259: ctxt->sax->error(ctxt->userData, "Space required after '%'\n");
1.59 daniel 2260: ctxt->wellFormed = 0;
2261: }
1.42 daniel 2262: SKIP_BLANKS;
1.39 daniel 2263: isParameter = 1;
1.22 daniel 2264: }
2265:
2266: name = xmlParseName(ctxt);
1.24 daniel 2267: if (name == NULL) {
1.55 daniel 2268: if ((ctxt->sax != NULL) && (ctxt->sax->error != NULL))
1.74 daniel 2269: ctxt->sax->error(ctxt->userData, "xmlParseEntityDecl: no name\n");
1.59 daniel 2270: ctxt->wellFormed = 0;
1.24 daniel 2271: return;
2272: }
1.59 daniel 2273: if (!IS_BLANK(CUR)) {
2274: if ((ctxt->sax != NULL) && (ctxt->sax->error != NULL))
1.74 daniel 2275: ctxt->sax->error(ctxt->userData,
1.59 daniel 2276: "Space required after the entity name\n");
2277: ctxt->wellFormed = 0;
2278: }
1.42 daniel 2279: SKIP_BLANKS;
1.24 daniel 2280:
1.22 daniel 2281: /*
1.68 daniel 2282: * handle the various case of definitions...
1.22 daniel 2283: */
1.39 daniel 2284: if (isParameter) {
1.40 daniel 2285: if ((CUR == '"') || (CUR == '\''))
1.78 daniel 2286: value = xmlParseEntityValue(ctxt, &orig);
1.39 daniel 2287: if (value) {
1.72 daniel 2288: if ((ctxt->sax != NULL) && (ctxt->sax->entityDecl != NULL))
1.74 daniel 2289: ctxt->sax->entityDecl(ctxt->userData, name,
1.39 daniel 2290: XML_INTERNAL_PARAMETER_ENTITY,
2291: NULL, NULL, value);
2292: }
1.24 daniel 2293: else {
1.67 daniel 2294: URI = xmlParseExternalID(ctxt, &literal, 1);
1.39 daniel 2295: if (URI) {
1.72 daniel 2296: if ((ctxt->sax != NULL) && (ctxt->sax->entityDecl != NULL))
1.74 daniel 2297: ctxt->sax->entityDecl(ctxt->userData, name,
1.39 daniel 2298: XML_EXTERNAL_PARAMETER_ENTITY,
2299: literal, URI, NULL);
2300: }
1.24 daniel 2301: }
2302: } else {
1.40 daniel 2303: if ((CUR == '"') || (CUR == '\'')) {
1.78 daniel 2304: value = xmlParseEntityValue(ctxt, &orig);
1.72 daniel 2305: if ((ctxt->sax != NULL) && (ctxt->sax->entityDecl != NULL))
1.74 daniel 2306: ctxt->sax->entityDecl(ctxt->userData, name,
1.39 daniel 2307: XML_INTERNAL_GENERAL_ENTITY,
2308: NULL, NULL, value);
2309: } else {
1.67 daniel 2310: URI = xmlParseExternalID(ctxt, &literal, 1);
1.59 daniel 2311: if ((CUR != '>') && (!IS_BLANK(CUR))) {
2312: if ((ctxt->sax != NULL) && (ctxt->sax->error != NULL))
1.74 daniel 2313: ctxt->sax->error(ctxt->userData,
1.59 daniel 2314: "Space required before 'NDATA'\n");
2315: ctxt->wellFormed = 0;
2316: }
1.42 daniel 2317: SKIP_BLANKS;
1.40 daniel 2318: if ((CUR == 'N') && (NXT(1) == 'D') &&
2319: (NXT(2) == 'A') && (NXT(3) == 'T') &&
2320: (NXT(4) == 'A')) {
2321: SKIP(5);
1.59 daniel 2322: if (!IS_BLANK(CUR)) {
2323: if ((ctxt->sax != NULL) && (ctxt->sax->error != NULL))
1.74 daniel 2324: ctxt->sax->error(ctxt->userData,
1.59 daniel 2325: "Space required after 'NDATA'\n");
2326: ctxt->wellFormed = 0;
2327: }
1.42 daniel 2328: SKIP_BLANKS;
1.24 daniel 2329: ndata = xmlParseName(ctxt);
1.72 daniel 2330: if ((ctxt->sax != NULL) && (ctxt->sax->entityDecl != NULL))
1.74 daniel 2331: ctxt->sax->entityDecl(ctxt->userData, name,
1.39 daniel 2332: XML_EXTERNAL_GENERAL_UNPARSED_ENTITY,
2333: literal, URI, ndata);
2334: } else {
1.72 daniel 2335: if ((ctxt->sax != NULL) && (ctxt->sax->entityDecl != NULL))
1.74 daniel 2336: ctxt->sax->entityDecl(ctxt->userData, name,
1.39 daniel 2337: XML_EXTERNAL_GENERAL_PARSED_ENTITY,
2338: literal, URI, NULL);
1.24 daniel 2339: }
2340: }
2341: }
1.42 daniel 2342: SKIP_BLANKS;
1.40 daniel 2343: if (CUR != '>') {
1.55 daniel 2344: if ((ctxt->sax != NULL) && (ctxt->sax->error != NULL))
1.74 daniel 2345: ctxt->sax->error(ctxt->userData,
1.31 daniel 2346: "xmlParseEntityDecl: entity %s not terminated\n", name);
1.59 daniel 2347: ctxt->wellFormed = 0;
1.24 daniel 2348: } else
1.40 daniel 2349: NEXT;
1.78 daniel 2350: if (orig != NULL) {
2351: /*
2352: * TODO: somwhat unclean, extending the SAx API would be better !
2353: */
2354: xmlEntityPtr cur = NULL;
2355:
2356: if ((ctxt->sax != NULL) && (ctxt->sax->getEntity != NULL))
2357: cur = ctxt->sax->getEntity(ctxt, name);
2358: if (cur != NULL)
2359: cur->orig = orig;
2360: else
2361: free(orig);
2362: }
1.39 daniel 2363: if (name != NULL) free(name);
2364: if (value != NULL) free(value);
2365: if (URI != NULL) free(URI);
2366: if (literal != NULL) free(literal);
2367: if (ndata != NULL) free(ndata);
1.22 daniel 2368: }
2369: }
2370:
1.50 daniel 2371: /**
1.59 daniel 2372: * xmlParseDefaultDecl:
2373: * @ctxt: an XML parser context
2374: * @value: Receive a possible fixed default value for the attribute
2375: *
2376: * Parse an attribute default declaration
2377: *
2378: * [60] DefaultDecl ::= '#REQUIRED' | '#IMPLIED' | (('#FIXED' S)? AttValue)
2379: *
2380: * returns: XML_ATTRIBUTE_NONE, XML_ATTRIBUTE_REQUIRED, XML_ATTRIBUTE_IMPLIED
2381: * or XML_ATTRIBUTE_FIXED.
2382: */
2383:
2384: int
2385: xmlParseDefaultDecl(xmlParserCtxtPtr ctxt, CHAR **value) {
2386: int val;
2387: CHAR *ret;
2388:
2389: *value = NULL;
2390: if ((CUR == '#') && (NXT(1) == 'R') &&
2391: (NXT(2) == 'E') && (NXT(3) == 'Q') &&
2392: (NXT(4) == 'U') && (NXT(5) == 'I') &&
2393: (NXT(6) == 'R') && (NXT(7) == 'E') &&
2394: (NXT(8) == 'D')) {
2395: SKIP(9);
2396: return(XML_ATTRIBUTE_REQUIRED);
2397: }
2398: if ((CUR == '#') && (NXT(1) == 'I') &&
2399: (NXT(2) == 'M') && (NXT(3) == 'P') &&
2400: (NXT(4) == 'L') && (NXT(5) == 'I') &&
2401: (NXT(6) == 'E') && (NXT(7) == 'D')) {
2402: SKIP(8);
2403: return(XML_ATTRIBUTE_IMPLIED);
2404: }
2405: val = XML_ATTRIBUTE_NONE;
2406: if ((CUR == '#') && (NXT(1) == 'F') &&
2407: (NXT(2) == 'I') && (NXT(3) == 'X') &&
2408: (NXT(4) == 'E') && (NXT(5) == 'D')) {
2409: SKIP(6);
2410: val = XML_ATTRIBUTE_FIXED;
2411: if (!IS_BLANK(CUR)) {
2412: if ((ctxt->sax != NULL) && (ctxt->sax->error != NULL))
1.74 daniel 2413: ctxt->sax->error(ctxt->userData, "Space required after '#FIXED'\n");
1.59 daniel 2414: ctxt->wellFormed = 0;
2415: }
2416: SKIP_BLANKS;
2417: }
2418: ret = xmlParseAttValue(ctxt);
2419: if (ret == NULL) {
2420: if ((ctxt->sax != NULL) && (ctxt->sax->error != NULL))
1.74 daniel 2421: ctxt->sax->error(ctxt->userData,
1.59 daniel 2422: "Attribute default value declaration error\n");
2423: ctxt->wellFormed = 0;
2424: } else
2425: *value = ret;
2426: return(val);
2427: }
2428:
2429: /**
1.66 daniel 2430: * xmlParseNotationType:
2431: * @ctxt: an XML parser context
2432: *
2433: * parse an Notation attribute type.
2434: *
2435: * [58] NotationType ::= 'NOTATION' S '(' S? Name (S? '|' S? Name)* S? ')'
2436: *
2437: * Note: the leading 'NOTATION' S part has already being parsed...
2438: *
2439: * Returns: the notation attribute tree built while parsing
2440: */
2441:
2442: xmlEnumerationPtr
2443: xmlParseNotationType(xmlParserCtxtPtr ctxt) {
2444: CHAR *name;
2445: xmlEnumerationPtr ret = NULL, last = NULL, cur;
2446:
2447: if (CUR != '(') {
2448: if ((ctxt->sax != NULL) && (ctxt->sax->error != NULL))
1.74 daniel 2449: ctxt->sax->error(ctxt->userData, "'(' required to start 'NOTATION'\n");
1.66 daniel 2450: ctxt->wellFormed = 0;
2451: return(NULL);
2452: }
1.91 daniel 2453: SHRINK;
1.66 daniel 2454: do {
2455: NEXT;
2456: SKIP_BLANKS;
2457: name = xmlParseName(ctxt);
2458: if (name == NULL) {
2459: if ((ctxt->sax != NULL) && (ctxt->sax->error != NULL))
1.74 daniel 2460: ctxt->sax->error(ctxt->userData,
1.66 daniel 2461: "Name expected in NOTATION declaration\n");
2462: ctxt->wellFormed = 0;
2463: return(ret);
2464: }
2465: cur = xmlCreateEnumeration(name);
1.67 daniel 2466: free(name);
1.66 daniel 2467: if (cur == NULL) return(ret);
2468: if (last == NULL) ret = last = cur;
2469: else {
2470: last->next = cur;
2471: last = cur;
2472: }
2473: SKIP_BLANKS;
2474: } while (CUR == '|');
2475: if (CUR != ')') {
2476: if ((ctxt->sax != NULL) && (ctxt->sax->error != NULL))
1.74 daniel 2477: ctxt->sax->error(ctxt->userData,
1.66 daniel 2478: "')' required to finish NOTATION declaration\n");
2479: ctxt->wellFormed = 0;
2480: return(ret);
2481: }
2482: NEXT;
2483: return(ret);
2484: }
2485:
2486: /**
2487: * xmlParseEnumerationType:
2488: * @ctxt: an XML parser context
2489: *
2490: * parse an Enumeration attribute type.
2491: *
2492: * [59] Enumeration ::= '(' S? Nmtoken (S? '|' S? Nmtoken)* S? ')'
2493: *
2494: * Returns: the enumeration attribute tree built while parsing
2495: */
2496:
2497: xmlEnumerationPtr
2498: xmlParseEnumerationType(xmlParserCtxtPtr ctxt) {
2499: CHAR *name;
2500: xmlEnumerationPtr ret = NULL, last = NULL, cur;
2501:
2502: if (CUR != '(') {
2503: if ((ctxt->sax != NULL) && (ctxt->sax->error != NULL))
1.74 daniel 2504: ctxt->sax->error(ctxt->userData,
1.66 daniel 2505: "'(' required to start ATTLIST enumeration\n");
2506: ctxt->wellFormed = 0;
2507: return(NULL);
2508: }
1.91 daniel 2509: SHRINK;
1.66 daniel 2510: do {
2511: NEXT;
2512: SKIP_BLANKS;
2513: name = xmlParseNmtoken(ctxt);
2514: if (name == NULL) {
2515: if ((ctxt->sax != NULL) && (ctxt->sax->error != NULL))
1.74 daniel 2516: ctxt->sax->error(ctxt->userData,
1.66 daniel 2517: "NmToken expected in ATTLIST enumeration\n");
2518: ctxt->wellFormed = 0;
2519: return(ret);
2520: }
2521: cur = xmlCreateEnumeration(name);
1.67 daniel 2522: free(name);
1.66 daniel 2523: if (cur == NULL) return(ret);
2524: if (last == NULL) ret = last = cur;
2525: else {
2526: last->next = cur;
2527: last = cur;
2528: }
2529: SKIP_BLANKS;
2530: } while (CUR == '|');
2531: if (CUR != ')') {
2532: if ((ctxt->sax != NULL) && (ctxt->sax->error != NULL))
1.74 daniel 2533: ctxt->sax->error(ctxt->userData,
1.66 daniel 2534: "')' required to finish ATTLIST enumeration\n");
2535: ctxt->wellFormed = 0;
2536: return(ret);
2537: }
2538: NEXT;
2539: return(ret);
2540: }
2541:
2542: /**
1.50 daniel 2543: * xmlParseEnumeratedType:
2544: * @ctxt: an XML parser context
1.66 daniel 2545: * @tree: the enumeration tree built while parsing
1.50 daniel 2546: *
1.66 daniel 2547: * parse an Enumerated attribute type.
1.22 daniel 2548: *
2549: * [57] EnumeratedType ::= NotationType | Enumeration
2550: *
2551: * [58] NotationType ::= 'NOTATION' S '(' S? Name (S? '|' S? Name)* S? ')'
2552: *
1.50 daniel 2553: *
1.66 daniel 2554: * Returns: XML_ATTRIBUTE_ENUMERATION or XML_ATTRIBUTE_NOTATION
1.22 daniel 2555: */
2556:
1.66 daniel 2557: int
2558: xmlParseEnumeratedType(xmlParserCtxtPtr ctxt, xmlEnumerationPtr *tree) {
2559: if ((CUR == 'N') && (NXT(1) == 'O') &&
2560: (NXT(2) == 'T') && (NXT(3) == 'A') &&
2561: (NXT(4) == 'T') && (NXT(5) == 'I') &&
2562: (NXT(6) == 'O') && (NXT(7) == 'N')) {
2563: SKIP(8);
2564: if (!IS_BLANK(CUR)) {
2565: if ((ctxt->sax != NULL) && (ctxt->sax->error != NULL))
1.74 daniel 2566: ctxt->sax->error(ctxt->userData, "Space required after 'NOTATION'\n");
1.66 daniel 2567: ctxt->wellFormed = 0;
2568: return(0);
2569: }
2570: SKIP_BLANKS;
2571: *tree = xmlParseNotationType(ctxt);
2572: if (*tree == NULL) return(0);
2573: return(XML_ATTRIBUTE_NOTATION);
2574: }
2575: *tree = xmlParseEnumerationType(ctxt);
2576: if (*tree == NULL) return(0);
2577: return(XML_ATTRIBUTE_ENUMERATION);
1.22 daniel 2578: }
2579:
1.50 daniel 2580: /**
2581: * xmlParseAttributeType:
2582: * @ctxt: an XML parser context
1.66 daniel 2583: * @tree: the enumeration tree built while parsing
1.50 daniel 2584: *
1.59 daniel 2585: * parse the Attribute list def for an element
1.22 daniel 2586: *
2587: * [54] AttType ::= StringType | TokenizedType | EnumeratedType
2588: *
2589: * [55] StringType ::= 'CDATA'
2590: *
2591: * [56] TokenizedType ::= 'ID' | 'IDREF' | 'IDREFS' | 'ENTITY' |
2592: * 'ENTITIES' | 'NMTOKEN' | 'NMTOKENS'
1.50 daniel 2593: *
1.69 daniel 2594: * Returns the attribute type
1.22 daniel 2595: */
1.59 daniel 2596: int
1.66 daniel 2597: xmlParseAttributeType(xmlParserCtxtPtr ctxt, xmlEnumerationPtr *tree) {
1.91 daniel 2598: SHRINK;
1.40 daniel 2599: if ((CUR == 'C') && (NXT(1) == 'D') &&
2600: (NXT(2) == 'A') && (NXT(3) == 'T') &&
2601: (NXT(4) == 'A')) {
2602: SKIP(5);
1.66 daniel 2603: return(XML_ATTRIBUTE_CDATA);
1.40 daniel 2604: } else if ((CUR == 'I') && (NXT(1) == 'D') &&
2605: (NXT(2) == 'R') && (NXT(3) == 'E') &&
2606: (NXT(4) == 'F')) {
2607: SKIP(5);
1.59 daniel 2608: return(XML_ATTRIBUTE_IDREF);
1.66 daniel 2609: } else if ((CUR == 'I') && (NXT(1) == 'D')) {
2610: SKIP(2);
2611: return(XML_ATTRIBUTE_ID);
1.40 daniel 2612: } else if ((CUR == 'I') && (NXT(1) == 'D') &&
2613: (NXT(2) == 'R') && (NXT(3) == 'E') &&
2614: (NXT(4) == 'F') && (NXT(5) == 'S')) {
2615: SKIP(6);
1.59 daniel 2616: return(XML_ATTRIBUTE_IDREFS);
1.40 daniel 2617: } else if ((CUR == 'E') && (NXT(1) == 'N') &&
2618: (NXT(2) == 'T') && (NXT(3) == 'I') &&
2619: (NXT(4) == 'T') && (NXT(5) == 'Y')) {
2620: SKIP(6);
1.59 daniel 2621: return(XML_ATTRIBUTE_ENTITY);
1.40 daniel 2622: } else if ((CUR == 'E') && (NXT(1) == 'N') &&
2623: (NXT(2) == 'T') && (NXT(3) == 'I') &&
2624: (NXT(4) == 'T') && (NXT(5) == 'I') &&
2625: (NXT(6) == 'E') && (NXT(7) == 'S')) {
2626: SKIP(8);
1.59 daniel 2627: return(XML_ATTRIBUTE_ENTITIES);
1.40 daniel 2628: } else if ((CUR == 'N') && (NXT(1) == 'M') &&
2629: (NXT(2) == 'T') && (NXT(3) == 'O') &&
2630: (NXT(4) == 'K') && (NXT(5) == 'E') &&
1.66 daniel 2631: (NXT(6) == 'N') && (NXT(7) == 'S')) {
2632: SKIP(8);
2633: return(XML_ATTRIBUTE_NMTOKENS);
2634: } else if ((CUR == 'N') && (NXT(1) == 'M') &&
2635: (NXT(2) == 'T') && (NXT(3) == 'O') &&
2636: (NXT(4) == 'K') && (NXT(5) == 'E') &&
1.40 daniel 2637: (NXT(6) == 'N')) {
2638: SKIP(7);
1.59 daniel 2639: return(XML_ATTRIBUTE_NMTOKEN);
1.22 daniel 2640: }
1.66 daniel 2641: return(xmlParseEnumeratedType(ctxt, tree));
1.22 daniel 2642: }
2643:
1.50 daniel 2644: /**
2645: * xmlParseAttributeListDecl:
2646: * @ctxt: an XML parser context
2647: *
2648: * : parse the Attribute list def for an element
1.22 daniel 2649: *
2650: * [52] AttlistDecl ::= '<!ATTLIST' S Name AttDef* S? '>'
2651: *
2652: * [53] AttDef ::= S Name S AttType S DefaultDecl
1.50 daniel 2653: *
1.22 daniel 2654: */
1.55 daniel 2655: void
2656: xmlParseAttributeListDecl(xmlParserCtxtPtr ctxt) {
1.59 daniel 2657: CHAR *elemName;
2658: CHAR *attrName;
1.66 daniel 2659: xmlEnumerationPtr tree = NULL;
1.22 daniel 2660:
1.40 daniel 2661: if ((CUR == '<') && (NXT(1) == '!') &&
2662: (NXT(2) == 'A') && (NXT(3) == 'T') &&
2663: (NXT(4) == 'T') && (NXT(5) == 'L') &&
2664: (NXT(6) == 'I') && (NXT(7) == 'S') &&
1.59 daniel 2665: (NXT(8) == 'T')) {
1.40 daniel 2666: SKIP(9);
1.59 daniel 2667: if (!IS_BLANK(CUR)) {
2668: if ((ctxt->sax != NULL) && (ctxt->sax->error != NULL))
1.74 daniel 2669: ctxt->sax->error(ctxt->userData, "Space required after '<!ATTLIST'\n");
1.59 daniel 2670: ctxt->wellFormed = 0;
2671: }
1.42 daniel 2672: SKIP_BLANKS;
1.59 daniel 2673: elemName = xmlParseName(ctxt);
2674: if (elemName == NULL) {
1.55 daniel 2675: if ((ctxt->sax != NULL) && (ctxt->sax->error != NULL))
1.74 daniel 2676: ctxt->sax->error(ctxt->userData, "ATTLIST: no name for Element\n");
1.59 daniel 2677: ctxt->wellFormed = 0;
1.22 daniel 2678: return;
2679: }
1.42 daniel 2680: SKIP_BLANKS;
1.40 daniel 2681: while (CUR != '>') {
2682: const CHAR *check = CUR_PTR;
1.59 daniel 2683: int type;
2684: int def;
2685: CHAR *defaultValue = NULL;
2686:
2687: attrName = xmlParseName(ctxt);
2688: if (attrName == NULL) {
2689: if ((ctxt->sax != NULL) && (ctxt->sax->error != NULL))
1.74 daniel 2690: ctxt->sax->error(ctxt->userData, "ATTLIST: no name for Attribute\n");
1.59 daniel 2691: ctxt->wellFormed = 0;
2692: break;
2693: }
2694: if (!IS_BLANK(CUR)) {
2695: if ((ctxt->sax != NULL) && (ctxt->sax->error != NULL))
1.74 daniel 2696: ctxt->sax->error(ctxt->userData,
1.59 daniel 2697: "Space required after the attribute name\n");
2698: ctxt->wellFormed = 0;
2699: break;
2700: }
2701: SKIP_BLANKS;
2702:
1.66 daniel 2703: type = xmlParseAttributeType(ctxt, &tree);
1.59 daniel 2704: if (type <= 0) break;
1.22 daniel 2705:
1.59 daniel 2706: if (!IS_BLANK(CUR)) {
2707: if ((ctxt->sax != NULL) && (ctxt->sax->error != NULL))
1.74 daniel 2708: ctxt->sax->error(ctxt->userData,
1.59 daniel 2709: "Space required after the attribute type\n");
2710: ctxt->wellFormed = 0;
2711: break;
2712: }
1.42 daniel 2713: SKIP_BLANKS;
1.59 daniel 2714:
2715: def = xmlParseDefaultDecl(ctxt, &defaultValue);
2716: if (def <= 0) break;
2717:
2718: if (CUR != '>') {
2719: if (!IS_BLANK(CUR)) {
2720: if ((ctxt->sax != NULL) && (ctxt->sax->error != NULL))
1.74 daniel 2721: ctxt->sax->error(ctxt->userData,
1.59 daniel 2722: "Space required after the attribute default value\n");
2723: ctxt->wellFormed = 0;
2724: break;
2725: }
2726: SKIP_BLANKS;
2727: }
1.40 daniel 2728: if (check == CUR_PTR) {
1.55 daniel 2729: if ((ctxt->sax != NULL) && (ctxt->sax->error != NULL))
1.74 daniel 2730: ctxt->sax->error(ctxt->userData,
1.59 daniel 2731: "xmlParseAttributeListDecl: detected internal error\n");
1.22 daniel 2732: break;
2733: }
1.72 daniel 2734: if ((ctxt->sax != NULL) && (ctxt->sax->attributeDecl != NULL))
1.74 daniel 2735: ctxt->sax->attributeDecl(ctxt->userData, elemName, attrName,
1.66 daniel 2736: type, def, defaultValue, tree);
1.59 daniel 2737: if (attrName != NULL)
2738: free(attrName);
2739: if (defaultValue != NULL)
2740: free(defaultValue);
1.22 daniel 2741: }
1.40 daniel 2742: if (CUR == '>')
2743: NEXT;
1.22 daniel 2744:
1.59 daniel 2745: free(elemName);
1.22 daniel 2746: }
2747: }
2748:
1.50 daniel 2749: /**
1.61 daniel 2750: * xmlParseElementMixedContentDecl:
2751: * @ctxt: an XML parser context
2752: *
2753: * parse the declaration for a Mixed Element content
2754: * The leading '(' and spaces have been skipped in xmlParseElementContentDecl
2755: *
2756: * [51] Mixed ::= '(' S? '#PCDATA' (S? '|' S? Name)* S? ')*' |
2757: * '(' S? '#PCDATA' S? ')'
2758: *
2759: * returns: the list of the xmlElementContentPtr describing the element choices
2760: */
2761: xmlElementContentPtr
1.62 daniel 2762: xmlParseElementMixedContentDecl(xmlParserCtxtPtr ctxt) {
1.64 daniel 2763: xmlElementContentPtr ret = NULL, cur = NULL, n;
1.61 daniel 2764: CHAR *elem = NULL;
2765:
2766: if ((CUR == '#') && (NXT(1) == 'P') &&
2767: (NXT(2) == 'C') && (NXT(3) == 'D') &&
2768: (NXT(4) == 'A') && (NXT(5) == 'T') &&
2769: (NXT(6) == 'A')) {
2770: SKIP(7);
2771: SKIP_BLANKS;
1.91 daniel 2772: SHRINK;
1.63 daniel 2773: if (CUR == ')') {
2774: NEXT;
2775: ret = xmlNewElementContent(NULL, XML_ELEMENT_CONTENT_PCDATA);
2776: return(ret);
2777: }
1.61 daniel 2778: if ((CUR == '(') || (CUR == '|')) {
2779: ret = cur = xmlNewElementContent(NULL, XML_ELEMENT_CONTENT_PCDATA);
2780: if (ret == NULL) return(NULL);
1.63 daniel 2781: } /********** else {
1.61 daniel 2782: if ((ctxt->sax != NULL) && (ctxt->sax->error != NULL))
1.74 daniel 2783: ctxt->sax->error(ctxt->userData,
1.61 daniel 2784: "xmlParseElementMixedContentDecl : '|' or ')' expected\n");
2785: ctxt->wellFormed = 0;
2786: return(NULL);
1.63 daniel 2787: } **********/
1.61 daniel 2788: while (CUR == '|') {
1.64 daniel 2789: NEXT;
1.61 daniel 2790: if (elem == NULL) {
2791: ret = xmlNewElementContent(NULL, XML_ELEMENT_CONTENT_OR);
2792: if (ret == NULL) return(NULL);
2793: ret->c1 = cur;
1.64 daniel 2794: cur = ret;
1.61 daniel 2795: } else {
1.64 daniel 2796: n = xmlNewElementContent(NULL, XML_ELEMENT_CONTENT_OR);
2797: if (n == NULL) return(NULL);
2798: n->c1 = xmlNewElementContent(elem, XML_ELEMENT_CONTENT_ELEMENT);
2799: cur->c2 = n;
2800: cur = n;
1.66 daniel 2801: free(elem);
1.61 daniel 2802: }
2803: SKIP_BLANKS;
2804: elem = xmlParseName(ctxt);
2805: if (elem == NULL) {
2806: if ((ctxt->sax != NULL) && (ctxt->sax->error != NULL))
1.74 daniel 2807: ctxt->sax->error(ctxt->userData,
1.61 daniel 2808: "xmlParseElementMixedContentDecl : Name expected\n");
2809: ctxt->wellFormed = 0;
2810: xmlFreeElementContent(cur);
2811: return(NULL);
2812: }
2813: SKIP_BLANKS;
2814: }
1.63 daniel 2815: if ((CUR == ')') && (NXT(1) == '*')) {
1.66 daniel 2816: if (elem != NULL) {
1.61 daniel 2817: cur->c2 = xmlNewElementContent(elem,
2818: XML_ELEMENT_CONTENT_ELEMENT);
1.66 daniel 2819: free(elem);
2820: }
1.65 daniel 2821: ret->ocur = XML_ELEMENT_CONTENT_MULT;
1.64 daniel 2822: SKIP(2);
1.61 daniel 2823: } else {
1.66 daniel 2824: if (elem != NULL) free(elem);
1.61 daniel 2825: if ((ctxt->sax != NULL) && (ctxt->sax->error != NULL))
1.74 daniel 2826: ctxt->sax->error(ctxt->userData,
1.63 daniel 2827: "xmlParseElementMixedContentDecl : '|' or ')*' expected\n");
1.61 daniel 2828: ctxt->wellFormed = 0;
2829: xmlFreeElementContent(ret);
2830: return(NULL);
2831: }
2832:
2833: } else {
2834: if ((ctxt->sax != NULL) && (ctxt->sax->error != NULL))
1.74 daniel 2835: ctxt->sax->error(ctxt->userData,
1.61 daniel 2836: "xmlParseElementMixedContentDecl : '#PCDATA' expected\n");
2837: ctxt->wellFormed = 0;
2838: }
2839: return(ret);
2840: }
2841:
2842: /**
2843: * xmlParseElementChildrenContentDecl:
1.50 daniel 2844: * @ctxt: an XML parser context
2845: *
1.61 daniel 2846: * parse the declaration for an Element children content model
2847: * The leading '(' and spaces have been skipped in xmlParseElementContentDecl
1.22 daniel 2848: *
1.61 daniel 2849: *
1.22 daniel 2850: * [47] children ::= (choice | seq) ('?' | '*' | '+')?
2851: *
2852: * [48] cp ::= (Name | choice | seq) ('?' | '*' | '+')?
2853: *
2854: * [49] choice ::= '(' S? cp ( S? '|' S? cp )* S? ')'
2855: *
2856: * [50] seq ::= '(' S? cp ( S? ',' S? cp )* S? ')'
2857: *
1.62 daniel 2858: * returns: the tree of xmlElementContentPtr describing the element
1.61 daniel 2859: * hierarchy.
2860: */
2861: xmlElementContentPtr
1.62 daniel 2862: xmlParseElementChildrenContentDecl(xmlParserCtxtPtr ctxt) {
1.63 daniel 2863: xmlElementContentPtr ret = NULL, cur = NULL, last = NULL, op = NULL;
1.62 daniel 2864: CHAR *elem;
2865: CHAR type = 0;
2866:
1.94 daniel 2867: /* !!!!!!!!!!!!!!!! PE Refs can occur here !!!!!!!!!!! */
1.62 daniel 2868: SKIP_BLANKS;
1.94 daniel 2869: GROW;
1.62 daniel 2870: if (CUR == '(') {
1.63 daniel 2871: /* Recurse on first child */
1.62 daniel 2872: NEXT;
2873: SKIP_BLANKS;
2874: cur = ret = xmlParseElementChildrenContentDecl(ctxt);
2875: SKIP_BLANKS;
2876: } else {
2877: elem = xmlParseName(ctxt);
2878: if (elem == NULL) {
2879: if ((ctxt->sax != NULL) && (ctxt->sax->error != NULL))
1.74 daniel 2880: ctxt->sax->error(ctxt->userData,
1.62 daniel 2881: "xmlParseElementChildrenContentDecl : Name or '(' expected\n");
2882: ctxt->wellFormed = 0;
2883: return(NULL);
2884: }
2885: cur = ret = xmlNewElementContent(elem, XML_ELEMENT_CONTENT_ELEMENT);
2886: if (CUR == '?') {
2887: ret->ocur = XML_ELEMENT_CONTENT_OPT;
2888: NEXT;
2889: } else if (CUR == '*') {
2890: ret->ocur = XML_ELEMENT_CONTENT_MULT;
2891: NEXT;
2892: } else if (CUR == '+') {
2893: ret->ocur = XML_ELEMENT_CONTENT_PLUS;
2894: NEXT;
2895: } else {
2896: ret->ocur = XML_ELEMENT_CONTENT_ONCE;
2897: }
1.66 daniel 2898: free(elem);
1.62 daniel 2899: }
2900: SKIP_BLANKS;
1.91 daniel 2901: SHRINK;
1.62 daniel 2902: while (CUR != ')') {
1.63 daniel 2903: /*
2904: * Each loop we parse one separator and one element.
2905: */
1.62 daniel 2906: if (CUR == ',') {
2907: if (type == 0) type = CUR;
2908:
2909: /*
2910: * Detect "Name | Name , Name" error
2911: */
2912: else if (type != CUR) {
2913: if ((ctxt->sax != NULL) && (ctxt->sax->error != NULL))
1.74 daniel 2914: ctxt->sax->error(ctxt->userData,
1.62 daniel 2915: "xmlParseElementChildrenContentDecl : '%c' expected\n",
2916: type);
2917: ctxt->wellFormed = 0;
2918: xmlFreeElementContent(ret);
2919: return(NULL);
2920: }
1.64 daniel 2921: NEXT;
1.62 daniel 2922:
1.63 daniel 2923: op = xmlNewElementContent(NULL, XML_ELEMENT_CONTENT_SEQ);
2924: if (op == NULL) {
2925: xmlFreeElementContent(ret);
2926: return(NULL);
2927: }
2928: if (last == NULL) {
2929: op->c1 = ret;
1.65 daniel 2930: ret = cur = op;
1.63 daniel 2931: } else {
2932: cur->c2 = op;
2933: op->c1 = last;
2934: cur =op;
1.65 daniel 2935: last = NULL;
1.63 daniel 2936: }
1.62 daniel 2937: } else if (CUR == '|') {
2938: if (type == 0) type = CUR;
2939:
2940: /*
1.63 daniel 2941: * Detect "Name , Name | Name" error
1.62 daniel 2942: */
2943: else if (type != CUR) {
2944: if ((ctxt->sax != NULL) && (ctxt->sax->error != NULL))
1.74 daniel 2945: ctxt->sax->error(ctxt->userData,
1.62 daniel 2946: "xmlParseElementChildrenContentDecl : '%c' expected\n",
2947: type);
2948: ctxt->wellFormed = 0;
2949: xmlFreeElementContent(ret);
2950: return(NULL);
2951: }
1.64 daniel 2952: NEXT;
1.62 daniel 2953:
1.63 daniel 2954: op = xmlNewElementContent(NULL, XML_ELEMENT_CONTENT_OR);
2955: if (op == NULL) {
2956: xmlFreeElementContent(ret);
2957: return(NULL);
2958: }
2959: if (last == NULL) {
2960: op->c1 = ret;
1.65 daniel 2961: ret = cur = op;
1.63 daniel 2962: } else {
2963: cur->c2 = op;
2964: op->c1 = last;
2965: cur =op;
1.65 daniel 2966: last = NULL;
1.63 daniel 2967: }
1.62 daniel 2968: } else {
2969: if ((ctxt->sax != NULL) && (ctxt->sax->error != NULL))
1.74 daniel 2970: ctxt->sax->error(ctxt->userData,
1.62 daniel 2971: "xmlParseElementChildrenContentDecl : ',' '|' or ')' expected\n");
2972: ctxt->wellFormed = 0;
2973: xmlFreeElementContent(ret);
2974: return(NULL);
2975: }
2976: SKIP_BLANKS;
2977: if (CUR == '(') {
1.63 daniel 2978: /* Recurse on second child */
1.62 daniel 2979: NEXT;
2980: SKIP_BLANKS;
1.65 daniel 2981: last = xmlParseElementChildrenContentDecl(ctxt);
1.62 daniel 2982: SKIP_BLANKS;
2983: } else {
2984: elem = xmlParseName(ctxt);
2985: if (elem == NULL) {
2986: if ((ctxt->sax != NULL) && (ctxt->sax->error != NULL))
1.74 daniel 2987: ctxt->sax->error(ctxt->userData,
1.62 daniel 2988: "xmlParseElementChildrenContentDecl : Name or '(' expected\n");
2989: ctxt->wellFormed = 0;
2990: return(NULL);
2991: }
1.65 daniel 2992: last = xmlNewElementContent(elem, XML_ELEMENT_CONTENT_ELEMENT);
1.66 daniel 2993: free(elem);
1.62 daniel 2994: }
1.63 daniel 2995: if (CUR == '?') {
2996: ret->ocur = XML_ELEMENT_CONTENT_OPT;
2997: NEXT;
2998: } else if (CUR == '*') {
2999: ret->ocur = XML_ELEMENT_CONTENT_MULT;
3000: NEXT;
3001: } else if (CUR == '+') {
3002: ret->ocur = XML_ELEMENT_CONTENT_PLUS;
3003: NEXT;
3004: } else {
3005: ret->ocur = XML_ELEMENT_CONTENT_ONCE;
3006: }
3007: SKIP_BLANKS;
1.64 daniel 3008: }
1.65 daniel 3009: if ((cur != NULL) && (last != NULL)) {
3010: cur->c2 = last;
1.62 daniel 3011: }
3012: NEXT;
3013: if (CUR == '?') {
3014: ret->ocur = XML_ELEMENT_CONTENT_OPT;
3015: NEXT;
3016: } else if (CUR == '*') {
3017: ret->ocur = XML_ELEMENT_CONTENT_MULT;
3018: NEXT;
3019: } else if (CUR == '+') {
3020: ret->ocur = XML_ELEMENT_CONTENT_PLUS;
3021: NEXT;
3022: } else {
3023: ret->ocur = XML_ELEMENT_CONTENT_ONCE;
3024: }
3025: return(ret);
1.61 daniel 3026: }
3027:
3028: /**
3029: * xmlParseElementContentDecl:
3030: * @ctxt: an XML parser context
3031: * @name: the name of the element being defined.
3032: * @result: the Element Content pointer will be stored here if any
1.22 daniel 3033: *
1.61 daniel 3034: * parse the declaration for an Element content either Mixed or Children,
3035: * the cases EMPTY and ANY are handled directly in xmlParseElementDecl
3036: *
3037: * [46] contentspec ::= 'EMPTY' | 'ANY' | Mixed | children
1.50 daniel 3038: *
1.61 daniel 3039: * returns: the type of element content XML_ELEMENT_TYPE_xxx
1.22 daniel 3040: */
3041:
1.61 daniel 3042: int
3043: xmlParseElementContentDecl(xmlParserCtxtPtr ctxt, CHAR *name,
3044: xmlElementContentPtr *result) {
3045:
3046: xmlElementContentPtr tree = NULL;
3047: int res;
3048:
3049: *result = NULL;
3050:
3051: if (CUR != '(') {
3052: if ((ctxt->sax != NULL) && (ctxt->sax->error != NULL))
1.74 daniel 3053: ctxt->sax->error(ctxt->userData,
1.61 daniel 3054: "xmlParseElementContentDecl : '(' expected\n");
3055: ctxt->wellFormed = 0;
3056: return(-1);
3057: }
3058: NEXT;
3059: SKIP_BLANKS;
3060: if ((CUR == '#') && (NXT(1) == 'P') &&
3061: (NXT(2) == 'C') && (NXT(3) == 'D') &&
3062: (NXT(4) == 'A') && (NXT(5) == 'T') &&
3063: (NXT(6) == 'A')) {
1.62 daniel 3064: tree = xmlParseElementMixedContentDecl(ctxt);
1.61 daniel 3065: res = XML_ELEMENT_TYPE_MIXED;
3066: } else {
1.62 daniel 3067: tree = xmlParseElementChildrenContentDecl(ctxt);
1.61 daniel 3068: res = XML_ELEMENT_TYPE_ELEMENT;
3069: }
3070: SKIP_BLANKS;
1.63 daniel 3071: /****************************
1.61 daniel 3072: if (CUR != ')') {
3073: if ((ctxt->sax != NULL) && (ctxt->sax->error != NULL))
1.74 daniel 3074: ctxt->sax->error(ctxt->userData,
1.61 daniel 3075: "xmlParseElementContentDecl : ')' expected\n");
3076: ctxt->wellFormed = 0;
3077: return(-1);
3078: }
1.63 daniel 3079: ****************************/
3080: *result = tree;
1.61 daniel 3081: return(res);
1.22 daniel 3082: }
3083:
1.50 daniel 3084: /**
3085: * xmlParseElementDecl:
3086: * @ctxt: an XML parser context
3087: *
3088: * parse an Element declaration.
1.22 daniel 3089: *
3090: * [45] elementdecl ::= '<!ELEMENT' S Name S contentspec S? '>'
3091: *
3092: * TODO There is a check [ VC: Unique Element Type Declaration ]
1.69 daniel 3093: *
3094: * Returns the type of the element, or -1 in case of error
1.22 daniel 3095: */
1.59 daniel 3096: int
1.55 daniel 3097: xmlParseElementDecl(xmlParserCtxtPtr ctxt) {
1.22 daniel 3098: CHAR *name;
1.59 daniel 3099: int ret = -1;
1.61 daniel 3100: xmlElementContentPtr content = NULL;
1.22 daniel 3101:
1.40 daniel 3102: if ((CUR == '<') && (NXT(1) == '!') &&
3103: (NXT(2) == 'E') && (NXT(3) == 'L') &&
3104: (NXT(4) == 'E') && (NXT(5) == 'M') &&
3105: (NXT(6) == 'E') && (NXT(7) == 'N') &&
1.59 daniel 3106: (NXT(8) == 'T')) {
1.40 daniel 3107: SKIP(9);
1.59 daniel 3108: if (!IS_BLANK(CUR)) {
3109: if ((ctxt->sax != NULL) && (ctxt->sax->error != NULL))
1.74 daniel 3110: ctxt->sax->error(ctxt->userData,
1.59 daniel 3111: "Space required after 'ELEMENT'\n");
3112: ctxt->wellFormed = 0;
3113: }
1.42 daniel 3114: SKIP_BLANKS;
1.22 daniel 3115: name = xmlParseName(ctxt);
3116: if (name == NULL) {
1.55 daniel 3117: if ((ctxt->sax != NULL) && (ctxt->sax->error != NULL))
1.74 daniel 3118: ctxt->sax->error(ctxt->userData,
1.59 daniel 3119: "xmlParseElementDecl: no name for Element\n");
3120: ctxt->wellFormed = 0;
3121: return(-1);
3122: }
3123: if (!IS_BLANK(CUR)) {
3124: if ((ctxt->sax != NULL) && (ctxt->sax->error != NULL))
1.74 daniel 3125: ctxt->sax->error(ctxt->userData,
1.59 daniel 3126: "Space required after the element name\n");
3127: ctxt->wellFormed = 0;
1.22 daniel 3128: }
1.42 daniel 3129: SKIP_BLANKS;
1.40 daniel 3130: if ((CUR == 'E') && (NXT(1) == 'M') &&
3131: (NXT(2) == 'P') && (NXT(3) == 'T') &&
3132: (NXT(4) == 'Y')) {
3133: SKIP(5);
1.22 daniel 3134: /*
3135: * Element must always be empty.
3136: */
1.59 daniel 3137: ret = XML_ELEMENT_TYPE_EMPTY;
1.40 daniel 3138: } else if ((CUR == 'A') && (NXT(1) == 'N') &&
3139: (NXT(2) == 'Y')) {
3140: SKIP(3);
1.22 daniel 3141: /*
3142: * Element is a generic container.
3143: */
1.59 daniel 3144: ret = XML_ELEMENT_TYPE_ANY;
1.61 daniel 3145: } else if (CUR == '(') {
3146: ret = xmlParseElementContentDecl(ctxt, name, &content);
1.22 daniel 3147: } else {
1.61 daniel 3148: if ((ctxt->sax != NULL) && (ctxt->sax->error != NULL))
1.74 daniel 3149: ctxt->sax->error(ctxt->userData,
1.61 daniel 3150: "xmlParseElementDecl: 'EMPTY', 'ANY' or '(' expected\n");
3151: ctxt->wellFormed = 0;
3152: if (name != NULL) free(name);
3153: return(-1);
1.22 daniel 3154: }
1.42 daniel 3155: SKIP_BLANKS;
1.40 daniel 3156: if (CUR != '>') {
1.55 daniel 3157: if ((ctxt->sax != NULL) && (ctxt->sax->error != NULL))
1.74 daniel 3158: ctxt->sax->error(ctxt->userData,
1.31 daniel 3159: "xmlParseElementDecl: expected '>' at the end\n");
1.59 daniel 3160: ctxt->wellFormed = 0;
1.61 daniel 3161: } else {
1.40 daniel 3162: NEXT;
1.72 daniel 3163: if ((ctxt->sax != NULL) && (ctxt->sax->elementDecl != NULL))
1.76 daniel 3164: ctxt->sax->elementDecl(ctxt->userData, name, ret,
3165: content);
1.61 daniel 3166: }
1.84 daniel 3167: if (content != NULL) {
3168: xmlFreeElementContent(content);
3169: }
1.61 daniel 3170: if (name != NULL) {
3171: free(name);
3172: }
1.22 daniel 3173: }
1.59 daniel 3174: return(ret);
1.22 daniel 3175: }
3176:
1.50 daniel 3177: /**
3178: * xmlParseMarkupDecl:
3179: * @ctxt: an XML parser context
3180: *
3181: * parse Markup declarations
1.22 daniel 3182: *
3183: * [29] markupdecl ::= elementdecl | AttlistDecl | EntityDecl |
3184: * NotationDecl | PI | Comment
3185: *
3186: * TODO There is a check [ VC: Proper Declaration/PE Nesting ]
3187: */
1.55 daniel 3188: void
3189: xmlParseMarkupDecl(xmlParserCtxtPtr ctxt) {
1.94 daniel 3190: GROW;
1.22 daniel 3191: xmlParseElementDecl(ctxt);
3192: xmlParseAttributeListDecl(ctxt);
3193: xmlParseEntityDecl(ctxt);
3194: xmlParseNotationDecl(ctxt);
3195: xmlParsePI(ctxt);
1.31 daniel 3196: xmlParseComment(ctxt, 0);
1.22 daniel 3197: }
3198:
1.50 daniel 3199: /**
1.76 daniel 3200: * xmlParseTextDecl:
3201: * @ctxt: an XML parser context
3202: *
3203: * parse an XML declaration header for external entities
3204: *
3205: * [77] TextDecl ::= '<?xml' VersionInfo? EncodingDecl S? '?>'
3206: *
3207: * Returns the only valuable info for an external parsed entity, the encoding
3208: */
3209:
3210: CHAR *
3211: xmlParseTextDecl(xmlParserCtxtPtr ctxt) {
3212: CHAR *version;
3213: CHAR *encoding = NULL;
3214:
3215: /*
3216: * We know that '<?xml' is here.
3217: */
3218: SKIP(5);
3219:
3220: if (!IS_BLANK(CUR)) {
3221: if ((ctxt->sax != NULL) && (ctxt->sax->error != NULL))
3222: ctxt->sax->error(ctxt->userData, "Blank needed after '<?xml'\n");
3223: ctxt->wellFormed = 0;
3224: }
3225: SKIP_BLANKS;
3226:
3227: /*
3228: * We may have the VersionInfo here.
3229: */
3230: version = xmlParseVersionInfo(ctxt);
3231: /* TODO: we should actually inherit from the referencing doc if absent
3232: if (version == NULL)
3233: version = xmlCharStrdup(XML_DEFAULT_VERSION);
3234: ctxt->version = xmlStrdup(version);
3235: */
3236: if (version != NULL)
3237: free(version);
3238:
3239: /*
3240: * We must have the encoding declaration
3241: */
3242: if (!IS_BLANK(CUR)) {
3243: if ((ctxt->sax != NULL) && (ctxt->sax->error != NULL))
3244: ctxt->sax->error(ctxt->userData, "Blank needed here\n");
3245: ctxt->wellFormed = 0;
3246: }
3247: encoding = xmlParseEncodingDecl(ctxt);
3248:
3249: SKIP_BLANKS;
3250: if ((CUR == '?') && (NXT(1) == '>')) {
3251: SKIP(2);
3252: } else if (CUR == '>') {
3253: /* Deprecated old WD ... */
3254: if ((ctxt->sax != NULL) && (ctxt->sax->error != NULL))
3255: ctxt->sax->error(ctxt->userData, "XML declaration must end-up with '?>'\n");
3256: ctxt->wellFormed = 0;
3257: NEXT;
3258: } else {
3259: if ((ctxt->sax != NULL) && (ctxt->sax->error != NULL))
3260: ctxt->sax->error(ctxt->userData, "parsing XML declaration: '?>' expected\n");
3261: ctxt->wellFormed = 0;
3262: MOVETO_ENDTAG(CUR_PTR);
3263: NEXT;
3264: }
3265: return(encoding);
3266: }
3267:
3268: /*
3269: * xmlParseConditionalSections
3270: * @ctxt: an XML parser context
3271: *
3272: * TODO : Conditionnal section are not yet supported !
3273: *
3274: * [61] conditionalSect ::= includeSect | ignoreSect
3275: * [62] includeSect ::= '<![' S? 'INCLUDE' S? '[' extSubsetDecl ']]>'
3276: * [63] ignoreSect ::= '<![' S? 'IGNORE' S? '[' ignoreSectContents* ']]>'
3277: * [64] ignoreSectContents ::= Ignore ('<![' ignoreSectContents ']]>' Ignore)*
3278: * [65] Ignore ::= Char* - (Char* ('<![' | ']]>') Char*)
3279: */
3280:
3281: void
3282: xmlParseConditionalSections(xmlParserCtxtPtr ctxt) {
3283: if ((ctxt->sax != NULL) && (ctxt->sax->warning != NULL))
3284: ctxt->sax->warning(ctxt->userData,
3285: "XML conditional section not supported\n");
3286: /*
3287: * Skip up to the end of the conditionnal section.
3288: */
3289: while ((CUR != 0) && ((CUR != ']') || (NXT(1) != ']') || (NXT(2) != '>')))
3290: NEXT;
3291: if (CUR == 0) {
3292: if ((ctxt->sax != NULL) && (ctxt->sax->error != NULL))
3293: ctxt->sax->error(ctxt->userData,
3294: "XML conditional section not closed\n");
3295: ctxt->wellFormed = 0;
3296: }
3297: }
3298:
3299: /**
3300: * xmlParseExternalSubset
3301: * @ctxt: an XML parser context
3302: *
3303: * parse Markup declarations from an external subset
3304: *
3305: * [30] extSubset ::= textDecl? extSubsetDecl
3306: *
3307: * [31] extSubsetDecl ::= (markupdecl | conditionalSect | PEReference | S) *
3308: *
3309: * TODO There is a check [ VC: Proper Declaration/PE Nesting ]
3310: */
3311: void
1.79 daniel 3312: xmlParseExternalSubset(xmlParserCtxtPtr ctxt, const CHAR *ExternalID,
3313: const CHAR *SystemID) {
1.76 daniel 3314: if ((CUR == '<') && (NXT(1) == '?') &&
3315: (NXT(2) == 'x') && (NXT(3) == 'm') &&
3316: (NXT(4) == 'l')) {
3317: xmlParseTextDecl(ctxt);
3318: }
1.79 daniel 3319: if (ctxt->myDoc == NULL) {
3320: ctxt->myDoc = xmlNewDoc("1.0");
3321: }
3322: if ((ctxt->myDoc != NULL) && (ctxt->myDoc->intSubset == NULL))
3323: xmlCreateIntSubset(ctxt->myDoc, NULL, ExternalID, SystemID);
3324:
1.76 daniel 3325: while (((CUR == '<') && (NXT(1) == '?')) ||
3326: ((CUR == '<') && (NXT(1) == '!')) ||
3327: IS_BLANK(CUR)) {
3328: if ((CUR == '<') && (NXT(1) == '!') && (NXT(2) == '[')) {
3329: xmlParseConditionalSections(ctxt);
3330: } else if (IS_BLANK(CUR)) {
3331: NEXT;
3332: } else if (CUR == '%') {
3333: xmlParsePEReference(ctxt);
3334: } else
3335: xmlParseMarkupDecl(ctxt);
1.77 daniel 3336:
3337: /*
3338: * Pop-up of finished entities.
3339: */
3340: while ((CUR == 0) && (ctxt->inputNr > 1))
3341: xmlPopInput(ctxt);
3342:
1.76 daniel 3343: }
3344:
3345: if (CUR != 0) {
3346: if ((ctxt->sax != NULL) && (ctxt->sax->error != NULL))
3347: ctxt->sax->error(ctxt->userData,
3348: "Extra content at the end of the document\n");
3349: ctxt->wellFormed = 0;
3350: }
3351:
3352: }
3353:
3354: /**
1.50 daniel 3355: * xmlParseCharRef:
3356: * @ctxt: an XML parser context
3357: *
3358: * parse Reference declarations
1.24 daniel 3359: *
3360: * [66] CharRef ::= '&#' [0-9]+ ';' |
3361: * '&#x' [0-9a-fA-F]+ ';'
1.68 daniel 3362: *
1.77 daniel 3363: * Returns the value parsed (as an int)
1.24 daniel 3364: */
1.77 daniel 3365: int
1.55 daniel 3366: xmlParseCharRef(xmlParserCtxtPtr ctxt) {
1.29 daniel 3367: int val = 0;
1.24 daniel 3368:
1.40 daniel 3369: if ((CUR == '&') && (NXT(1) == '#') &&
3370: (NXT(2) == 'x')) {
3371: SKIP(3);
3372: while (CUR != ';') {
3373: if ((CUR >= '0') && (CUR <= '9'))
3374: val = val * 16 + (CUR - '0');
3375: else if ((CUR >= 'a') && (CUR <= 'f'))
3376: val = val * 16 + (CUR - 'a') + 10;
3377: else if ((CUR >= 'A') && (CUR <= 'F'))
3378: val = val * 16 + (CUR - 'A') + 10;
1.24 daniel 3379: else {
1.55 daniel 3380: if ((ctxt->sax != NULL) && (ctxt->sax->error != NULL))
1.74 daniel 3381: ctxt->sax->error(ctxt->userData,
1.59 daniel 3382: "xmlParseCharRef: invalid hexadecimal value\n");
3383: ctxt->wellFormed = 0;
1.29 daniel 3384: val = 0;
1.24 daniel 3385: break;
3386: }
1.47 daniel 3387: NEXT;
1.24 daniel 3388: }
1.55 daniel 3389: if (CUR == ';')
1.40 daniel 3390: NEXT;
3391: } else if ((CUR == '&') && (NXT(1) == '#')) {
3392: SKIP(2);
3393: while (CUR != ';') {
3394: if ((CUR >= '0') && (CUR <= '9'))
1.55 daniel 3395: val = val * 10 + (CUR - '0');
1.24 daniel 3396: else {
1.55 daniel 3397: if ((ctxt->sax != NULL) && (ctxt->sax->error != NULL))
1.74 daniel 3398: ctxt->sax->error(ctxt->userData,
1.58 daniel 3399: "xmlParseCharRef: invalid decimal value\n");
1.59 daniel 3400: ctxt->wellFormed = 0;
1.29 daniel 3401: val = 0;
1.24 daniel 3402: break;
3403: }
1.47 daniel 3404: NEXT;
1.24 daniel 3405: }
1.55 daniel 3406: if (CUR == ';')
1.40 daniel 3407: NEXT;
1.24 daniel 3408: } else {
1.55 daniel 3409: if ((ctxt->sax != NULL) && (ctxt->sax->error != NULL))
1.74 daniel 3410: ctxt->sax->error(ctxt->userData, "xmlParseCharRef: invalid value\n");
1.59 daniel 3411: ctxt->wellFormed = 0;
1.24 daniel 3412: }
1.29 daniel 3413: /*
3414: * Check the value IS_CHAR ...
3415: */
1.44 daniel 3416: if (IS_CHAR(val)) {
1.77 daniel 3417: return(val);
1.44 daniel 3418: } else {
1.55 daniel 3419: if ((ctxt->sax != NULL) && (ctxt->sax->error != NULL))
1.74 daniel 3420: ctxt->sax->error(ctxt->userData, "xmlParseCharRef: invalid CHAR value %d\n",
1.58 daniel 3421: val);
1.59 daniel 3422: ctxt->wellFormed = 0;
1.29 daniel 3423: }
1.77 daniel 3424: return(0);
3425: }
3426:
3427: /**
3428: * xmlParseReference:
3429: * @ctxt: an XML parser context
3430: *
3431: * parse and handle entity references in content, depending on the SAX
3432: * interface, this may end-up in a call to character() if this is a
1.79 daniel 3433: * CharRef, a predefined entity, if there is no reference() callback.
3434: * or if the parser was asked to switch to that mode.
1.77 daniel 3435: *
3436: * [67] Reference ::= EntityRef | CharRef
3437: */
3438: void
3439: xmlParseReference(xmlParserCtxtPtr ctxt) {
3440: xmlEntityPtr ent;
3441: CHAR *val;
3442: if (CUR != '&') return;
3443:
3444: if (NXT(1) == '#') {
3445: CHAR out[2];
3446: int val = xmlParseCharRef(ctxt);
3447: /* TODO: invalid for UTF-8 variable encoding !!! */
3448: out[0] = val;
3449: out[1] = 0;
3450: if ((ctxt->sax != NULL) && (ctxt->sax->characters != NULL))
3451: ctxt->sax->characters(ctxt->userData, out, 1);
3452: } else {
3453: ent = xmlParseEntityRef(ctxt);
3454: if (ent == NULL) return;
3455: if ((ent->name != NULL) &&
3456: (ent->type != XML_INTERNAL_PREDEFINED_ENTITY) &&
1.79 daniel 3457: (ctxt->sax != NULL) && (ctxt->sax->reference != NULL) &&
3458: (ctxt->replaceEntities == 0)) {
3459:
1.77 daniel 3460: /*
3461: * Create a node.
3462: */
3463: ctxt->sax->reference(ctxt->userData, ent->name);
3464: return;
3465: }
3466: val = ent->content;
3467: if (val == NULL) return;
3468: /*
3469: * inline the entity.
3470: */
3471: if ((ctxt->sax != NULL) && (ctxt->sax->characters != NULL))
3472: ctxt->sax->characters(ctxt->userData, val, xmlStrlen(val));
3473: }
1.24 daniel 3474: }
3475:
1.50 daniel 3476: /**
3477: * xmlParseEntityRef:
3478: * @ctxt: an XML parser context
3479: *
3480: * parse ENTITY references declarations
1.24 daniel 3481: *
3482: * [68] EntityRef ::= '&' Name ';'
1.68 daniel 3483: *
1.77 daniel 3484: * Returns the xmlEntityPtr if found, or NULL otherwise.
1.24 daniel 3485: */
1.77 daniel 3486: xmlEntityPtr
1.55 daniel 3487: xmlParseEntityRef(xmlParserCtxtPtr ctxt) {
1.24 daniel 3488: CHAR *name;
1.72 daniel 3489: xmlEntityPtr ent = NULL;
1.24 daniel 3490:
1.91 daniel 3491: GROW;
1.40 daniel 3492: if (CUR == '&') {
3493: NEXT;
1.24 daniel 3494: name = xmlParseName(ctxt);
3495: if (name == NULL) {
1.55 daniel 3496: if ((ctxt->sax != NULL) && (ctxt->sax->error != NULL))
1.74 daniel 3497: ctxt->sax->error(ctxt->userData, "xmlParseEntityRef: no name\n");
1.59 daniel 3498: ctxt->wellFormed = 0;
1.24 daniel 3499: } else {
1.40 daniel 3500: if (CUR == ';') {
3501: NEXT;
1.24 daniel 3502: /*
1.77 daniel 3503: * Ask first SAX for entity resolution, otherwise try the
3504: * predefined set.
3505: */
3506: if (ctxt->sax != NULL) {
3507: if (ctxt->sax->getEntity != NULL)
3508: ent = ctxt->sax->getEntity(ctxt->userData, name);
3509: if (ent == NULL)
3510: ent = xmlGetPredefinedEntity(name);
3511: }
3512:
3513: /*
1.59 daniel 3514: * Well Formedness Constraint if:
3515: * - standalone
3516: * or
3517: * - no external subset and no external parameter entities
3518: * referenced
3519: * then
3520: * the entity referenced must have been declared
3521: *
1.72 daniel 3522: * TODO: to be double checked !!! This is wrong !
1.59 daniel 3523: */
1.77 daniel 3524: if (ent == NULL) {
3525: if (ctxt->sax != NULL) {
1.72 daniel 3526: if (((ctxt->sax->isStandalone != NULL) &&
1.77 daniel 3527: ctxt->sax->isStandalone(ctxt->userData) == 1) ||
1.72 daniel 3528: (((ctxt->sax->hasInternalSubset == NULL) ||
1.74 daniel 3529: ctxt->sax->hasInternalSubset(ctxt->userData) == 0) &&
1.72 daniel 3530: ((ctxt->sax->hasExternalSubset == NULL) ||
1.74 daniel 3531: ctxt->sax->hasExternalSubset(ctxt->userData) == 0))) {
1.77 daniel 3532: if (ctxt->sax->error != NULL)
3533: ctxt->sax->error(ctxt->userData,
3534: "Entity '%s' not defined\n", name);
3535: ctxt->wellFormed = 0;
3536: }
3537: } else {
3538: fprintf(stderr, "Entity '%s' not defined\n", name);
3539: ctxt->wellFormed = 0;
1.59 daniel 3540: }
1.77 daniel 3541: }
1.59 daniel 3542:
3543: /*
3544: * Well Formedness Constraint :
3545: * The referenced entity must be a parsed entity.
3546: */
3547: if (ent != NULL) {
3548: switch (ent->type) {
3549: case XML_INTERNAL_PARAMETER_ENTITY:
3550: case XML_EXTERNAL_PARAMETER_ENTITY:
3551: if ((ctxt->sax != NULL) && (ctxt->sax->error != NULL))
1.74 daniel 3552: ctxt->sax->error(ctxt->userData,
1.59 daniel 3553: "Attempt to reference the parameter entity '%s'\n", name);
3554: ctxt->wellFormed = 0;
3555: break;
3556:
3557: case XML_EXTERNAL_GENERAL_UNPARSED_ENTITY:
3558: if ((ctxt->sax != NULL) && (ctxt->sax->error != NULL))
1.74 daniel 3559: ctxt->sax->error(ctxt->userData,
1.59 daniel 3560: "Attempt to reference unparsed entity '%s'\n", name);
3561: ctxt->wellFormed = 0;
3562: break;
3563: }
3564: }
3565:
3566: /*
1.77 daniel 3567: * TODO: !!!
1.59 daniel 3568: * Well Formedness Constraint :
3569: * The referenced entity must not lead to recursion !
3570: */
3571:
1.77 daniel 3572:
1.24 daniel 3573: } else {
1.55 daniel 3574: if ((ctxt->sax != NULL) && (ctxt->sax->error != NULL))
1.74 daniel 3575: ctxt->sax->error(ctxt->userData,
1.59 daniel 3576: "xmlParseEntityRef: expecting ';'\n");
3577: ctxt->wellFormed = 0;
1.24 daniel 3578: }
1.45 daniel 3579: free(name);
1.24 daniel 3580: }
3581: }
1.77 daniel 3582: return(ent);
1.24 daniel 3583: }
3584:
1.50 daniel 3585: /**
3586: * xmlParsePEReference:
3587: * @ctxt: an XML parser context
3588: *
3589: * parse PEReference declarations
1.77 daniel 3590: * The entity content is handled directly by pushing it's content as
3591: * a new input stream.
1.22 daniel 3592: *
3593: * [69] PEReference ::= '%' Name ';'
1.68 daniel 3594: *
1.22 daniel 3595: */
1.77 daniel 3596: void
1.55 daniel 3597: xmlParsePEReference(xmlParserCtxtPtr ctxt) {
1.22 daniel 3598: CHAR *name;
1.72 daniel 3599: xmlEntityPtr entity = NULL;
1.50 daniel 3600: xmlParserInputPtr input;
1.22 daniel 3601:
1.40 daniel 3602: if (CUR == '%') {
3603: NEXT;
1.22 daniel 3604: name = xmlParseName(ctxt);
3605: if (name == NULL) {
1.55 daniel 3606: if ((ctxt->sax != NULL) && (ctxt->sax->error != NULL))
1.74 daniel 3607: ctxt->sax->error(ctxt->userData, "xmlParsePEReference: no name\n");
1.59 daniel 3608: ctxt->wellFormed = 0;
1.22 daniel 3609: } else {
1.40 daniel 3610: if (CUR == ';') {
3611: NEXT;
1.72 daniel 3612: if ((ctxt->sax != NULL) && (ctxt->sax->getEntity != NULL))
1.79 daniel 3613: entity = ctxt->sax->getEntity(ctxt->userData, name);
1.72 daniel 3614: /* TODO !!!! Must check that it's of the proper type !!! */
1.45 daniel 3615: if (entity == NULL) {
1.55 daniel 3616: if ((ctxt->sax != NULL) && (ctxt->sax->warning != NULL))
1.74 daniel 3617: ctxt->sax->warning(ctxt->userData,
1.59 daniel 3618: "xmlParsePEReference: %%%s; not found\n", name);
1.50 daniel 3619: } else {
3620: input = xmlNewEntityInputStream(ctxt, entity);
3621: xmlPushInput(ctxt, input);
1.45 daniel 3622: }
1.22 daniel 3623: } else {
1.55 daniel 3624: if ((ctxt->sax != NULL) && (ctxt->sax->error != NULL))
1.74 daniel 3625: ctxt->sax->error(ctxt->userData,
1.59 daniel 3626: "xmlParsePEReference: expecting ';'\n");
3627: ctxt->wellFormed = 0;
1.22 daniel 3628: }
1.45 daniel 3629: free(name);
1.3 veillard 3630: }
3631: }
3632: }
3633:
1.50 daniel 3634: /**
3635: * xmlParseDocTypeDecl :
3636: * @ctxt: an XML parser context
3637: *
3638: * parse a DOCTYPE declaration
1.21 daniel 3639: *
1.22 daniel 3640: * [28] doctypedecl ::= '<!DOCTYPE' S Name (S ExternalID)? S?
3641: * ('[' (markupdecl | PEReference | S)* ']' S?)? '>'
1.21 daniel 3642: */
3643:
1.55 daniel 3644: void
3645: xmlParseDocTypeDecl(xmlParserCtxtPtr ctxt) {
1.21 daniel 3646: CHAR *name;
3647: CHAR *ExternalID = NULL;
1.39 daniel 3648: CHAR *URI = NULL;
1.21 daniel 3649:
3650: /*
3651: * We know that '<!DOCTYPE' has been detected.
3652: */
1.40 daniel 3653: SKIP(9);
1.21 daniel 3654:
1.42 daniel 3655: SKIP_BLANKS;
1.21 daniel 3656:
3657: /*
3658: * Parse the DOCTYPE name.
3659: */
3660: name = xmlParseName(ctxt);
3661: if (name == NULL) {
1.55 daniel 3662: if ((ctxt->sax != NULL) && (ctxt->sax->error != NULL))
1.74 daniel 3663: ctxt->sax->error(ctxt->userData, "xmlParseDocTypeDecl : no DOCTYPE name !\n");
1.59 daniel 3664: ctxt->wellFormed = 0;
1.21 daniel 3665: }
3666:
1.42 daniel 3667: SKIP_BLANKS;
1.21 daniel 3668:
3669: /*
1.22 daniel 3670: * Check for SystemID and ExternalID
3671: */
1.67 daniel 3672: URI = xmlParseExternalID(ctxt, &ExternalID, 1);
1.42 daniel 3673: SKIP_BLANKS;
1.36 daniel 3674:
1.76 daniel 3675: /*
3676: * NOTE: the SAX callback may try to fetch the external subset
3677: * entity and fill it up !
3678: */
1.72 daniel 3679: if ((ctxt->sax != NULL) && (ctxt->sax->internalSubset != NULL))
1.74 daniel 3680: ctxt->sax->internalSubset(ctxt->userData, name, ExternalID, URI);
1.22 daniel 3681:
3682: /*
3683: * Is there any DTD definition ?
3684: */
1.40 daniel 3685: if (CUR == '[') {
3686: NEXT;
1.22 daniel 3687: /*
3688: * Parse the succession of Markup declarations and
3689: * PEReferences.
3690: * Subsequence (markupdecl | PEReference | S)*
3691: */
1.40 daniel 3692: while (CUR != ']') {
3693: const CHAR *check = CUR_PTR;
1.22 daniel 3694:
1.42 daniel 3695: SKIP_BLANKS;
1.22 daniel 3696: xmlParseMarkupDecl(ctxt);
1.50 daniel 3697: xmlParsePEReference(ctxt);
1.22 daniel 3698:
1.40 daniel 3699: if (CUR_PTR == check) {
1.55 daniel 3700: if ((ctxt->sax != NULL) && (ctxt->sax->error != NULL))
1.74 daniel 3701: ctxt->sax->error(ctxt->userData,
1.31 daniel 3702: "xmlParseDocTypeDecl: error detected in Markup declaration\n");
1.59 daniel 3703: ctxt->wellFormed = 0;
1.22 daniel 3704: break;
3705: }
1.77 daniel 3706:
3707: /*
3708: * Pop-up of finished entities.
3709: */
3710: while ((CUR == 0) && (ctxt->inputNr > 1))
3711: xmlPopInput(ctxt);
3712:
1.22 daniel 3713: }
1.40 daniel 3714: if (CUR == ']') NEXT;
1.22 daniel 3715: }
3716:
3717: /*
3718: * We should be at the end of the DOCTYPE declaration.
1.21 daniel 3719: */
1.40 daniel 3720: if (CUR != '>') {
1.55 daniel 3721: if ((ctxt->sax != NULL) && (ctxt->sax->error != NULL))
1.74 daniel 3722: ctxt->sax->error(ctxt->userData, "DOCTYPE unproperly terminated\n");
1.59 daniel 3723: ctxt->wellFormed = 0;
1.22 daniel 3724: /* We shouldn't try to resynchronize ... */
1.21 daniel 3725: }
1.40 daniel 3726: NEXT;
1.22 daniel 3727:
3728: /*
3729: * Cleanup, since we don't use all those identifiers
3730: * TODO : the DOCTYPE if available should be stored !
3731: */
1.39 daniel 3732: if (URI != NULL) free(URI);
1.22 daniel 3733: if (ExternalID != NULL) free(ExternalID);
3734: if (name != NULL) free(name);
1.21 daniel 3735: }
3736:
1.50 daniel 3737: /**
3738: * xmlParseAttribute:
3739: * @ctxt: an XML parser context
1.72 daniel 3740: * @value: a CHAR ** used to store the value of the attribute
1.50 daniel 3741: *
3742: * parse an attribute
1.3 veillard 3743: *
1.22 daniel 3744: * [41] Attribute ::= Name Eq AttValue
3745: *
3746: * [25] Eq ::= S? '=' S?
3747: *
1.29 daniel 3748: * With namespace:
3749: *
3750: * [NS 11] Attribute ::= QName Eq AttValue
1.43 daniel 3751: *
3752: * Also the case QName == xmlns:??? is handled independently as a namespace
3753: * definition.
1.69 daniel 3754: *
1.72 daniel 3755: * Returns the attribute name, and the value in *value.
1.3 veillard 3756: */
3757:
1.72 daniel 3758: CHAR *
3759: xmlParseAttribute(xmlParserCtxtPtr ctxt, CHAR **value) {
1.59 daniel 3760: CHAR *name, *val;
1.3 veillard 3761:
1.72 daniel 3762: *value = NULL;
3763: name = xmlParseName(ctxt);
1.22 daniel 3764: if (name == NULL) {
1.55 daniel 3765: if ((ctxt->sax != NULL) && (ctxt->sax->error != NULL))
1.74 daniel 3766: ctxt->sax->error(ctxt->userData, "error parsing attribute name\n");
1.59 daniel 3767: ctxt->wellFormed = 0;
1.52 daniel 3768: return(NULL);
1.3 veillard 3769: }
3770:
3771: /*
1.29 daniel 3772: * read the value
1.3 veillard 3773: */
1.42 daniel 3774: SKIP_BLANKS;
1.40 daniel 3775: if (CUR == '=') {
3776: NEXT;
1.42 daniel 3777: SKIP_BLANKS;
1.72 daniel 3778: val = xmlParseAttValue(ctxt);
1.29 daniel 3779: } else {
1.55 daniel 3780: if ((ctxt->sax != NULL) && (ctxt->sax->error != NULL))
1.74 daniel 3781: ctxt->sax->error(ctxt->userData,
1.59 daniel 3782: "Specification mandate value for attribute %s\n", name);
3783: ctxt->wellFormed = 0;
1.52 daniel 3784: return(NULL);
1.43 daniel 3785: }
3786:
1.72 daniel 3787: *value = val;
3788: return(name);
1.3 veillard 3789: }
3790:
1.50 daniel 3791: /**
3792: * xmlParseStartTag:
3793: * @ctxt: an XML parser context
3794: *
3795: * parse a start of tag either for rule element or
3796: * EmptyElement. In both case we don't parse the tag closing chars.
1.27 daniel 3797: *
3798: * [40] STag ::= '<' Name (S Attribute)* S? '>'
3799: *
1.29 daniel 3800: * [44] EmptyElemTag ::= '<' Name (S Attribute)* S? '/>'
3801: *
3802: * With namespace:
3803: *
3804: * [NS 8] STag ::= '<' QName (S Attribute)* S? '>'
3805: *
3806: * [NS 10] EmptyElement ::= '<' QName (S Attribute)* S? '/>'
1.83 daniel 3807: *
3808: * Returns the element name parsed
1.2 veillard 3809: */
3810:
1.83 daniel 3811: CHAR *
1.69 daniel 3812: xmlParseStartTag(xmlParserCtxtPtr ctxt) {
1.72 daniel 3813: CHAR *name;
3814: CHAR *attname;
3815: CHAR *attvalue;
3816: const CHAR **atts = NULL;
3817: int nbatts = 0;
3818: int maxatts = 0;
3819: int i;
1.2 veillard 3820:
1.83 daniel 3821: if (CUR != '<') return(NULL);
1.40 daniel 3822: NEXT;
1.3 veillard 3823:
1.72 daniel 3824: name = xmlParseName(ctxt);
1.59 daniel 3825: if (name == NULL) {
3826: if ((ctxt->sax != NULL) && (ctxt->sax->error != NULL))
1.74 daniel 3827: ctxt->sax->error(ctxt->userData,
1.59 daniel 3828: "xmlParseStartTag: invalid element name\n");
3829: ctxt->wellFormed = 0;
1.83 daniel 3830: return(NULL);
1.50 daniel 3831: }
3832:
3833: /*
1.3 veillard 3834: * Now parse the attributes, it ends up with the ending
3835: *
3836: * (S Attribute)* S?
3837: */
1.42 daniel 3838: SKIP_BLANKS;
1.91 daniel 3839: GROW;
1.40 daniel 3840: while ((IS_CHAR(CUR)) &&
3841: (CUR != '>') &&
3842: ((CUR != '/') || (NXT(1) != '>'))) {
3843: const CHAR *q = CUR_PTR;
1.91 daniel 3844: int cons = ctxt->input->consumed;
1.29 daniel 3845:
1.72 daniel 3846: attname = xmlParseAttribute(ctxt, &attvalue);
3847: if ((attname != NULL) && (attvalue != NULL)) {
3848: /*
3849: * Well formedness requires at most one declaration of an attribute
3850: */
3851: for (i = 0; i < nbatts;i += 2) {
3852: if (!xmlStrcmp(atts[i], attname)) {
3853: if ((ctxt->sax != NULL) && (ctxt->sax->error != NULL))
1.74 daniel 3854: ctxt->sax->error(ctxt->userData, "Attribute %s redefined\n",
1.72 daniel 3855: name);
3856: ctxt->wellFormed = 0;
3857: free(attname);
3858: free(attvalue);
3859: break;
3860: }
3861: }
3862:
3863: /*
3864: * Add the pair to atts
3865: */
3866: if (atts == NULL) {
3867: maxatts = 10;
3868: atts = (const CHAR **) malloc(maxatts * sizeof(CHAR *));
3869: if (atts == NULL) {
1.86 daniel 3870: fprintf(stderr, "malloc of %ld byte failed\n",
3871: maxatts * (long)sizeof(CHAR *));
1.83 daniel 3872: return(NULL);
1.72 daniel 3873: }
3874: } else if (nbatts + 2 < maxatts) {
3875: maxatts *= 2;
3876: atts = (const CHAR **) realloc(atts, maxatts * sizeof(CHAR *));
3877: if (atts == NULL) {
1.86 daniel 3878: fprintf(stderr, "realloc of %ld byte failed\n",
3879: maxatts * (long)sizeof(CHAR *));
1.83 daniel 3880: return(NULL);
1.72 daniel 3881: }
3882: }
3883: atts[nbatts++] = attname;
3884: atts[nbatts++] = attvalue;
3885: atts[nbatts] = NULL;
3886: atts[nbatts + 1] = NULL;
3887: }
3888:
1.42 daniel 3889: SKIP_BLANKS;
1.91 daniel 3890: if ((cons == ctxt->input->consumed) && (q == CUR_PTR)) {
1.55 daniel 3891: if ((ctxt->sax != NULL) && (ctxt->sax->error != NULL))
1.74 daniel 3892: ctxt->sax->error(ctxt->userData,
1.31 daniel 3893: "xmlParseStartTag: problem parsing attributes\n");
1.59 daniel 3894: ctxt->wellFormed = 0;
1.29 daniel 3895: break;
1.3 veillard 3896: }
1.91 daniel 3897: GROW;
1.3 veillard 3898: }
3899:
1.43 daniel 3900: /*
1.72 daniel 3901: * SAX: Start of Element !
1.43 daniel 3902: */
1.72 daniel 3903: if ((ctxt->sax != NULL) && (ctxt->sax->startElement != NULL))
1.74 daniel 3904: ctxt->sax->startElement(ctxt->userData, name, atts);
1.43 daniel 3905:
1.72 daniel 3906: if (atts != NULL) {
3907: for (i = 0;i < nbatts;i++) free((CHAR *) atts[i]);
3908: free(atts);
3909: }
1.83 daniel 3910: return(name);
1.3 veillard 3911: }
3912:
1.50 daniel 3913: /**
3914: * xmlParseEndTag:
3915: * @ctxt: an XML parser context
1.83 daniel 3916: * @tagname: the tag name as parsed in the opening tag.
1.50 daniel 3917: *
3918: * parse an end of tag
1.27 daniel 3919: *
3920: * [42] ETag ::= '</' Name S? '>'
1.29 daniel 3921: *
3922: * With namespace
3923: *
1.72 daniel 3924: * [NS 9] ETag ::= '</' QName S? '>'
1.7 veillard 3925: */
3926:
1.55 daniel 3927: void
1.83 daniel 3928: xmlParseEndTag(xmlParserCtxtPtr ctxt, CHAR *tagname) {
1.72 daniel 3929: CHAR *name;
1.7 veillard 3930:
1.91 daniel 3931: GROW;
1.40 daniel 3932: if ((CUR != '<') || (NXT(1) != '/')) {
1.55 daniel 3933: if ((ctxt->sax != NULL) && (ctxt->sax->error != NULL))
1.74 daniel 3934: ctxt->sax->error(ctxt->userData, "xmlParseEndTag: '</' not found\n");
1.59 daniel 3935: ctxt->wellFormed = 0;
1.27 daniel 3936: return;
3937: }
1.40 daniel 3938: SKIP(2);
1.7 veillard 3939:
1.72 daniel 3940: name = xmlParseName(ctxt);
1.7 veillard 3941:
3942: /*
3943: * We should definitely be at the ending "S? '>'" part
3944: */
1.91 daniel 3945: GROW;
1.42 daniel 3946: SKIP_BLANKS;
1.40 daniel 3947: if ((!IS_CHAR(CUR)) || (CUR != '>')) {
1.55 daniel 3948: if ((ctxt->sax != NULL) && (ctxt->sax->error != NULL))
1.74 daniel 3949: ctxt->sax->error(ctxt->userData, "End tag : expected '>'\n");
1.59 daniel 3950: ctxt->wellFormed = 0;
1.7 veillard 3951: } else
1.40 daniel 3952: NEXT;
1.7 veillard 3953:
1.72 daniel 3954: /*
1.83 daniel 3955: * Well formedness constraints, opening and closing must match.
3956: */
3957: if (xmlStrcmp(name, tagname)) {
3958: if ((ctxt->sax != NULL) && (ctxt->sax->error != NULL))
3959: ctxt->sax->error(ctxt->userData,
3960: "Opening and ending tag mismatch: %s and %s\n", tagname, name);
3961: ctxt->wellFormed = 0;
3962: }
3963:
3964: /*
1.72 daniel 3965: * SAX: End of Tag
3966: */
3967: if ((ctxt->sax != NULL) && (ctxt->sax->endElement != NULL))
1.74 daniel 3968: ctxt->sax->endElement(ctxt->userData, name);
1.72 daniel 3969:
3970: if (name != NULL)
3971: free(name);
3972:
1.7 veillard 3973: return;
3974: }
3975:
1.50 daniel 3976: /**
3977: * xmlParseCDSect:
3978: * @ctxt: an XML parser context
3979: *
3980: * Parse escaped pure raw content.
1.29 daniel 3981: *
3982: * [18] CDSect ::= CDStart CData CDEnd
3983: *
3984: * [19] CDStart ::= '<![CDATA['
3985: *
3986: * [20] Data ::= (Char* - (Char* ']]>' Char*))
3987: *
3988: * [21] CDEnd ::= ']]>'
1.3 veillard 3989: */
1.55 daniel 3990: void
3991: xmlParseCDSect(xmlParserCtxtPtr ctxt) {
1.17 daniel 3992: const CHAR *r, *s, *base;
1.3 veillard 3993:
1.40 daniel 3994: if ((CUR == '<') && (NXT(1) == '!') &&
3995: (NXT(2) == '[') && (NXT(3) == 'C') &&
3996: (NXT(4) == 'D') && (NXT(5) == 'A') &&
3997: (NXT(6) == 'T') && (NXT(7) == 'A') &&
3998: (NXT(8) == '[')) {
3999: SKIP(9);
1.29 daniel 4000: } else
1.45 daniel 4001: return;
1.40 daniel 4002: base = CUR_PTR;
4003: if (!IS_CHAR(CUR)) {
1.55 daniel 4004: if ((ctxt->sax != NULL) && (ctxt->sax->error != NULL))
1.74 daniel 4005: ctxt->sax->error(ctxt->userData, "CData section not finished\n%.50s\n", base);
1.59 daniel 4006: ctxt->wellFormed = 0;
1.45 daniel 4007: return;
1.3 veillard 4008: }
1.91 daniel 4009: r = CUR_PTR;
4010: NEXT;
1.40 daniel 4011: if (!IS_CHAR(CUR)) {
1.55 daniel 4012: if ((ctxt->sax != NULL) && (ctxt->sax->error != NULL))
1.74 daniel 4013: ctxt->sax->error(ctxt->userData, "CData section not finished\n%.50s\n", base);
1.59 daniel 4014: ctxt->wellFormed = 0;
1.45 daniel 4015: return;
1.3 veillard 4016: }
1.91 daniel 4017: s = CUR_PTR;
4018: NEXT;
1.40 daniel 4019: while (IS_CHAR(CUR) &&
4020: ((*r != ']') || (*s != ']') || (CUR != '>'))) {
4021: r++;s++;NEXT;
1.3 veillard 4022: }
1.40 daniel 4023: if (!IS_CHAR(CUR)) {
1.55 daniel 4024: if ((ctxt->sax != NULL) && (ctxt->sax->error != NULL))
1.74 daniel 4025: ctxt->sax->error(ctxt->userData, "CData section not finished\n%.50s\n", base);
1.59 daniel 4026: ctxt->wellFormed = 0;
1.45 daniel 4027: return;
1.3 veillard 4028: }
1.16 daniel 4029:
1.45 daniel 4030: /*
4031: * Ok the segment [base CUR_PTR] is to be consumed as chars.
4032: */
4033: if (ctxt->sax != NULL) {
1.72 daniel 4034: if (areBlanks(ctxt, base, CUR_PTR - base)) {
4035: if (ctxt->sax->ignorableWhitespace != NULL)
1.74 daniel 4036: ctxt->sax->ignorableWhitespace(ctxt->userData, base,
1.72 daniel 4037: (CUR_PTR - base) - 2);
4038: } else {
4039: if (ctxt->sax->characters != NULL)
1.74 daniel 4040: ctxt->sax->characters(ctxt->userData, base, (CUR_PTR - base) - 2);
1.72 daniel 4041: }
1.45 daniel 4042: }
1.2 veillard 4043: }
4044:
1.50 daniel 4045: /**
4046: * xmlParseContent:
4047: * @ctxt: an XML parser context
4048: *
4049: * Parse a content:
1.2 veillard 4050: *
1.27 daniel 4051: * [43] content ::= (element | CharData | Reference | CDSect | PI | Comment)*
1.2 veillard 4052: */
4053:
1.55 daniel 4054: void
4055: xmlParseContent(xmlParserCtxtPtr ctxt) {
1.40 daniel 4056: while ((CUR != '<') || (NXT(1) != '/')) {
4057: const CHAR *test = CUR_PTR;
1.91 daniel 4058: int cons = ctxt->input->consumed;
1.27 daniel 4059:
4060: /*
4061: * First case : a Processing Instruction.
4062: */
1.40 daniel 4063: if ((CUR == '<') && (NXT(1) == '?')) {
1.27 daniel 4064: xmlParsePI(ctxt);
4065: }
1.72 daniel 4066:
1.27 daniel 4067: /*
4068: * Second case : a CDSection
4069: */
1.40 daniel 4070: else if ((CUR == '<') && (NXT(1) == '!') &&
4071: (NXT(2) == '[') && (NXT(3) == 'C') &&
4072: (NXT(4) == 'D') && (NXT(5) == 'A') &&
4073: (NXT(6) == 'T') && (NXT(7) == 'A') &&
4074: (NXT(8) == '[')) {
1.45 daniel 4075: xmlParseCDSect(ctxt);
1.27 daniel 4076: }
1.72 daniel 4077:
1.27 daniel 4078: /*
4079: * Third case : a comment
4080: */
1.40 daniel 4081: else if ((CUR == '<') && (NXT(1) == '!') &&
4082: (NXT(2) == '-') && (NXT(3) == '-')) {
1.72 daniel 4083: xmlParseComment(ctxt, 1);
1.27 daniel 4084: }
1.72 daniel 4085:
1.27 daniel 4086: /*
4087: * Fourth case : a sub-element.
4088: */
1.40 daniel 4089: else if (CUR == '<') {
1.72 daniel 4090: xmlParseElement(ctxt);
1.45 daniel 4091: }
1.72 daniel 4092:
1.45 daniel 4093: /*
1.50 daniel 4094: * Fifth case : a reference. If if has not been resolved,
4095: * parsing returns it's Name, create the node
1.45 daniel 4096: */
4097: else if (CUR == '&') {
1.77 daniel 4098: xmlParseReference(ctxt);
1.27 daniel 4099: }
1.72 daniel 4100:
1.27 daniel 4101: /*
4102: * Last case, text. Note that References are handled directly.
4103: */
4104: else {
1.45 daniel 4105: xmlParseCharData(ctxt, 0);
1.3 veillard 4106: }
1.14 veillard 4107:
1.91 daniel 4108: GROW;
1.14 veillard 4109: /*
1.45 daniel 4110: * Pop-up of finished entities.
1.14 veillard 4111: */
1.69 daniel 4112: while ((CUR == 0) && (ctxt->inputNr > 1))
4113: xmlPopInput(ctxt);
1.45 daniel 4114:
1.91 daniel 4115: if ((cons == ctxt->input->consumed) && (test == CUR_PTR)) {
1.55 daniel 4116: if ((ctxt->sax != NULL) && (ctxt->sax->error != NULL))
1.74 daniel 4117: ctxt->sax->error(ctxt->userData,
1.59 daniel 4118: "detected an error in element content\n");
4119: ctxt->wellFormed = 0;
1.29 daniel 4120: break;
4121: }
1.3 veillard 4122: }
1.2 veillard 4123: }
4124:
1.50 daniel 4125: /**
4126: * xmlParseElement:
4127: * @ctxt: an XML parser context
4128: *
4129: * parse an XML element, this is highly recursive
1.26 daniel 4130: *
4131: * [39] element ::= EmptyElemTag | STag content ETag
4132: *
4133: * [41] Attribute ::= Name Eq AttValue
1.2 veillard 4134: */
1.26 daniel 4135:
1.72 daniel 4136: void
1.69 daniel 4137: xmlParseElement(xmlParserCtxtPtr ctxt) {
1.40 daniel 4138: const CHAR *openTag = CUR_PTR;
1.83 daniel 4139: CHAR *name;
1.32 daniel 4140: xmlParserNodeInfo node_info;
1.2 veillard 4141:
1.32 daniel 4142: /* Capture start position */
1.40 daniel 4143: node_info.begin_pos = CUR_PTR - ctxt->input->base;
4144: node_info.begin_line = ctxt->input->line;
1.32 daniel 4145:
1.83 daniel 4146: name = xmlParseStartTag(ctxt);
4147: if (name == NULL) {
4148: return;
4149: }
1.2 veillard 4150:
4151: /*
4152: * Check for an Empty Element.
4153: */
1.40 daniel 4154: if ((CUR == '/') && (NXT(1) == '>')) {
4155: SKIP(2);
1.72 daniel 4156: if ((ctxt->sax != NULL) && (ctxt->sax->endElement != NULL))
1.83 daniel 4157: ctxt->sax->endElement(ctxt->userData, name);
4158: free(name);
1.72 daniel 4159: return;
1.2 veillard 4160: }
1.91 daniel 4161: if (CUR == '>') {
4162: NEXT;
4163: } else {
1.55 daniel 4164: if ((ctxt->sax != NULL) && (ctxt->sax->error != NULL))
1.74 daniel 4165: ctxt->sax->error(ctxt->userData, "Couldn't find end of Start Tag\n%.30s\n",
1.57 daniel 4166: openTag);
1.59 daniel 4167: ctxt->wellFormed = 0;
1.45 daniel 4168:
4169: /*
4170: * end of parsing of this node.
4171: */
4172: nodePop(ctxt);
1.83 daniel 4173: free(name);
1.72 daniel 4174: return;
1.2 veillard 4175: }
4176:
4177: /*
4178: * Parse the content of the element:
4179: */
1.45 daniel 4180: xmlParseContent(ctxt);
1.40 daniel 4181: if (!IS_CHAR(CUR)) {
1.55 daniel 4182: if ((ctxt->sax != NULL) && (ctxt->sax->error != NULL))
1.74 daniel 4183: ctxt->sax->error(ctxt->userData,
1.57 daniel 4184: "Premature end of data in tag %.30s\n", openTag);
1.59 daniel 4185: ctxt->wellFormed = 0;
1.45 daniel 4186:
4187: /*
4188: * end of parsing of this node.
4189: */
4190: nodePop(ctxt);
1.83 daniel 4191: free(name);
1.72 daniel 4192: return;
1.2 veillard 4193: }
4194:
4195: /*
1.27 daniel 4196: * parse the end of tag: '</' should be here.
1.2 veillard 4197: */
1.83 daniel 4198: xmlParseEndTag(ctxt, name);
4199: free(name);
1.2 veillard 4200: }
4201:
1.50 daniel 4202: /**
4203: * xmlParseVersionNum:
4204: * @ctxt: an XML parser context
4205: *
4206: * parse the XML version value.
1.29 daniel 4207: *
4208: * [26] VersionNum ::= ([a-zA-Z0-9_.:] | '-')+
1.68 daniel 4209: *
4210: * Returns the string giving the XML version number, or NULL
1.29 daniel 4211: */
1.55 daniel 4212: CHAR *
4213: xmlParseVersionNum(xmlParserCtxtPtr ctxt) {
1.40 daniel 4214: const CHAR *q = CUR_PTR;
1.29 daniel 4215: CHAR *ret;
4216:
1.40 daniel 4217: while (IS_CHAR(CUR) &&
4218: (((CUR >= 'a') && (CUR <= 'z')) ||
4219: ((CUR >= 'A') && (CUR <= 'Z')) ||
4220: ((CUR >= '0') && (CUR <= '9')) ||
4221: (CUR == '_') || (CUR == '.') ||
4222: (CUR == ':') || (CUR == '-'))) NEXT;
4223: ret = xmlStrndup(q, CUR_PTR - q);
1.29 daniel 4224: return(ret);
4225: }
4226:
1.50 daniel 4227: /**
4228: * xmlParseVersionInfo:
4229: * @ctxt: an XML parser context
4230: *
4231: * parse the XML version.
1.29 daniel 4232: *
4233: * [24] VersionInfo ::= S 'version' Eq (' VersionNum ' | " VersionNum ")
4234: *
4235: * [25] Eq ::= S? '=' S?
1.50 daniel 4236: *
1.68 daniel 4237: * Returns the version string, e.g. "1.0"
1.29 daniel 4238: */
4239:
1.55 daniel 4240: CHAR *
4241: xmlParseVersionInfo(xmlParserCtxtPtr ctxt) {
1.29 daniel 4242: CHAR *version = NULL;
4243: const CHAR *q;
4244:
1.40 daniel 4245: if ((CUR == 'v') && (NXT(1) == 'e') &&
4246: (NXT(2) == 'r') && (NXT(3) == 's') &&
4247: (NXT(4) == 'i') && (NXT(5) == 'o') &&
4248: (NXT(6) == 'n')) {
4249: SKIP(7);
1.42 daniel 4250: SKIP_BLANKS;
1.40 daniel 4251: if (CUR != '=') {
1.55 daniel 4252: if ((ctxt->sax != NULL) && (ctxt->sax->error != NULL))
1.74 daniel 4253: ctxt->sax->error(ctxt->userData, "xmlParseVersionInfo : expected '='\n");
1.59 daniel 4254: ctxt->wellFormed = 0;
1.31 daniel 4255: return(NULL);
4256: }
1.40 daniel 4257: NEXT;
1.42 daniel 4258: SKIP_BLANKS;
1.40 daniel 4259: if (CUR == '"') {
4260: NEXT;
4261: q = CUR_PTR;
1.29 daniel 4262: version = xmlParseVersionNum(ctxt);
1.55 daniel 4263: if (CUR != '"') {
4264: if ((ctxt->sax != NULL) && (ctxt->sax->error != NULL))
1.74 daniel 4265: ctxt->sax->error(ctxt->userData, "String not closed\n%.50s\n", q);
1.59 daniel 4266: ctxt->wellFormed = 0;
1.55 daniel 4267: } else
1.40 daniel 4268: NEXT;
4269: } else if (CUR == '\''){
4270: NEXT;
4271: q = CUR_PTR;
1.29 daniel 4272: version = xmlParseVersionNum(ctxt);
1.55 daniel 4273: if (CUR != '\'') {
4274: if ((ctxt->sax != NULL) && (ctxt->sax->error != NULL))
1.74 daniel 4275: ctxt->sax->error(ctxt->userData, "String not closed\n%.50s\n", q);
1.59 daniel 4276: ctxt->wellFormed = 0;
1.55 daniel 4277: } else
1.40 daniel 4278: NEXT;
1.31 daniel 4279: } else {
1.55 daniel 4280: if ((ctxt->sax != NULL) && (ctxt->sax->error != NULL))
1.74 daniel 4281: ctxt->sax->error(ctxt->userData,
1.59 daniel 4282: "xmlParseVersionInfo : expected ' or \"\n");
4283: ctxt->wellFormed = 0;
1.29 daniel 4284: }
4285: }
4286: return(version);
4287: }
4288:
1.50 daniel 4289: /**
4290: * xmlParseEncName:
4291: * @ctxt: an XML parser context
4292: *
4293: * parse the XML encoding name
1.29 daniel 4294: *
4295: * [81] EncName ::= [A-Za-z] ([A-Za-z0-9._] | '-')*
1.50 daniel 4296: *
1.68 daniel 4297: * Returns the encoding name value or NULL
1.29 daniel 4298: */
1.55 daniel 4299: CHAR *
4300: xmlParseEncName(xmlParserCtxtPtr ctxt) {
1.40 daniel 4301: const CHAR *q = CUR_PTR;
1.29 daniel 4302: CHAR *ret = NULL;
4303:
1.40 daniel 4304: if (((CUR >= 'a') && (CUR <= 'z')) ||
4305: ((CUR >= 'A') && (CUR <= 'Z'))) {
4306: NEXT;
4307: while (IS_CHAR(CUR) &&
4308: (((CUR >= 'a') && (CUR <= 'z')) ||
4309: ((CUR >= 'A') && (CUR <= 'Z')) ||
4310: ((CUR >= '0') && (CUR <= '9')) ||
4311: (CUR == '-'))) NEXT;
4312: ret = xmlStrndup(q, CUR_PTR - q);
1.29 daniel 4313: } else {
1.55 daniel 4314: if ((ctxt->sax != NULL) && (ctxt->sax->error != NULL))
1.74 daniel 4315: ctxt->sax->error(ctxt->userData, "Invalid XML encoding name\n");
1.59 daniel 4316: ctxt->wellFormed = 0;
1.29 daniel 4317: }
4318: return(ret);
4319: }
4320:
1.50 daniel 4321: /**
4322: * xmlParseEncodingDecl:
4323: * @ctxt: an XML parser context
4324: *
4325: * parse the XML encoding declaration
1.29 daniel 4326: *
4327: * [80] EncodingDecl ::= S 'encoding' Eq ('"' EncName '"' | "'" EncName "'")
1.50 daniel 4328: *
4329: * TODO: this should setup the conversion filters.
4330: *
1.68 daniel 4331: * Returns the encoding value or NULL
1.29 daniel 4332: */
4333:
1.55 daniel 4334: CHAR *
4335: xmlParseEncodingDecl(xmlParserCtxtPtr ctxt) {
1.29 daniel 4336: CHAR *encoding = NULL;
4337: const CHAR *q;
4338:
1.42 daniel 4339: SKIP_BLANKS;
1.40 daniel 4340: if ((CUR == 'e') && (NXT(1) == 'n') &&
4341: (NXT(2) == 'c') && (NXT(3) == 'o') &&
4342: (NXT(4) == 'd') && (NXT(5) == 'i') &&
4343: (NXT(6) == 'n') && (NXT(7) == 'g')) {
4344: SKIP(8);
1.42 daniel 4345: SKIP_BLANKS;
1.40 daniel 4346: if (CUR != '=') {
1.55 daniel 4347: if ((ctxt->sax != NULL) && (ctxt->sax->error != NULL))
1.74 daniel 4348: ctxt->sax->error(ctxt->userData, "xmlParseEncodingDecl : expected '='\n");
1.59 daniel 4349: ctxt->wellFormed = 0;
1.31 daniel 4350: return(NULL);
4351: }
1.40 daniel 4352: NEXT;
1.42 daniel 4353: SKIP_BLANKS;
1.40 daniel 4354: if (CUR == '"') {
4355: NEXT;
4356: q = CUR_PTR;
1.29 daniel 4357: encoding = xmlParseEncName(ctxt);
1.55 daniel 4358: if (CUR != '"') {
4359: if ((ctxt->sax != NULL) && (ctxt->sax->error != NULL))
1.74 daniel 4360: ctxt->sax->error(ctxt->userData, "String not closed\n%.50s\n", q);
1.59 daniel 4361: ctxt->wellFormed = 0;
1.55 daniel 4362: } else
1.40 daniel 4363: NEXT;
4364: } else if (CUR == '\''){
4365: NEXT;
4366: q = CUR_PTR;
1.29 daniel 4367: encoding = xmlParseEncName(ctxt);
1.55 daniel 4368: if (CUR != '\'') {
4369: if ((ctxt->sax != NULL) && (ctxt->sax->error != NULL))
1.74 daniel 4370: ctxt->sax->error(ctxt->userData, "String not closed\n%.50s\n", q);
1.59 daniel 4371: ctxt->wellFormed = 0;
1.55 daniel 4372: } else
1.40 daniel 4373: NEXT;
4374: } else if (CUR == '"'){
1.55 daniel 4375: if ((ctxt->sax != NULL) && (ctxt->sax->error != NULL))
1.74 daniel 4376: ctxt->sax->error(ctxt->userData,
1.59 daniel 4377: "xmlParseEncodingDecl : expected ' or \"\n");
4378: ctxt->wellFormed = 0;
1.29 daniel 4379: }
4380: }
4381: return(encoding);
4382: }
4383:
1.50 daniel 4384: /**
4385: * xmlParseSDDecl:
4386: * @ctxt: an XML parser context
4387: *
4388: * parse the XML standalone declaration
1.29 daniel 4389: *
4390: * [32] SDDecl ::= S 'standalone' Eq
4391: * (("'" ('yes' | 'no') "'") | ('"' ('yes' | 'no')'"'))
1.68 daniel 4392: *
4393: * Returns 1 if standalone, 0 otherwise
1.29 daniel 4394: */
4395:
1.55 daniel 4396: int
4397: xmlParseSDDecl(xmlParserCtxtPtr ctxt) {
1.29 daniel 4398: int standalone = -1;
4399:
1.42 daniel 4400: SKIP_BLANKS;
1.40 daniel 4401: if ((CUR == 's') && (NXT(1) == 't') &&
4402: (NXT(2) == 'a') && (NXT(3) == 'n') &&
4403: (NXT(4) == 'd') && (NXT(5) == 'a') &&
4404: (NXT(6) == 'l') && (NXT(7) == 'o') &&
4405: (NXT(8) == 'n') && (NXT(9) == 'e')) {
4406: SKIP(10);
1.81 daniel 4407: SKIP_BLANKS;
1.40 daniel 4408: if (CUR != '=') {
1.55 daniel 4409: if ((ctxt->sax != NULL) && (ctxt->sax->error != NULL))
1.74 daniel 4410: ctxt->sax->error(ctxt->userData,
1.59 daniel 4411: "XML standalone declaration : expected '='\n");
4412: ctxt->wellFormed = 0;
1.32 daniel 4413: return(standalone);
4414: }
1.40 daniel 4415: NEXT;
1.42 daniel 4416: SKIP_BLANKS;
1.40 daniel 4417: if (CUR == '\''){
4418: NEXT;
4419: if ((CUR == 'n') && (NXT(1) == 'o')) {
1.29 daniel 4420: standalone = 0;
1.40 daniel 4421: SKIP(2);
4422: } else if ((CUR == 'y') && (NXT(1) == 'e') &&
4423: (NXT(2) == 's')) {
1.29 daniel 4424: standalone = 1;
1.40 daniel 4425: SKIP(3);
1.29 daniel 4426: } else {
1.55 daniel 4427: if ((ctxt->sax != NULL) && (ctxt->sax->error != NULL))
1.74 daniel 4428: ctxt->sax->error(ctxt->userData, "standalone accepts only 'yes' or 'no'\n");
1.59 daniel 4429: ctxt->wellFormed = 0;
1.29 daniel 4430: }
1.55 daniel 4431: if (CUR != '\'') {
4432: if ((ctxt->sax != NULL) && (ctxt->sax->error != NULL))
1.74 daniel 4433: ctxt->sax->error(ctxt->userData, "String not closed\n");
1.59 daniel 4434: ctxt->wellFormed = 0;
1.55 daniel 4435: } else
1.40 daniel 4436: NEXT;
4437: } else if (CUR == '"'){
4438: NEXT;
4439: if ((CUR == 'n') && (NXT(1) == 'o')) {
1.29 daniel 4440: standalone = 0;
1.40 daniel 4441: SKIP(2);
4442: } else if ((CUR == 'y') && (NXT(1) == 'e') &&
4443: (NXT(2) == 's')) {
1.29 daniel 4444: standalone = 1;
1.40 daniel 4445: SKIP(3);
1.29 daniel 4446: } else {
1.55 daniel 4447: if ((ctxt->sax != NULL) && (ctxt->sax->error != NULL))
1.74 daniel 4448: ctxt->sax->error(ctxt->userData,
1.59 daniel 4449: "standalone accepts only 'yes' or 'no'\n");
4450: ctxt->wellFormed = 0;
1.29 daniel 4451: }
1.55 daniel 4452: if (CUR != '"') {
4453: if ((ctxt->sax != NULL) && (ctxt->sax->error != NULL))
1.74 daniel 4454: ctxt->sax->error(ctxt->userData, "String not closed\n");
1.59 daniel 4455: ctxt->wellFormed = 0;
1.55 daniel 4456: } else
1.40 daniel 4457: NEXT;
1.37 daniel 4458: } else {
1.55 daniel 4459: if ((ctxt->sax != NULL) && (ctxt->sax->error != NULL))
1.74 daniel 4460: ctxt->sax->error(ctxt->userData, "Standalone value not found\n");
1.59 daniel 4461: ctxt->wellFormed = 0;
1.37 daniel 4462: }
1.29 daniel 4463: }
4464: return(standalone);
4465: }
4466:
1.50 daniel 4467: /**
4468: * xmlParseXMLDecl:
4469: * @ctxt: an XML parser context
4470: *
4471: * parse an XML declaration header
1.29 daniel 4472: *
4473: * [23] XMLDecl ::= '<?xml' VersionInfo EncodingDecl? SDDecl? S? '?>'
1.1 veillard 4474: */
4475:
1.55 daniel 4476: void
4477: xmlParseXMLDecl(xmlParserCtxtPtr ctxt) {
1.1 veillard 4478: CHAR *version;
4479:
4480: /*
1.19 daniel 4481: * We know that '<?xml' is here.
1.1 veillard 4482: */
1.40 daniel 4483: SKIP(5);
1.1 veillard 4484:
1.59 daniel 4485: if (!IS_BLANK(CUR)) {
4486: if ((ctxt->sax != NULL) && (ctxt->sax->error != NULL))
1.74 daniel 4487: ctxt->sax->error(ctxt->userData, "Blank needed after '<?xml'\n");
1.59 daniel 4488: ctxt->wellFormed = 0;
4489: }
1.42 daniel 4490: SKIP_BLANKS;
1.1 veillard 4491:
4492: /*
1.29 daniel 4493: * We should have the VersionInfo here.
1.1 veillard 4494: */
1.29 daniel 4495: version = xmlParseVersionInfo(ctxt);
4496: if (version == NULL)
1.45 daniel 4497: version = xmlCharStrdup(XML_DEFAULT_VERSION);
1.72 daniel 4498: ctxt->version = xmlStrdup(version);
1.45 daniel 4499: free(version);
1.29 daniel 4500:
4501: /*
4502: * We may have the encoding declaration
4503: */
1.59 daniel 4504: if (!IS_BLANK(CUR)) {
4505: if ((CUR == '?') && (NXT(1) == '>')) {
4506: SKIP(2);
4507: return;
4508: }
4509: if ((ctxt->sax != NULL) && (ctxt->sax->error != NULL))
1.74 daniel 4510: ctxt->sax->error(ctxt->userData, "Blank needed here\n");
1.59 daniel 4511: ctxt->wellFormed = 0;
4512: }
1.72 daniel 4513: ctxt->encoding = xmlParseEncodingDecl(ctxt);
1.1 veillard 4514:
4515: /*
1.29 daniel 4516: * We may have the standalone status.
1.1 veillard 4517: */
1.72 daniel 4518: if ((ctxt->encoding != NULL) && (!IS_BLANK(CUR))) {
1.59 daniel 4519: if ((CUR == '?') && (NXT(1) == '>')) {
4520: SKIP(2);
4521: return;
4522: }
4523: if ((ctxt->sax != NULL) && (ctxt->sax->error != NULL))
1.74 daniel 4524: ctxt->sax->error(ctxt->userData, "Blank needed here\n");
1.59 daniel 4525: ctxt->wellFormed = 0;
4526: }
4527: SKIP_BLANKS;
1.72 daniel 4528: ctxt->standalone = xmlParseSDDecl(ctxt);
1.1 veillard 4529:
1.42 daniel 4530: SKIP_BLANKS;
1.40 daniel 4531: if ((CUR == '?') && (NXT(1) == '>')) {
4532: SKIP(2);
4533: } else if (CUR == '>') {
1.31 daniel 4534: /* Deprecated old WD ... */
1.55 daniel 4535: if ((ctxt->sax != NULL) && (ctxt->sax->error != NULL))
1.74 daniel 4536: ctxt->sax->error(ctxt->userData, "XML declaration must end-up with '?>'\n");
1.59 daniel 4537: ctxt->wellFormed = 0;
1.40 daniel 4538: NEXT;
1.29 daniel 4539: } else {
1.55 daniel 4540: if ((ctxt->sax != NULL) && (ctxt->sax->error != NULL))
1.74 daniel 4541: ctxt->sax->error(ctxt->userData, "parsing XML declaration: '?>' expected\n");
1.59 daniel 4542: ctxt->wellFormed = 0;
1.40 daniel 4543: MOVETO_ENDTAG(CUR_PTR);
4544: NEXT;
1.29 daniel 4545: }
1.1 veillard 4546: }
4547:
1.50 daniel 4548: /**
4549: * xmlParseMisc:
4550: * @ctxt: an XML parser context
4551: *
4552: * parse an XML Misc* optionnal field.
1.21 daniel 4553: *
1.22 daniel 4554: * [27] Misc ::= Comment | PI | S
1.1 veillard 4555: */
4556:
1.55 daniel 4557: void
4558: xmlParseMisc(xmlParserCtxtPtr ctxt) {
1.40 daniel 4559: while (((CUR == '<') && (NXT(1) == '?')) ||
4560: ((CUR == '<') && (NXT(1) == '!') &&
4561: (NXT(2) == '-') && (NXT(3) == '-')) ||
4562: IS_BLANK(CUR)) {
4563: if ((CUR == '<') && (NXT(1) == '?')) {
1.16 daniel 4564: xmlParsePI(ctxt);
1.40 daniel 4565: } else if (IS_BLANK(CUR)) {
4566: NEXT;
1.1 veillard 4567: } else
1.31 daniel 4568: xmlParseComment(ctxt, 0);
1.1 veillard 4569: }
4570: }
4571:
1.50 daniel 4572: /**
4573: * xmlParseDocument :
4574: * @ctxt: an XML parser context
4575: *
4576: * parse an XML document (and build a tree if using the standard SAX
4577: * interface).
1.21 daniel 4578: *
1.22 daniel 4579: * [1] document ::= prolog element Misc*
1.29 daniel 4580: *
4581: * [22] prolog ::= XMLDecl? Misc* (doctypedecl Misc*)?
1.50 daniel 4582: *
1.68 daniel 4583: * Returns 0, -1 in case of error. the parser context is augmented
1.50 daniel 4584: * as a result of the parsing.
1.1 veillard 4585: */
4586:
1.55 daniel 4587: int
4588: xmlParseDocument(xmlParserCtxtPtr ctxt) {
1.45 daniel 4589: xmlDefaultSAXHandlerInit();
4590:
1.91 daniel 4591: GROW;
4592:
1.14 veillard 4593: /*
1.44 daniel 4594: * SAX: beginning of the document processing.
4595: */
1.72 daniel 4596: if ((ctxt->sax) && (ctxt->sax->setDocumentLocator))
1.74 daniel 4597: ctxt->sax->setDocumentLocator(ctxt->userData, &xmlDefaultSAXLocator);
1.44 daniel 4598:
4599: /*
1.14 veillard 4600: * We should check for encoding here and plug-in some
4601: * conversion code TODO !!!!
4602: */
1.1 veillard 4603:
4604: /*
4605: * Wipe out everything which is before the first '<'
4606: */
1.59 daniel 4607: if (IS_BLANK(CUR)) {
4608: if ((ctxt->sax != NULL) && (ctxt->sax->error != NULL))
1.74 daniel 4609: ctxt->sax->error(ctxt->userData,
1.59 daniel 4610: "Extra spaces at the beginning of the document are not allowed\n");
4611: ctxt->wellFormed = 0;
4612: SKIP_BLANKS;
4613: }
4614:
4615: if (CUR == 0) {
4616: if ((ctxt->sax != NULL) && (ctxt->sax->error != NULL))
1.74 daniel 4617: ctxt->sax->error(ctxt->userData, "Document is empty\n");
1.59 daniel 4618: ctxt->wellFormed = 0;
4619: }
1.1 veillard 4620:
4621: /*
4622: * Check for the XMLDecl in the Prolog.
4623: */
1.91 daniel 4624: GROW;
1.40 daniel 4625: if ((CUR == '<') && (NXT(1) == '?') &&
4626: (NXT(2) == 'x') && (NXT(3) == 'm') &&
4627: (NXT(4) == 'l')) {
1.19 daniel 4628: xmlParseXMLDecl(ctxt);
4629: /* SKIP_EOL(cur); */
1.42 daniel 4630: SKIP_BLANKS;
1.40 daniel 4631: } else if ((CUR == '<') && (NXT(1) == '?') &&
4632: (NXT(2) == 'X') && (NXT(3) == 'M') &&
4633: (NXT(4) == 'L')) {
1.19 daniel 4634: /*
4635: * The first drafts were using <?XML and the final W3C REC
4636: * now use <?xml ...
4637: */
1.16 daniel 4638: xmlParseXMLDecl(ctxt);
1.1 veillard 4639: /* SKIP_EOL(cur); */
1.42 daniel 4640: SKIP_BLANKS;
1.1 veillard 4641: } else {
1.72 daniel 4642: ctxt->version = xmlCharStrdup(XML_DEFAULT_VERSION);
1.1 veillard 4643: }
1.72 daniel 4644: if ((ctxt->sax) && (ctxt->sax->startDocument))
1.74 daniel 4645: ctxt->sax->startDocument(ctxt->userData);
1.1 veillard 4646:
4647: /*
4648: * The Misc part of the Prolog
4649: */
1.91 daniel 4650: GROW;
1.16 daniel 4651: xmlParseMisc(ctxt);
1.1 veillard 4652:
4653: /*
1.29 daniel 4654: * Then possibly doc type declaration(s) and more Misc
1.21 daniel 4655: * (doctypedecl Misc*)?
4656: */
1.91 daniel 4657: GROW;
1.40 daniel 4658: if ((CUR == '<') && (NXT(1) == '!') &&
4659: (NXT(2) == 'D') && (NXT(3) == 'O') &&
4660: (NXT(4) == 'C') && (NXT(5) == 'T') &&
4661: (NXT(6) == 'Y') && (NXT(7) == 'P') &&
4662: (NXT(8) == 'E')) {
1.22 daniel 4663: xmlParseDocTypeDecl(ctxt);
4664: xmlParseMisc(ctxt);
1.21 daniel 4665: }
4666:
4667: /*
4668: * Time to start parsing the tree itself
1.1 veillard 4669: */
1.91 daniel 4670: GROW;
1.72 daniel 4671: xmlParseElement(ctxt);
1.33 daniel 4672:
4673: /*
4674: * The Misc part at the end
4675: */
4676: xmlParseMisc(ctxt);
1.16 daniel 4677:
1.59 daniel 4678: if (CUR != 0) {
4679: if ((ctxt->sax != NULL) && (ctxt->sax->error != NULL))
1.74 daniel 4680: ctxt->sax->error(ctxt->userData,
1.59 daniel 4681: "Extra content at the end of the document\n");
4682: ctxt->wellFormed = 0;
4683: }
4684:
1.44 daniel 4685: /*
4686: * SAX: end of the document processing.
4687: */
1.72 daniel 4688: if ((ctxt->sax) && (ctxt->sax->endDocument != NULL))
1.74 daniel 4689: ctxt->sax->endDocument(ctxt->userData);
1.59 daniel 4690: if (! ctxt->wellFormed) return(-1);
1.16 daniel 4691: return(0);
4692: }
4693:
1.50 daniel 4694: /**
1.86 daniel 4695: * xmlCreateDocParserCtxt :
1.50 daniel 4696: * @cur: a pointer to an array of CHAR
4697: *
1.69 daniel 4698: * Create a parser context for an XML in-memory document.
4699: *
4700: * Returns the new parser context or NULL
1.16 daniel 4701: */
1.69 daniel 4702: xmlParserCtxtPtr
4703: xmlCreateDocParserCtxt(CHAR *cur) {
1.16 daniel 4704: xmlParserCtxtPtr ctxt;
1.40 daniel 4705: xmlParserInputPtr input;
1.75 daniel 4706: xmlCharEncoding enc;
1.16 daniel 4707:
4708: ctxt = (xmlParserCtxtPtr) malloc(sizeof(xmlParserCtxt));
4709: if (ctxt == NULL) {
4710: perror("malloc");
4711: return(NULL);
4712: }
1.40 daniel 4713: xmlInitParserCtxt(ctxt);
4714: input = (xmlParserInputPtr) malloc(sizeof(xmlParserInput));
4715: if (input == NULL) {
4716: perror("malloc");
4717: free(ctxt);
4718: return(NULL);
4719: }
4720:
1.75 daniel 4721: /*
4722: * plug some encoding conversion routines here. !!!
4723: */
4724: enc = xmlDetectCharEncoding(cur);
4725: xmlSwitchEncoding(ctxt, enc);
4726:
1.40 daniel 4727: input->filename = NULL;
4728: input->line = 1;
4729: input->col = 1;
4730: input->base = cur;
4731: input->cur = cur;
1.91 daniel 4732: input->buf = NULL;
1.69 daniel 4733: input->free = NULL;
1.93 veillard 4734: input->consumed = 0;
1.40 daniel 4735:
4736: inputPush(ctxt, input);
1.69 daniel 4737: return(ctxt);
4738: }
4739:
4740: /**
4741: * xmlSAXParseDoc :
4742: * @sax: the SAX handler block
4743: * @cur: a pointer to an array of CHAR
4744: * @recovery: work in recovery mode, i.e. tries to read no Well Formed
4745: * documents
4746: *
4747: * parse an XML in-memory document and build a tree.
4748: * It use the given SAX function block to handle the parsing callback.
4749: * If sax is NULL, fallback to the default DOM tree building routines.
4750: *
4751: * Returns the resulting document tree
4752: */
4753:
4754: xmlDocPtr
4755: xmlSAXParseDoc(xmlSAXHandlerPtr sax, CHAR *cur, int recovery) {
4756: xmlDocPtr ret;
4757: xmlParserCtxtPtr ctxt;
4758:
4759: if (cur == NULL) return(NULL);
1.16 daniel 4760:
4761:
1.69 daniel 4762: ctxt = xmlCreateDocParserCtxt(cur);
4763: if (ctxt == NULL) return(NULL);
1.74 daniel 4764: if (sax != NULL) {
4765: ctxt->sax = sax;
4766: ctxt->userData = NULL;
4767: }
1.69 daniel 4768:
1.16 daniel 4769: xmlParseDocument(ctxt);
1.72 daniel 4770: if ((ctxt->wellFormed) || recovery) ret = ctxt->myDoc;
1.59 daniel 4771: else {
4772: ret = NULL;
1.72 daniel 4773: xmlFreeDoc(ctxt->myDoc);
4774: ctxt->myDoc = NULL;
1.59 daniel 4775: }
1.86 daniel 4776: if (sax != NULL)
4777: ctxt->sax = NULL;
1.69 daniel 4778: xmlFreeParserCtxt(ctxt);
1.16 daniel 4779:
1.1 veillard 4780: return(ret);
4781: }
4782:
1.50 daniel 4783: /**
1.55 daniel 4784: * xmlParseDoc :
4785: * @cur: a pointer to an array of CHAR
4786: *
4787: * parse an XML in-memory document and build a tree.
4788: *
1.68 daniel 4789: * Returns the resulting document tree
1.55 daniel 4790: */
4791:
1.69 daniel 4792: xmlDocPtr
4793: xmlParseDoc(CHAR *cur) {
1.59 daniel 4794: return(xmlSAXParseDoc(NULL, cur, 0));
1.76 daniel 4795: }
4796:
4797: /**
4798: * xmlSAXParseDTD :
4799: * @sax: the SAX handler block
4800: * @ExternalID: a NAME* containing the External ID of the DTD
4801: * @SystemID: a NAME* containing the URL to the DTD
4802: *
4803: * Load and parse an external subset.
4804: *
4805: * Returns the resulting xmlDtdPtr or NULL in case of error.
4806: */
4807:
4808: xmlDtdPtr
4809: xmlSAXParseDTD(xmlSAXHandlerPtr sax, const CHAR *ExternalID,
4810: const CHAR *SystemID) {
4811: xmlDtdPtr ret = NULL;
4812: xmlParserCtxtPtr ctxt;
1.83 daniel 4813: xmlParserInputPtr input = NULL;
1.76 daniel 4814: xmlCharEncoding enc;
4815:
4816: if ((ExternalID == NULL) && (SystemID == NULL)) return(NULL);
4817:
4818: ctxt = (xmlParserCtxtPtr) malloc(sizeof(xmlParserCtxt));
4819: if (ctxt == NULL) {
4820: perror("malloc");
4821: return(NULL);
4822: }
4823: xmlInitParserCtxt(ctxt);
4824:
4825: /*
4826: * Set-up the SAX context
4827: */
4828: if (ctxt == NULL) return(NULL);
4829: if (sax != NULL) {
1.93 veillard 4830: if (ctxt->sax != NULL)
4831: free(ctxt->sax);
1.76 daniel 4832: ctxt->sax = sax;
4833: ctxt->userData = NULL;
4834: }
4835:
4836: /*
4837: * Ask the Entity resolver to load the damn thing
4838: */
4839:
4840: if ((ctxt->sax != NULL) && (ctxt->sax->resolveEntity != NULL))
4841: input = ctxt->sax->resolveEntity(ctxt->userData, ExternalID, SystemID);
4842: if (input == NULL) {
1.86 daniel 4843: if (sax != NULL) ctxt->sax = NULL;
1.76 daniel 4844: xmlFreeParserCtxt(ctxt);
4845: return(NULL);
4846: }
4847:
4848: /*
4849: * plug some encoding conversion routines here. !!!
4850: */
4851: xmlPushInput(ctxt, input);
4852: enc = xmlDetectCharEncoding(ctxt->input->cur);
4853: xmlSwitchEncoding(ctxt, enc);
4854:
1.95 ! veillard 4855: if (input->filename == NULL)
! 4856: input->filename = xmlStrdup(SystemID);
1.76 daniel 4857: input->line = 1;
4858: input->col = 1;
4859: input->base = ctxt->input->cur;
4860: input->cur = ctxt->input->cur;
4861: input->free = NULL;
4862:
4863: /*
4864: * let's parse that entity knowing it's an external subset.
4865: */
1.79 daniel 4866: xmlParseExternalSubset(ctxt, ExternalID, SystemID);
1.76 daniel 4867:
4868: if (ctxt->myDoc != NULL) {
4869: if (ctxt->wellFormed) {
4870: ret = ctxt->myDoc->intSubset;
4871: ctxt->myDoc->intSubset = NULL;
4872: } else {
4873: ret = NULL;
4874: }
4875: xmlFreeDoc(ctxt->myDoc);
4876: ctxt->myDoc = NULL;
4877: }
1.86 daniel 4878: if (sax != NULL) ctxt->sax = NULL;
1.76 daniel 4879: xmlFreeParserCtxt(ctxt);
4880:
4881: return(ret);
4882: }
4883:
4884: /**
4885: * xmlParseDTD :
4886: * @ExternalID: a NAME* containing the External ID of the DTD
4887: * @SystemID: a NAME* containing the URL to the DTD
4888: *
4889: * Load and parse an external subset.
4890: *
4891: * Returns the resulting xmlDtdPtr or NULL in case of error.
4892: */
4893:
4894: xmlDtdPtr
4895: xmlParseDTD(const CHAR *ExternalID, const CHAR *SystemID) {
4896: return(xmlSAXParseDTD(NULL, ExternalID, SystemID));
1.59 daniel 4897: }
4898:
4899: /**
4900: * xmlRecoverDoc :
4901: * @cur: a pointer to an array of CHAR
4902: *
4903: * parse an XML in-memory document and build a tree.
4904: * In the case the document is not Well Formed, a tree is built anyway
4905: *
1.68 daniel 4906: * Returns the resulting document tree
1.59 daniel 4907: */
4908:
1.69 daniel 4909: xmlDocPtr
4910: xmlRecoverDoc(CHAR *cur) {
1.59 daniel 4911: return(xmlSAXParseDoc(NULL, cur, 1));
1.55 daniel 4912: }
4913:
4914: /**
1.69 daniel 4915: * xmlCreateFileParserCtxt :
1.50 daniel 4916: * @filename: the filename
4917: *
1.69 daniel 4918: * Create a parser context for a file content.
4919: * Automatic support for ZLIB/Compress compressed document is provided
4920: * by default if found at compile-time.
1.50 daniel 4921: *
1.69 daniel 4922: * Returns the new parser context or NULL
1.9 httpng 4923: */
1.69 daniel 4924: xmlParserCtxtPtr
4925: xmlCreateFileParserCtxt(const char *filename)
4926: {
4927: xmlParserCtxtPtr ctxt;
1.40 daniel 4928: xmlParserInputPtr inputStream;
1.91 daniel 4929: xmlParserInputBufferPtr buf;
1.9 httpng 4930:
1.91 daniel 4931: buf = xmlParserInputBufferCreateFilename(filename, XML_CHAR_ENCODING_NONE);
4932: if (buf == NULL) return(NULL);
1.9 httpng 4933:
1.16 daniel 4934: ctxt = (xmlParserCtxtPtr) malloc(sizeof(xmlParserCtxt));
4935: if (ctxt == NULL) {
4936: perror("malloc");
4937: return(NULL);
4938: }
1.40 daniel 4939: xmlInitParserCtxt(ctxt);
4940: inputStream = (xmlParserInputPtr) malloc(sizeof(xmlParserInput));
4941: if (inputStream == NULL) {
4942: perror("malloc");
4943: free(ctxt);
4944: return(NULL);
4945: }
4946:
4947: inputStream->filename = strdup(filename);
1.95 ! veillard 4948: inputStream->directory = NULL;
1.40 daniel 4949: inputStream->line = 1;
4950: inputStream->col = 1;
1.91 daniel 4951: inputStream->buf = buf;
1.93 veillard 4952: inputStream->consumed = 0;
1.45 daniel 4953:
1.91 daniel 4954: inputStream->base = inputStream->buf->buffer->content;
4955: inputStream->cur = inputStream->buf->buffer->content;
4956: inputStream->free = NULL;
1.16 daniel 4957:
1.40 daniel 4958: inputPush(ctxt, inputStream);
1.69 daniel 4959: return(ctxt);
4960: }
4961:
4962: /**
4963: * xmlSAXParseFile :
4964: * @sax: the SAX handler block
4965: * @filename: the filename
4966: * @recovery: work in recovery mode, i.e. tries to read no Well Formed
4967: * documents
4968: *
4969: * parse an XML file and build a tree. Automatic support for ZLIB/Compress
4970: * compressed document is provided by default if found at compile-time.
4971: * It use the given SAX function block to handle the parsing callback.
4972: * If sax is NULL, fallback to the default DOM tree building routines.
4973: *
4974: * Returns the resulting document tree
4975: */
4976:
1.79 daniel 4977: xmlDocPtr
4978: xmlSAXParseFile(xmlSAXHandlerPtr sax, const char *filename,
1.69 daniel 4979: int recovery) {
4980: xmlDocPtr ret;
4981: xmlParserCtxtPtr ctxt;
4982:
4983: ctxt = xmlCreateFileParserCtxt(filename);
4984: if (ctxt == NULL) return(NULL);
1.74 daniel 4985: if (sax != NULL) {
1.93 veillard 4986: if (ctxt->sax != NULL)
4987: free(ctxt->sax);
1.74 daniel 4988: ctxt->sax = sax;
4989: ctxt->userData = NULL;
4990: }
1.16 daniel 4991:
4992: xmlParseDocument(ctxt);
1.40 daniel 4993:
1.72 daniel 4994: if ((ctxt->wellFormed) || recovery) ret = ctxt->myDoc;
1.59 daniel 4995: else {
4996: ret = NULL;
1.72 daniel 4997: xmlFreeDoc(ctxt->myDoc);
4998: ctxt->myDoc = NULL;
1.59 daniel 4999: }
1.86 daniel 5000: if (sax != NULL)
5001: ctxt->sax = NULL;
1.69 daniel 5002: xmlFreeParserCtxt(ctxt);
1.20 daniel 5003:
5004: return(ret);
5005: }
5006:
1.55 daniel 5007: /**
5008: * xmlParseFile :
5009: * @filename: the filename
5010: *
5011: * parse an XML file and build a tree. Automatic support for ZLIB/Compress
5012: * compressed document is provided by default if found at compile-time.
5013: *
1.68 daniel 5014: * Returns the resulting document tree
1.55 daniel 5015: */
5016:
1.79 daniel 5017: xmlDocPtr
5018: xmlParseFile(const char *filename) {
1.59 daniel 5019: return(xmlSAXParseFile(NULL, filename, 0));
5020: }
5021:
5022: /**
5023: * xmlRecoverFile :
5024: * @filename: the filename
5025: *
5026: * parse an XML file and build a tree. Automatic support for ZLIB/Compress
5027: * compressed document is provided by default if found at compile-time.
5028: * In the case the document is not Well Formed, a tree is built anyway
5029: *
1.68 daniel 5030: * Returns the resulting document tree
1.59 daniel 5031: */
5032:
1.79 daniel 5033: xmlDocPtr
5034: xmlRecoverFile(const char *filename) {
1.59 daniel 5035: return(xmlSAXParseFile(NULL, filename, 1));
1.55 daniel 5036: }
1.32 daniel 5037:
1.50 daniel 5038: /**
1.82 daniel 5039: * xmlSubstituteEntitiesDefault :
5040: * @val: int 0 or 1
1.79 daniel 5041: *
5042: * Set and return the previous value for default entity support.
5043: * Initially the parser always keep entity references instead of substituting
5044: * entity values in the output. This function has to be used to change the
5045: * default parser behaviour
5046: * SAX::subtituteEntities() has to be used for changing that on a file by
5047: * file basis.
5048: *
5049: * Returns the last value for 0 for no substitution, 1 for substitution.
5050: */
5051:
5052: int
5053: xmlSubstituteEntitiesDefault(int val) {
5054: int old = xmlSubstituteEntitiesDefaultValue;
5055:
5056: xmlSubstituteEntitiesDefaultValue = val;
5057: return(old);
5058: }
5059:
5060: /**
1.69 daniel 5061: * xmlCreateMemoryParserCtxt :
1.68 daniel 5062: * @buffer: an pointer to a char array
1.50 daniel 5063: * @size: the siwe of the array
5064: *
1.69 daniel 5065: * Create a parser context for an XML in-memory document.
1.50 daniel 5066: *
1.69 daniel 5067: * Returns the new parser context or NULL
1.20 daniel 5068: */
1.69 daniel 5069: xmlParserCtxtPtr
5070: xmlCreateMemoryParserCtxt(char *buffer, int size) {
1.20 daniel 5071: xmlParserCtxtPtr ctxt;
1.40 daniel 5072: xmlParserInputPtr input;
1.75 daniel 5073: xmlCharEncoding enc;
1.40 daniel 5074:
5075: buffer[size - 1] = '\0';
5076:
1.20 daniel 5077: ctxt = (xmlParserCtxtPtr) malloc(sizeof(xmlParserCtxt));
5078: if (ctxt == NULL) {
5079: perror("malloc");
5080: return(NULL);
5081: }
1.40 daniel 5082: xmlInitParserCtxt(ctxt);
5083: input = (xmlParserInputPtr) malloc(sizeof(xmlParserInput));
5084: if (input == NULL) {
5085: perror("malloc");
1.50 daniel 5086: free(ctxt->nodeTab);
5087: free(ctxt->inputTab);
1.40 daniel 5088: free(ctxt);
5089: return(NULL);
5090: }
1.20 daniel 5091:
1.40 daniel 5092: input->filename = NULL;
5093: input->line = 1;
5094: input->col = 1;
1.91 daniel 5095: input->consumed = 0;
1.45 daniel 5096:
5097: /*
1.75 daniel 5098: * plug some encoding conversion routines here. !!!
1.45 daniel 5099: */
1.75 daniel 5100: enc = xmlDetectCharEncoding(buffer);
5101: xmlSwitchEncoding(ctxt, enc);
5102:
1.40 daniel 5103: input->base = buffer;
5104: input->cur = buffer;
1.69 daniel 5105: input->free = NULL;
1.20 daniel 5106:
1.40 daniel 5107: inputPush(ctxt, input);
1.69 daniel 5108: return(ctxt);
5109: }
5110:
5111: /**
5112: * xmlSAXParseMemory :
5113: * @sax: the SAX handler block
5114: * @buffer: an pointer to a char array
5115: * @size: the siwe of the array
5116: * @recovery: work in recovery mode, i.e. tries to read no Well Formed
5117: * documents
5118: *
5119: * parse an XML in-memory block and use the given SAX function block
5120: * to handle the parsing callback. If sax is NULL, fallback to the default
5121: * DOM tree building routines.
5122: *
5123: * Returns the resulting document tree
5124: */
5125: xmlDocPtr
5126: xmlSAXParseMemory(xmlSAXHandlerPtr sax, char *buffer, int size, int recovery) {
5127: xmlDocPtr ret;
5128: xmlParserCtxtPtr ctxt;
5129:
5130: ctxt = xmlCreateMemoryParserCtxt(buffer, size);
5131: if (ctxt == NULL) return(NULL);
1.74 daniel 5132: if (sax != NULL) {
5133: ctxt->sax = sax;
5134: ctxt->userData = NULL;
5135: }
1.20 daniel 5136:
5137: xmlParseDocument(ctxt);
1.40 daniel 5138:
1.72 daniel 5139: if ((ctxt->wellFormed) || recovery) ret = ctxt->myDoc;
1.59 daniel 5140: else {
5141: ret = NULL;
1.72 daniel 5142: xmlFreeDoc(ctxt->myDoc);
5143: ctxt->myDoc = NULL;
1.59 daniel 5144: }
1.86 daniel 5145: if (sax != NULL)
5146: ctxt->sax = NULL;
1.69 daniel 5147: xmlFreeParserCtxt(ctxt);
1.16 daniel 5148:
1.9 httpng 5149: return(ret);
1.17 daniel 5150: }
5151:
1.55 daniel 5152: /**
5153: * xmlParseMemory :
1.68 daniel 5154: * @buffer: an pointer to a char array
1.55 daniel 5155: * @size: the size of the array
5156: *
5157: * parse an XML in-memory block and build a tree.
5158: *
1.68 daniel 5159: * Returns the resulting document tree
1.55 daniel 5160: */
5161:
5162: xmlDocPtr xmlParseMemory(char *buffer, int size) {
1.59 daniel 5163: return(xmlSAXParseMemory(NULL, buffer, size, 0));
5164: }
5165:
5166: /**
5167: * xmlRecoverMemory :
1.68 daniel 5168: * @buffer: an pointer to a char array
1.59 daniel 5169: * @size: the size of the array
5170: *
5171: * parse an XML in-memory block and build a tree.
5172: * In the case the document is not Well Formed, a tree is built anyway
5173: *
1.68 daniel 5174: * Returns the resulting document tree
1.59 daniel 5175: */
5176:
5177: xmlDocPtr xmlRecoverMemory(char *buffer, int size) {
5178: return(xmlSAXParseMemory(NULL, buffer, size, 1));
1.55 daniel 5179: }
1.17 daniel 5180:
1.50 daniel 5181: /**
5182: * xmlInitParserCtxt:
5183: * @ctxt: an XML parser context
5184: *
5185: * Initialize a parser context
5186: */
5187:
1.55 daniel 5188: void
5189: xmlInitParserCtxt(xmlParserCtxtPtr ctxt)
1.17 daniel 5190: {
1.86 daniel 5191: xmlSAXHandler *sax;
5192:
5193: sax = (xmlSAXHandler *) malloc(sizeof(xmlSAXHandler));
5194: if (sax == NULL) {
5195: fprintf(stderr, "xmlInitParserCtxt: out of memory\n");
5196: }
5197:
1.69 daniel 5198: /* Allocate the Input stack */
5199: ctxt->inputTab = (xmlParserInputPtr *) malloc(5 * sizeof(xmlParserInputPtr));
5200: ctxt->inputNr = 0;
5201: ctxt->inputMax = 5;
5202: ctxt->input = NULL;
1.72 daniel 5203: ctxt->version = NULL;
5204: ctxt->encoding = NULL;
5205: ctxt->standalone = -1;
1.69 daniel 5206:
5207: /* Allocate the Node stack */
5208: ctxt->nodeTab = (xmlNodePtr *) malloc(10 * sizeof(xmlNodePtr));
5209: ctxt->nodeNr = 0;
5210: ctxt->nodeMax = 10;
5211: ctxt->node = NULL;
5212:
1.86 daniel 5213: if (sax == NULL) ctxt->sax = &xmlDefaultSAXHandler;
5214: else {
5215: ctxt->sax = sax;
5216: memcpy(sax, &xmlDefaultSAXHandler, sizeof(xmlSAXHandler));
5217: }
1.74 daniel 5218: ctxt->userData = ctxt;
1.72 daniel 5219: ctxt->myDoc = NULL;
1.69 daniel 5220: ctxt->wellFormed = 1;
1.79 daniel 5221: ctxt->replaceEntities = xmlSubstituteEntitiesDefaultValue;
1.69 daniel 5222: ctxt->record_info = 0;
5223: xmlInitNodeInfoSeq(&ctxt->node_seq);
5224: }
5225:
5226: /**
5227: * xmlFreeParserCtxt:
5228: * @ctxt: an XML parser context
5229: *
5230: * Free all the memory used by a parser context. However the parsed
1.72 daniel 5231: * document in ctxt->myDoc is not freed.
1.69 daniel 5232: */
5233:
5234: void
5235: xmlFreeParserCtxt(xmlParserCtxtPtr ctxt)
5236: {
5237: xmlParserInputPtr input;
5238:
5239: if (ctxt == NULL) return;
5240:
5241: while ((input = inputPop(ctxt)) != NULL) {
5242: xmlFreeInputStream(input);
5243: }
5244:
5245: if (ctxt->nodeTab != NULL) free(ctxt->nodeTab);
5246: if (ctxt->inputTab != NULL) free(ctxt->inputTab);
1.73 daniel 5247: if (ctxt->version != NULL) free((char *) ctxt->version);
1.95 ! veillard 5248: if (ctxt->encoding != NULL) free((char *) ctxt->encoding);
1.86 daniel 5249: if ((ctxt->sax != NULL) && (ctxt->sax != &xmlDefaultSAXHandler))
5250: free(ctxt->sax);
1.69 daniel 5251: free(ctxt);
1.17 daniel 5252: }
5253:
1.50 daniel 5254: /**
5255: * xmlClearParserCtxt:
5256: * @ctxt: an XML parser context
5257: *
5258: * Clear (release owned resources) and reinitialize a parser context
5259: */
1.17 daniel 5260:
1.55 daniel 5261: void
5262: xmlClearParserCtxt(xmlParserCtxtPtr ctxt)
1.17 daniel 5263: {
1.32 daniel 5264: xmlClearNodeInfoSeq(&ctxt->node_seq);
5265: xmlInitParserCtxt(ctxt);
1.17 daniel 5266: }
5267:
5268:
1.50 daniel 5269: /**
5270: * xmlSetupParserForBuffer:
5271: * @ctxt: an XML parser context
5272: * @buffer: a CHAR * buffer
5273: * @filename: a file name
5274: *
1.19 daniel 5275: * Setup the parser context to parse a new buffer; Clears any prior
5276: * contents from the parser context. The buffer parameter must not be
5277: * NULL, but the filename parameter can be
5278: */
1.55 daniel 5279: void
5280: xmlSetupParserForBuffer(xmlParserCtxtPtr ctxt, const CHAR* buffer,
1.17 daniel 5281: const char* filename)
5282: {
1.40 daniel 5283: xmlParserInputPtr input;
5284:
5285: input = (xmlParserInputPtr) malloc(sizeof(xmlParserInput));
5286: if (input == NULL) {
5287: perror("malloc");
5288: free(ctxt);
5289: exit(1);
5290: }
5291:
1.17 daniel 5292: xmlClearParserCtxt(ctxt);
1.40 daniel 5293: if (input->filename != NULL)
5294: input->filename = strdup(filename);
5295: else
5296: input->filename = NULL;
5297: input->line = 1;
5298: input->col = 1;
5299: input->base = buffer;
5300: input->cur = buffer;
5301:
5302: inputPush(ctxt, input);
1.17 daniel 5303: }
5304:
1.32 daniel 5305:
1.50 daniel 5306: /**
5307: * xmlParserFindNodeInfo:
5308: * @ctxt: an XML parser context
5309: * @node: an XML node within the tree
5310: *
5311: * Find the parser node info struct for a given node
5312: *
1.68 daniel 5313: * Returns an xmlParserNodeInfo block pointer or NULL
1.32 daniel 5314: */
5315: const xmlParserNodeInfo* xmlParserFindNodeInfo(const xmlParserCtxt* ctx,
5316: const xmlNode* node)
5317: {
5318: unsigned long pos;
5319:
5320: /* Find position where node should be at */
5321: pos = xmlParserFindNodeInfoIndex(&ctx->node_seq, node);
5322: if ( ctx->node_seq.buffer[pos].node == node )
5323: return &ctx->node_seq.buffer[pos];
5324: else
5325: return NULL;
5326: }
5327:
5328:
1.50 daniel 5329: /**
5330: * xmlInitNodeInfoSeq :
5331: * @seq: a node info sequence pointer
5332: *
5333: * -- Initialize (set to initial state) node info sequence
1.32 daniel 5334: */
1.55 daniel 5335: void
5336: xmlInitNodeInfoSeq(xmlParserNodeInfoSeqPtr seq)
1.32 daniel 5337: {
5338: seq->length = 0;
5339: seq->maximum = 0;
5340: seq->buffer = NULL;
5341: }
5342:
1.50 daniel 5343: /**
5344: * xmlClearNodeInfoSeq :
5345: * @seq: a node info sequence pointer
5346: *
5347: * -- Clear (release memory and reinitialize) node
1.32 daniel 5348: * info sequence
5349: */
1.55 daniel 5350: void
5351: xmlClearNodeInfoSeq(xmlParserNodeInfoSeqPtr seq)
1.32 daniel 5352: {
5353: if ( seq->buffer != NULL )
5354: free(seq->buffer);
5355: xmlInitNodeInfoSeq(seq);
5356: }
5357:
5358:
1.50 daniel 5359: /**
5360: * xmlParserFindNodeInfoIndex:
5361: * @seq: a node info sequence pointer
5362: * @node: an XML node pointer
5363: *
5364: *
1.32 daniel 5365: * xmlParserFindNodeInfoIndex : Find the index that the info record for
5366: * the given node is or should be at in a sorted sequence
1.68 daniel 5367: *
5368: * Returns a long indicating the position of the record
1.32 daniel 5369: */
5370: unsigned long xmlParserFindNodeInfoIndex(const xmlParserNodeInfoSeq* seq,
5371: const xmlNode* node)
5372: {
5373: unsigned long upper, lower, middle;
5374: int found = 0;
5375:
5376: /* Do a binary search for the key */
5377: lower = 1;
5378: upper = seq->length;
5379: middle = 0;
5380: while ( lower <= upper && !found) {
5381: middle = lower + (upper - lower) / 2;
5382: if ( node == seq->buffer[middle - 1].node )
5383: found = 1;
5384: else if ( node < seq->buffer[middle - 1].node )
5385: upper = middle - 1;
5386: else
5387: lower = middle + 1;
5388: }
5389:
5390: /* Return position */
5391: if ( middle == 0 || seq->buffer[middle - 1].node < node )
5392: return middle;
5393: else
5394: return middle - 1;
5395: }
5396:
5397:
1.50 daniel 5398: /**
5399: * xmlParserAddNodeInfo:
5400: * @ctxt: an XML parser context
1.68 daniel 5401: * @info: a node info sequence pointer
1.50 daniel 5402: *
5403: * Insert node info record into the sorted sequence
1.32 daniel 5404: */
1.55 daniel 5405: void
5406: xmlParserAddNodeInfo(xmlParserCtxtPtr ctxt,
1.68 daniel 5407: const xmlParserNodeInfo* info)
1.32 daniel 5408: {
5409: unsigned long pos;
5410: static unsigned int block_size = 5;
5411:
5412: /* Find pos and check to see if node is already in the sequence */
1.55 daniel 5413: pos = xmlParserFindNodeInfoIndex(&ctxt->node_seq, info->node);
5414: if ( pos < ctxt->node_seq.length
5415: && ctxt->node_seq.buffer[pos].node == info->node ) {
5416: ctxt->node_seq.buffer[pos] = *info;
1.32 daniel 5417: }
5418:
5419: /* Otherwise, we need to add new node to buffer */
5420: else {
5421: /* Expand buffer by 5 if needed */
1.55 daniel 5422: if ( ctxt->node_seq.length + 1 > ctxt->node_seq.maximum ) {
1.32 daniel 5423: xmlParserNodeInfo* tmp_buffer;
1.55 daniel 5424: unsigned int byte_size = (sizeof(*ctxt->node_seq.buffer)
5425: *(ctxt->node_seq.maximum + block_size));
1.32 daniel 5426:
1.55 daniel 5427: if ( ctxt->node_seq.buffer == NULL )
1.32 daniel 5428: tmp_buffer = (xmlParserNodeInfo*)malloc(byte_size);
5429: else
1.55 daniel 5430: tmp_buffer = (xmlParserNodeInfo*)realloc(ctxt->node_seq.buffer, byte_size);
1.32 daniel 5431:
5432: if ( tmp_buffer == NULL ) {
1.55 daniel 5433: if ((ctxt->sax != NULL) && (ctxt->sax->error != NULL))
1.74 daniel 5434: ctxt->sax->error(ctxt->userData, "Out of memory\n");
1.32 daniel 5435: return;
5436: }
1.55 daniel 5437: ctxt->node_seq.buffer = tmp_buffer;
5438: ctxt->node_seq.maximum += block_size;
1.32 daniel 5439: }
5440:
5441: /* If position is not at end, move elements out of the way */
1.55 daniel 5442: if ( pos != ctxt->node_seq.length ) {
1.32 daniel 5443: unsigned long i;
5444:
1.55 daniel 5445: for ( i = ctxt->node_seq.length; i > pos; i-- )
5446: ctxt->node_seq.buffer[i] = ctxt->node_seq.buffer[i - 1];
1.32 daniel 5447: }
5448:
5449: /* Copy element and increase length */
1.55 daniel 5450: ctxt->node_seq.buffer[pos] = *info;
5451: ctxt->node_seq.length++;
1.32 daniel 5452: }
5453: }
1.77 daniel 5454:
5455:
Webmaster