Annotation of XML/parser.c, revision 1.111
1.1 veillard 1: /*
1.3 veillard 2: * parser.c : an XML 1.0 non-verifying parser
1.15 veillard 3: *
4: * See Copyright for the status of this software.
5: *
1.60 daniel 6: * Daniel.Veillard@w3.org
1.1 veillard 7: */
8:
1.26 daniel 9: #ifdef WIN32
10: #define HAVE_FCNTL_H
11: #include <io.h>
12: #else
1.9 httpng 13: #include <config.h>
1.26 daniel 14: #endif
1.1 veillard 15: #include <stdio.h>
16: #include <ctype.h>
1.14 veillard 17: #include <string.h> /* for memset() only */
1.50 daniel 18: #include <stdlib.h>
1.9 httpng 19: #include <sys/stat.h>
20: #ifdef HAVE_FCNTL_H
21: #include <fcntl.h>
22: #endif
1.10 httpng 23: #ifdef HAVE_UNISTD_H
24: #include <unistd.h>
25: #endif
1.20 daniel 26: #ifdef HAVE_ZLIB_H
27: #include <zlib.h>
28: #endif
1.1 veillard 29:
1.14 veillard 30: #include "tree.h"
1.1 veillard 31: #include "parser.h"
1.14 veillard 32: #include "entities.h"
1.75 daniel 33: #include "encoding.h"
1.61 daniel 34: #include "valid.h"
1.69 daniel 35: #include "parserInternals.h"
1.91 daniel 36: #include "xmlIO.h"
1.1 veillard 37:
1.86 daniel 38: const char *xmlParserVersion = LIBXML_VERSION;
39:
1.91 daniel 40: #define XML_MAX_NAMELEN 1000
41:
42: /************************************************************************
43: * *
44: * Input handling functions for progressive parsing *
45: * *
46: ************************************************************************/
47:
/* #define DEBUG_INPUT */

/*
 * INPUT_CHUNK is both the amount of data requested when refilling an input
 * and the lookahead threshold below which a refill is attempted.
 */
#define INPUT_CHUNK	250
/* we need to keep enough input to show errors in context */
#define LINE_LEN        80

#ifdef DEBUG_INPUT
#define CHECK_BUFFER(in) check_buffer(in)

/**
 * check_buffer:
 * @in:  an XML parser input backed by an I/O buffer
 *
 * Debugging helper, compiled only when DEBUG_INPUT is defined. It checks
 * that the base and cur pointers of @in are consistent with the underlying
 * buffer, complains on stderr otherwise, and then dumps the buffer state.
 */
void check_buffer(xmlParserInputPtr in) {
    if (in->base != in->buf->buffer->content) {
        fprintf(stderr, "xmlParserInput: base mismatch problem\n");
    }
    if (in->cur < in->base) {
        fprintf(stderr, "xmlParserInput: cur < base problem\n");
    }
    if (in->cur > in->base + in->buf->buffer->use) {
        fprintf(stderr, "xmlParserInput: cur > base + use problem\n");
    }
    /*
     * Pointers are printed with %p and the pointer difference is widened
     * to long: casting them to int truncates on 64 bit platforms.
     */
    fprintf(stderr, "buffer %p : content %p, cur %ld, use %d, size %d\n",
            (void *) in, (void *) in->buf->buffer->content,
            (long) (in->cur - in->base),
            (int) in->buf->buffer->use, (int) in->buf->buffer->size);
}

#else
#define CHECK_BUFFER(in) 
#endif
75:
1.91 daniel 76:
77: /**
78: * xmlParserInputRead:
79: * @in: an XML parser input
80: * @len: an indicative size for the lookahead
81: *
82: * This function refresh the input for the parser. It doesn't try to
83: * preserve pointers to the input buffer, and discard already read data
84: *
85: * Returns the number of CHARs read, or -1 in case of error, 0 indicate the
86: * end of this entity
87: */
88: int
89: xmlParserInputRead(xmlParserInputPtr in, int len) {
90: int ret;
91: int used;
92: int index;
93:
94: #ifdef DEBUG_INPUT
95: fprintf(stderr, "Read\n");
96: #endif
97: if (in->buf == NULL) return(-1);
98: if (in->base == NULL) return(-1);
99: if (in->cur == NULL) return(-1);
100: if (in->buf->buffer == NULL) return(-1);
101:
102: CHECK_BUFFER(in);
103:
104: used = in->cur - in->buf->buffer->content;
105: ret = xmlBufferShrink(in->buf->buffer, used);
106: if (ret > 0) {
107: in->cur -= ret;
108: in->consumed += ret;
109: }
110: ret = xmlParserInputBufferRead(in->buf, len);
111: if (in->base != in->buf->buffer->content) {
112: /*
113: * the buffer has been realloced
114: */
115: index = in->cur - in->base;
116: in->base = in->buf->buffer->content;
117: in->cur = &in->buf->buffer->content[index];
118: }
119:
120: CHECK_BUFFER(in);
121:
122: return(ret);
123: }
124:
125: /**
126: * xmlParserInputGrow:
127: * @in: an XML parser input
128: * @len: an indicative size for the lookahead
129: *
130: * This function increase the input for the parser. It tries to
131: * preserve pointers to the input buffer, and keep already read data
132: *
133: * Returns the number of CHARs read, or -1 in case of error, 0 indicate the
134: * end of this entity
135: */
136: int
137: xmlParserInputGrow(xmlParserInputPtr in, int len) {
138: int ret;
139: int index;
140:
141: #ifdef DEBUG_INPUT
142: fprintf(stderr, "Grow\n");
143: #endif
144: if (in->buf == NULL) return(-1);
145: if (in->base == NULL) return(-1);
146: if (in->cur == NULL) return(-1);
147: if (in->buf->buffer == NULL) return(-1);
148:
149: CHECK_BUFFER(in);
150:
151: index = in->cur - in->base;
152: if (in->buf->buffer->use > index + INPUT_CHUNK) {
153:
154: CHECK_BUFFER(in);
155:
156: return(0);
157: }
158: ret = xmlParserInputBufferGrow(in->buf, len);
159: if (in->base != in->buf->buffer->content) {
160: /*
161: * the buffer has been realloced
162: */
163: index = in->cur - in->base;
164: in->base = in->buf->buffer->content;
165: in->cur = &in->buf->buffer->content[index];
166: }
167:
168: CHECK_BUFFER(in);
169:
170: return(ret);
171: }
172:
173: /**
174: * xmlParserInputShrink:
175: * @in: an XML parser input
176: *
177: * This function removes used input for the parser.
178: */
179: void
180: xmlParserInputShrink(xmlParserInputPtr in) {
181: int used;
182: int ret;
183: int index;
184:
185: #ifdef DEBUG_INPUT
186: fprintf(stderr, "Shrink\n");
187: #endif
188: if (in->buf == NULL) return;
189: if (in->base == NULL) return;
190: if (in->cur == NULL) return;
191: if (in->buf->buffer == NULL) return;
192:
193: CHECK_BUFFER(in);
194:
195: used = in->cur - in->buf->buffer->content;
196: if (used > INPUT_CHUNK) {
1.110 daniel 197: ret = xmlBufferShrink(in->buf->buffer, used - LINE_LEN);
1.91 daniel 198: if (ret > 0) {
199: in->cur -= ret;
200: in->consumed += ret;
201: }
202: }
203:
204: CHECK_BUFFER(in);
205:
206: if (in->buf->buffer->use > INPUT_CHUNK) {
207: return;
208: }
209: xmlParserInputBufferRead(in->buf, 2 * INPUT_CHUNK);
210: if (in->base != in->buf->buffer->content) {
211: /*
212: * the buffer has been realloced
213: */
214: index = in->cur - in->base;
215: in->base = in->buf->buffer->content;
216: in->cur = &in->buf->buffer->content[index];
217: }
218:
219: CHECK_BUFFER(in);
220: }
221:
1.45 daniel 222: /************************************************************************
223: * *
224: * Parser stacks related functions and macros *
225: * *
226: ************************************************************************/
1.79 daniel 227:
/* Value copied into ctxt->replaceEntities by xmlInitParserCtxt(). */
int xmlSubstituteEntitiesDefaultValue = 0;
/* Value copied into ctxt->validate by xmlInitParserCtxt(). */
int xmlDoValidityCheckingDefaultValue = 0;
1.79 daniel 230:
1.1 veillard 231: /*
1.40 daniel 232: * Generic function for accessing stacks in the Parser Context
1.1 veillard 233: */
234:
1.31 daniel 235: #define PUSH_AND_POP(type, name) \
1.72 daniel 236: extern int name##Push(xmlParserCtxtPtr ctxt, type value) { \
1.31 daniel 237: if (ctxt->name##Nr >= ctxt->name##Max) { \
238: ctxt->name##Max *= 2; \
1.40 daniel 239: ctxt->name##Tab = (void *) realloc(ctxt->name##Tab, \
240: ctxt->name##Max * sizeof(ctxt->name##Tab[0])); \
241: if (ctxt->name##Tab == NULL) { \
1.31 daniel 242: fprintf(stderr, "realloc failed !\n"); \
243: exit(1); \
244: } \
245: } \
1.40 daniel 246: ctxt->name##Tab[ctxt->name##Nr] = value; \
247: ctxt->name = value; \
248: return(ctxt->name##Nr++); \
1.31 daniel 249: } \
1.72 daniel 250: extern type name##Pop(xmlParserCtxtPtr ctxt) { \
1.69 daniel 251: type ret; \
1.40 daniel 252: if (ctxt->name##Nr <= 0) return(0); \
253: ctxt->name##Nr--; \
1.50 daniel 254: if (ctxt->name##Nr > 0) \
255: ctxt->name = ctxt->name##Tab[ctxt->name##Nr - 1]; \
256: else \
257: ctxt->name = NULL; \
1.69 daniel 258: ret = ctxt->name##Tab[ctxt->name##Nr]; \
259: ctxt->name##Tab[ctxt->name##Nr] = 0; \
260: return(ret); \
1.31 daniel 261: } \
262:
1.40 daniel 263: PUSH_AND_POP(xmlParserInputPtr, input)
1.41 daniel 264: PUSH_AND_POP(xmlNodePtr, node)
1.40 daniel 265:
/*
 * Macros for accessing the content. Those should be used only by the parser,
 * and not exported. All of them expect a variable named ctxt, the current
 * xmlParserCtxtPtr, to be in scope.
 *
 * Dirty macros, i.e. one needs to make assumptions on the context to use them
 *
 *   CUR_PTR return the current pointer to the CHAR to be parsed.
 *   CUR     returns the pending ctxt->token if there is one, otherwise the
 *           current CHAR value. This should be used internally by the parser
 *           only to compare to ASCII values otherwise it would break when
 *           running with UTF-8 encoding.
 *   NXT(n)  returns the n'th next CHAR. Same as CUR it should be used only
 *           to compare on ASCII based substring. It does not look at
 *           ctxt->token.
 *   SKIP(n) Skip n CHAR, and must also be used only to skip ASCII defined
 *           strings within the parser.
 *   SHRINK  discard consumed input, and pop the input if it is exhausted.
 *   GROW    make more input available, and pop the input if it is exhausted.
 *
 * SHRINK, GROW and SKIP_BLANKS expand to more than one statement, and NEXT
 * expands to a braced block: they must be used as plain statements and
 * never as the unbraced body of an if/else/while.
 *
 * Clean macros, not dependent of an ASCII context
 *
 *   CURRENT Returns the current char value from the input, ignoring any
 *           pending token.
 *   NEXT    Skip to the next character: a pending token is dropped first,
 *           otherwise the input advances by one CHAR, line and column
 *           are updated, finished inputs are popped and a '%' or '&'
 *           found at the new position is handed to the reference handlers.
 *
 * CURRENT and NEXT are only defined when USE_UTF_8 is not set; there is no
 * UTF-8 variant of them here.
 */

#define CUR (ctxt->token ? ctxt->token : (*ctxt->input->cur))
#define SKIP(val) (ctxt->input->cur += (val))
#define NXT(val) (ctxt->input->cur[(val)])
#define CUR_PTR (ctxt->input->cur)
#define SHRINK  xmlParserInputShrink(ctxt->input);			\
    if ((*ctxt->input->cur == 0) &&					\
        (xmlParserInputGrow(ctxt->input, INPUT_CHUNK) <= 0))		\
        xmlPopInput(ctxt)

#define GROW  xmlParserInputGrow(ctxt->input, INPUT_CHUNK);		\
    if ((*ctxt->input->cur == 0) &&					\
        (xmlParserInputGrow(ctxt->input, INPUT_CHUNK) <= 0))		\
        xmlPopInput(ctxt)

#define SKIP_BLANKS 							\
    do { 								\
        while (IS_BLANK(CUR)) NEXT;					\
        if (*ctxt->input->cur == '%') xmlParserHandlePEReference(ctxt);	\
        if (*ctxt->input->cur == '&') xmlParserHandleReference(ctxt);	\
    } while (IS_BLANK(CUR));

#ifndef USE_UTF_8
#define CURRENT (*ctxt->input->cur)
#define NEXT {								\
    if (ctxt->token != 0) ctxt->token = 0;				\
    else {								\
    if ((*ctxt->input->cur == 0) &&					\
        (xmlParserInputGrow(ctxt->input, INPUT_CHUNK) <= 0)) {		\
            xmlPopInput(ctxt);						\
    } else {								\
        if (*(ctxt->input->cur) == '\n') {				\
            ctxt->input->line++; ctxt->input->col = 1;			\
        } else ctxt->input->col++;					\
        ctxt->input->cur++;						\
        if (*ctxt->input->cur == 0)					\
            xmlParserInputGrow(ctxt->input, INPUT_CHUNK);		\
    }									\
    if (*ctxt->input->cur == '%') xmlParserHandlePEReference(ctxt);	\
    if (*ctxt->input->cur == '&') xmlParserHandleReference(ctxt);	\
}}

#else
#endif
1.42 daniel 335:
1.97 daniel 336: /************************************************************************
337: * *
338: * Commodity functions to handle entities processing *
339: * *
340: ************************************************************************/
1.40 daniel 341:
1.50 daniel 342: /**
343: * xmlPopInput:
344: * @ctxt: an XML parser context
345: *
1.40 daniel 346: * xmlPopInput: the current input pointed by ctxt->input came to an end
347: * pop it and return the next char.
1.45 daniel 348: *
1.68 daniel 349: * Returns the current CHAR in the parser context
1.40 daniel 350: */
1.55 daniel 351: CHAR
352: xmlPopInput(xmlParserCtxtPtr ctxt) {
1.40 daniel 353: if (ctxt->inputNr == 1) return(0); /* End of main Input */
1.69 daniel 354: xmlFreeInputStream(inputPop(ctxt));
1.97 daniel 355: if ((*ctxt->input->cur == 0) &&
356: (xmlParserInputGrow(ctxt->input, INPUT_CHUNK) <= 0))
357: return(xmlPopInput(ctxt));
1.40 daniel 358: return(CUR);
359: }
360:
1.50 daniel 361: /**
362: * xmlPushInput:
363: * @ctxt: an XML parser context
364: * @input: an XML parser input fragment (entity, XML fragment ...).
365: *
1.40 daniel 366: * xmlPushInput: switch to a new input stream which is stacked on top
367: * of the previous one(s).
368: */
1.55 daniel 369: void
370: xmlPushInput(xmlParserCtxtPtr ctxt, xmlParserInputPtr input) {
1.40 daniel 371: if (input == NULL) return;
372: inputPush(ctxt, input);
373: }
374:
1.50 daniel 375: /**
1.69 daniel 376: * xmlFreeInputStream:
1.101 daniel 377: * @input: an xmlP arserInputPtr
1.69 daniel 378: *
379: * Free up an input stream.
380: */
381: void
382: xmlFreeInputStream(xmlParserInputPtr input) {
383: if (input == NULL) return;
384:
385: if (input->filename != NULL) free((char *) input->filename);
1.94 daniel 386: if (input->directory != NULL) free((char *) input->directory);
1.69 daniel 387: if ((input->free != NULL) && (input->base != NULL))
388: input->free((char *) input->base);
1.93 veillard 389: if (input->buf != NULL)
390: xmlFreeParserInputBuffer(input->buf);
1.69 daniel 391: memset(input, -1, sizeof(xmlParserInput));
392: free(input);
393: }
394:
395: /**
1.96 daniel 396: * xmlNewInputStream:
397: * @ctxt: an XML parser context
398: *
399: * Create a new input stream structure
400: * Returns the new input stream or NULL
401: */
402: xmlParserInputPtr
403: xmlNewInputStream(xmlParserCtxtPtr ctxt) {
404: xmlParserInputPtr input;
405:
406: input = (xmlParserInputPtr) malloc(sizeof(xmlParserInput));
407: if (input == NULL) {
408: if ((ctxt->sax != NULL) && (ctxt->sax->error != NULL))
409: ctxt->sax->error(ctxt->userData, "malloc: couldn't allocate a new input stream\n");
410: return(NULL);
411: }
412: input->filename = NULL;
413: input->directory = NULL;
414: input->base = NULL;
415: input->cur = NULL;
416: input->buf = NULL;
417: input->line = 1;
418: input->col = 1;
419: input->buf = NULL;
420: input->free = NULL;
421: input->consumed = 0;
422: return(input);
423: }
424:
425: /**
1.50 daniel 426: * xmlNewEntityInputStream:
427: * @ctxt: an XML parser context
428: * @entity: an Entity pointer
429: *
1.82 daniel 430: * Create a new input stream based on an xmlEntityPtr
1.68 daniel 431: * Returns the new input stream
1.45 daniel 432: */
1.50 daniel 433: xmlParserInputPtr
434: xmlNewEntityInputStream(xmlParserCtxtPtr ctxt, xmlEntityPtr entity) {
1.45 daniel 435: xmlParserInputPtr input;
436:
437: if (entity == NULL) {
1.55 daniel 438: if ((ctxt->sax != NULL) && (ctxt->sax->error != NULL))
1.74 daniel 439: ctxt->sax->error(ctxt->userData,
1.45 daniel 440: "internal: xmlNewEntityInputStream entity = NULL\n");
1.50 daniel 441: return(NULL);
1.45 daniel 442: }
443: if (entity->content == NULL) {
1.55 daniel 444: if ((ctxt->sax != NULL) && (ctxt->sax->error != NULL))
1.74 daniel 445: ctxt->sax->error(ctxt->userData,
1.45 daniel 446: "internal: xmlNewEntityInputStream entity->input = NULL\n");
1.50 daniel 447: return(NULL);
1.45 daniel 448: }
1.96 daniel 449: input = xmlNewInputStream(ctxt);
1.45 daniel 450: if (input == NULL) {
1.50 daniel 451: return(NULL);
1.45 daniel 452: }
453: input->filename = entity->SystemID; /* TODO !!! char <- CHAR */
454: input->base = entity->content;
455: input->cur = entity->content;
1.50 daniel 456: return(input);
1.45 daniel 457: }
458:
1.59 daniel 459: /**
460: * xmlNewStringInputStream:
461: * @ctxt: an XML parser context
1.96 daniel 462: * @buffer: an memory buffer
1.59 daniel 463: *
464: * Create a new input stream based on a memory buffer.
1.68 daniel 465: * Returns the new input stream
1.59 daniel 466: */
467: xmlParserInputPtr
1.96 daniel 468: xmlNewStringInputStream(xmlParserCtxtPtr ctxt, const CHAR *buffer) {
1.59 daniel 469: xmlParserInputPtr input;
470:
1.96 daniel 471: if (buffer == NULL) {
1.59 daniel 472: if ((ctxt->sax != NULL) && (ctxt->sax->error != NULL))
1.74 daniel 473: ctxt->sax->error(ctxt->userData,
1.59 daniel 474: "internal: xmlNewStringInputStream string = NULL\n");
475: return(NULL);
476: }
1.96 daniel 477: input = xmlNewInputStream(ctxt);
1.59 daniel 478: if (input == NULL) {
479: return(NULL);
480: }
1.96 daniel 481: input->base = buffer;
482: input->cur = buffer;
1.59 daniel 483: return(input);
484: }
485:
1.76 daniel 486: /**
487: * xmlNewInputFromFile:
488: * @ctxt: an XML parser context
489: * @filename: the filename to use as entity
490: *
491: * Create a new input stream based on a file.
492: *
493: * Returns the new input stream or NULL in case of error
494: */
495: xmlParserInputPtr
1.79 daniel 496: xmlNewInputFromFile(xmlParserCtxtPtr ctxt, const char *filename) {
1.91 daniel 497: xmlParserInputBufferPtr buf;
1.76 daniel 498: xmlParserInputPtr inputStream;
1.111 ! daniel 499: char *directory = NULL;
1.76 daniel 500:
1.96 daniel 501: if (ctxt == NULL) return(NULL);
1.91 daniel 502: buf = xmlParserInputBufferCreateFilename(filename, XML_CHAR_ENCODING_NONE);
1.94 daniel 503: if (buf == NULL) {
1.106 daniel 504: char name[1024];
505:
1.94 daniel 506: if ((ctxt->input != NULL) && (ctxt->input->directory != NULL)) {
507: #ifdef WIN32
508: sprintf(name, "%s\\%s", ctxt->input->directory, filename);
509: #else
510: sprintf(name, "%s/%s", ctxt->input->directory, filename);
511: #endif
512: buf = xmlParserInputBufferCreateFilename(name,
513: XML_CHAR_ENCODING_NONE);
1.106 daniel 514: if (buf != NULL)
515: directory = strdup(ctxt->input->directory);
516: }
517: if ((buf == NULL) && (ctxt->directory != NULL)) {
518: #ifdef WIN32
519: sprintf(name, "%s\\%s", ctxt->directory, filename);
520: #else
521: sprintf(name, "%s/%s", ctxt->directory, filename);
522: #endif
523: buf = xmlParserInputBufferCreateFilename(name,
524: XML_CHAR_ENCODING_NONE);
525: if (buf != NULL)
526: directory = strdup(ctxt->directory);
527: }
528: if (buf == NULL)
1.94 daniel 529: return(NULL);
530: }
531: if (directory == NULL)
532: directory = xmlParserGetDirectory(filename);
1.76 daniel 533:
1.96 daniel 534: inputStream = xmlNewInputStream(ctxt);
1.76 daniel 535: if (inputStream == NULL) {
1.96 daniel 536: if (directory != NULL) free((char *) directory);
1.76 daniel 537: return(NULL);
538: }
539:
540: inputStream->filename = strdup(filename);
1.94 daniel 541: inputStream->directory = directory;
1.91 daniel 542: inputStream->buf = buf;
1.76 daniel 543:
1.91 daniel 544: inputStream->base = inputStream->buf->buffer->content;
545: inputStream->cur = inputStream->buf->buffer->content;
1.106 daniel 546: if ((ctxt->directory == NULL) && (directory != NULL))
1.110 daniel 547: ctxt->directory = directory;
1.76 daniel 548: return(inputStream);
549: }
550:
1.77 daniel 551: /************************************************************************
552: * *
1.97 daniel 553: * Commodity functions to handle parser contexts *
554: * *
555: ************************************************************************/
556:
557: /**
558: * xmlInitParserCtxt:
559: * @ctxt: an XML parser context
560: *
561: * Initialize a parser context
562: */
563:
564: void
565: xmlInitParserCtxt(xmlParserCtxtPtr ctxt)
566: {
567: xmlSAXHandler *sax;
568:
569: sax = (xmlSAXHandler *) malloc(sizeof(xmlSAXHandler));
570: if (sax == NULL) {
571: fprintf(stderr, "xmlInitParserCtxt: out of memory\n");
572: }
573:
574: /* Allocate the Input stack */
575: ctxt->inputTab = (xmlParserInputPtr *) malloc(5 * sizeof(xmlParserInputPtr));
576: ctxt->inputNr = 0;
577: ctxt->inputMax = 5;
578: ctxt->input = NULL;
579: ctxt->version = NULL;
580: ctxt->encoding = NULL;
581: ctxt->standalone = -1;
1.98 daniel 582: ctxt->hasExternalSubset = 0;
583: ctxt->hasPErefs = 0;
1.97 daniel 584: ctxt->html = 0;
1.98 daniel 585: ctxt->external = 0;
1.97 daniel 586: ctxt->instate = XML_PARSER_PROLOG;
587: ctxt->token = 0;
1.106 daniel 588: ctxt->directory = NULL;
1.97 daniel 589:
590: /* Allocate the Node stack */
591: ctxt->nodeTab = (xmlNodePtr *) malloc(10 * sizeof(xmlNodePtr));
592: ctxt->nodeNr = 0;
593: ctxt->nodeMax = 10;
594: ctxt->node = NULL;
595:
596: if (sax == NULL) ctxt->sax = &xmlDefaultSAXHandler;
597: else {
598: ctxt->sax = sax;
599: memcpy(sax, &xmlDefaultSAXHandler, sizeof(xmlSAXHandler));
600: }
601: ctxt->userData = ctxt;
602: ctxt->myDoc = NULL;
603: ctxt->wellFormed = 1;
1.99 daniel 604: ctxt->valid = 1;
1.100 daniel 605: ctxt->validate = xmlDoValidityCheckingDefaultValue;
606: ctxt->vctxt.userData = ctxt;
607: ctxt->vctxt.error = xmlParserValidityError;
608: ctxt->vctxt.warning = xmlParserValidityWarning;
1.97 daniel 609: ctxt->replaceEntities = xmlSubstituteEntitiesDefaultValue;
610: ctxt->record_info = 0;
611: xmlInitNodeInfoSeq(&ctxt->node_seq);
612: }
613:
614: /**
615: * xmlFreeParserCtxt:
616: * @ctxt: an XML parser context
617: *
618: * Free all the memory used by a parser context. However the parsed
619: * document in ctxt->myDoc is not freed.
620: */
621:
622: void
623: xmlFreeParserCtxt(xmlParserCtxtPtr ctxt)
624: {
625: xmlParserInputPtr input;
626:
627: if (ctxt == NULL) return;
628:
629: while ((input = inputPop(ctxt)) != NULL) {
630: xmlFreeInputStream(input);
631: }
632:
633: if (ctxt->nodeTab != NULL) free(ctxt->nodeTab);
634: if (ctxt->inputTab != NULL) free(ctxt->inputTab);
635: if (ctxt->version != NULL) free((char *) ctxt->version);
636: if (ctxt->encoding != NULL) free((char *) ctxt->encoding);
637: if ((ctxt->sax != NULL) && (ctxt->sax != &xmlDefaultSAXHandler))
638: free(ctxt->sax);
1.106 daniel 639: if (ctxt->directory != NULL) free((char *) ctxt->directory);
1.97 daniel 640: free(ctxt);
641: }
642:
643: /**
644: * xmlNewParserCtxt:
645: *
646: * Allocate and initialize a new parser context.
647: *
648: * Returns the xmlParserCtxtPtr or NULL
649: */
650:
651: xmlParserCtxtPtr
652: xmlNewParserCtxt()
653: {
654: xmlParserCtxtPtr ctxt;
655:
656: ctxt = (xmlParserCtxtPtr) malloc(sizeof(xmlParserCtxt));
657: if (ctxt == NULL) {
658: fprintf(stderr, "xmlNewParserCtxt : cannot allocate context\n");
659: perror("malloc");
660: return(NULL);
661: }
662: xmlInitParserCtxt(ctxt);
663: return(ctxt);
664: }
665:
666: /**
667: * xmlClearParserCtxt:
668: * @ctxt: an XML parser context
669: *
670: * Clear (release owned resources) and reinitialize a parser context
671: */
672:
673: void
674: xmlClearParserCtxt(xmlParserCtxtPtr ctxt)
675: {
676: xmlClearNodeInfoSeq(&ctxt->node_seq);
677: xmlInitParserCtxt(ctxt);
678: }
679:
680: /************************************************************************
681: * *
1.77 daniel 682: * Commodity functions to handle entities *
683: * *
684: ************************************************************************/
685:
1.97 daniel 686: void xmlParserHandleReference(xmlParserCtxtPtr ctxt);
687: void xmlParserHandlePEReference(xmlParserCtxtPtr ctxt);
688:
689: /**
690: * xmlParseCharRef:
691: * @ctxt: an XML parser context
692: *
693: * parse Reference declarations
694: *
695: * [66] CharRef ::= '&#' [0-9]+ ';' |
696: * '&#x' [0-9a-fA-F]+ ';'
697: *
1.98 daniel 698: * [ WFC: Legal Character ]
699: * Characters referred to using character references must match the
700: * production for Char.
701: *
1.97 daniel 702: * Returns the value parsed (as an int)
1.77 daniel 703: */
1.97 daniel 704: int
705: xmlParseCharRef(xmlParserCtxtPtr ctxt) {
706: int val = 0;
707:
1.111 ! daniel 708: if (ctxt->token != 0) {
! 709: val = ctxt->token;
! 710: ctxt->token = 0;
! 711: return(val);
! 712: }
1.97 daniel 713: if ((CUR == '&') && (NXT(1) == '#') &&
714: (NXT(2) == 'x')) {
715: SKIP(3);
716: while (CUR != ';') {
717: if ((CUR >= '0') && (CUR <= '9'))
718: val = val * 16 + (CUR - '0');
719: else if ((CUR >= 'a') && (CUR <= 'f'))
720: val = val * 16 + (CUR - 'a') + 10;
721: else if ((CUR >= 'A') && (CUR <= 'F'))
722: val = val * 16 + (CUR - 'A') + 10;
723: else {
724: if ((ctxt->sax != NULL) && (ctxt->sax->error != NULL))
725: ctxt->sax->error(ctxt->userData,
726: "xmlParseCharRef: invalid hexadecimal value\n");
727: ctxt->wellFormed = 0;
728: val = 0;
729: break;
730: }
731: NEXT;
732: }
733: if (CUR == ';')
734: NEXT;
735: } else if ((CUR == '&') && (NXT(1) == '#')) {
736: SKIP(2);
737: while (CUR != ';') {
738: if ((CUR >= '0') && (CUR <= '9'))
739: val = val * 10 + (CUR - '0');
740: else {
741: if ((ctxt->sax != NULL) && (ctxt->sax->error != NULL))
742: ctxt->sax->error(ctxt->userData,
743: "xmlParseCharRef: invalid decimal value\n");
744: ctxt->wellFormed = 0;
745: val = 0;
746: break;
747: }
748: NEXT;
749: }
750: if (CUR == ';')
751: NEXT;
752: } else {
753: if ((ctxt->sax != NULL) && (ctxt->sax->error != NULL))
1.98 daniel 754: ctxt->sax->error(ctxt->userData,
755: "xmlParseCharRef: invalid value\n");
1.97 daniel 756: ctxt->wellFormed = 0;
757: }
1.98 daniel 758:
1.97 daniel 759: /*
1.98 daniel 760: * [ WFC: Legal Character ]
761: * Characters referred to using character references must match the
762: * production for Char.
1.97 daniel 763: */
764: if (IS_CHAR(val)) {
765: return(val);
766: } else {
767: if ((ctxt->sax != NULL) && (ctxt->sax->error != NULL))
1.98 daniel 768: ctxt->sax->error(ctxt->userData, "CharRef: invalid CHAR value %d\n",
1.97 daniel 769: val);
770: ctxt->wellFormed = 0;
771: }
772: return(0);
1.77 daniel 773: }
774:
1.96 daniel 775: /**
776: * xmlParserHandleReference:
777: * @ctxt: the parser context
778: *
1.97 daniel 779: * [67] Reference ::= EntityRef | CharRef
780: *
1.96 daniel 781: * [68] EntityRef ::= '&' Name ';'
782: *
1.98 daniel 783: * [ WFC: Entity Declared ]
784: * the Name given in the entity reference must match that in an entity
785: * declaration, except that well-formed documents need not declare any
786: * of the following entities: amp, lt, gt, apos, quot.
787: *
788: * [ WFC: Parsed Entity ]
789: * An entity reference must not contain the name of an unparsed entity
790: *
1.97 daniel 791: * [66] CharRef ::= '&#' [0-9]+ ';' |
792: * '&#x' [0-9a-fA-F]+ ';'
793: *
1.96 daniel 794: * A PEReference may have been detectect in the current input stream
795: * the handling is done accordingly to
796: * http://www.w3.org/TR/REC-xml#entproc
797: */
798: void
799: xmlParserHandleReference(xmlParserCtxtPtr ctxt) {
1.97 daniel 800: xmlParserInputPtr input;
801: CHAR *name;
802: xmlEntityPtr ent = NULL;
803:
1.111 ! daniel 804: if (ctxt->token != 0) return;
1.97 daniel 805: if (CUR != '&') return;
806: GROW;
807: if ((CUR == '&') && (NXT(1) == '#')) {
808: switch(ctxt->instate) {
1.109 daniel 809: case XML_PARSER_CDATA_SECTION:
810: return;
1.97 daniel 811: case XML_PARSER_COMMENT:
812: return;
813: case XML_PARSER_EOF:
814: if ((ctxt->sax != NULL) && (ctxt->sax->error != NULL))
815: ctxt->sax->error(ctxt->userData, "CharRef at EOF\n");
816: ctxt->wellFormed = 0;
817: return;
818: case XML_PARSER_PROLOG:
819: if ((ctxt->sax != NULL) && (ctxt->sax->error != NULL))
820: ctxt->sax->error(ctxt->userData, "CharRef in prolog!\n");
821: ctxt->wellFormed = 0;
822: return;
823: case XML_PARSER_EPILOG:
824: if ((ctxt->sax != NULL) && (ctxt->sax->error != NULL))
825: ctxt->sax->error(ctxt->userData, "CharRef in epilog!\n");
826: ctxt->wellFormed = 0;
827: return;
828: case XML_PARSER_DTD:
829: if ((ctxt->sax != NULL) && (ctxt->sax->error != NULL))
830: ctxt->sax->error(ctxt->userData,
831: "CharRef are forbiden in DTDs!\n");
832: ctxt->wellFormed = 0;
833: return;
834: case XML_PARSER_ENTITY_DECL:
835: /* we just ignore it there */
836: return;
837: case XML_PARSER_ENTITY_VALUE:
838: /*
839: * NOTE: in the case of entity values, we don't do the
840: * substitution here since we need the litteral
841: * entity value to be able to save the internal
842: * subset of the document.
843: * This will be handled by xmlDecodeEntities
844: */
845: return;
846: case XML_PARSER_CONTENT:
847: case XML_PARSER_ATTRIBUTE_VALUE:
848: /* TODO this may not be Ok for UTF-8, multibyte sequence */
849: ctxt->token = xmlParseCharRef(ctxt);
850: return;
851: }
852: return;
853: }
854:
855: switch(ctxt->instate) {
1.109 daniel 856: case XML_PARSER_CDATA_SECTION:
857: return;
1.97 daniel 858: case XML_PARSER_COMMENT:
859: return;
860: case XML_PARSER_EOF:
861: if ((ctxt->sax != NULL) && (ctxt->sax->error != NULL))
862: ctxt->sax->error(ctxt->userData, "Reference at EOF\n");
863: ctxt->wellFormed = 0;
864: return;
865: case XML_PARSER_PROLOG:
866: if ((ctxt->sax != NULL) && (ctxt->sax->error != NULL))
867: ctxt->sax->error(ctxt->userData, "Reference in prolog!\n");
868: ctxt->wellFormed = 0;
869: return;
870: case XML_PARSER_EPILOG:
871: if ((ctxt->sax != NULL) && (ctxt->sax->error != NULL))
872: ctxt->sax->error(ctxt->userData, "Reference in epilog!\n");
873: ctxt->wellFormed = 0;
874: return;
875: case XML_PARSER_ENTITY_VALUE:
876: /*
877: * NOTE: in the case of entity values, we don't do the
878: * substitution here since we need the litteral
879: * entity value to be able to save the internal
880: * subset of the document.
881: * This will be handled by xmlDecodeEntities
882: */
883: return;
884: case XML_PARSER_ATTRIBUTE_VALUE:
885: /*
886: * NOTE: in the case of attributes values, we don't do the
887: * substitution here unless we are in a mode where
888: * the parser is explicitely asked to substitute
889: * entities. The SAX callback is called with values
890: * without entity substitution.
891: * This will then be handled by xmlDecodeEntities
892: */
893: if (ctxt->replaceEntities == 0) return;
894: break;
895: case XML_PARSER_ENTITY_DECL:
896: /*
897: * we just ignore it there
898: * the substitution will be done once the entity is referenced
899: */
900: return;
901: case XML_PARSER_DTD:
902: if ((ctxt->sax != NULL) && (ctxt->sax->error != NULL))
903: ctxt->sax->error(ctxt->userData,
904: "Entity references are forbiden in DTDs!\n");
905: ctxt->wellFormed = 0;
906: return;
907: case XML_PARSER_CONTENT:
908: if (ctxt->replaceEntities == 0) return;
909: break;
910: }
911:
912: NEXT;
913: name = xmlScanName(ctxt);
914: if (name == NULL) {
915: if ((ctxt->sax != NULL) && (ctxt->sax->error != NULL))
916: ctxt->sax->error(ctxt->userData, "Entity reference: no name\n");
917: ctxt->wellFormed = 0;
918: ctxt->token = '&';
919: return;
920: }
921: if (NXT(xmlStrlen(name)) != ';') {
922: if ((ctxt->sax != NULL) && (ctxt->sax->error != NULL))
923: ctxt->sax->error(ctxt->userData,
924: "Entity reference: ';' expected\n");
925: ctxt->wellFormed = 0;
926: ctxt->token = '&';
1.111 ! daniel 927: free(name);
1.97 daniel 928: return;
929: }
930: SKIP(xmlStrlen(name) + 1);
931: if (ctxt->sax != NULL) {
932: if (ctxt->sax->getEntity != NULL)
933: ent = ctxt->sax->getEntity(ctxt->userData, name);
934: }
1.98 daniel 935:
936: /*
937: * [ WFC: Entity Declared ]
938: * the Name given in the entity reference must match that in an entity
939: * declaration, except that well-formed documents need not declare any
940: * of the following entities: amp, lt, gt, apos, quot.
941: */
1.97 daniel 942: if (ent == NULL)
943: ent = xmlGetPredefinedEntity(name);
944: if (ent == NULL) {
945: if ((ctxt->sax != NULL) && (ctxt->sax->error != NULL))
946: ctxt->sax->error(ctxt->userData,
1.98 daniel 947: "Entity reference: entity %s not declared\n",
948: name);
1.97 daniel 949: ctxt->wellFormed = 0;
1.111 ! daniel 950: free(name);
1.97 daniel 951: return;
952: }
1.98 daniel 953:
954: /*
955: * [ WFC: Parsed Entity ]
956: * An entity reference must not contain the name of an unparsed entity
957: */
958: if (ent->type == XML_EXTERNAL_GENERAL_UNPARSED_ENTITY) {
959: if ((ctxt->sax != NULL) && (ctxt->sax->error != NULL))
960: ctxt->sax->error(ctxt->userData,
961: "Entity reference to unparsed entity %s\n", name);
962: ctxt->wellFormed = 0;
963: }
964:
1.97 daniel 965: if (ent->type == XML_INTERNAL_PREDEFINED_ENTITY) {
966: ctxt->token = ent->content[0];
1.111 ! daniel 967: free(name);
1.97 daniel 968: return;
969: }
970: input = xmlNewEntityInputStream(ctxt, ent);
971: xmlPushInput(ctxt, input);
1.111 ! daniel 972: free(name);
1.96 daniel 973: return;
974: }
975:
976: /**
977: * xmlParserHandlePEReference:
978: * @ctxt: the parser context
979: *
980: * [69] PEReference ::= '%' Name ';'
981: *
1.98 daniel 982: * [ WFC: No Recursion ]
983: * TODO A parsed entity must not contain a recursive
984: * reference to itself, either directly or indirectly.
985: *
986: * [ WFC: Entity Declared ]
987: * In a document without any DTD, a document with only an internal DTD
988: * subset which contains no parameter entity references, or a document
989: * with "standalone='yes'", ... ... The declaration of a parameter
990: * entity must precede any reference to it...
991: *
992: * [ VC: Entity Declared ]
993: * In a document with an external subset or external parameter entities
994: * with "standalone='no'", ... ... The declaration of a parameter entity
995: * must precede any reference to it...
996: *
997: * [ WFC: In DTD ]
998: * Parameter-entity references may only appear in the DTD.
999: * NOTE: misleading but this is handled.
1000: *
1001: * A PEReference may have been detected in the current input stream
1.96 daniel 1002: * the handling is done accordingly to
1003: * http://www.w3.org/TR/REC-xml#entproc
1004: * i.e.
1005: * - Included in literal in entity values
1006: * - Included as Paraemeter Entity reference within DTDs
1007: */
1008: void
1009: xmlParserHandlePEReference(xmlParserCtxtPtr ctxt) {
1010: CHAR *name;
1011: xmlEntityPtr entity = NULL;
1012: xmlParserInputPtr input;
1013:
1.111 ! daniel 1014: if (ctxt->token != 0) return;
! 1015: if (CUR != '%') return;
1.96 daniel 1016: switch(ctxt->instate) {
1.109 daniel 1017: case XML_PARSER_CDATA_SECTION:
1018: return;
1.97 daniel 1019: case XML_PARSER_COMMENT:
1020: return;
1.96 daniel 1021: case XML_PARSER_EOF:
1022: if ((ctxt->sax != NULL) && (ctxt->sax->error != NULL))
1023: ctxt->sax->error(ctxt->userData, "PEReference at EOF\n");
1024: ctxt->wellFormed = 0;
1025: return;
1026: case XML_PARSER_PROLOG:
1027: if ((ctxt->sax != NULL) && (ctxt->sax->error != NULL))
1028: ctxt->sax->error(ctxt->userData, "PEReference in prolog!\n");
1029: ctxt->wellFormed = 0;
1030: return;
1.97 daniel 1031: case XML_PARSER_ENTITY_DECL:
1.96 daniel 1032: case XML_PARSER_CONTENT:
1033: case XML_PARSER_ATTRIBUTE_VALUE:
1034: /* we just ignore it there */
1035: return;
1036: case XML_PARSER_EPILOG:
1037: if ((ctxt->sax != NULL) && (ctxt->sax->error != NULL))
1.97 daniel 1038: ctxt->sax->error(ctxt->userData, "PEReference in epilog!\n");
1.96 daniel 1039: ctxt->wellFormed = 0;
1040: return;
1.97 daniel 1041: case XML_PARSER_ENTITY_VALUE:
1042: /*
1043: * NOTE: in the case of entity values, we don't do the
1044: * substitution here since we need the litteral
1045: * entity value to be able to save the internal
1046: * subset of the document.
1047: * This will be handled by xmlDecodeEntities
1048: */
1049: return;
1.96 daniel 1050: case XML_PARSER_DTD:
1.98 daniel 1051: /*
1052: * [WFC: Well-Formedness Constraint: PEs in Internal Subset]
1053: * In the internal DTD subset, parameter-entity references
1054: * can occur only where markup declarations can occur, not
1055: * within markup declarations.
1056: * In that case this is handled in xmlParseMarkupDecl
1057: */
1058: if ((ctxt->external == 0) && (ctxt->inputNr == 1))
1059: return;
1.96 daniel 1060: }
1061:
1062: NEXT;
1063: name = xmlParseName(ctxt);
1064: if (name == NULL) {
1065: if ((ctxt->sax != NULL) && (ctxt->sax->error != NULL))
1066: ctxt->sax->error(ctxt->userData, "xmlHandlePEReference: no name\n");
1067: ctxt->wellFormed = 0;
1068: } else {
1069: if (CUR == ';') {
1070: NEXT;
1.98 daniel 1071: if ((ctxt->sax != NULL) && (ctxt->sax->getParameterEntity != NULL))
1072: entity = ctxt->sax->getParameterEntity(ctxt->userData, name);
1.96 daniel 1073: if (entity == NULL) {
1.98 daniel 1074:
1075: /*
1076: * [ WFC: Entity Declared ]
1077: * In a document without any DTD, a document with only an
1078: * internal DTD subset which contains no parameter entity
1079: * references, or a document with "standalone='yes'", ...
1080: * ... The declaration of a parameter entity must precede
1081: * any reference to it...
1082: */
1083: if ((ctxt->standalone == 1) ||
1084: ((ctxt->hasExternalSubset == 0) &&
1085: (ctxt->hasPErefs == 0))) {
1086: if ((ctxt->sax != NULL) && (ctxt->sax->error != NULL))
1087: ctxt->sax->error(ctxt->userData,
1088: "PEReference: %%%s; not found\n", name);
1089: ctxt->wellFormed = 0;
1090: } else {
1091: /*
1092: * [ VC: Entity Declared ]
1093: * In a document with an external subset or external
1094: * parameter entities with "standalone='no'", ...
1095: * ... The declaration of a parameter entity must precede
1096: * any reference to it...
1097: */
1098: if ((ctxt->sax != NULL) && (ctxt->sax->warning != NULL))
1099: ctxt->sax->warning(ctxt->userData,
1100: "PEReference: %%%s; not found\n", name);
1101: ctxt->valid = 0;
1102: }
1.96 daniel 1103: } else {
1104: if ((entity->type == XML_INTERNAL_PARAMETER_ENTITY) ||
1105: (entity->type == XML_EXTERNAL_PARAMETER_ENTITY)) {
1106: /*
1107: * TODO !!!! handle the extra spaces added before and after
1108: * c.f. http://www.w3.org/TR/REC-xml#as-PE
1109: * TODO !!!! Avoid quote processing in parameters value
1110: * c.f. http://www.w3.org/TR/REC-xml#inliteral
1111: */
1112: input = xmlNewEntityInputStream(ctxt, entity);
1113: xmlPushInput(ctxt, input);
1114: } else {
1115: if ((ctxt->sax != NULL) && (ctxt->sax->error != NULL))
1116: ctxt->sax->error(ctxt->userData,
1117: "xmlHandlePEReference: %s is not a parameter entity\n",
1118: name);
1119: ctxt->wellFormed = 0;
1120: }
1121: }
1122: } else {
1123: if ((ctxt->sax != NULL) && (ctxt->sax->error != NULL))
1124: ctxt->sax->error(ctxt->userData,
1125: "xmlHandlePEReference: expecting ';'\n");
1126: ctxt->wellFormed = 0;
1127: }
1.97 daniel 1128: free(name);
1129: }
1130: }
1131:
/*
 * Macro used to grow the current buffer.
 *
 * Doubles the integer variable <buffer>_size and reallocates <buffer>
 * to hold that many CHARs; a variable named <buffer>_size must therefore
 * be in scope at the point of use. If realloc() fails the error is
 * reported through perror() and the process exits with status 1.
 * Since the buffer may move, callers have to recompute any pointer
 * they hold into it after invoking the macro.
 */
#define growBuffer(buffer) {						\
    buffer##_size *= 2;							\
    buffer = (CHAR *) realloc(buffer, buffer##_size * sizeof(CHAR));	\
    if (buffer == NULL) {						\
	perror("realloc failed");					\
	exit(1);							\
    }									\
}
1.77 daniel 1143:
1144: /**
1145: * xmlDecodeEntities:
1146: * @ctxt: the parser context
1147: * @what: combination of XML_SUBSTITUTE_REF and XML_SUBSTITUTE_PEREF
1148: * @len: the len to decode (in bytes !), -1 for no size limit
1149: * @end: an end marker CHAR, 0 if none
1150: * @end2: an end marker CHAR, 0 if none
1151: * @end3: an end marker CHAR, 0 if none
1152: *
1153: * [67] Reference ::= EntityRef | CharRef
1154: *
1155: * [69] PEReference ::= '%' Name ';'
1156: *
1157: * Returns A newly allocated string with the substitution done. The caller
1158: * must deallocate it !
1159: */
1160: CHAR *
1161: xmlDecodeEntities(xmlParserCtxtPtr ctxt, int len, int what,
1162: CHAR end, CHAR end2, CHAR end3) {
1163: CHAR *buffer = NULL;
1.78 daniel 1164: int buffer_size = 0;
1.77 daniel 1165: CHAR *out = NULL;
1.78 daniel 1166:
1.97 daniel 1167: CHAR *current = NULL;
1.77 daniel 1168: xmlEntityPtr ent;
1.91 daniel 1169: int nbchars = 0;
1.77 daniel 1170: unsigned int max = (unsigned int) len;
1.97 daniel 1171: CHAR cur;
1.77 daniel 1172:
1173: /*
1174: * allocate a translation buffer.
1175: */
1176: buffer_size = 1000;
1177: buffer = (CHAR *) malloc(buffer_size * sizeof(CHAR));
1178: if (buffer == NULL) {
1179: perror("xmlDecodeEntities: malloc failed");
1180: return(NULL);
1181: }
1182: out = buffer;
1183:
1.78 daniel 1184: /*
1185: * Ok loop until we reach one of the ending char or a size limit.
1186: */
1.97 daniel 1187: cur = CUR;
1188: while ((nbchars < max) && (cur != end) &&
1189: (cur != end2) && (cur != end3)) {
1.77 daniel 1190:
1.98 daniel 1191: if (cur == 0) break;
1192: if ((cur == '&') && (NXT(1) == '#')) {
1193: int val = xmlParseCharRef(ctxt);
1194: *out++ = val;
1195: nbchars += 3;
1196: } else if ((cur == '&') && (what & XML_SUBSTITUTE_REF)) {
1197: ent = xmlParseEntityRef(ctxt);
1198: if ((ent != NULL) &&
1199: (ctxt->replaceEntities != 0)) {
1200: current = ent->content;
1201: while (*current != 0) {
1202: *out++ = *current++;
1203: if (out - buffer > buffer_size - 100) {
1204: int index = out - buffer;
1205:
1206: growBuffer(buffer);
1207: out = &buffer[index];
1.77 daniel 1208: }
1209: }
1.98 daniel 1210: nbchars += 3 + xmlStrlen(ent->name);
1211: } else if (ent != NULL) {
1212: int i = xmlStrlen(ent->name);
1213: const CHAR *cur = ent->name;
1214:
1215: nbchars += i + 2;
1216: *out++ = '&';
1217: if (out - buffer > buffer_size - i - 100) {
1218: int index = out - buffer;
1219:
1220: growBuffer(buffer);
1221: out = &buffer[index];
1222: }
1223: for (;i > 0;i--)
1224: *out++ = *cur++;
1225: *out++ = ';';
1.77 daniel 1226: }
1.97 daniel 1227: } else if (cur == '%' && (what & XML_SUBSTITUTE_PEREF)) {
1228: /*
1.77 daniel 1229: * a PEReference induce to switch the entity flow,
1230: * we break here to flush the current set of chars
1231: * parsed if any. We will be called back later.
1.97 daniel 1232: */
1.91 daniel 1233: if (nbchars != 0) break;
1.77 daniel 1234:
1235: xmlParsePEReference(ctxt);
1.79 daniel 1236:
1.97 daniel 1237: /*
1.79 daniel 1238: * Pop-up of finished entities.
1.97 daniel 1239: */
1.79 daniel 1240: while ((CUR == 0) && (ctxt->inputNr > 1))
1241: xmlPopInput(ctxt);
1242:
1.98 daniel 1243: break;
1.77 daniel 1244: } else {
1245: /* TODO: invalid for UTF-8 , use COPY(out); */
1.97 daniel 1246: *out++ = cur;
1.91 daniel 1247: nbchars++;
1.86 daniel 1248: if (out - buffer > buffer_size - 100) {
1249: int index = out - buffer;
1250:
1251: growBuffer(buffer);
1252: out = &buffer[index];
1253: }
1.77 daniel 1254: NEXT;
1255: }
1.97 daniel 1256: cur = CUR;
1.77 daniel 1257: }
1258: *out++ = 0;
1259: return(buffer);
1260: }
1261:
1.1 veillard 1262:
1.28 daniel 1263: /************************************************************************
1264: * *
1.75 daniel 1265: * Commodity functions to handle encodings *
1266: * *
1267: ************************************************************************/
1268:
1269: /**
1270: * xmlSwitchEncoding:
1271: * @ctxt: the parser context
1272: * @len: the len of @cur
1273: *
1274: * change the input functions when discovering the character encoding
1275: * of a given entity.
1276: *
1277: */
1278: void
1279: xmlSwitchEncoding(xmlParserCtxtPtr ctxt, xmlCharEncoding enc)
1280: {
1281: switch (enc) {
1282: case XML_CHAR_ENCODING_ERROR:
1283: if ((ctxt->sax != NULL) && (ctxt->sax->error != NULL))
1284: ctxt->sax->error(ctxt->userData, "encoding unknown\n");
1285: ctxt->wellFormed = 0;
1286: break;
1287: case XML_CHAR_ENCODING_NONE:
1288: /* let's assume it's UTF-8 without the XML decl */
1289: return;
1290: case XML_CHAR_ENCODING_UTF8:
1291: /* default encoding, no conversion should be needed */
1292: return;
1293: case XML_CHAR_ENCODING_UTF16LE:
1294: if ((ctxt->sax != NULL) && (ctxt->sax->error != NULL))
1295: ctxt->sax->error(ctxt->userData,
1296: "char encoding UTF16 little endian not supported\n");
1297: break;
1298: case XML_CHAR_ENCODING_UTF16BE:
1299: if ((ctxt->sax != NULL) && (ctxt->sax->error != NULL))
1300: ctxt->sax->error(ctxt->userData,
1301: "char encoding UTF16 big endian not supported\n");
1302: break;
1303: case XML_CHAR_ENCODING_UCS4LE:
1304: if ((ctxt->sax != NULL) && (ctxt->sax->error != NULL))
1305: ctxt->sax->error(ctxt->userData,
1306: "char encoding USC4 little endian not supported\n");
1307: break;
1308: case XML_CHAR_ENCODING_UCS4BE:
1309: if ((ctxt->sax != NULL) && (ctxt->sax->error != NULL))
1310: ctxt->sax->error(ctxt->userData,
1311: "char encoding USC4 big endian not supported\n");
1312: break;
1313: case XML_CHAR_ENCODING_EBCDIC:
1314: if ((ctxt->sax != NULL) && (ctxt->sax->error != NULL))
1315: ctxt->sax->error(ctxt->userData,
1316: "char encoding EBCDIC not supported\n");
1317: break;
1318: case XML_CHAR_ENCODING_UCS4_2143:
1319: if ((ctxt->sax != NULL) && (ctxt->sax->error != NULL))
1320: ctxt->sax->error(ctxt->userData,
1321: "char encoding UCS4 2143 not supported\n");
1322: break;
1323: case XML_CHAR_ENCODING_UCS4_3412:
1324: if ((ctxt->sax != NULL) && (ctxt->sax->error != NULL))
1325: ctxt->sax->error(ctxt->userData,
1326: "char encoding UCS4 3412 not supported\n");
1327: break;
1328: case XML_CHAR_ENCODING_UCS2:
1329: if ((ctxt->sax != NULL) && (ctxt->sax->error != NULL))
1330: ctxt->sax->error(ctxt->userData,
1331: "char encoding UCS2 not supported\n");
1332: break;
1333: case XML_CHAR_ENCODING_8859_1:
1334: if ((ctxt->sax != NULL) && (ctxt->sax->error != NULL))
1335: ctxt->sax->error(ctxt->userData,
1336: "char encoding ISO_8859_1 ISO Latin 1 not supported\n");
1337: break;
1338: case XML_CHAR_ENCODING_8859_2:
1339: if ((ctxt->sax != NULL) && (ctxt->sax->error != NULL))
1340: ctxt->sax->error(ctxt->userData,
1341: "char encoding ISO_8859_2 ISO Latin 2 not supported\n");
1342: break;
1343: case XML_CHAR_ENCODING_8859_3:
1344: if ((ctxt->sax != NULL) && (ctxt->sax->error != NULL))
1345: ctxt->sax->error(ctxt->userData,
1346: "char encoding ISO_8859_3 not supported\n");
1347: break;
1348: case XML_CHAR_ENCODING_8859_4:
1349: if ((ctxt->sax != NULL) && (ctxt->sax->error != NULL))
1350: ctxt->sax->error(ctxt->userData,
1351: "char encoding ISO_8859_4 not supported\n");
1352: break;
1353: case XML_CHAR_ENCODING_8859_5:
1354: if ((ctxt->sax != NULL) && (ctxt->sax->error != NULL))
1355: ctxt->sax->error(ctxt->userData,
1356: "char encoding ISO_8859_5 not supported\n");
1357: break;
1358: case XML_CHAR_ENCODING_8859_6:
1359: if ((ctxt->sax != NULL) && (ctxt->sax->error != NULL))
1360: ctxt->sax->error(ctxt->userData,
1361: "char encoding ISO_8859_6 not supported\n");
1362: break;
1363: case XML_CHAR_ENCODING_8859_7:
1364: if ((ctxt->sax != NULL) && (ctxt->sax->error != NULL))
1365: ctxt->sax->error(ctxt->userData,
1366: "char encoding ISO_8859_7 not supported\n");
1367: break;
1368: case XML_CHAR_ENCODING_8859_8:
1369: if ((ctxt->sax != NULL) && (ctxt->sax->error != NULL))
1370: ctxt->sax->error(ctxt->userData,
1371: "char encoding ISO_8859_8 not supported\n");
1372: break;
1373: case XML_CHAR_ENCODING_8859_9:
1374: if ((ctxt->sax != NULL) && (ctxt->sax->error != NULL))
1375: ctxt->sax->error(ctxt->userData,
1376: "char encoding ISO_8859_9 not supported\n");
1377: break;
1378: case XML_CHAR_ENCODING_2022_JP:
1379: if ((ctxt->sax != NULL) && (ctxt->sax->error != NULL))
1380: ctxt->sax->error(ctxt->userData,
1381: "char encoding ISO-2022-JPnot supported\n");
1382: break;
1383: case XML_CHAR_ENCODING_SHIFT_JIS:
1384: if ((ctxt->sax != NULL) && (ctxt->sax->error != NULL))
1385: ctxt->sax->error(ctxt->userData,
1386: "char encoding Shift_JISnot supported\n");
1387: break;
1388: case XML_CHAR_ENCODING_EUC_JP:
1389: if ((ctxt->sax != NULL) && (ctxt->sax->error != NULL))
1390: ctxt->sax->error(ctxt->userData,
1391: "char encoding EUC-JPnot supported\n");
1392: break;
1393: }
1394: }
1395:
1396: /************************************************************************
1397: * *
1.28 daniel 1398: * Commodity functions to handle CHARs *
1399: * *
1400: ************************************************************************/
1401:
1.50 daniel 1402: /**
1403: * xmlStrndup:
1404: * @cur: the input CHAR *
1405: * @len: the len of @cur
1406: *
1407: * a strndup for array of CHAR's
1.68 daniel 1408: *
1409: * Returns a new CHAR * or NULL
1.1 veillard 1410: */
1.55 daniel 1411: CHAR *
1412: xmlStrndup(const CHAR *cur, int len) {
1.1 veillard 1413: CHAR *ret = malloc((len + 1) * sizeof(CHAR));
1414:
1415: if (ret == NULL) {
1.86 daniel 1416: fprintf(stderr, "malloc of %ld byte failed\n",
1417: (len + 1) * (long)sizeof(CHAR));
1.1 veillard 1418: return(NULL);
1419: }
1420: memcpy(ret, cur, len * sizeof(CHAR));
1421: ret[len] = 0;
1422: return(ret);
1423: }
1424:
1.50 daniel 1425: /**
1426: * xmlStrdup:
1427: * @cur: the input CHAR *
1428: *
1429: * a strdup for array of CHAR's
1.68 daniel 1430: *
1431: * Returns a new CHAR * or NULL
1.1 veillard 1432: */
1.55 daniel 1433: CHAR *
1434: xmlStrdup(const CHAR *cur) {
1.6 httpng 1435: const CHAR *p = cur;
1.1 veillard 1436:
1437: while (IS_CHAR(*p)) p++;
1438: return(xmlStrndup(cur, p - cur));
1439: }
1440:
1.50 daniel 1441: /**
1442: * xmlCharStrndup:
1443: * @cur: the input char *
1444: * @len: the len of @cur
1445: *
1446: * a strndup for char's to CHAR's
1.68 daniel 1447: *
1448: * Returns a new CHAR * or NULL
1.45 daniel 1449: */
1450:
1.55 daniel 1451: CHAR *
1452: xmlCharStrndup(const char *cur, int len) {
1.45 daniel 1453: int i;
1454: CHAR *ret = malloc((len + 1) * sizeof(CHAR));
1455:
1456: if (ret == NULL) {
1.86 daniel 1457: fprintf(stderr, "malloc of %ld byte failed\n",
1458: (len + 1) * (long)sizeof(CHAR));
1.45 daniel 1459: return(NULL);
1460: }
1461: for (i = 0;i < len;i++)
1462: ret[i] = (CHAR) cur[i];
1463: ret[len] = 0;
1464: return(ret);
1465: }
1466:
1.50 daniel 1467: /**
1468: * xmlCharStrdup:
1469: * @cur: the input char *
1470: * @len: the len of @cur
1471: *
1472: * a strdup for char's to CHAR's
1.68 daniel 1473: *
1474: * Returns a new CHAR * or NULL
1.45 daniel 1475: */
1476:
1.55 daniel 1477: CHAR *
1478: xmlCharStrdup(const char *cur) {
1.45 daniel 1479: const char *p = cur;
1480:
1481: while (*p != '\0') p++;
1482: return(xmlCharStrndup(cur, p - cur));
1483: }
1484:
1.50 daniel 1485: /**
1486: * xmlStrcmp:
1487: * @str1: the first CHAR *
1488: * @str2: the second CHAR *
1489: *
1490: * a strcmp for CHAR's
1.68 daniel 1491: *
1492: * Returns the integer result of the comparison
1.14 veillard 1493: */
1494:
1.55 daniel 1495: int
1496: xmlStrcmp(const CHAR *str1, const CHAR *str2) {
1.14 veillard 1497: register int tmp;
1498:
1499: do {
1500: tmp = *str1++ - *str2++;
1501: if (tmp != 0) return(tmp);
1502: } while ((*str1 != 0) && (*str2 != 0));
1503: return (*str1 - *str2);
1504: }
1505:
1.50 daniel 1506: /**
1507: * xmlStrncmp:
1508: * @str1: the first CHAR *
1509: * @str2: the second CHAR *
1510: * @len: the max comparison length
1511: *
1512: * a strncmp for CHAR's
1.68 daniel 1513: *
1514: * Returns the integer result of the comparison
1.14 veillard 1515: */
1516:
1.55 daniel 1517: int
1518: xmlStrncmp(const CHAR *str1, const CHAR *str2, int len) {
1.14 veillard 1519: register int tmp;
1520:
1521: if (len <= 0) return(0);
1522: do {
1523: tmp = *str1++ - *str2++;
1524: if (tmp != 0) return(tmp);
1525: len--;
1526: if (len <= 0) return(0);
1527: } while ((*str1 != 0) && (*str2 != 0));
1528: return (*str1 - *str2);
1529: }
1530:
1.50 daniel 1531: /**
1532: * xmlStrchr:
1533: * @str: the CHAR * array
1534: * @val: the CHAR to search
1535: *
1536: * a strchr for CHAR's
1.68 daniel 1537: *
1538: * Returns the CHAR * for the first occurence or NULL.
1.14 veillard 1539: */
1540:
1.89 daniel 1541: const CHAR *
1.55 daniel 1542: xmlStrchr(const CHAR *str, CHAR val) {
1.14 veillard 1543: while (*str != 0) {
1544: if (*str == val) return((CHAR *) str);
1545: str++;
1546: }
1547: return(NULL);
1.89 daniel 1548: }
1549:
1550: /**
1551: * xmlStrstr:
1552: * @str: the CHAR * array (haystack)
1553: * @val: the CHAR to search (needle)
1554: *
1555: * a strstr for CHAR's
1556: *
1557: * Returns the CHAR * for the first occurence or NULL.
1558: */
1559:
1560: const CHAR *
1561: xmlStrstr(const CHAR *str, CHAR *val) {
1562: int n;
1563:
1564: if (str == NULL) return(NULL);
1565: if (val == NULL) return(NULL);
1566: n = xmlStrlen(val);
1567:
1568: if (n == 0) return(str);
1569: while (*str != 0) {
1570: if (*str == *val) {
1571: if (!xmlStrncmp(str, val, n)) return((const CHAR *) str);
1572: }
1573: str++;
1574: }
1575: return(NULL);
1576: }
1577:
1578: /**
1579: * xmlStrsub:
1580: * @str: the CHAR * array (haystack)
1581: * @start: the index of the first char (zero based)
1582: * @len: the length of the substring
1583: *
1584: * Extract a substring of a given string
1585: *
1586: * Returns the CHAR * for the first occurence or NULL.
1587: */
1588:
1589: CHAR *
1590: xmlStrsub(const CHAR *str, int start, int len) {
1591: int i;
1592:
1593: if (str == NULL) return(NULL);
1594: if (start < 0) return(NULL);
1.90 daniel 1595: if (len < 0) return(NULL);
1.89 daniel 1596:
1597: for (i = 0;i < start;i++) {
1598: if (*str == 0) return(NULL);
1599: str++;
1600: }
1601: if (*str == 0) return(NULL);
1602: return(xmlStrndup(str, len));
1.14 veillard 1603: }
1.28 daniel 1604:
1.50 daniel 1605: /**
1606: * xmlStrlen:
1607: * @str: the CHAR * array
1608: *
1609: * lenght of a CHAR's string
1.68 daniel 1610: *
1611: * Returns the number of CHAR contained in the ARRAY.
1.45 daniel 1612: */
1613:
1.55 daniel 1614: int
1615: xmlStrlen(const CHAR *str) {
1.45 daniel 1616: int len = 0;
1617:
1618: if (str == NULL) return(0);
1619: while (*str != 0) {
1620: str++;
1621: len++;
1622: }
1623: return(len);
1624: }
1625:
1.50 daniel 1626: /**
1627: * xmlStrncat:
1.68 daniel 1628: * @cur: the original CHAR * array
1.50 daniel 1629: * @add: the CHAR * array added
1630: * @len: the length of @add
1631: *
1632: * a strncat for array of CHAR's
1.68 daniel 1633: *
1634: * Returns a new CHAR * containing the concatenated string.
1.45 daniel 1635: */
1636:
1.55 daniel 1637: CHAR *
1638: xmlStrncat(CHAR *cur, const CHAR *add, int len) {
1.45 daniel 1639: int size;
1640: CHAR *ret;
1641:
1642: if ((add == NULL) || (len == 0))
1643: return(cur);
1644: if (cur == NULL)
1645: return(xmlStrndup(add, len));
1646:
1647: size = xmlStrlen(cur);
1648: ret = realloc(cur, (size + len + 1) * sizeof(CHAR));
1649: if (ret == NULL) {
1.86 daniel 1650: fprintf(stderr, "xmlStrncat: realloc of %ld byte failed\n",
1651: (size + len + 1) * (long)sizeof(CHAR));
1.45 daniel 1652: return(cur);
1653: }
1654: memcpy(&ret[size], add, len * sizeof(CHAR));
1655: ret[size + len] = 0;
1656: return(ret);
1657: }
1658:
1.50 daniel 1659: /**
1660: * xmlStrcat:
1.68 daniel 1661: * @cur: the original CHAR * array
1.50 daniel 1662: * @add: the CHAR * array added
1663: *
1664: * a strcat for array of CHAR's
1.68 daniel 1665: *
1666: * Returns a new CHAR * containing the concatenated string.
1.45 daniel 1667: */
1.55 daniel 1668: CHAR *
1669: xmlStrcat(CHAR *cur, const CHAR *add) {
1.45 daniel 1670: const CHAR *p = add;
1671:
1672: if (add == NULL) return(cur);
1673: if (cur == NULL)
1674: return(xmlStrdup(add));
1675:
1676: while (IS_CHAR(*p)) p++;
1677: return(xmlStrncat(cur, add, p - add));
1678: }
1679:
1680: /************************************************************************
1681: * *
1682: * Commodity functions, cleanup needed ? *
1683: * *
1684: ************************************************************************/
1685:
1.50 daniel 1686: /**
1687: * areBlanks:
1688: * @ctxt: an XML parser context
1689: * @str: a CHAR *
1690: * @len: the size of @str
1691: *
1.45 daniel 1692: * Is this a sequence of blank chars that one can ignore ?
1.50 daniel 1693: *
1.99 daniel 1694: * TODO: Whether white space are significant has to be checked accordingly
1695: * to DTD informations if available
1.68 daniel 1696: *
1697: * Returns 1 if ignorable 0 otherwise.
1.45 daniel 1698: */
1699:
1700: static int areBlanks(xmlParserCtxtPtr ctxt, const CHAR *str, int len) {
1.104 daniel 1701: int i, ret;
1.45 daniel 1702: xmlNodePtr lastChild;
1703:
1704: for (i = 0;i < len;i++)
1705: if (!(IS_BLANK(str[i]))) return(0);
1706:
1707: if (CUR != '<') return(0);
1.72 daniel 1708: if (ctxt->node == NULL) return(0);
1.104 daniel 1709: if (ctxt->myDoc != NULL) {
1710: ret = xmlIsMixedElement(ctxt->myDoc, ctxt->node->name);
1711: if (ret == 0) return(1);
1712: if (ret == 1) return(0);
1713: }
1714: /*
1715: * heuristic
1716: */
1.45 daniel 1717: lastChild = xmlGetLastChild(ctxt->node);
1718: if (lastChild == NULL) {
1719: if (ctxt->node->content != NULL) return(0);
1720: } else if (xmlNodeIsText(lastChild))
1721: return(0);
1.104 daniel 1722: else if ((ctxt->node->childs != NULL) &&
1723: (xmlNodeIsText(ctxt->node->childs)))
1724: return(0);
1.45 daniel 1725: return(1);
1726: }
1727:
1.50 daniel 1728: /**
1729: * xmlHandleEntity:
1730: * @ctxt: an XML parser context
1731: * @entity: an XML entity pointer.
1732: *
1733: * Default handling of defined entities, when should we define a new input
1.45 daniel 1734: * stream ? When do we just handle that as a set of chars ?
1.99 daniel 1735: *
1736: * OBSOLETE: to be removed at some point.
1.45 daniel 1737: */
1738:
1.55 daniel 1739: void
1740: xmlHandleEntity(xmlParserCtxtPtr ctxt, xmlEntityPtr entity) {
1.45 daniel 1741: int len;
1.50 daniel 1742: xmlParserInputPtr input;
1.45 daniel 1743:
1744: if (entity->content == NULL) {
1.55 daniel 1745: if ((ctxt->sax != NULL) && (ctxt->sax->error != NULL))
1.74 daniel 1746: ctxt->sax->error(ctxt->userData, "xmlHandleEntity %s: content == NULL\n",
1.45 daniel 1747: entity->name);
1.59 daniel 1748: ctxt->wellFormed = 0;
1.45 daniel 1749: return;
1750: }
1751: len = xmlStrlen(entity->content);
1752: if (len <= 2) goto handle_as_char;
1753:
1754: /*
1755: * Redefine its content as an input stream.
1756: */
1.50 daniel 1757: input = xmlNewEntityInputStream(ctxt, entity);
1758: xmlPushInput(ctxt, input);
1.45 daniel 1759: return;
1760:
1761: handle_as_char:
1762: /*
1763: * Just handle the content as a set of chars.
1764: */
1.72 daniel 1765: if ((ctxt->sax != NULL) && (ctxt->sax->characters != NULL))
1.74 daniel 1766: ctxt->sax->characters(ctxt->userData, entity->content, len);
1.45 daniel 1767:
1768: }
1769:
/*
 * Forward declarations for recursive behaviour.
 */
1.77 daniel 1773: void xmlParsePEReference(xmlParserCtxtPtr ctxt);
1774: void xmlParseReference(xmlParserCtxtPtr ctxt);
1.45 daniel 1775:
1.28 daniel 1776: /************************************************************************
1777: * *
1778: * Extra stuff for namespace support *
1779: * Relates to http://www.w3.org/TR/WD-xml-names *
1780: * *
1781: ************************************************************************/
1782:
1.50 daniel 1783: /**
1784: * xmlNamespaceParseNCName:
1785: * @ctxt: an XML parser context
1786: *
1787: * parse an XML namespace name.
1.28 daniel 1788: *
1789: * [NS 3] NCName ::= (Letter | '_') (NCNameChar)*
1790: *
1791: * [NS 4] NCNameChar ::= Letter | Digit | '.' | '-' | '_' |
1792: * CombiningChar | Extender
1.68 daniel 1793: *
1794: * Returns the namespace name or NULL
1.28 daniel 1795: */
1796:
1.55 daniel 1797: CHAR *
1798: xmlNamespaceParseNCName(xmlParserCtxtPtr ctxt) {
1.91 daniel 1799: CHAR buf[XML_MAX_NAMELEN];
1800: int len = 0;
1.28 daniel 1801:
1.40 daniel 1802: if (!IS_LETTER(CUR) && (CUR != '_')) return(NULL);
1.28 daniel 1803:
1.40 daniel 1804: while ((IS_LETTER(CUR)) || (IS_DIGIT(CUR)) ||
1805: (CUR == '.') || (CUR == '-') ||
1806: (CUR == '_') ||
1807: (IS_COMBINING(CUR)) ||
1.91 daniel 1808: (IS_EXTENDER(CUR))) {
1809: buf[len++] = CUR;
1.40 daniel 1810: NEXT;
1.91 daniel 1811: if (len >= XML_MAX_NAMELEN) {
1812: fprintf(stderr,
1813: "xmlNamespaceParseNCName: reached XML_MAX_NAMELEN limit\n");
1814: while ((IS_LETTER(CUR)) || (IS_DIGIT(CUR)) ||
1815: (CUR == '.') || (CUR == '-') ||
1816: (CUR == '_') ||
1817: (IS_COMBINING(CUR)) ||
1818: (IS_EXTENDER(CUR)))
1819: NEXT;
1820: break;
1821: }
1822: }
1823: return(xmlStrndup(buf, len));
1.28 daniel 1824: }
1825:
1.50 daniel 1826: /**
1827: * xmlNamespaceParseQName:
1828: * @ctxt: an XML parser context
1829: * @prefix: a CHAR **
1830: *
1831: * parse an XML qualified name
1.28 daniel 1832: *
1833: * [NS 5] QName ::= (Prefix ':')? LocalPart
1834: *
1835: * [NS 6] Prefix ::= NCName
1836: *
1837: * [NS 7] LocalPart ::= NCName
1.68 daniel 1838: *
1839: * Returns the function returns the local part, and prefix is updated
1.50 daniel 1840: * to get the Prefix if any.
1.28 daniel 1841: */
1842:
1.55 daniel 1843: CHAR *
1844: xmlNamespaceParseQName(xmlParserCtxtPtr ctxt, CHAR **prefix) {
1.28 daniel 1845: CHAR *ret = NULL;
1846:
1847: *prefix = NULL;
1848: ret = xmlNamespaceParseNCName(ctxt);
1.40 daniel 1849: if (CUR == ':') {
1.28 daniel 1850: *prefix = ret;
1.40 daniel 1851: NEXT;
1.28 daniel 1852: ret = xmlNamespaceParseNCName(ctxt);
1853: }
1854:
1855: return(ret);
1856: }
1857:
1.50 daniel 1858: /**
1.72 daniel 1859: * xmlSplitQName:
1860: * @name: an XML parser context
1861: * @prefix: a CHAR **
1862: *
1863: * parse an XML qualified name string
1864: *
1865: * [NS 5] QName ::= (Prefix ':')? LocalPart
1866: *
1867: * [NS 6] Prefix ::= NCName
1868: *
1869: * [NS 7] LocalPart ::= NCName
1870: *
1871: * Returns the function returns the local part, and prefix is updated
1872: * to get the Prefix if any.
1873: */
1874:
1875: CHAR *
1876: xmlSplitQName(const CHAR *name, CHAR **prefix) {
1877: CHAR *ret = NULL;
1878: const CHAR *q;
1879: const CHAR *cur = name;
1880:
1881: *prefix = NULL;
1882: if (!IS_LETTER(*cur) && (*cur != '_')) return(NULL);
1883: q = cur++;
1884:
1885: while ((IS_LETTER(*cur)) || (IS_DIGIT(*cur)) ||
1886: (*cur == '.') || (*cur == '-') ||
1887: (*cur == '_') ||
1888: (IS_COMBINING(*cur)) ||
1889: (IS_EXTENDER(*cur)))
1890: cur++;
1891:
1892: ret = xmlStrndup(q, cur - q);
1893:
1894: if (*cur == ':') {
1895: cur++;
1896: if (!IS_LETTER(*cur) && (*cur != '_')) return(ret);
1897: *prefix = ret;
1898:
1899: q = cur++;
1900:
1901: while ((IS_LETTER(*cur)) || (IS_DIGIT(*cur)) ||
1902: (*cur == '.') || (*cur == '-') ||
1903: (*cur == '_') ||
1904: (IS_COMBINING(*cur)) ||
1905: (IS_EXTENDER(*cur)))
1906: cur++;
1907:
1908: ret = xmlStrndup(q, cur - q);
1909: }
1910:
1911: return(ret);
1912: }
1913: /**
1.50 daniel 1914: * xmlNamespaceParseNSDef:
1915: * @ctxt: an XML parser context
1916: *
1917: * parse a namespace prefix declaration
1.28 daniel 1918: *
1919: * [NS 1] NSDef ::= PrefixDef Eq SystemLiteral
1920: *
1921: * [NS 2] PrefixDef ::= 'xmlns' (':' NCName)?
1.68 daniel 1922: *
1923: * Returns the namespace name
1.28 daniel 1924: */
1925:
1.55 daniel 1926: CHAR *
1927: xmlNamespaceParseNSDef(xmlParserCtxtPtr ctxt) {
1.28 daniel 1928: CHAR *name = NULL;
1929:
1.40 daniel 1930: if ((CUR == 'x') && (NXT(1) == 'm') &&
1931: (NXT(2) == 'l') && (NXT(3) == 'n') &&
1932: (NXT(4) == 's')) {
1933: SKIP(5);
1934: if (CUR == ':') {
1935: NEXT;
1.28 daniel 1936: name = xmlNamespaceParseNCName(ctxt);
1937: }
1938: }
1.39 daniel 1939: return(name);
1.28 daniel 1940: }
1941:
1.50 daniel 1942: /**
1943: * xmlParseQuotedString:
1944: * @ctxt: an XML parser context
1945: *
1.45 daniel 1946: * [OLD] Parse and return a string between quotes or doublequotes
1.110 daniel 1947: * To be removed at next drop of binary compatibility
1.68 daniel 1948: *
1949: * Returns the string parser or NULL.
1.45 daniel 1950: */
1.55 daniel 1951: CHAR *
1952: xmlParseQuotedString(xmlParserCtxtPtr ctxt) {
1.45 daniel 1953: CHAR *ret = NULL;
1954: const CHAR *q;
1955:
1956: if (CUR == '"') {
1957: NEXT;
1958: q = CUR_PTR;
1959: while (IS_CHAR(CUR) && (CUR != '"')) NEXT;
1.55 daniel 1960: if (CUR != '"') {
1961: if ((ctxt->sax != NULL) && (ctxt->sax->error != NULL))
1.74 daniel 1962: ctxt->sax->error(ctxt->userData, "String not closed \"%.50s\"\n", q);
1.59 daniel 1963: ctxt->wellFormed = 0;
1.55 daniel 1964: } else {
1.45 daniel 1965: ret = xmlStrndup(q, CUR_PTR - q);
1966: NEXT;
1967: }
1968: } else if (CUR == '\''){
1969: NEXT;
1970: q = CUR_PTR;
1971: while (IS_CHAR(CUR) && (CUR != '\'')) NEXT;
1.55 daniel 1972: if (CUR != '\'') {
1973: if ((ctxt->sax != NULL) && (ctxt->sax->error != NULL))
1.74 daniel 1974: ctxt->sax->error(ctxt->userData, "String not closed \"%.50s\"\n", q);
1.59 daniel 1975: ctxt->wellFormed = 0;
1.55 daniel 1976: } else {
1.45 daniel 1977: ret = xmlStrndup(q, CUR_PTR - q);
1978: NEXT;
1979: }
1980: }
1981: return(ret);
1982: }
1983:
1.50 daniel 1984: /**
1985: * xmlParseNamespace:
1986: * @ctxt: an XML parser context
1987: *
1.45 daniel 1988: * [OLD] xmlParseNamespace: parse specific PI '<?namespace ...' constructs.
1989: *
1990: * This is what the older xml-name Working Draft specified, a bunch of
1991: * other stuff may still rely on it, so support is still here as
1992: * if ot was declared on the root of the Tree:-(
1.110 daniel 1993: *
1994: * To be removed at next drop of binary compatibility
1.45 daniel 1995: */
1996:
1.55 daniel 1997: void
1998: xmlParseNamespace(xmlParserCtxtPtr ctxt) {
1.45 daniel 1999: CHAR *href = NULL;
2000: CHAR *prefix = NULL;
2001: int garbage = 0;
2002:
2003: /*
2004: * We just skipped "namespace" or "xml:namespace"
2005: */
2006: SKIP_BLANKS;
2007:
2008: while (IS_CHAR(CUR) && (CUR != '>')) {
2009: /*
2010: * We can have "ns" or "prefix" attributes
2011: * Old encoding as 'href' or 'AS' attributes is still supported
2012: */
2013: if ((CUR == 'n') && (NXT(1) == 's')) {
2014: garbage = 0;
2015: SKIP(2);
2016: SKIP_BLANKS;
2017:
2018: if (CUR != '=') continue;
2019: NEXT;
2020: SKIP_BLANKS;
2021:
2022: href = xmlParseQuotedString(ctxt);
2023: SKIP_BLANKS;
2024: } else if ((CUR == 'h') && (NXT(1) == 'r') &&
2025: (NXT(2) == 'e') && (NXT(3) == 'f')) {
2026: garbage = 0;
2027: SKIP(4);
2028: SKIP_BLANKS;
2029:
2030: if (CUR != '=') continue;
2031: NEXT;
2032: SKIP_BLANKS;
2033:
2034: href = xmlParseQuotedString(ctxt);
2035: SKIP_BLANKS;
2036: } else if ((CUR == 'p') && (NXT(1) == 'r') &&
2037: (NXT(2) == 'e') && (NXT(3) == 'f') &&
2038: (NXT(4) == 'i') && (NXT(5) == 'x')) {
2039: garbage = 0;
2040: SKIP(6);
2041: SKIP_BLANKS;
2042:
2043: if (CUR != '=') continue;
2044: NEXT;
2045: SKIP_BLANKS;
2046:
2047: prefix = xmlParseQuotedString(ctxt);
2048: SKIP_BLANKS;
2049: } else if ((CUR == 'A') && (NXT(1) == 'S')) {
2050: garbage = 0;
2051: SKIP(2);
2052: SKIP_BLANKS;
2053:
2054: if (CUR != '=') continue;
2055: NEXT;
2056: SKIP_BLANKS;
2057:
2058: prefix = xmlParseQuotedString(ctxt);
2059: SKIP_BLANKS;
2060: } else if ((CUR == '?') && (NXT(1) == '>')) {
2061: garbage = 0;
1.91 daniel 2062: NEXT;
1.45 daniel 2063: } else {
2064: /*
2065: * Found garbage when parsing the namespace
2066: */
2067: if (!garbage)
1.55 daniel 2068: if ((ctxt->sax != NULL) && (ctxt->sax->error != NULL))
1.74 daniel 2069: ctxt->sax->error(ctxt->userData, "xmlParseNamespace found garbage\n");
1.59 daniel 2070: ctxt->wellFormed = 0;
1.45 daniel 2071: NEXT;
2072: }
2073: }
2074:
2075: MOVETO_ENDTAG(CUR_PTR);
2076: NEXT;
2077:
2078: /*
2079: * Register the DTD.
1.72 daniel 2080: if (href != NULL)
2081: if ((ctxt->sax != NULL) && (ctxt->sax->globalNamespace != NULL))
1.74 daniel 2082: ctxt->sax->globalNamespace(ctxt->userData, href, prefix);
1.45 daniel 2083: */
2084:
2085: if (prefix != NULL) free(prefix);
2086: if (href != NULL) free(href);
2087: }
2088:
1.28 daniel 2089: /************************************************************************
2090: * *
2091: * The parser itself *
2092: * Relates to http://www.w3.org/TR/REC-xml *
2093: * *
2094: ************************************************************************/
1.14 veillard 2095:
1.50 daniel 2096: /**
1.97 daniel 2097: * xmlScanName:
2098: * @ctxt: an XML parser context
2099: *
2100: * Trickery: parse an XML name but without consuming the input flow
2101: * Needed for rollback cases.
2102: *
2103: * [4] NameChar ::= Letter | Digit | '.' | '-' | '_' | ':' |
2104: * CombiningChar | Extender
2105: *
2106: * [5] Name ::= (Letter | '_' | ':') (NameChar)*
2107: *
2108: * [6] Names ::= Name (S Name)*
2109: *
2110: * Returns the Name parsed or NULL
2111: */
2112:
2113: CHAR *
2114: xmlScanName(xmlParserCtxtPtr ctxt) {
2115: CHAR buf[XML_MAX_NAMELEN];
2116: int len = 0;
2117:
2118: GROW;
2119: if (!IS_LETTER(CUR) && (CUR != '_') &&
2120: (CUR != ':')) {
2121: return(NULL);
2122: }
2123:
2124: while ((IS_LETTER(NXT(len))) || (IS_DIGIT(NXT(len))) ||
2125: (NXT(len) == '.') || (NXT(len) == '-') ||
2126: (NXT(len) == '_') || (NXT(len) == ':') ||
2127: (IS_COMBINING(NXT(len))) ||
2128: (IS_EXTENDER(NXT(len)))) {
2129: buf[len] = NXT(len);
2130: len++;
2131: if (len >= XML_MAX_NAMELEN) {
2132: fprintf(stderr,
2133: "xmlScanName: reached XML_MAX_NAMELEN limit\n");
2134: while ((IS_LETTER(NXT(len))) || (IS_DIGIT(NXT(len))) ||
2135: (NXT(len) == '.') || (NXT(len) == '-') ||
2136: (NXT(len) == '_') || (NXT(len) == ':') ||
2137: (IS_COMBINING(NXT(len))) ||
2138: (IS_EXTENDER(NXT(len))))
2139: len++;
2140: break;
2141: }
2142: }
2143: return(xmlStrndup(buf, len));
2144: }
2145:
2146: /**
1.50 daniel 2147: * xmlParseName:
2148: * @ctxt: an XML parser context
2149: *
2150: * parse an XML name.
1.22 daniel 2151: *
2152: * [4] NameChar ::= Letter | Digit | '.' | '-' | '_' | ':' |
2153: * CombiningChar | Extender
2154: *
2155: * [5] Name ::= (Letter | '_' | ':') (NameChar)*
2156: *
2157: * [6] Names ::= Name (S Name)*
1.68 daniel 2158: *
2159: * Returns the Name parsed or NULL
1.1 veillard 2160: */
2161:
1.55 daniel 2162: CHAR *
2163: xmlParseName(xmlParserCtxtPtr ctxt) {
1.91 daniel 2164: CHAR buf[XML_MAX_NAMELEN];
2165: int len = 0;
1.97 daniel 2166: CHAR cur;
1.1 veillard 2167:
1.91 daniel 2168: GROW;
1.97 daniel 2169: cur = CUR;
2170: if (!IS_LETTER(cur) && (cur != '_') &&
2171: (cur != ':')) {
1.91 daniel 2172: return(NULL);
2173: }
1.40 daniel 2174:
1.97 daniel 2175: while ((IS_LETTER(cur)) || (IS_DIGIT(cur)) ||
2176: (cur == '.') || (cur == '-') ||
2177: (cur == '_') || (cur == ':') ||
2178: (IS_COMBINING(cur)) ||
2179: (IS_EXTENDER(cur))) {
2180: buf[len++] = cur;
1.40 daniel 2181: NEXT;
1.97 daniel 2182: cur = CUR;
1.91 daniel 2183: if (len >= XML_MAX_NAMELEN) {
2184: fprintf(stderr,
2185: "xmlParseName: reached XML_MAX_NAMELEN limit\n");
1.97 daniel 2186: while ((IS_LETTER(cur)) || (IS_DIGIT(cur)) ||
2187: (cur == '.') || (cur == '-') ||
2188: (cur == '_') || (cur == ':') ||
2189: (IS_COMBINING(cur)) ||
2190: (IS_EXTENDER(cur))) {
2191: NEXT;
2192: cur = CUR;
2193: }
1.91 daniel 2194: break;
2195: }
2196: }
2197: return(xmlStrndup(buf, len));
1.22 daniel 2198: }
2199:
1.50 daniel 2200: /**
2201: * xmlParseNmtoken:
2202: * @ctxt: an XML parser context
2203: *
2204: * parse an XML Nmtoken.
1.22 daniel 2205: *
2206: * [7] Nmtoken ::= (NameChar)+
2207: *
2208: * [8] Nmtokens ::= Nmtoken (S Nmtoken)*
1.68 daniel 2209: *
2210: * Returns the Nmtoken parsed or NULL
1.22 daniel 2211: */
2212:
1.55 daniel 2213: CHAR *
2214: xmlParseNmtoken(xmlParserCtxtPtr ctxt) {
1.91 daniel 2215: CHAR buf[XML_MAX_NAMELEN];
2216: int len = 0;
1.22 daniel 2217:
1.91 daniel 2218: GROW;
1.40 daniel 2219: while ((IS_LETTER(CUR)) || (IS_DIGIT(CUR)) ||
2220: (CUR == '.') || (CUR == '-') ||
2221: (CUR == '_') || (CUR == ':') ||
2222: (IS_COMBINING(CUR)) ||
1.91 daniel 2223: (IS_EXTENDER(CUR))) {
2224: buf[len++] = CUR;
1.40 daniel 2225: NEXT;
1.91 daniel 2226: if (len >= XML_MAX_NAMELEN) {
2227: fprintf(stderr,
2228: "xmlParseNmtoken: reached XML_MAX_NAMELEN limit\n");
2229: while ((IS_LETTER(CUR)) || (IS_DIGIT(CUR)) ||
2230: (CUR == '.') || (CUR == '-') ||
2231: (CUR == '_') || (CUR == ':') ||
2232: (IS_COMBINING(CUR)) ||
2233: (IS_EXTENDER(CUR)))
2234: NEXT;
2235: break;
2236: }
2237: }
2238: return(xmlStrndup(buf, len));
1.1 veillard 2239: }
2240:
1.50 daniel 2241: /**
2242: * xmlParseEntityValue:
2243: * @ctxt: an XML parser context
1.78 daniel 2244: * @orig: if non-NULL store a copy of the original entity value
1.50 daniel 2245: *
2246: * parse a value for ENTITY decl.
1.24 daniel 2247: *
2248: * [9] EntityValue ::= '"' ([^%&"] | PEReference | Reference)* '"' |
2249: * "'" ([^%&'] | PEReference | Reference)* "'"
1.68 daniel 2250: *
1.78 daniel 2251: * Returns the EntityValue parsed with reference substitued or NULL
1.24 daniel 2252: */
2253:
1.55 daniel 2254: CHAR *
1.78 daniel 2255: xmlParseEntityValue(xmlParserCtxtPtr ctxt, CHAR **orig) {
1.77 daniel 2256: CHAR *ret = NULL;
1.78 daniel 2257: const CHAR *org = NULL;
1.79 daniel 2258: const CHAR *tst = NULL;
2259: const CHAR *temp = NULL;
1.98 daniel 2260: xmlParserInputPtr input;
1.24 daniel 2261:
1.91 daniel 2262: SHRINK;
1.40 daniel 2263: if (CUR == '"') {
1.96 daniel 2264: ctxt->instate = XML_PARSER_ENTITY_VALUE;
1.98 daniel 2265: input = ctxt->input;
1.40 daniel 2266: NEXT;
1.78 daniel 2267: org = CUR_PTR;
1.98 daniel 2268: /*
2269: * NOTE: 4.4.5 Included in Literal
2270: * When a parameter entity reference appears in a literal entity
2271: * value, ... a single or double quote character in the replacement
2272: * text is always treated as a normal data character and will not
2273: * terminate the literal.
2274: * In practice it means we stop the loop only when back at parsing
2275: * the initial entity and the quote is found
2276: */
2277: while ((CUR != '"') || (ctxt->input != input)) {
1.79 daniel 2278: tst = CUR_PTR;
1.98 daniel 2279: /*
2280: * NOTE: 4.4.7 Bypassed
2281: * When a general entity reference appears in the EntityValue in
2282: * an entity declaration, it is bypassed and left as is.
2283: * so XML_SUBSTITUTE_REF is not set.
2284: */
2285: if (ctxt->input != input)
2286: temp = xmlDecodeEntities(ctxt, -1, XML_SUBSTITUTE_PEREF,
2287: 0, 0, 0);
2288: else
2289: temp = xmlDecodeEntities(ctxt, -1, XML_SUBSTITUTE_PEREF,
2290: '"', 0, 0);
1.94 daniel 2291:
2292: /*
2293: * Pop-up of finished entities.
2294: */
2295: while ((CUR == 0) && (ctxt->inputNr > 1))
2296: xmlPopInput(ctxt);
2297:
2298: if ((temp == NULL) && (tst == CUR_PTR)) {
2299: ret = xmlStrndup("", 0);
2300: break;
2301: }
2302: if ((temp[0] == 0) && (tst == CUR_PTR)) {
2303: free((char *)temp);
2304: ret = xmlStrndup("", 0);
2305: break;
2306: }
1.79 daniel 2307: ret = xmlStrcat(ret, temp);
1.80 daniel 2308: if (temp != NULL) free((char *)temp);
1.94 daniel 2309: GROW;
1.79 daniel 2310: }
1.77 daniel 2311: if (CUR != '"') {
1.55 daniel 2312: if ((ctxt->sax != NULL) && (ctxt->sax->error != NULL))
1.79 daniel 2313: ctxt->sax->error(ctxt->userData, "EntityValue: \" expected\n");
1.59 daniel 2314: ctxt->wellFormed = 0;
1.78 daniel 2315: } else {
1.99 daniel 2316: if (orig != NULL) /* !!!!!!!!! */
1.78 daniel 2317: *orig = xmlStrndup(org, CUR_PTR - org);
1.94 daniel 2318: if (ret == NULL)
2319: ret = xmlStrndup("", 0);
1.40 daniel 2320: NEXT;
1.78 daniel 2321: }
1.40 daniel 2322: } else if (CUR == '\'') {
1.96 daniel 2323: ctxt->instate = XML_PARSER_ENTITY_VALUE;
1.98 daniel 2324: input = ctxt->input;
1.40 daniel 2325: NEXT;
1.78 daniel 2326: org = CUR_PTR;
1.98 daniel 2327: /*
2328: * NOTE: 4.4.5 Included in Literal
2329: * When a parameter entity reference appears in a literal entity
2330: * value, ... a single or double quote character in the replacement
2331: * text is always treated as a normal data character and will not
2332: * terminate the literal.
2333: * In practice it means we stop the loop only when back at parsing
2334: * the initial entity and the quote is found
2335: */
2336: while ((CUR != '\'') || (ctxt->input != input)) {
1.79 daniel 2337: tst = CUR_PTR;
1.98 daniel 2338: /*
2339: * NOTE: 4.4.7 Bypassed
2340: * When a general entity reference appears in the EntityValue in
2341: * an entity declaration, it is bypassed and left as is.
2342: * so XML_SUBSTITUTE_REF is not set.
2343: */
2344: if (ctxt->input != input)
2345: temp = xmlDecodeEntities(ctxt, -1, XML_SUBSTITUTE_PEREF,
2346: 0, 0, 0);
2347: else
2348: temp = xmlDecodeEntities(ctxt, -1, XML_SUBSTITUTE_PEREF,
2349: '\'', 0, 0);
1.94 daniel 2350:
2351: /*
2352: * Pop-up of finished entities.
2353: */
2354: while ((CUR == 0) && (ctxt->inputNr > 1))
2355: xmlPopInput(ctxt);
2356:
2357: if ((temp == NULL) && (tst == CUR_PTR)) {
2358: ret = xmlStrndup("", 0);
2359: break;
2360: }
2361: if ((temp[0] == 0) && (tst == CUR_PTR)) {
2362: free((char *)temp);
2363: ret = xmlStrndup("", 0);
2364: break;
2365: }
1.79 daniel 2366: ret = xmlStrcat(ret, temp);
1.80 daniel 2367: if (temp != NULL) free((char *)temp);
1.94 daniel 2368: GROW;
1.79 daniel 2369: }
1.77 daniel 2370: if (CUR != '\'') {
1.55 daniel 2371: if ((ctxt->sax != NULL) && (ctxt->sax->error != NULL))
1.77 daniel 2372: ctxt->sax->error(ctxt->userData, "EntityValue: ' expected\n");
1.59 daniel 2373: ctxt->wellFormed = 0;
1.78 daniel 2374: } else {
1.99 daniel 2375: if (orig != NULL) /* !!!!!!!!! */
1.78 daniel 2376: *orig = xmlStrndup(org, CUR_PTR - org);
1.94 daniel 2377: if (ret == NULL)
2378: ret = xmlStrndup("", 0);
1.40 daniel 2379: NEXT;
1.78 daniel 2380: }
1.24 daniel 2381: } else {
1.55 daniel 2382: if ((ctxt->sax != NULL) && (ctxt->sax->error != NULL))
1.77 daniel 2383: ctxt->sax->error(ctxt->userData, "EntityValue: \" or ' expected\n");
1.59 daniel 2384: ctxt->wellFormed = 0;
1.24 daniel 2385: }
2386:
2387: return(ret);
2388: }
2389:
1.50 daniel 2390: /**
2391: * xmlParseAttValue:
2392: * @ctxt: an XML parser context
2393: *
2394: * parse a value for an attribute
1.78 daniel 2395: * Note: the parser won't do substitution of entities here, this
1.79 daniel 2396: * will be handled later in xmlStringGetNodeList, unless it was
2397: * asked for ctxt->replaceEntities != 0
1.29 daniel 2398: *
2399: * [10] AttValue ::= '"' ([^<&"] | Reference)* '"' |
2400: * "'" ([^<&'] | Reference)* "'"
1.68 daniel 2401: *
2402: * Returns the AttValue parsed or NULL.
1.29 daniel 2403: */
2404:
1.55 daniel 2405: CHAR *
2406: xmlParseAttValue(xmlParserCtxtPtr ctxt) {
1.77 daniel 2407: CHAR *ret = NULL;
1.29 daniel 2408:
1.91 daniel 2409: SHRINK;
1.40 daniel 2410: if (CUR == '"') {
1.96 daniel 2411: ctxt->instate = XML_PARSER_ATTRIBUTE_VALUE;
1.40 daniel 2412: NEXT;
1.98 daniel 2413: ret = xmlDecodeEntities(ctxt, -1, XML_SUBSTITUTE_REF, '"', '<', 0);
1.77 daniel 2414: if (CUR == '<') {
2415: if ((ctxt->sax != NULL) && (ctxt->sax->error != NULL))
2416: ctxt->sax->error(ctxt->userData,
2417: "Unescaped '<' not allowed in attributes values\n");
2418: ctxt->wellFormed = 0;
1.29 daniel 2419: }
1.77 daniel 2420: if (CUR != '"') {
1.55 daniel 2421: if ((ctxt->sax != NULL) && (ctxt->sax->error != NULL))
1.77 daniel 2422: ctxt->sax->error(ctxt->userData, "AttValue: ' expected\n");
1.59 daniel 2423: ctxt->wellFormed = 0;
1.77 daniel 2424: } else
1.40 daniel 2425: NEXT;
2426: } else if (CUR == '\'') {
1.96 daniel 2427: ctxt->instate = XML_PARSER_ATTRIBUTE_VALUE;
1.40 daniel 2428: NEXT;
1.98 daniel 2429: ret = xmlDecodeEntities(ctxt, -1, XML_SUBSTITUTE_REF, '\'', '<', 0);
1.77 daniel 2430: if (CUR == '<') {
2431: if ((ctxt->sax != NULL) && (ctxt->sax->error != NULL))
2432: ctxt->sax->error(ctxt->userData,
2433: "Unescaped '<' not allowed in attributes values\n");
2434: ctxt->wellFormed = 0;
1.29 daniel 2435: }
1.77 daniel 2436: if (CUR != '\'') {
1.55 daniel 2437: if ((ctxt->sax != NULL) && (ctxt->sax->error != NULL))
1.77 daniel 2438: ctxt->sax->error(ctxt->userData, "AttValue: ' expected\n");
1.59 daniel 2439: ctxt->wellFormed = 0;
1.77 daniel 2440: } else
1.40 daniel 2441: NEXT;
1.29 daniel 2442: } else {
1.55 daniel 2443: if ((ctxt->sax != NULL) && (ctxt->sax->error != NULL))
1.74 daniel 2444: ctxt->sax->error(ctxt->userData, "AttValue: \" or ' expected\n");
1.59 daniel 2445: ctxt->wellFormed = 0;
1.29 daniel 2446: }
2447:
2448: return(ret);
2449: }
2450:
1.50 daniel 2451: /**
2452: * xmlParseSystemLiteral:
2453: * @ctxt: an XML parser context
2454: *
2455: * parse an XML Literal
1.21 daniel 2456: *
1.22 daniel 2457: * [11] SystemLiteral ::= ('"' [^"]* '"') | ("'" [^']* "'")
1.68 daniel 2458: *
2459: * Returns the SystemLiteral parsed or NULL
1.21 daniel 2460: */
2461:
1.55 daniel 2462: CHAR *
2463: xmlParseSystemLiteral(xmlParserCtxtPtr ctxt) {
1.21 daniel 2464: const CHAR *q;
2465: CHAR *ret = NULL;
2466:
1.91 daniel 2467: SHRINK;
1.40 daniel 2468: if (CUR == '"') {
2469: NEXT;
2470: q = CUR_PTR;
2471: while ((IS_CHAR(CUR)) && (CUR != '"'))
2472: NEXT;
2473: if (!IS_CHAR(CUR)) {
1.55 daniel 2474: if ((ctxt->sax != NULL) && (ctxt->sax->error != NULL))
1.74 daniel 2475: ctxt->sax->error(ctxt->userData, "Unfinished SystemLiteral\n");
1.59 daniel 2476: ctxt->wellFormed = 0;
1.21 daniel 2477: } else {
1.40 daniel 2478: ret = xmlStrndup(q, CUR_PTR - q);
2479: NEXT;
1.21 daniel 2480: }
1.40 daniel 2481: } else if (CUR == '\'') {
2482: NEXT;
2483: q = CUR_PTR;
2484: while ((IS_CHAR(CUR)) && (CUR != '\''))
2485: NEXT;
2486: if (!IS_CHAR(CUR)) {
1.55 daniel 2487: if ((ctxt->sax != NULL) && (ctxt->sax->error != NULL))
1.74 daniel 2488: ctxt->sax->error(ctxt->userData, "Unfinished SystemLiteral\n");
1.59 daniel 2489: ctxt->wellFormed = 0;
1.21 daniel 2490: } else {
1.40 daniel 2491: ret = xmlStrndup(q, CUR_PTR - q);
2492: NEXT;
1.21 daniel 2493: }
2494: } else {
1.55 daniel 2495: if ((ctxt->sax != NULL) && (ctxt->sax->error != NULL))
1.74 daniel 2496: ctxt->sax->error(ctxt->userData, "SystemLiteral \" or ' expected\n");
1.59 daniel 2497: ctxt->wellFormed = 0;
1.21 daniel 2498: }
2499:
2500: return(ret);
2501: }
2502:
1.50 daniel 2503: /**
2504: * xmlParsePubidLiteral:
2505: * @ctxt: an XML parser context
1.21 daniel 2506: *
1.50 daniel 2507: * parse an XML public literal
1.68 daniel 2508: *
2509: * [12] PubidLiteral ::= '"' PubidChar* '"' | "'" (PubidChar - "'")* "'"
2510: *
2511: * Returns the PubidLiteral parsed or NULL.
1.21 daniel 2512: */
2513:
1.55 daniel 2514: CHAR *
2515: xmlParsePubidLiteral(xmlParserCtxtPtr ctxt) {
1.21 daniel 2516: const CHAR *q;
2517: CHAR *ret = NULL;
2518: /*
2519: * Name ::= (Letter | '_') (NameChar)*
2520: */
1.91 daniel 2521: SHRINK;
1.40 daniel 2522: if (CUR == '"') {
2523: NEXT;
2524: q = CUR_PTR;
2525: while (IS_PUBIDCHAR(CUR)) NEXT;
2526: if (CUR != '"') {
1.55 daniel 2527: if ((ctxt->sax != NULL) && (ctxt->sax->error != NULL))
1.74 daniel 2528: ctxt->sax->error(ctxt->userData, "Unfinished PubidLiteral\n");
1.59 daniel 2529: ctxt->wellFormed = 0;
1.21 daniel 2530: } else {
1.40 daniel 2531: ret = xmlStrndup(q, CUR_PTR - q);
2532: NEXT;
1.21 daniel 2533: }
1.40 daniel 2534: } else if (CUR == '\'') {
2535: NEXT;
2536: q = CUR_PTR;
2537: while ((IS_LETTER(CUR)) && (CUR != '\''))
2538: NEXT;
2539: if (!IS_LETTER(CUR)) {
1.55 daniel 2540: if ((ctxt->sax != NULL) && (ctxt->sax->error != NULL))
1.74 daniel 2541: ctxt->sax->error(ctxt->userData, "Unfinished PubidLiteral\n");
1.59 daniel 2542: ctxt->wellFormed = 0;
1.21 daniel 2543: } else {
1.40 daniel 2544: ret = xmlStrndup(q, CUR_PTR - q);
2545: NEXT;
1.21 daniel 2546: }
2547: } else {
1.55 daniel 2548: if ((ctxt->sax != NULL) && (ctxt->sax->error != NULL))
1.74 daniel 2549: ctxt->sax->error(ctxt->userData, "SystemLiteral \" or ' expected\n");
1.59 daniel 2550: ctxt->wellFormed = 0;
1.21 daniel 2551: }
2552:
2553: return(ret);
2554: }
2555:
1.50 daniel 2556: /**
2557: * xmlParseCharData:
2558: * @ctxt: an XML parser context
2559: * @cdata: int indicating whether we are within a CDATA section
2560: *
2561: * parse a CharData section.
2562: * if we are within a CDATA section ']]>' marks an end of section.
1.27 daniel 2563: *
2564: * [14] CharData ::= [^<&]* - ([^<&]* ']]>' [^<&]*)
2565: */
2566:
1.55 daniel 2567: void
2568: xmlParseCharData(xmlParserCtxtPtr ctxt, int cdata) {
1.91 daniel 2569: CHAR buf[1000];
2570: int nbchar = 0;
1.97 daniel 2571: CHAR cur;
1.27 daniel 2572:
1.91 daniel 2573: SHRINK;
1.97 daniel 2574: /*
2575: * !!!!!!!!!!!!
2576: * NOTE: NXT(0) is used here to avoid breaking on < or &
2577: * entities substitutions.
2578: */
2579: cur = CUR;
2580: while ((IS_CHAR(cur)) && (cur != '<') &&
2581: (cur != '&')) {
2582: if ((cur == ']') && (NXT(1) == ']') &&
1.59 daniel 2583: (NXT(2) == '>')) {
2584: if (cdata) break;
2585: else {
2586: if ((ctxt->sax != NULL) && (ctxt->sax->error != NULL))
1.74 daniel 2587: ctxt->sax->error(ctxt->userData,
1.59 daniel 2588: "Sequence ']]>' not allowed in content\n");
2589: ctxt->wellFormed = 0;
2590: }
2591: }
1.91 daniel 2592: buf[nbchar++] = CUR;
2593: if (nbchar == 1000) {
2594: /*
2595: * Ok the segment is to be consumed as chars.
2596: */
2597: if (ctxt->sax != NULL) {
2598: if (areBlanks(ctxt, buf, nbchar)) {
2599: if (ctxt->sax->ignorableWhitespace != NULL)
2600: ctxt->sax->ignorableWhitespace(ctxt->userData,
2601: buf, nbchar);
2602: } else {
2603: if (ctxt->sax->characters != NULL)
2604: ctxt->sax->characters(ctxt->userData, buf, nbchar);
2605: }
2606: }
2607: nbchar = 0;
2608: }
1.40 daniel 2609: NEXT;
1.97 daniel 2610: cur = CUR;
1.27 daniel 2611: }
1.91 daniel 2612: if (nbchar != 0) {
2613: /*
2614: * Ok the segment is to be consumed as chars.
2615: */
2616: if (ctxt->sax != NULL) {
2617: if (areBlanks(ctxt, buf, nbchar)) {
2618: if (ctxt->sax->ignorableWhitespace != NULL)
2619: ctxt->sax->ignorableWhitespace(ctxt->userData, buf, nbchar);
2620: } else {
2621: if (ctxt->sax->characters != NULL)
2622: ctxt->sax->characters(ctxt->userData, buf, nbchar);
2623: }
2624: }
1.45 daniel 2625: }
1.27 daniel 2626: }
2627:
1.50 daniel 2628: /**
2629: * xmlParseExternalID:
2630: * @ctxt: an XML parser context
2631: * @publicID: a CHAR** receiving PubidLiteral
1.67 daniel 2632: * @strict: indicate whether we should restrict parsing to only
2633: * production [75], see NOTE below
1.50 daniel 2634: *
1.67 daniel 2635: * Parse an External ID or a Public ID
2636: *
2637: * NOTE: Productions [75] and [83] interract badly since [75] can generate
2638: * 'PUBLIC' S PubidLiteral S SystemLiteral
1.22 daniel 2639: *
2640: * [75] ExternalID ::= 'SYSTEM' S SystemLiteral
2641: * | 'PUBLIC' S PubidLiteral S SystemLiteral
1.67 daniel 2642: *
2643: * [83] PublicID ::= 'PUBLIC' S PubidLiteral
2644: *
1.68 daniel 2645: * Returns the function returns SystemLiteral and in the second
1.67 daniel 2646: * case publicID receives PubidLiteral, is strict is off
2647: * it is possible to return NULL and have publicID set.
1.22 daniel 2648: */
2649:
1.55 daniel 2650: CHAR *
1.67 daniel 2651: xmlParseExternalID(xmlParserCtxtPtr ctxt, CHAR **publicID, int strict) {
1.39 daniel 2652: CHAR *URI = NULL;
1.22 daniel 2653:
1.91 daniel 2654: SHRINK;
1.40 daniel 2655: if ((CUR == 'S') && (NXT(1) == 'Y') &&
2656: (NXT(2) == 'S') && (NXT(3) == 'T') &&
2657: (NXT(4) == 'E') && (NXT(5) == 'M')) {
2658: SKIP(6);
1.59 daniel 2659: if (!IS_BLANK(CUR)) {
2660: if ((ctxt->sax != NULL) && (ctxt->sax->error != NULL))
1.74 daniel 2661: ctxt->sax->error(ctxt->userData,
1.59 daniel 2662: "Space required after 'SYSTEM'\n");
2663: ctxt->wellFormed = 0;
2664: }
1.42 daniel 2665: SKIP_BLANKS;
1.39 daniel 2666: URI = xmlParseSystemLiteral(ctxt);
1.59 daniel 2667: if (URI == NULL) {
1.55 daniel 2668: if ((ctxt->sax != NULL) && (ctxt->sax->error != NULL))
1.74 daniel 2669: ctxt->sax->error(ctxt->userData,
1.39 daniel 2670: "xmlParseExternalID: SYSTEM, no URI\n");
1.59 daniel 2671: ctxt->wellFormed = 0;
2672: }
1.40 daniel 2673: } else if ((CUR == 'P') && (NXT(1) == 'U') &&
2674: (NXT(2) == 'B') && (NXT(3) == 'L') &&
2675: (NXT(4) == 'I') && (NXT(5) == 'C')) {
2676: SKIP(6);
1.59 daniel 2677: if (!IS_BLANK(CUR)) {
2678: if ((ctxt->sax != NULL) && (ctxt->sax->error != NULL))
1.74 daniel 2679: ctxt->sax->error(ctxt->userData,
1.59 daniel 2680: "Space required after 'PUBLIC'\n");
2681: ctxt->wellFormed = 0;
2682: }
1.42 daniel 2683: SKIP_BLANKS;
1.39 daniel 2684: *publicID = xmlParsePubidLiteral(ctxt);
1.59 daniel 2685: if (*publicID == NULL) {
1.55 daniel 2686: if ((ctxt->sax != NULL) && (ctxt->sax->error != NULL))
1.74 daniel 2687: ctxt->sax->error(ctxt->userData,
1.39 daniel 2688: "xmlParseExternalID: PUBLIC, no Public Identifier\n");
1.59 daniel 2689: ctxt->wellFormed = 0;
2690: }
1.67 daniel 2691: if (strict) {
2692: /*
2693: * We don't handle [83] so "S SystemLiteral" is required.
2694: */
2695: if (!IS_BLANK(CUR)) {
2696: if ((ctxt->sax != NULL) && (ctxt->sax->error != NULL))
1.74 daniel 2697: ctxt->sax->error(ctxt->userData,
1.67 daniel 2698: "Space required after the Public Identifier\n");
2699: ctxt->wellFormed = 0;
2700: }
2701: } else {
2702: /*
2703: * We handle [83] so we return immediately, if
2704: * "S SystemLiteral" is not detected. From a purely parsing
2705: * point of view that's a nice mess.
2706: */
2707: const CHAR *ptr = CUR_PTR;
2708: if (!IS_BLANK(*ptr)) return(NULL);
2709:
2710: while (IS_BLANK(*ptr)) ptr++;
2711: if ((*ptr != '\'') || (*ptr != '"')) return(NULL);
1.59 daniel 2712: }
1.42 daniel 2713: SKIP_BLANKS;
1.39 daniel 2714: URI = xmlParseSystemLiteral(ctxt);
1.59 daniel 2715: if (URI == NULL) {
1.55 daniel 2716: if ((ctxt->sax != NULL) && (ctxt->sax->error != NULL))
1.74 daniel 2717: ctxt->sax->error(ctxt->userData,
1.39 daniel 2718: "xmlParseExternalID: PUBLIC, no URI\n");
1.59 daniel 2719: ctxt->wellFormed = 0;
2720: }
1.22 daniel 2721: }
1.39 daniel 2722: return(URI);
1.22 daniel 2723: }
2724:
1.50 daniel 2725: /**
2726: * xmlParseComment:
1.69 daniel 2727: * @ctxt: an XML parser context
2728: * @create: should we create a node, or just skip the content
1.50 daniel 2729: *
1.3 veillard 2730: * Skip an XML (SGML) comment <!-- .... -->
1.31 daniel 2731: * This may or may not create a node (depending on the context)
1.38 daniel 2732: * The spec says that "For compatibility, the string "--" (double-hyphen)
2733: * must not occur within comments. "
1.22 daniel 2734: *
2735: * [15] Comment ::= '<!--' ((Char - '-') | ('-' (Char - '-')))* '-->'
1.3 veillard 2736: */
1.72 daniel 2737: void
1.69 daniel 2738: xmlParseComment(xmlParserCtxtPtr ctxt, int create) {
1.17 daniel 2739: const CHAR *q, *start;
2740: const CHAR *r;
1.39 daniel 2741: CHAR *val;
1.3 veillard 2742:
2743: /*
1.22 daniel 2744: * Check that there is a comment right here.
1.3 veillard 2745: */
1.40 daniel 2746: if ((CUR != '<') || (NXT(1) != '!') ||
1.72 daniel 2747: (NXT(2) != '-') || (NXT(3) != '-')) return;
1.3 veillard 2748:
1.97 daniel 2749: ctxt->instate = XML_PARSER_COMMENT;
1.91 daniel 2750: SHRINK;
1.40 daniel 2751: SKIP(4);
2752: start = q = CUR_PTR;
2753: NEXT;
2754: r = CUR_PTR;
2755: NEXT;
2756: while (IS_CHAR(CUR) &&
2757: ((CUR == ':') || (CUR != '>') ||
1.16 daniel 2758: (*r != '-') || (*q != '-'))) {
1.59 daniel 2759: if ((*r == '-') && (*q == '-')) {
1.55 daniel 2760: if ((ctxt->sax != NULL) && (ctxt->sax->error != NULL))
1.74 daniel 2761: ctxt->sax->error(ctxt->userData,
1.38 daniel 2762: "Comment must not contain '--' (double-hyphen)`\n");
1.59 daniel 2763: ctxt->wellFormed = 0;
2764: }
1.40 daniel 2765: NEXT;r++;q++;
1.3 veillard 2766: }
1.40 daniel 2767: if (!IS_CHAR(CUR)) {
1.55 daniel 2768: if ((ctxt->sax != NULL) && (ctxt->sax->error != NULL))
1.74 daniel 2769: ctxt->sax->error(ctxt->userData, "Comment not terminated \n<!--%.50s\n", start);
1.59 daniel 2770: ctxt->wellFormed = 0;
1.3 veillard 2771: } else {
1.40 daniel 2772: NEXT;
1.31 daniel 2773: if (create) {
1.39 daniel 2774: val = xmlStrndup(start, q - start);
1.72 daniel 2775: if ((ctxt->sax != NULL) && (ctxt->sax->comment != NULL))
1.74 daniel 2776: ctxt->sax->comment(ctxt->userData, val);
1.39 daniel 2777: free(val);
1.31 daniel 2778: }
1.3 veillard 2779: }
2780: }
2781:
1.50 daniel 2782: /**
2783: * xmlParsePITarget:
2784: * @ctxt: an XML parser context
2785: *
2786: * parse the name of a PI
1.22 daniel 2787: *
2788: * [17] PITarget ::= Name - (('X' | 'x') ('M' | 'm') ('L' | 'l'))
1.68 daniel 2789: *
2790: * Returns the PITarget name or NULL
1.22 daniel 2791: */
2792:
1.55 daniel 2793: CHAR *
2794: xmlParsePITarget(xmlParserCtxtPtr ctxt) {
1.22 daniel 2795: CHAR *name;
2796:
2797: name = xmlParseName(ctxt);
2798: if ((name != NULL) && (name[3] == 0) &&
2799: ((name[0] == 'x') || (name[0] == 'X')) &&
1.31 daniel 2800: ((name[1] == 'm') || (name[1] == 'M')) &&
2801: ((name[2] == 'l') || (name[2] == 'L'))) {
1.55 daniel 2802: if ((ctxt->sax != NULL) && (ctxt->sax->error != NULL))
1.74 daniel 2803: ctxt->sax->error(ctxt->userData, "xmlParsePItarget: invalid name prefix 'xml'\n");
1.22 daniel 2804: return(NULL);
2805: }
2806: return(name);
2807: }
2808:
1.50 daniel 2809: /**
2810: * xmlParsePI:
2811: * @ctxt: an XML parser context
2812: *
2813: * parse an XML Processing Instruction.
1.22 daniel 2814: *
2815: * [16] PI ::= '<?' PITarget (S (Char* - (Char* '?>' Char*)))? '?>'
1.68 daniel 2816: *
1.69 daniel 2817: * The processing is transfered to SAX once parsed.
1.3 veillard 2818: */
2819:
1.55 daniel 2820: void
2821: xmlParsePI(xmlParserCtxtPtr ctxt) {
1.22 daniel 2822: CHAR *target;
2823:
1.40 daniel 2824: if ((CUR == '<') && (NXT(1) == '?')) {
1.3 veillard 2825: /*
2826: * this is a Processing Instruction.
2827: */
1.40 daniel 2828: SKIP(2);
1.91 daniel 2829: SHRINK;
1.3 veillard 2830:
2831: /*
1.22 daniel 2832: * Parse the target name and check for special support like
2833: * namespace.
1.3 veillard 2834: */
1.22 daniel 2835: target = xmlParsePITarget(ctxt);
2836: if (target != NULL) {
1.72 daniel 2837: const CHAR *q = CUR_PTR;
2838:
2839: while (IS_CHAR(CUR) &&
2840: ((CUR != '?') || (NXT(1) != '>')))
2841: NEXT;
2842: if (!IS_CHAR(CUR)) {
2843: if ((ctxt->sax != NULL) && (ctxt->sax->error != NULL))
1.74 daniel 2844: ctxt->sax->error(ctxt->userData,
1.72 daniel 2845: "xmlParsePI: PI %s never end ...\n", target);
2846: ctxt->wellFormed = 0;
1.22 daniel 2847: } else {
1.72 daniel 2848: CHAR *data;
1.44 daniel 2849:
1.72 daniel 2850: data = xmlStrndup(q, CUR_PTR - q);
2851: SKIP(2);
1.44 daniel 2852:
1.72 daniel 2853: /*
2854: * SAX: PI detected.
2855: */
2856: if ((ctxt->sax) &&
2857: (ctxt->sax->processingInstruction != NULL))
1.99 daniel 2858: ctxt->sax->processingInstruction(ctxt->userData,
2859: target, data);
1.72 daniel 2860: free(data);
1.22 daniel 2861: }
1.39 daniel 2862: free(target);
1.3 veillard 2863: } else {
1.55 daniel 2864: if ((ctxt->sax != NULL) && (ctxt->sax->error != NULL))
1.99 daniel 2865: ctxt->sax->error(ctxt->userData,
2866: "xmlParsePI : no target name\n");
1.59 daniel 2867: ctxt->wellFormed = 0;
1.22 daniel 2868: }
2869: }
2870: }
2871:
1.50 daniel 2872: /**
2873: * xmlParseNotationDecl:
2874: * @ctxt: an XML parser context
2875: *
2876: * parse a notation declaration
1.22 daniel 2877: *
2878: * [82] NotationDecl ::= '<!NOTATION' S Name S (ExternalID | PublicID) S? '>'
2879: *
2880: * Hence there is actually 3 choices:
2881: * 'PUBLIC' S PubidLiteral
2882: * 'PUBLIC' S PubidLiteral S SystemLiteral
2883: * and 'SYSTEM' S SystemLiteral
1.50 daniel 2884: *
1.67 daniel 2885: * See the NOTE on xmlParseExternalID().
1.22 daniel 2886: */
2887:
1.55 daniel 2888: void
2889: xmlParseNotationDecl(xmlParserCtxtPtr ctxt) {
1.22 daniel 2890: CHAR *name;
1.67 daniel 2891: CHAR *Pubid;
2892: CHAR *Systemid;
1.22 daniel 2893:
1.40 daniel 2894: if ((CUR == '<') && (NXT(1) == '!') &&
2895: (NXT(2) == 'N') && (NXT(3) == 'O') &&
2896: (NXT(4) == 'T') && (NXT(5) == 'A') &&
2897: (NXT(6) == 'T') && (NXT(7) == 'I') &&
1.67 daniel 2898: (NXT(8) == 'O') && (NXT(9) == 'N')) {
1.91 daniel 2899: SHRINK;
1.40 daniel 2900: SKIP(10);
1.67 daniel 2901: if (!IS_BLANK(CUR)) {
2902: if ((ctxt->sax != NULL) && (ctxt->sax->error != NULL))
1.74 daniel 2903: ctxt->sax->error(ctxt->userData, "Space required after '<!NOTATION'\n");
1.67 daniel 2904: ctxt->wellFormed = 0;
2905: return;
2906: }
2907: SKIP_BLANKS;
1.22 daniel 2908:
2909: name = xmlParseName(ctxt);
2910: if (name == NULL) {
1.55 daniel 2911: if ((ctxt->sax != NULL) && (ctxt->sax->error != NULL))
1.74 daniel 2912: ctxt->sax->error(ctxt->userData, "NOTATION: Name expected here\n");
1.67 daniel 2913: ctxt->wellFormed = 0;
2914: return;
2915: }
2916: if (!IS_BLANK(CUR)) {
2917: if ((ctxt->sax != NULL) && (ctxt->sax->error != NULL))
1.74 daniel 2918: ctxt->sax->error(ctxt->userData,
1.67 daniel 2919: "Space required after the NOTATION name'\n");
1.59 daniel 2920: ctxt->wellFormed = 0;
1.22 daniel 2921: return;
2922: }
1.42 daniel 2923: SKIP_BLANKS;
1.67 daniel 2924:
1.22 daniel 2925: /*
1.67 daniel 2926: * Parse the IDs.
1.22 daniel 2927: */
1.67 daniel 2928: Systemid = xmlParseExternalID(ctxt, &Pubid, 1);
2929: SKIP_BLANKS;
2930:
2931: if (CUR == '>') {
1.40 daniel 2932: NEXT;
1.72 daniel 2933: if ((ctxt->sax != NULL) && (ctxt->sax->notationDecl != NULL))
1.74 daniel 2934: ctxt->sax->notationDecl(ctxt->userData, name, Pubid, Systemid);
1.67 daniel 2935: } else {
2936: if ((ctxt->sax != NULL) && (ctxt->sax->error != NULL))
1.74 daniel 2937: ctxt->sax->error(ctxt->userData,
1.67 daniel 2938: "'>' required to close NOTATION declaration\n");
2939: ctxt->wellFormed = 0;
2940: }
1.22 daniel 2941: free(name);
1.67 daniel 2942: if (Systemid != NULL) free(Systemid);
2943: if (Pubid != NULL) free(Pubid);
1.22 daniel 2944: }
2945: }
2946:
1.50 daniel 2947: /**
2948: * xmlParseEntityDecl:
2949: * @ctxt: an XML parser context
2950: *
2951: * parse <!ENTITY declarations
1.22 daniel 2952: *
2953: * [70] EntityDecl ::= GEDecl | PEDecl
2954: *
2955: * [71] GEDecl ::= '<!ENTITY' S Name S EntityDef S? '>'
2956: *
2957: * [72] PEDecl ::= '<!ENTITY' S '%' S Name S PEDef S? '>'
2958: *
2959: * [73] EntityDef ::= EntityValue | (ExternalID NDataDecl?)
2960: *
2961: * [74] PEDef ::= EntityValue | ExternalID
1.24 daniel 2962: *
2963: * [76] NDataDecl ::= S 'NDATA' S Name
1.99 daniel 2964: *
2965: * [ VC: Notation Declared ]
2966: * TODO The Name must match the declared name of a notation.
1.22 daniel 2967: */
2968:
1.55 daniel 2969: void
2970: xmlParseEntityDecl(xmlParserCtxtPtr ctxt) {
1.39 daniel 2971: CHAR *name = NULL;
1.24 daniel 2972: CHAR *value = NULL;
1.39 daniel 2973: CHAR *URI = NULL, *literal = NULL;
1.24 daniel 2974: CHAR *ndata = NULL;
1.39 daniel 2975: int isParameter = 0;
1.78 daniel 2976: CHAR *orig = NULL;
1.22 daniel 2977:
1.94 daniel 2978: GROW;
1.40 daniel 2979: if ((CUR == '<') && (NXT(1) == '!') &&
2980: (NXT(2) == 'E') && (NXT(3) == 'N') &&
2981: (NXT(4) == 'T') && (NXT(5) == 'I') &&
1.59 daniel 2982: (NXT(6) == 'T') && (NXT(7) == 'Y')) {
1.96 daniel 2983: ctxt->instate = XML_PARSER_ENTITY_DECL;
1.91 daniel 2984: SHRINK;
1.40 daniel 2985: SKIP(8);
1.59 daniel 2986: if (!IS_BLANK(CUR)) {
2987: if ((ctxt->sax != NULL) && (ctxt->sax->error != NULL))
1.74 daniel 2988: ctxt->sax->error(ctxt->userData, "Space required after '<!ENTITY'\n");
1.59 daniel 2989: ctxt->wellFormed = 0;
2990: }
2991: SKIP_BLANKS;
1.40 daniel 2992:
2993: if (CUR == '%') {
2994: NEXT;
1.59 daniel 2995: if (!IS_BLANK(CUR)) {
2996: if ((ctxt->sax != NULL) && (ctxt->sax->error != NULL))
1.74 daniel 2997: ctxt->sax->error(ctxt->userData, "Space required after '%'\n");
1.59 daniel 2998: ctxt->wellFormed = 0;
2999: }
1.42 daniel 3000: SKIP_BLANKS;
1.39 daniel 3001: isParameter = 1;
1.22 daniel 3002: }
3003:
3004: name = xmlParseName(ctxt);
1.24 daniel 3005: if (name == NULL) {
1.55 daniel 3006: if ((ctxt->sax != NULL) && (ctxt->sax->error != NULL))
1.74 daniel 3007: ctxt->sax->error(ctxt->userData, "xmlParseEntityDecl: no name\n");
1.59 daniel 3008: ctxt->wellFormed = 0;
1.24 daniel 3009: return;
3010: }
1.59 daniel 3011: if (!IS_BLANK(CUR)) {
3012: if ((ctxt->sax != NULL) && (ctxt->sax->error != NULL))
1.74 daniel 3013: ctxt->sax->error(ctxt->userData,
1.59 daniel 3014: "Space required after the entity name\n");
3015: ctxt->wellFormed = 0;
3016: }
1.42 daniel 3017: SKIP_BLANKS;
1.24 daniel 3018:
1.22 daniel 3019: /*
1.68 daniel 3020: * handle the various case of definitions...
1.22 daniel 3021: */
1.39 daniel 3022: if (isParameter) {
1.40 daniel 3023: if ((CUR == '"') || (CUR == '\''))
1.78 daniel 3024: value = xmlParseEntityValue(ctxt, &orig);
1.39 daniel 3025: if (value) {
1.72 daniel 3026: if ((ctxt->sax != NULL) && (ctxt->sax->entityDecl != NULL))
1.74 daniel 3027: ctxt->sax->entityDecl(ctxt->userData, name,
1.39 daniel 3028: XML_INTERNAL_PARAMETER_ENTITY,
3029: NULL, NULL, value);
3030: }
1.24 daniel 3031: else {
1.67 daniel 3032: URI = xmlParseExternalID(ctxt, &literal, 1);
1.39 daniel 3033: if (URI) {
1.72 daniel 3034: if ((ctxt->sax != NULL) && (ctxt->sax->entityDecl != NULL))
1.74 daniel 3035: ctxt->sax->entityDecl(ctxt->userData, name,
1.39 daniel 3036: XML_EXTERNAL_PARAMETER_ENTITY,
3037: literal, URI, NULL);
3038: }
1.24 daniel 3039: }
3040: } else {
1.40 daniel 3041: if ((CUR == '"') || (CUR == '\'')) {
1.78 daniel 3042: value = xmlParseEntityValue(ctxt, &orig);
1.72 daniel 3043: if ((ctxt->sax != NULL) && (ctxt->sax->entityDecl != NULL))
1.74 daniel 3044: ctxt->sax->entityDecl(ctxt->userData, name,
1.39 daniel 3045: XML_INTERNAL_GENERAL_ENTITY,
3046: NULL, NULL, value);
3047: } else {
1.67 daniel 3048: URI = xmlParseExternalID(ctxt, &literal, 1);
1.59 daniel 3049: if ((CUR != '>') && (!IS_BLANK(CUR))) {
3050: if ((ctxt->sax != NULL) && (ctxt->sax->error != NULL))
1.74 daniel 3051: ctxt->sax->error(ctxt->userData,
1.59 daniel 3052: "Space required before 'NDATA'\n");
3053: ctxt->wellFormed = 0;
3054: }
1.42 daniel 3055: SKIP_BLANKS;
1.40 daniel 3056: if ((CUR == 'N') && (NXT(1) == 'D') &&
3057: (NXT(2) == 'A') && (NXT(3) == 'T') &&
3058: (NXT(4) == 'A')) {
3059: SKIP(5);
1.59 daniel 3060: if (!IS_BLANK(CUR)) {
3061: if ((ctxt->sax != NULL) && (ctxt->sax->error != NULL))
1.74 daniel 3062: ctxt->sax->error(ctxt->userData,
1.59 daniel 3063: "Space required after 'NDATA'\n");
3064: ctxt->wellFormed = 0;
3065: }
1.42 daniel 3066: SKIP_BLANKS;
1.24 daniel 3067: ndata = xmlParseName(ctxt);
1.72 daniel 3068: if ((ctxt->sax != NULL) && (ctxt->sax->entityDecl != NULL))
1.74 daniel 3069: ctxt->sax->entityDecl(ctxt->userData, name,
1.39 daniel 3070: XML_EXTERNAL_GENERAL_UNPARSED_ENTITY,
3071: literal, URI, ndata);
3072: } else {
1.72 daniel 3073: if ((ctxt->sax != NULL) && (ctxt->sax->entityDecl != NULL))
1.74 daniel 3074: ctxt->sax->entityDecl(ctxt->userData, name,
1.39 daniel 3075: XML_EXTERNAL_GENERAL_PARSED_ENTITY,
3076: literal, URI, NULL);
1.24 daniel 3077: }
3078: }
3079: }
1.42 daniel 3080: SKIP_BLANKS;
1.40 daniel 3081: if (CUR != '>') {
1.55 daniel 3082: if ((ctxt->sax != NULL) && (ctxt->sax->error != NULL))
1.74 daniel 3083: ctxt->sax->error(ctxt->userData,
1.31 daniel 3084: "xmlParseEntityDecl: entity %s not terminated\n", name);
1.59 daniel 3085: ctxt->wellFormed = 0;
1.24 daniel 3086: } else
1.40 daniel 3087: NEXT;
1.78 daniel 3088: if (orig != NULL) {
3089: /*
1.98 daniel 3090: * Ugly mechanism to save the raw entity value.
1.78 daniel 3091: */
3092: xmlEntityPtr cur = NULL;
3093:
1.98 daniel 3094: if (isParameter) {
3095: if ((ctxt->sax != NULL) &&
3096: (ctxt->sax->getParameterEntity != NULL))
3097: cur = ctxt->sax->getParameterEntity(ctxt, name);
3098: } else {
3099: if ((ctxt->sax != NULL) &&
3100: (ctxt->sax->getEntity != NULL))
3101: cur = ctxt->sax->getEntity(ctxt, name);
3102: }
3103: if (cur != NULL) {
3104: if (cur->orig != NULL)
3105: free(orig);
3106: else
3107: cur->orig = orig;
3108: } else
1.78 daniel 3109: free(orig);
3110: }
1.39 daniel 3111: if (name != NULL) free(name);
3112: if (value != NULL) free(value);
3113: if (URI != NULL) free(URI);
3114: if (literal != NULL) free(literal);
3115: if (ndata != NULL) free(ndata);
1.22 daniel 3116: }
3117: }
3118:
1.50 daniel 3119: /**
1.59 daniel 3120: * xmlParseDefaultDecl:
3121: * @ctxt: an XML parser context
3122: * @value: Receive a possible fixed default value for the attribute
3123: *
3124: * Parse an attribute default declaration
3125: *
3126: * [60] DefaultDecl ::= '#REQUIRED' | '#IMPLIED' | (('#FIXED' S)? AttValue)
3127: *
1.99 daniel 3128: * [ VC: Required Attribute ]
3129: * TODO if the default declaration is the keyword #REQUIRED, then the
3130: * attribute must be specified for all elements of the type in the
3131: * attribute-list declaration.
3132: *
3133: * [ VC: Attribute Default Legal ]
1.102 daniel 3134: * The declared default value must meet the lexical constraints of
3135: * the declared attribute type c.f. xmlValidateAttributeDecl()
1.99 daniel 3136: *
3137: * [ VC: Fixed Attribute Default ]
3138: * TODO if an attribute has a default value declared with the #FIXED
3139: * keyword, instances of that attribute must match the default value.
3140: *
3141: * [ WFC: No < in Attribute Values ]
3142: * handled in xmlParseAttValue()
3143: *
1.59 daniel 3144: * returns: XML_ATTRIBUTE_NONE, XML_ATTRIBUTE_REQUIRED, XML_ATTRIBUTE_IMPLIED
3145: * or XML_ATTRIBUTE_FIXED.
3146: */
3147:
3148: int
3149: xmlParseDefaultDecl(xmlParserCtxtPtr ctxt, CHAR **value) {
3150: int val;
3151: CHAR *ret;
3152:
3153: *value = NULL;
3154: if ((CUR == '#') && (NXT(1) == 'R') &&
3155: (NXT(2) == 'E') && (NXT(3) == 'Q') &&
3156: (NXT(4) == 'U') && (NXT(5) == 'I') &&
3157: (NXT(6) == 'R') && (NXT(7) == 'E') &&
3158: (NXT(8) == 'D')) {
3159: SKIP(9);
3160: return(XML_ATTRIBUTE_REQUIRED);
3161: }
3162: if ((CUR == '#') && (NXT(1) == 'I') &&
3163: (NXT(2) == 'M') && (NXT(3) == 'P') &&
3164: (NXT(4) == 'L') && (NXT(5) == 'I') &&
3165: (NXT(6) == 'E') && (NXT(7) == 'D')) {
3166: SKIP(8);
3167: return(XML_ATTRIBUTE_IMPLIED);
3168: }
3169: val = XML_ATTRIBUTE_NONE;
3170: if ((CUR == '#') && (NXT(1) == 'F') &&
3171: (NXT(2) == 'I') && (NXT(3) == 'X') &&
3172: (NXT(4) == 'E') && (NXT(5) == 'D')) {
3173: SKIP(6);
3174: val = XML_ATTRIBUTE_FIXED;
3175: if (!IS_BLANK(CUR)) {
3176: if ((ctxt->sax != NULL) && (ctxt->sax->error != NULL))
1.74 daniel 3177: ctxt->sax->error(ctxt->userData, "Space required after '#FIXED'\n");
1.59 daniel 3178: ctxt->wellFormed = 0;
3179: }
3180: SKIP_BLANKS;
3181: }
3182: ret = xmlParseAttValue(ctxt);
1.96 daniel 3183: ctxt->instate = XML_PARSER_DTD;
1.59 daniel 3184: if (ret == NULL) {
3185: if ((ctxt->sax != NULL) && (ctxt->sax->error != NULL))
1.74 daniel 3186: ctxt->sax->error(ctxt->userData,
1.59 daniel 3187: "Attribute default value declaration error\n");
3188: ctxt->wellFormed = 0;
3189: } else
3190: *value = ret;
3191: return(val);
3192: }
3193:
3194: /**
1.66 daniel 3195: * xmlParseNotationType:
3196: * @ctxt: an XML parser context
3197: *
3198: * parse an Notation attribute type.
3199: *
1.99 daniel 3200: * Note: the leading 'NOTATION' S part has already being parsed...
3201: *
1.66 daniel 3202: * [58] NotationType ::= 'NOTATION' S '(' S? Name (S? '|' S? Name)* S? ')'
3203: *
1.99 daniel 3204: * [ VC: Notation Attributes ]
3205: * TODO Values of this type must match one of the notation names included
3206: * in the declaration; all notation names in the declaration must be declared.
1.66 daniel 3207: *
3208: * Returns: the notation attribute tree built while parsing
3209: */
3210:
3211: xmlEnumerationPtr
3212: xmlParseNotationType(xmlParserCtxtPtr ctxt) {
3213: CHAR *name;
3214: xmlEnumerationPtr ret = NULL, last = NULL, cur;
3215:
3216: if (CUR != '(') {
3217: if ((ctxt->sax != NULL) && (ctxt->sax->error != NULL))
1.74 daniel 3218: ctxt->sax->error(ctxt->userData, "'(' required to start 'NOTATION'\n");
1.66 daniel 3219: ctxt->wellFormed = 0;
3220: return(NULL);
3221: }
1.91 daniel 3222: SHRINK;
1.66 daniel 3223: do {
3224: NEXT;
3225: SKIP_BLANKS;
3226: name = xmlParseName(ctxt);
3227: if (name == NULL) {
3228: if ((ctxt->sax != NULL) && (ctxt->sax->error != NULL))
1.74 daniel 3229: ctxt->sax->error(ctxt->userData,
1.66 daniel 3230: "Name expected in NOTATION declaration\n");
3231: ctxt->wellFormed = 0;
3232: return(ret);
3233: }
3234: cur = xmlCreateEnumeration(name);
1.67 daniel 3235: free(name);
1.66 daniel 3236: if (cur == NULL) return(ret);
3237: if (last == NULL) ret = last = cur;
3238: else {
3239: last->next = cur;
3240: last = cur;
3241: }
3242: SKIP_BLANKS;
3243: } while (CUR == '|');
3244: if (CUR != ')') {
3245: if ((ctxt->sax != NULL) && (ctxt->sax->error != NULL))
1.74 daniel 3246: ctxt->sax->error(ctxt->userData,
1.66 daniel 3247: "')' required to finish NOTATION declaration\n");
3248: ctxt->wellFormed = 0;
3249: return(ret);
3250: }
3251: NEXT;
3252: return(ret);
3253: }
3254:
3255: /**
3256: * xmlParseEnumerationType:
3257: * @ctxt: an XML parser context
3258: *
3259: * parse an Enumeration attribute type.
3260: *
3261: * [59] Enumeration ::= '(' S? Nmtoken (S? '|' S? Nmtoken)* S? ')'
3262: *
1.99 daniel 3263: * [ VC: Enumeration ]
3264: * TODO Values of this type must match one of the Nmtoken tokens in
3265: * the declaration
3266: *
1.66 daniel 3267: * Returns: the enumeration attribute tree built while parsing
3268: */
3269:
3270: xmlEnumerationPtr
3271: xmlParseEnumerationType(xmlParserCtxtPtr ctxt) {
3272: CHAR *name;
3273: xmlEnumerationPtr ret = NULL, last = NULL, cur;
3274:
3275: if (CUR != '(') {
3276: if ((ctxt->sax != NULL) && (ctxt->sax->error != NULL))
1.74 daniel 3277: ctxt->sax->error(ctxt->userData,
1.66 daniel 3278: "'(' required to start ATTLIST enumeration\n");
3279: ctxt->wellFormed = 0;
3280: return(NULL);
3281: }
1.91 daniel 3282: SHRINK;
1.66 daniel 3283: do {
3284: NEXT;
3285: SKIP_BLANKS;
3286: name = xmlParseNmtoken(ctxt);
3287: if (name == NULL) {
3288: if ((ctxt->sax != NULL) && (ctxt->sax->error != NULL))
1.74 daniel 3289: ctxt->sax->error(ctxt->userData,
1.66 daniel 3290: "NmToken expected in ATTLIST enumeration\n");
3291: ctxt->wellFormed = 0;
3292: return(ret);
3293: }
3294: cur = xmlCreateEnumeration(name);
1.67 daniel 3295: free(name);
1.66 daniel 3296: if (cur == NULL) return(ret);
3297: if (last == NULL) ret = last = cur;
3298: else {
3299: last->next = cur;
3300: last = cur;
3301: }
3302: SKIP_BLANKS;
3303: } while (CUR == '|');
3304: if (CUR != ')') {
3305: if ((ctxt->sax != NULL) && (ctxt->sax->error != NULL))
1.74 daniel 3306: ctxt->sax->error(ctxt->userData,
1.66 daniel 3307: "')' required to finish ATTLIST enumeration\n");
3308: ctxt->wellFormed = 0;
3309: return(ret);
3310: }
3311: NEXT;
3312: return(ret);
3313: }
3314:
3315: /**
1.50 daniel 3316: * xmlParseEnumeratedType:
3317: * @ctxt: an XML parser context
1.66 daniel 3318: * @tree: the enumeration tree built while parsing
1.50 daniel 3319: *
1.66 daniel 3320: * parse an Enumerated attribute type.
1.22 daniel 3321: *
3322: * [57] EnumeratedType ::= NotationType | Enumeration
3323: *
3324: * [58] NotationType ::= 'NOTATION' S '(' S? Name (S? '|' S? Name)* S? ')'
3325: *
1.50 daniel 3326: *
1.66 daniel 3327: * Returns: XML_ATTRIBUTE_ENUMERATION or XML_ATTRIBUTE_NOTATION
1.22 daniel 3328: */
3329:
1.66 daniel 3330: int
3331: xmlParseEnumeratedType(xmlParserCtxtPtr ctxt, xmlEnumerationPtr *tree) {
3332: if ((CUR == 'N') && (NXT(1) == 'O') &&
3333: (NXT(2) == 'T') && (NXT(3) == 'A') &&
3334: (NXT(4) == 'T') && (NXT(5) == 'I') &&
3335: (NXT(6) == 'O') && (NXT(7) == 'N')) {
3336: SKIP(8);
3337: if (!IS_BLANK(CUR)) {
3338: if ((ctxt->sax != NULL) && (ctxt->sax->error != NULL))
1.74 daniel 3339: ctxt->sax->error(ctxt->userData, "Space required after 'NOTATION'\n");
1.66 daniel 3340: ctxt->wellFormed = 0;
3341: return(0);
3342: }
3343: SKIP_BLANKS;
3344: *tree = xmlParseNotationType(ctxt);
3345: if (*tree == NULL) return(0);
3346: return(XML_ATTRIBUTE_NOTATION);
3347: }
3348: *tree = xmlParseEnumerationType(ctxt);
3349: if (*tree == NULL) return(0);
3350: return(XML_ATTRIBUTE_ENUMERATION);
1.22 daniel 3351: }
3352:
1.50 daniel 3353: /**
3354: * xmlParseAttributeType:
3355: * @ctxt: an XML parser context
1.66 daniel 3356: * @tree: the enumeration tree built while parsing
1.50 daniel 3357: *
1.59 daniel 3358: * parse the Attribute list def for an element
1.22 daniel 3359: *
3360: * [54] AttType ::= StringType | TokenizedType | EnumeratedType
3361: *
3362: * [55] StringType ::= 'CDATA'
3363: *
3364: * [56] TokenizedType ::= 'ID' | 'IDREF' | 'IDREFS' | 'ENTITY' |
3365: * 'ENTITIES' | 'NMTOKEN' | 'NMTOKENS'
1.50 daniel 3366: *
1.102 daniel 3367: * Validity constraints for attribute values syntax are checked in
3368: * xmlValidateAttributeValue()
3369: *
1.99 daniel 3370: * [ VC: ID ]
1.102 daniel 3371: * Values of type ID must match the Name production. TODO A name must not
1.99 daniel 3372: * appear more than once in an XML document as a value of this type;
3373: * i.e., ID values must uniquely identify the elements which bear them.
3374: *
3375: * [ VC: One ID per Element Type ]
3376: * TODO No element type may have more than one ID attribute specified.
3377: *
3378: * [ VC: ID Attribute Default ]
3379: * TODO An ID attribute must have a declared default of #IMPLIED or #REQUIRED.
3380: *
3381: * [ VC: IDREF ]
1.102 daniel 3382: * Values of type IDREF must match the Name production, and values
3383: * of type IDREFS must match Names; TODO each Name must match the value of
1.99 daniel 3384: * an ID attribute on some element in the XML document; i.e. IDREF
3385: * values must match the value of some ID attribute.
3386: *
3387: * [ VC: Entity Name ]
1.102 daniel 3388: * Values of type ENTITY must match the Name production, values
3389: * of type ENTITIES must match Names; TODO each Name must match the name of
1.99 daniel 3390: * an unparsed entity declared in the DTD.
3391: *
3392: * [ VC: Name Token ]
1.102 daniel 3393: * Values of type NMTOKEN must match the Nmtoken production; values
1.99 daniel 3394: * of type NMTOKENS must match Nmtokens.
3395: *
1.69 daniel 3396: * Returns the attribute type
1.22 daniel 3397: */
1.59 daniel 3398: int
1.66 daniel 3399: xmlParseAttributeType(xmlParserCtxtPtr ctxt, xmlEnumerationPtr *tree) {
1.91 daniel 3400: SHRINK;
1.40 daniel 3401: if ((CUR == 'C') && (NXT(1) == 'D') &&
3402: (NXT(2) == 'A') && (NXT(3) == 'T') &&
3403: (NXT(4) == 'A')) {
3404: SKIP(5);
1.66 daniel 3405: return(XML_ATTRIBUTE_CDATA);
1.40 daniel 3406: } else if ((CUR == 'I') && (NXT(1) == 'D') &&
3407: (NXT(2) == 'R') && (NXT(3) == 'E') &&
1.97 daniel 3408: (NXT(4) == 'F') && (NXT(5) == 'S')) {
3409: SKIP(6);
3410: return(XML_ATTRIBUTE_IDREFS);
3411: } else if ((CUR == 'I') && (NXT(1) == 'D') &&
3412: (NXT(2) == 'R') && (NXT(3) == 'E') &&
1.40 daniel 3413: (NXT(4) == 'F')) {
3414: SKIP(5);
1.59 daniel 3415: return(XML_ATTRIBUTE_IDREF);
1.66 daniel 3416: } else if ((CUR == 'I') && (NXT(1) == 'D')) {
3417: SKIP(2);
3418: return(XML_ATTRIBUTE_ID);
1.40 daniel 3419: } else if ((CUR == 'E') && (NXT(1) == 'N') &&
3420: (NXT(2) == 'T') && (NXT(3) == 'I') &&
3421: (NXT(4) == 'T') && (NXT(5) == 'Y')) {
3422: SKIP(6);
1.59 daniel 3423: return(XML_ATTRIBUTE_ENTITY);
1.40 daniel 3424: } else if ((CUR == 'E') && (NXT(1) == 'N') &&
3425: (NXT(2) == 'T') && (NXT(3) == 'I') &&
3426: (NXT(4) == 'T') && (NXT(5) == 'I') &&
3427: (NXT(6) == 'E') && (NXT(7) == 'S')) {
3428: SKIP(8);
1.59 daniel 3429: return(XML_ATTRIBUTE_ENTITIES);
1.40 daniel 3430: } else if ((CUR == 'N') && (NXT(1) == 'M') &&
3431: (NXT(2) == 'T') && (NXT(3) == 'O') &&
3432: (NXT(4) == 'K') && (NXT(5) == 'E') &&
1.66 daniel 3433: (NXT(6) == 'N') && (NXT(7) == 'S')) {
3434: SKIP(8);
3435: return(XML_ATTRIBUTE_NMTOKENS);
3436: } else if ((CUR == 'N') && (NXT(1) == 'M') &&
3437: (NXT(2) == 'T') && (NXT(3) == 'O') &&
3438: (NXT(4) == 'K') && (NXT(5) == 'E') &&
1.40 daniel 3439: (NXT(6) == 'N')) {
3440: SKIP(7);
1.59 daniel 3441: return(XML_ATTRIBUTE_NMTOKEN);
1.22 daniel 3442: }
1.66 daniel 3443: return(xmlParseEnumeratedType(ctxt, tree));
1.22 daniel 3444: }
3445:
1.50 daniel 3446: /**
3447: * xmlParseAttributeListDecl:
3448: * @ctxt: an XML parser context
3449: *
3450: * : parse the Attribute list def for an element
1.22 daniel 3451: *
3452: * [52] AttlistDecl ::= '<!ATTLIST' S Name AttDef* S? '>'
3453: *
3454: * [53] AttDef ::= S Name S AttType S DefaultDecl
1.50 daniel 3455: *
1.22 daniel 3456: */
1.55 daniel 3457: void
3458: xmlParseAttributeListDecl(xmlParserCtxtPtr ctxt) {
1.59 daniel 3459: CHAR *elemName;
3460: CHAR *attrName;
1.103 daniel 3461: xmlEnumerationPtr tree;
1.22 daniel 3462:
1.40 daniel 3463: if ((CUR == '<') && (NXT(1) == '!') &&
3464: (NXT(2) == 'A') && (NXT(3) == 'T') &&
3465: (NXT(4) == 'T') && (NXT(5) == 'L') &&
3466: (NXT(6) == 'I') && (NXT(7) == 'S') &&
1.59 daniel 3467: (NXT(8) == 'T')) {
1.40 daniel 3468: SKIP(9);
1.59 daniel 3469: if (!IS_BLANK(CUR)) {
3470: if ((ctxt->sax != NULL) && (ctxt->sax->error != NULL))
1.74 daniel 3471: ctxt->sax->error(ctxt->userData, "Space required after '<!ATTLIST'\n");
1.59 daniel 3472: ctxt->wellFormed = 0;
3473: }
1.42 daniel 3474: SKIP_BLANKS;
1.59 daniel 3475: elemName = xmlParseName(ctxt);
3476: if (elemName == NULL) {
1.55 daniel 3477: if ((ctxt->sax != NULL) && (ctxt->sax->error != NULL))
1.74 daniel 3478: ctxt->sax->error(ctxt->userData, "ATTLIST: no name for Element\n");
1.59 daniel 3479: ctxt->wellFormed = 0;
1.22 daniel 3480: return;
3481: }
1.42 daniel 3482: SKIP_BLANKS;
1.40 daniel 3483: while (CUR != '>') {
3484: const CHAR *check = CUR_PTR;
1.59 daniel 3485: int type;
3486: int def;
3487: CHAR *defaultValue = NULL;
3488:
1.103 daniel 3489: tree = NULL;
1.59 daniel 3490: attrName = xmlParseName(ctxt);
3491: if (attrName == NULL) {
3492: if ((ctxt->sax != NULL) && (ctxt->sax->error != NULL))
1.74 daniel 3493: ctxt->sax->error(ctxt->userData, "ATTLIST: no name for Attribute\n");
1.59 daniel 3494: ctxt->wellFormed = 0;
3495: break;
3496: }
1.97 daniel 3497: GROW;
1.59 daniel 3498: if (!IS_BLANK(CUR)) {
3499: if ((ctxt->sax != NULL) && (ctxt->sax->error != NULL))
1.74 daniel 3500: ctxt->sax->error(ctxt->userData,
1.59 daniel 3501: "Space required after the attribute name\n");
3502: ctxt->wellFormed = 0;
3503: break;
3504: }
3505: SKIP_BLANKS;
3506:
1.66 daniel 3507: type = xmlParseAttributeType(ctxt, &tree);
1.59 daniel 3508: if (type <= 0) break;
1.22 daniel 3509:
1.97 daniel 3510: GROW;
1.59 daniel 3511: if (!IS_BLANK(CUR)) {
3512: if ((ctxt->sax != NULL) && (ctxt->sax->error != NULL))
1.74 daniel 3513: ctxt->sax->error(ctxt->userData,
1.59 daniel 3514: "Space required after the attribute type\n");
3515: ctxt->wellFormed = 0;
3516: break;
3517: }
1.42 daniel 3518: SKIP_BLANKS;
1.59 daniel 3519:
3520: def = xmlParseDefaultDecl(ctxt, &defaultValue);
3521: if (def <= 0) break;
3522:
1.97 daniel 3523: GROW;
1.59 daniel 3524: if (CUR != '>') {
3525: if (!IS_BLANK(CUR)) {
3526: if ((ctxt->sax != NULL) && (ctxt->sax->error != NULL))
1.74 daniel 3527: ctxt->sax->error(ctxt->userData,
1.59 daniel 3528: "Space required after the attribute default value\n");
3529: ctxt->wellFormed = 0;
3530: break;
3531: }
3532: SKIP_BLANKS;
3533: }
1.40 daniel 3534: if (check == CUR_PTR) {
1.55 daniel 3535: if ((ctxt->sax != NULL) && (ctxt->sax->error != NULL))
1.74 daniel 3536: ctxt->sax->error(ctxt->userData,
1.59 daniel 3537: "xmlParseAttributeListDecl: detected internal error\n");
1.22 daniel 3538: break;
3539: }
1.72 daniel 3540: if ((ctxt->sax != NULL) && (ctxt->sax->attributeDecl != NULL))
1.74 daniel 3541: ctxt->sax->attributeDecl(ctxt->userData, elemName, attrName,
1.66 daniel 3542: type, def, defaultValue, tree);
1.59 daniel 3543: if (attrName != NULL)
3544: free(attrName);
3545: if (defaultValue != NULL)
3546: free(defaultValue);
1.97 daniel 3547: GROW;
1.22 daniel 3548: }
1.40 daniel 3549: if (CUR == '>')
3550: NEXT;
1.22 daniel 3551:
1.59 daniel 3552: free(elemName);
1.22 daniel 3553: }
3554: }
3555:
1.50 daniel 3556: /**
1.61 daniel 3557: * xmlParseElementMixedContentDecl:
3558: * @ctxt: an XML parser context
3559: *
3560: * parse the declaration for a Mixed Element content
3561: * The leading '(' and spaces have been skipped in xmlParseElementContentDecl
3562: *
3563: * [51] Mixed ::= '(' S? '#PCDATA' (S? '|' S? Name)* S? ')*' |
3564: * '(' S? '#PCDATA' S? ')'
3565: *
1.99 daniel 3566: * [ VC: Proper Group/PE Nesting ] applies to [51] too (see [49])
3567: *
3568: * [ VC: No Duplicate Types ]
3569: * TODO The same name must not appear more than once in a single
3570: * mixed-content declaration.
3571: *
1.61 daniel 3572: * returns: the list of the xmlElementContentPtr describing the element choices
3573: */
3574: xmlElementContentPtr
1.62 daniel 3575: xmlParseElementMixedContentDecl(xmlParserCtxtPtr ctxt) {
1.64 daniel 3576: xmlElementContentPtr ret = NULL, cur = NULL, n;
1.61 daniel 3577: CHAR *elem = NULL;
3578:
1.97 daniel 3579: GROW;
1.61 daniel 3580: if ((CUR == '#') && (NXT(1) == 'P') &&
3581: (NXT(2) == 'C') && (NXT(3) == 'D') &&
3582: (NXT(4) == 'A') && (NXT(5) == 'T') &&
3583: (NXT(6) == 'A')) {
3584: SKIP(7);
3585: SKIP_BLANKS;
1.91 daniel 3586: SHRINK;
1.63 daniel 3587: if (CUR == ')') {
3588: NEXT;
3589: ret = xmlNewElementContent(NULL, XML_ELEMENT_CONTENT_PCDATA);
3590: return(ret);
3591: }
1.61 daniel 3592: if ((CUR == '(') || (CUR == '|')) {
3593: ret = cur = xmlNewElementContent(NULL, XML_ELEMENT_CONTENT_PCDATA);
3594: if (ret == NULL) return(NULL);
1.99 daniel 3595: }
1.61 daniel 3596: while (CUR == '|') {
1.64 daniel 3597: NEXT;
1.61 daniel 3598: if (elem == NULL) {
3599: ret = xmlNewElementContent(NULL, XML_ELEMENT_CONTENT_OR);
3600: if (ret == NULL) return(NULL);
3601: ret->c1 = cur;
1.64 daniel 3602: cur = ret;
1.61 daniel 3603: } else {
1.64 daniel 3604: n = xmlNewElementContent(NULL, XML_ELEMENT_CONTENT_OR);
3605: if (n == NULL) return(NULL);
3606: n->c1 = xmlNewElementContent(elem, XML_ELEMENT_CONTENT_ELEMENT);
3607: cur->c2 = n;
3608: cur = n;
1.66 daniel 3609: free(elem);
1.61 daniel 3610: }
3611: SKIP_BLANKS;
3612: elem = xmlParseName(ctxt);
3613: if (elem == NULL) {
3614: if ((ctxt->sax != NULL) && (ctxt->sax->error != NULL))
1.74 daniel 3615: ctxt->sax->error(ctxt->userData,
1.61 daniel 3616: "xmlParseElementMixedContentDecl : Name expected\n");
3617: ctxt->wellFormed = 0;
3618: xmlFreeElementContent(cur);
3619: return(NULL);
3620: }
3621: SKIP_BLANKS;
1.97 daniel 3622: GROW;
1.61 daniel 3623: }
1.63 daniel 3624: if ((CUR == ')') && (NXT(1) == '*')) {
1.66 daniel 3625: if (elem != NULL) {
1.61 daniel 3626: cur->c2 = xmlNewElementContent(elem,
3627: XML_ELEMENT_CONTENT_ELEMENT);
1.66 daniel 3628: free(elem);
3629: }
1.65 daniel 3630: ret->ocur = XML_ELEMENT_CONTENT_MULT;
1.64 daniel 3631: SKIP(2);
1.61 daniel 3632: } else {
1.66 daniel 3633: if (elem != NULL) free(elem);
1.61 daniel 3634: if ((ctxt->sax != NULL) && (ctxt->sax->error != NULL))
1.74 daniel 3635: ctxt->sax->error(ctxt->userData,
1.63 daniel 3636: "xmlParseElementMixedContentDecl : '|' or ')*' expected\n");
1.61 daniel 3637: ctxt->wellFormed = 0;
3638: xmlFreeElementContent(ret);
3639: return(NULL);
3640: }
3641:
3642: } else {
3643: if ((ctxt->sax != NULL) && (ctxt->sax->error != NULL))
1.74 daniel 3644: ctxt->sax->error(ctxt->userData,
1.61 daniel 3645: "xmlParseElementMixedContentDecl : '#PCDATA' expected\n");
3646: ctxt->wellFormed = 0;
3647: }
3648: return(ret);
3649: }
3650:
3651: /**
3652: * xmlParseElementChildrenContentDecl:
1.50 daniel 3653: * @ctxt: an XML parser context
3654: *
1.61 daniel 3655: * parse the declaration for a Mixed Element content
3656: * The leading '(' and spaces have been skipped in xmlParseElementContentDecl
1.22 daniel 3657: *
1.61 daniel 3658: *
1.22 daniel 3659: * [47] children ::= (choice | seq) ('?' | '*' | '+')?
3660: *
3661: * [48] cp ::= (Name | choice | seq) ('?' | '*' | '+')?
3662: *
3663: * [49] choice ::= '(' S? cp ( S? '|' S? cp )* S? ')'
3664: *
3665: * [50] seq ::= '(' S? cp ( S? ',' S? cp )* S? ')'
3666: *
1.99 daniel 3667: * [ VC: Proper Group/PE Nesting ] applies to [49] and [50]
3668: * TODO Parameter-entity replacement text must be properly nested
3669: * with parenthetized groups. That is to say, if either of the
3670: * opening or closing parentheses in a choice, seq, or Mixed
3671: * construct is contained in the replacement text for a parameter
3672: * entity, both must be contained in the same replacement text. For
3673: * interoperability, if a parameter-entity reference appears in a
3674: * choice, seq, or Mixed construct, its replacement text should not
3675: * be empty, and neither the first nor last non-blank character of
3676: * the replacement text should be a connector (| or ,).
3677: *
1.62 daniel 3678: * returns: the tree of xmlElementContentPtr describing the element
1.61 daniel 3679: * hierarchy.
3680: */
3681: xmlElementContentPtr
1.62 daniel 3682: xmlParseElementChildrenContentDecl(xmlParserCtxtPtr ctxt) {
1.63 daniel 3683: xmlElementContentPtr ret = NULL, cur = NULL, last = NULL, op = NULL;
1.62 daniel 3684: CHAR *elem;
3685: CHAR type = 0;
3686:
3687: SKIP_BLANKS;
1.94 daniel 3688: GROW;
1.62 daniel 3689: if (CUR == '(') {
1.63 daniel 3690: /* Recurse on first child */
1.62 daniel 3691: NEXT;
3692: SKIP_BLANKS;
3693: cur = ret = xmlParseElementChildrenContentDecl(ctxt);
3694: SKIP_BLANKS;
1.101 daniel 3695: GROW;
1.62 daniel 3696: } else {
3697: elem = xmlParseName(ctxt);
3698: if (elem == NULL) {
3699: if ((ctxt->sax != NULL) && (ctxt->sax->error != NULL))
1.74 daniel 3700: ctxt->sax->error(ctxt->userData,
1.62 daniel 3701: "xmlParseElementChildrenContentDecl : Name or '(' expected\n");
3702: ctxt->wellFormed = 0;
3703: return(NULL);
3704: }
3705: cur = ret = xmlNewElementContent(elem, XML_ELEMENT_CONTENT_ELEMENT);
1.101 daniel 3706: GROW;
1.62 daniel 3707: if (CUR == '?') {
1.104 daniel 3708: cur->ocur = XML_ELEMENT_CONTENT_OPT;
1.62 daniel 3709: NEXT;
3710: } else if (CUR == '*') {
1.104 daniel 3711: cur->ocur = XML_ELEMENT_CONTENT_MULT;
1.62 daniel 3712: NEXT;
3713: } else if (CUR == '+') {
1.104 daniel 3714: cur->ocur = XML_ELEMENT_CONTENT_PLUS;
1.62 daniel 3715: NEXT;
3716: } else {
1.104 daniel 3717: cur->ocur = XML_ELEMENT_CONTENT_ONCE;
1.62 daniel 3718: }
1.66 daniel 3719: free(elem);
1.101 daniel 3720: GROW;
1.62 daniel 3721: }
3722: SKIP_BLANKS;
1.91 daniel 3723: SHRINK;
1.62 daniel 3724: while (CUR != ')') {
1.63 daniel 3725: /*
3726: * Each loop we parse one separator and one element.
3727: */
1.62 daniel 3728: if (CUR == ',') {
3729: if (type == 0) type = CUR;
3730:
3731: /*
3732: * Detect "Name | Name , Name" error
3733: */
3734: else if (type != CUR) {
3735: if ((ctxt->sax != NULL) && (ctxt->sax->error != NULL))
1.74 daniel 3736: ctxt->sax->error(ctxt->userData,
1.62 daniel 3737: "xmlParseElementChildrenContentDecl : '%c' expected\n",
3738: type);
3739: ctxt->wellFormed = 0;
3740: xmlFreeElementContent(ret);
3741: return(NULL);
3742: }
1.64 daniel 3743: NEXT;
1.62 daniel 3744:
1.63 daniel 3745: op = xmlNewElementContent(NULL, XML_ELEMENT_CONTENT_SEQ);
3746: if (op == NULL) {
3747: xmlFreeElementContent(ret);
3748: return(NULL);
3749: }
3750: if (last == NULL) {
3751: op->c1 = ret;
1.65 daniel 3752: ret = cur = op;
1.63 daniel 3753: } else {
3754: cur->c2 = op;
3755: op->c1 = last;
3756: cur =op;
1.65 daniel 3757: last = NULL;
1.63 daniel 3758: }
1.62 daniel 3759: } else if (CUR == '|') {
3760: if (type == 0) type = CUR;
3761:
3762: /*
1.63 daniel 3763: * Detect "Name , Name | Name" error
1.62 daniel 3764: */
3765: else if (type != CUR) {
3766: if ((ctxt->sax != NULL) && (ctxt->sax->error != NULL))
1.74 daniel 3767: ctxt->sax->error(ctxt->userData,
1.62 daniel 3768: "xmlParseElementChildrenContentDecl : '%c' expected\n",
3769: type);
3770: ctxt->wellFormed = 0;
3771: xmlFreeElementContent(ret);
3772: return(NULL);
3773: }
1.64 daniel 3774: NEXT;
1.62 daniel 3775:
1.63 daniel 3776: op = xmlNewElementContent(NULL, XML_ELEMENT_CONTENT_OR);
3777: if (op == NULL) {
3778: xmlFreeElementContent(ret);
3779: return(NULL);
3780: }
3781: if (last == NULL) {
3782: op->c1 = ret;
1.65 daniel 3783: ret = cur = op;
1.63 daniel 3784: } else {
3785: cur->c2 = op;
3786: op->c1 = last;
3787: cur =op;
1.65 daniel 3788: last = NULL;
1.63 daniel 3789: }
1.62 daniel 3790: } else {
3791: if ((ctxt->sax != NULL) && (ctxt->sax->error != NULL))
1.74 daniel 3792: ctxt->sax->error(ctxt->userData,
1.62 daniel 3793: "xmlParseElementChildrenContentDecl : ',' '|' or ')' expected\n");
3794: ctxt->wellFormed = 0;
3795: xmlFreeElementContent(ret);
3796: return(NULL);
3797: }
1.101 daniel 3798: GROW;
1.62 daniel 3799: SKIP_BLANKS;
1.101 daniel 3800: GROW;
1.62 daniel 3801: if (CUR == '(') {
1.63 daniel 3802: /* Recurse on second child */
1.62 daniel 3803: NEXT;
3804: SKIP_BLANKS;
1.65 daniel 3805: last = xmlParseElementChildrenContentDecl(ctxt);
1.62 daniel 3806: SKIP_BLANKS;
3807: } else {
3808: elem = xmlParseName(ctxt);
3809: if (elem == NULL) {
3810: if ((ctxt->sax != NULL) && (ctxt->sax->error != NULL))
1.74 daniel 3811: ctxt->sax->error(ctxt->userData,
1.62 daniel 3812: "xmlParseElementChildrenContentDecl : Name or '(' expected\n");
3813: ctxt->wellFormed = 0;
3814: return(NULL);
3815: }
1.65 daniel 3816: last = xmlNewElementContent(elem, XML_ELEMENT_CONTENT_ELEMENT);
1.66 daniel 3817: free(elem);
1.105 daniel 3818: if (CUR == '?') {
3819: last->ocur = XML_ELEMENT_CONTENT_OPT;
3820: NEXT;
3821: } else if (CUR == '*') {
3822: last->ocur = XML_ELEMENT_CONTENT_MULT;
3823: NEXT;
3824: } else if (CUR == '+') {
3825: last->ocur = XML_ELEMENT_CONTENT_PLUS;
3826: NEXT;
3827: } else {
3828: last->ocur = XML_ELEMENT_CONTENT_ONCE;
3829: }
1.63 daniel 3830: }
3831: SKIP_BLANKS;
1.97 daniel 3832: GROW;
1.64 daniel 3833: }
1.65 daniel 3834: if ((cur != NULL) && (last != NULL)) {
3835: cur->c2 = last;
1.62 daniel 3836: }
3837: NEXT;
3838: if (CUR == '?') {
3839: ret->ocur = XML_ELEMENT_CONTENT_OPT;
3840: NEXT;
3841: } else if (CUR == '*') {
3842: ret->ocur = XML_ELEMENT_CONTENT_MULT;
3843: NEXT;
3844: } else if (CUR == '+') {
3845: ret->ocur = XML_ELEMENT_CONTENT_PLUS;
3846: NEXT;
3847: }
3848: return(ret);
1.61 daniel 3849: }
3850:
3851: /**
3852: * xmlParseElementContentDecl:
3853: * @ctxt: an XML parser context
3854: * @name: the name of the element being defined.
3855: * @result: the Element Content pointer will be stored here if any
1.22 daniel 3856: *
1.61 daniel 3857: * parse the declaration for an Element content either Mixed or Children,
3858: * the cases EMPTY and ANY are handled directly in xmlParseElementDecl
3859: *
3860: * [46] contentspec ::= 'EMPTY' | 'ANY' | Mixed | children
1.50 daniel 3861: *
1.61 daniel 3862: * returns: the type of element content XML_ELEMENT_TYPE_xxx
1.22 daniel 3863: */
3864:
1.61 daniel 3865: int
3866: xmlParseElementContentDecl(xmlParserCtxtPtr ctxt, CHAR *name,
3867: xmlElementContentPtr *result) {
3868:
3869: xmlElementContentPtr tree = NULL;
3870: int res;
3871:
3872: *result = NULL;
3873:
3874: if (CUR != '(') {
3875: if ((ctxt->sax != NULL) && (ctxt->sax->error != NULL))
1.74 daniel 3876: ctxt->sax->error(ctxt->userData,
1.61 daniel 3877: "xmlParseElementContentDecl : '(' expected\n");
3878: ctxt->wellFormed = 0;
3879: return(-1);
3880: }
3881: NEXT;
1.97 daniel 3882: GROW;
1.61 daniel 3883: SKIP_BLANKS;
3884: if ((CUR == '#') && (NXT(1) == 'P') &&
3885: (NXT(2) == 'C') && (NXT(3) == 'D') &&
3886: (NXT(4) == 'A') && (NXT(5) == 'T') &&
3887: (NXT(6) == 'A')) {
1.62 daniel 3888: tree = xmlParseElementMixedContentDecl(ctxt);
1.61 daniel 3889: res = XML_ELEMENT_TYPE_MIXED;
3890: } else {
1.62 daniel 3891: tree = xmlParseElementChildrenContentDecl(ctxt);
1.61 daniel 3892: res = XML_ELEMENT_TYPE_ELEMENT;
3893: }
3894: SKIP_BLANKS;
1.63 daniel 3895: /****************************
1.61 daniel 3896: if (CUR != ')') {
3897: if ((ctxt->sax != NULL) && (ctxt->sax->error != NULL))
1.74 daniel 3898: ctxt->sax->error(ctxt->userData,
1.61 daniel 3899: "xmlParseElementContentDecl : ')' expected\n");
3900: ctxt->wellFormed = 0;
3901: return(-1);
3902: }
1.63 daniel 3903: ****************************/
3904: *result = tree;
1.61 daniel 3905: return(res);
1.22 daniel 3906: }
3907:
1.50 daniel 3908: /**
3909: * xmlParseElementDecl:
3910: * @ctxt: an XML parser context
3911: *
3912: * parse an Element declaration.
1.22 daniel 3913: *
3914: * [45] elementdecl ::= '<!ELEMENT' S Name S contentspec S? '>'
3915: *
1.99 daniel 3916: * [ VC: Unique Element Type Declaration ]
3917: * TODO No element type may be declared more than once
1.69 daniel 3918: *
3919: * Returns the type of the element, or -1 in case of error
1.22 daniel 3920: */
1.59 daniel 3921: int
1.55 daniel 3922: xmlParseElementDecl(xmlParserCtxtPtr ctxt) {
1.22 daniel 3923: CHAR *name;
1.59 daniel 3924: int ret = -1;
1.61 daniel 3925: xmlElementContentPtr content = NULL;
1.22 daniel 3926:
1.97 daniel 3927: GROW;
1.40 daniel 3928: if ((CUR == '<') && (NXT(1) == '!') &&
3929: (NXT(2) == 'E') && (NXT(3) == 'L') &&
3930: (NXT(4) == 'E') && (NXT(5) == 'M') &&
3931: (NXT(6) == 'E') && (NXT(7) == 'N') &&
1.59 daniel 3932: (NXT(8) == 'T')) {
1.40 daniel 3933: SKIP(9);
1.59 daniel 3934: if (!IS_BLANK(CUR)) {
3935: if ((ctxt->sax != NULL) && (ctxt->sax->error != NULL))
1.74 daniel 3936: ctxt->sax->error(ctxt->userData,
1.59 daniel 3937: "Space required after 'ELEMENT'\n");
3938: ctxt->wellFormed = 0;
3939: }
1.42 daniel 3940: SKIP_BLANKS;
1.22 daniel 3941: name = xmlParseName(ctxt);
3942: if (name == NULL) {
1.55 daniel 3943: if ((ctxt->sax != NULL) && (ctxt->sax->error != NULL))
1.74 daniel 3944: ctxt->sax->error(ctxt->userData,
1.59 daniel 3945: "xmlParseElementDecl: no name for Element\n");
3946: ctxt->wellFormed = 0;
3947: return(-1);
3948: }
3949: if (!IS_BLANK(CUR)) {
3950: if ((ctxt->sax != NULL) && (ctxt->sax->error != NULL))
1.74 daniel 3951: ctxt->sax->error(ctxt->userData,
1.59 daniel 3952: "Space required after the element name\n");
3953: ctxt->wellFormed = 0;
1.22 daniel 3954: }
1.42 daniel 3955: SKIP_BLANKS;
1.40 daniel 3956: if ((CUR == 'E') && (NXT(1) == 'M') &&
3957: (NXT(2) == 'P') && (NXT(3) == 'T') &&
3958: (NXT(4) == 'Y')) {
3959: SKIP(5);
1.22 daniel 3960: /*
3961: * Element must always be empty.
3962: */
1.59 daniel 3963: ret = XML_ELEMENT_TYPE_EMPTY;
1.40 daniel 3964: } else if ((CUR == 'A') && (NXT(1) == 'N') &&
3965: (NXT(2) == 'Y')) {
3966: SKIP(3);
1.22 daniel 3967: /*
3968: * Element is a generic container.
3969: */
1.59 daniel 3970: ret = XML_ELEMENT_TYPE_ANY;
1.61 daniel 3971: } else if (CUR == '(') {
3972: ret = xmlParseElementContentDecl(ctxt, name, &content);
1.22 daniel 3973: } else {
1.98 daniel 3974: /*
3975: * [ WFC: PEs in Internal Subset ] error handling.
3976: */
3977: if ((CUR == '%') && (ctxt->external == 0) &&
3978: (ctxt->inputNr == 1)) {
3979: if ((ctxt->sax != NULL) && (ctxt->sax->error != NULL))
3980: ctxt->sax->error(ctxt->userData,
3981: "PEReference: forbidden within markup decl in internal subset\n");
3982: } else {
3983: if ((ctxt->sax != NULL) && (ctxt->sax->error != NULL))
3984: ctxt->sax->error(ctxt->userData,
3985: "xmlParseElementDecl: 'EMPTY', 'ANY' or '(' expected\n");
3986: }
1.61 daniel 3987: ctxt->wellFormed = 0;
3988: if (name != NULL) free(name);
3989: return(-1);
1.22 daniel 3990: }
1.42 daniel 3991: SKIP_BLANKS;
1.40 daniel 3992: if (CUR != '>') {
1.55 daniel 3993: if ((ctxt->sax != NULL) && (ctxt->sax->error != NULL))
1.74 daniel 3994: ctxt->sax->error(ctxt->userData,
1.31 daniel 3995: "xmlParseElementDecl: expected '>' at the end\n");
1.59 daniel 3996: ctxt->wellFormed = 0;
1.61 daniel 3997: } else {
1.40 daniel 3998: NEXT;
1.72 daniel 3999: if ((ctxt->sax != NULL) && (ctxt->sax->elementDecl != NULL))
1.76 daniel 4000: ctxt->sax->elementDecl(ctxt->userData, name, ret,
4001: content);
1.61 daniel 4002: }
1.84 daniel 4003: if (content != NULL) {
4004: xmlFreeElementContent(content);
4005: }
1.61 daniel 4006: if (name != NULL) {
4007: free(name);
4008: }
1.22 daniel 4009: }
1.59 daniel 4010: return(ret);
1.22 daniel 4011: }
4012:
1.50 daniel 4013: /**
4014: * xmlParseMarkupDecl:
4015: * @ctxt: an XML parser context
4016: *
4017: * parse Markup declarations
1.22 daniel 4018: *
4019: * [29] markupdecl ::= elementdecl | AttlistDecl | EntityDecl |
4020: * NotationDecl | PI | Comment
4021: *
1.98 daniel 4022: * [ VC: Proper Declaration/PE Nesting ]
4023: * TODO Parameter-entity replacement text must be properly nested with
4024: * markup declarations. That is to say, if either the first character
4025: * or the last character of a markup declaration (markupdecl above) is
4026: * contained in the replacement text for a parameter-entity reference,
4027: * both must be contained in the same replacement text.
4028: *
4029: * [ WFC: PEs in Internal Subset ]
4030: * In the internal DTD subset, parameter-entity references can occur
4031: * only where markup declarations can occur, not within markup declarations.
4032: * (This does not apply to references that occur in external parameter
4033: * entities or to the external subset.)
1.22 daniel 4034: */
1.55 daniel 4035: void
4036: xmlParseMarkupDecl(xmlParserCtxtPtr ctxt) {
1.94 daniel 4037: GROW;
1.22 daniel 4038: xmlParseElementDecl(ctxt);
4039: xmlParseAttributeListDecl(ctxt);
4040: xmlParseEntityDecl(ctxt);
4041: xmlParseNotationDecl(ctxt);
4042: xmlParsePI(ctxt);
1.31 daniel 4043: xmlParseComment(ctxt, 0);
1.98 daniel 4044: /*
4045: * This is only for internal subset. On external entities,
4046: * the replacement is done before parsing stage
4047: */
4048: if ((ctxt->external == 0) && (ctxt->inputNr == 1))
4049: xmlParsePEReference(ctxt);
1.97 daniel 4050: ctxt->instate = XML_PARSER_DTD;
1.22 daniel 4051: }
4052:
1.50 daniel 4053: /**
1.76 daniel 4054: * xmlParseTextDecl:
4055: * @ctxt: an XML parser context
4056: *
4057: * parse an XML declaration header for external entities
4058: *
4059: * [77] TextDecl ::= '<?xml' VersionInfo? EncodingDecl S? '?>'
4060: *
4061: * Returns the only valuable info for an external parsed entity, the encoding
4062: */
4063:
4064: CHAR *
4065: xmlParseTextDecl(xmlParserCtxtPtr ctxt) {
4066: CHAR *version;
4067: CHAR *encoding = NULL;
4068:
4069: /*
4070: * We know that '<?xml' is here.
4071: */
4072: SKIP(5);
4073:
4074: if (!IS_BLANK(CUR)) {
4075: if ((ctxt->sax != NULL) && (ctxt->sax->error != NULL))
4076: ctxt->sax->error(ctxt->userData, "Blank needed after '<?xml'\n");
4077: ctxt->wellFormed = 0;
4078: }
4079: SKIP_BLANKS;
4080:
4081: /*
4082: * We may have the VersionInfo here.
4083: */
4084: version = xmlParseVersionInfo(ctxt);
1.99 daniel 4085:
1.76 daniel 4086: /* TODO: we should actually inherit from the referencing doc if absent
4087: if (version == NULL)
4088: version = xmlCharStrdup(XML_DEFAULT_VERSION);
4089: ctxt->version = xmlStrdup(version);
4090: */
1.99 daniel 4091:
1.76 daniel 4092: if (version != NULL)
4093: free(version);
4094:
4095: /*
4096: * We must have the encoding declaration
4097: */
4098: if (!IS_BLANK(CUR)) {
4099: if ((ctxt->sax != NULL) && (ctxt->sax->error != NULL))
4100: ctxt->sax->error(ctxt->userData, "Blank needed here\n");
4101: ctxt->wellFormed = 0;
4102: }
4103: encoding = xmlParseEncodingDecl(ctxt);
4104:
4105: SKIP_BLANKS;
4106: if ((CUR == '?') && (NXT(1) == '>')) {
4107: SKIP(2);
4108: } else if (CUR == '>') {
4109: /* Deprecated old WD ... */
4110: if ((ctxt->sax != NULL) && (ctxt->sax->error != NULL))
4111: ctxt->sax->error(ctxt->userData, "XML declaration must end-up with '?>'\n");
4112: ctxt->wellFormed = 0;
4113: NEXT;
4114: } else {
4115: if ((ctxt->sax != NULL) && (ctxt->sax->error != NULL))
4116: ctxt->sax->error(ctxt->userData, "parsing XML declaration: '?>' expected\n");
4117: ctxt->wellFormed = 0;
4118: MOVETO_ENDTAG(CUR_PTR);
4119: NEXT;
4120: }
4121: return(encoding);
4122: }
4123:
4124: /*
4125: * xmlParseConditionalSections
4126: * @ctxt: an XML parser context
4127: *
4128: * TODO : Conditionnal section are not yet supported !
4129: *
4130: * [61] conditionalSect ::= includeSect | ignoreSect
4131: * [62] includeSect ::= '<![' S? 'INCLUDE' S? '[' extSubsetDecl ']]>'
4132: * [63] ignoreSect ::= '<![' S? 'IGNORE' S? '[' ignoreSectContents* ']]>'
4133: * [64] ignoreSectContents ::= Ignore ('<![' ignoreSectContents ']]>' Ignore)*
4134: * [65] Ignore ::= Char* - (Char* ('<![' | ']]>') Char*)
4135: */
4136:
4137: void
4138: xmlParseConditionalSections(xmlParserCtxtPtr ctxt) {
4139: if ((ctxt->sax != NULL) && (ctxt->sax->warning != NULL))
4140: ctxt->sax->warning(ctxt->userData,
4141: "XML conditional section not supported\n");
4142: /*
4143: * Skip up to the end of the conditionnal section.
4144: */
4145: while ((CUR != 0) && ((CUR != ']') || (NXT(1) != ']') || (NXT(2) != '>')))
4146: NEXT;
4147: if (CUR == 0) {
4148: if ((ctxt->sax != NULL) && (ctxt->sax->error != NULL))
4149: ctxt->sax->error(ctxt->userData,
4150: "XML conditional section not closed\n");
4151: ctxt->wellFormed = 0;
4152: }
4153: }
4154:
4155: /**
4156: * xmlParseExternalSubset
4157: * @ctxt: an XML parser context
4158: *
4159: * parse Markup declarations from an external subset
4160: *
4161: * [30] extSubset ::= textDecl? extSubsetDecl
4162: *
4163: * [31] extSubsetDecl ::= (markupdecl | conditionalSect | PEReference | S) *
4164: *
4165: * TODO There is a check [ VC: Proper Declaration/PE Nesting ]
4166: */
4167: void
1.79 daniel 4168: xmlParseExternalSubset(xmlParserCtxtPtr ctxt, const CHAR *ExternalID,
4169: const CHAR *SystemID) {
1.76 daniel 4170: if ((CUR == '<') && (NXT(1) == '?') &&
4171: (NXT(2) == 'x') && (NXT(3) == 'm') &&
4172: (NXT(4) == 'l')) {
4173: xmlParseTextDecl(ctxt);
4174: }
1.79 daniel 4175: if (ctxt->myDoc == NULL) {
4176: ctxt->myDoc = xmlNewDoc("1.0");
4177: }
4178: if ((ctxt->myDoc != NULL) && (ctxt->myDoc->intSubset == NULL))
4179: xmlCreateIntSubset(ctxt->myDoc, NULL, ExternalID, SystemID);
4180:
1.96 daniel 4181: ctxt->instate = XML_PARSER_DTD;
1.101 daniel 4182: ctxt->external = 1;
1.76 daniel 4183: while (((CUR == '<') && (NXT(1) == '?')) ||
4184: ((CUR == '<') && (NXT(1) == '!')) ||
4185: IS_BLANK(CUR)) {
4186: if ((CUR == '<') && (NXT(1) == '!') && (NXT(2) == '[')) {
4187: xmlParseConditionalSections(ctxt);
4188: } else if (IS_BLANK(CUR)) {
4189: NEXT;
4190: } else if (CUR == '%') {
4191: xmlParsePEReference(ctxt);
4192: } else
4193: xmlParseMarkupDecl(ctxt);
1.77 daniel 4194:
4195: /*
4196: * Pop-up of finished entities.
4197: */
4198: while ((CUR == 0) && (ctxt->inputNr > 1))
4199: xmlPopInput(ctxt);
4200:
1.76 daniel 4201: }
4202:
4203: if (CUR != 0) {
4204: if ((ctxt->sax != NULL) && (ctxt->sax->error != NULL))
4205: ctxt->sax->error(ctxt->userData,
4206: "Extra content at the end of the document\n");
4207: ctxt->wellFormed = 0;
4208: }
4209:
4210: }
4211:
4212: /**
1.77 daniel 4213: * xmlParseReference:
4214: * @ctxt: an XML parser context
4215: *
4216: * parse and handle entity references in content, depending on the SAX
4217: * interface, this may end-up in a call to character() if this is a
1.79 daniel 4218: * CharRef, a predefined entity, if there is no reference() callback.
4219: * or if the parser was asked to switch to that mode.
1.77 daniel 4220: *
4221: * [67] Reference ::= EntityRef | CharRef
4222: */
4223: void
4224: xmlParseReference(xmlParserCtxtPtr ctxt) {
4225: xmlEntityPtr ent;
4226: CHAR *val;
4227: if (CUR != '&') return;
4228:
4229: if (NXT(1) == '#') {
4230: CHAR out[2];
4231: int val = xmlParseCharRef(ctxt);
4232: /* TODO: invalid for UTF-8 variable encoding !!! */
4233: out[0] = val;
4234: out[1] = 0;
4235: if ((ctxt->sax != NULL) && (ctxt->sax->characters != NULL))
4236: ctxt->sax->characters(ctxt->userData, out, 1);
4237: } else {
4238: ent = xmlParseEntityRef(ctxt);
4239: if (ent == NULL) return;
4240: if ((ent->name != NULL) &&
4241: (ent->type != XML_INTERNAL_PREDEFINED_ENTITY) &&
1.79 daniel 4242: (ctxt->sax != NULL) && (ctxt->sax->reference != NULL) &&
4243: (ctxt->replaceEntities == 0)) {
4244:
1.77 daniel 4245: /*
4246: * Create a node.
4247: */
4248: ctxt->sax->reference(ctxt->userData, ent->name);
4249: return;
4250: }
4251: val = ent->content;
4252: if (val == NULL) return;
4253: /*
4254: * inline the entity.
4255: */
4256: if ((ctxt->sax != NULL) && (ctxt->sax->characters != NULL))
4257: ctxt->sax->characters(ctxt->userData, val, xmlStrlen(val));
4258: }
1.24 daniel 4259: }
4260:
1.50 daniel 4261: /**
4262: * xmlParseEntityRef:
4263: * @ctxt: an XML parser context
4264: *
4265: * parse ENTITY references declarations
1.24 daniel 4266: *
4267: * [68] EntityRef ::= '&' Name ';'
1.68 daniel 4268: *
1.98 daniel 4269: * [ WFC: Entity Declared ]
4270: * In a document without any DTD, a document with only an internal DTD
4271: * subset which contains no parameter entity references, or a document
4272: * with "standalone='yes'", the Name given in the entity reference
4273: * must match that in an entity declaration, except that well-formed
4274: * documents need not declare any of the following entities: amp, lt,
4275: * gt, apos, quot. The declaration of a parameter entity must precede
4276: * any reference to it. Similarly, the declaration of a general entity
4277: * must precede any reference to it which appears in a default value in an
4278: * attribute-list declaration. Note that if entities are declared in the
4279: * external subset or in external parameter entities, a non-validating
4280: * processor is not obligated to read and process their declarations;
4281: * for such documents, the rule that an entity must be declared is a
4282: * well-formedness constraint only if standalone='yes'.
4283: *
4284: * [ WFC: Parsed Entity ]
4285: * An entity reference must not contain the name of an unparsed entity
4286: *
1.77 daniel 4287: * Returns the xmlEntityPtr if found, or NULL otherwise.
1.24 daniel 4288: */
1.77 daniel 4289: xmlEntityPtr
1.55 daniel 4290: xmlParseEntityRef(xmlParserCtxtPtr ctxt) {
1.24 daniel 4291: CHAR *name;
1.72 daniel 4292: xmlEntityPtr ent = NULL;
1.24 daniel 4293:
1.91 daniel 4294: GROW;
1.111 ! daniel 4295:
1.40 daniel 4296: if (CUR == '&') {
4297: NEXT;
1.24 daniel 4298: name = xmlParseName(ctxt);
4299: if (name == NULL) {
1.55 daniel 4300: if ((ctxt->sax != NULL) && (ctxt->sax->error != NULL))
1.98 daniel 4301: ctxt->sax->error(ctxt->userData,
4302: "xmlParseEntityRef: no name\n");
1.59 daniel 4303: ctxt->wellFormed = 0;
1.24 daniel 4304: } else {
1.40 daniel 4305: if (CUR == ';') {
4306: NEXT;
1.24 daniel 4307: /*
1.77 daniel 4308: * Ask first SAX for entity resolution, otherwise try the
4309: * predefined set.
4310: */
4311: if (ctxt->sax != NULL) {
4312: if (ctxt->sax->getEntity != NULL)
4313: ent = ctxt->sax->getEntity(ctxt->userData, name);
4314: if (ent == NULL)
4315: ent = xmlGetPredefinedEntity(name);
4316: }
4317: /*
1.98 daniel 4318: * [ WFC: Entity Declared ]
4319: * In a document without any DTD, a document with only an
4320: * internal DTD subset which contains no parameter entity
4321: * references, or a document with "standalone='yes'", the
4322: * Name given in the entity reference must match that in an
4323: * entity declaration, except that well-formed documents
4324: * need not declare any of the following entities: amp, lt,
4325: * gt, apos, quot.
4326: * The declaration of a parameter entity must precede any
4327: * reference to it.
4328: * Similarly, the declaration of a general entity must
4329: * precede any reference to it which appears in a default
4330: * value in an attribute-list declaration. Note that if
4331: * entities are declared in the external subset or in
4332: * external parameter entities, a non-validating processor
4333: * is not obligated to read and process their declarations;
4334: * for such documents, the rule that an entity must be
4335: * declared is a well-formedness constraint only if
4336: * standalone='yes'.
1.59 daniel 4337: */
1.77 daniel 4338: if (ent == NULL) {
1.98 daniel 4339: if ((ctxt->standalone == 1) ||
4340: ((ctxt->hasExternalSubset == 0) &&
4341: (ctxt->hasPErefs == 0))) {
4342: if ((ctxt->sax != NULL) && (ctxt->sax->error != NULL))
1.77 daniel 4343: ctxt->sax->error(ctxt->userData,
4344: "Entity '%s' not defined\n", name);
4345: ctxt->wellFormed = 0;
4346: } else {
1.98 daniel 4347: if ((ctxt->sax != NULL) && (ctxt->sax->warning != NULL))
4348: ctxt->sax->warning(ctxt->userData,
4349: "Entity '%s' not defined\n", name);
1.59 daniel 4350: }
1.77 daniel 4351: }
1.59 daniel 4352:
4353: /*
1.98 daniel 4354: * [ WFC: Parsed Entity ]
4355: * An entity reference must not contain the name of an
4356: * unparsed entity
4357: */
4358: else if (ent->type == XML_EXTERNAL_GENERAL_UNPARSED_ENTITY) {
4359: if ((ctxt->sax != NULL) && (ctxt->sax->error != NULL))
4360: ctxt->sax->error(ctxt->userData,
4361: "Entity reference to unparsed entity %s\n", name);
4362: ctxt->wellFormed = 0;
4363: }
4364:
4365: /*
4366: * [ WFC: No External Entity References ]
4367: * Attribute values cannot contain direct or indirect
4368: * entity references to external entities.
4369: */
4370: else if ((ctxt->instate == XML_PARSER_ATTRIBUTE_VALUE) &&
4371: (ent->type == XML_EXTERNAL_GENERAL_PARSED_ENTITY)) {
4372: if ((ctxt->sax != NULL) && (ctxt->sax->error != NULL))
4373: ctxt->sax->error(ctxt->userData,
4374: "Attribute references external entity '%s'\n", name);
4375: ctxt->wellFormed = 0;
4376: }
4377: /*
4378: * [ WFC: No < in Attribute Values ]
4379: * The replacement text of any entity referred to directly or
4380: * indirectly in an attribute value (other than "<") must
4381: * not contain a <.
1.59 daniel 4382: */
1.98 daniel 4383: else if ((ctxt->instate == XML_PARSER_ATTRIBUTE_VALUE) &&
4384: (ent != NULL) && (xmlStrcmp(ent->name, "lt")) &&
4385: (ent->content != NULL) &&
4386: (xmlStrchr(ent->content, '<'))) {
4387: if ((ctxt->sax != NULL) && (ctxt->sax->error != NULL))
4388: ctxt->sax->error(ctxt->userData,
4389: "'<' in entity '%s' is not allowed in attributes values\n", name);
4390: ctxt->wellFormed = 0;
4391: }
4392:
4393: /*
4394: * Internal check, no parameter entities here ...
4395: */
4396: else {
1.59 daniel 4397: switch (ent->type) {
4398: case XML_INTERNAL_PARAMETER_ENTITY:
4399: case XML_EXTERNAL_PARAMETER_ENTITY:
4400: if ((ctxt->sax != NULL) && (ctxt->sax->error != NULL))
1.74 daniel 4401: ctxt->sax->error(ctxt->userData,
1.59 daniel 4402: "Attempt to reference the parameter entity '%s'\n", name);
4403: ctxt->wellFormed = 0;
4404: break;
4405: }
4406: }
4407:
4408: /*
1.98 daniel 4409: * [ WFC: No Recursion ]
4410: * TODO A parsed entity must not contain a recursive
4411: * reference to itself, either directly or indirectly.
1.59 daniel 4412: */
1.77 daniel 4413:
1.24 daniel 4414: } else {
1.55 daniel 4415: if ((ctxt->sax != NULL) && (ctxt->sax->error != NULL))
1.74 daniel 4416: ctxt->sax->error(ctxt->userData,
1.59 daniel 4417: "xmlParseEntityRef: expecting ';'\n");
4418: ctxt->wellFormed = 0;
1.24 daniel 4419: }
1.45 daniel 4420: free(name);
1.24 daniel 4421: }
4422: }
1.77 daniel 4423: return(ent);
1.24 daniel 4424: }
4425:
1.50 daniel 4426: /**
4427: * xmlParsePEReference:
4428: * @ctxt: an XML parser context
4429: *
4430: * parse PEReference declarations
1.77 daniel 4431: * The entity content is handled directly by pushing it's content as
4432: * a new input stream.
1.22 daniel 4433: *
4434: * [69] PEReference ::= '%' Name ';'
1.68 daniel 4435: *
1.98 daniel 4436: * [ WFC: No Recursion ]
4437: * TODO A parsed entity must not contain a recursive
4438: * reference to itself, either directly or indirectly.
4439: *
4440: * [ WFC: Entity Declared ]
4441: * In a document without any DTD, a document with only an internal DTD
4442: * subset which contains no parameter entity references, or a document
4443: * with "standalone='yes'", ... ... The declaration of a parameter
4444: * entity must precede any reference to it...
4445: *
4446: * [ VC: Entity Declared ]
4447: * In a document with an external subset or external parameter entities
4448: * with "standalone='no'", ... ... The declaration of a parameter entity
4449: * must precede any reference to it...
4450: *
4451: * [ WFC: In DTD ]
4452: * Parameter-entity references may only appear in the DTD.
4453: * NOTE: misleading but this is handled.
1.22 daniel 4454: */
1.77 daniel 4455: void
1.55 daniel 4456: xmlParsePEReference(xmlParserCtxtPtr ctxt) {
1.22 daniel 4457: CHAR *name;
1.72 daniel 4458: xmlEntityPtr entity = NULL;
1.50 daniel 4459: xmlParserInputPtr input;
1.22 daniel 4460:
1.40 daniel 4461: if (CUR == '%') {
4462: NEXT;
1.22 daniel 4463: name = xmlParseName(ctxt);
4464: if (name == NULL) {
1.55 daniel 4465: if ((ctxt->sax != NULL) && (ctxt->sax->error != NULL))
1.74 daniel 4466: ctxt->sax->error(ctxt->userData, "xmlParsePEReference: no name\n");
1.59 daniel 4467: ctxt->wellFormed = 0;
1.22 daniel 4468: } else {
1.40 daniel 4469: if (CUR == ';') {
4470: NEXT;
1.98 daniel 4471: if ((ctxt->sax != NULL) &&
4472: (ctxt->sax->getParameterEntity != NULL))
4473: entity = ctxt->sax->getParameterEntity(ctxt->userData,
4474: name);
1.45 daniel 4475: if (entity == NULL) {
1.98 daniel 4476: /*
4477: * [ WFC: Entity Declared ]
4478: * In a document without any DTD, a document with only an
4479: * internal DTD subset which contains no parameter entity
4480: * references, or a document with "standalone='yes'", ...
4481: * ... The declaration of a parameter entity must precede
4482: * any reference to it...
4483: */
4484: if ((ctxt->standalone == 1) ||
4485: ((ctxt->hasExternalSubset == 0) &&
4486: (ctxt->hasPErefs == 0))) {
4487: if ((ctxt->sax != NULL) && (ctxt->sax->error != NULL))
4488: ctxt->sax->error(ctxt->userData,
4489: "PEReference: %%%s; not found\n", name);
4490: ctxt->wellFormed = 0;
4491: } else {
4492: /*
4493: * [ VC: Entity Declared ]
4494: * In a document with an external subset or external
4495: * parameter entities with "standalone='no'", ...
4496: * ... The declaration of a parameter entity must precede
4497: * any reference to it...
4498: */
4499: if ((ctxt->sax != NULL) && (ctxt->sax->warning != NULL))
4500: ctxt->sax->warning(ctxt->userData,
4501: "PEReference: %%%s; not found\n", name);
4502: ctxt->valid = 0;
4503: }
1.50 daniel 4504: } else {
1.98 daniel 4505: /*
4506: * Internal checking in case the entity quest barfed
4507: */
4508: if ((entity->type != XML_INTERNAL_PARAMETER_ENTITY) &&
4509: (entity->type != XML_EXTERNAL_PARAMETER_ENTITY)) {
4510: if ((ctxt->sax != NULL) && (ctxt->sax->warning != NULL))
4511: ctxt->sax->warning(ctxt->userData,
4512: "Internal: %%%s; is not a parameter entity\n", name);
4513: } else {
4514: input = xmlNewEntityInputStream(ctxt, entity);
4515: xmlPushInput(ctxt, input);
4516: }
1.45 daniel 4517: }
1.98 daniel 4518: ctxt->hasPErefs = 1;
1.22 daniel 4519: } else {
1.55 daniel 4520: if ((ctxt->sax != NULL) && (ctxt->sax->error != NULL))
1.74 daniel 4521: ctxt->sax->error(ctxt->userData,
1.59 daniel 4522: "xmlParsePEReference: expecting ';'\n");
4523: ctxt->wellFormed = 0;
1.22 daniel 4524: }
1.45 daniel 4525: free(name);
1.3 veillard 4526: }
4527: }
4528: }
4529:
1.50 daniel 4530: /**
4531: * xmlParseDocTypeDecl :
4532: * @ctxt: an XML parser context
4533: *
4534: * parse a DOCTYPE declaration
1.21 daniel 4535: *
1.22 daniel 4536: * [28] doctypedecl ::= '<!DOCTYPE' S Name (S ExternalID)? S?
4537: * ('[' (markupdecl | PEReference | S)* ']' S?)? '>'
1.98 daniel 4538: *
4539: * [ VC: Root Element Type ]
1.99 daniel 4540: * The Name in the document type declaration must match the element
1.98 daniel 4541: * type of the root element.
1.21 daniel 4542: */
4543:
1.55 daniel 4544: void
4545: xmlParseDocTypeDecl(xmlParserCtxtPtr ctxt) {
1.21 daniel 4546: CHAR *name;
4547: CHAR *ExternalID = NULL;
1.39 daniel 4548: CHAR *URI = NULL;
1.21 daniel 4549:
4550: /*
4551: * We know that '<!DOCTYPE' has been detected.
4552: */
1.40 daniel 4553: SKIP(9);
1.21 daniel 4554:
1.42 daniel 4555: SKIP_BLANKS;
1.21 daniel 4556:
4557: /*
4558: * Parse the DOCTYPE name.
4559: */
4560: name = xmlParseName(ctxt);
4561: if (name == NULL) {
1.55 daniel 4562: if ((ctxt->sax != NULL) && (ctxt->sax->error != NULL))
1.74 daniel 4563: ctxt->sax->error(ctxt->userData, "xmlParseDocTypeDecl : no DOCTYPE name !\n");
1.59 daniel 4564: ctxt->wellFormed = 0;
1.21 daniel 4565: }
4566:
1.42 daniel 4567: SKIP_BLANKS;
1.21 daniel 4568:
4569: /*
1.22 daniel 4570: * Check for SystemID and ExternalID
4571: */
1.67 daniel 4572: URI = xmlParseExternalID(ctxt, &ExternalID, 1);
1.98 daniel 4573:
4574: if ((URI != NULL) || (ExternalID != NULL)) {
4575: ctxt->hasExternalSubset = 1;
4576: }
4577:
1.42 daniel 4578: SKIP_BLANKS;
1.36 daniel 4579:
1.76 daniel 4580: /*
4581: * NOTE: the SAX callback may try to fetch the external subset
4582: * entity and fill it up !
4583: */
1.72 daniel 4584: if ((ctxt->sax != NULL) && (ctxt->sax->internalSubset != NULL))
1.74 daniel 4585: ctxt->sax->internalSubset(ctxt->userData, name, ExternalID, URI);
1.22 daniel 4586:
4587: /*
4588: * Is there any DTD definition ?
4589: */
1.40 daniel 4590: if (CUR == '[') {
1.96 daniel 4591: ctxt->instate = XML_PARSER_DTD;
1.40 daniel 4592: NEXT;
1.22 daniel 4593: /*
4594: * Parse the succession of Markup declarations and
4595: * PEReferences.
4596: * Subsequence (markupdecl | PEReference | S)*
4597: */
1.40 daniel 4598: while (CUR != ']') {
4599: const CHAR *check = CUR_PTR;
1.22 daniel 4600:
1.42 daniel 4601: SKIP_BLANKS;
1.22 daniel 4602: xmlParseMarkupDecl(ctxt);
1.50 daniel 4603: xmlParsePEReference(ctxt);
1.22 daniel 4604:
1.40 daniel 4605: if (CUR_PTR == check) {
1.55 daniel 4606: if ((ctxt->sax != NULL) && (ctxt->sax->error != NULL))
1.74 daniel 4607: ctxt->sax->error(ctxt->userData,
1.31 daniel 4608: "xmlParseDocTypeDecl: error detected in Markup declaration\n");
1.59 daniel 4609: ctxt->wellFormed = 0;
1.22 daniel 4610: break;
4611: }
1.77 daniel 4612:
4613: /*
4614: * Pop-up of finished entities.
4615: */
4616: while ((CUR == 0) && (ctxt->inputNr > 1))
4617: xmlPopInput(ctxt);
4618:
1.22 daniel 4619: }
1.40 daniel 4620: if (CUR == ']') NEXT;
1.22 daniel 4621: }
4622:
4623: /*
4624: * We should be at the end of the DOCTYPE declaration.
1.21 daniel 4625: */
1.40 daniel 4626: if (CUR != '>') {
1.55 daniel 4627: if ((ctxt->sax != NULL) && (ctxt->sax->error != NULL))
1.74 daniel 4628: ctxt->sax->error(ctxt->userData, "DOCTYPE unproperly terminated\n");
1.59 daniel 4629: ctxt->wellFormed = 0;
1.22 daniel 4630: /* We shouldn't try to resynchronize ... */
1.21 daniel 4631: }
1.40 daniel 4632: NEXT;
1.22 daniel 4633:
4634: /*
1.99 daniel 4635: * Cleanup
1.22 daniel 4636: */
1.39 daniel 4637: if (URI != NULL) free(URI);
1.22 daniel 4638: if (ExternalID != NULL) free(ExternalID);
4639: if (name != NULL) free(name);
1.21 daniel 4640: }
4641:
1.50 daniel 4642: /**
4643: * xmlParseAttribute:
4644: * @ctxt: an XML parser context
1.72 daniel 4645: * @value: a CHAR ** used to store the value of the attribute
1.50 daniel 4646: *
4647: * parse an attribute
1.3 veillard 4648: *
1.22 daniel 4649: * [41] Attribute ::= Name Eq AttValue
4650: *
1.98 daniel 4651: * [ WFC: No External Entity References ]
4652: * Attribute values cannot contain direct or indirect entity references
4653: * to external entities.
4654: *
4655: * [ WFC: No < in Attribute Values ]
4656: * The replacement text of any entity referred to directly or indirectly in
4657: * an attribute value (other than "<") must not contain a <.
4658: *
4659: * [ VC: Attribute Value Type ]
4660: * TODO The attribute must have been declared; the value must be of the type
1.99 daniel 4661: * declared for it.
1.98 daniel 4662: *
1.22 daniel 4663: * [25] Eq ::= S? '=' S?
4664: *
1.29 daniel 4665: * With namespace:
4666: *
4667: * [NS 11] Attribute ::= QName Eq AttValue
1.43 daniel 4668: *
4669: * Also the case QName == xmlns:??? is handled independently as a namespace
4670: * definition.
1.69 daniel 4671: *
1.72 daniel 4672: * Returns the attribute name, and the value in *value.
1.3 veillard 4673: */
4674:
1.72 daniel 4675: CHAR *
4676: xmlParseAttribute(xmlParserCtxtPtr ctxt, CHAR **value) {
1.59 daniel 4677: CHAR *name, *val;
1.3 veillard 4678:
1.72 daniel 4679: *value = NULL;
4680: name = xmlParseName(ctxt);
1.22 daniel 4681: if (name == NULL) {
1.55 daniel 4682: if ((ctxt->sax != NULL) && (ctxt->sax->error != NULL))
1.74 daniel 4683: ctxt->sax->error(ctxt->userData, "error parsing attribute name\n");
1.59 daniel 4684: ctxt->wellFormed = 0;
1.52 daniel 4685: return(NULL);
1.3 veillard 4686: }
4687:
4688: /*
1.29 daniel 4689: * read the value
1.3 veillard 4690: */
1.42 daniel 4691: SKIP_BLANKS;
1.40 daniel 4692: if (CUR == '=') {
4693: NEXT;
1.42 daniel 4694: SKIP_BLANKS;
1.72 daniel 4695: val = xmlParseAttValue(ctxt);
1.96 daniel 4696: ctxt->instate = XML_PARSER_CONTENT;
1.29 daniel 4697: } else {
1.55 daniel 4698: if ((ctxt->sax != NULL) && (ctxt->sax->error != NULL))
1.74 daniel 4699: ctxt->sax->error(ctxt->userData,
1.59 daniel 4700: "Specification mandate value for attribute %s\n", name);
4701: ctxt->wellFormed = 0;
1.52 daniel 4702: return(NULL);
1.43 daniel 4703: }
4704:
1.72 daniel 4705: *value = val;
4706: return(name);
1.3 veillard 4707: }
4708:
1.50 daniel 4709: /**
4710: * xmlParseStartTag:
4711: * @ctxt: an XML parser context
4712: *
4713: * parse a start of tag either for rule element or
4714: * EmptyElement. In both case we don't parse the tag closing chars.
1.27 daniel 4715: *
4716: * [40] STag ::= '<' Name (S Attribute)* S? '>'
4717: *
1.98 daniel 4718: * [ WFC: Unique Att Spec ]
4719: * No attribute name may appear more than once in the same start-tag or
4720: * empty-element tag.
4721: *
1.29 daniel 4722: * [44] EmptyElemTag ::= '<' Name (S Attribute)* S? '/>'
4723: *
1.98 daniel 4724: * [ WFC: Unique Att Spec ]
4725: * No attribute name may appear more than once in the same start-tag or
4726: * empty-element tag.
4727: *
1.29 daniel 4728: * With namespace:
4729: *
4730: * [NS 8] STag ::= '<' QName (S Attribute)* S? '>'
4731: *
4732: * [NS 10] EmptyElement ::= '<' QName (S Attribute)* S? '/>'
1.83 daniel 4733: *
4734: * Returns the element name parsed
1.2 veillard 4735: */
4736:
1.83 daniel 4737: CHAR *
1.69 daniel 4738: xmlParseStartTag(xmlParserCtxtPtr ctxt) {
1.72 daniel 4739: CHAR *name;
4740: CHAR *attname;
4741: CHAR *attvalue;
4742: const CHAR **atts = NULL;
4743: int nbatts = 0;
4744: int maxatts = 0;
4745: int i;
1.2 veillard 4746:
1.83 daniel 4747: if (CUR != '<') return(NULL);
1.40 daniel 4748: NEXT;
1.3 veillard 4749:
1.72 daniel 4750: name = xmlParseName(ctxt);
1.59 daniel 4751: if (name == NULL) {
4752: if ((ctxt->sax != NULL) && (ctxt->sax->error != NULL))
1.74 daniel 4753: ctxt->sax->error(ctxt->userData,
1.59 daniel 4754: "xmlParseStartTag: invalid element name\n");
4755: ctxt->wellFormed = 0;
1.83 daniel 4756: return(NULL);
1.50 daniel 4757: }
4758:
4759: /*
1.3 veillard 4760: * Now parse the attributes, it ends up with the ending
4761: *
4762: * (S Attribute)* S?
4763: */
1.42 daniel 4764: SKIP_BLANKS;
1.91 daniel 4765: GROW;
1.40 daniel 4766: while ((IS_CHAR(CUR)) &&
4767: (CUR != '>') &&
4768: ((CUR != '/') || (NXT(1) != '>'))) {
4769: const CHAR *q = CUR_PTR;
1.91 daniel 4770: int cons = ctxt->input->consumed;
1.29 daniel 4771:
1.72 daniel 4772: attname = xmlParseAttribute(ctxt, &attvalue);
4773: if ((attname != NULL) && (attvalue != NULL)) {
4774: /*
1.98 daniel 4775: * [ WFC: Unique Att Spec ]
4776: * No attribute name may appear more than once in the same
4777: * start-tag or empty-element tag.
1.72 daniel 4778: */
4779: for (i = 0; i < nbatts;i += 2) {
4780: if (!xmlStrcmp(atts[i], attname)) {
4781: if ((ctxt->sax != NULL) && (ctxt->sax->error != NULL))
1.98 daniel 4782: ctxt->sax->error(ctxt->userData,
4783: "Attribute %s redefined\n",
4784: attname);
1.72 daniel 4785: ctxt->wellFormed = 0;
4786: free(attname);
4787: free(attvalue);
1.98 daniel 4788: goto failed;
1.72 daniel 4789: }
4790: }
4791:
4792: /*
4793: * Add the pair to atts
4794: */
4795: if (atts == NULL) {
4796: maxatts = 10;
4797: atts = (const CHAR **) malloc(maxatts * sizeof(CHAR *));
4798: if (atts == NULL) {
1.86 daniel 4799: fprintf(stderr, "malloc of %ld byte failed\n",
4800: maxatts * (long)sizeof(CHAR *));
1.83 daniel 4801: return(NULL);
1.72 daniel 4802: }
4803: } else if (nbatts + 2 < maxatts) {
4804: maxatts *= 2;
4805: atts = (const CHAR **) realloc(atts, maxatts * sizeof(CHAR *));
4806: if (atts == NULL) {
1.86 daniel 4807: fprintf(stderr, "realloc of %ld byte failed\n",
4808: maxatts * (long)sizeof(CHAR *));
1.83 daniel 4809: return(NULL);
1.72 daniel 4810: }
4811: }
4812: atts[nbatts++] = attname;
4813: atts[nbatts++] = attvalue;
4814: atts[nbatts] = NULL;
4815: atts[nbatts + 1] = NULL;
1.98 daniel 4816: failed:
1.72 daniel 4817: }
4818:
1.42 daniel 4819: SKIP_BLANKS;
1.91 daniel 4820: if ((cons == ctxt->input->consumed) && (q == CUR_PTR)) {
1.55 daniel 4821: if ((ctxt->sax != NULL) && (ctxt->sax->error != NULL))
1.74 daniel 4822: ctxt->sax->error(ctxt->userData,
1.31 daniel 4823: "xmlParseStartTag: problem parsing attributes\n");
1.59 daniel 4824: ctxt->wellFormed = 0;
1.29 daniel 4825: break;
1.3 veillard 4826: }
1.91 daniel 4827: GROW;
1.3 veillard 4828: }
4829:
1.43 daniel 4830: /*
1.72 daniel 4831: * SAX: Start of Element !
1.43 daniel 4832: */
1.72 daniel 4833: if ((ctxt->sax != NULL) && (ctxt->sax->startElement != NULL))
1.74 daniel 4834: ctxt->sax->startElement(ctxt->userData, name, atts);
1.43 daniel 4835:
1.72 daniel 4836: if (atts != NULL) {
4837: for (i = 0;i < nbatts;i++) free((CHAR *) atts[i]);
4838: free(atts);
4839: }
1.83 daniel 4840: return(name);
1.3 veillard 4841: }
4842:
1.50 daniel 4843: /**
4844: * xmlParseEndTag:
4845: * @ctxt: an XML parser context
1.83 daniel 4846: * @tagname: the tag name as parsed in the opening tag.
1.50 daniel 4847: *
4848: * parse an end of tag
1.27 daniel 4849: *
4850: * [42] ETag ::= '</' Name S? '>'
1.29 daniel 4851: *
4852: * With namespace
4853: *
1.72 daniel 4854: * [NS 9] ETag ::= '</' QName S? '>'
1.7 veillard 4855: */
4856:
1.55 daniel 4857: void
1.83 daniel 4858: xmlParseEndTag(xmlParserCtxtPtr ctxt, CHAR *tagname) {
1.72 daniel 4859: CHAR *name;
1.7 veillard 4860:
1.91 daniel 4861: GROW;
1.40 daniel 4862: if ((CUR != '<') || (NXT(1) != '/')) {
1.55 daniel 4863: if ((ctxt->sax != NULL) && (ctxt->sax->error != NULL))
1.74 daniel 4864: ctxt->sax->error(ctxt->userData, "xmlParseEndTag: '</' not found\n");
1.59 daniel 4865: ctxt->wellFormed = 0;
1.27 daniel 4866: return;
4867: }
1.40 daniel 4868: SKIP(2);
1.7 veillard 4869:
1.72 daniel 4870: name = xmlParseName(ctxt);
1.7 veillard 4871:
4872: /*
4873: * We should definitely be at the ending "S? '>'" part
4874: */
1.91 daniel 4875: GROW;
1.42 daniel 4876: SKIP_BLANKS;
1.40 daniel 4877: if ((!IS_CHAR(CUR)) || (CUR != '>')) {
1.55 daniel 4878: if ((ctxt->sax != NULL) && (ctxt->sax->error != NULL))
1.74 daniel 4879: ctxt->sax->error(ctxt->userData, "End tag : expected '>'\n");
1.59 daniel 4880: ctxt->wellFormed = 0;
1.7 veillard 4881: } else
1.40 daniel 4882: NEXT;
1.7 veillard 4883:
1.72 daniel 4884: /*
1.98 daniel 4885: * [ WFC: Element Type Match ]
4886: * The Name in an element's end-tag must match the element type in the
4887: * start-tag.
4888: *
1.83 daniel 4889: */
4890: if (xmlStrcmp(name, tagname)) {
4891: if ((ctxt->sax != NULL) && (ctxt->sax->error != NULL))
4892: ctxt->sax->error(ctxt->userData,
4893: "Opening and ending tag mismatch: %s and %s\n", tagname, name);
4894: ctxt->wellFormed = 0;
4895: }
4896:
4897: /*
1.72 daniel 4898: * SAX: End of Tag
4899: */
4900: if ((ctxt->sax != NULL) && (ctxt->sax->endElement != NULL))
1.74 daniel 4901: ctxt->sax->endElement(ctxt->userData, name);
1.72 daniel 4902:
4903: if (name != NULL)
4904: free(name);
4905:
1.7 veillard 4906: return;
4907: }
4908:
1.50 daniel 4909: /**
4910: * xmlParseCDSect:
4911: * @ctxt: an XML parser context
4912: *
4913: * Parse escaped pure raw content.
1.29 daniel 4914: *
4915: * [18] CDSect ::= CDStart CData CDEnd
4916: *
4917: * [19] CDStart ::= '<![CDATA['
4918: *
4919: * [20] Data ::= (Char* - (Char* ']]>' Char*))
4920: *
4921: * [21] CDEnd ::= ']]>'
1.3 veillard 4922: */
1.55 daniel 4923: void
4924: xmlParseCDSect(xmlParserCtxtPtr ctxt) {
1.110 daniel 4925: const CHAR *base;
4926: CHAR r, s;
1.108 veillard 4927: CHAR cur;
1.3 veillard 4928:
1.106 daniel 4929: if ((NXT(0) == '<') && (NXT(1) == '!') &&
1.40 daniel 4930: (NXT(2) == '[') && (NXT(3) == 'C') &&
4931: (NXT(4) == 'D') && (NXT(5) == 'A') &&
4932: (NXT(6) == 'T') && (NXT(7) == 'A') &&
4933: (NXT(8) == '[')) {
4934: SKIP(9);
1.29 daniel 4935: } else
1.45 daniel 4936: return;
1.109 daniel 4937:
4938: ctxt->instate = XML_PARSER_CDATA_SECTION;
1.40 daniel 4939: base = CUR_PTR;
4940: if (!IS_CHAR(CUR)) {
1.55 daniel 4941: if ((ctxt->sax != NULL) && (ctxt->sax->error != NULL))
1.74 daniel 4942: ctxt->sax->error(ctxt->userData, "CData section not finished\n%.50s\n", base);
1.59 daniel 4943: ctxt->wellFormed = 0;
1.109 daniel 4944: ctxt->instate = XML_PARSER_CONTENT;
1.45 daniel 4945: return;
1.3 veillard 4946: }
1.110 daniel 4947: r = CUR;
1.91 daniel 4948: NEXT;
1.40 daniel 4949: if (!IS_CHAR(CUR)) {
1.55 daniel 4950: if ((ctxt->sax != NULL) && (ctxt->sax->error != NULL))
1.74 daniel 4951: ctxt->sax->error(ctxt->userData, "CData section not finished\n%.50s\n", base);
1.59 daniel 4952: ctxt->wellFormed = 0;
1.109 daniel 4953: ctxt->instate = XML_PARSER_CONTENT;
1.45 daniel 4954: return;
1.3 veillard 4955: }
1.110 daniel 4956: s = CUR;
1.91 daniel 4957: NEXT;
1.108 veillard 4958: cur = CUR;
4959: while (IS_CHAR(cur) &&
1.110 daniel 4960: ((r != ']') || (s != ']') || (cur != '>'))) {
4961: r = s;
4962: s = cur;
4963: NEXT;
1.108 veillard 4964: cur = CUR;
1.3 veillard 4965: }
1.109 daniel 4966: ctxt->instate = XML_PARSER_CONTENT;
1.40 daniel 4967: if (!IS_CHAR(CUR)) {
1.55 daniel 4968: if ((ctxt->sax != NULL) && (ctxt->sax->error != NULL))
1.74 daniel 4969: ctxt->sax->error(ctxt->userData, "CData section not finished\n%.50s\n", base);
1.59 daniel 4970: ctxt->wellFormed = 0;
1.45 daniel 4971: return;
1.3 veillard 4972: }
1.107 daniel 4973: NEXT;
1.16 daniel 4974:
1.45 daniel 4975: /*
4976: * Ok the segment [base CUR_PTR] is to be consumed as chars.
4977: */
4978: if (ctxt->sax != NULL) {
1.107 daniel 4979: if (ctxt->sax->cdataBlock != NULL)
1.110 daniel 4980: ctxt->sax->cdataBlock(ctxt->userData, base, (CUR_PTR - base) - 3);
1.45 daniel 4981: }
1.2 veillard 4982: }
4983:
1.50 daniel 4984: /**
4985: * xmlParseContent:
4986: * @ctxt: an XML parser context
4987: *
4988: * Parse a content:
1.2 veillard 4989: *
1.27 daniel 4990: * [43] content ::= (element | CharData | Reference | CDSect | PI | Comment)*
1.2 veillard 4991: */
4992:
1.55 daniel 4993: void
4994: xmlParseContent(xmlParserCtxtPtr ctxt) {
1.97 daniel 4995: GROW;
1.40 daniel 4996: while ((CUR != '<') || (NXT(1) != '/')) {
4997: const CHAR *test = CUR_PTR;
1.91 daniel 4998: int cons = ctxt->input->consumed;
1.27 daniel 4999:
5000: /*
5001: * First case : a Processing Instruction.
5002: */
1.40 daniel 5003: if ((CUR == '<') && (NXT(1) == '?')) {
1.27 daniel 5004: xmlParsePI(ctxt);
5005: }
1.72 daniel 5006:
1.27 daniel 5007: /*
5008: * Second case : a CDSection
5009: */
1.40 daniel 5010: else if ((CUR == '<') && (NXT(1) == '!') &&
5011: (NXT(2) == '[') && (NXT(3) == 'C') &&
5012: (NXT(4) == 'D') && (NXT(5) == 'A') &&
5013: (NXT(6) == 'T') && (NXT(7) == 'A') &&
5014: (NXT(8) == '[')) {
1.45 daniel 5015: xmlParseCDSect(ctxt);
1.27 daniel 5016: }
1.72 daniel 5017:
1.27 daniel 5018: /*
5019: * Third case : a comment
5020: */
1.40 daniel 5021: else if ((CUR == '<') && (NXT(1) == '!') &&
5022: (NXT(2) == '-') && (NXT(3) == '-')) {
1.72 daniel 5023: xmlParseComment(ctxt, 1);
1.97 daniel 5024: ctxt->instate = XML_PARSER_CONTENT;
1.27 daniel 5025: }
1.72 daniel 5026:
1.27 daniel 5027: /*
5028: * Fourth case : a sub-element.
5029: */
1.40 daniel 5030: else if (CUR == '<') {
1.72 daniel 5031: xmlParseElement(ctxt);
1.45 daniel 5032: }
1.72 daniel 5033:
1.45 daniel 5034: /*
1.50 daniel 5035: * Fifth case : a reference. If if has not been resolved,
5036: * parsing returns it's Name, create the node
1.45 daniel 5037: */
1.97 daniel 5038:
1.45 daniel 5039: else if (CUR == '&') {
1.77 daniel 5040: xmlParseReference(ctxt);
1.27 daniel 5041: }
1.72 daniel 5042:
1.27 daniel 5043: /*
5044: * Last case, text. Note that References are handled directly.
5045: */
5046: else {
1.45 daniel 5047: xmlParseCharData(ctxt, 0);
1.3 veillard 5048: }
1.14 veillard 5049:
1.91 daniel 5050: GROW;
1.14 veillard 5051: /*
1.45 daniel 5052: * Pop-up of finished entities.
1.14 veillard 5053: */
1.69 daniel 5054: while ((CUR == 0) && (ctxt->inputNr > 1))
5055: xmlPopInput(ctxt);
1.45 daniel 5056:
1.91 daniel 5057: if ((cons == ctxt->input->consumed) && (test == CUR_PTR)) {
1.55 daniel 5058: if ((ctxt->sax != NULL) && (ctxt->sax->error != NULL))
1.74 daniel 5059: ctxt->sax->error(ctxt->userData,
1.59 daniel 5060: "detected an error in element content\n");
5061: ctxt->wellFormed = 0;
1.29 daniel 5062: break;
5063: }
1.3 veillard 5064: }
1.2 veillard 5065: }
5066:
1.50 daniel 5067: /**
5068: * xmlParseElement:
5069: * @ctxt: an XML parser context
5070: *
5071: * parse an XML element, this is highly recursive
1.26 daniel 5072: *
5073: * [39] element ::= EmptyElemTag | STag content ETag
5074: *
1.98 daniel 5075: * [ WFC: Element Type Match ]
5076: * The Name in an element's end-tag must match the element type in the
5077: * start-tag.
5078: *
5079: * [ VC: Element Valid ]
5080: * TODO An element is valid if there is a declaration matching elementdecl
1.99 daniel 5081: * where the Name matches the element type and one of the following holds:
5082: * - The declaration matches EMPTY and the element has no content.
5083: * - The declaration matches children and the sequence of child elements
5084: * belongs to the language generated by the regular expression in the
5085: * content model, with optional white space (characters matching the
5086: * nonterminal S) between each pair of child elements.
5087: * - The declaration matches Mixed and the content consists of character
5088: * data and child elements whose types match names in the content model.
5089: * - The declaration matches ANY, and the types of any child elements have
5090: * been declared.
1.2 veillard 5091: */
1.26 daniel 5092:
1.72 daniel 5093: void
1.69 daniel 5094: xmlParseElement(xmlParserCtxtPtr ctxt) {
1.40 daniel 5095: const CHAR *openTag = CUR_PTR;
1.83 daniel 5096: CHAR *name;
1.32 daniel 5097: xmlParserNodeInfo node_info;
1.2 veillard 5098:
1.32 daniel 5099: /* Capture start position */
1.40 daniel 5100: node_info.begin_pos = CUR_PTR - ctxt->input->base;
5101: node_info.begin_line = ctxt->input->line;
1.32 daniel 5102:
1.83 daniel 5103: name = xmlParseStartTag(ctxt);
5104: if (name == NULL) {
5105: return;
5106: }
1.2 veillard 5107:
5108: /*
1.99 daniel 5109: * [ VC: Root Element Type ]
5110: * The Name in the document type declaration must match the element
5111: * type of the root element.
5112: */
1.105 daniel 5113: if (ctxt->validate && ctxt->wellFormed && ctxt->myDoc &&
5114: ctxt->node && (ctxt->node == ctxt->myDoc->root))
1.102 daniel 5115: ctxt->valid &= xmlValidateRoot(&ctxt->vctxt, ctxt->myDoc);
1.99 daniel 5116:
5117: /*
1.2 veillard 5118: * Check for an Empty Element.
5119: */
1.40 daniel 5120: if ((CUR == '/') && (NXT(1) == '>')) {
5121: SKIP(2);
1.72 daniel 5122: if ((ctxt->sax != NULL) && (ctxt->sax->endElement != NULL))
1.83 daniel 5123: ctxt->sax->endElement(ctxt->userData, name);
5124: free(name);
1.72 daniel 5125: return;
1.2 veillard 5126: }
1.91 daniel 5127: if (CUR == '>') {
5128: NEXT;
5129: } else {
1.55 daniel 5130: if ((ctxt->sax != NULL) && (ctxt->sax->error != NULL))
1.74 daniel 5131: ctxt->sax->error(ctxt->userData, "Couldn't find end of Start Tag\n%.30s\n",
1.57 daniel 5132: openTag);
1.59 daniel 5133: ctxt->wellFormed = 0;
1.45 daniel 5134:
5135: /*
5136: * end of parsing of this node.
5137: */
5138: nodePop(ctxt);
1.83 daniel 5139: free(name);
1.72 daniel 5140: return;
1.2 veillard 5141: }
5142:
5143: /*
5144: * Parse the content of the element:
5145: */
1.45 daniel 5146: xmlParseContent(ctxt);
1.40 daniel 5147: if (!IS_CHAR(CUR)) {
1.55 daniel 5148: if ((ctxt->sax != NULL) && (ctxt->sax->error != NULL))
1.74 daniel 5149: ctxt->sax->error(ctxt->userData,
1.57 daniel 5150: "Premature end of data in tag %.30s\n", openTag);
1.59 daniel 5151: ctxt->wellFormed = 0;
1.45 daniel 5152:
5153: /*
5154: * end of parsing of this node.
5155: */
5156: nodePop(ctxt);
1.83 daniel 5157: free(name);
1.72 daniel 5158: return;
1.2 veillard 5159: }
5160:
5161: /*
1.27 daniel 5162: * parse the end of tag: '</' should be here.
1.2 veillard 5163: */
1.83 daniel 5164: xmlParseEndTag(ctxt, name);
5165: free(name);
1.2 veillard 5166: }
5167:
1.50 daniel 5168: /**
5169: * xmlParseVersionNum:
5170: * @ctxt: an XML parser context
5171: *
5172: * parse the XML version value.
1.29 daniel 5173: *
5174: * [26] VersionNum ::= ([a-zA-Z0-9_.:] | '-')+
1.68 daniel 5175: *
5176: * Returns the string giving the XML version number, or NULL
1.29 daniel 5177: */
1.55 daniel 5178: CHAR *
5179: xmlParseVersionNum(xmlParserCtxtPtr ctxt) {
1.40 daniel 5180: const CHAR *q = CUR_PTR;
1.29 daniel 5181: CHAR *ret;
5182:
1.40 daniel 5183: while (IS_CHAR(CUR) &&
5184: (((CUR >= 'a') && (CUR <= 'z')) ||
5185: ((CUR >= 'A') && (CUR <= 'Z')) ||
5186: ((CUR >= '0') && (CUR <= '9')) ||
5187: (CUR == '_') || (CUR == '.') ||
5188: (CUR == ':') || (CUR == '-'))) NEXT;
5189: ret = xmlStrndup(q, CUR_PTR - q);
1.29 daniel 5190: return(ret);
5191: }
5192:
1.50 daniel 5193: /**
5194: * xmlParseVersionInfo:
5195: * @ctxt: an XML parser context
5196: *
5197: * parse the XML version.
1.29 daniel 5198: *
5199: * [24] VersionInfo ::= S 'version' Eq (' VersionNum ' | " VersionNum ")
5200: *
5201: * [25] Eq ::= S? '=' S?
1.50 daniel 5202: *
1.68 daniel 5203: * Returns the version string, e.g. "1.0"
1.29 daniel 5204: */
5205:
1.55 daniel 5206: CHAR *
5207: xmlParseVersionInfo(xmlParserCtxtPtr ctxt) {
1.29 daniel 5208: CHAR *version = NULL;
5209: const CHAR *q;
5210:
1.40 daniel 5211: if ((CUR == 'v') && (NXT(1) == 'e') &&
5212: (NXT(2) == 'r') && (NXT(3) == 's') &&
5213: (NXT(4) == 'i') && (NXT(5) == 'o') &&
5214: (NXT(6) == 'n')) {
5215: SKIP(7);
1.42 daniel 5216: SKIP_BLANKS;
1.40 daniel 5217: if (CUR != '=') {
1.55 daniel 5218: if ((ctxt->sax != NULL) && (ctxt->sax->error != NULL))
1.74 daniel 5219: ctxt->sax->error(ctxt->userData, "xmlParseVersionInfo : expected '='\n");
1.59 daniel 5220: ctxt->wellFormed = 0;
1.31 daniel 5221: return(NULL);
5222: }
1.40 daniel 5223: NEXT;
1.42 daniel 5224: SKIP_BLANKS;
1.40 daniel 5225: if (CUR == '"') {
5226: NEXT;
5227: q = CUR_PTR;
1.29 daniel 5228: version = xmlParseVersionNum(ctxt);
1.55 daniel 5229: if (CUR != '"') {
5230: if ((ctxt->sax != NULL) && (ctxt->sax->error != NULL))
1.74 daniel 5231: ctxt->sax->error(ctxt->userData, "String not closed\n%.50s\n", q);
1.59 daniel 5232: ctxt->wellFormed = 0;
1.55 daniel 5233: } else
1.40 daniel 5234: NEXT;
5235: } else if (CUR == '\''){
5236: NEXT;
5237: q = CUR_PTR;
1.29 daniel 5238: version = xmlParseVersionNum(ctxt);
1.55 daniel 5239: if (CUR != '\'') {
5240: if ((ctxt->sax != NULL) && (ctxt->sax->error != NULL))
1.74 daniel 5241: ctxt->sax->error(ctxt->userData, "String not closed\n%.50s\n", q);
1.59 daniel 5242: ctxt->wellFormed = 0;
1.55 daniel 5243: } else
1.40 daniel 5244: NEXT;
1.31 daniel 5245: } else {
1.55 daniel 5246: if ((ctxt->sax != NULL) && (ctxt->sax->error != NULL))
1.74 daniel 5247: ctxt->sax->error(ctxt->userData,
1.59 daniel 5248: "xmlParseVersionInfo : expected ' or \"\n");
5249: ctxt->wellFormed = 0;
1.29 daniel 5250: }
5251: }
5252: return(version);
5253: }
5254:
1.50 daniel 5255: /**
5256: * xmlParseEncName:
5257: * @ctxt: an XML parser context
5258: *
5259: * parse the XML encoding name
1.29 daniel 5260: *
5261: * [81] EncName ::= [A-Za-z] ([A-Za-z0-9._] | '-')*
1.50 daniel 5262: *
1.68 daniel 5263: * Returns the encoding name value or NULL
1.29 daniel 5264: */
1.55 daniel 5265: CHAR *
5266: xmlParseEncName(xmlParserCtxtPtr ctxt) {
1.40 daniel 5267: const CHAR *q = CUR_PTR;
1.29 daniel 5268: CHAR *ret = NULL;
5269:
1.40 daniel 5270: if (((CUR >= 'a') && (CUR <= 'z')) ||
5271: ((CUR >= 'A') && (CUR <= 'Z'))) {
5272: NEXT;
5273: while (IS_CHAR(CUR) &&
5274: (((CUR >= 'a') && (CUR <= 'z')) ||
5275: ((CUR >= 'A') && (CUR <= 'Z')) ||
5276: ((CUR >= '0') && (CUR <= '9')) ||
5277: (CUR == '-'))) NEXT;
5278: ret = xmlStrndup(q, CUR_PTR - q);
1.29 daniel 5279: } else {
1.55 daniel 5280: if ((ctxt->sax != NULL) && (ctxt->sax->error != NULL))
1.74 daniel 5281: ctxt->sax->error(ctxt->userData, "Invalid XML encoding name\n");
1.59 daniel 5282: ctxt->wellFormed = 0;
1.29 daniel 5283: }
5284: return(ret);
5285: }
5286:
1.50 daniel 5287: /**
5288: * xmlParseEncodingDecl:
5289: * @ctxt: an XML parser context
5290: *
5291: * parse the XML encoding declaration
1.29 daniel 5292: *
5293: * [80] EncodingDecl ::= S 'encoding' Eq ('"' EncName '"' | "'" EncName "'")
1.50 daniel 5294: *
5295: * TODO: this should setup the conversion filters.
5296: *
1.68 daniel 5297: * Returns the encoding value or NULL
1.29 daniel 5298: */
5299:
1.55 daniel 5300: CHAR *
5301: xmlParseEncodingDecl(xmlParserCtxtPtr ctxt) {
1.29 daniel 5302: CHAR *encoding = NULL;
5303: const CHAR *q;
5304:
1.42 daniel 5305: SKIP_BLANKS;
1.40 daniel 5306: if ((CUR == 'e') && (NXT(1) == 'n') &&
5307: (NXT(2) == 'c') && (NXT(3) == 'o') &&
5308: (NXT(4) == 'd') && (NXT(5) == 'i') &&
5309: (NXT(6) == 'n') && (NXT(7) == 'g')) {
5310: SKIP(8);
1.42 daniel 5311: SKIP_BLANKS;
1.40 daniel 5312: if (CUR != '=') {
1.55 daniel 5313: if ((ctxt->sax != NULL) && (ctxt->sax->error != NULL))
1.74 daniel 5314: ctxt->sax->error(ctxt->userData, "xmlParseEncodingDecl : expected '='\n");
1.59 daniel 5315: ctxt->wellFormed = 0;
1.31 daniel 5316: return(NULL);
5317: }
1.40 daniel 5318: NEXT;
1.42 daniel 5319: SKIP_BLANKS;
1.40 daniel 5320: if (CUR == '"') {
5321: NEXT;
5322: q = CUR_PTR;
1.29 daniel 5323: encoding = xmlParseEncName(ctxt);
1.55 daniel 5324: if (CUR != '"') {
5325: if ((ctxt->sax != NULL) && (ctxt->sax->error != NULL))
1.74 daniel 5326: ctxt->sax->error(ctxt->userData, "String not closed\n%.50s\n", q);
1.59 daniel 5327: ctxt->wellFormed = 0;
1.55 daniel 5328: } else
1.40 daniel 5329: NEXT;
5330: } else if (CUR == '\''){
5331: NEXT;
5332: q = CUR_PTR;
1.29 daniel 5333: encoding = xmlParseEncName(ctxt);
1.55 daniel 5334: if (CUR != '\'') {
5335: if ((ctxt->sax != NULL) && (ctxt->sax->error != NULL))
1.74 daniel 5336: ctxt->sax->error(ctxt->userData, "String not closed\n%.50s\n", q);
1.59 daniel 5337: ctxt->wellFormed = 0;
1.55 daniel 5338: } else
1.40 daniel 5339: NEXT;
5340: } else if (CUR == '"'){
1.55 daniel 5341: if ((ctxt->sax != NULL) && (ctxt->sax->error != NULL))
1.74 daniel 5342: ctxt->sax->error(ctxt->userData,
1.59 daniel 5343: "xmlParseEncodingDecl : expected ' or \"\n");
5344: ctxt->wellFormed = 0;
1.29 daniel 5345: }
5346: }
5347: return(encoding);
5348: }
5349:
1.50 daniel 5350: /**
5351: * xmlParseSDDecl:
5352: * @ctxt: an XML parser context
5353: *
5354: * parse the XML standalone declaration
1.29 daniel 5355: *
5356: * [32] SDDecl ::= S 'standalone' Eq
5357: * (("'" ('yes' | 'no') "'") | ('"' ('yes' | 'no')'"'))
1.99 daniel 5358: *
5359: * [ VC: Standalone Document Declaration ]
5360: * TODO The standalone document declaration must have the value "no"
5361: * if any external markup declarations contain declarations of:
5362: * - attributes with default values, if elements to which these
5363: * attributes apply appear in the document without specifications
5364: * of values for these attributes, or
5365: * - entities (other than amp, lt, gt, apos, quot), if references
5366: * to those entities appear in the document, or
5367: * - attributes with values subject to normalization, where the
5368: * attribute appears in the document with a value which will change
5369: * as a result of normalization, or
5370: * - element types with element content, if white space occurs directly
5371: * within any instance of those types.
1.68 daniel 5372: *
5373: * Returns 1 if standalone, 0 otherwise
1.29 daniel 5374: */
5375:
1.55 daniel 5376: int
5377: xmlParseSDDecl(xmlParserCtxtPtr ctxt) {
1.29 daniel 5378: int standalone = -1;
5379:
1.42 daniel 5380: SKIP_BLANKS;
1.40 daniel 5381: if ((CUR == 's') && (NXT(1) == 't') &&
5382: (NXT(2) == 'a') && (NXT(3) == 'n') &&
5383: (NXT(4) == 'd') && (NXT(5) == 'a') &&
5384: (NXT(6) == 'l') && (NXT(7) == 'o') &&
5385: (NXT(8) == 'n') && (NXT(9) == 'e')) {
5386: SKIP(10);
1.81 daniel 5387: SKIP_BLANKS;
1.40 daniel 5388: if (CUR != '=') {
1.55 daniel 5389: if ((ctxt->sax != NULL) && (ctxt->sax->error != NULL))
1.74 daniel 5390: ctxt->sax->error(ctxt->userData,
1.59 daniel 5391: "XML standalone declaration : expected '='\n");
5392: ctxt->wellFormed = 0;
1.32 daniel 5393: return(standalone);
5394: }
1.40 daniel 5395: NEXT;
1.42 daniel 5396: SKIP_BLANKS;
1.40 daniel 5397: if (CUR == '\''){
5398: NEXT;
5399: if ((CUR == 'n') && (NXT(1) == 'o')) {
1.29 daniel 5400: standalone = 0;
1.40 daniel 5401: SKIP(2);
5402: } else if ((CUR == 'y') && (NXT(1) == 'e') &&
5403: (NXT(2) == 's')) {
1.29 daniel 5404: standalone = 1;
1.40 daniel 5405: SKIP(3);
1.29 daniel 5406: } else {
1.55 daniel 5407: if ((ctxt->sax != NULL) && (ctxt->sax->error != NULL))
1.74 daniel 5408: ctxt->sax->error(ctxt->userData, "standalone accepts only 'yes' or 'no'\n");
1.59 daniel 5409: ctxt->wellFormed = 0;
1.29 daniel 5410: }
1.55 daniel 5411: if (CUR != '\'') {
5412: if ((ctxt->sax != NULL) && (ctxt->sax->error != NULL))
1.74 daniel 5413: ctxt->sax->error(ctxt->userData, "String not closed\n");
1.59 daniel 5414: ctxt->wellFormed = 0;
1.55 daniel 5415: } else
1.40 daniel 5416: NEXT;
5417: } else if (CUR == '"'){
5418: NEXT;
5419: if ((CUR == 'n') && (NXT(1) == 'o')) {
1.29 daniel 5420: standalone = 0;
1.40 daniel 5421: SKIP(2);
5422: } else if ((CUR == 'y') && (NXT(1) == 'e') &&
5423: (NXT(2) == 's')) {
1.29 daniel 5424: standalone = 1;
1.40 daniel 5425: SKIP(3);
1.29 daniel 5426: } else {
1.55 daniel 5427: if ((ctxt->sax != NULL) && (ctxt->sax->error != NULL))
1.74 daniel 5428: ctxt->sax->error(ctxt->userData,
1.59 daniel 5429: "standalone accepts only 'yes' or 'no'\n");
5430: ctxt->wellFormed = 0;
1.29 daniel 5431: }
1.55 daniel 5432: if (CUR != '"') {
5433: if ((ctxt->sax != NULL) && (ctxt->sax->error != NULL))
1.74 daniel 5434: ctxt->sax->error(ctxt->userData, "String not closed\n");
1.59 daniel 5435: ctxt->wellFormed = 0;
1.55 daniel 5436: } else
1.40 daniel 5437: NEXT;
1.37 daniel 5438: } else {
1.55 daniel 5439: if ((ctxt->sax != NULL) && (ctxt->sax->error != NULL))
1.74 daniel 5440: ctxt->sax->error(ctxt->userData, "Standalone value not found\n");
1.59 daniel 5441: ctxt->wellFormed = 0;
1.37 daniel 5442: }
1.29 daniel 5443: }
5444: return(standalone);
5445: }
5446:
1.50 daniel 5447: /**
5448: * xmlParseXMLDecl:
5449: * @ctxt: an XML parser context
5450: *
5451: * parse an XML declaration header
1.29 daniel 5452: *
5453: * [23] XMLDecl ::= '<?xml' VersionInfo EncodingDecl? SDDecl? S? '?>'
1.1 veillard 5454: */
5455:
1.55 daniel 5456: void
5457: xmlParseXMLDecl(xmlParserCtxtPtr ctxt) {
1.1 veillard 5458: CHAR *version;
5459:
5460: /*
1.19 daniel 5461: * We know that '<?xml' is here.
1.1 veillard 5462: */
1.40 daniel 5463: SKIP(5);
1.1 veillard 5464:
1.59 daniel 5465: if (!IS_BLANK(CUR)) {
5466: if ((ctxt->sax != NULL) && (ctxt->sax->error != NULL))
1.74 daniel 5467: ctxt->sax->error(ctxt->userData, "Blank needed after '<?xml'\n");
1.59 daniel 5468: ctxt->wellFormed = 0;
5469: }
1.42 daniel 5470: SKIP_BLANKS;
1.1 veillard 5471:
5472: /*
1.29 daniel 5473: * We should have the VersionInfo here.
1.1 veillard 5474: */
1.29 daniel 5475: version = xmlParseVersionInfo(ctxt);
5476: if (version == NULL)
1.45 daniel 5477: version = xmlCharStrdup(XML_DEFAULT_VERSION);
1.72 daniel 5478: ctxt->version = xmlStrdup(version);
1.45 daniel 5479: free(version);
1.29 daniel 5480:
5481: /*
5482: * We may have the encoding declaration
5483: */
1.59 daniel 5484: if (!IS_BLANK(CUR)) {
5485: if ((CUR == '?') && (NXT(1) == '>')) {
5486: SKIP(2);
5487: return;
5488: }
5489: if ((ctxt->sax != NULL) && (ctxt->sax->error != NULL))
1.74 daniel 5490: ctxt->sax->error(ctxt->userData, "Blank needed here\n");
1.59 daniel 5491: ctxt->wellFormed = 0;
5492: }
1.72 daniel 5493: ctxt->encoding = xmlParseEncodingDecl(ctxt);
1.1 veillard 5494:
5495: /*
1.29 daniel 5496: * We may have the standalone status.
1.1 veillard 5497: */
1.72 daniel 5498: if ((ctxt->encoding != NULL) && (!IS_BLANK(CUR))) {
1.59 daniel 5499: if ((CUR == '?') && (NXT(1) == '>')) {
5500: SKIP(2);
5501: return;
5502: }
5503: if ((ctxt->sax != NULL) && (ctxt->sax->error != NULL))
1.74 daniel 5504: ctxt->sax->error(ctxt->userData, "Blank needed here\n");
1.59 daniel 5505: ctxt->wellFormed = 0;
5506: }
5507: SKIP_BLANKS;
1.72 daniel 5508: ctxt->standalone = xmlParseSDDecl(ctxt);
1.1 veillard 5509:
1.42 daniel 5510: SKIP_BLANKS;
1.40 daniel 5511: if ((CUR == '?') && (NXT(1) == '>')) {
5512: SKIP(2);
5513: } else if (CUR == '>') {
1.31 daniel 5514: /* Deprecated old WD ... */
1.55 daniel 5515: if ((ctxt->sax != NULL) && (ctxt->sax->error != NULL))
1.74 daniel 5516: ctxt->sax->error(ctxt->userData, "XML declaration must end-up with '?>'\n");
1.59 daniel 5517: ctxt->wellFormed = 0;
1.40 daniel 5518: NEXT;
1.29 daniel 5519: } else {
1.55 daniel 5520: if ((ctxt->sax != NULL) && (ctxt->sax->error != NULL))
1.74 daniel 5521: ctxt->sax->error(ctxt->userData, "parsing XML declaration: '?>' expected\n");
1.59 daniel 5522: ctxt->wellFormed = 0;
1.40 daniel 5523: MOVETO_ENDTAG(CUR_PTR);
5524: NEXT;
1.29 daniel 5525: }
1.1 veillard 5526: }
5527:
1.50 daniel 5528: /**
5529: * xmlParseMisc:
5530: * @ctxt: an XML parser context
5531: *
5532: * parse an XML Misc* optionnal field.
1.21 daniel 5533: *
1.22 daniel 5534: * [27] Misc ::= Comment | PI | S
1.1 veillard 5535: */
5536:
1.55 daniel 5537: void
5538: xmlParseMisc(xmlParserCtxtPtr ctxt) {
1.40 daniel 5539: while (((CUR == '<') && (NXT(1) == '?')) ||
5540: ((CUR == '<') && (NXT(1) == '!') &&
5541: (NXT(2) == '-') && (NXT(3) == '-')) ||
5542: IS_BLANK(CUR)) {
5543: if ((CUR == '<') && (NXT(1) == '?')) {
1.16 daniel 5544: xmlParsePI(ctxt);
1.40 daniel 5545: } else if (IS_BLANK(CUR)) {
5546: NEXT;
1.1 veillard 5547: } else
1.31 daniel 5548: xmlParseComment(ctxt, 0);
1.1 veillard 5549: }
5550: }
5551:
1.50 daniel 5552: /**
5553: * xmlParseDocument :
5554: * @ctxt: an XML parser context
5555: *
5556: * parse an XML document (and build a tree if using the standard SAX
5557: * interface).
1.21 daniel 5558: *
1.22 daniel 5559: * [1] document ::= prolog element Misc*
1.29 daniel 5560: *
5561: * [22] prolog ::= XMLDecl? Misc* (doctypedecl Misc*)?
1.50 daniel 5562: *
1.68 daniel 5563: * Returns 0, -1 in case of error. the parser context is augmented
1.50 daniel 5564: * as a result of the parsing.
1.1 veillard 5565: */
5566:
1.55 daniel 5567: int
5568: xmlParseDocument(xmlParserCtxtPtr ctxt) {
1.45 daniel 5569: xmlDefaultSAXHandlerInit();
5570:
1.91 daniel 5571: GROW;
5572:
1.14 veillard 5573: /*
1.44 daniel 5574: * SAX: beginning of the document processing.
5575: */
1.72 daniel 5576: if ((ctxt->sax) && (ctxt->sax->setDocumentLocator))
1.74 daniel 5577: ctxt->sax->setDocumentLocator(ctxt->userData, &xmlDefaultSAXLocator);
1.44 daniel 5578:
5579: /*
1.14 veillard 5580: * We should check for encoding here and plug-in some
5581: * conversion code TODO !!!!
5582: */
1.1 veillard 5583:
5584: /*
5585: * Wipe out everything which is before the first '<'
5586: */
1.59 daniel 5587: if (IS_BLANK(CUR)) {
5588: if ((ctxt->sax != NULL) && (ctxt->sax->error != NULL))
1.74 daniel 5589: ctxt->sax->error(ctxt->userData,
1.59 daniel 5590: "Extra spaces at the beginning of the document are not allowed\n");
5591: ctxt->wellFormed = 0;
5592: SKIP_BLANKS;
5593: }
5594:
5595: if (CUR == 0) {
5596: if ((ctxt->sax != NULL) && (ctxt->sax->error != NULL))
1.74 daniel 5597: ctxt->sax->error(ctxt->userData, "Document is empty\n");
1.59 daniel 5598: ctxt->wellFormed = 0;
5599: }
1.1 veillard 5600:
5601: /*
5602: * Check for the XMLDecl in the Prolog.
5603: */
1.91 daniel 5604: GROW;
1.40 daniel 5605: if ((CUR == '<') && (NXT(1) == '?') &&
5606: (NXT(2) == 'x') && (NXT(3) == 'm') &&
5607: (NXT(4) == 'l')) {
1.19 daniel 5608: xmlParseXMLDecl(ctxt);
5609: /* SKIP_EOL(cur); */
1.42 daniel 5610: SKIP_BLANKS;
1.40 daniel 5611: } else if ((CUR == '<') && (NXT(1) == '?') &&
5612: (NXT(2) == 'X') && (NXT(3) == 'M') &&
5613: (NXT(4) == 'L')) {
1.19 daniel 5614: /*
5615: * The first drafts were using <?XML and the final W3C REC
5616: * now use <?xml ...
5617: */
1.16 daniel 5618: xmlParseXMLDecl(ctxt);
1.1 veillard 5619: /* SKIP_EOL(cur); */
1.42 daniel 5620: SKIP_BLANKS;
1.1 veillard 5621: } else {
1.72 daniel 5622: ctxt->version = xmlCharStrdup(XML_DEFAULT_VERSION);
1.1 veillard 5623: }
1.72 daniel 5624: if ((ctxt->sax) && (ctxt->sax->startDocument))
1.74 daniel 5625: ctxt->sax->startDocument(ctxt->userData);
1.1 veillard 5626:
5627: /*
5628: * The Misc part of the Prolog
5629: */
1.91 daniel 5630: GROW;
1.16 daniel 5631: xmlParseMisc(ctxt);
1.1 veillard 5632:
5633: /*
1.29 daniel 5634: * Then possibly doc type declaration(s) and more Misc
1.21 daniel 5635: * (doctypedecl Misc*)?
5636: */
1.91 daniel 5637: GROW;
1.40 daniel 5638: if ((CUR == '<') && (NXT(1) == '!') &&
5639: (NXT(2) == 'D') && (NXT(3) == 'O') &&
5640: (NXT(4) == 'C') && (NXT(5) == 'T') &&
5641: (NXT(6) == 'Y') && (NXT(7) == 'P') &&
5642: (NXT(8) == 'E')) {
1.22 daniel 5643: xmlParseDocTypeDecl(ctxt);
1.96 daniel 5644: ctxt->instate = XML_PARSER_PROLOG;
1.22 daniel 5645: xmlParseMisc(ctxt);
1.21 daniel 5646: }
5647:
5648: /*
5649: * Time to start parsing the tree itself
1.1 veillard 5650: */
1.91 daniel 5651: GROW;
1.96 daniel 5652: ctxt->instate = XML_PARSER_CONTENT;
1.72 daniel 5653: xmlParseElement(ctxt);
1.96 daniel 5654: ctxt->instate = XML_PARSER_EPILOG;
1.33 daniel 5655:
5656: /*
5657: * The Misc part at the end
5658: */
5659: xmlParseMisc(ctxt);
1.16 daniel 5660:
1.59 daniel 5661: if (CUR != 0) {
5662: if ((ctxt->sax != NULL) && (ctxt->sax->error != NULL))
1.74 daniel 5663: ctxt->sax->error(ctxt->userData,
1.59 daniel 5664: "Extra content at the end of the document\n");
5665: ctxt->wellFormed = 0;
5666: }
1.96 daniel 5667: ctxt->instate = XML_PARSER_EOF;
1.59 daniel 5668:
1.44 daniel 5669: /*
5670: * SAX: end of the document processing.
5671: */
1.72 daniel 5672: if ((ctxt->sax) && (ctxt->sax->endDocument != NULL))
1.74 daniel 5673: ctxt->sax->endDocument(ctxt->userData);
1.59 daniel 5674: if (! ctxt->wellFormed) return(-1);
1.16 daniel 5675: return(0);
5676: }
5677:
1.98 daniel 5678: /************************************************************************
5679: * *
5680: * I/O front end functions to the parser *
5681: * *
5682: ************************************************************************/
5683:
1.50 daniel 5684: /**
1.86 daniel 5685: * xmlCreateDocParserCtxt :
1.50 daniel 5686: * @cur: a pointer to an array of CHAR
5687: *
1.69 daniel 5688: * Create a parser context for an XML in-memory document.
5689: *
5690: * Returns the new parser context or NULL
1.16 daniel 5691: */
1.69 daniel 5692: xmlParserCtxtPtr
5693: xmlCreateDocParserCtxt(CHAR *cur) {
1.16 daniel 5694: xmlParserCtxtPtr ctxt;
1.40 daniel 5695: xmlParserInputPtr input;
1.75 daniel 5696: xmlCharEncoding enc;
1.16 daniel 5697:
1.97 daniel 5698: ctxt = xmlNewParserCtxt();
1.16 daniel 5699: if (ctxt == NULL) {
5700: return(NULL);
5701: }
1.96 daniel 5702: input = xmlNewInputStream(ctxt);
1.40 daniel 5703: if (input == NULL) {
1.97 daniel 5704: xmlFreeParserCtxt(ctxt);
1.40 daniel 5705: return(NULL);
5706: }
5707:
1.75 daniel 5708: /*
5709: * plug some encoding conversion routines here. !!!
5710: */
5711: enc = xmlDetectCharEncoding(cur);
5712: xmlSwitchEncoding(ctxt, enc);
5713:
1.40 daniel 5714: input->base = cur;
5715: input->cur = cur;
5716:
5717: inputPush(ctxt, input);
1.69 daniel 5718: return(ctxt);
5719: }
5720:
5721: /**
5722: * xmlSAXParseDoc :
5723: * @sax: the SAX handler block
5724: * @cur: a pointer to an array of CHAR
5725: * @recovery: work in recovery mode, i.e. tries to read no Well Formed
5726: * documents
5727: *
5728: * parse an XML in-memory document and build a tree.
5729: * It use the given SAX function block to handle the parsing callback.
5730: * If sax is NULL, fallback to the default DOM tree building routines.
5731: *
5732: * Returns the resulting document tree
5733: */
5734:
5735: xmlDocPtr
5736: xmlSAXParseDoc(xmlSAXHandlerPtr sax, CHAR *cur, int recovery) {
5737: xmlDocPtr ret;
5738: xmlParserCtxtPtr ctxt;
5739:
5740: if (cur == NULL) return(NULL);
1.16 daniel 5741:
5742:
1.69 daniel 5743: ctxt = xmlCreateDocParserCtxt(cur);
5744: if (ctxt == NULL) return(NULL);
1.74 daniel 5745: if (sax != NULL) {
5746: ctxt->sax = sax;
5747: ctxt->userData = NULL;
5748: }
1.69 daniel 5749:
1.16 daniel 5750: xmlParseDocument(ctxt);
1.72 daniel 5751: if ((ctxt->wellFormed) || recovery) ret = ctxt->myDoc;
1.59 daniel 5752: else {
5753: ret = NULL;
1.72 daniel 5754: xmlFreeDoc(ctxt->myDoc);
5755: ctxt->myDoc = NULL;
1.59 daniel 5756: }
1.86 daniel 5757: if (sax != NULL)
5758: ctxt->sax = NULL;
1.69 daniel 5759: xmlFreeParserCtxt(ctxt);
1.16 daniel 5760:
1.1 veillard 5761: return(ret);
5762: }
5763:
1.50 daniel 5764: /**
1.55 daniel 5765: * xmlParseDoc :
5766: * @cur: a pointer to an array of CHAR
5767: *
5768: * parse an XML in-memory document and build a tree.
5769: *
1.68 daniel 5770: * Returns the resulting document tree
1.55 daniel 5771: */
5772:
1.69 daniel 5773: xmlDocPtr
5774: xmlParseDoc(CHAR *cur) {
1.59 daniel 5775: return(xmlSAXParseDoc(NULL, cur, 0));
1.76 daniel 5776: }
5777:
5778: /**
5779: * xmlSAXParseDTD :
5780: * @sax: the SAX handler block
5781: * @ExternalID: a NAME* containing the External ID of the DTD
5782: * @SystemID: a NAME* containing the URL to the DTD
5783: *
5784: * Load and parse an external subset.
5785: *
5786: * Returns the resulting xmlDtdPtr or NULL in case of error.
5787: */
5788:
5789: xmlDtdPtr
5790: xmlSAXParseDTD(xmlSAXHandlerPtr sax, const CHAR *ExternalID,
5791: const CHAR *SystemID) {
5792: xmlDtdPtr ret = NULL;
5793: xmlParserCtxtPtr ctxt;
1.83 daniel 5794: xmlParserInputPtr input = NULL;
1.76 daniel 5795: xmlCharEncoding enc;
5796:
5797: if ((ExternalID == NULL) && (SystemID == NULL)) return(NULL);
5798:
1.97 daniel 5799: ctxt = xmlNewParserCtxt();
1.76 daniel 5800: if (ctxt == NULL) {
5801: return(NULL);
5802: }
5803:
5804: /*
5805: * Set-up the SAX context
5806: */
5807: if (ctxt == NULL) return(NULL);
5808: if (sax != NULL) {
1.93 veillard 5809: if (ctxt->sax != NULL)
5810: free(ctxt->sax);
1.76 daniel 5811: ctxt->sax = sax;
5812: ctxt->userData = NULL;
5813: }
5814:
5815: /*
5816: * Ask the Entity resolver to load the damn thing
5817: */
5818:
5819: if ((ctxt->sax != NULL) && (ctxt->sax->resolveEntity != NULL))
5820: input = ctxt->sax->resolveEntity(ctxt->userData, ExternalID, SystemID);
5821: if (input == NULL) {
1.86 daniel 5822: if (sax != NULL) ctxt->sax = NULL;
1.76 daniel 5823: xmlFreeParserCtxt(ctxt);
5824: return(NULL);
5825: }
5826:
5827: /*
5828: * plug some encoding conversion routines here. !!!
5829: */
5830: xmlPushInput(ctxt, input);
5831: enc = xmlDetectCharEncoding(ctxt->input->cur);
5832: xmlSwitchEncoding(ctxt, enc);
5833:
1.95 veillard 5834: if (input->filename == NULL)
5835: input->filename = xmlStrdup(SystemID);
1.76 daniel 5836: input->line = 1;
5837: input->col = 1;
5838: input->base = ctxt->input->cur;
5839: input->cur = ctxt->input->cur;
5840: input->free = NULL;
5841:
5842: /*
5843: * let's parse that entity knowing it's an external subset.
5844: */
1.79 daniel 5845: xmlParseExternalSubset(ctxt, ExternalID, SystemID);
1.76 daniel 5846:
5847: if (ctxt->myDoc != NULL) {
5848: if (ctxt->wellFormed) {
5849: ret = ctxt->myDoc->intSubset;
5850: ctxt->myDoc->intSubset = NULL;
5851: } else {
5852: ret = NULL;
5853: }
5854: xmlFreeDoc(ctxt->myDoc);
5855: ctxt->myDoc = NULL;
5856: }
1.86 daniel 5857: if (sax != NULL) ctxt->sax = NULL;
1.76 daniel 5858: xmlFreeParserCtxt(ctxt);
5859:
5860: return(ret);
5861: }
5862:
5863: /**
5864: * xmlParseDTD :
5865: * @ExternalID: a NAME* containing the External ID of the DTD
5866: * @SystemID: a NAME* containing the URL to the DTD
5867: *
5868: * Load and parse an external subset.
5869: *
5870: * Returns the resulting xmlDtdPtr or NULL in case of error.
5871: */
5872:
5873: xmlDtdPtr
5874: xmlParseDTD(const CHAR *ExternalID, const CHAR *SystemID) {
5875: return(xmlSAXParseDTD(NULL, ExternalID, SystemID));
1.59 daniel 5876: }
5877:
5878: /**
5879: * xmlRecoverDoc :
5880: * @cur: a pointer to an array of CHAR
5881: *
5882: * parse an XML in-memory document and build a tree.
5883: * In the case the document is not Well Formed, a tree is built anyway
5884: *
1.68 daniel 5885: * Returns the resulting document tree
1.59 daniel 5886: */
5887:
1.69 daniel 5888: xmlDocPtr
5889: xmlRecoverDoc(CHAR *cur) {
1.59 daniel 5890: return(xmlSAXParseDoc(NULL, cur, 1));
1.55 daniel 5891: }
5892:
5893: /**
1.69 daniel 5894: * xmlCreateFileParserCtxt :
1.50 daniel 5895: * @filename: the filename
5896: *
1.69 daniel 5897: * Create a parser context for a file content.
5898: * Automatic support for ZLIB/Compress compressed document is provided
5899: * by default if found at compile-time.
1.50 daniel 5900: *
1.69 daniel 5901: * Returns the new parser context or NULL
1.9 httpng 5902: */
1.69 daniel 5903: xmlParserCtxtPtr
5904: xmlCreateFileParserCtxt(const char *filename)
5905: {
5906: xmlParserCtxtPtr ctxt;
1.40 daniel 5907: xmlParserInputPtr inputStream;
1.91 daniel 5908: xmlParserInputBufferPtr buf;
1.111 ! daniel 5909: char *directory = NULL;
1.9 httpng 5910:
1.91 daniel 5911: buf = xmlParserInputBufferCreateFilename(filename, XML_CHAR_ENCODING_NONE);
5912: if (buf == NULL) return(NULL);
1.9 httpng 5913:
1.97 daniel 5914: ctxt = xmlNewParserCtxt();
1.16 daniel 5915: if (ctxt == NULL) {
5916: return(NULL);
5917: }
1.97 daniel 5918:
1.96 daniel 5919: inputStream = xmlNewInputStream(ctxt);
1.40 daniel 5920: if (inputStream == NULL) {
1.97 daniel 5921: xmlFreeParserCtxt(ctxt);
1.40 daniel 5922: return(NULL);
5923: }
5924:
5925: inputStream->filename = strdup(filename);
1.91 daniel 5926: inputStream->buf = buf;
5927: inputStream->base = inputStream->buf->buffer->content;
5928: inputStream->cur = inputStream->buf->buffer->content;
1.16 daniel 5929:
1.40 daniel 5930: inputPush(ctxt, inputStream);
1.110 daniel 5931: if ((ctxt->directory == NULL) && (directory == NULL))
1.106 daniel 5932: directory = xmlParserGetDirectory(filename);
5933: if ((ctxt->directory == NULL) && (directory != NULL))
1.110 daniel 5934: ctxt->directory = directory;
1.106 daniel 5935:
1.69 daniel 5936: return(ctxt);
5937: }
5938:
5939: /**
5940: * xmlSAXParseFile :
5941: * @sax: the SAX handler block
5942: * @filename: the filename
5943: * @recovery: work in recovery mode, i.e. tries to read no Well Formed
5944: * documents
5945: *
5946: * parse an XML file and build a tree. Automatic support for ZLIB/Compress
5947: * compressed document is provided by default if found at compile-time.
5948: * It use the given SAX function block to handle the parsing callback.
5949: * If sax is NULL, fallback to the default DOM tree building routines.
5950: *
5951: * Returns the resulting document tree
5952: */
5953:
1.79 daniel 5954: xmlDocPtr
5955: xmlSAXParseFile(xmlSAXHandlerPtr sax, const char *filename,
1.69 daniel 5956: int recovery) {
5957: xmlDocPtr ret;
5958: xmlParserCtxtPtr ctxt;
1.111 ! daniel 5959: char *directory = NULL;
1.69 daniel 5960:
5961: ctxt = xmlCreateFileParserCtxt(filename);
5962: if (ctxt == NULL) return(NULL);
1.74 daniel 5963: if (sax != NULL) {
1.93 veillard 5964: if (ctxt->sax != NULL)
5965: free(ctxt->sax);
1.74 daniel 5966: ctxt->sax = sax;
5967: ctxt->userData = NULL;
5968: }
1.106 daniel 5969:
1.110 daniel 5970: if ((ctxt->directory == NULL) && (directory == NULL))
1.106 daniel 5971: directory = xmlParserGetDirectory(filename);
5972: if ((ctxt->directory == NULL) && (directory != NULL))
5973: ctxt->directory = xmlStrdup(directory);
1.16 daniel 5974:
5975: xmlParseDocument(ctxt);
1.40 daniel 5976:
1.72 daniel 5977: if ((ctxt->wellFormed) || recovery) ret = ctxt->myDoc;
1.59 daniel 5978: else {
5979: ret = NULL;
1.72 daniel 5980: xmlFreeDoc(ctxt->myDoc);
5981: ctxt->myDoc = NULL;
1.59 daniel 5982: }
1.86 daniel 5983: if (sax != NULL)
5984: ctxt->sax = NULL;
1.69 daniel 5985: xmlFreeParserCtxt(ctxt);
1.20 daniel 5986:
5987: return(ret);
5988: }
5989:
1.55 daniel 5990: /**
5991: * xmlParseFile :
5992: * @filename: the filename
5993: *
5994: * parse an XML file and build a tree. Automatic support for ZLIB/Compress
5995: * compressed document is provided by default if found at compile-time.
5996: *
1.68 daniel 5997: * Returns the resulting document tree
1.55 daniel 5998: */
5999:
1.79 daniel 6000: xmlDocPtr
6001: xmlParseFile(const char *filename) {
1.59 daniel 6002: return(xmlSAXParseFile(NULL, filename, 0));
6003: }
6004:
6005: /**
6006: * xmlRecoverFile :
6007: * @filename: the filename
6008: *
6009: * parse an XML file and build a tree. Automatic support for ZLIB/Compress
6010: * compressed document is provided by default if found at compile-time.
6011: * In the case the document is not Well Formed, a tree is built anyway
6012: *
1.68 daniel 6013: * Returns the resulting document tree
1.59 daniel 6014: */
6015:
1.79 daniel 6016: xmlDocPtr
6017: xmlRecoverFile(const char *filename) {
1.59 daniel 6018: return(xmlSAXParseFile(NULL, filename, 1));
1.55 daniel 6019: }
1.32 daniel 6020:
1.50 daniel 6021: /**
1.69 daniel 6022: * xmlCreateMemoryParserCtxt :
1.68 daniel 6023: * @buffer: an pointer to a char array
1.50 daniel 6024: * @size: the siwe of the array
6025: *
1.69 daniel 6026: * Create a parser context for an XML in-memory document.
1.50 daniel 6027: *
1.69 daniel 6028: * Returns the new parser context or NULL
1.20 daniel 6029: */
1.69 daniel 6030: xmlParserCtxtPtr
6031: xmlCreateMemoryParserCtxt(char *buffer, int size) {
1.20 daniel 6032: xmlParserCtxtPtr ctxt;
1.40 daniel 6033: xmlParserInputPtr input;
1.75 daniel 6034: xmlCharEncoding enc;
1.40 daniel 6035:
6036: buffer[size - 1] = '\0';
6037:
1.97 daniel 6038: ctxt = xmlNewParserCtxt();
1.20 daniel 6039: if (ctxt == NULL) {
6040: return(NULL);
6041: }
1.97 daniel 6042:
1.96 daniel 6043: input = xmlNewInputStream(ctxt);
1.40 daniel 6044: if (input == NULL) {
1.97 daniel 6045: xmlFreeParserCtxt(ctxt);
1.40 daniel 6046: return(NULL);
6047: }
1.20 daniel 6048:
1.40 daniel 6049: input->filename = NULL;
6050: input->line = 1;
6051: input->col = 1;
1.96 daniel 6052: input->buf = NULL;
1.91 daniel 6053: input->consumed = 0;
1.45 daniel 6054:
6055: /*
1.75 daniel 6056: * plug some encoding conversion routines here. !!!
1.45 daniel 6057: */
1.75 daniel 6058: enc = xmlDetectCharEncoding(buffer);
6059: xmlSwitchEncoding(ctxt, enc);
6060:
1.40 daniel 6061: input->base = buffer;
6062: input->cur = buffer;
1.69 daniel 6063: input->free = NULL;
1.20 daniel 6064:
1.40 daniel 6065: inputPush(ctxt, input);
1.69 daniel 6066: return(ctxt);
6067: }
6068:
6069: /**
6070: * xmlSAXParseMemory :
6071: * @sax: the SAX handler block
6072: * @buffer: an pointer to a char array
6073: * @size: the siwe of the array
6074: * @recovery: work in recovery mode, i.e. tries to read no Well Formed
6075: * documents
6076: *
6077: * parse an XML in-memory block and use the given SAX function block
6078: * to handle the parsing callback. If sax is NULL, fallback to the default
6079: * DOM tree building routines.
6080: *
6081: * Returns the resulting document tree
6082: */
6083: xmlDocPtr
6084: xmlSAXParseMemory(xmlSAXHandlerPtr sax, char *buffer, int size, int recovery) {
6085: xmlDocPtr ret;
6086: xmlParserCtxtPtr ctxt;
6087:
6088: ctxt = xmlCreateMemoryParserCtxt(buffer, size);
6089: if (ctxt == NULL) return(NULL);
1.74 daniel 6090: if (sax != NULL) {
6091: ctxt->sax = sax;
6092: ctxt->userData = NULL;
6093: }
1.20 daniel 6094:
6095: xmlParseDocument(ctxt);
1.40 daniel 6096:
1.72 daniel 6097: if ((ctxt->wellFormed) || recovery) ret = ctxt->myDoc;
1.59 daniel 6098: else {
6099: ret = NULL;
1.72 daniel 6100: xmlFreeDoc(ctxt->myDoc);
6101: ctxt->myDoc = NULL;
1.59 daniel 6102: }
1.86 daniel 6103: if (sax != NULL)
6104: ctxt->sax = NULL;
1.69 daniel 6105: xmlFreeParserCtxt(ctxt);
1.16 daniel 6106:
1.9 httpng 6107: return(ret);
1.17 daniel 6108: }
6109:
1.55 daniel 6110: /**
6111: * xmlParseMemory :
1.68 daniel 6112: * @buffer: an pointer to a char array
1.55 daniel 6113: * @size: the size of the array
6114: *
6115: * parse an XML in-memory block and build a tree.
6116: *
1.68 daniel 6117: * Returns the resulting document tree
1.55 daniel 6118: */
6119:
6120: xmlDocPtr xmlParseMemory(char *buffer, int size) {
1.59 daniel 6121: return(xmlSAXParseMemory(NULL, buffer, size, 0));
6122: }
6123:
6124: /**
6125: * xmlRecoverMemory :
1.68 daniel 6126: * @buffer: an pointer to a char array
1.59 daniel 6127: * @size: the size of the array
6128: *
6129: * parse an XML in-memory block and build a tree.
6130: * In the case the document is not Well Formed, a tree is built anyway
6131: *
1.68 daniel 6132: * Returns the resulting document tree
1.59 daniel 6133: */
6134:
6135: xmlDocPtr xmlRecoverMemory(char *buffer, int size) {
6136: return(xmlSAXParseMemory(NULL, buffer, size, 1));
1.17 daniel 6137: }
6138:
6139:
1.50 daniel 6140: /**
6141: * xmlSetupParserForBuffer:
6142: * @ctxt: an XML parser context
6143: * @buffer: a CHAR * buffer
6144: * @filename: a file name
6145: *
1.19 daniel 6146: * Setup the parser context to parse a new buffer; Clears any prior
6147: * contents from the parser context. The buffer parameter must not be
6148: * NULL, but the filename parameter can be
6149: */
1.55 daniel 6150: void
6151: xmlSetupParserForBuffer(xmlParserCtxtPtr ctxt, const CHAR* buffer,
1.17 daniel 6152: const char* filename)
6153: {
1.96 daniel 6154: xmlParserInputPtr input;
1.40 daniel 6155:
1.96 daniel 6156: input = xmlNewInputStream(ctxt);
6157: if (input == NULL) {
6158: perror("malloc");
6159: free(ctxt);
6160: exit(1);
6161: }
6162:
6163: xmlClearParserCtxt(ctxt);
6164: if (filename != NULL)
6165: input->filename = strdup(filename);
6166: input->base = buffer;
6167: input->cur = buffer;
6168: inputPush(ctxt, input);
1.17 daniel 6169: }
6170:
1.32 daniel 6171:
1.98 daniel 6172: /************************************************************************
6173: * *
6174: * Miscellaneous *
6175: * *
6176: ************************************************************************/
6177:
6178:
1.50 daniel 6179: /**
6180: * xmlParserFindNodeInfo:
6181: * @ctxt: an XML parser context
6182: * @node: an XML node within the tree
6183: *
6184: * Find the parser node info struct for a given node
6185: *
1.68 daniel 6186: * Returns an xmlParserNodeInfo block pointer or NULL
1.32 daniel 6187: */
6188: const xmlParserNodeInfo* xmlParserFindNodeInfo(const xmlParserCtxt* ctx,
6189: const xmlNode* node)
6190: {
6191: unsigned long pos;
6192:
6193: /* Find position where node should be at */
6194: pos = xmlParserFindNodeInfoIndex(&ctx->node_seq, node);
6195: if ( ctx->node_seq.buffer[pos].node == node )
6196: return &ctx->node_seq.buffer[pos];
6197: else
6198: return NULL;
6199: }
6200:
6201:
1.50 daniel 6202: /**
6203: * xmlInitNodeInfoSeq :
6204: * @seq: a node info sequence pointer
6205: *
6206: * -- Initialize (set to initial state) node info sequence
1.32 daniel 6207: */
1.55 daniel 6208: void
6209: xmlInitNodeInfoSeq(xmlParserNodeInfoSeqPtr seq)
1.32 daniel 6210: {
6211: seq->length = 0;
6212: seq->maximum = 0;
6213: seq->buffer = NULL;
6214: }
6215:
1.50 daniel 6216: /**
6217: * xmlClearNodeInfoSeq :
6218: * @seq: a node info sequence pointer
6219: *
6220: * -- Clear (release memory and reinitialize) node
1.32 daniel 6221: * info sequence
6222: */
1.55 daniel 6223: void
6224: xmlClearNodeInfoSeq(xmlParserNodeInfoSeqPtr seq)
1.32 daniel 6225: {
6226: if ( seq->buffer != NULL )
6227: free(seq->buffer);
6228: xmlInitNodeInfoSeq(seq);
6229: }
6230:
6231:
1.50 daniel 6232: /**
6233: * xmlParserFindNodeInfoIndex:
6234: * @seq: a node info sequence pointer
6235: * @node: an XML node pointer
6236: *
6237: *
1.32 daniel 6238: * xmlParserFindNodeInfoIndex : Find the index that the info record for
6239: * the given node is or should be at in a sorted sequence
1.68 daniel 6240: *
6241: * Returns a long indicating the position of the record
1.32 daniel 6242: */
6243: unsigned long xmlParserFindNodeInfoIndex(const xmlParserNodeInfoSeq* seq,
6244: const xmlNode* node)
6245: {
6246: unsigned long upper, lower, middle;
6247: int found = 0;
6248:
6249: /* Do a binary search for the key */
6250: lower = 1;
6251: upper = seq->length;
6252: middle = 0;
6253: while ( lower <= upper && !found) {
6254: middle = lower + (upper - lower) / 2;
6255: if ( node == seq->buffer[middle - 1].node )
6256: found = 1;
6257: else if ( node < seq->buffer[middle - 1].node )
6258: upper = middle - 1;
6259: else
6260: lower = middle + 1;
6261: }
6262:
6263: /* Return position */
6264: if ( middle == 0 || seq->buffer[middle - 1].node < node )
6265: return middle;
6266: else
6267: return middle - 1;
6268: }
6269:
6270:
1.50 daniel 6271: /**
6272: * xmlParserAddNodeInfo:
6273: * @ctxt: an XML parser context
1.68 daniel 6274: * @info: a node info sequence pointer
1.50 daniel 6275: *
6276: * Insert node info record into the sorted sequence
1.32 daniel 6277: */
1.55 daniel 6278: void
6279: xmlParserAddNodeInfo(xmlParserCtxtPtr ctxt,
1.68 daniel 6280: const xmlParserNodeInfo* info)
1.32 daniel 6281: {
6282: unsigned long pos;
6283: static unsigned int block_size = 5;
6284:
6285: /* Find pos and check to see if node is already in the sequence */
1.55 daniel 6286: pos = xmlParserFindNodeInfoIndex(&ctxt->node_seq, info->node);
6287: if ( pos < ctxt->node_seq.length
6288: && ctxt->node_seq.buffer[pos].node == info->node ) {
6289: ctxt->node_seq.buffer[pos] = *info;
1.32 daniel 6290: }
6291:
6292: /* Otherwise, we need to add new node to buffer */
6293: else {
6294: /* Expand buffer by 5 if needed */
1.55 daniel 6295: if ( ctxt->node_seq.length + 1 > ctxt->node_seq.maximum ) {
1.32 daniel 6296: xmlParserNodeInfo* tmp_buffer;
1.55 daniel 6297: unsigned int byte_size = (sizeof(*ctxt->node_seq.buffer)
6298: *(ctxt->node_seq.maximum + block_size));
1.32 daniel 6299:
1.55 daniel 6300: if ( ctxt->node_seq.buffer == NULL )
1.32 daniel 6301: tmp_buffer = (xmlParserNodeInfo*)malloc(byte_size);
6302: else
1.55 daniel 6303: tmp_buffer = (xmlParserNodeInfo*)realloc(ctxt->node_seq.buffer, byte_size);
1.32 daniel 6304:
6305: if ( tmp_buffer == NULL ) {
1.55 daniel 6306: if ((ctxt->sax != NULL) && (ctxt->sax->error != NULL))
1.74 daniel 6307: ctxt->sax->error(ctxt->userData, "Out of memory\n");
1.32 daniel 6308: return;
6309: }
1.55 daniel 6310: ctxt->node_seq.buffer = tmp_buffer;
6311: ctxt->node_seq.maximum += block_size;
1.32 daniel 6312: }
6313:
6314: /* If position is not at end, move elements out of the way */
1.55 daniel 6315: if ( pos != ctxt->node_seq.length ) {
1.32 daniel 6316: unsigned long i;
6317:
1.55 daniel 6318: for ( i = ctxt->node_seq.length; i > pos; i-- )
6319: ctxt->node_seq.buffer[i] = ctxt->node_seq.buffer[i - 1];
1.32 daniel 6320: }
6321:
6322: /* Copy element and increase length */
1.55 daniel 6323: ctxt->node_seq.buffer[pos] = *info;
6324: ctxt->node_seq.length++;
1.32 daniel 6325: }
6326: }
1.77 daniel 6327:
1.98 daniel 6328:
6329: /**
6330: * xmlSubstituteEntitiesDefault :
6331: * @val: int 0 or 1
6332: *
6333: * Set and return the previous value for default entity support.
6334: * Initially the parser always keep entity references instead of substituting
6335: * entity values in the output. This function has to be used to change the
6336: * default parser behaviour
6337: * SAX::subtituteEntities() has to be used for changing that on a file by
6338: * file basis.
6339: *
6340: * Returns the last value for 0 for no substitution, 1 for substitution.
6341: */
6342:
6343: int
6344: xmlSubstituteEntitiesDefault(int val) {
6345: int old = xmlSubstituteEntitiesDefaultValue;
6346:
6347: xmlSubstituteEntitiesDefaultValue = val;
6348: return(old);
6349: }
1.77 daniel 6350:
Webmaster