Annotation of XML/parser.c, revision 1.120
1.1 veillard 1: /*
1.3 veillard 2: * parser.c : an XML 1.0 non-verifying parser
1.15 veillard 3: *
4: * See Copyright for the status of this software.
5: *
1.60 daniel 6: * Daniel.Veillard@w3.org
1.1 veillard 7: */
8:
1.26 daniel 9: #ifdef WIN32
10: #define HAVE_FCNTL_H
11: #include <io.h>
12: #else
1.9 httpng 13: #include <config.h>
1.26 daniel 14: #endif
1.1 veillard 15: #include <stdio.h>
16: #include <ctype.h>
1.14 veillard 17: #include <string.h> /* for memset() only */
1.50 daniel 18: #include <stdlib.h>
1.9 httpng 19: #include <sys/stat.h>
20: #ifdef HAVE_FCNTL_H
21: #include <fcntl.h>
22: #endif
1.10 httpng 23: #ifdef HAVE_UNISTD_H
24: #include <unistd.h>
25: #endif
1.20 daniel 26: #ifdef HAVE_ZLIB_H
27: #include <zlib.h>
28: #endif
1.1 veillard 29:
1.119 daniel 30: #include "xmlmemory.h"
1.14 veillard 31: #include "tree.h"
1.1 veillard 32: #include "parser.h"
1.14 veillard 33: #include "entities.h"
1.75 daniel 34: #include "encoding.h"
1.61 daniel 35: #include "valid.h"
1.69 daniel 36: #include "parserInternals.h"
1.91 daniel 37: #include "xmlIO.h"
1.1 veillard 38:
/*
 * Version string exported by the parser; its value comes from the
 * LIBXML_VERSION macro, which is defined outside this file.
 */
const char *xmlParserVersion = LIBXML_VERSION;
40:
1.91 daniel 41:
42: /************************************************************************
43: * *
44: * Input handling functions for progressive parsing *
45: * *
46: ************************************************************************/
47:
48: /* #define DEBUG_INPUT */
49:
1.110 daniel 50: #define INPUT_CHUNK 250
51: /* we need to keep enough input to show errors in context */
52: #define LINE_LEN 80
1.91 daniel 53:
54: #ifdef DEBUG_INPUT
55: #define CHECK_BUFFER(in) check_buffer(in)
56:
57: void check_buffer(xmlParserInputPtr in) {
58: if (in->base != in->buf->buffer->content) {
59: fprintf(stderr, "xmlParserInput: base mismatch problem\n");
60: }
61: if (in->cur < in->base) {
62: fprintf(stderr, "xmlParserInput: cur < base problem\n");
63: }
64: if (in->cur > in->base + in->buf->buffer->use) {
65: fprintf(stderr, "xmlParserInput: cur > base + use problem\n");
66: }
67: fprintf(stderr,"buffer %x : content %x, cur %d, use %d, size %d\n",
68: (int) in, (int) in->buf->buffer->content, in->cur - in->base,
69: in->buf->buffer->use, in->buf->buffer->size);
70: }
71:
1.110 daniel 72: #else
73: #define CHECK_BUFFER(in)
74: #endif
75:
1.91 daniel 76:
77: /**
78: * xmlParserInputRead:
79: * @in: an XML parser input
80: * @len: an indicative size for the lookahead
81: *
82: * This function refresh the input for the parser. It doesn't try to
83: * preserve pointers to the input buffer, and discard already read data
84: *
85: * Returns the number of CHARs read, or -1 in case of error, 0 indicate the
86: * end of this entity
87: */
88: int
89: xmlParserInputRead(xmlParserInputPtr in, int len) {
90: int ret;
91: int used;
92: int index;
93:
94: #ifdef DEBUG_INPUT
95: fprintf(stderr, "Read\n");
96: #endif
97: if (in->buf == NULL) return(-1);
98: if (in->base == NULL) return(-1);
99: if (in->cur == NULL) return(-1);
100: if (in->buf->buffer == NULL) return(-1);
101:
102: CHECK_BUFFER(in);
103:
104: used = in->cur - in->buf->buffer->content;
105: ret = xmlBufferShrink(in->buf->buffer, used);
106: if (ret > 0) {
107: in->cur -= ret;
108: in->consumed += ret;
109: }
110: ret = xmlParserInputBufferRead(in->buf, len);
111: if (in->base != in->buf->buffer->content) {
112: /*
113: * the buffer has been realloced
114: */
115: index = in->cur - in->base;
116: in->base = in->buf->buffer->content;
117: in->cur = &in->buf->buffer->content[index];
118: }
119:
120: CHECK_BUFFER(in);
121:
122: return(ret);
123: }
124:
125: /**
126: * xmlParserInputGrow:
127: * @in: an XML parser input
128: * @len: an indicative size for the lookahead
129: *
130: * This function increase the input for the parser. It tries to
131: * preserve pointers to the input buffer, and keep already read data
132: *
133: * Returns the number of CHARs read, or -1 in case of error, 0 indicate the
134: * end of this entity
135: */
136: int
137: xmlParserInputGrow(xmlParserInputPtr in, int len) {
138: int ret;
139: int index;
140:
141: #ifdef DEBUG_INPUT
142: fprintf(stderr, "Grow\n");
143: #endif
144: if (in->buf == NULL) return(-1);
145: if (in->base == NULL) return(-1);
146: if (in->cur == NULL) return(-1);
147: if (in->buf->buffer == NULL) return(-1);
148:
149: CHECK_BUFFER(in);
150:
151: index = in->cur - in->base;
152: if (in->buf->buffer->use > index + INPUT_CHUNK) {
153:
154: CHECK_BUFFER(in);
155:
156: return(0);
157: }
158: ret = xmlParserInputBufferGrow(in->buf, len);
159: if (in->base != in->buf->buffer->content) {
160: /*
161: * the buffer has been realloced
162: */
163: index = in->cur - in->base;
164: in->base = in->buf->buffer->content;
165: in->cur = &in->buf->buffer->content[index];
166: }
167:
168: CHECK_BUFFER(in);
169:
170: return(ret);
171: }
172:
173: /**
174: * xmlParserInputShrink:
175: * @in: an XML parser input
176: *
177: * This function removes used input for the parser.
178: */
179: void
180: xmlParserInputShrink(xmlParserInputPtr in) {
181: int used;
182: int ret;
183: int index;
184:
185: #ifdef DEBUG_INPUT
186: fprintf(stderr, "Shrink\n");
187: #endif
188: if (in->buf == NULL) return;
189: if (in->base == NULL) return;
190: if (in->cur == NULL) return;
191: if (in->buf->buffer == NULL) return;
192:
193: CHECK_BUFFER(in);
194:
195: used = in->cur - in->buf->buffer->content;
196: if (used > INPUT_CHUNK) {
1.110 daniel 197: ret = xmlBufferShrink(in->buf->buffer, used - LINE_LEN);
1.91 daniel 198: if (ret > 0) {
199: in->cur -= ret;
200: in->consumed += ret;
201: }
202: }
203:
204: CHECK_BUFFER(in);
205:
206: if (in->buf->buffer->use > INPUT_CHUNK) {
207: return;
208: }
209: xmlParserInputBufferRead(in->buf, 2 * INPUT_CHUNK);
210: if (in->base != in->buf->buffer->content) {
211: /*
212: * the buffer has been realloced
213: */
214: index = in->cur - in->base;
215: in->base = in->buf->buffer->content;
216: in->cur = &in->buf->buffer->content[index];
217: }
218:
219: CHECK_BUFFER(in);
220: }
221:
1.45 daniel 222: /************************************************************************
223: * *
224: * Parser stacks related functions and macros *
225: * *
226: ************************************************************************/
1.79 daniel 227:
/*
 * Global defaults copied into every parser context by xmlInitParserCtxt():
 * xmlSubstituteEntitiesDefaultValue initializes ctxt->replaceEntities and
 * xmlDoValidityCheckingDefaultValue initializes ctxt->validate.
 */
int xmlSubstituteEntitiesDefaultValue = 0;
int xmlDoValidityCheckingDefaultValue = 0;
1.79 daniel 230:
1.1 veillard 231: /*
1.40 daniel 232: * Generic function for accessing stacks in the Parser Context
1.1 veillard 233: */
234:
1.31 daniel 235: #define PUSH_AND_POP(type, name) \
1.72 daniel 236: extern int name##Push(xmlParserCtxtPtr ctxt, type value) { \
1.31 daniel 237: if (ctxt->name##Nr >= ctxt->name##Max) { \
238: ctxt->name##Max *= 2; \
1.119 daniel 239: ctxt->name##Tab = (void *) xmlRealloc(ctxt->name##Tab, \
1.40 daniel 240: ctxt->name##Max * sizeof(ctxt->name##Tab[0])); \
241: if (ctxt->name##Tab == NULL) { \
1.31 daniel 242: fprintf(stderr, "realloc failed !\n"); \
243: exit(1); \
244: } \
245: } \
1.40 daniel 246: ctxt->name##Tab[ctxt->name##Nr] = value; \
247: ctxt->name = value; \
248: return(ctxt->name##Nr++); \
1.31 daniel 249: } \
1.72 daniel 250: extern type name##Pop(xmlParserCtxtPtr ctxt) { \
1.69 daniel 251: type ret; \
1.40 daniel 252: if (ctxt->name##Nr <= 0) return(0); \
253: ctxt->name##Nr--; \
1.50 daniel 254: if (ctxt->name##Nr > 0) \
255: ctxt->name = ctxt->name##Tab[ctxt->name##Nr - 1]; \
256: else \
257: ctxt->name = NULL; \
1.69 daniel 258: ret = ctxt->name##Tab[ctxt->name##Nr]; \
259: ctxt->name##Tab[ctxt->name##Nr] = 0; \
260: return(ret); \
1.31 daniel 261: } \
262:
1.40 daniel 263: PUSH_AND_POP(xmlParserInputPtr, input)
1.41 daniel 264: PUSH_AND_POP(xmlNodePtr, node)
1.40 daniel 265:
/*
 * Macros for accessing the content. Those should be used only by the parser,
 * and not exported. All of them expect a variable named ctxt (the parser
 * context) to be in scope at the point of use.
 *
 * Dirty macros, i.e. one needs to make assumptions on the context to use them
 *
 *   CUR_PTR return the current pointer to the CHAR to be parsed.
 *   CUR     returns the current CHAR value, i.e. a 8 bit value if compiled
 *           in ISO-Latin or UTF-8, and the current 16 bit value if compiled
 *           in UNICODE mode. This should be used internally by the parser
 *           only to compare to ASCII values otherwise it would break when
 *           running with UTF-8 encoding.
 *           When ctxt->token is non-zero, CUR yields that token instead of
 *           the character under the input cursor.
 *   NXT(n)  returns the n'th next CHAR. Same as CUR it should be used only
 *           to compare on ASCII based substring. It ignores ctxt->token.
 *   SKIP(n) Skip n CHAR, and must also be used only to skip ASCII defined
 *           strings within the parser. Line/column counters are not updated.
 *
 * Clean macros, not dependent of an ASCII context, expect UTF-8 encoding
 *
 *   CURRENT Returns the current char value, with the full decoding of
 *           UTF-8 if we are using this mode. It returns an int.
 *           NOTE(review): as defined below it only dereferences the input
 *           cursor; no decoding is visible here.
 *   NEXT    Skip to the next character, this does the proper decoding
 *           in UTF-8 mode. It also pop-up unfinished entities on the fly.
 *   COPY(to) copy one char to *to, increment CUR_PTR and to accordingly
 *           (not defined in this part of the file).
 */

/* Pending token if any, otherwise the character under the input cursor. */
#define CUR (ctxt->token ? ctxt->token : (*ctxt->input->cur))
/* Advance the input cursor by val items. */
#define SKIP(val) ctxt->input->cur += (val)
/* Item located val positions after the input cursor. */
#define NXT(val) ctxt->input->cur[(val)]
/* The input cursor itself. */
#define CUR_PTR ctxt->input->cur
/*
 * SHRINK: discard used input, then pop the current input if it is exhausted
 * and cannot be grown. Expands to TWO statements (a call followed by an if),
 * so it must not be used as the unbraced body of an if/else/loop.
 */
#define SHRINK xmlParserInputShrink(ctxt->input);			\
    if ((*ctxt->input->cur == 0) &&					\
        (xmlParserInputGrow(ctxt->input, INPUT_CHUNK) <= 0))		\
	    xmlPopInput(ctxt)

/*
 * GROW: try to make INPUT_CHUNK more items available, then pop the current
 * input if it is exhausted and cannot be grown. Like SHRINK it expands to
 * TWO statements and must not be used as an unbraced body.
 */
#define GROW xmlParserInputGrow(ctxt->input, INPUT_CHUNK);		\
    if ((*ctxt->input->cur == 0) &&					\
        (xmlParserInputGrow(ctxt->input, INPUT_CHUNK) <= 0))		\
	    xmlPopInput(ctxt)

/*
 * SKIP_BLANKS: skip blank characters, giving the parameter-entity and
 * entity reference handlers a chance to run when the cursor stops on '%'
 * or '&', and start over while this exposes more blanks. The expansion
 * already ends with a ';'.
 */
#define SKIP_BLANKS 							\
    do { 								\
	while (IS_BLANK(CUR)) NEXT;					\
	if (*ctxt->input->cur == '%') xmlParserHandlePEReference(ctxt);	\
	if (*ctxt->input->cur == '&') xmlParserHandleReference(ctxt);	\
    } while (IS_BLANK(CUR));

/* Character under the input cursor, ignoring any pending ctxt->token. */
#define CURRENT (*ctxt->input->cur)
/*
 * NEXT: consume one item. A pending ctxt->token is consumed first (the
 * cursor does not move in that case). Otherwise an exhausted input that
 * cannot be grown is popped, or the cursor advances by one with the
 * line/column counters updated and more input requested when the end of
 * the buffered data is reached; then the '%' and '&' handlers are given a
 * chance to run on the new current character.
 * Expands to a brace-enclosed block, not to an expression.
 */
#define NEXT {								\
    if (ctxt->token != 0) ctxt->token = 0;				\
    else {								\
    if ((*ctxt->input->cur == 0) &&					\
        (xmlParserInputGrow(ctxt->input, INPUT_CHUNK) <= 0)) {		\
	    xmlPopInput(ctxt);						\
    } else {								\
        if (*(ctxt->input->cur) == '\n') {				\
	    ctxt->input->line++; ctxt->input->col = 1;			\
	} else ctxt->input->col++;					\
	ctxt->input->cur++;						\
        if (*ctxt->input->cur == 0)					\
	    xmlParserInputGrow(ctxt->input, INPUT_CHUNK);		\
    }									\
    if (*ctxt->input->cur == '%') xmlParserHandlePEReference(ctxt);	\
    if (*ctxt->input->cur == '&') xmlParserHandleReference(ctxt);	\
}}
1.91 daniel 331:
1.42 daniel 332:
1.97 daniel 333: /************************************************************************
334: * *
335: * Commodity functions to handle entities processing *
336: * *
337: ************************************************************************/
1.40 daniel 338:
1.50 daniel 339: /**
340: * xmlPopInput:
341: * @ctxt: an XML parser context
342: *
1.40 daniel 343: * xmlPopInput: the current input pointed by ctxt->input came to an end
344: * pop it and return the next char.
1.45 daniel 345: *
1.68 daniel 346: * Returns the current CHAR in the parser context
1.40 daniel 347: */
1.55 daniel 348: CHAR
349: xmlPopInput(xmlParserCtxtPtr ctxt) {
1.40 daniel 350: if (ctxt->inputNr == 1) return(0); /* End of main Input */
1.69 daniel 351: xmlFreeInputStream(inputPop(ctxt));
1.97 daniel 352: if ((*ctxt->input->cur == 0) &&
353: (xmlParserInputGrow(ctxt->input, INPUT_CHUNK) <= 0))
354: return(xmlPopInput(ctxt));
1.40 daniel 355: return(CUR);
356: }
357:
1.50 daniel 358: /**
359: * xmlPushInput:
360: * @ctxt: an XML parser context
361: * @input: an XML parser input fragment (entity, XML fragment ...).
362: *
1.40 daniel 363: * xmlPushInput: switch to a new input stream which is stacked on top
364: * of the previous one(s).
365: */
1.55 daniel 366: void
367: xmlPushInput(xmlParserCtxtPtr ctxt, xmlParserInputPtr input) {
1.40 daniel 368: if (input == NULL) return;
369: inputPush(ctxt, input);
370: }
371:
1.50 daniel 372: /**
1.69 daniel 373: * xmlFreeInputStream:
1.101 daniel 374: * @input: an xmlP arserInputPtr
1.69 daniel 375: *
376: * Free up an input stream.
377: */
378: void
379: xmlFreeInputStream(xmlParserInputPtr input) {
380: if (input == NULL) return;
381:
1.119 daniel 382: if (input->filename != NULL) xmlFree((char *) input->filename);
383: if (input->directory != NULL) xmlFree((char *) input->directory);
1.69 daniel 384: if ((input->free != NULL) && (input->base != NULL))
1.116 daniel 385: input->free((CHAR *) input->base);
1.93 veillard 386: if (input->buf != NULL)
387: xmlFreeParserInputBuffer(input->buf);
1.69 daniel 388: memset(input, -1, sizeof(xmlParserInput));
1.119 daniel 389: xmlFree(input);
1.69 daniel 390: }
391:
392: /**
1.96 daniel 393: * xmlNewInputStream:
394: * @ctxt: an XML parser context
395: *
396: * Create a new input stream structure
397: * Returns the new input stream or NULL
398: */
399: xmlParserInputPtr
400: xmlNewInputStream(xmlParserCtxtPtr ctxt) {
401: xmlParserInputPtr input;
402:
1.119 daniel 403: input = (xmlParserInputPtr) xmlMalloc(sizeof(xmlParserInput));
1.96 daniel 404: if (input == NULL) {
405: if ((ctxt->sax != NULL) && (ctxt->sax->error != NULL))
406: ctxt->sax->error(ctxt->userData, "malloc: couldn't allocate a new input stream\n");
407: return(NULL);
408: }
409: input->filename = NULL;
410: input->directory = NULL;
411: input->base = NULL;
412: input->cur = NULL;
413: input->buf = NULL;
414: input->line = 1;
415: input->col = 1;
416: input->buf = NULL;
417: input->free = NULL;
418: input->consumed = 0;
419: return(input);
420: }
421:
422: /**
1.50 daniel 423: * xmlNewEntityInputStream:
424: * @ctxt: an XML parser context
425: * @entity: an Entity pointer
426: *
1.82 daniel 427: * Create a new input stream based on an xmlEntityPtr
1.113 daniel 428: *
429: * Returns the new input stream or NULL
1.45 daniel 430: */
1.50 daniel 431: xmlParserInputPtr
432: xmlNewEntityInputStream(xmlParserCtxtPtr ctxt, xmlEntityPtr entity) {
1.45 daniel 433: xmlParserInputPtr input;
434:
435: if (entity == NULL) {
1.55 daniel 436: if ((ctxt->sax != NULL) && (ctxt->sax->error != NULL))
1.74 daniel 437: ctxt->sax->error(ctxt->userData,
1.45 daniel 438: "internal: xmlNewEntityInputStream entity = NULL\n");
1.50 daniel 439: return(NULL);
1.45 daniel 440: }
441: if (entity->content == NULL) {
1.113 daniel 442: switch (entity->type) {
443: case XML_EXTERNAL_GENERAL_UNPARSED_ENTITY:
444: if ((ctxt->sax != NULL) && (ctxt->sax->error != NULL))
445: ctxt->sax->error(ctxt->userData,
446: "xmlNewEntityInputStream unparsed entity !\n");
447: break;
448: case XML_EXTERNAL_GENERAL_PARSED_ENTITY:
449: case XML_EXTERNAL_PARAMETER_ENTITY:
1.116 daniel 450: return(xmlLoadExternalEntity((char *) entity->SystemID,
451: (char *) entity->ExternalID, ctxt->input));
1.113 daniel 452: case XML_INTERNAL_GENERAL_ENTITY:
453: if ((ctxt->sax != NULL) && (ctxt->sax->error != NULL))
454: ctxt->sax->error(ctxt->userData,
455: "Internal entity %s without content !\n", entity->name);
456: break;
457: case XML_INTERNAL_PARAMETER_ENTITY:
458: if ((ctxt->sax != NULL) && (ctxt->sax->error != NULL))
459: ctxt->sax->error(ctxt->userData,
460: "Internal parameter entity %s without content !\n", entity->name);
461: break;
462: case XML_INTERNAL_PREDEFINED_ENTITY:
463: if ((ctxt->sax != NULL) && (ctxt->sax->error != NULL))
464: ctxt->sax->error(ctxt->userData,
465: "Predefined entity %s without content !\n", entity->name);
466: break;
467: }
1.50 daniel 468: return(NULL);
1.45 daniel 469: }
1.96 daniel 470: input = xmlNewInputStream(ctxt);
1.45 daniel 471: if (input == NULL) {
1.50 daniel 472: return(NULL);
1.45 daniel 473: }
1.116 daniel 474: input->filename = (char *) entity->SystemID; /* TODO !!! char <- CHAR */
1.45 daniel 475: input->base = entity->content;
476: input->cur = entity->content;
1.50 daniel 477: return(input);
1.45 daniel 478: }
479:
1.59 daniel 480: /**
481: * xmlNewStringInputStream:
482: * @ctxt: an XML parser context
1.96 daniel 483: * @buffer: an memory buffer
1.59 daniel 484: *
485: * Create a new input stream based on a memory buffer.
1.68 daniel 486: * Returns the new input stream
1.59 daniel 487: */
488: xmlParserInputPtr
1.96 daniel 489: xmlNewStringInputStream(xmlParserCtxtPtr ctxt, const CHAR *buffer) {
1.59 daniel 490: xmlParserInputPtr input;
491:
1.96 daniel 492: if (buffer == NULL) {
1.59 daniel 493: if ((ctxt->sax != NULL) && (ctxt->sax->error != NULL))
1.74 daniel 494: ctxt->sax->error(ctxt->userData,
1.59 daniel 495: "internal: xmlNewStringInputStream string = NULL\n");
496: return(NULL);
497: }
1.96 daniel 498: input = xmlNewInputStream(ctxt);
1.59 daniel 499: if (input == NULL) {
500: return(NULL);
501: }
1.96 daniel 502: input->base = buffer;
503: input->cur = buffer;
1.59 daniel 504: return(input);
505: }
506:
1.76 daniel 507: /**
508: * xmlNewInputFromFile:
509: * @ctxt: an XML parser context
510: * @filename: the filename to use as entity
511: *
512: * Create a new input stream based on a file.
513: *
514: * Returns the new input stream or NULL in case of error
515: */
516: xmlParserInputPtr
1.79 daniel 517: xmlNewInputFromFile(xmlParserCtxtPtr ctxt, const char *filename) {
1.91 daniel 518: xmlParserInputBufferPtr buf;
1.76 daniel 519: xmlParserInputPtr inputStream;
1.111 daniel 520: char *directory = NULL;
1.76 daniel 521:
1.96 daniel 522: if (ctxt == NULL) return(NULL);
1.91 daniel 523: buf = xmlParserInputBufferCreateFilename(filename, XML_CHAR_ENCODING_NONE);
1.94 daniel 524: if (buf == NULL) {
1.106 daniel 525: char name[1024];
526:
1.94 daniel 527: if ((ctxt->input != NULL) && (ctxt->input->directory != NULL)) {
528: #ifdef WIN32
529: sprintf(name, "%s\\%s", ctxt->input->directory, filename);
530: #else
531: sprintf(name, "%s/%s", ctxt->input->directory, filename);
532: #endif
533: buf = xmlParserInputBufferCreateFilename(name,
534: XML_CHAR_ENCODING_NONE);
1.106 daniel 535: if (buf != NULL)
1.119 daniel 536: directory = xmlMemStrdup(ctxt->input->directory);
1.106 daniel 537: }
538: if ((buf == NULL) && (ctxt->directory != NULL)) {
539: #ifdef WIN32
540: sprintf(name, "%s\\%s", ctxt->directory, filename);
541: #else
542: sprintf(name, "%s/%s", ctxt->directory, filename);
543: #endif
544: buf = xmlParserInputBufferCreateFilename(name,
545: XML_CHAR_ENCODING_NONE);
546: if (buf != NULL)
1.119 daniel 547: directory = xmlMemStrdup(ctxt->directory);
1.106 daniel 548: }
549: if (buf == NULL)
1.94 daniel 550: return(NULL);
551: }
552: if (directory == NULL)
553: directory = xmlParserGetDirectory(filename);
1.76 daniel 554:
1.96 daniel 555: inputStream = xmlNewInputStream(ctxt);
1.76 daniel 556: if (inputStream == NULL) {
1.119 daniel 557: if (directory != NULL) xmlFree((char *) directory);
1.76 daniel 558: return(NULL);
559: }
560:
1.119 daniel 561: inputStream->filename = xmlMemStrdup(filename);
1.94 daniel 562: inputStream->directory = directory;
1.91 daniel 563: inputStream->buf = buf;
1.76 daniel 564:
1.91 daniel 565: inputStream->base = inputStream->buf->buffer->content;
566: inputStream->cur = inputStream->buf->buffer->content;
1.106 daniel 567: if ((ctxt->directory == NULL) && (directory != NULL))
1.110 daniel 568: ctxt->directory = directory;
1.76 daniel 569: return(inputStream);
570: }
571:
1.77 daniel 572: /************************************************************************
573: * *
1.97 daniel 574: * Commodity functions to handle parser contexts *
575: * *
576: ************************************************************************/
577:
578: /**
579: * xmlInitParserCtxt:
580: * @ctxt: an XML parser context
581: *
582: * Initialize a parser context
583: */
584:
585: void
586: xmlInitParserCtxt(xmlParserCtxtPtr ctxt)
587: {
588: xmlSAXHandler *sax;
589:
1.119 daniel 590: sax = (xmlSAXHandler *) xmlMalloc(sizeof(xmlSAXHandler));
1.97 daniel 591: if (sax == NULL) {
592: fprintf(stderr, "xmlInitParserCtxt: out of memory\n");
593: }
594:
595: /* Allocate the Input stack */
1.119 daniel 596: ctxt->inputTab = (xmlParserInputPtr *) xmlMalloc(5 * sizeof(xmlParserInputPtr));
1.97 daniel 597: ctxt->inputNr = 0;
598: ctxt->inputMax = 5;
599: ctxt->input = NULL;
600: ctxt->version = NULL;
601: ctxt->encoding = NULL;
602: ctxt->standalone = -1;
1.98 daniel 603: ctxt->hasExternalSubset = 0;
604: ctxt->hasPErefs = 0;
1.97 daniel 605: ctxt->html = 0;
1.98 daniel 606: ctxt->external = 0;
1.97 daniel 607: ctxt->instate = XML_PARSER_PROLOG;
608: ctxt->token = 0;
1.106 daniel 609: ctxt->directory = NULL;
1.97 daniel 610:
611: /* Allocate the Node stack */
1.119 daniel 612: ctxt->nodeTab = (xmlNodePtr *) xmlMalloc(10 * sizeof(xmlNodePtr));
1.97 daniel 613: ctxt->nodeNr = 0;
614: ctxt->nodeMax = 10;
615: ctxt->node = NULL;
616:
617: if (sax == NULL) ctxt->sax = &xmlDefaultSAXHandler;
618: else {
619: ctxt->sax = sax;
620: memcpy(sax, &xmlDefaultSAXHandler, sizeof(xmlSAXHandler));
621: }
622: ctxt->userData = ctxt;
623: ctxt->myDoc = NULL;
624: ctxt->wellFormed = 1;
1.99 daniel 625: ctxt->valid = 1;
1.100 daniel 626: ctxt->validate = xmlDoValidityCheckingDefaultValue;
627: ctxt->vctxt.userData = ctxt;
628: ctxt->vctxt.error = xmlParserValidityError;
629: ctxt->vctxt.warning = xmlParserValidityWarning;
1.97 daniel 630: ctxt->replaceEntities = xmlSubstituteEntitiesDefaultValue;
631: ctxt->record_info = 0;
632: xmlInitNodeInfoSeq(&ctxt->node_seq);
633: }
634:
635: /**
636: * xmlFreeParserCtxt:
637: * @ctxt: an XML parser context
638: *
639: * Free all the memory used by a parser context. However the parsed
640: * document in ctxt->myDoc is not freed.
641: */
642:
643: void
644: xmlFreeParserCtxt(xmlParserCtxtPtr ctxt)
645: {
646: xmlParserInputPtr input;
647:
648: if (ctxt == NULL) return;
649:
650: while ((input = inputPop(ctxt)) != NULL) {
651: xmlFreeInputStream(input);
652: }
653:
1.119 daniel 654: if (ctxt->nodeTab != NULL) xmlFree(ctxt->nodeTab);
655: if (ctxt->inputTab != NULL) xmlFree(ctxt->inputTab);
656: if (ctxt->version != NULL) xmlFree((char *) ctxt->version);
657: if (ctxt->encoding != NULL) xmlFree((char *) ctxt->encoding);
1.97 daniel 658: if ((ctxt->sax != NULL) && (ctxt->sax != &xmlDefaultSAXHandler))
1.119 daniel 659: xmlFree(ctxt->sax);
660: if (ctxt->directory != NULL) xmlFree((char *) ctxt->directory);
661: xmlFree(ctxt);
1.97 daniel 662: }
663:
664: /**
665: * xmlNewParserCtxt:
666: *
667: * Allocate and initialize a new parser context.
668: *
669: * Returns the xmlParserCtxtPtr or NULL
670: */
671:
672: xmlParserCtxtPtr
673: xmlNewParserCtxt()
674: {
675: xmlParserCtxtPtr ctxt;
676:
1.119 daniel 677: ctxt = (xmlParserCtxtPtr) xmlMalloc(sizeof(xmlParserCtxt));
1.97 daniel 678: if (ctxt == NULL) {
679: fprintf(stderr, "xmlNewParserCtxt : cannot allocate context\n");
680: perror("malloc");
681: return(NULL);
682: }
683: xmlInitParserCtxt(ctxt);
684: return(ctxt);
685: }
686:
/**
 * xmlClearParserCtxt:
 * @ctxt:  an XML parser context
 *
 * Clear the node information sequence of the context, then reinitialize
 * the whole context with xmlInitParserCtxt().
 *
 * NOTE(review): xmlInitParserCtxt() allocates a new SAX handler and new
 * input/node stacks and overwrites the corresponding pointers; nothing
 * here releases the previous ones, so they look leaked unless the caller
 * freed them beforehand -- confirm against callers.
 */

void
xmlClearParserCtxt(xmlParserCtxtPtr ctxt)
{
    xmlClearNodeInfoSeq(&ctxt->node_seq);
    xmlInitParserCtxt(ctxt);
}
700:
701: /************************************************************************
702: * *
1.77 daniel 703: * Commodity functions to handle entities *
704: * *
705: ************************************************************************/
706:
/*
 * Prototypes of the two reference handlers, both taking the parser
 * context whose current input is to be examined.
 */
void xmlParserHandleReference(xmlParserCtxtPtr ctxt);
void xmlParserHandlePEReference(xmlParserCtxtPtr ctxt);
709:
710: /**
711: * xmlParseCharRef:
712: * @ctxt: an XML parser context
713: *
714: * parse Reference declarations
715: *
716: * [66] CharRef ::= '&#' [0-9]+ ';' |
717: * '&#x' [0-9a-fA-F]+ ';'
718: *
1.98 daniel 719: * [ WFC: Legal Character ]
720: * Characters referred to using character references must match the
721: * production for Char.
722: *
1.97 daniel 723: * Returns the value parsed (as an int)
1.77 daniel 724: */
1.97 daniel 725: int
726: xmlParseCharRef(xmlParserCtxtPtr ctxt) {
727: int val = 0;
728:
1.111 daniel 729: if (ctxt->token != 0) {
730: val = ctxt->token;
731: ctxt->token = 0;
732: return(val);
733: }
1.97 daniel 734: if ((CUR == '&') && (NXT(1) == '#') &&
735: (NXT(2) == 'x')) {
736: SKIP(3);
737: while (CUR != ';') {
738: if ((CUR >= '0') && (CUR <= '9'))
739: val = val * 16 + (CUR - '0');
740: else if ((CUR >= 'a') && (CUR <= 'f'))
741: val = val * 16 + (CUR - 'a') + 10;
742: else if ((CUR >= 'A') && (CUR <= 'F'))
743: val = val * 16 + (CUR - 'A') + 10;
744: else {
745: if ((ctxt->sax != NULL) && (ctxt->sax->error != NULL))
746: ctxt->sax->error(ctxt->userData,
747: "xmlParseCharRef: invalid hexadecimal value\n");
748: ctxt->wellFormed = 0;
749: val = 0;
750: break;
751: }
752: NEXT;
753: }
754: if (CUR == ';')
755: NEXT;
756: } else if ((CUR == '&') && (NXT(1) == '#')) {
757: SKIP(2);
758: while (CUR != ';') {
759: if ((CUR >= '0') && (CUR <= '9'))
760: val = val * 10 + (CUR - '0');
761: else {
762: if ((ctxt->sax != NULL) && (ctxt->sax->error != NULL))
763: ctxt->sax->error(ctxt->userData,
764: "xmlParseCharRef: invalid decimal value\n");
765: ctxt->wellFormed = 0;
766: val = 0;
767: break;
768: }
769: NEXT;
770: }
771: if (CUR == ';')
772: NEXT;
773: } else {
774: if ((ctxt->sax != NULL) && (ctxt->sax->error != NULL))
1.98 daniel 775: ctxt->sax->error(ctxt->userData,
776: "xmlParseCharRef: invalid value\n");
1.97 daniel 777: ctxt->wellFormed = 0;
778: }
1.98 daniel 779:
1.97 daniel 780: /*
1.98 daniel 781: * [ WFC: Legal Character ]
782: * Characters referred to using character references must match the
783: * production for Char.
1.97 daniel 784: */
785: if (IS_CHAR(val)) {
786: return(val);
787: } else {
788: if ((ctxt->sax != NULL) && (ctxt->sax->error != NULL))
1.98 daniel 789: ctxt->sax->error(ctxt->userData, "CharRef: invalid CHAR value %d\n",
1.97 daniel 790: val);
791: ctxt->wellFormed = 0;
792: }
793: return(0);
1.77 daniel 794: }
795:
/**
 * xmlParserHandleReference:
 * @ctxt:  the parser context
 *
 * [67] Reference ::= EntityRef | CharRef
 *
 * [68] EntityRef ::= '&' Name ';'
 *
 * [ WFC: Entity Declared ]
 * the Name given in the entity reference must match that in an entity
 * declaration, except that well-formed documents need not declare any
 * of the following entities: amp, lt, gt, apos, quot. 
 *
 * [ WFC: Parsed Entity ]
 * An entity reference must not contain the name of an unparsed entity
 *
 * [66] CharRef ::= '&#' [0-9]+ ';' |
 *                  '&#x' [0-9a-fA-F]+ ';'
 *
 * A Reference may have been detected in the current input stream;
 * the handling is done according to
 *      http://www.w3.org/TR/REC-xml#entproc
 *
 * Nothing is done when a token is already pending in ctxt->token or when
 * the current character is not '&'. Otherwise the action depends on the
 * parser state (ctxt->instate): the reference is left untouched, reported
 * as an error through the SAX error callback (clearing ctxt->wellFormed),
 * or substituted. A character reference is substituted by storing its
 * value in ctxt->token; an entity reference by storing the first character
 * of a predefined entity in ctxt->token, or by pushing a new input stream
 * built from the entity.
 */
void
xmlParserHandleReference(xmlParserCtxtPtr ctxt) {
    xmlParserInputPtr input;
    CHAR *name;
    xmlEntityPtr ent = NULL;

    if (ctxt->token != 0) return;
    if (CUR != '&') return;
    GROW;
    /* Character reference: "&#..." */
    if ((CUR == '&') && (NXT(1) == '#')) {
	switch(ctxt->instate) {
	    case XML_PARSER_CDATA_SECTION:
		return;
	    case XML_PARSER_COMMENT:
		return;
	    case XML_PARSER_EOF:
		if ((ctxt->sax != NULL) && (ctxt->sax->error != NULL))
		    ctxt->sax->error(ctxt->userData, "CharRef at EOF\n");
		ctxt->wellFormed = 0;
		return;
	    case XML_PARSER_PROLOG:
		if ((ctxt->sax != NULL) && (ctxt->sax->error != NULL))
		    ctxt->sax->error(ctxt->userData, "CharRef in prolog!\n");
		ctxt->wellFormed = 0;
		return;
	    case XML_PARSER_EPILOG:
		if ((ctxt->sax != NULL) && (ctxt->sax->error != NULL))
		    ctxt->sax->error(ctxt->userData, "CharRef in epilog!\n");
		ctxt->wellFormed = 0;
		return;
	    case XML_PARSER_DTD:
		if ((ctxt->sax != NULL) && (ctxt->sax->error != NULL))
		    ctxt->sax->error(ctxt->userData, 
		           "CharRef are forbiden in DTDs!\n");
		ctxt->wellFormed = 0;
		return;
	    case XML_PARSER_ENTITY_DECL:
	        /* we just ignore it there */
	        return;
	    case XML_PARSER_ENTITY_VALUE:
	        /*
		 * NOTE: in the case of entity values, we don't do the
		 *       substitution here since we need the literal
		 *       entity value to be able to save the internal
		 *       subset of the document.
		 *       This will be handled by xmlDecodeEntities
		 */
	        return;
	    case XML_PARSER_CONTENT:
	    case XML_PARSER_ATTRIBUTE_VALUE:
	        /* !!! this may not be Ok for UTF-8, multibyte sequence */
		/* the decoded value becomes the pending token read by CUR */
		ctxt->token = xmlParseCharRef(ctxt);
		return;
	}
	/* any state not listed above: leave the reference alone */
	return;
    }

    /* Entity reference: "&Name;" */
    switch(ctxt->instate) {
	case XML_PARSER_CDATA_SECTION:
	    return;
        case XML_PARSER_COMMENT:
	    return;
        case XML_PARSER_EOF:
	    if ((ctxt->sax != NULL) && (ctxt->sax->error != NULL))
	        ctxt->sax->error(ctxt->userData, "Reference at EOF\n");
	    ctxt->wellFormed = 0;
	    return;
        case XML_PARSER_PROLOG:
	    if ((ctxt->sax != NULL) && (ctxt->sax->error != NULL))
	        ctxt->sax->error(ctxt->userData, "Reference in prolog!\n");
	    ctxt->wellFormed = 0;
	    return;
        case XML_PARSER_EPILOG:
	    if ((ctxt->sax != NULL) && (ctxt->sax->error != NULL))
	        ctxt->sax->error(ctxt->userData, "Reference in epilog!\n");
	    ctxt->wellFormed = 0;
	    return;
	case XML_PARSER_ENTITY_VALUE:
	    /*
	     * NOTE: in the case of entity values, we don't do the
	     *       substitution here since we need the literal
	     *       entity value to be able to save the internal
	     *       subset of the document.
	     *       This will be handled by xmlDecodeEntities
	     */
	    return;
        case XML_PARSER_ATTRIBUTE_VALUE:
	    /*
	     * NOTE: in the case of attributes values, we don't do the
	     *       substitution here unless we are in a mode where
	     *       the parser is explicitly asked to substitute
	     *       entities. The SAX callback is called with values
	     *       without entity substitution.
	     *       This will then be handled by xmlDecodeEntities
	     */
	    return;
	case XML_PARSER_ENTITY_DECL:
	    /*
	     * we just ignore it there
	     * the substitution will be done once the entity is referenced
	     */
	    return;
        case XML_PARSER_DTD:
	    if ((ctxt->sax != NULL) && (ctxt->sax->error != NULL))
		ctxt->sax->error(ctxt->userData, 
		       "Entity references are forbiden in DTDs!\n");
	    ctxt->wellFormed = 0;
	    return;
        case XML_PARSER_CONTENT:
	    return;
    }

    /*
     * Every state listed in the switch above returns, so the substitution
     * below is only reached for parser states that are not enumerated
     * there.
     */

    /* skip the '&' */
    NEXT;
    /*
     * NOTE(review): xmlScanName() presumably returns an allocated copy of
     * the name without consuming it, since the code below looks for the
     * ';' right after the name, skips both explicitly and frees name --
     * confirm.
     */
    name = xmlScanName(ctxt);
    if (name == NULL) {
	if ((ctxt->sax != NULL) && (ctxt->sax->error != NULL))
	    ctxt->sax->error(ctxt->userData, "Entity reference: no name\n");
	ctxt->wellFormed = 0;
	/* hand the '&' back as the pending token */
	ctxt->token = '&';
	return;
    }
    if (NXT(xmlStrlen(name)) != ';') {
	if ((ctxt->sax != NULL) && (ctxt->sax->error != NULL))
	    ctxt->sax->error(ctxt->userData, 
	                     "Entity reference: ';' expected\n");
	ctxt->wellFormed = 0;
	/* hand the '&' back as the pending token */
	ctxt->token = '&';
	xmlFree(name);
	return;
    }
    /* consume the name and the terminating ';' */
    SKIP(xmlStrlen(name) + 1);
    if (ctxt->sax != NULL) {
	if (ctxt->sax->getEntity != NULL)
	    ent = ctxt->sax->getEntity(ctxt->userData, name);
    }

    /*
     * [ WFC: Entity Declared ]
     * the Name given in the entity reference must match that in an entity
     * declaration, except that well-formed documents need not declare any
     * of the following entities: amp, lt, gt, apos, quot. 
     */
    if (ent == NULL)
	ent = xmlGetPredefinedEntity(name);
    if (ent == NULL) {
        if ((ctxt->sax != NULL) && (ctxt->sax->error != NULL))
	    ctxt->sax->error(ctxt->userData, 
			     "Entity reference: entity %s not declared\n",
			     name);
	ctxt->wellFormed = 0;
	xmlFree(name);
	return;
    }

    /*
     * [ WFC: Parsed Entity ]
     * An entity reference must not contain the name of an unparsed entity
     *
     * The error is reported but processing goes on below.
     */
    if (ent->type == XML_EXTERNAL_GENERAL_UNPARSED_ENTITY) {
        if ((ctxt->sax != NULL) && (ctxt->sax->error != NULL))
	    ctxt->sax->error(ctxt->userData, 
			 "Entity reference to unparsed entity %s\n", name);
	ctxt->wellFormed = 0;
    }

    /* predefined entity: its first character becomes the pending token */
    if (ent->type == XML_INTERNAL_PREDEFINED_ENTITY) {
        ctxt->token = ent->content[0];
	xmlFree(name);
	return;
    }
    /* other entities: parse their replacement as a stacked input (a NULL
     * input is ignored by xmlPushInput) */
    input = xmlNewEntityInputStream(ctxt, ent);
    xmlPushInput(ctxt, input);
    xmlFree(name);
    return;
}
994:
995: /**
996: * xmlParserHandlePEReference:
997: * @ctxt: the parser context
998: *
999: * [69] PEReference ::= '%' Name ';'
1000: *
1.98 daniel 1001: * [ WFC: No Recursion ]
1002: * TODO A parsed entity must not contain a recursive
1003: * reference to itself, either directly or indirectly.
1004: *
1005: * [ WFC: Entity Declared ]
1006: * In a document without any DTD, a document with only an internal DTD
1007: * subset which contains no parameter entity references, or a document
1008: * with "standalone='yes'", ... ... The declaration of a parameter
1009: * entity must precede any reference to it...
1010: *
1011: * [ VC: Entity Declared ]
1012: * In a document with an external subset or external parameter entities
1013: * with "standalone='no'", ... ... The declaration of a parameter entity
1014: * must precede any reference to it...
1015: *
1016: * [ WFC: In DTD ]
1017: * Parameter-entity references may only appear in the DTD.
1018: * NOTE: misleading but this is handled.
1019: *
1020: * A PEReference may have been detected in the current input stream
1.96 daniel 1021: * the handling is done accordingly to
1022: * http://www.w3.org/TR/REC-xml#entproc
1023: * i.e.
1024: * - Included in literal in entity values
1025: * - Included as Paraemeter Entity reference within DTDs
1026: */
1027: void
1028: xmlParserHandlePEReference(xmlParserCtxtPtr ctxt) {
1029: CHAR *name;
1030: xmlEntityPtr entity = NULL;
1031: xmlParserInputPtr input;
1032:
1.111 daniel 1033: if (ctxt->token != 0) return;
1034: if (CUR != '%') return;
1.96 daniel 1035: switch(ctxt->instate) {
1.109 daniel 1036: case XML_PARSER_CDATA_SECTION:
1037: return;
1.97 daniel 1038: case XML_PARSER_COMMENT:
1039: return;
1.96 daniel 1040: case XML_PARSER_EOF:
1041: if ((ctxt->sax != NULL) && (ctxt->sax->error != NULL))
1042: ctxt->sax->error(ctxt->userData, "PEReference at EOF\n");
1043: ctxt->wellFormed = 0;
1044: return;
1045: case XML_PARSER_PROLOG:
1046: if ((ctxt->sax != NULL) && (ctxt->sax->error != NULL))
1047: ctxt->sax->error(ctxt->userData, "PEReference in prolog!\n");
1048: ctxt->wellFormed = 0;
1049: return;
1.97 daniel 1050: case XML_PARSER_ENTITY_DECL:
1.96 daniel 1051: case XML_PARSER_CONTENT:
1052: case XML_PARSER_ATTRIBUTE_VALUE:
1053: /* we just ignore it there */
1054: return;
1055: case XML_PARSER_EPILOG:
1056: if ((ctxt->sax != NULL) && (ctxt->sax->error != NULL))
1.97 daniel 1057: ctxt->sax->error(ctxt->userData, "PEReference in epilog!\n");
1.96 daniel 1058: ctxt->wellFormed = 0;
1059: return;
1.97 daniel 1060: case XML_PARSER_ENTITY_VALUE:
1061: /*
1062: * NOTE: in the case of entity values, we don't do the
1063: * substitution here since we need the litteral
1064: * entity value to be able to save the internal
1065: * subset of the document.
1066: * This will be handled by xmlDecodeEntities
1067: */
1068: return;
1.96 daniel 1069: case XML_PARSER_DTD:
1.98 daniel 1070: /*
1071: * [WFC: Well-Formedness Constraint: PEs in Internal Subset]
1072: * In the internal DTD subset, parameter-entity references
1073: * can occur only where markup declarations can occur, not
1074: * within markup declarations.
1075: * In that case this is handled in xmlParseMarkupDecl
1076: */
1077: if ((ctxt->external == 0) && (ctxt->inputNr == 1))
1078: return;
1.96 daniel 1079: }
1080:
1081: NEXT;
1082: name = xmlParseName(ctxt);
1083: if (name == NULL) {
1084: if ((ctxt->sax != NULL) && (ctxt->sax->error != NULL))
1085: ctxt->sax->error(ctxt->userData, "xmlHandlePEReference: no name\n");
1086: ctxt->wellFormed = 0;
1087: } else {
1088: if (CUR == ';') {
1089: NEXT;
1.98 daniel 1090: if ((ctxt->sax != NULL) && (ctxt->sax->getParameterEntity != NULL))
1091: entity = ctxt->sax->getParameterEntity(ctxt->userData, name);
1.96 daniel 1092: if (entity == NULL) {
1.98 daniel 1093:
1094: /*
1095: * [ WFC: Entity Declared ]
1096: * In a document without any DTD, a document with only an
1097: * internal DTD subset which contains no parameter entity
1098: * references, or a document with "standalone='yes'", ...
1099: * ... The declaration of a parameter entity must precede
1100: * any reference to it...
1101: */
1102: if ((ctxt->standalone == 1) ||
1103: ((ctxt->hasExternalSubset == 0) &&
1104: (ctxt->hasPErefs == 0))) {
1105: if ((ctxt->sax != NULL) && (ctxt->sax->error != NULL))
1106: ctxt->sax->error(ctxt->userData,
1107: "PEReference: %%%s; not found\n", name);
1108: ctxt->wellFormed = 0;
1109: } else {
1110: /*
1111: * [ VC: Entity Declared ]
1112: * In a document with an external subset or external
1113: * parameter entities with "standalone='no'", ...
1114: * ... The declaration of a parameter entity must precede
1115: * any reference to it...
1116: */
1117: if ((ctxt->sax != NULL) && (ctxt->sax->warning != NULL))
1118: ctxt->sax->warning(ctxt->userData,
1119: "PEReference: %%%s; not found\n", name);
1120: ctxt->valid = 0;
1121: }
1.96 daniel 1122: } else {
1123: if ((entity->type == XML_INTERNAL_PARAMETER_ENTITY) ||
1124: (entity->type == XML_EXTERNAL_PARAMETER_ENTITY)) {
1125: /*
1126: * TODO !!!! handle the extra spaces added before and after
1127: * c.f. http://www.w3.org/TR/REC-xml#as-PE
1128: * TODO !!!! Avoid quote processing in parameters value
1129: * c.f. http://www.w3.org/TR/REC-xml#inliteral
1130: */
1131: input = xmlNewEntityInputStream(ctxt, entity);
1132: xmlPushInput(ctxt, input);
1133: } else {
1134: if ((ctxt->sax != NULL) && (ctxt->sax->error != NULL))
1135: ctxt->sax->error(ctxt->userData,
1136: "xmlHandlePEReference: %s is not a parameter entity\n",
1137: name);
1138: ctxt->wellFormed = 0;
1139: }
1140: }
1141: } else {
1142: if ((ctxt->sax != NULL) && (ctxt->sax->error != NULL))
1143: ctxt->sax->error(ctxt->userData,
1144: "xmlHandlePEReference: expecting ';'\n");
1145: ctxt->wellFormed = 0;
1146: }
1.119 daniel 1147: xmlFree(name);
1.97 daniel 1148: }
1149: }
1150:
1151: /*
 * Macro used to grow the current buffer.
 *
 * @buffer must be the name of a CHAR * variable, and a companion integer
 * variable named <buffer>_size must be in scope: the macro doubles that
 * size and reallocates @buffer to hold that many CHARs via xmlRealloc().
 *
 * If the reallocation fails the macro reports it with perror() and
 * terminates the process with exit(1); it never returns with a NULL
 * buffer.
 *
 * Since the block may be moved by the reallocation, any pointer into the
 * old buffer has to be recomputed by the caller afterwards.
 */
1154: #define growBuffer(buffer) { \
1155: buffer##_size *= 2; \
1.119 daniel 1156: buffer = (CHAR *) xmlRealloc(buffer, buffer##_size * sizeof(CHAR)); \
1.97 daniel 1157: if (buffer == NULL) { \
1158: perror("realloc failed"); \
1159: exit(1); \
1160: } \
1.96 daniel 1161: }
1.77 daniel 1162:
1163: /**
1164: * xmlDecodeEntities:
1165: * @ctxt: the parser context
1166: * @what: combination of XML_SUBSTITUTE_REF and XML_SUBSTITUTE_PEREF
1167: * @len: the len to decode (in bytes !), -1 for no size limit
1168: * @end: an end marker CHAR, 0 if none
1169: * @end2: an end marker CHAR, 0 if none
1170: * @end3: an end marker CHAR, 0 if none
1171: *
1172: * [67] Reference ::= EntityRef | CharRef
1173: *
1174: * [69] PEReference ::= '%' Name ';'
1175: *
1176: * Returns A newly allocated string with the substitution done. The caller
1177: * must deallocate it !
1178: */
1179: CHAR *
1180: xmlDecodeEntities(xmlParserCtxtPtr ctxt, int len, int what,
1181: CHAR end, CHAR end2, CHAR end3) {
1182: CHAR *buffer = NULL;
1.78 daniel 1183: int buffer_size = 0;
1.77 daniel 1184: CHAR *out = NULL;
1.78 daniel 1185:
1.97 daniel 1186: CHAR *current = NULL;
1.77 daniel 1187: xmlEntityPtr ent;
1.91 daniel 1188: int nbchars = 0;
1.77 daniel 1189: unsigned int max = (unsigned int) len;
1.97 daniel 1190: CHAR cur;
1.77 daniel 1191:
1192: /*
1193: * allocate a translation buffer.
1194: */
1195: buffer_size = 1000;
1.119 daniel 1196: buffer = (CHAR *) xmlMalloc(buffer_size * sizeof(CHAR));
1.77 daniel 1197: if (buffer == NULL) {
1198: perror("xmlDecodeEntities: malloc failed");
1199: return(NULL);
1200: }
1201: out = buffer;
1202:
1.78 daniel 1203: /*
1204: * Ok loop until we reach one of the ending char or a size limit.
1205: */
1.97 daniel 1206: cur = CUR;
1207: while ((nbchars < max) && (cur != end) &&
1208: (cur != end2) && (cur != end3)) {
1.77 daniel 1209:
1.98 daniel 1210: if (cur == 0) break;
1211: if ((cur == '&') && (NXT(1) == '#')) {
1212: int val = xmlParseCharRef(ctxt);
1213: *out++ = val;
1214: nbchars += 3;
1215: } else if ((cur == '&') && (what & XML_SUBSTITUTE_REF)) {
1216: ent = xmlParseEntityRef(ctxt);
1217: if ((ent != NULL) &&
1218: (ctxt->replaceEntities != 0)) {
1219: current = ent->content;
1220: while (*current != 0) {
1221: *out++ = *current++;
1222: if (out - buffer > buffer_size - 100) {
1223: int index = out - buffer;
1224:
1225: growBuffer(buffer);
1226: out = &buffer[index];
1.77 daniel 1227: }
1228: }
1.98 daniel 1229: nbchars += 3 + xmlStrlen(ent->name);
1230: } else if (ent != NULL) {
1231: int i = xmlStrlen(ent->name);
1232: const CHAR *cur = ent->name;
1233:
1234: nbchars += i + 2;
1235: *out++ = '&';
1236: if (out - buffer > buffer_size - i - 100) {
1237: int index = out - buffer;
1238:
1239: growBuffer(buffer);
1240: out = &buffer[index];
1241: }
1242: for (;i > 0;i--)
1243: *out++ = *cur++;
1244: *out++ = ';';
1.77 daniel 1245: }
1.97 daniel 1246: } else if (cur == '%' && (what & XML_SUBSTITUTE_PEREF)) {
1247: /*
1.77 daniel 1248: * a PEReference induce to switch the entity flow,
1249: * we break here to flush the current set of chars
1250: * parsed if any. We will be called back later.
1.97 daniel 1251: */
1.91 daniel 1252: if (nbchars != 0) break;
1.77 daniel 1253:
1254: xmlParsePEReference(ctxt);
1.79 daniel 1255:
1.97 daniel 1256: /*
1.79 daniel 1257: * Pop-up of finished entities.
1.97 daniel 1258: */
1.79 daniel 1259: while ((CUR == 0) && (ctxt->inputNr > 1))
1260: xmlPopInput(ctxt);
1261:
1.98 daniel 1262: break;
1.77 daniel 1263: } else {
1.116 daniel 1264: /* invalid for UTF-8 , use COPY(out); !!!!!! */
1.97 daniel 1265: *out++ = cur;
1.91 daniel 1266: nbchars++;
1.86 daniel 1267: if (out - buffer > buffer_size - 100) {
1268: int index = out - buffer;
1269:
1270: growBuffer(buffer);
1271: out = &buffer[index];
1272: }
1.77 daniel 1273: NEXT;
1274: }
1.97 daniel 1275: cur = CUR;
1.77 daniel 1276: }
1277: *out++ = 0;
1278: return(buffer);
1279: }
1280:
1.1 veillard 1281:
1.28 daniel 1282: /************************************************************************
1283: * *
1.75 daniel 1284: * Commodity functions to handle encodings *
1285: * *
1286: ************************************************************************/
1287:
1288: /**
1289: * xmlSwitchEncoding:
1290: * @ctxt: the parser context
1291: * @len: the len of @cur
1292: *
1293: * change the input functions when discovering the character encoding
1294: * of a given entity.
1295: *
1296: */
1297: void
1298: xmlSwitchEncoding(xmlParserCtxtPtr ctxt, xmlCharEncoding enc)
1299: {
1300: switch (enc) {
1301: case XML_CHAR_ENCODING_ERROR:
1302: if ((ctxt->sax != NULL) && (ctxt->sax->error != NULL))
1303: ctxt->sax->error(ctxt->userData, "encoding unknown\n");
1304: ctxt->wellFormed = 0;
1305: break;
1306: case XML_CHAR_ENCODING_NONE:
1307: /* let's assume it's UTF-8 without the XML decl */
1308: return;
1309: case XML_CHAR_ENCODING_UTF8:
1310: /* default encoding, no conversion should be needed */
1311: return;
1312: case XML_CHAR_ENCODING_UTF16LE:
1313: if ((ctxt->sax != NULL) && (ctxt->sax->error != NULL))
1314: ctxt->sax->error(ctxt->userData,
1315: "char encoding UTF16 little endian not supported\n");
1316: break;
1317: case XML_CHAR_ENCODING_UTF16BE:
1318: if ((ctxt->sax != NULL) && (ctxt->sax->error != NULL))
1319: ctxt->sax->error(ctxt->userData,
1320: "char encoding UTF16 big endian not supported\n");
1321: break;
1322: case XML_CHAR_ENCODING_UCS4LE:
1323: if ((ctxt->sax != NULL) && (ctxt->sax->error != NULL))
1324: ctxt->sax->error(ctxt->userData,
1325: "char encoding USC4 little endian not supported\n");
1326: break;
1327: case XML_CHAR_ENCODING_UCS4BE:
1328: if ((ctxt->sax != NULL) && (ctxt->sax->error != NULL))
1329: ctxt->sax->error(ctxt->userData,
1330: "char encoding USC4 big endian not supported\n");
1331: break;
1332: case XML_CHAR_ENCODING_EBCDIC:
1333: if ((ctxt->sax != NULL) && (ctxt->sax->error != NULL))
1334: ctxt->sax->error(ctxt->userData,
1335: "char encoding EBCDIC not supported\n");
1336: break;
1337: case XML_CHAR_ENCODING_UCS4_2143:
1338: if ((ctxt->sax != NULL) && (ctxt->sax->error != NULL))
1339: ctxt->sax->error(ctxt->userData,
1340: "char encoding UCS4 2143 not supported\n");
1341: break;
1342: case XML_CHAR_ENCODING_UCS4_3412:
1343: if ((ctxt->sax != NULL) && (ctxt->sax->error != NULL))
1344: ctxt->sax->error(ctxt->userData,
1345: "char encoding UCS4 3412 not supported\n");
1346: break;
1347: case XML_CHAR_ENCODING_UCS2:
1348: if ((ctxt->sax != NULL) && (ctxt->sax->error != NULL))
1349: ctxt->sax->error(ctxt->userData,
1350: "char encoding UCS2 not supported\n");
1351: break;
1352: case XML_CHAR_ENCODING_8859_1:
1353: if ((ctxt->sax != NULL) && (ctxt->sax->error != NULL))
1354: ctxt->sax->error(ctxt->userData,
1355: "char encoding ISO_8859_1 ISO Latin 1 not supported\n");
1356: break;
1357: case XML_CHAR_ENCODING_8859_2:
1358: if ((ctxt->sax != NULL) && (ctxt->sax->error != NULL))
1359: ctxt->sax->error(ctxt->userData,
1360: "char encoding ISO_8859_2 ISO Latin 2 not supported\n");
1361: break;
1362: case XML_CHAR_ENCODING_8859_3:
1363: if ((ctxt->sax != NULL) && (ctxt->sax->error != NULL))
1364: ctxt->sax->error(ctxt->userData,
1365: "char encoding ISO_8859_3 not supported\n");
1366: break;
1367: case XML_CHAR_ENCODING_8859_4:
1368: if ((ctxt->sax != NULL) && (ctxt->sax->error != NULL))
1369: ctxt->sax->error(ctxt->userData,
1370: "char encoding ISO_8859_4 not supported\n");
1371: break;
1372: case XML_CHAR_ENCODING_8859_5:
1373: if ((ctxt->sax != NULL) && (ctxt->sax->error != NULL))
1374: ctxt->sax->error(ctxt->userData,
1375: "char encoding ISO_8859_5 not supported\n");
1376: break;
1377: case XML_CHAR_ENCODING_8859_6:
1378: if ((ctxt->sax != NULL) && (ctxt->sax->error != NULL))
1379: ctxt->sax->error(ctxt->userData,
1380: "char encoding ISO_8859_6 not supported\n");
1381: break;
1382: case XML_CHAR_ENCODING_8859_7:
1383: if ((ctxt->sax != NULL) && (ctxt->sax->error != NULL))
1384: ctxt->sax->error(ctxt->userData,
1385: "char encoding ISO_8859_7 not supported\n");
1386: break;
1387: case XML_CHAR_ENCODING_8859_8:
1388: if ((ctxt->sax != NULL) && (ctxt->sax->error != NULL))
1389: ctxt->sax->error(ctxt->userData,
1390: "char encoding ISO_8859_8 not supported\n");
1391: break;
1392: case XML_CHAR_ENCODING_8859_9:
1393: if ((ctxt->sax != NULL) && (ctxt->sax->error != NULL))
1394: ctxt->sax->error(ctxt->userData,
1395: "char encoding ISO_8859_9 not supported\n");
1396: break;
1397: case XML_CHAR_ENCODING_2022_JP:
1398: if ((ctxt->sax != NULL) && (ctxt->sax->error != NULL))
1399: ctxt->sax->error(ctxt->userData,
1400: "char encoding ISO-2022-JPnot supported\n");
1401: break;
1402: case XML_CHAR_ENCODING_SHIFT_JIS:
1403: if ((ctxt->sax != NULL) && (ctxt->sax->error != NULL))
1404: ctxt->sax->error(ctxt->userData,
1405: "char encoding Shift_JISnot supported\n");
1406: break;
1407: case XML_CHAR_ENCODING_EUC_JP:
1408: if ((ctxt->sax != NULL) && (ctxt->sax->error != NULL))
1409: ctxt->sax->error(ctxt->userData,
1410: "char encoding EUC-JPnot supported\n");
1411: break;
1412: }
1413: }
1414:
1415: /************************************************************************
1416: * *
1.28 daniel 1417: * Commodity functions to handle CHARs *
1418: * *
1419: ************************************************************************/
1420:
1.50 daniel 1421: /**
1422: * xmlStrndup:
1423: * @cur: the input CHAR *
1424: * @len: the len of @cur
1425: *
1426: * a strndup for array of CHAR's
1.68 daniel 1427: *
1428: * Returns a new CHAR * or NULL
1.1 veillard 1429: */
1.55 daniel 1430: CHAR *
1431: xmlStrndup(const CHAR *cur, int len) {
1.119 daniel 1432: CHAR *ret = xmlMalloc((len + 1) * sizeof(CHAR));
1.1 veillard 1433:
1434: if (ret == NULL) {
1.86 daniel 1435: fprintf(stderr, "malloc of %ld byte failed\n",
1436: (len + 1) * (long)sizeof(CHAR));
1.1 veillard 1437: return(NULL);
1438: }
1439: memcpy(ret, cur, len * sizeof(CHAR));
1440: ret[len] = 0;
1441: return(ret);
1442: }
1443:
1.50 daniel 1444: /**
1445: * xmlStrdup:
1446: * @cur: the input CHAR *
1447: *
1448: * a strdup for array of CHAR's
1.68 daniel 1449: *
1450: * Returns a new CHAR * or NULL
1.1 veillard 1451: */
1.55 daniel 1452: CHAR *
1453: xmlStrdup(const CHAR *cur) {
1.6 httpng 1454: const CHAR *p = cur;
1.1 veillard 1455:
1456: while (IS_CHAR(*p)) p++;
1457: return(xmlStrndup(cur, p - cur));
1458: }
1459:
1.50 daniel 1460: /**
1461: * xmlCharStrndup:
1462: * @cur: the input char *
1463: * @len: the len of @cur
1464: *
1465: * a strndup for char's to CHAR's
1.68 daniel 1466: *
1467: * Returns a new CHAR * or NULL
1.45 daniel 1468: */
1469:
1.55 daniel 1470: CHAR *
1471: xmlCharStrndup(const char *cur, int len) {
1.45 daniel 1472: int i;
1.119 daniel 1473: CHAR *ret = xmlMalloc((len + 1) * sizeof(CHAR));
1.45 daniel 1474:
1475: if (ret == NULL) {
1.86 daniel 1476: fprintf(stderr, "malloc of %ld byte failed\n",
1477: (len + 1) * (long)sizeof(CHAR));
1.45 daniel 1478: return(NULL);
1479: }
1480: for (i = 0;i < len;i++)
1481: ret[i] = (CHAR) cur[i];
1482: ret[len] = 0;
1483: return(ret);
1484: }
1485:
1.50 daniel 1486: /**
1487: * xmlCharStrdup:
1488: * @cur: the input char *
1489: * @len: the len of @cur
1490: *
1491: * a strdup for char's to CHAR's
1.68 daniel 1492: *
1493: * Returns a new CHAR * or NULL
1.45 daniel 1494: */
1495:
1.55 daniel 1496: CHAR *
1497: xmlCharStrdup(const char *cur) {
1.45 daniel 1498: const char *p = cur;
1499:
1500: while (*p != '\0') p++;
1501: return(xmlCharStrndup(cur, p - cur));
1502: }
1503:
1.50 daniel 1504: /**
1505: * xmlStrcmp:
1506: * @str1: the first CHAR *
1507: * @str2: the second CHAR *
1508: *
1509: * a strcmp for CHAR's
1.68 daniel 1510: *
1511: * Returns the integer result of the comparison
1.14 veillard 1512: */
1513:
1.55 daniel 1514: int
1515: xmlStrcmp(const CHAR *str1, const CHAR *str2) {
1.14 veillard 1516: register int tmp;
1517:
1518: do {
1519: tmp = *str1++ - *str2++;
1520: if (tmp != 0) return(tmp);
1521: } while ((*str1 != 0) && (*str2 != 0));
1522: return (*str1 - *str2);
1523: }
1524:
1.50 daniel 1525: /**
1526: * xmlStrncmp:
1527: * @str1: the first CHAR *
1528: * @str2: the second CHAR *
1529: * @len: the max comparison length
1530: *
1531: * a strncmp for CHAR's
1.68 daniel 1532: *
1533: * Returns the integer result of the comparison
1.14 veillard 1534: */
1535:
1.55 daniel 1536: int
1537: xmlStrncmp(const CHAR *str1, const CHAR *str2, int len) {
1.14 veillard 1538: register int tmp;
1539:
1540: if (len <= 0) return(0);
1541: do {
1542: tmp = *str1++ - *str2++;
1543: if (tmp != 0) return(tmp);
1544: len--;
1545: if (len <= 0) return(0);
1546: } while ((*str1 != 0) && (*str2 != 0));
1547: return (*str1 - *str2);
1548: }
1549:
1.50 daniel 1550: /**
1551: * xmlStrchr:
1552: * @str: the CHAR * array
1553: * @val: the CHAR to search
1554: *
1555: * a strchr for CHAR's
1.68 daniel 1556: *
1557: * Returns the CHAR * for the first occurence or NULL.
1.14 veillard 1558: */
1559:
1.89 daniel 1560: const CHAR *
1.55 daniel 1561: xmlStrchr(const CHAR *str, CHAR val) {
1.14 veillard 1562: while (*str != 0) {
1563: if (*str == val) return((CHAR *) str);
1564: str++;
1565: }
1566: return(NULL);
1.89 daniel 1567: }
1568:
1569: /**
1570: * xmlStrstr:
1571: * @str: the CHAR * array (haystack)
1572: * @val: the CHAR to search (needle)
1573: *
1574: * a strstr for CHAR's
1575: *
1576: * Returns the CHAR * for the first occurence or NULL.
1577: */
1578:
1579: const CHAR *
1580: xmlStrstr(const CHAR *str, CHAR *val) {
1581: int n;
1582:
1583: if (str == NULL) return(NULL);
1584: if (val == NULL) return(NULL);
1585: n = xmlStrlen(val);
1586:
1587: if (n == 0) return(str);
1588: while (*str != 0) {
1589: if (*str == *val) {
1590: if (!xmlStrncmp(str, val, n)) return((const CHAR *) str);
1591: }
1592: str++;
1593: }
1594: return(NULL);
1595: }
1596:
1597: /**
1598: * xmlStrsub:
1599: * @str: the CHAR * array (haystack)
1600: * @start: the index of the first char (zero based)
1601: * @len: the length of the substring
1602: *
1603: * Extract a substring of a given string
1604: *
1605: * Returns the CHAR * for the first occurence or NULL.
1606: */
1607:
1608: CHAR *
1609: xmlStrsub(const CHAR *str, int start, int len) {
1610: int i;
1611:
1612: if (str == NULL) return(NULL);
1613: if (start < 0) return(NULL);
1.90 daniel 1614: if (len < 0) return(NULL);
1.89 daniel 1615:
1616: for (i = 0;i < start;i++) {
1617: if (*str == 0) return(NULL);
1618: str++;
1619: }
1620: if (*str == 0) return(NULL);
1621: return(xmlStrndup(str, len));
1.14 veillard 1622: }
1.28 daniel 1623:
1.50 daniel 1624: /**
1625: * xmlStrlen:
1626: * @str: the CHAR * array
1627: *
1628: * lenght of a CHAR's string
1.68 daniel 1629: *
1630: * Returns the number of CHAR contained in the ARRAY.
1.45 daniel 1631: */
1632:
1.55 daniel 1633: int
1634: xmlStrlen(const CHAR *str) {
1.45 daniel 1635: int len = 0;
1636:
1637: if (str == NULL) return(0);
1638: while (*str != 0) {
1639: str++;
1640: len++;
1641: }
1642: return(len);
1643: }
1644:
1.50 daniel 1645: /**
1646: * xmlStrncat:
1.68 daniel 1647: * @cur: the original CHAR * array
1.50 daniel 1648: * @add: the CHAR * array added
1649: * @len: the length of @add
1650: *
1651: * a strncat for array of CHAR's
1.68 daniel 1652: *
1653: * Returns a new CHAR * containing the concatenated string.
1.45 daniel 1654: */
1655:
1.55 daniel 1656: CHAR *
1657: xmlStrncat(CHAR *cur, const CHAR *add, int len) {
1.45 daniel 1658: int size;
1659: CHAR *ret;
1660:
1661: if ((add == NULL) || (len == 0))
1662: return(cur);
1663: if (cur == NULL)
1664: return(xmlStrndup(add, len));
1665:
1666: size = xmlStrlen(cur);
1.119 daniel 1667: ret = xmlRealloc(cur, (size + len + 1) * sizeof(CHAR));
1.45 daniel 1668: if (ret == NULL) {
1.86 daniel 1669: fprintf(stderr, "xmlStrncat: realloc of %ld byte failed\n",
1670: (size + len + 1) * (long)sizeof(CHAR));
1.45 daniel 1671: return(cur);
1672: }
1673: memcpy(&ret[size], add, len * sizeof(CHAR));
1674: ret[size + len] = 0;
1675: return(ret);
1676: }
1677:
1.50 daniel 1678: /**
1679: * xmlStrcat:
1.68 daniel 1680: * @cur: the original CHAR * array
1.50 daniel 1681: * @add: the CHAR * array added
1682: *
1683: * a strcat for array of CHAR's
1.68 daniel 1684: *
1685: * Returns a new CHAR * containing the concatenated string.
1.45 daniel 1686: */
1.55 daniel 1687: CHAR *
1688: xmlStrcat(CHAR *cur, const CHAR *add) {
1.45 daniel 1689: const CHAR *p = add;
1690:
1691: if (add == NULL) return(cur);
1692: if (cur == NULL)
1693: return(xmlStrdup(add));
1694:
1695: while (IS_CHAR(*p)) p++;
1696: return(xmlStrncat(cur, add, p - add));
1697: }
1698:
1699: /************************************************************************
1700: * *
1701: * Commodity functions, cleanup needed ? *
1702: * *
1703: ************************************************************************/
1704:
1.50 daniel 1705: /**
1706: * areBlanks:
1707: * @ctxt: an XML parser context
1708: * @str: a CHAR *
1709: * @len: the size of @str
1710: *
1.45 daniel 1711: * Is this a sequence of blank chars that one can ignore ?
1.50 daniel 1712: *
1.99 daniel 1713: * TODO: Whether white space are significant has to be checked accordingly
1714: * to DTD informations if available
1.68 daniel 1715: *
1716: * Returns 1 if ignorable 0 otherwise.
1.45 daniel 1717: */
1718:
1719: static int areBlanks(xmlParserCtxtPtr ctxt, const CHAR *str, int len) {
1.104 daniel 1720: int i, ret;
1.45 daniel 1721: xmlNodePtr lastChild;
1722:
1723: for (i = 0;i < len;i++)
1724: if (!(IS_BLANK(str[i]))) return(0);
1725:
1726: if (CUR != '<') return(0);
1.72 daniel 1727: if (ctxt->node == NULL) return(0);
1.104 daniel 1728: if (ctxt->myDoc != NULL) {
1729: ret = xmlIsMixedElement(ctxt->myDoc, ctxt->node->name);
1730: if (ret == 0) return(1);
1731: if (ret == 1) return(0);
1732: }
1733: /*
1734: * heuristic
1735: */
1.45 daniel 1736: lastChild = xmlGetLastChild(ctxt->node);
1737: if (lastChild == NULL) {
1738: if (ctxt->node->content != NULL) return(0);
1739: } else if (xmlNodeIsText(lastChild))
1740: return(0);
1.104 daniel 1741: else if ((ctxt->node->childs != NULL) &&
1742: (xmlNodeIsText(ctxt->node->childs)))
1743: return(0);
1.45 daniel 1744: return(1);
1745: }
1746:
1.50 daniel 1747: /**
1748: * xmlHandleEntity:
1749: * @ctxt: an XML parser context
1750: * @entity: an XML entity pointer.
1751: *
1752: * Default handling of defined entities, when should we define a new input
1.45 daniel 1753: * stream ? When do we just handle that as a set of chars ?
1.99 daniel 1754: *
1755: * OBSOLETE: to be removed at some point.
1.45 daniel 1756: */
1757:
1.55 daniel 1758: void
1759: xmlHandleEntity(xmlParserCtxtPtr ctxt, xmlEntityPtr entity) {
1.45 daniel 1760: int len;
1.50 daniel 1761: xmlParserInputPtr input;
1.45 daniel 1762:
1763: if (entity->content == NULL) {
1.55 daniel 1764: if ((ctxt->sax != NULL) && (ctxt->sax->error != NULL))
1.74 daniel 1765: ctxt->sax->error(ctxt->userData, "xmlHandleEntity %s: content == NULL\n",
1.45 daniel 1766: entity->name);
1.59 daniel 1767: ctxt->wellFormed = 0;
1.45 daniel 1768: return;
1769: }
1770: len = xmlStrlen(entity->content);
1771: if (len <= 2) goto handle_as_char;
1772:
1773: /*
1774: * Redefine its content as an input stream.
1775: */
1.50 daniel 1776: input = xmlNewEntityInputStream(ctxt, entity);
1777: xmlPushInput(ctxt, input);
1.45 daniel 1778: return;
1779:
1780: handle_as_char:
1781: /*
1782: * Just handle the content as a set of chars.
1783: */
1.72 daniel 1784: if ((ctxt->sax != NULL) && (ctxt->sax->characters != NULL))
1.74 daniel 1785: ctxt->sax->characters(ctxt->userData, entity->content, len);
1.45 daniel 1786:
1787: }
1788:
1789: /*
 * Forward declarations for recursive behaviour: these two parsing
 * routines are declared here so they can be called before the point
 * where they are defined.
 * NOTE(review): presumably both are defined further down in this file -
 * confirm.
 */
1.77 daniel 1792: void xmlParsePEReference(xmlParserCtxtPtr ctxt);
1793: void xmlParseReference(xmlParserCtxtPtr ctxt);
1.45 daniel 1794:
1.28 daniel 1795: /************************************************************************
1796: * *
1797: * Extra stuff for namespace support *
1798: * Relates to http://www.w3.org/TR/WD-xml-names *
1799: * *
1800: ************************************************************************/
1801:
1.50 daniel 1802: /**
1803: * xmlNamespaceParseNCName:
1804: * @ctxt: an XML parser context
1805: *
1806: * parse an XML namespace name.
1.28 daniel 1807: *
1808: * [NS 3] NCName ::= (Letter | '_') (NCNameChar)*
1809: *
1810: * [NS 4] NCNameChar ::= Letter | Digit | '.' | '-' | '_' |
1811: * CombiningChar | Extender
1.68 daniel 1812: *
1813: * Returns the namespace name or NULL
1.28 daniel 1814: */
1815:
1.55 daniel 1816: CHAR *
1817: xmlNamespaceParseNCName(xmlParserCtxtPtr ctxt) {
1.91 daniel 1818: CHAR buf[XML_MAX_NAMELEN];
1819: int len = 0;
1.28 daniel 1820:
1.40 daniel 1821: if (!IS_LETTER(CUR) && (CUR != '_')) return(NULL);
1.28 daniel 1822:
1.40 daniel 1823: while ((IS_LETTER(CUR)) || (IS_DIGIT(CUR)) ||
1824: (CUR == '.') || (CUR == '-') ||
1825: (CUR == '_') ||
1826: (IS_COMBINING(CUR)) ||
1.91 daniel 1827: (IS_EXTENDER(CUR))) {
1828: buf[len++] = CUR;
1.40 daniel 1829: NEXT;
1.91 daniel 1830: if (len >= XML_MAX_NAMELEN) {
1831: fprintf(stderr,
1832: "xmlNamespaceParseNCName: reached XML_MAX_NAMELEN limit\n");
1833: while ((IS_LETTER(CUR)) || (IS_DIGIT(CUR)) ||
1834: (CUR == '.') || (CUR == '-') ||
1835: (CUR == '_') ||
1836: (IS_COMBINING(CUR)) ||
1837: (IS_EXTENDER(CUR)))
1838: NEXT;
1839: break;
1840: }
1841: }
1842: return(xmlStrndup(buf, len));
1.28 daniel 1843: }
1844:
1.50 daniel 1845: /**
1846: * xmlNamespaceParseQName:
1847: * @ctxt: an XML parser context
1848: * @prefix: a CHAR **
1849: *
1850: * parse an XML qualified name
1.28 daniel 1851: *
1852: * [NS 5] QName ::= (Prefix ':')? LocalPart
1853: *
1854: * [NS 6] Prefix ::= NCName
1855: *
1856: * [NS 7] LocalPart ::= NCName
1.68 daniel 1857: *
1858: * Returns the function returns the local part, and prefix is updated
1.50 daniel 1859: * to get the Prefix if any.
1.28 daniel 1860: */
1861:
1.55 daniel 1862: CHAR *
1863: xmlNamespaceParseQName(xmlParserCtxtPtr ctxt, CHAR **prefix) {
1.28 daniel 1864: CHAR *ret = NULL;
1865:
1866: *prefix = NULL;
1867: ret = xmlNamespaceParseNCName(ctxt);
1.40 daniel 1868: if (CUR == ':') {
1.28 daniel 1869: *prefix = ret;
1.40 daniel 1870: NEXT;
1.28 daniel 1871: ret = xmlNamespaceParseNCName(ctxt);
1872: }
1873:
1874: return(ret);
1875: }
1876:
1.50 daniel 1877: /**
1.72 daniel 1878: * xmlSplitQName:
1879: * @name: an XML parser context
1880: * @prefix: a CHAR **
1881: *
1882: * parse an XML qualified name string
1883: *
1884: * [NS 5] QName ::= (Prefix ':')? LocalPart
1885: *
1886: * [NS 6] Prefix ::= NCName
1887: *
1888: * [NS 7] LocalPart ::= NCName
1889: *
1890: * Returns the function returns the local part, and prefix is updated
1891: * to get the Prefix if any.
1892: */
1893:
1894: CHAR *
1895: xmlSplitQName(const CHAR *name, CHAR **prefix) {
1896: CHAR *ret = NULL;
1897: const CHAR *q;
1898: const CHAR *cur = name;
1899:
1900: *prefix = NULL;
1.113 daniel 1901:
1902: /* xml: prefix is not really a namespace */
1903: if ((cur[0] == 'x') && (cur[1] == 'm') &&
1904: (cur[2] == 'l') && (cur[3] == ':'))
1905: return(xmlStrdup(name));
1906:
1.72 daniel 1907: if (!IS_LETTER(*cur) && (*cur != '_')) return(NULL);
1908: q = cur++;
1909:
1910: while ((IS_LETTER(*cur)) || (IS_DIGIT(*cur)) ||
1911: (*cur == '.') || (*cur == '-') ||
1912: (*cur == '_') ||
1913: (IS_COMBINING(*cur)) ||
1914: (IS_EXTENDER(*cur)))
1915: cur++;
1916:
1917: ret = xmlStrndup(q, cur - q);
1918:
1919: if (*cur == ':') {
1920: cur++;
1921: if (!IS_LETTER(*cur) && (*cur != '_')) return(ret);
1922: *prefix = ret;
1923:
1924: q = cur++;
1925:
1926: while ((IS_LETTER(*cur)) || (IS_DIGIT(*cur)) ||
1927: (*cur == '.') || (*cur == '-') ||
1928: (*cur == '_') ||
1929: (IS_COMBINING(*cur)) ||
1930: (IS_EXTENDER(*cur)))
1931: cur++;
1932:
1933: ret = xmlStrndup(q, cur - q);
1934: }
1935:
1936: return(ret);
1937: }
1938: /**
1.50 daniel 1939: * xmlNamespaceParseNSDef:
1940: * @ctxt: an XML parser context
1941: *
1942: * parse a namespace prefix declaration
1.28 daniel 1943: *
1944: * [NS 1] NSDef ::= PrefixDef Eq SystemLiteral
1945: *
1946: * [NS 2] PrefixDef ::= 'xmlns' (':' NCName)?
1.68 daniel 1947: *
1948: * Returns the namespace name
1.28 daniel 1949: */
1950:
1.55 daniel 1951: CHAR *
1952: xmlNamespaceParseNSDef(xmlParserCtxtPtr ctxt) {
1.28 daniel 1953: CHAR *name = NULL;
1954:
1.40 daniel 1955: if ((CUR == 'x') && (NXT(1) == 'm') &&
1956: (NXT(2) == 'l') && (NXT(3) == 'n') &&
1957: (NXT(4) == 's')) {
1958: SKIP(5);
1959: if (CUR == ':') {
1960: NEXT;
1.28 daniel 1961: name = xmlNamespaceParseNCName(ctxt);
1962: }
1963: }
1.39 daniel 1964: return(name);
1.28 daniel 1965: }
1966:
1.50 daniel 1967: /**
1968: * xmlParseQuotedString:
1969: * @ctxt: an XML parser context
1970: *
1.45 daniel 1971: * [OLD] Parse and return a string between quotes or doublequotes
1.110 daniel 1972: * To be removed at next drop of binary compatibility
1.68 daniel 1973: *
1974: * Returns the string parser or NULL.
1.45 daniel 1975: */
1.55 daniel 1976: CHAR *
1977: xmlParseQuotedString(xmlParserCtxtPtr ctxt) {
1.45 daniel 1978: CHAR *ret = NULL;
1979: const CHAR *q;
1980:
1981: if (CUR == '"') {
1982: NEXT;
1983: q = CUR_PTR;
1984: while (IS_CHAR(CUR) && (CUR != '"')) NEXT;
1.55 daniel 1985: if (CUR != '"') {
1986: if ((ctxt->sax != NULL) && (ctxt->sax->error != NULL))
1.74 daniel 1987: ctxt->sax->error(ctxt->userData, "String not closed \"%.50s\"\n", q);
1.59 daniel 1988: ctxt->wellFormed = 0;
1.55 daniel 1989: } else {
1.45 daniel 1990: ret = xmlStrndup(q, CUR_PTR - q);
1991: NEXT;
1992: }
1993: } else if (CUR == '\''){
1994: NEXT;
1995: q = CUR_PTR;
1996: while (IS_CHAR(CUR) && (CUR != '\'')) NEXT;
1.55 daniel 1997: if (CUR != '\'') {
1998: if ((ctxt->sax != NULL) && (ctxt->sax->error != NULL))
1.74 daniel 1999: ctxt->sax->error(ctxt->userData, "String not closed \"%.50s\"\n", q);
1.59 daniel 2000: ctxt->wellFormed = 0;
1.55 daniel 2001: } else {
1.45 daniel 2002: ret = xmlStrndup(q, CUR_PTR - q);
2003: NEXT;
2004: }
2005: }
2006: return(ret);
2007: }
2008:
1.50 daniel 2009: /**
2010: * xmlParseNamespace:
2011: * @ctxt: an XML parser context
2012: *
1.45 daniel 2013: * [OLD] xmlParseNamespace: parse specific PI '<?namespace ...' constructs.
2014: *
2015: * This is what the older xml-name Working Draft specified, a bunch of
2016: * other stuff may still rely on it, so support is still here as
2017: * if ot was declared on the root of the Tree:-(
1.110 daniel 2018: *
2019: * To be removed at next drop of binary compatibility
1.45 daniel 2020: */
2021:
1.55 daniel 2022: void
2023: xmlParseNamespace(xmlParserCtxtPtr ctxt) {
1.45 daniel 2024: CHAR *href = NULL;
2025: CHAR *prefix = NULL;
2026: int garbage = 0;
2027:
2028: /*
2029: * We just skipped "namespace" or "xml:namespace"
2030: */
2031: SKIP_BLANKS;
2032:
2033: while (IS_CHAR(CUR) && (CUR != '>')) {
2034: /*
2035: * We can have "ns" or "prefix" attributes
2036: * Old encoding as 'href' or 'AS' attributes is still supported
2037: */
2038: if ((CUR == 'n') && (NXT(1) == 's')) {
2039: garbage = 0;
2040: SKIP(2);
2041: SKIP_BLANKS;
2042:
2043: if (CUR != '=') continue;
2044: NEXT;
2045: SKIP_BLANKS;
2046:
2047: href = xmlParseQuotedString(ctxt);
2048: SKIP_BLANKS;
2049: } else if ((CUR == 'h') && (NXT(1) == 'r') &&
2050: (NXT(2) == 'e') && (NXT(3) == 'f')) {
2051: garbage = 0;
2052: SKIP(4);
2053: SKIP_BLANKS;
2054:
2055: if (CUR != '=') continue;
2056: NEXT;
2057: SKIP_BLANKS;
2058:
2059: href = xmlParseQuotedString(ctxt);
2060: SKIP_BLANKS;
2061: } else if ((CUR == 'p') && (NXT(1) == 'r') &&
2062: (NXT(2) == 'e') && (NXT(3) == 'f') &&
2063: (NXT(4) == 'i') && (NXT(5) == 'x')) {
2064: garbage = 0;
2065: SKIP(6);
2066: SKIP_BLANKS;
2067:
2068: if (CUR != '=') continue;
2069: NEXT;
2070: SKIP_BLANKS;
2071:
2072: prefix = xmlParseQuotedString(ctxt);
2073: SKIP_BLANKS;
2074: } else if ((CUR == 'A') && (NXT(1) == 'S')) {
2075: garbage = 0;
2076: SKIP(2);
2077: SKIP_BLANKS;
2078:
2079: if (CUR != '=') continue;
2080: NEXT;
2081: SKIP_BLANKS;
2082:
2083: prefix = xmlParseQuotedString(ctxt);
2084: SKIP_BLANKS;
2085: } else if ((CUR == '?') && (NXT(1) == '>')) {
2086: garbage = 0;
1.91 daniel 2087: NEXT;
1.45 daniel 2088: } else {
2089: /*
2090: * Found garbage when parsing the namespace
2091: */
2092: if (!garbage)
1.55 daniel 2093: if ((ctxt->sax != NULL) && (ctxt->sax->error != NULL))
1.74 daniel 2094: ctxt->sax->error(ctxt->userData, "xmlParseNamespace found garbage\n");
1.59 daniel 2095: ctxt->wellFormed = 0;
1.45 daniel 2096: NEXT;
2097: }
2098: }
2099:
2100: MOVETO_ENDTAG(CUR_PTR);
2101: NEXT;
2102:
2103: /*
2104: * Register the DTD.
1.72 daniel 2105: if (href != NULL)
2106: if ((ctxt->sax != NULL) && (ctxt->sax->globalNamespace != NULL))
1.74 daniel 2107: ctxt->sax->globalNamespace(ctxt->userData, href, prefix);
1.45 daniel 2108: */
2109:
1.119 daniel 2110: if (prefix != NULL) xmlFree(prefix);
2111: if (href != NULL) xmlFree(href);
1.45 daniel 2112: }
2113:
1.28 daniel 2114: /************************************************************************
2115: * *
2116: * The parser itself *
2117: * Relates to http://www.w3.org/TR/REC-xml *
2118: * *
2119: ************************************************************************/
1.14 veillard 2120:
1.50 daniel 2121: /**
1.97 daniel 2122: * xmlScanName:
2123: * @ctxt: an XML parser context
2124: *
2125: * Trickery: parse an XML name but without consuming the input flow
2126: * Needed for rollback cases.
2127: *
2128: * [4] NameChar ::= Letter | Digit | '.' | '-' | '_' | ':' |
2129: * CombiningChar | Extender
2130: *
2131: * [5] Name ::= (Letter | '_' | ':') (NameChar)*
2132: *
2133: * [6] Names ::= Name (S Name)*
2134: *
2135: * Returns the Name parsed or NULL
2136: */
2137:
2138: CHAR *
2139: xmlScanName(xmlParserCtxtPtr ctxt) {
2140: CHAR buf[XML_MAX_NAMELEN];
2141: int len = 0;
2142:
2143: GROW;
2144: if (!IS_LETTER(CUR) && (CUR != '_') &&
2145: (CUR != ':')) {
2146: return(NULL);
2147: }
2148:
2149: while ((IS_LETTER(NXT(len))) || (IS_DIGIT(NXT(len))) ||
2150: (NXT(len) == '.') || (NXT(len) == '-') ||
2151: (NXT(len) == '_') || (NXT(len) == ':') ||
2152: (IS_COMBINING(NXT(len))) ||
2153: (IS_EXTENDER(NXT(len)))) {
2154: buf[len] = NXT(len);
2155: len++;
2156: if (len >= XML_MAX_NAMELEN) {
2157: fprintf(stderr,
2158: "xmlScanName: reached XML_MAX_NAMELEN limit\n");
2159: while ((IS_LETTER(NXT(len))) || (IS_DIGIT(NXT(len))) ||
2160: (NXT(len) == '.') || (NXT(len) == '-') ||
2161: (NXT(len) == '_') || (NXT(len) == ':') ||
2162: (IS_COMBINING(NXT(len))) ||
2163: (IS_EXTENDER(NXT(len))))
2164: len++;
2165: break;
2166: }
2167: }
2168: return(xmlStrndup(buf, len));
2169: }
2170:
2171: /**
1.50 daniel 2172: * xmlParseName:
2173: * @ctxt: an XML parser context
2174: *
2175: * parse an XML name.
1.22 daniel 2176: *
2177: * [4] NameChar ::= Letter | Digit | '.' | '-' | '_' | ':' |
2178: * CombiningChar | Extender
2179: *
2180: * [5] Name ::= (Letter | '_' | ':') (NameChar)*
2181: *
2182: * [6] Names ::= Name (S Name)*
1.68 daniel 2183: *
2184: * Returns the Name parsed or NULL
1.1 veillard 2185: */
2186:
1.55 daniel 2187: CHAR *
2188: xmlParseName(xmlParserCtxtPtr ctxt) {
1.91 daniel 2189: CHAR buf[XML_MAX_NAMELEN];
2190: int len = 0;
1.97 daniel 2191: CHAR cur;
1.1 veillard 2192:
1.91 daniel 2193: GROW;
1.97 daniel 2194: cur = CUR;
2195: if (!IS_LETTER(cur) && (cur != '_') &&
2196: (cur != ':')) {
1.91 daniel 2197: return(NULL);
2198: }
1.40 daniel 2199:
1.97 daniel 2200: while ((IS_LETTER(cur)) || (IS_DIGIT(cur)) ||
2201: (cur == '.') || (cur == '-') ||
2202: (cur == '_') || (cur == ':') ||
2203: (IS_COMBINING(cur)) ||
2204: (IS_EXTENDER(cur))) {
2205: buf[len++] = cur;
1.40 daniel 2206: NEXT;
1.97 daniel 2207: cur = CUR;
1.91 daniel 2208: if (len >= XML_MAX_NAMELEN) {
2209: fprintf(stderr,
2210: "xmlParseName: reached XML_MAX_NAMELEN limit\n");
1.97 daniel 2211: while ((IS_LETTER(cur)) || (IS_DIGIT(cur)) ||
2212: (cur == '.') || (cur == '-') ||
2213: (cur == '_') || (cur == ':') ||
2214: (IS_COMBINING(cur)) ||
2215: (IS_EXTENDER(cur))) {
2216: NEXT;
2217: cur = CUR;
2218: }
1.91 daniel 2219: break;
2220: }
2221: }
2222: return(xmlStrndup(buf, len));
1.22 daniel 2223: }
2224:
1.50 daniel 2225: /**
2226: * xmlParseNmtoken:
2227: * @ctxt: an XML parser context
2228: *
2229: * parse an XML Nmtoken.
1.22 daniel 2230: *
2231: * [7] Nmtoken ::= (NameChar)+
2232: *
2233: * [8] Nmtokens ::= Nmtoken (S Nmtoken)*
1.68 daniel 2234: *
2235: * Returns the Nmtoken parsed or NULL
1.22 daniel 2236: */
2237:
1.55 daniel 2238: CHAR *
2239: xmlParseNmtoken(xmlParserCtxtPtr ctxt) {
1.91 daniel 2240: CHAR buf[XML_MAX_NAMELEN];
2241: int len = 0;
1.22 daniel 2242:
1.91 daniel 2243: GROW;
1.40 daniel 2244: while ((IS_LETTER(CUR)) || (IS_DIGIT(CUR)) ||
2245: (CUR == '.') || (CUR == '-') ||
2246: (CUR == '_') || (CUR == ':') ||
2247: (IS_COMBINING(CUR)) ||
1.91 daniel 2248: (IS_EXTENDER(CUR))) {
2249: buf[len++] = CUR;
1.40 daniel 2250: NEXT;
1.91 daniel 2251: if (len >= XML_MAX_NAMELEN) {
2252: fprintf(stderr,
2253: "xmlParseNmtoken: reached XML_MAX_NAMELEN limit\n");
2254: while ((IS_LETTER(CUR)) || (IS_DIGIT(CUR)) ||
2255: (CUR == '.') || (CUR == '-') ||
2256: (CUR == '_') || (CUR == ':') ||
2257: (IS_COMBINING(CUR)) ||
2258: (IS_EXTENDER(CUR)))
2259: NEXT;
2260: break;
2261: }
2262: }
2263: return(xmlStrndup(buf, len));
1.1 veillard 2264: }
2265:
1.50 daniel 2266: /**
2267: * xmlParseEntityValue:
2268: * @ctxt: an XML parser context
1.78 daniel 2269: * @orig: if non-NULL store a copy of the original entity value
1.50 daniel 2270: *
2271: * parse a value for ENTITY decl.
1.24 daniel 2272: *
2273: * [9] EntityValue ::= '"' ([^%&"] | PEReference | Reference)* '"' |
2274: * "'" ([^%&'] | PEReference | Reference)* "'"
1.68 daniel 2275: *
1.78 daniel 2276: * Returns the EntityValue parsed with reference substitued or NULL
1.24 daniel 2277: */
2278:
1.55 daniel 2279: CHAR *
1.78 daniel 2280: xmlParseEntityValue(xmlParserCtxtPtr ctxt, CHAR **orig) {
1.77 daniel 2281: CHAR *ret = NULL;
1.78 daniel 2282: const CHAR *org = NULL;
1.79 daniel 2283: const CHAR *tst = NULL;
2284: const CHAR *temp = NULL;
1.98 daniel 2285: xmlParserInputPtr input;
1.24 daniel 2286:
1.91 daniel 2287: SHRINK;
1.40 daniel 2288: if (CUR == '"') {
1.96 daniel 2289: ctxt->instate = XML_PARSER_ENTITY_VALUE;
1.98 daniel 2290: input = ctxt->input;
1.40 daniel 2291: NEXT;
1.78 daniel 2292: org = CUR_PTR;
1.98 daniel 2293: /*
2294: * NOTE: 4.4.5 Included in Literal
2295: * When a parameter entity reference appears in a literal entity
2296: * value, ... a single or double quote character in the replacement
2297: * text is always treated as a normal data character and will not
2298: * terminate the literal.
2299: * In practice it means we stop the loop only when back at parsing
2300: * the initial entity and the quote is found
2301: */
2302: while ((CUR != '"') || (ctxt->input != input)) {
1.79 daniel 2303: tst = CUR_PTR;
1.98 daniel 2304: /*
2305: * NOTE: 4.4.7 Bypassed
2306: * When a general entity reference appears in the EntityValue in
2307: * an entity declaration, it is bypassed and left as is.
2308: * so XML_SUBSTITUTE_REF is not set.
2309: */
2310: if (ctxt->input != input)
2311: temp = xmlDecodeEntities(ctxt, -1, XML_SUBSTITUTE_PEREF,
2312: 0, 0, 0);
2313: else
2314: temp = xmlDecodeEntities(ctxt, -1, XML_SUBSTITUTE_PEREF,
2315: '"', 0, 0);
1.94 daniel 2316:
2317: /*
2318: * Pop-up of finished entities.
2319: */
2320: while ((CUR == 0) && (ctxt->inputNr > 1))
2321: xmlPopInput(ctxt);
2322:
2323: if ((temp == NULL) && (tst == CUR_PTR)) {
1.116 daniel 2324: ret = xmlStrndup((CHAR *) "", 0);
1.94 daniel 2325: break;
2326: }
2327: if ((temp[0] == 0) && (tst == CUR_PTR)) {
1.119 daniel 2328: xmlFree((char *)temp);
1.116 daniel 2329: ret = xmlStrndup((CHAR *) "", 0);
1.94 daniel 2330: break;
2331: }
1.79 daniel 2332: ret = xmlStrcat(ret, temp);
1.119 daniel 2333: if (temp != NULL) xmlFree((char *)temp);
1.94 daniel 2334: GROW;
1.79 daniel 2335: }
1.77 daniel 2336: if (CUR != '"') {
1.55 daniel 2337: if ((ctxt->sax != NULL) && (ctxt->sax->error != NULL))
1.79 daniel 2338: ctxt->sax->error(ctxt->userData, "EntityValue: \" expected\n");
1.59 daniel 2339: ctxt->wellFormed = 0;
1.78 daniel 2340: } else {
1.99 daniel 2341: if (orig != NULL) /* !!!!!!!!! */
1.78 daniel 2342: *orig = xmlStrndup(org, CUR_PTR - org);
1.94 daniel 2343: if (ret == NULL)
1.116 daniel 2344: ret = xmlStrndup((CHAR *) "", 0);
1.40 daniel 2345: NEXT;
1.78 daniel 2346: }
1.40 daniel 2347: } else if (CUR == '\'') {
1.96 daniel 2348: ctxt->instate = XML_PARSER_ENTITY_VALUE;
1.98 daniel 2349: input = ctxt->input;
1.40 daniel 2350: NEXT;
1.78 daniel 2351: org = CUR_PTR;
1.98 daniel 2352: /*
2353: * NOTE: 4.4.5 Included in Literal
2354: * When a parameter entity reference appears in a literal entity
2355: * value, ... a single or double quote character in the replacement
2356: * text is always treated as a normal data character and will not
2357: * terminate the literal.
2358: * In practice it means we stop the loop only when back at parsing
2359: * the initial entity and the quote is found
2360: */
2361: while ((CUR != '\'') || (ctxt->input != input)) {
1.79 daniel 2362: tst = CUR_PTR;
1.98 daniel 2363: /*
2364: * NOTE: 4.4.7 Bypassed
2365: * When a general entity reference appears in the EntityValue in
2366: * an entity declaration, it is bypassed and left as is.
2367: * so XML_SUBSTITUTE_REF is not set.
2368: */
2369: if (ctxt->input != input)
2370: temp = xmlDecodeEntities(ctxt, -1, XML_SUBSTITUTE_PEREF,
2371: 0, 0, 0);
2372: else
2373: temp = xmlDecodeEntities(ctxt, -1, XML_SUBSTITUTE_PEREF,
2374: '\'', 0, 0);
1.94 daniel 2375:
2376: /*
2377: * Pop-up of finished entities.
2378: */
2379: while ((CUR == 0) && (ctxt->inputNr > 1))
2380: xmlPopInput(ctxt);
2381:
2382: if ((temp == NULL) && (tst == CUR_PTR)) {
1.116 daniel 2383: ret = xmlStrndup((CHAR *) "", 0);
1.94 daniel 2384: break;
2385: }
2386: if ((temp[0] == 0) && (tst == CUR_PTR)) {
1.119 daniel 2387: xmlFree((char *)temp);
1.116 daniel 2388: ret = xmlStrndup((CHAR *) "", 0);
1.94 daniel 2389: break;
2390: }
1.79 daniel 2391: ret = xmlStrcat(ret, temp);
1.119 daniel 2392: if (temp != NULL) xmlFree((char *)temp);
1.94 daniel 2393: GROW;
1.79 daniel 2394: }
1.77 daniel 2395: if (CUR != '\'') {
1.55 daniel 2396: if ((ctxt->sax != NULL) && (ctxt->sax->error != NULL))
1.77 daniel 2397: ctxt->sax->error(ctxt->userData, "EntityValue: ' expected\n");
1.59 daniel 2398: ctxt->wellFormed = 0;
1.78 daniel 2399: } else {
1.99 daniel 2400: if (orig != NULL) /* !!!!!!!!! */
1.78 daniel 2401: *orig = xmlStrndup(org, CUR_PTR - org);
1.94 daniel 2402: if (ret == NULL)
1.116 daniel 2403: ret = xmlStrndup((CHAR *) "", 0);
1.40 daniel 2404: NEXT;
1.78 daniel 2405: }
1.24 daniel 2406: } else {
1.55 daniel 2407: if ((ctxt->sax != NULL) && (ctxt->sax->error != NULL))
1.77 daniel 2408: ctxt->sax->error(ctxt->userData, "EntityValue: \" or ' expected\n");
1.59 daniel 2409: ctxt->wellFormed = 0;
1.24 daniel 2410: }
2411:
2412: return(ret);
2413: }
2414:
1.50 daniel 2415: /**
2416: * xmlParseAttValue:
2417: * @ctxt: an XML parser context
2418: *
2419: * parse a value for an attribute
1.78 daniel 2420: * Note: the parser won't do substitution of entities here, this
1.113 daniel 2421: * will be handled later in xmlStringGetNodeList
1.29 daniel 2422: *
2423: * [10] AttValue ::= '"' ([^<&"] | Reference)* '"' |
2424: * "'" ([^<&'] | Reference)* "'"
1.68 daniel 2425: *
2426: * Returns the AttValue parsed or NULL.
1.29 daniel 2427: */
2428:
1.55 daniel 2429: CHAR *
2430: xmlParseAttValue(xmlParserCtxtPtr ctxt) {
1.77 daniel 2431: CHAR *ret = NULL;
1.29 daniel 2432:
1.91 daniel 2433: SHRINK;
1.40 daniel 2434: if (CUR == '"') {
1.96 daniel 2435: ctxt->instate = XML_PARSER_ATTRIBUTE_VALUE;
1.40 daniel 2436: NEXT;
1.98 daniel 2437: ret = xmlDecodeEntities(ctxt, -1, XML_SUBSTITUTE_REF, '"', '<', 0);
1.77 daniel 2438: if (CUR == '<') {
2439: if ((ctxt->sax != NULL) && (ctxt->sax->error != NULL))
2440: ctxt->sax->error(ctxt->userData,
2441: "Unescaped '<' not allowed in attributes values\n");
2442: ctxt->wellFormed = 0;
1.29 daniel 2443: }
1.77 daniel 2444: if (CUR != '"') {
1.55 daniel 2445: if ((ctxt->sax != NULL) && (ctxt->sax->error != NULL))
1.77 daniel 2446: ctxt->sax->error(ctxt->userData, "AttValue: ' expected\n");
1.59 daniel 2447: ctxt->wellFormed = 0;
1.77 daniel 2448: } else
1.40 daniel 2449: NEXT;
2450: } else if (CUR == '\'') {
1.96 daniel 2451: ctxt->instate = XML_PARSER_ATTRIBUTE_VALUE;
1.40 daniel 2452: NEXT;
1.98 daniel 2453: ret = xmlDecodeEntities(ctxt, -1, XML_SUBSTITUTE_REF, '\'', '<', 0);
1.77 daniel 2454: if (CUR == '<') {
2455: if ((ctxt->sax != NULL) && (ctxt->sax->error != NULL))
2456: ctxt->sax->error(ctxt->userData,
2457: "Unescaped '<' not allowed in attributes values\n");
2458: ctxt->wellFormed = 0;
1.29 daniel 2459: }
1.77 daniel 2460: if (CUR != '\'') {
1.55 daniel 2461: if ((ctxt->sax != NULL) && (ctxt->sax->error != NULL))
1.77 daniel 2462: ctxt->sax->error(ctxt->userData, "AttValue: ' expected\n");
1.59 daniel 2463: ctxt->wellFormed = 0;
1.77 daniel 2464: } else
1.40 daniel 2465: NEXT;
1.29 daniel 2466: } else {
1.55 daniel 2467: if ((ctxt->sax != NULL) && (ctxt->sax->error != NULL))
1.74 daniel 2468: ctxt->sax->error(ctxt->userData, "AttValue: \" or ' expected\n");
1.59 daniel 2469: ctxt->wellFormed = 0;
1.29 daniel 2470: }
2471:
2472: return(ret);
2473: }
2474:
1.50 daniel 2475: /**
2476: * xmlParseSystemLiteral:
2477: * @ctxt: an XML parser context
2478: *
2479: * parse an XML Literal
1.21 daniel 2480: *
1.22 daniel 2481: * [11] SystemLiteral ::= ('"' [^"]* '"') | ("'" [^']* "'")
1.68 daniel 2482: *
2483: * Returns the SystemLiteral parsed or NULL
1.21 daniel 2484: */
2485:
1.55 daniel 2486: CHAR *
2487: xmlParseSystemLiteral(xmlParserCtxtPtr ctxt) {
1.21 daniel 2488: const CHAR *q;
2489: CHAR *ret = NULL;
2490:
1.91 daniel 2491: SHRINK;
1.40 daniel 2492: if (CUR == '"') {
2493: NEXT;
2494: q = CUR_PTR;
2495: while ((IS_CHAR(CUR)) && (CUR != '"'))
2496: NEXT;
2497: if (!IS_CHAR(CUR)) {
1.55 daniel 2498: if ((ctxt->sax != NULL) && (ctxt->sax->error != NULL))
1.74 daniel 2499: ctxt->sax->error(ctxt->userData, "Unfinished SystemLiteral\n");
1.59 daniel 2500: ctxt->wellFormed = 0;
1.21 daniel 2501: } else {
1.40 daniel 2502: ret = xmlStrndup(q, CUR_PTR - q);
2503: NEXT;
1.21 daniel 2504: }
1.40 daniel 2505: } else if (CUR == '\'') {
2506: NEXT;
2507: q = CUR_PTR;
2508: while ((IS_CHAR(CUR)) && (CUR != '\''))
2509: NEXT;
2510: if (!IS_CHAR(CUR)) {
1.55 daniel 2511: if ((ctxt->sax != NULL) && (ctxt->sax->error != NULL))
1.74 daniel 2512: ctxt->sax->error(ctxt->userData, "Unfinished SystemLiteral\n");
1.59 daniel 2513: ctxt->wellFormed = 0;
1.21 daniel 2514: } else {
1.40 daniel 2515: ret = xmlStrndup(q, CUR_PTR - q);
2516: NEXT;
1.21 daniel 2517: }
2518: } else {
1.55 daniel 2519: if ((ctxt->sax != NULL) && (ctxt->sax->error != NULL))
1.74 daniel 2520: ctxt->sax->error(ctxt->userData, "SystemLiteral \" or ' expected\n");
1.59 daniel 2521: ctxt->wellFormed = 0;
1.21 daniel 2522: }
2523:
2524: return(ret);
2525: }
2526:
1.50 daniel 2527: /**
2528: * xmlParsePubidLiteral:
2529: * @ctxt: an XML parser context
1.21 daniel 2530: *
1.50 daniel 2531: * parse an XML public literal
1.68 daniel 2532: *
2533: * [12] PubidLiteral ::= '"' PubidChar* '"' | "'" (PubidChar - "'")* "'"
2534: *
2535: * Returns the PubidLiteral parsed or NULL.
1.21 daniel 2536: */
2537:
1.55 daniel 2538: CHAR *
2539: xmlParsePubidLiteral(xmlParserCtxtPtr ctxt) {
1.21 daniel 2540: const CHAR *q;
2541: CHAR *ret = NULL;
2542: /*
2543: * Name ::= (Letter | '_') (NameChar)*
2544: */
1.91 daniel 2545: SHRINK;
1.40 daniel 2546: if (CUR == '"') {
2547: NEXT;
2548: q = CUR_PTR;
2549: while (IS_PUBIDCHAR(CUR)) NEXT;
2550: if (CUR != '"') {
1.55 daniel 2551: if ((ctxt->sax != NULL) && (ctxt->sax->error != NULL))
1.74 daniel 2552: ctxt->sax->error(ctxt->userData, "Unfinished PubidLiteral\n");
1.59 daniel 2553: ctxt->wellFormed = 0;
1.21 daniel 2554: } else {
1.40 daniel 2555: ret = xmlStrndup(q, CUR_PTR - q);
2556: NEXT;
1.21 daniel 2557: }
1.40 daniel 2558: } else if (CUR == '\'') {
2559: NEXT;
2560: q = CUR_PTR;
2561: while ((IS_LETTER(CUR)) && (CUR != '\''))
2562: NEXT;
2563: if (!IS_LETTER(CUR)) {
1.55 daniel 2564: if ((ctxt->sax != NULL) && (ctxt->sax->error != NULL))
1.74 daniel 2565: ctxt->sax->error(ctxt->userData, "Unfinished PubidLiteral\n");
1.59 daniel 2566: ctxt->wellFormed = 0;
1.21 daniel 2567: } else {
1.40 daniel 2568: ret = xmlStrndup(q, CUR_PTR - q);
2569: NEXT;
1.21 daniel 2570: }
2571: } else {
1.55 daniel 2572: if ((ctxt->sax != NULL) && (ctxt->sax->error != NULL))
1.74 daniel 2573: ctxt->sax->error(ctxt->userData, "SystemLiteral \" or ' expected\n");
1.59 daniel 2574: ctxt->wellFormed = 0;
1.21 daniel 2575: }
2576:
2577: return(ret);
2578: }
2579:
1.50 daniel 2580: /**
2581: * xmlParseCharData:
2582: * @ctxt: an XML parser context
2583: * @cdata: int indicating whether we are within a CDATA section
2584: *
2585: * parse a CharData section.
2586: * if we are within a CDATA section ']]>' marks an end of section.
1.27 daniel 2587: *
2588: * [14] CharData ::= [^<&]* - ([^<&]* ']]>' [^<&]*)
2589: */
2590:
1.55 daniel 2591: void
2592: xmlParseCharData(xmlParserCtxtPtr ctxt, int cdata) {
1.91 daniel 2593: CHAR buf[1000];
2594: int nbchar = 0;
1.97 daniel 2595: CHAR cur;
1.27 daniel 2596:
1.91 daniel 2597: SHRINK;
1.97 daniel 2598: /*
2599: * !!!!!!!!!!!!
2600: * NOTE: NXT(0) is used here to avoid breaking on < or &
2601: * entities substitutions.
2602: */
2603: cur = CUR;
2604: while ((IS_CHAR(cur)) && (cur != '<') &&
2605: (cur != '&')) {
2606: if ((cur == ']') && (NXT(1) == ']') &&
1.59 daniel 2607: (NXT(2) == '>')) {
2608: if (cdata) break;
2609: else {
2610: if ((ctxt->sax != NULL) && (ctxt->sax->error != NULL))
1.74 daniel 2611: ctxt->sax->error(ctxt->userData,
1.59 daniel 2612: "Sequence ']]>' not allowed in content\n");
2613: ctxt->wellFormed = 0;
2614: }
2615: }
1.91 daniel 2616: buf[nbchar++] = CUR;
2617: if (nbchar == 1000) {
2618: /*
2619: * Ok the segment is to be consumed as chars.
2620: */
2621: if (ctxt->sax != NULL) {
2622: if (areBlanks(ctxt, buf, nbchar)) {
2623: if (ctxt->sax->ignorableWhitespace != NULL)
2624: ctxt->sax->ignorableWhitespace(ctxt->userData,
2625: buf, nbchar);
2626: } else {
2627: if (ctxt->sax->characters != NULL)
2628: ctxt->sax->characters(ctxt->userData, buf, nbchar);
2629: }
2630: }
2631: nbchar = 0;
2632: }
1.40 daniel 2633: NEXT;
1.97 daniel 2634: cur = CUR;
1.27 daniel 2635: }
1.91 daniel 2636: if (nbchar != 0) {
2637: /*
2638: * Ok the segment is to be consumed as chars.
2639: */
2640: if (ctxt->sax != NULL) {
2641: if (areBlanks(ctxt, buf, nbchar)) {
2642: if (ctxt->sax->ignorableWhitespace != NULL)
2643: ctxt->sax->ignorableWhitespace(ctxt->userData, buf, nbchar);
2644: } else {
2645: if (ctxt->sax->characters != NULL)
2646: ctxt->sax->characters(ctxt->userData, buf, nbchar);
2647: }
2648: }
1.45 daniel 2649: }
1.27 daniel 2650: }
2651:
1.50 daniel 2652: /**
2653: * xmlParseExternalID:
2654: * @ctxt: an XML parser context
2655: * @publicID: a CHAR** receiving PubidLiteral
1.67 daniel 2656: * @strict: indicate whether we should restrict parsing to only
2657: * production [75], see NOTE below
1.50 daniel 2658: *
1.67 daniel 2659: * Parse an External ID or a Public ID
2660: *
2661: * NOTE: Productions [75] and [83] interract badly since [75] can generate
2662: * 'PUBLIC' S PubidLiteral S SystemLiteral
1.22 daniel 2663: *
2664: * [75] ExternalID ::= 'SYSTEM' S SystemLiteral
2665: * | 'PUBLIC' S PubidLiteral S SystemLiteral
1.67 daniel 2666: *
2667: * [83] PublicID ::= 'PUBLIC' S PubidLiteral
2668: *
1.68 daniel 2669: * Returns the function returns SystemLiteral and in the second
1.67 daniel 2670: * case publicID receives PubidLiteral, is strict is off
2671: * it is possible to return NULL and have publicID set.
1.22 daniel 2672: */
2673:
1.55 daniel 2674: CHAR *
1.67 daniel 2675: xmlParseExternalID(xmlParserCtxtPtr ctxt, CHAR **publicID, int strict) {
1.39 daniel 2676: CHAR *URI = NULL;
1.22 daniel 2677:
1.91 daniel 2678: SHRINK;
1.40 daniel 2679: if ((CUR == 'S') && (NXT(1) == 'Y') &&
2680: (NXT(2) == 'S') && (NXT(3) == 'T') &&
2681: (NXT(4) == 'E') && (NXT(5) == 'M')) {
2682: SKIP(6);
1.59 daniel 2683: if (!IS_BLANK(CUR)) {
2684: if ((ctxt->sax != NULL) && (ctxt->sax->error != NULL))
1.74 daniel 2685: ctxt->sax->error(ctxt->userData,
1.59 daniel 2686: "Space required after 'SYSTEM'\n");
2687: ctxt->wellFormed = 0;
2688: }
1.42 daniel 2689: SKIP_BLANKS;
1.39 daniel 2690: URI = xmlParseSystemLiteral(ctxt);
1.59 daniel 2691: if (URI == NULL) {
1.55 daniel 2692: if ((ctxt->sax != NULL) && (ctxt->sax->error != NULL))
1.74 daniel 2693: ctxt->sax->error(ctxt->userData,
1.39 daniel 2694: "xmlParseExternalID: SYSTEM, no URI\n");
1.59 daniel 2695: ctxt->wellFormed = 0;
2696: }
1.40 daniel 2697: } else if ((CUR == 'P') && (NXT(1) == 'U') &&
2698: (NXT(2) == 'B') && (NXT(3) == 'L') &&
2699: (NXT(4) == 'I') && (NXT(5) == 'C')) {
2700: SKIP(6);
1.59 daniel 2701: if (!IS_BLANK(CUR)) {
2702: if ((ctxt->sax != NULL) && (ctxt->sax->error != NULL))
1.74 daniel 2703: ctxt->sax->error(ctxt->userData,
1.59 daniel 2704: "Space required after 'PUBLIC'\n");
2705: ctxt->wellFormed = 0;
2706: }
1.42 daniel 2707: SKIP_BLANKS;
1.39 daniel 2708: *publicID = xmlParsePubidLiteral(ctxt);
1.59 daniel 2709: if (*publicID == NULL) {
1.55 daniel 2710: if ((ctxt->sax != NULL) && (ctxt->sax->error != NULL))
1.74 daniel 2711: ctxt->sax->error(ctxt->userData,
1.39 daniel 2712: "xmlParseExternalID: PUBLIC, no Public Identifier\n");
1.59 daniel 2713: ctxt->wellFormed = 0;
2714: }
1.67 daniel 2715: if (strict) {
2716: /*
2717: * We don't handle [83] so "S SystemLiteral" is required.
2718: */
2719: if (!IS_BLANK(CUR)) {
2720: if ((ctxt->sax != NULL) && (ctxt->sax->error != NULL))
1.74 daniel 2721: ctxt->sax->error(ctxt->userData,
1.67 daniel 2722: "Space required after the Public Identifier\n");
2723: ctxt->wellFormed = 0;
2724: }
2725: } else {
2726: /*
2727: * We handle [83] so we return immediately, if
2728: * "S SystemLiteral" is not detected. From a purely parsing
2729: * point of view that's a nice mess.
2730: */
2731: const CHAR *ptr = CUR_PTR;
2732: if (!IS_BLANK(*ptr)) return(NULL);
2733:
2734: while (IS_BLANK(*ptr)) ptr++;
2735: if ((*ptr != '\'') || (*ptr != '"')) return(NULL);
1.59 daniel 2736: }
1.42 daniel 2737: SKIP_BLANKS;
1.39 daniel 2738: URI = xmlParseSystemLiteral(ctxt);
1.59 daniel 2739: if (URI == NULL) {
1.55 daniel 2740: if ((ctxt->sax != NULL) && (ctxt->sax->error != NULL))
1.74 daniel 2741: ctxt->sax->error(ctxt->userData,
1.39 daniel 2742: "xmlParseExternalID: PUBLIC, no URI\n");
1.59 daniel 2743: ctxt->wellFormed = 0;
2744: }
1.22 daniel 2745: }
1.39 daniel 2746: return(URI);
1.22 daniel 2747: }
2748:
1.50 daniel 2749: /**
2750: * xmlParseComment:
1.69 daniel 2751: * @ctxt: an XML parser context
1.50 daniel 2752: *
1.3 veillard 2753: * Skip an XML (SGML) comment <!-- .... -->
1.38 daniel 2754: * The spec says that "For compatibility, the string "--" (double-hyphen)
2755: * must not occur within comments. "
1.22 daniel 2756: *
2757: * [15] Comment ::= '<!--' ((Char - '-') | ('-' (Char - '-')))* '-->'
1.3 veillard 2758: */
1.72 daniel 2759: void
1.114 daniel 2760: xmlParseComment(xmlParserCtxtPtr ctxt) {
1.17 daniel 2761: const CHAR *q, *start;
2762: const CHAR *r;
1.39 daniel 2763: CHAR *val;
1.3 veillard 2764:
2765: /*
1.22 daniel 2766: * Check that there is a comment right here.
1.3 veillard 2767: */
1.40 daniel 2768: if ((CUR != '<') || (NXT(1) != '!') ||
1.72 daniel 2769: (NXT(2) != '-') || (NXT(3) != '-')) return;
1.3 veillard 2770:
1.97 daniel 2771: ctxt->instate = XML_PARSER_COMMENT;
1.91 daniel 2772: SHRINK;
1.40 daniel 2773: SKIP(4);
2774: start = q = CUR_PTR;
2775: NEXT;
2776: r = CUR_PTR;
2777: NEXT;
2778: while (IS_CHAR(CUR) &&
2779: ((CUR == ':') || (CUR != '>') ||
1.16 daniel 2780: (*r != '-') || (*q != '-'))) {
1.59 daniel 2781: if ((*r == '-') && (*q == '-')) {
1.55 daniel 2782: if ((ctxt->sax != NULL) && (ctxt->sax->error != NULL))
1.74 daniel 2783: ctxt->sax->error(ctxt->userData,
1.38 daniel 2784: "Comment must not contain '--' (double-hyphen)`\n");
1.59 daniel 2785: ctxt->wellFormed = 0;
2786: }
1.40 daniel 2787: NEXT;r++;q++;
1.3 veillard 2788: }
1.40 daniel 2789: if (!IS_CHAR(CUR)) {
1.55 daniel 2790: if ((ctxt->sax != NULL) && (ctxt->sax->error != NULL))
1.74 daniel 2791: ctxt->sax->error(ctxt->userData, "Comment not terminated \n<!--%.50s\n", start);
1.59 daniel 2792: ctxt->wellFormed = 0;
1.3 veillard 2793: } else {
1.40 daniel 2794: NEXT;
1.114 daniel 2795: val = xmlStrndup(start, q - start);
2796: if ((ctxt->sax != NULL) && (ctxt->sax->comment != NULL))
2797: ctxt->sax->comment(ctxt->userData, val);
1.119 daniel 2798: xmlFree(val);
1.3 veillard 2799: }
2800: }
2801:
1.50 daniel 2802: /**
2803: * xmlParsePITarget:
2804: * @ctxt: an XML parser context
2805: *
2806: * parse the name of a PI
1.22 daniel 2807: *
2808: * [17] PITarget ::= Name - (('X' | 'x') ('M' | 'm') ('L' | 'l'))
1.68 daniel 2809: *
2810: * Returns the PITarget name or NULL
1.22 daniel 2811: */
2812:
1.55 daniel 2813: CHAR *
2814: xmlParsePITarget(xmlParserCtxtPtr ctxt) {
1.22 daniel 2815: CHAR *name;
2816:
2817: name = xmlParseName(ctxt);
2818: if ((name != NULL) && (name[3] == 0) &&
2819: ((name[0] == 'x') || (name[0] == 'X')) &&
1.31 daniel 2820: ((name[1] == 'm') || (name[1] == 'M')) &&
2821: ((name[2] == 'l') || (name[2] == 'L'))) {
1.55 daniel 2822: if ((ctxt->sax != NULL) && (ctxt->sax->error != NULL))
1.74 daniel 2823: ctxt->sax->error(ctxt->userData, "xmlParsePItarget: invalid name prefix 'xml'\n");
1.22 daniel 2824: return(NULL);
2825: }
2826: return(name);
2827: }
2828:
1.50 daniel 2829: /**
2830: * xmlParsePI:
2831: * @ctxt: an XML parser context
2832: *
2833: * parse an XML Processing Instruction.
1.22 daniel 2834: *
2835: * [16] PI ::= '<?' PITarget (S (Char* - (Char* '?>' Char*)))? '?>'
1.68 daniel 2836: *
1.69 daniel 2837: * The processing is transfered to SAX once parsed.
1.3 veillard 2838: */
2839:
1.55 daniel 2840: void
2841: xmlParsePI(xmlParserCtxtPtr ctxt) {
1.22 daniel 2842: CHAR *target;
2843:
1.40 daniel 2844: if ((CUR == '<') && (NXT(1) == '?')) {
1.3 veillard 2845: /*
2846: * this is a Processing Instruction.
2847: */
1.40 daniel 2848: SKIP(2);
1.91 daniel 2849: SHRINK;
1.3 veillard 2850:
2851: /*
1.22 daniel 2852: * Parse the target name and check for special support like
2853: * namespace.
1.3 veillard 2854: */
1.22 daniel 2855: target = xmlParsePITarget(ctxt);
2856: if (target != NULL) {
1.114 daniel 2857: const CHAR *q;
1.72 daniel 2858:
1.114 daniel 2859: if (!IS_BLANK(CUR)) {
2860: if ((ctxt->sax != NULL) && (ctxt->sax->error != NULL))
2861: ctxt->sax->error(ctxt->userData,
2862: "xmlParsePI: PI %s space expected\n", target);
2863: ctxt->wellFormed = 0;
2864: }
2865: SKIP_BLANKS;
2866: q = CUR_PTR;
1.72 daniel 2867: while (IS_CHAR(CUR) &&
2868: ((CUR != '?') || (NXT(1) != '>')))
2869: NEXT;
2870: if (!IS_CHAR(CUR)) {
2871: if ((ctxt->sax != NULL) && (ctxt->sax->error != NULL))
1.74 daniel 2872: ctxt->sax->error(ctxt->userData,
1.72 daniel 2873: "xmlParsePI: PI %s never end ...\n", target);
2874: ctxt->wellFormed = 0;
1.22 daniel 2875: } else {
1.72 daniel 2876: CHAR *data;
1.44 daniel 2877:
1.72 daniel 2878: data = xmlStrndup(q, CUR_PTR - q);
2879: SKIP(2);
1.44 daniel 2880:
1.72 daniel 2881: /*
2882: * SAX: PI detected.
2883: */
2884: if ((ctxt->sax) &&
2885: (ctxt->sax->processingInstruction != NULL))
1.99 daniel 2886: ctxt->sax->processingInstruction(ctxt->userData,
2887: target, data);
1.119 daniel 2888: xmlFree(data);
1.22 daniel 2889: }
1.119 daniel 2890: xmlFree(target);
1.3 veillard 2891: } else {
1.55 daniel 2892: if ((ctxt->sax != NULL) && (ctxt->sax->error != NULL))
1.99 daniel 2893: ctxt->sax->error(ctxt->userData,
2894: "xmlParsePI : no target name\n");
1.59 daniel 2895: ctxt->wellFormed = 0;
1.22 daniel 2896: }
2897: }
2898: }
2899:
1.50 daniel 2900: /**
2901: * xmlParseNotationDecl:
2902: * @ctxt: an XML parser context
2903: *
2904: * parse a notation declaration
1.22 daniel 2905: *
2906: * [82] NotationDecl ::= '<!NOTATION' S Name S (ExternalID | PublicID) S? '>'
2907: *
2908: * Hence there is actually 3 choices:
2909: * 'PUBLIC' S PubidLiteral
2910: * 'PUBLIC' S PubidLiteral S SystemLiteral
2911: * and 'SYSTEM' S SystemLiteral
1.50 daniel 2912: *
1.67 daniel 2913: * See the NOTE on xmlParseExternalID().
1.22 daniel 2914: */
2915:
1.55 daniel 2916: void
2917: xmlParseNotationDecl(xmlParserCtxtPtr ctxt) {
1.22 daniel 2918: CHAR *name;
1.67 daniel 2919: CHAR *Pubid;
2920: CHAR *Systemid;
1.22 daniel 2921:
1.40 daniel 2922: if ((CUR == '<') && (NXT(1) == '!') &&
2923: (NXT(2) == 'N') && (NXT(3) == 'O') &&
2924: (NXT(4) == 'T') && (NXT(5) == 'A') &&
2925: (NXT(6) == 'T') && (NXT(7) == 'I') &&
1.67 daniel 2926: (NXT(8) == 'O') && (NXT(9) == 'N')) {
1.91 daniel 2927: SHRINK;
1.40 daniel 2928: SKIP(10);
1.67 daniel 2929: if (!IS_BLANK(CUR)) {
2930: if ((ctxt->sax != NULL) && (ctxt->sax->error != NULL))
1.74 daniel 2931: ctxt->sax->error(ctxt->userData, "Space required after '<!NOTATION'\n");
1.67 daniel 2932: ctxt->wellFormed = 0;
2933: return;
2934: }
2935: SKIP_BLANKS;
1.22 daniel 2936:
2937: name = xmlParseName(ctxt);
2938: if (name == NULL) {
1.55 daniel 2939: if ((ctxt->sax != NULL) && (ctxt->sax->error != NULL))
1.74 daniel 2940: ctxt->sax->error(ctxt->userData, "NOTATION: Name expected here\n");
1.67 daniel 2941: ctxt->wellFormed = 0;
2942: return;
2943: }
2944: if (!IS_BLANK(CUR)) {
2945: if ((ctxt->sax != NULL) && (ctxt->sax->error != NULL))
1.74 daniel 2946: ctxt->sax->error(ctxt->userData,
1.67 daniel 2947: "Space required after the NOTATION name'\n");
1.59 daniel 2948: ctxt->wellFormed = 0;
1.22 daniel 2949: return;
2950: }
1.42 daniel 2951: SKIP_BLANKS;
1.67 daniel 2952:
1.22 daniel 2953: /*
1.67 daniel 2954: * Parse the IDs.
1.22 daniel 2955: */
1.67 daniel 2956: Systemid = xmlParseExternalID(ctxt, &Pubid, 1);
2957: SKIP_BLANKS;
2958:
2959: if (CUR == '>') {
1.40 daniel 2960: NEXT;
1.72 daniel 2961: if ((ctxt->sax != NULL) && (ctxt->sax->notationDecl != NULL))
1.74 daniel 2962: ctxt->sax->notationDecl(ctxt->userData, name, Pubid, Systemid);
1.67 daniel 2963: } else {
2964: if ((ctxt->sax != NULL) && (ctxt->sax->error != NULL))
1.74 daniel 2965: ctxt->sax->error(ctxt->userData,
1.67 daniel 2966: "'>' required to close NOTATION declaration\n");
2967: ctxt->wellFormed = 0;
2968: }
1.119 daniel 2969: xmlFree(name);
2970: if (Systemid != NULL) xmlFree(Systemid);
2971: if (Pubid != NULL) xmlFree(Pubid);
1.22 daniel 2972: }
2973: }
2974:
1.50 daniel 2975: /**
2976: * xmlParseEntityDecl:
2977: * @ctxt: an XML parser context
2978: *
2979: * parse <!ENTITY declarations
1.22 daniel 2980: *
2981: * [70] EntityDecl ::= GEDecl | PEDecl
2982: *
2983: * [71] GEDecl ::= '<!ENTITY' S Name S EntityDef S? '>'
2984: *
2985: * [72] PEDecl ::= '<!ENTITY' S '%' S Name S PEDef S? '>'
2986: *
2987: * [73] EntityDef ::= EntityValue | (ExternalID NDataDecl?)
2988: *
2989: * [74] PEDef ::= EntityValue | ExternalID
1.24 daniel 2990: *
2991: * [76] NDataDecl ::= S 'NDATA' S Name
1.99 daniel 2992: *
2993: * [ VC: Notation Declared ]
1.116 daniel 2994: * The Name must match the declared name of a notation.
1.22 daniel 2995: */
2996:
1.55 daniel 2997: void
2998: xmlParseEntityDecl(xmlParserCtxtPtr ctxt) {
1.39 daniel 2999: CHAR *name = NULL;
1.24 daniel 3000: CHAR *value = NULL;
1.39 daniel 3001: CHAR *URI = NULL, *literal = NULL;
1.24 daniel 3002: CHAR *ndata = NULL;
1.39 daniel 3003: int isParameter = 0;
1.78 daniel 3004: CHAR *orig = NULL;
1.22 daniel 3005:
1.94 daniel 3006: GROW;
1.40 daniel 3007: if ((CUR == '<') && (NXT(1) == '!') &&
3008: (NXT(2) == 'E') && (NXT(3) == 'N') &&
3009: (NXT(4) == 'T') && (NXT(5) == 'I') &&
1.59 daniel 3010: (NXT(6) == 'T') && (NXT(7) == 'Y')) {
1.96 daniel 3011: ctxt->instate = XML_PARSER_ENTITY_DECL;
1.91 daniel 3012: SHRINK;
1.40 daniel 3013: SKIP(8);
1.59 daniel 3014: if (!IS_BLANK(CUR)) {
3015: if ((ctxt->sax != NULL) && (ctxt->sax->error != NULL))
1.74 daniel 3016: ctxt->sax->error(ctxt->userData, "Space required after '<!ENTITY'\n");
1.59 daniel 3017: ctxt->wellFormed = 0;
3018: }
3019: SKIP_BLANKS;
1.40 daniel 3020:
3021: if (CUR == '%') {
3022: NEXT;
1.59 daniel 3023: if (!IS_BLANK(CUR)) {
3024: if ((ctxt->sax != NULL) && (ctxt->sax->error != NULL))
1.74 daniel 3025: ctxt->sax->error(ctxt->userData, "Space required after '%'\n");
1.59 daniel 3026: ctxt->wellFormed = 0;
3027: }
1.42 daniel 3028: SKIP_BLANKS;
1.39 daniel 3029: isParameter = 1;
1.22 daniel 3030: }
3031:
3032: name = xmlParseName(ctxt);
1.24 daniel 3033: if (name == NULL) {
1.55 daniel 3034: if ((ctxt->sax != NULL) && (ctxt->sax->error != NULL))
1.74 daniel 3035: ctxt->sax->error(ctxt->userData, "xmlParseEntityDecl: no name\n");
1.59 daniel 3036: ctxt->wellFormed = 0;
1.24 daniel 3037: return;
3038: }
1.59 daniel 3039: if (!IS_BLANK(CUR)) {
3040: if ((ctxt->sax != NULL) && (ctxt->sax->error != NULL))
1.74 daniel 3041: ctxt->sax->error(ctxt->userData,
1.59 daniel 3042: "Space required after the entity name\n");
3043: ctxt->wellFormed = 0;
3044: }
1.42 daniel 3045: SKIP_BLANKS;
1.24 daniel 3046:
1.22 daniel 3047: /*
1.68 daniel 3048: * handle the various case of definitions...
1.22 daniel 3049: */
1.39 daniel 3050: if (isParameter) {
1.40 daniel 3051: if ((CUR == '"') || (CUR == '\''))
1.78 daniel 3052: value = xmlParseEntityValue(ctxt, &orig);
1.39 daniel 3053: if (value) {
1.72 daniel 3054: if ((ctxt->sax != NULL) && (ctxt->sax->entityDecl != NULL))
1.74 daniel 3055: ctxt->sax->entityDecl(ctxt->userData, name,
1.39 daniel 3056: XML_INTERNAL_PARAMETER_ENTITY,
3057: NULL, NULL, value);
3058: }
1.24 daniel 3059: else {
1.67 daniel 3060: URI = xmlParseExternalID(ctxt, &literal, 1);
1.39 daniel 3061: if (URI) {
1.72 daniel 3062: if ((ctxt->sax != NULL) && (ctxt->sax->entityDecl != NULL))
1.74 daniel 3063: ctxt->sax->entityDecl(ctxt->userData, name,
1.39 daniel 3064: XML_EXTERNAL_PARAMETER_ENTITY,
3065: literal, URI, NULL);
3066: }
1.24 daniel 3067: }
3068: } else {
1.40 daniel 3069: if ((CUR == '"') || (CUR == '\'')) {
1.78 daniel 3070: value = xmlParseEntityValue(ctxt, &orig);
1.72 daniel 3071: if ((ctxt->sax != NULL) && (ctxt->sax->entityDecl != NULL))
1.74 daniel 3072: ctxt->sax->entityDecl(ctxt->userData, name,
1.39 daniel 3073: XML_INTERNAL_GENERAL_ENTITY,
3074: NULL, NULL, value);
3075: } else {
1.67 daniel 3076: URI = xmlParseExternalID(ctxt, &literal, 1);
1.59 daniel 3077: if ((CUR != '>') && (!IS_BLANK(CUR))) {
3078: if ((ctxt->sax != NULL) && (ctxt->sax->error != NULL))
1.74 daniel 3079: ctxt->sax->error(ctxt->userData,
1.59 daniel 3080: "Space required before 'NDATA'\n");
3081: ctxt->wellFormed = 0;
3082: }
1.42 daniel 3083: SKIP_BLANKS;
1.40 daniel 3084: if ((CUR == 'N') && (NXT(1) == 'D') &&
3085: (NXT(2) == 'A') && (NXT(3) == 'T') &&
3086: (NXT(4) == 'A')) {
3087: SKIP(5);
1.59 daniel 3088: if (!IS_BLANK(CUR)) {
3089: if ((ctxt->sax != NULL) && (ctxt->sax->error != NULL))
1.74 daniel 3090: ctxt->sax->error(ctxt->userData,
1.59 daniel 3091: "Space required after 'NDATA'\n");
3092: ctxt->wellFormed = 0;
3093: }
1.42 daniel 3094: SKIP_BLANKS;
1.24 daniel 3095: ndata = xmlParseName(ctxt);
1.116 daniel 3096: if ((ctxt->sax != NULL) &&
3097: (ctxt->sax->unparsedEntityDecl != NULL))
3098: ctxt->sax->unparsedEntityDecl(ctxt->userData, name,
1.39 daniel 3099: literal, URI, ndata);
3100: } else {
1.72 daniel 3101: if ((ctxt->sax != NULL) && (ctxt->sax->entityDecl != NULL))
1.74 daniel 3102: ctxt->sax->entityDecl(ctxt->userData, name,
1.39 daniel 3103: XML_EXTERNAL_GENERAL_PARSED_ENTITY,
3104: literal, URI, NULL);
1.24 daniel 3105: }
3106: }
3107: }
1.42 daniel 3108: SKIP_BLANKS;
1.40 daniel 3109: if (CUR != '>') {
1.55 daniel 3110: if ((ctxt->sax != NULL) && (ctxt->sax->error != NULL))
1.74 daniel 3111: ctxt->sax->error(ctxt->userData,
1.31 daniel 3112: "xmlParseEntityDecl: entity %s not terminated\n", name);
1.59 daniel 3113: ctxt->wellFormed = 0;
1.24 daniel 3114: } else
1.40 daniel 3115: NEXT;
1.78 daniel 3116: if (orig != NULL) {
3117: /*
1.98 daniel 3118: * Ugly mechanism to save the raw entity value.
1.78 daniel 3119: */
3120: xmlEntityPtr cur = NULL;
3121:
1.98 daniel 3122: if (isParameter) {
3123: if ((ctxt->sax != NULL) &&
3124: (ctxt->sax->getParameterEntity != NULL))
1.120 ! daniel 3125: cur = ctxt->sax->getParameterEntity(ctxt->userData, name);
1.98 daniel 3126: } else {
3127: if ((ctxt->sax != NULL) &&
3128: (ctxt->sax->getEntity != NULL))
1.120 ! daniel 3129: cur = ctxt->sax->getEntity(ctxt->userData, name);
1.98 daniel 3130: }
3131: if (cur != NULL) {
3132: if (cur->orig != NULL)
1.119 daniel 3133: xmlFree(orig);
1.98 daniel 3134: else
3135: cur->orig = orig;
3136: } else
1.119 daniel 3137: xmlFree(orig);
1.78 daniel 3138: }
1.119 daniel 3139: if (name != NULL) xmlFree(name);
3140: if (value != NULL) xmlFree(value);
3141: if (URI != NULL) xmlFree(URI);
3142: if (literal != NULL) xmlFree(literal);
3143: if (ndata != NULL) xmlFree(ndata);
1.22 daniel 3144: }
3145: }
3146:
1.50 daniel 3147: /**
1.59 daniel 3148: * xmlParseDefaultDecl:
3149: * @ctxt: an XML parser context
3150: * @value: Receive a possible fixed default value for the attribute
3151: *
3152: * Parse an attribute default declaration
3153: *
3154: * [60] DefaultDecl ::= '#REQUIRED' | '#IMPLIED' | (('#FIXED' S)? AttValue)
3155: *
1.99 daniel 3156: * [ VC: Required Attribute ]
1.117 daniel 3157: * if the default declaration is the keyword #REQUIRED, then the
3158: * attribute must be specified for all elements of the type in the
3159: * attribute-list declaration.
1.99 daniel 3160: *
3161: * [ VC: Attribute Default Legal ]
1.102 daniel 3162: * The declared default value must meet the lexical constraints of
3163: * the declared attribute type c.f. xmlValidateAttributeDecl()
1.99 daniel 3164: *
3165: * [ VC: Fixed Attribute Default ]
1.117 daniel 3166: * if an attribute has a default value declared with the #FIXED
3167: * keyword, instances of that attribute must match the default value.
1.99 daniel 3168: *
3169: * [ WFC: No < in Attribute Values ]
3170: * handled in xmlParseAttValue()
3171: *
1.59 daniel 3172: * returns: XML_ATTRIBUTE_NONE, XML_ATTRIBUTE_REQUIRED, XML_ATTRIBUTE_IMPLIED
3173: * or XML_ATTRIBUTE_FIXED.
3174: */
3175:
3176: int
3177: xmlParseDefaultDecl(xmlParserCtxtPtr ctxt, CHAR **value) {
3178: int val;
3179: CHAR *ret;
3180:
3181: *value = NULL;
3182: if ((CUR == '#') && (NXT(1) == 'R') &&
3183: (NXT(2) == 'E') && (NXT(3) == 'Q') &&
3184: (NXT(4) == 'U') && (NXT(5) == 'I') &&
3185: (NXT(6) == 'R') && (NXT(7) == 'E') &&
3186: (NXT(8) == 'D')) {
3187: SKIP(9);
3188: return(XML_ATTRIBUTE_REQUIRED);
3189: }
3190: if ((CUR == '#') && (NXT(1) == 'I') &&
3191: (NXT(2) == 'M') && (NXT(3) == 'P') &&
3192: (NXT(4) == 'L') && (NXT(5) == 'I') &&
3193: (NXT(6) == 'E') && (NXT(7) == 'D')) {
3194: SKIP(8);
3195: return(XML_ATTRIBUTE_IMPLIED);
3196: }
3197: val = XML_ATTRIBUTE_NONE;
3198: if ((CUR == '#') && (NXT(1) == 'F') &&
3199: (NXT(2) == 'I') && (NXT(3) == 'X') &&
3200: (NXT(4) == 'E') && (NXT(5) == 'D')) {
3201: SKIP(6);
3202: val = XML_ATTRIBUTE_FIXED;
3203: if (!IS_BLANK(CUR)) {
3204: if ((ctxt->sax != NULL) && (ctxt->sax->error != NULL))
1.74 daniel 3205: ctxt->sax->error(ctxt->userData, "Space required after '#FIXED'\n");
1.59 daniel 3206: ctxt->wellFormed = 0;
3207: }
3208: SKIP_BLANKS;
3209: }
3210: ret = xmlParseAttValue(ctxt);
1.96 daniel 3211: ctxt->instate = XML_PARSER_DTD;
1.59 daniel 3212: if (ret == NULL) {
3213: if ((ctxt->sax != NULL) && (ctxt->sax->error != NULL))
1.74 daniel 3214: ctxt->sax->error(ctxt->userData,
1.59 daniel 3215: "Attribute default value declaration error\n");
3216: ctxt->wellFormed = 0;
3217: } else
3218: *value = ret;
3219: return(val);
3220: }
3221:
3222: /**
1.66 daniel 3223: * xmlParseNotationType:
3224: * @ctxt: an XML parser context
3225: *
3226: * parse an Notation attribute type.
3227: *
1.99 daniel 3228: * Note: the leading 'NOTATION' S part has already being parsed...
3229: *
1.66 daniel 3230: * [58] NotationType ::= 'NOTATION' S '(' S? Name (S? '|' S? Name)* S? ')'
3231: *
1.99 daniel 3232: * [ VC: Notation Attributes ]
1.117 daniel 3233: * Values of this type must match one of the notation names included
1.99 daniel 3234: * in the declaration; all notation names in the declaration must be declared.
1.66 daniel 3235: *
3236: * Returns: the notation attribute tree built while parsing
3237: */
3238:
3239: xmlEnumerationPtr
3240: xmlParseNotationType(xmlParserCtxtPtr ctxt) {
3241: CHAR *name;
3242: xmlEnumerationPtr ret = NULL, last = NULL, cur;
3243:
3244: if (CUR != '(') {
3245: if ((ctxt->sax != NULL) && (ctxt->sax->error != NULL))
1.74 daniel 3246: ctxt->sax->error(ctxt->userData, "'(' required to start 'NOTATION'\n");
1.66 daniel 3247: ctxt->wellFormed = 0;
3248: return(NULL);
3249: }
1.91 daniel 3250: SHRINK;
1.66 daniel 3251: do {
3252: NEXT;
3253: SKIP_BLANKS;
3254: name = xmlParseName(ctxt);
3255: if (name == NULL) {
3256: if ((ctxt->sax != NULL) && (ctxt->sax->error != NULL))
1.74 daniel 3257: ctxt->sax->error(ctxt->userData,
1.66 daniel 3258: "Name expected in NOTATION declaration\n");
3259: ctxt->wellFormed = 0;
3260: return(ret);
3261: }
3262: cur = xmlCreateEnumeration(name);
1.119 daniel 3263: xmlFree(name);
1.66 daniel 3264: if (cur == NULL) return(ret);
3265: if (last == NULL) ret = last = cur;
3266: else {
3267: last->next = cur;
3268: last = cur;
3269: }
3270: SKIP_BLANKS;
3271: } while (CUR == '|');
3272: if (CUR != ')') {
3273: if ((ctxt->sax != NULL) && (ctxt->sax->error != NULL))
1.74 daniel 3274: ctxt->sax->error(ctxt->userData,
1.66 daniel 3275: "')' required to finish NOTATION declaration\n");
3276: ctxt->wellFormed = 0;
3277: return(ret);
3278: }
3279: NEXT;
3280: return(ret);
3281: }
3282:
3283: /**
3284: * xmlParseEnumerationType:
3285: * @ctxt: an XML parser context
3286: *
3287: * parse an Enumeration attribute type.
3288: *
3289: * [59] Enumeration ::= '(' S? Nmtoken (S? '|' S? Nmtoken)* S? ')'
3290: *
1.99 daniel 3291: * [ VC: Enumeration ]
1.117 daniel 3292: * Values of this type must match one of the Nmtoken tokens in
1.99 daniel 3293: * the declaration
3294: *
1.66 daniel 3295: * Returns: the enumeration attribute tree built while parsing
3296: */
3297:
3298: xmlEnumerationPtr
3299: xmlParseEnumerationType(xmlParserCtxtPtr ctxt) {
3300: CHAR *name;
3301: xmlEnumerationPtr ret = NULL, last = NULL, cur;
3302:
3303: if (CUR != '(') {
3304: if ((ctxt->sax != NULL) && (ctxt->sax->error != NULL))
1.74 daniel 3305: ctxt->sax->error(ctxt->userData,
1.66 daniel 3306: "'(' required to start ATTLIST enumeration\n");
3307: ctxt->wellFormed = 0;
3308: return(NULL);
3309: }
1.91 daniel 3310: SHRINK;
1.66 daniel 3311: do {
3312: NEXT;
3313: SKIP_BLANKS;
3314: name = xmlParseNmtoken(ctxt);
3315: if (name == NULL) {
3316: if ((ctxt->sax != NULL) && (ctxt->sax->error != NULL))
1.74 daniel 3317: ctxt->sax->error(ctxt->userData,
1.66 daniel 3318: "NmToken expected in ATTLIST enumeration\n");
3319: ctxt->wellFormed = 0;
3320: return(ret);
3321: }
3322: cur = xmlCreateEnumeration(name);
1.119 daniel 3323: xmlFree(name);
1.66 daniel 3324: if (cur == NULL) return(ret);
3325: if (last == NULL) ret = last = cur;
3326: else {
3327: last->next = cur;
3328: last = cur;
3329: }
3330: SKIP_BLANKS;
3331: } while (CUR == '|');
3332: if (CUR != ')') {
3333: if ((ctxt->sax != NULL) && (ctxt->sax->error != NULL))
1.74 daniel 3334: ctxt->sax->error(ctxt->userData,
1.66 daniel 3335: "')' required to finish ATTLIST enumeration\n");
3336: ctxt->wellFormed = 0;
3337: return(ret);
3338: }
3339: NEXT;
3340: return(ret);
3341: }
3342:
3343: /**
1.50 daniel 3344: * xmlParseEnumeratedType:
3345: * @ctxt: an XML parser context
1.66 daniel 3346: * @tree: the enumeration tree built while parsing
1.50 daniel 3347: *
1.66 daniel 3348: * parse an Enumerated attribute type.
1.22 daniel 3349: *
3350: * [57] EnumeratedType ::= NotationType | Enumeration
3351: *
3352: * [58] NotationType ::= 'NOTATION' S '(' S? Name (S? '|' S? Name)* S? ')'
3353: *
1.50 daniel 3354: *
1.66 daniel 3355: * Returns: XML_ATTRIBUTE_ENUMERATION or XML_ATTRIBUTE_NOTATION
1.22 daniel 3356: */
3357:
1.66 daniel 3358: int
3359: xmlParseEnumeratedType(xmlParserCtxtPtr ctxt, xmlEnumerationPtr *tree) {
3360: if ((CUR == 'N') && (NXT(1) == 'O') &&
3361: (NXT(2) == 'T') && (NXT(3) == 'A') &&
3362: (NXT(4) == 'T') && (NXT(5) == 'I') &&
3363: (NXT(6) == 'O') && (NXT(7) == 'N')) {
3364: SKIP(8);
3365: if (!IS_BLANK(CUR)) {
3366: if ((ctxt->sax != NULL) && (ctxt->sax->error != NULL))
1.74 daniel 3367: ctxt->sax->error(ctxt->userData, "Space required after 'NOTATION'\n");
1.66 daniel 3368: ctxt->wellFormed = 0;
3369: return(0);
3370: }
3371: SKIP_BLANKS;
3372: *tree = xmlParseNotationType(ctxt);
3373: if (*tree == NULL) return(0);
3374: return(XML_ATTRIBUTE_NOTATION);
3375: }
3376: *tree = xmlParseEnumerationType(ctxt);
3377: if (*tree == NULL) return(0);
3378: return(XML_ATTRIBUTE_ENUMERATION);
1.22 daniel 3379: }
3380:
1.50 daniel 3381: /**
3382: * xmlParseAttributeType:
3383: * @ctxt: an XML parser context
1.66 daniel 3384: * @tree: the enumeration tree built while parsing
1.50 daniel 3385: *
1.59 daniel 3386: * parse the Attribute list def for an element
1.22 daniel 3387: *
3388: * [54] AttType ::= StringType | TokenizedType | EnumeratedType
3389: *
3390: * [55] StringType ::= 'CDATA'
3391: *
3392: * [56] TokenizedType ::= 'ID' | 'IDREF' | 'IDREFS' | 'ENTITY' |
3393: * 'ENTITIES' | 'NMTOKEN' | 'NMTOKENS'
1.50 daniel 3394: *
1.102 daniel 3395: * Validity constraints for attribute values syntax are checked in
3396: * xmlValidateAttributeValue()
3397: *
1.99 daniel 3398: * [ VC: ID ]
1.117 daniel 3399: * Values of type ID must match the Name production. A name must not
1.99 daniel 3400: * appear more than once in an XML document as a value of this type;
3401: * i.e., ID values must uniquely identify the elements which bear them.
3402: *
3403: * [ VC: One ID per Element Type ]
1.117 daniel 3404: * No element type may have more than one ID attribute specified.
1.99 daniel 3405: *
3406: * [ VC: ID Attribute Default ]
1.117 daniel 3407: * An ID attribute must have a declared default of #IMPLIED or #REQUIRED.
1.99 daniel 3408: *
3409: * [ VC: IDREF ]
1.102 daniel 3410: * Values of type IDREF must match the Name production, and values
1.117 daniel 3411: * of type IDREFS must match Names; TODO each IDREF Name must match the value
3412: * of an ID attribute on some element in the XML document; i.e. IDREF
1.99 daniel 3413: * values must match the value of some ID attribute.
3414: *
3415: * [ VC: Entity Name ]
1.102 daniel 3416: * Values of type ENTITY must match the Name production, values
1.117 daniel 3417: * of type ENTITIES must match Names; TODO each Entity Name must match the
3418: * name of an unparsed entity declared in the DTD.
1.99 daniel 3419: *
3420: * [ VC: Name Token ]
1.102 daniel 3421: * Values of type NMTOKEN must match the Nmtoken production; values
1.99 daniel 3422: * of type NMTOKENS must match Nmtokens.
3423: *
1.69 daniel 3424: * Returns the attribute type
1.22 daniel 3425: */
1.59 daniel 3426: int
1.66 daniel 3427: xmlParseAttributeType(xmlParserCtxtPtr ctxt, xmlEnumerationPtr *tree) {
1.91 daniel 3428: SHRINK;
1.40 daniel 3429: if ((CUR == 'C') && (NXT(1) == 'D') &&
3430: (NXT(2) == 'A') && (NXT(3) == 'T') &&
3431: (NXT(4) == 'A')) {
3432: SKIP(5);
1.66 daniel 3433: return(XML_ATTRIBUTE_CDATA);
1.40 daniel 3434: } else if ((CUR == 'I') && (NXT(1) == 'D') &&
3435: (NXT(2) == 'R') && (NXT(3) == 'E') &&
1.97 daniel 3436: (NXT(4) == 'F') && (NXT(5) == 'S')) {
3437: SKIP(6);
3438: return(XML_ATTRIBUTE_IDREFS);
3439: } else if ((CUR == 'I') && (NXT(1) == 'D') &&
3440: (NXT(2) == 'R') && (NXT(3) == 'E') &&
1.40 daniel 3441: (NXT(4) == 'F')) {
3442: SKIP(5);
1.59 daniel 3443: return(XML_ATTRIBUTE_IDREF);
1.66 daniel 3444: } else if ((CUR == 'I') && (NXT(1) == 'D')) {
3445: SKIP(2);
3446: return(XML_ATTRIBUTE_ID);
1.40 daniel 3447: } else if ((CUR == 'E') && (NXT(1) == 'N') &&
3448: (NXT(2) == 'T') && (NXT(3) == 'I') &&
3449: (NXT(4) == 'T') && (NXT(5) == 'Y')) {
3450: SKIP(6);
1.59 daniel 3451: return(XML_ATTRIBUTE_ENTITY);
1.40 daniel 3452: } else if ((CUR == 'E') && (NXT(1) == 'N') &&
3453: (NXT(2) == 'T') && (NXT(3) == 'I') &&
3454: (NXT(4) == 'T') && (NXT(5) == 'I') &&
3455: (NXT(6) == 'E') && (NXT(7) == 'S')) {
3456: SKIP(8);
1.59 daniel 3457: return(XML_ATTRIBUTE_ENTITIES);
1.40 daniel 3458: } else if ((CUR == 'N') && (NXT(1) == 'M') &&
3459: (NXT(2) == 'T') && (NXT(3) == 'O') &&
3460: (NXT(4) == 'K') && (NXT(5) == 'E') &&
1.66 daniel 3461: (NXT(6) == 'N') && (NXT(7) == 'S')) {
3462: SKIP(8);
3463: return(XML_ATTRIBUTE_NMTOKENS);
3464: } else if ((CUR == 'N') && (NXT(1) == 'M') &&
3465: (NXT(2) == 'T') && (NXT(3) == 'O') &&
3466: (NXT(4) == 'K') && (NXT(5) == 'E') &&
1.40 daniel 3467: (NXT(6) == 'N')) {
3468: SKIP(7);
1.59 daniel 3469: return(XML_ATTRIBUTE_NMTOKEN);
1.22 daniel 3470: }
1.66 daniel 3471: return(xmlParseEnumeratedType(ctxt, tree));
1.22 daniel 3472: }
3473:
1.50 daniel 3474: /**
3475: * xmlParseAttributeListDecl:
3476: * @ctxt: an XML parser context
3477: *
3478: * : parse the Attribute list def for an element
1.22 daniel 3479: *
3480: * [52] AttlistDecl ::= '<!ATTLIST' S Name AttDef* S? '>'
3481: *
3482: * [53] AttDef ::= S Name S AttType S DefaultDecl
1.50 daniel 3483: *
1.22 daniel 3484: */
1.55 daniel 3485: void
3486: xmlParseAttributeListDecl(xmlParserCtxtPtr ctxt) {
1.59 daniel 3487: CHAR *elemName;
3488: CHAR *attrName;
1.103 daniel 3489: xmlEnumerationPtr tree;
1.22 daniel 3490:
1.40 daniel 3491: if ((CUR == '<') && (NXT(1) == '!') &&
3492: (NXT(2) == 'A') && (NXT(3) == 'T') &&
3493: (NXT(4) == 'T') && (NXT(5) == 'L') &&
3494: (NXT(6) == 'I') && (NXT(7) == 'S') &&
1.59 daniel 3495: (NXT(8) == 'T')) {
1.40 daniel 3496: SKIP(9);
1.59 daniel 3497: if (!IS_BLANK(CUR)) {
3498: if ((ctxt->sax != NULL) && (ctxt->sax->error != NULL))
1.74 daniel 3499: ctxt->sax->error(ctxt->userData, "Space required after '<!ATTLIST'\n");
1.59 daniel 3500: ctxt->wellFormed = 0;
3501: }
1.42 daniel 3502: SKIP_BLANKS;
1.59 daniel 3503: elemName = xmlParseName(ctxt);
3504: if (elemName == NULL) {
1.55 daniel 3505: if ((ctxt->sax != NULL) && (ctxt->sax->error != NULL))
1.74 daniel 3506: ctxt->sax->error(ctxt->userData, "ATTLIST: no name for Element\n");
1.59 daniel 3507: ctxt->wellFormed = 0;
1.22 daniel 3508: return;
3509: }
1.42 daniel 3510: SKIP_BLANKS;
1.40 daniel 3511: while (CUR != '>') {
3512: const CHAR *check = CUR_PTR;
1.59 daniel 3513: int type;
3514: int def;
3515: CHAR *defaultValue = NULL;
3516:
1.103 daniel 3517: tree = NULL;
1.59 daniel 3518: attrName = xmlParseName(ctxt);
3519: if (attrName == NULL) {
3520: if ((ctxt->sax != NULL) && (ctxt->sax->error != NULL))
1.74 daniel 3521: ctxt->sax->error(ctxt->userData, "ATTLIST: no name for Attribute\n");
1.59 daniel 3522: ctxt->wellFormed = 0;
3523: break;
3524: }
1.97 daniel 3525: GROW;
1.59 daniel 3526: if (!IS_BLANK(CUR)) {
3527: if ((ctxt->sax != NULL) && (ctxt->sax->error != NULL))
1.74 daniel 3528: ctxt->sax->error(ctxt->userData,
1.59 daniel 3529: "Space required after the attribute name\n");
3530: ctxt->wellFormed = 0;
3531: break;
3532: }
3533: SKIP_BLANKS;
3534:
1.66 daniel 3535: type = xmlParseAttributeType(ctxt, &tree);
1.59 daniel 3536: if (type <= 0) break;
1.22 daniel 3537:
1.97 daniel 3538: GROW;
1.59 daniel 3539: if (!IS_BLANK(CUR)) {
3540: if ((ctxt->sax != NULL) && (ctxt->sax->error != NULL))
1.74 daniel 3541: ctxt->sax->error(ctxt->userData,
1.59 daniel 3542: "Space required after the attribute type\n");
3543: ctxt->wellFormed = 0;
3544: break;
3545: }
1.42 daniel 3546: SKIP_BLANKS;
1.59 daniel 3547:
3548: def = xmlParseDefaultDecl(ctxt, &defaultValue);
3549: if (def <= 0) break;
3550:
1.97 daniel 3551: GROW;
1.59 daniel 3552: if (CUR != '>') {
3553: if (!IS_BLANK(CUR)) {
3554: if ((ctxt->sax != NULL) && (ctxt->sax->error != NULL))
1.74 daniel 3555: ctxt->sax->error(ctxt->userData,
1.59 daniel 3556: "Space required after the attribute default value\n");
3557: ctxt->wellFormed = 0;
3558: break;
3559: }
3560: SKIP_BLANKS;
3561: }
1.40 daniel 3562: if (check == CUR_PTR) {
1.55 daniel 3563: if ((ctxt->sax != NULL) && (ctxt->sax->error != NULL))
1.74 daniel 3564: ctxt->sax->error(ctxt->userData,
1.59 daniel 3565: "xmlParseAttributeListDecl: detected internal error\n");
1.22 daniel 3566: break;
3567: }
1.72 daniel 3568: if ((ctxt->sax != NULL) && (ctxt->sax->attributeDecl != NULL))
1.74 daniel 3569: ctxt->sax->attributeDecl(ctxt->userData, elemName, attrName,
1.66 daniel 3570: type, def, defaultValue, tree);
1.59 daniel 3571: if (attrName != NULL)
1.119 daniel 3572: xmlFree(attrName);
1.59 daniel 3573: if (defaultValue != NULL)
1.119 daniel 3574: xmlFree(defaultValue);
1.97 daniel 3575: GROW;
1.22 daniel 3576: }
1.40 daniel 3577: if (CUR == '>')
3578: NEXT;
1.22 daniel 3579:
1.119 daniel 3580: xmlFree(elemName);
1.22 daniel 3581: }
3582: }
3583:
1.50 daniel 3584: /**
1.61 daniel 3585: * xmlParseElementMixedContentDecl:
3586: * @ctxt: an XML parser context
3587: *
3588: * parse the declaration for a Mixed Element content
3589: * The leading '(' and spaces have been skipped in xmlParseElementContentDecl
3590: *
3591: * [51] Mixed ::= '(' S? '#PCDATA' (S? '|' S? Name)* S? ')*' |
3592: * '(' S? '#PCDATA' S? ')'
3593: *
1.99 daniel 3594: * [ VC: Proper Group/PE Nesting ] applies to [51] too (see [49])
3595: *
3596: * [ VC: No Duplicate Types ]
1.117 daniel 3597: * The same name must not appear more than once in a single
3598: * mixed-content declaration.
1.99 daniel 3599: *
1.61 daniel 3600: * returns: the list of the xmlElementContentPtr describing the element choices
3601: */
3602: xmlElementContentPtr
1.62 daniel 3603: xmlParseElementMixedContentDecl(xmlParserCtxtPtr ctxt) {
1.64 daniel 3604: xmlElementContentPtr ret = NULL, cur = NULL, n;
1.61 daniel 3605: CHAR *elem = NULL;
3606:
1.97 daniel 3607: GROW;
1.61 daniel 3608: if ((CUR == '#') && (NXT(1) == 'P') &&
3609: (NXT(2) == 'C') && (NXT(3) == 'D') &&
3610: (NXT(4) == 'A') && (NXT(5) == 'T') &&
3611: (NXT(6) == 'A')) {
3612: SKIP(7);
3613: SKIP_BLANKS;
1.91 daniel 3614: SHRINK;
1.63 daniel 3615: if (CUR == ')') {
3616: NEXT;
3617: ret = xmlNewElementContent(NULL, XML_ELEMENT_CONTENT_PCDATA);
3618: return(ret);
3619: }
1.61 daniel 3620: if ((CUR == '(') || (CUR == '|')) {
3621: ret = cur = xmlNewElementContent(NULL, XML_ELEMENT_CONTENT_PCDATA);
3622: if (ret == NULL) return(NULL);
1.99 daniel 3623: }
1.61 daniel 3624: while (CUR == '|') {
1.64 daniel 3625: NEXT;
1.61 daniel 3626: if (elem == NULL) {
3627: ret = xmlNewElementContent(NULL, XML_ELEMENT_CONTENT_OR);
3628: if (ret == NULL) return(NULL);
3629: ret->c1 = cur;
1.64 daniel 3630: cur = ret;
1.61 daniel 3631: } else {
1.64 daniel 3632: n = xmlNewElementContent(NULL, XML_ELEMENT_CONTENT_OR);
3633: if (n == NULL) return(NULL);
3634: n->c1 = xmlNewElementContent(elem, XML_ELEMENT_CONTENT_ELEMENT);
3635: cur->c2 = n;
3636: cur = n;
1.119 daniel 3637: xmlFree(elem);
1.61 daniel 3638: }
3639: SKIP_BLANKS;
3640: elem = xmlParseName(ctxt);
3641: if (elem == NULL) {
3642: if ((ctxt->sax != NULL) && (ctxt->sax->error != NULL))
1.74 daniel 3643: ctxt->sax->error(ctxt->userData,
1.61 daniel 3644: "xmlParseElementMixedContentDecl : Name expected\n");
3645: ctxt->wellFormed = 0;
3646: xmlFreeElementContent(cur);
3647: return(NULL);
3648: }
3649: SKIP_BLANKS;
1.97 daniel 3650: GROW;
1.61 daniel 3651: }
1.63 daniel 3652: if ((CUR == ')') && (NXT(1) == '*')) {
1.66 daniel 3653: if (elem != NULL) {
1.61 daniel 3654: cur->c2 = xmlNewElementContent(elem,
3655: XML_ELEMENT_CONTENT_ELEMENT);
1.119 daniel 3656: xmlFree(elem);
1.66 daniel 3657: }
1.65 daniel 3658: ret->ocur = XML_ELEMENT_CONTENT_MULT;
1.64 daniel 3659: SKIP(2);
1.61 daniel 3660: } else {
1.119 daniel 3661: if (elem != NULL) xmlFree(elem);
1.61 daniel 3662: if ((ctxt->sax != NULL) && (ctxt->sax->error != NULL))
1.74 daniel 3663: ctxt->sax->error(ctxt->userData,
1.63 daniel 3664: "xmlParseElementMixedContentDecl : '|' or ')*' expected\n");
1.61 daniel 3665: ctxt->wellFormed = 0;
3666: xmlFreeElementContent(ret);
3667: return(NULL);
3668: }
3669:
3670: } else {
3671: if ((ctxt->sax != NULL) && (ctxt->sax->error != NULL))
1.74 daniel 3672: ctxt->sax->error(ctxt->userData,
1.61 daniel 3673: "xmlParseElementMixedContentDecl : '#PCDATA' expected\n");
3674: ctxt->wellFormed = 0;
3675: }
3676: return(ret);
3677: }
3678:
3679: /**
3680: * xmlParseElementChildrenContentDecl:
1.50 daniel 3681: * @ctxt: an XML parser context
3682: *
1.61 daniel 3683: * parse the declaration for a Mixed Element content
3684: * The leading '(' and spaces have been skipped in xmlParseElementContentDecl
1.22 daniel 3685: *
1.61 daniel 3686: *
1.22 daniel 3687: * [47] children ::= (choice | seq) ('?' | '*' | '+')?
3688: *
3689: * [48] cp ::= (Name | choice | seq) ('?' | '*' | '+')?
3690: *
3691: * [49] choice ::= '(' S? cp ( S? '|' S? cp )* S? ')'
3692: *
3693: * [50] seq ::= '(' S? cp ( S? ',' S? cp )* S? ')'
3694: *
1.99 daniel 3695: * [ VC: Proper Group/PE Nesting ] applies to [49] and [50]
3696: * TODO Parameter-entity replacement text must be properly nested
3697: * with parenthetized groups. That is to say, if either of the
3698: * opening or closing parentheses in a choice, seq, or Mixed
3699: * construct is contained in the replacement text for a parameter
3700: * entity, both must be contained in the same replacement text. For
3701: * interoperability, if a parameter-entity reference appears in a
3702: * choice, seq, or Mixed construct, its replacement text should not
3703: * be empty, and neither the first nor last non-blank character of
3704: * the replacement text should be a connector (| or ,).
3705: *
1.62 daniel 3706: * returns: the tree of xmlElementContentPtr describing the element
1.61 daniel 3707: * hierarchy.
3708: */
3709: xmlElementContentPtr
1.62 daniel 3710: xmlParseElementChildrenContentDecl(xmlParserCtxtPtr ctxt) {
1.63 daniel 3711: xmlElementContentPtr ret = NULL, cur = NULL, last = NULL, op = NULL;
1.62 daniel 3712: CHAR *elem;
3713: CHAR type = 0;
3714:
3715: SKIP_BLANKS;
1.94 daniel 3716: GROW;
1.62 daniel 3717: if (CUR == '(') {
1.63 daniel 3718: /* Recurse on first child */
1.62 daniel 3719: NEXT;
3720: SKIP_BLANKS;
3721: cur = ret = xmlParseElementChildrenContentDecl(ctxt);
3722: SKIP_BLANKS;
1.101 daniel 3723: GROW;
1.62 daniel 3724: } else {
3725: elem = xmlParseName(ctxt);
3726: if (elem == NULL) {
3727: if ((ctxt->sax != NULL) && (ctxt->sax->error != NULL))
1.74 daniel 3728: ctxt->sax->error(ctxt->userData,
1.62 daniel 3729: "xmlParseElementChildrenContentDecl : Name or '(' expected\n");
3730: ctxt->wellFormed = 0;
3731: return(NULL);
3732: }
3733: cur = ret = xmlNewElementContent(elem, XML_ELEMENT_CONTENT_ELEMENT);
1.101 daniel 3734: GROW;
1.62 daniel 3735: if (CUR == '?') {
1.104 daniel 3736: cur->ocur = XML_ELEMENT_CONTENT_OPT;
1.62 daniel 3737: NEXT;
3738: } else if (CUR == '*') {
1.104 daniel 3739: cur->ocur = XML_ELEMENT_CONTENT_MULT;
1.62 daniel 3740: NEXT;
3741: } else if (CUR == '+') {
1.104 daniel 3742: cur->ocur = XML_ELEMENT_CONTENT_PLUS;
1.62 daniel 3743: NEXT;
3744: } else {
1.104 daniel 3745: cur->ocur = XML_ELEMENT_CONTENT_ONCE;
1.62 daniel 3746: }
1.119 daniel 3747: xmlFree(elem);
1.101 daniel 3748: GROW;
1.62 daniel 3749: }
3750: SKIP_BLANKS;
1.91 daniel 3751: SHRINK;
1.62 daniel 3752: while (CUR != ')') {
1.63 daniel 3753: /*
3754: * Each loop we parse one separator and one element.
3755: */
1.62 daniel 3756: if (CUR == ',') {
3757: if (type == 0) type = CUR;
3758:
3759: /*
3760: * Detect "Name | Name , Name" error
3761: */
3762: else if (type != CUR) {
3763: if ((ctxt->sax != NULL) && (ctxt->sax->error != NULL))
1.74 daniel 3764: ctxt->sax->error(ctxt->userData,
1.62 daniel 3765: "xmlParseElementChildrenContentDecl : '%c' expected\n",
3766: type);
3767: ctxt->wellFormed = 0;
3768: xmlFreeElementContent(ret);
3769: return(NULL);
3770: }
1.64 daniel 3771: NEXT;
1.62 daniel 3772:
1.63 daniel 3773: op = xmlNewElementContent(NULL, XML_ELEMENT_CONTENT_SEQ);
3774: if (op == NULL) {
3775: xmlFreeElementContent(ret);
3776: return(NULL);
3777: }
3778: if (last == NULL) {
3779: op->c1 = ret;
1.65 daniel 3780: ret = cur = op;
1.63 daniel 3781: } else {
3782: cur->c2 = op;
3783: op->c1 = last;
3784: cur =op;
1.65 daniel 3785: last = NULL;
1.63 daniel 3786: }
1.62 daniel 3787: } else if (CUR == '|') {
3788: if (type == 0) type = CUR;
3789:
3790: /*
1.63 daniel 3791: * Detect "Name , Name | Name" error
1.62 daniel 3792: */
3793: else if (type != CUR) {
3794: if ((ctxt->sax != NULL) && (ctxt->sax->error != NULL))
1.74 daniel 3795: ctxt->sax->error(ctxt->userData,
1.62 daniel 3796: "xmlParseElementChildrenContentDecl : '%c' expected\n",
3797: type);
3798: ctxt->wellFormed = 0;
3799: xmlFreeElementContent(ret);
3800: return(NULL);
3801: }
1.64 daniel 3802: NEXT;
1.62 daniel 3803:
1.63 daniel 3804: op = xmlNewElementContent(NULL, XML_ELEMENT_CONTENT_OR);
3805: if (op == NULL) {
3806: xmlFreeElementContent(ret);
3807: return(NULL);
3808: }
3809: if (last == NULL) {
3810: op->c1 = ret;
1.65 daniel 3811: ret = cur = op;
1.63 daniel 3812: } else {
3813: cur->c2 = op;
3814: op->c1 = last;
3815: cur =op;
1.65 daniel 3816: last = NULL;
1.63 daniel 3817: }
1.62 daniel 3818: } else {
3819: if ((ctxt->sax != NULL) && (ctxt->sax->error != NULL))
1.74 daniel 3820: ctxt->sax->error(ctxt->userData,
1.62 daniel 3821: "xmlParseElementChildrenContentDecl : ',' '|' or ')' expected\n");
3822: ctxt->wellFormed = 0;
3823: xmlFreeElementContent(ret);
3824: return(NULL);
3825: }
1.101 daniel 3826: GROW;
1.62 daniel 3827: SKIP_BLANKS;
1.101 daniel 3828: GROW;
1.62 daniel 3829: if (CUR == '(') {
1.63 daniel 3830: /* Recurse on second child */
1.62 daniel 3831: NEXT;
3832: SKIP_BLANKS;
1.65 daniel 3833: last = xmlParseElementChildrenContentDecl(ctxt);
1.62 daniel 3834: SKIP_BLANKS;
3835: } else {
3836: elem = xmlParseName(ctxt);
3837: if (elem == NULL) {
3838: if ((ctxt->sax != NULL) && (ctxt->sax->error != NULL))
1.74 daniel 3839: ctxt->sax->error(ctxt->userData,
1.62 daniel 3840: "xmlParseElementChildrenContentDecl : Name or '(' expected\n");
3841: ctxt->wellFormed = 0;
3842: return(NULL);
3843: }
1.65 daniel 3844: last = xmlNewElementContent(elem, XML_ELEMENT_CONTENT_ELEMENT);
1.119 daniel 3845: xmlFree(elem);
1.105 daniel 3846: if (CUR == '?') {
3847: last->ocur = XML_ELEMENT_CONTENT_OPT;
3848: NEXT;
3849: } else if (CUR == '*') {
3850: last->ocur = XML_ELEMENT_CONTENT_MULT;
3851: NEXT;
3852: } else if (CUR == '+') {
3853: last->ocur = XML_ELEMENT_CONTENT_PLUS;
3854: NEXT;
3855: } else {
3856: last->ocur = XML_ELEMENT_CONTENT_ONCE;
3857: }
1.63 daniel 3858: }
3859: SKIP_BLANKS;
1.97 daniel 3860: GROW;
1.64 daniel 3861: }
1.65 daniel 3862: if ((cur != NULL) && (last != NULL)) {
3863: cur->c2 = last;
1.62 daniel 3864: }
3865: NEXT;
3866: if (CUR == '?') {
3867: ret->ocur = XML_ELEMENT_CONTENT_OPT;
3868: NEXT;
3869: } else if (CUR == '*') {
3870: ret->ocur = XML_ELEMENT_CONTENT_MULT;
3871: NEXT;
3872: } else if (CUR == '+') {
3873: ret->ocur = XML_ELEMENT_CONTENT_PLUS;
3874: NEXT;
3875: }
3876: return(ret);
1.61 daniel 3877: }
3878:
3879: /**
3880: * xmlParseElementContentDecl:
3881: * @ctxt: an XML parser context
3882: * @name: the name of the element being defined.
3883: * @result: the Element Content pointer will be stored here if any
1.22 daniel 3884: *
1.61 daniel 3885: * parse the declaration for an Element content either Mixed or Children,
3886: * the cases EMPTY and ANY are handled directly in xmlParseElementDecl
3887: *
3888: * [46] contentspec ::= 'EMPTY' | 'ANY' | Mixed | children
1.50 daniel 3889: *
1.61 daniel 3890: * returns: the type of element content XML_ELEMENT_TYPE_xxx
1.22 daniel 3891: */
3892:
1.61 daniel 3893: int
3894: xmlParseElementContentDecl(xmlParserCtxtPtr ctxt, CHAR *name,
3895: xmlElementContentPtr *result) {
3896:
3897: xmlElementContentPtr tree = NULL;
3898: int res;
3899:
3900: *result = NULL;
3901:
3902: if (CUR != '(') {
3903: if ((ctxt->sax != NULL) && (ctxt->sax->error != NULL))
1.74 daniel 3904: ctxt->sax->error(ctxt->userData,
1.61 daniel 3905: "xmlParseElementContentDecl : '(' expected\n");
3906: ctxt->wellFormed = 0;
3907: return(-1);
3908: }
3909: NEXT;
1.97 daniel 3910: GROW;
1.61 daniel 3911: SKIP_BLANKS;
3912: if ((CUR == '#') && (NXT(1) == 'P') &&
3913: (NXT(2) == 'C') && (NXT(3) == 'D') &&
3914: (NXT(4) == 'A') && (NXT(5) == 'T') &&
3915: (NXT(6) == 'A')) {
1.62 daniel 3916: tree = xmlParseElementMixedContentDecl(ctxt);
1.61 daniel 3917: res = XML_ELEMENT_TYPE_MIXED;
3918: } else {
1.62 daniel 3919: tree = xmlParseElementChildrenContentDecl(ctxt);
1.61 daniel 3920: res = XML_ELEMENT_TYPE_ELEMENT;
3921: }
3922: SKIP_BLANKS;
1.63 daniel 3923: /****************************
1.61 daniel 3924: if (CUR != ')') {
3925: if ((ctxt->sax != NULL) && (ctxt->sax->error != NULL))
1.74 daniel 3926: ctxt->sax->error(ctxt->userData,
1.61 daniel 3927: "xmlParseElementContentDecl : ')' expected\n");
3928: ctxt->wellFormed = 0;
3929: return(-1);
3930: }
1.63 daniel 3931: ****************************/
3932: *result = tree;
1.61 daniel 3933: return(res);
1.22 daniel 3934: }
3935:
1.50 daniel 3936: /**
3937: * xmlParseElementDecl:
3938: * @ctxt: an XML parser context
3939: *
3940: * parse an Element declaration.
1.22 daniel 3941: *
3942: * [45] elementdecl ::= '<!ELEMENT' S Name S contentspec S? '>'
3943: *
1.99 daniel 3944: * [ VC: Unique Element Type Declaration ]
1.117 daniel 3945: * No element type may be declared more than once
1.69 daniel 3946: *
3947: * Returns the type of the element, or -1 in case of error
1.22 daniel 3948: */
1.59 daniel 3949: int
1.55 daniel 3950: xmlParseElementDecl(xmlParserCtxtPtr ctxt) {
1.22 daniel 3951: CHAR *name;
1.59 daniel 3952: int ret = -1;
1.61 daniel 3953: xmlElementContentPtr content = NULL;
1.22 daniel 3954:
1.97 daniel 3955: GROW;
1.40 daniel 3956: if ((CUR == '<') && (NXT(1) == '!') &&
3957: (NXT(2) == 'E') && (NXT(3) == 'L') &&
3958: (NXT(4) == 'E') && (NXT(5) == 'M') &&
3959: (NXT(6) == 'E') && (NXT(7) == 'N') &&
1.59 daniel 3960: (NXT(8) == 'T')) {
1.40 daniel 3961: SKIP(9);
1.59 daniel 3962: if (!IS_BLANK(CUR)) {
3963: if ((ctxt->sax != NULL) && (ctxt->sax->error != NULL))
1.74 daniel 3964: ctxt->sax->error(ctxt->userData,
1.59 daniel 3965: "Space required after 'ELEMENT'\n");
3966: ctxt->wellFormed = 0;
3967: }
1.42 daniel 3968: SKIP_BLANKS;
1.22 daniel 3969: name = xmlParseName(ctxt);
3970: if (name == NULL) {
1.55 daniel 3971: if ((ctxt->sax != NULL) && (ctxt->sax->error != NULL))
1.74 daniel 3972: ctxt->sax->error(ctxt->userData,
1.59 daniel 3973: "xmlParseElementDecl: no name for Element\n");
3974: ctxt->wellFormed = 0;
3975: return(-1);
3976: }
3977: if (!IS_BLANK(CUR)) {
3978: if ((ctxt->sax != NULL) && (ctxt->sax->error != NULL))
1.74 daniel 3979: ctxt->sax->error(ctxt->userData,
1.59 daniel 3980: "Space required after the element name\n");
3981: ctxt->wellFormed = 0;
1.22 daniel 3982: }
1.42 daniel 3983: SKIP_BLANKS;
1.40 daniel 3984: if ((CUR == 'E') && (NXT(1) == 'M') &&
3985: (NXT(2) == 'P') && (NXT(3) == 'T') &&
3986: (NXT(4) == 'Y')) {
3987: SKIP(5);
1.22 daniel 3988: /*
3989: * Element must always be empty.
3990: */
1.59 daniel 3991: ret = XML_ELEMENT_TYPE_EMPTY;
1.40 daniel 3992: } else if ((CUR == 'A') && (NXT(1) == 'N') &&
3993: (NXT(2) == 'Y')) {
3994: SKIP(3);
1.22 daniel 3995: /*
3996: * Element is a generic container.
3997: */
1.59 daniel 3998: ret = XML_ELEMENT_TYPE_ANY;
1.61 daniel 3999: } else if (CUR == '(') {
4000: ret = xmlParseElementContentDecl(ctxt, name, &content);
1.22 daniel 4001: } else {
1.98 daniel 4002: /*
4003: * [ WFC: PEs in Internal Subset ] error handling.
4004: */
4005: if ((CUR == '%') && (ctxt->external == 0) &&
4006: (ctxt->inputNr == 1)) {
4007: if ((ctxt->sax != NULL) && (ctxt->sax->error != NULL))
4008: ctxt->sax->error(ctxt->userData,
4009: "PEReference: forbidden within markup decl in internal subset\n");
4010: } else {
4011: if ((ctxt->sax != NULL) && (ctxt->sax->error != NULL))
4012: ctxt->sax->error(ctxt->userData,
4013: "xmlParseElementDecl: 'EMPTY', 'ANY' or '(' expected\n");
4014: }
1.61 daniel 4015: ctxt->wellFormed = 0;
1.119 daniel 4016: if (name != NULL) xmlFree(name);
1.61 daniel 4017: return(-1);
1.22 daniel 4018: }
1.42 daniel 4019: SKIP_BLANKS;
1.40 daniel 4020: if (CUR != '>') {
1.55 daniel 4021: if ((ctxt->sax != NULL) && (ctxt->sax->error != NULL))
1.74 daniel 4022: ctxt->sax->error(ctxt->userData,
1.31 daniel 4023: "xmlParseElementDecl: expected '>' at the end\n");
1.59 daniel 4024: ctxt->wellFormed = 0;
1.61 daniel 4025: } else {
1.40 daniel 4026: NEXT;
1.72 daniel 4027: if ((ctxt->sax != NULL) && (ctxt->sax->elementDecl != NULL))
1.76 daniel 4028: ctxt->sax->elementDecl(ctxt->userData, name, ret,
4029: content);
1.61 daniel 4030: }
1.84 daniel 4031: if (content != NULL) {
4032: xmlFreeElementContent(content);
4033: }
1.61 daniel 4034: if (name != NULL) {
1.119 daniel 4035: xmlFree(name);
1.61 daniel 4036: }
1.22 daniel 4037: }
1.59 daniel 4038: return(ret);
1.22 daniel 4039: }
4040:
1.50 daniel 4041: /**
4042: * xmlParseMarkupDecl:
4043: * @ctxt: an XML parser context
4044: *
4045: * parse Markup declarations
1.22 daniel 4046: *
4047: * [29] markupdecl ::= elementdecl | AttlistDecl | EntityDecl |
4048: * NotationDecl | PI | Comment
4049: *
1.98 daniel 4050: * [ VC: Proper Declaration/PE Nesting ]
4051: * TODO Parameter-entity replacement text must be properly nested with
4052: * markup declarations. That is to say, if either the first character
4053: * or the last character of a markup declaration (markupdecl above) is
4054: * contained in the replacement text for a parameter-entity reference,
4055: * both must be contained in the same replacement text.
4056: *
4057: * [ WFC: PEs in Internal Subset ]
4058: * In the internal DTD subset, parameter-entity references can occur
4059: * only where markup declarations can occur, not within markup declarations.
4060: * (This does not apply to references that occur in external parameter
4061: * entities or to the external subset.)
1.22 daniel 4062: */
1.55 daniel 4063: void
4064: xmlParseMarkupDecl(xmlParserCtxtPtr ctxt) {
1.94 daniel 4065: GROW;
1.22 daniel 4066: xmlParseElementDecl(ctxt);
4067: xmlParseAttributeListDecl(ctxt);
4068: xmlParseEntityDecl(ctxt);
4069: xmlParseNotationDecl(ctxt);
4070: xmlParsePI(ctxt);
1.114 daniel 4071: xmlParseComment(ctxt);
1.98 daniel 4072: /*
4073: * This is only for internal subset. On external entities,
4074: * the replacement is done before parsing stage
4075: */
4076: if ((ctxt->external == 0) && (ctxt->inputNr == 1))
4077: xmlParsePEReference(ctxt);
1.97 daniel 4078: ctxt->instate = XML_PARSER_DTD;
1.22 daniel 4079: }
4080:
1.50 daniel 4081: /**
1.76 daniel 4082: * xmlParseTextDecl:
4083: * @ctxt: an XML parser context
4084: *
4085: * parse an XML declaration header for external entities
4086: *
4087: * [77] TextDecl ::= '<?xml' VersionInfo? EncodingDecl S? '?>'
4088: *
4089: * Returns the only valuable info for an external parsed entity, the encoding
4090: */
4091:
4092: CHAR *
4093: xmlParseTextDecl(xmlParserCtxtPtr ctxt) {
4094: CHAR *version;
4095: CHAR *encoding = NULL;
4096:
4097: /*
4098: * We know that '<?xml' is here.
4099: */
4100: SKIP(5);
4101:
4102: if (!IS_BLANK(CUR)) {
4103: if ((ctxt->sax != NULL) && (ctxt->sax->error != NULL))
4104: ctxt->sax->error(ctxt->userData, "Blank needed after '<?xml'\n");
4105: ctxt->wellFormed = 0;
4106: }
4107: SKIP_BLANKS;
4108:
4109: /*
4110: * We may have the VersionInfo here.
4111: */
4112: version = xmlParseVersionInfo(ctxt);
4113: if (version == NULL)
4114: version = xmlCharStrdup(XML_DEFAULT_VERSION);
4115: ctxt->version = xmlStrdup(version);
1.119 daniel 4116: xmlFree(version);
1.76 daniel 4117:
4118: /*
4119: * We must have the encoding declaration
4120: */
4121: if (!IS_BLANK(CUR)) {
4122: if ((ctxt->sax != NULL) && (ctxt->sax->error != NULL))
4123: ctxt->sax->error(ctxt->userData, "Blank needed here\n");
4124: ctxt->wellFormed = 0;
4125: }
4126: encoding = xmlParseEncodingDecl(ctxt);
4127:
4128: SKIP_BLANKS;
4129: if ((CUR == '?') && (NXT(1) == '>')) {
4130: SKIP(2);
4131: } else if (CUR == '>') {
4132: /* Deprecated old WD ... */
4133: if ((ctxt->sax != NULL) && (ctxt->sax->error != NULL))
4134: ctxt->sax->error(ctxt->userData, "XML declaration must end-up with '?>'\n");
4135: ctxt->wellFormed = 0;
4136: NEXT;
4137: } else {
4138: if ((ctxt->sax != NULL) && (ctxt->sax->error != NULL))
4139: ctxt->sax->error(ctxt->userData, "parsing XML declaration: '?>' expected\n");
4140: ctxt->wellFormed = 0;
4141: MOVETO_ENDTAG(CUR_PTR);
4142: NEXT;
4143: }
4144: return(encoding);
4145: }
4146:
4147: /*
4148: * xmlParseConditionalSections
4149: * @ctxt: an XML parser context
4150: *
4151: * TODO : Conditionnal section are not yet supported !
4152: *
4153: * [61] conditionalSect ::= includeSect | ignoreSect
4154: * [62] includeSect ::= '<![' S? 'INCLUDE' S? '[' extSubsetDecl ']]>'
4155: * [63] ignoreSect ::= '<![' S? 'IGNORE' S? '[' ignoreSectContents* ']]>'
4156: * [64] ignoreSectContents ::= Ignore ('<![' ignoreSectContents ']]>' Ignore)*
4157: * [65] Ignore ::= Char* - (Char* ('<![' | ']]>') Char*)
4158: */
4159:
4160: void
4161: xmlParseConditionalSections(xmlParserCtxtPtr ctxt) {
4162: if ((ctxt->sax != NULL) && (ctxt->sax->warning != NULL))
4163: ctxt->sax->warning(ctxt->userData,
4164: "XML conditional section not supported\n");
4165: /*
4166: * Skip up to the end of the conditionnal section.
4167: */
4168: while ((CUR != 0) && ((CUR != ']') || (NXT(1) != ']') || (NXT(2) != '>')))
4169: NEXT;
4170: if (CUR == 0) {
4171: if ((ctxt->sax != NULL) && (ctxt->sax->error != NULL))
4172: ctxt->sax->error(ctxt->userData,
4173: "XML conditional section not closed\n");
4174: ctxt->wellFormed = 0;
4175: }
4176: }
4177:
4178: /**
4179: * xmlParseExternalSubset
4180: * @ctxt: an XML parser context
4181: *
4182: * parse Markup declarations from an external subset
4183: *
4184: * [30] extSubset ::= textDecl? extSubsetDecl
4185: *
4186: * [31] extSubsetDecl ::= (markupdecl | conditionalSect | PEReference | S) *
4187: */
4188: void
1.79 daniel 4189: xmlParseExternalSubset(xmlParserCtxtPtr ctxt, const CHAR *ExternalID,
4190: const CHAR *SystemID) {
1.76 daniel 4191: if ((CUR == '<') && (NXT(1) == '?') &&
4192: (NXT(2) == 'x') && (NXT(3) == 'm') &&
4193: (NXT(4) == 'l')) {
4194: xmlParseTextDecl(ctxt);
4195: }
1.79 daniel 4196: if (ctxt->myDoc == NULL) {
1.116 daniel 4197: ctxt->myDoc = xmlNewDoc(BAD_CAST "1.0");
1.79 daniel 4198: }
4199: if ((ctxt->myDoc != NULL) && (ctxt->myDoc->intSubset == NULL))
4200: xmlCreateIntSubset(ctxt->myDoc, NULL, ExternalID, SystemID);
4201:
1.96 daniel 4202: ctxt->instate = XML_PARSER_DTD;
1.101 daniel 4203: ctxt->external = 1;
1.76 daniel 4204: while (((CUR == '<') && (NXT(1) == '?')) ||
4205: ((CUR == '<') && (NXT(1) == '!')) ||
4206: IS_BLANK(CUR)) {
1.115 daniel 4207: const CHAR *check = CUR_PTR;
4208: int cons = ctxt->input->consumed;
4209:
1.76 daniel 4210: if ((CUR == '<') && (NXT(1) == '!') && (NXT(2) == '[')) {
4211: xmlParseConditionalSections(ctxt);
4212: } else if (IS_BLANK(CUR)) {
4213: NEXT;
4214: } else if (CUR == '%') {
4215: xmlParsePEReference(ctxt);
4216: } else
4217: xmlParseMarkupDecl(ctxt);
1.77 daniel 4218:
4219: /*
4220: * Pop-up of finished entities.
4221: */
4222: while ((CUR == 0) && (ctxt->inputNr > 1))
4223: xmlPopInput(ctxt);
4224:
1.115 daniel 4225: if ((CUR_PTR == check) && (cons == ctxt->input->consumed)) {
4226: if ((ctxt->sax != NULL) && (ctxt->sax->error != NULL))
4227: ctxt->sax->error(ctxt->userData,
4228: "Content error in the external subset\n");
4229: ctxt->wellFormed = 0;
4230: break;
4231: }
1.76 daniel 4232: }
4233:
4234: if (CUR != 0) {
4235: if ((ctxt->sax != NULL) && (ctxt->sax->error != NULL))
4236: ctxt->sax->error(ctxt->userData,
4237: "Extra content at the end of the document\n");
4238: ctxt->wellFormed = 0;
4239: }
4240:
4241: }
4242:
4243: /**
1.77 daniel 4244: * xmlParseReference:
4245: * @ctxt: an XML parser context
4246: *
4247: * parse and handle entity references in content, depending on the SAX
4248: * interface, this may end-up in a call to character() if this is a
1.79 daniel 4249: * CharRef, a predefined entity, if there is no reference() callback.
4250: * or if the parser was asked to switch to that mode.
1.77 daniel 4251: *
4252: * [67] Reference ::= EntityRef | CharRef
4253: */
4254: void
4255: xmlParseReference(xmlParserCtxtPtr ctxt) {
4256: xmlEntityPtr ent;
4257: CHAR *val;
4258: if (CUR != '&') return;
4259:
1.113 daniel 4260: if (ctxt->inputNr > 1) {
4261: CHAR cur[2] = { '&' , 0 } ;
4262:
4263: if ((ctxt->sax != NULL) && (ctxt->sax->characters != NULL))
4264: ctxt->sax->characters(ctxt->userData, cur, 1);
4265: if (ctxt->token == '&')
4266: ctxt->token = 0;
4267: else {
4268: SKIP(1);
4269: }
4270: return;
4271: }
1.77 daniel 4272: if (NXT(1) == '#') {
4273: CHAR out[2];
4274: int val = xmlParseCharRef(ctxt);
1.117 daniel 4275: /* invalid for UTF-8 variable encoding !!!!! */
1.77 daniel 4276: out[0] = val;
4277: out[1] = 0;
4278: if ((ctxt->sax != NULL) && (ctxt->sax->characters != NULL))
4279: ctxt->sax->characters(ctxt->userData, out, 1);
4280: } else {
4281: ent = xmlParseEntityRef(ctxt);
4282: if (ent == NULL) return;
4283: if ((ent->name != NULL) &&
1.113 daniel 4284: (ent->type != XML_INTERNAL_PREDEFINED_ENTITY)) {
4285: if ((ctxt->sax != NULL) && (ctxt->sax->reference != NULL) &&
4286: (ctxt->replaceEntities == 0)) {
4287: /*
4288: * Create a node.
4289: */
4290: ctxt->sax->reference(ctxt->userData, ent->name);
4291: return;
4292: } else if (ctxt->replaceEntities) {
4293: xmlParserInputPtr input;
1.79 daniel 4294:
1.113 daniel 4295: input = xmlNewEntityInputStream(ctxt, ent);
4296: xmlPushInput(ctxt, input);
4297: return;
4298: }
1.77 daniel 4299: }
4300: val = ent->content;
4301: if (val == NULL) return;
4302: /*
4303: * inline the entity.
4304: */
4305: if ((ctxt->sax != NULL) && (ctxt->sax->characters != NULL))
4306: ctxt->sax->characters(ctxt->userData, val, xmlStrlen(val));
4307: }
1.24 daniel 4308: }
4309:
1.50 daniel 4310: /**
4311: * xmlParseEntityRef:
4312: * @ctxt: an XML parser context
4313: *
4314: * parse ENTITY references declarations
1.24 daniel 4315: *
4316: * [68] EntityRef ::= '&' Name ';'
1.68 daniel 4317: *
1.98 daniel 4318: * [ WFC: Entity Declared ]
4319: * In a document without any DTD, a document with only an internal DTD
4320: * subset which contains no parameter entity references, or a document
4321: * with "standalone='yes'", the Name given in the entity reference
4322: * must match that in an entity declaration, except that well-formed
4323: * documents need not declare any of the following entities: amp, lt,
4324: * gt, apos, quot. The declaration of a parameter entity must precede
4325: * any reference to it. Similarly, the declaration of a general entity
4326: * must precede any reference to it which appears in a default value in an
4327: * attribute-list declaration. Note that if entities are declared in the
4328: * external subset or in external parameter entities, a non-validating
4329: * processor is not obligated to read and process their declarations;
4330: * for such documents, the rule that an entity must be declared is a
4331: * well-formedness constraint only if standalone='yes'.
4332: *
4333: * [ WFC: Parsed Entity ]
4334: * An entity reference must not contain the name of an unparsed entity
4335: *
1.77 daniel 4336: * Returns the xmlEntityPtr if found, or NULL otherwise.
1.24 daniel 4337: */
1.77 daniel 4338: xmlEntityPtr
1.55 daniel 4339: xmlParseEntityRef(xmlParserCtxtPtr ctxt) {
1.24 daniel 4340: CHAR *name;
1.72 daniel 4341: xmlEntityPtr ent = NULL;
1.24 daniel 4342:
1.91 daniel 4343: GROW;
1.111 daniel 4344:
1.40 daniel 4345: if (CUR == '&') {
4346: NEXT;
1.24 daniel 4347: name = xmlParseName(ctxt);
4348: if (name == NULL) {
1.55 daniel 4349: if ((ctxt->sax != NULL) && (ctxt->sax->error != NULL))
1.98 daniel 4350: ctxt->sax->error(ctxt->userData,
4351: "xmlParseEntityRef: no name\n");
1.59 daniel 4352: ctxt->wellFormed = 0;
1.24 daniel 4353: } else {
1.40 daniel 4354: if (CUR == ';') {
4355: NEXT;
1.24 daniel 4356: /*
1.77 daniel 4357: * Ask first SAX for entity resolution, otherwise try the
4358: * predefined set.
4359: */
4360: if (ctxt->sax != NULL) {
4361: if (ctxt->sax->getEntity != NULL)
4362: ent = ctxt->sax->getEntity(ctxt->userData, name);
4363: if (ent == NULL)
4364: ent = xmlGetPredefinedEntity(name);
4365: }
4366: /*
1.98 daniel 4367: * [ WFC: Entity Declared ]
4368: * In a document without any DTD, a document with only an
4369: * internal DTD subset which contains no parameter entity
4370: * references, or a document with "standalone='yes'", the
4371: * Name given in the entity reference must match that in an
4372: * entity declaration, except that well-formed documents
4373: * need not declare any of the following entities: amp, lt,
4374: * gt, apos, quot.
4375: * The declaration of a parameter entity must precede any
4376: * reference to it.
4377: * Similarly, the declaration of a general entity must
4378: * precede any reference to it which appears in a default
4379: * value in an attribute-list declaration. Note that if
4380: * entities are declared in the external subset or in
4381: * external parameter entities, a non-validating processor
4382: * is not obligated to read and process their declarations;
4383: * for such documents, the rule that an entity must be
4384: * declared is a well-formedness constraint only if
4385: * standalone='yes'.
1.59 daniel 4386: */
1.77 daniel 4387: if (ent == NULL) {
1.98 daniel 4388: if ((ctxt->standalone == 1) ||
4389: ((ctxt->hasExternalSubset == 0) &&
4390: (ctxt->hasPErefs == 0))) {
4391: if ((ctxt->sax != NULL) && (ctxt->sax->error != NULL))
1.77 daniel 4392: ctxt->sax->error(ctxt->userData,
4393: "Entity '%s' not defined\n", name);
4394: ctxt->wellFormed = 0;
4395: } else {
1.98 daniel 4396: if ((ctxt->sax != NULL) && (ctxt->sax->warning != NULL))
4397: ctxt->sax->warning(ctxt->userData,
4398: "Entity '%s' not defined\n", name);
1.59 daniel 4399: }
1.77 daniel 4400: }
1.59 daniel 4401:
4402: /*
1.98 daniel 4403: * [ WFC: Parsed Entity ]
4404: * An entity reference must not contain the name of an
4405: * unparsed entity
4406: */
4407: else if (ent->type == XML_EXTERNAL_GENERAL_UNPARSED_ENTITY) {
4408: if ((ctxt->sax != NULL) && (ctxt->sax->error != NULL))
4409: ctxt->sax->error(ctxt->userData,
4410: "Entity reference to unparsed entity %s\n", name);
4411: ctxt->wellFormed = 0;
4412: }
4413:
4414: /*
4415: * [ WFC: No External Entity References ]
4416: * Attribute values cannot contain direct or indirect
4417: * entity references to external entities.
4418: */
4419: else if ((ctxt->instate == XML_PARSER_ATTRIBUTE_VALUE) &&
4420: (ent->type == XML_EXTERNAL_GENERAL_PARSED_ENTITY)) {
4421: if ((ctxt->sax != NULL) && (ctxt->sax->error != NULL))
4422: ctxt->sax->error(ctxt->userData,
4423: "Attribute references external entity '%s'\n", name);
4424: ctxt->wellFormed = 0;
4425: }
4426: /*
4427: * [ WFC: No < in Attribute Values ]
4428: * The replacement text of any entity referred to directly or
4429: * indirectly in an attribute value (other than "<") must
4430: * not contain a <.
1.59 daniel 4431: */
1.98 daniel 4432: else if ((ctxt->instate == XML_PARSER_ATTRIBUTE_VALUE) &&
1.116 daniel 4433: (ent != NULL) &&
4434: (xmlStrcmp(ent->name, BAD_CAST "lt")) &&
1.98 daniel 4435: (ent->content != NULL) &&
4436: (xmlStrchr(ent->content, '<'))) {
4437: if ((ctxt->sax != NULL) && (ctxt->sax->error != NULL))
4438: ctxt->sax->error(ctxt->userData,
4439: "'<' in entity '%s' is not allowed in attributes values\n", name);
4440: ctxt->wellFormed = 0;
4441: }
4442:
4443: /*
4444: * Internal check, no parameter entities here ...
4445: */
4446: else {
1.59 daniel 4447: switch (ent->type) {
4448: case XML_INTERNAL_PARAMETER_ENTITY:
4449: case XML_EXTERNAL_PARAMETER_ENTITY:
4450: if ((ctxt->sax != NULL) && (ctxt->sax->error != NULL))
1.74 daniel 4451: ctxt->sax->error(ctxt->userData,
1.59 daniel 4452: "Attempt to reference the parameter entity '%s'\n", name);
4453: ctxt->wellFormed = 0;
4454: break;
4455: }
4456: }
4457:
4458: /*
1.98 daniel 4459: * [ WFC: No Recursion ]
1.117 daniel 4460: * TODO A parsed entity must not contain a recursive reference
4461: * to itself, either directly or indirectly.
1.59 daniel 4462: */
1.77 daniel 4463:
1.24 daniel 4464: } else {
1.55 daniel 4465: if ((ctxt->sax != NULL) && (ctxt->sax->error != NULL))
1.74 daniel 4466: ctxt->sax->error(ctxt->userData,
1.59 daniel 4467: "xmlParseEntityRef: expecting ';'\n");
4468: ctxt->wellFormed = 0;
1.24 daniel 4469: }
1.119 daniel 4470: xmlFree(name);
1.24 daniel 4471: }
4472: }
1.77 daniel 4473: return(ent);
1.24 daniel 4474: }
4475:
1.50 daniel 4476: /**
4477: * xmlParsePEReference:
4478: * @ctxt: an XML parser context
4479: *
4480: * parse PEReference declarations
1.77 daniel 4481: * The entity content is handled directly by pushing it's content as
4482: * a new input stream.
1.22 daniel 4483: *
4484: * [69] PEReference ::= '%' Name ';'
1.68 daniel 4485: *
1.98 daniel 4486: * [ WFC: No Recursion ]
4487: * TODO A parsed entity must not contain a recursive
4488: * reference to itself, either directly or indirectly.
4489: *
4490: * [ WFC: Entity Declared ]
4491: * In a document without any DTD, a document with only an internal DTD
4492: * subset which contains no parameter entity references, or a document
4493: * with "standalone='yes'", ... ... The declaration of a parameter
4494: * entity must precede any reference to it...
4495: *
4496: * [ VC: Entity Declared ]
4497: * In a document with an external subset or external parameter entities
4498: * with "standalone='no'", ... ... The declaration of a parameter entity
4499: * must precede any reference to it...
4500: *
4501: * [ WFC: In DTD ]
4502: * Parameter-entity references may only appear in the DTD.
4503: * NOTE: misleading but this is handled.
1.22 daniel 4504: */
1.77 daniel 4505: void
1.55 daniel 4506: xmlParsePEReference(xmlParserCtxtPtr ctxt) {
1.22 daniel 4507: CHAR *name;
1.72 daniel 4508: xmlEntityPtr entity = NULL;
1.50 daniel 4509: xmlParserInputPtr input;
1.22 daniel 4510:
1.40 daniel 4511: if (CUR == '%') {
4512: NEXT;
1.22 daniel 4513: name = xmlParseName(ctxt);
4514: if (name == NULL) {
1.55 daniel 4515: if ((ctxt->sax != NULL) && (ctxt->sax->error != NULL))
1.74 daniel 4516: ctxt->sax->error(ctxt->userData, "xmlParsePEReference: no name\n");
1.59 daniel 4517: ctxt->wellFormed = 0;
1.22 daniel 4518: } else {
1.40 daniel 4519: if (CUR == ';') {
4520: NEXT;
1.98 daniel 4521: if ((ctxt->sax != NULL) &&
4522: (ctxt->sax->getParameterEntity != NULL))
4523: entity = ctxt->sax->getParameterEntity(ctxt->userData,
4524: name);
1.45 daniel 4525: if (entity == NULL) {
1.98 daniel 4526: /*
4527: * [ WFC: Entity Declared ]
4528: * In a document without any DTD, a document with only an
4529: * internal DTD subset which contains no parameter entity
4530: * references, or a document with "standalone='yes'", ...
4531: * ... The declaration of a parameter entity must precede
4532: * any reference to it...
4533: */
4534: if ((ctxt->standalone == 1) ||
4535: ((ctxt->hasExternalSubset == 0) &&
4536: (ctxt->hasPErefs == 0))) {
4537: if ((ctxt->sax != NULL) && (ctxt->sax->error != NULL))
4538: ctxt->sax->error(ctxt->userData,
4539: "PEReference: %%%s; not found\n", name);
4540: ctxt->wellFormed = 0;
4541: } else {
4542: /*
4543: * [ VC: Entity Declared ]
4544: * In a document with an external subset or external
4545: * parameter entities with "standalone='no'", ...
4546: * ... The declaration of a parameter entity must precede
4547: * any reference to it...
4548: */
4549: if ((ctxt->sax != NULL) && (ctxt->sax->warning != NULL))
4550: ctxt->sax->warning(ctxt->userData,
4551: "PEReference: %%%s; not found\n", name);
4552: ctxt->valid = 0;
4553: }
1.50 daniel 4554: } else {
1.98 daniel 4555: /*
4556: * Internal checking in case the entity quest barfed
4557: */
4558: if ((entity->type != XML_INTERNAL_PARAMETER_ENTITY) &&
4559: (entity->type != XML_EXTERNAL_PARAMETER_ENTITY)) {
4560: if ((ctxt->sax != NULL) && (ctxt->sax->warning != NULL))
4561: ctxt->sax->warning(ctxt->userData,
4562: "Internal: %%%s; is not a parameter entity\n", name);
4563: } else {
4564: input = xmlNewEntityInputStream(ctxt, entity);
4565: xmlPushInput(ctxt, input);
4566: }
1.45 daniel 4567: }
1.98 daniel 4568: ctxt->hasPErefs = 1;
1.22 daniel 4569: } else {
1.55 daniel 4570: if ((ctxt->sax != NULL) && (ctxt->sax->error != NULL))
1.74 daniel 4571: ctxt->sax->error(ctxt->userData,
1.59 daniel 4572: "xmlParsePEReference: expecting ';'\n");
4573: ctxt->wellFormed = 0;
1.22 daniel 4574: }
1.119 daniel 4575: xmlFree(name);
1.3 veillard 4576: }
4577: }
4578: }
4579:
1.50 daniel 4580: /**
4581: * xmlParseDocTypeDecl :
4582: * @ctxt: an XML parser context
4583: *
4584: * parse a DOCTYPE declaration
1.21 daniel 4585: *
1.22 daniel 4586: * [28] doctypedecl ::= '<!DOCTYPE' S Name (S ExternalID)? S?
4587: * ('[' (markupdecl | PEReference | S)* ']' S?)? '>'
1.98 daniel 4588: *
4589: * [ VC: Root Element Type ]
1.99 daniel 4590: * The Name in the document type declaration must match the element
1.98 daniel 4591: * type of the root element.
1.21 daniel 4592: */
4593:
1.55 daniel 4594: void
4595: xmlParseDocTypeDecl(xmlParserCtxtPtr ctxt) {
1.21 daniel 4596: CHAR *name;
4597: CHAR *ExternalID = NULL;
1.39 daniel 4598: CHAR *URI = NULL;
1.21 daniel 4599:
4600: /*
4601: * We know that '<!DOCTYPE' has been detected.
4602: */
1.40 daniel 4603: SKIP(9);
1.21 daniel 4604:
1.42 daniel 4605: SKIP_BLANKS;
1.21 daniel 4606:
4607: /*
4608: * Parse the DOCTYPE name.
4609: */
4610: name = xmlParseName(ctxt);
4611: if (name == NULL) {
1.55 daniel 4612: if ((ctxt->sax != NULL) && (ctxt->sax->error != NULL))
1.74 daniel 4613: ctxt->sax->error(ctxt->userData, "xmlParseDocTypeDecl : no DOCTYPE name !\n");
1.59 daniel 4614: ctxt->wellFormed = 0;
1.21 daniel 4615: }
4616:
1.42 daniel 4617: SKIP_BLANKS;
1.21 daniel 4618:
4619: /*
1.22 daniel 4620: * Check for SystemID and ExternalID
4621: */
1.67 daniel 4622: URI = xmlParseExternalID(ctxt, &ExternalID, 1);
1.98 daniel 4623:
4624: if ((URI != NULL) || (ExternalID != NULL)) {
4625: ctxt->hasExternalSubset = 1;
4626: }
4627:
1.42 daniel 4628: SKIP_BLANKS;
1.36 daniel 4629:
1.76 daniel 4630: /*
4631: * NOTE: the SAX callback may try to fetch the external subset
4632: * entity and fill it up !
4633: */
1.72 daniel 4634: if ((ctxt->sax != NULL) && (ctxt->sax->internalSubset != NULL))
1.74 daniel 4635: ctxt->sax->internalSubset(ctxt->userData, name, ExternalID, URI);
1.22 daniel 4636:
4637: /*
4638: * Is there any DTD definition ?
4639: */
1.40 daniel 4640: if (CUR == '[') {
1.96 daniel 4641: ctxt->instate = XML_PARSER_DTD;
1.40 daniel 4642: NEXT;
1.22 daniel 4643: /*
4644: * Parse the succession of Markup declarations and
4645: * PEReferences.
4646: * Subsequence (markupdecl | PEReference | S)*
4647: */
1.40 daniel 4648: while (CUR != ']') {
4649: const CHAR *check = CUR_PTR;
1.115 daniel 4650: int cons = ctxt->input->consumed;
1.22 daniel 4651:
1.42 daniel 4652: SKIP_BLANKS;
1.22 daniel 4653: xmlParseMarkupDecl(ctxt);
1.50 daniel 4654: xmlParsePEReference(ctxt);
1.22 daniel 4655:
1.115 daniel 4656: /*
4657: * Pop-up of finished entities.
4658: */
4659: while ((CUR == 0) && (ctxt->inputNr > 1))
4660: xmlPopInput(ctxt);
4661:
1.118 daniel 4662: if ((CUR_PTR == check) && (cons == ctxt->input->consumed)) {
1.55 daniel 4663: if ((ctxt->sax != NULL) && (ctxt->sax->error != NULL))
1.74 daniel 4664: ctxt->sax->error(ctxt->userData,
1.31 daniel 4665: "xmlParseDocTypeDecl: error detected in Markup declaration\n");
1.59 daniel 4666: ctxt->wellFormed = 0;
1.22 daniel 4667: break;
4668: }
4669: }
1.40 daniel 4670: if (CUR == ']') NEXT;
1.22 daniel 4671: }
4672:
4673: /*
4674: * We should be at the end of the DOCTYPE declaration.
1.21 daniel 4675: */
1.40 daniel 4676: if (CUR != '>') {
1.55 daniel 4677: if ((ctxt->sax != NULL) && (ctxt->sax->error != NULL))
1.74 daniel 4678: ctxt->sax->error(ctxt->userData, "DOCTYPE unproperly terminated\n");
1.59 daniel 4679: ctxt->wellFormed = 0;
1.22 daniel 4680: /* We shouldn't try to resynchronize ... */
1.21 daniel 4681: }
1.40 daniel 4682: NEXT;
1.22 daniel 4683:
4684: /*
1.99 daniel 4685: * Cleanup
1.22 daniel 4686: */
1.119 daniel 4687: if (URI != NULL) xmlFree(URI);
4688: if (ExternalID != NULL) xmlFree(ExternalID);
4689: if (name != NULL) xmlFree(name);
1.21 daniel 4690: }
4691:
1.50 daniel 4692: /**
4693: * xmlParseAttribute:
4694: * @ctxt: an XML parser context
1.72 daniel 4695: * @value: a CHAR ** used to store the value of the attribute
1.50 daniel 4696: *
4697: * parse an attribute
1.3 veillard 4698: *
1.22 daniel 4699: * [41] Attribute ::= Name Eq AttValue
4700: *
1.98 daniel 4701: * [ WFC: No External Entity References ]
4702: * Attribute values cannot contain direct or indirect entity references
4703: * to external entities.
4704: *
4705: * [ WFC: No < in Attribute Values ]
4706: * The replacement text of any entity referred to directly or indirectly in
4707: * an attribute value (other than "<") must not contain a <.
4708: *
4709: * [ VC: Attribute Value Type ]
1.117 daniel 4710: * The attribute must have been declared; the value must be of the type
1.99 daniel 4711: * declared for it.
1.98 daniel 4712: *
1.22 daniel 4713: * [25] Eq ::= S? '=' S?
4714: *
1.29 daniel 4715: * With namespace:
4716: *
4717: * [NS 11] Attribute ::= QName Eq AttValue
1.43 daniel 4718: *
4719: * Also the case QName == xmlns:??? is handled independently as a namespace
4720: * definition.
1.69 daniel 4721: *
1.72 daniel 4722: * Returns the attribute name, and the value in *value.
1.3 veillard 4723: */
4724:
1.72 daniel 4725: CHAR *
4726: xmlParseAttribute(xmlParserCtxtPtr ctxt, CHAR **value) {
1.59 daniel 4727: CHAR *name, *val;
1.3 veillard 4728:
1.72 daniel 4729: *value = NULL;
4730: name = xmlParseName(ctxt);
1.22 daniel 4731: if (name == NULL) {
1.55 daniel 4732: if ((ctxt->sax != NULL) && (ctxt->sax->error != NULL))
1.74 daniel 4733: ctxt->sax->error(ctxt->userData, "error parsing attribute name\n");
1.59 daniel 4734: ctxt->wellFormed = 0;
1.52 daniel 4735: return(NULL);
1.3 veillard 4736: }
4737:
4738: /*
1.29 daniel 4739: * read the value
1.3 veillard 4740: */
1.42 daniel 4741: SKIP_BLANKS;
1.40 daniel 4742: if (CUR == '=') {
4743: NEXT;
1.42 daniel 4744: SKIP_BLANKS;
1.72 daniel 4745: val = xmlParseAttValue(ctxt);
1.96 daniel 4746: ctxt->instate = XML_PARSER_CONTENT;
1.29 daniel 4747: } else {
1.55 daniel 4748: if ((ctxt->sax != NULL) && (ctxt->sax->error != NULL))
1.74 daniel 4749: ctxt->sax->error(ctxt->userData,
1.59 daniel 4750: "Specification mandate value for attribute %s\n", name);
4751: ctxt->wellFormed = 0;
1.52 daniel 4752: return(NULL);
1.43 daniel 4753: }
4754:
1.72 daniel 4755: *value = val;
4756: return(name);
1.3 veillard 4757: }
4758:
1.50 daniel 4759: /**
4760: * xmlParseStartTag:
4761: * @ctxt: an XML parser context
4762: *
4763: * parse a start of tag either for rule element or
4764: * EmptyElement. In both case we don't parse the tag closing chars.
1.27 daniel 4765: *
4766: * [40] STag ::= '<' Name (S Attribute)* S? '>'
4767: *
1.98 daniel 4768: * [ WFC: Unique Att Spec ]
4769: * No attribute name may appear more than once in the same start-tag or
4770: * empty-element tag.
4771: *
1.29 daniel 4772: * [44] EmptyElemTag ::= '<' Name (S Attribute)* S? '/>'
4773: *
1.98 daniel 4774: * [ WFC: Unique Att Spec ]
4775: * No attribute name may appear more than once in the same start-tag or
4776: * empty-element tag.
4777: *
1.29 daniel 4778: * With namespace:
4779: *
4780: * [NS 8] STag ::= '<' QName (S Attribute)* S? '>'
4781: *
4782: * [NS 10] EmptyElement ::= '<' QName (S Attribute)* S? '/>'
1.83 daniel 4783: *
4784: * Returns the element name parsed
1.2 veillard 4785: */
4786:
1.83 daniel 4787: CHAR *
1.69 daniel 4788: xmlParseStartTag(xmlParserCtxtPtr ctxt) {
1.72 daniel 4789: CHAR *name;
4790: CHAR *attname;
4791: CHAR *attvalue;
4792: const CHAR **atts = NULL;
4793: int nbatts = 0;
4794: int maxatts = 0;
4795: int i;
1.2 veillard 4796:
1.83 daniel 4797: if (CUR != '<') return(NULL);
1.40 daniel 4798: NEXT;
1.3 veillard 4799:
1.72 daniel 4800: name = xmlParseName(ctxt);
1.59 daniel 4801: if (name == NULL) {
4802: if ((ctxt->sax != NULL) && (ctxt->sax->error != NULL))
1.74 daniel 4803: ctxt->sax->error(ctxt->userData,
1.59 daniel 4804: "xmlParseStartTag: invalid element name\n");
4805: ctxt->wellFormed = 0;
1.83 daniel 4806: return(NULL);
1.50 daniel 4807: }
4808:
4809: /*
1.3 veillard 4810: * Now parse the attributes, it ends up with the ending
4811: *
4812: * (S Attribute)* S?
4813: */
1.42 daniel 4814: SKIP_BLANKS;
1.91 daniel 4815: GROW;
1.40 daniel 4816: while ((IS_CHAR(CUR)) &&
4817: (CUR != '>') &&
4818: ((CUR != '/') || (NXT(1) != '>'))) {
4819: const CHAR *q = CUR_PTR;
1.91 daniel 4820: int cons = ctxt->input->consumed;
1.29 daniel 4821:
1.72 daniel 4822: attname = xmlParseAttribute(ctxt, &attvalue);
4823: if ((attname != NULL) && (attvalue != NULL)) {
4824: /*
1.98 daniel 4825: * [ WFC: Unique Att Spec ]
4826: * No attribute name may appear more than once in the same
4827: * start-tag or empty-element tag.
1.72 daniel 4828: */
4829: for (i = 0; i < nbatts;i += 2) {
4830: if (!xmlStrcmp(atts[i], attname)) {
4831: if ((ctxt->sax != NULL) && (ctxt->sax->error != NULL))
1.98 daniel 4832: ctxt->sax->error(ctxt->userData,
4833: "Attribute %s redefined\n",
4834: attname);
1.72 daniel 4835: ctxt->wellFormed = 0;
1.119 daniel 4836: xmlFree(attname);
4837: xmlFree(attvalue);
1.98 daniel 4838: goto failed;
1.72 daniel 4839: }
4840: }
4841:
4842: /*
4843: * Add the pair to atts
4844: */
4845: if (atts == NULL) {
4846: maxatts = 10;
1.119 daniel 4847: atts = (const CHAR **) xmlMalloc(maxatts * sizeof(CHAR *));
1.72 daniel 4848: if (atts == NULL) {
1.86 daniel 4849: fprintf(stderr, "malloc of %ld byte failed\n",
4850: maxatts * (long)sizeof(CHAR *));
1.83 daniel 4851: return(NULL);
1.72 daniel 4852: }
4853: } else if (nbatts + 2 < maxatts) {
4854: maxatts *= 2;
1.119 daniel 4855: atts = (const CHAR **) xmlRealloc(atts, maxatts * sizeof(CHAR *));
1.72 daniel 4856: if (atts == NULL) {
1.86 daniel 4857: fprintf(stderr, "realloc of %ld byte failed\n",
4858: maxatts * (long)sizeof(CHAR *));
1.83 daniel 4859: return(NULL);
1.72 daniel 4860: }
4861: }
4862: atts[nbatts++] = attname;
4863: atts[nbatts++] = attvalue;
4864: atts[nbatts] = NULL;
4865: atts[nbatts + 1] = NULL;
4866: }
4867:
1.116 daniel 4868: failed:
1.42 daniel 4869: SKIP_BLANKS;
1.91 daniel 4870: if ((cons == ctxt->input->consumed) && (q == CUR_PTR)) {
1.55 daniel 4871: if ((ctxt->sax != NULL) && (ctxt->sax->error != NULL))
1.74 daniel 4872: ctxt->sax->error(ctxt->userData,
1.31 daniel 4873: "xmlParseStartTag: problem parsing attributes\n");
1.59 daniel 4874: ctxt->wellFormed = 0;
1.29 daniel 4875: break;
1.3 veillard 4876: }
1.91 daniel 4877: GROW;
1.3 veillard 4878: }
4879:
1.43 daniel 4880: /*
1.72 daniel 4881: * SAX: Start of Element !
1.43 daniel 4882: */
1.72 daniel 4883: if ((ctxt->sax != NULL) && (ctxt->sax->startElement != NULL))
1.74 daniel 4884: ctxt->sax->startElement(ctxt->userData, name, atts);
1.43 daniel 4885:
1.72 daniel 4886: if (atts != NULL) {
1.119 daniel 4887: for (i = 0;i < nbatts;i++) xmlFree((CHAR *) atts[i]);
4888: xmlFree(atts);
1.72 daniel 4889: }
1.83 daniel 4890: return(name);
1.3 veillard 4891: }
4892:
1.50 daniel 4893: /**
4894: * xmlParseEndTag:
4895: * @ctxt: an XML parser context
1.83 daniel 4896: * @tagname: the tag name as parsed in the opening tag.
1.50 daniel 4897: *
4898: * parse an end of tag
1.27 daniel 4899: *
4900: * [42] ETag ::= '</' Name S? '>'
1.29 daniel 4901: *
4902: * With namespace
4903: *
1.72 daniel 4904: * [NS 9] ETag ::= '</' QName S? '>'
1.7 veillard 4905: */
4906:
1.55 daniel 4907: void
1.83 daniel 4908: xmlParseEndTag(xmlParserCtxtPtr ctxt, CHAR *tagname) {
1.72 daniel 4909: CHAR *name;
1.7 veillard 4910:
1.91 daniel 4911: GROW;
1.40 daniel 4912: if ((CUR != '<') || (NXT(1) != '/')) {
1.55 daniel 4913: if ((ctxt->sax != NULL) && (ctxt->sax->error != NULL))
1.74 daniel 4914: ctxt->sax->error(ctxt->userData, "xmlParseEndTag: '</' not found\n");
1.59 daniel 4915: ctxt->wellFormed = 0;
1.27 daniel 4916: return;
4917: }
1.40 daniel 4918: SKIP(2);
1.7 veillard 4919:
1.72 daniel 4920: name = xmlParseName(ctxt);
1.7 veillard 4921:
4922: /*
4923: * We should definitely be at the ending "S? '>'" part
4924: */
1.91 daniel 4925: GROW;
1.42 daniel 4926: SKIP_BLANKS;
1.40 daniel 4927: if ((!IS_CHAR(CUR)) || (CUR != '>')) {
1.55 daniel 4928: if ((ctxt->sax != NULL) && (ctxt->sax->error != NULL))
1.74 daniel 4929: ctxt->sax->error(ctxt->userData, "End tag : expected '>'\n");
1.59 daniel 4930: ctxt->wellFormed = 0;
1.7 veillard 4931: } else
1.40 daniel 4932: NEXT;
1.7 veillard 4933:
1.72 daniel 4934: /*
1.98 daniel 4935: * [ WFC: Element Type Match ]
4936: * The Name in an element's end-tag must match the element type in the
4937: * start-tag.
4938: *
1.83 daniel 4939: */
4940: if (xmlStrcmp(name, tagname)) {
4941: if ((ctxt->sax != NULL) && (ctxt->sax->error != NULL))
4942: ctxt->sax->error(ctxt->userData,
4943: "Opening and ending tag mismatch: %s and %s\n", tagname, name);
4944: ctxt->wellFormed = 0;
4945: }
4946:
4947: /*
1.72 daniel 4948: * SAX: End of Tag
4949: */
4950: if ((ctxt->sax != NULL) && (ctxt->sax->endElement != NULL))
1.74 daniel 4951: ctxt->sax->endElement(ctxt->userData, name);
1.72 daniel 4952:
4953: if (name != NULL)
1.119 daniel 4954: xmlFree(name);
1.72 daniel 4955:
1.7 veillard 4956: return;
4957: }
4958:
1.50 daniel 4959: /**
4960: * xmlParseCDSect:
4961: * @ctxt: an XML parser context
4962: *
4963: * Parse escaped pure raw content.
1.29 daniel 4964: *
4965: * [18] CDSect ::= CDStart CData CDEnd
4966: *
4967: * [19] CDStart ::= '<![CDATA['
4968: *
4969: * [20] Data ::= (Char* - (Char* ']]>' Char*))
4970: *
4971: * [21] CDEnd ::= ']]>'
1.3 veillard 4972: */
1.55 daniel 4973: void
4974: xmlParseCDSect(xmlParserCtxtPtr ctxt) {
1.110 daniel 4975: const CHAR *base;
4976: CHAR r, s;
1.108 veillard 4977: CHAR cur;
1.3 veillard 4978:
1.106 daniel 4979: if ((NXT(0) == '<') && (NXT(1) == '!') &&
1.40 daniel 4980: (NXT(2) == '[') && (NXT(3) == 'C') &&
4981: (NXT(4) == 'D') && (NXT(5) == 'A') &&
4982: (NXT(6) == 'T') && (NXT(7) == 'A') &&
4983: (NXT(8) == '[')) {
4984: SKIP(9);
1.29 daniel 4985: } else
1.45 daniel 4986: return;
1.109 daniel 4987:
4988: ctxt->instate = XML_PARSER_CDATA_SECTION;
1.40 daniel 4989: base = CUR_PTR;
4990: if (!IS_CHAR(CUR)) {
1.55 daniel 4991: if ((ctxt->sax != NULL) && (ctxt->sax->error != NULL))
1.74 daniel 4992: ctxt->sax->error(ctxt->userData, "CData section not finished\n%.50s\n", base);
1.59 daniel 4993: ctxt->wellFormed = 0;
1.109 daniel 4994: ctxt->instate = XML_PARSER_CONTENT;
1.45 daniel 4995: return;
1.3 veillard 4996: }
1.110 daniel 4997: r = CUR;
1.91 daniel 4998: NEXT;
1.40 daniel 4999: if (!IS_CHAR(CUR)) {
1.55 daniel 5000: if ((ctxt->sax != NULL) && (ctxt->sax->error != NULL))
1.74 daniel 5001: ctxt->sax->error(ctxt->userData, "CData section not finished\n%.50s\n", base);
1.59 daniel 5002: ctxt->wellFormed = 0;
1.109 daniel 5003: ctxt->instate = XML_PARSER_CONTENT;
1.45 daniel 5004: return;
1.3 veillard 5005: }
1.110 daniel 5006: s = CUR;
1.91 daniel 5007: NEXT;
1.108 veillard 5008: cur = CUR;
5009: while (IS_CHAR(cur) &&
1.110 daniel 5010: ((r != ']') || (s != ']') || (cur != '>'))) {
5011: r = s;
5012: s = cur;
5013: NEXT;
1.108 veillard 5014: cur = CUR;
1.3 veillard 5015: }
1.109 daniel 5016: ctxt->instate = XML_PARSER_CONTENT;
1.40 daniel 5017: if (!IS_CHAR(CUR)) {
1.55 daniel 5018: if ((ctxt->sax != NULL) && (ctxt->sax->error != NULL))
1.74 daniel 5019: ctxt->sax->error(ctxt->userData, "CData section not finished\n%.50s\n", base);
1.59 daniel 5020: ctxt->wellFormed = 0;
1.45 daniel 5021: return;
1.3 veillard 5022: }
1.107 daniel 5023: NEXT;
1.16 daniel 5024:
1.45 daniel 5025: /*
5026: * Ok the segment [base CUR_PTR] is to be consumed as chars.
5027: */
5028: if (ctxt->sax != NULL) {
1.107 daniel 5029: if (ctxt->sax->cdataBlock != NULL)
1.110 daniel 5030: ctxt->sax->cdataBlock(ctxt->userData, base, (CUR_PTR - base) - 3);
1.45 daniel 5031: }
1.2 veillard 5032: }
5033:
1.50 daniel 5034: /**
5035: * xmlParseContent:
5036: * @ctxt: an XML parser context
5037: *
5038: * Parse a content:
1.2 veillard 5039: *
1.27 daniel 5040: * [43] content ::= (element | CharData | Reference | CDSect | PI | Comment)*
1.2 veillard 5041: */
5042:
1.55 daniel 5043: void
5044: xmlParseContent(xmlParserCtxtPtr ctxt) {
1.97 daniel 5045: GROW;
1.40 daniel 5046: while ((CUR != '<') || (NXT(1) != '/')) {
5047: const CHAR *test = CUR_PTR;
1.91 daniel 5048: int cons = ctxt->input->consumed;
1.113 daniel 5049: CHAR tok = ctxt->token;
1.27 daniel 5050:
5051: /*
5052: * First case : a Processing Instruction.
5053: */
1.40 daniel 5054: if ((CUR == '<') && (NXT(1) == '?')) {
1.27 daniel 5055: xmlParsePI(ctxt);
5056: }
1.72 daniel 5057:
1.27 daniel 5058: /*
5059: * Second case : a CDSection
5060: */
1.40 daniel 5061: else if ((CUR == '<') && (NXT(1) == '!') &&
5062: (NXT(2) == '[') && (NXT(3) == 'C') &&
5063: (NXT(4) == 'D') && (NXT(5) == 'A') &&
5064: (NXT(6) == 'T') && (NXT(7) == 'A') &&
5065: (NXT(8) == '[')) {
1.45 daniel 5066: xmlParseCDSect(ctxt);
1.27 daniel 5067: }
1.72 daniel 5068:
1.27 daniel 5069: /*
5070: * Third case : a comment
5071: */
1.40 daniel 5072: else if ((CUR == '<') && (NXT(1) == '!') &&
5073: (NXT(2) == '-') && (NXT(3) == '-')) {
1.114 daniel 5074: xmlParseComment(ctxt);
1.97 daniel 5075: ctxt->instate = XML_PARSER_CONTENT;
1.27 daniel 5076: }
1.72 daniel 5077:
1.27 daniel 5078: /*
5079: * Fourth case : a sub-element.
5080: */
1.40 daniel 5081: else if (CUR == '<') {
1.72 daniel 5082: xmlParseElement(ctxt);
1.45 daniel 5083: }
1.72 daniel 5084:
1.45 daniel 5085: /*
1.50 daniel 5086: * Fifth case : a reference. If if has not been resolved,
5087: * parsing returns it's Name, create the node
1.45 daniel 5088: */
1.97 daniel 5089:
1.45 daniel 5090: else if (CUR == '&') {
1.77 daniel 5091: xmlParseReference(ctxt);
1.27 daniel 5092: }
1.72 daniel 5093:
1.27 daniel 5094: /*
5095: * Last case, text. Note that References are handled directly.
5096: */
5097: else {
1.45 daniel 5098: xmlParseCharData(ctxt, 0);
1.3 veillard 5099: }
1.14 veillard 5100:
1.91 daniel 5101: GROW;
1.14 veillard 5102: /*
1.45 daniel 5103: * Pop-up of finished entities.
1.14 veillard 5104: */
1.69 daniel 5105: while ((CUR == 0) && (ctxt->inputNr > 1))
5106: xmlPopInput(ctxt);
1.45 daniel 5107:
1.113 daniel 5108: if ((cons == ctxt->input->consumed) && (test == CUR_PTR) &&
5109: (tok == ctxt->token)) {
1.55 daniel 5110: if ((ctxt->sax != NULL) && (ctxt->sax->error != NULL))
1.74 daniel 5111: ctxt->sax->error(ctxt->userData,
1.59 daniel 5112: "detected an error in element content\n");
5113: ctxt->wellFormed = 0;
1.29 daniel 5114: break;
5115: }
1.3 veillard 5116: }
1.2 veillard 5117: }
5118:
1.50 daniel 5119: /**
5120: * xmlParseElement:
5121: * @ctxt: an XML parser context
5122: *
5123: * parse an XML element, this is highly recursive
1.26 daniel 5124: *
5125: * [39] element ::= EmptyElemTag | STag content ETag
5126: *
1.98 daniel 5127: * [ WFC: Element Type Match ]
5128: * The Name in an element's end-tag must match the element type in the
5129: * start-tag.
5130: *
5131: * [ VC: Element Valid ]
1.117 daniel 5132: * An element is valid if there is a declaration matching elementdecl
1.99 daniel 5133: * where the Name matches the element type and one of the following holds:
5134: * - The declaration matches EMPTY and the element has no content.
5135: * - The declaration matches children and the sequence of child elements
5136: * belongs to the language generated by the regular expression in the
5137: * content model, with optional white space (characters matching the
5138: * nonterminal S) between each pair of child elements.
5139: * - The declaration matches Mixed and the content consists of character
5140: * data and child elements whose types match names in the content model.
5141: * - The declaration matches ANY, and the types of any child elements have
5142: * been declared.
1.2 veillard 5143: */
1.26 daniel 5144:
1.72 daniel 5145: void
1.69 daniel 5146: xmlParseElement(xmlParserCtxtPtr ctxt) {
1.40 daniel 5147: const CHAR *openTag = CUR_PTR;
1.83 daniel 5148: CHAR *name;
1.32 daniel 5149: xmlParserNodeInfo node_info;
1.118 daniel 5150: xmlNodePtr ret;
1.2 veillard 5151:
1.32 daniel 5152: /* Capture start position */
1.118 daniel 5153: if (ctxt->record_info) {
5154: node_info.begin_pos = ctxt->input->consumed +
5155: (CUR_PTR - ctxt->input->base);
5156: node_info.begin_line = ctxt->input->line;
5157: }
1.32 daniel 5158:
1.83 daniel 5159: name = xmlParseStartTag(ctxt);
5160: if (name == NULL) {
5161: return;
5162: }
1.118 daniel 5163: ret = ctxt->node;
1.2 veillard 5164:
5165: /*
1.99 daniel 5166: * [ VC: Root Element Type ]
5167: * The Name in the document type declaration must match the element
5168: * type of the root element.
5169: */
1.105 daniel 5170: if (ctxt->validate && ctxt->wellFormed && ctxt->myDoc &&
5171: ctxt->node && (ctxt->node == ctxt->myDoc->root))
1.102 daniel 5172: ctxt->valid &= xmlValidateRoot(&ctxt->vctxt, ctxt->myDoc);
1.99 daniel 5173:
5174: /*
1.2 veillard 5175: * Check for an Empty Element.
5176: */
1.40 daniel 5177: if ((CUR == '/') && (NXT(1) == '>')) {
5178: SKIP(2);
1.72 daniel 5179: if ((ctxt->sax != NULL) && (ctxt->sax->endElement != NULL))
1.83 daniel 5180: ctxt->sax->endElement(ctxt->userData, name);
1.119 daniel 5181: xmlFree(name);
1.72 daniel 5182: return;
1.2 veillard 5183: }
1.91 daniel 5184: if (CUR == '>') {
5185: NEXT;
5186: } else {
1.55 daniel 5187: if ((ctxt->sax != NULL) && (ctxt->sax->error != NULL))
1.74 daniel 5188: ctxt->sax->error(ctxt->userData, "Couldn't find end of Start Tag\n%.30s\n",
1.57 daniel 5189: openTag);
1.59 daniel 5190: ctxt->wellFormed = 0;
1.45 daniel 5191:
5192: /*
5193: * end of parsing of this node.
5194: */
5195: nodePop(ctxt);
1.119 daniel 5196: xmlFree(name);
1.118 daniel 5197:
5198: /*
5199: * Capture end position and add node
5200: */
5201: if ( ret != NULL && ctxt->record_info ) {
5202: node_info.end_pos = ctxt->input->consumed +
5203: (CUR_PTR - ctxt->input->base);
5204: node_info.end_line = ctxt->input->line;
5205: node_info.node = ret;
5206: xmlParserAddNodeInfo(ctxt, &node_info);
5207: }
1.72 daniel 5208: return;
1.2 veillard 5209: }
5210:
5211: /*
5212: * Parse the content of the element:
5213: */
1.45 daniel 5214: xmlParseContent(ctxt);
1.40 daniel 5215: if (!IS_CHAR(CUR)) {
1.55 daniel 5216: if ((ctxt->sax != NULL) && (ctxt->sax->error != NULL))
1.74 daniel 5217: ctxt->sax->error(ctxt->userData,
1.57 daniel 5218: "Premature end of data in tag %.30s\n", openTag);
1.59 daniel 5219: ctxt->wellFormed = 0;
1.45 daniel 5220:
5221: /*
5222: * end of parsing of this node.
5223: */
5224: nodePop(ctxt);
1.119 daniel 5225: xmlFree(name);
1.72 daniel 5226: return;
1.2 veillard 5227: }
5228:
5229: /*
1.27 daniel 5230: * parse the end of tag: '</' should be here.
1.2 veillard 5231: */
1.83 daniel 5232: xmlParseEndTag(ctxt, name);
1.119 daniel 5233: xmlFree(name);
1.118 daniel 5234:
5235: /*
5236: * Capture end position and add node
5237: */
5238: if ( ret != NULL && ctxt->record_info ) {
5239: node_info.end_pos = ctxt->input->consumed +
5240: (CUR_PTR - ctxt->input->base);
5241: node_info.end_line = ctxt->input->line;
5242: node_info.node = ret;
5243: xmlParserAddNodeInfo(ctxt, &node_info);
5244: }
1.2 veillard 5245: }
5246:
1.50 daniel 5247: /**
5248: * xmlParseVersionNum:
5249: * @ctxt: an XML parser context
5250: *
5251: * parse the XML version value.
1.29 daniel 5252: *
5253: * [26] VersionNum ::= ([a-zA-Z0-9_.:] | '-')+
1.68 daniel 5254: *
5255: * Returns the string giving the XML version number, or NULL
1.29 daniel 5256: */
1.55 daniel 5257: CHAR *
5258: xmlParseVersionNum(xmlParserCtxtPtr ctxt) {
1.40 daniel 5259: const CHAR *q = CUR_PTR;
1.29 daniel 5260: CHAR *ret;
5261:
1.40 daniel 5262: while (IS_CHAR(CUR) &&
5263: (((CUR >= 'a') && (CUR <= 'z')) ||
5264: ((CUR >= 'A') && (CUR <= 'Z')) ||
5265: ((CUR >= '0') && (CUR <= '9')) ||
5266: (CUR == '_') || (CUR == '.') ||
5267: (CUR == ':') || (CUR == '-'))) NEXT;
5268: ret = xmlStrndup(q, CUR_PTR - q);
1.29 daniel 5269: return(ret);
5270: }
5271:
1.50 daniel 5272: /**
5273: * xmlParseVersionInfo:
5274: * @ctxt: an XML parser context
5275: *
5276: * parse the XML version.
1.29 daniel 5277: *
5278: * [24] VersionInfo ::= S 'version' Eq (' VersionNum ' | " VersionNum ")
5279: *
5280: * [25] Eq ::= S? '=' S?
1.50 daniel 5281: *
1.68 daniel 5282: * Returns the version string, e.g. "1.0"
1.29 daniel 5283: */
5284:
1.55 daniel 5285: CHAR *
5286: xmlParseVersionInfo(xmlParserCtxtPtr ctxt) {
1.29 daniel 5287: CHAR *version = NULL;
5288: const CHAR *q;
5289:
1.40 daniel 5290: if ((CUR == 'v') && (NXT(1) == 'e') &&
5291: (NXT(2) == 'r') && (NXT(3) == 's') &&
5292: (NXT(4) == 'i') && (NXT(5) == 'o') &&
5293: (NXT(6) == 'n')) {
5294: SKIP(7);
1.42 daniel 5295: SKIP_BLANKS;
1.40 daniel 5296: if (CUR != '=') {
1.55 daniel 5297: if ((ctxt->sax != NULL) && (ctxt->sax->error != NULL))
1.74 daniel 5298: ctxt->sax->error(ctxt->userData, "xmlParseVersionInfo : expected '='\n");
1.59 daniel 5299: ctxt->wellFormed = 0;
1.31 daniel 5300: return(NULL);
5301: }
1.40 daniel 5302: NEXT;
1.42 daniel 5303: SKIP_BLANKS;
1.40 daniel 5304: if (CUR == '"') {
5305: NEXT;
5306: q = CUR_PTR;
1.29 daniel 5307: version = xmlParseVersionNum(ctxt);
1.55 daniel 5308: if (CUR != '"') {
5309: if ((ctxt->sax != NULL) && (ctxt->sax->error != NULL))
1.74 daniel 5310: ctxt->sax->error(ctxt->userData, "String not closed\n%.50s\n", q);
1.59 daniel 5311: ctxt->wellFormed = 0;
1.55 daniel 5312: } else
1.40 daniel 5313: NEXT;
5314: } else if (CUR == '\''){
5315: NEXT;
5316: q = CUR_PTR;
1.29 daniel 5317: version = xmlParseVersionNum(ctxt);
1.55 daniel 5318: if (CUR != '\'') {
5319: if ((ctxt->sax != NULL) && (ctxt->sax->error != NULL))
1.74 daniel 5320: ctxt->sax->error(ctxt->userData, "String not closed\n%.50s\n", q);
1.59 daniel 5321: ctxt->wellFormed = 0;
1.55 daniel 5322: } else
1.40 daniel 5323: NEXT;
1.31 daniel 5324: } else {
1.55 daniel 5325: if ((ctxt->sax != NULL) && (ctxt->sax->error != NULL))
1.74 daniel 5326: ctxt->sax->error(ctxt->userData,
1.59 daniel 5327: "xmlParseVersionInfo : expected ' or \"\n");
5328: ctxt->wellFormed = 0;
1.29 daniel 5329: }
5330: }
5331: return(version);
5332: }
5333:
1.50 daniel 5334: /**
5335: * xmlParseEncName:
5336: * @ctxt: an XML parser context
5337: *
5338: * parse the XML encoding name
1.29 daniel 5339: *
5340: * [81] EncName ::= [A-Za-z] ([A-Za-z0-9._] | '-')*
1.50 daniel 5341: *
1.68 daniel 5342: * Returns the encoding name value or NULL
1.29 daniel 5343: */
1.55 daniel 5344: CHAR *
5345: xmlParseEncName(xmlParserCtxtPtr ctxt) {
1.40 daniel 5346: const CHAR *q = CUR_PTR;
1.29 daniel 5347: CHAR *ret = NULL;
5348:
1.40 daniel 5349: if (((CUR >= 'a') && (CUR <= 'z')) ||
5350: ((CUR >= 'A') && (CUR <= 'Z'))) {
5351: NEXT;
5352: while (IS_CHAR(CUR) &&
5353: (((CUR >= 'a') && (CUR <= 'z')) ||
5354: ((CUR >= 'A') && (CUR <= 'Z')) ||
5355: ((CUR >= '0') && (CUR <= '9')) ||
5356: (CUR == '-'))) NEXT;
5357: ret = xmlStrndup(q, CUR_PTR - q);
1.29 daniel 5358: } else {
1.55 daniel 5359: if ((ctxt->sax != NULL) && (ctxt->sax->error != NULL))
1.74 daniel 5360: ctxt->sax->error(ctxt->userData, "Invalid XML encoding name\n");
1.59 daniel 5361: ctxt->wellFormed = 0;
1.29 daniel 5362: }
5363: return(ret);
5364: }
5365:
1.50 daniel 5366: /**
5367: * xmlParseEncodingDecl:
5368: * @ctxt: an XML parser context
5369: *
5370: * parse the XML encoding declaration
1.29 daniel 5371: *
5372: * [80] EncodingDecl ::= S 'encoding' Eq ('"' EncName '"' | "'" EncName "'")
1.50 daniel 5373: *
5374: * TODO: this should setup the conversion filters.
5375: *
1.68 daniel 5376: * Returns the encoding value or NULL
1.29 daniel 5377: */
5378:
1.55 daniel 5379: CHAR *
5380: xmlParseEncodingDecl(xmlParserCtxtPtr ctxt) {
1.29 daniel 5381: CHAR *encoding = NULL;
5382: const CHAR *q;
5383:
1.42 daniel 5384: SKIP_BLANKS;
1.40 daniel 5385: if ((CUR == 'e') && (NXT(1) == 'n') &&
5386: (NXT(2) == 'c') && (NXT(3) == 'o') &&
5387: (NXT(4) == 'd') && (NXT(5) == 'i') &&
5388: (NXT(6) == 'n') && (NXT(7) == 'g')) {
5389: SKIP(8);
1.42 daniel 5390: SKIP_BLANKS;
1.40 daniel 5391: if (CUR != '=') {
1.55 daniel 5392: if ((ctxt->sax != NULL) && (ctxt->sax->error != NULL))
1.74 daniel 5393: ctxt->sax->error(ctxt->userData, "xmlParseEncodingDecl : expected '='\n");
1.59 daniel 5394: ctxt->wellFormed = 0;
1.31 daniel 5395: return(NULL);
5396: }
1.40 daniel 5397: NEXT;
1.42 daniel 5398: SKIP_BLANKS;
1.40 daniel 5399: if (CUR == '"') {
5400: NEXT;
5401: q = CUR_PTR;
1.29 daniel 5402: encoding = xmlParseEncName(ctxt);
1.55 daniel 5403: if (CUR != '"') {
5404: if ((ctxt->sax != NULL) && (ctxt->sax->error != NULL))
1.74 daniel 5405: ctxt->sax->error(ctxt->userData, "String not closed\n%.50s\n", q);
1.59 daniel 5406: ctxt->wellFormed = 0;
1.55 daniel 5407: } else
1.40 daniel 5408: NEXT;
5409: } else if (CUR == '\''){
5410: NEXT;
5411: q = CUR_PTR;
1.29 daniel 5412: encoding = xmlParseEncName(ctxt);
1.55 daniel 5413: if (CUR != '\'') {
5414: if ((ctxt->sax != NULL) && (ctxt->sax->error != NULL))
1.74 daniel 5415: ctxt->sax->error(ctxt->userData, "String not closed\n%.50s\n", q);
1.59 daniel 5416: ctxt->wellFormed = 0;
1.55 daniel 5417: } else
1.40 daniel 5418: NEXT;
5419: } else if (CUR == '"'){
1.55 daniel 5420: if ((ctxt->sax != NULL) && (ctxt->sax->error != NULL))
1.74 daniel 5421: ctxt->sax->error(ctxt->userData,
1.59 daniel 5422: "xmlParseEncodingDecl : expected ' or \"\n");
5423: ctxt->wellFormed = 0;
1.29 daniel 5424: }
5425: }
5426: return(encoding);
5427: }
5428:
1.50 daniel 5429: /**
5430: * xmlParseSDDecl:
5431: * @ctxt: an XML parser context
5432: *
5433: * parse the XML standalone declaration
1.29 daniel 5434: *
5435: * [32] SDDecl ::= S 'standalone' Eq
5436: * (("'" ('yes' | 'no') "'") | ('"' ('yes' | 'no')'"'))
1.99 daniel 5437: *
5438: * [ VC: Standalone Document Declaration ]
5439: * TODO The standalone document declaration must have the value "no"
5440: * if any external markup declarations contain declarations of:
5441: * - attributes with default values, if elements to which these
5442: * attributes apply appear in the document without specifications
5443: * of values for these attributes, or
5444: * - entities (other than amp, lt, gt, apos, quot), if references
5445: * to those entities appear in the document, or
5446: * - attributes with values subject to normalization, where the
5447: * attribute appears in the document with a value which will change
5448: * as a result of normalization, or
5449: * - element types with element content, if white space occurs directly
5450: * within any instance of those types.
1.68 daniel 5451: *
5452: * Returns 1 if standalone, 0 otherwise
1.29 daniel 5453: */
5454:
1.55 daniel 5455: int
5456: xmlParseSDDecl(xmlParserCtxtPtr ctxt) {
1.29 daniel 5457: int standalone = -1;
5458:
1.42 daniel 5459: SKIP_BLANKS;
1.40 daniel 5460: if ((CUR == 's') && (NXT(1) == 't') &&
5461: (NXT(2) == 'a') && (NXT(3) == 'n') &&
5462: (NXT(4) == 'd') && (NXT(5) == 'a') &&
5463: (NXT(6) == 'l') && (NXT(7) == 'o') &&
5464: (NXT(8) == 'n') && (NXT(9) == 'e')) {
5465: SKIP(10);
1.81 daniel 5466: SKIP_BLANKS;
1.40 daniel 5467: if (CUR != '=') {
1.55 daniel 5468: if ((ctxt->sax != NULL) && (ctxt->sax->error != NULL))
1.74 daniel 5469: ctxt->sax->error(ctxt->userData,
1.59 daniel 5470: "XML standalone declaration : expected '='\n");
5471: ctxt->wellFormed = 0;
1.32 daniel 5472: return(standalone);
5473: }
1.40 daniel 5474: NEXT;
1.42 daniel 5475: SKIP_BLANKS;
1.40 daniel 5476: if (CUR == '\''){
5477: NEXT;
5478: if ((CUR == 'n') && (NXT(1) == 'o')) {
1.29 daniel 5479: standalone = 0;
1.40 daniel 5480: SKIP(2);
5481: } else if ((CUR == 'y') && (NXT(1) == 'e') &&
5482: (NXT(2) == 's')) {
1.29 daniel 5483: standalone = 1;
1.40 daniel 5484: SKIP(3);
1.29 daniel 5485: } else {
1.55 daniel 5486: if ((ctxt->sax != NULL) && (ctxt->sax->error != NULL))
1.74 daniel 5487: ctxt->sax->error(ctxt->userData, "standalone accepts only 'yes' or 'no'\n");
1.59 daniel 5488: ctxt->wellFormed = 0;
1.29 daniel 5489: }
1.55 daniel 5490: if (CUR != '\'') {
5491: if ((ctxt->sax != NULL) && (ctxt->sax->error != NULL))
1.74 daniel 5492: ctxt->sax->error(ctxt->userData, "String not closed\n");
1.59 daniel 5493: ctxt->wellFormed = 0;
1.55 daniel 5494: } else
1.40 daniel 5495: NEXT;
5496: } else if (CUR == '"'){
5497: NEXT;
5498: if ((CUR == 'n') && (NXT(1) == 'o')) {
1.29 daniel 5499: standalone = 0;
1.40 daniel 5500: SKIP(2);
5501: } else if ((CUR == 'y') && (NXT(1) == 'e') &&
5502: (NXT(2) == 's')) {
1.29 daniel 5503: standalone = 1;
1.40 daniel 5504: SKIP(3);
1.29 daniel 5505: } else {
1.55 daniel 5506: if ((ctxt->sax != NULL) && (ctxt->sax->error != NULL))
1.74 daniel 5507: ctxt->sax->error(ctxt->userData,
1.59 daniel 5508: "standalone accepts only 'yes' or 'no'\n");
5509: ctxt->wellFormed = 0;
1.29 daniel 5510: }
1.55 daniel 5511: if (CUR != '"') {
5512: if ((ctxt->sax != NULL) && (ctxt->sax->error != NULL))
1.74 daniel 5513: ctxt->sax->error(ctxt->userData, "String not closed\n");
1.59 daniel 5514: ctxt->wellFormed = 0;
1.55 daniel 5515: } else
1.40 daniel 5516: NEXT;
1.37 daniel 5517: } else {
1.55 daniel 5518: if ((ctxt->sax != NULL) && (ctxt->sax->error != NULL))
1.74 daniel 5519: ctxt->sax->error(ctxt->userData, "Standalone value not found\n");
1.59 daniel 5520: ctxt->wellFormed = 0;
1.37 daniel 5521: }
1.29 daniel 5522: }
5523: return(standalone);
5524: }
5525:
1.50 daniel 5526: /**
5527: * xmlParseXMLDecl:
5528: * @ctxt: an XML parser context
5529: *
5530: * parse an XML declaration header
1.29 daniel 5531: *
5532: * [23] XMLDecl ::= '<?xml' VersionInfo EncodingDecl? SDDecl? S? '?>'
1.1 veillard 5533: */
5534:
1.55 daniel 5535: void
5536: xmlParseXMLDecl(xmlParserCtxtPtr ctxt) {
1.1 veillard 5537: CHAR *version;
5538:
5539: /*
1.19 daniel 5540: * We know that '<?xml' is here.
1.1 veillard 5541: */
1.40 daniel 5542: SKIP(5);
1.1 veillard 5543:
1.59 daniel 5544: if (!IS_BLANK(CUR)) {
5545: if ((ctxt->sax != NULL) && (ctxt->sax->error != NULL))
1.74 daniel 5546: ctxt->sax->error(ctxt->userData, "Blank needed after '<?xml'\n");
1.59 daniel 5547: ctxt->wellFormed = 0;
5548: }
1.42 daniel 5549: SKIP_BLANKS;
1.1 veillard 5550:
5551: /*
1.29 daniel 5552: * We should have the VersionInfo here.
1.1 veillard 5553: */
1.29 daniel 5554: version = xmlParseVersionInfo(ctxt);
5555: if (version == NULL)
1.45 daniel 5556: version = xmlCharStrdup(XML_DEFAULT_VERSION);
1.72 daniel 5557: ctxt->version = xmlStrdup(version);
1.119 daniel 5558: xmlFree(version);
1.29 daniel 5559:
5560: /*
5561: * We may have the encoding declaration
5562: */
1.59 daniel 5563: if (!IS_BLANK(CUR)) {
5564: if ((CUR == '?') && (NXT(1) == '>')) {
5565: SKIP(2);
5566: return;
5567: }
5568: if ((ctxt->sax != NULL) && (ctxt->sax->error != NULL))
1.74 daniel 5569: ctxt->sax->error(ctxt->userData, "Blank needed here\n");
1.59 daniel 5570: ctxt->wellFormed = 0;
5571: }
1.72 daniel 5572: ctxt->encoding = xmlParseEncodingDecl(ctxt);
1.1 veillard 5573:
5574: /*
1.29 daniel 5575: * We may have the standalone status.
1.1 veillard 5576: */
1.72 daniel 5577: if ((ctxt->encoding != NULL) && (!IS_BLANK(CUR))) {
1.59 daniel 5578: if ((CUR == '?') && (NXT(1) == '>')) {
5579: SKIP(2);
5580: return;
5581: }
5582: if ((ctxt->sax != NULL) && (ctxt->sax->error != NULL))
1.74 daniel 5583: ctxt->sax->error(ctxt->userData, "Blank needed here\n");
1.59 daniel 5584: ctxt->wellFormed = 0;
5585: }
5586: SKIP_BLANKS;
1.72 daniel 5587: ctxt->standalone = xmlParseSDDecl(ctxt);
1.1 veillard 5588:
1.42 daniel 5589: SKIP_BLANKS;
1.40 daniel 5590: if ((CUR == '?') && (NXT(1) == '>')) {
5591: SKIP(2);
5592: } else if (CUR == '>') {
1.31 daniel 5593: /* Deprecated old WD ... */
1.55 daniel 5594: if ((ctxt->sax != NULL) && (ctxt->sax->error != NULL))
1.74 daniel 5595: ctxt->sax->error(ctxt->userData, "XML declaration must end-up with '?>'\n");
1.59 daniel 5596: ctxt->wellFormed = 0;
1.40 daniel 5597: NEXT;
1.29 daniel 5598: } else {
1.55 daniel 5599: if ((ctxt->sax != NULL) && (ctxt->sax->error != NULL))
1.74 daniel 5600: ctxt->sax->error(ctxt->userData, "parsing XML declaration: '?>' expected\n");
1.59 daniel 5601: ctxt->wellFormed = 0;
1.40 daniel 5602: MOVETO_ENDTAG(CUR_PTR);
5603: NEXT;
1.29 daniel 5604: }
1.1 veillard 5605: }
5606:
1.50 daniel 5607: /**
5608: * xmlParseMisc:
5609: * @ctxt: an XML parser context
5610: *
5611: * parse an XML Misc* optionnal field.
1.21 daniel 5612: *
1.22 daniel 5613: * [27] Misc ::= Comment | PI | S
1.1 veillard 5614: */
5615:
1.55 daniel 5616: void
5617: xmlParseMisc(xmlParserCtxtPtr ctxt) {
1.40 daniel 5618: while (((CUR == '<') && (NXT(1) == '?')) ||
5619: ((CUR == '<') && (NXT(1) == '!') &&
5620: (NXT(2) == '-') && (NXT(3) == '-')) ||
5621: IS_BLANK(CUR)) {
5622: if ((CUR == '<') && (NXT(1) == '?')) {
1.16 daniel 5623: xmlParsePI(ctxt);
1.40 daniel 5624: } else if (IS_BLANK(CUR)) {
5625: NEXT;
1.1 veillard 5626: } else
1.114 daniel 5627: xmlParseComment(ctxt);
1.1 veillard 5628: }
5629: }
5630:
1.50 daniel 5631: /**
5632: * xmlParseDocument :
5633: * @ctxt: an XML parser context
5634: *
5635: * parse an XML document (and build a tree if using the standard SAX
5636: * interface).
1.21 daniel 5637: *
1.22 daniel 5638: * [1] document ::= prolog element Misc*
1.29 daniel 5639: *
5640: * [22] prolog ::= XMLDecl? Misc* (doctypedecl Misc*)?
1.50 daniel 5641: *
1.68 daniel 5642: * Returns 0, -1 in case of error. the parser context is augmented
1.50 daniel 5643: * as a result of the parsing.
1.1 veillard 5644: */
5645:
1.55 daniel 5646: int
5647: xmlParseDocument(xmlParserCtxtPtr ctxt) {
1.45 daniel 5648: xmlDefaultSAXHandlerInit();
5649:
1.91 daniel 5650: GROW;
5651:
1.14 veillard 5652: /*
1.44 daniel 5653: * SAX: beginning of the document processing.
5654: */
1.72 daniel 5655: if ((ctxt->sax) && (ctxt->sax->setDocumentLocator))
1.74 daniel 5656: ctxt->sax->setDocumentLocator(ctxt->userData, &xmlDefaultSAXLocator);
1.44 daniel 5657:
5658: /*
1.117 daniel 5659: * TODO We should check for encoding here and plug-in some
5660: * conversion code !!!!
1.14 veillard 5661: */
1.1 veillard 5662:
5663: /*
5664: * Wipe out everything which is before the first '<'
5665: */
1.59 daniel 5666: if (IS_BLANK(CUR)) {
5667: if ((ctxt->sax != NULL) && (ctxt->sax->error != NULL))
1.74 daniel 5668: ctxt->sax->error(ctxt->userData,
1.59 daniel 5669: "Extra spaces at the beginning of the document are not allowed\n");
5670: ctxt->wellFormed = 0;
5671: SKIP_BLANKS;
5672: }
5673:
5674: if (CUR == 0) {
5675: if ((ctxt->sax != NULL) && (ctxt->sax->error != NULL))
1.74 daniel 5676: ctxt->sax->error(ctxt->userData, "Document is empty\n");
1.59 daniel 5677: ctxt->wellFormed = 0;
5678: }
1.1 veillard 5679:
5680: /*
5681: * Check for the XMLDecl in the Prolog.
5682: */
1.91 daniel 5683: GROW;
1.40 daniel 5684: if ((CUR == '<') && (NXT(1) == '?') &&
5685: (NXT(2) == 'x') && (NXT(3) == 'm') &&
5686: (NXT(4) == 'l')) {
1.19 daniel 5687: xmlParseXMLDecl(ctxt);
5688: /* SKIP_EOL(cur); */
1.42 daniel 5689: SKIP_BLANKS;
1.40 daniel 5690: } else if ((CUR == '<') && (NXT(1) == '?') &&
5691: (NXT(2) == 'X') && (NXT(3) == 'M') &&
5692: (NXT(4) == 'L')) {
1.19 daniel 5693: /*
5694: * The first drafts were using <?XML and the final W3C REC
5695: * now use <?xml ...
5696: */
1.16 daniel 5697: xmlParseXMLDecl(ctxt);
1.1 veillard 5698: /* SKIP_EOL(cur); */
1.42 daniel 5699: SKIP_BLANKS;
1.1 veillard 5700: } else {
1.72 daniel 5701: ctxt->version = xmlCharStrdup(XML_DEFAULT_VERSION);
1.1 veillard 5702: }
1.72 daniel 5703: if ((ctxt->sax) && (ctxt->sax->startDocument))
1.74 daniel 5704: ctxt->sax->startDocument(ctxt->userData);
1.1 veillard 5705:
5706: /*
5707: * The Misc part of the Prolog
5708: */
1.91 daniel 5709: GROW;
1.16 daniel 5710: xmlParseMisc(ctxt);
1.1 veillard 5711:
5712: /*
1.29 daniel 5713: * Then possibly doc type declaration(s) and more Misc
1.21 daniel 5714: * (doctypedecl Misc*)?
5715: */
1.91 daniel 5716: GROW;
1.40 daniel 5717: if ((CUR == '<') && (NXT(1) == '!') &&
5718: (NXT(2) == 'D') && (NXT(3) == 'O') &&
5719: (NXT(4) == 'C') && (NXT(5) == 'T') &&
5720: (NXT(6) == 'Y') && (NXT(7) == 'P') &&
5721: (NXT(8) == 'E')) {
1.22 daniel 5722: xmlParseDocTypeDecl(ctxt);
1.96 daniel 5723: ctxt->instate = XML_PARSER_PROLOG;
1.22 daniel 5724: xmlParseMisc(ctxt);
1.21 daniel 5725: }
5726:
5727: /*
5728: * Time to start parsing the tree itself
1.1 veillard 5729: */
1.91 daniel 5730: GROW;
1.96 daniel 5731: ctxt->instate = XML_PARSER_CONTENT;
1.72 daniel 5732: xmlParseElement(ctxt);
1.96 daniel 5733: ctxt->instate = XML_PARSER_EPILOG;
1.33 daniel 5734:
5735: /*
5736: * The Misc part at the end
5737: */
5738: xmlParseMisc(ctxt);
1.16 daniel 5739:
1.59 daniel 5740: if (CUR != 0) {
5741: if ((ctxt->sax != NULL) && (ctxt->sax->error != NULL))
1.74 daniel 5742: ctxt->sax->error(ctxt->userData,
1.59 daniel 5743: "Extra content at the end of the document\n");
5744: ctxt->wellFormed = 0;
5745: }
1.96 daniel 5746: ctxt->instate = XML_PARSER_EOF;
1.59 daniel 5747:
1.44 daniel 5748: /*
5749: * SAX: end of the document processing.
5750: */
1.72 daniel 5751: if ((ctxt->sax) && (ctxt->sax->endDocument != NULL))
1.74 daniel 5752: ctxt->sax->endDocument(ctxt->userData);
1.59 daniel 5753: if (! ctxt->wellFormed) return(-1);
1.16 daniel 5754: return(0);
5755: }
5756:
1.98 daniel 5757: /************************************************************************
5758: * *
5759: * I/O front end functions to the parser *
5760: * *
5761: ************************************************************************/
5762:
1.50 daniel 5763: /**
1.86 daniel 5764: * xmlCreateDocParserCtxt :
1.50 daniel 5765: * @cur: a pointer to an array of CHAR
5766: *
1.69 daniel 5767: * Create a parser context for an XML in-memory document.
5768: *
5769: * Returns the new parser context or NULL
1.16 daniel 5770: */
1.69 daniel 5771: xmlParserCtxtPtr
5772: xmlCreateDocParserCtxt(CHAR *cur) {
1.16 daniel 5773: xmlParserCtxtPtr ctxt;
1.40 daniel 5774: xmlParserInputPtr input;
1.75 daniel 5775: xmlCharEncoding enc;
1.16 daniel 5776:
1.97 daniel 5777: ctxt = xmlNewParserCtxt();
1.16 daniel 5778: if (ctxt == NULL) {
5779: return(NULL);
5780: }
1.96 daniel 5781: input = xmlNewInputStream(ctxt);
1.40 daniel 5782: if (input == NULL) {
1.97 daniel 5783: xmlFreeParserCtxt(ctxt);
1.40 daniel 5784: return(NULL);
5785: }
5786:
1.75 daniel 5787: /*
5788: * plug some encoding conversion routines here. !!!
5789: */
5790: enc = xmlDetectCharEncoding(cur);
5791: xmlSwitchEncoding(ctxt, enc);
5792:
1.40 daniel 5793: input->base = cur;
5794: input->cur = cur;
5795:
5796: inputPush(ctxt, input);
1.69 daniel 5797: return(ctxt);
5798: }
5799:
5800: /**
5801: * xmlSAXParseDoc :
5802: * @sax: the SAX handler block
5803: * @cur: a pointer to an array of CHAR
5804: * @recovery: work in recovery mode, i.e. tries to read no Well Formed
5805: * documents
5806: *
5807: * parse an XML in-memory document and build a tree.
5808: * It use the given SAX function block to handle the parsing callback.
5809: * If sax is NULL, fallback to the default DOM tree building routines.
5810: *
5811: * Returns the resulting document tree
5812: */
5813:
5814: xmlDocPtr
5815: xmlSAXParseDoc(xmlSAXHandlerPtr sax, CHAR *cur, int recovery) {
5816: xmlDocPtr ret;
5817: xmlParserCtxtPtr ctxt;
5818:
5819: if (cur == NULL) return(NULL);
1.16 daniel 5820:
5821:
1.69 daniel 5822: ctxt = xmlCreateDocParserCtxt(cur);
5823: if (ctxt == NULL) return(NULL);
1.74 daniel 5824: if (sax != NULL) {
5825: ctxt->sax = sax;
5826: ctxt->userData = NULL;
5827: }
1.69 daniel 5828:
1.16 daniel 5829: xmlParseDocument(ctxt);
1.72 daniel 5830: if ((ctxt->wellFormed) || recovery) ret = ctxt->myDoc;
1.59 daniel 5831: else {
5832: ret = NULL;
1.72 daniel 5833: xmlFreeDoc(ctxt->myDoc);
5834: ctxt->myDoc = NULL;
1.59 daniel 5835: }
1.86 daniel 5836: if (sax != NULL)
5837: ctxt->sax = NULL;
1.69 daniel 5838: xmlFreeParserCtxt(ctxt);
1.16 daniel 5839:
1.1 veillard 5840: return(ret);
5841: }
5842:
1.50 daniel 5843: /**
1.55 daniel 5844: * xmlParseDoc :
5845: * @cur: a pointer to an array of CHAR
5846: *
5847: * parse an XML in-memory document and build a tree.
5848: *
1.68 daniel 5849: * Returns the resulting document tree
1.55 daniel 5850: */
5851:
1.69 daniel 5852: xmlDocPtr
5853: xmlParseDoc(CHAR *cur) {
1.59 daniel 5854: return(xmlSAXParseDoc(NULL, cur, 0));
1.76 daniel 5855: }
5856:
5857: /**
5858: * xmlSAXParseDTD :
5859: * @sax: the SAX handler block
5860: * @ExternalID: a NAME* containing the External ID of the DTD
5861: * @SystemID: a NAME* containing the URL to the DTD
5862: *
5863: * Load and parse an external subset.
5864: *
5865: * Returns the resulting xmlDtdPtr or NULL in case of error.
5866: */
5867:
5868: xmlDtdPtr
5869: xmlSAXParseDTD(xmlSAXHandlerPtr sax, const CHAR *ExternalID,
5870: const CHAR *SystemID) {
5871: xmlDtdPtr ret = NULL;
5872: xmlParserCtxtPtr ctxt;
1.83 daniel 5873: xmlParserInputPtr input = NULL;
1.76 daniel 5874: xmlCharEncoding enc;
5875:
5876: if ((ExternalID == NULL) && (SystemID == NULL)) return(NULL);
5877:
1.97 daniel 5878: ctxt = xmlNewParserCtxt();
1.76 daniel 5879: if (ctxt == NULL) {
5880: return(NULL);
5881: }
5882:
5883: /*
5884: * Set-up the SAX context
5885: */
5886: if (ctxt == NULL) return(NULL);
5887: if (sax != NULL) {
1.93 veillard 5888: if (ctxt->sax != NULL)
1.119 daniel 5889: xmlFree(ctxt->sax);
1.76 daniel 5890: ctxt->sax = sax;
5891: ctxt->userData = NULL;
5892: }
5893:
5894: /*
5895: * Ask the Entity resolver to load the damn thing
5896: */
5897:
5898: if ((ctxt->sax != NULL) && (ctxt->sax->resolveEntity != NULL))
5899: input = ctxt->sax->resolveEntity(ctxt->userData, ExternalID, SystemID);
5900: if (input == NULL) {
1.86 daniel 5901: if (sax != NULL) ctxt->sax = NULL;
1.76 daniel 5902: xmlFreeParserCtxt(ctxt);
5903: return(NULL);
5904: }
5905:
5906: /*
5907: * plug some encoding conversion routines here. !!!
5908: */
5909: xmlPushInput(ctxt, input);
5910: enc = xmlDetectCharEncoding(ctxt->input->cur);
5911: xmlSwitchEncoding(ctxt, enc);
5912:
1.95 veillard 5913: if (input->filename == NULL)
1.116 daniel 5914: input->filename = (char *) xmlStrdup(SystemID); /* !!!!!!! */
1.76 daniel 5915: input->line = 1;
5916: input->col = 1;
5917: input->base = ctxt->input->cur;
5918: input->cur = ctxt->input->cur;
5919: input->free = NULL;
5920:
5921: /*
5922: * let's parse that entity knowing it's an external subset.
5923: */
1.79 daniel 5924: xmlParseExternalSubset(ctxt, ExternalID, SystemID);
1.76 daniel 5925:
5926: if (ctxt->myDoc != NULL) {
5927: if (ctxt->wellFormed) {
5928: ret = ctxt->myDoc->intSubset;
5929: ctxt->myDoc->intSubset = NULL;
5930: } else {
5931: ret = NULL;
5932: }
5933: xmlFreeDoc(ctxt->myDoc);
5934: ctxt->myDoc = NULL;
5935: }
1.86 daniel 5936: if (sax != NULL) ctxt->sax = NULL;
1.76 daniel 5937: xmlFreeParserCtxt(ctxt);
5938:
5939: return(ret);
5940: }
5941:
5942: /**
5943: * xmlParseDTD :
5944: * @ExternalID: a NAME* containing the External ID of the DTD
5945: * @SystemID: a NAME* containing the URL to the DTD
5946: *
5947: * Load and parse an external subset.
5948: *
5949: * Returns the resulting xmlDtdPtr or NULL in case of error.
5950: */
5951:
5952: xmlDtdPtr
5953: xmlParseDTD(const CHAR *ExternalID, const CHAR *SystemID) {
5954: return(xmlSAXParseDTD(NULL, ExternalID, SystemID));
1.59 daniel 5955: }
5956:
5957: /**
5958: * xmlRecoverDoc :
5959: * @cur: a pointer to an array of CHAR
5960: *
5961: * parse an XML in-memory document and build a tree.
5962: * In the case the document is not Well Formed, a tree is built anyway
5963: *
1.68 daniel 5964: * Returns the resulting document tree
1.59 daniel 5965: */
5966:
1.69 daniel 5967: xmlDocPtr
5968: xmlRecoverDoc(CHAR *cur) {
1.59 daniel 5969: return(xmlSAXParseDoc(NULL, cur, 1));
1.55 daniel 5970: }
5971:
5972: /**
1.69 daniel 5973: * xmlCreateFileParserCtxt :
1.50 daniel 5974: * @filename: the filename
5975: *
1.69 daniel 5976: * Create a parser context for a file content.
5977: * Automatic support for ZLIB/Compress compressed document is provided
5978: * by default if found at compile-time.
1.50 daniel 5979: *
1.69 daniel 5980: * Returns the new parser context or NULL
1.9 httpng 5981: */
1.69 daniel 5982: xmlParserCtxtPtr
5983: xmlCreateFileParserCtxt(const char *filename)
5984: {
5985: xmlParserCtxtPtr ctxt;
1.40 daniel 5986: xmlParserInputPtr inputStream;
1.91 daniel 5987: xmlParserInputBufferPtr buf;
1.111 daniel 5988: char *directory = NULL;
1.9 httpng 5989:
1.91 daniel 5990: buf = xmlParserInputBufferCreateFilename(filename, XML_CHAR_ENCODING_NONE);
5991: if (buf == NULL) return(NULL);
1.9 httpng 5992:
1.97 daniel 5993: ctxt = xmlNewParserCtxt();
1.16 daniel 5994: if (ctxt == NULL) {
5995: return(NULL);
5996: }
1.97 daniel 5997:
1.96 daniel 5998: inputStream = xmlNewInputStream(ctxt);
1.40 daniel 5999: if (inputStream == NULL) {
1.97 daniel 6000: xmlFreeParserCtxt(ctxt);
1.40 daniel 6001: return(NULL);
6002: }
6003:
1.119 daniel 6004: inputStream->filename = xmlMemStrdup(filename);
1.91 daniel 6005: inputStream->buf = buf;
6006: inputStream->base = inputStream->buf->buffer->content;
6007: inputStream->cur = inputStream->buf->buffer->content;
1.16 daniel 6008:
1.40 daniel 6009: inputPush(ctxt, inputStream);
1.110 daniel 6010: if ((ctxt->directory == NULL) && (directory == NULL))
1.106 daniel 6011: directory = xmlParserGetDirectory(filename);
6012: if ((ctxt->directory == NULL) && (directory != NULL))
1.110 daniel 6013: ctxt->directory = directory;
1.106 daniel 6014:
1.69 daniel 6015: return(ctxt);
6016: }
6017:
6018: /**
6019: * xmlSAXParseFile :
6020: * @sax: the SAX handler block
6021: * @filename: the filename
6022: * @recovery: work in recovery mode, i.e. tries to read no Well Formed
6023: * documents
6024: *
6025: * parse an XML file and build a tree. Automatic support for ZLIB/Compress
6026: * compressed document is provided by default if found at compile-time.
6027: * It use the given SAX function block to handle the parsing callback.
6028: * If sax is NULL, fallback to the default DOM tree building routines.
6029: *
6030: * Returns the resulting document tree
6031: */
6032:
1.79 daniel 6033: xmlDocPtr
6034: xmlSAXParseFile(xmlSAXHandlerPtr sax, const char *filename,
1.69 daniel 6035: int recovery) {
6036: xmlDocPtr ret;
6037: xmlParserCtxtPtr ctxt;
1.111 daniel 6038: char *directory = NULL;
1.69 daniel 6039:
6040: ctxt = xmlCreateFileParserCtxt(filename);
6041: if (ctxt == NULL) return(NULL);
1.74 daniel 6042: if (sax != NULL) {
1.93 veillard 6043: if (ctxt->sax != NULL)
1.119 daniel 6044: xmlFree(ctxt->sax);
1.74 daniel 6045: ctxt->sax = sax;
6046: ctxt->userData = NULL;
6047: }
1.106 daniel 6048:
1.110 daniel 6049: if ((ctxt->directory == NULL) && (directory == NULL))
1.106 daniel 6050: directory = xmlParserGetDirectory(filename);
6051: if ((ctxt->directory == NULL) && (directory != NULL))
1.116 daniel 6052: ctxt->directory = (char *) xmlStrdup((CHAR *) directory); /* !!!!!!! */
1.16 daniel 6053:
6054: xmlParseDocument(ctxt);
1.40 daniel 6055:
1.72 daniel 6056: if ((ctxt->wellFormed) || recovery) ret = ctxt->myDoc;
1.59 daniel 6057: else {
6058: ret = NULL;
1.72 daniel 6059: xmlFreeDoc(ctxt->myDoc);
6060: ctxt->myDoc = NULL;
1.59 daniel 6061: }
1.86 daniel 6062: if (sax != NULL)
6063: ctxt->sax = NULL;
1.69 daniel 6064: xmlFreeParserCtxt(ctxt);
1.20 daniel 6065:
6066: return(ret);
6067: }
6068:
1.55 daniel 6069: /**
6070: * xmlParseFile :
6071: * @filename: the filename
6072: *
6073: * parse an XML file and build a tree. Automatic support for ZLIB/Compress
6074: * compressed document is provided by default if found at compile-time.
6075: *
1.68 daniel 6076: * Returns the resulting document tree
1.55 daniel 6077: */
6078:
1.79 daniel 6079: xmlDocPtr
6080: xmlParseFile(const char *filename) {
1.59 daniel 6081: return(xmlSAXParseFile(NULL, filename, 0));
6082: }
6083:
6084: /**
6085: * xmlRecoverFile :
6086: * @filename: the filename
6087: *
6088: * parse an XML file and build a tree. Automatic support for ZLIB/Compress
6089: * compressed document is provided by default if found at compile-time.
6090: * In the case the document is not Well Formed, a tree is built anyway
6091: *
1.68 daniel 6092: * Returns the resulting document tree
1.59 daniel 6093: */
6094:
1.79 daniel 6095: xmlDocPtr
6096: xmlRecoverFile(const char *filename) {
1.59 daniel 6097: return(xmlSAXParseFile(NULL, filename, 1));
1.55 daniel 6098: }
1.32 daniel 6099:
1.50 daniel 6100: /**
1.69 daniel 6101: * xmlCreateMemoryParserCtxt :
1.68 daniel 6102: * @buffer: an pointer to a char array
1.50 daniel 6103: * @size: the siwe of the array
6104: *
1.69 daniel 6105: * Create a parser context for an XML in-memory document.
1.50 daniel 6106: *
1.69 daniel 6107: * Returns the new parser context or NULL
1.20 daniel 6108: */
1.69 daniel 6109: xmlParserCtxtPtr
6110: xmlCreateMemoryParserCtxt(char *buffer, int size) {
1.20 daniel 6111: xmlParserCtxtPtr ctxt;
1.40 daniel 6112: xmlParserInputPtr input;
1.75 daniel 6113: xmlCharEncoding enc;
1.40 daniel 6114:
6115: buffer[size - 1] = '\0';
6116:
1.97 daniel 6117: ctxt = xmlNewParserCtxt();
1.20 daniel 6118: if (ctxt == NULL) {
6119: return(NULL);
6120: }
1.97 daniel 6121:
1.96 daniel 6122: input = xmlNewInputStream(ctxt);
1.40 daniel 6123: if (input == NULL) {
1.97 daniel 6124: xmlFreeParserCtxt(ctxt);
1.40 daniel 6125: return(NULL);
6126: }
1.20 daniel 6127:
1.40 daniel 6128: input->filename = NULL;
6129: input->line = 1;
6130: input->col = 1;
1.96 daniel 6131: input->buf = NULL;
1.91 daniel 6132: input->consumed = 0;
1.45 daniel 6133:
6134: /*
1.75 daniel 6135: * plug some encoding conversion routines here. !!!
1.45 daniel 6136: */
1.116 daniel 6137: enc = xmlDetectCharEncoding(BAD_CAST buffer);
1.75 daniel 6138: xmlSwitchEncoding(ctxt, enc);
6139:
1.116 daniel 6140: input->base = BAD_CAST buffer;
6141: input->cur = BAD_CAST buffer;
1.69 daniel 6142: input->free = NULL;
1.20 daniel 6143:
1.40 daniel 6144: inputPush(ctxt, input);
1.69 daniel 6145: return(ctxt);
6146: }
6147:
6148: /**
6149: * xmlSAXParseMemory :
6150: * @sax: the SAX handler block
6151: * @buffer: an pointer to a char array
6152: * @size: the siwe of the array
6153: * @recovery: work in recovery mode, i.e. tries to read no Well Formed
6154: * documents
6155: *
6156: * parse an XML in-memory block and use the given SAX function block
6157: * to handle the parsing callback. If sax is NULL, fallback to the default
6158: * DOM tree building routines.
6159: *
6160: * Returns the resulting document tree
6161: */
6162: xmlDocPtr
6163: xmlSAXParseMemory(xmlSAXHandlerPtr sax, char *buffer, int size, int recovery) {
6164: xmlDocPtr ret;
6165: xmlParserCtxtPtr ctxt;
6166:
6167: ctxt = xmlCreateMemoryParserCtxt(buffer, size);
6168: if (ctxt == NULL) return(NULL);
1.74 daniel 6169: if (sax != NULL) {
6170: ctxt->sax = sax;
6171: ctxt->userData = NULL;
6172: }
1.20 daniel 6173:
6174: xmlParseDocument(ctxt);
1.40 daniel 6175:
1.72 daniel 6176: if ((ctxt->wellFormed) || recovery) ret = ctxt->myDoc;
1.59 daniel 6177: else {
6178: ret = NULL;
1.72 daniel 6179: xmlFreeDoc(ctxt->myDoc);
6180: ctxt->myDoc = NULL;
1.59 daniel 6181: }
1.86 daniel 6182: if (sax != NULL)
6183: ctxt->sax = NULL;
1.69 daniel 6184: xmlFreeParserCtxt(ctxt);
1.16 daniel 6185:
1.9 httpng 6186: return(ret);
1.17 daniel 6187: }
6188:
1.55 daniel 6189: /**
6190: * xmlParseMemory :
1.68 daniel 6191: * @buffer: an pointer to a char array
1.55 daniel 6192: * @size: the size of the array
6193: *
6194: * parse an XML in-memory block and build a tree.
6195: *
1.68 daniel 6196: * Returns the resulting document tree
1.55 daniel 6197: */
6198:
6199: xmlDocPtr xmlParseMemory(char *buffer, int size) {
1.59 daniel 6200: return(xmlSAXParseMemory(NULL, buffer, size, 0));
6201: }
6202:
6203: /**
6204: * xmlRecoverMemory :
1.68 daniel 6205: * @buffer: an pointer to a char array
1.59 daniel 6206: * @size: the size of the array
6207: *
6208: * parse an XML in-memory block and build a tree.
6209: * In the case the document is not Well Formed, a tree is built anyway
6210: *
1.68 daniel 6211: * Returns the resulting document tree
1.59 daniel 6212: */
6213:
6214: xmlDocPtr xmlRecoverMemory(char *buffer, int size) {
6215: return(xmlSAXParseMemory(NULL, buffer, size, 1));
1.17 daniel 6216: }
6217:
6218:
1.50 daniel 6219: /**
6220: * xmlSetupParserForBuffer:
6221: * @ctxt: an XML parser context
6222: * @buffer: a CHAR * buffer
6223: * @filename: a file name
6224: *
1.19 daniel 6225: * Setup the parser context to parse a new buffer; Clears any prior
6226: * contents from the parser context. The buffer parameter must not be
6227: * NULL, but the filename parameter can be
6228: */
1.55 daniel 6229: void
6230: xmlSetupParserForBuffer(xmlParserCtxtPtr ctxt, const CHAR* buffer,
1.17 daniel 6231: const char* filename)
6232: {
1.96 daniel 6233: xmlParserInputPtr input;
1.40 daniel 6234:
1.96 daniel 6235: input = xmlNewInputStream(ctxt);
6236: if (input == NULL) {
6237: perror("malloc");
1.119 daniel 6238: xmlFree(ctxt);
1.96 daniel 6239: exit(1);
6240: }
6241:
6242: xmlClearParserCtxt(ctxt);
6243: if (filename != NULL)
1.119 daniel 6244: input->filename = xmlMemStrdup(filename);
1.96 daniel 6245: input->base = buffer;
6246: input->cur = buffer;
6247: inputPush(ctxt, input);
1.17 daniel 6248: }
6249:
1.32 daniel 6250:
1.98 daniel 6251: /************************************************************************
6252: * *
6253: * Miscellaneous *
6254: * *
6255: ************************************************************************/
6256:
6257:
1.50 daniel 6258: /**
6259: * xmlParserFindNodeInfo:
6260: * @ctxt: an XML parser context
6261: * @node: an XML node within the tree
6262: *
6263: * Find the parser node info struct for a given node
6264: *
1.68 daniel 6265: * Returns an xmlParserNodeInfo block pointer or NULL
1.32 daniel 6266: */
6267: const xmlParserNodeInfo* xmlParserFindNodeInfo(const xmlParserCtxt* ctx,
6268: const xmlNode* node)
6269: {
6270: unsigned long pos;
6271:
6272: /* Find position where node should be at */
6273: pos = xmlParserFindNodeInfoIndex(&ctx->node_seq, node);
6274: if ( ctx->node_seq.buffer[pos].node == node )
6275: return &ctx->node_seq.buffer[pos];
6276: else
6277: return NULL;
6278: }
6279:
6280:
1.50 daniel 6281: /**
6282: * xmlInitNodeInfoSeq :
6283: * @seq: a node info sequence pointer
6284: *
6285: * -- Initialize (set to initial state) node info sequence
1.32 daniel 6286: */
1.55 daniel 6287: void
6288: xmlInitNodeInfoSeq(xmlParserNodeInfoSeqPtr seq)
1.32 daniel 6289: {
6290: seq->length = 0;
6291: seq->maximum = 0;
6292: seq->buffer = NULL;
6293: }
6294:
1.50 daniel 6295: /**
6296: * xmlClearNodeInfoSeq :
6297: * @seq: a node info sequence pointer
6298: *
6299: * -- Clear (release memory and reinitialize) node
1.32 daniel 6300: * info sequence
6301: */
1.55 daniel 6302: void
6303: xmlClearNodeInfoSeq(xmlParserNodeInfoSeqPtr seq)
1.32 daniel 6304: {
6305: if ( seq->buffer != NULL )
1.119 daniel 6306: xmlFree(seq->buffer);
1.32 daniel 6307: xmlInitNodeInfoSeq(seq);
6308: }
6309:
6310:
1.50 daniel 6311: /**
6312: * xmlParserFindNodeInfoIndex:
6313: * @seq: a node info sequence pointer
6314: * @node: an XML node pointer
6315: *
6316: *
1.32 daniel 6317: * xmlParserFindNodeInfoIndex : Find the index that the info record for
6318: * the given node is or should be at in a sorted sequence
1.68 daniel 6319: *
6320: * Returns a long indicating the position of the record
1.32 daniel 6321: */
6322: unsigned long xmlParserFindNodeInfoIndex(const xmlParserNodeInfoSeq* seq,
6323: const xmlNode* node)
6324: {
6325: unsigned long upper, lower, middle;
6326: int found = 0;
6327:
6328: /* Do a binary search for the key */
6329: lower = 1;
6330: upper = seq->length;
6331: middle = 0;
6332: while ( lower <= upper && !found) {
6333: middle = lower + (upper - lower) / 2;
6334: if ( node == seq->buffer[middle - 1].node )
6335: found = 1;
6336: else if ( node < seq->buffer[middle - 1].node )
6337: upper = middle - 1;
6338: else
6339: lower = middle + 1;
6340: }
6341:
6342: /* Return position */
6343: if ( middle == 0 || seq->buffer[middle - 1].node < node )
6344: return middle;
6345: else
6346: return middle - 1;
6347: }
6348:
6349:
1.50 daniel 6350: /**
6351: * xmlParserAddNodeInfo:
6352: * @ctxt: an XML parser context
1.68 daniel 6353: * @info: a node info sequence pointer
1.50 daniel 6354: *
6355: * Insert node info record into the sorted sequence
1.32 daniel 6356: */
1.55 daniel 6357: void
6358: xmlParserAddNodeInfo(xmlParserCtxtPtr ctxt,
1.68 daniel 6359: const xmlParserNodeInfo* info)
1.32 daniel 6360: {
6361: unsigned long pos;
6362: static unsigned int block_size = 5;
6363:
6364: /* Find pos and check to see if node is already in the sequence */
1.55 daniel 6365: pos = xmlParserFindNodeInfoIndex(&ctxt->node_seq, info->node);
6366: if ( pos < ctxt->node_seq.length
6367: && ctxt->node_seq.buffer[pos].node == info->node ) {
6368: ctxt->node_seq.buffer[pos] = *info;
1.32 daniel 6369: }
6370:
6371: /* Otherwise, we need to add new node to buffer */
6372: else {
6373: /* Expand buffer by 5 if needed */
1.55 daniel 6374: if ( ctxt->node_seq.length + 1 > ctxt->node_seq.maximum ) {
1.32 daniel 6375: xmlParserNodeInfo* tmp_buffer;
1.55 daniel 6376: unsigned int byte_size = (sizeof(*ctxt->node_seq.buffer)
6377: *(ctxt->node_seq.maximum + block_size));
1.32 daniel 6378:
1.55 daniel 6379: if ( ctxt->node_seq.buffer == NULL )
1.119 daniel 6380: tmp_buffer = (xmlParserNodeInfo*) xmlMalloc(byte_size);
1.32 daniel 6381: else
1.119 daniel 6382: tmp_buffer = (xmlParserNodeInfo*) xmlRealloc(ctxt->node_seq.buffer, byte_size);
1.32 daniel 6383:
6384: if ( tmp_buffer == NULL ) {
1.55 daniel 6385: if ((ctxt->sax != NULL) && (ctxt->sax->error != NULL))
1.74 daniel 6386: ctxt->sax->error(ctxt->userData, "Out of memory\n");
1.32 daniel 6387: return;
6388: }
1.55 daniel 6389: ctxt->node_seq.buffer = tmp_buffer;
6390: ctxt->node_seq.maximum += block_size;
1.32 daniel 6391: }
6392:
6393: /* If position is not at end, move elements out of the way */
1.55 daniel 6394: if ( pos != ctxt->node_seq.length ) {
1.32 daniel 6395: unsigned long i;
6396:
1.55 daniel 6397: for ( i = ctxt->node_seq.length; i > pos; i-- )
6398: ctxt->node_seq.buffer[i] = ctxt->node_seq.buffer[i - 1];
1.32 daniel 6399: }
6400:
6401: /* Copy element and increase length */
1.55 daniel 6402: ctxt->node_seq.buffer[pos] = *info;
6403: ctxt->node_seq.length++;
1.32 daniel 6404: }
6405: }
1.77 daniel 6406:
1.98 daniel 6407:
6408: /**
6409: * xmlSubstituteEntitiesDefault :
6410: * @val: int 0 or 1
6411: *
6412: * Set and return the previous value for default entity support.
6413: * Initially the parser always keep entity references instead of substituting
6414: * entity values in the output. This function has to be used to change the
6415: * default parser behaviour
6416: * SAX::subtituteEntities() has to be used for changing that on a file by
6417: * file basis.
6418: *
6419: * Returns the last value for 0 for no substitution, 1 for substitution.
6420: */
6421:
6422: int
6423: xmlSubstituteEntitiesDefault(int val) {
6424: int old = xmlSubstituteEntitiesDefaultValue;
6425:
6426: xmlSubstituteEntitiesDefaultValue = val;
6427: return(old);
6428: }
1.77 daniel 6429:
Webmaster