Annotation of XML/parser.c, revision 1.156
1.1 veillard 1: /*
1.3 veillard 2: * parser.c : an XML 1.0 non-verifying parser
1.15 veillard 3: *
4: * See Copyright for the status of this software.
5: *
1.60 daniel 6: * Daniel.Veillard@w3.org
1.1 veillard 7: */
8:
1.26 daniel 9: #ifdef WIN32
1.138 daniel 10: #include "win32config.h"
1.26 daniel 11: #else
1.121 daniel 12: #include "config.h"
1.26 daniel 13: #endif
1.121 daniel 14:
1.1 veillard 15: #include <stdio.h>
1.121 daniel 16: #include <string.h> /* for memset() only */
17: #ifdef HAVE_CTYPE_H
1.1 veillard 18: #include <ctype.h>
1.121 daniel 19: #endif
20: #ifdef HAVE_STDLIB_H
1.50 daniel 21: #include <stdlib.h>
1.121 daniel 22: #endif
23: #ifdef HAVE_SYS_STAT_H
1.9 httpng 24: #include <sys/stat.h>
1.121 daniel 25: #endif
1.9 httpng 26: #ifdef HAVE_FCNTL_H
27: #include <fcntl.h>
28: #endif
1.10 httpng 29: #ifdef HAVE_UNISTD_H
30: #include <unistd.h>
31: #endif
1.20 daniel 32: #ifdef HAVE_ZLIB_H
33: #include <zlib.h>
34: #endif
1.1 veillard 35:
1.119 daniel 36: #include "xmlmemory.h"
1.14 veillard 37: #include "tree.h"
1.1 veillard 38: #include "parser.h"
1.14 veillard 39: #include "entities.h"
1.75 daniel 40: #include "encoding.h"
1.61 daniel 41: #include "valid.h"
1.69 daniel 42: #include "parserInternals.h"
1.91 daniel 43: #include "xmlIO.h"
1.122 daniel 44: #include "xml-error.h"
1.1 veillard 45:
1.140 daniel 46: #define XML_PARSER_BIG_BUFFER_SIZE 1000
47: #define XML_PARSER_BUFFER_SIZE 100
48:
1.86 daniel 49: const char *xmlParserVersion = LIBXML_VERSION;
50:
1.139 daniel 51: /*
52: * List of XML prefixed PI allowed by W3C specs
53: */
54:
55: const char *xmlW3CPIs[] = {
56: "xml-stylesheet",
57: NULL
58: };
1.91 daniel 59:
1.151 daniel 60: void xmlParserHandleReference(xmlParserCtxtPtr ctxt);
61: void xmlParserHandlePEReference(xmlParserCtxtPtr ctxt);
62: xmlEntityPtr xmlParseStringPEReference(xmlParserCtxtPtr ctxt,
63: const xmlChar **str);
1.91 daniel 64: /************************************************************************
65: * *
66: * Input handling functions for progressive parsing *
67: * *
68: ************************************************************************/
69:
70: /* #define DEBUG_INPUT */
1.140 daniel 71: /* #define DEBUG_STACK */
72: /* #define DEBUG_PUSH */
73:
1.91 daniel 74:
1.110 daniel 75: #define INPUT_CHUNK 250
76: /* we need to keep enough input to show errors in context */
77: #define LINE_LEN 80
1.91 daniel 78:
79: #ifdef DEBUG_INPUT
80: #define CHECK_BUFFER(in) check_buffer(in)
81:
82: void check_buffer(xmlParserInputPtr in) {
83: if (in->base != in->buf->buffer->content) {
84: fprintf(stderr, "xmlParserInput: base mismatch problem\n");
85: }
86: if (in->cur < in->base) {
87: fprintf(stderr, "xmlParserInput: cur < base problem\n");
88: }
89: if (in->cur > in->base + in->buf->buffer->use) {
90: fprintf(stderr, "xmlParserInput: cur > base + use problem\n");
91: }
92: fprintf(stderr,"buffer %x : content %x, cur %d, use %d, size %d\n",
93: (int) in, (int) in->buf->buffer->content, in->cur - in->base,
94: in->buf->buffer->use, in->buf->buffer->size);
95: }
96:
1.110 daniel 97: #else
98: #define CHECK_BUFFER(in)
99: #endif
100:
1.91 daniel 101:
102: /**
103: * xmlParserInputRead:
104: * @in: an XML parser input
105: * @len: an indicative size for the lookahead
106: *
107: * This function refresh the input for the parser. It doesn't try to
108: * preserve pointers to the input buffer, and discard already read data
109: *
1.123 daniel 110: * Returns the number of xmlChars read, or -1 in case of error, 0 indicate the
1.91 daniel 111: * end of this entity
112: */
113: int
114: xmlParserInputRead(xmlParserInputPtr in, int len) {
115: int ret;
116: int used;
117: int index;
118:
119: #ifdef DEBUG_INPUT
120: fprintf(stderr, "Read\n");
121: #endif
122: if (in->buf == NULL) return(-1);
123: if (in->base == NULL) return(-1);
124: if (in->cur == NULL) return(-1);
125: if (in->buf->buffer == NULL) return(-1);
126:
127: CHECK_BUFFER(in);
128:
129: used = in->cur - in->buf->buffer->content;
130: ret = xmlBufferShrink(in->buf->buffer, used);
131: if (ret > 0) {
132: in->cur -= ret;
133: in->consumed += ret;
134: }
135: ret = xmlParserInputBufferRead(in->buf, len);
136: if (in->base != in->buf->buffer->content) {
137: /*
138: * the buffer has been realloced
139: */
140: index = in->cur - in->base;
141: in->base = in->buf->buffer->content;
142: in->cur = &in->buf->buffer->content[index];
143: }
144:
145: CHECK_BUFFER(in);
146:
147: return(ret);
148: }
149:
150: /**
151: * xmlParserInputGrow:
152: * @in: an XML parser input
153: * @len: an indicative size for the lookahead
154: *
155: * This function increase the input for the parser. It tries to
156: * preserve pointers to the input buffer, and keep already read data
157: *
1.123 daniel 158: * Returns the number of xmlChars read, or -1 in case of error, 0 indicate the
1.91 daniel 159: * end of this entity
160: */
161: int
162: xmlParserInputGrow(xmlParserInputPtr in, int len) {
163: int ret;
164: int index;
165:
166: #ifdef DEBUG_INPUT
167: fprintf(stderr, "Grow\n");
168: #endif
169: if (in->buf == NULL) return(-1);
170: if (in->base == NULL) return(-1);
171: if (in->cur == NULL) return(-1);
172: if (in->buf->buffer == NULL) return(-1);
173:
174: CHECK_BUFFER(in);
175:
176: index = in->cur - in->base;
177: if (in->buf->buffer->use > index + INPUT_CHUNK) {
178:
179: CHECK_BUFFER(in);
180:
181: return(0);
182: }
1.148 daniel 183: if ((in->buf->httpIO != NULL) || (in->buf->ftpIO != NULL) ||
184: (in->buf->file != NULL) ||
1.140 daniel 185: #ifdef HAVE_ZLIB_H
186: (in->buf->gzfile != NULL) ||
187: #endif
188: (in->buf->fd >= 0))
189: ret = xmlParserInputBufferGrow(in->buf, len);
190: else
191: return(0);
1.135 daniel 192:
193: /*
194: * NOTE : in->base may be a "dandling" i.e. freed pointer in this
195: * block, but we use it really as an integer to do some
196: * pointer arithmetic. Insure will raise it as a bug but in
197: * that specific case, that's not !
198: */
1.91 daniel 199: if (in->base != in->buf->buffer->content) {
200: /*
201: * the buffer has been realloced
202: */
203: index = in->cur - in->base;
204: in->base = in->buf->buffer->content;
205: in->cur = &in->buf->buffer->content[index];
206: }
207:
208: CHECK_BUFFER(in);
209:
210: return(ret);
211: }
212:
213: /**
214: * xmlParserInputShrink:
215: * @in: an XML parser input
216: *
217: * This function removes used input for the parser.
218: */
219: void
220: xmlParserInputShrink(xmlParserInputPtr in) {
221: int used;
222: int ret;
223: int index;
224:
225: #ifdef DEBUG_INPUT
226: fprintf(stderr, "Shrink\n");
227: #endif
228: if (in->buf == NULL) return;
229: if (in->base == NULL) return;
230: if (in->cur == NULL) return;
231: if (in->buf->buffer == NULL) return;
232:
233: CHECK_BUFFER(in);
234:
235: used = in->cur - in->buf->buffer->content;
236: if (used > INPUT_CHUNK) {
1.110 daniel 237: ret = xmlBufferShrink(in->buf->buffer, used - LINE_LEN);
1.91 daniel 238: if (ret > 0) {
239: in->cur -= ret;
240: in->consumed += ret;
241: }
242: }
243:
244: CHECK_BUFFER(in);
245:
246: if (in->buf->buffer->use > INPUT_CHUNK) {
247: return;
248: }
249: xmlParserInputBufferRead(in->buf, 2 * INPUT_CHUNK);
250: if (in->base != in->buf->buffer->content) {
251: /*
252: * the buffer has been realloced
253: */
254: index = in->cur - in->base;
255: in->base = in->buf->buffer->content;
256: in->cur = &in->buf->buffer->content[index];
257: }
258:
259: CHECK_BUFFER(in);
260: }
261:
1.45 daniel 262: /************************************************************************
263: * *
264: * Parser stacks related functions and macros *
265: * *
266: ************************************************************************/
1.79 daniel 267:
268: int xmlSubstituteEntitiesDefaultValue = 0;
1.100 daniel 269: int xmlDoValidityCheckingDefaultValue = 0;
1.135 daniel 270: xmlEntityPtr xmlParseStringEntityRef(xmlParserCtxtPtr ctxt,
271: const xmlChar ** str);
1.79 daniel 272:
1.1 veillard 273: /*
1.40 daniel 274: * Generic function for accessing stacks in the Parser Context
1.1 veillard 275: */
276:
/*
 * PUSH_AND_POP(scope, type, name):
 *
 * Generates the pair of functions name##Push() and name##Pop() operating on
 * the stack made of ctxt->name##Tab (storage), ctxt->name##Nr (number of
 * entries), ctxt->name##Max (capacity) and ctxt->name (cached top entry).
 *
 * name##Push() doubles the capacity when the stack is full and returns the
 * index at which @value was stored. If the reallocation fails it reports the
 * problem on stderr and returns 0, leaving the stack exactly as it was: the
 * table pointer and the capacity are only updated once xmlRealloc()
 * succeeded, so the old table is neither leaked nor replaced by NULL.
 *
 * name##Pop() removes and returns the top entry, or 0 if the stack is empty;
 * ctxt->name is updated to the new top (NULL when the stack becomes empty).
 */
#define PUSH_AND_POP(scope, type, name)					\
scope int name##Push(xmlParserCtxtPtr ctxt, type value) {		\
    if (ctxt->name##Nr >= ctxt->name##Max) {				\
        void *grown = xmlRealloc(ctxt->name##Tab,			\
                  ctxt->name##Max * 2 * sizeof(ctxt->name##Tab[0]));	\
        if (grown == NULL) {						\
            fprintf(stderr, "realloc failed !\n");			\
            return(0);							\
        }								\
        ctxt->name##Tab = grown;					\
        ctxt->name##Max *= 2;						\
    }									\
    ctxt->name##Tab[ctxt->name##Nr] = value;				\
    ctxt->name = value;							\
    return(ctxt->name##Nr++);						\
}									\
scope type name##Pop(xmlParserCtxtPtr ctxt) {				\
    type ret;								\
    if (ctxt->name##Nr <= 0) return(0);					\
    ctxt->name##Nr--;							\
    if (ctxt->name##Nr > 0)						\
        ctxt->name = ctxt->name##Tab[ctxt->name##Nr - 1];		\
    else								\
        ctxt->name = NULL;						\
    ret = ctxt->name##Tab[ctxt->name##Nr];				\
    ctxt->name##Tab[ctxt->name##Nr] = 0;				\
    return(ret);							\
}
304:
1.140 daniel 305: PUSH_AND_POP(extern, xmlParserInputPtr, input)
306: PUSH_AND_POP(extern, xmlNodePtr, node)
307: PUSH_AND_POP(extern, xmlChar*, name)
1.40 daniel 308:
1.55 daniel 309: /*
310: * Macros for accessing the content. Those should be used only by the parser,
311: * and not exported.
312: *
313: * Dirty macros, i.e. one need to make assumption on the context to use them
314: *
1.123 daniel 315: * CUR_PTR return the current pointer to the xmlChar to be parsed.
1.151 daniel 316: * To be used with extreme caution since operations consuming
317: * characters may move the input buffer to a different location !
1.123 daniel 318: * CUR returns the current xmlChar value, i.e. a 8 bit value if compiled
1.152 daniel 319: * in ISO-Latin or UTF-8.
1.151 daniel 320: * This should be used internally by the parser
1.55 daniel 321: * only to compare to ASCII values otherwise it would break when
322: * running with UTF-8 encoding.
1.123 daniel 323: * NXT(n) returns the n'th next xmlChar. Same as CUR it should be used only
1.55 daniel 324: * to compare on ASCII based substring.
1.123 daniel 325: * SKIP(n) Skip n xmlChar, and must also be used only to skip ASCII defined
1.55 daniel 326: * strings within the parser.
327: *
1.77 daniel 328: * Clean macros, not dependent of an ASCII context, expect UTF-8 encoding
1.55 daniel 329: *
330: * NEXT Skip to the next character, this does the proper decoding
331: * in UTF-8 mode. It also pop-up unfinished entities on the fly.
1.77 daniel 332: * COPY(to) copy one char to *to, increment CUR_PTR and to accordingly
1.155 daniel 333: * CUR_CHAR Return the current char as an int as well as its length.
1.55 daniel 334: */
1.45 daniel 335:
/*
 * RAW: -1 while ctxt->token is non-zero, otherwise the xmlChar found at the
 * current input position.
 */
1.152 daniel 336: #define RAW (ctxt->token ? -1 : (*ctxt->input->cur))
/*
 * CUR: ctxt->token when it is non-zero, otherwise the xmlChar found at the
 * current input position.
 */
1.97 daniel 337: #define CUR (ctxt->token ? ctxt->token : (*ctxt->input->cur))
/*
 * SKIP(val): add val both to ctxt->nbChars and to the input position.
 * Expands to an unparenthesized comma expression; line/column counters are
 * not touched.
 */
1.135 daniel 338: #define SKIP(val) ctxt->nbChars += (val),ctxt->input->cur += (val)
/* NXT(val): the xmlChar located val positions after the current one. */
1.55 daniel 339: #define NXT(val) ctxt->input->cur[(val)]
/* CUR_PTR: the current input position itself (an lvalue). */
340: #define CUR_PTR ctxt->input->cur
1.154 daniel 341:
/*
 * SHRINK: drop used input, then pop the current input if it sits on a 0
 * xmlChar and cannot be grown. NOTE: expands to two statements (a call
 * followed by an if), so it is not safe as the unbraced body of an if/else.
 */
1.97 daniel 342: #define SHRINK xmlParserInputShrink(ctxt->input); \
343: if ((*ctxt->input->cur == 0) && \
344: (xmlParserInputGrow(ctxt->input, INPUT_CHUNK) <= 0)) \
345: xmlPopInput(ctxt)
346:
/*
 * GROW: ask for more input, then pop the current input if it sits on a 0
 * xmlChar and a second grow attempt yields nothing. NOTE: same two-statement
 * expansion as SHRINK.
 */
347: #define GROW xmlParserInputGrow(ctxt->input, INPUT_CHUNK); \
348: if ((*ctxt->input->cur == 0) && \
349: (xmlParserInputGrow(ctxt->input, INPUT_CHUNK) <= 0)) \
350: xmlPopInput(ctxt)
1.55 daniel 351:
/*
 * SKIP_BLANKS: call xmlSkipBlankChars() on the context. NOTE: the expansion
 * already ends with ';'.
 */
1.155 daniel 352: #define SKIP_BLANKS xmlSkipBlankChars(ctxt);
1.154 daniel 353:
/*
 * NEXT: call xmlNextChar() on the context. NOTE: the expansion already ends
 * with ';'.
 */
1.151 daniel 354: #define NEXT xmlNextChar(ctxt);
1.154 daniel 355:
/*
 * NEXTL(l): advance the input position by l xmlChars. The line/column
 * counters are updated from the xmlChar being left, ctxt->token is cleared,
 * and the PE reference / reference handlers are called when the new position
 * holds '%' or '&'. NOTE: expands to several statements ending with ';', and
 * the argument l is used without parentheses.
 */
1.153 daniel 356: #define NEXTL(l) \
357: if (*(ctxt->input->cur) == '\n') { \
358: ctxt->input->line++; ctxt->input->col = 1; \
359: } else ctxt->input->col++; \
1.154 daniel 360: ctxt->token = 0; ctxt->input->cur += l; \
361: if (*ctxt->input->cur == '%') xmlParserHandlePEReference(ctxt); \
362: if (*ctxt->input->cur == '&') xmlParserHandleReference(ctxt);
363:
/*
 * CUR_CHAR(l): call xmlCurrentChar(), which stores the length of the current
 * char in l and yields its value. NOTE: the expansion already ends with ';'.
 */
1.152 daniel 364: #define CUR_CHAR(l) xmlCurrentChar(ctxt, &l);
1.154 daniel 365:
/*
 * COPY_BUF(l,b,i,v): append the char v to the array b at index i and move i
 * forward. A length l of 1 stores v as a single xmlChar; any other length
 * goes through xmlCopyChar(). NOTE: expands to a bare if/else, and i is
 * evaluated more than once.
 */
1.152 daniel 366: #define COPY_BUF(l,b,i,v) \
367: if (l == 1) b[i++] = (xmlChar) v; \
368: else i += xmlCopyChar(l,&b[i],v);
1.151 daniel 369:
370: /**
371: * xmlNextChar:
372: * @ctxt: the XML parser context
373: *
374: * Skip to the next char input char.
375: */
1.55 daniel 376:
1.151 daniel 377: void
378: xmlNextChar(xmlParserCtxtPtr ctxt) {
379: if (ctxt->token != 0) ctxt->token = 0;
380: else {
381: if ((*ctxt->input->cur == 0) &&
382: (xmlParserInputGrow(ctxt->input, INPUT_CHUNK) <= 0) &&
383: (ctxt->instate != XML_PARSER_COMMENT)) {
384: /*
385: * If we are at the end of the current entity and
386: * the context allows it, we pop consumed entities
387: * automatically.
388: * TODO: the auto closing should be blocked in other cases
389: */
390: xmlPopInput(ctxt);
391: } else {
392: if (*(ctxt->input->cur) == '\n') {
393: ctxt->input->line++; ctxt->input->col = 1;
394: } else ctxt->input->col++;
395: if (ctxt->encoding == NULL) {
396: /*
397: * We are supposed to handle UTF8, check it's valid
398: * From rfc2044: encoding of the Unicode values on UTF-8:
399: *
400: * UCS-4 range (hex.) UTF-8 octet sequence (binary)
401: * 0000 0000-0000 007F 0xxxxxxx
402: * 0000 0080-0000 07FF 110xxxxx 10xxxxxx
403: * 0000 0800-0000 FFFF 1110xxxx 10xxxxxx 10xxxxxx
404: *
405: * Check for the 0x11000 limit too
406: */
407: const unsigned char *cur = ctxt->input->cur;
408: unsigned char c;
1.91 daniel 409:
1.151 daniel 410: c = *cur;
411: if (c & 0x80) {
412: if (cur[1] == 0)
413: xmlParserInputGrow(ctxt->input, INPUT_CHUNK);
414: if ((cur[1] & 0xc0) != 0x80)
415: goto encoding_error;
416: if ((c & 0xe0) == 0xe0) {
417: unsigned int val;
418:
419: if (cur[2] == 0)
420: xmlParserInputGrow(ctxt->input, INPUT_CHUNK);
421: if ((cur[2] & 0xc0) != 0x80)
422: goto encoding_error;
423: if ((c & 0xf0) == 0xf0) {
424: if (cur[3] == 0)
425: xmlParserInputGrow(ctxt->input, INPUT_CHUNK);
426: if (((c & 0xf8) != 0xf0) ||
427: ((cur[3] & 0xc0) != 0x80))
428: goto encoding_error;
429: /* 4-byte code */
430: ctxt->input->cur += 4;
431: val = (cur[0] & 0x7) << 18;
432: val |= (cur[1] & 0x3f) << 12;
433: val |= (cur[2] & 0x3f) << 6;
434: val |= cur[3] & 0x3f;
435: } else {
436: /* 3-byte code */
437: ctxt->input->cur += 3;
438: val = (cur[0] & 0xf) << 12;
439: val |= (cur[1] & 0x3f) << 6;
440: val |= cur[2] & 0x3f;
441: }
442: if (((val > 0xd7ff) && (val < 0xe000)) ||
443: ((val > 0xfffd) && (val < 0x10000)) ||
444: (val >= 0x11000)) {
445: if ((ctxt->sax != NULL) &&
446: (ctxt->sax->error != NULL))
447: ctxt->sax->error(ctxt->userData,
448: "Char out of allowed range\n");
449: ctxt->errNo = XML_ERR_INVALID_ENCODING;
450: ctxt->wellFormed = 0;
451: }
452: } else
453: /* 2-byte code */
454: ctxt->input->cur += 2;
455: } else
456: /* 1-byte code */
457: ctxt->input->cur++;
458: } else {
459: /*
460: * Assume it's a fixed lenght encoding (1) with
461: * a compatibke encoding for the ASCII set, since
462: * XML constructs only use < 128 chars
463: */
464: ctxt->input->cur++;
465: }
466: ctxt->nbChars++;
467: if (*ctxt->input->cur == 0)
468: xmlParserInputGrow(ctxt->input, INPUT_CHUNK);
469: }
470: }
1.154 daniel 471: if (*ctxt->input->cur == '%') xmlParserHandlePEReference(ctxt);
472: if (*ctxt->input->cur == '&') xmlParserHandleReference(ctxt);
1.151 daniel 473: return;
474: encoding_error:
475: /*
476: * If we detect an UTF8 error that probably mean that the
477: * input encoding didn't get properly advertized in the
478: * declaration header. Report the error and switch the encoding
479: * to ISO-Latin-1 (if you don't like this policy, just declare the
480: * encoding !)
481: */
482: if ((ctxt->sax != NULL) && (ctxt->sax->error != NULL))
483: ctxt->sax->error(ctxt->userData,
484: "Input is not proper UTF-8, indicate encoding !\n");
485: ctxt->errNo = XML_ERR_INVALID_ENCODING;
486:
487: ctxt->encoding = xmlStrdup(BAD_CAST "ISO-8859-1");
488: ctxt->input->cur++;
489: return;
490: }
1.42 daniel 491:
1.152 daniel 492: /**
493: * xmlCurrentChar:
494: * @ctxt: the XML parser context
495: * @len: pointer to the length of the char read
496: *
497: * The current char value, if using UTF-8 this may actaully span multiple
498: * bytes in the input buffer.
499: *
500: * Returns the current char value and its lenght
501: */
502:
503: int
504: xmlCurrentChar(xmlParserCtxtPtr ctxt, int *len) {
505: if (ctxt->token != 0) {
506: *len = 0;
507: return(ctxt->token);
508: }
509: if (ctxt->encoding == NULL) {
510: /*
511: * We are supposed to handle UTF8, check it's valid
512: * From rfc2044: encoding of the Unicode values on UTF-8:
513: *
514: * UCS-4 range (hex.) UTF-8 octet sequence (binary)
515: * 0000 0000-0000 007F 0xxxxxxx
516: * 0000 0080-0000 07FF 110xxxxx 10xxxxxx
517: * 0000 0800-0000 FFFF 1110xxxx 10xxxxxx 10xxxxxx
518: *
519: * Check for the 0x11000 limit too
520: */
521: const unsigned char *cur = ctxt->input->cur;
522: unsigned char c;
523: unsigned int val;
524:
525: c = *cur;
526: if (c & 0x80) {
527: if (cur[1] == 0)
528: xmlParserInputGrow(ctxt->input, INPUT_CHUNK);
529: if ((cur[1] & 0xc0) != 0x80)
530: goto encoding_error;
531: if ((c & 0xe0) == 0xe0) {
532:
533: if (cur[2] == 0)
534: xmlParserInputGrow(ctxt->input, INPUT_CHUNK);
535: if ((cur[2] & 0xc0) != 0x80)
536: goto encoding_error;
537: if ((c & 0xf0) == 0xf0) {
538: if (cur[3] == 0)
539: xmlParserInputGrow(ctxt->input, INPUT_CHUNK);
540: if (((c & 0xf8) != 0xf0) ||
541: ((cur[3] & 0xc0) != 0x80))
542: goto encoding_error;
543: /* 4-byte code */
544: *len = 4;
545: val = (cur[0] & 0x7) << 18;
546: val |= (cur[1] & 0x3f) << 12;
547: val |= (cur[2] & 0x3f) << 6;
548: val |= cur[3] & 0x3f;
549: } else {
550: /* 3-byte code */
551: *len = 3;
552: val = (cur[0] & 0xf) << 12;
553: val |= (cur[1] & 0x3f) << 6;
554: val |= cur[2] & 0x3f;
555: }
556: } else {
557: /* 2-byte code */
558: *len = 2;
559: val = (cur[0] & 0x1f) << 6;
560: val |= cur[2] & 0x3f;
561: }
562: if (!IS_CHAR(val)) {
563: if ((ctxt->sax != NULL) &&
564: (ctxt->sax->error != NULL))
565: ctxt->sax->error(ctxt->userData,
566: "Char out of allowed range\n");
567: ctxt->errNo = XML_ERR_INVALID_ENCODING;
568: ctxt->wellFormed = 0;
569: }
570: return(val);
571: } else {
572: /* 1-byte code */
573: *len = 1;
574: return((int) *ctxt->input->cur);
575: }
576: }
577: /*
578: * Assume it's a fixed lenght encoding (1) with
579: * a compatibke encoding for the ASCII set, since
580: * XML constructs only use < 128 chars
581: */
582: *len = 1;
583: return((int) *ctxt->input->cur);
584: encoding_error:
585: /*
586: * If we detect an UTF8 error that probably mean that the
587: * input encoding didn't get properly advertized in the
588: * declaration header. Report the error and switch the encoding
589: * to ISO-Latin-1 (if you don't like this policy, just declare the
590: * encoding !)
591: */
592: if ((ctxt->sax != NULL) && (ctxt->sax->error != NULL))
593: ctxt->sax->error(ctxt->userData,
594: "Input is not proper UTF-8, indicate encoding !\n");
595: ctxt->errNo = XML_ERR_INVALID_ENCODING;
596:
597: ctxt->encoding = xmlStrdup(BAD_CAST "ISO-8859-1");
598: *len = 1;
599: return((int) *ctxt->input->cur);
600: }
601:
/**
 * xmlCopyChar:
 * @len:  the length of the char as found in the input, or zero if unknown
 * @out:  pointer to an array of xmlChar with room for at least 4 entries
 * @val:  the char value
 *
 * Append the char value in the array.
 *
 * With @len equal to 1 the value is stored as a single xmlChar without any
 * encoding. With @len equal to 0 the value is first checked to lie in
 * [0, 0x10FFFF]; an out of range value is reported on stderr and nothing is
 * written. In all other cases the value is written as a UTF-8 sequence
 * whose size is derived from @val itself:
 *
 * UCS-4 range (hex.)           UTF-8 octet sequence (binary)
 * 0000 0000-0000 007F   0xxxxxxx
 * 0000 0080-0000 07FF   110xxxxx 10xxxxxx
 * 0000 0800-0000 FFFF   1110xxxx 10xxxxxx 10xxxxxx
 * 0001 0000-0010 FFFF   11110xxx 10xxxxxx 10xxxxxx 10xxxxxx
 *
 * Returns the number of xmlChar written
 */

int
xmlCopyChar(int len, xmlChar *out, int val) {
    xmlChar *start = out;
    int bits;

    if (len == 0) {
        if ((val < 0) || (val >= 0x110000)) {
            fprintf(stderr, "Internal error, xmlCopyChar 0x%X out of bound\n",
                    (unsigned int) val);
            return(0);
        }
        /* Only "one xmlChar" versus "UTF-8 sequence" matters below. */
        len = (val < 0x80) ? 1 : 2;
    }
    if (len <= 1) {
        *out = (xmlChar) val;
        return(1);
    }

    /* Lead byte: its form fixes how many continuation bytes follow. */
    if      (val < 0x80)    { *out++ = val;                bits = -6; }
    else if (val < 0x800)   { *out++ = (val >>  6) | 0xC0; bits =  0; }
    else if (val < 0x10000) { *out++ = (val >> 12) | 0xE0; bits =  6; }
    else                    { *out++ = (val >> 18) | 0xF0; bits = 12; }

    /* Continuation bytes, six payload bits each, most significant first. */
    for ( ; bits >= 0; bits -= 6)
        *out++ = ((val >> bits) & 0x3F) | 0x80;

    /* Report what was really written so that callers stay in sync. */
    return((int) (out - start));
}
652:
653: /**
654: * xmlSkipBlankChars:
655: * @ctxt: the XML parser context
656: *
657: * skip all blanks character found at that point in the input streams.
658: * It pops up finished entities in the process if allowable at that point.
659: *
660: * Returns the number of space chars skipped
661: */
662:
663: int
664: xmlSkipBlankChars(xmlParserCtxtPtr ctxt) {
665: int cur, res = 0;
666:
667: do {
668: cur = CUR;
669: while (IS_BLANK(cur)) {
670: NEXT;
671: cur = CUR;
672: res++;
673: }
674: while ((cur == 0) && (ctxt->inputNr > 1) &&
675: (ctxt->instate != XML_PARSER_COMMENT)) {
676: xmlPopInput(ctxt);
677: cur = CUR;
678: }
679: if (*ctxt->input->cur == '%') xmlParserHandlePEReference(ctxt);
680: if (*ctxt->input->cur == '&') xmlParserHandleReference(ctxt);
681: } while (IS_BLANK(cur));
682: return(res);
1.152 daniel 683: }
684:
1.97 daniel 685: /************************************************************************
686: * *
687: * Commodity functions to handle entities processing *
688: * *
689: ************************************************************************/
1.40 daniel 690:
1.50 daniel 691: /**
692: * xmlPopInput:
693: * @ctxt: an XML parser context
694: *
1.40 daniel 695: * xmlPopInput: the current input pointed by ctxt->input came to an end
696: * pop it and return the next char.
1.45 daniel 697: *
1.123 daniel 698: * Returns the current xmlChar in the parser context
1.40 daniel 699: */
1.123 daniel 700: xmlChar
1.55 daniel 701: xmlPopInput(xmlParserCtxtPtr ctxt) {
1.40 daniel 702: if (ctxt->inputNr == 1) return(0); /* End of main Input */
1.69 daniel 703: xmlFreeInputStream(inputPop(ctxt));
1.97 daniel 704: if ((*ctxt->input->cur == 0) &&
705: (xmlParserInputGrow(ctxt->input, INPUT_CHUNK) <= 0))
706: return(xmlPopInput(ctxt));
1.40 daniel 707: return(CUR);
708: }
709:
1.50 daniel 710: /**
711: * xmlPushInput:
712: * @ctxt: an XML parser context
713: * @input: an XML parser input fragment (entity, XML fragment ...).
714: *
1.40 daniel 715: * xmlPushInput: switch to a new input stream which is stacked on top
716: * of the previous one(s).
717: */
1.55 daniel 718: void
719: xmlPushInput(xmlParserCtxtPtr ctxt, xmlParserInputPtr input) {
1.40 daniel 720: if (input == NULL) return;
721: inputPush(ctxt, input);
722: }
723:
1.50 daniel 724: /**
1.69 daniel 725: * xmlFreeInputStream:
1.127 daniel 726: * @input: an xmlParserInputPtr
1.69 daniel 727: *
728: * Free up an input stream.
729: */
730: void
731: xmlFreeInputStream(xmlParserInputPtr input) {
732: if (input == NULL) return;
733:
1.119 daniel 734: if (input->filename != NULL) xmlFree((char *) input->filename);
735: if (input->directory != NULL) xmlFree((char *) input->directory);
1.69 daniel 736: if ((input->free != NULL) && (input->base != NULL))
1.123 daniel 737: input->free((xmlChar *) input->base);
1.93 veillard 738: if (input->buf != NULL)
739: xmlFreeParserInputBuffer(input->buf);
1.69 daniel 740: memset(input, -1, sizeof(xmlParserInput));
1.119 daniel 741: xmlFree(input);
1.69 daniel 742: }
743:
744: /**
1.96 daniel 745: * xmlNewInputStream:
746: * @ctxt: an XML parser context
747: *
748: * Create a new input stream structure
749: * Returns the new input stream or NULL
750: */
751: xmlParserInputPtr
752: xmlNewInputStream(xmlParserCtxtPtr ctxt) {
753: xmlParserInputPtr input;
754:
1.119 daniel 755: input = (xmlParserInputPtr) xmlMalloc(sizeof(xmlParserInput));
1.96 daniel 756: if (input == NULL) {
1.123 daniel 757: ctxt->errNo = XML_ERR_NO_MEMORY;
1.96 daniel 758: if ((ctxt->sax != NULL) && (ctxt->sax->error != NULL))
1.122 daniel 759: ctxt->sax->error(ctxt->userData,
760: "malloc: couldn't allocate a new input stream\n");
1.123 daniel 761: ctxt->errNo = XML_ERR_NO_MEMORY;
1.96 daniel 762: return(NULL);
763: }
764: input->filename = NULL;
765: input->directory = NULL;
766: input->base = NULL;
767: input->cur = NULL;
768: input->buf = NULL;
769: input->line = 1;
770: input->col = 1;
771: input->buf = NULL;
772: input->free = NULL;
773: input->consumed = 0;
1.140 daniel 774: input->length = 0;
1.96 daniel 775: return(input);
776: }
777:
778: /**
1.50 daniel 779: * xmlNewEntityInputStream:
780: * @ctxt: an XML parser context
781: * @entity: an Entity pointer
782: *
1.82 daniel 783: * Create a new input stream based on an xmlEntityPtr
1.113 daniel 784: *
785: * Returns the new input stream or NULL
1.45 daniel 786: */
1.50 daniel 787: xmlParserInputPtr
788: xmlNewEntityInputStream(xmlParserCtxtPtr ctxt, xmlEntityPtr entity) {
1.45 daniel 789: xmlParserInputPtr input;
790:
791: if (entity == NULL) {
1.123 daniel 792: ctxt->errNo = XML_ERR_INTERNAL_ERROR;
1.55 daniel 793: if ((ctxt->sax != NULL) && (ctxt->sax->error != NULL))
1.74 daniel 794: ctxt->sax->error(ctxt->userData,
1.45 daniel 795: "internal: xmlNewEntityInputStream entity = NULL\n");
1.123 daniel 796: ctxt->errNo = XML_ERR_INTERNAL_ERROR;
1.50 daniel 797: return(NULL);
1.45 daniel 798: }
799: if (entity->content == NULL) {
1.113 daniel 800: switch (entity->type) {
801: case XML_EXTERNAL_GENERAL_UNPARSED_ENTITY:
1.123 daniel 802: ctxt->errNo = XML_ERR_UNPARSED_ENTITY;
1.113 daniel 803: if ((ctxt->sax != NULL) && (ctxt->sax->error != NULL))
804: ctxt->sax->error(ctxt->userData,
805: "xmlNewEntityInputStream unparsed entity !\n");
806: break;
807: case XML_EXTERNAL_GENERAL_PARSED_ENTITY:
808: case XML_EXTERNAL_PARAMETER_ENTITY:
1.116 daniel 809: return(xmlLoadExternalEntity((char *) entity->SystemID,
1.142 daniel 810: (char *) entity->ExternalID, ctxt));
1.113 daniel 811: case XML_INTERNAL_GENERAL_ENTITY:
812: if ((ctxt->sax != NULL) && (ctxt->sax->error != NULL))
813: ctxt->sax->error(ctxt->userData,
814: "Internal entity %s without content !\n", entity->name);
815: break;
816: case XML_INTERNAL_PARAMETER_ENTITY:
1.123 daniel 817: ctxt->errNo = XML_ERR_INTERNAL_ERROR;
1.113 daniel 818: if ((ctxt->sax != NULL) && (ctxt->sax->error != NULL))
819: ctxt->sax->error(ctxt->userData,
820: "Internal parameter entity %s without content !\n", entity->name);
821: break;
822: case XML_INTERNAL_PREDEFINED_ENTITY:
1.123 daniel 823: ctxt->errNo = XML_ERR_INTERNAL_ERROR;
1.113 daniel 824: if ((ctxt->sax != NULL) && (ctxt->sax->error != NULL))
825: ctxt->sax->error(ctxt->userData,
826: "Predefined entity %s without content !\n", entity->name);
827: break;
828: }
1.50 daniel 829: return(NULL);
1.45 daniel 830: }
1.96 daniel 831: input = xmlNewInputStream(ctxt);
1.45 daniel 832: if (input == NULL) {
1.50 daniel 833: return(NULL);
1.45 daniel 834: }
1.156 ! daniel 835: input->filename = (char *) entity->SystemID;
1.45 daniel 836: input->base = entity->content;
837: input->cur = entity->content;
1.140 daniel 838: input->length = entity->length;
1.50 daniel 839: return(input);
1.45 daniel 840: }
841:
1.59 daniel 842: /**
843: * xmlNewStringInputStream:
844: * @ctxt: an XML parser context
1.96 daniel 845: * @buffer: an memory buffer
1.59 daniel 846: *
847: * Create a new input stream based on a memory buffer.
1.68 daniel 848: * Returns the new input stream
1.59 daniel 849: */
850: xmlParserInputPtr
1.123 daniel 851: xmlNewStringInputStream(xmlParserCtxtPtr ctxt, const xmlChar *buffer) {
1.59 daniel 852: xmlParserInputPtr input;
853:
1.96 daniel 854: if (buffer == NULL) {
1.123 daniel 855: ctxt->errNo = XML_ERR_INTERNAL_ERROR;
1.59 daniel 856: if ((ctxt->sax != NULL) && (ctxt->sax->error != NULL))
1.74 daniel 857: ctxt->sax->error(ctxt->userData,
1.59 daniel 858: "internal: xmlNewStringInputStream string = NULL\n");
859: return(NULL);
860: }
1.96 daniel 861: input = xmlNewInputStream(ctxt);
1.59 daniel 862: if (input == NULL) {
863: return(NULL);
864: }
1.96 daniel 865: input->base = buffer;
866: input->cur = buffer;
1.140 daniel 867: input->length = xmlStrlen(buffer);
1.59 daniel 868: return(input);
869: }
870:
1.76 daniel 871: /**
872: * xmlNewInputFromFile:
873: * @ctxt: an XML parser context
874: * @filename: the filename to use as entity
875: *
876: * Create a new input stream based on a file.
877: *
878: * Returns the new input stream or NULL in case of error
879: */
880: xmlParserInputPtr
1.79 daniel 881: xmlNewInputFromFile(xmlParserCtxtPtr ctxt, const char *filename) {
1.91 daniel 882: xmlParserInputBufferPtr buf;
1.76 daniel 883: xmlParserInputPtr inputStream;
1.111 daniel 884: char *directory = NULL;
1.76 daniel 885:
1.96 daniel 886: if (ctxt == NULL) return(NULL);
1.91 daniel 887: buf = xmlParserInputBufferCreateFilename(filename, XML_CHAR_ENCODING_NONE);
1.94 daniel 888: if (buf == NULL) {
1.140 daniel 889: char name[XML_PARSER_BIG_BUFFER_SIZE];
1.106 daniel 890:
1.94 daniel 891: if ((ctxt->input != NULL) && (ctxt->input->directory != NULL)) {
892: #ifdef WIN32
893: sprintf(name, "%s\\%s", ctxt->input->directory, filename);
894: #else
895: sprintf(name, "%s/%s", ctxt->input->directory, filename);
896: #endif
897: buf = xmlParserInputBufferCreateFilename(name,
898: XML_CHAR_ENCODING_NONE);
1.106 daniel 899: if (buf != NULL)
1.142 daniel 900: directory = xmlParserGetDirectory(name);
1.106 daniel 901: }
902: if ((buf == NULL) && (ctxt->directory != NULL)) {
903: #ifdef WIN32
904: sprintf(name, "%s\\%s", ctxt->directory, filename);
905: #else
906: sprintf(name, "%s/%s", ctxt->directory, filename);
907: #endif
908: buf = xmlParserInputBufferCreateFilename(name,
909: XML_CHAR_ENCODING_NONE);
910: if (buf != NULL)
1.142 daniel 911: directory = xmlParserGetDirectory(name);
1.106 daniel 912: }
913: if (buf == NULL)
1.94 daniel 914: return(NULL);
915: }
916: if (directory == NULL)
917: directory = xmlParserGetDirectory(filename);
1.76 daniel 918:
1.96 daniel 919: inputStream = xmlNewInputStream(ctxt);
1.76 daniel 920: if (inputStream == NULL) {
1.119 daniel 921: if (directory != NULL) xmlFree((char *) directory);
1.76 daniel 922: return(NULL);
923: }
924:
1.119 daniel 925: inputStream->filename = xmlMemStrdup(filename);
1.94 daniel 926: inputStream->directory = directory;
1.91 daniel 927: inputStream->buf = buf;
1.76 daniel 928:
1.91 daniel 929: inputStream->base = inputStream->buf->buffer->content;
930: inputStream->cur = inputStream->buf->buffer->content;
1.106 daniel 931: if ((ctxt->directory == NULL) && (directory != NULL))
1.134 daniel 932: ctxt->directory = (char *) xmlStrdup((const xmlChar *) directory);
1.76 daniel 933: return(inputStream);
934: }
935:
1.77 daniel 936: /************************************************************************
937: * *
1.97 daniel 938: * Commodity functions to handle parser contexts *
939: * *
940: ************************************************************************/
941:
942: /**
943: * xmlInitParserCtxt:
944: * @ctxt: an XML parser context
945: *
946: * Initialize a parser context
947: */
948:
949: void
950: xmlInitParserCtxt(xmlParserCtxtPtr ctxt)
951: {
952: xmlSAXHandler *sax;
953:
1.119 daniel 954: sax = (xmlSAXHandler *) xmlMalloc(sizeof(xmlSAXHandler));
1.97 daniel 955: if (sax == NULL) {
956: fprintf(stderr, "xmlInitParserCtxt: out of memory\n");
957: }
958:
959: /* Allocate the Input stack */
1.119 daniel 960: ctxt->inputTab = (xmlParserInputPtr *) xmlMalloc(5 * sizeof(xmlParserInputPtr));
1.97 daniel 961: ctxt->inputNr = 0;
962: ctxt->inputMax = 5;
963: ctxt->input = NULL;
964: ctxt->version = NULL;
965: ctxt->encoding = NULL;
966: ctxt->standalone = -1;
1.98 daniel 967: ctxt->hasExternalSubset = 0;
968: ctxt->hasPErefs = 0;
1.97 daniel 969: ctxt->html = 0;
1.98 daniel 970: ctxt->external = 0;
1.140 daniel 971: ctxt->instate = XML_PARSER_START;
1.97 daniel 972: ctxt->token = 0;
1.106 daniel 973: ctxt->directory = NULL;
1.97 daniel 974:
975: /* Allocate the Node stack */
1.119 daniel 976: ctxt->nodeTab = (xmlNodePtr *) xmlMalloc(10 * sizeof(xmlNodePtr));
1.97 daniel 977: ctxt->nodeNr = 0;
978: ctxt->nodeMax = 10;
979: ctxt->node = NULL;
980:
1.140 daniel 981: /* Allocate the Name stack */
982: ctxt->nameTab = (xmlChar **) xmlMalloc(10 * sizeof(xmlChar *));
983: ctxt->nameNr = 0;
984: ctxt->nameMax = 10;
985: ctxt->name = NULL;
986:
1.97 daniel 987: if (sax == NULL) ctxt->sax = &xmlDefaultSAXHandler;
988: else {
989: ctxt->sax = sax;
990: memcpy(sax, &xmlDefaultSAXHandler, sizeof(xmlSAXHandler));
991: }
992: ctxt->userData = ctxt;
993: ctxt->myDoc = NULL;
994: ctxt->wellFormed = 1;
1.99 daniel 995: ctxt->valid = 1;
1.100 daniel 996: ctxt->validate = xmlDoValidityCheckingDefaultValue;
997: ctxt->vctxt.userData = ctxt;
1.149 daniel 998: if (ctxt->validate) {
999: ctxt->vctxt.error = xmlParserValidityError;
1000: ctxt->vctxt.warning = xmlParserValidityWarning;
1001: } else {
1002: ctxt->vctxt.error = NULL;
1003: ctxt->vctxt.warning = NULL;
1004: }
1.97 daniel 1005: ctxt->replaceEntities = xmlSubstituteEntitiesDefaultValue;
1006: ctxt->record_info = 0;
1.135 daniel 1007: ctxt->nbChars = 0;
1.140 daniel 1008: ctxt->checkIndex = 0;
1009: ctxt->errNo = XML_ERR_OK;
1.97 daniel 1010: xmlInitNodeInfoSeq(&ctxt->node_seq);
1011: }
1012:
1013: /**
1014: * xmlFreeParserCtxt:
1015: * @ctxt: an XML parser context
1016: *
1017: * Free all the memory used by a parser context. However the parsed
1018: * document in ctxt->myDoc is not freed.
1019: */
1020:
1021: void
1022: xmlFreeParserCtxt(xmlParserCtxtPtr ctxt)
1023: {
1024: xmlParserInputPtr input;
1.140 daniel 1025: xmlChar *oldname;
1.97 daniel 1026:
1027: if (ctxt == NULL) return;
1028:
1029: while ((input = inputPop(ctxt)) != NULL) {
1030: xmlFreeInputStream(input);
1031: }
1.140 daniel 1032: while ((oldname = namePop(ctxt)) != NULL) {
1033: xmlFree(oldname);
1034: }
1035: if (ctxt->nameTab != NULL) xmlFree(ctxt->nameTab);
1.119 daniel 1036: if (ctxt->nodeTab != NULL) xmlFree(ctxt->nodeTab);
1037: if (ctxt->inputTab != NULL) xmlFree(ctxt->inputTab);
1038: if (ctxt->version != NULL) xmlFree((char *) ctxt->version);
1039: if (ctxt->encoding != NULL) xmlFree((char *) ctxt->encoding);
1.97 daniel 1040: if ((ctxt->sax != NULL) && (ctxt->sax != &xmlDefaultSAXHandler))
1.119 daniel 1041: xmlFree(ctxt->sax);
1042: if (ctxt->directory != NULL) xmlFree((char *) ctxt->directory);
1043: xmlFree(ctxt);
1.97 daniel 1044: }
1045:
1046: /**
1047: * xmlNewParserCtxt:
1048: *
1049: * Allocate and initialize a new parser context.
1050: *
1051: * Returns the xmlParserCtxtPtr or NULL
1052: */
1053:
1054: xmlParserCtxtPtr
1055: xmlNewParserCtxt()
1056: {
1057: xmlParserCtxtPtr ctxt;
1058:
1.119 daniel 1059: ctxt = (xmlParserCtxtPtr) xmlMalloc(sizeof(xmlParserCtxt));
1.97 daniel 1060: if (ctxt == NULL) {
1061: fprintf(stderr, "xmlNewParserCtxt : cannot allocate context\n");
1062: perror("malloc");
1063: return(NULL);
1064: }
1065: xmlInitParserCtxt(ctxt);
1066: return(ctxt);
1067: }
1068:
1069: /**
1070: * xmlClearParserCtxt:
1071: * @ctxt: an XML parser context
1072: *
1073: * Clear (release owned resources) and reinitialize a parser context
1074: */
1075:
1076: void
1077: xmlClearParserCtxt(xmlParserCtxtPtr ctxt)
1078: {
1079: xmlClearNodeInfoSeq(&ctxt->node_seq);
1080: xmlInitParserCtxt(ctxt);
1081: }
1082:
1083: /************************************************************************
1084: * *
1.77 daniel 1085: * Commodity functions to handle entities *
1086: * *
1087: ************************************************************************/
1088:
1.97 daniel 1089:
1090: /**
1091: * xmlParseCharRef:
1092: * @ctxt: an XML parser context
1093: *
1094: * parse Reference declarations
1095: *
1096: * [66] CharRef ::= '&#' [0-9]+ ';' |
1097: * '&#x' [0-9a-fA-F]+ ';'
1098: *
1.98 daniel 1099: * [ WFC: Legal Character ]
1100: * Characters referred to using character references must match the
1101: * production for Char.
1102: *
1.135 daniel 1103: * Returns the value parsed (as an int), 0 in case of error
1.77 daniel 1104: */
1.97 daniel 1105: int
1106: xmlParseCharRef(xmlParserCtxtPtr ctxt) {
1107: int val = 0;
1108:
1.111 daniel 1109: if (ctxt->token != 0) {
1110: val = ctxt->token;
1111: ctxt->token = 0;
1112: return(val);
1113: }
1.152 daniel 1114: if ((RAW == '&') && (NXT(1) == '#') &&
1.97 daniel 1115: (NXT(2) == 'x')) {
1116: SKIP(3);
1.152 daniel 1117: while (RAW != ';') {
1118: if ((RAW >= '0') && (RAW <= '9'))
1.97 daniel 1119: val = val * 16 + (CUR - '0');
1.152 daniel 1120: else if ((RAW >= 'a') && (RAW <= 'f'))
1.97 daniel 1121: val = val * 16 + (CUR - 'a') + 10;
1.152 daniel 1122: else if ((RAW >= 'A') && (RAW <= 'F'))
1.97 daniel 1123: val = val * 16 + (CUR - 'A') + 10;
1124: else {
1.123 daniel 1125: ctxt->errNo = XML_ERR_INVALID_HEX_CHARREF;
1.97 daniel 1126: if ((ctxt->sax != NULL) && (ctxt->sax->error != NULL))
1127: ctxt->sax->error(ctxt->userData,
1128: "xmlParseCharRef: invalid hexadecimal value\n");
1129: ctxt->wellFormed = 0;
1130: val = 0;
1131: break;
1132: }
1133: NEXT;
1134: }
1.152 daniel 1135: if (RAW == ';')
1.126 daniel 1136: SKIP(1); /* on purpose to avoid reentrancy problems with NEXT */
1.152 daniel 1137: } else if ((RAW == '&') && (NXT(1) == '#')) {
1.97 daniel 1138: SKIP(2);
1.152 daniel 1139: while (RAW != ';') {
1140: if ((RAW >= '0') && (RAW <= '9'))
1.97 daniel 1141: val = val * 10 + (CUR - '0');
1142: else {
1.123 daniel 1143: ctxt->errNo = XML_ERR_INVALID_DEC_CHARREF;
1.97 daniel 1144: if ((ctxt->sax != NULL) && (ctxt->sax->error != NULL))
1145: ctxt->sax->error(ctxt->userData,
1146: "xmlParseCharRef: invalid decimal value\n");
1147: ctxt->wellFormed = 0;
1148: val = 0;
1149: break;
1150: }
1151: NEXT;
1152: }
1.152 daniel 1153: if (RAW == ';')
1.126 daniel 1154: SKIP(1); /* on purpose to avoid reentrancy problems with NEXT */
1.97 daniel 1155: } else {
1.123 daniel 1156: ctxt->errNo = XML_ERR_INVALID_CHARREF;
1.97 daniel 1157: if ((ctxt->sax != NULL) && (ctxt->sax->error != NULL))
1.98 daniel 1158: ctxt->sax->error(ctxt->userData,
1159: "xmlParseCharRef: invalid value\n");
1.97 daniel 1160: ctxt->wellFormed = 0;
1161: }
1.98 daniel 1162:
1.97 daniel 1163: /*
1.98 daniel 1164: * [ WFC: Legal Character ]
1165: * Characters referred to using character references must match the
1166: * production for Char.
1.97 daniel 1167: */
1168: if (IS_CHAR(val)) {
1169: return(val);
1170: } else {
1.123 daniel 1171: ctxt->errNo = XML_ERR_INVALID_CHAR;
1.97 daniel 1172: if ((ctxt->sax != NULL) && (ctxt->sax->error != NULL))
1.123 daniel 1173: ctxt->sax->error(ctxt->userData, "CharRef: invalid xmlChar value %d\n",
1.97 daniel 1174: val);
1175: ctxt->wellFormed = 0;
1176: }
1177: return(0);
1.77 daniel 1178: }
1179:
1.96 daniel 1180: /**
1.135 daniel 1181: * xmlParseStringCharRef:
1182: * @ctxt: an XML parser context
1183: * @str: a pointer to an index in the string
1184: *
1185: * parse Reference declarations, variant parsing from a string rather
1186: * than an an input flow.
1187: *
1188: * [66] CharRef ::= '&#' [0-9]+ ';' |
1189: * '&#x' [0-9a-fA-F]+ ';'
1190: *
1191: * [ WFC: Legal Character ]
1192: * Characters referred to using character references must match the
1193: * production for Char.
1194: *
1195: * Returns the value parsed (as an int), 0 in case of error, str will be
1196: * updated to the current value of the index
1197: */
1198: int
1199: xmlParseStringCharRef(xmlParserCtxtPtr ctxt, const xmlChar **str) {
1200: const xmlChar *ptr;
1201: xmlChar cur;
1202: int val = 0;
1203:
1204: if ((str == NULL) || (*str == NULL)) return(0);
1205: ptr = *str;
1206: cur = *ptr;
1.137 daniel 1207: if ((cur == '&') && (ptr[1] == '#') && (ptr[2] == 'x')) {
1.135 daniel 1208: ptr += 3;
1209: cur = *ptr;
1210: while (cur != ';') {
1211: if ((cur >= '0') && (cur <= '9'))
1212: val = val * 16 + (cur - '0');
1213: else if ((cur >= 'a') && (cur <= 'f'))
1214: val = val * 16 + (cur - 'a') + 10;
1215: else if ((cur >= 'A') && (cur <= 'F'))
1216: val = val * 16 + (cur - 'A') + 10;
1217: else {
1218: ctxt->errNo = XML_ERR_INVALID_HEX_CHARREF;
1219: if ((ctxt->sax != NULL) && (ctxt->sax->error != NULL))
1220: ctxt->sax->error(ctxt->userData,
1221: "xmlParseCharRef: invalid hexadecimal value\n");
1222: ctxt->wellFormed = 0;
1223: val = 0;
1224: break;
1225: }
1226: ptr++;
1227: cur = *ptr;
1228: }
1229: if (cur == ';')
1230: ptr++;
1.145 daniel 1231: } else if ((cur == '&') && (ptr[1] == '#')){
1.135 daniel 1232: ptr += 2;
1233: cur = *ptr;
1234: while (cur != ';') {
1235: if ((cur >= '0') && (cur <= '9'))
1236: val = val * 10 + (cur - '0');
1237: else {
1238: ctxt->errNo = XML_ERR_INVALID_DEC_CHARREF;
1239: if ((ctxt->sax != NULL) && (ctxt->sax->error != NULL))
1240: ctxt->sax->error(ctxt->userData,
1241: "xmlParseCharRef: invalid decimal value\n");
1242: ctxt->wellFormed = 0;
1243: val = 0;
1244: break;
1245: }
1246: ptr++;
1247: cur = *ptr;
1248: }
1249: if (cur == ';')
1250: ptr++;
1251: } else {
1252: ctxt->errNo = XML_ERR_INVALID_CHARREF;
1253: if ((ctxt->sax != NULL) && (ctxt->sax->error != NULL))
1254: ctxt->sax->error(ctxt->userData,
1255: "xmlParseCharRef: invalid value\n");
1256: ctxt->wellFormed = 0;
1257: return(0);
1258: }
1259: *str = ptr;
1260:
1261: /*
1262: * [ WFC: Legal Character ]
1263: * Characters referred to using character references must match the
1264: * production for Char.
1265: */
1266: if (IS_CHAR(val)) {
1267: return(val);
1268: } else {
1269: ctxt->errNo = XML_ERR_INVALID_CHAR;
1270: if ((ctxt->sax != NULL) && (ctxt->sax->error != NULL))
1271: ctxt->sax->error(ctxt->userData,
1272: "CharRef: invalid xmlChar value %d\n", val);
1273: ctxt->wellFormed = 0;
1274: }
1275: return(0);
1276: }
1277:
1278: /**
1.96 daniel 1279: * xmlParserHandleReference:
1280: * @ctxt: the parser context
1281: *
1.97 daniel 1282: * [67] Reference ::= EntityRef | CharRef
1283: *
1.96 daniel 1284: * [68] EntityRef ::= '&' Name ';'
1285: *
1.98 daniel 1286: * [ WFC: Entity Declared ]
1287: * the Name given in the entity reference must match that in an entity
1288: * declaration, except that well-formed documents need not declare any
1289: * of the following entities: amp, lt, gt, apos, quot.
1290: *
1291: * [ WFC: Parsed Entity ]
1292: * An entity reference must not contain the name of an unparsed entity
1293: *
1.97 daniel 1294: * [66] CharRef ::= '&#' [0-9]+ ';' |
1295: * '&#x' [0-9a-fA-F]+ ';'
1296: *
1.96 daniel 1297: * A PEReference may have been detectect in the current input stream
1298: * the handling is done accordingly to
1299: * http://www.w3.org/TR/REC-xml#entproc
1300: */
1301: void
1302: xmlParserHandleReference(xmlParserCtxtPtr ctxt) {
1.97 daniel 1303: xmlParserInputPtr input;
1.123 daniel 1304: xmlChar *name;
1.97 daniel 1305: xmlEntityPtr ent = NULL;
1306:
1.126 daniel 1307: if (ctxt->token != 0) {
1308: return;
1309: }
1.152 daniel 1310: if (RAW != '&') return;
1.97 daniel 1311: GROW;
1.152 daniel 1312: if ((RAW == '&') && (NXT(1) == '#')) {
1.97 daniel 1313: switch(ctxt->instate) {
1.140 daniel 1314: case XML_PARSER_ENTITY_DECL:
1315: case XML_PARSER_PI:
1.109 daniel 1316: case XML_PARSER_CDATA_SECTION:
1.140 daniel 1317: case XML_PARSER_COMMENT:
1318: /* we just ignore it there */
1319: return;
1320: case XML_PARSER_START_TAG:
1.109 daniel 1321: return;
1.140 daniel 1322: case XML_PARSER_END_TAG:
1.97 daniel 1323: return;
1324: case XML_PARSER_EOF:
1.123 daniel 1325: ctxt->errNo = XML_ERR_CHARREF_AT_EOF;
1.97 daniel 1326: if ((ctxt->sax != NULL) && (ctxt->sax->error != NULL))
1327: ctxt->sax->error(ctxt->userData, "CharRef at EOF\n");
1328: ctxt->wellFormed = 0;
1329: return;
1330: case XML_PARSER_PROLOG:
1.140 daniel 1331: case XML_PARSER_START:
1332: case XML_PARSER_MISC:
1.123 daniel 1333: ctxt->errNo = XML_ERR_CHARREF_IN_PROLOG;
1.97 daniel 1334: if ((ctxt->sax != NULL) && (ctxt->sax->error != NULL))
1335: ctxt->sax->error(ctxt->userData, "CharRef in prolog!\n");
1336: ctxt->wellFormed = 0;
1337: return;
1338: case XML_PARSER_EPILOG:
1.123 daniel 1339: ctxt->errNo = XML_ERR_CHARREF_IN_EPILOG;
1.97 daniel 1340: if ((ctxt->sax != NULL) && (ctxt->sax->error != NULL))
1341: ctxt->sax->error(ctxt->userData, "CharRef in epilog!\n");
1342: ctxt->wellFormed = 0;
1343: return;
1344: case XML_PARSER_DTD:
1.123 daniel 1345: ctxt->errNo = XML_ERR_CHARREF_IN_DTD;
1.97 daniel 1346: if ((ctxt->sax != NULL) && (ctxt->sax->error != NULL))
1347: ctxt->sax->error(ctxt->userData,
1348: "CharRef are forbiden in DTDs!\n");
1349: ctxt->wellFormed = 0;
1350: return;
1351: case XML_PARSER_ENTITY_VALUE:
1352: /*
1353: * NOTE: in the case of entity values, we don't do the
1.127 daniel 1354: * substitution here since we need the literal
1.97 daniel 1355: * entity value to be able to save the internal
1356: * subset of the document.
1357: * This will be handled by xmlDecodeEntities
1358: */
1359: return;
1360: case XML_PARSER_CONTENT:
1361: case XML_PARSER_ATTRIBUTE_VALUE:
1362: ctxt->token = xmlParseCharRef(ctxt);
1363: return;
1364: }
1365: return;
1366: }
1367:
1368: switch(ctxt->instate) {
1.109 daniel 1369: case XML_PARSER_CDATA_SECTION:
1370: return;
1.140 daniel 1371: case XML_PARSER_PI:
1.97 daniel 1372: case XML_PARSER_COMMENT:
1373: return;
1.140 daniel 1374: case XML_PARSER_START_TAG:
1375: return;
1376: case XML_PARSER_END_TAG:
1377: return;
1.97 daniel 1378: case XML_PARSER_EOF:
1.123 daniel 1379: ctxt->errNo = XML_ERR_ENTITYREF_AT_EOF;
1.97 daniel 1380: if ((ctxt->sax != NULL) && (ctxt->sax->error != NULL))
1381: ctxt->sax->error(ctxt->userData, "Reference at EOF\n");
1382: ctxt->wellFormed = 0;
1383: return;
1384: case XML_PARSER_PROLOG:
1.140 daniel 1385: case XML_PARSER_START:
1386: case XML_PARSER_MISC:
1.123 daniel 1387: ctxt->errNo = XML_ERR_ENTITYREF_IN_PROLOG;
1.97 daniel 1388: if ((ctxt->sax != NULL) && (ctxt->sax->error != NULL))
1389: ctxt->sax->error(ctxt->userData, "Reference in prolog!\n");
1390: ctxt->wellFormed = 0;
1391: return;
1392: case XML_PARSER_EPILOG:
1.123 daniel 1393: ctxt->errNo = XML_ERR_ENTITYREF_IN_EPILOG;
1.97 daniel 1394: if ((ctxt->sax != NULL) && (ctxt->sax->error != NULL))
1395: ctxt->sax->error(ctxt->userData, "Reference in epilog!\n");
1396: ctxt->wellFormed = 0;
1397: return;
1398: case XML_PARSER_ENTITY_VALUE:
1399: /*
1400: * NOTE: in the case of entity values, we don't do the
1.127 daniel 1401: * substitution here since we need the literal
1.97 daniel 1402: * entity value to be able to save the internal
1403: * subset of the document.
1404: * This will be handled by xmlDecodeEntities
1405: */
1406: return;
1407: case XML_PARSER_ATTRIBUTE_VALUE:
1408: /*
1409: * NOTE: in the case of attributes values, we don't do the
1410: * substitution here unless we are in a mode where
1411: * the parser is explicitely asked to substitute
1412: * entities. The SAX callback is called with values
1413: * without entity substitution.
1414: * This will then be handled by xmlDecodeEntities
1415: */
1.113 daniel 1416: return;
1.97 daniel 1417: case XML_PARSER_ENTITY_DECL:
1418: /*
1419: * we just ignore it there
1420: * the substitution will be done once the entity is referenced
1421: */
1422: return;
1423: case XML_PARSER_DTD:
1.123 daniel 1424: ctxt->errNo = XML_ERR_ENTITYREF_IN_DTD;
1.97 daniel 1425: if ((ctxt->sax != NULL) && (ctxt->sax->error != NULL))
1426: ctxt->sax->error(ctxt->userData,
1427: "Entity references are forbiden in DTDs!\n");
1428: ctxt->wellFormed = 0;
1429: return;
1430: case XML_PARSER_CONTENT:
1.113 daniel 1431: return;
1.97 daniel 1432: }
1433:
1434: NEXT;
1435: name = xmlScanName(ctxt);
1436: if (name == NULL) {
1.123 daniel 1437: ctxt->errNo = XML_ERR_ENTITYREF_NO_NAME;
1.97 daniel 1438: if ((ctxt->sax != NULL) && (ctxt->sax->error != NULL))
1439: ctxt->sax->error(ctxt->userData, "Entity reference: no name\n");
1440: ctxt->wellFormed = 0;
1441: ctxt->token = '&';
1442: return;
1443: }
1444: if (NXT(xmlStrlen(name)) != ';') {
1.123 daniel 1445: ctxt->errNo = XML_ERR_ENTITYREF_SEMICOL_MISSING;
1.97 daniel 1446: if ((ctxt->sax != NULL) && (ctxt->sax->error != NULL))
1447: ctxt->sax->error(ctxt->userData,
1448: "Entity reference: ';' expected\n");
1449: ctxt->wellFormed = 0;
1450: ctxt->token = '&';
1.119 daniel 1451: xmlFree(name);
1.97 daniel 1452: return;
1453: }
1454: SKIP(xmlStrlen(name) + 1);
1455: if (ctxt->sax != NULL) {
1456: if (ctxt->sax->getEntity != NULL)
1457: ent = ctxt->sax->getEntity(ctxt->userData, name);
1458: }
1.98 daniel 1459:
1460: /*
1461: * [ WFC: Entity Declared ]
1462: * the Name given in the entity reference must match that in an entity
1463: * declaration, except that well-formed documents need not declare any
1464: * of the following entities: amp, lt, gt, apos, quot.
1465: */
1.97 daniel 1466: if (ent == NULL)
1467: ent = xmlGetPredefinedEntity(name);
1468: if (ent == NULL) {
1.123 daniel 1469: ctxt->errNo = XML_ERR_UNDECLARED_ENTITY;
1.97 daniel 1470: if ((ctxt->sax != NULL) && (ctxt->sax->error != NULL))
1471: ctxt->sax->error(ctxt->userData,
1.98 daniel 1472: "Entity reference: entity %s not declared\n",
1473: name);
1.97 daniel 1474: ctxt->wellFormed = 0;
1.119 daniel 1475: xmlFree(name);
1.97 daniel 1476: return;
1477: }
1.98 daniel 1478:
1479: /*
1480: * [ WFC: Parsed Entity ]
1481: * An entity reference must not contain the name of an unparsed entity
1482: */
1483: if (ent->type == XML_EXTERNAL_GENERAL_UNPARSED_ENTITY) {
1.123 daniel 1484: ctxt->errNo = XML_ERR_UNPARSED_ENTITY;
1.98 daniel 1485: if ((ctxt->sax != NULL) && (ctxt->sax->error != NULL))
1486: ctxt->sax->error(ctxt->userData,
1487: "Entity reference to unparsed entity %s\n", name);
1488: ctxt->wellFormed = 0;
1489: }
1490:
1.97 daniel 1491: if (ent->type == XML_INTERNAL_PREDEFINED_ENTITY) {
1492: ctxt->token = ent->content[0];
1.119 daniel 1493: xmlFree(name);
1.97 daniel 1494: return;
1495: }
1496: input = xmlNewEntityInputStream(ctxt, ent);
1497: xmlPushInput(ctxt, input);
1.119 daniel 1498: xmlFree(name);
1.96 daniel 1499: return;
1500: }
1501:
1502: /**
1503: * xmlParserHandlePEReference:
1504: * @ctxt: the parser context
1505: *
1506: * [69] PEReference ::= '%' Name ';'
1507: *
1.98 daniel 1508: * [ WFC: No Recursion ]
1509: * TODO A parsed entity must not contain a recursive
1510: * reference to itself, either directly or indirectly.
1511: *
1512: * [ WFC: Entity Declared ]
1513: * In a document without any DTD, a document with only an internal DTD
1514: * subset which contains no parameter entity references, or a document
1515: * with "standalone='yes'", ... ... The declaration of a parameter
1516: * entity must precede any reference to it...
1517: *
1518: * [ VC: Entity Declared ]
1519: * In a document with an external subset or external parameter entities
1520: * with "standalone='no'", ... ... The declaration of a parameter entity
1521: * must precede any reference to it...
1522: *
1523: * [ WFC: In DTD ]
1524: * Parameter-entity references may only appear in the DTD.
1525: * NOTE: misleading but this is handled.
1526: *
1527: * A PEReference may have been detected in the current input stream
1.96 daniel 1528: * the handling is done accordingly to
1529: * http://www.w3.org/TR/REC-xml#entproc
1530: * i.e.
1531: * - Included in literal in entity values
1532: * - Included as Paraemeter Entity reference within DTDs
1533: */
1534: void
1535: xmlParserHandlePEReference(xmlParserCtxtPtr ctxt) {
1.123 daniel 1536: xmlChar *name;
1.96 daniel 1537: xmlEntityPtr entity = NULL;
1538: xmlParserInputPtr input;
1539:
1.126 daniel 1540: if (ctxt->token != 0) {
1541: return;
1542: }
1.152 daniel 1543: if (RAW != '%') return;
1.96 daniel 1544: switch(ctxt->instate) {
1.109 daniel 1545: case XML_PARSER_CDATA_SECTION:
1546: return;
1.97 daniel 1547: case XML_PARSER_COMMENT:
1548: return;
1.140 daniel 1549: case XML_PARSER_START_TAG:
1550: return;
1551: case XML_PARSER_END_TAG:
1552: return;
1.96 daniel 1553: case XML_PARSER_EOF:
1.123 daniel 1554: ctxt->errNo = XML_ERR_PEREF_AT_EOF;
1.96 daniel 1555: if ((ctxt->sax != NULL) && (ctxt->sax->error != NULL))
1556: ctxt->sax->error(ctxt->userData, "PEReference at EOF\n");
1557: ctxt->wellFormed = 0;
1558: return;
1559: case XML_PARSER_PROLOG:
1.140 daniel 1560: case XML_PARSER_START:
1561: case XML_PARSER_MISC:
1.123 daniel 1562: ctxt->errNo = XML_ERR_PEREF_IN_PROLOG;
1.96 daniel 1563: if ((ctxt->sax != NULL) && (ctxt->sax->error != NULL))
1564: ctxt->sax->error(ctxt->userData, "PEReference in prolog!\n");
1565: ctxt->wellFormed = 0;
1566: return;
1.97 daniel 1567: case XML_PARSER_ENTITY_DECL:
1.96 daniel 1568: case XML_PARSER_CONTENT:
1569: case XML_PARSER_ATTRIBUTE_VALUE:
1.140 daniel 1570: case XML_PARSER_PI:
1.96 daniel 1571: /* we just ignore it there */
1572: return;
1573: case XML_PARSER_EPILOG:
1.123 daniel 1574: ctxt->errNo = XML_ERR_PEREF_IN_EPILOG;
1.96 daniel 1575: if ((ctxt->sax != NULL) && (ctxt->sax->error != NULL))
1.97 daniel 1576: ctxt->sax->error(ctxt->userData, "PEReference in epilog!\n");
1.96 daniel 1577: ctxt->wellFormed = 0;
1578: return;
1.97 daniel 1579: case XML_PARSER_ENTITY_VALUE:
1580: /*
1581: * NOTE: in the case of entity values, we don't do the
1.127 daniel 1582: * substitution here since we need the literal
1.97 daniel 1583: * entity value to be able to save the internal
1584: * subset of the document.
1585: * This will be handled by xmlDecodeEntities
1586: */
1587: return;
1.96 daniel 1588: case XML_PARSER_DTD:
1.98 daniel 1589: /*
1590: * [WFC: Well-Formedness Constraint: PEs in Internal Subset]
1591: * In the internal DTD subset, parameter-entity references
1592: * can occur only where markup declarations can occur, not
1593: * within markup declarations.
1594: * In that case this is handled in xmlParseMarkupDecl
1595: */
1596: if ((ctxt->external == 0) && (ctxt->inputNr == 1))
1597: return;
1.96 daniel 1598: }
1599:
1600: NEXT;
1601: name = xmlParseName(ctxt);
1602: if (name == NULL) {
1.123 daniel 1603: ctxt->errNo = XML_ERR_PEREF_NO_NAME;
1.96 daniel 1604: if ((ctxt->sax != NULL) && (ctxt->sax->error != NULL))
1605: ctxt->sax->error(ctxt->userData, "xmlHandlePEReference: no name\n");
1606: ctxt->wellFormed = 0;
1607: } else {
1.152 daniel 1608: if (RAW == ';') {
1.96 daniel 1609: NEXT;
1.98 daniel 1610: if ((ctxt->sax != NULL) && (ctxt->sax->getParameterEntity != NULL))
1611: entity = ctxt->sax->getParameterEntity(ctxt->userData, name);
1.96 daniel 1612: if (entity == NULL) {
1.98 daniel 1613:
1614: /*
1615: * [ WFC: Entity Declared ]
1616: * In a document without any DTD, a document with only an
1617: * internal DTD subset which contains no parameter entity
1618: * references, or a document with "standalone='yes'", ...
1619: * ... The declaration of a parameter entity must precede
1620: * any reference to it...
1621: */
1622: if ((ctxt->standalone == 1) ||
1623: ((ctxt->hasExternalSubset == 0) &&
1624: (ctxt->hasPErefs == 0))) {
1625: if ((ctxt->sax != NULL) && (ctxt->sax->error != NULL))
1626: ctxt->sax->error(ctxt->userData,
1627: "PEReference: %%%s; not found\n", name);
1628: ctxt->wellFormed = 0;
1629: } else {
1630: /*
1631: * [ VC: Entity Declared ]
1632: * In a document with an external subset or external
1633: * parameter entities with "standalone='no'", ...
1634: * ... The declaration of a parameter entity must precede
1635: * any reference to it...
1636: */
1637: if ((ctxt->sax != NULL) && (ctxt->sax->warning != NULL))
1638: ctxt->sax->warning(ctxt->userData,
1639: "PEReference: %%%s; not found\n", name);
1640: ctxt->valid = 0;
1641: }
1.96 daniel 1642: } else {
1643: if ((entity->type == XML_INTERNAL_PARAMETER_ENTITY) ||
1644: (entity->type == XML_EXTERNAL_PARAMETER_ENTITY)) {
1645: /*
1.156 ! daniel 1646: * TODO !!! handle the extra spaces added before and after
1.96 daniel 1647: * c.f. http://www.w3.org/TR/REC-xml#as-PE
1.156 ! daniel 1648: * TODO !!! Avoid quote processing in parameters value
1.96 daniel 1649: * c.f. http://www.w3.org/TR/REC-xml#inliteral
1650: */
1651: input = xmlNewEntityInputStream(ctxt, entity);
1652: xmlPushInput(ctxt, input);
1653: } else {
1654: if ((ctxt->sax != NULL) && (ctxt->sax->error != NULL))
1655: ctxt->sax->error(ctxt->userData,
1656: "xmlHandlePEReference: %s is not a parameter entity\n",
1657: name);
1658: ctxt->wellFormed = 0;
1659: }
1660: }
1661: } else {
1.123 daniel 1662: ctxt->errNo = XML_ERR_PEREF_SEMICOL_MISSING;
1.96 daniel 1663: if ((ctxt->sax != NULL) && (ctxt->sax->error != NULL))
1664: ctxt->sax->error(ctxt->userData,
1665: "xmlHandlePEReference: expecting ';'\n");
1666: ctxt->wellFormed = 0;
1667: }
1.119 daniel 1668: xmlFree(name);
1.97 daniel 1669: }
1670: }
1671:
/*
 * Macro used to grow the current buffer.
 *
 * Doubles buffer##_size and reallocates `buffer' accordingly. It expects
 * an int variable named <buffer>_size in scope and must be used inside
 * a function returning a pointer: when the reallocation fails the old
 * block is released (so it is not leaked) and the enclosing function
 * returns NULL.
 */
#define growBuffer(buffer) {						\
    xmlChar *buffer##_tmp;						\
    buffer##_size *= 2;							\
    buffer##_tmp = (xmlChar *)						\
		xmlRealloc(buffer, buffer##_size * sizeof(xmlChar));	\
    if (buffer##_tmp == NULL) {						\
	perror("realloc failed");					\
	xmlFree(buffer);						\
	return(NULL);							\
    }									\
    buffer = buffer##_tmp;						\
}
1.77 daniel 1684:
1685: /**
1686: * xmlDecodeEntities:
1687: * @ctxt: the parser context
1688: * @what: combination of XML_SUBSTITUTE_REF and XML_SUBSTITUTE_PEREF
1689: * @len: the len to decode (in bytes !), -1 for no size limit
1.123 daniel 1690: * @end: an end marker xmlChar, 0 if none
1691: * @end2: an end marker xmlChar, 0 if none
1692: * @end3: an end marker xmlChar, 0 if none
1.77 daniel 1693: *
1694: * [67] Reference ::= EntityRef | CharRef
1695: *
1696: * [69] PEReference ::= '%' Name ';'
1697: *
1698: * Returns A newly allocated string with the substitution done. The caller
1699: * must deallocate it !
1700: */
1.123 daniel 1701: xmlChar *
1.77 daniel 1702: xmlDecodeEntities(xmlParserCtxtPtr ctxt, int len, int what,
1.123 daniel 1703: xmlChar end, xmlChar end2, xmlChar end3) {
1704: xmlChar *buffer = NULL;
1.78 daniel 1705: int buffer_size = 0;
1.123 daniel 1706: xmlChar *out = NULL;
1.78 daniel 1707:
1.123 daniel 1708: xmlChar *current = NULL;
1.77 daniel 1709: xmlEntityPtr ent;
1.91 daniel 1710: int nbchars = 0;
1.77 daniel 1711: unsigned int max = (unsigned int) len;
1.123 daniel 1712: xmlChar cur;
1.77 daniel 1713:
1714: /*
1715: * allocate a translation buffer.
1716: */
1.140 daniel 1717: buffer_size = XML_PARSER_BIG_BUFFER_SIZE;
1.123 daniel 1718: buffer = (xmlChar *) xmlMalloc(buffer_size * sizeof(xmlChar));
1.77 daniel 1719: if (buffer == NULL) {
1720: perror("xmlDecodeEntities: malloc failed");
1721: return(NULL);
1722: }
1723: out = buffer;
1724:
1.78 daniel 1725: /*
1726: * Ok loop until we reach one of the ending char or a size limit.
1727: */
1.97 daniel 1728: cur = CUR;
1729: while ((nbchars < max) && (cur != end) &&
1730: (cur != end2) && (cur != end3)) {
1.77 daniel 1731:
1.98 daniel 1732: if (cur == 0) break;
1733: if ((cur == '&') && (NXT(1) == '#')) {
1734: int val = xmlParseCharRef(ctxt);
1735: *out++ = val;
1736: nbchars += 3;
1737: } else if ((cur == '&') && (what & XML_SUBSTITUTE_REF)) {
1738: ent = xmlParseEntityRef(ctxt);
1739: if ((ent != NULL) &&
1740: (ctxt->replaceEntities != 0)) {
1741: current = ent->content;
1742: while (*current != 0) {
1743: *out++ = *current++;
1.140 daniel 1744: if (out - buffer > buffer_size - XML_PARSER_BUFFER_SIZE) {
1.98 daniel 1745: int index = out - buffer;
1746:
1747: growBuffer(buffer);
1748: out = &buffer[index];
1.77 daniel 1749: }
1750: }
1.98 daniel 1751: nbchars += 3 + xmlStrlen(ent->name);
1752: } else if (ent != NULL) {
1753: int i = xmlStrlen(ent->name);
1.123 daniel 1754: const xmlChar *cur = ent->name;
1.98 daniel 1755:
1756: nbchars += i + 2;
1757: *out++ = '&';
1.140 daniel 1758: if (out - buffer > buffer_size - i - XML_PARSER_BUFFER_SIZE) {
1.98 daniel 1759: int index = out - buffer;
1760:
1761: growBuffer(buffer);
1762: out = &buffer[index];
1763: }
1764: for (;i > 0;i--)
1765: *out++ = *cur++;
1766: *out++ = ';';
1.77 daniel 1767: }
1.97 daniel 1768: } else if (cur == '%' && (what & XML_SUBSTITUTE_PEREF)) {
1769: /*
1.77 daniel 1770: * a PEReference induce to switch the entity flow,
1771: * we break here to flush the current set of chars
1772: * parsed if any. We will be called back later.
1.97 daniel 1773: */
1.91 daniel 1774: if (nbchars != 0) break;
1.77 daniel 1775:
1776: xmlParsePEReference(ctxt);
1.79 daniel 1777:
1.97 daniel 1778: /*
1.79 daniel 1779: * Pop-up of finished entities.
1.97 daniel 1780: */
1.152 daniel 1781: while ((RAW == 0) && (ctxt->inputNr > 1))
1.79 daniel 1782: xmlPopInput(ctxt);
1783:
1.98 daniel 1784: break;
1.77 daniel 1785: } else {
1.156 ! daniel 1786: /* invalid for UTF-8 , use COPY(out); !!! */
1.97 daniel 1787: *out++ = cur;
1.91 daniel 1788: nbchars++;
1.140 daniel 1789: if (out - buffer > buffer_size - XML_PARSER_BUFFER_SIZE) {
1.86 daniel 1790: int index = out - buffer;
1791:
1792: growBuffer(buffer);
1793: out = &buffer[index];
1794: }
1.77 daniel 1795: NEXT;
1796: }
1.97 daniel 1797: cur = CUR;
1.77 daniel 1798: }
1799: *out++ = 0;
1800: return(buffer);
1801: }
1802:
1.135 daniel 1803: /**
1804: * xmlStringDecodeEntities:
1805: * @ctxt: the parser context
1806: * @str: the input string
1807: * @what: combination of XML_SUBSTITUTE_REF and XML_SUBSTITUTE_PEREF
1808: * @end: an end marker xmlChar, 0 if none
1809: * @end2: an end marker xmlChar, 0 if none
1810: * @end3: an end marker xmlChar, 0 if none
1811: *
1812: * [67] Reference ::= EntityRef | CharRef
1813: *
1814: * [69] PEReference ::= '%' Name ';'
1815: *
1816: * Returns A newly allocated string with the substitution done. The caller
1817: * must deallocate it !
1818: */
1819: xmlChar *
1820: xmlStringDecodeEntities(xmlParserCtxtPtr ctxt, const xmlChar *str, int what,
1821: xmlChar end, xmlChar end2, xmlChar end3) {
1822: xmlChar *buffer = NULL;
1823: int buffer_size = 0;
1824: xmlChar *out = NULL;
1825:
1826: xmlChar *current = NULL;
1827: xmlEntityPtr ent;
1828: xmlChar cur;
1829:
1830: /*
1831: * allocate a translation buffer.
1832: */
1.140 daniel 1833: buffer_size = XML_PARSER_BIG_BUFFER_SIZE;
1.135 daniel 1834: buffer = (xmlChar *) xmlMalloc(buffer_size * sizeof(xmlChar));
1835: if (buffer == NULL) {
1836: perror("xmlDecodeEntities: malloc failed");
1837: return(NULL);
1838: }
1839: out = buffer;
1840:
1841: /*
1842: * Ok loop until we reach one of the ending char or a size limit.
1843: */
1844: cur = *str;
1845: while ((cur != 0) && (cur != end) &&
1846: (cur != end2) && (cur != end3)) {
1847:
1848: if (cur == 0) break;
1849: if ((cur == '&') && (str[1] == '#')) {
1850: int val = xmlParseStringCharRef(ctxt, &str);
1851: if (val != 0)
1852: *out++ = val;
1853: } else if ((cur == '&') && (what & XML_SUBSTITUTE_REF)) {
1854: ent = xmlParseStringEntityRef(ctxt, &str);
1855: if ((ent != NULL) &&
1856: (ctxt->replaceEntities != 0)) {
1857: current = ent->content;
1858: while (*current != 0) {
1859: *out++ = *current++;
1.140 daniel 1860: if (out - buffer > buffer_size - XML_PARSER_BUFFER_SIZE) {
1.135 daniel 1861: int index = out - buffer;
1862:
1863: growBuffer(buffer);
1864: out = &buffer[index];
1865: }
1866: }
1867: } else if (ent != NULL) {
1868: int i = xmlStrlen(ent->name);
1869: const xmlChar *cur = ent->name;
1870:
1871: *out++ = '&';
1.140 daniel 1872: if (out - buffer > buffer_size - i - XML_PARSER_BUFFER_SIZE) {
1.135 daniel 1873: int index = out - buffer;
1874:
1875: growBuffer(buffer);
1876: out = &buffer[index];
1877: }
1878: for (;i > 0;i--)
1879: *out++ = *cur++;
1880: *out++ = ';';
1881: }
1882: } else if (cur == '%' && (what & XML_SUBSTITUTE_PEREF)) {
1883: ent = xmlParseStringPEReference(ctxt, &str);
1884: if (ent != NULL) {
1885: current = ent->content;
1886: while (*current != 0) {
1887: *out++ = *current++;
1.140 daniel 1888: if (out - buffer > buffer_size - XML_PARSER_BUFFER_SIZE) {
1.135 daniel 1889: int index = out - buffer;
1890:
1891: growBuffer(buffer);
1892: out = &buffer[index];
1893: }
1894: }
1895: }
1896: } else {
1.156 ! daniel 1897: /* invalid for UTF-8 , use COPY(out); !!! */
1.135 daniel 1898: *out++ = cur;
1.140 daniel 1899: if (out - buffer > buffer_size - XML_PARSER_BUFFER_SIZE) {
1.135 daniel 1900: int index = out - buffer;
1901:
1902: growBuffer(buffer);
1903: out = &buffer[index];
1904: }
1905: str++;
1906: }
1907: cur = *str;
1908: }
1909: *out = 0;
1910: return(buffer);
1911: }
1912:
1.1 veillard 1913:
1.28 daniel 1914: /************************************************************************
1915: * *
1.75 daniel 1916: * Commodity functions to handle encodings *
1917: * *
1918: ************************************************************************/
1919:
1920: /**
1921: * xmlSwitchEncoding:
1922: * @ctxt: the parser context
1.124 daniel 1923: * @enc: the encoding value (number)
1.75 daniel 1924: *
1925: * change the input functions when discovering the character encoding
1926: * of a given entity.
1927: */
1928: void
1929: xmlSwitchEncoding(xmlParserCtxtPtr ctxt, xmlCharEncoding enc)
1930: {
1.156 ! daniel 1931: xmlCharEncodingHandlerPtr handler;
! 1932:
! 1933: handler = xmlGetCharEncodingHandler(enc);
! 1934: if (handler != NULL) {
! 1935: if (ctxt->input != NULL) {
! 1936: if (ctxt->input->buf != NULL) {
! 1937: if (ctxt->input->buf->encoder != NULL) {
! 1938: if ((ctxt->sax != NULL) && (ctxt->sax->error != NULL))
! 1939: ctxt->sax->error(ctxt->userData,
! 1940: "xmlSwitchEncoding : encoder already regitered\n");
! 1941: return;
! 1942: }
! 1943: ctxt->input->buf->encoder = handler;
! 1944:
! 1945: /*
! 1946: * Is there already some content down the pipe to convert
! 1947: */
! 1948: if ((ctxt->input->buf->buffer != NULL) &&
! 1949: (ctxt->input->buf->buffer->use > 0)) {
! 1950: xmlChar *buf;
! 1951: int res, len, size;
! 1952: int processed;
! 1953:
! 1954: /*
! 1955: * Specific handling of the Byte Order Mark for
! 1956: * UTF-16
! 1957: */
! 1958: if ((enc == XML_CHAR_ENCODING_UTF16LE) &&
! 1959: (ctxt->input->cur[0] == 0xFF) &&
! 1960: (ctxt->input->cur[1] == 0xFE)) {
! 1961: SKIP(2);
! 1962: }
! 1963: if ((enc == XML_CHAR_ENCODING_UTF16BE) &&
! 1964: (ctxt->input->cur[0] == 0xFE) &&
! 1965: (ctxt->input->cur[1] == 0xFF)) {
! 1966: SKIP(2);
! 1967: }
! 1968:
! 1969: /*
! 1970: * convert the non processed part
! 1971: */
! 1972: processed = ctxt->input->cur - ctxt->input->base;
! 1973: len = ctxt->input->buf->buffer->use - processed;
! 1974:
! 1975: if (len <= 0) {
! 1976: return;
! 1977: }
! 1978: size = ctxt->input->buf->buffer->use * 4;
! 1979: if (size < 4000)
! 1980: size = 4000;
! 1981: buf = (xmlChar *) xmlMalloc(size);
! 1982: if (buf == NULL) {
! 1983: if ((ctxt->sax != NULL) && (ctxt->sax->error != NULL))
! 1984: ctxt->sax->error(ctxt->userData,
! 1985: "xmlSwitchEncoding : out of memory\n");
! 1986: return;
! 1987: }
! 1988: res = handler->input(buf, size, ctxt->input->cur, &len);
! 1989: if ((res < 0) ||
! 1990: (len != ctxt->input->buf->buffer->use - processed)) {
! 1991: if ((ctxt->sax != NULL) && (ctxt->sax->error != NULL))
! 1992: ctxt->sax->error(ctxt->userData,
! 1993: "xmlSwitchEncoding : conversion failed\n");
! 1994: xmlFree(buf);
! 1995: return;
! 1996: }
! 1997: /*
! 1998: * Conversion succeeded, get rid of the old buffer
! 1999: */
! 2000: xmlFree(ctxt->input->buf->buffer->content);
! 2001: ctxt->input->buf->buffer->content = buf;
! 2002: ctxt->input->base = buf;
! 2003: ctxt->input->cur = buf;
! 2004: ctxt->input->buf->buffer->size = size;
! 2005: ctxt->input->buf->buffer->use = res;
! 2006: }
! 2007: return;
! 2008: } else {
! 2009: if (ctxt->input->length == 0) {
! 2010: /*
! 2011: * When parsing a static memory array one must know the
! 2012: * size to be able to convert the buffer.
! 2013: */
! 2014: if ((ctxt->sax != NULL) && (ctxt->sax->error != NULL))
! 2015: ctxt->sax->error(ctxt->userData,
! 2016: "xmlSwitchEncoding : no input\n");
! 2017: return;
! 2018: } else {
! 2019: xmlChar *buf;
! 2020: int res, len;
! 2021: int processed = ctxt->input->cur - ctxt->input->base;
! 2022:
! 2023: /*
! 2024: * convert the non processed part
! 2025: */
! 2026: len = ctxt->input->length - processed;
! 2027: if (len <= 0) {
! 2028: if ((ctxt->sax != NULL) && (ctxt->sax->error != NULL))
! 2029: ctxt->sax->error(ctxt->userData,
! 2030: "xmlSwitchEncoding : input fully consumed?\n");
! 2031: return;
! 2032: }
! 2033: buf = (xmlChar *) xmlMalloc(ctxt->input->length * 4);
! 2034: if (buf == NULL) {
! 2035: if ((ctxt->sax != NULL) && (ctxt->sax->error != NULL))
! 2036: ctxt->sax->error(ctxt->userData,
! 2037: "xmlSwitchEncoding : out of memory\n");
! 2038: return;
! 2039: }
! 2040: res = handler->input(buf, ctxt->input->length * 4,
! 2041: ctxt->input->cur, &len);
! 2042: if ((res < 0) ||
! 2043: (len != ctxt->input->length - processed)) {
! 2044: if ((ctxt->sax != NULL) && (ctxt->sax->error != NULL))
! 2045: ctxt->sax->error(ctxt->userData,
! 2046: "xmlSwitchEncoding : conversion failed\n");
! 2047: xmlFree(buf);
! 2048: return;
! 2049: }
! 2050: /*
! 2051: * Conversion succeeded, get rid of the old buffer
! 2052: */
! 2053: if ((ctxt->input->free != NULL) &&
! 2054: (ctxt->input->base != NULL))
! 2055: ctxt->input->free((xmlChar *) ctxt->input->base);
! 2056: ctxt->input->base = ctxt->input->cur = buf;
! 2057: ctxt->input->length = res;
! 2058: }
! 2059: }
! 2060: } else {
! 2061: if ((ctxt->sax != NULL) && (ctxt->sax->error != NULL))
! 2062: ctxt->sax->error(ctxt->userData,
! 2063: "xmlSwitchEncoding : no input\n");
! 2064: }
! 2065: }
! 2066:
1.75 daniel 2067: switch (enc) {
2068: case XML_CHAR_ENCODING_ERROR:
1.123 daniel 2069: ctxt->errNo = XML_ERR_UNKNOWN_ENCODING;
1.75 daniel 2070: if ((ctxt->sax != NULL) && (ctxt->sax->error != NULL))
2071: ctxt->sax->error(ctxt->userData, "encoding unknown\n");
2072: ctxt->wellFormed = 0;
2073: break;
2074: case XML_CHAR_ENCODING_NONE:
2075: /* let's assume it's UTF-8 without the XML decl */
2076: return;
2077: case XML_CHAR_ENCODING_UTF8:
2078: /* default encoding, no conversion should be needed */
2079: return;
2080: case XML_CHAR_ENCODING_UTF16LE:
1.123 daniel 2081: ctxt->errNo = XML_ERR_UNSUPPORTED_ENCODING;
1.75 daniel 2082: if ((ctxt->sax != NULL) && (ctxt->sax->error != NULL))
2083: ctxt->sax->error(ctxt->userData,
2084: "char encoding UTF16 little endian not supported\n");
2085: break;
2086: case XML_CHAR_ENCODING_UTF16BE:
1.123 daniel 2087: ctxt->errNo = XML_ERR_UNSUPPORTED_ENCODING;
1.75 daniel 2088: if ((ctxt->sax != NULL) && (ctxt->sax->error != NULL))
2089: ctxt->sax->error(ctxt->userData,
2090: "char encoding UTF16 big endian not supported\n");
2091: break;
2092: case XML_CHAR_ENCODING_UCS4LE:
1.123 daniel 2093: ctxt->errNo = XML_ERR_UNSUPPORTED_ENCODING;
1.75 daniel 2094: if ((ctxt->sax != NULL) && (ctxt->sax->error != NULL))
2095: ctxt->sax->error(ctxt->userData,
2096: "char encoding USC4 little endian not supported\n");
2097: break;
2098: case XML_CHAR_ENCODING_UCS4BE:
1.123 daniel 2099: ctxt->errNo = XML_ERR_UNSUPPORTED_ENCODING;
1.75 daniel 2100: if ((ctxt->sax != NULL) && (ctxt->sax->error != NULL))
2101: ctxt->sax->error(ctxt->userData,
2102: "char encoding USC4 big endian not supported\n");
2103: break;
2104: case XML_CHAR_ENCODING_EBCDIC:
1.123 daniel 2105: ctxt->errNo = XML_ERR_UNSUPPORTED_ENCODING;
1.75 daniel 2106: if ((ctxt->sax != NULL) && (ctxt->sax->error != NULL))
2107: ctxt->sax->error(ctxt->userData,
2108: "char encoding EBCDIC not supported\n");
2109: break;
2110: case XML_CHAR_ENCODING_UCS4_2143:
1.123 daniel 2111: ctxt->errNo = XML_ERR_UNSUPPORTED_ENCODING;
1.75 daniel 2112: if ((ctxt->sax != NULL) && (ctxt->sax->error != NULL))
2113: ctxt->sax->error(ctxt->userData,
2114: "char encoding UCS4 2143 not supported\n");
2115: break;
2116: case XML_CHAR_ENCODING_UCS4_3412:
1.123 daniel 2117: ctxt->errNo = XML_ERR_UNSUPPORTED_ENCODING;
1.75 daniel 2118: if ((ctxt->sax != NULL) && (ctxt->sax->error != NULL))
2119: ctxt->sax->error(ctxt->userData,
2120: "char encoding UCS4 3412 not supported\n");
2121: break;
2122: case XML_CHAR_ENCODING_UCS2:
1.123 daniel 2123: ctxt->errNo = XML_ERR_UNSUPPORTED_ENCODING;
1.75 daniel 2124: if ((ctxt->sax != NULL) && (ctxt->sax->error != NULL))
2125: ctxt->sax->error(ctxt->userData,
2126: "char encoding UCS2 not supported\n");
2127: break;
2128: case XML_CHAR_ENCODING_8859_1:
1.123 daniel 2129: ctxt->errNo = XML_ERR_UNSUPPORTED_ENCODING;
1.75 daniel 2130: if ((ctxt->sax != NULL) && (ctxt->sax->error != NULL))
2131: ctxt->sax->error(ctxt->userData,
2132: "char encoding ISO_8859_1 ISO Latin 1 not supported\n");
2133: break;
2134: case XML_CHAR_ENCODING_8859_2:
1.123 daniel 2135: ctxt->errNo = XML_ERR_UNSUPPORTED_ENCODING;
1.75 daniel 2136: if ((ctxt->sax != NULL) && (ctxt->sax->error != NULL))
2137: ctxt->sax->error(ctxt->userData,
2138: "char encoding ISO_8859_2 ISO Latin 2 not supported\n");
2139: break;
2140: case XML_CHAR_ENCODING_8859_3:
1.123 daniel 2141: ctxt->errNo = XML_ERR_UNSUPPORTED_ENCODING;
1.75 daniel 2142: if ((ctxt->sax != NULL) && (ctxt->sax->error != NULL))
2143: ctxt->sax->error(ctxt->userData,
2144: "char encoding ISO_8859_3 not supported\n");
2145: break;
2146: case XML_CHAR_ENCODING_8859_4:
1.123 daniel 2147: ctxt->errNo = XML_ERR_UNSUPPORTED_ENCODING;
1.75 daniel 2148: if ((ctxt->sax != NULL) && (ctxt->sax->error != NULL))
2149: ctxt->sax->error(ctxt->userData,
2150: "char encoding ISO_8859_4 not supported\n");
2151: break;
2152: case XML_CHAR_ENCODING_8859_5:
1.123 daniel 2153: ctxt->errNo = XML_ERR_UNSUPPORTED_ENCODING;
1.75 daniel 2154: if ((ctxt->sax != NULL) && (ctxt->sax->error != NULL))
2155: ctxt->sax->error(ctxt->userData,
2156: "char encoding ISO_8859_5 not supported\n");
2157: break;
2158: case XML_CHAR_ENCODING_8859_6:
1.123 daniel 2159: ctxt->errNo = XML_ERR_UNSUPPORTED_ENCODING;
1.75 daniel 2160: if ((ctxt->sax != NULL) && (ctxt->sax->error != NULL))
2161: ctxt->sax->error(ctxt->userData,
2162: "char encoding ISO_8859_6 not supported\n");
2163: break;
2164: case XML_CHAR_ENCODING_8859_7:
1.123 daniel 2165: ctxt->errNo = XML_ERR_UNSUPPORTED_ENCODING;
1.75 daniel 2166: if ((ctxt->sax != NULL) && (ctxt->sax->error != NULL))
2167: ctxt->sax->error(ctxt->userData,
2168: "char encoding ISO_8859_7 not supported\n");
2169: break;
2170: case XML_CHAR_ENCODING_8859_8:
1.123 daniel 2171: ctxt->errNo = XML_ERR_UNSUPPORTED_ENCODING;
1.75 daniel 2172: if ((ctxt->sax != NULL) && (ctxt->sax->error != NULL))
2173: ctxt->sax->error(ctxt->userData,
2174: "char encoding ISO_8859_8 not supported\n");
2175: break;
2176: case XML_CHAR_ENCODING_8859_9:
1.123 daniel 2177: ctxt->errNo = XML_ERR_UNSUPPORTED_ENCODING;
1.75 daniel 2178: if ((ctxt->sax != NULL) && (ctxt->sax->error != NULL))
2179: ctxt->sax->error(ctxt->userData,
2180: "char encoding ISO_8859_9 not supported\n");
2181: break;
2182: case XML_CHAR_ENCODING_2022_JP:
1.123 daniel 2183: ctxt->errNo = XML_ERR_UNSUPPORTED_ENCODING;
1.75 daniel 2184: if ((ctxt->sax != NULL) && (ctxt->sax->error != NULL))
2185: ctxt->sax->error(ctxt->userData,
2186: "char encoding ISO-2022-JPnot supported\n");
2187: break;
2188: case XML_CHAR_ENCODING_SHIFT_JIS:
1.123 daniel 2189: ctxt->errNo = XML_ERR_UNSUPPORTED_ENCODING;
1.75 daniel 2190: if ((ctxt->sax != NULL) && (ctxt->sax->error != NULL))
2191: ctxt->sax->error(ctxt->userData,
2192: "char encoding Shift_JISnot supported\n");
2193: break;
2194: case XML_CHAR_ENCODING_EUC_JP:
1.123 daniel 2195: ctxt->errNo = XML_ERR_UNSUPPORTED_ENCODING;
1.75 daniel 2196: if ((ctxt->sax != NULL) && (ctxt->sax->error != NULL))
2197: ctxt->sax->error(ctxt->userData,
2198: "char encoding EUC-JPnot supported\n");
2199: break;
2200: }
2201: }
2202:
2203: /************************************************************************
2204: * *
1.123 daniel 2205: * Commodity functions to handle xmlChars *
1.28 daniel 2206: * *
2207: ************************************************************************/
2208:
1.50 daniel 2209: /**
2210: * xmlStrndup:
1.123 daniel 2211: * @cur: the input xmlChar *
1.50 daniel 2212: * @len: the len of @cur
2213: *
1.123 daniel 2214: * a strndup for array of xmlChar's
1.68 daniel 2215: *
1.123 daniel 2216: * Returns a new xmlChar * or NULL
1.1 veillard 2217: */
1.123 daniel 2218: xmlChar *
2219: xmlStrndup(const xmlChar *cur, int len) {
1.135 daniel 2220: xmlChar *ret;
2221:
2222: if ((cur == NULL) || (len < 0)) return(NULL);
2223: ret = xmlMalloc((len + 1) * sizeof(xmlChar));
1.1 veillard 2224: if (ret == NULL) {
1.86 daniel 2225: fprintf(stderr, "malloc of %ld byte failed\n",
1.123 daniel 2226: (len + 1) * (long)sizeof(xmlChar));
1.1 veillard 2227: return(NULL);
2228: }
1.123 daniel 2229: memcpy(ret, cur, len * sizeof(xmlChar));
1.1 veillard 2230: ret[len] = 0;
2231: return(ret);
2232: }
2233:
1.50 daniel 2234: /**
2235: * xmlStrdup:
1.123 daniel 2236: * @cur: the input xmlChar *
1.50 daniel 2237: *
1.152 daniel 2238: * a strdup for array of xmlChar's. Since they are supposed to be
2239: * encoded in UTF-8 or an encoding with 8bit based chars, we assume
2240: * a termination mark of '0'.
1.68 daniel 2241: *
1.123 daniel 2242: * Returns a new xmlChar * or NULL
1.1 veillard 2243: */
1.123 daniel 2244: xmlChar *
2245: xmlStrdup(const xmlChar *cur) {
2246: const xmlChar *p = cur;
1.1 veillard 2247:
1.135 daniel 2248: if (cur == NULL) return(NULL);
1.152 daniel 2249: while (*p != 0) p++;
1.1 veillard 2250: return(xmlStrndup(cur, p - cur));
2251: }
2252:
1.50 daniel 2253: /**
2254: * xmlCharStrndup:
2255: * @cur: the input char *
2256: * @len: the len of @cur
2257: *
1.123 daniel 2258: * a strndup for char's to xmlChar's
1.68 daniel 2259: *
1.123 daniel 2260: * Returns a new xmlChar * or NULL
1.45 daniel 2261: */
2262:
1.123 daniel 2263: xmlChar *
1.55 daniel 2264: xmlCharStrndup(const char *cur, int len) {
1.45 daniel 2265: int i;
1.135 daniel 2266: xmlChar *ret;
2267:
2268: if ((cur == NULL) || (len < 0)) return(NULL);
2269: ret = xmlMalloc((len + 1) * sizeof(xmlChar));
1.45 daniel 2270: if (ret == NULL) {
1.86 daniel 2271: fprintf(stderr, "malloc of %ld byte failed\n",
1.123 daniel 2272: (len + 1) * (long)sizeof(xmlChar));
1.45 daniel 2273: return(NULL);
2274: }
2275: for (i = 0;i < len;i++)
1.123 daniel 2276: ret[i] = (xmlChar) cur[i];
1.45 daniel 2277: ret[len] = 0;
2278: return(ret);
2279: }
2280:
1.50 daniel 2281: /**
2282: * xmlCharStrdup:
2283: * @cur: the input char *
2284: * @len: the len of @cur
2285: *
1.123 daniel 2286: * a strdup for char's to xmlChar's
1.68 daniel 2287: *
1.123 daniel 2288: * Returns a new xmlChar * or NULL
1.45 daniel 2289: */
2290:
1.123 daniel 2291: xmlChar *
1.55 daniel 2292: xmlCharStrdup(const char *cur) {
1.45 daniel 2293: const char *p = cur;
2294:
1.135 daniel 2295: if (cur == NULL) return(NULL);
1.45 daniel 2296: while (*p != '\0') p++;
2297: return(xmlCharStrndup(cur, p - cur));
2298: }
2299:
/**
 * xmlStrcmp:
 * @str1:  the first xmlChar *
 * @str2:  the second xmlChar *
 *
 * A strcmp for xmlChar's. A NULL string sorts before any non-NULL
 * string, two NULL strings compare equal.
 *
 * Returns the integer result of the comparison: 0 when both strings
 *         are equal, otherwise the difference between the first pair
 *         of differing elements.
 */

int
xmlStrcmp(const xmlChar *str1, const xmlChar *str2) {
    register int tmp;

    if (str1 == str2) return(0);
    if (str1 == NULL) return(-1);
    if (str2 == NULL) return(1);
    do {
        tmp = *str1++ - *str2;
        if (tmp != 0) return(tmp);
        /*
         * Both elements are equal here: stop as soon as that element is
         * the terminator, so that nothing is ever read past the end of
         * either string.
         */
    } while (*str2++ != 0);
    return(0);
}
2323:
/**
 * xmlStrncmp:
 * @str1:  the first xmlChar *
 * @str2:  the second xmlChar *
 * @len:  the max comparison length
 *
 * A strncmp for xmlChar's: at most @len elements are compared. A NULL
 * string sorts before any non-NULL string, two NULL strings compare
 * equal, and a @len lower or equal to 0 always yields 0.
 *
 * Returns the integer result of the comparison: 0 when the compared
 *         parts are equal, otherwise the difference between the first
 *         pair of differing elements.
 */

int
xmlStrncmp(const xmlChar *str1, const xmlChar *str2, int len) {
    register int tmp;

    if (len <= 0) return(0);
    if (str1 == str2) return(0);
    if (str1 == NULL) return(-1);
    if (str2 == NULL) return(1);
    do {
        tmp = *str1++ - *str2;
        if (tmp != 0) return(tmp);
        len--;
        if (len <= 0) return(0);
        /*
         * Both elements are equal here: stop as soon as that element is
         * the terminator, so that nothing is ever read past the end of
         * either string.
         */
    } while (*str2++ != 0);
    return(0);
}
2351:
/**
 * xmlStrchr:
 * @str:  the xmlChar * array
 * @val:  the xmlChar to search
 *
 * A strchr for xmlChar's. The terminator itself is never matched, so
 * searching for 0 yields NULL.
 *
 * Returns the xmlChar * for the first occurrence or NULL.
 */

const xmlChar *
xmlStrchr(const xmlChar *str, xmlChar val) {
    const xmlChar *scan;

    if (str == NULL)
        return(NULL);

    for (scan = str; *scan != 0; scan++) {
        if (*scan == val)
            return(scan);
    }
    return(NULL);
}
2371:
2372: /**
2373: * xmlStrstr:
1.123 daniel 2374: * @str: the xmlChar * array (haystack)
2375: * @val: the xmlChar to search (needle)
1.89 daniel 2376: *
1.123 daniel 2377: * a strstr for xmlChar's
1.89 daniel 2378: *
1.123 daniel 2379: * Returns the xmlChar * for the first occurence or NULL.
1.89 daniel 2380: */
2381:
1.123 daniel 2382: const xmlChar *
2383: xmlStrstr(const xmlChar *str, xmlChar *val) {
1.89 daniel 2384: int n;
2385:
2386: if (str == NULL) return(NULL);
2387: if (val == NULL) return(NULL);
2388: n = xmlStrlen(val);
2389:
2390: if (n == 0) return(str);
2391: while (*str != 0) {
2392: if (*str == *val) {
1.123 daniel 2393: if (!xmlStrncmp(str, val, n)) return((const xmlChar *) str);
1.89 daniel 2394: }
2395: str++;
2396: }
2397: return(NULL);
2398: }
2399:
2400: /**
2401: * xmlStrsub:
1.123 daniel 2402: * @str: the xmlChar * array (haystack)
1.89 daniel 2403: * @start: the index of the first char (zero based)
2404: * @len: the length of the substring
2405: *
2406: * Extract a substring of a given string
2407: *
1.123 daniel 2408: * Returns the xmlChar * for the first occurence or NULL.
1.89 daniel 2409: */
2410:
1.123 daniel 2411: xmlChar *
2412: xmlStrsub(const xmlChar *str, int start, int len) {
1.89 daniel 2413: int i;
2414:
2415: if (str == NULL) return(NULL);
2416: if (start < 0) return(NULL);
1.90 daniel 2417: if (len < 0) return(NULL);
1.89 daniel 2418:
2419: for (i = 0;i < start;i++) {
2420: if (*str == 0) return(NULL);
2421: str++;
2422: }
2423: if (*str == 0) return(NULL);
2424: return(xmlStrndup(str, len));
1.14 veillard 2425: }
1.28 daniel 2426:
/**
 * xmlStrlen:
 * @str:  the xmlChar * array
 *
 * Length of a xmlChar's string, the 0 terminator is not counted.
 *
 * Returns the number of xmlChar contained in the ARRAY, 0 if @str
 *         is NULL.
 */

int
xmlStrlen(const xmlChar *str) {
    int count;

    if (str == NULL)
        return(0);

    for (count = 0; *str != 0; str++)
        count++;
    return(count);
}
2447:
1.50 daniel 2448: /**
2449: * xmlStrncat:
1.123 daniel 2450: * @cur: the original xmlChar * array
2451: * @add: the xmlChar * array added
1.50 daniel 2452: * @len: the length of @add
2453: *
1.123 daniel 2454: * a strncat for array of xmlChar's
1.68 daniel 2455: *
1.123 daniel 2456: * Returns a new xmlChar * containing the concatenated string.
1.45 daniel 2457: */
2458:
1.123 daniel 2459: xmlChar *
2460: xmlStrncat(xmlChar *cur, const xmlChar *add, int len) {
1.45 daniel 2461: int size;
1.123 daniel 2462: xmlChar *ret;
1.45 daniel 2463:
2464: if ((add == NULL) || (len == 0))
2465: return(cur);
2466: if (cur == NULL)
2467: return(xmlStrndup(add, len));
2468:
2469: size = xmlStrlen(cur);
1.123 daniel 2470: ret = xmlRealloc(cur, (size + len + 1) * sizeof(xmlChar));
1.45 daniel 2471: if (ret == NULL) {
1.86 daniel 2472: fprintf(stderr, "xmlStrncat: realloc of %ld byte failed\n",
1.123 daniel 2473: (size + len + 1) * (long)sizeof(xmlChar));
1.45 daniel 2474: return(cur);
2475: }
1.123 daniel 2476: memcpy(&ret[size], add, len * sizeof(xmlChar));
1.45 daniel 2477: ret[size + len] = 0;
2478: return(ret);
2479: }
2480:
1.50 daniel 2481: /**
2482: * xmlStrcat:
1.123 daniel 2483: * @cur: the original xmlChar * array
2484: * @add: the xmlChar * array added
1.50 daniel 2485: *
1.152 daniel 2486: * a strcat for array of xmlChar's. Since they are supposed to be
2487: * encoded in UTF-8 or an encoding with 8bit based chars, we assume
2488: * a termination mark of '0'.
1.68 daniel 2489: *
1.123 daniel 2490: * Returns a new xmlChar * containing the concatenated string.
1.45 daniel 2491: */
1.123 daniel 2492: xmlChar *
2493: xmlStrcat(xmlChar *cur, const xmlChar *add) {
2494: const xmlChar *p = add;
1.45 daniel 2495:
2496: if (add == NULL) return(cur);
2497: if (cur == NULL)
2498: return(xmlStrdup(add));
2499:
1.152 daniel 2500: while (*p != 0) p++;
1.45 daniel 2501: return(xmlStrncat(cur, add, p - add));
2502: }
2503:
2504: /************************************************************************
2505: * *
2506: * Commodity functions, cleanup needed ? *
2507: * *
2508: ************************************************************************/
2509:
1.50 daniel 2510: /**
2511: * areBlanks:
2512: * @ctxt: an XML parser context
1.123 daniel 2513: * @str: a xmlChar *
1.50 daniel 2514: * @len: the size of @str
2515: *
1.45 daniel 2516: * Is this a sequence of blank chars that one can ignore ?
1.50 daniel 2517: *
1.68 daniel 2518: * Returns 1 if ignorable 0 otherwise.
1.45 daniel 2519: */
2520:
1.123 daniel 2521: static int areBlanks(xmlParserCtxtPtr ctxt, const xmlChar *str, int len) {
1.104 daniel 2522: int i, ret;
1.45 daniel 2523: xmlNodePtr lastChild;
2524:
2525: for (i = 0;i < len;i++)
2526: if (!(IS_BLANK(str[i]))) return(0);
2527:
1.152 daniel 2528: if (RAW != '<') return(0);
1.72 daniel 2529: if (ctxt->node == NULL) return(0);
1.104 daniel 2530: if (ctxt->myDoc != NULL) {
2531: ret = xmlIsMixedElement(ctxt->myDoc, ctxt->node->name);
2532: if (ret == 0) return(1);
2533: if (ret == 1) return(0);
2534: }
2535: /*
2536: * heuristic
2537: */
1.45 daniel 2538: lastChild = xmlGetLastChild(ctxt->node);
2539: if (lastChild == NULL) {
2540: if (ctxt->node->content != NULL) return(0);
2541: } else if (xmlNodeIsText(lastChild))
2542: return(0);
1.104 daniel 2543: else if ((ctxt->node->childs != NULL) &&
2544: (xmlNodeIsText(ctxt->node->childs)))
2545: return(0);
1.45 daniel 2546: return(1);
2547: }
2548:
1.50 daniel 2549: /**
2550: * xmlHandleEntity:
2551: * @ctxt: an XML parser context
2552: * @entity: an XML entity pointer.
2553: *
2554: * Default handling of defined entities, when should we define a new input
1.45 daniel 2555: * stream ? When do we just handle that as a set of chars ?
1.99 daniel 2556: *
2557: * OBSOLETE: to be removed at some point.
1.45 daniel 2558: */
2559:
1.55 daniel 2560: void
2561: xmlHandleEntity(xmlParserCtxtPtr ctxt, xmlEntityPtr entity) {
1.45 daniel 2562: int len;
1.50 daniel 2563: xmlParserInputPtr input;
1.45 daniel 2564:
2565: if (entity->content == NULL) {
1.123 daniel 2566: ctxt->errNo = XML_ERR_INTERNAL_ERROR;
1.55 daniel 2567: if ((ctxt->sax != NULL) && (ctxt->sax->error != NULL))
1.74 daniel 2568: ctxt->sax->error(ctxt->userData, "xmlHandleEntity %s: content == NULL\n",
1.45 daniel 2569: entity->name);
1.59 daniel 2570: ctxt->wellFormed = 0;
1.45 daniel 2571: return;
2572: }
2573: len = xmlStrlen(entity->content);
2574: if (len <= 2) goto handle_as_char;
2575:
2576: /*
2577: * Redefine its content as an input stream.
2578: */
1.50 daniel 2579: input = xmlNewEntityInputStream(ctxt, entity);
2580: xmlPushInput(ctxt, input);
1.45 daniel 2581: return;
2582:
2583: handle_as_char:
2584: /*
2585: * Just handle the content as a set of chars.
2586: */
1.72 daniel 2587: if ((ctxt->sax != NULL) && (ctxt->sax->characters != NULL))
1.74 daniel 2588: ctxt->sax->characters(ctxt->userData, entity->content, len);
1.45 daniel 2589:
2590: }
2591:
2592: /*
2593: * Forward definition for recursive behaviour.
2594: */
1.77 daniel 2595: void xmlParsePEReference(xmlParserCtxtPtr ctxt);
2596: void xmlParseReference(xmlParserCtxtPtr ctxt);
1.45 daniel 2597:
1.28 daniel 2598: /************************************************************************
2599: * *
2600: * Extra stuff for namespace support *
2601: * Relates to http://www.w3.org/TR/WD-xml-names *
2602: * *
2603: ************************************************************************/
2604:
1.50 daniel 2605: /**
2606: * xmlNamespaceParseNCName:
2607: * @ctxt: an XML parser context
2608: *
2609: * parse an XML namespace name.
1.28 daniel 2610: *
2611: * [NS 3] NCName ::= (Letter | '_') (NCNameChar)*
2612: *
2613: * [NS 4] NCNameChar ::= Letter | Digit | '.' | '-' | '_' |
2614: * CombiningChar | Extender
1.68 daniel 2615: *
2616: * Returns the namespace name or NULL
1.28 daniel 2617: */
2618:
1.123 daniel 2619: xmlChar *
1.55 daniel 2620: xmlNamespaceParseNCName(xmlParserCtxtPtr ctxt) {
1.152 daniel 2621: xmlChar buf[XML_MAX_NAMELEN + 5];
2622: int len = 0, l;
2623: int cur = CUR_CHAR(l);
1.28 daniel 2624:
1.156 ! daniel 2625: /* load first the value of the char !!! */
1.152 daniel 2626: if (!IS_LETTER(cur) && (cur != '_')) return(NULL);
1.28 daniel 2627:
1.152 daniel 2628: while ((IS_LETTER(cur)) || (IS_DIGIT(cur)) ||
2629: (cur == '.') || (cur == '-') ||
2630: (cur == '_') ||
2631: (IS_COMBINING(cur)) ||
2632: (IS_EXTENDER(cur))) {
2633: COPY_BUF(l,buf,len,cur);
2634: NEXTL(l);
2635: cur = CUR_CHAR(l);
1.91 daniel 2636: if (len >= XML_MAX_NAMELEN) {
2637: fprintf(stderr,
2638: "xmlNamespaceParseNCName: reached XML_MAX_NAMELEN limit\n");
1.152 daniel 2639: while ((IS_LETTER(cur)) || (IS_DIGIT(cur)) ||
2640: (cur == '.') || (cur == '-') ||
2641: (cur == '_') ||
2642: (IS_COMBINING(cur)) ||
2643: (IS_EXTENDER(cur))) {
2644: NEXTL(l);
2645: cur = CUR_CHAR(l);
2646: }
1.91 daniel 2647: break;
2648: }
2649: }
2650: return(xmlStrndup(buf, len));
1.28 daniel 2651: }
2652:
1.50 daniel 2653: /**
2654: * xmlNamespaceParseQName:
2655: * @ctxt: an XML parser context
1.123 daniel 2656: * @prefix: a xmlChar **
1.50 daniel 2657: *
2658: * parse an XML qualified name
1.28 daniel 2659: *
2660: * [NS 5] QName ::= (Prefix ':')? LocalPart
2661: *
2662: * [NS 6] Prefix ::= NCName
2663: *
2664: * [NS 7] LocalPart ::= NCName
1.68 daniel 2665: *
1.127 daniel 2666: * Returns the local part, and prefix is updated
1.50 daniel 2667: * to get the Prefix if any.
1.28 daniel 2668: */
2669:
1.123 daniel 2670: xmlChar *
2671: xmlNamespaceParseQName(xmlParserCtxtPtr ctxt, xmlChar **prefix) {
2672: xmlChar *ret = NULL;
1.28 daniel 2673:
2674: *prefix = NULL;
2675: ret = xmlNamespaceParseNCName(ctxt);
1.152 daniel 2676: if (RAW == ':') {
1.28 daniel 2677: *prefix = ret;
1.40 daniel 2678: NEXT;
1.28 daniel 2679: ret = xmlNamespaceParseNCName(ctxt);
2680: }
2681:
2682: return(ret);
2683: }
2684:
1.50 daniel 2685: /**
1.72 daniel 2686: * xmlSplitQName:
2687: * @name: an XML parser context
1.123 daniel 2688: * @prefix: a xmlChar **
1.72 daniel 2689: *
2690: * parse an XML qualified name string
2691: *
2692: * [NS 5] QName ::= (Prefix ':')? LocalPart
2693: *
2694: * [NS 6] Prefix ::= NCName
2695: *
2696: * [NS 7] LocalPart ::= NCName
2697: *
1.127 daniel 2698: * Returns the local part, and prefix is updated
1.72 daniel 2699: * to get the Prefix if any.
2700: */
2701:
1.123 daniel 2702: xmlChar *
2703: xmlSplitQName(const xmlChar *name, xmlChar **prefix) {
2704: xmlChar *ret = NULL;
2705: const xmlChar *q;
2706: const xmlChar *cur = name;
1.72 daniel 2707:
2708: *prefix = NULL;
1.113 daniel 2709:
2710: /* xml: prefix is not really a namespace */
2711: if ((cur[0] == 'x') && (cur[1] == 'm') &&
2712: (cur[2] == 'l') && (cur[3] == ':'))
2713: return(xmlStrdup(name));
2714:
1.72 daniel 2715: if (!IS_LETTER(*cur) && (*cur != '_')) return(NULL);
2716: q = cur++;
2717:
2718: while ((IS_LETTER(*cur)) || (IS_DIGIT(*cur)) ||
2719: (*cur == '.') || (*cur == '-') ||
2720: (*cur == '_') ||
2721: (IS_COMBINING(*cur)) ||
2722: (IS_EXTENDER(*cur)))
2723: cur++;
2724:
2725: ret = xmlStrndup(q, cur - q);
2726:
2727: if (*cur == ':') {
2728: cur++;
2729: if (!IS_LETTER(*cur) && (*cur != '_')) return(ret);
2730: *prefix = ret;
2731:
2732: q = cur++;
2733:
2734: while ((IS_LETTER(*cur)) || (IS_DIGIT(*cur)) ||
2735: (*cur == '.') || (*cur == '-') ||
2736: (*cur == '_') ||
2737: (IS_COMBINING(*cur)) ||
2738: (IS_EXTENDER(*cur)))
2739: cur++;
2740:
2741: ret = xmlStrndup(q, cur - q);
2742: }
2743:
2744: return(ret);
2745: }
2746: /**
1.50 daniel 2747: * xmlNamespaceParseNSDef:
2748: * @ctxt: an XML parser context
2749: *
2750: * parse a namespace prefix declaration
1.28 daniel 2751: *
2752: * [NS 1] NSDef ::= PrefixDef Eq SystemLiteral
2753: *
2754: * [NS 2] PrefixDef ::= 'xmlns' (':' NCName)?
1.68 daniel 2755: *
2756: * Returns the namespace name
1.28 daniel 2757: */
2758:
1.123 daniel 2759: xmlChar *
1.55 daniel 2760: xmlNamespaceParseNSDef(xmlParserCtxtPtr ctxt) {
1.123 daniel 2761: xmlChar *name = NULL;
1.28 daniel 2762:
1.152 daniel 2763: if ((RAW == 'x') && (NXT(1) == 'm') &&
1.40 daniel 2764: (NXT(2) == 'l') && (NXT(3) == 'n') &&
2765: (NXT(4) == 's')) {
2766: SKIP(5);
1.152 daniel 2767: if (RAW == ':') {
1.40 daniel 2768: NEXT;
1.28 daniel 2769: name = xmlNamespaceParseNCName(ctxt);
2770: }
2771: }
1.39 daniel 2772: return(name);
1.28 daniel 2773: }
2774:
1.50 daniel 2775: /**
2776: * xmlParseQuotedString:
2777: * @ctxt: an XML parser context
2778: *
1.45 daniel 2779: * [OLD] Parse and return a string between quotes or doublequotes
1.110 daniel 2780: * To be removed at next drop of binary compatibility
1.68 daniel 2781: *
2782: * Returns the string parser or NULL.
1.45 daniel 2783: */
1.123 daniel 2784: xmlChar *
1.55 daniel 2785: xmlParseQuotedString(xmlParserCtxtPtr ctxt) {
1.135 daniel 2786: xmlChar *buf = NULL;
1.152 daniel 2787: int len = 0,l;
1.140 daniel 2788: int size = XML_PARSER_BUFFER_SIZE;
1.152 daniel 2789: int c;
1.45 daniel 2790:
1.135 daniel 2791: buf = (xmlChar *) xmlMalloc(size * sizeof(xmlChar));
2792: if (buf == NULL) {
2793: fprintf(stderr, "malloc of %d byte failed\n", size);
2794: return(NULL);
2795: }
1.152 daniel 2796: if (RAW == '"') {
1.45 daniel 2797: NEXT;
1.152 daniel 2798: c = CUR_CHAR(l);
1.135 daniel 2799: while (IS_CHAR(c) && (c != '"')) {
1.152 daniel 2800: if (len + 5 >= size) {
1.135 daniel 2801: size *= 2;
2802: buf = xmlRealloc(buf, size * sizeof(xmlChar));
2803: if (buf == NULL) {
2804: fprintf(stderr, "realloc of %d byte failed\n", size);
2805: return(NULL);
2806: }
2807: }
1.152 daniel 2808: COPY_BUF(l,buf,len,c);
2809: NEXTL(l);
2810: c = CUR_CHAR(l);
1.135 daniel 2811: }
2812: if (c != '"') {
1.123 daniel 2813: ctxt->errNo = XML_ERR_STRING_NOT_CLOSED;
1.55 daniel 2814: if ((ctxt->sax != NULL) && (ctxt->sax->error != NULL))
1.135 daniel 2815: ctxt->sax->error(ctxt->userData,
2816: "String not closed \"%.50s\"\n", buf);
1.59 daniel 2817: ctxt->wellFormed = 0;
1.55 daniel 2818: } else {
1.45 daniel 2819: NEXT;
2820: }
1.152 daniel 2821: } else if (RAW == '\''){
1.45 daniel 2822: NEXT;
1.135 daniel 2823: c = CUR;
2824: while (IS_CHAR(c) && (c != '\'')) {
2825: if (len + 1 >= size) {
2826: size *= 2;
2827: buf = xmlRealloc(buf, size * sizeof(xmlChar));
2828: if (buf == NULL) {
2829: fprintf(stderr, "realloc of %d byte failed\n", size);
2830: return(NULL);
2831: }
2832: }
2833: buf[len++] = c;
2834: NEXT;
2835: c = CUR;
2836: }
1.152 daniel 2837: if (RAW != '\'') {
1.123 daniel 2838: ctxt->errNo = XML_ERR_STRING_NOT_CLOSED;
1.55 daniel 2839: if ((ctxt->sax != NULL) && (ctxt->sax->error != NULL))
1.135 daniel 2840: ctxt->sax->error(ctxt->userData,
2841: "String not closed \"%.50s\"\n", buf);
1.59 daniel 2842: ctxt->wellFormed = 0;
1.55 daniel 2843: } else {
1.45 daniel 2844: NEXT;
2845: }
2846: }
1.135 daniel 2847: return(buf);
1.45 daniel 2848: }
2849:
1.50 daniel 2850: /**
2851: * xmlParseNamespace:
2852: * @ctxt: an XML parser context
2853: *
1.45 daniel 2854: * [OLD] xmlParseNamespace: parse specific PI '<?namespace ...' constructs.
2855: *
2856: * This is what the older xml-name Working Draft specified, a bunch of
2857: * other stuff may still rely on it, so support is still here as
1.127 daniel 2858: * if it was declared on the root of the Tree:-(
1.110 daniel 2859: *
2860: * To be removed at next drop of binary compatibility
1.45 daniel 2861: */
2862:
1.55 daniel 2863: void
2864: xmlParseNamespace(xmlParserCtxtPtr ctxt) {
1.123 daniel 2865: xmlChar *href = NULL;
2866: xmlChar *prefix = NULL;
1.45 daniel 2867: int garbage = 0;
2868:
2869: /*
2870: * We just skipped "namespace" or "xml:namespace"
2871: */
2872: SKIP_BLANKS;
2873:
1.153 daniel 2874: while (IS_CHAR(RAW) && (RAW != '>')) {
1.45 daniel 2875: /*
2876: * We can have "ns" or "prefix" attributes
2877: * Old encoding as 'href' or 'AS' attributes is still supported
2878: */
1.152 daniel 2879: if ((RAW == 'n') && (NXT(1) == 's')) {
1.45 daniel 2880: garbage = 0;
2881: SKIP(2);
2882: SKIP_BLANKS;
2883:
1.152 daniel 2884: if (RAW != '=') continue;
1.45 daniel 2885: NEXT;
2886: SKIP_BLANKS;
2887:
2888: href = xmlParseQuotedString(ctxt);
2889: SKIP_BLANKS;
1.152 daniel 2890: } else if ((RAW == 'h') && (NXT(1) == 'r') &&
1.45 daniel 2891: (NXT(2) == 'e') && (NXT(3) == 'f')) {
2892: garbage = 0;
2893: SKIP(4);
2894: SKIP_BLANKS;
2895:
1.152 daniel 2896: if (RAW != '=') continue;
1.45 daniel 2897: NEXT;
2898: SKIP_BLANKS;
2899:
2900: href = xmlParseQuotedString(ctxt);
2901: SKIP_BLANKS;
1.152 daniel 2902: } else if ((RAW == 'p') && (NXT(1) == 'r') &&
1.45 daniel 2903: (NXT(2) == 'e') && (NXT(3) == 'f') &&
2904: (NXT(4) == 'i') && (NXT(5) == 'x')) {
2905: garbage = 0;
2906: SKIP(6);
2907: SKIP_BLANKS;
2908:
1.152 daniel 2909: if (RAW != '=') continue;
1.45 daniel 2910: NEXT;
2911: SKIP_BLANKS;
2912:
2913: prefix = xmlParseQuotedString(ctxt);
2914: SKIP_BLANKS;
1.152 daniel 2915: } else if ((RAW == 'A') && (NXT(1) == 'S')) {
1.45 daniel 2916: garbage = 0;
2917: SKIP(2);
2918: SKIP_BLANKS;
2919:
1.152 daniel 2920: if (RAW != '=') continue;
1.45 daniel 2921: NEXT;
2922: SKIP_BLANKS;
2923:
2924: prefix = xmlParseQuotedString(ctxt);
2925: SKIP_BLANKS;
1.152 daniel 2926: } else if ((RAW == '?') && (NXT(1) == '>')) {
1.45 daniel 2927: garbage = 0;
1.91 daniel 2928: NEXT;
1.45 daniel 2929: } else {
2930: /*
2931: * Found garbage when parsing the namespace
2932: */
1.122 daniel 2933: if (!garbage) {
1.55 daniel 2934: if ((ctxt->sax != NULL) && (ctxt->sax->error != NULL))
1.122 daniel 2935: ctxt->sax->error(ctxt->userData,
2936: "xmlParseNamespace found garbage\n");
2937: }
1.123 daniel 2938: ctxt->errNo = XML_ERR_NS_DECL_ERROR;
1.59 daniel 2939: ctxt->wellFormed = 0;
1.45 daniel 2940: NEXT;
2941: }
2942: }
2943:
2944: MOVETO_ENDTAG(CUR_PTR);
2945: NEXT;
2946:
2947: /*
2948: * Register the DTD.
1.72 daniel 2949: if (href != NULL)
2950: if ((ctxt->sax != NULL) && (ctxt->sax->globalNamespace != NULL))
1.74 daniel 2951: ctxt->sax->globalNamespace(ctxt->userData, href, prefix);
1.45 daniel 2952: */
2953:
1.119 daniel 2954: if (prefix != NULL) xmlFree(prefix);
2955: if (href != NULL) xmlFree(href);
1.45 daniel 2956: }
2957:
1.28 daniel 2958: /************************************************************************
2959: * *
2960: * The parser itself *
2961: * Relates to http://www.w3.org/TR/REC-xml *
2962: * *
2963: ************************************************************************/
1.14 veillard 2964:
1.50 daniel 2965: /**
1.97 daniel 2966: * xmlScanName:
2967: * @ctxt: an XML parser context
2968: *
2969: * Trickery: parse an XML name but without consuming the input flow
2970: * Needed for rollback cases.
2971: *
2972: * [4] NameChar ::= Letter | Digit | '.' | '-' | '_' | ':' |
2973: * CombiningChar | Extender
2974: *
2975: * [5] Name ::= (Letter | '_' | ':') (NameChar)*
2976: *
2977: * [6] Names ::= Name (S Name)*
2978: *
2979: * Returns the Name parsed or NULL
2980: */
2981:
1.123 daniel 2982: xmlChar *
1.97 daniel 2983: xmlScanName(xmlParserCtxtPtr ctxt) {
1.123 daniel 2984: xmlChar buf[XML_MAX_NAMELEN];
1.97 daniel 2985: int len = 0;
2986:
2987: GROW;
1.152 daniel 2988: if (!IS_LETTER(RAW) && (RAW != '_') &&
2989: (RAW != ':')) {
1.97 daniel 2990: return(NULL);
2991: }
2992:
2993: while ((IS_LETTER(NXT(len))) || (IS_DIGIT(NXT(len))) ||
2994: (NXT(len) == '.') || (NXT(len) == '-') ||
2995: (NXT(len) == '_') || (NXT(len) == ':') ||
2996: (IS_COMBINING(NXT(len))) ||
2997: (IS_EXTENDER(NXT(len)))) {
2998: buf[len] = NXT(len);
2999: len++;
3000: if (len >= XML_MAX_NAMELEN) {
3001: fprintf(stderr,
3002: "xmlScanName: reached XML_MAX_NAMELEN limit\n");
3003: while ((IS_LETTER(NXT(len))) || (IS_DIGIT(NXT(len))) ||
3004: (NXT(len) == '.') || (NXT(len) == '-') ||
3005: (NXT(len) == '_') || (NXT(len) == ':') ||
3006: (IS_COMBINING(NXT(len))) ||
3007: (IS_EXTENDER(NXT(len))))
3008: len++;
3009: break;
3010: }
3011: }
3012: return(xmlStrndup(buf, len));
3013: }
3014:
3015: /**
1.50 daniel 3016: * xmlParseName:
3017: * @ctxt: an XML parser context
3018: *
3019: * parse an XML name.
1.22 daniel 3020: *
3021: * [4] NameChar ::= Letter | Digit | '.' | '-' | '_' | ':' |
3022: * CombiningChar | Extender
3023: *
3024: * [5] Name ::= (Letter | '_' | ':') (NameChar)*
3025: *
3026: * [6] Names ::= Name (S Name)*
1.68 daniel 3027: *
3028: * Returns the Name parsed or NULL
1.1 veillard 3029: */
3030:
1.123 daniel 3031: xmlChar *
1.55 daniel 3032: xmlParseName(xmlParserCtxtPtr ctxt) {
1.123 daniel 3033: xmlChar buf[XML_MAX_NAMELEN];
1.91 daniel 3034: int len = 0;
1.123 daniel 3035: xmlChar cur;
1.1 veillard 3036:
1.91 daniel 3037: GROW;
1.97 daniel 3038: cur = CUR;
3039: if (!IS_LETTER(cur) && (cur != '_') &&
3040: (cur != ':')) {
1.91 daniel 3041: return(NULL);
3042: }
1.40 daniel 3043:
1.97 daniel 3044: while ((IS_LETTER(cur)) || (IS_DIGIT(cur)) ||
3045: (cur == '.') || (cur == '-') ||
3046: (cur == '_') || (cur == ':') ||
3047: (IS_COMBINING(cur)) ||
3048: (IS_EXTENDER(cur))) {
3049: buf[len++] = cur;
1.40 daniel 3050: NEXT;
1.97 daniel 3051: cur = CUR;
1.91 daniel 3052: if (len >= XML_MAX_NAMELEN) {
3053: fprintf(stderr,
3054: "xmlParseName: reached XML_MAX_NAMELEN limit\n");
1.97 daniel 3055: while ((IS_LETTER(cur)) || (IS_DIGIT(cur)) ||
3056: (cur == '.') || (cur == '-') ||
3057: (cur == '_') || (cur == ':') ||
3058: (IS_COMBINING(cur)) ||
3059: (IS_EXTENDER(cur))) {
3060: NEXT;
3061: cur = CUR;
3062: }
1.91 daniel 3063: break;
3064: }
3065: }
3066: return(xmlStrndup(buf, len));
1.22 daniel 3067: }
3068:
1.50 daniel 3069: /**
1.135 daniel 3070: * xmlParseStringName:
3071: * @ctxt: an XML parser context
3072: * @str: a pointer to an index in the string
3073: *
3074: * parse an XML name.
3075: *
3076: * [4] NameChar ::= Letter | Digit | '.' | '-' | '_' | ':' |
3077: * CombiningChar | Extender
3078: *
3079: * [5] Name ::= (Letter | '_' | ':') (NameChar)*
3080: *
3081: * [6] Names ::= Name (S Name)*
3082: *
3083: * Returns the Name parsed or NULL. The str pointer
3084: * is updated to the current location in the string.
3085: */
3086:
3087: xmlChar *
3088: xmlParseStringName(xmlParserCtxtPtr ctxt, const xmlChar** str) {
3089: const xmlChar *ptr;
3090: const xmlChar *start;
3091: xmlChar cur;
3092:
3093: if ((str == NULL) || (*str == NULL)) return(NULL);
3094:
3095: start = ptr = *str;
3096: cur = *ptr;
3097: if (!IS_LETTER(cur) && (cur != '_') &&
3098: (cur != ':')) {
3099: return(NULL);
3100: }
3101:
3102: while ((IS_LETTER(cur)) || (IS_DIGIT(cur)) ||
3103: (cur == '.') || (cur == '-') ||
3104: (cur == '_') || (cur == ':') ||
3105: (IS_COMBINING(cur)) ||
3106: (IS_EXTENDER(cur))) {
3107: ptr++;
3108: cur = *ptr;
3109: }
3110: *str = ptr;
3111: return(xmlStrndup(start, ptr - start ));
3112: }
3113:
3114: /**
1.50 daniel 3115: * xmlParseNmtoken:
3116: * @ctxt: an XML parser context
3117: *
3118: * parse an XML Nmtoken.
1.22 daniel 3119: *
3120: * [7] Nmtoken ::= (NameChar)+
3121: *
3122: * [8] Nmtokens ::= Nmtoken (S Nmtoken)*
1.68 daniel 3123: *
3124: * Returns the Nmtoken parsed or NULL
1.22 daniel 3125: */
3126:
1.123 daniel 3127: xmlChar *
1.55 daniel 3128: xmlParseNmtoken(xmlParserCtxtPtr ctxt) {
1.123 daniel 3129: xmlChar buf[XML_MAX_NAMELEN];
1.91 daniel 3130: int len = 0;
1.22 daniel 3131:
1.91 daniel 3132: GROW;
1.153 daniel 3133: while ((IS_LETTER(RAW)) || (IS_DIGIT(RAW)) ||
1.152 daniel 3134: (RAW == '.') || (RAW == '-') ||
3135: (RAW == '_') || (RAW == ':') ||
1.153 daniel 3136: (IS_COMBINING(RAW)) ||
3137: (IS_EXTENDER(RAW))) {
1.91 daniel 3138: buf[len++] = CUR;
1.40 daniel 3139: NEXT;
1.91 daniel 3140: if (len >= XML_MAX_NAMELEN) {
3141: fprintf(stderr,
3142: "xmlParseNmtoken: reached XML_MAX_NAMELEN limit\n");
1.153 daniel 3143: while ((IS_LETTER(RAW)) || (IS_DIGIT(RAW)) ||
1.152 daniel 3144: (RAW == '.') || (RAW == '-') ||
3145: (RAW == '_') || (RAW == ':') ||
1.153 daniel 3146: (IS_COMBINING(RAW)) ||
3147: (IS_EXTENDER(RAW)))
1.91 daniel 3148: NEXT;
3149: break;
3150: }
3151: }
3152: return(xmlStrndup(buf, len));
1.1 veillard 3153: }
3154:
1.50 daniel 3155: /**
3156: * xmlParseEntityValue:
3157: * @ctxt: an XML parser context
1.78 daniel 3158: * @orig: if non-NULL store a copy of the original entity value
1.50 daniel 3159: *
3160: * parse a value for ENTITY decl.
1.24 daniel 3161: *
3162: * [9] EntityValue ::= '"' ([^%&"] | PEReference | Reference)* '"' |
3163: * "'" ([^%&'] | PEReference | Reference)* "'"
1.68 daniel 3164: *
1.78 daniel 3165: * Returns the EntityValue parsed with reference substitued or NULL
1.24 daniel 3166: */
3167:
1.123 daniel 3168: xmlChar *
3169: xmlParseEntityValue(xmlParserCtxtPtr ctxt, xmlChar **orig) {
1.135 daniel 3170: xmlChar *buf = NULL;
3171: int len = 0;
1.140 daniel 3172: int size = XML_PARSER_BUFFER_SIZE;
1.152 daniel 3173: int c, l;
1.135 daniel 3174: xmlChar stop;
1.123 daniel 3175: xmlChar *ret = NULL;
1.98 daniel 3176: xmlParserInputPtr input;
1.24 daniel 3177:
1.152 daniel 3178: if (RAW == '"') stop = '"';
3179: else if (RAW == '\'') stop = '\'';
1.135 daniel 3180: else {
3181: ctxt->errNo = XML_ERR_ENTITY_NOT_STARTED;
3182: if ((ctxt->sax != NULL) && (ctxt->sax->error != NULL))
3183: ctxt->sax->error(ctxt->userData, "EntityValue: \" or ' expected\n");
3184: ctxt->wellFormed = 0;
3185: return(NULL);
3186: }
3187: buf = (xmlChar *) xmlMalloc(size * sizeof(xmlChar));
3188: if (buf == NULL) {
3189: fprintf(stderr, "malloc of %d byte failed\n", size);
3190: return(NULL);
3191: }
1.94 daniel 3192:
1.135 daniel 3193: /*
3194: * The content of the entity definition is copied in a buffer.
3195: */
1.94 daniel 3196:
1.135 daniel 3197: ctxt->instate = XML_PARSER_ENTITY_VALUE;
3198: input = ctxt->input;
3199: GROW;
3200: NEXT;
1.152 daniel 3201: c = CUR_CHAR(l);
1.135 daniel 3202: /*
3203: * NOTE: 4.4.5 Included in Literal
3204: * When a parameter entity reference appears in a literal entity
3205: * value, ... a single or double quote character in the replacement
3206: * text is always treated as a normal data character and will not
3207: * terminate the literal.
3208: * In practice it means we stop the loop only when back at parsing
3209: * the initial entity and the quote is found
3210: */
3211: while (IS_CHAR(c) && ((c != stop) || (ctxt->input != input))) {
1.152 daniel 3212: if (len + 5 >= size) {
1.135 daniel 3213: size *= 2;
3214: buf = xmlRealloc(buf, size * sizeof(xmlChar));
3215: if (buf == NULL) {
3216: fprintf(stderr, "realloc of %d byte failed\n", size);
3217: return(NULL);
1.94 daniel 3218: }
1.79 daniel 3219: }
1.152 daniel 3220: COPY_BUF(l,buf,len,c);
3221: NEXTL(l);
1.98 daniel 3222: /*
1.135 daniel 3223: * Pop-up of finished entities.
1.98 daniel 3224: */
1.152 daniel 3225: while ((RAW == 0) && (ctxt->inputNr > 1))
1.135 daniel 3226: xmlPopInput(ctxt);
1.152 daniel 3227:
3228: c = CUR_CHAR(l);
1.135 daniel 3229: if (c == 0) {
1.94 daniel 3230: GROW;
1.152 daniel 3231: c = CUR_CHAR(l);
1.79 daniel 3232: }
1.135 daniel 3233: }
3234: buf[len] = 0;
3235:
3236: /*
3237: * Then PEReference entities are substituted.
3238: */
3239: if (c != stop) {
3240: ctxt->errNo = XML_ERR_ENTITY_NOT_FINISHED;
1.55 daniel 3241: if ((ctxt->sax != NULL) && (ctxt->sax->error != NULL))
1.135 daniel 3242: ctxt->sax->error(ctxt->userData, "EntityValue: \" expected\n");
1.59 daniel 3243: ctxt->wellFormed = 0;
1.135 daniel 3244: } else {
3245: NEXT;
3246: /*
3247: * NOTE: 4.4.7 Bypassed
3248: * When a general entity reference appears in the EntityValue in
3249: * an entity declaration, it is bypassed and left as is.
3250: * so XML_SUBSTITUTE_REF is not set.
3251: */
3252: ret = xmlStringDecodeEntities(ctxt, buf, XML_SUBSTITUTE_PEREF,
3253: 0, 0, 0);
3254: if (orig != NULL)
3255: *orig = buf;
3256: else
3257: xmlFree(buf);
1.24 daniel 3258: }
3259:
3260: return(ret);
3261: }
3262:
1.50 daniel 3263: /**
3264: * xmlParseAttValue:
3265: * @ctxt: an XML parser context
3266: *
3267: * parse a value for an attribute
1.78 daniel 3268: * Note: the parser won't do substitution of entities here, this
1.113 daniel 3269: * will be handled later in xmlStringGetNodeList
1.29 daniel 3270: *
3271: * [10] AttValue ::= '"' ([^<&"] | Reference)* '"' |
3272: * "'" ([^<&'] | Reference)* "'"
1.68 daniel 3273: *
1.129 daniel 3274: * 3.3.3 Attribute-Value Normalization:
3275: * Before the value of an attribute is passed to the application or
3276: * checked for validity, the XML processor must normalize it as follows:
3277: * - a character reference is processed by appending the referenced
3278: * character to the attribute value
3279: * - an entity reference is processed by recursively processing the
3280: * replacement text of the entity
3281: * - a whitespace character (#x20, #xD, #xA, #x9) is processed by
3282: * appending #x20 to the normalized value, except that only a single
3283: * #x20 is appended for a "#xD#xA" sequence that is part of an external
3284: * parsed entity or the literal entity value of an internal parsed entity
3285: * - other characters are processed by appending them to the normalized value
1.130 daniel 3286: * If the declared value is not CDATA, then the XML processor must further
3287: * process the normalized attribute value by discarding any leading and
3288: * trailing space (#x20) characters, and by replacing sequences of space
3289: * (#x20) characters by a single space (#x20) character.
3290: * All attributes for which no declaration has been read should be treated
3291: * by a non-validating parser as if declared CDATA.
1.129 daniel 3292: *
3293: * Returns the AttValue parsed or NULL. The value has to be freed by the caller.
1.29 daniel 3294: */
3295:
1.123 daniel 3296: xmlChar *
1.55 daniel 3297: xmlParseAttValue(xmlParserCtxtPtr ctxt) {
1.129 daniel 3298: xmlChar limit = 0;
3299: xmlChar *buffer = NULL;
3300: int buffer_size = 0;
3301: xmlChar *out = NULL;
3302:
3303: xmlChar *current = NULL;
3304: xmlEntityPtr ent;
3305: xmlChar cur;
3306:
1.29 daniel 3307:
1.91 daniel 3308: SHRINK;
1.151 daniel 3309: if (NXT(0) == '"') {
1.96 daniel 3310: ctxt->instate = XML_PARSER_ATTRIBUTE_VALUE;
1.129 daniel 3311: limit = '"';
1.40 daniel 3312: NEXT;
1.151 daniel 3313: } else if (NXT(0) == '\'') {
1.129 daniel 3314: limit = '\'';
1.96 daniel 3315: ctxt->instate = XML_PARSER_ATTRIBUTE_VALUE;
1.40 daniel 3316: NEXT;
1.29 daniel 3317: } else {
1.123 daniel 3318: ctxt->errNo = XML_ERR_ATTRIBUTE_NOT_STARTED;
1.55 daniel 3319: if ((ctxt->sax != NULL) && (ctxt->sax->error != NULL))
1.74 daniel 3320: ctxt->sax->error(ctxt->userData, "AttValue: \" or ' expected\n");
1.59 daniel 3321: ctxt->wellFormed = 0;
1.129 daniel 3322: return(NULL);
1.29 daniel 3323: }
3324:
1.129 daniel 3325: /*
3326: * allocate a translation buffer.
3327: */
1.140 daniel 3328: buffer_size = XML_PARSER_BUFFER_SIZE;
1.129 daniel 3329: buffer = (xmlChar *) xmlMalloc(buffer_size * sizeof(xmlChar));
3330: if (buffer == NULL) {
3331: perror("xmlParseAttValue: malloc failed");
3332: return(NULL);
3333: }
3334: out = buffer;
3335:
3336: /*
3337: * Ok loop until we reach one of the ending char or a size limit.
3338: */
3339: cur = CUR;
1.156 ! daniel 3340: while (((NXT(0) != limit) && (cur != '<')) || (ctxt->token != 0)) {
1.129 daniel 3341: if (cur == 0) break;
3342: if ((cur == '&') && (NXT(1) == '#')) {
3343: int val = xmlParseCharRef(ctxt);
3344: *out++ = val;
3345: } else if (cur == '&') {
3346: ent = xmlParseEntityRef(ctxt);
3347: if ((ent != NULL) &&
3348: (ctxt->replaceEntities != 0)) {
3349: current = ent->content;
3350: while (*current != 0) {
3351: *out++ = *current++;
3352: if (out - buffer > buffer_size - 10) {
3353: int index = out - buffer;
3354:
3355: growBuffer(buffer);
3356: out = &buffer[index];
3357: }
3358: }
3359: } else if (ent != NULL) {
3360: int i = xmlStrlen(ent->name);
3361: const xmlChar *cur = ent->name;
3362:
3363: *out++ = '&';
3364: if (out - buffer > buffer_size - i - 10) {
3365: int index = out - buffer;
3366:
3367: growBuffer(buffer);
3368: out = &buffer[index];
3369: }
3370: for (;i > 0;i--)
3371: *out++ = *cur++;
3372: *out++ = ';';
3373: }
3374: } else {
1.156 ! daniel 3375: /* invalid for UTF-8 , use COPY(out); !!! */
1.129 daniel 3376: if ((cur == 0x20) || (cur == 0xD) || (cur == 0xA) || (cur == 0x9)) {
1.130 daniel 3377: *out++ = 0x20;
3378: if (out - buffer > buffer_size - 10) {
3379: int index = out - buffer;
3380:
3381: growBuffer(buffer);
3382: out = &buffer[index];
1.129 daniel 3383: }
3384: } else {
3385: *out++ = cur;
3386: if (out - buffer > buffer_size - 10) {
3387: int index = out - buffer;
3388:
3389: growBuffer(buffer);
3390: out = &buffer[index];
3391: }
3392: }
3393: NEXT;
3394: }
3395: cur = CUR;
3396: }
3397: *out++ = 0;
1.152 daniel 3398: if (RAW == '<') {
1.129 daniel 3399: if ((ctxt->sax != NULL) && (ctxt->sax->error != NULL))
3400: ctxt->sax->error(ctxt->userData,
3401: "Unescaped '<' not allowed in attributes values\n");
3402: ctxt->errNo = XML_ERR_LT_IN_ATTRIBUTE;
3403: ctxt->wellFormed = 0;
1.152 daniel 3404: } else if (RAW != limit) {
1.129 daniel 3405: if ((ctxt->sax != NULL) && (ctxt->sax->error != NULL))
3406: ctxt->sax->error(ctxt->userData, "AttValue: ' expected\n");
3407: ctxt->errNo = XML_ERR_ATTRIBUTE_NOT_FINISHED;
3408: ctxt->wellFormed = 0;
3409: } else
3410: NEXT;
3411: return(buffer);
1.29 daniel 3412: }
3413:
1.50 daniel 3414: /**
3415: * xmlParseSystemLiteral:
3416: * @ctxt: an XML parser context
3417: *
3418: * parse an XML Literal
1.21 daniel 3419: *
1.22 daniel 3420: * [11] SystemLiteral ::= ('"' [^"]* '"') | ("'" [^']* "'")
1.68 daniel 3421: *
3422: * Returns the SystemLiteral parsed or NULL
1.21 daniel 3423: */
3424:
1.123 daniel 3425: xmlChar *
1.55 daniel 3426: xmlParseSystemLiteral(xmlParserCtxtPtr ctxt) {
1.135 daniel 3427: xmlChar *buf = NULL;
3428: int len = 0;
1.140 daniel 3429: int size = XML_PARSER_BUFFER_SIZE;
1.152 daniel 3430: int cur, l;
1.135 daniel 3431: xmlChar stop;
1.21 daniel 3432:
1.91 daniel 3433: SHRINK;
1.152 daniel 3434: if (RAW == '"') {
1.40 daniel 3435: NEXT;
1.135 daniel 3436: stop = '"';
1.152 daniel 3437: } else if (RAW == '\'') {
1.40 daniel 3438: NEXT;
1.135 daniel 3439: stop = '\'';
1.21 daniel 3440: } else {
1.55 daniel 3441: if ((ctxt->sax != NULL) && (ctxt->sax->error != NULL))
1.122 daniel 3442: ctxt->sax->error(ctxt->userData,
3443: "SystemLiteral \" or ' expected\n");
1.123 daniel 3444: ctxt->errNo = XML_ERR_LITERAL_NOT_STARTED;
1.59 daniel 3445: ctxt->wellFormed = 0;
1.135 daniel 3446: return(NULL);
1.21 daniel 3447: }
3448:
1.135 daniel 3449: buf = (xmlChar *) xmlMalloc(size * sizeof(xmlChar));
3450: if (buf == NULL) {
3451: fprintf(stderr, "malloc of %d byte failed\n", size);
3452: return(NULL);
3453: }
1.152 daniel 3454: cur = CUR_CHAR(l);
1.135 daniel 3455: while ((IS_CHAR(cur)) && (cur != stop)) {
1.152 daniel 3456: if (len + 5 >= size) {
1.135 daniel 3457: size *= 2;
3458: buf = xmlRealloc(buf, size * sizeof(xmlChar));
3459: if (buf == NULL) {
3460: fprintf(stderr, "realloc of %d byte failed\n", size);
3461: return(NULL);
3462: }
3463: }
1.152 daniel 3464: COPY_BUF(l,buf,len,cur);
3465: NEXTL(l);
3466: cur = CUR_CHAR(l);
1.135 daniel 3467: if (cur == 0) {
3468: GROW;
3469: SHRINK;
1.152 daniel 3470: cur = CUR_CHAR(l);
1.135 daniel 3471: }
3472: }
3473: buf[len] = 0;
3474: if (!IS_CHAR(cur)) {
3475: if ((ctxt->sax != NULL) && (ctxt->sax->error != NULL))
3476: ctxt->sax->error(ctxt->userData, "Unfinished SystemLiteral\n");
3477: ctxt->errNo = XML_ERR_LITERAL_NOT_FINISHED;
3478: ctxt->wellFormed = 0;
3479: } else {
3480: NEXT;
3481: }
3482: return(buf);
1.21 daniel 3483: }
3484:
1.50 daniel 3485: /**
3486: * xmlParsePubidLiteral:
3487: * @ctxt: an XML parser context
1.21 daniel 3488: *
1.50 daniel 3489: * parse an XML public literal
1.68 daniel 3490: *
3491: * [12] PubidLiteral ::= '"' PubidChar* '"' | "'" (PubidChar - "'")* "'"
3492: *
3493: * Returns the PubidLiteral parsed or NULL.
1.21 daniel 3494: */
3495:
1.123 daniel 3496: xmlChar *
1.55 daniel 3497: xmlParsePubidLiteral(xmlParserCtxtPtr ctxt) {
1.135 daniel 3498: xmlChar *buf = NULL;
3499: int len = 0;
1.140 daniel 3500: int size = XML_PARSER_BUFFER_SIZE;
1.135 daniel 3501: xmlChar cur;
3502: xmlChar stop;
1.125 daniel 3503:
1.91 daniel 3504: SHRINK;
1.152 daniel 3505: if (RAW == '"') {
1.40 daniel 3506: NEXT;
1.135 daniel 3507: stop = '"';
1.152 daniel 3508: } else if (RAW == '\'') {
1.40 daniel 3509: NEXT;
1.135 daniel 3510: stop = '\'';
1.21 daniel 3511: } else {
1.55 daniel 3512: if ((ctxt->sax != NULL) && (ctxt->sax->error != NULL))
1.122 daniel 3513: ctxt->sax->error(ctxt->userData,
3514: "SystemLiteral \" or ' expected\n");
1.123 daniel 3515: ctxt->errNo = XML_ERR_LITERAL_NOT_STARTED;
1.59 daniel 3516: ctxt->wellFormed = 0;
1.135 daniel 3517: return(NULL);
3518: }
3519: buf = (xmlChar *) xmlMalloc(size * sizeof(xmlChar));
3520: if (buf == NULL) {
3521: fprintf(stderr, "malloc of %d byte failed\n", size);
3522: return(NULL);
3523: }
3524: cur = CUR;
3525: while ((IS_PUBIDCHAR(cur)) && (cur != stop)) {
3526: if (len + 1 >= size) {
3527: size *= 2;
3528: buf = xmlRealloc(buf, size * sizeof(xmlChar));
3529: if (buf == NULL) {
3530: fprintf(stderr, "realloc of %d byte failed\n", size);
3531: return(NULL);
3532: }
3533: }
3534: buf[len++] = cur;
3535: NEXT;
3536: cur = CUR;
3537: if (cur == 0) {
3538: GROW;
3539: SHRINK;
3540: cur = CUR;
3541: }
3542: }
3543: buf[len] = 0;
3544: if (cur != stop) {
3545: if ((ctxt->sax != NULL) && (ctxt->sax->error != NULL))
3546: ctxt->sax->error(ctxt->userData, "Unfinished PubidLiteral\n");
3547: ctxt->errNo = XML_ERR_LITERAL_NOT_FINISHED;
3548: ctxt->wellFormed = 0;
3549: } else {
3550: NEXT;
1.21 daniel 3551: }
1.135 daniel 3552: return(buf);
1.21 daniel 3553: }
3554:
1.50 daniel 3555: /**
3556: * xmlParseCharData:
3557: * @ctxt: an XML parser context
3558: * @cdata: int indicating whether we are within a CDATA section
3559: *
3560: * parse a CharData section.
3561: * if we are within a CDATA section ']]>' marks an end of section.
1.27 daniel 3562: *
1.151 daniel 3563: * The right angle bracket (>) may be represented using the string ">",
3564: * and must, for compatibility, be escaped using ">" or a character
3565: * reference when it appears in the string "]]>" in content, when that
3566: * string is not marking the end of a CDATA section.
3567: *
1.27 daniel 3568: * [14] CharData ::= [^<&]* - ([^<&]* ']]>' [^<&]*)
3569: */
3570:
1.55 daniel 3571: void
3572: xmlParseCharData(xmlParserCtxtPtr ctxt, int cdata) {
1.152 daniel 3573: xmlChar buf[XML_PARSER_BIG_BUFFER_SIZE + 5];
1.91 daniel 3574: int nbchar = 0;
1.152 daniel 3575: int cur, l;
1.27 daniel 3576:
1.91 daniel 3577: SHRINK;
1.152 daniel 3578: cur = CUR_CHAR(l);
1.97 daniel 3579: while ((IS_CHAR(cur)) && (cur != '<') &&
1.153 daniel 3580: ((cur != '&') || (ctxt->token == '&'))) {
1.97 daniel 3581: if ((cur == ']') && (NXT(1) == ']') &&
1.59 daniel 3582: (NXT(2) == '>')) {
3583: if (cdata) break;
3584: else {
3585: if ((ctxt->sax != NULL) && (ctxt->sax->error != NULL))
1.151 daniel 3586: ctxt->sax->error(ctxt->userData,
1.59 daniel 3587: "Sequence ']]>' not allowed in content\n");
1.123 daniel 3588: ctxt->errNo = XML_ERR_MISPLACED_CDATA_END;
1.151 daniel 3589: /* Should this be relaxed ??? I see a "must here */
3590: ctxt->wellFormed = 0;
1.59 daniel 3591: }
3592: }
1.152 daniel 3593: COPY_BUF(l,buf,nbchar,cur);
3594: if (nbchar >= XML_PARSER_BIG_BUFFER_SIZE) {
1.91 daniel 3595: /*
3596: * Ok the segment is to be consumed as chars.
3597: */
3598: if (ctxt->sax != NULL) {
3599: if (areBlanks(ctxt, buf, nbchar)) {
3600: if (ctxt->sax->ignorableWhitespace != NULL)
3601: ctxt->sax->ignorableWhitespace(ctxt->userData,
3602: buf, nbchar);
3603: } else {
3604: if (ctxt->sax->characters != NULL)
3605: ctxt->sax->characters(ctxt->userData, buf, nbchar);
3606: }
3607: }
3608: nbchar = 0;
3609: }
1.152 daniel 3610: NEXTL(l);
3611: cur = CUR_CHAR(l);
1.27 daniel 3612: }
1.91 daniel 3613: if (nbchar != 0) {
3614: /*
3615: * Ok the segment is to be consumed as chars.
3616: */
3617: if (ctxt->sax != NULL) {
3618: if (areBlanks(ctxt, buf, nbchar)) {
3619: if (ctxt->sax->ignorableWhitespace != NULL)
3620: ctxt->sax->ignorableWhitespace(ctxt->userData, buf, nbchar);
3621: } else {
3622: if (ctxt->sax->characters != NULL)
3623: ctxt->sax->characters(ctxt->userData, buf, nbchar);
3624: }
3625: }
1.45 daniel 3626: }
1.27 daniel 3627: }
3628:
1.50 daniel 3629: /**
3630: * xmlParseExternalID:
3631: * @ctxt: an XML parser context
1.123 daniel 3632: * @publicID: a xmlChar** receiving PubidLiteral
1.67 daniel 3633: * @strict: indicate whether we should restrict parsing to only
3634: * production [75], see NOTE below
1.50 daniel 3635: *
1.67 daniel 3636: * Parse an External ID or a Public ID
3637: *
3638: * NOTE: Productions [75] and [83] interract badly since [75] can generate
3639: * 'PUBLIC' S PubidLiteral S SystemLiteral
1.22 daniel 3640: *
3641: * [75] ExternalID ::= 'SYSTEM' S SystemLiteral
3642: * | 'PUBLIC' S PubidLiteral S SystemLiteral
1.67 daniel 3643: *
3644: * [83] PublicID ::= 'PUBLIC' S PubidLiteral
3645: *
1.68 daniel 3646: * Returns the function returns SystemLiteral and in the second
1.67 daniel 3647: * case publicID receives PubidLiteral, is strict is off
3648: * it is possible to return NULL and have publicID set.
1.22 daniel 3649: */
3650:
1.123 daniel 3651: xmlChar *
3652: xmlParseExternalID(xmlParserCtxtPtr ctxt, xmlChar **publicID, int strict) {
3653: xmlChar *URI = NULL;
1.22 daniel 3654:
1.91 daniel 3655: SHRINK;
1.152 daniel 3656: if ((RAW == 'S') && (NXT(1) == 'Y') &&
1.40 daniel 3657: (NXT(2) == 'S') && (NXT(3) == 'T') &&
3658: (NXT(4) == 'E') && (NXT(5) == 'M')) {
3659: SKIP(6);
1.59 daniel 3660: if (!IS_BLANK(CUR)) {
3661: if ((ctxt->sax != NULL) && (ctxt->sax->error != NULL))
1.74 daniel 3662: ctxt->sax->error(ctxt->userData,
1.59 daniel 3663: "Space required after 'SYSTEM'\n");
1.123 daniel 3664: ctxt->errNo = XML_ERR_SPACE_REQUIRED;
1.59 daniel 3665: ctxt->wellFormed = 0;
3666: }
1.42 daniel 3667: SKIP_BLANKS;
1.39 daniel 3668: URI = xmlParseSystemLiteral(ctxt);
1.59 daniel 3669: if (URI == NULL) {
1.55 daniel 3670: if ((ctxt->sax != NULL) && (ctxt->sax->error != NULL))
1.74 daniel 3671: ctxt->sax->error(ctxt->userData,
1.39 daniel 3672: "xmlParseExternalID: SYSTEM, no URI\n");
1.123 daniel 3673: ctxt->errNo = XML_ERR_URI_REQUIRED;
1.59 daniel 3674: ctxt->wellFormed = 0;
3675: }
1.152 daniel 3676: } else if ((RAW == 'P') && (NXT(1) == 'U') &&
1.40 daniel 3677: (NXT(2) == 'B') && (NXT(3) == 'L') &&
3678: (NXT(4) == 'I') && (NXT(5) == 'C')) {
3679: SKIP(6);
1.59 daniel 3680: if (!IS_BLANK(CUR)) {
3681: if ((ctxt->sax != NULL) && (ctxt->sax->error != NULL))
1.74 daniel 3682: ctxt->sax->error(ctxt->userData,
1.59 daniel 3683: "Space required after 'PUBLIC'\n");
1.123 daniel 3684: ctxt->errNo = XML_ERR_SPACE_REQUIRED;
1.59 daniel 3685: ctxt->wellFormed = 0;
3686: }
1.42 daniel 3687: SKIP_BLANKS;
1.39 daniel 3688: *publicID = xmlParsePubidLiteral(ctxt);
1.59 daniel 3689: if (*publicID == NULL) {
1.55 daniel 3690: if ((ctxt->sax != NULL) && (ctxt->sax->error != NULL))
1.74 daniel 3691: ctxt->sax->error(ctxt->userData,
1.39 daniel 3692: "xmlParseExternalID: PUBLIC, no Public Identifier\n");
1.123 daniel 3693: ctxt->errNo = XML_ERR_PUBID_REQUIRED;
1.59 daniel 3694: ctxt->wellFormed = 0;
3695: }
1.67 daniel 3696: if (strict) {
3697: /*
3698: * We don't handle [83] so "S SystemLiteral" is required.
3699: */
3700: if (!IS_BLANK(CUR)) {
3701: if ((ctxt->sax != NULL) && (ctxt->sax->error != NULL))
1.74 daniel 3702: ctxt->sax->error(ctxt->userData,
1.67 daniel 3703: "Space required after the Public Identifier\n");
1.123 daniel 3704: ctxt->errNo = XML_ERR_SPACE_REQUIRED;
1.67 daniel 3705: ctxt->wellFormed = 0;
3706: }
3707: } else {
3708: /*
3709: * We handle [83] so we return immediately, if
3710: * "S SystemLiteral" is not detected. From a purely parsing
3711: * point of view that's a nice mess.
3712: */
1.135 daniel 3713: const xmlChar *ptr;
3714: GROW;
3715:
3716: ptr = CUR_PTR;
1.67 daniel 3717: if (!IS_BLANK(*ptr)) return(NULL);
3718:
3719: while (IS_BLANK(*ptr)) ptr++;
3720: if ((*ptr != '\'') || (*ptr != '"')) return(NULL);
1.59 daniel 3721: }
1.42 daniel 3722: SKIP_BLANKS;
1.39 daniel 3723: URI = xmlParseSystemLiteral(ctxt);
1.59 daniel 3724: if (URI == NULL) {
1.55 daniel 3725: if ((ctxt->sax != NULL) && (ctxt->sax->error != NULL))
1.74 daniel 3726: ctxt->sax->error(ctxt->userData,
1.39 daniel 3727: "xmlParseExternalID: PUBLIC, no URI\n");
1.123 daniel 3728: ctxt->errNo = XML_ERR_URI_REQUIRED;
1.59 daniel 3729: ctxt->wellFormed = 0;
3730: }
1.22 daniel 3731: }
1.39 daniel 3732: return(URI);
1.22 daniel 3733: }
3734:
1.50 daniel 3735: /**
3736: * xmlParseComment:
1.69 daniel 3737: * @ctxt: an XML parser context
1.50 daniel 3738: *
1.3 veillard 3739: * Skip an XML (SGML) comment <!-- .... -->
1.38 daniel 3740: * The spec says that "For compatibility, the string "--" (double-hyphen)
3741: * must not occur within comments. "
1.22 daniel 3742: *
3743: * [15] Comment ::= '<!--' ((Char - '-') | ('-' (Char - '-')))* '-->'
1.3 veillard 3744: */
1.72 daniel 3745: void
1.114 daniel 3746: xmlParseComment(xmlParserCtxtPtr ctxt) {
1.135 daniel 3747: xmlChar *buf = NULL;
3748: int len = 0;
1.140 daniel 3749: int size = XML_PARSER_BUFFER_SIZE;
1.152 daniel 3750: int q, ql;
3751: int r, rl;
3752: int cur, l;
1.140 daniel 3753: xmlParserInputState state;
1.3 veillard 3754:
3755: /*
1.22 daniel 3756: * Check that there is a comment right here.
1.3 veillard 3757: */
1.152 daniel 3758: if ((RAW != '<') || (NXT(1) != '!') ||
1.72 daniel 3759: (NXT(2) != '-') || (NXT(3) != '-')) return;
1.3 veillard 3760:
1.140 daniel 3761: state = ctxt->instate;
1.97 daniel 3762: ctxt->instate = XML_PARSER_COMMENT;
1.91 daniel 3763: SHRINK;
1.40 daniel 3764: SKIP(4);
1.135 daniel 3765: buf = (xmlChar *) xmlMalloc(size * sizeof(xmlChar));
3766: if (buf == NULL) {
3767: fprintf(stderr, "malloc of %d byte failed\n", size);
1.140 daniel 3768: ctxt->instate = state;
1.135 daniel 3769: return;
3770: }
1.152 daniel 3771: q = CUR_CHAR(ql);
3772: NEXTL(ql);
3773: r = CUR_CHAR(rl);
3774: NEXTL(rl);
3775: cur = CUR_CHAR(l);
1.135 daniel 3776: while (IS_CHAR(cur) &&
3777: ((cur != '>') ||
3778: (r != '-') || (q != '-'))) {
3779: if ((r == '-') && (q == '-')) {
1.55 daniel 3780: if ((ctxt->sax != NULL) && (ctxt->sax->error != NULL))
1.74 daniel 3781: ctxt->sax->error(ctxt->userData,
1.38 daniel 3782: "Comment must not contain '--' (double-hyphen)`\n");
1.123 daniel 3783: ctxt->errNo = XML_ERR_HYPHEN_IN_COMMENT;
1.59 daniel 3784: ctxt->wellFormed = 0;
3785: }
1.152 daniel 3786: if (len + 5 >= size) {
1.135 daniel 3787: size *= 2;
3788: buf = xmlRealloc(buf, size * sizeof(xmlChar));
3789: if (buf == NULL) {
3790: fprintf(stderr, "realloc of %d byte failed\n", size);
1.140 daniel 3791: ctxt->instate = state;
1.135 daniel 3792: return;
3793: }
3794: }
1.152 daniel 3795: COPY_BUF(ql,buf,len,q);
1.135 daniel 3796: q = r;
1.152 daniel 3797: ql = rl;
1.135 daniel 3798: r = cur;
1.152 daniel 3799: rl = l;
3800: NEXTL(l);
3801: cur = CUR_CHAR(l);
1.135 daniel 3802: if (cur == 0) {
3803: SHRINK;
3804: GROW;
1.152 daniel 3805: cur = CUR_CHAR(l);
1.135 daniel 3806: }
1.3 veillard 3807: }
1.135 daniel 3808: buf[len] = 0;
3809: if (!IS_CHAR(cur)) {
1.55 daniel 3810: if ((ctxt->sax != NULL) && (ctxt->sax->error != NULL))
1.122 daniel 3811: ctxt->sax->error(ctxt->userData,
1.135 daniel 3812: "Comment not terminated \n<!--%.50s\n", buf);
1.123 daniel 3813: ctxt->errNo = XML_ERR_COMMENT_NOT_FINISHED;
1.59 daniel 3814: ctxt->wellFormed = 0;
1.3 veillard 3815: } else {
1.40 daniel 3816: NEXT;
1.114 daniel 3817: if ((ctxt->sax != NULL) && (ctxt->sax->comment != NULL))
1.135 daniel 3818: ctxt->sax->comment(ctxt->userData, buf);
3819: xmlFree(buf);
1.3 veillard 3820: }
1.140 daniel 3821: ctxt->instate = state;
1.3 veillard 3822: }
3823:
1.50 daniel 3824: /**
3825: * xmlParsePITarget:
3826: * @ctxt: an XML parser context
3827: *
3828: * parse the name of a PI
1.22 daniel 3829: *
3830: * [17] PITarget ::= Name - (('X' | 'x') ('M' | 'm') ('L' | 'l'))
1.68 daniel 3831: *
3832: * Returns the PITarget name or NULL
1.22 daniel 3833: */
3834:
1.123 daniel 3835: xmlChar *
1.55 daniel 3836: xmlParsePITarget(xmlParserCtxtPtr ctxt) {
1.123 daniel 3837: xmlChar *name;
1.22 daniel 3838:
3839: name = xmlParseName(ctxt);
1.139 daniel 3840: if ((name != NULL) &&
1.22 daniel 3841: ((name[0] == 'x') || (name[0] == 'X')) &&
1.31 daniel 3842: ((name[1] == 'm') || (name[1] == 'M')) &&
3843: ((name[2] == 'l') || (name[2] == 'L'))) {
1.139 daniel 3844: int i;
1.151 daniel 3845: if ((name[0] = 'x') && (name[1] == 'm') &&
3846: (name[2] = 'l') && (name[3] == 0)) {
3847: if ((ctxt->sax != NULL) && (ctxt->sax->error != NULL))
3848: ctxt->sax->error(ctxt->userData,
3849: "XML declaration allowed only at the start of the document\n");
3850: ctxt->errNo = XML_ERR_RESERVED_XML_NAME;
3851: ctxt->wellFormed = 0;
3852: return(name);
3853: } else if (name[3] == 0) {
3854: if ((ctxt->sax != NULL) && (ctxt->sax->error != NULL))
3855: ctxt->sax->error(ctxt->userData, "Invalid PI name\n");
3856: ctxt->errNo = XML_ERR_RESERVED_XML_NAME;
3857: ctxt->wellFormed = 0;
3858: return(name);
3859: }
1.139 daniel 3860: for (i = 0;;i++) {
3861: if (xmlW3CPIs[i] == NULL) break;
3862: if (!xmlStrcmp(name, (const xmlChar *)xmlW3CPIs[i]))
3863: return(name);
3864: }
3865: if ((ctxt->sax != NULL) && (ctxt->sax->warning != NULL)) {
3866: ctxt->sax->warning(ctxt->userData,
1.122 daniel 3867: "xmlParsePItarget: invalid name prefix 'xml'\n");
1.123 daniel 3868: ctxt->errNo = XML_ERR_RESERVED_XML_NAME;
1.122 daniel 3869: }
1.22 daniel 3870: }
3871: return(name);
3872: }
3873:
1.50 daniel 3874: /**
3875: * xmlParsePI:
3876: * @ctxt: an XML parser context
3877: *
3878: * parse an XML Processing Instruction.
1.22 daniel 3879: *
3880: * [16] PI ::= '<?' PITarget (S (Char* - (Char* '?>' Char*)))? '?>'
1.68 daniel 3881: *
1.69 daniel 3882: * The processing is transfered to SAX once parsed.
1.3 veillard 3883: */
3884:
1.55 daniel 3885: void
3886: xmlParsePI(xmlParserCtxtPtr ctxt) {
1.135 daniel 3887: xmlChar *buf = NULL;
3888: int len = 0;
1.140 daniel 3889: int size = XML_PARSER_BUFFER_SIZE;
1.152 daniel 3890: int cur, l;
1.123 daniel 3891: xmlChar *target;
1.140 daniel 3892: xmlParserInputState state;
1.22 daniel 3893:
1.152 daniel 3894: if ((RAW == '<') && (NXT(1) == '?')) {
1.140 daniel 3895: state = ctxt->instate;
3896: ctxt->instate = XML_PARSER_PI;
1.3 veillard 3897: /*
3898: * this is a Processing Instruction.
3899: */
1.40 daniel 3900: SKIP(2);
1.91 daniel 3901: SHRINK;
1.3 veillard 3902:
3903: /*
1.22 daniel 3904: * Parse the target name and check for special support like
3905: * namespace.
1.3 veillard 3906: */
1.22 daniel 3907: target = xmlParsePITarget(ctxt);
3908: if (target != NULL) {
1.156 ! daniel 3909: if ((RAW == '?') && (NXT(1) == '>')) {
! 3910: SKIP(2);
! 3911:
! 3912: /*
! 3913: * SAX: PI detected.
! 3914: */
! 3915: if ((ctxt->sax) &&
! 3916: (ctxt->sax->processingInstruction != NULL))
! 3917: ctxt->sax->processingInstruction(ctxt->userData,
! 3918: target, NULL);
! 3919: ctxt->instate = state;
! 3920: return;
! 3921: }
1.135 daniel 3922: buf = (xmlChar *) xmlMalloc(size * sizeof(xmlChar));
3923: if (buf == NULL) {
3924: fprintf(stderr, "malloc of %d byte failed\n", size);
1.140 daniel 3925: ctxt->instate = state;
1.135 daniel 3926: return;
3927: }
3928: cur = CUR;
3929: if (!IS_BLANK(cur)) {
1.114 daniel 3930: if ((ctxt->sax != NULL) && (ctxt->sax->error != NULL))
3931: ctxt->sax->error(ctxt->userData,
3932: "xmlParsePI: PI %s space expected\n", target);
1.123 daniel 3933: ctxt->errNo = XML_ERR_SPACE_REQUIRED;
1.114 daniel 3934: ctxt->wellFormed = 0;
3935: }
3936: SKIP_BLANKS;
1.152 daniel 3937: cur = CUR_CHAR(l);
1.135 daniel 3938: while (IS_CHAR(cur) &&
3939: ((cur != '?') || (NXT(1) != '>'))) {
1.152 daniel 3940: if (len + 5 >= size) {
1.135 daniel 3941: size *= 2;
3942: buf = xmlRealloc(buf, size * sizeof(xmlChar));
3943: if (buf == NULL) {
3944: fprintf(stderr, "realloc of %d byte failed\n", size);
1.140 daniel 3945: ctxt->instate = state;
1.135 daniel 3946: return;
3947: }
3948: }
1.152 daniel 3949: COPY_BUF(l,buf,len,cur);
3950: NEXTL(l);
3951: cur = CUR_CHAR(l);
1.135 daniel 3952: if (cur == 0) {
3953: SHRINK;
3954: GROW;
1.152 daniel 3955: cur = CUR_CHAR(l);
1.135 daniel 3956: }
3957: }
3958: buf[len] = 0;
1.152 daniel 3959: if (cur != '?') {
1.72 daniel 3960: if ((ctxt->sax != NULL) && (ctxt->sax->error != NULL))
1.74 daniel 3961: ctxt->sax->error(ctxt->userData,
1.72 daniel 3962: "xmlParsePI: PI %s never end ...\n", target);
1.123 daniel 3963: ctxt->errNo = XML_ERR_PI_NOT_FINISHED;
1.72 daniel 3964: ctxt->wellFormed = 0;
1.22 daniel 3965: } else {
1.72 daniel 3966: SKIP(2);
1.44 daniel 3967:
1.72 daniel 3968: /*
3969: * SAX: PI detected.
3970: */
3971: if ((ctxt->sax) &&
3972: (ctxt->sax->processingInstruction != NULL))
1.99 daniel 3973: ctxt->sax->processingInstruction(ctxt->userData,
1.135 daniel 3974: target, buf);
1.22 daniel 3975: }
1.135 daniel 3976: xmlFree(buf);
1.119 daniel 3977: xmlFree(target);
1.3 veillard 3978: } else {
1.55 daniel 3979: if ((ctxt->sax != NULL) && (ctxt->sax->error != NULL))
1.99 daniel 3980: ctxt->sax->error(ctxt->userData,
3981: "xmlParsePI : no target name\n");
1.123 daniel 3982: ctxt->errNo = XML_ERR_PI_NOT_STARTED;
1.59 daniel 3983: ctxt->wellFormed = 0;
1.22 daniel 3984: }
1.140 daniel 3985: ctxt->instate = state;
1.22 daniel 3986: }
3987: }
3988:
1.50 daniel 3989: /**
3990: * xmlParseNotationDecl:
3991: * @ctxt: an XML parser context
3992: *
3993: * parse a notation declaration
1.22 daniel 3994: *
3995: * [82] NotationDecl ::= '<!NOTATION' S Name S (ExternalID | PublicID) S? '>'
3996: *
3997: * Hence there is actually 3 choices:
3998: * 'PUBLIC' S PubidLiteral
3999: * 'PUBLIC' S PubidLiteral S SystemLiteral
4000: * and 'SYSTEM' S SystemLiteral
1.50 daniel 4001: *
1.67 daniel 4002: * See the NOTE on xmlParseExternalID().
1.22 daniel 4003: */
4004:
1.55 daniel 4005: void
4006: xmlParseNotationDecl(xmlParserCtxtPtr ctxt) {
1.123 daniel 4007: xmlChar *name;
4008: xmlChar *Pubid;
4009: xmlChar *Systemid;
1.22 daniel 4010:
1.152 daniel 4011: if ((RAW == '<') && (NXT(1) == '!') &&
1.40 daniel 4012: (NXT(2) == 'N') && (NXT(3) == 'O') &&
4013: (NXT(4) == 'T') && (NXT(5) == 'A') &&
4014: (NXT(6) == 'T') && (NXT(7) == 'I') &&
1.67 daniel 4015: (NXT(8) == 'O') && (NXT(9) == 'N')) {
1.91 daniel 4016: SHRINK;
1.40 daniel 4017: SKIP(10);
1.67 daniel 4018: if (!IS_BLANK(CUR)) {
4019: if ((ctxt->sax != NULL) && (ctxt->sax->error != NULL))
1.122 daniel 4020: ctxt->sax->error(ctxt->userData,
4021: "Space required after '<!NOTATION'\n");
1.123 daniel 4022: ctxt->errNo = XML_ERR_SPACE_REQUIRED;
1.67 daniel 4023: ctxt->wellFormed = 0;
4024: return;
4025: }
4026: SKIP_BLANKS;
1.22 daniel 4027:
4028: name = xmlParseName(ctxt);
4029: if (name == NULL) {
1.55 daniel 4030: if ((ctxt->sax != NULL) && (ctxt->sax->error != NULL))
1.122 daniel 4031: ctxt->sax->error(ctxt->userData,
4032: "NOTATION: Name expected here\n");
1.123 daniel 4033: ctxt->errNo = XML_ERR_NOTATION_NOT_STARTED;
1.67 daniel 4034: ctxt->wellFormed = 0;
4035: return;
4036: }
4037: if (!IS_BLANK(CUR)) {
4038: if ((ctxt->sax != NULL) && (ctxt->sax->error != NULL))
1.74 daniel 4039: ctxt->sax->error(ctxt->userData,
1.67 daniel 4040: "Space required after the NOTATION name'\n");
1.123 daniel 4041: ctxt->errNo = XML_ERR_SPACE_REQUIRED;
1.59 daniel 4042: ctxt->wellFormed = 0;
1.22 daniel 4043: return;
4044: }
1.42 daniel 4045: SKIP_BLANKS;
1.67 daniel 4046:
1.22 daniel 4047: /*
1.67 daniel 4048: * Parse the IDs.
1.22 daniel 4049: */
1.67 daniel 4050: Systemid = xmlParseExternalID(ctxt, &Pubid, 1);
4051: SKIP_BLANKS;
4052:
1.152 daniel 4053: if (RAW == '>') {
1.40 daniel 4054: NEXT;
1.72 daniel 4055: if ((ctxt->sax != NULL) && (ctxt->sax->notationDecl != NULL))
1.74 daniel 4056: ctxt->sax->notationDecl(ctxt->userData, name, Pubid, Systemid);
1.67 daniel 4057: } else {
4058: if ((ctxt->sax != NULL) && (ctxt->sax->error != NULL))
1.74 daniel 4059: ctxt->sax->error(ctxt->userData,
1.67 daniel 4060: "'>' required to close NOTATION declaration\n");
1.123 daniel 4061: ctxt->errNo = XML_ERR_NOTATION_NOT_FINISHED;
1.67 daniel 4062: ctxt->wellFormed = 0;
4063: }
1.119 daniel 4064: xmlFree(name);
4065: if (Systemid != NULL) xmlFree(Systemid);
4066: if (Pubid != NULL) xmlFree(Pubid);
1.22 daniel 4067: }
4068: }
4069:
1.50 daniel 4070: /**
4071: * xmlParseEntityDecl:
4072: * @ctxt: an XML parser context
4073: *
4074: * parse <!ENTITY declarations
1.22 daniel 4075: *
4076: * [70] EntityDecl ::= GEDecl | PEDecl
4077: *
4078: * [71] GEDecl ::= '<!ENTITY' S Name S EntityDef S? '>'
4079: *
4080: * [72] PEDecl ::= '<!ENTITY' S '%' S Name S PEDef S? '>'
4081: *
4082: * [73] EntityDef ::= EntityValue | (ExternalID NDataDecl?)
4083: *
4084: * [74] PEDef ::= EntityValue | ExternalID
1.24 daniel 4085: *
4086: * [76] NDataDecl ::= S 'NDATA' S Name
1.99 daniel 4087: *
4088: * [ VC: Notation Declared ]
1.116 daniel 4089: * The Name must match the declared name of a notation.
1.22 daniel 4090: */
4091:
1.55 daniel 4092: void
4093: xmlParseEntityDecl(xmlParserCtxtPtr ctxt) {
1.123 daniel 4094: xmlChar *name = NULL;
4095: xmlChar *value = NULL;
4096: xmlChar *URI = NULL, *literal = NULL;
4097: xmlChar *ndata = NULL;
1.39 daniel 4098: int isParameter = 0;
1.123 daniel 4099: xmlChar *orig = NULL;
1.22 daniel 4100:
1.94 daniel 4101: GROW;
1.152 daniel 4102: if ((RAW == '<') && (NXT(1) == '!') &&
1.40 daniel 4103: (NXT(2) == 'E') && (NXT(3) == 'N') &&
4104: (NXT(4) == 'T') && (NXT(5) == 'I') &&
1.59 daniel 4105: (NXT(6) == 'T') && (NXT(7) == 'Y')) {
1.96 daniel 4106: ctxt->instate = XML_PARSER_ENTITY_DECL;
1.91 daniel 4107: SHRINK;
1.40 daniel 4108: SKIP(8);
1.59 daniel 4109: if (!IS_BLANK(CUR)) {
4110: if ((ctxt->sax != NULL) && (ctxt->sax->error != NULL))
1.122 daniel 4111: ctxt->sax->error(ctxt->userData,
4112: "Space required after '<!ENTITY'\n");
1.123 daniel 4113: ctxt->errNo = XML_ERR_SPACE_REQUIRED;
1.59 daniel 4114: ctxt->wellFormed = 0;
4115: }
4116: SKIP_BLANKS;
1.40 daniel 4117:
1.152 daniel 4118: if (RAW == '%') {
1.40 daniel 4119: NEXT;
1.59 daniel 4120: if (!IS_BLANK(CUR)) {
4121: if ((ctxt->sax != NULL) && (ctxt->sax->error != NULL))
1.122 daniel 4122: ctxt->sax->error(ctxt->userData,
4123: "Space required after '%'\n");
1.123 daniel 4124: ctxt->errNo = XML_ERR_SPACE_REQUIRED;
1.59 daniel 4125: ctxt->wellFormed = 0;
4126: }
1.42 daniel 4127: SKIP_BLANKS;
1.39 daniel 4128: isParameter = 1;
1.22 daniel 4129: }
4130:
4131: name = xmlParseName(ctxt);
1.24 daniel 4132: if (name == NULL) {
1.55 daniel 4133: if ((ctxt->sax != NULL) && (ctxt->sax->error != NULL))
1.74 daniel 4134: ctxt->sax->error(ctxt->userData, "xmlParseEntityDecl: no name\n");
1.123 daniel 4135: ctxt->errNo = XML_ERR_NAME_REQUIRED;
1.59 daniel 4136: ctxt->wellFormed = 0;
1.24 daniel 4137: return;
4138: }
1.59 daniel 4139: if (!IS_BLANK(CUR)) {
4140: if ((ctxt->sax != NULL) && (ctxt->sax->error != NULL))
1.74 daniel 4141: ctxt->sax->error(ctxt->userData,
1.59 daniel 4142: "Space required after the entity name\n");
1.123 daniel 4143: ctxt->errNo = XML_ERR_SPACE_REQUIRED;
1.59 daniel 4144: ctxt->wellFormed = 0;
4145: }
1.42 daniel 4146: SKIP_BLANKS;
1.24 daniel 4147:
1.22 daniel 4148: /*
1.68 daniel 4149: * handle the various case of definitions...
1.22 daniel 4150: */
1.39 daniel 4151: if (isParameter) {
1.152 daniel 4152: if ((RAW == '"') || (RAW == '\''))
1.78 daniel 4153: value = xmlParseEntityValue(ctxt, &orig);
1.39 daniel 4154: if (value) {
1.72 daniel 4155: if ((ctxt->sax != NULL) && (ctxt->sax->entityDecl != NULL))
1.74 daniel 4156: ctxt->sax->entityDecl(ctxt->userData, name,
1.39 daniel 4157: XML_INTERNAL_PARAMETER_ENTITY,
4158: NULL, NULL, value);
4159: }
1.24 daniel 4160: else {
1.67 daniel 4161: URI = xmlParseExternalID(ctxt, &literal, 1);
1.39 daniel 4162: if (URI) {
1.72 daniel 4163: if ((ctxt->sax != NULL) && (ctxt->sax->entityDecl != NULL))
1.74 daniel 4164: ctxt->sax->entityDecl(ctxt->userData, name,
1.39 daniel 4165: XML_EXTERNAL_PARAMETER_ENTITY,
4166: literal, URI, NULL);
4167: }
1.24 daniel 4168: }
4169: } else {
1.152 daniel 4170: if ((RAW == '"') || (RAW == '\'')) {
1.78 daniel 4171: value = xmlParseEntityValue(ctxt, &orig);
1.72 daniel 4172: if ((ctxt->sax != NULL) && (ctxt->sax->entityDecl != NULL))
1.74 daniel 4173: ctxt->sax->entityDecl(ctxt->userData, name,
1.39 daniel 4174: XML_INTERNAL_GENERAL_ENTITY,
4175: NULL, NULL, value);
4176: } else {
1.67 daniel 4177: URI = xmlParseExternalID(ctxt, &literal, 1);
1.152 daniel 4178: if ((RAW != '>') && (!IS_BLANK(CUR))) {
1.59 daniel 4179: if ((ctxt->sax != NULL) && (ctxt->sax->error != NULL))
1.74 daniel 4180: ctxt->sax->error(ctxt->userData,
1.59 daniel 4181: "Space required before 'NDATA'\n");
1.123 daniel 4182: ctxt->errNo = XML_ERR_SPACE_REQUIRED;
1.59 daniel 4183: ctxt->wellFormed = 0;
4184: }
1.42 daniel 4185: SKIP_BLANKS;
1.152 daniel 4186: if ((RAW == 'N') && (NXT(1) == 'D') &&
1.40 daniel 4187: (NXT(2) == 'A') && (NXT(3) == 'T') &&
4188: (NXT(4) == 'A')) {
4189: SKIP(5);
1.59 daniel 4190: if (!IS_BLANK(CUR)) {
4191: if ((ctxt->sax != NULL) && (ctxt->sax->error != NULL))
1.74 daniel 4192: ctxt->sax->error(ctxt->userData,
1.59 daniel 4193: "Space required after 'NDATA'\n");
1.123 daniel 4194: ctxt->errNo = XML_ERR_SPACE_REQUIRED;
1.59 daniel 4195: ctxt->wellFormed = 0;
4196: }
1.42 daniel 4197: SKIP_BLANKS;
1.24 daniel 4198: ndata = xmlParseName(ctxt);
1.116 daniel 4199: if ((ctxt->sax != NULL) &&
4200: (ctxt->sax->unparsedEntityDecl != NULL))
4201: ctxt->sax->unparsedEntityDecl(ctxt->userData, name,
1.39 daniel 4202: literal, URI, ndata);
4203: } else {
1.72 daniel 4204: if ((ctxt->sax != NULL) && (ctxt->sax->entityDecl != NULL))
1.74 daniel 4205: ctxt->sax->entityDecl(ctxt->userData, name,
1.39 daniel 4206: XML_EXTERNAL_GENERAL_PARSED_ENTITY,
4207: literal, URI, NULL);
1.24 daniel 4208: }
4209: }
4210: }
1.42 daniel 4211: SKIP_BLANKS;
1.152 daniel 4212: if (RAW != '>') {
1.55 daniel 4213: if ((ctxt->sax != NULL) && (ctxt->sax->error != NULL))
1.74 daniel 4214: ctxt->sax->error(ctxt->userData,
1.31 daniel 4215: "xmlParseEntityDecl: entity %s not terminated\n", name);
1.123 daniel 4216: ctxt->errNo = XML_ERR_ENTITY_NOT_FINISHED;
1.59 daniel 4217: ctxt->wellFormed = 0;
1.24 daniel 4218: } else
1.40 daniel 4219: NEXT;
1.78 daniel 4220: if (orig != NULL) {
4221: /*
1.98 daniel 4222: * Ugly mechanism to save the raw entity value.
1.78 daniel 4223: */
4224: xmlEntityPtr cur = NULL;
4225:
1.98 daniel 4226: if (isParameter) {
4227: if ((ctxt->sax != NULL) &&
4228: (ctxt->sax->getParameterEntity != NULL))
1.120 daniel 4229: cur = ctxt->sax->getParameterEntity(ctxt->userData, name);
1.98 daniel 4230: } else {
4231: if ((ctxt->sax != NULL) &&
4232: (ctxt->sax->getEntity != NULL))
1.120 daniel 4233: cur = ctxt->sax->getEntity(ctxt->userData, name);
1.98 daniel 4234: }
4235: if (cur != NULL) {
4236: if (cur->orig != NULL)
1.119 daniel 4237: xmlFree(orig);
1.98 daniel 4238: else
4239: cur->orig = orig;
4240: } else
1.119 daniel 4241: xmlFree(orig);
1.78 daniel 4242: }
1.119 daniel 4243: if (name != NULL) xmlFree(name);
4244: if (value != NULL) xmlFree(value);
4245: if (URI != NULL) xmlFree(URI);
4246: if (literal != NULL) xmlFree(literal);
4247: if (ndata != NULL) xmlFree(ndata);
1.22 daniel 4248: }
4249: }
4250:
1.50 daniel 4251: /**
1.59 daniel 4252: * xmlParseDefaultDecl:
4253: * @ctxt: an XML parser context
4254: * @value: Receive a possible fixed default value for the attribute
4255: *
4256: * Parse an attribute default declaration
4257: *
4258: * [60] DefaultDecl ::= '#REQUIRED' | '#IMPLIED' | (('#FIXED' S)? AttValue)
4259: *
1.99 daniel 4260: * [ VC: Required Attribute ]
1.117 daniel 4261: * if the default declaration is the keyword #REQUIRED, then the
4262: * attribute must be specified for all elements of the type in the
4263: * attribute-list declaration.
1.99 daniel 4264: *
4265: * [ VC: Attribute Default Legal ]
1.102 daniel 4266: * The declared default value must meet the lexical constraints of
4267: * the declared attribute type c.f. xmlValidateAttributeDecl()
1.99 daniel 4268: *
4269: * [ VC: Fixed Attribute Default ]
1.117 daniel 4270: * if an attribute has a default value declared with the #FIXED
4271: * keyword, instances of that attribute must match the default value.
1.99 daniel 4272: *
4273: * [ WFC: No < in Attribute Values ]
4274: * handled in xmlParseAttValue()
4275: *
1.59 daniel 4276: * returns: XML_ATTRIBUTE_NONE, XML_ATTRIBUTE_REQUIRED, XML_ATTRIBUTE_IMPLIED
4277: * or XML_ATTRIBUTE_FIXED.
4278: */
4279:
4280: int
1.123 daniel 4281: xmlParseDefaultDecl(xmlParserCtxtPtr ctxt, xmlChar **value) {
1.59 daniel 4282: int val;
1.123 daniel 4283: xmlChar *ret;
1.59 daniel 4284:
4285: *value = NULL;
1.152 daniel 4286: if ((RAW == '#') && (NXT(1) == 'R') &&
1.59 daniel 4287: (NXT(2) == 'E') && (NXT(3) == 'Q') &&
4288: (NXT(4) == 'U') && (NXT(5) == 'I') &&
4289: (NXT(6) == 'R') && (NXT(7) == 'E') &&
4290: (NXT(8) == 'D')) {
4291: SKIP(9);
4292: return(XML_ATTRIBUTE_REQUIRED);
4293: }
1.152 daniel 4294: if ((RAW == '#') && (NXT(1) == 'I') &&
1.59 daniel 4295: (NXT(2) == 'M') && (NXT(3) == 'P') &&
4296: (NXT(4) == 'L') && (NXT(5) == 'I') &&
4297: (NXT(6) == 'E') && (NXT(7) == 'D')) {
4298: SKIP(8);
4299: return(XML_ATTRIBUTE_IMPLIED);
4300: }
4301: val = XML_ATTRIBUTE_NONE;
1.152 daniel 4302: if ((RAW == '#') && (NXT(1) == 'F') &&
1.59 daniel 4303: (NXT(2) == 'I') && (NXT(3) == 'X') &&
4304: (NXT(4) == 'E') && (NXT(5) == 'D')) {
4305: SKIP(6);
4306: val = XML_ATTRIBUTE_FIXED;
4307: if (!IS_BLANK(CUR)) {
4308: if ((ctxt->sax != NULL) && (ctxt->sax->error != NULL))
1.122 daniel 4309: ctxt->sax->error(ctxt->userData,
4310: "Space required after '#FIXED'\n");
1.123 daniel 4311: ctxt->errNo = XML_ERR_SPACE_REQUIRED;
1.59 daniel 4312: ctxt->wellFormed = 0;
4313: }
4314: SKIP_BLANKS;
4315: }
4316: ret = xmlParseAttValue(ctxt);
1.96 daniel 4317: ctxt->instate = XML_PARSER_DTD;
1.59 daniel 4318: if (ret == NULL) {
4319: if ((ctxt->sax != NULL) && (ctxt->sax->error != NULL))
1.74 daniel 4320: ctxt->sax->error(ctxt->userData,
1.59 daniel 4321: "Attribute default value declaration error\n");
4322: ctxt->wellFormed = 0;
4323: } else
4324: *value = ret;
4325: return(val);
4326: }
4327:
4328: /**
1.66 daniel 4329: * xmlParseNotationType:
4330: * @ctxt: an XML parser context
4331: *
4332: * parse an Notation attribute type.
4333: *
1.99 daniel 4334: * Note: the leading 'NOTATION' S part has already being parsed...
4335: *
1.66 daniel 4336: * [58] NotationType ::= 'NOTATION' S '(' S? Name (S? '|' S? Name)* S? ')'
4337: *
1.99 daniel 4338: * [ VC: Notation Attributes ]
1.117 daniel 4339: * Values of this type must match one of the notation names included
1.99 daniel 4340: * in the declaration; all notation names in the declaration must be declared.
1.66 daniel 4341: *
4342: * Returns: the notation attribute tree built while parsing
4343: */
4344:
4345: xmlEnumerationPtr
4346: xmlParseNotationType(xmlParserCtxtPtr ctxt) {
1.123 daniel 4347: xmlChar *name;
1.66 daniel 4348: xmlEnumerationPtr ret = NULL, last = NULL, cur;
4349:
1.152 daniel 4350: if (RAW != '(') {
1.66 daniel 4351: if ((ctxt->sax != NULL) && (ctxt->sax->error != NULL))
1.122 daniel 4352: ctxt->sax->error(ctxt->userData,
4353: "'(' required to start 'NOTATION'\n");
1.123 daniel 4354: ctxt->errNo = XML_ERR_NOTATION_NOT_STARTED;
1.66 daniel 4355: ctxt->wellFormed = 0;
4356: return(NULL);
4357: }
1.91 daniel 4358: SHRINK;
1.66 daniel 4359: do {
4360: NEXT;
4361: SKIP_BLANKS;
4362: name = xmlParseName(ctxt);
4363: if (name == NULL) {
4364: if ((ctxt->sax != NULL) && (ctxt->sax->error != NULL))
1.74 daniel 4365: ctxt->sax->error(ctxt->userData,
1.66 daniel 4366: "Name expected in NOTATION declaration\n");
1.123 daniel 4367: ctxt->errNo = XML_ERR_NAME_REQUIRED;
1.66 daniel 4368: ctxt->wellFormed = 0;
4369: return(ret);
4370: }
4371: cur = xmlCreateEnumeration(name);
1.119 daniel 4372: xmlFree(name);
1.66 daniel 4373: if (cur == NULL) return(ret);
4374: if (last == NULL) ret = last = cur;
4375: else {
4376: last->next = cur;
4377: last = cur;
4378: }
4379: SKIP_BLANKS;
1.152 daniel 4380: } while (RAW == '|');
4381: if (RAW != ')') {
1.66 daniel 4382: if ((ctxt->sax != NULL) && (ctxt->sax->error != NULL))
1.74 daniel 4383: ctxt->sax->error(ctxt->userData,
1.66 daniel 4384: "')' required to finish NOTATION declaration\n");
1.123 daniel 4385: ctxt->errNo = XML_ERR_NOTATION_NOT_FINISHED;
1.66 daniel 4386: ctxt->wellFormed = 0;
4387: return(ret);
4388: }
4389: NEXT;
4390: return(ret);
4391: }
4392:
4393: /**
4394: * xmlParseEnumerationType:
4395: * @ctxt: an XML parser context
4396: *
4397: * parse an Enumeration attribute type.
4398: *
4399: * [59] Enumeration ::= '(' S? Nmtoken (S? '|' S? Nmtoken)* S? ')'
4400: *
1.99 daniel 4401: * [ VC: Enumeration ]
1.117 daniel 4402: * Values of this type must match one of the Nmtoken tokens in
1.99 daniel 4403: * the declaration
4404: *
1.66 daniel 4405: * Returns: the enumeration attribute tree built while parsing
4406: */
4407:
4408: xmlEnumerationPtr
4409: xmlParseEnumerationType(xmlParserCtxtPtr ctxt) {
1.123 daniel 4410: xmlChar *name;
1.66 daniel 4411: xmlEnumerationPtr ret = NULL, last = NULL, cur;
4412:
1.152 daniel 4413: if (RAW != '(') {
1.66 daniel 4414: if ((ctxt->sax != NULL) && (ctxt->sax->error != NULL))
1.74 daniel 4415: ctxt->sax->error(ctxt->userData,
1.66 daniel 4416: "'(' required to start ATTLIST enumeration\n");
1.123 daniel 4417: ctxt->errNo = XML_ERR_ATTLIST_NOT_STARTED;
1.66 daniel 4418: ctxt->wellFormed = 0;
4419: return(NULL);
4420: }
1.91 daniel 4421: SHRINK;
1.66 daniel 4422: do {
4423: NEXT;
4424: SKIP_BLANKS;
4425: name = xmlParseNmtoken(ctxt);
4426: if (name == NULL) {
4427: if ((ctxt->sax != NULL) && (ctxt->sax->error != NULL))
1.74 daniel 4428: ctxt->sax->error(ctxt->userData,
1.66 daniel 4429: "NmToken expected in ATTLIST enumeration\n");
1.123 daniel 4430: ctxt->errNo = XML_ERR_NMTOKEN_REQUIRED;
1.66 daniel 4431: ctxt->wellFormed = 0;
4432: return(ret);
4433: }
4434: cur = xmlCreateEnumeration(name);
1.119 daniel 4435: xmlFree(name);
1.66 daniel 4436: if (cur == NULL) return(ret);
4437: if (last == NULL) ret = last = cur;
4438: else {
4439: last->next = cur;
4440: last = cur;
4441: }
4442: SKIP_BLANKS;
1.152 daniel 4443: } while (RAW == '|');
4444: if (RAW != ')') {
1.66 daniel 4445: if ((ctxt->sax != NULL) && (ctxt->sax->error != NULL))
1.74 daniel 4446: ctxt->sax->error(ctxt->userData,
1.66 daniel 4447: "')' required to finish ATTLIST enumeration\n");
1.123 daniel 4448: ctxt->errNo = XML_ERR_ATTLIST_NOT_FINISHED;
1.66 daniel 4449: ctxt->wellFormed = 0;
4450: return(ret);
4451: }
4452: NEXT;
4453: return(ret);
4454: }
4455:
4456: /**
1.50 daniel 4457: * xmlParseEnumeratedType:
4458: * @ctxt: an XML parser context
1.66 daniel 4459: * @tree: the enumeration tree built while parsing
1.50 daniel 4460: *
1.66 daniel 4461: * parse an Enumerated attribute type.
1.22 daniel 4462: *
4463: * [57] EnumeratedType ::= NotationType | Enumeration
4464: *
4465: * [58] NotationType ::= 'NOTATION' S '(' S? Name (S? '|' S? Name)* S? ')'
4466: *
1.50 daniel 4467: *
1.66 daniel 4468: * Returns: XML_ATTRIBUTE_ENUMERATION or XML_ATTRIBUTE_NOTATION
1.22 daniel 4469: */
4470:
1.66 daniel 4471: int
4472: xmlParseEnumeratedType(xmlParserCtxtPtr ctxt, xmlEnumerationPtr *tree) {
1.152 daniel 4473: if ((RAW == 'N') && (NXT(1) == 'O') &&
1.66 daniel 4474: (NXT(2) == 'T') && (NXT(3) == 'A') &&
4475: (NXT(4) == 'T') && (NXT(5) == 'I') &&
4476: (NXT(6) == 'O') && (NXT(7) == 'N')) {
4477: SKIP(8);
4478: if (!IS_BLANK(CUR)) {
4479: if ((ctxt->sax != NULL) && (ctxt->sax->error != NULL))
1.122 daniel 4480: ctxt->sax->error(ctxt->userData,
4481: "Space required after 'NOTATION'\n");
1.123 daniel 4482: ctxt->errNo = XML_ERR_SPACE_REQUIRED;
1.66 daniel 4483: ctxt->wellFormed = 0;
4484: return(0);
4485: }
4486: SKIP_BLANKS;
4487: *tree = xmlParseNotationType(ctxt);
4488: if (*tree == NULL) return(0);
4489: return(XML_ATTRIBUTE_NOTATION);
4490: }
4491: *tree = xmlParseEnumerationType(ctxt);
4492: if (*tree == NULL) return(0);
4493: return(XML_ATTRIBUTE_ENUMERATION);
1.22 daniel 4494: }
4495:
1.50 daniel 4496: /**
4497: * xmlParseAttributeType:
4498: * @ctxt: an XML parser context
1.66 daniel 4499: * @tree: the enumeration tree built while parsing
1.50 daniel 4500: *
1.59 daniel 4501: * parse the Attribute list def for an element
1.22 daniel 4502: *
4503: * [54] AttType ::= StringType | TokenizedType | EnumeratedType
4504: *
4505: * [55] StringType ::= 'CDATA'
4506: *
4507: * [56] TokenizedType ::= 'ID' | 'IDREF' | 'IDREFS' | 'ENTITY' |
4508: * 'ENTITIES' | 'NMTOKEN' | 'NMTOKENS'
1.50 daniel 4509: *
1.102 daniel 4510: * Validity constraints for attribute values syntax are checked in
4511: * xmlValidateAttributeValue()
4512: *
1.99 daniel 4513: * [ VC: ID ]
1.117 daniel 4514: * Values of type ID must match the Name production. A name must not
1.99 daniel 4515: * appear more than once in an XML document as a value of this type;
4516: * i.e., ID values must uniquely identify the elements which bear them.
4517: *
4518: * [ VC: One ID per Element Type ]
1.117 daniel 4519: * No element type may have more than one ID attribute specified.
1.99 daniel 4520: *
4521: * [ VC: ID Attribute Default ]
1.117 daniel 4522: * An ID attribute must have a declared default of #IMPLIED or #REQUIRED.
1.99 daniel 4523: *
4524: * [ VC: IDREF ]
1.102 daniel 4525: * Values of type IDREF must match the Name production, and values
1.140 daniel 4526: * of type IDREFS must match Names; each IDREF Name must match the value
1.117 daniel 4527: * of an ID attribute on some element in the XML document; i.e. IDREF
1.99 daniel 4528: * values must match the value of some ID attribute.
4529: *
4530: * [ VC: Entity Name ]
1.102 daniel 4531: * Values of type ENTITY must match the Name production, values
1.140 daniel 4532: * of type ENTITIES must match Names; each Entity Name must match the
1.117 daniel 4533: * name of an unparsed entity declared in the DTD.
1.99 daniel 4534: *
4535: * [ VC: Name Token ]
1.102 daniel 4536: * Values of type NMTOKEN must match the Nmtoken production; values
1.99 daniel 4537: * of type NMTOKENS must match Nmtokens.
4538: *
1.69 daniel 4539: * Returns the attribute type
1.22 daniel 4540: */
1.59 daniel 4541: int
1.66 daniel 4542: xmlParseAttributeType(xmlParserCtxtPtr ctxt, xmlEnumerationPtr *tree) {
1.91 daniel 4543: SHRINK;
1.152 daniel 4544: if ((RAW == 'C') && (NXT(1) == 'D') &&
1.40 daniel 4545: (NXT(2) == 'A') && (NXT(3) == 'T') &&
4546: (NXT(4) == 'A')) {
4547: SKIP(5);
1.66 daniel 4548: return(XML_ATTRIBUTE_CDATA);
1.152 daniel 4549: } else if ((RAW == 'I') && (NXT(1) == 'D') &&
1.40 daniel 4550: (NXT(2) == 'R') && (NXT(3) == 'E') &&
1.97 daniel 4551: (NXT(4) == 'F') && (NXT(5) == 'S')) {
4552: SKIP(6);
4553: return(XML_ATTRIBUTE_IDREFS);
1.152 daniel 4554: } else if ((RAW == 'I') && (NXT(1) == 'D') &&
1.97 daniel 4555: (NXT(2) == 'R') && (NXT(3) == 'E') &&
1.40 daniel 4556: (NXT(4) == 'F')) {
4557: SKIP(5);
1.59 daniel 4558: return(XML_ATTRIBUTE_IDREF);
1.152 daniel 4559: } else if ((RAW == 'I') && (NXT(1) == 'D')) {
1.66 daniel 4560: SKIP(2);
4561: return(XML_ATTRIBUTE_ID);
1.152 daniel 4562: } else if ((RAW == 'E') && (NXT(1) == 'N') &&
1.40 daniel 4563: (NXT(2) == 'T') && (NXT(3) == 'I') &&
4564: (NXT(4) == 'T') && (NXT(5) == 'Y')) {
4565: SKIP(6);
1.59 daniel 4566: return(XML_ATTRIBUTE_ENTITY);
1.152 daniel 4567: } else if ((RAW == 'E') && (NXT(1) == 'N') &&
1.40 daniel 4568: (NXT(2) == 'T') && (NXT(3) == 'I') &&
4569: (NXT(4) == 'T') && (NXT(5) == 'I') &&
4570: (NXT(6) == 'E') && (NXT(7) == 'S')) {
4571: SKIP(8);
1.59 daniel 4572: return(XML_ATTRIBUTE_ENTITIES);
1.152 daniel 4573: } else if ((RAW == 'N') && (NXT(1) == 'M') &&
1.40 daniel 4574: (NXT(2) == 'T') && (NXT(3) == 'O') &&
4575: (NXT(4) == 'K') && (NXT(5) == 'E') &&
1.66 daniel 4576: (NXT(6) == 'N') && (NXT(7) == 'S')) {
4577: SKIP(8);
4578: return(XML_ATTRIBUTE_NMTOKENS);
1.152 daniel 4579: } else if ((RAW == 'N') && (NXT(1) == 'M') &&
1.66 daniel 4580: (NXT(2) == 'T') && (NXT(3) == 'O') &&
4581: (NXT(4) == 'K') && (NXT(5) == 'E') &&
1.40 daniel 4582: (NXT(6) == 'N')) {
4583: SKIP(7);
1.59 daniel 4584: return(XML_ATTRIBUTE_NMTOKEN);
1.22 daniel 4585: }
1.66 daniel 4586: return(xmlParseEnumeratedType(ctxt, tree));
1.22 daniel 4587: }
4588:
1.50 daniel 4589: /**
4590: * xmlParseAttributeListDecl:
4591: * @ctxt: an XML parser context
4592: *
4593: * : parse the Attribute list def for an element
1.22 daniel 4594: *
4595: * [52] AttlistDecl ::= '<!ATTLIST' S Name AttDef* S? '>'
4596: *
4597: * [53] AttDef ::= S Name S AttType S DefaultDecl
1.50 daniel 4598: *
1.22 daniel 4599: */
1.55 daniel 4600: void
4601: xmlParseAttributeListDecl(xmlParserCtxtPtr ctxt) {
1.123 daniel 4602: xmlChar *elemName;
4603: xmlChar *attrName;
1.103 daniel 4604: xmlEnumerationPtr tree;
1.22 daniel 4605:
1.152 daniel 4606: if ((RAW == '<') && (NXT(1) == '!') &&
1.40 daniel 4607: (NXT(2) == 'A') && (NXT(3) == 'T') &&
4608: (NXT(4) == 'T') && (NXT(5) == 'L') &&
4609: (NXT(6) == 'I') && (NXT(7) == 'S') &&
1.59 daniel 4610: (NXT(8) == 'T')) {
1.40 daniel 4611: SKIP(9);
1.59 daniel 4612: if (!IS_BLANK(CUR)) {
4613: if ((ctxt->sax != NULL) && (ctxt->sax->error != NULL))
1.122 daniel 4614: ctxt->sax->error(ctxt->userData,
4615: "Space required after '<!ATTLIST'\n");
1.123 daniel 4616: ctxt->errNo = XML_ERR_SPACE_REQUIRED;
1.59 daniel 4617: ctxt->wellFormed = 0;
4618: }
1.42 daniel 4619: SKIP_BLANKS;
1.59 daniel 4620: elemName = xmlParseName(ctxt);
4621: if (elemName == NULL) {
1.55 daniel 4622: if ((ctxt->sax != NULL) && (ctxt->sax->error != NULL))
1.122 daniel 4623: ctxt->sax->error(ctxt->userData,
4624: "ATTLIST: no name for Element\n");
1.123 daniel 4625: ctxt->errNo = XML_ERR_NAME_REQUIRED;
1.59 daniel 4626: ctxt->wellFormed = 0;
1.22 daniel 4627: return;
4628: }
1.42 daniel 4629: SKIP_BLANKS;
1.152 daniel 4630: while (RAW != '>') {
1.123 daniel 4631: const xmlChar *check = CUR_PTR;
1.59 daniel 4632: int type;
4633: int def;
1.123 daniel 4634: xmlChar *defaultValue = NULL;
1.59 daniel 4635:
1.103 daniel 4636: tree = NULL;
1.59 daniel 4637: attrName = xmlParseName(ctxt);
4638: if (attrName == NULL) {
4639: if ((ctxt->sax != NULL) && (ctxt->sax->error != NULL))
1.122 daniel 4640: ctxt->sax->error(ctxt->userData,
4641: "ATTLIST: no name for Attribute\n");
1.123 daniel 4642: ctxt->errNo = XML_ERR_NAME_REQUIRED;
1.59 daniel 4643: ctxt->wellFormed = 0;
4644: break;
4645: }
1.97 daniel 4646: GROW;
1.59 daniel 4647: if (!IS_BLANK(CUR)) {
4648: if ((ctxt->sax != NULL) && (ctxt->sax->error != NULL))
1.74 daniel 4649: ctxt->sax->error(ctxt->userData,
1.59 daniel 4650: "Space required after the attribute name\n");
1.123 daniel 4651: ctxt->errNo = XML_ERR_SPACE_REQUIRED;
1.59 daniel 4652: ctxt->wellFormed = 0;
4653: break;
4654: }
4655: SKIP_BLANKS;
4656:
1.66 daniel 4657: type = xmlParseAttributeType(ctxt, &tree);
1.59 daniel 4658: if (type <= 0) break;
1.22 daniel 4659:
1.97 daniel 4660: GROW;
1.59 daniel 4661: if (!IS_BLANK(CUR)) {
4662: if ((ctxt->sax != NULL) && (ctxt->sax->error != NULL))
1.74 daniel 4663: ctxt->sax->error(ctxt->userData,
1.59 daniel 4664: "Space required after the attribute type\n");
1.123 daniel 4665: ctxt->errNo = XML_ERR_SPACE_REQUIRED;
1.59 daniel 4666: ctxt->wellFormed = 0;
4667: break;
4668: }
1.42 daniel 4669: SKIP_BLANKS;
1.59 daniel 4670:
4671: def = xmlParseDefaultDecl(ctxt, &defaultValue);
4672: if (def <= 0) break;
4673:
1.97 daniel 4674: GROW;
1.152 daniel 4675: if (RAW != '>') {
1.59 daniel 4676: if (!IS_BLANK(CUR)) {
4677: if ((ctxt->sax != NULL) && (ctxt->sax->error != NULL))
1.74 daniel 4678: ctxt->sax->error(ctxt->userData,
1.59 daniel 4679: "Space required after the attribute default value\n");
1.123 daniel 4680: ctxt->errNo = XML_ERR_SPACE_REQUIRED;
1.59 daniel 4681: ctxt->wellFormed = 0;
4682: break;
4683: }
4684: SKIP_BLANKS;
4685: }
1.40 daniel 4686: if (check == CUR_PTR) {
1.55 daniel 4687: if ((ctxt->sax != NULL) && (ctxt->sax->error != NULL))
1.74 daniel 4688: ctxt->sax->error(ctxt->userData,
1.59 daniel 4689: "xmlParseAttributeListDecl: detected internal error\n");
1.123 daniel 4690: ctxt->errNo = XML_ERR_INTERNAL_ERROR;
1.22 daniel 4691: break;
4692: }
1.72 daniel 4693: if ((ctxt->sax != NULL) && (ctxt->sax->attributeDecl != NULL))
1.74 daniel 4694: ctxt->sax->attributeDecl(ctxt->userData, elemName, attrName,
1.66 daniel 4695: type, def, defaultValue, tree);
1.59 daniel 4696: if (attrName != NULL)
1.119 daniel 4697: xmlFree(attrName);
1.59 daniel 4698: if (defaultValue != NULL)
1.119 daniel 4699: xmlFree(defaultValue);
1.97 daniel 4700: GROW;
1.22 daniel 4701: }
1.152 daniel 4702: if (RAW == '>')
1.40 daniel 4703: NEXT;
1.22 daniel 4704:
1.119 daniel 4705: xmlFree(elemName);
1.22 daniel 4706: }
4707: }
4708:
1.50 daniel 4709: /**
1.61 daniel 4710: * xmlParseElementMixedContentDecl:
4711: * @ctxt: an XML parser context
4712: *
4713: * parse the declaration for a Mixed Element content
4714: * The leading '(' and spaces have been skipped in xmlParseElementContentDecl
4715: *
4716: * [51] Mixed ::= '(' S? '#PCDATA' (S? '|' S? Name)* S? ')*' |
4717: * '(' S? '#PCDATA' S? ')'
4718: *
1.99 daniel 4719: * [ VC: Proper Group/PE Nesting ] applies to [51] too (see [49])
4720: *
4721: * [ VC: No Duplicate Types ]
1.117 daniel 4722: * The same name must not appear more than once in a single
4723: * mixed-content declaration.
1.99 daniel 4724: *
1.61 daniel 4725: * returns: the list of the xmlElementContentPtr describing the element choices
4726: */
4727: xmlElementContentPtr
1.62 daniel 4728: xmlParseElementMixedContentDecl(xmlParserCtxtPtr ctxt) {
1.64 daniel 4729: xmlElementContentPtr ret = NULL, cur = NULL, n;
1.123 daniel 4730: xmlChar *elem = NULL;
1.61 daniel 4731:
1.97 daniel 4732: GROW;
1.152 daniel 4733: if ((RAW == '#') && (NXT(1) == 'P') &&
1.61 daniel 4734: (NXT(2) == 'C') && (NXT(3) == 'D') &&
4735: (NXT(4) == 'A') && (NXT(5) == 'T') &&
4736: (NXT(6) == 'A')) {
4737: SKIP(7);
4738: SKIP_BLANKS;
1.91 daniel 4739: SHRINK;
1.152 daniel 4740: if (RAW == ')') {
1.63 daniel 4741: NEXT;
4742: ret = xmlNewElementContent(NULL, XML_ELEMENT_CONTENT_PCDATA);
1.152 daniel 4743: if (RAW == '*') {
1.136 daniel 4744: ret->ocur = XML_ELEMENT_CONTENT_MULT;
4745: NEXT;
4746: }
1.63 daniel 4747: return(ret);
4748: }
1.152 daniel 4749: if ((RAW == '(') || (RAW == '|')) {
1.61 daniel 4750: ret = cur = xmlNewElementContent(NULL, XML_ELEMENT_CONTENT_PCDATA);
4751: if (ret == NULL) return(NULL);
1.99 daniel 4752: }
1.152 daniel 4753: while (RAW == '|') {
1.64 daniel 4754: NEXT;
1.61 daniel 4755: if (elem == NULL) {
4756: ret = xmlNewElementContent(NULL, XML_ELEMENT_CONTENT_OR);
4757: if (ret == NULL) return(NULL);
4758: ret->c1 = cur;
1.64 daniel 4759: cur = ret;
1.61 daniel 4760: } else {
1.64 daniel 4761: n = xmlNewElementContent(NULL, XML_ELEMENT_CONTENT_OR);
4762: if (n == NULL) return(NULL);
4763: n->c1 = xmlNewElementContent(elem, XML_ELEMENT_CONTENT_ELEMENT);
4764: cur->c2 = n;
4765: cur = n;
1.119 daniel 4766: xmlFree(elem);
1.61 daniel 4767: }
4768: SKIP_BLANKS;
4769: elem = xmlParseName(ctxt);
4770: if (elem == NULL) {
4771: if ((ctxt->sax != NULL) && (ctxt->sax->error != NULL))
1.74 daniel 4772: ctxt->sax->error(ctxt->userData,
1.61 daniel 4773: "xmlParseElementMixedContentDecl : Name expected\n");
1.123 daniel 4774: ctxt->errNo = XML_ERR_NAME_REQUIRED;
1.61 daniel 4775: ctxt->wellFormed = 0;
4776: xmlFreeElementContent(cur);
4777: return(NULL);
4778: }
4779: SKIP_BLANKS;
1.97 daniel 4780: GROW;
1.61 daniel 4781: }
1.152 daniel 4782: if ((RAW == ')') && (NXT(1) == '*')) {
1.66 daniel 4783: if (elem != NULL) {
1.61 daniel 4784: cur->c2 = xmlNewElementContent(elem,
4785: XML_ELEMENT_CONTENT_ELEMENT);
1.119 daniel 4786: xmlFree(elem);
1.66 daniel 4787: }
1.65 daniel 4788: ret->ocur = XML_ELEMENT_CONTENT_MULT;
1.64 daniel 4789: SKIP(2);
1.61 daniel 4790: } else {
1.119 daniel 4791: if (elem != NULL) xmlFree(elem);
1.61 daniel 4792: if ((ctxt->sax != NULL) && (ctxt->sax->error != NULL))
1.74 daniel 4793: ctxt->sax->error(ctxt->userData,
1.63 daniel 4794: "xmlParseElementMixedContentDecl : '|' or ')*' expected\n");
1.123 daniel 4795: ctxt->errNo = XML_ERR_MIXED_NOT_STARTED;
1.61 daniel 4796: ctxt->wellFormed = 0;
4797: xmlFreeElementContent(ret);
4798: return(NULL);
4799: }
4800:
4801: } else {
4802: if ((ctxt->sax != NULL) && (ctxt->sax->error != NULL))
1.74 daniel 4803: ctxt->sax->error(ctxt->userData,
1.61 daniel 4804: "xmlParseElementMixedContentDecl : '#PCDATA' expected\n");
1.123 daniel 4805: ctxt->errNo = XML_ERR_PCDATA_REQUIRED;
1.61 daniel 4806: ctxt->wellFormed = 0;
4807: }
4808: return(ret);
4809: }
4810:
4811: /**
4812: * xmlParseElementChildrenContentDecl:
1.50 daniel 4813: * @ctxt: an XML parser context
4814: *
1.61 daniel 4815: * parse the declaration for a Mixed Element content
4816: * The leading '(' and spaces have been skipped in xmlParseElementContentDecl
1.22 daniel 4817: *
1.61 daniel 4818: *
1.22 daniel 4819: * [47] children ::= (choice | seq) ('?' | '*' | '+')?
4820: *
4821: * [48] cp ::= (Name | choice | seq) ('?' | '*' | '+')?
4822: *
4823: * [49] choice ::= '(' S? cp ( S? '|' S? cp )* S? ')'
4824: *
4825: * [50] seq ::= '(' S? cp ( S? ',' S? cp )* S? ')'
4826: *
1.99 daniel 4827: * [ VC: Proper Group/PE Nesting ] applies to [49] and [50]
4828: * TODO Parameter-entity replacement text must be properly nested
4829: * with parenthetized groups. That is to say, if either of the
4830: * opening or closing parentheses in a choice, seq, or Mixed
4831: * construct is contained in the replacement text for a parameter
4832: * entity, both must be contained in the same replacement text. For
4833: * interoperability, if a parameter-entity reference appears in a
4834: * choice, seq, or Mixed construct, its replacement text should not
4835: * be empty, and neither the first nor last non-blank character of
4836: * the replacement text should be a connector (| or ,).
4837: *
1.62 daniel 4838: * returns: the tree of xmlElementContentPtr describing the element
1.61 daniel 4839: * hierarchy.
4840: */
4841: xmlElementContentPtr
1.62 daniel 4842: xmlParseElementChildrenContentDecl(xmlParserCtxtPtr ctxt) {
1.63 daniel 4843: xmlElementContentPtr ret = NULL, cur = NULL, last = NULL, op = NULL;
1.123 daniel 4844: xmlChar *elem;
4845: xmlChar type = 0;
1.62 daniel 4846:
4847: SKIP_BLANKS;
1.94 daniel 4848: GROW;
1.152 daniel 4849: if (RAW == '(') {
1.63 daniel 4850: /* Recurse on first child */
1.62 daniel 4851: NEXT;
4852: SKIP_BLANKS;
4853: cur = ret = xmlParseElementChildrenContentDecl(ctxt);
4854: SKIP_BLANKS;
1.101 daniel 4855: GROW;
1.62 daniel 4856: } else {
4857: elem = xmlParseName(ctxt);
4858: if (elem == NULL) {
4859: if ((ctxt->sax != NULL) && (ctxt->sax->error != NULL))
1.74 daniel 4860: ctxt->sax->error(ctxt->userData,
1.62 daniel 4861: "xmlParseElementChildrenContentDecl : Name or '(' expected\n");
1.123 daniel 4862: ctxt->errNo = XML_ERR_ELEMCONTENT_NOT_STARTED;
1.62 daniel 4863: ctxt->wellFormed = 0;
4864: return(NULL);
4865: }
4866: cur = ret = xmlNewElementContent(elem, XML_ELEMENT_CONTENT_ELEMENT);
1.101 daniel 4867: GROW;
1.152 daniel 4868: if (RAW == '?') {
1.104 daniel 4869: cur->ocur = XML_ELEMENT_CONTENT_OPT;
1.62 daniel 4870: NEXT;
1.152 daniel 4871: } else if (RAW == '*') {
1.104 daniel 4872: cur->ocur = XML_ELEMENT_CONTENT_MULT;
1.62 daniel 4873: NEXT;
1.152 daniel 4874: } else if (RAW == '+') {
1.104 daniel 4875: cur->ocur = XML_ELEMENT_CONTENT_PLUS;
1.62 daniel 4876: NEXT;
4877: } else {
1.104 daniel 4878: cur->ocur = XML_ELEMENT_CONTENT_ONCE;
1.62 daniel 4879: }
1.119 daniel 4880: xmlFree(elem);
1.101 daniel 4881: GROW;
1.62 daniel 4882: }
4883: SKIP_BLANKS;
1.91 daniel 4884: SHRINK;
1.152 daniel 4885: while (RAW != ')') {
1.63 daniel 4886: /*
4887: * Each loop we parse one separator and one element.
4888: */
1.152 daniel 4889: if (RAW == ',') {
1.62 daniel 4890: if (type == 0) type = CUR;
4891:
4892: /*
4893: * Detect "Name | Name , Name" error
4894: */
4895: else if (type != CUR) {
4896: if ((ctxt->sax != NULL) && (ctxt->sax->error != NULL))
1.74 daniel 4897: ctxt->sax->error(ctxt->userData,
1.62 daniel 4898: "xmlParseElementChildrenContentDecl : '%c' expected\n",
4899: type);
1.123 daniel 4900: ctxt->errNo = XML_ERR_SEPARATOR_REQUIRED;
1.62 daniel 4901: ctxt->wellFormed = 0;
4902: xmlFreeElementContent(ret);
4903: return(NULL);
4904: }
1.64 daniel 4905: NEXT;
1.62 daniel 4906:
1.63 daniel 4907: op = xmlNewElementContent(NULL, XML_ELEMENT_CONTENT_SEQ);
4908: if (op == NULL) {
4909: xmlFreeElementContent(ret);
4910: return(NULL);
4911: }
4912: if (last == NULL) {
4913: op->c1 = ret;
1.65 daniel 4914: ret = cur = op;
1.63 daniel 4915: } else {
4916: cur->c2 = op;
4917: op->c1 = last;
4918: cur =op;
1.65 daniel 4919: last = NULL;
1.63 daniel 4920: }
1.152 daniel 4921: } else if (RAW == '|') {
1.62 daniel 4922: if (type == 0) type = CUR;
4923:
4924: /*
1.63 daniel 4925: * Detect "Name , Name | Name" error
1.62 daniel 4926: */
4927: else if (type != CUR) {
4928: if ((ctxt->sax != NULL) && (ctxt->sax->error != NULL))
1.74 daniel 4929: ctxt->sax->error(ctxt->userData,
1.62 daniel 4930: "xmlParseElementChildrenContentDecl : '%c' expected\n",
4931: type);
1.123 daniel 4932: ctxt->errNo = XML_ERR_SEPARATOR_REQUIRED;
1.62 daniel 4933: ctxt->wellFormed = 0;
4934: xmlFreeElementContent(ret);
4935: return(NULL);
4936: }
1.64 daniel 4937: NEXT;
1.62 daniel 4938:
1.63 daniel 4939: op = xmlNewElementContent(NULL, XML_ELEMENT_CONTENT_OR);
4940: if (op == NULL) {
4941: xmlFreeElementContent(ret);
4942: return(NULL);
4943: }
4944: if (last == NULL) {
4945: op->c1 = ret;
1.65 daniel 4946: ret = cur = op;
1.63 daniel 4947: } else {
4948: cur->c2 = op;
4949: op->c1 = last;
4950: cur =op;
1.65 daniel 4951: last = NULL;
1.63 daniel 4952: }
1.62 daniel 4953: } else {
4954: if ((ctxt->sax != NULL) && (ctxt->sax->error != NULL))
1.74 daniel 4955: ctxt->sax->error(ctxt->userData,
1.62 daniel 4956: "xmlParseElementChildrenContentDecl : ',' '|' or ')' expected\n");
4957: ctxt->wellFormed = 0;
1.123 daniel 4958: ctxt->errNo = XML_ERR_ELEMCONTENT_NOT_FINISHED;
1.62 daniel 4959: xmlFreeElementContent(ret);
4960: return(NULL);
4961: }
1.101 daniel 4962: GROW;
1.62 daniel 4963: SKIP_BLANKS;
1.101 daniel 4964: GROW;
1.152 daniel 4965: if (RAW == '(') {
1.63 daniel 4966: /* Recurse on second child */
1.62 daniel 4967: NEXT;
4968: SKIP_BLANKS;
1.65 daniel 4969: last = xmlParseElementChildrenContentDecl(ctxt);
1.62 daniel 4970: SKIP_BLANKS;
4971: } else {
4972: elem = xmlParseName(ctxt);
4973: if (elem == NULL) {
4974: if ((ctxt->sax != NULL) && (ctxt->sax->error != NULL))
1.74 daniel 4975: ctxt->sax->error(ctxt->userData,
1.122 daniel 4976: "xmlParseElementChildrenContentDecl : Name or '(' expected\n");
1.123 daniel 4977: ctxt->errNo = XML_ERR_ELEMCONTENT_NOT_STARTED;
1.62 daniel 4978: ctxt->wellFormed = 0;
4979: return(NULL);
4980: }
1.65 daniel 4981: last = xmlNewElementContent(elem, XML_ELEMENT_CONTENT_ELEMENT);
1.119 daniel 4982: xmlFree(elem);
1.152 daniel 4983: if (RAW == '?') {
1.105 daniel 4984: last->ocur = XML_ELEMENT_CONTENT_OPT;
4985: NEXT;
1.152 daniel 4986: } else if (RAW == '*') {
1.105 daniel 4987: last->ocur = XML_ELEMENT_CONTENT_MULT;
4988: NEXT;
1.152 daniel 4989: } else if (RAW == '+') {
1.105 daniel 4990: last->ocur = XML_ELEMENT_CONTENT_PLUS;
4991: NEXT;
4992: } else {
4993: last->ocur = XML_ELEMENT_CONTENT_ONCE;
4994: }
1.63 daniel 4995: }
4996: SKIP_BLANKS;
1.97 daniel 4997: GROW;
1.64 daniel 4998: }
1.65 daniel 4999: if ((cur != NULL) && (last != NULL)) {
5000: cur->c2 = last;
1.62 daniel 5001: }
5002: NEXT;
1.152 daniel 5003: if (RAW == '?') {
1.62 daniel 5004: ret->ocur = XML_ELEMENT_CONTENT_OPT;
5005: NEXT;
1.152 daniel 5006: } else if (RAW == '*') {
1.62 daniel 5007: ret->ocur = XML_ELEMENT_CONTENT_MULT;
5008: NEXT;
1.152 daniel 5009: } else if (RAW == '+') {
1.62 daniel 5010: ret->ocur = XML_ELEMENT_CONTENT_PLUS;
5011: NEXT;
5012: }
5013: return(ret);
1.61 daniel 5014: }
5015:
5016: /**
5017: * xmlParseElementContentDecl:
5018: * @ctxt: an XML parser context
5019: * @name: the name of the element being defined.
5020: * @result: the Element Content pointer will be stored here if any
1.22 daniel 5021: *
1.61 daniel 5022: * parse the declaration for an Element content either Mixed or Children,
5023: * the cases EMPTY and ANY are handled directly in xmlParseElementDecl
5024: *
5025: * [46] contentspec ::= 'EMPTY' | 'ANY' | Mixed | children
1.50 daniel 5026: *
1.61 daniel 5027: * returns: the type of element content XML_ELEMENT_TYPE_xxx
1.22 daniel 5028: */
5029:
1.61 daniel 5030: int
1.123 daniel 5031: xmlParseElementContentDecl(xmlParserCtxtPtr ctxt, xmlChar *name,
1.61 daniel 5032: xmlElementContentPtr *result) {
5033:
5034: xmlElementContentPtr tree = NULL;
5035: int res;
5036:
5037: *result = NULL;
5038:
1.152 daniel 5039: if (RAW != '(') {
1.61 daniel 5040: if ((ctxt->sax != NULL) && (ctxt->sax->error != NULL))
1.74 daniel 5041: ctxt->sax->error(ctxt->userData,
1.61 daniel 5042: "xmlParseElementContentDecl : '(' expected\n");
1.123 daniel 5043: ctxt->errNo = XML_ERR_ELEMCONTENT_NOT_STARTED;
1.61 daniel 5044: ctxt->wellFormed = 0;
5045: return(-1);
5046: }
5047: NEXT;
1.97 daniel 5048: GROW;
1.61 daniel 5049: SKIP_BLANKS;
1.152 daniel 5050: if ((RAW == '#') && (NXT(1) == 'P') &&
1.61 daniel 5051: (NXT(2) == 'C') && (NXT(3) == 'D') &&
5052: (NXT(4) == 'A') && (NXT(5) == 'T') &&
5053: (NXT(6) == 'A')) {
1.62 daniel 5054: tree = xmlParseElementMixedContentDecl(ctxt);
1.61 daniel 5055: res = XML_ELEMENT_TYPE_MIXED;
5056: } else {
1.62 daniel 5057: tree = xmlParseElementChildrenContentDecl(ctxt);
1.61 daniel 5058: res = XML_ELEMENT_TYPE_ELEMENT;
5059: }
5060: SKIP_BLANKS;
1.63 daniel 5061: /****************************
1.152 daniel 5062: if (RAW != ')') {
1.61 daniel 5063: if ((ctxt->sax != NULL) && (ctxt->sax->error != NULL))
1.74 daniel 5064: ctxt->sax->error(ctxt->userData,
1.61 daniel 5065: "xmlParseElementContentDecl : ')' expected\n");
5066: ctxt->wellFormed = 0;
5067: return(-1);
5068: }
1.63 daniel 5069: ****************************/
5070: *result = tree;
1.61 daniel 5071: return(res);
1.22 daniel 5072: }
5073:
1.50 daniel 5074: /**
5075: * xmlParseElementDecl:
5076: * @ctxt: an XML parser context
5077: *
5078: * parse an Element declaration.
1.22 daniel 5079: *
5080: * [45] elementdecl ::= '<!ELEMENT' S Name S contentspec S? '>'
5081: *
1.99 daniel 5082: * [ VC: Unique Element Type Declaration ]
1.117 daniel 5083: * No element type may be declared more than once
1.69 daniel 5084: *
5085: * Returns the type of the element, or -1 in case of error
1.22 daniel 5086: */
1.59 daniel 5087: int
1.55 daniel 5088: xmlParseElementDecl(xmlParserCtxtPtr ctxt) {
1.123 daniel 5089: xmlChar *name;
1.59 daniel 5090: int ret = -1;
1.61 daniel 5091: xmlElementContentPtr content = NULL;
1.22 daniel 5092:
1.97 daniel 5093: GROW;
1.152 daniel 5094: if ((RAW == '<') && (NXT(1) == '!') &&
1.40 daniel 5095: (NXT(2) == 'E') && (NXT(3) == 'L') &&
5096: (NXT(4) == 'E') && (NXT(5) == 'M') &&
5097: (NXT(6) == 'E') && (NXT(7) == 'N') &&
1.59 daniel 5098: (NXT(8) == 'T')) {
1.40 daniel 5099: SKIP(9);
1.59 daniel 5100: if (!IS_BLANK(CUR)) {
5101: if ((ctxt->sax != NULL) && (ctxt->sax->error != NULL))
1.74 daniel 5102: ctxt->sax->error(ctxt->userData,
1.59 daniel 5103: "Space required after 'ELEMENT'\n");
1.123 daniel 5104: ctxt->errNo = XML_ERR_SPACE_REQUIRED;
1.59 daniel 5105: ctxt->wellFormed = 0;
5106: }
1.42 daniel 5107: SKIP_BLANKS;
1.22 daniel 5108: name = xmlParseName(ctxt);
5109: if (name == NULL) {
1.55 daniel 5110: if ((ctxt->sax != NULL) && (ctxt->sax->error != NULL))
1.74 daniel 5111: ctxt->sax->error(ctxt->userData,
1.59 daniel 5112: "xmlParseElementDecl: no name for Element\n");
1.123 daniel 5113: ctxt->errNo = XML_ERR_NAME_REQUIRED;
1.59 daniel 5114: ctxt->wellFormed = 0;
5115: return(-1);
5116: }
5117: if (!IS_BLANK(CUR)) {
5118: if ((ctxt->sax != NULL) && (ctxt->sax->error != NULL))
1.74 daniel 5119: ctxt->sax->error(ctxt->userData,
1.59 daniel 5120: "Space required after the element name\n");
1.123 daniel 5121: ctxt->errNo = XML_ERR_SPACE_REQUIRED;
1.59 daniel 5122: ctxt->wellFormed = 0;
1.22 daniel 5123: }
1.42 daniel 5124: SKIP_BLANKS;
1.152 daniel 5125: if ((RAW == 'E') && (NXT(1) == 'M') &&
1.40 daniel 5126: (NXT(2) == 'P') && (NXT(3) == 'T') &&
5127: (NXT(4) == 'Y')) {
5128: SKIP(5);
1.22 daniel 5129: /*
5130: * Element must always be empty.
5131: */
1.59 daniel 5132: ret = XML_ELEMENT_TYPE_EMPTY;
1.152 daniel 5133: } else if ((RAW == 'A') && (NXT(1) == 'N') &&
1.40 daniel 5134: (NXT(2) == 'Y')) {
5135: SKIP(3);
1.22 daniel 5136: /*
5137: * Element is a generic container.
5138: */
1.59 daniel 5139: ret = XML_ELEMENT_TYPE_ANY;
1.152 daniel 5140: } else if (RAW == '(') {
1.61 daniel 5141: ret = xmlParseElementContentDecl(ctxt, name, &content);
1.22 daniel 5142: } else {
1.98 daniel 5143: /*
5144: * [ WFC: PEs in Internal Subset ] error handling.
5145: */
1.152 daniel 5146: if ((RAW == '%') && (ctxt->external == 0) &&
1.98 daniel 5147: (ctxt->inputNr == 1)) {
5148: if ((ctxt->sax != NULL) && (ctxt->sax->error != NULL))
5149: ctxt->sax->error(ctxt->userData,
5150: "PEReference: forbidden within markup decl in internal subset\n");
1.123 daniel 5151: ctxt->errNo = XML_ERR_PEREF_IN_INT_SUBSET;
1.98 daniel 5152: } else {
5153: if ((ctxt->sax != NULL) && (ctxt->sax->error != NULL))
5154: ctxt->sax->error(ctxt->userData,
5155: "xmlParseElementDecl: 'EMPTY', 'ANY' or '(' expected\n");
1.123 daniel 5156: ctxt->errNo = XML_ERR_ELEMCONTENT_NOT_STARTED;
1.98 daniel 5157: }
1.61 daniel 5158: ctxt->wellFormed = 0;
1.119 daniel 5159: if (name != NULL) xmlFree(name);
1.61 daniel 5160: return(-1);
1.22 daniel 5161: }
1.142 daniel 5162:
5163: SKIP_BLANKS;
5164: /*
5165: * Pop-up of finished entities.
5166: */
1.152 daniel 5167: while ((RAW == 0) && (ctxt->inputNr > 1))
1.142 daniel 5168: xmlPopInput(ctxt);
1.42 daniel 5169: SKIP_BLANKS;
1.142 daniel 5170:
1.152 daniel 5171: if (RAW != '>') {
1.55 daniel 5172: if ((ctxt->sax != NULL) && (ctxt->sax->error != NULL))
1.74 daniel 5173: ctxt->sax->error(ctxt->userData,
1.31 daniel 5174: "xmlParseElementDecl: expected '>' at the end\n");
1.123 daniel 5175: ctxt->errNo = XML_ERR_GT_REQUIRED;
1.59 daniel 5176: ctxt->wellFormed = 0;
1.61 daniel 5177: } else {
1.40 daniel 5178: NEXT;
1.72 daniel 5179: if ((ctxt->sax != NULL) && (ctxt->sax->elementDecl != NULL))
1.76 daniel 5180: ctxt->sax->elementDecl(ctxt->userData, name, ret,
5181: content);
1.61 daniel 5182: }
1.84 daniel 5183: if (content != NULL) {
5184: xmlFreeElementContent(content);
5185: }
1.61 daniel 5186: if (name != NULL) {
1.119 daniel 5187: xmlFree(name);
1.61 daniel 5188: }
1.22 daniel 5189: }
1.59 daniel 5190: return(ret);
1.22 daniel 5191: }
5192:
1.50 daniel 5193: /**
5194: * xmlParseMarkupDecl:
5195: * @ctxt: an XML parser context
5196: *
5197: * parse Markup declarations
1.22 daniel 5198: *
5199: * [29] markupdecl ::= elementdecl | AttlistDecl | EntityDecl |
5200: * NotationDecl | PI | Comment
5201: *
1.98 daniel 5202: * [ VC: Proper Declaration/PE Nesting ]
5203: * TODO Parameter-entity replacement text must be properly nested with
5204: * markup declarations. That is to say, if either the first character
5205: * or the last character of a markup declaration (markupdecl above) is
5206: * contained in the replacement text for a parameter-entity reference,
5207: * both must be contained in the same replacement text.
5208: *
5209: * [ WFC: PEs in Internal Subset ]
5210: * In the internal DTD subset, parameter-entity references can occur
5211: * only where markup declarations can occur, not within markup declarations.
5212: * (This does not apply to references that occur in external parameter
5213: * entities or to the external subset.)
1.22 daniel 5214: */
1.55 daniel 5215: void
5216: xmlParseMarkupDecl(xmlParserCtxtPtr ctxt) {
1.94 daniel 5217: GROW;
1.22 daniel 5218: xmlParseElementDecl(ctxt);
5219: xmlParseAttributeListDecl(ctxt);
5220: xmlParseEntityDecl(ctxt);
5221: xmlParseNotationDecl(ctxt);
5222: xmlParsePI(ctxt);
1.114 daniel 5223: xmlParseComment(ctxt);
1.98 daniel 5224: /*
5225: * This is only for internal subset. On external entities,
5226: * the replacement is done before parsing stage
5227: */
5228: if ((ctxt->external == 0) && (ctxt->inputNr == 1))
5229: xmlParsePEReference(ctxt);
1.97 daniel 5230: ctxt->instate = XML_PARSER_DTD;
1.22 daniel 5231: }
5232:
1.50 daniel 5233: /**
1.76 daniel 5234: * xmlParseTextDecl:
5235: * @ctxt: an XML parser context
5236: *
5237: * parse an XML declaration header for external entities
5238: *
5239: * [77] TextDecl ::= '<?xml' VersionInfo? EncodingDecl S? '?>'
5240: *
5241: * Returns the only valuable info for an external parsed entity, the encoding
5242: */
5243:
1.123 daniel 5244: xmlChar *
1.76 daniel 5245: xmlParseTextDecl(xmlParserCtxtPtr ctxt) {
1.123 daniel 5246: xmlChar *version;
5247: xmlChar *encoding = NULL;
1.76 daniel 5248:
5249: /*
5250: * We know that '<?xml' is here.
5251: */
5252: SKIP(5);
5253:
5254: if (!IS_BLANK(CUR)) {
5255: if ((ctxt->sax != NULL) && (ctxt->sax->error != NULL))
1.122 daniel 5256: ctxt->sax->error(ctxt->userData,
5257: "Space needed after '<?xml'\n");
1.123 daniel 5258: ctxt->errNo = XML_ERR_SPACE_REQUIRED;
1.76 daniel 5259: ctxt->wellFormed = 0;
5260: }
5261: SKIP_BLANKS;
5262:
5263: /*
5264: * We may have the VersionInfo here.
5265: */
5266: version = xmlParseVersionInfo(ctxt);
5267: if (version == NULL)
5268: version = xmlCharStrdup(XML_DEFAULT_VERSION);
5269: ctxt->version = xmlStrdup(version);
1.119 daniel 5270: xmlFree(version);
1.76 daniel 5271:
5272: /*
5273: * We must have the encoding declaration
5274: */
5275: if (!IS_BLANK(CUR)) {
5276: if ((ctxt->sax != NULL) && (ctxt->sax->error != NULL))
1.122 daniel 5277: ctxt->sax->error(ctxt->userData, "Space needed here\n");
1.123 daniel 5278: ctxt->errNo = XML_ERR_SPACE_REQUIRED;
1.76 daniel 5279: ctxt->wellFormed = 0;
5280: }
5281: encoding = xmlParseEncodingDecl(ctxt);
5282:
5283: SKIP_BLANKS;
1.152 daniel 5284: if ((RAW == '?') && (NXT(1) == '>')) {
1.76 daniel 5285: SKIP(2);
1.152 daniel 5286: } else if (RAW == '>') {
1.76 daniel 5287: /* Deprecated old WD ... */
5288: if ((ctxt->sax != NULL) && (ctxt->sax->error != NULL))
1.122 daniel 5289: ctxt->sax->error(ctxt->userData,
5290: "XML declaration must end-up with '?>'\n");
1.123 daniel 5291: ctxt->errNo = XML_ERR_XMLDECL_NOT_FINISHED;
1.76 daniel 5292: ctxt->wellFormed = 0;
5293: NEXT;
5294: } else {
5295: if ((ctxt->sax != NULL) && (ctxt->sax->error != NULL))
1.122 daniel 5296: ctxt->sax->error(ctxt->userData,
5297: "parsing XML declaration: '?>' expected\n");
1.123 daniel 5298: ctxt->errNo = XML_ERR_XMLDECL_NOT_FINISHED;
1.76 daniel 5299: ctxt->wellFormed = 0;
5300: MOVETO_ENDTAG(CUR_PTR);
5301: NEXT;
5302: }
5303: return(encoding);
5304: }
5305:
5306: /*
5307: * xmlParseConditionalSections
5308: * @ctxt: an XML parser context
5309: *
5310: * TODO : Conditionnal section are not yet supported !
5311: *
5312: * [61] conditionalSect ::= includeSect | ignoreSect
5313: * [62] includeSect ::= '<![' S? 'INCLUDE' S? '[' extSubsetDecl ']]>'
5314: * [63] ignoreSect ::= '<![' S? 'IGNORE' S? '[' ignoreSectContents* ']]>'
5315: * [64] ignoreSectContents ::= Ignore ('<![' ignoreSectContents ']]>' Ignore)*
5316: * [65] Ignore ::= Char* - (Char* ('<![' | ']]>') Char*)
5317: */
5318:
5319: void
5320: xmlParseConditionalSections(xmlParserCtxtPtr ctxt) {
5321: if ((ctxt->sax != NULL) && (ctxt->sax->warning != NULL))
5322: ctxt->sax->warning(ctxt->userData,
5323: "XML conditional section not supported\n");
5324: /*
5325: * Skip up to the end of the conditionnal section.
5326: */
1.152 daniel 5327: while ((RAW != 0) && ((RAW != ']') || (NXT(1) != ']') || (NXT(2) != '>'))) {
1.76 daniel 5328: NEXT;
1.143 daniel 5329: /*
5330: * Pop-up of finished entities.
5331: */
1.152 daniel 5332: while ((RAW == 0) && (ctxt->inputNr > 1))
1.143 daniel 5333: xmlPopInput(ctxt);
5334:
1.152 daniel 5335: if (RAW == 0)
1.143 daniel 5336: GROW;
5337: }
5338:
1.152 daniel 5339: if (RAW == 0)
1.143 daniel 5340: SHRINK;
5341:
1.152 daniel 5342: if (RAW == 0) {
1.76 daniel 5343: if ((ctxt->sax != NULL) && (ctxt->sax->error != NULL))
5344: ctxt->sax->error(ctxt->userData,
5345: "XML conditional section not closed\n");
1.123 daniel 5346: ctxt->errNo = XML_ERR_CONDSEC_NOT_FINISHED;
1.76 daniel 5347: ctxt->wellFormed = 0;
1.143 daniel 5348: } else {
5349: SKIP(3);
1.76 daniel 5350: }
5351: }
5352:
5353: /**
1.124 daniel 5354: * xmlParseExternalSubset:
1.76 daniel 5355: * @ctxt: an XML parser context
1.124 daniel 5356: * @ExternalID: the external identifier
5357: * @SystemID: the system identifier (or URL)
1.76 daniel 5358: *
5359: * parse Markup declarations from an external subset
5360: *
5361: * [30] extSubset ::= textDecl? extSubsetDecl
5362: *
5363: * [31] extSubsetDecl ::= (markupdecl | conditionalSect | PEReference | S) *
5364: */
5365: void
1.123 daniel 5366: xmlParseExternalSubset(xmlParserCtxtPtr ctxt, const xmlChar *ExternalID,
5367: const xmlChar *SystemID) {
1.132 daniel 5368: GROW;
1.152 daniel 5369: if ((RAW == '<') && (NXT(1) == '?') &&
1.76 daniel 5370: (NXT(2) == 'x') && (NXT(3) == 'm') &&
5371: (NXT(4) == 'l')) {
1.134 daniel 5372: xmlChar *decl;
5373:
5374: decl = xmlParseTextDecl(ctxt);
5375: if (decl != NULL)
5376: xmlFree(decl);
1.76 daniel 5377: }
1.79 daniel 5378: if (ctxt->myDoc == NULL) {
1.116 daniel 5379: ctxt->myDoc = xmlNewDoc(BAD_CAST "1.0");
1.79 daniel 5380: }
5381: if ((ctxt->myDoc != NULL) && (ctxt->myDoc->intSubset == NULL))
5382: xmlCreateIntSubset(ctxt->myDoc, NULL, ExternalID, SystemID);
5383:
1.96 daniel 5384: ctxt->instate = XML_PARSER_DTD;
1.101 daniel 5385: ctxt->external = 1;
1.152 daniel 5386: while (((RAW == '<') && (NXT(1) == '?')) ||
5387: ((RAW == '<') && (NXT(1) == '!')) ||
1.153 daniel 5388: IS_BLANK(RAW)) {
1.123 daniel 5389: const xmlChar *check = CUR_PTR;
1.115 daniel 5390: int cons = ctxt->input->consumed;
5391:
1.152 daniel 5392: if ((RAW == '<') && (NXT(1) == '!') && (NXT(2) == '[')) {
1.76 daniel 5393: xmlParseConditionalSections(ctxt);
5394: } else if (IS_BLANK(CUR)) {
5395: NEXT;
1.152 daniel 5396: } else if (RAW == '%') {
1.76 daniel 5397: xmlParsePEReference(ctxt);
5398: } else
5399: xmlParseMarkupDecl(ctxt);
1.77 daniel 5400:
5401: /*
5402: * Pop-up of finished entities.
5403: */
1.152 daniel 5404: while ((RAW == 0) && (ctxt->inputNr > 1))
1.77 daniel 5405: xmlPopInput(ctxt);
5406:
1.115 daniel 5407: if ((CUR_PTR == check) && (cons == ctxt->input->consumed)) {
5408: if ((ctxt->sax != NULL) && (ctxt->sax->error != NULL))
5409: ctxt->sax->error(ctxt->userData,
5410: "Content error in the external subset\n");
5411: ctxt->wellFormed = 0;
1.123 daniel 5412: ctxt->errNo = XML_ERR_EXT_SUBSET_NOT_FINISHED;
1.115 daniel 5413: break;
5414: }
1.76 daniel 5415: }
5416:
1.152 daniel 5417: if (RAW != 0) {
1.76 daniel 5418: if ((ctxt->sax != NULL) && (ctxt->sax->error != NULL))
5419: ctxt->sax->error(ctxt->userData,
5420: "Extra content at the end of the document\n");
1.123 daniel 5421: ctxt->errNo = XML_ERR_EXT_SUBSET_NOT_FINISHED;
1.76 daniel 5422: ctxt->wellFormed = 0;
5423: }
5424:
5425: }
5426:
5427: /**
1.77 daniel 5428: * xmlParseReference:
5429: * @ctxt: an XML parser context
5430: *
5431: * parse and handle entity references in content, depending on the SAX
5432: * interface, this may end-up in a call to character() if this is a
1.79 daniel 5433: * CharRef, a predefined entity, if there is no reference() callback.
5434: * or if the parser was asked to switch to that mode.
1.77 daniel 5435: *
5436: * [67] Reference ::= EntityRef | CharRef
5437: */
5438: void
5439: xmlParseReference(xmlParserCtxtPtr ctxt) {
5440: xmlEntityPtr ent;
1.123 daniel 5441: xmlChar *val;
1.152 daniel 5442: if (RAW != '&') return;
1.77 daniel 5443:
1.113 daniel 5444: if (ctxt->inputNr > 1) {
1.123 daniel 5445: xmlChar cur[2] = { '&' , 0 } ;
1.113 daniel 5446:
5447: if ((ctxt->sax != NULL) && (ctxt->sax->characters != NULL))
5448: ctxt->sax->characters(ctxt->userData, cur, 1);
5449: if (ctxt->token == '&')
5450: ctxt->token = 0;
5451: else {
5452: SKIP(1);
5453: }
5454: return;
5455: }
1.77 daniel 5456: if (NXT(1) == '#') {
1.152 daniel 5457: int i = 0;
1.153 daniel 5458: xmlChar out[10];
5459: int hex = NXT(2);
1.77 daniel 5460: int val = xmlParseCharRef(ctxt);
1.152 daniel 5461:
1.153 daniel 5462: if (ctxt->encoding != NULL) {
5463: /*
5464: * So we are using non-UTF-8 buffers
5465: * Check that the char fit on 8bits, if not
5466: * generate a CharRef.
5467: */
5468: if (val <= 0xFF) {
5469: out[0] = val;
5470: out[1] = 0;
5471: if ((ctxt->sax != NULL) && (ctxt->sax->characters != NULL))
5472: ctxt->sax->characters(ctxt->userData, out, 1);
5473: } else {
5474: if ((hex == 'x') || (hex == 'X'))
5475: sprintf((char *)out, "#x%X", val);
5476: else
5477: sprintf((char *)out, "#%d", val);
5478: if ((ctxt->sax != NULL) && (ctxt->sax->reference != NULL))
5479: ctxt->sax->reference(ctxt->userData, out);
5480: }
5481: } else {
5482: /*
5483: * Just encode the value in UTF-8
5484: */
5485: COPY_BUF(0 ,out, i, val);
5486: out[i] = 0;
5487: if ((ctxt->sax != NULL) && (ctxt->sax->characters != NULL))
5488: ctxt->sax->characters(ctxt->userData, out, i);
5489: }
1.77 daniel 5490: } else {
5491: ent = xmlParseEntityRef(ctxt);
5492: if (ent == NULL) return;
5493: if ((ent->name != NULL) &&
1.113 daniel 5494: (ent->type != XML_INTERNAL_PREDEFINED_ENTITY)) {
5495: if ((ctxt->sax != NULL) && (ctxt->sax->reference != NULL) &&
5496: (ctxt->replaceEntities == 0)) {
5497: /*
5498: * Create a node.
5499: */
5500: ctxt->sax->reference(ctxt->userData, ent->name);
5501: return;
5502: } else if (ctxt->replaceEntities) {
5503: xmlParserInputPtr input;
1.79 daniel 5504:
1.113 daniel 5505: input = xmlNewEntityInputStream(ctxt, ent);
5506: xmlPushInput(ctxt, input);
5507: return;
5508: }
1.77 daniel 5509: }
5510: val = ent->content;
5511: if (val == NULL) return;
5512: /*
5513: * inline the entity.
5514: */
5515: if ((ctxt->sax != NULL) && (ctxt->sax->characters != NULL))
5516: ctxt->sax->characters(ctxt->userData, val, xmlStrlen(val));
5517: }
1.24 daniel 5518: }
5519:
1.50 daniel 5520: /**
5521: * xmlParseEntityRef:
5522: * @ctxt: an XML parser context
5523: *
5524: * parse ENTITY references declarations
1.24 daniel 5525: *
5526: * [68] EntityRef ::= '&' Name ';'
1.68 daniel 5527: *
1.98 daniel 5528: * [ WFC: Entity Declared ]
5529: * In a document without any DTD, a document with only an internal DTD
5530: * subset which contains no parameter entity references, or a document
5531: * with "standalone='yes'", the Name given in the entity reference
5532: * must match that in an entity declaration, except that well-formed
5533: * documents need not declare any of the following entities: amp, lt,
5534: * gt, apos, quot. The declaration of a parameter entity must precede
5535: * any reference to it. Similarly, the declaration of a general entity
5536: * must precede any reference to it which appears in a default value in an
5537: * attribute-list declaration. Note that if entities are declared in the
5538: * external subset or in external parameter entities, a non-validating
5539: * processor is not obligated to read and process their declarations;
5540: * for such documents, the rule that an entity must be declared is a
5541: * well-formedness constraint only if standalone='yes'.
5542: *
5543: * [ WFC: Parsed Entity ]
5544: * An entity reference must not contain the name of an unparsed entity
5545: *
1.77 daniel 5546: * Returns the xmlEntityPtr if found, or NULL otherwise.
1.24 daniel 5547: */
1.77 daniel 5548: xmlEntityPtr
1.55 daniel 5549: xmlParseEntityRef(xmlParserCtxtPtr ctxt) {
1.123 daniel 5550: xmlChar *name;
1.72 daniel 5551: xmlEntityPtr ent = NULL;
1.24 daniel 5552:
1.91 daniel 5553: GROW;
1.111 daniel 5554:
1.152 daniel 5555: if (RAW == '&') {
1.40 daniel 5556: NEXT;
1.24 daniel 5557: name = xmlParseName(ctxt);
5558: if (name == NULL) {
1.55 daniel 5559: if ((ctxt->sax != NULL) && (ctxt->sax->error != NULL))
1.98 daniel 5560: ctxt->sax->error(ctxt->userData,
5561: "xmlParseEntityRef: no name\n");
1.123 daniel 5562: ctxt->errNo = XML_ERR_NAME_REQUIRED;
1.59 daniel 5563: ctxt->wellFormed = 0;
1.24 daniel 5564: } else {
1.152 daniel 5565: if (RAW == ';') {
1.40 daniel 5566: NEXT;
1.24 daniel 5567: /*
1.77 daniel 5568: * Ask first SAX for entity resolution, otherwise try the
5569: * predefined set.
5570: */
5571: if (ctxt->sax != NULL) {
5572: if (ctxt->sax->getEntity != NULL)
5573: ent = ctxt->sax->getEntity(ctxt->userData, name);
5574: if (ent == NULL)
5575: ent = xmlGetPredefinedEntity(name);
5576: }
5577: /*
1.98 daniel 5578: * [ WFC: Entity Declared ]
5579: * In a document without any DTD, a document with only an
5580: * internal DTD subset which contains no parameter entity
5581: * references, or a document with "standalone='yes'", the
5582: * Name given in the entity reference must match that in an
5583: * entity declaration, except that well-formed documents
5584: * need not declare any of the following entities: amp, lt,
5585: * gt, apos, quot.
5586: * The declaration of a parameter entity must precede any
5587: * reference to it.
5588: * Similarly, the declaration of a general entity must
5589: * precede any reference to it which appears in a default
5590: * value in an attribute-list declaration. Note that if
5591: * entities are declared in the external subset or in
5592: * external parameter entities, a non-validating processor
5593: * is not obligated to read and process their declarations;
5594: * for such documents, the rule that an entity must be
5595: * declared is a well-formedness constraint only if
5596: * standalone='yes'.
1.59 daniel 5597: */
1.77 daniel 5598: if (ent == NULL) {
1.98 daniel 5599: if ((ctxt->standalone == 1) ||
5600: ((ctxt->hasExternalSubset == 0) &&
5601: (ctxt->hasPErefs == 0))) {
5602: if ((ctxt->sax != NULL) && (ctxt->sax->error != NULL))
1.77 daniel 5603: ctxt->sax->error(ctxt->userData,
5604: "Entity '%s' not defined\n", name);
1.123 daniel 5605: ctxt->errNo = XML_ERR_UNDECLARED_ENTITY;
1.77 daniel 5606: ctxt->wellFormed = 0;
5607: } else {
1.98 daniel 5608: if ((ctxt->sax != NULL) && (ctxt->sax->warning != NULL))
5609: ctxt->sax->warning(ctxt->userData,
5610: "Entity '%s' not defined\n", name);
1.123 daniel 5611: ctxt->errNo = XML_WAR_UNDECLARED_ENTITY;
1.59 daniel 5612: }
1.77 daniel 5613: }
1.59 daniel 5614:
5615: /*
1.98 daniel 5616: * [ WFC: Parsed Entity ]
5617: * An entity reference must not contain the name of an
5618: * unparsed entity
5619: */
5620: else if (ent->type == XML_EXTERNAL_GENERAL_UNPARSED_ENTITY) {
5621: if ((ctxt->sax != NULL) && (ctxt->sax->error != NULL))
5622: ctxt->sax->error(ctxt->userData,
5623: "Entity reference to unparsed entity %s\n", name);
1.123 daniel 5624: ctxt->errNo = XML_ERR_UNPARSED_ENTITY;
1.98 daniel 5625: ctxt->wellFormed = 0;
5626: }
5627:
5628: /*
5629: * [ WFC: No External Entity References ]
5630: * Attribute values cannot contain direct or indirect
5631: * entity references to external entities.
5632: */
5633: else if ((ctxt->instate == XML_PARSER_ATTRIBUTE_VALUE) &&
5634: (ent->type == XML_EXTERNAL_GENERAL_PARSED_ENTITY)) {
5635: if ((ctxt->sax != NULL) && (ctxt->sax->error != NULL))
5636: ctxt->sax->error(ctxt->userData,
5637: "Attribute references external entity '%s'\n", name);
1.123 daniel 5638: ctxt->errNo = XML_ERR_ENTITY_IS_EXTERNAL;
1.98 daniel 5639: ctxt->wellFormed = 0;
5640: }
5641: /*
5642: * [ WFC: No < in Attribute Values ]
5643: * The replacement text of any entity referred to directly or
5644: * indirectly in an attribute value (other than "<") must
5645: * not contain a <.
1.59 daniel 5646: */
1.98 daniel 5647: else if ((ctxt->instate == XML_PARSER_ATTRIBUTE_VALUE) &&
1.116 daniel 5648: (ent != NULL) &&
5649: (xmlStrcmp(ent->name, BAD_CAST "lt")) &&
1.98 daniel 5650: (ent->content != NULL) &&
5651: (xmlStrchr(ent->content, '<'))) {
5652: if ((ctxt->sax != NULL) && (ctxt->sax->error != NULL))
5653: ctxt->sax->error(ctxt->userData,
5654: "'<' in entity '%s' is not allowed in attributes values\n", name);
1.123 daniel 5655: ctxt->errNo = XML_ERR_LT_IN_ATTRIBUTE;
1.98 daniel 5656: ctxt->wellFormed = 0;
5657: }
5658:
5659: /*
5660: * Internal check, no parameter entities here ...
5661: */
5662: else {
1.59 daniel 5663: switch (ent->type) {
5664: case XML_INTERNAL_PARAMETER_ENTITY:
5665: case XML_EXTERNAL_PARAMETER_ENTITY:
5666: if ((ctxt->sax != NULL) && (ctxt->sax->error != NULL))
1.74 daniel 5667: ctxt->sax->error(ctxt->userData,
1.59 daniel 5668: "Attempt to reference the parameter entity '%s'\n", name);
1.123 daniel 5669: ctxt->errNo = XML_ERR_ENTITY_IS_PARAMETER;
1.59 daniel 5670: ctxt->wellFormed = 0;
5671: break;
5672: }
5673: }
5674:
5675: /*
1.98 daniel 5676: * [ WFC: No Recursion ]
1.117 daniel 5677: * TODO A parsed entity must not contain a recursive reference
5678: * to itself, either directly or indirectly.
1.59 daniel 5679: */
1.77 daniel 5680:
1.24 daniel 5681: } else {
1.55 daniel 5682: if ((ctxt->sax != NULL) && (ctxt->sax->error != NULL))
1.74 daniel 5683: ctxt->sax->error(ctxt->userData,
1.59 daniel 5684: "xmlParseEntityRef: expecting ';'\n");
1.123 daniel 5685: ctxt->errNo = XML_ERR_ENTITYREF_SEMICOL_MISSING;
1.59 daniel 5686: ctxt->wellFormed = 0;
1.24 daniel 5687: }
1.119 daniel 5688: xmlFree(name);
1.24 daniel 5689: }
5690: }
1.77 daniel 5691: return(ent);
1.24 daniel 5692: }
1.135 daniel 5693: /**
5694: * xmlParseStringEntityRef:
5695: * @ctxt: an XML parser context
5696: * @str: a pointer to an index in the string
5697: *
5698: * parse ENTITY references declarations, but this version parses it from
5699: * a string value.
5700: *
5701: * [68] EntityRef ::= '&' Name ';'
5702: *
5703: * [ WFC: Entity Declared ]
5704: * In a document without any DTD, a document with only an internal DTD
5705: * subset which contains no parameter entity references, or a document
5706: * with "standalone='yes'", the Name given in the entity reference
5707: * must match that in an entity declaration, except that well-formed
5708: * documents need not declare any of the following entities: amp, lt,
5709: * gt, apos, quot. The declaration of a parameter entity must precede
5710: * any reference to it. Similarly, the declaration of a general entity
5711: * must precede any reference to it which appears in a default value in an
5712: * attribute-list declaration. Note that if entities are declared in the
5713: * external subset or in external parameter entities, a non-validating
5714: * processor is not obligated to read and process their declarations;
5715: * for such documents, the rule that an entity must be declared is a
5716: * well-formedness constraint only if standalone='yes'.
5717: *
5718: * [ WFC: Parsed Entity ]
5719: * An entity reference must not contain the name of an unparsed entity
5720: *
5721: * Returns the xmlEntityPtr if found, or NULL otherwise. The str pointer
5722: * is updated to the current location in the string.
5723: */
5724: xmlEntityPtr
5725: xmlParseStringEntityRef(xmlParserCtxtPtr ctxt, const xmlChar ** str) {
5726: xmlChar *name;
5727: const xmlChar *ptr;
5728: xmlChar cur;
5729: xmlEntityPtr ent = NULL;
5730:
5731: GROW;
5732:
1.156 ! daniel 5733: if ((str == NULL) || (*str == NULL))
! 5734: return(NULL);
1.135 daniel 5735: ptr = *str;
5736: cur = *ptr;
5737: if (cur == '&') {
5738: ptr++;
5739: cur = *ptr;
5740: name = xmlParseStringName(ctxt, &ptr);
5741: if (name == NULL) {
5742: if ((ctxt->sax != NULL) && (ctxt->sax->error != NULL))
5743: ctxt->sax->error(ctxt->userData,
5744: "xmlParseEntityRef: no name\n");
5745: ctxt->errNo = XML_ERR_NAME_REQUIRED;
5746: ctxt->wellFormed = 0;
5747: } else {
1.152 daniel 5748: if (RAW == ';') {
1.135 daniel 5749: NEXT;
5750: /*
5751: * Ask first SAX for entity resolution, otherwise try the
5752: * predefined set.
5753: */
5754: if (ctxt->sax != NULL) {
5755: if (ctxt->sax->getEntity != NULL)
5756: ent = ctxt->sax->getEntity(ctxt->userData, name);
5757: if (ent == NULL)
5758: ent = xmlGetPredefinedEntity(name);
5759: }
5760: /*
5761: * [ WFC: Entity Declared ]
5762: * In a document without any DTD, a document with only an
5763: * internal DTD subset which contains no parameter entity
5764: * references, or a document with "standalone='yes'", the
5765: * Name given in the entity reference must match that in an
5766: * entity declaration, except that well-formed documents
5767: * need not declare any of the following entities: amp, lt,
5768: * gt, apos, quot.
5769: * The declaration of a parameter entity must precede any
5770: * reference to it.
5771: * Similarly, the declaration of a general entity must
5772: * precede any reference to it which appears in a default
5773: * value in an attribute-list declaration. Note that if
5774: * entities are declared in the external subset or in
5775: * external parameter entities, a non-validating processor
5776: * is not obligated to read and process their declarations;
5777: * for such documents, the rule that an entity must be
5778: * declared is a well-formedness constraint only if
5779: * standalone='yes'.
5780: */
5781: if (ent == NULL) {
5782: if ((ctxt->standalone == 1) ||
5783: ((ctxt->hasExternalSubset == 0) &&
5784: (ctxt->hasPErefs == 0))) {
5785: if ((ctxt->sax != NULL) && (ctxt->sax->error != NULL))
5786: ctxt->sax->error(ctxt->userData,
5787: "Entity '%s' not defined\n", name);
5788: ctxt->errNo = XML_ERR_UNDECLARED_ENTITY;
5789: ctxt->wellFormed = 0;
5790: } else {
5791: if ((ctxt->sax != NULL) && (ctxt->sax->warning != NULL))
5792: ctxt->sax->warning(ctxt->userData,
5793: "Entity '%s' not defined\n", name);
5794: ctxt->errNo = XML_WAR_UNDECLARED_ENTITY;
5795: }
5796: }
5797:
5798: /*
5799: * [ WFC: Parsed Entity ]
5800: * An entity reference must not contain the name of an
5801: * unparsed entity
5802: */
5803: else if (ent->type == XML_EXTERNAL_GENERAL_UNPARSED_ENTITY) {
5804: if ((ctxt->sax != NULL) && (ctxt->sax->error != NULL))
5805: ctxt->sax->error(ctxt->userData,
5806: "Entity reference to unparsed entity %s\n", name);
5807: ctxt->errNo = XML_ERR_UNPARSED_ENTITY;
5808: ctxt->wellFormed = 0;
5809: }
5810:
5811: /*
5812: * [ WFC: No External Entity References ]
5813: * Attribute values cannot contain direct or indirect
5814: * entity references to external entities.
5815: */
5816: else if ((ctxt->instate == XML_PARSER_ATTRIBUTE_VALUE) &&
5817: (ent->type == XML_EXTERNAL_GENERAL_PARSED_ENTITY)) {
5818: if ((ctxt->sax != NULL) && (ctxt->sax->error != NULL))
5819: ctxt->sax->error(ctxt->userData,
5820: "Attribute references external entity '%s'\n", name);
5821: ctxt->errNo = XML_ERR_ENTITY_IS_EXTERNAL;
5822: ctxt->wellFormed = 0;
5823: }
5824: /*
5825: * [ WFC: No < in Attribute Values ]
5826: * The replacement text of any entity referred to directly or
5827: * indirectly in an attribute value (other than "<") must
5828: * not contain a <.
5829: */
5830: else if ((ctxt->instate == XML_PARSER_ATTRIBUTE_VALUE) &&
5831: (ent != NULL) &&
5832: (xmlStrcmp(ent->name, BAD_CAST "lt")) &&
5833: (ent->content != NULL) &&
5834: (xmlStrchr(ent->content, '<'))) {
5835: if ((ctxt->sax != NULL) && (ctxt->sax->error != NULL))
5836: ctxt->sax->error(ctxt->userData,
5837: "'<' in entity '%s' is not allowed in attributes values\n", name);
5838: ctxt->errNo = XML_ERR_LT_IN_ATTRIBUTE;
5839: ctxt->wellFormed = 0;
5840: }
5841:
5842: /*
5843: * Internal check, no parameter entities here ...
5844: */
5845: else {
5846: switch (ent->type) {
5847: case XML_INTERNAL_PARAMETER_ENTITY:
5848: case XML_EXTERNAL_PARAMETER_ENTITY:
5849: if ((ctxt->sax != NULL) && (ctxt->sax->error != NULL))
5850: ctxt->sax->error(ctxt->userData,
5851: "Attempt to reference the parameter entity '%s'\n", name);
5852: ctxt->errNo = XML_ERR_ENTITY_IS_PARAMETER;
5853: ctxt->wellFormed = 0;
5854: break;
5855: }
5856: }
5857:
5858: /*
5859: * [ WFC: No Recursion ]
5860: * TODO A parsed entity must not contain a recursive reference
5861: * to itself, either directly or indirectly.
5862: */
5863:
5864: } else {
5865: if ((ctxt->sax != NULL) && (ctxt->sax->error != NULL))
5866: ctxt->sax->error(ctxt->userData,
5867: "xmlParseEntityRef: expecting ';'\n");
5868: ctxt->errNo = XML_ERR_ENTITYREF_SEMICOL_MISSING;
5869: ctxt->wellFormed = 0;
5870: }
5871: xmlFree(name);
5872: }
5873: }
5874: return(ent);
5875: }
1.24 daniel 5876:
1.50 daniel 5877: /**
5878: * xmlParsePEReference:
5879: * @ctxt: an XML parser context
5880: *
5881: * parse PEReference declarations
1.77 daniel 5882: * The entity content is handled directly by pushing it's content as
5883: * a new input stream.
1.22 daniel 5884: *
5885: * [69] PEReference ::= '%' Name ';'
1.68 daniel 5886: *
1.98 daniel 5887: * [ WFC: No Recursion ]
5888: * TODO A parsed entity must not contain a recursive
5889: * reference to itself, either directly or indirectly.
5890: *
5891: * [ WFC: Entity Declared ]
5892: * In a document without any DTD, a document with only an internal DTD
5893: * subset which contains no parameter entity references, or a document
5894: * with "standalone='yes'", ... ... The declaration of a parameter
5895: * entity must precede any reference to it...
5896: *
5897: * [ VC: Entity Declared ]
5898: * In a document with an external subset or external parameter entities
5899: * with "standalone='no'", ... ... The declaration of a parameter entity
5900: * must precede any reference to it...
5901: *
5902: * [ WFC: In DTD ]
5903: * Parameter-entity references may only appear in the DTD.
5904: * NOTE: misleading but this is handled.
1.22 daniel 5905: */
1.77 daniel 5906: void
1.55 daniel 5907: xmlParsePEReference(xmlParserCtxtPtr ctxt) {
1.123 daniel 5908: xmlChar *name;
1.72 daniel 5909: xmlEntityPtr entity = NULL;
1.50 daniel 5910: xmlParserInputPtr input;
1.22 daniel 5911:
1.152 daniel 5912: if (RAW == '%') {
1.40 daniel 5913: NEXT;
1.22 daniel 5914: name = xmlParseName(ctxt);
5915: if (name == NULL) {
1.55 daniel 5916: if ((ctxt->sax != NULL) && (ctxt->sax->error != NULL))
1.122 daniel 5917: ctxt->sax->error(ctxt->userData,
5918: "xmlParsePEReference: no name\n");
1.123 daniel 5919: ctxt->errNo = XML_ERR_NAME_REQUIRED;
1.59 daniel 5920: ctxt->wellFormed = 0;
1.22 daniel 5921: } else {
1.152 daniel 5922: if (RAW == ';') {
1.40 daniel 5923: NEXT;
1.98 daniel 5924: if ((ctxt->sax != NULL) &&
5925: (ctxt->sax->getParameterEntity != NULL))
5926: entity = ctxt->sax->getParameterEntity(ctxt->userData,
5927: name);
1.45 daniel 5928: if (entity == NULL) {
1.98 daniel 5929: /*
5930: * [ WFC: Entity Declared ]
5931: * In a document without any DTD, a document with only an
5932: * internal DTD subset which contains no parameter entity
5933: * references, or a document with "standalone='yes'", ...
5934: * ... The declaration of a parameter entity must precede
5935: * any reference to it...
5936: */
5937: if ((ctxt->standalone == 1) ||
5938: ((ctxt->hasExternalSubset == 0) &&
5939: (ctxt->hasPErefs == 0))) {
5940: if ((ctxt->sax != NULL) && (ctxt->sax->error != NULL))
5941: ctxt->sax->error(ctxt->userData,
5942: "PEReference: %%%s; not found\n", name);
1.123 daniel 5943: ctxt->errNo = XML_ERR_UNDECLARED_ENTITY;
1.98 daniel 5944: ctxt->wellFormed = 0;
5945: } else {
5946: /*
5947: * [ VC: Entity Declared ]
5948: * In a document with an external subset or external
5949: * parameter entities with "standalone='no'", ...
5950: * ... The declaration of a parameter entity must precede
5951: * any reference to it...
5952: */
5953: if ((ctxt->sax != NULL) && (ctxt->sax->warning != NULL))
5954: ctxt->sax->warning(ctxt->userData,
5955: "PEReference: %%%s; not found\n", name);
5956: ctxt->valid = 0;
5957: }
1.50 daniel 5958: } else {
1.98 daniel 5959: /*
5960: * Internal checking in case the entity quest barfed
5961: */
5962: if ((entity->type != XML_INTERNAL_PARAMETER_ENTITY) &&
5963: (entity->type != XML_EXTERNAL_PARAMETER_ENTITY)) {
5964: if ((ctxt->sax != NULL) && (ctxt->sax->warning != NULL))
5965: ctxt->sax->warning(ctxt->userData,
5966: "Internal: %%%s; is not a parameter entity\n", name);
5967: } else {
5968: input = xmlNewEntityInputStream(ctxt, entity);
5969: xmlPushInput(ctxt, input);
5970: }
1.45 daniel 5971: }
1.98 daniel 5972: ctxt->hasPErefs = 1;
1.22 daniel 5973: } else {
1.55 daniel 5974: if ((ctxt->sax != NULL) && (ctxt->sax->error != NULL))
1.74 daniel 5975: ctxt->sax->error(ctxt->userData,
1.59 daniel 5976: "xmlParsePEReference: expecting ';'\n");
1.123 daniel 5977: ctxt->errNo = XML_ERR_ENTITYREF_SEMICOL_MISSING;
1.59 daniel 5978: ctxt->wellFormed = 0;
1.22 daniel 5979: }
1.119 daniel 5980: xmlFree(name);
1.3 veillard 5981: }
5982: }
5983: }
5984:
1.50 daniel 5985: /**
1.135 daniel 5986: * xmlParseStringPEReference:
5987: * @ctxt: an XML parser context
5988: * @str: a pointer to an index in the string
5989: *
5990: * parse PEReference declarations
5991: *
5992: * [69] PEReference ::= '%' Name ';'
5993: *
5994: * [ WFC: No Recursion ]
5995: * TODO A parsed entity must not contain a recursive
5996: * reference to itself, either directly or indirectly.
5997: *
5998: * [ WFC: Entity Declared ]
5999: * In a document without any DTD, a document with only an internal DTD
6000: * subset which contains no parameter entity references, or a document
6001: * with "standalone='yes'", ... ... The declaration of a parameter
6002: * entity must precede any reference to it...
6003: *
6004: * [ VC: Entity Declared ]
6005: * In a document with an external subset or external parameter entities
6006: * with "standalone='no'", ... ... The declaration of a parameter entity
6007: * must precede any reference to it...
6008: *
6009: * [ WFC: In DTD ]
6010: * Parameter-entity references may only appear in the DTD.
6011: * NOTE: misleading but this is handled.
6012: *
6013: * Returns the string of the entity content.
6014: * str is updated to the current value of the index
6015: */
6016: xmlEntityPtr
6017: xmlParseStringPEReference(xmlParserCtxtPtr ctxt, const xmlChar **str) {
6018: const xmlChar *ptr;
6019: xmlChar cur;
6020: xmlChar *name;
6021: xmlEntityPtr entity = NULL;
6022:
6023: if ((str == NULL) || (*str == NULL)) return(NULL);
6024: ptr = *str;
6025: cur = *ptr;
6026: if (cur == '%') {
6027: ptr++;
6028: cur = *ptr;
6029: name = xmlParseStringName(ctxt, &ptr);
6030: if (name == NULL) {
6031: if ((ctxt->sax != NULL) && (ctxt->sax->error != NULL))
6032: ctxt->sax->error(ctxt->userData,
6033: "xmlParseStringPEReference: no name\n");
6034: ctxt->errNo = XML_ERR_NAME_REQUIRED;
6035: ctxt->wellFormed = 0;
6036: } else {
6037: cur = *ptr;
6038: if (cur == ';') {
6039: ptr++;
6040: cur = *ptr;
6041: if ((ctxt->sax != NULL) &&
6042: (ctxt->sax->getParameterEntity != NULL))
6043: entity = ctxt->sax->getParameterEntity(ctxt->userData,
6044: name);
6045: if (entity == NULL) {
6046: /*
6047: * [ WFC: Entity Declared ]
6048: * In a document without any DTD, a document with only an
6049: * internal DTD subset which contains no parameter entity
6050: * references, or a document with "standalone='yes'", ...
6051: * ... The declaration of a parameter entity must precede
6052: * any reference to it...
6053: */
6054: if ((ctxt->standalone == 1) ||
6055: ((ctxt->hasExternalSubset == 0) &&
6056: (ctxt->hasPErefs == 0))) {
6057: if ((ctxt->sax != NULL) && (ctxt->sax->error != NULL))
6058: ctxt->sax->error(ctxt->userData,
6059: "PEReference: %%%s; not found\n", name);
6060: ctxt->errNo = XML_ERR_UNDECLARED_ENTITY;
6061: ctxt->wellFormed = 0;
6062: } else {
6063: /*
6064: * [ VC: Entity Declared ]
6065: * In a document with an external subset or external
6066: * parameter entities with "standalone='no'", ...
6067: * ... The declaration of a parameter entity must
6068: * precede any reference to it...
6069: */
6070: if ((ctxt->sax != NULL) && (ctxt->sax->warning != NULL))
6071: ctxt->sax->warning(ctxt->userData,
6072: "PEReference: %%%s; not found\n", name);
6073: ctxt->valid = 0;
6074: }
6075: } else {
6076: /*
6077: * Internal checking in case the entity quest barfed
6078: */
6079: if ((entity->type != XML_INTERNAL_PARAMETER_ENTITY) &&
6080: (entity->type != XML_EXTERNAL_PARAMETER_ENTITY)) {
6081: if ((ctxt->sax != NULL) && (ctxt->sax->warning != NULL))
6082: ctxt->sax->warning(ctxt->userData,
6083: "Internal: %%%s; is not a parameter entity\n", name);
6084: }
6085: }
6086: ctxt->hasPErefs = 1;
6087: } else {
6088: if ((ctxt->sax != NULL) && (ctxt->sax->error != NULL))
6089: ctxt->sax->error(ctxt->userData,
6090: "xmlParseStringPEReference: expecting ';'\n");
6091: ctxt->errNo = XML_ERR_ENTITYREF_SEMICOL_MISSING;
6092: ctxt->wellFormed = 0;
6093: }
6094: xmlFree(name);
6095: }
6096: }
6097: *str = ptr;
6098: return(entity);
6099: }
6100:
6101: /**
1.50 daniel 6102: * xmlParseDocTypeDecl :
6103: * @ctxt: an XML parser context
6104: *
6105: * parse a DOCTYPE declaration
1.21 daniel 6106: *
1.22 daniel 6107: * [28] doctypedecl ::= '<!DOCTYPE' S Name (S ExternalID)? S?
6108: * ('[' (markupdecl | PEReference | S)* ']' S?)? '>'
1.98 daniel 6109: *
6110: * [ VC: Root Element Type ]
1.99 daniel 6111: * The Name in the document type declaration must match the element
1.98 daniel 6112: * type of the root element.
1.21 daniel 6113: */
6114:
1.55 daniel 6115: void
6116: xmlParseDocTypeDecl(xmlParserCtxtPtr ctxt) {
1.123 daniel 6117: xmlChar *name;
6118: xmlChar *ExternalID = NULL;
6119: xmlChar *URI = NULL;
1.21 daniel 6120:
6121: /*
6122: * We know that '<!DOCTYPE' has been detected.
6123: */
1.40 daniel 6124: SKIP(9);
1.21 daniel 6125:
1.42 daniel 6126: SKIP_BLANKS;
1.21 daniel 6127:
6128: /*
6129: * Parse the DOCTYPE name.
6130: */
6131: name = xmlParseName(ctxt);
6132: if (name == NULL) {
1.55 daniel 6133: if ((ctxt->sax != NULL) && (ctxt->sax->error != NULL))
1.122 daniel 6134: ctxt->sax->error(ctxt->userData,
6135: "xmlParseDocTypeDecl : no DOCTYPE name !\n");
1.59 daniel 6136: ctxt->wellFormed = 0;
1.123 daniel 6137: ctxt->errNo = XML_ERR_NAME_REQUIRED;
1.21 daniel 6138: }
6139:
1.42 daniel 6140: SKIP_BLANKS;
1.21 daniel 6141:
6142: /*
1.22 daniel 6143: * Check for SystemID and ExternalID
6144: */
1.67 daniel 6145: URI = xmlParseExternalID(ctxt, &ExternalID, 1);
1.98 daniel 6146:
6147: if ((URI != NULL) || (ExternalID != NULL)) {
6148: ctxt->hasExternalSubset = 1;
6149: }
6150:
1.42 daniel 6151: SKIP_BLANKS;
1.36 daniel 6152:
1.76 daniel 6153: /*
6154: * NOTE: the SAX callback may try to fetch the external subset
6155: * entity and fill it up !
6156: */
1.72 daniel 6157: if ((ctxt->sax != NULL) && (ctxt->sax->internalSubset != NULL))
1.74 daniel 6158: ctxt->sax->internalSubset(ctxt->userData, name, ExternalID, URI);
1.22 daniel 6159:
6160: /*
1.140 daniel 6161: * Cleanup
6162: */
6163: if (URI != NULL) xmlFree(URI);
6164: if (ExternalID != NULL) xmlFree(ExternalID);
6165: if (name != NULL) xmlFree(name);
6166:
6167: /*
6168: * Is there any internal subset declarations ?
6169: * they are handled separately in xmlParseInternalSubset()
6170: */
1.152 daniel 6171: if (RAW == '[')
1.140 daniel 6172: return;
6173:
6174: /*
6175: * We should be at the end of the DOCTYPE declaration.
6176: */
1.152 daniel 6177: if (RAW != '>') {
1.140 daniel 6178: if ((ctxt->sax != NULL) && (ctxt->sax->error != NULL))
6179: ctxt->sax->error(ctxt->userData, "DOCTYPE unproperly terminated\n");
6180: ctxt->wellFormed = 0;
6181: ctxt->errNo = XML_ERR_DOCTYPE_NOT_FINISHED;
6182: }
6183: NEXT;
6184: }
6185:
6186: /**
6187: * xmlParseInternalsubset :
6188: * @ctxt: an XML parser context
6189: *
6190: * parse the internal subset declaration
6191: *
6192: * [28 end] ('[' (markupdecl | PEReference | S)* ']' S?)? '>'
6193: */
6194:
6195: void
6196: xmlParseInternalSubset(xmlParserCtxtPtr ctxt) {
6197: /*
1.22 daniel 6198: * Is there any DTD definition ?
6199: */
1.152 daniel 6200: if (RAW == '[') {
1.96 daniel 6201: ctxt->instate = XML_PARSER_DTD;
1.40 daniel 6202: NEXT;
1.22 daniel 6203: /*
6204: * Parse the succession of Markup declarations and
6205: * PEReferences.
6206: * Subsequence (markupdecl | PEReference | S)*
6207: */
1.152 daniel 6208: while (RAW != ']') {
1.123 daniel 6209: const xmlChar *check = CUR_PTR;
1.115 daniel 6210: int cons = ctxt->input->consumed;
1.22 daniel 6211:
1.42 daniel 6212: SKIP_BLANKS;
1.22 daniel 6213: xmlParseMarkupDecl(ctxt);
1.50 daniel 6214: xmlParsePEReference(ctxt);
1.22 daniel 6215:
1.115 daniel 6216: /*
6217: * Pop-up of finished entities.
6218: */
1.152 daniel 6219: while ((RAW == 0) && (ctxt->inputNr > 1))
1.115 daniel 6220: xmlPopInput(ctxt);
6221:
1.118 daniel 6222: if ((CUR_PTR == check) && (cons == ctxt->input->consumed)) {
1.55 daniel 6223: if ((ctxt->sax != NULL) && (ctxt->sax->error != NULL))
1.74 daniel 6224: ctxt->sax->error(ctxt->userData,
1.140 daniel 6225: "xmlParseInternalSubset: error detected in Markup declaration\n");
1.59 daniel 6226: ctxt->wellFormed = 0;
1.123 daniel 6227: ctxt->errNo = XML_ERR_INTERNAL_ERROR;
1.22 daniel 6228: break;
6229: }
6230: }
1.152 daniel 6231: if (RAW == ']') NEXT;
1.22 daniel 6232: }
6233:
6234: /*
6235: * We should be at the end of the DOCTYPE declaration.
1.21 daniel 6236: */
1.152 daniel 6237: if (RAW != '>') {
1.55 daniel 6238: if ((ctxt->sax != NULL) && (ctxt->sax->error != NULL))
1.74 daniel 6239: ctxt->sax->error(ctxt->userData, "DOCTYPE unproperly terminated\n");
1.59 daniel 6240: ctxt->wellFormed = 0;
1.123 daniel 6241: ctxt->errNo = XML_ERR_DOCTYPE_NOT_FINISHED;
1.21 daniel 6242: }
1.40 daniel 6243: NEXT;
1.21 daniel 6244: }
6245:
1.50 daniel 6246: /**
6247: * xmlParseAttribute:
6248: * @ctxt: an XML parser context
1.123 daniel 6249: * @value: a xmlChar ** used to store the value of the attribute
1.50 daniel 6250: *
6251: * parse an attribute
1.3 veillard 6252: *
1.22 daniel 6253: * [41] Attribute ::= Name Eq AttValue
6254: *
1.98 daniel 6255: * [ WFC: No External Entity References ]
6256: * Attribute values cannot contain direct or indirect entity references
6257: * to external entities.
6258: *
6259: * [ WFC: No < in Attribute Values ]
6260: * The replacement text of any entity referred to directly or indirectly in
6261: * an attribute value (other than "<") must not contain a <.
6262: *
6263: * [ VC: Attribute Value Type ]
1.117 daniel 6264: * The attribute must have been declared; the value must be of the type
1.99 daniel 6265: * declared for it.
1.98 daniel 6266: *
1.22 daniel 6267: * [25] Eq ::= S? '=' S?
6268: *
1.29 daniel 6269: * With namespace:
6270: *
6271: * [NS 11] Attribute ::= QName Eq AttValue
1.43 daniel 6272: *
6273: * Also the case QName == xmlns:??? is handled independently as a namespace
6274: * definition.
1.69 daniel 6275: *
1.72 daniel 6276: * Returns the attribute name, and the value in *value.
1.3 veillard 6277: */
6278:
1.123 daniel 6279: xmlChar *
6280: xmlParseAttribute(xmlParserCtxtPtr ctxt, xmlChar **value) {
6281: xmlChar *name, *val;
1.3 veillard 6282:
1.72 daniel 6283: *value = NULL;
6284: name = xmlParseName(ctxt);
1.22 daniel 6285: if (name == NULL) {
1.55 daniel 6286: if ((ctxt->sax != NULL) && (ctxt->sax->error != NULL))
1.74 daniel 6287: ctxt->sax->error(ctxt->userData, "error parsing attribute name\n");
1.59 daniel 6288: ctxt->wellFormed = 0;
1.123 daniel 6289: ctxt->errNo = XML_ERR_NAME_REQUIRED;
1.52 daniel 6290: return(NULL);
1.3 veillard 6291: }
6292:
6293: /*
1.29 daniel 6294: * read the value
1.3 veillard 6295: */
1.42 daniel 6296: SKIP_BLANKS;
1.152 daniel 6297: if (RAW == '=') {
1.40 daniel 6298: NEXT;
1.42 daniel 6299: SKIP_BLANKS;
1.72 daniel 6300: val = xmlParseAttValue(ctxt);
1.96 daniel 6301: ctxt->instate = XML_PARSER_CONTENT;
1.29 daniel 6302: } else {
1.55 daniel 6303: if ((ctxt->sax != NULL) && (ctxt->sax->error != NULL))
1.74 daniel 6304: ctxt->sax->error(ctxt->userData,
1.59 daniel 6305: "Specification mandate value for attribute %s\n", name);
1.123 daniel 6306: ctxt->errNo = XML_ERR_ATTRIBUTE_WITHOUT_VALUE;
1.59 daniel 6307: ctxt->wellFormed = 0;
1.52 daniel 6308: return(NULL);
1.43 daniel 6309: }
6310:
1.72 daniel 6311: *value = val;
6312: return(name);
1.3 veillard 6313: }
6314:
1.50 daniel 6315: /**
6316: * xmlParseStartTag:
6317: * @ctxt: an XML parser context
6318: *
6319: * parse a start of tag either for rule element or
6320: * EmptyElement. In both case we don't parse the tag closing chars.
1.27 daniel 6321: *
6322: * [40] STag ::= '<' Name (S Attribute)* S? '>'
6323: *
1.98 daniel 6324: * [ WFC: Unique Att Spec ]
6325: * No attribute name may appear more than once in the same start-tag or
6326: * empty-element tag.
6327: *
1.29 daniel 6328: * [44] EmptyElemTag ::= '<' Name (S Attribute)* S? '/>'
6329: *
1.98 daniel 6330: * [ WFC: Unique Att Spec ]
6331: * No attribute name may appear more than once in the same start-tag or
6332: * empty-element tag.
6333: *
1.29 daniel 6334: * With namespace:
6335: *
6336: * [NS 8] STag ::= '<' QName (S Attribute)* S? '>'
6337: *
6338: * [NS 10] EmptyElement ::= '<' QName (S Attribute)* S? '/>'
1.83 daniel 6339: *
1.129 daniel 6340: * Returne the element name parsed
1.2 veillard 6341: */
6342:
1.123 daniel 6343: xmlChar *
1.69 daniel 6344: xmlParseStartTag(xmlParserCtxtPtr ctxt) {
1.123 daniel 6345: xmlChar *name;
6346: xmlChar *attname;
6347: xmlChar *attvalue;
6348: const xmlChar **atts = NULL;
1.72 daniel 6349: int nbatts = 0;
6350: int maxatts = 0;
6351: int i;
1.2 veillard 6352:
1.152 daniel 6353: if (RAW != '<') return(NULL);
1.40 daniel 6354: NEXT;
1.3 veillard 6355:
1.72 daniel 6356: name = xmlParseName(ctxt);
1.59 daniel 6357: if (name == NULL) {
6358: if ((ctxt->sax != NULL) && (ctxt->sax->error != NULL))
1.74 daniel 6359: ctxt->sax->error(ctxt->userData,
1.59 daniel 6360: "xmlParseStartTag: invalid element name\n");
1.123 daniel 6361: ctxt->errNo = XML_ERR_NAME_REQUIRED;
1.59 daniel 6362: ctxt->wellFormed = 0;
1.83 daniel 6363: return(NULL);
1.50 daniel 6364: }
6365:
6366: /*
1.3 veillard 6367: * Now parse the attributes, it ends up with the ending
6368: *
6369: * (S Attribute)* S?
6370: */
1.42 daniel 6371: SKIP_BLANKS;
1.91 daniel 6372: GROW;
1.153 daniel 6373: while ((IS_CHAR(RAW)) &&
1.152 daniel 6374: (RAW != '>') &&
6375: ((RAW != '/') || (NXT(1) != '>'))) {
1.123 daniel 6376: const xmlChar *q = CUR_PTR;
1.91 daniel 6377: int cons = ctxt->input->consumed;
1.29 daniel 6378:
1.72 daniel 6379: attname = xmlParseAttribute(ctxt, &attvalue);
6380: if ((attname != NULL) && (attvalue != NULL)) {
6381: /*
1.98 daniel 6382: * [ WFC: Unique Att Spec ]
6383: * No attribute name may appear more than once in the same
6384: * start-tag or empty-element tag.
1.72 daniel 6385: */
6386: for (i = 0; i < nbatts;i += 2) {
6387: if (!xmlStrcmp(atts[i], attname)) {
6388: if ((ctxt->sax != NULL) && (ctxt->sax->error != NULL))
1.98 daniel 6389: ctxt->sax->error(ctxt->userData,
6390: "Attribute %s redefined\n",
6391: attname);
1.72 daniel 6392: ctxt->wellFormed = 0;
1.123 daniel 6393: ctxt->errNo = XML_ERR_ATTRIBUTE_REDEFINED;
1.119 daniel 6394: xmlFree(attname);
6395: xmlFree(attvalue);
1.98 daniel 6396: goto failed;
1.72 daniel 6397: }
6398: }
6399:
6400: /*
6401: * Add the pair to atts
6402: */
6403: if (atts == NULL) {
6404: maxatts = 10;
1.123 daniel 6405: atts = (const xmlChar **) xmlMalloc(maxatts * sizeof(xmlChar *));
1.72 daniel 6406: if (atts == NULL) {
1.86 daniel 6407: fprintf(stderr, "malloc of %ld byte failed\n",
1.123 daniel 6408: maxatts * (long)sizeof(xmlChar *));
1.83 daniel 6409: return(NULL);
1.72 daniel 6410: }
1.127 daniel 6411: } else if (nbatts + 4 > maxatts) {
1.72 daniel 6412: maxatts *= 2;
1.123 daniel 6413: atts = (const xmlChar **) xmlRealloc(atts,
6414: maxatts * sizeof(xmlChar *));
1.72 daniel 6415: if (atts == NULL) {
1.86 daniel 6416: fprintf(stderr, "realloc of %ld byte failed\n",
1.123 daniel 6417: maxatts * (long)sizeof(xmlChar *));
1.83 daniel 6418: return(NULL);
1.72 daniel 6419: }
6420: }
6421: atts[nbatts++] = attname;
6422: atts[nbatts++] = attvalue;
6423: atts[nbatts] = NULL;
6424: atts[nbatts + 1] = NULL;
6425: }
6426:
1.116 daniel 6427: failed:
1.42 daniel 6428: SKIP_BLANKS;
1.91 daniel 6429: if ((cons == ctxt->input->consumed) && (q == CUR_PTR)) {
1.55 daniel 6430: if ((ctxt->sax != NULL) && (ctxt->sax->error != NULL))
1.74 daniel 6431: ctxt->sax->error(ctxt->userData,
1.31 daniel 6432: "xmlParseStartTag: problem parsing attributes\n");
1.123 daniel 6433: ctxt->errNo = XML_ERR_INTERNAL_ERROR;
1.59 daniel 6434: ctxt->wellFormed = 0;
1.29 daniel 6435: break;
1.3 veillard 6436: }
1.91 daniel 6437: GROW;
1.3 veillard 6438: }
6439:
1.43 daniel 6440: /*
1.72 daniel 6441: * SAX: Start of Element !
1.43 daniel 6442: */
1.72 daniel 6443: if ((ctxt->sax != NULL) && (ctxt->sax->startElement != NULL))
1.74 daniel 6444: ctxt->sax->startElement(ctxt->userData, name, atts);
1.43 daniel 6445:
1.72 daniel 6446: if (atts != NULL) {
1.123 daniel 6447: for (i = 0;i < nbatts;i++) xmlFree((xmlChar *) atts[i]);
1.119 daniel 6448: xmlFree(atts);
1.72 daniel 6449: }
1.83 daniel 6450: return(name);
1.3 veillard 6451: }
6452:
1.50 daniel 6453: /**
6454: * xmlParseEndTag:
6455: * @ctxt: an XML parser context
6456: *
6457: * parse an end of tag
1.27 daniel 6458: *
6459: * [42] ETag ::= '</' Name S? '>'
1.29 daniel 6460: *
6461: * With namespace
6462: *
1.72 daniel 6463: * [NS 9] ETag ::= '</' QName S? '>'
1.7 veillard 6464: */
6465:
1.55 daniel 6466: void
1.140 daniel 6467: xmlParseEndTag(xmlParserCtxtPtr ctxt) {
1.123 daniel 6468: xmlChar *name;
1.140 daniel 6469: xmlChar *oldname;
1.7 veillard 6470:
1.91 daniel 6471: GROW;
1.152 daniel 6472: if ((RAW != '<') || (NXT(1) != '/')) {
1.55 daniel 6473: if ((ctxt->sax != NULL) && (ctxt->sax->error != NULL))
1.74 daniel 6474: ctxt->sax->error(ctxt->userData, "xmlParseEndTag: '</' not found\n");
1.59 daniel 6475: ctxt->wellFormed = 0;
1.123 daniel 6476: ctxt->errNo = XML_ERR_LTSLASH_REQUIRED;
1.27 daniel 6477: return;
6478: }
1.40 daniel 6479: SKIP(2);
1.7 veillard 6480:
1.72 daniel 6481: name = xmlParseName(ctxt);
1.7 veillard 6482:
6483: /*
6484: * We should definitely be at the ending "S? '>'" part
6485: */
1.91 daniel 6486: GROW;
1.42 daniel 6487: SKIP_BLANKS;
1.153 daniel 6488: if ((!IS_CHAR(RAW)) || (RAW != '>')) {
1.55 daniel 6489: if ((ctxt->sax != NULL) && (ctxt->sax->error != NULL))
1.74 daniel 6490: ctxt->sax->error(ctxt->userData, "End tag : expected '>'\n");
1.123 daniel 6491: ctxt->errNo = XML_ERR_GT_REQUIRED;
1.59 daniel 6492: ctxt->wellFormed = 0;
1.7 veillard 6493: } else
1.40 daniel 6494: NEXT;
1.7 veillard 6495:
1.72 daniel 6496: /*
1.98 daniel 6497: * [ WFC: Element Type Match ]
6498: * The Name in an element's end-tag must match the element type in the
6499: * start-tag.
6500: *
1.83 daniel 6501: */
1.147 daniel 6502: if ((name == NULL) || (ctxt->name == NULL) ||
6503: (xmlStrcmp(name, ctxt->name))) {
6504: if ((ctxt->sax != NULL) && (ctxt->sax->error != NULL)) {
6505: if ((name != NULL) && (ctxt->name != NULL)) {
6506: ctxt->sax->error(ctxt->userData,
6507: "Opening and ending tag mismatch: %s and %s\n",
6508: ctxt->name, name);
6509: } else if (ctxt->name != NULL) {
6510: ctxt->sax->error(ctxt->userData,
6511: "Ending tag eror for: %s\n", ctxt->name);
6512: } else {
6513: ctxt->sax->error(ctxt->userData,
6514: "Ending tag error: internal error ???\n");
6515: }
1.122 daniel 6516:
1.147 daniel 6517: }
1.123 daniel 6518: ctxt->errNo = XML_ERR_TAG_NAME_MISMATCH;
1.83 daniel 6519: ctxt->wellFormed = 0;
6520: }
6521:
6522: /*
1.72 daniel 6523: * SAX: End of Tag
6524: */
6525: if ((ctxt->sax != NULL) && (ctxt->sax->endElement != NULL))
1.74 daniel 6526: ctxt->sax->endElement(ctxt->userData, name);
1.72 daniel 6527:
6528: if (name != NULL)
1.119 daniel 6529: xmlFree(name);
1.140 daniel 6530: oldname = namePop(ctxt);
6531: if (oldname != NULL) {
6532: #ifdef DEBUG_STACK
6533: fprintf(stderr,"Close: popped %s\n", oldname);
6534: #endif
6535: xmlFree(oldname);
6536: }
1.7 veillard 6537: return;
6538: }
6539:
1.50 daniel 6540: /**
6541: * xmlParseCDSect:
6542: * @ctxt: an XML parser context
6543: *
6544: * Parse escaped pure raw content.
1.29 daniel 6545: *
6546: * [18] CDSect ::= CDStart CData CDEnd
6547: *
6548: * [19] CDStart ::= '<![CDATA['
6549: *
6550: * [20] Data ::= (Char* - (Char* ']]>' Char*))
6551: *
6552: * [21] CDEnd ::= ']]>'
1.3 veillard 6553: */
1.55 daniel 6554: void
6555: xmlParseCDSect(xmlParserCtxtPtr ctxt) {
1.135 daniel 6556: xmlChar *buf = NULL;
6557: int len = 0;
1.140 daniel 6558: int size = XML_PARSER_BUFFER_SIZE;
1.152 daniel 6559: int r, rl;
6560: int s, sl;
6561: int cur, l;
1.3 veillard 6562:
1.106 daniel 6563: if ((NXT(0) == '<') && (NXT(1) == '!') &&
1.40 daniel 6564: (NXT(2) == '[') && (NXT(3) == 'C') &&
6565: (NXT(4) == 'D') && (NXT(5) == 'A') &&
6566: (NXT(6) == 'T') && (NXT(7) == 'A') &&
6567: (NXT(8) == '[')) {
6568: SKIP(9);
1.29 daniel 6569: } else
1.45 daniel 6570: return;
1.109 daniel 6571:
6572: ctxt->instate = XML_PARSER_CDATA_SECTION;
1.152 daniel 6573: r = CUR_CHAR(rl);
6574: if (!IS_CHAR(r)) {
1.55 daniel 6575: if ((ctxt->sax != NULL) && (ctxt->sax->error != NULL))
1.122 daniel 6576: ctxt->sax->error(ctxt->userData,
1.135 daniel 6577: "CData section not finished\n");
1.59 daniel 6578: ctxt->wellFormed = 0;
1.123 daniel 6579: ctxt->errNo = XML_ERR_CDATA_NOT_FINISHED;
1.109 daniel 6580: ctxt->instate = XML_PARSER_CONTENT;
1.45 daniel 6581: return;
1.3 veillard 6582: }
1.152 daniel 6583: NEXTL(rl);
6584: s = CUR_CHAR(sl);
6585: if (!IS_CHAR(s)) {
1.55 daniel 6586: if ((ctxt->sax != NULL) && (ctxt->sax->error != NULL))
1.122 daniel 6587: ctxt->sax->error(ctxt->userData,
1.135 daniel 6588: "CData section not finished\n");
1.123 daniel 6589: ctxt->errNo = XML_ERR_CDATA_NOT_FINISHED;
1.59 daniel 6590: ctxt->wellFormed = 0;
1.109 daniel 6591: ctxt->instate = XML_PARSER_CONTENT;
1.45 daniel 6592: return;
1.3 veillard 6593: }
1.152 daniel 6594: NEXTL(sl);
6595: cur = CUR_CHAR(l);
1.135 daniel 6596: buf = (xmlChar *) xmlMalloc(size * sizeof(xmlChar));
6597: if (buf == NULL) {
6598: fprintf(stderr, "malloc of %d byte failed\n", size);
6599: return;
6600: }
1.108 veillard 6601: while (IS_CHAR(cur) &&
1.110 daniel 6602: ((r != ']') || (s != ']') || (cur != '>'))) {
1.152 daniel 6603: if (len + 5 >= size) {
1.135 daniel 6604: size *= 2;
6605: buf = xmlRealloc(buf, size * sizeof(xmlChar));
6606: if (buf == NULL) {
6607: fprintf(stderr, "realloc of %d byte failed\n", size);
6608: return;
6609: }
6610: }
1.152 daniel 6611: COPY_BUF(rl,buf,len,r);
1.110 daniel 6612: r = s;
1.152 daniel 6613: rl = sl;
1.110 daniel 6614: s = cur;
1.152 daniel 6615: sl = l;
6616: NEXTL(l);
6617: cur = CUR_CHAR(l);
1.3 veillard 6618: }
1.135 daniel 6619: buf[len] = 0;
1.109 daniel 6620: ctxt->instate = XML_PARSER_CONTENT;
1.152 daniel 6621: if (cur != '>') {
1.55 daniel 6622: if ((ctxt->sax != NULL) && (ctxt->sax->error != NULL))
1.122 daniel 6623: ctxt->sax->error(ctxt->userData,
1.135 daniel 6624: "CData section not finished\n%.50s\n", buf);
1.123 daniel 6625: ctxt->errNo = XML_ERR_CDATA_NOT_FINISHED;
1.59 daniel 6626: ctxt->wellFormed = 0;
1.135 daniel 6627: xmlFree(buf);
1.45 daniel 6628: return;
1.3 veillard 6629: }
1.152 daniel 6630: NEXTL(l);
1.16 daniel 6631:
1.45 daniel 6632: /*
1.135 daniel 6633: * Ok the buffer is to be consumed as cdata.
1.45 daniel 6634: */
6635: if (ctxt->sax != NULL) {
1.107 daniel 6636: if (ctxt->sax->cdataBlock != NULL)
1.135 daniel 6637: ctxt->sax->cdataBlock(ctxt->userData, buf, len);
1.45 daniel 6638: }
1.135 daniel 6639: xmlFree(buf);
1.2 veillard 6640: }
6641:
1.50 daniel 6642: /**
6643: * xmlParseContent:
6644: * @ctxt: an XML parser context
6645: *
6646: * Parse a content:
1.2 veillard 6647: *
1.27 daniel 6648: * [43] content ::= (element | CharData | Reference | CDSect | PI | Comment)*
1.2 veillard 6649: */
6650:
1.55 daniel 6651: void
6652: xmlParseContent(xmlParserCtxtPtr ctxt) {
1.97 daniel 6653: GROW;
1.152 daniel 6654: while ((RAW != '<') || (NXT(1) != '/')) {
1.123 daniel 6655: const xmlChar *test = CUR_PTR;
1.91 daniel 6656: int cons = ctxt->input->consumed;
1.123 daniel 6657: xmlChar tok = ctxt->token;
1.27 daniel 6658:
6659: /*
1.152 daniel 6660: * Handle possible processed charrefs.
6661: */
6662: if (ctxt->token != 0) {
6663: xmlParseCharData(ctxt, 0);
6664: }
6665: /*
1.27 daniel 6666: * First case : a Processing Instruction.
6667: */
1.152 daniel 6668: else if ((RAW == '<') && (NXT(1) == '?')) {
1.27 daniel 6669: xmlParsePI(ctxt);
6670: }
1.72 daniel 6671:
1.27 daniel 6672: /*
6673: * Second case : a CDSection
6674: */
1.152 daniel 6675: else if ((RAW == '<') && (NXT(1) == '!') &&
1.40 daniel 6676: (NXT(2) == '[') && (NXT(3) == 'C') &&
6677: (NXT(4) == 'D') && (NXT(5) == 'A') &&
6678: (NXT(6) == 'T') && (NXT(7) == 'A') &&
6679: (NXT(8) == '[')) {
1.45 daniel 6680: xmlParseCDSect(ctxt);
1.27 daniel 6681: }
1.72 daniel 6682:
1.27 daniel 6683: /*
6684: * Third case : a comment
6685: */
1.152 daniel 6686: else if ((RAW == '<') && (NXT(1) == '!') &&
1.40 daniel 6687: (NXT(2) == '-') && (NXT(3) == '-')) {
1.114 daniel 6688: xmlParseComment(ctxt);
1.97 daniel 6689: ctxt->instate = XML_PARSER_CONTENT;
1.27 daniel 6690: }
1.72 daniel 6691:
1.27 daniel 6692: /*
6693: * Fourth case : a sub-element.
6694: */
1.152 daniel 6695: else if (RAW == '<') {
1.72 daniel 6696: xmlParseElement(ctxt);
1.45 daniel 6697: }
1.72 daniel 6698:
1.45 daniel 6699: /*
1.50 daniel 6700: * Fifth case : a reference. If if has not been resolved,
6701: * parsing returns it's Name, create the node
1.45 daniel 6702: */
1.97 daniel 6703:
1.152 daniel 6704: else if (RAW == '&') {
1.77 daniel 6705: xmlParseReference(ctxt);
1.27 daniel 6706: }
1.72 daniel 6707:
1.27 daniel 6708: /*
6709: * Last case, text. Note that References are handled directly.
6710: */
6711: else {
1.45 daniel 6712: xmlParseCharData(ctxt, 0);
1.3 veillard 6713: }
1.14 veillard 6714:
1.91 daniel 6715: GROW;
1.14 veillard 6716: /*
1.45 daniel 6717: * Pop-up of finished entities.
1.14 veillard 6718: */
1.152 daniel 6719: while ((RAW == 0) && (ctxt->inputNr > 1))
1.69 daniel 6720: xmlPopInput(ctxt);
1.135 daniel 6721: SHRINK;
1.45 daniel 6722:
1.113 daniel 6723: if ((cons == ctxt->input->consumed) && (test == CUR_PTR) &&
6724: (tok == ctxt->token)) {
1.55 daniel 6725: if ((ctxt->sax != NULL) && (ctxt->sax->error != NULL))
1.74 daniel 6726: ctxt->sax->error(ctxt->userData,
1.59 daniel 6727: "detected an error in element content\n");
1.123 daniel 6728: ctxt->errNo = XML_ERR_INTERNAL_ERROR;
1.59 daniel 6729: ctxt->wellFormed = 0;
1.29 daniel 6730: break;
6731: }
1.3 veillard 6732: }
1.2 veillard 6733: }
6734:
1.50 daniel 6735: /**
6736: * xmlParseElement:
6737: * @ctxt: an XML parser context
6738: *
6739: * parse an XML element, this is highly recursive
1.26 daniel 6740: *
6741: * [39] element ::= EmptyElemTag | STag content ETag
6742: *
1.98 daniel 6743: * [ WFC: Element Type Match ]
6744: * The Name in an element's end-tag must match the element type in the
6745: * start-tag.
6746: *
6747: * [ VC: Element Valid ]
1.117 daniel 6748: * An element is valid if there is a declaration matching elementdecl
1.99 daniel 6749: * where the Name matches the element type and one of the following holds:
6750: * - The declaration matches EMPTY and the element has no content.
6751: * - The declaration matches children and the sequence of child elements
6752: * belongs to the language generated by the regular expression in the
6753: * content model, with optional white space (characters matching the
6754: * nonterminal S) between each pair of child elements.
6755: * - The declaration matches Mixed and the content consists of character
6756: * data and child elements whose types match names in the content model.
6757: * - The declaration matches ANY, and the types of any child elements have
6758: * been declared.
1.2 veillard 6759: */
1.26 daniel 6760:
1.72 daniel 6761: void
1.69 daniel 6762: xmlParseElement(xmlParserCtxtPtr ctxt) {
1.123 daniel 6763: const xmlChar *openTag = CUR_PTR;
6764: xmlChar *name;
1.140 daniel 6765: xmlChar *oldname;
1.32 daniel 6766: xmlParserNodeInfo node_info;
1.118 daniel 6767: xmlNodePtr ret;
1.2 veillard 6768:
1.32 daniel 6769: /* Capture start position */
1.118 daniel 6770: if (ctxt->record_info) {
6771: node_info.begin_pos = ctxt->input->consumed +
6772: (CUR_PTR - ctxt->input->base);
6773: node_info.begin_line = ctxt->input->line;
6774: }
1.32 daniel 6775:
1.83 daniel 6776: name = xmlParseStartTag(ctxt);
6777: if (name == NULL) {
6778: return;
6779: }
1.140 daniel 6780: namePush(ctxt, name);
1.118 daniel 6781: ret = ctxt->node;
1.2 veillard 6782:
6783: /*
1.99 daniel 6784: * [ VC: Root Element Type ]
6785: * The Name in the document type declaration must match the element
6786: * type of the root element.
6787: */
1.105 daniel 6788: if (ctxt->validate && ctxt->wellFormed && ctxt->myDoc &&
6789: ctxt->node && (ctxt->node == ctxt->myDoc->root))
1.102 daniel 6790: ctxt->valid &= xmlValidateRoot(&ctxt->vctxt, ctxt->myDoc);
1.99 daniel 6791:
6792: /*
1.2 veillard 6793: * Check for an Empty Element.
6794: */
1.152 daniel 6795: if ((RAW == '/') && (NXT(1) == '>')) {
1.40 daniel 6796: SKIP(2);
1.72 daniel 6797: if ((ctxt->sax != NULL) && (ctxt->sax->endElement != NULL))
1.83 daniel 6798: ctxt->sax->endElement(ctxt->userData, name);
1.140 daniel 6799: oldname = namePop(ctxt);
6800: if (oldname != NULL) {
6801: #ifdef DEBUG_STACK
6802: fprintf(stderr,"Close: popped %s\n", oldname);
6803: #endif
6804: xmlFree(oldname);
6805: }
1.72 daniel 6806: return;
1.2 veillard 6807: }
1.152 daniel 6808: if (RAW == '>') {
1.91 daniel 6809: NEXT;
6810: } else {
1.55 daniel 6811: if ((ctxt->sax != NULL) && (ctxt->sax->error != NULL))
1.122 daniel 6812: ctxt->sax->error(ctxt->userData,
6813: "Couldn't find end of Start Tag\n%.30s\n",
1.57 daniel 6814: openTag);
1.59 daniel 6815: ctxt->wellFormed = 0;
1.123 daniel 6816: ctxt->errNo = XML_ERR_GT_REQUIRED;
1.45 daniel 6817:
6818: /*
6819: * end of parsing of this node.
6820: */
6821: nodePop(ctxt);
1.140 daniel 6822: oldname = namePop(ctxt);
6823: if (oldname != NULL) {
6824: #ifdef DEBUG_STACK
6825: fprintf(stderr,"Close: popped %s\n", oldname);
6826: #endif
6827: xmlFree(oldname);
6828: }
1.118 daniel 6829:
6830: /*
6831: * Capture end position and add node
6832: */
6833: if ( ret != NULL && ctxt->record_info ) {
6834: node_info.end_pos = ctxt->input->consumed +
6835: (CUR_PTR - ctxt->input->base);
6836: node_info.end_line = ctxt->input->line;
6837: node_info.node = ret;
6838: xmlParserAddNodeInfo(ctxt, &node_info);
6839: }
1.72 daniel 6840: return;
1.2 veillard 6841: }
6842:
6843: /*
6844: * Parse the content of the element:
6845: */
1.45 daniel 6846: xmlParseContent(ctxt);
1.153 daniel 6847: if (!IS_CHAR(RAW)) {
1.55 daniel 6848: if ((ctxt->sax != NULL) && (ctxt->sax->error != NULL))
1.74 daniel 6849: ctxt->sax->error(ctxt->userData,
1.57 daniel 6850: "Premature end of data in tag %.30s\n", openTag);
1.59 daniel 6851: ctxt->wellFormed = 0;
1.123 daniel 6852: ctxt->errNo = XML_ERR_TAG_NOT_FINISED;
1.45 daniel 6853:
6854: /*
6855: * end of parsing of this node.
6856: */
6857: nodePop(ctxt);
1.140 daniel 6858: oldname = namePop(ctxt);
6859: if (oldname != NULL) {
6860: #ifdef DEBUG_STACK
6861: fprintf(stderr,"Close: popped %s\n", oldname);
6862: #endif
6863: xmlFree(oldname);
6864: }
1.72 daniel 6865: return;
1.2 veillard 6866: }
6867:
6868: /*
1.27 daniel 6869: * parse the end of tag: '</' should be here.
1.2 veillard 6870: */
1.140 daniel 6871: xmlParseEndTag(ctxt);
1.118 daniel 6872:
6873: /*
6874: * Capture end position and add node
6875: */
6876: if ( ret != NULL && ctxt->record_info ) {
6877: node_info.end_pos = ctxt->input->consumed +
6878: (CUR_PTR - ctxt->input->base);
6879: node_info.end_line = ctxt->input->line;
6880: node_info.node = ret;
6881: xmlParserAddNodeInfo(ctxt, &node_info);
6882: }
1.2 veillard 6883: }
6884:
1.50 daniel 6885: /**
6886: * xmlParseVersionNum:
6887: * @ctxt: an XML parser context
6888: *
6889: * parse the XML version value.
1.29 daniel 6890: *
6891: * [26] VersionNum ::= ([a-zA-Z0-9_.:] | '-')+
1.68 daniel 6892: *
6893: * Returns the string giving the XML version number, or NULL
1.29 daniel 6894: */
1.123 daniel 6895: xmlChar *
1.55 daniel 6896: xmlParseVersionNum(xmlParserCtxtPtr ctxt) {
1.135 daniel 6897: xmlChar *buf = NULL;
6898: int len = 0;
6899: int size = 10;
6900: xmlChar cur;
1.29 daniel 6901:
1.135 daniel 6902: buf = (xmlChar *) xmlMalloc(size * sizeof(xmlChar));
6903: if (buf == NULL) {
6904: fprintf(stderr, "malloc of %d byte failed\n", size);
6905: return(NULL);
6906: }
6907: cur = CUR;
1.152 daniel 6908: while (((cur >= 'a') && (cur <= 'z')) ||
6909: ((cur >= 'A') && (cur <= 'Z')) ||
6910: ((cur >= '0') && (cur <= '9')) ||
6911: (cur == '_') || (cur == '.') ||
6912: (cur == ':') || (cur == '-')) {
1.135 daniel 6913: if (len + 1 >= size) {
6914: size *= 2;
6915: buf = xmlRealloc(buf, size * sizeof(xmlChar));
6916: if (buf == NULL) {
6917: fprintf(stderr, "realloc of %d byte failed\n", size);
6918: return(NULL);
6919: }
6920: }
6921: buf[len++] = cur;
6922: NEXT;
6923: cur=CUR;
6924: }
6925: buf[len] = 0;
6926: return(buf);
1.29 daniel 6927: }
6928:
1.50 daniel 6929: /**
6930: * xmlParseVersionInfo:
6931: * @ctxt: an XML parser context
6932: *
6933: * parse the XML version.
1.29 daniel 6934: *
6935: * [24] VersionInfo ::= S 'version' Eq (' VersionNum ' | " VersionNum ")
6936: *
6937: * [25] Eq ::= S? '=' S?
1.50 daniel 6938: *
1.68 daniel 6939: * Returns the version string, e.g. "1.0"
1.29 daniel 6940: */
6941:
1.123 daniel 6942: xmlChar *
1.55 daniel 6943: xmlParseVersionInfo(xmlParserCtxtPtr ctxt) {
1.123 daniel 6944: xmlChar *version = NULL;
6945: const xmlChar *q;
1.29 daniel 6946:
1.152 daniel 6947: if ((RAW == 'v') && (NXT(1) == 'e') &&
1.40 daniel 6948: (NXT(2) == 'r') && (NXT(3) == 's') &&
6949: (NXT(4) == 'i') && (NXT(5) == 'o') &&
6950: (NXT(6) == 'n')) {
6951: SKIP(7);
1.42 daniel 6952: SKIP_BLANKS;
1.152 daniel 6953: if (RAW != '=') {
1.55 daniel 6954: if ((ctxt->sax != NULL) && (ctxt->sax->error != NULL))
1.122 daniel 6955: ctxt->sax->error(ctxt->userData,
6956: "xmlParseVersionInfo : expected '='\n");
1.59 daniel 6957: ctxt->wellFormed = 0;
1.123 daniel 6958: ctxt->errNo = XML_ERR_EQUAL_REQUIRED;
1.31 daniel 6959: return(NULL);
6960: }
1.40 daniel 6961: NEXT;
1.42 daniel 6962: SKIP_BLANKS;
1.152 daniel 6963: if (RAW == '"') {
1.40 daniel 6964: NEXT;
6965: q = CUR_PTR;
1.29 daniel 6966: version = xmlParseVersionNum(ctxt);
1.152 daniel 6967: if (RAW != '"') {
1.55 daniel 6968: if ((ctxt->sax != NULL) && (ctxt->sax->error != NULL))
1.122 daniel 6969: ctxt->sax->error(ctxt->userData,
6970: "String not closed\n%.50s\n", q);
1.59 daniel 6971: ctxt->wellFormed = 0;
1.123 daniel 6972: ctxt->errNo = XML_ERR_STRING_NOT_CLOSED;
1.55 daniel 6973: } else
1.40 daniel 6974: NEXT;
1.152 daniel 6975: } else if (RAW == '\''){
1.40 daniel 6976: NEXT;
6977: q = CUR_PTR;
1.29 daniel 6978: version = xmlParseVersionNum(ctxt);
1.152 daniel 6979: if (RAW != '\'') {
1.55 daniel 6980: if ((ctxt->sax != NULL) && (ctxt->sax->error != NULL))
1.122 daniel 6981: ctxt->sax->error(ctxt->userData,
6982: "String not closed\n%.50s\n", q);
1.123 daniel 6983: ctxt->errNo = XML_ERR_STRING_NOT_CLOSED;
1.59 daniel 6984: ctxt->wellFormed = 0;
1.55 daniel 6985: } else
1.40 daniel 6986: NEXT;
1.31 daniel 6987: } else {
1.55 daniel 6988: if ((ctxt->sax != NULL) && (ctxt->sax->error != NULL))
1.74 daniel 6989: ctxt->sax->error(ctxt->userData,
1.59 daniel 6990: "xmlParseVersionInfo : expected ' or \"\n");
1.122 daniel 6991: ctxt->wellFormed = 0;
1.123 daniel 6992: ctxt->errNo = XML_ERR_STRING_NOT_STARTED;
1.29 daniel 6993: }
6994: }
6995: return(version);
6996: }
6997:
1.50 daniel 6998: /**
6999: * xmlParseEncName:
7000: * @ctxt: an XML parser context
7001: *
7002: * parse the XML encoding name
1.29 daniel 7003: *
7004: * [81] EncName ::= [A-Za-z] ([A-Za-z0-9._] | '-')*
1.50 daniel 7005: *
1.68 daniel 7006: * Returns the encoding name value or NULL
1.29 daniel 7007: */
1.123 daniel 7008: xmlChar *
1.55 daniel 7009: xmlParseEncName(xmlParserCtxtPtr ctxt) {
1.135 daniel 7010: xmlChar *buf = NULL;
7011: int len = 0;
7012: int size = 10;
7013: xmlChar cur;
1.29 daniel 7014:
1.135 daniel 7015: cur = CUR;
7016: if (((cur >= 'a') && (cur <= 'z')) ||
7017: ((cur >= 'A') && (cur <= 'Z'))) {
7018: buf = (xmlChar *) xmlMalloc(size * sizeof(xmlChar));
7019: if (buf == NULL) {
7020: fprintf(stderr, "malloc of %d byte failed\n", size);
7021: return(NULL);
7022: }
7023:
7024: buf[len++] = cur;
1.40 daniel 7025: NEXT;
1.135 daniel 7026: cur = CUR;
1.152 daniel 7027: while (((cur >= 'a') && (cur <= 'z')) ||
7028: ((cur >= 'A') && (cur <= 'Z')) ||
7029: ((cur >= '0') && (cur <= '9')) ||
7030: (cur == '.') || (cur == '_') ||
7031: (cur == '-')) {
1.135 daniel 7032: if (len + 1 >= size) {
7033: size *= 2;
7034: buf = xmlRealloc(buf, size * sizeof(xmlChar));
7035: if (buf == NULL) {
7036: fprintf(stderr, "realloc of %d byte failed\n", size);
7037: return(NULL);
7038: }
7039: }
7040: buf[len++] = cur;
7041: NEXT;
7042: cur = CUR;
7043: if (cur == 0) {
7044: SHRINK;
7045: GROW;
7046: cur = CUR;
7047: }
7048: }
7049: buf[len] = 0;
1.29 daniel 7050: } else {
1.55 daniel 7051: if ((ctxt->sax != NULL) && (ctxt->sax->error != NULL))
1.74 daniel 7052: ctxt->sax->error(ctxt->userData, "Invalid XML encoding name\n");
1.59 daniel 7053: ctxt->wellFormed = 0;
1.123 daniel 7054: ctxt->errNo = XML_ERR_ENCODING_NAME;
1.29 daniel 7055: }
1.135 daniel 7056: return(buf);
1.29 daniel 7057: }
7058:
1.50 daniel 7059: /**
7060: * xmlParseEncodingDecl:
7061: * @ctxt: an XML parser context
7062: *
7063: * parse the XML encoding declaration
1.29 daniel 7064: *
7065: * [80] EncodingDecl ::= S 'encoding' Eq ('"' EncName '"' | "'" EncName "'")
1.50 daniel 7066: *
7067: * TODO: this should setup the conversion filters.
7068: *
1.68 daniel 7069: * Returns the encoding value or NULL
1.29 daniel 7070: */
7071:
1.123 daniel 7072: xmlChar *
1.55 daniel 7073: xmlParseEncodingDecl(xmlParserCtxtPtr ctxt) {
1.123 daniel 7074: xmlChar *encoding = NULL;
7075: const xmlChar *q;
1.29 daniel 7076:
1.42 daniel 7077: SKIP_BLANKS;
1.152 daniel 7078: if ((RAW == 'e') && (NXT(1) == 'n') &&
1.40 daniel 7079: (NXT(2) == 'c') && (NXT(3) == 'o') &&
7080: (NXT(4) == 'd') && (NXT(5) == 'i') &&
7081: (NXT(6) == 'n') && (NXT(7) == 'g')) {
7082: SKIP(8);
1.42 daniel 7083: SKIP_BLANKS;
1.152 daniel 7084: if (RAW != '=') {
1.55 daniel 7085: if ((ctxt->sax != NULL) && (ctxt->sax->error != NULL))
1.122 daniel 7086: ctxt->sax->error(ctxt->userData,
7087: "xmlParseEncodingDecl : expected '='\n");
1.59 daniel 7088: ctxt->wellFormed = 0;
1.123 daniel 7089: ctxt->errNo = XML_ERR_EQUAL_REQUIRED;
1.31 daniel 7090: return(NULL);
7091: }
1.40 daniel 7092: NEXT;
1.42 daniel 7093: SKIP_BLANKS;
1.152 daniel 7094: if (RAW == '"') {
1.40 daniel 7095: NEXT;
7096: q = CUR_PTR;
1.29 daniel 7097: encoding = xmlParseEncName(ctxt);
1.152 daniel 7098: if (RAW != '"') {
1.55 daniel 7099: if ((ctxt->sax != NULL) && (ctxt->sax->error != NULL))
1.122 daniel 7100: ctxt->sax->error(ctxt->userData,
7101: "String not closed\n%.50s\n", q);
1.59 daniel 7102: ctxt->wellFormed = 0;
1.123 daniel 7103: ctxt->errNo = XML_ERR_STRING_NOT_CLOSED;
1.55 daniel 7104: } else
1.40 daniel 7105: NEXT;
1.152 daniel 7106: } else if (RAW == '\''){
1.40 daniel 7107: NEXT;
7108: q = CUR_PTR;
1.29 daniel 7109: encoding = xmlParseEncName(ctxt);
1.152 daniel 7110: if (RAW != '\'') {
1.55 daniel 7111: if ((ctxt->sax != NULL) && (ctxt->sax->error != NULL))
1.122 daniel 7112: ctxt->sax->error(ctxt->userData,
7113: "String not closed\n%.50s\n", q);
1.59 daniel 7114: ctxt->wellFormed = 0;
1.123 daniel 7115: ctxt->errNo = XML_ERR_STRING_NOT_CLOSED;
1.55 daniel 7116: } else
1.40 daniel 7117: NEXT;
1.152 daniel 7118: } else if (RAW == '"'){
1.55 daniel 7119: if ((ctxt->sax != NULL) && (ctxt->sax->error != NULL))
1.74 daniel 7120: ctxt->sax->error(ctxt->userData,
1.59 daniel 7121: "xmlParseEncodingDecl : expected ' or \"\n");
7122: ctxt->wellFormed = 0;
1.123 daniel 7123: ctxt->errNo = XML_ERR_STRING_NOT_STARTED;
1.29 daniel 7124: }
7125: }
7126: return(encoding);
7127: }
7128:
1.50 daniel 7129: /**
7130: * xmlParseSDDecl:
7131: * @ctxt: an XML parser context
7132: *
7133: * parse the XML standalone declaration
1.29 daniel 7134: *
7135: * [32] SDDecl ::= S 'standalone' Eq
7136: * (("'" ('yes' | 'no') "'") | ('"' ('yes' | 'no')'"'))
1.99 daniel 7137: *
7138: * [ VC: Standalone Document Declaration ]
7139: * TODO The standalone document declaration must have the value "no"
7140: * if any external markup declarations contain declarations of:
7141: * - attributes with default values, if elements to which these
7142: * attributes apply appear in the document without specifications
7143: * of values for these attributes, or
7144: * - entities (other than amp, lt, gt, apos, quot), if references
7145: * to those entities appear in the document, or
7146: * - attributes with values subject to normalization, where the
7147: * attribute appears in the document with a value which will change
7148: * as a result of normalization, or
7149: * - element types with element content, if white space occurs directly
7150: * within any instance of those types.
1.68 daniel 7151: *
7152: * Returns 1 if standalone, 0 otherwise
1.29 daniel 7153: */
7154:
1.55 daniel 7155: int
7156: xmlParseSDDecl(xmlParserCtxtPtr ctxt) {
1.29 daniel 7157: int standalone = -1;
7158:
1.42 daniel 7159: SKIP_BLANKS;
1.152 daniel 7160: if ((RAW == 's') && (NXT(1) == 't') &&
1.40 daniel 7161: (NXT(2) == 'a') && (NXT(3) == 'n') &&
7162: (NXT(4) == 'd') && (NXT(5) == 'a') &&
7163: (NXT(6) == 'l') && (NXT(7) == 'o') &&
7164: (NXT(8) == 'n') && (NXT(9) == 'e')) {
7165: SKIP(10);
1.81 daniel 7166: SKIP_BLANKS;
1.152 daniel 7167: if (RAW != '=') {
1.55 daniel 7168: if ((ctxt->sax != NULL) && (ctxt->sax->error != NULL))
1.74 daniel 7169: ctxt->sax->error(ctxt->userData,
1.59 daniel 7170: "XML standalone declaration : expected '='\n");
1.123 daniel 7171: ctxt->errNo = XML_ERR_EQUAL_REQUIRED;
1.59 daniel 7172: ctxt->wellFormed = 0;
1.32 daniel 7173: return(standalone);
7174: }
1.40 daniel 7175: NEXT;
1.42 daniel 7176: SKIP_BLANKS;
1.152 daniel 7177: if (RAW == '\''){
1.40 daniel 7178: NEXT;
1.152 daniel 7179: if ((RAW == 'n') && (NXT(1) == 'o')) {
1.29 daniel 7180: standalone = 0;
1.40 daniel 7181: SKIP(2);
1.152 daniel 7182: } else if ((RAW == 'y') && (NXT(1) == 'e') &&
1.40 daniel 7183: (NXT(2) == 's')) {
1.29 daniel 7184: standalone = 1;
1.40 daniel 7185: SKIP(3);
1.29 daniel 7186: } else {
1.55 daniel 7187: if ((ctxt->sax != NULL) && (ctxt->sax->error != NULL))
1.122 daniel 7188: ctxt->sax->error(ctxt->userData,
7189: "standalone accepts only 'yes' or 'no'\n");
1.123 daniel 7190: ctxt->errNo = XML_ERR_STANDALONE_VALUE;
1.59 daniel 7191: ctxt->wellFormed = 0;
1.29 daniel 7192: }
1.152 daniel 7193: if (RAW != '\'') {
1.55 daniel 7194: if ((ctxt->sax != NULL) && (ctxt->sax->error != NULL))
1.74 daniel 7195: ctxt->sax->error(ctxt->userData, "String not closed\n");
1.123 daniel 7196: ctxt->errNo = XML_ERR_STRING_NOT_CLOSED;
1.59 daniel 7197: ctxt->wellFormed = 0;
1.55 daniel 7198: } else
1.40 daniel 7199: NEXT;
1.152 daniel 7200: } else if (RAW == '"'){
1.40 daniel 7201: NEXT;
1.152 daniel 7202: if ((RAW == 'n') && (NXT(1) == 'o')) {
1.29 daniel 7203: standalone = 0;
1.40 daniel 7204: SKIP(2);
1.152 daniel 7205: } else if ((RAW == 'y') && (NXT(1) == 'e') &&
1.40 daniel 7206: (NXT(2) == 's')) {
1.29 daniel 7207: standalone = 1;
1.40 daniel 7208: SKIP(3);
1.29 daniel 7209: } else {
1.55 daniel 7210: if ((ctxt->sax != NULL) && (ctxt->sax->error != NULL))
1.74 daniel 7211: ctxt->sax->error(ctxt->userData,
1.59 daniel 7212: "standalone accepts only 'yes' or 'no'\n");
1.123 daniel 7213: ctxt->errNo = XML_ERR_STANDALONE_VALUE;
1.59 daniel 7214: ctxt->wellFormed = 0;
1.29 daniel 7215: }
1.152 daniel 7216: if (RAW != '"') {
1.55 daniel 7217: if ((ctxt->sax != NULL) && (ctxt->sax->error != NULL))
1.74 daniel 7218: ctxt->sax->error(ctxt->userData, "String not closed\n");
1.59 daniel 7219: ctxt->wellFormed = 0;
1.123 daniel 7220: ctxt->errNo = XML_ERR_STRING_NOT_CLOSED;
1.55 daniel 7221: } else
1.40 daniel 7222: NEXT;
1.37 daniel 7223: } else {
1.55 daniel 7224: if ((ctxt->sax != NULL) && (ctxt->sax->error != NULL))
1.122 daniel 7225: ctxt->sax->error(ctxt->userData,
7226: "Standalone value not found\n");
1.59 daniel 7227: ctxt->wellFormed = 0;
1.123 daniel 7228: ctxt->errNo = XML_ERR_STRING_NOT_STARTED;
1.37 daniel 7229: }
1.29 daniel 7230: }
7231: return(standalone);
7232: }
7233:
1.50 daniel 7234: /**
7235: * xmlParseXMLDecl:
7236: * @ctxt: an XML parser context
7237: *
7238: * parse an XML declaration header
1.29 daniel 7239: *
7240: * [23] XMLDecl ::= '<?xml' VersionInfo EncodingDecl? SDDecl? S? '?>'
1.1 veillard 7241: */
7242:
1.55 daniel 7243: void
7244: xmlParseXMLDecl(xmlParserCtxtPtr ctxt) {
1.123 daniel 7245: xmlChar *version;
1.1 veillard 7246:
7247: /*
1.19 daniel 7248: * We know that '<?xml' is here.
1.1 veillard 7249: */
1.40 daniel 7250: SKIP(5);
1.1 veillard 7251:
1.153 daniel 7252: if (!IS_BLANK(RAW)) {
1.59 daniel 7253: if ((ctxt->sax != NULL) && (ctxt->sax->error != NULL))
1.74 daniel 7254: ctxt->sax->error(ctxt->userData, "Blank needed after '<?xml'\n");
1.123 daniel 7255: ctxt->errNo = XML_ERR_SPACE_REQUIRED;
1.59 daniel 7256: ctxt->wellFormed = 0;
7257: }
1.42 daniel 7258: SKIP_BLANKS;
1.1 veillard 7259:
7260: /*
1.29 daniel 7261: * We should have the VersionInfo here.
1.1 veillard 7262: */
1.29 daniel 7263: version = xmlParseVersionInfo(ctxt);
7264: if (version == NULL)
1.45 daniel 7265: version = xmlCharStrdup(XML_DEFAULT_VERSION);
1.72 daniel 7266: ctxt->version = xmlStrdup(version);
1.119 daniel 7267: xmlFree(version);
1.29 daniel 7268:
7269: /*
7270: * We may have the encoding declaration
7271: */
1.153 daniel 7272: if (!IS_BLANK(RAW)) {
1.152 daniel 7273: if ((RAW == '?') && (NXT(1) == '>')) {
1.59 daniel 7274: SKIP(2);
7275: return;
7276: }
7277: if ((ctxt->sax != NULL) && (ctxt->sax->error != NULL))
1.74 daniel 7278: ctxt->sax->error(ctxt->userData, "Blank needed here\n");
1.123 daniel 7279: ctxt->errNo = XML_ERR_SPACE_REQUIRED;
1.59 daniel 7280: ctxt->wellFormed = 0;
7281: }
1.72 daniel 7282: ctxt->encoding = xmlParseEncodingDecl(ctxt);
1.1 veillard 7283:
7284: /*
1.29 daniel 7285: * We may have the standalone status.
1.1 veillard 7286: */
1.153 daniel 7287: if ((ctxt->encoding != NULL) && (!IS_BLANK(RAW))) {
1.152 daniel 7288: if ((RAW == '?') && (NXT(1) == '>')) {
1.59 daniel 7289: SKIP(2);
7290: return;
7291: }
7292: if ((ctxt->sax != NULL) && (ctxt->sax->error != NULL))
1.74 daniel 7293: ctxt->sax->error(ctxt->userData, "Blank needed here\n");
1.59 daniel 7294: ctxt->wellFormed = 0;
1.123 daniel 7295: ctxt->errNo = XML_ERR_SPACE_REQUIRED;
1.59 daniel 7296: }
7297: SKIP_BLANKS;
1.72 daniel 7298: ctxt->standalone = xmlParseSDDecl(ctxt);
1.1 veillard 7299:
1.42 daniel 7300: SKIP_BLANKS;
1.152 daniel 7301: if ((RAW == '?') && (NXT(1) == '>')) {
1.40 daniel 7302: SKIP(2);
1.152 daniel 7303: } else if (RAW == '>') {
1.31 daniel 7304: /* Deprecated old WD ... */
1.55 daniel 7305: if ((ctxt->sax != NULL) && (ctxt->sax->error != NULL))
1.122 daniel 7306: ctxt->sax->error(ctxt->userData,
7307: "XML declaration must end-up with '?>'\n");
1.59 daniel 7308: ctxt->wellFormed = 0;
1.123 daniel 7309: ctxt->errNo = XML_ERR_XMLDECL_NOT_FINISHED;
1.40 daniel 7310: NEXT;
1.29 daniel 7311: } else {
1.55 daniel 7312: if ((ctxt->sax != NULL) && (ctxt->sax->error != NULL))
1.122 daniel 7313: ctxt->sax->error(ctxt->userData,
7314: "parsing XML declaration: '?>' expected\n");
1.59 daniel 7315: ctxt->wellFormed = 0;
1.123 daniel 7316: ctxt->errNo = XML_ERR_XMLDECL_NOT_FINISHED;
1.40 daniel 7317: MOVETO_ENDTAG(CUR_PTR);
7318: NEXT;
1.29 daniel 7319: }
1.1 veillard 7320: }
7321:
1.50 daniel 7322: /**
7323: * xmlParseMisc:
7324: * @ctxt: an XML parser context
7325: *
7326: * parse an XML Misc* optionnal field.
1.21 daniel 7327: *
1.22 daniel 7328: * [27] Misc ::= Comment | PI | S
1.1 veillard 7329: */
7330:
1.55 daniel 7331: void
7332: xmlParseMisc(xmlParserCtxtPtr ctxt) {
1.152 daniel 7333: while (((RAW == '<') && (NXT(1) == '?')) ||
7334: ((RAW == '<') && (NXT(1) == '!') &&
1.40 daniel 7335: (NXT(2) == '-') && (NXT(3) == '-')) ||
7336: IS_BLANK(CUR)) {
1.152 daniel 7337: if ((RAW == '<') && (NXT(1) == '?')) {
1.16 daniel 7338: xmlParsePI(ctxt);
1.40 daniel 7339: } else if (IS_BLANK(CUR)) {
7340: NEXT;
1.1 veillard 7341: } else
1.114 daniel 7342: xmlParseComment(ctxt);
1.1 veillard 7343: }
7344: }
7345:
1.50 daniel 7346: /**
7347: * xmlParseDocument :
7348: * @ctxt: an XML parser context
7349: *
7350: * parse an XML document (and build a tree if using the standard SAX
7351: * interface).
1.21 daniel 7352: *
1.22 daniel 7353: * [1] document ::= prolog element Misc*
1.29 daniel 7354: *
7355: * [22] prolog ::= XMLDecl? Misc* (doctypedecl Misc*)?
1.50 daniel 7356: *
1.68 daniel 7357: * Returns 0, -1 in case of error. the parser context is augmented
1.50 daniel 7358: * as a result of the parsing.
1.1 veillard 7359: */
7360:
1.55 daniel 7361: int
7362: xmlParseDocument(xmlParserCtxtPtr ctxt) {
1.156 ! daniel 7363: xmlChar start[4];
! 7364: xmlCharEncoding enc;
! 7365:
1.45 daniel 7366: xmlDefaultSAXHandlerInit();
7367:
1.91 daniel 7368: GROW;
7369:
1.14 veillard 7370: /*
1.44 daniel 7371: * SAX: beginning of the document processing.
7372: */
1.72 daniel 7373: if ((ctxt->sax) && (ctxt->sax->setDocumentLocator))
1.74 daniel 7374: ctxt->sax->setDocumentLocator(ctxt->userData, &xmlDefaultSAXLocator);
1.44 daniel 7375:
1.156 ! daniel 7376: /*
! 7377: * Get the 4 first bytes and decode the charset
! 7378: * if enc != XML_CHAR_ENCODING_NONE
! 7379: * plug some encoding conversion routines.
! 7380: */
! 7381: start[0] = RAW;
! 7382: start[1] = NXT(1);
! 7383: start[2] = NXT(2);
! 7384: start[3] = NXT(3);
! 7385: enc = xmlDetectCharEncoding(start, 4);
! 7386: if (enc != XML_CHAR_ENCODING_NONE) {
! 7387: xmlSwitchEncoding(ctxt, enc);
! 7388: }
! 7389:
1.1 veillard 7390:
7391: /*
7392: * Wipe out everything which is before the first '<'
7393: */
1.153 daniel 7394: if (IS_BLANK(RAW)) {
1.59 daniel 7395: if ((ctxt->sax != NULL) && (ctxt->sax->error != NULL))
1.74 daniel 7396: ctxt->sax->error(ctxt->userData,
1.59 daniel 7397: "Extra spaces at the beginning of the document are not allowed\n");
1.123 daniel 7398: ctxt->errNo = XML_ERR_DOCUMENT_START;
1.59 daniel 7399: ctxt->wellFormed = 0;
7400: SKIP_BLANKS;
7401: }
7402:
7403: if (CUR == 0) {
7404: if ((ctxt->sax != NULL) && (ctxt->sax->error != NULL))
1.74 daniel 7405: ctxt->sax->error(ctxt->userData, "Document is empty\n");
1.123 daniel 7406: ctxt->errNo = XML_ERR_DOCUMENT_EMPTY;
1.59 daniel 7407: ctxt->wellFormed = 0;
7408: }
1.1 veillard 7409:
7410: /*
7411: * Check for the XMLDecl in the Prolog.
7412: */
1.91 daniel 7413: GROW;
1.152 daniel 7414: if ((RAW == '<') && (NXT(1) == '?') &&
1.40 daniel 7415: (NXT(2) == 'x') && (NXT(3) == 'm') &&
1.142 daniel 7416: (NXT(4) == 'l') && (IS_BLANK(NXT(5)))) {
1.19 daniel 7417: xmlParseXMLDecl(ctxt);
1.42 daniel 7418: SKIP_BLANKS;
1.151 daniel 7419: #if 0
1.152 daniel 7420: } else if ((RAW == '<') && (NXT(1) == '?') &&
1.40 daniel 7421: (NXT(2) == 'X') && (NXT(3) == 'M') &&
1.142 daniel 7422: (NXT(4) == 'L') && (IS_BLANK(NXT(5)))) {
1.19 daniel 7423: /*
7424: * The first drafts were using <?XML and the final W3C REC
7425: * now use <?xml ...
7426: */
1.16 daniel 7427: xmlParseXMLDecl(ctxt);
1.42 daniel 7428: SKIP_BLANKS;
1.151 daniel 7429: #endif
1.1 veillard 7430: } else {
1.72 daniel 7431: ctxt->version = xmlCharStrdup(XML_DEFAULT_VERSION);
1.1 veillard 7432: }
1.72 daniel 7433: if ((ctxt->sax) && (ctxt->sax->startDocument))
1.74 daniel 7434: ctxt->sax->startDocument(ctxt->userData);
1.1 veillard 7435:
7436: /*
7437: * The Misc part of the Prolog
7438: */
1.91 daniel 7439: GROW;
1.16 daniel 7440: xmlParseMisc(ctxt);
1.1 veillard 7441:
7442: /*
1.29 daniel 7443: * Then possibly doc type declaration(s) and more Misc
1.21 daniel 7444: * (doctypedecl Misc*)?
7445: */
1.91 daniel 7446: GROW;
1.152 daniel 7447: if ((RAW == '<') && (NXT(1) == '!') &&
1.40 daniel 7448: (NXT(2) == 'D') && (NXT(3) == 'O') &&
7449: (NXT(4) == 'C') && (NXT(5) == 'T') &&
7450: (NXT(6) == 'Y') && (NXT(7) == 'P') &&
7451: (NXT(8) == 'E')) {
1.22 daniel 7452: xmlParseDocTypeDecl(ctxt);
1.152 daniel 7453: if (RAW == '[') {
1.140 daniel 7454: ctxt->instate = XML_PARSER_DTD;
7455: xmlParseInternalSubset(ctxt);
7456: }
1.96 daniel 7457: ctxt->instate = XML_PARSER_PROLOG;
1.22 daniel 7458: xmlParseMisc(ctxt);
1.21 daniel 7459: }
7460:
7461: /*
7462: * Time to start parsing the tree itself
1.1 veillard 7463: */
1.91 daniel 7464: GROW;
1.152 daniel 7465: if (RAW != '<') {
1.59 daniel 7466: if ((ctxt->sax != NULL) && (ctxt->sax->error != NULL))
1.74 daniel 7467: ctxt->sax->error(ctxt->userData,
1.151 daniel 7468: "Start tag expected, '<' not found\n");
1.140 daniel 7469: ctxt->errNo = XML_ERR_DOCUMENT_EMPTY;
1.59 daniel 7470: ctxt->wellFormed = 0;
1.140 daniel 7471: ctxt->instate = XML_PARSER_EOF;
7472: } else {
7473: ctxt->instate = XML_PARSER_CONTENT;
7474: xmlParseElement(ctxt);
7475: ctxt->instate = XML_PARSER_EPILOG;
7476:
7477:
7478: /*
7479: * The Misc part at the end
7480: */
7481: xmlParseMisc(ctxt);
7482:
1.152 daniel 7483: if (RAW != 0) {
1.140 daniel 7484: if ((ctxt->sax != NULL) && (ctxt->sax->error != NULL))
7485: ctxt->sax->error(ctxt->userData,
7486: "Extra content at the end of the document\n");
7487: ctxt->wellFormed = 0;
7488: ctxt->errNo = XML_ERR_DOCUMENT_END;
7489: }
7490: ctxt->instate = XML_PARSER_EOF;
1.59 daniel 7491: }
7492:
1.44 daniel 7493: /*
7494: * SAX: end of the document processing.
7495: */
1.72 daniel 7496: if ((ctxt->sax) && (ctxt->sax->endDocument != NULL))
1.74 daniel 7497: ctxt->sax->endDocument(ctxt->userData);
1.151 daniel 7498:
7499: /*
7500: * Grab the encoding if it was added on-the-fly
7501: */
7502: if ((ctxt->encoding != NULL) && (ctxt->myDoc != NULL) &&
7503: (ctxt->myDoc->encoding == NULL)) {
7504: ctxt->myDoc->encoding = ctxt->encoding;
7505: ctxt->encoding = NULL;
7506: }
1.59 daniel 7507: if (! ctxt->wellFormed) return(-1);
1.16 daniel 7508: return(0);
7509: }
7510:
1.98 daniel 7511: /************************************************************************
7512: * *
1.128 daniel 7513: * Progressive parsing interfaces *
7514: * *
7515: ************************************************************************/
7516:
7517: /**
7518: * xmlParseLookupSequence:
7519: * @ctxt: an XML parser context
7520: * @first: the first char to lookup
1.140 daniel 7521: * @next: the next char to lookup or zero
7522: * @third: the next char to lookup or zero
1.128 daniel 7523: *
1.140 daniel 7524: * Try to find if a sequence (first, next, third) or just (first next) or
7525: * (first) is available in the input stream.
7526: * This function has a side effect of (possibly) incrementing ctxt->checkIndex
7527: * to avoid rescanning sequences of bytes, it DOES change the state of the
7528: * parser, do not use liberally.
1.128 daniel 7529: *
1.140 daniel 7530: * Returns the index to the current parsing point if the full sequence
7531: * is available, -1 otherwise.
1.128 daniel 7532: */
7533: int
1.140 daniel 7534: xmlParseLookupSequence(xmlParserCtxtPtr ctxt, xmlChar first,
7535: xmlChar next, xmlChar third) {
7536: int base, len;
7537: xmlParserInputPtr in;
7538: const xmlChar *buf;
7539:
7540: in = ctxt->input;
7541: if (in == NULL) return(-1);
7542: base = in->cur - in->base;
7543: if (base < 0) return(-1);
7544: if (ctxt->checkIndex > base)
7545: base = ctxt->checkIndex;
7546: if (in->buf == NULL) {
7547: buf = in->base;
7548: len = in->length;
7549: } else {
7550: buf = in->buf->buffer->content;
7551: len = in->buf->buffer->use;
7552: }
7553: /* take into account the sequence length */
7554: if (third) len -= 2;
7555: else if (next) len --;
7556: for (;base < len;base++) {
7557: if (buf[base] == first) {
7558: if (third != 0) {
7559: if ((buf[base + 1] != next) ||
7560: (buf[base + 2] != third)) continue;
7561: } else if (next != 0) {
7562: if (buf[base + 1] != next) continue;
7563: }
7564: ctxt->checkIndex = 0;
7565: #ifdef DEBUG_PUSH
7566: if (next == 0)
7567: fprintf(stderr, "PP: lookup '%c' found at %d\n",
7568: first, base);
7569: else if (third == 0)
7570: fprintf(stderr, "PP: lookup '%c%c' found at %d\n",
7571: first, next, base);
7572: else
7573: fprintf(stderr, "PP: lookup '%c%c%c' found at %d\n",
7574: first, next, third, base);
7575: #endif
7576: return(base - (in->cur - in->base));
7577: }
7578: }
7579: ctxt->checkIndex = base;
7580: #ifdef DEBUG_PUSH
7581: if (next == 0)
7582: fprintf(stderr, "PP: lookup '%c' failed\n", first);
7583: else if (third == 0)
7584: fprintf(stderr, "PP: lookup '%c%c' failed\n", first, next);
7585: else
7586: fprintf(stderr, "PP: lookup '%c%c%c' failed\n", first, next, third);
7587: #endif
7588: return(-1);
1.128 daniel 7589: }
7590:
7591: /**
1.143 daniel 7592: * xmlParseTryOrFinish:
1.128 daniel 7593: * @ctxt: an XML parser context
1.143 daniel 7594: * @terminate: last chunk indicator
1.128 daniel 7595: *
7596: * Try to progress on parsing
7597: *
7598: * Returns zero if no parsing was possible
7599: */
7600: int
1.143 daniel 7601: xmlParseTryOrFinish(xmlParserCtxtPtr ctxt, int terminate) {
1.128 daniel 7602: int ret = 0;
1.140 daniel 7603: xmlParserInputPtr in;
7604: int avail;
7605: xmlChar cur, next;
7606:
7607: #ifdef DEBUG_PUSH
7608: switch (ctxt->instate) {
7609: case XML_PARSER_EOF:
7610: fprintf(stderr, "PP: try EOF\n"); break;
7611: case XML_PARSER_START:
7612: fprintf(stderr, "PP: try START\n"); break;
7613: case XML_PARSER_MISC:
7614: fprintf(stderr, "PP: try MISC\n");break;
7615: case XML_PARSER_COMMENT:
7616: fprintf(stderr, "PP: try COMMENT\n");break;
7617: case XML_PARSER_PROLOG:
7618: fprintf(stderr, "PP: try PROLOG\n");break;
7619: case XML_PARSER_START_TAG:
7620: fprintf(stderr, "PP: try START_TAG\n");break;
7621: case XML_PARSER_CONTENT:
7622: fprintf(stderr, "PP: try CONTENT\n");break;
7623: case XML_PARSER_CDATA_SECTION:
7624: fprintf(stderr, "PP: try CDATA_SECTION\n");break;
7625: case XML_PARSER_END_TAG:
7626: fprintf(stderr, "PP: try END_TAG\n");break;
7627: case XML_PARSER_ENTITY_DECL:
7628: fprintf(stderr, "PP: try ENTITY_DECL\n");break;
7629: case XML_PARSER_ENTITY_VALUE:
7630: fprintf(stderr, "PP: try ENTITY_VALUE\n");break;
7631: case XML_PARSER_ATTRIBUTE_VALUE:
7632: fprintf(stderr, "PP: try ATTRIBUTE_VALUE\n");break;
7633: case XML_PARSER_DTD:
7634: fprintf(stderr, "PP: try DTD\n");break;
7635: case XML_PARSER_EPILOG:
7636: fprintf(stderr, "PP: try EPILOG\n");break;
7637: case XML_PARSER_PI:
7638: fprintf(stderr, "PP: try PI\n");break;
7639: }
7640: #endif
1.128 daniel 7641:
7642: while (1) {
1.140 daniel 7643: /*
7644: * Pop-up of finished entities.
7645: */
1.152 daniel 7646: while ((RAW == 0) && (ctxt->inputNr > 1))
1.140 daniel 7647: xmlPopInput(ctxt);
7648:
7649: in = ctxt->input;
7650: if (in == NULL) break;
7651: if (in->buf == NULL)
7652: avail = in->length - (in->cur - in->base);
7653: else
7654: avail = in->buf->buffer->use - (in->cur - in->base);
7655: if (avail < 1)
7656: goto done;
1.128 daniel 7657: switch (ctxt->instate) {
7658: case XML_PARSER_EOF:
1.140 daniel 7659: /*
7660: * Document parsing is done !
7661: */
7662: goto done;
7663: case XML_PARSER_START:
7664: /*
7665: * Very first chars read from the document flow.
7666: */
7667: cur = in->cur[0];
7668: if (IS_BLANK(cur)) {
7669: if ((ctxt->sax) && (ctxt->sax->setDocumentLocator))
7670: ctxt->sax->setDocumentLocator(ctxt->userData,
7671: &xmlDefaultSAXLocator);
7672: if ((ctxt->sax != NULL) && (ctxt->sax->error != NULL))
7673: ctxt->sax->error(ctxt->userData,
7674: "Extra spaces at the beginning of the document are not allowed\n");
7675: ctxt->errNo = XML_ERR_DOCUMENT_START;
7676: ctxt->wellFormed = 0;
7677: SKIP_BLANKS;
7678: ret++;
7679: if (in->buf == NULL)
7680: avail = in->length - (in->cur - in->base);
7681: else
7682: avail = in->buf->buffer->use - (in->cur - in->base);
7683: }
7684: if (avail < 2)
7685: goto done;
7686:
7687: cur = in->cur[0];
7688: next = in->cur[1];
7689: if (cur == 0) {
7690: if ((ctxt->sax) && (ctxt->sax->setDocumentLocator))
7691: ctxt->sax->setDocumentLocator(ctxt->userData,
7692: &xmlDefaultSAXLocator);
7693: if ((ctxt->sax != NULL) && (ctxt->sax->error != NULL))
7694: ctxt->sax->error(ctxt->userData, "Document is empty\n");
7695: ctxt->errNo = XML_ERR_DOCUMENT_EMPTY;
7696: ctxt->wellFormed = 0;
7697: ctxt->instate = XML_PARSER_EOF;
7698: #ifdef DEBUG_PUSH
7699: fprintf(stderr, "PP: entering EOF\n");
7700: #endif
7701: if ((ctxt->sax) && (ctxt->sax->endDocument != NULL))
7702: ctxt->sax->endDocument(ctxt->userData);
7703: goto done;
7704: }
7705: if ((cur == '<') && (next == '?')) {
7706: /* PI or XML decl */
7707: if (avail < 5) return(ret);
1.143 daniel 7708: if ((!terminate) &&
7709: (xmlParseLookupSequence(ctxt, '?', '>', 0) < 0))
1.140 daniel 7710: return(ret);
7711: if ((ctxt->sax) && (ctxt->sax->setDocumentLocator))
7712: ctxt->sax->setDocumentLocator(ctxt->userData,
7713: &xmlDefaultSAXLocator);
7714: if ((in->cur[2] == 'x') &&
7715: (in->cur[3] == 'm') &&
1.142 daniel 7716: (in->cur[4] == 'l') &&
7717: (IS_BLANK(in->cur[5]))) {
1.140 daniel 7718: ret += 5;
7719: #ifdef DEBUG_PUSH
7720: fprintf(stderr, "PP: Parsing XML Decl\n");
7721: #endif
7722: xmlParseXMLDecl(ctxt);
7723: if ((ctxt->sax) && (ctxt->sax->startDocument))
7724: ctxt->sax->startDocument(ctxt->userData);
7725: ctxt->instate = XML_PARSER_MISC;
7726: #ifdef DEBUG_PUSH
7727: fprintf(stderr, "PP: entering MISC\n");
7728: #endif
7729: } else {
7730: ctxt->version = xmlCharStrdup(XML_DEFAULT_VERSION);
7731: if ((ctxt->sax) && (ctxt->sax->startDocument))
7732: ctxt->sax->startDocument(ctxt->userData);
7733: ctxt->instate = XML_PARSER_MISC;
7734: #ifdef DEBUG_PUSH
7735: fprintf(stderr, "PP: entering MISC\n");
7736: #endif
7737: }
7738: } else {
7739: if ((ctxt->sax) && (ctxt->sax->setDocumentLocator))
7740: ctxt->sax->setDocumentLocator(ctxt->userData,
7741: &xmlDefaultSAXLocator);
7742: ctxt->version = xmlCharStrdup(XML_DEFAULT_VERSION);
7743: if ((ctxt->sax) && (ctxt->sax->startDocument))
7744: ctxt->sax->startDocument(ctxt->userData);
7745: ctxt->instate = XML_PARSER_MISC;
7746: #ifdef DEBUG_PUSH
7747: fprintf(stderr, "PP: entering MISC\n");
7748: #endif
7749: }
7750: break;
7751: case XML_PARSER_MISC:
7752: SKIP_BLANKS;
7753: if (in->buf == NULL)
7754: avail = in->length - (in->cur - in->base);
7755: else
7756: avail = in->buf->buffer->use - (in->cur - in->base);
7757: if (avail < 2)
7758: goto done;
7759: cur = in->cur[0];
7760: next = in->cur[1];
7761: if ((cur == '<') && (next == '?')) {
1.143 daniel 7762: if ((!terminate) &&
7763: (xmlParseLookupSequence(ctxt, '?', '>', 0) < 0))
1.140 daniel 7764: goto done;
7765: #ifdef DEBUG_PUSH
7766: fprintf(stderr, "PP: Parsing PI\n");
7767: #endif
7768: xmlParsePI(ctxt);
7769: } else if ((cur == '<') && (next == '!') &&
7770: (in->cur[2] == '-') && (in->cur[3] == '-')) {
1.143 daniel 7771: if ((!terminate) &&
7772: (xmlParseLookupSequence(ctxt, '-', '-', '>') < 0))
1.140 daniel 7773: goto done;
7774: #ifdef DEBUG_PUSH
7775: fprintf(stderr, "PP: Parsing Comment\n");
7776: #endif
7777: xmlParseComment(ctxt);
7778: ctxt->instate = XML_PARSER_MISC;
7779: } else if ((cur == '<') && (next == '!') &&
7780: (in->cur[2] == 'D') && (in->cur[3] == 'O') &&
7781: (in->cur[4] == 'C') && (in->cur[5] == 'T') &&
7782: (in->cur[6] == 'Y') && (in->cur[7] == 'P') &&
7783: (in->cur[8] == 'E')) {
1.143 daniel 7784: if ((!terminate) &&
7785: (xmlParseLookupSequence(ctxt, '>', 0, 0) < 0))
1.140 daniel 7786: goto done;
7787: #ifdef DEBUG_PUSH
7788: fprintf(stderr, "PP: Parsing internal subset\n");
7789: #endif
7790: xmlParseDocTypeDecl(ctxt);
1.152 daniel 7791: if (RAW == '[') {
1.140 daniel 7792: ctxt->instate = XML_PARSER_DTD;
7793: #ifdef DEBUG_PUSH
7794: fprintf(stderr, "PP: entering DTD\n");
7795: #endif
7796: } else {
7797: ctxt->instate = XML_PARSER_PROLOG;
7798: #ifdef DEBUG_PUSH
7799: fprintf(stderr, "PP: entering PROLOG\n");
7800: #endif
7801: }
7802: } else if ((cur == '<') && (next == '!') &&
7803: (avail < 9)) {
7804: goto done;
7805: } else {
7806: ctxt->instate = XML_PARSER_START_TAG;
7807: #ifdef DEBUG_PUSH
7808: fprintf(stderr, "PP: entering START_TAG\n");
7809: #endif
7810: }
7811: break;
1.128 daniel 7812: case XML_PARSER_PROLOG:
1.140 daniel 7813: SKIP_BLANKS;
7814: if (in->buf == NULL)
7815: avail = in->length - (in->cur - in->base);
7816: else
7817: avail = in->buf->buffer->use - (in->cur - in->base);
7818: if (avail < 2)
7819: goto done;
7820: cur = in->cur[0];
7821: next = in->cur[1];
7822: if ((cur == '<') && (next == '?')) {
1.143 daniel 7823: if ((!terminate) &&
7824: (xmlParseLookupSequence(ctxt, '?', '>', 0) < 0))
1.140 daniel 7825: goto done;
7826: #ifdef DEBUG_PUSH
7827: fprintf(stderr, "PP: Parsing PI\n");
7828: #endif
7829: xmlParsePI(ctxt);
7830: } else if ((cur == '<') && (next == '!') &&
7831: (in->cur[2] == '-') && (in->cur[3] == '-')) {
1.143 daniel 7832: if ((!terminate) &&
7833: (xmlParseLookupSequence(ctxt, '-', '-', '>') < 0))
1.140 daniel 7834: goto done;
7835: #ifdef DEBUG_PUSH
7836: fprintf(stderr, "PP: Parsing Comment\n");
7837: #endif
7838: xmlParseComment(ctxt);
7839: ctxt->instate = XML_PARSER_PROLOG;
7840: } else if ((cur == '<') && (next == '!') &&
7841: (avail < 4)) {
7842: goto done;
7843: } else {
7844: ctxt->instate = XML_PARSER_START_TAG;
7845: #ifdef DEBUG_PUSH
7846: fprintf(stderr, "PP: entering START_TAG\n");
7847: #endif
7848: }
7849: break;
7850: case XML_PARSER_EPILOG:
7851: SKIP_BLANKS;
7852: if (in->buf == NULL)
7853: avail = in->length - (in->cur - in->base);
7854: else
7855: avail = in->buf->buffer->use - (in->cur - in->base);
7856: if (avail < 2)
7857: goto done;
7858: cur = in->cur[0];
7859: next = in->cur[1];
7860: if ((cur == '<') && (next == '?')) {
1.143 daniel 7861: if ((!terminate) &&
7862: (xmlParseLookupSequence(ctxt, '?', '>', 0) < 0))
1.140 daniel 7863: goto done;
7864: #ifdef DEBUG_PUSH
7865: fprintf(stderr, "PP: Parsing PI\n");
7866: #endif
7867: xmlParsePI(ctxt);
7868: ctxt->instate = XML_PARSER_EPILOG;
7869: } else if ((cur == '<') && (next == '!') &&
7870: (in->cur[2] == '-') && (in->cur[3] == '-')) {
1.143 daniel 7871: if ((!terminate) &&
7872: (xmlParseLookupSequence(ctxt, '-', '-', '>') < 0))
1.140 daniel 7873: goto done;
7874: #ifdef DEBUG_PUSH
7875: fprintf(stderr, "PP: Parsing Comment\n");
7876: #endif
7877: xmlParseComment(ctxt);
7878: ctxt->instate = XML_PARSER_EPILOG;
7879: } else if ((cur == '<') && (next == '!') &&
7880: (avail < 4)) {
7881: goto done;
7882: } else {
7883: if ((ctxt->sax != NULL) && (ctxt->sax->error != NULL))
7884: ctxt->sax->error(ctxt->userData,
7885: "Extra content at the end of the document\n");
7886: ctxt->wellFormed = 0;
7887: ctxt->errNo = XML_ERR_DOCUMENT_END;
7888: ctxt->instate = XML_PARSER_EOF;
7889: #ifdef DEBUG_PUSH
7890: fprintf(stderr, "PP: entering EOF\n");
7891: #endif
7892: if ((ctxt->sax) && (ctxt->sax->endDocument != NULL))
7893: ctxt->sax->endDocument(ctxt->userData);
7894: goto done;
7895: }
7896: break;
7897: case XML_PARSER_START_TAG: {
7898: xmlChar *name, *oldname;
7899:
7900: if (avail < 2)
7901: goto done;
7902: cur = in->cur[0];
7903: if (cur != '<') {
7904: if ((ctxt->sax != NULL) && (ctxt->sax->error != NULL))
7905: ctxt->sax->error(ctxt->userData,
7906: "Start tag expect, '<' not found\n");
7907: ctxt->errNo = XML_ERR_DOCUMENT_EMPTY;
7908: ctxt->wellFormed = 0;
7909: ctxt->instate = XML_PARSER_EOF;
7910: #ifdef DEBUG_PUSH
7911: fprintf(stderr, "PP: entering EOF\n");
7912: #endif
7913: if ((ctxt->sax) && (ctxt->sax->endDocument != NULL))
7914: ctxt->sax->endDocument(ctxt->userData);
7915: goto done;
7916: }
1.143 daniel 7917: if ((!terminate) &&
7918: (xmlParseLookupSequence(ctxt, '>', 0, 0) < 0))
1.140 daniel 7919: goto done;
7920: name = xmlParseStartTag(ctxt);
7921: if (name == NULL) {
7922: ctxt->instate = XML_PARSER_EOF;
7923: #ifdef DEBUG_PUSH
7924: fprintf(stderr, "PP: entering EOF\n");
7925: #endif
7926: if ((ctxt->sax) && (ctxt->sax->endDocument != NULL))
7927: ctxt->sax->endDocument(ctxt->userData);
7928: goto done;
7929: }
7930: namePush(ctxt, xmlStrdup(name));
7931:
7932: /*
7933: * [ VC: Root Element Type ]
7934: * The Name in the document type declaration must match
7935: * the element type of the root element.
7936: */
7937: if (ctxt->validate && ctxt->wellFormed && ctxt->myDoc &&
7938: ctxt->node && (ctxt->node == ctxt->myDoc->root))
7939: ctxt->valid &= xmlValidateRoot(&ctxt->vctxt, ctxt->myDoc);
7940:
7941: /*
7942: * Check for an Empty Element.
7943: */
1.152 daniel 7944: if ((RAW == '/') && (NXT(1) == '>')) {
1.140 daniel 7945: SKIP(2);
7946: if ((ctxt->sax != NULL) && (ctxt->sax->endElement != NULL))
7947: ctxt->sax->endElement(ctxt->userData, name);
7948: xmlFree(name);
7949: oldname = namePop(ctxt);
7950: if (oldname != NULL) {
7951: #ifdef DEBUG_STACK
7952: fprintf(stderr,"Close: popped %s\n", oldname);
7953: #endif
7954: xmlFree(oldname);
7955: }
7956: if (ctxt->name == NULL) {
7957: ctxt->instate = XML_PARSER_EPILOG;
7958: #ifdef DEBUG_PUSH
7959: fprintf(stderr, "PP: entering EPILOG\n");
7960: #endif
7961: } else {
7962: ctxt->instate = XML_PARSER_CONTENT;
7963: #ifdef DEBUG_PUSH
7964: fprintf(stderr, "PP: entering CONTENT\n");
7965: #endif
7966: }
7967: break;
7968: }
1.152 daniel 7969: if (RAW == '>') {
1.140 daniel 7970: NEXT;
7971: } else {
7972: if ((ctxt->sax != NULL) && (ctxt->sax->error != NULL))
7973: ctxt->sax->error(ctxt->userData,
7974: "Couldn't find end of Start Tag %s\n",
7975: name);
7976: ctxt->wellFormed = 0;
7977: ctxt->errNo = XML_ERR_GT_REQUIRED;
7978:
7979: /*
7980: * end of parsing of this node.
7981: */
7982: nodePop(ctxt);
7983: oldname = namePop(ctxt);
7984: if (oldname != NULL) {
7985: #ifdef DEBUG_STACK
7986: fprintf(stderr,"Close: popped %s\n", oldname);
7987: #endif
7988: xmlFree(oldname);
7989: }
7990: }
7991: xmlFree(name);
7992: ctxt->instate = XML_PARSER_CONTENT;
7993: #ifdef DEBUG_PUSH
7994: fprintf(stderr, "PP: entering CONTENT\n");
7995: #endif
7996: break;
7997: }
1.128 daniel 7998: case XML_PARSER_CONTENT:
1.140 daniel 7999: /*
8000: * Handle preparsed entities and charRef
8001: */
8002: if (ctxt->token != 0) {
8003: xmlChar cur[2] = { 0 , 0 } ;
8004:
8005: cur[0] = (xmlChar) ctxt->token;
8006: if ((ctxt->sax != NULL) && (ctxt->sax->characters != NULL))
8007: ctxt->sax->characters(ctxt->userData, cur, 1);
8008: ctxt->token = 0;
8009: }
8010: if (avail < 2)
8011: goto done;
8012: cur = in->cur[0];
8013: next = in->cur[1];
8014: if ((cur == '<') && (next == '?')) {
1.143 daniel 8015: if ((!terminate) &&
8016: (xmlParseLookupSequence(ctxt, '?', '>', 0) < 0))
1.140 daniel 8017: goto done;
8018: #ifdef DEBUG_PUSH
8019: fprintf(stderr, "PP: Parsing PI\n");
8020: #endif
8021: xmlParsePI(ctxt);
8022: } else if ((cur == '<') && (next == '!') &&
8023: (in->cur[2] == '-') && (in->cur[3] == '-')) {
1.143 daniel 8024: if ((!terminate) &&
8025: (xmlParseLookupSequence(ctxt, '-', '-', '>') < 0))
1.140 daniel 8026: goto done;
8027: #ifdef DEBUG_PUSH
8028: fprintf(stderr, "PP: Parsing Comment\n");
8029: #endif
8030: xmlParseComment(ctxt);
8031: ctxt->instate = XML_PARSER_CONTENT;
8032: } else if ((cur == '<') && (in->cur[1] == '!') &&
8033: (in->cur[2] == '[') && (NXT(3) == 'C') &&
8034: (in->cur[4] == 'D') && (NXT(5) == 'A') &&
8035: (in->cur[6] == 'T') && (NXT(7) == 'A') &&
8036: (in->cur[8] == '[')) {
8037: SKIP(9);
8038: ctxt->instate = XML_PARSER_CDATA_SECTION;
8039: #ifdef DEBUG_PUSH
8040: fprintf(stderr, "PP: entering CDATA_SECTION\n");
8041: #endif
8042: break;
8043: } else if ((cur == '<') && (next == '!') &&
8044: (avail < 9)) {
8045: goto done;
8046: } else if ((cur == '<') && (next == '/')) {
8047: ctxt->instate = XML_PARSER_END_TAG;
8048: #ifdef DEBUG_PUSH
8049: fprintf(stderr, "PP: entering END_TAG\n");
8050: #endif
8051: break;
8052: } else if (cur == '<') {
8053: ctxt->instate = XML_PARSER_START_TAG;
8054: #ifdef DEBUG_PUSH
8055: fprintf(stderr, "PP: entering START_TAG\n");
8056: #endif
8057: break;
8058: } else if (cur == '&') {
1.143 daniel 8059: if ((!terminate) &&
8060: (xmlParseLookupSequence(ctxt, ';', 0, 0) < 0))
1.140 daniel 8061: goto done;
8062: #ifdef DEBUG_PUSH
8063: fprintf(stderr, "PP: Parsing Reference\n");
8064: #endif
8065: /* TODO: check generation of subtrees if noent !!! */
8066: xmlParseReference(ctxt);
8067: } else {
1.156 ! daniel 8068: /* TODO Avoid the extra copy, handle directly !!! */
1.140 daniel 8069: /*
8070: * Goal of the following test is :
8071: * - minimize calls to the SAX 'character' callback
8072: * when they are mergeable
8073: * - handle an problem for isBlank when we only parse
8074: * a sequence of blank chars and the next one is
8075: * not available to check against '<' presence.
8076: * - tries to homogenize the differences in SAX
8077: * callbacks beween the push and pull versions
8078: * of the parser.
8079: */
8080: if ((ctxt->inputNr == 1) &&
8081: (avail < XML_PARSER_BIG_BUFFER_SIZE)) {
1.143 daniel 8082: if ((!terminate) &&
8083: (xmlParseLookupSequence(ctxt, '<', 0, 0) < 0))
1.140 daniel 8084: goto done;
8085: }
8086: ctxt->checkIndex = 0;
8087: #ifdef DEBUG_PUSH
8088: fprintf(stderr, "PP: Parsing char data\n");
8089: #endif
8090: xmlParseCharData(ctxt, 0);
8091: }
8092: /*
8093: * Pop-up of finished entities.
8094: */
1.152 daniel 8095: while ((RAW == 0) && (ctxt->inputNr > 1))
1.140 daniel 8096: xmlPopInput(ctxt);
8097: break;
8098: case XML_PARSER_CDATA_SECTION: {
8099: /*
8100: * The Push mode need to have the SAX callback for
8101: * cdataBlock merge back contiguous callbacks.
8102: */
8103: int base;
8104:
8105: in = ctxt->input;
8106: base = xmlParseLookupSequence(ctxt, ']', ']', '>');
8107: if (base < 0) {
8108: if (avail >= XML_PARSER_BIG_BUFFER_SIZE + 2) {
8109: if (ctxt->sax != NULL) {
8110: if (ctxt->sax->cdataBlock != NULL)
8111: ctxt->sax->cdataBlock(ctxt->userData, in->cur,
8112: XML_PARSER_BIG_BUFFER_SIZE);
8113: }
8114: SKIP(XML_PARSER_BIG_BUFFER_SIZE);
8115: ctxt->checkIndex = 0;
8116: }
8117: goto done;
8118: } else {
8119: if ((ctxt->sax != NULL) && (base > 0)) {
8120: if (ctxt->sax->cdataBlock != NULL)
8121: ctxt->sax->cdataBlock(ctxt->userData,
8122: in->cur, base);
8123: }
8124: SKIP(base + 3);
8125: ctxt->checkIndex = 0;
8126: ctxt->instate = XML_PARSER_CONTENT;
8127: #ifdef DEBUG_PUSH
8128: fprintf(stderr, "PP: entering CONTENT\n");
8129: #endif
8130: }
8131: break;
8132: }
1.141 daniel 8133: case XML_PARSER_END_TAG:
1.140 daniel 8134: if (avail < 2)
8135: goto done;
1.143 daniel 8136: if ((!terminate) &&
8137: (xmlParseLookupSequence(ctxt, '>', 0, 0) < 0))
1.140 daniel 8138: goto done;
8139: xmlParseEndTag(ctxt);
8140: if (ctxt->name == NULL) {
8141: ctxt->instate = XML_PARSER_EPILOG;
8142: #ifdef DEBUG_PUSH
8143: fprintf(stderr, "PP: entering EPILOG\n");
8144: #endif
8145: } else {
8146: ctxt->instate = XML_PARSER_CONTENT;
8147: #ifdef DEBUG_PUSH
8148: fprintf(stderr, "PP: entering CONTENT\n");
8149: #endif
8150: }
8151: break;
8152: case XML_PARSER_DTD: {
8153: /*
8154: * Sorry but progressive parsing of the internal subset
8155: * is not expected to be supported. We first check that
8156: * the full content of the internal subset is available and
8157: * the parsing is launched only at that point.
8158: * Internal subset ends up with "']' S? '>'" in an unescaped
8159: * section and not in a ']]>' sequence which are conditional
8160: * sections (whoever argued to keep that crap in XML deserve
8161: * a place in hell !).
8162: */
8163: int base, i;
8164: xmlChar *buf;
8165: xmlChar quote = 0;
8166:
8167: base = in->cur - in->base;
8168: if (base < 0) return(0);
8169: if (ctxt->checkIndex > base)
8170: base = ctxt->checkIndex;
8171: buf = in->buf->buffer->content;
8172: for (;base < in->buf->buffer->use;base++) {
8173: if (quote != 0) {
8174: if (buf[base] == quote)
8175: quote = 0;
8176: continue;
8177: }
8178: if (buf[base] == '"') {
8179: quote = '"';
8180: continue;
8181: }
8182: if (buf[base] == '\'') {
8183: quote = '\'';
8184: continue;
8185: }
8186: if (buf[base] == ']') {
8187: if (base +1 >= in->buf->buffer->use)
8188: break;
8189: if (buf[base + 1] == ']') {
8190: /* conditional crap, skip both ']' ! */
8191: base++;
8192: continue;
8193: }
8194: for (i = 0;base + i < in->buf->buffer->use;i++) {
8195: if (buf[base + i] == '>')
8196: goto found_end_int_subset;
8197: }
8198: break;
8199: }
8200: }
8201: /*
8202: * We didn't found the end of the Internal subset
8203: */
8204: if (quote == 0)
8205: ctxt->checkIndex = base;
8206: #ifdef DEBUG_PUSH
8207: if (next == 0)
8208: fprintf(stderr, "PP: lookup of int subset end filed\n");
8209: #endif
8210: goto done;
8211:
8212: found_end_int_subset:
8213: xmlParseInternalSubset(ctxt);
8214: ctxt->instate = XML_PARSER_PROLOG;
8215: ctxt->checkIndex = 0;
8216: #ifdef DEBUG_PUSH
8217: fprintf(stderr, "PP: entering PROLOG\n");
8218: #endif
8219: break;
8220: }
8221: case XML_PARSER_COMMENT:
8222: fprintf(stderr, "PP: internal error, state == COMMENT\n");
8223: ctxt->instate = XML_PARSER_CONTENT;
8224: #ifdef DEBUG_PUSH
8225: fprintf(stderr, "PP: entering CONTENT\n");
8226: #endif
8227: break;
8228: case XML_PARSER_PI:
8229: fprintf(stderr, "PP: internal error, state == PI\n");
8230: ctxt->instate = XML_PARSER_CONTENT;
8231: #ifdef DEBUG_PUSH
8232: fprintf(stderr, "PP: entering CONTENT\n");
8233: #endif
8234: break;
1.128 daniel 8235: case XML_PARSER_ENTITY_DECL:
1.140 daniel 8236: fprintf(stderr, "PP: internal error, state == ENTITY_DECL\n");
8237: ctxt->instate = XML_PARSER_DTD;
8238: #ifdef DEBUG_PUSH
8239: fprintf(stderr, "PP: entering DTD\n");
8240: #endif
8241: break;
1.128 daniel 8242: case XML_PARSER_ENTITY_VALUE:
1.140 daniel 8243: fprintf(stderr, "PP: internal error, state == ENTITY_VALUE\n");
8244: ctxt->instate = XML_PARSER_CONTENT;
8245: #ifdef DEBUG_PUSH
8246: fprintf(stderr, "PP: entering DTD\n");
8247: #endif
8248: break;
1.128 daniel 8249: case XML_PARSER_ATTRIBUTE_VALUE:
1.140 daniel 8250: fprintf(stderr, "PP: internal error, state == ATTRIBUTE_VALUE\n");
8251: ctxt->instate = XML_PARSER_START_TAG;
8252: #ifdef DEBUG_PUSH
8253: fprintf(stderr, "PP: entering START_TAG\n");
8254: #endif
8255: break;
1.128 daniel 8256: }
8257: }
1.140 daniel 8258: done:
8259: #ifdef DEBUG_PUSH
8260: fprintf(stderr, "PP: done %d\n", ret);
8261: #endif
1.128 daniel 8262: return(ret);
8263: }
8264:
8265: /**
1.143 daniel 8266: * xmlParseTry:
8267: * @ctxt: an XML parser context
8268: *
8269: * Try to progress on parsing
8270: *
8271: * Returns zero if no parsing was possible
8272: */
8273: int
8274: xmlParseTry(xmlParserCtxtPtr ctxt) {
8275: return(xmlParseTryOrFinish(ctxt, 0));
8276: }
8277:
8278: /**
1.128 daniel 8279: * xmlParseChunk:
8280: * @ctxt: an XML parser context
8281: * @chunk: an char array
8282: * @size: the size in byte of the chunk
8283: * @terminate: last chunk indicator
8284: *
8285: * Parse a Chunk of memory
8286: *
8287: * Returns zero if no error, the xmlParserErrors otherwise.
8288: */
1.140 daniel 8289: int
1.128 daniel 8290: xmlParseChunk(xmlParserCtxtPtr ctxt, const char *chunk, int size,
8291: int terminate) {
1.132 daniel 8292: if ((size > 0) && (chunk != NULL) && (ctxt->input != NULL) &&
1.140 daniel 8293: (ctxt->input->buf != NULL) && (ctxt->instate != XML_PARSER_EOF)) {
8294: int base = ctxt->input->base - ctxt->input->buf->buffer->content;
8295: int cur = ctxt->input->cur - ctxt->input->base;
8296:
1.132 daniel 8297: xmlParserInputBufferPush(ctxt->input->buf, size, chunk);
1.140 daniel 8298: ctxt->input->base = ctxt->input->buf->buffer->content + base;
8299: ctxt->input->cur = ctxt->input->base + cur;
8300: #ifdef DEBUG_PUSH
8301: fprintf(stderr, "PP: pushed %d\n", size);
8302: #endif
8303:
1.150 daniel 8304: if ((terminate) || (ctxt->input->buf->buffer->use > 80))
8305: xmlParseTryOrFinish(ctxt, terminate);
1.140 daniel 8306: } else if (ctxt->instate != XML_PARSER_EOF)
1.143 daniel 8307: xmlParseTryOrFinish(ctxt, terminate);
1.140 daniel 8308: if (terminate) {
1.151 daniel 8309: /*
8310: * Grab the encoding if it was added on-the-fly
8311: */
8312: if ((ctxt->encoding != NULL) && (ctxt->myDoc != NULL) &&
8313: (ctxt->myDoc->encoding == NULL)) {
8314: ctxt->myDoc->encoding = ctxt->encoding;
8315: ctxt->encoding = NULL;
8316: }
8317:
8318: /*
8319: * Check for termination
8320: */
1.140 daniel 8321: if ((ctxt->instate != XML_PARSER_EOF) &&
8322: (ctxt->instate != XML_PARSER_EPILOG)) {
8323: if ((ctxt->sax != NULL) && (ctxt->sax->error != NULL))
8324: ctxt->sax->error(ctxt->userData,
8325: "Extra content at the end of the document\n");
8326: ctxt->wellFormed = 0;
8327: ctxt->errNo = XML_ERR_DOCUMENT_END;
8328: }
8329: if (ctxt->instate != XML_PARSER_EOF) {
8330: if ((ctxt->sax) && (ctxt->sax->endDocument != NULL))
8331: ctxt->sax->endDocument(ctxt->userData);
8332: }
8333: ctxt->instate = XML_PARSER_EOF;
1.128 daniel 8334: }
8335: return((xmlParserErrors) ctxt->errNo);
8336: }
8337:
8338: /************************************************************************
8339: * *
1.98 daniel 8340: * I/O front end functions to the parser *
8341: * *
8342: ************************************************************************/
8343:
1.50 daniel 8344: /**
1.140 daniel 8345: * xmlCreatePushParserCtxt :
8346: * @sax: a SAX handler
8347: * @user_data: The user data returned on SAX callbacks
8348: * @chunk: a pointer to an array of chars
8349: * @size: number of chars in the array
8350: * @filename: an optional file name or URI
8351: *
8352: * Create a parser context for using the XML parser in push mode
8353: * To allow content encoding detection, @size should be >= 4
8354: * The value of @filename is used for fetching external entities
8355: * and error/warning reports.
8356: *
8357: * Returns the new parser context or NULL
8358: */
8359: xmlParserCtxtPtr
8360: xmlCreatePushParserCtxt(xmlSAXHandlerPtr sax, void *user_data,
8361: const char *chunk, int size, const char *filename) {
8362: xmlParserCtxtPtr ctxt;
8363: xmlParserInputPtr inputStream;
8364: xmlParserInputBufferPtr buf;
8365: xmlCharEncoding enc = XML_CHAR_ENCODING_NONE;
8366:
8367: /*
1.156 ! daniel 8368: * plug some encoding conversion routines
1.140 daniel 8369: */
8370: if ((chunk != NULL) && (size >= 4))
1.156 ! daniel 8371: enc = xmlDetectCharEncoding((const xmlChar *) chunk, size);
1.140 daniel 8372:
8373: buf = xmlAllocParserInputBuffer(enc);
8374: if (buf == NULL) return(NULL);
8375:
8376: ctxt = xmlNewParserCtxt();
8377: if (ctxt == NULL) {
8378: xmlFree(buf);
8379: return(NULL);
8380: }
8381: if (sax != NULL) {
8382: if (ctxt->sax != &xmlDefaultSAXHandler)
8383: xmlFree(ctxt->sax);
8384: ctxt->sax = (xmlSAXHandlerPtr) xmlMalloc(sizeof(xmlSAXHandler));
8385: if (ctxt->sax == NULL) {
8386: xmlFree(buf);
8387: xmlFree(ctxt);
8388: return(NULL);
8389: }
8390: memcpy(ctxt->sax, sax, sizeof(xmlSAXHandler));
8391: if (user_data != NULL)
8392: ctxt->userData = user_data;
8393: }
8394: if (filename == NULL) {
8395: ctxt->directory = NULL;
8396: } else {
8397: ctxt->directory = xmlParserGetDirectory(filename);
8398: }
8399:
8400: inputStream = xmlNewInputStream(ctxt);
8401: if (inputStream == NULL) {
8402: xmlFreeParserCtxt(ctxt);
8403: return(NULL);
8404: }
8405:
8406: if (filename == NULL)
8407: inputStream->filename = NULL;
8408: else
8409: inputStream->filename = xmlMemStrdup(filename);
8410: inputStream->buf = buf;
8411: inputStream->base = inputStream->buf->buffer->content;
8412: inputStream->cur = inputStream->buf->buffer->content;
1.156 ! daniel 8413: if (enc != XML_CHAR_ENCODING_NONE) {
! 8414: xmlSwitchEncoding(ctxt, enc);
! 8415: }
1.140 daniel 8416:
8417: inputPush(ctxt, inputStream);
8418:
8419: if ((size > 0) && (chunk != NULL) && (ctxt->input != NULL) &&
8420: (ctxt->input->buf != NULL)) {
8421: xmlParserInputBufferPush(ctxt->input->buf, size, chunk);
8422: #ifdef DEBUG_PUSH
8423: fprintf(stderr, "PP: pushed %d\n", size);
8424: #endif
8425: }
8426:
8427: return(ctxt);
8428: }
8429:
8430: /**
1.86 daniel 8431: * xmlCreateDocParserCtxt :
1.123 daniel 8432: * @cur: a pointer to an array of xmlChar
1.50 daniel 8433: *
1.69 daniel 8434: * Create a parser context for an XML in-memory document.
8435: *
8436: * Returns the new parser context or NULL
1.16 daniel 8437: */
1.69 daniel 8438: xmlParserCtxtPtr
1.123 daniel 8439: xmlCreateDocParserCtxt(xmlChar *cur) {
1.16 daniel 8440: xmlParserCtxtPtr ctxt;
1.40 daniel 8441: xmlParserInputPtr input;
1.16 daniel 8442:
1.97 daniel 8443: ctxt = xmlNewParserCtxt();
1.16 daniel 8444: if (ctxt == NULL) {
8445: return(NULL);
8446: }
1.96 daniel 8447: input = xmlNewInputStream(ctxt);
1.40 daniel 8448: if (input == NULL) {
1.97 daniel 8449: xmlFreeParserCtxt(ctxt);
1.40 daniel 8450: return(NULL);
8451: }
8452:
8453: input->base = cur;
8454: input->cur = cur;
8455:
8456: inputPush(ctxt, input);
1.69 daniel 8457: return(ctxt);
8458: }
8459:
8460: /**
8461: * xmlSAXParseDoc :
8462: * @sax: the SAX handler block
1.123 daniel 8463: * @cur: a pointer to an array of xmlChar
1.69 daniel 8464: * @recovery: work in recovery mode, i.e. tries to read no Well Formed
8465: * documents
8466: *
8467: * parse an XML in-memory document and build a tree.
8468: * It use the given SAX function block to handle the parsing callback.
8469: * If sax is NULL, fallback to the default DOM tree building routines.
8470: *
8471: * Returns the resulting document tree
8472: */
8473:
8474: xmlDocPtr
1.123 daniel 8475: xmlSAXParseDoc(xmlSAXHandlerPtr sax, xmlChar *cur, int recovery) {
1.69 daniel 8476: xmlDocPtr ret;
8477: xmlParserCtxtPtr ctxt;
8478:
8479: if (cur == NULL) return(NULL);
1.16 daniel 8480:
8481:
1.69 daniel 8482: ctxt = xmlCreateDocParserCtxt(cur);
8483: if (ctxt == NULL) return(NULL);
1.74 daniel 8484: if (sax != NULL) {
8485: ctxt->sax = sax;
8486: ctxt->userData = NULL;
8487: }
1.69 daniel 8488:
1.16 daniel 8489: xmlParseDocument(ctxt);
1.72 daniel 8490: if ((ctxt->wellFormed) || recovery) ret = ctxt->myDoc;
1.59 daniel 8491: else {
8492: ret = NULL;
1.72 daniel 8493: xmlFreeDoc(ctxt->myDoc);
8494: ctxt->myDoc = NULL;
1.59 daniel 8495: }
1.86 daniel 8496: if (sax != NULL)
8497: ctxt->sax = NULL;
1.69 daniel 8498: xmlFreeParserCtxt(ctxt);
1.16 daniel 8499:
1.1 veillard 8500: return(ret);
8501: }
8502:
1.50 daniel 8503: /**
1.55 daniel 8504: * xmlParseDoc :
1.123 daniel 8505: * @cur: a pointer to an array of xmlChar
1.55 daniel 8506: *
8507: * parse an XML in-memory document and build a tree.
8508: *
1.68 daniel 8509: * Returns the resulting document tree
1.55 daniel 8510: */
8511:
1.69 daniel 8512: xmlDocPtr
1.123 daniel 8513: xmlParseDoc(xmlChar *cur) {
1.59 daniel 8514: return(xmlSAXParseDoc(NULL, cur, 0));
1.76 daniel 8515: }
8516:
8517: /**
8518: * xmlSAXParseDTD :
8519: * @sax: the SAX handler block
8520: * @ExternalID: a NAME* containing the External ID of the DTD
8521: * @SystemID: a NAME* containing the URL to the DTD
8522: *
8523: * Load and parse an external subset.
8524: *
8525: * Returns the resulting xmlDtdPtr or NULL in case of error.
8526: */
8527:
8528: xmlDtdPtr
1.123 daniel 8529: xmlSAXParseDTD(xmlSAXHandlerPtr sax, const xmlChar *ExternalID,
8530: const xmlChar *SystemID) {
1.76 daniel 8531: xmlDtdPtr ret = NULL;
8532: xmlParserCtxtPtr ctxt;
1.83 daniel 8533: xmlParserInputPtr input = NULL;
1.76 daniel 8534: xmlCharEncoding enc;
8535:
8536: if ((ExternalID == NULL) && (SystemID == NULL)) return(NULL);
8537:
1.97 daniel 8538: ctxt = xmlNewParserCtxt();
1.76 daniel 8539: if (ctxt == NULL) {
8540: return(NULL);
8541: }
8542:
8543: /*
8544: * Set-up the SAX context
8545: */
8546: if (ctxt == NULL) return(NULL);
8547: if (sax != NULL) {
1.93 veillard 8548: if (ctxt->sax != NULL)
1.119 daniel 8549: xmlFree(ctxt->sax);
1.76 daniel 8550: ctxt->sax = sax;
8551: ctxt->userData = NULL;
8552: }
8553:
8554: /*
8555: * Ask the Entity resolver to load the damn thing
8556: */
8557:
8558: if ((ctxt->sax != NULL) && (ctxt->sax->resolveEntity != NULL))
8559: input = ctxt->sax->resolveEntity(ctxt->userData, ExternalID, SystemID);
8560: if (input == NULL) {
1.86 daniel 8561: if (sax != NULL) ctxt->sax = NULL;
1.76 daniel 8562: xmlFreeParserCtxt(ctxt);
8563: return(NULL);
8564: }
8565:
8566: /*
1.156 ! daniel 8567: * plug some encoding conversion routines here.
1.76 daniel 8568: */
8569: xmlPushInput(ctxt, input);
1.156 ! daniel 8570: enc = xmlDetectCharEncoding(ctxt->input->cur, 4);
1.76 daniel 8571: xmlSwitchEncoding(ctxt, enc);
8572:
1.95 veillard 8573: if (input->filename == NULL)
1.156 ! daniel 8574: input->filename = (char *) xmlStrdup(SystemID);
1.76 daniel 8575: input->line = 1;
8576: input->col = 1;
8577: input->base = ctxt->input->cur;
8578: input->cur = ctxt->input->cur;
8579: input->free = NULL;
8580:
8581: /*
8582: * let's parse that entity knowing it's an external subset.
8583: */
1.79 daniel 8584: xmlParseExternalSubset(ctxt, ExternalID, SystemID);
1.76 daniel 8585:
8586: if (ctxt->myDoc != NULL) {
8587: if (ctxt->wellFormed) {
8588: ret = ctxt->myDoc->intSubset;
8589: ctxt->myDoc->intSubset = NULL;
8590: } else {
8591: ret = NULL;
8592: }
8593: xmlFreeDoc(ctxt->myDoc);
8594: ctxt->myDoc = NULL;
8595: }
1.86 daniel 8596: if (sax != NULL) ctxt->sax = NULL;
1.76 daniel 8597: xmlFreeParserCtxt(ctxt);
8598:
8599: return(ret);
8600: }
8601:
8602: /**
8603: * xmlParseDTD :
8604: * @ExternalID: a NAME* containing the External ID of the DTD
8605: * @SystemID: a NAME* containing the URL to the DTD
8606: *
8607: * Load and parse an external subset.
8608: *
8609: * Returns the resulting xmlDtdPtr or NULL in case of error.
8610: */
8611:
8612: xmlDtdPtr
1.123 daniel 8613: xmlParseDTD(const xmlChar *ExternalID, const xmlChar *SystemID) {
1.76 daniel 8614: return(xmlSAXParseDTD(NULL, ExternalID, SystemID));
1.59 daniel 8615: }
8616:
8617: /**
1.144 daniel 8618: * xmlSAXParseBalancedChunk :
8619: * @ctx: an XML parser context (possibly NULL)
8620: * @sax: the SAX handler bloc (possibly NULL)
8621: * @user_data: The user data returned on SAX callbacks (possibly NULL)
8622: * @input: a parser input stream
8623: * @enc: the encoding
8624: *
8625: * Parse a well-balanced chunk of an XML document
8626: * The user has to provide SAX callback block whose routines will be
8627: * called by the parser
8628: * The allowed sequence for the Well Balanced Chunk is the one defined by
8629: * the content production in the XML grammar:
8630: *
8631: * [43] content ::= (element | CharData | Reference | CDSect | PI | Comment)*
8632: *
8633: * Returns 0 id the chunk is well balanced, -1 in case of args problem and
8634: * the error code otherwise
8635: */
8636:
8637: int
8638: xmlSAXParseBalancedChunk(xmlParserCtxtPtr ctx, xmlSAXHandlerPtr sax,
8639: void *user_data, xmlParserInputPtr input,
8640: xmlCharEncoding enc) {
8641: xmlParserCtxtPtr ctxt;
8642: int ret;
8643:
8644: if (input == NULL) return(-1);
8645:
8646: if (ctx != NULL)
8647: ctxt = ctx;
8648: else {
8649: ctxt = xmlNewParserCtxt();
8650: if (ctxt == NULL)
8651: return(-1);
8652: if (sax == NULL)
8653: ctxt->myDoc = xmlNewDoc(BAD_CAST "1.0");
8654: }
8655:
8656: /*
8657: * Set-up the SAX context
8658: */
8659: if (sax != NULL) {
8660: if (ctxt->sax != NULL)
8661: xmlFree(ctxt->sax);
8662: ctxt->sax = sax;
8663: ctxt->userData = user_data;
8664: }
8665:
8666: /*
8667: * plug some encoding conversion routines here.
8668: */
8669: xmlPushInput(ctxt, input);
8670: if (enc != XML_CHAR_ENCODING_NONE)
8671: xmlSwitchEncoding(ctxt, enc);
8672:
8673: /*
8674: * let's parse that entity knowing it's an external subset.
8675: */
8676: xmlParseContent(ctxt);
8677: ret = ctxt->errNo;
8678:
8679: if (ctx == NULL) {
8680: if (sax != NULL)
8681: ctxt->sax = NULL;
8682: else
8683: xmlFreeDoc(ctxt->myDoc);
8684: xmlFreeParserCtxt(ctxt);
8685: }
8686: return(ret);
8687: }
8688:
8689: /**
8690: * xmlParseBalancedChunk :
8691: * @doc: the document the chunk pertains to
8692: * @node: the node defining the context in which informations will be added
8693: *
8694: * Parse a well-balanced chunk of an XML document present in memory
8695: *
8696: * Returns the resulting list of nodes resulting from the parsing,
8697: * they are not added to @node
8698: */
8699:
8700: xmlNodePtr
8701: xmlParseBalancedChunkMemory(xmlDocPtr doc, xmlNodePtr node) {
1.156 ! daniel 8702: /* TODO !!! */
! 8703: return(NULL);
1.144 daniel 8704: }
8705:
8706: /**
8707: * xmlParseBalancedChunkFile :
8708: * @doc: the document the chunk pertains to
8709: *
8710: * Parse a well-balanced chunk of an XML document contained in a file
8711: *
8712: * Returns the resulting list of nodes resulting from the parsing,
8713: * they are not added to @node
8714: */
8715:
8716: xmlNodePtr
8717: xmlParseBalancedChunkFile(xmlDocPtr doc, xmlNodePtr node) {
1.156 ! daniel 8718: /* TODO !!! */
! 8719: return(NULL);
1.144 daniel 8720: }
8721:
8722: /**
1.59 daniel 8723: * xmlRecoverDoc :
1.123 daniel 8724: * @cur: a pointer to an array of xmlChar
1.59 daniel 8725: *
8726: * parse an XML in-memory document and build a tree.
8727: * In the case the document is not Well Formed, a tree is built anyway
8728: *
1.68 daniel 8729: * Returns the resulting document tree
1.59 daniel 8730: */
8731:
1.69 daniel 8732: xmlDocPtr
1.123 daniel 8733: xmlRecoverDoc(xmlChar *cur) {
1.59 daniel 8734: return(xmlSAXParseDoc(NULL, cur, 1));
1.55 daniel 8735: }
8736:
8737: /**
1.69 daniel 8738: * xmlCreateFileParserCtxt :
1.50 daniel 8739: * @filename: the filename
8740: *
1.69 daniel 8741: * Create a parser context for a file content.
8742: * Automatic support for ZLIB/Compress compressed document is provided
8743: * by default if found at compile-time.
1.50 daniel 8744: *
1.69 daniel 8745: * Returns the new parser context or NULL
1.9 httpng 8746: */
1.69 daniel 8747: xmlParserCtxtPtr
8748: xmlCreateFileParserCtxt(const char *filename)
8749: {
8750: xmlParserCtxtPtr ctxt;
1.40 daniel 8751: xmlParserInputPtr inputStream;
1.91 daniel 8752: xmlParserInputBufferPtr buf;
1.111 daniel 8753: char *directory = NULL;
1.9 httpng 8754:
1.91 daniel 8755: buf = xmlParserInputBufferCreateFilename(filename, XML_CHAR_ENCODING_NONE);
8756: if (buf == NULL) return(NULL);
1.9 httpng 8757:
1.97 daniel 8758: ctxt = xmlNewParserCtxt();
1.16 daniel 8759: if (ctxt == NULL) {
8760: return(NULL);
8761: }
1.97 daniel 8762:
1.96 daniel 8763: inputStream = xmlNewInputStream(ctxt);
1.40 daniel 8764: if (inputStream == NULL) {
1.97 daniel 8765: xmlFreeParserCtxt(ctxt);
1.40 daniel 8766: return(NULL);
8767: }
8768:
1.119 daniel 8769: inputStream->filename = xmlMemStrdup(filename);
1.91 daniel 8770: inputStream->buf = buf;
8771: inputStream->base = inputStream->buf->buffer->content;
8772: inputStream->cur = inputStream->buf->buffer->content;
1.16 daniel 8773:
1.40 daniel 8774: inputPush(ctxt, inputStream);
1.110 daniel 8775: if ((ctxt->directory == NULL) && (directory == NULL))
1.106 daniel 8776: directory = xmlParserGetDirectory(filename);
8777: if ((ctxt->directory == NULL) && (directory != NULL))
1.110 daniel 8778: ctxt->directory = directory;
1.106 daniel 8779:
1.69 daniel 8780: return(ctxt);
8781: }
8782:
8783: /**
8784: * xmlSAXParseFile :
8785: * @sax: the SAX handler block
8786: * @filename: the filename
8787: * @recovery: work in recovery mode, i.e. tries to read no Well Formed
8788: * documents
8789: *
8790: * parse an XML file and build a tree. Automatic support for ZLIB/Compress
8791: * compressed document is provided by default if found at compile-time.
8792: * It use the given SAX function block to handle the parsing callback.
8793: * If sax is NULL, fallback to the default DOM tree building routines.
8794: *
8795: * Returns the resulting document tree
8796: */
8797:
1.79 daniel 8798: xmlDocPtr
8799: xmlSAXParseFile(xmlSAXHandlerPtr sax, const char *filename,
1.69 daniel 8800: int recovery) {
8801: xmlDocPtr ret;
8802: xmlParserCtxtPtr ctxt;
1.111 daniel 8803: char *directory = NULL;
1.69 daniel 8804:
8805: ctxt = xmlCreateFileParserCtxt(filename);
8806: if (ctxt == NULL) return(NULL);
1.74 daniel 8807: if (sax != NULL) {
1.93 veillard 8808: if (ctxt->sax != NULL)
1.119 daniel 8809: xmlFree(ctxt->sax);
1.74 daniel 8810: ctxt->sax = sax;
8811: ctxt->userData = NULL;
8812: }
1.106 daniel 8813:
1.110 daniel 8814: if ((ctxt->directory == NULL) && (directory == NULL))
1.106 daniel 8815: directory = xmlParserGetDirectory(filename);
8816: if ((ctxt->directory == NULL) && (directory != NULL))
1.156 ! daniel 8817: ctxt->directory = (char *) xmlStrdup((xmlChar *) directory);
1.16 daniel 8818:
8819: xmlParseDocument(ctxt);
1.40 daniel 8820:
1.72 daniel 8821: if ((ctxt->wellFormed) || recovery) ret = ctxt->myDoc;
1.59 daniel 8822: else {
8823: ret = NULL;
1.72 daniel 8824: xmlFreeDoc(ctxt->myDoc);
8825: ctxt->myDoc = NULL;
1.59 daniel 8826: }
1.86 daniel 8827: if (sax != NULL)
8828: ctxt->sax = NULL;
1.69 daniel 8829: xmlFreeParserCtxt(ctxt);
1.20 daniel 8830:
8831: return(ret);
8832: }
8833:
1.55 daniel 8834: /**
8835: * xmlParseFile :
8836: * @filename: the filename
8837: *
8838: * parse an XML file and build a tree. Automatic support for ZLIB/Compress
8839: * compressed document is provided by default if found at compile-time.
8840: *
1.68 daniel 8841: * Returns the resulting document tree
1.55 daniel 8842: */
8843:
1.79 daniel 8844: xmlDocPtr
8845: xmlParseFile(const char *filename) {
1.59 daniel 8846: return(xmlSAXParseFile(NULL, filename, 0));
8847: }
8848:
8849: /**
8850: * xmlRecoverFile :
8851: * @filename: the filename
8852: *
8853: * parse an XML file and build a tree. Automatic support for ZLIB/Compress
8854: * compressed document is provided by default if found at compile-time.
8855: * In the case the document is not Well Formed, a tree is built anyway
8856: *
1.68 daniel 8857: * Returns the resulting document tree
1.59 daniel 8858: */
8859:
1.79 daniel 8860: xmlDocPtr
8861: xmlRecoverFile(const char *filename) {
1.59 daniel 8862: return(xmlSAXParseFile(NULL, filename, 1));
1.55 daniel 8863: }
1.32 daniel 8864:
1.50 daniel 8865: /**
1.69 daniel 8866: * xmlCreateMemoryParserCtxt :
1.68 daniel 8867: * @buffer: an pointer to a char array
1.127 daniel 8868: * @size: the size of the array
1.50 daniel 8869: *
1.69 daniel 8870: * Create a parser context for an XML in-memory document.
1.50 daniel 8871: *
1.69 daniel 8872: * Returns the new parser context or NULL
1.20 daniel 8873: */
1.69 daniel 8874: xmlParserCtxtPtr
8875: xmlCreateMemoryParserCtxt(char *buffer, int size) {
1.20 daniel 8876: xmlParserCtxtPtr ctxt;
1.40 daniel 8877: xmlParserInputPtr input;
8878:
8879: buffer[size - 1] = '\0';
8880:
1.97 daniel 8881: ctxt = xmlNewParserCtxt();
1.20 daniel 8882: if (ctxt == NULL) {
8883: return(NULL);
8884: }
1.97 daniel 8885:
1.96 daniel 8886: input = xmlNewInputStream(ctxt);
1.40 daniel 8887: if (input == NULL) {
1.97 daniel 8888: xmlFreeParserCtxt(ctxt);
1.40 daniel 8889: return(NULL);
8890: }
1.20 daniel 8891:
1.40 daniel 8892: input->filename = NULL;
8893: input->line = 1;
8894: input->col = 1;
1.96 daniel 8895: input->buf = NULL;
1.91 daniel 8896: input->consumed = 0;
1.75 daniel 8897:
1.116 daniel 8898: input->base = BAD_CAST buffer;
8899: input->cur = BAD_CAST buffer;
1.69 daniel 8900: input->free = NULL;
1.20 daniel 8901:
1.40 daniel 8902: inputPush(ctxt, input);
1.69 daniel 8903: return(ctxt);
8904: }
8905:
8906: /**
8907: * xmlSAXParseMemory :
8908: * @sax: the SAX handler block
8909: * @buffer: an pointer to a char array
1.127 daniel 8910: * @size: the size of the array
8911: * @recovery: work in recovery mode, i.e. tries to read not Well Formed
1.69 daniel 8912: * documents
8913: *
8914: * parse an XML in-memory block and use the given SAX function block
8915: * to handle the parsing callback. If sax is NULL, fallback to the default
8916: * DOM tree building routines.
8917: *
8918: * Returns the resulting document tree
8919: */
8920: xmlDocPtr
8921: xmlSAXParseMemory(xmlSAXHandlerPtr sax, char *buffer, int size, int recovery) {
8922: xmlDocPtr ret;
8923: xmlParserCtxtPtr ctxt;
8924:
8925: ctxt = xmlCreateMemoryParserCtxt(buffer, size);
8926: if (ctxt == NULL) return(NULL);
1.74 daniel 8927: if (sax != NULL) {
8928: ctxt->sax = sax;
8929: ctxt->userData = NULL;
8930: }
1.20 daniel 8931:
8932: xmlParseDocument(ctxt);
1.40 daniel 8933:
1.72 daniel 8934: if ((ctxt->wellFormed) || recovery) ret = ctxt->myDoc;
1.59 daniel 8935: else {
8936: ret = NULL;
1.72 daniel 8937: xmlFreeDoc(ctxt->myDoc);
8938: ctxt->myDoc = NULL;
1.59 daniel 8939: }
1.86 daniel 8940: if (sax != NULL)
8941: ctxt->sax = NULL;
1.69 daniel 8942: xmlFreeParserCtxt(ctxt);
1.16 daniel 8943:
1.9 httpng 8944: return(ret);
1.17 daniel 8945: }
8946:
1.55 daniel 8947: /**
8948: * xmlParseMemory :
1.68 daniel 8949: * @buffer: an pointer to a char array
1.55 daniel 8950: * @size: the size of the array
8951: *
8952: * parse an XML in-memory block and build a tree.
8953: *
1.68 daniel 8954: * Returns the resulting document tree
1.55 daniel 8955: */
8956:
8957: xmlDocPtr xmlParseMemory(char *buffer, int size) {
1.59 daniel 8958: return(xmlSAXParseMemory(NULL, buffer, size, 0));
8959: }
8960:
8961: /**
8962: * xmlRecoverMemory :
1.68 daniel 8963: * @buffer: an pointer to a char array
1.59 daniel 8964: * @size: the size of the array
8965: *
8966: * parse an XML in-memory block and build a tree.
8967: * In the case the document is not Well Formed, a tree is built anyway
8968: *
1.68 daniel 8969: * Returns the resulting document tree
1.59 daniel 8970: */
8971:
8972: xmlDocPtr xmlRecoverMemory(char *buffer, int size) {
8973: return(xmlSAXParseMemory(NULL, buffer, size, 1));
1.17 daniel 8974: }
8975:
8976:
1.50 daniel 8977: /**
8978: * xmlSetupParserForBuffer:
8979: * @ctxt: an XML parser context
1.123 daniel 8980: * @buffer: a xmlChar * buffer
1.50 daniel 8981: * @filename: a file name
8982: *
1.19 daniel 8983: * Setup the parser context to parse a new buffer; Clears any prior
8984: * contents from the parser context. The buffer parameter must not be
8985: * NULL, but the filename parameter can be
8986: */
1.55 daniel 8987: void
1.123 daniel 8988: xmlSetupParserForBuffer(xmlParserCtxtPtr ctxt, const xmlChar* buffer,
1.17 daniel 8989: const char* filename)
8990: {
1.96 daniel 8991: xmlParserInputPtr input;
1.40 daniel 8992:
1.96 daniel 8993: input = xmlNewInputStream(ctxt);
8994: if (input == NULL) {
8995: perror("malloc");
1.119 daniel 8996: xmlFree(ctxt);
1.145 daniel 8997: return;
1.96 daniel 8998: }
8999:
9000: xmlClearParserCtxt(ctxt);
9001: if (filename != NULL)
1.119 daniel 9002: input->filename = xmlMemStrdup(filename);
1.96 daniel 9003: input->base = buffer;
9004: input->cur = buffer;
9005: inputPush(ctxt, input);
1.17 daniel 9006: }
9007:
1.123 daniel 9008: /**
9009: * xmlSAXUserParseFile:
9010: * @sax: a SAX handler
9011: * @user_data: The user data returned on SAX callbacks
9012: * @filename: a file name
9013: *
9014: * parse an XML file and call the given SAX handler routines.
9015: * Automatic support for ZLIB/Compress compressed document is provided
9016: *
9017: * Returns 0 in case of success or a error number otherwise
9018: */
1.131 daniel 9019: int
9020: xmlSAXUserParseFile(xmlSAXHandlerPtr sax, void *user_data,
9021: const char *filename) {
1.123 daniel 9022: int ret = 0;
9023: xmlParserCtxtPtr ctxt;
9024:
9025: ctxt = xmlCreateFileParserCtxt(filename);
9026: if (ctxt == NULL) return -1;
1.134 daniel 9027: if (ctxt->sax != &xmlDefaultSAXHandler)
9028: xmlFree(ctxt->sax);
1.123 daniel 9029: ctxt->sax = sax;
1.140 daniel 9030: if (user_data != NULL)
9031: ctxt->userData = user_data;
1.123 daniel 9032:
9033: xmlParseDocument(ctxt);
9034:
9035: if (ctxt->wellFormed)
9036: ret = 0;
9037: else {
9038: if (ctxt->errNo != 0)
9039: ret = ctxt->errNo;
9040: else
9041: ret = -1;
9042: }
9043: if (sax != NULL)
9044: ctxt->sax = NULL;
9045: xmlFreeParserCtxt(ctxt);
9046:
9047: return ret;
9048: }
9049:
9050: /**
9051: * xmlSAXUserParseMemory:
9052: * @sax: a SAX handler
9053: * @user_data: The user data returned on SAX callbacks
9054: * @buffer: an in-memory XML document input
1.127 daniel 9055: * @size: the length of the XML document in bytes
1.123 daniel 9056: *
9057: * A better SAX parsing routine.
9058: * parse an XML in-memory buffer and call the given SAX handler routines.
9059: *
9060: * Returns 0 in case of success or a error number otherwise
9061: */
9062: int xmlSAXUserParseMemory(xmlSAXHandlerPtr sax, void *user_data,
9063: char *buffer, int size) {
9064: int ret = 0;
9065: xmlParserCtxtPtr ctxt;
9066:
9067: ctxt = xmlCreateMemoryParserCtxt(buffer, size);
9068: if (ctxt == NULL) return -1;
9069: ctxt->sax = sax;
9070: ctxt->userData = user_data;
9071:
9072: xmlParseDocument(ctxt);
9073:
9074: if (ctxt->wellFormed)
9075: ret = 0;
9076: else {
9077: if (ctxt->errNo != 0)
9078: ret = ctxt->errNo;
9079: else
9080: ret = -1;
9081: }
9082: if (sax != NULL)
9083: ctxt->sax = NULL;
9084: xmlFreeParserCtxt(ctxt);
9085:
9086: return ret;
9087: }
9088:
1.32 daniel 9089:
1.98 daniel 9090: /************************************************************************
9091: * *
1.127 daniel 9092: * Miscellaneous *
1.98 daniel 9093: * *
9094: ************************************************************************/
9095:
/**
 * xmlCleanupParser:
 *
 * Cleanup function for the XML parser. It tries to reclaim all
 * parsing related global memory allocated for the parser processing:
 * the character encoding handlers first, then the predefined entities.
 * It doesn't deallocate any document related memory. Calling this
 * function should not prevent reusing the parser.
 */

void
xmlCleanupParser(void)
{
    /* global tables, released in this order */
    xmlCleanupCharEncodingHandlers();
    xmlCleanupPredefinedEntities();
}
1.98 daniel 9110:
1.50 daniel 9111: /**
9112: * xmlParserFindNodeInfo:
9113: * @ctxt: an XML parser context
9114: * @node: an XML node within the tree
9115: *
9116: * Find the parser node info struct for a given node
9117: *
1.68 daniel 9118: * Returns an xmlParserNodeInfo block pointer or NULL
1.32 daniel 9119: */
9120: const xmlParserNodeInfo* xmlParserFindNodeInfo(const xmlParserCtxt* ctx,
9121: const xmlNode* node)
9122: {
9123: unsigned long pos;
9124:
9125: /* Find position where node should be at */
9126: pos = xmlParserFindNodeInfoIndex(&ctx->node_seq, node);
9127: if ( ctx->node_seq.buffer[pos].node == node )
9128: return &ctx->node_seq.buffer[pos];
9129: else
9130: return NULL;
9131: }
9132:
9133:
1.50 daniel 9134: /**
9135: * xmlInitNodeInfoSeq :
9136: * @seq: a node info sequence pointer
9137: *
9138: * -- Initialize (set to initial state) node info sequence
1.32 daniel 9139: */
1.55 daniel 9140: void
9141: xmlInitNodeInfoSeq(xmlParserNodeInfoSeqPtr seq)
1.32 daniel 9142: {
9143: seq->length = 0;
9144: seq->maximum = 0;
9145: seq->buffer = NULL;
9146: }
9147:
1.50 daniel 9148: /**
9149: * xmlClearNodeInfoSeq :
9150: * @seq: a node info sequence pointer
9151: *
9152: * -- Clear (release memory and reinitialize) node
1.32 daniel 9153: * info sequence
9154: */
1.55 daniel 9155: void
9156: xmlClearNodeInfoSeq(xmlParserNodeInfoSeqPtr seq)
1.32 daniel 9157: {
9158: if ( seq->buffer != NULL )
1.119 daniel 9159: xmlFree(seq->buffer);
1.32 daniel 9160: xmlInitNodeInfoSeq(seq);
9161: }
9162:
9163:
1.50 daniel 9164: /**
9165: * xmlParserFindNodeInfoIndex:
9166: * @seq: a node info sequence pointer
9167: * @node: an XML node pointer
9168: *
9169: *
1.32 daniel 9170: * xmlParserFindNodeInfoIndex : Find the index that the info record for
9171: * the given node is or should be at in a sorted sequence
1.68 daniel 9172: *
9173: * Returns a long indicating the position of the record
1.32 daniel 9174: */
9175: unsigned long xmlParserFindNodeInfoIndex(const xmlParserNodeInfoSeq* seq,
9176: const xmlNode* node)
9177: {
9178: unsigned long upper, lower, middle;
9179: int found = 0;
9180:
9181: /* Do a binary search for the key */
9182: lower = 1;
9183: upper = seq->length;
9184: middle = 0;
9185: while ( lower <= upper && !found) {
9186: middle = lower + (upper - lower) / 2;
9187: if ( node == seq->buffer[middle - 1].node )
9188: found = 1;
9189: else if ( node < seq->buffer[middle - 1].node )
9190: upper = middle - 1;
9191: else
9192: lower = middle + 1;
9193: }
9194:
9195: /* Return position */
9196: if ( middle == 0 || seq->buffer[middle - 1].node < node )
9197: return middle;
9198: else
9199: return middle - 1;
9200: }
9201:
9202:
1.50 daniel 9203: /**
9204: * xmlParserAddNodeInfo:
9205: * @ctxt: an XML parser context
1.68 daniel 9206: * @info: a node info sequence pointer
1.50 daniel 9207: *
9208: * Insert node info record into the sorted sequence
1.32 daniel 9209: */
1.55 daniel 9210: void
9211: xmlParserAddNodeInfo(xmlParserCtxtPtr ctxt,
1.68 daniel 9212: const xmlParserNodeInfo* info)
1.32 daniel 9213: {
9214: unsigned long pos;
9215: static unsigned int block_size = 5;
9216:
9217: /* Find pos and check to see if node is already in the sequence */
1.55 daniel 9218: pos = xmlParserFindNodeInfoIndex(&ctxt->node_seq, info->node);
9219: if ( pos < ctxt->node_seq.length
9220: && ctxt->node_seq.buffer[pos].node == info->node ) {
9221: ctxt->node_seq.buffer[pos] = *info;
1.32 daniel 9222: }
9223:
9224: /* Otherwise, we need to add new node to buffer */
9225: else {
9226: /* Expand buffer by 5 if needed */
1.55 daniel 9227: if ( ctxt->node_seq.length + 1 > ctxt->node_seq.maximum ) {
1.32 daniel 9228: xmlParserNodeInfo* tmp_buffer;
1.55 daniel 9229: unsigned int byte_size = (sizeof(*ctxt->node_seq.buffer)
9230: *(ctxt->node_seq.maximum + block_size));
1.32 daniel 9231:
1.55 daniel 9232: if ( ctxt->node_seq.buffer == NULL )
1.119 daniel 9233: tmp_buffer = (xmlParserNodeInfo*) xmlMalloc(byte_size);
1.32 daniel 9234: else
1.119 daniel 9235: tmp_buffer = (xmlParserNodeInfo*) xmlRealloc(ctxt->node_seq.buffer, byte_size);
1.32 daniel 9236:
9237: if ( tmp_buffer == NULL ) {
1.55 daniel 9238: if ((ctxt->sax != NULL) && (ctxt->sax->error != NULL))
1.74 daniel 9239: ctxt->sax->error(ctxt->userData, "Out of memory\n");
1.123 daniel 9240: ctxt->errNo = XML_ERR_NO_MEMORY;
1.32 daniel 9241: return;
9242: }
1.55 daniel 9243: ctxt->node_seq.buffer = tmp_buffer;
9244: ctxt->node_seq.maximum += block_size;
1.32 daniel 9245: }
9246:
9247: /* If position is not at end, move elements out of the way */
1.55 daniel 9248: if ( pos != ctxt->node_seq.length ) {
1.32 daniel 9249: unsigned long i;
9250:
1.55 daniel 9251: for ( i = ctxt->node_seq.length; i > pos; i-- )
9252: ctxt->node_seq.buffer[i] = ctxt->node_seq.buffer[i - 1];
1.32 daniel 9253: }
9254:
9255: /* Copy element and increase length */
1.55 daniel 9256: ctxt->node_seq.buffer[pos] = *info;
9257: ctxt->node_seq.length++;
1.32 daniel 9258: }
9259: }
1.77 daniel 9260:
1.98 daniel 9261:
9262: /**
9263: * xmlSubstituteEntitiesDefault :
9264: * @val: int 0 or 1
9265: *
9266: * Set and return the previous value for default entity support.
9267: * Initially the parser always keep entity references instead of substituting
9268: * entity values in the output. This function has to be used to change the
9269: * default parser behaviour
9270: * SAX::subtituteEntities() has to be used for changing that on a file by
9271: * file basis.
9272: *
9273: * Returns the last value for 0 for no substitution, 1 for substitution.
9274: */
9275:
9276: int
9277: xmlSubstituteEntitiesDefault(int val) {
9278: int old = xmlSubstituteEntitiesDefaultValue;
9279:
9280: xmlSubstituteEntitiesDefaultValue = val;
9281: return(old);
9282: }
1.77 daniel 9283:
Webmaster