Annotation of XML/parser.c, revision 1.168
1.1 veillard 1: /*
1.3 veillard 2: * parser.c : an XML 1.0 non-verifying parser
1.15 veillard 3: *
4: * See Copyright for the status of this software.
5: *
1.60 daniel 6: * Daniel.Veillard@w3.org
1.1 veillard 7: */
8:
1.26 daniel 9: #ifdef WIN32
1.138 daniel 10: #include "win32config.h"
1.26 daniel 11: #else
1.121 daniel 12: #include "config.h"
1.26 daniel 13: #endif
1.121 daniel 14:
1.1 veillard 15: #include <stdio.h>
1.121 daniel 16: #include <string.h> /* for memset() only */
17: #ifdef HAVE_CTYPE_H
1.1 veillard 18: #include <ctype.h>
1.121 daniel 19: #endif
20: #ifdef HAVE_STDLIB_H
1.50 daniel 21: #include <stdlib.h>
1.121 daniel 22: #endif
23: #ifdef HAVE_SYS_STAT_H
1.9 httpng 24: #include <sys/stat.h>
1.121 daniel 25: #endif
1.9 httpng 26: #ifdef HAVE_FCNTL_H
27: #include <fcntl.h>
28: #endif
1.10 httpng 29: #ifdef HAVE_UNISTD_H
30: #include <unistd.h>
31: #endif
1.20 daniel 32: #ifdef HAVE_ZLIB_H
33: #include <zlib.h>
34: #endif
1.1 veillard 35:
1.119 daniel 36: #include "xmlmemory.h"
1.14 veillard 37: #include "tree.h"
1.1 veillard 38: #include "parser.h"
1.14 veillard 39: #include "entities.h"
1.75 daniel 40: #include "encoding.h"
1.61 daniel 41: #include "valid.h"
1.69 daniel 42: #include "parserInternals.h"
1.91 daniel 43: #include "xmlIO.h"
1.122 daniel 44: #include "xml-error.h"
1.1 veillard 45:
1.140 daniel 46: #define XML_PARSER_BIG_BUFFER_SIZE 1000
47: #define XML_PARSER_BUFFER_SIZE 100
48:
1.86 daniel 49: const char *xmlParserVersion = LIBXML_VERSION;
1.160 daniel 50: int xmlGetWarningsDefaultValue = 1;
1.86 daniel 51:
1.139 daniel 52: /*
53: * List of XML prefixed PI allowed by W3C specs
54: */
55:
56: const char *xmlW3CPIs[] = {
57: "xml-stylesheet",
58: NULL
59: };
1.91 daniel 60:
1.151 daniel 61: void xmlParserHandleReference(xmlParserCtxtPtr ctxt);
62: void xmlParserHandlePEReference(xmlParserCtxtPtr ctxt);
63: xmlEntityPtr xmlParseStringPEReference(xmlParserCtxtPtr ctxt,
64: const xmlChar **str);
1.91 daniel 65: /************************************************************************
66: * *
67: * Input handling functions for progressive parsing *
68: * *
69: ************************************************************************/
70:
71: /* #define DEBUG_INPUT */
1.140 daniel 72: /* #define DEBUG_STACK */
73: /* #define DEBUG_PUSH */
74:
1.91 daniel 75:
1.110 daniel 76: #define INPUT_CHUNK 250
77: /* we need to keep enough input to show errors in context */
78: #define LINE_LEN 80
1.91 daniel 79:
80: #ifdef DEBUG_INPUT
81: #define CHECK_BUFFER(in) check_buffer(in)
82:
83: void check_buffer(xmlParserInputPtr in) {
84: if (in->base != in->buf->buffer->content) {
85: fprintf(stderr, "xmlParserInput: base mismatch problem\n");
86: }
87: if (in->cur < in->base) {
88: fprintf(stderr, "xmlParserInput: cur < base problem\n");
89: }
90: if (in->cur > in->base + in->buf->buffer->use) {
91: fprintf(stderr, "xmlParserInput: cur > base + use problem\n");
92: }
93: fprintf(stderr,"buffer %x : content %x, cur %d, use %d, size %d\n",
94: (int) in, (int) in->buf->buffer->content, in->cur - in->base,
95: in->buf->buffer->use, in->buf->buffer->size);
96: }
97:
1.110 daniel 98: #else
99: #define CHECK_BUFFER(in)
100: #endif
101:
1.91 daniel 102:
103: /**
104: * xmlParserInputRead:
105: * @in: an XML parser input
106: * @len: an indicative size for the lookahead
107: *
108: * This function refresh the input for the parser. It doesn't try to
109: * preserve pointers to the input buffer, and discard already read data
110: *
1.123 daniel 111: * Returns the number of xmlChars read, or -1 in case of error, 0 indicate the
1.91 daniel 112: * end of this entity
113: */
114: int
115: xmlParserInputRead(xmlParserInputPtr in, int len) {
116: int ret;
117: int used;
118: int index;
119:
120: #ifdef DEBUG_INPUT
121: fprintf(stderr, "Read\n");
122: #endif
123: if (in->buf == NULL) return(-1);
124: if (in->base == NULL) return(-1);
125: if (in->cur == NULL) return(-1);
126: if (in->buf->buffer == NULL) return(-1);
127:
128: CHECK_BUFFER(in);
129:
130: used = in->cur - in->buf->buffer->content;
131: ret = xmlBufferShrink(in->buf->buffer, used);
132: if (ret > 0) {
133: in->cur -= ret;
134: in->consumed += ret;
135: }
136: ret = xmlParserInputBufferRead(in->buf, len);
137: if (in->base != in->buf->buffer->content) {
138: /*
139: * the buffer has been realloced
140: */
141: index = in->cur - in->base;
142: in->base = in->buf->buffer->content;
143: in->cur = &in->buf->buffer->content[index];
144: }
145:
146: CHECK_BUFFER(in);
147:
148: return(ret);
149: }
150:
151: /**
152: * xmlParserInputGrow:
153: * @in: an XML parser input
154: * @len: an indicative size for the lookahead
155: *
156: * This function increase the input for the parser. It tries to
157: * preserve pointers to the input buffer, and keep already read data
158: *
1.123 daniel 159: * Returns the number of xmlChars read, or -1 in case of error, 0 indicate the
1.91 daniel 160: * end of this entity
161: */
162: int
163: xmlParserInputGrow(xmlParserInputPtr in, int len) {
164: int ret;
165: int index;
166:
167: #ifdef DEBUG_INPUT
168: fprintf(stderr, "Grow\n");
169: #endif
170: if (in->buf == NULL) return(-1);
171: if (in->base == NULL) return(-1);
172: if (in->cur == NULL) return(-1);
173: if (in->buf->buffer == NULL) return(-1);
174:
175: CHECK_BUFFER(in);
176:
177: index = in->cur - in->base;
178: if (in->buf->buffer->use > index + INPUT_CHUNK) {
179:
180: CHECK_BUFFER(in);
181:
182: return(0);
183: }
1.148 daniel 184: if ((in->buf->httpIO != NULL) || (in->buf->ftpIO != NULL) ||
185: (in->buf->file != NULL) ||
1.140 daniel 186: #ifdef HAVE_ZLIB_H
187: (in->buf->gzfile != NULL) ||
188: #endif
189: (in->buf->fd >= 0))
190: ret = xmlParserInputBufferGrow(in->buf, len);
191: else
192: return(0);
1.135 daniel 193:
194: /*
195: * NOTE : in->base may be a "dandling" i.e. freed pointer in this
196: * block, but we use it really as an integer to do some
197: * pointer arithmetic. Insure will raise it as a bug but in
198: * that specific case, that's not !
199: */
1.91 daniel 200: if (in->base != in->buf->buffer->content) {
201: /*
202: * the buffer has been realloced
203: */
204: index = in->cur - in->base;
205: in->base = in->buf->buffer->content;
206: in->cur = &in->buf->buffer->content[index];
207: }
208:
209: CHECK_BUFFER(in);
210:
211: return(ret);
212: }
213:
214: /**
215: * xmlParserInputShrink:
216: * @in: an XML parser input
217: *
218: * This function removes used input for the parser.
219: */
220: void
221: xmlParserInputShrink(xmlParserInputPtr in) {
222: int used;
223: int ret;
224: int index;
225:
226: #ifdef DEBUG_INPUT
227: fprintf(stderr, "Shrink\n");
228: #endif
229: if (in->buf == NULL) return;
230: if (in->base == NULL) return;
231: if (in->cur == NULL) return;
232: if (in->buf->buffer == NULL) return;
233:
234: CHECK_BUFFER(in);
235:
236: used = in->cur - in->buf->buffer->content;
237: if (used > INPUT_CHUNK) {
1.110 daniel 238: ret = xmlBufferShrink(in->buf->buffer, used - LINE_LEN);
1.91 daniel 239: if (ret > 0) {
240: in->cur -= ret;
241: in->consumed += ret;
242: }
243: }
244:
245: CHECK_BUFFER(in);
246:
247: if (in->buf->buffer->use > INPUT_CHUNK) {
248: return;
249: }
250: xmlParserInputBufferRead(in->buf, 2 * INPUT_CHUNK);
251: if (in->base != in->buf->buffer->content) {
252: /*
253: * the buffer has been realloced
254: */
255: index = in->cur - in->base;
256: in->base = in->buf->buffer->content;
257: in->cur = &in->buf->buffer->content[index];
258: }
259:
260: CHECK_BUFFER(in);
261: }
262:
1.45 daniel 263: /************************************************************************
264: * *
265: * Parser stacks related functions and macros *
266: * *
267: ************************************************************************/
1.79 daniel 268:
269: int xmlSubstituteEntitiesDefaultValue = 0;
1.100 daniel 270: int xmlDoValidityCheckingDefaultValue = 0;
1.135 daniel 271: xmlEntityPtr xmlParseStringEntityRef(xmlParserCtxtPtr ctxt,
272: const xmlChar ** str);
1.79 daniel 273:
1.1 veillard 274: /*
1.40 daniel 275: * Generic function for accessing stacks in the Parser Context
1.1 veillard 276: */
277:
1.140 daniel 278: #define PUSH_AND_POP(scope, type, name) \
279: scope int name##Push(xmlParserCtxtPtr ctxt, type value) { \
1.31 daniel 280: if (ctxt->name##Nr >= ctxt->name##Max) { \
281: ctxt->name##Max *= 2; \
1.119 daniel 282: ctxt->name##Tab = (void *) xmlRealloc(ctxt->name##Tab, \
1.40 daniel 283: ctxt->name##Max * sizeof(ctxt->name##Tab[0])); \
284: if (ctxt->name##Tab == NULL) { \
1.31 daniel 285: fprintf(stderr, "realloc failed !\n"); \
1.145 daniel 286: return(0); \
1.31 daniel 287: } \
288: } \
1.40 daniel 289: ctxt->name##Tab[ctxt->name##Nr] = value; \
290: ctxt->name = value; \
291: return(ctxt->name##Nr++); \
1.31 daniel 292: } \
1.140 daniel 293: scope type name##Pop(xmlParserCtxtPtr ctxt) { \
1.69 daniel 294: type ret; \
1.40 daniel 295: if (ctxt->name##Nr <= 0) return(0); \
296: ctxt->name##Nr--; \
1.50 daniel 297: if (ctxt->name##Nr > 0) \
298: ctxt->name = ctxt->name##Tab[ctxt->name##Nr - 1]; \
299: else \
300: ctxt->name = NULL; \
1.69 daniel 301: ret = ctxt->name##Tab[ctxt->name##Nr]; \
302: ctxt->name##Tab[ctxt->name##Nr] = 0; \
303: return(ret); \
1.31 daniel 304: } \
305:
1.140 daniel 306: PUSH_AND_POP(extern, xmlParserInputPtr, input)
307: PUSH_AND_POP(extern, xmlNodePtr, node)
308: PUSH_AND_POP(extern, xmlChar*, name)
1.40 daniel 309:
/*
 * Macros for accessing the content. Those should be used only by the parser,
 * and not exported. All of them expect a variable named ctxt to be in scope.
 *
 * Dirty macros, i.e. one needs to make assumptions on the context to use them
 *
 *   CUR_PTR return the current pointer to the xmlChar to be parsed.
 *           To be used with extreme caution since operations consuming
 *           characters may move the input buffer to a different location !
 *   RAW     returns the current xmlChar value, or -1 when a token is
 *           pending in ctxt->token.
 *   CUR     returns the pending token if there is one, otherwise the
 *           current xmlChar value, i.e. an 8 bit value if compiled
 *           in ISO-Latin or UTF-8.
 *           This should be used internally by the parser
 *           only to compare to ASCII values otherwise it would break when
 *           running with UTF-8 encoding.
 *   NXT(n)  returns the n'th next xmlChar. Same as CUR, it should be used
 *           only to compare on ASCII based substring.
 *   SKIP(n) Skip n xmlChar, and must also be used only to skip ASCII defined
 *           strings within the parser.
 *
 * Clean macros, not dependent of an ASCII context, expect UTF-8 encoding
 *
 *   NEXT    Skip to the next character, this does the proper decoding
 *           in UTF-8 mode. It also pops up unfinished entities on the fly.
 *   COPY(to) copy one char to *to, increment CUR_PTR and to accordingly
 *   CUR_CHAR Return the current char as an int as well as its length.
 *
 * Caution: SKIP, SHRINK, GROW and NEXTL expand to several statements, and
 * the definitions of SKIP_BLANKS, NEXT, NEXTL, CUR_CHAR, CUR_SCHAR and
 * COPY_BUF already end with a semicolon, so none of them can be used as
 * the unbraced body of an if/else.
 */

#define RAW (ctxt->token ? -1 : (*ctxt->input->cur))
#define CUR (ctxt->token ? ctxt->token : (*ctxt->input->cur))
#define NXT(val) ctxt->input->cur[(val)]
#define CUR_PTR ctxt->input->cur

/* advance by val xmlChars, then handle references and exhausted inputs */
#define SKIP(val) ctxt->nbChars += (val),ctxt->input->cur += (val);	\
    if (*ctxt->input->cur == '%') xmlParserHandlePEReference(ctxt);	\
    if (*ctxt->input->cur == '&') xmlParserHandleReference(ctxt);	\
    if ((*ctxt->input->cur == 0) &&					\
        (xmlParserInputGrow(ctxt->input, INPUT_CHUNK) <= 0))		\
	    xmlPopInput(ctxt)

/* drop consumed data; pop the input if it is exhausted */
#define SHRINK xmlParserInputShrink(ctxt->input);			\
    if ((*ctxt->input->cur == 0) &&					\
        (xmlParserInputGrow(ctxt->input, INPUT_CHUNK) <= 0))		\
	    xmlPopInput(ctxt)

/* fetch more data; pop the input if it is exhausted */
#define GROW xmlParserInputGrow(ctxt->input, INPUT_CHUNK);		\
    if ((*ctxt->input->cur == 0) &&					\
        (xmlParserInputGrow(ctxt->input, INPUT_CHUNK) <= 0))		\
	    xmlPopInput(ctxt)

#define SKIP_BLANKS xmlSkipBlankChars(ctxt);

#define NEXT xmlNextChar(ctxt);

/* advance by l xmlChars, keeping the line/column counters up to date */
#define NEXTL(l)							\
    if (*(ctxt->input->cur) == '\n') {					\
	ctxt->input->line++; ctxt->input->col = 1;			\
    } else ctxt->input->col++;						\
    ctxt->token = 0; ctxt->input->cur += l;				\
    if (*ctxt->input->cur == '%') xmlParserHandlePEReference(ctxt);	\
    if (*ctxt->input->cur == '&') xmlParserHandleReference(ctxt);

#define CUR_CHAR(l) xmlCurrentChar(ctxt, &l);
#define CUR_SCHAR(s, l) xmlStringCurrentChar(ctxt, s, &l);

/* append char v, of encoded length l, to buffer b at index i */
#define COPY_BUF(l,b,i,v)						\
    if (l == 1) b[i++] = (xmlChar) v;					\
    else i += xmlCopyChar(l,&b[i],v);
1.151 daniel 377:
378: /**
379: * xmlNextChar:
380: * @ctxt: the XML parser context
381: *
382: * Skip to the next char input char.
383: */
1.55 daniel 384:
1.151 daniel 385: void
386: xmlNextChar(xmlParserCtxtPtr ctxt) {
387: if (ctxt->token != 0) ctxt->token = 0;
388: else {
389: if ((*ctxt->input->cur == 0) &&
390: (xmlParserInputGrow(ctxt->input, INPUT_CHUNK) <= 0) &&
391: (ctxt->instate != XML_PARSER_COMMENT)) {
392: /*
393: * If we are at the end of the current entity and
394: * the context allows it, we pop consumed entities
395: * automatically.
396: * TODO: the auto closing should be blocked in other cases
397: */
398: xmlPopInput(ctxt);
399: } else {
400: if (*(ctxt->input->cur) == '\n') {
401: ctxt->input->line++; ctxt->input->col = 1;
402: } else ctxt->input->col++;
403: if (ctxt->encoding == NULL) {
404: /*
405: * We are supposed to handle UTF8, check it's valid
406: * From rfc2044: encoding of the Unicode values on UTF-8:
407: *
408: * UCS-4 range (hex.) UTF-8 octet sequence (binary)
409: * 0000 0000-0000 007F 0xxxxxxx
410: * 0000 0080-0000 07FF 110xxxxx 10xxxxxx
411: * 0000 0800-0000 FFFF 1110xxxx 10xxxxxx 10xxxxxx
412: *
1.160 daniel 413: * Check for the 0x110000 limit too
1.151 daniel 414: */
415: const unsigned char *cur = ctxt->input->cur;
416: unsigned char c;
1.91 daniel 417:
1.151 daniel 418: c = *cur;
419: if (c & 0x80) {
420: if (cur[1] == 0)
421: xmlParserInputGrow(ctxt->input, INPUT_CHUNK);
422: if ((cur[1] & 0xc0) != 0x80)
423: goto encoding_error;
424: if ((c & 0xe0) == 0xe0) {
425: unsigned int val;
426:
427: if (cur[2] == 0)
428: xmlParserInputGrow(ctxt->input, INPUT_CHUNK);
429: if ((cur[2] & 0xc0) != 0x80)
430: goto encoding_error;
431: if ((c & 0xf0) == 0xf0) {
432: if (cur[3] == 0)
433: xmlParserInputGrow(ctxt->input, INPUT_CHUNK);
434: if (((c & 0xf8) != 0xf0) ||
435: ((cur[3] & 0xc0) != 0x80))
436: goto encoding_error;
437: /* 4-byte code */
438: ctxt->input->cur += 4;
439: val = (cur[0] & 0x7) << 18;
440: val |= (cur[1] & 0x3f) << 12;
441: val |= (cur[2] & 0x3f) << 6;
442: val |= cur[3] & 0x3f;
443: } else {
444: /* 3-byte code */
445: ctxt->input->cur += 3;
446: val = (cur[0] & 0xf) << 12;
447: val |= (cur[1] & 0x3f) << 6;
448: val |= cur[2] & 0x3f;
449: }
450: if (((val > 0xd7ff) && (val < 0xe000)) ||
451: ((val > 0xfffd) && (val < 0x10000)) ||
1.160 daniel 452: (val >= 0x110000)) {
1.151 daniel 453: if ((ctxt->sax != NULL) &&
454: (ctxt->sax->error != NULL))
455: ctxt->sax->error(ctxt->userData,
456: "Char out of allowed range\n");
457: ctxt->errNo = XML_ERR_INVALID_ENCODING;
458: ctxt->wellFormed = 0;
459: }
460: } else
461: /* 2-byte code */
462: ctxt->input->cur += 2;
463: } else
464: /* 1-byte code */
465: ctxt->input->cur++;
466: } else {
467: /*
468: * Assume it's a fixed lenght encoding (1) with
469: * a compatibke encoding for the ASCII set, since
470: * XML constructs only use < 128 chars
471: */
472: ctxt->input->cur++;
473: }
474: ctxt->nbChars++;
475: if (*ctxt->input->cur == 0)
476: xmlParserInputGrow(ctxt->input, INPUT_CHUNK);
477: }
478: }
1.154 daniel 479: if (*ctxt->input->cur == '%') xmlParserHandlePEReference(ctxt);
480: if (*ctxt->input->cur == '&') xmlParserHandleReference(ctxt);
1.168 ! daniel 481: if ((*ctxt->input->cur == 0) &&
! 482: (xmlParserInputGrow(ctxt->input, INPUT_CHUNK) <= 0))
! 483: xmlPopInput(ctxt);
1.151 daniel 484: return;
485: encoding_error:
486: /*
487: * If we detect an UTF8 error that probably mean that the
488: * input encoding didn't get properly advertized in the
489: * declaration header. Report the error and switch the encoding
490: * to ISO-Latin-1 (if you don't like this policy, just declare the
491: * encoding !)
492: */
493: if ((ctxt->sax != NULL) && (ctxt->sax->error != NULL))
494: ctxt->sax->error(ctxt->userData,
495: "Input is not proper UTF-8, indicate encoding !\n");
496: ctxt->errNo = XML_ERR_INVALID_ENCODING;
497:
498: ctxt->encoding = xmlStrdup(BAD_CAST "ISO-8859-1");
499: ctxt->input->cur++;
500: return;
501: }
1.42 daniel 502:
1.152 daniel 503: /**
504: * xmlCurrentChar:
505: * @ctxt: the XML parser context
506: * @len: pointer to the length of the char read
507: *
508: * The current char value, if using UTF-8 this may actaully span multiple
509: * bytes in the input buffer.
510: *
511: * Returns the current char value and its lenght
512: */
513:
514: int
515: xmlCurrentChar(xmlParserCtxtPtr ctxt, int *len) {
516: if (ctxt->token != 0) {
517: *len = 0;
518: return(ctxt->token);
519: }
520: if (ctxt->encoding == NULL) {
521: /*
522: * We are supposed to handle UTF8, check it's valid
523: * From rfc2044: encoding of the Unicode values on UTF-8:
524: *
525: * UCS-4 range (hex.) UTF-8 octet sequence (binary)
526: * 0000 0000-0000 007F 0xxxxxxx
527: * 0000 0080-0000 07FF 110xxxxx 10xxxxxx
528: * 0000 0800-0000 FFFF 1110xxxx 10xxxxxx 10xxxxxx
529: *
1.160 daniel 530: * Check for the 0x110000 limit too
1.152 daniel 531: */
532: const unsigned char *cur = ctxt->input->cur;
533: unsigned char c;
534: unsigned int val;
535:
536: c = *cur;
537: if (c & 0x80) {
538: if (cur[1] == 0)
539: xmlParserInputGrow(ctxt->input, INPUT_CHUNK);
540: if ((cur[1] & 0xc0) != 0x80)
541: goto encoding_error;
542: if ((c & 0xe0) == 0xe0) {
543:
544: if (cur[2] == 0)
545: xmlParserInputGrow(ctxt->input, INPUT_CHUNK);
546: if ((cur[2] & 0xc0) != 0x80)
547: goto encoding_error;
548: if ((c & 0xf0) == 0xf0) {
549: if (cur[3] == 0)
550: xmlParserInputGrow(ctxt->input, INPUT_CHUNK);
551: if (((c & 0xf8) != 0xf0) ||
552: ((cur[3] & 0xc0) != 0x80))
553: goto encoding_error;
554: /* 4-byte code */
555: *len = 4;
556: val = (cur[0] & 0x7) << 18;
557: val |= (cur[1] & 0x3f) << 12;
558: val |= (cur[2] & 0x3f) << 6;
559: val |= cur[3] & 0x3f;
560: } else {
561: /* 3-byte code */
562: *len = 3;
563: val = (cur[0] & 0xf) << 12;
564: val |= (cur[1] & 0x3f) << 6;
565: val |= cur[2] & 0x3f;
566: }
567: } else {
568: /* 2-byte code */
569: *len = 2;
570: val = (cur[0] & 0x1f) << 6;
1.168 ! daniel 571: val |= cur[1] & 0x3f;
1.152 daniel 572: }
573: if (!IS_CHAR(val)) {
574: if ((ctxt->sax != NULL) &&
575: (ctxt->sax->error != NULL))
576: ctxt->sax->error(ctxt->userData,
577: "Char out of allowed range\n");
578: ctxt->errNo = XML_ERR_INVALID_ENCODING;
579: ctxt->wellFormed = 0;
580: }
581: return(val);
582: } else {
583: /* 1-byte code */
584: *len = 1;
585: return((int) *ctxt->input->cur);
586: }
587: }
588: /*
589: * Assume it's a fixed lenght encoding (1) with
590: * a compatibke encoding for the ASCII set, since
591: * XML constructs only use < 128 chars
592: */
593: *len = 1;
594: return((int) *ctxt->input->cur);
595: encoding_error:
596: /*
597: * If we detect an UTF8 error that probably mean that the
598: * input encoding didn't get properly advertized in the
599: * declaration header. Report the error and switch the encoding
600: * to ISO-Latin-1 (if you don't like this policy, just declare the
601: * encoding !)
602: */
603: if ((ctxt->sax != NULL) && (ctxt->sax->error != NULL))
604: ctxt->sax->error(ctxt->userData,
605: "Input is not proper UTF-8, indicate encoding !\n");
606: ctxt->errNo = XML_ERR_INVALID_ENCODING;
607:
608: ctxt->encoding = xmlStrdup(BAD_CAST "ISO-8859-1");
609: *len = 1;
610: return((int) *ctxt->input->cur);
611: }
612:
613: /**
1.162 daniel 614: * xmlStringCurrentChar:
615: * @ctxt: the XML parser context
616: * @cur: pointer to the beginning of the char
617: * @len: pointer to the length of the char read
618: *
619: * The current char value, if using UTF-8 this may actaully span multiple
620: * bytes in the input buffer.
621: *
622: * Returns the current char value and its lenght
623: */
624:
625: int
626: xmlStringCurrentChar(xmlParserCtxtPtr ctxt, const xmlChar *cur, int *len) {
627: if (ctxt->encoding == NULL) {
628: /*
629: * We are supposed to handle UTF8, check it's valid
630: * From rfc2044: encoding of the Unicode values on UTF-8:
631: *
632: * UCS-4 range (hex.) UTF-8 octet sequence (binary)
633: * 0000 0000-0000 007F 0xxxxxxx
634: * 0000 0080-0000 07FF 110xxxxx 10xxxxxx
635: * 0000 0800-0000 FFFF 1110xxxx 10xxxxxx 10xxxxxx
636: *
637: * Check for the 0x110000 limit too
638: */
639: unsigned char c;
640: unsigned int val;
641:
642: c = *cur;
643: if (c & 0x80) {
644: if ((cur[1] & 0xc0) != 0x80)
645: goto encoding_error;
646: if ((c & 0xe0) == 0xe0) {
647:
648: if ((cur[2] & 0xc0) != 0x80)
649: goto encoding_error;
650: if ((c & 0xf0) == 0xf0) {
651: if (((c & 0xf8) != 0xf0) ||
652: ((cur[3] & 0xc0) != 0x80))
653: goto encoding_error;
654: /* 4-byte code */
655: *len = 4;
656: val = (cur[0] & 0x7) << 18;
657: val |= (cur[1] & 0x3f) << 12;
658: val |= (cur[2] & 0x3f) << 6;
659: val |= cur[3] & 0x3f;
660: } else {
661: /* 3-byte code */
662: *len = 3;
663: val = (cur[0] & 0xf) << 12;
664: val |= (cur[1] & 0x3f) << 6;
665: val |= cur[2] & 0x3f;
666: }
667: } else {
668: /* 2-byte code */
669: *len = 2;
670: val = (cur[0] & 0x1f) << 6;
671: val |= cur[2] & 0x3f;
672: }
673: if (!IS_CHAR(val)) {
674: if ((ctxt->sax != NULL) &&
675: (ctxt->sax->error != NULL))
676: ctxt->sax->error(ctxt->userData,
677: "Char out of allowed range\n");
678: ctxt->errNo = XML_ERR_INVALID_ENCODING;
679: ctxt->wellFormed = 0;
680: }
681: return(val);
682: } else {
683: /* 1-byte code */
684: *len = 1;
685: return((int) *cur);
686: }
687: }
688: /*
689: * Assume it's a fixed lenght encoding (1) with
690: * a compatibke encoding for the ASCII set, since
691: * XML constructs only use < 128 chars
692: */
693: *len = 1;
694: return((int) *cur);
695: encoding_error:
696: /*
697: * If we detect an UTF8 error that probably mean that the
698: * input encoding didn't get properly advertized in the
699: * declaration header. Report the error and switch the encoding
700: * to ISO-Latin-1 (if you don't like this policy, just declare the
701: * encoding !)
702: */
703: if ((ctxt->sax != NULL) && (ctxt->sax->error != NULL))
704: ctxt->sax->error(ctxt->userData,
705: "Input is not proper UTF-8, indicate encoding !\n");
706: ctxt->errNo = XML_ERR_INVALID_ENCODING;
707:
708: *len = 1;
709: return((int) *cur);
710: }
711:
712: /**
1.152 daniel 713: * xmlCopyChar:
714: * @len: pointer to the length of the char read (or zero)
715: * @array: pointer to an arry of xmlChar
716: * @val: the char value
717: *
718: * append the char value in the array
719: *
720: * Returns the number of xmlChar written
721: */
722:
723: int
724: xmlCopyChar(int len, xmlChar *out, int val) {
725: /*
726: * We are supposed to handle UTF8, check it's valid
727: * From rfc2044: encoding of the Unicode values on UTF-8:
728: *
729: * UCS-4 range (hex.) UTF-8 octet sequence (binary)
730: * 0000 0000-0000 007F 0xxxxxxx
731: * 0000 0080-0000 07FF 110xxxxx 10xxxxxx
732: * 0000 0800-0000 FFFF 1110xxxx 10xxxxxx 10xxxxxx
733: */
734: if (len == 0) {
735: if (val < 0) len = 0;
1.160 daniel 736: else if (val < 0x80) len = 1;
737: else if (val < 0x800) len = 2;
738: else if (val < 0x10000) len = 3;
739: else if (val < 0x110000) len = 4;
1.152 daniel 740: if (len == 0) {
741: fprintf(stderr, "Internal error, xmlCopyChar 0x%X out of bound\n",
742: val);
743: return(0);
744: }
745: }
746: if (len > 1) {
747: int bits;
748:
749: if (val < 0x80) { *out++= val; bits= -6; }
750: else if (val < 0x800) { *out++= (val >> 6) | 0xC0; bits= 0; }
751: else if (val < 0x10000) { *out++= (val >> 12) | 0xE0; bits= 6; }
752: else { *out++= (val >> 18) | 0xF0; bits= 12; }
753:
754: for ( ; bits >= 0; bits-= 6)
755: *out++= ((val >> bits) & 0x3F) | 0x80 ;
756:
757: return(len);
758: }
759: *out = (xmlChar) val;
760: return(1);
1.155 daniel 761: }
762:
763: /**
764: * xmlSkipBlankChars:
765: * @ctxt: the XML parser context
766: *
767: * skip all blanks character found at that point in the input streams.
768: * It pops up finished entities in the process if allowable at that point.
769: *
770: * Returns the number of space chars skipped
771: */
772:
773: int
774: xmlSkipBlankChars(xmlParserCtxtPtr ctxt) {
775: int cur, res = 0;
776:
777: do {
778: cur = CUR;
779: while (IS_BLANK(cur)) {
780: NEXT;
781: cur = CUR;
782: res++;
783: }
784: while ((cur == 0) && (ctxt->inputNr > 1) &&
785: (ctxt->instate != XML_PARSER_COMMENT)) {
786: xmlPopInput(ctxt);
787: cur = CUR;
788: }
789: if (*ctxt->input->cur == '%') xmlParserHandlePEReference(ctxt);
790: if (*ctxt->input->cur == '&') xmlParserHandleReference(ctxt);
791: } while (IS_BLANK(cur));
792: return(res);
1.152 daniel 793: }
794:
1.97 daniel 795: /************************************************************************
796: * *
797: * Commodity functions to handle entities processing *
798: * *
799: ************************************************************************/
1.40 daniel 800:
1.50 daniel 801: /**
802: * xmlPopInput:
803: * @ctxt: an XML parser context
804: *
1.40 daniel 805: * xmlPopInput: the current input pointed by ctxt->input came to an end
806: * pop it and return the next char.
1.45 daniel 807: *
1.123 daniel 808: * Returns the current xmlChar in the parser context
1.40 daniel 809: */
1.123 daniel 810: xmlChar
1.55 daniel 811: xmlPopInput(xmlParserCtxtPtr ctxt) {
1.40 daniel 812: if (ctxt->inputNr == 1) return(0); /* End of main Input */
1.69 daniel 813: xmlFreeInputStream(inputPop(ctxt));
1.97 daniel 814: if ((*ctxt->input->cur == 0) &&
815: (xmlParserInputGrow(ctxt->input, INPUT_CHUNK) <= 0))
816: return(xmlPopInput(ctxt));
1.40 daniel 817: return(CUR);
818: }
819:
1.50 daniel 820: /**
821: * xmlPushInput:
822: * @ctxt: an XML parser context
823: * @input: an XML parser input fragment (entity, XML fragment ...).
824: *
1.40 daniel 825: * xmlPushInput: switch to a new input stream which is stacked on top
826: * of the previous one(s).
827: */
1.55 daniel 828: void
829: xmlPushInput(xmlParserCtxtPtr ctxt, xmlParserInputPtr input) {
1.40 daniel 830: if (input == NULL) return;
831: inputPush(ctxt, input);
1.164 daniel 832: GROW;
1.40 daniel 833: }
834:
1.50 daniel 835: /**
1.69 daniel 836: * xmlFreeInputStream:
1.127 daniel 837: * @input: an xmlParserInputPtr
1.69 daniel 838: *
839: * Free up an input stream.
840: */
841: void
842: xmlFreeInputStream(xmlParserInputPtr input) {
843: if (input == NULL) return;
844:
1.119 daniel 845: if (input->filename != NULL) xmlFree((char *) input->filename);
846: if (input->directory != NULL) xmlFree((char *) input->directory);
1.164 daniel 847: if (input->encoding != NULL) xmlFree((char *) input->encoding);
1.165 daniel 848: if (input->version != NULL) xmlFree((char *) input->version);
1.69 daniel 849: if ((input->free != NULL) && (input->base != NULL))
1.123 daniel 850: input->free((xmlChar *) input->base);
1.93 veillard 851: if (input->buf != NULL)
852: xmlFreeParserInputBuffer(input->buf);
1.69 daniel 853: memset(input, -1, sizeof(xmlParserInput));
1.119 daniel 854: xmlFree(input);
1.69 daniel 855: }
856:
857: /**
1.96 daniel 858: * xmlNewInputStream:
859: * @ctxt: an XML parser context
860: *
861: * Create a new input stream structure
862: * Returns the new input stream or NULL
863: */
864: xmlParserInputPtr
865: xmlNewInputStream(xmlParserCtxtPtr ctxt) {
866: xmlParserInputPtr input;
867:
1.119 daniel 868: input = (xmlParserInputPtr) xmlMalloc(sizeof(xmlParserInput));
1.96 daniel 869: if (input == NULL) {
1.123 daniel 870: ctxt->errNo = XML_ERR_NO_MEMORY;
1.96 daniel 871: if ((ctxt->sax != NULL) && (ctxt->sax->error != NULL))
1.122 daniel 872: ctxt->sax->error(ctxt->userData,
873: "malloc: couldn't allocate a new input stream\n");
1.123 daniel 874: ctxt->errNo = XML_ERR_NO_MEMORY;
1.96 daniel 875: return(NULL);
876: }
1.165 daniel 877: memset(input, 0, sizeof(xmlParserInput));
1.96 daniel 878: input->line = 1;
879: input->col = 1;
1.167 daniel 880: input->standalone = -1;
1.96 daniel 881: return(input);
882: }
883:
884: /**
1.50 daniel 885: * xmlNewEntityInputStream:
886: * @ctxt: an XML parser context
887: * @entity: an Entity pointer
888: *
1.82 daniel 889: * Create a new input stream based on an xmlEntityPtr
1.113 daniel 890: *
891: * Returns the new input stream or NULL
1.45 daniel 892: */
1.50 daniel 893: xmlParserInputPtr
894: xmlNewEntityInputStream(xmlParserCtxtPtr ctxt, xmlEntityPtr entity) {
1.45 daniel 895: xmlParserInputPtr input;
896:
897: if (entity == NULL) {
1.123 daniel 898: ctxt->errNo = XML_ERR_INTERNAL_ERROR;
1.55 daniel 899: if ((ctxt->sax != NULL) && (ctxt->sax->error != NULL))
1.74 daniel 900: ctxt->sax->error(ctxt->userData,
1.45 daniel 901: "internal: xmlNewEntityInputStream entity = NULL\n");
1.123 daniel 902: ctxt->errNo = XML_ERR_INTERNAL_ERROR;
1.50 daniel 903: return(NULL);
1.45 daniel 904: }
905: if (entity->content == NULL) {
1.159 daniel 906: switch (entity->etype) {
1.113 daniel 907: case XML_EXTERNAL_GENERAL_UNPARSED_ENTITY:
1.123 daniel 908: ctxt->errNo = XML_ERR_UNPARSED_ENTITY;
1.113 daniel 909: if ((ctxt->sax != NULL) && (ctxt->sax->error != NULL))
910: ctxt->sax->error(ctxt->userData,
911: "xmlNewEntityInputStream unparsed entity !\n");
912: break;
913: case XML_EXTERNAL_GENERAL_PARSED_ENTITY:
914: case XML_EXTERNAL_PARAMETER_ENTITY:
1.116 daniel 915: return(xmlLoadExternalEntity((char *) entity->SystemID,
1.142 daniel 916: (char *) entity->ExternalID, ctxt));
1.113 daniel 917: case XML_INTERNAL_GENERAL_ENTITY:
918: if ((ctxt->sax != NULL) && (ctxt->sax->error != NULL))
919: ctxt->sax->error(ctxt->userData,
920: "Internal entity %s without content !\n", entity->name);
921: break;
922: case XML_INTERNAL_PARAMETER_ENTITY:
1.123 daniel 923: ctxt->errNo = XML_ERR_INTERNAL_ERROR;
1.113 daniel 924: if ((ctxt->sax != NULL) && (ctxt->sax->error != NULL))
925: ctxt->sax->error(ctxt->userData,
926: "Internal parameter entity %s without content !\n", entity->name);
927: break;
928: case XML_INTERNAL_PREDEFINED_ENTITY:
1.123 daniel 929: ctxt->errNo = XML_ERR_INTERNAL_ERROR;
1.113 daniel 930: if ((ctxt->sax != NULL) && (ctxt->sax->error != NULL))
931: ctxt->sax->error(ctxt->userData,
932: "Predefined entity %s without content !\n", entity->name);
933: break;
934: }
1.50 daniel 935: return(NULL);
1.45 daniel 936: }
1.96 daniel 937: input = xmlNewInputStream(ctxt);
1.45 daniel 938: if (input == NULL) {
1.50 daniel 939: return(NULL);
1.45 daniel 940: }
1.156 daniel 941: input->filename = (char *) entity->SystemID;
1.45 daniel 942: input->base = entity->content;
943: input->cur = entity->content;
1.140 daniel 944: input->length = entity->length;
1.50 daniel 945: return(input);
1.45 daniel 946: }
947:
1.59 daniel 948: /**
949: * xmlNewStringInputStream:
950: * @ctxt: an XML parser context
1.96 daniel 951: * @buffer: an memory buffer
1.59 daniel 952: *
953: * Create a new input stream based on a memory buffer.
1.68 daniel 954: * Returns the new input stream
1.59 daniel 955: */
956: xmlParserInputPtr
1.123 daniel 957: xmlNewStringInputStream(xmlParserCtxtPtr ctxt, const xmlChar *buffer) {
1.59 daniel 958: xmlParserInputPtr input;
959:
1.96 daniel 960: if (buffer == NULL) {
1.123 daniel 961: ctxt->errNo = XML_ERR_INTERNAL_ERROR;
1.59 daniel 962: if ((ctxt->sax != NULL) && (ctxt->sax->error != NULL))
1.74 daniel 963: ctxt->sax->error(ctxt->userData,
1.59 daniel 964: "internal: xmlNewStringInputStream string = NULL\n");
965: return(NULL);
966: }
1.96 daniel 967: input = xmlNewInputStream(ctxt);
1.59 daniel 968: if (input == NULL) {
969: return(NULL);
970: }
1.96 daniel 971: input->base = buffer;
972: input->cur = buffer;
1.140 daniel 973: input->length = xmlStrlen(buffer);
1.59 daniel 974: return(input);
975: }
976:
1.76 daniel 977: /**
978: * xmlNewInputFromFile:
979: * @ctxt: an XML parser context
980: * @filename: the filename to use as entity
981: *
982: * Create a new input stream based on a file.
983: *
984: * Returns the new input stream or NULL in case of error
985: */
986: xmlParserInputPtr
1.79 daniel 987: xmlNewInputFromFile(xmlParserCtxtPtr ctxt, const char *filename) {
1.91 daniel 988: xmlParserInputBufferPtr buf;
1.76 daniel 989: xmlParserInputPtr inputStream;
1.111 daniel 990: char *directory = NULL;
1.76 daniel 991:
1.96 daniel 992: if (ctxt == NULL) return(NULL);
1.91 daniel 993: buf = xmlParserInputBufferCreateFilename(filename, XML_CHAR_ENCODING_NONE);
1.94 daniel 994: if (buf == NULL) {
1.140 daniel 995: char name[XML_PARSER_BIG_BUFFER_SIZE];
1.106 daniel 996:
1.94 daniel 997: if ((ctxt->input != NULL) && (ctxt->input->directory != NULL)) {
998: #ifdef WIN32
999: sprintf(name, "%s\\%s", ctxt->input->directory, filename);
1000: #else
1001: sprintf(name, "%s/%s", ctxt->input->directory, filename);
1002: #endif
1003: buf = xmlParserInputBufferCreateFilename(name,
1004: XML_CHAR_ENCODING_NONE);
1.106 daniel 1005: if (buf != NULL)
1.142 daniel 1006: directory = xmlParserGetDirectory(name);
1.106 daniel 1007: }
1008: if ((buf == NULL) && (ctxt->directory != NULL)) {
1009: #ifdef WIN32
1010: sprintf(name, "%s\\%s", ctxt->directory, filename);
1011: #else
1012: sprintf(name, "%s/%s", ctxt->directory, filename);
1013: #endif
1014: buf = xmlParserInputBufferCreateFilename(name,
1015: XML_CHAR_ENCODING_NONE);
1016: if (buf != NULL)
1.142 daniel 1017: directory = xmlParserGetDirectory(name);
1.106 daniel 1018: }
1019: if (buf == NULL)
1.94 daniel 1020: return(NULL);
1021: }
1022: if (directory == NULL)
1023: directory = xmlParserGetDirectory(filename);
1.76 daniel 1024:
1.96 daniel 1025: inputStream = xmlNewInputStream(ctxt);
1.76 daniel 1026: if (inputStream == NULL) {
1.119 daniel 1027: if (directory != NULL) xmlFree((char *) directory);
1.76 daniel 1028: return(NULL);
1029: }
1030:
1.119 daniel 1031: inputStream->filename = xmlMemStrdup(filename);
1.94 daniel 1032: inputStream->directory = directory;
1.91 daniel 1033: inputStream->buf = buf;
1.76 daniel 1034:
1.91 daniel 1035: inputStream->base = inputStream->buf->buffer->content;
1036: inputStream->cur = inputStream->buf->buffer->content;
1.106 daniel 1037: if ((ctxt->directory == NULL) && (directory != NULL))
1.134 daniel 1038: ctxt->directory = (char *) xmlStrdup((const xmlChar *) directory);
1.76 daniel 1039: return(inputStream);
1040: }
1041:
1.77 daniel 1042: /************************************************************************
1043: * *
1.97 daniel 1044: * Commodity functions to handle parser contexts *
1045: * *
1046: ************************************************************************/
1047:
1048: /**
1049: * xmlInitParserCtxt:
1050: * @ctxt: an XML parser context
1051: *
1052: * Initialize a parser context
1053: */
1054:
1055: void
1056: xmlInitParserCtxt(xmlParserCtxtPtr ctxt)
1057: {
1058: xmlSAXHandler *sax;
1059:
1.168 ! daniel 1060: xmlDefaultSAXHandlerInit();
! 1061:
1.119 daniel 1062: sax = (xmlSAXHandler *) xmlMalloc(sizeof(xmlSAXHandler));
1.97 daniel 1063: if (sax == NULL) {
1064: fprintf(stderr, "xmlInitParserCtxt: out of memory\n");
1065: }
1066:
1067: /* Allocate the Input stack */
1.119 daniel 1068: ctxt->inputTab = (xmlParserInputPtr *) xmlMalloc(5 * sizeof(xmlParserInputPtr));
1.97 daniel 1069: ctxt->inputNr = 0;
1070: ctxt->inputMax = 5;
1071: ctxt->input = NULL;
1.165 daniel 1072:
1.97 daniel 1073: ctxt->version = NULL;
1074: ctxt->encoding = NULL;
1075: ctxt->standalone = -1;
1.98 daniel 1076: ctxt->hasExternalSubset = 0;
1077: ctxt->hasPErefs = 0;
1.97 daniel 1078: ctxt->html = 0;
1.98 daniel 1079: ctxt->external = 0;
1.140 daniel 1080: ctxt->instate = XML_PARSER_START;
1.97 daniel 1081: ctxt->token = 0;
1.106 daniel 1082: ctxt->directory = NULL;
1.97 daniel 1083:
1084: /* Allocate the Node stack */
1.119 daniel 1085: ctxt->nodeTab = (xmlNodePtr *) xmlMalloc(10 * sizeof(xmlNodePtr));
1.97 daniel 1086: ctxt->nodeNr = 0;
1087: ctxt->nodeMax = 10;
1088: ctxt->node = NULL;
1089:
1.140 daniel 1090: /* Allocate the Name stack */
1091: ctxt->nameTab = (xmlChar **) xmlMalloc(10 * sizeof(xmlChar *));
1092: ctxt->nameNr = 0;
1093: ctxt->nameMax = 10;
1094: ctxt->name = NULL;
1095:
1.160 daniel 1096: if (sax == NULL) {
1097: ctxt->sax = &xmlDefaultSAXHandler;
1098: } else {
1.97 daniel 1099: ctxt->sax = sax;
1100: memcpy(sax, &xmlDefaultSAXHandler, sizeof(xmlSAXHandler));
1101: }
1102: ctxt->userData = ctxt;
1103: ctxt->myDoc = NULL;
1104: ctxt->wellFormed = 1;
1.99 daniel 1105: ctxt->valid = 1;
1.100 daniel 1106: ctxt->validate = xmlDoValidityCheckingDefaultValue;
1107: ctxt->vctxt.userData = ctxt;
1.149 daniel 1108: if (ctxt->validate) {
1109: ctxt->vctxt.error = xmlParserValidityError;
1.160 daniel 1110: if (xmlGetWarningsDefaultValue == 0)
1111: ctxt->vctxt.warning = NULL;
1112: else
1113: ctxt->vctxt.warning = xmlParserValidityWarning;
1.149 daniel 1114: } else {
1115: ctxt->vctxt.error = NULL;
1116: ctxt->vctxt.warning = NULL;
1117: }
1.97 daniel 1118: ctxt->replaceEntities = xmlSubstituteEntitiesDefaultValue;
1119: ctxt->record_info = 0;
1.135 daniel 1120: ctxt->nbChars = 0;
1.140 daniel 1121: ctxt->checkIndex = 0;
1122: ctxt->errNo = XML_ERR_OK;
1.97 daniel 1123: xmlInitNodeInfoSeq(&ctxt->node_seq);
1124: }
1125:
1126: /**
1127: * xmlFreeParserCtxt:
1128: * @ctxt: an XML parser context
1129: *
1130: * Free all the memory used by a parser context. However the parsed
1131: * document in ctxt->myDoc is not freed.
1132: */
1133:
1134: void
1135: xmlFreeParserCtxt(xmlParserCtxtPtr ctxt)
1136: {
1137: xmlParserInputPtr input;
1.140 daniel 1138: xmlChar *oldname;
1.97 daniel 1139:
1140: if (ctxt == NULL) return;
1141:
1142: while ((input = inputPop(ctxt)) != NULL) {
1143: xmlFreeInputStream(input);
1144: }
1.140 daniel 1145: while ((oldname = namePop(ctxt)) != NULL) {
1146: xmlFree(oldname);
1147: }
1148: if (ctxt->nameTab != NULL) xmlFree(ctxt->nameTab);
1.119 daniel 1149: if (ctxt->nodeTab != NULL) xmlFree(ctxt->nodeTab);
1150: if (ctxt->inputTab != NULL) xmlFree(ctxt->inputTab);
1151: if (ctxt->version != NULL) xmlFree((char *) ctxt->version);
1152: if (ctxt->encoding != NULL) xmlFree((char *) ctxt->encoding);
1.165 daniel 1153: if (ctxt->intSubName != NULL) xmlFree((char *) ctxt->intSubName);
1154: if (ctxt->extSubURI != NULL) xmlFree((char *) ctxt->extSubURI);
1155: if (ctxt->extSubSystem != NULL) xmlFree((char *) ctxt->extSubSystem);
1.97 daniel 1156: if ((ctxt->sax != NULL) && (ctxt->sax != &xmlDefaultSAXHandler))
1.119 daniel 1157: xmlFree(ctxt->sax);
1158: if (ctxt->directory != NULL) xmlFree((char *) ctxt->directory);
1159: xmlFree(ctxt);
1.97 daniel 1160: }
1161:
1162: /**
1163: * xmlNewParserCtxt:
1164: *
1165: * Allocate and initialize a new parser context.
1166: *
1167: * Returns the xmlParserCtxtPtr or NULL
1168: */
1169:
1170: xmlParserCtxtPtr
1171: xmlNewParserCtxt()
1172: {
1173: xmlParserCtxtPtr ctxt;
1174:
1.119 daniel 1175: ctxt = (xmlParserCtxtPtr) xmlMalloc(sizeof(xmlParserCtxt));
1.97 daniel 1176: if (ctxt == NULL) {
1177: fprintf(stderr, "xmlNewParserCtxt : cannot allocate context\n");
1178: perror("malloc");
1179: return(NULL);
1180: }
1.165 daniel 1181: memset(ctxt, 0, sizeof(xmlParserCtxt));
1.97 daniel 1182: xmlInitParserCtxt(ctxt);
1183: return(ctxt);
1184: }
1185:
1186: /**
1187: * xmlClearParserCtxt:
1188: * @ctxt: an XML parser context
1189: *
1190: * Clear (release owned resources) and reinitialize a parser context
1191: */
1192:
1193: void
1194: xmlClearParserCtxt(xmlParserCtxtPtr ctxt)
1195: {
1196: xmlClearNodeInfoSeq(&ctxt->node_seq);
1197: xmlInitParserCtxt(ctxt);
1198: }
1199:
1200: /************************************************************************
1201: * *
1.77 daniel 1202: * Commodity functions to handle entities *
1203: * *
1204: ************************************************************************/
1205:
1.97 daniel 1206:
1207: /**
1208: * xmlParseCharRef:
1209: * @ctxt: an XML parser context
1210: *
1211: * parse Reference declarations
1212: *
1213: * [66] CharRef ::= '&#' [0-9]+ ';' |
1214: * '&#x' [0-9a-fA-F]+ ';'
1215: *
1.98 daniel 1216: * [ WFC: Legal Character ]
1217: * Characters referred to using character references must match the
1218: * production for Char.
1219: *
1.135 daniel 1220: * Returns the value parsed (as an int), 0 in case of error
1.77 daniel 1221: */
1.97 daniel 1222: int
1223: xmlParseCharRef(xmlParserCtxtPtr ctxt) {
1224: int val = 0;
1225:
1.111 daniel 1226: if (ctxt->token != 0) {
1227: val = ctxt->token;
1228: ctxt->token = 0;
1229: return(val);
1230: }
1.152 daniel 1231: if ((RAW == '&') && (NXT(1) == '#') &&
1.97 daniel 1232: (NXT(2) == 'x')) {
1233: SKIP(3);
1.152 daniel 1234: while (RAW != ';') {
1235: if ((RAW >= '0') && (RAW <= '9'))
1.97 daniel 1236: val = val * 16 + (CUR - '0');
1.152 daniel 1237: else if ((RAW >= 'a') && (RAW <= 'f'))
1.97 daniel 1238: val = val * 16 + (CUR - 'a') + 10;
1.152 daniel 1239: else if ((RAW >= 'A') && (RAW <= 'F'))
1.97 daniel 1240: val = val * 16 + (CUR - 'A') + 10;
1241: else {
1.123 daniel 1242: ctxt->errNo = XML_ERR_INVALID_HEX_CHARREF;
1.97 daniel 1243: if ((ctxt->sax != NULL) && (ctxt->sax->error != NULL))
1244: ctxt->sax->error(ctxt->userData,
1245: "xmlParseCharRef: invalid hexadecimal value\n");
1246: ctxt->wellFormed = 0;
1247: val = 0;
1248: break;
1249: }
1250: NEXT;
1251: }
1.164 daniel 1252: if (RAW == ';') {
1253: /* on purpose to avoid reentrancy problems with NEXT and SKIP */
1254: ctxt->nbChars ++;
1255: ctxt->input->cur++;
1256: }
1.152 daniel 1257: } else if ((RAW == '&') && (NXT(1) == '#')) {
1.97 daniel 1258: SKIP(2);
1.152 daniel 1259: while (RAW != ';') {
1260: if ((RAW >= '0') && (RAW <= '9'))
1.97 daniel 1261: val = val * 10 + (CUR - '0');
1262: else {
1.123 daniel 1263: ctxt->errNo = XML_ERR_INVALID_DEC_CHARREF;
1.97 daniel 1264: if ((ctxt->sax != NULL) && (ctxt->sax->error != NULL))
1265: ctxt->sax->error(ctxt->userData,
1266: "xmlParseCharRef: invalid decimal value\n");
1267: ctxt->wellFormed = 0;
1268: val = 0;
1269: break;
1270: }
1271: NEXT;
1272: }
1.164 daniel 1273: if (RAW == ';') {
1274: /* on purpose to avoid reentrancy problems with NEXT and SKIP */
1275: ctxt->nbChars ++;
1276: ctxt->input->cur++;
1277: }
1.97 daniel 1278: } else {
1.123 daniel 1279: ctxt->errNo = XML_ERR_INVALID_CHARREF;
1.97 daniel 1280: if ((ctxt->sax != NULL) && (ctxt->sax->error != NULL))
1.98 daniel 1281: ctxt->sax->error(ctxt->userData,
1282: "xmlParseCharRef: invalid value\n");
1.97 daniel 1283: ctxt->wellFormed = 0;
1284: }
1.98 daniel 1285:
1.97 daniel 1286: /*
1.98 daniel 1287: * [ WFC: Legal Character ]
1288: * Characters referred to using character references must match the
1289: * production for Char.
1.97 daniel 1290: */
1291: if (IS_CHAR(val)) {
1292: return(val);
1293: } else {
1.123 daniel 1294: ctxt->errNo = XML_ERR_INVALID_CHAR;
1.97 daniel 1295: if ((ctxt->sax != NULL) && (ctxt->sax->error != NULL))
1.123 daniel 1296: ctxt->sax->error(ctxt->userData, "CharRef: invalid xmlChar value %d\n",
1.97 daniel 1297: val);
1298: ctxt->wellFormed = 0;
1299: }
1300: return(0);
1.77 daniel 1301: }
1302:
1.96 daniel 1303: /**
1.135 daniel 1304: * xmlParseStringCharRef:
1305: * @ctxt: an XML parser context
1306: * @str: a pointer to an index in the string
1307: *
1308: * parse Reference declarations, variant parsing from a string rather
1309: * than an an input flow.
1310: *
1311: * [66] CharRef ::= '&#' [0-9]+ ';' |
1312: * '&#x' [0-9a-fA-F]+ ';'
1313: *
1314: * [ WFC: Legal Character ]
1315: * Characters referred to using character references must match the
1316: * production for Char.
1317: *
1318: * Returns the value parsed (as an int), 0 in case of error, str will be
1319: * updated to the current value of the index
1320: */
1321: int
1322: xmlParseStringCharRef(xmlParserCtxtPtr ctxt, const xmlChar **str) {
1323: const xmlChar *ptr;
1324: xmlChar cur;
1325: int val = 0;
1326:
1327: if ((str == NULL) || (*str == NULL)) return(0);
1328: ptr = *str;
1329: cur = *ptr;
1.137 daniel 1330: if ((cur == '&') && (ptr[1] == '#') && (ptr[2] == 'x')) {
1.135 daniel 1331: ptr += 3;
1332: cur = *ptr;
1333: while (cur != ';') {
1334: if ((cur >= '0') && (cur <= '9'))
1335: val = val * 16 + (cur - '0');
1336: else if ((cur >= 'a') && (cur <= 'f'))
1337: val = val * 16 + (cur - 'a') + 10;
1338: else if ((cur >= 'A') && (cur <= 'F'))
1339: val = val * 16 + (cur - 'A') + 10;
1340: else {
1341: ctxt->errNo = XML_ERR_INVALID_HEX_CHARREF;
1342: if ((ctxt->sax != NULL) && (ctxt->sax->error != NULL))
1343: ctxt->sax->error(ctxt->userData,
1344: "xmlParseCharRef: invalid hexadecimal value\n");
1345: ctxt->wellFormed = 0;
1346: val = 0;
1347: break;
1348: }
1349: ptr++;
1350: cur = *ptr;
1351: }
1352: if (cur == ';')
1353: ptr++;
1.145 daniel 1354: } else if ((cur == '&') && (ptr[1] == '#')){
1.135 daniel 1355: ptr += 2;
1356: cur = *ptr;
1357: while (cur != ';') {
1358: if ((cur >= '0') && (cur <= '9'))
1359: val = val * 10 + (cur - '0');
1360: else {
1361: ctxt->errNo = XML_ERR_INVALID_DEC_CHARREF;
1362: if ((ctxt->sax != NULL) && (ctxt->sax->error != NULL))
1363: ctxt->sax->error(ctxt->userData,
1364: "xmlParseCharRef: invalid decimal value\n");
1365: ctxt->wellFormed = 0;
1366: val = 0;
1367: break;
1368: }
1369: ptr++;
1370: cur = *ptr;
1371: }
1372: if (cur == ';')
1373: ptr++;
1374: } else {
1375: ctxt->errNo = XML_ERR_INVALID_CHARREF;
1376: if ((ctxt->sax != NULL) && (ctxt->sax->error != NULL))
1377: ctxt->sax->error(ctxt->userData,
1378: "xmlParseCharRef: invalid value\n");
1379: ctxt->wellFormed = 0;
1380: return(0);
1381: }
1382: *str = ptr;
1383:
1384: /*
1385: * [ WFC: Legal Character ]
1386: * Characters referred to using character references must match the
1387: * production for Char.
1388: */
1389: if (IS_CHAR(val)) {
1390: return(val);
1391: } else {
1392: ctxt->errNo = XML_ERR_INVALID_CHAR;
1393: if ((ctxt->sax != NULL) && (ctxt->sax->error != NULL))
1394: ctxt->sax->error(ctxt->userData,
1395: "CharRef: invalid xmlChar value %d\n", val);
1396: ctxt->wellFormed = 0;
1397: }
1398: return(0);
1399: }
1400:
1401: /**
1.96 daniel 1402: * xmlParserHandleReference:
1403: * @ctxt: the parser context
1404: *
1.97 daniel 1405: * [67] Reference ::= EntityRef | CharRef
1406: *
1.96 daniel 1407: * [68] EntityRef ::= '&' Name ';'
1408: *
1.98 daniel 1409: * [ WFC: Entity Declared ]
1410: * the Name given in the entity reference must match that in an entity
1411: * declaration, except that well-formed documents need not declare any
1412: * of the following entities: amp, lt, gt, apos, quot.
1413: *
1414: * [ WFC: Parsed Entity ]
1415: * An entity reference must not contain the name of an unparsed entity
1416: *
1.97 daniel 1417: * [66] CharRef ::= '&#' [0-9]+ ';' |
1418: * '&#x' [0-9a-fA-F]+ ';'
1419: *
1.96 daniel 1420: * A PEReference may have been detectect in the current input stream
1421: * the handling is done accordingly to
1422: * http://www.w3.org/TR/REC-xml#entproc
1423: */
1424: void
1425: xmlParserHandleReference(xmlParserCtxtPtr ctxt) {
1.97 daniel 1426: xmlParserInputPtr input;
1.123 daniel 1427: xmlChar *name;
1.97 daniel 1428: xmlEntityPtr ent = NULL;
1429:
1.126 daniel 1430: if (ctxt->token != 0) {
1431: return;
1432: }
1.152 daniel 1433: if (RAW != '&') return;
1.97 daniel 1434: GROW;
1.152 daniel 1435: if ((RAW == '&') && (NXT(1) == '#')) {
1.97 daniel 1436: switch(ctxt->instate) {
1.140 daniel 1437: case XML_PARSER_ENTITY_DECL:
1438: case XML_PARSER_PI:
1.109 daniel 1439: case XML_PARSER_CDATA_SECTION:
1.140 daniel 1440: case XML_PARSER_COMMENT:
1.168 ! daniel 1441: case XML_PARSER_SYSTEM_LITERAL:
1.140 daniel 1442: /* we just ignore it there */
1443: return;
1444: case XML_PARSER_START_TAG:
1.109 daniel 1445: return;
1.140 daniel 1446: case XML_PARSER_END_TAG:
1.97 daniel 1447: return;
1448: case XML_PARSER_EOF:
1.123 daniel 1449: ctxt->errNo = XML_ERR_CHARREF_AT_EOF;
1.97 daniel 1450: if ((ctxt->sax != NULL) && (ctxt->sax->error != NULL))
1451: ctxt->sax->error(ctxt->userData, "CharRef at EOF\n");
1452: ctxt->wellFormed = 0;
1453: return;
1454: case XML_PARSER_PROLOG:
1.140 daniel 1455: case XML_PARSER_START:
1456: case XML_PARSER_MISC:
1.123 daniel 1457: ctxt->errNo = XML_ERR_CHARREF_IN_PROLOG;
1.97 daniel 1458: if ((ctxt->sax != NULL) && (ctxt->sax->error != NULL))
1459: ctxt->sax->error(ctxt->userData, "CharRef in prolog!\n");
1460: ctxt->wellFormed = 0;
1461: return;
1462: case XML_PARSER_EPILOG:
1.123 daniel 1463: ctxt->errNo = XML_ERR_CHARREF_IN_EPILOG;
1.97 daniel 1464: if ((ctxt->sax != NULL) && (ctxt->sax->error != NULL))
1465: ctxt->sax->error(ctxt->userData, "CharRef in epilog!\n");
1466: ctxt->wellFormed = 0;
1467: return;
1468: case XML_PARSER_DTD:
1.123 daniel 1469: ctxt->errNo = XML_ERR_CHARREF_IN_DTD;
1.97 daniel 1470: if ((ctxt->sax != NULL) && (ctxt->sax->error != NULL))
1471: ctxt->sax->error(ctxt->userData,
1472: "CharRef are forbiden in DTDs!\n");
1473: ctxt->wellFormed = 0;
1474: return;
1475: case XML_PARSER_ENTITY_VALUE:
1476: /*
1477: * NOTE: in the case of entity values, we don't do the
1.127 daniel 1478: * substitution here since we need the literal
1.97 daniel 1479: * entity value to be able to save the internal
1480: * subset of the document.
1481: * This will be handled by xmlDecodeEntities
1482: */
1483: return;
1484: case XML_PARSER_CONTENT:
1485: case XML_PARSER_ATTRIBUTE_VALUE:
1486: ctxt->token = xmlParseCharRef(ctxt);
1487: return;
1488: }
1489: return;
1490: }
1491:
1492: switch(ctxt->instate) {
1.109 daniel 1493: case XML_PARSER_CDATA_SECTION:
1494: return;
1.140 daniel 1495: case XML_PARSER_PI:
1.97 daniel 1496: case XML_PARSER_COMMENT:
1.168 ! daniel 1497: case XML_PARSER_SYSTEM_LITERAL:
! 1498: case XML_PARSER_CONTENT:
1.97 daniel 1499: return;
1.140 daniel 1500: case XML_PARSER_START_TAG:
1501: return;
1502: case XML_PARSER_END_TAG:
1503: return;
1.97 daniel 1504: case XML_PARSER_EOF:
1.123 daniel 1505: ctxt->errNo = XML_ERR_ENTITYREF_AT_EOF;
1.97 daniel 1506: if ((ctxt->sax != NULL) && (ctxt->sax->error != NULL))
1507: ctxt->sax->error(ctxt->userData, "Reference at EOF\n");
1508: ctxt->wellFormed = 0;
1509: return;
1510: case XML_PARSER_PROLOG:
1.140 daniel 1511: case XML_PARSER_START:
1512: case XML_PARSER_MISC:
1.123 daniel 1513: ctxt->errNo = XML_ERR_ENTITYREF_IN_PROLOG;
1.97 daniel 1514: if ((ctxt->sax != NULL) && (ctxt->sax->error != NULL))
1515: ctxt->sax->error(ctxt->userData, "Reference in prolog!\n");
1516: ctxt->wellFormed = 0;
1517: return;
1518: case XML_PARSER_EPILOG:
1.123 daniel 1519: ctxt->errNo = XML_ERR_ENTITYREF_IN_EPILOG;
1.97 daniel 1520: if ((ctxt->sax != NULL) && (ctxt->sax->error != NULL))
1521: ctxt->sax->error(ctxt->userData, "Reference in epilog!\n");
1522: ctxt->wellFormed = 0;
1523: return;
1524: case XML_PARSER_ENTITY_VALUE:
1525: /*
1526: * NOTE: in the case of entity values, we don't do the
1.127 daniel 1527: * substitution here since we need the literal
1.97 daniel 1528: * entity value to be able to save the internal
1529: * subset of the document.
1530: * This will be handled by xmlDecodeEntities
1531: */
1532: return;
1533: case XML_PARSER_ATTRIBUTE_VALUE:
1534: /*
1535: * NOTE: in the case of attributes values, we don't do the
1536: * substitution here unless we are in a mode where
1537: * the parser is explicitely asked to substitute
1538: * entities. The SAX callback is called with values
1539: * without entity substitution.
1540: * This will then be handled by xmlDecodeEntities
1541: */
1.113 daniel 1542: return;
1.97 daniel 1543: case XML_PARSER_ENTITY_DECL:
1544: /*
1545: * we just ignore it there
1546: * the substitution will be done once the entity is referenced
1547: */
1548: return;
1549: case XML_PARSER_DTD:
1.123 daniel 1550: ctxt->errNo = XML_ERR_ENTITYREF_IN_DTD;
1.97 daniel 1551: if ((ctxt->sax != NULL) && (ctxt->sax->error != NULL))
1552: ctxt->sax->error(ctxt->userData,
1553: "Entity references are forbiden in DTDs!\n");
1554: ctxt->wellFormed = 0;
1555: return;
1556: }
1557:
1558: NEXT;
1559: name = xmlScanName(ctxt);
1560: if (name == NULL) {
1.123 daniel 1561: ctxt->errNo = XML_ERR_ENTITYREF_NO_NAME;
1.97 daniel 1562: if ((ctxt->sax != NULL) && (ctxt->sax->error != NULL))
1563: ctxt->sax->error(ctxt->userData, "Entity reference: no name\n");
1564: ctxt->wellFormed = 0;
1565: ctxt->token = '&';
1566: return;
1567: }
1568: if (NXT(xmlStrlen(name)) != ';') {
1.123 daniel 1569: ctxt->errNo = XML_ERR_ENTITYREF_SEMICOL_MISSING;
1.97 daniel 1570: if ((ctxt->sax != NULL) && (ctxt->sax->error != NULL))
1571: ctxt->sax->error(ctxt->userData,
1572: "Entity reference: ';' expected\n");
1573: ctxt->wellFormed = 0;
1574: ctxt->token = '&';
1.119 daniel 1575: xmlFree(name);
1.97 daniel 1576: return;
1577: }
1578: SKIP(xmlStrlen(name) + 1);
1579: if (ctxt->sax != NULL) {
1580: if (ctxt->sax->getEntity != NULL)
1581: ent = ctxt->sax->getEntity(ctxt->userData, name);
1582: }
1.98 daniel 1583:
1584: /*
1585: * [ WFC: Entity Declared ]
1586: * the Name given in the entity reference must match that in an entity
1587: * declaration, except that well-formed documents need not declare any
1588: * of the following entities: amp, lt, gt, apos, quot.
1589: */
1.97 daniel 1590: if (ent == NULL)
1591: ent = xmlGetPredefinedEntity(name);
1592: if (ent == NULL) {
1.123 daniel 1593: ctxt->errNo = XML_ERR_UNDECLARED_ENTITY;
1.97 daniel 1594: if ((ctxt->sax != NULL) && (ctxt->sax->error != NULL))
1595: ctxt->sax->error(ctxt->userData,
1.98 daniel 1596: "Entity reference: entity %s not declared\n",
1597: name);
1.97 daniel 1598: ctxt->wellFormed = 0;
1.119 daniel 1599: xmlFree(name);
1.97 daniel 1600: return;
1601: }
1.98 daniel 1602:
1603: /*
1604: * [ WFC: Parsed Entity ]
1605: * An entity reference must not contain the name of an unparsed entity
1606: */
1.159 daniel 1607: if (ent->etype == XML_EXTERNAL_GENERAL_UNPARSED_ENTITY) {
1.123 daniel 1608: ctxt->errNo = XML_ERR_UNPARSED_ENTITY;
1.98 daniel 1609: if ((ctxt->sax != NULL) && (ctxt->sax->error != NULL))
1610: ctxt->sax->error(ctxt->userData,
1611: "Entity reference to unparsed entity %s\n", name);
1612: ctxt->wellFormed = 0;
1613: }
1614:
1.159 daniel 1615: if (ent->etype == XML_INTERNAL_PREDEFINED_ENTITY) {
1.97 daniel 1616: ctxt->token = ent->content[0];
1.119 daniel 1617: xmlFree(name);
1.97 daniel 1618: return;
1619: }
1620: input = xmlNewEntityInputStream(ctxt, ent);
1621: xmlPushInput(ctxt, input);
1.119 daniel 1622: xmlFree(name);
1.96 daniel 1623: return;
1624: }
1625:
1626: /**
1627: * xmlParserHandlePEReference:
1628: * @ctxt: the parser context
1629: *
1630: * [69] PEReference ::= '%' Name ';'
1631: *
1.98 daniel 1632: * [ WFC: No Recursion ]
1633: * TODO A parsed entity must not contain a recursive
1634: * reference to itself, either directly or indirectly.
1635: *
1636: * [ WFC: Entity Declared ]
1637: * In a document without any DTD, a document with only an internal DTD
1638: * subset which contains no parameter entity references, or a document
1639: * with "standalone='yes'", ... ... The declaration of a parameter
1640: * entity must precede any reference to it...
1641: *
1642: * [ VC: Entity Declared ]
1643: * In a document with an external subset or external parameter entities
1644: * with "standalone='no'", ... ... The declaration of a parameter entity
1645: * must precede any reference to it...
1646: *
1647: * [ WFC: In DTD ]
1648: * Parameter-entity references may only appear in the DTD.
1649: * NOTE: misleading but this is handled.
1650: *
1651: * A PEReference may have been detected in the current input stream
1.96 daniel 1652: * the handling is done accordingly to
1653: * http://www.w3.org/TR/REC-xml#entproc
1654: * i.e.
1655: * - Included in literal in entity values
1656: * - Included as Paraemeter Entity reference within DTDs
1657: */
1658: void
1659: xmlParserHandlePEReference(xmlParserCtxtPtr ctxt) {
1.123 daniel 1660: xmlChar *name;
1.96 daniel 1661: xmlEntityPtr entity = NULL;
1662: xmlParserInputPtr input;
1663:
1.126 daniel 1664: if (ctxt->token != 0) {
1665: return;
1666: }
1.152 daniel 1667: if (RAW != '%') return;
1.96 daniel 1668: switch(ctxt->instate) {
1.109 daniel 1669: case XML_PARSER_CDATA_SECTION:
1670: return;
1.97 daniel 1671: case XML_PARSER_COMMENT:
1672: return;
1.140 daniel 1673: case XML_PARSER_START_TAG:
1674: return;
1675: case XML_PARSER_END_TAG:
1676: return;
1.96 daniel 1677: case XML_PARSER_EOF:
1.123 daniel 1678: ctxt->errNo = XML_ERR_PEREF_AT_EOF;
1.96 daniel 1679: if ((ctxt->sax != NULL) && (ctxt->sax->error != NULL))
1680: ctxt->sax->error(ctxt->userData, "PEReference at EOF\n");
1681: ctxt->wellFormed = 0;
1682: return;
1683: case XML_PARSER_PROLOG:
1.140 daniel 1684: case XML_PARSER_START:
1685: case XML_PARSER_MISC:
1.123 daniel 1686: ctxt->errNo = XML_ERR_PEREF_IN_PROLOG;
1.96 daniel 1687: if ((ctxt->sax != NULL) && (ctxt->sax->error != NULL))
1688: ctxt->sax->error(ctxt->userData, "PEReference in prolog!\n");
1689: ctxt->wellFormed = 0;
1690: return;
1.97 daniel 1691: case XML_PARSER_ENTITY_DECL:
1.96 daniel 1692: case XML_PARSER_CONTENT:
1693: case XML_PARSER_ATTRIBUTE_VALUE:
1.140 daniel 1694: case XML_PARSER_PI:
1.168 ! daniel 1695: case XML_PARSER_SYSTEM_LITERAL:
1.96 daniel 1696: /* we just ignore it there */
1697: return;
1698: case XML_PARSER_EPILOG:
1.123 daniel 1699: ctxt->errNo = XML_ERR_PEREF_IN_EPILOG;
1.96 daniel 1700: if ((ctxt->sax != NULL) && (ctxt->sax->error != NULL))
1.97 daniel 1701: ctxt->sax->error(ctxt->userData, "PEReference in epilog!\n");
1.96 daniel 1702: ctxt->wellFormed = 0;
1703: return;
1.97 daniel 1704: case XML_PARSER_ENTITY_VALUE:
1705: /*
1706: * NOTE: in the case of entity values, we don't do the
1.127 daniel 1707: * substitution here since we need the literal
1.97 daniel 1708: * entity value to be able to save the internal
1709: * subset of the document.
1710: * This will be handled by xmlDecodeEntities
1711: */
1712: return;
1.96 daniel 1713: case XML_PARSER_DTD:
1.98 daniel 1714: /*
1715: * [WFC: Well-Formedness Constraint: PEs in Internal Subset]
1716: * In the internal DTD subset, parameter-entity references
1717: * can occur only where markup declarations can occur, not
1718: * within markup declarations.
1719: * In that case this is handled in xmlParseMarkupDecl
1720: */
1721: if ((ctxt->external == 0) && (ctxt->inputNr == 1))
1722: return;
1.96 daniel 1723: }
1724:
1725: NEXT;
1726: name = xmlParseName(ctxt);
1727: if (name == NULL) {
1.123 daniel 1728: ctxt->errNo = XML_ERR_PEREF_NO_NAME;
1.96 daniel 1729: if ((ctxt->sax != NULL) && (ctxt->sax->error != NULL))
1730: ctxt->sax->error(ctxt->userData, "xmlHandlePEReference: no name\n");
1731: ctxt->wellFormed = 0;
1732: } else {
1.152 daniel 1733: if (RAW == ';') {
1.96 daniel 1734: NEXT;
1.98 daniel 1735: if ((ctxt->sax != NULL) && (ctxt->sax->getParameterEntity != NULL))
1736: entity = ctxt->sax->getParameterEntity(ctxt->userData, name);
1.96 daniel 1737: if (entity == NULL) {
1.98 daniel 1738:
1739: /*
1740: * [ WFC: Entity Declared ]
1741: * In a document without any DTD, a document with only an
1742: * internal DTD subset which contains no parameter entity
1743: * references, or a document with "standalone='yes'", ...
1744: * ... The declaration of a parameter entity must precede
1745: * any reference to it...
1746: */
1747: if ((ctxt->standalone == 1) ||
1748: ((ctxt->hasExternalSubset == 0) &&
1749: (ctxt->hasPErefs == 0))) {
1750: if ((ctxt->sax != NULL) && (ctxt->sax->error != NULL))
1751: ctxt->sax->error(ctxt->userData,
1752: "PEReference: %%%s; not found\n", name);
1753: ctxt->wellFormed = 0;
1754: } else {
1755: /*
1756: * [ VC: Entity Declared ]
1757: * In a document with an external subset or external
1758: * parameter entities with "standalone='no'", ...
1759: * ... The declaration of a parameter entity must precede
1760: * any reference to it...
1761: */
1762: if ((ctxt->sax != NULL) && (ctxt->sax->warning != NULL))
1763: ctxt->sax->warning(ctxt->userData,
1764: "PEReference: %%%s; not found\n", name);
1765: ctxt->valid = 0;
1766: }
1.96 daniel 1767: } else {
1.159 daniel 1768: if ((entity->etype == XML_INTERNAL_PARAMETER_ENTITY) ||
1769: (entity->etype == XML_EXTERNAL_PARAMETER_ENTITY)) {
1.96 daniel 1770: /*
1.156 daniel 1771: * TODO !!! handle the extra spaces added before and after
1.96 daniel 1772: * c.f. http://www.w3.org/TR/REC-xml#as-PE
1773: */
1774: input = xmlNewEntityInputStream(ctxt, entity);
1775: xmlPushInput(ctxt, input);
1.164 daniel 1776: if ((entity->etype == XML_EXTERNAL_PARAMETER_ENTITY) &&
1777: (RAW == '<') && (NXT(1) == '?') &&
1778: (NXT(2) == 'x') && (NXT(3) == 'm') &&
1779: (NXT(4) == 'l') && (IS_BLANK(NXT(5)))) {
1780: xmlParseXMLDecl(ctxt);
1781: }
1782: if (ctxt->token == 0)
1783: ctxt->token = ' ';
1.96 daniel 1784: } else {
1785: if ((ctxt->sax != NULL) && (ctxt->sax->error != NULL))
1786: ctxt->sax->error(ctxt->userData,
1787: "xmlHandlePEReference: %s is not a parameter entity\n",
1788: name);
1789: ctxt->wellFormed = 0;
1790: }
1791: }
1792: } else {
1.123 daniel 1793: ctxt->errNo = XML_ERR_PEREF_SEMICOL_MISSING;
1.96 daniel 1794: if ((ctxt->sax != NULL) && (ctxt->sax->error != NULL))
1795: ctxt->sax->error(ctxt->userData,
1796: "xmlHandlePEReference: expecting ';'\n");
1797: ctxt->wellFormed = 0;
1798: }
1.119 daniel 1799: xmlFree(name);
1.97 daniel 1800: }
1801: }
1802:
/*
 * Macro used to grow the current buffer: doubles buffer##_size and
 * reallocates buffer to the new size.
 *
 * The result of xmlRealloc() goes through a temporary so the original
 * block is not lost when the reallocation fails. In that case the old
 * block is released, buffer is set to NULL and the enclosing function
 * returns NULL.
 */
#define growBuffer(buffer) {						\
    xmlChar *buffer##_grown;						\
    buffer##_size *= 2;							\
    buffer##_grown = (xmlChar *)					\
        xmlRealloc(buffer, buffer##_size * sizeof(xmlChar));		\
    if (buffer##_grown == NULL) {					\
        perror("realloc failed");					\
        xmlFree(buffer);						\
        buffer = NULL;							\
        return(NULL);							\
    }									\
    buffer = buffer##_grown;						\
}
1.77 daniel 1815:
1816: /**
1817: * xmlDecodeEntities:
1818: * @ctxt: the parser context
1819: * @what: combination of XML_SUBSTITUTE_REF and XML_SUBSTITUTE_PEREF
1820: * @len: the len to decode (in bytes !), -1 for no size limit
1.123 daniel 1821: * @end: an end marker xmlChar, 0 if none
1822: * @end2: an end marker xmlChar, 0 if none
1823: * @end3: an end marker xmlChar, 0 if none
1.77 daniel 1824: *
1825: * [67] Reference ::= EntityRef | CharRef
1826: *
1827: * [69] PEReference ::= '%' Name ';'
1828: *
1829: * Returns A newly allocated string with the substitution done. The caller
1830: * must deallocate it !
1831: */
1.123 daniel 1832: xmlChar *
1.77 daniel 1833: xmlDecodeEntities(xmlParserCtxtPtr ctxt, int len, int what,
1.123 daniel 1834: xmlChar end, xmlChar end2, xmlChar end3) {
1835: xmlChar *buffer = NULL;
1.78 daniel 1836: int buffer_size = 0;
1.161 daniel 1837: int nbchars = 0;
1.78 daniel 1838:
1.123 daniel 1839: xmlChar *current = NULL;
1.77 daniel 1840: xmlEntityPtr ent;
1841: unsigned int max = (unsigned int) len;
1.161 daniel 1842: int c,l;
1.77 daniel 1843:
1844: /*
1845: * allocate a translation buffer.
1846: */
1.140 daniel 1847: buffer_size = XML_PARSER_BIG_BUFFER_SIZE;
1.123 daniel 1848: buffer = (xmlChar *) xmlMalloc(buffer_size * sizeof(xmlChar));
1.77 daniel 1849: if (buffer == NULL) {
1850: perror("xmlDecodeEntities: malloc failed");
1851: return(NULL);
1852: }
1853:
1.78 daniel 1854: /*
1855: * Ok loop until we reach one of the ending char or a size limit.
1856: */
1.161 daniel 1857: c = CUR_CHAR(l);
1858: while ((nbchars < max) && (c != end) &&
1859: (c != end2) && (c != end3)) {
1.77 daniel 1860:
1.161 daniel 1861: if (c == 0) break;
1862: if (((c == '&') && (ctxt->token != '&')) && (NXT(1) == '#')) {
1.98 daniel 1863: int val = xmlParseCharRef(ctxt);
1.161 daniel 1864: COPY_BUF(0,buffer,nbchars,val);
1865: NEXTL(l);
1866: } else if ((c == '&') && (ctxt->token != '&') &&
1867: (what & XML_SUBSTITUTE_REF)) {
1.98 daniel 1868: ent = xmlParseEntityRef(ctxt);
1869: if ((ent != NULL) &&
1870: (ctxt->replaceEntities != 0)) {
1871: current = ent->content;
1872: while (*current != 0) {
1.161 daniel 1873: buffer[nbchars++] = *current++;
1874: if (nbchars > buffer_size - XML_PARSER_BUFFER_SIZE) {
1.98 daniel 1875: growBuffer(buffer);
1.77 daniel 1876: }
1877: }
1.98 daniel 1878: } else if (ent != NULL) {
1.123 daniel 1879: const xmlChar *cur = ent->name;
1.98 daniel 1880:
1.161 daniel 1881: buffer[nbchars++] = '&';
1882: if (nbchars > buffer_size - XML_PARSER_BUFFER_SIZE) {
1.98 daniel 1883: growBuffer(buffer);
1884: }
1.161 daniel 1885: while (*cur != 0) {
1886: buffer[nbchars++] = *cur++;
1887: }
1888: buffer[nbchars++] = ';';
1.77 daniel 1889: }
1.161 daniel 1890: } else if (c == '%' && (what & XML_SUBSTITUTE_PEREF)) {
1.97 daniel 1891: /*
1.77 daniel 1892: * a PEReference induce to switch the entity flow,
1893: * we break here to flush the current set of chars
1894: * parsed if any. We will be called back later.
1.97 daniel 1895: */
1.91 daniel 1896: if (nbchars != 0) break;
1.77 daniel 1897:
1898: xmlParsePEReference(ctxt);
1.79 daniel 1899:
1.97 daniel 1900: /*
1.79 daniel 1901: * Pop-up of finished entities.
1.97 daniel 1902: */
1.152 daniel 1903: while ((RAW == 0) && (ctxt->inputNr > 1))
1.79 daniel 1904: xmlPopInput(ctxt);
1905:
1.98 daniel 1906: break;
1.77 daniel 1907: } else {
1.161 daniel 1908: COPY_BUF(l,buffer,nbchars,c);
1909: NEXTL(l);
1910: if (nbchars > buffer_size - XML_PARSER_BUFFER_SIZE) {
1.86 daniel 1911: growBuffer(buffer);
1912: }
1.77 daniel 1913: }
1.161 daniel 1914: c = CUR_CHAR(l);
1.77 daniel 1915: }
1.161 daniel 1916: buffer[nbchars++] = 0;
1.77 daniel 1917: return(buffer);
1918: }
1919:
1.135 daniel 1920: /**
1921: * xmlStringDecodeEntities:
1922: * @ctxt: the parser context
1923: * @str: the input string
1924: * @what: combination of XML_SUBSTITUTE_REF and XML_SUBSTITUTE_PEREF
1925: * @end: an end marker xmlChar, 0 if none
1926: * @end2: an end marker xmlChar, 0 if none
1927: * @end3: an end marker xmlChar, 0 if none
1928: *
1929: * [67] Reference ::= EntityRef | CharRef
1930: *
1931: * [69] PEReference ::= '%' Name ';'
1932: *
1933: * Returns A newly allocated string with the substitution done. The caller
1934: * must deallocate it !
1935: */
1936: xmlChar *
1937: xmlStringDecodeEntities(xmlParserCtxtPtr ctxt, const xmlChar *str, int what,
1938: xmlChar end, xmlChar end2, xmlChar end3) {
1939: xmlChar *buffer = NULL;
1940: int buffer_size = 0;
1941: xmlChar *out = NULL;
1942:
1943: xmlChar *current = NULL;
1944: xmlEntityPtr ent;
1945: xmlChar cur;
1946:
1947: /*
1948: * allocate a translation buffer.
1949: */
1.140 daniel 1950: buffer_size = XML_PARSER_BIG_BUFFER_SIZE;
1.135 daniel 1951: buffer = (xmlChar *) xmlMalloc(buffer_size * sizeof(xmlChar));
1952: if (buffer == NULL) {
1953: perror("xmlDecodeEntities: malloc failed");
1954: return(NULL);
1955: }
1956: out = buffer;
1957:
1958: /*
1959: * Ok loop until we reach one of the ending char or a size limit.
1960: */
1961: cur = *str;
1962: while ((cur != 0) && (cur != end) &&
1963: (cur != end2) && (cur != end3)) {
1964:
1965: if (cur == 0) break;
1966: if ((cur == '&') && (str[1] == '#')) {
1967: int val = xmlParseStringCharRef(ctxt, &str);
1968: if (val != 0)
1969: *out++ = val;
1970: } else if ((cur == '&') && (what & XML_SUBSTITUTE_REF)) {
1971: ent = xmlParseStringEntityRef(ctxt, &str);
1972: if ((ent != NULL) &&
1973: (ctxt->replaceEntities != 0)) {
1974: current = ent->content;
1975: while (*current != 0) {
1976: *out++ = *current++;
1.140 daniel 1977: if (out - buffer > buffer_size - XML_PARSER_BUFFER_SIZE) {
1.135 daniel 1978: int index = out - buffer;
1979:
1980: growBuffer(buffer);
1981: out = &buffer[index];
1982: }
1983: }
1984: } else if (ent != NULL) {
1985: int i = xmlStrlen(ent->name);
1986: const xmlChar *cur = ent->name;
1987:
1988: *out++ = '&';
1.140 daniel 1989: if (out - buffer > buffer_size - i - XML_PARSER_BUFFER_SIZE) {
1.135 daniel 1990: int index = out - buffer;
1991:
1992: growBuffer(buffer);
1993: out = &buffer[index];
1994: }
1995: for (;i > 0;i--)
1996: *out++ = *cur++;
1997: *out++ = ';';
1998: }
1999: } else if (cur == '%' && (what & XML_SUBSTITUTE_PEREF)) {
2000: ent = xmlParseStringPEReference(ctxt, &str);
2001: if (ent != NULL) {
2002: current = ent->content;
2003: while (*current != 0) {
2004: *out++ = *current++;
1.140 daniel 2005: if (out - buffer > buffer_size - XML_PARSER_BUFFER_SIZE) {
1.135 daniel 2006: int index = out - buffer;
2007:
2008: growBuffer(buffer);
2009: out = &buffer[index];
2010: }
2011: }
2012: }
2013: } else {
1.156 daniel 2014: /* invalid for UTF-8 , use COPY(out); !!! */
1.135 daniel 2015: *out++ = cur;
1.140 daniel 2016: if (out - buffer > buffer_size - XML_PARSER_BUFFER_SIZE) {
1.135 daniel 2017: int index = out - buffer;
2018:
2019: growBuffer(buffer);
2020: out = &buffer[index];
2021: }
2022: str++;
2023: }
2024: cur = *str;
2025: }
2026: *out = 0;
2027: return(buffer);
2028: }
2029:
1.1 veillard 2030:
1.28 daniel 2031: /************************************************************************
2032: * *
1.75 daniel 2033: * Commodity functions to handle encodings *
2034: * *
2035: ************************************************************************/
2036:
2037: /**
2038: * xmlSwitchEncoding:
2039: * @ctxt: the parser context
1.124 daniel 2040: * @enc: the encoding value (number)
1.75 daniel 2041: *
2042: * change the input functions when discovering the character encoding
2043: * of a given entity.
2044: */
2045: void
2046: xmlSwitchEncoding(xmlParserCtxtPtr ctxt, xmlCharEncoding enc)
2047: {
1.156 daniel 2048: xmlCharEncodingHandlerPtr handler;
2049:
2050: handler = xmlGetCharEncodingHandler(enc);
2051: if (handler != NULL) {
2052: if (ctxt->input != NULL) {
2053: if (ctxt->input->buf != NULL) {
2054: if (ctxt->input->buf->encoder != NULL) {
2055: if ((ctxt->sax != NULL) && (ctxt->sax->error != NULL))
2056: ctxt->sax->error(ctxt->userData,
2057: "xmlSwitchEncoding : encoder already regitered\n");
2058: return;
2059: }
2060: ctxt->input->buf->encoder = handler;
2061:
2062: /*
2063: * Is there already some content down the pipe to convert
2064: */
2065: if ((ctxt->input->buf->buffer != NULL) &&
2066: (ctxt->input->buf->buffer->use > 0)) {
2067: xmlChar *buf;
2068: int res, len, size;
2069: int processed;
2070:
2071: /*
2072: * Specific handling of the Byte Order Mark for
2073: * UTF-16
2074: */
2075: if ((enc == XML_CHAR_ENCODING_UTF16LE) &&
2076: (ctxt->input->cur[0] == 0xFF) &&
2077: (ctxt->input->cur[1] == 0xFE)) {
2078: SKIP(2);
2079: }
2080: if ((enc == XML_CHAR_ENCODING_UTF16BE) &&
2081: (ctxt->input->cur[0] == 0xFE) &&
2082: (ctxt->input->cur[1] == 0xFF)) {
2083: SKIP(2);
2084: }
2085:
2086: /*
2087: * convert the non processed part
2088: */
2089: processed = ctxt->input->cur - ctxt->input->base;
2090: len = ctxt->input->buf->buffer->use - processed;
2091:
2092: if (len <= 0) {
2093: return;
2094: }
2095: size = ctxt->input->buf->buffer->use * 4;
2096: if (size < 4000)
2097: size = 4000;
1.167 daniel 2098: retry_larger:
1.160 daniel 2099: buf = (xmlChar *) xmlMalloc(size + 1);
1.156 daniel 2100: if (buf == NULL) {
2101: if ((ctxt->sax != NULL) && (ctxt->sax->error != NULL))
2102: ctxt->sax->error(ctxt->userData,
2103: "xmlSwitchEncoding : out of memory\n");
2104: return;
2105: }
1.160 daniel 2106: /* TODO !!! Handling of buf too small */
1.156 daniel 2107: res = handler->input(buf, size, ctxt->input->cur, &len);
1.167 daniel 2108: if (res == -1) {
2109: size *= 2;
2110: xmlFree(buf);
2111: goto retry_larger;
2112: }
1.156 daniel 2113: if ((res < 0) ||
2114: (len != ctxt->input->buf->buffer->use - processed)) {
2115: if ((ctxt->sax != NULL) && (ctxt->sax->error != NULL))
2116: ctxt->sax->error(ctxt->userData,
2117: "xmlSwitchEncoding : conversion failed\n");
2118: xmlFree(buf);
2119: return;
2120: }
1.167 daniel 2121:
1.156 daniel 2122: /*
2123: * Conversion succeeded, get rid of the old buffer
2124: */
2125: xmlFree(ctxt->input->buf->buffer->content);
2126: ctxt->input->buf->buffer->content = buf;
2127: ctxt->input->base = buf;
2128: ctxt->input->cur = buf;
2129: ctxt->input->buf->buffer->size = size;
2130: ctxt->input->buf->buffer->use = res;
1.160 daniel 2131: buf[res] = 0;
1.156 daniel 2132: }
2133: return;
2134: } else {
2135: if (ctxt->input->length == 0) {
2136: /*
2137: * When parsing a static memory array one must know the
2138: * size to be able to convert the buffer.
2139: */
2140: if ((ctxt->sax != NULL) && (ctxt->sax->error != NULL))
2141: ctxt->sax->error(ctxt->userData,
2142: "xmlSwitchEncoding : no input\n");
2143: return;
2144: } else {
2145: xmlChar *buf;
2146: int res, len;
2147: int processed = ctxt->input->cur - ctxt->input->base;
2148:
2149: /*
2150: * convert the non processed part
2151: */
2152: len = ctxt->input->length - processed;
2153: if (len <= 0) {
2154: if ((ctxt->sax != NULL) && (ctxt->sax->error != NULL))
2155: ctxt->sax->error(ctxt->userData,
2156: "xmlSwitchEncoding : input fully consumed?\n");
2157: return;
2158: }
2159: buf = (xmlChar *) xmlMalloc(ctxt->input->length * 4);
2160: if (buf == NULL) {
2161: if ((ctxt->sax != NULL) && (ctxt->sax->error != NULL))
2162: ctxt->sax->error(ctxt->userData,
2163: "xmlSwitchEncoding : out of memory\n");
2164: return;
2165: }
2166: res = handler->input(buf, ctxt->input->length * 4,
2167: ctxt->input->cur, &len);
2168: if ((res < 0) ||
2169: (len != ctxt->input->length - processed)) {
2170: if ((ctxt->sax != NULL) && (ctxt->sax->error != NULL))
2171: ctxt->sax->error(ctxt->userData,
2172: "xmlSwitchEncoding : conversion failed\n");
2173: xmlFree(buf);
2174: return;
2175: }
2176: /*
2177: * Conversion succeeded, get rid of the old buffer
2178: */
2179: if ((ctxt->input->free != NULL) &&
2180: (ctxt->input->base != NULL))
2181: ctxt->input->free((xmlChar *) ctxt->input->base);
2182: ctxt->input->base = ctxt->input->cur = buf;
2183: ctxt->input->length = res;
2184: }
2185: }
2186: } else {
2187: if ((ctxt->sax != NULL) && (ctxt->sax->error != NULL))
2188: ctxt->sax->error(ctxt->userData,
2189: "xmlSwitchEncoding : no input\n");
2190: }
2191: }
2192:
1.75 daniel 2193: switch (enc) {
2194: case XML_CHAR_ENCODING_ERROR:
1.123 daniel 2195: ctxt->errNo = XML_ERR_UNKNOWN_ENCODING;
1.75 daniel 2196: if ((ctxt->sax != NULL) && (ctxt->sax->error != NULL))
2197: ctxt->sax->error(ctxt->userData, "encoding unknown\n");
2198: ctxt->wellFormed = 0;
2199: break;
2200: case XML_CHAR_ENCODING_NONE:
2201: /* let's assume it's UTF-8 without the XML decl */
2202: return;
2203: case XML_CHAR_ENCODING_UTF8:
2204: /* default encoding, no conversion should be needed */
2205: return;
2206: case XML_CHAR_ENCODING_UTF16LE:
1.123 daniel 2207: ctxt->errNo = XML_ERR_UNSUPPORTED_ENCODING;
1.75 daniel 2208: if ((ctxt->sax != NULL) && (ctxt->sax->error != NULL))
2209: ctxt->sax->error(ctxt->userData,
2210: "char encoding UTF16 little endian not supported\n");
2211: break;
2212: case XML_CHAR_ENCODING_UTF16BE:
1.123 daniel 2213: ctxt->errNo = XML_ERR_UNSUPPORTED_ENCODING;
1.75 daniel 2214: if ((ctxt->sax != NULL) && (ctxt->sax->error != NULL))
2215: ctxt->sax->error(ctxt->userData,
2216: "char encoding UTF16 big endian not supported\n");
2217: break;
2218: case XML_CHAR_ENCODING_UCS4LE:
1.123 daniel 2219: ctxt->errNo = XML_ERR_UNSUPPORTED_ENCODING;
1.75 daniel 2220: if ((ctxt->sax != NULL) && (ctxt->sax->error != NULL))
2221: ctxt->sax->error(ctxt->userData,
2222: "char encoding USC4 little endian not supported\n");
2223: break;
2224: case XML_CHAR_ENCODING_UCS4BE:
1.123 daniel 2225: ctxt->errNo = XML_ERR_UNSUPPORTED_ENCODING;
1.75 daniel 2226: if ((ctxt->sax != NULL) && (ctxt->sax->error != NULL))
2227: ctxt->sax->error(ctxt->userData,
2228: "char encoding USC4 big endian not supported\n");
2229: break;
2230: case XML_CHAR_ENCODING_EBCDIC:
1.123 daniel 2231: ctxt->errNo = XML_ERR_UNSUPPORTED_ENCODING;
1.75 daniel 2232: if ((ctxt->sax != NULL) && (ctxt->sax->error != NULL))
2233: ctxt->sax->error(ctxt->userData,
2234: "char encoding EBCDIC not supported\n");
2235: break;
2236: case XML_CHAR_ENCODING_UCS4_2143:
1.123 daniel 2237: ctxt->errNo = XML_ERR_UNSUPPORTED_ENCODING;
1.75 daniel 2238: if ((ctxt->sax != NULL) && (ctxt->sax->error != NULL))
2239: ctxt->sax->error(ctxt->userData,
2240: "char encoding UCS4 2143 not supported\n");
2241: break;
2242: case XML_CHAR_ENCODING_UCS4_3412:
1.123 daniel 2243: ctxt->errNo = XML_ERR_UNSUPPORTED_ENCODING;
1.75 daniel 2244: if ((ctxt->sax != NULL) && (ctxt->sax->error != NULL))
2245: ctxt->sax->error(ctxt->userData,
2246: "char encoding UCS4 3412 not supported\n");
2247: break;
2248: case XML_CHAR_ENCODING_UCS2:
1.123 daniel 2249: ctxt->errNo = XML_ERR_UNSUPPORTED_ENCODING;
1.75 daniel 2250: if ((ctxt->sax != NULL) && (ctxt->sax->error != NULL))
2251: ctxt->sax->error(ctxt->userData,
2252: "char encoding UCS2 not supported\n");
2253: break;
2254: case XML_CHAR_ENCODING_8859_1:
1.123 daniel 2255: ctxt->errNo = XML_ERR_UNSUPPORTED_ENCODING;
1.75 daniel 2256: if ((ctxt->sax != NULL) && (ctxt->sax->error != NULL))
2257: ctxt->sax->error(ctxt->userData,
2258: "char encoding ISO_8859_1 ISO Latin 1 not supported\n");
2259: break;
2260: case XML_CHAR_ENCODING_8859_2:
1.123 daniel 2261: ctxt->errNo = XML_ERR_UNSUPPORTED_ENCODING;
1.75 daniel 2262: if ((ctxt->sax != NULL) && (ctxt->sax->error != NULL))
2263: ctxt->sax->error(ctxt->userData,
2264: "char encoding ISO_8859_2 ISO Latin 2 not supported\n");
2265: break;
2266: case XML_CHAR_ENCODING_8859_3:
1.123 daniel 2267: ctxt->errNo = XML_ERR_UNSUPPORTED_ENCODING;
1.75 daniel 2268: if ((ctxt->sax != NULL) && (ctxt->sax->error != NULL))
2269: ctxt->sax->error(ctxt->userData,
2270: "char encoding ISO_8859_3 not supported\n");
2271: break;
2272: case XML_CHAR_ENCODING_8859_4:
1.123 daniel 2273: ctxt->errNo = XML_ERR_UNSUPPORTED_ENCODING;
1.75 daniel 2274: if ((ctxt->sax != NULL) && (ctxt->sax->error != NULL))
2275: ctxt->sax->error(ctxt->userData,
2276: "char encoding ISO_8859_4 not supported\n");
2277: break;
2278: case XML_CHAR_ENCODING_8859_5:
1.123 daniel 2279: ctxt->errNo = XML_ERR_UNSUPPORTED_ENCODING;
1.75 daniel 2280: if ((ctxt->sax != NULL) && (ctxt->sax->error != NULL))
2281: ctxt->sax->error(ctxt->userData,
2282: "char encoding ISO_8859_5 not supported\n");
2283: break;
2284: case XML_CHAR_ENCODING_8859_6:
1.123 daniel 2285: ctxt->errNo = XML_ERR_UNSUPPORTED_ENCODING;
1.75 daniel 2286: if ((ctxt->sax != NULL) && (ctxt->sax->error != NULL))
2287: ctxt->sax->error(ctxt->userData,
2288: "char encoding ISO_8859_6 not supported\n");
2289: break;
2290: case XML_CHAR_ENCODING_8859_7:
1.123 daniel 2291: ctxt->errNo = XML_ERR_UNSUPPORTED_ENCODING;
1.75 daniel 2292: if ((ctxt->sax != NULL) && (ctxt->sax->error != NULL))
2293: ctxt->sax->error(ctxt->userData,
2294: "char encoding ISO_8859_7 not supported\n");
2295: break;
2296: case XML_CHAR_ENCODING_8859_8:
1.123 daniel 2297: ctxt->errNo = XML_ERR_UNSUPPORTED_ENCODING;
1.75 daniel 2298: if ((ctxt->sax != NULL) && (ctxt->sax->error != NULL))
2299: ctxt->sax->error(ctxt->userData,
2300: "char encoding ISO_8859_8 not supported\n");
2301: break;
2302: case XML_CHAR_ENCODING_8859_9:
1.123 daniel 2303: ctxt->errNo = XML_ERR_UNSUPPORTED_ENCODING;
1.75 daniel 2304: if ((ctxt->sax != NULL) && (ctxt->sax->error != NULL))
2305: ctxt->sax->error(ctxt->userData,
2306: "char encoding ISO_8859_9 not supported\n");
2307: break;
2308: case XML_CHAR_ENCODING_2022_JP:
1.123 daniel 2309: ctxt->errNo = XML_ERR_UNSUPPORTED_ENCODING;
1.75 daniel 2310: if ((ctxt->sax != NULL) && (ctxt->sax->error != NULL))
2311: ctxt->sax->error(ctxt->userData,
2312: "char encoding ISO-2022-JPnot supported\n");
2313: break;
2314: case XML_CHAR_ENCODING_SHIFT_JIS:
1.123 daniel 2315: ctxt->errNo = XML_ERR_UNSUPPORTED_ENCODING;
1.75 daniel 2316: if ((ctxt->sax != NULL) && (ctxt->sax->error != NULL))
2317: ctxt->sax->error(ctxt->userData,
2318: "char encoding Shift_JISnot supported\n");
2319: break;
2320: case XML_CHAR_ENCODING_EUC_JP:
1.123 daniel 2321: ctxt->errNo = XML_ERR_UNSUPPORTED_ENCODING;
1.75 daniel 2322: if ((ctxt->sax != NULL) && (ctxt->sax->error != NULL))
2323: ctxt->sax->error(ctxt->userData,
2324: "char encoding EUC-JPnot supported\n");
2325: break;
2326: }
2327: }
2328:
2329: /************************************************************************
2330: * *
1.123 daniel 2331: * Commodity functions to handle xmlChars *
1.28 daniel 2332: * *
2333: ************************************************************************/
2334:
1.50 daniel 2335: /**
2336: * xmlStrndup:
1.123 daniel 2337: * @cur: the input xmlChar *
1.50 daniel 2338: * @len: the len of @cur
2339: *
1.123 daniel 2340: * a strndup for array of xmlChar's
1.68 daniel 2341: *
1.123 daniel 2342: * Returns a new xmlChar * or NULL
1.1 veillard 2343: */
1.123 daniel 2344: xmlChar *
2345: xmlStrndup(const xmlChar *cur, int len) {
1.135 daniel 2346: xmlChar *ret;
2347:
2348: if ((cur == NULL) || (len < 0)) return(NULL);
2349: ret = xmlMalloc((len + 1) * sizeof(xmlChar));
1.1 veillard 2350: if (ret == NULL) {
1.86 daniel 2351: fprintf(stderr, "malloc of %ld byte failed\n",
1.123 daniel 2352: (len + 1) * (long)sizeof(xmlChar));
1.1 veillard 2353: return(NULL);
2354: }
1.123 daniel 2355: memcpy(ret, cur, len * sizeof(xmlChar));
1.1 veillard 2356: ret[len] = 0;
2357: return(ret);
2358: }
2359:
1.50 daniel 2360: /**
2361: * xmlStrdup:
1.123 daniel 2362: * @cur: the input xmlChar *
1.50 daniel 2363: *
1.152 daniel 2364: * a strdup for array of xmlChar's. Since they are supposed to be
2365: * encoded in UTF-8 or an encoding with 8bit based chars, we assume
2366: * a termination mark of '0'.
1.68 daniel 2367: *
1.123 daniel 2368: * Returns a new xmlChar * or NULL
1.1 veillard 2369: */
1.123 daniel 2370: xmlChar *
2371: xmlStrdup(const xmlChar *cur) {
2372: const xmlChar *p = cur;
1.1 veillard 2373:
1.135 daniel 2374: if (cur == NULL) return(NULL);
1.152 daniel 2375: while (*p != 0) p++;
1.1 veillard 2376: return(xmlStrndup(cur, p - cur));
2377: }
2378:
1.50 daniel 2379: /**
2380: * xmlCharStrndup:
2381: * @cur: the input char *
2382: * @len: the len of @cur
2383: *
1.123 daniel 2384: * a strndup for char's to xmlChar's
1.68 daniel 2385: *
1.123 daniel 2386: * Returns a new xmlChar * or NULL
1.45 daniel 2387: */
2388:
1.123 daniel 2389: xmlChar *
1.55 daniel 2390: xmlCharStrndup(const char *cur, int len) {
1.45 daniel 2391: int i;
1.135 daniel 2392: xmlChar *ret;
2393:
2394: if ((cur == NULL) || (len < 0)) return(NULL);
2395: ret = xmlMalloc((len + 1) * sizeof(xmlChar));
1.45 daniel 2396: if (ret == NULL) {
1.86 daniel 2397: fprintf(stderr, "malloc of %ld byte failed\n",
1.123 daniel 2398: (len + 1) * (long)sizeof(xmlChar));
1.45 daniel 2399: return(NULL);
2400: }
2401: for (i = 0;i < len;i++)
1.123 daniel 2402: ret[i] = (xmlChar) cur[i];
1.45 daniel 2403: ret[len] = 0;
2404: return(ret);
2405: }
2406:
1.50 daniel 2407: /**
2408: * xmlCharStrdup:
2409: * @cur: the input char *
2410: * @len: the len of @cur
2411: *
1.123 daniel 2412: * a strdup for char's to xmlChar's
1.68 daniel 2413: *
1.123 daniel 2414: * Returns a new xmlChar * or NULL
1.45 daniel 2415: */
2416:
1.123 daniel 2417: xmlChar *
1.55 daniel 2418: xmlCharStrdup(const char *cur) {
1.45 daniel 2419: const char *p = cur;
2420:
1.135 daniel 2421: if (cur == NULL) return(NULL);
1.45 daniel 2422: while (*p != '\0') p++;
2423: return(xmlCharStrndup(cur, p - cur));
2424: }
2425:
1.50 daniel 2426: /**
2427: * xmlStrcmp:
1.123 daniel 2428: * @str1: the first xmlChar *
2429: * @str2: the second xmlChar *
1.50 daniel 2430: *
1.123 daniel 2431: * a strcmp for xmlChar's
1.68 daniel 2432: *
2433: * Returns the integer result of the comparison
1.14 veillard 2434: */
2435:
1.55 daniel 2436: int
1.123 daniel 2437: xmlStrcmp(const xmlChar *str1, const xmlChar *str2) {
1.14 veillard 2438: register int tmp;
2439:
1.135 daniel 2440: if ((str1 == NULL) && (str2 == NULL)) return(0);
2441: if (str1 == NULL) return(-1);
2442: if (str2 == NULL) return(1);
1.14 veillard 2443: do {
2444: tmp = *str1++ - *str2++;
2445: if (tmp != 0) return(tmp);
2446: } while ((*str1 != 0) && (*str2 != 0));
2447: return (*str1 - *str2);
2448: }
2449:
1.50 daniel 2450: /**
2451: * xmlStrncmp:
1.123 daniel 2452: * @str1: the first xmlChar *
2453: * @str2: the second xmlChar *
1.50 daniel 2454: * @len: the max comparison length
2455: *
1.123 daniel 2456: * a strncmp for xmlChar's
1.68 daniel 2457: *
2458: * Returns the integer result of the comparison
1.14 veillard 2459: */
2460:
1.55 daniel 2461: int
1.123 daniel 2462: xmlStrncmp(const xmlChar *str1, const xmlChar *str2, int len) {
1.14 veillard 2463: register int tmp;
2464:
2465: if (len <= 0) return(0);
1.135 daniel 2466: if ((str1 == NULL) && (str2 == NULL)) return(0);
2467: if (str1 == NULL) return(-1);
2468: if (str2 == NULL) return(1);
1.14 veillard 2469: do {
2470: tmp = *str1++ - *str2++;
2471: if (tmp != 0) return(tmp);
2472: len--;
2473: if (len <= 0) return(0);
2474: } while ((*str1 != 0) && (*str2 != 0));
2475: return (*str1 - *str2);
2476: }
2477:
1.50 daniel 2478: /**
2479: * xmlStrchr:
1.123 daniel 2480: * @str: the xmlChar * array
2481: * @val: the xmlChar to search
1.50 daniel 2482: *
1.123 daniel 2483: * a strchr for xmlChar's
1.68 daniel 2484: *
1.123 daniel 2485: * Returns the xmlChar * for the first occurence or NULL.
1.14 veillard 2486: */
2487:
1.123 daniel 2488: const xmlChar *
2489: xmlStrchr(const xmlChar *str, xmlChar val) {
1.135 daniel 2490: if (str == NULL) return(NULL);
1.14 veillard 2491: while (*str != 0) {
1.123 daniel 2492: if (*str == val) return((xmlChar *) str);
1.14 veillard 2493: str++;
2494: }
2495: return(NULL);
1.89 daniel 2496: }
2497:
2498: /**
2499: * xmlStrstr:
1.123 daniel 2500: * @str: the xmlChar * array (haystack)
2501: * @val: the xmlChar to search (needle)
1.89 daniel 2502: *
1.123 daniel 2503: * a strstr for xmlChar's
1.89 daniel 2504: *
1.123 daniel 2505: * Returns the xmlChar * for the first occurence or NULL.
1.89 daniel 2506: */
2507:
1.123 daniel 2508: const xmlChar *
2509: xmlStrstr(const xmlChar *str, xmlChar *val) {
1.89 daniel 2510: int n;
2511:
2512: if (str == NULL) return(NULL);
2513: if (val == NULL) return(NULL);
2514: n = xmlStrlen(val);
2515:
2516: if (n == 0) return(str);
2517: while (*str != 0) {
2518: if (*str == *val) {
1.123 daniel 2519: if (!xmlStrncmp(str, val, n)) return((const xmlChar *) str);
1.89 daniel 2520: }
2521: str++;
2522: }
2523: return(NULL);
2524: }
2525:
2526: /**
2527: * xmlStrsub:
1.123 daniel 2528: * @str: the xmlChar * array (haystack)
1.89 daniel 2529: * @start: the index of the first char (zero based)
2530: * @len: the length of the substring
2531: *
2532: * Extract a substring of a given string
2533: *
1.123 daniel 2534: * Returns the xmlChar * for the first occurence or NULL.
1.89 daniel 2535: */
2536:
1.123 daniel 2537: xmlChar *
2538: xmlStrsub(const xmlChar *str, int start, int len) {
1.89 daniel 2539: int i;
2540:
2541: if (str == NULL) return(NULL);
2542: if (start < 0) return(NULL);
1.90 daniel 2543: if (len < 0) return(NULL);
1.89 daniel 2544:
2545: for (i = 0;i < start;i++) {
2546: if (*str == 0) return(NULL);
2547: str++;
2548: }
2549: if (*str == 0) return(NULL);
2550: return(xmlStrndup(str, len));
1.14 veillard 2551: }
1.28 daniel 2552:
1.50 daniel 2553: /**
2554: * xmlStrlen:
1.123 daniel 2555: * @str: the xmlChar * array
1.50 daniel 2556: *
1.127 daniel 2557: * length of a xmlChar's string
1.68 daniel 2558: *
1.123 daniel 2559: * Returns the number of xmlChar contained in the ARRAY.
1.45 daniel 2560: */
2561:
1.55 daniel 2562: int
1.123 daniel 2563: xmlStrlen(const xmlChar *str) {
1.45 daniel 2564: int len = 0;
2565:
2566: if (str == NULL) return(0);
2567: while (*str != 0) {
2568: str++;
2569: len++;
2570: }
2571: return(len);
2572: }
2573:
1.50 daniel 2574: /**
2575: * xmlStrncat:
1.123 daniel 2576: * @cur: the original xmlChar * array
2577: * @add: the xmlChar * array added
1.50 daniel 2578: * @len: the length of @add
2579: *
1.123 daniel 2580: * a strncat for array of xmlChar's
1.68 daniel 2581: *
1.123 daniel 2582: * Returns a new xmlChar * containing the concatenated string.
1.45 daniel 2583: */
2584:
1.123 daniel 2585: xmlChar *
2586: xmlStrncat(xmlChar *cur, const xmlChar *add, int len) {
1.45 daniel 2587: int size;
1.123 daniel 2588: xmlChar *ret;
1.45 daniel 2589:
2590: if ((add == NULL) || (len == 0))
2591: return(cur);
2592: if (cur == NULL)
2593: return(xmlStrndup(add, len));
2594:
2595: size = xmlStrlen(cur);
1.123 daniel 2596: ret = xmlRealloc(cur, (size + len + 1) * sizeof(xmlChar));
1.45 daniel 2597: if (ret == NULL) {
1.86 daniel 2598: fprintf(stderr, "xmlStrncat: realloc of %ld byte failed\n",
1.123 daniel 2599: (size + len + 1) * (long)sizeof(xmlChar));
1.45 daniel 2600: return(cur);
2601: }
1.123 daniel 2602: memcpy(&ret[size], add, len * sizeof(xmlChar));
1.45 daniel 2603: ret[size + len] = 0;
2604: return(ret);
2605: }
2606:
1.50 daniel 2607: /**
2608: * xmlStrcat:
1.123 daniel 2609: * @cur: the original xmlChar * array
2610: * @add: the xmlChar * array added
1.50 daniel 2611: *
1.152 daniel 2612: * a strcat for array of xmlChar's. Since they are supposed to be
2613: * encoded in UTF-8 or an encoding with 8bit based chars, we assume
2614: * a termination mark of '0'.
1.68 daniel 2615: *
1.123 daniel 2616: * Returns a new xmlChar * containing the concatenated string.
1.45 daniel 2617: */
1.123 daniel 2618: xmlChar *
2619: xmlStrcat(xmlChar *cur, const xmlChar *add) {
2620: const xmlChar *p = add;
1.45 daniel 2621:
2622: if (add == NULL) return(cur);
2623: if (cur == NULL)
2624: return(xmlStrdup(add));
2625:
1.152 daniel 2626: while (*p != 0) p++;
1.45 daniel 2627: return(xmlStrncat(cur, add, p - add));
2628: }
2629:
2630: /************************************************************************
2631: * *
2632: * Commodity functions, cleanup needed ? *
2633: * *
2634: ************************************************************************/
2635:
1.50 daniel 2636: /**
2637: * areBlanks:
2638: * @ctxt: an XML parser context
1.123 daniel 2639: * @str: a xmlChar *
1.50 daniel 2640: * @len: the size of @str
2641: *
1.45 daniel 2642: * Is this a sequence of blank chars that one can ignore ?
1.50 daniel 2643: *
1.68 daniel 2644: * Returns 1 if ignorable 0 otherwise.
1.45 daniel 2645: */
2646:
1.123 daniel 2647: static int areBlanks(xmlParserCtxtPtr ctxt, const xmlChar *str, int len) {
1.104 daniel 2648: int i, ret;
1.45 daniel 2649: xmlNodePtr lastChild;
2650:
2651: for (i = 0;i < len;i++)
2652: if (!(IS_BLANK(str[i]))) return(0);
2653:
1.152 daniel 2654: if (RAW != '<') return(0);
1.72 daniel 2655: if (ctxt->node == NULL) return(0);
1.104 daniel 2656: if (ctxt->myDoc != NULL) {
2657: ret = xmlIsMixedElement(ctxt->myDoc, ctxt->node->name);
2658: if (ret == 0) return(1);
2659: if (ret == 1) return(0);
2660: }
2661: /*
2662: * heuristic
2663: */
1.45 daniel 2664: lastChild = xmlGetLastChild(ctxt->node);
2665: if (lastChild == NULL) {
2666: if (ctxt->node->content != NULL) return(0);
2667: } else if (xmlNodeIsText(lastChild))
2668: return(0);
1.157 daniel 2669: else if ((ctxt->node->children != NULL) &&
2670: (xmlNodeIsText(ctxt->node->children)))
1.104 daniel 2671: return(0);
1.45 daniel 2672: return(1);
2673: }
2674:
1.50 daniel 2675: /**
2676: * xmlHandleEntity:
2677: * @ctxt: an XML parser context
2678: * @entity: an XML entity pointer.
2679: *
2680: * Default handling of defined entities, when should we define a new input
1.45 daniel 2681: * stream ? When do we just handle that as a set of chars ?
1.99 daniel 2682: *
2683: * OBSOLETE: to be removed at some point.
1.45 daniel 2684: */
2685:
1.55 daniel 2686: void
2687: xmlHandleEntity(xmlParserCtxtPtr ctxt, xmlEntityPtr entity) {
1.45 daniel 2688: int len;
1.50 daniel 2689: xmlParserInputPtr input;
1.45 daniel 2690:
2691: if (entity->content == NULL) {
1.123 daniel 2692: ctxt->errNo = XML_ERR_INTERNAL_ERROR;
1.55 daniel 2693: if ((ctxt->sax != NULL) && (ctxt->sax->error != NULL))
1.74 daniel 2694: ctxt->sax->error(ctxt->userData, "xmlHandleEntity %s: content == NULL\n",
1.45 daniel 2695: entity->name);
1.59 daniel 2696: ctxt->wellFormed = 0;
1.45 daniel 2697: return;
2698: }
2699: len = xmlStrlen(entity->content);
2700: if (len <= 2) goto handle_as_char;
2701:
2702: /*
2703: * Redefine its content as an input stream.
2704: */
1.50 daniel 2705: input = xmlNewEntityInputStream(ctxt, entity);
2706: xmlPushInput(ctxt, input);
1.45 daniel 2707: return;
2708:
2709: handle_as_char:
2710: /*
2711: * Just handle the content as a set of chars.
2712: */
1.72 daniel 2713: if ((ctxt->sax != NULL) && (ctxt->sax->characters != NULL))
1.74 daniel 2714: ctxt->sax->characters(ctxt->userData, entity->content, len);
1.45 daniel 2715:
2716: }
2717:
2718: /*
2719: * Forward declarations for recursive behaviour.
2720: */
1.77 daniel 2721: void xmlParsePEReference(xmlParserCtxtPtr ctxt);
2722: void xmlParseReference(xmlParserCtxtPtr ctxt);
1.45 daniel 2723:
1.28 daniel 2724: /************************************************************************
2725: * *
2726: * Extra stuff for namespace support *
2727: * Relates to http://www.w3.org/TR/WD-xml-names *
2728: * *
2729: ************************************************************************/
2730:
1.50 daniel 2731: /**
2732: * xmlNamespaceParseNCName:
2733: * @ctxt: an XML parser context
2734: *
2735: * parse an XML namespace name.
1.28 daniel 2736: *
2737: * [NS 3] NCName ::= (Letter | '_') (NCNameChar)*
2738: *
2739: * [NS 4] NCNameChar ::= Letter | Digit | '.' | '-' | '_' |
2740: * CombiningChar | Extender
1.68 daniel 2741: *
2742: * Returns the namespace name or NULL
1.28 daniel 2743: */
2744:
1.123 daniel 2745: xmlChar *
1.55 daniel 2746: xmlNamespaceParseNCName(xmlParserCtxtPtr ctxt) {
1.152 daniel 2747: xmlChar buf[XML_MAX_NAMELEN + 5];
2748: int len = 0, l;
2749: int cur = CUR_CHAR(l);
1.28 daniel 2750:
1.156 daniel 2751: /* load first the value of the char !!! */
1.152 daniel 2752: if (!IS_LETTER(cur) && (cur != '_')) return(NULL);
1.28 daniel 2753:
1.152 daniel 2754: while ((IS_LETTER(cur)) || (IS_DIGIT(cur)) ||
2755: (cur == '.') || (cur == '-') ||
2756: (cur == '_') ||
2757: (IS_COMBINING(cur)) ||
2758: (IS_EXTENDER(cur))) {
2759: COPY_BUF(l,buf,len,cur);
2760: NEXTL(l);
2761: cur = CUR_CHAR(l);
1.91 daniel 2762: if (len >= XML_MAX_NAMELEN) {
2763: fprintf(stderr,
2764: "xmlNamespaceParseNCName: reached XML_MAX_NAMELEN limit\n");
1.152 daniel 2765: while ((IS_LETTER(cur)) || (IS_DIGIT(cur)) ||
2766: (cur == '.') || (cur == '-') ||
2767: (cur == '_') ||
2768: (IS_COMBINING(cur)) ||
2769: (IS_EXTENDER(cur))) {
2770: NEXTL(l);
2771: cur = CUR_CHAR(l);
2772: }
1.91 daniel 2773: break;
2774: }
2775: }
2776: return(xmlStrndup(buf, len));
1.28 daniel 2777: }
2778:
1.50 daniel 2779: /**
2780: * xmlNamespaceParseQName:
2781: * @ctxt: an XML parser context
1.123 daniel 2782: * @prefix: a xmlChar **
1.50 daniel 2783: *
2784: * parse an XML qualified name
1.28 daniel 2785: *
2786: * [NS 5] QName ::= (Prefix ':')? LocalPart
2787: *
2788: * [NS 6] Prefix ::= NCName
2789: *
2790: * [NS 7] LocalPart ::= NCName
1.68 daniel 2791: *
1.127 daniel 2792: * Returns the local part, and prefix is updated
1.50 daniel 2793: * to get the Prefix if any.
1.28 daniel 2794: */
2795:
1.123 daniel 2796: xmlChar *
2797: xmlNamespaceParseQName(xmlParserCtxtPtr ctxt, xmlChar **prefix) {
2798: xmlChar *ret = NULL;
1.28 daniel 2799:
2800: *prefix = NULL;
2801: ret = xmlNamespaceParseNCName(ctxt);
1.152 daniel 2802: if (RAW == ':') {
1.28 daniel 2803: *prefix = ret;
1.40 daniel 2804: NEXT;
1.28 daniel 2805: ret = xmlNamespaceParseNCName(ctxt);
2806: }
2807:
2808: return(ret);
2809: }
2810:
1.50 daniel 2811: /**
1.72 daniel 2812: * xmlSplitQName:
1.162 daniel 2813: * @ctxt: an XML parser context
1.72 daniel 2814: * @name: an XML parser context
1.123 daniel 2815: * @prefix: a xmlChar **
1.72 daniel 2816: *
2817: * parse an XML qualified name string
2818: *
2819: * [NS 5] QName ::= (Prefix ':')? LocalPart
2820: *
2821: * [NS 6] Prefix ::= NCName
2822: *
2823: * [NS 7] LocalPart ::= NCName
2824: *
1.127 daniel 2825: * Returns the local part, and prefix is updated
1.72 daniel 2826: * to get the Prefix if any.
2827: */
2828:
1.123 daniel 2829: xmlChar *
1.162 daniel 2830: xmlSplitQName(xmlParserCtxtPtr ctxt, const xmlChar *name, xmlChar **prefix) {
2831: xmlChar buf[XML_MAX_NAMELEN + 5];
2832: int len = 0;
1.123 daniel 2833: xmlChar *ret = NULL;
2834: const xmlChar *cur = name;
1.162 daniel 2835: int c,l;
1.72 daniel 2836:
2837: *prefix = NULL;
1.113 daniel 2838:
2839: /* xml: prefix is not really a namespace */
2840: if ((cur[0] == 'x') && (cur[1] == 'm') &&
2841: (cur[2] == 'l') && (cur[3] == ':'))
2842: return(xmlStrdup(name));
2843:
1.162 daniel 2844: /* nasty but valid */
2845: if (cur[0] == ':')
2846: return(xmlStrdup(name));
2847:
2848: c = CUR_SCHAR(cur, l);
2849: if (!IS_LETTER(c) && (c != '_')) return(NULL);
1.72 daniel 2850:
1.162 daniel 2851: while ((IS_LETTER(c)) || (IS_DIGIT(c)) ||
2852: (c == '.') || (c == '-') ||
2853: (c == '_') ||
2854: (IS_COMBINING(c)) ||
2855: (IS_EXTENDER(c))) {
2856: COPY_BUF(l,buf,len,c);
2857: cur += l;
2858: c = CUR_SCHAR(cur, l);
2859: }
1.72 daniel 2860:
1.162 daniel 2861: ret = xmlStrndup(buf, len);
1.72 daniel 2862:
1.162 daniel 2863: if (c == ':') {
2864: cur += l;
1.163 daniel 2865: c = CUR_SCHAR(cur, l);
1.162 daniel 2866: if (!IS_LETTER(c) && (c != '_')) return(ret);
1.72 daniel 2867: *prefix = ret;
1.162 daniel 2868: len = 0;
1.72 daniel 2869:
1.162 daniel 2870: while ((IS_LETTER(c)) || (IS_DIGIT(c)) ||
2871: (c == '.') || (c == '-') ||
2872: (c == '_') ||
2873: (IS_COMBINING(c)) ||
2874: (IS_EXTENDER(c))) {
2875: COPY_BUF(l,buf,len,c);
2876: cur += l;
2877: c = CUR_SCHAR(cur, l);
2878: }
1.72 daniel 2879:
1.162 daniel 2880: ret = xmlStrndup(buf, len);
1.72 daniel 2881: }
2882:
2883: return(ret);
2884: }
2885: /**
1.50 daniel 2886: * xmlNamespaceParseNSDef:
2887: * @ctxt: an XML parser context
2888: *
2889: * parse a namespace prefix declaration
1.28 daniel 2890: *
2891: * [NS 1] NSDef ::= PrefixDef Eq SystemLiteral
2892: *
2893: * [NS 2] PrefixDef ::= 'xmlns' (':' NCName)?
1.68 daniel 2894: *
2895: * Returns the namespace name
1.28 daniel 2896: */
2897:
1.123 daniel 2898: xmlChar *
1.55 daniel 2899: xmlNamespaceParseNSDef(xmlParserCtxtPtr ctxt) {
1.123 daniel 2900: xmlChar *name = NULL;
1.28 daniel 2901:
1.152 daniel 2902: if ((RAW == 'x') && (NXT(1) == 'm') &&
1.40 daniel 2903: (NXT(2) == 'l') && (NXT(3) == 'n') &&
2904: (NXT(4) == 's')) {
2905: SKIP(5);
1.152 daniel 2906: if (RAW == ':') {
1.40 daniel 2907: NEXT;
1.28 daniel 2908: name = xmlNamespaceParseNCName(ctxt);
2909: }
2910: }
1.39 daniel 2911: return(name);
1.28 daniel 2912: }
2913:
1.50 daniel 2914: /**
2915: * xmlParseQuotedString:
2916: * @ctxt: an XML parser context
2917: *
1.45 daniel 2918: * [OLD] Parse and return a string between quotes or doublequotes
1.110 daniel 2919: * To be removed at next drop of binary compatibility
1.68 daniel 2920: *
2921: * Returns the string parser or NULL.
1.45 daniel 2922: */
1.123 daniel 2923: xmlChar *
1.55 daniel 2924: xmlParseQuotedString(xmlParserCtxtPtr ctxt) {
1.135 daniel 2925: xmlChar *buf = NULL;
1.152 daniel 2926: int len = 0,l;
1.140 daniel 2927: int size = XML_PARSER_BUFFER_SIZE;
1.152 daniel 2928: int c;
1.45 daniel 2929:
1.135 daniel 2930: buf = (xmlChar *) xmlMalloc(size * sizeof(xmlChar));
2931: if (buf == NULL) {
2932: fprintf(stderr, "malloc of %d byte failed\n", size);
2933: return(NULL);
2934: }
1.152 daniel 2935: if (RAW == '"') {
1.45 daniel 2936: NEXT;
1.152 daniel 2937: c = CUR_CHAR(l);
1.135 daniel 2938: while (IS_CHAR(c) && (c != '"')) {
1.152 daniel 2939: if (len + 5 >= size) {
1.135 daniel 2940: size *= 2;
2941: buf = xmlRealloc(buf, size * sizeof(xmlChar));
2942: if (buf == NULL) {
2943: fprintf(stderr, "realloc of %d byte failed\n", size);
2944: return(NULL);
2945: }
2946: }
1.152 daniel 2947: COPY_BUF(l,buf,len,c);
2948: NEXTL(l);
2949: c = CUR_CHAR(l);
1.135 daniel 2950: }
2951: if (c != '"') {
1.123 daniel 2952: ctxt->errNo = XML_ERR_STRING_NOT_CLOSED;
1.55 daniel 2953: if ((ctxt->sax != NULL) && (ctxt->sax->error != NULL))
1.135 daniel 2954: ctxt->sax->error(ctxt->userData,
2955: "String not closed \"%.50s\"\n", buf);
1.59 daniel 2956: ctxt->wellFormed = 0;
1.55 daniel 2957: } else {
1.45 daniel 2958: NEXT;
2959: }
1.152 daniel 2960: } else if (RAW == '\''){
1.45 daniel 2961: NEXT;
1.135 daniel 2962: c = CUR;
2963: while (IS_CHAR(c) && (c != '\'')) {
2964: if (len + 1 >= size) {
2965: size *= 2;
2966: buf = xmlRealloc(buf, size * sizeof(xmlChar));
2967: if (buf == NULL) {
2968: fprintf(stderr, "realloc of %d byte failed\n", size);
2969: return(NULL);
2970: }
2971: }
2972: buf[len++] = c;
2973: NEXT;
2974: c = CUR;
2975: }
1.152 daniel 2976: if (RAW != '\'') {
1.123 daniel 2977: ctxt->errNo = XML_ERR_STRING_NOT_CLOSED;
1.55 daniel 2978: if ((ctxt->sax != NULL) && (ctxt->sax->error != NULL))
1.135 daniel 2979: ctxt->sax->error(ctxt->userData,
2980: "String not closed \"%.50s\"\n", buf);
1.59 daniel 2981: ctxt->wellFormed = 0;
1.55 daniel 2982: } else {
1.45 daniel 2983: NEXT;
2984: }
2985: }
1.135 daniel 2986: return(buf);
1.45 daniel 2987: }
2988:
1.50 daniel 2989: /**
2990: * xmlParseNamespace:
2991: * @ctxt: an XML parser context
2992: *
1.45 daniel 2993: * [OLD] xmlParseNamespace: parse specific PI '<?namespace ...' constructs.
2994: *
2995: * This is what the older xml-name Working Draft specified, a bunch of
2996: * other stuff may still rely on it, so support is still here as
1.127 daniel 2997: * if it was declared on the root of the Tree:-(
1.110 daniel 2998: *
2999: * To be removed at next drop of binary compatibility
1.45 daniel 3000: */
3001:
1.55 daniel 3002: void
3003: xmlParseNamespace(xmlParserCtxtPtr ctxt) {
1.123 daniel 3004: xmlChar *href = NULL;
3005: xmlChar *prefix = NULL;
1.45 daniel 3006: int garbage = 0;
3007:
3008: /*
3009: * We just skipped "namespace" or "xml:namespace"
3010: */
3011: SKIP_BLANKS;
3012:
1.153 daniel 3013: while (IS_CHAR(RAW) && (RAW != '>')) {
1.45 daniel 3014: /*
3015: * We can have "ns" or "prefix" attributes
3016: * Old encoding as 'href' or 'AS' attributes is still supported
3017: */
1.152 daniel 3018: if ((RAW == 'n') && (NXT(1) == 's')) {
1.45 daniel 3019: garbage = 0;
3020: SKIP(2);
3021: SKIP_BLANKS;
3022:
1.152 daniel 3023: if (RAW != '=') continue;
1.45 daniel 3024: NEXT;
3025: SKIP_BLANKS;
3026:
3027: href = xmlParseQuotedString(ctxt);
3028: SKIP_BLANKS;
1.152 daniel 3029: } else if ((RAW == 'h') && (NXT(1) == 'r') &&
1.45 daniel 3030: (NXT(2) == 'e') && (NXT(3) == 'f')) {
3031: garbage = 0;
3032: SKIP(4);
3033: SKIP_BLANKS;
3034:
1.152 daniel 3035: if (RAW != '=') continue;
1.45 daniel 3036: NEXT;
3037: SKIP_BLANKS;
3038:
3039: href = xmlParseQuotedString(ctxt);
3040: SKIP_BLANKS;
1.152 daniel 3041: } else if ((RAW == 'p') && (NXT(1) == 'r') &&
1.45 daniel 3042: (NXT(2) == 'e') && (NXT(3) == 'f') &&
3043: (NXT(4) == 'i') && (NXT(5) == 'x')) {
3044: garbage = 0;
3045: SKIP(6);
3046: SKIP_BLANKS;
3047:
1.152 daniel 3048: if (RAW != '=') continue;
1.45 daniel 3049: NEXT;
3050: SKIP_BLANKS;
3051:
3052: prefix = xmlParseQuotedString(ctxt);
3053: SKIP_BLANKS;
1.152 daniel 3054: } else if ((RAW == 'A') && (NXT(1) == 'S')) {
1.45 daniel 3055: garbage = 0;
3056: SKIP(2);
3057: SKIP_BLANKS;
3058:
1.152 daniel 3059: if (RAW != '=') continue;
1.45 daniel 3060: NEXT;
3061: SKIP_BLANKS;
3062:
3063: prefix = xmlParseQuotedString(ctxt);
3064: SKIP_BLANKS;
1.152 daniel 3065: } else if ((RAW == '?') && (NXT(1) == '>')) {
1.45 daniel 3066: garbage = 0;
1.91 daniel 3067: NEXT;
1.45 daniel 3068: } else {
3069: /*
3070: * Found garbage when parsing the namespace
3071: */
1.122 daniel 3072: if (!garbage) {
1.55 daniel 3073: if ((ctxt->sax != NULL) && (ctxt->sax->error != NULL))
1.122 daniel 3074: ctxt->sax->error(ctxt->userData,
3075: "xmlParseNamespace found garbage\n");
3076: }
1.123 daniel 3077: ctxt->errNo = XML_ERR_NS_DECL_ERROR;
1.59 daniel 3078: ctxt->wellFormed = 0;
1.45 daniel 3079: NEXT;
3080: }
3081: }
3082:
3083: MOVETO_ENDTAG(CUR_PTR);
3084: NEXT;
3085:
3086: /*
3087: * Register the DTD.
1.72 daniel 3088: if (href != NULL)
3089: if ((ctxt->sax != NULL) && (ctxt->sax->globalNamespace != NULL))
1.74 daniel 3090: ctxt->sax->globalNamespace(ctxt->userData, href, prefix);
1.45 daniel 3091: */
3092:
1.119 daniel 3093: if (prefix != NULL) xmlFree(prefix);
3094: if (href != NULL) xmlFree(href);
1.45 daniel 3095: }
3096:
1.28 daniel 3097: /************************************************************************
3098: * *
3099: * The parser itself *
3100: * Relates to http://www.w3.org/TR/REC-xml *
3101: * *
3102: ************************************************************************/
1.14 veillard 3103:
1.50 daniel 3104: /**
1.97 daniel 3105: * xmlScanName:
3106: * @ctxt: an XML parser context
3107: *
3108: * Trickery: parse an XML name but without consuming the input flow
3109: * Needed for rollback cases.
3110: *
3111: * [4] NameChar ::= Letter | Digit | '.' | '-' | '_' | ':' |
3112: * CombiningChar | Extender
3113: *
3114: * [5] Name ::= (Letter | '_' | ':') (NameChar)*
3115: *
3116: * [6] Names ::= Name (S Name)*
3117: *
3118: * Returns the Name parsed or NULL
3119: */
3120:
1.123 daniel 3121: xmlChar *
1.97 daniel 3122: xmlScanName(xmlParserCtxtPtr ctxt) {
1.123 daniel 3123: xmlChar buf[XML_MAX_NAMELEN];
1.97 daniel 3124: int len = 0;
3125:
3126: GROW;
1.152 daniel 3127: if (!IS_LETTER(RAW) && (RAW != '_') &&
3128: (RAW != ':')) {
1.97 daniel 3129: return(NULL);
3130: }
3131:
3132: while ((IS_LETTER(NXT(len))) || (IS_DIGIT(NXT(len))) ||
3133: (NXT(len) == '.') || (NXT(len) == '-') ||
3134: (NXT(len) == '_') || (NXT(len) == ':') ||
3135: (IS_COMBINING(NXT(len))) ||
3136: (IS_EXTENDER(NXT(len)))) {
3137: buf[len] = NXT(len);
3138: len++;
3139: if (len >= XML_MAX_NAMELEN) {
3140: fprintf(stderr,
3141: "xmlScanName: reached XML_MAX_NAMELEN limit\n");
3142: while ((IS_LETTER(NXT(len))) || (IS_DIGIT(NXT(len))) ||
3143: (NXT(len) == '.') || (NXT(len) == '-') ||
3144: (NXT(len) == '_') || (NXT(len) == ':') ||
3145: (IS_COMBINING(NXT(len))) ||
3146: (IS_EXTENDER(NXT(len))))
3147: len++;
3148: break;
3149: }
3150: }
3151: return(xmlStrndup(buf, len));
3152: }
3153:
3154: /**
1.50 daniel 3155: * xmlParseName:
3156: * @ctxt: an XML parser context
3157: *
3158: * parse an XML name.
1.22 daniel 3159: *
3160: * [4] NameChar ::= Letter | Digit | '.' | '-' | '_' | ':' |
3161: * CombiningChar | Extender
3162: *
3163: * [5] Name ::= (Letter | '_' | ':') (NameChar)*
3164: *
3165: * [6] Names ::= Name (S Name)*
1.68 daniel 3166: *
3167: * Returns the Name parsed or NULL
1.1 veillard 3168: */
3169:
1.123 daniel 3170: xmlChar *
1.55 daniel 3171: xmlParseName(xmlParserCtxtPtr ctxt) {
1.160 daniel 3172: xmlChar buf[XML_MAX_NAMELEN + 5];
3173: int len = 0, l;
3174: int c;
1.1 veillard 3175:
1.91 daniel 3176: GROW;
1.160 daniel 3177: c = CUR_CHAR(l);
3178: if (!IS_LETTER(c) && (c != '_') &&
3179: (c != ':')) {
1.91 daniel 3180: return(NULL);
3181: }
1.40 daniel 3182:
1.160 daniel 3183: while ((IS_LETTER(c)) || (IS_DIGIT(c)) ||
3184: (c == '.') || (c == '-') ||
3185: (c == '_') || (c == ':') ||
3186: (IS_COMBINING(c)) ||
3187: (IS_EXTENDER(c))) {
3188: COPY_BUF(l,buf,len,c);
3189: NEXTL(l);
3190: c = CUR_CHAR(l);
1.91 daniel 3191: if (len >= XML_MAX_NAMELEN) {
3192: fprintf(stderr,
3193: "xmlParseName: reached XML_MAX_NAMELEN limit\n");
1.160 daniel 3194: while ((IS_LETTER(c)) || (IS_DIGIT(c)) ||
3195: (c == '.') || (c == '-') ||
3196: (c == '_') || (c == ':') ||
3197: (IS_COMBINING(c)) ||
3198: (IS_EXTENDER(c))) {
3199: NEXTL(l);
3200: c = CUR_CHAR(l);
1.97 daniel 3201: }
1.91 daniel 3202: break;
3203: }
3204: }
3205: return(xmlStrndup(buf, len));
1.22 daniel 3206: }
3207:
1.50 daniel 3208: /**
1.135 daniel 3209: * xmlParseStringName:
3210: * @ctxt: an XML parser context
3211: * @str: a pointer to an index in the string
3212: *
3213: * parse an XML name.
3214: *
3215: * [4] NameChar ::= Letter | Digit | '.' | '-' | '_' | ':' |
3216: * CombiningChar | Extender
3217: *
3218: * [5] Name ::= (Letter | '_' | ':') (NameChar)*
3219: *
3220: * [6] Names ::= Name (S Name)*
3221: *
3222: * Returns the Name parsed or NULL. The str pointer
3223: * is updated to the current location in the string.
3224: */
3225:
3226: xmlChar *
3227: xmlParseStringName(xmlParserCtxtPtr ctxt, const xmlChar** str) {
3228: const xmlChar *ptr;
3229: const xmlChar *start;
3230: xmlChar cur;
3231:
3232: if ((str == NULL) || (*str == NULL)) return(NULL);
3233:
3234: start = ptr = *str;
3235: cur = *ptr;
3236: if (!IS_LETTER(cur) && (cur != '_') &&
3237: (cur != ':')) {
3238: return(NULL);
3239: }
3240:
3241: while ((IS_LETTER(cur)) || (IS_DIGIT(cur)) ||
3242: (cur == '.') || (cur == '-') ||
3243: (cur == '_') || (cur == ':') ||
3244: (IS_COMBINING(cur)) ||
3245: (IS_EXTENDER(cur))) {
3246: ptr++;
3247: cur = *ptr;
3248: }
3249: *str = ptr;
3250: return(xmlStrndup(start, ptr - start ));
3251: }
3252:
3253: /**
1.50 daniel 3254: * xmlParseNmtoken:
3255: * @ctxt: an XML parser context
3256: *
3257: * parse an XML Nmtoken.
1.22 daniel 3258: *
3259: * [7] Nmtoken ::= (NameChar)+
3260: *
3261: * [8] Nmtokens ::= Nmtoken (S Nmtoken)*
1.68 daniel 3262: *
3263: * Returns the Nmtoken parsed or NULL
1.22 daniel 3264: */
3265:
1.123 daniel 3266: xmlChar *
1.55 daniel 3267: xmlParseNmtoken(xmlParserCtxtPtr ctxt) {
1.123 daniel 3268: xmlChar buf[XML_MAX_NAMELEN];
1.91 daniel 3269: int len = 0;
1.160 daniel 3270: int c,l;
1.22 daniel 3271:
1.91 daniel 3272: GROW;
1.160 daniel 3273: c = CUR_CHAR(l);
3274: while ((IS_LETTER(c)) || (IS_DIGIT(c)) ||
3275: (c == '.') || (c == '-') ||
3276: (c == '_') || (c == ':') ||
3277: (IS_COMBINING(c)) ||
3278: (IS_EXTENDER(c))) {
3279: COPY_BUF(l,buf,len,c);
3280: NEXTL(l);
3281: c = CUR_CHAR(l);
1.91 daniel 3282: if (len >= XML_MAX_NAMELEN) {
3283: fprintf(stderr,
3284: "xmlParseNmtoken: reached XML_MAX_NAMELEN limit\n");
1.160 daniel 3285: while ((IS_LETTER(c)) || (IS_DIGIT(c)) ||
3286: (c == '.') || (c == '-') ||
3287: (c == '_') || (c == ':') ||
3288: (IS_COMBINING(c)) ||
3289: (IS_EXTENDER(c))) {
3290: NEXTL(l);
3291: c = CUR_CHAR(l);
3292: }
1.91 daniel 3293: break;
3294: }
3295: }
1.168 ! daniel 3296: if (len == 0)
! 3297: return(NULL);
1.91 daniel 3298: return(xmlStrndup(buf, len));
1.1 veillard 3299: }
3300:
1.50 daniel 3301: /**
3302: * xmlParseEntityValue:
3303: * @ctxt: an XML parser context
1.78 daniel 3304: * @orig: if non-NULL store a copy of the original entity value
1.50 daniel 3305: *
3306: * parse a value for ENTITY decl.
1.24 daniel 3307: *
3308: * [9] EntityValue ::= '"' ([^%&"] | PEReference | Reference)* '"' |
3309: * "'" ([^%&'] | PEReference | Reference)* "'"
1.68 daniel 3310: *
1.78 daniel 3311: * Returns the EntityValue parsed with reference substitued or NULL
1.24 daniel 3312: */
3313:
1.123 daniel 3314: xmlChar *
3315: xmlParseEntityValue(xmlParserCtxtPtr ctxt, xmlChar **orig) {
1.135 daniel 3316: xmlChar *buf = NULL;
3317: int len = 0;
1.140 daniel 3318: int size = XML_PARSER_BUFFER_SIZE;
1.152 daniel 3319: int c, l;
1.135 daniel 3320: xmlChar stop;
1.123 daniel 3321: xmlChar *ret = NULL;
1.98 daniel 3322: xmlParserInputPtr input;
1.24 daniel 3323:
1.152 daniel 3324: if (RAW == '"') stop = '"';
3325: else if (RAW == '\'') stop = '\'';
1.135 daniel 3326: else {
3327: ctxt->errNo = XML_ERR_ENTITY_NOT_STARTED;
3328: if ((ctxt->sax != NULL) && (ctxt->sax->error != NULL))
3329: ctxt->sax->error(ctxt->userData, "EntityValue: \" or ' expected\n");
3330: ctxt->wellFormed = 0;
3331: return(NULL);
3332: }
3333: buf = (xmlChar *) xmlMalloc(size * sizeof(xmlChar));
3334: if (buf == NULL) {
3335: fprintf(stderr, "malloc of %d byte failed\n", size);
3336: return(NULL);
3337: }
1.94 daniel 3338:
1.135 daniel 3339: /*
3340: * The content of the entity definition is copied in a buffer.
3341: */
1.94 daniel 3342:
1.135 daniel 3343: ctxt->instate = XML_PARSER_ENTITY_VALUE;
3344: input = ctxt->input;
3345: GROW;
3346: NEXT;
1.152 daniel 3347: c = CUR_CHAR(l);
1.135 daniel 3348: /*
3349: * NOTE: 4.4.5 Included in Literal
3350: * When a parameter entity reference appears in a literal entity
3351: * value, ... a single or double quote character in the replacement
3352: * text is always treated as a normal data character and will not
3353: * terminate the literal.
3354: * In practice it means we stop the loop only when back at parsing
3355: * the initial entity and the quote is found
3356: */
3357: while (IS_CHAR(c) && ((c != stop) || (ctxt->input != input))) {
1.152 daniel 3358: if (len + 5 >= size) {
1.135 daniel 3359: size *= 2;
3360: buf = xmlRealloc(buf, size * sizeof(xmlChar));
3361: if (buf == NULL) {
3362: fprintf(stderr, "realloc of %d byte failed\n", size);
3363: return(NULL);
1.94 daniel 3364: }
1.79 daniel 3365: }
1.152 daniel 3366: COPY_BUF(l,buf,len,c);
3367: NEXTL(l);
1.98 daniel 3368: /*
1.135 daniel 3369: * Pop-up of finished entities.
1.98 daniel 3370: */
1.152 daniel 3371: while ((RAW == 0) && (ctxt->inputNr > 1))
1.135 daniel 3372: xmlPopInput(ctxt);
1.152 daniel 3373:
3374: c = CUR_CHAR(l);
1.135 daniel 3375: if (c == 0) {
1.94 daniel 3376: GROW;
1.152 daniel 3377: c = CUR_CHAR(l);
1.79 daniel 3378: }
1.135 daniel 3379: }
3380: buf[len] = 0;
3381:
3382: /*
3383: * Then PEReference entities are substituted.
3384: */
3385: if (c != stop) {
3386: ctxt->errNo = XML_ERR_ENTITY_NOT_FINISHED;
1.55 daniel 3387: if ((ctxt->sax != NULL) && (ctxt->sax->error != NULL))
1.135 daniel 3388: ctxt->sax->error(ctxt->userData, "EntityValue: \" expected\n");
1.59 daniel 3389: ctxt->wellFormed = 0;
1.135 daniel 3390: } else {
3391: NEXT;
3392: /*
3393: * NOTE: 4.4.7 Bypassed
3394: * When a general entity reference appears in the EntityValue in
3395: * an entity declaration, it is bypassed and left as is.
3396: * so XML_SUBSTITUTE_REF is not set.
3397: */
3398: ret = xmlStringDecodeEntities(ctxt, buf, XML_SUBSTITUTE_PEREF,
3399: 0, 0, 0);
3400: if (orig != NULL)
3401: *orig = buf;
3402: else
3403: xmlFree(buf);
1.24 daniel 3404: }
3405:
3406: return(ret);
3407: }
3408:
1.50 daniel 3409: /**
3410: * xmlParseAttValue:
3411: * @ctxt: an XML parser context
3412: *
3413: * parse a value for an attribute
1.78 daniel 3414: * Note: the parser won't do substitution of entities here, this
1.113 daniel 3415: * will be handled later in xmlStringGetNodeList
1.29 daniel 3416: *
3417: * [10] AttValue ::= '"' ([^<&"] | Reference)* '"' |
3418: * "'" ([^<&'] | Reference)* "'"
1.68 daniel 3419: *
1.129 daniel 3420: * 3.3.3 Attribute-Value Normalization:
3421: * Before the value of an attribute is passed to the application or
3422: * checked for validity, the XML processor must normalize it as follows:
3423: * - a character reference is processed by appending the referenced
3424: * character to the attribute value
3425: * - an entity reference is processed by recursively processing the
3426: * replacement text of the entity
3427: * - a whitespace character (#x20, #xD, #xA, #x9) is processed by
3428: * appending #x20 to the normalized value, except that only a single
3429: * #x20 is appended for a "#xD#xA" sequence that is part of an external
3430: * parsed entity or the literal entity value of an internal parsed entity
3431: * - other characters are processed by appending them to the normalized value
1.130 daniel 3432: * If the declared value is not CDATA, then the XML processor must further
3433: * process the normalized attribute value by discarding any leading and
3434: * trailing space (#x20) characters, and by replacing sequences of space
3435: * (#x20) characters by a single space (#x20) character.
3436: * All attributes for which no declaration has been read should be treated
3437: * by a non-validating parser as if declared CDATA.
1.129 daniel 3438: *
3439: * Returns the AttValue parsed or NULL. The value has to be freed by the caller.
1.29 daniel 3440: */
3441:
1.123 daniel 3442: xmlChar *
1.55 daniel 3443: xmlParseAttValue(xmlParserCtxtPtr ctxt) {
1.129 daniel 3444: xmlChar limit = 0;
3445: xmlChar *buffer = NULL;
3446: int buffer_size = 0;
3447: xmlChar *out = NULL;
3448:
3449: xmlChar *current = NULL;
3450: xmlEntityPtr ent;
3451: xmlChar cur;
3452:
1.29 daniel 3453:
1.91 daniel 3454: SHRINK;
1.151 daniel 3455: if (NXT(0) == '"') {
1.96 daniel 3456: ctxt->instate = XML_PARSER_ATTRIBUTE_VALUE;
1.129 daniel 3457: limit = '"';
1.40 daniel 3458: NEXT;
1.151 daniel 3459: } else if (NXT(0) == '\'') {
1.129 daniel 3460: limit = '\'';
1.96 daniel 3461: ctxt->instate = XML_PARSER_ATTRIBUTE_VALUE;
1.40 daniel 3462: NEXT;
1.29 daniel 3463: } else {
1.123 daniel 3464: ctxt->errNo = XML_ERR_ATTRIBUTE_NOT_STARTED;
1.55 daniel 3465: if ((ctxt->sax != NULL) && (ctxt->sax->error != NULL))
1.74 daniel 3466: ctxt->sax->error(ctxt->userData, "AttValue: \" or ' expected\n");
1.59 daniel 3467: ctxt->wellFormed = 0;
1.129 daniel 3468: return(NULL);
1.29 daniel 3469: }
3470:
1.129 daniel 3471: /*
3472: * allocate a translation buffer.
3473: */
1.140 daniel 3474: buffer_size = XML_PARSER_BUFFER_SIZE;
1.129 daniel 3475: buffer = (xmlChar *) xmlMalloc(buffer_size * sizeof(xmlChar));
3476: if (buffer == NULL) {
3477: perror("xmlParseAttValue: malloc failed");
3478: return(NULL);
3479: }
3480: out = buffer;
3481:
3482: /*
3483: * Ok loop until we reach one of the ending char or a size limit.
3484: */
3485: cur = CUR;
1.156 daniel 3486: while (((NXT(0) != limit) && (cur != '<')) || (ctxt->token != 0)) {
1.129 daniel 3487: if (cur == 0) break;
3488: if ((cur == '&') && (NXT(1) == '#')) {
3489: int val = xmlParseCharRef(ctxt);
3490: *out++ = val;
3491: } else if (cur == '&') {
3492: ent = xmlParseEntityRef(ctxt);
3493: if ((ent != NULL) &&
3494: (ctxt->replaceEntities != 0)) {
3495: current = ent->content;
3496: while (*current != 0) {
3497: *out++ = *current++;
3498: if (out - buffer > buffer_size - 10) {
3499: int index = out - buffer;
3500:
3501: growBuffer(buffer);
3502: out = &buffer[index];
3503: }
3504: }
3505: } else if (ent != NULL) {
3506: int i = xmlStrlen(ent->name);
3507: const xmlChar *cur = ent->name;
3508:
3509: *out++ = '&';
3510: if (out - buffer > buffer_size - i - 10) {
3511: int index = out - buffer;
3512:
3513: growBuffer(buffer);
3514: out = &buffer[index];
3515: }
3516: for (;i > 0;i--)
3517: *out++ = *cur++;
3518: *out++ = ';';
3519: }
3520: } else {
1.156 daniel 3521: /* invalid for UTF-8 , use COPY(out); !!! */
1.129 daniel 3522: if ((cur == 0x20) || (cur == 0xD) || (cur == 0xA) || (cur == 0x9)) {
1.130 daniel 3523: *out++ = 0x20;
3524: if (out - buffer > buffer_size - 10) {
3525: int index = out - buffer;
3526:
3527: growBuffer(buffer);
3528: out = &buffer[index];
1.129 daniel 3529: }
3530: } else {
3531: *out++ = cur;
3532: if (out - buffer > buffer_size - 10) {
3533: int index = out - buffer;
3534:
3535: growBuffer(buffer);
3536: out = &buffer[index];
3537: }
3538: }
3539: NEXT;
3540: }
3541: cur = CUR;
3542: }
3543: *out++ = 0;
1.152 daniel 3544: if (RAW == '<') {
1.129 daniel 3545: if ((ctxt->sax != NULL) && (ctxt->sax->error != NULL))
3546: ctxt->sax->error(ctxt->userData,
3547: "Unescaped '<' not allowed in attributes values\n");
3548: ctxt->errNo = XML_ERR_LT_IN_ATTRIBUTE;
3549: ctxt->wellFormed = 0;
1.152 daniel 3550: } else if (RAW != limit) {
1.129 daniel 3551: if ((ctxt->sax != NULL) && (ctxt->sax->error != NULL))
3552: ctxt->sax->error(ctxt->userData, "AttValue: ' expected\n");
3553: ctxt->errNo = XML_ERR_ATTRIBUTE_NOT_FINISHED;
3554: ctxt->wellFormed = 0;
3555: } else
3556: NEXT;
3557: return(buffer);
1.29 daniel 3558: }
3559:
1.50 daniel 3560: /**
3561: * xmlParseSystemLiteral:
3562: * @ctxt: an XML parser context
3563: *
3564: * parse an XML Literal
1.21 daniel 3565: *
1.22 daniel 3566: * [11] SystemLiteral ::= ('"' [^"]* '"') | ("'" [^']* "'")
1.68 daniel 3567: *
3568: * Returns the SystemLiteral parsed or NULL
1.21 daniel 3569: */
3570:
1.123 daniel 3571: xmlChar *
1.55 daniel 3572: xmlParseSystemLiteral(xmlParserCtxtPtr ctxt) {
1.135 daniel 3573: xmlChar *buf = NULL;
3574: int len = 0;
1.140 daniel 3575: int size = XML_PARSER_BUFFER_SIZE;
1.152 daniel 3576: int cur, l;
1.135 daniel 3577: xmlChar stop;
1.168 ! daniel 3578: int state = ctxt->instate;
1.21 daniel 3579:
1.91 daniel 3580: SHRINK;
1.152 daniel 3581: if (RAW == '"') {
1.40 daniel 3582: NEXT;
1.135 daniel 3583: stop = '"';
1.152 daniel 3584: } else if (RAW == '\'') {
1.40 daniel 3585: NEXT;
1.135 daniel 3586: stop = '\'';
1.21 daniel 3587: } else {
1.55 daniel 3588: if ((ctxt->sax != NULL) && (ctxt->sax->error != NULL))
1.122 daniel 3589: ctxt->sax->error(ctxt->userData,
3590: "SystemLiteral \" or ' expected\n");
1.123 daniel 3591: ctxt->errNo = XML_ERR_LITERAL_NOT_STARTED;
1.59 daniel 3592: ctxt->wellFormed = 0;
1.135 daniel 3593: return(NULL);
1.21 daniel 3594: }
3595:
1.135 daniel 3596: buf = (xmlChar *) xmlMalloc(size * sizeof(xmlChar));
3597: if (buf == NULL) {
3598: fprintf(stderr, "malloc of %d byte failed\n", size);
3599: return(NULL);
3600: }
1.168 ! daniel 3601: ctxt->instate = XML_PARSER_SYSTEM_LITERAL;
1.152 daniel 3602: cur = CUR_CHAR(l);
1.135 daniel 3603: while ((IS_CHAR(cur)) && (cur != stop)) {
1.152 daniel 3604: if (len + 5 >= size) {
1.135 daniel 3605: size *= 2;
3606: buf = xmlRealloc(buf, size * sizeof(xmlChar));
3607: if (buf == NULL) {
3608: fprintf(stderr, "realloc of %d byte failed\n", size);
1.168 ! daniel 3609: ctxt->instate = state;
1.135 daniel 3610: return(NULL);
3611: }
3612: }
1.152 daniel 3613: COPY_BUF(l,buf,len,cur);
3614: NEXTL(l);
3615: cur = CUR_CHAR(l);
1.135 daniel 3616: if (cur == 0) {
3617: GROW;
3618: SHRINK;
1.152 daniel 3619: cur = CUR_CHAR(l);
1.135 daniel 3620: }
3621: }
3622: buf[len] = 0;
1.168 ! daniel 3623: ctxt->instate = state;
1.135 daniel 3624: if (!IS_CHAR(cur)) {
3625: if ((ctxt->sax != NULL) && (ctxt->sax->error != NULL))
3626: ctxt->sax->error(ctxt->userData, "Unfinished SystemLiteral\n");
3627: ctxt->errNo = XML_ERR_LITERAL_NOT_FINISHED;
3628: ctxt->wellFormed = 0;
3629: } else {
3630: NEXT;
3631: }
3632: return(buf);
1.21 daniel 3633: }
3634:
1.50 daniel 3635: /**
3636: * xmlParsePubidLiteral:
3637: * @ctxt: an XML parser context
1.21 daniel 3638: *
1.50 daniel 3639: * parse an XML public literal
1.68 daniel 3640: *
3641: * [12] PubidLiteral ::= '"' PubidChar* '"' | "'" (PubidChar - "'")* "'"
3642: *
3643: * Returns the PubidLiteral parsed or NULL.
1.21 daniel 3644: */
3645:
1.123 daniel 3646: xmlChar *
1.55 daniel 3647: xmlParsePubidLiteral(xmlParserCtxtPtr ctxt) {
1.135 daniel 3648: xmlChar *buf = NULL;
3649: int len = 0;
1.140 daniel 3650: int size = XML_PARSER_BUFFER_SIZE;
1.135 daniel 3651: xmlChar cur;
3652: xmlChar stop;
1.125 daniel 3653:
1.91 daniel 3654: SHRINK;
1.152 daniel 3655: if (RAW == '"') {
1.40 daniel 3656: NEXT;
1.135 daniel 3657: stop = '"';
1.152 daniel 3658: } else if (RAW == '\'') {
1.40 daniel 3659: NEXT;
1.135 daniel 3660: stop = '\'';
1.21 daniel 3661: } else {
1.55 daniel 3662: if ((ctxt->sax != NULL) && (ctxt->sax->error != NULL))
1.122 daniel 3663: ctxt->sax->error(ctxt->userData,
3664: "SystemLiteral \" or ' expected\n");
1.123 daniel 3665: ctxt->errNo = XML_ERR_LITERAL_NOT_STARTED;
1.59 daniel 3666: ctxt->wellFormed = 0;
1.135 daniel 3667: return(NULL);
3668: }
3669: buf = (xmlChar *) xmlMalloc(size * sizeof(xmlChar));
3670: if (buf == NULL) {
3671: fprintf(stderr, "malloc of %d byte failed\n", size);
3672: return(NULL);
3673: }
3674: cur = CUR;
3675: while ((IS_PUBIDCHAR(cur)) && (cur != stop)) {
3676: if (len + 1 >= size) {
3677: size *= 2;
3678: buf = xmlRealloc(buf, size * sizeof(xmlChar));
3679: if (buf == NULL) {
3680: fprintf(stderr, "realloc of %d byte failed\n", size);
3681: return(NULL);
3682: }
3683: }
3684: buf[len++] = cur;
3685: NEXT;
3686: cur = CUR;
3687: if (cur == 0) {
3688: GROW;
3689: SHRINK;
3690: cur = CUR;
3691: }
3692: }
3693: buf[len] = 0;
3694: if (cur != stop) {
3695: if ((ctxt->sax != NULL) && (ctxt->sax->error != NULL))
3696: ctxt->sax->error(ctxt->userData, "Unfinished PubidLiteral\n");
3697: ctxt->errNo = XML_ERR_LITERAL_NOT_FINISHED;
3698: ctxt->wellFormed = 0;
3699: } else {
3700: NEXT;
1.21 daniel 3701: }
1.135 daniel 3702: return(buf);
1.21 daniel 3703: }
3704:
1.50 daniel 3705: /**
3706: * xmlParseCharData:
3707: * @ctxt: an XML parser context
3708: * @cdata: int indicating whether we are within a CDATA section
3709: *
3710: * parse a CharData section.
3711: * if we are within a CDATA section ']]>' marks an end of section.
1.27 daniel 3712: *
1.151 daniel 3713: * The right angle bracket (>) may be represented using the string ">",
3714: * and must, for compatibility, be escaped using ">" or a character
3715: * reference when it appears in the string "]]>" in content, when that
3716: * string is not marking the end of a CDATA section.
3717: *
1.27 daniel 3718: * [14] CharData ::= [^<&]* - ([^<&]* ']]>' [^<&]*)
3719: */
3720:
1.55 daniel 3721: void
3722: xmlParseCharData(xmlParserCtxtPtr ctxt, int cdata) {
1.152 daniel 3723: xmlChar buf[XML_PARSER_BIG_BUFFER_SIZE + 5];
1.91 daniel 3724: int nbchar = 0;
1.152 daniel 3725: int cur, l;
1.27 daniel 3726:
1.91 daniel 3727: SHRINK;
1.152 daniel 3728: cur = CUR_CHAR(l);
1.160 daniel 3729: while ((IS_CHAR(cur)) && ((cur != '<') || (ctxt->token == '<')) &&
1.153 daniel 3730: ((cur != '&') || (ctxt->token == '&'))) {
1.97 daniel 3731: if ((cur == ']') && (NXT(1) == ']') &&
1.59 daniel 3732: (NXT(2) == '>')) {
3733: if (cdata) break;
3734: else {
3735: if ((ctxt->sax != NULL) && (ctxt->sax->error != NULL))
1.151 daniel 3736: ctxt->sax->error(ctxt->userData,
1.59 daniel 3737: "Sequence ']]>' not allowed in content\n");
1.123 daniel 3738: ctxt->errNo = XML_ERR_MISPLACED_CDATA_END;
1.151 daniel 3739: /* Should this be relaxed ??? I see a "must here */
3740: ctxt->wellFormed = 0;
1.59 daniel 3741: }
3742: }
1.152 daniel 3743: COPY_BUF(l,buf,nbchar,cur);
3744: if (nbchar >= XML_PARSER_BIG_BUFFER_SIZE) {
1.91 daniel 3745: /*
3746: * Ok the segment is to be consumed as chars.
3747: */
3748: if (ctxt->sax != NULL) {
3749: if (areBlanks(ctxt, buf, nbchar)) {
3750: if (ctxt->sax->ignorableWhitespace != NULL)
3751: ctxt->sax->ignorableWhitespace(ctxt->userData,
3752: buf, nbchar);
3753: } else {
3754: if (ctxt->sax->characters != NULL)
3755: ctxt->sax->characters(ctxt->userData, buf, nbchar);
3756: }
3757: }
3758: nbchar = 0;
3759: }
1.152 daniel 3760: NEXTL(l);
3761: cur = CUR_CHAR(l);
1.27 daniel 3762: }
1.91 daniel 3763: if (nbchar != 0) {
3764: /*
3765: * Ok the segment is to be consumed as chars.
3766: */
3767: if (ctxt->sax != NULL) {
3768: if (areBlanks(ctxt, buf, nbchar)) {
3769: if (ctxt->sax->ignorableWhitespace != NULL)
3770: ctxt->sax->ignorableWhitespace(ctxt->userData, buf, nbchar);
3771: } else {
3772: if (ctxt->sax->characters != NULL)
3773: ctxt->sax->characters(ctxt->userData, buf, nbchar);
3774: }
3775: }
1.45 daniel 3776: }
1.27 daniel 3777: }
3778:
1.50 daniel 3779: /**
3780: * xmlParseExternalID:
3781: * @ctxt: an XML parser context
1.123 daniel 3782: * @publicID: a xmlChar** receiving PubidLiteral
1.67 daniel 3783: * @strict: indicate whether we should restrict parsing to only
3784: * production [75], see NOTE below
1.50 daniel 3785: *
1.67 daniel 3786: * Parse an External ID or a Public ID
3787: *
3788: * NOTE: Productions [75] and [83] interract badly since [75] can generate
3789: * 'PUBLIC' S PubidLiteral S SystemLiteral
1.22 daniel 3790: *
3791: * [75] ExternalID ::= 'SYSTEM' S SystemLiteral
3792: * | 'PUBLIC' S PubidLiteral S SystemLiteral
1.67 daniel 3793: *
3794: * [83] PublicID ::= 'PUBLIC' S PubidLiteral
3795: *
1.68 daniel 3796: * Returns the function returns SystemLiteral and in the second
1.67 daniel 3797: * case publicID receives PubidLiteral, is strict is off
3798: * it is possible to return NULL and have publicID set.
1.22 daniel 3799: */
3800:
1.123 daniel 3801: xmlChar *
3802: xmlParseExternalID(xmlParserCtxtPtr ctxt, xmlChar **publicID, int strict) {
3803: xmlChar *URI = NULL;
1.22 daniel 3804:
1.91 daniel 3805: SHRINK;
1.152 daniel 3806: if ((RAW == 'S') && (NXT(1) == 'Y') &&
1.40 daniel 3807: (NXT(2) == 'S') && (NXT(3) == 'T') &&
3808: (NXT(4) == 'E') && (NXT(5) == 'M')) {
3809: SKIP(6);
1.59 daniel 3810: if (!IS_BLANK(CUR)) {
3811: if ((ctxt->sax != NULL) && (ctxt->sax->error != NULL))
1.74 daniel 3812: ctxt->sax->error(ctxt->userData,
1.59 daniel 3813: "Space required after 'SYSTEM'\n");
1.123 daniel 3814: ctxt->errNo = XML_ERR_SPACE_REQUIRED;
1.59 daniel 3815: ctxt->wellFormed = 0;
3816: }
1.42 daniel 3817: SKIP_BLANKS;
1.39 daniel 3818: URI = xmlParseSystemLiteral(ctxt);
1.59 daniel 3819: if (URI == NULL) {
1.55 daniel 3820: if ((ctxt->sax != NULL) && (ctxt->sax->error != NULL))
1.74 daniel 3821: ctxt->sax->error(ctxt->userData,
1.39 daniel 3822: "xmlParseExternalID: SYSTEM, no URI\n");
1.123 daniel 3823: ctxt->errNo = XML_ERR_URI_REQUIRED;
1.59 daniel 3824: ctxt->wellFormed = 0;
3825: }
1.152 daniel 3826: } else if ((RAW == 'P') && (NXT(1) == 'U') &&
1.40 daniel 3827: (NXT(2) == 'B') && (NXT(3) == 'L') &&
3828: (NXT(4) == 'I') && (NXT(5) == 'C')) {
3829: SKIP(6);
1.59 daniel 3830: if (!IS_BLANK(CUR)) {
3831: if ((ctxt->sax != NULL) && (ctxt->sax->error != NULL))
1.74 daniel 3832: ctxt->sax->error(ctxt->userData,
1.59 daniel 3833: "Space required after 'PUBLIC'\n");
1.123 daniel 3834: ctxt->errNo = XML_ERR_SPACE_REQUIRED;
1.59 daniel 3835: ctxt->wellFormed = 0;
3836: }
1.42 daniel 3837: SKIP_BLANKS;
1.39 daniel 3838: *publicID = xmlParsePubidLiteral(ctxt);
1.59 daniel 3839: if (*publicID == NULL) {
1.55 daniel 3840: if ((ctxt->sax != NULL) && (ctxt->sax->error != NULL))
1.74 daniel 3841: ctxt->sax->error(ctxt->userData,
1.39 daniel 3842: "xmlParseExternalID: PUBLIC, no Public Identifier\n");
1.123 daniel 3843: ctxt->errNo = XML_ERR_PUBID_REQUIRED;
1.59 daniel 3844: ctxt->wellFormed = 0;
3845: }
1.67 daniel 3846: if (strict) {
3847: /*
3848: * We don't handle [83] so "S SystemLiteral" is required.
3849: */
3850: if (!IS_BLANK(CUR)) {
3851: if ((ctxt->sax != NULL) && (ctxt->sax->error != NULL))
1.74 daniel 3852: ctxt->sax->error(ctxt->userData,
1.67 daniel 3853: "Space required after the Public Identifier\n");
1.123 daniel 3854: ctxt->errNo = XML_ERR_SPACE_REQUIRED;
1.67 daniel 3855: ctxt->wellFormed = 0;
3856: }
3857: } else {
3858: /*
3859: * We handle [83] so we return immediately, if
3860: * "S SystemLiteral" is not detected. From a purely parsing
3861: * point of view that's a nice mess.
3862: */
1.135 daniel 3863: const xmlChar *ptr;
3864: GROW;
3865:
3866: ptr = CUR_PTR;
1.67 daniel 3867: if (!IS_BLANK(*ptr)) return(NULL);
3868:
3869: while (IS_BLANK(*ptr)) ptr++;
3870: if ((*ptr != '\'') || (*ptr != '"')) return(NULL);
1.59 daniel 3871: }
1.42 daniel 3872: SKIP_BLANKS;
1.39 daniel 3873: URI = xmlParseSystemLiteral(ctxt);
1.59 daniel 3874: if (URI == NULL) {
1.55 daniel 3875: if ((ctxt->sax != NULL) && (ctxt->sax->error != NULL))
1.74 daniel 3876: ctxt->sax->error(ctxt->userData,
1.39 daniel 3877: "xmlParseExternalID: PUBLIC, no URI\n");
1.123 daniel 3878: ctxt->errNo = XML_ERR_URI_REQUIRED;
1.59 daniel 3879: ctxt->wellFormed = 0;
3880: }
1.22 daniel 3881: }
1.39 daniel 3882: return(URI);
1.22 daniel 3883: }
3884:
1.50 daniel 3885: /**
3886: * xmlParseComment:
1.69 daniel 3887: * @ctxt: an XML parser context
1.50 daniel 3888: *
1.3 veillard 3889: * Skip an XML (SGML) comment <!-- .... -->
1.38 daniel 3890: * The spec says that "For compatibility, the string "--" (double-hyphen)
3891: * must not occur within comments. "
1.22 daniel 3892: *
3893: * [15] Comment ::= '<!--' ((Char - '-') | ('-' (Char - '-')))* '-->'
1.3 veillard 3894: */
1.72 daniel 3895: void
1.114 daniel 3896: xmlParseComment(xmlParserCtxtPtr ctxt) {
1.135 daniel 3897: xmlChar *buf = NULL;
3898: int len = 0;
1.140 daniel 3899: int size = XML_PARSER_BUFFER_SIZE;
1.152 daniel 3900: int q, ql;
3901: int r, rl;
3902: int cur, l;
1.140 daniel 3903: xmlParserInputState state;
1.3 veillard 3904:
3905: /*
1.22 daniel 3906: * Check that there is a comment right here.
1.3 veillard 3907: */
1.152 daniel 3908: if ((RAW != '<') || (NXT(1) != '!') ||
1.72 daniel 3909: (NXT(2) != '-') || (NXT(3) != '-')) return;
1.3 veillard 3910:
1.140 daniel 3911: state = ctxt->instate;
1.97 daniel 3912: ctxt->instate = XML_PARSER_COMMENT;
1.91 daniel 3913: SHRINK;
1.40 daniel 3914: SKIP(4);
1.135 daniel 3915: buf = (xmlChar *) xmlMalloc(size * sizeof(xmlChar));
3916: if (buf == NULL) {
3917: fprintf(stderr, "malloc of %d byte failed\n", size);
1.140 daniel 3918: ctxt->instate = state;
1.135 daniel 3919: return;
3920: }
1.152 daniel 3921: q = CUR_CHAR(ql);
3922: NEXTL(ql);
3923: r = CUR_CHAR(rl);
3924: NEXTL(rl);
3925: cur = CUR_CHAR(l);
1.135 daniel 3926: while (IS_CHAR(cur) &&
3927: ((cur != '>') ||
3928: (r != '-') || (q != '-'))) {
3929: if ((r == '-') && (q == '-')) {
1.55 daniel 3930: if ((ctxt->sax != NULL) && (ctxt->sax->error != NULL))
1.74 daniel 3931: ctxt->sax->error(ctxt->userData,
1.38 daniel 3932: "Comment must not contain '--' (double-hyphen)`\n");
1.123 daniel 3933: ctxt->errNo = XML_ERR_HYPHEN_IN_COMMENT;
1.59 daniel 3934: ctxt->wellFormed = 0;
3935: }
1.152 daniel 3936: if (len + 5 >= size) {
1.135 daniel 3937: size *= 2;
3938: buf = xmlRealloc(buf, size * sizeof(xmlChar));
3939: if (buf == NULL) {
3940: fprintf(stderr, "realloc of %d byte failed\n", size);
1.140 daniel 3941: ctxt->instate = state;
1.135 daniel 3942: return;
3943: }
3944: }
1.152 daniel 3945: COPY_BUF(ql,buf,len,q);
1.135 daniel 3946: q = r;
1.152 daniel 3947: ql = rl;
1.135 daniel 3948: r = cur;
1.152 daniel 3949: rl = l;
3950: NEXTL(l);
3951: cur = CUR_CHAR(l);
1.135 daniel 3952: if (cur == 0) {
3953: SHRINK;
3954: GROW;
1.152 daniel 3955: cur = CUR_CHAR(l);
1.135 daniel 3956: }
1.3 veillard 3957: }
1.135 daniel 3958: buf[len] = 0;
3959: if (!IS_CHAR(cur)) {
1.55 daniel 3960: if ((ctxt->sax != NULL) && (ctxt->sax->error != NULL))
1.122 daniel 3961: ctxt->sax->error(ctxt->userData,
1.135 daniel 3962: "Comment not terminated \n<!--%.50s\n", buf);
1.123 daniel 3963: ctxt->errNo = XML_ERR_COMMENT_NOT_FINISHED;
1.59 daniel 3964: ctxt->wellFormed = 0;
1.3 veillard 3965: } else {
1.40 daniel 3966: NEXT;
1.114 daniel 3967: if ((ctxt->sax != NULL) && (ctxt->sax->comment != NULL))
1.135 daniel 3968: ctxt->sax->comment(ctxt->userData, buf);
3969: xmlFree(buf);
1.3 veillard 3970: }
1.140 daniel 3971: ctxt->instate = state;
1.3 veillard 3972: }
3973:
1.50 daniel 3974: /**
3975: * xmlParsePITarget:
3976: * @ctxt: an XML parser context
3977: *
3978: * parse the name of a PI
1.22 daniel 3979: *
3980: * [17] PITarget ::= Name - (('X' | 'x') ('M' | 'm') ('L' | 'l'))
1.68 daniel 3981: *
3982: * Returns the PITarget name or NULL
1.22 daniel 3983: */
3984:
1.123 daniel 3985: xmlChar *
1.55 daniel 3986: xmlParsePITarget(xmlParserCtxtPtr ctxt) {
1.123 daniel 3987: xmlChar *name;
1.22 daniel 3988:
3989: name = xmlParseName(ctxt);
1.139 daniel 3990: if ((name != NULL) &&
1.22 daniel 3991: ((name[0] == 'x') || (name[0] == 'X')) &&
1.31 daniel 3992: ((name[1] == 'm') || (name[1] == 'M')) &&
3993: ((name[2] == 'l') || (name[2] == 'L'))) {
1.139 daniel 3994: int i;
1.151 daniel 3995: if ((name[0] = 'x') && (name[1] == 'm') &&
3996: (name[2] = 'l') && (name[3] == 0)) {
3997: if ((ctxt->sax != NULL) && (ctxt->sax->error != NULL))
3998: ctxt->sax->error(ctxt->userData,
3999: "XML declaration allowed only at the start of the document\n");
4000: ctxt->errNo = XML_ERR_RESERVED_XML_NAME;
4001: ctxt->wellFormed = 0;
4002: return(name);
4003: } else if (name[3] == 0) {
4004: if ((ctxt->sax != NULL) && (ctxt->sax->error != NULL))
4005: ctxt->sax->error(ctxt->userData, "Invalid PI name\n");
4006: ctxt->errNo = XML_ERR_RESERVED_XML_NAME;
4007: ctxt->wellFormed = 0;
4008: return(name);
4009: }
1.139 daniel 4010: for (i = 0;;i++) {
4011: if (xmlW3CPIs[i] == NULL) break;
4012: if (!xmlStrcmp(name, (const xmlChar *)xmlW3CPIs[i]))
4013: return(name);
4014: }
4015: if ((ctxt->sax != NULL) && (ctxt->sax->warning != NULL)) {
4016: ctxt->sax->warning(ctxt->userData,
1.122 daniel 4017: "xmlParsePItarget: invalid name prefix 'xml'\n");
1.123 daniel 4018: ctxt->errNo = XML_ERR_RESERVED_XML_NAME;
1.122 daniel 4019: }
1.22 daniel 4020: }
4021: return(name);
4022: }
4023:
1.50 daniel 4024: /**
4025: * xmlParsePI:
4026: * @ctxt: an XML parser context
4027: *
4028: * parse an XML Processing Instruction.
1.22 daniel 4029: *
4030: * [16] PI ::= '<?' PITarget (S (Char* - (Char* '?>' Char*)))? '?>'
1.68 daniel 4031: *
1.69 daniel 4032: * The processing is transfered to SAX once parsed.
1.3 veillard 4033: */
4034:
1.55 daniel 4035: void
4036: xmlParsePI(xmlParserCtxtPtr ctxt) {
1.135 daniel 4037: xmlChar *buf = NULL;
4038: int len = 0;
1.140 daniel 4039: int size = XML_PARSER_BUFFER_SIZE;
1.152 daniel 4040: int cur, l;
1.123 daniel 4041: xmlChar *target;
1.140 daniel 4042: xmlParserInputState state;
1.22 daniel 4043:
1.152 daniel 4044: if ((RAW == '<') && (NXT(1) == '?')) {
1.140 daniel 4045: state = ctxt->instate;
4046: ctxt->instate = XML_PARSER_PI;
1.3 veillard 4047: /*
4048: * this is a Processing Instruction.
4049: */
1.40 daniel 4050: SKIP(2);
1.91 daniel 4051: SHRINK;
1.3 veillard 4052:
4053: /*
1.22 daniel 4054: * Parse the target name and check for special support like
4055: * namespace.
1.3 veillard 4056: */
1.22 daniel 4057: target = xmlParsePITarget(ctxt);
4058: if (target != NULL) {
1.156 daniel 4059: if ((RAW == '?') && (NXT(1) == '>')) {
4060: SKIP(2);
4061:
4062: /*
4063: * SAX: PI detected.
4064: */
4065: if ((ctxt->sax) &&
4066: (ctxt->sax->processingInstruction != NULL))
4067: ctxt->sax->processingInstruction(ctxt->userData,
4068: target, NULL);
4069: ctxt->instate = state;
4070: return;
4071: }
1.135 daniel 4072: buf = (xmlChar *) xmlMalloc(size * sizeof(xmlChar));
4073: if (buf == NULL) {
4074: fprintf(stderr, "malloc of %d byte failed\n", size);
1.140 daniel 4075: ctxt->instate = state;
1.135 daniel 4076: return;
4077: }
4078: cur = CUR;
4079: if (!IS_BLANK(cur)) {
1.114 daniel 4080: if ((ctxt->sax != NULL) && (ctxt->sax->error != NULL))
4081: ctxt->sax->error(ctxt->userData,
4082: "xmlParsePI: PI %s space expected\n", target);
1.123 daniel 4083: ctxt->errNo = XML_ERR_SPACE_REQUIRED;
1.114 daniel 4084: ctxt->wellFormed = 0;
4085: }
4086: SKIP_BLANKS;
1.152 daniel 4087: cur = CUR_CHAR(l);
1.135 daniel 4088: while (IS_CHAR(cur) &&
4089: ((cur != '?') || (NXT(1) != '>'))) {
1.152 daniel 4090: if (len + 5 >= size) {
1.135 daniel 4091: size *= 2;
4092: buf = xmlRealloc(buf, size * sizeof(xmlChar));
4093: if (buf == NULL) {
4094: fprintf(stderr, "realloc of %d byte failed\n", size);
1.140 daniel 4095: ctxt->instate = state;
1.135 daniel 4096: return;
4097: }
4098: }
1.152 daniel 4099: COPY_BUF(l,buf,len,cur);
4100: NEXTL(l);
4101: cur = CUR_CHAR(l);
1.135 daniel 4102: if (cur == 0) {
4103: SHRINK;
4104: GROW;
1.152 daniel 4105: cur = CUR_CHAR(l);
1.135 daniel 4106: }
4107: }
4108: buf[len] = 0;
1.152 daniel 4109: if (cur != '?') {
1.72 daniel 4110: if ((ctxt->sax != NULL) && (ctxt->sax->error != NULL))
1.74 daniel 4111: ctxt->sax->error(ctxt->userData,
1.72 daniel 4112: "xmlParsePI: PI %s never end ...\n", target);
1.123 daniel 4113: ctxt->errNo = XML_ERR_PI_NOT_FINISHED;
1.72 daniel 4114: ctxt->wellFormed = 0;
1.22 daniel 4115: } else {
1.72 daniel 4116: SKIP(2);
1.44 daniel 4117:
1.72 daniel 4118: /*
4119: * SAX: PI detected.
4120: */
4121: if ((ctxt->sax) &&
4122: (ctxt->sax->processingInstruction != NULL))
1.99 daniel 4123: ctxt->sax->processingInstruction(ctxt->userData,
1.135 daniel 4124: target, buf);
1.22 daniel 4125: }
1.135 daniel 4126: xmlFree(buf);
1.119 daniel 4127: xmlFree(target);
1.3 veillard 4128: } else {
1.55 daniel 4129: if ((ctxt->sax != NULL) && (ctxt->sax->error != NULL))
1.99 daniel 4130: ctxt->sax->error(ctxt->userData,
4131: "xmlParsePI : no target name\n");
1.123 daniel 4132: ctxt->errNo = XML_ERR_PI_NOT_STARTED;
1.59 daniel 4133: ctxt->wellFormed = 0;
1.22 daniel 4134: }
1.140 daniel 4135: ctxt->instate = state;
1.22 daniel 4136: }
4137: }
4138:
1.50 daniel 4139: /**
4140: * xmlParseNotationDecl:
4141: * @ctxt: an XML parser context
4142: *
4143: * parse a notation declaration
1.22 daniel 4144: *
4145: * [82] NotationDecl ::= '<!NOTATION' S Name S (ExternalID | PublicID) S? '>'
4146: *
4147: * Hence there is actually 3 choices:
4148: * 'PUBLIC' S PubidLiteral
4149: * 'PUBLIC' S PubidLiteral S SystemLiteral
4150: * and 'SYSTEM' S SystemLiteral
1.50 daniel 4151: *
1.67 daniel 4152: * See the NOTE on xmlParseExternalID().
1.22 daniel 4153: */
4154:
1.55 daniel 4155: void
4156: xmlParseNotationDecl(xmlParserCtxtPtr ctxt) {
1.123 daniel 4157: xmlChar *name;
4158: xmlChar *Pubid;
4159: xmlChar *Systemid;
1.22 daniel 4160:
1.152 daniel 4161: if ((RAW == '<') && (NXT(1) == '!') &&
1.40 daniel 4162: (NXT(2) == 'N') && (NXT(3) == 'O') &&
4163: (NXT(4) == 'T') && (NXT(5) == 'A') &&
4164: (NXT(6) == 'T') && (NXT(7) == 'I') &&
1.67 daniel 4165: (NXT(8) == 'O') && (NXT(9) == 'N')) {
1.91 daniel 4166: SHRINK;
1.40 daniel 4167: SKIP(10);
1.67 daniel 4168: if (!IS_BLANK(CUR)) {
4169: if ((ctxt->sax != NULL) && (ctxt->sax->error != NULL))
1.122 daniel 4170: ctxt->sax->error(ctxt->userData,
4171: "Space required after '<!NOTATION'\n");
1.123 daniel 4172: ctxt->errNo = XML_ERR_SPACE_REQUIRED;
1.67 daniel 4173: ctxt->wellFormed = 0;
4174: return;
4175: }
4176: SKIP_BLANKS;
1.22 daniel 4177:
4178: name = xmlParseName(ctxt);
4179: if (name == NULL) {
1.55 daniel 4180: if ((ctxt->sax != NULL) && (ctxt->sax->error != NULL))
1.122 daniel 4181: ctxt->sax->error(ctxt->userData,
4182: "NOTATION: Name expected here\n");
1.123 daniel 4183: ctxt->errNo = XML_ERR_NOTATION_NOT_STARTED;
1.67 daniel 4184: ctxt->wellFormed = 0;
4185: return;
4186: }
4187: if (!IS_BLANK(CUR)) {
4188: if ((ctxt->sax != NULL) && (ctxt->sax->error != NULL))
1.74 daniel 4189: ctxt->sax->error(ctxt->userData,
1.67 daniel 4190: "Space required after the NOTATION name'\n");
1.123 daniel 4191: ctxt->errNo = XML_ERR_SPACE_REQUIRED;
1.59 daniel 4192: ctxt->wellFormed = 0;
1.22 daniel 4193: return;
4194: }
1.42 daniel 4195: SKIP_BLANKS;
1.67 daniel 4196:
1.22 daniel 4197: /*
1.67 daniel 4198: * Parse the IDs.
1.22 daniel 4199: */
1.160 daniel 4200: Systemid = xmlParseExternalID(ctxt, &Pubid, 0);
1.67 daniel 4201: SKIP_BLANKS;
4202:
1.152 daniel 4203: if (RAW == '>') {
1.40 daniel 4204: NEXT;
1.72 daniel 4205: if ((ctxt->sax != NULL) && (ctxt->sax->notationDecl != NULL))
1.74 daniel 4206: ctxt->sax->notationDecl(ctxt->userData, name, Pubid, Systemid);
1.67 daniel 4207: } else {
4208: if ((ctxt->sax != NULL) && (ctxt->sax->error != NULL))
1.74 daniel 4209: ctxt->sax->error(ctxt->userData,
1.67 daniel 4210: "'>' required to close NOTATION declaration\n");
1.123 daniel 4211: ctxt->errNo = XML_ERR_NOTATION_NOT_FINISHED;
1.67 daniel 4212: ctxt->wellFormed = 0;
4213: }
1.119 daniel 4214: xmlFree(name);
4215: if (Systemid != NULL) xmlFree(Systemid);
4216: if (Pubid != NULL) xmlFree(Pubid);
1.22 daniel 4217: }
4218: }
4219:
1.50 daniel 4220: /**
4221: * xmlParseEntityDecl:
4222: * @ctxt: an XML parser context
4223: *
4224: * parse <!ENTITY declarations
1.22 daniel 4225: *
4226: * [70] EntityDecl ::= GEDecl | PEDecl
4227: *
4228: * [71] GEDecl ::= '<!ENTITY' S Name S EntityDef S? '>'
4229: *
4230: * [72] PEDecl ::= '<!ENTITY' S '%' S Name S PEDef S? '>'
4231: *
4232: * [73] EntityDef ::= EntityValue | (ExternalID NDataDecl?)
4233: *
4234: * [74] PEDef ::= EntityValue | ExternalID
1.24 daniel 4235: *
4236: * [76] NDataDecl ::= S 'NDATA' S Name
1.99 daniel 4237: *
4238: * [ VC: Notation Declared ]
1.116 daniel 4239: * The Name must match the declared name of a notation.
1.22 daniel 4240: */
4241:
1.55 daniel 4242: void
4243: xmlParseEntityDecl(xmlParserCtxtPtr ctxt) {
1.123 daniel 4244: xmlChar *name = NULL;
4245: xmlChar *value = NULL;
4246: xmlChar *URI = NULL, *literal = NULL;
4247: xmlChar *ndata = NULL;
1.39 daniel 4248: int isParameter = 0;
1.123 daniel 4249: xmlChar *orig = NULL;
1.22 daniel 4250:
1.94 daniel 4251: GROW;
1.152 daniel 4252: if ((RAW == '<') && (NXT(1) == '!') &&
1.40 daniel 4253: (NXT(2) == 'E') && (NXT(3) == 'N') &&
4254: (NXT(4) == 'T') && (NXT(5) == 'I') &&
1.59 daniel 4255: (NXT(6) == 'T') && (NXT(7) == 'Y')) {
1.96 daniel 4256: ctxt->instate = XML_PARSER_ENTITY_DECL;
1.91 daniel 4257: SHRINK;
1.40 daniel 4258: SKIP(8);
1.59 daniel 4259: if (!IS_BLANK(CUR)) {
4260: if ((ctxt->sax != NULL) && (ctxt->sax->error != NULL))
1.122 daniel 4261: ctxt->sax->error(ctxt->userData,
4262: "Space required after '<!ENTITY'\n");
1.123 daniel 4263: ctxt->errNo = XML_ERR_SPACE_REQUIRED;
1.59 daniel 4264: ctxt->wellFormed = 0;
4265: }
4266: SKIP_BLANKS;
1.40 daniel 4267:
1.152 daniel 4268: if (RAW == '%') {
1.40 daniel 4269: NEXT;
1.59 daniel 4270: if (!IS_BLANK(CUR)) {
4271: if ((ctxt->sax != NULL) && (ctxt->sax->error != NULL))
1.122 daniel 4272: ctxt->sax->error(ctxt->userData,
4273: "Space required after '%'\n");
1.123 daniel 4274: ctxt->errNo = XML_ERR_SPACE_REQUIRED;
1.59 daniel 4275: ctxt->wellFormed = 0;
4276: }
1.42 daniel 4277: SKIP_BLANKS;
1.39 daniel 4278: isParameter = 1;
1.22 daniel 4279: }
4280:
4281: name = xmlParseName(ctxt);
1.24 daniel 4282: if (name == NULL) {
1.55 daniel 4283: if ((ctxt->sax != NULL) && (ctxt->sax->error != NULL))
1.74 daniel 4284: ctxt->sax->error(ctxt->userData, "xmlParseEntityDecl: no name\n");
1.123 daniel 4285: ctxt->errNo = XML_ERR_NAME_REQUIRED;
1.59 daniel 4286: ctxt->wellFormed = 0;
1.24 daniel 4287: return;
4288: }
1.59 daniel 4289: if (!IS_BLANK(CUR)) {
4290: if ((ctxt->sax != NULL) && (ctxt->sax->error != NULL))
1.74 daniel 4291: ctxt->sax->error(ctxt->userData,
1.59 daniel 4292: "Space required after the entity name\n");
1.123 daniel 4293: ctxt->errNo = XML_ERR_SPACE_REQUIRED;
1.59 daniel 4294: ctxt->wellFormed = 0;
4295: }
1.42 daniel 4296: SKIP_BLANKS;
1.24 daniel 4297:
1.22 daniel 4298: /*
1.68 daniel 4299: * handle the various case of definitions...
1.22 daniel 4300: */
1.39 daniel 4301: if (isParameter) {
1.152 daniel 4302: if ((RAW == '"') || (RAW == '\''))
1.78 daniel 4303: value = xmlParseEntityValue(ctxt, &orig);
1.39 daniel 4304: if (value) {
1.72 daniel 4305: if ((ctxt->sax != NULL) && (ctxt->sax->entityDecl != NULL))
1.74 daniel 4306: ctxt->sax->entityDecl(ctxt->userData, name,
1.39 daniel 4307: XML_INTERNAL_PARAMETER_ENTITY,
4308: NULL, NULL, value);
4309: }
1.24 daniel 4310: else {
1.67 daniel 4311: URI = xmlParseExternalID(ctxt, &literal, 1);
1.39 daniel 4312: if (URI) {
1.72 daniel 4313: if ((ctxt->sax != NULL) && (ctxt->sax->entityDecl != NULL))
1.74 daniel 4314: ctxt->sax->entityDecl(ctxt->userData, name,
1.39 daniel 4315: XML_EXTERNAL_PARAMETER_ENTITY,
4316: literal, URI, NULL);
4317: }
1.24 daniel 4318: }
4319: } else {
1.152 daniel 4320: if ((RAW == '"') || (RAW == '\'')) {
1.78 daniel 4321: value = xmlParseEntityValue(ctxt, &orig);
1.72 daniel 4322: if ((ctxt->sax != NULL) && (ctxt->sax->entityDecl != NULL))
1.74 daniel 4323: ctxt->sax->entityDecl(ctxt->userData, name,
1.39 daniel 4324: XML_INTERNAL_GENERAL_ENTITY,
4325: NULL, NULL, value);
4326: } else {
1.67 daniel 4327: URI = xmlParseExternalID(ctxt, &literal, 1);
1.152 daniel 4328: if ((RAW != '>') && (!IS_BLANK(CUR))) {
1.59 daniel 4329: if ((ctxt->sax != NULL) && (ctxt->sax->error != NULL))
1.74 daniel 4330: ctxt->sax->error(ctxt->userData,
1.59 daniel 4331: "Space required before 'NDATA'\n");
1.123 daniel 4332: ctxt->errNo = XML_ERR_SPACE_REQUIRED;
1.59 daniel 4333: ctxt->wellFormed = 0;
4334: }
1.42 daniel 4335: SKIP_BLANKS;
1.152 daniel 4336: if ((RAW == 'N') && (NXT(1) == 'D') &&
1.40 daniel 4337: (NXT(2) == 'A') && (NXT(3) == 'T') &&
4338: (NXT(4) == 'A')) {
4339: SKIP(5);
1.59 daniel 4340: if (!IS_BLANK(CUR)) {
4341: if ((ctxt->sax != NULL) && (ctxt->sax->error != NULL))
1.74 daniel 4342: ctxt->sax->error(ctxt->userData,
1.59 daniel 4343: "Space required after 'NDATA'\n");
1.123 daniel 4344: ctxt->errNo = XML_ERR_SPACE_REQUIRED;
1.59 daniel 4345: ctxt->wellFormed = 0;
4346: }
1.42 daniel 4347: SKIP_BLANKS;
1.24 daniel 4348: ndata = xmlParseName(ctxt);
1.116 daniel 4349: if ((ctxt->sax != NULL) &&
4350: (ctxt->sax->unparsedEntityDecl != NULL))
4351: ctxt->sax->unparsedEntityDecl(ctxt->userData, name,
1.39 daniel 4352: literal, URI, ndata);
4353: } else {
1.72 daniel 4354: if ((ctxt->sax != NULL) && (ctxt->sax->entityDecl != NULL))
1.74 daniel 4355: ctxt->sax->entityDecl(ctxt->userData, name,
1.39 daniel 4356: XML_EXTERNAL_GENERAL_PARSED_ENTITY,
4357: literal, URI, NULL);
1.24 daniel 4358: }
4359: }
4360: }
1.42 daniel 4361: SKIP_BLANKS;
1.152 daniel 4362: if (RAW != '>') {
1.55 daniel 4363: if ((ctxt->sax != NULL) && (ctxt->sax->error != NULL))
1.74 daniel 4364: ctxt->sax->error(ctxt->userData,
1.31 daniel 4365: "xmlParseEntityDecl: entity %s not terminated\n", name);
1.123 daniel 4366: ctxt->errNo = XML_ERR_ENTITY_NOT_FINISHED;
1.59 daniel 4367: ctxt->wellFormed = 0;
1.24 daniel 4368: } else
1.40 daniel 4369: NEXT;
1.78 daniel 4370: if (orig != NULL) {
4371: /*
1.98 daniel 4372: * Ugly mechanism to save the raw entity value.
1.78 daniel 4373: */
4374: xmlEntityPtr cur = NULL;
4375:
1.98 daniel 4376: if (isParameter) {
4377: if ((ctxt->sax != NULL) &&
4378: (ctxt->sax->getParameterEntity != NULL))
1.120 daniel 4379: cur = ctxt->sax->getParameterEntity(ctxt->userData, name);
1.98 daniel 4380: } else {
4381: if ((ctxt->sax != NULL) &&
4382: (ctxt->sax->getEntity != NULL))
1.120 daniel 4383: cur = ctxt->sax->getEntity(ctxt->userData, name);
1.98 daniel 4384: }
4385: if (cur != NULL) {
4386: if (cur->orig != NULL)
1.119 daniel 4387: xmlFree(orig);
1.98 daniel 4388: else
4389: cur->orig = orig;
4390: } else
1.119 daniel 4391: xmlFree(orig);
1.78 daniel 4392: }
1.119 daniel 4393: if (name != NULL) xmlFree(name);
4394: if (value != NULL) xmlFree(value);
4395: if (URI != NULL) xmlFree(URI);
4396: if (literal != NULL) xmlFree(literal);
4397: if (ndata != NULL) xmlFree(ndata);
1.22 daniel 4398: }
4399: }
4400:
1.50 daniel 4401: /**
1.59 daniel 4402: * xmlParseDefaultDecl:
4403: * @ctxt: an XML parser context
4404: * @value: Receive a possible fixed default value for the attribute
4405: *
4406: * Parse an attribute default declaration
4407: *
4408: * [60] DefaultDecl ::= '#REQUIRED' | '#IMPLIED' | (('#FIXED' S)? AttValue)
4409: *
1.99 daniel 4410: * [ VC: Required Attribute ]
1.117 daniel 4411: * if the default declaration is the keyword #REQUIRED, then the
4412: * attribute must be specified for all elements of the type in the
4413: * attribute-list declaration.
1.99 daniel 4414: *
4415: * [ VC: Attribute Default Legal ]
1.102 daniel 4416: * The declared default value must meet the lexical constraints of
4417: * the declared attribute type c.f. xmlValidateAttributeDecl()
1.99 daniel 4418: *
4419: * [ VC: Fixed Attribute Default ]
1.117 daniel 4420: * if an attribute has a default value declared with the #FIXED
4421: * keyword, instances of that attribute must match the default value.
1.99 daniel 4422: *
4423: * [ WFC: No < in Attribute Values ]
4424: * handled in xmlParseAttValue()
4425: *
1.59 daniel 4426: * returns: XML_ATTRIBUTE_NONE, XML_ATTRIBUTE_REQUIRED, XML_ATTRIBUTE_IMPLIED
4427: * or XML_ATTRIBUTE_FIXED.
4428: */
4429:
4430: int
1.123 daniel 4431: xmlParseDefaultDecl(xmlParserCtxtPtr ctxt, xmlChar **value) {
1.59 daniel 4432: int val;
1.123 daniel 4433: xmlChar *ret;
1.59 daniel 4434:
4435: *value = NULL;
1.152 daniel 4436: if ((RAW == '#') && (NXT(1) == 'R') &&
1.59 daniel 4437: (NXT(2) == 'E') && (NXT(3) == 'Q') &&
4438: (NXT(4) == 'U') && (NXT(5) == 'I') &&
4439: (NXT(6) == 'R') && (NXT(7) == 'E') &&
4440: (NXT(8) == 'D')) {
4441: SKIP(9);
4442: return(XML_ATTRIBUTE_REQUIRED);
4443: }
1.152 daniel 4444: if ((RAW == '#') && (NXT(1) == 'I') &&
1.59 daniel 4445: (NXT(2) == 'M') && (NXT(3) == 'P') &&
4446: (NXT(4) == 'L') && (NXT(5) == 'I') &&
4447: (NXT(6) == 'E') && (NXT(7) == 'D')) {
4448: SKIP(8);
4449: return(XML_ATTRIBUTE_IMPLIED);
4450: }
4451: val = XML_ATTRIBUTE_NONE;
1.152 daniel 4452: if ((RAW == '#') && (NXT(1) == 'F') &&
1.59 daniel 4453: (NXT(2) == 'I') && (NXT(3) == 'X') &&
4454: (NXT(4) == 'E') && (NXT(5) == 'D')) {
4455: SKIP(6);
4456: val = XML_ATTRIBUTE_FIXED;
4457: if (!IS_BLANK(CUR)) {
4458: if ((ctxt->sax != NULL) && (ctxt->sax->error != NULL))
1.122 daniel 4459: ctxt->sax->error(ctxt->userData,
4460: "Space required after '#FIXED'\n");
1.123 daniel 4461: ctxt->errNo = XML_ERR_SPACE_REQUIRED;
1.59 daniel 4462: ctxt->wellFormed = 0;
4463: }
4464: SKIP_BLANKS;
4465: }
4466: ret = xmlParseAttValue(ctxt);
1.96 daniel 4467: ctxt->instate = XML_PARSER_DTD;
1.59 daniel 4468: if (ret == NULL) {
4469: if ((ctxt->sax != NULL) && (ctxt->sax->error != NULL))
1.74 daniel 4470: ctxt->sax->error(ctxt->userData,
1.59 daniel 4471: "Attribute default value declaration error\n");
4472: ctxt->wellFormed = 0;
4473: } else
4474: *value = ret;
4475: return(val);
4476: }
4477:
4478: /**
1.66 daniel 4479: * xmlParseNotationType:
4480: * @ctxt: an XML parser context
4481: *
4482: * parse an Notation attribute type.
4483: *
1.99 daniel 4484: * Note: the leading 'NOTATION' S part has already being parsed...
4485: *
1.66 daniel 4486: * [58] NotationType ::= 'NOTATION' S '(' S? Name (S? '|' S? Name)* S? ')'
4487: *
1.99 daniel 4488: * [ VC: Notation Attributes ]
1.117 daniel 4489: * Values of this type must match one of the notation names included
1.99 daniel 4490: * in the declaration; all notation names in the declaration must be declared.
1.66 daniel 4491: *
4492: * Returns: the notation attribute tree built while parsing
4493: */
4494:
4495: xmlEnumerationPtr
4496: xmlParseNotationType(xmlParserCtxtPtr ctxt) {
1.123 daniel 4497: xmlChar *name;
1.66 daniel 4498: xmlEnumerationPtr ret = NULL, last = NULL, cur;
4499:
1.152 daniel 4500: if (RAW != '(') {
1.66 daniel 4501: if ((ctxt->sax != NULL) && (ctxt->sax->error != NULL))
1.122 daniel 4502: ctxt->sax->error(ctxt->userData,
4503: "'(' required to start 'NOTATION'\n");
1.123 daniel 4504: ctxt->errNo = XML_ERR_NOTATION_NOT_STARTED;
1.66 daniel 4505: ctxt->wellFormed = 0;
4506: return(NULL);
4507: }
1.91 daniel 4508: SHRINK;
1.66 daniel 4509: do {
4510: NEXT;
4511: SKIP_BLANKS;
4512: name = xmlParseName(ctxt);
4513: if (name == NULL) {
4514: if ((ctxt->sax != NULL) && (ctxt->sax->error != NULL))
1.74 daniel 4515: ctxt->sax->error(ctxt->userData,
1.66 daniel 4516: "Name expected in NOTATION declaration\n");
1.123 daniel 4517: ctxt->errNo = XML_ERR_NAME_REQUIRED;
1.66 daniel 4518: ctxt->wellFormed = 0;
4519: return(ret);
4520: }
4521: cur = xmlCreateEnumeration(name);
1.119 daniel 4522: xmlFree(name);
1.66 daniel 4523: if (cur == NULL) return(ret);
4524: if (last == NULL) ret = last = cur;
4525: else {
4526: last->next = cur;
4527: last = cur;
4528: }
4529: SKIP_BLANKS;
1.152 daniel 4530: } while (RAW == '|');
4531: if (RAW != ')') {
1.66 daniel 4532: if ((ctxt->sax != NULL) && (ctxt->sax->error != NULL))
1.74 daniel 4533: ctxt->sax->error(ctxt->userData,
1.66 daniel 4534: "')' required to finish NOTATION declaration\n");
1.123 daniel 4535: ctxt->errNo = XML_ERR_NOTATION_NOT_FINISHED;
1.66 daniel 4536: ctxt->wellFormed = 0;
4537: return(ret);
4538: }
4539: NEXT;
4540: return(ret);
4541: }
4542:
4543: /**
4544: * xmlParseEnumerationType:
4545: * @ctxt: an XML parser context
4546: *
4547: * parse an Enumeration attribute type.
4548: *
4549: * [59] Enumeration ::= '(' S? Nmtoken (S? '|' S? Nmtoken)* S? ')'
4550: *
1.99 daniel 4551: * [ VC: Enumeration ]
1.117 daniel 4552: * Values of this type must match one of the Nmtoken tokens in
1.99 daniel 4553: * the declaration
4554: *
1.66 daniel 4555: * Returns: the enumeration attribute tree built while parsing
4556: */
4557:
4558: xmlEnumerationPtr
4559: xmlParseEnumerationType(xmlParserCtxtPtr ctxt) {
1.123 daniel 4560: xmlChar *name;
1.66 daniel 4561: xmlEnumerationPtr ret = NULL, last = NULL, cur;
4562:
1.152 daniel 4563: if (RAW != '(') {
1.66 daniel 4564: if ((ctxt->sax != NULL) && (ctxt->sax->error != NULL))
1.74 daniel 4565: ctxt->sax->error(ctxt->userData,
1.66 daniel 4566: "'(' required to start ATTLIST enumeration\n");
1.123 daniel 4567: ctxt->errNo = XML_ERR_ATTLIST_NOT_STARTED;
1.66 daniel 4568: ctxt->wellFormed = 0;
4569: return(NULL);
4570: }
1.91 daniel 4571: SHRINK;
1.66 daniel 4572: do {
4573: NEXT;
4574: SKIP_BLANKS;
4575: name = xmlParseNmtoken(ctxt);
4576: if (name == NULL) {
4577: if ((ctxt->sax != NULL) && (ctxt->sax->error != NULL))
1.74 daniel 4578: ctxt->sax->error(ctxt->userData,
1.66 daniel 4579: "NmToken expected in ATTLIST enumeration\n");
1.123 daniel 4580: ctxt->errNo = XML_ERR_NMTOKEN_REQUIRED;
1.66 daniel 4581: ctxt->wellFormed = 0;
4582: return(ret);
4583: }
4584: cur = xmlCreateEnumeration(name);
1.119 daniel 4585: xmlFree(name);
1.66 daniel 4586: if (cur == NULL) return(ret);
4587: if (last == NULL) ret = last = cur;
4588: else {
4589: last->next = cur;
4590: last = cur;
4591: }
4592: SKIP_BLANKS;
1.152 daniel 4593: } while (RAW == '|');
4594: if (RAW != ')') {
1.66 daniel 4595: if ((ctxt->sax != NULL) && (ctxt->sax->error != NULL))
1.74 daniel 4596: ctxt->sax->error(ctxt->userData,
1.66 daniel 4597: "')' required to finish ATTLIST enumeration\n");
1.123 daniel 4598: ctxt->errNo = XML_ERR_ATTLIST_NOT_FINISHED;
1.66 daniel 4599: ctxt->wellFormed = 0;
4600: return(ret);
4601: }
4602: NEXT;
4603: return(ret);
4604: }
4605:
4606: /**
1.50 daniel 4607: * xmlParseEnumeratedType:
4608: * @ctxt: an XML parser context
1.66 daniel 4609: * @tree: the enumeration tree built while parsing
1.50 daniel 4610: *
1.66 daniel 4611: * parse an Enumerated attribute type.
1.22 daniel 4612: *
4613: * [57] EnumeratedType ::= NotationType | Enumeration
4614: *
4615: * [58] NotationType ::= 'NOTATION' S '(' S? Name (S? '|' S? Name)* S? ')'
4616: *
1.50 daniel 4617: *
1.66 daniel 4618: * Returns: XML_ATTRIBUTE_ENUMERATION or XML_ATTRIBUTE_NOTATION
1.22 daniel 4619: */
4620:
1.66 daniel 4621: int
4622: xmlParseEnumeratedType(xmlParserCtxtPtr ctxt, xmlEnumerationPtr *tree) {
1.152 daniel 4623: if ((RAW == 'N') && (NXT(1) == 'O') &&
1.66 daniel 4624: (NXT(2) == 'T') && (NXT(3) == 'A') &&
4625: (NXT(4) == 'T') && (NXT(5) == 'I') &&
4626: (NXT(6) == 'O') && (NXT(7) == 'N')) {
4627: SKIP(8);
4628: if (!IS_BLANK(CUR)) {
4629: if ((ctxt->sax != NULL) && (ctxt->sax->error != NULL))
1.122 daniel 4630: ctxt->sax->error(ctxt->userData,
4631: "Space required after 'NOTATION'\n");
1.123 daniel 4632: ctxt->errNo = XML_ERR_SPACE_REQUIRED;
1.66 daniel 4633: ctxt->wellFormed = 0;
4634: return(0);
4635: }
4636: SKIP_BLANKS;
4637: *tree = xmlParseNotationType(ctxt);
4638: if (*tree == NULL) return(0);
4639: return(XML_ATTRIBUTE_NOTATION);
4640: }
4641: *tree = xmlParseEnumerationType(ctxt);
4642: if (*tree == NULL) return(0);
4643: return(XML_ATTRIBUTE_ENUMERATION);
1.22 daniel 4644: }
4645:
1.50 daniel 4646: /**
4647: * xmlParseAttributeType:
4648: * @ctxt: an XML parser context
1.66 daniel 4649: * @tree: the enumeration tree built while parsing
1.50 daniel 4650: *
1.59 daniel 4651: * parse the Attribute list def for an element
1.22 daniel 4652: *
4653: * [54] AttType ::= StringType | TokenizedType | EnumeratedType
4654: *
4655: * [55] StringType ::= 'CDATA'
4656: *
4657: * [56] TokenizedType ::= 'ID' | 'IDREF' | 'IDREFS' | 'ENTITY' |
4658: * 'ENTITIES' | 'NMTOKEN' | 'NMTOKENS'
1.50 daniel 4659: *
1.102 daniel 4660: * Validity constraints for attribute values syntax are checked in
4661: * xmlValidateAttributeValue()
4662: *
1.99 daniel 4663: * [ VC: ID ]
1.117 daniel 4664: * Values of type ID must match the Name production. A name must not
1.99 daniel 4665: * appear more than once in an XML document as a value of this type;
4666: * i.e., ID values must uniquely identify the elements which bear them.
4667: *
4668: * [ VC: One ID per Element Type ]
1.117 daniel 4669: * No element type may have more than one ID attribute specified.
1.99 daniel 4670: *
4671: * [ VC: ID Attribute Default ]
1.117 daniel 4672: * An ID attribute must have a declared default of #IMPLIED or #REQUIRED.
1.99 daniel 4673: *
4674: * [ VC: IDREF ]
1.102 daniel 4675: * Values of type IDREF must match the Name production, and values
1.140 daniel 4676: * of type IDREFS must match Names; each IDREF Name must match the value
1.117 daniel 4677: * of an ID attribute on some element in the XML document; i.e. IDREF
1.99 daniel 4678: * values must match the value of some ID attribute.
4679: *
4680: * [ VC: Entity Name ]
1.102 daniel 4681: * Values of type ENTITY must match the Name production, values
1.140 daniel 4682: * of type ENTITIES must match Names; each Entity Name must match the
1.117 daniel 4683: * name of an unparsed entity declared in the DTD.
1.99 daniel 4684: *
4685: * [ VC: Name Token ]
1.102 daniel 4686: * Values of type NMTOKEN must match the Nmtoken production; values
1.99 daniel 4687: * of type NMTOKENS must match Nmtokens.
4688: *
1.69 daniel 4689: * Returns the attribute type
1.22 daniel 4690: */
1.59 daniel 4691: int
1.66 daniel 4692: xmlParseAttributeType(xmlParserCtxtPtr ctxt, xmlEnumerationPtr *tree) {
1.91 daniel 4693: SHRINK;
1.152 daniel 4694: if ((RAW == 'C') && (NXT(1) == 'D') &&
1.40 daniel 4695: (NXT(2) == 'A') && (NXT(3) == 'T') &&
4696: (NXT(4) == 'A')) {
4697: SKIP(5);
1.66 daniel 4698: return(XML_ATTRIBUTE_CDATA);
1.152 daniel 4699: } else if ((RAW == 'I') && (NXT(1) == 'D') &&
1.40 daniel 4700: (NXT(2) == 'R') && (NXT(3) == 'E') &&
1.97 daniel 4701: (NXT(4) == 'F') && (NXT(5) == 'S')) {
4702: SKIP(6);
4703: return(XML_ATTRIBUTE_IDREFS);
1.152 daniel 4704: } else if ((RAW == 'I') && (NXT(1) == 'D') &&
1.97 daniel 4705: (NXT(2) == 'R') && (NXT(3) == 'E') &&
1.40 daniel 4706: (NXT(4) == 'F')) {
4707: SKIP(5);
1.59 daniel 4708: return(XML_ATTRIBUTE_IDREF);
1.152 daniel 4709: } else if ((RAW == 'I') && (NXT(1) == 'D')) {
1.66 daniel 4710: SKIP(2);
4711: return(XML_ATTRIBUTE_ID);
1.152 daniel 4712: } else if ((RAW == 'E') && (NXT(1) == 'N') &&
1.40 daniel 4713: (NXT(2) == 'T') && (NXT(3) == 'I') &&
4714: (NXT(4) == 'T') && (NXT(5) == 'Y')) {
4715: SKIP(6);
1.59 daniel 4716: return(XML_ATTRIBUTE_ENTITY);
1.152 daniel 4717: } else if ((RAW == 'E') && (NXT(1) == 'N') &&
1.40 daniel 4718: (NXT(2) == 'T') && (NXT(3) == 'I') &&
4719: (NXT(4) == 'T') && (NXT(5) == 'I') &&
4720: (NXT(6) == 'E') && (NXT(7) == 'S')) {
4721: SKIP(8);
1.59 daniel 4722: return(XML_ATTRIBUTE_ENTITIES);
1.152 daniel 4723: } else if ((RAW == 'N') && (NXT(1) == 'M') &&
1.40 daniel 4724: (NXT(2) == 'T') && (NXT(3) == 'O') &&
4725: (NXT(4) == 'K') && (NXT(5) == 'E') &&
1.66 daniel 4726: (NXT(6) == 'N') && (NXT(7) == 'S')) {
4727: SKIP(8);
4728: return(XML_ATTRIBUTE_NMTOKENS);
1.152 daniel 4729: } else if ((RAW == 'N') && (NXT(1) == 'M') &&
1.66 daniel 4730: (NXT(2) == 'T') && (NXT(3) == 'O') &&
4731: (NXT(4) == 'K') && (NXT(5) == 'E') &&
1.40 daniel 4732: (NXT(6) == 'N')) {
4733: SKIP(7);
1.59 daniel 4734: return(XML_ATTRIBUTE_NMTOKEN);
1.22 daniel 4735: }
1.66 daniel 4736: return(xmlParseEnumeratedType(ctxt, tree));
1.22 daniel 4737: }
4738:
1.50 daniel 4739: /**
4740: * xmlParseAttributeListDecl:
4741: * @ctxt: an XML parser context
4742: *
4743: * : parse the Attribute list def for an element
1.22 daniel 4744: *
4745: * [52] AttlistDecl ::= '<!ATTLIST' S Name AttDef* S? '>'
4746: *
4747: * [53] AttDef ::= S Name S AttType S DefaultDecl
1.50 daniel 4748: *
1.22 daniel 4749: */
1.55 daniel 4750: void
4751: xmlParseAttributeListDecl(xmlParserCtxtPtr ctxt) {
1.123 daniel 4752: xmlChar *elemName;
4753: xmlChar *attrName;
1.103 daniel 4754: xmlEnumerationPtr tree;
1.22 daniel 4755:
1.152 daniel 4756: if ((RAW == '<') && (NXT(1) == '!') &&
1.40 daniel 4757: (NXT(2) == 'A') && (NXT(3) == 'T') &&
4758: (NXT(4) == 'T') && (NXT(5) == 'L') &&
4759: (NXT(6) == 'I') && (NXT(7) == 'S') &&
1.59 daniel 4760: (NXT(8) == 'T')) {
1.40 daniel 4761: SKIP(9);
1.59 daniel 4762: if (!IS_BLANK(CUR)) {
4763: if ((ctxt->sax != NULL) && (ctxt->sax->error != NULL))
1.122 daniel 4764: ctxt->sax->error(ctxt->userData,
4765: "Space required after '<!ATTLIST'\n");
1.123 daniel 4766: ctxt->errNo = XML_ERR_SPACE_REQUIRED;
1.59 daniel 4767: ctxt->wellFormed = 0;
4768: }
1.42 daniel 4769: SKIP_BLANKS;
1.59 daniel 4770: elemName = xmlParseName(ctxt);
4771: if (elemName == NULL) {
1.55 daniel 4772: if ((ctxt->sax != NULL) && (ctxt->sax->error != NULL))
1.122 daniel 4773: ctxt->sax->error(ctxt->userData,
4774: "ATTLIST: no name for Element\n");
1.123 daniel 4775: ctxt->errNo = XML_ERR_NAME_REQUIRED;
1.59 daniel 4776: ctxt->wellFormed = 0;
1.22 daniel 4777: return;
4778: }
1.42 daniel 4779: SKIP_BLANKS;
1.152 daniel 4780: while (RAW != '>') {
1.123 daniel 4781: const xmlChar *check = CUR_PTR;
1.59 daniel 4782: int type;
4783: int def;
1.123 daniel 4784: xmlChar *defaultValue = NULL;
1.59 daniel 4785:
1.103 daniel 4786: tree = NULL;
1.59 daniel 4787: attrName = xmlParseName(ctxt);
4788: if (attrName == NULL) {
4789: if ((ctxt->sax != NULL) && (ctxt->sax->error != NULL))
1.122 daniel 4790: ctxt->sax->error(ctxt->userData,
4791: "ATTLIST: no name for Attribute\n");
1.123 daniel 4792: ctxt->errNo = XML_ERR_NAME_REQUIRED;
1.59 daniel 4793: ctxt->wellFormed = 0;
4794: break;
4795: }
1.97 daniel 4796: GROW;
1.59 daniel 4797: if (!IS_BLANK(CUR)) {
4798: if ((ctxt->sax != NULL) && (ctxt->sax->error != NULL))
1.74 daniel 4799: ctxt->sax->error(ctxt->userData,
1.59 daniel 4800: "Space required after the attribute name\n");
1.123 daniel 4801: ctxt->errNo = XML_ERR_SPACE_REQUIRED;
1.59 daniel 4802: ctxt->wellFormed = 0;
4803: break;
4804: }
4805: SKIP_BLANKS;
4806:
1.66 daniel 4807: type = xmlParseAttributeType(ctxt, &tree);
1.59 daniel 4808: if (type <= 0) break;
1.22 daniel 4809:
1.97 daniel 4810: GROW;
1.59 daniel 4811: if (!IS_BLANK(CUR)) {
4812: if ((ctxt->sax != NULL) && (ctxt->sax->error != NULL))
1.74 daniel 4813: ctxt->sax->error(ctxt->userData,
1.59 daniel 4814: "Space required after the attribute type\n");
1.123 daniel 4815: ctxt->errNo = XML_ERR_SPACE_REQUIRED;
1.59 daniel 4816: ctxt->wellFormed = 0;
4817: break;
4818: }
1.42 daniel 4819: SKIP_BLANKS;
1.59 daniel 4820:
4821: def = xmlParseDefaultDecl(ctxt, &defaultValue);
4822: if (def <= 0) break;
4823:
1.97 daniel 4824: GROW;
1.152 daniel 4825: if (RAW != '>') {
1.59 daniel 4826: if (!IS_BLANK(CUR)) {
4827: if ((ctxt->sax != NULL) && (ctxt->sax->error != NULL))
1.74 daniel 4828: ctxt->sax->error(ctxt->userData,
1.59 daniel 4829: "Space required after the attribute default value\n");
1.123 daniel 4830: ctxt->errNo = XML_ERR_SPACE_REQUIRED;
1.59 daniel 4831: ctxt->wellFormed = 0;
4832: break;
4833: }
4834: SKIP_BLANKS;
4835: }
1.40 daniel 4836: if (check == CUR_PTR) {
1.55 daniel 4837: if ((ctxt->sax != NULL) && (ctxt->sax->error != NULL))
1.74 daniel 4838: ctxt->sax->error(ctxt->userData,
1.59 daniel 4839: "xmlParseAttributeListDecl: detected internal error\n");
1.123 daniel 4840: ctxt->errNo = XML_ERR_INTERNAL_ERROR;
1.22 daniel 4841: break;
4842: }
1.72 daniel 4843: if ((ctxt->sax != NULL) && (ctxt->sax->attributeDecl != NULL))
1.74 daniel 4844: ctxt->sax->attributeDecl(ctxt->userData, elemName, attrName,
1.66 daniel 4845: type, def, defaultValue, tree);
1.59 daniel 4846: if (attrName != NULL)
1.119 daniel 4847: xmlFree(attrName);
1.59 daniel 4848: if (defaultValue != NULL)
1.119 daniel 4849: xmlFree(defaultValue);
1.97 daniel 4850: GROW;
1.22 daniel 4851: }
1.152 daniel 4852: if (RAW == '>')
1.40 daniel 4853: NEXT;
1.22 daniel 4854:
1.119 daniel 4855: xmlFree(elemName);
1.22 daniel 4856: }
4857: }
4858:
1.50 daniel 4859: /**
1.61 daniel 4860: * xmlParseElementMixedContentDecl:
4861: * @ctxt: an XML parser context
4862: *
4863: * parse the declaration for a Mixed Element content
4864: * The leading '(' and spaces have been skipped in xmlParseElementContentDecl
4865: *
4866: * [51] Mixed ::= '(' S? '#PCDATA' (S? '|' S? Name)* S? ')*' |
4867: * '(' S? '#PCDATA' S? ')'
4868: *
1.99 daniel 4869: * [ VC: Proper Group/PE Nesting ] applies to [51] too (see [49])
4870: *
4871: * [ VC: No Duplicate Types ]
1.117 daniel 4872: * The same name must not appear more than once in a single
4873: * mixed-content declaration.
1.99 daniel 4874: *
1.61 daniel 4875: * returns: the list of the xmlElementContentPtr describing the element choices
4876: */
4877: xmlElementContentPtr
1.62 daniel 4878: xmlParseElementMixedContentDecl(xmlParserCtxtPtr ctxt) {
1.64 daniel 4879: xmlElementContentPtr ret = NULL, cur = NULL, n;
1.123 daniel 4880: xmlChar *elem = NULL;
1.61 daniel 4881:
1.97 daniel 4882: GROW;
1.152 daniel 4883: if ((RAW == '#') && (NXT(1) == 'P') &&
1.61 daniel 4884: (NXT(2) == 'C') && (NXT(3) == 'D') &&
4885: (NXT(4) == 'A') && (NXT(5) == 'T') &&
4886: (NXT(6) == 'A')) {
4887: SKIP(7);
4888: SKIP_BLANKS;
1.91 daniel 4889: SHRINK;
1.152 daniel 4890: if (RAW == ')') {
1.63 daniel 4891: NEXT;
4892: ret = xmlNewElementContent(NULL, XML_ELEMENT_CONTENT_PCDATA);
1.152 daniel 4893: if (RAW == '*') {
1.136 daniel 4894: ret->ocur = XML_ELEMENT_CONTENT_MULT;
4895: NEXT;
4896: }
1.63 daniel 4897: return(ret);
4898: }
1.152 daniel 4899: if ((RAW == '(') || (RAW == '|')) {
1.61 daniel 4900: ret = cur = xmlNewElementContent(NULL, XML_ELEMENT_CONTENT_PCDATA);
4901: if (ret == NULL) return(NULL);
1.99 daniel 4902: }
1.152 daniel 4903: while (RAW == '|') {
1.64 daniel 4904: NEXT;
1.61 daniel 4905: if (elem == NULL) {
4906: ret = xmlNewElementContent(NULL, XML_ELEMENT_CONTENT_OR);
4907: if (ret == NULL) return(NULL);
4908: ret->c1 = cur;
1.64 daniel 4909: cur = ret;
1.61 daniel 4910: } else {
1.64 daniel 4911: n = xmlNewElementContent(NULL, XML_ELEMENT_CONTENT_OR);
4912: if (n == NULL) return(NULL);
4913: n->c1 = xmlNewElementContent(elem, XML_ELEMENT_CONTENT_ELEMENT);
4914: cur->c2 = n;
4915: cur = n;
1.119 daniel 4916: xmlFree(elem);
1.61 daniel 4917: }
4918: SKIP_BLANKS;
4919: elem = xmlParseName(ctxt);
4920: if (elem == NULL) {
4921: if ((ctxt->sax != NULL) && (ctxt->sax->error != NULL))
1.74 daniel 4922: ctxt->sax->error(ctxt->userData,
1.61 daniel 4923: "xmlParseElementMixedContentDecl : Name expected\n");
1.123 daniel 4924: ctxt->errNo = XML_ERR_NAME_REQUIRED;
1.61 daniel 4925: ctxt->wellFormed = 0;
4926: xmlFreeElementContent(cur);
4927: return(NULL);
4928: }
4929: SKIP_BLANKS;
1.97 daniel 4930: GROW;
1.61 daniel 4931: }
1.152 daniel 4932: if ((RAW == ')') && (NXT(1) == '*')) {
1.66 daniel 4933: if (elem != NULL) {
1.61 daniel 4934: cur->c2 = xmlNewElementContent(elem,
4935: XML_ELEMENT_CONTENT_ELEMENT);
1.119 daniel 4936: xmlFree(elem);
1.66 daniel 4937: }
1.65 daniel 4938: ret->ocur = XML_ELEMENT_CONTENT_MULT;
1.64 daniel 4939: SKIP(2);
1.61 daniel 4940: } else {
1.119 daniel 4941: if (elem != NULL) xmlFree(elem);
1.61 daniel 4942: if ((ctxt->sax != NULL) && (ctxt->sax->error != NULL))
1.74 daniel 4943: ctxt->sax->error(ctxt->userData,
1.63 daniel 4944: "xmlParseElementMixedContentDecl : '|' or ')*' expected\n");
1.123 daniel 4945: ctxt->errNo = XML_ERR_MIXED_NOT_STARTED;
1.61 daniel 4946: ctxt->wellFormed = 0;
4947: xmlFreeElementContent(ret);
4948: return(NULL);
4949: }
4950:
4951: } else {
4952: if ((ctxt->sax != NULL) && (ctxt->sax->error != NULL))
1.74 daniel 4953: ctxt->sax->error(ctxt->userData,
1.61 daniel 4954: "xmlParseElementMixedContentDecl : '#PCDATA' expected\n");
1.123 daniel 4955: ctxt->errNo = XML_ERR_PCDATA_REQUIRED;
1.61 daniel 4956: ctxt->wellFormed = 0;
4957: }
4958: return(ret);
4959: }
4960:
4961: /**
4962: * xmlParseElementChildrenContentDecl:
1.50 daniel 4963: * @ctxt: an XML parser context
4964: *
1.61 daniel 4965: * parse the declaration for a Mixed Element content
4966: * The leading '(' and spaces have been skipped in xmlParseElementContentDecl
1.22 daniel 4967: *
1.61 daniel 4968: *
1.22 daniel 4969: * [47] children ::= (choice | seq) ('?' | '*' | '+')?
4970: *
4971: * [48] cp ::= (Name | choice | seq) ('?' | '*' | '+')?
4972: *
4973: * [49] choice ::= '(' S? cp ( S? '|' S? cp )* S? ')'
4974: *
4975: * [50] seq ::= '(' S? cp ( S? ',' S? cp )* S? ')'
4976: *
1.99 daniel 4977: * [ VC: Proper Group/PE Nesting ] applies to [49] and [50]
4978: * TODO Parameter-entity replacement text must be properly nested
4979: * with parenthetized groups. That is to say, if either of the
4980: * opening or closing parentheses in a choice, seq, or Mixed
4981: * construct is contained in the replacement text for a parameter
4982: * entity, both must be contained in the same replacement text. For
4983: * interoperability, if a parameter-entity reference appears in a
4984: * choice, seq, or Mixed construct, its replacement text should not
4985: * be empty, and neither the first nor last non-blank character of
4986: * the replacement text should be a connector (| or ,).
4987: *
1.62 daniel 4988: * returns: the tree of xmlElementContentPtr describing the element
1.61 daniel 4989: * hierarchy.
4990: */
4991: xmlElementContentPtr
1.62 daniel 4992: xmlParseElementChildrenContentDecl(xmlParserCtxtPtr ctxt) {
1.63 daniel 4993: xmlElementContentPtr ret = NULL, cur = NULL, last = NULL, op = NULL;
1.123 daniel 4994: xmlChar *elem;
4995: xmlChar type = 0;
1.62 daniel 4996:
4997: SKIP_BLANKS;
1.94 daniel 4998: GROW;
1.152 daniel 4999: if (RAW == '(') {
1.63 daniel 5000: /* Recurse on first child */
1.62 daniel 5001: NEXT;
5002: SKIP_BLANKS;
5003: cur = ret = xmlParseElementChildrenContentDecl(ctxt);
5004: SKIP_BLANKS;
1.101 daniel 5005: GROW;
1.62 daniel 5006: } else {
5007: elem = xmlParseName(ctxt);
5008: if (elem == NULL) {
5009: if ((ctxt->sax != NULL) && (ctxt->sax->error != NULL))
1.74 daniel 5010: ctxt->sax->error(ctxt->userData,
1.62 daniel 5011: "xmlParseElementChildrenContentDecl : Name or '(' expected\n");
1.123 daniel 5012: ctxt->errNo = XML_ERR_ELEMCONTENT_NOT_STARTED;
1.62 daniel 5013: ctxt->wellFormed = 0;
5014: return(NULL);
5015: }
5016: cur = ret = xmlNewElementContent(elem, XML_ELEMENT_CONTENT_ELEMENT);
1.101 daniel 5017: GROW;
1.152 daniel 5018: if (RAW == '?') {
1.104 daniel 5019: cur->ocur = XML_ELEMENT_CONTENT_OPT;
1.62 daniel 5020: NEXT;
1.152 daniel 5021: } else if (RAW == '*') {
1.104 daniel 5022: cur->ocur = XML_ELEMENT_CONTENT_MULT;
1.62 daniel 5023: NEXT;
1.152 daniel 5024: } else if (RAW == '+') {
1.104 daniel 5025: cur->ocur = XML_ELEMENT_CONTENT_PLUS;
1.62 daniel 5026: NEXT;
5027: } else {
1.104 daniel 5028: cur->ocur = XML_ELEMENT_CONTENT_ONCE;
1.62 daniel 5029: }
1.119 daniel 5030: xmlFree(elem);
1.101 daniel 5031: GROW;
1.62 daniel 5032: }
5033: SKIP_BLANKS;
1.91 daniel 5034: SHRINK;
1.152 daniel 5035: while (RAW != ')') {
1.63 daniel 5036: /*
5037: * Each loop we parse one separator and one element.
5038: */
1.152 daniel 5039: if (RAW == ',') {
1.62 daniel 5040: if (type == 0) type = CUR;
5041:
5042: /*
5043: * Detect "Name | Name , Name" error
5044: */
5045: else if (type != CUR) {
5046: if ((ctxt->sax != NULL) && (ctxt->sax->error != NULL))
1.74 daniel 5047: ctxt->sax->error(ctxt->userData,
1.62 daniel 5048: "xmlParseElementChildrenContentDecl : '%c' expected\n",
5049: type);
1.123 daniel 5050: ctxt->errNo = XML_ERR_SEPARATOR_REQUIRED;
1.62 daniel 5051: ctxt->wellFormed = 0;
5052: xmlFreeElementContent(ret);
5053: return(NULL);
5054: }
1.64 daniel 5055: NEXT;
1.62 daniel 5056:
1.63 daniel 5057: op = xmlNewElementContent(NULL, XML_ELEMENT_CONTENT_SEQ);
5058: if (op == NULL) {
5059: xmlFreeElementContent(ret);
5060: return(NULL);
5061: }
5062: if (last == NULL) {
5063: op->c1 = ret;
1.65 daniel 5064: ret = cur = op;
1.63 daniel 5065: } else {
5066: cur->c2 = op;
5067: op->c1 = last;
5068: cur =op;
1.65 daniel 5069: last = NULL;
1.63 daniel 5070: }
1.152 daniel 5071: } else if (RAW == '|') {
1.62 daniel 5072: if (type == 0) type = CUR;
5073:
5074: /*
1.63 daniel 5075: * Detect "Name , Name | Name" error
1.62 daniel 5076: */
5077: else if (type != CUR) {
5078: if ((ctxt->sax != NULL) && (ctxt->sax->error != NULL))
1.74 daniel 5079: ctxt->sax->error(ctxt->userData,
1.62 daniel 5080: "xmlParseElementChildrenContentDecl : '%c' expected\n",
5081: type);
1.123 daniel 5082: ctxt->errNo = XML_ERR_SEPARATOR_REQUIRED;
1.62 daniel 5083: ctxt->wellFormed = 0;
5084: xmlFreeElementContent(ret);
5085: return(NULL);
5086: }
1.64 daniel 5087: NEXT;
1.62 daniel 5088:
1.63 daniel 5089: op = xmlNewElementContent(NULL, XML_ELEMENT_CONTENT_OR);
5090: if (op == NULL) {
5091: xmlFreeElementContent(ret);
5092: return(NULL);
5093: }
5094: if (last == NULL) {
5095: op->c1 = ret;
1.65 daniel 5096: ret = cur = op;
1.63 daniel 5097: } else {
5098: cur->c2 = op;
5099: op->c1 = last;
5100: cur =op;
1.65 daniel 5101: last = NULL;
1.63 daniel 5102: }
1.62 daniel 5103: } else {
5104: if ((ctxt->sax != NULL) && (ctxt->sax->error != NULL))
1.74 daniel 5105: ctxt->sax->error(ctxt->userData,
1.62 daniel 5106: "xmlParseElementChildrenContentDecl : ',' '|' or ')' expected\n");
5107: ctxt->wellFormed = 0;
1.123 daniel 5108: ctxt->errNo = XML_ERR_ELEMCONTENT_NOT_FINISHED;
1.62 daniel 5109: xmlFreeElementContent(ret);
5110: return(NULL);
5111: }
1.101 daniel 5112: GROW;
1.62 daniel 5113: SKIP_BLANKS;
1.101 daniel 5114: GROW;
1.152 daniel 5115: if (RAW == '(') {
1.63 daniel 5116: /* Recurse on second child */
1.62 daniel 5117: NEXT;
5118: SKIP_BLANKS;
1.65 daniel 5119: last = xmlParseElementChildrenContentDecl(ctxt);
1.62 daniel 5120: SKIP_BLANKS;
5121: } else {
5122: elem = xmlParseName(ctxt);
5123: if (elem == NULL) {
5124: if ((ctxt->sax != NULL) && (ctxt->sax->error != NULL))
1.74 daniel 5125: ctxt->sax->error(ctxt->userData,
1.122 daniel 5126: "xmlParseElementChildrenContentDecl : Name or '(' expected\n");
1.123 daniel 5127: ctxt->errNo = XML_ERR_ELEMCONTENT_NOT_STARTED;
1.62 daniel 5128: ctxt->wellFormed = 0;
5129: return(NULL);
5130: }
1.65 daniel 5131: last = xmlNewElementContent(elem, XML_ELEMENT_CONTENT_ELEMENT);
1.119 daniel 5132: xmlFree(elem);
1.152 daniel 5133: if (RAW == '?') {
1.105 daniel 5134: last->ocur = XML_ELEMENT_CONTENT_OPT;
5135: NEXT;
1.152 daniel 5136: } else if (RAW == '*') {
1.105 daniel 5137: last->ocur = XML_ELEMENT_CONTENT_MULT;
5138: NEXT;
1.152 daniel 5139: } else if (RAW == '+') {
1.105 daniel 5140: last->ocur = XML_ELEMENT_CONTENT_PLUS;
5141: NEXT;
5142: } else {
5143: last->ocur = XML_ELEMENT_CONTENT_ONCE;
5144: }
1.63 daniel 5145: }
5146: SKIP_BLANKS;
1.97 daniel 5147: GROW;
1.64 daniel 5148: }
1.65 daniel 5149: if ((cur != NULL) && (last != NULL)) {
5150: cur->c2 = last;
1.62 daniel 5151: }
5152: NEXT;
1.152 daniel 5153: if (RAW == '?') {
1.62 daniel 5154: ret->ocur = XML_ELEMENT_CONTENT_OPT;
5155: NEXT;
1.152 daniel 5156: } else if (RAW == '*') {
1.62 daniel 5157: ret->ocur = XML_ELEMENT_CONTENT_MULT;
5158: NEXT;
1.152 daniel 5159: } else if (RAW == '+') {
1.62 daniel 5160: ret->ocur = XML_ELEMENT_CONTENT_PLUS;
5161: NEXT;
5162: }
5163: return(ret);
1.61 daniel 5164: }
5165:
5166: /**
5167: * xmlParseElementContentDecl:
5168: * @ctxt: an XML parser context
5169: * @name: the name of the element being defined.
5170: * @result: the Element Content pointer will be stored here if any
1.22 daniel 5171: *
1.61 daniel 5172: * parse the declaration for an Element content either Mixed or Children,
5173: * the cases EMPTY and ANY are handled directly in xmlParseElementDecl
5174: *
5175: * [46] contentspec ::= 'EMPTY' | 'ANY' | Mixed | children
1.50 daniel 5176: *
1.61 daniel 5177: * returns: the type of element content XML_ELEMENT_TYPE_xxx
1.22 daniel 5178: */
5179:
1.61 daniel 5180: int
1.123 daniel 5181: xmlParseElementContentDecl(xmlParserCtxtPtr ctxt, xmlChar *name,
1.61 daniel 5182: xmlElementContentPtr *result) {
5183:
5184: xmlElementContentPtr tree = NULL;
5185: int res;
5186:
5187: *result = NULL;
5188:
1.152 daniel 5189: if (RAW != '(') {
1.61 daniel 5190: if ((ctxt->sax != NULL) && (ctxt->sax->error != NULL))
1.74 daniel 5191: ctxt->sax->error(ctxt->userData,
1.61 daniel 5192: "xmlParseElementContentDecl : '(' expected\n");
1.123 daniel 5193: ctxt->errNo = XML_ERR_ELEMCONTENT_NOT_STARTED;
1.61 daniel 5194: ctxt->wellFormed = 0;
5195: return(-1);
5196: }
5197: NEXT;
1.97 daniel 5198: GROW;
1.61 daniel 5199: SKIP_BLANKS;
1.152 daniel 5200: if ((RAW == '#') && (NXT(1) == 'P') &&
1.61 daniel 5201: (NXT(2) == 'C') && (NXT(3) == 'D') &&
5202: (NXT(4) == 'A') && (NXT(5) == 'T') &&
5203: (NXT(6) == 'A')) {
1.62 daniel 5204: tree = xmlParseElementMixedContentDecl(ctxt);
1.61 daniel 5205: res = XML_ELEMENT_TYPE_MIXED;
5206: } else {
1.62 daniel 5207: tree = xmlParseElementChildrenContentDecl(ctxt);
1.61 daniel 5208: res = XML_ELEMENT_TYPE_ELEMENT;
5209: }
5210: SKIP_BLANKS;
1.63 daniel 5211: /****************************
1.152 daniel 5212: if (RAW != ')') {
1.61 daniel 5213: if ((ctxt->sax != NULL) && (ctxt->sax->error != NULL))
1.74 daniel 5214: ctxt->sax->error(ctxt->userData,
1.61 daniel 5215: "xmlParseElementContentDecl : ')' expected\n");
5216: ctxt->wellFormed = 0;
5217: return(-1);
5218: }
1.63 daniel 5219: ****************************/
5220: *result = tree;
1.61 daniel 5221: return(res);
1.22 daniel 5222: }
5223:
1.50 daniel 5224: /**
5225: * xmlParseElementDecl:
5226: * @ctxt: an XML parser context
5227: *
5228: * parse an Element declaration.
1.22 daniel 5229: *
5230: * [45] elementdecl ::= '<!ELEMENT' S Name S contentspec S? '>'
5231: *
1.99 daniel 5232: * [ VC: Unique Element Type Declaration ]
1.117 daniel 5233: * No element type may be declared more than once
1.69 daniel 5234: *
5235: * Returns the type of the element, or -1 in case of error
1.22 daniel 5236: */
1.59 daniel 5237: int
1.55 daniel 5238: xmlParseElementDecl(xmlParserCtxtPtr ctxt) {
1.123 daniel 5239: xmlChar *name;
1.59 daniel 5240: int ret = -1;
1.61 daniel 5241: xmlElementContentPtr content = NULL;
1.22 daniel 5242:
1.97 daniel 5243: GROW;
1.152 daniel 5244: if ((RAW == '<') && (NXT(1) == '!') &&
1.40 daniel 5245: (NXT(2) == 'E') && (NXT(3) == 'L') &&
5246: (NXT(4) == 'E') && (NXT(5) == 'M') &&
5247: (NXT(6) == 'E') && (NXT(7) == 'N') &&
1.59 daniel 5248: (NXT(8) == 'T')) {
1.40 daniel 5249: SKIP(9);
1.59 daniel 5250: if (!IS_BLANK(CUR)) {
5251: if ((ctxt->sax != NULL) && (ctxt->sax->error != NULL))
1.74 daniel 5252: ctxt->sax->error(ctxt->userData,
1.59 daniel 5253: "Space required after 'ELEMENT'\n");
1.123 daniel 5254: ctxt->errNo = XML_ERR_SPACE_REQUIRED;
1.59 daniel 5255: ctxt->wellFormed = 0;
5256: }
1.42 daniel 5257: SKIP_BLANKS;
1.22 daniel 5258: name = xmlParseName(ctxt);
5259: if (name == NULL) {
1.55 daniel 5260: if ((ctxt->sax != NULL) && (ctxt->sax->error != NULL))
1.74 daniel 5261: ctxt->sax->error(ctxt->userData,
1.59 daniel 5262: "xmlParseElementDecl: no name for Element\n");
1.123 daniel 5263: ctxt->errNo = XML_ERR_NAME_REQUIRED;
1.59 daniel 5264: ctxt->wellFormed = 0;
5265: return(-1);
5266: }
5267: if (!IS_BLANK(CUR)) {
5268: if ((ctxt->sax != NULL) && (ctxt->sax->error != NULL))
1.74 daniel 5269: ctxt->sax->error(ctxt->userData,
1.59 daniel 5270: "Space required after the element name\n");
1.123 daniel 5271: ctxt->errNo = XML_ERR_SPACE_REQUIRED;
1.59 daniel 5272: ctxt->wellFormed = 0;
1.22 daniel 5273: }
1.42 daniel 5274: SKIP_BLANKS;
1.152 daniel 5275: if ((RAW == 'E') && (NXT(1) == 'M') &&
1.40 daniel 5276: (NXT(2) == 'P') && (NXT(3) == 'T') &&
5277: (NXT(4) == 'Y')) {
5278: SKIP(5);
1.22 daniel 5279: /*
5280: * Element must always be empty.
5281: */
1.59 daniel 5282: ret = XML_ELEMENT_TYPE_EMPTY;
1.152 daniel 5283: } else if ((RAW == 'A') && (NXT(1) == 'N') &&
1.40 daniel 5284: (NXT(2) == 'Y')) {
5285: SKIP(3);
1.22 daniel 5286: /*
5287: * Element is a generic container.
5288: */
1.59 daniel 5289: ret = XML_ELEMENT_TYPE_ANY;
1.152 daniel 5290: } else if (RAW == '(') {
1.61 daniel 5291: ret = xmlParseElementContentDecl(ctxt, name, &content);
1.22 daniel 5292: } else {
1.98 daniel 5293: /*
5294: * [ WFC: PEs in Internal Subset ] error handling.
5295: */
1.152 daniel 5296: if ((RAW == '%') && (ctxt->external == 0) &&
1.98 daniel 5297: (ctxt->inputNr == 1)) {
5298: if ((ctxt->sax != NULL) && (ctxt->sax->error != NULL))
5299: ctxt->sax->error(ctxt->userData,
5300: "PEReference: forbidden within markup decl in internal subset\n");
1.123 daniel 5301: ctxt->errNo = XML_ERR_PEREF_IN_INT_SUBSET;
1.98 daniel 5302: } else {
5303: if ((ctxt->sax != NULL) && (ctxt->sax->error != NULL))
5304: ctxt->sax->error(ctxt->userData,
5305: "xmlParseElementDecl: 'EMPTY', 'ANY' or '(' expected\n");
1.123 daniel 5306: ctxt->errNo = XML_ERR_ELEMCONTENT_NOT_STARTED;
1.98 daniel 5307: }
1.61 daniel 5308: ctxt->wellFormed = 0;
1.119 daniel 5309: if (name != NULL) xmlFree(name);
1.61 daniel 5310: return(-1);
1.22 daniel 5311: }
1.142 daniel 5312:
5313: SKIP_BLANKS;
5314: /*
5315: * Pop-up of finished entities.
5316: */
1.152 daniel 5317: while ((RAW == 0) && (ctxt->inputNr > 1))
1.142 daniel 5318: xmlPopInput(ctxt);
1.42 daniel 5319: SKIP_BLANKS;
1.142 daniel 5320:
1.152 daniel 5321: if (RAW != '>') {
1.55 daniel 5322: if ((ctxt->sax != NULL) && (ctxt->sax->error != NULL))
1.74 daniel 5323: ctxt->sax->error(ctxt->userData,
1.31 daniel 5324: "xmlParseElementDecl: expected '>' at the end\n");
1.123 daniel 5325: ctxt->errNo = XML_ERR_GT_REQUIRED;
1.59 daniel 5326: ctxt->wellFormed = 0;
1.61 daniel 5327: } else {
1.40 daniel 5328: NEXT;
1.72 daniel 5329: if ((ctxt->sax != NULL) && (ctxt->sax->elementDecl != NULL))
1.76 daniel 5330: ctxt->sax->elementDecl(ctxt->userData, name, ret,
5331: content);
1.61 daniel 5332: }
1.84 daniel 5333: if (content != NULL) {
5334: xmlFreeElementContent(content);
5335: }
1.61 daniel 5336: if (name != NULL) {
1.119 daniel 5337: xmlFree(name);
1.61 daniel 5338: }
1.22 daniel 5339: }
1.59 daniel 5340: return(ret);
1.22 daniel 5341: }
5342:
1.50 daniel 5343: /**
5344: * xmlParseMarkupDecl:
5345: * @ctxt: an XML parser context
5346: *
5347: * parse Markup declarations
1.22 daniel 5348: *
5349: * [29] markupdecl ::= elementdecl | AttlistDecl | EntityDecl |
5350: * NotationDecl | PI | Comment
5351: *
1.98 daniel 5352: * [ VC: Proper Declaration/PE Nesting ]
5353: * TODO Parameter-entity replacement text must be properly nested with
5354: * markup declarations. That is to say, if either the first character
5355: * or the last character of a markup declaration (markupdecl above) is
5356: * contained in the replacement text for a parameter-entity reference,
5357: * both must be contained in the same replacement text.
5358: *
5359: * [ WFC: PEs in Internal Subset ]
5360: * In the internal DTD subset, parameter-entity references can occur
5361: * only where markup declarations can occur, not within markup declarations.
5362: * (This does not apply to references that occur in external parameter
5363: * entities or to the external subset.)
1.22 daniel 5364: */
1.55 daniel 5365: void
5366: xmlParseMarkupDecl(xmlParserCtxtPtr ctxt) {
1.94 daniel 5367: GROW;
1.22 daniel 5368: xmlParseElementDecl(ctxt);
5369: xmlParseAttributeListDecl(ctxt);
5370: xmlParseEntityDecl(ctxt);
5371: xmlParseNotationDecl(ctxt);
5372: xmlParsePI(ctxt);
1.114 daniel 5373: xmlParseComment(ctxt);
1.98 daniel 5374: /*
5375: * This is only for internal subset. On external entities,
5376: * the replacement is done before parsing stage
5377: */
5378: if ((ctxt->external == 0) && (ctxt->inputNr == 1))
5379: xmlParsePEReference(ctxt);
1.97 daniel 5380: ctxt->instate = XML_PARSER_DTD;
1.22 daniel 5381: }
5382:
1.50 daniel 5383: /**
1.76 daniel 5384: * xmlParseTextDecl:
5385: * @ctxt: an XML parser context
5386: *
5387: * parse an XML declaration header for external entities
5388: *
5389: * [77] TextDecl ::= '<?xml' VersionInfo? EncodingDecl S? '?>'
5390: *
5391: * Returns the only valuable info for an external parsed entity, the encoding
5392: */
5393:
1.123 daniel 5394: xmlChar *
1.76 daniel 5395: xmlParseTextDecl(xmlParserCtxtPtr ctxt) {
1.123 daniel 5396: xmlChar *version;
5397: xmlChar *encoding = NULL;
1.76 daniel 5398:
5399: /*
5400: * We know that '<?xml' is here.
5401: */
5402: SKIP(5);
5403:
5404: if (!IS_BLANK(CUR)) {
5405: if ((ctxt->sax != NULL) && (ctxt->sax->error != NULL))
1.122 daniel 5406: ctxt->sax->error(ctxt->userData,
5407: "Space needed after '<?xml'\n");
1.123 daniel 5408: ctxt->errNo = XML_ERR_SPACE_REQUIRED;
1.76 daniel 5409: ctxt->wellFormed = 0;
5410: }
5411: SKIP_BLANKS;
5412:
5413: /*
5414: * We may have the VersionInfo here.
5415: */
5416: version = xmlParseVersionInfo(ctxt);
5417: if (version == NULL)
5418: version = xmlCharStrdup(XML_DEFAULT_VERSION);
1.165 daniel 5419: ctxt->input->version = xmlStrdup(version);
1.119 daniel 5420: xmlFree(version);
1.76 daniel 5421:
5422: /*
5423: * We must have the encoding declaration
5424: */
5425: if (!IS_BLANK(CUR)) {
5426: if ((ctxt->sax != NULL) && (ctxt->sax->error != NULL))
1.122 daniel 5427: ctxt->sax->error(ctxt->userData, "Space needed here\n");
1.123 daniel 5428: ctxt->errNo = XML_ERR_SPACE_REQUIRED;
1.76 daniel 5429: ctxt->wellFormed = 0;
5430: }
5431: encoding = xmlParseEncodingDecl(ctxt);
5432:
5433: SKIP_BLANKS;
1.152 daniel 5434: if ((RAW == '?') && (NXT(1) == '>')) {
1.76 daniel 5435: SKIP(2);
1.152 daniel 5436: } else if (RAW == '>') {
1.76 daniel 5437: /* Deprecated old WD ... */
5438: if ((ctxt->sax != NULL) && (ctxt->sax->error != NULL))
1.122 daniel 5439: ctxt->sax->error(ctxt->userData,
5440: "XML declaration must end-up with '?>'\n");
1.123 daniel 5441: ctxt->errNo = XML_ERR_XMLDECL_NOT_FINISHED;
1.76 daniel 5442: ctxt->wellFormed = 0;
5443: NEXT;
5444: } else {
5445: if ((ctxt->sax != NULL) && (ctxt->sax->error != NULL))
1.122 daniel 5446: ctxt->sax->error(ctxt->userData,
5447: "parsing XML declaration: '?>' expected\n");
1.123 daniel 5448: ctxt->errNo = XML_ERR_XMLDECL_NOT_FINISHED;
1.76 daniel 5449: ctxt->wellFormed = 0;
5450: MOVETO_ENDTAG(CUR_PTR);
5451: NEXT;
5452: }
5453: return(encoding);
5454: }
5455:
5456: /*
5457: * xmlParseConditionalSections
5458: * @ctxt: an XML parser context
5459: *
5460: * TODO : Conditionnal section are not yet supported !
5461: *
5462: * [61] conditionalSect ::= includeSect | ignoreSect
5463: * [62] includeSect ::= '<![' S? 'INCLUDE' S? '[' extSubsetDecl ']]>'
5464: * [63] ignoreSect ::= '<![' S? 'IGNORE' S? '[' ignoreSectContents* ']]>'
5465: * [64] ignoreSectContents ::= Ignore ('<![' ignoreSectContents ']]>' Ignore)*
5466: * [65] Ignore ::= Char* - (Char* ('<![' | ']]>') Char*)
5467: */
5468:
5469: void
5470: xmlParseConditionalSections(xmlParserCtxtPtr ctxt) {
1.165 daniel 5471: SKIP(3);
5472: SKIP_BLANKS;
1.168 ! daniel 5473: if ((RAW == 'I') && (NXT(1) == 'N') && (NXT(2) == 'C') &&
! 5474: (NXT(3) == 'L') && (NXT(4) == 'U') && (NXT(5) == 'D') &&
! 5475: (NXT(6) == 'E')) {
1.165 daniel 5476: SKIP(7);
1.168 ! daniel 5477: SKIP_BLANKS;
! 5478: if (RAW != '[') {
! 5479: if ((ctxt->sax != NULL) && (ctxt->sax->error != NULL))
! 5480: ctxt->sax->error(ctxt->userData,
! 5481: "XML conditional section '[' expected\n");
! 5482: ctxt->errNo = XML_ERR_CONDSEC_INVALID;
! 5483: ctxt->wellFormed = 0;
! 5484: } else {
! 5485: NEXT;
! 5486: }
1.165 daniel 5487: while ((RAW != 0) && ((RAW != ']') || (NXT(1) != ']') ||
5488: (NXT(2) != '>'))) {
5489: const xmlChar *check = CUR_PTR;
5490: int cons = ctxt->input->consumed;
5491: int tok = ctxt->token;
5492:
5493: if ((RAW == '<') && (NXT(1) == '!') && (NXT(2) == '[')) {
5494: xmlParseConditionalSections(ctxt);
5495: } else if (IS_BLANK(CUR)) {
5496: NEXT;
5497: } else if (RAW == '%') {
5498: xmlParsePEReference(ctxt);
5499: } else
5500: xmlParseMarkupDecl(ctxt);
5501:
5502: /*
5503: * Pop-up of finished entities.
5504: */
5505: while ((RAW == 0) && (ctxt->inputNr > 1))
5506: xmlPopInput(ctxt);
5507:
5508: if ((CUR_PTR == check) && (cons == ctxt->input->consumed) &&
5509: (tok == ctxt->token)) {
5510: if ((ctxt->sax != NULL) && (ctxt->sax->error != NULL))
5511: ctxt->sax->error(ctxt->userData,
5512: "Content error in the external subset\n");
5513: ctxt->wellFormed = 0;
5514: ctxt->errNo = XML_ERR_EXT_SUBSET_NOT_FINISHED;
5515: break;
5516: }
5517: }
1.168 ! daniel 5518: } else if ((RAW == 'I') && (NXT(1) == 'G') && (NXT(2) == 'N') &&
! 5519: (NXT(3) == 'O') && (NXT(4) == 'R') && (NXT(5) == 'E')) {
! 5520: SKIP(6);
! 5521: SKIP_BLANKS;
! 5522: if (RAW != '[') {
! 5523: if ((ctxt->sax != NULL) && (ctxt->sax->error != NULL))
! 5524: ctxt->sax->error(ctxt->userData,
! 5525: "XML conditional section '[' expected\n");
! 5526: ctxt->errNo = XML_ERR_CONDSEC_INVALID;
! 5527: ctxt->wellFormed = 0;
! 5528: } else {
! 5529: NEXT;
! 5530: }
1.143 daniel 5531: /*
1.165 daniel 5532: * Skip up to the end of the conditionnal section.
1.168 ! daniel 5533: * !!!! TODO [4.3] Note that for reliable parsing, the contents
! 5534: * of even ignored conditional sections must be read in order to
! 5535: * detect nested conditional sections
1.143 daniel 5536: */
1.165 daniel 5537: while ((RAW != 0) && ((RAW != ']') || (NXT(1) != ']') ||
5538: (NXT(2) != '>'))) {
5539: NEXT;
5540: /*
5541: * Pop-up of finished entities.
5542: */
5543: while ((RAW == 0) && (ctxt->inputNr > 1))
5544: xmlPopInput(ctxt);
1.143 daniel 5545:
1.165 daniel 5546: if (RAW == 0)
5547: GROW;
5548: }
1.168 ! daniel 5549: } else {
! 5550: if ((ctxt->sax != NULL) && (ctxt->sax->error != NULL))
! 5551: ctxt->sax->error(ctxt->userData,
! 5552: "XML conditional section INCLUDE or IGNORE keyword expected\n");
! 5553: ctxt->errNo = XML_ERR_CONDSEC_INVALID;
! 5554: ctxt->wellFormed = 0;
1.143 daniel 5555: }
5556:
1.152 daniel 5557: if (RAW == 0)
1.143 daniel 5558: SHRINK;
5559:
1.152 daniel 5560: if (RAW == 0) {
1.76 daniel 5561: if ((ctxt->sax != NULL) && (ctxt->sax->error != NULL))
5562: ctxt->sax->error(ctxt->userData,
5563: "XML conditional section not closed\n");
1.123 daniel 5564: ctxt->errNo = XML_ERR_CONDSEC_NOT_FINISHED;
1.76 daniel 5565: ctxt->wellFormed = 0;
1.143 daniel 5566: } else {
5567: SKIP(3);
1.76 daniel 5568: }
5569: }
5570:
5571: /**
1.124 daniel 5572: * xmlParseExternalSubset:
1.76 daniel 5573: * @ctxt: an XML parser context
1.124 daniel 5574: * @ExternalID: the external identifier
5575: * @SystemID: the system identifier (or URL)
1.76 daniel 5576: *
5577: * Parse the markup declarations of an external subset from the current
 * input of @ctxt.
 *
 * An optional leading text declaration ("<?xml") is parsed and its result
 * discarded. If @ctxt has no document yet, a "1.0" document is created;
 * if that document has no internal subset, one is created from
 * @ExternalID and @SystemID. ctxt->instate is set to XML_PARSER_DTD and
 * ctxt->external to 1 before the declarations are read.
 *
 * Nothing is returned: problems are reported through ctxt->sax->error
 * (when set) and recorded in ctxt->errNo and ctxt->wellFormed.
5578: *
5579: * [30] extSubset ::= textDecl? extSubsetDecl
5580: *
5581: * [31] extSubsetDecl ::= (markupdecl | conditionalSect | PEReference | S) *
5582: */
5583: void
1.123 daniel 5584: xmlParseExternalSubset(xmlParserCtxtPtr ctxt, const xmlChar *ExternalID,
5585: const xmlChar *SystemID) {
1.132 daniel 5586: GROW;
    /* Optional text declaration: parse it and drop the returned string. */
1.152 daniel 5587: if ((RAW == '<') && (NXT(1) == '?') &&
1.76 daniel 5588: (NXT(2) == 'x') && (NXT(3) == 'm') &&
5589: (NXT(4) == 'l')) {
1.134 daniel 5590: xmlChar *decl;
5591:
5592: decl = xmlParseTextDecl(ctxt);
5593: if (decl != NULL)
5594: xmlFree(decl);
1.76 daniel 5595: }
    /* Make sure there is a document, and a DTD node on it, to fill in. */
1.79 daniel 5596: if (ctxt->myDoc == NULL) {
1.116 daniel 5597: ctxt->myDoc = xmlNewDoc(BAD_CAST "1.0");
1.79 daniel 5598: }
5599: if ((ctxt->myDoc != NULL) && (ctxt->myDoc->intSubset == NULL))
5600: xmlCreateIntSubset(ctxt->myDoc, NULL, ExternalID, SystemID);
5601:
1.96 daniel 5602: ctxt->instate = XML_PARSER_DTD;
1.101 daniel 5603: ctxt->external = 1;
    /* Keep going while the input is at "<?", "<!" or a blank. */
1.152 daniel 5604: while (((RAW == '<') && (NXT(1) == '?')) ||
5605: ((RAW == '<') && (NXT(1) == '!')) ||
1.164 daniel 5606: IS_BLANK(CUR)) {
    /* Snapshot of the position, used below to detect a lack of progress. */
1.123 daniel 5607: const xmlChar *check = CUR_PTR;
1.115 daniel 5608: int cons = ctxt->input->consumed;
1.164 daniel 5609: int tok = ctxt->token;
1.115 daniel 5610:
1.152 daniel 5611: if ((RAW == '<') && (NXT(1) == '!') && (NXT(2) == '[')) {
1.76 daniel 5612: xmlParseConditionalSections(ctxt);
5613: } else if (IS_BLANK(CUR)) {
5614: NEXT;
    /*
     * NOTE(review): the loop condition above does not test for '%';
     * confirm whether this branch can actually be reached.
     */
1.152 daniel 5615: } else if (RAW == '%') {
1.76 daniel 5616: xmlParsePEReference(ctxt);
5617: } else
5618: xmlParseMarkupDecl(ctxt);
1.77 daniel 5619:
5620: /*
5621: * Pop finished entity inputs, but never the last remaining input.
5622: */
1.166 daniel 5623: while ((RAW == 0) && (ctxt->inputNr > 1))
1.77 daniel 5624: xmlPopInput(ctxt);
5625:
    /*
     * Nothing moved during this iteration (same pointer, same consumed
     * count, same pending token): report the error and leave the loop
     * rather than spin on the same content.
     */
1.164 daniel 5626: if ((CUR_PTR == check) && (cons == ctxt->input->consumed) &&
5627: (tok == ctxt->token)) {
1.115 daniel 5628: if ((ctxt->sax != NULL) && (ctxt->sax->error != NULL))
5629: ctxt->sax->error(ctxt->userData,
5630: "Content error in the external subset\n");
5631: ctxt->wellFormed = 0;
1.123 daniel 5632: ctxt->errNo = XML_ERR_EXT_SUBSET_NOT_FINISHED;
1.115 daniel 5633: break;
5634: }
1.76 daniel 5635: }
5636:
    /* Anything left over once the loop stops is an error. */
1.152 daniel 5637: if (RAW != 0) {
1.76 daniel 5638: if ((ctxt->sax != NULL) && (ctxt->sax->error != NULL))
5639: ctxt->sax->error(ctxt->userData,
5640: "Extra content at the end of the document\n");
1.123 daniel 5641: ctxt->errNo = XML_ERR_EXT_SUBSET_NOT_FINISHED;
1.76 daniel 5642: ctxt->wellFormed = 0;
5643: }
5644:
5645: }
5646:
5647: /**
1.77 daniel 5648: * xmlParseReference:
5649: * @ctxt: an XML parser context
5650: *
5651: * Parse and handle a reference ('&...') found in content. Depending on
5652: * the SAX interface and the parser settings this ends up in one of:
1.79 daniel 5653: * - a characters() call (character reference, predefined entity, or
5654: * an entity whose content is inlined);
 * - a reference() call (named entity when entities are not replaced,
 * or a character reference above 0xFF when ctxt->encoding is set);
 * - the entity being pushed as a new input (ctxt->replaceEntities).
 *
 * Does nothing if the current character is not '&'.
1.77 daniel 5655: *
5656: * [67] Reference ::= EntityRef | CharRef
5657: */
5658: void
5659: xmlParseReference(xmlParserCtxtPtr ctxt) {
5660: xmlEntityPtr ent;
1.123 daniel 5661: xmlChar *val;
1.152 daniel 5662: if (RAW != '&') return;
1.77 daniel 5663:
    /*
     * More than one input on the stack: the '&' is passed through as a
     * literal character and consumed, either from the pending token or
     * from the input.
     */
1.113 daniel 5664: if (ctxt->inputNr > 1) {
1.123 daniel 5665: xmlChar cur[2] = { '&' , 0 } ;
1.113 daniel 5666:
5667: if ((ctxt->sax != NULL) && (ctxt->sax->characters != NULL))
5668: ctxt->sax->characters(ctxt->userData, cur, 1);
5669: if (ctxt->token == '&')
5670: ctxt->token = 0;
5671: else {
5672: SKIP(1);
5673: }
5674: return;
5675: }
    /* Character reference: "&#..." */
1.77 daniel 5676: if (NXT(1) == '#') {
1.152 daniel 5677: int i = 0;
    /*
     * NOTE(review): out[] must hold "#x" plus the hex digits and a NUL;
     * this presumably relies on xmlParseCharRef() bounding its result -
     * confirm.
     */
1.153 daniel 5678: xmlChar out[10];
    /* Read the 'x' marker before xmlParseCharRef() is called below. */
5679: int hex = NXT(2);
    /* Note: this int shadows the xmlChar *val declared at function scope. */
1.77 daniel 5680: int val = xmlParseCharRef(ctxt);
1.152 daniel 5681:
1.153 daniel 5682: if (ctxt->encoding != NULL) {
5683: /*
5684: * So we are using non-UTF-8 buffers
5685: * Check that the char fits on 8 bits; if not,
5686: * generate a CharRef.
5687: */
5688: if (val <= 0xFF) {
5689: out[0] = val;
5690: out[1] = 0;
5691: if ((ctxt->sax != NULL) && (ctxt->sax->characters != NULL))
5692: ctxt->sax->characters(ctxt->userData, out, 1);
5693: } else {
    /* Rebuild the reference text in the notation the document used. */
5694: if ((hex == 'x') || (hex == 'X'))
5695: sprintf((char *)out, "#x%X", val);
5696: else
5697: sprintf((char *)out, "#%d", val);
5698: if ((ctxt->sax != NULL) && (ctxt->sax->reference != NULL))
5699: ctxt->sax->reference(ctxt->userData, out);
5700: }
5701: } else {
5702: /*
5703: * Just encode the value in UTF-8
5704: */
5705: COPY_BUF(0 ,out, i, val);
5706: out[i] = 0;
5707: if ((ctxt->sax != NULL) && (ctxt->sax->characters != NULL))
5708: ctxt->sax->characters(ctxt->userData, out, i);
5709: }
1.77 daniel 5710: } else {
    /* Named entity reference. */
5711: ent = xmlParseEntityRef(ctxt);
5712: if (ent == NULL) return;
5713: if ((ent->name != NULL) &&
1.159 daniel 5714: (ent->etype != XML_INTERNAL_PREDEFINED_ENTITY)) {
1.113 daniel 5715: if ((ctxt->sax != NULL) && (ctxt->sax->reference != NULL) &&
5716: (ctxt->replaceEntities == 0)) {
5717: /*
5718: * Entities are not replaced: hand the reference to SAX.
5719: */
5720: ctxt->sax->reference(ctxt->userData, ent->name);
5721: return;
5722: } else if (ctxt->replaceEntities) {
1.113 daniel 5723: xmlParserInputPtr input;
1.79 daniel 5724:
    /*
     * Entities are replaced: push the entity as a new input.
     * NOTE(review): input is not checked against NULL before
     * input->standalone is read below - confirm that
     * xmlNewEntityInputStream() cannot fail here.
     */
1.113 daniel 5725: input = xmlNewEntityInputStream(ctxt, ent);
5726: xmlPushInput(ctxt, input);
    /* An external parsed entity may start with its own "<?xml " decl. */
1.167 daniel 5727: if ((ent->etype == XML_EXTERNAL_GENERAL_PARSED_ENTITY) &&
5728: (RAW == '<') && (NXT(1) == '?') &&
5729: (NXT(2) == 'x') && (NXT(3) == 'm') &&
5730: (NXT(4) == 'l') && (IS_BLANK(NXT(5)))) {
5731: xmlParseXMLDecl(ctxt);
5732: if (input->standalone) {
5733: if ((ctxt->sax != NULL) && (ctxt->sax->error != NULL))
5734: ctxt->sax->error(ctxt->userData,
5735: "external parsed entities cannot be standalone\n");
5736: ctxt->errNo = XML_ERR_EXT_ENTITY_STANDALONE;
5737: ctxt->wellFormed = 0;
5738: }
5739: }
1.113 daniel 5740: return;
5741: }
1.77 daniel 5742: }
    /*
     * Reached for predefined or unnamed entities, and for named ones when
     * neither branch above returned.
     */
5743: val = ent->content;
5744: if (val == NULL) return;
5745: /*
5746: * inline the entity.
5747: */
5748: if ((ctxt->sax != NULL) && (ctxt->sax->characters != NULL))
5749: ctxt->sax->characters(ctxt->userData, val, xmlStrlen(val));
5750: }
1.24 daniel 5751: }
5752:
1.50 daniel 5753: /**
5754: * xmlParseEntityRef:
5755: * @ctxt: an XML parser context
5756: *
5757: * Parse a general entity reference at the current input position and
 * look the entity up, first through the SAX getEntity() callback and
 * then in the predefined set. The well-formedness constraints below are
 * checked on the result; violations are reported through SAX and recorded
 * in ctxt->errNo / ctxt->wellFormed, but the entity is still returned.
1.24 daniel 5758: *
5759: * [68] EntityRef ::= '&' Name ';'
1.68 daniel 5760: *
1.98 daniel 5761: * [ WFC: Entity Declared ]
5762: * In a document without any DTD, a document with only an internal DTD
5763: * subset which contains no parameter entity references, or a document
5764: * with "standalone='yes'", the Name given in the entity reference
5765: * must match that in an entity declaration, except that well-formed
5766: * documents need not declare any of the following entities: amp, lt,
5767: * gt, apos, quot. The declaration of a parameter entity must precede
5768: * any reference to it. Similarly, the declaration of a general entity
5769: * must precede any reference to it which appears in a default value in an
5770: * attribute-list declaration. Note that if entities are declared in the
5771: * external subset or in external parameter entities, a non-validating
5772: * processor is not obligated to read and process their declarations;
5773: * for such documents, the rule that an entity must be declared is a
5774: * well-formedness constraint only if standalone='yes'.
5775: *
5776: * [ WFC: Parsed Entity ]
5777: * An entity reference must not contain the name of an unparsed entity
5778: *
1.77 daniel 5779: * Returns the xmlEntityPtr if found, or NULL otherwise.
1.24 daniel 5780: */
1.77 daniel 5781: xmlEntityPtr
1.55 daniel 5782: xmlParseEntityRef(xmlParserCtxtPtr ctxt) {
1.123 daniel 5783: xmlChar *name;
1.72 daniel 5784: xmlEntityPtr ent = NULL;
1.24 daniel 5785:
1.91 daniel 5786: GROW;
1.111 daniel 5787:
    /* Nothing is consumed and NULL is returned unless we are on a '&'. */
1.152 daniel 5788: if (RAW == '&') {
1.40 daniel 5789: NEXT;
1.24 daniel 5790: name = xmlParseName(ctxt);
5791: if (name == NULL) {
1.55 daniel 5792: if ((ctxt->sax != NULL) && (ctxt->sax->error != NULL))
1.98 daniel 5793: ctxt->sax->error(ctxt->userData,
5794: "xmlParseEntityRef: no name\n");
1.123 daniel 5795: ctxt->errNo = XML_ERR_NAME_REQUIRED;
1.59 daniel 5796: ctxt->wellFormed = 0;
1.24 daniel 5797: } else {
1.152 daniel 5798: if (RAW == ';') {
1.40 daniel 5799: NEXT;
1.24 daniel 5800: /*
1.77 daniel 5801: * Ask first SAX for entity resolution, otherwise try the
5802: * predefined set. Note that no lookup at all is done when
5803: * ctxt->sax is NULL.
 */
5804: if (ctxt->sax != NULL) {
5805: if (ctxt->sax->getEntity != NULL)
5806: ent = ctxt->sax->getEntity(ctxt->userData, name);
5807: if (ent == NULL)
5808: ent = xmlGetPredefinedEntity(name);
5809: }
5810: /*
1.98 daniel 5811: * [ WFC: Entity Declared ]
5812: * In a document without any DTD, a document with only an
5813: * internal DTD subset which contains no parameter entity
5814: * references, or a document with "standalone='yes'", the
5815: * Name given in the entity reference must match that in an
5816: * entity declaration, except that well-formed documents
5817: * need not declare any of the following entities: amp, lt,
5818: * gt, apos, quot.
5819: * The declaration of a parameter entity must precede any
5820: * reference to it.
5821: * Similarly, the declaration of a general entity must
5822: * precede any reference to it which appears in a default
5823: * value in an attribute-list declaration. Note that if
5824: * entities are declared in the external subset or in
5825: * external parameter entities, a non-validating processor
5826: * is not obligated to read and process their declarations;
5827: * for such documents, the rule that an entity must be
5828: * declared is a well-formedness constraint only if
5829: * standalone='yes'.
 *
 * Hence an unknown entity is an error in the first case and
 * only a warning otherwise.
1.59 daniel 5830: */
1.77 daniel 5831: if (ent == NULL) {
1.98 daniel 5832: if ((ctxt->standalone == 1) ||
5833: ((ctxt->hasExternalSubset == 0) &&
5834: (ctxt->hasPErefs == 0))) {
5835: if ((ctxt->sax != NULL) && (ctxt->sax->error != NULL))
1.77 daniel 5836: ctxt->sax->error(ctxt->userData,
5837: "Entity '%s' not defined\n", name);
1.123 daniel 5838: ctxt->errNo = XML_ERR_UNDECLARED_ENTITY;
1.77 daniel 5839: ctxt->wellFormed = 0;
5840: } else {
1.98 daniel 5841: if ((ctxt->sax != NULL) && (ctxt->sax->warning != NULL))
5842: ctxt->sax->warning(ctxt->userData,
5843: "Entity '%s' not defined\n", name);
1.123 daniel 5844: ctxt->errNo = XML_WAR_UNDECLARED_ENTITY;
1.59 daniel 5845: }
1.77 daniel 5846: }
1.59 daniel 5847:
5848: /*
1.98 daniel 5849: * [ WFC: Parsed Entity ]
5850: * An entity reference must not contain the name of an
5851: * unparsed entity
5852: */
1.159 daniel 5853: else if (ent->etype == XML_EXTERNAL_GENERAL_UNPARSED_ENTITY) {
1.98 daniel 5854: if ((ctxt->sax != NULL) && (ctxt->sax->error != NULL))
5855: ctxt->sax->error(ctxt->userData,
5856: "Entity reference to unparsed entity %s\n", name);
1.123 daniel 5857: ctxt->errNo = XML_ERR_UNPARSED_ENTITY;
1.98 daniel 5858: ctxt->wellFormed = 0;
5859: }
5860:
5861: /*
5862: * [ WFC: No External Entity References ]
5863: * Attribute values cannot contain direct or indirect
5864: * entity references to external entities.
5865: */
5866: else if ((ctxt->instate == XML_PARSER_ATTRIBUTE_VALUE) &&
1.159 daniel 5867: (ent->etype == XML_EXTERNAL_GENERAL_PARSED_ENTITY)) {
1.98 daniel 5868: if ((ctxt->sax != NULL) && (ctxt->sax->error != NULL))
5869: ctxt->sax->error(ctxt->userData,
5870: "Attribute references external entity '%s'\n", name);
1.123 daniel 5871: ctxt->errNo = XML_ERR_ENTITY_IS_EXTERNAL;
1.98 daniel 5872: ctxt->wellFormed = 0;
5873: }
5874: /*
5875: * [ WFC: No < in Attribute Values ]
5876: * The replacement text of any entity referred to directly or
5877: * indirectly in an attribute value (other than "&lt;") must
5878: * not contain a <.
 * (ent cannot be NULL in this branch, so the ent != NULL
 * test below is redundant.)
1.59 daniel 5879: */
1.98 daniel 5880: else if ((ctxt->instate == XML_PARSER_ATTRIBUTE_VALUE) &&
1.116 daniel 5881: (ent != NULL) &&
5882: (xmlStrcmp(ent->name, BAD_CAST "lt")) &&
1.98 daniel 5883: (ent->content != NULL) &&
5884: (xmlStrchr(ent->content, '<'))) {
5885: if ((ctxt->sax != NULL) && (ctxt->sax->error != NULL))
5886: ctxt->sax->error(ctxt->userData,
5887: "'<' in entity '%s' is not allowed in attributes values\n", name);
1.123 daniel 5888: ctxt->errNo = XML_ERR_LT_IN_ATTRIBUTE;
1.98 daniel 5889: ctxt->wellFormed = 0;
5890: }
5891:
5892: /*
5893: * Internal check, no parameter entities here ...
5894: */
5895: else {
1.159 daniel 5896: switch (ent->etype) {
1.59 daniel 5897: case XML_INTERNAL_PARAMETER_ENTITY:
5898: case XML_EXTERNAL_PARAMETER_ENTITY:
5899: if ((ctxt->sax != NULL) && (ctxt->sax->error != NULL))
1.74 daniel 5900: ctxt->sax->error(ctxt->userData,
1.59 daniel 5901: "Attempt to reference the parameter entity '%s'\n", name);
1.123 daniel 5902: ctxt->errNo = XML_ERR_ENTITY_IS_PARAMETER;
1.59 daniel 5903: ctxt->wellFormed = 0;
5904: break;
5905: }
5906: }
5907:
5908: /*
1.98 daniel 5909: * [ WFC: No Recursion ]
1.117 daniel 5910: * TODO A parsed entity must not contain a recursive reference
5911: * to itself, either directly or indirectly.
1.59 daniel 5912: */
1.77 daniel 5913:
1.24 daniel 5914: } else {
1.55 daniel 5915: if ((ctxt->sax != NULL) && (ctxt->sax->error != NULL))
1.74 daniel 5916: ctxt->sax->error(ctxt->userData,
1.59 daniel 5917: "xmlParseEntityRef: expecting ';'\n");
1.123 daniel 5918: ctxt->errNo = XML_ERR_ENTITYREF_SEMICOL_MISSING;
1.59 daniel 5919: ctxt->wellFormed = 0;
1.24 daniel 5920: }
    /* The parsed name is only needed for lookup and messages. */
1.119 daniel 5921: xmlFree(name);
1.24 daniel 5922: }
5923: }
1.77 daniel 5924: return(ent);
1.24 daniel 5925: }
1.135 daniel 5926: /**
5927: * xmlParseStringEntityRef:
5928: * @ctxt: an XML parser context
5929: * @str: a pointer to an index in the string
5930: *
5931: * parse ENTITY references declarations, but this version parses it from
5932: * a string value.
5933: *
5934: * [68] EntityRef ::= '&' Name ';'
5935: *
5936: * [ WFC: Entity Declared ]
5937: * In a document without any DTD, a document with only an internal DTD
5938: * subset which contains no parameter entity references, or a document
5939: * with "standalone='yes'", the Name given in the entity reference
5940: * must match that in an entity declaration, except that well-formed
5941: * documents need not declare any of the following entities: amp, lt,
5942: * gt, apos, quot. The declaration of a parameter entity must precede
5943: * any reference to it. Similarly, the declaration of a general entity
5944: * must precede any reference to it which appears in a default value in an
5945: * attribute-list declaration. Note that if entities are declared in the
5946: * external subset or in external parameter entities, a non-validating
5947: * processor is not obligated to read and process their declarations;
5948: * for such documents, the rule that an entity must be declared is a
5949: * well-formedness constraint only if standalone='yes'.
5950: *
5951: * [ WFC: Parsed Entity ]
5952: * An entity reference must not contain the name of an unparsed entity
5953: *
5954: * Returns the xmlEntityPtr if found, or NULL otherwise. The str pointer
5955: * is updated to the current location in the string.
5956: */
5957: xmlEntityPtr
5958: xmlParseStringEntityRef(xmlParserCtxtPtr ctxt, const xmlChar ** str) {
5959: xmlChar *name;
5960: const xmlChar *ptr;
5961: xmlChar cur;
5962: xmlEntityPtr ent = NULL;
5963:
5964: GROW;
5965:
1.156 daniel 5966: if ((str == NULL) || (*str == NULL))
5967: return(NULL);
1.135 daniel 5968: ptr = *str;
5969: cur = *ptr;
5970: if (cur == '&') {
5971: ptr++;
5972: cur = *ptr;
5973: name = xmlParseStringName(ctxt, &ptr);
5974: if (name == NULL) {
5975: if ((ctxt->sax != NULL) && (ctxt->sax->error != NULL))
5976: ctxt->sax->error(ctxt->userData,
5977: "xmlParseEntityRef: no name\n");
5978: ctxt->errNo = XML_ERR_NAME_REQUIRED;
5979: ctxt->wellFormed = 0;
5980: } else {
1.152 daniel 5981: if (RAW == ';') {
1.135 daniel 5982: NEXT;
5983: /*
5984: * Ask first SAX for entity resolution, otherwise try the
5985: * predefined set.
5986: */
5987: if (ctxt->sax != NULL) {
5988: if (ctxt->sax->getEntity != NULL)
5989: ent = ctxt->sax->getEntity(ctxt->userData, name);
5990: if (ent == NULL)
5991: ent = xmlGetPredefinedEntity(name);
5992: }
5993: /*
5994: * [ WFC: Entity Declared ]
5995: * In a document without any DTD, a document with only an
5996: * internal DTD subset which contains no parameter entity
5997: * references, or a document with "standalone='yes'", the
5998: * Name given in the entity reference must match that in an
5999: * entity declaration, except that well-formed documents
6000: * need not declare any of the following entities: amp, lt,
6001: * gt, apos, quot.
6002: * The declaration of a parameter entity must precede any
6003: * reference to it.
6004: * Similarly, the declaration of a general entity must
6005: * precede any reference to it which appears in a default
6006: * value in an attribute-list declaration. Note that if
6007: * entities are declared in the external subset or in
6008: * external parameter entities, a non-validating processor
6009: * is not obligated to read and process their declarations;
6010: * for such documents, the rule that an entity must be
6011: * declared is a well-formedness constraint only if
6012: * standalone='yes'.
6013: */
6014: if (ent == NULL) {
6015: if ((ctxt->standalone == 1) ||
6016: ((ctxt->hasExternalSubset == 0) &&
6017: (ctxt->hasPErefs == 0))) {
6018: if ((ctxt->sax != NULL) && (ctxt->sax->error != NULL))
6019: ctxt->sax->error(ctxt->userData,
6020: "Entity '%s' not defined\n", name);
6021: ctxt->errNo = XML_ERR_UNDECLARED_ENTITY;
6022: ctxt->wellFormed = 0;
6023: } else {
6024: if ((ctxt->sax != NULL) && (ctxt->sax->warning != NULL))
6025: ctxt->sax->warning(ctxt->userData,
6026: "Entity '%s' not defined\n", name);
6027: ctxt->errNo = XML_WAR_UNDECLARED_ENTITY;
6028: }
6029: }
6030:
6031: /*
6032: * [ WFC: Parsed Entity ]
6033: * An entity reference must not contain the name of an
6034: * unparsed entity
6035: */
1.159 daniel 6036: else if (ent->etype == XML_EXTERNAL_GENERAL_UNPARSED_ENTITY) {
1.135 daniel 6037: if ((ctxt->sax != NULL) && (ctxt->sax->error != NULL))
6038: ctxt->sax->error(ctxt->userData,
6039: "Entity reference to unparsed entity %s\n", name);
6040: ctxt->errNo = XML_ERR_UNPARSED_ENTITY;
6041: ctxt->wellFormed = 0;
6042: }
6043:
6044: /*
6045: * [ WFC: No External Entity References ]
6046: * Attribute values cannot contain direct or indirect
6047: * entity references to external entities.
6048: */
6049: else if ((ctxt->instate == XML_PARSER_ATTRIBUTE_VALUE) &&
1.159 daniel 6050: (ent->etype == XML_EXTERNAL_GENERAL_PARSED_ENTITY)) {
1.135 daniel 6051: if ((ctxt->sax != NULL) && (ctxt->sax->error != NULL))
6052: ctxt->sax->error(ctxt->userData,
6053: "Attribute references external entity '%s'\n", name);
6054: ctxt->errNo = XML_ERR_ENTITY_IS_EXTERNAL;
6055: ctxt->wellFormed = 0;
6056: }
6057: /*
6058: * [ WFC: No < in Attribute Values ]
6059: * The replacement text of any entity referred to directly or
6060: * indirectly in an attribute value (other than "<") must
6061: * not contain a <.
6062: */
6063: else if ((ctxt->instate == XML_PARSER_ATTRIBUTE_VALUE) &&
6064: (ent != NULL) &&
6065: (xmlStrcmp(ent->name, BAD_CAST "lt")) &&
6066: (ent->content != NULL) &&
6067: (xmlStrchr(ent->content, '<'))) {
6068: if ((ctxt->sax != NULL) && (ctxt->sax->error != NULL))
6069: ctxt->sax->error(ctxt->userData,
6070: "'<' in entity '%s' is not allowed in attributes values\n", name);
6071: ctxt->errNo = XML_ERR_LT_IN_ATTRIBUTE;
6072: ctxt->wellFormed = 0;
6073: }
6074:
6075: /*
6076: * Internal check, no parameter entities here ...
6077: */
6078: else {
1.159 daniel 6079: switch (ent->etype) {
1.135 daniel 6080: case XML_INTERNAL_PARAMETER_ENTITY:
6081: case XML_EXTERNAL_PARAMETER_ENTITY:
6082: if ((ctxt->sax != NULL) && (ctxt->sax->error != NULL))
6083: ctxt->sax->error(ctxt->userData,
6084: "Attempt to reference the parameter entity '%s'\n", name);
6085: ctxt->errNo = XML_ERR_ENTITY_IS_PARAMETER;
6086: ctxt->wellFormed = 0;
6087: break;
6088: }
6089: }
6090:
6091: /*
6092: * [ WFC: No Recursion ]
6093: * TODO A parsed entity must not contain a recursive reference
6094: * to itself, either directly or indirectly.
6095: */
6096:
6097: } else {
6098: if ((ctxt->sax != NULL) && (ctxt->sax->error != NULL))
6099: ctxt->sax->error(ctxt->userData,
6100: "xmlParseEntityRef: expecting ';'\n");
6101: ctxt->errNo = XML_ERR_ENTITYREF_SEMICOL_MISSING;
6102: ctxt->wellFormed = 0;
6103: }
6104: xmlFree(name);
6105: }
6106: }
6107: return(ent);
6108: }
1.24 daniel 6109:
1.50 daniel 6110: /**
6111: * xmlParsePEReference:
6112: * @ctxt: an XML parser context
6113: *
6114: * parse PEReference declarations
1.77 daniel 6115: * The entity content is handled directly by pushing it's content as
6116: * a new input stream.
1.22 daniel 6117: *
6118: * [69] PEReference ::= '%' Name ';'
1.68 daniel 6119: *
1.98 daniel 6120: * [ WFC: No Recursion ]
6121: * TODO A parsed entity must not contain a recursive
6122: * reference to itself, either directly or indirectly.
6123: *
6124: * [ WFC: Entity Declared ]
6125: * In a document without any DTD, a document with only an internal DTD
6126: * subset which contains no parameter entity references, or a document
6127: * with "standalone='yes'", ... ... The declaration of a parameter
6128: * entity must precede any reference to it...
6129: *
6130: * [ VC: Entity Declared ]
6131: * In a document with an external subset or external parameter entities
6132: * with "standalone='no'", ... ... The declaration of a parameter entity
6133: * must precede any reference to it...
6134: *
6135: * [ WFC: In DTD ]
6136: * Parameter-entity references may only appear in the DTD.
6137: * NOTE: misleading but this is handled.
1.22 daniel 6138: */
1.77 daniel 6139: void
1.55 daniel 6140: xmlParsePEReference(xmlParserCtxtPtr ctxt) {
1.123 daniel 6141: xmlChar *name;
1.72 daniel 6142: xmlEntityPtr entity = NULL;
1.50 daniel 6143: xmlParserInputPtr input;
1.22 daniel 6144:
1.152 daniel 6145: if (RAW == '%') {
1.40 daniel 6146: NEXT;
1.22 daniel 6147: name = xmlParseName(ctxt);
6148: if (name == NULL) {
1.55 daniel 6149: if ((ctxt->sax != NULL) && (ctxt->sax->error != NULL))
1.122 daniel 6150: ctxt->sax->error(ctxt->userData,
6151: "xmlParsePEReference: no name\n");
1.123 daniel 6152: ctxt->errNo = XML_ERR_NAME_REQUIRED;
1.59 daniel 6153: ctxt->wellFormed = 0;
1.22 daniel 6154: } else {
1.152 daniel 6155: if (RAW == ';') {
1.40 daniel 6156: NEXT;
1.98 daniel 6157: if ((ctxt->sax != NULL) &&
6158: (ctxt->sax->getParameterEntity != NULL))
6159: entity = ctxt->sax->getParameterEntity(ctxt->userData,
6160: name);
1.45 daniel 6161: if (entity == NULL) {
1.98 daniel 6162: /*
6163: * [ WFC: Entity Declared ]
6164: * In a document without any DTD, a document with only an
6165: * internal DTD subset which contains no parameter entity
6166: * references, or a document with "standalone='yes'", ...
6167: * ... The declaration of a parameter entity must precede
6168: * any reference to it...
6169: */
6170: if ((ctxt->standalone == 1) ||
6171: ((ctxt->hasExternalSubset == 0) &&
6172: (ctxt->hasPErefs == 0))) {
6173: if ((ctxt->sax != NULL) && (ctxt->sax->error != NULL))
6174: ctxt->sax->error(ctxt->userData,
6175: "PEReference: %%%s; not found\n", name);
1.123 daniel 6176: ctxt->errNo = XML_ERR_UNDECLARED_ENTITY;
1.98 daniel 6177: ctxt->wellFormed = 0;
6178: } else {
6179: /*
6180: * [ VC: Entity Declared ]
6181: * In a document with an external subset or external
6182: * parameter entities with "standalone='no'", ...
6183: * ... The declaration of a parameter entity must precede
6184: * any reference to it...
6185: */
6186: if ((ctxt->sax != NULL) && (ctxt->sax->warning != NULL))
6187: ctxt->sax->warning(ctxt->userData,
6188: "PEReference: %%%s; not found\n", name);
6189: ctxt->valid = 0;
6190: }
1.50 daniel 6191: } else {
1.98 daniel 6192: /*
6193: * Internal checking in case the entity quest barfed
6194: */
1.159 daniel 6195: if ((entity->etype != XML_INTERNAL_PARAMETER_ENTITY) &&
6196: (entity->etype != XML_EXTERNAL_PARAMETER_ENTITY)) {
1.98 daniel 6197: if ((ctxt->sax != NULL) && (ctxt->sax->warning != NULL))
6198: ctxt->sax->warning(ctxt->userData,
6199: "Internal: %%%s; is not a parameter entity\n", name);
6200: } else {
1.164 daniel 6201: /*
6202: * TODO !!!
6203: * handle the extra spaces added before and after
6204: * c.f. http://www.w3.org/TR/REC-xml#as-PE
6205: */
1.98 daniel 6206: input = xmlNewEntityInputStream(ctxt, entity);
6207: xmlPushInput(ctxt, input);
1.164 daniel 6208: if ((entity->etype == XML_EXTERNAL_PARAMETER_ENTITY) &&
6209: (RAW == '<') && (NXT(1) == '?') &&
6210: (NXT(2) == 'x') && (NXT(3) == 'm') &&
6211: (NXT(4) == 'l') && (IS_BLANK(NXT(5)))) {
6212: xmlParseXMLDecl(ctxt);
6213: }
6214: if (ctxt->token == 0)
6215: ctxt->token = ' ';
1.98 daniel 6216: }
1.45 daniel 6217: }
1.98 daniel 6218: ctxt->hasPErefs = 1;
1.22 daniel 6219: } else {
1.55 daniel 6220: if ((ctxt->sax != NULL) && (ctxt->sax->error != NULL))
1.74 daniel 6221: ctxt->sax->error(ctxt->userData,
1.59 daniel 6222: "xmlParsePEReference: expecting ';'\n");
1.123 daniel 6223: ctxt->errNo = XML_ERR_ENTITYREF_SEMICOL_MISSING;
1.59 daniel 6224: ctxt->wellFormed = 0;
1.22 daniel 6225: }
1.119 daniel 6226: xmlFree(name);
1.3 veillard 6227: }
6228: }
6229: }
6230:
1.50 daniel 6231: /**
1.135 daniel 6232: * xmlParseStringPEReference:
6233: * @ctxt: an XML parser context
6234: * @str: a pointer to an index in the string
6235: *
6236: * parse PEReference declarations
6237: *
6238: * [69] PEReference ::= '%' Name ';'
6239: *
6240: * [ WFC: No Recursion ]
6241: * TODO A parsed entity must not contain a recursive
6242: * reference to itself, either directly or indirectly.
6243: *
6244: * [ WFC: Entity Declared ]
6245: * In a document without any DTD, a document with only an internal DTD
6246: * subset which contains no parameter entity references, or a document
6247: * with "standalone='yes'", ... ... The declaration of a parameter
6248: * entity must precede any reference to it...
6249: *
6250: * [ VC: Entity Declared ]
6251: * In a document with an external subset or external parameter entities
6252: * with "standalone='no'", ... ... The declaration of a parameter entity
6253: * must precede any reference to it...
6254: *
6255: * [ WFC: In DTD ]
6256: * Parameter-entity references may only appear in the DTD.
6257: * NOTE: misleading but this is handled.
6258: *
6259: * Returns the string of the entity content.
6260: * str is updated to the current value of the index
6261: */
6262: xmlEntityPtr
6263: xmlParseStringPEReference(xmlParserCtxtPtr ctxt, const xmlChar **str) {
6264: const xmlChar *ptr;
6265: xmlChar cur;
6266: xmlChar *name;
6267: xmlEntityPtr entity = NULL;
6268:
6269: if ((str == NULL) || (*str == NULL)) return(NULL);
6270: ptr = *str;
6271: cur = *ptr;
6272: if (cur == '%') {
6273: ptr++;
6274: cur = *ptr;
6275: name = xmlParseStringName(ctxt, &ptr);
6276: if (name == NULL) {
6277: if ((ctxt->sax != NULL) && (ctxt->sax->error != NULL))
6278: ctxt->sax->error(ctxt->userData,
6279: "xmlParseStringPEReference: no name\n");
6280: ctxt->errNo = XML_ERR_NAME_REQUIRED;
6281: ctxt->wellFormed = 0;
6282: } else {
6283: cur = *ptr;
6284: if (cur == ';') {
6285: ptr++;
6286: cur = *ptr;
6287: if ((ctxt->sax != NULL) &&
6288: (ctxt->sax->getParameterEntity != NULL))
6289: entity = ctxt->sax->getParameterEntity(ctxt->userData,
6290: name);
6291: if (entity == NULL) {
6292: /*
6293: * [ WFC: Entity Declared ]
6294: * In a document without any DTD, a document with only an
6295: * internal DTD subset which contains no parameter entity
6296: * references, or a document with "standalone='yes'", ...
6297: * ... The declaration of a parameter entity must precede
6298: * any reference to it...
6299: */
6300: if ((ctxt->standalone == 1) ||
6301: ((ctxt->hasExternalSubset == 0) &&
6302: (ctxt->hasPErefs == 0))) {
6303: if ((ctxt->sax != NULL) && (ctxt->sax->error != NULL))
6304: ctxt->sax->error(ctxt->userData,
6305: "PEReference: %%%s; not found\n", name);
6306: ctxt->errNo = XML_ERR_UNDECLARED_ENTITY;
6307: ctxt->wellFormed = 0;
6308: } else {
6309: /*
6310: * [ VC: Entity Declared ]
6311: * In a document with an external subset or external
6312: * parameter entities with "standalone='no'", ...
6313: * ... The declaration of a parameter entity must
6314: * precede any reference to it...
6315: */
6316: if ((ctxt->sax != NULL) && (ctxt->sax->warning != NULL))
6317: ctxt->sax->warning(ctxt->userData,
6318: "PEReference: %%%s; not found\n", name);
6319: ctxt->valid = 0;
6320: }
6321: } else {
6322: /*
6323: * Internal checking in case the entity quest barfed
6324: */
1.159 daniel 6325: if ((entity->etype != XML_INTERNAL_PARAMETER_ENTITY) &&
6326: (entity->etype != XML_EXTERNAL_PARAMETER_ENTITY)) {
1.135 daniel 6327: if ((ctxt->sax != NULL) && (ctxt->sax->warning != NULL))
6328: ctxt->sax->warning(ctxt->userData,
6329: "Internal: %%%s; is not a parameter entity\n", name);
6330: }
6331: }
6332: ctxt->hasPErefs = 1;
6333: } else {
6334: if ((ctxt->sax != NULL) && (ctxt->sax->error != NULL))
6335: ctxt->sax->error(ctxt->userData,
6336: "xmlParseStringPEReference: expecting ';'\n");
6337: ctxt->errNo = XML_ERR_ENTITYREF_SEMICOL_MISSING;
6338: ctxt->wellFormed = 0;
6339: }
6340: xmlFree(name);
6341: }
6342: }
6343: *str = ptr;
6344: return(entity);
6345: }
6346:
6347: /**
1.50 daniel 6348: * xmlParseDocTypeDecl :
6349: * @ctxt: an XML parser context
6350: *
6351: * parse a DOCTYPE declaration
1.21 daniel 6352: *
1.22 daniel 6353: * [28] doctypedecl ::= '<!DOCTYPE' S Name (S ExternalID)? S?
6354: * ('[' (markupdecl | PEReference | S)* ']' S?)? '>'
1.98 daniel 6355: *
6356: * [ VC: Root Element Type ]
1.99 daniel 6357: * The Name in the document type declaration must match the element
1.98 daniel 6358: * type of the root element.
1.21 daniel 6359: */
6360:
1.55 daniel 6361: void
6362: xmlParseDocTypeDecl(xmlParserCtxtPtr ctxt) {
1.165 daniel 6363: xmlChar *name = NULL;
1.123 daniel 6364: xmlChar *ExternalID = NULL;
6365: xmlChar *URI = NULL;
1.21 daniel 6366:
6367: /*
6368: * We know that '<!DOCTYPE' has been detected.
6369: */
1.40 daniel 6370: SKIP(9);
1.21 daniel 6371:
1.42 daniel 6372: SKIP_BLANKS;
1.21 daniel 6373:
6374: /*
6375: * Parse the DOCTYPE name.
6376: */
6377: name = xmlParseName(ctxt);
6378: if (name == NULL) {
1.55 daniel 6379: if ((ctxt->sax != NULL) && (ctxt->sax->error != NULL))
1.122 daniel 6380: ctxt->sax->error(ctxt->userData,
6381: "xmlParseDocTypeDecl : no DOCTYPE name !\n");
1.59 daniel 6382: ctxt->wellFormed = 0;
1.123 daniel 6383: ctxt->errNo = XML_ERR_NAME_REQUIRED;
1.21 daniel 6384: }
1.165 daniel 6385: ctxt->intSubName = name;
1.21 daniel 6386:
1.42 daniel 6387: SKIP_BLANKS;
1.21 daniel 6388:
6389: /*
1.22 daniel 6390: * Check for SystemID and ExternalID
6391: */
1.67 daniel 6392: URI = xmlParseExternalID(ctxt, &ExternalID, 1);
1.98 daniel 6393:
6394: if ((URI != NULL) || (ExternalID != NULL)) {
6395: ctxt->hasExternalSubset = 1;
6396: }
1.165 daniel 6397: ctxt->extSubURI = URI;
6398: ctxt->extSubSystem = ExternalID;
1.98 daniel 6399:
1.42 daniel 6400: SKIP_BLANKS;
1.36 daniel 6401:
1.76 daniel 6402: /*
1.165 daniel 6403: * Create and update the internal subset.
1.76 daniel 6404: */
1.72 daniel 6405: if ((ctxt->sax != NULL) && (ctxt->sax->internalSubset != NULL))
1.74 daniel 6406: ctxt->sax->internalSubset(ctxt->userData, name, ExternalID, URI);
1.22 daniel 6407:
6408: /*
1.140 daniel 6409: * Is there any internal subset declarations ?
6410: * they are handled separately in xmlParseInternalSubset()
6411: */
1.152 daniel 6412: if (RAW == '[')
1.140 daniel 6413: return;
6414:
6415: /*
6416: * We should be at the end of the DOCTYPE declaration.
6417: */
1.152 daniel 6418: if (RAW != '>') {
1.140 daniel 6419: if ((ctxt->sax != NULL) && (ctxt->sax->error != NULL))
6420: ctxt->sax->error(ctxt->userData, "DOCTYPE unproperly terminated\n");
6421: ctxt->wellFormed = 0;
6422: ctxt->errNo = XML_ERR_DOCTYPE_NOT_FINISHED;
6423: }
6424: NEXT;
6425: }
6426:
6427: /**
6428: * xmlParseInternalsubset :
6429: * @ctxt: an XML parser context
6430: *
6431: * parse the internal subset declaration
6432: *
6433: * [28 end] ('[' (markupdecl | PEReference | S)* ']' S?)? '>'
6434: */
6435:
6436: void
6437: xmlParseInternalSubset(xmlParserCtxtPtr ctxt) {
6438: /*
1.22 daniel 6439: * Is there any DTD definition ?
6440: */
1.152 daniel 6441: if (RAW == '[') {
1.96 daniel 6442: ctxt->instate = XML_PARSER_DTD;
1.40 daniel 6443: NEXT;
1.22 daniel 6444: /*
6445: * Parse the succession of Markup declarations and
6446: * PEReferences.
6447: * Subsequence (markupdecl | PEReference | S)*
6448: */
1.152 daniel 6449: while (RAW != ']') {
1.123 daniel 6450: const xmlChar *check = CUR_PTR;
1.115 daniel 6451: int cons = ctxt->input->consumed;
1.22 daniel 6452:
1.42 daniel 6453: SKIP_BLANKS;
1.22 daniel 6454: xmlParseMarkupDecl(ctxt);
1.50 daniel 6455: xmlParsePEReference(ctxt);
1.22 daniel 6456:
1.115 daniel 6457: /*
6458: * Pop-up of finished entities.
6459: */
1.152 daniel 6460: while ((RAW == 0) && (ctxt->inputNr > 1))
1.115 daniel 6461: xmlPopInput(ctxt);
6462:
1.118 daniel 6463: if ((CUR_PTR == check) && (cons == ctxt->input->consumed)) {
1.55 daniel 6464: if ((ctxt->sax != NULL) && (ctxt->sax->error != NULL))
1.74 daniel 6465: ctxt->sax->error(ctxt->userData,
1.140 daniel 6466: "xmlParseInternalSubset: error detected in Markup declaration\n");
1.59 daniel 6467: ctxt->wellFormed = 0;
1.123 daniel 6468: ctxt->errNo = XML_ERR_INTERNAL_ERROR;
1.22 daniel 6469: break;
6470: }
6471: }
1.152 daniel 6472: if (RAW == ']') NEXT;
1.22 daniel 6473: }
6474:
6475: /*
6476: * We should be at the end of the DOCTYPE declaration.
1.21 daniel 6477: */
1.152 daniel 6478: if (RAW != '>') {
1.55 daniel 6479: if ((ctxt->sax != NULL) && (ctxt->sax->error != NULL))
1.74 daniel 6480: ctxt->sax->error(ctxt->userData, "DOCTYPE unproperly terminated\n");
1.59 daniel 6481: ctxt->wellFormed = 0;
1.123 daniel 6482: ctxt->errNo = XML_ERR_DOCTYPE_NOT_FINISHED;
1.21 daniel 6483: }
1.40 daniel 6484: NEXT;
1.21 daniel 6485: }
6486:
1.50 daniel 6487: /**
6488: * xmlParseAttribute:
6489: * @ctxt: an XML parser context
1.123 daniel 6490: * @value: a xmlChar ** used to store the value of the attribute
1.50 daniel 6491: *
6492: * parse an attribute
1.3 veillard 6493: *
1.22 daniel 6494: * [41] Attribute ::= Name Eq AttValue
6495: *
1.98 daniel 6496: * [ WFC: No External Entity References ]
6497: * Attribute values cannot contain direct or indirect entity references
6498: * to external entities.
6499: *
6500: * [ WFC: No < in Attribute Values ]
6501: * The replacement text of any entity referred to directly or indirectly in
6502: * an attribute value (other than "<") must not contain a <.
6503: *
6504: * [ VC: Attribute Value Type ]
1.117 daniel 6505: * The attribute must have been declared; the value must be of the type
1.99 daniel 6506: * declared for it.
1.98 daniel 6507: *
1.22 daniel 6508: * [25] Eq ::= S? '=' S?
6509: *
1.29 daniel 6510: * With namespace:
6511: *
6512: * [NS 11] Attribute ::= QName Eq AttValue
1.43 daniel 6513: *
6514: * Also the case QName == xmlns:??? is handled independently as a namespace
6515: * definition.
1.69 daniel 6516: *
1.72 daniel 6517: * Returns the attribute name, and the value in *value.
1.3 veillard 6518: */
6519:
1.123 daniel 6520: xmlChar *
6521: xmlParseAttribute(xmlParserCtxtPtr ctxt, xmlChar **value) {
6522: xmlChar *name, *val;
1.3 veillard 6523:
1.72 daniel 6524: *value = NULL;
6525: name = xmlParseName(ctxt);
1.22 daniel 6526: if (name == NULL) {
1.55 daniel 6527: if ((ctxt->sax != NULL) && (ctxt->sax->error != NULL))
1.74 daniel 6528: ctxt->sax->error(ctxt->userData, "error parsing attribute name\n");
1.59 daniel 6529: ctxt->wellFormed = 0;
1.123 daniel 6530: ctxt->errNo = XML_ERR_NAME_REQUIRED;
1.52 daniel 6531: return(NULL);
1.3 veillard 6532: }
6533:
6534: /*
1.29 daniel 6535: * read the value
1.3 veillard 6536: */
1.42 daniel 6537: SKIP_BLANKS;
1.152 daniel 6538: if (RAW == '=') {
1.40 daniel 6539: NEXT;
1.42 daniel 6540: SKIP_BLANKS;
1.72 daniel 6541: val = xmlParseAttValue(ctxt);
1.96 daniel 6542: ctxt->instate = XML_PARSER_CONTENT;
1.29 daniel 6543: } else {
1.55 daniel 6544: if ((ctxt->sax != NULL) && (ctxt->sax->error != NULL))
1.74 daniel 6545: ctxt->sax->error(ctxt->userData,
1.59 daniel 6546: "Specification mandate value for attribute %s\n", name);
1.123 daniel 6547: ctxt->errNo = XML_ERR_ATTRIBUTE_WITHOUT_VALUE;
1.59 daniel 6548: ctxt->wellFormed = 0;
1.52 daniel 6549: return(NULL);
1.43 daniel 6550: }
6551:
1.72 daniel 6552: *value = val;
6553: return(name);
1.3 veillard 6554: }
6555:
1.50 daniel 6556: /**
6557: * xmlParseStartTag:
6558: * @ctxt: an XML parser context
6559: *
6560: * parse a start of tag either for rule element or
6561: * EmptyElement. In both case we don't parse the tag closing chars.
1.27 daniel 6562: *
6563: * [40] STag ::= '<' Name (S Attribute)* S? '>'
6564: *
1.98 daniel 6565: * [ WFC: Unique Att Spec ]
6566: * No attribute name may appear more than once in the same start-tag or
6567: * empty-element tag.
6568: *
1.29 daniel 6569: * [44] EmptyElemTag ::= '<' Name (S Attribute)* S? '/>'
6570: *
1.98 daniel 6571: * [ WFC: Unique Att Spec ]
6572: * No attribute name may appear more than once in the same start-tag or
6573: * empty-element tag.
6574: *
1.29 daniel 6575: * With namespace:
6576: *
6577: * [NS 8] STag ::= '<' QName (S Attribute)* S? '>'
6578: *
6579: * [NS 10] EmptyElement ::= '<' QName (S Attribute)* S? '/>'
1.83 daniel 6580: *
1.129 daniel 6581: * Returne the element name parsed
1.2 veillard 6582: */
6583:
1.123 daniel 6584: xmlChar *
1.69 daniel 6585: xmlParseStartTag(xmlParserCtxtPtr ctxt) {
1.123 daniel 6586: xmlChar *name;
6587: xmlChar *attname;
6588: xmlChar *attvalue;
6589: const xmlChar **atts = NULL;
1.72 daniel 6590: int nbatts = 0;
6591: int maxatts = 0;
6592: int i;
1.2 veillard 6593:
1.152 daniel 6594: if (RAW != '<') return(NULL);
1.40 daniel 6595: NEXT;
1.3 veillard 6596:
1.72 daniel 6597: name = xmlParseName(ctxt);
1.59 daniel 6598: if (name == NULL) {
6599: if ((ctxt->sax != NULL) && (ctxt->sax->error != NULL))
1.74 daniel 6600: ctxt->sax->error(ctxt->userData,
1.59 daniel 6601: "xmlParseStartTag: invalid element name\n");
1.123 daniel 6602: ctxt->errNo = XML_ERR_NAME_REQUIRED;
1.59 daniel 6603: ctxt->wellFormed = 0;
1.83 daniel 6604: return(NULL);
1.50 daniel 6605: }
6606:
6607: /*
1.3 veillard 6608: * Now parse the attributes, it ends up with the ending
6609: *
6610: * (S Attribute)* S?
6611: */
1.42 daniel 6612: SKIP_BLANKS;
1.91 daniel 6613: GROW;
1.168 ! daniel 6614:
1.153 daniel 6615: while ((IS_CHAR(RAW)) &&
1.152 daniel 6616: (RAW != '>') &&
6617: ((RAW != '/') || (NXT(1) != '>'))) {
1.123 daniel 6618: const xmlChar *q = CUR_PTR;
1.91 daniel 6619: int cons = ctxt->input->consumed;
1.29 daniel 6620:
1.72 daniel 6621: attname = xmlParseAttribute(ctxt, &attvalue);
6622: if ((attname != NULL) && (attvalue != NULL)) {
6623: /*
1.98 daniel 6624: * [ WFC: Unique Att Spec ]
6625: * No attribute name may appear more than once in the same
6626: * start-tag or empty-element tag.
1.72 daniel 6627: */
6628: for (i = 0; i < nbatts;i += 2) {
6629: if (!xmlStrcmp(atts[i], attname)) {
6630: if ((ctxt->sax != NULL) && (ctxt->sax->error != NULL))
1.98 daniel 6631: ctxt->sax->error(ctxt->userData,
6632: "Attribute %s redefined\n",
6633: attname);
1.72 daniel 6634: ctxt->wellFormed = 0;
1.123 daniel 6635: ctxt->errNo = XML_ERR_ATTRIBUTE_REDEFINED;
1.119 daniel 6636: xmlFree(attname);
6637: xmlFree(attvalue);
1.98 daniel 6638: goto failed;
1.72 daniel 6639: }
6640: }
6641:
6642: /*
6643: * Add the pair to atts
6644: */
6645: if (atts == NULL) {
6646: maxatts = 10;
1.123 daniel 6647: atts = (const xmlChar **) xmlMalloc(maxatts * sizeof(xmlChar *));
1.72 daniel 6648: if (atts == NULL) {
1.86 daniel 6649: fprintf(stderr, "malloc of %ld byte failed\n",
1.123 daniel 6650: maxatts * (long)sizeof(xmlChar *));
1.83 daniel 6651: return(NULL);
1.72 daniel 6652: }
1.127 daniel 6653: } else if (nbatts + 4 > maxatts) {
1.72 daniel 6654: maxatts *= 2;
1.123 daniel 6655: atts = (const xmlChar **) xmlRealloc(atts,
6656: maxatts * sizeof(xmlChar *));
1.72 daniel 6657: if (atts == NULL) {
1.86 daniel 6658: fprintf(stderr, "realloc of %ld byte failed\n",
1.123 daniel 6659: maxatts * (long)sizeof(xmlChar *));
1.83 daniel 6660: return(NULL);
1.72 daniel 6661: }
6662: }
6663: atts[nbatts++] = attname;
6664: atts[nbatts++] = attvalue;
6665: atts[nbatts] = NULL;
6666: atts[nbatts + 1] = NULL;
6667: }
6668:
1.116 daniel 6669: failed:
1.168 ! daniel 6670:
! 6671: if ((RAW == '>') || (((RAW == '/') && (NXT(1) == '>'))))
! 6672: break;
! 6673: if (!IS_BLANK(RAW)) {
! 6674: if ((ctxt->sax != NULL) && (ctxt->sax->error != NULL))
! 6675: ctxt->sax->error(ctxt->userData,
! 6676: "attributes construct error\n");
! 6677: ctxt->errNo = XML_ERR_SPACE_REQUIRED;
! 6678: ctxt->wellFormed = 0;
! 6679: }
1.42 daniel 6680: SKIP_BLANKS;
1.91 daniel 6681: if ((cons == ctxt->input->consumed) && (q == CUR_PTR)) {
1.55 daniel 6682: if ((ctxt->sax != NULL) && (ctxt->sax->error != NULL))
1.74 daniel 6683: ctxt->sax->error(ctxt->userData,
1.31 daniel 6684: "xmlParseStartTag: problem parsing attributes\n");
1.123 daniel 6685: ctxt->errNo = XML_ERR_INTERNAL_ERROR;
1.59 daniel 6686: ctxt->wellFormed = 0;
1.29 daniel 6687: break;
1.3 veillard 6688: }
1.91 daniel 6689: GROW;
1.3 veillard 6690: }
6691:
1.43 daniel 6692: /*
1.72 daniel 6693: * SAX: Start of Element !
1.43 daniel 6694: */
1.72 daniel 6695: if ((ctxt->sax != NULL) && (ctxt->sax->startElement != NULL))
1.74 daniel 6696: ctxt->sax->startElement(ctxt->userData, name, atts);
1.43 daniel 6697:
1.72 daniel 6698: if (atts != NULL) {
1.123 daniel 6699: for (i = 0;i < nbatts;i++) xmlFree((xmlChar *) atts[i]);
1.119 daniel 6700: xmlFree(atts);
1.72 daniel 6701: }
1.83 daniel 6702: return(name);
1.3 veillard 6703: }
6704:
1.50 daniel 6705: /**
6706: * xmlParseEndTag:
6707: * @ctxt: an XML parser context
6708: *
6709: * parse an end of tag
1.27 daniel 6710: *
6711: * [42] ETag ::= '</' Name S? '>'
1.29 daniel 6712: *
6713: * With namespace
6714: *
1.72 daniel 6715: * [NS 9] ETag ::= '</' QName S? '>'
1.7 veillard 6716: */
6717:
1.55 daniel 6718: void
1.140 daniel 6719: xmlParseEndTag(xmlParserCtxtPtr ctxt) {
1.123 daniel 6720: xmlChar *name;
1.140 daniel 6721: xmlChar *oldname;
1.7 veillard 6722:
1.91 daniel 6723: GROW;
1.152 daniel 6724: if ((RAW != '<') || (NXT(1) != '/')) {
1.55 daniel 6725: if ((ctxt->sax != NULL) && (ctxt->sax->error != NULL))
1.74 daniel 6726: ctxt->sax->error(ctxt->userData, "xmlParseEndTag: '</' not found\n");
1.59 daniel 6727: ctxt->wellFormed = 0;
1.123 daniel 6728: ctxt->errNo = XML_ERR_LTSLASH_REQUIRED;
1.27 daniel 6729: return;
6730: }
1.40 daniel 6731: SKIP(2);
1.7 veillard 6732:
1.72 daniel 6733: name = xmlParseName(ctxt);
1.7 veillard 6734:
6735: /*
6736: * We should definitely be at the ending "S? '>'" part
6737: */
1.91 daniel 6738: GROW;
1.42 daniel 6739: SKIP_BLANKS;
1.153 daniel 6740: if ((!IS_CHAR(RAW)) || (RAW != '>')) {
1.55 daniel 6741: if ((ctxt->sax != NULL) && (ctxt->sax->error != NULL))
1.74 daniel 6742: ctxt->sax->error(ctxt->userData, "End tag : expected '>'\n");
1.123 daniel 6743: ctxt->errNo = XML_ERR_GT_REQUIRED;
1.59 daniel 6744: ctxt->wellFormed = 0;
1.7 veillard 6745: } else
1.40 daniel 6746: NEXT;
1.7 veillard 6747:
1.72 daniel 6748: /*
1.98 daniel 6749: * [ WFC: Element Type Match ]
6750: * The Name in an element's end-tag must match the element type in the
6751: * start-tag.
6752: *
1.83 daniel 6753: */
1.147 daniel 6754: if ((name == NULL) || (ctxt->name == NULL) ||
6755: (xmlStrcmp(name, ctxt->name))) {
6756: if ((ctxt->sax != NULL) && (ctxt->sax->error != NULL)) {
6757: if ((name != NULL) && (ctxt->name != NULL)) {
6758: ctxt->sax->error(ctxt->userData,
6759: "Opening and ending tag mismatch: %s and %s\n",
6760: ctxt->name, name);
6761: } else if (ctxt->name != NULL) {
6762: ctxt->sax->error(ctxt->userData,
6763: "Ending tag eror for: %s\n", ctxt->name);
6764: } else {
6765: ctxt->sax->error(ctxt->userData,
6766: "Ending tag error: internal error ???\n");
6767: }
1.122 daniel 6768:
1.147 daniel 6769: }
1.123 daniel 6770: ctxt->errNo = XML_ERR_TAG_NAME_MISMATCH;
1.83 daniel 6771: ctxt->wellFormed = 0;
6772: }
6773:
6774: /*
1.72 daniel 6775: * SAX: End of Tag
6776: */
6777: if ((ctxt->sax != NULL) && (ctxt->sax->endElement != NULL))
1.74 daniel 6778: ctxt->sax->endElement(ctxt->userData, name);
1.72 daniel 6779:
6780: if (name != NULL)
1.119 daniel 6781: xmlFree(name);
1.140 daniel 6782: oldname = namePop(ctxt);
6783: if (oldname != NULL) {
6784: #ifdef DEBUG_STACK
6785: fprintf(stderr,"Close: popped %s\n", oldname);
6786: #endif
6787: xmlFree(oldname);
6788: }
1.7 veillard 6789: return;
6790: }
6791:
1.50 daniel 6792: /**
6793: * xmlParseCDSect:
6794: * @ctxt: an XML parser context
6795: *
6796: * Parse escaped pure raw content.
1.29 daniel 6797: *
6798: * [18] CDSect ::= CDStart CData CDEnd
6799: *
6800: * [19] CDStart ::= '<![CDATA['
6801: *
6802: * [20] Data ::= (Char* - (Char* ']]>' Char*))
6803: *
6804: * [21] CDEnd ::= ']]>'
1.3 veillard 6805: */
1.55 daniel 6806: void
6807: xmlParseCDSect(xmlParserCtxtPtr ctxt) {
1.135 daniel 6808: xmlChar *buf = NULL;
6809: int len = 0;
1.140 daniel 6810: int size = XML_PARSER_BUFFER_SIZE;
1.152 daniel 6811: int r, rl;
6812: int s, sl;
6813: int cur, l;
1.3 veillard 6814:
1.106 daniel 6815: if ((NXT(0) == '<') && (NXT(1) == '!') &&
1.40 daniel 6816: (NXT(2) == '[') && (NXT(3) == 'C') &&
6817: (NXT(4) == 'D') && (NXT(5) == 'A') &&
6818: (NXT(6) == 'T') && (NXT(7) == 'A') &&
6819: (NXT(8) == '[')) {
6820: SKIP(9);
1.29 daniel 6821: } else
1.45 daniel 6822: return;
1.109 daniel 6823:
6824: ctxt->instate = XML_PARSER_CDATA_SECTION;
1.152 daniel 6825: r = CUR_CHAR(rl);
6826: if (!IS_CHAR(r)) {
1.55 daniel 6827: if ((ctxt->sax != NULL) && (ctxt->sax->error != NULL))
1.122 daniel 6828: ctxt->sax->error(ctxt->userData,
1.135 daniel 6829: "CData section not finished\n");
1.59 daniel 6830: ctxt->wellFormed = 0;
1.123 daniel 6831: ctxt->errNo = XML_ERR_CDATA_NOT_FINISHED;
1.109 daniel 6832: ctxt->instate = XML_PARSER_CONTENT;
1.45 daniel 6833: return;
1.3 veillard 6834: }
1.152 daniel 6835: NEXTL(rl);
6836: s = CUR_CHAR(sl);
6837: if (!IS_CHAR(s)) {
1.55 daniel 6838: if ((ctxt->sax != NULL) && (ctxt->sax->error != NULL))
1.122 daniel 6839: ctxt->sax->error(ctxt->userData,
1.135 daniel 6840: "CData section not finished\n");
1.123 daniel 6841: ctxt->errNo = XML_ERR_CDATA_NOT_FINISHED;
1.59 daniel 6842: ctxt->wellFormed = 0;
1.109 daniel 6843: ctxt->instate = XML_PARSER_CONTENT;
1.45 daniel 6844: return;
1.3 veillard 6845: }
1.152 daniel 6846: NEXTL(sl);
6847: cur = CUR_CHAR(l);
1.135 daniel 6848: buf = (xmlChar *) xmlMalloc(size * sizeof(xmlChar));
6849: if (buf == NULL) {
6850: fprintf(stderr, "malloc of %d byte failed\n", size);
6851: return;
6852: }
1.108 veillard 6853: while (IS_CHAR(cur) &&
1.110 daniel 6854: ((r != ']') || (s != ']') || (cur != '>'))) {
1.152 daniel 6855: if (len + 5 >= size) {
1.135 daniel 6856: size *= 2;
6857: buf = xmlRealloc(buf, size * sizeof(xmlChar));
6858: if (buf == NULL) {
6859: fprintf(stderr, "realloc of %d byte failed\n", size);
6860: return;
6861: }
6862: }
1.152 daniel 6863: COPY_BUF(rl,buf,len,r);
1.110 daniel 6864: r = s;
1.152 daniel 6865: rl = sl;
1.110 daniel 6866: s = cur;
1.152 daniel 6867: sl = l;
6868: NEXTL(l);
6869: cur = CUR_CHAR(l);
1.3 veillard 6870: }
1.135 daniel 6871: buf[len] = 0;
1.109 daniel 6872: ctxt->instate = XML_PARSER_CONTENT;
1.152 daniel 6873: if (cur != '>') {
1.55 daniel 6874: if ((ctxt->sax != NULL) && (ctxt->sax->error != NULL))
1.122 daniel 6875: ctxt->sax->error(ctxt->userData,
1.135 daniel 6876: "CData section not finished\n%.50s\n", buf);
1.123 daniel 6877: ctxt->errNo = XML_ERR_CDATA_NOT_FINISHED;
1.59 daniel 6878: ctxt->wellFormed = 0;
1.135 daniel 6879: xmlFree(buf);
1.45 daniel 6880: return;
1.3 veillard 6881: }
1.152 daniel 6882: NEXTL(l);
1.16 daniel 6883:
1.45 daniel 6884: /*
1.135 daniel 6885: * Ok the buffer is to be consumed as cdata.
1.45 daniel 6886: */
6887: if (ctxt->sax != NULL) {
1.107 daniel 6888: if (ctxt->sax->cdataBlock != NULL)
1.135 daniel 6889: ctxt->sax->cdataBlock(ctxt->userData, buf, len);
1.45 daniel 6890: }
1.135 daniel 6891: xmlFree(buf);
1.2 veillard 6892: }
6893:
1.50 daniel 6894: /**
6895: * xmlParseContent:
6896: * @ctxt: an XML parser context
6897: *
6898: * Parse a content:
1.2 veillard 6899: *
1.27 daniel 6900: * [43] content ::= (element | CharData | Reference | CDSect | PI | Comment)*
1.2 veillard 6901: */
6902:
1.55 daniel 6903: void
6904: xmlParseContent(xmlParserCtxtPtr ctxt) {
1.97 daniel 6905: GROW;
1.152 daniel 6906: while ((RAW != '<') || (NXT(1) != '/')) {
1.123 daniel 6907: const xmlChar *test = CUR_PTR;
1.91 daniel 6908: int cons = ctxt->input->consumed;
1.123 daniel 6909: xmlChar tok = ctxt->token;
1.27 daniel 6910:
6911: /*
1.152 daniel 6912: * Handle possible processed charrefs.
6913: */
6914: if (ctxt->token != 0) {
6915: xmlParseCharData(ctxt, 0);
6916: }
6917: /*
1.27 daniel 6918: * First case : a Processing Instruction.
6919: */
1.152 daniel 6920: else if ((RAW == '<') && (NXT(1) == '?')) {
1.27 daniel 6921: xmlParsePI(ctxt);
6922: }
1.72 daniel 6923:
1.27 daniel 6924: /*
6925: * Second case : a CDSection
6926: */
1.152 daniel 6927: else if ((RAW == '<') && (NXT(1) == '!') &&
1.40 daniel 6928: (NXT(2) == '[') && (NXT(3) == 'C') &&
6929: (NXT(4) == 'D') && (NXT(5) == 'A') &&
6930: (NXT(6) == 'T') && (NXT(7) == 'A') &&
6931: (NXT(8) == '[')) {
1.45 daniel 6932: xmlParseCDSect(ctxt);
1.27 daniel 6933: }
1.72 daniel 6934:
1.27 daniel 6935: /*
6936: * Third case : a comment
6937: */
1.152 daniel 6938: else if ((RAW == '<') && (NXT(1) == '!') &&
1.40 daniel 6939: (NXT(2) == '-') && (NXT(3) == '-')) {
1.114 daniel 6940: xmlParseComment(ctxt);
1.97 daniel 6941: ctxt->instate = XML_PARSER_CONTENT;
1.27 daniel 6942: }
1.72 daniel 6943:
1.27 daniel 6944: /*
6945: * Fourth case : a sub-element.
6946: */
1.152 daniel 6947: else if (RAW == '<') {
1.72 daniel 6948: xmlParseElement(ctxt);
1.45 daniel 6949: }
1.72 daniel 6950:
1.45 daniel 6951: /*
1.50 daniel 6952: * Fifth case : a reference. If if has not been resolved,
6953: * parsing returns it's Name, create the node
1.45 daniel 6954: */
1.97 daniel 6955:
1.152 daniel 6956: else if (RAW == '&') {
1.77 daniel 6957: xmlParseReference(ctxt);
1.27 daniel 6958: }
1.72 daniel 6959:
1.27 daniel 6960: /*
6961: * Last case, text. Note that References are handled directly.
6962: */
6963: else {
1.45 daniel 6964: xmlParseCharData(ctxt, 0);
1.3 veillard 6965: }
1.14 veillard 6966:
1.91 daniel 6967: GROW;
1.14 veillard 6968: /*
1.45 daniel 6969: * Pop-up of finished entities.
1.14 veillard 6970: */
1.152 daniel 6971: while ((RAW == 0) && (ctxt->inputNr > 1))
1.69 daniel 6972: xmlPopInput(ctxt);
1.135 daniel 6973: SHRINK;
1.45 daniel 6974:
1.113 daniel 6975: if ((cons == ctxt->input->consumed) && (test == CUR_PTR) &&
6976: (tok == ctxt->token)) {
1.55 daniel 6977: if ((ctxt->sax != NULL) && (ctxt->sax->error != NULL))
1.74 daniel 6978: ctxt->sax->error(ctxt->userData,
1.59 daniel 6979: "detected an error in element content\n");
1.123 daniel 6980: ctxt->errNo = XML_ERR_INTERNAL_ERROR;
1.59 daniel 6981: ctxt->wellFormed = 0;
1.29 daniel 6982: break;
6983: }
1.3 veillard 6984: }
1.2 veillard 6985: }
6986:
1.50 daniel 6987: /**
6988: * xmlParseElement:
6989: * @ctxt: an XML parser context
6990: *
6991: * parse an XML element, this is highly recursive
1.26 daniel 6992: *
6993: * [39] element ::= EmptyElemTag | STag content ETag
6994: *
1.98 daniel 6995: * [ WFC: Element Type Match ]
6996: * The Name in an element's end-tag must match the element type in the
6997: * start-tag.
6998: *
6999: * [ VC: Element Valid ]
1.117 daniel 7000: * An element is valid if there is a declaration matching elementdecl
1.99 daniel 7001: * where the Name matches the element type and one of the following holds:
7002: * - The declaration matches EMPTY and the element has no content.
7003: * - The declaration matches children and the sequence of child elements
7004: * belongs to the language generated by the regular expression in the
7005: * content model, with optional white space (characters matching the
7006: * nonterminal S) between each pair of child elements.
7007: * - The declaration matches Mixed and the content consists of character
7008: * data and child elements whose types match names in the content model.
7009: * - The declaration matches ANY, and the types of any child elements have
7010: * been declared.
1.2 veillard 7011: */
1.26 daniel 7012:
1.72 daniel 7013: void
1.69 daniel 7014: xmlParseElement(xmlParserCtxtPtr ctxt) {
1.123 daniel 7015: const xmlChar *openTag = CUR_PTR;
7016: xmlChar *name;
1.140 daniel 7017: xmlChar *oldname;
1.32 daniel 7018: xmlParserNodeInfo node_info;
1.118 daniel 7019: xmlNodePtr ret;
1.2 veillard 7020:
1.32 daniel 7021: /* Capture start position */
1.118 daniel 7022: if (ctxt->record_info) {
7023: node_info.begin_pos = ctxt->input->consumed +
7024: (CUR_PTR - ctxt->input->base);
7025: node_info.begin_line = ctxt->input->line;
7026: }
1.32 daniel 7027:
1.83 daniel 7028: name = xmlParseStartTag(ctxt);
7029: if (name == NULL) {
7030: return;
7031: }
1.140 daniel 7032: namePush(ctxt, name);
1.118 daniel 7033: ret = ctxt->node;
1.2 veillard 7034:
7035: /*
1.99 daniel 7036: * [ VC: Root Element Type ]
7037: * The Name in the document type declaration must match the element
7038: * type of the root element.
7039: */
1.105 daniel 7040: if (ctxt->validate && ctxt->wellFormed && ctxt->myDoc &&
1.157 daniel 7041: ctxt->node && (ctxt->node == ctxt->myDoc->children))
1.102 daniel 7042: ctxt->valid &= xmlValidateRoot(&ctxt->vctxt, ctxt->myDoc);
1.99 daniel 7043:
7044: /*
1.2 veillard 7045: * Check for an Empty Element.
7046: */
1.152 daniel 7047: if ((RAW == '/') && (NXT(1) == '>')) {
1.40 daniel 7048: SKIP(2);
1.72 daniel 7049: if ((ctxt->sax != NULL) && (ctxt->sax->endElement != NULL))
1.83 daniel 7050: ctxt->sax->endElement(ctxt->userData, name);
1.140 daniel 7051: oldname = namePop(ctxt);
7052: if (oldname != NULL) {
7053: #ifdef DEBUG_STACK
7054: fprintf(stderr,"Close: popped %s\n", oldname);
7055: #endif
7056: xmlFree(oldname);
7057: }
1.72 daniel 7058: return;
1.2 veillard 7059: }
1.152 daniel 7060: if (RAW == '>') {
1.91 daniel 7061: NEXT;
7062: } else {
1.55 daniel 7063: if ((ctxt->sax != NULL) && (ctxt->sax->error != NULL))
1.122 daniel 7064: ctxt->sax->error(ctxt->userData,
7065: "Couldn't find end of Start Tag\n%.30s\n",
1.57 daniel 7066: openTag);
1.59 daniel 7067: ctxt->wellFormed = 0;
1.123 daniel 7068: ctxt->errNo = XML_ERR_GT_REQUIRED;
1.45 daniel 7069:
7070: /*
7071: * end of parsing of this node.
7072: */
7073: nodePop(ctxt);
1.140 daniel 7074: oldname = namePop(ctxt);
7075: if (oldname != NULL) {
7076: #ifdef DEBUG_STACK
7077: fprintf(stderr,"Close: popped %s\n", oldname);
7078: #endif
7079: xmlFree(oldname);
7080: }
1.118 daniel 7081:
7082: /*
7083: * Capture end position and add node
7084: */
7085: if ( ret != NULL && ctxt->record_info ) {
7086: node_info.end_pos = ctxt->input->consumed +
7087: (CUR_PTR - ctxt->input->base);
7088: node_info.end_line = ctxt->input->line;
7089: node_info.node = ret;
7090: xmlParserAddNodeInfo(ctxt, &node_info);
7091: }
1.72 daniel 7092: return;
1.2 veillard 7093: }
7094:
7095: /*
7096: * Parse the content of the element:
7097: */
1.45 daniel 7098: xmlParseContent(ctxt);
1.153 daniel 7099: if (!IS_CHAR(RAW)) {
1.55 daniel 7100: if ((ctxt->sax != NULL) && (ctxt->sax->error != NULL))
1.74 daniel 7101: ctxt->sax->error(ctxt->userData,
1.57 daniel 7102: "Premature end of data in tag %.30s\n", openTag);
1.59 daniel 7103: ctxt->wellFormed = 0;
1.123 daniel 7104: ctxt->errNo = XML_ERR_TAG_NOT_FINISED;
1.45 daniel 7105:
7106: /*
7107: * end of parsing of this node.
7108: */
7109: nodePop(ctxt);
1.140 daniel 7110: oldname = namePop(ctxt);
7111: if (oldname != NULL) {
7112: #ifdef DEBUG_STACK
7113: fprintf(stderr,"Close: popped %s\n", oldname);
7114: #endif
7115: xmlFree(oldname);
7116: }
1.72 daniel 7117: return;
1.2 veillard 7118: }
7119:
7120: /*
1.27 daniel 7121: * parse the end of tag: '</' should be here.
1.2 veillard 7122: */
1.140 daniel 7123: xmlParseEndTag(ctxt);
1.118 daniel 7124:
7125: /*
7126: * Capture end position and add node
7127: */
7128: if ( ret != NULL && ctxt->record_info ) {
7129: node_info.end_pos = ctxt->input->consumed +
7130: (CUR_PTR - ctxt->input->base);
7131: node_info.end_line = ctxt->input->line;
7132: node_info.node = ret;
7133: xmlParserAddNodeInfo(ctxt, &node_info);
7134: }
1.2 veillard 7135: }
7136:
1.50 daniel 7137: /**
7138: * xmlParseVersionNum:
7139: * @ctxt: an XML parser context
7140: *
7141: * parse the XML version value.
1.29 daniel 7142: *
7143: * [26] VersionNum ::= ([a-zA-Z0-9_.:] | '-')+
1.68 daniel 7144: *
7145: * Returns the string giving the XML version number, or NULL
1.29 daniel 7146: */
1.123 daniel 7147: xmlChar *
1.55 daniel 7148: xmlParseVersionNum(xmlParserCtxtPtr ctxt) {
1.135 daniel 7149: xmlChar *buf = NULL;
7150: int len = 0;
7151: int size = 10;
7152: xmlChar cur;
1.29 daniel 7153:
1.135 daniel 7154: buf = (xmlChar *) xmlMalloc(size * sizeof(xmlChar));
7155: if (buf == NULL) {
7156: fprintf(stderr, "malloc of %d byte failed\n", size);
7157: return(NULL);
7158: }
7159: cur = CUR;
1.152 daniel 7160: while (((cur >= 'a') && (cur <= 'z')) ||
7161: ((cur >= 'A') && (cur <= 'Z')) ||
7162: ((cur >= '0') && (cur <= '9')) ||
7163: (cur == '_') || (cur == '.') ||
7164: (cur == ':') || (cur == '-')) {
1.135 daniel 7165: if (len + 1 >= size) {
7166: size *= 2;
7167: buf = xmlRealloc(buf, size * sizeof(xmlChar));
7168: if (buf == NULL) {
7169: fprintf(stderr, "realloc of %d byte failed\n", size);
7170: return(NULL);
7171: }
7172: }
7173: buf[len++] = cur;
7174: NEXT;
7175: cur=CUR;
7176: }
7177: buf[len] = 0;
7178: return(buf);
1.29 daniel 7179: }
7180:
1.50 daniel 7181: /**
7182: * xmlParseVersionInfo:
7183: * @ctxt: an XML parser context
7184: *
7185: * parse the XML version.
1.29 daniel 7186: *
7187: * [24] VersionInfo ::= S 'version' Eq (' VersionNum ' | " VersionNum ")
7188: *
7189: * [25] Eq ::= S? '=' S?
1.50 daniel 7190: *
1.68 daniel 7191: * Returns the version string, e.g. "1.0"
1.29 daniel 7192: */
7193:
1.123 daniel 7194: xmlChar *
1.55 daniel 7195: xmlParseVersionInfo(xmlParserCtxtPtr ctxt) {
1.123 daniel 7196: xmlChar *version = NULL;
7197: const xmlChar *q;
1.29 daniel 7198:
1.152 daniel 7199: if ((RAW == 'v') && (NXT(1) == 'e') &&
1.40 daniel 7200: (NXT(2) == 'r') && (NXT(3) == 's') &&
7201: (NXT(4) == 'i') && (NXT(5) == 'o') &&
7202: (NXT(6) == 'n')) {
7203: SKIP(7);
1.42 daniel 7204: SKIP_BLANKS;
1.152 daniel 7205: if (RAW != '=') {
1.55 daniel 7206: if ((ctxt->sax != NULL) && (ctxt->sax->error != NULL))
1.122 daniel 7207: ctxt->sax->error(ctxt->userData,
7208: "xmlParseVersionInfo : expected '='\n");
1.59 daniel 7209: ctxt->wellFormed = 0;
1.123 daniel 7210: ctxt->errNo = XML_ERR_EQUAL_REQUIRED;
1.31 daniel 7211: return(NULL);
7212: }
1.40 daniel 7213: NEXT;
1.42 daniel 7214: SKIP_BLANKS;
1.152 daniel 7215: if (RAW == '"') {
1.40 daniel 7216: NEXT;
7217: q = CUR_PTR;
1.29 daniel 7218: version = xmlParseVersionNum(ctxt);
1.152 daniel 7219: if (RAW != '"') {
1.55 daniel 7220: if ((ctxt->sax != NULL) && (ctxt->sax->error != NULL))
1.122 daniel 7221: ctxt->sax->error(ctxt->userData,
7222: "String not closed\n%.50s\n", q);
1.59 daniel 7223: ctxt->wellFormed = 0;
1.123 daniel 7224: ctxt->errNo = XML_ERR_STRING_NOT_CLOSED;
1.55 daniel 7225: } else
1.40 daniel 7226: NEXT;
1.152 daniel 7227: } else if (RAW == '\''){
1.40 daniel 7228: NEXT;
7229: q = CUR_PTR;
1.29 daniel 7230: version = xmlParseVersionNum(ctxt);
1.152 daniel 7231: if (RAW != '\'') {
1.55 daniel 7232: if ((ctxt->sax != NULL) && (ctxt->sax->error != NULL))
1.122 daniel 7233: ctxt->sax->error(ctxt->userData,
7234: "String not closed\n%.50s\n", q);
1.123 daniel 7235: ctxt->errNo = XML_ERR_STRING_NOT_CLOSED;
1.59 daniel 7236: ctxt->wellFormed = 0;
1.55 daniel 7237: } else
1.40 daniel 7238: NEXT;
1.31 daniel 7239: } else {
1.55 daniel 7240: if ((ctxt->sax != NULL) && (ctxt->sax->error != NULL))
1.74 daniel 7241: ctxt->sax->error(ctxt->userData,
1.59 daniel 7242: "xmlParseVersionInfo : expected ' or \"\n");
1.122 daniel 7243: ctxt->wellFormed = 0;
1.123 daniel 7244: ctxt->errNo = XML_ERR_STRING_NOT_STARTED;
1.29 daniel 7245: }
7246: }
7247: return(version);
7248: }
7249:
1.50 daniel 7250: /**
7251: * xmlParseEncName:
7252: * @ctxt: an XML parser context
7253: *
7254: * parse the XML encoding name
1.29 daniel 7255: *
7256: * [81] EncName ::= [A-Za-z] ([A-Za-z0-9._] | '-')*
1.50 daniel 7257: *
1.68 daniel 7258: * Returns the encoding name value or NULL
1.29 daniel 7259: */
1.123 daniel 7260: xmlChar *
1.55 daniel 7261: xmlParseEncName(xmlParserCtxtPtr ctxt) {
1.135 daniel 7262: xmlChar *buf = NULL;
7263: int len = 0;
7264: int size = 10;
7265: xmlChar cur;
1.29 daniel 7266:
1.135 daniel 7267: cur = CUR;
7268: if (((cur >= 'a') && (cur <= 'z')) ||
7269: ((cur >= 'A') && (cur <= 'Z'))) {
7270: buf = (xmlChar *) xmlMalloc(size * sizeof(xmlChar));
7271: if (buf == NULL) {
7272: fprintf(stderr, "malloc of %d byte failed\n", size);
7273: return(NULL);
7274: }
7275:
7276: buf[len++] = cur;
1.40 daniel 7277: NEXT;
1.135 daniel 7278: cur = CUR;
1.152 daniel 7279: while (((cur >= 'a') && (cur <= 'z')) ||
7280: ((cur >= 'A') && (cur <= 'Z')) ||
7281: ((cur >= '0') && (cur <= '9')) ||
7282: (cur == '.') || (cur == '_') ||
7283: (cur == '-')) {
1.135 daniel 7284: if (len + 1 >= size) {
7285: size *= 2;
7286: buf = xmlRealloc(buf, size * sizeof(xmlChar));
7287: if (buf == NULL) {
7288: fprintf(stderr, "realloc of %d byte failed\n", size);
7289: return(NULL);
7290: }
7291: }
7292: buf[len++] = cur;
7293: NEXT;
7294: cur = CUR;
7295: if (cur == 0) {
7296: SHRINK;
7297: GROW;
7298: cur = CUR;
7299: }
7300: }
7301: buf[len] = 0;
1.29 daniel 7302: } else {
1.55 daniel 7303: if ((ctxt->sax != NULL) && (ctxt->sax->error != NULL))
1.74 daniel 7304: ctxt->sax->error(ctxt->userData, "Invalid XML encoding name\n");
1.59 daniel 7305: ctxt->wellFormed = 0;
1.123 daniel 7306: ctxt->errNo = XML_ERR_ENCODING_NAME;
1.29 daniel 7307: }
1.135 daniel 7308: return(buf);
1.29 daniel 7309: }
7310:
1.50 daniel 7311: /**
7312: * xmlParseEncodingDecl:
7313: * @ctxt: an XML parser context
7314: *
7315: * parse the XML encoding declaration
1.29 daniel 7316: *
7317: * [80] EncodingDecl ::= S 'encoding' Eq ('"' EncName '"' | "'" EncName "'")
1.50 daniel 7318: *
7319: * TODO: this should setup the conversion filters.
7320: *
1.68 daniel 7321: * Returns the encoding value or NULL
1.29 daniel 7322: */
7323:
1.123 daniel 7324: xmlChar *
1.55 daniel 7325: xmlParseEncodingDecl(xmlParserCtxtPtr ctxt) {
1.123 daniel 7326: xmlChar *encoding = NULL;
7327: const xmlChar *q;
1.29 daniel 7328:
1.42 daniel 7329: SKIP_BLANKS;
1.152 daniel 7330: if ((RAW == 'e') && (NXT(1) == 'n') &&
1.40 daniel 7331: (NXT(2) == 'c') && (NXT(3) == 'o') &&
7332: (NXT(4) == 'd') && (NXT(5) == 'i') &&
7333: (NXT(6) == 'n') && (NXT(7) == 'g')) {
7334: SKIP(8);
1.42 daniel 7335: SKIP_BLANKS;
1.152 daniel 7336: if (RAW != '=') {
1.55 daniel 7337: if ((ctxt->sax != NULL) && (ctxt->sax->error != NULL))
1.122 daniel 7338: ctxt->sax->error(ctxt->userData,
7339: "xmlParseEncodingDecl : expected '='\n");
1.59 daniel 7340: ctxt->wellFormed = 0;
1.123 daniel 7341: ctxt->errNo = XML_ERR_EQUAL_REQUIRED;
1.31 daniel 7342: return(NULL);
7343: }
1.40 daniel 7344: NEXT;
1.42 daniel 7345: SKIP_BLANKS;
1.152 daniel 7346: if (RAW == '"') {
1.40 daniel 7347: NEXT;
7348: q = CUR_PTR;
1.29 daniel 7349: encoding = xmlParseEncName(ctxt);
1.152 daniel 7350: if (RAW != '"') {
1.55 daniel 7351: if ((ctxt->sax != NULL) && (ctxt->sax->error != NULL))
1.122 daniel 7352: ctxt->sax->error(ctxt->userData,
7353: "String not closed\n%.50s\n", q);
1.59 daniel 7354: ctxt->wellFormed = 0;
1.123 daniel 7355: ctxt->errNo = XML_ERR_STRING_NOT_CLOSED;
1.55 daniel 7356: } else
1.40 daniel 7357: NEXT;
1.152 daniel 7358: } else if (RAW == '\''){
1.40 daniel 7359: NEXT;
7360: q = CUR_PTR;
1.29 daniel 7361: encoding = xmlParseEncName(ctxt);
1.152 daniel 7362: if (RAW != '\'') {
1.55 daniel 7363: if ((ctxt->sax != NULL) && (ctxt->sax->error != NULL))
1.122 daniel 7364: ctxt->sax->error(ctxt->userData,
7365: "String not closed\n%.50s\n", q);
1.59 daniel 7366: ctxt->wellFormed = 0;
1.123 daniel 7367: ctxt->errNo = XML_ERR_STRING_NOT_CLOSED;
1.55 daniel 7368: } else
1.40 daniel 7369: NEXT;
1.152 daniel 7370: } else if (RAW == '"'){
1.55 daniel 7371: if ((ctxt->sax != NULL) && (ctxt->sax->error != NULL))
1.74 daniel 7372: ctxt->sax->error(ctxt->userData,
1.59 daniel 7373: "xmlParseEncodingDecl : expected ' or \"\n");
7374: ctxt->wellFormed = 0;
1.123 daniel 7375: ctxt->errNo = XML_ERR_STRING_NOT_STARTED;
1.29 daniel 7376: }
7377: }
7378: return(encoding);
7379: }
7380:
1.50 daniel 7381: /**
7382: * xmlParseSDDecl:
7383: * @ctxt: an XML parser context
7384: *
7385: * parse the XML standalone declaration
1.29 daniel 7386: *
7387: * [32] SDDecl ::= S 'standalone' Eq
7388: * (("'" ('yes' | 'no') "'") | ('"' ('yes' | 'no')'"'))
1.99 daniel 7389: *
7390: * [ VC: Standalone Document Declaration ]
7391: * TODO The standalone document declaration must have the value "no"
7392: * if any external markup declarations contain declarations of:
7393: * - attributes with default values, if elements to which these
7394: * attributes apply appear in the document without specifications
7395: * of values for these attributes, or
7396: * - entities (other than amp, lt, gt, apos, quot), if references
7397: * to those entities appear in the document, or
7398: * - attributes with values subject to normalization, where the
7399: * attribute appears in the document with a value which will change
7400: * as a result of normalization, or
7401: * - element types with element content, if white space occurs directly
7402: * within any instance of those types.
1.68 daniel 7403: *
7404: * Returns 1 if standalone, 0 otherwise
1.29 daniel 7405: */
7406:
1.55 daniel 7407: int
7408: xmlParseSDDecl(xmlParserCtxtPtr ctxt) {
1.29 daniel 7409: int standalone = -1;
7410:
1.42 daniel 7411: SKIP_BLANKS;
1.152 daniel 7412: if ((RAW == 's') && (NXT(1) == 't') &&
1.40 daniel 7413: (NXT(2) == 'a') && (NXT(3) == 'n') &&
7414: (NXT(4) == 'd') && (NXT(5) == 'a') &&
7415: (NXT(6) == 'l') && (NXT(7) == 'o') &&
7416: (NXT(8) == 'n') && (NXT(9) == 'e')) {
7417: SKIP(10);
1.81 daniel 7418: SKIP_BLANKS;
1.152 daniel 7419: if (RAW != '=') {
1.55 daniel 7420: if ((ctxt->sax != NULL) && (ctxt->sax->error != NULL))
1.74 daniel 7421: ctxt->sax->error(ctxt->userData,
1.59 daniel 7422: "XML standalone declaration : expected '='\n");
1.123 daniel 7423: ctxt->errNo = XML_ERR_EQUAL_REQUIRED;
1.59 daniel 7424: ctxt->wellFormed = 0;
1.32 daniel 7425: return(standalone);
7426: }
1.40 daniel 7427: NEXT;
1.42 daniel 7428: SKIP_BLANKS;
1.152 daniel 7429: if (RAW == '\''){
1.40 daniel 7430: NEXT;
1.152 daniel 7431: if ((RAW == 'n') && (NXT(1) == 'o')) {
1.29 daniel 7432: standalone = 0;
1.40 daniel 7433: SKIP(2);
1.152 daniel 7434: } else if ((RAW == 'y') && (NXT(1) == 'e') &&
1.40 daniel 7435: (NXT(2) == 's')) {
1.29 daniel 7436: standalone = 1;
1.40 daniel 7437: SKIP(3);
1.29 daniel 7438: } else {
1.55 daniel 7439: if ((ctxt->sax != NULL) && (ctxt->sax->error != NULL))
1.122 daniel 7440: ctxt->sax->error(ctxt->userData,
7441: "standalone accepts only 'yes' or 'no'\n");
1.123 daniel 7442: ctxt->errNo = XML_ERR_STANDALONE_VALUE;
1.59 daniel 7443: ctxt->wellFormed = 0;
1.29 daniel 7444: }
1.152 daniel 7445: if (RAW != '\'') {
1.55 daniel 7446: if ((ctxt->sax != NULL) && (ctxt->sax->error != NULL))
1.74 daniel 7447: ctxt->sax->error(ctxt->userData, "String not closed\n");
1.123 daniel 7448: ctxt->errNo = XML_ERR_STRING_NOT_CLOSED;
1.59 daniel 7449: ctxt->wellFormed = 0;
1.55 daniel 7450: } else
1.40 daniel 7451: NEXT;
1.152 daniel 7452: } else if (RAW == '"'){
1.40 daniel 7453: NEXT;
1.152 daniel 7454: if ((RAW == 'n') && (NXT(1) == 'o')) {
1.29 daniel 7455: standalone = 0;
1.40 daniel 7456: SKIP(2);
1.152 daniel 7457: } else if ((RAW == 'y') && (NXT(1) == 'e') &&
1.40 daniel 7458: (NXT(2) == 's')) {
1.29 daniel 7459: standalone = 1;
1.40 daniel 7460: SKIP(3);
1.29 daniel 7461: } else {
1.55 daniel 7462: if ((ctxt->sax != NULL) && (ctxt->sax->error != NULL))
1.74 daniel 7463: ctxt->sax->error(ctxt->userData,
1.59 daniel 7464: "standalone accepts only 'yes' or 'no'\n");
1.123 daniel 7465: ctxt->errNo = XML_ERR_STANDALONE_VALUE;
1.59 daniel 7466: ctxt->wellFormed = 0;
1.29 daniel 7467: }
1.152 daniel 7468: if (RAW != '"') {
1.55 daniel 7469: if ((ctxt->sax != NULL) && (ctxt->sax->error != NULL))
1.74 daniel 7470: ctxt->sax->error(ctxt->userData, "String not closed\n");
1.59 daniel 7471: ctxt->wellFormed = 0;
1.123 daniel 7472: ctxt->errNo = XML_ERR_STRING_NOT_CLOSED;
1.55 daniel 7473: } else
1.40 daniel 7474: NEXT;
1.37 daniel 7475: } else {
1.55 daniel 7476: if ((ctxt->sax != NULL) && (ctxt->sax->error != NULL))
1.122 daniel 7477: ctxt->sax->error(ctxt->userData,
7478: "Standalone value not found\n");
1.59 daniel 7479: ctxt->wellFormed = 0;
1.123 daniel 7480: ctxt->errNo = XML_ERR_STRING_NOT_STARTED;
1.37 daniel 7481: }
1.29 daniel 7482: }
7483: return(standalone);
7484: }
7485:
1.50 daniel 7486: /**
7487: * xmlParseXMLDecl:
7488: * @ctxt: an XML parser context
7489: *
7490: * parse an XML declaration header
1.29 daniel 7491: *
7492: * [23] XMLDecl ::= '<?xml' VersionInfo EncodingDecl? SDDecl? S? '?>'
1.1 veillard 7493: */
7494:
1.55 daniel 7495: void
7496: xmlParseXMLDecl(xmlParserCtxtPtr ctxt) {
1.123 daniel 7497: xmlChar *version;
1.1 veillard 7498:
7499: /*
1.19 daniel 7500: * We know that '<?xml' is here.
1.1 veillard 7501: */
1.40 daniel 7502: SKIP(5);
1.1 veillard 7503:
1.153 daniel 7504: if (!IS_BLANK(RAW)) {
1.59 daniel 7505: if ((ctxt->sax != NULL) && (ctxt->sax->error != NULL))
1.74 daniel 7506: ctxt->sax->error(ctxt->userData, "Blank needed after '<?xml'\n");
1.123 daniel 7507: ctxt->errNo = XML_ERR_SPACE_REQUIRED;
1.59 daniel 7508: ctxt->wellFormed = 0;
7509: }
1.42 daniel 7510: SKIP_BLANKS;
1.1 veillard 7511:
7512: /*
1.29 daniel 7513: * We should have the VersionInfo here.
1.1 veillard 7514: */
1.29 daniel 7515: version = xmlParseVersionInfo(ctxt);
7516: if (version == NULL)
1.45 daniel 7517: version = xmlCharStrdup(XML_DEFAULT_VERSION);
1.72 daniel 7518: ctxt->version = xmlStrdup(version);
1.119 daniel 7519: xmlFree(version);
1.29 daniel 7520:
7521: /*
7522: * We may have the encoding declaration
7523: */
1.153 daniel 7524: if (!IS_BLANK(RAW)) {
1.152 daniel 7525: if ((RAW == '?') && (NXT(1) == '>')) {
1.59 daniel 7526: SKIP(2);
7527: return;
7528: }
7529: if ((ctxt->sax != NULL) && (ctxt->sax->error != NULL))
1.74 daniel 7530: ctxt->sax->error(ctxt->userData, "Blank needed here\n");
1.123 daniel 7531: ctxt->errNo = XML_ERR_SPACE_REQUIRED;
1.59 daniel 7532: ctxt->wellFormed = 0;
7533: }
1.164 daniel 7534: ctxt->input->encoding = xmlParseEncodingDecl(ctxt);
1.1 veillard 7535:
7536: /*
1.29 daniel 7537: * We may have the standalone status.
1.1 veillard 7538: */
1.164 daniel 7539: if ((ctxt->input->encoding != NULL) && (!IS_BLANK(RAW))) {
1.152 daniel 7540: if ((RAW == '?') && (NXT(1) == '>')) {
1.59 daniel 7541: SKIP(2);
7542: return;
7543: }
7544: if ((ctxt->sax != NULL) && (ctxt->sax->error != NULL))
1.74 daniel 7545: ctxt->sax->error(ctxt->userData, "Blank needed here\n");
1.59 daniel 7546: ctxt->wellFormed = 0;
1.123 daniel 7547: ctxt->errNo = XML_ERR_SPACE_REQUIRED;
1.59 daniel 7548: }
7549: SKIP_BLANKS;
1.167 daniel 7550: ctxt->input->standalone = xmlParseSDDecl(ctxt);
1.1 veillard 7551:
1.42 daniel 7552: SKIP_BLANKS;
1.152 daniel 7553: if ((RAW == '?') && (NXT(1) == '>')) {
1.40 daniel 7554: SKIP(2);
1.152 daniel 7555: } else if (RAW == '>') {
1.31 daniel 7556: /* Deprecated old WD ... */
1.55 daniel 7557: if ((ctxt->sax != NULL) && (ctxt->sax->error != NULL))
1.122 daniel 7558: ctxt->sax->error(ctxt->userData,
7559: "XML declaration must end-up with '?>'\n");
1.59 daniel 7560: ctxt->wellFormed = 0;
1.123 daniel 7561: ctxt->errNo = XML_ERR_XMLDECL_NOT_FINISHED;
1.40 daniel 7562: NEXT;
1.29 daniel 7563: } else {
1.55 daniel 7564: if ((ctxt->sax != NULL) && (ctxt->sax->error != NULL))
1.122 daniel 7565: ctxt->sax->error(ctxt->userData,
7566: "parsing XML declaration: '?>' expected\n");
1.59 daniel 7567: ctxt->wellFormed = 0;
1.123 daniel 7568: ctxt->errNo = XML_ERR_XMLDECL_NOT_FINISHED;
1.40 daniel 7569: MOVETO_ENDTAG(CUR_PTR);
7570: NEXT;
1.29 daniel 7571: }
1.1 veillard 7572: }
7573:
1.50 daniel 7574: /**
7575: * xmlParseMisc:
7576: * @ctxt: an XML parser context
7577: *
7578: * parse an XML Misc* optionnal field.
1.21 daniel 7579: *
1.22 daniel 7580: * [27] Misc ::= Comment | PI | S
1.1 veillard 7581: */
7582:
1.55 daniel 7583: void
7584: xmlParseMisc(xmlParserCtxtPtr ctxt) {
1.152 daniel 7585: while (((RAW == '<') && (NXT(1) == '?')) ||
7586: ((RAW == '<') && (NXT(1) == '!') &&
1.40 daniel 7587: (NXT(2) == '-') && (NXT(3) == '-')) ||
7588: IS_BLANK(CUR)) {
1.152 daniel 7589: if ((RAW == '<') && (NXT(1) == '?')) {
1.16 daniel 7590: xmlParsePI(ctxt);
1.40 daniel 7591: } else if (IS_BLANK(CUR)) {
7592: NEXT;
1.1 veillard 7593: } else
1.114 daniel 7594: xmlParseComment(ctxt);
1.1 veillard 7595: }
7596: }
7597:
1.50 daniel 7598: /**
7599: * xmlParseDocument :
7600: * @ctxt: an XML parser context
7601: *
7602: * parse an XML document (and build a tree if using the standard SAX
7603: * interface).
1.21 daniel 7604: *
1.22 daniel 7605: * [1] document ::= prolog element Misc*
1.29 daniel 7606: *
7607: * [22] prolog ::= XMLDecl? Misc* (doctypedecl Misc*)?
1.50 daniel 7608: *
1.68 daniel 7609: * Returns 0, -1 in case of error. the parser context is augmented
1.50 daniel 7610: * as a result of the parsing.
1.1 veillard 7611: */
7612:
1.55 daniel 7613: int
7614: xmlParseDocument(xmlParserCtxtPtr ctxt) {
1.156 daniel 7615: xmlChar start[4];
7616: xmlCharEncoding enc;
7617:
1.45 daniel 7618: xmlDefaultSAXHandlerInit();
7619:
1.91 daniel 7620: GROW;
7621:
1.14 veillard 7622: /*
1.44 daniel 7623: * SAX: beginning of the document processing.
7624: */
1.72 daniel 7625: if ((ctxt->sax) && (ctxt->sax->setDocumentLocator))
1.74 daniel 7626: ctxt->sax->setDocumentLocator(ctxt->userData, &xmlDefaultSAXLocator);
1.44 daniel 7627:
1.156 daniel 7628: /*
7629: * Get the 4 first bytes and decode the charset
7630: * if enc != XML_CHAR_ENCODING_NONE
7631: * plug some encoding conversion routines.
7632: */
7633: start[0] = RAW;
7634: start[1] = NXT(1);
7635: start[2] = NXT(2);
7636: start[3] = NXT(3);
7637: enc = xmlDetectCharEncoding(start, 4);
7638: if (enc != XML_CHAR_ENCODING_NONE) {
7639: xmlSwitchEncoding(ctxt, enc);
7640: }
7641:
1.1 veillard 7642:
1.168 ! daniel 7643: #if 0
1.1 veillard 7644: /*
7645: * Wipe out everything which is before the first '<'
7646: */
1.153 daniel 7647: if (IS_BLANK(RAW)) {
1.59 daniel 7648: if ((ctxt->sax != NULL) && (ctxt->sax->error != NULL))
1.74 daniel 7649: ctxt->sax->error(ctxt->userData,
1.59 daniel 7650: "Extra spaces at the beginning of the document are not allowed\n");
1.123 daniel 7651: ctxt->errNo = XML_ERR_DOCUMENT_START;
1.59 daniel 7652: ctxt->wellFormed = 0;
7653: SKIP_BLANKS;
7654: }
1.168 ! daniel 7655: #endif
1.59 daniel 7656:
7657: if (CUR == 0) {
7658: if ((ctxt->sax != NULL) && (ctxt->sax->error != NULL))
1.74 daniel 7659: ctxt->sax->error(ctxt->userData, "Document is empty\n");
1.123 daniel 7660: ctxt->errNo = XML_ERR_DOCUMENT_EMPTY;
1.59 daniel 7661: ctxt->wellFormed = 0;
7662: }
1.1 veillard 7663:
7664: /*
7665: * Check for the XMLDecl in the Prolog.
7666: */
1.91 daniel 7667: GROW;
1.152 daniel 7668: if ((RAW == '<') && (NXT(1) == '?') &&
1.40 daniel 7669: (NXT(2) == 'x') && (NXT(3) == 'm') &&
1.142 daniel 7670: (NXT(4) == 'l') && (IS_BLANK(NXT(5)))) {
1.19 daniel 7671: xmlParseXMLDecl(ctxt);
1.167 daniel 7672: ctxt->standalone = ctxt->input->standalone;
1.42 daniel 7673: SKIP_BLANKS;
1.164 daniel 7674: if ((ctxt->encoding == NULL) && (ctxt->input->encoding != NULL))
7675: ctxt->encoding = xmlStrdup(ctxt->input->encoding);
7676:
1.151 daniel 7677: #if 0
1.152 daniel 7678: } else if ((RAW == '<') && (NXT(1) == '?') &&
1.40 daniel 7679: (NXT(2) == 'X') && (NXT(3) == 'M') &&
1.142 daniel 7680: (NXT(4) == 'L') && (IS_BLANK(NXT(5)))) {
1.19 daniel 7681: /*
7682: * The first drafts were using <?XML and the final W3C REC
7683: * now use <?xml ...
7684: */
1.16 daniel 7685: xmlParseXMLDecl(ctxt);
1.42 daniel 7686: SKIP_BLANKS;
1.151 daniel 7687: #endif
1.1 veillard 7688: } else {
1.72 daniel 7689: ctxt->version = xmlCharStrdup(XML_DEFAULT_VERSION);
1.1 veillard 7690: }
1.72 daniel 7691: if ((ctxt->sax) && (ctxt->sax->startDocument))
1.74 daniel 7692: ctxt->sax->startDocument(ctxt->userData);
1.1 veillard 7693:
7694: /*
7695: * The Misc part of the Prolog
7696: */
1.91 daniel 7697: GROW;
1.16 daniel 7698: xmlParseMisc(ctxt);
1.1 veillard 7699:
7700: /*
1.29 daniel 7701: * Then possibly doc type declaration(s) and more Misc
1.21 daniel 7702: * (doctypedecl Misc*)?
7703: */
1.91 daniel 7704: GROW;
1.152 daniel 7705: if ((RAW == '<') && (NXT(1) == '!') &&
1.40 daniel 7706: (NXT(2) == 'D') && (NXT(3) == 'O') &&
7707: (NXT(4) == 'C') && (NXT(5) == 'T') &&
7708: (NXT(6) == 'Y') && (NXT(7) == 'P') &&
7709: (NXT(8) == 'E')) {
1.165 daniel 7710:
1.166 daniel 7711: ctxt->inSubset = 1;
1.22 daniel 7712: xmlParseDocTypeDecl(ctxt);
1.152 daniel 7713: if (RAW == '[') {
1.140 daniel 7714: ctxt->instate = XML_PARSER_DTD;
7715: xmlParseInternalSubset(ctxt);
7716: }
1.165 daniel 7717:
7718: /*
7719: * Create and update the external subset.
7720: */
1.166 daniel 7721: ctxt->inSubset = 2;
1.165 daniel 7722: if ((ctxt->sax != NULL) && (ctxt->sax->externalSubset != NULL))
7723: ctxt->sax->externalSubset(ctxt->userData, ctxt->intSubName,
7724: ctxt->extSubSystem, ctxt->extSubURI);
1.166 daniel 7725: ctxt->inSubset = 0;
1.165 daniel 7726:
7727:
1.96 daniel 7728: ctxt->instate = XML_PARSER_PROLOG;
1.22 daniel 7729: xmlParseMisc(ctxt);
1.21 daniel 7730: }
7731:
7732: /*
7733: * Time to start parsing the tree itself
1.1 veillard 7734: */
1.91 daniel 7735: GROW;
1.152 daniel 7736: if (RAW != '<') {
1.59 daniel 7737: if ((ctxt->sax != NULL) && (ctxt->sax->error != NULL))
1.74 daniel 7738: ctxt->sax->error(ctxt->userData,
1.151 daniel 7739: "Start tag expected, '<' not found\n");
1.140 daniel 7740: ctxt->errNo = XML_ERR_DOCUMENT_EMPTY;
1.59 daniel 7741: ctxt->wellFormed = 0;
1.140 daniel 7742: ctxt->instate = XML_PARSER_EOF;
7743: } else {
7744: ctxt->instate = XML_PARSER_CONTENT;
7745: xmlParseElement(ctxt);
7746: ctxt->instate = XML_PARSER_EPILOG;
7747:
7748:
7749: /*
7750: * The Misc part at the end
7751: */
7752: xmlParseMisc(ctxt);
7753:
1.152 daniel 7754: if (RAW != 0) {
1.140 daniel 7755: if ((ctxt->sax != NULL) && (ctxt->sax->error != NULL))
7756: ctxt->sax->error(ctxt->userData,
7757: "Extra content at the end of the document\n");
7758: ctxt->wellFormed = 0;
7759: ctxt->errNo = XML_ERR_DOCUMENT_END;
7760: }
7761: ctxt->instate = XML_PARSER_EOF;
1.59 daniel 7762: }
7763:
1.44 daniel 7764: /*
7765: * SAX: end of the document processing.
7766: */
1.72 daniel 7767: if ((ctxt->sax) && (ctxt->sax->endDocument != NULL))
1.74 daniel 7768: ctxt->sax->endDocument(ctxt->userData);
1.151 daniel 7769:
7770: /*
7771: * Grab the encoding if it was added on-the-fly
7772: */
7773: if ((ctxt->encoding != NULL) && (ctxt->myDoc != NULL) &&
7774: (ctxt->myDoc->encoding == NULL)) {
7775: ctxt->myDoc->encoding = ctxt->encoding;
7776: ctxt->encoding = NULL;
7777: }
1.59 daniel 7778: if (! ctxt->wellFormed) return(-1);
1.16 daniel 7779: return(0);
7780: }
7781:
1.98 daniel 7782: /************************************************************************
7783: * *
1.128 daniel 7784: * Progressive parsing interfaces *
7785: * *
7786: ************************************************************************/
7787:
7788: /**
7789: * xmlParseLookupSequence:
7790: * @ctxt: an XML parser context
7791: * @first: the first char to lookup
1.140 daniel 7792: * @next: the next char to lookup or zero
7793: * @third: the next char to lookup or zero
1.128 daniel 7794: *
1.140 daniel 7795: * Try to find if a sequence (first, next, third) or just (first next) or
7796: * (first) is available in the input stream.
7797: * This function has a side effect of (possibly) incrementing ctxt->checkIndex
7798: * to avoid rescanning sequences of bytes, it DOES change the state of the
7799: * parser, do not use liberally.
1.128 daniel 7800: *
1.140 daniel 7801: * Returns the index to the current parsing point if the full sequence
7802: * is available, -1 otherwise.
1.128 daniel 7803: */
7804: int
1.140 daniel 7805: xmlParseLookupSequence(xmlParserCtxtPtr ctxt, xmlChar first,
7806: xmlChar next, xmlChar third) {
7807: int base, len;
7808: xmlParserInputPtr in;
7809: const xmlChar *buf;
7810:
7811: in = ctxt->input;
7812: if (in == NULL) return(-1);
7813: base = in->cur - in->base;
7814: if (base < 0) return(-1);
7815: if (ctxt->checkIndex > base)
7816: base = ctxt->checkIndex;
7817: if (in->buf == NULL) {
7818: buf = in->base;
7819: len = in->length;
7820: } else {
7821: buf = in->buf->buffer->content;
7822: len = in->buf->buffer->use;
7823: }
7824: /* take into account the sequence length */
7825: if (third) len -= 2;
7826: else if (next) len --;
7827: for (;base < len;base++) {
7828: if (buf[base] == first) {
7829: if (third != 0) {
7830: if ((buf[base + 1] != next) ||
7831: (buf[base + 2] != third)) continue;
7832: } else if (next != 0) {
7833: if (buf[base + 1] != next) continue;
7834: }
7835: ctxt->checkIndex = 0;
7836: #ifdef DEBUG_PUSH
7837: if (next == 0)
7838: fprintf(stderr, "PP: lookup '%c' found at %d\n",
7839: first, base);
7840: else if (third == 0)
7841: fprintf(stderr, "PP: lookup '%c%c' found at %d\n",
7842: first, next, base);
7843: else
7844: fprintf(stderr, "PP: lookup '%c%c%c' found at %d\n",
7845: first, next, third, base);
7846: #endif
7847: return(base - (in->cur - in->base));
7848: }
7849: }
7850: ctxt->checkIndex = base;
7851: #ifdef DEBUG_PUSH
7852: if (next == 0)
7853: fprintf(stderr, "PP: lookup '%c' failed\n", first);
7854: else if (third == 0)
7855: fprintf(stderr, "PP: lookup '%c%c' failed\n", first, next);
7856: else
7857: fprintf(stderr, "PP: lookup '%c%c%c' failed\n", first, next, third);
7858: #endif
7859: return(-1);
1.128 daniel 7860: }
7861:
7862: /**
1.143 daniel 7863: * xmlParseTryOrFinish:
1.128 daniel 7864: * @ctxt: an XML parser context
1.143 daniel 7865: * @terminate: last chunk indicator
1.128 daniel 7866: *
7867: * Try to progress on parsing
7868: *
7869: * Returns zero if no parsing was possible
7870: */
7871: int
1.143 daniel 7872: xmlParseTryOrFinish(xmlParserCtxtPtr ctxt, int terminate) {
1.128 daniel 7873: int ret = 0;
1.140 daniel 7874: xmlParserInputPtr in;
7875: int avail;
7876: xmlChar cur, next;
7877:
7878: #ifdef DEBUG_PUSH
7879: switch (ctxt->instate) {
7880: case XML_PARSER_EOF:
7881: fprintf(stderr, "PP: try EOF\n"); break;
7882: case XML_PARSER_START:
7883: fprintf(stderr, "PP: try START\n"); break;
7884: case XML_PARSER_MISC:
7885: fprintf(stderr, "PP: try MISC\n");break;
7886: case XML_PARSER_COMMENT:
7887: fprintf(stderr, "PP: try COMMENT\n");break;
7888: case XML_PARSER_PROLOG:
7889: fprintf(stderr, "PP: try PROLOG\n");break;
7890: case XML_PARSER_START_TAG:
7891: fprintf(stderr, "PP: try START_TAG\n");break;
7892: case XML_PARSER_CONTENT:
7893: fprintf(stderr, "PP: try CONTENT\n");break;
7894: case XML_PARSER_CDATA_SECTION:
7895: fprintf(stderr, "PP: try CDATA_SECTION\n");break;
7896: case XML_PARSER_END_TAG:
7897: fprintf(stderr, "PP: try END_TAG\n");break;
7898: case XML_PARSER_ENTITY_DECL:
7899: fprintf(stderr, "PP: try ENTITY_DECL\n");break;
7900: case XML_PARSER_ENTITY_VALUE:
7901: fprintf(stderr, "PP: try ENTITY_VALUE\n");break;
7902: case XML_PARSER_ATTRIBUTE_VALUE:
7903: fprintf(stderr, "PP: try ATTRIBUTE_VALUE\n");break;
7904: case XML_PARSER_DTD:
7905: fprintf(stderr, "PP: try DTD\n");break;
7906: case XML_PARSER_EPILOG:
7907: fprintf(stderr, "PP: try EPILOG\n");break;
7908: case XML_PARSER_PI:
7909: fprintf(stderr, "PP: try PI\n");break;
7910: }
7911: #endif
1.128 daniel 7912:
7913: while (1) {
1.140 daniel 7914: /*
7915: * Pop-up of finished entities.
7916: */
1.152 daniel 7917: while ((RAW == 0) && (ctxt->inputNr > 1))
1.140 daniel 7918: xmlPopInput(ctxt);
7919:
7920: in = ctxt->input;
7921: if (in == NULL) break;
7922: if (in->buf == NULL)
7923: avail = in->length - (in->cur - in->base);
7924: else
7925: avail = in->buf->buffer->use - (in->cur - in->base);
7926: if (avail < 1)
7927: goto done;
1.128 daniel 7928: switch (ctxt->instate) {
7929: case XML_PARSER_EOF:
1.140 daniel 7930: /*
7931: * Document parsing is done !
7932: */
7933: goto done;
7934: case XML_PARSER_START:
7935: /*
7936: * Very first chars read from the document flow.
7937: */
7938: cur = in->cur[0];
7939: if (IS_BLANK(cur)) {
7940: if ((ctxt->sax) && (ctxt->sax->setDocumentLocator))
7941: ctxt->sax->setDocumentLocator(ctxt->userData,
7942: &xmlDefaultSAXLocator);
7943: if ((ctxt->sax != NULL) && (ctxt->sax->error != NULL))
7944: ctxt->sax->error(ctxt->userData,
7945: "Extra spaces at the beginning of the document are not allowed\n");
7946: ctxt->errNo = XML_ERR_DOCUMENT_START;
7947: ctxt->wellFormed = 0;
7948: SKIP_BLANKS;
7949: ret++;
7950: if (in->buf == NULL)
7951: avail = in->length - (in->cur - in->base);
7952: else
7953: avail = in->buf->buffer->use - (in->cur - in->base);
7954: }
7955: if (avail < 2)
7956: goto done;
7957:
7958: cur = in->cur[0];
7959: next = in->cur[1];
7960: if (cur == 0) {
7961: if ((ctxt->sax) && (ctxt->sax->setDocumentLocator))
7962: ctxt->sax->setDocumentLocator(ctxt->userData,
7963: &xmlDefaultSAXLocator);
7964: if ((ctxt->sax != NULL) && (ctxt->sax->error != NULL))
7965: ctxt->sax->error(ctxt->userData, "Document is empty\n");
7966: ctxt->errNo = XML_ERR_DOCUMENT_EMPTY;
7967: ctxt->wellFormed = 0;
7968: ctxt->instate = XML_PARSER_EOF;
7969: #ifdef DEBUG_PUSH
7970: fprintf(stderr, "PP: entering EOF\n");
7971: #endif
7972: if ((ctxt->sax) && (ctxt->sax->endDocument != NULL))
7973: ctxt->sax->endDocument(ctxt->userData);
7974: goto done;
7975: }
7976: if ((cur == '<') && (next == '?')) {
7977: /* PI or XML decl */
7978: if (avail < 5) return(ret);
1.143 daniel 7979: if ((!terminate) &&
7980: (xmlParseLookupSequence(ctxt, '?', '>', 0) < 0))
1.140 daniel 7981: return(ret);
7982: if ((ctxt->sax) && (ctxt->sax->setDocumentLocator))
7983: ctxt->sax->setDocumentLocator(ctxt->userData,
7984: &xmlDefaultSAXLocator);
7985: if ((in->cur[2] == 'x') &&
7986: (in->cur[3] == 'm') &&
1.142 daniel 7987: (in->cur[4] == 'l') &&
7988: (IS_BLANK(in->cur[5]))) {
1.140 daniel 7989: ret += 5;
7990: #ifdef DEBUG_PUSH
7991: fprintf(stderr, "PP: Parsing XML Decl\n");
7992: #endif
7993: xmlParseXMLDecl(ctxt);
1.167 daniel 7994: ctxt->standalone = ctxt->input->standalone;
1.164 daniel 7995: if ((ctxt->encoding == NULL) &&
7996: (ctxt->input->encoding != NULL))
7997: ctxt->encoding = xmlStrdup(ctxt->input->encoding);
1.140 daniel 7998: if ((ctxt->sax) && (ctxt->sax->startDocument))
7999: ctxt->sax->startDocument(ctxt->userData);
8000: ctxt->instate = XML_PARSER_MISC;
8001: #ifdef DEBUG_PUSH
8002: fprintf(stderr, "PP: entering MISC\n");
8003: #endif
8004: } else {
8005: ctxt->version = xmlCharStrdup(XML_DEFAULT_VERSION);
8006: if ((ctxt->sax) && (ctxt->sax->startDocument))
8007: ctxt->sax->startDocument(ctxt->userData);
8008: ctxt->instate = XML_PARSER_MISC;
8009: #ifdef DEBUG_PUSH
8010: fprintf(stderr, "PP: entering MISC\n");
8011: #endif
8012: }
8013: } else {
8014: if ((ctxt->sax) && (ctxt->sax->setDocumentLocator))
8015: ctxt->sax->setDocumentLocator(ctxt->userData,
8016: &xmlDefaultSAXLocator);
8017: ctxt->version = xmlCharStrdup(XML_DEFAULT_VERSION);
8018: if ((ctxt->sax) && (ctxt->sax->startDocument))
8019: ctxt->sax->startDocument(ctxt->userData);
8020: ctxt->instate = XML_PARSER_MISC;
8021: #ifdef DEBUG_PUSH
8022: fprintf(stderr, "PP: entering MISC\n");
8023: #endif
8024: }
8025: break;
8026: case XML_PARSER_MISC:
8027: SKIP_BLANKS;
8028: if (in->buf == NULL)
8029: avail = in->length - (in->cur - in->base);
8030: else
8031: avail = in->buf->buffer->use - (in->cur - in->base);
8032: if (avail < 2)
8033: goto done;
8034: cur = in->cur[0];
8035: next = in->cur[1];
8036: if ((cur == '<') && (next == '?')) {
1.143 daniel 8037: if ((!terminate) &&
8038: (xmlParseLookupSequence(ctxt, '?', '>', 0) < 0))
1.140 daniel 8039: goto done;
8040: #ifdef DEBUG_PUSH
8041: fprintf(stderr, "PP: Parsing PI\n");
8042: #endif
8043: xmlParsePI(ctxt);
8044: } else if ((cur == '<') && (next == '!') &&
8045: (in->cur[2] == '-') && (in->cur[3] == '-')) {
1.143 daniel 8046: if ((!terminate) &&
8047: (xmlParseLookupSequence(ctxt, '-', '-', '>') < 0))
1.140 daniel 8048: goto done;
8049: #ifdef DEBUG_PUSH
8050: fprintf(stderr, "PP: Parsing Comment\n");
8051: #endif
8052: xmlParseComment(ctxt);
8053: ctxt->instate = XML_PARSER_MISC;
8054: } else if ((cur == '<') && (next == '!') &&
8055: (in->cur[2] == 'D') && (in->cur[3] == 'O') &&
8056: (in->cur[4] == 'C') && (in->cur[5] == 'T') &&
8057: (in->cur[6] == 'Y') && (in->cur[7] == 'P') &&
8058: (in->cur[8] == 'E')) {
1.143 daniel 8059: if ((!terminate) &&
8060: (xmlParseLookupSequence(ctxt, '>', 0, 0) < 0))
1.140 daniel 8061: goto done;
8062: #ifdef DEBUG_PUSH
8063: fprintf(stderr, "PP: Parsing internal subset\n");
8064: #endif
1.166 daniel 8065: ctxt->inSubset = 1;
1.140 daniel 8066: xmlParseDocTypeDecl(ctxt);
1.152 daniel 8067: if (RAW == '[') {
1.140 daniel 8068: ctxt->instate = XML_PARSER_DTD;
8069: #ifdef DEBUG_PUSH
8070: fprintf(stderr, "PP: entering DTD\n");
8071: #endif
8072: } else {
1.166 daniel 8073: /*
8074: * Create and update the external subset.
8075: */
8076: ctxt->inSubset = 2;
8077: if ((ctxt->sax != NULL) &&
8078: (ctxt->sax->externalSubset != NULL))
8079: ctxt->sax->externalSubset(ctxt->userData,
8080: ctxt->intSubName, ctxt->extSubSystem,
8081: ctxt->extSubURI);
8082: ctxt->inSubset = 0;
1.140 daniel 8083: ctxt->instate = XML_PARSER_PROLOG;
8084: #ifdef DEBUG_PUSH
8085: fprintf(stderr, "PP: entering PROLOG\n");
8086: #endif
8087: }
8088: } else if ((cur == '<') && (next == '!') &&
8089: (avail < 9)) {
8090: goto done;
8091: } else {
8092: ctxt->instate = XML_PARSER_START_TAG;
8093: #ifdef DEBUG_PUSH
8094: fprintf(stderr, "PP: entering START_TAG\n");
8095: #endif
8096: }
8097: break;
1.128 daniel 8098: case XML_PARSER_PROLOG:
1.140 daniel 8099: SKIP_BLANKS;
8100: if (in->buf == NULL)
8101: avail = in->length - (in->cur - in->base);
8102: else
8103: avail = in->buf->buffer->use - (in->cur - in->base);
8104: if (avail < 2)
8105: goto done;
8106: cur = in->cur[0];
8107: next = in->cur[1];
8108: if ((cur == '<') && (next == '?')) {
1.143 daniel 8109: if ((!terminate) &&
8110: (xmlParseLookupSequence(ctxt, '?', '>', 0) < 0))
1.140 daniel 8111: goto done;
8112: #ifdef DEBUG_PUSH
8113: fprintf(stderr, "PP: Parsing PI\n");
8114: #endif
8115: xmlParsePI(ctxt);
8116: } else if ((cur == '<') && (next == '!') &&
8117: (in->cur[2] == '-') && (in->cur[3] == '-')) {
1.143 daniel 8118: if ((!terminate) &&
8119: (xmlParseLookupSequence(ctxt, '-', '-', '>') < 0))
1.140 daniel 8120: goto done;
8121: #ifdef DEBUG_PUSH
8122: fprintf(stderr, "PP: Parsing Comment\n");
8123: #endif
8124: xmlParseComment(ctxt);
8125: ctxt->instate = XML_PARSER_PROLOG;
8126: } else if ((cur == '<') && (next == '!') &&
8127: (avail < 4)) {
8128: goto done;
8129: } else {
8130: ctxt->instate = XML_PARSER_START_TAG;
8131: #ifdef DEBUG_PUSH
8132: fprintf(stderr, "PP: entering START_TAG\n");
8133: #endif
8134: }
8135: break;
8136: case XML_PARSER_EPILOG:
8137: SKIP_BLANKS;
8138: if (in->buf == NULL)
8139: avail = in->length - (in->cur - in->base);
8140: else
8141: avail = in->buf->buffer->use - (in->cur - in->base);
8142: if (avail < 2)
8143: goto done;
8144: cur = in->cur[0];
8145: next = in->cur[1];
8146: if ((cur == '<') && (next == '?')) {
1.143 daniel 8147: if ((!terminate) &&
8148: (xmlParseLookupSequence(ctxt, '?', '>', 0) < 0))
1.140 daniel 8149: goto done;
8150: #ifdef DEBUG_PUSH
8151: fprintf(stderr, "PP: Parsing PI\n");
8152: #endif
8153: xmlParsePI(ctxt);
8154: ctxt->instate = XML_PARSER_EPILOG;
8155: } else if ((cur == '<') && (next == '!') &&
8156: (in->cur[2] == '-') && (in->cur[3] == '-')) {
1.143 daniel 8157: if ((!terminate) &&
8158: (xmlParseLookupSequence(ctxt, '-', '-', '>') < 0))
1.140 daniel 8159: goto done;
8160: #ifdef DEBUG_PUSH
8161: fprintf(stderr, "PP: Parsing Comment\n");
8162: #endif
8163: xmlParseComment(ctxt);
8164: ctxt->instate = XML_PARSER_EPILOG;
8165: } else if ((cur == '<') && (next == '!') &&
8166: (avail < 4)) {
8167: goto done;
8168: } else {
8169: if ((ctxt->sax != NULL) && (ctxt->sax->error != NULL))
8170: ctxt->sax->error(ctxt->userData,
8171: "Extra content at the end of the document\n");
8172: ctxt->wellFormed = 0;
8173: ctxt->errNo = XML_ERR_DOCUMENT_END;
8174: ctxt->instate = XML_PARSER_EOF;
8175: #ifdef DEBUG_PUSH
8176: fprintf(stderr, "PP: entering EOF\n");
8177: #endif
8178: if ((ctxt->sax) && (ctxt->sax->endDocument != NULL))
8179: ctxt->sax->endDocument(ctxt->userData);
8180: goto done;
8181: }
8182: break;
8183: case XML_PARSER_START_TAG: {
8184: xmlChar *name, *oldname;
8185:
8186: if (avail < 2)
8187: goto done;
8188: cur = in->cur[0];
8189: if (cur != '<') {
8190: if ((ctxt->sax != NULL) && (ctxt->sax->error != NULL))
8191: ctxt->sax->error(ctxt->userData,
8192: "Start tag expect, '<' not found\n");
8193: ctxt->errNo = XML_ERR_DOCUMENT_EMPTY;
8194: ctxt->wellFormed = 0;
8195: ctxt->instate = XML_PARSER_EOF;
8196: #ifdef DEBUG_PUSH
8197: fprintf(stderr, "PP: entering EOF\n");
8198: #endif
8199: if ((ctxt->sax) && (ctxt->sax->endDocument != NULL))
8200: ctxt->sax->endDocument(ctxt->userData);
8201: goto done;
8202: }
1.143 daniel 8203: if ((!terminate) &&
8204: (xmlParseLookupSequence(ctxt, '>', 0, 0) < 0))
1.140 daniel 8205: goto done;
8206: name = xmlParseStartTag(ctxt);
8207: if (name == NULL) {
8208: ctxt->instate = XML_PARSER_EOF;
8209: #ifdef DEBUG_PUSH
8210: fprintf(stderr, "PP: entering EOF\n");
8211: #endif
8212: if ((ctxt->sax) && (ctxt->sax->endDocument != NULL))
8213: ctxt->sax->endDocument(ctxt->userData);
8214: goto done;
8215: }
8216: namePush(ctxt, xmlStrdup(name));
8217:
8218: /*
8219: * [ VC: Root Element Type ]
8220: * The Name in the document type declaration must match
8221: * the element type of the root element.
8222: */
8223: if (ctxt->validate && ctxt->wellFormed && ctxt->myDoc &&
1.157 daniel 8224: ctxt->node && (ctxt->node == ctxt->myDoc->children))
1.140 daniel 8225: ctxt->valid &= xmlValidateRoot(&ctxt->vctxt, ctxt->myDoc);
8226:
8227: /*
8228: * Check for an Empty Element.
8229: */
1.152 daniel 8230: if ((RAW == '/') && (NXT(1) == '>')) {
1.140 daniel 8231: SKIP(2);
8232: if ((ctxt->sax != NULL) && (ctxt->sax->endElement != NULL))
8233: ctxt->sax->endElement(ctxt->userData, name);
8234: xmlFree(name);
8235: oldname = namePop(ctxt);
8236: if (oldname != NULL) {
8237: #ifdef DEBUG_STACK
8238: fprintf(stderr,"Close: popped %s\n", oldname);
8239: #endif
8240: xmlFree(oldname);
8241: }
8242: if (ctxt->name == NULL) {
8243: ctxt->instate = XML_PARSER_EPILOG;
8244: #ifdef DEBUG_PUSH
8245: fprintf(stderr, "PP: entering EPILOG\n");
8246: #endif
8247: } else {
8248: ctxt->instate = XML_PARSER_CONTENT;
8249: #ifdef DEBUG_PUSH
8250: fprintf(stderr, "PP: entering CONTENT\n");
8251: #endif
8252: }
8253: break;
8254: }
1.152 daniel 8255: if (RAW == '>') {
1.140 daniel 8256: NEXT;
8257: } else {
8258: if ((ctxt->sax != NULL) && (ctxt->sax->error != NULL))
8259: ctxt->sax->error(ctxt->userData,
8260: "Couldn't find end of Start Tag %s\n",
8261: name);
8262: ctxt->wellFormed = 0;
8263: ctxt->errNo = XML_ERR_GT_REQUIRED;
8264:
8265: /*
8266: * end of parsing of this node.
8267: */
8268: nodePop(ctxt);
8269: oldname = namePop(ctxt);
8270: if (oldname != NULL) {
8271: #ifdef DEBUG_STACK
8272: fprintf(stderr,"Close: popped %s\n", oldname);
8273: #endif
8274: xmlFree(oldname);
8275: }
8276: }
8277: xmlFree(name);
8278: ctxt->instate = XML_PARSER_CONTENT;
8279: #ifdef DEBUG_PUSH
8280: fprintf(stderr, "PP: entering CONTENT\n");
8281: #endif
8282: break;
8283: }
1.128 daniel 8284: case XML_PARSER_CONTENT:
1.140 daniel 8285: /*
8286: * Handle preparsed entities and charRef
8287: */
8288: if (ctxt->token != 0) {
8289: xmlChar cur[2] = { 0 , 0 } ;
8290:
8291: cur[0] = (xmlChar) ctxt->token;
8292: if ((ctxt->sax != NULL) && (ctxt->sax->characters != NULL))
8293: ctxt->sax->characters(ctxt->userData, cur, 1);
8294: ctxt->token = 0;
8295: }
8296: if (avail < 2)
8297: goto done;
8298: cur = in->cur[0];
8299: next = in->cur[1];
8300: if ((cur == '<') && (next == '?')) {
1.143 daniel 8301: if ((!terminate) &&
8302: (xmlParseLookupSequence(ctxt, '?', '>', 0) < 0))
1.140 daniel 8303: goto done;
8304: #ifdef DEBUG_PUSH
8305: fprintf(stderr, "PP: Parsing PI\n");
8306: #endif
8307: xmlParsePI(ctxt);
8308: } else if ((cur == '<') && (next == '!') &&
8309: (in->cur[2] == '-') && (in->cur[3] == '-')) {
1.143 daniel 8310: if ((!terminate) &&
8311: (xmlParseLookupSequence(ctxt, '-', '-', '>') < 0))
1.140 daniel 8312: goto done;
8313: #ifdef DEBUG_PUSH
8314: fprintf(stderr, "PP: Parsing Comment\n");
8315: #endif
8316: xmlParseComment(ctxt);
8317: ctxt->instate = XML_PARSER_CONTENT;
8318: } else if ((cur == '<') && (in->cur[1] == '!') &&
8319: (in->cur[2] == '[') && (NXT(3) == 'C') &&
8320: (in->cur[4] == 'D') && (NXT(5) == 'A') &&
8321: (in->cur[6] == 'T') && (NXT(7) == 'A') &&
8322: (in->cur[8] == '[')) {
8323: SKIP(9);
8324: ctxt->instate = XML_PARSER_CDATA_SECTION;
8325: #ifdef DEBUG_PUSH
8326: fprintf(stderr, "PP: entering CDATA_SECTION\n");
8327: #endif
8328: break;
8329: } else if ((cur == '<') && (next == '!') &&
8330: (avail < 9)) {
8331: goto done;
8332: } else if ((cur == '<') && (next == '/')) {
8333: ctxt->instate = XML_PARSER_END_TAG;
8334: #ifdef DEBUG_PUSH
8335: fprintf(stderr, "PP: entering END_TAG\n");
8336: #endif
8337: break;
8338: } else if (cur == '<') {
8339: ctxt->instate = XML_PARSER_START_TAG;
8340: #ifdef DEBUG_PUSH
8341: fprintf(stderr, "PP: entering START_TAG\n");
8342: #endif
8343: break;
8344: } else if (cur == '&') {
1.143 daniel 8345: if ((!terminate) &&
8346: (xmlParseLookupSequence(ctxt, ';', 0, 0) < 0))
1.140 daniel 8347: goto done;
8348: #ifdef DEBUG_PUSH
8349: fprintf(stderr, "PP: Parsing Reference\n");
8350: #endif
8351: /* TODO: check generation of subtrees if noent !!! */
8352: xmlParseReference(ctxt);
8353: } else {
1.156 daniel 8354: /* TODO Avoid the extra copy, handle directly !!! */
1.140 daniel 8355: /*
8356: * Goal of the following test is :
8357: * - minimize calls to the SAX 'character' callback
8358: * when they are mergeable
8359: * - handle an problem for isBlank when we only parse
8360: * a sequence of blank chars and the next one is
8361: * not available to check against '<' presence.
8362: * - tries to homogenize the differences in SAX
8363: * callbacks beween the push and pull versions
8364: * of the parser.
8365: */
8366: if ((ctxt->inputNr == 1) &&
8367: (avail < XML_PARSER_BIG_BUFFER_SIZE)) {
1.143 daniel 8368: if ((!terminate) &&
8369: (xmlParseLookupSequence(ctxt, '<', 0, 0) < 0))
1.140 daniel 8370: goto done;
8371: }
8372: ctxt->checkIndex = 0;
8373: #ifdef DEBUG_PUSH
8374: fprintf(stderr, "PP: Parsing char data\n");
8375: #endif
8376: xmlParseCharData(ctxt, 0);
8377: }
8378: /*
8379: * Pop-up of finished entities.
8380: */
1.152 daniel 8381: while ((RAW == 0) && (ctxt->inputNr > 1))
1.140 daniel 8382: xmlPopInput(ctxt);
8383: break;
8384: case XML_PARSER_CDATA_SECTION: {
8385: /*
8386: * The Push mode need to have the SAX callback for
8387: * cdataBlock merge back contiguous callbacks.
8388: */
8389: int base;
8390:
8391: in = ctxt->input;
8392: base = xmlParseLookupSequence(ctxt, ']', ']', '>');
8393: if (base < 0) {
8394: if (avail >= XML_PARSER_BIG_BUFFER_SIZE + 2) {
8395: if (ctxt->sax != NULL) {
8396: if (ctxt->sax->cdataBlock != NULL)
8397: ctxt->sax->cdataBlock(ctxt->userData, in->cur,
8398: XML_PARSER_BIG_BUFFER_SIZE);
8399: }
8400: SKIP(XML_PARSER_BIG_BUFFER_SIZE);
8401: ctxt->checkIndex = 0;
8402: }
8403: goto done;
8404: } else {
8405: if ((ctxt->sax != NULL) && (base > 0)) {
8406: if (ctxt->sax->cdataBlock != NULL)
8407: ctxt->sax->cdataBlock(ctxt->userData,
8408: in->cur, base);
8409: }
8410: SKIP(base + 3);
8411: ctxt->checkIndex = 0;
8412: ctxt->instate = XML_PARSER_CONTENT;
8413: #ifdef DEBUG_PUSH
8414: fprintf(stderr, "PP: entering CONTENT\n");
8415: #endif
8416: }
8417: break;
8418: }
1.141 daniel 8419: case XML_PARSER_END_TAG:
1.140 daniel 8420: if (avail < 2)
8421: goto done;
1.143 daniel 8422: if ((!terminate) &&
8423: (xmlParseLookupSequence(ctxt, '>', 0, 0) < 0))
1.140 daniel 8424: goto done;
8425: xmlParseEndTag(ctxt);
8426: if (ctxt->name == NULL) {
8427: ctxt->instate = XML_PARSER_EPILOG;
8428: #ifdef DEBUG_PUSH
8429: fprintf(stderr, "PP: entering EPILOG\n");
8430: #endif
8431: } else {
8432: ctxt->instate = XML_PARSER_CONTENT;
8433: #ifdef DEBUG_PUSH
8434: fprintf(stderr, "PP: entering CONTENT\n");
8435: #endif
8436: }
8437: break;
8438: case XML_PARSER_DTD: {
8439: /*
8440: * Sorry but progressive parsing of the internal subset
8441: * is not expected to be supported. We first check that
8442: * the full content of the internal subset is available and
8443: * the parsing is launched only at that point.
8444: * Internal subset ends up with "']' S? '>'" in an unescaped
8445: * section and not in a ']]>' sequence which are conditional
8446: * sections (whoever argued to keep that crap in XML deserve
8447: * a place in hell !).
8448: */
8449: int base, i;
8450: xmlChar *buf;
8451: xmlChar quote = 0;
8452:
8453: base = in->cur - in->base;
8454: if (base < 0) return(0);
8455: if (ctxt->checkIndex > base)
8456: base = ctxt->checkIndex;
8457: buf = in->buf->buffer->content;
8458: for (;base < in->buf->buffer->use;base++) {
8459: if (quote != 0) {
8460: if (buf[base] == quote)
8461: quote = 0;
8462: continue;
8463: }
8464: if (buf[base] == '"') {
8465: quote = '"';
8466: continue;
8467: }
8468: if (buf[base] == '\'') {
8469: quote = '\'';
8470: continue;
8471: }
8472: if (buf[base] == ']') {
8473: if (base +1 >= in->buf->buffer->use)
8474: break;
8475: if (buf[base + 1] == ']') {
8476: /* conditional crap, skip both ']' ! */
8477: base++;
8478: continue;
8479: }
8480: for (i = 0;base + i < in->buf->buffer->use;i++) {
8481: if (buf[base + i] == '>')
8482: goto found_end_int_subset;
8483: }
8484: break;
8485: }
8486: }
8487: /*
8488: * We didn't found the end of the Internal subset
8489: */
8490: if (quote == 0)
8491: ctxt->checkIndex = base;
8492: #ifdef DEBUG_PUSH
8493: if (next == 0)
8494: fprintf(stderr, "PP: lookup of int subset end filed\n");
8495: #endif
8496: goto done;
8497:
8498: found_end_int_subset:
8499: xmlParseInternalSubset(ctxt);
1.166 daniel 8500: ctxt->inSubset = 2;
8501: if ((ctxt->sax != NULL) &&
8502: (ctxt->sax->externalSubset != NULL))
8503: ctxt->sax->externalSubset(ctxt->userData, ctxt->intSubName,
8504: ctxt->extSubSystem, ctxt->extSubURI);
8505: ctxt->inSubset = 0;
1.140 daniel 8506: ctxt->instate = XML_PARSER_PROLOG;
8507: ctxt->checkIndex = 0;
8508: #ifdef DEBUG_PUSH
8509: fprintf(stderr, "PP: entering PROLOG\n");
8510: #endif
8511: break;
8512: }
8513: case XML_PARSER_COMMENT:
8514: fprintf(stderr, "PP: internal error, state == COMMENT\n");
8515: ctxt->instate = XML_PARSER_CONTENT;
8516: #ifdef DEBUG_PUSH
8517: fprintf(stderr, "PP: entering CONTENT\n");
8518: #endif
8519: break;
8520: case XML_PARSER_PI:
8521: fprintf(stderr, "PP: internal error, state == PI\n");
8522: ctxt->instate = XML_PARSER_CONTENT;
8523: #ifdef DEBUG_PUSH
8524: fprintf(stderr, "PP: entering CONTENT\n");
8525: #endif
8526: break;
1.128 daniel 8527: case XML_PARSER_ENTITY_DECL:
1.140 daniel 8528: fprintf(stderr, "PP: internal error, state == ENTITY_DECL\n");
8529: ctxt->instate = XML_PARSER_DTD;
8530: #ifdef DEBUG_PUSH
8531: fprintf(stderr, "PP: entering DTD\n");
8532: #endif
8533: break;
1.128 daniel 8534: case XML_PARSER_ENTITY_VALUE:
1.140 daniel 8535: fprintf(stderr, "PP: internal error, state == ENTITY_VALUE\n");
8536: ctxt->instate = XML_PARSER_CONTENT;
8537: #ifdef DEBUG_PUSH
8538: fprintf(stderr, "PP: entering DTD\n");
8539: #endif
8540: break;
1.128 daniel 8541: case XML_PARSER_ATTRIBUTE_VALUE:
1.140 daniel 8542: fprintf(stderr, "PP: internal error, state == ATTRIBUTE_VALUE\n");
1.168 ! daniel 8543: ctxt->instate = XML_PARSER_START_TAG;
! 8544: #ifdef DEBUG_PUSH
! 8545: fprintf(stderr, "PP: entering START_TAG\n");
! 8546: #endif
! 8547: break;
! 8548: case XML_PARSER_SYSTEM_LITERAL:
! 8549: fprintf(stderr, "PP: internal error, state == SYSTEM_LITERAL\n");
1.140 daniel 8550: ctxt->instate = XML_PARSER_START_TAG;
8551: #ifdef DEBUG_PUSH
8552: fprintf(stderr, "PP: entering START_TAG\n");
8553: #endif
8554: break;
1.128 daniel 8555: }
8556: }
1.140 daniel 8557: done:
8558: #ifdef DEBUG_PUSH
8559: fprintf(stderr, "PP: done %d\n", ret);
8560: #endif
1.128 daniel 8561: return(ret);
8562: }
8563:
8564: /**
1.143 daniel 8565: * xmlParseTry:
8566: * @ctxt: an XML parser context
8567: *
8568: * Try to progress on parsing
8569: *
8570: * Returns zero if no parsing was possible
8571: */
8572: int
8573: xmlParseTry(xmlParserCtxtPtr ctxt) {
8574: return(xmlParseTryOrFinish(ctxt, 0));
8575: }
8576:
8577: /**
1.128 daniel 8578: * xmlParseChunk:
8579: * @ctxt: an XML parser context
8580: * @chunk: an char array
8581: * @size: the size in byte of the chunk
8582: * @terminate: last chunk indicator
8583: *
8584: * Parse a Chunk of memory
8585: *
8586: * Returns zero if no error, the xmlParserErrors otherwise.
8587: */
1.140 daniel 8588: int
1.128 daniel 8589: xmlParseChunk(xmlParserCtxtPtr ctxt, const char *chunk, int size,
8590: int terminate) {
1.132 daniel 8591: if ((size > 0) && (chunk != NULL) && (ctxt->input != NULL) &&
1.140 daniel 8592: (ctxt->input->buf != NULL) && (ctxt->instate != XML_PARSER_EOF)) {
8593: int base = ctxt->input->base - ctxt->input->buf->buffer->content;
8594: int cur = ctxt->input->cur - ctxt->input->base;
8595:
1.132 daniel 8596: xmlParserInputBufferPush(ctxt->input->buf, size, chunk);
1.140 daniel 8597: ctxt->input->base = ctxt->input->buf->buffer->content + base;
8598: ctxt->input->cur = ctxt->input->base + cur;
8599: #ifdef DEBUG_PUSH
8600: fprintf(stderr, "PP: pushed %d\n", size);
8601: #endif
8602:
1.150 daniel 8603: if ((terminate) || (ctxt->input->buf->buffer->use > 80))
8604: xmlParseTryOrFinish(ctxt, terminate);
1.140 daniel 8605: } else if (ctxt->instate != XML_PARSER_EOF)
1.143 daniel 8606: xmlParseTryOrFinish(ctxt, terminate);
1.140 daniel 8607: if (terminate) {
1.151 daniel 8608: /*
8609: * Grab the encoding if it was added on-the-fly
8610: */
8611: if ((ctxt->encoding != NULL) && (ctxt->myDoc != NULL) &&
8612: (ctxt->myDoc->encoding == NULL)) {
8613: ctxt->myDoc->encoding = ctxt->encoding;
8614: ctxt->encoding = NULL;
8615: }
8616:
8617: /*
8618: * Check for termination
8619: */
1.140 daniel 8620: if ((ctxt->instate != XML_PARSER_EOF) &&
8621: (ctxt->instate != XML_PARSER_EPILOG)) {
8622: if ((ctxt->sax != NULL) && (ctxt->sax->error != NULL))
8623: ctxt->sax->error(ctxt->userData,
8624: "Extra content at the end of the document\n");
8625: ctxt->wellFormed = 0;
8626: ctxt->errNo = XML_ERR_DOCUMENT_END;
8627: }
8628: if (ctxt->instate != XML_PARSER_EOF) {
8629: if ((ctxt->sax) && (ctxt->sax->endDocument != NULL))
8630: ctxt->sax->endDocument(ctxt->userData);
8631: }
8632: ctxt->instate = XML_PARSER_EOF;
1.128 daniel 8633: }
8634: return((xmlParserErrors) ctxt->errNo);
8635: }
8636:
8637: /************************************************************************
8638: * *
1.98 daniel 8639: * I/O front end functions to the parser *
8640: * *
8641: ************************************************************************/
8642:
1.50 daniel 8643: /**
1.140 daniel 8644: * xmlCreatePushParserCtxt :
8645: * @sax: a SAX handler
8646: * @user_data: The user data returned on SAX callbacks
8647: * @chunk: a pointer to an array of chars
8648: * @size: number of chars in the array
8649: * @filename: an optional file name or URI
8650: *
8651: * Create a parser context for using the XML parser in push mode
8652: * To allow content encoding detection, @size should be >= 4
8653: * The value of @filename is used for fetching external entities
8654: * and error/warning reports.
8655: *
8656: * Returns the new parser context or NULL
8657: */
8658: xmlParserCtxtPtr
8659: xmlCreatePushParserCtxt(xmlSAXHandlerPtr sax, void *user_data,
8660: const char *chunk, int size, const char *filename) {
8661: xmlParserCtxtPtr ctxt;
8662: xmlParserInputPtr inputStream;
8663: xmlParserInputBufferPtr buf;
8664: xmlCharEncoding enc = XML_CHAR_ENCODING_NONE;
8665:
8666: /*
1.156 daniel 8667: * plug some encoding conversion routines
1.140 daniel 8668: */
8669: if ((chunk != NULL) && (size >= 4))
1.156 daniel 8670: enc = xmlDetectCharEncoding((const xmlChar *) chunk, size);
1.140 daniel 8671:
8672: buf = xmlAllocParserInputBuffer(enc);
8673: if (buf == NULL) return(NULL);
8674:
8675: ctxt = xmlNewParserCtxt();
8676: if (ctxt == NULL) {
8677: xmlFree(buf);
8678: return(NULL);
8679: }
8680: if (sax != NULL) {
8681: if (ctxt->sax != &xmlDefaultSAXHandler)
8682: xmlFree(ctxt->sax);
8683: ctxt->sax = (xmlSAXHandlerPtr) xmlMalloc(sizeof(xmlSAXHandler));
8684: if (ctxt->sax == NULL) {
8685: xmlFree(buf);
8686: xmlFree(ctxt);
8687: return(NULL);
8688: }
8689: memcpy(ctxt->sax, sax, sizeof(xmlSAXHandler));
8690: if (user_data != NULL)
8691: ctxt->userData = user_data;
8692: }
8693: if (filename == NULL) {
8694: ctxt->directory = NULL;
8695: } else {
8696: ctxt->directory = xmlParserGetDirectory(filename);
8697: }
8698:
8699: inputStream = xmlNewInputStream(ctxt);
8700: if (inputStream == NULL) {
8701: xmlFreeParserCtxt(ctxt);
8702: return(NULL);
8703: }
8704:
8705: if (filename == NULL)
8706: inputStream->filename = NULL;
8707: else
8708: inputStream->filename = xmlMemStrdup(filename);
8709: inputStream->buf = buf;
8710: inputStream->base = inputStream->buf->buffer->content;
8711: inputStream->cur = inputStream->buf->buffer->content;
1.156 daniel 8712: if (enc != XML_CHAR_ENCODING_NONE) {
8713: xmlSwitchEncoding(ctxt, enc);
8714: }
1.140 daniel 8715:
8716: inputPush(ctxt, inputStream);
8717:
8718: if ((size > 0) && (chunk != NULL) && (ctxt->input != NULL) &&
8719: (ctxt->input->buf != NULL)) {
8720: xmlParserInputBufferPush(ctxt->input->buf, size, chunk);
8721: #ifdef DEBUG_PUSH
8722: fprintf(stderr, "PP: pushed %d\n", size);
8723: #endif
8724: }
8725:
8726: return(ctxt);
8727: }
8728:
8729: /**
1.86 daniel 8730: * xmlCreateDocParserCtxt :
1.123 daniel 8731: * @cur: a pointer to an array of xmlChar
1.50 daniel 8732: *
1.69 daniel 8733: * Create a parser context for an XML in-memory document.
8734: *
8735: * Returns the new parser context or NULL
1.16 daniel 8736: */
1.69 daniel 8737: xmlParserCtxtPtr
1.123 daniel 8738: xmlCreateDocParserCtxt(xmlChar *cur) {
1.16 daniel 8739: xmlParserCtxtPtr ctxt;
1.40 daniel 8740: xmlParserInputPtr input;
1.16 daniel 8741:
1.97 daniel 8742: ctxt = xmlNewParserCtxt();
1.16 daniel 8743: if (ctxt == NULL) {
8744: return(NULL);
8745: }
1.96 daniel 8746: input = xmlNewInputStream(ctxt);
1.40 daniel 8747: if (input == NULL) {
1.97 daniel 8748: xmlFreeParserCtxt(ctxt);
1.40 daniel 8749: return(NULL);
8750: }
8751:
8752: input->base = cur;
8753: input->cur = cur;
8754:
8755: inputPush(ctxt, input);
1.69 daniel 8756: return(ctxt);
8757: }
8758:
8759: /**
8760: * xmlSAXParseDoc :
8761: * @sax: the SAX handler block
1.123 daniel 8762: * @cur: a pointer to an array of xmlChar
1.69 daniel 8763: * @recovery: work in recovery mode, i.e. tries to read no Well Formed
8764: * documents
8765: *
8766: * parse an XML in-memory document and build a tree.
8767: * It use the given SAX function block to handle the parsing callback.
8768: * If sax is NULL, fallback to the default DOM tree building routines.
8769: *
8770: * Returns the resulting document tree
8771: */
8772:
8773: xmlDocPtr
1.123 daniel 8774: xmlSAXParseDoc(xmlSAXHandlerPtr sax, xmlChar *cur, int recovery) {
1.69 daniel 8775: xmlDocPtr ret;
8776: xmlParserCtxtPtr ctxt;
8777:
8778: if (cur == NULL) return(NULL);
1.16 daniel 8779:
8780:
1.69 daniel 8781: ctxt = xmlCreateDocParserCtxt(cur);
8782: if (ctxt == NULL) return(NULL);
1.74 daniel 8783: if (sax != NULL) {
8784: ctxt->sax = sax;
8785: ctxt->userData = NULL;
8786: }
1.69 daniel 8787:
1.16 daniel 8788: xmlParseDocument(ctxt);
1.72 daniel 8789: if ((ctxt->wellFormed) || recovery) ret = ctxt->myDoc;
1.59 daniel 8790: else {
8791: ret = NULL;
1.72 daniel 8792: xmlFreeDoc(ctxt->myDoc);
8793: ctxt->myDoc = NULL;
1.59 daniel 8794: }
1.86 daniel 8795: if (sax != NULL)
8796: ctxt->sax = NULL;
1.69 daniel 8797: xmlFreeParserCtxt(ctxt);
1.16 daniel 8798:
1.1 veillard 8799: return(ret);
8800: }
8801:
1.50 daniel 8802: /**
1.55 daniel 8803: * xmlParseDoc :
1.123 daniel 8804: * @cur: a pointer to an array of xmlChar
1.55 daniel 8805: *
8806: * parse an XML in-memory document and build a tree.
8807: *
1.68 daniel 8808: * Returns the resulting document tree
1.55 daniel 8809: */
8810:
1.69 daniel 8811: xmlDocPtr
1.123 daniel 8812: xmlParseDoc(xmlChar *cur) {
1.59 daniel 8813: return(xmlSAXParseDoc(NULL, cur, 0));
1.76 daniel 8814: }
8815:
8816: /**
8817: * xmlSAXParseDTD :
8818: * @sax: the SAX handler block
8819: * @ExternalID: a NAME* containing the External ID of the DTD
8820: * @SystemID: a NAME* containing the URL to the DTD
8821: *
8822: * Load and parse an external subset.
8823: *
8824: * Returns the resulting xmlDtdPtr or NULL in case of error.
8825: */
8826:
8827: xmlDtdPtr
1.123 daniel 8828: xmlSAXParseDTD(xmlSAXHandlerPtr sax, const xmlChar *ExternalID,
8829: const xmlChar *SystemID) {
1.76 daniel 8830: xmlDtdPtr ret = NULL;
8831: xmlParserCtxtPtr ctxt;
1.83 daniel 8832: xmlParserInputPtr input = NULL;
1.76 daniel 8833: xmlCharEncoding enc;
8834:
8835: if ((ExternalID == NULL) && (SystemID == NULL)) return(NULL);
8836:
1.97 daniel 8837: ctxt = xmlNewParserCtxt();
1.76 daniel 8838: if (ctxt == NULL) {
8839: return(NULL);
8840: }
8841:
8842: /*
8843: * Set-up the SAX context
8844: */
8845: if (ctxt == NULL) return(NULL);
8846: if (sax != NULL) {
1.93 veillard 8847: if (ctxt->sax != NULL)
1.119 daniel 8848: xmlFree(ctxt->sax);
1.76 daniel 8849: ctxt->sax = sax;
8850: ctxt->userData = NULL;
8851: }
8852:
8853: /*
8854: * Ask the Entity resolver to load the damn thing
8855: */
8856:
8857: if ((ctxt->sax != NULL) && (ctxt->sax->resolveEntity != NULL))
8858: input = ctxt->sax->resolveEntity(ctxt->userData, ExternalID, SystemID);
8859: if (input == NULL) {
1.86 daniel 8860: if (sax != NULL) ctxt->sax = NULL;
1.76 daniel 8861: xmlFreeParserCtxt(ctxt);
8862: return(NULL);
8863: }
8864:
8865: /*
1.156 daniel 8866: * plug some encoding conversion routines here.
1.76 daniel 8867: */
8868: xmlPushInput(ctxt, input);
1.156 daniel 8869: enc = xmlDetectCharEncoding(ctxt->input->cur, 4);
1.76 daniel 8870: xmlSwitchEncoding(ctxt, enc);
8871:
1.95 veillard 8872: if (input->filename == NULL)
1.156 daniel 8873: input->filename = (char *) xmlStrdup(SystemID);
1.76 daniel 8874: input->line = 1;
8875: input->col = 1;
8876: input->base = ctxt->input->cur;
8877: input->cur = ctxt->input->cur;
8878: input->free = NULL;
8879:
8880: /*
8881: * let's parse that entity knowing it's an external subset.
8882: */
1.79 daniel 8883: xmlParseExternalSubset(ctxt, ExternalID, SystemID);
1.76 daniel 8884:
8885: if (ctxt->myDoc != NULL) {
8886: if (ctxt->wellFormed) {
8887: ret = ctxt->myDoc->intSubset;
8888: ctxt->myDoc->intSubset = NULL;
8889: } else {
8890: ret = NULL;
8891: }
8892: xmlFreeDoc(ctxt->myDoc);
8893: ctxt->myDoc = NULL;
8894: }
1.86 daniel 8895: if (sax != NULL) ctxt->sax = NULL;
1.76 daniel 8896: xmlFreeParserCtxt(ctxt);
8897:
8898: return(ret);
8899: }
8900:
8901: /**
8902: * xmlParseDTD :
8903: * @ExternalID: a NAME* containing the External ID of the DTD
8904: * @SystemID: a NAME* containing the URL to the DTD
8905: *
8906: * Load and parse an external subset.
8907: *
8908: * Returns the resulting xmlDtdPtr or NULL in case of error.
8909: */
8910:
8911: xmlDtdPtr
1.123 daniel 8912: xmlParseDTD(const xmlChar *ExternalID, const xmlChar *SystemID) {
1.76 daniel 8913: return(xmlSAXParseDTD(NULL, ExternalID, SystemID));
1.59 daniel 8914: }
8915:
8916: /**
1.144 daniel 8917: * xmlSAXParseBalancedChunk :
8918: * @ctx: an XML parser context (possibly NULL)
8919: * @sax: the SAX handler bloc (possibly NULL)
8920: * @user_data: The user data returned on SAX callbacks (possibly NULL)
8921: * @input: a parser input stream
8922: * @enc: the encoding
8923: *
8924: * Parse a well-balanced chunk of an XML document
8925: * The user has to provide SAX callback block whose routines will be
8926: * called by the parser
8927: * The allowed sequence for the Well Balanced Chunk is the one defined by
8928: * the content production in the XML grammar:
8929: *
8930: * [43] content ::= (element | CharData | Reference | CDSect | PI | Comment)*
8931: *
8932: * Returns 0 id the chunk is well balanced, -1 in case of args problem and
8933: * the error code otherwise
8934: */
8935:
8936: int
8937: xmlSAXParseBalancedChunk(xmlParserCtxtPtr ctx, xmlSAXHandlerPtr sax,
8938: void *user_data, xmlParserInputPtr input,
8939: xmlCharEncoding enc) {
8940: xmlParserCtxtPtr ctxt;
8941: int ret;
8942:
8943: if (input == NULL) return(-1);
8944:
8945: if (ctx != NULL)
8946: ctxt = ctx;
8947: else {
8948: ctxt = xmlNewParserCtxt();
8949: if (ctxt == NULL)
8950: return(-1);
8951: if (sax == NULL)
8952: ctxt->myDoc = xmlNewDoc(BAD_CAST "1.0");
8953: }
8954:
8955: /*
8956: * Set-up the SAX context
8957: */
8958: if (sax != NULL) {
8959: if (ctxt->sax != NULL)
8960: xmlFree(ctxt->sax);
8961: ctxt->sax = sax;
8962: ctxt->userData = user_data;
8963: }
8964:
8965: /*
8966: * plug some encoding conversion routines here.
8967: */
8968: xmlPushInput(ctxt, input);
8969: if (enc != XML_CHAR_ENCODING_NONE)
8970: xmlSwitchEncoding(ctxt, enc);
8971:
8972: /*
8973: * let's parse that entity knowing it's an external subset.
8974: */
8975: xmlParseContent(ctxt);
8976: ret = ctxt->errNo;
8977:
8978: if (ctx == NULL) {
8979: if (sax != NULL)
8980: ctxt->sax = NULL;
8981: else
8982: xmlFreeDoc(ctxt->myDoc);
8983: xmlFreeParserCtxt(ctxt);
8984: }
8985: return(ret);
8986: }
8987:
8988: /**
8989: * xmlParseBalancedChunk :
8990: * @doc: the document the chunk pertains to
8991: * @node: the node defining the context in which informations will be added
8992: *
8993: * Parse a well-balanced chunk of an XML document present in memory
8994: *
8995: * Returns the resulting list of nodes resulting from the parsing,
8996: * they are not added to @node
8997: */
8998:
8999: xmlNodePtr
9000: xmlParseBalancedChunkMemory(xmlDocPtr doc, xmlNodePtr node) {
1.156 daniel 9001: /* TODO !!! */
9002: return(NULL);
1.144 daniel 9003: }
9004:
9005: /**
9006: * xmlParseBalancedChunkFile :
9007: * @doc: the document the chunk pertains to
9008: *
9009: * Parse a well-balanced chunk of an XML document contained in a file
9010: *
9011: * Returns the resulting list of nodes resulting from the parsing,
9012: * they are not added to @node
9013: */
9014:
9015: xmlNodePtr
9016: xmlParseBalancedChunkFile(xmlDocPtr doc, xmlNodePtr node) {
1.156 daniel 9017: /* TODO !!! */
9018: return(NULL);
1.144 daniel 9019: }
9020:
9021: /**
1.59 daniel 9022: * xmlRecoverDoc :
1.123 daniel 9023: * @cur: a pointer to an array of xmlChar
1.59 daniel 9024: *
9025: * parse an XML in-memory document and build a tree.
9026: * In the case the document is not Well Formed, a tree is built anyway
9027: *
1.68 daniel 9028: * Returns the resulting document tree
1.59 daniel 9029: */
9030:
1.69 daniel 9031: xmlDocPtr
1.123 daniel 9032: xmlRecoverDoc(xmlChar *cur) {
1.59 daniel 9033: return(xmlSAXParseDoc(NULL, cur, 1));
1.55 daniel 9034: }
9035:
9036: /**
1.69 daniel 9037: * xmlCreateFileParserCtxt :
1.50 daniel 9038: * @filename: the filename
9039: *
1.69 daniel 9040: * Create a parser context for a file content.
9041: * Automatic support for ZLIB/Compress compressed document is provided
9042: * by default if found at compile-time.
1.50 daniel 9043: *
1.69 daniel 9044: * Returns the new parser context or NULL
1.9 httpng 9045: */
1.69 daniel 9046: xmlParserCtxtPtr
9047: xmlCreateFileParserCtxt(const char *filename)
9048: {
9049: xmlParserCtxtPtr ctxt;
1.40 daniel 9050: xmlParserInputPtr inputStream;
1.91 daniel 9051: xmlParserInputBufferPtr buf;
1.111 daniel 9052: char *directory = NULL;
1.9 httpng 9053:
1.91 daniel 9054: buf = xmlParserInputBufferCreateFilename(filename, XML_CHAR_ENCODING_NONE);
9055: if (buf == NULL) return(NULL);
1.9 httpng 9056:
1.97 daniel 9057: ctxt = xmlNewParserCtxt();
1.16 daniel 9058: if (ctxt == NULL) {
9059: return(NULL);
9060: }
1.97 daniel 9061:
1.96 daniel 9062: inputStream = xmlNewInputStream(ctxt);
1.40 daniel 9063: if (inputStream == NULL) {
1.97 daniel 9064: xmlFreeParserCtxt(ctxt);
1.40 daniel 9065: return(NULL);
9066: }
9067:
1.119 daniel 9068: inputStream->filename = xmlMemStrdup(filename);
1.91 daniel 9069: inputStream->buf = buf;
9070: inputStream->base = inputStream->buf->buffer->content;
9071: inputStream->cur = inputStream->buf->buffer->content;
1.16 daniel 9072:
1.40 daniel 9073: inputPush(ctxt, inputStream);
1.110 daniel 9074: if ((ctxt->directory == NULL) && (directory == NULL))
1.106 daniel 9075: directory = xmlParserGetDirectory(filename);
9076: if ((ctxt->directory == NULL) && (directory != NULL))
1.110 daniel 9077: ctxt->directory = directory;
1.106 daniel 9078:
1.69 daniel 9079: return(ctxt);
9080: }
9081:
9082: /**
9083: * xmlSAXParseFile :
9084: * @sax: the SAX handler block
9085: * @filename: the filename
9086: * @recovery: work in recovery mode, i.e. tries to read no Well Formed
9087: * documents
9088: *
9089: * parse an XML file and build a tree. Automatic support for ZLIB/Compress
9090: * compressed document is provided by default if found at compile-time.
9091: * It use the given SAX function block to handle the parsing callback.
9092: * If sax is NULL, fallback to the default DOM tree building routines.
9093: *
9094: * Returns the resulting document tree
9095: */
9096:
1.79 daniel 9097: xmlDocPtr
9098: xmlSAXParseFile(xmlSAXHandlerPtr sax, const char *filename,
1.69 daniel 9099: int recovery) {
9100: xmlDocPtr ret;
9101: xmlParserCtxtPtr ctxt;
1.111 daniel 9102: char *directory = NULL;
1.69 daniel 9103:
9104: ctxt = xmlCreateFileParserCtxt(filename);
9105: if (ctxt == NULL) return(NULL);
1.74 daniel 9106: if (sax != NULL) {
1.93 veillard 9107: if (ctxt->sax != NULL)
1.119 daniel 9108: xmlFree(ctxt->sax);
1.74 daniel 9109: ctxt->sax = sax;
9110: ctxt->userData = NULL;
9111: }
1.106 daniel 9112:
1.110 daniel 9113: if ((ctxt->directory == NULL) && (directory == NULL))
1.106 daniel 9114: directory = xmlParserGetDirectory(filename);
9115: if ((ctxt->directory == NULL) && (directory != NULL))
1.156 daniel 9116: ctxt->directory = (char *) xmlStrdup((xmlChar *) directory);
1.16 daniel 9117:
9118: xmlParseDocument(ctxt);
1.40 daniel 9119:
1.72 daniel 9120: if ((ctxt->wellFormed) || recovery) ret = ctxt->myDoc;
1.59 daniel 9121: else {
9122: ret = NULL;
1.72 daniel 9123: xmlFreeDoc(ctxt->myDoc);
9124: ctxt->myDoc = NULL;
1.59 daniel 9125: }
1.86 daniel 9126: if (sax != NULL)
9127: ctxt->sax = NULL;
1.69 daniel 9128: xmlFreeParserCtxt(ctxt);
1.20 daniel 9129:
9130: return(ret);
9131: }
9132:
1.55 daniel 9133: /**
9134: * xmlParseFile :
9135: * @filename: the filename
9136: *
9137: * parse an XML file and build a tree. Automatic support for ZLIB/Compress
9138: * compressed document is provided by default if found at compile-time.
9139: *
1.68 daniel 9140: * Returns the resulting document tree
1.55 daniel 9141: */
9142:
1.79 daniel 9143: xmlDocPtr
9144: xmlParseFile(const char *filename) {
1.59 daniel 9145: return(xmlSAXParseFile(NULL, filename, 0));
9146: }
9147:
9148: /**
9149: * xmlRecoverFile :
9150: * @filename: the filename
9151: *
9152: * parse an XML file and build a tree. Automatic support for ZLIB/Compress
9153: * compressed document is provided by default if found at compile-time.
9154: * In the case the document is not Well Formed, a tree is built anyway
9155: *
1.68 daniel 9156: * Returns the resulting document tree
1.59 daniel 9157: */
9158:
1.79 daniel 9159: xmlDocPtr
9160: xmlRecoverFile(const char *filename) {
1.59 daniel 9161: return(xmlSAXParseFile(NULL, filename, 1));
1.55 daniel 9162: }
1.32 daniel 9163:
1.50 daniel 9164: /**
1.69 daniel 9165: * xmlCreateMemoryParserCtxt :
1.68 daniel 9166: * @buffer: an pointer to a char array
1.127 daniel 9167: * @size: the size of the array
1.50 daniel 9168: *
1.69 daniel 9169: * Create a parser context for an XML in-memory document.
1.50 daniel 9170: *
1.69 daniel 9171: * Returns the new parser context or NULL
1.20 daniel 9172: */
1.69 daniel 9173: xmlParserCtxtPtr
9174: xmlCreateMemoryParserCtxt(char *buffer, int size) {
1.20 daniel 9175: xmlParserCtxtPtr ctxt;
1.40 daniel 9176: xmlParserInputPtr input;
9177:
1.158 daniel 9178: if (buffer[size - 1] != 0)
9179: buffer[size - 1] = '\0';
1.40 daniel 9180:
1.97 daniel 9181: ctxt = xmlNewParserCtxt();
1.20 daniel 9182: if (ctxt == NULL) {
9183: return(NULL);
9184: }
1.97 daniel 9185:
1.96 daniel 9186: input = xmlNewInputStream(ctxt);
1.40 daniel 9187: if (input == NULL) {
1.97 daniel 9188: xmlFreeParserCtxt(ctxt);
1.40 daniel 9189: return(NULL);
9190: }
1.20 daniel 9191:
1.40 daniel 9192: input->filename = NULL;
9193: input->line = 1;
9194: input->col = 1;
1.96 daniel 9195: input->buf = NULL;
1.91 daniel 9196: input->consumed = 0;
1.75 daniel 9197:
1.116 daniel 9198: input->base = BAD_CAST buffer;
9199: input->cur = BAD_CAST buffer;
1.69 daniel 9200: input->free = NULL;
1.20 daniel 9201:
1.40 daniel 9202: inputPush(ctxt, input);
1.69 daniel 9203: return(ctxt);
9204: }
9205:
9206: /**
9207: * xmlSAXParseMemory :
9208: * @sax: the SAX handler block
9209: * @buffer: an pointer to a char array
1.127 daniel 9210: * @size: the size of the array
9211: * @recovery: work in recovery mode, i.e. tries to read not Well Formed
1.69 daniel 9212: * documents
9213: *
9214: * parse an XML in-memory block and use the given SAX function block
9215: * to handle the parsing callback. If sax is NULL, fallback to the default
9216: * DOM tree building routines.
9217: *
9218: * Returns the resulting document tree
9219: */
9220: xmlDocPtr
9221: xmlSAXParseMemory(xmlSAXHandlerPtr sax, char *buffer, int size, int recovery) {
9222: xmlDocPtr ret;
9223: xmlParserCtxtPtr ctxt;
9224:
9225: ctxt = xmlCreateMemoryParserCtxt(buffer, size);
9226: if (ctxt == NULL) return(NULL);
1.74 daniel 9227: if (sax != NULL) {
9228: ctxt->sax = sax;
9229: ctxt->userData = NULL;
9230: }
1.20 daniel 9231:
9232: xmlParseDocument(ctxt);
1.40 daniel 9233:
1.72 daniel 9234: if ((ctxt->wellFormed) || recovery) ret = ctxt->myDoc;
1.59 daniel 9235: else {
9236: ret = NULL;
1.72 daniel 9237: xmlFreeDoc(ctxt->myDoc);
9238: ctxt->myDoc = NULL;
1.59 daniel 9239: }
1.86 daniel 9240: if (sax != NULL)
9241: ctxt->sax = NULL;
1.69 daniel 9242: xmlFreeParserCtxt(ctxt);
1.16 daniel 9243:
1.9 httpng 9244: return(ret);
1.17 daniel 9245: }
9246:
1.55 daniel 9247: /**
9248: * xmlParseMemory :
1.68 daniel 9249: * @buffer: an pointer to a char array
1.55 daniel 9250: * @size: the size of the array
9251: *
9252: * parse an XML in-memory block and build a tree.
9253: *
1.68 daniel 9254: * Returns the resulting document tree
1.55 daniel 9255: */
9256:
9257: xmlDocPtr xmlParseMemory(char *buffer, int size) {
1.59 daniel 9258: return(xmlSAXParseMemory(NULL, buffer, size, 0));
9259: }
9260:
9261: /**
9262: * xmlRecoverMemory :
1.68 daniel 9263: * @buffer: an pointer to a char array
1.59 daniel 9264: * @size: the size of the array
9265: *
9266: * parse an XML in-memory block and build a tree.
9267: * In the case the document is not Well Formed, a tree is built anyway
9268: *
1.68 daniel 9269: * Returns the resulting document tree
1.59 daniel 9270: */
9271:
9272: xmlDocPtr xmlRecoverMemory(char *buffer, int size) {
9273: return(xmlSAXParseMemory(NULL, buffer, size, 1));
1.17 daniel 9274: }
9275:
9276:
1.50 daniel 9277: /**
9278: * xmlSetupParserForBuffer:
9279: * @ctxt: an XML parser context
1.123 daniel 9280: * @buffer: a xmlChar * buffer
1.50 daniel 9281: * @filename: a file name
9282: *
1.19 daniel 9283: * Setup the parser context to parse a new buffer; Clears any prior
9284: * contents from the parser context. The buffer parameter must not be
9285: * NULL, but the filename parameter can be
9286: */
1.55 daniel 9287: void
1.123 daniel 9288: xmlSetupParserForBuffer(xmlParserCtxtPtr ctxt, const xmlChar* buffer,
1.17 daniel 9289: const char* filename)
9290: {
1.96 daniel 9291: xmlParserInputPtr input;
1.40 daniel 9292:
1.96 daniel 9293: input = xmlNewInputStream(ctxt);
9294: if (input == NULL) {
9295: perror("malloc");
1.119 daniel 9296: xmlFree(ctxt);
1.145 daniel 9297: return;
1.96 daniel 9298: }
9299:
9300: xmlClearParserCtxt(ctxt);
9301: if (filename != NULL)
1.119 daniel 9302: input->filename = xmlMemStrdup(filename);
1.96 daniel 9303: input->base = buffer;
9304: input->cur = buffer;
9305: inputPush(ctxt, input);
1.17 daniel 9306: }
9307:
1.123 daniel 9308: /**
9309: * xmlSAXUserParseFile:
9310: * @sax: a SAX handler
9311: * @user_data: The user data returned on SAX callbacks
9312: * @filename: a file name
9313: *
9314: * parse an XML file and call the given SAX handler routines.
9315: * Automatic support for ZLIB/Compress compressed document is provided
9316: *
9317: * Returns 0 in case of success or a error number otherwise
9318: */
1.131 daniel 9319: int
9320: xmlSAXUserParseFile(xmlSAXHandlerPtr sax, void *user_data,
9321: const char *filename) {
1.123 daniel 9322: int ret = 0;
9323: xmlParserCtxtPtr ctxt;
9324:
9325: ctxt = xmlCreateFileParserCtxt(filename);
9326: if (ctxt == NULL) return -1;
1.134 daniel 9327: if (ctxt->sax != &xmlDefaultSAXHandler)
9328: xmlFree(ctxt->sax);
1.123 daniel 9329: ctxt->sax = sax;
1.140 daniel 9330: if (user_data != NULL)
9331: ctxt->userData = user_data;
1.123 daniel 9332:
9333: xmlParseDocument(ctxt);
9334:
9335: if (ctxt->wellFormed)
9336: ret = 0;
9337: else {
9338: if (ctxt->errNo != 0)
9339: ret = ctxt->errNo;
9340: else
9341: ret = -1;
9342: }
9343: if (sax != NULL)
9344: ctxt->sax = NULL;
9345: xmlFreeParserCtxt(ctxt);
9346:
9347: return ret;
9348: }
9349:
9350: /**
9351: * xmlSAXUserParseMemory:
9352: * @sax: a SAX handler
9353: * @user_data: The user data returned on SAX callbacks
9354: * @buffer: an in-memory XML document input
1.127 daniel 9355: * @size: the length of the XML document in bytes
1.123 daniel 9356: *
9357: * A better SAX parsing routine.
9358: * parse an XML in-memory buffer and call the given SAX handler routines.
9359: *
9360: * Returns 0 in case of success or a error number otherwise
9361: */
9362: int xmlSAXUserParseMemory(xmlSAXHandlerPtr sax, void *user_data,
9363: char *buffer, int size) {
9364: int ret = 0;
9365: xmlParserCtxtPtr ctxt;
9366:
9367: ctxt = xmlCreateMemoryParserCtxt(buffer, size);
9368: if (ctxt == NULL) return -1;
9369: ctxt->sax = sax;
9370: ctxt->userData = user_data;
9371:
9372: xmlParseDocument(ctxt);
9373:
9374: if (ctxt->wellFormed)
9375: ret = 0;
9376: else {
9377: if (ctxt->errNo != 0)
9378: ret = ctxt->errNo;
9379: else
9380: ret = -1;
9381: }
9382: if (sax != NULL)
9383: ctxt->sax = NULL;
9384: xmlFreeParserCtxt(ctxt);
9385:
9386: return ret;
9387: }
9388:
1.32 daniel 9389:
1.98 daniel 9390: /************************************************************************
9391: * *
1.127 daniel 9392: * Miscellaneous *
1.98 daniel 9393: * *
9394: ************************************************************************/
9395:
/**
 * xmlCleanupParser:
 *
 * Cleanup function for the XML parser. It tries to reclaim all
 * parsing related global memory allocated for the parser processing:
 * the character encoding handlers first, then the predefined entities.
 * It doesn't deallocate any document related memory. Calling this
 * function should not prevent reusing the parser.
 */

void
xmlCleanupParser(void)
{
    xmlCleanupCharEncodingHandlers();
    xmlCleanupPredefinedEntities();
}
1.98 daniel 9410:
1.50 daniel 9411: /**
9412: * xmlParserFindNodeInfo:
9413: * @ctxt: an XML parser context
9414: * @node: an XML node within the tree
9415: *
9416: * Find the parser node info struct for a given node
9417: *
1.68 daniel 9418: * Returns an xmlParserNodeInfo block pointer or NULL
1.32 daniel 9419: */
9420: const xmlParserNodeInfo* xmlParserFindNodeInfo(const xmlParserCtxt* ctx,
9421: const xmlNode* node)
9422: {
9423: unsigned long pos;
9424:
9425: /* Find position where node should be at */
9426: pos = xmlParserFindNodeInfoIndex(&ctx->node_seq, node);
9427: if ( ctx->node_seq.buffer[pos].node == node )
9428: return &ctx->node_seq.buffer[pos];
9429: else
9430: return NULL;
9431: }
9432:
9433:
1.50 daniel 9434: /**
9435: * xmlInitNodeInfoSeq :
9436: * @seq: a node info sequence pointer
9437: *
9438: * -- Initialize (set to initial state) node info sequence
1.32 daniel 9439: */
1.55 daniel 9440: void
9441: xmlInitNodeInfoSeq(xmlParserNodeInfoSeqPtr seq)
1.32 daniel 9442: {
9443: seq->length = 0;
9444: seq->maximum = 0;
9445: seq->buffer = NULL;
9446: }
9447:
1.50 daniel 9448: /**
9449: * xmlClearNodeInfoSeq :
9450: * @seq: a node info sequence pointer
9451: *
9452: * -- Clear (release memory and reinitialize) node
1.32 daniel 9453: * info sequence
9454: */
1.55 daniel 9455: void
9456: xmlClearNodeInfoSeq(xmlParserNodeInfoSeqPtr seq)
1.32 daniel 9457: {
9458: if ( seq->buffer != NULL )
1.119 daniel 9459: xmlFree(seq->buffer);
1.32 daniel 9460: xmlInitNodeInfoSeq(seq);
9461: }
9462:
9463:
1.50 daniel 9464: /**
9465: * xmlParserFindNodeInfoIndex:
9466: * @seq: a node info sequence pointer
9467: * @node: an XML node pointer
9468: *
9469: *
1.32 daniel 9470: * xmlParserFindNodeInfoIndex : Find the index that the info record for
9471: * the given node is or should be at in a sorted sequence
1.68 daniel 9472: *
9473: * Returns a long indicating the position of the record
1.32 daniel 9474: */
9475: unsigned long xmlParserFindNodeInfoIndex(const xmlParserNodeInfoSeq* seq,
9476: const xmlNode* node)
9477: {
9478: unsigned long upper, lower, middle;
9479: int found = 0;
9480:
9481: /* Do a binary search for the key */
9482: lower = 1;
9483: upper = seq->length;
9484: middle = 0;
9485: while ( lower <= upper && !found) {
9486: middle = lower + (upper - lower) / 2;
9487: if ( node == seq->buffer[middle - 1].node )
9488: found = 1;
9489: else if ( node < seq->buffer[middle - 1].node )
9490: upper = middle - 1;
9491: else
9492: lower = middle + 1;
9493: }
9494:
9495: /* Return position */
9496: if ( middle == 0 || seq->buffer[middle - 1].node < node )
9497: return middle;
9498: else
9499: return middle - 1;
9500: }
9501:
9502:
1.50 daniel 9503: /**
9504: * xmlParserAddNodeInfo:
9505: * @ctxt: an XML parser context
1.68 daniel 9506: * @info: a node info sequence pointer
1.50 daniel 9507: *
9508: * Insert node info record into the sorted sequence
1.32 daniel 9509: */
1.55 daniel 9510: void
9511: xmlParserAddNodeInfo(xmlParserCtxtPtr ctxt,
1.68 daniel 9512: const xmlParserNodeInfo* info)
1.32 daniel 9513: {
9514: unsigned long pos;
9515: static unsigned int block_size = 5;
9516:
9517: /* Find pos and check to see if node is already in the sequence */
1.55 daniel 9518: pos = xmlParserFindNodeInfoIndex(&ctxt->node_seq, info->node);
9519: if ( pos < ctxt->node_seq.length
9520: && ctxt->node_seq.buffer[pos].node == info->node ) {
9521: ctxt->node_seq.buffer[pos] = *info;
1.32 daniel 9522: }
9523:
9524: /* Otherwise, we need to add new node to buffer */
9525: else {
9526: /* Expand buffer by 5 if needed */
1.55 daniel 9527: if ( ctxt->node_seq.length + 1 > ctxt->node_seq.maximum ) {
1.32 daniel 9528: xmlParserNodeInfo* tmp_buffer;
1.55 daniel 9529: unsigned int byte_size = (sizeof(*ctxt->node_seq.buffer)
9530: *(ctxt->node_seq.maximum + block_size));
1.32 daniel 9531:
1.55 daniel 9532: if ( ctxt->node_seq.buffer == NULL )
1.119 daniel 9533: tmp_buffer = (xmlParserNodeInfo*) xmlMalloc(byte_size);
1.32 daniel 9534: else
1.119 daniel 9535: tmp_buffer = (xmlParserNodeInfo*) xmlRealloc(ctxt->node_seq.buffer, byte_size);
1.32 daniel 9536:
9537: if ( tmp_buffer == NULL ) {
1.55 daniel 9538: if ((ctxt->sax != NULL) && (ctxt->sax->error != NULL))
1.74 daniel 9539: ctxt->sax->error(ctxt->userData, "Out of memory\n");
1.123 daniel 9540: ctxt->errNo = XML_ERR_NO_MEMORY;
1.32 daniel 9541: return;
9542: }
1.55 daniel 9543: ctxt->node_seq.buffer = tmp_buffer;
9544: ctxt->node_seq.maximum += block_size;
1.32 daniel 9545: }
9546:
9547: /* If position is not at end, move elements out of the way */
1.55 daniel 9548: if ( pos != ctxt->node_seq.length ) {
1.32 daniel 9549: unsigned long i;
9550:
1.55 daniel 9551: for ( i = ctxt->node_seq.length; i > pos; i-- )
9552: ctxt->node_seq.buffer[i] = ctxt->node_seq.buffer[i - 1];
1.32 daniel 9553: }
9554:
9555: /* Copy element and increase length */
1.55 daniel 9556: ctxt->node_seq.buffer[pos] = *info;
9557: ctxt->node_seq.length++;
1.32 daniel 9558: }
9559: }
1.77 daniel 9560:
1.98 daniel 9561:
9562: /**
9563: * xmlSubstituteEntitiesDefault :
9564: * @val: int 0 or 1
9565: *
9566: * Set and return the previous value for default entity support.
9567: * Initially the parser always keep entity references instead of substituting
9568: * entity values in the output. This function has to be used to change the
9569: * default parser behaviour
9570: * SAX::subtituteEntities() has to be used for changing that on a file by
9571: * file basis.
9572: *
9573: * Returns the last value for 0 for no substitution, 1 for substitution.
9574: */
9575:
9576: int
9577: xmlSubstituteEntitiesDefault(int val) {
9578: int old = xmlSubstituteEntitiesDefaultValue;
9579:
9580: xmlSubstituteEntitiesDefaultValue = val;
9581: return(old);
9582: }
1.77 daniel 9583:
Webmaster