Annotation of XML/parser.c, revision 1.171
1.1 veillard 1: /*
1.3 veillard 2: * parser.c : an XML 1.0 non-verifying parser
1.15 veillard 3: *
4: * See Copyright for the status of this software.
5: *
1.60 daniel 6: * Daniel.Veillard@w3.org
1.1 veillard 7: */
8:
1.26 daniel 9: #ifdef WIN32
1.138 daniel 10: #include "win32config.h"
1.26 daniel 11: #else
1.121 daniel 12: #include "config.h"
1.26 daniel 13: #endif
1.121 daniel 14:
1.1 veillard 15: #include <stdio.h>
1.121 daniel 16: #include <string.h> /* for memset() only */
17: #ifdef HAVE_CTYPE_H
1.1 veillard 18: #include <ctype.h>
1.121 daniel 19: #endif
20: #ifdef HAVE_STDLIB_H
1.50 daniel 21: #include <stdlib.h>
1.121 daniel 22: #endif
23: #ifdef HAVE_SYS_STAT_H
1.9 httpng 24: #include <sys/stat.h>
1.121 daniel 25: #endif
1.9 httpng 26: #ifdef HAVE_FCNTL_H
27: #include <fcntl.h>
28: #endif
1.10 httpng 29: #ifdef HAVE_UNISTD_H
30: #include <unistd.h>
31: #endif
1.20 daniel 32: #ifdef HAVE_ZLIB_H
33: #include <zlib.h>
34: #endif
1.1 veillard 35:
1.119 daniel 36: #include "xmlmemory.h"
1.14 veillard 37: #include "tree.h"
1.1 veillard 38: #include "parser.h"
1.14 veillard 39: #include "entities.h"
1.75 daniel 40: #include "encoding.h"
1.61 daniel 41: #include "valid.h"
1.69 daniel 42: #include "parserInternals.h"
1.91 daniel 43: #include "xmlIO.h"
1.122 daniel 44: #include "xml-error.h"
1.1 veillard 45:
/*
 * Sizes of fixed-size scratch buffers. XML_PARSER_BIG_BUFFER_SIZE is the
 * size of the on-stack path buffer built in xmlNewInputFromFile().
 */
1.140 daniel 46: #define XML_PARSER_BIG_BUFFER_SIZE 1000
47: #define XML_PARSER_BUFFER_SIZE 100
48:
/* Version string of the library, taken from the LIBXML_VERSION macro. */
1.86 daniel 49: const char *xmlParserVersion = LIBXML_VERSION;
/*
 * Initialised to 1 here.
 * NOTE(review): presumably the default for warning reporting in new
 * parser contexts - confirm where contexts are initialised.
 */
1.160 daniel 50: int xmlGetWarningsDefaultValue = 1;
1.86 daniel 51:
1.139 daniel 52: /*
53: * List of XML prefixed PI allowed by W3C specs
54: */
55:
/* NULL-terminated array of processing-instruction target names. */
56: const char *xmlW3CPIs[] = {
57: "xml-stylesheet",
58: NULL
59: };
1.91 daniel 60:
/*
 * Forward declarations. The two reference handlers are called by the
 * SKIP/NEXTL macros and by xmlNextChar()/xmlSkipBlankChars() below.
 */
1.151 daniel 61: void xmlParserHandleReference(xmlParserCtxtPtr ctxt);
62: void xmlParserHandlePEReference(xmlParserCtxtPtr ctxt);
63: xmlEntityPtr xmlParseStringPEReference(xmlParserCtxtPtr ctxt,
64: const xmlChar **str);
1.91 daniel 65: /************************************************************************
66: * *
67: * Input handling functions for progressive parsing *
68: * *
69: ************************************************************************/
70:
71: /* #define DEBUG_INPUT */
1.140 daniel 72: /* #define DEBUG_STACK */
73: /* #define DEBUG_PUSH */
74:
1.91 daniel 75:
/*
 * INPUT_CHUNK: lookahead size passed to xmlParserInputGrow() by the
 * parsing macros; also the threshold used by xmlParserInputGrow() and
 * xmlParserInputShrink() to decide whether any work is needed.
 */
1.110 daniel 76: #define INPUT_CHUNK 250
77: /* we need to keep enough input to show errors in context */
/* LINE_LEN: number of already-consumed xmlChars that
 * xmlParserInputShrink() leaves in the buffer before the cursor. */
78: #define LINE_LEN 80
1.91 daniel 79:
#ifdef DEBUG_INPUT
#define CHECK_BUFFER(in) check_buffer(in)

/**
 * check_buffer:
 * @in:  an XML parser input
 *
 * Debug-only consistency check of an input against its I/O buffer.
 * It reports on stderr when in->base no longer matches the buffer
 * content, or when in->cur lies outside [base, base + use], and then
 * dumps the buffer state.
 *
 * Pointers are printed with %p and the pointer difference is cast
 * explicitly, so the output is well defined when pointers are wider
 * than int.
 */
void check_buffer(xmlParserInputPtr in) {
    if (in->base != in->buf->buffer->content) {
        fprintf(stderr, "xmlParserInput: base mismatch problem\n");
    }
    if (in->cur < in->base) {
        fprintf(stderr, "xmlParserInput: cur < base problem\n");
    }
    if (in->cur > in->base + in->buf->buffer->use) {
        fprintf(stderr, "xmlParserInput: cur > base + use problem\n");
    }
    fprintf(stderr,"buffer %p : content %p, cur %d, use %d, size %d\n",
            (void *) in, (void *) in->buf->buffer->content,
            (int) (in->cur - in->base),
            (int) in->buf->buffer->use, (int) in->buf->buffer->size);
}

#else
/* Without DEBUG_INPUT the check compiles to nothing. */
#define CHECK_BUFFER(in)
#endif
101:
1.91 daniel 102:
103: /**
104: * xmlParserInputRead:
105: * @in: an XML parser input
106: * @len: an indicative size for the lookahead
107: *
108: * This function refresh the input for the parser. It doesn't try to
109: * preserve pointers to the input buffer, and discard already read data
110: *
1.123 daniel 111: * Returns the number of xmlChars read, or -1 in case of error, 0 indicate the
1.91 daniel 112: * end of this entity
113: */
114: int
115: xmlParserInputRead(xmlParserInputPtr in, int len) {
116: int ret;
117: int used;
118: int index;
119:
120: #ifdef DEBUG_INPUT
121: fprintf(stderr, "Read\n");
122: #endif
123: if (in->buf == NULL) return(-1);
124: if (in->base == NULL) return(-1);
125: if (in->cur == NULL) return(-1);
126: if (in->buf->buffer == NULL) return(-1);
127:
128: CHECK_BUFFER(in);
129:
130: used = in->cur - in->buf->buffer->content;
131: ret = xmlBufferShrink(in->buf->buffer, used);
132: if (ret > 0) {
133: in->cur -= ret;
134: in->consumed += ret;
135: }
136: ret = xmlParserInputBufferRead(in->buf, len);
137: if (in->base != in->buf->buffer->content) {
138: /*
139: * the buffer has been realloced
140: */
141: index = in->cur - in->base;
142: in->base = in->buf->buffer->content;
143: in->cur = &in->buf->buffer->content[index];
144: }
145:
146: CHECK_BUFFER(in);
147:
148: return(ret);
149: }
150:
151: /**
152: * xmlParserInputGrow:
153: * @in: an XML parser input
154: * @len: an indicative size for the lookahead
155: *
156: * This function increase the input for the parser. It tries to
157: * preserve pointers to the input buffer, and keep already read data
158: *
1.123 daniel 159: * Returns the number of xmlChars read, or -1 in case of error, 0 indicate the
1.91 daniel 160: * end of this entity
161: */
162: int
163: xmlParserInputGrow(xmlParserInputPtr in, int len) {
164: int ret;
165: int index;
166:
167: #ifdef DEBUG_INPUT
168: fprintf(stderr, "Grow\n");
169: #endif
170: if (in->buf == NULL) return(-1);
171: if (in->base == NULL) return(-1);
172: if (in->cur == NULL) return(-1);
173: if (in->buf->buffer == NULL) return(-1);
174:
175: CHECK_BUFFER(in);
176:
177: index = in->cur - in->base;
178: if (in->buf->buffer->use > index + INPUT_CHUNK) {
179:
180: CHECK_BUFFER(in);
181:
182: return(0);
183: }
1.148 daniel 184: if ((in->buf->httpIO != NULL) || (in->buf->ftpIO != NULL) ||
185: (in->buf->file != NULL) ||
1.140 daniel 186: #ifdef HAVE_ZLIB_H
187: (in->buf->gzfile != NULL) ||
188: #endif
189: (in->buf->fd >= 0))
190: ret = xmlParserInputBufferGrow(in->buf, len);
191: else
192: return(0);
1.135 daniel 193:
194: /*
195: * NOTE : in->base may be a "dandling" i.e. freed pointer in this
196: * block, but we use it really as an integer to do some
197: * pointer arithmetic. Insure will raise it as a bug but in
198: * that specific case, that's not !
199: */
1.91 daniel 200: if (in->base != in->buf->buffer->content) {
201: /*
202: * the buffer has been realloced
203: */
204: index = in->cur - in->base;
205: in->base = in->buf->buffer->content;
206: in->cur = &in->buf->buffer->content[index];
207: }
208:
209: CHECK_BUFFER(in);
210:
211: return(ret);
212: }
213:
214: /**
215: * xmlParserInputShrink:
216: * @in: an XML parser input
217: *
218: * This function removes used input for the parser.
219: */
220: void
221: xmlParserInputShrink(xmlParserInputPtr in) {
222: int used;
223: int ret;
224: int index;
225:
226: #ifdef DEBUG_INPUT
227: fprintf(stderr, "Shrink\n");
228: #endif
229: if (in->buf == NULL) return;
230: if (in->base == NULL) return;
231: if (in->cur == NULL) return;
232: if (in->buf->buffer == NULL) return;
233:
234: CHECK_BUFFER(in);
235:
236: used = in->cur - in->buf->buffer->content;
237: if (used > INPUT_CHUNK) {
1.110 daniel 238: ret = xmlBufferShrink(in->buf->buffer, used - LINE_LEN);
1.91 daniel 239: if (ret > 0) {
240: in->cur -= ret;
241: in->consumed += ret;
242: }
243: }
244:
245: CHECK_BUFFER(in);
246:
247: if (in->buf->buffer->use > INPUT_CHUNK) {
248: return;
249: }
250: xmlParserInputBufferRead(in->buf, 2 * INPUT_CHUNK);
251: if (in->base != in->buf->buffer->content) {
252: /*
253: * the buffer has been realloced
254: */
255: index = in->cur - in->base;
256: in->base = in->buf->buffer->content;
257: in->cur = &in->buf->buffer->content[index];
258: }
259:
260: CHECK_BUFFER(in);
261: }
262:
1.45 daniel 263: /************************************************************************
264: * *
265: * Parser stacks related functions and macros *
266: * *
267: ************************************************************************/
1.79 daniel 268:
/*
 * Global default values, both initialised to 0 here.
 * NOTE(review): presumably copied into each new parser context - confirm
 * where contexts are initialised.
 */
269: int xmlSubstituteEntitiesDefaultValue = 0;
1.100 daniel 270: int xmlDoValidityCheckingDefaultValue = 0;
/* Forward declaration; the definition is not in this part of the file. */
1.135 daniel 271: xmlEntityPtr xmlParseStringEntityRef(xmlParserCtxtPtr ctxt,
272: const xmlChar ** str);
1.79 daniel 273:
1.1 veillard 274: /*
1.40 daniel 275: * Generic function for accessing stacks in the Parser Context
1.1 veillard 276: */
277:
1.140 daniel 278: #define PUSH_AND_POP(scope, type, name) \
279: scope int name##Push(xmlParserCtxtPtr ctxt, type value) { \
1.31 daniel 280: if (ctxt->name##Nr >= ctxt->name##Max) { \
281: ctxt->name##Max *= 2; \
1.119 daniel 282: ctxt->name##Tab = (void *) xmlRealloc(ctxt->name##Tab, \
1.40 daniel 283: ctxt->name##Max * sizeof(ctxt->name##Tab[0])); \
284: if (ctxt->name##Tab == NULL) { \
1.31 daniel 285: fprintf(stderr, "realloc failed !\n"); \
1.145 daniel 286: return(0); \
1.31 daniel 287: } \
288: } \
1.40 daniel 289: ctxt->name##Tab[ctxt->name##Nr] = value; \
290: ctxt->name = value; \
291: return(ctxt->name##Nr++); \
1.31 daniel 292: } \
1.140 daniel 293: scope type name##Pop(xmlParserCtxtPtr ctxt) { \
1.69 daniel 294: type ret; \
1.40 daniel 295: if (ctxt->name##Nr <= 0) return(0); \
296: ctxt->name##Nr--; \
1.50 daniel 297: if (ctxt->name##Nr > 0) \
298: ctxt->name = ctxt->name##Tab[ctxt->name##Nr - 1]; \
299: else \
300: ctxt->name = NULL; \
1.69 daniel 301: ret = ctxt->name##Tab[ctxt->name##Nr]; \
302: ctxt->name##Tab[ctxt->name##Nr] = 0; \
303: return(ret); \
1.31 daniel 304: } \
305:
1.140 daniel 306: PUSH_AND_POP(extern, xmlParserInputPtr, input)
307: PUSH_AND_POP(extern, xmlNodePtr, node)
308: PUSH_AND_POP(extern, xmlChar*, name)
1.40 daniel 309:
1.55 daniel 310: /*
311: * Macros for accessing the content. Those should be used only by the parser,
312: * and not exported.
313: *
314: * Dirty macros, i.e. one need to make assumption on the context to use them
315: *
1.123 daniel 316: * CUR_PTR return the current pointer to the xmlChar to be parsed.
1.151 daniel 317: * To be used with extreme caution since operations consuming
318: * characters may move the input buffer to a different location !
1.123 daniel 319: * CUR returns the current xmlChar value, i.e. a 8 bit value if compiled
1.152 daniel 320: * in ISO-Latin or UTF-8.
1.151 daniel 321: * This should be used internally by the parser
1.55 daniel 322: * only to compare to ASCII values otherwise it would break when
323: * running with UTF-8 encoding.
1.123 daniel 324: * NXT(n) returns the n'th next xmlChar. Like CUR, it should be used only
1.55 daniel 325: * to compare against ASCII-based substrings.
1.123 daniel 326: * SKIP(n) Skip n xmlChar, and must also be used only to skip ASCII defined
1.55 daniel 327: * strings within the parser.
328: *
1.77 daniel 329: * Clean macros, not dependent of an ASCII context, expect UTF-8 encoding
1.55 daniel 330: *
331: * NEXT Skip to the next character, this does the proper decoding
332: * in UTF-8 mode. It also pop-up unfinished entities on the fly.
1.77 daniel 333: * COPY(to) copy one char to *to, increment CUR_PTR and to accordingly
1.155 daniel 334: * CUR_CHAR Return the current char as an int as well as its length.
1.55 daniel 335: */
1.45 daniel 336:
/* RAW: the current input byte, or -1 while ctxt->token is non-zero. */
1.152 daniel 337: #define RAW (ctxt->token ? -1 : (*ctxt->input->cur))
/* CUR: ctxt->token when it is non-zero, otherwise the current input byte. */
1.97 daniel 338: #define CUR (ctxt->token ? ctxt->token : (*ctxt->input->cur))
/* NXT(val): the byte val positions after the cursor; ignores ctxt->token. */
1.55 daniel 339: #define NXT(val) ctxt->input->cur[(val)]
/* CUR_PTR: the cursor itself (an lvalue). */
340: #define CUR_PTR ctxt->input->cur
1.154 daniel 341:
/*
 * SKIP(val): advance nbChars and the cursor by val, then call the
 * PE-reference / reference handlers if the new current byte is '%' or
 * '&', and pop the input if it is exhausted (current byte 0 and
 * xmlParserInputGrow() returned <= 0).
 * NOTE: expands to several statements with no do { } while (0) wrapper,
 * so it must not be the sole body of an unbraced if/else/loop.
 */
1.164 daniel 342: #define SKIP(val) ctxt->nbChars += (val),ctxt->input->cur += (val); \
343: if (*ctxt->input->cur == '%') xmlParserHandlePEReference(ctxt); \
1.168 daniel 344: if (*ctxt->input->cur == '&') xmlParserHandleReference(ctxt); \
345: if ((*ctxt->input->cur == 0) && \
346: (xmlParserInputGrow(ctxt->input, INPUT_CHUNK) <= 0)) \
347: xmlPopInput(ctxt)
1.164 daniel 348:
/*
 * SHRINK: drop consumed input via xmlParserInputShrink(), then pop the
 * input if it is exhausted. Multi-statement, same caveat as SKIP.
 */
1.97 daniel 349: #define SHRINK xmlParserInputShrink(ctxt->input); \
350: if ((*ctxt->input->cur == 0) && \
351: (xmlParserInputGrow(ctxt->input, INPUT_CHUNK) <= 0)) \
352: xmlPopInput(ctxt)
353:
/*
 * GROW: request INPUT_CHUNK more lookahead, then pop the input if it is
 * exhausted. Multi-statement, same caveat as SKIP.
 */
354: #define GROW xmlParserInputGrow(ctxt->input, INPUT_CHUNK); \
355: if ((*ctxt->input->cur == 0) && \
356: (xmlParserInputGrow(ctxt->input, INPUT_CHUNK) <= 0)) \
357: xmlPopInput(ctxt)
1.55 daniel 358:
/* SKIP_BLANKS: call xmlSkipBlankChars(); the expansion already ends in ';'. */
1.155 daniel 359: #define SKIP_BLANKS xmlSkipBlankChars(ctxt);
1.154 daniel 360:
/* NEXT: call xmlNextChar(); the expansion already ends in ';'. */
1.151 daniel 361: #define NEXT xmlNextChar(ctxt);
1.154 daniel 362:
/*
 * NEXTL(l): update line/col from the current byte, clear ctxt->token,
 * advance the cursor by l bytes, then call the PE-reference / reference
 * handlers if the new current byte is '%' or '&'. Unlike xmlNextChar()
 * it does not grow or pop the input. Multi-statement, same caveat as SKIP.
 */
1.153 daniel 363: #define NEXTL(l) \
364: if (*(ctxt->input->cur) == '\n') { \
365: ctxt->input->line++; ctxt->input->col = 1; \
366: } else ctxt->input->col++; \
1.154 daniel 367: ctxt->token = 0; ctxt->input->cur += l; \
368: if (*ctxt->input->cur == '%') xmlParserHandlePEReference(ctxt); \
369: if (*ctxt->input->cur == '&') xmlParserHandleReference(ctxt);
370:
/* CUR_CHAR(l): xmlCurrentChar() on the context input; l receives the
 * length. The expansion already ends in ';'. */
1.152 daniel 371: #define CUR_CHAR(l) xmlCurrentChar(ctxt, &l);
/* CUR_SCHAR(s, l): xmlStringCurrentChar() on the string s; l receives the
 * length. The expansion already ends in ';'. */
1.162 daniel 372: #define CUR_SCHAR(s, l) xmlStringCurrentChar(ctxt, s, &l);
1.154 daniel 373:
/*
 * COPY_BUF(l,b,i,v): append the char value v to buffer b at index i and
 * advance i: a plain byte store when l == 1, otherwise through
 * xmlCopyChar(). An if/else without braces, so the SKIP caveat applies.
 */
1.152 daniel 374: #define COPY_BUF(l,b,i,v) \
375: if (l == 1) b[i++] = (xmlChar) v; \
376: else i += xmlCopyChar(l,&b[i],v);
1.151 daniel 377:
378: /**
379: * xmlNextChar:
380: * @ctxt: the XML parser context
381: *
382: * Skip to the next char input char.
383: */
1.55 daniel 384:
1.151 daniel 385: void
386: xmlNextChar(xmlParserCtxtPtr ctxt) {
387: if (ctxt->token != 0) ctxt->token = 0;
388: else {
389: if ((*ctxt->input->cur == 0) &&
390: (xmlParserInputGrow(ctxt->input, INPUT_CHUNK) <= 0) &&
391: (ctxt->instate != XML_PARSER_COMMENT)) {
392: /*
393: * If we are at the end of the current entity and
394: * the context allows it, we pop consumed entities
395: * automatically.
396: * TODO: the auto closing should be blocked in other cases
397: */
398: xmlPopInput(ctxt);
399: } else {
400: if (*(ctxt->input->cur) == '\n') {
401: ctxt->input->line++; ctxt->input->col = 1;
402: } else ctxt->input->col++;
403: if (ctxt->encoding == NULL) {
404: /*
405: * We are supposed to handle UTF8, check it's valid
406: * From rfc2044: encoding of the Unicode values on UTF-8:
407: *
408: * UCS-4 range (hex.) UTF-8 octet sequence (binary)
409: * 0000 0000-0000 007F 0xxxxxxx
410: * 0000 0080-0000 07FF 110xxxxx 10xxxxxx
411: * 0000 0800-0000 FFFF 1110xxxx 10xxxxxx 10xxxxxx
412: *
1.160 daniel 413: * Check for the 0x110000 limit too
1.151 daniel 414: */
415: const unsigned char *cur = ctxt->input->cur;
416: unsigned char c;
1.91 daniel 417:
1.151 daniel 418: c = *cur;
419: if (c & 0x80) {
420: if (cur[1] == 0)
421: xmlParserInputGrow(ctxt->input, INPUT_CHUNK);
422: if ((cur[1] & 0xc0) != 0x80)
423: goto encoding_error;
424: if ((c & 0xe0) == 0xe0) {
425: unsigned int val;
426:
427: if (cur[2] == 0)
428: xmlParserInputGrow(ctxt->input, INPUT_CHUNK);
429: if ((cur[2] & 0xc0) != 0x80)
430: goto encoding_error;
431: if ((c & 0xf0) == 0xf0) {
432: if (cur[3] == 0)
433: xmlParserInputGrow(ctxt->input, INPUT_CHUNK);
434: if (((c & 0xf8) != 0xf0) ||
435: ((cur[3] & 0xc0) != 0x80))
436: goto encoding_error;
437: /* 4-byte code */
438: ctxt->input->cur += 4;
439: val = (cur[0] & 0x7) << 18;
440: val |= (cur[1] & 0x3f) << 12;
441: val |= (cur[2] & 0x3f) << 6;
442: val |= cur[3] & 0x3f;
443: } else {
444: /* 3-byte code */
445: ctxt->input->cur += 3;
446: val = (cur[0] & 0xf) << 12;
447: val |= (cur[1] & 0x3f) << 6;
448: val |= cur[2] & 0x3f;
449: }
450: if (((val > 0xd7ff) && (val < 0xe000)) ||
451: ((val > 0xfffd) && (val < 0x10000)) ||
1.160 daniel 452: (val >= 0x110000)) {
1.151 daniel 453: if ((ctxt->sax != NULL) &&
454: (ctxt->sax->error != NULL))
455: ctxt->sax->error(ctxt->userData,
456: "Char out of allowed range\n");
457: ctxt->errNo = XML_ERR_INVALID_ENCODING;
458: ctxt->wellFormed = 0;
459: }
460: } else
461: /* 2-byte code */
462: ctxt->input->cur += 2;
463: } else
464: /* 1-byte code */
465: ctxt->input->cur++;
466: } else {
467: /*
468: * Assume it's a fixed lenght encoding (1) with
469: * a compatibke encoding for the ASCII set, since
470: * XML constructs only use < 128 chars
471: */
472: ctxt->input->cur++;
473: }
474: ctxt->nbChars++;
475: if (*ctxt->input->cur == 0)
476: xmlParserInputGrow(ctxt->input, INPUT_CHUNK);
477: }
478: }
1.154 daniel 479: if (*ctxt->input->cur == '%') xmlParserHandlePEReference(ctxt);
480: if (*ctxt->input->cur == '&') xmlParserHandleReference(ctxt);
1.168 daniel 481: if ((*ctxt->input->cur == 0) &&
482: (xmlParserInputGrow(ctxt->input, INPUT_CHUNK) <= 0))
483: xmlPopInput(ctxt);
1.151 daniel 484: return;
485: encoding_error:
486: /*
487: * If we detect an UTF8 error that probably mean that the
488: * input encoding didn't get properly advertized in the
489: * declaration header. Report the error and switch the encoding
490: * to ISO-Latin-1 (if you don't like this policy, just declare the
491: * encoding !)
492: */
493: if ((ctxt->sax != NULL) && (ctxt->sax->error != NULL))
494: ctxt->sax->error(ctxt->userData,
495: "Input is not proper UTF-8, indicate encoding !\n");
496: ctxt->errNo = XML_ERR_INVALID_ENCODING;
497:
498: ctxt->encoding = xmlStrdup(BAD_CAST "ISO-8859-1");
499: ctxt->input->cur++;
500: return;
501: }
1.42 daniel 502:
1.152 daniel 503: /**
504: * xmlCurrentChar:
505: * @ctxt: the XML parser context
506: * @len: pointer to the length of the char read
507: *
508: * The current char value, if using UTF-8 this may actaully span multiple
509: * bytes in the input buffer.
510: *
511: * Returns the current char value and its lenght
512: */
513:
514: int
515: xmlCurrentChar(xmlParserCtxtPtr ctxt, int *len) {
516: if (ctxt->token != 0) {
517: *len = 0;
518: return(ctxt->token);
519: }
520: if (ctxt->encoding == NULL) {
521: /*
522: * We are supposed to handle UTF8, check it's valid
523: * From rfc2044: encoding of the Unicode values on UTF-8:
524: *
525: * UCS-4 range (hex.) UTF-8 octet sequence (binary)
526: * 0000 0000-0000 007F 0xxxxxxx
527: * 0000 0080-0000 07FF 110xxxxx 10xxxxxx
528: * 0000 0800-0000 FFFF 1110xxxx 10xxxxxx 10xxxxxx
529: *
1.160 daniel 530: * Check for the 0x110000 limit too
1.152 daniel 531: */
532: const unsigned char *cur = ctxt->input->cur;
533: unsigned char c;
534: unsigned int val;
535:
536: c = *cur;
537: if (c & 0x80) {
538: if (cur[1] == 0)
539: xmlParserInputGrow(ctxt->input, INPUT_CHUNK);
540: if ((cur[1] & 0xc0) != 0x80)
541: goto encoding_error;
542: if ((c & 0xe0) == 0xe0) {
543:
544: if (cur[2] == 0)
545: xmlParserInputGrow(ctxt->input, INPUT_CHUNK);
546: if ((cur[2] & 0xc0) != 0x80)
547: goto encoding_error;
548: if ((c & 0xf0) == 0xf0) {
549: if (cur[3] == 0)
550: xmlParserInputGrow(ctxt->input, INPUT_CHUNK);
551: if (((c & 0xf8) != 0xf0) ||
552: ((cur[3] & 0xc0) != 0x80))
553: goto encoding_error;
554: /* 4-byte code */
555: *len = 4;
556: val = (cur[0] & 0x7) << 18;
557: val |= (cur[1] & 0x3f) << 12;
558: val |= (cur[2] & 0x3f) << 6;
559: val |= cur[3] & 0x3f;
560: } else {
561: /* 3-byte code */
562: *len = 3;
563: val = (cur[0] & 0xf) << 12;
564: val |= (cur[1] & 0x3f) << 6;
565: val |= cur[2] & 0x3f;
566: }
567: } else {
568: /* 2-byte code */
569: *len = 2;
570: val = (cur[0] & 0x1f) << 6;
1.168 daniel 571: val |= cur[1] & 0x3f;
1.152 daniel 572: }
573: if (!IS_CHAR(val)) {
574: if ((ctxt->sax != NULL) &&
575: (ctxt->sax->error != NULL))
576: ctxt->sax->error(ctxt->userData,
577: "Char out of allowed range\n");
578: ctxt->errNo = XML_ERR_INVALID_ENCODING;
579: ctxt->wellFormed = 0;
580: }
581: return(val);
582: } else {
583: /* 1-byte code */
584: *len = 1;
585: return((int) *ctxt->input->cur);
586: }
587: }
588: /*
589: * Assume it's a fixed lenght encoding (1) with
590: * a compatibke encoding for the ASCII set, since
591: * XML constructs only use < 128 chars
592: */
593: *len = 1;
594: return((int) *ctxt->input->cur);
595: encoding_error:
596: /*
597: * If we detect an UTF8 error that probably mean that the
598: * input encoding didn't get properly advertized in the
599: * declaration header. Report the error and switch the encoding
600: * to ISO-Latin-1 (if you don't like this policy, just declare the
601: * encoding !)
602: */
603: if ((ctxt->sax != NULL) && (ctxt->sax->error != NULL))
604: ctxt->sax->error(ctxt->userData,
605: "Input is not proper UTF-8, indicate encoding !\n");
606: ctxt->errNo = XML_ERR_INVALID_ENCODING;
607:
608: ctxt->encoding = xmlStrdup(BAD_CAST "ISO-8859-1");
609: *len = 1;
610: return((int) *ctxt->input->cur);
611: }
612:
613: /**
1.162 daniel 614: * xmlStringCurrentChar:
615: * @ctxt: the XML parser context
616: * @cur: pointer to the beginning of the char
617: * @len: pointer to the length of the char read
618: *
619: * The current char value, if using UTF-8 this may actaully span multiple
620: * bytes in the input buffer.
621: *
622: * Returns the current char value and its lenght
623: */
624:
625: int
626: xmlStringCurrentChar(xmlParserCtxtPtr ctxt, const xmlChar *cur, int *len) {
627: if (ctxt->encoding == NULL) {
628: /*
629: * We are supposed to handle UTF8, check it's valid
630: * From rfc2044: encoding of the Unicode values on UTF-8:
631: *
632: * UCS-4 range (hex.) UTF-8 octet sequence (binary)
633: * 0000 0000-0000 007F 0xxxxxxx
634: * 0000 0080-0000 07FF 110xxxxx 10xxxxxx
635: * 0000 0800-0000 FFFF 1110xxxx 10xxxxxx 10xxxxxx
636: *
637: * Check for the 0x110000 limit too
638: */
639: unsigned char c;
640: unsigned int val;
641:
642: c = *cur;
643: if (c & 0x80) {
644: if ((cur[1] & 0xc0) != 0x80)
645: goto encoding_error;
646: if ((c & 0xe0) == 0xe0) {
647:
648: if ((cur[2] & 0xc0) != 0x80)
649: goto encoding_error;
650: if ((c & 0xf0) == 0xf0) {
651: if (((c & 0xf8) != 0xf0) ||
652: ((cur[3] & 0xc0) != 0x80))
653: goto encoding_error;
654: /* 4-byte code */
655: *len = 4;
656: val = (cur[0] & 0x7) << 18;
657: val |= (cur[1] & 0x3f) << 12;
658: val |= (cur[2] & 0x3f) << 6;
659: val |= cur[3] & 0x3f;
660: } else {
661: /* 3-byte code */
662: *len = 3;
663: val = (cur[0] & 0xf) << 12;
664: val |= (cur[1] & 0x3f) << 6;
665: val |= cur[2] & 0x3f;
666: }
667: } else {
668: /* 2-byte code */
669: *len = 2;
670: val = (cur[0] & 0x1f) << 6;
671: val |= cur[2] & 0x3f;
672: }
673: if (!IS_CHAR(val)) {
674: if ((ctxt->sax != NULL) &&
675: (ctxt->sax->error != NULL))
676: ctxt->sax->error(ctxt->userData,
677: "Char out of allowed range\n");
678: ctxt->errNo = XML_ERR_INVALID_ENCODING;
679: ctxt->wellFormed = 0;
680: }
681: return(val);
682: } else {
683: /* 1-byte code */
684: *len = 1;
685: return((int) *cur);
686: }
687: }
688: /*
689: * Assume it's a fixed lenght encoding (1) with
690: * a compatibke encoding for the ASCII set, since
691: * XML constructs only use < 128 chars
692: */
693: *len = 1;
694: return((int) *cur);
695: encoding_error:
696: /*
697: * If we detect an UTF8 error that probably mean that the
698: * input encoding didn't get properly advertized in the
699: * declaration header. Report the error and switch the encoding
700: * to ISO-Latin-1 (if you don't like this policy, just declare the
701: * encoding !)
702: */
703: if ((ctxt->sax != NULL) && (ctxt->sax->error != NULL))
704: ctxt->sax->error(ctxt->userData,
705: "Input is not proper UTF-8, indicate encoding !\n");
706: ctxt->errNo = XML_ERR_INVALID_ENCODING;
707:
708: *len = 1;
709: return((int) *cur);
710: }
711:
712: /**
1.152 daniel 713: * xmlCopyChar:
714: * @len: pointer to the length of the char read (or zero)
715: * @array: pointer to an arry of xmlChar
716: * @val: the char value
717: *
718: * append the char value in the array
719: *
720: * Returns the number of xmlChar written
721: */
722:
723: int
724: xmlCopyChar(int len, xmlChar *out, int val) {
725: /*
726: * We are supposed to handle UTF8, check it's valid
727: * From rfc2044: encoding of the Unicode values on UTF-8:
728: *
729: * UCS-4 range (hex.) UTF-8 octet sequence (binary)
730: * 0000 0000-0000 007F 0xxxxxxx
731: * 0000 0080-0000 07FF 110xxxxx 10xxxxxx
732: * 0000 0800-0000 FFFF 1110xxxx 10xxxxxx 10xxxxxx
733: */
734: if (len == 0) {
735: if (val < 0) len = 0;
1.160 daniel 736: else if (val < 0x80) len = 1;
737: else if (val < 0x800) len = 2;
738: else if (val < 0x10000) len = 3;
739: else if (val < 0x110000) len = 4;
1.152 daniel 740: if (len == 0) {
741: fprintf(stderr, "Internal error, xmlCopyChar 0x%X out of bound\n",
742: val);
743: return(0);
744: }
745: }
746: if (len > 1) {
747: int bits;
748:
749: if (val < 0x80) { *out++= val; bits= -6; }
750: else if (val < 0x800) { *out++= (val >> 6) | 0xC0; bits= 0; }
751: else if (val < 0x10000) { *out++= (val >> 12) | 0xE0; bits= 6; }
752: else { *out++= (val >> 18) | 0xF0; bits= 12; }
753:
754: for ( ; bits >= 0; bits-= 6)
755: *out++= ((val >> bits) & 0x3F) | 0x80 ;
756:
757: return(len);
758: }
759: *out = (xmlChar) val;
760: return(1);
1.155 daniel 761: }
762:
763: /**
764: * xmlSkipBlankChars:
765: * @ctxt: the XML parser context
766: *
767: * skip all blanks character found at that point in the input streams.
768: * It pops up finished entities in the process if allowable at that point.
769: *
770: * Returns the number of space chars skipped
771: */
772:
773: int
774: xmlSkipBlankChars(xmlParserCtxtPtr ctxt) {
775: int cur, res = 0;
776:
777: do {
778: cur = CUR;
779: while (IS_BLANK(cur)) {
780: NEXT;
781: cur = CUR;
782: res++;
783: }
784: while ((cur == 0) && (ctxt->inputNr > 1) &&
785: (ctxt->instate != XML_PARSER_COMMENT)) {
786: xmlPopInput(ctxt);
787: cur = CUR;
788: }
789: if (*ctxt->input->cur == '%') xmlParserHandlePEReference(ctxt);
790: if (*ctxt->input->cur == '&') xmlParserHandleReference(ctxt);
791: } while (IS_BLANK(cur));
792: return(res);
1.152 daniel 793: }
794:
1.97 daniel 795: /************************************************************************
796: * *
797: * Commodity functions to handle entities processing *
798: * *
799: ************************************************************************/
1.40 daniel 800:
1.50 daniel 801: /**
802: * xmlPopInput:
803: * @ctxt: an XML parser context
804: *
1.40 daniel 805: * xmlPopInput: the current input pointed by ctxt->input came to an end
806: * pop it and return the next char.
1.45 daniel 807: *
1.123 daniel 808: * Returns the current xmlChar in the parser context
1.40 daniel 809: */
1.123 daniel 810: xmlChar
1.55 daniel 811: xmlPopInput(xmlParserCtxtPtr ctxt) {
1.40 daniel 812: if (ctxt->inputNr == 1) return(0); /* End of main Input */
1.69 daniel 813: xmlFreeInputStream(inputPop(ctxt));
1.97 daniel 814: if ((*ctxt->input->cur == 0) &&
815: (xmlParserInputGrow(ctxt->input, INPUT_CHUNK) <= 0))
816: return(xmlPopInput(ctxt));
1.40 daniel 817: return(CUR);
818: }
819:
1.50 daniel 820: /**
821: * xmlPushInput:
822: * @ctxt: an XML parser context
823: * @input: an XML parser input fragment (entity, XML fragment ...).
824: *
1.40 daniel 825: * xmlPushInput: switch to a new input stream which is stacked on top
826: * of the previous one(s).
827: */
1.55 daniel 828: void
829: xmlPushInput(xmlParserCtxtPtr ctxt, xmlParserInputPtr input) {
1.40 daniel 830: if (input == NULL) return;
831: inputPush(ctxt, input);
1.164 daniel 832: GROW;
1.40 daniel 833: }
834:
1.50 daniel 835: /**
1.69 daniel 836: * xmlFreeInputStream:
1.127 daniel 837: * @input: an xmlParserInputPtr
1.69 daniel 838: *
839: * Free up an input stream.
840: */
841: void
842: xmlFreeInputStream(xmlParserInputPtr input) {
843: if (input == NULL) return;
844:
1.119 daniel 845: if (input->filename != NULL) xmlFree((char *) input->filename);
846: if (input->directory != NULL) xmlFree((char *) input->directory);
1.164 daniel 847: if (input->encoding != NULL) xmlFree((char *) input->encoding);
1.165 daniel 848: if (input->version != NULL) xmlFree((char *) input->version);
1.69 daniel 849: if ((input->free != NULL) && (input->base != NULL))
1.123 daniel 850: input->free((xmlChar *) input->base);
1.93 veillard 851: if (input->buf != NULL)
852: xmlFreeParserInputBuffer(input->buf);
1.69 daniel 853: memset(input, -1, sizeof(xmlParserInput));
1.119 daniel 854: xmlFree(input);
1.69 daniel 855: }
856:
857: /**
1.96 daniel 858: * xmlNewInputStream:
859: * @ctxt: an XML parser context
860: *
861: * Create a new input stream structure
862: * Returns the new input stream or NULL
863: */
864: xmlParserInputPtr
865: xmlNewInputStream(xmlParserCtxtPtr ctxt) {
866: xmlParserInputPtr input;
867:
1.119 daniel 868: input = (xmlParserInputPtr) xmlMalloc(sizeof(xmlParserInput));
1.96 daniel 869: if (input == NULL) {
1.123 daniel 870: ctxt->errNo = XML_ERR_NO_MEMORY;
1.96 daniel 871: if ((ctxt->sax != NULL) && (ctxt->sax->error != NULL))
1.122 daniel 872: ctxt->sax->error(ctxt->userData,
873: "malloc: couldn't allocate a new input stream\n");
1.123 daniel 874: ctxt->errNo = XML_ERR_NO_MEMORY;
1.96 daniel 875: return(NULL);
876: }
1.165 daniel 877: memset(input, 0, sizeof(xmlParserInput));
1.96 daniel 878: input->line = 1;
879: input->col = 1;
1.167 daniel 880: input->standalone = -1;
1.96 daniel 881: return(input);
882: }
883:
884: /**
1.50 daniel 885: * xmlNewEntityInputStream:
886: * @ctxt: an XML parser context
887: * @entity: an Entity pointer
888: *
1.82 daniel 889: * Create a new input stream based on an xmlEntityPtr
1.113 daniel 890: *
891: * Returns the new input stream or NULL
1.45 daniel 892: */
1.50 daniel 893: xmlParserInputPtr
894: xmlNewEntityInputStream(xmlParserCtxtPtr ctxt, xmlEntityPtr entity) {
1.45 daniel 895: xmlParserInputPtr input;
896:
897: if (entity == NULL) {
1.123 daniel 898: ctxt->errNo = XML_ERR_INTERNAL_ERROR;
1.55 daniel 899: if ((ctxt->sax != NULL) && (ctxt->sax->error != NULL))
1.74 daniel 900: ctxt->sax->error(ctxt->userData,
1.45 daniel 901: "internal: xmlNewEntityInputStream entity = NULL\n");
1.123 daniel 902: ctxt->errNo = XML_ERR_INTERNAL_ERROR;
1.50 daniel 903: return(NULL);
1.45 daniel 904: }
905: if (entity->content == NULL) {
1.159 daniel 906: switch (entity->etype) {
1.113 daniel 907: case XML_EXTERNAL_GENERAL_UNPARSED_ENTITY:
1.123 daniel 908: ctxt->errNo = XML_ERR_UNPARSED_ENTITY;
1.113 daniel 909: if ((ctxt->sax != NULL) && (ctxt->sax->error != NULL))
910: ctxt->sax->error(ctxt->userData,
911: "xmlNewEntityInputStream unparsed entity !\n");
912: break;
913: case XML_EXTERNAL_GENERAL_PARSED_ENTITY:
914: case XML_EXTERNAL_PARAMETER_ENTITY:
1.116 daniel 915: return(xmlLoadExternalEntity((char *) entity->SystemID,
1.142 daniel 916: (char *) entity->ExternalID, ctxt));
1.113 daniel 917: case XML_INTERNAL_GENERAL_ENTITY:
918: if ((ctxt->sax != NULL) && (ctxt->sax->error != NULL))
919: ctxt->sax->error(ctxt->userData,
920: "Internal entity %s without content !\n", entity->name);
921: break;
922: case XML_INTERNAL_PARAMETER_ENTITY:
1.123 daniel 923: ctxt->errNo = XML_ERR_INTERNAL_ERROR;
1.113 daniel 924: if ((ctxt->sax != NULL) && (ctxt->sax->error != NULL))
925: ctxt->sax->error(ctxt->userData,
926: "Internal parameter entity %s without content !\n", entity->name);
927: break;
928: case XML_INTERNAL_PREDEFINED_ENTITY:
1.123 daniel 929: ctxt->errNo = XML_ERR_INTERNAL_ERROR;
1.113 daniel 930: if ((ctxt->sax != NULL) && (ctxt->sax->error != NULL))
931: ctxt->sax->error(ctxt->userData,
932: "Predefined entity %s without content !\n", entity->name);
933: break;
934: }
1.50 daniel 935: return(NULL);
1.45 daniel 936: }
1.96 daniel 937: input = xmlNewInputStream(ctxt);
1.45 daniel 938: if (input == NULL) {
1.50 daniel 939: return(NULL);
1.45 daniel 940: }
1.156 daniel 941: input->filename = (char *) entity->SystemID;
1.45 daniel 942: input->base = entity->content;
943: input->cur = entity->content;
1.140 daniel 944: input->length = entity->length;
1.50 daniel 945: return(input);
1.45 daniel 946: }
947:
1.59 daniel 948: /**
949: * xmlNewStringInputStream:
950: * @ctxt: an XML parser context
1.96 daniel 951: * @buffer: an memory buffer
1.59 daniel 952: *
953: * Create a new input stream based on a memory buffer.
1.68 daniel 954: * Returns the new input stream
1.59 daniel 955: */
956: xmlParserInputPtr
1.123 daniel 957: xmlNewStringInputStream(xmlParserCtxtPtr ctxt, const xmlChar *buffer) {
1.59 daniel 958: xmlParserInputPtr input;
959:
1.96 daniel 960: if (buffer == NULL) {
1.123 daniel 961: ctxt->errNo = XML_ERR_INTERNAL_ERROR;
1.59 daniel 962: if ((ctxt->sax != NULL) && (ctxt->sax->error != NULL))
1.74 daniel 963: ctxt->sax->error(ctxt->userData,
1.59 daniel 964: "internal: xmlNewStringInputStream string = NULL\n");
965: return(NULL);
966: }
1.96 daniel 967: input = xmlNewInputStream(ctxt);
1.59 daniel 968: if (input == NULL) {
969: return(NULL);
970: }
1.96 daniel 971: input->base = buffer;
972: input->cur = buffer;
1.140 daniel 973: input->length = xmlStrlen(buffer);
1.59 daniel 974: return(input);
975: }
976:
1.76 daniel 977: /**
978: * xmlNewInputFromFile:
979: * @ctxt: an XML parser context
980: * @filename: the filename to use as entity
981: *
982: * Create a new input stream based on a file.
983: *
984: * Returns the new input stream or NULL in case of error
985: */
986: xmlParserInputPtr
1.79 daniel 987: xmlNewInputFromFile(xmlParserCtxtPtr ctxt, const char *filename) {
1.91 daniel 988: xmlParserInputBufferPtr buf;
1.76 daniel 989: xmlParserInputPtr inputStream;
1.111 daniel 990: char *directory = NULL;
1.76 daniel 991:
1.96 daniel 992: if (ctxt == NULL) return(NULL);
1.91 daniel 993: buf = xmlParserInputBufferCreateFilename(filename, XML_CHAR_ENCODING_NONE);
1.94 daniel 994: if (buf == NULL) {
1.140 daniel 995: char name[XML_PARSER_BIG_BUFFER_SIZE];
1.106 daniel 996:
1.94 daniel 997: if ((ctxt->input != NULL) && (ctxt->input->directory != NULL)) {
998: #ifdef WIN32
999: sprintf(name, "%s\\%s", ctxt->input->directory, filename);
1000: #else
1001: sprintf(name, "%s/%s", ctxt->input->directory, filename);
1002: #endif
1003: buf = xmlParserInputBufferCreateFilename(name,
1004: XML_CHAR_ENCODING_NONE);
1.106 daniel 1005: if (buf != NULL)
1.142 daniel 1006: directory = xmlParserGetDirectory(name);
1.106 daniel 1007: }
1008: if ((buf == NULL) && (ctxt->directory != NULL)) {
1009: #ifdef WIN32
1010: sprintf(name, "%s\\%s", ctxt->directory, filename);
1011: #else
1012: sprintf(name, "%s/%s", ctxt->directory, filename);
1013: #endif
1014: buf = xmlParserInputBufferCreateFilename(name,
1015: XML_CHAR_ENCODING_NONE);
1016: if (buf != NULL)
1.142 daniel 1017: directory = xmlParserGetDirectory(name);
1.106 daniel 1018: }
1019: if (buf == NULL)
1.94 daniel 1020: return(NULL);
1021: }
1022: if (directory == NULL)
1023: directory = xmlParserGetDirectory(filename);
1.76 daniel 1024:
1.96 daniel 1025: inputStream = xmlNewInputStream(ctxt);
1.76 daniel 1026: if (inputStream == NULL) {
1.119 daniel 1027: if (directory != NULL) xmlFree((char *) directory);
1.76 daniel 1028: return(NULL);
1029: }
1030:
1.119 daniel 1031: inputStream->filename = xmlMemStrdup(filename);
1.94 daniel 1032: inputStream->directory = directory;
1.91 daniel 1033: inputStream->buf = buf;
1.76 daniel 1034:
1.91 daniel 1035: inputStream->base = inputStream->buf->buffer->content;
1036: inputStream->cur = inputStream->buf->buffer->content;
1.106 daniel 1037: if ((ctxt->directory == NULL) && (directory != NULL))
1.134 daniel 1038: ctxt->directory = (char *) xmlStrdup((const xmlChar *) directory);
1.76 daniel 1039: return(inputStream);
1040: }
1041:
1.77 daniel 1042: /************************************************************************
1043: * *
1.97 daniel 1044: * Commodity functions to handle parser contexts *
1045: * *
1046: ************************************************************************/
1047:
1048: /**
1049: * xmlInitParserCtxt:
1050: * @ctxt: an XML parser context
1051: *
1052: * Initialize a parser context
1053: */
1054:
1055: void
1056: xmlInitParserCtxt(xmlParserCtxtPtr ctxt)
1057: {
1058: xmlSAXHandler *sax;
1059:
1.168 daniel 1060: xmlDefaultSAXHandlerInit();
1061:
1.119 daniel 1062: sax = (xmlSAXHandler *) xmlMalloc(sizeof(xmlSAXHandler));
1.97 daniel 1063: if (sax == NULL) {
1064: fprintf(stderr, "xmlInitParserCtxt: out of memory\n");
1065: }
1066:
1067: /* Allocate the Input stack */
1.119 daniel 1068: ctxt->inputTab = (xmlParserInputPtr *) xmlMalloc(5 * sizeof(xmlParserInputPtr));
1.97 daniel 1069: ctxt->inputNr = 0;
1070: ctxt->inputMax = 5;
1071: ctxt->input = NULL;
1.165 daniel 1072:
1.97 daniel 1073: ctxt->version = NULL;
1074: ctxt->encoding = NULL;
1075: ctxt->standalone = -1;
1.98 daniel 1076: ctxt->hasExternalSubset = 0;
1077: ctxt->hasPErefs = 0;
1.97 daniel 1078: ctxt->html = 0;
1.98 daniel 1079: ctxt->external = 0;
1.140 daniel 1080: ctxt->instate = XML_PARSER_START;
1.97 daniel 1081: ctxt->token = 0;
1.106 daniel 1082: ctxt->directory = NULL;
1.97 daniel 1083:
1084: /* Allocate the Node stack */
1.119 daniel 1085: ctxt->nodeTab = (xmlNodePtr *) xmlMalloc(10 * sizeof(xmlNodePtr));
1.97 daniel 1086: ctxt->nodeNr = 0;
1087: ctxt->nodeMax = 10;
1088: ctxt->node = NULL;
1089:
1.140 daniel 1090: /* Allocate the Name stack */
1091: ctxt->nameTab = (xmlChar **) xmlMalloc(10 * sizeof(xmlChar *));
1092: ctxt->nameNr = 0;
1093: ctxt->nameMax = 10;
1094: ctxt->name = NULL;
1095:
1.160 daniel 1096: if (sax == NULL) {
1097: ctxt->sax = &xmlDefaultSAXHandler;
1098: } else {
1.97 daniel 1099: ctxt->sax = sax;
1100: memcpy(sax, &xmlDefaultSAXHandler, sizeof(xmlSAXHandler));
1101: }
1102: ctxt->userData = ctxt;
1103: ctxt->myDoc = NULL;
1104: ctxt->wellFormed = 1;
1.99 daniel 1105: ctxt->valid = 1;
1.100 daniel 1106: ctxt->validate = xmlDoValidityCheckingDefaultValue;
1107: ctxt->vctxt.userData = ctxt;
1.149 daniel 1108: if (ctxt->validate) {
1109: ctxt->vctxt.error = xmlParserValidityError;
1.160 daniel 1110: if (xmlGetWarningsDefaultValue == 0)
1111: ctxt->vctxt.warning = NULL;
1112: else
1113: ctxt->vctxt.warning = xmlParserValidityWarning;
1.149 daniel 1114: } else {
1115: ctxt->vctxt.error = NULL;
1116: ctxt->vctxt.warning = NULL;
1117: }
1.97 daniel 1118: ctxt->replaceEntities = xmlSubstituteEntitiesDefaultValue;
1119: ctxt->record_info = 0;
1.135 daniel 1120: ctxt->nbChars = 0;
1.140 daniel 1121: ctxt->checkIndex = 0;
1122: ctxt->errNo = XML_ERR_OK;
1.97 daniel 1123: xmlInitNodeInfoSeq(&ctxt->node_seq);
1124: }
1125:
1126: /**
1127: * xmlFreeParserCtxt:
1128: * @ctxt: an XML parser context
1129: *
1130: * Free all the memory used by a parser context. However the parsed
1131: * document in ctxt->myDoc is not freed.
1132: */
1133:
1134: void
1135: xmlFreeParserCtxt(xmlParserCtxtPtr ctxt)
1136: {
1137: xmlParserInputPtr input;
1.140 daniel 1138: xmlChar *oldname;
1.97 daniel 1139:
1140: if (ctxt == NULL) return;
1141:
1142: while ((input = inputPop(ctxt)) != NULL) {
1143: xmlFreeInputStream(input);
1144: }
1.140 daniel 1145: while ((oldname = namePop(ctxt)) != NULL) {
1146: xmlFree(oldname);
1147: }
1148: if (ctxt->nameTab != NULL) xmlFree(ctxt->nameTab);
1.119 daniel 1149: if (ctxt->nodeTab != NULL) xmlFree(ctxt->nodeTab);
1150: if (ctxt->inputTab != NULL) xmlFree(ctxt->inputTab);
1151: if (ctxt->version != NULL) xmlFree((char *) ctxt->version);
1152: if (ctxt->encoding != NULL) xmlFree((char *) ctxt->encoding);
1.165 daniel 1153: if (ctxt->intSubName != NULL) xmlFree((char *) ctxt->intSubName);
1154: if (ctxt->extSubURI != NULL) xmlFree((char *) ctxt->extSubURI);
1155: if (ctxt->extSubSystem != NULL) xmlFree((char *) ctxt->extSubSystem);
1.97 daniel 1156: if ((ctxt->sax != NULL) && (ctxt->sax != &xmlDefaultSAXHandler))
1.119 daniel 1157: xmlFree(ctxt->sax);
1158: if (ctxt->directory != NULL) xmlFree((char *) ctxt->directory);
1159: xmlFree(ctxt);
1.97 daniel 1160: }
1161:
1162: /**
1163: * xmlNewParserCtxt:
1164: *
1165: * Allocate and initialize a new parser context.
1166: *
1167: * Returns the xmlParserCtxtPtr or NULL
1168: */
1169:
1170: xmlParserCtxtPtr
1171: xmlNewParserCtxt()
1172: {
1173: xmlParserCtxtPtr ctxt;
1174:
1.119 daniel 1175: ctxt = (xmlParserCtxtPtr) xmlMalloc(sizeof(xmlParserCtxt));
1.97 daniel 1176: if (ctxt == NULL) {
1177: fprintf(stderr, "xmlNewParserCtxt : cannot allocate context\n");
1178: perror("malloc");
1179: return(NULL);
1180: }
1.165 daniel 1181: memset(ctxt, 0, sizeof(xmlParserCtxt));
1.97 daniel 1182: xmlInitParserCtxt(ctxt);
1183: return(ctxt);
1184: }
1185:
1186: /**
1187: * xmlClearParserCtxt:
1188: * @ctxt: an XML parser context
1189: *
1190: * Clear (release owned resources) and reinitialize a parser context
1191: */
1192:
1193: void
1194: xmlClearParserCtxt(xmlParserCtxtPtr ctxt)
1195: {
1196: xmlClearNodeInfoSeq(&ctxt->node_seq);
1197: xmlInitParserCtxt(ctxt);
1198: }
1199:
1200: /************************************************************************
1201: * *
1.77 daniel 1202: * Commodity functions to handle entities *
1203: * *
1204: ************************************************************************/
1205:
1.97 daniel 1206:
1207: /**
1208: * xmlParseCharRef:
1209: * @ctxt: an XML parser context
1210: *
1211: * parse Reference declarations
1212: *
1213: * [66] CharRef ::= '&#' [0-9]+ ';' |
1214: * '&#x' [0-9a-fA-F]+ ';'
1215: *
1.98 daniel 1216: * [ WFC: Legal Character ]
1217: * Characters referred to using character references must match the
1218: * production for Char.
1219: *
1.135 daniel 1220: * Returns the value parsed (as an int), 0 in case of error
1.77 daniel 1221: */
1.97 daniel 1222: int
1223: xmlParseCharRef(xmlParserCtxtPtr ctxt) {
1224: int val = 0;
1225:
1.111 daniel 1226: if (ctxt->token != 0) {
1227: val = ctxt->token;
1228: ctxt->token = 0;
1229: return(val);
1230: }
1.152 daniel 1231: if ((RAW == '&') && (NXT(1) == '#') &&
1.97 daniel 1232: (NXT(2) == 'x')) {
1233: SKIP(3);
1.152 daniel 1234: while (RAW != ';') {
1235: if ((RAW >= '0') && (RAW <= '9'))
1.97 daniel 1236: val = val * 16 + (CUR - '0');
1.152 daniel 1237: else if ((RAW >= 'a') && (RAW <= 'f'))
1.97 daniel 1238: val = val * 16 + (CUR - 'a') + 10;
1.152 daniel 1239: else if ((RAW >= 'A') && (RAW <= 'F'))
1.97 daniel 1240: val = val * 16 + (CUR - 'A') + 10;
1241: else {
1.123 daniel 1242: ctxt->errNo = XML_ERR_INVALID_HEX_CHARREF;
1.97 daniel 1243: if ((ctxt->sax != NULL) && (ctxt->sax->error != NULL))
1244: ctxt->sax->error(ctxt->userData,
1245: "xmlParseCharRef: invalid hexadecimal value\n");
1246: ctxt->wellFormed = 0;
1247: val = 0;
1248: break;
1249: }
1250: NEXT;
1251: }
1.164 daniel 1252: if (RAW == ';') {
1253: /* on purpose to avoid reentrancy problems with NEXT and SKIP */
1254: ctxt->nbChars ++;
1255: ctxt->input->cur++;
1256: }
1.152 daniel 1257: } else if ((RAW == '&') && (NXT(1) == '#')) {
1.97 daniel 1258: SKIP(2);
1.152 daniel 1259: while (RAW != ';') {
1260: if ((RAW >= '0') && (RAW <= '9'))
1.97 daniel 1261: val = val * 10 + (CUR - '0');
1262: else {
1.123 daniel 1263: ctxt->errNo = XML_ERR_INVALID_DEC_CHARREF;
1.97 daniel 1264: if ((ctxt->sax != NULL) && (ctxt->sax->error != NULL))
1265: ctxt->sax->error(ctxt->userData,
1266: "xmlParseCharRef: invalid decimal value\n");
1267: ctxt->wellFormed = 0;
1268: val = 0;
1269: break;
1270: }
1271: NEXT;
1272: }
1.164 daniel 1273: if (RAW == ';') {
1274: /* on purpose to avoid reentrancy problems with NEXT and SKIP */
1275: ctxt->nbChars ++;
1276: ctxt->input->cur++;
1277: }
1.97 daniel 1278: } else {
1.123 daniel 1279: ctxt->errNo = XML_ERR_INVALID_CHARREF;
1.97 daniel 1280: if ((ctxt->sax != NULL) && (ctxt->sax->error != NULL))
1.98 daniel 1281: ctxt->sax->error(ctxt->userData,
1282: "xmlParseCharRef: invalid value\n");
1.97 daniel 1283: ctxt->wellFormed = 0;
1284: }
1.98 daniel 1285:
1.97 daniel 1286: /*
1.98 daniel 1287: * [ WFC: Legal Character ]
1288: * Characters referred to using character references must match the
1289: * production for Char.
1.97 daniel 1290: */
1291: if (IS_CHAR(val)) {
1292: return(val);
1293: } else {
1.123 daniel 1294: ctxt->errNo = XML_ERR_INVALID_CHAR;
1.97 daniel 1295: if ((ctxt->sax != NULL) && (ctxt->sax->error != NULL))
1.123 daniel 1296: ctxt->sax->error(ctxt->userData, "CharRef: invalid xmlChar value %d\n",
1.97 daniel 1297: val);
1298: ctxt->wellFormed = 0;
1299: }
1300: return(0);
1.77 daniel 1301: }
1302:
1.96 daniel 1303: /**
1.135 daniel 1304: * xmlParseStringCharRef:
1305: * @ctxt: an XML parser context
1306: * @str: a pointer to an index in the string
1307: *
1308: * parse Reference declarations, variant parsing from a string rather
1309: * than an an input flow.
1310: *
1311: * [66] CharRef ::= '&#' [0-9]+ ';' |
1312: * '&#x' [0-9a-fA-F]+ ';'
1313: *
1314: * [ WFC: Legal Character ]
1315: * Characters referred to using character references must match the
1316: * production for Char.
1317: *
1318: * Returns the value parsed (as an int), 0 in case of error, str will be
1319: * updated to the current value of the index
1320: */
1321: int
1322: xmlParseStringCharRef(xmlParserCtxtPtr ctxt, const xmlChar **str) {
1323: const xmlChar *ptr;
1324: xmlChar cur;
1325: int val = 0;
1326:
1327: if ((str == NULL) || (*str == NULL)) return(0);
1328: ptr = *str;
1329: cur = *ptr;
1.137 daniel 1330: if ((cur == '&') && (ptr[1] == '#') && (ptr[2] == 'x')) {
1.135 daniel 1331: ptr += 3;
1332: cur = *ptr;
1333: while (cur != ';') {
1334: if ((cur >= '0') && (cur <= '9'))
1335: val = val * 16 + (cur - '0');
1336: else if ((cur >= 'a') && (cur <= 'f'))
1337: val = val * 16 + (cur - 'a') + 10;
1338: else if ((cur >= 'A') && (cur <= 'F'))
1339: val = val * 16 + (cur - 'A') + 10;
1340: else {
1341: ctxt->errNo = XML_ERR_INVALID_HEX_CHARREF;
1342: if ((ctxt->sax != NULL) && (ctxt->sax->error != NULL))
1343: ctxt->sax->error(ctxt->userData,
1344: "xmlParseCharRef: invalid hexadecimal value\n");
1345: ctxt->wellFormed = 0;
1346: val = 0;
1347: break;
1348: }
1349: ptr++;
1350: cur = *ptr;
1351: }
1352: if (cur == ';')
1353: ptr++;
1.145 daniel 1354: } else if ((cur == '&') && (ptr[1] == '#')){
1.135 daniel 1355: ptr += 2;
1356: cur = *ptr;
1357: while (cur != ';') {
1358: if ((cur >= '0') && (cur <= '9'))
1359: val = val * 10 + (cur - '0');
1360: else {
1361: ctxt->errNo = XML_ERR_INVALID_DEC_CHARREF;
1362: if ((ctxt->sax != NULL) && (ctxt->sax->error != NULL))
1363: ctxt->sax->error(ctxt->userData,
1364: "xmlParseCharRef: invalid decimal value\n");
1365: ctxt->wellFormed = 0;
1366: val = 0;
1367: break;
1368: }
1369: ptr++;
1370: cur = *ptr;
1371: }
1372: if (cur == ';')
1373: ptr++;
1374: } else {
1375: ctxt->errNo = XML_ERR_INVALID_CHARREF;
1376: if ((ctxt->sax != NULL) && (ctxt->sax->error != NULL))
1377: ctxt->sax->error(ctxt->userData,
1378: "xmlParseCharRef: invalid value\n");
1379: ctxt->wellFormed = 0;
1380: return(0);
1381: }
1382: *str = ptr;
1383:
1384: /*
1385: * [ WFC: Legal Character ]
1386: * Characters referred to using character references must match the
1387: * production for Char.
1388: */
1389: if (IS_CHAR(val)) {
1390: return(val);
1391: } else {
1392: ctxt->errNo = XML_ERR_INVALID_CHAR;
1393: if ((ctxt->sax != NULL) && (ctxt->sax->error != NULL))
1394: ctxt->sax->error(ctxt->userData,
1395: "CharRef: invalid xmlChar value %d\n", val);
1396: ctxt->wellFormed = 0;
1397: }
1398: return(0);
1399: }
1400:
1401: /**
1.96 daniel 1402: * xmlParserHandleReference:
1403: * @ctxt: the parser context
1404: *
1.97 daniel 1405: * [67] Reference ::= EntityRef | CharRef
1406: *
1.96 daniel 1407: * [68] EntityRef ::= '&' Name ';'
1408: *
1.98 daniel 1409: * [ WFC: Entity Declared ]
1410: * the Name given in the entity reference must match that in an entity
1411: * declaration, except that well-formed documents need not declare any
1412: * of the following entities: amp, lt, gt, apos, quot.
1413: *
1414: * [ WFC: Parsed Entity ]
1415: * An entity reference must not contain the name of an unparsed entity
1416: *
1.97 daniel 1417: * [66] CharRef ::= '&#' [0-9]+ ';' |
1418: * '&#x' [0-9a-fA-F]+ ';'
1419: *
1.96 daniel 1420: * A PEReference may have been detectect in the current input stream
1421: * the handling is done accordingly to
1422: * http://www.w3.org/TR/REC-xml#entproc
1423: */
1424: void
1425: xmlParserHandleReference(xmlParserCtxtPtr ctxt) {
1.97 daniel 1426: xmlParserInputPtr input;
1.123 daniel 1427: xmlChar *name;
1.97 daniel 1428: xmlEntityPtr ent = NULL;
1429:
1.126 daniel 1430: if (ctxt->token != 0) {
1431: return;
1432: }
1.152 daniel 1433: if (RAW != '&') return;
1.97 daniel 1434: GROW;
1.152 daniel 1435: if ((RAW == '&') && (NXT(1) == '#')) {
1.97 daniel 1436: switch(ctxt->instate) {
1.140 daniel 1437: case XML_PARSER_ENTITY_DECL:
1438: case XML_PARSER_PI:
1.109 daniel 1439: case XML_PARSER_CDATA_SECTION:
1.140 daniel 1440: case XML_PARSER_COMMENT:
1.168 daniel 1441: case XML_PARSER_SYSTEM_LITERAL:
1.140 daniel 1442: /* we just ignore it there */
1443: return;
1444: case XML_PARSER_START_TAG:
1.109 daniel 1445: return;
1.140 daniel 1446: case XML_PARSER_END_TAG:
1.97 daniel 1447: return;
1448: case XML_PARSER_EOF:
1.123 daniel 1449: ctxt->errNo = XML_ERR_CHARREF_AT_EOF;
1.97 daniel 1450: if ((ctxt->sax != NULL) && (ctxt->sax->error != NULL))
1451: ctxt->sax->error(ctxt->userData, "CharRef at EOF\n");
1452: ctxt->wellFormed = 0;
1453: return;
1454: case XML_PARSER_PROLOG:
1.140 daniel 1455: case XML_PARSER_START:
1456: case XML_PARSER_MISC:
1.123 daniel 1457: ctxt->errNo = XML_ERR_CHARREF_IN_PROLOG;
1.97 daniel 1458: if ((ctxt->sax != NULL) && (ctxt->sax->error != NULL))
1459: ctxt->sax->error(ctxt->userData, "CharRef in prolog!\n");
1460: ctxt->wellFormed = 0;
1461: return;
1462: case XML_PARSER_EPILOG:
1.123 daniel 1463: ctxt->errNo = XML_ERR_CHARREF_IN_EPILOG;
1.97 daniel 1464: if ((ctxt->sax != NULL) && (ctxt->sax->error != NULL))
1465: ctxt->sax->error(ctxt->userData, "CharRef in epilog!\n");
1466: ctxt->wellFormed = 0;
1467: return;
1468: case XML_PARSER_DTD:
1.123 daniel 1469: ctxt->errNo = XML_ERR_CHARREF_IN_DTD;
1.97 daniel 1470: if ((ctxt->sax != NULL) && (ctxt->sax->error != NULL))
1471: ctxt->sax->error(ctxt->userData,
1472: "CharRef are forbiden in DTDs!\n");
1473: ctxt->wellFormed = 0;
1474: return;
1475: case XML_PARSER_ENTITY_VALUE:
1476: /*
1477: * NOTE: in the case of entity values, we don't do the
1.127 daniel 1478: * substitution here since we need the literal
1.97 daniel 1479: * entity value to be able to save the internal
1480: * subset of the document.
1481: * This will be handled by xmlDecodeEntities
1482: */
1483: return;
1484: case XML_PARSER_CONTENT:
1485: case XML_PARSER_ATTRIBUTE_VALUE:
1486: ctxt->token = xmlParseCharRef(ctxt);
1487: return;
1488: }
1489: return;
1490: }
1491:
1492: switch(ctxt->instate) {
1.109 daniel 1493: case XML_PARSER_CDATA_SECTION:
1494: return;
1.140 daniel 1495: case XML_PARSER_PI:
1.97 daniel 1496: case XML_PARSER_COMMENT:
1.168 daniel 1497: case XML_PARSER_SYSTEM_LITERAL:
1498: case XML_PARSER_CONTENT:
1.97 daniel 1499: return;
1.140 daniel 1500: case XML_PARSER_START_TAG:
1501: return;
1502: case XML_PARSER_END_TAG:
1503: return;
1.97 daniel 1504: case XML_PARSER_EOF:
1.123 daniel 1505: ctxt->errNo = XML_ERR_ENTITYREF_AT_EOF;
1.97 daniel 1506: if ((ctxt->sax != NULL) && (ctxt->sax->error != NULL))
1507: ctxt->sax->error(ctxt->userData, "Reference at EOF\n");
1508: ctxt->wellFormed = 0;
1509: return;
1510: case XML_PARSER_PROLOG:
1.140 daniel 1511: case XML_PARSER_START:
1512: case XML_PARSER_MISC:
1.123 daniel 1513: ctxt->errNo = XML_ERR_ENTITYREF_IN_PROLOG;
1.97 daniel 1514: if ((ctxt->sax != NULL) && (ctxt->sax->error != NULL))
1515: ctxt->sax->error(ctxt->userData, "Reference in prolog!\n");
1516: ctxt->wellFormed = 0;
1517: return;
1518: case XML_PARSER_EPILOG:
1.123 daniel 1519: ctxt->errNo = XML_ERR_ENTITYREF_IN_EPILOG;
1.97 daniel 1520: if ((ctxt->sax != NULL) && (ctxt->sax->error != NULL))
1521: ctxt->sax->error(ctxt->userData, "Reference in epilog!\n");
1522: ctxt->wellFormed = 0;
1523: return;
1524: case XML_PARSER_ENTITY_VALUE:
1525: /*
1526: * NOTE: in the case of entity values, we don't do the
1.127 daniel 1527: * substitution here since we need the literal
1.97 daniel 1528: * entity value to be able to save the internal
1529: * subset of the document.
1530: * This will be handled by xmlDecodeEntities
1531: */
1532: return;
1533: case XML_PARSER_ATTRIBUTE_VALUE:
1534: /*
1535: * NOTE: in the case of attributes values, we don't do the
1536: * substitution here unless we are in a mode where
1537: * the parser is explicitely asked to substitute
1538: * entities. The SAX callback is called with values
1539: * without entity substitution.
1540: * This will then be handled by xmlDecodeEntities
1541: */
1.113 daniel 1542: return;
1.97 daniel 1543: case XML_PARSER_ENTITY_DECL:
1544: /*
1545: * we just ignore it there
1546: * the substitution will be done once the entity is referenced
1547: */
1548: return;
1549: case XML_PARSER_DTD:
1.123 daniel 1550: ctxt->errNo = XML_ERR_ENTITYREF_IN_DTD;
1.97 daniel 1551: if ((ctxt->sax != NULL) && (ctxt->sax->error != NULL))
1552: ctxt->sax->error(ctxt->userData,
1553: "Entity references are forbiden in DTDs!\n");
1554: ctxt->wellFormed = 0;
1555: return;
1556: }
1557:
1558: NEXT;
1559: name = xmlScanName(ctxt);
1560: if (name == NULL) {
1.123 daniel 1561: ctxt->errNo = XML_ERR_ENTITYREF_NO_NAME;
1.97 daniel 1562: if ((ctxt->sax != NULL) && (ctxt->sax->error != NULL))
1563: ctxt->sax->error(ctxt->userData, "Entity reference: no name\n");
1564: ctxt->wellFormed = 0;
1565: ctxt->token = '&';
1566: return;
1567: }
1568: if (NXT(xmlStrlen(name)) != ';') {
1.123 daniel 1569: ctxt->errNo = XML_ERR_ENTITYREF_SEMICOL_MISSING;
1.97 daniel 1570: if ((ctxt->sax != NULL) && (ctxt->sax->error != NULL))
1571: ctxt->sax->error(ctxt->userData,
1572: "Entity reference: ';' expected\n");
1573: ctxt->wellFormed = 0;
1574: ctxt->token = '&';
1.119 daniel 1575: xmlFree(name);
1.97 daniel 1576: return;
1577: }
1578: SKIP(xmlStrlen(name) + 1);
1579: if (ctxt->sax != NULL) {
1580: if (ctxt->sax->getEntity != NULL)
1581: ent = ctxt->sax->getEntity(ctxt->userData, name);
1582: }
1.98 daniel 1583:
1584: /*
1585: * [ WFC: Entity Declared ]
1586: * the Name given in the entity reference must match that in an entity
1587: * declaration, except that well-formed documents need not declare any
1588: * of the following entities: amp, lt, gt, apos, quot.
1589: */
1.97 daniel 1590: if (ent == NULL)
1591: ent = xmlGetPredefinedEntity(name);
1592: if (ent == NULL) {
1.123 daniel 1593: ctxt->errNo = XML_ERR_UNDECLARED_ENTITY;
1.97 daniel 1594: if ((ctxt->sax != NULL) && (ctxt->sax->error != NULL))
1595: ctxt->sax->error(ctxt->userData,
1.98 daniel 1596: "Entity reference: entity %s not declared\n",
1597: name);
1.97 daniel 1598: ctxt->wellFormed = 0;
1.119 daniel 1599: xmlFree(name);
1.97 daniel 1600: return;
1601: }
1.98 daniel 1602:
1603: /*
1604: * [ WFC: Parsed Entity ]
1605: * An entity reference must not contain the name of an unparsed entity
1606: */
1.159 daniel 1607: if (ent->etype == XML_EXTERNAL_GENERAL_UNPARSED_ENTITY) {
1.123 daniel 1608: ctxt->errNo = XML_ERR_UNPARSED_ENTITY;
1.98 daniel 1609: if ((ctxt->sax != NULL) && (ctxt->sax->error != NULL))
1610: ctxt->sax->error(ctxt->userData,
1611: "Entity reference to unparsed entity %s\n", name);
1612: ctxt->wellFormed = 0;
1613: }
1614:
1.159 daniel 1615: if (ent->etype == XML_INTERNAL_PREDEFINED_ENTITY) {
1.97 daniel 1616: ctxt->token = ent->content[0];
1.119 daniel 1617: xmlFree(name);
1.97 daniel 1618: return;
1619: }
1620: input = xmlNewEntityInputStream(ctxt, ent);
1621: xmlPushInput(ctxt, input);
1.119 daniel 1622: xmlFree(name);
1.96 daniel 1623: return;
1624: }
1625:
1626: /**
1627: * xmlParserHandlePEReference:
1628: * @ctxt: the parser context
1629: *
1630: * [69] PEReference ::= '%' Name ';'
1631: *
1.98 daniel 1632: * [ WFC: No Recursion ]
1633: * TODO A parsed entity must not contain a recursive
1634: * reference to itself, either directly or indirectly.
1635: *
1636: * [ WFC: Entity Declared ]
1637: * In a document without any DTD, a document with only an internal DTD
1638: * subset which contains no parameter entity references, or a document
1639: * with "standalone='yes'", ... ... The declaration of a parameter
1640: * entity must precede any reference to it...
1641: *
1642: * [ VC: Entity Declared ]
1643: * In a document with an external subset or external parameter entities
1644: * with "standalone='no'", ... ... The declaration of a parameter entity
1645: * must precede any reference to it...
1646: *
1647: * [ WFC: In DTD ]
1648: * Parameter-entity references may only appear in the DTD.
1649: * NOTE: misleading but this is handled.
1650: *
1651: * A PEReference may have been detected in the current input stream
1.96 daniel 1652: * the handling is done accordingly to
1653: * http://www.w3.org/TR/REC-xml#entproc
1654: * i.e.
1655: * - Included in literal in entity values
1656: * - Included as Paraemeter Entity reference within DTDs
1657: */
1658: void
1659: xmlParserHandlePEReference(xmlParserCtxtPtr ctxt) {
1.123 daniel 1660: xmlChar *name;
1.96 daniel 1661: xmlEntityPtr entity = NULL;
1662: xmlParserInputPtr input;
1663:
1.126 daniel 1664: if (ctxt->token != 0) {
1665: return;
1666: }
1.152 daniel 1667: if (RAW != '%') return;
1.96 daniel 1668: switch(ctxt->instate) {
1.109 daniel 1669: case XML_PARSER_CDATA_SECTION:
1670: return;
1.97 daniel 1671: case XML_PARSER_COMMENT:
1672: return;
1.140 daniel 1673: case XML_PARSER_START_TAG:
1674: return;
1675: case XML_PARSER_END_TAG:
1676: return;
1.96 daniel 1677: case XML_PARSER_EOF:
1.123 daniel 1678: ctxt->errNo = XML_ERR_PEREF_AT_EOF;
1.96 daniel 1679: if ((ctxt->sax != NULL) && (ctxt->sax->error != NULL))
1680: ctxt->sax->error(ctxt->userData, "PEReference at EOF\n");
1681: ctxt->wellFormed = 0;
1682: return;
1683: case XML_PARSER_PROLOG:
1.140 daniel 1684: case XML_PARSER_START:
1685: case XML_PARSER_MISC:
1.123 daniel 1686: ctxt->errNo = XML_ERR_PEREF_IN_PROLOG;
1.96 daniel 1687: if ((ctxt->sax != NULL) && (ctxt->sax->error != NULL))
1688: ctxt->sax->error(ctxt->userData, "PEReference in prolog!\n");
1689: ctxt->wellFormed = 0;
1690: return;
1.97 daniel 1691: case XML_PARSER_ENTITY_DECL:
1.96 daniel 1692: case XML_PARSER_CONTENT:
1693: case XML_PARSER_ATTRIBUTE_VALUE:
1.140 daniel 1694: case XML_PARSER_PI:
1.168 daniel 1695: case XML_PARSER_SYSTEM_LITERAL:
1.96 daniel 1696: /* we just ignore it there */
1697: return;
1698: case XML_PARSER_EPILOG:
1.123 daniel 1699: ctxt->errNo = XML_ERR_PEREF_IN_EPILOG;
1.96 daniel 1700: if ((ctxt->sax != NULL) && (ctxt->sax->error != NULL))
1.97 daniel 1701: ctxt->sax->error(ctxt->userData, "PEReference in epilog!\n");
1.96 daniel 1702: ctxt->wellFormed = 0;
1703: return;
1.97 daniel 1704: case XML_PARSER_ENTITY_VALUE:
1705: /*
1706: * NOTE: in the case of entity values, we don't do the
1.127 daniel 1707: * substitution here since we need the literal
1.97 daniel 1708: * entity value to be able to save the internal
1709: * subset of the document.
1710: * This will be handled by xmlDecodeEntities
1711: */
1712: return;
1.96 daniel 1713: case XML_PARSER_DTD:
1.98 daniel 1714: /*
1715: * [WFC: Well-Formedness Constraint: PEs in Internal Subset]
1716: * In the internal DTD subset, parameter-entity references
1717: * can occur only where markup declarations can occur, not
1718: * within markup declarations.
1719: * In that case this is handled in xmlParseMarkupDecl
1720: */
1721: if ((ctxt->external == 0) && (ctxt->inputNr == 1))
1722: return;
1.96 daniel 1723: }
1724:
1725: NEXT;
1726: name = xmlParseName(ctxt);
1727: if (name == NULL) {
1.123 daniel 1728: ctxt->errNo = XML_ERR_PEREF_NO_NAME;
1.96 daniel 1729: if ((ctxt->sax != NULL) && (ctxt->sax->error != NULL))
1730: ctxt->sax->error(ctxt->userData, "xmlHandlePEReference: no name\n");
1731: ctxt->wellFormed = 0;
1732: } else {
1.152 daniel 1733: if (RAW == ';') {
1.96 daniel 1734: NEXT;
1.98 daniel 1735: if ((ctxt->sax != NULL) && (ctxt->sax->getParameterEntity != NULL))
1736: entity = ctxt->sax->getParameterEntity(ctxt->userData, name);
1.96 daniel 1737: if (entity == NULL) {
1.98 daniel 1738:
1739: /*
1740: * [ WFC: Entity Declared ]
1741: * In a document without any DTD, a document with only an
1742: * internal DTD subset which contains no parameter entity
1743: * references, or a document with "standalone='yes'", ...
1744: * ... The declaration of a parameter entity must precede
1745: * any reference to it...
1746: */
1747: if ((ctxt->standalone == 1) ||
1748: ((ctxt->hasExternalSubset == 0) &&
1749: (ctxt->hasPErefs == 0))) {
1750: if ((ctxt->sax != NULL) && (ctxt->sax->error != NULL))
1751: ctxt->sax->error(ctxt->userData,
1752: "PEReference: %%%s; not found\n", name);
1753: ctxt->wellFormed = 0;
1754: } else {
1755: /*
1756: * [ VC: Entity Declared ]
1757: * In a document with an external subset or external
1758: * parameter entities with "standalone='no'", ...
1759: * ... The declaration of a parameter entity must precede
1760: * any reference to it...
1761: */
1762: if ((ctxt->sax != NULL) && (ctxt->sax->warning != NULL))
1763: ctxt->sax->warning(ctxt->userData,
1764: "PEReference: %%%s; not found\n", name);
1765: ctxt->valid = 0;
1766: }
1.96 daniel 1767: } else {
1.159 daniel 1768: if ((entity->etype == XML_INTERNAL_PARAMETER_ENTITY) ||
1769: (entity->etype == XML_EXTERNAL_PARAMETER_ENTITY)) {
1.96 daniel 1770: /*
1.156 daniel 1771: * TODO !!! handle the extra spaces added before and after
1.96 daniel 1772: * c.f. http://www.w3.org/TR/REC-xml#as-PE
1773: */
1774: input = xmlNewEntityInputStream(ctxt, entity);
1775: xmlPushInput(ctxt, input);
1.164 daniel 1776: if ((entity->etype == XML_EXTERNAL_PARAMETER_ENTITY) &&
1777: (RAW == '<') && (NXT(1) == '?') &&
1778: (NXT(2) == 'x') && (NXT(3) == 'm') &&
1779: (NXT(4) == 'l') && (IS_BLANK(NXT(5)))) {
1780: xmlParseXMLDecl(ctxt);
1781: }
1782: if (ctxt->token == 0)
1783: ctxt->token = ' ';
1.96 daniel 1784: } else {
1785: if ((ctxt->sax != NULL) && (ctxt->sax->error != NULL))
1786: ctxt->sax->error(ctxt->userData,
1787: "xmlHandlePEReference: %s is not a parameter entity\n",
1788: name);
1789: ctxt->wellFormed = 0;
1790: }
1791: }
1792: } else {
1.123 daniel 1793: ctxt->errNo = XML_ERR_PEREF_SEMICOL_MISSING;
1.96 daniel 1794: if ((ctxt->sax != NULL) && (ctxt->sax->error != NULL))
1795: ctxt->sax->error(ctxt->userData,
1796: "xmlHandlePEReference: expecting ';'\n");
1797: ctxt->wellFormed = 0;
1798: }
1.119 daniel 1799: xmlFree(name);
1.97 daniel 1800: }
1801: }
1802:
/*
 * Macro used to grow the current buffer.
 *
 * Expects two variables in scope: `buffer` (an xmlChar *) and
 * `buffer_size` (its capacity in xmlChar units), where `buffer` is the
 * identifier passed to the macro. The capacity is doubled and the
 * block reallocated. On failure the old block is released and the
 * enclosing function returns NULL, so the macro may only be used in
 * functions returning a pointer.
 */
#define growBuffer(buffer) {						\
    xmlChar *buffer##_grown;						\
    buffer##_size *= 2;							\
    buffer##_grown = (xmlChar *)					\
		xmlRealloc(buffer, buffer##_size * sizeof(xmlChar));	\
    if (buffer##_grown == NULL) {					\
	perror("realloc failed");					\
	xmlFree(buffer);						\
	return(NULL);							\
    }									\
    buffer = buffer##_grown;						\
}
1.77 daniel 1815:
1816: /**
1817: * xmlDecodeEntities:
1818: * @ctxt: the parser context
1819: * @what: combination of XML_SUBSTITUTE_REF and XML_SUBSTITUTE_PEREF
1820: * @len: the len to decode (in bytes !), -1 for no size limit
1.123 daniel 1821: * @end: an end marker xmlChar, 0 if none
1822: * @end2: an end marker xmlChar, 0 if none
1823: * @end3: an end marker xmlChar, 0 if none
1.77 daniel 1824: *
1825: * [67] Reference ::= EntityRef | CharRef
1826: *
1827: * [69] PEReference ::= '%' Name ';'
1828: *
1829: * Returns A newly allocated string with the substitution done. The caller
1830: * must deallocate it !
1831: */
1.123 daniel 1832: xmlChar *
1.77 daniel 1833: xmlDecodeEntities(xmlParserCtxtPtr ctxt, int len, int what,
1.123 daniel 1834: xmlChar end, xmlChar end2, xmlChar end3) {
1835: xmlChar *buffer = NULL;
1.78 daniel 1836: int buffer_size = 0;
1.161 daniel 1837: int nbchars = 0;
1.78 daniel 1838:
1.123 daniel 1839: xmlChar *current = NULL;
1.77 daniel 1840: xmlEntityPtr ent;
1841: unsigned int max = (unsigned int) len;
1.161 daniel 1842: int c,l;
1.77 daniel 1843:
1844: /*
1845: * allocate a translation buffer.
1846: */
1.140 daniel 1847: buffer_size = XML_PARSER_BIG_BUFFER_SIZE;
1.123 daniel 1848: buffer = (xmlChar *) xmlMalloc(buffer_size * sizeof(xmlChar));
1.77 daniel 1849: if (buffer == NULL) {
1850: perror("xmlDecodeEntities: malloc failed");
1851: return(NULL);
1852: }
1853:
1.78 daniel 1854: /*
1855: * Ok loop until we reach one of the ending char or a size limit.
1856: */
1.161 daniel 1857: c = CUR_CHAR(l);
1858: while ((nbchars < max) && (c != end) &&
1859: (c != end2) && (c != end3)) {
1.77 daniel 1860:
1.161 daniel 1861: if (c == 0) break;
1862: if (((c == '&') && (ctxt->token != '&')) && (NXT(1) == '#')) {
1.98 daniel 1863: int val = xmlParseCharRef(ctxt);
1.161 daniel 1864: COPY_BUF(0,buffer,nbchars,val);
1865: NEXTL(l);
1866: } else if ((c == '&') && (ctxt->token != '&') &&
1867: (what & XML_SUBSTITUTE_REF)) {
1.98 daniel 1868: ent = xmlParseEntityRef(ctxt);
1869: if ((ent != NULL) &&
1870: (ctxt->replaceEntities != 0)) {
1871: current = ent->content;
1872: while (*current != 0) {
1.161 daniel 1873: buffer[nbchars++] = *current++;
1874: if (nbchars > buffer_size - XML_PARSER_BUFFER_SIZE) {
1.98 daniel 1875: growBuffer(buffer);
1.77 daniel 1876: }
1877: }
1.98 daniel 1878: } else if (ent != NULL) {
1.123 daniel 1879: const xmlChar *cur = ent->name;
1.98 daniel 1880:
1.161 daniel 1881: buffer[nbchars++] = '&';
1882: if (nbchars > buffer_size - XML_PARSER_BUFFER_SIZE) {
1.98 daniel 1883: growBuffer(buffer);
1884: }
1.161 daniel 1885: while (*cur != 0) {
1886: buffer[nbchars++] = *cur++;
1887: }
1888: buffer[nbchars++] = ';';
1.77 daniel 1889: }
1.161 daniel 1890: } else if (c == '%' && (what & XML_SUBSTITUTE_PEREF)) {
1.97 daniel 1891: /*
1.77 daniel 1892: * a PEReference induce to switch the entity flow,
1893: * we break here to flush the current set of chars
1894: * parsed if any. We will be called back later.
1.97 daniel 1895: */
1.91 daniel 1896: if (nbchars != 0) break;
1.77 daniel 1897:
1898: xmlParsePEReference(ctxt);
1.79 daniel 1899:
1.97 daniel 1900: /*
1.79 daniel 1901: * Pop-up of finished entities.
1.97 daniel 1902: */
1.152 daniel 1903: while ((RAW == 0) && (ctxt->inputNr > 1))
1.79 daniel 1904: xmlPopInput(ctxt);
1905:
1.98 daniel 1906: break;
1.77 daniel 1907: } else {
1.161 daniel 1908: COPY_BUF(l,buffer,nbchars,c);
1909: NEXTL(l);
1910: if (nbchars > buffer_size - XML_PARSER_BUFFER_SIZE) {
1.86 daniel 1911: growBuffer(buffer);
1912: }
1.77 daniel 1913: }
1.161 daniel 1914: c = CUR_CHAR(l);
1.77 daniel 1915: }
1.161 daniel 1916: buffer[nbchars++] = 0;
1.77 daniel 1917: return(buffer);
1918: }
1919:
1.135 daniel 1920: /**
1921: * xmlStringDecodeEntities:
1922: * @ctxt: the parser context
1923: * @str: the input string
1924: * @what: combination of XML_SUBSTITUTE_REF and XML_SUBSTITUTE_PEREF
1925: * @end: an end marker xmlChar, 0 if none
1926: * @end2: an end marker xmlChar, 0 if none
1927: * @end3: an end marker xmlChar, 0 if none
1928: *
1929: * [67] Reference ::= EntityRef | CharRef
1930: *
1931: * [69] PEReference ::= '%' Name ';'
1932: *
1933: * Returns A newly allocated string with the substitution done. The caller
1934: * must deallocate it !
1935: */
1936: xmlChar *
1937: xmlStringDecodeEntities(xmlParserCtxtPtr ctxt, const xmlChar *str, int what,
1938: xmlChar end, xmlChar end2, xmlChar end3) {
1939: xmlChar *buffer = NULL;
1940: int buffer_size = 0;
1941: xmlChar *out = NULL;
1942:
1943: xmlChar *current = NULL;
1944: xmlEntityPtr ent;
1945: xmlChar cur;
1946:
1947: /*
1948: * allocate a translation buffer.
1949: */
1.140 daniel 1950: buffer_size = XML_PARSER_BIG_BUFFER_SIZE;
1.135 daniel 1951: buffer = (xmlChar *) xmlMalloc(buffer_size * sizeof(xmlChar));
1952: if (buffer == NULL) {
1953: perror("xmlDecodeEntities: malloc failed");
1954: return(NULL);
1955: }
1956: out = buffer;
1957:
1958: /*
1959: * Ok loop until we reach one of the ending char or a size limit.
1960: */
1961: cur = *str;
1962: while ((cur != 0) && (cur != end) &&
1963: (cur != end2) && (cur != end3)) {
1964:
1965: if (cur == 0) break;
1966: if ((cur == '&') && (str[1] == '#')) {
1967: int val = xmlParseStringCharRef(ctxt, &str);
1968: if (val != 0)
1969: *out++ = val;
1970: } else if ((cur == '&') && (what & XML_SUBSTITUTE_REF)) {
1971: ent = xmlParseStringEntityRef(ctxt, &str);
1972: if ((ent != NULL) &&
1973: (ctxt->replaceEntities != 0)) {
1974: current = ent->content;
1975: while (*current != 0) {
1976: *out++ = *current++;
1.140 daniel 1977: if (out - buffer > buffer_size - XML_PARSER_BUFFER_SIZE) {
1.135 daniel 1978: int index = out - buffer;
1979:
1980: growBuffer(buffer);
1981: out = &buffer[index];
1982: }
1983: }
1984: } else if (ent != NULL) {
1985: int i = xmlStrlen(ent->name);
1986: const xmlChar *cur = ent->name;
1987:
1988: *out++ = '&';
1.140 daniel 1989: if (out - buffer > buffer_size - i - XML_PARSER_BUFFER_SIZE) {
1.135 daniel 1990: int index = out - buffer;
1991:
1992: growBuffer(buffer);
1993: out = &buffer[index];
1994: }
1995: for (;i > 0;i--)
1996: *out++ = *cur++;
1997: *out++ = ';';
1998: }
1999: } else if (cur == '%' && (what & XML_SUBSTITUTE_PEREF)) {
2000: ent = xmlParseStringPEReference(ctxt, &str);
2001: if (ent != NULL) {
2002: current = ent->content;
2003: while (*current != 0) {
2004: *out++ = *current++;
1.140 daniel 2005: if (out - buffer > buffer_size - XML_PARSER_BUFFER_SIZE) {
1.135 daniel 2006: int index = out - buffer;
2007:
2008: growBuffer(buffer);
2009: out = &buffer[index];
2010: }
2011: }
2012: }
2013: } else {
1.156 daniel 2014: /* invalid for UTF-8 , use COPY(out); !!! */
1.135 daniel 2015: *out++ = cur;
1.140 daniel 2016: if (out - buffer > buffer_size - XML_PARSER_BUFFER_SIZE) {
1.135 daniel 2017: int index = out - buffer;
2018:
2019: growBuffer(buffer);
2020: out = &buffer[index];
2021: }
2022: str++;
2023: }
2024: cur = *str;
2025: }
2026: *out = 0;
2027: return(buffer);
2028: }
2029:
1.1 veillard 2030:
1.28 daniel 2031: /************************************************************************
2032: * *
1.75 daniel 2033: * Commodity functions to handle encodings *
2034: * *
2035: ************************************************************************/
2036:
2037: /**
2038: * xmlSwitchEncoding:
2039: * @ctxt: the parser context
1.124 daniel 2040: * @enc: the encoding value (number)
1.75 daniel 2041: *
2042: * change the input functions when discovering the character encoding
2043: * of a given entity.
2044: */
2045: void
2046: xmlSwitchEncoding(xmlParserCtxtPtr ctxt, xmlCharEncoding enc)
2047: {
1.156 daniel 2048: xmlCharEncodingHandlerPtr handler;
2049:
2050: handler = xmlGetCharEncodingHandler(enc);
2051: if (handler != NULL) {
2052: if (ctxt->input != NULL) {
2053: if (ctxt->input->buf != NULL) {
2054: if (ctxt->input->buf->encoder != NULL) {
2055: if ((ctxt->sax != NULL) && (ctxt->sax->error != NULL))
2056: ctxt->sax->error(ctxt->userData,
2057: "xmlSwitchEncoding : encoder already regitered\n");
2058: return;
2059: }
2060: ctxt->input->buf->encoder = handler;
2061:
2062: /*
2063: * Is there already some content down the pipe to convert
2064: */
2065: if ((ctxt->input->buf->buffer != NULL) &&
2066: (ctxt->input->buf->buffer->use > 0)) {
2067: xmlChar *buf;
2068: int res, len, size;
2069: int processed;
2070:
2071: /*
2072: * Specific handling of the Byte Order Mark for
2073: * UTF-16
2074: */
2075: if ((enc == XML_CHAR_ENCODING_UTF16LE) &&
2076: (ctxt->input->cur[0] == 0xFF) &&
2077: (ctxt->input->cur[1] == 0xFE)) {
2078: SKIP(2);
2079: }
2080: if ((enc == XML_CHAR_ENCODING_UTF16BE) &&
2081: (ctxt->input->cur[0] == 0xFE) &&
2082: (ctxt->input->cur[1] == 0xFF)) {
2083: SKIP(2);
2084: }
2085:
2086: /*
2087: * convert the non processed part
2088: */
2089: processed = ctxt->input->cur - ctxt->input->base;
2090: len = ctxt->input->buf->buffer->use - processed;
2091:
2092: if (len <= 0) {
2093: return;
2094: }
2095: size = ctxt->input->buf->buffer->use * 4;
2096: if (size < 4000)
2097: size = 4000;
1.167 daniel 2098: retry_larger:
1.160 daniel 2099: buf = (xmlChar *) xmlMalloc(size + 1);
1.156 daniel 2100: if (buf == NULL) {
2101: if ((ctxt->sax != NULL) && (ctxt->sax->error != NULL))
2102: ctxt->sax->error(ctxt->userData,
2103: "xmlSwitchEncoding : out of memory\n");
2104: return;
2105: }
1.160 daniel 2106: /* TODO !!! Handling of buf too small */
1.156 daniel 2107: res = handler->input(buf, size, ctxt->input->cur, &len);
1.167 daniel 2108: if (res == -1) {
2109: size *= 2;
2110: xmlFree(buf);
2111: goto retry_larger;
2112: }
1.156 daniel 2113: if ((res < 0) ||
2114: (len != ctxt->input->buf->buffer->use - processed)) {
2115: if ((ctxt->sax != NULL) && (ctxt->sax->error != NULL))
2116: ctxt->sax->error(ctxt->userData,
2117: "xmlSwitchEncoding : conversion failed\n");
2118: xmlFree(buf);
2119: return;
2120: }
1.167 daniel 2121:
1.156 daniel 2122: /*
2123: * Conversion succeeded, get rid of the old buffer
2124: */
2125: xmlFree(ctxt->input->buf->buffer->content);
2126: ctxt->input->buf->buffer->content = buf;
2127: ctxt->input->base = buf;
2128: ctxt->input->cur = buf;
2129: ctxt->input->buf->buffer->size = size;
2130: ctxt->input->buf->buffer->use = res;
1.160 daniel 2131: buf[res] = 0;
1.156 daniel 2132: }
2133: return;
2134: } else {
2135: if (ctxt->input->length == 0) {
2136: /*
2137: * When parsing a static memory array one must know the
2138: * size to be able to convert the buffer.
2139: */
2140: if ((ctxt->sax != NULL) && (ctxt->sax->error != NULL))
2141: ctxt->sax->error(ctxt->userData,
2142: "xmlSwitchEncoding : no input\n");
2143: return;
2144: } else {
2145: xmlChar *buf;
2146: int res, len;
2147: int processed = ctxt->input->cur - ctxt->input->base;
2148:
2149: /*
2150: * convert the non processed part
2151: */
2152: len = ctxt->input->length - processed;
2153: if (len <= 0) {
2154: if ((ctxt->sax != NULL) && (ctxt->sax->error != NULL))
2155: ctxt->sax->error(ctxt->userData,
2156: "xmlSwitchEncoding : input fully consumed?\n");
2157: return;
2158: }
2159: buf = (xmlChar *) xmlMalloc(ctxt->input->length * 4);
2160: if (buf == NULL) {
2161: if ((ctxt->sax != NULL) && (ctxt->sax->error != NULL))
2162: ctxt->sax->error(ctxt->userData,
2163: "xmlSwitchEncoding : out of memory\n");
2164: return;
2165: }
2166: res = handler->input(buf, ctxt->input->length * 4,
2167: ctxt->input->cur, &len);
2168: if ((res < 0) ||
2169: (len != ctxt->input->length - processed)) {
2170: if ((ctxt->sax != NULL) && (ctxt->sax->error != NULL))
2171: ctxt->sax->error(ctxt->userData,
2172: "xmlSwitchEncoding : conversion failed\n");
2173: xmlFree(buf);
2174: return;
2175: }
2176: /*
2177: * Conversion succeeded, get rid of the old buffer
2178: */
2179: if ((ctxt->input->free != NULL) &&
2180: (ctxt->input->base != NULL))
2181: ctxt->input->free((xmlChar *) ctxt->input->base);
2182: ctxt->input->base = ctxt->input->cur = buf;
2183: ctxt->input->length = res;
2184: }
2185: }
2186: } else {
2187: if ((ctxt->sax != NULL) && (ctxt->sax->error != NULL))
2188: ctxt->sax->error(ctxt->userData,
2189: "xmlSwitchEncoding : no input\n");
2190: }
2191: }
2192:
1.75 daniel 2193: switch (enc) {
2194: case XML_CHAR_ENCODING_ERROR:
1.123 daniel 2195: ctxt->errNo = XML_ERR_UNKNOWN_ENCODING;
1.75 daniel 2196: if ((ctxt->sax != NULL) && (ctxt->sax->error != NULL))
2197: ctxt->sax->error(ctxt->userData, "encoding unknown\n");
2198: ctxt->wellFormed = 0;
2199: break;
2200: case XML_CHAR_ENCODING_NONE:
2201: /* let's assume it's UTF-8 without the XML decl */
2202: return;
2203: case XML_CHAR_ENCODING_UTF8:
2204: /* default encoding, no conversion should be needed */
2205: return;
2206: case XML_CHAR_ENCODING_UTF16LE:
1.123 daniel 2207: ctxt->errNo = XML_ERR_UNSUPPORTED_ENCODING;
1.75 daniel 2208: if ((ctxt->sax != NULL) && (ctxt->sax->error != NULL))
2209: ctxt->sax->error(ctxt->userData,
2210: "char encoding UTF16 little endian not supported\n");
2211: break;
2212: case XML_CHAR_ENCODING_UTF16BE:
1.123 daniel 2213: ctxt->errNo = XML_ERR_UNSUPPORTED_ENCODING;
1.75 daniel 2214: if ((ctxt->sax != NULL) && (ctxt->sax->error != NULL))
2215: ctxt->sax->error(ctxt->userData,
2216: "char encoding UTF16 big endian not supported\n");
2217: break;
2218: case XML_CHAR_ENCODING_UCS4LE:
1.123 daniel 2219: ctxt->errNo = XML_ERR_UNSUPPORTED_ENCODING;
1.75 daniel 2220: if ((ctxt->sax != NULL) && (ctxt->sax->error != NULL))
2221: ctxt->sax->error(ctxt->userData,
2222: "char encoding USC4 little endian not supported\n");
2223: break;
2224: case XML_CHAR_ENCODING_UCS4BE:
1.123 daniel 2225: ctxt->errNo = XML_ERR_UNSUPPORTED_ENCODING;
1.75 daniel 2226: if ((ctxt->sax != NULL) && (ctxt->sax->error != NULL))
2227: ctxt->sax->error(ctxt->userData,
2228: "char encoding USC4 big endian not supported\n");
2229: break;
2230: case XML_CHAR_ENCODING_EBCDIC:
1.123 daniel 2231: ctxt->errNo = XML_ERR_UNSUPPORTED_ENCODING;
1.75 daniel 2232: if ((ctxt->sax != NULL) && (ctxt->sax->error != NULL))
2233: ctxt->sax->error(ctxt->userData,
2234: "char encoding EBCDIC not supported\n");
2235: break;
2236: case XML_CHAR_ENCODING_UCS4_2143:
1.123 daniel 2237: ctxt->errNo = XML_ERR_UNSUPPORTED_ENCODING;
1.75 daniel 2238: if ((ctxt->sax != NULL) && (ctxt->sax->error != NULL))
2239: ctxt->sax->error(ctxt->userData,
2240: "char encoding UCS4 2143 not supported\n");
2241: break;
2242: case XML_CHAR_ENCODING_UCS4_3412:
1.123 daniel 2243: ctxt->errNo = XML_ERR_UNSUPPORTED_ENCODING;
1.75 daniel 2244: if ((ctxt->sax != NULL) && (ctxt->sax->error != NULL))
2245: ctxt->sax->error(ctxt->userData,
2246: "char encoding UCS4 3412 not supported\n");
2247: break;
2248: case XML_CHAR_ENCODING_UCS2:
1.123 daniel 2249: ctxt->errNo = XML_ERR_UNSUPPORTED_ENCODING;
1.75 daniel 2250: if ((ctxt->sax != NULL) && (ctxt->sax->error != NULL))
2251: ctxt->sax->error(ctxt->userData,
2252: "char encoding UCS2 not supported\n");
2253: break;
2254: case XML_CHAR_ENCODING_8859_1:
1.123 daniel 2255: ctxt->errNo = XML_ERR_UNSUPPORTED_ENCODING;
1.75 daniel 2256: if ((ctxt->sax != NULL) && (ctxt->sax->error != NULL))
2257: ctxt->sax->error(ctxt->userData,
2258: "char encoding ISO_8859_1 ISO Latin 1 not supported\n");
2259: break;
2260: case XML_CHAR_ENCODING_8859_2:
1.123 daniel 2261: ctxt->errNo = XML_ERR_UNSUPPORTED_ENCODING;
1.75 daniel 2262: if ((ctxt->sax != NULL) && (ctxt->sax->error != NULL))
2263: ctxt->sax->error(ctxt->userData,
2264: "char encoding ISO_8859_2 ISO Latin 2 not supported\n");
2265: break;
2266: case XML_CHAR_ENCODING_8859_3:
1.123 daniel 2267: ctxt->errNo = XML_ERR_UNSUPPORTED_ENCODING;
1.75 daniel 2268: if ((ctxt->sax != NULL) && (ctxt->sax->error != NULL))
2269: ctxt->sax->error(ctxt->userData,
2270: "char encoding ISO_8859_3 not supported\n");
2271: break;
2272: case XML_CHAR_ENCODING_8859_4:
1.123 daniel 2273: ctxt->errNo = XML_ERR_UNSUPPORTED_ENCODING;
1.75 daniel 2274: if ((ctxt->sax != NULL) && (ctxt->sax->error != NULL))
2275: ctxt->sax->error(ctxt->userData,
2276: "char encoding ISO_8859_4 not supported\n");
2277: break;
2278: case XML_CHAR_ENCODING_8859_5:
1.123 daniel 2279: ctxt->errNo = XML_ERR_UNSUPPORTED_ENCODING;
1.75 daniel 2280: if ((ctxt->sax != NULL) && (ctxt->sax->error != NULL))
2281: ctxt->sax->error(ctxt->userData,
2282: "char encoding ISO_8859_5 not supported\n");
2283: break;
2284: case XML_CHAR_ENCODING_8859_6:
1.123 daniel 2285: ctxt->errNo = XML_ERR_UNSUPPORTED_ENCODING;
1.75 daniel 2286: if ((ctxt->sax != NULL) && (ctxt->sax->error != NULL))
2287: ctxt->sax->error(ctxt->userData,
2288: "char encoding ISO_8859_6 not supported\n");
2289: break;
2290: case XML_CHAR_ENCODING_8859_7:
1.123 daniel 2291: ctxt->errNo = XML_ERR_UNSUPPORTED_ENCODING;
1.75 daniel 2292: if ((ctxt->sax != NULL) && (ctxt->sax->error != NULL))
2293: ctxt->sax->error(ctxt->userData,
2294: "char encoding ISO_8859_7 not supported\n");
2295: break;
2296: case XML_CHAR_ENCODING_8859_8:
1.123 daniel 2297: ctxt->errNo = XML_ERR_UNSUPPORTED_ENCODING;
1.75 daniel 2298: if ((ctxt->sax != NULL) && (ctxt->sax->error != NULL))
2299: ctxt->sax->error(ctxt->userData,
2300: "char encoding ISO_8859_8 not supported\n");
2301: break;
2302: case XML_CHAR_ENCODING_8859_9:
1.123 daniel 2303: ctxt->errNo = XML_ERR_UNSUPPORTED_ENCODING;
1.75 daniel 2304: if ((ctxt->sax != NULL) && (ctxt->sax->error != NULL))
2305: ctxt->sax->error(ctxt->userData,
2306: "char encoding ISO_8859_9 not supported\n");
2307: break;
2308: case XML_CHAR_ENCODING_2022_JP:
1.123 daniel 2309: ctxt->errNo = XML_ERR_UNSUPPORTED_ENCODING;
1.75 daniel 2310: if ((ctxt->sax != NULL) && (ctxt->sax->error != NULL))
2311: ctxt->sax->error(ctxt->userData,
2312: "char encoding ISO-2022-JPnot supported\n");
2313: break;
2314: case XML_CHAR_ENCODING_SHIFT_JIS:
1.123 daniel 2315: ctxt->errNo = XML_ERR_UNSUPPORTED_ENCODING;
1.75 daniel 2316: if ((ctxt->sax != NULL) && (ctxt->sax->error != NULL))
2317: ctxt->sax->error(ctxt->userData,
2318: "char encoding Shift_JISnot supported\n");
2319: break;
2320: case XML_CHAR_ENCODING_EUC_JP:
1.123 daniel 2321: ctxt->errNo = XML_ERR_UNSUPPORTED_ENCODING;
1.75 daniel 2322: if ((ctxt->sax != NULL) && (ctxt->sax->error != NULL))
2323: ctxt->sax->error(ctxt->userData,
2324: "char encoding EUC-JPnot supported\n");
2325: break;
2326: }
2327: }
2328:
2329: /************************************************************************
2330: * *
1.123 daniel 2331: * Commodity functions to handle xmlChars *
1.28 daniel 2332: * *
2333: ************************************************************************/
2334:
1.50 daniel 2335: /**
2336: * xmlStrndup:
1.123 daniel 2337: * @cur: the input xmlChar *
1.50 daniel 2338: * @len: the len of @cur
2339: *
1.123 daniel 2340: * a strndup for array of xmlChar's
1.68 daniel 2341: *
1.123 daniel 2342: * Returns a new xmlChar * or NULL
1.1 veillard 2343: */
1.123 daniel 2344: xmlChar *
2345: xmlStrndup(const xmlChar *cur, int len) {
1.135 daniel 2346: xmlChar *ret;
2347:
2348: if ((cur == NULL) || (len < 0)) return(NULL);
2349: ret = xmlMalloc((len + 1) * sizeof(xmlChar));
1.1 veillard 2350: if (ret == NULL) {
1.86 daniel 2351: fprintf(stderr, "malloc of %ld byte failed\n",
1.123 daniel 2352: (len + 1) * (long)sizeof(xmlChar));
1.1 veillard 2353: return(NULL);
2354: }
1.123 daniel 2355: memcpy(ret, cur, len * sizeof(xmlChar));
1.1 veillard 2356: ret[len] = 0;
2357: return(ret);
2358: }
2359:
1.50 daniel 2360: /**
2361: * xmlStrdup:
1.123 daniel 2362: * @cur: the input xmlChar *
1.50 daniel 2363: *
1.152 daniel 2364: * a strdup for array of xmlChar's. Since they are supposed to be
2365: * encoded in UTF-8 or an encoding with 8bit based chars, we assume
2366: * a termination mark of '0'.
1.68 daniel 2367: *
1.123 daniel 2368: * Returns a new xmlChar * or NULL
1.1 veillard 2369: */
1.123 daniel 2370: xmlChar *
2371: xmlStrdup(const xmlChar *cur) {
2372: const xmlChar *p = cur;
1.1 veillard 2373:
1.135 daniel 2374: if (cur == NULL) return(NULL);
1.152 daniel 2375: while (*p != 0) p++;
1.1 veillard 2376: return(xmlStrndup(cur, p - cur));
2377: }
2378:
1.50 daniel 2379: /**
2380: * xmlCharStrndup:
2381: * @cur: the input char *
2382: * @len: the len of @cur
2383: *
1.123 daniel 2384: * a strndup for char's to xmlChar's
1.68 daniel 2385: *
1.123 daniel 2386: * Returns a new xmlChar * or NULL
1.45 daniel 2387: */
2388:
1.123 daniel 2389: xmlChar *
1.55 daniel 2390: xmlCharStrndup(const char *cur, int len) {
1.45 daniel 2391: int i;
1.135 daniel 2392: xmlChar *ret;
2393:
2394: if ((cur == NULL) || (len < 0)) return(NULL);
2395: ret = xmlMalloc((len + 1) * sizeof(xmlChar));
1.45 daniel 2396: if (ret == NULL) {
1.86 daniel 2397: fprintf(stderr, "malloc of %ld byte failed\n",
1.123 daniel 2398: (len + 1) * (long)sizeof(xmlChar));
1.45 daniel 2399: return(NULL);
2400: }
2401: for (i = 0;i < len;i++)
1.123 daniel 2402: ret[i] = (xmlChar) cur[i];
1.45 daniel 2403: ret[len] = 0;
2404: return(ret);
2405: }
2406:
1.50 daniel 2407: /**
2408: * xmlCharStrdup:
2409: * @cur: the input char *
2410: * @len: the len of @cur
2411: *
1.123 daniel 2412: * a strdup for char's to xmlChar's
1.68 daniel 2413: *
1.123 daniel 2414: * Returns a new xmlChar * or NULL
1.45 daniel 2415: */
2416:
1.123 daniel 2417: xmlChar *
1.55 daniel 2418: xmlCharStrdup(const char *cur) {
1.45 daniel 2419: const char *p = cur;
2420:
1.135 daniel 2421: if (cur == NULL) return(NULL);
1.45 daniel 2422: while (*p != '\0') p++;
2423: return(xmlCharStrndup(cur, p - cur));
2424: }
2425:
1.50 daniel 2426: /**
2427: * xmlStrcmp:
1.123 daniel 2428: * @str1: the first xmlChar *
2429: * @str2: the second xmlChar *
1.50 daniel 2430: *
1.123 daniel 2431: * a strcmp for xmlChar's
1.68 daniel 2432: *
2433: * Returns the integer result of the comparison
1.14 veillard 2434: */
2435:
1.55 daniel 2436: int
1.123 daniel 2437: xmlStrcmp(const xmlChar *str1, const xmlChar *str2) {
1.14 veillard 2438: register int tmp;
2439:
1.135 daniel 2440: if ((str1 == NULL) && (str2 == NULL)) return(0);
2441: if (str1 == NULL) return(-1);
2442: if (str2 == NULL) return(1);
1.14 veillard 2443: do {
2444: tmp = *str1++ - *str2++;
2445: if (tmp != 0) return(tmp);
2446: } while ((*str1 != 0) && (*str2 != 0));
2447: return (*str1 - *str2);
2448: }
2449:
1.50 daniel 2450: /**
2451: * xmlStrncmp:
1.123 daniel 2452: * @str1: the first xmlChar *
2453: * @str2: the second xmlChar *
1.50 daniel 2454: * @len: the max comparison length
2455: *
1.123 daniel 2456: * a strncmp for xmlChar's
1.68 daniel 2457: *
2458: * Returns the integer result of the comparison
1.14 veillard 2459: */
2460:
1.55 daniel 2461: int
1.123 daniel 2462: xmlStrncmp(const xmlChar *str1, const xmlChar *str2, int len) {
1.14 veillard 2463: register int tmp;
2464:
2465: if (len <= 0) return(0);
1.135 daniel 2466: if ((str1 == NULL) && (str2 == NULL)) return(0);
2467: if (str1 == NULL) return(-1);
2468: if (str2 == NULL) return(1);
1.14 veillard 2469: do {
2470: tmp = *str1++ - *str2++;
2471: if (tmp != 0) return(tmp);
2472: len--;
2473: if (len <= 0) return(0);
2474: } while ((*str1 != 0) && (*str2 != 0));
2475: return (*str1 - *str2);
2476: }
2477:
1.50 daniel 2478: /**
2479: * xmlStrchr:
1.123 daniel 2480: * @str: the xmlChar * array
2481: * @val: the xmlChar to search
1.50 daniel 2482: *
1.123 daniel 2483: * a strchr for xmlChar's
1.68 daniel 2484: *
1.123 daniel 2485: * Returns the xmlChar * for the first occurence or NULL.
1.14 veillard 2486: */
2487:
1.123 daniel 2488: const xmlChar *
2489: xmlStrchr(const xmlChar *str, xmlChar val) {
1.135 daniel 2490: if (str == NULL) return(NULL);
1.14 veillard 2491: while (*str != 0) {
1.123 daniel 2492: if (*str == val) return((xmlChar *) str);
1.14 veillard 2493: str++;
2494: }
2495: return(NULL);
1.89 daniel 2496: }
2497:
2498: /**
2499: * xmlStrstr:
1.123 daniel 2500: * @str: the xmlChar * array (haystack)
2501: * @val: the xmlChar to search (needle)
1.89 daniel 2502: *
1.123 daniel 2503: * a strstr for xmlChar's
1.89 daniel 2504: *
1.123 daniel 2505: * Returns the xmlChar * for the first occurence or NULL.
1.89 daniel 2506: */
2507:
1.123 daniel 2508: const xmlChar *
2509: xmlStrstr(const xmlChar *str, xmlChar *val) {
1.89 daniel 2510: int n;
2511:
2512: if (str == NULL) return(NULL);
2513: if (val == NULL) return(NULL);
2514: n = xmlStrlen(val);
2515:
2516: if (n == 0) return(str);
2517: while (*str != 0) {
2518: if (*str == *val) {
1.123 daniel 2519: if (!xmlStrncmp(str, val, n)) return((const xmlChar *) str);
1.89 daniel 2520: }
2521: str++;
2522: }
2523: return(NULL);
2524: }
2525:
2526: /**
2527: * xmlStrsub:
1.123 daniel 2528: * @str: the xmlChar * array (haystack)
1.89 daniel 2529: * @start: the index of the first char (zero based)
2530: * @len: the length of the substring
2531: *
2532: * Extract a substring of a given string
2533: *
1.123 daniel 2534: * Returns the xmlChar * for the first occurence or NULL.
1.89 daniel 2535: */
2536:
1.123 daniel 2537: xmlChar *
2538: xmlStrsub(const xmlChar *str, int start, int len) {
1.89 daniel 2539: int i;
2540:
2541: if (str == NULL) return(NULL);
2542: if (start < 0) return(NULL);
1.90 daniel 2543: if (len < 0) return(NULL);
1.89 daniel 2544:
2545: for (i = 0;i < start;i++) {
2546: if (*str == 0) return(NULL);
2547: str++;
2548: }
2549: if (*str == 0) return(NULL);
2550: return(xmlStrndup(str, len));
1.14 veillard 2551: }
1.28 daniel 2552:
1.50 daniel 2553: /**
2554: * xmlStrlen:
1.123 daniel 2555: * @str: the xmlChar * array
1.50 daniel 2556: *
1.127 daniel 2557: * length of a xmlChar's string
1.68 daniel 2558: *
1.123 daniel 2559: * Returns the number of xmlChar contained in the ARRAY.
1.45 daniel 2560: */
2561:
1.55 daniel 2562: int
1.123 daniel 2563: xmlStrlen(const xmlChar *str) {
1.45 daniel 2564: int len = 0;
2565:
2566: if (str == NULL) return(0);
2567: while (*str != 0) {
2568: str++;
2569: len++;
2570: }
2571: return(len);
2572: }
2573:
1.50 daniel 2574: /**
2575: * xmlStrncat:
1.123 daniel 2576: * @cur: the original xmlChar * array
2577: * @add: the xmlChar * array added
1.50 daniel 2578: * @len: the length of @add
2579: *
1.123 daniel 2580: * a strncat for array of xmlChar's
1.68 daniel 2581: *
1.123 daniel 2582: * Returns a new xmlChar * containing the concatenated string.
1.45 daniel 2583: */
2584:
1.123 daniel 2585: xmlChar *
2586: xmlStrncat(xmlChar *cur, const xmlChar *add, int len) {
1.45 daniel 2587: int size;
1.123 daniel 2588: xmlChar *ret;
1.45 daniel 2589:
2590: if ((add == NULL) || (len == 0))
2591: return(cur);
2592: if (cur == NULL)
2593: return(xmlStrndup(add, len));
2594:
2595: size = xmlStrlen(cur);
1.123 daniel 2596: ret = xmlRealloc(cur, (size + len + 1) * sizeof(xmlChar));
1.45 daniel 2597: if (ret == NULL) {
1.86 daniel 2598: fprintf(stderr, "xmlStrncat: realloc of %ld byte failed\n",
1.123 daniel 2599: (size + len + 1) * (long)sizeof(xmlChar));
1.45 daniel 2600: return(cur);
2601: }
1.123 daniel 2602: memcpy(&ret[size], add, len * sizeof(xmlChar));
1.45 daniel 2603: ret[size + len] = 0;
2604: return(ret);
2605: }
2606:
1.50 daniel 2607: /**
2608: * xmlStrcat:
1.123 daniel 2609: * @cur: the original xmlChar * array
2610: * @add: the xmlChar * array added
1.50 daniel 2611: *
1.152 daniel 2612: * a strcat for array of xmlChar's. Since they are supposed to be
2613: * encoded in UTF-8 or an encoding with 8bit based chars, we assume
2614: * a termination mark of '0'.
1.68 daniel 2615: *
1.123 daniel 2616: * Returns a new xmlChar * containing the concatenated string.
1.45 daniel 2617: */
1.123 daniel 2618: xmlChar *
2619: xmlStrcat(xmlChar *cur, const xmlChar *add) {
2620: const xmlChar *p = add;
1.45 daniel 2621:
2622: if (add == NULL) return(cur);
2623: if (cur == NULL)
2624: return(xmlStrdup(add));
2625:
1.152 daniel 2626: while (*p != 0) p++;
1.45 daniel 2627: return(xmlStrncat(cur, add, p - add));
2628: }
2629:
2630: /************************************************************************
2631: * *
2632: * Commodity functions, cleanup needed ? *
2633: * *
2634: ************************************************************************/
2635:
1.50 daniel 2636: /**
2637: * areBlanks:
2638: * @ctxt: an XML parser context
1.123 daniel 2639: * @str: a xmlChar *
1.50 daniel 2640: * @len: the size of @str
2641: *
1.45 daniel 2642: * Is this a sequence of blank chars that one can ignore ?
1.50 daniel 2643: *
1.68 daniel 2644: * Returns 1 if ignorable 0 otherwise.
1.45 daniel 2645: */
2646:
1.123 daniel 2647: static int areBlanks(xmlParserCtxtPtr ctxt, const xmlChar *str, int len) {
1.104 daniel 2648: int i, ret;
1.45 daniel 2649: xmlNodePtr lastChild;
2650:
2651: for (i = 0;i < len;i++)
2652: if (!(IS_BLANK(str[i]))) return(0);
2653:
1.152 daniel 2654: if (RAW != '<') return(0);
1.72 daniel 2655: if (ctxt->node == NULL) return(0);
1.104 daniel 2656: if (ctxt->myDoc != NULL) {
2657: ret = xmlIsMixedElement(ctxt->myDoc, ctxt->node->name);
2658: if (ret == 0) return(1);
2659: if (ret == 1) return(0);
2660: }
2661: /*
2662: * heuristic
2663: */
1.45 daniel 2664: lastChild = xmlGetLastChild(ctxt->node);
2665: if (lastChild == NULL) {
2666: if (ctxt->node->content != NULL) return(0);
2667: } else if (xmlNodeIsText(lastChild))
2668: return(0);
1.157 daniel 2669: else if ((ctxt->node->children != NULL) &&
2670: (xmlNodeIsText(ctxt->node->children)))
1.104 daniel 2671: return(0);
1.45 daniel 2672: return(1);
2673: }
2674:
1.50 daniel 2675: /**
2676: * xmlHandleEntity:
2677: * @ctxt: an XML parser context
2678: * @entity: an XML entity pointer.
2679: *
2680: * Default handling of defined entities, when should we define a new input
1.45 daniel 2681: * stream ? When do we just handle that as a set of chars ?
1.99 daniel 2682: *
2683: * OBSOLETE: to be removed at some point.
1.45 daniel 2684: */
2685:
1.55 daniel 2686: void
2687: xmlHandleEntity(xmlParserCtxtPtr ctxt, xmlEntityPtr entity) {
1.45 daniel 2688: int len;
1.50 daniel 2689: xmlParserInputPtr input;
1.45 daniel 2690:
2691: if (entity->content == NULL) {
1.123 daniel 2692: ctxt->errNo = XML_ERR_INTERNAL_ERROR;
1.55 daniel 2693: if ((ctxt->sax != NULL) && (ctxt->sax->error != NULL))
1.74 daniel 2694: ctxt->sax->error(ctxt->userData, "xmlHandleEntity %s: content == NULL\n",
1.45 daniel 2695: entity->name);
1.59 daniel 2696: ctxt->wellFormed = 0;
1.45 daniel 2697: return;
2698: }
2699: len = xmlStrlen(entity->content);
2700: if (len <= 2) goto handle_as_char;
2701:
2702: /*
2703: * Redefine its content as an input stream.
2704: */
1.50 daniel 2705: input = xmlNewEntityInputStream(ctxt, entity);
2706: xmlPushInput(ctxt, input);
1.45 daniel 2707: return;
2708:
2709: handle_as_char:
2710: /*
2711: * Just handle the content as a set of chars.
2712: */
1.171 ! daniel 2713: if ((ctxt->sax != NULL) && (!ctxt->disableSAX) &&
! 2714: (ctxt->sax->characters != NULL))
1.74 daniel 2715: ctxt->sax->characters(ctxt->userData, entity->content, len);
1.45 daniel 2716:
2717: }
2718:
2719: /*
2720: * Forward definition for recursive behaviour.
2721: */
1.77 daniel 2722: void xmlParsePEReference(xmlParserCtxtPtr ctxt);
2723: void xmlParseReference(xmlParserCtxtPtr ctxt);
1.45 daniel 2724:
1.28 daniel 2725: /************************************************************************
2726: * *
2727: * Extra stuff for namespace support *
2728: * Relates to http://www.w3.org/TR/WD-xml-names *
2729: * *
2730: ************************************************************************/
2731:
1.50 daniel 2732: /**
2733: * xmlNamespaceParseNCName:
2734: * @ctxt: an XML parser context
2735: *
2736: * parse an XML namespace name.
1.28 daniel 2737: *
2738: * [NS 3] NCName ::= (Letter | '_') (NCNameChar)*
2739: *
2740: * [NS 4] NCNameChar ::= Letter | Digit | '.' | '-' | '_' |
2741: * CombiningChar | Extender
1.68 daniel 2742: *
2743: * Returns the namespace name or NULL
1.28 daniel 2744: */
2745:
1.123 daniel 2746: xmlChar *
1.55 daniel 2747: xmlNamespaceParseNCName(xmlParserCtxtPtr ctxt) {
1.152 daniel 2748: xmlChar buf[XML_MAX_NAMELEN + 5];
2749: int len = 0, l;
2750: int cur = CUR_CHAR(l);
1.28 daniel 2751:
1.156 daniel 2752: /* load first the value of the char !!! */
1.152 daniel 2753: if (!IS_LETTER(cur) && (cur != '_')) return(NULL);
1.28 daniel 2754:
1.152 daniel 2755: while ((IS_LETTER(cur)) || (IS_DIGIT(cur)) ||
2756: (cur == '.') || (cur == '-') ||
2757: (cur == '_') ||
2758: (IS_COMBINING(cur)) ||
2759: (IS_EXTENDER(cur))) {
2760: COPY_BUF(l,buf,len,cur);
2761: NEXTL(l);
2762: cur = CUR_CHAR(l);
1.91 daniel 2763: if (len >= XML_MAX_NAMELEN) {
2764: fprintf(stderr,
2765: "xmlNamespaceParseNCName: reached XML_MAX_NAMELEN limit\n");
1.152 daniel 2766: while ((IS_LETTER(cur)) || (IS_DIGIT(cur)) ||
2767: (cur == '.') || (cur == '-') ||
2768: (cur == '_') ||
2769: (IS_COMBINING(cur)) ||
2770: (IS_EXTENDER(cur))) {
2771: NEXTL(l);
2772: cur = CUR_CHAR(l);
2773: }
1.91 daniel 2774: break;
2775: }
2776: }
2777: return(xmlStrndup(buf, len));
1.28 daniel 2778: }
2779:
1.50 daniel 2780: /**
2781: * xmlNamespaceParseQName:
2782: * @ctxt: an XML parser context
1.123 daniel 2783: * @prefix: a xmlChar **
1.50 daniel 2784: *
2785: * parse an XML qualified name
1.28 daniel 2786: *
2787: * [NS 5] QName ::= (Prefix ':')? LocalPart
2788: *
2789: * [NS 6] Prefix ::= NCName
2790: *
2791: * [NS 7] LocalPart ::= NCName
1.68 daniel 2792: *
1.127 daniel 2793: * Returns the local part, and prefix is updated
1.50 daniel 2794: * to get the Prefix if any.
1.28 daniel 2795: */
2796:
1.123 daniel 2797: xmlChar *
2798: xmlNamespaceParseQName(xmlParserCtxtPtr ctxt, xmlChar **prefix) {
2799: xmlChar *ret = NULL;
1.28 daniel 2800:
2801: *prefix = NULL;
2802: ret = xmlNamespaceParseNCName(ctxt);
1.152 daniel 2803: if (RAW == ':') {
1.28 daniel 2804: *prefix = ret;
1.40 daniel 2805: NEXT;
1.28 daniel 2806: ret = xmlNamespaceParseNCName(ctxt);
2807: }
2808:
2809: return(ret);
2810: }
2811:
1.50 daniel 2812: /**
1.72 daniel 2813: * xmlSplitQName:
1.162 daniel 2814: * @ctxt: an XML parser context
1.72 daniel 2815: * @name: an XML parser context
1.123 daniel 2816: * @prefix: a xmlChar **
1.72 daniel 2817: *
2818: * parse an XML qualified name string
2819: *
2820: * [NS 5] QName ::= (Prefix ':')? LocalPart
2821: *
2822: * [NS 6] Prefix ::= NCName
2823: *
2824: * [NS 7] LocalPart ::= NCName
2825: *
1.127 daniel 2826: * Returns the local part, and prefix is updated
1.72 daniel 2827: * to get the Prefix if any.
2828: */
2829:
1.123 daniel 2830: xmlChar *
1.162 daniel 2831: xmlSplitQName(xmlParserCtxtPtr ctxt, const xmlChar *name, xmlChar **prefix) {
2832: xmlChar buf[XML_MAX_NAMELEN + 5];
2833: int len = 0;
1.123 daniel 2834: xmlChar *ret = NULL;
2835: const xmlChar *cur = name;
1.162 daniel 2836: int c,l;
1.72 daniel 2837:
2838: *prefix = NULL;
1.113 daniel 2839:
2840: /* xml: prefix is not really a namespace */
2841: if ((cur[0] == 'x') && (cur[1] == 'm') &&
2842: (cur[2] == 'l') && (cur[3] == ':'))
2843: return(xmlStrdup(name));
2844:
1.162 daniel 2845: /* nasty but valid */
2846: if (cur[0] == ':')
2847: return(xmlStrdup(name));
2848:
2849: c = CUR_SCHAR(cur, l);
2850: if (!IS_LETTER(c) && (c != '_')) return(NULL);
1.72 daniel 2851:
1.162 daniel 2852: while ((IS_LETTER(c)) || (IS_DIGIT(c)) ||
2853: (c == '.') || (c == '-') ||
2854: (c == '_') ||
2855: (IS_COMBINING(c)) ||
2856: (IS_EXTENDER(c))) {
2857: COPY_BUF(l,buf,len,c);
2858: cur += l;
2859: c = CUR_SCHAR(cur, l);
2860: }
1.72 daniel 2861:
1.162 daniel 2862: ret = xmlStrndup(buf, len);
1.72 daniel 2863:
1.162 daniel 2864: if (c == ':') {
2865: cur += l;
1.163 daniel 2866: c = CUR_SCHAR(cur, l);
1.162 daniel 2867: if (!IS_LETTER(c) && (c != '_')) return(ret);
1.72 daniel 2868: *prefix = ret;
1.162 daniel 2869: len = 0;
1.72 daniel 2870:
1.162 daniel 2871: while ((IS_LETTER(c)) || (IS_DIGIT(c)) ||
2872: (c == '.') || (c == '-') ||
2873: (c == '_') ||
2874: (IS_COMBINING(c)) ||
2875: (IS_EXTENDER(c))) {
2876: COPY_BUF(l,buf,len,c);
2877: cur += l;
2878: c = CUR_SCHAR(cur, l);
2879: }
1.72 daniel 2880:
1.162 daniel 2881: ret = xmlStrndup(buf, len);
1.72 daniel 2882: }
2883:
2884: return(ret);
2885: }
2886: /**
1.50 daniel 2887: * xmlNamespaceParseNSDef:
2888: * @ctxt: an XML parser context
2889: *
2890: * parse a namespace prefix declaration
1.28 daniel 2891: *
2892: * [NS 1] NSDef ::= PrefixDef Eq SystemLiteral
2893: *
2894: * [NS 2] PrefixDef ::= 'xmlns' (':' NCName)?
1.68 daniel 2895: *
2896: * Returns the namespace name
1.28 daniel 2897: */
2898:
1.123 daniel 2899: xmlChar *
1.55 daniel 2900: xmlNamespaceParseNSDef(xmlParserCtxtPtr ctxt) {
1.123 daniel 2901: xmlChar *name = NULL;
1.28 daniel 2902:
1.152 daniel 2903: if ((RAW == 'x') && (NXT(1) == 'm') &&
1.40 daniel 2904: (NXT(2) == 'l') && (NXT(3) == 'n') &&
2905: (NXT(4) == 's')) {
2906: SKIP(5);
1.152 daniel 2907: if (RAW == ':') {
1.40 daniel 2908: NEXT;
1.28 daniel 2909: name = xmlNamespaceParseNCName(ctxt);
2910: }
2911: }
1.39 daniel 2912: return(name);
1.28 daniel 2913: }
2914:
1.50 daniel 2915: /**
2916: * xmlParseQuotedString:
2917: * @ctxt: an XML parser context
2918: *
1.45 daniel 2919: * [OLD] Parse and return a string between quotes or doublequotes
1.110 daniel 2920: * To be removed at next drop of binary compatibility
1.68 daniel 2921: *
2922: * Returns the string parser or NULL.
1.45 daniel 2923: */
1.123 daniel 2924: xmlChar *
1.55 daniel 2925: xmlParseQuotedString(xmlParserCtxtPtr ctxt) {
1.135 daniel 2926: xmlChar *buf = NULL;
1.152 daniel 2927: int len = 0,l;
1.140 daniel 2928: int size = XML_PARSER_BUFFER_SIZE;
1.152 daniel 2929: int c;
1.45 daniel 2930:
1.135 daniel 2931: buf = (xmlChar *) xmlMalloc(size * sizeof(xmlChar));
2932: if (buf == NULL) {
2933: fprintf(stderr, "malloc of %d byte failed\n", size);
2934: return(NULL);
2935: }
1.152 daniel 2936: if (RAW == '"') {
1.45 daniel 2937: NEXT;
1.152 daniel 2938: c = CUR_CHAR(l);
1.135 daniel 2939: while (IS_CHAR(c) && (c != '"')) {
1.152 daniel 2940: if (len + 5 >= size) {
1.135 daniel 2941: size *= 2;
2942: buf = xmlRealloc(buf, size * sizeof(xmlChar));
2943: if (buf == NULL) {
2944: fprintf(stderr, "realloc of %d byte failed\n", size);
2945: return(NULL);
2946: }
2947: }
1.152 daniel 2948: COPY_BUF(l,buf,len,c);
2949: NEXTL(l);
2950: c = CUR_CHAR(l);
1.135 daniel 2951: }
2952: if (c != '"') {
1.123 daniel 2953: ctxt->errNo = XML_ERR_STRING_NOT_CLOSED;
1.55 daniel 2954: if ((ctxt->sax != NULL) && (ctxt->sax->error != NULL))
1.135 daniel 2955: ctxt->sax->error(ctxt->userData,
2956: "String not closed \"%.50s\"\n", buf);
1.59 daniel 2957: ctxt->wellFormed = 0;
1.55 daniel 2958: } else {
1.45 daniel 2959: NEXT;
2960: }
1.152 daniel 2961: } else if (RAW == '\''){
1.45 daniel 2962: NEXT;
1.135 daniel 2963: c = CUR;
2964: while (IS_CHAR(c) && (c != '\'')) {
2965: if (len + 1 >= size) {
2966: size *= 2;
2967: buf = xmlRealloc(buf, size * sizeof(xmlChar));
2968: if (buf == NULL) {
2969: fprintf(stderr, "realloc of %d byte failed\n", size);
2970: return(NULL);
2971: }
2972: }
2973: buf[len++] = c;
2974: NEXT;
2975: c = CUR;
2976: }
1.152 daniel 2977: if (RAW != '\'') {
1.123 daniel 2978: ctxt->errNo = XML_ERR_STRING_NOT_CLOSED;
1.55 daniel 2979: if ((ctxt->sax != NULL) && (ctxt->sax->error != NULL))
1.135 daniel 2980: ctxt->sax->error(ctxt->userData,
2981: "String not closed \"%.50s\"\n", buf);
1.59 daniel 2982: ctxt->wellFormed = 0;
1.55 daniel 2983: } else {
1.45 daniel 2984: NEXT;
2985: }
2986: }
1.135 daniel 2987: return(buf);
1.45 daniel 2988: }
2989:
1.50 daniel 2990: /**
2991: * xmlParseNamespace:
2992: * @ctxt: an XML parser context
2993: *
1.45 daniel 2994: * [OLD] xmlParseNamespace: parse specific PI '<?namespace ...' constructs.
2995: *
2996: * This is what the older xml-name Working Draft specified, a bunch of
2997: * other stuff may still rely on it, so support is still here as
1.127 daniel 2998: * if it was declared on the root of the Tree:-(
1.110 daniel 2999: *
3000: * To be removed at next drop of binary compatibility
1.45 daniel 3001: */
3002:
1.55 daniel 3003: void
3004: xmlParseNamespace(xmlParserCtxtPtr ctxt) {
1.123 daniel 3005: xmlChar *href = NULL;
3006: xmlChar *prefix = NULL;
1.45 daniel 3007: int garbage = 0;
3008:
3009: /*
3010: * We just skipped "namespace" or "xml:namespace"
3011: */
3012: SKIP_BLANKS;
3013:
1.153 daniel 3014: while (IS_CHAR(RAW) && (RAW != '>')) {
1.45 daniel 3015: /*
3016: * We can have "ns" or "prefix" attributes
3017: * Old encoding as 'href' or 'AS' attributes is still supported
3018: */
1.152 daniel 3019: if ((RAW == 'n') && (NXT(1) == 's')) {
1.45 daniel 3020: garbage = 0;
3021: SKIP(2);
3022: SKIP_BLANKS;
3023:
1.152 daniel 3024: if (RAW != '=') continue;
1.45 daniel 3025: NEXT;
3026: SKIP_BLANKS;
3027:
3028: href = xmlParseQuotedString(ctxt);
3029: SKIP_BLANKS;
1.152 daniel 3030: } else if ((RAW == 'h') && (NXT(1) == 'r') &&
1.45 daniel 3031: (NXT(2) == 'e') && (NXT(3) == 'f')) {
3032: garbage = 0;
3033: SKIP(4);
3034: SKIP_BLANKS;
3035:
1.152 daniel 3036: if (RAW != '=') continue;
1.45 daniel 3037: NEXT;
3038: SKIP_BLANKS;
3039:
3040: href = xmlParseQuotedString(ctxt);
3041: SKIP_BLANKS;
1.152 daniel 3042: } else if ((RAW == 'p') && (NXT(1) == 'r') &&
1.45 daniel 3043: (NXT(2) == 'e') && (NXT(3) == 'f') &&
3044: (NXT(4) == 'i') && (NXT(5) == 'x')) {
3045: garbage = 0;
3046: SKIP(6);
3047: SKIP_BLANKS;
3048:
1.152 daniel 3049: if (RAW != '=') continue;
1.45 daniel 3050: NEXT;
3051: SKIP_BLANKS;
3052:
3053: prefix = xmlParseQuotedString(ctxt);
3054: SKIP_BLANKS;
1.152 daniel 3055: } else if ((RAW == 'A') && (NXT(1) == 'S')) {
1.45 daniel 3056: garbage = 0;
3057: SKIP(2);
3058: SKIP_BLANKS;
3059:
1.152 daniel 3060: if (RAW != '=') continue;
1.45 daniel 3061: NEXT;
3062: SKIP_BLANKS;
3063:
3064: prefix = xmlParseQuotedString(ctxt);
3065: SKIP_BLANKS;
1.152 daniel 3066: } else if ((RAW == '?') && (NXT(1) == '>')) {
1.45 daniel 3067: garbage = 0;
1.91 daniel 3068: NEXT;
1.45 daniel 3069: } else {
3070: /*
3071: * Found garbage when parsing the namespace
3072: */
1.122 daniel 3073: if (!garbage) {
1.55 daniel 3074: if ((ctxt->sax != NULL) && (ctxt->sax->error != NULL))
1.122 daniel 3075: ctxt->sax->error(ctxt->userData,
3076: "xmlParseNamespace found garbage\n");
3077: }
1.123 daniel 3078: ctxt->errNo = XML_ERR_NS_DECL_ERROR;
1.59 daniel 3079: ctxt->wellFormed = 0;
1.45 daniel 3080: NEXT;
3081: }
3082: }
3083:
3084: MOVETO_ENDTAG(CUR_PTR);
3085: NEXT;
3086:
3087: /*
3088: * Register the DTD.
1.72 daniel 3089: if (href != NULL)
3090: if ((ctxt->sax != NULL) && (ctxt->sax->globalNamespace != NULL))
1.74 daniel 3091: ctxt->sax->globalNamespace(ctxt->userData, href, prefix);
1.45 daniel 3092: */
3093:
1.119 daniel 3094: if (prefix != NULL) xmlFree(prefix);
3095: if (href != NULL) xmlFree(href);
1.45 daniel 3096: }
3097:
1.28 daniel 3098: /************************************************************************
3099: * *
3100: * The parser itself *
3101: * Relates to http://www.w3.org/TR/REC-xml *
3102: * *
3103: ************************************************************************/
1.14 veillard 3104:
1.50 daniel 3105: /**
1.97 daniel 3106: * xmlScanName:
3107: * @ctxt: an XML parser context
3108: *
3109: * Trickery: parse an XML name but without consuming the input flow
3110: * Needed for rollback cases.
3111: *
3112: * [4] NameChar ::= Letter | Digit | '.' | '-' | '_' | ':' |
3113: * CombiningChar | Extender
3114: *
3115: * [5] Name ::= (Letter | '_' | ':') (NameChar)*
3116: *
3117: * [6] Names ::= Name (S Name)*
3118: *
3119: * Returns the Name parsed or NULL
3120: */
3121:
1.123 daniel 3122: xmlChar *
1.97 daniel 3123: xmlScanName(xmlParserCtxtPtr ctxt) {
1.123 daniel 3124: xmlChar buf[XML_MAX_NAMELEN];
1.97 daniel 3125: int len = 0;
3126:
3127: GROW;
1.152 daniel 3128: if (!IS_LETTER(RAW) && (RAW != '_') &&
3129: (RAW != ':')) {
1.97 daniel 3130: return(NULL);
3131: }
3132:
3133: while ((IS_LETTER(NXT(len))) || (IS_DIGIT(NXT(len))) ||
3134: (NXT(len) == '.') || (NXT(len) == '-') ||
3135: (NXT(len) == '_') || (NXT(len) == ':') ||
3136: (IS_COMBINING(NXT(len))) ||
3137: (IS_EXTENDER(NXT(len)))) {
3138: buf[len] = NXT(len);
3139: len++;
3140: if (len >= XML_MAX_NAMELEN) {
3141: fprintf(stderr,
3142: "xmlScanName: reached XML_MAX_NAMELEN limit\n");
3143: while ((IS_LETTER(NXT(len))) || (IS_DIGIT(NXT(len))) ||
3144: (NXT(len) == '.') || (NXT(len) == '-') ||
3145: (NXT(len) == '_') || (NXT(len) == ':') ||
3146: (IS_COMBINING(NXT(len))) ||
3147: (IS_EXTENDER(NXT(len))))
3148: len++;
3149: break;
3150: }
3151: }
3152: return(xmlStrndup(buf, len));
3153: }
3154:
3155: /**
1.50 daniel 3156: * xmlParseName:
3157: * @ctxt: an XML parser context
3158: *
3159: * parse an XML name.
1.22 daniel 3160: *
3161: * [4] NameChar ::= Letter | Digit | '.' | '-' | '_' | ':' |
3162: * CombiningChar | Extender
3163: *
3164: * [5] Name ::= (Letter | '_' | ':') (NameChar)*
3165: *
3166: * [6] Names ::= Name (S Name)*
1.68 daniel 3167: *
3168: * Returns the Name parsed or NULL
1.1 veillard 3169: */
3170:
1.123 daniel 3171: xmlChar *
1.55 daniel 3172: xmlParseName(xmlParserCtxtPtr ctxt) {
1.160 daniel 3173: xmlChar buf[XML_MAX_NAMELEN + 5];
3174: int len = 0, l;
3175: int c;
1.1 veillard 3176:
1.91 daniel 3177: GROW;
1.160 daniel 3178: c = CUR_CHAR(l);
3179: if (!IS_LETTER(c) && (c != '_') &&
3180: (c != ':')) {
1.91 daniel 3181: return(NULL);
3182: }
1.40 daniel 3183:
1.160 daniel 3184: while ((IS_LETTER(c)) || (IS_DIGIT(c)) ||
3185: (c == '.') || (c == '-') ||
3186: (c == '_') || (c == ':') ||
3187: (IS_COMBINING(c)) ||
3188: (IS_EXTENDER(c))) {
3189: COPY_BUF(l,buf,len,c);
3190: NEXTL(l);
3191: c = CUR_CHAR(l);
1.91 daniel 3192: if (len >= XML_MAX_NAMELEN) {
3193: fprintf(stderr,
3194: "xmlParseName: reached XML_MAX_NAMELEN limit\n");
1.160 daniel 3195: while ((IS_LETTER(c)) || (IS_DIGIT(c)) ||
3196: (c == '.') || (c == '-') ||
3197: (c == '_') || (c == ':') ||
3198: (IS_COMBINING(c)) ||
3199: (IS_EXTENDER(c))) {
3200: NEXTL(l);
3201: c = CUR_CHAR(l);
1.97 daniel 3202: }
1.91 daniel 3203: break;
3204: }
3205: }
3206: return(xmlStrndup(buf, len));
1.22 daniel 3207: }
3208:
1.50 daniel 3209: /**
1.135 daniel 3210: * xmlParseStringName:
3211: * @ctxt: an XML parser context
3212: * @str: a pointer to an index in the string
3213: *
3214: * parse an XML name.
3215: *
3216: * [4] NameChar ::= Letter | Digit | '.' | '-' | '_' | ':' |
3217: * CombiningChar | Extender
3218: *
3219: * [5] Name ::= (Letter | '_' | ':') (NameChar)*
3220: *
3221: * [6] Names ::= Name (S Name)*
3222: *
3223: * Returns the Name parsed or NULL. The str pointer
3224: * is updated to the current location in the string.
3225: */
3226:
3227: xmlChar *
3228: xmlParseStringName(xmlParserCtxtPtr ctxt, const xmlChar** str) {
3229: const xmlChar *ptr;
3230: const xmlChar *start;
3231: xmlChar cur;
3232:
3233: if ((str == NULL) || (*str == NULL)) return(NULL);
3234:
3235: start = ptr = *str;
3236: cur = *ptr;
3237: if (!IS_LETTER(cur) && (cur != '_') &&
3238: (cur != ':')) {
3239: return(NULL);
3240: }
3241:
3242: while ((IS_LETTER(cur)) || (IS_DIGIT(cur)) ||
3243: (cur == '.') || (cur == '-') ||
3244: (cur == '_') || (cur == ':') ||
3245: (IS_COMBINING(cur)) ||
3246: (IS_EXTENDER(cur))) {
3247: ptr++;
3248: cur = *ptr;
3249: }
3250: *str = ptr;
3251: return(xmlStrndup(start, ptr - start ));
3252: }
3253:
3254: /**
1.50 daniel 3255: * xmlParseNmtoken:
3256: * @ctxt: an XML parser context
3257: *
3258: * parse an XML Nmtoken.
1.22 daniel 3259: *
3260: * [7] Nmtoken ::= (NameChar)+
3261: *
3262: * [8] Nmtokens ::= Nmtoken (S Nmtoken)*
1.68 daniel 3263: *
3264: * Returns the Nmtoken parsed or NULL
1.22 daniel 3265: */
3266:
1.123 daniel 3267: xmlChar *
1.55 daniel 3268: xmlParseNmtoken(xmlParserCtxtPtr ctxt) {
1.123 daniel 3269: xmlChar buf[XML_MAX_NAMELEN];
1.91 daniel 3270: int len = 0;
1.160 daniel 3271: int c,l;
1.22 daniel 3272:
1.91 daniel 3273: GROW;
1.160 daniel 3274: c = CUR_CHAR(l);
3275: while ((IS_LETTER(c)) || (IS_DIGIT(c)) ||
3276: (c == '.') || (c == '-') ||
3277: (c == '_') || (c == ':') ||
3278: (IS_COMBINING(c)) ||
3279: (IS_EXTENDER(c))) {
3280: COPY_BUF(l,buf,len,c);
3281: NEXTL(l);
3282: c = CUR_CHAR(l);
1.91 daniel 3283: if (len >= XML_MAX_NAMELEN) {
3284: fprintf(stderr,
3285: "xmlParseNmtoken: reached XML_MAX_NAMELEN limit\n");
1.160 daniel 3286: while ((IS_LETTER(c)) || (IS_DIGIT(c)) ||
3287: (c == '.') || (c == '-') ||
3288: (c == '_') || (c == ':') ||
3289: (IS_COMBINING(c)) ||
3290: (IS_EXTENDER(c))) {
3291: NEXTL(l);
3292: c = CUR_CHAR(l);
3293: }
1.91 daniel 3294: break;
3295: }
3296: }
1.168 daniel 3297: if (len == 0)
3298: return(NULL);
1.91 daniel 3299: return(xmlStrndup(buf, len));
1.1 veillard 3300: }
3301:
1.50 daniel 3302: /**
3303: * xmlParseEntityValue:
3304: * @ctxt: an XML parser context
1.78 daniel 3305: * @orig: if non-NULL store a copy of the original entity value
1.50 daniel 3306: *
3307: * parse a value for ENTITY decl.
1.24 daniel 3308: *
3309: * [9] EntityValue ::= '"' ([^%&"] | PEReference | Reference)* '"' |
3310: * "'" ([^%&'] | PEReference | Reference)* "'"
1.68 daniel 3311: *
1.78 daniel 3312: * Returns the EntityValue parsed with reference substitued or NULL
1.24 daniel 3313: */
3314:
1.123 daniel 3315: xmlChar *
3316: xmlParseEntityValue(xmlParserCtxtPtr ctxt, xmlChar **orig) {
1.135 daniel 3317: xmlChar *buf = NULL;
3318: int len = 0;
1.140 daniel 3319: int size = XML_PARSER_BUFFER_SIZE;
1.152 daniel 3320: int c, l;
1.135 daniel 3321: xmlChar stop;
1.123 daniel 3322: xmlChar *ret = NULL;
1.98 daniel 3323: xmlParserInputPtr input;
1.24 daniel 3324:
1.152 daniel 3325: if (RAW == '"') stop = '"';
3326: else if (RAW == '\'') stop = '\'';
1.135 daniel 3327: else {
3328: ctxt->errNo = XML_ERR_ENTITY_NOT_STARTED;
3329: if ((ctxt->sax != NULL) && (ctxt->sax->error != NULL))
3330: ctxt->sax->error(ctxt->userData, "EntityValue: \" or ' expected\n");
3331: ctxt->wellFormed = 0;
3332: return(NULL);
3333: }
3334: buf = (xmlChar *) xmlMalloc(size * sizeof(xmlChar));
3335: if (buf == NULL) {
3336: fprintf(stderr, "malloc of %d byte failed\n", size);
3337: return(NULL);
3338: }
1.94 daniel 3339:
1.135 daniel 3340: /*
3341: * The content of the entity definition is copied in a buffer.
3342: */
1.94 daniel 3343:
1.135 daniel 3344: ctxt->instate = XML_PARSER_ENTITY_VALUE;
3345: input = ctxt->input;
3346: GROW;
3347: NEXT;
1.152 daniel 3348: c = CUR_CHAR(l);
1.135 daniel 3349: /*
3350: * NOTE: 4.4.5 Included in Literal
3351: * When a parameter entity reference appears in a literal entity
3352: * value, ... a single or double quote character in the replacement
3353: * text is always treated as a normal data character and will not
3354: * terminate the literal.
3355: * In practice it means we stop the loop only when back at parsing
3356: * the initial entity and the quote is found
3357: */
3358: while (IS_CHAR(c) && ((c != stop) || (ctxt->input != input))) {
1.152 daniel 3359: if (len + 5 >= size) {
1.135 daniel 3360: size *= 2;
3361: buf = xmlRealloc(buf, size * sizeof(xmlChar));
3362: if (buf == NULL) {
3363: fprintf(stderr, "realloc of %d byte failed\n", size);
3364: return(NULL);
1.94 daniel 3365: }
1.79 daniel 3366: }
1.152 daniel 3367: COPY_BUF(l,buf,len,c);
3368: NEXTL(l);
1.98 daniel 3369: /*
1.135 daniel 3370: * Pop-up of finished entities.
1.98 daniel 3371: */
1.152 daniel 3372: while ((RAW == 0) && (ctxt->inputNr > 1))
1.135 daniel 3373: xmlPopInput(ctxt);
1.152 daniel 3374:
3375: c = CUR_CHAR(l);
1.135 daniel 3376: if (c == 0) {
1.94 daniel 3377: GROW;
1.152 daniel 3378: c = CUR_CHAR(l);
1.79 daniel 3379: }
1.135 daniel 3380: }
3381: buf[len] = 0;
3382:
3383: /*
3384: * Then PEReference entities are substituted.
3385: */
3386: if (c != stop) {
3387: ctxt->errNo = XML_ERR_ENTITY_NOT_FINISHED;
1.55 daniel 3388: if ((ctxt->sax != NULL) && (ctxt->sax->error != NULL))
1.135 daniel 3389: ctxt->sax->error(ctxt->userData, "EntityValue: \" expected\n");
1.59 daniel 3390: ctxt->wellFormed = 0;
1.170 daniel 3391: xmlFree(buf);
1.135 daniel 3392: } else {
3393: NEXT;
3394: /*
3395: * NOTE: 4.4.7 Bypassed
3396: * When a general entity reference appears in the EntityValue in
3397: * an entity declaration, it is bypassed and left as is.
3398: * so XML_SUBSTITUTE_REF is not set.
3399: */
3400: ret = xmlStringDecodeEntities(ctxt, buf, XML_SUBSTITUTE_PEREF,
3401: 0, 0, 0);
3402: if (orig != NULL)
3403: *orig = buf;
3404: else
3405: xmlFree(buf);
1.24 daniel 3406: }
3407:
3408: return(ret);
3409: }
3410:
1.50 daniel 3411: /**
3412: * xmlParseAttValue:
3413: * @ctxt: an XML parser context
3414: *
3415: * parse a value for an attribute
1.78 daniel 3416: * Note: the parser won't do substitution of entities here, this
1.113 daniel 3417: * will be handled later in xmlStringGetNodeList
1.29 daniel 3418: *
3419: * [10] AttValue ::= '"' ([^<&"] | Reference)* '"' |
3420: * "'" ([^<&'] | Reference)* "'"
1.68 daniel 3421: *
1.129 daniel 3422: * 3.3.3 Attribute-Value Normalization:
3423: * Before the value of an attribute is passed to the application or
3424: * checked for validity, the XML processor must normalize it as follows:
3425: * - a character reference is processed by appending the referenced
3426: * character to the attribute value
3427: * - an entity reference is processed by recursively processing the
3428: * replacement text of the entity
3429: * - a whitespace character (#x20, #xD, #xA, #x9) is processed by
3430: * appending #x20 to the normalized value, except that only a single
3431: * #x20 is appended for a "#xD#xA" sequence that is part of an external
3432: * parsed entity or the literal entity value of an internal parsed entity
3433: * - other characters are processed by appending them to the normalized value
1.130 daniel 3434: * If the declared value is not CDATA, then the XML processor must further
3435: * process the normalized attribute value by discarding any leading and
3436: * trailing space (#x20) characters, and by replacing sequences of space
3437: * (#x20) characters by a single space (#x20) character.
3438: * All attributes for which no declaration has been read should be treated
3439: * by a non-validating parser as if declared CDATA.
1.129 daniel 3440: *
3441: * Returns the AttValue parsed or NULL. The value has to be freed by the caller.
1.29 daniel 3442: */
3443:
1.123 daniel 3444: xmlChar *
1.55 daniel 3445: xmlParseAttValue(xmlParserCtxtPtr ctxt) {
1.129 daniel 3446: xmlChar limit = 0;
3447: xmlChar *buffer = NULL;
3448: int buffer_size = 0;
3449: xmlChar *out = NULL;
3450:
3451: xmlChar *current = NULL;
3452: xmlEntityPtr ent;
3453: xmlChar cur;
3454:
1.29 daniel 3455:
1.91 daniel 3456: SHRINK;
1.151 daniel 3457: if (NXT(0) == '"') {
1.96 daniel 3458: ctxt->instate = XML_PARSER_ATTRIBUTE_VALUE;
1.129 daniel 3459: limit = '"';
1.40 daniel 3460: NEXT;
1.151 daniel 3461: } else if (NXT(0) == '\'') {
1.129 daniel 3462: limit = '\'';
1.96 daniel 3463: ctxt->instate = XML_PARSER_ATTRIBUTE_VALUE;
1.40 daniel 3464: NEXT;
1.29 daniel 3465: } else {
1.123 daniel 3466: ctxt->errNo = XML_ERR_ATTRIBUTE_NOT_STARTED;
1.55 daniel 3467: if ((ctxt->sax != NULL) && (ctxt->sax->error != NULL))
1.74 daniel 3468: ctxt->sax->error(ctxt->userData, "AttValue: \" or ' expected\n");
1.59 daniel 3469: ctxt->wellFormed = 0;
1.129 daniel 3470: return(NULL);
1.29 daniel 3471: }
3472:
1.129 daniel 3473: /*
3474: * allocate a translation buffer.
3475: */
1.140 daniel 3476: buffer_size = XML_PARSER_BUFFER_SIZE;
1.129 daniel 3477: buffer = (xmlChar *) xmlMalloc(buffer_size * sizeof(xmlChar));
3478: if (buffer == NULL) {
3479: perror("xmlParseAttValue: malloc failed");
3480: return(NULL);
3481: }
3482: out = buffer;
3483:
3484: /*
3485: * Ok loop until we reach one of the ending char or a size limit.
3486: */
3487: cur = CUR;
1.156 daniel 3488: while (((NXT(0) != limit) && (cur != '<')) || (ctxt->token != 0)) {
1.129 daniel 3489: if (cur == 0) break;
3490: if ((cur == '&') && (NXT(1) == '#')) {
3491: int val = xmlParseCharRef(ctxt);
3492: *out++ = val;
3493: } else if (cur == '&') {
3494: ent = xmlParseEntityRef(ctxt);
3495: if ((ent != NULL) &&
3496: (ctxt->replaceEntities != 0)) {
3497: current = ent->content;
3498: while (*current != 0) {
3499: *out++ = *current++;
3500: if (out - buffer > buffer_size - 10) {
3501: int index = out - buffer;
3502:
3503: growBuffer(buffer);
3504: out = &buffer[index];
3505: }
3506: }
3507: } else if (ent != NULL) {
3508: int i = xmlStrlen(ent->name);
3509: const xmlChar *cur = ent->name;
3510:
3511: *out++ = '&';
3512: if (out - buffer > buffer_size - i - 10) {
3513: int index = out - buffer;
3514:
3515: growBuffer(buffer);
3516: out = &buffer[index];
3517: }
3518: for (;i > 0;i--)
3519: *out++ = *cur++;
3520: *out++ = ';';
3521: }
3522: } else {
1.156 daniel 3523: /* invalid for UTF-8 , use COPY(out); !!! */
1.129 daniel 3524: if ((cur == 0x20) || (cur == 0xD) || (cur == 0xA) || (cur == 0x9)) {
1.130 daniel 3525: *out++ = 0x20;
3526: if (out - buffer > buffer_size - 10) {
3527: int index = out - buffer;
3528:
3529: growBuffer(buffer);
3530: out = &buffer[index];
1.129 daniel 3531: }
3532: } else {
3533: *out++ = cur;
3534: if (out - buffer > buffer_size - 10) {
3535: int index = out - buffer;
3536:
3537: growBuffer(buffer);
3538: out = &buffer[index];
3539: }
3540: }
3541: NEXT;
3542: }
3543: cur = CUR;
3544: }
3545: *out++ = 0;
1.152 daniel 3546: if (RAW == '<') {
1.129 daniel 3547: if ((ctxt->sax != NULL) && (ctxt->sax->error != NULL))
3548: ctxt->sax->error(ctxt->userData,
3549: "Unescaped '<' not allowed in attributes values\n");
3550: ctxt->errNo = XML_ERR_LT_IN_ATTRIBUTE;
3551: ctxt->wellFormed = 0;
1.152 daniel 3552: } else if (RAW != limit) {
1.129 daniel 3553: if ((ctxt->sax != NULL) && (ctxt->sax->error != NULL))
3554: ctxt->sax->error(ctxt->userData, "AttValue: ' expected\n");
3555: ctxt->errNo = XML_ERR_ATTRIBUTE_NOT_FINISHED;
3556: ctxt->wellFormed = 0;
3557: } else
3558: NEXT;
3559: return(buffer);
1.29 daniel 3560: }
3561:
1.50 daniel 3562: /**
3563: * xmlParseSystemLiteral:
3564: * @ctxt: an XML parser context
3565: *
3566: * parse an XML Literal
1.21 daniel 3567: *
1.22 daniel 3568: * [11] SystemLiteral ::= ('"' [^"]* '"') | ("'" [^']* "'")
1.68 daniel 3569: *
3570: * Returns the SystemLiteral parsed or NULL
1.21 daniel 3571: */
3572:
1.123 daniel 3573: xmlChar *
1.55 daniel 3574: xmlParseSystemLiteral(xmlParserCtxtPtr ctxt) {
1.135 daniel 3575: xmlChar *buf = NULL;
3576: int len = 0;
1.140 daniel 3577: int size = XML_PARSER_BUFFER_SIZE;
1.152 daniel 3578: int cur, l;
1.135 daniel 3579: xmlChar stop;
1.168 daniel 3580: int state = ctxt->instate;
1.21 daniel 3581:
1.91 daniel 3582: SHRINK;
1.152 daniel 3583: if (RAW == '"') {
1.40 daniel 3584: NEXT;
1.135 daniel 3585: stop = '"';
1.152 daniel 3586: } else if (RAW == '\'') {
1.40 daniel 3587: NEXT;
1.135 daniel 3588: stop = '\'';
1.21 daniel 3589: } else {
1.55 daniel 3590: if ((ctxt->sax != NULL) && (ctxt->sax->error != NULL))
1.122 daniel 3591: ctxt->sax->error(ctxt->userData,
3592: "SystemLiteral \" or ' expected\n");
1.123 daniel 3593: ctxt->errNo = XML_ERR_LITERAL_NOT_STARTED;
1.59 daniel 3594: ctxt->wellFormed = 0;
1.135 daniel 3595: return(NULL);
1.21 daniel 3596: }
3597:
1.135 daniel 3598: buf = (xmlChar *) xmlMalloc(size * sizeof(xmlChar));
3599: if (buf == NULL) {
3600: fprintf(stderr, "malloc of %d byte failed\n", size);
3601: return(NULL);
3602: }
1.168 daniel 3603: ctxt->instate = XML_PARSER_SYSTEM_LITERAL;
1.152 daniel 3604: cur = CUR_CHAR(l);
1.135 daniel 3605: while ((IS_CHAR(cur)) && (cur != stop)) {
1.152 daniel 3606: if (len + 5 >= size) {
1.135 daniel 3607: size *= 2;
3608: buf = xmlRealloc(buf, size * sizeof(xmlChar));
3609: if (buf == NULL) {
3610: fprintf(stderr, "realloc of %d byte failed\n", size);
1.168 daniel 3611: ctxt->instate = state;
1.135 daniel 3612: return(NULL);
3613: }
3614: }
1.152 daniel 3615: COPY_BUF(l,buf,len,cur);
3616: NEXTL(l);
3617: cur = CUR_CHAR(l);
1.135 daniel 3618: if (cur == 0) {
3619: GROW;
3620: SHRINK;
1.152 daniel 3621: cur = CUR_CHAR(l);
1.135 daniel 3622: }
3623: }
3624: buf[len] = 0;
1.168 daniel 3625: ctxt->instate = state;
1.135 daniel 3626: if (!IS_CHAR(cur)) {
3627: if ((ctxt->sax != NULL) && (ctxt->sax->error != NULL))
3628: ctxt->sax->error(ctxt->userData, "Unfinished SystemLiteral\n");
3629: ctxt->errNo = XML_ERR_LITERAL_NOT_FINISHED;
3630: ctxt->wellFormed = 0;
3631: } else {
3632: NEXT;
3633: }
3634: return(buf);
1.21 daniel 3635: }
3636:
1.50 daniel 3637: /**
3638: * xmlParsePubidLiteral:
3639: * @ctxt: an XML parser context
1.21 daniel 3640: *
1.50 daniel 3641: * parse an XML public literal
1.68 daniel 3642: *
3643: * [12] PubidLiteral ::= '"' PubidChar* '"' | "'" (PubidChar - "'")* "'"
3644: *
3645: * Returns the PubidLiteral parsed or NULL.
1.21 daniel 3646: */
3647:
1.123 daniel 3648: xmlChar *
1.55 daniel 3649: xmlParsePubidLiteral(xmlParserCtxtPtr ctxt) {
1.135 daniel 3650: xmlChar *buf = NULL;
3651: int len = 0;
1.140 daniel 3652: int size = XML_PARSER_BUFFER_SIZE;
1.135 daniel 3653: xmlChar cur;
3654: xmlChar stop;
1.125 daniel 3655:
1.91 daniel 3656: SHRINK;
1.152 daniel 3657: if (RAW == '"') {
1.40 daniel 3658: NEXT;
1.135 daniel 3659: stop = '"';
1.152 daniel 3660: } else if (RAW == '\'') {
1.40 daniel 3661: NEXT;
1.135 daniel 3662: stop = '\'';
1.21 daniel 3663: } else {
1.55 daniel 3664: if ((ctxt->sax != NULL) && (ctxt->sax->error != NULL))
1.122 daniel 3665: ctxt->sax->error(ctxt->userData,
3666: "SystemLiteral \" or ' expected\n");
1.123 daniel 3667: ctxt->errNo = XML_ERR_LITERAL_NOT_STARTED;
1.59 daniel 3668: ctxt->wellFormed = 0;
1.135 daniel 3669: return(NULL);
3670: }
3671: buf = (xmlChar *) xmlMalloc(size * sizeof(xmlChar));
3672: if (buf == NULL) {
3673: fprintf(stderr, "malloc of %d byte failed\n", size);
3674: return(NULL);
3675: }
3676: cur = CUR;
3677: while ((IS_PUBIDCHAR(cur)) && (cur != stop)) {
3678: if (len + 1 >= size) {
3679: size *= 2;
3680: buf = xmlRealloc(buf, size * sizeof(xmlChar));
3681: if (buf == NULL) {
3682: fprintf(stderr, "realloc of %d byte failed\n", size);
3683: return(NULL);
3684: }
3685: }
3686: buf[len++] = cur;
3687: NEXT;
3688: cur = CUR;
3689: if (cur == 0) {
3690: GROW;
3691: SHRINK;
3692: cur = CUR;
3693: }
3694: }
3695: buf[len] = 0;
3696: if (cur != stop) {
3697: if ((ctxt->sax != NULL) && (ctxt->sax->error != NULL))
3698: ctxt->sax->error(ctxt->userData, "Unfinished PubidLiteral\n");
3699: ctxt->errNo = XML_ERR_LITERAL_NOT_FINISHED;
3700: ctxt->wellFormed = 0;
3701: } else {
3702: NEXT;
1.21 daniel 3703: }
1.135 daniel 3704: return(buf);
1.21 daniel 3705: }
3706:
1.50 daniel 3707: /**
3708: * xmlParseCharData:
3709: * @ctxt: an XML parser context
3710: * @cdata: int indicating whether we are within a CDATA section
3711: *
3712: * parse a CharData section.
3713: * if we are within a CDATA section ']]>' marks an end of section.
1.27 daniel 3714: *
1.151 daniel 3715: * The right angle bracket (>) may be represented using the string ">",
3716: * and must, for compatibility, be escaped using ">" or a character
3717: * reference when it appears in the string "]]>" in content, when that
3718: * string is not marking the end of a CDATA section.
3719: *
1.27 daniel 3720: * [14] CharData ::= [^<&]* - ([^<&]* ']]>' [^<&]*)
3721: */
3722:
1.55 daniel 3723: void
3724: xmlParseCharData(xmlParserCtxtPtr ctxt, int cdata) {
1.152 daniel 3725: xmlChar buf[XML_PARSER_BIG_BUFFER_SIZE + 5];
1.91 daniel 3726: int nbchar = 0;
1.152 daniel 3727: int cur, l;
1.27 daniel 3728:
1.91 daniel 3729: SHRINK;
1.152 daniel 3730: cur = CUR_CHAR(l);
1.160 daniel 3731: while ((IS_CHAR(cur)) && ((cur != '<') || (ctxt->token == '<')) &&
1.153 daniel 3732: ((cur != '&') || (ctxt->token == '&'))) {
1.97 daniel 3733: if ((cur == ']') && (NXT(1) == ']') &&
1.59 daniel 3734: (NXT(2) == '>')) {
3735: if (cdata) break;
3736: else {
3737: if ((ctxt->sax != NULL) && (ctxt->sax->error != NULL))
1.151 daniel 3738: ctxt->sax->error(ctxt->userData,
1.59 daniel 3739: "Sequence ']]>' not allowed in content\n");
1.123 daniel 3740: ctxt->errNo = XML_ERR_MISPLACED_CDATA_END;
1.151 daniel 3741: /* Should this be relaxed ??? I see a "must here */
3742: ctxt->wellFormed = 0;
1.59 daniel 3743: }
3744: }
1.152 daniel 3745: COPY_BUF(l,buf,nbchar,cur);
3746: if (nbchar >= XML_PARSER_BIG_BUFFER_SIZE) {
1.91 daniel 3747: /*
3748: * Ok the segment is to be consumed as chars.
3749: */
1.171 ! daniel 3750: if ((ctxt->sax != NULL) && (!ctxt->disableSAX)) {
1.91 daniel 3751: if (areBlanks(ctxt, buf, nbchar)) {
3752: if (ctxt->sax->ignorableWhitespace != NULL)
3753: ctxt->sax->ignorableWhitespace(ctxt->userData,
3754: buf, nbchar);
3755: } else {
3756: if (ctxt->sax->characters != NULL)
3757: ctxt->sax->characters(ctxt->userData, buf, nbchar);
3758: }
3759: }
3760: nbchar = 0;
3761: }
1.152 daniel 3762: NEXTL(l);
3763: cur = CUR_CHAR(l);
1.27 daniel 3764: }
1.91 daniel 3765: if (nbchar != 0) {
3766: /*
3767: * Ok the segment is to be consumed as chars.
3768: */
1.171 ! daniel 3769: if ((ctxt->sax != NULL) && (!ctxt->disableSAX)) {
1.91 daniel 3770: if (areBlanks(ctxt, buf, nbchar)) {
3771: if (ctxt->sax->ignorableWhitespace != NULL)
3772: ctxt->sax->ignorableWhitespace(ctxt->userData, buf, nbchar);
3773: } else {
3774: if (ctxt->sax->characters != NULL)
3775: ctxt->sax->characters(ctxt->userData, buf, nbchar);
3776: }
3777: }
1.45 daniel 3778: }
1.27 daniel 3779: }
3780:
1.50 daniel 3781: /**
3782: * xmlParseExternalID:
3783: * @ctxt: an XML parser context
1.123 daniel 3784: * @publicID: a xmlChar** receiving PubidLiteral
1.67 daniel 3785: * @strict: indicate whether we should restrict parsing to only
3786: * production [75], see NOTE below
1.50 daniel 3787: *
1.67 daniel 3788: * Parse an External ID or a Public ID
3789: *
3790: * NOTE: Productions [75] and [83] interract badly since [75] can generate
3791: * 'PUBLIC' S PubidLiteral S SystemLiteral
1.22 daniel 3792: *
3793: * [75] ExternalID ::= 'SYSTEM' S SystemLiteral
3794: * | 'PUBLIC' S PubidLiteral S SystemLiteral
1.67 daniel 3795: *
3796: * [83] PublicID ::= 'PUBLIC' S PubidLiteral
3797: *
1.68 daniel 3798: * Returns the SystemLiteral; in the second ('PUBLIC') case
1.67 daniel 3799: * publicID also receives the PubidLiteral. If strict is off
3800: * it is possible to return NULL and have publicID set.
1.22 daniel 3801: */
3802:
1.123 daniel 3803: xmlChar *
3804: xmlParseExternalID(xmlParserCtxtPtr ctxt, xmlChar **publicID, int strict) {
3805: xmlChar *URI = NULL;
1.22 daniel 3806:
1.91 daniel 3807: SHRINK;
1.152 daniel 3808: if ((RAW == 'S') && (NXT(1) == 'Y') &&
1.40 daniel 3809: (NXT(2) == 'S') && (NXT(3) == 'T') &&
3810: (NXT(4) == 'E') && (NXT(5) == 'M')) {
3811: SKIP(6);
1.59 daniel 3812: if (!IS_BLANK(CUR)) {
3813: if ((ctxt->sax != NULL) && (ctxt->sax->error != NULL))
1.74 daniel 3814: ctxt->sax->error(ctxt->userData,
1.59 daniel 3815: "Space required after 'SYSTEM'\n");
1.123 daniel 3816: ctxt->errNo = XML_ERR_SPACE_REQUIRED;
1.59 daniel 3817: ctxt->wellFormed = 0;
3818: }
1.42 daniel 3819: SKIP_BLANKS;
1.39 daniel 3820: URI = xmlParseSystemLiteral(ctxt);
1.59 daniel 3821: if (URI == NULL) {
1.55 daniel 3822: if ((ctxt->sax != NULL) && (ctxt->sax->error != NULL))
1.74 daniel 3823: ctxt->sax->error(ctxt->userData,
1.39 daniel 3824: "xmlParseExternalID: SYSTEM, no URI\n");
1.123 daniel 3825: ctxt->errNo = XML_ERR_URI_REQUIRED;
1.59 daniel 3826: ctxt->wellFormed = 0;
3827: }
1.152 daniel 3828: } else if ((RAW == 'P') && (NXT(1) == 'U') &&
1.40 daniel 3829: (NXT(2) == 'B') && (NXT(3) == 'L') &&
3830: (NXT(4) == 'I') && (NXT(5) == 'C')) {
3831: SKIP(6);
1.59 daniel 3832: if (!IS_BLANK(CUR)) {
3833: if ((ctxt->sax != NULL) && (ctxt->sax->error != NULL))
1.74 daniel 3834: ctxt->sax->error(ctxt->userData,
1.59 daniel 3835: "Space required after 'PUBLIC'\n");
1.123 daniel 3836: ctxt->errNo = XML_ERR_SPACE_REQUIRED;
1.59 daniel 3837: ctxt->wellFormed = 0;
3838: }
1.42 daniel 3839: SKIP_BLANKS;
1.39 daniel 3840: *publicID = xmlParsePubidLiteral(ctxt);
1.59 daniel 3841: if (*publicID == NULL) {
1.55 daniel 3842: if ((ctxt->sax != NULL) && (ctxt->sax->error != NULL))
1.74 daniel 3843: ctxt->sax->error(ctxt->userData,
1.39 daniel 3844: "xmlParseExternalID: PUBLIC, no Public Identifier\n");
1.123 daniel 3845: ctxt->errNo = XML_ERR_PUBID_REQUIRED;
1.59 daniel 3846: ctxt->wellFormed = 0;
3847: }
1.67 daniel 3848: if (strict) {
3849: /*
3850: * We don't handle [83] so "S SystemLiteral" is required.
3851: */
3852: if (!IS_BLANK(CUR)) {
3853: if ((ctxt->sax != NULL) && (ctxt->sax->error != NULL))
1.74 daniel 3854: ctxt->sax->error(ctxt->userData,
1.67 daniel 3855: "Space required after the Public Identifier\n");
1.123 daniel 3856: ctxt->errNo = XML_ERR_SPACE_REQUIRED;
1.67 daniel 3857: ctxt->wellFormed = 0;
3858: }
3859: } else {
3860: /*
3861: * We handle [83] so we return immediately, if
3862: * "S SystemLiteral" is not detected. From a purely parsing
3863: * point of view that's a nice mess.
3864: */
1.135 daniel 3865: const xmlChar *ptr;
3866: GROW;
3867:
3868: ptr = CUR_PTR;
1.67 daniel 3869: if (!IS_BLANK(*ptr)) return(NULL);
3870:
3871: while (IS_BLANK(*ptr)) ptr++;
3872: if ((*ptr != '\'') || (*ptr != '"')) return(NULL);
1.59 daniel 3873: }
1.42 daniel 3874: SKIP_BLANKS;
1.39 daniel 3875: URI = xmlParseSystemLiteral(ctxt);
1.59 daniel 3876: if (URI == NULL) {
1.55 daniel 3877: if ((ctxt->sax != NULL) && (ctxt->sax->error != NULL))
1.74 daniel 3878: ctxt->sax->error(ctxt->userData,
1.39 daniel 3879: "xmlParseExternalID: PUBLIC, no URI\n");
1.123 daniel 3880: ctxt->errNo = XML_ERR_URI_REQUIRED;
1.59 daniel 3881: ctxt->wellFormed = 0;
3882: }
1.22 daniel 3883: }
1.39 daniel 3884: return(URI);
1.22 daniel 3885: }
3886:
1.50 daniel 3887: /**
3888: * xmlParseComment:
1.69 daniel 3889: * @ctxt: an XML parser context
1.50 daniel 3890: *
1.3 veillard 3891: * Skip an XML (SGML) comment <!-- .... -->
1.38 daniel 3892: * The spec says that "For compatibility, the string "--" (double-hyphen)
3893: * must not occur within comments. "
1.22 daniel 3894: *
3895: * [15] Comment ::= '<!--' ((Char - '-') | ('-' (Char - '-')))* '-->'
1.3 veillard 3896: */
1.72 daniel 3897: void
1.114 daniel 3898: xmlParseComment(xmlParserCtxtPtr ctxt) {
1.135 daniel 3899: xmlChar *buf = NULL;
3900: int len = 0;
1.140 daniel 3901: int size = XML_PARSER_BUFFER_SIZE;
1.152 daniel 3902: int q, ql;
3903: int r, rl;
3904: int cur, l;
1.140 daniel 3905: xmlParserInputState state;
1.3 veillard 3906:
3907: /*
1.22 daniel 3908: * Check that there is a comment right here.
1.3 veillard 3909: */
1.152 daniel 3910: if ((RAW != '<') || (NXT(1) != '!') ||
1.72 daniel 3911: (NXT(2) != '-') || (NXT(3) != '-')) return;
1.3 veillard 3912:
1.140 daniel 3913: state = ctxt->instate;
1.97 daniel 3914: ctxt->instate = XML_PARSER_COMMENT;
1.91 daniel 3915: SHRINK;
1.40 daniel 3916: SKIP(4);
1.135 daniel 3917: buf = (xmlChar *) xmlMalloc(size * sizeof(xmlChar));
3918: if (buf == NULL) {
3919: fprintf(stderr, "malloc of %d byte failed\n", size);
1.140 daniel 3920: ctxt->instate = state;
1.135 daniel 3921: return;
3922: }
1.152 daniel 3923: q = CUR_CHAR(ql);
3924: NEXTL(ql);
3925: r = CUR_CHAR(rl);
3926: NEXTL(rl);
3927: cur = CUR_CHAR(l);
1.135 daniel 3928: while (IS_CHAR(cur) &&
3929: ((cur != '>') ||
3930: (r != '-') || (q != '-'))) {
3931: if ((r == '-') && (q == '-')) {
1.55 daniel 3932: if ((ctxt->sax != NULL) && (ctxt->sax->error != NULL))
1.74 daniel 3933: ctxt->sax->error(ctxt->userData,
1.38 daniel 3934: "Comment must not contain '--' (double-hyphen)`\n");
1.123 daniel 3935: ctxt->errNo = XML_ERR_HYPHEN_IN_COMMENT;
1.59 daniel 3936: ctxt->wellFormed = 0;
3937: }
1.152 daniel 3938: if (len + 5 >= size) {
1.135 daniel 3939: size *= 2;
3940: buf = xmlRealloc(buf, size * sizeof(xmlChar));
3941: if (buf == NULL) {
3942: fprintf(stderr, "realloc of %d byte failed\n", size);
1.140 daniel 3943: ctxt->instate = state;
1.135 daniel 3944: return;
3945: }
3946: }
1.152 daniel 3947: COPY_BUF(ql,buf,len,q);
1.135 daniel 3948: q = r;
1.152 daniel 3949: ql = rl;
1.135 daniel 3950: r = cur;
1.152 daniel 3951: rl = l;
3952: NEXTL(l);
3953: cur = CUR_CHAR(l);
1.135 daniel 3954: if (cur == 0) {
3955: SHRINK;
3956: GROW;
1.152 daniel 3957: cur = CUR_CHAR(l);
1.135 daniel 3958: }
1.3 veillard 3959: }
1.135 daniel 3960: buf[len] = 0;
3961: if (!IS_CHAR(cur)) {
1.55 daniel 3962: if ((ctxt->sax != NULL) && (ctxt->sax->error != NULL))
1.122 daniel 3963: ctxt->sax->error(ctxt->userData,
1.135 daniel 3964: "Comment not terminated \n<!--%.50s\n", buf);
1.123 daniel 3965: ctxt->errNo = XML_ERR_COMMENT_NOT_FINISHED;
1.59 daniel 3966: ctxt->wellFormed = 0;
1.3 veillard 3967: } else {
1.40 daniel 3968: NEXT;
1.171 ! daniel 3969: if ((ctxt->sax != NULL) && (ctxt->sax->comment != NULL) &&
! 3970: (!ctxt->disableSAX))
1.135 daniel 3971: ctxt->sax->comment(ctxt->userData, buf);
3972: xmlFree(buf);
1.3 veillard 3973: }
1.140 daniel 3974: ctxt->instate = state;
1.3 veillard 3975: }
3976:
1.50 daniel 3977: /**
3978: * xmlParsePITarget:
3979: * @ctxt: an XML parser context
3980: *
3981: * parse the name of a PI
1.22 daniel 3982: *
3983: * [17] PITarget ::= Name - (('X' | 'x') ('M' | 'm') ('L' | 'l'))
1.68 daniel 3984: *
3985: * Returns the PITarget name or NULL
1.22 daniel 3986: */
3987:
1.123 daniel 3988: xmlChar *
1.55 daniel 3989: xmlParsePITarget(xmlParserCtxtPtr ctxt) {
1.123 daniel 3990: xmlChar *name;
1.22 daniel 3991:
3992: name = xmlParseName(ctxt);
1.139 daniel 3993: if ((name != NULL) &&
1.22 daniel 3994: ((name[0] == 'x') || (name[0] == 'X')) &&
1.31 daniel 3995: ((name[1] == 'm') || (name[1] == 'M')) &&
3996: ((name[2] == 'l') || (name[2] == 'L'))) {
1.139 daniel 3997: int i;
1.151 daniel 3998: if ((name[0] = 'x') && (name[1] == 'm') &&
3999: (name[2] = 'l') && (name[3] == 0)) {
4000: if ((ctxt->sax != NULL) && (ctxt->sax->error != NULL))
4001: ctxt->sax->error(ctxt->userData,
4002: "XML declaration allowed only at the start of the document\n");
4003: ctxt->errNo = XML_ERR_RESERVED_XML_NAME;
4004: ctxt->wellFormed = 0;
4005: return(name);
4006: } else if (name[3] == 0) {
4007: if ((ctxt->sax != NULL) && (ctxt->sax->error != NULL))
4008: ctxt->sax->error(ctxt->userData, "Invalid PI name\n");
4009: ctxt->errNo = XML_ERR_RESERVED_XML_NAME;
4010: ctxt->wellFormed = 0;
4011: return(name);
4012: }
1.139 daniel 4013: for (i = 0;;i++) {
4014: if (xmlW3CPIs[i] == NULL) break;
4015: if (!xmlStrcmp(name, (const xmlChar *)xmlW3CPIs[i]))
4016: return(name);
4017: }
4018: if ((ctxt->sax != NULL) && (ctxt->sax->warning != NULL)) {
4019: ctxt->sax->warning(ctxt->userData,
1.122 daniel 4020: "xmlParsePItarget: invalid name prefix 'xml'\n");
1.123 daniel 4021: ctxt->errNo = XML_ERR_RESERVED_XML_NAME;
1.122 daniel 4022: }
1.22 daniel 4023: }
4024: return(name);
4025: }
4026:
1.50 daniel 4027: /**
4028: * xmlParsePI:
4029: * @ctxt: an XML parser context
4030: *
4031: * parse an XML Processing Instruction.
1.22 daniel 4032: *
4033: * [16] PI ::= '<?' PITarget (S (Char* - (Char* '?>' Char*)))? '?>'
1.68 daniel 4034: *
1.69 daniel 4035: * The processing is transfered to SAX once parsed.
1.3 veillard 4036: */
4037:
1.55 daniel 4038: void
4039: xmlParsePI(xmlParserCtxtPtr ctxt) {
1.135 daniel 4040: xmlChar *buf = NULL;
4041: int len = 0;
1.140 daniel 4042: int size = XML_PARSER_BUFFER_SIZE;
1.152 daniel 4043: int cur, l;
1.123 daniel 4044: xmlChar *target;
1.140 daniel 4045: xmlParserInputState state;
1.22 daniel 4046:
1.152 daniel 4047: if ((RAW == '<') && (NXT(1) == '?')) {
1.140 daniel 4048: state = ctxt->instate;
4049: ctxt->instate = XML_PARSER_PI;
1.3 veillard 4050: /*
4051: * this is a Processing Instruction.
4052: */
1.40 daniel 4053: SKIP(2);
1.91 daniel 4054: SHRINK;
1.3 veillard 4055:
4056: /*
1.22 daniel 4057: * Parse the target name and check for special support like
4058: * namespace.
1.3 veillard 4059: */
1.22 daniel 4060: target = xmlParsePITarget(ctxt);
4061: if (target != NULL) {
1.156 daniel 4062: if ((RAW == '?') && (NXT(1) == '>')) {
4063: SKIP(2);
4064:
4065: /*
4066: * SAX: PI detected.
4067: */
1.171 ! daniel 4068: if ((ctxt->sax) && (!ctxt->disableSAX) &&
1.156 daniel 4069: (ctxt->sax->processingInstruction != NULL))
4070: ctxt->sax->processingInstruction(ctxt->userData,
4071: target, NULL);
4072: ctxt->instate = state;
1.170 daniel 4073: xmlFree(target);
1.156 daniel 4074: return;
4075: }
1.135 daniel 4076: buf = (xmlChar *) xmlMalloc(size * sizeof(xmlChar));
4077: if (buf == NULL) {
4078: fprintf(stderr, "malloc of %d byte failed\n", size);
1.140 daniel 4079: ctxt->instate = state;
1.135 daniel 4080: return;
4081: }
4082: cur = CUR;
4083: if (!IS_BLANK(cur)) {
1.114 daniel 4084: if ((ctxt->sax != NULL) && (ctxt->sax->error != NULL))
4085: ctxt->sax->error(ctxt->userData,
4086: "xmlParsePI: PI %s space expected\n", target);
1.123 daniel 4087: ctxt->errNo = XML_ERR_SPACE_REQUIRED;
1.114 daniel 4088: ctxt->wellFormed = 0;
4089: }
4090: SKIP_BLANKS;
1.152 daniel 4091: cur = CUR_CHAR(l);
1.135 daniel 4092: while (IS_CHAR(cur) &&
4093: ((cur != '?') || (NXT(1) != '>'))) {
1.152 daniel 4094: if (len + 5 >= size) {
1.135 daniel 4095: size *= 2;
4096: buf = xmlRealloc(buf, size * sizeof(xmlChar));
4097: if (buf == NULL) {
4098: fprintf(stderr, "realloc of %d byte failed\n", size);
1.140 daniel 4099: ctxt->instate = state;
1.135 daniel 4100: return;
4101: }
4102: }
1.152 daniel 4103: COPY_BUF(l,buf,len,cur);
4104: NEXTL(l);
4105: cur = CUR_CHAR(l);
1.135 daniel 4106: if (cur == 0) {
4107: SHRINK;
4108: GROW;
1.152 daniel 4109: cur = CUR_CHAR(l);
1.135 daniel 4110: }
4111: }
4112: buf[len] = 0;
1.152 daniel 4113: if (cur != '?') {
1.72 daniel 4114: if ((ctxt->sax != NULL) && (ctxt->sax->error != NULL))
1.74 daniel 4115: ctxt->sax->error(ctxt->userData,
1.72 daniel 4116: "xmlParsePI: PI %s never end ...\n", target);
1.123 daniel 4117: ctxt->errNo = XML_ERR_PI_NOT_FINISHED;
1.72 daniel 4118: ctxt->wellFormed = 0;
1.22 daniel 4119: } else {
1.72 daniel 4120: SKIP(2);
1.44 daniel 4121:
1.72 daniel 4122: /*
4123: * SAX: PI detected.
4124: */
1.171 ! daniel 4125: if ((ctxt->sax) && (!ctxt->disableSAX) &&
1.72 daniel 4126: (ctxt->sax->processingInstruction != NULL))
1.99 daniel 4127: ctxt->sax->processingInstruction(ctxt->userData,
1.135 daniel 4128: target, buf);
1.22 daniel 4129: }
1.135 daniel 4130: xmlFree(buf);
1.119 daniel 4131: xmlFree(target);
1.3 veillard 4132: } else {
1.55 daniel 4133: if ((ctxt->sax != NULL) && (ctxt->sax->error != NULL))
1.99 daniel 4134: ctxt->sax->error(ctxt->userData,
4135: "xmlParsePI : no target name\n");
1.123 daniel 4136: ctxt->errNo = XML_ERR_PI_NOT_STARTED;
1.59 daniel 4137: ctxt->wellFormed = 0;
1.22 daniel 4138: }
1.140 daniel 4139: ctxt->instate = state;
1.22 daniel 4140: }
4141: }
4142:
1.50 daniel 4143: /**
4144: * xmlParseNotationDecl:
4145: * @ctxt: an XML parser context
4146: *
4147: * parse a notation declaration
1.22 daniel 4148: *
4149: * [82] NotationDecl ::= '<!NOTATION' S Name S (ExternalID | PublicID) S? '>'
4150: *
4151: * Hence there is actually 3 choices:
4152: * 'PUBLIC' S PubidLiteral
4153: * 'PUBLIC' S PubidLiteral S SystemLiteral
4154: * and 'SYSTEM' S SystemLiteral
1.50 daniel 4155: *
1.67 daniel 4156: * See the NOTE on xmlParseExternalID().
1.22 daniel 4157: */
4158:
1.55 daniel 4159: void
4160: xmlParseNotationDecl(xmlParserCtxtPtr ctxt) {
1.123 daniel 4161: xmlChar *name;
4162: xmlChar *Pubid;
4163: xmlChar *Systemid;
1.22 daniel 4164:
1.152 daniel 4165: if ((RAW == '<') && (NXT(1) == '!') &&
1.40 daniel 4166: (NXT(2) == 'N') && (NXT(3) == 'O') &&
4167: (NXT(4) == 'T') && (NXT(5) == 'A') &&
4168: (NXT(6) == 'T') && (NXT(7) == 'I') &&
1.67 daniel 4169: (NXT(8) == 'O') && (NXT(9) == 'N')) {
1.91 daniel 4170: SHRINK;
1.40 daniel 4171: SKIP(10);
1.67 daniel 4172: if (!IS_BLANK(CUR)) {
4173: if ((ctxt->sax != NULL) && (ctxt->sax->error != NULL))
1.122 daniel 4174: ctxt->sax->error(ctxt->userData,
4175: "Space required after '<!NOTATION'\n");
1.123 daniel 4176: ctxt->errNo = XML_ERR_SPACE_REQUIRED;
1.67 daniel 4177: ctxt->wellFormed = 0;
4178: return;
4179: }
4180: SKIP_BLANKS;
1.22 daniel 4181:
4182: name = xmlParseName(ctxt);
4183: if (name == NULL) {
1.55 daniel 4184: if ((ctxt->sax != NULL) && (ctxt->sax->error != NULL))
1.122 daniel 4185: ctxt->sax->error(ctxt->userData,
4186: "NOTATION: Name expected here\n");
1.123 daniel 4187: ctxt->errNo = XML_ERR_NOTATION_NOT_STARTED;
1.67 daniel 4188: ctxt->wellFormed = 0;
4189: return;
4190: }
4191: if (!IS_BLANK(CUR)) {
4192: if ((ctxt->sax != NULL) && (ctxt->sax->error != NULL))
1.74 daniel 4193: ctxt->sax->error(ctxt->userData,
1.67 daniel 4194: "Space required after the NOTATION name'\n");
1.123 daniel 4195: ctxt->errNo = XML_ERR_SPACE_REQUIRED;
1.59 daniel 4196: ctxt->wellFormed = 0;
1.22 daniel 4197: return;
4198: }
1.42 daniel 4199: SKIP_BLANKS;
1.67 daniel 4200:
1.22 daniel 4201: /*
1.67 daniel 4202: * Parse the IDs.
1.22 daniel 4203: */
1.160 daniel 4204: Systemid = xmlParseExternalID(ctxt, &Pubid, 0);
1.67 daniel 4205: SKIP_BLANKS;
4206:
1.152 daniel 4207: if (RAW == '>') {
1.40 daniel 4208: NEXT;
1.171 ! daniel 4209: if ((ctxt->sax != NULL) && (!ctxt->disableSAX) &&
! 4210: (ctxt->sax->notationDecl != NULL))
1.74 daniel 4211: ctxt->sax->notationDecl(ctxt->userData, name, Pubid, Systemid);
1.67 daniel 4212: } else {
4213: if ((ctxt->sax != NULL) && (ctxt->sax->error != NULL))
1.74 daniel 4214: ctxt->sax->error(ctxt->userData,
1.67 daniel 4215: "'>' required to close NOTATION declaration\n");
1.123 daniel 4216: ctxt->errNo = XML_ERR_NOTATION_NOT_FINISHED;
1.67 daniel 4217: ctxt->wellFormed = 0;
4218: }
1.119 daniel 4219: xmlFree(name);
4220: if (Systemid != NULL) xmlFree(Systemid);
4221: if (Pubid != NULL) xmlFree(Pubid);
1.22 daniel 4222: }
4223: }
4224:
1.50 daniel 4225: /**
4226: * xmlParseEntityDecl:
4227: * @ctxt: an XML parser context
4228: *
4229: * parse <!ENTITY declarations
1.22 daniel 4230: *
4231: * [70] EntityDecl ::= GEDecl | PEDecl
4232: *
4233: * [71] GEDecl ::= '<!ENTITY' S Name S EntityDef S? '>'
4234: *
4235: * [72] PEDecl ::= '<!ENTITY' S '%' S Name S PEDef S? '>'
4236: *
4237: * [73] EntityDef ::= EntityValue | (ExternalID NDataDecl?)
4238: *
4239: * [74] PEDef ::= EntityValue | ExternalID
1.24 daniel 4240: *
4241: * [76] NDataDecl ::= S 'NDATA' S Name
1.99 daniel 4242: *
4243: * [ VC: Notation Declared ]
1.116 daniel 4244: * The Name must match the declared name of a notation.
1.22 daniel 4245: */
4246:
1.55 daniel 4247: void
4248: xmlParseEntityDecl(xmlParserCtxtPtr ctxt) {
1.123 daniel 4249: xmlChar *name = NULL;
4250: xmlChar *value = NULL;
4251: xmlChar *URI = NULL, *literal = NULL;
4252: xmlChar *ndata = NULL;
1.39 daniel 4253: int isParameter = 0;
1.123 daniel 4254: xmlChar *orig = NULL;
1.22 daniel 4255:
1.94 daniel 4256: GROW;
1.152 daniel 4257: if ((RAW == '<') && (NXT(1) == '!') &&
1.40 daniel 4258: (NXT(2) == 'E') && (NXT(3) == 'N') &&
4259: (NXT(4) == 'T') && (NXT(5) == 'I') &&
1.59 daniel 4260: (NXT(6) == 'T') && (NXT(7) == 'Y')) {
1.96 daniel 4261: ctxt->instate = XML_PARSER_ENTITY_DECL;
1.91 daniel 4262: SHRINK;
1.40 daniel 4263: SKIP(8);
1.59 daniel 4264: if (!IS_BLANK(CUR)) {
4265: if ((ctxt->sax != NULL) && (ctxt->sax->error != NULL))
1.122 daniel 4266: ctxt->sax->error(ctxt->userData,
4267: "Space required after '<!ENTITY'\n");
1.123 daniel 4268: ctxt->errNo = XML_ERR_SPACE_REQUIRED;
1.59 daniel 4269: ctxt->wellFormed = 0;
4270: }
4271: SKIP_BLANKS;
1.40 daniel 4272:
1.152 daniel 4273: if (RAW == '%') {
1.40 daniel 4274: NEXT;
1.59 daniel 4275: if (!IS_BLANK(CUR)) {
4276: if ((ctxt->sax != NULL) && (ctxt->sax->error != NULL))
1.122 daniel 4277: ctxt->sax->error(ctxt->userData,
4278: "Space required after '%'\n");
1.123 daniel 4279: ctxt->errNo = XML_ERR_SPACE_REQUIRED;
1.59 daniel 4280: ctxt->wellFormed = 0;
4281: }
1.42 daniel 4282: SKIP_BLANKS;
1.39 daniel 4283: isParameter = 1;
1.22 daniel 4284: }
4285:
4286: name = xmlParseName(ctxt);
1.24 daniel 4287: if (name == NULL) {
1.55 daniel 4288: if ((ctxt->sax != NULL) && (ctxt->sax->error != NULL))
1.74 daniel 4289: ctxt->sax->error(ctxt->userData, "xmlParseEntityDecl: no name\n");
1.123 daniel 4290: ctxt->errNo = XML_ERR_NAME_REQUIRED;
1.59 daniel 4291: ctxt->wellFormed = 0;
1.24 daniel 4292: return;
4293: }
1.59 daniel 4294: if (!IS_BLANK(CUR)) {
4295: if ((ctxt->sax != NULL) && (ctxt->sax->error != NULL))
1.74 daniel 4296: ctxt->sax->error(ctxt->userData,
1.59 daniel 4297: "Space required after the entity name\n");
1.123 daniel 4298: ctxt->errNo = XML_ERR_SPACE_REQUIRED;
1.59 daniel 4299: ctxt->wellFormed = 0;
4300: }
1.42 daniel 4301: SKIP_BLANKS;
1.24 daniel 4302:
1.22 daniel 4303: /*
1.68 daniel 4304: * handle the various case of definitions...
1.22 daniel 4305: */
1.39 daniel 4306: if (isParameter) {
1.152 daniel 4307: if ((RAW == '"') || (RAW == '\''))
1.78 daniel 4308: value = xmlParseEntityValue(ctxt, &orig);
1.39 daniel 4309: if (value) {
1.171 ! daniel 4310: if ((ctxt->sax != NULL) &&
! 4311: (!ctxt->disableSAX) && (ctxt->sax->entityDecl != NULL))
1.74 daniel 4312: ctxt->sax->entityDecl(ctxt->userData, name,
1.39 daniel 4313: XML_INTERNAL_PARAMETER_ENTITY,
4314: NULL, NULL, value);
4315: }
1.24 daniel 4316: else {
1.67 daniel 4317: URI = xmlParseExternalID(ctxt, &literal, 1);
1.169 daniel 4318: if ((URI == NULL) && (literal == NULL)) {
4319: if ((ctxt->sax != NULL) && (ctxt->sax->error != NULL))
4320: ctxt->sax->error(ctxt->userData,
4321: "Entity value required\n");
4322: ctxt->errNo = XML_ERR_VALUE_REQUIRED;
4323: ctxt->wellFormed = 0;
4324: }
1.39 daniel 4325: if (URI) {
1.171 ! daniel 4326: if ((ctxt->sax != NULL) &&
! 4327: (!ctxt->disableSAX) && (ctxt->sax->entityDecl != NULL))
1.74 daniel 4328: ctxt->sax->entityDecl(ctxt->userData, name,
1.39 daniel 4329: XML_EXTERNAL_PARAMETER_ENTITY,
4330: literal, URI, NULL);
4331: }
1.24 daniel 4332: }
4333: } else {
1.152 daniel 4334: if ((RAW == '"') || (RAW == '\'')) {
1.78 daniel 4335: value = xmlParseEntityValue(ctxt, &orig);
1.171 ! daniel 4336: if ((ctxt->sax != NULL) &&
! 4337: (!ctxt->disableSAX) && (ctxt->sax->entityDecl != NULL))
1.74 daniel 4338: ctxt->sax->entityDecl(ctxt->userData, name,
1.39 daniel 4339: XML_INTERNAL_GENERAL_ENTITY,
4340: NULL, NULL, value);
4341: } else {
1.67 daniel 4342: URI = xmlParseExternalID(ctxt, &literal, 1);
1.169 daniel 4343: if ((URI == NULL) && (literal == NULL)) {
4344: if ((ctxt->sax != NULL) && (ctxt->sax->error != NULL))
4345: ctxt->sax->error(ctxt->userData,
4346: "Entity value required\n");
4347: ctxt->errNo = XML_ERR_VALUE_REQUIRED;
4348: ctxt->wellFormed = 0;
4349: }
1.152 daniel 4350: if ((RAW != '>') && (!IS_BLANK(CUR))) {
1.59 daniel 4351: if ((ctxt->sax != NULL) && (ctxt->sax->error != NULL))
1.74 daniel 4352: ctxt->sax->error(ctxt->userData,
1.59 daniel 4353: "Space required before 'NDATA'\n");
1.123 daniel 4354: ctxt->errNo = XML_ERR_SPACE_REQUIRED;
1.59 daniel 4355: ctxt->wellFormed = 0;
4356: }
1.42 daniel 4357: SKIP_BLANKS;
1.152 daniel 4358: if ((RAW == 'N') && (NXT(1) == 'D') &&
1.40 daniel 4359: (NXT(2) == 'A') && (NXT(3) == 'T') &&
4360: (NXT(4) == 'A')) {
4361: SKIP(5);
1.59 daniel 4362: if (!IS_BLANK(CUR)) {
4363: if ((ctxt->sax != NULL) && (ctxt->sax->error != NULL))
1.74 daniel 4364: ctxt->sax->error(ctxt->userData,
1.59 daniel 4365: "Space required after 'NDATA'\n");
1.123 daniel 4366: ctxt->errNo = XML_ERR_SPACE_REQUIRED;
1.59 daniel 4367: ctxt->wellFormed = 0;
4368: }
1.42 daniel 4369: SKIP_BLANKS;
1.24 daniel 4370: ndata = xmlParseName(ctxt);
1.171 ! daniel 4371: if ((ctxt->sax != NULL) && (!ctxt->disableSAX) &&
1.116 daniel 4372: (ctxt->sax->unparsedEntityDecl != NULL))
4373: ctxt->sax->unparsedEntityDecl(ctxt->userData, name,
1.39 daniel 4374: literal, URI, ndata);
4375: } else {
1.171 ! daniel 4376: if ((ctxt->sax != NULL) &&
! 4377: (!ctxt->disableSAX) && (ctxt->sax->entityDecl != NULL))
1.74 daniel 4378: ctxt->sax->entityDecl(ctxt->userData, name,
1.39 daniel 4379: XML_EXTERNAL_GENERAL_PARSED_ENTITY,
4380: literal, URI, NULL);
1.24 daniel 4381: }
4382: }
4383: }
1.42 daniel 4384: SKIP_BLANKS;
1.152 daniel 4385: if (RAW != '>') {
1.55 daniel 4386: if ((ctxt->sax != NULL) && (ctxt->sax->error != NULL))
1.74 daniel 4387: ctxt->sax->error(ctxt->userData,
1.31 daniel 4388: "xmlParseEntityDecl: entity %s not terminated\n", name);
1.123 daniel 4389: ctxt->errNo = XML_ERR_ENTITY_NOT_FINISHED;
1.59 daniel 4390: ctxt->wellFormed = 0;
1.24 daniel 4391: } else
1.40 daniel 4392: NEXT;
1.78 daniel 4393: if (orig != NULL) {
4394: /*
1.98 daniel 4395: * Ugly mechanism to save the raw entity value.
1.78 daniel 4396: */
4397: xmlEntityPtr cur = NULL;
4398:
1.98 daniel 4399: if (isParameter) {
4400: if ((ctxt->sax != NULL) &&
4401: (ctxt->sax->getParameterEntity != NULL))
1.120 daniel 4402: cur = ctxt->sax->getParameterEntity(ctxt->userData, name);
1.98 daniel 4403: } else {
4404: if ((ctxt->sax != NULL) &&
4405: (ctxt->sax->getEntity != NULL))
1.120 daniel 4406: cur = ctxt->sax->getEntity(ctxt->userData, name);
1.98 daniel 4407: }
4408: if (cur != NULL) {
4409: if (cur->orig != NULL)
1.119 daniel 4410: xmlFree(orig);
1.98 daniel 4411: else
4412: cur->orig = orig;
4413: } else
1.119 daniel 4414: xmlFree(orig);
1.78 daniel 4415: }
1.119 daniel 4416: if (name != NULL) xmlFree(name);
4417: if (value != NULL) xmlFree(value);
4418: if (URI != NULL) xmlFree(URI);
4419: if (literal != NULL) xmlFree(literal);
4420: if (ndata != NULL) xmlFree(ndata);
1.22 daniel 4421: }
4422: }
4423:
1.50 daniel 4424: /**
1.59 daniel 4425: * xmlParseDefaultDecl:
4426: * @ctxt: an XML parser context
4427: * @value: Receive a possible fixed default value for the attribute
4428: *
4429: * Parse an attribute default declaration
4430: *
4431: * [60] DefaultDecl ::= '#REQUIRED' | '#IMPLIED' | (('#FIXED' S)? AttValue)
4432: *
1.99 daniel 4433: * [ VC: Required Attribute ]
1.117 daniel 4434: * if the default declaration is the keyword #REQUIRED, then the
4435: * attribute must be specified for all elements of the type in the
4436: * attribute-list declaration.
1.99 daniel 4437: *
4438: * [ VC: Attribute Default Legal ]
1.102 daniel 4439: * The declared default value must meet the lexical constraints of
4440: * the declared attribute type c.f. xmlValidateAttributeDecl()
1.99 daniel 4441: *
4442: * [ VC: Fixed Attribute Default ]
1.117 daniel 4443: * if an attribute has a default value declared with the #FIXED
4444: * keyword, instances of that attribute must match the default value.
1.99 daniel 4445: *
4446: * [ WFC: No < in Attribute Values ]
4447: * handled in xmlParseAttValue()
4448: *
1.59 daniel 4449: * returns: XML_ATTRIBUTE_NONE, XML_ATTRIBUTE_REQUIRED, XML_ATTRIBUTE_IMPLIED
4450: * or XML_ATTRIBUTE_FIXED.
4451: */
4452:
4453: int
1.123 daniel 4454: xmlParseDefaultDecl(xmlParserCtxtPtr ctxt, xmlChar **value) {
1.59 daniel 4455: int val;
1.123 daniel 4456: xmlChar *ret;
1.59 daniel 4457:
4458: *value = NULL;
1.152 daniel 4459: if ((RAW == '#') && (NXT(1) == 'R') &&
1.59 daniel 4460: (NXT(2) == 'E') && (NXT(3) == 'Q') &&
4461: (NXT(4) == 'U') && (NXT(5) == 'I') &&
4462: (NXT(6) == 'R') && (NXT(7) == 'E') &&
4463: (NXT(8) == 'D')) {
4464: SKIP(9);
4465: return(XML_ATTRIBUTE_REQUIRED);
4466: }
1.152 daniel 4467: if ((RAW == '#') && (NXT(1) == 'I') &&
1.59 daniel 4468: (NXT(2) == 'M') && (NXT(3) == 'P') &&
4469: (NXT(4) == 'L') && (NXT(5) == 'I') &&
4470: (NXT(6) == 'E') && (NXT(7) == 'D')) {
4471: SKIP(8);
4472: return(XML_ATTRIBUTE_IMPLIED);
4473: }
4474: val = XML_ATTRIBUTE_NONE;
1.152 daniel 4475: if ((RAW == '#') && (NXT(1) == 'F') &&
1.59 daniel 4476: (NXT(2) == 'I') && (NXT(3) == 'X') &&
4477: (NXT(4) == 'E') && (NXT(5) == 'D')) {
4478: SKIP(6);
4479: val = XML_ATTRIBUTE_FIXED;
4480: if (!IS_BLANK(CUR)) {
4481: if ((ctxt->sax != NULL) && (ctxt->sax->error != NULL))
1.122 daniel 4482: ctxt->sax->error(ctxt->userData,
4483: "Space required after '#FIXED'\n");
1.123 daniel 4484: ctxt->errNo = XML_ERR_SPACE_REQUIRED;
1.59 daniel 4485: ctxt->wellFormed = 0;
4486: }
4487: SKIP_BLANKS;
4488: }
4489: ret = xmlParseAttValue(ctxt);
1.96 daniel 4490: ctxt->instate = XML_PARSER_DTD;
1.59 daniel 4491: if (ret == NULL) {
4492: if ((ctxt->sax != NULL) && (ctxt->sax->error != NULL))
1.74 daniel 4493: ctxt->sax->error(ctxt->userData,
1.59 daniel 4494: "Attribute default value declaration error\n");
4495: ctxt->wellFormed = 0;
4496: } else
4497: *value = ret;
4498: return(val);
4499: }
4500:
4501: /**
1.66 daniel 4502: * xmlParseNotationType:
4503: * @ctxt: an XML parser context
4504: *
4505: * parse an Notation attribute type.
4506: *
1.99 daniel 4507: * Note: the leading 'NOTATION' S part has already being parsed...
4508: *
1.66 daniel 4509: * [58] NotationType ::= 'NOTATION' S '(' S? Name (S? '|' S? Name)* S? ')'
4510: *
1.99 daniel 4511: * [ VC: Notation Attributes ]
1.117 daniel 4512: * Values of this type must match one of the notation names included
1.99 daniel 4513: * in the declaration; all notation names in the declaration must be declared.
1.66 daniel 4514: *
4515: * Returns: the notation attribute tree built while parsing
4516: */
4517:
4518: xmlEnumerationPtr
4519: xmlParseNotationType(xmlParserCtxtPtr ctxt) {
1.123 daniel 4520: xmlChar *name;
1.66 daniel 4521: xmlEnumerationPtr ret = NULL, last = NULL, cur;
4522:
1.152 daniel 4523: if (RAW != '(') {
1.66 daniel 4524: if ((ctxt->sax != NULL) && (ctxt->sax->error != NULL))
1.122 daniel 4525: ctxt->sax->error(ctxt->userData,
4526: "'(' required to start 'NOTATION'\n");
1.123 daniel 4527: ctxt->errNo = XML_ERR_NOTATION_NOT_STARTED;
1.66 daniel 4528: ctxt->wellFormed = 0;
4529: return(NULL);
4530: }
1.91 daniel 4531: SHRINK;
1.66 daniel 4532: do {
4533: NEXT;
4534: SKIP_BLANKS;
4535: name = xmlParseName(ctxt);
4536: if (name == NULL) {
4537: if ((ctxt->sax != NULL) && (ctxt->sax->error != NULL))
1.74 daniel 4538: ctxt->sax->error(ctxt->userData,
1.66 daniel 4539: "Name expected in NOTATION declaration\n");
1.123 daniel 4540: ctxt->errNo = XML_ERR_NAME_REQUIRED;
1.66 daniel 4541: ctxt->wellFormed = 0;
4542: return(ret);
4543: }
4544: cur = xmlCreateEnumeration(name);
1.119 daniel 4545: xmlFree(name);
1.66 daniel 4546: if (cur == NULL) return(ret);
4547: if (last == NULL) ret = last = cur;
4548: else {
4549: last->next = cur;
4550: last = cur;
4551: }
4552: SKIP_BLANKS;
1.152 daniel 4553: } while (RAW == '|');
4554: if (RAW != ')') {
1.66 daniel 4555: if ((ctxt->sax != NULL) && (ctxt->sax->error != NULL))
1.74 daniel 4556: ctxt->sax->error(ctxt->userData,
1.66 daniel 4557: "')' required to finish NOTATION declaration\n");
1.123 daniel 4558: ctxt->errNo = XML_ERR_NOTATION_NOT_FINISHED;
1.66 daniel 4559: ctxt->wellFormed = 0;
1.170 daniel 4560: if ((last != NULL) && (last != ret))
4561: xmlFreeEnumeration(last);
1.66 daniel 4562: return(ret);
4563: }
4564: NEXT;
4565: return(ret);
4566: }
4567:
4568: /**
4569: * xmlParseEnumerationType:
4570: * @ctxt: an XML parser context
4571: *
4572: * parse an Enumeration attribute type.
4573: *
4574: * [59] Enumeration ::= '(' S? Nmtoken (S? '|' S? Nmtoken)* S? ')'
4575: *
1.99 daniel 4576: * [ VC: Enumeration ]
1.117 daniel 4577: * Values of this type must match one of the Nmtoken tokens in
1.99 daniel 4578: * the declaration
4579: *
1.66 daniel 4580: * Returns: the enumeration attribute tree built while parsing
4581: */
4582:
4583: xmlEnumerationPtr
4584: xmlParseEnumerationType(xmlParserCtxtPtr ctxt) {
1.123 daniel 4585: xmlChar *name;
1.66 daniel 4586: xmlEnumerationPtr ret = NULL, last = NULL, cur;
4587:
1.152 daniel 4588: if (RAW != '(') {
1.66 daniel 4589: if ((ctxt->sax != NULL) && (ctxt->sax->error != NULL))
1.74 daniel 4590: ctxt->sax->error(ctxt->userData,
1.66 daniel 4591: "'(' required to start ATTLIST enumeration\n");
1.123 daniel 4592: ctxt->errNo = XML_ERR_ATTLIST_NOT_STARTED;
1.66 daniel 4593: ctxt->wellFormed = 0;
4594: return(NULL);
4595: }
1.91 daniel 4596: SHRINK;
1.66 daniel 4597: do {
4598: NEXT;
4599: SKIP_BLANKS;
4600: name = xmlParseNmtoken(ctxt);
4601: if (name == NULL) {
4602: if ((ctxt->sax != NULL) && (ctxt->sax->error != NULL))
1.74 daniel 4603: ctxt->sax->error(ctxt->userData,
1.66 daniel 4604: "NmToken expected in ATTLIST enumeration\n");
1.123 daniel 4605: ctxt->errNo = XML_ERR_NMTOKEN_REQUIRED;
1.66 daniel 4606: ctxt->wellFormed = 0;
4607: return(ret);
4608: }
4609: cur = xmlCreateEnumeration(name);
1.119 daniel 4610: xmlFree(name);
1.66 daniel 4611: if (cur == NULL) return(ret);
4612: if (last == NULL) ret = last = cur;
4613: else {
4614: last->next = cur;
4615: last = cur;
4616: }
4617: SKIP_BLANKS;
1.152 daniel 4618: } while (RAW == '|');
4619: if (RAW != ')') {
1.66 daniel 4620: if ((ctxt->sax != NULL) && (ctxt->sax->error != NULL))
1.74 daniel 4621: ctxt->sax->error(ctxt->userData,
1.66 daniel 4622: "')' required to finish ATTLIST enumeration\n");
1.123 daniel 4623: ctxt->errNo = XML_ERR_ATTLIST_NOT_FINISHED;
1.66 daniel 4624: ctxt->wellFormed = 0;
4625: return(ret);
4626: }
4627: NEXT;
4628: return(ret);
4629: }
4630:
4631: /**
1.50 daniel 4632: * xmlParseEnumeratedType:
4633: * @ctxt: an XML parser context
1.66 daniel 4634: * @tree: the enumeration tree built while parsing
1.50 daniel 4635: *
1.66 daniel 4636: * parse an Enumerated attribute type.
1.22 daniel 4637: *
4638: * [57] EnumeratedType ::= NotationType | Enumeration
4639: *
4640: * [58] NotationType ::= 'NOTATION' S '(' S? Name (S? '|' S? Name)* S? ')'
4641: *
1.50 daniel 4642: *
1.66 daniel 4643: * Returns: XML_ATTRIBUTE_ENUMERATION or XML_ATTRIBUTE_NOTATION
1.22 daniel 4644: */
4645:
1.66 daniel 4646: int
4647: xmlParseEnumeratedType(xmlParserCtxtPtr ctxt, xmlEnumerationPtr *tree) {
1.152 daniel 4648: if ((RAW == 'N') && (NXT(1) == 'O') &&
1.66 daniel 4649: (NXT(2) == 'T') && (NXT(3) == 'A') &&
4650: (NXT(4) == 'T') && (NXT(5) == 'I') &&
4651: (NXT(6) == 'O') && (NXT(7) == 'N')) {
4652: SKIP(8);
4653: if (!IS_BLANK(CUR)) {
4654: if ((ctxt->sax != NULL) && (ctxt->sax->error != NULL))
1.122 daniel 4655: ctxt->sax->error(ctxt->userData,
4656: "Space required after 'NOTATION'\n");
1.123 daniel 4657: ctxt->errNo = XML_ERR_SPACE_REQUIRED;
1.66 daniel 4658: ctxt->wellFormed = 0;
4659: return(0);
4660: }
4661: SKIP_BLANKS;
4662: *tree = xmlParseNotationType(ctxt);
4663: if (*tree == NULL) return(0);
4664: return(XML_ATTRIBUTE_NOTATION);
4665: }
4666: *tree = xmlParseEnumerationType(ctxt);
4667: if (*tree == NULL) return(0);
4668: return(XML_ATTRIBUTE_ENUMERATION);
1.22 daniel 4669: }
4670:
1.50 daniel 4671: /**
4672: * xmlParseAttributeType:
4673: * @ctxt: an XML parser context
1.66 daniel 4674: * @tree: the enumeration tree built while parsing
1.50 daniel 4675: *
1.59 daniel 4676: * parse the Attribute list def for an element
1.22 daniel 4677: *
4678: * [54] AttType ::= StringType | TokenizedType | EnumeratedType
4679: *
4680: * [55] StringType ::= 'CDATA'
4681: *
4682: * [56] TokenizedType ::= 'ID' | 'IDREF' | 'IDREFS' | 'ENTITY' |
4683: * 'ENTITIES' | 'NMTOKEN' | 'NMTOKENS'
1.50 daniel 4684: *
1.102 daniel 4685: * Validity constraints for attribute values syntax are checked in
4686: * xmlValidateAttributeValue()
4687: *
1.99 daniel 4688: * [ VC: ID ]
1.117 daniel 4689: * Values of type ID must match the Name production. A name must not
1.99 daniel 4690: * appear more than once in an XML document as a value of this type;
4691: * i.e., ID values must uniquely identify the elements which bear them.
4692: *
4693: * [ VC: One ID per Element Type ]
1.117 daniel 4694: * No element type may have more than one ID attribute specified.
1.99 daniel 4695: *
4696: * [ VC: ID Attribute Default ]
1.117 daniel 4697: * An ID attribute must have a declared default of #IMPLIED or #REQUIRED.
1.99 daniel 4698: *
4699: * [ VC: IDREF ]
1.102 daniel 4700: * Values of type IDREF must match the Name production, and values
1.140 daniel 4701: * of type IDREFS must match Names; each IDREF Name must match the value
1.117 daniel 4702: * of an ID attribute on some element in the XML document; i.e. IDREF
1.99 daniel 4703: * values must match the value of some ID attribute.
4704: *
4705: * [ VC: Entity Name ]
1.102 daniel 4706: * Values of type ENTITY must match the Name production, values
1.140 daniel 4707: * of type ENTITIES must match Names; each Entity Name must match the
1.117 daniel 4708: * name of an unparsed entity declared in the DTD.
1.99 daniel 4709: *
4710: * [ VC: Name Token ]
1.102 daniel 4711: * Values of type NMTOKEN must match the Nmtoken production; values
1.99 daniel 4712: * of type NMTOKENS must match Nmtokens.
4713: *
1.69 daniel 4714: * Returns the attribute type
1.22 daniel 4715: */
1.59 daniel 4716: int
1.66 daniel 4717: xmlParseAttributeType(xmlParserCtxtPtr ctxt, xmlEnumerationPtr *tree) {
1.91 daniel 4718: SHRINK;
1.152 daniel 4719: if ((RAW == 'C') && (NXT(1) == 'D') &&
1.40 daniel 4720: (NXT(2) == 'A') && (NXT(3) == 'T') &&
4721: (NXT(4) == 'A')) {
4722: SKIP(5);
1.66 daniel 4723: return(XML_ATTRIBUTE_CDATA);
1.152 daniel 4724: } else if ((RAW == 'I') && (NXT(1) == 'D') &&
1.40 daniel 4725: (NXT(2) == 'R') && (NXT(3) == 'E') &&
1.97 daniel 4726: (NXT(4) == 'F') && (NXT(5) == 'S')) {
4727: SKIP(6);
4728: return(XML_ATTRIBUTE_IDREFS);
1.152 daniel 4729: } else if ((RAW == 'I') && (NXT(1) == 'D') &&
1.97 daniel 4730: (NXT(2) == 'R') && (NXT(3) == 'E') &&
1.40 daniel 4731: (NXT(4) == 'F')) {
4732: SKIP(5);
1.59 daniel 4733: return(XML_ATTRIBUTE_IDREF);
1.152 daniel 4734: } else if ((RAW == 'I') && (NXT(1) == 'D')) {
1.66 daniel 4735: SKIP(2);
4736: return(XML_ATTRIBUTE_ID);
1.152 daniel 4737: } else if ((RAW == 'E') && (NXT(1) == 'N') &&
1.40 daniel 4738: (NXT(2) == 'T') && (NXT(3) == 'I') &&
4739: (NXT(4) == 'T') && (NXT(5) == 'Y')) {
4740: SKIP(6);
1.59 daniel 4741: return(XML_ATTRIBUTE_ENTITY);
1.152 daniel 4742: } else if ((RAW == 'E') && (NXT(1) == 'N') &&
1.40 daniel 4743: (NXT(2) == 'T') && (NXT(3) == 'I') &&
4744: (NXT(4) == 'T') && (NXT(5) == 'I') &&
4745: (NXT(6) == 'E') && (NXT(7) == 'S')) {
4746: SKIP(8);
1.59 daniel 4747: return(XML_ATTRIBUTE_ENTITIES);
1.152 daniel 4748: } else if ((RAW == 'N') && (NXT(1) == 'M') &&
1.40 daniel 4749: (NXT(2) == 'T') && (NXT(3) == 'O') &&
4750: (NXT(4) == 'K') && (NXT(5) == 'E') &&
1.66 daniel 4751: (NXT(6) == 'N') && (NXT(7) == 'S')) {
4752: SKIP(8);
4753: return(XML_ATTRIBUTE_NMTOKENS);
1.152 daniel 4754: } else if ((RAW == 'N') && (NXT(1) == 'M') &&
1.66 daniel 4755: (NXT(2) == 'T') && (NXT(3) == 'O') &&
4756: (NXT(4) == 'K') && (NXT(5) == 'E') &&
1.40 daniel 4757: (NXT(6) == 'N')) {
4758: SKIP(7);
1.59 daniel 4759: return(XML_ATTRIBUTE_NMTOKEN);
1.22 daniel 4760: }
1.66 daniel 4761: return(xmlParseEnumeratedType(ctxt, tree));
1.22 daniel 4762: }
4763:
1.50 daniel 4764: /**
4765: * xmlParseAttributeListDecl:
4766: * @ctxt: an XML parser context
4767: *
4768: * : parse the Attribute list def for an element
1.22 daniel 4769: *
4770: * [52] AttlistDecl ::= '<!ATTLIST' S Name AttDef* S? '>'
4771: *
4772: * [53] AttDef ::= S Name S AttType S DefaultDecl
1.50 daniel 4773: *
1.22 daniel 4774: */
1.55 daniel 4775: void
4776: xmlParseAttributeListDecl(xmlParserCtxtPtr ctxt) {
1.123 daniel 4777: xmlChar *elemName;
4778: xmlChar *attrName;
1.103 daniel 4779: xmlEnumerationPtr tree;
1.22 daniel 4780:
1.152 daniel 4781: if ((RAW == '<') && (NXT(1) == '!') &&
1.40 daniel 4782: (NXT(2) == 'A') && (NXT(3) == 'T') &&
4783: (NXT(4) == 'T') && (NXT(5) == 'L') &&
4784: (NXT(6) == 'I') && (NXT(7) == 'S') &&
1.59 daniel 4785: (NXT(8) == 'T')) {
1.40 daniel 4786: SKIP(9);
1.59 daniel 4787: if (!IS_BLANK(CUR)) {
4788: if ((ctxt->sax != NULL) && (ctxt->sax->error != NULL))
1.122 daniel 4789: ctxt->sax->error(ctxt->userData,
4790: "Space required after '<!ATTLIST'\n");
1.123 daniel 4791: ctxt->errNo = XML_ERR_SPACE_REQUIRED;
1.59 daniel 4792: ctxt->wellFormed = 0;
4793: }
1.42 daniel 4794: SKIP_BLANKS;
1.59 daniel 4795: elemName = xmlParseName(ctxt);
4796: if (elemName == NULL) {
1.55 daniel 4797: if ((ctxt->sax != NULL) && (ctxt->sax->error != NULL))
1.122 daniel 4798: ctxt->sax->error(ctxt->userData,
4799: "ATTLIST: no name for Element\n");
1.123 daniel 4800: ctxt->errNo = XML_ERR_NAME_REQUIRED;
1.59 daniel 4801: ctxt->wellFormed = 0;
1.22 daniel 4802: return;
4803: }
1.42 daniel 4804: SKIP_BLANKS;
1.152 daniel 4805: while (RAW != '>') {
1.123 daniel 4806: const xmlChar *check = CUR_PTR;
1.59 daniel 4807: int type;
4808: int def;
1.123 daniel 4809: xmlChar *defaultValue = NULL;
1.59 daniel 4810:
1.103 daniel 4811: tree = NULL;
1.59 daniel 4812: attrName = xmlParseName(ctxt);
4813: if (attrName == NULL) {
4814: if ((ctxt->sax != NULL) && (ctxt->sax->error != NULL))
1.122 daniel 4815: ctxt->sax->error(ctxt->userData,
4816: "ATTLIST: no name for Attribute\n");
1.123 daniel 4817: ctxt->errNo = XML_ERR_NAME_REQUIRED;
1.59 daniel 4818: ctxt->wellFormed = 0;
4819: break;
4820: }
1.97 daniel 4821: GROW;
1.59 daniel 4822: if (!IS_BLANK(CUR)) {
4823: if ((ctxt->sax != NULL) && (ctxt->sax->error != NULL))
1.74 daniel 4824: ctxt->sax->error(ctxt->userData,
1.59 daniel 4825: "Space required after the attribute name\n");
1.123 daniel 4826: ctxt->errNo = XML_ERR_SPACE_REQUIRED;
1.59 daniel 4827: ctxt->wellFormed = 0;
1.170 daniel 4828: if (attrName != NULL)
4829: xmlFree(attrName);
4830: if (defaultValue != NULL)
4831: xmlFree(defaultValue);
1.59 daniel 4832: break;
4833: }
4834: SKIP_BLANKS;
4835:
1.66 daniel 4836: type = xmlParseAttributeType(ctxt, &tree);
1.170 daniel 4837: if (type <= 0) {
4838: if (attrName != NULL)
4839: xmlFree(attrName);
4840: if (defaultValue != NULL)
4841: xmlFree(defaultValue);
4842: break;
4843: }
1.22 daniel 4844:
1.97 daniel 4845: GROW;
1.59 daniel 4846: if (!IS_BLANK(CUR)) {
4847: if ((ctxt->sax != NULL) && (ctxt->sax->error != NULL))
1.74 daniel 4848: ctxt->sax->error(ctxt->userData,
1.59 daniel 4849: "Space required after the attribute type\n");
1.123 daniel 4850: ctxt->errNo = XML_ERR_SPACE_REQUIRED;
1.59 daniel 4851: ctxt->wellFormed = 0;
1.170 daniel 4852: if (attrName != NULL)
4853: xmlFree(attrName);
4854: if (defaultValue != NULL)
4855: xmlFree(defaultValue);
4856: if (tree != NULL)
4857: xmlFreeEnumeration(tree);
1.59 daniel 4858: break;
4859: }
1.42 daniel 4860: SKIP_BLANKS;
1.59 daniel 4861:
4862: def = xmlParseDefaultDecl(ctxt, &defaultValue);
1.170 daniel 4863: if (def <= 0) {
4864: if (attrName != NULL)
4865: xmlFree(attrName);
4866: if (defaultValue != NULL)
4867: xmlFree(defaultValue);
4868: if (tree != NULL)
4869: xmlFreeEnumeration(tree);
4870: break;
4871: }
1.59 daniel 4872:
1.97 daniel 4873: GROW;
1.152 daniel 4874: if (RAW != '>') {
1.59 daniel 4875: if (!IS_BLANK(CUR)) {
4876: if ((ctxt->sax != NULL) && (ctxt->sax->error != NULL))
1.74 daniel 4877: ctxt->sax->error(ctxt->userData,
1.59 daniel 4878: "Space required after the attribute default value\n");
1.123 daniel 4879: ctxt->errNo = XML_ERR_SPACE_REQUIRED;
1.59 daniel 4880: ctxt->wellFormed = 0;
1.170 daniel 4881: if (attrName != NULL)
4882: xmlFree(attrName);
4883: if (defaultValue != NULL)
4884: xmlFree(defaultValue);
4885: if (tree != NULL)
4886: xmlFreeEnumeration(tree);
1.59 daniel 4887: break;
4888: }
4889: SKIP_BLANKS;
4890: }
1.40 daniel 4891: if (check == CUR_PTR) {
1.55 daniel 4892: if ((ctxt->sax != NULL) && (ctxt->sax->error != NULL))
1.74 daniel 4893: ctxt->sax->error(ctxt->userData,
1.59 daniel 4894: "xmlParseAttributeListDecl: detected internal error\n");
1.123 daniel 4895: ctxt->errNo = XML_ERR_INTERNAL_ERROR;
1.170 daniel 4896: if (attrName != NULL)
4897: xmlFree(attrName);
4898: if (defaultValue != NULL)
4899: xmlFree(defaultValue);
4900: if (tree != NULL)
4901: xmlFreeEnumeration(tree);
1.22 daniel 4902: break;
4903: }
1.171 ! daniel 4904: if ((ctxt->sax != NULL) && (!ctxt->disableSAX) &&
! 4905: (ctxt->sax->attributeDecl != NULL))
1.74 daniel 4906: ctxt->sax->attributeDecl(ctxt->userData, elemName, attrName,
1.66 daniel 4907: type, def, defaultValue, tree);
1.59 daniel 4908: if (attrName != NULL)
1.119 daniel 4909: xmlFree(attrName);
1.59 daniel 4910: if (defaultValue != NULL)
1.119 daniel 4911: xmlFree(defaultValue);
1.97 daniel 4912: GROW;
1.22 daniel 4913: }
1.152 daniel 4914: if (RAW == '>')
1.40 daniel 4915: NEXT;
1.22 daniel 4916:
1.119 daniel 4917: xmlFree(elemName);
1.22 daniel 4918: }
4919: }
4920:
1.50 daniel 4921: /**
1.61 daniel 4922: * xmlParseElementMixedContentDecl:
4923: * @ctxt: an XML parser context
4924: *
4925: * parse the declaration for a Mixed Element content
4926: * The leading '(' and spaces have been skipped in xmlParseElementContentDecl
4927: *
4928: * [51] Mixed ::= '(' S? '#PCDATA' (S? '|' S? Name)* S? ')*' |
4929: * '(' S? '#PCDATA' S? ')'
4930: *
1.99 daniel 4931: * [ VC: Proper Group/PE Nesting ] applies to [51] too (see [49])
4932: *
4933: * [ VC: No Duplicate Types ]
1.117 daniel 4934: * The same name must not appear more than once in a single
4935: * mixed-content declaration.
1.99 daniel 4936: *
1.61 daniel 4937: * returns: the list of the xmlElementContentPtr describing the element choices
4938: */
4939: xmlElementContentPtr
1.62 daniel 4940: xmlParseElementMixedContentDecl(xmlParserCtxtPtr ctxt) {
1.64 daniel 4941: xmlElementContentPtr ret = NULL, cur = NULL, n;
1.123 daniel 4942: xmlChar *elem = NULL;
1.61 daniel 4943:
1.97 daniel 4944: GROW;
1.152 daniel 4945: if ((RAW == '#') && (NXT(1) == 'P') &&
1.61 daniel 4946: (NXT(2) == 'C') && (NXT(3) == 'D') &&
4947: (NXT(4) == 'A') && (NXT(5) == 'T') &&
4948: (NXT(6) == 'A')) {
4949: SKIP(7);
4950: SKIP_BLANKS;
1.91 daniel 4951: SHRINK;
1.152 daniel 4952: if (RAW == ')') {
1.63 daniel 4953: NEXT;
4954: ret = xmlNewElementContent(NULL, XML_ELEMENT_CONTENT_PCDATA);
1.152 daniel 4955: if (RAW == '*') {
1.136 daniel 4956: ret->ocur = XML_ELEMENT_CONTENT_MULT;
4957: NEXT;
4958: }
1.63 daniel 4959: return(ret);
4960: }
1.152 daniel 4961: if ((RAW == '(') || (RAW == '|')) {
1.61 daniel 4962: ret = cur = xmlNewElementContent(NULL, XML_ELEMENT_CONTENT_PCDATA);
4963: if (ret == NULL) return(NULL);
1.99 daniel 4964: }
1.152 daniel 4965: while (RAW == '|') {
1.64 daniel 4966: NEXT;
1.61 daniel 4967: if (elem == NULL) {
4968: ret = xmlNewElementContent(NULL, XML_ELEMENT_CONTENT_OR);
4969: if (ret == NULL) return(NULL);
4970: ret->c1 = cur;
1.64 daniel 4971: cur = ret;
1.61 daniel 4972: } else {
1.64 daniel 4973: n = xmlNewElementContent(NULL, XML_ELEMENT_CONTENT_OR);
4974: if (n == NULL) return(NULL);
4975: n->c1 = xmlNewElementContent(elem, XML_ELEMENT_CONTENT_ELEMENT);
4976: cur->c2 = n;
4977: cur = n;
1.119 daniel 4978: xmlFree(elem);
1.61 daniel 4979: }
4980: SKIP_BLANKS;
4981: elem = xmlParseName(ctxt);
4982: if (elem == NULL) {
4983: if ((ctxt->sax != NULL) && (ctxt->sax->error != NULL))
1.74 daniel 4984: ctxt->sax->error(ctxt->userData,
1.61 daniel 4985: "xmlParseElementMixedContentDecl : Name expected\n");
1.123 daniel 4986: ctxt->errNo = XML_ERR_NAME_REQUIRED;
1.61 daniel 4987: ctxt->wellFormed = 0;
4988: xmlFreeElementContent(cur);
4989: return(NULL);
4990: }
4991: SKIP_BLANKS;
1.97 daniel 4992: GROW;
1.61 daniel 4993: }
1.152 daniel 4994: if ((RAW == ')') && (NXT(1) == '*')) {
1.66 daniel 4995: if (elem != NULL) {
1.61 daniel 4996: cur->c2 = xmlNewElementContent(elem,
4997: XML_ELEMENT_CONTENT_ELEMENT);
1.119 daniel 4998: xmlFree(elem);
1.66 daniel 4999: }
1.65 daniel 5000: ret->ocur = XML_ELEMENT_CONTENT_MULT;
1.64 daniel 5001: SKIP(2);
1.61 daniel 5002: } else {
1.119 daniel 5003: if (elem != NULL) xmlFree(elem);
1.61 daniel 5004: if ((ctxt->sax != NULL) && (ctxt->sax->error != NULL))
1.74 daniel 5005: ctxt->sax->error(ctxt->userData,
1.63 daniel 5006: "xmlParseElementMixedContentDecl : '|' or ')*' expected\n");
1.123 daniel 5007: ctxt->errNo = XML_ERR_MIXED_NOT_STARTED;
1.61 daniel 5008: ctxt->wellFormed = 0;
5009: xmlFreeElementContent(ret);
5010: return(NULL);
5011: }
5012:
5013: } else {
5014: if ((ctxt->sax != NULL) && (ctxt->sax->error != NULL))
1.74 daniel 5015: ctxt->sax->error(ctxt->userData,
1.61 daniel 5016: "xmlParseElementMixedContentDecl : '#PCDATA' expected\n");
1.123 daniel 5017: ctxt->errNo = XML_ERR_PCDATA_REQUIRED;
1.61 daniel 5018: ctxt->wellFormed = 0;
5019: }
5020: return(ret);
5021: }
5022:
5023: /**
5024: * xmlParseElementChildrenContentDecl:
1.50 daniel 5025: * @ctxt: an XML parser context
5026: *
1.61 daniel 5027: * parse the declaration for a Mixed Element content
5028: * The leading '(' and spaces have been skipped in xmlParseElementContentDecl
1.22 daniel 5029: *
1.61 daniel 5030: *
1.22 daniel 5031: * [47] children ::= (choice | seq) ('?' | '*' | '+')?
5032: *
5033: * [48] cp ::= (Name | choice | seq) ('?' | '*' | '+')?
5034: *
5035: * [49] choice ::= '(' S? cp ( S? '|' S? cp )* S? ')'
5036: *
5037: * [50] seq ::= '(' S? cp ( S? ',' S? cp )* S? ')'
5038: *
1.99 daniel 5039: * [ VC: Proper Group/PE Nesting ] applies to [49] and [50]
5040: * TODO Parameter-entity replacement text must be properly nested
5041: * with parenthetized groups. That is to say, if either of the
5042: * opening or closing parentheses in a choice, seq, or Mixed
5043: * construct is contained in the replacement text for a parameter
5044: * entity, both must be contained in the same replacement text. For
5045: * interoperability, if a parameter-entity reference appears in a
5046: * choice, seq, or Mixed construct, its replacement text should not
5047: * be empty, and neither the first nor last non-blank character of
5048: * the replacement text should be a connector (| or ,).
5049: *
1.62 daniel 5050: * returns: the tree of xmlElementContentPtr describing the element
1.61 daniel 5051: * hierarchy.
5052: */
5053: xmlElementContentPtr
1.62 daniel 5054: xmlParseElementChildrenContentDecl(xmlParserCtxtPtr ctxt) {
1.63 daniel 5055: xmlElementContentPtr ret = NULL, cur = NULL, last = NULL, op = NULL;
1.123 daniel 5056: xmlChar *elem;
5057: xmlChar type = 0;
1.62 daniel 5058:
5059: SKIP_BLANKS;
1.94 daniel 5060: GROW;
1.152 daniel 5061: if (RAW == '(') {
1.63 daniel 5062: /* Recurse on first child */
1.62 daniel 5063: NEXT;
5064: SKIP_BLANKS;
5065: cur = ret = xmlParseElementChildrenContentDecl(ctxt);
5066: SKIP_BLANKS;
1.101 daniel 5067: GROW;
1.62 daniel 5068: } else {
5069: elem = xmlParseName(ctxt);
5070: if (elem == NULL) {
5071: if ((ctxt->sax != NULL) && (ctxt->sax->error != NULL))
1.74 daniel 5072: ctxt->sax->error(ctxt->userData,
1.62 daniel 5073: "xmlParseElementChildrenContentDecl : Name or '(' expected\n");
1.123 daniel 5074: ctxt->errNo = XML_ERR_ELEMCONTENT_NOT_STARTED;
1.62 daniel 5075: ctxt->wellFormed = 0;
5076: return(NULL);
5077: }
5078: cur = ret = xmlNewElementContent(elem, XML_ELEMENT_CONTENT_ELEMENT);
1.101 daniel 5079: GROW;
1.152 daniel 5080: if (RAW == '?') {
1.104 daniel 5081: cur->ocur = XML_ELEMENT_CONTENT_OPT;
1.62 daniel 5082: NEXT;
1.152 daniel 5083: } else if (RAW == '*') {
1.104 daniel 5084: cur->ocur = XML_ELEMENT_CONTENT_MULT;
1.62 daniel 5085: NEXT;
1.152 daniel 5086: } else if (RAW == '+') {
1.104 daniel 5087: cur->ocur = XML_ELEMENT_CONTENT_PLUS;
1.62 daniel 5088: NEXT;
5089: } else {
1.104 daniel 5090: cur->ocur = XML_ELEMENT_CONTENT_ONCE;
1.62 daniel 5091: }
1.119 daniel 5092: xmlFree(elem);
1.101 daniel 5093: GROW;
1.62 daniel 5094: }
5095: SKIP_BLANKS;
1.91 daniel 5096: SHRINK;
1.152 daniel 5097: while (RAW != ')') {
1.63 daniel 5098: /*
5099: * Each loop we parse one separator and one element.
5100: */
1.152 daniel 5101: if (RAW == ',') {
1.62 daniel 5102: if (type == 0) type = CUR;
5103:
5104: /*
5105: * Detect "Name | Name , Name" error
5106: */
5107: else if (type != CUR) {
5108: if ((ctxt->sax != NULL) && (ctxt->sax->error != NULL))
1.74 daniel 5109: ctxt->sax->error(ctxt->userData,
1.62 daniel 5110: "xmlParseElementChildrenContentDecl : '%c' expected\n",
5111: type);
1.123 daniel 5112: ctxt->errNo = XML_ERR_SEPARATOR_REQUIRED;
1.62 daniel 5113: ctxt->wellFormed = 0;
1.170 daniel 5114: if ((op != NULL) && (op != ret))
5115: xmlFreeElementContent(op);
5116: if ((last != NULL) && (last != ret))
5117: xmlFreeElementContent(last);
5118: if (ret != NULL)
5119: xmlFreeElementContent(ret);
1.62 daniel 5120: return(NULL);
5121: }
1.64 daniel 5122: NEXT;
1.62 daniel 5123:
1.63 daniel 5124: op = xmlNewElementContent(NULL, XML_ELEMENT_CONTENT_SEQ);
5125: if (op == NULL) {
5126: xmlFreeElementContent(ret);
5127: return(NULL);
5128: }
5129: if (last == NULL) {
5130: op->c1 = ret;
1.65 daniel 5131: ret = cur = op;
1.63 daniel 5132: } else {
5133: cur->c2 = op;
5134: op->c1 = last;
5135: cur =op;
1.65 daniel 5136: last = NULL;
1.63 daniel 5137: }
1.152 daniel 5138: } else if (RAW == '|') {
1.62 daniel 5139: if (type == 0) type = CUR;
5140:
5141: /*
1.63 daniel 5142: * Detect "Name , Name | Name" error
1.62 daniel 5143: */
5144: else if (type != CUR) {
5145: if ((ctxt->sax != NULL) && (ctxt->sax->error != NULL))
1.74 daniel 5146: ctxt->sax->error(ctxt->userData,
1.62 daniel 5147: "xmlParseElementChildrenContentDecl : '%c' expected\n",
5148: type);
1.123 daniel 5149: ctxt->errNo = XML_ERR_SEPARATOR_REQUIRED;
1.62 daniel 5150: ctxt->wellFormed = 0;
1.170 daniel 5151: if ((op != NULL) && (op != ret))
5152: xmlFreeElementContent(op);
5153: if ((last != NULL) && (last != ret))
5154: xmlFreeElementContent(last);
5155: if (ret != NULL)
5156: xmlFreeElementContent(ret);
1.62 daniel 5157: return(NULL);
5158: }
1.64 daniel 5159: NEXT;
1.62 daniel 5160:
1.63 daniel 5161: op = xmlNewElementContent(NULL, XML_ELEMENT_CONTENT_OR);
5162: if (op == NULL) {
1.170 daniel 5163: if ((op != NULL) && (op != ret))
5164: xmlFreeElementContent(op);
5165: if ((last != NULL) && (last != ret))
5166: xmlFreeElementContent(last);
5167: if (ret != NULL)
5168: xmlFreeElementContent(ret);
1.63 daniel 5169: return(NULL);
5170: }
5171: if (last == NULL) {
5172: op->c1 = ret;
1.65 daniel 5173: ret = cur = op;
1.63 daniel 5174: } else {
5175: cur->c2 = op;
5176: op->c1 = last;
5177: cur =op;
1.65 daniel 5178: last = NULL;
1.63 daniel 5179: }
1.62 daniel 5180: } else {
5181: if ((ctxt->sax != NULL) && (ctxt->sax->error != NULL))
1.74 daniel 5182: ctxt->sax->error(ctxt->userData,
1.62 daniel 5183: "xmlParseElementChildrenContentDecl : ',' '|' or ')' expected\n");
5184: ctxt->wellFormed = 0;
1.123 daniel 5185: ctxt->errNo = XML_ERR_ELEMCONTENT_NOT_FINISHED;
1.170 daniel 5186: if ((op != NULL) && (op != ret))
5187: xmlFreeElementContent(op);
5188: if ((last != NULL) && (last != ret))
5189: xmlFreeElementContent(last);
5190: if (ret != NULL)
5191: xmlFreeElementContent(ret);
1.62 daniel 5192: return(NULL);
5193: }
1.101 daniel 5194: GROW;
1.62 daniel 5195: SKIP_BLANKS;
1.101 daniel 5196: GROW;
1.152 daniel 5197: if (RAW == '(') {
1.63 daniel 5198: /* Recurse on second child */
1.62 daniel 5199: NEXT;
5200: SKIP_BLANKS;
1.65 daniel 5201: last = xmlParseElementChildrenContentDecl(ctxt);
1.62 daniel 5202: SKIP_BLANKS;
5203: } else {
5204: elem = xmlParseName(ctxt);
5205: if (elem == NULL) {
5206: if ((ctxt->sax != NULL) && (ctxt->sax->error != NULL))
1.74 daniel 5207: ctxt->sax->error(ctxt->userData,
1.122 daniel 5208: "xmlParseElementChildrenContentDecl : Name or '(' expected\n");
1.123 daniel 5209: ctxt->errNo = XML_ERR_ELEMCONTENT_NOT_STARTED;
1.62 daniel 5210: ctxt->wellFormed = 0;
1.170 daniel 5211: if ((op != NULL) && (op != ret))
5212: xmlFreeElementContent(op);
5213: if ((last != NULL) && (last != ret))
5214: xmlFreeElementContent(last);
5215: if (ret != NULL)
5216: xmlFreeElementContent(ret);
1.62 daniel 5217: return(NULL);
5218: }
1.65 daniel 5219: last = xmlNewElementContent(elem, XML_ELEMENT_CONTENT_ELEMENT);
1.119 daniel 5220: xmlFree(elem);
1.152 daniel 5221: if (RAW == '?') {
1.105 daniel 5222: last->ocur = XML_ELEMENT_CONTENT_OPT;
5223: NEXT;
1.152 daniel 5224: } else if (RAW == '*') {
1.105 daniel 5225: last->ocur = XML_ELEMENT_CONTENT_MULT;
5226: NEXT;
1.152 daniel 5227: } else if (RAW == '+') {
1.105 daniel 5228: last->ocur = XML_ELEMENT_CONTENT_PLUS;
5229: NEXT;
5230: } else {
5231: last->ocur = XML_ELEMENT_CONTENT_ONCE;
5232: }
1.63 daniel 5233: }
5234: SKIP_BLANKS;
1.97 daniel 5235: GROW;
1.64 daniel 5236: }
1.65 daniel 5237: if ((cur != NULL) && (last != NULL)) {
5238: cur->c2 = last;
1.62 daniel 5239: }
5240: NEXT;
1.152 daniel 5241: if (RAW == '?') {
1.62 daniel 5242: ret->ocur = XML_ELEMENT_CONTENT_OPT;
5243: NEXT;
1.152 daniel 5244: } else if (RAW == '*') {
1.62 daniel 5245: ret->ocur = XML_ELEMENT_CONTENT_MULT;
5246: NEXT;
1.152 daniel 5247: } else if (RAW == '+') {
1.62 daniel 5248: ret->ocur = XML_ELEMENT_CONTENT_PLUS;
5249: NEXT;
5250: }
5251: return(ret);
1.61 daniel 5252: }
5253:
5254: /**
5255: * xmlParseElementContentDecl:
5256: * @ctxt: an XML parser context
5257: * @name: the name of the element being defined.
5258: * @result: the Element Content pointer will be stored here if any
1.22 daniel 5259: *
1.61 daniel 5260: * parse the declaration for an Element content either Mixed or Children,
5261: * the cases EMPTY and ANY are handled directly in xmlParseElementDecl
5262: *
5263: * [46] contentspec ::= 'EMPTY' | 'ANY' | Mixed | children
1.50 daniel 5264: *
1.61 daniel 5265: * returns: the type of element content XML_ELEMENT_TYPE_xxx
1.22 daniel 5266: */
5267:
1.61 daniel 5268: int
1.123 daniel 5269: xmlParseElementContentDecl(xmlParserCtxtPtr ctxt, xmlChar *name,
1.61 daniel 5270: xmlElementContentPtr *result) {
5271:
5272: xmlElementContentPtr tree = NULL;
5273: int res;
5274:
5275: *result = NULL;
5276:
1.152 daniel 5277: if (RAW != '(') {
1.61 daniel 5278: if ((ctxt->sax != NULL) && (ctxt->sax->error != NULL))
1.74 daniel 5279: ctxt->sax->error(ctxt->userData,
1.61 daniel 5280: "xmlParseElementContentDecl : '(' expected\n");
1.123 daniel 5281: ctxt->errNo = XML_ERR_ELEMCONTENT_NOT_STARTED;
1.61 daniel 5282: ctxt->wellFormed = 0;
5283: return(-1);
5284: }
5285: NEXT;
1.97 daniel 5286: GROW;
1.61 daniel 5287: SKIP_BLANKS;
1.152 daniel 5288: if ((RAW == '#') && (NXT(1) == 'P') &&
1.61 daniel 5289: (NXT(2) == 'C') && (NXT(3) == 'D') &&
5290: (NXT(4) == 'A') && (NXT(5) == 'T') &&
5291: (NXT(6) == 'A')) {
1.62 daniel 5292: tree = xmlParseElementMixedContentDecl(ctxt);
1.61 daniel 5293: res = XML_ELEMENT_TYPE_MIXED;
5294: } else {
1.62 daniel 5295: tree = xmlParseElementChildrenContentDecl(ctxt);
1.61 daniel 5296: res = XML_ELEMENT_TYPE_ELEMENT;
5297: }
5298: SKIP_BLANKS;
1.63 daniel 5299: /****************************
1.152 daniel 5300: if (RAW != ')') {
1.61 daniel 5301: if ((ctxt->sax != NULL) && (ctxt->sax->error != NULL))
1.74 daniel 5302: ctxt->sax->error(ctxt->userData,
1.61 daniel 5303: "xmlParseElementContentDecl : ')' expected\n");
5304: ctxt->wellFormed = 0;
5305: return(-1);
5306: }
1.63 daniel 5307: ****************************/
5308: *result = tree;
1.61 daniel 5309: return(res);
1.22 daniel 5310: }
5311:
1.50 daniel 5312: /**
5313: * xmlParseElementDecl:
5314: * @ctxt: an XML parser context
5315: *
5316: * parse an Element declaration.
1.22 daniel 5317: *
5318: * [45] elementdecl ::= '<!ELEMENT' S Name S contentspec S? '>'
5319: *
1.99 daniel 5320: * [ VC: Unique Element Type Declaration ]
1.117 daniel 5321: * No element type may be declared more than once
1.69 daniel 5322: *
5323: * Returns the type of the element, or -1 in case of error
1.22 daniel 5324: */
1.59 daniel 5325: int
1.55 daniel 5326: xmlParseElementDecl(xmlParserCtxtPtr ctxt) {
1.123 daniel 5327: xmlChar *name;
1.59 daniel 5328: int ret = -1;
1.61 daniel 5329: xmlElementContentPtr content = NULL;
1.22 daniel 5330:
1.97 daniel 5331: GROW;
1.152 daniel 5332: if ((RAW == '<') && (NXT(1) == '!') &&
1.40 daniel 5333: (NXT(2) == 'E') && (NXT(3) == 'L') &&
5334: (NXT(4) == 'E') && (NXT(5) == 'M') &&
5335: (NXT(6) == 'E') && (NXT(7) == 'N') &&
1.59 daniel 5336: (NXT(8) == 'T')) {
1.40 daniel 5337: SKIP(9);
1.59 daniel 5338: if (!IS_BLANK(CUR)) {
5339: if ((ctxt->sax != NULL) && (ctxt->sax->error != NULL))
1.74 daniel 5340: ctxt->sax->error(ctxt->userData,
1.59 daniel 5341: "Space required after 'ELEMENT'\n");
1.123 daniel 5342: ctxt->errNo = XML_ERR_SPACE_REQUIRED;
1.59 daniel 5343: ctxt->wellFormed = 0;
5344: }
1.42 daniel 5345: SKIP_BLANKS;
1.22 daniel 5346: name = xmlParseName(ctxt);
5347: if (name == NULL) {
1.55 daniel 5348: if ((ctxt->sax != NULL) && (ctxt->sax->error != NULL))
1.74 daniel 5349: ctxt->sax->error(ctxt->userData,
1.59 daniel 5350: "xmlParseElementDecl: no name for Element\n");
1.123 daniel 5351: ctxt->errNo = XML_ERR_NAME_REQUIRED;
1.59 daniel 5352: ctxt->wellFormed = 0;
5353: return(-1);
5354: }
5355: if (!IS_BLANK(CUR)) {
5356: if ((ctxt->sax != NULL) && (ctxt->sax->error != NULL))
1.74 daniel 5357: ctxt->sax->error(ctxt->userData,
1.59 daniel 5358: "Space required after the element name\n");
1.123 daniel 5359: ctxt->errNo = XML_ERR_SPACE_REQUIRED;
1.59 daniel 5360: ctxt->wellFormed = 0;
1.22 daniel 5361: }
1.42 daniel 5362: SKIP_BLANKS;
1.152 daniel 5363: if ((RAW == 'E') && (NXT(1) == 'M') &&
1.40 daniel 5364: (NXT(2) == 'P') && (NXT(3) == 'T') &&
5365: (NXT(4) == 'Y')) {
5366: SKIP(5);
1.22 daniel 5367: /*
5368: * Element must always be empty.
5369: */
1.59 daniel 5370: ret = XML_ELEMENT_TYPE_EMPTY;
1.152 daniel 5371: } else if ((RAW == 'A') && (NXT(1) == 'N') &&
1.40 daniel 5372: (NXT(2) == 'Y')) {
5373: SKIP(3);
1.22 daniel 5374: /*
5375: * Element is a generic container.
5376: */
1.59 daniel 5377: ret = XML_ELEMENT_TYPE_ANY;
1.152 daniel 5378: } else if (RAW == '(') {
1.61 daniel 5379: ret = xmlParseElementContentDecl(ctxt, name, &content);
1.22 daniel 5380: } else {
1.98 daniel 5381: /*
5382: * [ WFC: PEs in Internal Subset ] error handling.
5383: */
1.152 daniel 5384: if ((RAW == '%') && (ctxt->external == 0) &&
1.98 daniel 5385: (ctxt->inputNr == 1)) {
5386: if ((ctxt->sax != NULL) && (ctxt->sax->error != NULL))
5387: ctxt->sax->error(ctxt->userData,
5388: "PEReference: forbidden within markup decl in internal subset\n");
1.123 daniel 5389: ctxt->errNo = XML_ERR_PEREF_IN_INT_SUBSET;
1.98 daniel 5390: } else {
5391: if ((ctxt->sax != NULL) && (ctxt->sax->error != NULL))
5392: ctxt->sax->error(ctxt->userData,
5393: "xmlParseElementDecl: 'EMPTY', 'ANY' or '(' expected\n");
1.123 daniel 5394: ctxt->errNo = XML_ERR_ELEMCONTENT_NOT_STARTED;
1.98 daniel 5395: }
1.61 daniel 5396: ctxt->wellFormed = 0;
1.119 daniel 5397: if (name != NULL) xmlFree(name);
1.61 daniel 5398: return(-1);
1.22 daniel 5399: }
1.142 daniel 5400:
5401: SKIP_BLANKS;
5402: /*
5403: * Pop-up of finished entities.
5404: */
1.152 daniel 5405: while ((RAW == 0) && (ctxt->inputNr > 1))
1.142 daniel 5406: xmlPopInput(ctxt);
1.42 daniel 5407: SKIP_BLANKS;
1.142 daniel 5408:
1.152 daniel 5409: if (RAW != '>') {
1.55 daniel 5410: if ((ctxt->sax != NULL) && (ctxt->sax->error != NULL))
1.74 daniel 5411: ctxt->sax->error(ctxt->userData,
1.31 daniel 5412: "xmlParseElementDecl: expected '>' at the end\n");
1.123 daniel 5413: ctxt->errNo = XML_ERR_GT_REQUIRED;
1.59 daniel 5414: ctxt->wellFormed = 0;
1.61 daniel 5415: } else {
1.40 daniel 5416: NEXT;
1.171 ! daniel 5417: if ((ctxt->sax != NULL) && (!ctxt->disableSAX) &&
! 5418: (ctxt->sax->elementDecl != NULL))
1.76 daniel 5419: ctxt->sax->elementDecl(ctxt->userData, name, ret,
5420: content);
1.61 daniel 5421: }
1.84 daniel 5422: if (content != NULL) {
5423: xmlFreeElementContent(content);
5424: }
1.61 daniel 5425: if (name != NULL) {
1.119 daniel 5426: xmlFree(name);
1.61 daniel 5427: }
1.22 daniel 5428: }
1.59 daniel 5429: return(ret);
1.22 daniel 5430: }
5431:
1.50 daniel 5432: /**
5433: * xmlParseMarkupDecl:
5434: * @ctxt: an XML parser context
5435: *
5436: * parse Markup declarations
1.22 daniel 5437: *
5438: * [29] markupdecl ::= elementdecl | AttlistDecl | EntityDecl |
5439: * NotationDecl | PI | Comment
5440: *
1.98 daniel 5441: * [ VC: Proper Declaration/PE Nesting ]
5442: * TODO Parameter-entity replacement text must be properly nested with
5443: * markup declarations. That is to say, if either the first character
5444: * or the last character of a markup declaration (markupdecl above) is
5445: * contained in the replacement text for a parameter-entity reference,
5446: * both must be contained in the same replacement text.
5447: *
5448: * [ WFC: PEs in Internal Subset ]
5449: * In the internal DTD subset, parameter-entity references can occur
5450: * only where markup declarations can occur, not within markup declarations.
5451: * (This does not apply to references that occur in external parameter
5452: * entities or to the external subset.)
1.22 daniel 5453: */
1.55 daniel 5454: void
5455: xmlParseMarkupDecl(xmlParserCtxtPtr ctxt) {
1.94 daniel 5456: GROW;
1.22 daniel 5457: xmlParseElementDecl(ctxt);
5458: xmlParseAttributeListDecl(ctxt);
5459: xmlParseEntityDecl(ctxt);
5460: xmlParseNotationDecl(ctxt);
5461: xmlParsePI(ctxt);
1.114 daniel 5462: xmlParseComment(ctxt);
1.98 daniel 5463: /*
5464: * This is only for internal subset. On external entities,
5465: * the replacement is done before parsing stage
5466: */
5467: if ((ctxt->external == 0) && (ctxt->inputNr == 1))
5468: xmlParsePEReference(ctxt);
1.97 daniel 5469: ctxt->instate = XML_PARSER_DTD;
1.22 daniel 5470: }
5471:
1.50 daniel 5472: /**
1.76 daniel 5473: * xmlParseTextDecl:
5474: * @ctxt: an XML parser context
5475: *
5476: * parse an XML declaration header for external entities
5477: *
5478: * [77] TextDecl ::= '<?xml' VersionInfo? EncodingDecl S? '?>'
5479: *
5480: * Returns the only valuable info for an external parsed entity, the encoding
5481: */
5482:
1.123 daniel 5483: xmlChar *
1.76 daniel 5484: xmlParseTextDecl(xmlParserCtxtPtr ctxt) {
1.123 daniel 5485: xmlChar *version;
5486: xmlChar *encoding = NULL;
1.76 daniel 5487:
5488: /*
5489: * We know that '<?xml' is here.
5490: */
5491: SKIP(5);
5492:
5493: if (!IS_BLANK(CUR)) {
5494: if ((ctxt->sax != NULL) && (ctxt->sax->error != NULL))
1.122 daniel 5495: ctxt->sax->error(ctxt->userData,
5496: "Space needed after '<?xml'\n");
1.123 daniel 5497: ctxt->errNo = XML_ERR_SPACE_REQUIRED;
1.76 daniel 5498: ctxt->wellFormed = 0;
5499: }
5500: SKIP_BLANKS;
5501:
5502: /*
5503: * We may have the VersionInfo here.
5504: */
5505: version = xmlParseVersionInfo(ctxt);
5506: if (version == NULL)
5507: version = xmlCharStrdup(XML_DEFAULT_VERSION);
1.165 daniel 5508: ctxt->input->version = xmlStrdup(version);
1.119 daniel 5509: xmlFree(version);
1.76 daniel 5510:
5511: /*
5512: * We must have the encoding declaration
5513: */
5514: if (!IS_BLANK(CUR)) {
5515: if ((ctxt->sax != NULL) && (ctxt->sax->error != NULL))
1.122 daniel 5516: ctxt->sax->error(ctxt->userData, "Space needed here\n");
1.123 daniel 5517: ctxt->errNo = XML_ERR_SPACE_REQUIRED;
1.76 daniel 5518: ctxt->wellFormed = 0;
5519: }
5520: encoding = xmlParseEncodingDecl(ctxt);
5521:
5522: SKIP_BLANKS;
1.152 daniel 5523: if ((RAW == '?') && (NXT(1) == '>')) {
1.76 daniel 5524: SKIP(2);
1.152 daniel 5525: } else if (RAW == '>') {
1.76 daniel 5526: /* Deprecated old WD ... */
5527: if ((ctxt->sax != NULL) && (ctxt->sax->error != NULL))
1.122 daniel 5528: ctxt->sax->error(ctxt->userData,
5529: "XML declaration must end-up with '?>'\n");
1.123 daniel 5530: ctxt->errNo = XML_ERR_XMLDECL_NOT_FINISHED;
1.76 daniel 5531: ctxt->wellFormed = 0;
5532: NEXT;
5533: } else {
5534: if ((ctxt->sax != NULL) && (ctxt->sax->error != NULL))
1.122 daniel 5535: ctxt->sax->error(ctxt->userData,
5536: "parsing XML declaration: '?>' expected\n");
1.123 daniel 5537: ctxt->errNo = XML_ERR_XMLDECL_NOT_FINISHED;
1.76 daniel 5538: ctxt->wellFormed = 0;
5539: MOVETO_ENDTAG(CUR_PTR);
5540: NEXT;
5541: }
5542: return(encoding);
5543: }
5544:
5545: /*
5546: * xmlParseConditionalSections
5547: * @ctxt: an XML parser context
5548: *
5549: * TODO : Conditionnal section are not yet supported !
5550: *
5551: * [61] conditionalSect ::= includeSect | ignoreSect
5552: * [62] includeSect ::= '<![' S? 'INCLUDE' S? '[' extSubsetDecl ']]>'
5553: * [63] ignoreSect ::= '<![' S? 'IGNORE' S? '[' ignoreSectContents* ']]>'
5554: * [64] ignoreSectContents ::= Ignore ('<![' ignoreSectContents ']]>' Ignore)*
5555: * [65] Ignore ::= Char* - (Char* ('<![' | ']]>') Char*)
5556: */
5557:
5558: void
5559: xmlParseConditionalSections(xmlParserCtxtPtr ctxt) {
1.165 daniel 5560: SKIP(3);
5561: SKIP_BLANKS;
1.168 daniel 5562: if ((RAW == 'I') && (NXT(1) == 'N') && (NXT(2) == 'C') &&
5563: (NXT(3) == 'L') && (NXT(4) == 'U') && (NXT(5) == 'D') &&
5564: (NXT(6) == 'E')) {
1.165 daniel 5565: SKIP(7);
1.168 daniel 5566: SKIP_BLANKS;
5567: if (RAW != '[') {
5568: if ((ctxt->sax != NULL) && (ctxt->sax->error != NULL))
5569: ctxt->sax->error(ctxt->userData,
5570: "XML conditional section '[' expected\n");
5571: ctxt->errNo = XML_ERR_CONDSEC_INVALID;
5572: ctxt->wellFormed = 0;
5573: } else {
5574: NEXT;
5575: }
1.165 daniel 5576: while ((RAW != 0) && ((RAW != ']') || (NXT(1) != ']') ||
5577: (NXT(2) != '>'))) {
5578: const xmlChar *check = CUR_PTR;
5579: int cons = ctxt->input->consumed;
5580: int tok = ctxt->token;
5581:
5582: if ((RAW == '<') && (NXT(1) == '!') && (NXT(2) == '[')) {
5583: xmlParseConditionalSections(ctxt);
5584: } else if (IS_BLANK(CUR)) {
5585: NEXT;
5586: } else if (RAW == '%') {
5587: xmlParsePEReference(ctxt);
5588: } else
5589: xmlParseMarkupDecl(ctxt);
5590:
5591: /*
5592: * Pop-up of finished entities.
5593: */
5594: while ((RAW == 0) && (ctxt->inputNr > 1))
5595: xmlPopInput(ctxt);
5596:
5597: if ((CUR_PTR == check) && (cons == ctxt->input->consumed) &&
5598: (tok == ctxt->token)) {
5599: if ((ctxt->sax != NULL) && (ctxt->sax->error != NULL))
5600: ctxt->sax->error(ctxt->userData,
5601: "Content error in the external subset\n");
5602: ctxt->wellFormed = 0;
5603: ctxt->errNo = XML_ERR_EXT_SUBSET_NOT_FINISHED;
5604: break;
5605: }
5606: }
1.168 daniel 5607: } else if ((RAW == 'I') && (NXT(1) == 'G') && (NXT(2) == 'N') &&
5608: (NXT(3) == 'O') && (NXT(4) == 'R') && (NXT(5) == 'E')) {
1.171 ! daniel 5609: int state;
! 5610:
1.168 daniel 5611: SKIP(6);
5612: SKIP_BLANKS;
5613: if (RAW != '[') {
5614: if ((ctxt->sax != NULL) && (ctxt->sax->error != NULL))
5615: ctxt->sax->error(ctxt->userData,
5616: "XML conditional section '[' expected\n");
5617: ctxt->errNo = XML_ERR_CONDSEC_INVALID;
5618: ctxt->wellFormed = 0;
5619: } else {
5620: NEXT;
5621: }
1.171 ! daniel 5622:
1.143 daniel 5623: /*
1.171 ! daniel 5624: * Parse up to the end of the conditionnal section
! 5625: * But disable SAX event generating DTD building in the meantime
1.143 daniel 5626: */
1.171 ! daniel 5627: state = ctxt->disableSAX;
1.165 daniel 5628: while ((RAW != 0) && ((RAW != ']') || (NXT(1) != ']') ||
5629: (NXT(2) != '>'))) {
1.171 ! daniel 5630: const xmlChar *check = CUR_PTR;
! 5631: int cons = ctxt->input->consumed;
! 5632: int tok = ctxt->token;
! 5633:
! 5634: if ((RAW == '<') && (NXT(1) == '!') && (NXT(2) == '[')) {
! 5635: xmlParseConditionalSections(ctxt);
! 5636: } else if (IS_BLANK(CUR)) {
! 5637: NEXT;
! 5638: } else if (RAW == '%') {
! 5639: xmlParsePEReference(ctxt);
! 5640: } else
! 5641: xmlParseMarkupDecl(ctxt);
! 5642:
1.165 daniel 5643: /*
5644: * Pop-up of finished entities.
5645: */
5646: while ((RAW == 0) && (ctxt->inputNr > 1))
5647: xmlPopInput(ctxt);
1.143 daniel 5648:
1.171 ! daniel 5649: if ((CUR_PTR == check) && (cons == ctxt->input->consumed) &&
! 5650: (tok == ctxt->token)) {
! 5651: if ((ctxt->sax != NULL) && (ctxt->sax->error != NULL))
! 5652: ctxt->sax->error(ctxt->userData,
! 5653: "Content error in the external subset\n");
! 5654: ctxt->wellFormed = 0;
! 5655: ctxt->errNo = XML_ERR_EXT_SUBSET_NOT_FINISHED;
! 5656: break;
! 5657: }
1.165 daniel 5658: }
1.171 ! daniel 5659: ctxt->disableSAX = state;
1.168 daniel 5660: } else {
5661: if ((ctxt->sax != NULL) && (ctxt->sax->error != NULL))
5662: ctxt->sax->error(ctxt->userData,
5663: "XML conditional section INCLUDE or IGNORE keyword expected\n");
5664: ctxt->errNo = XML_ERR_CONDSEC_INVALID;
5665: ctxt->wellFormed = 0;
1.143 daniel 5666: }
5667:
1.152 daniel 5668: if (RAW == 0)
1.143 daniel 5669: SHRINK;
5670:
1.152 daniel 5671: if (RAW == 0) {
1.76 daniel 5672: if ((ctxt->sax != NULL) && (ctxt->sax->error != NULL))
5673: ctxt->sax->error(ctxt->userData,
5674: "XML conditional section not closed\n");
1.123 daniel 5675: ctxt->errNo = XML_ERR_CONDSEC_NOT_FINISHED;
1.76 daniel 5676: ctxt->wellFormed = 0;
1.143 daniel 5677: } else {
5678: SKIP(3);
1.76 daniel 5679: }
5680: }
5681:
5682: /**
1.124 daniel 5683: * xmlParseExternalSubset:
1.76 daniel 5684: * @ctxt: an XML parser context
1.124 daniel 5685: * @ExternalID: the external identifier
5686: * @SystemID: the system identifier (or URL)
1.76 daniel 5687: *
5688: * parse Markup declarations from an external subset
5689: *
5690: * [30] extSubset ::= textDecl? extSubsetDecl
5691: *
5692: * [31] extSubsetDecl ::= (markupdecl | conditionalSect | PEReference | S) *
5693: */
5694: void
1.123 daniel 5695: xmlParseExternalSubset(xmlParserCtxtPtr ctxt, const xmlChar *ExternalID,
5696: const xmlChar *SystemID) {
1.132 daniel 5697: GROW;
1.152 daniel 5698: if ((RAW == '<') && (NXT(1) == '?') &&
1.76 daniel 5699: (NXT(2) == 'x') && (NXT(3) == 'm') &&
5700: (NXT(4) == 'l')) {
1.134 daniel 5701: xmlChar *decl;
5702:
5703: decl = xmlParseTextDecl(ctxt);
5704: if (decl != NULL)
5705: xmlFree(decl);
1.76 daniel 5706: }
1.79 daniel 5707: if (ctxt->myDoc == NULL) {
1.116 daniel 5708: ctxt->myDoc = xmlNewDoc(BAD_CAST "1.0");
1.79 daniel 5709: }
5710: if ((ctxt->myDoc != NULL) && (ctxt->myDoc->intSubset == NULL))
5711: xmlCreateIntSubset(ctxt->myDoc, NULL, ExternalID, SystemID);
5712:
1.96 daniel 5713: ctxt->instate = XML_PARSER_DTD;
1.101 daniel 5714: ctxt->external = 1;
1.152 daniel 5715: while (((RAW == '<') && (NXT(1) == '?')) ||
5716: ((RAW == '<') && (NXT(1) == '!')) ||
1.164 daniel 5717: IS_BLANK(CUR)) {
1.123 daniel 5718: const xmlChar *check = CUR_PTR;
1.115 daniel 5719: int cons = ctxt->input->consumed;
1.164 daniel 5720: int tok = ctxt->token;
1.115 daniel 5721:
1.152 daniel 5722: if ((RAW == '<') && (NXT(1) == '!') && (NXT(2) == '[')) {
1.76 daniel 5723: xmlParseConditionalSections(ctxt);
5724: } else if (IS_BLANK(CUR)) {
5725: NEXT;
1.152 daniel 5726: } else if (RAW == '%') {
1.76 daniel 5727: xmlParsePEReference(ctxt);
5728: } else
5729: xmlParseMarkupDecl(ctxt);
1.77 daniel 5730:
5731: /*
5732: * Pop-up of finished entities.
5733: */
1.166 daniel 5734: while ((RAW == 0) && (ctxt->inputNr > 1))
1.77 daniel 5735: xmlPopInput(ctxt);
5736:
1.164 daniel 5737: if ((CUR_PTR == check) && (cons == ctxt->input->consumed) &&
5738: (tok == ctxt->token)) {
1.115 daniel 5739: if ((ctxt->sax != NULL) && (ctxt->sax->error != NULL))
5740: ctxt->sax->error(ctxt->userData,
5741: "Content error in the external subset\n");
5742: ctxt->wellFormed = 0;
1.123 daniel 5743: ctxt->errNo = XML_ERR_EXT_SUBSET_NOT_FINISHED;
1.115 daniel 5744: break;
5745: }
1.76 daniel 5746: }
5747:
1.152 daniel 5748: if (RAW != 0) {
1.76 daniel 5749: if ((ctxt->sax != NULL) && (ctxt->sax->error != NULL))
5750: ctxt->sax->error(ctxt->userData,
5751: "Extra content at the end of the document\n");
1.123 daniel 5752: ctxt->errNo = XML_ERR_EXT_SUBSET_NOT_FINISHED;
1.76 daniel 5753: ctxt->wellFormed = 0;
5754: }
5755:
5756: }
5757:
5758: /**
1.77 daniel 5759: * xmlParseReference:
5760: * @ctxt: an XML parser context
5761: *
5762: * parse and handle entity references in content, depending on the SAX
5763: * interface, this may end-up in a call to character() if this is a
1.79 daniel 5764: * CharRef, a predefined entity, if there is no reference() callback.
5765: * or if the parser was asked to switch to that mode.
1.77 daniel 5766: *
5767: * [67] Reference ::= EntityRef | CharRef
5768: */
5769: void
5770: xmlParseReference(xmlParserCtxtPtr ctxt) {
5771: xmlEntityPtr ent;
1.123 daniel 5772: xmlChar *val;
1.152 daniel 5773: if (RAW != '&') return;
1.77 daniel 5774:
1.113 daniel 5775: if (ctxt->inputNr > 1) {
1.123 daniel 5776: xmlChar cur[2] = { '&' , 0 } ;
1.113 daniel 5777:
1.171 ! daniel 5778: if ((ctxt->sax != NULL) && (ctxt->sax->characters != NULL) &&
! 5779: (!ctxt->disableSAX))
1.113 daniel 5780: ctxt->sax->characters(ctxt->userData, cur, 1);
5781: if (ctxt->token == '&')
5782: ctxt->token = 0;
5783: else {
5784: SKIP(1);
5785: }
5786: return;
5787: }
1.77 daniel 5788: if (NXT(1) == '#') {
1.152 daniel 5789: int i = 0;
1.153 daniel 5790: xmlChar out[10];
5791: int hex = NXT(2);
1.77 daniel 5792: int val = xmlParseCharRef(ctxt);
1.152 daniel 5793:
1.153 daniel 5794: if (ctxt->encoding != NULL) {
5795: /*
5796: * So we are using non-UTF-8 buffers
5797: * Check that the char fit on 8bits, if not
5798: * generate a CharRef.
5799: */
5800: if (val <= 0xFF) {
5801: out[0] = val;
5802: out[1] = 0;
1.171 ! daniel 5803: if ((ctxt->sax != NULL) && (ctxt->sax->characters != NULL) &&
! 5804: (!ctxt->disableSAX))
1.153 daniel 5805: ctxt->sax->characters(ctxt->userData, out, 1);
5806: } else {
5807: if ((hex == 'x') || (hex == 'X'))
5808: sprintf((char *)out, "#x%X", val);
5809: else
5810: sprintf((char *)out, "#%d", val);
1.171 ! daniel 5811: if ((ctxt->sax != NULL) && (ctxt->sax->reference != NULL) &&
! 5812: (!ctxt->disableSAX))
1.153 daniel 5813: ctxt->sax->reference(ctxt->userData, out);
5814: }
5815: } else {
5816: /*
5817: * Just encode the value in UTF-8
5818: */
5819: COPY_BUF(0 ,out, i, val);
5820: out[i] = 0;
1.171 ! daniel 5821: if ((ctxt->sax != NULL) && (ctxt->sax->characters != NULL) &&
! 5822: (!ctxt->disableSAX))
1.153 daniel 5823: ctxt->sax->characters(ctxt->userData, out, i);
5824: }
1.77 daniel 5825: } else {
5826: ent = xmlParseEntityRef(ctxt);
5827: if (ent == NULL) return;
5828: if ((ent->name != NULL) &&
1.159 daniel 5829: (ent->etype != XML_INTERNAL_PREDEFINED_ENTITY)) {
1.113 daniel 5830: if ((ctxt->sax != NULL) && (ctxt->sax->reference != NULL) &&
1.171 ! daniel 5831: (ctxt->replaceEntities == 0) && (!ctxt->disableSAX)) {
1.113 daniel 5832: /*
5833: * Create a node.
5834: */
5835: ctxt->sax->reference(ctxt->userData, ent->name);
5836: return;
5837: } else if (ctxt->replaceEntities) {
5838: xmlParserInputPtr input;
1.79 daniel 5839:
1.113 daniel 5840: input = xmlNewEntityInputStream(ctxt, ent);
5841: xmlPushInput(ctxt, input);
1.167 daniel 5842: if ((ent->etype == XML_EXTERNAL_GENERAL_PARSED_ENTITY) &&
5843: (RAW == '<') && (NXT(1) == '?') &&
5844: (NXT(2) == 'x') && (NXT(3) == 'm') &&
5845: (NXT(4) == 'l') && (IS_BLANK(NXT(5)))) {
5846: xmlParseXMLDecl(ctxt);
5847: if (input->standalone) {
5848: if ((ctxt->sax != NULL) && (ctxt->sax->error != NULL))
5849: ctxt->sax->error(ctxt->userData,
5850: "external parsed entities cannot be standalone\n");
5851: ctxt->errNo = XML_ERR_EXT_ENTITY_STANDALONE;
5852: ctxt->wellFormed = 0;
5853: }
5854: }
1.113 daniel 5855: return;
5856: }
1.77 daniel 5857: }
5858: val = ent->content;
5859: if (val == NULL) return;
5860: /*
5861: * inline the entity.
5862: */
1.171 ! daniel 5863: if ((ctxt->sax != NULL) && (ctxt->sax->characters != NULL) &&
! 5864: (!ctxt->disableSAX))
1.77 daniel 5865: ctxt->sax->characters(ctxt->userData, val, xmlStrlen(val));
5866: }
1.24 daniel 5867: }
5868:
1.50 daniel 5869: /**
5870: * xmlParseEntityRef:
5871: * @ctxt: an XML parser context
5872: *
5873: * parse ENTITY references declarations
1.24 daniel 5874: *
5875: * [68] EntityRef ::= '&' Name ';'
1.68 daniel 5876: *
1.98 daniel 5877: * [ WFC: Entity Declared ]
5878: * In a document without any DTD, a document with only an internal DTD
5879: * subset which contains no parameter entity references, or a document
5880: * with "standalone='yes'", the Name given in the entity reference
5881: * must match that in an entity declaration, except that well-formed
5882: * documents need not declare any of the following entities: amp, lt,
5883: * gt, apos, quot. The declaration of a parameter entity must precede
5884: * any reference to it. Similarly, the declaration of a general entity
5885: * must precede any reference to it which appears in a default value in an
5886: * attribute-list declaration. Note that if entities are declared in the
5887: * external subset or in external parameter entities, a non-validating
5888: * processor is not obligated to read and process their declarations;
5889: * for such documents, the rule that an entity must be declared is a
5890: * well-formedness constraint only if standalone='yes'.
5891: *
5892: * [ WFC: Parsed Entity ]
5893: * An entity reference must not contain the name of an unparsed entity
5894: *
1.77 daniel 5895: * Returns the xmlEntityPtr if found, or NULL otherwise.
1.24 daniel 5896: */
1.77 daniel 5897: xmlEntityPtr
1.55 daniel 5898: xmlParseEntityRef(xmlParserCtxtPtr ctxt) {
1.123 daniel 5899: xmlChar *name;
1.72 daniel 5900: xmlEntityPtr ent = NULL;
1.24 daniel 5901:
1.91 daniel 5902: GROW;
1.111 daniel 5903:
1.152 daniel 5904: if (RAW == '&') {
1.40 daniel 5905: NEXT;
1.24 daniel 5906: name = xmlParseName(ctxt);
5907: if (name == NULL) {
1.55 daniel 5908: if ((ctxt->sax != NULL) && (ctxt->sax->error != NULL))
1.98 daniel 5909: ctxt->sax->error(ctxt->userData,
5910: "xmlParseEntityRef: no name\n");
1.123 daniel 5911: ctxt->errNo = XML_ERR_NAME_REQUIRED;
1.59 daniel 5912: ctxt->wellFormed = 0;
1.24 daniel 5913: } else {
1.152 daniel 5914: if (RAW == ';') {
1.40 daniel 5915: NEXT;
1.24 daniel 5916: /*
1.77 daniel 5917: * Ask first SAX for entity resolution, otherwise try the
5918: * predefined set.
5919: */
5920: if (ctxt->sax != NULL) {
5921: if (ctxt->sax->getEntity != NULL)
5922: ent = ctxt->sax->getEntity(ctxt->userData, name);
5923: if (ent == NULL)
5924: ent = xmlGetPredefinedEntity(name);
5925: }
5926: /*
1.98 daniel 5927: * [ WFC: Entity Declared ]
5928: * In a document without any DTD, a document with only an
5929: * internal DTD subset which contains no parameter entity
5930: * references, or a document with "standalone='yes'", the
5931: * Name given in the entity reference must match that in an
5932: * entity declaration, except that well-formed documents
5933: * need not declare any of the following entities: amp, lt,
5934: * gt, apos, quot.
5935: * The declaration of a parameter entity must precede any
5936: * reference to it.
5937: * Similarly, the declaration of a general entity must
5938: * precede any reference to it which appears in a default
5939: * value in an attribute-list declaration. Note that if
5940: * entities are declared in the external subset or in
5941: * external parameter entities, a non-validating processor
5942: * is not obligated to read and process their declarations;
5943: * for such documents, the rule that an entity must be
5944: * declared is a well-formedness constraint only if
5945: * standalone='yes'.
1.59 daniel 5946: */
1.77 daniel 5947: if (ent == NULL) {
1.98 daniel 5948: if ((ctxt->standalone == 1) ||
5949: ((ctxt->hasExternalSubset == 0) &&
5950: (ctxt->hasPErefs == 0))) {
5951: if ((ctxt->sax != NULL) && (ctxt->sax->error != NULL))
1.77 daniel 5952: ctxt->sax->error(ctxt->userData,
5953: "Entity '%s' not defined\n", name);
1.123 daniel 5954: ctxt->errNo = XML_ERR_UNDECLARED_ENTITY;
1.77 daniel 5955: ctxt->wellFormed = 0;
5956: } else {
1.98 daniel 5957: if ((ctxt->sax != NULL) && (ctxt->sax->warning != NULL))
5958: ctxt->sax->warning(ctxt->userData,
5959: "Entity '%s' not defined\n", name);
1.123 daniel 5960: ctxt->errNo = XML_WAR_UNDECLARED_ENTITY;
1.59 daniel 5961: }
1.77 daniel 5962: }
1.59 daniel 5963:
5964: /*
1.98 daniel 5965: * [ WFC: Parsed Entity ]
5966: * An entity reference must not contain the name of an
5967: * unparsed entity
5968: */
1.159 daniel 5969: else if (ent->etype == XML_EXTERNAL_GENERAL_UNPARSED_ENTITY) {
1.98 daniel 5970: if ((ctxt->sax != NULL) && (ctxt->sax->error != NULL))
5971: ctxt->sax->error(ctxt->userData,
5972: "Entity reference to unparsed entity %s\n", name);
1.123 daniel 5973: ctxt->errNo = XML_ERR_UNPARSED_ENTITY;
1.98 daniel 5974: ctxt->wellFormed = 0;
5975: }
5976:
5977: /*
5978: * [ WFC: No External Entity References ]
5979: * Attribute values cannot contain direct or indirect
5980: * entity references to external entities.
5981: */
5982: else if ((ctxt->instate == XML_PARSER_ATTRIBUTE_VALUE) &&
1.159 daniel 5983: (ent->etype == XML_EXTERNAL_GENERAL_PARSED_ENTITY)) {
1.98 daniel 5984: if ((ctxt->sax != NULL) && (ctxt->sax->error != NULL))
5985: ctxt->sax->error(ctxt->userData,
5986: "Attribute references external entity '%s'\n", name);
1.123 daniel 5987: ctxt->errNo = XML_ERR_ENTITY_IS_EXTERNAL;
1.98 daniel 5988: ctxt->wellFormed = 0;
5989: }
5990: /*
5991: * [ WFC: No < in Attribute Values ]
5992: * The replacement text of any entity referred to directly or
5993: * indirectly in an attribute value (other than "<") must
5994: * not contain a <.
1.59 daniel 5995: */
1.98 daniel 5996: else if ((ctxt->instate == XML_PARSER_ATTRIBUTE_VALUE) &&
1.116 daniel 5997: (ent != NULL) &&
5998: (xmlStrcmp(ent->name, BAD_CAST "lt")) &&
1.98 daniel 5999: (ent->content != NULL) &&
6000: (xmlStrchr(ent->content, '<'))) {
6001: if ((ctxt->sax != NULL) && (ctxt->sax->error != NULL))
6002: ctxt->sax->error(ctxt->userData,
6003: "'<' in entity '%s' is not allowed in attributes values\n", name);
1.123 daniel 6004: ctxt->errNo = XML_ERR_LT_IN_ATTRIBUTE;
1.98 daniel 6005: ctxt->wellFormed = 0;
6006: }
6007:
6008: /*
6009: * Internal check, no parameter entities here ...
6010: */
6011: else {
1.159 daniel 6012: switch (ent->etype) {
1.59 daniel 6013: case XML_INTERNAL_PARAMETER_ENTITY:
6014: case XML_EXTERNAL_PARAMETER_ENTITY:
6015: if ((ctxt->sax != NULL) && (ctxt->sax->error != NULL))
1.74 daniel 6016: ctxt->sax->error(ctxt->userData,
1.59 daniel 6017: "Attempt to reference the parameter entity '%s'\n", name);
1.123 daniel 6018: ctxt->errNo = XML_ERR_ENTITY_IS_PARAMETER;
1.59 daniel 6019: ctxt->wellFormed = 0;
6020: break;
6021: }
6022: }
6023:
6024: /*
1.98 daniel 6025: * [ WFC: No Recursion ]
1.117 daniel 6026: * TODO A parsed entity must not contain a recursive reference
6027: * to itself, either directly or indirectly.
1.59 daniel 6028: */
1.77 daniel 6029:
1.24 daniel 6030: } else {
1.55 daniel 6031: if ((ctxt->sax != NULL) && (ctxt->sax->error != NULL))
1.74 daniel 6032: ctxt->sax->error(ctxt->userData,
1.59 daniel 6033: "xmlParseEntityRef: expecting ';'\n");
1.123 daniel 6034: ctxt->errNo = XML_ERR_ENTITYREF_SEMICOL_MISSING;
1.59 daniel 6035: ctxt->wellFormed = 0;
1.24 daniel 6036: }
1.119 daniel 6037: xmlFree(name);
1.24 daniel 6038: }
6039: }
1.77 daniel 6040: return(ent);
1.24 daniel 6041: }
1.135 daniel 6042: /**
6043: * xmlParseStringEntityRef:
6044: * @ctxt: an XML parser context
6045: * @str: a pointer to an index in the string
6046: *
6047: * parse ENTITY references declarations, but this version parses it from
6048: * a string value.
6049: *
6050: * [68] EntityRef ::= '&' Name ';'
6051: *
6052: * [ WFC: Entity Declared ]
6053: * In a document without any DTD, a document with only an internal DTD
6054: * subset which contains no parameter entity references, or a document
6055: * with "standalone='yes'", the Name given in the entity reference
6056: * must match that in an entity declaration, except that well-formed
6057: * documents need not declare any of the following entities: amp, lt,
6058: * gt, apos, quot. The declaration of a parameter entity must precede
6059: * any reference to it. Similarly, the declaration of a general entity
6060: * must precede any reference to it which appears in a default value in an
6061: * attribute-list declaration. Note that if entities are declared in the
6062: * external subset or in external parameter entities, a non-validating
6063: * processor is not obligated to read and process their declarations;
6064: * for such documents, the rule that an entity must be declared is a
6065: * well-formedness constraint only if standalone='yes'.
6066: *
6067: * [ WFC: Parsed Entity ]
6068: * An entity reference must not contain the name of an unparsed entity
6069: *
6070: * Returns the xmlEntityPtr if found, or NULL otherwise. The str pointer
6071: * is updated to the current location in the string.
6072: */
6073: xmlEntityPtr
6074: xmlParseStringEntityRef(xmlParserCtxtPtr ctxt, const xmlChar ** str) {
6075: xmlChar *name;
6076: const xmlChar *ptr;
6077: xmlChar cur;
6078: xmlEntityPtr ent = NULL;
6079:
6080: GROW;
6081:
1.156 daniel 6082: if ((str == NULL) || (*str == NULL))
6083: return(NULL);
1.135 daniel 6084: ptr = *str;
6085: cur = *ptr;
6086: if (cur == '&') {
6087: ptr++;
6088: cur = *ptr;
6089: name = xmlParseStringName(ctxt, &ptr);
6090: if (name == NULL) {
6091: if ((ctxt->sax != NULL) && (ctxt->sax->error != NULL))
6092: ctxt->sax->error(ctxt->userData,
6093: "xmlParseEntityRef: no name\n");
6094: ctxt->errNo = XML_ERR_NAME_REQUIRED;
6095: ctxt->wellFormed = 0;
6096: } else {
1.152 daniel 6097: if (RAW == ';') {
1.135 daniel 6098: NEXT;
6099: /*
6100: * Ask first SAX for entity resolution, otherwise try the
6101: * predefined set.
6102: */
6103: if (ctxt->sax != NULL) {
6104: if (ctxt->sax->getEntity != NULL)
6105: ent = ctxt->sax->getEntity(ctxt->userData, name);
6106: if (ent == NULL)
6107: ent = xmlGetPredefinedEntity(name);
6108: }
6109: /*
6110: * [ WFC: Entity Declared ]
6111: * In a document without any DTD, a document with only an
6112: * internal DTD subset which contains no parameter entity
6113: * references, or a document with "standalone='yes'", the
6114: * Name given in the entity reference must match that in an
6115: * entity declaration, except that well-formed documents
6116: * need not declare any of the following entities: amp, lt,
6117: * gt, apos, quot.
6118: * The declaration of a parameter entity must precede any
6119: * reference to it.
6120: * Similarly, the declaration of a general entity must
6121: * precede any reference to it which appears in a default
6122: * value in an attribute-list declaration. Note that if
6123: * entities are declared in the external subset or in
6124: * external parameter entities, a non-validating processor
6125: * is not obligated to read and process their declarations;
6126: * for such documents, the rule that an entity must be
6127: * declared is a well-formedness constraint only if
6128: * standalone='yes'.
6129: */
6130: if (ent == NULL) {
6131: if ((ctxt->standalone == 1) ||
6132: ((ctxt->hasExternalSubset == 0) &&
6133: (ctxt->hasPErefs == 0))) {
6134: if ((ctxt->sax != NULL) && (ctxt->sax->error != NULL))
6135: ctxt->sax->error(ctxt->userData,
6136: "Entity '%s' not defined\n", name);
6137: ctxt->errNo = XML_ERR_UNDECLARED_ENTITY;
6138: ctxt->wellFormed = 0;
6139: } else {
6140: if ((ctxt->sax != NULL) && (ctxt->sax->warning != NULL))
6141: ctxt->sax->warning(ctxt->userData,
6142: "Entity '%s' not defined\n", name);
6143: ctxt->errNo = XML_WAR_UNDECLARED_ENTITY;
6144: }
6145: }
6146:
6147: /*
6148: * [ WFC: Parsed Entity ]
6149: * An entity reference must not contain the name of an
6150: * unparsed entity
6151: */
1.159 daniel 6152: else if (ent->etype == XML_EXTERNAL_GENERAL_UNPARSED_ENTITY) {
1.135 daniel 6153: if ((ctxt->sax != NULL) && (ctxt->sax->error != NULL))
6154: ctxt->sax->error(ctxt->userData,
6155: "Entity reference to unparsed entity %s\n", name);
6156: ctxt->errNo = XML_ERR_UNPARSED_ENTITY;
6157: ctxt->wellFormed = 0;
6158: }
6159:
6160: /*
6161: * [ WFC: No External Entity References ]
6162: * Attribute values cannot contain direct or indirect
6163: * entity references to external entities.
6164: */
6165: else if ((ctxt->instate == XML_PARSER_ATTRIBUTE_VALUE) &&
1.159 daniel 6166: (ent->etype == XML_EXTERNAL_GENERAL_PARSED_ENTITY)) {
1.135 daniel 6167: if ((ctxt->sax != NULL) && (ctxt->sax->error != NULL))
6168: ctxt->sax->error(ctxt->userData,
6169: "Attribute references external entity '%s'\n", name);
6170: ctxt->errNo = XML_ERR_ENTITY_IS_EXTERNAL;
6171: ctxt->wellFormed = 0;
6172: }
6173: /*
6174: * [ WFC: No < in Attribute Values ]
6175: * The replacement text of any entity referred to directly or
6176: * indirectly in an attribute value (other than "<") must
6177: * not contain a <.
6178: */
6179: else if ((ctxt->instate == XML_PARSER_ATTRIBUTE_VALUE) &&
6180: (ent != NULL) &&
6181: (xmlStrcmp(ent->name, BAD_CAST "lt")) &&
6182: (ent->content != NULL) &&
6183: (xmlStrchr(ent->content, '<'))) {
6184: if ((ctxt->sax != NULL) && (ctxt->sax->error != NULL))
6185: ctxt->sax->error(ctxt->userData,
6186: "'<' in entity '%s' is not allowed in attributes values\n", name);
6187: ctxt->errNo = XML_ERR_LT_IN_ATTRIBUTE;
6188: ctxt->wellFormed = 0;
6189: }
6190:
6191: /*
6192: * Internal check, no parameter entities here ...
6193: */
6194: else {
1.159 daniel 6195: switch (ent->etype) {
1.135 daniel 6196: case XML_INTERNAL_PARAMETER_ENTITY:
6197: case XML_EXTERNAL_PARAMETER_ENTITY:
6198: if ((ctxt->sax != NULL) && (ctxt->sax->error != NULL))
6199: ctxt->sax->error(ctxt->userData,
6200: "Attempt to reference the parameter entity '%s'\n", name);
6201: ctxt->errNo = XML_ERR_ENTITY_IS_PARAMETER;
6202: ctxt->wellFormed = 0;
6203: break;
6204: }
6205: }
6206:
6207: /*
6208: * [ WFC: No Recursion ]
6209: * TODO A parsed entity must not contain a recursive reference
6210: * to itself, either directly or indirectly.
6211: */
6212:
6213: } else {
6214: if ((ctxt->sax != NULL) && (ctxt->sax->error != NULL))
6215: ctxt->sax->error(ctxt->userData,
6216: "xmlParseEntityRef: expecting ';'\n");
6217: ctxt->errNo = XML_ERR_ENTITYREF_SEMICOL_MISSING;
6218: ctxt->wellFormed = 0;
6219: }
6220: xmlFree(name);
6221: }
6222: }
6223: return(ent);
6224: }
1.24 daniel 6225:
1.50 daniel 6226: /**
6227: * xmlParsePEReference:
6228: * @ctxt: an XML parser context
6229: *
6230: * parse PEReference declarations
1.77 daniel 6231: * The entity content is handled directly by pushing it's content as
6232: * a new input stream.
1.22 daniel 6233: *
6234: * [69] PEReference ::= '%' Name ';'
1.68 daniel 6235: *
1.98 daniel 6236: * [ WFC: No Recursion ]
6237: * TODO A parsed entity must not contain a recursive
6238: * reference to itself, either directly or indirectly.
6239: *
6240: * [ WFC: Entity Declared ]
6241: * In a document without any DTD, a document with only an internal DTD
6242: * subset which contains no parameter entity references, or a document
6243: * with "standalone='yes'", ... ... The declaration of a parameter
6244: * entity must precede any reference to it...
6245: *
6246: * [ VC: Entity Declared ]
6247: * In a document with an external subset or external parameter entities
6248: * with "standalone='no'", ... ... The declaration of a parameter entity
6249: * must precede any reference to it...
6250: *
6251: * [ WFC: In DTD ]
6252: * Parameter-entity references may only appear in the DTD.
6253: * NOTE: misleading but this is handled.
1.22 daniel 6254: */
1.77 daniel 6255: void
1.55 daniel 6256: xmlParsePEReference(xmlParserCtxtPtr ctxt) {
1.123 daniel 6257: xmlChar *name;
1.72 daniel 6258: xmlEntityPtr entity = NULL;
1.50 daniel 6259: xmlParserInputPtr input;
1.22 daniel 6260:
1.152 daniel 6261: if (RAW == '%') {
1.40 daniel 6262: NEXT;
1.22 daniel 6263: name = xmlParseName(ctxt);
6264: if (name == NULL) {
1.55 daniel 6265: if ((ctxt->sax != NULL) && (ctxt->sax->error != NULL))
1.122 daniel 6266: ctxt->sax->error(ctxt->userData,
6267: "xmlParsePEReference: no name\n");
1.123 daniel 6268: ctxt->errNo = XML_ERR_NAME_REQUIRED;
1.59 daniel 6269: ctxt->wellFormed = 0;
1.22 daniel 6270: } else {
1.152 daniel 6271: if (RAW == ';') {
1.40 daniel 6272: NEXT;
1.98 daniel 6273: if ((ctxt->sax != NULL) &&
6274: (ctxt->sax->getParameterEntity != NULL))
6275: entity = ctxt->sax->getParameterEntity(ctxt->userData,
6276: name);
1.45 daniel 6277: if (entity == NULL) {
1.98 daniel 6278: /*
6279: * [ WFC: Entity Declared ]
6280: * In a document without any DTD, a document with only an
6281: * internal DTD subset which contains no parameter entity
6282: * references, or a document with "standalone='yes'", ...
6283: * ... The declaration of a parameter entity must precede
6284: * any reference to it...
6285: */
6286: if ((ctxt->standalone == 1) ||
6287: ((ctxt->hasExternalSubset == 0) &&
6288: (ctxt->hasPErefs == 0))) {
6289: if ((ctxt->sax != NULL) && (ctxt->sax->error != NULL))
6290: ctxt->sax->error(ctxt->userData,
6291: "PEReference: %%%s; not found\n", name);
1.123 daniel 6292: ctxt->errNo = XML_ERR_UNDECLARED_ENTITY;
1.98 daniel 6293: ctxt->wellFormed = 0;
6294: } else {
6295: /*
6296: * [ VC: Entity Declared ]
6297: * In a document with an external subset or external
6298: * parameter entities with "standalone='no'", ...
6299: * ... The declaration of a parameter entity must precede
6300: * any reference to it...
6301: */
6302: if ((ctxt->sax != NULL) && (ctxt->sax->warning != NULL))
6303: ctxt->sax->warning(ctxt->userData,
6304: "PEReference: %%%s; not found\n", name);
6305: ctxt->valid = 0;
6306: }
1.50 daniel 6307: } else {
1.98 daniel 6308: /*
6309: * Internal checking in case the entity quest barfed
6310: */
1.159 daniel 6311: if ((entity->etype != XML_INTERNAL_PARAMETER_ENTITY) &&
6312: (entity->etype != XML_EXTERNAL_PARAMETER_ENTITY)) {
1.98 daniel 6313: if ((ctxt->sax != NULL) && (ctxt->sax->warning != NULL))
6314: ctxt->sax->warning(ctxt->userData,
6315: "Internal: %%%s; is not a parameter entity\n", name);
6316: } else {
1.164 daniel 6317: /*
6318: * TODO !!!
6319: * handle the extra spaces added before and after
6320: * c.f. http://www.w3.org/TR/REC-xml#as-PE
6321: */
1.98 daniel 6322: input = xmlNewEntityInputStream(ctxt, entity);
6323: xmlPushInput(ctxt, input);
1.164 daniel 6324: if ((entity->etype == XML_EXTERNAL_PARAMETER_ENTITY) &&
6325: (RAW == '<') && (NXT(1) == '?') &&
6326: (NXT(2) == 'x') && (NXT(3) == 'm') &&
6327: (NXT(4) == 'l') && (IS_BLANK(NXT(5)))) {
6328: xmlParseXMLDecl(ctxt);
6329: }
6330: if (ctxt->token == 0)
6331: ctxt->token = ' ';
1.98 daniel 6332: }
1.45 daniel 6333: }
1.98 daniel 6334: ctxt->hasPErefs = 1;
1.22 daniel 6335: } else {
1.55 daniel 6336: if ((ctxt->sax != NULL) && (ctxt->sax->error != NULL))
1.74 daniel 6337: ctxt->sax->error(ctxt->userData,
1.59 daniel 6338: "xmlParsePEReference: expecting ';'\n");
1.123 daniel 6339: ctxt->errNo = XML_ERR_ENTITYREF_SEMICOL_MISSING;
1.59 daniel 6340: ctxt->wellFormed = 0;
1.22 daniel 6341: }
1.119 daniel 6342: xmlFree(name);
1.3 veillard 6343: }
6344: }
6345: }
6346:
1.50 daniel 6347: /**
1.135 daniel 6348: * xmlParseStringPEReference:
6349: * @ctxt: an XML parser context
6350: * @str: a pointer to an index in the string
6351: *
6352: * parse PEReference declarations
6353: *
6354: * [69] PEReference ::= '%' Name ';'
6355: *
6356: * [ WFC: No Recursion ]
6357: * TODO A parsed entity must not contain a recursive
6358: * reference to itself, either directly or indirectly.
6359: *
6360: * [ WFC: Entity Declared ]
6361: * In a document without any DTD, a document with only an internal DTD
6362: * subset which contains no parameter entity references, or a document
6363: * with "standalone='yes'", ... ... The declaration of a parameter
6364: * entity must precede any reference to it...
6365: *
6366: * [ VC: Entity Declared ]
6367: * In a document with an external subset or external parameter entities
6368: * with "standalone='no'", ... ... The declaration of a parameter entity
6369: * must precede any reference to it...
6370: *
6371: * [ WFC: In DTD ]
6372: * Parameter-entity references may only appear in the DTD.
6373: * NOTE: misleading but this is handled.
6374: *
6375: * Returns the string of the entity content.
6376: * str is updated to the current value of the index
6377: */
6378: xmlEntityPtr
6379: xmlParseStringPEReference(xmlParserCtxtPtr ctxt, const xmlChar **str) {
6380: const xmlChar *ptr;
6381: xmlChar cur;
6382: xmlChar *name;
6383: xmlEntityPtr entity = NULL;
6384:
6385: if ((str == NULL) || (*str == NULL)) return(NULL);
6386: ptr = *str;
6387: cur = *ptr;
6388: if (cur == '%') {
6389: ptr++;
6390: cur = *ptr;
6391: name = xmlParseStringName(ctxt, &ptr);
6392: if (name == NULL) {
6393: if ((ctxt->sax != NULL) && (ctxt->sax->error != NULL))
6394: ctxt->sax->error(ctxt->userData,
6395: "xmlParseStringPEReference: no name\n");
6396: ctxt->errNo = XML_ERR_NAME_REQUIRED;
6397: ctxt->wellFormed = 0;
6398: } else {
6399: cur = *ptr;
6400: if (cur == ';') {
6401: ptr++;
6402: cur = *ptr;
6403: if ((ctxt->sax != NULL) &&
6404: (ctxt->sax->getParameterEntity != NULL))
6405: entity = ctxt->sax->getParameterEntity(ctxt->userData,
6406: name);
6407: if (entity == NULL) {
6408: /*
6409: * [ WFC: Entity Declared ]
6410: * In a document without any DTD, a document with only an
6411: * internal DTD subset which contains no parameter entity
6412: * references, or a document with "standalone='yes'", ...
6413: * ... The declaration of a parameter entity must precede
6414: * any reference to it...
6415: */
6416: if ((ctxt->standalone == 1) ||
6417: ((ctxt->hasExternalSubset == 0) &&
6418: (ctxt->hasPErefs == 0))) {
6419: if ((ctxt->sax != NULL) && (ctxt->sax->error != NULL))
6420: ctxt->sax->error(ctxt->userData,
6421: "PEReference: %%%s; not found\n", name);
6422: ctxt->errNo = XML_ERR_UNDECLARED_ENTITY;
6423: ctxt->wellFormed = 0;
6424: } else {
6425: /*
6426: * [ VC: Entity Declared ]
6427: * In a document with an external subset or external
6428: * parameter entities with "standalone='no'", ...
6429: * ... The declaration of a parameter entity must
6430: * precede any reference to it...
6431: */
6432: if ((ctxt->sax != NULL) && (ctxt->sax->warning != NULL))
6433: ctxt->sax->warning(ctxt->userData,
6434: "PEReference: %%%s; not found\n", name);
6435: ctxt->valid = 0;
6436: }
6437: } else {
6438: /*
6439: * Internal checking in case the entity quest barfed
6440: */
1.159 daniel 6441: if ((entity->etype != XML_INTERNAL_PARAMETER_ENTITY) &&
6442: (entity->etype != XML_EXTERNAL_PARAMETER_ENTITY)) {
1.135 daniel 6443: if ((ctxt->sax != NULL) && (ctxt->sax->warning != NULL))
6444: ctxt->sax->warning(ctxt->userData,
6445: "Internal: %%%s; is not a parameter entity\n", name);
6446: }
6447: }
6448: ctxt->hasPErefs = 1;
6449: } else {
6450: if ((ctxt->sax != NULL) && (ctxt->sax->error != NULL))
6451: ctxt->sax->error(ctxt->userData,
6452: "xmlParseStringPEReference: expecting ';'\n");
6453: ctxt->errNo = XML_ERR_ENTITYREF_SEMICOL_MISSING;
6454: ctxt->wellFormed = 0;
6455: }
6456: xmlFree(name);
6457: }
6458: }
6459: *str = ptr;
6460: return(entity);
6461: }
6462:
6463: /**
1.50 daniel 6464: * xmlParseDocTypeDecl :
6465: * @ctxt: an XML parser context
6466: *
6467: * parse a DOCTYPE declaration
1.21 daniel 6468: *
1.22 daniel 6469: * [28] doctypedecl ::= '<!DOCTYPE' S Name (S ExternalID)? S?
6470: * ('[' (markupdecl | PEReference | S)* ']' S?)? '>'
1.98 daniel 6471: *
6472: * [ VC: Root Element Type ]
1.99 daniel 6473: * The Name in the document type declaration must match the element
1.98 daniel 6474: * type of the root element.
1.21 daniel 6475: */
6476:
1.55 daniel 6477: void
6478: xmlParseDocTypeDecl(xmlParserCtxtPtr ctxt) {
1.165 daniel 6479: xmlChar *name = NULL;
1.123 daniel 6480: xmlChar *ExternalID = NULL;
6481: xmlChar *URI = NULL;
1.21 daniel 6482:
6483: /*
6484: * We know that '<!DOCTYPE' has been detected.
6485: */
1.40 daniel 6486: SKIP(9);
1.21 daniel 6487:
1.42 daniel 6488: SKIP_BLANKS;
1.21 daniel 6489:
6490: /*
6491: * Parse the DOCTYPE name.
6492: */
6493: name = xmlParseName(ctxt);
6494: if (name == NULL) {
1.55 daniel 6495: if ((ctxt->sax != NULL) && (ctxt->sax->error != NULL))
1.122 daniel 6496: ctxt->sax->error(ctxt->userData,
6497: "xmlParseDocTypeDecl : no DOCTYPE name !\n");
1.59 daniel 6498: ctxt->wellFormed = 0;
1.123 daniel 6499: ctxt->errNo = XML_ERR_NAME_REQUIRED;
1.21 daniel 6500: }
1.165 daniel 6501: ctxt->intSubName = name;
1.21 daniel 6502:
1.42 daniel 6503: SKIP_BLANKS;
1.21 daniel 6504:
6505: /*
1.22 daniel 6506: * Check for SystemID and ExternalID
6507: */
1.67 daniel 6508: URI = xmlParseExternalID(ctxt, &ExternalID, 1);
1.98 daniel 6509:
6510: if ((URI != NULL) || (ExternalID != NULL)) {
6511: ctxt->hasExternalSubset = 1;
6512: }
1.165 daniel 6513: ctxt->extSubURI = URI;
6514: ctxt->extSubSystem = ExternalID;
1.98 daniel 6515:
1.42 daniel 6516: SKIP_BLANKS;
1.36 daniel 6517:
1.76 daniel 6518: /*
1.165 daniel 6519: * Create and update the internal subset.
1.76 daniel 6520: */
1.171 ! daniel 6521: if ((ctxt->sax != NULL) && (ctxt->sax->internalSubset != NULL) &&
! 6522: (!ctxt->disableSAX))
1.74 daniel 6523: ctxt->sax->internalSubset(ctxt->userData, name, ExternalID, URI);
1.22 daniel 6524:
6525: /*
1.140 daniel 6526: * Is there any internal subset declarations ?
6527: * they are handled separately in xmlParseInternalSubset()
6528: */
1.152 daniel 6529: if (RAW == '[')
1.140 daniel 6530: return;
6531:
6532: /*
6533: * We should be at the end of the DOCTYPE declaration.
6534: */
1.152 daniel 6535: if (RAW != '>') {
1.140 daniel 6536: if ((ctxt->sax != NULL) && (ctxt->sax->error != NULL))
6537: ctxt->sax->error(ctxt->userData, "DOCTYPE unproperly terminated\n");
6538: ctxt->wellFormed = 0;
6539: ctxt->errNo = XML_ERR_DOCTYPE_NOT_FINISHED;
6540: }
6541: NEXT;
6542: }
6543:
6544: /**
6545: * xmlParseInternalsubset :
6546: * @ctxt: an XML parser context
6547: *
6548: * parse the internal subset declaration
6549: *
6550: * [28 end] ('[' (markupdecl | PEReference | S)* ']' S?)? '>'
6551: */
6552:
6553: void
6554: xmlParseInternalSubset(xmlParserCtxtPtr ctxt) {
6555: /*
1.22 daniel 6556: * Is there any DTD definition ?
6557: */
1.152 daniel 6558: if (RAW == '[') {
1.96 daniel 6559: ctxt->instate = XML_PARSER_DTD;
1.40 daniel 6560: NEXT;
1.22 daniel 6561: /*
6562: * Parse the succession of Markup declarations and
6563: * PEReferences.
6564: * Subsequence (markupdecl | PEReference | S)*
6565: */
1.152 daniel 6566: while (RAW != ']') {
1.123 daniel 6567: const xmlChar *check = CUR_PTR;
1.115 daniel 6568: int cons = ctxt->input->consumed;
1.22 daniel 6569:
1.42 daniel 6570: SKIP_BLANKS;
1.22 daniel 6571: xmlParseMarkupDecl(ctxt);
1.50 daniel 6572: xmlParsePEReference(ctxt);
1.22 daniel 6573:
1.115 daniel 6574: /*
6575: * Pop-up of finished entities.
6576: */
1.152 daniel 6577: while ((RAW == 0) && (ctxt->inputNr > 1))
1.115 daniel 6578: xmlPopInput(ctxt);
6579:
1.118 daniel 6580: if ((CUR_PTR == check) && (cons == ctxt->input->consumed)) {
1.55 daniel 6581: if ((ctxt->sax != NULL) && (ctxt->sax->error != NULL))
1.74 daniel 6582: ctxt->sax->error(ctxt->userData,
1.140 daniel 6583: "xmlParseInternalSubset: error detected in Markup declaration\n");
1.59 daniel 6584: ctxt->wellFormed = 0;
1.123 daniel 6585: ctxt->errNo = XML_ERR_INTERNAL_ERROR;
1.22 daniel 6586: break;
6587: }
6588: }
1.152 daniel 6589: if (RAW == ']') NEXT;
1.22 daniel 6590: }
6591:
6592: /*
6593: * We should be at the end of the DOCTYPE declaration.
1.21 daniel 6594: */
1.152 daniel 6595: if (RAW != '>') {
1.55 daniel 6596: if ((ctxt->sax != NULL) && (ctxt->sax->error != NULL))
1.74 daniel 6597: ctxt->sax->error(ctxt->userData, "DOCTYPE unproperly terminated\n");
1.59 daniel 6598: ctxt->wellFormed = 0;
1.123 daniel 6599: ctxt->errNo = XML_ERR_DOCTYPE_NOT_FINISHED;
1.21 daniel 6600: }
1.40 daniel 6601: NEXT;
1.21 daniel 6602: }
6603:
1.50 daniel 6604: /**
6605: * xmlParseAttribute:
6606: * @ctxt: an XML parser context
1.123 daniel 6607: * @value: a xmlChar ** used to store the value of the attribute
1.50 daniel 6608: *
6609: * parse an attribute
1.3 veillard 6610: *
1.22 daniel 6611: * [41] Attribute ::= Name Eq AttValue
6612: *
1.98 daniel 6613: * [ WFC: No External Entity References ]
6614: * Attribute values cannot contain direct or indirect entity references
6615: * to external entities.
6616: *
6617: * [ WFC: No < in Attribute Values ]
6618: * The replacement text of any entity referred to directly or indirectly in
6619: * an attribute value (other than "<") must not contain a <.
6620: *
6621: * [ VC: Attribute Value Type ]
1.117 daniel 6622: * The attribute must have been declared; the value must be of the type
1.99 daniel 6623: * declared for it.
1.98 daniel 6624: *
1.22 daniel 6625: * [25] Eq ::= S? '=' S?
6626: *
1.29 daniel 6627: * With namespace:
6628: *
6629: * [NS 11] Attribute ::= QName Eq AttValue
1.43 daniel 6630: *
6631: * Also the case QName == xmlns:??? is handled independently as a namespace
6632: * definition.
1.69 daniel 6633: *
1.72 daniel 6634: * Returns the attribute name, and the value in *value.
1.3 veillard 6635: */
6636:
1.123 daniel 6637: xmlChar *
6638: xmlParseAttribute(xmlParserCtxtPtr ctxt, xmlChar **value) {
6639: xmlChar *name, *val;
1.3 veillard 6640:
1.72 daniel 6641: *value = NULL;
6642: name = xmlParseName(ctxt);
1.22 daniel 6643: if (name == NULL) {
1.55 daniel 6644: if ((ctxt->sax != NULL) && (ctxt->sax->error != NULL))
1.74 daniel 6645: ctxt->sax->error(ctxt->userData, "error parsing attribute name\n");
1.59 daniel 6646: ctxt->wellFormed = 0;
1.123 daniel 6647: ctxt->errNo = XML_ERR_NAME_REQUIRED;
1.52 daniel 6648: return(NULL);
1.3 veillard 6649: }
6650:
6651: /*
1.29 daniel 6652: * read the value
1.3 veillard 6653: */
1.42 daniel 6654: SKIP_BLANKS;
1.152 daniel 6655: if (RAW == '=') {
1.40 daniel 6656: NEXT;
1.42 daniel 6657: SKIP_BLANKS;
1.72 daniel 6658: val = xmlParseAttValue(ctxt);
1.96 daniel 6659: ctxt->instate = XML_PARSER_CONTENT;
1.29 daniel 6660: } else {
1.55 daniel 6661: if ((ctxt->sax != NULL) && (ctxt->sax->error != NULL))
1.74 daniel 6662: ctxt->sax->error(ctxt->userData,
1.59 daniel 6663: "Specification mandate value for attribute %s\n", name);
1.123 daniel 6664: ctxt->errNo = XML_ERR_ATTRIBUTE_WITHOUT_VALUE;
1.59 daniel 6665: ctxt->wellFormed = 0;
1.170 daniel 6666: xmlFree(name);
1.52 daniel 6667: return(NULL);
1.43 daniel 6668: }
6669:
1.72 daniel 6670: *value = val;
6671: return(name);
1.3 veillard 6672: }
6673:
1.50 daniel 6674: /**
6675: * xmlParseStartTag:
6676: * @ctxt: an XML parser context
6677: *
6678: * parse a start of tag either for rule element or
6679: * EmptyElement. In both case we don't parse the tag closing chars.
1.27 daniel 6680: *
6681: * [40] STag ::= '<' Name (S Attribute)* S? '>'
6682: *
1.98 daniel 6683: * [ WFC: Unique Att Spec ]
6684: * No attribute name may appear more than once in the same start-tag or
6685: * empty-element tag.
6686: *
1.29 daniel 6687: * [44] EmptyElemTag ::= '<' Name (S Attribute)* S? '/>'
6688: *
1.98 daniel 6689: * [ WFC: Unique Att Spec ]
6690: * No attribute name may appear more than once in the same start-tag or
6691: * empty-element tag.
6692: *
1.29 daniel 6693: * With namespace:
6694: *
6695: * [NS 8] STag ::= '<' QName (S Attribute)* S? '>'
6696: *
6697: * [NS 10] EmptyElement ::= '<' QName (S Attribute)* S? '/>'
1.83 daniel 6698: *
1.129 daniel 6699: * Returne the element name parsed
1.2 veillard 6700: */
6701:
1.123 daniel 6702: xmlChar *
1.69 daniel 6703: xmlParseStartTag(xmlParserCtxtPtr ctxt) {
1.123 daniel 6704: xmlChar *name;
6705: xmlChar *attname;
6706: xmlChar *attvalue;
6707: const xmlChar **atts = NULL;
1.72 daniel 6708: int nbatts = 0;
6709: int maxatts = 0;
6710: int i;
1.2 veillard 6711:
1.152 daniel 6712: if (RAW != '<') return(NULL);
1.40 daniel 6713: NEXT;
1.3 veillard 6714:
1.72 daniel 6715: name = xmlParseName(ctxt);
1.59 daniel 6716: if (name == NULL) {
6717: if ((ctxt->sax != NULL) && (ctxt->sax->error != NULL))
1.74 daniel 6718: ctxt->sax->error(ctxt->userData,
1.59 daniel 6719: "xmlParseStartTag: invalid element name\n");
1.123 daniel 6720: ctxt->errNo = XML_ERR_NAME_REQUIRED;
1.59 daniel 6721: ctxt->wellFormed = 0;
1.83 daniel 6722: return(NULL);
1.50 daniel 6723: }
6724:
6725: /*
1.3 veillard 6726: * Now parse the attributes, it ends up with the ending
6727: *
6728: * (S Attribute)* S?
6729: */
1.42 daniel 6730: SKIP_BLANKS;
1.91 daniel 6731: GROW;
1.168 daniel 6732:
1.153 daniel 6733: while ((IS_CHAR(RAW)) &&
1.152 daniel 6734: (RAW != '>') &&
6735: ((RAW != '/') || (NXT(1) != '>'))) {
1.123 daniel 6736: const xmlChar *q = CUR_PTR;
1.91 daniel 6737: int cons = ctxt->input->consumed;
1.29 daniel 6738:
1.72 daniel 6739: attname = xmlParseAttribute(ctxt, &attvalue);
6740: if ((attname != NULL) && (attvalue != NULL)) {
6741: /*
1.98 daniel 6742: * [ WFC: Unique Att Spec ]
6743: * No attribute name may appear more than once in the same
6744: * start-tag or empty-element tag.
1.72 daniel 6745: */
6746: for (i = 0; i < nbatts;i += 2) {
6747: if (!xmlStrcmp(atts[i], attname)) {
6748: if ((ctxt->sax != NULL) && (ctxt->sax->error != NULL))
1.98 daniel 6749: ctxt->sax->error(ctxt->userData,
6750: "Attribute %s redefined\n",
6751: attname);
1.72 daniel 6752: ctxt->wellFormed = 0;
1.123 daniel 6753: ctxt->errNo = XML_ERR_ATTRIBUTE_REDEFINED;
1.119 daniel 6754: xmlFree(attname);
6755: xmlFree(attvalue);
1.98 daniel 6756: goto failed;
1.72 daniel 6757: }
6758: }
6759:
6760: /*
6761: * Add the pair to atts
6762: */
6763: if (atts == NULL) {
6764: maxatts = 10;
1.123 daniel 6765: atts = (const xmlChar **) xmlMalloc(maxatts * sizeof(xmlChar *));
1.72 daniel 6766: if (atts == NULL) {
1.86 daniel 6767: fprintf(stderr, "malloc of %ld byte failed\n",
1.123 daniel 6768: maxatts * (long)sizeof(xmlChar *));
1.83 daniel 6769: return(NULL);
1.72 daniel 6770: }
1.127 daniel 6771: } else if (nbatts + 4 > maxatts) {
1.72 daniel 6772: maxatts *= 2;
1.123 daniel 6773: atts = (const xmlChar **) xmlRealloc(atts,
6774: maxatts * sizeof(xmlChar *));
1.72 daniel 6775: if (atts == NULL) {
1.86 daniel 6776: fprintf(stderr, "realloc of %ld byte failed\n",
1.123 daniel 6777: maxatts * (long)sizeof(xmlChar *));
1.83 daniel 6778: return(NULL);
1.72 daniel 6779: }
6780: }
6781: atts[nbatts++] = attname;
6782: atts[nbatts++] = attvalue;
6783: atts[nbatts] = NULL;
6784: atts[nbatts + 1] = NULL;
6785: }
6786:
1.116 daniel 6787: failed:
1.168 daniel 6788:
6789: if ((RAW == '>') || (((RAW == '/') && (NXT(1) == '>'))))
6790: break;
6791: if (!IS_BLANK(RAW)) {
6792: if ((ctxt->sax != NULL) && (ctxt->sax->error != NULL))
6793: ctxt->sax->error(ctxt->userData,
6794: "attributes construct error\n");
6795: ctxt->errNo = XML_ERR_SPACE_REQUIRED;
6796: ctxt->wellFormed = 0;
6797: }
1.42 daniel 6798: SKIP_BLANKS;
1.91 daniel 6799: if ((cons == ctxt->input->consumed) && (q == CUR_PTR)) {
1.55 daniel 6800: if ((ctxt->sax != NULL) && (ctxt->sax->error != NULL))
1.74 daniel 6801: ctxt->sax->error(ctxt->userData,
1.31 daniel 6802: "xmlParseStartTag: problem parsing attributes\n");
1.123 daniel 6803: ctxt->errNo = XML_ERR_INTERNAL_ERROR;
1.59 daniel 6804: ctxt->wellFormed = 0;
1.29 daniel 6805: break;
1.3 veillard 6806: }
1.91 daniel 6807: GROW;
1.3 veillard 6808: }
6809:
1.43 daniel 6810: /*
1.72 daniel 6811: * SAX: Start of Element !
1.43 daniel 6812: */
1.171 ! daniel 6813: if ((ctxt->sax != NULL) && (ctxt->sax->startElement != NULL) &&
! 6814: (!ctxt->disableSAX))
1.74 daniel 6815: ctxt->sax->startElement(ctxt->userData, name, atts);
1.43 daniel 6816:
1.72 daniel 6817: if (atts != NULL) {
1.123 daniel 6818: for (i = 0;i < nbatts;i++) xmlFree((xmlChar *) atts[i]);
1.119 daniel 6819: xmlFree(atts);
1.72 daniel 6820: }
1.83 daniel 6821: return(name);
1.3 veillard 6822: }
6823:
1.50 daniel 6824: /**
6825: * xmlParseEndTag:
6826: * @ctxt: an XML parser context
6827: *
6828: * parse an end of tag
1.27 daniel 6829: *
6830: * [42] ETag ::= '</' Name S? '>'
1.29 daniel 6831: *
6832: * With namespace
6833: *
1.72 daniel 6834: * [NS 9] ETag ::= '</' QName S? '>'
1.7 veillard 6835: */
6836:
1.55 daniel 6837: void
1.140 daniel 6838: xmlParseEndTag(xmlParserCtxtPtr ctxt) {
1.123 daniel 6839: xmlChar *name;
1.140 daniel 6840: xmlChar *oldname;
1.7 veillard 6841:
1.91 daniel 6842: GROW;
1.152 daniel 6843: if ((RAW != '<') || (NXT(1) != '/')) {
1.55 daniel 6844: if ((ctxt->sax != NULL) && (ctxt->sax->error != NULL))
1.74 daniel 6845: ctxt->sax->error(ctxt->userData, "xmlParseEndTag: '</' not found\n");
1.59 daniel 6846: ctxt->wellFormed = 0;
1.123 daniel 6847: ctxt->errNo = XML_ERR_LTSLASH_REQUIRED;
1.27 daniel 6848: return;
6849: }
1.40 daniel 6850: SKIP(2);
1.7 veillard 6851:
1.72 daniel 6852: name = xmlParseName(ctxt);
1.7 veillard 6853:
6854: /*
6855: * We should definitely be at the ending "S? '>'" part
6856: */
1.91 daniel 6857: GROW;
1.42 daniel 6858: SKIP_BLANKS;
1.153 daniel 6859: if ((!IS_CHAR(RAW)) || (RAW != '>')) {
1.55 daniel 6860: if ((ctxt->sax != NULL) && (ctxt->sax->error != NULL))
1.74 daniel 6861: ctxt->sax->error(ctxt->userData, "End tag : expected '>'\n");
1.123 daniel 6862: ctxt->errNo = XML_ERR_GT_REQUIRED;
1.59 daniel 6863: ctxt->wellFormed = 0;
1.7 veillard 6864: } else
1.40 daniel 6865: NEXT;
1.7 veillard 6866:
1.72 daniel 6867: /*
1.98 daniel 6868: * [ WFC: Element Type Match ]
6869: * The Name in an element's end-tag must match the element type in the
6870: * start-tag.
6871: *
1.83 daniel 6872: */
1.147 daniel 6873: if ((name == NULL) || (ctxt->name == NULL) ||
6874: (xmlStrcmp(name, ctxt->name))) {
6875: if ((ctxt->sax != NULL) && (ctxt->sax->error != NULL)) {
6876: if ((name != NULL) && (ctxt->name != NULL)) {
6877: ctxt->sax->error(ctxt->userData,
6878: "Opening and ending tag mismatch: %s and %s\n",
6879: ctxt->name, name);
6880: } else if (ctxt->name != NULL) {
6881: ctxt->sax->error(ctxt->userData,
6882: "Ending tag eror for: %s\n", ctxt->name);
6883: } else {
6884: ctxt->sax->error(ctxt->userData,
6885: "Ending tag error: internal error ???\n");
6886: }
1.122 daniel 6887:
1.147 daniel 6888: }
1.123 daniel 6889: ctxt->errNo = XML_ERR_TAG_NAME_MISMATCH;
1.83 daniel 6890: ctxt->wellFormed = 0;
6891: }
6892:
6893: /*
1.72 daniel 6894: * SAX: End of Tag
6895: */
1.171 ! daniel 6896: if ((ctxt->sax != NULL) && (ctxt->sax->endElement != NULL) &&
! 6897: (!ctxt->disableSAX))
1.74 daniel 6898: ctxt->sax->endElement(ctxt->userData, name);
1.72 daniel 6899:
6900: if (name != NULL)
1.119 daniel 6901: xmlFree(name);
1.140 daniel 6902: oldname = namePop(ctxt);
6903: if (oldname != NULL) {
6904: #ifdef DEBUG_STACK
6905: fprintf(stderr,"Close: popped %s\n", oldname);
6906: #endif
6907: xmlFree(oldname);
6908: }
1.7 veillard 6909: return;
6910: }
6911:
1.50 daniel 6912: /**
6913: * xmlParseCDSect:
6914: * @ctxt: an XML parser context
6915: *
6916: * Parse escaped pure raw content.
1.29 daniel 6917: *
6918: * [18] CDSect ::= CDStart CData CDEnd
6919: *
6920: * [19] CDStart ::= '<![CDATA['
6921: *
6922: * [20] Data ::= (Char* - (Char* ']]>' Char*))
6923: *
6924: * [21] CDEnd ::= ']]>'
1.3 veillard 6925: */
1.55 daniel 6926: void
6927: xmlParseCDSect(xmlParserCtxtPtr ctxt) {
1.135 daniel 6928: xmlChar *buf = NULL;
6929: int len = 0;
1.140 daniel 6930: int size = XML_PARSER_BUFFER_SIZE;
1.152 daniel 6931: int r, rl;
6932: int s, sl;
6933: int cur, l;
1.3 veillard 6934:
1.106 daniel 6935: if ((NXT(0) == '<') && (NXT(1) == '!') &&
1.40 daniel 6936: (NXT(2) == '[') && (NXT(3) == 'C') &&
6937: (NXT(4) == 'D') && (NXT(5) == 'A') &&
6938: (NXT(6) == 'T') && (NXT(7) == 'A') &&
6939: (NXT(8) == '[')) {
6940: SKIP(9);
1.29 daniel 6941: } else
1.45 daniel 6942: return;
1.109 daniel 6943:
6944: ctxt->instate = XML_PARSER_CDATA_SECTION;
1.152 daniel 6945: r = CUR_CHAR(rl);
6946: if (!IS_CHAR(r)) {
1.55 daniel 6947: if ((ctxt->sax != NULL) && (ctxt->sax->error != NULL))
1.122 daniel 6948: ctxt->sax->error(ctxt->userData,
1.135 daniel 6949: "CData section not finished\n");
1.59 daniel 6950: ctxt->wellFormed = 0;
1.123 daniel 6951: ctxt->errNo = XML_ERR_CDATA_NOT_FINISHED;
1.109 daniel 6952: ctxt->instate = XML_PARSER_CONTENT;
1.45 daniel 6953: return;
1.3 veillard 6954: }
1.152 daniel 6955: NEXTL(rl);
6956: s = CUR_CHAR(sl);
6957: if (!IS_CHAR(s)) {
1.55 daniel 6958: if ((ctxt->sax != NULL) && (ctxt->sax->error != NULL))
1.122 daniel 6959: ctxt->sax->error(ctxt->userData,
1.135 daniel 6960: "CData section not finished\n");
1.123 daniel 6961: ctxt->errNo = XML_ERR_CDATA_NOT_FINISHED;
1.59 daniel 6962: ctxt->wellFormed = 0;
1.109 daniel 6963: ctxt->instate = XML_PARSER_CONTENT;
1.45 daniel 6964: return;
1.3 veillard 6965: }
1.152 daniel 6966: NEXTL(sl);
6967: cur = CUR_CHAR(l);
1.135 daniel 6968: buf = (xmlChar *) xmlMalloc(size * sizeof(xmlChar));
6969: if (buf == NULL) {
6970: fprintf(stderr, "malloc of %d byte failed\n", size);
6971: return;
6972: }
1.108 veillard 6973: while (IS_CHAR(cur) &&
1.110 daniel 6974: ((r != ']') || (s != ']') || (cur != '>'))) {
1.152 daniel 6975: if (len + 5 >= size) {
1.135 daniel 6976: size *= 2;
6977: buf = xmlRealloc(buf, size * sizeof(xmlChar));
6978: if (buf == NULL) {
6979: fprintf(stderr, "realloc of %d byte failed\n", size);
6980: return;
6981: }
6982: }
1.152 daniel 6983: COPY_BUF(rl,buf,len,r);
1.110 daniel 6984: r = s;
1.152 daniel 6985: rl = sl;
1.110 daniel 6986: s = cur;
1.152 daniel 6987: sl = l;
6988: NEXTL(l);
6989: cur = CUR_CHAR(l);
1.3 veillard 6990: }
1.135 daniel 6991: buf[len] = 0;
1.109 daniel 6992: ctxt->instate = XML_PARSER_CONTENT;
1.152 daniel 6993: if (cur != '>') {
1.55 daniel 6994: if ((ctxt->sax != NULL) && (ctxt->sax->error != NULL))
1.122 daniel 6995: ctxt->sax->error(ctxt->userData,
1.135 daniel 6996: "CData section not finished\n%.50s\n", buf);
1.123 daniel 6997: ctxt->errNo = XML_ERR_CDATA_NOT_FINISHED;
1.59 daniel 6998: ctxt->wellFormed = 0;
1.135 daniel 6999: xmlFree(buf);
1.45 daniel 7000: return;
1.3 veillard 7001: }
1.152 daniel 7002: NEXTL(l);
1.16 daniel 7003:
1.45 daniel 7004: /*
1.135 daniel 7005: * Ok the buffer is to be consumed as cdata.
1.45 daniel 7006: */
1.171 ! daniel 7007: if ((ctxt->sax != NULL) && (!ctxt->disableSAX)) {
1.107 daniel 7008: if (ctxt->sax->cdataBlock != NULL)
1.135 daniel 7009: ctxt->sax->cdataBlock(ctxt->userData, buf, len);
1.45 daniel 7010: }
1.135 daniel 7011: xmlFree(buf);
1.2 veillard 7012: }
7013:
1.50 daniel 7014: /**
7015: * xmlParseContent:
7016: * @ctxt: an XML parser context
7017: *
7018: * Parse a content:
1.2 veillard 7019: *
1.27 daniel 7020: * [43] content ::= (element | CharData | Reference | CDSect | PI | Comment)*
1.2 veillard 7021: */
7022:
1.55 daniel 7023: void
7024: xmlParseContent(xmlParserCtxtPtr ctxt) {
1.97 daniel 7025: GROW;
1.152 daniel 7026: while ((RAW != '<') || (NXT(1) != '/')) {
1.123 daniel 7027: const xmlChar *test = CUR_PTR;
1.91 daniel 7028: int cons = ctxt->input->consumed;
1.123 daniel 7029: xmlChar tok = ctxt->token;
1.27 daniel 7030:
7031: /*
1.152 daniel 7032: * Handle possible processed charrefs.
7033: */
7034: if (ctxt->token != 0) {
7035: xmlParseCharData(ctxt, 0);
7036: }
7037: /*
1.27 daniel 7038: * First case : a Processing Instruction.
7039: */
1.152 daniel 7040: else if ((RAW == '<') && (NXT(1) == '?')) {
1.27 daniel 7041: xmlParsePI(ctxt);
7042: }
1.72 daniel 7043:
1.27 daniel 7044: /*
7045: * Second case : a CDSection
7046: */
1.152 daniel 7047: else if ((RAW == '<') && (NXT(1) == '!') &&
1.40 daniel 7048: (NXT(2) == '[') && (NXT(3) == 'C') &&
7049: (NXT(4) == 'D') && (NXT(5) == 'A') &&
7050: (NXT(6) == 'T') && (NXT(7) == 'A') &&
7051: (NXT(8) == '[')) {
1.45 daniel 7052: xmlParseCDSect(ctxt);
1.27 daniel 7053: }
1.72 daniel 7054:
1.27 daniel 7055: /*
7056: * Third case : a comment
7057: */
1.152 daniel 7058: else if ((RAW == '<') && (NXT(1) == '!') &&
1.40 daniel 7059: (NXT(2) == '-') && (NXT(3) == '-')) {
1.114 daniel 7060: xmlParseComment(ctxt);
1.97 daniel 7061: ctxt->instate = XML_PARSER_CONTENT;
1.27 daniel 7062: }
1.72 daniel 7063:
1.27 daniel 7064: /*
7065: * Fourth case : a sub-element.
7066: */
1.152 daniel 7067: else if (RAW == '<') {
1.72 daniel 7068: xmlParseElement(ctxt);
1.45 daniel 7069: }
1.72 daniel 7070:
1.45 daniel 7071: /*
1.50 daniel 7072: * Fifth case : a reference. If if has not been resolved,
7073: * parsing returns it's Name, create the node
1.45 daniel 7074: */
1.97 daniel 7075:
1.152 daniel 7076: else if (RAW == '&') {
1.77 daniel 7077: xmlParseReference(ctxt);
1.27 daniel 7078: }
1.72 daniel 7079:
1.27 daniel 7080: /*
7081: * Last case, text. Note that References are handled directly.
7082: */
7083: else {
1.45 daniel 7084: xmlParseCharData(ctxt, 0);
1.3 veillard 7085: }
1.14 veillard 7086:
1.91 daniel 7087: GROW;
1.14 veillard 7088: /*
1.45 daniel 7089: * Pop-up of finished entities.
1.14 veillard 7090: */
1.152 daniel 7091: while ((RAW == 0) && (ctxt->inputNr > 1))
1.69 daniel 7092: xmlPopInput(ctxt);
1.135 daniel 7093: SHRINK;
1.45 daniel 7094:
1.113 daniel 7095: if ((cons == ctxt->input->consumed) && (test == CUR_PTR) &&
7096: (tok == ctxt->token)) {
1.55 daniel 7097: if ((ctxt->sax != NULL) && (ctxt->sax->error != NULL))
1.74 daniel 7098: ctxt->sax->error(ctxt->userData,
1.59 daniel 7099: "detected an error in element content\n");
1.123 daniel 7100: ctxt->errNo = XML_ERR_INTERNAL_ERROR;
1.59 daniel 7101: ctxt->wellFormed = 0;
1.29 daniel 7102: break;
7103: }
1.3 veillard 7104: }
1.2 veillard 7105: }
7106:
1.50 daniel 7107: /**
7108: * xmlParseElement:
7109: * @ctxt: an XML parser context
7110: *
7111: * parse an XML element, this is highly recursive
1.26 daniel 7112: *
7113: * [39] element ::= EmptyElemTag | STag content ETag
7114: *
1.98 daniel 7115: * [ WFC: Element Type Match ]
7116: * The Name in an element's end-tag must match the element type in the
7117: * start-tag.
7118: *
7119: * [ VC: Element Valid ]
1.117 daniel 7120: * An element is valid if there is a declaration matching elementdecl
1.99 daniel 7121: * where the Name matches the element type and one of the following holds:
7122: * - The declaration matches EMPTY and the element has no content.
7123: * - The declaration matches children and the sequence of child elements
7124: * belongs to the language generated by the regular expression in the
7125: * content model, with optional white space (characters matching the
7126: * nonterminal S) between each pair of child elements.
7127: * - The declaration matches Mixed and the content consists of character
7128: * data and child elements whose types match names in the content model.
7129: * - The declaration matches ANY, and the types of any child elements have
7130: * been declared.
1.2 veillard 7131: */
1.26 daniel 7132:
1.72 daniel 7133: void
1.69 daniel 7134: xmlParseElement(xmlParserCtxtPtr ctxt) {
1.123 daniel 7135: const xmlChar *openTag = CUR_PTR;
7136: xmlChar *name;
1.140 daniel 7137: xmlChar *oldname;
1.32 daniel 7138: xmlParserNodeInfo node_info;
1.118 daniel 7139: xmlNodePtr ret;
1.2 veillard 7140:
1.32 daniel 7141: /* Capture start position */
1.118 daniel 7142: if (ctxt->record_info) {
7143: node_info.begin_pos = ctxt->input->consumed +
7144: (CUR_PTR - ctxt->input->base);
7145: node_info.begin_line = ctxt->input->line;
7146: }
1.32 daniel 7147:
1.83 daniel 7148: name = xmlParseStartTag(ctxt);
7149: if (name == NULL) {
7150: return;
7151: }
1.140 daniel 7152: namePush(ctxt, name);
1.118 daniel 7153: ret = ctxt->node;
1.2 veillard 7154:
7155: /*
1.99 daniel 7156: * [ VC: Root Element Type ]
7157: * The Name in the document type declaration must match the element
7158: * type of the root element.
7159: */
1.105 daniel 7160: if (ctxt->validate && ctxt->wellFormed && ctxt->myDoc &&
1.157 daniel 7161: ctxt->node && (ctxt->node == ctxt->myDoc->children))
1.102 daniel 7162: ctxt->valid &= xmlValidateRoot(&ctxt->vctxt, ctxt->myDoc);
1.99 daniel 7163:
7164: /*
1.2 veillard 7165: * Check for an Empty Element.
7166: */
1.152 daniel 7167: if ((RAW == '/') && (NXT(1) == '>')) {
1.40 daniel 7168: SKIP(2);
1.171 ! daniel 7169: if ((ctxt->sax != NULL) && (ctxt->sax->endElement != NULL) &&
! 7170: (!ctxt->disableSAX))
1.83 daniel 7171: ctxt->sax->endElement(ctxt->userData, name);
1.140 daniel 7172: oldname = namePop(ctxt);
7173: if (oldname != NULL) {
7174: #ifdef DEBUG_STACK
7175: fprintf(stderr,"Close: popped %s\n", oldname);
7176: #endif
7177: xmlFree(oldname);
7178: }
1.72 daniel 7179: return;
1.2 veillard 7180: }
1.152 daniel 7181: if (RAW == '>') {
1.91 daniel 7182: NEXT;
7183: } else {
1.55 daniel 7184: if ((ctxt->sax != NULL) && (ctxt->sax->error != NULL))
1.122 daniel 7185: ctxt->sax->error(ctxt->userData,
7186: "Couldn't find end of Start Tag\n%.30s\n",
1.57 daniel 7187: openTag);
1.59 daniel 7188: ctxt->wellFormed = 0;
1.123 daniel 7189: ctxt->errNo = XML_ERR_GT_REQUIRED;
1.45 daniel 7190:
7191: /*
7192: * end of parsing of this node.
7193: */
7194: nodePop(ctxt);
1.140 daniel 7195: oldname = namePop(ctxt);
7196: if (oldname != NULL) {
7197: #ifdef DEBUG_STACK
7198: fprintf(stderr,"Close: popped %s\n", oldname);
7199: #endif
7200: xmlFree(oldname);
7201: }
1.118 daniel 7202:
7203: /*
7204: * Capture end position and add node
7205: */
7206: if ( ret != NULL && ctxt->record_info ) {
7207: node_info.end_pos = ctxt->input->consumed +
7208: (CUR_PTR - ctxt->input->base);
7209: node_info.end_line = ctxt->input->line;
7210: node_info.node = ret;
7211: xmlParserAddNodeInfo(ctxt, &node_info);
7212: }
1.72 daniel 7213: return;
1.2 veillard 7214: }
7215:
7216: /*
7217: * Parse the content of the element:
7218: */
1.45 daniel 7219: xmlParseContent(ctxt);
1.153 daniel 7220: if (!IS_CHAR(RAW)) {
1.55 daniel 7221: if ((ctxt->sax != NULL) && (ctxt->sax->error != NULL))
1.74 daniel 7222: ctxt->sax->error(ctxt->userData,
1.57 daniel 7223: "Premature end of data in tag %.30s\n", openTag);
1.59 daniel 7224: ctxt->wellFormed = 0;
1.123 daniel 7225: ctxt->errNo = XML_ERR_TAG_NOT_FINISED;
1.45 daniel 7226:
7227: /*
7228: * end of parsing of this node.
7229: */
7230: nodePop(ctxt);
1.140 daniel 7231: oldname = namePop(ctxt);
7232: if (oldname != NULL) {
7233: #ifdef DEBUG_STACK
7234: fprintf(stderr,"Close: popped %s\n", oldname);
7235: #endif
7236: xmlFree(oldname);
7237: }
1.72 daniel 7238: return;
1.2 veillard 7239: }
7240:
7241: /*
1.27 daniel 7242: * parse the end of tag: '</' should be here.
1.2 veillard 7243: */
1.140 daniel 7244: xmlParseEndTag(ctxt);
1.118 daniel 7245:
7246: /*
7247: * Capture end position and add node
7248: */
7249: if ( ret != NULL && ctxt->record_info ) {
7250: node_info.end_pos = ctxt->input->consumed +
7251: (CUR_PTR - ctxt->input->base);
7252: node_info.end_line = ctxt->input->line;
7253: node_info.node = ret;
7254: xmlParserAddNodeInfo(ctxt, &node_info);
7255: }
1.2 veillard 7256: }
7257:
1.50 daniel 7258: /**
7259: * xmlParseVersionNum:
7260: * @ctxt: an XML parser context
7261: *
7262: * parse the XML version value.
1.29 daniel 7263: *
7264: * [26] VersionNum ::= ([a-zA-Z0-9_.:] | '-')+
1.68 daniel 7265: *
7266: * Returns the string giving the XML version number, or NULL
1.29 daniel 7267: */
1.123 daniel 7268: xmlChar *
1.55 daniel 7269: xmlParseVersionNum(xmlParserCtxtPtr ctxt) {
1.135 daniel 7270: xmlChar *buf = NULL;
7271: int len = 0;
7272: int size = 10;
7273: xmlChar cur;
1.29 daniel 7274:
1.135 daniel 7275: buf = (xmlChar *) xmlMalloc(size * sizeof(xmlChar));
7276: if (buf == NULL) {
7277: fprintf(stderr, "malloc of %d byte failed\n", size);
7278: return(NULL);
7279: }
7280: cur = CUR;
1.152 daniel 7281: while (((cur >= 'a') && (cur <= 'z')) ||
7282: ((cur >= 'A') && (cur <= 'Z')) ||
7283: ((cur >= '0') && (cur <= '9')) ||
7284: (cur == '_') || (cur == '.') ||
7285: (cur == ':') || (cur == '-')) {
1.135 daniel 7286: if (len + 1 >= size) {
7287: size *= 2;
7288: buf = xmlRealloc(buf, size * sizeof(xmlChar));
7289: if (buf == NULL) {
7290: fprintf(stderr, "realloc of %d byte failed\n", size);
7291: return(NULL);
7292: }
7293: }
7294: buf[len++] = cur;
7295: NEXT;
7296: cur=CUR;
7297: }
7298: buf[len] = 0;
7299: return(buf);
1.29 daniel 7300: }
7301:
1.50 daniel 7302: /**
7303: * xmlParseVersionInfo:
7304: * @ctxt: an XML parser context
7305: *
7306: * parse the XML version.
1.29 daniel 7307: *
7308: * [24] VersionInfo ::= S 'version' Eq (' VersionNum ' | " VersionNum ")
7309: *
7310: * [25] Eq ::= S? '=' S?
1.50 daniel 7311: *
1.68 daniel 7312: * Returns the version string, e.g. "1.0"
1.29 daniel 7313: */
7314:
1.123 daniel 7315: xmlChar *
1.55 daniel 7316: xmlParseVersionInfo(xmlParserCtxtPtr ctxt) {
1.123 daniel 7317: xmlChar *version = NULL;
7318: const xmlChar *q;
1.29 daniel 7319:
1.152 daniel 7320: if ((RAW == 'v') && (NXT(1) == 'e') &&
1.40 daniel 7321: (NXT(2) == 'r') && (NXT(3) == 's') &&
7322: (NXT(4) == 'i') && (NXT(5) == 'o') &&
7323: (NXT(6) == 'n')) {
7324: SKIP(7);
1.42 daniel 7325: SKIP_BLANKS;
1.152 daniel 7326: if (RAW != '=') {
1.55 daniel 7327: if ((ctxt->sax != NULL) && (ctxt->sax->error != NULL))
1.122 daniel 7328: ctxt->sax->error(ctxt->userData,
7329: "xmlParseVersionInfo : expected '='\n");
1.59 daniel 7330: ctxt->wellFormed = 0;
1.123 daniel 7331: ctxt->errNo = XML_ERR_EQUAL_REQUIRED;
1.31 daniel 7332: return(NULL);
7333: }
1.40 daniel 7334: NEXT;
1.42 daniel 7335: SKIP_BLANKS;
1.152 daniel 7336: if (RAW == '"') {
1.40 daniel 7337: NEXT;
7338: q = CUR_PTR;
1.29 daniel 7339: version = xmlParseVersionNum(ctxt);
1.152 daniel 7340: if (RAW != '"') {
1.55 daniel 7341: if ((ctxt->sax != NULL) && (ctxt->sax->error != NULL))
1.122 daniel 7342: ctxt->sax->error(ctxt->userData,
7343: "String not closed\n%.50s\n", q);
1.59 daniel 7344: ctxt->wellFormed = 0;
1.123 daniel 7345: ctxt->errNo = XML_ERR_STRING_NOT_CLOSED;
1.55 daniel 7346: } else
1.40 daniel 7347: NEXT;
1.152 daniel 7348: } else if (RAW == '\''){
1.40 daniel 7349: NEXT;
7350: q = CUR_PTR;
1.29 daniel 7351: version = xmlParseVersionNum(ctxt);
1.152 daniel 7352: if (RAW != '\'') {
1.55 daniel 7353: if ((ctxt->sax != NULL) && (ctxt->sax->error != NULL))
1.122 daniel 7354: ctxt->sax->error(ctxt->userData,
7355: "String not closed\n%.50s\n", q);
1.123 daniel 7356: ctxt->errNo = XML_ERR_STRING_NOT_CLOSED;
1.59 daniel 7357: ctxt->wellFormed = 0;
1.55 daniel 7358: } else
1.40 daniel 7359: NEXT;
1.31 daniel 7360: } else {
1.55 daniel 7361: if ((ctxt->sax != NULL) && (ctxt->sax->error != NULL))
1.74 daniel 7362: ctxt->sax->error(ctxt->userData,
1.59 daniel 7363: "xmlParseVersionInfo : expected ' or \"\n");
1.122 daniel 7364: ctxt->wellFormed = 0;
1.123 daniel 7365: ctxt->errNo = XML_ERR_STRING_NOT_STARTED;
1.29 daniel 7366: }
7367: }
7368: return(version);
7369: }
7370:
1.50 daniel 7371: /**
7372: * xmlParseEncName:
7373: * @ctxt: an XML parser context
7374: *
7375: * parse the XML encoding name
1.29 daniel 7376: *
7377: * [81] EncName ::= [A-Za-z] ([A-Za-z0-9._] | '-')*
1.50 daniel 7378: *
1.68 daniel 7379: * Returns the encoding name value or NULL
1.29 daniel 7380: */
1.123 daniel 7381: xmlChar *
1.55 daniel 7382: xmlParseEncName(xmlParserCtxtPtr ctxt) {
1.135 daniel 7383: xmlChar *buf = NULL;
7384: int len = 0;
7385: int size = 10;
7386: xmlChar cur;
1.29 daniel 7387:
1.135 daniel 7388: cur = CUR;
7389: if (((cur >= 'a') && (cur <= 'z')) ||
7390: ((cur >= 'A') && (cur <= 'Z'))) {
7391: buf = (xmlChar *) xmlMalloc(size * sizeof(xmlChar));
7392: if (buf == NULL) {
7393: fprintf(stderr, "malloc of %d byte failed\n", size);
7394: return(NULL);
7395: }
7396:
7397: buf[len++] = cur;
1.40 daniel 7398: NEXT;
1.135 daniel 7399: cur = CUR;
1.152 daniel 7400: while (((cur >= 'a') && (cur <= 'z')) ||
7401: ((cur >= 'A') && (cur <= 'Z')) ||
7402: ((cur >= '0') && (cur <= '9')) ||
7403: (cur == '.') || (cur == '_') ||
7404: (cur == '-')) {
1.135 daniel 7405: if (len + 1 >= size) {
7406: size *= 2;
7407: buf = xmlRealloc(buf, size * sizeof(xmlChar));
7408: if (buf == NULL) {
7409: fprintf(stderr, "realloc of %d byte failed\n", size);
7410: return(NULL);
7411: }
7412: }
7413: buf[len++] = cur;
7414: NEXT;
7415: cur = CUR;
7416: if (cur == 0) {
7417: SHRINK;
7418: GROW;
7419: cur = CUR;
7420: }
7421: }
7422: buf[len] = 0;
1.29 daniel 7423: } else {
1.55 daniel 7424: if ((ctxt->sax != NULL) && (ctxt->sax->error != NULL))
1.74 daniel 7425: ctxt->sax->error(ctxt->userData, "Invalid XML encoding name\n");
1.59 daniel 7426: ctxt->wellFormed = 0;
1.123 daniel 7427: ctxt->errNo = XML_ERR_ENCODING_NAME;
1.29 daniel 7428: }
1.135 daniel 7429: return(buf);
1.29 daniel 7430: }
7431:
1.50 daniel 7432: /**
7433: * xmlParseEncodingDecl:
7434: * @ctxt: an XML parser context
7435: *
7436: * parse the XML encoding declaration
1.29 daniel 7437: *
7438: * [80] EncodingDecl ::= S 'encoding' Eq ('"' EncName '"' | "'" EncName "'")
1.50 daniel 7439: *
7440: * TODO: this should setup the conversion filters.
7441: *
1.68 daniel 7442: * Returns the encoding value or NULL
1.29 daniel 7443: */
7444:
1.123 daniel 7445: xmlChar *
1.55 daniel 7446: xmlParseEncodingDecl(xmlParserCtxtPtr ctxt) {
1.123 daniel 7447: xmlChar *encoding = NULL;
7448: const xmlChar *q;
1.29 daniel 7449:
1.42 daniel 7450: SKIP_BLANKS;
1.152 daniel 7451: if ((RAW == 'e') && (NXT(1) == 'n') &&
1.40 daniel 7452: (NXT(2) == 'c') && (NXT(3) == 'o') &&
7453: (NXT(4) == 'd') && (NXT(5) == 'i') &&
7454: (NXT(6) == 'n') && (NXT(7) == 'g')) {
7455: SKIP(8);
1.42 daniel 7456: SKIP_BLANKS;
1.152 daniel 7457: if (RAW != '=') {
1.55 daniel 7458: if ((ctxt->sax != NULL) && (ctxt->sax->error != NULL))
1.122 daniel 7459: ctxt->sax->error(ctxt->userData,
7460: "xmlParseEncodingDecl : expected '='\n");
1.59 daniel 7461: ctxt->wellFormed = 0;
1.123 daniel 7462: ctxt->errNo = XML_ERR_EQUAL_REQUIRED;
1.31 daniel 7463: return(NULL);
7464: }
1.40 daniel 7465: NEXT;
1.42 daniel 7466: SKIP_BLANKS;
1.152 daniel 7467: if (RAW == '"') {
1.40 daniel 7468: NEXT;
7469: q = CUR_PTR;
1.29 daniel 7470: encoding = xmlParseEncName(ctxt);
1.152 daniel 7471: if (RAW != '"') {
1.55 daniel 7472: if ((ctxt->sax != NULL) && (ctxt->sax->error != NULL))
1.122 daniel 7473: ctxt->sax->error(ctxt->userData,
7474: "String not closed\n%.50s\n", q);
1.59 daniel 7475: ctxt->wellFormed = 0;
1.123 daniel 7476: ctxt->errNo = XML_ERR_STRING_NOT_CLOSED;
1.55 daniel 7477: } else
1.40 daniel 7478: NEXT;
1.152 daniel 7479: } else if (RAW == '\''){
1.40 daniel 7480: NEXT;
7481: q = CUR_PTR;
1.29 daniel 7482: encoding = xmlParseEncName(ctxt);
1.152 daniel 7483: if (RAW != '\'') {
1.55 daniel 7484: if ((ctxt->sax != NULL) && (ctxt->sax->error != NULL))
1.122 daniel 7485: ctxt->sax->error(ctxt->userData,
7486: "String not closed\n%.50s\n", q);
1.59 daniel 7487: ctxt->wellFormed = 0;
1.123 daniel 7488: ctxt->errNo = XML_ERR_STRING_NOT_CLOSED;
1.55 daniel 7489: } else
1.40 daniel 7490: NEXT;
1.152 daniel 7491: } else if (RAW == '"'){
1.55 daniel 7492: if ((ctxt->sax != NULL) && (ctxt->sax->error != NULL))
1.74 daniel 7493: ctxt->sax->error(ctxt->userData,
1.59 daniel 7494: "xmlParseEncodingDecl : expected ' or \"\n");
7495: ctxt->wellFormed = 0;
1.123 daniel 7496: ctxt->errNo = XML_ERR_STRING_NOT_STARTED;
1.29 daniel 7497: }
7498: }
7499: return(encoding);
7500: }
7501:
1.50 daniel 7502: /**
7503: * xmlParseSDDecl:
7504: * @ctxt: an XML parser context
7505: *
7506: * parse the XML standalone declaration
1.29 daniel 7507: *
7508: * [32] SDDecl ::= S 'standalone' Eq
7509: * (("'" ('yes' | 'no') "'") | ('"' ('yes' | 'no')'"'))
1.99 daniel 7510: *
7511: * [ VC: Standalone Document Declaration ]
7512: * TODO The standalone document declaration must have the value "no"
7513: * if any external markup declarations contain declarations of:
7514: * - attributes with default values, if elements to which these
7515: * attributes apply appear in the document without specifications
7516: * of values for these attributes, or
7517: * - entities (other than amp, lt, gt, apos, quot), if references
7518: * to those entities appear in the document, or
7519: * - attributes with values subject to normalization, where the
7520: * attribute appears in the document with a value which will change
7521: * as a result of normalization, or
7522: * - element types with element content, if white space occurs directly
7523: * within any instance of those types.
1.68 daniel 7524: *
7525: * Returns 1 if standalone, 0 otherwise
1.29 daniel 7526: */
7527:
1.55 daniel 7528: int
7529: xmlParseSDDecl(xmlParserCtxtPtr ctxt) {
1.29 daniel 7530: int standalone = -1;
7531:
1.42 daniel 7532: SKIP_BLANKS;
1.152 daniel 7533: if ((RAW == 's') && (NXT(1) == 't') &&
1.40 daniel 7534: (NXT(2) == 'a') && (NXT(3) == 'n') &&
7535: (NXT(4) == 'd') && (NXT(5) == 'a') &&
7536: (NXT(6) == 'l') && (NXT(7) == 'o') &&
7537: (NXT(8) == 'n') && (NXT(9) == 'e')) {
7538: SKIP(10);
1.81 daniel 7539: SKIP_BLANKS;
1.152 daniel 7540: if (RAW != '=') {
1.55 daniel 7541: if ((ctxt->sax != NULL) && (ctxt->sax->error != NULL))
1.74 daniel 7542: ctxt->sax->error(ctxt->userData,
1.59 daniel 7543: "XML standalone declaration : expected '='\n");
1.123 daniel 7544: ctxt->errNo = XML_ERR_EQUAL_REQUIRED;
1.59 daniel 7545: ctxt->wellFormed = 0;
1.32 daniel 7546: return(standalone);
7547: }
1.40 daniel 7548: NEXT;
1.42 daniel 7549: SKIP_BLANKS;
1.152 daniel 7550: if (RAW == '\''){
1.40 daniel 7551: NEXT;
1.152 daniel 7552: if ((RAW == 'n') && (NXT(1) == 'o')) {
1.29 daniel 7553: standalone = 0;
1.40 daniel 7554: SKIP(2);
1.152 daniel 7555: } else if ((RAW == 'y') && (NXT(1) == 'e') &&
1.40 daniel 7556: (NXT(2) == 's')) {
1.29 daniel 7557: standalone = 1;
1.40 daniel 7558: SKIP(3);
1.29 daniel 7559: } else {
1.55 daniel 7560: if ((ctxt->sax != NULL) && (ctxt->sax->error != NULL))
1.122 daniel 7561: ctxt->sax->error(ctxt->userData,
7562: "standalone accepts only 'yes' or 'no'\n");
1.123 daniel 7563: ctxt->errNo = XML_ERR_STANDALONE_VALUE;
1.59 daniel 7564: ctxt->wellFormed = 0;
1.29 daniel 7565: }
1.152 daniel 7566: if (RAW != '\'') {
1.55 daniel 7567: if ((ctxt->sax != NULL) && (ctxt->sax->error != NULL))
1.74 daniel 7568: ctxt->sax->error(ctxt->userData, "String not closed\n");
1.123 daniel 7569: ctxt->errNo = XML_ERR_STRING_NOT_CLOSED;
1.59 daniel 7570: ctxt->wellFormed = 0;
1.55 daniel 7571: } else
1.40 daniel 7572: NEXT;
1.152 daniel 7573: } else if (RAW == '"'){
1.40 daniel 7574: NEXT;
1.152 daniel 7575: if ((RAW == 'n') && (NXT(1) == 'o')) {
1.29 daniel 7576: standalone = 0;
1.40 daniel 7577: SKIP(2);
1.152 daniel 7578: } else if ((RAW == 'y') && (NXT(1) == 'e') &&
1.40 daniel 7579: (NXT(2) == 's')) {
1.29 daniel 7580: standalone = 1;
1.40 daniel 7581: SKIP(3);
1.29 daniel 7582: } else {
1.55 daniel 7583: if ((ctxt->sax != NULL) && (ctxt->sax->error != NULL))
1.74 daniel 7584: ctxt->sax->error(ctxt->userData,
1.59 daniel 7585: "standalone accepts only 'yes' or 'no'\n");
1.123 daniel 7586: ctxt->errNo = XML_ERR_STANDALONE_VALUE;
1.59 daniel 7587: ctxt->wellFormed = 0;
1.29 daniel 7588: }
1.152 daniel 7589: if (RAW != '"') {
1.55 daniel 7590: if ((ctxt->sax != NULL) && (ctxt->sax->error != NULL))
1.74 daniel 7591: ctxt->sax->error(ctxt->userData, "String not closed\n");
1.59 daniel 7592: ctxt->wellFormed = 0;
1.123 daniel 7593: ctxt->errNo = XML_ERR_STRING_NOT_CLOSED;
1.55 daniel 7594: } else
1.40 daniel 7595: NEXT;
1.37 daniel 7596: } else {
1.55 daniel 7597: if ((ctxt->sax != NULL) && (ctxt->sax->error != NULL))
1.122 daniel 7598: ctxt->sax->error(ctxt->userData,
7599: "Standalone value not found\n");
1.59 daniel 7600: ctxt->wellFormed = 0;
1.123 daniel 7601: ctxt->errNo = XML_ERR_STRING_NOT_STARTED;
1.37 daniel 7602: }
1.29 daniel 7603: }
7604: return(standalone);
7605: }
7606:
1.50 daniel 7607: /**
7608: * xmlParseXMLDecl:
7609: * @ctxt: an XML parser context
7610: *
7611: * parse an XML declaration header
1.29 daniel 7612: *
7613: * [23] XMLDecl ::= '<?xml' VersionInfo EncodingDecl? SDDecl? S? '?>'
1.1 veillard 7614: */
7615:
1.55 daniel 7616: void
7617: xmlParseXMLDecl(xmlParserCtxtPtr ctxt) {
1.123 daniel 7618: xmlChar *version;
1.1 veillard 7619:
7620: /*
1.19 daniel 7621: * We know that '<?xml' is here.
1.1 veillard 7622: */
1.40 daniel 7623: SKIP(5);
1.1 veillard 7624:
1.153 daniel 7625: if (!IS_BLANK(RAW)) {
1.59 daniel 7626: if ((ctxt->sax != NULL) && (ctxt->sax->error != NULL))
1.74 daniel 7627: ctxt->sax->error(ctxt->userData, "Blank needed after '<?xml'\n");
1.123 daniel 7628: ctxt->errNo = XML_ERR_SPACE_REQUIRED;
1.59 daniel 7629: ctxt->wellFormed = 0;
7630: }
1.42 daniel 7631: SKIP_BLANKS;
1.1 veillard 7632:
7633: /*
1.29 daniel 7634: * We should have the VersionInfo here.
1.1 veillard 7635: */
1.29 daniel 7636: version = xmlParseVersionInfo(ctxt);
7637: if (version == NULL)
1.45 daniel 7638: version = xmlCharStrdup(XML_DEFAULT_VERSION);
1.72 daniel 7639: ctxt->version = xmlStrdup(version);
1.119 daniel 7640: xmlFree(version);
1.29 daniel 7641:
7642: /*
7643: * We may have the encoding declaration
7644: */
1.153 daniel 7645: if (!IS_BLANK(RAW)) {
1.152 daniel 7646: if ((RAW == '?') && (NXT(1) == '>')) {
1.59 daniel 7647: SKIP(2);
7648: return;
7649: }
7650: if ((ctxt->sax != NULL) && (ctxt->sax->error != NULL))
1.74 daniel 7651: ctxt->sax->error(ctxt->userData, "Blank needed here\n");
1.123 daniel 7652: ctxt->errNo = XML_ERR_SPACE_REQUIRED;
1.59 daniel 7653: ctxt->wellFormed = 0;
7654: }
1.164 daniel 7655: ctxt->input->encoding = xmlParseEncodingDecl(ctxt);
1.1 veillard 7656:
7657: /*
1.29 daniel 7658: * We may have the standalone status.
1.1 veillard 7659: */
1.164 daniel 7660: if ((ctxt->input->encoding != NULL) && (!IS_BLANK(RAW))) {
1.152 daniel 7661: if ((RAW == '?') && (NXT(1) == '>')) {
1.59 daniel 7662: SKIP(2);
7663: return;
7664: }
7665: if ((ctxt->sax != NULL) && (ctxt->sax->error != NULL))
1.74 daniel 7666: ctxt->sax->error(ctxt->userData, "Blank needed here\n");
1.59 daniel 7667: ctxt->wellFormed = 0;
1.123 daniel 7668: ctxt->errNo = XML_ERR_SPACE_REQUIRED;
1.59 daniel 7669: }
7670: SKIP_BLANKS;
1.167 daniel 7671: ctxt->input->standalone = xmlParseSDDecl(ctxt);
1.1 veillard 7672:
1.42 daniel 7673: SKIP_BLANKS;
1.152 daniel 7674: if ((RAW == '?') && (NXT(1) == '>')) {
1.40 daniel 7675: SKIP(2);
1.152 daniel 7676: } else if (RAW == '>') {
1.31 daniel 7677: /* Deprecated old WD ... */
1.55 daniel 7678: if ((ctxt->sax != NULL) && (ctxt->sax->error != NULL))
1.122 daniel 7679: ctxt->sax->error(ctxt->userData,
7680: "XML declaration must end-up with '?>'\n");
1.59 daniel 7681: ctxt->wellFormed = 0;
1.123 daniel 7682: ctxt->errNo = XML_ERR_XMLDECL_NOT_FINISHED;
1.40 daniel 7683: NEXT;
1.29 daniel 7684: } else {
1.55 daniel 7685: if ((ctxt->sax != NULL) && (ctxt->sax->error != NULL))
1.122 daniel 7686: ctxt->sax->error(ctxt->userData,
7687: "parsing XML declaration: '?>' expected\n");
1.59 daniel 7688: ctxt->wellFormed = 0;
1.123 daniel 7689: ctxt->errNo = XML_ERR_XMLDECL_NOT_FINISHED;
1.40 daniel 7690: MOVETO_ENDTAG(CUR_PTR);
7691: NEXT;
1.29 daniel 7692: }
1.1 veillard 7693: }
7694:
1.50 daniel 7695: /**
7696: * xmlParseMisc:
7697: * @ctxt: an XML parser context
7698: *
7699: * parse an XML Misc* optionnal field.
1.21 daniel 7700: *
1.22 daniel 7701: * [27] Misc ::= Comment | PI | S
1.1 veillard 7702: */
7703:
1.55 daniel 7704: void
7705: xmlParseMisc(xmlParserCtxtPtr ctxt) {
1.152 daniel 7706: while (((RAW == '<') && (NXT(1) == '?')) ||
7707: ((RAW == '<') && (NXT(1) == '!') &&
1.40 daniel 7708: (NXT(2) == '-') && (NXT(3) == '-')) ||
7709: IS_BLANK(CUR)) {
1.152 daniel 7710: if ((RAW == '<') && (NXT(1) == '?')) {
1.16 daniel 7711: xmlParsePI(ctxt);
1.40 daniel 7712: } else if (IS_BLANK(CUR)) {
7713: NEXT;
1.1 veillard 7714: } else
1.114 daniel 7715: xmlParseComment(ctxt);
1.1 veillard 7716: }
7717: }
7718:
1.50 daniel 7719: /**
7720: * xmlParseDocument :
7721: * @ctxt: an XML parser context
7722: *
7723: * parse an XML document (and build a tree if using the standard SAX
7724: * interface).
1.21 daniel 7725: *
1.22 daniel 7726: * [1] document ::= prolog element Misc*
1.29 daniel 7727: *
7728: * [22] prolog ::= XMLDecl? Misc* (doctypedecl Misc*)?
1.50 daniel 7729: *
1.68 daniel 7730: * Returns 0, -1 in case of error. the parser context is augmented
1.50 daniel 7731: * as a result of the parsing.
1.1 veillard 7732: */
7733:
1.55 daniel 7734: int
7735: xmlParseDocument(xmlParserCtxtPtr ctxt) {
1.156 daniel 7736: xmlChar start[4];
7737: xmlCharEncoding enc;
7738:
1.45 daniel 7739: xmlDefaultSAXHandlerInit();
7740:
1.91 daniel 7741: GROW;
7742:
1.14 veillard 7743: /*
1.44 daniel 7744: * SAX: beginning of the document processing.
7745: */
1.72 daniel 7746: if ((ctxt->sax) && (ctxt->sax->setDocumentLocator))
1.74 daniel 7747: ctxt->sax->setDocumentLocator(ctxt->userData, &xmlDefaultSAXLocator);
1.44 daniel 7748:
1.156 daniel 7749: /*
7750: * Get the 4 first bytes and decode the charset
7751: * if enc != XML_CHAR_ENCODING_NONE
7752: * plug some encoding conversion routines.
7753: */
7754: start[0] = RAW;
7755: start[1] = NXT(1);
7756: start[2] = NXT(2);
7757: start[3] = NXT(3);
7758: enc = xmlDetectCharEncoding(start, 4);
7759: if (enc != XML_CHAR_ENCODING_NONE) {
7760: xmlSwitchEncoding(ctxt, enc);
7761: }
7762:
1.1 veillard 7763:
1.168 daniel 7764: #if 0
1.1 veillard 7765: /*
7766: * Wipe out everything which is before the first '<'
7767: */
1.153 daniel 7768: if (IS_BLANK(RAW)) {
1.59 daniel 7769: if ((ctxt->sax != NULL) && (ctxt->sax->error != NULL))
1.74 daniel 7770: ctxt->sax->error(ctxt->userData,
1.59 daniel 7771: "Extra spaces at the beginning of the document are not allowed\n");
1.123 daniel 7772: ctxt->errNo = XML_ERR_DOCUMENT_START;
1.59 daniel 7773: ctxt->wellFormed = 0;
7774: SKIP_BLANKS;
7775: }
1.168 daniel 7776: #endif
1.59 daniel 7777:
7778: if (CUR == 0) {
7779: if ((ctxt->sax != NULL) && (ctxt->sax->error != NULL))
1.74 daniel 7780: ctxt->sax->error(ctxt->userData, "Document is empty\n");
1.123 daniel 7781: ctxt->errNo = XML_ERR_DOCUMENT_EMPTY;
1.59 daniel 7782: ctxt->wellFormed = 0;
7783: }
1.1 veillard 7784:
7785: /*
7786: * Check for the XMLDecl in the Prolog.
7787: */
1.91 daniel 7788: GROW;
1.152 daniel 7789: if ((RAW == '<') && (NXT(1) == '?') &&
1.40 daniel 7790: (NXT(2) == 'x') && (NXT(3) == 'm') &&
1.142 daniel 7791: (NXT(4) == 'l') && (IS_BLANK(NXT(5)))) {
1.19 daniel 7792: xmlParseXMLDecl(ctxt);
1.167 daniel 7793: ctxt->standalone = ctxt->input->standalone;
1.42 daniel 7794: SKIP_BLANKS;
1.164 daniel 7795: if ((ctxt->encoding == NULL) && (ctxt->input->encoding != NULL))
7796: ctxt->encoding = xmlStrdup(ctxt->input->encoding);
7797:
1.151 daniel 7798: #if 0
1.152 daniel 7799: } else if ((RAW == '<') && (NXT(1) == '?') &&
1.40 daniel 7800: (NXT(2) == 'X') && (NXT(3) == 'M') &&
1.142 daniel 7801: (NXT(4) == 'L') && (IS_BLANK(NXT(5)))) {
1.19 daniel 7802: /*
7803: * The first drafts were using <?XML and the final W3C REC
7804: * now use <?xml ...
7805: */
1.16 daniel 7806: xmlParseXMLDecl(ctxt);
1.42 daniel 7807: SKIP_BLANKS;
1.151 daniel 7808: #endif
1.1 veillard 7809: } else {
1.72 daniel 7810: ctxt->version = xmlCharStrdup(XML_DEFAULT_VERSION);
1.1 veillard 7811: }
1.171 ! daniel 7812: if ((ctxt->sax) && (ctxt->sax->startDocument) && (!ctxt->disableSAX))
1.74 daniel 7813: ctxt->sax->startDocument(ctxt->userData);
1.1 veillard 7814:
7815: /*
7816: * The Misc part of the Prolog
7817: */
1.91 daniel 7818: GROW;
1.16 daniel 7819: xmlParseMisc(ctxt);
1.1 veillard 7820:
7821: /*
1.29 daniel 7822: * Then possibly doc type declaration(s) and more Misc
1.21 daniel 7823: * (doctypedecl Misc*)?
7824: */
1.91 daniel 7825: GROW;
1.152 daniel 7826: if ((RAW == '<') && (NXT(1) == '!') &&
1.40 daniel 7827: (NXT(2) == 'D') && (NXT(3) == 'O') &&
7828: (NXT(4) == 'C') && (NXT(5) == 'T') &&
7829: (NXT(6) == 'Y') && (NXT(7) == 'P') &&
7830: (NXT(8) == 'E')) {
1.165 daniel 7831:
1.166 daniel 7832: ctxt->inSubset = 1;
1.22 daniel 7833: xmlParseDocTypeDecl(ctxt);
1.152 daniel 7834: if (RAW == '[') {
1.140 daniel 7835: ctxt->instate = XML_PARSER_DTD;
7836: xmlParseInternalSubset(ctxt);
7837: }
1.165 daniel 7838:
7839: /*
7840: * Create and update the external subset.
7841: */
1.166 daniel 7842: ctxt->inSubset = 2;
1.171 ! daniel 7843: if ((ctxt->sax != NULL) && (ctxt->sax->externalSubset != NULL) &&
! 7844: (!ctxt->disableSAX))
1.165 daniel 7845: ctxt->sax->externalSubset(ctxt->userData, ctxt->intSubName,
7846: ctxt->extSubSystem, ctxt->extSubURI);
1.166 daniel 7847: ctxt->inSubset = 0;
1.165 daniel 7848:
7849:
1.96 daniel 7850: ctxt->instate = XML_PARSER_PROLOG;
1.22 daniel 7851: xmlParseMisc(ctxt);
1.21 daniel 7852: }
7853:
7854: /*
7855: * Time to start parsing the tree itself
1.1 veillard 7856: */
1.91 daniel 7857: GROW;
1.152 daniel 7858: if (RAW != '<') {
1.59 daniel 7859: if ((ctxt->sax != NULL) && (ctxt->sax->error != NULL))
1.74 daniel 7860: ctxt->sax->error(ctxt->userData,
1.151 daniel 7861: "Start tag expected, '<' not found\n");
1.140 daniel 7862: ctxt->errNo = XML_ERR_DOCUMENT_EMPTY;
1.59 daniel 7863: ctxt->wellFormed = 0;
1.140 daniel 7864: ctxt->instate = XML_PARSER_EOF;
7865: } else {
7866: ctxt->instate = XML_PARSER_CONTENT;
7867: xmlParseElement(ctxt);
7868: ctxt->instate = XML_PARSER_EPILOG;
7869:
7870:
7871: /*
7872: * The Misc part at the end
7873: */
7874: xmlParseMisc(ctxt);
7875:
1.152 daniel 7876: if (RAW != 0) {
1.140 daniel 7877: if ((ctxt->sax != NULL) && (ctxt->sax->error != NULL))
7878: ctxt->sax->error(ctxt->userData,
7879: "Extra content at the end of the document\n");
7880: ctxt->wellFormed = 0;
7881: ctxt->errNo = XML_ERR_DOCUMENT_END;
7882: }
7883: ctxt->instate = XML_PARSER_EOF;
1.59 daniel 7884: }
7885:
1.44 daniel 7886: /*
7887: * SAX: end of the document processing.
7888: */
1.171 ! daniel 7889: if ((ctxt->sax) && (ctxt->sax->endDocument != NULL) &&
! 7890: (!ctxt->disableSAX))
1.74 daniel 7891: ctxt->sax->endDocument(ctxt->userData);
1.151 daniel 7892:
7893: /*
7894: * Grab the encoding if it was added on-the-fly
7895: */
7896: if ((ctxt->encoding != NULL) && (ctxt->myDoc != NULL) &&
7897: (ctxt->myDoc->encoding == NULL)) {
7898: ctxt->myDoc->encoding = ctxt->encoding;
7899: ctxt->encoding = NULL;
7900: }
1.59 daniel 7901: if (! ctxt->wellFormed) return(-1);
1.16 daniel 7902: return(0);
7903: }
7904:
1.98 daniel 7905: /************************************************************************
7906: * *
1.128 daniel 7907: * Progressive parsing interfaces *
7908: * *
7909: ************************************************************************/
7910:
7911: /**
7912: * xmlParseLookupSequence:
7913: * @ctxt: an XML parser context
7914: * @first: the first char to lookup
1.140 daniel 7915: * @next: the next char to lookup or zero
7916: * @third: the next char to lookup or zero
1.128 daniel 7917: *
1.140 daniel 7918: * Try to find if a sequence (first, next, third) or just (first next) or
7919: * (first) is available in the input stream.
7920: * This function has a side effect of (possibly) incrementing ctxt->checkIndex
7921: * to avoid rescanning sequences of bytes, it DOES change the state of the
7922: * parser, do not use liberally.
1.128 daniel 7923: *
1.140 daniel 7924: * Returns the index to the current parsing point if the full sequence
7925: * is available, -1 otherwise.
1.128 daniel 7926: */
7927: int
1.140 daniel 7928: xmlParseLookupSequence(xmlParserCtxtPtr ctxt, xmlChar first,
7929: xmlChar next, xmlChar third) {
7930: int base, len;
7931: xmlParserInputPtr in;
7932: const xmlChar *buf;
7933:
7934: in = ctxt->input;
7935: if (in == NULL) return(-1);
7936: base = in->cur - in->base;
7937: if (base < 0) return(-1);
7938: if (ctxt->checkIndex > base)
7939: base = ctxt->checkIndex;
7940: if (in->buf == NULL) {
7941: buf = in->base;
7942: len = in->length;
7943: } else {
7944: buf = in->buf->buffer->content;
7945: len = in->buf->buffer->use;
7946: }
7947: /* take into account the sequence length */
7948: if (third) len -= 2;
7949: else if (next) len --;
7950: for (;base < len;base++) {
7951: if (buf[base] == first) {
7952: if (third != 0) {
7953: if ((buf[base + 1] != next) ||
7954: (buf[base + 2] != third)) continue;
7955: } else if (next != 0) {
7956: if (buf[base + 1] != next) continue;
7957: }
7958: ctxt->checkIndex = 0;
7959: #ifdef DEBUG_PUSH
7960: if (next == 0)
7961: fprintf(stderr, "PP: lookup '%c' found at %d\n",
7962: first, base);
7963: else if (third == 0)
7964: fprintf(stderr, "PP: lookup '%c%c' found at %d\n",
7965: first, next, base);
7966: else
7967: fprintf(stderr, "PP: lookup '%c%c%c' found at %d\n",
7968: first, next, third, base);
7969: #endif
7970: return(base - (in->cur - in->base));
7971: }
7972: }
7973: ctxt->checkIndex = base;
7974: #ifdef DEBUG_PUSH
7975: if (next == 0)
7976: fprintf(stderr, "PP: lookup '%c' failed\n", first);
7977: else if (third == 0)
7978: fprintf(stderr, "PP: lookup '%c%c' failed\n", first, next);
7979: else
7980: fprintf(stderr, "PP: lookup '%c%c%c' failed\n", first, next, third);
7981: #endif
7982: return(-1);
1.128 daniel 7983: }
7984:
7985: /**
1.143 daniel 7986: * xmlParseTryOrFinish:
1.128 daniel 7987: * @ctxt: an XML parser context
1.143 daniel 7988: * @terminate: last chunk indicator
1.128 daniel 7989: *
7990: * Try to progress on parsing
7991: *
7992: * Returns zero if no parsing was possible
7993: */
7994: int
1.143 daniel 7995: xmlParseTryOrFinish(xmlParserCtxtPtr ctxt, int terminate) {
1.128 daniel 7996: int ret = 0;
1.140 daniel 7997: xmlParserInputPtr in;
7998: int avail;
7999: xmlChar cur, next;
8000:
8001: #ifdef DEBUG_PUSH
8002: switch (ctxt->instate) {
8003: case XML_PARSER_EOF:
8004: fprintf(stderr, "PP: try EOF\n"); break;
8005: case XML_PARSER_START:
8006: fprintf(stderr, "PP: try START\n"); break;
8007: case XML_PARSER_MISC:
8008: fprintf(stderr, "PP: try MISC\n");break;
8009: case XML_PARSER_COMMENT:
8010: fprintf(stderr, "PP: try COMMENT\n");break;
8011: case XML_PARSER_PROLOG:
8012: fprintf(stderr, "PP: try PROLOG\n");break;
8013: case XML_PARSER_START_TAG:
8014: fprintf(stderr, "PP: try START_TAG\n");break;
8015: case XML_PARSER_CONTENT:
8016: fprintf(stderr, "PP: try CONTENT\n");break;
8017: case XML_PARSER_CDATA_SECTION:
8018: fprintf(stderr, "PP: try CDATA_SECTION\n");break;
8019: case XML_PARSER_END_TAG:
8020: fprintf(stderr, "PP: try END_TAG\n");break;
8021: case XML_PARSER_ENTITY_DECL:
8022: fprintf(stderr, "PP: try ENTITY_DECL\n");break;
8023: case XML_PARSER_ENTITY_VALUE:
8024: fprintf(stderr, "PP: try ENTITY_VALUE\n");break;
8025: case XML_PARSER_ATTRIBUTE_VALUE:
8026: fprintf(stderr, "PP: try ATTRIBUTE_VALUE\n");break;
8027: case XML_PARSER_DTD:
8028: fprintf(stderr, "PP: try DTD\n");break;
8029: case XML_PARSER_EPILOG:
8030: fprintf(stderr, "PP: try EPILOG\n");break;
8031: case XML_PARSER_PI:
8032: fprintf(stderr, "PP: try PI\n");break;
8033: }
8034: #endif
1.128 daniel 8035:
8036: while (1) {
1.140 daniel 8037: /*
8038: * Pop-up of finished entities.
8039: */
1.152 daniel 8040: while ((RAW == 0) && (ctxt->inputNr > 1))
1.140 daniel 8041: xmlPopInput(ctxt);
8042:
8043: in = ctxt->input;
8044: if (in == NULL) break;
8045: if (in->buf == NULL)
8046: avail = in->length - (in->cur - in->base);
8047: else
8048: avail = in->buf->buffer->use - (in->cur - in->base);
8049: if (avail < 1)
8050: goto done;
1.128 daniel 8051: switch (ctxt->instate) {
8052: case XML_PARSER_EOF:
1.140 daniel 8053: /*
8054: * Document parsing is done !
8055: */
8056: goto done;
8057: case XML_PARSER_START:
8058: /*
8059: * Very first chars read from the document flow.
8060: */
8061: cur = in->cur[0];
8062: if (IS_BLANK(cur)) {
8063: if ((ctxt->sax) && (ctxt->sax->setDocumentLocator))
8064: ctxt->sax->setDocumentLocator(ctxt->userData,
8065: &xmlDefaultSAXLocator);
8066: if ((ctxt->sax != NULL) && (ctxt->sax->error != NULL))
8067: ctxt->sax->error(ctxt->userData,
8068: "Extra spaces at the beginning of the document are not allowed\n");
8069: ctxt->errNo = XML_ERR_DOCUMENT_START;
8070: ctxt->wellFormed = 0;
8071: SKIP_BLANKS;
8072: ret++;
8073: if (in->buf == NULL)
8074: avail = in->length - (in->cur - in->base);
8075: else
8076: avail = in->buf->buffer->use - (in->cur - in->base);
8077: }
8078: if (avail < 2)
8079: goto done;
8080:
8081: cur = in->cur[0];
8082: next = in->cur[1];
8083: if (cur == 0) {
8084: if ((ctxt->sax) && (ctxt->sax->setDocumentLocator))
8085: ctxt->sax->setDocumentLocator(ctxt->userData,
8086: &xmlDefaultSAXLocator);
8087: if ((ctxt->sax != NULL) && (ctxt->sax->error != NULL))
8088: ctxt->sax->error(ctxt->userData, "Document is empty\n");
8089: ctxt->errNo = XML_ERR_DOCUMENT_EMPTY;
8090: ctxt->wellFormed = 0;
8091: ctxt->instate = XML_PARSER_EOF;
8092: #ifdef DEBUG_PUSH
8093: fprintf(stderr, "PP: entering EOF\n");
8094: #endif
8095: if ((ctxt->sax) && (ctxt->sax->endDocument != NULL))
8096: ctxt->sax->endDocument(ctxt->userData);
8097: goto done;
8098: }
8099: if ((cur == '<') && (next == '?')) {
8100: /* PI or XML decl */
8101: if (avail < 5) return(ret);
1.143 daniel 8102: if ((!terminate) &&
8103: (xmlParseLookupSequence(ctxt, '?', '>', 0) < 0))
1.140 daniel 8104: return(ret);
8105: if ((ctxt->sax) && (ctxt->sax->setDocumentLocator))
8106: ctxt->sax->setDocumentLocator(ctxt->userData,
8107: &xmlDefaultSAXLocator);
8108: if ((in->cur[2] == 'x') &&
8109: (in->cur[3] == 'm') &&
1.142 daniel 8110: (in->cur[4] == 'l') &&
8111: (IS_BLANK(in->cur[5]))) {
1.140 daniel 8112: ret += 5;
8113: #ifdef DEBUG_PUSH
8114: fprintf(stderr, "PP: Parsing XML Decl\n");
8115: #endif
8116: xmlParseXMLDecl(ctxt);
1.167 daniel 8117: ctxt->standalone = ctxt->input->standalone;
1.164 daniel 8118: if ((ctxt->encoding == NULL) &&
8119: (ctxt->input->encoding != NULL))
8120: ctxt->encoding = xmlStrdup(ctxt->input->encoding);
1.171 ! daniel 8121: if ((ctxt->sax) && (ctxt->sax->startDocument) &&
! 8122: (!ctxt->disableSAX))
1.140 daniel 8123: ctxt->sax->startDocument(ctxt->userData);
8124: ctxt->instate = XML_PARSER_MISC;
8125: #ifdef DEBUG_PUSH
8126: fprintf(stderr, "PP: entering MISC\n");
8127: #endif
8128: } else {
8129: ctxt->version = xmlCharStrdup(XML_DEFAULT_VERSION);
1.171 ! daniel 8130: if ((ctxt->sax) && (ctxt->sax->startDocument) &&
! 8131: (!ctxt->disableSAX))
1.140 daniel 8132: ctxt->sax->startDocument(ctxt->userData);
8133: ctxt->instate = XML_PARSER_MISC;
8134: #ifdef DEBUG_PUSH
8135: fprintf(stderr, "PP: entering MISC\n");
8136: #endif
8137: }
8138: } else {
8139: if ((ctxt->sax) && (ctxt->sax->setDocumentLocator))
8140: ctxt->sax->setDocumentLocator(ctxt->userData,
8141: &xmlDefaultSAXLocator);
8142: ctxt->version = xmlCharStrdup(XML_DEFAULT_VERSION);
1.171 ! daniel 8143: if ((ctxt->sax) && (ctxt->sax->startDocument) &&
! 8144: (!ctxt->disableSAX))
1.140 daniel 8145: ctxt->sax->startDocument(ctxt->userData);
8146: ctxt->instate = XML_PARSER_MISC;
8147: #ifdef DEBUG_PUSH
8148: fprintf(stderr, "PP: entering MISC\n");
8149: #endif
8150: }
8151: break;
8152: case XML_PARSER_MISC:
8153: SKIP_BLANKS;
8154: if (in->buf == NULL)
8155: avail = in->length - (in->cur - in->base);
8156: else
8157: avail = in->buf->buffer->use - (in->cur - in->base);
8158: if (avail < 2)
8159: goto done;
8160: cur = in->cur[0];
8161: next = in->cur[1];
8162: if ((cur == '<') && (next == '?')) {
1.143 daniel 8163: if ((!terminate) &&
8164: (xmlParseLookupSequence(ctxt, '?', '>', 0) < 0))
1.140 daniel 8165: goto done;
8166: #ifdef DEBUG_PUSH
8167: fprintf(stderr, "PP: Parsing PI\n");
8168: #endif
8169: xmlParsePI(ctxt);
8170: } else if ((cur == '<') && (next == '!') &&
8171: (in->cur[2] == '-') && (in->cur[3] == '-')) {
1.143 daniel 8172: if ((!terminate) &&
8173: (xmlParseLookupSequence(ctxt, '-', '-', '>') < 0))
1.140 daniel 8174: goto done;
8175: #ifdef DEBUG_PUSH
8176: fprintf(stderr, "PP: Parsing Comment\n");
8177: #endif
8178: xmlParseComment(ctxt);
8179: ctxt->instate = XML_PARSER_MISC;
8180: } else if ((cur == '<') && (next == '!') &&
8181: (in->cur[2] == 'D') && (in->cur[3] == 'O') &&
8182: (in->cur[4] == 'C') && (in->cur[5] == 'T') &&
8183: (in->cur[6] == 'Y') && (in->cur[7] == 'P') &&
8184: (in->cur[8] == 'E')) {
1.143 daniel 8185: if ((!terminate) &&
8186: (xmlParseLookupSequence(ctxt, '>', 0, 0) < 0))
1.140 daniel 8187: goto done;
8188: #ifdef DEBUG_PUSH
8189: fprintf(stderr, "PP: Parsing internal subset\n");
8190: #endif
1.166 daniel 8191: ctxt->inSubset = 1;
1.140 daniel 8192: xmlParseDocTypeDecl(ctxt);
1.152 daniel 8193: if (RAW == '[') {
1.140 daniel 8194: ctxt->instate = XML_PARSER_DTD;
8195: #ifdef DEBUG_PUSH
8196: fprintf(stderr, "PP: entering DTD\n");
8197: #endif
8198: } else {
1.166 daniel 8199: /*
8200: * Create and update the external subset.
8201: */
8202: ctxt->inSubset = 2;
1.171 ! daniel 8203: if ((ctxt->sax != NULL) && (!ctxt->disableSAX) &&
1.166 daniel 8204: (ctxt->sax->externalSubset != NULL))
8205: ctxt->sax->externalSubset(ctxt->userData,
8206: ctxt->intSubName, ctxt->extSubSystem,
8207: ctxt->extSubURI);
8208: ctxt->inSubset = 0;
1.140 daniel 8209: ctxt->instate = XML_PARSER_PROLOG;
8210: #ifdef DEBUG_PUSH
8211: fprintf(stderr, "PP: entering PROLOG\n");
8212: #endif
8213: }
8214: } else if ((cur == '<') && (next == '!') &&
8215: (avail < 9)) {
8216: goto done;
8217: } else {
8218: ctxt->instate = XML_PARSER_START_TAG;
8219: #ifdef DEBUG_PUSH
8220: fprintf(stderr, "PP: entering START_TAG\n");
8221: #endif
8222: }
8223: break;
1.128 daniel 8224: case XML_PARSER_PROLOG:
1.140 daniel 8225: SKIP_BLANKS;
8226: if (in->buf == NULL)
8227: avail = in->length - (in->cur - in->base);
8228: else
8229: avail = in->buf->buffer->use - (in->cur - in->base);
8230: if (avail < 2)
8231: goto done;
8232: cur = in->cur[0];
8233: next = in->cur[1];
8234: if ((cur == '<') && (next == '?')) {
1.143 daniel 8235: if ((!terminate) &&
8236: (xmlParseLookupSequence(ctxt, '?', '>', 0) < 0))
1.140 daniel 8237: goto done;
8238: #ifdef DEBUG_PUSH
8239: fprintf(stderr, "PP: Parsing PI\n");
8240: #endif
8241: xmlParsePI(ctxt);
8242: } else if ((cur == '<') && (next == '!') &&
8243: (in->cur[2] == '-') && (in->cur[3] == '-')) {
1.143 daniel 8244: if ((!terminate) &&
8245: (xmlParseLookupSequence(ctxt, '-', '-', '>') < 0))
1.140 daniel 8246: goto done;
8247: #ifdef DEBUG_PUSH
8248: fprintf(stderr, "PP: Parsing Comment\n");
8249: #endif
8250: xmlParseComment(ctxt);
8251: ctxt->instate = XML_PARSER_PROLOG;
8252: } else if ((cur == '<') && (next == '!') &&
8253: (avail < 4)) {
8254: goto done;
8255: } else {
8256: ctxt->instate = XML_PARSER_START_TAG;
8257: #ifdef DEBUG_PUSH
8258: fprintf(stderr, "PP: entering START_TAG\n");
8259: #endif
8260: }
8261: break;
8262: case XML_PARSER_EPILOG:
8263: SKIP_BLANKS;
8264: if (in->buf == NULL)
8265: avail = in->length - (in->cur - in->base);
8266: else
8267: avail = in->buf->buffer->use - (in->cur - in->base);
8268: if (avail < 2)
8269: goto done;
8270: cur = in->cur[0];
8271: next = in->cur[1];
8272: if ((cur == '<') && (next == '?')) {
1.143 daniel 8273: if ((!terminate) &&
8274: (xmlParseLookupSequence(ctxt, '?', '>', 0) < 0))
1.140 daniel 8275: goto done;
8276: #ifdef DEBUG_PUSH
8277: fprintf(stderr, "PP: Parsing PI\n");
8278: #endif
8279: xmlParsePI(ctxt);
8280: ctxt->instate = XML_PARSER_EPILOG;
8281: } else if ((cur == '<') && (next == '!') &&
8282: (in->cur[2] == '-') && (in->cur[3] == '-')) {
1.143 daniel 8283: if ((!terminate) &&
8284: (xmlParseLookupSequence(ctxt, '-', '-', '>') < 0))
1.140 daniel 8285: goto done;
8286: #ifdef DEBUG_PUSH
8287: fprintf(stderr, "PP: Parsing Comment\n");
8288: #endif
8289: xmlParseComment(ctxt);
8290: ctxt->instate = XML_PARSER_EPILOG;
8291: } else if ((cur == '<') && (next == '!') &&
8292: (avail < 4)) {
8293: goto done;
8294: } else {
8295: if ((ctxt->sax != NULL) && (ctxt->sax->error != NULL))
8296: ctxt->sax->error(ctxt->userData,
8297: "Extra content at the end of the document\n");
8298: ctxt->wellFormed = 0;
8299: ctxt->errNo = XML_ERR_DOCUMENT_END;
8300: ctxt->instate = XML_PARSER_EOF;
8301: #ifdef DEBUG_PUSH
8302: fprintf(stderr, "PP: entering EOF\n");
8303: #endif
1.171 ! daniel 8304: if ((ctxt->sax) && (ctxt->sax->endDocument != NULL) &&
! 8305: (!ctxt->disableSAX))
1.140 daniel 8306: ctxt->sax->endDocument(ctxt->userData);
8307: goto done;
8308: }
8309: break;
8310: case XML_PARSER_START_TAG: {
8311: xmlChar *name, *oldname;
8312:
8313: if (avail < 2)
8314: goto done;
8315: cur = in->cur[0];
8316: if (cur != '<') {
8317: if ((ctxt->sax != NULL) && (ctxt->sax->error != NULL))
8318: ctxt->sax->error(ctxt->userData,
8319: "Start tag expect, '<' not found\n");
8320: ctxt->errNo = XML_ERR_DOCUMENT_EMPTY;
8321: ctxt->wellFormed = 0;
8322: ctxt->instate = XML_PARSER_EOF;
8323: #ifdef DEBUG_PUSH
8324: fprintf(stderr, "PP: entering EOF\n");
8325: #endif
1.171 ! daniel 8326: if ((ctxt->sax) && (ctxt->sax->endDocument != NULL) &&
! 8327: (!ctxt->disableSAX))
1.140 daniel 8328: ctxt->sax->endDocument(ctxt->userData);
8329: goto done;
8330: }
1.143 daniel 8331: if ((!terminate) &&
8332: (xmlParseLookupSequence(ctxt, '>', 0, 0) < 0))
1.140 daniel 8333: goto done;
8334: name = xmlParseStartTag(ctxt);
8335: if (name == NULL) {
8336: ctxt->instate = XML_PARSER_EOF;
8337: #ifdef DEBUG_PUSH
8338: fprintf(stderr, "PP: entering EOF\n");
8339: #endif
1.171 ! daniel 8340: if ((ctxt->sax) && (ctxt->sax->endDocument != NULL) &&
! 8341: (!ctxt->disableSAX))
1.140 daniel 8342: ctxt->sax->endDocument(ctxt->userData);
8343: goto done;
8344: }
8345: namePush(ctxt, xmlStrdup(name));
8346:
8347: /*
8348: * [ VC: Root Element Type ]
8349: * The Name in the document type declaration must match
8350: * the element type of the root element.
8351: */
8352: if (ctxt->validate && ctxt->wellFormed && ctxt->myDoc &&
1.157 daniel 8353: ctxt->node && (ctxt->node == ctxt->myDoc->children))
1.140 daniel 8354: ctxt->valid &= xmlValidateRoot(&ctxt->vctxt, ctxt->myDoc);
8355:
8356: /*
8357: * Check for an Empty Element.
8358: */
1.152 daniel 8359: if ((RAW == '/') && (NXT(1) == '>')) {
1.140 daniel 8360: SKIP(2);
1.171 ! daniel 8361: if ((ctxt->sax != NULL) &&
! 8362: (ctxt->sax->endElement != NULL) && (!ctxt->disableSAX))
1.140 daniel 8363: ctxt->sax->endElement(ctxt->userData, name);
8364: xmlFree(name);
8365: oldname = namePop(ctxt);
8366: if (oldname != NULL) {
8367: #ifdef DEBUG_STACK
8368: fprintf(stderr,"Close: popped %s\n", oldname);
8369: #endif
8370: xmlFree(oldname);
8371: }
8372: if (ctxt->name == NULL) {
8373: ctxt->instate = XML_PARSER_EPILOG;
8374: #ifdef DEBUG_PUSH
8375: fprintf(stderr, "PP: entering EPILOG\n");
8376: #endif
8377: } else {
8378: ctxt->instate = XML_PARSER_CONTENT;
8379: #ifdef DEBUG_PUSH
8380: fprintf(stderr, "PP: entering CONTENT\n");
8381: #endif
8382: }
8383: break;
8384: }
1.152 daniel 8385: if (RAW == '>') {
1.140 daniel 8386: NEXT;
8387: } else {
8388: if ((ctxt->sax != NULL) && (ctxt->sax->error != NULL))
8389: ctxt->sax->error(ctxt->userData,
8390: "Couldn't find end of Start Tag %s\n",
8391: name);
8392: ctxt->wellFormed = 0;
8393: ctxt->errNo = XML_ERR_GT_REQUIRED;
8394:
8395: /*
8396: * end of parsing of this node.
8397: */
8398: nodePop(ctxt);
8399: oldname = namePop(ctxt);
8400: if (oldname != NULL) {
8401: #ifdef DEBUG_STACK
8402: fprintf(stderr,"Close: popped %s\n", oldname);
8403: #endif
8404: xmlFree(oldname);
8405: }
8406: }
8407: xmlFree(name);
8408: ctxt->instate = XML_PARSER_CONTENT;
8409: #ifdef DEBUG_PUSH
8410: fprintf(stderr, "PP: entering CONTENT\n");
8411: #endif
8412: break;
8413: }
1.128 daniel 8414: case XML_PARSER_CONTENT:
1.140 daniel 8415: /*
8416: * Handle preparsed entities and charRef
8417: */
8418: if (ctxt->token != 0) {
8419: xmlChar cur[2] = { 0 , 0 } ;
8420:
8421: cur[0] = (xmlChar) ctxt->token;
1.171 ! daniel 8422: if ((ctxt->sax != NULL) && (!ctxt->disableSAX) &&
! 8423: (ctxt->sax->characters != NULL))
1.140 daniel 8424: ctxt->sax->characters(ctxt->userData, cur, 1);
8425: ctxt->token = 0;
8426: }
8427: if (avail < 2)
8428: goto done;
8429: cur = in->cur[0];
8430: next = in->cur[1];
8431: if ((cur == '<') && (next == '?')) {
1.143 daniel 8432: if ((!terminate) &&
8433: (xmlParseLookupSequence(ctxt, '?', '>', 0) < 0))
1.140 daniel 8434: goto done;
8435: #ifdef DEBUG_PUSH
8436: fprintf(stderr, "PP: Parsing PI\n");
8437: #endif
8438: xmlParsePI(ctxt);
8439: } else if ((cur == '<') && (next == '!') &&
8440: (in->cur[2] == '-') && (in->cur[3] == '-')) {
1.143 daniel 8441: if ((!terminate) &&
8442: (xmlParseLookupSequence(ctxt, '-', '-', '>') < 0))
1.140 daniel 8443: goto done;
8444: #ifdef DEBUG_PUSH
8445: fprintf(stderr, "PP: Parsing Comment\n");
8446: #endif
8447: xmlParseComment(ctxt);
8448: ctxt->instate = XML_PARSER_CONTENT;
8449: } else if ((cur == '<') && (in->cur[1] == '!') &&
8450: (in->cur[2] == '[') && (NXT(3) == 'C') &&
8451: (in->cur[4] == 'D') && (NXT(5) == 'A') &&
8452: (in->cur[6] == 'T') && (NXT(7) == 'A') &&
8453: (in->cur[8] == '[')) {
8454: SKIP(9);
8455: ctxt->instate = XML_PARSER_CDATA_SECTION;
8456: #ifdef DEBUG_PUSH
8457: fprintf(stderr, "PP: entering CDATA_SECTION\n");
8458: #endif
8459: break;
8460: } else if ((cur == '<') && (next == '!') &&
8461: (avail < 9)) {
8462: goto done;
8463: } else if ((cur == '<') && (next == '/')) {
8464: ctxt->instate = XML_PARSER_END_TAG;
8465: #ifdef DEBUG_PUSH
8466: fprintf(stderr, "PP: entering END_TAG\n");
8467: #endif
8468: break;
8469: } else if (cur == '<') {
8470: ctxt->instate = XML_PARSER_START_TAG;
8471: #ifdef DEBUG_PUSH
8472: fprintf(stderr, "PP: entering START_TAG\n");
8473: #endif
8474: break;
8475: } else if (cur == '&') {
1.143 daniel 8476: if ((!terminate) &&
8477: (xmlParseLookupSequence(ctxt, ';', 0, 0) < 0))
1.140 daniel 8478: goto done;
8479: #ifdef DEBUG_PUSH
8480: fprintf(stderr, "PP: Parsing Reference\n");
8481: #endif
8482: /* TODO: check generation of subtrees if noent !!! */
8483: xmlParseReference(ctxt);
8484: } else {
1.156 daniel 8485: /* TODO Avoid the extra copy, handle directly !!! */
1.140 daniel 8486: /*
8487: * Goal of the following test is :
8488: * - minimize calls to the SAX 'character' callback
8489: * when they are mergeable
8490: * - handle a problem for isBlank when we only parse
8491: * a sequence of blank chars and the next one is
8492: * not available to check against '<' presence.
8493: * - tries to homogenize the differences in SAX
8494: * callbacks between the push and pull versions
8495: * of the parser.
8496: */
8497: if ((ctxt->inputNr == 1) &&
8498: (avail < XML_PARSER_BIG_BUFFER_SIZE)) {
1.143 daniel 8499: if ((!terminate) &&
8500: (xmlParseLookupSequence(ctxt, '<', 0, 0) < 0))
1.140 daniel 8501: goto done;
8502: }
8503: ctxt->checkIndex = 0;
8504: #ifdef DEBUG_PUSH
8505: fprintf(stderr, "PP: Parsing char data\n");
8506: #endif
8507: xmlParseCharData(ctxt, 0);
8508: }
8509: /*
8510: * Pop-up of finished entities.
8511: */
1.152 daniel 8512: while ((RAW == 0) && (ctxt->inputNr > 1))
1.140 daniel 8513: xmlPopInput(ctxt);
8514: break;
8515: case XML_PARSER_CDATA_SECTION: {
8516: /*
8517: * The Push mode need to have the SAX callback for
8518: * cdataBlock merge back contiguous callbacks.
8519: */
8520: int base;
8521:
8522: in = ctxt->input;
8523: base = xmlParseLookupSequence(ctxt, ']', ']', '>');
8524: if (base < 0) {
8525: if (avail >= XML_PARSER_BIG_BUFFER_SIZE + 2) {
1.171 ! daniel 8526: if ((ctxt->sax != NULL) && (!ctxt->disableSAX)) {
1.140 daniel 8527: if (ctxt->sax->cdataBlock != NULL)
8528: ctxt->sax->cdataBlock(ctxt->userData, in->cur,
8529: XML_PARSER_BIG_BUFFER_SIZE);
8530: }
8531: SKIP(XML_PARSER_BIG_BUFFER_SIZE);
8532: ctxt->checkIndex = 0;
8533: }
8534: goto done;
8535: } else {
1.171 ! daniel 8536: if ((ctxt->sax != NULL) && (base > 0) &&
! 8537: (!ctxt->disableSAX)) {
1.140 daniel 8538: if (ctxt->sax->cdataBlock != NULL)
8539: ctxt->sax->cdataBlock(ctxt->userData,
8540: in->cur, base);
8541: }
8542: SKIP(base + 3);
8543: ctxt->checkIndex = 0;
8544: ctxt->instate = XML_PARSER_CONTENT;
8545: #ifdef DEBUG_PUSH
8546: fprintf(stderr, "PP: entering CONTENT\n");
8547: #endif
8548: }
8549: break;
8550: }
1.141 daniel 8551: case XML_PARSER_END_TAG:
1.140 daniel 8552: if (avail < 2)
8553: goto done;
1.143 daniel 8554: if ((!terminate) &&
8555: (xmlParseLookupSequence(ctxt, '>', 0, 0) < 0))
1.140 daniel 8556: goto done;
8557: xmlParseEndTag(ctxt);
8558: if (ctxt->name == NULL) {
8559: ctxt->instate = XML_PARSER_EPILOG;
8560: #ifdef DEBUG_PUSH
8561: fprintf(stderr, "PP: entering EPILOG\n");
8562: #endif
8563: } else {
8564: ctxt->instate = XML_PARSER_CONTENT;
8565: #ifdef DEBUG_PUSH
8566: fprintf(stderr, "PP: entering CONTENT\n");
8567: #endif
8568: }
8569: break;
8570: case XML_PARSER_DTD: {
8571: /*
8572: * Sorry but progressive parsing of the internal subset
8573: * is not expected to be supported. We first check that
8574: * the full content of the internal subset is available and
8575: * the parsing is launched only at that point.
8576: * Internal subset ends up with "']' S? '>'" in an unescaped
8577: * section and not in a ']]>' sequence which are conditional
8578: * sections (whoever argued to keep that crap in XML deserve
8579: * a place in hell !).
8580: */
8581: int base, i;
8582: xmlChar *buf;
8583: xmlChar quote = 0;
8584:
8585: base = in->cur - in->base;
8586: if (base < 0) return(0);
8587: if (ctxt->checkIndex > base)
8588: base = ctxt->checkIndex;
8589: buf = in->buf->buffer->content;
8590: for (;base < in->buf->buffer->use;base++) {
8591: if (quote != 0) {
8592: if (buf[base] == quote)
8593: quote = 0;
8594: continue;
8595: }
8596: if (buf[base] == '"') {
8597: quote = '"';
8598: continue;
8599: }
8600: if (buf[base] == '\'') {
8601: quote = '\'';
8602: continue;
8603: }
8604: if (buf[base] == ']') {
8605: if (base +1 >= in->buf->buffer->use)
8606: break;
8607: if (buf[base + 1] == ']') {
8608: /* conditional crap, skip both ']' ! */
8609: base++;
8610: continue;
8611: }
8612: for (i = 0;base + i < in->buf->buffer->use;i++) {
8613: if (buf[base + i] == '>')
8614: goto found_end_int_subset;
8615: }
8616: break;
8617: }
8618: }
8619: /*
8620: * We didn't find the end of the Internal subset
8621: */
8622: if (quote == 0)
8623: ctxt->checkIndex = base;
8624: #ifdef DEBUG_PUSH
8625: if (next == 0)
8626: fprintf(stderr, "PP: lookup of int subset end filed\n");
8627: #endif
8628: goto done;
8629:
8630: found_end_int_subset:
8631: xmlParseInternalSubset(ctxt);
1.166 daniel 8632: ctxt->inSubset = 2;
1.171 ! daniel 8633: if ((ctxt->sax != NULL) && (!ctxt->disableSAX) &&
1.166 daniel 8634: (ctxt->sax->externalSubset != NULL))
8635: ctxt->sax->externalSubset(ctxt->userData, ctxt->intSubName,
8636: ctxt->extSubSystem, ctxt->extSubURI);
8637: ctxt->inSubset = 0;
1.140 daniel 8638: ctxt->instate = XML_PARSER_PROLOG;
8639: ctxt->checkIndex = 0;
8640: #ifdef DEBUG_PUSH
8641: fprintf(stderr, "PP: entering PROLOG\n");
8642: #endif
8643: break;
8644: }
8645: case XML_PARSER_COMMENT:
8646: fprintf(stderr, "PP: internal error, state == COMMENT\n");
8647: ctxt->instate = XML_PARSER_CONTENT;
8648: #ifdef DEBUG_PUSH
8649: fprintf(stderr, "PP: entering CONTENT\n");
8650: #endif
8651: break;
8652: case XML_PARSER_PI:
8653: fprintf(stderr, "PP: internal error, state == PI\n");
8654: ctxt->instate = XML_PARSER_CONTENT;
8655: #ifdef DEBUG_PUSH
8656: fprintf(stderr, "PP: entering CONTENT\n");
8657: #endif
8658: break;
1.128 daniel 8659: case XML_PARSER_ENTITY_DECL:
1.140 daniel 8660: fprintf(stderr, "PP: internal error, state == ENTITY_DECL\n");
8661: ctxt->instate = XML_PARSER_DTD;
8662: #ifdef DEBUG_PUSH
8663: fprintf(stderr, "PP: entering DTD\n");
8664: #endif
8665: break;
1.128 daniel 8666: case XML_PARSER_ENTITY_VALUE:
1.140 daniel 8667: fprintf(stderr, "PP: internal error, state == ENTITY_VALUE\n");
8668: ctxt->instate = XML_PARSER_CONTENT;
8669: #ifdef DEBUG_PUSH
8670: fprintf(stderr, "PP: entering DTD\n");
8671: #endif
8672: break;
1.128 daniel 8673: case XML_PARSER_ATTRIBUTE_VALUE:
1.140 daniel 8674: fprintf(stderr, "PP: internal error, state == ATTRIBUTE_VALUE\n");
1.168 daniel 8675: ctxt->instate = XML_PARSER_START_TAG;
8676: #ifdef DEBUG_PUSH
8677: fprintf(stderr, "PP: entering START_TAG\n");
8678: #endif
8679: break;
8680: case XML_PARSER_SYSTEM_LITERAL:
8681: fprintf(stderr, "PP: internal error, state == SYSTEM_LITERAL\n");
1.140 daniel 8682: ctxt->instate = XML_PARSER_START_TAG;
8683: #ifdef DEBUG_PUSH
8684: fprintf(stderr, "PP: entering START_TAG\n");
8685: #endif
8686: break;
1.128 daniel 8687: }
8688: }
1.140 daniel 8689: done:
8690: #ifdef DEBUG_PUSH
8691: fprintf(stderr, "PP: done %d\n", ret);
8692: #endif
1.128 daniel 8693: return(ret);
8694: }
8695:
8696: /**
1.143 daniel 8697: * xmlParseTry:
8698: * @ctxt: an XML parser context
8699: *
8700: * Try to progress on parsing
8701: *
8702: * Returns zero if no parsing was possible
8703: */
8704: int
8705: xmlParseTry(xmlParserCtxtPtr ctxt) {
8706: return(xmlParseTryOrFinish(ctxt, 0));
8707: }
8708:
8709: /**
1.128 daniel 8710: * xmlParseChunk:
8711: * @ctxt: an XML parser context
8712: * @chunk: an char array
8713: * @size: the size in byte of the chunk
8714: * @terminate: last chunk indicator
8715: *
8716: * Parse a Chunk of memory
8717: *
8718: * Returns zero if no error, the xmlParserErrors otherwise.
8719: */
1.140 daniel 8720: int
1.128 daniel 8721: xmlParseChunk(xmlParserCtxtPtr ctxt, const char *chunk, int size,
8722: int terminate) {
1.132 daniel 8723: if ((size > 0) && (chunk != NULL) && (ctxt->input != NULL) &&
1.140 daniel 8724: (ctxt->input->buf != NULL) && (ctxt->instate != XML_PARSER_EOF)) {
8725: int base = ctxt->input->base - ctxt->input->buf->buffer->content;
8726: int cur = ctxt->input->cur - ctxt->input->base;
8727:
1.132 daniel 8728: xmlParserInputBufferPush(ctxt->input->buf, size, chunk);
1.140 daniel 8729: ctxt->input->base = ctxt->input->buf->buffer->content + base;
8730: ctxt->input->cur = ctxt->input->base + cur;
8731: #ifdef DEBUG_PUSH
8732: fprintf(stderr, "PP: pushed %d\n", size);
8733: #endif
8734:
1.150 daniel 8735: if ((terminate) || (ctxt->input->buf->buffer->use > 80))
8736: xmlParseTryOrFinish(ctxt, terminate);
1.140 daniel 8737: } else if (ctxt->instate != XML_PARSER_EOF)
1.143 daniel 8738: xmlParseTryOrFinish(ctxt, terminate);
1.140 daniel 8739: if (terminate) {
1.151 daniel 8740: /*
8741: * Grab the encoding if it was added on-the-fly
8742: */
8743: if ((ctxt->encoding != NULL) && (ctxt->myDoc != NULL) &&
8744: (ctxt->myDoc->encoding == NULL)) {
8745: ctxt->myDoc->encoding = ctxt->encoding;
8746: ctxt->encoding = NULL;
8747: }
8748:
8749: /*
8750: * Check for termination
8751: */
1.140 daniel 8752: if ((ctxt->instate != XML_PARSER_EOF) &&
8753: (ctxt->instate != XML_PARSER_EPILOG)) {
8754: if ((ctxt->sax != NULL) && (ctxt->sax->error != NULL))
8755: ctxt->sax->error(ctxt->userData,
8756: "Extra content at the end of the document\n");
8757: ctxt->wellFormed = 0;
8758: ctxt->errNo = XML_ERR_DOCUMENT_END;
8759: }
8760: if (ctxt->instate != XML_PARSER_EOF) {
1.171 ! daniel 8761: if ((ctxt->sax) && (ctxt->sax->endDocument != NULL) &&
! 8762: (!ctxt->disableSAX))
1.140 daniel 8763: ctxt->sax->endDocument(ctxt->userData);
8764: }
8765: ctxt->instate = XML_PARSER_EOF;
1.128 daniel 8766: }
8767: return((xmlParserErrors) ctxt->errNo);
8768: }
8769:
8770: /************************************************************************
8771: * *
1.98 daniel 8772: * I/O front end functions to the parser *
8773: * *
8774: ************************************************************************/
8775:
1.50 daniel 8776: /**
1.140 daniel 8777: * xmlCreatePushParserCtxt :
8778: * @sax: a SAX handler
8779: * @user_data: The user data returned on SAX callbacks
8780: * @chunk: a pointer to an array of chars
8781: * @size: number of chars in the array
8782: * @filename: an optional file name or URI
8783: *
8784: * Create a parser context for using the XML parser in push mode
8785: * To allow content encoding detection, @size should be >= 4
8786: * The value of @filename is used for fetching external entities
8787: * and error/warning reports.
8788: *
8789: * Returns the new parser context or NULL
8790: */
8791: xmlParserCtxtPtr
8792: xmlCreatePushParserCtxt(xmlSAXHandlerPtr sax, void *user_data,
8793: const char *chunk, int size, const char *filename) {
8794: xmlParserCtxtPtr ctxt;
8795: xmlParserInputPtr inputStream;
8796: xmlParserInputBufferPtr buf;
8797: xmlCharEncoding enc = XML_CHAR_ENCODING_NONE;
8798:
8799: /*
1.156 daniel 8800: * plug some encoding conversion routines
1.140 daniel 8801: */
8802: if ((chunk != NULL) && (size >= 4))
1.156 daniel 8803: enc = xmlDetectCharEncoding((const xmlChar *) chunk, size);
1.140 daniel 8804:
8805: buf = xmlAllocParserInputBuffer(enc);
8806: if (buf == NULL) return(NULL);
8807:
8808: ctxt = xmlNewParserCtxt();
8809: if (ctxt == NULL) {
8810: xmlFree(buf);
8811: return(NULL);
8812: }
8813: if (sax != NULL) {
8814: if (ctxt->sax != &xmlDefaultSAXHandler)
8815: xmlFree(ctxt->sax);
8816: ctxt->sax = (xmlSAXHandlerPtr) xmlMalloc(sizeof(xmlSAXHandler));
8817: if (ctxt->sax == NULL) {
8818: xmlFree(buf);
8819: xmlFree(ctxt);
8820: return(NULL);
8821: }
8822: memcpy(ctxt->sax, sax, sizeof(xmlSAXHandler));
8823: if (user_data != NULL)
8824: ctxt->userData = user_data;
8825: }
8826: if (filename == NULL) {
8827: ctxt->directory = NULL;
8828: } else {
8829: ctxt->directory = xmlParserGetDirectory(filename);
8830: }
8831:
8832: inputStream = xmlNewInputStream(ctxt);
8833: if (inputStream == NULL) {
8834: xmlFreeParserCtxt(ctxt);
8835: return(NULL);
8836: }
8837:
8838: if (filename == NULL)
8839: inputStream->filename = NULL;
8840: else
8841: inputStream->filename = xmlMemStrdup(filename);
8842: inputStream->buf = buf;
8843: inputStream->base = inputStream->buf->buffer->content;
8844: inputStream->cur = inputStream->buf->buffer->content;
1.156 daniel 8845: if (enc != XML_CHAR_ENCODING_NONE) {
8846: xmlSwitchEncoding(ctxt, enc);
8847: }
1.140 daniel 8848:
8849: inputPush(ctxt, inputStream);
8850:
8851: if ((size > 0) && (chunk != NULL) && (ctxt->input != NULL) &&
8852: (ctxt->input->buf != NULL)) {
8853: xmlParserInputBufferPush(ctxt->input->buf, size, chunk);
8854: #ifdef DEBUG_PUSH
8855: fprintf(stderr, "PP: pushed %d\n", size);
8856: #endif
8857: }
8858:
8859: return(ctxt);
8860: }
8861:
8862: /**
1.86 daniel 8863: * xmlCreateDocParserCtxt :
1.123 daniel 8864: * @cur: a pointer to an array of xmlChar
1.50 daniel 8865: *
1.69 daniel 8866: * Create a parser context for an XML in-memory document.
8867: *
8868: * Returns the new parser context or NULL
1.16 daniel 8869: */
1.69 daniel 8870: xmlParserCtxtPtr
1.123 daniel 8871: xmlCreateDocParserCtxt(xmlChar *cur) {
1.16 daniel 8872: xmlParserCtxtPtr ctxt;
1.40 daniel 8873: xmlParserInputPtr input;
1.16 daniel 8874:
1.97 daniel 8875: ctxt = xmlNewParserCtxt();
1.16 daniel 8876: if (ctxt == NULL) {
8877: return(NULL);
8878: }
1.96 daniel 8879: input = xmlNewInputStream(ctxt);
1.40 daniel 8880: if (input == NULL) {
1.97 daniel 8881: xmlFreeParserCtxt(ctxt);
1.40 daniel 8882: return(NULL);
8883: }
8884:
8885: input->base = cur;
8886: input->cur = cur;
8887:
8888: inputPush(ctxt, input);
1.69 daniel 8889: return(ctxt);
8890: }
8891:
8892: /**
8893: * xmlSAXParseDoc :
8894: * @sax: the SAX handler block
1.123 daniel 8895: * @cur: a pointer to an array of xmlChar
1.69 daniel 8896: * @recovery: work in recovery mode, i.e. tries to read no Well Formed
8897: * documents
8898: *
8899: * parse an XML in-memory document and build a tree.
8900: * It use the given SAX function block to handle the parsing callback.
8901: * If sax is NULL, fallback to the default DOM tree building routines.
8902: *
8903: * Returns the resulting document tree
8904: */
8905:
8906: xmlDocPtr
1.123 daniel 8907: xmlSAXParseDoc(xmlSAXHandlerPtr sax, xmlChar *cur, int recovery) {
1.69 daniel 8908: xmlDocPtr ret;
8909: xmlParserCtxtPtr ctxt;
8910:
8911: if (cur == NULL) return(NULL);
1.16 daniel 8912:
8913:
1.69 daniel 8914: ctxt = xmlCreateDocParserCtxt(cur);
8915: if (ctxt == NULL) return(NULL);
1.74 daniel 8916: if (sax != NULL) {
8917: ctxt->sax = sax;
8918: ctxt->userData = NULL;
8919: }
1.69 daniel 8920:
1.16 daniel 8921: xmlParseDocument(ctxt);
1.72 daniel 8922: if ((ctxt->wellFormed) || recovery) ret = ctxt->myDoc;
1.59 daniel 8923: else {
8924: ret = NULL;
1.72 daniel 8925: xmlFreeDoc(ctxt->myDoc);
8926: ctxt->myDoc = NULL;
1.59 daniel 8927: }
1.86 daniel 8928: if (sax != NULL)
8929: ctxt->sax = NULL;
1.69 daniel 8930: xmlFreeParserCtxt(ctxt);
1.16 daniel 8931:
1.1 veillard 8932: return(ret);
8933: }
8934:
1.50 daniel 8935: /**
1.55 daniel 8936: * xmlParseDoc :
1.123 daniel 8937: * @cur: a pointer to an array of xmlChar
1.55 daniel 8938: *
8939: * parse an XML in-memory document and build a tree.
8940: *
1.68 daniel 8941: * Returns the resulting document tree
1.55 daniel 8942: */
8943:
1.69 daniel 8944: xmlDocPtr
1.123 daniel 8945: xmlParseDoc(xmlChar *cur) {
1.59 daniel 8946: return(xmlSAXParseDoc(NULL, cur, 0));
1.76 daniel 8947: }
8948:
8949: /**
8950: * xmlSAXParseDTD :
8951: * @sax: the SAX handler block
8952: * @ExternalID: a NAME* containing the External ID of the DTD
8953: * @SystemID: a NAME* containing the URL to the DTD
8954: *
8955: * Load and parse an external subset.
8956: *
8957: * Returns the resulting xmlDtdPtr or NULL in case of error.
8958: */
8959:
8960: xmlDtdPtr
1.123 daniel 8961: xmlSAXParseDTD(xmlSAXHandlerPtr sax, const xmlChar *ExternalID,
8962: const xmlChar *SystemID) {
1.76 daniel 8963: xmlDtdPtr ret = NULL;
8964: xmlParserCtxtPtr ctxt;
1.83 daniel 8965: xmlParserInputPtr input = NULL;
1.76 daniel 8966: xmlCharEncoding enc;
8967:
8968: if ((ExternalID == NULL) && (SystemID == NULL)) return(NULL);
8969:
1.97 daniel 8970: ctxt = xmlNewParserCtxt();
1.76 daniel 8971: if (ctxt == NULL) {
8972: return(NULL);
8973: }
8974:
8975: /*
8976: * Set-up the SAX context
8977: */
8978: if (ctxt == NULL) return(NULL);
8979: if (sax != NULL) {
1.93 veillard 8980: if (ctxt->sax != NULL)
1.119 daniel 8981: xmlFree(ctxt->sax);
1.76 daniel 8982: ctxt->sax = sax;
8983: ctxt->userData = NULL;
8984: }
8985:
8986: /*
8987: * Ask the Entity resolver to load the damn thing
8988: */
8989:
8990: if ((ctxt->sax != NULL) && (ctxt->sax->resolveEntity != NULL))
8991: input = ctxt->sax->resolveEntity(ctxt->userData, ExternalID, SystemID);
8992: if (input == NULL) {
1.86 daniel 8993: if (sax != NULL) ctxt->sax = NULL;
1.76 daniel 8994: xmlFreeParserCtxt(ctxt);
8995: return(NULL);
8996: }
8997:
8998: /*
1.156 daniel 8999: * plug some encoding conversion routines here.
1.76 daniel 9000: */
9001: xmlPushInput(ctxt, input);
1.156 daniel 9002: enc = xmlDetectCharEncoding(ctxt->input->cur, 4);
1.76 daniel 9003: xmlSwitchEncoding(ctxt, enc);
9004:
1.95 veillard 9005: if (input->filename == NULL)
1.156 daniel 9006: input->filename = (char *) xmlStrdup(SystemID);
1.76 daniel 9007: input->line = 1;
9008: input->col = 1;
9009: input->base = ctxt->input->cur;
9010: input->cur = ctxt->input->cur;
9011: input->free = NULL;
9012:
9013: /*
9014: * let's parse that entity knowing it's an external subset.
9015: */
1.79 daniel 9016: xmlParseExternalSubset(ctxt, ExternalID, SystemID);
1.76 daniel 9017:
9018: if (ctxt->myDoc != NULL) {
9019: if (ctxt->wellFormed) {
9020: ret = ctxt->myDoc->intSubset;
9021: ctxt->myDoc->intSubset = NULL;
9022: } else {
9023: ret = NULL;
9024: }
9025: xmlFreeDoc(ctxt->myDoc);
9026: ctxt->myDoc = NULL;
9027: }
1.86 daniel 9028: if (sax != NULL) ctxt->sax = NULL;
1.76 daniel 9029: xmlFreeParserCtxt(ctxt);
9030:
9031: return(ret);
9032: }
9033:
9034: /**
9035: * xmlParseDTD :
9036: * @ExternalID: a NAME* containing the External ID of the DTD
9037: * @SystemID: a NAME* containing the URL to the DTD
9038: *
9039: * Load and parse an external subset.
9040: *
9041: * Returns the resulting xmlDtdPtr or NULL in case of error.
9042: */
9043:
9044: xmlDtdPtr
1.123 daniel 9045: xmlParseDTD(const xmlChar *ExternalID, const xmlChar *SystemID) {
1.76 daniel 9046: return(xmlSAXParseDTD(NULL, ExternalID, SystemID));
1.59 daniel 9047: }
9048:
9049: /**
1.144 daniel 9050: * xmlSAXParseBalancedChunk :
9051: * @ctx: an XML parser context (possibly NULL)
9052: * @sax: the SAX handler bloc (possibly NULL)
9053: * @user_data: The user data returned on SAX callbacks (possibly NULL)
9054: * @input: a parser input stream
9055: * @enc: the encoding
9056: *
9057: * Parse a well-balanced chunk of an XML document
9058: * The user has to provide SAX callback block whose routines will be
9059: * called by the parser
9060: * The allowed sequence for the Well Balanced Chunk is the one defined by
9061: * the content production in the XML grammar:
9062: *
9063: * [43] content ::= (element | CharData | Reference | CDSect | PI | Comment)*
9064: *
9065: * Returns 0 id the chunk is well balanced, -1 in case of args problem and
9066: * the error code otherwise
9067: */
9068:
9069: int
9070: xmlSAXParseBalancedChunk(xmlParserCtxtPtr ctx, xmlSAXHandlerPtr sax,
9071: void *user_data, xmlParserInputPtr input,
9072: xmlCharEncoding enc) {
9073: xmlParserCtxtPtr ctxt;
9074: int ret;
9075:
9076: if (input == NULL) return(-1);
9077:
9078: if (ctx != NULL)
9079: ctxt = ctx;
9080: else {
9081: ctxt = xmlNewParserCtxt();
9082: if (ctxt == NULL)
9083: return(-1);
9084: if (sax == NULL)
9085: ctxt->myDoc = xmlNewDoc(BAD_CAST "1.0");
9086: }
9087:
9088: /*
9089: * Set-up the SAX context
9090: */
9091: if (sax != NULL) {
9092: if (ctxt->sax != NULL)
9093: xmlFree(ctxt->sax);
9094: ctxt->sax = sax;
9095: ctxt->userData = user_data;
9096: }
9097:
9098: /*
9099: * plug some encoding conversion routines here.
9100: */
9101: xmlPushInput(ctxt, input);
9102: if (enc != XML_CHAR_ENCODING_NONE)
9103: xmlSwitchEncoding(ctxt, enc);
9104:
9105: /*
9106: * let's parse that entity knowing it's an external subset.
9107: */
9108: xmlParseContent(ctxt);
9109: ret = ctxt->errNo;
9110:
9111: if (ctx == NULL) {
9112: if (sax != NULL)
9113: ctxt->sax = NULL;
9114: else
9115: xmlFreeDoc(ctxt->myDoc);
9116: xmlFreeParserCtxt(ctxt);
9117: }
9118: return(ret);
9119: }
9120:
9121: /**
9122: * xmlParseBalancedChunk :
9123: * @doc: the document the chunk pertains to
9124: * @node: the node defining the context in which informations will be added
9125: *
9126: * Parse a well-balanced chunk of an XML document present in memory
9127: *
9128: * Returns the resulting list of nodes resulting from the parsing,
9129: * they are not added to @node
9130: */
9131:
9132: xmlNodePtr
9133: xmlParseBalancedChunkMemory(xmlDocPtr doc, xmlNodePtr node) {
1.156 daniel 9134: /* TODO !!! */
9135: return(NULL);
1.144 daniel 9136: }
9137:
9138: /**
9139: * xmlParseBalancedChunkFile :
9140: * @doc: the document the chunk pertains to
9141: *
9142: * Parse a well-balanced chunk of an XML document contained in a file
9143: *
9144: * Returns the resulting list of nodes resulting from the parsing,
9145: * they are not added to @node
9146: */
9147:
9148: xmlNodePtr
9149: xmlParseBalancedChunkFile(xmlDocPtr doc, xmlNodePtr node) {
1.156 daniel 9150: /* TODO !!! */
9151: return(NULL);
1.144 daniel 9152: }
9153:
9154: /**
1.59 daniel 9155: * xmlRecoverDoc :
1.123 daniel 9156: * @cur: a pointer to an array of xmlChar
1.59 daniel 9157: *
9158: * parse an XML in-memory document and build a tree.
9159: * In the case the document is not Well Formed, a tree is built anyway
9160: *
1.68 daniel 9161: * Returns the resulting document tree
1.59 daniel 9162: */
9163:
1.69 daniel 9164: xmlDocPtr
1.123 daniel 9165: xmlRecoverDoc(xmlChar *cur) {
1.59 daniel 9166: return(xmlSAXParseDoc(NULL, cur, 1));
1.55 daniel 9167: }
9168:
9169: /**
1.69 daniel 9170: * xmlCreateFileParserCtxt :
1.50 daniel 9171: * @filename: the filename
9172: *
1.69 daniel 9173: * Create a parser context for a file content.
9174: * Automatic support for ZLIB/Compress compressed document is provided
9175: * by default if found at compile-time.
1.50 daniel 9176: *
1.69 daniel 9177: * Returns the new parser context or NULL
1.9 httpng 9178: */
1.69 daniel 9179: xmlParserCtxtPtr
9180: xmlCreateFileParserCtxt(const char *filename)
9181: {
9182: xmlParserCtxtPtr ctxt;
1.40 daniel 9183: xmlParserInputPtr inputStream;
1.91 daniel 9184: xmlParserInputBufferPtr buf;
1.111 daniel 9185: char *directory = NULL;
1.9 httpng 9186:
1.91 daniel 9187: buf = xmlParserInputBufferCreateFilename(filename, XML_CHAR_ENCODING_NONE);
9188: if (buf == NULL) return(NULL);
1.9 httpng 9189:
1.97 daniel 9190: ctxt = xmlNewParserCtxt();
1.16 daniel 9191: if (ctxt == NULL) {
9192: return(NULL);
9193: }
1.97 daniel 9194:
1.96 daniel 9195: inputStream = xmlNewInputStream(ctxt);
1.40 daniel 9196: if (inputStream == NULL) {
1.97 daniel 9197: xmlFreeParserCtxt(ctxt);
1.40 daniel 9198: return(NULL);
9199: }
9200:
1.119 daniel 9201: inputStream->filename = xmlMemStrdup(filename);
1.91 daniel 9202: inputStream->buf = buf;
9203: inputStream->base = inputStream->buf->buffer->content;
9204: inputStream->cur = inputStream->buf->buffer->content;
1.16 daniel 9205:
1.40 daniel 9206: inputPush(ctxt, inputStream);
1.110 daniel 9207: if ((ctxt->directory == NULL) && (directory == NULL))
1.106 daniel 9208: directory = xmlParserGetDirectory(filename);
9209: if ((ctxt->directory == NULL) && (directory != NULL))
1.110 daniel 9210: ctxt->directory = directory;
1.106 daniel 9211:
1.69 daniel 9212: return(ctxt);
9213: }
9214:
9215: /**
9216: * xmlSAXParseFile :
9217: * @sax: the SAX handler block
9218: * @filename: the filename
9219: * @recovery: work in recovery mode, i.e. tries to read no Well Formed
9220: * documents
9221: *
9222: * parse an XML file and build a tree. Automatic support for ZLIB/Compress
9223: * compressed document is provided by default if found at compile-time.
9224: * It use the given SAX function block to handle the parsing callback.
9225: * If sax is NULL, fallback to the default DOM tree building routines.
9226: *
9227: * Returns the resulting document tree
9228: */
9229:
1.79 daniel 9230: xmlDocPtr
9231: xmlSAXParseFile(xmlSAXHandlerPtr sax, const char *filename,
1.69 daniel 9232: int recovery) {
9233: xmlDocPtr ret;
9234: xmlParserCtxtPtr ctxt;
1.111 daniel 9235: char *directory = NULL;
1.69 daniel 9236:
9237: ctxt = xmlCreateFileParserCtxt(filename);
9238: if (ctxt == NULL) return(NULL);
1.74 daniel 9239: if (sax != NULL) {
1.93 veillard 9240: if (ctxt->sax != NULL)
1.119 daniel 9241: xmlFree(ctxt->sax);
1.74 daniel 9242: ctxt->sax = sax;
9243: ctxt->userData = NULL;
9244: }
1.106 daniel 9245:
1.110 daniel 9246: if ((ctxt->directory == NULL) && (directory == NULL))
1.106 daniel 9247: directory = xmlParserGetDirectory(filename);
9248: if ((ctxt->directory == NULL) && (directory != NULL))
1.156 daniel 9249: ctxt->directory = (char *) xmlStrdup((xmlChar *) directory);
1.16 daniel 9250:
9251: xmlParseDocument(ctxt);
1.40 daniel 9252:
1.72 daniel 9253: if ((ctxt->wellFormed) || recovery) ret = ctxt->myDoc;
1.59 daniel 9254: else {
9255: ret = NULL;
1.72 daniel 9256: xmlFreeDoc(ctxt->myDoc);
9257: ctxt->myDoc = NULL;
1.59 daniel 9258: }
1.86 daniel 9259: if (sax != NULL)
9260: ctxt->sax = NULL;
1.69 daniel 9261: xmlFreeParserCtxt(ctxt);
1.20 daniel 9262:
9263: return(ret);
9264: }
9265:
1.55 daniel 9266: /**
9267: * xmlParseFile :
9268: * @filename: the filename
9269: *
9270: * parse an XML file and build a tree. Automatic support for ZLIB/Compress
9271: * compressed document is provided by default if found at compile-time.
9272: *
1.68 daniel 9273: * Returns the resulting document tree
1.55 daniel 9274: */
9275:
1.79 daniel 9276: xmlDocPtr
9277: xmlParseFile(const char *filename) {
1.59 daniel 9278: return(xmlSAXParseFile(NULL, filename, 0));
9279: }
9280:
9281: /**
9282: * xmlRecoverFile :
9283: * @filename: the filename
9284: *
9285: * parse an XML file and build a tree. Automatic support for ZLIB/Compress
9286: * compressed document is provided by default if found at compile-time.
9287: * In the case the document is not Well Formed, a tree is built anyway
9288: *
1.68 daniel 9289: * Returns the resulting document tree
1.59 daniel 9290: */
9291:
1.79 daniel 9292: xmlDocPtr
9293: xmlRecoverFile(const char *filename) {
1.59 daniel 9294: return(xmlSAXParseFile(NULL, filename, 1));
1.55 daniel 9295: }
1.32 daniel 9296:
1.50 daniel 9297: /**
1.69 daniel 9298: * xmlCreateMemoryParserCtxt :
1.68 daniel 9299: * @buffer: an pointer to a char array
1.127 daniel 9300: * @size: the size of the array
1.50 daniel 9301: *
1.69 daniel 9302: * Create a parser context for an XML in-memory document.
1.50 daniel 9303: *
1.69 daniel 9304: * Returns the new parser context or NULL
1.20 daniel 9305: */
1.69 daniel 9306: xmlParserCtxtPtr
9307: xmlCreateMemoryParserCtxt(char *buffer, int size) {
1.20 daniel 9308: xmlParserCtxtPtr ctxt;
1.40 daniel 9309: xmlParserInputPtr input;
9310:
1.158 daniel 9311: if (buffer[size - 1] != 0)
9312: buffer[size - 1] = '\0';
1.40 daniel 9313:
1.97 daniel 9314: ctxt = xmlNewParserCtxt();
1.20 daniel 9315: if (ctxt == NULL) {
9316: return(NULL);
9317: }
1.97 daniel 9318:
1.96 daniel 9319: input = xmlNewInputStream(ctxt);
1.40 daniel 9320: if (input == NULL) {
1.97 daniel 9321: xmlFreeParserCtxt(ctxt);
1.40 daniel 9322: return(NULL);
9323: }
1.20 daniel 9324:
1.40 daniel 9325: input->filename = NULL;
9326: input->line = 1;
9327: input->col = 1;
1.96 daniel 9328: input->buf = NULL;
1.91 daniel 9329: input->consumed = 0;
1.75 daniel 9330:
1.116 daniel 9331: input->base = BAD_CAST buffer;
9332: input->cur = BAD_CAST buffer;
1.69 daniel 9333: input->free = NULL;
1.20 daniel 9334:
1.40 daniel 9335: inputPush(ctxt, input);
1.69 daniel 9336: return(ctxt);
9337: }
9338:
9339: /**
9340: * xmlSAXParseMemory :
9341: * @sax: the SAX handler block
9342: * @buffer: an pointer to a char array
1.127 daniel 9343: * @size: the size of the array
9344: * @recovery: work in recovery mode, i.e. tries to read not Well Formed
1.69 daniel 9345: * documents
9346: *
9347: * parse an XML in-memory block and use the given SAX function block
9348: * to handle the parsing callback. If sax is NULL, fallback to the default
9349: * DOM tree building routines.
9350: *
9351: * Returns the resulting document tree
9352: */
9353: xmlDocPtr
9354: xmlSAXParseMemory(xmlSAXHandlerPtr sax, char *buffer, int size, int recovery) {
9355: xmlDocPtr ret;
9356: xmlParserCtxtPtr ctxt;
9357:
9358: ctxt = xmlCreateMemoryParserCtxt(buffer, size);
9359: if (ctxt == NULL) return(NULL);
1.74 daniel 9360: if (sax != NULL) {
9361: ctxt->sax = sax;
9362: ctxt->userData = NULL;
9363: }
1.20 daniel 9364:
9365: xmlParseDocument(ctxt);
1.40 daniel 9366:
1.72 daniel 9367: if ((ctxt->wellFormed) || recovery) ret = ctxt->myDoc;
1.59 daniel 9368: else {
9369: ret = NULL;
1.72 daniel 9370: xmlFreeDoc(ctxt->myDoc);
9371: ctxt->myDoc = NULL;
1.59 daniel 9372: }
1.86 daniel 9373: if (sax != NULL)
9374: ctxt->sax = NULL;
1.69 daniel 9375: xmlFreeParserCtxt(ctxt);
1.16 daniel 9376:
1.9 httpng 9377: return(ret);
1.17 daniel 9378: }
9379:
1.55 daniel 9380: /**
9381: * xmlParseMemory :
1.68 daniel 9382: * @buffer: an pointer to a char array
1.55 daniel 9383: * @size: the size of the array
9384: *
9385: * parse an XML in-memory block and build a tree.
9386: *
1.68 daniel 9387: * Returns the resulting document tree
1.55 daniel 9388: */
9389:
9390: xmlDocPtr xmlParseMemory(char *buffer, int size) {
1.59 daniel 9391: return(xmlSAXParseMemory(NULL, buffer, size, 0));
9392: }
9393:
9394: /**
9395: * xmlRecoverMemory :
1.68 daniel 9396: * @buffer: an pointer to a char array
1.59 daniel 9397: * @size: the size of the array
9398: *
9399: * parse an XML in-memory block and build a tree.
9400: * In the case the document is not Well Formed, a tree is built anyway
9401: *
1.68 daniel 9402: * Returns the resulting document tree
1.59 daniel 9403: */
9404:
9405: xmlDocPtr xmlRecoverMemory(char *buffer, int size) {
9406: return(xmlSAXParseMemory(NULL, buffer, size, 1));
1.17 daniel 9407: }
9408:
9409:
1.50 daniel 9410: /**
9411: * xmlSetupParserForBuffer:
9412: * @ctxt: an XML parser context
1.123 daniel 9413: * @buffer: a xmlChar * buffer
1.50 daniel 9414: * @filename: a file name
9415: *
1.19 daniel 9416: * Setup the parser context to parse a new buffer; Clears any prior
9417: * contents from the parser context. The buffer parameter must not be
9418: * NULL, but the filename parameter can be
9419: */
1.55 daniel 9420: void
1.123 daniel 9421: xmlSetupParserForBuffer(xmlParserCtxtPtr ctxt, const xmlChar* buffer,
1.17 daniel 9422: const char* filename)
9423: {
1.96 daniel 9424: xmlParserInputPtr input;
1.40 daniel 9425:
1.96 daniel 9426: input = xmlNewInputStream(ctxt);
9427: if (input == NULL) {
9428: perror("malloc");
1.119 daniel 9429: xmlFree(ctxt);
1.145 daniel 9430: return;
1.96 daniel 9431: }
9432:
9433: xmlClearParserCtxt(ctxt);
9434: if (filename != NULL)
1.119 daniel 9435: input->filename = xmlMemStrdup(filename);
1.96 daniel 9436: input->base = buffer;
9437: input->cur = buffer;
9438: inputPush(ctxt, input);
1.17 daniel 9439: }
9440:
1.123 daniel 9441: /**
9442: * xmlSAXUserParseFile:
9443: * @sax: a SAX handler
9444: * @user_data: The user data returned on SAX callbacks
9445: * @filename: a file name
9446: *
9447: * parse an XML file and call the given SAX handler routines.
9448: * Automatic support for ZLIB/Compress compressed document is provided
9449: *
9450: * Returns 0 in case of success or a error number otherwise
9451: */
1.131 daniel 9452: int
9453: xmlSAXUserParseFile(xmlSAXHandlerPtr sax, void *user_data,
9454: const char *filename) {
1.123 daniel 9455: int ret = 0;
9456: xmlParserCtxtPtr ctxt;
9457:
9458: ctxt = xmlCreateFileParserCtxt(filename);
9459: if (ctxt == NULL) return -1;
1.134 daniel 9460: if (ctxt->sax != &xmlDefaultSAXHandler)
9461: xmlFree(ctxt->sax);
1.123 daniel 9462: ctxt->sax = sax;
1.140 daniel 9463: if (user_data != NULL)
9464: ctxt->userData = user_data;
1.123 daniel 9465:
9466: xmlParseDocument(ctxt);
9467:
9468: if (ctxt->wellFormed)
9469: ret = 0;
9470: else {
9471: if (ctxt->errNo != 0)
9472: ret = ctxt->errNo;
9473: else
9474: ret = -1;
9475: }
9476: if (sax != NULL)
9477: ctxt->sax = NULL;
9478: xmlFreeParserCtxt(ctxt);
9479:
9480: return ret;
9481: }
9482:
9483: /**
9484: * xmlSAXUserParseMemory:
9485: * @sax: a SAX handler
9486: * @user_data: The user data returned on SAX callbacks
9487: * @buffer: an in-memory XML document input
1.127 daniel 9488: * @size: the length of the XML document in bytes
1.123 daniel 9489: *
9490: * A better SAX parsing routine.
9491: * parse an XML in-memory buffer and call the given SAX handler routines.
9492: *
9493: * Returns 0 in case of success or a error number otherwise
9494: */
9495: int xmlSAXUserParseMemory(xmlSAXHandlerPtr sax, void *user_data,
9496: char *buffer, int size) {
9497: int ret = 0;
9498: xmlParserCtxtPtr ctxt;
9499:
9500: ctxt = xmlCreateMemoryParserCtxt(buffer, size);
9501: if (ctxt == NULL) return -1;
9502: ctxt->sax = sax;
9503: ctxt->userData = user_data;
9504:
9505: xmlParseDocument(ctxt);
9506:
9507: if (ctxt->wellFormed)
9508: ret = 0;
9509: else {
9510: if (ctxt->errNo != 0)
9511: ret = ctxt->errNo;
9512: else
9513: ret = -1;
9514: }
9515: if (sax != NULL)
9516: ctxt->sax = NULL;
9517: xmlFreeParserCtxt(ctxt);
9518:
9519: return ret;
9520: }
9521:
1.32 daniel 9522:
1.98 daniel 9523: /************************************************************************
9524: * *
1.127 daniel 9525: * Miscellaneous *
1.98 daniel 9526: * *
9527: ************************************************************************/
9528:
/**
 * xmlCleanupParser:
 *
 * Cleanup function for the XML parser. It tries to reclaim all
 * parsing related global memory allocated for the parser processing:
 * the character encoding handlers first, then the predefined entities.
 * It doesn't deallocate any document related memory. Calling this
 * function should not prevent reusing the parser.
 */

void
xmlCleanupParser(void)
{
    /* global encoding handlers */
    xmlCleanupCharEncodingHandlers();

    /* global predefined entities */
    xmlCleanupPredefinedEntities();
}
1.98 daniel 9543:
1.50 daniel 9544: /**
9545: * xmlParserFindNodeInfo:
9546: * @ctxt: an XML parser context
9547: * @node: an XML node within the tree
9548: *
9549: * Find the parser node info struct for a given node
9550: *
1.68 daniel 9551: * Returns an xmlParserNodeInfo block pointer or NULL
1.32 daniel 9552: */
9553: const xmlParserNodeInfo* xmlParserFindNodeInfo(const xmlParserCtxt* ctx,
9554: const xmlNode* node)
9555: {
9556: unsigned long pos;
9557:
9558: /* Find position where node should be at */
9559: pos = xmlParserFindNodeInfoIndex(&ctx->node_seq, node);
9560: if ( ctx->node_seq.buffer[pos].node == node )
9561: return &ctx->node_seq.buffer[pos];
9562: else
9563: return NULL;
9564: }
9565:
9566:
1.50 daniel 9567: /**
9568: * xmlInitNodeInfoSeq :
9569: * @seq: a node info sequence pointer
9570: *
9571: * -- Initialize (set to initial state) node info sequence
1.32 daniel 9572: */
1.55 daniel 9573: void
9574: xmlInitNodeInfoSeq(xmlParserNodeInfoSeqPtr seq)
1.32 daniel 9575: {
9576: seq->length = 0;
9577: seq->maximum = 0;
9578: seq->buffer = NULL;
9579: }
9580:
1.50 daniel 9581: /**
9582: * xmlClearNodeInfoSeq :
9583: * @seq: a node info sequence pointer
9584: *
9585: * -- Clear (release memory and reinitialize) node
1.32 daniel 9586: * info sequence
9587: */
1.55 daniel 9588: void
9589: xmlClearNodeInfoSeq(xmlParserNodeInfoSeqPtr seq)
1.32 daniel 9590: {
9591: if ( seq->buffer != NULL )
1.119 daniel 9592: xmlFree(seq->buffer);
1.32 daniel 9593: xmlInitNodeInfoSeq(seq);
9594: }
9595:
9596:
1.50 daniel 9597: /**
9598: * xmlParserFindNodeInfoIndex:
9599: * @seq: a node info sequence pointer
9600: * @node: an XML node pointer
9601: *
9602: *
1.32 daniel 9603: * xmlParserFindNodeInfoIndex : Find the index that the info record for
9604: * the given node is or should be at in a sorted sequence
1.68 daniel 9605: *
9606: * Returns a long indicating the position of the record
1.32 daniel 9607: */
9608: unsigned long xmlParserFindNodeInfoIndex(const xmlParserNodeInfoSeq* seq,
9609: const xmlNode* node)
9610: {
9611: unsigned long upper, lower, middle;
9612: int found = 0;
9613:
9614: /* Do a binary search for the key */
9615: lower = 1;
9616: upper = seq->length;
9617: middle = 0;
9618: while ( lower <= upper && !found) {
9619: middle = lower + (upper - lower) / 2;
9620: if ( node == seq->buffer[middle - 1].node )
9621: found = 1;
9622: else if ( node < seq->buffer[middle - 1].node )
9623: upper = middle - 1;
9624: else
9625: lower = middle + 1;
9626: }
9627:
9628: /* Return position */
9629: if ( middle == 0 || seq->buffer[middle - 1].node < node )
9630: return middle;
9631: else
9632: return middle - 1;
9633: }
9634:
9635:
1.50 daniel 9636: /**
9637: * xmlParserAddNodeInfo:
9638: * @ctxt: an XML parser context
1.68 daniel 9639: * @info: a node info sequence pointer
1.50 daniel 9640: *
9641: * Insert node info record into the sorted sequence
1.32 daniel 9642: */
1.55 daniel 9643: void
9644: xmlParserAddNodeInfo(xmlParserCtxtPtr ctxt,
1.68 daniel 9645: const xmlParserNodeInfo* info)
1.32 daniel 9646: {
9647: unsigned long pos;
9648: static unsigned int block_size = 5;
9649:
9650: /* Find pos and check to see if node is already in the sequence */
1.55 daniel 9651: pos = xmlParserFindNodeInfoIndex(&ctxt->node_seq, info->node);
9652: if ( pos < ctxt->node_seq.length
9653: && ctxt->node_seq.buffer[pos].node == info->node ) {
9654: ctxt->node_seq.buffer[pos] = *info;
1.32 daniel 9655: }
9656:
9657: /* Otherwise, we need to add new node to buffer */
9658: else {
9659: /* Expand buffer by 5 if needed */
1.55 daniel 9660: if ( ctxt->node_seq.length + 1 > ctxt->node_seq.maximum ) {
1.32 daniel 9661: xmlParserNodeInfo* tmp_buffer;
1.55 daniel 9662: unsigned int byte_size = (sizeof(*ctxt->node_seq.buffer)
9663: *(ctxt->node_seq.maximum + block_size));
1.32 daniel 9664:
1.55 daniel 9665: if ( ctxt->node_seq.buffer == NULL )
1.119 daniel 9666: tmp_buffer = (xmlParserNodeInfo*) xmlMalloc(byte_size);
1.32 daniel 9667: else
1.119 daniel 9668: tmp_buffer = (xmlParserNodeInfo*) xmlRealloc(ctxt->node_seq.buffer, byte_size);
1.32 daniel 9669:
9670: if ( tmp_buffer == NULL ) {
1.55 daniel 9671: if ((ctxt->sax != NULL) && (ctxt->sax->error != NULL))
1.74 daniel 9672: ctxt->sax->error(ctxt->userData, "Out of memory\n");
1.123 daniel 9673: ctxt->errNo = XML_ERR_NO_MEMORY;
1.32 daniel 9674: return;
9675: }
1.55 daniel 9676: ctxt->node_seq.buffer = tmp_buffer;
9677: ctxt->node_seq.maximum += block_size;
1.32 daniel 9678: }
9679:
9680: /* If position is not at end, move elements out of the way */
1.55 daniel 9681: if ( pos != ctxt->node_seq.length ) {
1.32 daniel 9682: unsigned long i;
9683:
1.55 daniel 9684: for ( i = ctxt->node_seq.length; i > pos; i-- )
9685: ctxt->node_seq.buffer[i] = ctxt->node_seq.buffer[i - 1];
1.32 daniel 9686: }
9687:
9688: /* Copy element and increase length */
1.55 daniel 9689: ctxt->node_seq.buffer[pos] = *info;
9690: ctxt->node_seq.length++;
1.32 daniel 9691: }
9692: }
1.77 daniel 9693:
1.98 daniel 9694:
9695: /**
9696: * xmlSubstituteEntitiesDefault :
9697: * @val: int 0 or 1
9698: *
9699: * Set and return the previous value for default entity support.
9700: * Initially the parser always keep entity references instead of substituting
9701: * entity values in the output. This function has to be used to change the
9702: * default parser behaviour
9703: * SAX::subtituteEntities() has to be used for changing that on a file by
9704: * file basis.
9705: *
9706: * Returns the last value for 0 for no substitution, 1 for substitution.
9707: */
9708:
9709: int
9710: xmlSubstituteEntitiesDefault(int val) {
9711: int old = xmlSubstituteEntitiesDefaultValue;
9712:
9713: xmlSubstituteEntitiesDefaultValue = val;
9714: return(old);
9715: }
1.77 daniel 9716:
Webmaster