Annotation of XML/parser.c, revision 1.173
1.1 veillard 1: /*
1.3 veillard 2: * parser.c : an XML 1.0 non-verifying parser
1.15 veillard 3: *
4: * See Copyright for the status of this software.
5: *
1.60 daniel 6: * Daniel.Veillard@w3.org
1.1 veillard 7: */
8:
1.26 daniel 9: #ifdef WIN32
1.138 daniel 10: #include "win32config.h"
1.26 daniel 11: #else
1.121 daniel 12: #include "config.h"
1.26 daniel 13: #endif
1.121 daniel 14:
1.1 veillard 15: #include <stdio.h>
1.121 daniel 16: #include <string.h> /* for memset() only */
17: #ifdef HAVE_CTYPE_H
1.1 veillard 18: #include <ctype.h>
1.121 daniel 19: #endif
20: #ifdef HAVE_STDLIB_H
1.50 daniel 21: #include <stdlib.h>
1.121 daniel 22: #endif
23: #ifdef HAVE_SYS_STAT_H
1.9 httpng 24: #include <sys/stat.h>
1.121 daniel 25: #endif
1.9 httpng 26: #ifdef HAVE_FCNTL_H
27: #include <fcntl.h>
28: #endif
1.10 httpng 29: #ifdef HAVE_UNISTD_H
30: #include <unistd.h>
31: #endif
1.20 daniel 32: #ifdef HAVE_ZLIB_H
33: #include <zlib.h>
34: #endif
1.1 veillard 35:
1.119 daniel 36: #include "xmlmemory.h"
1.14 veillard 37: #include "tree.h"
1.1 veillard 38: #include "parser.h"
1.14 veillard 39: #include "entities.h"
1.75 daniel 40: #include "encoding.h"
1.61 daniel 41: #include "valid.h"
1.69 daniel 42: #include "parserInternals.h"
1.91 daniel 43: #include "xmlIO.h"
1.122 daniel 44: #include "xml-error.h"
1.1 veillard 45:
1.140 daniel 46: #define XML_PARSER_BIG_BUFFER_SIZE 1000
47: #define XML_PARSER_BUFFER_SIZE 100
48:
1.86 daniel 49: const char *xmlParserVersion = LIBXML_VERSION;
1.160 daniel 50: int xmlGetWarningsDefaultValue = 1;
1.86 daniel 51:
1.139 daniel 52: /*
53: * List of XML prefixed PI allowed by W3C specs
54: */
55:
56: const char *xmlW3CPIs[] = {
57: "xml-stylesheet",
58: NULL
59: };
1.91 daniel 60:
1.151 daniel 61: void xmlParserHandleReference(xmlParserCtxtPtr ctxt);
62: void xmlParserHandlePEReference(xmlParserCtxtPtr ctxt);
63: xmlEntityPtr xmlParseStringPEReference(xmlParserCtxtPtr ctxt,
64: const xmlChar **str);
1.91 daniel 65: /************************************************************************
66: * *
67: * Input handling functions for progressive parsing *
68: * *
69: ************************************************************************/
70:
71: /* #define DEBUG_INPUT */
1.140 daniel 72: /* #define DEBUG_STACK */
73: /* #define DEBUG_PUSH */
74:
1.91 daniel 75:
/* Amount of lookahead requested when refilling the input. */
#define INPUT_CHUNK 250
/* we need to keep enough input to show errors in context */
#define LINE_LEN 80

#ifdef DEBUG_INPUT
#define CHECK_BUFFER(in) check_buffer(in)

/**
 * check_buffer:
 * @in:  an XML parser input
 *
 * Debugging helper, compiled only when DEBUG_INPUT is defined.
 * Reports on stderr when the input's base/cur pointers disagree with
 * the underlying buffer, then dumps the current buffer state.
 */
void check_buffer(xmlParserInputPtr in) {
    if (in->base != in->buf->buffer->content) {
        fprintf(stderr, "xmlParserInput: base mismatch problem\n");
    }
    if (in->cur < in->base) {
        fprintf(stderr, "xmlParserInput: cur < base problem\n");
    }
    if (in->cur > in->base + in->buf->buffer->use) {
        fprintf(stderr, "xmlParserInput: cur > base + use problem\n");
    }
    /*
     * Pointers are printed with %p and integral values are widened to
     * long: casting a pointer to int truncates it on LP64 platforms and
     * a pointer difference is not an int.
     */
    fprintf(stderr, "buffer %p : content %p, cur %ld, use %ld, size %ld\n",
            (void *) in, (void *) in->buf->buffer->content,
            (long) (in->cur - in->base),
            (long) in->buf->buffer->use, (long) in->buf->buffer->size);
}

#else
#define CHECK_BUFFER(in)
#endif
101:
1.91 daniel 102:
103: /**
104: * xmlParserInputRead:
105: * @in: an XML parser input
106: * @len: an indicative size for the lookahead
107: *
108: * This function refresh the input for the parser. It doesn't try to
109: * preserve pointers to the input buffer, and discard already read data
110: *
1.123 daniel 111: * Returns the number of xmlChars read, or -1 in case of error, 0 indicate the
1.91 daniel 112: * end of this entity
113: */
114: int
115: xmlParserInputRead(xmlParserInputPtr in, int len) {
116: int ret;
117: int used;
118: int index;
119:
120: #ifdef DEBUG_INPUT
121: fprintf(stderr, "Read\n");
122: #endif
123: if (in->buf == NULL) return(-1);
124: if (in->base == NULL) return(-1);
125: if (in->cur == NULL) return(-1);
126: if (in->buf->buffer == NULL) return(-1);
127:
128: CHECK_BUFFER(in);
129:
130: used = in->cur - in->buf->buffer->content;
131: ret = xmlBufferShrink(in->buf->buffer, used);
132: if (ret > 0) {
133: in->cur -= ret;
134: in->consumed += ret;
135: }
136: ret = xmlParserInputBufferRead(in->buf, len);
137: if (in->base != in->buf->buffer->content) {
138: /*
139: * the buffer has been realloced
140: */
141: index = in->cur - in->base;
142: in->base = in->buf->buffer->content;
143: in->cur = &in->buf->buffer->content[index];
144: }
145:
146: CHECK_BUFFER(in);
147:
148: return(ret);
149: }
150:
151: /**
152: * xmlParserInputGrow:
153: * @in: an XML parser input
154: * @len: an indicative size for the lookahead
155: *
156: * This function increase the input for the parser. It tries to
157: * preserve pointers to the input buffer, and keep already read data
158: *
1.123 daniel 159: * Returns the number of xmlChars read, or -1 in case of error, 0 indicate the
1.91 daniel 160: * end of this entity
161: */
162: int
163: xmlParserInputGrow(xmlParserInputPtr in, int len) {
164: int ret;
165: int index;
166:
167: #ifdef DEBUG_INPUT
168: fprintf(stderr, "Grow\n");
169: #endif
170: if (in->buf == NULL) return(-1);
171: if (in->base == NULL) return(-1);
172: if (in->cur == NULL) return(-1);
173: if (in->buf->buffer == NULL) return(-1);
174:
175: CHECK_BUFFER(in);
176:
177: index = in->cur - in->base;
178: if (in->buf->buffer->use > index + INPUT_CHUNK) {
179:
180: CHECK_BUFFER(in);
181:
182: return(0);
183: }
1.148 daniel 184: if ((in->buf->httpIO != NULL) || (in->buf->ftpIO != NULL) ||
185: (in->buf->file != NULL) ||
1.140 daniel 186: #ifdef HAVE_ZLIB_H
187: (in->buf->gzfile != NULL) ||
188: #endif
189: (in->buf->fd >= 0))
190: ret = xmlParserInputBufferGrow(in->buf, len);
191: else
192: return(0);
1.135 daniel 193:
194: /*
195: * NOTE : in->base may be a "dandling" i.e. freed pointer in this
196: * block, but we use it really as an integer to do some
197: * pointer arithmetic. Insure will raise it as a bug but in
198: * that specific case, that's not !
199: */
1.91 daniel 200: if (in->base != in->buf->buffer->content) {
201: /*
202: * the buffer has been realloced
203: */
204: index = in->cur - in->base;
205: in->base = in->buf->buffer->content;
206: in->cur = &in->buf->buffer->content[index];
207: }
208:
209: CHECK_BUFFER(in);
210:
211: return(ret);
212: }
213:
214: /**
215: * xmlParserInputShrink:
216: * @in: an XML parser input
217: *
218: * This function removes used input for the parser.
219: */
220: void
221: xmlParserInputShrink(xmlParserInputPtr in) {
222: int used;
223: int ret;
224: int index;
225:
226: #ifdef DEBUG_INPUT
227: fprintf(stderr, "Shrink\n");
228: #endif
229: if (in->buf == NULL) return;
230: if (in->base == NULL) return;
231: if (in->cur == NULL) return;
232: if (in->buf->buffer == NULL) return;
233:
234: CHECK_BUFFER(in);
235:
236: used = in->cur - in->buf->buffer->content;
237: if (used > INPUT_CHUNK) {
1.110 daniel 238: ret = xmlBufferShrink(in->buf->buffer, used - LINE_LEN);
1.91 daniel 239: if (ret > 0) {
240: in->cur -= ret;
241: in->consumed += ret;
242: }
243: }
244:
245: CHECK_BUFFER(in);
246:
247: if (in->buf->buffer->use > INPUT_CHUNK) {
248: return;
249: }
250: xmlParserInputBufferRead(in->buf, 2 * INPUT_CHUNK);
251: if (in->base != in->buf->buffer->content) {
252: /*
253: * the buffer has been realloced
254: */
255: index = in->cur - in->base;
256: in->base = in->buf->buffer->content;
257: in->cur = &in->buf->buffer->content[index];
258: }
259:
260: CHECK_BUFFER(in);
261: }
262:
1.45 daniel 263: /************************************************************************
264: * *
265: * Parser stacks related functions and macros *
266: * *
267: ************************************************************************/
1.79 daniel 268:
269: int xmlSubstituteEntitiesDefaultValue = 0;
1.100 daniel 270: int xmlDoValidityCheckingDefaultValue = 0;
1.135 daniel 271: xmlEntityPtr xmlParseStringEntityRef(xmlParserCtxtPtr ctxt,
272: const xmlChar ** str);
1.79 daniel 273:
1.1 veillard 274: /*
1.40 daniel 275: * Generic function for accessing stacks in the Parser Context
1.1 veillard 276: */
277:
1.140 daniel 278: #define PUSH_AND_POP(scope, type, name) \
279: scope int name##Push(xmlParserCtxtPtr ctxt, type value) { \
1.31 daniel 280: if (ctxt->name##Nr >= ctxt->name##Max) { \
281: ctxt->name##Max *= 2; \
1.119 daniel 282: ctxt->name##Tab = (void *) xmlRealloc(ctxt->name##Tab, \
1.40 daniel 283: ctxt->name##Max * sizeof(ctxt->name##Tab[0])); \
284: if (ctxt->name##Tab == NULL) { \
1.31 daniel 285: fprintf(stderr, "realloc failed !\n"); \
1.145 daniel 286: return(0); \
1.31 daniel 287: } \
288: } \
1.40 daniel 289: ctxt->name##Tab[ctxt->name##Nr] = value; \
290: ctxt->name = value; \
291: return(ctxt->name##Nr++); \
1.31 daniel 292: } \
1.140 daniel 293: scope type name##Pop(xmlParserCtxtPtr ctxt) { \
1.69 daniel 294: type ret; \
1.40 daniel 295: if (ctxt->name##Nr <= 0) return(0); \
296: ctxt->name##Nr--; \
1.50 daniel 297: if (ctxt->name##Nr > 0) \
298: ctxt->name = ctxt->name##Tab[ctxt->name##Nr - 1]; \
299: else \
300: ctxt->name = NULL; \
1.69 daniel 301: ret = ctxt->name##Tab[ctxt->name##Nr]; \
302: ctxt->name##Tab[ctxt->name##Nr] = 0; \
303: return(ret); \
1.31 daniel 304: } \
305:
1.140 daniel 306: PUSH_AND_POP(extern, xmlParserInputPtr, input)
307: PUSH_AND_POP(extern, xmlNodePtr, node)
308: PUSH_AND_POP(extern, xmlChar*, name)
1.40 daniel 309:
/*
 * Macros for accessing the content. Those should be used only by the parser,
 * and not exported.
 *
 * Dirty macros, i.e. one needs to make assumptions on the context to use them
 *
 *   CUR_PTR return the current pointer to the xmlChar to be parsed.
 *           To be used with extreme caution since operations consuming
 *           characters may move the input buffer to a different location !
 *   RAW     returns the current xmlChar value, or -1 when a token is
 *           pending in ctxt->token.
 *   CUR     returns the pending token if there is one, otherwise the
 *           current xmlChar value, i.e. a 8 bit value if compiled
 *           in ISO-Latin or UTF-8.
 *           This should be used internally by the parser
 *           only to compare to ASCII values otherwise it would break when
 *           running with UTF-8 encoding.
 *   NXT(n)  returns the n'th next xmlChar. Same as CUR it should be used only
 *           to compare on ASCII based substring.
 *   SKIP(n) Skip n xmlChar, and must also be used only to skip ASCII defined
 *           strings within the parser.
 *
 * Clean macros, not dependent of an ASCII context, expect UTF-8 encoding
 *
 *   NEXT    Skip to the next character, this does the proper decoding
 *           in UTF-8 mode. It also pops finished entities on the fly.
 *   NEXTL(l) Skip l xmlChar while keeping the line/column counters up
 *           to date.
 *   COPY_BUF(l,b,i,v) append the char v of length l to b at index i and
 *           advance i accordingly.
 *   CUR_CHAR(l) Return the current char as an int as well as its length.
 *
 * SKIP, SHRINK and GROW expand to several statements; they are wrapped in
 * do { } while (0) so that they behave as a single statement when used as
 * the body of an unbraced if/else or loop. SKIP_BLANKS, NEXT, NEXTL,
 * CUR_CHAR, CUR_SCHAR and COPY_BUF carry their own trailing semicolon and
 * must therefore not be used as the unbraced body of an if followed by
 * an else.
 */

#define RAW (ctxt->token ? -1 : (*ctxt->input->cur))
#define CUR (ctxt->token ? ctxt->token : (*ctxt->input->cur))
#define NXT(val) (ctxt->input->cur[(val)])
#define CUR_PTR ctxt->input->cur

#define SKIP(val) do {                                                  \
    ctxt->nbChars += (val),ctxt->input->cur += (val);                   \
    if (*ctxt->input->cur == '%') xmlParserHandlePEReference(ctxt);     \
    if (*ctxt->input->cur == '&') xmlParserHandleReference(ctxt);       \
    if ((*ctxt->input->cur == 0) &&                                     \
        (xmlParserInputGrow(ctxt->input, INPUT_CHUNK) <= 0))            \
        xmlPopInput(ctxt);                                              \
  } while (0)

#define SHRINK do {                                                     \
    xmlParserInputShrink(ctxt->input);                                  \
    if ((*ctxt->input->cur == 0) &&                                     \
        (xmlParserInputGrow(ctxt->input, INPUT_CHUNK) <= 0))            \
        xmlPopInput(ctxt);                                              \
  } while (0)

#define GROW do {                                                       \
    xmlParserInputGrow(ctxt->input, INPUT_CHUNK);                       \
    if ((*ctxt->input->cur == 0) &&                                     \
        (xmlParserInputGrow(ctxt->input, INPUT_CHUNK) <= 0))            \
        xmlPopInput(ctxt);                                              \
  } while (0)

#define SKIP_BLANKS xmlSkipBlankChars(ctxt);

#define NEXT xmlNextChar(ctxt);

#define NEXTL(l)                                                        \
    if (*(ctxt->input->cur) == '\n') {                                  \
        ctxt->input->line++; ctxt->input->col = 1;                      \
    } else ctxt->input->col++;                                          \
    ctxt->token = 0; ctxt->input->cur += l;                             \
    if (*ctxt->input->cur == '%') xmlParserHandlePEReference(ctxt);     \
    if (*ctxt->input->cur == '&') xmlParserHandleReference(ctxt);

#define CUR_CHAR(l) xmlCurrentChar(ctxt, &l);
#define CUR_SCHAR(s, l) xmlStringCurrentChar(ctxt, s, &l);

#define COPY_BUF(l,b,i,v)                                               \
    if (l == 1) b[i++] = (xmlChar) v;                                   \
    else i += xmlCopyChar(l,&b[i],v);
1.151 daniel 377:
378: /**
379: * xmlNextChar:
380: * @ctxt: the XML parser context
381: *
382: * Skip to the next char input char.
383: */
1.55 daniel 384:
1.151 daniel 385: void
386: xmlNextChar(xmlParserCtxtPtr ctxt) {
387: if (ctxt->token != 0) ctxt->token = 0;
388: else {
389: if ((*ctxt->input->cur == 0) &&
390: (xmlParserInputGrow(ctxt->input, INPUT_CHUNK) <= 0) &&
391: (ctxt->instate != XML_PARSER_COMMENT)) {
392: /*
393: * If we are at the end of the current entity and
394: * the context allows it, we pop consumed entities
395: * automatically.
396: * TODO: the auto closing should be blocked in other cases
397: */
398: xmlPopInput(ctxt);
399: } else {
400: if (*(ctxt->input->cur) == '\n') {
401: ctxt->input->line++; ctxt->input->col = 1;
402: } else ctxt->input->col++;
403: if (ctxt->encoding == NULL) {
404: /*
405: * We are supposed to handle UTF8, check it's valid
406: * From rfc2044: encoding of the Unicode values on UTF-8:
407: *
408: * UCS-4 range (hex.) UTF-8 octet sequence (binary)
409: * 0000 0000-0000 007F 0xxxxxxx
410: * 0000 0080-0000 07FF 110xxxxx 10xxxxxx
411: * 0000 0800-0000 FFFF 1110xxxx 10xxxxxx 10xxxxxx
412: *
1.160 daniel 413: * Check for the 0x110000 limit too
1.151 daniel 414: */
415: const unsigned char *cur = ctxt->input->cur;
416: unsigned char c;
1.91 daniel 417:
1.151 daniel 418: c = *cur;
419: if (c & 0x80) {
420: if (cur[1] == 0)
421: xmlParserInputGrow(ctxt->input, INPUT_CHUNK);
422: if ((cur[1] & 0xc0) != 0x80)
423: goto encoding_error;
424: if ((c & 0xe0) == 0xe0) {
425: unsigned int val;
426:
427: if (cur[2] == 0)
428: xmlParserInputGrow(ctxt->input, INPUT_CHUNK);
429: if ((cur[2] & 0xc0) != 0x80)
430: goto encoding_error;
431: if ((c & 0xf0) == 0xf0) {
432: if (cur[3] == 0)
433: xmlParserInputGrow(ctxt->input, INPUT_CHUNK);
434: if (((c & 0xf8) != 0xf0) ||
435: ((cur[3] & 0xc0) != 0x80))
436: goto encoding_error;
437: /* 4-byte code */
438: ctxt->input->cur += 4;
439: val = (cur[0] & 0x7) << 18;
440: val |= (cur[1] & 0x3f) << 12;
441: val |= (cur[2] & 0x3f) << 6;
442: val |= cur[3] & 0x3f;
443: } else {
444: /* 3-byte code */
445: ctxt->input->cur += 3;
446: val = (cur[0] & 0xf) << 12;
447: val |= (cur[1] & 0x3f) << 6;
448: val |= cur[2] & 0x3f;
449: }
450: if (((val > 0xd7ff) && (val < 0xe000)) ||
451: ((val > 0xfffd) && (val < 0x10000)) ||
1.160 daniel 452: (val >= 0x110000)) {
1.151 daniel 453: if ((ctxt->sax != NULL) &&
454: (ctxt->sax->error != NULL))
455: ctxt->sax->error(ctxt->userData,
456: "Char out of allowed range\n");
457: ctxt->errNo = XML_ERR_INVALID_ENCODING;
458: ctxt->wellFormed = 0;
459: }
460: } else
461: /* 2-byte code */
462: ctxt->input->cur += 2;
463: } else
464: /* 1-byte code */
465: ctxt->input->cur++;
466: } else {
467: /*
468: * Assume it's a fixed lenght encoding (1) with
469: * a compatibke encoding for the ASCII set, since
470: * XML constructs only use < 128 chars
471: */
472: ctxt->input->cur++;
473: }
474: ctxt->nbChars++;
475: if (*ctxt->input->cur == 0)
476: xmlParserInputGrow(ctxt->input, INPUT_CHUNK);
477: }
478: }
1.154 daniel 479: if (*ctxt->input->cur == '%') xmlParserHandlePEReference(ctxt);
480: if (*ctxt->input->cur == '&') xmlParserHandleReference(ctxt);
1.168 daniel 481: if ((*ctxt->input->cur == 0) &&
482: (xmlParserInputGrow(ctxt->input, INPUT_CHUNK) <= 0))
483: xmlPopInput(ctxt);
1.151 daniel 484: return;
485: encoding_error:
486: /*
487: * If we detect an UTF8 error that probably mean that the
488: * input encoding didn't get properly advertized in the
489: * declaration header. Report the error and switch the encoding
490: * to ISO-Latin-1 (if you don't like this policy, just declare the
491: * encoding !)
492: */
493: if ((ctxt->sax != NULL) && (ctxt->sax->error != NULL))
494: ctxt->sax->error(ctxt->userData,
495: "Input is not proper UTF-8, indicate encoding !\n");
496: ctxt->errNo = XML_ERR_INVALID_ENCODING;
497:
498: ctxt->encoding = xmlStrdup(BAD_CAST "ISO-8859-1");
499: ctxt->input->cur++;
500: return;
501: }
1.42 daniel 502:
1.152 daniel 503: /**
504: * xmlCurrentChar:
505: * @ctxt: the XML parser context
506: * @len: pointer to the length of the char read
507: *
508: * The current char value, if using UTF-8 this may actaully span multiple
509: * bytes in the input buffer.
510: *
511: * Returns the current char value and its lenght
512: */
513:
514: int
515: xmlCurrentChar(xmlParserCtxtPtr ctxt, int *len) {
516: if (ctxt->token != 0) {
517: *len = 0;
518: return(ctxt->token);
519: }
520: if (ctxt->encoding == NULL) {
521: /*
522: * We are supposed to handle UTF8, check it's valid
523: * From rfc2044: encoding of the Unicode values on UTF-8:
524: *
525: * UCS-4 range (hex.) UTF-8 octet sequence (binary)
526: * 0000 0000-0000 007F 0xxxxxxx
527: * 0000 0080-0000 07FF 110xxxxx 10xxxxxx
528: * 0000 0800-0000 FFFF 1110xxxx 10xxxxxx 10xxxxxx
529: *
1.160 daniel 530: * Check for the 0x110000 limit too
1.152 daniel 531: */
532: const unsigned char *cur = ctxt->input->cur;
533: unsigned char c;
534: unsigned int val;
535:
536: c = *cur;
537: if (c & 0x80) {
538: if (cur[1] == 0)
539: xmlParserInputGrow(ctxt->input, INPUT_CHUNK);
540: if ((cur[1] & 0xc0) != 0x80)
541: goto encoding_error;
542: if ((c & 0xe0) == 0xe0) {
543:
544: if (cur[2] == 0)
545: xmlParserInputGrow(ctxt->input, INPUT_CHUNK);
546: if ((cur[2] & 0xc0) != 0x80)
547: goto encoding_error;
548: if ((c & 0xf0) == 0xf0) {
549: if (cur[3] == 0)
550: xmlParserInputGrow(ctxt->input, INPUT_CHUNK);
551: if (((c & 0xf8) != 0xf0) ||
552: ((cur[3] & 0xc0) != 0x80))
553: goto encoding_error;
554: /* 4-byte code */
555: *len = 4;
556: val = (cur[0] & 0x7) << 18;
557: val |= (cur[1] & 0x3f) << 12;
558: val |= (cur[2] & 0x3f) << 6;
559: val |= cur[3] & 0x3f;
560: } else {
561: /* 3-byte code */
562: *len = 3;
563: val = (cur[0] & 0xf) << 12;
564: val |= (cur[1] & 0x3f) << 6;
565: val |= cur[2] & 0x3f;
566: }
567: } else {
568: /* 2-byte code */
569: *len = 2;
570: val = (cur[0] & 0x1f) << 6;
1.168 daniel 571: val |= cur[1] & 0x3f;
1.152 daniel 572: }
573: if (!IS_CHAR(val)) {
574: if ((ctxt->sax != NULL) &&
575: (ctxt->sax->error != NULL))
576: ctxt->sax->error(ctxt->userData,
577: "Char out of allowed range\n");
578: ctxt->errNo = XML_ERR_INVALID_ENCODING;
579: ctxt->wellFormed = 0;
580: }
581: return(val);
582: } else {
583: /* 1-byte code */
584: *len = 1;
585: return((int) *ctxt->input->cur);
586: }
587: }
588: /*
589: * Assume it's a fixed lenght encoding (1) with
590: * a compatibke encoding for the ASCII set, since
591: * XML constructs only use < 128 chars
592: */
593: *len = 1;
594: return((int) *ctxt->input->cur);
595: encoding_error:
596: /*
597: * If we detect an UTF8 error that probably mean that the
598: * input encoding didn't get properly advertized in the
599: * declaration header. Report the error and switch the encoding
600: * to ISO-Latin-1 (if you don't like this policy, just declare the
601: * encoding !)
602: */
603: if ((ctxt->sax != NULL) && (ctxt->sax->error != NULL))
604: ctxt->sax->error(ctxt->userData,
605: "Input is not proper UTF-8, indicate encoding !\n");
606: ctxt->errNo = XML_ERR_INVALID_ENCODING;
607:
608: ctxt->encoding = xmlStrdup(BAD_CAST "ISO-8859-1");
609: *len = 1;
610: return((int) *ctxt->input->cur);
611: }
612:
613: /**
1.162 daniel 614: * xmlStringCurrentChar:
615: * @ctxt: the XML parser context
616: * @cur: pointer to the beginning of the char
617: * @len: pointer to the length of the char read
618: *
619: * The current char value, if using UTF-8 this may actaully span multiple
620: * bytes in the input buffer.
621: *
622: * Returns the current char value and its lenght
623: */
624:
625: int
626: xmlStringCurrentChar(xmlParserCtxtPtr ctxt, const xmlChar *cur, int *len) {
627: if (ctxt->encoding == NULL) {
628: /*
629: * We are supposed to handle UTF8, check it's valid
630: * From rfc2044: encoding of the Unicode values on UTF-8:
631: *
632: * UCS-4 range (hex.) UTF-8 octet sequence (binary)
633: * 0000 0000-0000 007F 0xxxxxxx
634: * 0000 0080-0000 07FF 110xxxxx 10xxxxxx
635: * 0000 0800-0000 FFFF 1110xxxx 10xxxxxx 10xxxxxx
636: *
637: * Check for the 0x110000 limit too
638: */
639: unsigned char c;
640: unsigned int val;
641:
642: c = *cur;
643: if (c & 0x80) {
644: if ((cur[1] & 0xc0) != 0x80)
645: goto encoding_error;
646: if ((c & 0xe0) == 0xe0) {
647:
648: if ((cur[2] & 0xc0) != 0x80)
649: goto encoding_error;
650: if ((c & 0xf0) == 0xf0) {
651: if (((c & 0xf8) != 0xf0) ||
652: ((cur[3] & 0xc0) != 0x80))
653: goto encoding_error;
654: /* 4-byte code */
655: *len = 4;
656: val = (cur[0] & 0x7) << 18;
657: val |= (cur[1] & 0x3f) << 12;
658: val |= (cur[2] & 0x3f) << 6;
659: val |= cur[3] & 0x3f;
660: } else {
661: /* 3-byte code */
662: *len = 3;
663: val = (cur[0] & 0xf) << 12;
664: val |= (cur[1] & 0x3f) << 6;
665: val |= cur[2] & 0x3f;
666: }
667: } else {
668: /* 2-byte code */
669: *len = 2;
670: val = (cur[0] & 0x1f) << 6;
671: val |= cur[2] & 0x3f;
672: }
673: if (!IS_CHAR(val)) {
674: if ((ctxt->sax != NULL) &&
675: (ctxt->sax->error != NULL))
676: ctxt->sax->error(ctxt->userData,
677: "Char out of allowed range\n");
678: ctxt->errNo = XML_ERR_INVALID_ENCODING;
679: ctxt->wellFormed = 0;
680: }
681: return(val);
682: } else {
683: /* 1-byte code */
684: *len = 1;
685: return((int) *cur);
686: }
687: }
688: /*
689: * Assume it's a fixed lenght encoding (1) with
690: * a compatibke encoding for the ASCII set, since
691: * XML constructs only use < 128 chars
692: */
693: *len = 1;
694: return((int) *cur);
695: encoding_error:
696: /*
697: * If we detect an UTF8 error that probably mean that the
698: * input encoding didn't get properly advertized in the
699: * declaration header. Report the error and switch the encoding
700: * to ISO-Latin-1 (if you don't like this policy, just declare the
701: * encoding !)
702: */
703: if ((ctxt->sax != NULL) && (ctxt->sax->error != NULL))
704: ctxt->sax->error(ctxt->userData,
705: "Input is not proper UTF-8, indicate encoding !\n");
706: ctxt->errNo = XML_ERR_INVALID_ENCODING;
707:
708: *len = 1;
709: return((int) *cur);
710: }
711:
712: /**
1.152 daniel 713: * xmlCopyChar:
714: * @len: pointer to the length of the char read (or zero)
715: * @array: pointer to an arry of xmlChar
716: * @val: the char value
717: *
718: * append the char value in the array
719: *
720: * Returns the number of xmlChar written
721: */
722:
723: int
724: xmlCopyChar(int len, xmlChar *out, int val) {
725: /*
726: * We are supposed to handle UTF8, check it's valid
727: * From rfc2044: encoding of the Unicode values on UTF-8:
728: *
729: * UCS-4 range (hex.) UTF-8 octet sequence (binary)
730: * 0000 0000-0000 007F 0xxxxxxx
731: * 0000 0080-0000 07FF 110xxxxx 10xxxxxx
732: * 0000 0800-0000 FFFF 1110xxxx 10xxxxxx 10xxxxxx
733: */
734: if (len == 0) {
735: if (val < 0) len = 0;
1.160 daniel 736: else if (val < 0x80) len = 1;
737: else if (val < 0x800) len = 2;
738: else if (val < 0x10000) len = 3;
739: else if (val < 0x110000) len = 4;
1.152 daniel 740: if (len == 0) {
741: fprintf(stderr, "Internal error, xmlCopyChar 0x%X out of bound\n",
742: val);
743: return(0);
744: }
745: }
746: if (len > 1) {
747: int bits;
748:
749: if (val < 0x80) { *out++= val; bits= -6; }
750: else if (val < 0x800) { *out++= (val >> 6) | 0xC0; bits= 0; }
751: else if (val < 0x10000) { *out++= (val >> 12) | 0xE0; bits= 6; }
752: else { *out++= (val >> 18) | 0xF0; bits= 12; }
753:
754: for ( ; bits >= 0; bits-= 6)
755: *out++= ((val >> bits) & 0x3F) | 0x80 ;
756:
757: return(len);
758: }
759: *out = (xmlChar) val;
760: return(1);
1.155 daniel 761: }
762:
763: /**
764: * xmlSkipBlankChars:
765: * @ctxt: the XML parser context
766: *
767: * skip all blanks character found at that point in the input streams.
768: * It pops up finished entities in the process if allowable at that point.
769: *
770: * Returns the number of space chars skipped
771: */
772:
773: int
774: xmlSkipBlankChars(xmlParserCtxtPtr ctxt) {
775: int cur, res = 0;
776:
777: do {
778: cur = CUR;
779: while (IS_BLANK(cur)) {
780: NEXT;
781: cur = CUR;
782: res++;
783: }
784: while ((cur == 0) && (ctxt->inputNr > 1) &&
785: (ctxt->instate != XML_PARSER_COMMENT)) {
786: xmlPopInput(ctxt);
787: cur = CUR;
788: }
789: if (*ctxt->input->cur == '%') xmlParserHandlePEReference(ctxt);
790: if (*ctxt->input->cur == '&') xmlParserHandleReference(ctxt);
791: } while (IS_BLANK(cur));
792: return(res);
1.152 daniel 793: }
794:
1.97 daniel 795: /************************************************************************
796: * *
797: * Commodity functions to handle entities processing *
798: * *
799: ************************************************************************/
1.40 daniel 800:
1.50 daniel 801: /**
802: * xmlPopInput:
803: * @ctxt: an XML parser context
804: *
1.40 daniel 805: * xmlPopInput: the current input pointed by ctxt->input came to an end
806: * pop it and return the next char.
1.45 daniel 807: *
1.123 daniel 808: * Returns the current xmlChar in the parser context
1.40 daniel 809: */
1.123 daniel 810: xmlChar
1.55 daniel 811: xmlPopInput(xmlParserCtxtPtr ctxt) {
1.40 daniel 812: if (ctxt->inputNr == 1) return(0); /* End of main Input */
1.69 daniel 813: xmlFreeInputStream(inputPop(ctxt));
1.97 daniel 814: if ((*ctxt->input->cur == 0) &&
815: (xmlParserInputGrow(ctxt->input, INPUT_CHUNK) <= 0))
816: return(xmlPopInput(ctxt));
1.40 daniel 817: return(CUR);
818: }
819:
1.50 daniel 820: /**
821: * xmlPushInput:
822: * @ctxt: an XML parser context
823: * @input: an XML parser input fragment (entity, XML fragment ...).
824: *
1.40 daniel 825: * xmlPushInput: switch to a new input stream which is stacked on top
826: * of the previous one(s).
827: */
1.55 daniel 828: void
829: xmlPushInput(xmlParserCtxtPtr ctxt, xmlParserInputPtr input) {
1.40 daniel 830: if (input == NULL) return;
831: inputPush(ctxt, input);
1.164 daniel 832: GROW;
1.40 daniel 833: }
834:
1.50 daniel 835: /**
1.69 daniel 836: * xmlFreeInputStream:
1.127 daniel 837: * @input: an xmlParserInputPtr
1.69 daniel 838: *
839: * Free up an input stream.
840: */
841: void
842: xmlFreeInputStream(xmlParserInputPtr input) {
843: if (input == NULL) return;
844:
1.119 daniel 845: if (input->filename != NULL) xmlFree((char *) input->filename);
846: if (input->directory != NULL) xmlFree((char *) input->directory);
1.164 daniel 847: if (input->encoding != NULL) xmlFree((char *) input->encoding);
1.165 daniel 848: if (input->version != NULL) xmlFree((char *) input->version);
1.69 daniel 849: if ((input->free != NULL) && (input->base != NULL))
1.123 daniel 850: input->free((xmlChar *) input->base);
1.93 veillard 851: if (input->buf != NULL)
852: xmlFreeParserInputBuffer(input->buf);
1.69 daniel 853: memset(input, -1, sizeof(xmlParserInput));
1.119 daniel 854: xmlFree(input);
1.69 daniel 855: }
856:
857: /**
1.96 daniel 858: * xmlNewInputStream:
859: * @ctxt: an XML parser context
860: *
861: * Create a new input stream structure
862: * Returns the new input stream or NULL
863: */
864: xmlParserInputPtr
865: xmlNewInputStream(xmlParserCtxtPtr ctxt) {
866: xmlParserInputPtr input;
867:
1.119 daniel 868: input = (xmlParserInputPtr) xmlMalloc(sizeof(xmlParserInput));
1.96 daniel 869: if (input == NULL) {
1.123 daniel 870: ctxt->errNo = XML_ERR_NO_MEMORY;
1.96 daniel 871: if ((ctxt->sax != NULL) && (ctxt->sax->error != NULL))
1.122 daniel 872: ctxt->sax->error(ctxt->userData,
873: "malloc: couldn't allocate a new input stream\n");
1.123 daniel 874: ctxt->errNo = XML_ERR_NO_MEMORY;
1.96 daniel 875: return(NULL);
876: }
1.165 daniel 877: memset(input, 0, sizeof(xmlParserInput));
1.96 daniel 878: input->line = 1;
879: input->col = 1;
1.167 daniel 880: input->standalone = -1;
1.96 daniel 881: return(input);
882: }
883:
884: /**
1.50 daniel 885: * xmlNewEntityInputStream:
886: * @ctxt: an XML parser context
887: * @entity: an Entity pointer
888: *
1.82 daniel 889: * Create a new input stream based on an xmlEntityPtr
1.113 daniel 890: *
891: * Returns the new input stream or NULL
1.45 daniel 892: */
1.50 daniel 893: xmlParserInputPtr
894: xmlNewEntityInputStream(xmlParserCtxtPtr ctxt, xmlEntityPtr entity) {
1.45 daniel 895: xmlParserInputPtr input;
896:
897: if (entity == NULL) {
1.123 daniel 898: ctxt->errNo = XML_ERR_INTERNAL_ERROR;
1.55 daniel 899: if ((ctxt->sax != NULL) && (ctxt->sax->error != NULL))
1.74 daniel 900: ctxt->sax->error(ctxt->userData,
1.45 daniel 901: "internal: xmlNewEntityInputStream entity = NULL\n");
1.123 daniel 902: ctxt->errNo = XML_ERR_INTERNAL_ERROR;
1.50 daniel 903: return(NULL);
1.45 daniel 904: }
905: if (entity->content == NULL) {
1.159 daniel 906: switch (entity->etype) {
1.113 daniel 907: case XML_EXTERNAL_GENERAL_UNPARSED_ENTITY:
1.123 daniel 908: ctxt->errNo = XML_ERR_UNPARSED_ENTITY;
1.113 daniel 909: if ((ctxt->sax != NULL) && (ctxt->sax->error != NULL))
910: ctxt->sax->error(ctxt->userData,
911: "xmlNewEntityInputStream unparsed entity !\n");
912: break;
913: case XML_EXTERNAL_GENERAL_PARSED_ENTITY:
914: case XML_EXTERNAL_PARAMETER_ENTITY:
1.116 daniel 915: return(xmlLoadExternalEntity((char *) entity->SystemID,
1.142 daniel 916: (char *) entity->ExternalID, ctxt));
1.113 daniel 917: case XML_INTERNAL_GENERAL_ENTITY:
918: if ((ctxt->sax != NULL) && (ctxt->sax->error != NULL))
919: ctxt->sax->error(ctxt->userData,
920: "Internal entity %s without content !\n", entity->name);
921: break;
922: case XML_INTERNAL_PARAMETER_ENTITY:
1.123 daniel 923: ctxt->errNo = XML_ERR_INTERNAL_ERROR;
1.113 daniel 924: if ((ctxt->sax != NULL) && (ctxt->sax->error != NULL))
925: ctxt->sax->error(ctxt->userData,
926: "Internal parameter entity %s without content !\n", entity->name);
927: break;
928: case XML_INTERNAL_PREDEFINED_ENTITY:
1.123 daniel 929: ctxt->errNo = XML_ERR_INTERNAL_ERROR;
1.113 daniel 930: if ((ctxt->sax != NULL) && (ctxt->sax->error != NULL))
931: ctxt->sax->error(ctxt->userData,
932: "Predefined entity %s without content !\n", entity->name);
933: break;
934: }
1.50 daniel 935: return(NULL);
1.45 daniel 936: }
1.96 daniel 937: input = xmlNewInputStream(ctxt);
1.45 daniel 938: if (input == NULL) {
1.50 daniel 939: return(NULL);
1.45 daniel 940: }
1.156 daniel 941: input->filename = (char *) entity->SystemID;
1.45 daniel 942: input->base = entity->content;
943: input->cur = entity->content;
1.140 daniel 944: input->length = entity->length;
1.50 daniel 945: return(input);
1.45 daniel 946: }
947:
1.59 daniel 948: /**
949: * xmlNewStringInputStream:
950: * @ctxt: an XML parser context
1.96 daniel 951: * @buffer: an memory buffer
1.59 daniel 952: *
953: * Create a new input stream based on a memory buffer.
1.68 daniel 954: * Returns the new input stream
1.59 daniel 955: */
956: xmlParserInputPtr
1.123 daniel 957: xmlNewStringInputStream(xmlParserCtxtPtr ctxt, const xmlChar *buffer) {
1.59 daniel 958: xmlParserInputPtr input;
959:
1.96 daniel 960: if (buffer == NULL) {
1.123 daniel 961: ctxt->errNo = XML_ERR_INTERNAL_ERROR;
1.59 daniel 962: if ((ctxt->sax != NULL) && (ctxt->sax->error != NULL))
1.74 daniel 963: ctxt->sax->error(ctxt->userData,
1.59 daniel 964: "internal: xmlNewStringInputStream string = NULL\n");
965: return(NULL);
966: }
1.96 daniel 967: input = xmlNewInputStream(ctxt);
1.59 daniel 968: if (input == NULL) {
969: return(NULL);
970: }
1.96 daniel 971: input->base = buffer;
972: input->cur = buffer;
1.140 daniel 973: input->length = xmlStrlen(buffer);
1.59 daniel 974: return(input);
975: }
976:
1.76 daniel 977: /**
978: * xmlNewInputFromFile:
979: * @ctxt: an XML parser context
980: * @filename: the filename to use as entity
981: *
982: * Create a new input stream based on a file.
983: *
984: * Returns the new input stream or NULL in case of error
985: */
986: xmlParserInputPtr
1.79 daniel 987: xmlNewInputFromFile(xmlParserCtxtPtr ctxt, const char *filename) {
1.91 daniel 988: xmlParserInputBufferPtr buf;
1.76 daniel 989: xmlParserInputPtr inputStream;
1.111 daniel 990: char *directory = NULL;
1.76 daniel 991:
1.96 daniel 992: if (ctxt == NULL) return(NULL);
1.91 daniel 993: buf = xmlParserInputBufferCreateFilename(filename, XML_CHAR_ENCODING_NONE);
1.94 daniel 994: if (buf == NULL) {
1.140 daniel 995: char name[XML_PARSER_BIG_BUFFER_SIZE];
1.106 daniel 996:
1.94 daniel 997: if ((ctxt->input != NULL) && (ctxt->input->directory != NULL)) {
998: #ifdef WIN32
999: sprintf(name, "%s\\%s", ctxt->input->directory, filename);
1000: #else
1001: sprintf(name, "%s/%s", ctxt->input->directory, filename);
1002: #endif
1003: buf = xmlParserInputBufferCreateFilename(name,
1004: XML_CHAR_ENCODING_NONE);
1.106 daniel 1005: if (buf != NULL)
1.142 daniel 1006: directory = xmlParserGetDirectory(name);
1.106 daniel 1007: }
1008: if ((buf == NULL) && (ctxt->directory != NULL)) {
1009: #ifdef WIN32
1010: sprintf(name, "%s\\%s", ctxt->directory, filename);
1011: #else
1012: sprintf(name, "%s/%s", ctxt->directory, filename);
1013: #endif
1014: buf = xmlParserInputBufferCreateFilename(name,
1015: XML_CHAR_ENCODING_NONE);
1016: if (buf != NULL)
1.142 daniel 1017: directory = xmlParserGetDirectory(name);
1.106 daniel 1018: }
1019: if (buf == NULL)
1.94 daniel 1020: return(NULL);
1021: }
1022: if (directory == NULL)
1023: directory = xmlParserGetDirectory(filename);
1.76 daniel 1024:
1.96 daniel 1025: inputStream = xmlNewInputStream(ctxt);
1.76 daniel 1026: if (inputStream == NULL) {
1.119 daniel 1027: if (directory != NULL) xmlFree((char *) directory);
1.76 daniel 1028: return(NULL);
1029: }
1030:
1.119 daniel 1031: inputStream->filename = xmlMemStrdup(filename);
1.94 daniel 1032: inputStream->directory = directory;
1.91 daniel 1033: inputStream->buf = buf;
1.76 daniel 1034:
1.91 daniel 1035: inputStream->base = inputStream->buf->buffer->content;
1036: inputStream->cur = inputStream->buf->buffer->content;
1.106 daniel 1037: if ((ctxt->directory == NULL) && (directory != NULL))
1.134 daniel 1038: ctxt->directory = (char *) xmlStrdup((const xmlChar *) directory);
1.76 daniel 1039: return(inputStream);
1040: }
1041:
1.77 daniel 1042: /************************************************************************
1043: * *
1.97 daniel 1044: * Commodity functions to handle parser contexts *
1045: * *
1046: ************************************************************************/
1047:
1048: /**
1049: * xmlInitParserCtxt:
1050: * @ctxt: an XML parser context
1051: *
1052: * Initialize a parser context
1053: */
1054:
1055: void
1056: xmlInitParserCtxt(xmlParserCtxtPtr ctxt)
1057: {
1058: xmlSAXHandler *sax;
1059:
1.168 daniel 1060: xmlDefaultSAXHandlerInit();
1061:
1.119 daniel 1062: sax = (xmlSAXHandler *) xmlMalloc(sizeof(xmlSAXHandler));
1.97 daniel 1063: if (sax == NULL) {
1064: fprintf(stderr, "xmlInitParserCtxt: out of memory\n");
1065: }
1066:
1067: /* Allocate the Input stack */
1.119 daniel 1068: ctxt->inputTab = (xmlParserInputPtr *) xmlMalloc(5 * sizeof(xmlParserInputPtr));
1.97 daniel 1069: ctxt->inputNr = 0;
1070: ctxt->inputMax = 5;
1071: ctxt->input = NULL;
1.165 daniel 1072:
1.97 daniel 1073: ctxt->version = NULL;
1074: ctxt->encoding = NULL;
1075: ctxt->standalone = -1;
1.98 daniel 1076: ctxt->hasExternalSubset = 0;
1077: ctxt->hasPErefs = 0;
1.97 daniel 1078: ctxt->html = 0;
1.98 daniel 1079: ctxt->external = 0;
1.140 daniel 1080: ctxt->instate = XML_PARSER_START;
1.97 daniel 1081: ctxt->token = 0;
1.106 daniel 1082: ctxt->directory = NULL;
1.97 daniel 1083:
1084: /* Allocate the Node stack */
1.119 daniel 1085: ctxt->nodeTab = (xmlNodePtr *) xmlMalloc(10 * sizeof(xmlNodePtr));
1.97 daniel 1086: ctxt->nodeNr = 0;
1087: ctxt->nodeMax = 10;
1088: ctxt->node = NULL;
1089:
1.140 daniel 1090: /* Allocate the Name stack */
1091: ctxt->nameTab = (xmlChar **) xmlMalloc(10 * sizeof(xmlChar *));
1092: ctxt->nameNr = 0;
1093: ctxt->nameMax = 10;
1094: ctxt->name = NULL;
1095:
1.160 daniel 1096: if (sax == NULL) {
1097: ctxt->sax = &xmlDefaultSAXHandler;
1098: } else {
1.97 daniel 1099: ctxt->sax = sax;
1100: memcpy(sax, &xmlDefaultSAXHandler, sizeof(xmlSAXHandler));
1101: }
1102: ctxt->userData = ctxt;
1103: ctxt->myDoc = NULL;
1104: ctxt->wellFormed = 1;
1.99 daniel 1105: ctxt->valid = 1;
1.100 daniel 1106: ctxt->validate = xmlDoValidityCheckingDefaultValue;
1107: ctxt->vctxt.userData = ctxt;
1.149 daniel 1108: if (ctxt->validate) {
1109: ctxt->vctxt.error = xmlParserValidityError;
1.160 daniel 1110: if (xmlGetWarningsDefaultValue == 0)
1111: ctxt->vctxt.warning = NULL;
1112: else
1113: ctxt->vctxt.warning = xmlParserValidityWarning;
1.149 daniel 1114: } else {
1115: ctxt->vctxt.error = NULL;
1116: ctxt->vctxt.warning = NULL;
1117: }
1.97 daniel 1118: ctxt->replaceEntities = xmlSubstituteEntitiesDefaultValue;
1119: ctxt->record_info = 0;
1.135 daniel 1120: ctxt->nbChars = 0;
1.140 daniel 1121: ctxt->checkIndex = 0;
1122: ctxt->errNo = XML_ERR_OK;
1.97 daniel 1123: xmlInitNodeInfoSeq(&ctxt->node_seq);
1124: }
1125:
1126: /**
1127: * xmlFreeParserCtxt:
1128: * @ctxt: an XML parser context
1129: *
1130: * Free all the memory used by a parser context. However the parsed
1131: * document in ctxt->myDoc is not freed.
1132: */
1133:
1134: void
1135: xmlFreeParserCtxt(xmlParserCtxtPtr ctxt)
1136: {
1137: xmlParserInputPtr input;
1.140 daniel 1138: xmlChar *oldname;
1.97 daniel 1139:
1140: if (ctxt == NULL) return;
1141:
1142: while ((input = inputPop(ctxt)) != NULL) {
1143: xmlFreeInputStream(input);
1144: }
1.140 daniel 1145: while ((oldname = namePop(ctxt)) != NULL) {
1146: xmlFree(oldname);
1147: }
1148: if (ctxt->nameTab != NULL) xmlFree(ctxt->nameTab);
1.119 daniel 1149: if (ctxt->nodeTab != NULL) xmlFree(ctxt->nodeTab);
1150: if (ctxt->inputTab != NULL) xmlFree(ctxt->inputTab);
1151: if (ctxt->version != NULL) xmlFree((char *) ctxt->version);
1152: if (ctxt->encoding != NULL) xmlFree((char *) ctxt->encoding);
1.165 daniel 1153: if (ctxt->intSubName != NULL) xmlFree((char *) ctxt->intSubName);
1154: if (ctxt->extSubURI != NULL) xmlFree((char *) ctxt->extSubURI);
1155: if (ctxt->extSubSystem != NULL) xmlFree((char *) ctxt->extSubSystem);
1.97 daniel 1156: if ((ctxt->sax != NULL) && (ctxt->sax != &xmlDefaultSAXHandler))
1.119 daniel 1157: xmlFree(ctxt->sax);
1158: if (ctxt->directory != NULL) xmlFree((char *) ctxt->directory);
1159: xmlFree(ctxt);
1.97 daniel 1160: }
1161:
1162: /**
1163: * xmlNewParserCtxt:
1164: *
1165: * Allocate and initialize a new parser context.
1166: *
1167: * Returns the xmlParserCtxtPtr or NULL
1168: */
1169:
1170: xmlParserCtxtPtr
1171: xmlNewParserCtxt()
1172: {
1173: xmlParserCtxtPtr ctxt;
1174:
1.119 daniel 1175: ctxt = (xmlParserCtxtPtr) xmlMalloc(sizeof(xmlParserCtxt));
1.97 daniel 1176: if (ctxt == NULL) {
1177: fprintf(stderr, "xmlNewParserCtxt : cannot allocate context\n");
1178: perror("malloc");
1179: return(NULL);
1180: }
1.165 daniel 1181: memset(ctxt, 0, sizeof(xmlParserCtxt));
1.97 daniel 1182: xmlInitParserCtxt(ctxt);
1183: return(ctxt);
1184: }
1185:
1186: /**
1187: * xmlClearParserCtxt:
1188: * @ctxt: an XML parser context
1189: *
1190: * Clear (release owned resources) and reinitialize a parser context
1191: */
1192:
1193: void
1194: xmlClearParserCtxt(xmlParserCtxtPtr ctxt)
1195: {
1196: xmlClearNodeInfoSeq(&ctxt->node_seq);
1197: xmlInitParserCtxt(ctxt);
1198: }
1199:
1200: /************************************************************************
1201: * *
1.77 daniel 1202: * Commodity functions to handle entities *
1203: * *
1204: ************************************************************************/
1205:
1.97 daniel 1206:
1207: /**
1208: * xmlParseCharRef:
1209: * @ctxt: an XML parser context
1210: *
1211: * parse Reference declarations
1212: *
1213: * [66] CharRef ::= '&#' [0-9]+ ';' |
1214: * '&#x' [0-9a-fA-F]+ ';'
1215: *
1.98 daniel 1216: * [ WFC: Legal Character ]
1217: * Characters referred to using character references must match the
1218: * production for Char.
1219: *
1.135 daniel 1220: * Returns the value parsed (as an int), 0 in case of error
1.77 daniel 1221: */
1.97 daniel 1222: int
1223: xmlParseCharRef(xmlParserCtxtPtr ctxt) {
1224: int val = 0;
1225:
1.111 daniel 1226: if (ctxt->token != 0) {
1227: val = ctxt->token;
1228: ctxt->token = 0;
1229: return(val);
1230: }
1.152 daniel 1231: if ((RAW == '&') && (NXT(1) == '#') &&
1.97 daniel 1232: (NXT(2) == 'x')) {
1233: SKIP(3);
1.152 daniel 1234: while (RAW != ';') {
1235: if ((RAW >= '0') && (RAW <= '9'))
1.97 daniel 1236: val = val * 16 + (CUR - '0');
1.152 daniel 1237: else if ((RAW >= 'a') && (RAW <= 'f'))
1.97 daniel 1238: val = val * 16 + (CUR - 'a') + 10;
1.152 daniel 1239: else if ((RAW >= 'A') && (RAW <= 'F'))
1.97 daniel 1240: val = val * 16 + (CUR - 'A') + 10;
1241: else {
1.123 daniel 1242: ctxt->errNo = XML_ERR_INVALID_HEX_CHARREF;
1.97 daniel 1243: if ((ctxt->sax != NULL) && (ctxt->sax->error != NULL))
1244: ctxt->sax->error(ctxt->userData,
1245: "xmlParseCharRef: invalid hexadecimal value\n");
1246: ctxt->wellFormed = 0;
1247: val = 0;
1248: break;
1249: }
1250: NEXT;
1251: }
1.164 daniel 1252: if (RAW == ';') {
1253: /* on purpose to avoid reentrancy problems with NEXT and SKIP */
1254: ctxt->nbChars ++;
1255: ctxt->input->cur++;
1256: }
1.152 daniel 1257: } else if ((RAW == '&') && (NXT(1) == '#')) {
1.97 daniel 1258: SKIP(2);
1.152 daniel 1259: while (RAW != ';') {
1260: if ((RAW >= '0') && (RAW <= '9'))
1.97 daniel 1261: val = val * 10 + (CUR - '0');
1262: else {
1.123 daniel 1263: ctxt->errNo = XML_ERR_INVALID_DEC_CHARREF;
1.97 daniel 1264: if ((ctxt->sax != NULL) && (ctxt->sax->error != NULL))
1265: ctxt->sax->error(ctxt->userData,
1266: "xmlParseCharRef: invalid decimal value\n");
1267: ctxt->wellFormed = 0;
1268: val = 0;
1269: break;
1270: }
1271: NEXT;
1272: }
1.164 daniel 1273: if (RAW == ';') {
1274: /* on purpose to avoid reentrancy problems with NEXT and SKIP */
1275: ctxt->nbChars ++;
1276: ctxt->input->cur++;
1277: }
1.97 daniel 1278: } else {
1.123 daniel 1279: ctxt->errNo = XML_ERR_INVALID_CHARREF;
1.97 daniel 1280: if ((ctxt->sax != NULL) && (ctxt->sax->error != NULL))
1.98 daniel 1281: ctxt->sax->error(ctxt->userData,
1282: "xmlParseCharRef: invalid value\n");
1.97 daniel 1283: ctxt->wellFormed = 0;
1284: }
1.98 daniel 1285:
1.97 daniel 1286: /*
1.98 daniel 1287: * [ WFC: Legal Character ]
1288: * Characters referred to using character references must match the
1289: * production for Char.
1.97 daniel 1290: */
1291: if (IS_CHAR(val)) {
1292: return(val);
1293: } else {
1.123 daniel 1294: ctxt->errNo = XML_ERR_INVALID_CHAR;
1.97 daniel 1295: if ((ctxt->sax != NULL) && (ctxt->sax->error != NULL))
1.123 daniel 1296: ctxt->sax->error(ctxt->userData, "CharRef: invalid xmlChar value %d\n",
1.97 daniel 1297: val);
1298: ctxt->wellFormed = 0;
1299: }
1300: return(0);
1.77 daniel 1301: }
1302:
1.96 daniel 1303: /**
1.135 daniel 1304: * xmlParseStringCharRef:
1305: * @ctxt: an XML parser context
1306: * @str: a pointer to an index in the string
1307: *
1308: * parse Reference declarations, variant parsing from a string rather
1309: * than an an input flow.
1310: *
1311: * [66] CharRef ::= '&#' [0-9]+ ';' |
1312: * '&#x' [0-9a-fA-F]+ ';'
1313: *
1314: * [ WFC: Legal Character ]
1315: * Characters referred to using character references must match the
1316: * production for Char.
1317: *
1318: * Returns the value parsed (as an int), 0 in case of error, str will be
1319: * updated to the current value of the index
1320: */
1321: int
1322: xmlParseStringCharRef(xmlParserCtxtPtr ctxt, const xmlChar **str) {
1323: const xmlChar *ptr;
1324: xmlChar cur;
1325: int val = 0;
1326:
1327: if ((str == NULL) || (*str == NULL)) return(0);
1328: ptr = *str;
1329: cur = *ptr;
1.137 daniel 1330: if ((cur == '&') && (ptr[1] == '#') && (ptr[2] == 'x')) {
1.135 daniel 1331: ptr += 3;
1332: cur = *ptr;
1333: while (cur != ';') {
1334: if ((cur >= '0') && (cur <= '9'))
1335: val = val * 16 + (cur - '0');
1336: else if ((cur >= 'a') && (cur <= 'f'))
1337: val = val * 16 + (cur - 'a') + 10;
1338: else if ((cur >= 'A') && (cur <= 'F'))
1339: val = val * 16 + (cur - 'A') + 10;
1340: else {
1341: ctxt->errNo = XML_ERR_INVALID_HEX_CHARREF;
1342: if ((ctxt->sax != NULL) && (ctxt->sax->error != NULL))
1343: ctxt->sax->error(ctxt->userData,
1344: "xmlParseCharRef: invalid hexadecimal value\n");
1345: ctxt->wellFormed = 0;
1346: val = 0;
1347: break;
1348: }
1349: ptr++;
1350: cur = *ptr;
1351: }
1352: if (cur == ';')
1353: ptr++;
1.145 daniel 1354: } else if ((cur == '&') && (ptr[1] == '#')){
1.135 daniel 1355: ptr += 2;
1356: cur = *ptr;
1357: while (cur != ';') {
1358: if ((cur >= '0') && (cur <= '9'))
1359: val = val * 10 + (cur - '0');
1360: else {
1361: ctxt->errNo = XML_ERR_INVALID_DEC_CHARREF;
1362: if ((ctxt->sax != NULL) && (ctxt->sax->error != NULL))
1363: ctxt->sax->error(ctxt->userData,
1364: "xmlParseCharRef: invalid decimal value\n");
1365: ctxt->wellFormed = 0;
1366: val = 0;
1367: break;
1368: }
1369: ptr++;
1370: cur = *ptr;
1371: }
1372: if (cur == ';')
1373: ptr++;
1374: } else {
1375: ctxt->errNo = XML_ERR_INVALID_CHARREF;
1376: if ((ctxt->sax != NULL) && (ctxt->sax->error != NULL))
1377: ctxt->sax->error(ctxt->userData,
1378: "xmlParseCharRef: invalid value\n");
1379: ctxt->wellFormed = 0;
1380: return(0);
1381: }
1382: *str = ptr;
1383:
1384: /*
1385: * [ WFC: Legal Character ]
1386: * Characters referred to using character references must match the
1387: * production for Char.
1388: */
1389: if (IS_CHAR(val)) {
1390: return(val);
1391: } else {
1392: ctxt->errNo = XML_ERR_INVALID_CHAR;
1393: if ((ctxt->sax != NULL) && (ctxt->sax->error != NULL))
1394: ctxt->sax->error(ctxt->userData,
1395: "CharRef: invalid xmlChar value %d\n", val);
1396: ctxt->wellFormed = 0;
1397: }
1398: return(0);
1399: }
1400:
1401: /**
1.96 daniel 1402: * xmlParserHandleReference:
1403: * @ctxt: the parser context
1404: *
1.97 daniel 1405: * [67] Reference ::= EntityRef | CharRef
1406: *
1.96 daniel 1407: * [68] EntityRef ::= '&' Name ';'
1408: *
1.98 daniel 1409: * [ WFC: Entity Declared ]
1410: * the Name given in the entity reference must match that in an entity
1411: * declaration, except that well-formed documents need not declare any
1412: * of the following entities: amp, lt, gt, apos, quot.
1413: *
1414: * [ WFC: Parsed Entity ]
1415: * An entity reference must not contain the name of an unparsed entity
1416: *
1.97 daniel 1417: * [66] CharRef ::= '&#' [0-9]+ ';' |
1418: * '&#x' [0-9a-fA-F]+ ';'
1419: *
1.96 daniel 1420: * A PEReference may have been detectect in the current input stream
1421: * the handling is done accordingly to
1422: * http://www.w3.org/TR/REC-xml#entproc
1423: */
1424: void
1425: xmlParserHandleReference(xmlParserCtxtPtr ctxt) {
1.97 daniel 1426: xmlParserInputPtr input;
1.123 daniel 1427: xmlChar *name;
1.97 daniel 1428: xmlEntityPtr ent = NULL;
1429:
1.126 daniel 1430: if (ctxt->token != 0) {
1431: return;
1432: }
1.152 daniel 1433: if (RAW != '&') return;
1.97 daniel 1434: GROW;
1.152 daniel 1435: if ((RAW == '&') && (NXT(1) == '#')) {
1.97 daniel 1436: switch(ctxt->instate) {
1.140 daniel 1437: case XML_PARSER_ENTITY_DECL:
1438: case XML_PARSER_PI:
1.109 daniel 1439: case XML_PARSER_CDATA_SECTION:
1.140 daniel 1440: case XML_PARSER_COMMENT:
1.168 daniel 1441: case XML_PARSER_SYSTEM_LITERAL:
1.140 daniel 1442: /* we just ignore it there */
1443: return;
1444: case XML_PARSER_START_TAG:
1.109 daniel 1445: return;
1.140 daniel 1446: case XML_PARSER_END_TAG:
1.97 daniel 1447: return;
1448: case XML_PARSER_EOF:
1.123 daniel 1449: ctxt->errNo = XML_ERR_CHARREF_AT_EOF;
1.97 daniel 1450: if ((ctxt->sax != NULL) && (ctxt->sax->error != NULL))
1451: ctxt->sax->error(ctxt->userData, "CharRef at EOF\n");
1452: ctxt->wellFormed = 0;
1453: return;
1454: case XML_PARSER_PROLOG:
1.140 daniel 1455: case XML_PARSER_START:
1456: case XML_PARSER_MISC:
1.123 daniel 1457: ctxt->errNo = XML_ERR_CHARREF_IN_PROLOG;
1.97 daniel 1458: if ((ctxt->sax != NULL) && (ctxt->sax->error != NULL))
1459: ctxt->sax->error(ctxt->userData, "CharRef in prolog!\n");
1460: ctxt->wellFormed = 0;
1461: return;
1462: case XML_PARSER_EPILOG:
1.123 daniel 1463: ctxt->errNo = XML_ERR_CHARREF_IN_EPILOG;
1.97 daniel 1464: if ((ctxt->sax != NULL) && (ctxt->sax->error != NULL))
1465: ctxt->sax->error(ctxt->userData, "CharRef in epilog!\n");
1466: ctxt->wellFormed = 0;
1467: return;
1468: case XML_PARSER_DTD:
1.123 daniel 1469: ctxt->errNo = XML_ERR_CHARREF_IN_DTD;
1.97 daniel 1470: if ((ctxt->sax != NULL) && (ctxt->sax->error != NULL))
1471: ctxt->sax->error(ctxt->userData,
1472: "CharRef are forbiden in DTDs!\n");
1473: ctxt->wellFormed = 0;
1474: return;
1475: case XML_PARSER_ENTITY_VALUE:
1476: /*
1477: * NOTE: in the case of entity values, we don't do the
1.127 daniel 1478: * substitution here since we need the literal
1.97 daniel 1479: * entity value to be able to save the internal
1480: * subset of the document.
1481: * This will be handled by xmlDecodeEntities
1482: */
1483: return;
1484: case XML_PARSER_CONTENT:
1485: case XML_PARSER_ATTRIBUTE_VALUE:
1486: ctxt->token = xmlParseCharRef(ctxt);
1487: return;
1488: }
1489: return;
1490: }
1491:
1492: switch(ctxt->instate) {
1.109 daniel 1493: case XML_PARSER_CDATA_SECTION:
1494: return;
1.140 daniel 1495: case XML_PARSER_PI:
1.97 daniel 1496: case XML_PARSER_COMMENT:
1.168 daniel 1497: case XML_PARSER_SYSTEM_LITERAL:
1498: case XML_PARSER_CONTENT:
1.97 daniel 1499: return;
1.140 daniel 1500: case XML_PARSER_START_TAG:
1501: return;
1502: case XML_PARSER_END_TAG:
1503: return;
1.97 daniel 1504: case XML_PARSER_EOF:
1.123 daniel 1505: ctxt->errNo = XML_ERR_ENTITYREF_AT_EOF;
1.97 daniel 1506: if ((ctxt->sax != NULL) && (ctxt->sax->error != NULL))
1507: ctxt->sax->error(ctxt->userData, "Reference at EOF\n");
1508: ctxt->wellFormed = 0;
1509: return;
1510: case XML_PARSER_PROLOG:
1.140 daniel 1511: case XML_PARSER_START:
1512: case XML_PARSER_MISC:
1.123 daniel 1513: ctxt->errNo = XML_ERR_ENTITYREF_IN_PROLOG;
1.97 daniel 1514: if ((ctxt->sax != NULL) && (ctxt->sax->error != NULL))
1515: ctxt->sax->error(ctxt->userData, "Reference in prolog!\n");
1516: ctxt->wellFormed = 0;
1517: return;
1518: case XML_PARSER_EPILOG:
1.123 daniel 1519: ctxt->errNo = XML_ERR_ENTITYREF_IN_EPILOG;
1.97 daniel 1520: if ((ctxt->sax != NULL) && (ctxt->sax->error != NULL))
1521: ctxt->sax->error(ctxt->userData, "Reference in epilog!\n");
1522: ctxt->wellFormed = 0;
1523: return;
1524: case XML_PARSER_ENTITY_VALUE:
1525: /*
1526: * NOTE: in the case of entity values, we don't do the
1.127 daniel 1527: * substitution here since we need the literal
1.97 daniel 1528: * entity value to be able to save the internal
1529: * subset of the document.
1530: * This will be handled by xmlDecodeEntities
1531: */
1532: return;
1533: case XML_PARSER_ATTRIBUTE_VALUE:
1534: /*
1535: * NOTE: in the case of attributes values, we don't do the
1536: * substitution here unless we are in a mode where
1537: * the parser is explicitely asked to substitute
1538: * entities. The SAX callback is called with values
1539: * without entity substitution.
1540: * This will then be handled by xmlDecodeEntities
1541: */
1.113 daniel 1542: return;
1.97 daniel 1543: case XML_PARSER_ENTITY_DECL:
1544: /*
1545: * we just ignore it there
1546: * the substitution will be done once the entity is referenced
1547: */
1548: return;
1549: case XML_PARSER_DTD:
1.123 daniel 1550: ctxt->errNo = XML_ERR_ENTITYREF_IN_DTD;
1.97 daniel 1551: if ((ctxt->sax != NULL) && (ctxt->sax->error != NULL))
1552: ctxt->sax->error(ctxt->userData,
1553: "Entity references are forbiden in DTDs!\n");
1554: ctxt->wellFormed = 0;
1555: return;
1556: }
1557:
1558: NEXT;
1559: name = xmlScanName(ctxt);
1560: if (name == NULL) {
1.123 daniel 1561: ctxt->errNo = XML_ERR_ENTITYREF_NO_NAME;
1.97 daniel 1562: if ((ctxt->sax != NULL) && (ctxt->sax->error != NULL))
1563: ctxt->sax->error(ctxt->userData, "Entity reference: no name\n");
1564: ctxt->wellFormed = 0;
1565: ctxt->token = '&';
1566: return;
1567: }
1568: if (NXT(xmlStrlen(name)) != ';') {
1.123 daniel 1569: ctxt->errNo = XML_ERR_ENTITYREF_SEMICOL_MISSING;
1.97 daniel 1570: if ((ctxt->sax != NULL) && (ctxt->sax->error != NULL))
1571: ctxt->sax->error(ctxt->userData,
1572: "Entity reference: ';' expected\n");
1573: ctxt->wellFormed = 0;
1574: ctxt->token = '&';
1.119 daniel 1575: xmlFree(name);
1.97 daniel 1576: return;
1577: }
1578: SKIP(xmlStrlen(name) + 1);
1579: if (ctxt->sax != NULL) {
1580: if (ctxt->sax->getEntity != NULL)
1581: ent = ctxt->sax->getEntity(ctxt->userData, name);
1582: }
1.98 daniel 1583:
1584: /*
1585: * [ WFC: Entity Declared ]
1586: * the Name given in the entity reference must match that in an entity
1587: * declaration, except that well-formed documents need not declare any
1588: * of the following entities: amp, lt, gt, apos, quot.
1589: */
1.97 daniel 1590: if (ent == NULL)
1591: ent = xmlGetPredefinedEntity(name);
1592: if (ent == NULL) {
1.123 daniel 1593: ctxt->errNo = XML_ERR_UNDECLARED_ENTITY;
1.97 daniel 1594: if ((ctxt->sax != NULL) && (ctxt->sax->error != NULL))
1595: ctxt->sax->error(ctxt->userData,
1.98 daniel 1596: "Entity reference: entity %s not declared\n",
1597: name);
1.97 daniel 1598: ctxt->wellFormed = 0;
1.119 daniel 1599: xmlFree(name);
1.97 daniel 1600: return;
1601: }
1.98 daniel 1602:
1603: /*
1604: * [ WFC: Parsed Entity ]
1605: * An entity reference must not contain the name of an unparsed entity
1606: */
1.159 daniel 1607: if (ent->etype == XML_EXTERNAL_GENERAL_UNPARSED_ENTITY) {
1.123 daniel 1608: ctxt->errNo = XML_ERR_UNPARSED_ENTITY;
1.98 daniel 1609: if ((ctxt->sax != NULL) && (ctxt->sax->error != NULL))
1610: ctxt->sax->error(ctxt->userData,
1611: "Entity reference to unparsed entity %s\n", name);
1612: ctxt->wellFormed = 0;
1613: }
1614:
1.159 daniel 1615: if (ent->etype == XML_INTERNAL_PREDEFINED_ENTITY) {
1.97 daniel 1616: ctxt->token = ent->content[0];
1.119 daniel 1617: xmlFree(name);
1.97 daniel 1618: return;
1619: }
1620: input = xmlNewEntityInputStream(ctxt, ent);
1621: xmlPushInput(ctxt, input);
1.119 daniel 1622: xmlFree(name);
1.96 daniel 1623: return;
1624: }
1625:
1626: /**
1627: * xmlParserHandlePEReference:
1628: * @ctxt: the parser context
1629: *
1630: * [69] PEReference ::= '%' Name ';'
1631: *
1.98 daniel 1632: * [ WFC: No Recursion ]
1633: * TODO A parsed entity must not contain a recursive
1634: * reference to itself, either directly or indirectly.
1635: *
1636: * [ WFC: Entity Declared ]
1637: * In a document without any DTD, a document with only an internal DTD
1638: * subset which contains no parameter entity references, or a document
1639: * with "standalone='yes'", ... ... The declaration of a parameter
1640: * entity must precede any reference to it...
1641: *
1642: * [ VC: Entity Declared ]
1643: * In a document with an external subset or external parameter entities
1644: * with "standalone='no'", ... ... The declaration of a parameter entity
1645: * must precede any reference to it...
1646: *
1647: * [ WFC: In DTD ]
1648: * Parameter-entity references may only appear in the DTD.
1649: * NOTE: misleading but this is handled.
1650: *
1651: * A PEReference may have been detected in the current input stream
1.96 daniel 1652: * the handling is done accordingly to
1653: * http://www.w3.org/TR/REC-xml#entproc
1654: * i.e.
1655: * - Included in literal in entity values
1656: * - Included as Paraemeter Entity reference within DTDs
1657: */
1658: void
1659: xmlParserHandlePEReference(xmlParserCtxtPtr ctxt) {
1.123 daniel 1660: xmlChar *name;
1.96 daniel 1661: xmlEntityPtr entity = NULL;
1662: xmlParserInputPtr input;
1663:
1.126 daniel 1664: if (ctxt->token != 0) {
1665: return;
1666: }
1.152 daniel 1667: if (RAW != '%') return;
1.96 daniel 1668: switch(ctxt->instate) {
1.109 daniel 1669: case XML_PARSER_CDATA_SECTION:
1670: return;
1.97 daniel 1671: case XML_PARSER_COMMENT:
1672: return;
1.140 daniel 1673: case XML_PARSER_START_TAG:
1674: return;
1675: case XML_PARSER_END_TAG:
1676: return;
1.96 daniel 1677: case XML_PARSER_EOF:
1.123 daniel 1678: ctxt->errNo = XML_ERR_PEREF_AT_EOF;
1.96 daniel 1679: if ((ctxt->sax != NULL) && (ctxt->sax->error != NULL))
1680: ctxt->sax->error(ctxt->userData, "PEReference at EOF\n");
1681: ctxt->wellFormed = 0;
1682: return;
1683: case XML_PARSER_PROLOG:
1.140 daniel 1684: case XML_PARSER_START:
1685: case XML_PARSER_MISC:
1.123 daniel 1686: ctxt->errNo = XML_ERR_PEREF_IN_PROLOG;
1.96 daniel 1687: if ((ctxt->sax != NULL) && (ctxt->sax->error != NULL))
1688: ctxt->sax->error(ctxt->userData, "PEReference in prolog!\n");
1689: ctxt->wellFormed = 0;
1690: return;
1.97 daniel 1691: case XML_PARSER_ENTITY_DECL:
1.96 daniel 1692: case XML_PARSER_CONTENT:
1693: case XML_PARSER_ATTRIBUTE_VALUE:
1.140 daniel 1694: case XML_PARSER_PI:
1.168 daniel 1695: case XML_PARSER_SYSTEM_LITERAL:
1.96 daniel 1696: /* we just ignore it there */
1697: return;
1698: case XML_PARSER_EPILOG:
1.123 daniel 1699: ctxt->errNo = XML_ERR_PEREF_IN_EPILOG;
1.96 daniel 1700: if ((ctxt->sax != NULL) && (ctxt->sax->error != NULL))
1.97 daniel 1701: ctxt->sax->error(ctxt->userData, "PEReference in epilog!\n");
1.96 daniel 1702: ctxt->wellFormed = 0;
1703: return;
1.97 daniel 1704: case XML_PARSER_ENTITY_VALUE:
1705: /*
1706: * NOTE: in the case of entity values, we don't do the
1.127 daniel 1707: * substitution here since we need the literal
1.97 daniel 1708: * entity value to be able to save the internal
1709: * subset of the document.
1710: * This will be handled by xmlDecodeEntities
1711: */
1712: return;
1.96 daniel 1713: case XML_PARSER_DTD:
1.98 daniel 1714: /*
1715: * [WFC: Well-Formedness Constraint: PEs in Internal Subset]
1716: * In the internal DTD subset, parameter-entity references
1717: * can occur only where markup declarations can occur, not
1718: * within markup declarations.
1719: * In that case this is handled in xmlParseMarkupDecl
1720: */
1721: if ((ctxt->external == 0) && (ctxt->inputNr == 1))
1722: return;
1.96 daniel 1723: }
1724:
1725: NEXT;
1726: name = xmlParseName(ctxt);
1727: if (name == NULL) {
1.123 daniel 1728: ctxt->errNo = XML_ERR_PEREF_NO_NAME;
1.96 daniel 1729: if ((ctxt->sax != NULL) && (ctxt->sax->error != NULL))
1730: ctxt->sax->error(ctxt->userData, "xmlHandlePEReference: no name\n");
1731: ctxt->wellFormed = 0;
1732: } else {
1.152 daniel 1733: if (RAW == ';') {
1.96 daniel 1734: NEXT;
1.98 daniel 1735: if ((ctxt->sax != NULL) && (ctxt->sax->getParameterEntity != NULL))
1736: entity = ctxt->sax->getParameterEntity(ctxt->userData, name);
1.96 daniel 1737: if (entity == NULL) {
1.98 daniel 1738:
1739: /*
1740: * [ WFC: Entity Declared ]
1741: * In a document without any DTD, a document with only an
1742: * internal DTD subset which contains no parameter entity
1743: * references, or a document with "standalone='yes'", ...
1744: * ... The declaration of a parameter entity must precede
1745: * any reference to it...
1746: */
1747: if ((ctxt->standalone == 1) ||
1748: ((ctxt->hasExternalSubset == 0) &&
1749: (ctxt->hasPErefs == 0))) {
1750: if ((ctxt->sax != NULL) && (ctxt->sax->error != NULL))
1751: ctxt->sax->error(ctxt->userData,
1752: "PEReference: %%%s; not found\n", name);
1753: ctxt->wellFormed = 0;
1754: } else {
1755: /*
1756: * [ VC: Entity Declared ]
1757: * In a document with an external subset or external
1758: * parameter entities with "standalone='no'", ...
1759: * ... The declaration of a parameter entity must precede
1760: * any reference to it...
1761: */
1762: if ((ctxt->sax != NULL) && (ctxt->sax->warning != NULL))
1763: ctxt->sax->warning(ctxt->userData,
1764: "PEReference: %%%s; not found\n", name);
1765: ctxt->valid = 0;
1766: }
1.96 daniel 1767: } else {
1.159 daniel 1768: if ((entity->etype == XML_INTERNAL_PARAMETER_ENTITY) ||
1769: (entity->etype == XML_EXTERNAL_PARAMETER_ENTITY)) {
1.96 daniel 1770: /*
1.156 daniel 1771: * TODO !!! handle the extra spaces added before and after
1.96 daniel 1772: * c.f. http://www.w3.org/TR/REC-xml#as-PE
1773: */
1774: input = xmlNewEntityInputStream(ctxt, entity);
1775: xmlPushInput(ctxt, input);
1.164 daniel 1776: if ((entity->etype == XML_EXTERNAL_PARAMETER_ENTITY) &&
1777: (RAW == '<') && (NXT(1) == '?') &&
1778: (NXT(2) == 'x') && (NXT(3) == 'm') &&
1779: (NXT(4) == 'l') && (IS_BLANK(NXT(5)))) {
1.172 daniel 1780: xmlParseTextDecl(ctxt);
1.164 daniel 1781: }
1782: if (ctxt->token == 0)
1783: ctxt->token = ' ';
1.96 daniel 1784: } else {
1785: if ((ctxt->sax != NULL) && (ctxt->sax->error != NULL))
1786: ctxt->sax->error(ctxt->userData,
1787: "xmlHandlePEReference: %s is not a parameter entity\n",
1788: name);
1789: ctxt->wellFormed = 0;
1790: }
1791: }
1792: } else {
1.123 daniel 1793: ctxt->errNo = XML_ERR_PEREF_SEMICOL_MISSING;
1.96 daniel 1794: if ((ctxt->sax != NULL) && (ctxt->sax->error != NULL))
1795: ctxt->sax->error(ctxt->userData,
1796: "xmlHandlePEReference: expecting ';'\n");
1797: ctxt->wellFormed = 0;
1798: }
1.119 daniel 1799: xmlFree(name);
1.97 daniel 1800: }
1801: }
1802:
/*
 * Macro used to grow the current buffer.
 *
 * Doubles buffer##_size and reallocates buffer accordingly. It expects
 * an int variable named <buffer>_size next to <buffer>, and must be
 * used in a function returning a pointer: on allocation failure the
 * old block is freed and the enclosing function returns NULL.
 * The result of xmlRealloc() goes through a temporary so that the old
 * block is not lost (and leaked) when the reallocation fails.
 */
#define growBuffer(buffer) {						\
    xmlChar *buffer##_grown;						\
    buffer##_size *= 2;							\
    buffer##_grown = (xmlChar *)					\
		xmlRealloc(buffer, buffer##_size * sizeof(xmlChar));	\
    if (buffer##_grown == NULL) {					\
	perror("realloc failed");					\
	xmlFree(buffer);						\
	return(NULL);							\
    }									\
    buffer = buffer##_grown;						\
}
1.77 daniel 1815:
1816: /**
1817: * xmlDecodeEntities:
1818: * @ctxt: the parser context
1819: * @what: combination of XML_SUBSTITUTE_REF and XML_SUBSTITUTE_PEREF
1820: * @len: the len to decode (in bytes !), -1 for no size limit
1.123 daniel 1821: * @end: an end marker xmlChar, 0 if none
1822: * @end2: an end marker xmlChar, 0 if none
1823: * @end3: an end marker xmlChar, 0 if none
1.77 daniel 1824: *
1825: * [67] Reference ::= EntityRef | CharRef
1826: *
1827: * [69] PEReference ::= '%' Name ';'
1828: *
1829: * Returns A newly allocated string with the substitution done. The caller
1830: * must deallocate it !
1831: */
1.123 daniel 1832: xmlChar *
1.77 daniel 1833: xmlDecodeEntities(xmlParserCtxtPtr ctxt, int len, int what,
1.123 daniel 1834: xmlChar end, xmlChar end2, xmlChar end3) {
1835: xmlChar *buffer = NULL;
1.78 daniel 1836: int buffer_size = 0;
1.161 daniel 1837: int nbchars = 0;
1.78 daniel 1838:
1.123 daniel 1839: xmlChar *current = NULL;
1.77 daniel 1840: xmlEntityPtr ent;
1841: unsigned int max = (unsigned int) len;
1.161 daniel 1842: int c,l;
1.77 daniel 1843:
1844: /*
1845: * allocate a translation buffer.
1846: */
1.140 daniel 1847: buffer_size = XML_PARSER_BIG_BUFFER_SIZE;
1.123 daniel 1848: buffer = (xmlChar *) xmlMalloc(buffer_size * sizeof(xmlChar));
1.77 daniel 1849: if (buffer == NULL) {
1850: perror("xmlDecodeEntities: malloc failed");
1851: return(NULL);
1852: }
1853:
1.78 daniel 1854: /*
1855: * Ok loop until we reach one of the ending char or a size limit.
1856: */
1.161 daniel 1857: c = CUR_CHAR(l);
1858: while ((nbchars < max) && (c != end) &&
1859: (c != end2) && (c != end3)) {
1.77 daniel 1860:
1.161 daniel 1861: if (c == 0) break;
1862: if (((c == '&') && (ctxt->token != '&')) && (NXT(1) == '#')) {
1.98 daniel 1863: int val = xmlParseCharRef(ctxt);
1.161 daniel 1864: COPY_BUF(0,buffer,nbchars,val);
1865: NEXTL(l);
1866: } else if ((c == '&') && (ctxt->token != '&') &&
1867: (what & XML_SUBSTITUTE_REF)) {
1.98 daniel 1868: ent = xmlParseEntityRef(ctxt);
1869: if ((ent != NULL) &&
1870: (ctxt->replaceEntities != 0)) {
1871: current = ent->content;
1872: while (*current != 0) {
1.161 daniel 1873: buffer[nbchars++] = *current++;
1874: if (nbchars > buffer_size - XML_PARSER_BUFFER_SIZE) {
1.98 daniel 1875: growBuffer(buffer);
1.77 daniel 1876: }
1877: }
1.98 daniel 1878: } else if (ent != NULL) {
1.123 daniel 1879: const xmlChar *cur = ent->name;
1.98 daniel 1880:
1.161 daniel 1881: buffer[nbchars++] = '&';
1882: if (nbchars > buffer_size - XML_PARSER_BUFFER_SIZE) {
1.98 daniel 1883: growBuffer(buffer);
1884: }
1.161 daniel 1885: while (*cur != 0) {
1886: buffer[nbchars++] = *cur++;
1887: }
1888: buffer[nbchars++] = ';';
1.77 daniel 1889: }
1.161 daniel 1890: } else if (c == '%' && (what & XML_SUBSTITUTE_PEREF)) {
1.97 daniel 1891: /*
1.77 daniel 1892: * a PEReference induce to switch the entity flow,
1893: * we break here to flush the current set of chars
1894: * parsed if any. We will be called back later.
1.97 daniel 1895: */
1.91 daniel 1896: if (nbchars != 0) break;
1.77 daniel 1897:
1898: xmlParsePEReference(ctxt);
1.79 daniel 1899:
1.97 daniel 1900: /*
1.79 daniel 1901: * Pop-up of finished entities.
1.97 daniel 1902: */
1.152 daniel 1903: while ((RAW == 0) && (ctxt->inputNr > 1))
1.79 daniel 1904: xmlPopInput(ctxt);
1905:
1.98 daniel 1906: break;
1.77 daniel 1907: } else {
1.161 daniel 1908: COPY_BUF(l,buffer,nbchars,c);
1909: NEXTL(l);
1910: if (nbchars > buffer_size - XML_PARSER_BUFFER_SIZE) {
1.86 daniel 1911: growBuffer(buffer);
1912: }
1.77 daniel 1913: }
1.161 daniel 1914: c = CUR_CHAR(l);
1.77 daniel 1915: }
1.161 daniel 1916: buffer[nbchars++] = 0;
1.77 daniel 1917: return(buffer);
1918: }
1919:
1.135 daniel 1920: /**
1921: * xmlStringDecodeEntities:
1922: * @ctxt: the parser context
1923: * @str: the input string
1924: * @what: combination of XML_SUBSTITUTE_REF and XML_SUBSTITUTE_PEREF
1925: * @end: an end marker xmlChar, 0 if none
1926: * @end2: an end marker xmlChar, 0 if none
1927: * @end3: an end marker xmlChar, 0 if none
1928: *
1929: * [67] Reference ::= EntityRef | CharRef
1930: *
1931: * [69] PEReference ::= '%' Name ';'
1932: *
1933: * Returns A newly allocated string with the substitution done. The caller
1934: * must deallocate it !
1935: */
1936: xmlChar *
1937: xmlStringDecodeEntities(xmlParserCtxtPtr ctxt, const xmlChar *str, int what,
1938: xmlChar end, xmlChar end2, xmlChar end3) {
1939: xmlChar *buffer = NULL;
1940: int buffer_size = 0;
1941: xmlChar *out = NULL;
1942:
1943: xmlChar *current = NULL;
1944: xmlEntityPtr ent;
1945: xmlChar cur;
1946:
1947: /*
1948: * allocate a translation buffer.
1949: */
1.140 daniel 1950: buffer_size = XML_PARSER_BIG_BUFFER_SIZE;
1.135 daniel 1951: buffer = (xmlChar *) xmlMalloc(buffer_size * sizeof(xmlChar));
1952: if (buffer == NULL) {
1953: perror("xmlDecodeEntities: malloc failed");
1954: return(NULL);
1955: }
1956: out = buffer;
1957:
1958: /*
1959: * Ok loop until we reach one of the ending char or a size limit.
1960: */
1961: cur = *str;
1962: while ((cur != 0) && (cur != end) &&
1963: (cur != end2) && (cur != end3)) {
1964:
1965: if (cur == 0) break;
1966: if ((cur == '&') && (str[1] == '#')) {
1967: int val = xmlParseStringCharRef(ctxt, &str);
1968: if (val != 0)
1969: *out++ = val;
1970: } else if ((cur == '&') && (what & XML_SUBSTITUTE_REF)) {
1971: ent = xmlParseStringEntityRef(ctxt, &str);
1972: if ((ent != NULL) &&
1973: (ctxt->replaceEntities != 0)) {
1974: current = ent->content;
1975: while (*current != 0) {
1976: *out++ = *current++;
1.140 daniel 1977: if (out - buffer > buffer_size - XML_PARSER_BUFFER_SIZE) {
1.135 daniel 1978: int index = out - buffer;
1979:
1980: growBuffer(buffer);
1981: out = &buffer[index];
1982: }
1983: }
1984: } else if (ent != NULL) {
1985: int i = xmlStrlen(ent->name);
1986: const xmlChar *cur = ent->name;
1987:
1988: *out++ = '&';
1.140 daniel 1989: if (out - buffer > buffer_size - i - XML_PARSER_BUFFER_SIZE) {
1.135 daniel 1990: int index = out - buffer;
1991:
1992: growBuffer(buffer);
1993: out = &buffer[index];
1994: }
1995: for (;i > 0;i--)
1996: *out++ = *cur++;
1997: *out++ = ';';
1998: }
1999: } else if (cur == '%' && (what & XML_SUBSTITUTE_PEREF)) {
2000: ent = xmlParseStringPEReference(ctxt, &str);
2001: if (ent != NULL) {
2002: current = ent->content;
2003: while (*current != 0) {
2004: *out++ = *current++;
1.140 daniel 2005: if (out - buffer > buffer_size - XML_PARSER_BUFFER_SIZE) {
1.135 daniel 2006: int index = out - buffer;
2007:
2008: growBuffer(buffer);
2009: out = &buffer[index];
2010: }
2011: }
2012: }
2013: } else {
1.156 daniel 2014: /* invalid for UTF-8 , use COPY(out); !!! */
1.135 daniel 2015: *out++ = cur;
1.140 daniel 2016: if (out - buffer > buffer_size - XML_PARSER_BUFFER_SIZE) {
1.135 daniel 2017: int index = out - buffer;
2018:
2019: growBuffer(buffer);
2020: out = &buffer[index];
2021: }
2022: str++;
2023: }
2024: cur = *str;
2025: }
2026: *out = 0;
2027: return(buffer);
2028: }
2029:
1.1 veillard 2030:
1.28 daniel 2031: /************************************************************************
2032: * *
1.75 daniel 2033: * Commodity functions to handle encodings *
2034: * *
2035: ************************************************************************/
2036:
/*
 * xmlLangIsLetter:
 * @c:  the character to test
 *
 * Returns 1 if @c is in [a-z] or [A-Z], 0 otherwise.
 */
static int
xmlLangIsLetter(xmlChar c) {
    return(((c >= 'A') && (c <= 'Z')) || ((c >= 'a') && (c <= 'z')));
}

/*
 * xmlCheckLanguageID
 * @lang:  pointer to the string value
 *
 * Checks that the value conforms to the LanguageID production:
 *
 * [33] LanguageID ::= Langcode ('-' Subcode)*
 * [34] Langcode ::= ISO639Code |  IanaCode |  UserCode
 * [35] ISO639Code ::= ([a-z] | [A-Z]) ([a-z] | [A-Z])
 * [36] IanaCode ::= ('i' | 'I') '-' ([a-z] | [A-Z])+
 * [37] UserCode ::= ('x' | 'X') '-' ([a-z] | [A-Z])+
 * [38] Subcode ::= ([a-z] | [A-Z])+
 *
 * A NULL @lang is rejected. IANA and user codes must carry at least one
 * letter after the "i-" / "x-" prefix, as required by [36] and [37].
 *
 * Returns 1 if correct 0 otherwise
 **/
int
xmlCheckLanguageID(const xmlChar *lang) {
    const xmlChar *cur = lang;

    if (cur == NULL)
        return(0);
    if (((cur[0] == 'i') || (cur[0] == 'I') ||
         (cur[0] == 'x') || (cur[0] == 'X')) && (cur[1] == '-')) {
        /*
         * IANA code or User code: prefix followed by one or more letters
         */
        cur += 2;
        if (!xmlLangIsLetter(cur[0]))
            return(0);
        while (xmlLangIsLetter(cur[0]))
            cur++;
    } else if (xmlLangIsLetter(cur[0])) {
        /*
         * ISO639: exactly two letters
         */
        cur++;
        if (!xmlLangIsLetter(cur[0]))
            return(0);
        cur++;
    } else
        return(0);

    /*
     * Any number of '-' Subcode, each Subcode being one or more letters
     */
    while (cur[0] != 0) {
        if (cur[0] != '-')
            return(0);
        cur++;
        if (!xmlLangIsLetter(cur[0]))
            return(0);
        while (xmlLangIsLetter(cur[0]))
            cur++;
    }
    return(1);
}
2104:
1.75 daniel 2105: /**
2106: * xmlSwitchEncoding:
2107: * @ctxt: the parser context
1.124 daniel 2108: * @enc: the encoding value (number)
1.75 daniel 2109: *
2110: * change the input functions when discovering the character encoding
2111: * of a given entity.
2112: */
2113: void
2114: xmlSwitchEncoding(xmlParserCtxtPtr ctxt, xmlCharEncoding enc)
2115: {
1.156 daniel 2116: xmlCharEncodingHandlerPtr handler;
2117:
2118: handler = xmlGetCharEncodingHandler(enc);
2119: if (handler != NULL) {
2120: if (ctxt->input != NULL) {
2121: if (ctxt->input->buf != NULL) {
2122: if (ctxt->input->buf->encoder != NULL) {
2123: if ((ctxt->sax != NULL) && (ctxt->sax->error != NULL))
2124: ctxt->sax->error(ctxt->userData,
2125: "xmlSwitchEncoding : encoder already regitered\n");
2126: return;
2127: }
2128: ctxt->input->buf->encoder = handler;
2129:
2130: /*
2131: * Is there already some content down the pipe to convert
2132: */
2133: if ((ctxt->input->buf->buffer != NULL) &&
2134: (ctxt->input->buf->buffer->use > 0)) {
2135: xmlChar *buf;
2136: int res, len, size;
2137: int processed;
2138:
2139: /*
2140: * Specific handling of the Byte Order Mark for
2141: * UTF-16
2142: */
2143: if ((enc == XML_CHAR_ENCODING_UTF16LE) &&
2144: (ctxt->input->cur[0] == 0xFF) &&
2145: (ctxt->input->cur[1] == 0xFE)) {
2146: SKIP(2);
2147: }
2148: if ((enc == XML_CHAR_ENCODING_UTF16BE) &&
2149: (ctxt->input->cur[0] == 0xFE) &&
2150: (ctxt->input->cur[1] == 0xFF)) {
2151: SKIP(2);
2152: }
2153:
2154: /*
2155: * convert the non processed part
2156: */
2157: processed = ctxt->input->cur - ctxt->input->base;
2158: len = ctxt->input->buf->buffer->use - processed;
2159:
2160: if (len <= 0) {
2161: return;
2162: }
2163: size = ctxt->input->buf->buffer->use * 4;
2164: if (size < 4000)
2165: size = 4000;
1.167 daniel 2166: retry_larger:
1.160 daniel 2167: buf = (xmlChar *) xmlMalloc(size + 1);
1.156 daniel 2168: if (buf == NULL) {
2169: if ((ctxt->sax != NULL) && (ctxt->sax->error != NULL))
2170: ctxt->sax->error(ctxt->userData,
2171: "xmlSwitchEncoding : out of memory\n");
2172: return;
2173: }
1.160 daniel 2174: /* TODO !!! Handling of buf too small */
1.156 daniel 2175: res = handler->input(buf, size, ctxt->input->cur, &len);
1.167 daniel 2176: if (res == -1) {
2177: size *= 2;
2178: xmlFree(buf);
2179: goto retry_larger;
2180: }
1.156 daniel 2181: if ((res < 0) ||
2182: (len != ctxt->input->buf->buffer->use - processed)) {
2183: if ((ctxt->sax != NULL) && (ctxt->sax->error != NULL))
2184: ctxt->sax->error(ctxt->userData,
2185: "xmlSwitchEncoding : conversion failed\n");
2186: xmlFree(buf);
2187: return;
2188: }
1.167 daniel 2189:
1.156 daniel 2190: /*
2191: * Conversion succeeded, get rid of the old buffer
2192: */
2193: xmlFree(ctxt->input->buf->buffer->content);
2194: ctxt->input->buf->buffer->content = buf;
2195: ctxt->input->base = buf;
2196: ctxt->input->cur = buf;
2197: ctxt->input->buf->buffer->size = size;
2198: ctxt->input->buf->buffer->use = res;
1.160 daniel 2199: buf[res] = 0;
1.156 daniel 2200: }
2201: return;
2202: } else {
2203: if (ctxt->input->length == 0) {
2204: /*
2205: * When parsing a static memory array one must know the
2206: * size to be able to convert the buffer.
2207: */
2208: if ((ctxt->sax != NULL) && (ctxt->sax->error != NULL))
2209: ctxt->sax->error(ctxt->userData,
2210: "xmlSwitchEncoding : no input\n");
2211: return;
2212: } else {
2213: xmlChar *buf;
2214: int res, len;
2215: int processed = ctxt->input->cur - ctxt->input->base;
2216:
2217: /*
2218: * convert the non processed part
2219: */
2220: len = ctxt->input->length - processed;
2221: if (len <= 0) {
2222: if ((ctxt->sax != NULL) && (ctxt->sax->error != NULL))
2223: ctxt->sax->error(ctxt->userData,
2224: "xmlSwitchEncoding : input fully consumed?\n");
2225: return;
2226: }
2227: buf = (xmlChar *) xmlMalloc(ctxt->input->length * 4);
2228: if (buf == NULL) {
2229: if ((ctxt->sax != NULL) && (ctxt->sax->error != NULL))
2230: ctxt->sax->error(ctxt->userData,
2231: "xmlSwitchEncoding : out of memory\n");
2232: return;
2233: }
2234: res = handler->input(buf, ctxt->input->length * 4,
2235: ctxt->input->cur, &len);
2236: if ((res < 0) ||
2237: (len != ctxt->input->length - processed)) {
2238: if ((ctxt->sax != NULL) && (ctxt->sax->error != NULL))
2239: ctxt->sax->error(ctxt->userData,
2240: "xmlSwitchEncoding : conversion failed\n");
2241: xmlFree(buf);
2242: return;
2243: }
2244: /*
2245: * Conversion succeeded, get rid of the old buffer
2246: */
2247: if ((ctxt->input->free != NULL) &&
2248: (ctxt->input->base != NULL))
2249: ctxt->input->free((xmlChar *) ctxt->input->base);
2250: ctxt->input->base = ctxt->input->cur = buf;
2251: ctxt->input->length = res;
2252: }
2253: }
2254: } else {
2255: if ((ctxt->sax != NULL) && (ctxt->sax->error != NULL))
2256: ctxt->sax->error(ctxt->userData,
2257: "xmlSwitchEncoding : no input\n");
2258: }
2259: }
2260:
1.75 daniel 2261: switch (enc) {
2262: case XML_CHAR_ENCODING_ERROR:
1.123 daniel 2263: ctxt->errNo = XML_ERR_UNKNOWN_ENCODING;
1.75 daniel 2264: if ((ctxt->sax != NULL) && (ctxt->sax->error != NULL))
2265: ctxt->sax->error(ctxt->userData, "encoding unknown\n");
2266: ctxt->wellFormed = 0;
2267: break;
2268: case XML_CHAR_ENCODING_NONE:
2269: /* let's assume it's UTF-8 without the XML decl */
2270: return;
2271: case XML_CHAR_ENCODING_UTF8:
2272: /* default encoding, no conversion should be needed */
2273: return;
2274: case XML_CHAR_ENCODING_UTF16LE:
1.123 daniel 2275: ctxt->errNo = XML_ERR_UNSUPPORTED_ENCODING;
1.75 daniel 2276: if ((ctxt->sax != NULL) && (ctxt->sax->error != NULL))
2277: ctxt->sax->error(ctxt->userData,
2278: "char encoding UTF16 little endian not supported\n");
2279: break;
2280: case XML_CHAR_ENCODING_UTF16BE:
1.123 daniel 2281: ctxt->errNo = XML_ERR_UNSUPPORTED_ENCODING;
1.75 daniel 2282: if ((ctxt->sax != NULL) && (ctxt->sax->error != NULL))
2283: ctxt->sax->error(ctxt->userData,
2284: "char encoding UTF16 big endian not supported\n");
2285: break;
2286: case XML_CHAR_ENCODING_UCS4LE:
1.123 daniel 2287: ctxt->errNo = XML_ERR_UNSUPPORTED_ENCODING;
1.75 daniel 2288: if ((ctxt->sax != NULL) && (ctxt->sax->error != NULL))
2289: ctxt->sax->error(ctxt->userData,
2290: "char encoding USC4 little endian not supported\n");
2291: break;
2292: case XML_CHAR_ENCODING_UCS4BE:
1.123 daniel 2293: ctxt->errNo = XML_ERR_UNSUPPORTED_ENCODING;
1.75 daniel 2294: if ((ctxt->sax != NULL) && (ctxt->sax->error != NULL))
2295: ctxt->sax->error(ctxt->userData,
2296: "char encoding USC4 big endian not supported\n");
2297: break;
2298: case XML_CHAR_ENCODING_EBCDIC:
1.123 daniel 2299: ctxt->errNo = XML_ERR_UNSUPPORTED_ENCODING;
1.75 daniel 2300: if ((ctxt->sax != NULL) && (ctxt->sax->error != NULL))
2301: ctxt->sax->error(ctxt->userData,
2302: "char encoding EBCDIC not supported\n");
2303: break;
2304: case XML_CHAR_ENCODING_UCS4_2143:
1.123 daniel 2305: ctxt->errNo = XML_ERR_UNSUPPORTED_ENCODING;
1.75 daniel 2306: if ((ctxt->sax != NULL) && (ctxt->sax->error != NULL))
2307: ctxt->sax->error(ctxt->userData,
2308: "char encoding UCS4 2143 not supported\n");
2309: break;
2310: case XML_CHAR_ENCODING_UCS4_3412:
1.123 daniel 2311: ctxt->errNo = XML_ERR_UNSUPPORTED_ENCODING;
1.75 daniel 2312: if ((ctxt->sax != NULL) && (ctxt->sax->error != NULL))
2313: ctxt->sax->error(ctxt->userData,
2314: "char encoding UCS4 3412 not supported\n");
2315: break;
2316: case XML_CHAR_ENCODING_UCS2:
1.123 daniel 2317: ctxt->errNo = XML_ERR_UNSUPPORTED_ENCODING;
1.75 daniel 2318: if ((ctxt->sax != NULL) && (ctxt->sax->error != NULL))
2319: ctxt->sax->error(ctxt->userData,
2320: "char encoding UCS2 not supported\n");
2321: break;
2322: case XML_CHAR_ENCODING_8859_1:
1.123 daniel 2323: ctxt->errNo = XML_ERR_UNSUPPORTED_ENCODING;
1.75 daniel 2324: if ((ctxt->sax != NULL) && (ctxt->sax->error != NULL))
2325: ctxt->sax->error(ctxt->userData,
2326: "char encoding ISO_8859_1 ISO Latin 1 not supported\n");
2327: break;
2328: case XML_CHAR_ENCODING_8859_2:
1.123 daniel 2329: ctxt->errNo = XML_ERR_UNSUPPORTED_ENCODING;
1.75 daniel 2330: if ((ctxt->sax != NULL) && (ctxt->sax->error != NULL))
2331: ctxt->sax->error(ctxt->userData,
2332: "char encoding ISO_8859_2 ISO Latin 2 not supported\n");
2333: break;
2334: case XML_CHAR_ENCODING_8859_3:
1.123 daniel 2335: ctxt->errNo = XML_ERR_UNSUPPORTED_ENCODING;
1.75 daniel 2336: if ((ctxt->sax != NULL) && (ctxt->sax->error != NULL))
2337: ctxt->sax->error(ctxt->userData,
2338: "char encoding ISO_8859_3 not supported\n");
2339: break;
2340: case XML_CHAR_ENCODING_8859_4:
1.123 daniel 2341: ctxt->errNo = XML_ERR_UNSUPPORTED_ENCODING;
1.75 daniel 2342: if ((ctxt->sax != NULL) && (ctxt->sax->error != NULL))
2343: ctxt->sax->error(ctxt->userData,
2344: "char encoding ISO_8859_4 not supported\n");
2345: break;
2346: case XML_CHAR_ENCODING_8859_5:
1.123 daniel 2347: ctxt->errNo = XML_ERR_UNSUPPORTED_ENCODING;
1.75 daniel 2348: if ((ctxt->sax != NULL) && (ctxt->sax->error != NULL))
2349: ctxt->sax->error(ctxt->userData,
2350: "char encoding ISO_8859_5 not supported\n");
2351: break;
2352: case XML_CHAR_ENCODING_8859_6:
1.123 daniel 2353: ctxt->errNo = XML_ERR_UNSUPPORTED_ENCODING;
1.75 daniel 2354: if ((ctxt->sax != NULL) && (ctxt->sax->error != NULL))
2355: ctxt->sax->error(ctxt->userData,
2356: "char encoding ISO_8859_6 not supported\n");
2357: break;
2358: case XML_CHAR_ENCODING_8859_7:
1.123 daniel 2359: ctxt->errNo = XML_ERR_UNSUPPORTED_ENCODING;
1.75 daniel 2360: if ((ctxt->sax != NULL) && (ctxt->sax->error != NULL))
2361: ctxt->sax->error(ctxt->userData,
2362: "char encoding ISO_8859_7 not supported\n");
2363: break;
2364: case XML_CHAR_ENCODING_8859_8:
1.123 daniel 2365: ctxt->errNo = XML_ERR_UNSUPPORTED_ENCODING;
1.75 daniel 2366: if ((ctxt->sax != NULL) && (ctxt->sax->error != NULL))
2367: ctxt->sax->error(ctxt->userData,
2368: "char encoding ISO_8859_8 not supported\n");
2369: break;
2370: case XML_CHAR_ENCODING_8859_9:
1.123 daniel 2371: ctxt->errNo = XML_ERR_UNSUPPORTED_ENCODING;
1.75 daniel 2372: if ((ctxt->sax != NULL) && (ctxt->sax->error != NULL))
2373: ctxt->sax->error(ctxt->userData,
2374: "char encoding ISO_8859_9 not supported\n");
2375: break;
2376: case XML_CHAR_ENCODING_2022_JP:
1.123 daniel 2377: ctxt->errNo = XML_ERR_UNSUPPORTED_ENCODING;
1.75 daniel 2378: if ((ctxt->sax != NULL) && (ctxt->sax->error != NULL))
2379: ctxt->sax->error(ctxt->userData,
2380: "char encoding ISO-2022-JPnot supported\n");
2381: break;
2382: case XML_CHAR_ENCODING_SHIFT_JIS:
1.123 daniel 2383: ctxt->errNo = XML_ERR_UNSUPPORTED_ENCODING;
1.75 daniel 2384: if ((ctxt->sax != NULL) && (ctxt->sax->error != NULL))
2385: ctxt->sax->error(ctxt->userData,
2386: "char encoding Shift_JISnot supported\n");
2387: break;
2388: case XML_CHAR_ENCODING_EUC_JP:
1.123 daniel 2389: ctxt->errNo = XML_ERR_UNSUPPORTED_ENCODING;
1.75 daniel 2390: if ((ctxt->sax != NULL) && (ctxt->sax->error != NULL))
2391: ctxt->sax->error(ctxt->userData,
2392: "char encoding EUC-JPnot supported\n");
2393: break;
2394: }
2395: }
2396:
2397: /************************************************************************
2398: * *
1.123 daniel 2399: * Commodity functions to handle xmlChars *
1.28 daniel 2400: * *
2401: ************************************************************************/
2402:
1.50 daniel 2403: /**
2404: * xmlStrndup:
1.123 daniel 2405: * @cur: the input xmlChar *
1.50 daniel 2406: * @len: the len of @cur
2407: *
1.123 daniel 2408: * a strndup for array of xmlChar's
1.68 daniel 2409: *
1.123 daniel 2410: * Returns a new xmlChar * or NULL
1.1 veillard 2411: */
1.123 daniel 2412: xmlChar *
2413: xmlStrndup(const xmlChar *cur, int len) {
1.135 daniel 2414: xmlChar *ret;
2415:
2416: if ((cur == NULL) || (len < 0)) return(NULL);
2417: ret = xmlMalloc((len + 1) * sizeof(xmlChar));
1.1 veillard 2418: if (ret == NULL) {
1.86 daniel 2419: fprintf(stderr, "malloc of %ld byte failed\n",
1.123 daniel 2420: (len + 1) * (long)sizeof(xmlChar));
1.1 veillard 2421: return(NULL);
2422: }
1.123 daniel 2423: memcpy(ret, cur, len * sizeof(xmlChar));
1.1 veillard 2424: ret[len] = 0;
2425: return(ret);
2426: }
2427:
1.50 daniel 2428: /**
2429: * xmlStrdup:
1.123 daniel 2430: * @cur: the input xmlChar *
1.50 daniel 2431: *
1.152 daniel 2432: * a strdup for array of xmlChar's. Since they are supposed to be
2433: * encoded in UTF-8 or an encoding with 8bit based chars, we assume
2434: * a termination mark of '0'.
1.68 daniel 2435: *
1.123 daniel 2436: * Returns a new xmlChar * or NULL
1.1 veillard 2437: */
1.123 daniel 2438: xmlChar *
2439: xmlStrdup(const xmlChar *cur) {
2440: const xmlChar *p = cur;
1.1 veillard 2441:
1.135 daniel 2442: if (cur == NULL) return(NULL);
1.152 daniel 2443: while (*p != 0) p++;
1.1 veillard 2444: return(xmlStrndup(cur, p - cur));
2445: }
2446:
1.50 daniel 2447: /**
2448: * xmlCharStrndup:
2449: * @cur: the input char *
2450: * @len: the len of @cur
2451: *
1.123 daniel 2452: * a strndup for char's to xmlChar's
1.68 daniel 2453: *
1.123 daniel 2454: * Returns a new xmlChar * or NULL
1.45 daniel 2455: */
2456:
1.123 daniel 2457: xmlChar *
1.55 daniel 2458: xmlCharStrndup(const char *cur, int len) {
1.45 daniel 2459: int i;
1.135 daniel 2460: xmlChar *ret;
2461:
2462: if ((cur == NULL) || (len < 0)) return(NULL);
2463: ret = xmlMalloc((len + 1) * sizeof(xmlChar));
1.45 daniel 2464: if (ret == NULL) {
1.86 daniel 2465: fprintf(stderr, "malloc of %ld byte failed\n",
1.123 daniel 2466: (len + 1) * (long)sizeof(xmlChar));
1.45 daniel 2467: return(NULL);
2468: }
2469: for (i = 0;i < len;i++)
1.123 daniel 2470: ret[i] = (xmlChar) cur[i];
1.45 daniel 2471: ret[len] = 0;
2472: return(ret);
2473: }
2474:
1.50 daniel 2475: /**
2476: * xmlCharStrdup:
2477: * @cur: the input char *
2478: * @len: the len of @cur
2479: *
1.123 daniel 2480: * a strdup for char's to xmlChar's
1.68 daniel 2481: *
1.123 daniel 2482: * Returns a new xmlChar * or NULL
1.45 daniel 2483: */
2484:
1.123 daniel 2485: xmlChar *
1.55 daniel 2486: xmlCharStrdup(const char *cur) {
1.45 daniel 2487: const char *p = cur;
2488:
1.135 daniel 2489: if (cur == NULL) return(NULL);
1.45 daniel 2490: while (*p != '\0') p++;
2491: return(xmlCharStrndup(cur, p - cur));
2492: }
2493:
/**
 * xmlStrcmp:
 * @str1:  the first xmlChar *
 * @str2:  the second xmlChar *
 *
 * a strcmp for xmlChar's. A NULL string sorts before any non NULL one
 * and two NULL strings are equal.
 *
 * Returns the integer result of the comparison: 0 if the strings are
 *         equal, otherwise the difference between the first pair of
 *         xmlChar which differ
 */

int
xmlStrcmp(const xmlChar *str1, const xmlChar *str2) {
    register int tmp;

    if ((str1 == NULL) && (str2 == NULL)) return(0);
    if (str1 == NULL) return(-1);
    if (str2 == NULL) return(1);
    do {
        tmp = *str1++ - *str2;
        if (tmp != 0) return(tmp);
        /*
         * Both xmlChar are equal here: stop once it was the terminator,
         * never read past it.
         */
    } while (*str2++ != 0);
    return(0);
}
2517:
/**
 * xmlStrncmp:
 * @str1:  the first xmlChar *
 * @str2:  the second xmlChar *
 * @len:  the max comparison length
 *
 * a strncmp for xmlChar's. At most @len xmlChar are compared; a @len
 * lower or equal to 0 always gives 0. A NULL string sorts before any
 * non NULL one and two NULL strings are equal.
 *
 * Returns the integer result of the comparison: 0 if the compared parts
 *         are equal, otherwise the difference between the first pair of
 *         xmlChar which differ
 */

int
xmlStrncmp(const xmlChar *str1, const xmlChar *str2, int len) {
    register int tmp;

    if (len <= 0) return(0);
    if ((str1 == NULL) && (str2 == NULL)) return(0);
    if (str1 == NULL) return(-1);
    if (str2 == NULL) return(1);
    do {
        tmp = *str1++ - *str2;
        if (tmp != 0) return(tmp);
        len--;
        if (len <= 0) return(0);
        /*
         * Both xmlChar are equal here: stop once it was the terminator,
         * never read past it.
         */
    } while (*str2++ != 0);
    return(0);
}
2545:
/**
 * xmlStrchr:
 * @str:  the xmlChar * array
 * @val:  the xmlChar to search
 *
 * a strchr for xmlChar's. The terminating 0 is not part of the search,
 * so looking for 0 finds nothing.
 *
 * Returns the xmlChar * for the first occurence or NULL.
 */

const xmlChar *
xmlStrchr(const xmlChar *str, xmlChar val) {
    const xmlChar *pos;

    if (str == NULL)
        return(NULL);

    for (pos = str; *pos != 0; pos++) {
        if (*pos == val)
            return(pos);
    }
    return(NULL);
}
2565:
2566: /**
2567: * xmlStrstr:
1.123 daniel 2568: * @str: the xmlChar * array (haystack)
2569: * @val: the xmlChar to search (needle)
1.89 daniel 2570: *
1.123 daniel 2571: * a strstr for xmlChar's
1.89 daniel 2572: *
1.123 daniel 2573: * Returns the xmlChar * for the first occurence or NULL.
1.89 daniel 2574: */
2575:
1.123 daniel 2576: const xmlChar *
2577: xmlStrstr(const xmlChar *str, xmlChar *val) {
1.89 daniel 2578: int n;
2579:
2580: if (str == NULL) return(NULL);
2581: if (val == NULL) return(NULL);
2582: n = xmlStrlen(val);
2583:
2584: if (n == 0) return(str);
2585: while (*str != 0) {
2586: if (*str == *val) {
1.123 daniel 2587: if (!xmlStrncmp(str, val, n)) return((const xmlChar *) str);
1.89 daniel 2588: }
2589: str++;
2590: }
2591: return(NULL);
2592: }
2593:
2594: /**
2595: * xmlStrsub:
1.123 daniel 2596: * @str: the xmlChar * array (haystack)
1.89 daniel 2597: * @start: the index of the first char (zero based)
2598: * @len: the length of the substring
2599: *
2600: * Extract a substring of a given string
2601: *
1.123 daniel 2602: * Returns the xmlChar * for the first occurence or NULL.
1.89 daniel 2603: */
2604:
1.123 daniel 2605: xmlChar *
2606: xmlStrsub(const xmlChar *str, int start, int len) {
1.89 daniel 2607: int i;
2608:
2609: if (str == NULL) return(NULL);
2610: if (start < 0) return(NULL);
1.90 daniel 2611: if (len < 0) return(NULL);
1.89 daniel 2612:
2613: for (i = 0;i < start;i++) {
2614: if (*str == 0) return(NULL);
2615: str++;
2616: }
2617: if (*str == 0) return(NULL);
2618: return(xmlStrndup(str, len));
1.14 veillard 2619: }
1.28 daniel 2620:
/**
 * xmlStrlen:
 * @str:  the xmlChar * array
 *
 * length of a xmlChar's string, the terminating 0 not being counted.
 *
 * Returns the number of xmlChar contained in the ARRAY, 0 if @str is NULL.
 */

int
xmlStrlen(const xmlChar *str) {
    const xmlChar *last = str;

    if (str == NULL)
        return(0);

    while (*last != 0)
        last++;
    return((int) (last - str));
}
2641:
1.50 daniel 2642: /**
2643: * xmlStrncat:
1.123 daniel 2644: * @cur: the original xmlChar * array
2645: * @add: the xmlChar * array added
1.50 daniel 2646: * @len: the length of @add
2647: *
1.123 daniel 2648: * a strncat for array of xmlChar's
1.68 daniel 2649: *
1.123 daniel 2650: * Returns a new xmlChar * containing the concatenated string.
1.45 daniel 2651: */
2652:
1.123 daniel 2653: xmlChar *
2654: xmlStrncat(xmlChar *cur, const xmlChar *add, int len) {
1.45 daniel 2655: int size;
1.123 daniel 2656: xmlChar *ret;
1.45 daniel 2657:
2658: if ((add == NULL) || (len == 0))
2659: return(cur);
2660: if (cur == NULL)
2661: return(xmlStrndup(add, len));
2662:
2663: size = xmlStrlen(cur);
1.123 daniel 2664: ret = xmlRealloc(cur, (size + len + 1) * sizeof(xmlChar));
1.45 daniel 2665: if (ret == NULL) {
1.86 daniel 2666: fprintf(stderr, "xmlStrncat: realloc of %ld byte failed\n",
1.123 daniel 2667: (size + len + 1) * (long)sizeof(xmlChar));
1.45 daniel 2668: return(cur);
2669: }
1.123 daniel 2670: memcpy(&ret[size], add, len * sizeof(xmlChar));
1.45 daniel 2671: ret[size + len] = 0;
2672: return(ret);
2673: }
2674:
1.50 daniel 2675: /**
2676: * xmlStrcat:
1.123 daniel 2677: * @cur: the original xmlChar * array
2678: * @add: the xmlChar * array added
1.50 daniel 2679: *
1.152 daniel 2680: * a strcat for array of xmlChar's. Since they are supposed to be
2681: * encoded in UTF-8 or an encoding with 8bit based chars, we assume
2682: * a termination mark of '0'.
1.68 daniel 2683: *
1.123 daniel 2684: * Returns a new xmlChar * containing the concatenated string.
1.45 daniel 2685: */
1.123 daniel 2686: xmlChar *
2687: xmlStrcat(xmlChar *cur, const xmlChar *add) {
2688: const xmlChar *p = add;
1.45 daniel 2689:
2690: if (add == NULL) return(cur);
2691: if (cur == NULL)
2692: return(xmlStrdup(add));
2693:
1.152 daniel 2694: while (*p != 0) p++;
1.45 daniel 2695: return(xmlStrncat(cur, add, p - add));
2696: }
2697:
2698: /************************************************************************
2699: * *
2700: * Commodity functions, cleanup needed ? *
2701: * *
2702: ************************************************************************/
2703:
1.50 daniel 2704: /**
2705: * areBlanks:
2706: * @ctxt: an XML parser context
1.123 daniel 2707: * @str: a xmlChar *
1.50 daniel 2708: * @len: the size of @str
2709: *
1.45 daniel 2710: * Is this a sequence of blank chars that one can ignore ?
1.50 daniel 2711: *
1.68 daniel 2712: * Returns 1 if ignorable 0 otherwise.
1.45 daniel 2713: */
2714:
1.123 daniel 2715: static int areBlanks(xmlParserCtxtPtr ctxt, const xmlChar *str, int len) {
1.104 daniel 2716: int i, ret;
1.45 daniel 2717: xmlNodePtr lastChild;
2718:
2719: for (i = 0;i < len;i++)
2720: if (!(IS_BLANK(str[i]))) return(0);
2721:
1.152 daniel 2722: if (RAW != '<') return(0);
1.72 daniel 2723: if (ctxt->node == NULL) return(0);
1.104 daniel 2724: if (ctxt->myDoc != NULL) {
2725: ret = xmlIsMixedElement(ctxt->myDoc, ctxt->node->name);
2726: if (ret == 0) return(1);
2727: if (ret == 1) return(0);
2728: }
2729: /*
2730: * heuristic
2731: */
1.45 daniel 2732: lastChild = xmlGetLastChild(ctxt->node);
2733: if (lastChild == NULL) {
2734: if (ctxt->node->content != NULL) return(0);
2735: } else if (xmlNodeIsText(lastChild))
2736: return(0);
1.157 daniel 2737: else if ((ctxt->node->children != NULL) &&
2738: (xmlNodeIsText(ctxt->node->children)))
1.104 daniel 2739: return(0);
1.45 daniel 2740: return(1);
2741: }
2742:
1.50 daniel 2743: /**
2744: * xmlHandleEntity:
2745: * @ctxt: an XML parser context
2746: * @entity: an XML entity pointer.
2747: *
2748: * Default handling of defined entities, when should we define a new input
1.45 daniel 2749: * stream ? When do we just handle that as a set of chars ?
1.99 daniel 2750: *
2751: * OBSOLETE: to be removed at some point.
1.45 daniel 2752: */
2753:
1.55 daniel 2754: void
2755: xmlHandleEntity(xmlParserCtxtPtr ctxt, xmlEntityPtr entity) {
1.45 daniel 2756: int len;
1.50 daniel 2757: xmlParserInputPtr input;
1.45 daniel 2758:
2759: if (entity->content == NULL) {
1.123 daniel 2760: ctxt->errNo = XML_ERR_INTERNAL_ERROR;
1.55 daniel 2761: if ((ctxt->sax != NULL) && (ctxt->sax->error != NULL))
1.74 daniel 2762: ctxt->sax->error(ctxt->userData, "xmlHandleEntity %s: content == NULL\n",
1.45 daniel 2763: entity->name);
1.59 daniel 2764: ctxt->wellFormed = 0;
1.45 daniel 2765: return;
2766: }
2767: len = xmlStrlen(entity->content);
2768: if (len <= 2) goto handle_as_char;
2769:
2770: /*
2771: * Redefine its content as an input stream.
2772: */
1.50 daniel 2773: input = xmlNewEntityInputStream(ctxt, entity);
2774: xmlPushInput(ctxt, input);
1.45 daniel 2775: return;
2776:
2777: handle_as_char:
2778: /*
2779: * Just handle the content as a set of chars.
2780: */
1.171 daniel 2781: if ((ctxt->sax != NULL) && (!ctxt->disableSAX) &&
2782: (ctxt->sax->characters != NULL))
1.74 daniel 2783: ctxt->sax->characters(ctxt->userData, entity->content, len);
1.45 daniel 2784:
2785: }
2786:
2787: /*
 * Forward declarations for recursive behaviour.
2789: */
1.77 daniel 2790: void xmlParsePEReference(xmlParserCtxtPtr ctxt);
2791: void xmlParseReference(xmlParserCtxtPtr ctxt);
1.45 daniel 2792:
1.28 daniel 2793: /************************************************************************
2794: * *
2795: * Extra stuff for namespace support *
2796: * Relates to http://www.w3.org/TR/WD-xml-names *
2797: * *
2798: ************************************************************************/
2799:
1.50 daniel 2800: /**
2801: * xmlNamespaceParseNCName:
2802: * @ctxt: an XML parser context
2803: *
2804: * parse an XML namespace name.
1.28 daniel 2805: *
2806: * [NS 3] NCName ::= (Letter | '_') (NCNameChar)*
2807: *
2808: * [NS 4] NCNameChar ::= Letter | Digit | '.' | '-' | '_' |
2809: * CombiningChar | Extender
1.68 daniel 2810: *
2811: * Returns the namespace name or NULL
1.28 daniel 2812: */
2813:
1.123 daniel 2814: xmlChar *
1.55 daniel 2815: xmlNamespaceParseNCName(xmlParserCtxtPtr ctxt) {
1.152 daniel 2816: xmlChar buf[XML_MAX_NAMELEN + 5];
2817: int len = 0, l;
2818: int cur = CUR_CHAR(l);
1.28 daniel 2819:
1.156 daniel 2820: /* load first the value of the char !!! */
1.152 daniel 2821: if (!IS_LETTER(cur) && (cur != '_')) return(NULL);
1.28 daniel 2822:
1.152 daniel 2823: while ((IS_LETTER(cur)) || (IS_DIGIT(cur)) ||
2824: (cur == '.') || (cur == '-') ||
2825: (cur == '_') ||
2826: (IS_COMBINING(cur)) ||
2827: (IS_EXTENDER(cur))) {
2828: COPY_BUF(l,buf,len,cur);
2829: NEXTL(l);
2830: cur = CUR_CHAR(l);
1.91 daniel 2831: if (len >= XML_MAX_NAMELEN) {
2832: fprintf(stderr,
2833: "xmlNamespaceParseNCName: reached XML_MAX_NAMELEN limit\n");
1.152 daniel 2834: while ((IS_LETTER(cur)) || (IS_DIGIT(cur)) ||
2835: (cur == '.') || (cur == '-') ||
2836: (cur == '_') ||
2837: (IS_COMBINING(cur)) ||
2838: (IS_EXTENDER(cur))) {
2839: NEXTL(l);
2840: cur = CUR_CHAR(l);
2841: }
1.91 daniel 2842: break;
2843: }
2844: }
2845: return(xmlStrndup(buf, len));
1.28 daniel 2846: }
2847:
1.50 daniel 2848: /**
2849: * xmlNamespaceParseQName:
2850: * @ctxt: an XML parser context
1.123 daniel 2851: * @prefix: a xmlChar **
1.50 daniel 2852: *
2853: * parse an XML qualified name
1.28 daniel 2854: *
2855: * [NS 5] QName ::= (Prefix ':')? LocalPart
2856: *
2857: * [NS 6] Prefix ::= NCName
2858: *
2859: * [NS 7] LocalPart ::= NCName
1.68 daniel 2860: *
1.127 daniel 2861: * Returns the local part, and prefix is updated
1.50 daniel 2862: * to get the Prefix if any.
1.28 daniel 2863: */
2864:
1.123 daniel 2865: xmlChar *
2866: xmlNamespaceParseQName(xmlParserCtxtPtr ctxt, xmlChar **prefix) {
2867: xmlChar *ret = NULL;
1.28 daniel 2868:
2869: *prefix = NULL;
2870: ret = xmlNamespaceParseNCName(ctxt);
1.152 daniel 2871: if (RAW == ':') {
1.28 daniel 2872: *prefix = ret;
1.40 daniel 2873: NEXT;
1.28 daniel 2874: ret = xmlNamespaceParseNCName(ctxt);
2875: }
2876:
2877: return(ret);
2878: }
2879:
1.50 daniel 2880: /**
1.72 daniel 2881: * xmlSplitQName:
1.162 daniel 2882: * @ctxt: an XML parser context
1.72 daniel 2883: * @name: an XML parser context
1.123 daniel 2884: * @prefix: a xmlChar **
1.72 daniel 2885: *
2886: * parse an XML qualified name string
2887: *
2888: * [NS 5] QName ::= (Prefix ':')? LocalPart
2889: *
2890: * [NS 6] Prefix ::= NCName
2891: *
2892: * [NS 7] LocalPart ::= NCName
2893: *
1.127 daniel 2894: * Returns the local part, and prefix is updated
1.72 daniel 2895: * to get the Prefix if any.
2896: */
2897:
1.123 daniel 2898: xmlChar *
1.162 daniel 2899: xmlSplitQName(xmlParserCtxtPtr ctxt, const xmlChar *name, xmlChar **prefix) {
2900: xmlChar buf[XML_MAX_NAMELEN + 5];
2901: int len = 0;
1.123 daniel 2902: xmlChar *ret = NULL;
2903: const xmlChar *cur = name;
1.162 daniel 2904: int c,l;
1.72 daniel 2905:
2906: *prefix = NULL;
1.113 daniel 2907:
2908: /* xml: prefix is not really a namespace */
2909: if ((cur[0] == 'x') && (cur[1] == 'm') &&
2910: (cur[2] == 'l') && (cur[3] == ':'))
2911: return(xmlStrdup(name));
2912:
1.162 daniel 2913: /* nasty but valid */
2914: if (cur[0] == ':')
2915: return(xmlStrdup(name));
2916:
2917: c = CUR_SCHAR(cur, l);
2918: if (!IS_LETTER(c) && (c != '_')) return(NULL);
1.72 daniel 2919:
1.162 daniel 2920: while ((IS_LETTER(c)) || (IS_DIGIT(c)) ||
2921: (c == '.') || (c == '-') ||
2922: (c == '_') ||
2923: (IS_COMBINING(c)) ||
2924: (IS_EXTENDER(c))) {
2925: COPY_BUF(l,buf,len,c);
2926: cur += l;
2927: c = CUR_SCHAR(cur, l);
2928: }
1.72 daniel 2929:
1.162 daniel 2930: ret = xmlStrndup(buf, len);
1.72 daniel 2931:
1.162 daniel 2932: if (c == ':') {
2933: cur += l;
1.163 daniel 2934: c = CUR_SCHAR(cur, l);
1.162 daniel 2935: if (!IS_LETTER(c) && (c != '_')) return(ret);
1.72 daniel 2936: *prefix = ret;
1.162 daniel 2937: len = 0;
1.72 daniel 2938:
1.162 daniel 2939: while ((IS_LETTER(c)) || (IS_DIGIT(c)) ||
2940: (c == '.') || (c == '-') ||
2941: (c == '_') ||
2942: (IS_COMBINING(c)) ||
2943: (IS_EXTENDER(c))) {
2944: COPY_BUF(l,buf,len,c);
2945: cur += l;
2946: c = CUR_SCHAR(cur, l);
2947: }
1.72 daniel 2948:
1.162 daniel 2949: ret = xmlStrndup(buf, len);
1.72 daniel 2950: }
2951:
2952: return(ret);
2953: }
2954: /**
1.50 daniel 2955: * xmlNamespaceParseNSDef:
2956: * @ctxt: an XML parser context
2957: *
2958: * parse a namespace prefix declaration
1.28 daniel 2959: *
2960: * [NS 1] NSDef ::= PrefixDef Eq SystemLiteral
2961: *
2962: * [NS 2] PrefixDef ::= 'xmlns' (':' NCName)?
1.68 daniel 2963: *
2964: * Returns the namespace name
1.28 daniel 2965: */
2966:
1.123 daniel 2967: xmlChar *
1.55 daniel 2968: xmlNamespaceParseNSDef(xmlParserCtxtPtr ctxt) {
1.123 daniel 2969: xmlChar *name = NULL;
1.28 daniel 2970:
1.152 daniel 2971: if ((RAW == 'x') && (NXT(1) == 'm') &&
1.40 daniel 2972: (NXT(2) == 'l') && (NXT(3) == 'n') &&
2973: (NXT(4) == 's')) {
2974: SKIP(5);
1.152 daniel 2975: if (RAW == ':') {
1.40 daniel 2976: NEXT;
1.28 daniel 2977: name = xmlNamespaceParseNCName(ctxt);
2978: }
2979: }
1.39 daniel 2980: return(name);
1.28 daniel 2981: }
2982:
1.50 daniel 2983: /**
2984: * xmlParseQuotedString:
2985: * @ctxt: an XML parser context
2986: *
1.45 daniel 2987: * [OLD] Parse and return a string between quotes or doublequotes
1.110 daniel 2988: * To be removed at next drop of binary compatibility
1.68 daniel 2989: *
2990: * Returns the string parser or NULL.
1.45 daniel 2991: */
1.123 daniel 2992: xmlChar *
1.55 daniel 2993: xmlParseQuotedString(xmlParserCtxtPtr ctxt) {
1.135 daniel 2994: xmlChar *buf = NULL;
1.152 daniel 2995: int len = 0,l;
1.140 daniel 2996: int size = XML_PARSER_BUFFER_SIZE;
1.152 daniel 2997: int c;
1.45 daniel 2998:
1.135 daniel 2999: buf = (xmlChar *) xmlMalloc(size * sizeof(xmlChar));
3000: if (buf == NULL) {
3001: fprintf(stderr, "malloc of %d byte failed\n", size);
3002: return(NULL);
3003: }
1.152 daniel 3004: if (RAW == '"') {
1.45 daniel 3005: NEXT;
1.152 daniel 3006: c = CUR_CHAR(l);
1.135 daniel 3007: while (IS_CHAR(c) && (c != '"')) {
1.152 daniel 3008: if (len + 5 >= size) {
1.135 daniel 3009: size *= 2;
3010: buf = xmlRealloc(buf, size * sizeof(xmlChar));
3011: if (buf == NULL) {
3012: fprintf(stderr, "realloc of %d byte failed\n", size);
3013: return(NULL);
3014: }
3015: }
1.152 daniel 3016: COPY_BUF(l,buf,len,c);
3017: NEXTL(l);
3018: c = CUR_CHAR(l);
1.135 daniel 3019: }
3020: if (c != '"') {
1.123 daniel 3021: ctxt->errNo = XML_ERR_STRING_NOT_CLOSED;
1.55 daniel 3022: if ((ctxt->sax != NULL) && (ctxt->sax->error != NULL))
1.135 daniel 3023: ctxt->sax->error(ctxt->userData,
3024: "String not closed \"%.50s\"\n", buf);
1.59 daniel 3025: ctxt->wellFormed = 0;
1.55 daniel 3026: } else {
1.45 daniel 3027: NEXT;
3028: }
1.152 daniel 3029: } else if (RAW == '\''){
1.45 daniel 3030: NEXT;
1.135 daniel 3031: c = CUR;
3032: while (IS_CHAR(c) && (c != '\'')) {
3033: if (len + 1 >= size) {
3034: size *= 2;
3035: buf = xmlRealloc(buf, size * sizeof(xmlChar));
3036: if (buf == NULL) {
3037: fprintf(stderr, "realloc of %d byte failed\n", size);
3038: return(NULL);
3039: }
3040: }
3041: buf[len++] = c;
3042: NEXT;
3043: c = CUR;
3044: }
1.152 daniel 3045: if (RAW != '\'') {
1.123 daniel 3046: ctxt->errNo = XML_ERR_STRING_NOT_CLOSED;
1.55 daniel 3047: if ((ctxt->sax != NULL) && (ctxt->sax->error != NULL))
1.135 daniel 3048: ctxt->sax->error(ctxt->userData,
3049: "String not closed \"%.50s\"\n", buf);
1.59 daniel 3050: ctxt->wellFormed = 0;
1.55 daniel 3051: } else {
1.45 daniel 3052: NEXT;
3053: }
3054: }
1.135 daniel 3055: return(buf);
1.45 daniel 3056: }
3057:
1.50 daniel 3058: /**
3059: * xmlParseNamespace:
3060: * @ctxt: an XML parser context
3061: *
1.45 daniel 3062: * [OLD] xmlParseNamespace: parse specific PI '<?namespace ...' constructs.
3063: *
3064: * This is what the older xml-name Working Draft specified, a bunch of
3065: * other stuff may still rely on it, so support is still here as
1.127 daniel 3066: * if it was declared on the root of the Tree:-(
1.110 daniel 3067: *
3068: * To be removed at next drop of binary compatibility
1.45 daniel 3069: */
3070:
1.55 daniel 3071: void
3072: xmlParseNamespace(xmlParserCtxtPtr ctxt) {
1.123 daniel 3073: xmlChar *href = NULL;
3074: xmlChar *prefix = NULL;
1.45 daniel 3075: int garbage = 0;
3076:
3077: /*
3078: * We just skipped "namespace" or "xml:namespace"
3079: */
3080: SKIP_BLANKS;
3081:
1.153 daniel 3082: while (IS_CHAR(RAW) && (RAW != '>')) {
1.45 daniel 3083: /*
3084: * We can have "ns" or "prefix" attributes
3085: * Old encoding as 'href' or 'AS' attributes is still supported
3086: */
1.152 daniel 3087: if ((RAW == 'n') && (NXT(1) == 's')) {
1.45 daniel 3088: garbage = 0;
3089: SKIP(2);
3090: SKIP_BLANKS;
3091:
1.152 daniel 3092: if (RAW != '=') continue;
1.45 daniel 3093: NEXT;
3094: SKIP_BLANKS;
3095:
3096: href = xmlParseQuotedString(ctxt);
3097: SKIP_BLANKS;
1.152 daniel 3098: } else if ((RAW == 'h') && (NXT(1) == 'r') &&
1.45 daniel 3099: (NXT(2) == 'e') && (NXT(3) == 'f')) {
3100: garbage = 0;
3101: SKIP(4);
3102: SKIP_BLANKS;
3103:
1.152 daniel 3104: if (RAW != '=') continue;
1.45 daniel 3105: NEXT;
3106: SKIP_BLANKS;
3107:
3108: href = xmlParseQuotedString(ctxt);
3109: SKIP_BLANKS;
1.152 daniel 3110: } else if ((RAW == 'p') && (NXT(1) == 'r') &&
1.45 daniel 3111: (NXT(2) == 'e') && (NXT(3) == 'f') &&
3112: (NXT(4) == 'i') && (NXT(5) == 'x')) {
3113: garbage = 0;
3114: SKIP(6);
3115: SKIP_BLANKS;
3116:
1.152 daniel 3117: if (RAW != '=') continue;
1.45 daniel 3118: NEXT;
3119: SKIP_BLANKS;
3120:
3121: prefix = xmlParseQuotedString(ctxt);
3122: SKIP_BLANKS;
1.152 daniel 3123: } else if ((RAW == 'A') && (NXT(1) == 'S')) {
1.45 daniel 3124: garbage = 0;
3125: SKIP(2);
3126: SKIP_BLANKS;
3127:
1.152 daniel 3128: if (RAW != '=') continue;
1.45 daniel 3129: NEXT;
3130: SKIP_BLANKS;
3131:
3132: prefix = xmlParseQuotedString(ctxt);
3133: SKIP_BLANKS;
1.152 daniel 3134: } else if ((RAW == '?') && (NXT(1) == '>')) {
1.45 daniel 3135: garbage = 0;
1.91 daniel 3136: NEXT;
1.45 daniel 3137: } else {
3138: /*
3139: * Found garbage when parsing the namespace
3140: */
1.122 daniel 3141: if (!garbage) {
1.55 daniel 3142: if ((ctxt->sax != NULL) && (ctxt->sax->error != NULL))
1.122 daniel 3143: ctxt->sax->error(ctxt->userData,
3144: "xmlParseNamespace found garbage\n");
3145: }
1.123 daniel 3146: ctxt->errNo = XML_ERR_NS_DECL_ERROR;
1.59 daniel 3147: ctxt->wellFormed = 0;
1.45 daniel 3148: NEXT;
3149: }
3150: }
3151:
3152: MOVETO_ENDTAG(CUR_PTR);
3153: NEXT;
3154:
3155: /*
3156: * Register the DTD.
1.72 daniel 3157: if (href != NULL)
3158: if ((ctxt->sax != NULL) && (ctxt->sax->globalNamespace != NULL))
1.74 daniel 3159: ctxt->sax->globalNamespace(ctxt->userData, href, prefix);
1.45 daniel 3160: */
3161:
1.119 daniel 3162: if (prefix != NULL) xmlFree(prefix);
3163: if (href != NULL) xmlFree(href);
1.45 daniel 3164: }
3165:
1.28 daniel 3166: /************************************************************************
3167: * *
3168: * The parser itself *
3169: * Relates to http://www.w3.org/TR/REC-xml *
3170: * *
3171: ************************************************************************/
1.14 veillard 3172:
1.50 daniel 3173: /**
1.97 daniel 3174: * xmlScanName:
3175: * @ctxt: an XML parser context
3176: *
3177: * Trickery: parse an XML name but without consuming the input flow
3178: * Needed for rollback cases.
3179: *
3180: * [4] NameChar ::= Letter | Digit | '.' | '-' | '_' | ':' |
3181: * CombiningChar | Extender
3182: *
3183: * [5] Name ::= (Letter | '_' | ':') (NameChar)*
3184: *
3185: * [6] Names ::= Name (S Name)*
3186: *
3187: * Returns the Name parsed or NULL
3188: */
3189:
1.123 daniel 3190: xmlChar *
1.97 daniel 3191: xmlScanName(xmlParserCtxtPtr ctxt) {
1.123 daniel 3192: xmlChar buf[XML_MAX_NAMELEN];
1.97 daniel 3193: int len = 0;
3194:
3195: GROW;
1.152 daniel 3196: if (!IS_LETTER(RAW) && (RAW != '_') &&
3197: (RAW != ':')) {
1.97 daniel 3198: return(NULL);
3199: }
3200:
3201: while ((IS_LETTER(NXT(len))) || (IS_DIGIT(NXT(len))) ||
3202: (NXT(len) == '.') || (NXT(len) == '-') ||
3203: (NXT(len) == '_') || (NXT(len) == ':') ||
3204: (IS_COMBINING(NXT(len))) ||
3205: (IS_EXTENDER(NXT(len)))) {
3206: buf[len] = NXT(len);
3207: len++;
3208: if (len >= XML_MAX_NAMELEN) {
3209: fprintf(stderr,
3210: "xmlScanName: reached XML_MAX_NAMELEN limit\n");
3211: while ((IS_LETTER(NXT(len))) || (IS_DIGIT(NXT(len))) ||
3212: (NXT(len) == '.') || (NXT(len) == '-') ||
3213: (NXT(len) == '_') || (NXT(len) == ':') ||
3214: (IS_COMBINING(NXT(len))) ||
3215: (IS_EXTENDER(NXT(len))))
3216: len++;
3217: break;
3218: }
3219: }
3220: return(xmlStrndup(buf, len));
3221: }
3222:
3223: /**
1.50 daniel 3224: * xmlParseName:
3225: * @ctxt: an XML parser context
3226: *
3227: * parse an XML name.
1.22 daniel 3228: *
3229: * [4] NameChar ::= Letter | Digit | '.' | '-' | '_' | ':' |
3230: * CombiningChar | Extender
3231: *
3232: * [5] Name ::= (Letter | '_' | ':') (NameChar)*
3233: *
3234: * [6] Names ::= Name (S Name)*
1.68 daniel 3235: *
3236: * Returns the Name parsed or NULL
1.1 veillard 3237: */
3238:
1.123 daniel 3239: xmlChar *
1.55 daniel 3240: xmlParseName(xmlParserCtxtPtr ctxt) {
1.160 daniel 3241: xmlChar buf[XML_MAX_NAMELEN + 5];
3242: int len = 0, l;
3243: int c;
1.1 veillard 3244:
1.91 daniel 3245: GROW;
1.160 daniel 3246: c = CUR_CHAR(l);
3247: if (!IS_LETTER(c) && (c != '_') &&
3248: (c != ':')) {
1.91 daniel 3249: return(NULL);
3250: }
1.40 daniel 3251:
1.160 daniel 3252: while ((IS_LETTER(c)) || (IS_DIGIT(c)) ||
3253: (c == '.') || (c == '-') ||
3254: (c == '_') || (c == ':') ||
3255: (IS_COMBINING(c)) ||
3256: (IS_EXTENDER(c))) {
3257: COPY_BUF(l,buf,len,c);
3258: NEXTL(l);
3259: c = CUR_CHAR(l);
1.91 daniel 3260: if (len >= XML_MAX_NAMELEN) {
3261: fprintf(stderr,
3262: "xmlParseName: reached XML_MAX_NAMELEN limit\n");
1.160 daniel 3263: while ((IS_LETTER(c)) || (IS_DIGIT(c)) ||
3264: (c == '.') || (c == '-') ||
3265: (c == '_') || (c == ':') ||
3266: (IS_COMBINING(c)) ||
3267: (IS_EXTENDER(c))) {
3268: NEXTL(l);
3269: c = CUR_CHAR(l);
1.97 daniel 3270: }
1.91 daniel 3271: break;
3272: }
3273: }
3274: return(xmlStrndup(buf, len));
1.22 daniel 3275: }
3276:
1.50 daniel 3277: /**
1.135 daniel 3278: * xmlParseStringName:
3279: * @ctxt: an XML parser context
3280: * @str: a pointer to an index in the string
3281: *
3282: * parse an XML name.
3283: *
3284: * [4] NameChar ::= Letter | Digit | '.' | '-' | '_' | ':' |
3285: * CombiningChar | Extender
3286: *
3287: * [5] Name ::= (Letter | '_' | ':') (NameChar)*
3288: *
3289: * [6] Names ::= Name (S Name)*
3290: *
3291: * Returns the Name parsed or NULL. The str pointer
3292: * is updated to the current location in the string.
3293: */
3294:
3295: xmlChar *
3296: xmlParseStringName(xmlParserCtxtPtr ctxt, const xmlChar** str) {
3297: const xmlChar *ptr;
3298: const xmlChar *start;
3299: xmlChar cur;
3300:
3301: if ((str == NULL) || (*str == NULL)) return(NULL);
3302:
3303: start = ptr = *str;
3304: cur = *ptr;
3305: if (!IS_LETTER(cur) && (cur != '_') &&
3306: (cur != ':')) {
3307: return(NULL);
3308: }
3309:
3310: while ((IS_LETTER(cur)) || (IS_DIGIT(cur)) ||
3311: (cur == '.') || (cur == '-') ||
3312: (cur == '_') || (cur == ':') ||
3313: (IS_COMBINING(cur)) ||
3314: (IS_EXTENDER(cur))) {
3315: ptr++;
3316: cur = *ptr;
3317: }
3318: *str = ptr;
3319: return(xmlStrndup(start, ptr - start ));
3320: }
3321:
3322: /**
1.50 daniel 3323: * xmlParseNmtoken:
3324: * @ctxt: an XML parser context
3325: *
3326: * parse an XML Nmtoken.
1.22 daniel 3327: *
3328: * [7] Nmtoken ::= (NameChar)+
3329: *
3330: * [8] Nmtokens ::= Nmtoken (S Nmtoken)*
1.68 daniel 3331: *
3332: * Returns the Nmtoken parsed or NULL
1.22 daniel 3333: */
3334:
1.123 daniel 3335: xmlChar *
1.55 daniel 3336: xmlParseNmtoken(xmlParserCtxtPtr ctxt) {
1.123 daniel 3337: xmlChar buf[XML_MAX_NAMELEN];
1.91 daniel 3338: int len = 0;
1.160 daniel 3339: int c,l;
1.22 daniel 3340:
1.91 daniel 3341: GROW;
1.160 daniel 3342: c = CUR_CHAR(l);
3343: while ((IS_LETTER(c)) || (IS_DIGIT(c)) ||
3344: (c == '.') || (c == '-') ||
3345: (c == '_') || (c == ':') ||
3346: (IS_COMBINING(c)) ||
3347: (IS_EXTENDER(c))) {
3348: COPY_BUF(l,buf,len,c);
3349: NEXTL(l);
3350: c = CUR_CHAR(l);
1.91 daniel 3351: if (len >= XML_MAX_NAMELEN) {
3352: fprintf(stderr,
3353: "xmlParseNmtoken: reached XML_MAX_NAMELEN limit\n");
1.160 daniel 3354: while ((IS_LETTER(c)) || (IS_DIGIT(c)) ||
3355: (c == '.') || (c == '-') ||
3356: (c == '_') || (c == ':') ||
3357: (IS_COMBINING(c)) ||
3358: (IS_EXTENDER(c))) {
3359: NEXTL(l);
3360: c = CUR_CHAR(l);
3361: }
1.91 daniel 3362: break;
3363: }
3364: }
1.168 daniel 3365: if (len == 0)
3366: return(NULL);
1.91 daniel 3367: return(xmlStrndup(buf, len));
1.1 veillard 3368: }
3369:
1.50 daniel 3370: /**
3371: * xmlParseEntityValue:
3372: * @ctxt: an XML parser context
1.78 daniel 3373: * @orig: if non-NULL store a copy of the original entity value
1.50 daniel 3374: *
3375: * parse a value for ENTITY decl.
1.24 daniel 3376: *
3377: * [9] EntityValue ::= '"' ([^%&"] | PEReference | Reference)* '"' |
3378: * "'" ([^%&'] | PEReference | Reference)* "'"
1.68 daniel 3379: *
1.78 daniel 3380: * Returns the EntityValue parsed with reference substitued or NULL
1.24 daniel 3381: */
3382:
1.123 daniel 3383: xmlChar *
3384: xmlParseEntityValue(xmlParserCtxtPtr ctxt, xmlChar **orig) {
1.135 daniel 3385: xmlChar *buf = NULL;
3386: int len = 0;
1.140 daniel 3387: int size = XML_PARSER_BUFFER_SIZE;
1.152 daniel 3388: int c, l;
1.135 daniel 3389: xmlChar stop;
1.123 daniel 3390: xmlChar *ret = NULL;
1.98 daniel 3391: xmlParserInputPtr input;
1.24 daniel 3392:
1.152 daniel 3393: if (RAW == '"') stop = '"';
3394: else if (RAW == '\'') stop = '\'';
1.135 daniel 3395: else {
3396: ctxt->errNo = XML_ERR_ENTITY_NOT_STARTED;
3397: if ((ctxt->sax != NULL) && (ctxt->sax->error != NULL))
3398: ctxt->sax->error(ctxt->userData, "EntityValue: \" or ' expected\n");
3399: ctxt->wellFormed = 0;
3400: return(NULL);
3401: }
3402: buf = (xmlChar *) xmlMalloc(size * sizeof(xmlChar));
3403: if (buf == NULL) {
3404: fprintf(stderr, "malloc of %d byte failed\n", size);
3405: return(NULL);
3406: }
1.94 daniel 3407:
1.135 daniel 3408: /*
3409: * The content of the entity definition is copied in a buffer.
3410: */
1.94 daniel 3411:
1.135 daniel 3412: ctxt->instate = XML_PARSER_ENTITY_VALUE;
3413: input = ctxt->input;
3414: GROW;
3415: NEXT;
1.152 daniel 3416: c = CUR_CHAR(l);
1.135 daniel 3417: /*
3418: * NOTE: 4.4.5 Included in Literal
3419: * When a parameter entity reference appears in a literal entity
3420: * value, ... a single or double quote character in the replacement
3421: * text is always treated as a normal data character and will not
3422: * terminate the literal.
3423: * In practice it means we stop the loop only when back at parsing
3424: * the initial entity and the quote is found
3425: */
3426: while (IS_CHAR(c) && ((c != stop) || (ctxt->input != input))) {
1.152 daniel 3427: if (len + 5 >= size) {
1.135 daniel 3428: size *= 2;
3429: buf = xmlRealloc(buf, size * sizeof(xmlChar));
3430: if (buf == NULL) {
3431: fprintf(stderr, "realloc of %d byte failed\n", size);
3432: return(NULL);
1.94 daniel 3433: }
1.79 daniel 3434: }
1.152 daniel 3435: COPY_BUF(l,buf,len,c);
3436: NEXTL(l);
1.98 daniel 3437: /*
1.135 daniel 3438: * Pop-up of finished entities.
1.98 daniel 3439: */
1.152 daniel 3440: while ((RAW == 0) && (ctxt->inputNr > 1))
1.135 daniel 3441: xmlPopInput(ctxt);
1.152 daniel 3442:
3443: c = CUR_CHAR(l);
1.135 daniel 3444: if (c == 0) {
1.94 daniel 3445: GROW;
1.152 daniel 3446: c = CUR_CHAR(l);
1.79 daniel 3447: }
1.135 daniel 3448: }
3449: buf[len] = 0;
3450:
3451: /*
3452: * Then PEReference entities are substituted.
3453: */
3454: if (c != stop) {
3455: ctxt->errNo = XML_ERR_ENTITY_NOT_FINISHED;
1.55 daniel 3456: if ((ctxt->sax != NULL) && (ctxt->sax->error != NULL))
1.135 daniel 3457: ctxt->sax->error(ctxt->userData, "EntityValue: \" expected\n");
1.59 daniel 3458: ctxt->wellFormed = 0;
1.170 daniel 3459: xmlFree(buf);
1.135 daniel 3460: } else {
3461: NEXT;
3462: /*
3463: * NOTE: 4.4.7 Bypassed
3464: * When a general entity reference appears in the EntityValue in
3465: * an entity declaration, it is bypassed and left as is.
3466: * so XML_SUBSTITUTE_REF is not set.
3467: */
3468: ret = xmlStringDecodeEntities(ctxt, buf, XML_SUBSTITUTE_PEREF,
3469: 0, 0, 0);
3470: if (orig != NULL)
3471: *orig = buf;
3472: else
3473: xmlFree(buf);
1.24 daniel 3474: }
3475:
3476: return(ret);
3477: }
3478:
1.50 daniel 3479: /**
3480: * xmlParseAttValue:
3481: * @ctxt: an XML parser context
3482: *
3483: * parse a value for an attribute
1.78 daniel 3484: * Note: the parser won't do substitution of entities here, this
1.113 daniel 3485: * will be handled later in xmlStringGetNodeList
1.29 daniel 3486: *
3487: * [10] AttValue ::= '"' ([^<&"] | Reference)* '"' |
3488: * "'" ([^<&'] | Reference)* "'"
1.68 daniel 3489: *
1.129 daniel 3490: * 3.3.3 Attribute-Value Normalization:
3491: * Before the value of an attribute is passed to the application or
3492: * checked for validity, the XML processor must normalize it as follows:
3493: * - a character reference is processed by appending the referenced
3494: * character to the attribute value
3495: * - an entity reference is processed by recursively processing the
3496: * replacement text of the entity
3497: * - a whitespace character (#x20, #xD, #xA, #x9) is processed by
3498: * appending #x20 to the normalized value, except that only a single
3499: * #x20 is appended for a "#xD#xA" sequence that is part of an external
3500: * parsed entity or the literal entity value of an internal parsed entity
3501: * - other characters are processed by appending them to the normalized value
1.130 daniel 3502: * If the declared value is not CDATA, then the XML processor must further
3503: * process the normalized attribute value by discarding any leading and
3504: * trailing space (#x20) characters, and by replacing sequences of space
3505: * (#x20) characters by a single space (#x20) character.
3506: * All attributes for which no declaration has been read should be treated
3507: * by a non-validating parser as if declared CDATA.
1.129 daniel 3508: *
3509: * Returns the AttValue parsed or NULL. The value has to be freed by the caller.
1.29 daniel 3510: */
3511:
1.123 daniel 3512: xmlChar *
1.55 daniel 3513: xmlParseAttValue(xmlParserCtxtPtr ctxt) {
1.129 daniel 3514: xmlChar limit = 0;
3515: xmlChar *buffer = NULL;
3516: int buffer_size = 0;
3517: xmlChar *out = NULL;
3518:
3519: xmlChar *current = NULL;
3520: xmlEntityPtr ent;
3521: xmlChar cur;
3522:
1.29 daniel 3523:
1.91 daniel 3524: SHRINK;
1.151 daniel 3525: if (NXT(0) == '"') {
1.96 daniel 3526: ctxt->instate = XML_PARSER_ATTRIBUTE_VALUE;
1.129 daniel 3527: limit = '"';
1.40 daniel 3528: NEXT;
1.151 daniel 3529: } else if (NXT(0) == '\'') {
1.129 daniel 3530: limit = '\'';
1.96 daniel 3531: ctxt->instate = XML_PARSER_ATTRIBUTE_VALUE;
1.40 daniel 3532: NEXT;
1.29 daniel 3533: } else {
1.123 daniel 3534: ctxt->errNo = XML_ERR_ATTRIBUTE_NOT_STARTED;
1.55 daniel 3535: if ((ctxt->sax != NULL) && (ctxt->sax->error != NULL))
1.74 daniel 3536: ctxt->sax->error(ctxt->userData, "AttValue: \" or ' expected\n");
1.59 daniel 3537: ctxt->wellFormed = 0;
1.129 daniel 3538: return(NULL);
1.29 daniel 3539: }
3540:
1.129 daniel 3541: /*
3542: * allocate a translation buffer.
3543: */
1.140 daniel 3544: buffer_size = XML_PARSER_BUFFER_SIZE;
1.129 daniel 3545: buffer = (xmlChar *) xmlMalloc(buffer_size * sizeof(xmlChar));
3546: if (buffer == NULL) {
3547: perror("xmlParseAttValue: malloc failed");
3548: return(NULL);
3549: }
3550: out = buffer;
3551:
3552: /*
3553: * Ok loop until we reach one of the ending char or a size limit.
3554: */
3555: cur = CUR;
1.156 daniel 3556: while (((NXT(0) != limit) && (cur != '<')) || (ctxt->token != 0)) {
1.129 daniel 3557: if (cur == 0) break;
3558: if ((cur == '&') && (NXT(1) == '#')) {
3559: int val = xmlParseCharRef(ctxt);
3560: *out++ = val;
3561: } else if (cur == '&') {
3562: ent = xmlParseEntityRef(ctxt);
3563: if ((ent != NULL) &&
3564: (ctxt->replaceEntities != 0)) {
3565: current = ent->content;
3566: while (*current != 0) {
3567: *out++ = *current++;
3568: if (out - buffer > buffer_size - 10) {
3569: int index = out - buffer;
3570:
3571: growBuffer(buffer);
3572: out = &buffer[index];
3573: }
3574: }
3575: } else if (ent != NULL) {
3576: int i = xmlStrlen(ent->name);
3577: const xmlChar *cur = ent->name;
3578:
3579: *out++ = '&';
3580: if (out - buffer > buffer_size - i - 10) {
3581: int index = out - buffer;
3582:
3583: growBuffer(buffer);
3584: out = &buffer[index];
3585: }
3586: for (;i > 0;i--)
3587: *out++ = *cur++;
3588: *out++ = ';';
3589: }
3590: } else {
1.156 daniel 3591: /* invalid for UTF-8 , use COPY(out); !!! */
1.129 daniel 3592: if ((cur == 0x20) || (cur == 0xD) || (cur == 0xA) || (cur == 0x9)) {
1.130 daniel 3593: *out++ = 0x20;
3594: if (out - buffer > buffer_size - 10) {
3595: int index = out - buffer;
3596:
3597: growBuffer(buffer);
3598: out = &buffer[index];
1.129 daniel 3599: }
3600: } else {
3601: *out++ = cur;
3602: if (out - buffer > buffer_size - 10) {
3603: int index = out - buffer;
3604:
3605: growBuffer(buffer);
3606: out = &buffer[index];
3607: }
3608: }
3609: NEXT;
3610: }
3611: cur = CUR;
3612: }
3613: *out++ = 0;
1.152 daniel 3614: if (RAW == '<') {
1.129 daniel 3615: if ((ctxt->sax != NULL) && (ctxt->sax->error != NULL))
3616: ctxt->sax->error(ctxt->userData,
3617: "Unescaped '<' not allowed in attributes values\n");
3618: ctxt->errNo = XML_ERR_LT_IN_ATTRIBUTE;
3619: ctxt->wellFormed = 0;
1.152 daniel 3620: } else if (RAW != limit) {
1.129 daniel 3621: if ((ctxt->sax != NULL) && (ctxt->sax->error != NULL))
3622: ctxt->sax->error(ctxt->userData, "AttValue: ' expected\n");
3623: ctxt->errNo = XML_ERR_ATTRIBUTE_NOT_FINISHED;
3624: ctxt->wellFormed = 0;
3625: } else
3626: NEXT;
3627: return(buffer);
1.29 daniel 3628: }
3629:
1.50 daniel 3630: /**
3631: * xmlParseSystemLiteral:
3632: * @ctxt: an XML parser context
3633: *
3634: * parse an XML Literal
1.21 daniel 3635: *
1.22 daniel 3636: * [11] SystemLiteral ::= ('"' [^"]* '"') | ("'" [^']* "'")
1.68 daniel 3637: *
3638: * Returns the SystemLiteral parsed or NULL
1.21 daniel 3639: */
3640:
1.123 daniel 3641: xmlChar *
1.55 daniel 3642: xmlParseSystemLiteral(xmlParserCtxtPtr ctxt) {
1.135 daniel 3643: xmlChar *buf = NULL;
3644: int len = 0;
1.140 daniel 3645: int size = XML_PARSER_BUFFER_SIZE;
1.152 daniel 3646: int cur, l;
1.135 daniel 3647: xmlChar stop;
1.168 daniel 3648: int state = ctxt->instate;
1.21 daniel 3649:
1.91 daniel 3650: SHRINK;
1.152 daniel 3651: if (RAW == '"') {
1.40 daniel 3652: NEXT;
1.135 daniel 3653: stop = '"';
1.152 daniel 3654: } else if (RAW == '\'') {
1.40 daniel 3655: NEXT;
1.135 daniel 3656: stop = '\'';
1.21 daniel 3657: } else {
1.55 daniel 3658: if ((ctxt->sax != NULL) && (ctxt->sax->error != NULL))
1.122 daniel 3659: ctxt->sax->error(ctxt->userData,
3660: "SystemLiteral \" or ' expected\n");
1.123 daniel 3661: ctxt->errNo = XML_ERR_LITERAL_NOT_STARTED;
1.59 daniel 3662: ctxt->wellFormed = 0;
1.135 daniel 3663: return(NULL);
1.21 daniel 3664: }
3665:
1.135 daniel 3666: buf = (xmlChar *) xmlMalloc(size * sizeof(xmlChar));
3667: if (buf == NULL) {
3668: fprintf(stderr, "malloc of %d byte failed\n", size);
3669: return(NULL);
3670: }
1.168 daniel 3671: ctxt->instate = XML_PARSER_SYSTEM_LITERAL;
1.152 daniel 3672: cur = CUR_CHAR(l);
1.135 daniel 3673: while ((IS_CHAR(cur)) && (cur != stop)) {
1.152 daniel 3674: if (len + 5 >= size) {
1.135 daniel 3675: size *= 2;
3676: buf = xmlRealloc(buf, size * sizeof(xmlChar));
3677: if (buf == NULL) {
3678: fprintf(stderr, "realloc of %d byte failed\n", size);
1.168 daniel 3679: ctxt->instate = state;
1.135 daniel 3680: return(NULL);
3681: }
3682: }
1.152 daniel 3683: COPY_BUF(l,buf,len,cur);
3684: NEXTL(l);
3685: cur = CUR_CHAR(l);
1.135 daniel 3686: if (cur == 0) {
3687: GROW;
3688: SHRINK;
1.152 daniel 3689: cur = CUR_CHAR(l);
1.135 daniel 3690: }
3691: }
3692: buf[len] = 0;
1.168 daniel 3693: ctxt->instate = state;
1.135 daniel 3694: if (!IS_CHAR(cur)) {
3695: if ((ctxt->sax != NULL) && (ctxt->sax->error != NULL))
3696: ctxt->sax->error(ctxt->userData, "Unfinished SystemLiteral\n");
3697: ctxt->errNo = XML_ERR_LITERAL_NOT_FINISHED;
3698: ctxt->wellFormed = 0;
3699: } else {
3700: NEXT;
3701: }
3702: return(buf);
1.21 daniel 3703: }
3704:
1.50 daniel 3705: /**
3706: * xmlParsePubidLiteral:
3707: * @ctxt: an XML parser context
1.21 daniel 3708: *
1.50 daniel 3709: * parse an XML public literal
1.68 daniel 3710: *
3711: * [12] PubidLiteral ::= '"' PubidChar* '"' | "'" (PubidChar - "'")* "'"
3712: *
3713: * Returns the PubidLiteral parsed or NULL.
1.21 daniel 3714: */
3715:
1.123 daniel 3716: xmlChar *
1.55 daniel 3717: xmlParsePubidLiteral(xmlParserCtxtPtr ctxt) {
1.135 daniel 3718: xmlChar *buf = NULL;
3719: int len = 0;
1.140 daniel 3720: int size = XML_PARSER_BUFFER_SIZE;
1.135 daniel 3721: xmlChar cur;
3722: xmlChar stop;
1.125 daniel 3723:
1.91 daniel 3724: SHRINK;
1.152 daniel 3725: if (RAW == '"') {
1.40 daniel 3726: NEXT;
1.135 daniel 3727: stop = '"';
1.152 daniel 3728: } else if (RAW == '\'') {
1.40 daniel 3729: NEXT;
1.135 daniel 3730: stop = '\'';
1.21 daniel 3731: } else {
1.55 daniel 3732: if ((ctxt->sax != NULL) && (ctxt->sax->error != NULL))
1.122 daniel 3733: ctxt->sax->error(ctxt->userData,
3734: "SystemLiteral \" or ' expected\n");
1.123 daniel 3735: ctxt->errNo = XML_ERR_LITERAL_NOT_STARTED;
1.59 daniel 3736: ctxt->wellFormed = 0;
1.135 daniel 3737: return(NULL);
3738: }
3739: buf = (xmlChar *) xmlMalloc(size * sizeof(xmlChar));
3740: if (buf == NULL) {
3741: fprintf(stderr, "malloc of %d byte failed\n", size);
3742: return(NULL);
3743: }
3744: cur = CUR;
3745: while ((IS_PUBIDCHAR(cur)) && (cur != stop)) {
3746: if (len + 1 >= size) {
3747: size *= 2;
3748: buf = xmlRealloc(buf, size * sizeof(xmlChar));
3749: if (buf == NULL) {
3750: fprintf(stderr, "realloc of %d byte failed\n", size);
3751: return(NULL);
3752: }
3753: }
3754: buf[len++] = cur;
3755: NEXT;
3756: cur = CUR;
3757: if (cur == 0) {
3758: GROW;
3759: SHRINK;
3760: cur = CUR;
3761: }
3762: }
3763: buf[len] = 0;
3764: if (cur != stop) {
3765: if ((ctxt->sax != NULL) && (ctxt->sax->error != NULL))
3766: ctxt->sax->error(ctxt->userData, "Unfinished PubidLiteral\n");
3767: ctxt->errNo = XML_ERR_LITERAL_NOT_FINISHED;
3768: ctxt->wellFormed = 0;
3769: } else {
3770: NEXT;
1.21 daniel 3771: }
1.135 daniel 3772: return(buf);
1.21 daniel 3773: }
3774:
1.50 daniel 3775: /**
3776: * xmlParseCharData:
3777: * @ctxt: an XML parser context
3778: * @cdata: int indicating whether we are within a CDATA section
3779: *
3780: * parse a CharData section.
3781: * if we are within a CDATA section ']]>' marks an end of section.
1.27 daniel 3782: *
1.151 daniel 3783: * The right angle bracket (>) may be represented using the string ">",
3784: * and must, for compatibility, be escaped using ">" or a character
3785: * reference when it appears in the string "]]>" in content, when that
3786: * string is not marking the end of a CDATA section.
3787: *
1.27 daniel 3788: * [14] CharData ::= [^<&]* - ([^<&]* ']]>' [^<&]*)
3789: */
3790:
1.55 daniel 3791: void
3792: xmlParseCharData(xmlParserCtxtPtr ctxt, int cdata) {
1.152 daniel 3793: xmlChar buf[XML_PARSER_BIG_BUFFER_SIZE + 5];
1.91 daniel 3794: int nbchar = 0;
1.152 daniel 3795: int cur, l;
1.27 daniel 3796:
1.91 daniel 3797: SHRINK;
1.152 daniel 3798: cur = CUR_CHAR(l);
1.160 daniel 3799: while ((IS_CHAR(cur)) && ((cur != '<') || (ctxt->token == '<')) &&
1.153 daniel 3800: ((cur != '&') || (ctxt->token == '&'))) {
1.97 daniel 3801: if ((cur == ']') && (NXT(1) == ']') &&
1.59 daniel 3802: (NXT(2) == '>')) {
3803: if (cdata) break;
3804: else {
3805: if ((ctxt->sax != NULL) && (ctxt->sax->error != NULL))
1.151 daniel 3806: ctxt->sax->error(ctxt->userData,
1.59 daniel 3807: "Sequence ']]>' not allowed in content\n");
1.123 daniel 3808: ctxt->errNo = XML_ERR_MISPLACED_CDATA_END;
1.151 daniel 3809: /* Should this be relaxed ??? I see a "must here */
3810: ctxt->wellFormed = 0;
1.59 daniel 3811: }
3812: }
1.152 daniel 3813: COPY_BUF(l,buf,nbchar,cur);
3814: if (nbchar >= XML_PARSER_BIG_BUFFER_SIZE) {
1.91 daniel 3815: /*
3816: * Ok the segment is to be consumed as chars.
3817: */
1.171 daniel 3818: if ((ctxt->sax != NULL) && (!ctxt->disableSAX)) {
1.91 daniel 3819: if (areBlanks(ctxt, buf, nbchar)) {
3820: if (ctxt->sax->ignorableWhitespace != NULL)
3821: ctxt->sax->ignorableWhitespace(ctxt->userData,
3822: buf, nbchar);
3823: } else {
3824: if (ctxt->sax->characters != NULL)
3825: ctxt->sax->characters(ctxt->userData, buf, nbchar);
3826: }
3827: }
3828: nbchar = 0;
3829: }
1.152 daniel 3830: NEXTL(l);
3831: cur = CUR_CHAR(l);
1.27 daniel 3832: }
1.91 daniel 3833: if (nbchar != 0) {
3834: /*
3835: * Ok the segment is to be consumed as chars.
3836: */
1.171 daniel 3837: if ((ctxt->sax != NULL) && (!ctxt->disableSAX)) {
1.91 daniel 3838: if (areBlanks(ctxt, buf, nbchar)) {
3839: if (ctxt->sax->ignorableWhitespace != NULL)
3840: ctxt->sax->ignorableWhitespace(ctxt->userData, buf, nbchar);
3841: } else {
3842: if (ctxt->sax->characters != NULL)
3843: ctxt->sax->characters(ctxt->userData, buf, nbchar);
3844: }
3845: }
1.45 daniel 3846: }
1.27 daniel 3847: }
3848:
1.50 daniel 3849: /**
3850: * xmlParseExternalID:
3851: * @ctxt: an XML parser context
1.123 daniel 3852: * @publicID: a xmlChar** receiving PubidLiteral
1.67 daniel 3853: * @strict: indicate whether we should restrict parsing to only
3854: * production [75], see NOTE below
1.50 daniel 3855: *
1.67 daniel 3856: * Parse an External ID or a Public ID
3857: *
3858: * NOTE: Productions [75] and [83] interract badly since [75] can generate
3859: * 'PUBLIC' S PubidLiteral S SystemLiteral
1.22 daniel 3860: *
3861: * [75] ExternalID ::= 'SYSTEM' S SystemLiteral
3862: * | 'PUBLIC' S PubidLiteral S SystemLiteral
1.67 daniel 3863: *
3864: * [83] PublicID ::= 'PUBLIC' S PubidLiteral
3865: *
1.68 daniel 3866: * Returns the function returns SystemLiteral and in the second
1.67 daniel 3867: * case publicID receives PubidLiteral, is strict is off
3868: * it is possible to return NULL and have publicID set.
1.22 daniel 3869: */
3870:
1.123 daniel 3871: xmlChar *
3872: xmlParseExternalID(xmlParserCtxtPtr ctxt, xmlChar **publicID, int strict) {
3873: xmlChar *URI = NULL;
1.22 daniel 3874:
1.91 daniel 3875: SHRINK;
1.152 daniel 3876: if ((RAW == 'S') && (NXT(1) == 'Y') &&
1.40 daniel 3877: (NXT(2) == 'S') && (NXT(3) == 'T') &&
3878: (NXT(4) == 'E') && (NXT(5) == 'M')) {
3879: SKIP(6);
1.59 daniel 3880: if (!IS_BLANK(CUR)) {
3881: if ((ctxt->sax != NULL) && (ctxt->sax->error != NULL))
1.74 daniel 3882: ctxt->sax->error(ctxt->userData,
1.59 daniel 3883: "Space required after 'SYSTEM'\n");
1.123 daniel 3884: ctxt->errNo = XML_ERR_SPACE_REQUIRED;
1.59 daniel 3885: ctxt->wellFormed = 0;
3886: }
1.42 daniel 3887: SKIP_BLANKS;
1.39 daniel 3888: URI = xmlParseSystemLiteral(ctxt);
1.59 daniel 3889: if (URI == NULL) {
1.55 daniel 3890: if ((ctxt->sax != NULL) && (ctxt->sax->error != NULL))
1.74 daniel 3891: ctxt->sax->error(ctxt->userData,
1.39 daniel 3892: "xmlParseExternalID: SYSTEM, no URI\n");
1.123 daniel 3893: ctxt->errNo = XML_ERR_URI_REQUIRED;
1.59 daniel 3894: ctxt->wellFormed = 0;
3895: }
1.152 daniel 3896: } else if ((RAW == 'P') && (NXT(1) == 'U') &&
1.40 daniel 3897: (NXT(2) == 'B') && (NXT(3) == 'L') &&
3898: (NXT(4) == 'I') && (NXT(5) == 'C')) {
3899: SKIP(6);
1.59 daniel 3900: if (!IS_BLANK(CUR)) {
3901: if ((ctxt->sax != NULL) && (ctxt->sax->error != NULL))
1.74 daniel 3902: ctxt->sax->error(ctxt->userData,
1.59 daniel 3903: "Space required after 'PUBLIC'\n");
1.123 daniel 3904: ctxt->errNo = XML_ERR_SPACE_REQUIRED;
1.59 daniel 3905: ctxt->wellFormed = 0;
3906: }
1.42 daniel 3907: SKIP_BLANKS;
1.39 daniel 3908: *publicID = xmlParsePubidLiteral(ctxt);
1.59 daniel 3909: if (*publicID == NULL) {
1.55 daniel 3910: if ((ctxt->sax != NULL) && (ctxt->sax->error != NULL))
1.74 daniel 3911: ctxt->sax->error(ctxt->userData,
1.39 daniel 3912: "xmlParseExternalID: PUBLIC, no Public Identifier\n");
1.123 daniel 3913: ctxt->errNo = XML_ERR_PUBID_REQUIRED;
1.59 daniel 3914: ctxt->wellFormed = 0;
3915: }
1.67 daniel 3916: if (strict) {
3917: /*
3918: * We don't handle [83] so "S SystemLiteral" is required.
3919: */
3920: if (!IS_BLANK(CUR)) {
3921: if ((ctxt->sax != NULL) && (ctxt->sax->error != NULL))
1.74 daniel 3922: ctxt->sax->error(ctxt->userData,
1.67 daniel 3923: "Space required after the Public Identifier\n");
1.123 daniel 3924: ctxt->errNo = XML_ERR_SPACE_REQUIRED;
1.67 daniel 3925: ctxt->wellFormed = 0;
3926: }
3927: } else {
3928: /*
3929: * We handle [83] so we return immediately, if
3930: * "S SystemLiteral" is not detected. From a purely parsing
3931: * point of view that's a nice mess.
3932: */
1.135 daniel 3933: const xmlChar *ptr;
3934: GROW;
3935:
3936: ptr = CUR_PTR;
1.67 daniel 3937: if (!IS_BLANK(*ptr)) return(NULL);
3938:
3939: while (IS_BLANK(*ptr)) ptr++;
1.173 ! daniel 3940: if ((*ptr != '\'') && (*ptr != '"')) return(NULL);
1.59 daniel 3941: }
1.42 daniel 3942: SKIP_BLANKS;
1.39 daniel 3943: URI = xmlParseSystemLiteral(ctxt);
1.59 daniel 3944: if (URI == NULL) {
1.55 daniel 3945: if ((ctxt->sax != NULL) && (ctxt->sax->error != NULL))
1.74 daniel 3946: ctxt->sax->error(ctxt->userData,
1.39 daniel 3947: "xmlParseExternalID: PUBLIC, no URI\n");
1.123 daniel 3948: ctxt->errNo = XML_ERR_URI_REQUIRED;
1.59 daniel 3949: ctxt->wellFormed = 0;
3950: }
1.22 daniel 3951: }
1.39 daniel 3952: return(URI);
1.22 daniel 3953: }
3954:
1.50 daniel 3955: /**
3956: * xmlParseComment:
1.69 daniel 3957: * @ctxt: an XML parser context
1.50 daniel 3958: *
1.3 veillard 3959: * Skip an XML (SGML) comment <!-- .... -->
1.38 daniel 3960: * The spec says that "For compatibility, the string "--" (double-hyphen)
3961: * must not occur within comments. "
1.22 daniel 3962: *
3963: * [15] Comment ::= '<!--' ((Char - '-') | ('-' (Char - '-')))* '-->'
1.3 veillard 3964: */
1.72 daniel 3965: void
1.114 daniel 3966: xmlParseComment(xmlParserCtxtPtr ctxt) {
1.135 daniel 3967: xmlChar *buf = NULL;
3968: int len = 0;
1.140 daniel 3969: int size = XML_PARSER_BUFFER_SIZE;
1.152 daniel 3970: int q, ql;
3971: int r, rl;
3972: int cur, l;
1.140 daniel 3973: xmlParserInputState state;
1.3 veillard 3974:
3975: /*
1.22 daniel 3976: * Check that there is a comment right here.
1.3 veillard 3977: */
1.152 daniel 3978: if ((RAW != '<') || (NXT(1) != '!') ||
1.72 daniel 3979: (NXT(2) != '-') || (NXT(3) != '-')) return;
1.3 veillard 3980:
1.140 daniel 3981: state = ctxt->instate;
1.97 daniel 3982: ctxt->instate = XML_PARSER_COMMENT;
1.91 daniel 3983: SHRINK;
1.40 daniel 3984: SKIP(4);
1.135 daniel 3985: buf = (xmlChar *) xmlMalloc(size * sizeof(xmlChar));
3986: if (buf == NULL) {
3987: fprintf(stderr, "malloc of %d byte failed\n", size);
1.140 daniel 3988: ctxt->instate = state;
1.135 daniel 3989: return;
3990: }
1.152 daniel 3991: q = CUR_CHAR(ql);
3992: NEXTL(ql);
3993: r = CUR_CHAR(rl);
3994: NEXTL(rl);
3995: cur = CUR_CHAR(l);
1.135 daniel 3996: while (IS_CHAR(cur) &&
3997: ((cur != '>') ||
3998: (r != '-') || (q != '-'))) {
3999: if ((r == '-') && (q == '-')) {
1.55 daniel 4000: if ((ctxt->sax != NULL) && (ctxt->sax->error != NULL))
1.74 daniel 4001: ctxt->sax->error(ctxt->userData,
1.38 daniel 4002: "Comment must not contain '--' (double-hyphen)`\n");
1.123 daniel 4003: ctxt->errNo = XML_ERR_HYPHEN_IN_COMMENT;
1.59 daniel 4004: ctxt->wellFormed = 0;
4005: }
1.152 daniel 4006: if (len + 5 >= size) {
1.135 daniel 4007: size *= 2;
4008: buf = xmlRealloc(buf, size * sizeof(xmlChar));
4009: if (buf == NULL) {
4010: fprintf(stderr, "realloc of %d byte failed\n", size);
1.140 daniel 4011: ctxt->instate = state;
1.135 daniel 4012: return;
4013: }
4014: }
1.152 daniel 4015: COPY_BUF(ql,buf,len,q);
1.135 daniel 4016: q = r;
1.152 daniel 4017: ql = rl;
1.135 daniel 4018: r = cur;
1.152 daniel 4019: rl = l;
4020: NEXTL(l);
4021: cur = CUR_CHAR(l);
1.135 daniel 4022: if (cur == 0) {
4023: SHRINK;
4024: GROW;
1.152 daniel 4025: cur = CUR_CHAR(l);
1.135 daniel 4026: }
1.3 veillard 4027: }
1.135 daniel 4028: buf[len] = 0;
4029: if (!IS_CHAR(cur)) {
1.55 daniel 4030: if ((ctxt->sax != NULL) && (ctxt->sax->error != NULL))
1.122 daniel 4031: ctxt->sax->error(ctxt->userData,
1.135 daniel 4032: "Comment not terminated \n<!--%.50s\n", buf);
1.123 daniel 4033: ctxt->errNo = XML_ERR_COMMENT_NOT_FINISHED;
1.59 daniel 4034: ctxt->wellFormed = 0;
1.3 veillard 4035: } else {
1.40 daniel 4036: NEXT;
1.171 daniel 4037: if ((ctxt->sax != NULL) && (ctxt->sax->comment != NULL) &&
4038: (!ctxt->disableSAX))
1.135 daniel 4039: ctxt->sax->comment(ctxt->userData, buf);
4040: xmlFree(buf);
1.3 veillard 4041: }
1.140 daniel 4042: ctxt->instate = state;
1.3 veillard 4043: }
4044:
1.50 daniel 4045: /**
4046: * xmlParsePITarget:
4047: * @ctxt: an XML parser context
4048: *
4049: * parse the name of a PI
1.22 daniel 4050: *
4051: * [17] PITarget ::= Name - (('X' | 'x') ('M' | 'm') ('L' | 'l'))
1.68 daniel 4052: *
4053: * Returns the PITarget name or NULL
1.22 daniel 4054: */
4055:
1.123 daniel 4056: xmlChar *
1.55 daniel 4057: xmlParsePITarget(xmlParserCtxtPtr ctxt) {
1.123 daniel 4058: xmlChar *name;
1.22 daniel 4059:
4060: name = xmlParseName(ctxt);
1.139 daniel 4061: if ((name != NULL) &&
1.22 daniel 4062: ((name[0] == 'x') || (name[0] == 'X')) &&
1.31 daniel 4063: ((name[1] == 'm') || (name[1] == 'M')) &&
4064: ((name[2] == 'l') || (name[2] == 'L'))) {
1.139 daniel 4065: int i;
1.151 daniel 4066: if ((name[0] = 'x') && (name[1] == 'm') &&
4067: (name[2] = 'l') && (name[3] == 0)) {
4068: if ((ctxt->sax != NULL) && (ctxt->sax->error != NULL))
4069: ctxt->sax->error(ctxt->userData,
4070: "XML declaration allowed only at the start of the document\n");
4071: ctxt->errNo = XML_ERR_RESERVED_XML_NAME;
4072: ctxt->wellFormed = 0;
4073: return(name);
4074: } else if (name[3] == 0) {
4075: if ((ctxt->sax != NULL) && (ctxt->sax->error != NULL))
4076: ctxt->sax->error(ctxt->userData, "Invalid PI name\n");
4077: ctxt->errNo = XML_ERR_RESERVED_XML_NAME;
4078: ctxt->wellFormed = 0;
4079: return(name);
4080: }
1.139 daniel 4081: for (i = 0;;i++) {
4082: if (xmlW3CPIs[i] == NULL) break;
4083: if (!xmlStrcmp(name, (const xmlChar *)xmlW3CPIs[i]))
4084: return(name);
4085: }
4086: if ((ctxt->sax != NULL) && (ctxt->sax->warning != NULL)) {
4087: ctxt->sax->warning(ctxt->userData,
1.122 daniel 4088: "xmlParsePItarget: invalid name prefix 'xml'\n");
1.123 daniel 4089: ctxt->errNo = XML_ERR_RESERVED_XML_NAME;
1.122 daniel 4090: }
1.22 daniel 4091: }
4092: return(name);
4093: }
4094:
1.50 daniel 4095: /**
4096: * xmlParsePI:
4097: * @ctxt: an XML parser context
4098: *
4099: * parse an XML Processing Instruction.
1.22 daniel 4100: *
4101: * [16] PI ::= '<?' PITarget (S (Char* - (Char* '?>' Char*)))? '?>'
1.68 daniel 4102: *
1.69 daniel 4103: * The processing is transfered to SAX once parsed.
1.3 veillard 4104: */
4105:
1.55 daniel 4106: void
4107: xmlParsePI(xmlParserCtxtPtr ctxt) {
1.135 daniel 4108: xmlChar *buf = NULL;
4109: int len = 0;
1.140 daniel 4110: int size = XML_PARSER_BUFFER_SIZE;
1.152 daniel 4111: int cur, l;
1.123 daniel 4112: xmlChar *target;
1.140 daniel 4113: xmlParserInputState state;
1.22 daniel 4114:
1.152 daniel 4115: if ((RAW == '<') && (NXT(1) == '?')) {
1.140 daniel 4116: state = ctxt->instate;
4117: ctxt->instate = XML_PARSER_PI;
1.3 veillard 4118: /*
4119: * this is a Processing Instruction.
4120: */
1.40 daniel 4121: SKIP(2);
1.91 daniel 4122: SHRINK;
1.3 veillard 4123:
4124: /*
1.22 daniel 4125: * Parse the target name and check for special support like
4126: * namespace.
1.3 veillard 4127: */
1.22 daniel 4128: target = xmlParsePITarget(ctxt);
4129: if (target != NULL) {
1.156 daniel 4130: if ((RAW == '?') && (NXT(1) == '>')) {
4131: SKIP(2);
4132:
4133: /*
4134: * SAX: PI detected.
4135: */
1.171 daniel 4136: if ((ctxt->sax) && (!ctxt->disableSAX) &&
1.156 daniel 4137: (ctxt->sax->processingInstruction != NULL))
4138: ctxt->sax->processingInstruction(ctxt->userData,
4139: target, NULL);
4140: ctxt->instate = state;
1.170 daniel 4141: xmlFree(target);
1.156 daniel 4142: return;
4143: }
1.135 daniel 4144: buf = (xmlChar *) xmlMalloc(size * sizeof(xmlChar));
4145: if (buf == NULL) {
4146: fprintf(stderr, "malloc of %d byte failed\n", size);
1.140 daniel 4147: ctxt->instate = state;
1.135 daniel 4148: return;
4149: }
4150: cur = CUR;
4151: if (!IS_BLANK(cur)) {
1.114 daniel 4152: if ((ctxt->sax != NULL) && (ctxt->sax->error != NULL))
4153: ctxt->sax->error(ctxt->userData,
4154: "xmlParsePI: PI %s space expected\n", target);
1.123 daniel 4155: ctxt->errNo = XML_ERR_SPACE_REQUIRED;
1.114 daniel 4156: ctxt->wellFormed = 0;
4157: }
4158: SKIP_BLANKS;
1.152 daniel 4159: cur = CUR_CHAR(l);
1.135 daniel 4160: while (IS_CHAR(cur) &&
4161: ((cur != '?') || (NXT(1) != '>'))) {
1.152 daniel 4162: if (len + 5 >= size) {
1.135 daniel 4163: size *= 2;
4164: buf = xmlRealloc(buf, size * sizeof(xmlChar));
4165: if (buf == NULL) {
4166: fprintf(stderr, "realloc of %d byte failed\n", size);
1.140 daniel 4167: ctxt->instate = state;
1.135 daniel 4168: return;
4169: }
4170: }
1.152 daniel 4171: COPY_BUF(l,buf,len,cur);
4172: NEXTL(l);
4173: cur = CUR_CHAR(l);
1.135 daniel 4174: if (cur == 0) {
4175: SHRINK;
4176: GROW;
1.152 daniel 4177: cur = CUR_CHAR(l);
1.135 daniel 4178: }
4179: }
4180: buf[len] = 0;
1.152 daniel 4181: if (cur != '?') {
1.72 daniel 4182: if ((ctxt->sax != NULL) && (ctxt->sax->error != NULL))
1.74 daniel 4183: ctxt->sax->error(ctxt->userData,
1.72 daniel 4184: "xmlParsePI: PI %s never end ...\n", target);
1.123 daniel 4185: ctxt->errNo = XML_ERR_PI_NOT_FINISHED;
1.72 daniel 4186: ctxt->wellFormed = 0;
1.22 daniel 4187: } else {
1.72 daniel 4188: SKIP(2);
1.44 daniel 4189:
1.72 daniel 4190: /*
4191: * SAX: PI detected.
4192: */
1.171 daniel 4193: if ((ctxt->sax) && (!ctxt->disableSAX) &&
1.72 daniel 4194: (ctxt->sax->processingInstruction != NULL))
1.99 daniel 4195: ctxt->sax->processingInstruction(ctxt->userData,
1.135 daniel 4196: target, buf);
1.22 daniel 4197: }
1.135 daniel 4198: xmlFree(buf);
1.119 daniel 4199: xmlFree(target);
1.3 veillard 4200: } else {
1.55 daniel 4201: if ((ctxt->sax != NULL) && (ctxt->sax->error != NULL))
1.99 daniel 4202: ctxt->sax->error(ctxt->userData,
4203: "xmlParsePI : no target name\n");
1.123 daniel 4204: ctxt->errNo = XML_ERR_PI_NOT_STARTED;
1.59 daniel 4205: ctxt->wellFormed = 0;
1.22 daniel 4206: }
1.140 daniel 4207: ctxt->instate = state;
1.22 daniel 4208: }
4209: }
4210:
1.50 daniel 4211: /**
4212: * xmlParseNotationDecl:
4213: * @ctxt: an XML parser context
4214: *
4215: * parse a notation declaration
1.22 daniel 4216: *
4217: * [82] NotationDecl ::= '<!NOTATION' S Name S (ExternalID | PublicID) S? '>'
4218: *
4219: * Hence there is actually 3 choices:
4220: * 'PUBLIC' S PubidLiteral
4221: * 'PUBLIC' S PubidLiteral S SystemLiteral
4222: * and 'SYSTEM' S SystemLiteral
1.50 daniel 4223: *
1.67 daniel 4224: * See the NOTE on xmlParseExternalID().
1.22 daniel 4225: */
4226:
1.55 daniel 4227: void
4228: xmlParseNotationDecl(xmlParserCtxtPtr ctxt) {
1.123 daniel 4229: xmlChar *name;
4230: xmlChar *Pubid;
4231: xmlChar *Systemid;
1.22 daniel 4232:
1.152 daniel 4233: if ((RAW == '<') && (NXT(1) == '!') &&
1.40 daniel 4234: (NXT(2) == 'N') && (NXT(3) == 'O') &&
4235: (NXT(4) == 'T') && (NXT(5) == 'A') &&
4236: (NXT(6) == 'T') && (NXT(7) == 'I') &&
1.67 daniel 4237: (NXT(8) == 'O') && (NXT(9) == 'N')) {
1.91 daniel 4238: SHRINK;
1.40 daniel 4239: SKIP(10);
1.67 daniel 4240: if (!IS_BLANK(CUR)) {
4241: if ((ctxt->sax != NULL) && (ctxt->sax->error != NULL))
1.122 daniel 4242: ctxt->sax->error(ctxt->userData,
4243: "Space required after '<!NOTATION'\n");
1.123 daniel 4244: ctxt->errNo = XML_ERR_SPACE_REQUIRED;
1.67 daniel 4245: ctxt->wellFormed = 0;
4246: return;
4247: }
4248: SKIP_BLANKS;
1.22 daniel 4249:
4250: name = xmlParseName(ctxt);
4251: if (name == NULL) {
1.55 daniel 4252: if ((ctxt->sax != NULL) && (ctxt->sax->error != NULL))
1.122 daniel 4253: ctxt->sax->error(ctxt->userData,
4254: "NOTATION: Name expected here\n");
1.123 daniel 4255: ctxt->errNo = XML_ERR_NOTATION_NOT_STARTED;
1.67 daniel 4256: ctxt->wellFormed = 0;
4257: return;
4258: }
4259: if (!IS_BLANK(CUR)) {
4260: if ((ctxt->sax != NULL) && (ctxt->sax->error != NULL))
1.74 daniel 4261: ctxt->sax->error(ctxt->userData,
1.67 daniel 4262: "Space required after the NOTATION name'\n");
1.123 daniel 4263: ctxt->errNo = XML_ERR_SPACE_REQUIRED;
1.59 daniel 4264: ctxt->wellFormed = 0;
1.22 daniel 4265: return;
4266: }
1.42 daniel 4267: SKIP_BLANKS;
1.67 daniel 4268:
1.22 daniel 4269: /*
1.67 daniel 4270: * Parse the IDs.
1.22 daniel 4271: */
1.160 daniel 4272: Systemid = xmlParseExternalID(ctxt, &Pubid, 0);
1.67 daniel 4273: SKIP_BLANKS;
4274:
1.152 daniel 4275: if (RAW == '>') {
1.40 daniel 4276: NEXT;
1.171 daniel 4277: if ((ctxt->sax != NULL) && (!ctxt->disableSAX) &&
4278: (ctxt->sax->notationDecl != NULL))
1.74 daniel 4279: ctxt->sax->notationDecl(ctxt->userData, name, Pubid, Systemid);
1.67 daniel 4280: } else {
4281: if ((ctxt->sax != NULL) && (ctxt->sax->error != NULL))
1.74 daniel 4282: ctxt->sax->error(ctxt->userData,
1.67 daniel 4283: "'>' required to close NOTATION declaration\n");
1.123 daniel 4284: ctxt->errNo = XML_ERR_NOTATION_NOT_FINISHED;
1.67 daniel 4285: ctxt->wellFormed = 0;
4286: }
1.119 daniel 4287: xmlFree(name);
4288: if (Systemid != NULL) xmlFree(Systemid);
4289: if (Pubid != NULL) xmlFree(Pubid);
1.22 daniel 4290: }
4291: }
4292:
1.50 daniel 4293: /**
4294: * xmlParseEntityDecl:
4295: * @ctxt: an XML parser context
4296: *
4297: * parse <!ENTITY declarations
1.22 daniel 4298: *
4299: * [70] EntityDecl ::= GEDecl | PEDecl
4300: *
4301: * [71] GEDecl ::= '<!ENTITY' S Name S EntityDef S? '>'
4302: *
4303: * [72] PEDecl ::= '<!ENTITY' S '%' S Name S PEDef S? '>'
4304: *
4305: * [73] EntityDef ::= EntityValue | (ExternalID NDataDecl?)
4306: *
4307: * [74] PEDef ::= EntityValue | ExternalID
1.24 daniel 4308: *
4309: * [76] NDataDecl ::= S 'NDATA' S Name
1.99 daniel 4310: *
4311: * [ VC: Notation Declared ]
1.116 daniel 4312: * The Name must match the declared name of a notation.
1.22 daniel 4313: */
4314:
1.55 daniel 4315: void
4316: xmlParseEntityDecl(xmlParserCtxtPtr ctxt) {
1.123 daniel 4317: xmlChar *name = NULL;
4318: xmlChar *value = NULL;
4319: xmlChar *URI = NULL, *literal = NULL;
4320: xmlChar *ndata = NULL;
1.39 daniel 4321: int isParameter = 0;
1.123 daniel 4322: xmlChar *orig = NULL;
1.22 daniel 4323:
1.94 daniel 4324: GROW;
1.152 daniel 4325: if ((RAW == '<') && (NXT(1) == '!') &&
1.40 daniel 4326: (NXT(2) == 'E') && (NXT(3) == 'N') &&
4327: (NXT(4) == 'T') && (NXT(5) == 'I') &&
1.59 daniel 4328: (NXT(6) == 'T') && (NXT(7) == 'Y')) {
1.96 daniel 4329: ctxt->instate = XML_PARSER_ENTITY_DECL;
1.91 daniel 4330: SHRINK;
1.40 daniel 4331: SKIP(8);
1.59 daniel 4332: if (!IS_BLANK(CUR)) {
4333: if ((ctxt->sax != NULL) && (ctxt->sax->error != NULL))
1.122 daniel 4334: ctxt->sax->error(ctxt->userData,
4335: "Space required after '<!ENTITY'\n");
1.123 daniel 4336: ctxt->errNo = XML_ERR_SPACE_REQUIRED;
1.59 daniel 4337: ctxt->wellFormed = 0;
4338: }
4339: SKIP_BLANKS;
1.40 daniel 4340:
1.152 daniel 4341: if (RAW == '%') {
1.40 daniel 4342: NEXT;
1.59 daniel 4343: if (!IS_BLANK(CUR)) {
4344: if ((ctxt->sax != NULL) && (ctxt->sax->error != NULL))
1.122 daniel 4345: ctxt->sax->error(ctxt->userData,
4346: "Space required after '%'\n");
1.123 daniel 4347: ctxt->errNo = XML_ERR_SPACE_REQUIRED;
1.59 daniel 4348: ctxt->wellFormed = 0;
4349: }
1.42 daniel 4350: SKIP_BLANKS;
1.39 daniel 4351: isParameter = 1;
1.22 daniel 4352: }
4353:
4354: name = xmlParseName(ctxt);
1.24 daniel 4355: if (name == NULL) {
1.55 daniel 4356: if ((ctxt->sax != NULL) && (ctxt->sax->error != NULL))
1.74 daniel 4357: ctxt->sax->error(ctxt->userData, "xmlParseEntityDecl: no name\n");
1.123 daniel 4358: ctxt->errNo = XML_ERR_NAME_REQUIRED;
1.59 daniel 4359: ctxt->wellFormed = 0;
1.24 daniel 4360: return;
4361: }
1.59 daniel 4362: if (!IS_BLANK(CUR)) {
4363: if ((ctxt->sax != NULL) && (ctxt->sax->error != NULL))
1.74 daniel 4364: ctxt->sax->error(ctxt->userData,
1.59 daniel 4365: "Space required after the entity name\n");
1.123 daniel 4366: ctxt->errNo = XML_ERR_SPACE_REQUIRED;
1.59 daniel 4367: ctxt->wellFormed = 0;
4368: }
1.42 daniel 4369: SKIP_BLANKS;
1.24 daniel 4370:
1.22 daniel 4371: /*
1.68 daniel 4372: * handle the various case of definitions...
1.22 daniel 4373: */
1.39 daniel 4374: if (isParameter) {
1.152 daniel 4375: if ((RAW == '"') || (RAW == '\''))
1.78 daniel 4376: value = xmlParseEntityValue(ctxt, &orig);
1.39 daniel 4377: if (value) {
1.171 daniel 4378: if ((ctxt->sax != NULL) &&
4379: (!ctxt->disableSAX) && (ctxt->sax->entityDecl != NULL))
1.74 daniel 4380: ctxt->sax->entityDecl(ctxt->userData, name,
1.39 daniel 4381: XML_INTERNAL_PARAMETER_ENTITY,
4382: NULL, NULL, value);
4383: }
1.24 daniel 4384: else {
1.67 daniel 4385: URI = xmlParseExternalID(ctxt, &literal, 1);
1.169 daniel 4386: if ((URI == NULL) && (literal == NULL)) {
4387: if ((ctxt->sax != NULL) && (ctxt->sax->error != NULL))
4388: ctxt->sax->error(ctxt->userData,
4389: "Entity value required\n");
4390: ctxt->errNo = XML_ERR_VALUE_REQUIRED;
4391: ctxt->wellFormed = 0;
4392: }
1.39 daniel 4393: if (URI) {
1.171 daniel 4394: if ((ctxt->sax != NULL) &&
4395: (!ctxt->disableSAX) && (ctxt->sax->entityDecl != NULL))
1.74 daniel 4396: ctxt->sax->entityDecl(ctxt->userData, name,
1.39 daniel 4397: XML_EXTERNAL_PARAMETER_ENTITY,
4398: literal, URI, NULL);
4399: }
1.24 daniel 4400: }
4401: } else {
1.152 daniel 4402: if ((RAW == '"') || (RAW == '\'')) {
1.78 daniel 4403: value = xmlParseEntityValue(ctxt, &orig);
1.171 daniel 4404: if ((ctxt->sax != NULL) &&
4405: (!ctxt->disableSAX) && (ctxt->sax->entityDecl != NULL))
1.74 daniel 4406: ctxt->sax->entityDecl(ctxt->userData, name,
1.39 daniel 4407: XML_INTERNAL_GENERAL_ENTITY,
4408: NULL, NULL, value);
4409: } else {
1.67 daniel 4410: URI = xmlParseExternalID(ctxt, &literal, 1);
1.169 daniel 4411: if ((URI == NULL) && (literal == NULL)) {
4412: if ((ctxt->sax != NULL) && (ctxt->sax->error != NULL))
4413: ctxt->sax->error(ctxt->userData,
4414: "Entity value required\n");
4415: ctxt->errNo = XML_ERR_VALUE_REQUIRED;
4416: ctxt->wellFormed = 0;
4417: }
1.152 daniel 4418: if ((RAW != '>') && (!IS_BLANK(CUR))) {
1.59 daniel 4419: if ((ctxt->sax != NULL) && (ctxt->sax->error != NULL))
1.74 daniel 4420: ctxt->sax->error(ctxt->userData,
1.59 daniel 4421: "Space required before 'NDATA'\n");
1.123 daniel 4422: ctxt->errNo = XML_ERR_SPACE_REQUIRED;
1.59 daniel 4423: ctxt->wellFormed = 0;
4424: }
1.42 daniel 4425: SKIP_BLANKS;
1.152 daniel 4426: if ((RAW == 'N') && (NXT(1) == 'D') &&
1.40 daniel 4427: (NXT(2) == 'A') && (NXT(3) == 'T') &&
4428: (NXT(4) == 'A')) {
4429: SKIP(5);
1.59 daniel 4430: if (!IS_BLANK(CUR)) {
4431: if ((ctxt->sax != NULL) && (ctxt->sax->error != NULL))
1.74 daniel 4432: ctxt->sax->error(ctxt->userData,
1.59 daniel 4433: "Space required after 'NDATA'\n");
1.123 daniel 4434: ctxt->errNo = XML_ERR_SPACE_REQUIRED;
1.59 daniel 4435: ctxt->wellFormed = 0;
4436: }
1.42 daniel 4437: SKIP_BLANKS;
1.24 daniel 4438: ndata = xmlParseName(ctxt);
1.171 daniel 4439: if ((ctxt->sax != NULL) && (!ctxt->disableSAX) &&
1.116 daniel 4440: (ctxt->sax->unparsedEntityDecl != NULL))
4441: ctxt->sax->unparsedEntityDecl(ctxt->userData, name,
1.39 daniel 4442: literal, URI, ndata);
4443: } else {
1.171 daniel 4444: if ((ctxt->sax != NULL) &&
4445: (!ctxt->disableSAX) && (ctxt->sax->entityDecl != NULL))
1.74 daniel 4446: ctxt->sax->entityDecl(ctxt->userData, name,
1.39 daniel 4447: XML_EXTERNAL_GENERAL_PARSED_ENTITY,
4448: literal, URI, NULL);
1.24 daniel 4449: }
4450: }
4451: }
1.42 daniel 4452: SKIP_BLANKS;
1.152 daniel 4453: if (RAW != '>') {
1.55 daniel 4454: if ((ctxt->sax != NULL) && (ctxt->sax->error != NULL))
1.74 daniel 4455: ctxt->sax->error(ctxt->userData,
1.31 daniel 4456: "xmlParseEntityDecl: entity %s not terminated\n", name);
1.123 daniel 4457: ctxt->errNo = XML_ERR_ENTITY_NOT_FINISHED;
1.59 daniel 4458: ctxt->wellFormed = 0;
1.24 daniel 4459: } else
1.40 daniel 4460: NEXT;
1.78 daniel 4461: if (orig != NULL) {
4462: /*
1.98 daniel 4463: * Ugly mechanism to save the raw entity value.
1.78 daniel 4464: */
4465: xmlEntityPtr cur = NULL;
4466:
1.98 daniel 4467: if (isParameter) {
4468: if ((ctxt->sax != NULL) &&
4469: (ctxt->sax->getParameterEntity != NULL))
1.120 daniel 4470: cur = ctxt->sax->getParameterEntity(ctxt->userData, name);
1.98 daniel 4471: } else {
4472: if ((ctxt->sax != NULL) &&
4473: (ctxt->sax->getEntity != NULL))
1.120 daniel 4474: cur = ctxt->sax->getEntity(ctxt->userData, name);
1.98 daniel 4475: }
4476: if (cur != NULL) {
4477: if (cur->orig != NULL)
1.119 daniel 4478: xmlFree(orig);
1.98 daniel 4479: else
4480: cur->orig = orig;
4481: } else
1.119 daniel 4482: xmlFree(orig);
1.78 daniel 4483: }
1.119 daniel 4484: if (name != NULL) xmlFree(name);
4485: if (value != NULL) xmlFree(value);
4486: if (URI != NULL) xmlFree(URI);
4487: if (literal != NULL) xmlFree(literal);
4488: if (ndata != NULL) xmlFree(ndata);
1.22 daniel 4489: }
4490: }
4491:
1.50 daniel 4492: /**
1.59 daniel 4493: * xmlParseDefaultDecl:
4494: * @ctxt: an XML parser context
4495: * @value: Receive a possible fixed default value for the attribute
4496: *
4497: * Parse an attribute default declaration
4498: *
4499: * [60] DefaultDecl ::= '#REQUIRED' | '#IMPLIED' | (('#FIXED' S)? AttValue)
4500: *
1.99 daniel 4501: * [ VC: Required Attribute ]
1.117 daniel 4502: * if the default declaration is the keyword #REQUIRED, then the
4503: * attribute must be specified for all elements of the type in the
4504: * attribute-list declaration.
1.99 daniel 4505: *
4506: * [ VC: Attribute Default Legal ]
1.102 daniel 4507: * The declared default value must meet the lexical constraints of
4508: * the declared attribute type c.f. xmlValidateAttributeDecl()
1.99 daniel 4509: *
4510: * [ VC: Fixed Attribute Default ]
1.117 daniel 4511: * if an attribute has a default value declared with the #FIXED
4512: * keyword, instances of that attribute must match the default value.
1.99 daniel 4513: *
4514: * [ WFC: No < in Attribute Values ]
4515: * handled in xmlParseAttValue()
4516: *
1.59 daniel 4517: * returns: XML_ATTRIBUTE_NONE, XML_ATTRIBUTE_REQUIRED, XML_ATTRIBUTE_IMPLIED
4518: * or XML_ATTRIBUTE_FIXED.
4519: */
4520:
4521: int
1.123 daniel 4522: xmlParseDefaultDecl(xmlParserCtxtPtr ctxt, xmlChar **value) {
1.59 daniel 4523: int val;
1.123 daniel 4524: xmlChar *ret;
1.59 daniel 4525:
4526: *value = NULL;
1.152 daniel 4527: if ((RAW == '#') && (NXT(1) == 'R') &&
1.59 daniel 4528: (NXT(2) == 'E') && (NXT(3) == 'Q') &&
4529: (NXT(4) == 'U') && (NXT(5) == 'I') &&
4530: (NXT(6) == 'R') && (NXT(7) == 'E') &&
4531: (NXT(8) == 'D')) {
4532: SKIP(9);
4533: return(XML_ATTRIBUTE_REQUIRED);
4534: }
1.152 daniel 4535: if ((RAW == '#') && (NXT(1) == 'I') &&
1.59 daniel 4536: (NXT(2) == 'M') && (NXT(3) == 'P') &&
4537: (NXT(4) == 'L') && (NXT(5) == 'I') &&
4538: (NXT(6) == 'E') && (NXT(7) == 'D')) {
4539: SKIP(8);
4540: return(XML_ATTRIBUTE_IMPLIED);
4541: }
4542: val = XML_ATTRIBUTE_NONE;
1.152 daniel 4543: if ((RAW == '#') && (NXT(1) == 'F') &&
1.59 daniel 4544: (NXT(2) == 'I') && (NXT(3) == 'X') &&
4545: (NXT(4) == 'E') && (NXT(5) == 'D')) {
4546: SKIP(6);
4547: val = XML_ATTRIBUTE_FIXED;
4548: if (!IS_BLANK(CUR)) {
4549: if ((ctxt->sax != NULL) && (ctxt->sax->error != NULL))
1.122 daniel 4550: ctxt->sax->error(ctxt->userData,
4551: "Space required after '#FIXED'\n");
1.123 daniel 4552: ctxt->errNo = XML_ERR_SPACE_REQUIRED;
1.59 daniel 4553: ctxt->wellFormed = 0;
4554: }
4555: SKIP_BLANKS;
4556: }
4557: ret = xmlParseAttValue(ctxt);
1.96 daniel 4558: ctxt->instate = XML_PARSER_DTD;
1.59 daniel 4559: if (ret == NULL) {
4560: if ((ctxt->sax != NULL) && (ctxt->sax->error != NULL))
1.74 daniel 4561: ctxt->sax->error(ctxt->userData,
1.59 daniel 4562: "Attribute default value declaration error\n");
4563: ctxt->wellFormed = 0;
4564: } else
4565: *value = ret;
4566: return(val);
4567: }
4568:
4569: /**
1.66 daniel 4570: * xmlParseNotationType:
4571: * @ctxt: an XML parser context
4572: *
4573: * parse an Notation attribute type.
4574: *
1.99 daniel 4575: * Note: the leading 'NOTATION' S part has already being parsed...
4576: *
1.66 daniel 4577: * [58] NotationType ::= 'NOTATION' S '(' S? Name (S? '|' S? Name)* S? ')'
4578: *
1.99 daniel 4579: * [ VC: Notation Attributes ]
1.117 daniel 4580: * Values of this type must match one of the notation names included
1.99 daniel 4581: * in the declaration; all notation names in the declaration must be declared.
1.66 daniel 4582: *
4583: * Returns: the notation attribute tree built while parsing
4584: */
4585:
4586: xmlEnumerationPtr
4587: xmlParseNotationType(xmlParserCtxtPtr ctxt) {
1.123 daniel 4588: xmlChar *name;
1.66 daniel 4589: xmlEnumerationPtr ret = NULL, last = NULL, cur;
4590:
1.152 daniel 4591: if (RAW != '(') {
1.66 daniel 4592: if ((ctxt->sax != NULL) && (ctxt->sax->error != NULL))
1.122 daniel 4593: ctxt->sax->error(ctxt->userData,
4594: "'(' required to start 'NOTATION'\n");
1.123 daniel 4595: ctxt->errNo = XML_ERR_NOTATION_NOT_STARTED;
1.66 daniel 4596: ctxt->wellFormed = 0;
4597: return(NULL);
4598: }
1.91 daniel 4599: SHRINK;
1.66 daniel 4600: do {
4601: NEXT;
4602: SKIP_BLANKS;
4603: name = xmlParseName(ctxt);
4604: if (name == NULL) {
4605: if ((ctxt->sax != NULL) && (ctxt->sax->error != NULL))
1.74 daniel 4606: ctxt->sax->error(ctxt->userData,
1.66 daniel 4607: "Name expected in NOTATION declaration\n");
1.123 daniel 4608: ctxt->errNo = XML_ERR_NAME_REQUIRED;
1.66 daniel 4609: ctxt->wellFormed = 0;
4610: return(ret);
4611: }
4612: cur = xmlCreateEnumeration(name);
1.119 daniel 4613: xmlFree(name);
1.66 daniel 4614: if (cur == NULL) return(ret);
4615: if (last == NULL) ret = last = cur;
4616: else {
4617: last->next = cur;
4618: last = cur;
4619: }
4620: SKIP_BLANKS;
1.152 daniel 4621: } while (RAW == '|');
4622: if (RAW != ')') {
1.66 daniel 4623: if ((ctxt->sax != NULL) && (ctxt->sax->error != NULL))
1.74 daniel 4624: ctxt->sax->error(ctxt->userData,
1.66 daniel 4625: "')' required to finish NOTATION declaration\n");
1.123 daniel 4626: ctxt->errNo = XML_ERR_NOTATION_NOT_FINISHED;
1.66 daniel 4627: ctxt->wellFormed = 0;
1.170 daniel 4628: if ((last != NULL) && (last != ret))
4629: xmlFreeEnumeration(last);
1.66 daniel 4630: return(ret);
4631: }
4632: NEXT;
4633: return(ret);
4634: }
4635:
4636: /**
4637: * xmlParseEnumerationType:
4638: * @ctxt: an XML parser context
4639: *
4640: * parse an Enumeration attribute type.
4641: *
4642: * [59] Enumeration ::= '(' S? Nmtoken (S? '|' S? Nmtoken)* S? ')'
4643: *
1.99 daniel 4644: * [ VC: Enumeration ]
1.117 daniel 4645: * Values of this type must match one of the Nmtoken tokens in
1.99 daniel 4646: * the declaration
4647: *
1.66 daniel 4648: * Returns: the enumeration attribute tree built while parsing
4649: */
4650:
4651: xmlEnumerationPtr
4652: xmlParseEnumerationType(xmlParserCtxtPtr ctxt) {
1.123 daniel 4653: xmlChar *name;
1.66 daniel 4654: xmlEnumerationPtr ret = NULL, last = NULL, cur;
4655:
1.152 daniel 4656: if (RAW != '(') {
1.66 daniel 4657: if ((ctxt->sax != NULL) && (ctxt->sax->error != NULL))
1.74 daniel 4658: ctxt->sax->error(ctxt->userData,
1.66 daniel 4659: "'(' required to start ATTLIST enumeration\n");
1.123 daniel 4660: ctxt->errNo = XML_ERR_ATTLIST_NOT_STARTED;
1.66 daniel 4661: ctxt->wellFormed = 0;
4662: return(NULL);
4663: }
1.91 daniel 4664: SHRINK;
1.66 daniel 4665: do {
4666: NEXT;
4667: SKIP_BLANKS;
4668: name = xmlParseNmtoken(ctxt);
4669: if (name == NULL) {
4670: if ((ctxt->sax != NULL) && (ctxt->sax->error != NULL))
1.74 daniel 4671: ctxt->sax->error(ctxt->userData,
1.66 daniel 4672: "NmToken expected in ATTLIST enumeration\n");
1.123 daniel 4673: ctxt->errNo = XML_ERR_NMTOKEN_REQUIRED;
1.66 daniel 4674: ctxt->wellFormed = 0;
4675: return(ret);
4676: }
4677: cur = xmlCreateEnumeration(name);
1.119 daniel 4678: xmlFree(name);
1.66 daniel 4679: if (cur == NULL) return(ret);
4680: if (last == NULL) ret = last = cur;
4681: else {
4682: last->next = cur;
4683: last = cur;
4684: }
4685: SKIP_BLANKS;
1.152 daniel 4686: } while (RAW == '|');
4687: if (RAW != ')') {
1.66 daniel 4688: if ((ctxt->sax != NULL) && (ctxt->sax->error != NULL))
1.74 daniel 4689: ctxt->sax->error(ctxt->userData,
1.66 daniel 4690: "')' required to finish ATTLIST enumeration\n");
1.123 daniel 4691: ctxt->errNo = XML_ERR_ATTLIST_NOT_FINISHED;
1.66 daniel 4692: ctxt->wellFormed = 0;
4693: return(ret);
4694: }
4695: NEXT;
4696: return(ret);
4697: }
4698:
4699: /**
1.50 daniel 4700: * xmlParseEnumeratedType:
4701: * @ctxt: an XML parser context
1.66 daniel 4702: * @tree: the enumeration tree built while parsing
1.50 daniel 4703: *
1.66 daniel 4704: * parse an Enumerated attribute type.
1.22 daniel 4705: *
4706: * [57] EnumeratedType ::= NotationType | Enumeration
4707: *
4708: * [58] NotationType ::= 'NOTATION' S '(' S? Name (S? '|' S? Name)* S? ')'
4709: *
1.50 daniel 4710: *
1.66 daniel 4711: * Returns: XML_ATTRIBUTE_ENUMERATION or XML_ATTRIBUTE_NOTATION
1.22 daniel 4712: */
4713:
1.66 daniel 4714: int
4715: xmlParseEnumeratedType(xmlParserCtxtPtr ctxt, xmlEnumerationPtr *tree) {
1.152 daniel 4716: if ((RAW == 'N') && (NXT(1) == 'O') &&
1.66 daniel 4717: (NXT(2) == 'T') && (NXT(3) == 'A') &&
4718: (NXT(4) == 'T') && (NXT(5) == 'I') &&
4719: (NXT(6) == 'O') && (NXT(7) == 'N')) {
4720: SKIP(8);
4721: if (!IS_BLANK(CUR)) {
4722: if ((ctxt->sax != NULL) && (ctxt->sax->error != NULL))
1.122 daniel 4723: ctxt->sax->error(ctxt->userData,
4724: "Space required after 'NOTATION'\n");
1.123 daniel 4725: ctxt->errNo = XML_ERR_SPACE_REQUIRED;
1.66 daniel 4726: ctxt->wellFormed = 0;
4727: return(0);
4728: }
4729: SKIP_BLANKS;
4730: *tree = xmlParseNotationType(ctxt);
4731: if (*tree == NULL) return(0);
4732: return(XML_ATTRIBUTE_NOTATION);
4733: }
4734: *tree = xmlParseEnumerationType(ctxt);
4735: if (*tree == NULL) return(0);
4736: return(XML_ATTRIBUTE_ENUMERATION);
1.22 daniel 4737: }
4738:
1.50 daniel 4739: /**
4740: * xmlParseAttributeType:
4741: * @ctxt: an XML parser context
1.66 daniel 4742: * @tree: the enumeration tree built while parsing
1.50 daniel 4743: *
1.59 daniel 4744: * parse the Attribute list def for an element
1.22 daniel 4745: *
4746: * [54] AttType ::= StringType | TokenizedType | EnumeratedType
4747: *
4748: * [55] StringType ::= 'CDATA'
4749: *
4750: * [56] TokenizedType ::= 'ID' | 'IDREF' | 'IDREFS' | 'ENTITY' |
4751: * 'ENTITIES' | 'NMTOKEN' | 'NMTOKENS'
1.50 daniel 4752: *
1.102 daniel 4753: * Validity constraints for attribute values syntax are checked in
4754: * xmlValidateAttributeValue()
4755: *
1.99 daniel 4756: * [ VC: ID ]
1.117 daniel 4757: * Values of type ID must match the Name production. A name must not
1.99 daniel 4758: * appear more than once in an XML document as a value of this type;
4759: * i.e., ID values must uniquely identify the elements which bear them.
4760: *
4761: * [ VC: One ID per Element Type ]
1.117 daniel 4762: * No element type may have more than one ID attribute specified.
1.99 daniel 4763: *
4764: * [ VC: ID Attribute Default ]
1.117 daniel 4765: * An ID attribute must have a declared default of #IMPLIED or #REQUIRED.
1.99 daniel 4766: *
4767: * [ VC: IDREF ]
1.102 daniel 4768: * Values of type IDREF must match the Name production, and values
1.140 daniel 4769: * of type IDREFS must match Names; each IDREF Name must match the value
1.117 daniel 4770: * of an ID attribute on some element in the XML document; i.e. IDREF
1.99 daniel 4771: * values must match the value of some ID attribute.
4772: *
4773: * [ VC: Entity Name ]
1.102 daniel 4774: * Values of type ENTITY must match the Name production, values
1.140 daniel 4775: * of type ENTITIES must match Names; each Entity Name must match the
1.117 daniel 4776: * name of an unparsed entity declared in the DTD.
1.99 daniel 4777: *
4778: * [ VC: Name Token ]
1.102 daniel 4779: * Values of type NMTOKEN must match the Nmtoken production; values
1.99 daniel 4780: * of type NMTOKENS must match Nmtokens.
4781: *
1.69 daniel 4782: * Returns the attribute type
1.22 daniel 4783: */
1.59 daniel 4784: int
1.66 daniel 4785: xmlParseAttributeType(xmlParserCtxtPtr ctxt, xmlEnumerationPtr *tree) {
1.91 daniel 4786: SHRINK;
1.152 daniel 4787: if ((RAW == 'C') && (NXT(1) == 'D') &&
1.40 daniel 4788: (NXT(2) == 'A') && (NXT(3) == 'T') &&
4789: (NXT(4) == 'A')) {
4790: SKIP(5);
1.66 daniel 4791: return(XML_ATTRIBUTE_CDATA);
1.152 daniel 4792: } else if ((RAW == 'I') && (NXT(1) == 'D') &&
1.40 daniel 4793: (NXT(2) == 'R') && (NXT(3) == 'E') &&
1.97 daniel 4794: (NXT(4) == 'F') && (NXT(5) == 'S')) {
4795: SKIP(6);
4796: return(XML_ATTRIBUTE_IDREFS);
1.152 daniel 4797: } else if ((RAW == 'I') && (NXT(1) == 'D') &&
1.97 daniel 4798: (NXT(2) == 'R') && (NXT(3) == 'E') &&
1.40 daniel 4799: (NXT(4) == 'F')) {
4800: SKIP(5);
1.59 daniel 4801: return(XML_ATTRIBUTE_IDREF);
1.152 daniel 4802: } else if ((RAW == 'I') && (NXT(1) == 'D')) {
1.66 daniel 4803: SKIP(2);
4804: return(XML_ATTRIBUTE_ID);
1.152 daniel 4805: } else if ((RAW == 'E') && (NXT(1) == 'N') &&
1.40 daniel 4806: (NXT(2) == 'T') && (NXT(3) == 'I') &&
4807: (NXT(4) == 'T') && (NXT(5) == 'Y')) {
4808: SKIP(6);
1.59 daniel 4809: return(XML_ATTRIBUTE_ENTITY);
1.152 daniel 4810: } else if ((RAW == 'E') && (NXT(1) == 'N') &&
1.40 daniel 4811: (NXT(2) == 'T') && (NXT(3) == 'I') &&
4812: (NXT(4) == 'T') && (NXT(5) == 'I') &&
4813: (NXT(6) == 'E') && (NXT(7) == 'S')) {
4814: SKIP(8);
1.59 daniel 4815: return(XML_ATTRIBUTE_ENTITIES);
1.152 daniel 4816: } else if ((RAW == 'N') && (NXT(1) == 'M') &&
1.40 daniel 4817: (NXT(2) == 'T') && (NXT(3) == 'O') &&
4818: (NXT(4) == 'K') && (NXT(5) == 'E') &&
1.66 daniel 4819: (NXT(6) == 'N') && (NXT(7) == 'S')) {
4820: SKIP(8);
4821: return(XML_ATTRIBUTE_NMTOKENS);
1.152 daniel 4822: } else if ((RAW == 'N') && (NXT(1) == 'M') &&
1.66 daniel 4823: (NXT(2) == 'T') && (NXT(3) == 'O') &&
4824: (NXT(4) == 'K') && (NXT(5) == 'E') &&
1.40 daniel 4825: (NXT(6) == 'N')) {
4826: SKIP(7);
1.59 daniel 4827: return(XML_ATTRIBUTE_NMTOKEN);
1.22 daniel 4828: }
1.66 daniel 4829: return(xmlParseEnumeratedType(ctxt, tree));
1.22 daniel 4830: }
4831:
1.50 daniel 4832: /**
4833: * xmlParseAttributeListDecl:
4834: * @ctxt: an XML parser context
4835: *
4836: * : parse the Attribute list def for an element
1.22 daniel 4837: *
4838: * [52] AttlistDecl ::= '<!ATTLIST' S Name AttDef* S? '>'
4839: *
4840: * [53] AttDef ::= S Name S AttType S DefaultDecl
1.50 daniel 4841: *
1.22 daniel 4842: */
1.55 daniel 4843: void
4844: xmlParseAttributeListDecl(xmlParserCtxtPtr ctxt) {
1.123 daniel 4845: xmlChar *elemName;
4846: xmlChar *attrName;
1.103 daniel 4847: xmlEnumerationPtr tree;
1.22 daniel 4848:
1.152 daniel 4849: if ((RAW == '<') && (NXT(1) == '!') &&
1.40 daniel 4850: (NXT(2) == 'A') && (NXT(3) == 'T') &&
4851: (NXT(4) == 'T') && (NXT(5) == 'L') &&
4852: (NXT(6) == 'I') && (NXT(7) == 'S') &&
1.59 daniel 4853: (NXT(8) == 'T')) {
1.40 daniel 4854: SKIP(9);
1.59 daniel 4855: if (!IS_BLANK(CUR)) {
4856: if ((ctxt->sax != NULL) && (ctxt->sax->error != NULL))
1.122 daniel 4857: ctxt->sax->error(ctxt->userData,
4858: "Space required after '<!ATTLIST'\n");
1.123 daniel 4859: ctxt->errNo = XML_ERR_SPACE_REQUIRED;
1.59 daniel 4860: ctxt->wellFormed = 0;
4861: }
1.42 daniel 4862: SKIP_BLANKS;
1.59 daniel 4863: elemName = xmlParseName(ctxt);
4864: if (elemName == NULL) {
1.55 daniel 4865: if ((ctxt->sax != NULL) && (ctxt->sax->error != NULL))
1.122 daniel 4866: ctxt->sax->error(ctxt->userData,
4867: "ATTLIST: no name for Element\n");
1.123 daniel 4868: ctxt->errNo = XML_ERR_NAME_REQUIRED;
1.59 daniel 4869: ctxt->wellFormed = 0;
1.22 daniel 4870: return;
4871: }
1.42 daniel 4872: SKIP_BLANKS;
1.152 daniel 4873: while (RAW != '>') {
1.123 daniel 4874: const xmlChar *check = CUR_PTR;
1.59 daniel 4875: int type;
4876: int def;
1.123 daniel 4877: xmlChar *defaultValue = NULL;
1.59 daniel 4878:
1.103 daniel 4879: tree = NULL;
1.59 daniel 4880: attrName = xmlParseName(ctxt);
4881: if (attrName == NULL) {
4882: if ((ctxt->sax != NULL) && (ctxt->sax->error != NULL))
1.122 daniel 4883: ctxt->sax->error(ctxt->userData,
4884: "ATTLIST: no name for Attribute\n");
1.123 daniel 4885: ctxt->errNo = XML_ERR_NAME_REQUIRED;
1.59 daniel 4886: ctxt->wellFormed = 0;
4887: break;
4888: }
1.97 daniel 4889: GROW;
1.59 daniel 4890: if (!IS_BLANK(CUR)) {
4891: if ((ctxt->sax != NULL) && (ctxt->sax->error != NULL))
1.74 daniel 4892: ctxt->sax->error(ctxt->userData,
1.59 daniel 4893: "Space required after the attribute name\n");
1.123 daniel 4894: ctxt->errNo = XML_ERR_SPACE_REQUIRED;
1.59 daniel 4895: ctxt->wellFormed = 0;
1.170 daniel 4896: if (attrName != NULL)
4897: xmlFree(attrName);
4898: if (defaultValue != NULL)
4899: xmlFree(defaultValue);
1.59 daniel 4900: break;
4901: }
4902: SKIP_BLANKS;
4903:
1.66 daniel 4904: type = xmlParseAttributeType(ctxt, &tree);
1.170 daniel 4905: if (type <= 0) {
4906: if (attrName != NULL)
4907: xmlFree(attrName);
4908: if (defaultValue != NULL)
4909: xmlFree(defaultValue);
4910: break;
4911: }
1.22 daniel 4912:
1.97 daniel 4913: GROW;
1.59 daniel 4914: if (!IS_BLANK(CUR)) {
4915: if ((ctxt->sax != NULL) && (ctxt->sax->error != NULL))
1.74 daniel 4916: ctxt->sax->error(ctxt->userData,
1.59 daniel 4917: "Space required after the attribute type\n");
1.123 daniel 4918: ctxt->errNo = XML_ERR_SPACE_REQUIRED;
1.59 daniel 4919: ctxt->wellFormed = 0;
1.170 daniel 4920: if (attrName != NULL)
4921: xmlFree(attrName);
4922: if (defaultValue != NULL)
4923: xmlFree(defaultValue);
4924: if (tree != NULL)
4925: xmlFreeEnumeration(tree);
1.59 daniel 4926: break;
4927: }
1.42 daniel 4928: SKIP_BLANKS;
1.59 daniel 4929:
4930: def = xmlParseDefaultDecl(ctxt, &defaultValue);
1.170 daniel 4931: if (def <= 0) {
4932: if (attrName != NULL)
4933: xmlFree(attrName);
4934: if (defaultValue != NULL)
4935: xmlFree(defaultValue);
4936: if (tree != NULL)
4937: xmlFreeEnumeration(tree);
4938: break;
4939: }
1.59 daniel 4940:
1.97 daniel 4941: GROW;
1.152 daniel 4942: if (RAW != '>') {
1.59 daniel 4943: if (!IS_BLANK(CUR)) {
4944: if ((ctxt->sax != NULL) && (ctxt->sax->error != NULL))
1.74 daniel 4945: ctxt->sax->error(ctxt->userData,
1.59 daniel 4946: "Space required after the attribute default value\n");
1.123 daniel 4947: ctxt->errNo = XML_ERR_SPACE_REQUIRED;
1.59 daniel 4948: ctxt->wellFormed = 0;
1.170 daniel 4949: if (attrName != NULL)
4950: xmlFree(attrName);
4951: if (defaultValue != NULL)
4952: xmlFree(defaultValue);
4953: if (tree != NULL)
4954: xmlFreeEnumeration(tree);
1.59 daniel 4955: break;
4956: }
4957: SKIP_BLANKS;
4958: }
1.40 daniel 4959: if (check == CUR_PTR) {
1.55 daniel 4960: if ((ctxt->sax != NULL) && (ctxt->sax->error != NULL))
1.74 daniel 4961: ctxt->sax->error(ctxt->userData,
1.59 daniel 4962: "xmlParseAttributeListDecl: detected internal error\n");
1.123 daniel 4963: ctxt->errNo = XML_ERR_INTERNAL_ERROR;
1.170 daniel 4964: if (attrName != NULL)
4965: xmlFree(attrName);
4966: if (defaultValue != NULL)
4967: xmlFree(defaultValue);
4968: if (tree != NULL)
4969: xmlFreeEnumeration(tree);
1.22 daniel 4970: break;
4971: }
1.171 daniel 4972: if ((ctxt->sax != NULL) && (!ctxt->disableSAX) &&
4973: (ctxt->sax->attributeDecl != NULL))
1.74 daniel 4974: ctxt->sax->attributeDecl(ctxt->userData, elemName, attrName,
1.66 daniel 4975: type, def, defaultValue, tree);
1.59 daniel 4976: if (attrName != NULL)
1.119 daniel 4977: xmlFree(attrName);
1.59 daniel 4978: if (defaultValue != NULL)
1.119 daniel 4979: xmlFree(defaultValue);
1.97 daniel 4980: GROW;
1.22 daniel 4981: }
1.152 daniel 4982: if (RAW == '>')
1.40 daniel 4983: NEXT;
1.22 daniel 4984:
1.119 daniel 4985: xmlFree(elemName);
1.22 daniel 4986: }
4987: }
4988:
1.50 daniel 4989: /**
1.61 daniel 4990: * xmlParseElementMixedContentDecl:
4991: * @ctxt: an XML parser context
4992: *
4993: * parse the declaration for a Mixed Element content
4994: * The leading '(' and spaces have been skipped in xmlParseElementContentDecl
4995: *
4996: * [51] Mixed ::= '(' S? '#PCDATA' (S? '|' S? Name)* S? ')*' |
4997: * '(' S? '#PCDATA' S? ')'
4998: *
1.99 daniel 4999: * [ VC: Proper Group/PE Nesting ] applies to [51] too (see [49])
5000: *
5001: * [ VC: No Duplicate Types ]
1.117 daniel 5002: * The same name must not appear more than once in a single
5003: * mixed-content declaration.
1.99 daniel 5004: *
1.61 daniel 5005: * returns: the list of the xmlElementContentPtr describing the element choices
5006: */
5007: xmlElementContentPtr
1.62 daniel 5008: xmlParseElementMixedContentDecl(xmlParserCtxtPtr ctxt) {
1.64 daniel 5009: xmlElementContentPtr ret = NULL, cur = NULL, n;
1.123 daniel 5010: xmlChar *elem = NULL;
1.61 daniel 5011:
1.97 daniel 5012: GROW;
1.152 daniel 5013: if ((RAW == '#') && (NXT(1) == 'P') &&
1.61 daniel 5014: (NXT(2) == 'C') && (NXT(3) == 'D') &&
5015: (NXT(4) == 'A') && (NXT(5) == 'T') &&
5016: (NXT(6) == 'A')) {
5017: SKIP(7);
5018: SKIP_BLANKS;
1.91 daniel 5019: SHRINK;
1.152 daniel 5020: if (RAW == ')') {
1.63 daniel 5021: NEXT;
5022: ret = xmlNewElementContent(NULL, XML_ELEMENT_CONTENT_PCDATA);
1.152 daniel 5023: if (RAW == '*') {
1.136 daniel 5024: ret->ocur = XML_ELEMENT_CONTENT_MULT;
5025: NEXT;
5026: }
1.63 daniel 5027: return(ret);
5028: }
1.152 daniel 5029: if ((RAW == '(') || (RAW == '|')) {
1.61 daniel 5030: ret = cur = xmlNewElementContent(NULL, XML_ELEMENT_CONTENT_PCDATA);
5031: if (ret == NULL) return(NULL);
1.99 daniel 5032: }
1.152 daniel 5033: while (RAW == '|') {
1.64 daniel 5034: NEXT;
1.61 daniel 5035: if (elem == NULL) {
5036: ret = xmlNewElementContent(NULL, XML_ELEMENT_CONTENT_OR);
5037: if (ret == NULL) return(NULL);
5038: ret->c1 = cur;
1.64 daniel 5039: cur = ret;
1.61 daniel 5040: } else {
1.64 daniel 5041: n = xmlNewElementContent(NULL, XML_ELEMENT_CONTENT_OR);
5042: if (n == NULL) return(NULL);
5043: n->c1 = xmlNewElementContent(elem, XML_ELEMENT_CONTENT_ELEMENT);
5044: cur->c2 = n;
5045: cur = n;
1.119 daniel 5046: xmlFree(elem);
1.61 daniel 5047: }
5048: SKIP_BLANKS;
5049: elem = xmlParseName(ctxt);
5050: if (elem == NULL) {
5051: if ((ctxt->sax != NULL) && (ctxt->sax->error != NULL))
1.74 daniel 5052: ctxt->sax->error(ctxt->userData,
1.61 daniel 5053: "xmlParseElementMixedContentDecl : Name expected\n");
1.123 daniel 5054: ctxt->errNo = XML_ERR_NAME_REQUIRED;
1.61 daniel 5055: ctxt->wellFormed = 0;
5056: xmlFreeElementContent(cur);
5057: return(NULL);
5058: }
5059: SKIP_BLANKS;
1.97 daniel 5060: GROW;
1.61 daniel 5061: }
1.152 daniel 5062: if ((RAW == ')') && (NXT(1) == '*')) {
1.66 daniel 5063: if (elem != NULL) {
1.61 daniel 5064: cur->c2 = xmlNewElementContent(elem,
5065: XML_ELEMENT_CONTENT_ELEMENT);
1.119 daniel 5066: xmlFree(elem);
1.66 daniel 5067: }
1.65 daniel 5068: ret->ocur = XML_ELEMENT_CONTENT_MULT;
1.64 daniel 5069: SKIP(2);
1.61 daniel 5070: } else {
1.119 daniel 5071: if (elem != NULL) xmlFree(elem);
1.61 daniel 5072: if ((ctxt->sax != NULL) && (ctxt->sax->error != NULL))
1.74 daniel 5073: ctxt->sax->error(ctxt->userData,
1.63 daniel 5074: "xmlParseElementMixedContentDecl : '|' or ')*' expected\n");
1.123 daniel 5075: ctxt->errNo = XML_ERR_MIXED_NOT_STARTED;
1.61 daniel 5076: ctxt->wellFormed = 0;
5077: xmlFreeElementContent(ret);
5078: return(NULL);
5079: }
5080:
5081: } else {
5082: if ((ctxt->sax != NULL) && (ctxt->sax->error != NULL))
1.74 daniel 5083: ctxt->sax->error(ctxt->userData,
1.61 daniel 5084: "xmlParseElementMixedContentDecl : '#PCDATA' expected\n");
1.123 daniel 5085: ctxt->errNo = XML_ERR_PCDATA_REQUIRED;
1.61 daniel 5086: ctxt->wellFormed = 0;
5087: }
5088: return(ret);
5089: }
5090:
5091: /**
5092: * xmlParseElementChildrenContentDecl:
1.50 daniel 5093: * @ctxt: an XML parser context
5094: *
1.61 daniel 5095: * parse the declaration for a Mixed Element content
5096: * The leading '(' and spaces have been skipped in xmlParseElementContentDecl
1.22 daniel 5097: *
1.61 daniel 5098: *
1.22 daniel 5099: * [47] children ::= (choice | seq) ('?' | '*' | '+')?
5100: *
5101: * [48] cp ::= (Name | choice | seq) ('?' | '*' | '+')?
5102: *
5103: * [49] choice ::= '(' S? cp ( S? '|' S? cp )* S? ')'
5104: *
5105: * [50] seq ::= '(' S? cp ( S? ',' S? cp )* S? ')'
5106: *
1.99 daniel 5107: * [ VC: Proper Group/PE Nesting ] applies to [49] and [50]
5108: * TODO Parameter-entity replacement text must be properly nested
5109: * with parenthetized groups. That is to say, if either of the
5110: * opening or closing parentheses in a choice, seq, or Mixed
5111: * construct is contained in the replacement text for a parameter
5112: * entity, both must be contained in the same replacement text. For
5113: * interoperability, if a parameter-entity reference appears in a
5114: * choice, seq, or Mixed construct, its replacement text should not
5115: * be empty, and neither the first nor last non-blank character of
5116: * the replacement text should be a connector (| or ,).
5117: *
1.62 daniel 5118: * returns: the tree of xmlElementContentPtr describing the element
1.61 daniel 5119: * hierarchy.
5120: */
5121: xmlElementContentPtr
1.62 daniel 5122: xmlParseElementChildrenContentDecl(xmlParserCtxtPtr ctxt) {
1.63 daniel 5123: xmlElementContentPtr ret = NULL, cur = NULL, last = NULL, op = NULL;
1.123 daniel 5124: xmlChar *elem;
5125: xmlChar type = 0;
1.62 daniel 5126:
5127: SKIP_BLANKS;
1.94 daniel 5128: GROW;
1.152 daniel 5129: if (RAW == '(') {
1.63 daniel 5130: /* Recurse on first child */
1.62 daniel 5131: NEXT;
5132: SKIP_BLANKS;
5133: cur = ret = xmlParseElementChildrenContentDecl(ctxt);
5134: SKIP_BLANKS;
1.101 daniel 5135: GROW;
1.62 daniel 5136: } else {
5137: elem = xmlParseName(ctxt);
5138: if (elem == NULL) {
5139: if ((ctxt->sax != NULL) && (ctxt->sax->error != NULL))
1.74 daniel 5140: ctxt->sax->error(ctxt->userData,
1.62 daniel 5141: "xmlParseElementChildrenContentDecl : Name or '(' expected\n");
1.123 daniel 5142: ctxt->errNo = XML_ERR_ELEMCONTENT_NOT_STARTED;
1.62 daniel 5143: ctxt->wellFormed = 0;
5144: return(NULL);
5145: }
5146: cur = ret = xmlNewElementContent(elem, XML_ELEMENT_CONTENT_ELEMENT);
1.101 daniel 5147: GROW;
1.152 daniel 5148: if (RAW == '?') {
1.104 daniel 5149: cur->ocur = XML_ELEMENT_CONTENT_OPT;
1.62 daniel 5150: NEXT;
1.152 daniel 5151: } else if (RAW == '*') {
1.104 daniel 5152: cur->ocur = XML_ELEMENT_CONTENT_MULT;
1.62 daniel 5153: NEXT;
1.152 daniel 5154: } else if (RAW == '+') {
1.104 daniel 5155: cur->ocur = XML_ELEMENT_CONTENT_PLUS;
1.62 daniel 5156: NEXT;
5157: } else {
1.104 daniel 5158: cur->ocur = XML_ELEMENT_CONTENT_ONCE;
1.62 daniel 5159: }
1.119 daniel 5160: xmlFree(elem);
1.101 daniel 5161: GROW;
1.62 daniel 5162: }
5163: SKIP_BLANKS;
1.91 daniel 5164: SHRINK;
1.152 daniel 5165: while (RAW != ')') {
1.63 daniel 5166: /*
5167: * Each loop we parse one separator and one element.
5168: */
1.152 daniel 5169: if (RAW == ',') {
1.62 daniel 5170: if (type == 0) type = CUR;
5171:
5172: /*
5173: * Detect "Name | Name , Name" error
5174: */
5175: else if (type != CUR) {
5176: if ((ctxt->sax != NULL) && (ctxt->sax->error != NULL))
1.74 daniel 5177: ctxt->sax->error(ctxt->userData,
1.62 daniel 5178: "xmlParseElementChildrenContentDecl : '%c' expected\n",
5179: type);
1.123 daniel 5180: ctxt->errNo = XML_ERR_SEPARATOR_REQUIRED;
1.62 daniel 5181: ctxt->wellFormed = 0;
1.170 daniel 5182: if ((op != NULL) && (op != ret))
5183: xmlFreeElementContent(op);
5184: if ((last != NULL) && (last != ret))
5185: xmlFreeElementContent(last);
5186: if (ret != NULL)
5187: xmlFreeElementContent(ret);
1.62 daniel 5188: return(NULL);
5189: }
1.64 daniel 5190: NEXT;
1.62 daniel 5191:
1.63 daniel 5192: op = xmlNewElementContent(NULL, XML_ELEMENT_CONTENT_SEQ);
5193: if (op == NULL) {
5194: xmlFreeElementContent(ret);
5195: return(NULL);
5196: }
5197: if (last == NULL) {
5198: op->c1 = ret;
1.65 daniel 5199: ret = cur = op;
1.63 daniel 5200: } else {
5201: cur->c2 = op;
5202: op->c1 = last;
5203: cur =op;
1.65 daniel 5204: last = NULL;
1.63 daniel 5205: }
1.152 daniel 5206: } else if (RAW == '|') {
1.62 daniel 5207: if (type == 0) type = CUR;
5208:
5209: /*
1.63 daniel 5210: * Detect "Name , Name | Name" error
1.62 daniel 5211: */
5212: else if (type != CUR) {
5213: if ((ctxt->sax != NULL) && (ctxt->sax->error != NULL))
1.74 daniel 5214: ctxt->sax->error(ctxt->userData,
1.62 daniel 5215: "xmlParseElementChildrenContentDecl : '%c' expected\n",
5216: type);
1.123 daniel 5217: ctxt->errNo = XML_ERR_SEPARATOR_REQUIRED;
1.62 daniel 5218: ctxt->wellFormed = 0;
1.170 daniel 5219: if ((op != NULL) && (op != ret))
5220: xmlFreeElementContent(op);
5221: if ((last != NULL) && (last != ret))
5222: xmlFreeElementContent(last);
5223: if (ret != NULL)
5224: xmlFreeElementContent(ret);
1.62 daniel 5225: return(NULL);
5226: }
1.64 daniel 5227: NEXT;
1.62 daniel 5228:
1.63 daniel 5229: op = xmlNewElementContent(NULL, XML_ELEMENT_CONTENT_OR);
5230: if (op == NULL) {
1.170 daniel 5231: if ((op != NULL) && (op != ret))
5232: xmlFreeElementContent(op);
5233: if ((last != NULL) && (last != ret))
5234: xmlFreeElementContent(last);
5235: if (ret != NULL)
5236: xmlFreeElementContent(ret);
1.63 daniel 5237: return(NULL);
5238: }
5239: if (last == NULL) {
5240: op->c1 = ret;
1.65 daniel 5241: ret = cur = op;
1.63 daniel 5242: } else {
5243: cur->c2 = op;
5244: op->c1 = last;
5245: cur =op;
1.65 daniel 5246: last = NULL;
1.63 daniel 5247: }
1.62 daniel 5248: } else {
5249: if ((ctxt->sax != NULL) && (ctxt->sax->error != NULL))
1.74 daniel 5250: ctxt->sax->error(ctxt->userData,
1.62 daniel 5251: "xmlParseElementChildrenContentDecl : ',' '|' or ')' expected\n");
5252: ctxt->wellFormed = 0;
1.123 daniel 5253: ctxt->errNo = XML_ERR_ELEMCONTENT_NOT_FINISHED;
1.170 daniel 5254: if ((op != NULL) && (op != ret))
5255: xmlFreeElementContent(op);
5256: if ((last != NULL) && (last != ret))
5257: xmlFreeElementContent(last);
5258: if (ret != NULL)
5259: xmlFreeElementContent(ret);
1.62 daniel 5260: return(NULL);
5261: }
1.101 daniel 5262: GROW;
1.62 daniel 5263: SKIP_BLANKS;
1.101 daniel 5264: GROW;
1.152 daniel 5265: if (RAW == '(') {
1.63 daniel 5266: /* Recurse on second child */
1.62 daniel 5267: NEXT;
5268: SKIP_BLANKS;
1.65 daniel 5269: last = xmlParseElementChildrenContentDecl(ctxt);
1.62 daniel 5270: SKIP_BLANKS;
5271: } else {
5272: elem = xmlParseName(ctxt);
5273: if (elem == NULL) {
5274: if ((ctxt->sax != NULL) && (ctxt->sax->error != NULL))
1.74 daniel 5275: ctxt->sax->error(ctxt->userData,
1.122 daniel 5276: "xmlParseElementChildrenContentDecl : Name or '(' expected\n");
1.123 daniel 5277: ctxt->errNo = XML_ERR_ELEMCONTENT_NOT_STARTED;
1.62 daniel 5278: ctxt->wellFormed = 0;
1.170 daniel 5279: if ((op != NULL) && (op != ret))
5280: xmlFreeElementContent(op);
5281: if ((last != NULL) && (last != ret))
5282: xmlFreeElementContent(last);
5283: if (ret != NULL)
5284: xmlFreeElementContent(ret);
1.62 daniel 5285: return(NULL);
5286: }
1.65 daniel 5287: last = xmlNewElementContent(elem, XML_ELEMENT_CONTENT_ELEMENT);
1.119 daniel 5288: xmlFree(elem);
1.152 daniel 5289: if (RAW == '?') {
1.105 daniel 5290: last->ocur = XML_ELEMENT_CONTENT_OPT;
5291: NEXT;
1.152 daniel 5292: } else if (RAW == '*') {
1.105 daniel 5293: last->ocur = XML_ELEMENT_CONTENT_MULT;
5294: NEXT;
1.152 daniel 5295: } else if (RAW == '+') {
1.105 daniel 5296: last->ocur = XML_ELEMENT_CONTENT_PLUS;
5297: NEXT;
5298: } else {
5299: last->ocur = XML_ELEMENT_CONTENT_ONCE;
5300: }
1.63 daniel 5301: }
5302: SKIP_BLANKS;
1.97 daniel 5303: GROW;
1.64 daniel 5304: }
1.65 daniel 5305: if ((cur != NULL) && (last != NULL)) {
5306: cur->c2 = last;
1.62 daniel 5307: }
5308: NEXT;
1.152 daniel 5309: if (RAW == '?') {
1.62 daniel 5310: ret->ocur = XML_ELEMENT_CONTENT_OPT;
5311: NEXT;
1.152 daniel 5312: } else if (RAW == '*') {
1.62 daniel 5313: ret->ocur = XML_ELEMENT_CONTENT_MULT;
5314: NEXT;
1.152 daniel 5315: } else if (RAW == '+') {
1.62 daniel 5316: ret->ocur = XML_ELEMENT_CONTENT_PLUS;
5317: NEXT;
5318: }
5319: return(ret);
1.61 daniel 5320: }
5321:
5322: /**
5323: * xmlParseElementContentDecl:
5324: * @ctxt: an XML parser context
5325: * @name: the name of the element being defined.
5326: * @result: the Element Content pointer will be stored here if any
1.22 daniel 5327: *
1.61 daniel 5328: * parse the declaration for an Element content either Mixed or Children,
5329: * the cases EMPTY and ANY are handled directly in xmlParseElementDecl
5330: *
5331: * [46] contentspec ::= 'EMPTY' | 'ANY' | Mixed | children
1.50 daniel 5332: *
1.61 daniel 5333: * returns: the type of element content XML_ELEMENT_TYPE_xxx
1.22 daniel 5334: */
5335:
1.61 daniel 5336: int
1.123 daniel 5337: xmlParseElementContentDecl(xmlParserCtxtPtr ctxt, xmlChar *name,
1.61 daniel 5338: xmlElementContentPtr *result) {
5339:
5340: xmlElementContentPtr tree = NULL;
5341: int res;
5342:
5343: *result = NULL;
5344:
1.152 daniel 5345: if (RAW != '(') {
1.61 daniel 5346: if ((ctxt->sax != NULL) && (ctxt->sax->error != NULL))
1.74 daniel 5347: ctxt->sax->error(ctxt->userData,
1.61 daniel 5348: "xmlParseElementContentDecl : '(' expected\n");
1.123 daniel 5349: ctxt->errNo = XML_ERR_ELEMCONTENT_NOT_STARTED;
1.61 daniel 5350: ctxt->wellFormed = 0;
5351: return(-1);
5352: }
5353: NEXT;
1.97 daniel 5354: GROW;
1.61 daniel 5355: SKIP_BLANKS;
1.152 daniel 5356: if ((RAW == '#') && (NXT(1) == 'P') &&
1.61 daniel 5357: (NXT(2) == 'C') && (NXT(3) == 'D') &&
5358: (NXT(4) == 'A') && (NXT(5) == 'T') &&
5359: (NXT(6) == 'A')) {
1.62 daniel 5360: tree = xmlParseElementMixedContentDecl(ctxt);
1.61 daniel 5361: res = XML_ELEMENT_TYPE_MIXED;
5362: } else {
1.62 daniel 5363: tree = xmlParseElementChildrenContentDecl(ctxt);
1.61 daniel 5364: res = XML_ELEMENT_TYPE_ELEMENT;
5365: }
5366: SKIP_BLANKS;
1.63 daniel 5367: /****************************
1.152 daniel 5368: if (RAW != ')') {
1.61 daniel 5369: if ((ctxt->sax != NULL) && (ctxt->sax->error != NULL))
1.74 daniel 5370: ctxt->sax->error(ctxt->userData,
1.61 daniel 5371: "xmlParseElementContentDecl : ')' expected\n");
5372: ctxt->wellFormed = 0;
5373: return(-1);
5374: }
1.63 daniel 5375: ****************************/
5376: *result = tree;
1.61 daniel 5377: return(res);
1.22 daniel 5378: }
5379:
1.50 daniel 5380: /**
5381: * xmlParseElementDecl:
5382: * @ctxt: an XML parser context
5383: *
5384: * parse an Element declaration.
1.22 daniel 5385: *
5386: * [45] elementdecl ::= '<!ELEMENT' S Name S contentspec S? '>'
5387: *
1.99 daniel 5388: * [ VC: Unique Element Type Declaration ]
1.117 daniel 5389: * No element type may be declared more than once
1.69 daniel 5390: *
5391: * Returns the type of the element, or -1 in case of error
1.22 daniel 5392: */
1.59 daniel 5393: int
1.55 daniel 5394: xmlParseElementDecl(xmlParserCtxtPtr ctxt) {
1.123 daniel 5395: xmlChar *name;
1.59 daniel 5396: int ret = -1;
1.61 daniel 5397: xmlElementContentPtr content = NULL;
1.22 daniel 5398:
1.97 daniel 5399: GROW;
1.152 daniel 5400: if ((RAW == '<') && (NXT(1) == '!') &&
1.40 daniel 5401: (NXT(2) == 'E') && (NXT(3) == 'L') &&
5402: (NXT(4) == 'E') && (NXT(5) == 'M') &&
5403: (NXT(6) == 'E') && (NXT(7) == 'N') &&
1.59 daniel 5404: (NXT(8) == 'T')) {
1.40 daniel 5405: SKIP(9);
1.59 daniel 5406: if (!IS_BLANK(CUR)) {
5407: if ((ctxt->sax != NULL) && (ctxt->sax->error != NULL))
1.74 daniel 5408: ctxt->sax->error(ctxt->userData,
1.59 daniel 5409: "Space required after 'ELEMENT'\n");
1.123 daniel 5410: ctxt->errNo = XML_ERR_SPACE_REQUIRED;
1.59 daniel 5411: ctxt->wellFormed = 0;
5412: }
1.42 daniel 5413: SKIP_BLANKS;
1.22 daniel 5414: name = xmlParseName(ctxt);
5415: if (name == NULL) {
1.55 daniel 5416: if ((ctxt->sax != NULL) && (ctxt->sax->error != NULL))
1.74 daniel 5417: ctxt->sax->error(ctxt->userData,
1.59 daniel 5418: "xmlParseElementDecl: no name for Element\n");
1.123 daniel 5419: ctxt->errNo = XML_ERR_NAME_REQUIRED;
1.59 daniel 5420: ctxt->wellFormed = 0;
5421: return(-1);
5422: }
5423: if (!IS_BLANK(CUR)) {
5424: if ((ctxt->sax != NULL) && (ctxt->sax->error != NULL))
1.74 daniel 5425: ctxt->sax->error(ctxt->userData,
1.59 daniel 5426: "Space required after the element name\n");
1.123 daniel 5427: ctxt->errNo = XML_ERR_SPACE_REQUIRED;
1.59 daniel 5428: ctxt->wellFormed = 0;
1.22 daniel 5429: }
1.42 daniel 5430: SKIP_BLANKS;
1.152 daniel 5431: if ((RAW == 'E') && (NXT(1) == 'M') &&
1.40 daniel 5432: (NXT(2) == 'P') && (NXT(3) == 'T') &&
5433: (NXT(4) == 'Y')) {
5434: SKIP(5);
1.22 daniel 5435: /*
5436: * Element must always be empty.
5437: */
1.59 daniel 5438: ret = XML_ELEMENT_TYPE_EMPTY;
1.152 daniel 5439: } else if ((RAW == 'A') && (NXT(1) == 'N') &&
1.40 daniel 5440: (NXT(2) == 'Y')) {
5441: SKIP(3);
1.22 daniel 5442: /*
5443: * Element is a generic container.
5444: */
1.59 daniel 5445: ret = XML_ELEMENT_TYPE_ANY;
1.152 daniel 5446: } else if (RAW == '(') {
1.61 daniel 5447: ret = xmlParseElementContentDecl(ctxt, name, &content);
1.22 daniel 5448: } else {
1.98 daniel 5449: /*
5450: * [ WFC: PEs in Internal Subset ] error handling.
5451: */
1.152 daniel 5452: if ((RAW == '%') && (ctxt->external == 0) &&
1.98 daniel 5453: (ctxt->inputNr == 1)) {
5454: if ((ctxt->sax != NULL) && (ctxt->sax->error != NULL))
5455: ctxt->sax->error(ctxt->userData,
5456: "PEReference: forbidden within markup decl in internal subset\n");
1.123 daniel 5457: ctxt->errNo = XML_ERR_PEREF_IN_INT_SUBSET;
1.98 daniel 5458: } else {
5459: if ((ctxt->sax != NULL) && (ctxt->sax->error != NULL))
5460: ctxt->sax->error(ctxt->userData,
5461: "xmlParseElementDecl: 'EMPTY', 'ANY' or '(' expected\n");
1.123 daniel 5462: ctxt->errNo = XML_ERR_ELEMCONTENT_NOT_STARTED;
1.98 daniel 5463: }
1.61 daniel 5464: ctxt->wellFormed = 0;
1.119 daniel 5465: if (name != NULL) xmlFree(name);
1.61 daniel 5466: return(-1);
1.22 daniel 5467: }
1.142 daniel 5468:
5469: SKIP_BLANKS;
5470: /*
5471: * Pop-up of finished entities.
5472: */
1.152 daniel 5473: while ((RAW == 0) && (ctxt->inputNr > 1))
1.142 daniel 5474: xmlPopInput(ctxt);
1.42 daniel 5475: SKIP_BLANKS;
1.142 daniel 5476:
1.152 daniel 5477: if (RAW != '>') {
1.55 daniel 5478: if ((ctxt->sax != NULL) && (ctxt->sax->error != NULL))
1.74 daniel 5479: ctxt->sax->error(ctxt->userData,
1.31 daniel 5480: "xmlParseElementDecl: expected '>' at the end\n");
1.123 daniel 5481: ctxt->errNo = XML_ERR_GT_REQUIRED;
1.59 daniel 5482: ctxt->wellFormed = 0;
1.61 daniel 5483: } else {
1.40 daniel 5484: NEXT;
1.171 daniel 5485: if ((ctxt->sax != NULL) && (!ctxt->disableSAX) &&
5486: (ctxt->sax->elementDecl != NULL))
1.76 daniel 5487: ctxt->sax->elementDecl(ctxt->userData, name, ret,
5488: content);
1.61 daniel 5489: }
1.84 daniel 5490: if (content != NULL) {
5491: xmlFreeElementContent(content);
5492: }
1.61 daniel 5493: if (name != NULL) {
1.119 daniel 5494: xmlFree(name);
1.61 daniel 5495: }
1.22 daniel 5496: }
1.59 daniel 5497: return(ret);
1.22 daniel 5498: }
5499:
1.50 daniel 5500: /**
5501: * xmlParseMarkupDecl:
5502: * @ctxt: an XML parser context
5503: *
5504: * parse Markup declarations
1.22 daniel 5505: *
5506: * [29] markupdecl ::= elementdecl | AttlistDecl | EntityDecl |
5507: * NotationDecl | PI | Comment
5508: *
1.98 daniel 5509: * [ VC: Proper Declaration/PE Nesting ]
5510: * TODO Parameter-entity replacement text must be properly nested with
5511: * markup declarations. That is to say, if either the first character
5512: * or the last character of a markup declaration (markupdecl above) is
5513: * contained in the replacement text for a parameter-entity reference,
5514: * both must be contained in the same replacement text.
5515: *
5516: * [ WFC: PEs in Internal Subset ]
5517: * In the internal DTD subset, parameter-entity references can occur
5518: * only where markup declarations can occur, not within markup declarations.
5519: * (This does not apply to references that occur in external parameter
5520: * entities or to the external subset.)
1.22 daniel 5521: */
1.55 daniel 5522: void
5523: xmlParseMarkupDecl(xmlParserCtxtPtr ctxt) {
1.94 daniel 5524: GROW;
1.22 daniel 5525: xmlParseElementDecl(ctxt);
5526: xmlParseAttributeListDecl(ctxt);
5527: xmlParseEntityDecl(ctxt);
5528: xmlParseNotationDecl(ctxt);
5529: xmlParsePI(ctxt);
1.114 daniel 5530: xmlParseComment(ctxt);
1.98 daniel 5531: /*
5532: * This is only for internal subset. On external entities,
5533: * the replacement is done before parsing stage
5534: */
5535: if ((ctxt->external == 0) && (ctxt->inputNr == 1))
5536: xmlParsePEReference(ctxt);
1.97 daniel 5537: ctxt->instate = XML_PARSER_DTD;
1.22 daniel 5538: }
5539:
1.50 daniel 5540: /**
1.76 daniel 5541: * xmlParseTextDecl:
5542: * @ctxt: an XML parser context
5543: *
5544: * parse an XML declaration header for external entities
5545: *
5546: * [77] TextDecl ::= '<?xml' VersionInfo? EncodingDecl S? '?>'
5547: */
5548:
1.172 daniel 5549: void
1.76 daniel 5550: xmlParseTextDecl(xmlParserCtxtPtr ctxt) {
1.123 daniel 5551: xmlChar *version;
1.76 daniel 5552:
5553: /*
5554: * We know that '<?xml' is here.
5555: */
5556: SKIP(5);
5557:
5558: if (!IS_BLANK(CUR)) {
5559: if ((ctxt->sax != NULL) && (ctxt->sax->error != NULL))
1.122 daniel 5560: ctxt->sax->error(ctxt->userData,
5561: "Space needed after '<?xml'\n");
1.123 daniel 5562: ctxt->errNo = XML_ERR_SPACE_REQUIRED;
1.76 daniel 5563: ctxt->wellFormed = 0;
5564: }
5565: SKIP_BLANKS;
5566:
5567: /*
5568: * We may have the VersionInfo here.
5569: */
5570: version = xmlParseVersionInfo(ctxt);
5571: if (version == NULL)
5572: version = xmlCharStrdup(XML_DEFAULT_VERSION);
1.172 daniel 5573: ctxt->input->version = version;
1.76 daniel 5574:
5575: /*
5576: * We must have the encoding declaration
5577: */
5578: if (!IS_BLANK(CUR)) {
5579: if ((ctxt->sax != NULL) && (ctxt->sax->error != NULL))
1.122 daniel 5580: ctxt->sax->error(ctxt->userData, "Space needed here\n");
1.123 daniel 5581: ctxt->errNo = XML_ERR_SPACE_REQUIRED;
1.76 daniel 5582: ctxt->wellFormed = 0;
5583: }
1.172 daniel 5584: ctxt->input->encoding = xmlParseEncodingDecl(ctxt);
1.76 daniel 5585:
5586: SKIP_BLANKS;
1.152 daniel 5587: if ((RAW == '?') && (NXT(1) == '>')) {
1.76 daniel 5588: SKIP(2);
1.152 daniel 5589: } else if (RAW == '>') {
1.76 daniel 5590: /* Deprecated old WD ... */
5591: if ((ctxt->sax != NULL) && (ctxt->sax->error != NULL))
1.122 daniel 5592: ctxt->sax->error(ctxt->userData,
5593: "XML declaration must end-up with '?>'\n");
1.123 daniel 5594: ctxt->errNo = XML_ERR_XMLDECL_NOT_FINISHED;
1.76 daniel 5595: ctxt->wellFormed = 0;
5596: NEXT;
5597: } else {
5598: if ((ctxt->sax != NULL) && (ctxt->sax->error != NULL))
1.122 daniel 5599: ctxt->sax->error(ctxt->userData,
5600: "parsing XML declaration: '?>' expected\n");
1.123 daniel 5601: ctxt->errNo = XML_ERR_XMLDECL_NOT_FINISHED;
1.76 daniel 5602: ctxt->wellFormed = 0;
5603: MOVETO_ENDTAG(CUR_PTR);
5604: NEXT;
5605: }
5606: }
5607:
5608: /*
5609: * xmlParseConditionalSections
5610: * @ctxt: an XML parser context
5611: *
5612: * TODO : Conditionnal section are not yet supported !
5613: *
5614: * [61] conditionalSect ::= includeSect | ignoreSect
5615: * [62] includeSect ::= '<![' S? 'INCLUDE' S? '[' extSubsetDecl ']]>'
5616: * [63] ignoreSect ::= '<![' S? 'IGNORE' S? '[' ignoreSectContents* ']]>'
5617: * [64] ignoreSectContents ::= Ignore ('<![' ignoreSectContents ']]>' Ignore)*
5618: * [65] Ignore ::= Char* - (Char* ('<![' | ']]>') Char*)
5619: */
5620:
5621: void
5622: xmlParseConditionalSections(xmlParserCtxtPtr ctxt) {
1.165 daniel 5623: SKIP(3);
5624: SKIP_BLANKS;
1.168 daniel 5625: if ((RAW == 'I') && (NXT(1) == 'N') && (NXT(2) == 'C') &&
5626: (NXT(3) == 'L') && (NXT(4) == 'U') && (NXT(5) == 'D') &&
5627: (NXT(6) == 'E')) {
1.165 daniel 5628: SKIP(7);
1.168 daniel 5629: SKIP_BLANKS;
5630: if (RAW != '[') {
5631: if ((ctxt->sax != NULL) && (ctxt->sax->error != NULL))
5632: ctxt->sax->error(ctxt->userData,
5633: "XML conditional section '[' expected\n");
5634: ctxt->errNo = XML_ERR_CONDSEC_INVALID;
5635: ctxt->wellFormed = 0;
5636: } else {
5637: NEXT;
5638: }
1.165 daniel 5639: while ((RAW != 0) && ((RAW != ']') || (NXT(1) != ']') ||
5640: (NXT(2) != '>'))) {
5641: const xmlChar *check = CUR_PTR;
5642: int cons = ctxt->input->consumed;
5643: int tok = ctxt->token;
5644:
5645: if ((RAW == '<') && (NXT(1) == '!') && (NXT(2) == '[')) {
5646: xmlParseConditionalSections(ctxt);
5647: } else if (IS_BLANK(CUR)) {
5648: NEXT;
5649: } else if (RAW == '%') {
5650: xmlParsePEReference(ctxt);
5651: } else
5652: xmlParseMarkupDecl(ctxt);
5653:
5654: /*
5655: * Pop-up of finished entities.
5656: */
5657: while ((RAW == 0) && (ctxt->inputNr > 1))
5658: xmlPopInput(ctxt);
5659:
5660: if ((CUR_PTR == check) && (cons == ctxt->input->consumed) &&
5661: (tok == ctxt->token)) {
5662: if ((ctxt->sax != NULL) && (ctxt->sax->error != NULL))
5663: ctxt->sax->error(ctxt->userData,
5664: "Content error in the external subset\n");
5665: ctxt->wellFormed = 0;
5666: ctxt->errNo = XML_ERR_EXT_SUBSET_NOT_FINISHED;
5667: break;
5668: }
5669: }
1.168 daniel 5670: } else if ((RAW == 'I') && (NXT(1) == 'G') && (NXT(2) == 'N') &&
5671: (NXT(3) == 'O') && (NXT(4) == 'R') && (NXT(5) == 'E')) {
1.171 daniel 5672: int state;
5673:
1.168 daniel 5674: SKIP(6);
5675: SKIP_BLANKS;
5676: if (RAW != '[') {
5677: if ((ctxt->sax != NULL) && (ctxt->sax->error != NULL))
5678: ctxt->sax->error(ctxt->userData,
5679: "XML conditional section '[' expected\n");
5680: ctxt->errNo = XML_ERR_CONDSEC_INVALID;
5681: ctxt->wellFormed = 0;
5682: } else {
5683: NEXT;
5684: }
1.171 daniel 5685:
1.143 daniel 5686: /*
1.171 daniel 5687: * Parse up to the end of the conditionnal section
5688: * But disable SAX event generating DTD building in the meantime
1.143 daniel 5689: */
1.171 daniel 5690: state = ctxt->disableSAX;
1.165 daniel 5691: while ((RAW != 0) && ((RAW != ']') || (NXT(1) != ']') ||
5692: (NXT(2) != '>'))) {
1.171 daniel 5693: const xmlChar *check = CUR_PTR;
5694: int cons = ctxt->input->consumed;
5695: int tok = ctxt->token;
5696:
5697: if ((RAW == '<') && (NXT(1) == '!') && (NXT(2) == '[')) {
5698: xmlParseConditionalSections(ctxt);
5699: } else if (IS_BLANK(CUR)) {
5700: NEXT;
5701: } else if (RAW == '%') {
5702: xmlParsePEReference(ctxt);
5703: } else
5704: xmlParseMarkupDecl(ctxt);
5705:
1.165 daniel 5706: /*
5707: * Pop-up of finished entities.
5708: */
5709: while ((RAW == 0) && (ctxt->inputNr > 1))
5710: xmlPopInput(ctxt);
1.143 daniel 5711:
1.171 daniel 5712: if ((CUR_PTR == check) && (cons == ctxt->input->consumed) &&
5713: (tok == ctxt->token)) {
5714: if ((ctxt->sax != NULL) && (ctxt->sax->error != NULL))
5715: ctxt->sax->error(ctxt->userData,
5716: "Content error in the external subset\n");
5717: ctxt->wellFormed = 0;
5718: ctxt->errNo = XML_ERR_EXT_SUBSET_NOT_FINISHED;
5719: break;
5720: }
1.165 daniel 5721: }
1.171 daniel 5722: ctxt->disableSAX = state;
1.168 daniel 5723: } else {
5724: if ((ctxt->sax != NULL) && (ctxt->sax->error != NULL))
5725: ctxt->sax->error(ctxt->userData,
5726: "XML conditional section INCLUDE or IGNORE keyword expected\n");
5727: ctxt->errNo = XML_ERR_CONDSEC_INVALID;
5728: ctxt->wellFormed = 0;
1.143 daniel 5729: }
5730:
1.152 daniel 5731: if (RAW == 0)
1.143 daniel 5732: SHRINK;
5733:
1.152 daniel 5734: if (RAW == 0) {
1.76 daniel 5735: if ((ctxt->sax != NULL) && (ctxt->sax->error != NULL))
5736: ctxt->sax->error(ctxt->userData,
5737: "XML conditional section not closed\n");
1.123 daniel 5738: ctxt->errNo = XML_ERR_CONDSEC_NOT_FINISHED;
1.76 daniel 5739: ctxt->wellFormed = 0;
1.143 daniel 5740: } else {
5741: SKIP(3);
1.76 daniel 5742: }
5743: }
5744:
5745: /**
1.124 daniel 5746: * xmlParseExternalSubset:
1.76 daniel 5747: * @ctxt: an XML parser context
1.124 daniel 5748: * @ExternalID: the external identifier
5749: * @SystemID: the system identifier (or URL)
1.76 daniel 5750: *
5751: * parse Markup declarations from an external subset
5752: *
5753: * [30] extSubset ::= textDecl? extSubsetDecl
5754: *
5755: * [31] extSubsetDecl ::= (markupdecl | conditionalSect | PEReference | S) *
5756: */
5757: void
1.123 daniel 5758: xmlParseExternalSubset(xmlParserCtxtPtr ctxt, const xmlChar *ExternalID,
5759: const xmlChar *SystemID) {
1.132 daniel 5760: GROW;
1.152 daniel 5761: if ((RAW == '<') && (NXT(1) == '?') &&
1.76 daniel 5762: (NXT(2) == 'x') && (NXT(3) == 'm') &&
5763: (NXT(4) == 'l')) {
1.172 daniel 5764: xmlParseTextDecl(ctxt);
1.76 daniel 5765: }
1.79 daniel 5766: if (ctxt->myDoc == NULL) {
1.116 daniel 5767: ctxt->myDoc = xmlNewDoc(BAD_CAST "1.0");
1.79 daniel 5768: }
5769: if ((ctxt->myDoc != NULL) && (ctxt->myDoc->intSubset == NULL))
5770: xmlCreateIntSubset(ctxt->myDoc, NULL, ExternalID, SystemID);
5771:
1.96 daniel 5772: ctxt->instate = XML_PARSER_DTD;
1.101 daniel 5773: ctxt->external = 1;
1.152 daniel 5774: while (((RAW == '<') && (NXT(1) == '?')) ||
5775: ((RAW == '<') && (NXT(1) == '!')) ||
1.164 daniel 5776: IS_BLANK(CUR)) {
1.123 daniel 5777: const xmlChar *check = CUR_PTR;
1.115 daniel 5778: int cons = ctxt->input->consumed;
1.164 daniel 5779: int tok = ctxt->token;
1.115 daniel 5780:
1.152 daniel 5781: if ((RAW == '<') && (NXT(1) == '!') && (NXT(2) == '[')) {
1.76 daniel 5782: xmlParseConditionalSections(ctxt);
5783: } else if (IS_BLANK(CUR)) {
5784: NEXT;
1.152 daniel 5785: } else if (RAW == '%') {
1.76 daniel 5786: xmlParsePEReference(ctxt);
5787: } else
5788: xmlParseMarkupDecl(ctxt);
1.77 daniel 5789:
5790: /*
5791: * Pop-up of finished entities.
5792: */
1.166 daniel 5793: while ((RAW == 0) && (ctxt->inputNr > 1))
1.77 daniel 5794: xmlPopInput(ctxt);
5795:
1.164 daniel 5796: if ((CUR_PTR == check) && (cons == ctxt->input->consumed) &&
5797: (tok == ctxt->token)) {
1.115 daniel 5798: if ((ctxt->sax != NULL) && (ctxt->sax->error != NULL))
5799: ctxt->sax->error(ctxt->userData,
5800: "Content error in the external subset\n");
5801: ctxt->wellFormed = 0;
1.123 daniel 5802: ctxt->errNo = XML_ERR_EXT_SUBSET_NOT_FINISHED;
1.115 daniel 5803: break;
5804: }
1.76 daniel 5805: }
5806:
1.152 daniel 5807: if (RAW != 0) {
1.76 daniel 5808: if ((ctxt->sax != NULL) && (ctxt->sax->error != NULL))
5809: ctxt->sax->error(ctxt->userData,
5810: "Extra content at the end of the document\n");
1.123 daniel 5811: ctxt->errNo = XML_ERR_EXT_SUBSET_NOT_FINISHED;
1.76 daniel 5812: ctxt->wellFormed = 0;
5813: }
5814:
5815: }
5816:
5817: /**
1.77 daniel 5818: * xmlParseReference:
5819: * @ctxt: an XML parser context
5820: *
5821: * parse and handle entity references in content, depending on the SAX
5822: * interface, this may end-up in a call to character() if this is a
1.79 daniel 5823: * CharRef, a predefined entity, if there is no reference() callback.
5824: * or if the parser was asked to switch to that mode.
1.77 daniel 5825: *
5826: * [67] Reference ::= EntityRef | CharRef
5827: */
5828: void
5829: xmlParseReference(xmlParserCtxtPtr ctxt) {
5830: xmlEntityPtr ent;
1.123 daniel 5831: xmlChar *val;
1.152 daniel 5832: if (RAW != '&') return;
1.77 daniel 5833:
1.113 daniel 5834: if (ctxt->inputNr > 1) {
1.123 daniel 5835: xmlChar cur[2] = { '&' , 0 } ;
1.113 daniel 5836:
1.171 daniel 5837: if ((ctxt->sax != NULL) && (ctxt->sax->characters != NULL) &&
5838: (!ctxt->disableSAX))
1.113 daniel 5839: ctxt->sax->characters(ctxt->userData, cur, 1);
5840: if (ctxt->token == '&')
5841: ctxt->token = 0;
5842: else {
5843: SKIP(1);
5844: }
5845: return;
5846: }
1.77 daniel 5847: if (NXT(1) == '#') {
1.152 daniel 5848: int i = 0;
1.153 daniel 5849: xmlChar out[10];
5850: int hex = NXT(2);
1.77 daniel 5851: int val = xmlParseCharRef(ctxt);
1.152 daniel 5852:
1.153 daniel 5853: if (ctxt->encoding != NULL) {
5854: /*
5855: * So we are using non-UTF-8 buffers
5856: * Check that the char fit on 8bits, if not
5857: * generate a CharRef.
5858: */
5859: if (val <= 0xFF) {
5860: out[0] = val;
5861: out[1] = 0;
1.171 daniel 5862: if ((ctxt->sax != NULL) && (ctxt->sax->characters != NULL) &&
5863: (!ctxt->disableSAX))
1.153 daniel 5864: ctxt->sax->characters(ctxt->userData, out, 1);
5865: } else {
5866: if ((hex == 'x') || (hex == 'X'))
5867: sprintf((char *)out, "#x%X", val);
5868: else
5869: sprintf((char *)out, "#%d", val);
1.171 daniel 5870: if ((ctxt->sax != NULL) && (ctxt->sax->reference != NULL) &&
5871: (!ctxt->disableSAX))
1.153 daniel 5872: ctxt->sax->reference(ctxt->userData, out);
5873: }
5874: } else {
5875: /*
5876: * Just encode the value in UTF-8
5877: */
5878: COPY_BUF(0 ,out, i, val);
5879: out[i] = 0;
1.171 daniel 5880: if ((ctxt->sax != NULL) && (ctxt->sax->characters != NULL) &&
5881: (!ctxt->disableSAX))
1.153 daniel 5882: ctxt->sax->characters(ctxt->userData, out, i);
5883: }
1.77 daniel 5884: } else {
5885: ent = xmlParseEntityRef(ctxt);
5886: if (ent == NULL) return;
5887: if ((ent->name != NULL) &&
1.159 daniel 5888: (ent->etype != XML_INTERNAL_PREDEFINED_ENTITY)) {
1.113 daniel 5889: if ((ctxt->sax != NULL) && (ctxt->sax->reference != NULL) &&
1.171 daniel 5890: (ctxt->replaceEntities == 0) && (!ctxt->disableSAX)) {
1.113 daniel 5891: /*
5892: * Create a node.
5893: */
5894: ctxt->sax->reference(ctxt->userData, ent->name);
5895: return;
5896: } else if (ctxt->replaceEntities) {
5897: xmlParserInputPtr input;
1.79 daniel 5898:
1.113 daniel 5899: input = xmlNewEntityInputStream(ctxt, ent);
5900: xmlPushInput(ctxt, input);
1.167 daniel 5901: if ((ent->etype == XML_EXTERNAL_GENERAL_PARSED_ENTITY) &&
5902: (RAW == '<') && (NXT(1) == '?') &&
5903: (NXT(2) == 'x') && (NXT(3) == 'm') &&
5904: (NXT(4) == 'l') && (IS_BLANK(NXT(5)))) {
1.172 daniel 5905: xmlParseTextDecl(ctxt);
1.167 daniel 5906: if (input->standalone) {
5907: if ((ctxt->sax != NULL) && (ctxt->sax->error != NULL))
5908: ctxt->sax->error(ctxt->userData,
5909: "external parsed entities cannot be standalone\n");
5910: ctxt->errNo = XML_ERR_EXT_ENTITY_STANDALONE;
5911: ctxt->wellFormed = 0;
5912: }
5913: }
1.113 daniel 5914: return;
5915: }
1.77 daniel 5916: }
5917: val = ent->content;
5918: if (val == NULL) return;
5919: /*
5920: * inline the entity.
5921: */
1.171 daniel 5922: if ((ctxt->sax != NULL) && (ctxt->sax->characters != NULL) &&
5923: (!ctxt->disableSAX))
1.77 daniel 5924: ctxt->sax->characters(ctxt->userData, val, xmlStrlen(val));
5925: }
1.24 daniel 5926: }
5927:
1.50 daniel 5928: /**
5929: * xmlParseEntityRef:
5930: * @ctxt: an XML parser context
5931: *
5932: * parse ENTITY references declarations
1.24 daniel 5933: *
5934: * [68] EntityRef ::= '&' Name ';'
1.68 daniel 5935: *
1.98 daniel 5936: * [ WFC: Entity Declared ]
5937: * In a document without any DTD, a document with only an internal DTD
5938: * subset which contains no parameter entity references, or a document
5939: * with "standalone='yes'", the Name given in the entity reference
5940: * must match that in an entity declaration, except that well-formed
5941: * documents need not declare any of the following entities: amp, lt,
5942: * gt, apos, quot. The declaration of a parameter entity must precede
5943: * any reference to it. Similarly, the declaration of a general entity
5944: * must precede any reference to it which appears in a default value in an
5945: * attribute-list declaration. Note that if entities are declared in the
5946: * external subset or in external parameter entities, a non-validating
5947: * processor is not obligated to read and process their declarations;
5948: * for such documents, the rule that an entity must be declared is a
5949: * well-formedness constraint only if standalone='yes'.
5950: *
5951: * [ WFC: Parsed Entity ]
5952: * An entity reference must not contain the name of an unparsed entity
5953: *
1.77 daniel 5954: * Returns the xmlEntityPtr if found, or NULL otherwise.
1.24 daniel 5955: */
1.77 daniel 5956: xmlEntityPtr
1.55 daniel 5957: xmlParseEntityRef(xmlParserCtxtPtr ctxt) {
1.123 daniel 5958: xmlChar *name;
1.72 daniel 5959: xmlEntityPtr ent = NULL;
1.24 daniel 5960:
1.91 daniel 5961: GROW;
1.111 daniel 5962:
1.152 daniel 5963: if (RAW == '&') {
1.40 daniel 5964: NEXT;
1.24 daniel 5965: name = xmlParseName(ctxt);
5966: if (name == NULL) {
1.55 daniel 5967: if ((ctxt->sax != NULL) && (ctxt->sax->error != NULL))
1.98 daniel 5968: ctxt->sax->error(ctxt->userData,
5969: "xmlParseEntityRef: no name\n");
1.123 daniel 5970: ctxt->errNo = XML_ERR_NAME_REQUIRED;
1.59 daniel 5971: ctxt->wellFormed = 0;
1.24 daniel 5972: } else {
1.152 daniel 5973: if (RAW == ';') {
1.40 daniel 5974: NEXT;
1.24 daniel 5975: /*
1.77 daniel 5976: * Ask first SAX for entity resolution, otherwise try the
5977: * predefined set.
5978: */
5979: if (ctxt->sax != NULL) {
5980: if (ctxt->sax->getEntity != NULL)
5981: ent = ctxt->sax->getEntity(ctxt->userData, name);
5982: if (ent == NULL)
5983: ent = xmlGetPredefinedEntity(name);
5984: }
5985: /*
1.98 daniel 5986: * [ WFC: Entity Declared ]
5987: * In a document without any DTD, a document with only an
5988: * internal DTD subset which contains no parameter entity
5989: * references, or a document with "standalone='yes'", the
5990: * Name given in the entity reference must match that in an
5991: * entity declaration, except that well-formed documents
5992: * need not declare any of the following entities: amp, lt,
5993: * gt, apos, quot.
5994: * The declaration of a parameter entity must precede any
5995: * reference to it.
5996: * Similarly, the declaration of a general entity must
5997: * precede any reference to it which appears in a default
5998: * value in an attribute-list declaration. Note that if
5999: * entities are declared in the external subset or in
6000: * external parameter entities, a non-validating processor
6001: * is not obligated to read and process their declarations;
6002: * for such documents, the rule that an entity must be
6003: * declared is a well-formedness constraint only if
6004: * standalone='yes'.
1.59 daniel 6005: */
1.77 daniel 6006: if (ent == NULL) {
1.98 daniel 6007: if ((ctxt->standalone == 1) ||
6008: ((ctxt->hasExternalSubset == 0) &&
6009: (ctxt->hasPErefs == 0))) {
6010: if ((ctxt->sax != NULL) && (ctxt->sax->error != NULL))
1.77 daniel 6011: ctxt->sax->error(ctxt->userData,
6012: "Entity '%s' not defined\n", name);
1.123 daniel 6013: ctxt->errNo = XML_ERR_UNDECLARED_ENTITY;
1.77 daniel 6014: ctxt->wellFormed = 0;
6015: } else {
1.98 daniel 6016: if ((ctxt->sax != NULL) && (ctxt->sax->warning != NULL))
6017: ctxt->sax->warning(ctxt->userData,
6018: "Entity '%s' not defined\n", name);
1.123 daniel 6019: ctxt->errNo = XML_WAR_UNDECLARED_ENTITY;
1.59 daniel 6020: }
1.77 daniel 6021: }
1.59 daniel 6022:
6023: /*
1.98 daniel 6024: * [ WFC: Parsed Entity ]
6025: * An entity reference must not contain the name of an
6026: * unparsed entity
6027: */
1.159 daniel 6028: else if (ent->etype == XML_EXTERNAL_GENERAL_UNPARSED_ENTITY) {
1.98 daniel 6029: if ((ctxt->sax != NULL) && (ctxt->sax->error != NULL))
6030: ctxt->sax->error(ctxt->userData,
6031: "Entity reference to unparsed entity %s\n", name);
1.123 daniel 6032: ctxt->errNo = XML_ERR_UNPARSED_ENTITY;
1.98 daniel 6033: ctxt->wellFormed = 0;
6034: }
6035:
6036: /*
6037: * [ WFC: No External Entity References ]
6038: * Attribute values cannot contain direct or indirect
6039: * entity references to external entities.
6040: */
6041: else if ((ctxt->instate == XML_PARSER_ATTRIBUTE_VALUE) &&
1.159 daniel 6042: (ent->etype == XML_EXTERNAL_GENERAL_PARSED_ENTITY)) {
1.98 daniel 6043: if ((ctxt->sax != NULL) && (ctxt->sax->error != NULL))
6044: ctxt->sax->error(ctxt->userData,
6045: "Attribute references external entity '%s'\n", name);
1.123 daniel 6046: ctxt->errNo = XML_ERR_ENTITY_IS_EXTERNAL;
1.98 daniel 6047: ctxt->wellFormed = 0;
6048: }
6049: /*
6050: * [ WFC: No < in Attribute Values ]
6051: * The replacement text of any entity referred to directly or
6052: * indirectly in an attribute value (other than "<") must
6053: * not contain a <.
1.59 daniel 6054: */
1.98 daniel 6055: else if ((ctxt->instate == XML_PARSER_ATTRIBUTE_VALUE) &&
1.116 daniel 6056: (ent != NULL) &&
6057: (xmlStrcmp(ent->name, BAD_CAST "lt")) &&
1.98 daniel 6058: (ent->content != NULL) &&
6059: (xmlStrchr(ent->content, '<'))) {
6060: if ((ctxt->sax != NULL) && (ctxt->sax->error != NULL))
6061: ctxt->sax->error(ctxt->userData,
6062: "'<' in entity '%s' is not allowed in attributes values\n", name);
1.123 daniel 6063: ctxt->errNo = XML_ERR_LT_IN_ATTRIBUTE;
1.98 daniel 6064: ctxt->wellFormed = 0;
6065: }
6066:
6067: /*
6068: * Internal check, no parameter entities here ...
6069: */
6070: else {
1.159 daniel 6071: switch (ent->etype) {
1.59 daniel 6072: case XML_INTERNAL_PARAMETER_ENTITY:
6073: case XML_EXTERNAL_PARAMETER_ENTITY:
6074: if ((ctxt->sax != NULL) && (ctxt->sax->error != NULL))
1.74 daniel 6075: ctxt->sax->error(ctxt->userData,
1.59 daniel 6076: "Attempt to reference the parameter entity '%s'\n", name);
1.123 daniel 6077: ctxt->errNo = XML_ERR_ENTITY_IS_PARAMETER;
1.59 daniel 6078: ctxt->wellFormed = 0;
6079: break;
6080: }
6081: }
6082:
6083: /*
1.98 daniel 6084: * [ WFC: No Recursion ]
1.117 daniel 6085: * TODO A parsed entity must not contain a recursive reference
6086: * to itself, either directly or indirectly.
1.59 daniel 6087: */
1.77 daniel 6088:
1.24 daniel 6089: } else {
1.55 daniel 6090: if ((ctxt->sax != NULL) && (ctxt->sax->error != NULL))
1.74 daniel 6091: ctxt->sax->error(ctxt->userData,
1.59 daniel 6092: "xmlParseEntityRef: expecting ';'\n");
1.123 daniel 6093: ctxt->errNo = XML_ERR_ENTITYREF_SEMICOL_MISSING;
1.59 daniel 6094: ctxt->wellFormed = 0;
1.24 daniel 6095: }
1.119 daniel 6096: xmlFree(name);
1.24 daniel 6097: }
6098: }
1.77 daniel 6099: return(ent);
1.24 daniel 6100: }
1.135 daniel 6101: /**
6102: * xmlParseStringEntityRef:
6103: * @ctxt: an XML parser context
6104: * @str: a pointer to an index in the string
6105: *
6106: * parse ENTITY references declarations, but this version parses it from
6107: * a string value.
6108: *
6109: * [68] EntityRef ::= '&' Name ';'
6110: *
6111: * [ WFC: Entity Declared ]
6112: * In a document without any DTD, a document with only an internal DTD
6113: * subset which contains no parameter entity references, or a document
6114: * with "standalone='yes'", the Name given in the entity reference
6115: * must match that in an entity declaration, except that well-formed
6116: * documents need not declare any of the following entities: amp, lt,
6117: * gt, apos, quot. The declaration of a parameter entity must precede
6118: * any reference to it. Similarly, the declaration of a general entity
6119: * must precede any reference to it which appears in a default value in an
6120: * attribute-list declaration. Note that if entities are declared in the
6121: * external subset or in external parameter entities, a non-validating
6122: * processor is not obligated to read and process their declarations;
6123: * for such documents, the rule that an entity must be declared is a
6124: * well-formedness constraint only if standalone='yes'.
6125: *
6126: * [ WFC: Parsed Entity ]
6127: * An entity reference must not contain the name of an unparsed entity
6128: *
6129: * Returns the xmlEntityPtr if found, or NULL otherwise. The str pointer
6130: * is updated to the current location in the string.
6131: */
6132: xmlEntityPtr
6133: xmlParseStringEntityRef(xmlParserCtxtPtr ctxt, const xmlChar ** str) {
6134: xmlChar *name;
6135: const xmlChar *ptr;
6136: xmlChar cur;
6137: xmlEntityPtr ent = NULL;
6138:
6139: GROW;
6140:
1.156 daniel 6141: if ((str == NULL) || (*str == NULL))
6142: return(NULL);
1.135 daniel 6143: ptr = *str;
6144: cur = *ptr;
6145: if (cur == '&') {
6146: ptr++;
6147: cur = *ptr;
6148: name = xmlParseStringName(ctxt, &ptr);
6149: if (name == NULL) {
6150: if ((ctxt->sax != NULL) && (ctxt->sax->error != NULL))
6151: ctxt->sax->error(ctxt->userData,
6152: "xmlParseEntityRef: no name\n");
6153: ctxt->errNo = XML_ERR_NAME_REQUIRED;
6154: ctxt->wellFormed = 0;
6155: } else {
1.152 daniel 6156: if (RAW == ';') {
1.135 daniel 6157: NEXT;
6158: /*
6159: * Ask first SAX for entity resolution, otherwise try the
6160: * predefined set.
6161: */
6162: if (ctxt->sax != NULL) {
6163: if (ctxt->sax->getEntity != NULL)
6164: ent = ctxt->sax->getEntity(ctxt->userData, name);
6165: if (ent == NULL)
6166: ent = xmlGetPredefinedEntity(name);
6167: }
6168: /*
6169: * [ WFC: Entity Declared ]
6170: * In a document without any DTD, a document with only an
6171: * internal DTD subset which contains no parameter entity
6172: * references, or a document with "standalone='yes'", the
6173: * Name given in the entity reference must match that in an
6174: * entity declaration, except that well-formed documents
6175: * need not declare any of the following entities: amp, lt,
6176: * gt, apos, quot.
6177: * The declaration of a parameter entity must precede any
6178: * reference to it.
6179: * Similarly, the declaration of a general entity must
6180: * precede any reference to it which appears in a default
6181: * value in an attribute-list declaration. Note that if
6182: * entities are declared in the external subset or in
6183: * external parameter entities, a non-validating processor
6184: * is not obligated to read and process their declarations;
6185: * for such documents, the rule that an entity must be
6186: * declared is a well-formedness constraint only if
6187: * standalone='yes'.
6188: */
6189: if (ent == NULL) {
6190: if ((ctxt->standalone == 1) ||
6191: ((ctxt->hasExternalSubset == 0) &&
6192: (ctxt->hasPErefs == 0))) {
6193: if ((ctxt->sax != NULL) && (ctxt->sax->error != NULL))
6194: ctxt->sax->error(ctxt->userData,
6195: "Entity '%s' not defined\n", name);
6196: ctxt->errNo = XML_ERR_UNDECLARED_ENTITY;
6197: ctxt->wellFormed = 0;
6198: } else {
6199: if ((ctxt->sax != NULL) && (ctxt->sax->warning != NULL))
6200: ctxt->sax->warning(ctxt->userData,
6201: "Entity '%s' not defined\n", name);
6202: ctxt->errNo = XML_WAR_UNDECLARED_ENTITY;
6203: }
6204: }
6205:
6206: /*
6207: * [ WFC: Parsed Entity ]
6208: * An entity reference must not contain the name of an
6209: * unparsed entity
6210: */
1.159 daniel 6211: else if (ent->etype == XML_EXTERNAL_GENERAL_UNPARSED_ENTITY) {
1.135 daniel 6212: if ((ctxt->sax != NULL) && (ctxt->sax->error != NULL))
6213: ctxt->sax->error(ctxt->userData,
6214: "Entity reference to unparsed entity %s\n", name);
6215: ctxt->errNo = XML_ERR_UNPARSED_ENTITY;
6216: ctxt->wellFormed = 0;
6217: }
6218:
6219: /*
6220: * [ WFC: No External Entity References ]
6221: * Attribute values cannot contain direct or indirect
6222: * entity references to external entities.
6223: */
6224: else if ((ctxt->instate == XML_PARSER_ATTRIBUTE_VALUE) &&
1.159 daniel 6225: (ent->etype == XML_EXTERNAL_GENERAL_PARSED_ENTITY)) {
1.135 daniel 6226: if ((ctxt->sax != NULL) && (ctxt->sax->error != NULL))
6227: ctxt->sax->error(ctxt->userData,
6228: "Attribute references external entity '%s'\n", name);
6229: ctxt->errNo = XML_ERR_ENTITY_IS_EXTERNAL;
6230: ctxt->wellFormed = 0;
6231: }
6232: /*
6233: * [ WFC: No < in Attribute Values ]
6234: * The replacement text of any entity referred to directly or
6235: * indirectly in an attribute value (other than "<") must
6236: * not contain a <.
6237: */
6238: else if ((ctxt->instate == XML_PARSER_ATTRIBUTE_VALUE) &&
6239: (ent != NULL) &&
6240: (xmlStrcmp(ent->name, BAD_CAST "lt")) &&
6241: (ent->content != NULL) &&
6242: (xmlStrchr(ent->content, '<'))) {
6243: if ((ctxt->sax != NULL) && (ctxt->sax->error != NULL))
6244: ctxt->sax->error(ctxt->userData,
6245: "'<' in entity '%s' is not allowed in attributes values\n", name);
6246: ctxt->errNo = XML_ERR_LT_IN_ATTRIBUTE;
6247: ctxt->wellFormed = 0;
6248: }
6249:
6250: /*
6251: * Internal check, no parameter entities here ...
6252: */
6253: else {
1.159 daniel 6254: switch (ent->etype) {
1.135 daniel 6255: case XML_INTERNAL_PARAMETER_ENTITY:
6256: case XML_EXTERNAL_PARAMETER_ENTITY:
6257: if ((ctxt->sax != NULL) && (ctxt->sax->error != NULL))
6258: ctxt->sax->error(ctxt->userData,
6259: "Attempt to reference the parameter entity '%s'\n", name);
6260: ctxt->errNo = XML_ERR_ENTITY_IS_PARAMETER;
6261: ctxt->wellFormed = 0;
6262: break;
6263: }
6264: }
6265:
6266: /*
6267: * [ WFC: No Recursion ]
6268: * TODO A parsed entity must not contain a recursive reference
6269: * to itself, either directly or indirectly.
6270: */
6271:
6272: } else {
6273: if ((ctxt->sax != NULL) && (ctxt->sax->error != NULL))
6274: ctxt->sax->error(ctxt->userData,
6275: "xmlParseEntityRef: expecting ';'\n");
6276: ctxt->errNo = XML_ERR_ENTITYREF_SEMICOL_MISSING;
6277: ctxt->wellFormed = 0;
6278: }
6279: xmlFree(name);
6280: }
6281: }
6282: return(ent);
6283: }
1.24 daniel 6284:
1.50 daniel 6285: /**
6286: * xmlParsePEReference:
6287: * @ctxt: an XML parser context
6288: *
6289: * parse PEReference declarations
 * The entity content is handled directly by pushing its content as
 * a new input stream.
1.22 daniel 6292: *
6293: * [69] PEReference ::= '%' Name ';'
1.68 daniel 6294: *
1.98 daniel 6295: * [ WFC: No Recursion ]
6296: * TODO A parsed entity must not contain a recursive
6297: * reference to itself, either directly or indirectly.
6298: *
6299: * [ WFC: Entity Declared ]
6300: * In a document without any DTD, a document with only an internal DTD
6301: * subset which contains no parameter entity references, or a document
6302: * with "standalone='yes'", ... ... The declaration of a parameter
6303: * entity must precede any reference to it...
6304: *
6305: * [ VC: Entity Declared ]
6306: * In a document with an external subset or external parameter entities
6307: * with "standalone='no'", ... ... The declaration of a parameter entity
6308: * must precede any reference to it...
6309: *
6310: * [ WFC: In DTD ]
6311: * Parameter-entity references may only appear in the DTD.
6312: * NOTE: misleading but this is handled.
1.22 daniel 6313: */
1.77 daniel 6314: void
1.55 daniel 6315: xmlParsePEReference(xmlParserCtxtPtr ctxt) {
1.123 daniel 6316: xmlChar *name;
1.72 daniel 6317: xmlEntityPtr entity = NULL;
1.50 daniel 6318: xmlParserInputPtr input;
1.22 daniel 6319:
1.152 daniel 6320: if (RAW == '%') {
1.40 daniel 6321: NEXT;
1.22 daniel 6322: name = xmlParseName(ctxt);
6323: if (name == NULL) {
1.55 daniel 6324: if ((ctxt->sax != NULL) && (ctxt->sax->error != NULL))
1.122 daniel 6325: ctxt->sax->error(ctxt->userData,
6326: "xmlParsePEReference: no name\n");
1.123 daniel 6327: ctxt->errNo = XML_ERR_NAME_REQUIRED;
1.59 daniel 6328: ctxt->wellFormed = 0;
1.22 daniel 6329: } else {
1.152 daniel 6330: if (RAW == ';') {
1.40 daniel 6331: NEXT;
1.98 daniel 6332: if ((ctxt->sax != NULL) &&
6333: (ctxt->sax->getParameterEntity != NULL))
6334: entity = ctxt->sax->getParameterEntity(ctxt->userData,
6335: name);
1.45 daniel 6336: if (entity == NULL) {
1.98 daniel 6337: /*
6338: * [ WFC: Entity Declared ]
6339: * In a document without any DTD, a document with only an
6340: * internal DTD subset which contains no parameter entity
6341: * references, or a document with "standalone='yes'", ...
6342: * ... The declaration of a parameter entity must precede
6343: * any reference to it...
6344: */
6345: if ((ctxt->standalone == 1) ||
6346: ((ctxt->hasExternalSubset == 0) &&
6347: (ctxt->hasPErefs == 0))) {
6348: if ((ctxt->sax != NULL) && (ctxt->sax->error != NULL))
6349: ctxt->sax->error(ctxt->userData,
6350: "PEReference: %%%s; not found\n", name);
1.123 daniel 6351: ctxt->errNo = XML_ERR_UNDECLARED_ENTITY;
1.98 daniel 6352: ctxt->wellFormed = 0;
6353: } else {
6354: /*
6355: * [ VC: Entity Declared ]
6356: * In a document with an external subset or external
6357: * parameter entities with "standalone='no'", ...
6358: * ... The declaration of a parameter entity must precede
6359: * any reference to it...
6360: */
6361: if ((ctxt->sax != NULL) && (ctxt->sax->warning != NULL))
6362: ctxt->sax->warning(ctxt->userData,
6363: "PEReference: %%%s; not found\n", name);
6364: ctxt->valid = 0;
6365: }
1.50 daniel 6366: } else {
1.98 daniel 6367: /*
6368: * Internal checking in case the entity quest barfed
6369: */
1.159 daniel 6370: if ((entity->etype != XML_INTERNAL_PARAMETER_ENTITY) &&
6371: (entity->etype != XML_EXTERNAL_PARAMETER_ENTITY)) {
1.98 daniel 6372: if ((ctxt->sax != NULL) && (ctxt->sax->warning != NULL))
6373: ctxt->sax->warning(ctxt->userData,
6374: "Internal: %%%s; is not a parameter entity\n", name);
6375: } else {
1.164 daniel 6376: /*
6377: * TODO !!!
6378: * handle the extra spaces added before and after
6379: * c.f. http://www.w3.org/TR/REC-xml#as-PE
6380: */
1.98 daniel 6381: input = xmlNewEntityInputStream(ctxt, entity);
6382: xmlPushInput(ctxt, input);
1.164 daniel 6383: if ((entity->etype == XML_EXTERNAL_PARAMETER_ENTITY) &&
6384: (RAW == '<') && (NXT(1) == '?') &&
6385: (NXT(2) == 'x') && (NXT(3) == 'm') &&
6386: (NXT(4) == 'l') && (IS_BLANK(NXT(5)))) {
1.172 daniel 6387: xmlParseTextDecl(ctxt);
1.164 daniel 6388: }
6389: if (ctxt->token == 0)
6390: ctxt->token = ' ';
1.98 daniel 6391: }
1.45 daniel 6392: }
1.98 daniel 6393: ctxt->hasPErefs = 1;
1.22 daniel 6394: } else {
1.55 daniel 6395: if ((ctxt->sax != NULL) && (ctxt->sax->error != NULL))
1.74 daniel 6396: ctxt->sax->error(ctxt->userData,
1.59 daniel 6397: "xmlParsePEReference: expecting ';'\n");
1.123 daniel 6398: ctxt->errNo = XML_ERR_ENTITYREF_SEMICOL_MISSING;
1.59 daniel 6399: ctxt->wellFormed = 0;
1.22 daniel 6400: }
1.119 daniel 6401: xmlFree(name);
1.3 veillard 6402: }
6403: }
6404: }
6405:
1.50 daniel 6406: /**
1.135 daniel 6407: * xmlParseStringPEReference:
6408: * @ctxt: an XML parser context
6409: * @str: a pointer to an index in the string
6410: *
6411: * parse PEReference declarations
6412: *
6413: * [69] PEReference ::= '%' Name ';'
6414: *
6415: * [ WFC: No Recursion ]
6416: * TODO A parsed entity must not contain a recursive
6417: * reference to itself, either directly or indirectly.
6418: *
6419: * [ WFC: Entity Declared ]
6420: * In a document without any DTD, a document with only an internal DTD
6421: * subset which contains no parameter entity references, or a document
6422: * with "standalone='yes'", ... ... The declaration of a parameter
6423: * entity must precede any reference to it...
6424: *
6425: * [ VC: Entity Declared ]
6426: * In a document with an external subset or external parameter entities
6427: * with "standalone='no'", ... ... The declaration of a parameter entity
6428: * must precede any reference to it...
6429: *
6430: * [ WFC: In DTD ]
6431: * Parameter-entity references may only appear in the DTD.
6432: * NOTE: misleading but this is handled.
6433: *
6434: * Returns the string of the entity content.
6435: * str is updated to the current value of the index
6436: */
6437: xmlEntityPtr
6438: xmlParseStringPEReference(xmlParserCtxtPtr ctxt, const xmlChar **str) {
6439: const xmlChar *ptr;
6440: xmlChar cur;
6441: xmlChar *name;
6442: xmlEntityPtr entity = NULL;
6443:
6444: if ((str == NULL) || (*str == NULL)) return(NULL);
6445: ptr = *str;
6446: cur = *ptr;
6447: if (cur == '%') {
6448: ptr++;
6449: cur = *ptr;
6450: name = xmlParseStringName(ctxt, &ptr);
6451: if (name == NULL) {
6452: if ((ctxt->sax != NULL) && (ctxt->sax->error != NULL))
6453: ctxt->sax->error(ctxt->userData,
6454: "xmlParseStringPEReference: no name\n");
6455: ctxt->errNo = XML_ERR_NAME_REQUIRED;
6456: ctxt->wellFormed = 0;
6457: } else {
6458: cur = *ptr;
6459: if (cur == ';') {
6460: ptr++;
6461: cur = *ptr;
6462: if ((ctxt->sax != NULL) &&
6463: (ctxt->sax->getParameterEntity != NULL))
6464: entity = ctxt->sax->getParameterEntity(ctxt->userData,
6465: name);
6466: if (entity == NULL) {
6467: /*
6468: * [ WFC: Entity Declared ]
6469: * In a document without any DTD, a document with only an
6470: * internal DTD subset which contains no parameter entity
6471: * references, or a document with "standalone='yes'", ...
6472: * ... The declaration of a parameter entity must precede
6473: * any reference to it...
6474: */
6475: if ((ctxt->standalone == 1) ||
6476: ((ctxt->hasExternalSubset == 0) &&
6477: (ctxt->hasPErefs == 0))) {
6478: if ((ctxt->sax != NULL) && (ctxt->sax->error != NULL))
6479: ctxt->sax->error(ctxt->userData,
6480: "PEReference: %%%s; not found\n", name);
6481: ctxt->errNo = XML_ERR_UNDECLARED_ENTITY;
6482: ctxt->wellFormed = 0;
6483: } else {
6484: /*
6485: * [ VC: Entity Declared ]
6486: * In a document with an external subset or external
6487: * parameter entities with "standalone='no'", ...
6488: * ... The declaration of a parameter entity must
6489: * precede any reference to it...
6490: */
6491: if ((ctxt->sax != NULL) && (ctxt->sax->warning != NULL))
6492: ctxt->sax->warning(ctxt->userData,
6493: "PEReference: %%%s; not found\n", name);
6494: ctxt->valid = 0;
6495: }
6496: } else {
6497: /*
6498: * Internal checking in case the entity quest barfed
6499: */
1.159 daniel 6500: if ((entity->etype != XML_INTERNAL_PARAMETER_ENTITY) &&
6501: (entity->etype != XML_EXTERNAL_PARAMETER_ENTITY)) {
1.135 daniel 6502: if ((ctxt->sax != NULL) && (ctxt->sax->warning != NULL))
6503: ctxt->sax->warning(ctxt->userData,
6504: "Internal: %%%s; is not a parameter entity\n", name);
6505: }
6506: }
6507: ctxt->hasPErefs = 1;
6508: } else {
6509: if ((ctxt->sax != NULL) && (ctxt->sax->error != NULL))
6510: ctxt->sax->error(ctxt->userData,
6511: "xmlParseStringPEReference: expecting ';'\n");
6512: ctxt->errNo = XML_ERR_ENTITYREF_SEMICOL_MISSING;
6513: ctxt->wellFormed = 0;
6514: }
6515: xmlFree(name);
6516: }
6517: }
6518: *str = ptr;
6519: return(entity);
6520: }
6521:
6522: /**
1.50 daniel 6523: * xmlParseDocTypeDecl :
6524: * @ctxt: an XML parser context
6525: *
6526: * parse a DOCTYPE declaration
1.21 daniel 6527: *
1.22 daniel 6528: * [28] doctypedecl ::= '<!DOCTYPE' S Name (S ExternalID)? S?
6529: * ('[' (markupdecl | PEReference | S)* ']' S?)? '>'
1.98 daniel 6530: *
6531: * [ VC: Root Element Type ]
1.99 daniel 6532: * The Name in the document type declaration must match the element
1.98 daniel 6533: * type of the root element.
1.21 daniel 6534: */
6535:
1.55 daniel 6536: void
6537: xmlParseDocTypeDecl(xmlParserCtxtPtr ctxt) {
1.165 daniel 6538: xmlChar *name = NULL;
1.123 daniel 6539: xmlChar *ExternalID = NULL;
6540: xmlChar *URI = NULL;
1.21 daniel 6541:
6542: /*
6543: * We know that '<!DOCTYPE' has been detected.
6544: */
1.40 daniel 6545: SKIP(9);
1.21 daniel 6546:
1.42 daniel 6547: SKIP_BLANKS;
1.21 daniel 6548:
6549: /*
6550: * Parse the DOCTYPE name.
6551: */
6552: name = xmlParseName(ctxt);
6553: if (name == NULL) {
1.55 daniel 6554: if ((ctxt->sax != NULL) && (ctxt->sax->error != NULL))
1.122 daniel 6555: ctxt->sax->error(ctxt->userData,
6556: "xmlParseDocTypeDecl : no DOCTYPE name !\n");
1.59 daniel 6557: ctxt->wellFormed = 0;
1.123 daniel 6558: ctxt->errNo = XML_ERR_NAME_REQUIRED;
1.21 daniel 6559: }
1.165 daniel 6560: ctxt->intSubName = name;
1.21 daniel 6561:
1.42 daniel 6562: SKIP_BLANKS;
1.21 daniel 6563:
6564: /*
1.22 daniel 6565: * Check for SystemID and ExternalID
6566: */
1.67 daniel 6567: URI = xmlParseExternalID(ctxt, &ExternalID, 1);
1.98 daniel 6568:
6569: if ((URI != NULL) || (ExternalID != NULL)) {
6570: ctxt->hasExternalSubset = 1;
6571: }
1.165 daniel 6572: ctxt->extSubURI = URI;
6573: ctxt->extSubSystem = ExternalID;
1.98 daniel 6574:
1.42 daniel 6575: SKIP_BLANKS;
1.36 daniel 6576:
1.76 daniel 6577: /*
1.165 daniel 6578: * Create and update the internal subset.
1.76 daniel 6579: */
1.171 daniel 6580: if ((ctxt->sax != NULL) && (ctxt->sax->internalSubset != NULL) &&
6581: (!ctxt->disableSAX))
1.74 daniel 6582: ctxt->sax->internalSubset(ctxt->userData, name, ExternalID, URI);
1.22 daniel 6583:
6584: /*
1.140 daniel 6585: * Is there any internal subset declarations ?
6586: * they are handled separately in xmlParseInternalSubset()
6587: */
1.152 daniel 6588: if (RAW == '[')
1.140 daniel 6589: return;
6590:
6591: /*
6592: * We should be at the end of the DOCTYPE declaration.
6593: */
1.152 daniel 6594: if (RAW != '>') {
1.140 daniel 6595: if ((ctxt->sax != NULL) && (ctxt->sax->error != NULL))
6596: ctxt->sax->error(ctxt->userData, "DOCTYPE unproperly terminated\n");
6597: ctxt->wellFormed = 0;
6598: ctxt->errNo = XML_ERR_DOCTYPE_NOT_FINISHED;
6599: }
6600: NEXT;
6601: }
6602:
6603: /**
6604: * xmlParseInternalsubset :
6605: * @ctxt: an XML parser context
6606: *
6607: * parse the internal subset declaration
6608: *
6609: * [28 end] ('[' (markupdecl | PEReference | S)* ']' S?)? '>'
6610: */
6611:
6612: void
6613: xmlParseInternalSubset(xmlParserCtxtPtr ctxt) {
6614: /*
1.22 daniel 6615: * Is there any DTD definition ?
6616: */
1.152 daniel 6617: if (RAW == '[') {
1.96 daniel 6618: ctxt->instate = XML_PARSER_DTD;
1.40 daniel 6619: NEXT;
1.22 daniel 6620: /*
6621: * Parse the succession of Markup declarations and
6622: * PEReferences.
6623: * Subsequence (markupdecl | PEReference | S)*
6624: */
1.152 daniel 6625: while (RAW != ']') {
1.123 daniel 6626: const xmlChar *check = CUR_PTR;
1.115 daniel 6627: int cons = ctxt->input->consumed;
1.22 daniel 6628:
1.42 daniel 6629: SKIP_BLANKS;
1.22 daniel 6630: xmlParseMarkupDecl(ctxt);
1.50 daniel 6631: xmlParsePEReference(ctxt);
1.22 daniel 6632:
1.115 daniel 6633: /*
6634: * Pop-up of finished entities.
6635: */
1.152 daniel 6636: while ((RAW == 0) && (ctxt->inputNr > 1))
1.115 daniel 6637: xmlPopInput(ctxt);
6638:
1.118 daniel 6639: if ((CUR_PTR == check) && (cons == ctxt->input->consumed)) {
1.55 daniel 6640: if ((ctxt->sax != NULL) && (ctxt->sax->error != NULL))
1.74 daniel 6641: ctxt->sax->error(ctxt->userData,
1.140 daniel 6642: "xmlParseInternalSubset: error detected in Markup declaration\n");
1.59 daniel 6643: ctxt->wellFormed = 0;
1.123 daniel 6644: ctxt->errNo = XML_ERR_INTERNAL_ERROR;
1.22 daniel 6645: break;
6646: }
6647: }
1.152 daniel 6648: if (RAW == ']') NEXT;
1.22 daniel 6649: }
6650:
6651: /*
6652: * We should be at the end of the DOCTYPE declaration.
1.21 daniel 6653: */
1.152 daniel 6654: if (RAW != '>') {
1.55 daniel 6655: if ((ctxt->sax != NULL) && (ctxt->sax->error != NULL))
1.74 daniel 6656: ctxt->sax->error(ctxt->userData, "DOCTYPE unproperly terminated\n");
1.59 daniel 6657: ctxt->wellFormed = 0;
1.123 daniel 6658: ctxt->errNo = XML_ERR_DOCTYPE_NOT_FINISHED;
1.21 daniel 6659: }
1.40 daniel 6660: NEXT;
1.21 daniel 6661: }
6662:
1.50 daniel 6663: /**
6664: * xmlParseAttribute:
6665: * @ctxt: an XML parser context
1.123 daniel 6666: * @value: a xmlChar ** used to store the value of the attribute
1.50 daniel 6667: *
6668: * parse an attribute
1.3 veillard 6669: *
1.22 daniel 6670: * [41] Attribute ::= Name Eq AttValue
6671: *
1.98 daniel 6672: * [ WFC: No External Entity References ]
6673: * Attribute values cannot contain direct or indirect entity references
6674: * to external entities.
6675: *
6676: * [ WFC: No < in Attribute Values ]
6677: * The replacement text of any entity referred to directly or indirectly in
6678: * an attribute value (other than "<") must not contain a <.
6679: *
6680: * [ VC: Attribute Value Type ]
1.117 daniel 6681: * The attribute must have been declared; the value must be of the type
1.99 daniel 6682: * declared for it.
1.98 daniel 6683: *
1.22 daniel 6684: * [25] Eq ::= S? '=' S?
6685: *
1.29 daniel 6686: * With namespace:
6687: *
6688: * [NS 11] Attribute ::= QName Eq AttValue
1.43 daniel 6689: *
6690: * Also the case QName == xmlns:??? is handled independently as a namespace
6691: * definition.
1.69 daniel 6692: *
1.72 daniel 6693: * Returns the attribute name, and the value in *value.
1.3 veillard 6694: */
6695:
1.123 daniel 6696: xmlChar *
6697: xmlParseAttribute(xmlParserCtxtPtr ctxt, xmlChar **value) {
6698: xmlChar *name, *val;
1.3 veillard 6699:
1.72 daniel 6700: *value = NULL;
6701: name = xmlParseName(ctxt);
1.22 daniel 6702: if (name == NULL) {
1.55 daniel 6703: if ((ctxt->sax != NULL) && (ctxt->sax->error != NULL))
1.74 daniel 6704: ctxt->sax->error(ctxt->userData, "error parsing attribute name\n");
1.59 daniel 6705: ctxt->wellFormed = 0;
1.123 daniel 6706: ctxt->errNo = XML_ERR_NAME_REQUIRED;
1.52 daniel 6707: return(NULL);
1.3 veillard 6708: }
6709:
6710: /*
1.29 daniel 6711: * read the value
1.3 veillard 6712: */
1.42 daniel 6713: SKIP_BLANKS;
1.152 daniel 6714: if (RAW == '=') {
1.40 daniel 6715: NEXT;
1.42 daniel 6716: SKIP_BLANKS;
1.72 daniel 6717: val = xmlParseAttValue(ctxt);
1.96 daniel 6718: ctxt->instate = XML_PARSER_CONTENT;
1.29 daniel 6719: } else {
1.55 daniel 6720: if ((ctxt->sax != NULL) && (ctxt->sax->error != NULL))
1.74 daniel 6721: ctxt->sax->error(ctxt->userData,
1.59 daniel 6722: "Specification mandate value for attribute %s\n", name);
1.123 daniel 6723: ctxt->errNo = XML_ERR_ATTRIBUTE_WITHOUT_VALUE;
1.59 daniel 6724: ctxt->wellFormed = 0;
1.170 daniel 6725: xmlFree(name);
1.52 daniel 6726: return(NULL);
1.43 daniel 6727: }
6728:
1.172 daniel 6729: /*
6730: * Check that xml:lang conforms to the specification
6731: */
6732: if (!xmlStrcmp(name, BAD_CAST "xml:lang")) {
6733: if (!xmlCheckLanguageID(val)) {
6734: if ((ctxt->sax != NULL) && (ctxt->sax->error != NULL))
6735: ctxt->sax->error(ctxt->userData,
6736: "Invalid value for xml:lang : %s\n", val);
6737: ctxt->errNo = XML_ERR_ATTRIBUTE_WITHOUT_VALUE;
6738: ctxt->wellFormed = 0;
6739: }
6740: }
6741:
1.72 daniel 6742: *value = val;
6743: return(name);
1.3 veillard 6744: }
6745:
1.50 daniel 6746: /**
6747: * xmlParseStartTag:
6748: * @ctxt: an XML parser context
6749: *
6750: * parse a start of tag either for rule element or
6751: * EmptyElement. In both case we don't parse the tag closing chars.
1.27 daniel 6752: *
6753: * [40] STag ::= '<' Name (S Attribute)* S? '>'
6754: *
1.98 daniel 6755: * [ WFC: Unique Att Spec ]
6756: * No attribute name may appear more than once in the same start-tag or
6757: * empty-element tag.
6758: *
1.29 daniel 6759: * [44] EmptyElemTag ::= '<' Name (S Attribute)* S? '/>'
6760: *
1.98 daniel 6761: * [ WFC: Unique Att Spec ]
6762: * No attribute name may appear more than once in the same start-tag or
6763: * empty-element tag.
6764: *
1.29 daniel 6765: * With namespace:
6766: *
6767: * [NS 8] STag ::= '<' QName (S Attribute)* S? '>'
6768: *
6769: * [NS 10] EmptyElement ::= '<' QName (S Attribute)* S? '/>'
1.83 daniel 6770: *
1.129 daniel 6771: * Returne the element name parsed
1.2 veillard 6772: */
6773:
1.123 daniel 6774: xmlChar *
1.69 daniel 6775: xmlParseStartTag(xmlParserCtxtPtr ctxt) {
1.123 daniel 6776: xmlChar *name;
6777: xmlChar *attname;
6778: xmlChar *attvalue;
6779: const xmlChar **atts = NULL;
1.72 daniel 6780: int nbatts = 0;
6781: int maxatts = 0;
6782: int i;
1.2 veillard 6783:
1.152 daniel 6784: if (RAW != '<') return(NULL);
1.40 daniel 6785: NEXT;
1.3 veillard 6786:
1.72 daniel 6787: name = xmlParseName(ctxt);
1.59 daniel 6788: if (name == NULL) {
6789: if ((ctxt->sax != NULL) && (ctxt->sax->error != NULL))
1.74 daniel 6790: ctxt->sax->error(ctxt->userData,
1.59 daniel 6791: "xmlParseStartTag: invalid element name\n");
1.123 daniel 6792: ctxt->errNo = XML_ERR_NAME_REQUIRED;
1.59 daniel 6793: ctxt->wellFormed = 0;
1.83 daniel 6794: return(NULL);
1.50 daniel 6795: }
6796:
6797: /*
1.3 veillard 6798: * Now parse the attributes, it ends up with the ending
6799: *
6800: * (S Attribute)* S?
6801: */
1.42 daniel 6802: SKIP_BLANKS;
1.91 daniel 6803: GROW;
1.168 daniel 6804:
1.153 daniel 6805: while ((IS_CHAR(RAW)) &&
1.152 daniel 6806: (RAW != '>') &&
6807: ((RAW != '/') || (NXT(1) != '>'))) {
1.123 daniel 6808: const xmlChar *q = CUR_PTR;
1.91 daniel 6809: int cons = ctxt->input->consumed;
1.29 daniel 6810:
1.72 daniel 6811: attname = xmlParseAttribute(ctxt, &attvalue);
6812: if ((attname != NULL) && (attvalue != NULL)) {
6813: /*
1.98 daniel 6814: * [ WFC: Unique Att Spec ]
6815: * No attribute name may appear more than once in the same
6816: * start-tag or empty-element tag.
1.72 daniel 6817: */
6818: for (i = 0; i < nbatts;i += 2) {
6819: if (!xmlStrcmp(atts[i], attname)) {
6820: if ((ctxt->sax != NULL) && (ctxt->sax->error != NULL))
1.98 daniel 6821: ctxt->sax->error(ctxt->userData,
6822: "Attribute %s redefined\n",
6823: attname);
1.72 daniel 6824: ctxt->wellFormed = 0;
1.123 daniel 6825: ctxt->errNo = XML_ERR_ATTRIBUTE_REDEFINED;
1.119 daniel 6826: xmlFree(attname);
6827: xmlFree(attvalue);
1.98 daniel 6828: goto failed;
1.72 daniel 6829: }
6830: }
6831:
6832: /*
6833: * Add the pair to atts
6834: */
6835: if (atts == NULL) {
6836: maxatts = 10;
1.123 daniel 6837: atts = (const xmlChar **) xmlMalloc(maxatts * sizeof(xmlChar *));
1.72 daniel 6838: if (atts == NULL) {
1.86 daniel 6839: fprintf(stderr, "malloc of %ld byte failed\n",
1.123 daniel 6840: maxatts * (long)sizeof(xmlChar *));
1.83 daniel 6841: return(NULL);
1.72 daniel 6842: }
1.127 daniel 6843: } else if (nbatts + 4 > maxatts) {
1.72 daniel 6844: maxatts *= 2;
1.123 daniel 6845: atts = (const xmlChar **) xmlRealloc(atts,
6846: maxatts * sizeof(xmlChar *));
1.72 daniel 6847: if (atts == NULL) {
1.86 daniel 6848: fprintf(stderr, "realloc of %ld byte failed\n",
1.123 daniel 6849: maxatts * (long)sizeof(xmlChar *));
1.83 daniel 6850: return(NULL);
1.72 daniel 6851: }
6852: }
6853: atts[nbatts++] = attname;
6854: atts[nbatts++] = attvalue;
6855: atts[nbatts] = NULL;
6856: atts[nbatts + 1] = NULL;
6857: }
6858:
1.116 daniel 6859: failed:
1.168 daniel 6860:
6861: if ((RAW == '>') || (((RAW == '/') && (NXT(1) == '>'))))
6862: break;
6863: if (!IS_BLANK(RAW)) {
6864: if ((ctxt->sax != NULL) && (ctxt->sax->error != NULL))
6865: ctxt->sax->error(ctxt->userData,
6866: "attributes construct error\n");
6867: ctxt->errNo = XML_ERR_SPACE_REQUIRED;
6868: ctxt->wellFormed = 0;
6869: }
1.42 daniel 6870: SKIP_BLANKS;
1.91 daniel 6871: if ((cons == ctxt->input->consumed) && (q == CUR_PTR)) {
1.55 daniel 6872: if ((ctxt->sax != NULL) && (ctxt->sax->error != NULL))
1.74 daniel 6873: ctxt->sax->error(ctxt->userData,
1.31 daniel 6874: "xmlParseStartTag: problem parsing attributes\n");
1.123 daniel 6875: ctxt->errNo = XML_ERR_INTERNAL_ERROR;
1.59 daniel 6876: ctxt->wellFormed = 0;
1.29 daniel 6877: break;
1.3 veillard 6878: }
1.91 daniel 6879: GROW;
1.3 veillard 6880: }
6881:
1.43 daniel 6882: /*
1.72 daniel 6883: * SAX: Start of Element !
1.43 daniel 6884: */
1.171 daniel 6885: if ((ctxt->sax != NULL) && (ctxt->sax->startElement != NULL) &&
6886: (!ctxt->disableSAX))
1.74 daniel 6887: ctxt->sax->startElement(ctxt->userData, name, atts);
1.43 daniel 6888:
1.72 daniel 6889: if (atts != NULL) {
1.123 daniel 6890: for (i = 0;i < nbatts;i++) xmlFree((xmlChar *) atts[i]);
1.119 daniel 6891: xmlFree(atts);
1.72 daniel 6892: }
1.83 daniel 6893: return(name);
1.3 veillard 6894: }
6895:
1.50 daniel 6896: /**
6897: * xmlParseEndTag:
6898: * @ctxt: an XML parser context
6899: *
6900: * parse an end of tag
1.27 daniel 6901: *
6902: * [42] ETag ::= '</' Name S? '>'
1.29 daniel 6903: *
6904: * With namespace
6905: *
1.72 daniel 6906: * [NS 9] ETag ::= '</' QName S? '>'
1.7 veillard 6907: */
6908:
1.55 daniel 6909: void
1.140 daniel 6910: xmlParseEndTag(xmlParserCtxtPtr ctxt) {
1.123 daniel 6911: xmlChar *name;
1.140 daniel 6912: xmlChar *oldname;
1.7 veillard 6913:
1.91 daniel 6914: GROW;
1.152 daniel 6915: if ((RAW != '<') || (NXT(1) != '/')) {
1.55 daniel 6916: if ((ctxt->sax != NULL) && (ctxt->sax->error != NULL))
1.74 daniel 6917: ctxt->sax->error(ctxt->userData, "xmlParseEndTag: '</' not found\n");
1.59 daniel 6918: ctxt->wellFormed = 0;
1.123 daniel 6919: ctxt->errNo = XML_ERR_LTSLASH_REQUIRED;
1.27 daniel 6920: return;
6921: }
1.40 daniel 6922: SKIP(2);
1.7 veillard 6923:
1.72 daniel 6924: name = xmlParseName(ctxt);
1.7 veillard 6925:
6926: /*
6927: * We should definitely be at the ending "S? '>'" part
6928: */
1.91 daniel 6929: GROW;
1.42 daniel 6930: SKIP_BLANKS;
1.153 daniel 6931: if ((!IS_CHAR(RAW)) || (RAW != '>')) {
1.55 daniel 6932: if ((ctxt->sax != NULL) && (ctxt->sax->error != NULL))
1.74 daniel 6933: ctxt->sax->error(ctxt->userData, "End tag : expected '>'\n");
1.123 daniel 6934: ctxt->errNo = XML_ERR_GT_REQUIRED;
1.59 daniel 6935: ctxt->wellFormed = 0;
1.7 veillard 6936: } else
1.40 daniel 6937: NEXT;
1.7 veillard 6938:
1.72 daniel 6939: /*
1.98 daniel 6940: * [ WFC: Element Type Match ]
6941: * The Name in an element's end-tag must match the element type in the
6942: * start-tag.
6943: *
1.83 daniel 6944: */
1.147 daniel 6945: if ((name == NULL) || (ctxt->name == NULL) ||
6946: (xmlStrcmp(name, ctxt->name))) {
6947: if ((ctxt->sax != NULL) && (ctxt->sax->error != NULL)) {
6948: if ((name != NULL) && (ctxt->name != NULL)) {
6949: ctxt->sax->error(ctxt->userData,
6950: "Opening and ending tag mismatch: %s and %s\n",
6951: ctxt->name, name);
6952: } else if (ctxt->name != NULL) {
6953: ctxt->sax->error(ctxt->userData,
6954: "Ending tag eror for: %s\n", ctxt->name);
6955: } else {
6956: ctxt->sax->error(ctxt->userData,
6957: "Ending tag error: internal error ???\n");
6958: }
1.122 daniel 6959:
1.147 daniel 6960: }
1.123 daniel 6961: ctxt->errNo = XML_ERR_TAG_NAME_MISMATCH;
1.83 daniel 6962: ctxt->wellFormed = 0;
6963: }
6964:
6965: /*
1.72 daniel 6966: * SAX: End of Tag
6967: */
1.171 daniel 6968: if ((ctxt->sax != NULL) && (ctxt->sax->endElement != NULL) &&
6969: (!ctxt->disableSAX))
1.74 daniel 6970: ctxt->sax->endElement(ctxt->userData, name);
1.72 daniel 6971:
6972: if (name != NULL)
1.119 daniel 6973: xmlFree(name);
1.140 daniel 6974: oldname = namePop(ctxt);
6975: if (oldname != NULL) {
6976: #ifdef DEBUG_STACK
6977: fprintf(stderr,"Close: popped %s\n", oldname);
6978: #endif
6979: xmlFree(oldname);
6980: }
1.7 veillard 6981: return;
6982: }
6983:
1.50 daniel 6984: /**
6985: * xmlParseCDSect:
6986: * @ctxt: an XML parser context
6987: *
6988: * Parse escaped pure raw content.
1.29 daniel 6989: *
6990: * [18] CDSect ::= CDStart CData CDEnd
6991: *
6992: * [19] CDStart ::= '<![CDATA['
6993: *
6994: * [20] Data ::= (Char* - (Char* ']]>' Char*))
6995: *
6996: * [21] CDEnd ::= ']]>'
1.3 veillard 6997: */
1.55 daniel 6998: void
6999: xmlParseCDSect(xmlParserCtxtPtr ctxt) {
1.135 daniel 7000: xmlChar *buf = NULL;
7001: int len = 0;
1.140 daniel 7002: int size = XML_PARSER_BUFFER_SIZE;
1.152 daniel 7003: int r, rl;
7004: int s, sl;
7005: int cur, l;
1.3 veillard 7006:
1.106 daniel 7007: if ((NXT(0) == '<') && (NXT(1) == '!') &&
1.40 daniel 7008: (NXT(2) == '[') && (NXT(3) == 'C') &&
7009: (NXT(4) == 'D') && (NXT(5) == 'A') &&
7010: (NXT(6) == 'T') && (NXT(7) == 'A') &&
7011: (NXT(8) == '[')) {
7012: SKIP(9);
1.29 daniel 7013: } else
1.45 daniel 7014: return;
1.109 daniel 7015:
7016: ctxt->instate = XML_PARSER_CDATA_SECTION;
1.152 daniel 7017: r = CUR_CHAR(rl);
7018: if (!IS_CHAR(r)) {
1.55 daniel 7019: if ((ctxt->sax != NULL) && (ctxt->sax->error != NULL))
1.122 daniel 7020: ctxt->sax->error(ctxt->userData,
1.135 daniel 7021: "CData section not finished\n");
1.59 daniel 7022: ctxt->wellFormed = 0;
1.123 daniel 7023: ctxt->errNo = XML_ERR_CDATA_NOT_FINISHED;
1.109 daniel 7024: ctxt->instate = XML_PARSER_CONTENT;
1.45 daniel 7025: return;
1.3 veillard 7026: }
1.152 daniel 7027: NEXTL(rl);
7028: s = CUR_CHAR(sl);
7029: if (!IS_CHAR(s)) {
1.55 daniel 7030: if ((ctxt->sax != NULL) && (ctxt->sax->error != NULL))
1.122 daniel 7031: ctxt->sax->error(ctxt->userData,
1.135 daniel 7032: "CData section not finished\n");
1.123 daniel 7033: ctxt->errNo = XML_ERR_CDATA_NOT_FINISHED;
1.59 daniel 7034: ctxt->wellFormed = 0;
1.109 daniel 7035: ctxt->instate = XML_PARSER_CONTENT;
1.45 daniel 7036: return;
1.3 veillard 7037: }
1.152 daniel 7038: NEXTL(sl);
7039: cur = CUR_CHAR(l);
1.135 daniel 7040: buf = (xmlChar *) xmlMalloc(size * sizeof(xmlChar));
7041: if (buf == NULL) {
7042: fprintf(stderr, "malloc of %d byte failed\n", size);
7043: return;
7044: }
1.108 veillard 7045: while (IS_CHAR(cur) &&
1.110 daniel 7046: ((r != ']') || (s != ']') || (cur != '>'))) {
1.152 daniel 7047: if (len + 5 >= size) {
1.135 daniel 7048: size *= 2;
7049: buf = xmlRealloc(buf, size * sizeof(xmlChar));
7050: if (buf == NULL) {
7051: fprintf(stderr, "realloc of %d byte failed\n", size);
7052: return;
7053: }
7054: }
1.152 daniel 7055: COPY_BUF(rl,buf,len,r);
1.110 daniel 7056: r = s;
1.152 daniel 7057: rl = sl;
1.110 daniel 7058: s = cur;
1.152 daniel 7059: sl = l;
7060: NEXTL(l);
7061: cur = CUR_CHAR(l);
1.3 veillard 7062: }
1.135 daniel 7063: buf[len] = 0;
1.109 daniel 7064: ctxt->instate = XML_PARSER_CONTENT;
1.152 daniel 7065: if (cur != '>') {
1.55 daniel 7066: if ((ctxt->sax != NULL) && (ctxt->sax->error != NULL))
1.122 daniel 7067: ctxt->sax->error(ctxt->userData,
1.135 daniel 7068: "CData section not finished\n%.50s\n", buf);
1.123 daniel 7069: ctxt->errNo = XML_ERR_CDATA_NOT_FINISHED;
1.59 daniel 7070: ctxt->wellFormed = 0;
1.135 daniel 7071: xmlFree(buf);
1.45 daniel 7072: return;
1.3 veillard 7073: }
1.152 daniel 7074: NEXTL(l);
1.16 daniel 7075:
1.45 daniel 7076: /*
1.135 daniel 7077: * Ok the buffer is to be consumed as cdata.
1.45 daniel 7078: */
1.171 daniel 7079: if ((ctxt->sax != NULL) && (!ctxt->disableSAX)) {
1.107 daniel 7080: if (ctxt->sax->cdataBlock != NULL)
1.135 daniel 7081: ctxt->sax->cdataBlock(ctxt->userData, buf, len);
1.45 daniel 7082: }
1.135 daniel 7083: xmlFree(buf);
1.2 veillard 7084: }
7085:
1.50 daniel 7086: /**
7087: * xmlParseContent:
7088: * @ctxt: an XML parser context
7089: *
7090: * Parse a content:
1.2 veillard 7091: *
1.27 daniel 7092: * [43] content ::= (element | CharData | Reference | CDSect | PI | Comment)*
1.2 veillard 7093: */
7094:
1.55 daniel 7095: void
7096: xmlParseContent(xmlParserCtxtPtr ctxt) {
1.97 daniel 7097: GROW;
1.152 daniel 7098: while ((RAW != '<') || (NXT(1) != '/')) {
1.123 daniel 7099: const xmlChar *test = CUR_PTR;
1.91 daniel 7100: int cons = ctxt->input->consumed;
1.123 daniel 7101: xmlChar tok = ctxt->token;
1.27 daniel 7102:
7103: /*
1.152 daniel 7104: * Handle possible processed charrefs.
7105: */
7106: if (ctxt->token != 0) {
7107: xmlParseCharData(ctxt, 0);
7108: }
7109: /*
1.27 daniel 7110: * First case : a Processing Instruction.
7111: */
1.152 daniel 7112: else if ((RAW == '<') && (NXT(1) == '?')) {
1.27 daniel 7113: xmlParsePI(ctxt);
7114: }
1.72 daniel 7115:
1.27 daniel 7116: /*
7117: * Second case : a CDSection
7118: */
1.152 daniel 7119: else if ((RAW == '<') && (NXT(1) == '!') &&
1.40 daniel 7120: (NXT(2) == '[') && (NXT(3) == 'C') &&
7121: (NXT(4) == 'D') && (NXT(5) == 'A') &&
7122: (NXT(6) == 'T') && (NXT(7) == 'A') &&
7123: (NXT(8) == '[')) {
1.45 daniel 7124: xmlParseCDSect(ctxt);
1.27 daniel 7125: }
1.72 daniel 7126:
1.27 daniel 7127: /*
7128: * Third case : a comment
7129: */
1.152 daniel 7130: else if ((RAW == '<') && (NXT(1) == '!') &&
1.40 daniel 7131: (NXT(2) == '-') && (NXT(3) == '-')) {
1.114 daniel 7132: xmlParseComment(ctxt);
1.97 daniel 7133: ctxt->instate = XML_PARSER_CONTENT;
1.27 daniel 7134: }
1.72 daniel 7135:
1.27 daniel 7136: /*
7137: * Fourth case : a sub-element.
7138: */
1.152 daniel 7139: else if (RAW == '<') {
1.72 daniel 7140: xmlParseElement(ctxt);
1.45 daniel 7141: }
1.72 daniel 7142:
1.45 daniel 7143: /*
1.50 daniel 7144: * Fifth case : a reference. If if has not been resolved,
7145: * parsing returns it's Name, create the node
1.45 daniel 7146: */
1.97 daniel 7147:
1.152 daniel 7148: else if (RAW == '&') {
1.77 daniel 7149: xmlParseReference(ctxt);
1.27 daniel 7150: }
1.72 daniel 7151:
1.27 daniel 7152: /*
7153: * Last case, text. Note that References are handled directly.
7154: */
7155: else {
1.45 daniel 7156: xmlParseCharData(ctxt, 0);
1.3 veillard 7157: }
1.14 veillard 7158:
1.91 daniel 7159: GROW;
1.14 veillard 7160: /*
1.45 daniel 7161: * Pop-up of finished entities.
1.14 veillard 7162: */
1.152 daniel 7163: while ((RAW == 0) && (ctxt->inputNr > 1))
1.69 daniel 7164: xmlPopInput(ctxt);
1.135 daniel 7165: SHRINK;
1.45 daniel 7166:
1.113 daniel 7167: if ((cons == ctxt->input->consumed) && (test == CUR_PTR) &&
7168: (tok == ctxt->token)) {
1.55 daniel 7169: if ((ctxt->sax != NULL) && (ctxt->sax->error != NULL))
1.74 daniel 7170: ctxt->sax->error(ctxt->userData,
1.59 daniel 7171: "detected an error in element content\n");
1.123 daniel 7172: ctxt->errNo = XML_ERR_INTERNAL_ERROR;
1.59 daniel 7173: ctxt->wellFormed = 0;
1.29 daniel 7174: break;
7175: }
1.3 veillard 7176: }
1.2 veillard 7177: }
7178:
1.50 daniel 7179: /**
7180: * xmlParseElement:
7181: * @ctxt: an XML parser context
7182: *
7183: * parse an XML element, this is highly recursive
1.26 daniel 7184: *
7185: * [39] element ::= EmptyElemTag | STag content ETag
7186: *
1.98 daniel 7187: * [ WFC: Element Type Match ]
7188: * The Name in an element's end-tag must match the element type in the
7189: * start-tag.
7190: *
7191: * [ VC: Element Valid ]
1.117 daniel 7192: * An element is valid if there is a declaration matching elementdecl
1.99 daniel 7193: * where the Name matches the element type and one of the following holds:
7194: * - The declaration matches EMPTY and the element has no content.
7195: * - The declaration matches children and the sequence of child elements
7196: * belongs to the language generated by the regular expression in the
7197: * content model, with optional white space (characters matching the
7198: * nonterminal S) between each pair of child elements.
7199: * - The declaration matches Mixed and the content consists of character
7200: * data and child elements whose types match names in the content model.
7201: * - The declaration matches ANY, and the types of any child elements have
7202: * been declared.
1.2 veillard 7203: */
1.26 daniel 7204:
1.72 daniel 7205: void
1.69 daniel 7206: xmlParseElement(xmlParserCtxtPtr ctxt) {
1.123 daniel 7207: const xmlChar *openTag = CUR_PTR;
7208: xmlChar *name;
1.140 daniel 7209: xmlChar *oldname;
1.32 daniel 7210: xmlParserNodeInfo node_info;
1.118 daniel 7211: xmlNodePtr ret;
1.2 veillard 7212:
1.32 daniel 7213: /* Capture start position */
1.118 daniel 7214: if (ctxt->record_info) {
7215: node_info.begin_pos = ctxt->input->consumed +
7216: (CUR_PTR - ctxt->input->base);
7217: node_info.begin_line = ctxt->input->line;
7218: }
1.32 daniel 7219:
1.83 daniel 7220: name = xmlParseStartTag(ctxt);
7221: if (name == NULL) {
7222: return;
7223: }
1.140 daniel 7224: namePush(ctxt, name);
1.118 daniel 7225: ret = ctxt->node;
1.2 veillard 7226:
7227: /*
1.99 daniel 7228: * [ VC: Root Element Type ]
7229: * The Name in the document type declaration must match the element
7230: * type of the root element.
7231: */
1.105 daniel 7232: if (ctxt->validate && ctxt->wellFormed && ctxt->myDoc &&
1.157 daniel 7233: ctxt->node && (ctxt->node == ctxt->myDoc->children))
1.102 daniel 7234: ctxt->valid &= xmlValidateRoot(&ctxt->vctxt, ctxt->myDoc);
1.99 daniel 7235:
7236: /*
1.2 veillard 7237: * Check for an Empty Element.
7238: */
1.152 daniel 7239: if ((RAW == '/') && (NXT(1) == '>')) {
1.40 daniel 7240: SKIP(2);
1.171 daniel 7241: if ((ctxt->sax != NULL) && (ctxt->sax->endElement != NULL) &&
7242: (!ctxt->disableSAX))
1.83 daniel 7243: ctxt->sax->endElement(ctxt->userData, name);
1.140 daniel 7244: oldname = namePop(ctxt);
7245: if (oldname != NULL) {
7246: #ifdef DEBUG_STACK
7247: fprintf(stderr,"Close: popped %s\n", oldname);
7248: #endif
7249: xmlFree(oldname);
7250: }
1.72 daniel 7251: return;
1.2 veillard 7252: }
1.152 daniel 7253: if (RAW == '>') {
1.91 daniel 7254: NEXT;
7255: } else {
1.55 daniel 7256: if ((ctxt->sax != NULL) && (ctxt->sax->error != NULL))
1.122 daniel 7257: ctxt->sax->error(ctxt->userData,
7258: "Couldn't find end of Start Tag\n%.30s\n",
1.57 daniel 7259: openTag);
1.59 daniel 7260: ctxt->wellFormed = 0;
1.123 daniel 7261: ctxt->errNo = XML_ERR_GT_REQUIRED;
1.45 daniel 7262:
7263: /*
7264: * end of parsing of this node.
7265: */
7266: nodePop(ctxt);
1.140 daniel 7267: oldname = namePop(ctxt);
7268: if (oldname != NULL) {
7269: #ifdef DEBUG_STACK
7270: fprintf(stderr,"Close: popped %s\n", oldname);
7271: #endif
7272: xmlFree(oldname);
7273: }
1.118 daniel 7274:
7275: /*
7276: * Capture end position and add node
7277: */
7278: if ( ret != NULL && ctxt->record_info ) {
7279: node_info.end_pos = ctxt->input->consumed +
7280: (CUR_PTR - ctxt->input->base);
7281: node_info.end_line = ctxt->input->line;
7282: node_info.node = ret;
7283: xmlParserAddNodeInfo(ctxt, &node_info);
7284: }
1.72 daniel 7285: return;
1.2 veillard 7286: }
7287:
7288: /*
7289: * Parse the content of the element:
7290: */
1.45 daniel 7291: xmlParseContent(ctxt);
1.153 daniel 7292: if (!IS_CHAR(RAW)) {
1.55 daniel 7293: if ((ctxt->sax != NULL) && (ctxt->sax->error != NULL))
1.74 daniel 7294: ctxt->sax->error(ctxt->userData,
1.57 daniel 7295: "Premature end of data in tag %.30s\n", openTag);
1.59 daniel 7296: ctxt->wellFormed = 0;
1.123 daniel 7297: ctxt->errNo = XML_ERR_TAG_NOT_FINISED;
1.45 daniel 7298:
7299: /*
7300: * end of parsing of this node.
7301: */
7302: nodePop(ctxt);
1.140 daniel 7303: oldname = namePop(ctxt);
7304: if (oldname != NULL) {
7305: #ifdef DEBUG_STACK
7306: fprintf(stderr,"Close: popped %s\n", oldname);
7307: #endif
7308: xmlFree(oldname);
7309: }
1.72 daniel 7310: return;
1.2 veillard 7311: }
7312:
7313: /*
1.27 daniel 7314: * parse the end of tag: '</' should be here.
1.2 veillard 7315: */
1.140 daniel 7316: xmlParseEndTag(ctxt);
1.118 daniel 7317:
7318: /*
7319: * Capture end position and add node
7320: */
7321: if ( ret != NULL && ctxt->record_info ) {
7322: node_info.end_pos = ctxt->input->consumed +
7323: (CUR_PTR - ctxt->input->base);
7324: node_info.end_line = ctxt->input->line;
7325: node_info.node = ret;
7326: xmlParserAddNodeInfo(ctxt, &node_info);
7327: }
1.2 veillard 7328: }
7329:
1.50 daniel 7330: /**
7331: * xmlParseVersionNum:
7332: * @ctxt: an XML parser context
7333: *
7334: * parse the XML version value.
1.29 daniel 7335: *
7336: * [26] VersionNum ::= ([a-zA-Z0-9_.:] | '-')+
1.68 daniel 7337: *
7338: * Returns the string giving the XML version number, or NULL
1.29 daniel 7339: */
1.123 daniel 7340: xmlChar *
1.55 daniel 7341: xmlParseVersionNum(xmlParserCtxtPtr ctxt) {
1.135 daniel 7342: xmlChar *buf = NULL;
7343: int len = 0;
7344: int size = 10;
7345: xmlChar cur;
1.29 daniel 7346:
1.135 daniel 7347: buf = (xmlChar *) xmlMalloc(size * sizeof(xmlChar));
7348: if (buf == NULL) {
7349: fprintf(stderr, "malloc of %d byte failed\n", size);
7350: return(NULL);
7351: }
7352: cur = CUR;
1.152 daniel 7353: while (((cur >= 'a') && (cur <= 'z')) ||
7354: ((cur >= 'A') && (cur <= 'Z')) ||
7355: ((cur >= '0') && (cur <= '9')) ||
7356: (cur == '_') || (cur == '.') ||
7357: (cur == ':') || (cur == '-')) {
1.135 daniel 7358: if (len + 1 >= size) {
7359: size *= 2;
7360: buf = xmlRealloc(buf, size * sizeof(xmlChar));
7361: if (buf == NULL) {
7362: fprintf(stderr, "realloc of %d byte failed\n", size);
7363: return(NULL);
7364: }
7365: }
7366: buf[len++] = cur;
7367: NEXT;
7368: cur=CUR;
7369: }
7370: buf[len] = 0;
7371: return(buf);
1.29 daniel 7372: }
7373:
1.50 daniel 7374: /**
7375: * xmlParseVersionInfo:
7376: * @ctxt: an XML parser context
7377: *
7378: * parse the XML version.
1.29 daniel 7379: *
7380: * [24] VersionInfo ::= S 'version' Eq (' VersionNum ' | " VersionNum ")
7381: *
7382: * [25] Eq ::= S? '=' S?
1.50 daniel 7383: *
1.68 daniel 7384: * Returns the version string, e.g. "1.0"
1.29 daniel 7385: */
7386:
1.123 daniel 7387: xmlChar *
1.55 daniel 7388: xmlParseVersionInfo(xmlParserCtxtPtr ctxt) {
1.123 daniel 7389: xmlChar *version = NULL;
7390: const xmlChar *q;
1.29 daniel 7391:
1.152 daniel 7392: if ((RAW == 'v') && (NXT(1) == 'e') &&
1.40 daniel 7393: (NXT(2) == 'r') && (NXT(3) == 's') &&
7394: (NXT(4) == 'i') && (NXT(5) == 'o') &&
7395: (NXT(6) == 'n')) {
7396: SKIP(7);
1.42 daniel 7397: SKIP_BLANKS;
1.152 daniel 7398: if (RAW != '=') {
1.55 daniel 7399: if ((ctxt->sax != NULL) && (ctxt->sax->error != NULL))
1.122 daniel 7400: ctxt->sax->error(ctxt->userData,
7401: "xmlParseVersionInfo : expected '='\n");
1.59 daniel 7402: ctxt->wellFormed = 0;
1.123 daniel 7403: ctxt->errNo = XML_ERR_EQUAL_REQUIRED;
1.31 daniel 7404: return(NULL);
7405: }
1.40 daniel 7406: NEXT;
1.42 daniel 7407: SKIP_BLANKS;
1.152 daniel 7408: if (RAW == '"') {
1.40 daniel 7409: NEXT;
7410: q = CUR_PTR;
1.29 daniel 7411: version = xmlParseVersionNum(ctxt);
1.152 daniel 7412: if (RAW != '"') {
1.55 daniel 7413: if ((ctxt->sax != NULL) && (ctxt->sax->error != NULL))
1.122 daniel 7414: ctxt->sax->error(ctxt->userData,
7415: "String not closed\n%.50s\n", q);
1.59 daniel 7416: ctxt->wellFormed = 0;
1.123 daniel 7417: ctxt->errNo = XML_ERR_STRING_NOT_CLOSED;
1.55 daniel 7418: } else
1.40 daniel 7419: NEXT;
1.152 daniel 7420: } else if (RAW == '\''){
1.40 daniel 7421: NEXT;
7422: q = CUR_PTR;
1.29 daniel 7423: version = xmlParseVersionNum(ctxt);
1.152 daniel 7424: if (RAW != '\'') {
1.55 daniel 7425: if ((ctxt->sax != NULL) && (ctxt->sax->error != NULL))
1.122 daniel 7426: ctxt->sax->error(ctxt->userData,
7427: "String not closed\n%.50s\n", q);
1.123 daniel 7428: ctxt->errNo = XML_ERR_STRING_NOT_CLOSED;
1.59 daniel 7429: ctxt->wellFormed = 0;
1.55 daniel 7430: } else
1.40 daniel 7431: NEXT;
1.31 daniel 7432: } else {
1.55 daniel 7433: if ((ctxt->sax != NULL) && (ctxt->sax->error != NULL))
1.74 daniel 7434: ctxt->sax->error(ctxt->userData,
1.59 daniel 7435: "xmlParseVersionInfo : expected ' or \"\n");
1.122 daniel 7436: ctxt->wellFormed = 0;
1.123 daniel 7437: ctxt->errNo = XML_ERR_STRING_NOT_STARTED;
1.29 daniel 7438: }
7439: }
7440: return(version);
7441: }
7442:
1.50 daniel 7443: /**
7444: * xmlParseEncName:
7445: * @ctxt: an XML parser context
7446: *
7447: * parse the XML encoding name
1.29 daniel 7448: *
7449: * [81] EncName ::= [A-Za-z] ([A-Za-z0-9._] | '-')*
1.50 daniel 7450: *
1.68 daniel 7451: * Returns the encoding name value or NULL
1.29 daniel 7452: */
1.123 daniel 7453: xmlChar *
1.55 daniel 7454: xmlParseEncName(xmlParserCtxtPtr ctxt) {
1.135 daniel 7455: xmlChar *buf = NULL;
7456: int len = 0;
7457: int size = 10;
7458: xmlChar cur;
1.29 daniel 7459:
1.135 daniel 7460: cur = CUR;
7461: if (((cur >= 'a') && (cur <= 'z')) ||
7462: ((cur >= 'A') && (cur <= 'Z'))) {
7463: buf = (xmlChar *) xmlMalloc(size * sizeof(xmlChar));
7464: if (buf == NULL) {
7465: fprintf(stderr, "malloc of %d byte failed\n", size);
7466: return(NULL);
7467: }
7468:
7469: buf[len++] = cur;
1.40 daniel 7470: NEXT;
1.135 daniel 7471: cur = CUR;
1.152 daniel 7472: while (((cur >= 'a') && (cur <= 'z')) ||
7473: ((cur >= 'A') && (cur <= 'Z')) ||
7474: ((cur >= '0') && (cur <= '9')) ||
7475: (cur == '.') || (cur == '_') ||
7476: (cur == '-')) {
1.135 daniel 7477: if (len + 1 >= size) {
7478: size *= 2;
7479: buf = xmlRealloc(buf, size * sizeof(xmlChar));
7480: if (buf == NULL) {
7481: fprintf(stderr, "realloc of %d byte failed\n", size);
7482: return(NULL);
7483: }
7484: }
7485: buf[len++] = cur;
7486: NEXT;
7487: cur = CUR;
7488: if (cur == 0) {
7489: SHRINK;
7490: GROW;
7491: cur = CUR;
7492: }
7493: }
7494: buf[len] = 0;
1.29 daniel 7495: } else {
1.55 daniel 7496: if ((ctxt->sax != NULL) && (ctxt->sax->error != NULL))
1.74 daniel 7497: ctxt->sax->error(ctxt->userData, "Invalid XML encoding name\n");
1.59 daniel 7498: ctxt->wellFormed = 0;
1.123 daniel 7499: ctxt->errNo = XML_ERR_ENCODING_NAME;
1.29 daniel 7500: }
1.135 daniel 7501: return(buf);
1.29 daniel 7502: }
7503:
1.50 daniel 7504: /**
7505: * xmlParseEncodingDecl:
7506: * @ctxt: an XML parser context
7507: *
7508: * parse the XML encoding declaration
1.29 daniel 7509: *
7510: * [80] EncodingDecl ::= S 'encoding' Eq ('"' EncName '"' | "'" EncName "'")
1.50 daniel 7511: *
7512: * TODO: this should setup the conversion filters.
7513: *
1.68 daniel 7514: * Returns the encoding value or NULL
1.29 daniel 7515: */
7516:
1.123 daniel 7517: xmlChar *
1.55 daniel 7518: xmlParseEncodingDecl(xmlParserCtxtPtr ctxt) {
1.123 daniel 7519: xmlChar *encoding = NULL;
7520: const xmlChar *q;
1.29 daniel 7521:
1.42 daniel 7522: SKIP_BLANKS;
1.152 daniel 7523: if ((RAW == 'e') && (NXT(1) == 'n') &&
1.40 daniel 7524: (NXT(2) == 'c') && (NXT(3) == 'o') &&
7525: (NXT(4) == 'd') && (NXT(5) == 'i') &&
7526: (NXT(6) == 'n') && (NXT(7) == 'g')) {
7527: SKIP(8);
1.42 daniel 7528: SKIP_BLANKS;
1.152 daniel 7529: if (RAW != '=') {
1.55 daniel 7530: if ((ctxt->sax != NULL) && (ctxt->sax->error != NULL))
1.122 daniel 7531: ctxt->sax->error(ctxt->userData,
7532: "xmlParseEncodingDecl : expected '='\n");
1.59 daniel 7533: ctxt->wellFormed = 0;
1.123 daniel 7534: ctxt->errNo = XML_ERR_EQUAL_REQUIRED;
1.31 daniel 7535: return(NULL);
7536: }
1.40 daniel 7537: NEXT;
1.42 daniel 7538: SKIP_BLANKS;
1.152 daniel 7539: if (RAW == '"') {
1.40 daniel 7540: NEXT;
7541: q = CUR_PTR;
1.29 daniel 7542: encoding = xmlParseEncName(ctxt);
1.152 daniel 7543: if (RAW != '"') {
1.55 daniel 7544: if ((ctxt->sax != NULL) && (ctxt->sax->error != NULL))
1.122 daniel 7545: ctxt->sax->error(ctxt->userData,
7546: "String not closed\n%.50s\n", q);
1.59 daniel 7547: ctxt->wellFormed = 0;
1.123 daniel 7548: ctxt->errNo = XML_ERR_STRING_NOT_CLOSED;
1.55 daniel 7549: } else
1.40 daniel 7550: NEXT;
1.152 daniel 7551: } else if (RAW == '\''){
1.40 daniel 7552: NEXT;
7553: q = CUR_PTR;
1.29 daniel 7554: encoding = xmlParseEncName(ctxt);
1.152 daniel 7555: if (RAW != '\'') {
1.55 daniel 7556: if ((ctxt->sax != NULL) && (ctxt->sax->error != NULL))
1.122 daniel 7557: ctxt->sax->error(ctxt->userData,
7558: "String not closed\n%.50s\n", q);
1.59 daniel 7559: ctxt->wellFormed = 0;
1.123 daniel 7560: ctxt->errNo = XML_ERR_STRING_NOT_CLOSED;
1.55 daniel 7561: } else
1.40 daniel 7562: NEXT;
1.152 daniel 7563: } else if (RAW == '"'){
1.55 daniel 7564: if ((ctxt->sax != NULL) && (ctxt->sax->error != NULL))
1.74 daniel 7565: ctxt->sax->error(ctxt->userData,
1.59 daniel 7566: "xmlParseEncodingDecl : expected ' or \"\n");
7567: ctxt->wellFormed = 0;
1.123 daniel 7568: ctxt->errNo = XML_ERR_STRING_NOT_STARTED;
1.29 daniel 7569: }
7570: }
7571: return(encoding);
7572: }
7573:
1.50 daniel 7574: /**
7575: * xmlParseSDDecl:
7576: * @ctxt: an XML parser context
7577: *
7578: * parse the XML standalone declaration
1.29 daniel 7579: *
7580: * [32] SDDecl ::= S 'standalone' Eq
7581: * (("'" ('yes' | 'no') "'") | ('"' ('yes' | 'no')'"'))
1.99 daniel 7582: *
7583: * [ VC: Standalone Document Declaration ]
7584: * TODO The standalone document declaration must have the value "no"
7585: * if any external markup declarations contain declarations of:
7586: * - attributes with default values, if elements to which these
7587: * attributes apply appear in the document without specifications
7588: * of values for these attributes, or
7589: * - entities (other than amp, lt, gt, apos, quot), if references
7590: * to those entities appear in the document, or
7591: * - attributes with values subject to normalization, where the
7592: * attribute appears in the document with a value which will change
7593: * as a result of normalization, or
7594: * - element types with element content, if white space occurs directly
7595: * within any instance of those types.
1.68 daniel 7596: *
7597: * Returns 1 if standalone, 0 otherwise
1.29 daniel 7598: */
7599:
1.55 daniel 7600: int
7601: xmlParseSDDecl(xmlParserCtxtPtr ctxt) {
1.29 daniel 7602: int standalone = -1;
7603:
1.42 daniel 7604: SKIP_BLANKS;
1.152 daniel 7605: if ((RAW == 's') && (NXT(1) == 't') &&
1.40 daniel 7606: (NXT(2) == 'a') && (NXT(3) == 'n') &&
7607: (NXT(4) == 'd') && (NXT(5) == 'a') &&
7608: (NXT(6) == 'l') && (NXT(7) == 'o') &&
7609: (NXT(8) == 'n') && (NXT(9) == 'e')) {
7610: SKIP(10);
1.81 daniel 7611: SKIP_BLANKS;
1.152 daniel 7612: if (RAW != '=') {
1.55 daniel 7613: if ((ctxt->sax != NULL) && (ctxt->sax->error != NULL))
1.74 daniel 7614: ctxt->sax->error(ctxt->userData,
1.59 daniel 7615: "XML standalone declaration : expected '='\n");
1.123 daniel 7616: ctxt->errNo = XML_ERR_EQUAL_REQUIRED;
1.59 daniel 7617: ctxt->wellFormed = 0;
1.32 daniel 7618: return(standalone);
7619: }
1.40 daniel 7620: NEXT;
1.42 daniel 7621: SKIP_BLANKS;
1.152 daniel 7622: if (RAW == '\''){
1.40 daniel 7623: NEXT;
1.152 daniel 7624: if ((RAW == 'n') && (NXT(1) == 'o')) {
1.29 daniel 7625: standalone = 0;
1.40 daniel 7626: SKIP(2);
1.152 daniel 7627: } else if ((RAW == 'y') && (NXT(1) == 'e') &&
1.40 daniel 7628: (NXT(2) == 's')) {
1.29 daniel 7629: standalone = 1;
1.40 daniel 7630: SKIP(3);
1.29 daniel 7631: } else {
1.55 daniel 7632: if ((ctxt->sax != NULL) && (ctxt->sax->error != NULL))
1.122 daniel 7633: ctxt->sax->error(ctxt->userData,
7634: "standalone accepts only 'yes' or 'no'\n");
1.123 daniel 7635: ctxt->errNo = XML_ERR_STANDALONE_VALUE;
1.59 daniel 7636: ctxt->wellFormed = 0;
1.29 daniel 7637: }
1.152 daniel 7638: if (RAW != '\'') {
1.55 daniel 7639: if ((ctxt->sax != NULL) && (ctxt->sax->error != NULL))
1.74 daniel 7640: ctxt->sax->error(ctxt->userData, "String not closed\n");
1.123 daniel 7641: ctxt->errNo = XML_ERR_STRING_NOT_CLOSED;
1.59 daniel 7642: ctxt->wellFormed = 0;
1.55 daniel 7643: } else
1.40 daniel 7644: NEXT;
1.152 daniel 7645: } else if (RAW == '"'){
1.40 daniel 7646: NEXT;
1.152 daniel 7647: if ((RAW == 'n') && (NXT(1) == 'o')) {
1.29 daniel 7648: standalone = 0;
1.40 daniel 7649: SKIP(2);
1.152 daniel 7650: } else if ((RAW == 'y') && (NXT(1) == 'e') &&
1.40 daniel 7651: (NXT(2) == 's')) {
1.29 daniel 7652: standalone = 1;
1.40 daniel 7653: SKIP(3);
1.29 daniel 7654: } else {
1.55 daniel 7655: if ((ctxt->sax != NULL) && (ctxt->sax->error != NULL))
1.74 daniel 7656: ctxt->sax->error(ctxt->userData,
1.59 daniel 7657: "standalone accepts only 'yes' or 'no'\n");
1.123 daniel 7658: ctxt->errNo = XML_ERR_STANDALONE_VALUE;
1.59 daniel 7659: ctxt->wellFormed = 0;
1.29 daniel 7660: }
1.152 daniel 7661: if (RAW != '"') {
1.55 daniel 7662: if ((ctxt->sax != NULL) && (ctxt->sax->error != NULL))
1.74 daniel 7663: ctxt->sax->error(ctxt->userData, "String not closed\n");
1.59 daniel 7664: ctxt->wellFormed = 0;
1.123 daniel 7665: ctxt->errNo = XML_ERR_STRING_NOT_CLOSED;
1.55 daniel 7666: } else
1.40 daniel 7667: NEXT;
1.37 daniel 7668: } else {
1.55 daniel 7669: if ((ctxt->sax != NULL) && (ctxt->sax->error != NULL))
1.122 daniel 7670: ctxt->sax->error(ctxt->userData,
7671: "Standalone value not found\n");
1.59 daniel 7672: ctxt->wellFormed = 0;
1.123 daniel 7673: ctxt->errNo = XML_ERR_STRING_NOT_STARTED;
1.37 daniel 7674: }
1.29 daniel 7675: }
7676: return(standalone);
7677: }
7678:
1.50 daniel 7679: /**
7680: * xmlParseXMLDecl:
7681: * @ctxt: an XML parser context
7682: *
7683: * parse an XML declaration header
1.29 daniel 7684: *
7685: * [23] XMLDecl ::= '<?xml' VersionInfo EncodingDecl? SDDecl? S? '?>'
1.1 veillard 7686: */
7687:
1.55 daniel 7688: void
7689: xmlParseXMLDecl(xmlParserCtxtPtr ctxt) {
1.123 daniel 7690: xmlChar *version;
1.1 veillard 7691:
7692: /*
1.19 daniel 7693: * We know that '<?xml' is here.
1.1 veillard 7694: */
1.40 daniel 7695: SKIP(5);
1.1 veillard 7696:
1.153 daniel 7697: if (!IS_BLANK(RAW)) {
1.59 daniel 7698: if ((ctxt->sax != NULL) && (ctxt->sax->error != NULL))
1.74 daniel 7699: ctxt->sax->error(ctxt->userData, "Blank needed after '<?xml'\n");
1.123 daniel 7700: ctxt->errNo = XML_ERR_SPACE_REQUIRED;
1.59 daniel 7701: ctxt->wellFormed = 0;
7702: }
1.42 daniel 7703: SKIP_BLANKS;
1.1 veillard 7704:
7705: /*
1.29 daniel 7706: * We should have the VersionInfo here.
1.1 veillard 7707: */
1.29 daniel 7708: version = xmlParseVersionInfo(ctxt);
7709: if (version == NULL)
1.45 daniel 7710: version = xmlCharStrdup(XML_DEFAULT_VERSION);
1.72 daniel 7711: ctxt->version = xmlStrdup(version);
1.119 daniel 7712: xmlFree(version);
1.29 daniel 7713:
7714: /*
7715: * We may have the encoding declaration
7716: */
1.153 daniel 7717: if (!IS_BLANK(RAW)) {
1.152 daniel 7718: if ((RAW == '?') && (NXT(1) == '>')) {
1.59 daniel 7719: SKIP(2);
7720: return;
7721: }
7722: if ((ctxt->sax != NULL) && (ctxt->sax->error != NULL))
1.74 daniel 7723: ctxt->sax->error(ctxt->userData, "Blank needed here\n");
1.123 daniel 7724: ctxt->errNo = XML_ERR_SPACE_REQUIRED;
1.59 daniel 7725: ctxt->wellFormed = 0;
7726: }
1.164 daniel 7727: ctxt->input->encoding = xmlParseEncodingDecl(ctxt);
1.1 veillard 7728:
7729: /*
1.29 daniel 7730: * We may have the standalone status.
1.1 veillard 7731: */
1.164 daniel 7732: if ((ctxt->input->encoding != NULL) && (!IS_BLANK(RAW))) {
1.152 daniel 7733: if ((RAW == '?') && (NXT(1) == '>')) {
1.59 daniel 7734: SKIP(2);
7735: return;
7736: }
7737: if ((ctxt->sax != NULL) && (ctxt->sax->error != NULL))
1.74 daniel 7738: ctxt->sax->error(ctxt->userData, "Blank needed here\n");
1.59 daniel 7739: ctxt->wellFormed = 0;
1.123 daniel 7740: ctxt->errNo = XML_ERR_SPACE_REQUIRED;
1.59 daniel 7741: }
7742: SKIP_BLANKS;
1.167 daniel 7743: ctxt->input->standalone = xmlParseSDDecl(ctxt);
1.1 veillard 7744:
1.42 daniel 7745: SKIP_BLANKS;
1.152 daniel 7746: if ((RAW == '?') && (NXT(1) == '>')) {
1.40 daniel 7747: SKIP(2);
1.152 daniel 7748: } else if (RAW == '>') {
1.31 daniel 7749: /* Deprecated old WD ... */
1.55 daniel 7750: if ((ctxt->sax != NULL) && (ctxt->sax->error != NULL))
1.122 daniel 7751: ctxt->sax->error(ctxt->userData,
7752: "XML declaration must end-up with '?>'\n");
1.59 daniel 7753: ctxt->wellFormed = 0;
1.123 daniel 7754: ctxt->errNo = XML_ERR_XMLDECL_NOT_FINISHED;
1.40 daniel 7755: NEXT;
1.29 daniel 7756: } else {
1.55 daniel 7757: if ((ctxt->sax != NULL) && (ctxt->sax->error != NULL))
1.122 daniel 7758: ctxt->sax->error(ctxt->userData,
7759: "parsing XML declaration: '?>' expected\n");
1.59 daniel 7760: ctxt->wellFormed = 0;
1.123 daniel 7761: ctxt->errNo = XML_ERR_XMLDECL_NOT_FINISHED;
1.40 daniel 7762: MOVETO_ENDTAG(CUR_PTR);
7763: NEXT;
1.29 daniel 7764: }
1.1 veillard 7765: }
7766:
1.50 daniel 7767: /**
7768: * xmlParseMisc:
7769: * @ctxt: an XML parser context
7770: *
7771: * parse an XML Misc* optionnal field.
1.21 daniel 7772: *
1.22 daniel 7773: * [27] Misc ::= Comment | PI | S
1.1 veillard 7774: */
7775:
1.55 daniel 7776: void
7777: xmlParseMisc(xmlParserCtxtPtr ctxt) {
1.152 daniel 7778: while (((RAW == '<') && (NXT(1) == '?')) ||
7779: ((RAW == '<') && (NXT(1) == '!') &&
1.40 daniel 7780: (NXT(2) == '-') && (NXT(3) == '-')) ||
7781: IS_BLANK(CUR)) {
1.152 daniel 7782: if ((RAW == '<') && (NXT(1) == '?')) {
1.16 daniel 7783: xmlParsePI(ctxt);
1.40 daniel 7784: } else if (IS_BLANK(CUR)) {
7785: NEXT;
1.1 veillard 7786: } else
1.114 daniel 7787: xmlParseComment(ctxt);
1.1 veillard 7788: }
7789: }
7790:
1.50 daniel 7791: /**
7792: * xmlParseDocument :
7793: * @ctxt: an XML parser context
7794: *
7795: * parse an XML document (and build a tree if using the standard SAX
7796: * interface).
1.21 daniel 7797: *
1.22 daniel 7798: * [1] document ::= prolog element Misc*
1.29 daniel 7799: *
7800: * [22] prolog ::= XMLDecl? Misc* (doctypedecl Misc*)?
1.50 daniel 7801: *
1.68 daniel 7802: * Returns 0, -1 in case of error. the parser context is augmented
1.50 daniel 7803: * as a result of the parsing.
1.1 veillard 7804: */
7805:
1.55 daniel 7806: int
7807: xmlParseDocument(xmlParserCtxtPtr ctxt) {
1.156 daniel 7808: xmlChar start[4];
7809: xmlCharEncoding enc;
7810:
1.45 daniel 7811: xmlDefaultSAXHandlerInit();
7812:
1.91 daniel 7813: GROW;
7814:
1.14 veillard 7815: /*
1.44 daniel 7816: * SAX: beginning of the document processing.
7817: */
1.72 daniel 7818: if ((ctxt->sax) && (ctxt->sax->setDocumentLocator))
1.74 daniel 7819: ctxt->sax->setDocumentLocator(ctxt->userData, &xmlDefaultSAXLocator);
1.44 daniel 7820:
1.156 daniel 7821: /*
7822: * Get the 4 first bytes and decode the charset
7823: * if enc != XML_CHAR_ENCODING_NONE
7824: * plug some encoding conversion routines.
7825: */
7826: start[0] = RAW;
7827: start[1] = NXT(1);
7828: start[2] = NXT(2);
7829: start[3] = NXT(3);
7830: enc = xmlDetectCharEncoding(start, 4);
7831: if (enc != XML_CHAR_ENCODING_NONE) {
7832: xmlSwitchEncoding(ctxt, enc);
7833: }
7834:
1.1 veillard 7835:
1.59 daniel 7836: if (CUR == 0) {
7837: if ((ctxt->sax != NULL) && (ctxt->sax->error != NULL))
1.74 daniel 7838: ctxt->sax->error(ctxt->userData, "Document is empty\n");
1.123 daniel 7839: ctxt->errNo = XML_ERR_DOCUMENT_EMPTY;
1.59 daniel 7840: ctxt->wellFormed = 0;
7841: }
1.1 veillard 7842:
7843: /*
7844: * Check for the XMLDecl in the Prolog.
7845: */
1.91 daniel 7846: GROW;
1.152 daniel 7847: if ((RAW == '<') && (NXT(1) == '?') &&
1.40 daniel 7848: (NXT(2) == 'x') && (NXT(3) == 'm') &&
1.142 daniel 7849: (NXT(4) == 'l') && (IS_BLANK(NXT(5)))) {
1.19 daniel 7850: xmlParseXMLDecl(ctxt);
1.167 daniel 7851: ctxt->standalone = ctxt->input->standalone;
1.42 daniel 7852: SKIP_BLANKS;
1.164 daniel 7853: if ((ctxt->encoding == NULL) && (ctxt->input->encoding != NULL))
7854: ctxt->encoding = xmlStrdup(ctxt->input->encoding);
7855:
1.1 veillard 7856: } else {
1.72 daniel 7857: ctxt->version = xmlCharStrdup(XML_DEFAULT_VERSION);
1.1 veillard 7858: }
1.171 daniel 7859: if ((ctxt->sax) && (ctxt->sax->startDocument) && (!ctxt->disableSAX))
1.74 daniel 7860: ctxt->sax->startDocument(ctxt->userData);
1.1 veillard 7861:
7862: /*
7863: * The Misc part of the Prolog
7864: */
1.91 daniel 7865: GROW;
1.16 daniel 7866: xmlParseMisc(ctxt);
1.1 veillard 7867:
7868: /*
1.29 daniel 7869: * Then possibly doc type declaration(s) and more Misc
1.21 daniel 7870: * (doctypedecl Misc*)?
7871: */
1.91 daniel 7872: GROW;
1.152 daniel 7873: if ((RAW == '<') && (NXT(1) == '!') &&
1.40 daniel 7874: (NXT(2) == 'D') && (NXT(3) == 'O') &&
7875: (NXT(4) == 'C') && (NXT(5) == 'T') &&
7876: (NXT(6) == 'Y') && (NXT(7) == 'P') &&
7877: (NXT(8) == 'E')) {
1.165 daniel 7878:
1.166 daniel 7879: ctxt->inSubset = 1;
1.22 daniel 7880: xmlParseDocTypeDecl(ctxt);
1.152 daniel 7881: if (RAW == '[') {
1.140 daniel 7882: ctxt->instate = XML_PARSER_DTD;
7883: xmlParseInternalSubset(ctxt);
7884: }
1.165 daniel 7885:
7886: /*
7887: * Create and update the external subset.
7888: */
1.166 daniel 7889: ctxt->inSubset = 2;
1.171 daniel 7890: if ((ctxt->sax != NULL) && (ctxt->sax->externalSubset != NULL) &&
7891: (!ctxt->disableSAX))
1.165 daniel 7892: ctxt->sax->externalSubset(ctxt->userData, ctxt->intSubName,
7893: ctxt->extSubSystem, ctxt->extSubURI);
1.166 daniel 7894: ctxt->inSubset = 0;
1.165 daniel 7895:
7896:
1.96 daniel 7897: ctxt->instate = XML_PARSER_PROLOG;
1.22 daniel 7898: xmlParseMisc(ctxt);
1.21 daniel 7899: }
7900:
7901: /*
7902: * Time to start parsing the tree itself
1.1 veillard 7903: */
1.91 daniel 7904: GROW;
1.152 daniel 7905: if (RAW != '<') {
1.59 daniel 7906: if ((ctxt->sax != NULL) && (ctxt->sax->error != NULL))
1.74 daniel 7907: ctxt->sax->error(ctxt->userData,
1.151 daniel 7908: "Start tag expected, '<' not found\n");
1.140 daniel 7909: ctxt->errNo = XML_ERR_DOCUMENT_EMPTY;
1.59 daniel 7910: ctxt->wellFormed = 0;
1.140 daniel 7911: ctxt->instate = XML_PARSER_EOF;
7912: } else {
7913: ctxt->instate = XML_PARSER_CONTENT;
7914: xmlParseElement(ctxt);
7915: ctxt->instate = XML_PARSER_EPILOG;
7916:
7917:
7918: /*
7919: * The Misc part at the end
7920: */
7921: xmlParseMisc(ctxt);
7922:
1.152 daniel 7923: if (RAW != 0) {
1.140 daniel 7924: if ((ctxt->sax != NULL) && (ctxt->sax->error != NULL))
7925: ctxt->sax->error(ctxt->userData,
7926: "Extra content at the end of the document\n");
7927: ctxt->wellFormed = 0;
7928: ctxt->errNo = XML_ERR_DOCUMENT_END;
7929: }
7930: ctxt->instate = XML_PARSER_EOF;
1.59 daniel 7931: }
7932:
1.44 daniel 7933: /*
7934: * SAX: end of the document processing.
7935: */
1.171 daniel 7936: if ((ctxt->sax) && (ctxt->sax->endDocument != NULL) &&
7937: (!ctxt->disableSAX))
1.74 daniel 7938: ctxt->sax->endDocument(ctxt->userData);
1.151 daniel 7939:
7940: /*
7941: * Grab the encoding if it was added on-the-fly
7942: */
7943: if ((ctxt->encoding != NULL) && (ctxt->myDoc != NULL) &&
7944: (ctxt->myDoc->encoding == NULL)) {
7945: ctxt->myDoc->encoding = ctxt->encoding;
7946: ctxt->encoding = NULL;
7947: }
1.59 daniel 7948: if (! ctxt->wellFormed) return(-1);
1.16 daniel 7949: return(0);
7950: }
7951:
1.98 daniel 7952: /************************************************************************
7953: * *
1.128 daniel 7954: * Progressive parsing interfaces *
7955: * *
7956: ************************************************************************/
7957:
7958: /**
7959: * xmlParseLookupSequence:
7960: * @ctxt: an XML parser context
7961: * @first: the first char to lookup
1.140 daniel 7962: * @next: the next char to lookup or zero
7963: * @third: the next char to lookup or zero
1.128 daniel 7964: *
1.140 daniel 7965: * Try to find if a sequence (first, next, third) or just (first next) or
7966: * (first) is available in the input stream.
7967: * This function has a side effect of (possibly) incrementing ctxt->checkIndex
7968: * to avoid rescanning sequences of bytes, it DOES change the state of the
7969: * parser, do not use liberally.
1.128 daniel 7970: *
1.140 daniel 7971: * Returns the index to the current parsing point if the full sequence
7972: * is available, -1 otherwise.
1.128 daniel 7973: */
7974: int
1.140 daniel 7975: xmlParseLookupSequence(xmlParserCtxtPtr ctxt, xmlChar first,
7976: xmlChar next, xmlChar third) {
7977: int base, len;
7978: xmlParserInputPtr in;
7979: const xmlChar *buf;
7980:
7981: in = ctxt->input;
7982: if (in == NULL) return(-1);
7983: base = in->cur - in->base;
7984: if (base < 0) return(-1);
7985: if (ctxt->checkIndex > base)
7986: base = ctxt->checkIndex;
7987: if (in->buf == NULL) {
7988: buf = in->base;
7989: len = in->length;
7990: } else {
7991: buf = in->buf->buffer->content;
7992: len = in->buf->buffer->use;
7993: }
7994: /* take into account the sequence length */
7995: if (third) len -= 2;
7996: else if (next) len --;
7997: for (;base < len;base++) {
7998: if (buf[base] == first) {
7999: if (third != 0) {
8000: if ((buf[base + 1] != next) ||
8001: (buf[base + 2] != third)) continue;
8002: } else if (next != 0) {
8003: if (buf[base + 1] != next) continue;
8004: }
8005: ctxt->checkIndex = 0;
8006: #ifdef DEBUG_PUSH
8007: if (next == 0)
8008: fprintf(stderr, "PP: lookup '%c' found at %d\n",
8009: first, base);
8010: else if (third == 0)
8011: fprintf(stderr, "PP: lookup '%c%c' found at %d\n",
8012: first, next, base);
8013: else
8014: fprintf(stderr, "PP: lookup '%c%c%c' found at %d\n",
8015: first, next, third, base);
8016: #endif
8017: return(base - (in->cur - in->base));
8018: }
8019: }
8020: ctxt->checkIndex = base;
8021: #ifdef DEBUG_PUSH
8022: if (next == 0)
8023: fprintf(stderr, "PP: lookup '%c' failed\n", first);
8024: else if (third == 0)
8025: fprintf(stderr, "PP: lookup '%c%c' failed\n", first, next);
8026: else
8027: fprintf(stderr, "PP: lookup '%c%c%c' failed\n", first, next, third);
8028: #endif
8029: return(-1);
1.128 daniel 8030: }
8031:
8032: /**
1.143 daniel 8033: * xmlParseTryOrFinish:
1.128 daniel 8034: * @ctxt: an XML parser context
1.143 daniel 8035: * @terminate: last chunk indicator
1.128 daniel 8036: *
8037: * Try to progress on parsing
8038: *
8039: * Returns zero if no parsing was possible
8040: */
8041: int
1.143 daniel 8042: xmlParseTryOrFinish(xmlParserCtxtPtr ctxt, int terminate) {
1.128 daniel 8043: int ret = 0;
1.140 daniel 8044: xmlParserInputPtr in;
8045: int avail;
8046: xmlChar cur, next;
8047:
8048: #ifdef DEBUG_PUSH
8049: switch (ctxt->instate) {
8050: case XML_PARSER_EOF:
8051: fprintf(stderr, "PP: try EOF\n"); break;
8052: case XML_PARSER_START:
8053: fprintf(stderr, "PP: try START\n"); break;
8054: case XML_PARSER_MISC:
8055: fprintf(stderr, "PP: try MISC\n");break;
8056: case XML_PARSER_COMMENT:
8057: fprintf(stderr, "PP: try COMMENT\n");break;
8058: case XML_PARSER_PROLOG:
8059: fprintf(stderr, "PP: try PROLOG\n");break;
8060: case XML_PARSER_START_TAG:
8061: fprintf(stderr, "PP: try START_TAG\n");break;
8062: case XML_PARSER_CONTENT:
8063: fprintf(stderr, "PP: try CONTENT\n");break;
8064: case XML_PARSER_CDATA_SECTION:
8065: fprintf(stderr, "PP: try CDATA_SECTION\n");break;
8066: case XML_PARSER_END_TAG:
8067: fprintf(stderr, "PP: try END_TAG\n");break;
8068: case XML_PARSER_ENTITY_DECL:
8069: fprintf(stderr, "PP: try ENTITY_DECL\n");break;
8070: case XML_PARSER_ENTITY_VALUE:
8071: fprintf(stderr, "PP: try ENTITY_VALUE\n");break;
8072: case XML_PARSER_ATTRIBUTE_VALUE:
8073: fprintf(stderr, "PP: try ATTRIBUTE_VALUE\n");break;
8074: case XML_PARSER_DTD:
8075: fprintf(stderr, "PP: try DTD\n");break;
8076: case XML_PARSER_EPILOG:
8077: fprintf(stderr, "PP: try EPILOG\n");break;
8078: case XML_PARSER_PI:
8079: fprintf(stderr, "PP: try PI\n");break;
8080: }
8081: #endif
1.128 daniel 8082:
8083: while (1) {
1.140 daniel 8084: /*
8085: * Pop-up of finished entities.
8086: */
1.152 daniel 8087: while ((RAW == 0) && (ctxt->inputNr > 1))
1.140 daniel 8088: xmlPopInput(ctxt);
8089:
8090: in = ctxt->input;
8091: if (in == NULL) break;
8092: if (in->buf == NULL)
8093: avail = in->length - (in->cur - in->base);
8094: else
8095: avail = in->buf->buffer->use - (in->cur - in->base);
8096: if (avail < 1)
8097: goto done;
1.128 daniel 8098: switch (ctxt->instate) {
8099: case XML_PARSER_EOF:
1.140 daniel 8100: /*
8101: * Document parsing is done !
8102: */
8103: goto done;
8104: case XML_PARSER_START:
8105: /*
8106: * Very first chars read from the document flow.
8107: */
8108: cur = in->cur[0];
8109: if (IS_BLANK(cur)) {
8110: if ((ctxt->sax) && (ctxt->sax->setDocumentLocator))
8111: ctxt->sax->setDocumentLocator(ctxt->userData,
8112: &xmlDefaultSAXLocator);
8113: if ((ctxt->sax != NULL) && (ctxt->sax->error != NULL))
8114: ctxt->sax->error(ctxt->userData,
8115: "Extra spaces at the beginning of the document are not allowed\n");
8116: ctxt->errNo = XML_ERR_DOCUMENT_START;
8117: ctxt->wellFormed = 0;
8118: SKIP_BLANKS;
8119: ret++;
8120: if (in->buf == NULL)
8121: avail = in->length - (in->cur - in->base);
8122: else
8123: avail = in->buf->buffer->use - (in->cur - in->base);
8124: }
8125: if (avail < 2)
8126: goto done;
8127:
8128: cur = in->cur[0];
8129: next = in->cur[1];
8130: if (cur == 0) {
8131: if ((ctxt->sax) && (ctxt->sax->setDocumentLocator))
8132: ctxt->sax->setDocumentLocator(ctxt->userData,
8133: &xmlDefaultSAXLocator);
8134: if ((ctxt->sax != NULL) && (ctxt->sax->error != NULL))
8135: ctxt->sax->error(ctxt->userData, "Document is empty\n");
8136: ctxt->errNo = XML_ERR_DOCUMENT_EMPTY;
8137: ctxt->wellFormed = 0;
8138: ctxt->instate = XML_PARSER_EOF;
8139: #ifdef DEBUG_PUSH
8140: fprintf(stderr, "PP: entering EOF\n");
8141: #endif
8142: if ((ctxt->sax) && (ctxt->sax->endDocument != NULL))
8143: ctxt->sax->endDocument(ctxt->userData);
8144: goto done;
8145: }
8146: if ((cur == '<') && (next == '?')) {
8147: /* PI or XML decl */
8148: if (avail < 5) return(ret);
1.143 daniel 8149: if ((!terminate) &&
8150: (xmlParseLookupSequence(ctxt, '?', '>', 0) < 0))
1.140 daniel 8151: return(ret);
8152: if ((ctxt->sax) && (ctxt->sax->setDocumentLocator))
8153: ctxt->sax->setDocumentLocator(ctxt->userData,
8154: &xmlDefaultSAXLocator);
8155: if ((in->cur[2] == 'x') &&
8156: (in->cur[3] == 'm') &&
1.142 daniel 8157: (in->cur[4] == 'l') &&
8158: (IS_BLANK(in->cur[5]))) {
1.140 daniel 8159: ret += 5;
8160: #ifdef DEBUG_PUSH
8161: fprintf(stderr, "PP: Parsing XML Decl\n");
8162: #endif
8163: xmlParseXMLDecl(ctxt);
1.167 daniel 8164: ctxt->standalone = ctxt->input->standalone;
1.164 daniel 8165: if ((ctxt->encoding == NULL) &&
8166: (ctxt->input->encoding != NULL))
8167: ctxt->encoding = xmlStrdup(ctxt->input->encoding);
1.171 daniel 8168: if ((ctxt->sax) && (ctxt->sax->startDocument) &&
8169: (!ctxt->disableSAX))
1.140 daniel 8170: ctxt->sax->startDocument(ctxt->userData);
8171: ctxt->instate = XML_PARSER_MISC;
8172: #ifdef DEBUG_PUSH
8173: fprintf(stderr, "PP: entering MISC\n");
8174: #endif
8175: } else {
8176: ctxt->version = xmlCharStrdup(XML_DEFAULT_VERSION);
1.171 daniel 8177: if ((ctxt->sax) && (ctxt->sax->startDocument) &&
8178: (!ctxt->disableSAX))
1.140 daniel 8179: ctxt->sax->startDocument(ctxt->userData);
8180: ctxt->instate = XML_PARSER_MISC;
8181: #ifdef DEBUG_PUSH
8182: fprintf(stderr, "PP: entering MISC\n");
8183: #endif
8184: }
8185: } else {
8186: if ((ctxt->sax) && (ctxt->sax->setDocumentLocator))
8187: ctxt->sax->setDocumentLocator(ctxt->userData,
8188: &xmlDefaultSAXLocator);
8189: ctxt->version = xmlCharStrdup(XML_DEFAULT_VERSION);
1.171 daniel 8190: if ((ctxt->sax) && (ctxt->sax->startDocument) &&
8191: (!ctxt->disableSAX))
1.140 daniel 8192: ctxt->sax->startDocument(ctxt->userData);
8193: ctxt->instate = XML_PARSER_MISC;
8194: #ifdef DEBUG_PUSH
8195: fprintf(stderr, "PP: entering MISC\n");
8196: #endif
8197: }
8198: break;
8199: case XML_PARSER_MISC:
8200: SKIP_BLANKS;
8201: if (in->buf == NULL)
8202: avail = in->length - (in->cur - in->base);
8203: else
8204: avail = in->buf->buffer->use - (in->cur - in->base);
8205: if (avail < 2)
8206: goto done;
8207: cur = in->cur[0];
8208: next = in->cur[1];
8209: if ((cur == '<') && (next == '?')) {
1.143 daniel 8210: if ((!terminate) &&
8211: (xmlParseLookupSequence(ctxt, '?', '>', 0) < 0))
1.140 daniel 8212: goto done;
8213: #ifdef DEBUG_PUSH
8214: fprintf(stderr, "PP: Parsing PI\n");
8215: #endif
8216: xmlParsePI(ctxt);
8217: } else if ((cur == '<') && (next == '!') &&
8218: (in->cur[2] == '-') && (in->cur[3] == '-')) {
1.143 daniel 8219: if ((!terminate) &&
8220: (xmlParseLookupSequence(ctxt, '-', '-', '>') < 0))
1.140 daniel 8221: goto done;
8222: #ifdef DEBUG_PUSH
8223: fprintf(stderr, "PP: Parsing Comment\n");
8224: #endif
8225: xmlParseComment(ctxt);
8226: ctxt->instate = XML_PARSER_MISC;
8227: } else if ((cur == '<') && (next == '!') &&
8228: (in->cur[2] == 'D') && (in->cur[3] == 'O') &&
8229: (in->cur[4] == 'C') && (in->cur[5] == 'T') &&
8230: (in->cur[6] == 'Y') && (in->cur[7] == 'P') &&
8231: (in->cur[8] == 'E')) {
1.143 daniel 8232: if ((!terminate) &&
8233: (xmlParseLookupSequence(ctxt, '>', 0, 0) < 0))
1.140 daniel 8234: goto done;
8235: #ifdef DEBUG_PUSH
8236: fprintf(stderr, "PP: Parsing internal subset\n");
8237: #endif
1.166 daniel 8238: ctxt->inSubset = 1;
1.140 daniel 8239: xmlParseDocTypeDecl(ctxt);
1.152 daniel 8240: if (RAW == '[') {
1.140 daniel 8241: ctxt->instate = XML_PARSER_DTD;
8242: #ifdef DEBUG_PUSH
8243: fprintf(stderr, "PP: entering DTD\n");
8244: #endif
8245: } else {
1.166 daniel 8246: /*
8247: * Create and update the external subset.
8248: */
8249: ctxt->inSubset = 2;
1.171 daniel 8250: if ((ctxt->sax != NULL) && (!ctxt->disableSAX) &&
1.166 daniel 8251: (ctxt->sax->externalSubset != NULL))
8252: ctxt->sax->externalSubset(ctxt->userData,
8253: ctxt->intSubName, ctxt->extSubSystem,
8254: ctxt->extSubURI);
8255: ctxt->inSubset = 0;
1.140 daniel 8256: ctxt->instate = XML_PARSER_PROLOG;
8257: #ifdef DEBUG_PUSH
8258: fprintf(stderr, "PP: entering PROLOG\n");
8259: #endif
8260: }
8261: } else if ((cur == '<') && (next == '!') &&
8262: (avail < 9)) {
8263: goto done;
8264: } else {
8265: ctxt->instate = XML_PARSER_START_TAG;
8266: #ifdef DEBUG_PUSH
8267: fprintf(stderr, "PP: entering START_TAG\n");
8268: #endif
8269: }
8270: break;
1.128 daniel 8271: case XML_PARSER_PROLOG:
1.140 daniel 8272: SKIP_BLANKS;
8273: if (in->buf == NULL)
8274: avail = in->length - (in->cur - in->base);
8275: else
8276: avail = in->buf->buffer->use - (in->cur - in->base);
8277: if (avail < 2)
8278: goto done;
8279: cur = in->cur[0];
8280: next = in->cur[1];
8281: if ((cur == '<') && (next == '?')) {
1.143 daniel 8282: if ((!terminate) &&
8283: (xmlParseLookupSequence(ctxt, '?', '>', 0) < 0))
1.140 daniel 8284: goto done;
8285: #ifdef DEBUG_PUSH
8286: fprintf(stderr, "PP: Parsing PI\n");
8287: #endif
8288: xmlParsePI(ctxt);
8289: } else if ((cur == '<') && (next == '!') &&
8290: (in->cur[2] == '-') && (in->cur[3] == '-')) {
1.143 daniel 8291: if ((!terminate) &&
8292: (xmlParseLookupSequence(ctxt, '-', '-', '>') < 0))
1.140 daniel 8293: goto done;
8294: #ifdef DEBUG_PUSH
8295: fprintf(stderr, "PP: Parsing Comment\n");
8296: #endif
8297: xmlParseComment(ctxt);
8298: ctxt->instate = XML_PARSER_PROLOG;
8299: } else if ((cur == '<') && (next == '!') &&
8300: (avail < 4)) {
8301: goto done;
8302: } else {
8303: ctxt->instate = XML_PARSER_START_TAG;
8304: #ifdef DEBUG_PUSH
8305: fprintf(stderr, "PP: entering START_TAG\n");
8306: #endif
8307: }
8308: break;
8309: case XML_PARSER_EPILOG:
8310: SKIP_BLANKS;
8311: if (in->buf == NULL)
8312: avail = in->length - (in->cur - in->base);
8313: else
8314: avail = in->buf->buffer->use - (in->cur - in->base);
8315: if (avail < 2)
8316: goto done;
8317: cur = in->cur[0];
8318: next = in->cur[1];
8319: if ((cur == '<') && (next == '?')) {
1.143 daniel 8320: if ((!terminate) &&
8321: (xmlParseLookupSequence(ctxt, '?', '>', 0) < 0))
1.140 daniel 8322: goto done;
8323: #ifdef DEBUG_PUSH
8324: fprintf(stderr, "PP: Parsing PI\n");
8325: #endif
8326: xmlParsePI(ctxt);
8327: ctxt->instate = XML_PARSER_EPILOG;
8328: } else if ((cur == '<') && (next == '!') &&
8329: (in->cur[2] == '-') && (in->cur[3] == '-')) {
1.143 daniel 8330: if ((!terminate) &&
8331: (xmlParseLookupSequence(ctxt, '-', '-', '>') < 0))
1.140 daniel 8332: goto done;
8333: #ifdef DEBUG_PUSH
8334: fprintf(stderr, "PP: Parsing Comment\n");
8335: #endif
8336: xmlParseComment(ctxt);
8337: ctxt->instate = XML_PARSER_EPILOG;
8338: } else if ((cur == '<') && (next == '!') &&
8339: (avail < 4)) {
8340: goto done;
8341: } else {
8342: if ((ctxt->sax != NULL) && (ctxt->sax->error != NULL))
8343: ctxt->sax->error(ctxt->userData,
8344: "Extra content at the end of the document\n");
8345: ctxt->wellFormed = 0;
8346: ctxt->errNo = XML_ERR_DOCUMENT_END;
8347: ctxt->instate = XML_PARSER_EOF;
8348: #ifdef DEBUG_PUSH
8349: fprintf(stderr, "PP: entering EOF\n");
8350: #endif
1.171 daniel 8351: if ((ctxt->sax) && (ctxt->sax->endDocument != NULL) &&
8352: (!ctxt->disableSAX))
1.140 daniel 8353: ctxt->sax->endDocument(ctxt->userData);
8354: goto done;
8355: }
8356: break;
8357: case XML_PARSER_START_TAG: {
8358: xmlChar *name, *oldname;
8359:
8360: if (avail < 2)
8361: goto done;
8362: cur = in->cur[0];
8363: if (cur != '<') {
8364: if ((ctxt->sax != NULL) && (ctxt->sax->error != NULL))
8365: ctxt->sax->error(ctxt->userData,
8366: "Start tag expect, '<' not found\n");
8367: ctxt->errNo = XML_ERR_DOCUMENT_EMPTY;
8368: ctxt->wellFormed = 0;
8369: ctxt->instate = XML_PARSER_EOF;
8370: #ifdef DEBUG_PUSH
8371: fprintf(stderr, "PP: entering EOF\n");
8372: #endif
1.171 daniel 8373: if ((ctxt->sax) && (ctxt->sax->endDocument != NULL) &&
8374: (!ctxt->disableSAX))
1.140 daniel 8375: ctxt->sax->endDocument(ctxt->userData);
8376: goto done;
8377: }
1.143 daniel 8378: if ((!terminate) &&
8379: (xmlParseLookupSequence(ctxt, '>', 0, 0) < 0))
1.140 daniel 8380: goto done;
8381: name = xmlParseStartTag(ctxt);
8382: if (name == NULL) {
8383: ctxt->instate = XML_PARSER_EOF;
8384: #ifdef DEBUG_PUSH
8385: fprintf(stderr, "PP: entering EOF\n");
8386: #endif
1.171 daniel 8387: if ((ctxt->sax) && (ctxt->sax->endDocument != NULL) &&
8388: (!ctxt->disableSAX))
1.140 daniel 8389: ctxt->sax->endDocument(ctxt->userData);
8390: goto done;
8391: }
8392: namePush(ctxt, xmlStrdup(name));
8393:
8394: /*
8395: * [ VC: Root Element Type ]
8396: * The Name in the document type declaration must match
8397: * the element type of the root element.
8398: */
8399: if (ctxt->validate && ctxt->wellFormed && ctxt->myDoc &&
1.157 daniel 8400: ctxt->node && (ctxt->node == ctxt->myDoc->children))
1.140 daniel 8401: ctxt->valid &= xmlValidateRoot(&ctxt->vctxt, ctxt->myDoc);
8402:
8403: /*
8404: * Check for an Empty Element.
8405: */
1.152 daniel 8406: if ((RAW == '/') && (NXT(1) == '>')) {
1.140 daniel 8407: SKIP(2);
1.171 daniel 8408: if ((ctxt->sax != NULL) &&
8409: (ctxt->sax->endElement != NULL) && (!ctxt->disableSAX))
1.140 daniel 8410: ctxt->sax->endElement(ctxt->userData, name);
8411: xmlFree(name);
8412: oldname = namePop(ctxt);
8413: if (oldname != NULL) {
8414: #ifdef DEBUG_STACK
8415: fprintf(stderr,"Close: popped %s\n", oldname);
8416: #endif
8417: xmlFree(oldname);
8418: }
8419: if (ctxt->name == NULL) {
8420: ctxt->instate = XML_PARSER_EPILOG;
8421: #ifdef DEBUG_PUSH
8422: fprintf(stderr, "PP: entering EPILOG\n");
8423: #endif
8424: } else {
8425: ctxt->instate = XML_PARSER_CONTENT;
8426: #ifdef DEBUG_PUSH
8427: fprintf(stderr, "PP: entering CONTENT\n");
8428: #endif
8429: }
8430: break;
8431: }
1.152 daniel 8432: if (RAW == '>') {
1.140 daniel 8433: NEXT;
8434: } else {
8435: if ((ctxt->sax != NULL) && (ctxt->sax->error != NULL))
8436: ctxt->sax->error(ctxt->userData,
8437: "Couldn't find end of Start Tag %s\n",
8438: name);
8439: ctxt->wellFormed = 0;
8440: ctxt->errNo = XML_ERR_GT_REQUIRED;
8441:
8442: /*
8443: * end of parsing of this node.
8444: */
8445: nodePop(ctxt);
8446: oldname = namePop(ctxt);
8447: if (oldname != NULL) {
8448: #ifdef DEBUG_STACK
8449: fprintf(stderr,"Close: popped %s\n", oldname);
8450: #endif
8451: xmlFree(oldname);
8452: }
8453: }
8454: xmlFree(name);
8455: ctxt->instate = XML_PARSER_CONTENT;
8456: #ifdef DEBUG_PUSH
8457: fprintf(stderr, "PP: entering CONTENT\n");
8458: #endif
8459: break;
8460: }
1.128 daniel 8461: case XML_PARSER_CONTENT:
1.140 daniel 8462: /*
8463: * Handle preparsed entities and charRef
8464: */
8465: if (ctxt->token != 0) {
8466: xmlChar cur[2] = { 0 , 0 } ;
8467:
8468: cur[0] = (xmlChar) ctxt->token;
1.171 daniel 8469: if ((ctxt->sax != NULL) && (!ctxt->disableSAX) &&
8470: (ctxt->sax->characters != NULL))
1.140 daniel 8471: ctxt->sax->characters(ctxt->userData, cur, 1);
8472: ctxt->token = 0;
8473: }
8474: if (avail < 2)
8475: goto done;
8476: cur = in->cur[0];
8477: next = in->cur[1];
8478: if ((cur == '<') && (next == '?')) {
1.143 daniel 8479: if ((!terminate) &&
8480: (xmlParseLookupSequence(ctxt, '?', '>', 0) < 0))
1.140 daniel 8481: goto done;
8482: #ifdef DEBUG_PUSH
8483: fprintf(stderr, "PP: Parsing PI\n");
8484: #endif
8485: xmlParsePI(ctxt);
8486: } else if ((cur == '<') && (next == '!') &&
8487: (in->cur[2] == '-') && (in->cur[3] == '-')) {
1.143 daniel 8488: if ((!terminate) &&
8489: (xmlParseLookupSequence(ctxt, '-', '-', '>') < 0))
1.140 daniel 8490: goto done;
8491: #ifdef DEBUG_PUSH
8492: fprintf(stderr, "PP: Parsing Comment\n");
8493: #endif
8494: xmlParseComment(ctxt);
8495: ctxt->instate = XML_PARSER_CONTENT;
8496: } else if ((cur == '<') && (in->cur[1] == '!') &&
8497: (in->cur[2] == '[') && (NXT(3) == 'C') &&
8498: (in->cur[4] == 'D') && (NXT(5) == 'A') &&
8499: (in->cur[6] == 'T') && (NXT(7) == 'A') &&
8500: (in->cur[8] == '[')) {
8501: SKIP(9);
8502: ctxt->instate = XML_PARSER_CDATA_SECTION;
8503: #ifdef DEBUG_PUSH
8504: fprintf(stderr, "PP: entering CDATA_SECTION\n");
8505: #endif
8506: break;
8507: } else if ((cur == '<') && (next == '!') &&
8508: (avail < 9)) {
8509: goto done;
8510: } else if ((cur == '<') && (next == '/')) {
8511: ctxt->instate = XML_PARSER_END_TAG;
8512: #ifdef DEBUG_PUSH
8513: fprintf(stderr, "PP: entering END_TAG\n");
8514: #endif
8515: break;
8516: } else if (cur == '<') {
8517: ctxt->instate = XML_PARSER_START_TAG;
8518: #ifdef DEBUG_PUSH
8519: fprintf(stderr, "PP: entering START_TAG\n");
8520: #endif
8521: break;
8522: } else if (cur == '&') {
1.143 daniel 8523: if ((!terminate) &&
8524: (xmlParseLookupSequence(ctxt, ';', 0, 0) < 0))
1.140 daniel 8525: goto done;
8526: #ifdef DEBUG_PUSH
8527: fprintf(stderr, "PP: Parsing Reference\n");
8528: #endif
8529: /* TODO: check generation of subtrees if noent !!! */
8530: xmlParseReference(ctxt);
8531: } else {
1.156 daniel 8532: /* TODO Avoid the extra copy, handle directly !!! */
1.140 daniel 8533: /*
8534: * Goal of the following test is :
8535: * - minimize calls to the SAX 'character' callback
8536: * when they are mergeable
8537: * - handle a problem for isBlank when we only parse
8538: * a sequence of blank chars and the next one is
8539: * not available to check against '<' presence.
8540: * - tries to homogenize the differences in SAX
8541: * callbacks between the push and pull versions
8542: * of the parser.
8543: */
8544: if ((ctxt->inputNr == 1) &&
8545: (avail < XML_PARSER_BIG_BUFFER_SIZE)) {
1.143 daniel 8546: if ((!terminate) &&
8547: (xmlParseLookupSequence(ctxt, '<', 0, 0) < 0))
1.140 daniel 8548: goto done;
8549: }
8550: ctxt->checkIndex = 0;
8551: #ifdef DEBUG_PUSH
8552: fprintf(stderr, "PP: Parsing char data\n");
8553: #endif
8554: xmlParseCharData(ctxt, 0);
8555: }
8556: /*
8557: * Pop-up of finished entities.
8558: */
1.152 daniel 8559: while ((RAW == 0) && (ctxt->inputNr > 1))
1.140 daniel 8560: xmlPopInput(ctxt);
8561: break;
8562: case XML_PARSER_CDATA_SECTION: {
8563: /*
8564: * The Push mode need to have the SAX callback for
8565: * cdataBlock merge back contiguous callbacks.
8566: */
8567: int base;
8568:
8569: in = ctxt->input;
8570: base = xmlParseLookupSequence(ctxt, ']', ']', '>');
8571: if (base < 0) {
8572: if (avail >= XML_PARSER_BIG_BUFFER_SIZE + 2) {
1.171 daniel 8573: if ((ctxt->sax != NULL) && (!ctxt->disableSAX)) {
1.140 daniel 8574: if (ctxt->sax->cdataBlock != NULL)
8575: ctxt->sax->cdataBlock(ctxt->userData, in->cur,
8576: XML_PARSER_BIG_BUFFER_SIZE);
8577: }
8578: SKIP(XML_PARSER_BIG_BUFFER_SIZE);
8579: ctxt->checkIndex = 0;
8580: }
8581: goto done;
8582: } else {
1.171 daniel 8583: if ((ctxt->sax != NULL) && (base > 0) &&
8584: (!ctxt->disableSAX)) {
1.140 daniel 8585: if (ctxt->sax->cdataBlock != NULL)
8586: ctxt->sax->cdataBlock(ctxt->userData,
8587: in->cur, base);
8588: }
8589: SKIP(base + 3);
8590: ctxt->checkIndex = 0;
8591: ctxt->instate = XML_PARSER_CONTENT;
8592: #ifdef DEBUG_PUSH
8593: fprintf(stderr, "PP: entering CONTENT\n");
8594: #endif
8595: }
8596: break;
8597: }
1.141 daniel 8598: case XML_PARSER_END_TAG:
1.140 daniel 8599: if (avail < 2)
8600: goto done;
1.143 daniel 8601: if ((!terminate) &&
8602: (xmlParseLookupSequence(ctxt, '>', 0, 0) < 0))
1.140 daniel 8603: goto done;
8604: xmlParseEndTag(ctxt);
8605: if (ctxt->name == NULL) {
8606: ctxt->instate = XML_PARSER_EPILOG;
8607: #ifdef DEBUG_PUSH
8608: fprintf(stderr, "PP: entering EPILOG\n");
8609: #endif
8610: } else {
8611: ctxt->instate = XML_PARSER_CONTENT;
8612: #ifdef DEBUG_PUSH
8613: fprintf(stderr, "PP: entering CONTENT\n");
8614: #endif
8615: }
8616: break;
8617: case XML_PARSER_DTD: {
8618: /*
8619: * Sorry but progressive parsing of the internal subset
8620: * is not expected to be supported. We first check that
8621: * the full content of the internal subset is available and
8622: * the parsing is launched only at that point.
8623: * Internal subset ends up with "']' S? '>'" in an unescaped
8624: * section and not in a ']]>' sequence which are conditional
8625: * sections (whoever argued to keep that crap in XML deserve
8626: * a place in hell !).
8627: */
8628: int base, i;
8629: xmlChar *buf;
8630: xmlChar quote = 0;
8631:
8632: base = in->cur - in->base;
8633: if (base < 0) return(0);
8634: if (ctxt->checkIndex > base)
8635: base = ctxt->checkIndex;
8636: buf = in->buf->buffer->content;
8637: for (;base < in->buf->buffer->use;base++) {
8638: if (quote != 0) {
8639: if (buf[base] == quote)
8640: quote = 0;
8641: continue;
8642: }
8643: if (buf[base] == '"') {
8644: quote = '"';
8645: continue;
8646: }
8647: if (buf[base] == '\'') {
8648: quote = '\'';
8649: continue;
8650: }
8651: if (buf[base] == ']') {
8652: if (base +1 >= in->buf->buffer->use)
8653: break;
8654: if (buf[base + 1] == ']') {
8655: /* conditional crap, skip both ']' ! */
8656: base++;
8657: continue;
8658: }
8659: for (i = 0;base + i < in->buf->buffer->use;i++) {
8660: if (buf[base + i] == '>')
8661: goto found_end_int_subset;
8662: }
8663: break;
8664: }
8665: }
8666: /*
8667: * We didn't find the end of the Internal subset
8668: */
8669: if (quote == 0)
8670: ctxt->checkIndex = base;
8671: #ifdef DEBUG_PUSH
8672: if (next == 0)
8673: fprintf(stderr, "PP: lookup of int subset end filed\n");
8674: #endif
8675: goto done;
8676:
8677: found_end_int_subset:
8678: xmlParseInternalSubset(ctxt);
1.166 daniel 8679: ctxt->inSubset = 2;
1.171 daniel 8680: if ((ctxt->sax != NULL) && (!ctxt->disableSAX) &&
1.166 daniel 8681: (ctxt->sax->externalSubset != NULL))
8682: ctxt->sax->externalSubset(ctxt->userData, ctxt->intSubName,
8683: ctxt->extSubSystem, ctxt->extSubURI);
8684: ctxt->inSubset = 0;
1.140 daniel 8685: ctxt->instate = XML_PARSER_PROLOG;
8686: ctxt->checkIndex = 0;
8687: #ifdef DEBUG_PUSH
8688: fprintf(stderr, "PP: entering PROLOG\n");
8689: #endif
8690: break;
8691: }
8692: case XML_PARSER_COMMENT:
8693: fprintf(stderr, "PP: internal error, state == COMMENT\n");
8694: ctxt->instate = XML_PARSER_CONTENT;
8695: #ifdef DEBUG_PUSH
8696: fprintf(stderr, "PP: entering CONTENT\n");
8697: #endif
8698: break;
8699: case XML_PARSER_PI:
8700: fprintf(stderr, "PP: internal error, state == PI\n");
8701: ctxt->instate = XML_PARSER_CONTENT;
8702: #ifdef DEBUG_PUSH
8703: fprintf(stderr, "PP: entering CONTENT\n");
8704: #endif
8705: break;
1.128 daniel 8706: case XML_PARSER_ENTITY_DECL:
1.140 daniel 8707: fprintf(stderr, "PP: internal error, state == ENTITY_DECL\n");
8708: ctxt->instate = XML_PARSER_DTD;
8709: #ifdef DEBUG_PUSH
8710: fprintf(stderr, "PP: entering DTD\n");
8711: #endif
8712: break;
1.128 daniel 8713: case XML_PARSER_ENTITY_VALUE:
1.140 daniel 8714: fprintf(stderr, "PP: internal error, state == ENTITY_VALUE\n");
8715: ctxt->instate = XML_PARSER_CONTENT;
8716: #ifdef DEBUG_PUSH
8717: fprintf(stderr, "PP: entering DTD\n");
8718: #endif
8719: break;
1.128 daniel 8720: case XML_PARSER_ATTRIBUTE_VALUE:
1.140 daniel 8721: fprintf(stderr, "PP: internal error, state == ATTRIBUTE_VALUE\n");
1.168 daniel 8722: ctxt->instate = XML_PARSER_START_TAG;
8723: #ifdef DEBUG_PUSH
8724: fprintf(stderr, "PP: entering START_TAG\n");
8725: #endif
8726: break;
8727: case XML_PARSER_SYSTEM_LITERAL:
8728: fprintf(stderr, "PP: internal error, state == SYSTEM_LITERAL\n");
1.140 daniel 8729: ctxt->instate = XML_PARSER_START_TAG;
8730: #ifdef DEBUG_PUSH
8731: fprintf(stderr, "PP: entering START_TAG\n");
8732: #endif
8733: break;
1.128 daniel 8734: }
8735: }
1.140 daniel 8736: done:
8737: #ifdef DEBUG_PUSH
8738: fprintf(stderr, "PP: done %d\n", ret);
8739: #endif
1.128 daniel 8740: return(ret);
8741: }
8742:
8743: /**
1.143 daniel 8744: * xmlParseTry:
8745: * @ctxt: an XML parser context
8746: *
8747: * Try to progress on parsing
8748: *
8749: * Returns zero if no parsing was possible
8750: */
8751: int
8752: xmlParseTry(xmlParserCtxtPtr ctxt) {
8753: return(xmlParseTryOrFinish(ctxt, 0));
8754: }
8755:
8756: /**
1.128 daniel 8757: * xmlParseChunk:
8758: * @ctxt: an XML parser context
8759: * @chunk: an char array
8760: * @size: the size in byte of the chunk
8761: * @terminate: last chunk indicator
8762: *
8763: * Parse a Chunk of memory
8764: *
8765: * Returns zero if no error, the xmlParserErrors otherwise.
8766: */
1.140 daniel 8767: int
1.128 daniel 8768: xmlParseChunk(xmlParserCtxtPtr ctxt, const char *chunk, int size,
8769: int terminate) {
1.132 daniel 8770: if ((size > 0) && (chunk != NULL) && (ctxt->input != NULL) &&
1.140 daniel 8771: (ctxt->input->buf != NULL) && (ctxt->instate != XML_PARSER_EOF)) {
8772: int base = ctxt->input->base - ctxt->input->buf->buffer->content;
8773: int cur = ctxt->input->cur - ctxt->input->base;
8774:
1.132 daniel 8775: xmlParserInputBufferPush(ctxt->input->buf, size, chunk);
1.140 daniel 8776: ctxt->input->base = ctxt->input->buf->buffer->content + base;
8777: ctxt->input->cur = ctxt->input->base + cur;
8778: #ifdef DEBUG_PUSH
8779: fprintf(stderr, "PP: pushed %d\n", size);
8780: #endif
8781:
1.150 daniel 8782: if ((terminate) || (ctxt->input->buf->buffer->use > 80))
8783: xmlParseTryOrFinish(ctxt, terminate);
1.140 daniel 8784: } else if (ctxt->instate != XML_PARSER_EOF)
1.143 daniel 8785: xmlParseTryOrFinish(ctxt, terminate);
1.140 daniel 8786: if (terminate) {
1.151 daniel 8787: /*
8788: * Grab the encoding if it was added on-the-fly
8789: */
8790: if ((ctxt->encoding != NULL) && (ctxt->myDoc != NULL) &&
8791: (ctxt->myDoc->encoding == NULL)) {
8792: ctxt->myDoc->encoding = ctxt->encoding;
8793: ctxt->encoding = NULL;
8794: }
8795:
8796: /*
8797: * Check for termination
8798: */
1.140 daniel 8799: if ((ctxt->instate != XML_PARSER_EOF) &&
8800: (ctxt->instate != XML_PARSER_EPILOG)) {
8801: if ((ctxt->sax != NULL) && (ctxt->sax->error != NULL))
8802: ctxt->sax->error(ctxt->userData,
8803: "Extra content at the end of the document\n");
8804: ctxt->wellFormed = 0;
8805: ctxt->errNo = XML_ERR_DOCUMENT_END;
8806: }
8807: if (ctxt->instate != XML_PARSER_EOF) {
1.171 daniel 8808: if ((ctxt->sax) && (ctxt->sax->endDocument != NULL) &&
8809: (!ctxt->disableSAX))
1.140 daniel 8810: ctxt->sax->endDocument(ctxt->userData);
8811: }
8812: ctxt->instate = XML_PARSER_EOF;
1.128 daniel 8813: }
8814: return((xmlParserErrors) ctxt->errNo);
8815: }
8816:
8817: /************************************************************************
8818: * *
1.98 daniel 8819: * I/O front end functions to the parser *
8820: * *
8821: ************************************************************************/
8822:
1.50 daniel 8823: /**
1.140 daniel 8824: * xmlCreatePushParserCtxt :
8825: * @sax: a SAX handler
8826: * @user_data: The user data returned on SAX callbacks
8827: * @chunk: a pointer to an array of chars
8828: * @size: number of chars in the array
8829: * @filename: an optional file name or URI
8830: *
8831: * Create a parser context for using the XML parser in push mode
8832: * To allow content encoding detection, @size should be >= 4
8833: * The value of @filename is used for fetching external entities
8834: * and error/warning reports.
8835: *
8836: * Returns the new parser context or NULL
8837: */
8838: xmlParserCtxtPtr
8839: xmlCreatePushParserCtxt(xmlSAXHandlerPtr sax, void *user_data,
8840: const char *chunk, int size, const char *filename) {
8841: xmlParserCtxtPtr ctxt;
8842: xmlParserInputPtr inputStream;
8843: xmlParserInputBufferPtr buf;
8844: xmlCharEncoding enc = XML_CHAR_ENCODING_NONE;
8845:
8846: /*
1.156 daniel 8847: * plug some encoding conversion routines
1.140 daniel 8848: */
8849: if ((chunk != NULL) && (size >= 4))
1.156 daniel 8850: enc = xmlDetectCharEncoding((const xmlChar *) chunk, size);
1.140 daniel 8851:
8852: buf = xmlAllocParserInputBuffer(enc);
8853: if (buf == NULL) return(NULL);
8854:
8855: ctxt = xmlNewParserCtxt();
8856: if (ctxt == NULL) {
8857: xmlFree(buf);
8858: return(NULL);
8859: }
8860: if (sax != NULL) {
8861: if (ctxt->sax != &xmlDefaultSAXHandler)
8862: xmlFree(ctxt->sax);
8863: ctxt->sax = (xmlSAXHandlerPtr) xmlMalloc(sizeof(xmlSAXHandler));
8864: if (ctxt->sax == NULL) {
8865: xmlFree(buf);
8866: xmlFree(ctxt);
8867: return(NULL);
8868: }
8869: memcpy(ctxt->sax, sax, sizeof(xmlSAXHandler));
8870: if (user_data != NULL)
8871: ctxt->userData = user_data;
8872: }
8873: if (filename == NULL) {
8874: ctxt->directory = NULL;
8875: } else {
8876: ctxt->directory = xmlParserGetDirectory(filename);
8877: }
8878:
8879: inputStream = xmlNewInputStream(ctxt);
8880: if (inputStream == NULL) {
8881: xmlFreeParserCtxt(ctxt);
8882: return(NULL);
8883: }
8884:
8885: if (filename == NULL)
8886: inputStream->filename = NULL;
8887: else
8888: inputStream->filename = xmlMemStrdup(filename);
8889: inputStream->buf = buf;
8890: inputStream->base = inputStream->buf->buffer->content;
8891: inputStream->cur = inputStream->buf->buffer->content;
1.156 daniel 8892: if (enc != XML_CHAR_ENCODING_NONE) {
8893: xmlSwitchEncoding(ctxt, enc);
8894: }
1.140 daniel 8895:
8896: inputPush(ctxt, inputStream);
8897:
8898: if ((size > 0) && (chunk != NULL) && (ctxt->input != NULL) &&
8899: (ctxt->input->buf != NULL)) {
8900: xmlParserInputBufferPush(ctxt->input->buf, size, chunk);
8901: #ifdef DEBUG_PUSH
8902: fprintf(stderr, "PP: pushed %d\n", size);
8903: #endif
8904: }
8905:
8906: return(ctxt);
8907: }
8908:
8909: /**
1.86 daniel 8910: * xmlCreateDocParserCtxt :
1.123 daniel 8911: * @cur: a pointer to an array of xmlChar
1.50 daniel 8912: *
1.69 daniel 8913: * Create a parser context for an XML in-memory document.
8914: *
8915: * Returns the new parser context or NULL
1.16 daniel 8916: */
1.69 daniel 8917: xmlParserCtxtPtr
1.123 daniel 8918: xmlCreateDocParserCtxt(xmlChar *cur) {
1.16 daniel 8919: xmlParserCtxtPtr ctxt;
1.40 daniel 8920: xmlParserInputPtr input;
1.16 daniel 8921:
1.97 daniel 8922: ctxt = xmlNewParserCtxt();
1.16 daniel 8923: if (ctxt == NULL) {
8924: return(NULL);
8925: }
1.96 daniel 8926: input = xmlNewInputStream(ctxt);
1.40 daniel 8927: if (input == NULL) {
1.97 daniel 8928: xmlFreeParserCtxt(ctxt);
1.40 daniel 8929: return(NULL);
8930: }
8931:
8932: input->base = cur;
8933: input->cur = cur;
8934:
8935: inputPush(ctxt, input);
1.69 daniel 8936: return(ctxt);
8937: }
8938:
8939: /**
8940: * xmlSAXParseDoc :
8941: * @sax: the SAX handler block
1.123 daniel 8942: * @cur: a pointer to an array of xmlChar
1.69 daniel 8943: * @recovery: work in recovery mode, i.e. tries to read no Well Formed
8944: * documents
8945: *
8946: * parse an XML in-memory document and build a tree.
8947: * It use the given SAX function block to handle the parsing callback.
8948: * If sax is NULL, fallback to the default DOM tree building routines.
8949: *
8950: * Returns the resulting document tree
8951: */
8952:
8953: xmlDocPtr
1.123 daniel 8954: xmlSAXParseDoc(xmlSAXHandlerPtr sax, xmlChar *cur, int recovery) {
1.69 daniel 8955: xmlDocPtr ret;
8956: xmlParserCtxtPtr ctxt;
8957:
8958: if (cur == NULL) return(NULL);
1.16 daniel 8959:
8960:
1.69 daniel 8961: ctxt = xmlCreateDocParserCtxt(cur);
8962: if (ctxt == NULL) return(NULL);
1.74 daniel 8963: if (sax != NULL) {
8964: ctxt->sax = sax;
8965: ctxt->userData = NULL;
8966: }
1.69 daniel 8967:
1.16 daniel 8968: xmlParseDocument(ctxt);
1.72 daniel 8969: if ((ctxt->wellFormed) || recovery) ret = ctxt->myDoc;
1.59 daniel 8970: else {
8971: ret = NULL;
1.72 daniel 8972: xmlFreeDoc(ctxt->myDoc);
8973: ctxt->myDoc = NULL;
1.59 daniel 8974: }
1.86 daniel 8975: if (sax != NULL)
8976: ctxt->sax = NULL;
1.69 daniel 8977: xmlFreeParserCtxt(ctxt);
1.16 daniel 8978:
1.1 veillard 8979: return(ret);
8980: }
8981:
1.50 daniel 8982: /**
1.55 daniel 8983: * xmlParseDoc :
1.123 daniel 8984: * @cur: a pointer to an array of xmlChar
1.55 daniel 8985: *
8986: * parse an XML in-memory document and build a tree.
8987: *
1.68 daniel 8988: * Returns the resulting document tree
1.55 daniel 8989: */
8990:
1.69 daniel 8991: xmlDocPtr
1.123 daniel 8992: xmlParseDoc(xmlChar *cur) {
1.59 daniel 8993: return(xmlSAXParseDoc(NULL, cur, 0));
1.76 daniel 8994: }
8995:
8996: /**
8997: * xmlSAXParseDTD :
8998: * @sax: the SAX handler block
8999: * @ExternalID: a NAME* containing the External ID of the DTD
9000: * @SystemID: a NAME* containing the URL to the DTD
9001: *
9002: * Load and parse an external subset.
9003: *
9004: * Returns the resulting xmlDtdPtr or NULL in case of error.
9005: */
9006:
9007: xmlDtdPtr
1.123 daniel 9008: xmlSAXParseDTD(xmlSAXHandlerPtr sax, const xmlChar *ExternalID,
9009: const xmlChar *SystemID) {
1.76 daniel 9010: xmlDtdPtr ret = NULL;
9011: xmlParserCtxtPtr ctxt;
1.83 daniel 9012: xmlParserInputPtr input = NULL;
1.76 daniel 9013: xmlCharEncoding enc;
9014:
9015: if ((ExternalID == NULL) && (SystemID == NULL)) return(NULL);
9016:
1.97 daniel 9017: ctxt = xmlNewParserCtxt();
1.76 daniel 9018: if (ctxt == NULL) {
9019: return(NULL);
9020: }
9021:
9022: /*
9023: * Set-up the SAX context
9024: */
9025: if (ctxt == NULL) return(NULL);
9026: if (sax != NULL) {
1.93 veillard 9027: if (ctxt->sax != NULL)
1.119 daniel 9028: xmlFree(ctxt->sax);
1.76 daniel 9029: ctxt->sax = sax;
9030: ctxt->userData = NULL;
9031: }
9032:
9033: /*
9034: * Ask the Entity resolver to load the damn thing
9035: */
9036:
9037: if ((ctxt->sax != NULL) && (ctxt->sax->resolveEntity != NULL))
9038: input = ctxt->sax->resolveEntity(ctxt->userData, ExternalID, SystemID);
9039: if (input == NULL) {
1.86 daniel 9040: if (sax != NULL) ctxt->sax = NULL;
1.76 daniel 9041: xmlFreeParserCtxt(ctxt);
9042: return(NULL);
9043: }
9044:
9045: /*
1.156 daniel 9046: * plug some encoding conversion routines here.
1.76 daniel 9047: */
9048: xmlPushInput(ctxt, input);
1.156 daniel 9049: enc = xmlDetectCharEncoding(ctxt->input->cur, 4);
1.76 daniel 9050: xmlSwitchEncoding(ctxt, enc);
9051:
1.95 veillard 9052: if (input->filename == NULL)
1.156 daniel 9053: input->filename = (char *) xmlStrdup(SystemID);
1.76 daniel 9054: input->line = 1;
9055: input->col = 1;
9056: input->base = ctxt->input->cur;
9057: input->cur = ctxt->input->cur;
9058: input->free = NULL;
9059:
9060: /*
9061: * let's parse that entity knowing it's an external subset.
9062: */
1.79 daniel 9063: xmlParseExternalSubset(ctxt, ExternalID, SystemID);
1.76 daniel 9064:
9065: if (ctxt->myDoc != NULL) {
9066: if (ctxt->wellFormed) {
9067: ret = ctxt->myDoc->intSubset;
9068: ctxt->myDoc->intSubset = NULL;
9069: } else {
9070: ret = NULL;
9071: }
9072: xmlFreeDoc(ctxt->myDoc);
9073: ctxt->myDoc = NULL;
9074: }
1.86 daniel 9075: if (sax != NULL) ctxt->sax = NULL;
1.76 daniel 9076: xmlFreeParserCtxt(ctxt);
9077:
9078: return(ret);
9079: }
9080:
9081: /**
9082: * xmlParseDTD :
9083: * @ExternalID: a NAME* containing the External ID of the DTD
9084: * @SystemID: a NAME* containing the URL to the DTD
9085: *
9086: * Load and parse an external subset.
9087: *
9088: * Returns the resulting xmlDtdPtr or NULL in case of error.
9089: */
9090:
9091: xmlDtdPtr
1.123 daniel 9092: xmlParseDTD(const xmlChar *ExternalID, const xmlChar *SystemID) {
1.76 daniel 9093: return(xmlSAXParseDTD(NULL, ExternalID, SystemID));
1.59 daniel 9094: }
9095:
9096: /**
1.144 daniel 9097: * xmlSAXParseBalancedChunk :
9098: * @ctx: an XML parser context (possibly NULL)
9099: * @sax: the SAX handler bloc (possibly NULL)
9100: * @user_data: The user data returned on SAX callbacks (possibly NULL)
9101: * @input: a parser input stream
9102: * @enc: the encoding
9103: *
9104: * Parse a well-balanced chunk of an XML document
9105: * The user has to provide SAX callback block whose routines will be
9106: * called by the parser
9107: * The allowed sequence for the Well Balanced Chunk is the one defined by
9108: * the content production in the XML grammar:
9109: *
9110: * [43] content ::= (element | CharData | Reference | CDSect | PI | Comment)*
9111: *
9112: * Returns 0 id the chunk is well balanced, -1 in case of args problem and
9113: * the error code otherwise
9114: */
9115:
9116: int
9117: xmlSAXParseBalancedChunk(xmlParserCtxtPtr ctx, xmlSAXHandlerPtr sax,
9118: void *user_data, xmlParserInputPtr input,
9119: xmlCharEncoding enc) {
9120: xmlParserCtxtPtr ctxt;
9121: int ret;
9122:
9123: if (input == NULL) return(-1);
9124:
9125: if (ctx != NULL)
9126: ctxt = ctx;
9127: else {
9128: ctxt = xmlNewParserCtxt();
9129: if (ctxt == NULL)
9130: return(-1);
9131: if (sax == NULL)
9132: ctxt->myDoc = xmlNewDoc(BAD_CAST "1.0");
9133: }
9134:
9135: /*
9136: * Set-up the SAX context
9137: */
9138: if (sax != NULL) {
9139: if (ctxt->sax != NULL)
9140: xmlFree(ctxt->sax);
9141: ctxt->sax = sax;
9142: ctxt->userData = user_data;
9143: }
9144:
9145: /*
9146: * plug some encoding conversion routines here.
9147: */
9148: xmlPushInput(ctxt, input);
9149: if (enc != XML_CHAR_ENCODING_NONE)
9150: xmlSwitchEncoding(ctxt, enc);
9151:
9152: /*
9153: * let's parse that entity knowing it's an external subset.
9154: */
9155: xmlParseContent(ctxt);
9156: ret = ctxt->errNo;
9157:
9158: if (ctx == NULL) {
9159: if (sax != NULL)
9160: ctxt->sax = NULL;
9161: else
9162: xmlFreeDoc(ctxt->myDoc);
9163: xmlFreeParserCtxt(ctxt);
9164: }
9165: return(ret);
9166: }
9167:
9168: /**
9169: * xmlParseBalancedChunk :
9170: * @doc: the document the chunk pertains to
9171: * @node: the node defining the context in which informations will be added
9172: *
9173: * Parse a well-balanced chunk of an XML document present in memory
9174: *
9175: * Returns the resulting list of nodes resulting from the parsing,
9176: * they are not added to @node
9177: */
9178:
9179: xmlNodePtr
9180: xmlParseBalancedChunkMemory(xmlDocPtr doc, xmlNodePtr node) {
1.156 daniel 9181: /* TODO !!! */
9182: return(NULL);
1.144 daniel 9183: }
9184:
9185: /**
9186: * xmlParseBalancedChunkFile :
9187: * @doc: the document the chunk pertains to
9188: *
9189: * Parse a well-balanced chunk of an XML document contained in a file
9190: *
9191: * Returns the resulting list of nodes resulting from the parsing,
9192: * they are not added to @node
9193: */
9194:
9195: xmlNodePtr
9196: xmlParseBalancedChunkFile(xmlDocPtr doc, xmlNodePtr node) {
1.156 daniel 9197: /* TODO !!! */
9198: return(NULL);
1.144 daniel 9199: }
9200:
9201: /**
1.59 daniel 9202: * xmlRecoverDoc :
1.123 daniel 9203: * @cur: a pointer to an array of xmlChar
1.59 daniel 9204: *
9205: * parse an XML in-memory document and build a tree.
9206: * In the case the document is not Well Formed, a tree is built anyway
9207: *
1.68 daniel 9208: * Returns the resulting document tree
1.59 daniel 9209: */
9210:
1.69 daniel 9211: xmlDocPtr
1.123 daniel 9212: xmlRecoverDoc(xmlChar *cur) {
1.59 daniel 9213: return(xmlSAXParseDoc(NULL, cur, 1));
1.55 daniel 9214: }
9215:
9216: /**
1.69 daniel 9217: * xmlCreateFileParserCtxt :
1.50 daniel 9218: * @filename: the filename
9219: *
1.69 daniel 9220: * Create a parser context for a file content.
9221: * Automatic support for ZLIB/Compress compressed document is provided
9222: * by default if found at compile-time.
1.50 daniel 9223: *
1.69 daniel 9224: * Returns the new parser context or NULL
1.9 httpng 9225: */
1.69 daniel 9226: xmlParserCtxtPtr
9227: xmlCreateFileParserCtxt(const char *filename)
9228: {
9229: xmlParserCtxtPtr ctxt;
1.40 daniel 9230: xmlParserInputPtr inputStream;
1.91 daniel 9231: xmlParserInputBufferPtr buf;
1.111 daniel 9232: char *directory = NULL;
1.9 httpng 9233:
1.91 daniel 9234: buf = xmlParserInputBufferCreateFilename(filename, XML_CHAR_ENCODING_NONE);
9235: if (buf == NULL) return(NULL);
1.9 httpng 9236:
1.97 daniel 9237: ctxt = xmlNewParserCtxt();
1.16 daniel 9238: if (ctxt == NULL) {
9239: return(NULL);
9240: }
1.97 daniel 9241:
1.96 daniel 9242: inputStream = xmlNewInputStream(ctxt);
1.40 daniel 9243: if (inputStream == NULL) {
1.97 daniel 9244: xmlFreeParserCtxt(ctxt);
1.40 daniel 9245: return(NULL);
9246: }
9247:
1.119 daniel 9248: inputStream->filename = xmlMemStrdup(filename);
1.91 daniel 9249: inputStream->buf = buf;
9250: inputStream->base = inputStream->buf->buffer->content;
9251: inputStream->cur = inputStream->buf->buffer->content;
1.16 daniel 9252:
1.40 daniel 9253: inputPush(ctxt, inputStream);
1.110 daniel 9254: if ((ctxt->directory == NULL) && (directory == NULL))
1.106 daniel 9255: directory = xmlParserGetDirectory(filename);
9256: if ((ctxt->directory == NULL) && (directory != NULL))
1.110 daniel 9257: ctxt->directory = directory;
1.106 daniel 9258:
1.69 daniel 9259: return(ctxt);
9260: }
9261:
9262: /**
9263: * xmlSAXParseFile :
9264: * @sax: the SAX handler block
9265: * @filename: the filename
9266: * @recovery: work in recovery mode, i.e. tries to read no Well Formed
9267: * documents
9268: *
9269: * parse an XML file and build a tree. Automatic support for ZLIB/Compress
9270: * compressed document is provided by default if found at compile-time.
9271: * It use the given SAX function block to handle the parsing callback.
9272: * If sax is NULL, fallback to the default DOM tree building routines.
9273: *
9274: * Returns the resulting document tree
9275: */
9276:
1.79 daniel 9277: xmlDocPtr
9278: xmlSAXParseFile(xmlSAXHandlerPtr sax, const char *filename,
1.69 daniel 9279: int recovery) {
9280: xmlDocPtr ret;
9281: xmlParserCtxtPtr ctxt;
1.111 daniel 9282: char *directory = NULL;
1.69 daniel 9283:
9284: ctxt = xmlCreateFileParserCtxt(filename);
9285: if (ctxt == NULL) return(NULL);
1.74 daniel 9286: if (sax != NULL) {
1.93 veillard 9287: if (ctxt->sax != NULL)
1.119 daniel 9288: xmlFree(ctxt->sax);
1.74 daniel 9289: ctxt->sax = sax;
9290: ctxt->userData = NULL;
9291: }
1.106 daniel 9292:
1.110 daniel 9293: if ((ctxt->directory == NULL) && (directory == NULL))
1.106 daniel 9294: directory = xmlParserGetDirectory(filename);
9295: if ((ctxt->directory == NULL) && (directory != NULL))
1.156 daniel 9296: ctxt->directory = (char *) xmlStrdup((xmlChar *) directory);
1.16 daniel 9297:
9298: xmlParseDocument(ctxt);
1.40 daniel 9299:
1.72 daniel 9300: if ((ctxt->wellFormed) || recovery) ret = ctxt->myDoc;
1.59 daniel 9301: else {
9302: ret = NULL;
1.72 daniel 9303: xmlFreeDoc(ctxt->myDoc);
9304: ctxt->myDoc = NULL;
1.59 daniel 9305: }
1.86 daniel 9306: if (sax != NULL)
9307: ctxt->sax = NULL;
1.69 daniel 9308: xmlFreeParserCtxt(ctxt);
1.20 daniel 9309:
9310: return(ret);
9311: }
9312:
1.55 daniel 9313: /**
9314: * xmlParseFile :
9315: * @filename: the filename
9316: *
9317: * parse an XML file and build a tree. Automatic support for ZLIB/Compress
9318: * compressed document is provided by default if found at compile-time.
9319: *
1.68 daniel 9320: * Returns the resulting document tree
1.55 daniel 9321: */
9322:
1.79 daniel 9323: xmlDocPtr
9324: xmlParseFile(const char *filename) {
1.59 daniel 9325: return(xmlSAXParseFile(NULL, filename, 0));
9326: }
9327:
9328: /**
9329: * xmlRecoverFile :
9330: * @filename: the filename
9331: *
9332: * parse an XML file and build a tree. Automatic support for ZLIB/Compress
9333: * compressed document is provided by default if found at compile-time.
9334: * In the case the document is not Well Formed, a tree is built anyway
9335: *
1.68 daniel 9336: * Returns the resulting document tree
1.59 daniel 9337: */
9338:
1.79 daniel 9339: xmlDocPtr
9340: xmlRecoverFile(const char *filename) {
1.59 daniel 9341: return(xmlSAXParseFile(NULL, filename, 1));
1.55 daniel 9342: }
1.32 daniel 9343:
1.50 daniel 9344: /**
1.69 daniel 9345: * xmlCreateMemoryParserCtxt :
1.68 daniel 9346: * @buffer: an pointer to a char array
1.127 daniel 9347: * @size: the size of the array
1.50 daniel 9348: *
1.69 daniel 9349: * Create a parser context for an XML in-memory document.
1.50 daniel 9350: *
1.69 daniel 9351: * Returns the new parser context or NULL
1.20 daniel 9352: */
1.69 daniel 9353: xmlParserCtxtPtr
9354: xmlCreateMemoryParserCtxt(char *buffer, int size) {
1.20 daniel 9355: xmlParserCtxtPtr ctxt;
1.40 daniel 9356: xmlParserInputPtr input;
9357:
1.158 daniel 9358: if (buffer[size - 1] != 0)
9359: buffer[size - 1] = '\0';
1.40 daniel 9360:
1.97 daniel 9361: ctxt = xmlNewParserCtxt();
1.20 daniel 9362: if (ctxt == NULL) {
9363: return(NULL);
9364: }
1.97 daniel 9365:
1.96 daniel 9366: input = xmlNewInputStream(ctxt);
1.40 daniel 9367: if (input == NULL) {
1.97 daniel 9368: xmlFreeParserCtxt(ctxt);
1.40 daniel 9369: return(NULL);
9370: }
1.20 daniel 9371:
1.40 daniel 9372: input->filename = NULL;
9373: input->line = 1;
9374: input->col = 1;
1.96 daniel 9375: input->buf = NULL;
1.91 daniel 9376: input->consumed = 0;
1.75 daniel 9377:
1.116 daniel 9378: input->base = BAD_CAST buffer;
9379: input->cur = BAD_CAST buffer;
1.69 daniel 9380: input->free = NULL;
1.20 daniel 9381:
1.40 daniel 9382: inputPush(ctxt, input);
1.69 daniel 9383: return(ctxt);
9384: }
9385:
9386: /**
9387: * xmlSAXParseMemory :
9388: * @sax: the SAX handler block
9389: * @buffer: an pointer to a char array
1.127 daniel 9390: * @size: the size of the array
9391: * @recovery: work in recovery mode, i.e. tries to read not Well Formed
1.69 daniel 9392: * documents
9393: *
9394: * parse an XML in-memory block and use the given SAX function block
9395: * to handle the parsing callback. If sax is NULL, fallback to the default
9396: * DOM tree building routines.
9397: *
9398: * Returns the resulting document tree
9399: */
9400: xmlDocPtr
9401: xmlSAXParseMemory(xmlSAXHandlerPtr sax, char *buffer, int size, int recovery) {
9402: xmlDocPtr ret;
9403: xmlParserCtxtPtr ctxt;
9404:
9405: ctxt = xmlCreateMemoryParserCtxt(buffer, size);
9406: if (ctxt == NULL) return(NULL);
1.74 daniel 9407: if (sax != NULL) {
9408: ctxt->sax = sax;
9409: ctxt->userData = NULL;
9410: }
1.20 daniel 9411:
9412: xmlParseDocument(ctxt);
1.40 daniel 9413:
1.72 daniel 9414: if ((ctxt->wellFormed) || recovery) ret = ctxt->myDoc;
1.59 daniel 9415: else {
9416: ret = NULL;
1.72 daniel 9417: xmlFreeDoc(ctxt->myDoc);
9418: ctxt->myDoc = NULL;
1.59 daniel 9419: }
1.86 daniel 9420: if (sax != NULL)
9421: ctxt->sax = NULL;
1.69 daniel 9422: xmlFreeParserCtxt(ctxt);
1.16 daniel 9423:
1.9 httpng 9424: return(ret);
1.17 daniel 9425: }
9426:
1.55 daniel 9427: /**
9428: * xmlParseMemory :
1.68 daniel 9429: * @buffer: an pointer to a char array
1.55 daniel 9430: * @size: the size of the array
9431: *
9432: * parse an XML in-memory block and build a tree.
9433: *
1.68 daniel 9434: * Returns the resulting document tree
1.55 daniel 9435: */
9436:
9437: xmlDocPtr xmlParseMemory(char *buffer, int size) {
1.59 daniel 9438: return(xmlSAXParseMemory(NULL, buffer, size, 0));
9439: }
9440:
9441: /**
9442: * xmlRecoverMemory :
1.68 daniel 9443: * @buffer: an pointer to a char array
1.59 daniel 9444: * @size: the size of the array
9445: *
9446: * parse an XML in-memory block and build a tree.
9447: * In the case the document is not Well Formed, a tree is built anyway
9448: *
1.68 daniel 9449: * Returns the resulting document tree
1.59 daniel 9450: */
9451:
9452: xmlDocPtr xmlRecoverMemory(char *buffer, int size) {
9453: return(xmlSAXParseMemory(NULL, buffer, size, 1));
1.17 daniel 9454: }
9455:
9456:
1.50 daniel 9457: /**
9458: * xmlSetupParserForBuffer:
9459: * @ctxt: an XML parser context
1.123 daniel 9460: * @buffer: a xmlChar * buffer
1.50 daniel 9461: * @filename: a file name
9462: *
1.19 daniel 9463: * Setup the parser context to parse a new buffer; Clears any prior
9464: * contents from the parser context. The buffer parameter must not be
9465: * NULL, but the filename parameter can be
9466: */
1.55 daniel 9467: void
1.123 daniel 9468: xmlSetupParserForBuffer(xmlParserCtxtPtr ctxt, const xmlChar* buffer,
1.17 daniel 9469: const char* filename)
9470: {
1.96 daniel 9471: xmlParserInputPtr input;
1.40 daniel 9472:
1.96 daniel 9473: input = xmlNewInputStream(ctxt);
9474: if (input == NULL) {
9475: perror("malloc");
1.119 daniel 9476: xmlFree(ctxt);
1.145 daniel 9477: return;
1.96 daniel 9478: }
9479:
9480: xmlClearParserCtxt(ctxt);
9481: if (filename != NULL)
1.119 daniel 9482: input->filename = xmlMemStrdup(filename);
1.96 daniel 9483: input->base = buffer;
9484: input->cur = buffer;
9485: inputPush(ctxt, input);
1.17 daniel 9486: }
9487:
1.123 daniel 9488: /**
9489: * xmlSAXUserParseFile:
9490: * @sax: a SAX handler
9491: * @user_data: The user data returned on SAX callbacks
9492: * @filename: a file name
9493: *
9494: * parse an XML file and call the given SAX handler routines.
9495: * Automatic support for ZLIB/Compress compressed document is provided
9496: *
9497: * Returns 0 in case of success or a error number otherwise
9498: */
1.131 daniel 9499: int
9500: xmlSAXUserParseFile(xmlSAXHandlerPtr sax, void *user_data,
9501: const char *filename) {
1.123 daniel 9502: int ret = 0;
9503: xmlParserCtxtPtr ctxt;
9504:
9505: ctxt = xmlCreateFileParserCtxt(filename);
9506: if (ctxt == NULL) return -1;
1.134 daniel 9507: if (ctxt->sax != &xmlDefaultSAXHandler)
9508: xmlFree(ctxt->sax);
1.123 daniel 9509: ctxt->sax = sax;
1.140 daniel 9510: if (user_data != NULL)
9511: ctxt->userData = user_data;
1.123 daniel 9512:
9513: xmlParseDocument(ctxt);
9514:
9515: if (ctxt->wellFormed)
9516: ret = 0;
9517: else {
9518: if (ctxt->errNo != 0)
9519: ret = ctxt->errNo;
9520: else
9521: ret = -1;
9522: }
9523: if (sax != NULL)
9524: ctxt->sax = NULL;
9525: xmlFreeParserCtxt(ctxt);
9526:
9527: return ret;
9528: }
9529:
9530: /**
9531: * xmlSAXUserParseMemory:
9532: * @sax: a SAX handler
9533: * @user_data: The user data returned on SAX callbacks
9534: * @buffer: an in-memory XML document input
1.127 daniel 9535: * @size: the length of the XML document in bytes
1.123 daniel 9536: *
9537: * A better SAX parsing routine.
9538: * parse an XML in-memory buffer and call the given SAX handler routines.
9539: *
9540: * Returns 0 in case of success or a error number otherwise
9541: */
9542: int xmlSAXUserParseMemory(xmlSAXHandlerPtr sax, void *user_data,
9543: char *buffer, int size) {
9544: int ret = 0;
9545: xmlParserCtxtPtr ctxt;
9546:
9547: ctxt = xmlCreateMemoryParserCtxt(buffer, size);
9548: if (ctxt == NULL) return -1;
9549: ctxt->sax = sax;
9550: ctxt->userData = user_data;
9551:
9552: xmlParseDocument(ctxt);
9553:
9554: if (ctxt->wellFormed)
9555: ret = 0;
9556: else {
9557: if (ctxt->errNo != 0)
9558: ret = ctxt->errNo;
9559: else
9560: ret = -1;
9561: }
9562: if (sax != NULL)
9563: ctxt->sax = NULL;
9564: xmlFreeParserCtxt(ctxt);
9565:
9566: return ret;
9567: }
9568:
1.32 daniel 9569:
1.98 daniel 9570: /************************************************************************
9571: * *
1.127 daniel 9572: * Miscellaneous *
1.98 daniel 9573: * *
9574: ************************************************************************/
9575:
/**
 * xmlCleanupParser:
 *
 * Cleanup function for the XML parser. It tries to reclaim all
 * parsing related global memory allocated for the parser processing.
 * It doesn't deallocate any document related memory. Calling this
 * function should not prevent reusing the parser.
 *
 * Currently this runs the cleanup routines of the character encoding
 * handlers and of the predefined entities, in that order.
 */

void
xmlCleanupParser(void)
{
    /* character encoding handlers first ... */
    xmlCleanupCharEncodingHandlers();
    /* ... then the predefined entities */
    xmlCleanupPredefinedEntities();
}
1.98 daniel 9590:
1.50 daniel 9591: /**
9592: * xmlParserFindNodeInfo:
9593: * @ctxt: an XML parser context
9594: * @node: an XML node within the tree
9595: *
9596: * Find the parser node info struct for a given node
9597: *
1.68 daniel 9598: * Returns an xmlParserNodeInfo block pointer or NULL
1.32 daniel 9599: */
9600: const xmlParserNodeInfo* xmlParserFindNodeInfo(const xmlParserCtxt* ctx,
9601: const xmlNode* node)
9602: {
9603: unsigned long pos;
9604:
9605: /* Find position where node should be at */
9606: pos = xmlParserFindNodeInfoIndex(&ctx->node_seq, node);
9607: if ( ctx->node_seq.buffer[pos].node == node )
9608: return &ctx->node_seq.buffer[pos];
9609: else
9610: return NULL;
9611: }
9612:
9613:
1.50 daniel 9614: /**
9615: * xmlInitNodeInfoSeq :
9616: * @seq: a node info sequence pointer
9617: *
9618: * -- Initialize (set to initial state) node info sequence
1.32 daniel 9619: */
1.55 daniel 9620: void
9621: xmlInitNodeInfoSeq(xmlParserNodeInfoSeqPtr seq)
1.32 daniel 9622: {
9623: seq->length = 0;
9624: seq->maximum = 0;
9625: seq->buffer = NULL;
9626: }
9627:
1.50 daniel 9628: /**
9629: * xmlClearNodeInfoSeq :
9630: * @seq: a node info sequence pointer
9631: *
9632: * -- Clear (release memory and reinitialize) node
1.32 daniel 9633: * info sequence
9634: */
1.55 daniel 9635: void
9636: xmlClearNodeInfoSeq(xmlParserNodeInfoSeqPtr seq)
1.32 daniel 9637: {
9638: if ( seq->buffer != NULL )
1.119 daniel 9639: xmlFree(seq->buffer);
1.32 daniel 9640: xmlInitNodeInfoSeq(seq);
9641: }
9642:
9643:
1.50 daniel 9644: /**
9645: * xmlParserFindNodeInfoIndex:
9646: * @seq: a node info sequence pointer
9647: * @node: an XML node pointer
9648: *
9649: *
1.32 daniel 9650: * xmlParserFindNodeInfoIndex : Find the index that the info record for
9651: * the given node is or should be at in a sorted sequence
1.68 daniel 9652: *
9653: * Returns a long indicating the position of the record
1.32 daniel 9654: */
9655: unsigned long xmlParserFindNodeInfoIndex(const xmlParserNodeInfoSeq* seq,
9656: const xmlNode* node)
9657: {
9658: unsigned long upper, lower, middle;
9659: int found = 0;
9660:
9661: /* Do a binary search for the key */
9662: lower = 1;
9663: upper = seq->length;
9664: middle = 0;
9665: while ( lower <= upper && !found) {
9666: middle = lower + (upper - lower) / 2;
9667: if ( node == seq->buffer[middle - 1].node )
9668: found = 1;
9669: else if ( node < seq->buffer[middle - 1].node )
9670: upper = middle - 1;
9671: else
9672: lower = middle + 1;
9673: }
9674:
9675: /* Return position */
9676: if ( middle == 0 || seq->buffer[middle - 1].node < node )
9677: return middle;
9678: else
9679: return middle - 1;
9680: }
9681:
9682:
1.50 daniel 9683: /**
9684: * xmlParserAddNodeInfo:
9685: * @ctxt: an XML parser context
1.68 daniel 9686: * @info: a node info sequence pointer
1.50 daniel 9687: *
9688: * Insert node info record into the sorted sequence
1.32 daniel 9689: */
1.55 daniel 9690: void
9691: xmlParserAddNodeInfo(xmlParserCtxtPtr ctxt,
1.68 daniel 9692: const xmlParserNodeInfo* info)
1.32 daniel 9693: {
9694: unsigned long pos;
9695: static unsigned int block_size = 5;
9696:
9697: /* Find pos and check to see if node is already in the sequence */
1.55 daniel 9698: pos = xmlParserFindNodeInfoIndex(&ctxt->node_seq, info->node);
9699: if ( pos < ctxt->node_seq.length
9700: && ctxt->node_seq.buffer[pos].node == info->node ) {
9701: ctxt->node_seq.buffer[pos] = *info;
1.32 daniel 9702: }
9703:
9704: /* Otherwise, we need to add new node to buffer */
9705: else {
9706: /* Expand buffer by 5 if needed */
1.55 daniel 9707: if ( ctxt->node_seq.length + 1 > ctxt->node_seq.maximum ) {
1.32 daniel 9708: xmlParserNodeInfo* tmp_buffer;
1.55 daniel 9709: unsigned int byte_size = (sizeof(*ctxt->node_seq.buffer)
9710: *(ctxt->node_seq.maximum + block_size));
1.32 daniel 9711:
1.55 daniel 9712: if ( ctxt->node_seq.buffer == NULL )
1.119 daniel 9713: tmp_buffer = (xmlParserNodeInfo*) xmlMalloc(byte_size);
1.32 daniel 9714: else
1.119 daniel 9715: tmp_buffer = (xmlParserNodeInfo*) xmlRealloc(ctxt->node_seq.buffer, byte_size);
1.32 daniel 9716:
9717: if ( tmp_buffer == NULL ) {
1.55 daniel 9718: if ((ctxt->sax != NULL) && (ctxt->sax->error != NULL))
1.74 daniel 9719: ctxt->sax->error(ctxt->userData, "Out of memory\n");
1.123 daniel 9720: ctxt->errNo = XML_ERR_NO_MEMORY;
1.32 daniel 9721: return;
9722: }
1.55 daniel 9723: ctxt->node_seq.buffer = tmp_buffer;
9724: ctxt->node_seq.maximum += block_size;
1.32 daniel 9725: }
9726:
9727: /* If position is not at end, move elements out of the way */
1.55 daniel 9728: if ( pos != ctxt->node_seq.length ) {
1.32 daniel 9729: unsigned long i;
9730:
1.55 daniel 9731: for ( i = ctxt->node_seq.length; i > pos; i-- )
9732: ctxt->node_seq.buffer[i] = ctxt->node_seq.buffer[i - 1];
1.32 daniel 9733: }
9734:
9735: /* Copy element and increase length */
1.55 daniel 9736: ctxt->node_seq.buffer[pos] = *info;
9737: ctxt->node_seq.length++;
1.32 daniel 9738: }
9739: }
1.77 daniel 9740:
1.98 daniel 9741:
9742: /**
9743: * xmlSubstituteEntitiesDefault :
9744: * @val: int 0 or 1
9745: *
9746: * Set and return the previous value for default entity support.
9747: * Initially the parser always keep entity references instead of substituting
9748: * entity values in the output. This function has to be used to change the
9749: * default parser behaviour
9750: * SAX::subtituteEntities() has to be used for changing that on a file by
9751: * file basis.
9752: *
9753: * Returns the last value for 0 for no substitution, 1 for substitution.
9754: */
9755:
9756: int
9757: xmlSubstituteEntitiesDefault(int val) {
9758: int old = xmlSubstituteEntitiesDefaultValue;
9759:
9760: xmlSubstituteEntitiesDefaultValue = val;
9761: return(old);
9762: }
1.77 daniel 9763:
Webmaster