Annotation of XML/parser.c, revision 1.174
1.1 veillard 1: /*
1.3 veillard 2: * parser.c : an XML 1.0 non-verifying parser
1.15 veillard 3: *
4: * See Copyright for the status of this software.
5: *
1.60 daniel 6: * Daniel.Veillard@w3.org
1.1 veillard 7: */
8:
1.26 daniel 9: #ifdef WIN32
1.138 daniel 10: #include "win32config.h"
1.26 daniel 11: #else
1.121 daniel 12: #include "config.h"
1.26 daniel 13: #endif
1.121 daniel 14:
1.1 veillard 15: #include <stdio.h>
1.121 daniel 16: #include <string.h> /* for memset() only */
17: #ifdef HAVE_CTYPE_H
1.1 veillard 18: #include <ctype.h>
1.121 daniel 19: #endif
20: #ifdef HAVE_STDLIB_H
1.50 daniel 21: #include <stdlib.h>
1.121 daniel 22: #endif
23: #ifdef HAVE_SYS_STAT_H
1.9 httpng 24: #include <sys/stat.h>
1.121 daniel 25: #endif
1.9 httpng 26: #ifdef HAVE_FCNTL_H
27: #include <fcntl.h>
28: #endif
1.10 httpng 29: #ifdef HAVE_UNISTD_H
30: #include <unistd.h>
31: #endif
1.20 daniel 32: #ifdef HAVE_ZLIB_H
33: #include <zlib.h>
34: #endif
1.1 veillard 35:
1.119 daniel 36: #include "xmlmemory.h"
1.14 veillard 37: #include "tree.h"
1.1 veillard 38: #include "parser.h"
1.14 veillard 39: #include "entities.h"
1.75 daniel 40: #include "encoding.h"
1.61 daniel 41: #include "valid.h"
1.69 daniel 42: #include "parserInternals.h"
1.91 daniel 43: #include "xmlIO.h"
1.122 daniel 44: #include "xml-error.h"
1.1 veillard 45:
1.140 daniel 46: #define XML_PARSER_BIG_BUFFER_SIZE 1000
47: #define XML_PARSER_BUFFER_SIZE 100
48:
1.86 daniel 49: const char *xmlParserVersion = LIBXML_VERSION;
1.160 daniel 50: int xmlGetWarningsDefaultValue = 1;
1.86 daniel 51:
1.139 daniel 52: /*
53: * List of XML prefixed PI allowed by W3C specs
54: */
55:
56: const char *xmlW3CPIs[] = {
57: "xml-stylesheet",
58: NULL
59: };
1.91 daniel 60:
1.151 daniel 61: void xmlParserHandleReference(xmlParserCtxtPtr ctxt);
62: void xmlParserHandlePEReference(xmlParserCtxtPtr ctxt);
63: xmlEntityPtr xmlParseStringPEReference(xmlParserCtxtPtr ctxt,
64: const xmlChar **str);
1.91 daniel 65: /************************************************************************
66: * *
67: * Input handling functions for progressive parsing *
68: * *
69: ************************************************************************/
70:
71: /* #define DEBUG_INPUT */
1.140 daniel 72: /* #define DEBUG_STACK */
73: /* #define DEBUG_PUSH */
74:
1.91 daniel 75:
1.110 daniel 76: #define INPUT_CHUNK 250
77: /* we need to keep enough input to show errors in context */
78: #define LINE_LEN 80
1.91 daniel 79:
80: #ifdef DEBUG_INPUT
81: #define CHECK_BUFFER(in) check_buffer(in)
82:
83: void check_buffer(xmlParserInputPtr in) {
84: if (in->base != in->buf->buffer->content) {
85: fprintf(stderr, "xmlParserInput: base mismatch problem\n");
86: }
87: if (in->cur < in->base) {
88: fprintf(stderr, "xmlParserInput: cur < base problem\n");
89: }
90: if (in->cur > in->base + in->buf->buffer->use) {
91: fprintf(stderr, "xmlParserInput: cur > base + use problem\n");
92: }
93: fprintf(stderr,"buffer %x : content %x, cur %d, use %d, size %d\n",
94: (int) in, (int) in->buf->buffer->content, in->cur - in->base,
95: in->buf->buffer->use, in->buf->buffer->size);
96: }
97:
1.110 daniel 98: #else
99: #define CHECK_BUFFER(in)
100: #endif
101:
1.91 daniel 102:
103: /**
104: * xmlParserInputRead:
105: * @in: an XML parser input
106: * @len: an indicative size for the lookahead
107: *
108: * This function refresh the input for the parser. It doesn't try to
109: * preserve pointers to the input buffer, and discard already read data
110: *
1.123 daniel 111: * Returns the number of xmlChars read, or -1 in case of error, 0 indicate the
1.91 daniel 112: * end of this entity
113: */
114: int
115: xmlParserInputRead(xmlParserInputPtr in, int len) {
116: int ret;
117: int used;
118: int index;
119:
120: #ifdef DEBUG_INPUT
121: fprintf(stderr, "Read\n");
122: #endif
123: if (in->buf == NULL) return(-1);
124: if (in->base == NULL) return(-1);
125: if (in->cur == NULL) return(-1);
126: if (in->buf->buffer == NULL) return(-1);
127:
128: CHECK_BUFFER(in);
129:
130: used = in->cur - in->buf->buffer->content;
131: ret = xmlBufferShrink(in->buf->buffer, used);
132: if (ret > 0) {
133: in->cur -= ret;
134: in->consumed += ret;
135: }
136: ret = xmlParserInputBufferRead(in->buf, len);
137: if (in->base != in->buf->buffer->content) {
138: /*
139: * the buffer has been realloced
140: */
141: index = in->cur - in->base;
142: in->base = in->buf->buffer->content;
143: in->cur = &in->buf->buffer->content[index];
144: }
145:
146: CHECK_BUFFER(in);
147:
148: return(ret);
149: }
150:
151: /**
152: * xmlParserInputGrow:
153: * @in: an XML parser input
154: * @len: an indicative size for the lookahead
155: *
156: * This function increase the input for the parser. It tries to
157: * preserve pointers to the input buffer, and keep already read data
158: *
1.123 daniel 159: * Returns the number of xmlChars read, or -1 in case of error, 0 indicate the
1.91 daniel 160: * end of this entity
161: */
162: int
163: xmlParserInputGrow(xmlParserInputPtr in, int len) {
164: int ret;
165: int index;
166:
167: #ifdef DEBUG_INPUT
168: fprintf(stderr, "Grow\n");
169: #endif
170: if (in->buf == NULL) return(-1);
171: if (in->base == NULL) return(-1);
172: if (in->cur == NULL) return(-1);
173: if (in->buf->buffer == NULL) return(-1);
174:
175: CHECK_BUFFER(in);
176:
177: index = in->cur - in->base;
178: if (in->buf->buffer->use > index + INPUT_CHUNK) {
179:
180: CHECK_BUFFER(in);
181:
182: return(0);
183: }
1.148 daniel 184: if ((in->buf->httpIO != NULL) || (in->buf->ftpIO != NULL) ||
185: (in->buf->file != NULL) ||
1.140 daniel 186: #ifdef HAVE_ZLIB_H
187: (in->buf->gzfile != NULL) ||
188: #endif
189: (in->buf->fd >= 0))
190: ret = xmlParserInputBufferGrow(in->buf, len);
191: else
192: return(0);
1.135 daniel 193:
194: /*
195: * NOTE : in->base may be a "dandling" i.e. freed pointer in this
196: * block, but we use it really as an integer to do some
197: * pointer arithmetic. Insure will raise it as a bug but in
198: * that specific case, that's not !
199: */
1.91 daniel 200: if (in->base != in->buf->buffer->content) {
201: /*
202: * the buffer has been realloced
203: */
204: index = in->cur - in->base;
205: in->base = in->buf->buffer->content;
206: in->cur = &in->buf->buffer->content[index];
207: }
208:
209: CHECK_BUFFER(in);
210:
211: return(ret);
212: }
213:
214: /**
215: * xmlParserInputShrink:
216: * @in: an XML parser input
217: *
218: * This function removes used input for the parser.
219: */
220: void
221: xmlParserInputShrink(xmlParserInputPtr in) {
222: int used;
223: int ret;
224: int index;
225:
226: #ifdef DEBUG_INPUT
227: fprintf(stderr, "Shrink\n");
228: #endif
229: if (in->buf == NULL) return;
230: if (in->base == NULL) return;
231: if (in->cur == NULL) return;
232: if (in->buf->buffer == NULL) return;
233:
234: CHECK_BUFFER(in);
235:
236: used = in->cur - in->buf->buffer->content;
237: if (used > INPUT_CHUNK) {
1.110 daniel 238: ret = xmlBufferShrink(in->buf->buffer, used - LINE_LEN);
1.91 daniel 239: if (ret > 0) {
240: in->cur -= ret;
241: in->consumed += ret;
242: }
243: }
244:
245: CHECK_BUFFER(in);
246:
247: if (in->buf->buffer->use > INPUT_CHUNK) {
248: return;
249: }
250: xmlParserInputBufferRead(in->buf, 2 * INPUT_CHUNK);
251: if (in->base != in->buf->buffer->content) {
252: /*
253: * the buffer has been realloced
254: */
255: index = in->cur - in->base;
256: in->base = in->buf->buffer->content;
257: in->cur = &in->buf->buffer->content[index];
258: }
259:
260: CHECK_BUFFER(in);
261: }
262:
1.45 daniel 263: /************************************************************************
264: * *
265: * Parser stacks related functions and macros *
266: * *
267: ************************************************************************/
1.79 daniel 268:
269: int xmlSubstituteEntitiesDefaultValue = 0;
1.100 daniel 270: int xmlDoValidityCheckingDefaultValue = 0;
1.135 daniel 271: xmlEntityPtr xmlParseStringEntityRef(xmlParserCtxtPtr ctxt,
272: const xmlChar ** str);
1.79 daniel 273:
/*
 * Generic functions for accessing the stacks kept in the parser context.
 *
 * PUSH_AND_POP(scope, type, name) defines two functions working on the
 * context fields name##Tab (storage), name##Nr (number of items),
 * name##Max (allocated slots) and name (current top item):
 *
 *   scope int  name##Push(ctxt, value)
 *       Push @value, doubling the table when it is full. Returns the
 *       index at which the value was stored, or 0 if the table could not
 *       be enlarged (note that 0 is also the index of the first item).
 *       On allocation failure the existing table and its size are left
 *       untouched: the reallocation result is only committed once it is
 *       known to be non-NULL.
 *
 *   scope type name##Pop(ctxt)
 *       Pop and return the top item (0 if the stack is empty), clear the
 *       freed slot and make ctxt->name designate the new top, or NULL.
 */

#define PUSH_AND_POP(scope, type, name)                                 \
scope int name##Push(xmlParserCtxtPtr ctxt, type value) {               \
    if (ctxt->name##Nr >= ctxt->name##Max) {                            \
        int newMax = (ctxt->name##Max > 0) ? ctxt->name##Max * 2 : 1;   \
        void *newTab = xmlRealloc(ctxt->name##Tab,                      \
                         newMax * sizeof(ctxt->name##Tab[0]));          \
        if (newTab == NULL) {                                           \
            fprintf(stderr, "realloc failed !\n");                      \
            return(0);                                                  \
        }                                                               \
        ctxt->name##Tab = newTab;                                       \
        ctxt->name##Max = newMax;                                       \
    }                                                                   \
    ctxt->name##Tab[ctxt->name##Nr] = value;                            \
    ctxt->name = value;                                                 \
    return(ctxt->name##Nr++);                                           \
}                                                                       \
scope type name##Pop(xmlParserCtxtPtr ctxt) {                           \
    type ret;                                                           \
    if (ctxt->name##Nr <= 0) return(0);                                 \
    ctxt->name##Nr--;                                                   \
    if (ctxt->name##Nr > 0)                                             \
        ctxt->name = ctxt->name##Tab[ctxt->name##Nr - 1];               \
    else                                                                \
        ctxt->name = NULL;                                              \
    ret = ctxt->name##Tab[ctxt->name##Nr];                              \
    ctxt->name##Tab[ctxt->name##Nr] = 0;                                \
    return(ret);                                                        \
}
305:
1.140 daniel 306: PUSH_AND_POP(extern, xmlParserInputPtr, input)
307: PUSH_AND_POP(extern, xmlNodePtr, node)
308: PUSH_AND_POP(extern, xmlChar*, name)
1.40 daniel 309:
/*
 * Macros for accessing the content. Those should be used only by the parser,
 * and not exported. All of them expect a variable named ctxt in scope.
 *
 * Dirty macros, i.e. one needs to make assumptions on the context to use
 * them:
 *
 *   RAW     -1 when a token is pending in ctxt->token, otherwise the
 *           xmlChar at the current input position.
 *   CUR_PTR the current pointer to the xmlChar to be parsed.
 *           To be used with extreme caution since operations consuming
 *           characters may move the input buffer to a different location !
 *   CUR     the pending token if any, otherwise the current xmlChar value,
 *           i.e. an 8 bit value if compiled in ISO-Latin or UTF-8.
 *           This should be used internally by the parser
 *           only to compare to ASCII values otherwise it would break when
 *           running with UTF-8 encoding.
 *   NXT(n)  the n'th next xmlChar. Same as CUR, it should be used only
 *           to compare on ASCII based substrings.
 *   SKIP(n) skip n xmlChars, and must also be used only to skip ASCII
 *           defined strings within the parser.
 *
 * Clean macros, not dependent on an ASCII context, expect UTF-8 encoding:
 *
 *   NEXT     skip to the next character, this does the proper decoding
 *            in UTF-8 mode. It also pops finished entities on the fly.
 *   NEXTL(l) advance by l xmlChars (the length of the current char),
 *            updating the line/column counters.
 *   COPY_BUF(l,b,i,v) append char v of length l to b at index i and
 *            advance i accordingly.
 *   CUR_CHAR(l) the current char as an int, its length is stored in l.
 *   CUR_SCHAR(s, l) same as CUR_CHAR but reading from the string s.
 *
 * WARNING: SKIP, SHRINK, GROW, NEXTL and COPY_BUF expand to several
 * statements, and SKIP_BLANKS, NEXT, NEXTL, CUR_CHAR, CUR_SCHAR and
 * COPY_BUF already end with a semicolon. Use them only as full
 * statements inside a braced block, never as the unbraced body of an
 * if/else/while and never inside an expression.
 */

#define RAW (ctxt->token ? -1 : (*ctxt->input->cur))
#define CUR (ctxt->token ? ctxt->token : (*ctxt->input->cur))
#define NXT(val) ctxt->input->cur[(val)]
#define CUR_PTR ctxt->input->cur

#define SKIP(val) ctxt->nbChars += (val),ctxt->input->cur += (val);     \
    if (*ctxt->input->cur == '%') xmlParserHandlePEReference(ctxt);     \
    if (*ctxt->input->cur == '&') xmlParserHandleReference(ctxt);       \
    if ((*ctxt->input->cur == 0) &&                                     \
        (xmlParserInputGrow(ctxt->input, INPUT_CHUNK) <= 0))            \
            xmlPopInput(ctxt)

#define SHRINK xmlParserInputShrink(ctxt->input);                       \
    if ((*ctxt->input->cur == 0) &&                                     \
        (xmlParserInputGrow(ctxt->input, INPUT_CHUNK) <= 0))            \
            xmlPopInput(ctxt)

#define GROW xmlParserInputGrow(ctxt->input, INPUT_CHUNK);              \
    if ((*ctxt->input->cur == 0) &&                                     \
        (xmlParserInputGrow(ctxt->input, INPUT_CHUNK) <= 0))            \
            xmlPopInput(ctxt)

#define SKIP_BLANKS xmlSkipBlankChars(ctxt);

#define NEXT xmlNextChar(ctxt);

#define NEXTL(l)                                                        \
    if (*(ctxt->input->cur) == '\n') {                                  \
        ctxt->input->line++; ctxt->input->col = 1;                      \
    } else ctxt->input->col++;                                          \
    ctxt->token = 0; ctxt->input->cur += (l);                           \
    if (*ctxt->input->cur == '%') xmlParserHandlePEReference(ctxt);     \
    if (*ctxt->input->cur == '&') xmlParserHandleReference(ctxt);

#define CUR_CHAR(l) xmlCurrentChar(ctxt, &(l));
#define CUR_SCHAR(s, l) xmlStringCurrentChar(ctxt, (s), &(l));

#define COPY_BUF(l,b,i,v)                                               \
    if ((l) == 1) (b)[(i)++] = (xmlChar) (v);                           \
    else (i) += xmlCopyChar((l),&(b)[(i)],(v));
1.151 daniel 377:
378: /**
379: * xmlNextChar:
380: * @ctxt: the XML parser context
381: *
382: * Skip to the next char input char.
383: */
1.55 daniel 384:
1.151 daniel 385: void
386: xmlNextChar(xmlParserCtxtPtr ctxt) {
387: if (ctxt->token != 0) ctxt->token = 0;
388: else {
389: if ((*ctxt->input->cur == 0) &&
390: (xmlParserInputGrow(ctxt->input, INPUT_CHUNK) <= 0) &&
391: (ctxt->instate != XML_PARSER_COMMENT)) {
392: /*
393: * If we are at the end of the current entity and
394: * the context allows it, we pop consumed entities
395: * automatically.
396: * TODO: the auto closing should be blocked in other cases
397: */
398: xmlPopInput(ctxt);
399: } else {
400: if (*(ctxt->input->cur) == '\n') {
401: ctxt->input->line++; ctxt->input->col = 1;
402: } else ctxt->input->col++;
403: if (ctxt->encoding == NULL) {
404: /*
405: * We are supposed to handle UTF8, check it's valid
406: * From rfc2044: encoding of the Unicode values on UTF-8:
407: *
408: * UCS-4 range (hex.) UTF-8 octet sequence (binary)
409: * 0000 0000-0000 007F 0xxxxxxx
410: * 0000 0080-0000 07FF 110xxxxx 10xxxxxx
411: * 0000 0800-0000 FFFF 1110xxxx 10xxxxxx 10xxxxxx
412: *
1.160 daniel 413: * Check for the 0x110000 limit too
1.151 daniel 414: */
415: const unsigned char *cur = ctxt->input->cur;
416: unsigned char c;
1.91 daniel 417:
1.151 daniel 418: c = *cur;
419: if (c & 0x80) {
420: if (cur[1] == 0)
421: xmlParserInputGrow(ctxt->input, INPUT_CHUNK);
422: if ((cur[1] & 0xc0) != 0x80)
423: goto encoding_error;
424: if ((c & 0xe0) == 0xe0) {
425: unsigned int val;
426:
427: if (cur[2] == 0)
428: xmlParserInputGrow(ctxt->input, INPUT_CHUNK);
429: if ((cur[2] & 0xc0) != 0x80)
430: goto encoding_error;
431: if ((c & 0xf0) == 0xf0) {
432: if (cur[3] == 0)
433: xmlParserInputGrow(ctxt->input, INPUT_CHUNK);
434: if (((c & 0xf8) != 0xf0) ||
435: ((cur[3] & 0xc0) != 0x80))
436: goto encoding_error;
437: /* 4-byte code */
438: ctxt->input->cur += 4;
439: val = (cur[0] & 0x7) << 18;
440: val |= (cur[1] & 0x3f) << 12;
441: val |= (cur[2] & 0x3f) << 6;
442: val |= cur[3] & 0x3f;
443: } else {
444: /* 3-byte code */
445: ctxt->input->cur += 3;
446: val = (cur[0] & 0xf) << 12;
447: val |= (cur[1] & 0x3f) << 6;
448: val |= cur[2] & 0x3f;
449: }
450: if (((val > 0xd7ff) && (val < 0xe000)) ||
451: ((val > 0xfffd) && (val < 0x10000)) ||
1.160 daniel 452: (val >= 0x110000)) {
1.151 daniel 453: if ((ctxt->sax != NULL) &&
454: (ctxt->sax->error != NULL))
455: ctxt->sax->error(ctxt->userData,
456: "Char out of allowed range\n");
457: ctxt->errNo = XML_ERR_INVALID_ENCODING;
458: ctxt->wellFormed = 0;
459: }
460: } else
461: /* 2-byte code */
462: ctxt->input->cur += 2;
463: } else
464: /* 1-byte code */
465: ctxt->input->cur++;
466: } else {
467: /*
468: * Assume it's a fixed lenght encoding (1) with
469: * a compatibke encoding for the ASCII set, since
470: * XML constructs only use < 128 chars
471: */
472: ctxt->input->cur++;
473: }
474: ctxt->nbChars++;
475: if (*ctxt->input->cur == 0)
476: xmlParserInputGrow(ctxt->input, INPUT_CHUNK);
477: }
478: }
1.154 daniel 479: if (*ctxt->input->cur == '%') xmlParserHandlePEReference(ctxt);
480: if (*ctxt->input->cur == '&') xmlParserHandleReference(ctxt);
1.168 daniel 481: if ((*ctxt->input->cur == 0) &&
482: (xmlParserInputGrow(ctxt->input, INPUT_CHUNK) <= 0))
483: xmlPopInput(ctxt);
1.151 daniel 484: return;
485: encoding_error:
486: /*
487: * If we detect an UTF8 error that probably mean that the
488: * input encoding didn't get properly advertized in the
489: * declaration header. Report the error and switch the encoding
490: * to ISO-Latin-1 (if you don't like this policy, just declare the
491: * encoding !)
492: */
493: if ((ctxt->sax != NULL) && (ctxt->sax->error != NULL))
494: ctxt->sax->error(ctxt->userData,
495: "Input is not proper UTF-8, indicate encoding !\n");
496: ctxt->errNo = XML_ERR_INVALID_ENCODING;
497:
498: ctxt->encoding = xmlStrdup(BAD_CAST "ISO-8859-1");
499: ctxt->input->cur++;
500: return;
501: }
1.42 daniel 502:
1.152 daniel 503: /**
504: * xmlCurrentChar:
505: * @ctxt: the XML parser context
506: * @len: pointer to the length of the char read
507: *
508: * The current char value, if using UTF-8 this may actaully span multiple
509: * bytes in the input buffer.
510: *
511: * Returns the current char value and its lenght
512: */
513:
514: int
515: xmlCurrentChar(xmlParserCtxtPtr ctxt, int *len) {
516: if (ctxt->token != 0) {
517: *len = 0;
518: return(ctxt->token);
519: }
520: if (ctxt->encoding == NULL) {
521: /*
522: * We are supposed to handle UTF8, check it's valid
523: * From rfc2044: encoding of the Unicode values on UTF-8:
524: *
525: * UCS-4 range (hex.) UTF-8 octet sequence (binary)
526: * 0000 0000-0000 007F 0xxxxxxx
527: * 0000 0080-0000 07FF 110xxxxx 10xxxxxx
528: * 0000 0800-0000 FFFF 1110xxxx 10xxxxxx 10xxxxxx
529: *
1.160 daniel 530: * Check for the 0x110000 limit too
1.152 daniel 531: */
532: const unsigned char *cur = ctxt->input->cur;
533: unsigned char c;
534: unsigned int val;
535:
536: c = *cur;
537: if (c & 0x80) {
538: if (cur[1] == 0)
539: xmlParserInputGrow(ctxt->input, INPUT_CHUNK);
540: if ((cur[1] & 0xc0) != 0x80)
541: goto encoding_error;
542: if ((c & 0xe0) == 0xe0) {
543:
544: if (cur[2] == 0)
545: xmlParserInputGrow(ctxt->input, INPUT_CHUNK);
546: if ((cur[2] & 0xc0) != 0x80)
547: goto encoding_error;
548: if ((c & 0xf0) == 0xf0) {
549: if (cur[3] == 0)
550: xmlParserInputGrow(ctxt->input, INPUT_CHUNK);
551: if (((c & 0xf8) != 0xf0) ||
552: ((cur[3] & 0xc0) != 0x80))
553: goto encoding_error;
554: /* 4-byte code */
555: *len = 4;
556: val = (cur[0] & 0x7) << 18;
557: val |= (cur[1] & 0x3f) << 12;
558: val |= (cur[2] & 0x3f) << 6;
559: val |= cur[3] & 0x3f;
560: } else {
561: /* 3-byte code */
562: *len = 3;
563: val = (cur[0] & 0xf) << 12;
564: val |= (cur[1] & 0x3f) << 6;
565: val |= cur[2] & 0x3f;
566: }
567: } else {
568: /* 2-byte code */
569: *len = 2;
570: val = (cur[0] & 0x1f) << 6;
1.168 daniel 571: val |= cur[1] & 0x3f;
1.152 daniel 572: }
573: if (!IS_CHAR(val)) {
574: if ((ctxt->sax != NULL) &&
575: (ctxt->sax->error != NULL))
576: ctxt->sax->error(ctxt->userData,
577: "Char out of allowed range\n");
578: ctxt->errNo = XML_ERR_INVALID_ENCODING;
579: ctxt->wellFormed = 0;
580: }
581: return(val);
582: } else {
583: /* 1-byte code */
584: *len = 1;
585: return((int) *ctxt->input->cur);
586: }
587: }
588: /*
589: * Assume it's a fixed lenght encoding (1) with
590: * a compatibke encoding for the ASCII set, since
591: * XML constructs only use < 128 chars
592: */
593: *len = 1;
594: return((int) *ctxt->input->cur);
595: encoding_error:
596: /*
597: * If we detect an UTF8 error that probably mean that the
598: * input encoding didn't get properly advertized in the
599: * declaration header. Report the error and switch the encoding
600: * to ISO-Latin-1 (if you don't like this policy, just declare the
601: * encoding !)
602: */
603: if ((ctxt->sax != NULL) && (ctxt->sax->error != NULL))
604: ctxt->sax->error(ctxt->userData,
605: "Input is not proper UTF-8, indicate encoding !\n");
606: ctxt->errNo = XML_ERR_INVALID_ENCODING;
607:
608: ctxt->encoding = xmlStrdup(BAD_CAST "ISO-8859-1");
609: *len = 1;
610: return((int) *ctxt->input->cur);
611: }
612:
613: /**
1.162 daniel 614: * xmlStringCurrentChar:
615: * @ctxt: the XML parser context
616: * @cur: pointer to the beginning of the char
617: * @len: pointer to the length of the char read
618: *
619: * The current char value, if using UTF-8 this may actaully span multiple
620: * bytes in the input buffer.
621: *
622: * Returns the current char value and its lenght
623: */
624:
625: int
626: xmlStringCurrentChar(xmlParserCtxtPtr ctxt, const xmlChar *cur, int *len) {
627: if (ctxt->encoding == NULL) {
628: /*
629: * We are supposed to handle UTF8, check it's valid
630: * From rfc2044: encoding of the Unicode values on UTF-8:
631: *
632: * UCS-4 range (hex.) UTF-8 octet sequence (binary)
633: * 0000 0000-0000 007F 0xxxxxxx
634: * 0000 0080-0000 07FF 110xxxxx 10xxxxxx
635: * 0000 0800-0000 FFFF 1110xxxx 10xxxxxx 10xxxxxx
636: *
637: * Check for the 0x110000 limit too
638: */
639: unsigned char c;
640: unsigned int val;
641:
642: c = *cur;
643: if (c & 0x80) {
644: if ((cur[1] & 0xc0) != 0x80)
645: goto encoding_error;
646: if ((c & 0xe0) == 0xe0) {
647:
648: if ((cur[2] & 0xc0) != 0x80)
649: goto encoding_error;
650: if ((c & 0xf0) == 0xf0) {
651: if (((c & 0xf8) != 0xf0) ||
652: ((cur[3] & 0xc0) != 0x80))
653: goto encoding_error;
654: /* 4-byte code */
655: *len = 4;
656: val = (cur[0] & 0x7) << 18;
657: val |= (cur[1] & 0x3f) << 12;
658: val |= (cur[2] & 0x3f) << 6;
659: val |= cur[3] & 0x3f;
660: } else {
661: /* 3-byte code */
662: *len = 3;
663: val = (cur[0] & 0xf) << 12;
664: val |= (cur[1] & 0x3f) << 6;
665: val |= cur[2] & 0x3f;
666: }
667: } else {
668: /* 2-byte code */
669: *len = 2;
670: val = (cur[0] & 0x1f) << 6;
671: val |= cur[2] & 0x3f;
672: }
673: if (!IS_CHAR(val)) {
674: if ((ctxt->sax != NULL) &&
675: (ctxt->sax->error != NULL))
676: ctxt->sax->error(ctxt->userData,
677: "Char out of allowed range\n");
678: ctxt->errNo = XML_ERR_INVALID_ENCODING;
679: ctxt->wellFormed = 0;
680: }
681: return(val);
682: } else {
683: /* 1-byte code */
684: *len = 1;
685: return((int) *cur);
686: }
687: }
688: /*
689: * Assume it's a fixed lenght encoding (1) with
690: * a compatibke encoding for the ASCII set, since
691: * XML constructs only use < 128 chars
692: */
693: *len = 1;
694: return((int) *cur);
695: encoding_error:
696: /*
697: * If we detect an UTF8 error that probably mean that the
698: * input encoding didn't get properly advertized in the
699: * declaration header. Report the error and switch the encoding
700: * to ISO-Latin-1 (if you don't like this policy, just declare the
701: * encoding !)
702: */
703: if ((ctxt->sax != NULL) && (ctxt->sax->error != NULL))
704: ctxt->sax->error(ctxt->userData,
705: "Input is not proper UTF-8, indicate encoding !\n");
706: ctxt->errNo = XML_ERR_INVALID_ENCODING;
707:
708: *len = 1;
709: return((int) *cur);
710: }
711:
712: /**
1.152 daniel 713: * xmlCopyChar:
714: * @len: pointer to the length of the char read (or zero)
715: * @array: pointer to an arry of xmlChar
716: * @val: the char value
717: *
718: * append the char value in the array
719: *
720: * Returns the number of xmlChar written
721: */
722:
723: int
724: xmlCopyChar(int len, xmlChar *out, int val) {
725: /*
726: * We are supposed to handle UTF8, check it's valid
727: * From rfc2044: encoding of the Unicode values on UTF-8:
728: *
729: * UCS-4 range (hex.) UTF-8 octet sequence (binary)
730: * 0000 0000-0000 007F 0xxxxxxx
731: * 0000 0080-0000 07FF 110xxxxx 10xxxxxx
732: * 0000 0800-0000 FFFF 1110xxxx 10xxxxxx 10xxxxxx
733: */
734: if (len == 0) {
735: if (val < 0) len = 0;
1.160 daniel 736: else if (val < 0x80) len = 1;
737: else if (val < 0x800) len = 2;
738: else if (val < 0x10000) len = 3;
739: else if (val < 0x110000) len = 4;
1.152 daniel 740: if (len == 0) {
741: fprintf(stderr, "Internal error, xmlCopyChar 0x%X out of bound\n",
742: val);
743: return(0);
744: }
745: }
746: if (len > 1) {
747: int bits;
748:
749: if (val < 0x80) { *out++= val; bits= -6; }
750: else if (val < 0x800) { *out++= (val >> 6) | 0xC0; bits= 0; }
751: else if (val < 0x10000) { *out++= (val >> 12) | 0xE0; bits= 6; }
752: else { *out++= (val >> 18) | 0xF0; bits= 12; }
753:
754: for ( ; bits >= 0; bits-= 6)
755: *out++= ((val >> bits) & 0x3F) | 0x80 ;
756:
757: return(len);
758: }
759: *out = (xmlChar) val;
760: return(1);
1.155 daniel 761: }
762:
763: /**
764: * xmlSkipBlankChars:
765: * @ctxt: the XML parser context
766: *
767: * skip all blanks character found at that point in the input streams.
768: * It pops up finished entities in the process if allowable at that point.
769: *
770: * Returns the number of space chars skipped
771: */
772:
773: int
774: xmlSkipBlankChars(xmlParserCtxtPtr ctxt) {
775: int cur, res = 0;
776:
777: do {
778: cur = CUR;
779: while (IS_BLANK(cur)) {
780: NEXT;
781: cur = CUR;
782: res++;
783: }
784: while ((cur == 0) && (ctxt->inputNr > 1) &&
785: (ctxt->instate != XML_PARSER_COMMENT)) {
786: xmlPopInput(ctxt);
787: cur = CUR;
788: }
789: if (*ctxt->input->cur == '%') xmlParserHandlePEReference(ctxt);
790: if (*ctxt->input->cur == '&') xmlParserHandleReference(ctxt);
791: } while (IS_BLANK(cur));
792: return(res);
1.152 daniel 793: }
794:
1.97 daniel 795: /************************************************************************
796: * *
797: * Commodity functions to handle entities processing *
798: * *
799: ************************************************************************/
1.40 daniel 800:
1.50 daniel 801: /**
802: * xmlPopInput:
803: * @ctxt: an XML parser context
804: *
1.40 daniel 805: * xmlPopInput: the current input pointed by ctxt->input came to an end
806: * pop it and return the next char.
1.45 daniel 807: *
1.123 daniel 808: * Returns the current xmlChar in the parser context
1.40 daniel 809: */
1.123 daniel 810: xmlChar
1.55 daniel 811: xmlPopInput(xmlParserCtxtPtr ctxt) {
1.40 daniel 812: if (ctxt->inputNr == 1) return(0); /* End of main Input */
1.69 daniel 813: xmlFreeInputStream(inputPop(ctxt));
1.97 daniel 814: if ((*ctxt->input->cur == 0) &&
815: (xmlParserInputGrow(ctxt->input, INPUT_CHUNK) <= 0))
816: return(xmlPopInput(ctxt));
1.40 daniel 817: return(CUR);
818: }
819:
1.50 daniel 820: /**
821: * xmlPushInput:
822: * @ctxt: an XML parser context
823: * @input: an XML parser input fragment (entity, XML fragment ...).
824: *
1.40 daniel 825: * xmlPushInput: switch to a new input stream which is stacked on top
826: * of the previous one(s).
827: */
1.55 daniel 828: void
829: xmlPushInput(xmlParserCtxtPtr ctxt, xmlParserInputPtr input) {
1.40 daniel 830: if (input == NULL) return;
831: inputPush(ctxt, input);
1.164 daniel 832: GROW;
1.40 daniel 833: }
834:
1.50 daniel 835: /**
1.69 daniel 836: * xmlFreeInputStream:
1.127 daniel 837: * @input: an xmlParserInputPtr
1.69 daniel 838: *
839: * Free up an input stream.
840: */
841: void
842: xmlFreeInputStream(xmlParserInputPtr input) {
843: if (input == NULL) return;
844:
1.119 daniel 845: if (input->filename != NULL) xmlFree((char *) input->filename);
846: if (input->directory != NULL) xmlFree((char *) input->directory);
1.164 daniel 847: if (input->encoding != NULL) xmlFree((char *) input->encoding);
1.165 daniel 848: if (input->version != NULL) xmlFree((char *) input->version);
1.69 daniel 849: if ((input->free != NULL) && (input->base != NULL))
1.123 daniel 850: input->free((xmlChar *) input->base);
1.93 veillard 851: if (input->buf != NULL)
852: xmlFreeParserInputBuffer(input->buf);
1.69 daniel 853: memset(input, -1, sizeof(xmlParserInput));
1.119 daniel 854: xmlFree(input);
1.69 daniel 855: }
856:
857: /**
1.96 daniel 858: * xmlNewInputStream:
859: * @ctxt: an XML parser context
860: *
861: * Create a new input stream structure
862: * Returns the new input stream or NULL
863: */
864: xmlParserInputPtr
865: xmlNewInputStream(xmlParserCtxtPtr ctxt) {
866: xmlParserInputPtr input;
867:
1.119 daniel 868: input = (xmlParserInputPtr) xmlMalloc(sizeof(xmlParserInput));
1.96 daniel 869: if (input == NULL) {
1.123 daniel 870: ctxt->errNo = XML_ERR_NO_MEMORY;
1.96 daniel 871: if ((ctxt->sax != NULL) && (ctxt->sax->error != NULL))
1.122 daniel 872: ctxt->sax->error(ctxt->userData,
873: "malloc: couldn't allocate a new input stream\n");
1.123 daniel 874: ctxt->errNo = XML_ERR_NO_MEMORY;
1.96 daniel 875: return(NULL);
876: }
1.165 daniel 877: memset(input, 0, sizeof(xmlParserInput));
1.96 daniel 878: input->line = 1;
879: input->col = 1;
1.167 daniel 880: input->standalone = -1;
1.96 daniel 881: return(input);
882: }
883:
884: /**
1.50 daniel 885: * xmlNewEntityInputStream:
886: * @ctxt: an XML parser context
887: * @entity: an Entity pointer
888: *
1.82 daniel 889: * Create a new input stream based on an xmlEntityPtr
1.113 daniel 890: *
891: * Returns the new input stream or NULL
1.45 daniel 892: */
1.50 daniel 893: xmlParserInputPtr
894: xmlNewEntityInputStream(xmlParserCtxtPtr ctxt, xmlEntityPtr entity) {
1.45 daniel 895: xmlParserInputPtr input;
896:
897: if (entity == NULL) {
1.123 daniel 898: ctxt->errNo = XML_ERR_INTERNAL_ERROR;
1.55 daniel 899: if ((ctxt->sax != NULL) && (ctxt->sax->error != NULL))
1.74 daniel 900: ctxt->sax->error(ctxt->userData,
1.45 daniel 901: "internal: xmlNewEntityInputStream entity = NULL\n");
1.123 daniel 902: ctxt->errNo = XML_ERR_INTERNAL_ERROR;
1.50 daniel 903: return(NULL);
1.45 daniel 904: }
905: if (entity->content == NULL) {
1.159 daniel 906: switch (entity->etype) {
1.113 daniel 907: case XML_EXTERNAL_GENERAL_UNPARSED_ENTITY:
1.123 daniel 908: ctxt->errNo = XML_ERR_UNPARSED_ENTITY;
1.113 daniel 909: if ((ctxt->sax != NULL) && (ctxt->sax->error != NULL))
910: ctxt->sax->error(ctxt->userData,
911: "xmlNewEntityInputStream unparsed entity !\n");
912: break;
913: case XML_EXTERNAL_GENERAL_PARSED_ENTITY:
914: case XML_EXTERNAL_PARAMETER_ENTITY:
1.116 daniel 915: return(xmlLoadExternalEntity((char *) entity->SystemID,
1.142 daniel 916: (char *) entity->ExternalID, ctxt));
1.113 daniel 917: case XML_INTERNAL_GENERAL_ENTITY:
918: if ((ctxt->sax != NULL) && (ctxt->sax->error != NULL))
919: ctxt->sax->error(ctxt->userData,
920: "Internal entity %s without content !\n", entity->name);
921: break;
922: case XML_INTERNAL_PARAMETER_ENTITY:
1.123 daniel 923: ctxt->errNo = XML_ERR_INTERNAL_ERROR;
1.113 daniel 924: if ((ctxt->sax != NULL) && (ctxt->sax->error != NULL))
925: ctxt->sax->error(ctxt->userData,
926: "Internal parameter entity %s without content !\n", entity->name);
927: break;
928: case XML_INTERNAL_PREDEFINED_ENTITY:
1.123 daniel 929: ctxt->errNo = XML_ERR_INTERNAL_ERROR;
1.113 daniel 930: if ((ctxt->sax != NULL) && (ctxt->sax->error != NULL))
931: ctxt->sax->error(ctxt->userData,
932: "Predefined entity %s without content !\n", entity->name);
933: break;
934: }
1.50 daniel 935: return(NULL);
1.45 daniel 936: }
1.96 daniel 937: input = xmlNewInputStream(ctxt);
1.45 daniel 938: if (input == NULL) {
1.50 daniel 939: return(NULL);
1.45 daniel 940: }
1.156 daniel 941: input->filename = (char *) entity->SystemID;
1.45 daniel 942: input->base = entity->content;
943: input->cur = entity->content;
1.140 daniel 944: input->length = entity->length;
1.50 daniel 945: return(input);
1.45 daniel 946: }
947:
1.59 daniel 948: /**
949: * xmlNewStringInputStream:
950: * @ctxt: an XML parser context
1.96 daniel 951: * @buffer: an memory buffer
1.59 daniel 952: *
953: * Create a new input stream based on a memory buffer.
1.68 daniel 954: * Returns the new input stream
1.59 daniel 955: */
956: xmlParserInputPtr
1.123 daniel 957: xmlNewStringInputStream(xmlParserCtxtPtr ctxt, const xmlChar *buffer) {
1.59 daniel 958: xmlParserInputPtr input;
959:
1.96 daniel 960: if (buffer == NULL) {
1.123 daniel 961: ctxt->errNo = XML_ERR_INTERNAL_ERROR;
1.59 daniel 962: if ((ctxt->sax != NULL) && (ctxt->sax->error != NULL))
1.74 daniel 963: ctxt->sax->error(ctxt->userData,
1.59 daniel 964: "internal: xmlNewStringInputStream string = NULL\n");
965: return(NULL);
966: }
1.96 daniel 967: input = xmlNewInputStream(ctxt);
1.59 daniel 968: if (input == NULL) {
969: return(NULL);
970: }
1.96 daniel 971: input->base = buffer;
972: input->cur = buffer;
1.140 daniel 973: input->length = xmlStrlen(buffer);
1.59 daniel 974: return(input);
975: }
976:
1.76 daniel 977: /**
978: * xmlNewInputFromFile:
979: * @ctxt: an XML parser context
980: * @filename: the filename to use as entity
981: *
982: * Create a new input stream based on a file.
983: *
984: * Returns the new input stream or NULL in case of error
985: */
986: xmlParserInputPtr
1.79 daniel 987: xmlNewInputFromFile(xmlParserCtxtPtr ctxt, const char *filename) {
1.91 daniel 988: xmlParserInputBufferPtr buf;
1.76 daniel 989: xmlParserInputPtr inputStream;
1.111 daniel 990: char *directory = NULL;
1.76 daniel 991:
1.96 daniel 992: if (ctxt == NULL) return(NULL);
1.91 daniel 993: buf = xmlParserInputBufferCreateFilename(filename, XML_CHAR_ENCODING_NONE);
1.94 daniel 994: if (buf == NULL) {
1.140 daniel 995: char name[XML_PARSER_BIG_BUFFER_SIZE];
1.106 daniel 996:
1.94 daniel 997: if ((ctxt->input != NULL) && (ctxt->input->directory != NULL)) {
998: #ifdef WIN32
999: sprintf(name, "%s\\%s", ctxt->input->directory, filename);
1000: #else
1001: sprintf(name, "%s/%s", ctxt->input->directory, filename);
1002: #endif
1003: buf = xmlParserInputBufferCreateFilename(name,
1004: XML_CHAR_ENCODING_NONE);
1.106 daniel 1005: if (buf != NULL)
1.142 daniel 1006: directory = xmlParserGetDirectory(name);
1.106 daniel 1007: }
1008: if ((buf == NULL) && (ctxt->directory != NULL)) {
1009: #ifdef WIN32
1010: sprintf(name, "%s\\%s", ctxt->directory, filename);
1011: #else
1012: sprintf(name, "%s/%s", ctxt->directory, filename);
1013: #endif
1014: buf = xmlParserInputBufferCreateFilename(name,
1015: XML_CHAR_ENCODING_NONE);
1016: if (buf != NULL)
1.142 daniel 1017: directory = xmlParserGetDirectory(name);
1.106 daniel 1018: }
1019: if (buf == NULL)
1.94 daniel 1020: return(NULL);
1021: }
1022: if (directory == NULL)
1023: directory = xmlParserGetDirectory(filename);
1.76 daniel 1024:
1.96 daniel 1025: inputStream = xmlNewInputStream(ctxt);
1.76 daniel 1026: if (inputStream == NULL) {
1.119 daniel 1027: if (directory != NULL) xmlFree((char *) directory);
1.76 daniel 1028: return(NULL);
1029: }
1030:
1.119 daniel 1031: inputStream->filename = xmlMemStrdup(filename);
1.94 daniel 1032: inputStream->directory = directory;
1.91 daniel 1033: inputStream->buf = buf;
1.76 daniel 1034:
1.91 daniel 1035: inputStream->base = inputStream->buf->buffer->content;
1036: inputStream->cur = inputStream->buf->buffer->content;
1.106 daniel 1037: if ((ctxt->directory == NULL) && (directory != NULL))
1.134 daniel 1038: ctxt->directory = (char *) xmlStrdup((const xmlChar *) directory);
1.76 daniel 1039: return(inputStream);
1040: }
1041:
1.77 daniel 1042: /************************************************************************
1043: * *
1.97 daniel 1044: * Commodity functions to handle parser contexts *
1045: * *
1046: ************************************************************************/
1047:
1048: /**
1049: * xmlInitParserCtxt:
1050: * @ctxt: an XML parser context
1051: *
1052: * Initialize a parser context
1053: */
1054:
1055: void
1056: xmlInitParserCtxt(xmlParserCtxtPtr ctxt)
1057: {
1058: xmlSAXHandler *sax;
1059:
1.168 daniel 1060: xmlDefaultSAXHandlerInit();
1061:
1.119 daniel 1062: sax = (xmlSAXHandler *) xmlMalloc(sizeof(xmlSAXHandler));
1.97 daniel 1063: if (sax == NULL) {
1064: fprintf(stderr, "xmlInitParserCtxt: out of memory\n");
1065: }
1066:
1067: /* Allocate the Input stack */
1.119 daniel 1068: ctxt->inputTab = (xmlParserInputPtr *) xmlMalloc(5 * sizeof(xmlParserInputPtr));
1.97 daniel 1069: ctxt->inputNr = 0;
1070: ctxt->inputMax = 5;
1071: ctxt->input = NULL;
1.165 daniel 1072:
1.97 daniel 1073: ctxt->version = NULL;
1074: ctxt->encoding = NULL;
1075: ctxt->standalone = -1;
1.98 daniel 1076: ctxt->hasExternalSubset = 0;
1077: ctxt->hasPErefs = 0;
1.97 daniel 1078: ctxt->html = 0;
1.98 daniel 1079: ctxt->external = 0;
1.140 daniel 1080: ctxt->instate = XML_PARSER_START;
1.97 daniel 1081: ctxt->token = 0;
1.106 daniel 1082: ctxt->directory = NULL;
1.97 daniel 1083:
1084: /* Allocate the Node stack */
1.119 daniel 1085: ctxt->nodeTab = (xmlNodePtr *) xmlMalloc(10 * sizeof(xmlNodePtr));
1.97 daniel 1086: ctxt->nodeNr = 0;
1087: ctxt->nodeMax = 10;
1088: ctxt->node = NULL;
1089:
1.140 daniel 1090: /* Allocate the Name stack */
1091: ctxt->nameTab = (xmlChar **) xmlMalloc(10 * sizeof(xmlChar *));
1092: ctxt->nameNr = 0;
1093: ctxt->nameMax = 10;
1094: ctxt->name = NULL;
1095:
1.160 daniel 1096: if (sax == NULL) {
1097: ctxt->sax = &xmlDefaultSAXHandler;
1098: } else {
1.97 daniel 1099: ctxt->sax = sax;
1100: memcpy(sax, &xmlDefaultSAXHandler, sizeof(xmlSAXHandler));
1101: }
1102: ctxt->userData = ctxt;
1103: ctxt->myDoc = NULL;
1104: ctxt->wellFormed = 1;
1.99 daniel 1105: ctxt->valid = 1;
1.100 daniel 1106: ctxt->validate = xmlDoValidityCheckingDefaultValue;
1107: ctxt->vctxt.userData = ctxt;
1.149 daniel 1108: if (ctxt->validate) {
1109: ctxt->vctxt.error = xmlParserValidityError;
1.160 daniel 1110: if (xmlGetWarningsDefaultValue == 0)
1111: ctxt->vctxt.warning = NULL;
1112: else
1113: ctxt->vctxt.warning = xmlParserValidityWarning;
1.149 daniel 1114: } else {
1115: ctxt->vctxt.error = NULL;
1116: ctxt->vctxt.warning = NULL;
1117: }
1.97 daniel 1118: ctxt->replaceEntities = xmlSubstituteEntitiesDefaultValue;
1119: ctxt->record_info = 0;
1.135 daniel 1120: ctxt->nbChars = 0;
1.140 daniel 1121: ctxt->checkIndex = 0;
1122: ctxt->errNo = XML_ERR_OK;
1.97 daniel 1123: xmlInitNodeInfoSeq(&ctxt->node_seq);
1124: }
1125:
1126: /**
1127: * xmlFreeParserCtxt:
1128: * @ctxt: an XML parser context
1129: *
1130: * Free all the memory used by a parser context. However the parsed
1131: * document in ctxt->myDoc is not freed.
1132: */
1133:
1134: void
1135: xmlFreeParserCtxt(xmlParserCtxtPtr ctxt)
1136: {
1137: xmlParserInputPtr input;
1.140 daniel 1138: xmlChar *oldname;
1.97 daniel 1139:
1140: if (ctxt == NULL) return;
1141:
1142: while ((input = inputPop(ctxt)) != NULL) {
1143: xmlFreeInputStream(input);
1144: }
1.140 daniel 1145: while ((oldname = namePop(ctxt)) != NULL) {
1146: xmlFree(oldname);
1147: }
1148: if (ctxt->nameTab != NULL) xmlFree(ctxt->nameTab);
1.119 daniel 1149: if (ctxt->nodeTab != NULL) xmlFree(ctxt->nodeTab);
1150: if (ctxt->inputTab != NULL) xmlFree(ctxt->inputTab);
1151: if (ctxt->version != NULL) xmlFree((char *) ctxt->version);
1152: if (ctxt->encoding != NULL) xmlFree((char *) ctxt->encoding);
1.165 daniel 1153: if (ctxt->intSubName != NULL) xmlFree((char *) ctxt->intSubName);
1154: if (ctxt->extSubURI != NULL) xmlFree((char *) ctxt->extSubURI);
1155: if (ctxt->extSubSystem != NULL) xmlFree((char *) ctxt->extSubSystem);
1.97 daniel 1156: if ((ctxt->sax != NULL) && (ctxt->sax != &xmlDefaultSAXHandler))
1.119 daniel 1157: xmlFree(ctxt->sax);
1158: if (ctxt->directory != NULL) xmlFree((char *) ctxt->directory);
1159: xmlFree(ctxt);
1.97 daniel 1160: }
1161:
1162: /**
1163: * xmlNewParserCtxt:
1164: *
1165: * Allocate and initialize a new parser context.
1166: *
1167: * Returns the xmlParserCtxtPtr or NULL
1168: */
1169:
1170: xmlParserCtxtPtr
1171: xmlNewParserCtxt()
1172: {
1173: xmlParserCtxtPtr ctxt;
1174:
1.119 daniel 1175: ctxt = (xmlParserCtxtPtr) xmlMalloc(sizeof(xmlParserCtxt));
1.97 daniel 1176: if (ctxt == NULL) {
1177: fprintf(stderr, "xmlNewParserCtxt : cannot allocate context\n");
1178: perror("malloc");
1179: return(NULL);
1180: }
1.165 daniel 1181: memset(ctxt, 0, sizeof(xmlParserCtxt));
1.97 daniel 1182: xmlInitParserCtxt(ctxt);
1183: return(ctxt);
1184: }
1185:
1186: /**
1187: * xmlClearParserCtxt:
1188: * @ctxt: an XML parser context
1189: *
1190: * Clear (release owned resources) and reinitialize a parser context
1191: */
1192:
1193: void
1194: xmlClearParserCtxt(xmlParserCtxtPtr ctxt)
1195: {
1196: xmlClearNodeInfoSeq(&ctxt->node_seq);
1197: xmlInitParserCtxt(ctxt);
1198: }
1199:
1200: /************************************************************************
1201: * *
1.77 daniel 1202: * Commodity functions to handle entities *
1203: * *
1204: ************************************************************************/
1205:
1.174 ! daniel 1206: /**
! 1207: * xmlCheckEntity:
! 1208: * @ctxt: an XML parser context
! 1209: * @content: the entity content string
! 1210: *
! 1211: * Parse an entity content and checks the WF constraints
! 1212: *
! 1213: */
! 1214:
! 1215: void
! 1216: xmlCheckEntity(xmlParserCtxtPtr ctxt, const xmlChar *content) {
! 1217: }
1.97 daniel 1218:
1219: /**
1220: * xmlParseCharRef:
1221: * @ctxt: an XML parser context
1222: *
1223: * parse Reference declarations
1224: *
1225: * [66] CharRef ::= '&#' [0-9]+ ';' |
1226: * '&#x' [0-9a-fA-F]+ ';'
1227: *
1.98 daniel 1228: * [ WFC: Legal Character ]
1229: * Characters referred to using character references must match the
1230: * production for Char.
1231: *
1.135 daniel 1232: * Returns the value parsed (as an int), 0 in case of error
1.77 daniel 1233: */
1.97 daniel 1234: int
1235: xmlParseCharRef(xmlParserCtxtPtr ctxt) {
1236: int val = 0;
1237:
1.111 daniel 1238: if (ctxt->token != 0) {
1239: val = ctxt->token;
1240: ctxt->token = 0;
1241: return(val);
1242: }
1.152 daniel 1243: if ((RAW == '&') && (NXT(1) == '#') &&
1.97 daniel 1244: (NXT(2) == 'x')) {
1245: SKIP(3);
1.152 daniel 1246: while (RAW != ';') {
1247: if ((RAW >= '0') && (RAW <= '9'))
1.97 daniel 1248: val = val * 16 + (CUR - '0');
1.152 daniel 1249: else if ((RAW >= 'a') && (RAW <= 'f'))
1.97 daniel 1250: val = val * 16 + (CUR - 'a') + 10;
1.152 daniel 1251: else if ((RAW >= 'A') && (RAW <= 'F'))
1.97 daniel 1252: val = val * 16 + (CUR - 'A') + 10;
1253: else {
1.123 daniel 1254: ctxt->errNo = XML_ERR_INVALID_HEX_CHARREF;
1.97 daniel 1255: if ((ctxt->sax != NULL) && (ctxt->sax->error != NULL))
1256: ctxt->sax->error(ctxt->userData,
1257: "xmlParseCharRef: invalid hexadecimal value\n");
1258: ctxt->wellFormed = 0;
1259: val = 0;
1260: break;
1261: }
1262: NEXT;
1263: }
1.164 daniel 1264: if (RAW == ';') {
1265: /* on purpose to avoid reentrancy problems with NEXT and SKIP */
1266: ctxt->nbChars ++;
1267: ctxt->input->cur++;
1268: }
1.152 daniel 1269: } else if ((RAW == '&') && (NXT(1) == '#')) {
1.97 daniel 1270: SKIP(2);
1.152 daniel 1271: while (RAW != ';') {
1272: if ((RAW >= '0') && (RAW <= '9'))
1.97 daniel 1273: val = val * 10 + (CUR - '0');
1274: else {
1.123 daniel 1275: ctxt->errNo = XML_ERR_INVALID_DEC_CHARREF;
1.97 daniel 1276: if ((ctxt->sax != NULL) && (ctxt->sax->error != NULL))
1277: ctxt->sax->error(ctxt->userData,
1278: "xmlParseCharRef: invalid decimal value\n");
1279: ctxt->wellFormed = 0;
1280: val = 0;
1281: break;
1282: }
1283: NEXT;
1284: }
1.164 daniel 1285: if (RAW == ';') {
1286: /* on purpose to avoid reentrancy problems with NEXT and SKIP */
1287: ctxt->nbChars ++;
1288: ctxt->input->cur++;
1289: }
1.97 daniel 1290: } else {
1.123 daniel 1291: ctxt->errNo = XML_ERR_INVALID_CHARREF;
1.97 daniel 1292: if ((ctxt->sax != NULL) && (ctxt->sax->error != NULL))
1.98 daniel 1293: ctxt->sax->error(ctxt->userData,
1294: "xmlParseCharRef: invalid value\n");
1.97 daniel 1295: ctxt->wellFormed = 0;
1296: }
1.98 daniel 1297:
1.97 daniel 1298: /*
1.98 daniel 1299: * [ WFC: Legal Character ]
1300: * Characters referred to using character references must match the
1301: * production for Char.
1.97 daniel 1302: */
1303: if (IS_CHAR(val)) {
1304: return(val);
1305: } else {
1.123 daniel 1306: ctxt->errNo = XML_ERR_INVALID_CHAR;
1.97 daniel 1307: if ((ctxt->sax != NULL) && (ctxt->sax->error != NULL))
1.123 daniel 1308: ctxt->sax->error(ctxt->userData, "CharRef: invalid xmlChar value %d\n",
1.97 daniel 1309: val);
1310: ctxt->wellFormed = 0;
1311: }
1312: return(0);
1.77 daniel 1313: }
1314:
1.96 daniel 1315: /**
1.135 daniel 1316: * xmlParseStringCharRef:
1317: * @ctxt: an XML parser context
1318: * @str: a pointer to an index in the string
1319: *
1320: * parse Reference declarations, variant parsing from a string rather
1321: * than an an input flow.
1322: *
1323: * [66] CharRef ::= '&#' [0-9]+ ';' |
1324: * '&#x' [0-9a-fA-F]+ ';'
1325: *
1326: * [ WFC: Legal Character ]
1327: * Characters referred to using character references must match the
1328: * production for Char.
1329: *
1330: * Returns the value parsed (as an int), 0 in case of error, str will be
1331: * updated to the current value of the index
1332: */
1333: int
1334: xmlParseStringCharRef(xmlParserCtxtPtr ctxt, const xmlChar **str) {
1335: const xmlChar *ptr;
1336: xmlChar cur;
1337: int val = 0;
1338:
1339: if ((str == NULL) || (*str == NULL)) return(0);
1340: ptr = *str;
1341: cur = *ptr;
1.137 daniel 1342: if ((cur == '&') && (ptr[1] == '#') && (ptr[2] == 'x')) {
1.135 daniel 1343: ptr += 3;
1344: cur = *ptr;
1345: while (cur != ';') {
1346: if ((cur >= '0') && (cur <= '9'))
1347: val = val * 16 + (cur - '0');
1348: else if ((cur >= 'a') && (cur <= 'f'))
1349: val = val * 16 + (cur - 'a') + 10;
1350: else if ((cur >= 'A') && (cur <= 'F'))
1351: val = val * 16 + (cur - 'A') + 10;
1352: else {
1353: ctxt->errNo = XML_ERR_INVALID_HEX_CHARREF;
1354: if ((ctxt->sax != NULL) && (ctxt->sax->error != NULL))
1355: ctxt->sax->error(ctxt->userData,
1356: "xmlParseCharRef: invalid hexadecimal value\n");
1357: ctxt->wellFormed = 0;
1358: val = 0;
1359: break;
1360: }
1361: ptr++;
1362: cur = *ptr;
1363: }
1364: if (cur == ';')
1365: ptr++;
1.145 daniel 1366: } else if ((cur == '&') && (ptr[1] == '#')){
1.135 daniel 1367: ptr += 2;
1368: cur = *ptr;
1369: while (cur != ';') {
1370: if ((cur >= '0') && (cur <= '9'))
1371: val = val * 10 + (cur - '0');
1372: else {
1373: ctxt->errNo = XML_ERR_INVALID_DEC_CHARREF;
1374: if ((ctxt->sax != NULL) && (ctxt->sax->error != NULL))
1375: ctxt->sax->error(ctxt->userData,
1376: "xmlParseCharRef: invalid decimal value\n");
1377: ctxt->wellFormed = 0;
1378: val = 0;
1379: break;
1380: }
1381: ptr++;
1382: cur = *ptr;
1383: }
1384: if (cur == ';')
1385: ptr++;
1386: } else {
1387: ctxt->errNo = XML_ERR_INVALID_CHARREF;
1388: if ((ctxt->sax != NULL) && (ctxt->sax->error != NULL))
1389: ctxt->sax->error(ctxt->userData,
1390: "xmlParseCharRef: invalid value\n");
1391: ctxt->wellFormed = 0;
1392: return(0);
1393: }
1394: *str = ptr;
1395:
1396: /*
1397: * [ WFC: Legal Character ]
1398: * Characters referred to using character references must match the
1399: * production for Char.
1400: */
1401: if (IS_CHAR(val)) {
1402: return(val);
1403: } else {
1404: ctxt->errNo = XML_ERR_INVALID_CHAR;
1405: if ((ctxt->sax != NULL) && (ctxt->sax->error != NULL))
1406: ctxt->sax->error(ctxt->userData,
1407: "CharRef: invalid xmlChar value %d\n", val);
1408: ctxt->wellFormed = 0;
1409: }
1410: return(0);
1411: }
1412:
1413: /**
1.96 daniel 1414: * xmlParserHandleReference:
1415: * @ctxt: the parser context
1416: *
1.97 daniel 1417: * [67] Reference ::= EntityRef | CharRef
1418: *
1.96 daniel 1419: * [68] EntityRef ::= '&' Name ';'
1420: *
1.98 daniel 1421: * [ WFC: Entity Declared ]
1422: * the Name given in the entity reference must match that in an entity
1423: * declaration, except that well-formed documents need not declare any
1424: * of the following entities: amp, lt, gt, apos, quot.
1425: *
1426: * [ WFC: Parsed Entity ]
1427: * An entity reference must not contain the name of an unparsed entity
1428: *
1.97 daniel 1429: * [66] CharRef ::= '&#' [0-9]+ ';' |
1430: * '&#x' [0-9a-fA-F]+ ';'
1431: *
1.96 daniel 1432: * A PEReference may have been detectect in the current input stream
1433: * the handling is done accordingly to
1434: * http://www.w3.org/TR/REC-xml#entproc
1435: */
1436: void
1437: xmlParserHandleReference(xmlParserCtxtPtr ctxt) {
1.97 daniel 1438: xmlParserInputPtr input;
1.123 daniel 1439: xmlChar *name;
1.97 daniel 1440: xmlEntityPtr ent = NULL;
1441:
1.126 daniel 1442: if (ctxt->token != 0) {
1443: return;
1444: }
1.152 daniel 1445: if (RAW != '&') return;
1.97 daniel 1446: GROW;
1.152 daniel 1447: if ((RAW == '&') && (NXT(1) == '#')) {
1.97 daniel 1448: switch(ctxt->instate) {
1.140 daniel 1449: case XML_PARSER_ENTITY_DECL:
1450: case XML_PARSER_PI:
1.109 daniel 1451: case XML_PARSER_CDATA_SECTION:
1.140 daniel 1452: case XML_PARSER_COMMENT:
1.168 daniel 1453: case XML_PARSER_SYSTEM_LITERAL:
1.140 daniel 1454: /* we just ignore it there */
1455: return;
1456: case XML_PARSER_START_TAG:
1.109 daniel 1457: return;
1.140 daniel 1458: case XML_PARSER_END_TAG:
1.97 daniel 1459: return;
1460: case XML_PARSER_EOF:
1.123 daniel 1461: ctxt->errNo = XML_ERR_CHARREF_AT_EOF;
1.97 daniel 1462: if ((ctxt->sax != NULL) && (ctxt->sax->error != NULL))
1463: ctxt->sax->error(ctxt->userData, "CharRef at EOF\n");
1464: ctxt->wellFormed = 0;
1465: return;
1466: case XML_PARSER_PROLOG:
1.140 daniel 1467: case XML_PARSER_START:
1468: case XML_PARSER_MISC:
1.123 daniel 1469: ctxt->errNo = XML_ERR_CHARREF_IN_PROLOG;
1.97 daniel 1470: if ((ctxt->sax != NULL) && (ctxt->sax->error != NULL))
1471: ctxt->sax->error(ctxt->userData, "CharRef in prolog!\n");
1472: ctxt->wellFormed = 0;
1473: return;
1474: case XML_PARSER_EPILOG:
1.123 daniel 1475: ctxt->errNo = XML_ERR_CHARREF_IN_EPILOG;
1.97 daniel 1476: if ((ctxt->sax != NULL) && (ctxt->sax->error != NULL))
1477: ctxt->sax->error(ctxt->userData, "CharRef in epilog!\n");
1478: ctxt->wellFormed = 0;
1479: return;
1480: case XML_PARSER_DTD:
1.123 daniel 1481: ctxt->errNo = XML_ERR_CHARREF_IN_DTD;
1.97 daniel 1482: if ((ctxt->sax != NULL) && (ctxt->sax->error != NULL))
1483: ctxt->sax->error(ctxt->userData,
1484: "CharRef are forbiden in DTDs!\n");
1485: ctxt->wellFormed = 0;
1486: return;
1487: case XML_PARSER_ENTITY_VALUE:
1488: /*
1489: * NOTE: in the case of entity values, we don't do the
1.127 daniel 1490: * substitution here since we need the literal
1.97 daniel 1491: * entity value to be able to save the internal
1492: * subset of the document.
1493: * This will be handled by xmlDecodeEntities
1494: */
1495: return;
1496: case XML_PARSER_CONTENT:
1497: case XML_PARSER_ATTRIBUTE_VALUE:
1498: ctxt->token = xmlParseCharRef(ctxt);
1499: return;
1500: }
1501: return;
1502: }
1503:
1504: switch(ctxt->instate) {
1.109 daniel 1505: case XML_PARSER_CDATA_SECTION:
1506: return;
1.140 daniel 1507: case XML_PARSER_PI:
1.97 daniel 1508: case XML_PARSER_COMMENT:
1.168 daniel 1509: case XML_PARSER_SYSTEM_LITERAL:
1510: case XML_PARSER_CONTENT:
1.97 daniel 1511: return;
1.140 daniel 1512: case XML_PARSER_START_TAG:
1513: return;
1514: case XML_PARSER_END_TAG:
1515: return;
1.97 daniel 1516: case XML_PARSER_EOF:
1.123 daniel 1517: ctxt->errNo = XML_ERR_ENTITYREF_AT_EOF;
1.97 daniel 1518: if ((ctxt->sax != NULL) && (ctxt->sax->error != NULL))
1519: ctxt->sax->error(ctxt->userData, "Reference at EOF\n");
1520: ctxt->wellFormed = 0;
1521: return;
1522: case XML_PARSER_PROLOG:
1.140 daniel 1523: case XML_PARSER_START:
1524: case XML_PARSER_MISC:
1.123 daniel 1525: ctxt->errNo = XML_ERR_ENTITYREF_IN_PROLOG;
1.97 daniel 1526: if ((ctxt->sax != NULL) && (ctxt->sax->error != NULL))
1527: ctxt->sax->error(ctxt->userData, "Reference in prolog!\n");
1528: ctxt->wellFormed = 0;
1529: return;
1530: case XML_PARSER_EPILOG:
1.123 daniel 1531: ctxt->errNo = XML_ERR_ENTITYREF_IN_EPILOG;
1.97 daniel 1532: if ((ctxt->sax != NULL) && (ctxt->sax->error != NULL))
1533: ctxt->sax->error(ctxt->userData, "Reference in epilog!\n");
1534: ctxt->wellFormed = 0;
1535: return;
1536: case XML_PARSER_ENTITY_VALUE:
1537: /*
1538: * NOTE: in the case of entity values, we don't do the
1.127 daniel 1539: * substitution here since we need the literal
1.97 daniel 1540: * entity value to be able to save the internal
1541: * subset of the document.
1542: * This will be handled by xmlDecodeEntities
1543: */
1544: return;
1545: case XML_PARSER_ATTRIBUTE_VALUE:
1546: /*
1547: * NOTE: in the case of attributes values, we don't do the
1548: * substitution here unless we are in a mode where
1549: * the parser is explicitely asked to substitute
1550: * entities. The SAX callback is called with values
1551: * without entity substitution.
1552: * This will then be handled by xmlDecodeEntities
1553: */
1.113 daniel 1554: return;
1.97 daniel 1555: case XML_PARSER_ENTITY_DECL:
1556: /*
1557: * we just ignore it there
1558: * the substitution will be done once the entity is referenced
1559: */
1560: return;
1561: case XML_PARSER_DTD:
1.123 daniel 1562: ctxt->errNo = XML_ERR_ENTITYREF_IN_DTD;
1.97 daniel 1563: if ((ctxt->sax != NULL) && (ctxt->sax->error != NULL))
1564: ctxt->sax->error(ctxt->userData,
1565: "Entity references are forbiden in DTDs!\n");
1566: ctxt->wellFormed = 0;
1567: return;
1568: }
1569:
1570: NEXT;
1571: name = xmlScanName(ctxt);
1572: if (name == NULL) {
1.123 daniel 1573: ctxt->errNo = XML_ERR_ENTITYREF_NO_NAME;
1.97 daniel 1574: if ((ctxt->sax != NULL) && (ctxt->sax->error != NULL))
1575: ctxt->sax->error(ctxt->userData, "Entity reference: no name\n");
1576: ctxt->wellFormed = 0;
1577: ctxt->token = '&';
1578: return;
1579: }
1580: if (NXT(xmlStrlen(name)) != ';') {
1.123 daniel 1581: ctxt->errNo = XML_ERR_ENTITYREF_SEMICOL_MISSING;
1.97 daniel 1582: if ((ctxt->sax != NULL) && (ctxt->sax->error != NULL))
1583: ctxt->sax->error(ctxt->userData,
1584: "Entity reference: ';' expected\n");
1585: ctxt->wellFormed = 0;
1586: ctxt->token = '&';
1.119 daniel 1587: xmlFree(name);
1.97 daniel 1588: return;
1589: }
1590: SKIP(xmlStrlen(name) + 1);
1591: if (ctxt->sax != NULL) {
1592: if (ctxt->sax->getEntity != NULL)
1593: ent = ctxt->sax->getEntity(ctxt->userData, name);
1594: }
1.98 daniel 1595:
1596: /*
1597: * [ WFC: Entity Declared ]
1598: * the Name given in the entity reference must match that in an entity
1599: * declaration, except that well-formed documents need not declare any
1600: * of the following entities: amp, lt, gt, apos, quot.
1601: */
1.97 daniel 1602: if (ent == NULL)
1603: ent = xmlGetPredefinedEntity(name);
1604: if (ent == NULL) {
1.123 daniel 1605: ctxt->errNo = XML_ERR_UNDECLARED_ENTITY;
1.97 daniel 1606: if ((ctxt->sax != NULL) && (ctxt->sax->error != NULL))
1607: ctxt->sax->error(ctxt->userData,
1.98 daniel 1608: "Entity reference: entity %s not declared\n",
1609: name);
1.97 daniel 1610: ctxt->wellFormed = 0;
1.119 daniel 1611: xmlFree(name);
1.97 daniel 1612: return;
1613: }
1.98 daniel 1614:
1615: /*
1616: * [ WFC: Parsed Entity ]
1617: * An entity reference must not contain the name of an unparsed entity
1618: */
1.159 daniel 1619: if (ent->etype == XML_EXTERNAL_GENERAL_UNPARSED_ENTITY) {
1.123 daniel 1620: ctxt->errNo = XML_ERR_UNPARSED_ENTITY;
1.98 daniel 1621: if ((ctxt->sax != NULL) && (ctxt->sax->error != NULL))
1622: ctxt->sax->error(ctxt->userData,
1623: "Entity reference to unparsed entity %s\n", name);
1624: ctxt->wellFormed = 0;
1625: }
1626:
1.159 daniel 1627: if (ent->etype == XML_INTERNAL_PREDEFINED_ENTITY) {
1.97 daniel 1628: ctxt->token = ent->content[0];
1.119 daniel 1629: xmlFree(name);
1.97 daniel 1630: return;
1631: }
1632: input = xmlNewEntityInputStream(ctxt, ent);
1633: xmlPushInput(ctxt, input);
1.119 daniel 1634: xmlFree(name);
1.96 daniel 1635: return;
1636: }
1637:
1638: /**
1639: * xmlParserHandlePEReference:
1640: * @ctxt: the parser context
1641: *
1642: * [69] PEReference ::= '%' Name ';'
1643: *
1.98 daniel 1644: * [ WFC: No Recursion ]
1645: * TODO A parsed entity must not contain a recursive
1646: * reference to itself, either directly or indirectly.
1647: *
1648: * [ WFC: Entity Declared ]
1649: * In a document without any DTD, a document with only an internal DTD
1650: * subset which contains no parameter entity references, or a document
1651: * with "standalone='yes'", ... ... The declaration of a parameter
1652: * entity must precede any reference to it...
1653: *
1654: * [ VC: Entity Declared ]
1655: * In a document with an external subset or external parameter entities
1656: * with "standalone='no'", ... ... The declaration of a parameter entity
1657: * must precede any reference to it...
1658: *
1659: * [ WFC: In DTD ]
1660: * Parameter-entity references may only appear in the DTD.
1661: * NOTE: misleading but this is handled.
1662: *
1663: * A PEReference may have been detected in the current input stream
1.96 daniel 1664: * the handling is done accordingly to
1665: * http://www.w3.org/TR/REC-xml#entproc
1666: * i.e.
1667: * - Included in literal in entity values
1668: * - Included as Paraemeter Entity reference within DTDs
1669: */
1670: void
1671: xmlParserHandlePEReference(xmlParserCtxtPtr ctxt) {
1.123 daniel 1672: xmlChar *name;
1.96 daniel 1673: xmlEntityPtr entity = NULL;
1674: xmlParserInputPtr input;
1675:
1.126 daniel 1676: if (ctxt->token != 0) {
1677: return;
1678: }
1.152 daniel 1679: if (RAW != '%') return;
1.96 daniel 1680: switch(ctxt->instate) {
1.109 daniel 1681: case XML_PARSER_CDATA_SECTION:
1682: return;
1.97 daniel 1683: case XML_PARSER_COMMENT:
1684: return;
1.140 daniel 1685: case XML_PARSER_START_TAG:
1686: return;
1687: case XML_PARSER_END_TAG:
1688: return;
1.96 daniel 1689: case XML_PARSER_EOF:
1.123 daniel 1690: ctxt->errNo = XML_ERR_PEREF_AT_EOF;
1.96 daniel 1691: if ((ctxt->sax != NULL) && (ctxt->sax->error != NULL))
1692: ctxt->sax->error(ctxt->userData, "PEReference at EOF\n");
1693: ctxt->wellFormed = 0;
1694: return;
1695: case XML_PARSER_PROLOG:
1.140 daniel 1696: case XML_PARSER_START:
1697: case XML_PARSER_MISC:
1.123 daniel 1698: ctxt->errNo = XML_ERR_PEREF_IN_PROLOG;
1.96 daniel 1699: if ((ctxt->sax != NULL) && (ctxt->sax->error != NULL))
1700: ctxt->sax->error(ctxt->userData, "PEReference in prolog!\n");
1701: ctxt->wellFormed = 0;
1702: return;
1.97 daniel 1703: case XML_PARSER_ENTITY_DECL:
1.96 daniel 1704: case XML_PARSER_CONTENT:
1705: case XML_PARSER_ATTRIBUTE_VALUE:
1.140 daniel 1706: case XML_PARSER_PI:
1.168 daniel 1707: case XML_PARSER_SYSTEM_LITERAL:
1.96 daniel 1708: /* we just ignore it there */
1709: return;
1710: case XML_PARSER_EPILOG:
1.123 daniel 1711: ctxt->errNo = XML_ERR_PEREF_IN_EPILOG;
1.96 daniel 1712: if ((ctxt->sax != NULL) && (ctxt->sax->error != NULL))
1.97 daniel 1713: ctxt->sax->error(ctxt->userData, "PEReference in epilog!\n");
1.96 daniel 1714: ctxt->wellFormed = 0;
1715: return;
1.97 daniel 1716: case XML_PARSER_ENTITY_VALUE:
1717: /*
1718: * NOTE: in the case of entity values, we don't do the
1.127 daniel 1719: * substitution here since we need the literal
1.97 daniel 1720: * entity value to be able to save the internal
1721: * subset of the document.
1722: * This will be handled by xmlDecodeEntities
1723: */
1724: return;
1.96 daniel 1725: case XML_PARSER_DTD:
1.98 daniel 1726: /*
1727: * [WFC: Well-Formedness Constraint: PEs in Internal Subset]
1728: * In the internal DTD subset, parameter-entity references
1729: * can occur only where markup declarations can occur, not
1730: * within markup declarations.
1731: * In that case this is handled in xmlParseMarkupDecl
1732: */
1733: if ((ctxt->external == 0) && (ctxt->inputNr == 1))
1734: return;
1.96 daniel 1735: }
1736:
1737: NEXT;
1738: name = xmlParseName(ctxt);
1739: if (name == NULL) {
1.123 daniel 1740: ctxt->errNo = XML_ERR_PEREF_NO_NAME;
1.96 daniel 1741: if ((ctxt->sax != NULL) && (ctxt->sax->error != NULL))
1742: ctxt->sax->error(ctxt->userData, "xmlHandlePEReference: no name\n");
1743: ctxt->wellFormed = 0;
1744: } else {
1.152 daniel 1745: if (RAW == ';') {
1.96 daniel 1746: NEXT;
1.98 daniel 1747: if ((ctxt->sax != NULL) && (ctxt->sax->getParameterEntity != NULL))
1748: entity = ctxt->sax->getParameterEntity(ctxt->userData, name);
1.96 daniel 1749: if (entity == NULL) {
1.98 daniel 1750:
1751: /*
1752: * [ WFC: Entity Declared ]
1753: * In a document without any DTD, a document with only an
1754: * internal DTD subset which contains no parameter entity
1755: * references, or a document with "standalone='yes'", ...
1756: * ... The declaration of a parameter entity must precede
1757: * any reference to it...
1758: */
1759: if ((ctxt->standalone == 1) ||
1760: ((ctxt->hasExternalSubset == 0) &&
1761: (ctxt->hasPErefs == 0))) {
1762: if ((ctxt->sax != NULL) && (ctxt->sax->error != NULL))
1763: ctxt->sax->error(ctxt->userData,
1764: "PEReference: %%%s; not found\n", name);
1765: ctxt->wellFormed = 0;
1766: } else {
1767: /*
1768: * [ VC: Entity Declared ]
1769: * In a document with an external subset or external
1770: * parameter entities with "standalone='no'", ...
1771: * ... The declaration of a parameter entity must precede
1772: * any reference to it...
1773: */
1774: if ((ctxt->sax != NULL) && (ctxt->sax->warning != NULL))
1775: ctxt->sax->warning(ctxt->userData,
1776: "PEReference: %%%s; not found\n", name);
1777: ctxt->valid = 0;
1778: }
1.96 daniel 1779: } else {
1.159 daniel 1780: if ((entity->etype == XML_INTERNAL_PARAMETER_ENTITY) ||
1781: (entity->etype == XML_EXTERNAL_PARAMETER_ENTITY)) {
1.96 daniel 1782: /*
1.156 daniel 1783: * TODO !!! handle the extra spaces added before and after
1.96 daniel 1784: * c.f. http://www.w3.org/TR/REC-xml#as-PE
1785: */
1786: input = xmlNewEntityInputStream(ctxt, entity);
1787: xmlPushInput(ctxt, input);
1.164 daniel 1788: if ((entity->etype == XML_EXTERNAL_PARAMETER_ENTITY) &&
1789: (RAW == '<') && (NXT(1) == '?') &&
1790: (NXT(2) == 'x') && (NXT(3) == 'm') &&
1791: (NXT(4) == 'l') && (IS_BLANK(NXT(5)))) {
1.172 daniel 1792: xmlParseTextDecl(ctxt);
1.164 daniel 1793: }
1794: if (ctxt->token == 0)
1795: ctxt->token = ' ';
1.96 daniel 1796: } else {
1797: if ((ctxt->sax != NULL) && (ctxt->sax->error != NULL))
1798: ctxt->sax->error(ctxt->userData,
1799: "xmlHandlePEReference: %s is not a parameter entity\n",
1800: name);
1801: ctxt->wellFormed = 0;
1802: }
1803: }
1804: } else {
1.123 daniel 1805: ctxt->errNo = XML_ERR_PEREF_SEMICOL_MISSING;
1.96 daniel 1806: if ((ctxt->sax != NULL) && (ctxt->sax->error != NULL))
1807: ctxt->sax->error(ctxt->userData,
1808: "xmlHandlePEReference: expecting ';'\n");
1809: ctxt->wellFormed = 0;
1810: }
1.119 daniel 1811: xmlFree(name);
1.97 daniel 1812: }
1813: }
1814:
/*
 * Macro used to grow the current buffer.
 *
 * Doubles buffer##_size and reallocates buffer accordingly. It expects
 * both an xmlChar pointer named by its argument and an integer named
 * <argument>_size to be in scope, and must be used in a function
 * returning a pointer: if the reallocation fails, the old buffer is
 * released (it would otherwise be lost since the caller only gets NULL)
 * and the enclosing function returns NULL.
 */
#define growBuffer(buffer) {						\
    xmlChar *buffer##_tmp;						\
    buffer##_size *= 2;							\
    buffer##_tmp = (xmlChar *)						\
		xmlRealloc(buffer, buffer##_size * sizeof(xmlChar));	\
    if (buffer##_tmp == NULL) {						\
	perror("realloc failed");					\
	xmlFree(buffer);						\
	return(NULL);							\
    }									\
    buffer = buffer##_tmp;						\
}
1.77 daniel 1827:
1828: /**
1829: * xmlDecodeEntities:
1830: * @ctxt: the parser context
1831: * @what: combination of XML_SUBSTITUTE_REF and XML_SUBSTITUTE_PEREF
1832: * @len: the len to decode (in bytes !), -1 for no size limit
1.123 daniel 1833: * @end: an end marker xmlChar, 0 if none
1834: * @end2: an end marker xmlChar, 0 if none
1835: * @end3: an end marker xmlChar, 0 if none
1.77 daniel 1836: *
1837: * [67] Reference ::= EntityRef | CharRef
1838: *
1839: * [69] PEReference ::= '%' Name ';'
1840: *
1841: * Returns A newly allocated string with the substitution done. The caller
1842: * must deallocate it !
1843: */
1.123 daniel 1844: xmlChar *
1.77 daniel 1845: xmlDecodeEntities(xmlParserCtxtPtr ctxt, int len, int what,
1.123 daniel 1846: xmlChar end, xmlChar end2, xmlChar end3) {
1847: xmlChar *buffer = NULL;
1.78 daniel 1848: int buffer_size = 0;
1.161 daniel 1849: int nbchars = 0;
1.78 daniel 1850:
1.123 daniel 1851: xmlChar *current = NULL;
1.77 daniel 1852: xmlEntityPtr ent;
1853: unsigned int max = (unsigned int) len;
1.161 daniel 1854: int c,l;
1.77 daniel 1855:
1856: /*
1857: * allocate a translation buffer.
1858: */
1.140 daniel 1859: buffer_size = XML_PARSER_BIG_BUFFER_SIZE;
1.123 daniel 1860: buffer = (xmlChar *) xmlMalloc(buffer_size * sizeof(xmlChar));
1.77 daniel 1861: if (buffer == NULL) {
1862: perror("xmlDecodeEntities: malloc failed");
1863: return(NULL);
1864: }
1865:
1.78 daniel 1866: /*
1867: * Ok loop until we reach one of the ending char or a size limit.
1868: */
1.161 daniel 1869: c = CUR_CHAR(l);
1870: while ((nbchars < max) && (c != end) &&
1871: (c != end2) && (c != end3)) {
1.77 daniel 1872:
1.161 daniel 1873: if (c == 0) break;
1874: if (((c == '&') && (ctxt->token != '&')) && (NXT(1) == '#')) {
1.98 daniel 1875: int val = xmlParseCharRef(ctxt);
1.161 daniel 1876: COPY_BUF(0,buffer,nbchars,val);
1877: NEXTL(l);
1878: } else if ((c == '&') && (ctxt->token != '&') &&
1879: (what & XML_SUBSTITUTE_REF)) {
1.98 daniel 1880: ent = xmlParseEntityRef(ctxt);
1881: if ((ent != NULL) &&
1882: (ctxt->replaceEntities != 0)) {
1883: current = ent->content;
1884: while (*current != 0) {
1.161 daniel 1885: buffer[nbchars++] = *current++;
1886: if (nbchars > buffer_size - XML_PARSER_BUFFER_SIZE) {
1.98 daniel 1887: growBuffer(buffer);
1.77 daniel 1888: }
1889: }
1.98 daniel 1890: } else if (ent != NULL) {
1.123 daniel 1891: const xmlChar *cur = ent->name;
1.98 daniel 1892:
1.161 daniel 1893: buffer[nbchars++] = '&';
1894: if (nbchars > buffer_size - XML_PARSER_BUFFER_SIZE) {
1.98 daniel 1895: growBuffer(buffer);
1896: }
1.161 daniel 1897: while (*cur != 0) {
1898: buffer[nbchars++] = *cur++;
1899: }
1900: buffer[nbchars++] = ';';
1.77 daniel 1901: }
1.161 daniel 1902: } else if (c == '%' && (what & XML_SUBSTITUTE_PEREF)) {
1.97 daniel 1903: /*
1.77 daniel 1904: * a PEReference induce to switch the entity flow,
1905: * we break here to flush the current set of chars
1906: * parsed if any. We will be called back later.
1.97 daniel 1907: */
1.91 daniel 1908: if (nbchars != 0) break;
1.77 daniel 1909:
1910: xmlParsePEReference(ctxt);
1.79 daniel 1911:
1.97 daniel 1912: /*
1.79 daniel 1913: * Pop-up of finished entities.
1.97 daniel 1914: */
1.152 daniel 1915: while ((RAW == 0) && (ctxt->inputNr > 1))
1.79 daniel 1916: xmlPopInput(ctxt);
1917:
1.98 daniel 1918: break;
1.77 daniel 1919: } else {
1.161 daniel 1920: COPY_BUF(l,buffer,nbchars,c);
1921: NEXTL(l);
1922: if (nbchars > buffer_size - XML_PARSER_BUFFER_SIZE) {
1.86 daniel 1923: growBuffer(buffer);
1924: }
1.77 daniel 1925: }
1.161 daniel 1926: c = CUR_CHAR(l);
1.77 daniel 1927: }
1.161 daniel 1928: buffer[nbchars++] = 0;
1.77 daniel 1929: return(buffer);
1930: }
1931:
1.135 daniel 1932: /**
1933: * xmlStringDecodeEntities:
1934: * @ctxt: the parser context
1935: * @str: the input string
1936: * @what: combination of XML_SUBSTITUTE_REF and XML_SUBSTITUTE_PEREF
1937: * @end: an end marker xmlChar, 0 if none
1938: * @end2: an end marker xmlChar, 0 if none
1939: * @end3: an end marker xmlChar, 0 if none
1940: *
1941: * [67] Reference ::= EntityRef | CharRef
1942: *
1943: * [69] PEReference ::= '%' Name ';'
1944: *
1945: * Returns A newly allocated string with the substitution done. The caller
1946: * must deallocate it !
1947: */
1948: xmlChar *
1949: xmlStringDecodeEntities(xmlParserCtxtPtr ctxt, const xmlChar *str, int what,
1950: xmlChar end, xmlChar end2, xmlChar end3) {
1951: xmlChar *buffer = NULL;
1952: int buffer_size = 0;
1953: xmlChar *out = NULL;
1954:
1955: xmlChar *current = NULL;
1956: xmlEntityPtr ent;
1957: xmlChar cur;
1958:
1959: /*
1960: * allocate a translation buffer.
1961: */
1.140 daniel 1962: buffer_size = XML_PARSER_BIG_BUFFER_SIZE;
1.135 daniel 1963: buffer = (xmlChar *) xmlMalloc(buffer_size * sizeof(xmlChar));
1964: if (buffer == NULL) {
1965: perror("xmlDecodeEntities: malloc failed");
1966: return(NULL);
1967: }
1968: out = buffer;
1969:
1970: /*
1971: * Ok loop until we reach one of the ending char or a size limit.
1972: */
1973: cur = *str;
1974: while ((cur != 0) && (cur != end) &&
1975: (cur != end2) && (cur != end3)) {
1976:
1977: if (cur == 0) break;
1978: if ((cur == '&') && (str[1] == '#')) {
1979: int val = xmlParseStringCharRef(ctxt, &str);
1980: if (val != 0)
1981: *out++ = val;
1982: } else if ((cur == '&') && (what & XML_SUBSTITUTE_REF)) {
1983: ent = xmlParseStringEntityRef(ctxt, &str);
1984: if ((ent != NULL) &&
1985: (ctxt->replaceEntities != 0)) {
1986: current = ent->content;
1987: while (*current != 0) {
1988: *out++ = *current++;
1.140 daniel 1989: if (out - buffer > buffer_size - XML_PARSER_BUFFER_SIZE) {
1.135 daniel 1990: int index = out - buffer;
1991:
1992: growBuffer(buffer);
1993: out = &buffer[index];
1994: }
1995: }
1996: } else if (ent != NULL) {
1997: int i = xmlStrlen(ent->name);
1998: const xmlChar *cur = ent->name;
1999:
2000: *out++ = '&';
1.140 daniel 2001: if (out - buffer > buffer_size - i - XML_PARSER_BUFFER_SIZE) {
1.135 daniel 2002: int index = out - buffer;
2003:
2004: growBuffer(buffer);
2005: out = &buffer[index];
2006: }
2007: for (;i > 0;i--)
2008: *out++ = *cur++;
2009: *out++ = ';';
2010: }
2011: } else if (cur == '%' && (what & XML_SUBSTITUTE_PEREF)) {
2012: ent = xmlParseStringPEReference(ctxt, &str);
2013: if (ent != NULL) {
2014: current = ent->content;
2015: while (*current != 0) {
2016: *out++ = *current++;
1.140 daniel 2017: if (out - buffer > buffer_size - XML_PARSER_BUFFER_SIZE) {
1.135 daniel 2018: int index = out - buffer;
2019:
2020: growBuffer(buffer);
2021: out = &buffer[index];
2022: }
2023: }
2024: }
2025: } else {
1.156 daniel 2026: /* invalid for UTF-8 , use COPY(out); !!! */
1.135 daniel 2027: *out++ = cur;
1.140 daniel 2028: if (out - buffer > buffer_size - XML_PARSER_BUFFER_SIZE) {
1.135 daniel 2029: int index = out - buffer;
2030:
2031: growBuffer(buffer);
2032: out = &buffer[index];
2033: }
2034: str++;
2035: }
2036: cur = *str;
2037: }
2038: *out = 0;
2039: return(buffer);
2040: }
2041:
1.1 veillard 2042:
1.28 daniel 2043: /************************************************************************
2044: * *
1.75 daniel 2045: * Commodity functions to handle encodings *
2046: * *
2047: ************************************************************************/
2048:
/*
 * xmlIsLangLetter:
 * @c:  the character to test
 *
 * Returns 1 if @c is one of [a-z] | [A-Z], 0 otherwise
 */
static int
xmlIsLangLetter(int c) {
    return(((c >= 'A') && (c <= 'Z')) || ((c >= 'a') && (c <= 'z')));
}

/**
 * xmlCheckLanguageID:
 * @lang:  pointer to the string value
 *
 * Checks that the value conforms to the LanguageID production:
 *
 * [33] LanguageID ::= Langcode ('-' Subcode)*
 * [34] Langcode ::= ISO639Code | IanaCode | UserCode
 * [35] ISO639Code ::= ([a-z] | [A-Z]) ([a-z] | [A-Z])
 * [36] IanaCode ::= ('i' | 'I') '-' ([a-z] | [A-Z])+
 * [37] UserCode ::= ('x' | 'X') '-' ([a-z] | [A-Z])+
 * [38] Subcode ::= ([a-z] | [A-Z])+
 *
 * Returns 1 if correct 0 otherwise (including when @lang is NULL)
 **/
int
xmlCheckLanguageID(const xmlChar *lang) {
    const xmlChar *cur = lang;

    if (cur == NULL)
        return(0);
    if (((cur[0] == 'i') || (cur[0] == 'I') ||
         (cur[0] == 'x') || (cur[0] == 'X')) && (cur[1] == '-')) {
        /*
         * IANA code or User code: the '+' in productions [36] and [37]
         * requires at least one letter after the "i-" / "x-" marker.
         */
        cur += 2;
        if (!xmlIsLangLetter(cur[0]))
            return(0);
        while (xmlIsLangLetter(cur[0]))
            cur++;
    } else if (xmlIsLangLetter(cur[0])) {
        /*
         * ISO639: exactly two letters
         */
        if (!xmlIsLangLetter(cur[1]))
            return(0);
        cur += 2;
    } else
        return(0);

    /*
     * Any number of '-' Subcode, each Subcode being at least one letter
     */
    while (cur[0] != 0) {
        if (cur[0] != '-')
            return(0);
        cur++;
        if (!xmlIsLangLetter(cur[0]))
            return(0);
        while (xmlIsLangLetter(cur[0]))
            cur++;
    }
    return(1);
}
2116:
1.75 daniel 2117: /**
2118: * xmlSwitchEncoding:
2119: * @ctxt: the parser context
1.124 daniel 2120: * @enc: the encoding value (number)
1.75 daniel 2121: *
2122: * change the input functions when discovering the character encoding
2123: * of a given entity.
2124: */
2125: void
2126: xmlSwitchEncoding(xmlParserCtxtPtr ctxt, xmlCharEncoding enc)
2127: {
1.156 daniel 2128: xmlCharEncodingHandlerPtr handler;
2129:
2130: handler = xmlGetCharEncodingHandler(enc);
2131: if (handler != NULL) {
2132: if (ctxt->input != NULL) {
2133: if (ctxt->input->buf != NULL) {
2134: if (ctxt->input->buf->encoder != NULL) {
2135: if ((ctxt->sax != NULL) && (ctxt->sax->error != NULL))
2136: ctxt->sax->error(ctxt->userData,
2137: "xmlSwitchEncoding : encoder already regitered\n");
2138: return;
2139: }
2140: ctxt->input->buf->encoder = handler;
2141:
2142: /*
2143: * Is there already some content down the pipe to convert
2144: */
2145: if ((ctxt->input->buf->buffer != NULL) &&
2146: (ctxt->input->buf->buffer->use > 0)) {
2147: xmlChar *buf;
2148: int res, len, size;
2149: int processed;
2150:
2151: /*
2152: * Specific handling of the Byte Order Mark for
2153: * UTF-16
2154: */
2155: if ((enc == XML_CHAR_ENCODING_UTF16LE) &&
2156: (ctxt->input->cur[0] == 0xFF) &&
2157: (ctxt->input->cur[1] == 0xFE)) {
2158: SKIP(2);
2159: }
2160: if ((enc == XML_CHAR_ENCODING_UTF16BE) &&
2161: (ctxt->input->cur[0] == 0xFE) &&
2162: (ctxt->input->cur[1] == 0xFF)) {
2163: SKIP(2);
2164: }
2165:
2166: /*
2167: * convert the non processed part
2168: */
2169: processed = ctxt->input->cur - ctxt->input->base;
2170: len = ctxt->input->buf->buffer->use - processed;
2171:
2172: if (len <= 0) {
2173: return;
2174: }
2175: size = ctxt->input->buf->buffer->use * 4;
2176: if (size < 4000)
2177: size = 4000;
1.167 daniel 2178: retry_larger:
1.160 daniel 2179: buf = (xmlChar *) xmlMalloc(size + 1);
1.156 daniel 2180: if (buf == NULL) {
2181: if ((ctxt->sax != NULL) && (ctxt->sax->error != NULL))
2182: ctxt->sax->error(ctxt->userData,
2183: "xmlSwitchEncoding : out of memory\n");
2184: return;
2185: }
1.160 daniel 2186: /* TODO !!! Handling of buf too small */
1.156 daniel 2187: res = handler->input(buf, size, ctxt->input->cur, &len);
1.167 daniel 2188: if (res == -1) {
2189: size *= 2;
2190: xmlFree(buf);
2191: goto retry_larger;
2192: }
1.156 daniel 2193: if ((res < 0) ||
2194: (len != ctxt->input->buf->buffer->use - processed)) {
2195: if ((ctxt->sax != NULL) && (ctxt->sax->error != NULL))
2196: ctxt->sax->error(ctxt->userData,
2197: "xmlSwitchEncoding : conversion failed\n");
2198: xmlFree(buf);
2199: return;
2200: }
1.167 daniel 2201:
1.156 daniel 2202: /*
2203: * Conversion succeeded, get rid of the old buffer
2204: */
2205: xmlFree(ctxt->input->buf->buffer->content);
2206: ctxt->input->buf->buffer->content = buf;
2207: ctxt->input->base = buf;
2208: ctxt->input->cur = buf;
2209: ctxt->input->buf->buffer->size = size;
2210: ctxt->input->buf->buffer->use = res;
1.160 daniel 2211: buf[res] = 0;
1.156 daniel 2212: }
2213: return;
2214: } else {
2215: if (ctxt->input->length == 0) {
2216: /*
2217: * When parsing a static memory array one must know the
2218: * size to be able to convert the buffer.
2219: */
2220: if ((ctxt->sax != NULL) && (ctxt->sax->error != NULL))
2221: ctxt->sax->error(ctxt->userData,
2222: "xmlSwitchEncoding : no input\n");
2223: return;
2224: } else {
2225: xmlChar *buf;
2226: int res, len;
2227: int processed = ctxt->input->cur - ctxt->input->base;
2228:
2229: /*
2230: * convert the non processed part
2231: */
2232: len = ctxt->input->length - processed;
2233: if (len <= 0) {
2234: if ((ctxt->sax != NULL) && (ctxt->sax->error != NULL))
2235: ctxt->sax->error(ctxt->userData,
2236: "xmlSwitchEncoding : input fully consumed?\n");
2237: return;
2238: }
2239: buf = (xmlChar *) xmlMalloc(ctxt->input->length * 4);
2240: if (buf == NULL) {
2241: if ((ctxt->sax != NULL) && (ctxt->sax->error != NULL))
2242: ctxt->sax->error(ctxt->userData,
2243: "xmlSwitchEncoding : out of memory\n");
2244: return;
2245: }
2246: res = handler->input(buf, ctxt->input->length * 4,
2247: ctxt->input->cur, &len);
2248: if ((res < 0) ||
2249: (len != ctxt->input->length - processed)) {
2250: if ((ctxt->sax != NULL) && (ctxt->sax->error != NULL))
2251: ctxt->sax->error(ctxt->userData,
2252: "xmlSwitchEncoding : conversion failed\n");
2253: xmlFree(buf);
2254: return;
2255: }
2256: /*
2257: * Conversion succeeded, get rid of the old buffer
2258: */
2259: if ((ctxt->input->free != NULL) &&
2260: (ctxt->input->base != NULL))
2261: ctxt->input->free((xmlChar *) ctxt->input->base);
2262: ctxt->input->base = ctxt->input->cur = buf;
2263: ctxt->input->length = res;
2264: }
2265: }
2266: } else {
2267: if ((ctxt->sax != NULL) && (ctxt->sax->error != NULL))
2268: ctxt->sax->error(ctxt->userData,
2269: "xmlSwitchEncoding : no input\n");
2270: }
2271: }
2272:
1.75 daniel 2273: switch (enc) {
2274: case XML_CHAR_ENCODING_ERROR:
1.123 daniel 2275: ctxt->errNo = XML_ERR_UNKNOWN_ENCODING;
1.75 daniel 2276: if ((ctxt->sax != NULL) && (ctxt->sax->error != NULL))
2277: ctxt->sax->error(ctxt->userData, "encoding unknown\n");
2278: ctxt->wellFormed = 0;
2279: break;
2280: case XML_CHAR_ENCODING_NONE:
2281: /* let's assume it's UTF-8 without the XML decl */
2282: return;
2283: case XML_CHAR_ENCODING_UTF8:
2284: /* default encoding, no conversion should be needed */
2285: return;
2286: case XML_CHAR_ENCODING_UTF16LE:
1.123 daniel 2287: ctxt->errNo = XML_ERR_UNSUPPORTED_ENCODING;
1.75 daniel 2288: if ((ctxt->sax != NULL) && (ctxt->sax->error != NULL))
2289: ctxt->sax->error(ctxt->userData,
2290: "char encoding UTF16 little endian not supported\n");
2291: break;
2292: case XML_CHAR_ENCODING_UTF16BE:
1.123 daniel 2293: ctxt->errNo = XML_ERR_UNSUPPORTED_ENCODING;
1.75 daniel 2294: if ((ctxt->sax != NULL) && (ctxt->sax->error != NULL))
2295: ctxt->sax->error(ctxt->userData,
2296: "char encoding UTF16 big endian not supported\n");
2297: break;
2298: case XML_CHAR_ENCODING_UCS4LE:
1.123 daniel 2299: ctxt->errNo = XML_ERR_UNSUPPORTED_ENCODING;
1.75 daniel 2300: if ((ctxt->sax != NULL) && (ctxt->sax->error != NULL))
2301: ctxt->sax->error(ctxt->userData,
2302: "char encoding USC4 little endian not supported\n");
2303: break;
2304: case XML_CHAR_ENCODING_UCS4BE:
1.123 daniel 2305: ctxt->errNo = XML_ERR_UNSUPPORTED_ENCODING;
1.75 daniel 2306: if ((ctxt->sax != NULL) && (ctxt->sax->error != NULL))
2307: ctxt->sax->error(ctxt->userData,
2308: "char encoding USC4 big endian not supported\n");
2309: break;
2310: case XML_CHAR_ENCODING_EBCDIC:
1.123 daniel 2311: ctxt->errNo = XML_ERR_UNSUPPORTED_ENCODING;
1.75 daniel 2312: if ((ctxt->sax != NULL) && (ctxt->sax->error != NULL))
2313: ctxt->sax->error(ctxt->userData,
2314: "char encoding EBCDIC not supported\n");
2315: break;
2316: case XML_CHAR_ENCODING_UCS4_2143:
1.123 daniel 2317: ctxt->errNo = XML_ERR_UNSUPPORTED_ENCODING;
1.75 daniel 2318: if ((ctxt->sax != NULL) && (ctxt->sax->error != NULL))
2319: ctxt->sax->error(ctxt->userData,
2320: "char encoding UCS4 2143 not supported\n");
2321: break;
2322: case XML_CHAR_ENCODING_UCS4_3412:
1.123 daniel 2323: ctxt->errNo = XML_ERR_UNSUPPORTED_ENCODING;
1.75 daniel 2324: if ((ctxt->sax != NULL) && (ctxt->sax->error != NULL))
2325: ctxt->sax->error(ctxt->userData,
2326: "char encoding UCS4 3412 not supported\n");
2327: break;
2328: case XML_CHAR_ENCODING_UCS2:
1.123 daniel 2329: ctxt->errNo = XML_ERR_UNSUPPORTED_ENCODING;
1.75 daniel 2330: if ((ctxt->sax != NULL) && (ctxt->sax->error != NULL))
2331: ctxt->sax->error(ctxt->userData,
2332: "char encoding UCS2 not supported\n");
2333: break;
2334: case XML_CHAR_ENCODING_8859_1:
1.123 daniel 2335: ctxt->errNo = XML_ERR_UNSUPPORTED_ENCODING;
1.75 daniel 2336: if ((ctxt->sax != NULL) && (ctxt->sax->error != NULL))
2337: ctxt->sax->error(ctxt->userData,
2338: "char encoding ISO_8859_1 ISO Latin 1 not supported\n");
2339: break;
2340: case XML_CHAR_ENCODING_8859_2:
1.123 daniel 2341: ctxt->errNo = XML_ERR_UNSUPPORTED_ENCODING;
1.75 daniel 2342: if ((ctxt->sax != NULL) && (ctxt->sax->error != NULL))
2343: ctxt->sax->error(ctxt->userData,
2344: "char encoding ISO_8859_2 ISO Latin 2 not supported\n");
2345: break;
2346: case XML_CHAR_ENCODING_8859_3:
1.123 daniel 2347: ctxt->errNo = XML_ERR_UNSUPPORTED_ENCODING;
1.75 daniel 2348: if ((ctxt->sax != NULL) && (ctxt->sax->error != NULL))
2349: ctxt->sax->error(ctxt->userData,
2350: "char encoding ISO_8859_3 not supported\n");
2351: break;
2352: case XML_CHAR_ENCODING_8859_4:
1.123 daniel 2353: ctxt->errNo = XML_ERR_UNSUPPORTED_ENCODING;
1.75 daniel 2354: if ((ctxt->sax != NULL) && (ctxt->sax->error != NULL))
2355: ctxt->sax->error(ctxt->userData,
2356: "char encoding ISO_8859_4 not supported\n");
2357: break;
2358: case XML_CHAR_ENCODING_8859_5:
1.123 daniel 2359: ctxt->errNo = XML_ERR_UNSUPPORTED_ENCODING;
1.75 daniel 2360: if ((ctxt->sax != NULL) && (ctxt->sax->error != NULL))
2361: ctxt->sax->error(ctxt->userData,
2362: "char encoding ISO_8859_5 not supported\n");
2363: break;
2364: case XML_CHAR_ENCODING_8859_6:
1.123 daniel 2365: ctxt->errNo = XML_ERR_UNSUPPORTED_ENCODING;
1.75 daniel 2366: if ((ctxt->sax != NULL) && (ctxt->sax->error != NULL))
2367: ctxt->sax->error(ctxt->userData,
2368: "char encoding ISO_8859_6 not supported\n");
2369: break;
2370: case XML_CHAR_ENCODING_8859_7:
1.123 daniel 2371: ctxt->errNo = XML_ERR_UNSUPPORTED_ENCODING;
1.75 daniel 2372: if ((ctxt->sax != NULL) && (ctxt->sax->error != NULL))
2373: ctxt->sax->error(ctxt->userData,
2374: "char encoding ISO_8859_7 not supported\n");
2375: break;
2376: case XML_CHAR_ENCODING_8859_8:
1.123 daniel 2377: ctxt->errNo = XML_ERR_UNSUPPORTED_ENCODING;
1.75 daniel 2378: if ((ctxt->sax != NULL) && (ctxt->sax->error != NULL))
2379: ctxt->sax->error(ctxt->userData,
2380: "char encoding ISO_8859_8 not supported\n");
2381: break;
2382: case XML_CHAR_ENCODING_8859_9:
1.123 daniel 2383: ctxt->errNo = XML_ERR_UNSUPPORTED_ENCODING;
1.75 daniel 2384: if ((ctxt->sax != NULL) && (ctxt->sax->error != NULL))
2385: ctxt->sax->error(ctxt->userData,
2386: "char encoding ISO_8859_9 not supported\n");
2387: break;
2388: case XML_CHAR_ENCODING_2022_JP:
1.123 daniel 2389: ctxt->errNo = XML_ERR_UNSUPPORTED_ENCODING;
1.75 daniel 2390: if ((ctxt->sax != NULL) && (ctxt->sax->error != NULL))
2391: ctxt->sax->error(ctxt->userData,
2392: "char encoding ISO-2022-JPnot supported\n");
2393: break;
2394: case XML_CHAR_ENCODING_SHIFT_JIS:
1.123 daniel 2395: ctxt->errNo = XML_ERR_UNSUPPORTED_ENCODING;
1.75 daniel 2396: if ((ctxt->sax != NULL) && (ctxt->sax->error != NULL))
2397: ctxt->sax->error(ctxt->userData,
2398: "char encoding Shift_JISnot supported\n");
2399: break;
2400: case XML_CHAR_ENCODING_EUC_JP:
1.123 daniel 2401: ctxt->errNo = XML_ERR_UNSUPPORTED_ENCODING;
1.75 daniel 2402: if ((ctxt->sax != NULL) && (ctxt->sax->error != NULL))
2403: ctxt->sax->error(ctxt->userData,
2404: "char encoding EUC-JPnot supported\n");
2405: break;
2406: }
2407: }
2408:
2409: /************************************************************************
2410: * *
1.123 daniel 2411: * Commodity functions to handle xmlChars *
1.28 daniel 2412: * *
2413: ************************************************************************/
2414:
1.50 daniel 2415: /**
2416: * xmlStrndup:
1.123 daniel 2417: * @cur: the input xmlChar *
1.50 daniel 2418: * @len: the len of @cur
2419: *
1.123 daniel 2420: * a strndup for array of xmlChar's
1.68 daniel 2421: *
1.123 daniel 2422: * Returns a new xmlChar * or NULL
1.1 veillard 2423: */
1.123 daniel 2424: xmlChar *
2425: xmlStrndup(const xmlChar *cur, int len) {
1.135 daniel 2426: xmlChar *ret;
2427:
2428: if ((cur == NULL) || (len < 0)) return(NULL);
2429: ret = xmlMalloc((len + 1) * sizeof(xmlChar));
1.1 veillard 2430: if (ret == NULL) {
1.86 daniel 2431: fprintf(stderr, "malloc of %ld byte failed\n",
1.123 daniel 2432: (len + 1) * (long)sizeof(xmlChar));
1.1 veillard 2433: return(NULL);
2434: }
1.123 daniel 2435: memcpy(ret, cur, len * sizeof(xmlChar));
1.1 veillard 2436: ret[len] = 0;
2437: return(ret);
2438: }
2439:
1.50 daniel 2440: /**
2441: * xmlStrdup:
1.123 daniel 2442: * @cur: the input xmlChar *
1.50 daniel 2443: *
1.152 daniel 2444: * a strdup for array of xmlChar's. Since they are supposed to be
2445: * encoded in UTF-8 or an encoding with 8bit based chars, we assume
2446: * a termination mark of '0'.
1.68 daniel 2447: *
1.123 daniel 2448: * Returns a new xmlChar * or NULL
1.1 veillard 2449: */
1.123 daniel 2450: xmlChar *
2451: xmlStrdup(const xmlChar *cur) {
2452: const xmlChar *p = cur;
1.1 veillard 2453:
1.135 daniel 2454: if (cur == NULL) return(NULL);
1.152 daniel 2455: while (*p != 0) p++;
1.1 veillard 2456: return(xmlStrndup(cur, p - cur));
2457: }
2458:
1.50 daniel 2459: /**
2460: * xmlCharStrndup:
2461: * @cur: the input char *
2462: * @len: the len of @cur
2463: *
1.123 daniel 2464: * a strndup for char's to xmlChar's
1.68 daniel 2465: *
1.123 daniel 2466: * Returns a new xmlChar * or NULL
1.45 daniel 2467: */
2468:
1.123 daniel 2469: xmlChar *
1.55 daniel 2470: xmlCharStrndup(const char *cur, int len) {
1.45 daniel 2471: int i;
1.135 daniel 2472: xmlChar *ret;
2473:
2474: if ((cur == NULL) || (len < 0)) return(NULL);
2475: ret = xmlMalloc((len + 1) * sizeof(xmlChar));
1.45 daniel 2476: if (ret == NULL) {
1.86 daniel 2477: fprintf(stderr, "malloc of %ld byte failed\n",
1.123 daniel 2478: (len + 1) * (long)sizeof(xmlChar));
1.45 daniel 2479: return(NULL);
2480: }
2481: for (i = 0;i < len;i++)
1.123 daniel 2482: ret[i] = (xmlChar) cur[i];
1.45 daniel 2483: ret[len] = 0;
2484: return(ret);
2485: }
2486:
1.50 daniel 2487: /**
2488: * xmlCharStrdup:
2489: * @cur: the input char *
2490: * @len: the len of @cur
2491: *
1.123 daniel 2492: * a strdup for char's to xmlChar's
1.68 daniel 2493: *
1.123 daniel 2494: * Returns a new xmlChar * or NULL
1.45 daniel 2495: */
2496:
1.123 daniel 2497: xmlChar *
1.55 daniel 2498: xmlCharStrdup(const char *cur) {
1.45 daniel 2499: const char *p = cur;
2500:
1.135 daniel 2501: if (cur == NULL) return(NULL);
1.45 daniel 2502: while (*p != '\0') p++;
2503: return(xmlCharStrndup(cur, p - cur));
2504: }
2505:
/**
 * xmlStrcmp:
 * @str1:  the first xmlChar *
 * @str2:  the second xmlChar *
 *
 * a strcmp for xmlChar's. A NULL string sorts before any non NULL
 * string and two NULL strings are equal.
 *
 * Returns the integer result of the comparison: 0 if equal, otherwise
 *      the difference between the first pair of differing xmlChar
 */

int
xmlStrcmp(const xmlChar *str1, const xmlChar *str2) {
    register int tmp;

    if ((str1 == NULL) && (str2 == NULL)) return(0);
    if (str1 == NULL) return(-1);
    if (str2 == NULL) return(1);
    /*
     * The terminator is tested on the xmlChar which was just compared,
     * so the loop never reads behind the end of either string (equal
     * xmlChar means both terminators are reached together).
     */
    do {
        tmp = *str1++ - *str2;
        if (tmp != 0) return(tmp);
    } while (*str2++ != 0);
    return(0);
}
2529:
/**
 * xmlStrncmp:
 * @str1:  the first xmlChar *
 * @str2:  the second xmlChar *
 * @len:  the max comparison length
 *
 * a strncmp for xmlChar's. Nothing is compared when @len is not
 * positive. A NULL string sorts before any non NULL string and two
 * NULL strings are equal.
 *
 * Returns the integer result of the comparison: 0 if the first @len
 *      xmlChar are equal, otherwise the difference between the first
 *      pair of differing xmlChar
 */

int
xmlStrncmp(const xmlChar *str1, const xmlChar *str2, int len) {
    register int tmp;

    if (len <= 0) return(0);
    if ((str1 == NULL) && (str2 == NULL)) return(0);
    if (str1 == NULL) return(-1);
    if (str2 == NULL) return(1);
    /*
     * The terminator is tested on the xmlChar which was just compared,
     * so the loop never reads behind the end of either string even if
     * @len is larger than both of them.
     */
    do {
        tmp = *str1++ - *str2;
        if (tmp != 0) return(tmp);
        len--;
        if (len <= 0) return(0);
    } while (*str2++ != 0);
    return(0);
}
2557:
/**
 * xmlStrchr:
 * @str:  the xmlChar * array
 * @val:  the xmlChar to search
 *
 * a strchr for xmlChar's. The terminating 0 is never matched, hence
 * searching for 0 yields NULL.
 *
 * Returns the xmlChar * for the first occurrence or NULL.
 */

const xmlChar *
xmlStrchr(const xmlChar *str, xmlChar val) {
    const xmlChar *scan;

    if (str == NULL)
        return(NULL);
    for (scan = str; *scan != 0; scan++) {
        if (*scan == val)
            return(scan);
    }
    return(NULL);
}
2577:
2578: /**
2579: * xmlStrstr:
1.123 daniel 2580: * @str: the xmlChar * array (haystack)
2581: * @val: the xmlChar to search (needle)
1.89 daniel 2582: *
1.123 daniel 2583: * a strstr for xmlChar's
1.89 daniel 2584: *
1.123 daniel 2585: * Returns the xmlChar * for the first occurence or NULL.
1.89 daniel 2586: */
2587:
1.123 daniel 2588: const xmlChar *
2589: xmlStrstr(const xmlChar *str, xmlChar *val) {
1.89 daniel 2590: int n;
2591:
2592: if (str == NULL) return(NULL);
2593: if (val == NULL) return(NULL);
2594: n = xmlStrlen(val);
2595:
2596: if (n == 0) return(str);
2597: while (*str != 0) {
2598: if (*str == *val) {
1.123 daniel 2599: if (!xmlStrncmp(str, val, n)) return((const xmlChar *) str);
1.89 daniel 2600: }
2601: str++;
2602: }
2603: return(NULL);
2604: }
2605:
2606: /**
2607: * xmlStrsub:
1.123 daniel 2608: * @str: the xmlChar * array (haystack)
1.89 daniel 2609: * @start: the index of the first char (zero based)
2610: * @len: the length of the substring
2611: *
2612: * Extract a substring of a given string
2613: *
1.123 daniel 2614: * Returns the xmlChar * for the first occurence or NULL.
1.89 daniel 2615: */
2616:
1.123 daniel 2617: xmlChar *
2618: xmlStrsub(const xmlChar *str, int start, int len) {
1.89 daniel 2619: int i;
2620:
2621: if (str == NULL) return(NULL);
2622: if (start < 0) return(NULL);
1.90 daniel 2623: if (len < 0) return(NULL);
1.89 daniel 2624:
2625: for (i = 0;i < start;i++) {
2626: if (*str == 0) return(NULL);
2627: str++;
2628: }
2629: if (*str == 0) return(NULL);
2630: return(xmlStrndup(str, len));
1.14 veillard 2631: }
1.28 daniel 2632:
/**
 * xmlStrlen:
 * @str:  the xmlChar * array
 *
 * length of a xmlChar's string
 *
 * Returns the number of xmlChar found before the terminating 0,
 *      0 if @str is NULL.
 */

int
xmlStrlen(const xmlChar *str) {
    const xmlChar *tail;

    if (str == NULL)
        return(0);
    /* walk to the terminator, the distance covered is the length */
    for (tail = str; *tail != 0; tail++)
        ;
    return((int) (tail - str));
}
2653:
1.50 daniel 2654: /**
2655: * xmlStrncat:
1.123 daniel 2656: * @cur: the original xmlChar * array
2657: * @add: the xmlChar * array added
1.50 daniel 2658: * @len: the length of @add
2659: *
1.123 daniel 2660: * a strncat for array of xmlChar's
1.68 daniel 2661: *
1.123 daniel 2662: * Returns a new xmlChar * containing the concatenated string.
1.45 daniel 2663: */
2664:
1.123 daniel 2665: xmlChar *
2666: xmlStrncat(xmlChar *cur, const xmlChar *add, int len) {
1.45 daniel 2667: int size;
1.123 daniel 2668: xmlChar *ret;
1.45 daniel 2669:
2670: if ((add == NULL) || (len == 0))
2671: return(cur);
2672: if (cur == NULL)
2673: return(xmlStrndup(add, len));
2674:
2675: size = xmlStrlen(cur);
1.123 daniel 2676: ret = xmlRealloc(cur, (size + len + 1) * sizeof(xmlChar));
1.45 daniel 2677: if (ret == NULL) {
1.86 daniel 2678: fprintf(stderr, "xmlStrncat: realloc of %ld byte failed\n",
1.123 daniel 2679: (size + len + 1) * (long)sizeof(xmlChar));
1.45 daniel 2680: return(cur);
2681: }
1.123 daniel 2682: memcpy(&ret[size], add, len * sizeof(xmlChar));
1.45 daniel 2683: ret[size + len] = 0;
2684: return(ret);
2685: }
2686:
1.50 daniel 2687: /**
2688: * xmlStrcat:
1.123 daniel 2689: * @cur: the original xmlChar * array
2690: * @add: the xmlChar * array added
1.50 daniel 2691: *
1.152 daniel 2692: * a strcat for array of xmlChar's. Since they are supposed to be
2693: * encoded in UTF-8 or an encoding with 8bit based chars, we assume
2694: * a termination mark of '0'.
1.68 daniel 2695: *
1.123 daniel 2696: * Returns a new xmlChar * containing the concatenated string.
1.45 daniel 2697: */
1.123 daniel 2698: xmlChar *
2699: xmlStrcat(xmlChar *cur, const xmlChar *add) {
2700: const xmlChar *p = add;
1.45 daniel 2701:
2702: if (add == NULL) return(cur);
2703: if (cur == NULL)
2704: return(xmlStrdup(add));
2705:
1.152 daniel 2706: while (*p != 0) p++;
1.45 daniel 2707: return(xmlStrncat(cur, add, p - add));
2708: }
2709:
2710: /************************************************************************
2711: * *
2712: * Commodity functions, cleanup needed ? *
2713: * *
2714: ************************************************************************/
2715:
1.50 daniel 2716: /**
2717: * areBlanks:
2718: * @ctxt: an XML parser context
1.123 daniel 2719: * @str: a xmlChar *
1.50 daniel 2720: * @len: the size of @str
2721: *
1.45 daniel 2722: * Is this a sequence of blank chars that one can ignore ?
1.50 daniel 2723: *
1.68 daniel 2724: * Returns 1 if ignorable 0 otherwise.
1.45 daniel 2725: */
2726:
1.123 daniel 2727: static int areBlanks(xmlParserCtxtPtr ctxt, const xmlChar *str, int len) {
1.104 daniel 2728: int i, ret;
1.45 daniel 2729: xmlNodePtr lastChild;
2730:
2731: for (i = 0;i < len;i++)
2732: if (!(IS_BLANK(str[i]))) return(0);
2733:
1.152 daniel 2734: if (RAW != '<') return(0);
1.72 daniel 2735: if (ctxt->node == NULL) return(0);
1.104 daniel 2736: if (ctxt->myDoc != NULL) {
2737: ret = xmlIsMixedElement(ctxt->myDoc, ctxt->node->name);
2738: if (ret == 0) return(1);
2739: if (ret == 1) return(0);
2740: }
2741: /*
2742: * heuristic
2743: */
1.45 daniel 2744: lastChild = xmlGetLastChild(ctxt->node);
2745: if (lastChild == NULL) {
2746: if (ctxt->node->content != NULL) return(0);
2747: } else if (xmlNodeIsText(lastChild))
2748: return(0);
1.157 daniel 2749: else if ((ctxt->node->children != NULL) &&
2750: (xmlNodeIsText(ctxt->node->children)))
1.104 daniel 2751: return(0);
1.45 daniel 2752: return(1);
2753: }
2754:
1.50 daniel 2755: /**
2756: * xmlHandleEntity:
2757: * @ctxt: an XML parser context
2758: * @entity: an XML entity pointer.
2759: *
2760: * Default handling of defined entities, when should we define a new input
1.45 daniel 2761: * stream ? When do we just handle that as a set of chars ?
1.99 daniel 2762: *
2763: * OBSOLETE: to be removed at some point.
1.45 daniel 2764: */
2765:
1.55 daniel 2766: void
2767: xmlHandleEntity(xmlParserCtxtPtr ctxt, xmlEntityPtr entity) {
1.45 daniel 2768: int len;
1.50 daniel 2769: xmlParserInputPtr input;
1.45 daniel 2770:
2771: if (entity->content == NULL) {
1.123 daniel 2772: ctxt->errNo = XML_ERR_INTERNAL_ERROR;
1.55 daniel 2773: if ((ctxt->sax != NULL) && (ctxt->sax->error != NULL))
1.74 daniel 2774: ctxt->sax->error(ctxt->userData, "xmlHandleEntity %s: content == NULL\n",
1.45 daniel 2775: entity->name);
1.59 daniel 2776: ctxt->wellFormed = 0;
1.45 daniel 2777: return;
2778: }
2779: len = xmlStrlen(entity->content);
2780: if (len <= 2) goto handle_as_char;
2781:
2782: /*
2783: * Redefine its content as an input stream.
2784: */
1.50 daniel 2785: input = xmlNewEntityInputStream(ctxt, entity);
2786: xmlPushInput(ctxt, input);
1.45 daniel 2787: return;
2788:
2789: handle_as_char:
2790: /*
2791: * Just handle the content as a set of chars.
2792: */
1.171 daniel 2793: if ((ctxt->sax != NULL) && (!ctxt->disableSAX) &&
2794: (ctxt->sax->characters != NULL))
1.74 daniel 2795: ctxt->sax->characters(ctxt->userData, entity->content, len);
1.45 daniel 2796:
2797: }
2798:
2799: /*
2800: * Forward definition for recursive behaviour.
2801: */
1.77 daniel 2802: void xmlParsePEReference(xmlParserCtxtPtr ctxt);
2803: void xmlParseReference(xmlParserCtxtPtr ctxt);
1.45 daniel 2804:
1.28 daniel 2805: /************************************************************************
2806: * *
2807: * Extra stuff for namespace support *
2808: * Relates to http://www.w3.org/TR/WD-xml-names *
2809: * *
2810: ************************************************************************/
2811:
1.50 daniel 2812: /**
2813: * xmlNamespaceParseNCName:
2814: * @ctxt: an XML parser context
2815: *
2816: * parse an XML namespace name.
1.28 daniel 2817: *
2818: * [NS 3] NCName ::= (Letter | '_') (NCNameChar)*
2819: *
2820: * [NS 4] NCNameChar ::= Letter | Digit | '.' | '-' | '_' |
2821: * CombiningChar | Extender
1.68 daniel 2822: *
2823: * Returns the namespace name or NULL
1.28 daniel 2824: */
2825:
1.123 daniel 2826: xmlChar *
1.55 daniel 2827: xmlNamespaceParseNCName(xmlParserCtxtPtr ctxt) {
1.152 daniel 2828: xmlChar buf[XML_MAX_NAMELEN + 5];
2829: int len = 0, l;
2830: int cur = CUR_CHAR(l);
1.28 daniel 2831:
1.156 daniel 2832: /* load first the value of the char !!! */
1.152 daniel 2833: if (!IS_LETTER(cur) && (cur != '_')) return(NULL);
1.28 daniel 2834:
1.152 daniel 2835: while ((IS_LETTER(cur)) || (IS_DIGIT(cur)) ||
2836: (cur == '.') || (cur == '-') ||
2837: (cur == '_') ||
2838: (IS_COMBINING(cur)) ||
2839: (IS_EXTENDER(cur))) {
2840: COPY_BUF(l,buf,len,cur);
2841: NEXTL(l);
2842: cur = CUR_CHAR(l);
1.91 daniel 2843: if (len >= XML_MAX_NAMELEN) {
2844: fprintf(stderr,
2845: "xmlNamespaceParseNCName: reached XML_MAX_NAMELEN limit\n");
1.152 daniel 2846: while ((IS_LETTER(cur)) || (IS_DIGIT(cur)) ||
2847: (cur == '.') || (cur == '-') ||
2848: (cur == '_') ||
2849: (IS_COMBINING(cur)) ||
2850: (IS_EXTENDER(cur))) {
2851: NEXTL(l);
2852: cur = CUR_CHAR(l);
2853: }
1.91 daniel 2854: break;
2855: }
2856: }
2857: return(xmlStrndup(buf, len));
1.28 daniel 2858: }
2859:
1.50 daniel 2860: /**
2861: * xmlNamespaceParseQName:
2862: * @ctxt: an XML parser context
1.123 daniel 2863: * @prefix: a xmlChar **
1.50 daniel 2864: *
2865: * parse an XML qualified name
1.28 daniel 2866: *
2867: * [NS 5] QName ::= (Prefix ':')? LocalPart
2868: *
2869: * [NS 6] Prefix ::= NCName
2870: *
2871: * [NS 7] LocalPart ::= NCName
1.68 daniel 2872: *
1.127 daniel 2873: * Returns the local part, and prefix is updated
1.50 daniel 2874: * to get the Prefix if any.
1.28 daniel 2875: */
2876:
1.123 daniel 2877: xmlChar *
2878: xmlNamespaceParseQName(xmlParserCtxtPtr ctxt, xmlChar **prefix) {
2879: xmlChar *ret = NULL;
1.28 daniel 2880:
2881: *prefix = NULL;
2882: ret = xmlNamespaceParseNCName(ctxt);
1.152 daniel 2883: if (RAW == ':') {
1.28 daniel 2884: *prefix = ret;
1.40 daniel 2885: NEXT;
1.28 daniel 2886: ret = xmlNamespaceParseNCName(ctxt);
2887: }
2888:
2889: return(ret);
2890: }
2891:
1.50 daniel 2892: /**
1.72 daniel 2893: * xmlSplitQName:
1.162 daniel 2894: * @ctxt: an XML parser context
1.72 daniel 2895: * @name: an XML parser context
1.123 daniel 2896: * @prefix: a xmlChar **
1.72 daniel 2897: *
2898: * parse an XML qualified name string
2899: *
2900: * [NS 5] QName ::= (Prefix ':')? LocalPart
2901: *
2902: * [NS 6] Prefix ::= NCName
2903: *
2904: * [NS 7] LocalPart ::= NCName
2905: *
1.127 daniel 2906: * Returns the local part, and prefix is updated
1.72 daniel 2907: * to get the Prefix if any.
2908: */
2909:
1.123 daniel 2910: xmlChar *
1.162 daniel 2911: xmlSplitQName(xmlParserCtxtPtr ctxt, const xmlChar *name, xmlChar **prefix) {
2912: xmlChar buf[XML_MAX_NAMELEN + 5];
2913: int len = 0;
1.123 daniel 2914: xmlChar *ret = NULL;
2915: const xmlChar *cur = name;
1.162 daniel 2916: int c,l;
1.72 daniel 2917:
2918: *prefix = NULL;
1.113 daniel 2919:
2920: /* xml: prefix is not really a namespace */
2921: if ((cur[0] == 'x') && (cur[1] == 'm') &&
2922: (cur[2] == 'l') && (cur[3] == ':'))
2923: return(xmlStrdup(name));
2924:
1.162 daniel 2925: /* nasty but valid */
2926: if (cur[0] == ':')
2927: return(xmlStrdup(name));
2928:
2929: c = CUR_SCHAR(cur, l);
2930: if (!IS_LETTER(c) && (c != '_')) return(NULL);
1.72 daniel 2931:
1.162 daniel 2932: while ((IS_LETTER(c)) || (IS_DIGIT(c)) ||
2933: (c == '.') || (c == '-') ||
2934: (c == '_') ||
2935: (IS_COMBINING(c)) ||
2936: (IS_EXTENDER(c))) {
2937: COPY_BUF(l,buf,len,c);
2938: cur += l;
2939: c = CUR_SCHAR(cur, l);
2940: }
1.72 daniel 2941:
1.162 daniel 2942: ret = xmlStrndup(buf, len);
1.72 daniel 2943:
1.162 daniel 2944: if (c == ':') {
2945: cur += l;
1.163 daniel 2946: c = CUR_SCHAR(cur, l);
1.162 daniel 2947: if (!IS_LETTER(c) && (c != '_')) return(ret);
1.72 daniel 2948: *prefix = ret;
1.162 daniel 2949: len = 0;
1.72 daniel 2950:
1.162 daniel 2951: while ((IS_LETTER(c)) || (IS_DIGIT(c)) ||
2952: (c == '.') || (c == '-') ||
2953: (c == '_') ||
2954: (IS_COMBINING(c)) ||
2955: (IS_EXTENDER(c))) {
2956: COPY_BUF(l,buf,len,c);
2957: cur += l;
2958: c = CUR_SCHAR(cur, l);
2959: }
1.72 daniel 2960:
1.162 daniel 2961: ret = xmlStrndup(buf, len);
1.72 daniel 2962: }
2963:
2964: return(ret);
2965: }
2966: /**
1.50 daniel 2967: * xmlNamespaceParseNSDef:
2968: * @ctxt: an XML parser context
2969: *
2970: * parse a namespace prefix declaration
1.28 daniel 2971: *
2972: * [NS 1] NSDef ::= PrefixDef Eq SystemLiteral
2973: *
2974: * [NS 2] PrefixDef ::= 'xmlns' (':' NCName)?
1.68 daniel 2975: *
2976: * Returns the namespace name
1.28 daniel 2977: */
2978:
1.123 daniel 2979: xmlChar *
1.55 daniel 2980: xmlNamespaceParseNSDef(xmlParserCtxtPtr ctxt) {
1.123 daniel 2981: xmlChar *name = NULL;
1.28 daniel 2982:
1.152 daniel 2983: if ((RAW == 'x') && (NXT(1) == 'm') &&
1.40 daniel 2984: (NXT(2) == 'l') && (NXT(3) == 'n') &&
2985: (NXT(4) == 's')) {
2986: SKIP(5);
1.152 daniel 2987: if (RAW == ':') {
1.40 daniel 2988: NEXT;
1.28 daniel 2989: name = xmlNamespaceParseNCName(ctxt);
2990: }
2991: }
1.39 daniel 2992: return(name);
1.28 daniel 2993: }
2994:
1.50 daniel 2995: /**
2996: * xmlParseQuotedString:
2997: * @ctxt: an XML parser context
2998: *
1.45 daniel 2999: * [OLD] Parse and return a string between quotes or doublequotes
1.110 daniel 3000: * To be removed at next drop of binary compatibility
1.68 daniel 3001: *
3002: * Returns the string parser or NULL.
1.45 daniel 3003: */
1.123 daniel 3004: xmlChar *
1.55 daniel 3005: xmlParseQuotedString(xmlParserCtxtPtr ctxt) {
1.135 daniel 3006: xmlChar *buf = NULL;
1.152 daniel 3007: int len = 0,l;
1.140 daniel 3008: int size = XML_PARSER_BUFFER_SIZE;
1.152 daniel 3009: int c;
1.45 daniel 3010:
1.135 daniel 3011: buf = (xmlChar *) xmlMalloc(size * sizeof(xmlChar));
3012: if (buf == NULL) {
3013: fprintf(stderr, "malloc of %d byte failed\n", size);
3014: return(NULL);
3015: }
1.152 daniel 3016: if (RAW == '"') {
1.45 daniel 3017: NEXT;
1.152 daniel 3018: c = CUR_CHAR(l);
1.135 daniel 3019: while (IS_CHAR(c) && (c != '"')) {
1.152 daniel 3020: if (len + 5 >= size) {
1.135 daniel 3021: size *= 2;
3022: buf = xmlRealloc(buf, size * sizeof(xmlChar));
3023: if (buf == NULL) {
3024: fprintf(stderr, "realloc of %d byte failed\n", size);
3025: return(NULL);
3026: }
3027: }
1.152 daniel 3028: COPY_BUF(l,buf,len,c);
3029: NEXTL(l);
3030: c = CUR_CHAR(l);
1.135 daniel 3031: }
3032: if (c != '"') {
1.123 daniel 3033: ctxt->errNo = XML_ERR_STRING_NOT_CLOSED;
1.55 daniel 3034: if ((ctxt->sax != NULL) && (ctxt->sax->error != NULL))
1.135 daniel 3035: ctxt->sax->error(ctxt->userData,
3036: "String not closed \"%.50s\"\n", buf);
1.59 daniel 3037: ctxt->wellFormed = 0;
1.55 daniel 3038: } else {
1.45 daniel 3039: NEXT;
3040: }
1.152 daniel 3041: } else if (RAW == '\''){
1.45 daniel 3042: NEXT;
1.135 daniel 3043: c = CUR;
3044: while (IS_CHAR(c) && (c != '\'')) {
3045: if (len + 1 >= size) {
3046: size *= 2;
3047: buf = xmlRealloc(buf, size * sizeof(xmlChar));
3048: if (buf == NULL) {
3049: fprintf(stderr, "realloc of %d byte failed\n", size);
3050: return(NULL);
3051: }
3052: }
3053: buf[len++] = c;
3054: NEXT;
3055: c = CUR;
3056: }
1.152 daniel 3057: if (RAW != '\'') {
1.123 daniel 3058: ctxt->errNo = XML_ERR_STRING_NOT_CLOSED;
1.55 daniel 3059: if ((ctxt->sax != NULL) && (ctxt->sax->error != NULL))
1.135 daniel 3060: ctxt->sax->error(ctxt->userData,
3061: "String not closed \"%.50s\"\n", buf);
1.59 daniel 3062: ctxt->wellFormed = 0;
1.55 daniel 3063: } else {
1.45 daniel 3064: NEXT;
3065: }
3066: }
1.135 daniel 3067: return(buf);
1.45 daniel 3068: }
3069:
1.50 daniel 3070: /**
3071: * xmlParseNamespace:
3072: * @ctxt: an XML parser context
3073: *
1.45 daniel 3074: * [OLD] xmlParseNamespace: parse specific PI '<?namespace ...' constructs.
3075: *
3076: * This is what the older xml-name Working Draft specified, a bunch of
3077: * other stuff may still rely on it, so support is still here as
1.127 daniel 3078: * if it was declared on the root of the Tree:-(
1.110 daniel 3079: *
3080: * To be removed at next drop of binary compatibility
1.45 daniel 3081: */
3082:
1.55 daniel 3083: void
3084: xmlParseNamespace(xmlParserCtxtPtr ctxt) {
1.123 daniel 3085: xmlChar *href = NULL;
3086: xmlChar *prefix = NULL;
1.45 daniel 3087: int garbage = 0;
3088:
3089: /*
3090: * We just skipped "namespace" or "xml:namespace"
3091: */
3092: SKIP_BLANKS;
3093:
1.153 daniel 3094: while (IS_CHAR(RAW) && (RAW != '>')) {
1.45 daniel 3095: /*
3096: * We can have "ns" or "prefix" attributes
3097: * Old encoding as 'href' or 'AS' attributes is still supported
3098: */
1.152 daniel 3099: if ((RAW == 'n') && (NXT(1) == 's')) {
1.45 daniel 3100: garbage = 0;
3101: SKIP(2);
3102: SKIP_BLANKS;
3103:
1.152 daniel 3104: if (RAW != '=') continue;
1.45 daniel 3105: NEXT;
3106: SKIP_BLANKS;
3107:
3108: href = xmlParseQuotedString(ctxt);
3109: SKIP_BLANKS;
1.152 daniel 3110: } else if ((RAW == 'h') && (NXT(1) == 'r') &&
1.45 daniel 3111: (NXT(2) == 'e') && (NXT(3) == 'f')) {
3112: garbage = 0;
3113: SKIP(4);
3114: SKIP_BLANKS;
3115:
1.152 daniel 3116: if (RAW != '=') continue;
1.45 daniel 3117: NEXT;
3118: SKIP_BLANKS;
3119:
3120: href = xmlParseQuotedString(ctxt);
3121: SKIP_BLANKS;
1.152 daniel 3122: } else if ((RAW == 'p') && (NXT(1) == 'r') &&
1.45 daniel 3123: (NXT(2) == 'e') && (NXT(3) == 'f') &&
3124: (NXT(4) == 'i') && (NXT(5) == 'x')) {
3125: garbage = 0;
3126: SKIP(6);
3127: SKIP_BLANKS;
3128:
1.152 daniel 3129: if (RAW != '=') continue;
1.45 daniel 3130: NEXT;
3131: SKIP_BLANKS;
3132:
3133: prefix = xmlParseQuotedString(ctxt);
3134: SKIP_BLANKS;
1.152 daniel 3135: } else if ((RAW == 'A') && (NXT(1) == 'S')) {
1.45 daniel 3136: garbage = 0;
3137: SKIP(2);
3138: SKIP_BLANKS;
3139:
1.152 daniel 3140: if (RAW != '=') continue;
1.45 daniel 3141: NEXT;
3142: SKIP_BLANKS;
3143:
3144: prefix = xmlParseQuotedString(ctxt);
3145: SKIP_BLANKS;
1.152 daniel 3146: } else if ((RAW == '?') && (NXT(1) == '>')) {
1.45 daniel 3147: garbage = 0;
1.91 daniel 3148: NEXT;
1.45 daniel 3149: } else {
3150: /*
3151: * Found garbage when parsing the namespace
3152: */
1.122 daniel 3153: if (!garbage) {
1.55 daniel 3154: if ((ctxt->sax != NULL) && (ctxt->sax->error != NULL))
1.122 daniel 3155: ctxt->sax->error(ctxt->userData,
3156: "xmlParseNamespace found garbage\n");
3157: }
1.123 daniel 3158: ctxt->errNo = XML_ERR_NS_DECL_ERROR;
1.59 daniel 3159: ctxt->wellFormed = 0;
1.45 daniel 3160: NEXT;
3161: }
3162: }
3163:
3164: MOVETO_ENDTAG(CUR_PTR);
3165: NEXT;
3166:
3167: /*
3168: * Register the DTD.
1.72 daniel 3169: if (href != NULL)
3170: if ((ctxt->sax != NULL) && (ctxt->sax->globalNamespace != NULL))
1.74 daniel 3171: ctxt->sax->globalNamespace(ctxt->userData, href, prefix);
1.45 daniel 3172: */
3173:
1.119 daniel 3174: if (prefix != NULL) xmlFree(prefix);
3175: if (href != NULL) xmlFree(href);
1.45 daniel 3176: }
3177:
1.28 daniel 3178: /************************************************************************
3179: * *
3180: * The parser itself *
3181: * Relates to http://www.w3.org/TR/REC-xml *
3182: * *
3183: ************************************************************************/
1.14 veillard 3184:
1.50 daniel 3185: /**
1.97 daniel 3186: * xmlScanName:
3187: * @ctxt: an XML parser context
3188: *
3189: * Trickery: parse an XML name but without consuming the input flow
3190: * Needed for rollback cases.
3191: *
3192: * [4] NameChar ::= Letter | Digit | '.' | '-' | '_' | ':' |
3193: * CombiningChar | Extender
3194: *
3195: * [5] Name ::= (Letter | '_' | ':') (NameChar)*
3196: *
3197: * [6] Names ::= Name (S Name)*
3198: *
3199: * Returns the Name parsed or NULL
3200: */
3201:
1.123 daniel 3202: xmlChar *
1.97 daniel 3203: xmlScanName(xmlParserCtxtPtr ctxt) {
1.123 daniel 3204: xmlChar buf[XML_MAX_NAMELEN];
1.97 daniel 3205: int len = 0;
3206:
3207: GROW;
1.152 daniel 3208: if (!IS_LETTER(RAW) && (RAW != '_') &&
3209: (RAW != ':')) {
1.97 daniel 3210: return(NULL);
3211: }
3212:
3213: while ((IS_LETTER(NXT(len))) || (IS_DIGIT(NXT(len))) ||
3214: (NXT(len) == '.') || (NXT(len) == '-') ||
3215: (NXT(len) == '_') || (NXT(len) == ':') ||
3216: (IS_COMBINING(NXT(len))) ||
3217: (IS_EXTENDER(NXT(len)))) {
3218: buf[len] = NXT(len);
3219: len++;
3220: if (len >= XML_MAX_NAMELEN) {
3221: fprintf(stderr,
3222: "xmlScanName: reached XML_MAX_NAMELEN limit\n");
3223: while ((IS_LETTER(NXT(len))) || (IS_DIGIT(NXT(len))) ||
3224: (NXT(len) == '.') || (NXT(len) == '-') ||
3225: (NXT(len) == '_') || (NXT(len) == ':') ||
3226: (IS_COMBINING(NXT(len))) ||
3227: (IS_EXTENDER(NXT(len))))
3228: len++;
3229: break;
3230: }
3231: }
3232: return(xmlStrndup(buf, len));
3233: }
3234:
3235: /**
1.50 daniel 3236: * xmlParseName:
3237: * @ctxt: an XML parser context
3238: *
3239: * parse an XML name.
1.22 daniel 3240: *
3241: * [4] NameChar ::= Letter | Digit | '.' | '-' | '_' | ':' |
3242: * CombiningChar | Extender
3243: *
3244: * [5] Name ::= (Letter | '_' | ':') (NameChar)*
3245: *
3246: * [6] Names ::= Name (S Name)*
1.68 daniel 3247: *
3248: * Returns the Name parsed or NULL
1.1 veillard 3249: */
3250:
1.123 daniel 3251: xmlChar *
1.55 daniel 3252: xmlParseName(xmlParserCtxtPtr ctxt) {
1.160 daniel 3253: xmlChar buf[XML_MAX_NAMELEN + 5];
3254: int len = 0, l;
3255: int c;
1.1 veillard 3256:
1.91 daniel 3257: GROW;
1.160 daniel 3258: c = CUR_CHAR(l);
3259: if (!IS_LETTER(c) && (c != '_') &&
3260: (c != ':')) {
1.91 daniel 3261: return(NULL);
3262: }
1.40 daniel 3263:
1.160 daniel 3264: while ((IS_LETTER(c)) || (IS_DIGIT(c)) ||
3265: (c == '.') || (c == '-') ||
3266: (c == '_') || (c == ':') ||
3267: (IS_COMBINING(c)) ||
3268: (IS_EXTENDER(c))) {
3269: COPY_BUF(l,buf,len,c);
3270: NEXTL(l);
3271: c = CUR_CHAR(l);
1.91 daniel 3272: if (len >= XML_MAX_NAMELEN) {
3273: fprintf(stderr,
3274: "xmlParseName: reached XML_MAX_NAMELEN limit\n");
1.160 daniel 3275: while ((IS_LETTER(c)) || (IS_DIGIT(c)) ||
3276: (c == '.') || (c == '-') ||
3277: (c == '_') || (c == ':') ||
3278: (IS_COMBINING(c)) ||
3279: (IS_EXTENDER(c))) {
3280: NEXTL(l);
3281: c = CUR_CHAR(l);
1.97 daniel 3282: }
1.91 daniel 3283: break;
3284: }
3285: }
3286: return(xmlStrndup(buf, len));
1.22 daniel 3287: }
3288:
1.50 daniel 3289: /**
1.135 daniel 3290: * xmlParseStringName:
3291: * @ctxt: an XML parser context
3292: * @str: a pointer to an index in the string
3293: *
3294: * parse an XML name.
3295: *
3296: * [4] NameChar ::= Letter | Digit | '.' | '-' | '_' | ':' |
3297: * CombiningChar | Extender
3298: *
3299: * [5] Name ::= (Letter | '_' | ':') (NameChar)*
3300: *
3301: * [6] Names ::= Name (S Name)*
3302: *
3303: * Returns the Name parsed or NULL. The str pointer
3304: * is updated to the current location in the string.
3305: */
3306:
3307: xmlChar *
3308: xmlParseStringName(xmlParserCtxtPtr ctxt, const xmlChar** str) {
3309: const xmlChar *ptr;
3310: const xmlChar *start;
3311: xmlChar cur;
3312:
3313: if ((str == NULL) || (*str == NULL)) return(NULL);
3314:
3315: start = ptr = *str;
3316: cur = *ptr;
3317: if (!IS_LETTER(cur) && (cur != '_') &&
3318: (cur != ':')) {
3319: return(NULL);
3320: }
3321:
3322: while ((IS_LETTER(cur)) || (IS_DIGIT(cur)) ||
3323: (cur == '.') || (cur == '-') ||
3324: (cur == '_') || (cur == ':') ||
3325: (IS_COMBINING(cur)) ||
3326: (IS_EXTENDER(cur))) {
3327: ptr++;
3328: cur = *ptr;
3329: }
3330: *str = ptr;
3331: return(xmlStrndup(start, ptr - start ));
3332: }
3333:
3334: /**
1.50 daniel 3335: * xmlParseNmtoken:
3336: * @ctxt: an XML parser context
3337: *
3338: * parse an XML Nmtoken.
1.22 daniel 3339: *
3340: * [7] Nmtoken ::= (NameChar)+
3341: *
3342: * [8] Nmtokens ::= Nmtoken (S Nmtoken)*
1.68 daniel 3343: *
3344: * Returns the Nmtoken parsed or NULL
1.22 daniel 3345: */
3346:
1.123 daniel 3347: xmlChar *
1.55 daniel 3348: xmlParseNmtoken(xmlParserCtxtPtr ctxt) {
1.123 daniel 3349: xmlChar buf[XML_MAX_NAMELEN];
1.91 daniel 3350: int len = 0;
1.160 daniel 3351: int c,l;
1.22 daniel 3352:
1.91 daniel 3353: GROW;
1.160 daniel 3354: c = CUR_CHAR(l);
3355: while ((IS_LETTER(c)) || (IS_DIGIT(c)) ||
3356: (c == '.') || (c == '-') ||
3357: (c == '_') || (c == ':') ||
3358: (IS_COMBINING(c)) ||
3359: (IS_EXTENDER(c))) {
3360: COPY_BUF(l,buf,len,c);
3361: NEXTL(l);
3362: c = CUR_CHAR(l);
1.91 daniel 3363: if (len >= XML_MAX_NAMELEN) {
3364: fprintf(stderr,
3365: "xmlParseNmtoken: reached XML_MAX_NAMELEN limit\n");
1.160 daniel 3366: while ((IS_LETTER(c)) || (IS_DIGIT(c)) ||
3367: (c == '.') || (c == '-') ||
3368: (c == '_') || (c == ':') ||
3369: (IS_COMBINING(c)) ||
3370: (IS_EXTENDER(c))) {
3371: NEXTL(l);
3372: c = CUR_CHAR(l);
3373: }
1.91 daniel 3374: break;
3375: }
3376: }
1.168 daniel 3377: if (len == 0)
3378: return(NULL);
1.91 daniel 3379: return(xmlStrndup(buf, len));
1.1 veillard 3380: }
3381:
1.50 daniel 3382: /**
3383: * xmlParseEntityValue:
3384: * @ctxt: an XML parser context
1.78 daniel 3385: * @orig: if non-NULL store a copy of the original entity value
1.50 daniel 3386: *
3387: * parse a value for ENTITY decl.
1.24 daniel 3388: *
3389: * [9] EntityValue ::= '"' ([^%&"] | PEReference | Reference)* '"' |
3390: * "'" ([^%&'] | PEReference | Reference)* "'"
1.68 daniel 3391: *
1.78 daniel 3392: * Returns the EntityValue parsed with reference substitued or NULL
1.24 daniel 3393: */
3394:
1.123 daniel 3395: xmlChar *
3396: xmlParseEntityValue(xmlParserCtxtPtr ctxt, xmlChar **orig) {
1.135 daniel 3397: xmlChar *buf = NULL;
3398: int len = 0;
1.140 daniel 3399: int size = XML_PARSER_BUFFER_SIZE;
1.152 daniel 3400: int c, l;
1.135 daniel 3401: xmlChar stop;
1.123 daniel 3402: xmlChar *ret = NULL;
1.98 daniel 3403: xmlParserInputPtr input;
1.24 daniel 3404:
1.152 daniel 3405: if (RAW == '"') stop = '"';
3406: else if (RAW == '\'') stop = '\'';
1.135 daniel 3407: else {
3408: ctxt->errNo = XML_ERR_ENTITY_NOT_STARTED;
3409: if ((ctxt->sax != NULL) && (ctxt->sax->error != NULL))
3410: ctxt->sax->error(ctxt->userData, "EntityValue: \" or ' expected\n");
3411: ctxt->wellFormed = 0;
3412: return(NULL);
3413: }
3414: buf = (xmlChar *) xmlMalloc(size * sizeof(xmlChar));
3415: if (buf == NULL) {
3416: fprintf(stderr, "malloc of %d byte failed\n", size);
3417: return(NULL);
3418: }
1.94 daniel 3419:
1.135 daniel 3420: /*
3421: * The content of the entity definition is copied in a buffer.
3422: */
1.94 daniel 3423:
1.135 daniel 3424: ctxt->instate = XML_PARSER_ENTITY_VALUE;
3425: input = ctxt->input;
3426: GROW;
3427: NEXT;
1.152 daniel 3428: c = CUR_CHAR(l);
1.135 daniel 3429: /*
3430: * NOTE: 4.4.5 Included in Literal
3431: * When a parameter entity reference appears in a literal entity
3432: * value, ... a single or double quote character in the replacement
3433: * text is always treated as a normal data character and will not
3434: * terminate the literal.
3435: * In practice it means we stop the loop only when back at parsing
3436: * the initial entity and the quote is found
3437: */
3438: while (IS_CHAR(c) && ((c != stop) || (ctxt->input != input))) {
1.152 daniel 3439: if (len + 5 >= size) {
1.135 daniel 3440: size *= 2;
3441: buf = xmlRealloc(buf, size * sizeof(xmlChar));
3442: if (buf == NULL) {
3443: fprintf(stderr, "realloc of %d byte failed\n", size);
3444: return(NULL);
1.94 daniel 3445: }
1.79 daniel 3446: }
1.152 daniel 3447: COPY_BUF(l,buf,len,c);
3448: NEXTL(l);
1.98 daniel 3449: /*
1.135 daniel 3450: * Pop-up of finished entities.
1.98 daniel 3451: */
1.152 daniel 3452: while ((RAW == 0) && (ctxt->inputNr > 1))
1.135 daniel 3453: xmlPopInput(ctxt);
1.152 daniel 3454:
3455: c = CUR_CHAR(l);
1.135 daniel 3456: if (c == 0) {
1.94 daniel 3457: GROW;
1.152 daniel 3458: c = CUR_CHAR(l);
1.79 daniel 3459: }
1.135 daniel 3460: }
3461: buf[len] = 0;
3462:
3463: /*
3464: * Then PEReference entities are substituted.
3465: */
3466: if (c != stop) {
3467: ctxt->errNo = XML_ERR_ENTITY_NOT_FINISHED;
1.55 daniel 3468: if ((ctxt->sax != NULL) && (ctxt->sax->error != NULL))
1.135 daniel 3469: ctxt->sax->error(ctxt->userData, "EntityValue: \" expected\n");
1.59 daniel 3470: ctxt->wellFormed = 0;
1.170 daniel 3471: xmlFree(buf);
1.135 daniel 3472: } else {
3473: NEXT;
3474: /*
3475: * NOTE: 4.4.7 Bypassed
3476: * When a general entity reference appears in the EntityValue in
3477: * an entity declaration, it is bypassed and left as is.
3478: * so XML_SUBSTITUTE_REF is not set.
3479: */
3480: ret = xmlStringDecodeEntities(ctxt, buf, XML_SUBSTITUTE_PEREF,
3481: 0, 0, 0);
3482: if (orig != NULL)
3483: *orig = buf;
3484: else
3485: xmlFree(buf);
1.24 daniel 3486: }
3487:
3488: return(ret);
3489: }
3490:
1.50 daniel 3491: /**
3492: * xmlParseAttValue:
3493: * @ctxt: an XML parser context
3494: *
3495: * parse a value for an attribute
1.78 daniel 3496: * Note: the parser won't do substitution of entities here, this
1.113 daniel 3497: * will be handled later in xmlStringGetNodeList
1.29 daniel 3498: *
3499: * [10] AttValue ::= '"' ([^<&"] | Reference)* '"' |
3500: * "'" ([^<&'] | Reference)* "'"
1.68 daniel 3501: *
1.129 daniel 3502: * 3.3.3 Attribute-Value Normalization:
3503: * Before the value of an attribute is passed to the application or
3504: * checked for validity, the XML processor must normalize it as follows:
3505: * - a character reference is processed by appending the referenced
3506: * character to the attribute value
3507: * - an entity reference is processed by recursively processing the
3508: * replacement text of the entity
3509: * - a whitespace character (#x20, #xD, #xA, #x9) is processed by
3510: * appending #x20 to the normalized value, except that only a single
3511: * #x20 is appended for a "#xD#xA" sequence that is part of an external
3512: * parsed entity or the literal entity value of an internal parsed entity
3513: * - other characters are processed by appending them to the normalized value
1.130 daniel 3514: * If the declared value is not CDATA, then the XML processor must further
3515: * process the normalized attribute value by discarding any leading and
3516: * trailing space (#x20) characters, and by replacing sequences of space
3517: * (#x20) characters by a single space (#x20) character.
3518: * All attributes for which no declaration has been read should be treated
3519: * by a non-validating parser as if declared CDATA.
1.129 daniel 3520: *
3521: * Returns the AttValue parsed or NULL. The value has to be freed by the caller.
1.29 daniel 3522: */
3523:
1.123 daniel 3524: xmlChar *
1.55 daniel 3525: xmlParseAttValue(xmlParserCtxtPtr ctxt) {
1.129 daniel 3526: xmlChar limit = 0;
3527: xmlChar *buffer = NULL;
3528: int buffer_size = 0;
3529: xmlChar *out = NULL;
3530:
3531: xmlChar *current = NULL;
3532: xmlEntityPtr ent;
3533: xmlChar cur;
3534:
1.29 daniel 3535:
1.91 daniel 3536: SHRINK;
1.151 daniel 3537: if (NXT(0) == '"') {
1.96 daniel 3538: ctxt->instate = XML_PARSER_ATTRIBUTE_VALUE;
1.129 daniel 3539: limit = '"';
1.40 daniel 3540: NEXT;
1.151 daniel 3541: } else if (NXT(0) == '\'') {
1.129 daniel 3542: limit = '\'';
1.96 daniel 3543: ctxt->instate = XML_PARSER_ATTRIBUTE_VALUE;
1.40 daniel 3544: NEXT;
1.29 daniel 3545: } else {
1.123 daniel 3546: ctxt->errNo = XML_ERR_ATTRIBUTE_NOT_STARTED;
1.55 daniel 3547: if ((ctxt->sax != NULL) && (ctxt->sax->error != NULL))
1.74 daniel 3548: ctxt->sax->error(ctxt->userData, "AttValue: \" or ' expected\n");
1.59 daniel 3549: ctxt->wellFormed = 0;
1.129 daniel 3550: return(NULL);
1.29 daniel 3551: }
3552:
1.129 daniel 3553: /*
3554: * allocate a translation buffer.
3555: */
1.140 daniel 3556: buffer_size = XML_PARSER_BUFFER_SIZE;
1.129 daniel 3557: buffer = (xmlChar *) xmlMalloc(buffer_size * sizeof(xmlChar));
3558: if (buffer == NULL) {
3559: perror("xmlParseAttValue: malloc failed");
3560: return(NULL);
3561: }
3562: out = buffer;
3563:
3564: /*
3565: * Ok loop until we reach one of the ending char or a size limit.
3566: */
3567: cur = CUR;
1.156 daniel 3568: while (((NXT(0) != limit) && (cur != '<')) || (ctxt->token != 0)) {
1.129 daniel 3569: if (cur == 0) break;
3570: if ((cur == '&') && (NXT(1) == '#')) {
3571: int val = xmlParseCharRef(ctxt);
3572: *out++ = val;
3573: } else if (cur == '&') {
3574: ent = xmlParseEntityRef(ctxt);
3575: if ((ent != NULL) &&
3576: (ctxt->replaceEntities != 0)) {
3577: current = ent->content;
3578: while (*current != 0) {
3579: *out++ = *current++;
3580: if (out - buffer > buffer_size - 10) {
3581: int index = out - buffer;
3582:
3583: growBuffer(buffer);
3584: out = &buffer[index];
3585: }
3586: }
3587: } else if (ent != NULL) {
3588: int i = xmlStrlen(ent->name);
3589: const xmlChar *cur = ent->name;
3590:
3591: *out++ = '&';
3592: if (out - buffer > buffer_size - i - 10) {
3593: int index = out - buffer;
3594:
3595: growBuffer(buffer);
3596: out = &buffer[index];
3597: }
3598: for (;i > 0;i--)
3599: *out++ = *cur++;
3600: *out++ = ';';
3601: }
3602: } else {
1.156 daniel 3603: /* invalid for UTF-8 , use COPY(out); !!! */
1.129 daniel 3604: if ((cur == 0x20) || (cur == 0xD) || (cur == 0xA) || (cur == 0x9)) {
1.130 daniel 3605: *out++ = 0x20;
3606: if (out - buffer > buffer_size - 10) {
3607: int index = out - buffer;
3608:
3609: growBuffer(buffer);
3610: out = &buffer[index];
1.129 daniel 3611: }
3612: } else {
3613: *out++ = cur;
3614: if (out - buffer > buffer_size - 10) {
3615: int index = out - buffer;
3616:
3617: growBuffer(buffer);
3618: out = &buffer[index];
3619: }
3620: }
3621: NEXT;
3622: }
3623: cur = CUR;
3624: }
3625: *out++ = 0;
1.152 daniel 3626: if (RAW == '<') {
1.129 daniel 3627: if ((ctxt->sax != NULL) && (ctxt->sax->error != NULL))
3628: ctxt->sax->error(ctxt->userData,
3629: "Unescaped '<' not allowed in attributes values\n");
3630: ctxt->errNo = XML_ERR_LT_IN_ATTRIBUTE;
3631: ctxt->wellFormed = 0;
1.152 daniel 3632: } else if (RAW != limit) {
1.129 daniel 3633: if ((ctxt->sax != NULL) && (ctxt->sax->error != NULL))
3634: ctxt->sax->error(ctxt->userData, "AttValue: ' expected\n");
3635: ctxt->errNo = XML_ERR_ATTRIBUTE_NOT_FINISHED;
3636: ctxt->wellFormed = 0;
3637: } else
3638: NEXT;
3639: return(buffer);
1.29 daniel 3640: }
3641:
1.50 daniel 3642: /**
3643: * xmlParseSystemLiteral:
3644: * @ctxt: an XML parser context
3645: *
3646: * parse an XML Literal
1.21 daniel 3647: *
1.22 daniel 3648: * [11] SystemLiteral ::= ('"' [^"]* '"') | ("'" [^']* "'")
1.68 daniel 3649: *
3650: * Returns the SystemLiteral parsed or NULL
1.21 daniel 3651: */
3652:
1.123 daniel 3653: xmlChar *
1.55 daniel 3654: xmlParseSystemLiteral(xmlParserCtxtPtr ctxt) {
1.135 daniel 3655: xmlChar *buf = NULL;
3656: int len = 0;
1.140 daniel 3657: int size = XML_PARSER_BUFFER_SIZE;
1.152 daniel 3658: int cur, l;
1.135 daniel 3659: xmlChar stop;
1.168 daniel 3660: int state = ctxt->instate;
1.21 daniel 3661:
1.91 daniel 3662: SHRINK;
1.152 daniel 3663: if (RAW == '"') {
1.40 daniel 3664: NEXT;
1.135 daniel 3665: stop = '"';
1.152 daniel 3666: } else if (RAW == '\'') {
1.40 daniel 3667: NEXT;
1.135 daniel 3668: stop = '\'';
1.21 daniel 3669: } else {
1.55 daniel 3670: if ((ctxt->sax != NULL) && (ctxt->sax->error != NULL))
1.122 daniel 3671: ctxt->sax->error(ctxt->userData,
3672: "SystemLiteral \" or ' expected\n");
1.123 daniel 3673: ctxt->errNo = XML_ERR_LITERAL_NOT_STARTED;
1.59 daniel 3674: ctxt->wellFormed = 0;
1.135 daniel 3675: return(NULL);
1.21 daniel 3676: }
3677:
1.135 daniel 3678: buf = (xmlChar *) xmlMalloc(size * sizeof(xmlChar));
3679: if (buf == NULL) {
3680: fprintf(stderr, "malloc of %d byte failed\n", size);
3681: return(NULL);
3682: }
1.168 daniel 3683: ctxt->instate = XML_PARSER_SYSTEM_LITERAL;
1.152 daniel 3684: cur = CUR_CHAR(l);
1.135 daniel 3685: while ((IS_CHAR(cur)) && (cur != stop)) {
1.152 daniel 3686: if (len + 5 >= size) {
1.135 daniel 3687: size *= 2;
3688: buf = xmlRealloc(buf, size * sizeof(xmlChar));
3689: if (buf == NULL) {
3690: fprintf(stderr, "realloc of %d byte failed\n", size);
1.168 daniel 3691: ctxt->instate = state;
1.135 daniel 3692: return(NULL);
3693: }
3694: }
1.152 daniel 3695: COPY_BUF(l,buf,len,cur);
3696: NEXTL(l);
3697: cur = CUR_CHAR(l);
1.135 daniel 3698: if (cur == 0) {
3699: GROW;
3700: SHRINK;
1.152 daniel 3701: cur = CUR_CHAR(l);
1.135 daniel 3702: }
3703: }
3704: buf[len] = 0;
1.168 daniel 3705: ctxt->instate = state;
1.135 daniel 3706: if (!IS_CHAR(cur)) {
3707: if ((ctxt->sax != NULL) && (ctxt->sax->error != NULL))
3708: ctxt->sax->error(ctxt->userData, "Unfinished SystemLiteral\n");
3709: ctxt->errNo = XML_ERR_LITERAL_NOT_FINISHED;
3710: ctxt->wellFormed = 0;
3711: } else {
3712: NEXT;
3713: }
3714: return(buf);
1.21 daniel 3715: }
3716:
1.50 daniel 3717: /**
3718: * xmlParsePubidLiteral:
3719: * @ctxt: an XML parser context
1.21 daniel 3720: *
1.50 daniel 3721: * parse an XML public literal
1.68 daniel 3722: *
3723: * [12] PubidLiteral ::= '"' PubidChar* '"' | "'" (PubidChar - "'")* "'"
3724: *
3725: * Returns the PubidLiteral parsed or NULL.
1.21 daniel 3726: */
3727:
1.123 daniel 3728: xmlChar *
1.55 daniel 3729: xmlParsePubidLiteral(xmlParserCtxtPtr ctxt) {
1.135 daniel 3730: xmlChar *buf = NULL;
3731: int len = 0;
1.140 daniel 3732: int size = XML_PARSER_BUFFER_SIZE;
1.135 daniel 3733: xmlChar cur;
3734: xmlChar stop;
1.125 daniel 3735:
1.91 daniel 3736: SHRINK;
1.152 daniel 3737: if (RAW == '"') {
1.40 daniel 3738: NEXT;
1.135 daniel 3739: stop = '"';
1.152 daniel 3740: } else if (RAW == '\'') {
1.40 daniel 3741: NEXT;
1.135 daniel 3742: stop = '\'';
1.21 daniel 3743: } else {
1.55 daniel 3744: if ((ctxt->sax != NULL) && (ctxt->sax->error != NULL))
1.122 daniel 3745: ctxt->sax->error(ctxt->userData,
3746: "SystemLiteral \" or ' expected\n");
1.123 daniel 3747: ctxt->errNo = XML_ERR_LITERAL_NOT_STARTED;
1.59 daniel 3748: ctxt->wellFormed = 0;
1.135 daniel 3749: return(NULL);
3750: }
3751: buf = (xmlChar *) xmlMalloc(size * sizeof(xmlChar));
3752: if (buf == NULL) {
3753: fprintf(stderr, "malloc of %d byte failed\n", size);
3754: return(NULL);
3755: }
3756: cur = CUR;
3757: while ((IS_PUBIDCHAR(cur)) && (cur != stop)) {
3758: if (len + 1 >= size) {
3759: size *= 2;
3760: buf = xmlRealloc(buf, size * sizeof(xmlChar));
3761: if (buf == NULL) {
3762: fprintf(stderr, "realloc of %d byte failed\n", size);
3763: return(NULL);
3764: }
3765: }
3766: buf[len++] = cur;
3767: NEXT;
3768: cur = CUR;
3769: if (cur == 0) {
3770: GROW;
3771: SHRINK;
3772: cur = CUR;
3773: }
3774: }
3775: buf[len] = 0;
3776: if (cur != stop) {
3777: if ((ctxt->sax != NULL) && (ctxt->sax->error != NULL))
3778: ctxt->sax->error(ctxt->userData, "Unfinished PubidLiteral\n");
3779: ctxt->errNo = XML_ERR_LITERAL_NOT_FINISHED;
3780: ctxt->wellFormed = 0;
3781: } else {
3782: NEXT;
1.21 daniel 3783: }
1.135 daniel 3784: return(buf);
1.21 daniel 3785: }
3786:
1.50 daniel 3787: /**
3788: * xmlParseCharData:
3789: * @ctxt: an XML parser context
3790: * @cdata: int indicating whether we are within a CDATA section
3791: *
3792: * parse a CharData section.
3793: * if we are within a CDATA section ']]>' marks an end of section.
1.27 daniel 3794: *
1.151 daniel 3795: * The right angle bracket (>) may be represented using the string ">",
3796: * and must, for compatibility, be escaped using ">" or a character
3797: * reference when it appears in the string "]]>" in content, when that
3798: * string is not marking the end of a CDATA section.
3799: *
1.27 daniel 3800: * [14] CharData ::= [^<&]* - ([^<&]* ']]>' [^<&]*)
3801: */
3802:
1.55 daniel 3803: void
3804: xmlParseCharData(xmlParserCtxtPtr ctxt, int cdata) {
1.152 daniel 3805: xmlChar buf[XML_PARSER_BIG_BUFFER_SIZE + 5];
1.91 daniel 3806: int nbchar = 0;
1.152 daniel 3807: int cur, l;
1.27 daniel 3808:
1.91 daniel 3809: SHRINK;
1.152 daniel 3810: cur = CUR_CHAR(l);
1.160 daniel 3811: while ((IS_CHAR(cur)) && ((cur != '<') || (ctxt->token == '<')) &&
1.153 daniel 3812: ((cur != '&') || (ctxt->token == '&'))) {
1.97 daniel 3813: if ((cur == ']') && (NXT(1) == ']') &&
1.59 daniel 3814: (NXT(2) == '>')) {
3815: if (cdata) break;
3816: else {
3817: if ((ctxt->sax != NULL) && (ctxt->sax->error != NULL))
1.151 daniel 3818: ctxt->sax->error(ctxt->userData,
1.59 daniel 3819: "Sequence ']]>' not allowed in content\n");
1.123 daniel 3820: ctxt->errNo = XML_ERR_MISPLACED_CDATA_END;
1.151 daniel 3821: /* Should this be relaxed ??? I see a "must here */
3822: ctxt->wellFormed = 0;
1.59 daniel 3823: }
3824: }
1.152 daniel 3825: COPY_BUF(l,buf,nbchar,cur);
3826: if (nbchar >= XML_PARSER_BIG_BUFFER_SIZE) {
1.91 daniel 3827: /*
3828: * Ok the segment is to be consumed as chars.
3829: */
1.171 daniel 3830: if ((ctxt->sax != NULL) && (!ctxt->disableSAX)) {
1.91 daniel 3831: if (areBlanks(ctxt, buf, nbchar)) {
3832: if (ctxt->sax->ignorableWhitespace != NULL)
3833: ctxt->sax->ignorableWhitespace(ctxt->userData,
3834: buf, nbchar);
3835: } else {
3836: if (ctxt->sax->characters != NULL)
3837: ctxt->sax->characters(ctxt->userData, buf, nbchar);
3838: }
3839: }
3840: nbchar = 0;
3841: }
1.152 daniel 3842: NEXTL(l);
3843: cur = CUR_CHAR(l);
1.27 daniel 3844: }
1.91 daniel 3845: if (nbchar != 0) {
3846: /*
3847: * Ok the segment is to be consumed as chars.
3848: */
1.171 daniel 3849: if ((ctxt->sax != NULL) && (!ctxt->disableSAX)) {
1.91 daniel 3850: if (areBlanks(ctxt, buf, nbchar)) {
3851: if (ctxt->sax->ignorableWhitespace != NULL)
3852: ctxt->sax->ignorableWhitespace(ctxt->userData, buf, nbchar);
3853: } else {
3854: if (ctxt->sax->characters != NULL)
3855: ctxt->sax->characters(ctxt->userData, buf, nbchar);
3856: }
3857: }
1.45 daniel 3858: }
1.27 daniel 3859: }
3860:
1.50 daniel 3861: /**
3862: * xmlParseExternalID:
3863: * @ctxt: an XML parser context
1.123 daniel 3864: * @publicID: a xmlChar** receiving PubidLiteral
1.67 daniel 3865: * @strict: indicate whether we should restrict parsing to only
3866: * production [75], see NOTE below
1.50 daniel 3867: *
1.67 daniel 3868: * Parse an External ID or a Public ID
3869: *
3870: * NOTE: Productions [75] and [83] interract badly since [75] can generate
3871: * 'PUBLIC' S PubidLiteral S SystemLiteral
1.22 daniel 3872: *
3873: * [75] ExternalID ::= 'SYSTEM' S SystemLiteral
3874: * | 'PUBLIC' S PubidLiteral S SystemLiteral
1.67 daniel 3875: *
3876: * [83] PublicID ::= 'PUBLIC' S PubidLiteral
3877: *
1.68 daniel 3878: * Returns the function returns SystemLiteral and in the second
1.67 daniel 3879: * case publicID receives PubidLiteral, is strict is off
3880: * it is possible to return NULL and have publicID set.
1.22 daniel 3881: */
3882:
1.123 daniel 3883: xmlChar *
3884: xmlParseExternalID(xmlParserCtxtPtr ctxt, xmlChar **publicID, int strict) {
3885: xmlChar *URI = NULL;
1.22 daniel 3886:
1.91 daniel 3887: SHRINK;
1.152 daniel 3888: if ((RAW == 'S') && (NXT(1) == 'Y') &&
1.40 daniel 3889: (NXT(2) == 'S') && (NXT(3) == 'T') &&
3890: (NXT(4) == 'E') && (NXT(5) == 'M')) {
3891: SKIP(6);
1.59 daniel 3892: if (!IS_BLANK(CUR)) {
3893: if ((ctxt->sax != NULL) && (ctxt->sax->error != NULL))
1.74 daniel 3894: ctxt->sax->error(ctxt->userData,
1.59 daniel 3895: "Space required after 'SYSTEM'\n");
1.123 daniel 3896: ctxt->errNo = XML_ERR_SPACE_REQUIRED;
1.59 daniel 3897: ctxt->wellFormed = 0;
3898: }
1.42 daniel 3899: SKIP_BLANKS;
1.39 daniel 3900: URI = xmlParseSystemLiteral(ctxt);
1.59 daniel 3901: if (URI == NULL) {
1.55 daniel 3902: if ((ctxt->sax != NULL) && (ctxt->sax->error != NULL))
1.74 daniel 3903: ctxt->sax->error(ctxt->userData,
1.39 daniel 3904: "xmlParseExternalID: SYSTEM, no URI\n");
1.123 daniel 3905: ctxt->errNo = XML_ERR_URI_REQUIRED;
1.59 daniel 3906: ctxt->wellFormed = 0;
3907: }
1.152 daniel 3908: } else if ((RAW == 'P') && (NXT(1) == 'U') &&
1.40 daniel 3909: (NXT(2) == 'B') && (NXT(3) == 'L') &&
3910: (NXT(4) == 'I') && (NXT(5) == 'C')) {
3911: SKIP(6);
1.59 daniel 3912: if (!IS_BLANK(CUR)) {
3913: if ((ctxt->sax != NULL) && (ctxt->sax->error != NULL))
1.74 daniel 3914: ctxt->sax->error(ctxt->userData,
1.59 daniel 3915: "Space required after 'PUBLIC'\n");
1.123 daniel 3916: ctxt->errNo = XML_ERR_SPACE_REQUIRED;
1.59 daniel 3917: ctxt->wellFormed = 0;
3918: }
1.42 daniel 3919: SKIP_BLANKS;
1.39 daniel 3920: *publicID = xmlParsePubidLiteral(ctxt);
1.59 daniel 3921: if (*publicID == NULL) {
1.55 daniel 3922: if ((ctxt->sax != NULL) && (ctxt->sax->error != NULL))
1.74 daniel 3923: ctxt->sax->error(ctxt->userData,
1.39 daniel 3924: "xmlParseExternalID: PUBLIC, no Public Identifier\n");
1.123 daniel 3925: ctxt->errNo = XML_ERR_PUBID_REQUIRED;
1.59 daniel 3926: ctxt->wellFormed = 0;
3927: }
1.67 daniel 3928: if (strict) {
3929: /*
3930: * We don't handle [83] so "S SystemLiteral" is required.
3931: */
3932: if (!IS_BLANK(CUR)) {
3933: if ((ctxt->sax != NULL) && (ctxt->sax->error != NULL))
1.74 daniel 3934: ctxt->sax->error(ctxt->userData,
1.67 daniel 3935: "Space required after the Public Identifier\n");
1.123 daniel 3936: ctxt->errNo = XML_ERR_SPACE_REQUIRED;
1.67 daniel 3937: ctxt->wellFormed = 0;
3938: }
3939: } else {
3940: /*
3941: * We handle [83] so we return immediately, if
3942: * "S SystemLiteral" is not detected. From a purely parsing
3943: * point of view that's a nice mess.
3944: */
1.135 daniel 3945: const xmlChar *ptr;
3946: GROW;
3947:
3948: ptr = CUR_PTR;
1.67 daniel 3949: if (!IS_BLANK(*ptr)) return(NULL);
3950:
3951: while (IS_BLANK(*ptr)) ptr++;
1.173 daniel 3952: if ((*ptr != '\'') && (*ptr != '"')) return(NULL);
1.59 daniel 3953: }
1.42 daniel 3954: SKIP_BLANKS;
1.39 daniel 3955: URI = xmlParseSystemLiteral(ctxt);
1.59 daniel 3956: if (URI == NULL) {
1.55 daniel 3957: if ((ctxt->sax != NULL) && (ctxt->sax->error != NULL))
1.74 daniel 3958: ctxt->sax->error(ctxt->userData,
1.39 daniel 3959: "xmlParseExternalID: PUBLIC, no URI\n");
1.123 daniel 3960: ctxt->errNo = XML_ERR_URI_REQUIRED;
1.59 daniel 3961: ctxt->wellFormed = 0;
3962: }
1.22 daniel 3963: }
1.39 daniel 3964: return(URI);
1.22 daniel 3965: }
3966:
1.50 daniel 3967: /**
3968: * xmlParseComment:
1.69 daniel 3969: * @ctxt: an XML parser context
1.50 daniel 3970: *
1.3 veillard 3971: * Skip an XML (SGML) comment <!-- .... -->
1.38 daniel 3972: * The spec says that "For compatibility, the string "--" (double-hyphen)
3973: * must not occur within comments. "
1.22 daniel 3974: *
3975: * [15] Comment ::= '<!--' ((Char - '-') | ('-' (Char - '-')))* '-->'
1.3 veillard 3976: */
1.72 daniel 3977: void
1.114 daniel 3978: xmlParseComment(xmlParserCtxtPtr ctxt) {
1.135 daniel 3979: xmlChar *buf = NULL;
3980: int len = 0;
1.140 daniel 3981: int size = XML_PARSER_BUFFER_SIZE;
1.152 daniel 3982: int q, ql;
3983: int r, rl;
3984: int cur, l;
1.140 daniel 3985: xmlParserInputState state;
1.3 veillard 3986:
3987: /*
1.22 daniel 3988: * Check that there is a comment right here.
1.3 veillard 3989: */
1.152 daniel 3990: if ((RAW != '<') || (NXT(1) != '!') ||
1.72 daniel 3991: (NXT(2) != '-') || (NXT(3) != '-')) return;
1.3 veillard 3992:
1.140 daniel 3993: state = ctxt->instate;
1.97 daniel 3994: ctxt->instate = XML_PARSER_COMMENT;
1.91 daniel 3995: SHRINK;
1.40 daniel 3996: SKIP(4);
1.135 daniel 3997: buf = (xmlChar *) xmlMalloc(size * sizeof(xmlChar));
3998: if (buf == NULL) {
3999: fprintf(stderr, "malloc of %d byte failed\n", size);
1.140 daniel 4000: ctxt->instate = state;
1.135 daniel 4001: return;
4002: }
1.152 daniel 4003: q = CUR_CHAR(ql);
4004: NEXTL(ql);
4005: r = CUR_CHAR(rl);
4006: NEXTL(rl);
4007: cur = CUR_CHAR(l);
1.135 daniel 4008: while (IS_CHAR(cur) &&
4009: ((cur != '>') ||
4010: (r != '-') || (q != '-'))) {
4011: if ((r == '-') && (q == '-')) {
1.55 daniel 4012: if ((ctxt->sax != NULL) && (ctxt->sax->error != NULL))
1.74 daniel 4013: ctxt->sax->error(ctxt->userData,
1.38 daniel 4014: "Comment must not contain '--' (double-hyphen)`\n");
1.123 daniel 4015: ctxt->errNo = XML_ERR_HYPHEN_IN_COMMENT;
1.59 daniel 4016: ctxt->wellFormed = 0;
4017: }
1.152 daniel 4018: if (len + 5 >= size) {
1.135 daniel 4019: size *= 2;
4020: buf = xmlRealloc(buf, size * sizeof(xmlChar));
4021: if (buf == NULL) {
4022: fprintf(stderr, "realloc of %d byte failed\n", size);
1.140 daniel 4023: ctxt->instate = state;
1.135 daniel 4024: return;
4025: }
4026: }
1.152 daniel 4027: COPY_BUF(ql,buf,len,q);
1.135 daniel 4028: q = r;
1.152 daniel 4029: ql = rl;
1.135 daniel 4030: r = cur;
1.152 daniel 4031: rl = l;
4032: NEXTL(l);
4033: cur = CUR_CHAR(l);
1.135 daniel 4034: if (cur == 0) {
4035: SHRINK;
4036: GROW;
1.152 daniel 4037: cur = CUR_CHAR(l);
1.135 daniel 4038: }
1.3 veillard 4039: }
1.135 daniel 4040: buf[len] = 0;
4041: if (!IS_CHAR(cur)) {
1.55 daniel 4042: if ((ctxt->sax != NULL) && (ctxt->sax->error != NULL))
1.122 daniel 4043: ctxt->sax->error(ctxt->userData,
1.135 daniel 4044: "Comment not terminated \n<!--%.50s\n", buf);
1.123 daniel 4045: ctxt->errNo = XML_ERR_COMMENT_NOT_FINISHED;
1.59 daniel 4046: ctxt->wellFormed = 0;
1.3 veillard 4047: } else {
1.40 daniel 4048: NEXT;
1.171 daniel 4049: if ((ctxt->sax != NULL) && (ctxt->sax->comment != NULL) &&
4050: (!ctxt->disableSAX))
1.135 daniel 4051: ctxt->sax->comment(ctxt->userData, buf);
4052: xmlFree(buf);
1.3 veillard 4053: }
1.140 daniel 4054: ctxt->instate = state;
1.3 veillard 4055: }
4056:
1.50 daniel 4057: /**
4058: * xmlParsePITarget:
4059: * @ctxt: an XML parser context
4060: *
4061: * parse the name of a PI
1.22 daniel 4062: *
4063: * [17] PITarget ::= Name - (('X' | 'x') ('M' | 'm') ('L' | 'l'))
1.68 daniel 4064: *
4065: * Returns the PITarget name or NULL
1.22 daniel 4066: */
4067:
1.123 daniel 4068: xmlChar *
1.55 daniel 4069: xmlParsePITarget(xmlParserCtxtPtr ctxt) {
1.123 daniel 4070: xmlChar *name;
1.22 daniel 4071:
4072: name = xmlParseName(ctxt);
1.139 daniel 4073: if ((name != NULL) &&
1.22 daniel 4074: ((name[0] == 'x') || (name[0] == 'X')) &&
1.31 daniel 4075: ((name[1] == 'm') || (name[1] == 'M')) &&
4076: ((name[2] == 'l') || (name[2] == 'L'))) {
1.139 daniel 4077: int i;
1.151 daniel 4078: if ((name[0] = 'x') && (name[1] == 'm') &&
4079: (name[2] = 'l') && (name[3] == 0)) {
4080: if ((ctxt->sax != NULL) && (ctxt->sax->error != NULL))
4081: ctxt->sax->error(ctxt->userData,
4082: "XML declaration allowed only at the start of the document\n");
4083: ctxt->errNo = XML_ERR_RESERVED_XML_NAME;
4084: ctxt->wellFormed = 0;
4085: return(name);
4086: } else if (name[3] == 0) {
4087: if ((ctxt->sax != NULL) && (ctxt->sax->error != NULL))
4088: ctxt->sax->error(ctxt->userData, "Invalid PI name\n");
4089: ctxt->errNo = XML_ERR_RESERVED_XML_NAME;
4090: ctxt->wellFormed = 0;
4091: return(name);
4092: }
1.139 daniel 4093: for (i = 0;;i++) {
4094: if (xmlW3CPIs[i] == NULL) break;
4095: if (!xmlStrcmp(name, (const xmlChar *)xmlW3CPIs[i]))
4096: return(name);
4097: }
4098: if ((ctxt->sax != NULL) && (ctxt->sax->warning != NULL)) {
4099: ctxt->sax->warning(ctxt->userData,
1.122 daniel 4100: "xmlParsePItarget: invalid name prefix 'xml'\n");
1.123 daniel 4101: ctxt->errNo = XML_ERR_RESERVED_XML_NAME;
1.122 daniel 4102: }
1.22 daniel 4103: }
4104: return(name);
4105: }
4106:
1.50 daniel 4107: /**
4108: * xmlParsePI:
4109: * @ctxt: an XML parser context
4110: *
4111: * parse an XML Processing Instruction.
1.22 daniel 4112: *
4113: * [16] PI ::= '<?' PITarget (S (Char* - (Char* '?>' Char*)))? '?>'
1.68 daniel 4114: *
1.69 daniel 4115: * The processing is transfered to SAX once parsed.
1.3 veillard 4116: */
4117:
1.55 daniel 4118: void
4119: xmlParsePI(xmlParserCtxtPtr ctxt) {
1.135 daniel 4120: xmlChar *buf = NULL;
4121: int len = 0;
1.140 daniel 4122: int size = XML_PARSER_BUFFER_SIZE;
1.152 daniel 4123: int cur, l;
1.123 daniel 4124: xmlChar *target;
1.140 daniel 4125: xmlParserInputState state;
1.22 daniel 4126:
1.152 daniel 4127: if ((RAW == '<') && (NXT(1) == '?')) {
1.140 daniel 4128: state = ctxt->instate;
4129: ctxt->instate = XML_PARSER_PI;
1.3 veillard 4130: /*
4131: * this is a Processing Instruction.
4132: */
1.40 daniel 4133: SKIP(2);
1.91 daniel 4134: SHRINK;
1.3 veillard 4135:
4136: /*
1.22 daniel 4137: * Parse the target name and check for special support like
4138: * namespace.
1.3 veillard 4139: */
1.22 daniel 4140: target = xmlParsePITarget(ctxt);
4141: if (target != NULL) {
1.156 daniel 4142: if ((RAW == '?') && (NXT(1) == '>')) {
4143: SKIP(2);
4144:
4145: /*
4146: * SAX: PI detected.
4147: */
1.171 daniel 4148: if ((ctxt->sax) && (!ctxt->disableSAX) &&
1.156 daniel 4149: (ctxt->sax->processingInstruction != NULL))
4150: ctxt->sax->processingInstruction(ctxt->userData,
4151: target, NULL);
4152: ctxt->instate = state;
1.170 daniel 4153: xmlFree(target);
1.156 daniel 4154: return;
4155: }
1.135 daniel 4156: buf = (xmlChar *) xmlMalloc(size * sizeof(xmlChar));
4157: if (buf == NULL) {
4158: fprintf(stderr, "malloc of %d byte failed\n", size);
1.140 daniel 4159: ctxt->instate = state;
1.135 daniel 4160: return;
4161: }
4162: cur = CUR;
4163: if (!IS_BLANK(cur)) {
1.114 daniel 4164: if ((ctxt->sax != NULL) && (ctxt->sax->error != NULL))
4165: ctxt->sax->error(ctxt->userData,
4166: "xmlParsePI: PI %s space expected\n", target);
1.123 daniel 4167: ctxt->errNo = XML_ERR_SPACE_REQUIRED;
1.114 daniel 4168: ctxt->wellFormed = 0;
4169: }
4170: SKIP_BLANKS;
1.152 daniel 4171: cur = CUR_CHAR(l);
1.135 daniel 4172: while (IS_CHAR(cur) &&
4173: ((cur != '?') || (NXT(1) != '>'))) {
1.152 daniel 4174: if (len + 5 >= size) {
1.135 daniel 4175: size *= 2;
4176: buf = xmlRealloc(buf, size * sizeof(xmlChar));
4177: if (buf == NULL) {
4178: fprintf(stderr, "realloc of %d byte failed\n", size);
1.140 daniel 4179: ctxt->instate = state;
1.135 daniel 4180: return;
4181: }
4182: }
1.152 daniel 4183: COPY_BUF(l,buf,len,cur);
4184: NEXTL(l);
4185: cur = CUR_CHAR(l);
1.135 daniel 4186: if (cur == 0) {
4187: SHRINK;
4188: GROW;
1.152 daniel 4189: cur = CUR_CHAR(l);
1.135 daniel 4190: }
4191: }
4192: buf[len] = 0;
1.152 daniel 4193: if (cur != '?') {
1.72 daniel 4194: if ((ctxt->sax != NULL) && (ctxt->sax->error != NULL))
1.74 daniel 4195: ctxt->sax->error(ctxt->userData,
1.72 daniel 4196: "xmlParsePI: PI %s never end ...\n", target);
1.123 daniel 4197: ctxt->errNo = XML_ERR_PI_NOT_FINISHED;
1.72 daniel 4198: ctxt->wellFormed = 0;
1.22 daniel 4199: } else {
1.72 daniel 4200: SKIP(2);
1.44 daniel 4201:
1.72 daniel 4202: /*
4203: * SAX: PI detected.
4204: */
1.171 daniel 4205: if ((ctxt->sax) && (!ctxt->disableSAX) &&
1.72 daniel 4206: (ctxt->sax->processingInstruction != NULL))
1.99 daniel 4207: ctxt->sax->processingInstruction(ctxt->userData,
1.135 daniel 4208: target, buf);
1.22 daniel 4209: }
1.135 daniel 4210: xmlFree(buf);
1.119 daniel 4211: xmlFree(target);
1.3 veillard 4212: } else {
1.55 daniel 4213: if ((ctxt->sax != NULL) && (ctxt->sax->error != NULL))
1.99 daniel 4214: ctxt->sax->error(ctxt->userData,
4215: "xmlParsePI : no target name\n");
1.123 daniel 4216: ctxt->errNo = XML_ERR_PI_NOT_STARTED;
1.59 daniel 4217: ctxt->wellFormed = 0;
1.22 daniel 4218: }
1.140 daniel 4219: ctxt->instate = state;
1.22 daniel 4220: }
4221: }
4222:
1.50 daniel 4223: /**
4224: * xmlParseNotationDecl:
4225: * @ctxt: an XML parser context
4226: *
4227: * parse a notation declaration
1.22 daniel 4228: *
4229: * [82] NotationDecl ::= '<!NOTATION' S Name S (ExternalID | PublicID) S? '>'
4230: *
4231: * Hence there is actually 3 choices:
4232: * 'PUBLIC' S PubidLiteral
4233: * 'PUBLIC' S PubidLiteral S SystemLiteral
4234: * and 'SYSTEM' S SystemLiteral
1.50 daniel 4235: *
1.67 daniel 4236: * See the NOTE on xmlParseExternalID().
1.22 daniel 4237: */
4238:
1.55 daniel 4239: void
4240: xmlParseNotationDecl(xmlParserCtxtPtr ctxt) {
1.123 daniel 4241: xmlChar *name;
4242: xmlChar *Pubid;
4243: xmlChar *Systemid;
1.22 daniel 4244:
1.152 daniel 4245: if ((RAW == '<') && (NXT(1) == '!') &&
1.40 daniel 4246: (NXT(2) == 'N') && (NXT(3) == 'O') &&
4247: (NXT(4) == 'T') && (NXT(5) == 'A') &&
4248: (NXT(6) == 'T') && (NXT(7) == 'I') &&
1.67 daniel 4249: (NXT(8) == 'O') && (NXT(9) == 'N')) {
1.91 daniel 4250: SHRINK;
1.40 daniel 4251: SKIP(10);
1.67 daniel 4252: if (!IS_BLANK(CUR)) {
4253: if ((ctxt->sax != NULL) && (ctxt->sax->error != NULL))
1.122 daniel 4254: ctxt->sax->error(ctxt->userData,
4255: "Space required after '<!NOTATION'\n");
1.123 daniel 4256: ctxt->errNo = XML_ERR_SPACE_REQUIRED;
1.67 daniel 4257: ctxt->wellFormed = 0;
4258: return;
4259: }
4260: SKIP_BLANKS;
1.22 daniel 4261:
4262: name = xmlParseName(ctxt);
4263: if (name == NULL) {
1.55 daniel 4264: if ((ctxt->sax != NULL) && (ctxt->sax->error != NULL))
1.122 daniel 4265: ctxt->sax->error(ctxt->userData,
4266: "NOTATION: Name expected here\n");
1.123 daniel 4267: ctxt->errNo = XML_ERR_NOTATION_NOT_STARTED;
1.67 daniel 4268: ctxt->wellFormed = 0;
4269: return;
4270: }
4271: if (!IS_BLANK(CUR)) {
4272: if ((ctxt->sax != NULL) && (ctxt->sax->error != NULL))
1.74 daniel 4273: ctxt->sax->error(ctxt->userData,
1.67 daniel 4274: "Space required after the NOTATION name'\n");
1.123 daniel 4275: ctxt->errNo = XML_ERR_SPACE_REQUIRED;
1.59 daniel 4276: ctxt->wellFormed = 0;
1.22 daniel 4277: return;
4278: }
1.42 daniel 4279: SKIP_BLANKS;
1.67 daniel 4280:
1.22 daniel 4281: /*
1.67 daniel 4282: * Parse the IDs.
1.22 daniel 4283: */
1.160 daniel 4284: Systemid = xmlParseExternalID(ctxt, &Pubid, 0);
1.67 daniel 4285: SKIP_BLANKS;
4286:
1.152 daniel 4287: if (RAW == '>') {
1.40 daniel 4288: NEXT;
1.171 daniel 4289: if ((ctxt->sax != NULL) && (!ctxt->disableSAX) &&
4290: (ctxt->sax->notationDecl != NULL))
1.74 daniel 4291: ctxt->sax->notationDecl(ctxt->userData, name, Pubid, Systemid);
1.67 daniel 4292: } else {
4293: if ((ctxt->sax != NULL) && (ctxt->sax->error != NULL))
1.74 daniel 4294: ctxt->sax->error(ctxt->userData,
1.67 daniel 4295: "'>' required to close NOTATION declaration\n");
1.123 daniel 4296: ctxt->errNo = XML_ERR_NOTATION_NOT_FINISHED;
1.67 daniel 4297: ctxt->wellFormed = 0;
4298: }
1.119 daniel 4299: xmlFree(name);
4300: if (Systemid != NULL) xmlFree(Systemid);
4301: if (Pubid != NULL) xmlFree(Pubid);
1.22 daniel 4302: }
4303: }
4304:
1.50 daniel 4305: /**
4306: * xmlParseEntityDecl:
4307: * @ctxt: an XML parser context
4308: *
4309: * parse <!ENTITY declarations
1.22 daniel 4310: *
4311: * [70] EntityDecl ::= GEDecl | PEDecl
4312: *
4313: * [71] GEDecl ::= '<!ENTITY' S Name S EntityDef S? '>'
4314: *
4315: * [72] PEDecl ::= '<!ENTITY' S '%' S Name S PEDef S? '>'
4316: *
4317: * [73] EntityDef ::= EntityValue | (ExternalID NDataDecl?)
4318: *
4319: * [74] PEDef ::= EntityValue | ExternalID
1.24 daniel 4320: *
4321: * [76] NDataDecl ::= S 'NDATA' S Name
1.99 daniel 4322: *
4323: * [ VC: Notation Declared ]
1.116 daniel 4324: * The Name must match the declared name of a notation.
1.22 daniel 4325: */
4326:
1.55 daniel 4327: void
4328: xmlParseEntityDecl(xmlParserCtxtPtr ctxt) {
1.123 daniel 4329: xmlChar *name = NULL;
4330: xmlChar *value = NULL;
4331: xmlChar *URI = NULL, *literal = NULL;
4332: xmlChar *ndata = NULL;
1.39 daniel 4333: int isParameter = 0;
1.123 daniel 4334: xmlChar *orig = NULL;
1.22 daniel 4335:
1.94 daniel 4336: GROW;
1.152 daniel 4337: if ((RAW == '<') && (NXT(1) == '!') &&
1.40 daniel 4338: (NXT(2) == 'E') && (NXT(3) == 'N') &&
4339: (NXT(4) == 'T') && (NXT(5) == 'I') &&
1.59 daniel 4340: (NXT(6) == 'T') && (NXT(7) == 'Y')) {
1.96 daniel 4341: ctxt->instate = XML_PARSER_ENTITY_DECL;
1.91 daniel 4342: SHRINK;
1.40 daniel 4343: SKIP(8);
1.59 daniel 4344: if (!IS_BLANK(CUR)) {
4345: if ((ctxt->sax != NULL) && (ctxt->sax->error != NULL))
1.122 daniel 4346: ctxt->sax->error(ctxt->userData,
4347: "Space required after '<!ENTITY'\n");
1.123 daniel 4348: ctxt->errNo = XML_ERR_SPACE_REQUIRED;
1.59 daniel 4349: ctxt->wellFormed = 0;
4350: }
4351: SKIP_BLANKS;
1.40 daniel 4352:
1.152 daniel 4353: if (RAW == '%') {
1.40 daniel 4354: NEXT;
1.59 daniel 4355: if (!IS_BLANK(CUR)) {
4356: if ((ctxt->sax != NULL) && (ctxt->sax->error != NULL))
1.122 daniel 4357: ctxt->sax->error(ctxt->userData,
4358: "Space required after '%'\n");
1.123 daniel 4359: ctxt->errNo = XML_ERR_SPACE_REQUIRED;
1.59 daniel 4360: ctxt->wellFormed = 0;
4361: }
1.42 daniel 4362: SKIP_BLANKS;
1.39 daniel 4363: isParameter = 1;
1.22 daniel 4364: }
4365:
4366: name = xmlParseName(ctxt);
1.24 daniel 4367: if (name == NULL) {
1.55 daniel 4368: if ((ctxt->sax != NULL) && (ctxt->sax->error != NULL))
1.74 daniel 4369: ctxt->sax->error(ctxt->userData, "xmlParseEntityDecl: no name\n");
1.123 daniel 4370: ctxt->errNo = XML_ERR_NAME_REQUIRED;
1.59 daniel 4371: ctxt->wellFormed = 0;
1.24 daniel 4372: return;
4373: }
1.59 daniel 4374: if (!IS_BLANK(CUR)) {
4375: if ((ctxt->sax != NULL) && (ctxt->sax->error != NULL))
1.74 daniel 4376: ctxt->sax->error(ctxt->userData,
1.59 daniel 4377: "Space required after the entity name\n");
1.123 daniel 4378: ctxt->errNo = XML_ERR_SPACE_REQUIRED;
1.59 daniel 4379: ctxt->wellFormed = 0;
4380: }
1.42 daniel 4381: SKIP_BLANKS;
1.24 daniel 4382:
1.22 daniel 4383: /*
1.68 daniel 4384: * handle the various case of definitions...
1.22 daniel 4385: */
1.39 daniel 4386: if (isParameter) {
1.152 daniel 4387: if ((RAW == '"') || (RAW == '\''))
1.78 daniel 4388: value = xmlParseEntityValue(ctxt, &orig);
1.39 daniel 4389: if (value) {
1.171 daniel 4390: if ((ctxt->sax != NULL) &&
4391: (!ctxt->disableSAX) && (ctxt->sax->entityDecl != NULL))
1.74 daniel 4392: ctxt->sax->entityDecl(ctxt->userData, name,
1.39 daniel 4393: XML_INTERNAL_PARAMETER_ENTITY,
4394: NULL, NULL, value);
4395: }
1.24 daniel 4396: else {
1.67 daniel 4397: URI = xmlParseExternalID(ctxt, &literal, 1);
1.169 daniel 4398: if ((URI == NULL) && (literal == NULL)) {
4399: if ((ctxt->sax != NULL) && (ctxt->sax->error != NULL))
4400: ctxt->sax->error(ctxt->userData,
4401: "Entity value required\n");
4402: ctxt->errNo = XML_ERR_VALUE_REQUIRED;
4403: ctxt->wellFormed = 0;
4404: }
1.39 daniel 4405: if (URI) {
1.171 daniel 4406: if ((ctxt->sax != NULL) &&
4407: (!ctxt->disableSAX) && (ctxt->sax->entityDecl != NULL))
1.74 daniel 4408: ctxt->sax->entityDecl(ctxt->userData, name,
1.39 daniel 4409: XML_EXTERNAL_PARAMETER_ENTITY,
4410: literal, URI, NULL);
4411: }
1.24 daniel 4412: }
4413: } else {
1.152 daniel 4414: if ((RAW == '"') || (RAW == '\'')) {
1.78 daniel 4415: value = xmlParseEntityValue(ctxt, &orig);
1.171 daniel 4416: if ((ctxt->sax != NULL) &&
4417: (!ctxt->disableSAX) && (ctxt->sax->entityDecl != NULL))
1.74 daniel 4418: ctxt->sax->entityDecl(ctxt->userData, name,
1.39 daniel 4419: XML_INTERNAL_GENERAL_ENTITY,
4420: NULL, NULL, value);
4421: } else {
1.67 daniel 4422: URI = xmlParseExternalID(ctxt, &literal, 1);
1.169 daniel 4423: if ((URI == NULL) && (literal == NULL)) {
4424: if ((ctxt->sax != NULL) && (ctxt->sax->error != NULL))
4425: ctxt->sax->error(ctxt->userData,
4426: "Entity value required\n");
4427: ctxt->errNo = XML_ERR_VALUE_REQUIRED;
4428: ctxt->wellFormed = 0;
4429: }
1.152 daniel 4430: if ((RAW != '>') && (!IS_BLANK(CUR))) {
1.59 daniel 4431: if ((ctxt->sax != NULL) && (ctxt->sax->error != NULL))
1.74 daniel 4432: ctxt->sax->error(ctxt->userData,
1.59 daniel 4433: "Space required before 'NDATA'\n");
1.123 daniel 4434: ctxt->errNo = XML_ERR_SPACE_REQUIRED;
1.59 daniel 4435: ctxt->wellFormed = 0;
4436: }
1.42 daniel 4437: SKIP_BLANKS;
1.152 daniel 4438: if ((RAW == 'N') && (NXT(1) == 'D') &&
1.40 daniel 4439: (NXT(2) == 'A') && (NXT(3) == 'T') &&
4440: (NXT(4) == 'A')) {
4441: SKIP(5);
1.59 daniel 4442: if (!IS_BLANK(CUR)) {
4443: if ((ctxt->sax != NULL) && (ctxt->sax->error != NULL))
1.74 daniel 4444: ctxt->sax->error(ctxt->userData,
1.59 daniel 4445: "Space required after 'NDATA'\n");
1.123 daniel 4446: ctxt->errNo = XML_ERR_SPACE_REQUIRED;
1.59 daniel 4447: ctxt->wellFormed = 0;
4448: }
1.42 daniel 4449: SKIP_BLANKS;
1.24 daniel 4450: ndata = xmlParseName(ctxt);
1.171 daniel 4451: if ((ctxt->sax != NULL) && (!ctxt->disableSAX) &&
1.116 daniel 4452: (ctxt->sax->unparsedEntityDecl != NULL))
4453: ctxt->sax->unparsedEntityDecl(ctxt->userData, name,
1.39 daniel 4454: literal, URI, ndata);
4455: } else {
1.171 daniel 4456: if ((ctxt->sax != NULL) &&
4457: (!ctxt->disableSAX) && (ctxt->sax->entityDecl != NULL))
1.74 daniel 4458: ctxt->sax->entityDecl(ctxt->userData, name,
1.39 daniel 4459: XML_EXTERNAL_GENERAL_PARSED_ENTITY,
4460: literal, URI, NULL);
1.24 daniel 4461: }
4462: }
4463: }
1.42 daniel 4464: SKIP_BLANKS;
1.152 daniel 4465: if (RAW != '>') {
1.55 daniel 4466: if ((ctxt->sax != NULL) && (ctxt->sax->error != NULL))
1.74 daniel 4467: ctxt->sax->error(ctxt->userData,
1.31 daniel 4468: "xmlParseEntityDecl: entity %s not terminated\n", name);
1.123 daniel 4469: ctxt->errNo = XML_ERR_ENTITY_NOT_FINISHED;
1.59 daniel 4470: ctxt->wellFormed = 0;
1.24 daniel 4471: } else
1.40 daniel 4472: NEXT;
1.78 daniel 4473: if (orig != NULL) {
4474: /*
1.98 daniel 4475: * Ugly mechanism to save the raw entity value.
1.78 daniel 4476: */
4477: xmlEntityPtr cur = NULL;
4478:
1.98 daniel 4479: if (isParameter) {
4480: if ((ctxt->sax != NULL) &&
4481: (ctxt->sax->getParameterEntity != NULL))
1.120 daniel 4482: cur = ctxt->sax->getParameterEntity(ctxt->userData, name);
1.98 daniel 4483: } else {
4484: if ((ctxt->sax != NULL) &&
4485: (ctxt->sax->getEntity != NULL))
1.120 daniel 4486: cur = ctxt->sax->getEntity(ctxt->userData, name);
1.98 daniel 4487: }
4488: if (cur != NULL) {
4489: if (cur->orig != NULL)
1.119 daniel 4490: xmlFree(orig);
1.98 daniel 4491: else
4492: cur->orig = orig;
4493: } else
1.119 daniel 4494: xmlFree(orig);
1.78 daniel 4495: }
1.119 daniel 4496: if (name != NULL) xmlFree(name);
4497: if (value != NULL) xmlFree(value);
4498: if (URI != NULL) xmlFree(URI);
4499: if (literal != NULL) xmlFree(literal);
4500: if (ndata != NULL) xmlFree(ndata);
1.22 daniel 4501: }
4502: }
4503:
1.50 daniel 4504: /**
1.59 daniel 4505: * xmlParseDefaultDecl:
4506: * @ctxt: an XML parser context
4507: * @value: Receive a possible fixed default value for the attribute
4508: *
4509: * Parse an attribute default declaration
4510: *
4511: * [60] DefaultDecl ::= '#REQUIRED' | '#IMPLIED' | (('#FIXED' S)? AttValue)
4512: *
1.99 daniel 4513: * [ VC: Required Attribute ]
1.117 daniel 4514: * if the default declaration is the keyword #REQUIRED, then the
4515: * attribute must be specified for all elements of the type in the
4516: * attribute-list declaration.
1.99 daniel 4517: *
4518: * [ VC: Attribute Default Legal ]
1.102 daniel 4519: * The declared default value must meet the lexical constraints of
4520: * the declared attribute type c.f. xmlValidateAttributeDecl()
1.99 daniel 4521: *
4522: * [ VC: Fixed Attribute Default ]
1.117 daniel 4523: * if an attribute has a default value declared with the #FIXED
4524: * keyword, instances of that attribute must match the default value.
1.99 daniel 4525: *
4526: * [ WFC: No < in Attribute Values ]
4527: * handled in xmlParseAttValue()
4528: *
1.59 daniel 4529: * returns: XML_ATTRIBUTE_NONE, XML_ATTRIBUTE_REQUIRED, XML_ATTRIBUTE_IMPLIED
4530: * or XML_ATTRIBUTE_FIXED.
4531: */
4532:
4533: int
1.123 daniel 4534: xmlParseDefaultDecl(xmlParserCtxtPtr ctxt, xmlChar **value) {
1.59 daniel 4535: int val;
1.123 daniel 4536: xmlChar *ret;
1.59 daniel 4537:
4538: *value = NULL;
1.152 daniel 4539: if ((RAW == '#') && (NXT(1) == 'R') &&
1.59 daniel 4540: (NXT(2) == 'E') && (NXT(3) == 'Q') &&
4541: (NXT(4) == 'U') && (NXT(5) == 'I') &&
4542: (NXT(6) == 'R') && (NXT(7) == 'E') &&
4543: (NXT(8) == 'D')) {
4544: SKIP(9);
4545: return(XML_ATTRIBUTE_REQUIRED);
4546: }
1.152 daniel 4547: if ((RAW == '#') && (NXT(1) == 'I') &&
1.59 daniel 4548: (NXT(2) == 'M') && (NXT(3) == 'P') &&
4549: (NXT(4) == 'L') && (NXT(5) == 'I') &&
4550: (NXT(6) == 'E') && (NXT(7) == 'D')) {
4551: SKIP(8);
4552: return(XML_ATTRIBUTE_IMPLIED);
4553: }
4554: val = XML_ATTRIBUTE_NONE;
1.152 daniel 4555: if ((RAW == '#') && (NXT(1) == 'F') &&
1.59 daniel 4556: (NXT(2) == 'I') && (NXT(3) == 'X') &&
4557: (NXT(4) == 'E') && (NXT(5) == 'D')) {
4558: SKIP(6);
4559: val = XML_ATTRIBUTE_FIXED;
4560: if (!IS_BLANK(CUR)) {
4561: if ((ctxt->sax != NULL) && (ctxt->sax->error != NULL))
1.122 daniel 4562: ctxt->sax->error(ctxt->userData,
4563: "Space required after '#FIXED'\n");
1.123 daniel 4564: ctxt->errNo = XML_ERR_SPACE_REQUIRED;
1.59 daniel 4565: ctxt->wellFormed = 0;
4566: }
4567: SKIP_BLANKS;
4568: }
4569: ret = xmlParseAttValue(ctxt);
1.96 daniel 4570: ctxt->instate = XML_PARSER_DTD;
1.59 daniel 4571: if (ret == NULL) {
4572: if ((ctxt->sax != NULL) && (ctxt->sax->error != NULL))
1.74 daniel 4573: ctxt->sax->error(ctxt->userData,
1.59 daniel 4574: "Attribute default value declaration error\n");
4575: ctxt->wellFormed = 0;
4576: } else
4577: *value = ret;
4578: return(val);
4579: }
4580:
4581: /**
1.66 daniel 4582: * xmlParseNotationType:
4583: * @ctxt: an XML parser context
4584: *
4585: * parse an Notation attribute type.
4586: *
1.99 daniel 4587: * Note: the leading 'NOTATION' S part has already being parsed...
4588: *
1.66 daniel 4589: * [58] NotationType ::= 'NOTATION' S '(' S? Name (S? '|' S? Name)* S? ')'
4590: *
1.99 daniel 4591: * [ VC: Notation Attributes ]
1.117 daniel 4592: * Values of this type must match one of the notation names included
1.99 daniel 4593: * in the declaration; all notation names in the declaration must be declared.
1.66 daniel 4594: *
4595: * Returns: the notation attribute tree built while parsing
4596: */
4597:
4598: xmlEnumerationPtr
4599: xmlParseNotationType(xmlParserCtxtPtr ctxt) {
1.123 daniel 4600: xmlChar *name;
1.66 daniel 4601: xmlEnumerationPtr ret = NULL, last = NULL, cur;
4602:
1.152 daniel 4603: if (RAW != '(') {
1.66 daniel 4604: if ((ctxt->sax != NULL) && (ctxt->sax->error != NULL))
1.122 daniel 4605: ctxt->sax->error(ctxt->userData,
4606: "'(' required to start 'NOTATION'\n");
1.123 daniel 4607: ctxt->errNo = XML_ERR_NOTATION_NOT_STARTED;
1.66 daniel 4608: ctxt->wellFormed = 0;
4609: return(NULL);
4610: }
1.91 daniel 4611: SHRINK;
1.66 daniel 4612: do {
4613: NEXT;
4614: SKIP_BLANKS;
4615: name = xmlParseName(ctxt);
4616: if (name == NULL) {
4617: if ((ctxt->sax != NULL) && (ctxt->sax->error != NULL))
1.74 daniel 4618: ctxt->sax->error(ctxt->userData,
1.66 daniel 4619: "Name expected in NOTATION declaration\n");
1.123 daniel 4620: ctxt->errNo = XML_ERR_NAME_REQUIRED;
1.66 daniel 4621: ctxt->wellFormed = 0;
4622: return(ret);
4623: }
4624: cur = xmlCreateEnumeration(name);
1.119 daniel 4625: xmlFree(name);
1.66 daniel 4626: if (cur == NULL) return(ret);
4627: if (last == NULL) ret = last = cur;
4628: else {
4629: last->next = cur;
4630: last = cur;
4631: }
4632: SKIP_BLANKS;
1.152 daniel 4633: } while (RAW == '|');
4634: if (RAW != ')') {
1.66 daniel 4635: if ((ctxt->sax != NULL) && (ctxt->sax->error != NULL))
1.74 daniel 4636: ctxt->sax->error(ctxt->userData,
1.66 daniel 4637: "')' required to finish NOTATION declaration\n");
1.123 daniel 4638: ctxt->errNo = XML_ERR_NOTATION_NOT_FINISHED;
1.66 daniel 4639: ctxt->wellFormed = 0;
1.170 daniel 4640: if ((last != NULL) && (last != ret))
4641: xmlFreeEnumeration(last);
1.66 daniel 4642: return(ret);
4643: }
4644: NEXT;
4645: return(ret);
4646: }
4647:
4648: /**
4649: * xmlParseEnumerationType:
4650: * @ctxt: an XML parser context
4651: *
4652: * parse an Enumeration attribute type.
4653: *
4654: * [59] Enumeration ::= '(' S? Nmtoken (S? '|' S? Nmtoken)* S? ')'
4655: *
1.99 daniel 4656: * [ VC: Enumeration ]
1.117 daniel 4657: * Values of this type must match one of the Nmtoken tokens in
1.99 daniel 4658: * the declaration
4659: *
1.66 daniel 4660: * Returns: the enumeration attribute tree built while parsing
4661: */
4662:
4663: xmlEnumerationPtr
4664: xmlParseEnumerationType(xmlParserCtxtPtr ctxt) {
1.123 daniel 4665: xmlChar *name;
1.66 daniel 4666: xmlEnumerationPtr ret = NULL, last = NULL, cur;
4667:
1.152 daniel 4668: if (RAW != '(') {
1.66 daniel 4669: if ((ctxt->sax != NULL) && (ctxt->sax->error != NULL))
1.74 daniel 4670: ctxt->sax->error(ctxt->userData,
1.66 daniel 4671: "'(' required to start ATTLIST enumeration\n");
1.123 daniel 4672: ctxt->errNo = XML_ERR_ATTLIST_NOT_STARTED;
1.66 daniel 4673: ctxt->wellFormed = 0;
4674: return(NULL);
4675: }
1.91 daniel 4676: SHRINK;
1.66 daniel 4677: do {
4678: NEXT;
4679: SKIP_BLANKS;
4680: name = xmlParseNmtoken(ctxt);
4681: if (name == NULL) {
4682: if ((ctxt->sax != NULL) && (ctxt->sax->error != NULL))
1.74 daniel 4683: ctxt->sax->error(ctxt->userData,
1.66 daniel 4684: "NmToken expected in ATTLIST enumeration\n");
1.123 daniel 4685: ctxt->errNo = XML_ERR_NMTOKEN_REQUIRED;
1.66 daniel 4686: ctxt->wellFormed = 0;
4687: return(ret);
4688: }
4689: cur = xmlCreateEnumeration(name);
1.119 daniel 4690: xmlFree(name);
1.66 daniel 4691: if (cur == NULL) return(ret);
4692: if (last == NULL) ret = last = cur;
4693: else {
4694: last->next = cur;
4695: last = cur;
4696: }
4697: SKIP_BLANKS;
1.152 daniel 4698: } while (RAW == '|');
4699: if (RAW != ')') {
1.66 daniel 4700: if ((ctxt->sax != NULL) && (ctxt->sax->error != NULL))
1.74 daniel 4701: ctxt->sax->error(ctxt->userData,
1.66 daniel 4702: "')' required to finish ATTLIST enumeration\n");
1.123 daniel 4703: ctxt->errNo = XML_ERR_ATTLIST_NOT_FINISHED;
1.66 daniel 4704: ctxt->wellFormed = 0;
4705: return(ret);
4706: }
4707: NEXT;
4708: return(ret);
4709: }
4710:
4711: /**
1.50 daniel 4712: * xmlParseEnumeratedType:
4713: * @ctxt: an XML parser context
1.66 daniel 4714: * @tree: the enumeration tree built while parsing
1.50 daniel 4715: *
1.66 daniel 4716: * parse an Enumerated attribute type.
1.22 daniel 4717: *
4718: * [57] EnumeratedType ::= NotationType | Enumeration
4719: *
4720: * [58] NotationType ::= 'NOTATION' S '(' S? Name (S? '|' S? Name)* S? ')'
4721: *
1.50 daniel 4722: *
1.66 daniel 4723: * Returns: XML_ATTRIBUTE_ENUMERATION or XML_ATTRIBUTE_NOTATION
1.22 daniel 4724: */
4725:
1.66 daniel 4726: int
4727: xmlParseEnumeratedType(xmlParserCtxtPtr ctxt, xmlEnumerationPtr *tree) {
1.152 daniel 4728: if ((RAW == 'N') && (NXT(1) == 'O') &&
1.66 daniel 4729: (NXT(2) == 'T') && (NXT(3) == 'A') &&
4730: (NXT(4) == 'T') && (NXT(5) == 'I') &&
4731: (NXT(6) == 'O') && (NXT(7) == 'N')) {
4732: SKIP(8);
4733: if (!IS_BLANK(CUR)) {
4734: if ((ctxt->sax != NULL) && (ctxt->sax->error != NULL))
1.122 daniel 4735: ctxt->sax->error(ctxt->userData,
4736: "Space required after 'NOTATION'\n");
1.123 daniel 4737: ctxt->errNo = XML_ERR_SPACE_REQUIRED;
1.66 daniel 4738: ctxt->wellFormed = 0;
4739: return(0);
4740: }
4741: SKIP_BLANKS;
4742: *tree = xmlParseNotationType(ctxt);
4743: if (*tree == NULL) return(0);
4744: return(XML_ATTRIBUTE_NOTATION);
4745: }
4746: *tree = xmlParseEnumerationType(ctxt);
4747: if (*tree == NULL) return(0);
4748: return(XML_ATTRIBUTE_ENUMERATION);
1.22 daniel 4749: }
4750:
1.50 daniel 4751: /**
4752: * xmlParseAttributeType:
4753: * @ctxt: an XML parser context
1.66 daniel 4754: * @tree: the enumeration tree built while parsing
1.50 daniel 4755: *
1.59 daniel 4756: * parse the Attribute list def for an element
1.22 daniel 4757: *
4758: * [54] AttType ::= StringType | TokenizedType | EnumeratedType
4759: *
4760: * [55] StringType ::= 'CDATA'
4761: *
4762: * [56] TokenizedType ::= 'ID' | 'IDREF' | 'IDREFS' | 'ENTITY' |
4763: * 'ENTITIES' | 'NMTOKEN' | 'NMTOKENS'
1.50 daniel 4764: *
1.102 daniel 4765: * Validity constraints for attribute values syntax are checked in
4766: * xmlValidateAttributeValue()
4767: *
1.99 daniel 4768: * [ VC: ID ]
1.117 daniel 4769: * Values of type ID must match the Name production. A name must not
1.99 daniel 4770: * appear more than once in an XML document as a value of this type;
4771: * i.e., ID values must uniquely identify the elements which bear them.
4772: *
4773: * [ VC: One ID per Element Type ]
1.117 daniel 4774: * No element type may have more than one ID attribute specified.
1.99 daniel 4775: *
4776: * [ VC: ID Attribute Default ]
1.117 daniel 4777: * An ID attribute must have a declared default of #IMPLIED or #REQUIRED.
1.99 daniel 4778: *
4779: * [ VC: IDREF ]
1.102 daniel 4780: * Values of type IDREF must match the Name production, and values
1.140 daniel 4781: * of type IDREFS must match Names; each IDREF Name must match the value
1.117 daniel 4782: * of an ID attribute on some element in the XML document; i.e. IDREF
1.99 daniel 4783: * values must match the value of some ID attribute.
4784: *
4785: * [ VC: Entity Name ]
1.102 daniel 4786: * Values of type ENTITY must match the Name production, values
1.140 daniel 4787: * of type ENTITIES must match Names; each Entity Name must match the
1.117 daniel 4788: * name of an unparsed entity declared in the DTD.
1.99 daniel 4789: *
4790: * [ VC: Name Token ]
1.102 daniel 4791: * Values of type NMTOKEN must match the Nmtoken production; values
1.99 daniel 4792: * of type NMTOKENS must match Nmtokens.
4793: *
1.69 daniel 4794: * Returns the attribute type
1.22 daniel 4795: */
1.59 daniel 4796: int
1.66 daniel 4797: xmlParseAttributeType(xmlParserCtxtPtr ctxt, xmlEnumerationPtr *tree) {
1.91 daniel 4798: SHRINK;
1.152 daniel 4799: if ((RAW == 'C') && (NXT(1) == 'D') &&
1.40 daniel 4800: (NXT(2) == 'A') && (NXT(3) == 'T') &&
4801: (NXT(4) == 'A')) {
4802: SKIP(5);
1.66 daniel 4803: return(XML_ATTRIBUTE_CDATA);
1.152 daniel 4804: } else if ((RAW == 'I') && (NXT(1) == 'D') &&
1.40 daniel 4805: (NXT(2) == 'R') && (NXT(3) == 'E') &&
1.97 daniel 4806: (NXT(4) == 'F') && (NXT(5) == 'S')) {
4807: SKIP(6);
4808: return(XML_ATTRIBUTE_IDREFS);
1.152 daniel 4809: } else if ((RAW == 'I') && (NXT(1) == 'D') &&
1.97 daniel 4810: (NXT(2) == 'R') && (NXT(3) == 'E') &&
1.40 daniel 4811: (NXT(4) == 'F')) {
4812: SKIP(5);
1.59 daniel 4813: return(XML_ATTRIBUTE_IDREF);
1.152 daniel 4814: } else if ((RAW == 'I') && (NXT(1) == 'D')) {
1.66 daniel 4815: SKIP(2);
4816: return(XML_ATTRIBUTE_ID);
1.152 daniel 4817: } else if ((RAW == 'E') && (NXT(1) == 'N') &&
1.40 daniel 4818: (NXT(2) == 'T') && (NXT(3) == 'I') &&
4819: (NXT(4) == 'T') && (NXT(5) == 'Y')) {
4820: SKIP(6);
1.59 daniel 4821: return(XML_ATTRIBUTE_ENTITY);
1.152 daniel 4822: } else if ((RAW == 'E') && (NXT(1) == 'N') &&
1.40 daniel 4823: (NXT(2) == 'T') && (NXT(3) == 'I') &&
4824: (NXT(4) == 'T') && (NXT(5) == 'I') &&
4825: (NXT(6) == 'E') && (NXT(7) == 'S')) {
4826: SKIP(8);
1.59 daniel 4827: return(XML_ATTRIBUTE_ENTITIES);
1.152 daniel 4828: } else if ((RAW == 'N') && (NXT(1) == 'M') &&
1.40 daniel 4829: (NXT(2) == 'T') && (NXT(3) == 'O') &&
4830: (NXT(4) == 'K') && (NXT(5) == 'E') &&
1.66 daniel 4831: (NXT(6) == 'N') && (NXT(7) == 'S')) {
4832: SKIP(8);
4833: return(XML_ATTRIBUTE_NMTOKENS);
1.152 daniel 4834: } else if ((RAW == 'N') && (NXT(1) == 'M') &&
1.66 daniel 4835: (NXT(2) == 'T') && (NXT(3) == 'O') &&
4836: (NXT(4) == 'K') && (NXT(5) == 'E') &&
1.40 daniel 4837: (NXT(6) == 'N')) {
4838: SKIP(7);
1.59 daniel 4839: return(XML_ATTRIBUTE_NMTOKEN);
1.22 daniel 4840: }
1.66 daniel 4841: return(xmlParseEnumeratedType(ctxt, tree));
1.22 daniel 4842: }
4843:
1.50 daniel 4844: /**
4845: * xmlParseAttributeListDecl:
4846: * @ctxt: an XML parser context
4847: *
4848: * : parse the Attribute list def for an element
1.22 daniel 4849: *
4850: * [52] AttlistDecl ::= '<!ATTLIST' S Name AttDef* S? '>'
4851: *
4852: * [53] AttDef ::= S Name S AttType S DefaultDecl
1.50 daniel 4853: *
1.22 daniel 4854: */
1.55 daniel 4855: void
4856: xmlParseAttributeListDecl(xmlParserCtxtPtr ctxt) {
1.123 daniel 4857: xmlChar *elemName;
4858: xmlChar *attrName;
1.103 daniel 4859: xmlEnumerationPtr tree;
1.22 daniel 4860:
1.152 daniel 4861: if ((RAW == '<') && (NXT(1) == '!') &&
1.40 daniel 4862: (NXT(2) == 'A') && (NXT(3) == 'T') &&
4863: (NXT(4) == 'T') && (NXT(5) == 'L') &&
4864: (NXT(6) == 'I') && (NXT(7) == 'S') &&
1.59 daniel 4865: (NXT(8) == 'T')) {
1.40 daniel 4866: SKIP(9);
1.59 daniel 4867: if (!IS_BLANK(CUR)) {
4868: if ((ctxt->sax != NULL) && (ctxt->sax->error != NULL))
1.122 daniel 4869: ctxt->sax->error(ctxt->userData,
4870: "Space required after '<!ATTLIST'\n");
1.123 daniel 4871: ctxt->errNo = XML_ERR_SPACE_REQUIRED;
1.59 daniel 4872: ctxt->wellFormed = 0;
4873: }
1.42 daniel 4874: SKIP_BLANKS;
1.59 daniel 4875: elemName = xmlParseName(ctxt);
4876: if (elemName == NULL) {
1.55 daniel 4877: if ((ctxt->sax != NULL) && (ctxt->sax->error != NULL))
1.122 daniel 4878: ctxt->sax->error(ctxt->userData,
4879: "ATTLIST: no name for Element\n");
1.123 daniel 4880: ctxt->errNo = XML_ERR_NAME_REQUIRED;
1.59 daniel 4881: ctxt->wellFormed = 0;
1.22 daniel 4882: return;
4883: }
1.42 daniel 4884: SKIP_BLANKS;
1.152 daniel 4885: while (RAW != '>') {
1.123 daniel 4886: const xmlChar *check = CUR_PTR;
1.59 daniel 4887: int type;
4888: int def;
1.123 daniel 4889: xmlChar *defaultValue = NULL;
1.59 daniel 4890:
1.103 daniel 4891: tree = NULL;
1.59 daniel 4892: attrName = xmlParseName(ctxt);
4893: if (attrName == NULL) {
4894: if ((ctxt->sax != NULL) && (ctxt->sax->error != NULL))
1.122 daniel 4895: ctxt->sax->error(ctxt->userData,
4896: "ATTLIST: no name for Attribute\n");
1.123 daniel 4897: ctxt->errNo = XML_ERR_NAME_REQUIRED;
1.59 daniel 4898: ctxt->wellFormed = 0;
4899: break;
4900: }
1.97 daniel 4901: GROW;
1.59 daniel 4902: if (!IS_BLANK(CUR)) {
4903: if ((ctxt->sax != NULL) && (ctxt->sax->error != NULL))
1.74 daniel 4904: ctxt->sax->error(ctxt->userData,
1.59 daniel 4905: "Space required after the attribute name\n");
1.123 daniel 4906: ctxt->errNo = XML_ERR_SPACE_REQUIRED;
1.59 daniel 4907: ctxt->wellFormed = 0;
1.170 daniel 4908: if (attrName != NULL)
4909: xmlFree(attrName);
4910: if (defaultValue != NULL)
4911: xmlFree(defaultValue);
1.59 daniel 4912: break;
4913: }
4914: SKIP_BLANKS;
4915:
1.66 daniel 4916: type = xmlParseAttributeType(ctxt, &tree);
1.170 daniel 4917: if (type <= 0) {
4918: if (attrName != NULL)
4919: xmlFree(attrName);
4920: if (defaultValue != NULL)
4921: xmlFree(defaultValue);
4922: break;
4923: }
1.22 daniel 4924:
1.97 daniel 4925: GROW;
1.59 daniel 4926: if (!IS_BLANK(CUR)) {
4927: if ((ctxt->sax != NULL) && (ctxt->sax->error != NULL))
1.74 daniel 4928: ctxt->sax->error(ctxt->userData,
1.59 daniel 4929: "Space required after the attribute type\n");
1.123 daniel 4930: ctxt->errNo = XML_ERR_SPACE_REQUIRED;
1.59 daniel 4931: ctxt->wellFormed = 0;
1.170 daniel 4932: if (attrName != NULL)
4933: xmlFree(attrName);
4934: if (defaultValue != NULL)
4935: xmlFree(defaultValue);
4936: if (tree != NULL)
4937: xmlFreeEnumeration(tree);
1.59 daniel 4938: break;
4939: }
1.42 daniel 4940: SKIP_BLANKS;
1.59 daniel 4941:
4942: def = xmlParseDefaultDecl(ctxt, &defaultValue);
1.170 daniel 4943: if (def <= 0) {
4944: if (attrName != NULL)
4945: xmlFree(attrName);
4946: if (defaultValue != NULL)
4947: xmlFree(defaultValue);
4948: if (tree != NULL)
4949: xmlFreeEnumeration(tree);
4950: break;
4951: }
1.59 daniel 4952:
1.97 daniel 4953: GROW;
1.152 daniel 4954: if (RAW != '>') {
1.59 daniel 4955: if (!IS_BLANK(CUR)) {
4956: if ((ctxt->sax != NULL) && (ctxt->sax->error != NULL))
1.74 daniel 4957: ctxt->sax->error(ctxt->userData,
1.59 daniel 4958: "Space required after the attribute default value\n");
1.123 daniel 4959: ctxt->errNo = XML_ERR_SPACE_REQUIRED;
1.59 daniel 4960: ctxt->wellFormed = 0;
1.170 daniel 4961: if (attrName != NULL)
4962: xmlFree(attrName);
4963: if (defaultValue != NULL)
4964: xmlFree(defaultValue);
4965: if (tree != NULL)
4966: xmlFreeEnumeration(tree);
1.59 daniel 4967: break;
4968: }
4969: SKIP_BLANKS;
4970: }
1.40 daniel 4971: if (check == CUR_PTR) {
1.55 daniel 4972: if ((ctxt->sax != NULL) && (ctxt->sax->error != NULL))
1.74 daniel 4973: ctxt->sax->error(ctxt->userData,
1.59 daniel 4974: "xmlParseAttributeListDecl: detected internal error\n");
1.123 daniel 4975: ctxt->errNo = XML_ERR_INTERNAL_ERROR;
1.170 daniel 4976: if (attrName != NULL)
4977: xmlFree(attrName);
4978: if (defaultValue != NULL)
4979: xmlFree(defaultValue);
4980: if (tree != NULL)
4981: xmlFreeEnumeration(tree);
1.22 daniel 4982: break;
4983: }
1.171 daniel 4984: if ((ctxt->sax != NULL) && (!ctxt->disableSAX) &&
4985: (ctxt->sax->attributeDecl != NULL))
1.74 daniel 4986: ctxt->sax->attributeDecl(ctxt->userData, elemName, attrName,
1.66 daniel 4987: type, def, defaultValue, tree);
1.59 daniel 4988: if (attrName != NULL)
1.119 daniel 4989: xmlFree(attrName);
1.59 daniel 4990: if (defaultValue != NULL)
1.119 daniel 4991: xmlFree(defaultValue);
1.97 daniel 4992: GROW;
1.22 daniel 4993: }
1.152 daniel 4994: if (RAW == '>')
1.40 daniel 4995: NEXT;
1.22 daniel 4996:
1.119 daniel 4997: xmlFree(elemName);
1.22 daniel 4998: }
4999: }
5000:
1.50 daniel 5001: /**
1.61 daniel 5002: * xmlParseElementMixedContentDecl:
5003: * @ctxt: an XML parser context
5004: *
5005: * parse the declaration for a Mixed Element content
5006: * The leading '(' and spaces have been skipped in xmlParseElementContentDecl
5007: *
5008: * [51] Mixed ::= '(' S? '#PCDATA' (S? '|' S? Name)* S? ')*' |
5009: * '(' S? '#PCDATA' S? ')'
5010: *
1.99 daniel 5011: * [ VC: Proper Group/PE Nesting ] applies to [51] too (see [49])
5012: *
5013: * [ VC: No Duplicate Types ]
1.117 daniel 5014: * The same name must not appear more than once in a single
5015: * mixed-content declaration.
1.99 daniel 5016: *
1.61 daniel 5017: * returns: the list of the xmlElementContentPtr describing the element choices
5018: */
5019: xmlElementContentPtr
1.62 daniel 5020: xmlParseElementMixedContentDecl(xmlParserCtxtPtr ctxt) {
1.64 daniel 5021: xmlElementContentPtr ret = NULL, cur = NULL, n;
1.123 daniel 5022: xmlChar *elem = NULL;
1.61 daniel 5023:
1.97 daniel 5024: GROW;
1.152 daniel 5025: if ((RAW == '#') && (NXT(1) == 'P') &&
1.61 daniel 5026: (NXT(2) == 'C') && (NXT(3) == 'D') &&
5027: (NXT(4) == 'A') && (NXT(5) == 'T') &&
5028: (NXT(6) == 'A')) {
5029: SKIP(7);
5030: SKIP_BLANKS;
1.91 daniel 5031: SHRINK;
1.152 daniel 5032: if (RAW == ')') {
1.63 daniel 5033: NEXT;
5034: ret = xmlNewElementContent(NULL, XML_ELEMENT_CONTENT_PCDATA);
1.152 daniel 5035: if (RAW == '*') {
1.136 daniel 5036: ret->ocur = XML_ELEMENT_CONTENT_MULT;
5037: NEXT;
5038: }
1.63 daniel 5039: return(ret);
5040: }
1.152 daniel 5041: if ((RAW == '(') || (RAW == '|')) {
1.61 daniel 5042: ret = cur = xmlNewElementContent(NULL, XML_ELEMENT_CONTENT_PCDATA);
5043: if (ret == NULL) return(NULL);
1.99 daniel 5044: }
1.152 daniel 5045: while (RAW == '|') {
1.64 daniel 5046: NEXT;
1.61 daniel 5047: if (elem == NULL) {
5048: ret = xmlNewElementContent(NULL, XML_ELEMENT_CONTENT_OR);
5049: if (ret == NULL) return(NULL);
5050: ret->c1 = cur;
1.64 daniel 5051: cur = ret;
1.61 daniel 5052: } else {
1.64 daniel 5053: n = xmlNewElementContent(NULL, XML_ELEMENT_CONTENT_OR);
5054: if (n == NULL) return(NULL);
5055: n->c1 = xmlNewElementContent(elem, XML_ELEMENT_CONTENT_ELEMENT);
5056: cur->c2 = n;
5057: cur = n;
1.119 daniel 5058: xmlFree(elem);
1.61 daniel 5059: }
5060: SKIP_BLANKS;
5061: elem = xmlParseName(ctxt);
5062: if (elem == NULL) {
5063: if ((ctxt->sax != NULL) && (ctxt->sax->error != NULL))
1.74 daniel 5064: ctxt->sax->error(ctxt->userData,
1.61 daniel 5065: "xmlParseElementMixedContentDecl : Name expected\n");
1.123 daniel 5066: ctxt->errNo = XML_ERR_NAME_REQUIRED;
1.61 daniel 5067: ctxt->wellFormed = 0;
5068: xmlFreeElementContent(cur);
5069: return(NULL);
5070: }
5071: SKIP_BLANKS;
1.97 daniel 5072: GROW;
1.61 daniel 5073: }
1.152 daniel 5074: if ((RAW == ')') && (NXT(1) == '*')) {
1.66 daniel 5075: if (elem != NULL) {
1.61 daniel 5076: cur->c2 = xmlNewElementContent(elem,
5077: XML_ELEMENT_CONTENT_ELEMENT);
1.119 daniel 5078: xmlFree(elem);
1.66 daniel 5079: }
1.65 daniel 5080: ret->ocur = XML_ELEMENT_CONTENT_MULT;
1.64 daniel 5081: SKIP(2);
1.61 daniel 5082: } else {
1.119 daniel 5083: if (elem != NULL) xmlFree(elem);
1.61 daniel 5084: if ((ctxt->sax != NULL) && (ctxt->sax->error != NULL))
1.74 daniel 5085: ctxt->sax->error(ctxt->userData,
1.63 daniel 5086: "xmlParseElementMixedContentDecl : '|' or ')*' expected\n");
1.123 daniel 5087: ctxt->errNo = XML_ERR_MIXED_NOT_STARTED;
1.61 daniel 5088: ctxt->wellFormed = 0;
5089: xmlFreeElementContent(ret);
5090: return(NULL);
5091: }
5092:
5093: } else {
5094: if ((ctxt->sax != NULL) && (ctxt->sax->error != NULL))
1.74 daniel 5095: ctxt->sax->error(ctxt->userData,
1.61 daniel 5096: "xmlParseElementMixedContentDecl : '#PCDATA' expected\n");
1.123 daniel 5097: ctxt->errNo = XML_ERR_PCDATA_REQUIRED;
1.61 daniel 5098: ctxt->wellFormed = 0;
5099: }
5100: return(ret);
5101: }
5102:
5103: /**
5104: * xmlParseElementChildrenContentDecl:
1.50 daniel 5105: * @ctxt: an XML parser context
5106: *
1.61 daniel 5107: * parse the declaration for a Mixed Element content
5108: * The leading '(' and spaces have been skipped in xmlParseElementContentDecl
1.22 daniel 5109: *
1.61 daniel 5110: *
1.22 daniel 5111: * [47] children ::= (choice | seq) ('?' | '*' | '+')?
5112: *
5113: * [48] cp ::= (Name | choice | seq) ('?' | '*' | '+')?
5114: *
5115: * [49] choice ::= '(' S? cp ( S? '|' S? cp )* S? ')'
5116: *
5117: * [50] seq ::= '(' S? cp ( S? ',' S? cp )* S? ')'
5118: *
1.99 daniel 5119: * [ VC: Proper Group/PE Nesting ] applies to [49] and [50]
5120: * TODO Parameter-entity replacement text must be properly nested
5121: * with parenthetized groups. That is to say, if either of the
5122: * opening or closing parentheses in a choice, seq, or Mixed
5123: * construct is contained in the replacement text for a parameter
5124: * entity, both must be contained in the same replacement text. For
5125: * interoperability, if a parameter-entity reference appears in a
5126: * choice, seq, or Mixed construct, its replacement text should not
5127: * be empty, and neither the first nor last non-blank character of
5128: * the replacement text should be a connector (| or ,).
5129: *
1.62 daniel 5130: * returns: the tree of xmlElementContentPtr describing the element
1.61 daniel 5131: * hierarchy.
5132: */
5133: xmlElementContentPtr
1.62 daniel 5134: xmlParseElementChildrenContentDecl(xmlParserCtxtPtr ctxt) {
1.63 daniel 5135: xmlElementContentPtr ret = NULL, cur = NULL, last = NULL, op = NULL;
1.123 daniel 5136: xmlChar *elem;
5137: xmlChar type = 0;
1.62 daniel 5138:
5139: SKIP_BLANKS;
1.94 daniel 5140: GROW;
1.152 daniel 5141: if (RAW == '(') {
1.63 daniel 5142: /* Recurse on first child */
1.62 daniel 5143: NEXT;
5144: SKIP_BLANKS;
5145: cur = ret = xmlParseElementChildrenContentDecl(ctxt);
5146: SKIP_BLANKS;
1.101 daniel 5147: GROW;
1.62 daniel 5148: } else {
5149: elem = xmlParseName(ctxt);
5150: if (elem == NULL) {
5151: if ((ctxt->sax != NULL) && (ctxt->sax->error != NULL))
1.74 daniel 5152: ctxt->sax->error(ctxt->userData,
1.62 daniel 5153: "xmlParseElementChildrenContentDecl : Name or '(' expected\n");
1.123 daniel 5154: ctxt->errNo = XML_ERR_ELEMCONTENT_NOT_STARTED;
1.62 daniel 5155: ctxt->wellFormed = 0;
5156: return(NULL);
5157: }
5158: cur = ret = xmlNewElementContent(elem, XML_ELEMENT_CONTENT_ELEMENT);
1.101 daniel 5159: GROW;
1.152 daniel 5160: if (RAW == '?') {
1.104 daniel 5161: cur->ocur = XML_ELEMENT_CONTENT_OPT;
1.62 daniel 5162: NEXT;
1.152 daniel 5163: } else if (RAW == '*') {
1.104 daniel 5164: cur->ocur = XML_ELEMENT_CONTENT_MULT;
1.62 daniel 5165: NEXT;
1.152 daniel 5166: } else if (RAW == '+') {
1.104 daniel 5167: cur->ocur = XML_ELEMENT_CONTENT_PLUS;
1.62 daniel 5168: NEXT;
5169: } else {
1.104 daniel 5170: cur->ocur = XML_ELEMENT_CONTENT_ONCE;
1.62 daniel 5171: }
1.119 daniel 5172: xmlFree(elem);
1.101 daniel 5173: GROW;
1.62 daniel 5174: }
5175: SKIP_BLANKS;
1.91 daniel 5176: SHRINK;
1.152 daniel 5177: while (RAW != ')') {
1.63 daniel 5178: /*
5179: * Each loop we parse one separator and one element.
5180: */
1.152 daniel 5181: if (RAW == ',') {
1.62 daniel 5182: if (type == 0) type = CUR;
5183:
5184: /*
5185: * Detect "Name | Name , Name" error
5186: */
5187: else if (type != CUR) {
5188: if ((ctxt->sax != NULL) && (ctxt->sax->error != NULL))
1.74 daniel 5189: ctxt->sax->error(ctxt->userData,
1.62 daniel 5190: "xmlParseElementChildrenContentDecl : '%c' expected\n",
5191: type);
1.123 daniel 5192: ctxt->errNo = XML_ERR_SEPARATOR_REQUIRED;
1.62 daniel 5193: ctxt->wellFormed = 0;
1.170 daniel 5194: if ((op != NULL) && (op != ret))
5195: xmlFreeElementContent(op);
5196: if ((last != NULL) && (last != ret))
5197: xmlFreeElementContent(last);
5198: if (ret != NULL)
5199: xmlFreeElementContent(ret);
1.62 daniel 5200: return(NULL);
5201: }
1.64 daniel 5202: NEXT;
1.62 daniel 5203:
1.63 daniel 5204: op = xmlNewElementContent(NULL, XML_ELEMENT_CONTENT_SEQ);
5205: if (op == NULL) {
5206: xmlFreeElementContent(ret);
5207: return(NULL);
5208: }
5209: if (last == NULL) {
5210: op->c1 = ret;
1.65 daniel 5211: ret = cur = op;
1.63 daniel 5212: } else {
5213: cur->c2 = op;
5214: op->c1 = last;
5215: cur =op;
1.65 daniel 5216: last = NULL;
1.63 daniel 5217: }
1.152 daniel 5218: } else if (RAW == '|') {
1.62 daniel 5219: if (type == 0) type = CUR;
5220:
5221: /*
1.63 daniel 5222: * Detect "Name , Name | Name" error
1.62 daniel 5223: */
5224: else if (type != CUR) {
5225: if ((ctxt->sax != NULL) && (ctxt->sax->error != NULL))
1.74 daniel 5226: ctxt->sax->error(ctxt->userData,
1.62 daniel 5227: "xmlParseElementChildrenContentDecl : '%c' expected\n",
5228: type);
1.123 daniel 5229: ctxt->errNo = XML_ERR_SEPARATOR_REQUIRED;
1.62 daniel 5230: ctxt->wellFormed = 0;
1.170 daniel 5231: if ((op != NULL) && (op != ret))
5232: xmlFreeElementContent(op);
5233: if ((last != NULL) && (last != ret))
5234: xmlFreeElementContent(last);
5235: if (ret != NULL)
5236: xmlFreeElementContent(ret);
1.62 daniel 5237: return(NULL);
5238: }
1.64 daniel 5239: NEXT;
1.62 daniel 5240:
1.63 daniel 5241: op = xmlNewElementContent(NULL, XML_ELEMENT_CONTENT_OR);
5242: if (op == NULL) {
1.170 daniel 5243: if ((op != NULL) && (op != ret))
5244: xmlFreeElementContent(op);
5245: if ((last != NULL) && (last != ret))
5246: xmlFreeElementContent(last);
5247: if (ret != NULL)
5248: xmlFreeElementContent(ret);
1.63 daniel 5249: return(NULL);
5250: }
5251: if (last == NULL) {
5252: op->c1 = ret;
1.65 daniel 5253: ret = cur = op;
1.63 daniel 5254: } else {
5255: cur->c2 = op;
5256: op->c1 = last;
5257: cur =op;
1.65 daniel 5258: last = NULL;
1.63 daniel 5259: }
1.62 daniel 5260: } else {
5261: if ((ctxt->sax != NULL) && (ctxt->sax->error != NULL))
1.74 daniel 5262: ctxt->sax->error(ctxt->userData,
1.62 daniel 5263: "xmlParseElementChildrenContentDecl : ',' '|' or ')' expected\n");
5264: ctxt->wellFormed = 0;
1.123 daniel 5265: ctxt->errNo = XML_ERR_ELEMCONTENT_NOT_FINISHED;
1.170 daniel 5266: if ((op != NULL) && (op != ret))
5267: xmlFreeElementContent(op);
5268: if ((last != NULL) && (last != ret))
5269: xmlFreeElementContent(last);
5270: if (ret != NULL)
5271: xmlFreeElementContent(ret);
1.62 daniel 5272: return(NULL);
5273: }
1.101 daniel 5274: GROW;
1.62 daniel 5275: SKIP_BLANKS;
1.101 daniel 5276: GROW;
1.152 daniel 5277: if (RAW == '(') {
1.63 daniel 5278: /* Recurse on second child */
1.62 daniel 5279: NEXT;
5280: SKIP_BLANKS;
1.65 daniel 5281: last = xmlParseElementChildrenContentDecl(ctxt);
1.62 daniel 5282: SKIP_BLANKS;
5283: } else {
5284: elem = xmlParseName(ctxt);
5285: if (elem == NULL) {
5286: if ((ctxt->sax != NULL) && (ctxt->sax->error != NULL))
1.74 daniel 5287: ctxt->sax->error(ctxt->userData,
1.122 daniel 5288: "xmlParseElementChildrenContentDecl : Name or '(' expected\n");
1.123 daniel 5289: ctxt->errNo = XML_ERR_ELEMCONTENT_NOT_STARTED;
1.62 daniel 5290: ctxt->wellFormed = 0;
1.170 daniel 5291: if ((op != NULL) && (op != ret))
5292: xmlFreeElementContent(op);
5293: if ((last != NULL) && (last != ret))
5294: xmlFreeElementContent(last);
5295: if (ret != NULL)
5296: xmlFreeElementContent(ret);
1.62 daniel 5297: return(NULL);
5298: }
1.65 daniel 5299: last = xmlNewElementContent(elem, XML_ELEMENT_CONTENT_ELEMENT);
1.119 daniel 5300: xmlFree(elem);
1.152 daniel 5301: if (RAW == '?') {
1.105 daniel 5302: last->ocur = XML_ELEMENT_CONTENT_OPT;
5303: NEXT;
1.152 daniel 5304: } else if (RAW == '*') {
1.105 daniel 5305: last->ocur = XML_ELEMENT_CONTENT_MULT;
5306: NEXT;
1.152 daniel 5307: } else if (RAW == '+') {
1.105 daniel 5308: last->ocur = XML_ELEMENT_CONTENT_PLUS;
5309: NEXT;
5310: } else {
5311: last->ocur = XML_ELEMENT_CONTENT_ONCE;
5312: }
1.63 daniel 5313: }
5314: SKIP_BLANKS;
1.97 daniel 5315: GROW;
1.64 daniel 5316: }
1.65 daniel 5317: if ((cur != NULL) && (last != NULL)) {
5318: cur->c2 = last;
1.62 daniel 5319: }
5320: NEXT;
1.152 daniel 5321: if (RAW == '?') {
1.62 daniel 5322: ret->ocur = XML_ELEMENT_CONTENT_OPT;
5323: NEXT;
1.152 daniel 5324: } else if (RAW == '*') {
1.62 daniel 5325: ret->ocur = XML_ELEMENT_CONTENT_MULT;
5326: NEXT;
1.152 daniel 5327: } else if (RAW == '+') {
1.62 daniel 5328: ret->ocur = XML_ELEMENT_CONTENT_PLUS;
5329: NEXT;
5330: }
5331: return(ret);
1.61 daniel 5332: }
5333:
5334: /**
5335: * xmlParseElementContentDecl:
5336: * @ctxt: an XML parser context
5337: * @name: the name of the element being defined.
5338: * @result: the Element Content pointer will be stored here if any
1.22 daniel 5339: *
1.61 daniel 5340: * parse the declaration for an Element content either Mixed or Children,
5341: * the cases EMPTY and ANY are handled directly in xmlParseElementDecl
5342: *
5343: * [46] contentspec ::= 'EMPTY' | 'ANY' | Mixed | children
1.50 daniel 5344: *
1.61 daniel 5345: * returns: the type of element content XML_ELEMENT_TYPE_xxx
1.22 daniel 5346: */
5347:
1.61 daniel 5348: int
1.123 daniel 5349: xmlParseElementContentDecl(xmlParserCtxtPtr ctxt, xmlChar *name,
1.61 daniel 5350: xmlElementContentPtr *result) {
5351:
5352: xmlElementContentPtr tree = NULL;
5353: int res;
5354:
5355: *result = NULL;
5356:
1.152 daniel 5357: if (RAW != '(') {
1.61 daniel 5358: if ((ctxt->sax != NULL) && (ctxt->sax->error != NULL))
1.74 daniel 5359: ctxt->sax->error(ctxt->userData,
1.61 daniel 5360: "xmlParseElementContentDecl : '(' expected\n");
1.123 daniel 5361: ctxt->errNo = XML_ERR_ELEMCONTENT_NOT_STARTED;
1.61 daniel 5362: ctxt->wellFormed = 0;
5363: return(-1);
5364: }
5365: NEXT;
1.97 daniel 5366: GROW;
1.61 daniel 5367: SKIP_BLANKS;
1.152 daniel 5368: if ((RAW == '#') && (NXT(1) == 'P') &&
1.61 daniel 5369: (NXT(2) == 'C') && (NXT(3) == 'D') &&
5370: (NXT(4) == 'A') && (NXT(5) == 'T') &&
5371: (NXT(6) == 'A')) {
1.62 daniel 5372: tree = xmlParseElementMixedContentDecl(ctxt);
1.61 daniel 5373: res = XML_ELEMENT_TYPE_MIXED;
5374: } else {
1.62 daniel 5375: tree = xmlParseElementChildrenContentDecl(ctxt);
1.61 daniel 5376: res = XML_ELEMENT_TYPE_ELEMENT;
5377: }
5378: SKIP_BLANKS;
1.63 daniel 5379: /****************************
1.152 daniel 5380: if (RAW != ')') {
1.61 daniel 5381: if ((ctxt->sax != NULL) && (ctxt->sax->error != NULL))
1.74 daniel 5382: ctxt->sax->error(ctxt->userData,
1.61 daniel 5383: "xmlParseElementContentDecl : ')' expected\n");
5384: ctxt->wellFormed = 0;
5385: return(-1);
5386: }
1.63 daniel 5387: ****************************/
5388: *result = tree;
1.61 daniel 5389: return(res);
1.22 daniel 5390: }
5391:
1.50 daniel 5392: /**
5393: * xmlParseElementDecl:
5394: * @ctxt: an XML parser context
5395: *
5396: * parse an Element declaration.
1.22 daniel 5397: *
5398: * [45] elementdecl ::= '<!ELEMENT' S Name S contentspec S? '>'
5399: *
1.99 daniel 5400: * [ VC: Unique Element Type Declaration ]
1.117 daniel 5401: * No element type may be declared more than once
1.69 daniel 5402: *
5403: * Returns the type of the element, or -1 in case of error
1.22 daniel 5404: */
1.59 daniel 5405: int
1.55 daniel 5406: xmlParseElementDecl(xmlParserCtxtPtr ctxt) {
1.123 daniel 5407: xmlChar *name;
1.59 daniel 5408: int ret = -1;
1.61 daniel 5409: xmlElementContentPtr content = NULL;
1.22 daniel 5410:
1.97 daniel 5411: GROW;
1.152 daniel 5412: if ((RAW == '<') && (NXT(1) == '!') &&
1.40 daniel 5413: (NXT(2) == 'E') && (NXT(3) == 'L') &&
5414: (NXT(4) == 'E') && (NXT(5) == 'M') &&
5415: (NXT(6) == 'E') && (NXT(7) == 'N') &&
1.59 daniel 5416: (NXT(8) == 'T')) {
1.40 daniel 5417: SKIP(9);
1.59 daniel 5418: if (!IS_BLANK(CUR)) {
5419: if ((ctxt->sax != NULL) && (ctxt->sax->error != NULL))
1.74 daniel 5420: ctxt->sax->error(ctxt->userData,
1.59 daniel 5421: "Space required after 'ELEMENT'\n");
1.123 daniel 5422: ctxt->errNo = XML_ERR_SPACE_REQUIRED;
1.59 daniel 5423: ctxt->wellFormed = 0;
5424: }
1.42 daniel 5425: SKIP_BLANKS;
1.22 daniel 5426: name = xmlParseName(ctxt);
5427: if (name == NULL) {
1.55 daniel 5428: if ((ctxt->sax != NULL) && (ctxt->sax->error != NULL))
1.74 daniel 5429: ctxt->sax->error(ctxt->userData,
1.59 daniel 5430: "xmlParseElementDecl: no name for Element\n");
1.123 daniel 5431: ctxt->errNo = XML_ERR_NAME_REQUIRED;
1.59 daniel 5432: ctxt->wellFormed = 0;
5433: return(-1);
5434: }
5435: if (!IS_BLANK(CUR)) {
5436: if ((ctxt->sax != NULL) && (ctxt->sax->error != NULL))
1.74 daniel 5437: ctxt->sax->error(ctxt->userData,
1.59 daniel 5438: "Space required after the element name\n");
1.123 daniel 5439: ctxt->errNo = XML_ERR_SPACE_REQUIRED;
1.59 daniel 5440: ctxt->wellFormed = 0;
1.22 daniel 5441: }
1.42 daniel 5442: SKIP_BLANKS;
1.152 daniel 5443: if ((RAW == 'E') && (NXT(1) == 'M') &&
1.40 daniel 5444: (NXT(2) == 'P') && (NXT(3) == 'T') &&
5445: (NXT(4) == 'Y')) {
5446: SKIP(5);
1.22 daniel 5447: /*
5448: * Element must always be empty.
5449: */
1.59 daniel 5450: ret = XML_ELEMENT_TYPE_EMPTY;
1.152 daniel 5451: } else if ((RAW == 'A') && (NXT(1) == 'N') &&
1.40 daniel 5452: (NXT(2) == 'Y')) {
5453: SKIP(3);
1.22 daniel 5454: /*
5455: * Element is a generic container.
5456: */
1.59 daniel 5457: ret = XML_ELEMENT_TYPE_ANY;
1.152 daniel 5458: } else if (RAW == '(') {
1.61 daniel 5459: ret = xmlParseElementContentDecl(ctxt, name, &content);
1.22 daniel 5460: } else {
1.98 daniel 5461: /*
5462: * [ WFC: PEs in Internal Subset ] error handling.
5463: */
1.152 daniel 5464: if ((RAW == '%') && (ctxt->external == 0) &&
1.98 daniel 5465: (ctxt->inputNr == 1)) {
5466: if ((ctxt->sax != NULL) && (ctxt->sax->error != NULL))
5467: ctxt->sax->error(ctxt->userData,
5468: "PEReference: forbidden within markup decl in internal subset\n");
1.123 daniel 5469: ctxt->errNo = XML_ERR_PEREF_IN_INT_SUBSET;
1.98 daniel 5470: } else {
5471: if ((ctxt->sax != NULL) && (ctxt->sax->error != NULL))
5472: ctxt->sax->error(ctxt->userData,
5473: "xmlParseElementDecl: 'EMPTY', 'ANY' or '(' expected\n");
1.123 daniel 5474: ctxt->errNo = XML_ERR_ELEMCONTENT_NOT_STARTED;
1.98 daniel 5475: }
1.61 daniel 5476: ctxt->wellFormed = 0;
1.119 daniel 5477: if (name != NULL) xmlFree(name);
1.61 daniel 5478: return(-1);
1.22 daniel 5479: }
1.142 daniel 5480:
5481: SKIP_BLANKS;
5482: /*
5483: * Pop-up of finished entities.
5484: */
1.152 daniel 5485: while ((RAW == 0) && (ctxt->inputNr > 1))
1.142 daniel 5486: xmlPopInput(ctxt);
1.42 daniel 5487: SKIP_BLANKS;
1.142 daniel 5488:
1.152 daniel 5489: if (RAW != '>') {
1.55 daniel 5490: if ((ctxt->sax != NULL) && (ctxt->sax->error != NULL))
1.74 daniel 5491: ctxt->sax->error(ctxt->userData,
1.31 daniel 5492: "xmlParseElementDecl: expected '>' at the end\n");
1.123 daniel 5493: ctxt->errNo = XML_ERR_GT_REQUIRED;
1.59 daniel 5494: ctxt->wellFormed = 0;
1.61 daniel 5495: } else {
1.40 daniel 5496: NEXT;
1.171 daniel 5497: if ((ctxt->sax != NULL) && (!ctxt->disableSAX) &&
5498: (ctxt->sax->elementDecl != NULL))
1.76 daniel 5499: ctxt->sax->elementDecl(ctxt->userData, name, ret,
5500: content);
1.61 daniel 5501: }
1.84 daniel 5502: if (content != NULL) {
5503: xmlFreeElementContent(content);
5504: }
1.61 daniel 5505: if (name != NULL) {
1.119 daniel 5506: xmlFree(name);
1.61 daniel 5507: }
1.22 daniel 5508: }
1.59 daniel 5509: return(ret);
1.22 daniel 5510: }
5511:
1.50 daniel 5512: /**
5513: * xmlParseMarkupDecl:
5514: * @ctxt: an XML parser context
5515: *
5516: * parse Markup declarations
1.22 daniel 5517: *
5518: * [29] markupdecl ::= elementdecl | AttlistDecl | EntityDecl |
5519: * NotationDecl | PI | Comment
5520: *
1.98 daniel 5521: * [ VC: Proper Declaration/PE Nesting ]
5522: * TODO Parameter-entity replacement text must be properly nested with
5523: * markup declarations. That is to say, if either the first character
5524: * or the last character of a markup declaration (markupdecl above) is
5525: * contained in the replacement text for a parameter-entity reference,
5526: * both must be contained in the same replacement text.
5527: *
5528: * [ WFC: PEs in Internal Subset ]
5529: * In the internal DTD subset, parameter-entity references can occur
5530: * only where markup declarations can occur, not within markup declarations.
5531: * (This does not apply to references that occur in external parameter
5532: * entities or to the external subset.)
1.22 daniel 5533: */
1.55 daniel 5534: void
5535: xmlParseMarkupDecl(xmlParserCtxtPtr ctxt) {
1.94 daniel 5536: GROW;
1.22 daniel 5537: xmlParseElementDecl(ctxt);
5538: xmlParseAttributeListDecl(ctxt);
5539: xmlParseEntityDecl(ctxt);
5540: xmlParseNotationDecl(ctxt);
5541: xmlParsePI(ctxt);
1.114 daniel 5542: xmlParseComment(ctxt);
1.98 daniel 5543: /*
5544: * This is only for internal subset. On external entities,
5545: * the replacement is done before parsing stage
5546: */
5547: if ((ctxt->external == 0) && (ctxt->inputNr == 1))
5548: xmlParsePEReference(ctxt);
1.97 daniel 5549: ctxt->instate = XML_PARSER_DTD;
1.22 daniel 5550: }
5551:
1.50 daniel 5552: /**
1.76 daniel 5553: * xmlParseTextDecl:
5554: * @ctxt: an XML parser context
5555: *
5556: * parse an XML declaration header for external entities
5557: *
5558: * [77] TextDecl ::= '<?xml' VersionInfo? EncodingDecl S? '?>'
5559: */
5560:
1.172 daniel 5561: void
1.76 daniel 5562: xmlParseTextDecl(xmlParserCtxtPtr ctxt) {
1.123 daniel 5563: xmlChar *version;
1.76 daniel 5564:
5565: /*
5566: * We know that '<?xml' is here.
5567: */
5568: SKIP(5);
5569:
5570: if (!IS_BLANK(CUR)) {
5571: if ((ctxt->sax != NULL) && (ctxt->sax->error != NULL))
1.122 daniel 5572: ctxt->sax->error(ctxt->userData,
5573: "Space needed after '<?xml'\n");
1.123 daniel 5574: ctxt->errNo = XML_ERR_SPACE_REQUIRED;
1.76 daniel 5575: ctxt->wellFormed = 0;
5576: }
5577: SKIP_BLANKS;
5578:
5579: /*
5580: * We may have the VersionInfo here.
5581: */
5582: version = xmlParseVersionInfo(ctxt);
5583: if (version == NULL)
5584: version = xmlCharStrdup(XML_DEFAULT_VERSION);
1.172 daniel 5585: ctxt->input->version = version;
1.76 daniel 5586:
5587: /*
5588: * We must have the encoding declaration
5589: */
5590: if (!IS_BLANK(CUR)) {
5591: if ((ctxt->sax != NULL) && (ctxt->sax->error != NULL))
1.122 daniel 5592: ctxt->sax->error(ctxt->userData, "Space needed here\n");
1.123 daniel 5593: ctxt->errNo = XML_ERR_SPACE_REQUIRED;
1.76 daniel 5594: ctxt->wellFormed = 0;
5595: }
1.172 daniel 5596: ctxt->input->encoding = xmlParseEncodingDecl(ctxt);
1.76 daniel 5597:
5598: SKIP_BLANKS;
1.152 daniel 5599: if ((RAW == '?') && (NXT(1) == '>')) {
1.76 daniel 5600: SKIP(2);
1.152 daniel 5601: } else if (RAW == '>') {
1.76 daniel 5602: /* Deprecated old WD ... */
5603: if ((ctxt->sax != NULL) && (ctxt->sax->error != NULL))
1.122 daniel 5604: ctxt->sax->error(ctxt->userData,
5605: "XML declaration must end-up with '?>'\n");
1.123 daniel 5606: ctxt->errNo = XML_ERR_XMLDECL_NOT_FINISHED;
1.76 daniel 5607: ctxt->wellFormed = 0;
5608: NEXT;
5609: } else {
5610: if ((ctxt->sax != NULL) && (ctxt->sax->error != NULL))
1.122 daniel 5611: ctxt->sax->error(ctxt->userData,
5612: "parsing XML declaration: '?>' expected\n");
1.123 daniel 5613: ctxt->errNo = XML_ERR_XMLDECL_NOT_FINISHED;
1.76 daniel 5614: ctxt->wellFormed = 0;
5615: MOVETO_ENDTAG(CUR_PTR);
5616: NEXT;
5617: }
5618: }
5619:
5620: /*
5621: * xmlParseConditionalSections
5622: * @ctxt: an XML parser context
5623: *
5624: * TODO : Conditionnal section are not yet supported !
5625: *
5626: * [61] conditionalSect ::= includeSect | ignoreSect
5627: * [62] includeSect ::= '<![' S? 'INCLUDE' S? '[' extSubsetDecl ']]>'
5628: * [63] ignoreSect ::= '<![' S? 'IGNORE' S? '[' ignoreSectContents* ']]>'
5629: * [64] ignoreSectContents ::= Ignore ('<![' ignoreSectContents ']]>' Ignore)*
5630: * [65] Ignore ::= Char* - (Char* ('<![' | ']]>') Char*)
5631: */
5632:
5633: void
5634: xmlParseConditionalSections(xmlParserCtxtPtr ctxt) {
1.165 daniel 5635: SKIP(3);
5636: SKIP_BLANKS;
1.168 daniel 5637: if ((RAW == 'I') && (NXT(1) == 'N') && (NXT(2) == 'C') &&
5638: (NXT(3) == 'L') && (NXT(4) == 'U') && (NXT(5) == 'D') &&
5639: (NXT(6) == 'E')) {
1.165 daniel 5640: SKIP(7);
1.168 daniel 5641: SKIP_BLANKS;
5642: if (RAW != '[') {
5643: if ((ctxt->sax != NULL) && (ctxt->sax->error != NULL))
5644: ctxt->sax->error(ctxt->userData,
5645: "XML conditional section '[' expected\n");
5646: ctxt->errNo = XML_ERR_CONDSEC_INVALID;
5647: ctxt->wellFormed = 0;
5648: } else {
5649: NEXT;
5650: }
1.165 daniel 5651: while ((RAW != 0) && ((RAW != ']') || (NXT(1) != ']') ||
5652: (NXT(2) != '>'))) {
5653: const xmlChar *check = CUR_PTR;
5654: int cons = ctxt->input->consumed;
5655: int tok = ctxt->token;
5656:
5657: if ((RAW == '<') && (NXT(1) == '!') && (NXT(2) == '[')) {
5658: xmlParseConditionalSections(ctxt);
5659: } else if (IS_BLANK(CUR)) {
5660: NEXT;
5661: } else if (RAW == '%') {
5662: xmlParsePEReference(ctxt);
5663: } else
5664: xmlParseMarkupDecl(ctxt);
5665:
5666: /*
5667: * Pop-up of finished entities.
5668: */
5669: while ((RAW == 0) && (ctxt->inputNr > 1))
5670: xmlPopInput(ctxt);
5671:
5672: if ((CUR_PTR == check) && (cons == ctxt->input->consumed) &&
5673: (tok == ctxt->token)) {
5674: if ((ctxt->sax != NULL) && (ctxt->sax->error != NULL))
5675: ctxt->sax->error(ctxt->userData,
5676: "Content error in the external subset\n");
5677: ctxt->wellFormed = 0;
5678: ctxt->errNo = XML_ERR_EXT_SUBSET_NOT_FINISHED;
5679: break;
5680: }
5681: }
1.168 daniel 5682: } else if ((RAW == 'I') && (NXT(1) == 'G') && (NXT(2) == 'N') &&
5683: (NXT(3) == 'O') && (NXT(4) == 'R') && (NXT(5) == 'E')) {
1.171 daniel 5684: int state;
5685:
1.168 daniel 5686: SKIP(6);
5687: SKIP_BLANKS;
5688: if (RAW != '[') {
5689: if ((ctxt->sax != NULL) && (ctxt->sax->error != NULL))
5690: ctxt->sax->error(ctxt->userData,
5691: "XML conditional section '[' expected\n");
5692: ctxt->errNo = XML_ERR_CONDSEC_INVALID;
5693: ctxt->wellFormed = 0;
5694: } else {
5695: NEXT;
5696: }
1.171 daniel 5697:
1.143 daniel 5698: /*
1.171 daniel 5699: * Parse up to the end of the conditionnal section
5700: * But disable SAX event generating DTD building in the meantime
1.143 daniel 5701: */
1.171 daniel 5702: state = ctxt->disableSAX;
1.165 daniel 5703: while ((RAW != 0) && ((RAW != ']') || (NXT(1) != ']') ||
5704: (NXT(2) != '>'))) {
1.171 daniel 5705: const xmlChar *check = CUR_PTR;
5706: int cons = ctxt->input->consumed;
5707: int tok = ctxt->token;
5708:
5709: if ((RAW == '<') && (NXT(1) == '!') && (NXT(2) == '[')) {
5710: xmlParseConditionalSections(ctxt);
5711: } else if (IS_BLANK(CUR)) {
5712: NEXT;
5713: } else if (RAW == '%') {
5714: xmlParsePEReference(ctxt);
5715: } else
5716: xmlParseMarkupDecl(ctxt);
5717:
1.165 daniel 5718: /*
5719: * Pop-up of finished entities.
5720: */
5721: while ((RAW == 0) && (ctxt->inputNr > 1))
5722: xmlPopInput(ctxt);
1.143 daniel 5723:
1.171 daniel 5724: if ((CUR_PTR == check) && (cons == ctxt->input->consumed) &&
5725: (tok == ctxt->token)) {
5726: if ((ctxt->sax != NULL) && (ctxt->sax->error != NULL))
5727: ctxt->sax->error(ctxt->userData,
5728: "Content error in the external subset\n");
5729: ctxt->wellFormed = 0;
5730: ctxt->errNo = XML_ERR_EXT_SUBSET_NOT_FINISHED;
5731: break;
5732: }
1.165 daniel 5733: }
1.171 daniel 5734: ctxt->disableSAX = state;
1.168 daniel 5735: } else {
5736: if ((ctxt->sax != NULL) && (ctxt->sax->error != NULL))
5737: ctxt->sax->error(ctxt->userData,
5738: "XML conditional section INCLUDE or IGNORE keyword expected\n");
5739: ctxt->errNo = XML_ERR_CONDSEC_INVALID;
5740: ctxt->wellFormed = 0;
1.143 daniel 5741: }
5742:
1.152 daniel 5743: if (RAW == 0)
1.143 daniel 5744: SHRINK;
5745:
1.152 daniel 5746: if (RAW == 0) {
1.76 daniel 5747: if ((ctxt->sax != NULL) && (ctxt->sax->error != NULL))
5748: ctxt->sax->error(ctxt->userData,
5749: "XML conditional section not closed\n");
1.123 daniel 5750: ctxt->errNo = XML_ERR_CONDSEC_NOT_FINISHED;
1.76 daniel 5751: ctxt->wellFormed = 0;
1.143 daniel 5752: } else {
5753: SKIP(3);
1.76 daniel 5754: }
5755: }
5756:
5757: /**
1.124 daniel 5758: * xmlParseExternalSubset:
1.76 daniel 5759: * @ctxt: an XML parser context
1.124 daniel 5760: * @ExternalID: the external identifier
5761: * @SystemID: the system identifier (or URL)
1.76 daniel 5762: *
5763: * parse Markup declarations from an external subset
5764: *
5765: * [30] extSubset ::= textDecl? extSubsetDecl
5766: *
5767: * [31] extSubsetDecl ::= (markupdecl | conditionalSect | PEReference | S) *
5768: */
5769: void
1.123 daniel 5770: xmlParseExternalSubset(xmlParserCtxtPtr ctxt, const xmlChar *ExternalID,
5771: const xmlChar *SystemID) {
1.132 daniel 5772: GROW;
1.152 daniel 5773: if ((RAW == '<') && (NXT(1) == '?') &&
1.76 daniel 5774: (NXT(2) == 'x') && (NXT(3) == 'm') &&
5775: (NXT(4) == 'l')) {
1.172 daniel 5776: xmlParseTextDecl(ctxt);
1.76 daniel 5777: }
1.79 daniel 5778: if (ctxt->myDoc == NULL) {
1.116 daniel 5779: ctxt->myDoc = xmlNewDoc(BAD_CAST "1.0");
1.79 daniel 5780: }
5781: if ((ctxt->myDoc != NULL) && (ctxt->myDoc->intSubset == NULL))
5782: xmlCreateIntSubset(ctxt->myDoc, NULL, ExternalID, SystemID);
5783:
1.96 daniel 5784: ctxt->instate = XML_PARSER_DTD;
1.101 daniel 5785: ctxt->external = 1;
1.152 daniel 5786: while (((RAW == '<') && (NXT(1) == '?')) ||
5787: ((RAW == '<') && (NXT(1) == '!')) ||
1.164 daniel 5788: IS_BLANK(CUR)) {
1.123 daniel 5789: const xmlChar *check = CUR_PTR;
1.115 daniel 5790: int cons = ctxt->input->consumed;
1.164 daniel 5791: int tok = ctxt->token;
1.115 daniel 5792:
1.152 daniel 5793: if ((RAW == '<') && (NXT(1) == '!') && (NXT(2) == '[')) {
1.76 daniel 5794: xmlParseConditionalSections(ctxt);
5795: } else if (IS_BLANK(CUR)) {
5796: NEXT;
1.152 daniel 5797: } else if (RAW == '%') {
1.76 daniel 5798: xmlParsePEReference(ctxt);
5799: } else
5800: xmlParseMarkupDecl(ctxt);
1.77 daniel 5801:
5802: /*
5803: * Pop-up of finished entities.
5804: */
1.166 daniel 5805: while ((RAW == 0) && (ctxt->inputNr > 1))
1.77 daniel 5806: xmlPopInput(ctxt);
5807:
1.164 daniel 5808: if ((CUR_PTR == check) && (cons == ctxt->input->consumed) &&
5809: (tok == ctxt->token)) {
1.115 daniel 5810: if ((ctxt->sax != NULL) && (ctxt->sax->error != NULL))
5811: ctxt->sax->error(ctxt->userData,
5812: "Content error in the external subset\n");
5813: ctxt->wellFormed = 0;
1.123 daniel 5814: ctxt->errNo = XML_ERR_EXT_SUBSET_NOT_FINISHED;
1.115 daniel 5815: break;
5816: }
1.76 daniel 5817: }
5818:
1.152 daniel 5819: if (RAW != 0) {
1.76 daniel 5820: if ((ctxt->sax != NULL) && (ctxt->sax->error != NULL))
5821: ctxt->sax->error(ctxt->userData,
5822: "Extra content at the end of the document\n");
1.123 daniel 5823: ctxt->errNo = XML_ERR_EXT_SUBSET_NOT_FINISHED;
1.76 daniel 5824: ctxt->wellFormed = 0;
5825: }
5826:
5827: }
5828:
5829: /**
1.77 daniel 5830: * xmlParseReference:
5831: * @ctxt: an XML parser context
5832: *
5833: * parse and handle entity references in content, depending on the SAX
5834: * interface, this may end-up in a call to character() if this is a
1.79 daniel 5835: * CharRef, a predefined entity, if there is no reference() callback.
5836: * or if the parser was asked to switch to that mode.
1.77 daniel 5837: *
5838: * [67] Reference ::= EntityRef | CharRef
5839: */
5840: void
5841: xmlParseReference(xmlParserCtxtPtr ctxt) {
5842: xmlEntityPtr ent;
1.123 daniel 5843: xmlChar *val;
1.152 daniel 5844: if (RAW != '&') return;
1.77 daniel 5845:
1.113 daniel 5846: if (ctxt->inputNr > 1) {
1.123 daniel 5847: xmlChar cur[2] = { '&' , 0 } ;
1.113 daniel 5848:
1.171 daniel 5849: if ((ctxt->sax != NULL) && (ctxt->sax->characters != NULL) &&
5850: (!ctxt->disableSAX))
1.113 daniel 5851: ctxt->sax->characters(ctxt->userData, cur, 1);
5852: if (ctxt->token == '&')
5853: ctxt->token = 0;
5854: else {
5855: SKIP(1);
5856: }
5857: return;
5858: }
1.77 daniel 5859: if (NXT(1) == '#') {
1.152 daniel 5860: int i = 0;
1.153 daniel 5861: xmlChar out[10];
5862: int hex = NXT(2);
1.77 daniel 5863: int val = xmlParseCharRef(ctxt);
1.152 daniel 5864:
1.153 daniel 5865: if (ctxt->encoding != NULL) {
5866: /*
5867: * So we are using non-UTF-8 buffers
5868: * Check that the char fit on 8bits, if not
5869: * generate a CharRef.
5870: */
5871: if (val <= 0xFF) {
5872: out[0] = val;
5873: out[1] = 0;
1.171 daniel 5874: if ((ctxt->sax != NULL) && (ctxt->sax->characters != NULL) &&
5875: (!ctxt->disableSAX))
1.153 daniel 5876: ctxt->sax->characters(ctxt->userData, out, 1);
5877: } else {
5878: if ((hex == 'x') || (hex == 'X'))
5879: sprintf((char *)out, "#x%X", val);
5880: else
5881: sprintf((char *)out, "#%d", val);
1.171 daniel 5882: if ((ctxt->sax != NULL) && (ctxt->sax->reference != NULL) &&
5883: (!ctxt->disableSAX))
1.153 daniel 5884: ctxt->sax->reference(ctxt->userData, out);
5885: }
5886: } else {
5887: /*
5888: * Just encode the value in UTF-8
5889: */
5890: COPY_BUF(0 ,out, i, val);
5891: out[i] = 0;
1.171 daniel 5892: if ((ctxt->sax != NULL) && (ctxt->sax->characters != NULL) &&
5893: (!ctxt->disableSAX))
1.153 daniel 5894: ctxt->sax->characters(ctxt->userData, out, i);
5895: }
1.77 daniel 5896: } else {
5897: ent = xmlParseEntityRef(ctxt);
5898: if (ent == NULL) return;
5899: if ((ent->name != NULL) &&
1.159 daniel 5900: (ent->etype != XML_INTERNAL_PREDEFINED_ENTITY)) {
1.113 daniel 5901: if ((ctxt->sax != NULL) && (ctxt->sax->reference != NULL) &&
1.171 daniel 5902: (ctxt->replaceEntities == 0) && (!ctxt->disableSAX)) {
1.113 daniel 5903: /*
5904: * Create a node.
5905: */
5906: ctxt->sax->reference(ctxt->userData, ent->name);
5907: return;
5908: } else if (ctxt->replaceEntities) {
5909: xmlParserInputPtr input;
1.79 daniel 5910:
1.113 daniel 5911: input = xmlNewEntityInputStream(ctxt, ent);
5912: xmlPushInput(ctxt, input);
1.167 daniel 5913: if ((ent->etype == XML_EXTERNAL_GENERAL_PARSED_ENTITY) &&
5914: (RAW == '<') && (NXT(1) == '?') &&
5915: (NXT(2) == 'x') && (NXT(3) == 'm') &&
5916: (NXT(4) == 'l') && (IS_BLANK(NXT(5)))) {
1.172 daniel 5917: xmlParseTextDecl(ctxt);
1.167 daniel 5918: if (input->standalone) {
5919: if ((ctxt->sax != NULL) && (ctxt->sax->error != NULL))
5920: ctxt->sax->error(ctxt->userData,
5921: "external parsed entities cannot be standalone\n");
5922: ctxt->errNo = XML_ERR_EXT_ENTITY_STANDALONE;
5923: ctxt->wellFormed = 0;
5924: }
5925: }
1.113 daniel 5926: return;
5927: }
1.77 daniel 5928: }
5929: val = ent->content;
5930: if (val == NULL) return;
5931: /*
5932: * inline the entity.
5933: */
1.171 daniel 5934: if ((ctxt->sax != NULL) && (ctxt->sax->characters != NULL) &&
5935: (!ctxt->disableSAX))
1.77 daniel 5936: ctxt->sax->characters(ctxt->userData, val, xmlStrlen(val));
5937: }
1.24 daniel 5938: }
5939:
1.50 daniel 5940: /**
5941: * xmlParseEntityRef:
5942: * @ctxt: an XML parser context
5943: *
5944: * parse ENTITY references declarations
1.24 daniel 5945: *
5946: * [68] EntityRef ::= '&' Name ';'
1.68 daniel 5947: *
1.98 daniel 5948: * [ WFC: Entity Declared ]
5949: * In a document without any DTD, a document with only an internal DTD
5950: * subset which contains no parameter entity references, or a document
5951: * with "standalone='yes'", the Name given in the entity reference
5952: * must match that in an entity declaration, except that well-formed
5953: * documents need not declare any of the following entities: amp, lt,
5954: * gt, apos, quot. The declaration of a parameter entity must precede
5955: * any reference to it. Similarly, the declaration of a general entity
5956: * must precede any reference to it which appears in a default value in an
5957: * attribute-list declaration. Note that if entities are declared in the
5958: * external subset or in external parameter entities, a non-validating
5959: * processor is not obligated to read and process their declarations;
5960: * for such documents, the rule that an entity must be declared is a
5961: * well-formedness constraint only if standalone='yes'.
5962: *
5963: * [ WFC: Parsed Entity ]
5964: * An entity reference must not contain the name of an unparsed entity
5965: *
1.77 daniel 5966: * Returns the xmlEntityPtr if found, or NULL otherwise.
1.24 daniel 5967: */
1.77 daniel 5968: xmlEntityPtr
1.55 daniel 5969: xmlParseEntityRef(xmlParserCtxtPtr ctxt) {
1.123 daniel 5970: xmlChar *name;
1.72 daniel 5971: xmlEntityPtr ent = NULL;
1.24 daniel 5972:
1.91 daniel 5973: GROW;
1.111 daniel 5974:
1.152 daniel 5975: if (RAW == '&') {
1.40 daniel 5976: NEXT;
1.24 daniel 5977: name = xmlParseName(ctxt);
5978: if (name == NULL) {
1.55 daniel 5979: if ((ctxt->sax != NULL) && (ctxt->sax->error != NULL))
1.98 daniel 5980: ctxt->sax->error(ctxt->userData,
5981: "xmlParseEntityRef: no name\n");
1.123 daniel 5982: ctxt->errNo = XML_ERR_NAME_REQUIRED;
1.59 daniel 5983: ctxt->wellFormed = 0;
1.24 daniel 5984: } else {
1.152 daniel 5985: if (RAW == ';') {
1.40 daniel 5986: NEXT;
1.24 daniel 5987: /*
1.77 daniel 5988: * Ask first SAX for entity resolution, otherwise try the
5989: * predefined set.
5990: */
5991: if (ctxt->sax != NULL) {
5992: if (ctxt->sax->getEntity != NULL)
5993: ent = ctxt->sax->getEntity(ctxt->userData, name);
5994: if (ent == NULL)
5995: ent = xmlGetPredefinedEntity(name);
5996: }
5997: /*
1.98 daniel 5998: * [ WFC: Entity Declared ]
5999: * In a document without any DTD, a document with only an
6000: * internal DTD subset which contains no parameter entity
6001: * references, or a document with "standalone='yes'", the
6002: * Name given in the entity reference must match that in an
6003: * entity declaration, except that well-formed documents
6004: * need not declare any of the following entities: amp, lt,
6005: * gt, apos, quot.
6006: * The declaration of a parameter entity must precede any
6007: * reference to it.
6008: * Similarly, the declaration of a general entity must
6009: * precede any reference to it which appears in a default
6010: * value in an attribute-list declaration. Note that if
6011: * entities are declared in the external subset or in
6012: * external parameter entities, a non-validating processor
6013: * is not obligated to read and process their declarations;
6014: * for such documents, the rule that an entity must be
6015: * declared is a well-formedness constraint only if
6016: * standalone='yes'.
1.59 daniel 6017: */
1.77 daniel 6018: if (ent == NULL) {
1.98 daniel 6019: if ((ctxt->standalone == 1) ||
6020: ((ctxt->hasExternalSubset == 0) &&
6021: (ctxt->hasPErefs == 0))) {
6022: if ((ctxt->sax != NULL) && (ctxt->sax->error != NULL))
1.77 daniel 6023: ctxt->sax->error(ctxt->userData,
6024: "Entity '%s' not defined\n", name);
1.123 daniel 6025: ctxt->errNo = XML_ERR_UNDECLARED_ENTITY;
1.77 daniel 6026: ctxt->wellFormed = 0;
6027: } else {
1.98 daniel 6028: if ((ctxt->sax != NULL) && (ctxt->sax->warning != NULL))
6029: ctxt->sax->warning(ctxt->userData,
6030: "Entity '%s' not defined\n", name);
1.123 daniel 6031: ctxt->errNo = XML_WAR_UNDECLARED_ENTITY;
1.59 daniel 6032: }
1.77 daniel 6033: }
1.59 daniel 6034:
6035: /*
1.98 daniel 6036: * [ WFC: Parsed Entity ]
6037: * An entity reference must not contain the name of an
6038: * unparsed entity
6039: */
1.159 daniel 6040: else if (ent->etype == XML_EXTERNAL_GENERAL_UNPARSED_ENTITY) {
1.98 daniel 6041: if ((ctxt->sax != NULL) && (ctxt->sax->error != NULL))
6042: ctxt->sax->error(ctxt->userData,
6043: "Entity reference to unparsed entity %s\n", name);
1.123 daniel 6044: ctxt->errNo = XML_ERR_UNPARSED_ENTITY;
1.98 daniel 6045: ctxt->wellFormed = 0;
6046: }
6047:
6048: /*
6049: * [ WFC: No External Entity References ]
6050: * Attribute values cannot contain direct or indirect
6051: * entity references to external entities.
6052: */
6053: else if ((ctxt->instate == XML_PARSER_ATTRIBUTE_VALUE) &&
1.159 daniel 6054: (ent->etype == XML_EXTERNAL_GENERAL_PARSED_ENTITY)) {
1.98 daniel 6055: if ((ctxt->sax != NULL) && (ctxt->sax->error != NULL))
6056: ctxt->sax->error(ctxt->userData,
6057: "Attribute references external entity '%s'\n", name);
1.123 daniel 6058: ctxt->errNo = XML_ERR_ENTITY_IS_EXTERNAL;
1.98 daniel 6059: ctxt->wellFormed = 0;
6060: }
6061: /*
6062: * [ WFC: No < in Attribute Values ]
6063: * The replacement text of any entity referred to directly or
6064: * indirectly in an attribute value (other than "<") must
6065: * not contain a <.
1.59 daniel 6066: */
1.98 daniel 6067: else if ((ctxt->instate == XML_PARSER_ATTRIBUTE_VALUE) &&
1.116 daniel 6068: (ent != NULL) &&
6069: (xmlStrcmp(ent->name, BAD_CAST "lt")) &&
1.98 daniel 6070: (ent->content != NULL) &&
6071: (xmlStrchr(ent->content, '<'))) {
6072: if ((ctxt->sax != NULL) && (ctxt->sax->error != NULL))
6073: ctxt->sax->error(ctxt->userData,
6074: "'<' in entity '%s' is not allowed in attributes values\n", name);
1.123 daniel 6075: ctxt->errNo = XML_ERR_LT_IN_ATTRIBUTE;
1.98 daniel 6076: ctxt->wellFormed = 0;
6077: }
6078:
6079: /*
6080: * Internal check, no parameter entities here ...
6081: */
6082: else {
1.159 daniel 6083: switch (ent->etype) {
1.59 daniel 6084: case XML_INTERNAL_PARAMETER_ENTITY:
6085: case XML_EXTERNAL_PARAMETER_ENTITY:
6086: if ((ctxt->sax != NULL) && (ctxt->sax->error != NULL))
1.74 daniel 6087: ctxt->sax->error(ctxt->userData,
1.59 daniel 6088: "Attempt to reference the parameter entity '%s'\n", name);
1.123 daniel 6089: ctxt->errNo = XML_ERR_ENTITY_IS_PARAMETER;
1.59 daniel 6090: ctxt->wellFormed = 0;
6091: break;
6092: }
6093: }
6094:
6095: /*
1.98 daniel 6096: * [ WFC: No Recursion ]
1.117 daniel 6097: * TODO A parsed entity must not contain a recursive reference
6098: * to itself, either directly or indirectly.
1.59 daniel 6099: */
1.77 daniel 6100:
1.24 daniel 6101: } else {
1.55 daniel 6102: if ((ctxt->sax != NULL) && (ctxt->sax->error != NULL))
1.74 daniel 6103: ctxt->sax->error(ctxt->userData,
1.59 daniel 6104: "xmlParseEntityRef: expecting ';'\n");
1.123 daniel 6105: ctxt->errNo = XML_ERR_ENTITYREF_SEMICOL_MISSING;
1.59 daniel 6106: ctxt->wellFormed = 0;
1.24 daniel 6107: }
1.119 daniel 6108: xmlFree(name);
1.24 daniel 6109: }
6110: }
1.77 daniel 6111: return(ent);
1.24 daniel 6112: }
1.135 daniel 6113: /**
6114: * xmlParseStringEntityRef:
6115: * @ctxt: an XML parser context
6116: * @str: a pointer to an index in the string
6117: *
6118: * parse ENTITY references declarations, but this version parses it from
6119: * a string value.
6120: *
6121: * [68] EntityRef ::= '&' Name ';'
6122: *
6123: * [ WFC: Entity Declared ]
6124: * In a document without any DTD, a document with only an internal DTD
6125: * subset which contains no parameter entity references, or a document
6126: * with "standalone='yes'", the Name given in the entity reference
6127: * must match that in an entity declaration, except that well-formed
6128: * documents need not declare any of the following entities: amp, lt,
6129: * gt, apos, quot. The declaration of a parameter entity must precede
6130: * any reference to it. Similarly, the declaration of a general entity
6131: * must precede any reference to it which appears in a default value in an
6132: * attribute-list declaration. Note that if entities are declared in the
6133: * external subset or in external parameter entities, a non-validating
6134: * processor is not obligated to read and process their declarations;
6135: * for such documents, the rule that an entity must be declared is a
6136: * well-formedness constraint only if standalone='yes'.
6137: *
6138: * [ WFC: Parsed Entity ]
6139: * An entity reference must not contain the name of an unparsed entity
6140: *
6141: * Returns the xmlEntityPtr if found, or NULL otherwise. The str pointer
6142: * is updated to the current location in the string.
6143: */
6144: xmlEntityPtr
6145: xmlParseStringEntityRef(xmlParserCtxtPtr ctxt, const xmlChar ** str) {
6146: xmlChar *name;
6147: const xmlChar *ptr;
6148: xmlChar cur;
6149: xmlEntityPtr ent = NULL;
6150:
6151: GROW;
6152:
1.156 daniel 6153: if ((str == NULL) || (*str == NULL))
6154: return(NULL);
1.135 daniel 6155: ptr = *str;
6156: cur = *ptr;
6157: if (cur == '&') {
6158: ptr++;
6159: cur = *ptr;
6160: name = xmlParseStringName(ctxt, &ptr);
6161: if (name == NULL) {
6162: if ((ctxt->sax != NULL) && (ctxt->sax->error != NULL))
6163: ctxt->sax->error(ctxt->userData,
6164: "xmlParseEntityRef: no name\n");
6165: ctxt->errNo = XML_ERR_NAME_REQUIRED;
6166: ctxt->wellFormed = 0;
6167: } else {
1.152 daniel 6168: if (RAW == ';') {
1.135 daniel 6169: NEXT;
6170: /*
6171: * Ask first SAX for entity resolution, otherwise try the
6172: * predefined set.
6173: */
6174: if (ctxt->sax != NULL) {
6175: if (ctxt->sax->getEntity != NULL)
6176: ent = ctxt->sax->getEntity(ctxt->userData, name);
6177: if (ent == NULL)
6178: ent = xmlGetPredefinedEntity(name);
6179: }
6180: /*
6181: * [ WFC: Entity Declared ]
6182: * In a document without any DTD, a document with only an
6183: * internal DTD subset which contains no parameter entity
6184: * references, or a document with "standalone='yes'", the
6185: * Name given in the entity reference must match that in an
6186: * entity declaration, except that well-formed documents
6187: * need not declare any of the following entities: amp, lt,
6188: * gt, apos, quot.
6189: * The declaration of a parameter entity must precede any
6190: * reference to it.
6191: * Similarly, the declaration of a general entity must
6192: * precede any reference to it which appears in a default
6193: * value in an attribute-list declaration. Note that if
6194: * entities are declared in the external subset or in
6195: * external parameter entities, a non-validating processor
6196: * is not obligated to read and process their declarations;
6197: * for such documents, the rule that an entity must be
6198: * declared is a well-formedness constraint only if
6199: * standalone='yes'.
6200: */
6201: if (ent == NULL) {
6202: if ((ctxt->standalone == 1) ||
6203: ((ctxt->hasExternalSubset == 0) &&
6204: (ctxt->hasPErefs == 0))) {
6205: if ((ctxt->sax != NULL) && (ctxt->sax->error != NULL))
6206: ctxt->sax->error(ctxt->userData,
6207: "Entity '%s' not defined\n", name);
6208: ctxt->errNo = XML_ERR_UNDECLARED_ENTITY;
6209: ctxt->wellFormed = 0;
6210: } else {
6211: if ((ctxt->sax != NULL) && (ctxt->sax->warning != NULL))
6212: ctxt->sax->warning(ctxt->userData,
6213: "Entity '%s' not defined\n", name);
6214: ctxt->errNo = XML_WAR_UNDECLARED_ENTITY;
6215: }
6216: }
6217:
6218: /*
6219: * [ WFC: Parsed Entity ]
6220: * An entity reference must not contain the name of an
6221: * unparsed entity
6222: */
1.159 daniel 6223: else if (ent->etype == XML_EXTERNAL_GENERAL_UNPARSED_ENTITY) {
1.135 daniel 6224: if ((ctxt->sax != NULL) && (ctxt->sax->error != NULL))
6225: ctxt->sax->error(ctxt->userData,
6226: "Entity reference to unparsed entity %s\n", name);
6227: ctxt->errNo = XML_ERR_UNPARSED_ENTITY;
6228: ctxt->wellFormed = 0;
6229: }
6230:
6231: /*
6232: * [ WFC: No External Entity References ]
6233: * Attribute values cannot contain direct or indirect
6234: * entity references to external entities.
6235: */
6236: else if ((ctxt->instate == XML_PARSER_ATTRIBUTE_VALUE) &&
1.159 daniel 6237: (ent->etype == XML_EXTERNAL_GENERAL_PARSED_ENTITY)) {
1.135 daniel 6238: if ((ctxt->sax != NULL) && (ctxt->sax->error != NULL))
6239: ctxt->sax->error(ctxt->userData,
6240: "Attribute references external entity '%s'\n", name);
6241: ctxt->errNo = XML_ERR_ENTITY_IS_EXTERNAL;
6242: ctxt->wellFormed = 0;
6243: }
6244: /*
6245: * [ WFC: No < in Attribute Values ]
6246: * The replacement text of any entity referred to directly or
6247: * indirectly in an attribute value (other than "<") must
6248: * not contain a <.
6249: */
6250: else if ((ctxt->instate == XML_PARSER_ATTRIBUTE_VALUE) &&
6251: (ent != NULL) &&
6252: (xmlStrcmp(ent->name, BAD_CAST "lt")) &&
6253: (ent->content != NULL) &&
6254: (xmlStrchr(ent->content, '<'))) {
6255: if ((ctxt->sax != NULL) && (ctxt->sax->error != NULL))
6256: ctxt->sax->error(ctxt->userData,
6257: "'<' in entity '%s' is not allowed in attributes values\n", name);
6258: ctxt->errNo = XML_ERR_LT_IN_ATTRIBUTE;
6259: ctxt->wellFormed = 0;
6260: }
6261:
6262: /*
6263: * Internal check, no parameter entities here ...
6264: */
6265: else {
1.159 daniel 6266: switch (ent->etype) {
1.135 daniel 6267: case XML_INTERNAL_PARAMETER_ENTITY:
6268: case XML_EXTERNAL_PARAMETER_ENTITY:
6269: if ((ctxt->sax != NULL) && (ctxt->sax->error != NULL))
6270: ctxt->sax->error(ctxt->userData,
6271: "Attempt to reference the parameter entity '%s'\n", name);
6272: ctxt->errNo = XML_ERR_ENTITY_IS_PARAMETER;
6273: ctxt->wellFormed = 0;
6274: break;
6275: }
6276: }
6277:
6278: /*
6279: * [ WFC: No Recursion ]
6280: * TODO A parsed entity must not contain a recursive reference
6281: * to itself, either directly or indirectly.
6282: */
6283:
6284: } else {
6285: if ((ctxt->sax != NULL) && (ctxt->sax->error != NULL))
6286: ctxt->sax->error(ctxt->userData,
6287: "xmlParseEntityRef: expecting ';'\n");
6288: ctxt->errNo = XML_ERR_ENTITYREF_SEMICOL_MISSING;
6289: ctxt->wellFormed = 0;
6290: }
6291: xmlFree(name);
6292: }
6293: }
6294: return(ent);
6295: }
1.24 daniel 6296:
1.50 daniel 6297: /**
6298: * xmlParsePEReference:
6299: * @ctxt: an XML parser context
6300: *
6301: * parse PEReference declarations
1.77 daniel 6302: * The entity content is handled directly by pushing it's content as
6303: * a new input stream.
1.22 daniel 6304: *
6305: * [69] PEReference ::= '%' Name ';'
1.68 daniel 6306: *
1.98 daniel 6307: * [ WFC: No Recursion ]
6308: * TODO A parsed entity must not contain a recursive
6309: * reference to itself, either directly or indirectly.
6310: *
6311: * [ WFC: Entity Declared ]
6312: * In a document without any DTD, a document with only an internal DTD
6313: * subset which contains no parameter entity references, or a document
6314: * with "standalone='yes'", ... ... The declaration of a parameter
6315: * entity must precede any reference to it...
6316: *
6317: * [ VC: Entity Declared ]
6318: * In a document with an external subset or external parameter entities
6319: * with "standalone='no'", ... ... The declaration of a parameter entity
6320: * must precede any reference to it...
6321: *
6322: * [ WFC: In DTD ]
6323: * Parameter-entity references may only appear in the DTD.
6324: * NOTE: misleading but this is handled.
1.22 daniel 6325: */
1.77 daniel 6326: void
1.55 daniel 6327: xmlParsePEReference(xmlParserCtxtPtr ctxt) {
1.123 daniel 6328: xmlChar *name;
1.72 daniel 6329: xmlEntityPtr entity = NULL;
1.50 daniel 6330: xmlParserInputPtr input;
1.22 daniel 6331:
1.152 daniel 6332: if (RAW == '%') {
1.40 daniel 6333: NEXT;
1.22 daniel 6334: name = xmlParseName(ctxt);
6335: if (name == NULL) {
1.55 daniel 6336: if ((ctxt->sax != NULL) && (ctxt->sax->error != NULL))
1.122 daniel 6337: ctxt->sax->error(ctxt->userData,
6338: "xmlParsePEReference: no name\n");
1.123 daniel 6339: ctxt->errNo = XML_ERR_NAME_REQUIRED;
1.59 daniel 6340: ctxt->wellFormed = 0;
1.22 daniel 6341: } else {
1.152 daniel 6342: if (RAW == ';') {
1.40 daniel 6343: NEXT;
1.98 daniel 6344: if ((ctxt->sax != NULL) &&
6345: (ctxt->sax->getParameterEntity != NULL))
6346: entity = ctxt->sax->getParameterEntity(ctxt->userData,
6347: name);
1.45 daniel 6348: if (entity == NULL) {
1.98 daniel 6349: /*
6350: * [ WFC: Entity Declared ]
6351: * In a document without any DTD, a document with only an
6352: * internal DTD subset which contains no parameter entity
6353: * references, or a document with "standalone='yes'", ...
6354: * ... The declaration of a parameter entity must precede
6355: * any reference to it...
6356: */
6357: if ((ctxt->standalone == 1) ||
6358: ((ctxt->hasExternalSubset == 0) &&
6359: (ctxt->hasPErefs == 0))) {
6360: if ((ctxt->sax != NULL) && (ctxt->sax->error != NULL))
6361: ctxt->sax->error(ctxt->userData,
6362: "PEReference: %%%s; not found\n", name);
1.123 daniel 6363: ctxt->errNo = XML_ERR_UNDECLARED_ENTITY;
1.98 daniel 6364: ctxt->wellFormed = 0;
6365: } else {
6366: /*
6367: * [ VC: Entity Declared ]
6368: * In a document with an external subset or external
6369: * parameter entities with "standalone='no'", ...
6370: * ... The declaration of a parameter entity must precede
6371: * any reference to it...
6372: */
6373: if ((ctxt->sax != NULL) && (ctxt->sax->warning != NULL))
6374: ctxt->sax->warning(ctxt->userData,
6375: "PEReference: %%%s; not found\n", name);
6376: ctxt->valid = 0;
6377: }
1.50 daniel 6378: } else {
1.98 daniel 6379: /*
6380: * Internal checking in case the entity quest barfed
6381: */
1.159 daniel 6382: if ((entity->etype != XML_INTERNAL_PARAMETER_ENTITY) &&
6383: (entity->etype != XML_EXTERNAL_PARAMETER_ENTITY)) {
1.98 daniel 6384: if ((ctxt->sax != NULL) && (ctxt->sax->warning != NULL))
6385: ctxt->sax->warning(ctxt->userData,
6386: "Internal: %%%s; is not a parameter entity\n", name);
6387: } else {
1.164 daniel 6388: /*
6389: * TODO !!!
6390: * handle the extra spaces added before and after
6391: * c.f. http://www.w3.org/TR/REC-xml#as-PE
6392: */
1.98 daniel 6393: input = xmlNewEntityInputStream(ctxt, entity);
6394: xmlPushInput(ctxt, input);
1.164 daniel 6395: if ((entity->etype == XML_EXTERNAL_PARAMETER_ENTITY) &&
6396: (RAW == '<') && (NXT(1) == '?') &&
6397: (NXT(2) == 'x') && (NXT(3) == 'm') &&
6398: (NXT(4) == 'l') && (IS_BLANK(NXT(5)))) {
1.172 daniel 6399: xmlParseTextDecl(ctxt);
1.164 daniel 6400: }
6401: if (ctxt->token == 0)
6402: ctxt->token = ' ';
1.98 daniel 6403: }
1.45 daniel 6404: }
1.98 daniel 6405: ctxt->hasPErefs = 1;
1.22 daniel 6406: } else {
1.55 daniel 6407: if ((ctxt->sax != NULL) && (ctxt->sax->error != NULL))
1.74 daniel 6408: ctxt->sax->error(ctxt->userData,
1.59 daniel 6409: "xmlParsePEReference: expecting ';'\n");
1.123 daniel 6410: ctxt->errNo = XML_ERR_ENTITYREF_SEMICOL_MISSING;
1.59 daniel 6411: ctxt->wellFormed = 0;
1.22 daniel 6412: }
1.119 daniel 6413: xmlFree(name);
1.3 veillard 6414: }
6415: }
6416: }
6417:
1.50 daniel 6418: /**
1.135 daniel 6419: * xmlParseStringPEReference:
6420: * @ctxt: an XML parser context
6421: * @str: a pointer to an index in the string
6422: *
6423: * parse PEReference declarations
6424: *
6425: * [69] PEReference ::= '%' Name ';'
6426: *
6427: * [ WFC: No Recursion ]
6428: * TODO A parsed entity must not contain a recursive
6429: * reference to itself, either directly or indirectly.
6430: *
6431: * [ WFC: Entity Declared ]
6432: * In a document without any DTD, a document with only an internal DTD
6433: * subset which contains no parameter entity references, or a document
6434: * with "standalone='yes'", ... ... The declaration of a parameter
6435: * entity must precede any reference to it...
6436: *
6437: * [ VC: Entity Declared ]
6438: * In a document with an external subset or external parameter entities
6439: * with "standalone='no'", ... ... The declaration of a parameter entity
6440: * must precede any reference to it...
6441: *
6442: * [ WFC: In DTD ]
6443: * Parameter-entity references may only appear in the DTD.
6444: * NOTE: misleading but this is handled.
6445: *
6446: * Returns the string of the entity content.
6447: * str is updated to the current value of the index
6448: */
6449: xmlEntityPtr
6450: xmlParseStringPEReference(xmlParserCtxtPtr ctxt, const xmlChar **str) {
6451: const xmlChar *ptr;
6452: xmlChar cur;
6453: xmlChar *name;
6454: xmlEntityPtr entity = NULL;
6455:
6456: if ((str == NULL) || (*str == NULL)) return(NULL);
6457: ptr = *str;
6458: cur = *ptr;
6459: if (cur == '%') {
6460: ptr++;
6461: cur = *ptr;
6462: name = xmlParseStringName(ctxt, &ptr);
6463: if (name == NULL) {
6464: if ((ctxt->sax != NULL) && (ctxt->sax->error != NULL))
6465: ctxt->sax->error(ctxt->userData,
6466: "xmlParseStringPEReference: no name\n");
6467: ctxt->errNo = XML_ERR_NAME_REQUIRED;
6468: ctxt->wellFormed = 0;
6469: } else {
6470: cur = *ptr;
6471: if (cur == ';') {
6472: ptr++;
6473: cur = *ptr;
6474: if ((ctxt->sax != NULL) &&
6475: (ctxt->sax->getParameterEntity != NULL))
6476: entity = ctxt->sax->getParameterEntity(ctxt->userData,
6477: name);
6478: if (entity == NULL) {
6479: /*
6480: * [ WFC: Entity Declared ]
6481: * In a document without any DTD, a document with only an
6482: * internal DTD subset which contains no parameter entity
6483: * references, or a document with "standalone='yes'", ...
6484: * ... The declaration of a parameter entity must precede
6485: * any reference to it...
6486: */
6487: if ((ctxt->standalone == 1) ||
6488: ((ctxt->hasExternalSubset == 0) &&
6489: (ctxt->hasPErefs == 0))) {
6490: if ((ctxt->sax != NULL) && (ctxt->sax->error != NULL))
6491: ctxt->sax->error(ctxt->userData,
6492: "PEReference: %%%s; not found\n", name);
6493: ctxt->errNo = XML_ERR_UNDECLARED_ENTITY;
6494: ctxt->wellFormed = 0;
6495: } else {
6496: /*
6497: * [ VC: Entity Declared ]
6498: * In a document with an external subset or external
6499: * parameter entities with "standalone='no'", ...
6500: * ... The declaration of a parameter entity must
6501: * precede any reference to it...
6502: */
6503: if ((ctxt->sax != NULL) && (ctxt->sax->warning != NULL))
6504: ctxt->sax->warning(ctxt->userData,
6505: "PEReference: %%%s; not found\n", name);
6506: ctxt->valid = 0;
6507: }
6508: } else {
6509: /*
6510: * Internal checking in case the entity quest barfed
6511: */
1.159 daniel 6512: if ((entity->etype != XML_INTERNAL_PARAMETER_ENTITY) &&
6513: (entity->etype != XML_EXTERNAL_PARAMETER_ENTITY)) {
1.135 daniel 6514: if ((ctxt->sax != NULL) && (ctxt->sax->warning != NULL))
6515: ctxt->sax->warning(ctxt->userData,
6516: "Internal: %%%s; is not a parameter entity\n", name);
6517: }
6518: }
6519: ctxt->hasPErefs = 1;
6520: } else {
6521: if ((ctxt->sax != NULL) && (ctxt->sax->error != NULL))
6522: ctxt->sax->error(ctxt->userData,
6523: "xmlParseStringPEReference: expecting ';'\n");
6524: ctxt->errNo = XML_ERR_ENTITYREF_SEMICOL_MISSING;
6525: ctxt->wellFormed = 0;
6526: }
6527: xmlFree(name);
6528: }
6529: }
6530: *str = ptr;
6531: return(entity);
6532: }
6533:
6534: /**
1.50 daniel 6535: * xmlParseDocTypeDecl :
6536: * @ctxt: an XML parser context
6537: *
6538: * parse a DOCTYPE declaration
1.21 daniel 6539: *
1.22 daniel 6540: * [28] doctypedecl ::= '<!DOCTYPE' S Name (S ExternalID)? S?
6541: * ('[' (markupdecl | PEReference | S)* ']' S?)? '>'
1.98 daniel 6542: *
6543: * [ VC: Root Element Type ]
1.99 daniel 6544: * The Name in the document type declaration must match the element
1.98 daniel 6545: * type of the root element.
1.21 daniel 6546: */
6547:
1.55 daniel 6548: void
6549: xmlParseDocTypeDecl(xmlParserCtxtPtr ctxt) {
1.165 daniel 6550: xmlChar *name = NULL;
1.123 daniel 6551: xmlChar *ExternalID = NULL;
6552: xmlChar *URI = NULL;
1.21 daniel 6553:
6554: /*
6555: * We know that '<!DOCTYPE' has been detected.
6556: */
1.40 daniel 6557: SKIP(9);
1.21 daniel 6558:
1.42 daniel 6559: SKIP_BLANKS;
1.21 daniel 6560:
6561: /*
6562: * Parse the DOCTYPE name.
6563: */
6564: name = xmlParseName(ctxt);
6565: if (name == NULL) {
1.55 daniel 6566: if ((ctxt->sax != NULL) && (ctxt->sax->error != NULL))
1.122 daniel 6567: ctxt->sax->error(ctxt->userData,
6568: "xmlParseDocTypeDecl : no DOCTYPE name !\n");
1.59 daniel 6569: ctxt->wellFormed = 0;
1.123 daniel 6570: ctxt->errNo = XML_ERR_NAME_REQUIRED;
1.21 daniel 6571: }
1.165 daniel 6572: ctxt->intSubName = name;
1.21 daniel 6573:
1.42 daniel 6574: SKIP_BLANKS;
1.21 daniel 6575:
6576: /*
1.22 daniel 6577: * Check for SystemID and ExternalID
6578: */
1.67 daniel 6579: URI = xmlParseExternalID(ctxt, &ExternalID, 1);
1.98 daniel 6580:
6581: if ((URI != NULL) || (ExternalID != NULL)) {
6582: ctxt->hasExternalSubset = 1;
6583: }
1.165 daniel 6584: ctxt->extSubURI = URI;
6585: ctxt->extSubSystem = ExternalID;
1.98 daniel 6586:
1.42 daniel 6587: SKIP_BLANKS;
1.36 daniel 6588:
1.76 daniel 6589: /*
1.165 daniel 6590: * Create and update the internal subset.
1.76 daniel 6591: */
1.171 daniel 6592: if ((ctxt->sax != NULL) && (ctxt->sax->internalSubset != NULL) &&
6593: (!ctxt->disableSAX))
1.74 daniel 6594: ctxt->sax->internalSubset(ctxt->userData, name, ExternalID, URI);
1.22 daniel 6595:
6596: /*
1.140 daniel 6597: * Is there any internal subset declarations ?
6598: * they are handled separately in xmlParseInternalSubset()
6599: */
1.152 daniel 6600: if (RAW == '[')
1.140 daniel 6601: return;
6602:
6603: /*
6604: * We should be at the end of the DOCTYPE declaration.
6605: */
1.152 daniel 6606: if (RAW != '>') {
1.140 daniel 6607: if ((ctxt->sax != NULL) && (ctxt->sax->error != NULL))
6608: ctxt->sax->error(ctxt->userData, "DOCTYPE unproperly terminated\n");
6609: ctxt->wellFormed = 0;
6610: ctxt->errNo = XML_ERR_DOCTYPE_NOT_FINISHED;
6611: }
6612: NEXT;
6613: }
6614:
6615: /**
6616: * xmlParseInternalsubset :
6617: * @ctxt: an XML parser context
6618: *
6619: * parse the internal subset declaration
6620: *
6621: * [28 end] ('[' (markupdecl | PEReference | S)* ']' S?)? '>'
6622: */
6623:
6624: void
6625: xmlParseInternalSubset(xmlParserCtxtPtr ctxt) {
6626: /*
1.22 daniel 6627: * Is there any DTD definition ?
6628: */
1.152 daniel 6629: if (RAW == '[') {
1.96 daniel 6630: ctxt->instate = XML_PARSER_DTD;
1.40 daniel 6631: NEXT;
1.22 daniel 6632: /*
6633: * Parse the succession of Markup declarations and
6634: * PEReferences.
6635: * Subsequence (markupdecl | PEReference | S)*
6636: */
1.152 daniel 6637: while (RAW != ']') {
1.123 daniel 6638: const xmlChar *check = CUR_PTR;
1.115 daniel 6639: int cons = ctxt->input->consumed;
1.22 daniel 6640:
1.42 daniel 6641: SKIP_BLANKS;
1.22 daniel 6642: xmlParseMarkupDecl(ctxt);
1.50 daniel 6643: xmlParsePEReference(ctxt);
1.22 daniel 6644:
1.115 daniel 6645: /*
6646: * Pop-up of finished entities.
6647: */
1.152 daniel 6648: while ((RAW == 0) && (ctxt->inputNr > 1))
1.115 daniel 6649: xmlPopInput(ctxt);
6650:
1.118 daniel 6651: if ((CUR_PTR == check) && (cons == ctxt->input->consumed)) {
1.55 daniel 6652: if ((ctxt->sax != NULL) && (ctxt->sax->error != NULL))
1.74 daniel 6653: ctxt->sax->error(ctxt->userData,
1.140 daniel 6654: "xmlParseInternalSubset: error detected in Markup declaration\n");
1.59 daniel 6655: ctxt->wellFormed = 0;
1.123 daniel 6656: ctxt->errNo = XML_ERR_INTERNAL_ERROR;
1.22 daniel 6657: break;
6658: }
6659: }
1.152 daniel 6660: if (RAW == ']') NEXT;
1.22 daniel 6661: }
6662:
6663: /*
6664: * We should be at the end of the DOCTYPE declaration.
1.21 daniel 6665: */
1.152 daniel 6666: if (RAW != '>') {
1.55 daniel 6667: if ((ctxt->sax != NULL) && (ctxt->sax->error != NULL))
1.74 daniel 6668: ctxt->sax->error(ctxt->userData, "DOCTYPE unproperly terminated\n");
1.59 daniel 6669: ctxt->wellFormed = 0;
1.123 daniel 6670: ctxt->errNo = XML_ERR_DOCTYPE_NOT_FINISHED;
1.21 daniel 6671: }
1.40 daniel 6672: NEXT;
1.21 daniel 6673: }
6674:
1.50 daniel 6675: /**
6676: * xmlParseAttribute:
6677: * @ctxt: an XML parser context
1.123 daniel 6678: * @value: a xmlChar ** used to store the value of the attribute
1.50 daniel 6679: *
6680: * parse an attribute
1.3 veillard 6681: *
1.22 daniel 6682: * [41] Attribute ::= Name Eq AttValue
6683: *
1.98 daniel 6684: * [ WFC: No External Entity References ]
6685: * Attribute values cannot contain direct or indirect entity references
6686: * to external entities.
6687: *
6688: * [ WFC: No < in Attribute Values ]
6689: * The replacement text of any entity referred to directly or indirectly in
6690: * an attribute value (other than "<") must not contain a <.
6691: *
6692: * [ VC: Attribute Value Type ]
1.117 daniel 6693: * The attribute must have been declared; the value must be of the type
1.99 daniel 6694: * declared for it.
1.98 daniel 6695: *
1.22 daniel 6696: * [25] Eq ::= S? '=' S?
6697: *
1.29 daniel 6698: * With namespace:
6699: *
6700: * [NS 11] Attribute ::= QName Eq AttValue
1.43 daniel 6701: *
6702: * Also the case QName == xmlns:??? is handled independently as a namespace
6703: * definition.
1.69 daniel 6704: *
1.72 daniel 6705: * Returns the attribute name, and the value in *value.
1.3 veillard 6706: */
6707:
1.123 daniel 6708: xmlChar *
6709: xmlParseAttribute(xmlParserCtxtPtr ctxt, xmlChar **value) {
6710: xmlChar *name, *val;
1.3 veillard 6711:
1.72 daniel 6712: *value = NULL;
6713: name = xmlParseName(ctxt);
1.22 daniel 6714: if (name == NULL) {
1.55 daniel 6715: if ((ctxt->sax != NULL) && (ctxt->sax->error != NULL))
1.74 daniel 6716: ctxt->sax->error(ctxt->userData, "error parsing attribute name\n");
1.59 daniel 6717: ctxt->wellFormed = 0;
1.123 daniel 6718: ctxt->errNo = XML_ERR_NAME_REQUIRED;
1.52 daniel 6719: return(NULL);
1.3 veillard 6720: }
6721:
6722: /*
1.29 daniel 6723: * read the value
1.3 veillard 6724: */
1.42 daniel 6725: SKIP_BLANKS;
1.152 daniel 6726: if (RAW == '=') {
1.40 daniel 6727: NEXT;
1.42 daniel 6728: SKIP_BLANKS;
1.72 daniel 6729: val = xmlParseAttValue(ctxt);
1.96 daniel 6730: ctxt->instate = XML_PARSER_CONTENT;
1.29 daniel 6731: } else {
1.55 daniel 6732: if ((ctxt->sax != NULL) && (ctxt->sax->error != NULL))
1.74 daniel 6733: ctxt->sax->error(ctxt->userData,
1.59 daniel 6734: "Specification mandate value for attribute %s\n", name);
1.123 daniel 6735: ctxt->errNo = XML_ERR_ATTRIBUTE_WITHOUT_VALUE;
1.59 daniel 6736: ctxt->wellFormed = 0;
1.170 daniel 6737: xmlFree(name);
1.52 daniel 6738: return(NULL);
1.43 daniel 6739: }
6740:
1.172 daniel 6741: /*
6742: * Check that xml:lang conforms to the specification
6743: */
6744: if (!xmlStrcmp(name, BAD_CAST "xml:lang")) {
6745: if (!xmlCheckLanguageID(val)) {
6746: if ((ctxt->sax != NULL) && (ctxt->sax->error != NULL))
6747: ctxt->sax->error(ctxt->userData,
6748: "Invalid value for xml:lang : %s\n", val);
6749: ctxt->errNo = XML_ERR_ATTRIBUTE_WITHOUT_VALUE;
6750: ctxt->wellFormed = 0;
6751: }
6752: }
6753:
1.72 daniel 6754: *value = val;
6755: return(name);
1.3 veillard 6756: }
6757:
1.50 daniel 6758: /**
6759: * xmlParseStartTag:
6760: * @ctxt: an XML parser context
6761: *
6762: * parse a start of tag either for rule element or
6763: * EmptyElement. In both case we don't parse the tag closing chars.
1.27 daniel 6764: *
6765: * [40] STag ::= '<' Name (S Attribute)* S? '>'
6766: *
1.98 daniel 6767: * [ WFC: Unique Att Spec ]
6768: * No attribute name may appear more than once in the same start-tag or
6769: * empty-element tag.
6770: *
1.29 daniel 6771: * [44] EmptyElemTag ::= '<' Name (S Attribute)* S? '/>'
6772: *
1.98 daniel 6773: * [ WFC: Unique Att Spec ]
6774: * No attribute name may appear more than once in the same start-tag or
6775: * empty-element tag.
6776: *
1.29 daniel 6777: * With namespace:
6778: *
6779: * [NS 8] STag ::= '<' QName (S Attribute)* S? '>'
6780: *
6781: * [NS 10] EmptyElement ::= '<' QName (S Attribute)* S? '/>'
1.83 daniel 6782: *
1.129 daniel 6783: * Returne the element name parsed
1.2 veillard 6784: */
6785:
1.123 daniel 6786: xmlChar *
1.69 daniel 6787: xmlParseStartTag(xmlParserCtxtPtr ctxt) {
1.123 daniel 6788: xmlChar *name;
6789: xmlChar *attname;
6790: xmlChar *attvalue;
6791: const xmlChar **atts = NULL;
1.72 daniel 6792: int nbatts = 0;
6793: int maxatts = 0;
6794: int i;
1.2 veillard 6795:
1.152 daniel 6796: if (RAW != '<') return(NULL);
1.40 daniel 6797: NEXT;
1.3 veillard 6798:
1.72 daniel 6799: name = xmlParseName(ctxt);
1.59 daniel 6800: if (name == NULL) {
6801: if ((ctxt->sax != NULL) && (ctxt->sax->error != NULL))
1.74 daniel 6802: ctxt->sax->error(ctxt->userData,
1.59 daniel 6803: "xmlParseStartTag: invalid element name\n");
1.123 daniel 6804: ctxt->errNo = XML_ERR_NAME_REQUIRED;
1.59 daniel 6805: ctxt->wellFormed = 0;
1.83 daniel 6806: return(NULL);
1.50 daniel 6807: }
6808:
6809: /*
1.3 veillard 6810: * Now parse the attributes, it ends up with the ending
6811: *
6812: * (S Attribute)* S?
6813: */
1.42 daniel 6814: SKIP_BLANKS;
1.91 daniel 6815: GROW;
1.168 daniel 6816:
1.153 daniel 6817: while ((IS_CHAR(RAW)) &&
1.152 daniel 6818: (RAW != '>') &&
6819: ((RAW != '/') || (NXT(1) != '>'))) {
1.123 daniel 6820: const xmlChar *q = CUR_PTR;
1.91 daniel 6821: int cons = ctxt->input->consumed;
1.29 daniel 6822:
1.72 daniel 6823: attname = xmlParseAttribute(ctxt, &attvalue);
6824: if ((attname != NULL) && (attvalue != NULL)) {
6825: /*
1.98 daniel 6826: * [ WFC: Unique Att Spec ]
6827: * No attribute name may appear more than once in the same
6828: * start-tag or empty-element tag.
1.72 daniel 6829: */
6830: for (i = 0; i < nbatts;i += 2) {
6831: if (!xmlStrcmp(atts[i], attname)) {
6832: if ((ctxt->sax != NULL) && (ctxt->sax->error != NULL))
1.98 daniel 6833: ctxt->sax->error(ctxt->userData,
6834: "Attribute %s redefined\n",
6835: attname);
1.72 daniel 6836: ctxt->wellFormed = 0;
1.123 daniel 6837: ctxt->errNo = XML_ERR_ATTRIBUTE_REDEFINED;
1.119 daniel 6838: xmlFree(attname);
6839: xmlFree(attvalue);
1.98 daniel 6840: goto failed;
1.72 daniel 6841: }
6842: }
6843:
6844: /*
6845: * Add the pair to atts
6846: */
6847: if (atts == NULL) {
6848: maxatts = 10;
1.123 daniel 6849: atts = (const xmlChar **) xmlMalloc(maxatts * sizeof(xmlChar *));
1.72 daniel 6850: if (atts == NULL) {
1.86 daniel 6851: fprintf(stderr, "malloc of %ld byte failed\n",
1.123 daniel 6852: maxatts * (long)sizeof(xmlChar *));
1.83 daniel 6853: return(NULL);
1.72 daniel 6854: }
1.127 daniel 6855: } else if (nbatts + 4 > maxatts) {
1.72 daniel 6856: maxatts *= 2;
1.123 daniel 6857: atts = (const xmlChar **) xmlRealloc(atts,
6858: maxatts * sizeof(xmlChar *));
1.72 daniel 6859: if (atts == NULL) {
1.86 daniel 6860: fprintf(stderr, "realloc of %ld byte failed\n",
1.123 daniel 6861: maxatts * (long)sizeof(xmlChar *));
1.83 daniel 6862: return(NULL);
1.72 daniel 6863: }
6864: }
6865: atts[nbatts++] = attname;
6866: atts[nbatts++] = attvalue;
6867: atts[nbatts] = NULL;
6868: atts[nbatts + 1] = NULL;
6869: }
6870:
1.116 daniel 6871: failed:
1.168 daniel 6872:
6873: if ((RAW == '>') || (((RAW == '/') && (NXT(1) == '>'))))
6874: break;
6875: if (!IS_BLANK(RAW)) {
6876: if ((ctxt->sax != NULL) && (ctxt->sax->error != NULL))
6877: ctxt->sax->error(ctxt->userData,
6878: "attributes construct error\n");
6879: ctxt->errNo = XML_ERR_SPACE_REQUIRED;
6880: ctxt->wellFormed = 0;
6881: }
1.42 daniel 6882: SKIP_BLANKS;
1.91 daniel 6883: if ((cons == ctxt->input->consumed) && (q == CUR_PTR)) {
1.55 daniel 6884: if ((ctxt->sax != NULL) && (ctxt->sax->error != NULL))
1.74 daniel 6885: ctxt->sax->error(ctxt->userData,
1.31 daniel 6886: "xmlParseStartTag: problem parsing attributes\n");
1.123 daniel 6887: ctxt->errNo = XML_ERR_INTERNAL_ERROR;
1.59 daniel 6888: ctxt->wellFormed = 0;
1.29 daniel 6889: break;
1.3 veillard 6890: }
1.91 daniel 6891: GROW;
1.3 veillard 6892: }
6893:
1.43 daniel 6894: /*
1.72 daniel 6895: * SAX: Start of Element !
1.43 daniel 6896: */
1.171 daniel 6897: if ((ctxt->sax != NULL) && (ctxt->sax->startElement != NULL) &&
6898: (!ctxt->disableSAX))
1.74 daniel 6899: ctxt->sax->startElement(ctxt->userData, name, atts);
1.43 daniel 6900:
1.72 daniel 6901: if (atts != NULL) {
1.123 daniel 6902: for (i = 0;i < nbatts;i++) xmlFree((xmlChar *) atts[i]);
1.119 daniel 6903: xmlFree(atts);
1.72 daniel 6904: }
1.83 daniel 6905: return(name);
1.3 veillard 6906: }
6907:
1.50 daniel 6908: /**
6909: * xmlParseEndTag:
6910: * @ctxt: an XML parser context
6911: *
6912: * parse an end of tag
1.27 daniel 6913: *
6914: * [42] ETag ::= '</' Name S? '>'
1.29 daniel 6915: *
6916: * With namespace
6917: *
1.72 daniel 6918: * [NS 9] ETag ::= '</' QName S? '>'
1.7 veillard 6919: */
6920:
1.55 daniel 6921: void
1.140 daniel 6922: xmlParseEndTag(xmlParserCtxtPtr ctxt) {
1.123 daniel 6923: xmlChar *name;
1.140 daniel 6924: xmlChar *oldname;
1.7 veillard 6925:
1.91 daniel 6926: GROW;
1.152 daniel 6927: if ((RAW != '<') || (NXT(1) != '/')) {
1.55 daniel 6928: if ((ctxt->sax != NULL) && (ctxt->sax->error != NULL))
1.74 daniel 6929: ctxt->sax->error(ctxt->userData, "xmlParseEndTag: '</' not found\n");
1.59 daniel 6930: ctxt->wellFormed = 0;
1.123 daniel 6931: ctxt->errNo = XML_ERR_LTSLASH_REQUIRED;
1.27 daniel 6932: return;
6933: }
1.40 daniel 6934: SKIP(2);
1.7 veillard 6935:
1.72 daniel 6936: name = xmlParseName(ctxt);
1.7 veillard 6937:
6938: /*
6939: * We should definitely be at the ending "S? '>'" part
6940: */
1.91 daniel 6941: GROW;
1.42 daniel 6942: SKIP_BLANKS;
1.153 daniel 6943: if ((!IS_CHAR(RAW)) || (RAW != '>')) {
1.55 daniel 6944: if ((ctxt->sax != NULL) && (ctxt->sax->error != NULL))
1.74 daniel 6945: ctxt->sax->error(ctxt->userData, "End tag : expected '>'\n");
1.123 daniel 6946: ctxt->errNo = XML_ERR_GT_REQUIRED;
1.59 daniel 6947: ctxt->wellFormed = 0;
1.7 veillard 6948: } else
1.40 daniel 6949: NEXT;
1.7 veillard 6950:
1.72 daniel 6951: /*
1.98 daniel 6952: * [ WFC: Element Type Match ]
6953: * The Name in an element's end-tag must match the element type in the
6954: * start-tag.
6955: *
1.83 daniel 6956: */
1.147 daniel 6957: if ((name == NULL) || (ctxt->name == NULL) ||
6958: (xmlStrcmp(name, ctxt->name))) {
6959: if ((ctxt->sax != NULL) && (ctxt->sax->error != NULL)) {
6960: if ((name != NULL) && (ctxt->name != NULL)) {
6961: ctxt->sax->error(ctxt->userData,
6962: "Opening and ending tag mismatch: %s and %s\n",
6963: ctxt->name, name);
6964: } else if (ctxt->name != NULL) {
6965: ctxt->sax->error(ctxt->userData,
6966: "Ending tag eror for: %s\n", ctxt->name);
6967: } else {
6968: ctxt->sax->error(ctxt->userData,
6969: "Ending tag error: internal error ???\n");
6970: }
1.122 daniel 6971:
1.147 daniel 6972: }
1.123 daniel 6973: ctxt->errNo = XML_ERR_TAG_NAME_MISMATCH;
1.83 daniel 6974: ctxt->wellFormed = 0;
6975: }
6976:
6977: /*
1.72 daniel 6978: * SAX: End of Tag
6979: */
1.171 daniel 6980: if ((ctxt->sax != NULL) && (ctxt->sax->endElement != NULL) &&
6981: (!ctxt->disableSAX))
1.74 daniel 6982: ctxt->sax->endElement(ctxt->userData, name);
1.72 daniel 6983:
6984: if (name != NULL)
1.119 daniel 6985: xmlFree(name);
1.140 daniel 6986: oldname = namePop(ctxt);
6987: if (oldname != NULL) {
6988: #ifdef DEBUG_STACK
6989: fprintf(stderr,"Close: popped %s\n", oldname);
6990: #endif
6991: xmlFree(oldname);
6992: }
1.7 veillard 6993: return;
6994: }
6995:
1.50 daniel 6996: /**
6997: * xmlParseCDSect:
6998: * @ctxt: an XML parser context
6999: *
7000: * Parse escaped pure raw content.
1.29 daniel 7001: *
7002: * [18] CDSect ::= CDStart CData CDEnd
7003: *
7004: * [19] CDStart ::= '<![CDATA['
7005: *
7006: * [20] Data ::= (Char* - (Char* ']]>' Char*))
7007: *
7008: * [21] CDEnd ::= ']]>'
1.3 veillard 7009: */
1.55 daniel 7010: void
7011: xmlParseCDSect(xmlParserCtxtPtr ctxt) {
1.135 daniel 7012: xmlChar *buf = NULL;
7013: int len = 0;
1.140 daniel 7014: int size = XML_PARSER_BUFFER_SIZE;
1.152 daniel 7015: int r, rl;
7016: int s, sl;
7017: int cur, l;
1.3 veillard 7018:
1.106 daniel 7019: if ((NXT(0) == '<') && (NXT(1) == '!') &&
1.40 daniel 7020: (NXT(2) == '[') && (NXT(3) == 'C') &&
7021: (NXT(4) == 'D') && (NXT(5) == 'A') &&
7022: (NXT(6) == 'T') && (NXT(7) == 'A') &&
7023: (NXT(8) == '[')) {
7024: SKIP(9);
1.29 daniel 7025: } else
1.45 daniel 7026: return;
1.109 daniel 7027:
7028: ctxt->instate = XML_PARSER_CDATA_SECTION;
1.152 daniel 7029: r = CUR_CHAR(rl);
7030: if (!IS_CHAR(r)) {
1.55 daniel 7031: if ((ctxt->sax != NULL) && (ctxt->sax->error != NULL))
1.122 daniel 7032: ctxt->sax->error(ctxt->userData,
1.135 daniel 7033: "CData section not finished\n");
1.59 daniel 7034: ctxt->wellFormed = 0;
1.123 daniel 7035: ctxt->errNo = XML_ERR_CDATA_NOT_FINISHED;
1.109 daniel 7036: ctxt->instate = XML_PARSER_CONTENT;
1.45 daniel 7037: return;
1.3 veillard 7038: }
1.152 daniel 7039: NEXTL(rl);
7040: s = CUR_CHAR(sl);
7041: if (!IS_CHAR(s)) {
1.55 daniel 7042: if ((ctxt->sax != NULL) && (ctxt->sax->error != NULL))
1.122 daniel 7043: ctxt->sax->error(ctxt->userData,
1.135 daniel 7044: "CData section not finished\n");
1.123 daniel 7045: ctxt->errNo = XML_ERR_CDATA_NOT_FINISHED;
1.59 daniel 7046: ctxt->wellFormed = 0;
1.109 daniel 7047: ctxt->instate = XML_PARSER_CONTENT;
1.45 daniel 7048: return;
1.3 veillard 7049: }
1.152 daniel 7050: NEXTL(sl);
7051: cur = CUR_CHAR(l);
1.135 daniel 7052: buf = (xmlChar *) xmlMalloc(size * sizeof(xmlChar));
7053: if (buf == NULL) {
7054: fprintf(stderr, "malloc of %d byte failed\n", size);
7055: return;
7056: }
1.108 veillard 7057: while (IS_CHAR(cur) &&
1.110 daniel 7058: ((r != ']') || (s != ']') || (cur != '>'))) {
1.152 daniel 7059: if (len + 5 >= size) {
1.135 daniel 7060: size *= 2;
7061: buf = xmlRealloc(buf, size * sizeof(xmlChar));
7062: if (buf == NULL) {
7063: fprintf(stderr, "realloc of %d byte failed\n", size);
7064: return;
7065: }
7066: }
1.152 daniel 7067: COPY_BUF(rl,buf,len,r);
1.110 daniel 7068: r = s;
1.152 daniel 7069: rl = sl;
1.110 daniel 7070: s = cur;
1.152 daniel 7071: sl = l;
7072: NEXTL(l);
7073: cur = CUR_CHAR(l);
1.3 veillard 7074: }
1.135 daniel 7075: buf[len] = 0;
1.109 daniel 7076: ctxt->instate = XML_PARSER_CONTENT;
1.152 daniel 7077: if (cur != '>') {
1.55 daniel 7078: if ((ctxt->sax != NULL) && (ctxt->sax->error != NULL))
1.122 daniel 7079: ctxt->sax->error(ctxt->userData,
1.135 daniel 7080: "CData section not finished\n%.50s\n", buf);
1.123 daniel 7081: ctxt->errNo = XML_ERR_CDATA_NOT_FINISHED;
1.59 daniel 7082: ctxt->wellFormed = 0;
1.135 daniel 7083: xmlFree(buf);
1.45 daniel 7084: return;
1.3 veillard 7085: }
1.152 daniel 7086: NEXTL(l);
1.16 daniel 7087:
1.45 daniel 7088: /*
1.135 daniel 7089: * Ok the buffer is to be consumed as cdata.
1.45 daniel 7090: */
1.171 daniel 7091: if ((ctxt->sax != NULL) && (!ctxt->disableSAX)) {
1.107 daniel 7092: if (ctxt->sax->cdataBlock != NULL)
1.135 daniel 7093: ctxt->sax->cdataBlock(ctxt->userData, buf, len);
1.45 daniel 7094: }
1.135 daniel 7095: xmlFree(buf);
1.2 veillard 7096: }
7097:
1.50 daniel 7098: /**
7099: * xmlParseContent:
7100: * @ctxt: an XML parser context
7101: *
7102: * Parse a content:
1.2 veillard 7103: *
1.27 daniel 7104: * [43] content ::= (element | CharData | Reference | CDSect | PI | Comment)*
1.2 veillard 7105: */
7106:
1.55 daniel 7107: void
7108: xmlParseContent(xmlParserCtxtPtr ctxt) {
1.97 daniel 7109: GROW;
1.152 daniel 7110: while ((RAW != '<') || (NXT(1) != '/')) {
1.123 daniel 7111: const xmlChar *test = CUR_PTR;
1.91 daniel 7112: int cons = ctxt->input->consumed;
1.123 daniel 7113: xmlChar tok = ctxt->token;
1.27 daniel 7114:
7115: /*
1.152 daniel 7116: * Handle possible processed charrefs.
7117: */
7118: if (ctxt->token != 0) {
7119: xmlParseCharData(ctxt, 0);
7120: }
7121: /*
1.27 daniel 7122: * First case : a Processing Instruction.
7123: */
1.152 daniel 7124: else if ((RAW == '<') && (NXT(1) == '?')) {
1.27 daniel 7125: xmlParsePI(ctxt);
7126: }
1.72 daniel 7127:
1.27 daniel 7128: /*
7129: * Second case : a CDSection
7130: */
1.152 daniel 7131: else if ((RAW == '<') && (NXT(1) == '!') &&
1.40 daniel 7132: (NXT(2) == '[') && (NXT(3) == 'C') &&
7133: (NXT(4) == 'D') && (NXT(5) == 'A') &&
7134: (NXT(6) == 'T') && (NXT(7) == 'A') &&
7135: (NXT(8) == '[')) {
1.45 daniel 7136: xmlParseCDSect(ctxt);
1.27 daniel 7137: }
1.72 daniel 7138:
1.27 daniel 7139: /*
7140: * Third case : a comment
7141: */
1.152 daniel 7142: else if ((RAW == '<') && (NXT(1) == '!') &&
1.40 daniel 7143: (NXT(2) == '-') && (NXT(3) == '-')) {
1.114 daniel 7144: xmlParseComment(ctxt);
1.97 daniel 7145: ctxt->instate = XML_PARSER_CONTENT;
1.27 daniel 7146: }
1.72 daniel 7147:
1.27 daniel 7148: /*
7149: * Fourth case : a sub-element.
7150: */
1.152 daniel 7151: else if (RAW == '<') {
1.72 daniel 7152: xmlParseElement(ctxt);
1.45 daniel 7153: }
1.72 daniel 7154:
1.45 daniel 7155: /*
1.50 daniel 7156: * Fifth case : a reference. If if has not been resolved,
7157: * parsing returns it's Name, create the node
1.45 daniel 7158: */
1.97 daniel 7159:
1.152 daniel 7160: else if (RAW == '&') {
1.77 daniel 7161: xmlParseReference(ctxt);
1.27 daniel 7162: }
1.72 daniel 7163:
1.27 daniel 7164: /*
7165: * Last case, text. Note that References are handled directly.
7166: */
7167: else {
1.45 daniel 7168: xmlParseCharData(ctxt, 0);
1.3 veillard 7169: }
1.14 veillard 7170:
1.91 daniel 7171: GROW;
1.14 veillard 7172: /*
1.45 daniel 7173: * Pop-up of finished entities.
1.14 veillard 7174: */
1.152 daniel 7175: while ((RAW == 0) && (ctxt->inputNr > 1))
1.69 daniel 7176: xmlPopInput(ctxt);
1.135 daniel 7177: SHRINK;
1.45 daniel 7178:
1.113 daniel 7179: if ((cons == ctxt->input->consumed) && (test == CUR_PTR) &&
7180: (tok == ctxt->token)) {
1.55 daniel 7181: if ((ctxt->sax != NULL) && (ctxt->sax->error != NULL))
1.74 daniel 7182: ctxt->sax->error(ctxt->userData,
1.59 daniel 7183: "detected an error in element content\n");
1.123 daniel 7184: ctxt->errNo = XML_ERR_INTERNAL_ERROR;
1.59 daniel 7185: ctxt->wellFormed = 0;
1.29 daniel 7186: break;
7187: }
1.3 veillard 7188: }
1.2 veillard 7189: }
7190:
1.50 daniel 7191: /**
7192: * xmlParseElement:
7193: * @ctxt: an XML parser context
7194: *
7195: * parse an XML element, this is highly recursive
1.26 daniel 7196: *
7197: * [39] element ::= EmptyElemTag | STag content ETag
7198: *
1.98 daniel 7199: * [ WFC: Element Type Match ]
7200: * The Name in an element's end-tag must match the element type in the
7201: * start-tag.
7202: *
7203: * [ VC: Element Valid ]
1.117 daniel 7204: * An element is valid if there is a declaration matching elementdecl
1.99 daniel 7205: * where the Name matches the element type and one of the following holds:
7206: * - The declaration matches EMPTY and the element has no content.
7207: * - The declaration matches children and the sequence of child elements
7208: * belongs to the language generated by the regular expression in the
7209: * content model, with optional white space (characters matching the
7210: * nonterminal S) between each pair of child elements.
7211: * - The declaration matches Mixed and the content consists of character
7212: * data and child elements whose types match names in the content model.
7213: * - The declaration matches ANY, and the types of any child elements have
7214: * been declared.
1.2 veillard 7215: */
1.26 daniel 7216:
1.72 daniel 7217: void
1.69 daniel 7218: xmlParseElement(xmlParserCtxtPtr ctxt) {
1.123 daniel 7219: const xmlChar *openTag = CUR_PTR;
7220: xmlChar *name;
1.140 daniel 7221: xmlChar *oldname;
1.32 daniel 7222: xmlParserNodeInfo node_info;
1.118 daniel 7223: xmlNodePtr ret;
1.2 veillard 7224:
1.32 daniel 7225: /* Capture start position */
1.118 daniel 7226: if (ctxt->record_info) {
7227: node_info.begin_pos = ctxt->input->consumed +
7228: (CUR_PTR - ctxt->input->base);
7229: node_info.begin_line = ctxt->input->line;
7230: }
1.32 daniel 7231:
1.83 daniel 7232: name = xmlParseStartTag(ctxt);
7233: if (name == NULL) {
7234: return;
7235: }
1.140 daniel 7236: namePush(ctxt, name);
1.118 daniel 7237: ret = ctxt->node;
1.2 veillard 7238:
7239: /*
1.99 daniel 7240: * [ VC: Root Element Type ]
7241: * The Name in the document type declaration must match the element
7242: * type of the root element.
7243: */
1.105 daniel 7244: if (ctxt->validate && ctxt->wellFormed && ctxt->myDoc &&
1.157 daniel 7245: ctxt->node && (ctxt->node == ctxt->myDoc->children))
1.102 daniel 7246: ctxt->valid &= xmlValidateRoot(&ctxt->vctxt, ctxt->myDoc);
1.99 daniel 7247:
7248: /*
1.2 veillard 7249: * Check for an Empty Element.
7250: */
1.152 daniel 7251: if ((RAW == '/') && (NXT(1) == '>')) {
1.40 daniel 7252: SKIP(2);
1.171 daniel 7253: if ((ctxt->sax != NULL) && (ctxt->sax->endElement != NULL) &&
7254: (!ctxt->disableSAX))
1.83 daniel 7255: ctxt->sax->endElement(ctxt->userData, name);
1.140 daniel 7256: oldname = namePop(ctxt);
7257: if (oldname != NULL) {
7258: #ifdef DEBUG_STACK
7259: fprintf(stderr,"Close: popped %s\n", oldname);
7260: #endif
7261: xmlFree(oldname);
7262: }
1.72 daniel 7263: return;
1.2 veillard 7264: }
1.152 daniel 7265: if (RAW == '>') {
1.91 daniel 7266: NEXT;
7267: } else {
1.55 daniel 7268: if ((ctxt->sax != NULL) && (ctxt->sax->error != NULL))
1.122 daniel 7269: ctxt->sax->error(ctxt->userData,
7270: "Couldn't find end of Start Tag\n%.30s\n",
1.57 daniel 7271: openTag);
1.59 daniel 7272: ctxt->wellFormed = 0;
1.123 daniel 7273: ctxt->errNo = XML_ERR_GT_REQUIRED;
1.45 daniel 7274:
7275: /*
7276: * end of parsing of this node.
7277: */
7278: nodePop(ctxt);
1.140 daniel 7279: oldname = namePop(ctxt);
7280: if (oldname != NULL) {
7281: #ifdef DEBUG_STACK
7282: fprintf(stderr,"Close: popped %s\n", oldname);
7283: #endif
7284: xmlFree(oldname);
7285: }
1.118 daniel 7286:
7287: /*
7288: * Capture end position and add node
7289: */
7290: if ( ret != NULL && ctxt->record_info ) {
7291: node_info.end_pos = ctxt->input->consumed +
7292: (CUR_PTR - ctxt->input->base);
7293: node_info.end_line = ctxt->input->line;
7294: node_info.node = ret;
7295: xmlParserAddNodeInfo(ctxt, &node_info);
7296: }
1.72 daniel 7297: return;
1.2 veillard 7298: }
7299:
7300: /*
7301: * Parse the content of the element:
7302: */
1.45 daniel 7303: xmlParseContent(ctxt);
1.153 daniel 7304: if (!IS_CHAR(RAW)) {
1.55 daniel 7305: if ((ctxt->sax != NULL) && (ctxt->sax->error != NULL))
1.74 daniel 7306: ctxt->sax->error(ctxt->userData,
1.57 daniel 7307: "Premature end of data in tag %.30s\n", openTag);
1.59 daniel 7308: ctxt->wellFormed = 0;
1.123 daniel 7309: ctxt->errNo = XML_ERR_TAG_NOT_FINISED;
1.45 daniel 7310:
7311: /*
7312: * end of parsing of this node.
7313: */
7314: nodePop(ctxt);
1.140 daniel 7315: oldname = namePop(ctxt);
7316: if (oldname != NULL) {
7317: #ifdef DEBUG_STACK
7318: fprintf(stderr,"Close: popped %s\n", oldname);
7319: #endif
7320: xmlFree(oldname);
7321: }
1.72 daniel 7322: return;
1.2 veillard 7323: }
7324:
7325: /*
1.27 daniel 7326: * parse the end of tag: '</' should be here.
1.2 veillard 7327: */
1.140 daniel 7328: xmlParseEndTag(ctxt);
1.118 daniel 7329:
7330: /*
7331: * Capture end position and add node
7332: */
7333: if ( ret != NULL && ctxt->record_info ) {
7334: node_info.end_pos = ctxt->input->consumed +
7335: (CUR_PTR - ctxt->input->base);
7336: node_info.end_line = ctxt->input->line;
7337: node_info.node = ret;
7338: xmlParserAddNodeInfo(ctxt, &node_info);
7339: }
1.2 veillard 7340: }
7341:
1.50 daniel 7342: /**
7343: * xmlParseVersionNum:
7344: * @ctxt: an XML parser context
7345: *
7346: * parse the XML version value.
1.29 daniel 7347: *
7348: * [26] VersionNum ::= ([a-zA-Z0-9_.:] | '-')+
1.68 daniel 7349: *
7350: * Returns the string giving the XML version number, or NULL
1.29 daniel 7351: */
1.123 daniel 7352: xmlChar *
1.55 daniel 7353: xmlParseVersionNum(xmlParserCtxtPtr ctxt) {
1.135 daniel 7354: xmlChar *buf = NULL;
7355: int len = 0;
7356: int size = 10;
7357: xmlChar cur;
1.29 daniel 7358:
1.135 daniel 7359: buf = (xmlChar *) xmlMalloc(size * sizeof(xmlChar));
7360: if (buf == NULL) {
7361: fprintf(stderr, "malloc of %d byte failed\n", size);
7362: return(NULL);
7363: }
7364: cur = CUR;
1.152 daniel 7365: while (((cur >= 'a') && (cur <= 'z')) ||
7366: ((cur >= 'A') && (cur <= 'Z')) ||
7367: ((cur >= '0') && (cur <= '9')) ||
7368: (cur == '_') || (cur == '.') ||
7369: (cur == ':') || (cur == '-')) {
1.135 daniel 7370: if (len + 1 >= size) {
7371: size *= 2;
7372: buf = xmlRealloc(buf, size * sizeof(xmlChar));
7373: if (buf == NULL) {
7374: fprintf(stderr, "realloc of %d byte failed\n", size);
7375: return(NULL);
7376: }
7377: }
7378: buf[len++] = cur;
7379: NEXT;
7380: cur=CUR;
7381: }
7382: buf[len] = 0;
7383: return(buf);
1.29 daniel 7384: }
7385:
1.50 daniel 7386: /**
7387: * xmlParseVersionInfo:
7388: * @ctxt: an XML parser context
7389: *
7390: * parse the XML version.
1.29 daniel 7391: *
7392: * [24] VersionInfo ::= S 'version' Eq (' VersionNum ' | " VersionNum ")
7393: *
7394: * [25] Eq ::= S? '=' S?
1.50 daniel 7395: *
1.68 daniel 7396: * Returns the version string, e.g. "1.0"
1.29 daniel 7397: */
7398:
1.123 daniel 7399: xmlChar *
1.55 daniel 7400: xmlParseVersionInfo(xmlParserCtxtPtr ctxt) {
1.123 daniel 7401: xmlChar *version = NULL;
7402: const xmlChar *q;
1.29 daniel 7403:
1.152 daniel 7404: if ((RAW == 'v') && (NXT(1) == 'e') &&
1.40 daniel 7405: (NXT(2) == 'r') && (NXT(3) == 's') &&
7406: (NXT(4) == 'i') && (NXT(5) == 'o') &&
7407: (NXT(6) == 'n')) {
7408: SKIP(7);
1.42 daniel 7409: SKIP_BLANKS;
1.152 daniel 7410: if (RAW != '=') {
1.55 daniel 7411: if ((ctxt->sax != NULL) && (ctxt->sax->error != NULL))
1.122 daniel 7412: ctxt->sax->error(ctxt->userData,
7413: "xmlParseVersionInfo : expected '='\n");
1.59 daniel 7414: ctxt->wellFormed = 0;
1.123 daniel 7415: ctxt->errNo = XML_ERR_EQUAL_REQUIRED;
1.31 daniel 7416: return(NULL);
7417: }
1.40 daniel 7418: NEXT;
1.42 daniel 7419: SKIP_BLANKS;
1.152 daniel 7420: if (RAW == '"') {
1.40 daniel 7421: NEXT;
7422: q = CUR_PTR;
1.29 daniel 7423: version = xmlParseVersionNum(ctxt);
1.152 daniel 7424: if (RAW != '"') {
1.55 daniel 7425: if ((ctxt->sax != NULL) && (ctxt->sax->error != NULL))
1.122 daniel 7426: ctxt->sax->error(ctxt->userData,
7427: "String not closed\n%.50s\n", q);
1.59 daniel 7428: ctxt->wellFormed = 0;
1.123 daniel 7429: ctxt->errNo = XML_ERR_STRING_NOT_CLOSED;
1.55 daniel 7430: } else
1.40 daniel 7431: NEXT;
1.152 daniel 7432: } else if (RAW == '\''){
1.40 daniel 7433: NEXT;
7434: q = CUR_PTR;
1.29 daniel 7435: version = xmlParseVersionNum(ctxt);
1.152 daniel 7436: if (RAW != '\'') {
1.55 daniel 7437: if ((ctxt->sax != NULL) && (ctxt->sax->error != NULL))
1.122 daniel 7438: ctxt->sax->error(ctxt->userData,
7439: "String not closed\n%.50s\n", q);
1.123 daniel 7440: ctxt->errNo = XML_ERR_STRING_NOT_CLOSED;
1.59 daniel 7441: ctxt->wellFormed = 0;
1.55 daniel 7442: } else
1.40 daniel 7443: NEXT;
1.31 daniel 7444: } else {
1.55 daniel 7445: if ((ctxt->sax != NULL) && (ctxt->sax->error != NULL))
1.74 daniel 7446: ctxt->sax->error(ctxt->userData,
1.59 daniel 7447: "xmlParseVersionInfo : expected ' or \"\n");
1.122 daniel 7448: ctxt->wellFormed = 0;
1.123 daniel 7449: ctxt->errNo = XML_ERR_STRING_NOT_STARTED;
1.29 daniel 7450: }
7451: }
7452: return(version);
7453: }
7454:
1.50 daniel 7455: /**
7456: * xmlParseEncName:
7457: * @ctxt: an XML parser context
7458: *
7459: * parse the XML encoding name
1.29 daniel 7460: *
7461: * [81] EncName ::= [A-Za-z] ([A-Za-z0-9._] | '-')*
1.50 daniel 7462: *
1.68 daniel 7463: * Returns the encoding name value or NULL
1.29 daniel 7464: */
1.123 daniel 7465: xmlChar *
1.55 daniel 7466: xmlParseEncName(xmlParserCtxtPtr ctxt) {
1.135 daniel 7467: xmlChar *buf = NULL;
7468: int len = 0;
7469: int size = 10;
7470: xmlChar cur;
1.29 daniel 7471:
1.135 daniel 7472: cur = CUR;
7473: if (((cur >= 'a') && (cur <= 'z')) ||
7474: ((cur >= 'A') && (cur <= 'Z'))) {
7475: buf = (xmlChar *) xmlMalloc(size * sizeof(xmlChar));
7476: if (buf == NULL) {
7477: fprintf(stderr, "malloc of %d byte failed\n", size);
7478: return(NULL);
7479: }
7480:
7481: buf[len++] = cur;
1.40 daniel 7482: NEXT;
1.135 daniel 7483: cur = CUR;
1.152 daniel 7484: while (((cur >= 'a') && (cur <= 'z')) ||
7485: ((cur >= 'A') && (cur <= 'Z')) ||
7486: ((cur >= '0') && (cur <= '9')) ||
7487: (cur == '.') || (cur == '_') ||
7488: (cur == '-')) {
1.135 daniel 7489: if (len + 1 >= size) {
7490: size *= 2;
7491: buf = xmlRealloc(buf, size * sizeof(xmlChar));
7492: if (buf == NULL) {
7493: fprintf(stderr, "realloc of %d byte failed\n", size);
7494: return(NULL);
7495: }
7496: }
7497: buf[len++] = cur;
7498: NEXT;
7499: cur = CUR;
7500: if (cur == 0) {
7501: SHRINK;
7502: GROW;
7503: cur = CUR;
7504: }
7505: }
7506: buf[len] = 0;
1.29 daniel 7507: } else {
1.55 daniel 7508: if ((ctxt->sax != NULL) && (ctxt->sax->error != NULL))
1.74 daniel 7509: ctxt->sax->error(ctxt->userData, "Invalid XML encoding name\n");
1.59 daniel 7510: ctxt->wellFormed = 0;
1.123 daniel 7511: ctxt->errNo = XML_ERR_ENCODING_NAME;
1.29 daniel 7512: }
1.135 daniel 7513: return(buf);
1.29 daniel 7514: }
7515:
1.50 daniel 7516: /**
7517: * xmlParseEncodingDecl:
7518: * @ctxt: an XML parser context
7519: *
7520: * parse the XML encoding declaration
1.29 daniel 7521: *
7522: * [80] EncodingDecl ::= S 'encoding' Eq ('"' EncName '"' | "'" EncName "'")
1.50 daniel 7523: *
7524: * TODO: this should setup the conversion filters.
7525: *
1.68 daniel 7526: * Returns the encoding value or NULL
1.29 daniel 7527: */
7528:
1.123 daniel 7529: xmlChar *
1.55 daniel 7530: xmlParseEncodingDecl(xmlParserCtxtPtr ctxt) {
1.123 daniel 7531: xmlChar *encoding = NULL;
7532: const xmlChar *q;
1.29 daniel 7533:
1.42 daniel 7534: SKIP_BLANKS;
1.152 daniel 7535: if ((RAW == 'e') && (NXT(1) == 'n') &&
1.40 daniel 7536: (NXT(2) == 'c') && (NXT(3) == 'o') &&
7537: (NXT(4) == 'd') && (NXT(5) == 'i') &&
7538: (NXT(6) == 'n') && (NXT(7) == 'g')) {
7539: SKIP(8);
1.42 daniel 7540: SKIP_BLANKS;
1.152 daniel 7541: if (RAW != '=') {
1.55 daniel 7542: if ((ctxt->sax != NULL) && (ctxt->sax->error != NULL))
1.122 daniel 7543: ctxt->sax->error(ctxt->userData,
7544: "xmlParseEncodingDecl : expected '='\n");
1.59 daniel 7545: ctxt->wellFormed = 0;
1.123 daniel 7546: ctxt->errNo = XML_ERR_EQUAL_REQUIRED;
1.31 daniel 7547: return(NULL);
7548: }
1.40 daniel 7549: NEXT;
1.42 daniel 7550: SKIP_BLANKS;
1.152 daniel 7551: if (RAW == '"') {
1.40 daniel 7552: NEXT;
7553: q = CUR_PTR;
1.29 daniel 7554: encoding = xmlParseEncName(ctxt);
1.152 daniel 7555: if (RAW != '"') {
1.55 daniel 7556: if ((ctxt->sax != NULL) && (ctxt->sax->error != NULL))
1.122 daniel 7557: ctxt->sax->error(ctxt->userData,
7558: "String not closed\n%.50s\n", q);
1.59 daniel 7559: ctxt->wellFormed = 0;
1.123 daniel 7560: ctxt->errNo = XML_ERR_STRING_NOT_CLOSED;
1.55 daniel 7561: } else
1.40 daniel 7562: NEXT;
1.152 daniel 7563: } else if (RAW == '\''){
1.40 daniel 7564: NEXT;
7565: q = CUR_PTR;
1.29 daniel 7566: encoding = xmlParseEncName(ctxt);
1.152 daniel 7567: if (RAW != '\'') {
1.55 daniel 7568: if ((ctxt->sax != NULL) && (ctxt->sax->error != NULL))
1.122 daniel 7569: ctxt->sax->error(ctxt->userData,
7570: "String not closed\n%.50s\n", q);
1.59 daniel 7571: ctxt->wellFormed = 0;
1.123 daniel 7572: ctxt->errNo = XML_ERR_STRING_NOT_CLOSED;
1.55 daniel 7573: } else
1.40 daniel 7574: NEXT;
1.152 daniel 7575: } else if (RAW == '"'){
1.55 daniel 7576: if ((ctxt->sax != NULL) && (ctxt->sax->error != NULL))
1.74 daniel 7577: ctxt->sax->error(ctxt->userData,
1.59 daniel 7578: "xmlParseEncodingDecl : expected ' or \"\n");
7579: ctxt->wellFormed = 0;
1.123 daniel 7580: ctxt->errNo = XML_ERR_STRING_NOT_STARTED;
1.29 daniel 7581: }
7582: }
7583: return(encoding);
7584: }
7585:
1.50 daniel 7586: /**
7587: * xmlParseSDDecl:
7588: * @ctxt: an XML parser context
7589: *
7590: * parse the XML standalone declaration
1.29 daniel 7591: *
7592: * [32] SDDecl ::= S 'standalone' Eq
7593: * (("'" ('yes' | 'no') "'") | ('"' ('yes' | 'no')'"'))
1.99 daniel 7594: *
7595: * [ VC: Standalone Document Declaration ]
7596: * TODO The standalone document declaration must have the value "no"
7597: * if any external markup declarations contain declarations of:
7598: * - attributes with default values, if elements to which these
7599: * attributes apply appear in the document without specifications
7600: * of values for these attributes, or
7601: * - entities (other than amp, lt, gt, apos, quot), if references
7602: * to those entities appear in the document, or
7603: * - attributes with values subject to normalization, where the
7604: * attribute appears in the document with a value which will change
7605: * as a result of normalization, or
7606: * - element types with element content, if white space occurs directly
7607: * within any instance of those types.
1.68 daniel 7608: *
7609: * Returns 1 if standalone, 0 otherwise
1.29 daniel 7610: */
7611:
1.55 daniel 7612: int
7613: xmlParseSDDecl(xmlParserCtxtPtr ctxt) {
1.29 daniel 7614: int standalone = -1;
7615:
1.42 daniel 7616: SKIP_BLANKS;
1.152 daniel 7617: if ((RAW == 's') && (NXT(1) == 't') &&
1.40 daniel 7618: (NXT(2) == 'a') && (NXT(3) == 'n') &&
7619: (NXT(4) == 'd') && (NXT(5) == 'a') &&
7620: (NXT(6) == 'l') && (NXT(7) == 'o') &&
7621: (NXT(8) == 'n') && (NXT(9) == 'e')) {
7622: SKIP(10);
1.81 daniel 7623: SKIP_BLANKS;
1.152 daniel 7624: if (RAW != '=') {
1.55 daniel 7625: if ((ctxt->sax != NULL) && (ctxt->sax->error != NULL))
1.74 daniel 7626: ctxt->sax->error(ctxt->userData,
1.59 daniel 7627: "XML standalone declaration : expected '='\n");
1.123 daniel 7628: ctxt->errNo = XML_ERR_EQUAL_REQUIRED;
1.59 daniel 7629: ctxt->wellFormed = 0;
1.32 daniel 7630: return(standalone);
7631: }
1.40 daniel 7632: NEXT;
1.42 daniel 7633: SKIP_BLANKS;
1.152 daniel 7634: if (RAW == '\''){
1.40 daniel 7635: NEXT;
1.152 daniel 7636: if ((RAW == 'n') && (NXT(1) == 'o')) {
1.29 daniel 7637: standalone = 0;
1.40 daniel 7638: SKIP(2);
1.152 daniel 7639: } else if ((RAW == 'y') && (NXT(1) == 'e') &&
1.40 daniel 7640: (NXT(2) == 's')) {
1.29 daniel 7641: standalone = 1;
1.40 daniel 7642: SKIP(3);
1.29 daniel 7643: } else {
1.55 daniel 7644: if ((ctxt->sax != NULL) && (ctxt->sax->error != NULL))
1.122 daniel 7645: ctxt->sax->error(ctxt->userData,
7646: "standalone accepts only 'yes' or 'no'\n");
1.123 daniel 7647: ctxt->errNo = XML_ERR_STANDALONE_VALUE;
1.59 daniel 7648: ctxt->wellFormed = 0;
1.29 daniel 7649: }
1.152 daniel 7650: if (RAW != '\'') {
1.55 daniel 7651: if ((ctxt->sax != NULL) && (ctxt->sax->error != NULL))
1.74 daniel 7652: ctxt->sax->error(ctxt->userData, "String not closed\n");
1.123 daniel 7653: ctxt->errNo = XML_ERR_STRING_NOT_CLOSED;
1.59 daniel 7654: ctxt->wellFormed = 0;
1.55 daniel 7655: } else
1.40 daniel 7656: NEXT;
1.152 daniel 7657: } else if (RAW == '"'){
1.40 daniel 7658: NEXT;
1.152 daniel 7659: if ((RAW == 'n') && (NXT(1) == 'o')) {
1.29 daniel 7660: standalone = 0;
1.40 daniel 7661: SKIP(2);
1.152 daniel 7662: } else if ((RAW == 'y') && (NXT(1) == 'e') &&
1.40 daniel 7663: (NXT(2) == 's')) {
1.29 daniel 7664: standalone = 1;
1.40 daniel 7665: SKIP(3);
1.29 daniel 7666: } else {
1.55 daniel 7667: if ((ctxt->sax != NULL) && (ctxt->sax->error != NULL))
1.74 daniel 7668: ctxt->sax->error(ctxt->userData,
1.59 daniel 7669: "standalone accepts only 'yes' or 'no'\n");
1.123 daniel 7670: ctxt->errNo = XML_ERR_STANDALONE_VALUE;
1.59 daniel 7671: ctxt->wellFormed = 0;
1.29 daniel 7672: }
1.152 daniel 7673: if (RAW != '"') {
1.55 daniel 7674: if ((ctxt->sax != NULL) && (ctxt->sax->error != NULL))
1.74 daniel 7675: ctxt->sax->error(ctxt->userData, "String not closed\n");
1.59 daniel 7676: ctxt->wellFormed = 0;
1.123 daniel 7677: ctxt->errNo = XML_ERR_STRING_NOT_CLOSED;
1.55 daniel 7678: } else
1.40 daniel 7679: NEXT;
1.37 daniel 7680: } else {
1.55 daniel 7681: if ((ctxt->sax != NULL) && (ctxt->sax->error != NULL))
1.122 daniel 7682: ctxt->sax->error(ctxt->userData,
7683: "Standalone value not found\n");
1.59 daniel 7684: ctxt->wellFormed = 0;
1.123 daniel 7685: ctxt->errNo = XML_ERR_STRING_NOT_STARTED;
1.37 daniel 7686: }
1.29 daniel 7687: }
7688: return(standalone);
7689: }
7690:
1.50 daniel 7691: /**
7692: * xmlParseXMLDecl:
7693: * @ctxt: an XML parser context
7694: *
7695: * parse an XML declaration header
1.29 daniel 7696: *
7697: * [23] XMLDecl ::= '<?xml' VersionInfo EncodingDecl? SDDecl? S? '?>'
1.1 veillard 7698: */
7699:
1.55 daniel 7700: void
7701: xmlParseXMLDecl(xmlParserCtxtPtr ctxt) {
1.123 daniel 7702: xmlChar *version;
1.1 veillard 7703:
7704: /*
1.19 daniel 7705: * We know that '<?xml' is here.
1.1 veillard 7706: */
1.40 daniel 7707: SKIP(5);
1.1 veillard 7708:
1.153 daniel 7709: if (!IS_BLANK(RAW)) {
1.59 daniel 7710: if ((ctxt->sax != NULL) && (ctxt->sax->error != NULL))
1.74 daniel 7711: ctxt->sax->error(ctxt->userData, "Blank needed after '<?xml'\n");
1.123 daniel 7712: ctxt->errNo = XML_ERR_SPACE_REQUIRED;
1.59 daniel 7713: ctxt->wellFormed = 0;
7714: }
1.42 daniel 7715: SKIP_BLANKS;
1.1 veillard 7716:
7717: /*
1.29 daniel 7718: * We should have the VersionInfo here.
1.1 veillard 7719: */
1.29 daniel 7720: version = xmlParseVersionInfo(ctxt);
7721: if (version == NULL)
1.45 daniel 7722: version = xmlCharStrdup(XML_DEFAULT_VERSION);
1.72 daniel 7723: ctxt->version = xmlStrdup(version);
1.119 daniel 7724: xmlFree(version);
1.29 daniel 7725:
7726: /*
7727: * We may have the encoding declaration
7728: */
1.153 daniel 7729: if (!IS_BLANK(RAW)) {
1.152 daniel 7730: if ((RAW == '?') && (NXT(1) == '>')) {
1.59 daniel 7731: SKIP(2);
7732: return;
7733: }
7734: if ((ctxt->sax != NULL) && (ctxt->sax->error != NULL))
1.74 daniel 7735: ctxt->sax->error(ctxt->userData, "Blank needed here\n");
1.123 daniel 7736: ctxt->errNo = XML_ERR_SPACE_REQUIRED;
1.59 daniel 7737: ctxt->wellFormed = 0;
7738: }
1.164 daniel 7739: ctxt->input->encoding = xmlParseEncodingDecl(ctxt);
1.1 veillard 7740:
7741: /*
1.29 daniel 7742: * We may have the standalone status.
1.1 veillard 7743: */
1.164 daniel 7744: if ((ctxt->input->encoding != NULL) && (!IS_BLANK(RAW))) {
1.152 daniel 7745: if ((RAW == '?') && (NXT(1) == '>')) {
1.59 daniel 7746: SKIP(2);
7747: return;
7748: }
7749: if ((ctxt->sax != NULL) && (ctxt->sax->error != NULL))
1.74 daniel 7750: ctxt->sax->error(ctxt->userData, "Blank needed here\n");
1.59 daniel 7751: ctxt->wellFormed = 0;
1.123 daniel 7752: ctxt->errNo = XML_ERR_SPACE_REQUIRED;
1.59 daniel 7753: }
7754: SKIP_BLANKS;
1.167 daniel 7755: ctxt->input->standalone = xmlParseSDDecl(ctxt);
1.1 veillard 7756:
1.42 daniel 7757: SKIP_BLANKS;
1.152 daniel 7758: if ((RAW == '?') && (NXT(1) == '>')) {
1.40 daniel 7759: SKIP(2);
1.152 daniel 7760: } else if (RAW == '>') {
1.31 daniel 7761: /* Deprecated old WD ... */
1.55 daniel 7762: if ((ctxt->sax != NULL) && (ctxt->sax->error != NULL))
1.122 daniel 7763: ctxt->sax->error(ctxt->userData,
7764: "XML declaration must end-up with '?>'\n");
1.59 daniel 7765: ctxt->wellFormed = 0;
1.123 daniel 7766: ctxt->errNo = XML_ERR_XMLDECL_NOT_FINISHED;
1.40 daniel 7767: NEXT;
1.29 daniel 7768: } else {
1.55 daniel 7769: if ((ctxt->sax != NULL) && (ctxt->sax->error != NULL))
1.122 daniel 7770: ctxt->sax->error(ctxt->userData,
7771: "parsing XML declaration: '?>' expected\n");
1.59 daniel 7772: ctxt->wellFormed = 0;
1.123 daniel 7773: ctxt->errNo = XML_ERR_XMLDECL_NOT_FINISHED;
1.40 daniel 7774: MOVETO_ENDTAG(CUR_PTR);
7775: NEXT;
1.29 daniel 7776: }
1.1 veillard 7777: }
7778:
1.50 daniel 7779: /**
7780: * xmlParseMisc:
7781: * @ctxt: an XML parser context
7782: *
7783: * parse an XML Misc* optionnal field.
1.21 daniel 7784: *
1.22 daniel 7785: * [27] Misc ::= Comment | PI | S
1.1 veillard 7786: */
7787:
1.55 daniel 7788: void
7789: xmlParseMisc(xmlParserCtxtPtr ctxt) {
1.152 daniel 7790: while (((RAW == '<') && (NXT(1) == '?')) ||
7791: ((RAW == '<') && (NXT(1) == '!') &&
1.40 daniel 7792: (NXT(2) == '-') && (NXT(3) == '-')) ||
7793: IS_BLANK(CUR)) {
1.152 daniel 7794: if ((RAW == '<') && (NXT(1) == '?')) {
1.16 daniel 7795: xmlParsePI(ctxt);
1.40 daniel 7796: } else if (IS_BLANK(CUR)) {
7797: NEXT;
1.1 veillard 7798: } else
1.114 daniel 7799: xmlParseComment(ctxt);
1.1 veillard 7800: }
7801: }
7802:
1.50 daniel 7803: /**
7804: * xmlParseDocument :
7805: * @ctxt: an XML parser context
7806: *
7807: * parse an XML document (and build a tree if using the standard SAX
7808: * interface).
1.21 daniel 7809: *
1.22 daniel 7810: * [1] document ::= prolog element Misc*
1.29 daniel 7811: *
7812: * [22] prolog ::= XMLDecl? Misc* (doctypedecl Misc*)?
1.50 daniel 7813: *
1.68 daniel 7814: * Returns 0, -1 in case of error. the parser context is augmented
1.50 daniel 7815: * as a result of the parsing.
1.1 veillard 7816: */
7817:
1.55 daniel 7818: int
7819: xmlParseDocument(xmlParserCtxtPtr ctxt) {
1.156 daniel 7820: xmlChar start[4];
7821: xmlCharEncoding enc;
7822:
1.45 daniel 7823: xmlDefaultSAXHandlerInit();
7824:
1.91 daniel 7825: GROW;
7826:
1.14 veillard 7827: /*
1.44 daniel 7828: * SAX: beginning of the document processing.
7829: */
1.72 daniel 7830: if ((ctxt->sax) && (ctxt->sax->setDocumentLocator))
1.74 daniel 7831: ctxt->sax->setDocumentLocator(ctxt->userData, &xmlDefaultSAXLocator);
1.44 daniel 7832:
1.156 daniel 7833: /*
7834: * Get the 4 first bytes and decode the charset
7835: * if enc != XML_CHAR_ENCODING_NONE
7836: * plug some encoding conversion routines.
7837: */
7838: start[0] = RAW;
7839: start[1] = NXT(1);
7840: start[2] = NXT(2);
7841: start[3] = NXT(3);
7842: enc = xmlDetectCharEncoding(start, 4);
7843: if (enc != XML_CHAR_ENCODING_NONE) {
7844: xmlSwitchEncoding(ctxt, enc);
7845: }
7846:
1.1 veillard 7847:
1.59 daniel 7848: if (CUR == 0) {
7849: if ((ctxt->sax != NULL) && (ctxt->sax->error != NULL))
1.74 daniel 7850: ctxt->sax->error(ctxt->userData, "Document is empty\n");
1.123 daniel 7851: ctxt->errNo = XML_ERR_DOCUMENT_EMPTY;
1.59 daniel 7852: ctxt->wellFormed = 0;
7853: }
1.1 veillard 7854:
7855: /*
7856: * Check for the XMLDecl in the Prolog.
7857: */
1.91 daniel 7858: GROW;
1.152 daniel 7859: if ((RAW == '<') && (NXT(1) == '?') &&
1.40 daniel 7860: (NXT(2) == 'x') && (NXT(3) == 'm') &&
1.142 daniel 7861: (NXT(4) == 'l') && (IS_BLANK(NXT(5)))) {
1.19 daniel 7862: xmlParseXMLDecl(ctxt);
1.167 daniel 7863: ctxt->standalone = ctxt->input->standalone;
1.42 daniel 7864: SKIP_BLANKS;
1.164 daniel 7865: if ((ctxt->encoding == NULL) && (ctxt->input->encoding != NULL))
7866: ctxt->encoding = xmlStrdup(ctxt->input->encoding);
7867:
1.1 veillard 7868: } else {
1.72 daniel 7869: ctxt->version = xmlCharStrdup(XML_DEFAULT_VERSION);
1.1 veillard 7870: }
1.171 daniel 7871: if ((ctxt->sax) && (ctxt->sax->startDocument) && (!ctxt->disableSAX))
1.74 daniel 7872: ctxt->sax->startDocument(ctxt->userData);
1.1 veillard 7873:
7874: /*
7875: * The Misc part of the Prolog
7876: */
1.91 daniel 7877: GROW;
1.16 daniel 7878: xmlParseMisc(ctxt);
1.1 veillard 7879:
7880: /*
1.29 daniel 7881: * Then possibly doc type declaration(s) and more Misc
1.21 daniel 7882: * (doctypedecl Misc*)?
7883: */
1.91 daniel 7884: GROW;
1.152 daniel 7885: if ((RAW == '<') && (NXT(1) == '!') &&
1.40 daniel 7886: (NXT(2) == 'D') && (NXT(3) == 'O') &&
7887: (NXT(4) == 'C') && (NXT(5) == 'T') &&
7888: (NXT(6) == 'Y') && (NXT(7) == 'P') &&
7889: (NXT(8) == 'E')) {
1.165 daniel 7890:
1.166 daniel 7891: ctxt->inSubset = 1;
1.22 daniel 7892: xmlParseDocTypeDecl(ctxt);
1.152 daniel 7893: if (RAW == '[') {
1.140 daniel 7894: ctxt->instate = XML_PARSER_DTD;
7895: xmlParseInternalSubset(ctxt);
7896: }
1.165 daniel 7897:
7898: /*
7899: * Create and update the external subset.
7900: */
1.166 daniel 7901: ctxt->inSubset = 2;
1.171 daniel 7902: if ((ctxt->sax != NULL) && (ctxt->sax->externalSubset != NULL) &&
7903: (!ctxt->disableSAX))
1.165 daniel 7904: ctxt->sax->externalSubset(ctxt->userData, ctxt->intSubName,
7905: ctxt->extSubSystem, ctxt->extSubURI);
1.166 daniel 7906: ctxt->inSubset = 0;
1.165 daniel 7907:
7908:
1.96 daniel 7909: ctxt->instate = XML_PARSER_PROLOG;
1.22 daniel 7910: xmlParseMisc(ctxt);
1.21 daniel 7911: }
7912:
7913: /*
7914: * Time to start parsing the tree itself
1.1 veillard 7915: */
1.91 daniel 7916: GROW;
1.152 daniel 7917: if (RAW != '<') {
1.59 daniel 7918: if ((ctxt->sax != NULL) && (ctxt->sax->error != NULL))
1.74 daniel 7919: ctxt->sax->error(ctxt->userData,
1.151 daniel 7920: "Start tag expected, '<' not found\n");
1.140 daniel 7921: ctxt->errNo = XML_ERR_DOCUMENT_EMPTY;
1.59 daniel 7922: ctxt->wellFormed = 0;
1.140 daniel 7923: ctxt->instate = XML_PARSER_EOF;
7924: } else {
7925: ctxt->instate = XML_PARSER_CONTENT;
7926: xmlParseElement(ctxt);
7927: ctxt->instate = XML_PARSER_EPILOG;
7928:
7929:
7930: /*
7931: * The Misc part at the end
7932: */
7933: xmlParseMisc(ctxt);
7934:
1.152 daniel 7935: if (RAW != 0) {
1.140 daniel 7936: if ((ctxt->sax != NULL) && (ctxt->sax->error != NULL))
7937: ctxt->sax->error(ctxt->userData,
7938: "Extra content at the end of the document\n");
7939: ctxt->wellFormed = 0;
7940: ctxt->errNo = XML_ERR_DOCUMENT_END;
7941: }
7942: ctxt->instate = XML_PARSER_EOF;
1.59 daniel 7943: }
7944:
1.44 daniel 7945: /*
7946: * SAX: end of the document processing.
7947: */
1.171 daniel 7948: if ((ctxt->sax) && (ctxt->sax->endDocument != NULL) &&
7949: (!ctxt->disableSAX))
1.74 daniel 7950: ctxt->sax->endDocument(ctxt->userData);
1.151 daniel 7951:
7952: /*
7953: * Grab the encoding if it was added on-the-fly
7954: */
7955: if ((ctxt->encoding != NULL) && (ctxt->myDoc != NULL) &&
7956: (ctxt->myDoc->encoding == NULL)) {
7957: ctxt->myDoc->encoding = ctxt->encoding;
7958: ctxt->encoding = NULL;
7959: }
1.59 daniel 7960: if (! ctxt->wellFormed) return(-1);
1.16 daniel 7961: return(0);
7962: }
7963:
1.98 daniel 7964: /************************************************************************
7965: * *
1.128 daniel 7966: * Progressive parsing interfaces *
7967: * *
7968: ************************************************************************/
7969:
7970: /**
7971: * xmlParseLookupSequence:
7972: * @ctxt: an XML parser context
7973: * @first: the first char to lookup
1.140 daniel 7974: * @next: the next char to lookup or zero
7975: * @third: the next char to lookup or zero
1.128 daniel 7976: *
1.140 daniel 7977: * Try to find if a sequence (first, next, third) or just (first next) or
7978: * (first) is available in the input stream.
7979: * This function has a side effect of (possibly) incrementing ctxt->checkIndex
7980: * to avoid rescanning sequences of bytes, it DOES change the state of the
7981: * parser, do not use liberally.
1.128 daniel 7982: *
1.140 daniel 7983: * Returns the index to the current parsing point if the full sequence
7984: * is available, -1 otherwise.
1.128 daniel 7985: */
7986: int
1.140 daniel 7987: xmlParseLookupSequence(xmlParserCtxtPtr ctxt, xmlChar first,
7988: xmlChar next, xmlChar third) {
7989: int base, len;
7990: xmlParserInputPtr in;
7991: const xmlChar *buf;
7992:
7993: in = ctxt->input;
7994: if (in == NULL) return(-1);
7995: base = in->cur - in->base;
7996: if (base < 0) return(-1);
7997: if (ctxt->checkIndex > base)
7998: base = ctxt->checkIndex;
7999: if (in->buf == NULL) {
8000: buf = in->base;
8001: len = in->length;
8002: } else {
8003: buf = in->buf->buffer->content;
8004: len = in->buf->buffer->use;
8005: }
8006: /* take into account the sequence length */
8007: if (third) len -= 2;
8008: else if (next) len --;
8009: for (;base < len;base++) {
8010: if (buf[base] == first) {
8011: if (third != 0) {
8012: if ((buf[base + 1] != next) ||
8013: (buf[base + 2] != third)) continue;
8014: } else if (next != 0) {
8015: if (buf[base + 1] != next) continue;
8016: }
8017: ctxt->checkIndex = 0;
8018: #ifdef DEBUG_PUSH
8019: if (next == 0)
8020: fprintf(stderr, "PP: lookup '%c' found at %d\n",
8021: first, base);
8022: else if (third == 0)
8023: fprintf(stderr, "PP: lookup '%c%c' found at %d\n",
8024: first, next, base);
8025: else
8026: fprintf(stderr, "PP: lookup '%c%c%c' found at %d\n",
8027: first, next, third, base);
8028: #endif
8029: return(base - (in->cur - in->base));
8030: }
8031: }
8032: ctxt->checkIndex = base;
8033: #ifdef DEBUG_PUSH
8034: if (next == 0)
8035: fprintf(stderr, "PP: lookup '%c' failed\n", first);
8036: else if (third == 0)
8037: fprintf(stderr, "PP: lookup '%c%c' failed\n", first, next);
8038: else
8039: fprintf(stderr, "PP: lookup '%c%c%c' failed\n", first, next, third);
8040: #endif
8041: return(-1);
1.128 daniel 8042: }
8043:
8044: /**
1.143 daniel 8045: * xmlParseTryOrFinish:
1.128 daniel 8046: * @ctxt: an XML parser context
1.143 daniel 8047: * @terminate: last chunk indicator
1.128 daniel 8048: *
8049: * Try to progress on parsing
8050: *
8051: * Returns zero if no parsing was possible
8052: */
8053: int
1.143 daniel 8054: xmlParseTryOrFinish(xmlParserCtxtPtr ctxt, int terminate) {
1.128 daniel 8055: int ret = 0;
1.140 daniel 8056: xmlParserInputPtr in;
8057: int avail;
8058: xmlChar cur, next;
8059:
8060: #ifdef DEBUG_PUSH
8061: switch (ctxt->instate) {
8062: case XML_PARSER_EOF:
8063: fprintf(stderr, "PP: try EOF\n"); break;
8064: case XML_PARSER_START:
8065: fprintf(stderr, "PP: try START\n"); break;
8066: case XML_PARSER_MISC:
8067: fprintf(stderr, "PP: try MISC\n");break;
8068: case XML_PARSER_COMMENT:
8069: fprintf(stderr, "PP: try COMMENT\n");break;
8070: case XML_PARSER_PROLOG:
8071: fprintf(stderr, "PP: try PROLOG\n");break;
8072: case XML_PARSER_START_TAG:
8073: fprintf(stderr, "PP: try START_TAG\n");break;
8074: case XML_PARSER_CONTENT:
8075: fprintf(stderr, "PP: try CONTENT\n");break;
8076: case XML_PARSER_CDATA_SECTION:
8077: fprintf(stderr, "PP: try CDATA_SECTION\n");break;
8078: case XML_PARSER_END_TAG:
8079: fprintf(stderr, "PP: try END_TAG\n");break;
8080: case XML_PARSER_ENTITY_DECL:
8081: fprintf(stderr, "PP: try ENTITY_DECL\n");break;
8082: case XML_PARSER_ENTITY_VALUE:
8083: fprintf(stderr, "PP: try ENTITY_VALUE\n");break;
8084: case XML_PARSER_ATTRIBUTE_VALUE:
8085: fprintf(stderr, "PP: try ATTRIBUTE_VALUE\n");break;
8086: case XML_PARSER_DTD:
8087: fprintf(stderr, "PP: try DTD\n");break;
8088: case XML_PARSER_EPILOG:
8089: fprintf(stderr, "PP: try EPILOG\n");break;
8090: case XML_PARSER_PI:
8091: fprintf(stderr, "PP: try PI\n");break;
8092: }
8093: #endif
1.128 daniel 8094:
8095: while (1) {
1.140 daniel 8096: /*
8097: * Pop-up of finished entities.
8098: */
1.152 daniel 8099: while ((RAW == 0) && (ctxt->inputNr > 1))
1.140 daniel 8100: xmlPopInput(ctxt);
8101:
8102: in = ctxt->input;
8103: if (in == NULL) break;
8104: if (in->buf == NULL)
8105: avail = in->length - (in->cur - in->base);
8106: else
8107: avail = in->buf->buffer->use - (in->cur - in->base);
8108: if (avail < 1)
8109: goto done;
1.128 daniel 8110: switch (ctxt->instate) {
8111: case XML_PARSER_EOF:
1.140 daniel 8112: /*
8113: * Document parsing is done !
8114: */
8115: goto done;
8116: case XML_PARSER_START:
8117: /*
8118: * Very first chars read from the document flow.
8119: */
8120: cur = in->cur[0];
8121: if (IS_BLANK(cur)) {
8122: if ((ctxt->sax) && (ctxt->sax->setDocumentLocator))
8123: ctxt->sax->setDocumentLocator(ctxt->userData,
8124: &xmlDefaultSAXLocator);
8125: if ((ctxt->sax != NULL) && (ctxt->sax->error != NULL))
8126: ctxt->sax->error(ctxt->userData,
8127: "Extra spaces at the beginning of the document are not allowed\n");
8128: ctxt->errNo = XML_ERR_DOCUMENT_START;
8129: ctxt->wellFormed = 0;
8130: SKIP_BLANKS;
8131: ret++;
8132: if (in->buf == NULL)
8133: avail = in->length - (in->cur - in->base);
8134: else
8135: avail = in->buf->buffer->use - (in->cur - in->base);
8136: }
8137: if (avail < 2)
8138: goto done;
8139:
8140: cur = in->cur[0];
8141: next = in->cur[1];
8142: if (cur == 0) {
8143: if ((ctxt->sax) && (ctxt->sax->setDocumentLocator))
8144: ctxt->sax->setDocumentLocator(ctxt->userData,
8145: &xmlDefaultSAXLocator);
8146: if ((ctxt->sax != NULL) && (ctxt->sax->error != NULL))
8147: ctxt->sax->error(ctxt->userData, "Document is empty\n");
8148: ctxt->errNo = XML_ERR_DOCUMENT_EMPTY;
8149: ctxt->wellFormed = 0;
8150: ctxt->instate = XML_PARSER_EOF;
8151: #ifdef DEBUG_PUSH
8152: fprintf(stderr, "PP: entering EOF\n");
8153: #endif
8154: if ((ctxt->sax) && (ctxt->sax->endDocument != NULL))
8155: ctxt->sax->endDocument(ctxt->userData);
8156: goto done;
8157: }
8158: if ((cur == '<') && (next == '?')) {
8159: /* PI or XML decl */
8160: if (avail < 5) return(ret);
1.143 daniel 8161: if ((!terminate) &&
8162: (xmlParseLookupSequence(ctxt, '?', '>', 0) < 0))
1.140 daniel 8163: return(ret);
8164: if ((ctxt->sax) && (ctxt->sax->setDocumentLocator))
8165: ctxt->sax->setDocumentLocator(ctxt->userData,
8166: &xmlDefaultSAXLocator);
8167: if ((in->cur[2] == 'x') &&
8168: (in->cur[3] == 'm') &&
1.142 daniel 8169: (in->cur[4] == 'l') &&
8170: (IS_BLANK(in->cur[5]))) {
1.140 daniel 8171: ret += 5;
8172: #ifdef DEBUG_PUSH
8173: fprintf(stderr, "PP: Parsing XML Decl\n");
8174: #endif
8175: xmlParseXMLDecl(ctxt);
1.167 daniel 8176: ctxt->standalone = ctxt->input->standalone;
1.164 daniel 8177: if ((ctxt->encoding == NULL) &&
8178: (ctxt->input->encoding != NULL))
8179: ctxt->encoding = xmlStrdup(ctxt->input->encoding);
1.171 daniel 8180: if ((ctxt->sax) && (ctxt->sax->startDocument) &&
8181: (!ctxt->disableSAX))
1.140 daniel 8182: ctxt->sax->startDocument(ctxt->userData);
8183: ctxt->instate = XML_PARSER_MISC;
8184: #ifdef DEBUG_PUSH
8185: fprintf(stderr, "PP: entering MISC\n");
8186: #endif
8187: } else {
8188: ctxt->version = xmlCharStrdup(XML_DEFAULT_VERSION);
1.171 daniel 8189: if ((ctxt->sax) && (ctxt->sax->startDocument) &&
8190: (!ctxt->disableSAX))
1.140 daniel 8191: ctxt->sax->startDocument(ctxt->userData);
8192: ctxt->instate = XML_PARSER_MISC;
8193: #ifdef DEBUG_PUSH
8194: fprintf(stderr, "PP: entering MISC\n");
8195: #endif
8196: }
8197: } else {
8198: if ((ctxt->sax) && (ctxt->sax->setDocumentLocator))
8199: ctxt->sax->setDocumentLocator(ctxt->userData,
8200: &xmlDefaultSAXLocator);
8201: ctxt->version = xmlCharStrdup(XML_DEFAULT_VERSION);
1.171 daniel 8202: if ((ctxt->sax) && (ctxt->sax->startDocument) &&
8203: (!ctxt->disableSAX))
1.140 daniel 8204: ctxt->sax->startDocument(ctxt->userData);
8205: ctxt->instate = XML_PARSER_MISC;
8206: #ifdef DEBUG_PUSH
8207: fprintf(stderr, "PP: entering MISC\n");
8208: #endif
8209: }
8210: break;
8211: case XML_PARSER_MISC:
8212: SKIP_BLANKS;
8213: if (in->buf == NULL)
8214: avail = in->length - (in->cur - in->base);
8215: else
8216: avail = in->buf->buffer->use - (in->cur - in->base);
8217: if (avail < 2)
8218: goto done;
8219: cur = in->cur[0];
8220: next = in->cur[1];
8221: if ((cur == '<') && (next == '?')) {
1.143 daniel 8222: if ((!terminate) &&
8223: (xmlParseLookupSequence(ctxt, '?', '>', 0) < 0))
1.140 daniel 8224: goto done;
8225: #ifdef DEBUG_PUSH
8226: fprintf(stderr, "PP: Parsing PI\n");
8227: #endif
8228: xmlParsePI(ctxt);
8229: } else if ((cur == '<') && (next == '!') &&
8230: (in->cur[2] == '-') && (in->cur[3] == '-')) {
1.143 daniel 8231: if ((!terminate) &&
8232: (xmlParseLookupSequence(ctxt, '-', '-', '>') < 0))
1.140 daniel 8233: goto done;
8234: #ifdef DEBUG_PUSH
8235: fprintf(stderr, "PP: Parsing Comment\n");
8236: #endif
8237: xmlParseComment(ctxt);
8238: ctxt->instate = XML_PARSER_MISC;
8239: } else if ((cur == '<') && (next == '!') &&
8240: (in->cur[2] == 'D') && (in->cur[3] == 'O') &&
8241: (in->cur[4] == 'C') && (in->cur[5] == 'T') &&
8242: (in->cur[6] == 'Y') && (in->cur[7] == 'P') &&
8243: (in->cur[8] == 'E')) {
1.143 daniel 8244: if ((!terminate) &&
8245: (xmlParseLookupSequence(ctxt, '>', 0, 0) < 0))
1.140 daniel 8246: goto done;
8247: #ifdef DEBUG_PUSH
8248: fprintf(stderr, "PP: Parsing internal subset\n");
8249: #endif
1.166 daniel 8250: ctxt->inSubset = 1;
1.140 daniel 8251: xmlParseDocTypeDecl(ctxt);
1.152 daniel 8252: if (RAW == '[') {
1.140 daniel 8253: ctxt->instate = XML_PARSER_DTD;
8254: #ifdef DEBUG_PUSH
8255: fprintf(stderr, "PP: entering DTD\n");
8256: #endif
8257: } else {
1.166 daniel 8258: /*
8259: * Create and update the external subset.
8260: */
8261: ctxt->inSubset = 2;
1.171 daniel 8262: if ((ctxt->sax != NULL) && (!ctxt->disableSAX) &&
1.166 daniel 8263: (ctxt->sax->externalSubset != NULL))
8264: ctxt->sax->externalSubset(ctxt->userData,
8265: ctxt->intSubName, ctxt->extSubSystem,
8266: ctxt->extSubURI);
8267: ctxt->inSubset = 0;
1.140 daniel 8268: ctxt->instate = XML_PARSER_PROLOG;
8269: #ifdef DEBUG_PUSH
8270: fprintf(stderr, "PP: entering PROLOG\n");
8271: #endif
8272: }
8273: } else if ((cur == '<') && (next == '!') &&
8274: (avail < 9)) {
8275: goto done;
8276: } else {
8277: ctxt->instate = XML_PARSER_START_TAG;
8278: #ifdef DEBUG_PUSH
8279: fprintf(stderr, "PP: entering START_TAG\n");
8280: #endif
8281: }
8282: break;
1.128 daniel 8283: case XML_PARSER_PROLOG:
1.140 daniel 8284: SKIP_BLANKS;
8285: if (in->buf == NULL)
8286: avail = in->length - (in->cur - in->base);
8287: else
8288: avail = in->buf->buffer->use - (in->cur - in->base);
8289: if (avail < 2)
8290: goto done;
8291: cur = in->cur[0];
8292: next = in->cur[1];
8293: if ((cur == '<') && (next == '?')) {
1.143 daniel 8294: if ((!terminate) &&
8295: (xmlParseLookupSequence(ctxt, '?', '>', 0) < 0))
1.140 daniel 8296: goto done;
8297: #ifdef DEBUG_PUSH
8298: fprintf(stderr, "PP: Parsing PI\n");
8299: #endif
8300: xmlParsePI(ctxt);
8301: } else if ((cur == '<') && (next == '!') &&
8302: (in->cur[2] == '-') && (in->cur[3] == '-')) {
1.143 daniel 8303: if ((!terminate) &&
8304: (xmlParseLookupSequence(ctxt, '-', '-', '>') < 0))
1.140 daniel 8305: goto done;
8306: #ifdef DEBUG_PUSH
8307: fprintf(stderr, "PP: Parsing Comment\n");
8308: #endif
8309: xmlParseComment(ctxt);
8310: ctxt->instate = XML_PARSER_PROLOG;
8311: } else if ((cur == '<') && (next == '!') &&
8312: (avail < 4)) {
8313: goto done;
8314: } else {
8315: ctxt->instate = XML_PARSER_START_TAG;
8316: #ifdef DEBUG_PUSH
8317: fprintf(stderr, "PP: entering START_TAG\n");
8318: #endif
8319: }
8320: break;
8321: case XML_PARSER_EPILOG:
8322: SKIP_BLANKS;
8323: if (in->buf == NULL)
8324: avail = in->length - (in->cur - in->base);
8325: else
8326: avail = in->buf->buffer->use - (in->cur - in->base);
8327: if (avail < 2)
8328: goto done;
8329: cur = in->cur[0];
8330: next = in->cur[1];
8331: if ((cur == '<') && (next == '?')) {
1.143 daniel 8332: if ((!terminate) &&
8333: (xmlParseLookupSequence(ctxt, '?', '>', 0) < 0))
1.140 daniel 8334: goto done;
8335: #ifdef DEBUG_PUSH
8336: fprintf(stderr, "PP: Parsing PI\n");
8337: #endif
8338: xmlParsePI(ctxt);
8339: ctxt->instate = XML_PARSER_EPILOG;
8340: } else if ((cur == '<') && (next == '!') &&
8341: (in->cur[2] == '-') && (in->cur[3] == '-')) {
1.143 daniel 8342: if ((!terminate) &&
8343: (xmlParseLookupSequence(ctxt, '-', '-', '>') < 0))
1.140 daniel 8344: goto done;
8345: #ifdef DEBUG_PUSH
8346: fprintf(stderr, "PP: Parsing Comment\n");
8347: #endif
8348: xmlParseComment(ctxt);
8349: ctxt->instate = XML_PARSER_EPILOG;
8350: } else if ((cur == '<') && (next == '!') &&
8351: (avail < 4)) {
8352: goto done;
8353: } else {
8354: if ((ctxt->sax != NULL) && (ctxt->sax->error != NULL))
8355: ctxt->sax->error(ctxt->userData,
8356: "Extra content at the end of the document\n");
8357: ctxt->wellFormed = 0;
8358: ctxt->errNo = XML_ERR_DOCUMENT_END;
8359: ctxt->instate = XML_PARSER_EOF;
8360: #ifdef DEBUG_PUSH
8361: fprintf(stderr, "PP: entering EOF\n");
8362: #endif
1.171 daniel 8363: if ((ctxt->sax) && (ctxt->sax->endDocument != NULL) &&
8364: (!ctxt->disableSAX))
1.140 daniel 8365: ctxt->sax->endDocument(ctxt->userData);
8366: goto done;
8367: }
8368: break;
8369: case XML_PARSER_START_TAG: {
8370: xmlChar *name, *oldname;
8371:
8372: if (avail < 2)
8373: goto done;
8374: cur = in->cur[0];
8375: if (cur != '<') {
8376: if ((ctxt->sax != NULL) && (ctxt->sax->error != NULL))
8377: ctxt->sax->error(ctxt->userData,
8378: "Start tag expect, '<' not found\n");
8379: ctxt->errNo = XML_ERR_DOCUMENT_EMPTY;
8380: ctxt->wellFormed = 0;
8381: ctxt->instate = XML_PARSER_EOF;
8382: #ifdef DEBUG_PUSH
8383: fprintf(stderr, "PP: entering EOF\n");
8384: #endif
1.171 daniel 8385: if ((ctxt->sax) && (ctxt->sax->endDocument != NULL) &&
8386: (!ctxt->disableSAX))
1.140 daniel 8387: ctxt->sax->endDocument(ctxt->userData);
8388: goto done;
8389: }
1.143 daniel 8390: if ((!terminate) &&
8391: (xmlParseLookupSequence(ctxt, '>', 0, 0) < 0))
1.140 daniel 8392: goto done;
8393: name = xmlParseStartTag(ctxt);
8394: if (name == NULL) {
8395: ctxt->instate = XML_PARSER_EOF;
8396: #ifdef DEBUG_PUSH
8397: fprintf(stderr, "PP: entering EOF\n");
8398: #endif
1.171 daniel 8399: if ((ctxt->sax) && (ctxt->sax->endDocument != NULL) &&
8400: (!ctxt->disableSAX))
1.140 daniel 8401: ctxt->sax->endDocument(ctxt->userData);
8402: goto done;
8403: }
8404: namePush(ctxt, xmlStrdup(name));
8405:
8406: /*
8407: * [ VC: Root Element Type ]
8408: * The Name in the document type declaration must match
8409: * the element type of the root element.
8410: */
8411: if (ctxt->validate && ctxt->wellFormed && ctxt->myDoc &&
1.157 daniel 8412: ctxt->node && (ctxt->node == ctxt->myDoc->children))
1.140 daniel 8413: ctxt->valid &= xmlValidateRoot(&ctxt->vctxt, ctxt->myDoc);
8414:
8415: /*
8416: * Check for an Empty Element.
8417: */
1.152 daniel 8418: if ((RAW == '/') && (NXT(1) == '>')) {
1.140 daniel 8419: SKIP(2);
1.171 daniel 8420: if ((ctxt->sax != NULL) &&
8421: (ctxt->sax->endElement != NULL) && (!ctxt->disableSAX))
1.140 daniel 8422: ctxt->sax->endElement(ctxt->userData, name);
8423: xmlFree(name);
8424: oldname = namePop(ctxt);
8425: if (oldname != NULL) {
8426: #ifdef DEBUG_STACK
8427: fprintf(stderr,"Close: popped %s\n", oldname);
8428: #endif
8429: xmlFree(oldname);
8430: }
8431: if (ctxt->name == NULL) {
8432: ctxt->instate = XML_PARSER_EPILOG;
8433: #ifdef DEBUG_PUSH
8434: fprintf(stderr, "PP: entering EPILOG\n");
8435: #endif
8436: } else {
8437: ctxt->instate = XML_PARSER_CONTENT;
8438: #ifdef DEBUG_PUSH
8439: fprintf(stderr, "PP: entering CONTENT\n");
8440: #endif
8441: }
8442: break;
8443: }
1.152 daniel 8444: if (RAW == '>') {
1.140 daniel 8445: NEXT;
8446: } else {
8447: if ((ctxt->sax != NULL) && (ctxt->sax->error != NULL))
8448: ctxt->sax->error(ctxt->userData,
8449: "Couldn't find end of Start Tag %s\n",
8450: name);
8451: ctxt->wellFormed = 0;
8452: ctxt->errNo = XML_ERR_GT_REQUIRED;
8453:
8454: /*
8455: * end of parsing of this node.
8456: */
8457: nodePop(ctxt);
8458: oldname = namePop(ctxt);
8459: if (oldname != NULL) {
8460: #ifdef DEBUG_STACK
8461: fprintf(stderr,"Close: popped %s\n", oldname);
8462: #endif
8463: xmlFree(oldname);
8464: }
8465: }
8466: xmlFree(name);
8467: ctxt->instate = XML_PARSER_CONTENT;
8468: #ifdef DEBUG_PUSH
8469: fprintf(stderr, "PP: entering CONTENT\n");
8470: #endif
8471: break;
8472: }
1.128 daniel 8473: case XML_PARSER_CONTENT:
1.140 daniel 8474: /*
8475: * Handle preparsed entities and charRef
8476: */
8477: if (ctxt->token != 0) {
8478: xmlChar cur[2] = { 0 , 0 } ;
8479:
8480: cur[0] = (xmlChar) ctxt->token;
1.171 daniel 8481: if ((ctxt->sax != NULL) && (!ctxt->disableSAX) &&
8482: (ctxt->sax->characters != NULL))
1.140 daniel 8483: ctxt->sax->characters(ctxt->userData, cur, 1);
8484: ctxt->token = 0;
8485: }
8486: if (avail < 2)
8487: goto done;
8488: cur = in->cur[0];
8489: next = in->cur[1];
8490: if ((cur == '<') && (next == '?')) {
1.143 daniel 8491: if ((!terminate) &&
8492: (xmlParseLookupSequence(ctxt, '?', '>', 0) < 0))
1.140 daniel 8493: goto done;
8494: #ifdef DEBUG_PUSH
8495: fprintf(stderr, "PP: Parsing PI\n");
8496: #endif
8497: xmlParsePI(ctxt);
8498: } else if ((cur == '<') && (next == '!') &&
8499: (in->cur[2] == '-') && (in->cur[3] == '-')) {
1.143 daniel 8500: if ((!terminate) &&
8501: (xmlParseLookupSequence(ctxt, '-', '-', '>') < 0))
1.140 daniel 8502: goto done;
8503: #ifdef DEBUG_PUSH
8504: fprintf(stderr, "PP: Parsing Comment\n");
8505: #endif
8506: xmlParseComment(ctxt);
8507: ctxt->instate = XML_PARSER_CONTENT;
8508: } else if ((cur == '<') && (in->cur[1] == '!') &&
8509: (in->cur[2] == '[') && (NXT(3) == 'C') &&
8510: (in->cur[4] == 'D') && (NXT(5) == 'A') &&
8511: (in->cur[6] == 'T') && (NXT(7) == 'A') &&
8512: (in->cur[8] == '[')) {
8513: SKIP(9);
8514: ctxt->instate = XML_PARSER_CDATA_SECTION;
8515: #ifdef DEBUG_PUSH
8516: fprintf(stderr, "PP: entering CDATA_SECTION\n");
8517: #endif
8518: break;
8519: } else if ((cur == '<') && (next == '!') &&
8520: (avail < 9)) {
8521: goto done;
8522: } else if ((cur == '<') && (next == '/')) {
8523: ctxt->instate = XML_PARSER_END_TAG;
8524: #ifdef DEBUG_PUSH
8525: fprintf(stderr, "PP: entering END_TAG\n");
8526: #endif
8527: break;
8528: } else if (cur == '<') {
8529: ctxt->instate = XML_PARSER_START_TAG;
8530: #ifdef DEBUG_PUSH
8531: fprintf(stderr, "PP: entering START_TAG\n");
8532: #endif
8533: break;
8534: } else if (cur == '&') {
1.143 daniel 8535: if ((!terminate) &&
8536: (xmlParseLookupSequence(ctxt, ';', 0, 0) < 0))
1.140 daniel 8537: goto done;
8538: #ifdef DEBUG_PUSH
8539: fprintf(stderr, "PP: Parsing Reference\n");
8540: #endif
8541: /* TODO: check generation of subtrees if noent !!! */
8542: xmlParseReference(ctxt);
8543: } else {
1.156 daniel 8544: /* TODO Avoid the extra copy, handle directly !!! */
1.140 daniel 8545: /*
8546: * Goal of the following test is :
8547: * - minimize calls to the SAX 'character' callback
8548: * when they are mergeable
8549: * - handle an problem for isBlank when we only parse
8550: * a sequence of blank chars and the next one is
8551: * not available to check against '<' presence.
8552: * - tries to homogenize the differences in SAX
8553: * callbacks beween the push and pull versions
8554: * of the parser.
8555: */
8556: if ((ctxt->inputNr == 1) &&
8557: (avail < XML_PARSER_BIG_BUFFER_SIZE)) {
1.143 daniel 8558: if ((!terminate) &&
8559: (xmlParseLookupSequence(ctxt, '<', 0, 0) < 0))
1.140 daniel 8560: goto done;
8561: }
8562: ctxt->checkIndex = 0;
8563: #ifdef DEBUG_PUSH
8564: fprintf(stderr, "PP: Parsing char data\n");
8565: #endif
8566: xmlParseCharData(ctxt, 0);
8567: }
8568: /*
8569: * Pop-up of finished entities.
8570: */
1.152 daniel 8571: while ((RAW == 0) && (ctxt->inputNr > 1))
1.140 daniel 8572: xmlPopInput(ctxt);
8573: break;
8574: case XML_PARSER_CDATA_SECTION: {
8575: /*
8576: * The Push mode need to have the SAX callback for
8577: * cdataBlock merge back contiguous callbacks.
8578: */
8579: int base;
8580:
8581: in = ctxt->input;
8582: base = xmlParseLookupSequence(ctxt, ']', ']', '>');
8583: if (base < 0) {
8584: if (avail >= XML_PARSER_BIG_BUFFER_SIZE + 2) {
1.171 daniel 8585: if ((ctxt->sax != NULL) && (!ctxt->disableSAX)) {
1.140 daniel 8586: if (ctxt->sax->cdataBlock != NULL)
8587: ctxt->sax->cdataBlock(ctxt->userData, in->cur,
8588: XML_PARSER_BIG_BUFFER_SIZE);
8589: }
8590: SKIP(XML_PARSER_BIG_BUFFER_SIZE);
8591: ctxt->checkIndex = 0;
8592: }
8593: goto done;
8594: } else {
1.171 daniel 8595: if ((ctxt->sax != NULL) && (base > 0) &&
8596: (!ctxt->disableSAX)) {
1.140 daniel 8597: if (ctxt->sax->cdataBlock != NULL)
8598: ctxt->sax->cdataBlock(ctxt->userData,
8599: in->cur, base);
8600: }
8601: SKIP(base + 3);
8602: ctxt->checkIndex = 0;
8603: ctxt->instate = XML_PARSER_CONTENT;
8604: #ifdef DEBUG_PUSH
8605: fprintf(stderr, "PP: entering CONTENT\n");
8606: #endif
8607: }
8608: break;
8609: }
1.141 daniel 8610: case XML_PARSER_END_TAG:
1.140 daniel 8611: if (avail < 2)
8612: goto done;
1.143 daniel 8613: if ((!terminate) &&
8614: (xmlParseLookupSequence(ctxt, '>', 0, 0) < 0))
1.140 daniel 8615: goto done;
8616: xmlParseEndTag(ctxt);
8617: if (ctxt->name == NULL) {
8618: ctxt->instate = XML_PARSER_EPILOG;
8619: #ifdef DEBUG_PUSH
8620: fprintf(stderr, "PP: entering EPILOG\n");
8621: #endif
8622: } else {
8623: ctxt->instate = XML_PARSER_CONTENT;
8624: #ifdef DEBUG_PUSH
8625: fprintf(stderr, "PP: entering CONTENT\n");
8626: #endif
8627: }
8628: break;
8629: case XML_PARSER_DTD: {
8630: /*
8631: * Sorry but progressive parsing of the internal subset
8632: * is not expected to be supported. We first check that
8633: * the full content of the internal subset is available and
8634: * the parsing is launched only at that point.
8635: * Internal subset ends up with "']' S? '>'" in an unescaped
8636: * section and not in a ']]>' sequence which are conditional
8637: * sections (whoever argued to keep that crap in XML deserve
8638: * a place in hell !).
8639: */
8640: int base, i;
8641: xmlChar *buf;
8642: xmlChar quote = 0;
8643:
8644: base = in->cur - in->base;
8645: if (base < 0) return(0);
8646: if (ctxt->checkIndex > base)
8647: base = ctxt->checkIndex;
8648: buf = in->buf->buffer->content;
8649: for (;base < in->buf->buffer->use;base++) {
8650: if (quote != 0) {
8651: if (buf[base] == quote)
8652: quote = 0;
8653: continue;
8654: }
8655: if (buf[base] == '"') {
8656: quote = '"';
8657: continue;
8658: }
8659: if (buf[base] == '\'') {
8660: quote = '\'';
8661: continue;
8662: }
8663: if (buf[base] == ']') {
8664: if (base +1 >= in->buf->buffer->use)
8665: break;
8666: if (buf[base + 1] == ']') {
8667: /* conditional crap, skip both ']' ! */
8668: base++;
8669: continue;
8670: }
8671: for (i = 0;base + i < in->buf->buffer->use;i++) {
8672: if (buf[base + i] == '>')
8673: goto found_end_int_subset;
8674: }
8675: break;
8676: }
8677: }
8678: /*
8679: * We didn't found the end of the Internal subset
8680: */
8681: if (quote == 0)
8682: ctxt->checkIndex = base;
8683: #ifdef DEBUG_PUSH
8684: if (next == 0)
8685: fprintf(stderr, "PP: lookup of int subset end filed\n");
8686: #endif
8687: goto done;
8688:
8689: found_end_int_subset:
8690: xmlParseInternalSubset(ctxt);
1.166 daniel 8691: ctxt->inSubset = 2;
1.171 daniel 8692: if ((ctxt->sax != NULL) && (!ctxt->disableSAX) &&
1.166 daniel 8693: (ctxt->sax->externalSubset != NULL))
8694: ctxt->sax->externalSubset(ctxt->userData, ctxt->intSubName,
8695: ctxt->extSubSystem, ctxt->extSubURI);
8696: ctxt->inSubset = 0;
1.140 daniel 8697: ctxt->instate = XML_PARSER_PROLOG;
8698: ctxt->checkIndex = 0;
8699: #ifdef DEBUG_PUSH
8700: fprintf(stderr, "PP: entering PROLOG\n");
8701: #endif
8702: break;
8703: }
8704: case XML_PARSER_COMMENT:
8705: fprintf(stderr, "PP: internal error, state == COMMENT\n");
8706: ctxt->instate = XML_PARSER_CONTENT;
8707: #ifdef DEBUG_PUSH
8708: fprintf(stderr, "PP: entering CONTENT\n");
8709: #endif
8710: break;
8711: case XML_PARSER_PI:
8712: fprintf(stderr, "PP: internal error, state == PI\n");
8713: ctxt->instate = XML_PARSER_CONTENT;
8714: #ifdef DEBUG_PUSH
8715: fprintf(stderr, "PP: entering CONTENT\n");
8716: #endif
8717: break;
1.128 daniel 8718: case XML_PARSER_ENTITY_DECL:
1.140 daniel 8719: fprintf(stderr, "PP: internal error, state == ENTITY_DECL\n");
8720: ctxt->instate = XML_PARSER_DTD;
8721: #ifdef DEBUG_PUSH
8722: fprintf(stderr, "PP: entering DTD\n");
8723: #endif
8724: break;
1.128 daniel 8725: case XML_PARSER_ENTITY_VALUE:
1.140 daniel 8726: fprintf(stderr, "PP: internal error, state == ENTITY_VALUE\n");
8727: ctxt->instate = XML_PARSER_CONTENT;
8728: #ifdef DEBUG_PUSH
8729: fprintf(stderr, "PP: entering DTD\n");
8730: #endif
8731: break;
1.128 daniel 8732: case XML_PARSER_ATTRIBUTE_VALUE:
1.140 daniel 8733: fprintf(stderr, "PP: internal error, state == ATTRIBUTE_VALUE\n");
1.168 daniel 8734: ctxt->instate = XML_PARSER_START_TAG;
8735: #ifdef DEBUG_PUSH
8736: fprintf(stderr, "PP: entering START_TAG\n");
8737: #endif
8738: break;
8739: case XML_PARSER_SYSTEM_LITERAL:
8740: fprintf(stderr, "PP: internal error, state == SYSTEM_LITERAL\n");
1.140 daniel 8741: ctxt->instate = XML_PARSER_START_TAG;
8742: #ifdef DEBUG_PUSH
8743: fprintf(stderr, "PP: entering START_TAG\n");
8744: #endif
8745: break;
1.128 daniel 8746: }
8747: }
1.140 daniel 8748: done:
8749: #ifdef DEBUG_PUSH
8750: fprintf(stderr, "PP: done %d\n", ret);
8751: #endif
1.128 daniel 8752: return(ret);
8753: }
8754:
8755: /**
1.143 daniel 8756: * xmlParseTry:
8757: * @ctxt: an XML parser context
8758: *
8759: * Try to progress on parsing
8760: *
8761: * Returns zero if no parsing was possible
8762: */
8763: int
8764: xmlParseTry(xmlParserCtxtPtr ctxt) {
8765: return(xmlParseTryOrFinish(ctxt, 0));
8766: }
8767:
8768: /**
1.128 daniel 8769: * xmlParseChunk:
8770: * @ctxt: an XML parser context
8771: * @chunk: an char array
8772: * @size: the size in byte of the chunk
8773: * @terminate: last chunk indicator
8774: *
8775: * Parse a Chunk of memory
8776: *
8777: * Returns zero if no error, the xmlParserErrors otherwise.
8778: */
1.140 daniel 8779: int
1.128 daniel 8780: xmlParseChunk(xmlParserCtxtPtr ctxt, const char *chunk, int size,
8781: int terminate) {
1.132 daniel 8782: if ((size > 0) && (chunk != NULL) && (ctxt->input != NULL) &&
1.140 daniel 8783: (ctxt->input->buf != NULL) && (ctxt->instate != XML_PARSER_EOF)) {
8784: int base = ctxt->input->base - ctxt->input->buf->buffer->content;
8785: int cur = ctxt->input->cur - ctxt->input->base;
8786:
1.132 daniel 8787: xmlParserInputBufferPush(ctxt->input->buf, size, chunk);
1.140 daniel 8788: ctxt->input->base = ctxt->input->buf->buffer->content + base;
8789: ctxt->input->cur = ctxt->input->base + cur;
8790: #ifdef DEBUG_PUSH
8791: fprintf(stderr, "PP: pushed %d\n", size);
8792: #endif
8793:
1.150 daniel 8794: if ((terminate) || (ctxt->input->buf->buffer->use > 80))
8795: xmlParseTryOrFinish(ctxt, terminate);
1.140 daniel 8796: } else if (ctxt->instate != XML_PARSER_EOF)
1.143 daniel 8797: xmlParseTryOrFinish(ctxt, terminate);
1.140 daniel 8798: if (terminate) {
1.151 daniel 8799: /*
8800: * Grab the encoding if it was added on-the-fly
8801: */
8802: if ((ctxt->encoding != NULL) && (ctxt->myDoc != NULL) &&
8803: (ctxt->myDoc->encoding == NULL)) {
8804: ctxt->myDoc->encoding = ctxt->encoding;
8805: ctxt->encoding = NULL;
8806: }
8807:
8808: /*
8809: * Check for termination
8810: */
1.140 daniel 8811: if ((ctxt->instate != XML_PARSER_EOF) &&
8812: (ctxt->instate != XML_PARSER_EPILOG)) {
8813: if ((ctxt->sax != NULL) && (ctxt->sax->error != NULL))
8814: ctxt->sax->error(ctxt->userData,
8815: "Extra content at the end of the document\n");
8816: ctxt->wellFormed = 0;
8817: ctxt->errNo = XML_ERR_DOCUMENT_END;
8818: }
8819: if (ctxt->instate != XML_PARSER_EOF) {
1.171 daniel 8820: if ((ctxt->sax) && (ctxt->sax->endDocument != NULL) &&
8821: (!ctxt->disableSAX))
1.140 daniel 8822: ctxt->sax->endDocument(ctxt->userData);
8823: }
8824: ctxt->instate = XML_PARSER_EOF;
1.128 daniel 8825: }
8826: return((xmlParserErrors) ctxt->errNo);
8827: }
8828:
8829: /************************************************************************
8830: * *
1.98 daniel 8831: * I/O front end functions to the parser *
8832: * *
8833: ************************************************************************/
8834:
1.50 daniel 8835: /**
1.140 daniel 8836: * xmlCreatePushParserCtxt :
8837: * @sax: a SAX handler
8838: * @user_data: The user data returned on SAX callbacks
8839: * @chunk: a pointer to an array of chars
8840: * @size: number of chars in the array
8841: * @filename: an optional file name or URI
8842: *
8843: * Create a parser context for using the XML parser in push mode
8844: * To allow content encoding detection, @size should be >= 4
8845: * The value of @filename is used for fetching external entities
8846: * and error/warning reports.
8847: *
8848: * Returns the new parser context or NULL
8849: */
8850: xmlParserCtxtPtr
8851: xmlCreatePushParserCtxt(xmlSAXHandlerPtr sax, void *user_data,
8852: const char *chunk, int size, const char *filename) {
8853: xmlParserCtxtPtr ctxt;
8854: xmlParserInputPtr inputStream;
8855: xmlParserInputBufferPtr buf;
8856: xmlCharEncoding enc = XML_CHAR_ENCODING_NONE;
8857:
8858: /*
1.156 daniel 8859: * plug some encoding conversion routines
1.140 daniel 8860: */
8861: if ((chunk != NULL) && (size >= 4))
1.156 daniel 8862: enc = xmlDetectCharEncoding((const xmlChar *) chunk, size);
1.140 daniel 8863:
8864: buf = xmlAllocParserInputBuffer(enc);
8865: if (buf == NULL) return(NULL);
8866:
8867: ctxt = xmlNewParserCtxt();
8868: if (ctxt == NULL) {
8869: xmlFree(buf);
8870: return(NULL);
8871: }
8872: if (sax != NULL) {
8873: if (ctxt->sax != &xmlDefaultSAXHandler)
8874: xmlFree(ctxt->sax);
8875: ctxt->sax = (xmlSAXHandlerPtr) xmlMalloc(sizeof(xmlSAXHandler));
8876: if (ctxt->sax == NULL) {
8877: xmlFree(buf);
8878: xmlFree(ctxt);
8879: return(NULL);
8880: }
8881: memcpy(ctxt->sax, sax, sizeof(xmlSAXHandler));
8882: if (user_data != NULL)
8883: ctxt->userData = user_data;
8884: }
8885: if (filename == NULL) {
8886: ctxt->directory = NULL;
8887: } else {
8888: ctxt->directory = xmlParserGetDirectory(filename);
8889: }
8890:
8891: inputStream = xmlNewInputStream(ctxt);
8892: if (inputStream == NULL) {
8893: xmlFreeParserCtxt(ctxt);
8894: return(NULL);
8895: }
8896:
8897: if (filename == NULL)
8898: inputStream->filename = NULL;
8899: else
8900: inputStream->filename = xmlMemStrdup(filename);
8901: inputStream->buf = buf;
8902: inputStream->base = inputStream->buf->buffer->content;
8903: inputStream->cur = inputStream->buf->buffer->content;
1.156 daniel 8904: if (enc != XML_CHAR_ENCODING_NONE) {
8905: xmlSwitchEncoding(ctxt, enc);
8906: }
1.140 daniel 8907:
8908: inputPush(ctxt, inputStream);
8909:
8910: if ((size > 0) && (chunk != NULL) && (ctxt->input != NULL) &&
8911: (ctxt->input->buf != NULL)) {
8912: xmlParserInputBufferPush(ctxt->input->buf, size, chunk);
8913: #ifdef DEBUG_PUSH
8914: fprintf(stderr, "PP: pushed %d\n", size);
8915: #endif
8916: }
8917:
8918: return(ctxt);
8919: }
8920:
8921: /**
1.86 daniel 8922: * xmlCreateDocParserCtxt :
1.123 daniel 8923: * @cur: a pointer to an array of xmlChar
1.50 daniel 8924: *
1.69 daniel 8925: * Create a parser context for an XML in-memory document.
8926: *
8927: * Returns the new parser context or NULL
1.16 daniel 8928: */
1.69 daniel 8929: xmlParserCtxtPtr
1.123 daniel 8930: xmlCreateDocParserCtxt(xmlChar *cur) {
1.16 daniel 8931: xmlParserCtxtPtr ctxt;
1.40 daniel 8932: xmlParserInputPtr input;
1.16 daniel 8933:
1.97 daniel 8934: ctxt = xmlNewParserCtxt();
1.16 daniel 8935: if (ctxt == NULL) {
8936: return(NULL);
8937: }
1.96 daniel 8938: input = xmlNewInputStream(ctxt);
1.40 daniel 8939: if (input == NULL) {
1.97 daniel 8940: xmlFreeParserCtxt(ctxt);
1.40 daniel 8941: return(NULL);
8942: }
8943:
8944: input->base = cur;
8945: input->cur = cur;
8946:
8947: inputPush(ctxt, input);
1.69 daniel 8948: return(ctxt);
8949: }
8950:
8951: /**
8952: * xmlSAXParseDoc :
8953: * @sax: the SAX handler block
1.123 daniel 8954: * @cur: a pointer to an array of xmlChar
1.69 daniel 8955: * @recovery: work in recovery mode, i.e. tries to read no Well Formed
8956: * documents
8957: *
8958: * parse an XML in-memory document and build a tree.
8959: * It use the given SAX function block to handle the parsing callback.
8960: * If sax is NULL, fallback to the default DOM tree building routines.
8961: *
8962: * Returns the resulting document tree
8963: */
8964:
8965: xmlDocPtr
1.123 daniel 8966: xmlSAXParseDoc(xmlSAXHandlerPtr sax, xmlChar *cur, int recovery) {
1.69 daniel 8967: xmlDocPtr ret;
8968: xmlParserCtxtPtr ctxt;
8969:
8970: if (cur == NULL) return(NULL);
1.16 daniel 8971:
8972:
1.69 daniel 8973: ctxt = xmlCreateDocParserCtxt(cur);
8974: if (ctxt == NULL) return(NULL);
1.74 daniel 8975: if (sax != NULL) {
8976: ctxt->sax = sax;
8977: ctxt->userData = NULL;
8978: }
1.69 daniel 8979:
1.16 daniel 8980: xmlParseDocument(ctxt);
1.72 daniel 8981: if ((ctxt->wellFormed) || recovery) ret = ctxt->myDoc;
1.59 daniel 8982: else {
8983: ret = NULL;
1.72 daniel 8984: xmlFreeDoc(ctxt->myDoc);
8985: ctxt->myDoc = NULL;
1.59 daniel 8986: }
1.86 daniel 8987: if (sax != NULL)
8988: ctxt->sax = NULL;
1.69 daniel 8989: xmlFreeParserCtxt(ctxt);
1.16 daniel 8990:
1.1 veillard 8991: return(ret);
8992: }
8993:
1.50 daniel 8994: /**
1.55 daniel 8995: * xmlParseDoc :
1.123 daniel 8996: * @cur: a pointer to an array of xmlChar
1.55 daniel 8997: *
8998: * parse an XML in-memory document and build a tree.
8999: *
1.68 daniel 9000: * Returns the resulting document tree
1.55 daniel 9001: */
9002:
1.69 daniel 9003: xmlDocPtr
1.123 daniel 9004: xmlParseDoc(xmlChar *cur) {
1.59 daniel 9005: return(xmlSAXParseDoc(NULL, cur, 0));
1.76 daniel 9006: }
9007:
9008: /**
9009: * xmlSAXParseDTD :
9010: * @sax: the SAX handler block
9011: * @ExternalID: a NAME* containing the External ID of the DTD
9012: * @SystemID: a NAME* containing the URL to the DTD
9013: *
9014: * Load and parse an external subset.
9015: *
9016: * Returns the resulting xmlDtdPtr or NULL in case of error.
9017: */
9018:
9019: xmlDtdPtr
1.123 daniel 9020: xmlSAXParseDTD(xmlSAXHandlerPtr sax, const xmlChar *ExternalID,
9021: const xmlChar *SystemID) {
1.76 daniel 9022: xmlDtdPtr ret = NULL;
9023: xmlParserCtxtPtr ctxt;
1.83 daniel 9024: xmlParserInputPtr input = NULL;
1.76 daniel 9025: xmlCharEncoding enc;
9026:
9027: if ((ExternalID == NULL) && (SystemID == NULL)) return(NULL);
9028:
1.97 daniel 9029: ctxt = xmlNewParserCtxt();
1.76 daniel 9030: if (ctxt == NULL) {
9031: return(NULL);
9032: }
9033:
9034: /*
9035: * Set-up the SAX context
9036: */
9037: if (ctxt == NULL) return(NULL);
9038: if (sax != NULL) {
1.93 veillard 9039: if (ctxt->sax != NULL)
1.119 daniel 9040: xmlFree(ctxt->sax);
1.76 daniel 9041: ctxt->sax = sax;
9042: ctxt->userData = NULL;
9043: }
9044:
9045: /*
9046: * Ask the Entity resolver to load the damn thing
9047: */
9048:
9049: if ((ctxt->sax != NULL) && (ctxt->sax->resolveEntity != NULL))
9050: input = ctxt->sax->resolveEntity(ctxt->userData, ExternalID, SystemID);
9051: if (input == NULL) {
1.86 daniel 9052: if (sax != NULL) ctxt->sax = NULL;
1.76 daniel 9053: xmlFreeParserCtxt(ctxt);
9054: return(NULL);
9055: }
9056:
9057: /*
1.156 daniel 9058: * plug some encoding conversion routines here.
1.76 daniel 9059: */
9060: xmlPushInput(ctxt, input);
1.156 daniel 9061: enc = xmlDetectCharEncoding(ctxt->input->cur, 4);
1.76 daniel 9062: xmlSwitchEncoding(ctxt, enc);
9063:
1.95 veillard 9064: if (input->filename == NULL)
1.156 daniel 9065: input->filename = (char *) xmlStrdup(SystemID);
1.76 daniel 9066: input->line = 1;
9067: input->col = 1;
9068: input->base = ctxt->input->cur;
9069: input->cur = ctxt->input->cur;
9070: input->free = NULL;
9071:
9072: /*
9073: * let's parse that entity knowing it's an external subset.
9074: */
1.79 daniel 9075: xmlParseExternalSubset(ctxt, ExternalID, SystemID);
1.76 daniel 9076:
9077: if (ctxt->myDoc != NULL) {
9078: if (ctxt->wellFormed) {
9079: ret = ctxt->myDoc->intSubset;
9080: ctxt->myDoc->intSubset = NULL;
9081: } else {
9082: ret = NULL;
9083: }
9084: xmlFreeDoc(ctxt->myDoc);
9085: ctxt->myDoc = NULL;
9086: }
1.86 daniel 9087: if (sax != NULL) ctxt->sax = NULL;
1.76 daniel 9088: xmlFreeParserCtxt(ctxt);
9089:
9090: return(ret);
9091: }
9092:
9093: /**
9094: * xmlParseDTD :
9095: * @ExternalID: a NAME* containing the External ID of the DTD
9096: * @SystemID: a NAME* containing the URL to the DTD
9097: *
9098: * Load and parse an external subset.
9099: *
9100: * Returns the resulting xmlDtdPtr or NULL in case of error.
9101: */
9102:
9103: xmlDtdPtr
1.123 daniel 9104: xmlParseDTD(const xmlChar *ExternalID, const xmlChar *SystemID) {
1.76 daniel 9105: return(xmlSAXParseDTD(NULL, ExternalID, SystemID));
1.59 daniel 9106: }
9107:
9108: /**
1.144 daniel 9109: * xmlSAXParseBalancedChunk :
9110: * @ctx: an XML parser context (possibly NULL)
9111: * @sax: the SAX handler bloc (possibly NULL)
9112: * @user_data: The user data returned on SAX callbacks (possibly NULL)
9113: * @input: a parser input stream
9114: * @enc: the encoding
9115: *
9116: * Parse a well-balanced chunk of an XML document
9117: * The user has to provide SAX callback block whose routines will be
9118: * called by the parser
9119: * The allowed sequence for the Well Balanced Chunk is the one defined by
9120: * the content production in the XML grammar:
9121: *
9122: * [43] content ::= (element | CharData | Reference | CDSect | PI | Comment)*
9123: *
9124: * Returns 0 id the chunk is well balanced, -1 in case of args problem and
9125: * the error code otherwise
9126: */
9127:
9128: int
9129: xmlSAXParseBalancedChunk(xmlParserCtxtPtr ctx, xmlSAXHandlerPtr sax,
9130: void *user_data, xmlParserInputPtr input,
9131: xmlCharEncoding enc) {
9132: xmlParserCtxtPtr ctxt;
9133: int ret;
9134:
9135: if (input == NULL) return(-1);
9136:
9137: if (ctx != NULL)
9138: ctxt = ctx;
9139: else {
9140: ctxt = xmlNewParserCtxt();
9141: if (ctxt == NULL)
9142: return(-1);
9143: if (sax == NULL)
9144: ctxt->myDoc = xmlNewDoc(BAD_CAST "1.0");
9145: }
9146:
9147: /*
9148: * Set-up the SAX context
9149: */
9150: if (sax != NULL) {
9151: if (ctxt->sax != NULL)
9152: xmlFree(ctxt->sax);
9153: ctxt->sax = sax;
9154: ctxt->userData = user_data;
9155: }
9156:
9157: /*
9158: * plug some encoding conversion routines here.
9159: */
9160: xmlPushInput(ctxt, input);
9161: if (enc != XML_CHAR_ENCODING_NONE)
9162: xmlSwitchEncoding(ctxt, enc);
9163:
9164: /*
9165: * let's parse that entity knowing it's an external subset.
9166: */
9167: xmlParseContent(ctxt);
9168: ret = ctxt->errNo;
9169:
9170: if (ctx == NULL) {
9171: if (sax != NULL)
9172: ctxt->sax = NULL;
9173: else
9174: xmlFreeDoc(ctxt->myDoc);
9175: xmlFreeParserCtxt(ctxt);
9176: }
9177: return(ret);
9178: }
9179:
9180: /**
9181: * xmlParseBalancedChunk :
9182: * @doc: the document the chunk pertains to
9183: * @node: the node defining the context in which informations will be added
9184: *
9185: * Parse a well-balanced chunk of an XML document present in memory
9186: *
9187: * Returns the resulting list of nodes resulting from the parsing,
9188: * they are not added to @node
9189: */
9190:
9191: xmlNodePtr
9192: xmlParseBalancedChunkMemory(xmlDocPtr doc, xmlNodePtr node) {
1.156 daniel 9193: /* TODO !!! */
9194: return(NULL);
1.144 daniel 9195: }
9196:
9197: /**
9198: * xmlParseBalancedChunkFile :
9199: * @doc: the document the chunk pertains to
9200: *
9201: * Parse a well-balanced chunk of an XML document contained in a file
9202: *
9203: * Returns the resulting list of nodes resulting from the parsing,
9204: * they are not added to @node
9205: */
9206:
9207: xmlNodePtr
9208: xmlParseBalancedChunkFile(xmlDocPtr doc, xmlNodePtr node) {
1.156 daniel 9209: /* TODO !!! */
9210: return(NULL);
1.144 daniel 9211: }
9212:
9213: /**
1.59 daniel 9214: * xmlRecoverDoc :
1.123 daniel 9215: * @cur: a pointer to an array of xmlChar
1.59 daniel 9216: *
9217: * parse an XML in-memory document and build a tree.
9218: * In the case the document is not Well Formed, a tree is built anyway
9219: *
1.68 daniel 9220: * Returns the resulting document tree
1.59 daniel 9221: */
9222:
1.69 daniel 9223: xmlDocPtr
1.123 daniel 9224: xmlRecoverDoc(xmlChar *cur) {
1.59 daniel 9225: return(xmlSAXParseDoc(NULL, cur, 1));
1.55 daniel 9226: }
9227:
9228: /**
1.69 daniel 9229: * xmlCreateFileParserCtxt :
1.50 daniel 9230: * @filename: the filename
9231: *
1.69 daniel 9232: * Create a parser context for a file content.
9233: * Automatic support for ZLIB/Compress compressed document is provided
9234: * by default if found at compile-time.
1.50 daniel 9235: *
1.69 daniel 9236: * Returns the new parser context or NULL
1.9 httpng 9237: */
1.69 daniel 9238: xmlParserCtxtPtr
9239: xmlCreateFileParserCtxt(const char *filename)
9240: {
9241: xmlParserCtxtPtr ctxt;
1.40 daniel 9242: xmlParserInputPtr inputStream;
1.91 daniel 9243: xmlParserInputBufferPtr buf;
1.111 daniel 9244: char *directory = NULL;
1.9 httpng 9245:
1.91 daniel 9246: buf = xmlParserInputBufferCreateFilename(filename, XML_CHAR_ENCODING_NONE);
9247: if (buf == NULL) return(NULL);
1.9 httpng 9248:
1.97 daniel 9249: ctxt = xmlNewParserCtxt();
1.16 daniel 9250: if (ctxt == NULL) {
9251: return(NULL);
9252: }
1.97 daniel 9253:
1.96 daniel 9254: inputStream = xmlNewInputStream(ctxt);
1.40 daniel 9255: if (inputStream == NULL) {
1.97 daniel 9256: xmlFreeParserCtxt(ctxt);
1.40 daniel 9257: return(NULL);
9258: }
9259:
1.119 daniel 9260: inputStream->filename = xmlMemStrdup(filename);
1.91 daniel 9261: inputStream->buf = buf;
9262: inputStream->base = inputStream->buf->buffer->content;
9263: inputStream->cur = inputStream->buf->buffer->content;
1.16 daniel 9264:
1.40 daniel 9265: inputPush(ctxt, inputStream);
1.110 daniel 9266: if ((ctxt->directory == NULL) && (directory == NULL))
1.106 daniel 9267: directory = xmlParserGetDirectory(filename);
9268: if ((ctxt->directory == NULL) && (directory != NULL))
1.110 daniel 9269: ctxt->directory = directory;
1.106 daniel 9270:
1.69 daniel 9271: return(ctxt);
9272: }
9273:
9274: /**
9275: * xmlSAXParseFile :
9276: * @sax: the SAX handler block
9277: * @filename: the filename
9278: * @recovery: work in recovery mode, i.e. tries to read no Well Formed
9279: * documents
9280: *
9281: * parse an XML file and build a tree. Automatic support for ZLIB/Compress
9282: * compressed document is provided by default if found at compile-time.
9283: * It use the given SAX function block to handle the parsing callback.
9284: * If sax is NULL, fallback to the default DOM tree building routines.
9285: *
9286: * Returns the resulting document tree
9287: */
9288:
1.79 daniel 9289: xmlDocPtr
9290: xmlSAXParseFile(xmlSAXHandlerPtr sax, const char *filename,
1.69 daniel 9291: int recovery) {
9292: xmlDocPtr ret;
9293: xmlParserCtxtPtr ctxt;
1.111 daniel 9294: char *directory = NULL;
1.69 daniel 9295:
9296: ctxt = xmlCreateFileParserCtxt(filename);
9297: if (ctxt == NULL) return(NULL);
1.74 daniel 9298: if (sax != NULL) {
1.93 veillard 9299: if (ctxt->sax != NULL)
1.119 daniel 9300: xmlFree(ctxt->sax);
1.74 daniel 9301: ctxt->sax = sax;
9302: ctxt->userData = NULL;
9303: }
1.106 daniel 9304:
1.110 daniel 9305: if ((ctxt->directory == NULL) && (directory == NULL))
1.106 daniel 9306: directory = xmlParserGetDirectory(filename);
9307: if ((ctxt->directory == NULL) && (directory != NULL))
1.156 daniel 9308: ctxt->directory = (char *) xmlStrdup((xmlChar *) directory);
1.16 daniel 9309:
9310: xmlParseDocument(ctxt);
1.40 daniel 9311:
1.72 daniel 9312: if ((ctxt->wellFormed) || recovery) ret = ctxt->myDoc;
1.59 daniel 9313: else {
9314: ret = NULL;
1.72 daniel 9315: xmlFreeDoc(ctxt->myDoc);
9316: ctxt->myDoc = NULL;
1.59 daniel 9317: }
1.86 daniel 9318: if (sax != NULL)
9319: ctxt->sax = NULL;
1.69 daniel 9320: xmlFreeParserCtxt(ctxt);
1.20 daniel 9321:
9322: return(ret);
9323: }
9324:
1.55 daniel 9325: /**
9326: * xmlParseFile :
9327: * @filename: the filename
9328: *
9329: * parse an XML file and build a tree. Automatic support for ZLIB/Compress
9330: * compressed document is provided by default if found at compile-time.
9331: *
1.68 daniel 9332: * Returns the resulting document tree
1.55 daniel 9333: */
9334:
1.79 daniel 9335: xmlDocPtr
9336: xmlParseFile(const char *filename) {
1.59 daniel 9337: return(xmlSAXParseFile(NULL, filename, 0));
9338: }
9339:
9340: /**
9341: * xmlRecoverFile :
9342: * @filename: the filename
9343: *
9344: * parse an XML file and build a tree. Automatic support for ZLIB/Compress
9345: * compressed document is provided by default if found at compile-time.
9346: * In the case the document is not Well Formed, a tree is built anyway
9347: *
1.68 daniel 9348: * Returns the resulting document tree
1.59 daniel 9349: */
9350:
1.79 daniel 9351: xmlDocPtr
9352: xmlRecoverFile(const char *filename) {
1.59 daniel 9353: return(xmlSAXParseFile(NULL, filename, 1));
1.55 daniel 9354: }
1.32 daniel 9355:
1.50 daniel 9356: /**
1.69 daniel 9357: * xmlCreateMemoryParserCtxt :
1.68 daniel 9358: * @buffer: an pointer to a char array
1.127 daniel 9359: * @size: the size of the array
1.50 daniel 9360: *
1.69 daniel 9361: * Create a parser context for an XML in-memory document.
1.50 daniel 9362: *
1.69 daniel 9363: * Returns the new parser context or NULL
1.20 daniel 9364: */
1.69 daniel 9365: xmlParserCtxtPtr
9366: xmlCreateMemoryParserCtxt(char *buffer, int size) {
1.20 daniel 9367: xmlParserCtxtPtr ctxt;
1.40 daniel 9368: xmlParserInputPtr input;
9369:
1.158 daniel 9370: if (buffer[size - 1] != 0)
9371: buffer[size - 1] = '\0';
1.40 daniel 9372:
1.97 daniel 9373: ctxt = xmlNewParserCtxt();
1.20 daniel 9374: if (ctxt == NULL) {
9375: return(NULL);
9376: }
1.97 daniel 9377:
1.96 daniel 9378: input = xmlNewInputStream(ctxt);
1.40 daniel 9379: if (input == NULL) {
1.97 daniel 9380: xmlFreeParserCtxt(ctxt);
1.40 daniel 9381: return(NULL);
9382: }
1.20 daniel 9383:
1.40 daniel 9384: input->filename = NULL;
9385: input->line = 1;
9386: input->col = 1;
1.96 daniel 9387: input->buf = NULL;
1.91 daniel 9388: input->consumed = 0;
1.75 daniel 9389:
1.116 daniel 9390: input->base = BAD_CAST buffer;
9391: input->cur = BAD_CAST buffer;
1.69 daniel 9392: input->free = NULL;
1.20 daniel 9393:
1.40 daniel 9394: inputPush(ctxt, input);
1.69 daniel 9395: return(ctxt);
9396: }
9397:
9398: /**
9399: * xmlSAXParseMemory :
9400: * @sax: the SAX handler block
9401: * @buffer: an pointer to a char array
1.127 daniel 9402: * @size: the size of the array
9403: * @recovery: work in recovery mode, i.e. tries to read not Well Formed
1.69 daniel 9404: * documents
9405: *
9406: * parse an XML in-memory block and use the given SAX function block
9407: * to handle the parsing callback. If sax is NULL, fallback to the default
9408: * DOM tree building routines.
9409: *
9410: * Returns the resulting document tree
9411: */
9412: xmlDocPtr
9413: xmlSAXParseMemory(xmlSAXHandlerPtr sax, char *buffer, int size, int recovery) {
9414: xmlDocPtr ret;
9415: xmlParserCtxtPtr ctxt;
9416:
9417: ctxt = xmlCreateMemoryParserCtxt(buffer, size);
9418: if (ctxt == NULL) return(NULL);
1.74 daniel 9419: if (sax != NULL) {
9420: ctxt->sax = sax;
9421: ctxt->userData = NULL;
9422: }
1.20 daniel 9423:
9424: xmlParseDocument(ctxt);
1.40 daniel 9425:
1.72 daniel 9426: if ((ctxt->wellFormed) || recovery) ret = ctxt->myDoc;
1.59 daniel 9427: else {
9428: ret = NULL;
1.72 daniel 9429: xmlFreeDoc(ctxt->myDoc);
9430: ctxt->myDoc = NULL;
1.59 daniel 9431: }
1.86 daniel 9432: if (sax != NULL)
9433: ctxt->sax = NULL;
1.69 daniel 9434: xmlFreeParserCtxt(ctxt);
1.16 daniel 9435:
1.9 httpng 9436: return(ret);
1.17 daniel 9437: }
9438:
1.55 daniel 9439: /**
9440: * xmlParseMemory :
1.68 daniel 9441: * @buffer: an pointer to a char array
1.55 daniel 9442: * @size: the size of the array
9443: *
9444: * parse an XML in-memory block and build a tree.
9445: *
1.68 daniel 9446: * Returns the resulting document tree
1.55 daniel 9447: */
9448:
9449: xmlDocPtr xmlParseMemory(char *buffer, int size) {
1.59 daniel 9450: return(xmlSAXParseMemory(NULL, buffer, size, 0));
9451: }
9452:
9453: /**
9454: * xmlRecoverMemory :
1.68 daniel 9455: * @buffer: an pointer to a char array
1.59 daniel 9456: * @size: the size of the array
9457: *
9458: * parse an XML in-memory block and build a tree.
9459: * In the case the document is not Well Formed, a tree is built anyway
9460: *
1.68 daniel 9461: * Returns the resulting document tree
1.59 daniel 9462: */
9463:
9464: xmlDocPtr xmlRecoverMemory(char *buffer, int size) {
9465: return(xmlSAXParseMemory(NULL, buffer, size, 1));
1.17 daniel 9466: }
9467:
9468:
1.50 daniel 9469: /**
9470: * xmlSetupParserForBuffer:
9471: * @ctxt: an XML parser context
1.123 daniel 9472: * @buffer: a xmlChar * buffer
1.50 daniel 9473: * @filename: a file name
9474: *
1.19 daniel 9475: * Setup the parser context to parse a new buffer; Clears any prior
9476: * contents from the parser context. The buffer parameter must not be
9477: * NULL, but the filename parameter can be
9478: */
1.55 daniel 9479: void
1.123 daniel 9480: xmlSetupParserForBuffer(xmlParserCtxtPtr ctxt, const xmlChar* buffer,
1.17 daniel 9481: const char* filename)
9482: {
1.96 daniel 9483: xmlParserInputPtr input;
1.40 daniel 9484:
1.96 daniel 9485: input = xmlNewInputStream(ctxt);
9486: if (input == NULL) {
9487: perror("malloc");
1.119 daniel 9488: xmlFree(ctxt);
1.145 daniel 9489: return;
1.96 daniel 9490: }
9491:
9492: xmlClearParserCtxt(ctxt);
9493: if (filename != NULL)
1.119 daniel 9494: input->filename = xmlMemStrdup(filename);
1.96 daniel 9495: input->base = buffer;
9496: input->cur = buffer;
9497: inputPush(ctxt, input);
1.17 daniel 9498: }
9499:
1.123 daniel 9500: /**
9501: * xmlSAXUserParseFile:
9502: * @sax: a SAX handler
9503: * @user_data: The user data returned on SAX callbacks
9504: * @filename: a file name
9505: *
9506: * parse an XML file and call the given SAX handler routines.
9507: * Automatic support for ZLIB/Compress compressed document is provided
9508: *
9509: * Returns 0 in case of success or a error number otherwise
9510: */
1.131 daniel 9511: int
9512: xmlSAXUserParseFile(xmlSAXHandlerPtr sax, void *user_data,
9513: const char *filename) {
1.123 daniel 9514: int ret = 0;
9515: xmlParserCtxtPtr ctxt;
9516:
9517: ctxt = xmlCreateFileParserCtxt(filename);
9518: if (ctxt == NULL) return -1;
1.134 daniel 9519: if (ctxt->sax != &xmlDefaultSAXHandler)
9520: xmlFree(ctxt->sax);
1.123 daniel 9521: ctxt->sax = sax;
1.140 daniel 9522: if (user_data != NULL)
9523: ctxt->userData = user_data;
1.123 daniel 9524:
9525: xmlParseDocument(ctxt);
9526:
9527: if (ctxt->wellFormed)
9528: ret = 0;
9529: else {
9530: if (ctxt->errNo != 0)
9531: ret = ctxt->errNo;
9532: else
9533: ret = -1;
9534: }
9535: if (sax != NULL)
9536: ctxt->sax = NULL;
9537: xmlFreeParserCtxt(ctxt);
9538:
9539: return ret;
9540: }
9541:
9542: /**
9543: * xmlSAXUserParseMemory:
9544: * @sax: a SAX handler
9545: * @user_data: The user data returned on SAX callbacks
9546: * @buffer: an in-memory XML document input
1.127 daniel 9547: * @size: the length of the XML document in bytes
1.123 daniel 9548: *
9549: * A better SAX parsing routine.
9550: * parse an XML in-memory buffer and call the given SAX handler routines.
9551: *
9552: * Returns 0 in case of success or a error number otherwise
9553: */
9554: int xmlSAXUserParseMemory(xmlSAXHandlerPtr sax, void *user_data,
9555: char *buffer, int size) {
9556: int ret = 0;
9557: xmlParserCtxtPtr ctxt;
9558:
9559: ctxt = xmlCreateMemoryParserCtxt(buffer, size);
9560: if (ctxt == NULL) return -1;
9561: ctxt->sax = sax;
9562: ctxt->userData = user_data;
9563:
9564: xmlParseDocument(ctxt);
9565:
9566: if (ctxt->wellFormed)
9567: ret = 0;
9568: else {
9569: if (ctxt->errNo != 0)
9570: ret = ctxt->errNo;
9571: else
9572: ret = -1;
9573: }
9574: if (sax != NULL)
9575: ctxt->sax = NULL;
9576: xmlFreeParserCtxt(ctxt);
9577:
9578: return ret;
9579: }
9580:
1.32 daniel 9581:
1.98 daniel 9582: /************************************************************************
9583: * *
1.127 daniel 9584: * Miscellaneous *
1.98 daniel 9585: * *
9586: ************************************************************************/
9587:
/**
 * xmlCleanupParser:
 *
 * Cleanup function for the XML parser. It tries to reclaim all
 * parsing related global memory allocated for the parser processing:
 * the character encoding handlers first, then the predefined entities.
 * It doesn't deallocate any document related memory. Calling this
 * function should not prevent reusing the parser.
 */

void
xmlCleanupParser(void)
{
    xmlCleanupCharEncodingHandlers();
    xmlCleanupPredefinedEntities();
}
1.98 daniel 9602:
1.50 daniel 9603: /**
9604: * xmlParserFindNodeInfo:
9605: * @ctxt: an XML parser context
9606: * @node: an XML node within the tree
9607: *
9608: * Find the parser node info struct for a given node
9609: *
1.68 daniel 9610: * Returns an xmlParserNodeInfo block pointer or NULL
1.32 daniel 9611: */
9612: const xmlParserNodeInfo* xmlParserFindNodeInfo(const xmlParserCtxt* ctx,
9613: const xmlNode* node)
9614: {
9615: unsigned long pos;
9616:
9617: /* Find position where node should be at */
9618: pos = xmlParserFindNodeInfoIndex(&ctx->node_seq, node);
9619: if ( ctx->node_seq.buffer[pos].node == node )
9620: return &ctx->node_seq.buffer[pos];
9621: else
9622: return NULL;
9623: }
9624:
9625:
1.50 daniel 9626: /**
9627: * xmlInitNodeInfoSeq :
9628: * @seq: a node info sequence pointer
9629: *
9630: * -- Initialize (set to initial state) node info sequence
1.32 daniel 9631: */
1.55 daniel 9632: void
9633: xmlInitNodeInfoSeq(xmlParserNodeInfoSeqPtr seq)
1.32 daniel 9634: {
9635: seq->length = 0;
9636: seq->maximum = 0;
9637: seq->buffer = NULL;
9638: }
9639:
1.50 daniel 9640: /**
9641: * xmlClearNodeInfoSeq :
9642: * @seq: a node info sequence pointer
9643: *
9644: * -- Clear (release memory and reinitialize) node
1.32 daniel 9645: * info sequence
9646: */
1.55 daniel 9647: void
9648: xmlClearNodeInfoSeq(xmlParserNodeInfoSeqPtr seq)
1.32 daniel 9649: {
9650: if ( seq->buffer != NULL )
1.119 daniel 9651: xmlFree(seq->buffer);
1.32 daniel 9652: xmlInitNodeInfoSeq(seq);
9653: }
9654:
9655:
1.50 daniel 9656: /**
9657: * xmlParserFindNodeInfoIndex:
9658: * @seq: a node info sequence pointer
9659: * @node: an XML node pointer
9660: *
9661: *
1.32 daniel 9662: * xmlParserFindNodeInfoIndex : Find the index that the info record for
9663: * the given node is or should be at in a sorted sequence
1.68 daniel 9664: *
9665: * Returns a long indicating the position of the record
1.32 daniel 9666: */
9667: unsigned long xmlParserFindNodeInfoIndex(const xmlParserNodeInfoSeq* seq,
9668: const xmlNode* node)
9669: {
9670: unsigned long upper, lower, middle;
9671: int found = 0;
9672:
9673: /* Do a binary search for the key */
9674: lower = 1;
9675: upper = seq->length;
9676: middle = 0;
9677: while ( lower <= upper && !found) {
9678: middle = lower + (upper - lower) / 2;
9679: if ( node == seq->buffer[middle - 1].node )
9680: found = 1;
9681: else if ( node < seq->buffer[middle - 1].node )
9682: upper = middle - 1;
9683: else
9684: lower = middle + 1;
9685: }
9686:
9687: /* Return position */
9688: if ( middle == 0 || seq->buffer[middle - 1].node < node )
9689: return middle;
9690: else
9691: return middle - 1;
9692: }
9693:
9694:
1.50 daniel 9695: /**
9696: * xmlParserAddNodeInfo:
9697: * @ctxt: an XML parser context
1.68 daniel 9698: * @info: a node info sequence pointer
1.50 daniel 9699: *
9700: * Insert node info record into the sorted sequence
1.32 daniel 9701: */
1.55 daniel 9702: void
9703: xmlParserAddNodeInfo(xmlParserCtxtPtr ctxt,
1.68 daniel 9704: const xmlParserNodeInfo* info)
1.32 daniel 9705: {
9706: unsigned long pos;
9707: static unsigned int block_size = 5;
9708:
9709: /* Find pos and check to see if node is already in the sequence */
1.55 daniel 9710: pos = xmlParserFindNodeInfoIndex(&ctxt->node_seq, info->node);
9711: if ( pos < ctxt->node_seq.length
9712: && ctxt->node_seq.buffer[pos].node == info->node ) {
9713: ctxt->node_seq.buffer[pos] = *info;
1.32 daniel 9714: }
9715:
9716: /* Otherwise, we need to add new node to buffer */
9717: else {
9718: /* Expand buffer by 5 if needed */
1.55 daniel 9719: if ( ctxt->node_seq.length + 1 > ctxt->node_seq.maximum ) {
1.32 daniel 9720: xmlParserNodeInfo* tmp_buffer;
1.55 daniel 9721: unsigned int byte_size = (sizeof(*ctxt->node_seq.buffer)
9722: *(ctxt->node_seq.maximum + block_size));
1.32 daniel 9723:
1.55 daniel 9724: if ( ctxt->node_seq.buffer == NULL )
1.119 daniel 9725: tmp_buffer = (xmlParserNodeInfo*) xmlMalloc(byte_size);
1.32 daniel 9726: else
1.119 daniel 9727: tmp_buffer = (xmlParserNodeInfo*) xmlRealloc(ctxt->node_seq.buffer, byte_size);
1.32 daniel 9728:
9729: if ( tmp_buffer == NULL ) {
1.55 daniel 9730: if ((ctxt->sax != NULL) && (ctxt->sax->error != NULL))
1.74 daniel 9731: ctxt->sax->error(ctxt->userData, "Out of memory\n");
1.123 daniel 9732: ctxt->errNo = XML_ERR_NO_MEMORY;
1.32 daniel 9733: return;
9734: }
1.55 daniel 9735: ctxt->node_seq.buffer = tmp_buffer;
9736: ctxt->node_seq.maximum += block_size;
1.32 daniel 9737: }
9738:
9739: /* If position is not at end, move elements out of the way */
1.55 daniel 9740: if ( pos != ctxt->node_seq.length ) {
1.32 daniel 9741: unsigned long i;
9742:
1.55 daniel 9743: for ( i = ctxt->node_seq.length; i > pos; i-- )
9744: ctxt->node_seq.buffer[i] = ctxt->node_seq.buffer[i - 1];
1.32 daniel 9745: }
9746:
9747: /* Copy element and increase length */
1.55 daniel 9748: ctxt->node_seq.buffer[pos] = *info;
9749: ctxt->node_seq.length++;
1.32 daniel 9750: }
9751: }
1.77 daniel 9752:
1.98 daniel 9753:
9754: /**
9755: * xmlSubstituteEntitiesDefault :
9756: * @val: int 0 or 1
9757: *
9758: * Set and return the previous value for default entity support.
9759: * Initially the parser always keep entity references instead of substituting
9760: * entity values in the output. This function has to be used to change the
9761: * default parser behaviour
9762: * SAX::subtituteEntities() has to be used for changing that on a file by
9763: * file basis.
9764: *
9765: * Returns the last value for 0 for no substitution, 1 for substitution.
9766: */
9767:
9768: int
9769: xmlSubstituteEntitiesDefault(int val) {
9770: int old = xmlSubstituteEntitiesDefaultValue;
9771:
9772: xmlSubstituteEntitiesDefaultValue = val;
9773: return(old);
9774: }
1.77 daniel 9775:
Webmaster