Annotation of XML/parser.c, revision 1.181
1.1 veillard 1: /*
1.3 veillard 2: * parser.c : an XML 1.0 non-verifying parser
1.15 veillard 3: *
4: * See Copyright for the status of this software.
5: *
1.60 daniel 6: * Daniel.Veillard@w3.org
1.1 veillard 7: */
8:
1.26 daniel 9: #ifdef WIN32
1.138 daniel 10: #include "win32config.h"
1.26 daniel 11: #else
1.121 daniel 12: #include "config.h"
1.26 daniel 13: #endif
1.121 daniel 14:
1.1 veillard 15: #include <stdio.h>
1.121 daniel 16: #include <string.h> /* for memset() only */
17: #ifdef HAVE_CTYPE_H
1.1 veillard 18: #include <ctype.h>
1.121 daniel 19: #endif
20: #ifdef HAVE_STDLIB_H
1.50 daniel 21: #include <stdlib.h>
1.121 daniel 22: #endif
23: #ifdef HAVE_SYS_STAT_H
1.9 httpng 24: #include <sys/stat.h>
1.121 daniel 25: #endif
1.9 httpng 26: #ifdef HAVE_FCNTL_H
27: #include <fcntl.h>
28: #endif
1.10 httpng 29: #ifdef HAVE_UNISTD_H
30: #include <unistd.h>
31: #endif
1.20 daniel 32: #ifdef HAVE_ZLIB_H
33: #include <zlib.h>
34: #endif
1.1 veillard 35:
1.119 daniel 36: #include "xmlmemory.h"
1.14 veillard 37: #include "tree.h"
1.1 veillard 38: #include "parser.h"
1.14 veillard 39: #include "entities.h"
1.75 daniel 40: #include "encoding.h"
1.61 daniel 41: #include "valid.h"
1.69 daniel 42: #include "parserInternals.h"
1.91 daniel 43: #include "xmlIO.h"
1.122 daniel 44: #include "xml-error.h"
1.1 veillard 45:
1.140 daniel 46: #define XML_PARSER_BIG_BUFFER_SIZE 1000
47: #define XML_PARSER_BUFFER_SIZE 100
48:
1.86 daniel 49: const char *xmlParserVersion = LIBXML_VERSION;
1.160 daniel 50: int xmlGetWarningsDefaultValue = 1;
1.86 daniel 51:
1.139 daniel 52: /*
53: * List of XML prefixed PI allowed by W3C specs
54: */
55:
56: const char *xmlW3CPIs[] = {
57: "xml-stylesheet",
58: NULL
59: };
1.91 daniel 60:
1.151 daniel 61: void xmlParserHandleReference(xmlParserCtxtPtr ctxt);
62: void xmlParserHandlePEReference(xmlParserCtxtPtr ctxt);
63: xmlEntityPtr xmlParseStringPEReference(xmlParserCtxtPtr ctxt,
64: const xmlChar **str);
1.91 daniel 65: /************************************************************************
66: * *
67: * Input handling functions for progressive parsing *
68: * *
69: ************************************************************************/
70:
71: /* #define DEBUG_INPUT */
1.140 daniel 72: /* #define DEBUG_STACK */
73: /* #define DEBUG_PUSH */
74:
1.91 daniel 75:
1.110 daniel 76: #define INPUT_CHUNK 250
77: /* we need to keep enough input to show errors in context */
78: #define LINE_LEN 80
1.91 daniel 79:
80: #ifdef DEBUG_INPUT
81: #define CHECK_BUFFER(in) check_buffer(in)
82:
83: void check_buffer(xmlParserInputPtr in) {
84: if (in->base != in->buf->buffer->content) {
85: fprintf(stderr, "xmlParserInput: base mismatch problem\n");
86: }
87: if (in->cur < in->base) {
88: fprintf(stderr, "xmlParserInput: cur < base problem\n");
89: }
90: if (in->cur > in->base + in->buf->buffer->use) {
91: fprintf(stderr, "xmlParserInput: cur > base + use problem\n");
92: }
93: fprintf(stderr,"buffer %x : content %x, cur %d, use %d, size %d\n",
94: (int) in, (int) in->buf->buffer->content, in->cur - in->base,
95: in->buf->buffer->use, in->buf->buffer->size);
96: }
97:
1.110 daniel 98: #else
99: #define CHECK_BUFFER(in)
100: #endif
101:
1.91 daniel 102:
103: /**
104: * xmlParserInputRead:
105: * @in: an XML parser input
106: * @len: an indicative size for the lookahead
107: *
108: * This function refresh the input for the parser. It doesn't try to
109: * preserve pointers to the input buffer, and discard already read data
110: *
1.123 daniel 111: * Returns the number of xmlChars read, or -1 in case of error, 0 indicate the
1.91 daniel 112: * end of this entity
113: */
114: int
115: xmlParserInputRead(xmlParserInputPtr in, int len) {
116: int ret;
117: int used;
118: int index;
119:
120: #ifdef DEBUG_INPUT
121: fprintf(stderr, "Read\n");
122: #endif
123: if (in->buf == NULL) return(-1);
124: if (in->base == NULL) return(-1);
125: if (in->cur == NULL) return(-1);
126: if (in->buf->buffer == NULL) return(-1);
127:
128: CHECK_BUFFER(in);
129:
130: used = in->cur - in->buf->buffer->content;
131: ret = xmlBufferShrink(in->buf->buffer, used);
132: if (ret > 0) {
133: in->cur -= ret;
134: in->consumed += ret;
135: }
136: ret = xmlParserInputBufferRead(in->buf, len);
137: if (in->base != in->buf->buffer->content) {
138: /*
139: * the buffer has been realloced
140: */
141: index = in->cur - in->base;
142: in->base = in->buf->buffer->content;
143: in->cur = &in->buf->buffer->content[index];
144: }
145:
146: CHECK_BUFFER(in);
147:
148: return(ret);
149: }
150:
151: /**
152: * xmlParserInputGrow:
153: * @in: an XML parser input
154: * @len: an indicative size for the lookahead
155: *
156: * This function increase the input for the parser. It tries to
157: * preserve pointers to the input buffer, and keep already read data
158: *
1.123 daniel 159: * Returns the number of xmlChars read, or -1 in case of error, 0 indicate the
1.91 daniel 160: * end of this entity
161: */
162: int
163: xmlParserInputGrow(xmlParserInputPtr in, int len) {
164: int ret;
165: int index;
166:
167: #ifdef DEBUG_INPUT
168: fprintf(stderr, "Grow\n");
169: #endif
170: if (in->buf == NULL) return(-1);
171: if (in->base == NULL) return(-1);
172: if (in->cur == NULL) return(-1);
173: if (in->buf->buffer == NULL) return(-1);
174:
175: CHECK_BUFFER(in);
176:
177: index = in->cur - in->base;
178: if (in->buf->buffer->use > index + INPUT_CHUNK) {
179:
180: CHECK_BUFFER(in);
181:
182: return(0);
183: }
1.148 daniel 184: if ((in->buf->httpIO != NULL) || (in->buf->ftpIO != NULL) ||
185: (in->buf->file != NULL) ||
1.140 daniel 186: #ifdef HAVE_ZLIB_H
187: (in->buf->gzfile != NULL) ||
188: #endif
189: (in->buf->fd >= 0))
190: ret = xmlParserInputBufferGrow(in->buf, len);
191: else
192: return(0);
1.135 daniel 193:
194: /*
195: * NOTE : in->base may be a "dandling" i.e. freed pointer in this
196: * block, but we use it really as an integer to do some
197: * pointer arithmetic. Insure will raise it as a bug but in
198: * that specific case, that's not !
199: */
1.91 daniel 200: if (in->base != in->buf->buffer->content) {
201: /*
202: * the buffer has been realloced
203: */
204: index = in->cur - in->base;
205: in->base = in->buf->buffer->content;
206: in->cur = &in->buf->buffer->content[index];
207: }
208:
209: CHECK_BUFFER(in);
210:
211: return(ret);
212: }
213:
214: /**
215: * xmlParserInputShrink:
216: * @in: an XML parser input
217: *
218: * This function removes used input for the parser.
219: */
220: void
221: xmlParserInputShrink(xmlParserInputPtr in) {
222: int used;
223: int ret;
224: int index;
225:
226: #ifdef DEBUG_INPUT
227: fprintf(stderr, "Shrink\n");
228: #endif
229: if (in->buf == NULL) return;
230: if (in->base == NULL) return;
231: if (in->cur == NULL) return;
232: if (in->buf->buffer == NULL) return;
233:
234: CHECK_BUFFER(in);
235:
236: used = in->cur - in->buf->buffer->content;
237: if (used > INPUT_CHUNK) {
1.110 daniel 238: ret = xmlBufferShrink(in->buf->buffer, used - LINE_LEN);
1.91 daniel 239: if (ret > 0) {
240: in->cur -= ret;
241: in->consumed += ret;
242: }
243: }
244:
245: CHECK_BUFFER(in);
246:
247: if (in->buf->buffer->use > INPUT_CHUNK) {
248: return;
249: }
250: xmlParserInputBufferRead(in->buf, 2 * INPUT_CHUNK);
251: if (in->base != in->buf->buffer->content) {
252: /*
253: * the buffer has been realloced
254: */
255: index = in->cur - in->base;
256: in->base = in->buf->buffer->content;
257: in->cur = &in->buf->buffer->content[index];
258: }
259:
260: CHECK_BUFFER(in);
261: }
262:
1.45 daniel 263: /************************************************************************
264: * *
265: * Parser stacks related functions and macros *
266: * *
267: ************************************************************************/
1.79 daniel 268:
269: int xmlSubstituteEntitiesDefaultValue = 0;
1.100 daniel 270: int xmlDoValidityCheckingDefaultValue = 0;
1.180 daniel 271: int xmlKeepBlanksDefaultValue = 1;
1.135 daniel 272: xmlEntityPtr xmlParseStringEntityRef(xmlParserCtxtPtr ctxt,
273: const xmlChar ** str);
1.79 daniel 274:
1.1 veillard 275: /*
1.40 daniel 276: * Generic function for accessing stacks in the Parser Context
1.1 veillard 277: */
278:
1.140 daniel 279: #define PUSH_AND_POP(scope, type, name) \
280: scope int name##Push(xmlParserCtxtPtr ctxt, type value) { \
1.31 daniel 281: if (ctxt->name##Nr >= ctxt->name##Max) { \
282: ctxt->name##Max *= 2; \
1.119 daniel 283: ctxt->name##Tab = (void *) xmlRealloc(ctxt->name##Tab, \
1.40 daniel 284: ctxt->name##Max * sizeof(ctxt->name##Tab[0])); \
285: if (ctxt->name##Tab == NULL) { \
1.31 daniel 286: fprintf(stderr, "realloc failed !\n"); \
1.145 daniel 287: return(0); \
1.31 daniel 288: } \
289: } \
1.40 daniel 290: ctxt->name##Tab[ctxt->name##Nr] = value; \
291: ctxt->name = value; \
292: return(ctxt->name##Nr++); \
1.31 daniel 293: } \
1.140 daniel 294: scope type name##Pop(xmlParserCtxtPtr ctxt) { \
1.69 daniel 295: type ret; \
1.40 daniel 296: if (ctxt->name##Nr <= 0) return(0); \
297: ctxt->name##Nr--; \
1.50 daniel 298: if (ctxt->name##Nr > 0) \
299: ctxt->name = ctxt->name##Tab[ctxt->name##Nr - 1]; \
300: else \
301: ctxt->name = NULL; \
1.69 daniel 302: ret = ctxt->name##Tab[ctxt->name##Nr]; \
303: ctxt->name##Tab[ctxt->name##Nr] = 0; \
304: return(ret); \
1.31 daniel 305: } \
306:
1.140 daniel 307: PUSH_AND_POP(extern, xmlParserInputPtr, input)
308: PUSH_AND_POP(extern, xmlNodePtr, node)
309: PUSH_AND_POP(extern, xmlChar*, name)
1.40 daniel 310:
1.176 daniel 311: int spacePush(xmlParserCtxtPtr ctxt, int val) {
312: if (ctxt->spaceNr >= ctxt->spaceMax) {
313: ctxt->spaceMax *= 2;
314: ctxt->spaceTab = (void *) xmlRealloc(ctxt->spaceTab,
315: ctxt->spaceMax * sizeof(ctxt->spaceTab[0]));
316: if (ctxt->spaceTab == NULL) {
317: fprintf(stderr, "realloc failed !\n");
318: return(0);
319: }
320: }
321: ctxt->spaceTab[ctxt->spaceNr] = val;
322: ctxt->space = &ctxt->spaceTab[ctxt->spaceNr];
323: return(ctxt->spaceNr++);
324: }
325:
326: int spacePop(xmlParserCtxtPtr ctxt) {
327: int ret;
328: if (ctxt->spaceNr <= 0) return(0);
329: ctxt->spaceNr--;
330: if (ctxt->spaceNr > 0)
331: ctxt->space = &ctxt->spaceTab[ctxt->spaceNr - 1];
332: else
333: ctxt->space = NULL;
334: ret = ctxt->spaceTab[ctxt->spaceNr];
335: ctxt->spaceTab[ctxt->spaceNr] = -1;
336: return(ret);
337: }
338:
1.55 daniel 339: /*
340: * Macros for accessing the content. Those should be used only by the parser,
341: * and not exported.
342: *
343: * Dirty macros, i.e. one need to make assumption on the context to use them
344: *
1.123 daniel 345: * CUR_PTR return the current pointer to the xmlChar to be parsed.
1.151 daniel 346: * To be used with extreme caution since operations consuming
347: * characters may move the input buffer to a different location !
1.123 daniel 348: * CUR returns the current xmlChar value, i.e. a 8 bit value if compiled
1.152 daniel 349: * in ISO-Latin or UTF-8.
1.151 daniel 350: * This should be used internally by the parser
1.55 daniel 351: * only to compare to ASCII values otherwise it would break when
352: * running with UTF-8 encoding.
1.123 daniel 353: * NXT(n) returns the n'th next xmlChar. Same as CUR it should be used only
1.55 daniel 354: * to compare on ASCII based substring.
1.123 daniel 355: * SKIP(n) Skip n xmlChar, and must also be used only to skip ASCII defined
1.55 daniel 356: * strings within the parser.
357: *
1.77 daniel 358: * Clean macros, not dependent of an ASCII context, expect UTF-8 encoding
1.55 daniel 359: *
360: * NEXT Skip to the next character, this does the proper decoding
361: * in UTF-8 mode. It also pop-up unfinished entities on the fly.
1.77 daniel 362: * COPY(to) copy one char to *to, increment CUR_PTR and to accordingly
1.155 daniel 363: * CUR_CHAR Return the current char as an int as well as its length.
1.55 daniel 364: */
1.45 daniel 365:
/* RAW: -1 while a token is pending in ctxt->token, else the xmlChar at the current input position. */
1.152 daniel 366: #define RAW (ctxt->token ? -1 : (*ctxt->input->cur))
/* CUR: the pending ctxt->token if there is one, else the xmlChar at the current input position. */
1.97 daniel 367: #define CUR (ctxt->token ? ctxt->token : (*ctxt->input->cur))
/* NXT(val) / CUR_PTR: direct, unparenthesized accesses to ctxt->input->cur; they ignore ctxt->token. */
1.55 daniel 368: #define NXT(val) ctxt->input->cur[(val)]
369: #define CUR_PTR ctxt->input->cur
1.154 daniel 370:
/*
 * SKIP(val): advance by val xmlChars, hand '%' and '&' to the reference
 * handlers, and pop the input when it is exhausted and cannot grow.
 * NOTE: SKIP, SHRINK and GROW expand to several statements that are not
 * wrapped in do { } while (0); do not use them as the sole body of an
 * unbraced if/else/loop. Each ends without a semicolon: the caller adds it.
 */
1.164 daniel 371: #define SKIP(val) ctxt->nbChars += (val),ctxt->input->cur += (val); \
372: if (*ctxt->input->cur == '%') xmlParserHandlePEReference(ctxt); \
1.168 daniel 373: if (*ctxt->input->cur == '&') xmlParserHandleReference(ctxt); \
374: if ((*ctxt->input->cur == 0) && \
375: (xmlParserInputGrow(ctxt->input, INPUT_CHUNK) <= 0)) \
376: xmlPopInput(ctxt)
1.164 daniel 377:
/* SHRINK: call xmlParserInputShrink() on the current input, then pop it if it is exhausted and cannot grow. */
1.97 daniel 378: #define SHRINK xmlParserInputShrink(ctxt->input); \
379: if ((*ctxt->input->cur == 0) && \
380: (xmlParserInputGrow(ctxt->input, INPUT_CHUNK) <= 0)) \
381: xmlPopInput(ctxt)
382:
/* GROW: call xmlParserInputGrow() on the current input, then pop it if it is exhausted and cannot grow. */
383: #define GROW xmlParserInputGrow(ctxt->input, INPUT_CHUNK); \
384: if ((*ctxt->input->cur == 0) && \
385: (xmlParserInputGrow(ctxt->input, INPUT_CHUNK) <= 0)) \
386: xmlPopInput(ctxt)
1.55 daniel 387:
/* SKIP_BLANKS and NEXT already end with a semicolon, so they cannot be used inside an expression. */
1.155 daniel 388: #define SKIP_BLANKS xmlSkipBlankChars(ctxt);
1.154 daniel 389:
1.151 daniel 390: #define NEXT xmlNextChar(ctxt);
1.154 daniel 391:
/*
 * NEXTL(l): update line/col for the current xmlChar, clear any pending
 * token, advance by l xmlChars and hand '%' and '&' to the reference
 * handlers. Multi-statement and the argument l is not parenthesized.
 */
1.153 daniel 392: #define NEXTL(l) \
393: if (*(ctxt->input->cur) == '\n') { \
394: ctxt->input->line++; ctxt->input->col = 1; \
395: } else ctxt->input->col++; \
1.154 daniel 396: ctxt->token = 0; ctxt->input->cur += l; \
397: if (*ctxt->input->cur == '%') xmlParserHandlePEReference(ctxt); \
398: if (*ctxt->input->cur == '&') xmlParserHandleReference(ctxt);
399:
/* CUR_CHAR(l) / CUR_SCHAR(s, l): l must be an int lvalue, it receives the length; both end with a semicolon. */
1.152 daniel 400: #define CUR_CHAR(l) xmlCurrentChar(ctxt, &l);
1.162 daniel 401: #define CUR_SCHAR(s, l) xmlStringCurrentChar(ctxt, s, &l);
1.154 daniel 402:
/*
 * COPY_BUF(l,b,i,v): append value v to buffer b at index i and advance i;
 * a length l of 1 stores a single xmlChar, anything else goes through
 * xmlCopyChar(). Expands to a complete if/else statement ending with a
 * semicolon, so it must not be followed by an else in the caller.
 */
1.152 daniel 403: #define COPY_BUF(l,b,i,v) \
404: if (l == 1) b[i++] = (xmlChar) v; \
405: else i += xmlCopyChar(l,&b[i],v);
1.151 daniel 406:
407: /**
408: * xmlNextChar:
409: * @ctxt: the XML parser context
410: *
411: * Skip to the next char input char.
412: */
1.55 daniel 413:
1.151 daniel 414: void
415: xmlNextChar(xmlParserCtxtPtr ctxt) {
1.176 daniel 416: /*
417: * TODO: 2.11 End-of-Line Handling
418: * the literal two-character sequence "#xD#xA" or a standalone
419: * literal #xD, an XML processor must pass to the application
420: * the single character #xA.
421: */
1.151 daniel 422: if (ctxt->token != 0) ctxt->token = 0;
423: else {
424: if ((*ctxt->input->cur == 0) &&
425: (xmlParserInputGrow(ctxt->input, INPUT_CHUNK) <= 0) &&
426: (ctxt->instate != XML_PARSER_COMMENT)) {
427: /*
428: * If we are at the end of the current entity and
429: * the context allows it, we pop consumed entities
430: * automatically.
431: * TODO: the auto closing should be blocked in other cases
432: */
433: xmlPopInput(ctxt);
434: } else {
435: if (*(ctxt->input->cur) == '\n') {
436: ctxt->input->line++; ctxt->input->col = 1;
437: } else ctxt->input->col++;
438: if (ctxt->encoding == NULL) {
439: /*
440: * We are supposed to handle UTF8, check it's valid
441: * From rfc2044: encoding of the Unicode values on UTF-8:
442: *
443: * UCS-4 range (hex.) UTF-8 octet sequence (binary)
444: * 0000 0000-0000 007F 0xxxxxxx
445: * 0000 0080-0000 07FF 110xxxxx 10xxxxxx
446: * 0000 0800-0000 FFFF 1110xxxx 10xxxxxx 10xxxxxx
447: *
1.160 daniel 448: * Check for the 0x110000 limit too
1.151 daniel 449: */
450: const unsigned char *cur = ctxt->input->cur;
451: unsigned char c;
1.91 daniel 452:
1.151 daniel 453: c = *cur;
454: if (c & 0x80) {
455: if (cur[1] == 0)
456: xmlParserInputGrow(ctxt->input, INPUT_CHUNK);
457: if ((cur[1] & 0xc0) != 0x80)
458: goto encoding_error;
459: if ((c & 0xe0) == 0xe0) {
460: unsigned int val;
461:
462: if (cur[2] == 0)
463: xmlParserInputGrow(ctxt->input, INPUT_CHUNK);
464: if ((cur[2] & 0xc0) != 0x80)
465: goto encoding_error;
466: if ((c & 0xf0) == 0xf0) {
467: if (cur[3] == 0)
468: xmlParserInputGrow(ctxt->input, INPUT_CHUNK);
469: if (((c & 0xf8) != 0xf0) ||
470: ((cur[3] & 0xc0) != 0x80))
471: goto encoding_error;
472: /* 4-byte code */
473: ctxt->input->cur += 4;
474: val = (cur[0] & 0x7) << 18;
475: val |= (cur[1] & 0x3f) << 12;
476: val |= (cur[2] & 0x3f) << 6;
477: val |= cur[3] & 0x3f;
478: } else {
479: /* 3-byte code */
480: ctxt->input->cur += 3;
481: val = (cur[0] & 0xf) << 12;
482: val |= (cur[1] & 0x3f) << 6;
483: val |= cur[2] & 0x3f;
484: }
485: if (((val > 0xd7ff) && (val < 0xe000)) ||
486: ((val > 0xfffd) && (val < 0x10000)) ||
1.160 daniel 487: (val >= 0x110000)) {
1.151 daniel 488: if ((ctxt->sax != NULL) &&
489: (ctxt->sax->error != NULL))
490: ctxt->sax->error(ctxt->userData,
491: "Char out of allowed range\n");
492: ctxt->errNo = XML_ERR_INVALID_ENCODING;
493: ctxt->wellFormed = 0;
1.180 daniel 494: ctxt->disableSAX = 1;
1.151 daniel 495: }
496: } else
497: /* 2-byte code */
498: ctxt->input->cur += 2;
499: } else
500: /* 1-byte code */
501: ctxt->input->cur++;
502: } else {
503: /*
504: * Assume it's a fixed lenght encoding (1) with
505: * a compatibke encoding for the ASCII set, since
506: * XML constructs only use < 128 chars
507: */
508: ctxt->input->cur++;
509: }
510: ctxt->nbChars++;
511: if (*ctxt->input->cur == 0)
512: xmlParserInputGrow(ctxt->input, INPUT_CHUNK);
513: }
514: }
1.154 daniel 515: if (*ctxt->input->cur == '%') xmlParserHandlePEReference(ctxt);
516: if (*ctxt->input->cur == '&') xmlParserHandleReference(ctxt);
1.168 daniel 517: if ((*ctxt->input->cur == 0) &&
518: (xmlParserInputGrow(ctxt->input, INPUT_CHUNK) <= 0))
519: xmlPopInput(ctxt);
1.151 daniel 520: return;
521: encoding_error:
522: /*
523: * If we detect an UTF8 error that probably mean that the
524: * input encoding didn't get properly advertized in the
525: * declaration header. Report the error and switch the encoding
526: * to ISO-Latin-1 (if you don't like this policy, just declare the
527: * encoding !)
528: */
529: if ((ctxt->sax != NULL) && (ctxt->sax->error != NULL))
530: ctxt->sax->error(ctxt->userData,
531: "Input is not proper UTF-8, indicate encoding !\n");
532: ctxt->errNo = XML_ERR_INVALID_ENCODING;
533:
534: ctxt->encoding = xmlStrdup(BAD_CAST "ISO-8859-1");
535: ctxt->input->cur++;
536: return;
537: }
1.42 daniel 538:
1.152 daniel 539: /**
540: * xmlCurrentChar:
541: * @ctxt: the XML parser context
542: * @len: pointer to the length of the char read
543: *
544: * The current char value, if using UTF-8 this may actaully span multiple
1.180 daniel 545: * bytes in the input buffer. Implement the end of line normalization:
546: * 2.11 End-of-Line Handling
547: * Wherever an external parsed entity or the literal entity value
548: * of an internal parsed entity contains either the literal two-character
549: * sequence "#xD#xA" or a standalone literal #xD, an XML processor
550: * must pass to the application the single character #xA.
551: * This behavior can conveniently be produced by normalizing all
552: * line breaks to #xA on input, before parsing.)
1.152 daniel 553: *
554: * Returns the current char value and its lenght
555: */
556:
557: int
558: xmlCurrentChar(xmlParserCtxtPtr ctxt, int *len) {
559: if (ctxt->token != 0) {
560: *len = 0;
561: return(ctxt->token);
562: }
563: if (ctxt->encoding == NULL) {
564: /*
565: * We are supposed to handle UTF8, check it's valid
566: * From rfc2044: encoding of the Unicode values on UTF-8:
567: *
568: * UCS-4 range (hex.) UTF-8 octet sequence (binary)
569: * 0000 0000-0000 007F 0xxxxxxx
570: * 0000 0080-0000 07FF 110xxxxx 10xxxxxx
571: * 0000 0800-0000 FFFF 1110xxxx 10xxxxxx 10xxxxxx
572: *
1.160 daniel 573: * Check for the 0x110000 limit too
1.152 daniel 574: */
575: const unsigned char *cur = ctxt->input->cur;
576: unsigned char c;
577: unsigned int val;
578:
579: c = *cur;
580: if (c & 0x80) {
581: if (cur[1] == 0)
582: xmlParserInputGrow(ctxt->input, INPUT_CHUNK);
583: if ((cur[1] & 0xc0) != 0x80)
584: goto encoding_error;
585: if ((c & 0xe0) == 0xe0) {
586:
587: if (cur[2] == 0)
588: xmlParserInputGrow(ctxt->input, INPUT_CHUNK);
589: if ((cur[2] & 0xc0) != 0x80)
590: goto encoding_error;
591: if ((c & 0xf0) == 0xf0) {
592: if (cur[3] == 0)
593: xmlParserInputGrow(ctxt->input, INPUT_CHUNK);
594: if (((c & 0xf8) != 0xf0) ||
595: ((cur[3] & 0xc0) != 0x80))
596: goto encoding_error;
597: /* 4-byte code */
598: *len = 4;
599: val = (cur[0] & 0x7) << 18;
600: val |= (cur[1] & 0x3f) << 12;
601: val |= (cur[2] & 0x3f) << 6;
602: val |= cur[3] & 0x3f;
603: } else {
604: /* 3-byte code */
605: *len = 3;
606: val = (cur[0] & 0xf) << 12;
607: val |= (cur[1] & 0x3f) << 6;
608: val |= cur[2] & 0x3f;
609: }
610: } else {
611: /* 2-byte code */
612: *len = 2;
613: val = (cur[0] & 0x1f) << 6;
1.168 daniel 614: val |= cur[1] & 0x3f;
1.152 daniel 615: }
616: if (!IS_CHAR(val)) {
617: if ((ctxt->sax != NULL) &&
618: (ctxt->sax->error != NULL))
619: ctxt->sax->error(ctxt->userData,
620: "Char out of allowed range\n");
621: ctxt->errNo = XML_ERR_INVALID_ENCODING;
622: ctxt->wellFormed = 0;
1.180 daniel 623: ctxt->disableSAX = 1;
1.152 daniel 624: }
625: return(val);
626: } else {
627: /* 1-byte code */
628: *len = 1;
1.180 daniel 629: if (*ctxt->input->cur == 0xD) {
630: if (ctxt->input->cur[1] == 0xA) {
631: ctxt->nbChars++;
632: ctxt->input->cur++;
633: }
634: return(0xA);
635: }
1.152 daniel 636: return((int) *ctxt->input->cur);
637: }
638: }
639: /*
640: * Assume it's a fixed lenght encoding (1) with
641: * a compatibke encoding for the ASCII set, since
642: * XML constructs only use < 128 chars
643: */
644: *len = 1;
1.180 daniel 645: if (*ctxt->input->cur == 0xD) {
646: if (ctxt->input->cur[1] == 0xA) {
647: ctxt->nbChars++;
648: ctxt->input->cur++;
649: }
650: return(0xA);
651: }
1.152 daniel 652: return((int) *ctxt->input->cur);
653: encoding_error:
654: /*
655: * If we detect an UTF8 error that probably mean that the
656: * input encoding didn't get properly advertized in the
657: * declaration header. Report the error and switch the encoding
658: * to ISO-Latin-1 (if you don't like this policy, just declare the
659: * encoding !)
660: */
661: if ((ctxt->sax != NULL) && (ctxt->sax->error != NULL))
662: ctxt->sax->error(ctxt->userData,
663: "Input is not proper UTF-8, indicate encoding !\n");
664: ctxt->errNo = XML_ERR_INVALID_ENCODING;
665:
666: ctxt->encoding = xmlStrdup(BAD_CAST "ISO-8859-1");
667: *len = 1;
668: return((int) *ctxt->input->cur);
669: }
670:
671: /**
1.162 daniel 672: * xmlStringCurrentChar:
673: * @ctxt: the XML parser context
674: * @cur: pointer to the beginning of the char
675: * @len: pointer to the length of the char read
676: *
677: * The current char value, if using UTF-8 this may actaully span multiple
678: * bytes in the input buffer.
679: *
680: * Returns the current char value and its lenght
681: */
682:
683: int
684: xmlStringCurrentChar(xmlParserCtxtPtr ctxt, const xmlChar *cur, int *len) {
685: if (ctxt->encoding == NULL) {
686: /*
687: * We are supposed to handle UTF8, check it's valid
688: * From rfc2044: encoding of the Unicode values on UTF-8:
689: *
690: * UCS-4 range (hex.) UTF-8 octet sequence (binary)
691: * 0000 0000-0000 007F 0xxxxxxx
692: * 0000 0080-0000 07FF 110xxxxx 10xxxxxx
693: * 0000 0800-0000 FFFF 1110xxxx 10xxxxxx 10xxxxxx
694: *
695: * Check for the 0x110000 limit too
696: */
697: unsigned char c;
698: unsigned int val;
699:
700: c = *cur;
701: if (c & 0x80) {
702: if ((cur[1] & 0xc0) != 0x80)
703: goto encoding_error;
704: if ((c & 0xe0) == 0xe0) {
705:
706: if ((cur[2] & 0xc0) != 0x80)
707: goto encoding_error;
708: if ((c & 0xf0) == 0xf0) {
709: if (((c & 0xf8) != 0xf0) ||
710: ((cur[3] & 0xc0) != 0x80))
711: goto encoding_error;
712: /* 4-byte code */
713: *len = 4;
714: val = (cur[0] & 0x7) << 18;
715: val |= (cur[1] & 0x3f) << 12;
716: val |= (cur[2] & 0x3f) << 6;
717: val |= cur[3] & 0x3f;
718: } else {
719: /* 3-byte code */
720: *len = 3;
721: val = (cur[0] & 0xf) << 12;
722: val |= (cur[1] & 0x3f) << 6;
723: val |= cur[2] & 0x3f;
724: }
725: } else {
726: /* 2-byte code */
727: *len = 2;
728: val = (cur[0] & 0x1f) << 6;
729: val |= cur[2] & 0x3f;
730: }
731: if (!IS_CHAR(val)) {
732: if ((ctxt->sax != NULL) &&
733: (ctxt->sax->error != NULL))
734: ctxt->sax->error(ctxt->userData,
735: "Char out of allowed range\n");
736: ctxt->errNo = XML_ERR_INVALID_ENCODING;
737: ctxt->wellFormed = 0;
1.180 daniel 738: ctxt->disableSAX = 1;
1.162 daniel 739: }
740: return(val);
741: } else {
742: /* 1-byte code */
743: *len = 1;
744: return((int) *cur);
745: }
746: }
747: /*
748: * Assume it's a fixed lenght encoding (1) with
749: * a compatibke encoding for the ASCII set, since
750: * XML constructs only use < 128 chars
751: */
752: *len = 1;
753: return((int) *cur);
754: encoding_error:
755: /*
756: * If we detect an UTF8 error that probably mean that the
757: * input encoding didn't get properly advertized in the
758: * declaration header. Report the error and switch the encoding
759: * to ISO-Latin-1 (if you don't like this policy, just declare the
760: * encoding !)
761: */
762: if ((ctxt->sax != NULL) && (ctxt->sax->error != NULL))
763: ctxt->sax->error(ctxt->userData,
764: "Input is not proper UTF-8, indicate encoding !\n");
765: ctxt->errNo = XML_ERR_INVALID_ENCODING;
766:
767: *len = 1;
768: return((int) *cur);
769: }
770:
771: /**
1.152 daniel 772: * xmlCopyChar:
773: * @len: pointer to the length of the char read (or zero)
774: * @array: pointer to an arry of xmlChar
775: * @val: the char value
776: *
777: * append the char value in the array
778: *
779: * Returns the number of xmlChar written
780: */
781:
782: int
783: xmlCopyChar(int len, xmlChar *out, int val) {
784: /*
785: * We are supposed to handle UTF8, check it's valid
786: * From rfc2044: encoding of the Unicode values on UTF-8:
787: *
788: * UCS-4 range (hex.) UTF-8 octet sequence (binary)
789: * 0000 0000-0000 007F 0xxxxxxx
790: * 0000 0080-0000 07FF 110xxxxx 10xxxxxx
791: * 0000 0800-0000 FFFF 1110xxxx 10xxxxxx 10xxxxxx
792: */
793: if (len == 0) {
794: if (val < 0) len = 0;
1.160 daniel 795: else if (val < 0x80) len = 1;
796: else if (val < 0x800) len = 2;
797: else if (val < 0x10000) len = 3;
798: else if (val < 0x110000) len = 4;
1.152 daniel 799: if (len == 0) {
800: fprintf(stderr, "Internal error, xmlCopyChar 0x%X out of bound\n",
801: val);
802: return(0);
803: }
804: }
805: if (len > 1) {
806: int bits;
807:
808: if (val < 0x80) { *out++= val; bits= -6; }
809: else if (val < 0x800) { *out++= (val >> 6) | 0xC0; bits= 0; }
810: else if (val < 0x10000) { *out++= (val >> 12) | 0xE0; bits= 6; }
811: else { *out++= (val >> 18) | 0xF0; bits= 12; }
812:
813: for ( ; bits >= 0; bits-= 6)
814: *out++= ((val >> bits) & 0x3F) | 0x80 ;
815:
816: return(len);
817: }
818: *out = (xmlChar) val;
819: return(1);
1.155 daniel 820: }
821:
822: /**
823: * xmlSkipBlankChars:
824: * @ctxt: the XML parser context
825: *
826: * skip all blanks character found at that point in the input streams.
827: * It pops up finished entities in the process if allowable at that point.
828: *
829: * Returns the number of space chars skipped
830: */
831:
832: int
833: xmlSkipBlankChars(xmlParserCtxtPtr ctxt) {
834: int cur, res = 0;
835:
836: do {
837: cur = CUR;
838: while (IS_BLANK(cur)) {
839: NEXT;
840: cur = CUR;
841: res++;
842: }
843: while ((cur == 0) && (ctxt->inputNr > 1) &&
844: (ctxt->instate != XML_PARSER_COMMENT)) {
845: xmlPopInput(ctxt);
846: cur = CUR;
847: }
848: if (*ctxt->input->cur == '%') xmlParserHandlePEReference(ctxt);
849: if (*ctxt->input->cur == '&') xmlParserHandleReference(ctxt);
850: } while (IS_BLANK(cur));
851: return(res);
1.152 daniel 852: }
853:
1.97 daniel 854: /************************************************************************
855: * *
856: * Commodity functions to handle entities processing *
857: * *
858: ************************************************************************/
1.40 daniel 859:
1.50 daniel 860: /**
861: * xmlPopInput:
862: * @ctxt: an XML parser context
863: *
1.40 daniel 864: * xmlPopInput: the current input pointed by ctxt->input came to an end
865: * pop it and return the next char.
1.45 daniel 866: *
1.123 daniel 867: * Returns the current xmlChar in the parser context
1.40 daniel 868: */
1.123 daniel 869: xmlChar
1.55 daniel 870: xmlPopInput(xmlParserCtxtPtr ctxt) {
1.40 daniel 871: if (ctxt->inputNr == 1) return(0); /* End of main Input */
1.69 daniel 872: xmlFreeInputStream(inputPop(ctxt));
1.97 daniel 873: if ((*ctxt->input->cur == 0) &&
874: (xmlParserInputGrow(ctxt->input, INPUT_CHUNK) <= 0))
875: return(xmlPopInput(ctxt));
1.40 daniel 876: return(CUR);
877: }
878:
1.50 daniel 879: /**
880: * xmlPushInput:
881: * @ctxt: an XML parser context
882: * @input: an XML parser input fragment (entity, XML fragment ...).
883: *
1.40 daniel 884: * xmlPushInput: switch to a new input stream which is stacked on top
885: * of the previous one(s).
886: */
1.55 daniel 887: void
888: xmlPushInput(xmlParserCtxtPtr ctxt, xmlParserInputPtr input) {
1.40 daniel 889: if (input == NULL) return;
890: inputPush(ctxt, input);
1.164 daniel 891: GROW;
1.40 daniel 892: }
893:
1.50 daniel 894: /**
1.69 daniel 895: * xmlFreeInputStream:
1.127 daniel 896: * @input: an xmlParserInputPtr
1.69 daniel 897: *
898: * Free up an input stream.
899: */
900: void
901: xmlFreeInputStream(xmlParserInputPtr input) {
902: if (input == NULL) return;
903:
1.119 daniel 904: if (input->filename != NULL) xmlFree((char *) input->filename);
905: if (input->directory != NULL) xmlFree((char *) input->directory);
1.164 daniel 906: if (input->encoding != NULL) xmlFree((char *) input->encoding);
1.165 daniel 907: if (input->version != NULL) xmlFree((char *) input->version);
1.69 daniel 908: if ((input->free != NULL) && (input->base != NULL))
1.123 daniel 909: input->free((xmlChar *) input->base);
1.93 veillard 910: if (input->buf != NULL)
911: xmlFreeParserInputBuffer(input->buf);
1.69 daniel 912: memset(input, -1, sizeof(xmlParserInput));
1.119 daniel 913: xmlFree(input);
1.69 daniel 914: }
915:
916: /**
1.96 daniel 917: * xmlNewInputStream:
918: * @ctxt: an XML parser context
919: *
920: * Create a new input stream structure
921: * Returns the new input stream or NULL
922: */
923: xmlParserInputPtr
924: xmlNewInputStream(xmlParserCtxtPtr ctxt) {
925: xmlParserInputPtr input;
926:
1.119 daniel 927: input = (xmlParserInputPtr) xmlMalloc(sizeof(xmlParserInput));
1.96 daniel 928: if (input == NULL) {
1.123 daniel 929: ctxt->errNo = XML_ERR_NO_MEMORY;
1.96 daniel 930: if ((ctxt->sax != NULL) && (ctxt->sax->error != NULL))
1.122 daniel 931: ctxt->sax->error(ctxt->userData,
932: "malloc: couldn't allocate a new input stream\n");
1.123 daniel 933: ctxt->errNo = XML_ERR_NO_MEMORY;
1.96 daniel 934: return(NULL);
935: }
1.165 daniel 936: memset(input, 0, sizeof(xmlParserInput));
1.96 daniel 937: input->line = 1;
938: input->col = 1;
1.167 daniel 939: input->standalone = -1;
1.96 daniel 940: return(input);
941: }
942:
943: /**
1.50 daniel 944: * xmlNewEntityInputStream:
945: * @ctxt: an XML parser context
946: * @entity: an Entity pointer
947: *
1.82 daniel 948: * Create a new input stream based on an xmlEntityPtr
1.113 daniel 949: *
950: * Returns the new input stream or NULL
1.45 daniel 951: */
1.50 daniel 952: xmlParserInputPtr
953: xmlNewEntityInputStream(xmlParserCtxtPtr ctxt, xmlEntityPtr entity) {
1.45 daniel 954: xmlParserInputPtr input;
955:
956: if (entity == NULL) {
1.123 daniel 957: ctxt->errNo = XML_ERR_INTERNAL_ERROR;
1.55 daniel 958: if ((ctxt->sax != NULL) && (ctxt->sax->error != NULL))
1.74 daniel 959: ctxt->sax->error(ctxt->userData,
1.45 daniel 960: "internal: xmlNewEntityInputStream entity = NULL\n");
1.123 daniel 961: ctxt->errNo = XML_ERR_INTERNAL_ERROR;
1.50 daniel 962: return(NULL);
1.45 daniel 963: }
964: if (entity->content == NULL) {
1.159 daniel 965: switch (entity->etype) {
1.113 daniel 966: case XML_EXTERNAL_GENERAL_UNPARSED_ENTITY:
1.123 daniel 967: ctxt->errNo = XML_ERR_UNPARSED_ENTITY;
1.113 daniel 968: if ((ctxt->sax != NULL) && (ctxt->sax->error != NULL))
969: ctxt->sax->error(ctxt->userData,
970: "xmlNewEntityInputStream unparsed entity !\n");
971: break;
972: case XML_EXTERNAL_GENERAL_PARSED_ENTITY:
973: case XML_EXTERNAL_PARAMETER_ENTITY:
1.116 daniel 974: return(xmlLoadExternalEntity((char *) entity->SystemID,
1.142 daniel 975: (char *) entity->ExternalID, ctxt));
1.113 daniel 976: case XML_INTERNAL_GENERAL_ENTITY:
977: if ((ctxt->sax != NULL) && (ctxt->sax->error != NULL))
978: ctxt->sax->error(ctxt->userData,
979: "Internal entity %s without content !\n", entity->name);
980: break;
981: case XML_INTERNAL_PARAMETER_ENTITY:
1.123 daniel 982: ctxt->errNo = XML_ERR_INTERNAL_ERROR;
1.113 daniel 983: if ((ctxt->sax != NULL) && (ctxt->sax->error != NULL))
984: ctxt->sax->error(ctxt->userData,
985: "Internal parameter entity %s without content !\n", entity->name);
986: break;
987: case XML_INTERNAL_PREDEFINED_ENTITY:
1.123 daniel 988: ctxt->errNo = XML_ERR_INTERNAL_ERROR;
1.113 daniel 989: if ((ctxt->sax != NULL) && (ctxt->sax->error != NULL))
990: ctxt->sax->error(ctxt->userData,
991: "Predefined entity %s without content !\n", entity->name);
992: break;
993: }
1.50 daniel 994: return(NULL);
1.45 daniel 995: }
1.96 daniel 996: input = xmlNewInputStream(ctxt);
1.45 daniel 997: if (input == NULL) {
1.50 daniel 998: return(NULL);
1.45 daniel 999: }
1.156 daniel 1000: input->filename = (char *) entity->SystemID;
1.45 daniel 1001: input->base = entity->content;
1002: input->cur = entity->content;
1.140 daniel 1003: input->length = entity->length;
1.50 daniel 1004: return(input);
1.45 daniel 1005: }
1006:
1.59 daniel 1007: /**
1008: * xmlNewStringInputStream:
1009: * @ctxt: an XML parser context
1.96 daniel 1010: * @buffer: an memory buffer
1.59 daniel 1011: *
1012: * Create a new input stream based on a memory buffer.
1.68 daniel 1013: * Returns the new input stream
1.59 daniel 1014: */
1015: xmlParserInputPtr
1.123 daniel 1016: xmlNewStringInputStream(xmlParserCtxtPtr ctxt, const xmlChar *buffer) {
1.59 daniel 1017: xmlParserInputPtr input;
1018:
1.96 daniel 1019: if (buffer == NULL) {
1.123 daniel 1020: ctxt->errNo = XML_ERR_INTERNAL_ERROR;
1.59 daniel 1021: if ((ctxt->sax != NULL) && (ctxt->sax->error != NULL))
1.74 daniel 1022: ctxt->sax->error(ctxt->userData,
1.59 daniel 1023: "internal: xmlNewStringInputStream string = NULL\n");
1024: return(NULL);
1025: }
1.96 daniel 1026: input = xmlNewInputStream(ctxt);
1.59 daniel 1027: if (input == NULL) {
1028: return(NULL);
1029: }
1.96 daniel 1030: input->base = buffer;
1031: input->cur = buffer;
1.140 daniel 1032: input->length = xmlStrlen(buffer);
1.59 daniel 1033: return(input);
1034: }
1035:
1.76 daniel 1036: /**
1037: * xmlNewInputFromFile:
1038: * @ctxt: an XML parser context
1039: * @filename: the filename to use as entity
1040: *
1041: * Create a new input stream based on a file.
1042: *
1043: * Returns the new input stream or NULL in case of error
1044: */
1045: xmlParserInputPtr
1.79 daniel 1046: xmlNewInputFromFile(xmlParserCtxtPtr ctxt, const char *filename) {
1.91 daniel 1047: xmlParserInputBufferPtr buf;
1.76 daniel 1048: xmlParserInputPtr inputStream;
1.111 daniel 1049: char *directory = NULL;
1.76 daniel 1050:
1.96 daniel 1051: if (ctxt == NULL) return(NULL);
1.91 daniel 1052: buf = xmlParserInputBufferCreateFilename(filename, XML_CHAR_ENCODING_NONE);
1.94 daniel 1053: if (buf == NULL) {
1.140 daniel 1054: char name[XML_PARSER_BIG_BUFFER_SIZE];
1.106 daniel 1055:
1.94 daniel 1056: if ((ctxt->input != NULL) && (ctxt->input->directory != NULL)) {
1057: #ifdef WIN32
1058: sprintf(name, "%s\\%s", ctxt->input->directory, filename);
1059: #else
1060: sprintf(name, "%s/%s", ctxt->input->directory, filename);
1061: #endif
1062: buf = xmlParserInputBufferCreateFilename(name,
1063: XML_CHAR_ENCODING_NONE);
1.106 daniel 1064: if (buf != NULL)
1.142 daniel 1065: directory = xmlParserGetDirectory(name);
1.106 daniel 1066: }
1067: if ((buf == NULL) && (ctxt->directory != NULL)) {
1068: #ifdef WIN32
1069: sprintf(name, "%s\\%s", ctxt->directory, filename);
1070: #else
1071: sprintf(name, "%s/%s", ctxt->directory, filename);
1072: #endif
1073: buf = xmlParserInputBufferCreateFilename(name,
1074: XML_CHAR_ENCODING_NONE);
1075: if (buf != NULL)
1.142 daniel 1076: directory = xmlParserGetDirectory(name);
1.106 daniel 1077: }
1078: if (buf == NULL)
1.94 daniel 1079: return(NULL);
1080: }
1081: if (directory == NULL)
1082: directory = xmlParserGetDirectory(filename);
1.76 daniel 1083:
1.96 daniel 1084: inputStream = xmlNewInputStream(ctxt);
1.76 daniel 1085: if (inputStream == NULL) {
1.119 daniel 1086: if (directory != NULL) xmlFree((char *) directory);
1.76 daniel 1087: return(NULL);
1088: }
1089:
1.119 daniel 1090: inputStream->filename = xmlMemStrdup(filename);
1.94 daniel 1091: inputStream->directory = directory;
1.91 daniel 1092: inputStream->buf = buf;
1.76 daniel 1093:
1.91 daniel 1094: inputStream->base = inputStream->buf->buffer->content;
1095: inputStream->cur = inputStream->buf->buffer->content;
1.106 daniel 1096: if ((ctxt->directory == NULL) && (directory != NULL))
1.134 daniel 1097: ctxt->directory = (char *) xmlStrdup((const xmlChar *) directory);
1.76 daniel 1098: return(inputStream);
1099: }
1100:
1.77 daniel 1101: /************************************************************************
1102: * *
1.97 daniel 1103: * Commodity functions to handle parser contexts *
1104: * *
1105: ************************************************************************/
1106:
1107: /**
1108: * xmlInitParserCtxt:
1109: * @ctxt: an XML parser context
1110: *
1111: * Initialize a parser context
1112: */
1113:
1114: void
1115: xmlInitParserCtxt(xmlParserCtxtPtr ctxt)
1116: {
1117: xmlSAXHandler *sax;
1118:
1.168 daniel 1119: xmlDefaultSAXHandlerInit();
1120:
1.119 daniel 1121: sax = (xmlSAXHandler *) xmlMalloc(sizeof(xmlSAXHandler));
1.97 daniel 1122: if (sax == NULL) {
1123: fprintf(stderr, "xmlInitParserCtxt: out of memory\n");
1124: }
1.180 daniel 1125: memset(sax, 0, sizeof(xmlSAXHandler));
1.97 daniel 1126:
1127: /* Allocate the Input stack */
1.119 daniel 1128: ctxt->inputTab = (xmlParserInputPtr *) xmlMalloc(5 * sizeof(xmlParserInputPtr));
1.97 daniel 1129: ctxt->inputNr = 0;
1130: ctxt->inputMax = 5;
1131: ctxt->input = NULL;
1.165 daniel 1132:
1.97 daniel 1133: ctxt->version = NULL;
1134: ctxt->encoding = NULL;
1135: ctxt->standalone = -1;
1.98 daniel 1136: ctxt->hasExternalSubset = 0;
1137: ctxt->hasPErefs = 0;
1.97 daniel 1138: ctxt->html = 0;
1.98 daniel 1139: ctxt->external = 0;
1.140 daniel 1140: ctxt->instate = XML_PARSER_START;
1.97 daniel 1141: ctxt->token = 0;
1.106 daniel 1142: ctxt->directory = NULL;
1.97 daniel 1143:
1144: /* Allocate the Node stack */
1.119 daniel 1145: ctxt->nodeTab = (xmlNodePtr *) xmlMalloc(10 * sizeof(xmlNodePtr));
1.97 daniel 1146: ctxt->nodeNr = 0;
1147: ctxt->nodeMax = 10;
1148: ctxt->node = NULL;
1149:
1.140 daniel 1150: /* Allocate the Name stack */
1151: ctxt->nameTab = (xmlChar **) xmlMalloc(10 * sizeof(xmlChar *));
1152: ctxt->nameNr = 0;
1153: ctxt->nameMax = 10;
1154: ctxt->name = NULL;
1155:
1.176 daniel 1156: /* Allocate the space stack */
1157: ctxt->spaceTab = (int *) xmlMalloc(10 * sizeof(int));
1158: ctxt->spaceNr = 1;
1159: ctxt->spaceMax = 10;
1160: ctxt->spaceTab[0] = -1;
1161: ctxt->space = &ctxt->spaceTab[0];
1162:
1.160 daniel 1163: if (sax == NULL) {
1164: ctxt->sax = &xmlDefaultSAXHandler;
1165: } else {
1.97 daniel 1166: ctxt->sax = sax;
1167: memcpy(sax, &xmlDefaultSAXHandler, sizeof(xmlSAXHandler));
1168: }
1169: ctxt->userData = ctxt;
1170: ctxt->myDoc = NULL;
1171: ctxt->wellFormed = 1;
1.99 daniel 1172: ctxt->valid = 1;
1.100 daniel 1173: ctxt->validate = xmlDoValidityCheckingDefaultValue;
1.179 daniel 1174: ctxt->keepBlanks = xmlKeepBlanksDefaultValue;
1.100 daniel 1175: ctxt->vctxt.userData = ctxt;
1.149 daniel 1176: if (ctxt->validate) {
1177: ctxt->vctxt.error = xmlParserValidityError;
1.160 daniel 1178: if (xmlGetWarningsDefaultValue == 0)
1179: ctxt->vctxt.warning = NULL;
1180: else
1181: ctxt->vctxt.warning = xmlParserValidityWarning;
1.180 daniel 1182: /* Allocate the Node stack */
1183: ctxt->vctxt.nodeTab = (xmlNodePtr *) xmlMalloc(4 * sizeof(xmlNodePtr));
1184: ctxt->vctxt.nodeNr = 0;
1185: ctxt->vctxt.nodeMax = 4;
1186: ctxt->vctxt.node = NULL;
1.149 daniel 1187: } else {
1188: ctxt->vctxt.error = NULL;
1189: ctxt->vctxt.warning = NULL;
1190: }
1.97 daniel 1191: ctxt->replaceEntities = xmlSubstituteEntitiesDefaultValue;
1192: ctxt->record_info = 0;
1.135 daniel 1193: ctxt->nbChars = 0;
1.140 daniel 1194: ctxt->checkIndex = 0;
1.180 daniel 1195: ctxt->inSubset = 0;
1.140 daniel 1196: ctxt->errNo = XML_ERR_OK;
1.97 daniel 1197: xmlInitNodeInfoSeq(&ctxt->node_seq);
1198: }
1199:
1200: /**
1201: * xmlFreeParserCtxt:
1202: * @ctxt: an XML parser context
1203: *
1204: * Free all the memory used by a parser context. However the parsed
1205: * document in ctxt->myDoc is not freed.
1206: */
1207:
1208: void
1209: xmlFreeParserCtxt(xmlParserCtxtPtr ctxt)
1210: {
1211: xmlParserInputPtr input;
1.140 daniel 1212: xmlChar *oldname;
1.97 daniel 1213:
1214: if (ctxt == NULL) return;
1215:
1216: while ((input = inputPop(ctxt)) != NULL) {
1217: xmlFreeInputStream(input);
1218: }
1.140 daniel 1219: while ((oldname = namePop(ctxt)) != NULL) {
1220: xmlFree(oldname);
1221: }
1.176 daniel 1222: if (ctxt->spaceTab != NULL) xmlFree(ctxt->spaceTab);
1.140 daniel 1223: if (ctxt->nameTab != NULL) xmlFree(ctxt->nameTab);
1.119 daniel 1224: if (ctxt->nodeTab != NULL) xmlFree(ctxt->nodeTab);
1225: if (ctxt->inputTab != NULL) xmlFree(ctxt->inputTab);
1226: if (ctxt->version != NULL) xmlFree((char *) ctxt->version);
1227: if (ctxt->encoding != NULL) xmlFree((char *) ctxt->encoding);
1.165 daniel 1228: if (ctxt->intSubName != NULL) xmlFree((char *) ctxt->intSubName);
1229: if (ctxt->extSubURI != NULL) xmlFree((char *) ctxt->extSubURI);
1230: if (ctxt->extSubSystem != NULL) xmlFree((char *) ctxt->extSubSystem);
1.180 daniel 1231: if (ctxt->vctxt.nodeTab != NULL) xmlFree(ctxt->vctxt.nodeTab);
1.97 daniel 1232: if ((ctxt->sax != NULL) && (ctxt->sax != &xmlDefaultSAXHandler))
1.119 daniel 1233: xmlFree(ctxt->sax);
1234: if (ctxt->directory != NULL) xmlFree((char *) ctxt->directory);
1235: xmlFree(ctxt);
1.97 daniel 1236: }
1237:
1238: /**
1239: * xmlNewParserCtxt:
1240: *
1241: * Allocate and initialize a new parser context.
1242: *
1243: * Returns the xmlParserCtxtPtr or NULL
1244: */
1245:
1246: xmlParserCtxtPtr
1247: xmlNewParserCtxt()
1248: {
1249: xmlParserCtxtPtr ctxt;
1250:
1.119 daniel 1251: ctxt = (xmlParserCtxtPtr) xmlMalloc(sizeof(xmlParserCtxt));
1.97 daniel 1252: if (ctxt == NULL) {
1253: fprintf(stderr, "xmlNewParserCtxt : cannot allocate context\n");
1254: perror("malloc");
1255: return(NULL);
1256: }
1.165 daniel 1257: memset(ctxt, 0, sizeof(xmlParserCtxt));
1.97 daniel 1258: xmlInitParserCtxt(ctxt);
1259: return(ctxt);
1260: }
1261:
1262: /**
1263: * xmlClearParserCtxt:
1264: * @ctxt: an XML parser context
1265: *
1266: * Clear (release owned resources) and reinitialize a parser context
1267: */
1268:
1269: void
1270: xmlClearParserCtxt(xmlParserCtxtPtr ctxt)
1271: {
1272: xmlClearNodeInfoSeq(&ctxt->node_seq);
1273: xmlInitParserCtxt(ctxt);
1274: }
1275:
1276: /************************************************************************
1277: * *
1.77 daniel 1278: * Commodity functions to handle entities *
1279: * *
1280: ************************************************************************/
1281:
1.174 daniel 1282: /**
1283: * xmlCheckEntity:
1284: * @ctxt: an XML parser context
1285: * @content: the entity content string
1286: *
1287: * Parse an entity content and checks the WF constraints
1288: *
1289: */
1290:
1291: void
1292: xmlCheckEntity(xmlParserCtxtPtr ctxt, const xmlChar *content) {
1293: }
1.97 daniel 1294:
1295: /**
1296: * xmlParseCharRef:
1297: * @ctxt: an XML parser context
1298: *
1299: * parse Reference declarations
1300: *
1301: * [66] CharRef ::= '&#' [0-9]+ ';' |
1302: * '&#x' [0-9a-fA-F]+ ';'
1303: *
1.98 daniel 1304: * [ WFC: Legal Character ]
1305: * Characters referred to using character references must match the
1306: * production for Char.
1307: *
1.135 daniel 1308: * Returns the value parsed (as an int), 0 in case of error
1.77 daniel 1309: */
1.97 daniel 1310: int
1311: xmlParseCharRef(xmlParserCtxtPtr ctxt) {
1312: int val = 0;
1313:
1.111 daniel 1314: if (ctxt->token != 0) {
1315: val = ctxt->token;
1316: ctxt->token = 0;
1317: return(val);
1318: }
1.152 daniel 1319: if ((RAW == '&') && (NXT(1) == '#') &&
1.97 daniel 1320: (NXT(2) == 'x')) {
1321: SKIP(3);
1.152 daniel 1322: while (RAW != ';') {
1323: if ((RAW >= '0') && (RAW <= '9'))
1.97 daniel 1324: val = val * 16 + (CUR - '0');
1.152 daniel 1325: else if ((RAW >= 'a') && (RAW <= 'f'))
1.97 daniel 1326: val = val * 16 + (CUR - 'a') + 10;
1.152 daniel 1327: else if ((RAW >= 'A') && (RAW <= 'F'))
1.97 daniel 1328: val = val * 16 + (CUR - 'A') + 10;
1329: else {
1.123 daniel 1330: ctxt->errNo = XML_ERR_INVALID_HEX_CHARREF;
1.97 daniel 1331: if ((ctxt->sax != NULL) && (ctxt->sax->error != NULL))
1332: ctxt->sax->error(ctxt->userData,
1333: "xmlParseCharRef: invalid hexadecimal value\n");
1334: ctxt->wellFormed = 0;
1.180 daniel 1335: ctxt->disableSAX = 1;
1.97 daniel 1336: val = 0;
1337: break;
1338: }
1339: NEXT;
1340: }
1.164 daniel 1341: if (RAW == ';') {
1342: /* on purpose to avoid reentrancy problems with NEXT and SKIP */
1343: ctxt->nbChars ++;
1344: ctxt->input->cur++;
1345: }
1.152 daniel 1346: } else if ((RAW == '&') && (NXT(1) == '#')) {
1.97 daniel 1347: SKIP(2);
1.152 daniel 1348: while (RAW != ';') {
1349: if ((RAW >= '0') && (RAW <= '9'))
1.97 daniel 1350: val = val * 10 + (CUR - '0');
1351: else {
1.123 daniel 1352: ctxt->errNo = XML_ERR_INVALID_DEC_CHARREF;
1.97 daniel 1353: if ((ctxt->sax != NULL) && (ctxt->sax->error != NULL))
1354: ctxt->sax->error(ctxt->userData,
1355: "xmlParseCharRef: invalid decimal value\n");
1356: ctxt->wellFormed = 0;
1.180 daniel 1357: ctxt->disableSAX = 1;
1.97 daniel 1358: val = 0;
1359: break;
1360: }
1361: NEXT;
1362: }
1.164 daniel 1363: if (RAW == ';') {
1364: /* on purpose to avoid reentrancy problems with NEXT and SKIP */
1365: ctxt->nbChars ++;
1366: ctxt->input->cur++;
1367: }
1.97 daniel 1368: } else {
1.123 daniel 1369: ctxt->errNo = XML_ERR_INVALID_CHARREF;
1.97 daniel 1370: if ((ctxt->sax != NULL) && (ctxt->sax->error != NULL))
1.98 daniel 1371: ctxt->sax->error(ctxt->userData,
1372: "xmlParseCharRef: invalid value\n");
1.97 daniel 1373: ctxt->wellFormed = 0;
1.180 daniel 1374: ctxt->disableSAX = 1;
1.97 daniel 1375: }
1.98 daniel 1376:
1.97 daniel 1377: /*
1.98 daniel 1378: * [ WFC: Legal Character ]
1379: * Characters referred to using character references must match the
1380: * production for Char.
1.97 daniel 1381: */
1382: if (IS_CHAR(val)) {
1383: return(val);
1384: } else {
1.123 daniel 1385: ctxt->errNo = XML_ERR_INVALID_CHAR;
1.97 daniel 1386: if ((ctxt->sax != NULL) && (ctxt->sax->error != NULL))
1.123 daniel 1387: ctxt->sax->error(ctxt->userData, "CharRef: invalid xmlChar value %d\n",
1.97 daniel 1388: val);
1389: ctxt->wellFormed = 0;
1.180 daniel 1390: ctxt->disableSAX = 1;
1.97 daniel 1391: }
1392: return(0);
1.77 daniel 1393: }
1394:
1.96 daniel 1395: /**
1.135 daniel 1396: * xmlParseStringCharRef:
1397: * @ctxt: an XML parser context
1398: * @str: a pointer to an index in the string
1399: *
1400: * parse Reference declarations, variant parsing from a string rather
1401: * than an an input flow.
1402: *
1403: * [66] CharRef ::= '&#' [0-9]+ ';' |
1404: * '&#x' [0-9a-fA-F]+ ';'
1405: *
1406: * [ WFC: Legal Character ]
1407: * Characters referred to using character references must match the
1408: * production for Char.
1409: *
1410: * Returns the value parsed (as an int), 0 in case of error, str will be
1411: * updated to the current value of the index
1412: */
1413: int
1414: xmlParseStringCharRef(xmlParserCtxtPtr ctxt, const xmlChar **str) {
1415: const xmlChar *ptr;
1416: xmlChar cur;
1417: int val = 0;
1418:
1419: if ((str == NULL) || (*str == NULL)) return(0);
1420: ptr = *str;
1421: cur = *ptr;
1.137 daniel 1422: if ((cur == '&') && (ptr[1] == '#') && (ptr[2] == 'x')) {
1.135 daniel 1423: ptr += 3;
1424: cur = *ptr;
1425: while (cur != ';') {
1426: if ((cur >= '0') && (cur <= '9'))
1427: val = val * 16 + (cur - '0');
1428: else if ((cur >= 'a') && (cur <= 'f'))
1429: val = val * 16 + (cur - 'a') + 10;
1430: else if ((cur >= 'A') && (cur <= 'F'))
1431: val = val * 16 + (cur - 'A') + 10;
1432: else {
1433: ctxt->errNo = XML_ERR_INVALID_HEX_CHARREF;
1434: if ((ctxt->sax != NULL) && (ctxt->sax->error != NULL))
1435: ctxt->sax->error(ctxt->userData,
1436: "xmlParseCharRef: invalid hexadecimal value\n");
1437: ctxt->wellFormed = 0;
1.180 daniel 1438: ctxt->disableSAX = 1;
1.135 daniel 1439: val = 0;
1440: break;
1441: }
1442: ptr++;
1443: cur = *ptr;
1444: }
1445: if (cur == ';')
1446: ptr++;
1.145 daniel 1447: } else if ((cur == '&') && (ptr[1] == '#')){
1.135 daniel 1448: ptr += 2;
1449: cur = *ptr;
1450: while (cur != ';') {
1451: if ((cur >= '0') && (cur <= '9'))
1452: val = val * 10 + (cur - '0');
1453: else {
1454: ctxt->errNo = XML_ERR_INVALID_DEC_CHARREF;
1455: if ((ctxt->sax != NULL) && (ctxt->sax->error != NULL))
1456: ctxt->sax->error(ctxt->userData,
1457: "xmlParseCharRef: invalid decimal value\n");
1458: ctxt->wellFormed = 0;
1.180 daniel 1459: ctxt->disableSAX = 1;
1.135 daniel 1460: val = 0;
1461: break;
1462: }
1463: ptr++;
1464: cur = *ptr;
1465: }
1466: if (cur == ';')
1467: ptr++;
1468: } else {
1469: ctxt->errNo = XML_ERR_INVALID_CHARREF;
1470: if ((ctxt->sax != NULL) && (ctxt->sax->error != NULL))
1471: ctxt->sax->error(ctxt->userData,
1472: "xmlParseCharRef: invalid value\n");
1473: ctxt->wellFormed = 0;
1.180 daniel 1474: ctxt->disableSAX = 1;
1.135 daniel 1475: return(0);
1476: }
1477: *str = ptr;
1478:
1479: /*
1480: * [ WFC: Legal Character ]
1481: * Characters referred to using character references must match the
1482: * production for Char.
1483: */
1484: if (IS_CHAR(val)) {
1485: return(val);
1486: } else {
1487: ctxt->errNo = XML_ERR_INVALID_CHAR;
1488: if ((ctxt->sax != NULL) && (ctxt->sax->error != NULL))
1489: ctxt->sax->error(ctxt->userData,
1490: "CharRef: invalid xmlChar value %d\n", val);
1491: ctxt->wellFormed = 0;
1.180 daniel 1492: ctxt->disableSAX = 1;
1.135 daniel 1493: }
1494: return(0);
1495: }
1496:
1497: /**
1.96 daniel 1498: * xmlParserHandleReference:
1499: * @ctxt: the parser context
1500: *
1.97 daniel 1501: * [67] Reference ::= EntityRef | CharRef
1502: *
1.96 daniel 1503: * [68] EntityRef ::= '&' Name ';'
1504: *
1.98 daniel 1505: * [ WFC: Entity Declared ]
1506: * the Name given in the entity reference must match that in an entity
1507: * declaration, except that well-formed documents need not declare any
1508: * of the following entities: amp, lt, gt, apos, quot.
1509: *
1510: * [ WFC: Parsed Entity ]
1511: * An entity reference must not contain the name of an unparsed entity
1512: *
1.97 daniel 1513: * [66] CharRef ::= '&#' [0-9]+ ';' |
1514: * '&#x' [0-9a-fA-F]+ ';'
1515: *
1.96 daniel 1516: * A PEReference may have been detectect in the current input stream
1517: * the handling is done accordingly to
1518: * http://www.w3.org/TR/REC-xml#entproc
1519: */
1520: void
1521: xmlParserHandleReference(xmlParserCtxtPtr ctxt) {
1.97 daniel 1522: xmlParserInputPtr input;
1.123 daniel 1523: xmlChar *name;
1.97 daniel 1524: xmlEntityPtr ent = NULL;
1525:
1.126 daniel 1526: if (ctxt->token != 0) {
1527: return;
1528: }
1.152 daniel 1529: if (RAW != '&') return;
1.97 daniel 1530: GROW;
1.152 daniel 1531: if ((RAW == '&') && (NXT(1) == '#')) {
1.97 daniel 1532: switch(ctxt->instate) {
1.140 daniel 1533: case XML_PARSER_ENTITY_DECL:
1534: case XML_PARSER_PI:
1.109 daniel 1535: case XML_PARSER_CDATA_SECTION:
1.140 daniel 1536: case XML_PARSER_COMMENT:
1.168 daniel 1537: case XML_PARSER_SYSTEM_LITERAL:
1.140 daniel 1538: /* we just ignore it there */
1539: return;
1540: case XML_PARSER_START_TAG:
1.109 daniel 1541: return;
1.140 daniel 1542: case XML_PARSER_END_TAG:
1.97 daniel 1543: return;
1544: case XML_PARSER_EOF:
1.123 daniel 1545: ctxt->errNo = XML_ERR_CHARREF_AT_EOF;
1.97 daniel 1546: if ((ctxt->sax != NULL) && (ctxt->sax->error != NULL))
1547: ctxt->sax->error(ctxt->userData, "CharRef at EOF\n");
1548: ctxt->wellFormed = 0;
1.180 daniel 1549: ctxt->disableSAX = 1;
1.97 daniel 1550: return;
1551: case XML_PARSER_PROLOG:
1.140 daniel 1552: case XML_PARSER_START:
1553: case XML_PARSER_MISC:
1.123 daniel 1554: ctxt->errNo = XML_ERR_CHARREF_IN_PROLOG;
1.97 daniel 1555: if ((ctxt->sax != NULL) && (ctxt->sax->error != NULL))
1556: ctxt->sax->error(ctxt->userData, "CharRef in prolog!\n");
1557: ctxt->wellFormed = 0;
1.180 daniel 1558: ctxt->disableSAX = 1;
1.97 daniel 1559: return;
1560: case XML_PARSER_EPILOG:
1.123 daniel 1561: ctxt->errNo = XML_ERR_CHARREF_IN_EPILOG;
1.97 daniel 1562: if ((ctxt->sax != NULL) && (ctxt->sax->error != NULL))
1563: ctxt->sax->error(ctxt->userData, "CharRef in epilog!\n");
1564: ctxt->wellFormed = 0;
1.180 daniel 1565: ctxt->disableSAX = 1;
1.97 daniel 1566: return;
1567: case XML_PARSER_DTD:
1.123 daniel 1568: ctxt->errNo = XML_ERR_CHARREF_IN_DTD;
1.97 daniel 1569: if ((ctxt->sax != NULL) && (ctxt->sax->error != NULL))
1570: ctxt->sax->error(ctxt->userData,
1571: "CharRef are forbiden in DTDs!\n");
1572: ctxt->wellFormed = 0;
1.180 daniel 1573: ctxt->disableSAX = 1;
1.97 daniel 1574: return;
1575: case XML_PARSER_ENTITY_VALUE:
1576: /*
1577: * NOTE: in the case of entity values, we don't do the
1.127 daniel 1578: * substitution here since we need the literal
1.97 daniel 1579: * entity value to be able to save the internal
1580: * subset of the document.
1581: * This will be handled by xmlDecodeEntities
1582: */
1583: return;
1584: case XML_PARSER_CONTENT:
1585: case XML_PARSER_ATTRIBUTE_VALUE:
1586: ctxt->token = xmlParseCharRef(ctxt);
1587: return;
1588: }
1589: return;
1590: }
1591:
1592: switch(ctxt->instate) {
1.109 daniel 1593: case XML_PARSER_CDATA_SECTION:
1594: return;
1.140 daniel 1595: case XML_PARSER_PI:
1.97 daniel 1596: case XML_PARSER_COMMENT:
1.168 daniel 1597: case XML_PARSER_SYSTEM_LITERAL:
1598: case XML_PARSER_CONTENT:
1.97 daniel 1599: return;
1.140 daniel 1600: case XML_PARSER_START_TAG:
1601: return;
1602: case XML_PARSER_END_TAG:
1603: return;
1.97 daniel 1604: case XML_PARSER_EOF:
1.123 daniel 1605: ctxt->errNo = XML_ERR_ENTITYREF_AT_EOF;
1.97 daniel 1606: if ((ctxt->sax != NULL) && (ctxt->sax->error != NULL))
1607: ctxt->sax->error(ctxt->userData, "Reference at EOF\n");
1608: ctxt->wellFormed = 0;
1.180 daniel 1609: ctxt->disableSAX = 1;
1.97 daniel 1610: return;
1611: case XML_PARSER_PROLOG:
1.140 daniel 1612: case XML_PARSER_START:
1613: case XML_PARSER_MISC:
1.123 daniel 1614: ctxt->errNo = XML_ERR_ENTITYREF_IN_PROLOG;
1.97 daniel 1615: if ((ctxt->sax != NULL) && (ctxt->sax->error != NULL))
1616: ctxt->sax->error(ctxt->userData, "Reference in prolog!\n");
1617: ctxt->wellFormed = 0;
1.180 daniel 1618: ctxt->disableSAX = 1;
1.97 daniel 1619: return;
1620: case XML_PARSER_EPILOG:
1.123 daniel 1621: ctxt->errNo = XML_ERR_ENTITYREF_IN_EPILOG;
1.97 daniel 1622: if ((ctxt->sax != NULL) && (ctxt->sax->error != NULL))
1623: ctxt->sax->error(ctxt->userData, "Reference in epilog!\n");
1624: ctxt->wellFormed = 0;
1.180 daniel 1625: ctxt->disableSAX = 1;
1.97 daniel 1626: return;
1627: case XML_PARSER_ENTITY_VALUE:
1628: /*
1629: * NOTE: in the case of entity values, we don't do the
1.127 daniel 1630: * substitution here since we need the literal
1.97 daniel 1631: * entity value to be able to save the internal
1632: * subset of the document.
1633: * This will be handled by xmlDecodeEntities
1634: */
1635: return;
1636: case XML_PARSER_ATTRIBUTE_VALUE:
1637: /*
1638: * NOTE: in the case of attributes values, we don't do the
1639: * substitution here unless we are in a mode where
1640: * the parser is explicitely asked to substitute
1641: * entities. The SAX callback is called with values
1642: * without entity substitution.
1643: * This will then be handled by xmlDecodeEntities
1644: */
1.113 daniel 1645: return;
1.97 daniel 1646: case XML_PARSER_ENTITY_DECL:
1647: /*
1648: * we just ignore it there
1649: * the substitution will be done once the entity is referenced
1650: */
1651: return;
1652: case XML_PARSER_DTD:
1.123 daniel 1653: ctxt->errNo = XML_ERR_ENTITYREF_IN_DTD;
1.97 daniel 1654: if ((ctxt->sax != NULL) && (ctxt->sax->error != NULL))
1655: ctxt->sax->error(ctxt->userData,
1656: "Entity references are forbiden in DTDs!\n");
1657: ctxt->wellFormed = 0;
1.180 daniel 1658: ctxt->disableSAX = 1;
1.97 daniel 1659: return;
1660: }
1661:
1662: NEXT;
1663: name = xmlScanName(ctxt);
1664: if (name == NULL) {
1.123 daniel 1665: ctxt->errNo = XML_ERR_ENTITYREF_NO_NAME;
1.97 daniel 1666: if ((ctxt->sax != NULL) && (ctxt->sax->error != NULL))
1667: ctxt->sax->error(ctxt->userData, "Entity reference: no name\n");
1668: ctxt->wellFormed = 0;
1.180 daniel 1669: ctxt->disableSAX = 1;
1.97 daniel 1670: ctxt->token = '&';
1671: return;
1672: }
1673: if (NXT(xmlStrlen(name)) != ';') {
1.123 daniel 1674: ctxt->errNo = XML_ERR_ENTITYREF_SEMICOL_MISSING;
1.97 daniel 1675: if ((ctxt->sax != NULL) && (ctxt->sax->error != NULL))
1676: ctxt->sax->error(ctxt->userData,
1677: "Entity reference: ';' expected\n");
1678: ctxt->wellFormed = 0;
1.180 daniel 1679: ctxt->disableSAX = 1;
1.97 daniel 1680: ctxt->token = '&';
1.119 daniel 1681: xmlFree(name);
1.97 daniel 1682: return;
1683: }
1684: SKIP(xmlStrlen(name) + 1);
1685: if (ctxt->sax != NULL) {
1686: if (ctxt->sax->getEntity != NULL)
1687: ent = ctxt->sax->getEntity(ctxt->userData, name);
1688: }
1.98 daniel 1689:
1690: /*
1691: * [ WFC: Entity Declared ]
1692: * the Name given in the entity reference must match that in an entity
1693: * declaration, except that well-formed documents need not declare any
1694: * of the following entities: amp, lt, gt, apos, quot.
1695: */
1.97 daniel 1696: if (ent == NULL)
1697: ent = xmlGetPredefinedEntity(name);
1698: if (ent == NULL) {
1.123 daniel 1699: ctxt->errNo = XML_ERR_UNDECLARED_ENTITY;
1.97 daniel 1700: if ((ctxt->sax != NULL) && (ctxt->sax->error != NULL))
1701: ctxt->sax->error(ctxt->userData,
1.98 daniel 1702: "Entity reference: entity %s not declared\n",
1703: name);
1.97 daniel 1704: ctxt->wellFormed = 0;
1.180 daniel 1705: ctxt->disableSAX = 1;
1.119 daniel 1706: xmlFree(name);
1.97 daniel 1707: return;
1708: }
1.98 daniel 1709:
1710: /*
1711: * [ WFC: Parsed Entity ]
1712: * An entity reference must not contain the name of an unparsed entity
1713: */
1.159 daniel 1714: if (ent->etype == XML_EXTERNAL_GENERAL_UNPARSED_ENTITY) {
1.123 daniel 1715: ctxt->errNo = XML_ERR_UNPARSED_ENTITY;
1.98 daniel 1716: if ((ctxt->sax != NULL) && (ctxt->sax->error != NULL))
1717: ctxt->sax->error(ctxt->userData,
1718: "Entity reference to unparsed entity %s\n", name);
1719: ctxt->wellFormed = 0;
1.180 daniel 1720: ctxt->disableSAX = 1;
1.98 daniel 1721: }
1722:
1.159 daniel 1723: if (ent->etype == XML_INTERNAL_PREDEFINED_ENTITY) {
1.97 daniel 1724: ctxt->token = ent->content[0];
1.119 daniel 1725: xmlFree(name);
1.97 daniel 1726: return;
1727: }
1728: input = xmlNewEntityInputStream(ctxt, ent);
1729: xmlPushInput(ctxt, input);
1.119 daniel 1730: xmlFree(name);
1.96 daniel 1731: return;
1732: }
1733:
1734: /**
1735: * xmlParserHandlePEReference:
1736: * @ctxt: the parser context
1737: *
1738: * [69] PEReference ::= '%' Name ';'
1739: *
1.98 daniel 1740: * [ WFC: No Recursion ]
1741: * TODO A parsed entity must not contain a recursive
1742: * reference to itself, either directly or indirectly.
1743: *
1744: * [ WFC: Entity Declared ]
1745: * In a document without any DTD, a document with only an internal DTD
1746: * subset which contains no parameter entity references, or a document
1747: * with "standalone='yes'", ... ... The declaration of a parameter
1748: * entity must precede any reference to it...
1749: *
1750: * [ VC: Entity Declared ]
1751: * In a document with an external subset or external parameter entities
1752: * with "standalone='no'", ... ... The declaration of a parameter entity
1753: * must precede any reference to it...
1754: *
1755: * [ WFC: In DTD ]
1756: * Parameter-entity references may only appear in the DTD.
1757: * NOTE: misleading but this is handled.
1758: *
1759: * A PEReference may have been detected in the current input stream
1.96 daniel 1760: * the handling is done accordingly to
1761: * http://www.w3.org/TR/REC-xml#entproc
1762: * i.e.
1763: * - Included in literal in entity values
1764: * - Included as Parameter Entity reference within DTDs
1765: */
1766: void
1767: xmlParserHandlePEReference(xmlParserCtxtPtr ctxt) {
1.123 daniel 1768: xmlChar *name;
1.96 daniel 1769: xmlEntityPtr entity = NULL;
1770: xmlParserInputPtr input;
1771:
1.126 daniel 1772: if (ctxt->token != 0) {
1773: return;
1774: }
1.152 daniel 1775: if (RAW != '%') return;
1.96 daniel 1776: switch(ctxt->instate) {
1.109 daniel 1777: case XML_PARSER_CDATA_SECTION:
1778: return;
1.97 daniel 1779: case XML_PARSER_COMMENT:
1780: return;
1.140 daniel 1781: case XML_PARSER_START_TAG:
1782: return;
1783: case XML_PARSER_END_TAG:
1784: return;
1.96 daniel 1785: case XML_PARSER_EOF:
1.123 daniel 1786: ctxt->errNo = XML_ERR_PEREF_AT_EOF;
1.96 daniel 1787: if ((ctxt->sax != NULL) && (ctxt->sax->error != NULL))
1788: ctxt->sax->error(ctxt->userData, "PEReference at EOF\n");
1789: ctxt->wellFormed = 0;
1.180 daniel 1790: ctxt->disableSAX = 1;
1.96 daniel 1791: return;
1792: case XML_PARSER_PROLOG:
1.140 daniel 1793: case XML_PARSER_START:
1794: case XML_PARSER_MISC:
1.123 daniel 1795: ctxt->errNo = XML_ERR_PEREF_IN_PROLOG;
1.96 daniel 1796: if ((ctxt->sax != NULL) && (ctxt->sax->error != NULL))
1797: ctxt->sax->error(ctxt->userData, "PEReference in prolog!\n");
1798: ctxt->wellFormed = 0;
1.180 daniel 1799: ctxt->disableSAX = 1;
1.96 daniel 1800: return;
1.97 daniel 1801: case XML_PARSER_ENTITY_DECL:
1.96 daniel 1802: case XML_PARSER_CONTENT:
1803: case XML_PARSER_ATTRIBUTE_VALUE:
1.140 daniel 1804: case XML_PARSER_PI:
1.168 daniel 1805: case XML_PARSER_SYSTEM_LITERAL:
1.96 daniel 1806: /* we just ignore it there */
1807: return;
1808: case XML_PARSER_EPILOG:
1.123 daniel 1809: ctxt->errNo = XML_ERR_PEREF_IN_EPILOG;
1.96 daniel 1810: if ((ctxt->sax != NULL) && (ctxt->sax->error != NULL))
1.97 daniel 1811: ctxt->sax->error(ctxt->userData, "PEReference in epilog!\n");
1.96 daniel 1812: ctxt->wellFormed = 0;
1.180 daniel 1813: ctxt->disableSAX = 1;
1.96 daniel 1814: return;
1.97 daniel 1815: case XML_PARSER_ENTITY_VALUE:
1816: /*
1817: * NOTE: in the case of entity values, we don't do the
1.127 daniel 1818: * substitution here since we need the literal
1.97 daniel 1819: * entity value to be able to save the internal
1820: * subset of the document.
1821: * This will be handled by xmlDecodeEntities
1822: */
1823: return;
1.96 daniel 1824: case XML_PARSER_DTD:
1.98 daniel 1825: /*
1826: * [WFC: Well-Formedness Constraint: PEs in Internal Subset]
1827: * In the internal DTD subset, parameter-entity references
1828: * can occur only where markup declarations can occur, not
1829: * within markup declarations.
1830: * In that case this is handled in xmlParseMarkupDecl
1831: */
1832: if ((ctxt->external == 0) && (ctxt->inputNr == 1))
1833: return;
1.96 daniel 1834: }
1835:
1836: NEXT;
1837: name = xmlParseName(ctxt);
1838: if (name == NULL) {
1.123 daniel 1839: ctxt->errNo = XML_ERR_PEREF_NO_NAME;
1.96 daniel 1840: if ((ctxt->sax != NULL) && (ctxt->sax->error != NULL))
1841: ctxt->sax->error(ctxt->userData, "xmlHandlePEReference: no name\n");
1842: ctxt->wellFormed = 0;
1.180 daniel 1843: ctxt->disableSAX = 1;
1.96 daniel 1844: } else {
1.152 daniel 1845: if (RAW == ';') {
1.96 daniel 1846: NEXT;
1.98 daniel 1847: if ((ctxt->sax != NULL) && (ctxt->sax->getParameterEntity != NULL))
1848: entity = ctxt->sax->getParameterEntity(ctxt->userData, name);
1.96 daniel 1849: if (entity == NULL) {
1.98 daniel 1850:
1851: /*
1852: * [ WFC: Entity Declared ]
1853: * In a document without any DTD, a document with only an
1854: * internal DTD subset which contains no parameter entity
1855: * references, or a document with "standalone='yes'", ...
1856: * ... The declaration of a parameter entity must precede
1857: * any reference to it...
1858: */
1859: if ((ctxt->standalone == 1) ||
1860: ((ctxt->hasExternalSubset == 0) &&
1861: (ctxt->hasPErefs == 0))) {
1862: if ((ctxt->sax != NULL) && (ctxt->sax->error != NULL))
1863: ctxt->sax->error(ctxt->userData,
1864: "PEReference: %%%s; not found\n", name);
1865: ctxt->wellFormed = 0;
1.180 daniel 1866: ctxt->disableSAX = 1;
1.98 daniel 1867: } else {
1868: /*
1869: * [ VC: Entity Declared ]
1870: * In a document with an external subset or external
1871: * parameter entities with "standalone='no'", ...
1872: * ... The declaration of a parameter entity must precede
1873: * any reference to it...
1874: */
1875: if ((ctxt->sax != NULL) && (ctxt->sax->warning != NULL))
1876: ctxt->sax->warning(ctxt->userData,
1877: "PEReference: %%%s; not found\n", name);
1878: ctxt->valid = 0;
1879: }
1.96 daniel 1880: } else {
1.159 daniel 1881: if ((entity->etype == XML_INTERNAL_PARAMETER_ENTITY) ||
1882: (entity->etype == XML_EXTERNAL_PARAMETER_ENTITY)) {
1.96 daniel 1883: /*
1.156 daniel 1884: * TODO !!! handle the extra spaces added before and after
1.96 daniel 1885: * c.f. http://www.w3.org/TR/REC-xml#as-PE
1886: */
1887: input = xmlNewEntityInputStream(ctxt, entity);
1888: xmlPushInput(ctxt, input);
1.164 daniel 1889: if ((entity->etype == XML_EXTERNAL_PARAMETER_ENTITY) &&
1890: (RAW == '<') && (NXT(1) == '?') &&
1891: (NXT(2) == 'x') && (NXT(3) == 'm') &&
1892: (NXT(4) == 'l') && (IS_BLANK(NXT(5)))) {
1.172 daniel 1893: xmlParseTextDecl(ctxt);
1.164 daniel 1894: }
1895: if (ctxt->token == 0)
1896: ctxt->token = ' ';
1.96 daniel 1897: } else {
1898: if ((ctxt->sax != NULL) && (ctxt->sax->error != NULL))
1899: ctxt->sax->error(ctxt->userData,
1900: "xmlHandlePEReference: %s is not a parameter entity\n",
1901: name);
1902: ctxt->wellFormed = 0;
1.180 daniel 1903: ctxt->disableSAX = 1;
1.96 daniel 1904: }
1905: }
1906: } else {
1.123 daniel 1907: ctxt->errNo = XML_ERR_PEREF_SEMICOL_MISSING;
1.96 daniel 1908: if ((ctxt->sax != NULL) && (ctxt->sax->error != NULL))
1909: ctxt->sax->error(ctxt->userData,
1910: "xmlHandlePEReference: expecting ';'\n");
1911: ctxt->wellFormed = 0;
1.180 daniel 1912: ctxt->disableSAX = 1;
1.96 daniel 1913: }
1.119 daniel 1914: xmlFree(name);
1.97 daniel 1915: }
1916: }
1917:
/*
 * Macro used to grow the current buffer.
 *
 * Expects two variables in the enclosing scope: the xmlChar pointer
 * `buffer` and its companion int `buffer_size`. The size is doubled and
 * the block reallocated. On allocation failure the old block is released
 * (xmlRealloc leaves it allocated, so assigning the result directly to
 * `buffer` would leak it) and the enclosing function returns NULL.
 */
#define growBuffer(buffer) {						\
    xmlChar *buffer##_grown;						\
    buffer##_size *= 2;							\
    buffer##_grown = (xmlChar *)					\
	    xmlRealloc(buffer, buffer##_size * sizeof(xmlChar));	\
    if (buffer##_grown == NULL) {					\
	perror("realloc failed");					\
	xmlFree(buffer);						\
	return(NULL);							\
    }									\
    buffer = buffer##_grown;						\
}
1.77 daniel 1930:
1931: /**
1932: * xmlDecodeEntities:
1933: * @ctxt: the parser context
1934: * @what: combination of XML_SUBSTITUTE_REF and XML_SUBSTITUTE_PEREF
1935: * @len: the len to decode (in bytes !), -1 for no size limit
1.123 daniel 1936: * @end: an end marker xmlChar, 0 if none
1937: * @end2: an end marker xmlChar, 0 if none
1938: * @end3: an end marker xmlChar, 0 if none
1.77 daniel 1939: *
1940: * [67] Reference ::= EntityRef | CharRef
1941: *
1942: * [69] PEReference ::= '%' Name ';'
1943: *
1944: * Returns A newly allocated string with the substitution done. The caller
1945: * must deallocate it !
1946: */
1.123 daniel 1947: xmlChar *
1.77 daniel 1948: xmlDecodeEntities(xmlParserCtxtPtr ctxt, int len, int what,
1.123 daniel 1949: xmlChar end, xmlChar end2, xmlChar end3) {
1950: xmlChar *buffer = NULL;
1.78 daniel 1951: int buffer_size = 0;
1.161 daniel 1952: int nbchars = 0;
1.78 daniel 1953:
1.123 daniel 1954: xmlChar *current = NULL;
1.77 daniel 1955: xmlEntityPtr ent;
1956: unsigned int max = (unsigned int) len;
1.161 daniel 1957: int c,l;
1.77 daniel 1958:
1959: /*
1960: * allocate a translation buffer.
1961: */
1.140 daniel 1962: buffer_size = XML_PARSER_BIG_BUFFER_SIZE;
1.123 daniel 1963: buffer = (xmlChar *) xmlMalloc(buffer_size * sizeof(xmlChar));
1.77 daniel 1964: if (buffer == NULL) {
1965: perror("xmlDecodeEntities: malloc failed");
1966: return(NULL);
1967: }
1968:
1.78 daniel 1969: /*
1970: * Ok loop until we reach one of the ending char or a size limit.
1971: */
1.161 daniel 1972: c = CUR_CHAR(l);
1973: while ((nbchars < max) && (c != end) &&
1974: (c != end2) && (c != end3)) {
1.77 daniel 1975:
1.161 daniel 1976: if (c == 0) break;
1977: if (((c == '&') && (ctxt->token != '&')) && (NXT(1) == '#')) {
1.98 daniel 1978: int val = xmlParseCharRef(ctxt);
1.161 daniel 1979: COPY_BUF(0,buffer,nbchars,val);
1980: NEXTL(l);
1981: } else if ((c == '&') && (ctxt->token != '&') &&
1982: (what & XML_SUBSTITUTE_REF)) {
1.98 daniel 1983: ent = xmlParseEntityRef(ctxt);
1984: if ((ent != NULL) &&
1985: (ctxt->replaceEntities != 0)) {
1986: current = ent->content;
1987: while (*current != 0) {
1.161 daniel 1988: buffer[nbchars++] = *current++;
1989: if (nbchars > buffer_size - XML_PARSER_BUFFER_SIZE) {
1.98 daniel 1990: growBuffer(buffer);
1.77 daniel 1991: }
1992: }
1.98 daniel 1993: } else if (ent != NULL) {
1.123 daniel 1994: const xmlChar *cur = ent->name;
1.98 daniel 1995:
1.161 daniel 1996: buffer[nbchars++] = '&';
1997: if (nbchars > buffer_size - XML_PARSER_BUFFER_SIZE) {
1.98 daniel 1998: growBuffer(buffer);
1999: }
1.161 daniel 2000: while (*cur != 0) {
2001: buffer[nbchars++] = *cur++;
2002: }
2003: buffer[nbchars++] = ';';
1.77 daniel 2004: }
1.161 daniel 2005: } else if (c == '%' && (what & XML_SUBSTITUTE_PEREF)) {
1.97 daniel 2006: /*
1.77 daniel 2007: * a PEReference induce to switch the entity flow,
2008: * we break here to flush the current set of chars
2009: * parsed if any. We will be called back later.
1.97 daniel 2010: */
1.91 daniel 2011: if (nbchars != 0) break;
1.77 daniel 2012:
2013: xmlParsePEReference(ctxt);
1.79 daniel 2014:
1.97 daniel 2015: /*
1.79 daniel 2016: * Pop-up of finished entities.
1.97 daniel 2017: */
1.152 daniel 2018: while ((RAW == 0) && (ctxt->inputNr > 1))
1.79 daniel 2019: xmlPopInput(ctxt);
2020:
1.98 daniel 2021: break;
1.77 daniel 2022: } else {
1.161 daniel 2023: COPY_BUF(l,buffer,nbchars,c);
2024: NEXTL(l);
2025: if (nbchars > buffer_size - XML_PARSER_BUFFER_SIZE) {
1.86 daniel 2026: growBuffer(buffer);
2027: }
1.77 daniel 2028: }
1.161 daniel 2029: c = CUR_CHAR(l);
1.77 daniel 2030: }
1.161 daniel 2031: buffer[nbchars++] = 0;
1.77 daniel 2032: return(buffer);
2033: }
2034:
1.135 daniel 2035: /**
2036: * xmlStringDecodeEntities:
2037: * @ctxt: the parser context
2038: * @str: the input string
2039: * @what: combination of XML_SUBSTITUTE_REF and XML_SUBSTITUTE_PEREF
2040: * @end: an end marker xmlChar, 0 if none
2041: * @end2: an end marker xmlChar, 0 if none
2042: * @end3: an end marker xmlChar, 0 if none
2043: *
2044: * [67] Reference ::= EntityRef | CharRef
2045: *
2046: * [69] PEReference ::= '%' Name ';'
2047: *
2048: * Returns A newly allocated string with the substitution done. The caller
2049: * must deallocate it !
2050: */
2051: xmlChar *
2052: xmlStringDecodeEntities(xmlParserCtxtPtr ctxt, const xmlChar *str, int what,
2053: xmlChar end, xmlChar end2, xmlChar end3) {
2054: xmlChar *buffer = NULL;
2055: int buffer_size = 0;
2056:
2057: xmlChar *current = NULL;
2058: xmlEntityPtr ent;
1.176 daniel 2059: int c,l;
2060: int nbchars = 0;
1.135 daniel 2061:
2062: /*
2063: * allocate a translation buffer.
2064: */
1.140 daniel 2065: buffer_size = XML_PARSER_BIG_BUFFER_SIZE;
1.135 daniel 2066: buffer = (xmlChar *) xmlMalloc(buffer_size * sizeof(xmlChar));
2067: if (buffer == NULL) {
2068: perror("xmlDecodeEntities: malloc failed");
2069: return(NULL);
2070: }
2071:
2072: /*
2073: * Ok loop until we reach one of the ending char or a size limit.
2074: */
1.176 daniel 2075: c = CUR_SCHAR(str, l);
2076: while ((c != 0) && (c != end) && (c != end2) && (c != end3)) {
1.135 daniel 2077:
1.176 daniel 2078: if (c == 0) break;
2079: if ((c == '&') && (str[1] == '#')) {
1.135 daniel 2080: int val = xmlParseStringCharRef(ctxt, &str);
1.176 daniel 2081: if (val != 0) {
2082: COPY_BUF(0,buffer,nbchars,val);
2083: }
2084: } else if ((c == '&') && (what & XML_SUBSTITUTE_REF)) {
1.135 daniel 2085: ent = xmlParseStringEntityRef(ctxt, &str);
2086: if ((ent != NULL) &&
2087: (ctxt->replaceEntities != 0)) {
2088: current = ent->content;
2089: while (*current != 0) {
1.176 daniel 2090: buffer[nbchars++] = *current++;
2091: if (nbchars > buffer_size - XML_PARSER_BUFFER_SIZE) {
1.135 daniel 2092: growBuffer(buffer);
2093: }
2094: }
2095: } else if (ent != NULL) {
2096: int i = xmlStrlen(ent->name);
2097: const xmlChar *cur = ent->name;
2098:
1.176 daniel 2099: buffer[nbchars++] = '&';
2100: if (nbchars > buffer_size - i - XML_PARSER_BUFFER_SIZE) {
1.135 daniel 2101: growBuffer(buffer);
2102: }
2103: for (;i > 0;i--)
1.176 daniel 2104: buffer[nbchars++] = *cur++;
2105: buffer[nbchars++] = ';';
1.135 daniel 2106: }
1.176 daniel 2107: } else if (c == '%' && (what & XML_SUBSTITUTE_PEREF)) {
1.135 daniel 2108: ent = xmlParseStringPEReference(ctxt, &str);
2109: if (ent != NULL) {
2110: current = ent->content;
2111: while (*current != 0) {
1.176 daniel 2112: buffer[nbchars++] = *current++;
2113: if (nbchars > buffer_size - XML_PARSER_BUFFER_SIZE) {
1.135 daniel 2114: growBuffer(buffer);
2115: }
2116: }
2117: }
2118: } else {
1.176 daniel 2119: COPY_BUF(l,buffer,nbchars,c);
2120: str += l;
2121: if (nbchars > buffer_size - XML_PARSER_BUFFER_SIZE) {
1.135 daniel 2122: growBuffer(buffer);
2123: }
2124: }
1.176 daniel 2125: c = CUR_SCHAR(str, l);
1.135 daniel 2126: }
1.176 daniel 2127: buffer[nbchars++] = 0;
1.135 daniel 2128: return(buffer);
2129: }
2130:
1.1 veillard 2131:
1.28 daniel 2132: /************************************************************************
2133: * *
1.75 daniel 2134: * Commodity functions to handle encodings *
2135: * *
2136: ************************************************************************/
2137:
1.172 daniel 2138: /*
2139: * xmlCheckLanguageID
2140: * @lang: pointer to the string value
2141: *
2142: * Checks that the value conforms to the LanguageID production:
2143: *
2144: * [33] LanguageID ::= Langcode ('-' Subcode)*
2145: * [34] Langcode ::= ISO639Code | IanaCode | UserCode
2146: * [35] ISO639Code ::= ([a-z] | [A-Z]) ([a-z] | [A-Z])
2147: * [36] IanaCode ::= ('i' | 'I') '-' ([a-z] | [A-Z])+
2148: * [37] UserCode ::= ('x' | 'X') '-' ([a-z] | [A-Z])+
2149: * [38] Subcode ::= ([a-z] | [A-Z])+
2150: *
2151: * Returns 1 if correct 0 otherwise
2152: **/
2153: int
2154: xmlCheckLanguageID(const xmlChar *lang) {
2155: const xmlChar *cur = lang;
2156:
2157: if (cur == NULL)
2158: return(0);
2159: if (((cur[0] == 'i') && (cur[1] == '-')) ||
2160: ((cur[0] == 'I') && (cur[1] == '-'))) {
2161: /*
2162: * IANA code
2163: */
2164: cur += 2;
2165: while (((cur[0] >= 'A') && (cur[0] <= 'Z')) ||
2166: ((cur[0] >= 'a') && (cur[0] <= 'z')))
2167: cur++;
2168: } else if (((cur[0] == 'x') && (cur[1] == '-')) ||
2169: ((cur[0] == 'X') && (cur[1] == '-'))) {
2170: /*
2171: * User code
2172: */
2173: cur += 2;
2174: while (((cur[0] >= 'A') && (cur[0] <= 'Z')) ||
2175: ((cur[0] >= 'a') && (cur[0] <= 'z')))
2176: cur++;
2177: } else if (((cur[0] >= 'A') && (cur[0] <= 'Z')) ||
2178: ((cur[0] >= 'a') && (cur[0] <= 'z'))) {
2179: /*
2180: * ISO639
2181: */
2182: cur++;
2183: if (((cur[0] >= 'A') && (cur[0] <= 'Z')) ||
2184: ((cur[0] >= 'a') && (cur[0] <= 'z')))
2185: cur++;
2186: else
2187: return(0);
2188: } else
2189: return(0);
2190: while (cur[0] != 0) {
2191: if (cur[0] != '-')
2192: return(0);
2193: cur++;
2194: if (((cur[0] >= 'A') && (cur[0] <= 'Z')) ||
2195: ((cur[0] >= 'a') && (cur[0] <= 'z')))
2196: cur++;
2197: else
2198: return(0);
2199: while (((cur[0] >= 'A') && (cur[0] <= 'Z')) ||
2200: ((cur[0] >= 'a') && (cur[0] <= 'z')))
2201: cur++;
2202: }
2203: return(1);
2204: }
2205:
1.75 daniel 2206: /**
2207: * xmlSwitchEncoding:
2208: * @ctxt: the parser context
1.124 daniel 2209: * @enc: the encoding value (number)
1.75 daniel 2210: *
2211: * change the input functions when discovering the character encoding
2212: * of a given entity.
2213: */
2214: void
2215: xmlSwitchEncoding(xmlParserCtxtPtr ctxt, xmlCharEncoding enc)
2216: {
1.156 daniel 2217: xmlCharEncodingHandlerPtr handler;
2218:
2219: handler = xmlGetCharEncodingHandler(enc);
2220: if (handler != NULL) {
2221: if (ctxt->input != NULL) {
2222: if (ctxt->input->buf != NULL) {
2223: if (ctxt->input->buf->encoder != NULL) {
2224: if ((ctxt->sax != NULL) && (ctxt->sax->error != NULL))
2225: ctxt->sax->error(ctxt->userData,
2226: "xmlSwitchEncoding : encoder already regitered\n");
2227: return;
2228: }
2229: ctxt->input->buf->encoder = handler;
2230:
2231: /*
2232: * Is there already some content down the pipe to convert
2233: */
2234: if ((ctxt->input->buf->buffer != NULL) &&
2235: (ctxt->input->buf->buffer->use > 0)) {
2236: xmlChar *buf;
2237: int res, len, size;
2238: int processed;
2239:
2240: /*
2241: * Specific handling of the Byte Order Mark for
2242: * UTF-16
2243: */
2244: if ((enc == XML_CHAR_ENCODING_UTF16LE) &&
2245: (ctxt->input->cur[0] == 0xFF) &&
2246: (ctxt->input->cur[1] == 0xFE)) {
2247: SKIP(2);
2248: }
2249: if ((enc == XML_CHAR_ENCODING_UTF16BE) &&
2250: (ctxt->input->cur[0] == 0xFE) &&
2251: (ctxt->input->cur[1] == 0xFF)) {
2252: SKIP(2);
2253: }
2254:
2255: /*
2256: * convert the non processed part
2257: */
2258: processed = ctxt->input->cur - ctxt->input->base;
2259: len = ctxt->input->buf->buffer->use - processed;
2260:
2261: if (len <= 0) {
2262: return;
2263: }
2264: size = ctxt->input->buf->buffer->use * 4;
2265: if (size < 4000)
2266: size = 4000;
1.167 daniel 2267: retry_larger:
1.160 daniel 2268: buf = (xmlChar *) xmlMalloc(size + 1);
1.156 daniel 2269: if (buf == NULL) {
2270: if ((ctxt->sax != NULL) && (ctxt->sax->error != NULL))
2271: ctxt->sax->error(ctxt->userData,
2272: "xmlSwitchEncoding : out of memory\n");
2273: return;
2274: }
1.160 daniel 2275: /* TODO !!! Handling of buf too small */
1.156 daniel 2276: res = handler->input(buf, size, ctxt->input->cur, &len);
1.167 daniel 2277: if (res == -1) {
2278: size *= 2;
2279: xmlFree(buf);
2280: goto retry_larger;
2281: }
1.156 daniel 2282: if ((res < 0) ||
2283: (len != ctxt->input->buf->buffer->use - processed)) {
2284: if ((ctxt->sax != NULL) && (ctxt->sax->error != NULL))
2285: ctxt->sax->error(ctxt->userData,
2286: "xmlSwitchEncoding : conversion failed\n");
2287: xmlFree(buf);
2288: return;
2289: }
1.167 daniel 2290:
1.156 daniel 2291: /*
2292: * Conversion succeeded, get rid of the old buffer
2293: */
2294: xmlFree(ctxt->input->buf->buffer->content);
2295: ctxt->input->buf->buffer->content = buf;
2296: ctxt->input->base = buf;
2297: ctxt->input->cur = buf;
2298: ctxt->input->buf->buffer->size = size;
2299: ctxt->input->buf->buffer->use = res;
1.160 daniel 2300: buf[res] = 0;
1.156 daniel 2301: }
2302: return;
2303: } else {
2304: if (ctxt->input->length == 0) {
2305: /*
2306: * When parsing a static memory array one must know the
2307: * size to be able to convert the buffer.
2308: */
2309: if ((ctxt->sax != NULL) && (ctxt->sax->error != NULL))
2310: ctxt->sax->error(ctxt->userData,
2311: "xmlSwitchEncoding : no input\n");
2312: return;
2313: } else {
2314: xmlChar *buf;
2315: int res, len;
2316: int processed = ctxt->input->cur - ctxt->input->base;
2317:
2318: /*
2319: * convert the non processed part
2320: */
2321: len = ctxt->input->length - processed;
2322: if (len <= 0) {
2323: if ((ctxt->sax != NULL) && (ctxt->sax->error != NULL))
2324: ctxt->sax->error(ctxt->userData,
2325: "xmlSwitchEncoding : input fully consumed?\n");
2326: return;
2327: }
2328: buf = (xmlChar *) xmlMalloc(ctxt->input->length * 4);
2329: if (buf == NULL) {
2330: if ((ctxt->sax != NULL) && (ctxt->sax->error != NULL))
2331: ctxt->sax->error(ctxt->userData,
2332: "xmlSwitchEncoding : out of memory\n");
2333: return;
2334: }
2335: res = handler->input(buf, ctxt->input->length * 4,
2336: ctxt->input->cur, &len);
2337: if ((res < 0) ||
2338: (len != ctxt->input->length - processed)) {
2339: if ((ctxt->sax != NULL) && (ctxt->sax->error != NULL))
2340: ctxt->sax->error(ctxt->userData,
2341: "xmlSwitchEncoding : conversion failed\n");
2342: xmlFree(buf);
2343: return;
2344: }
2345: /*
2346: * Conversion succeeded, get rid of the old buffer
2347: */
2348: if ((ctxt->input->free != NULL) &&
2349: (ctxt->input->base != NULL))
2350: ctxt->input->free((xmlChar *) ctxt->input->base);
2351: ctxt->input->base = ctxt->input->cur = buf;
2352: ctxt->input->length = res;
2353: }
2354: }
2355: } else {
2356: if ((ctxt->sax != NULL) && (ctxt->sax->error != NULL))
2357: ctxt->sax->error(ctxt->userData,
2358: "xmlSwitchEncoding : no input\n");
2359: }
2360: }
2361:
1.75 daniel 2362: switch (enc) {
2363: case XML_CHAR_ENCODING_ERROR:
1.123 daniel 2364: ctxt->errNo = XML_ERR_UNKNOWN_ENCODING;
1.75 daniel 2365: if ((ctxt->sax != NULL) && (ctxt->sax->error != NULL))
2366: ctxt->sax->error(ctxt->userData, "encoding unknown\n");
2367: ctxt->wellFormed = 0;
1.180 daniel 2368: ctxt->disableSAX = 1;
1.75 daniel 2369: break;
2370: case XML_CHAR_ENCODING_NONE:
2371: /* let's assume it's UTF-8 without the XML decl */
2372: return;
2373: case XML_CHAR_ENCODING_UTF8:
2374: /* default encoding, no conversion should be needed */
2375: return;
2376: case XML_CHAR_ENCODING_UTF16LE:
1.123 daniel 2377: ctxt->errNo = XML_ERR_UNSUPPORTED_ENCODING;
1.75 daniel 2378: if ((ctxt->sax != NULL) && (ctxt->sax->error != NULL))
2379: ctxt->sax->error(ctxt->userData,
2380: "char encoding UTF16 little endian not supported\n");
2381: break;
2382: case XML_CHAR_ENCODING_UTF16BE:
1.123 daniel 2383: ctxt->errNo = XML_ERR_UNSUPPORTED_ENCODING;
1.75 daniel 2384: if ((ctxt->sax != NULL) && (ctxt->sax->error != NULL))
2385: ctxt->sax->error(ctxt->userData,
2386: "char encoding UTF16 big endian not supported\n");
2387: break;
2388: case XML_CHAR_ENCODING_UCS4LE:
1.123 daniel 2389: ctxt->errNo = XML_ERR_UNSUPPORTED_ENCODING;
1.75 daniel 2390: if ((ctxt->sax != NULL) && (ctxt->sax->error != NULL))
2391: ctxt->sax->error(ctxt->userData,
2392: "char encoding USC4 little endian not supported\n");
2393: break;
2394: case XML_CHAR_ENCODING_UCS4BE:
1.123 daniel 2395: ctxt->errNo = XML_ERR_UNSUPPORTED_ENCODING;
1.75 daniel 2396: if ((ctxt->sax != NULL) && (ctxt->sax->error != NULL))
2397: ctxt->sax->error(ctxt->userData,
2398: "char encoding USC4 big endian not supported\n");
2399: break;
2400: case XML_CHAR_ENCODING_EBCDIC:
1.123 daniel 2401: ctxt->errNo = XML_ERR_UNSUPPORTED_ENCODING;
1.75 daniel 2402: if ((ctxt->sax != NULL) && (ctxt->sax->error != NULL))
2403: ctxt->sax->error(ctxt->userData,
2404: "char encoding EBCDIC not supported\n");
2405: break;
2406: case XML_CHAR_ENCODING_UCS4_2143:
1.123 daniel 2407: ctxt->errNo = XML_ERR_UNSUPPORTED_ENCODING;
1.75 daniel 2408: if ((ctxt->sax != NULL) && (ctxt->sax->error != NULL))
2409: ctxt->sax->error(ctxt->userData,
2410: "char encoding UCS4 2143 not supported\n");
2411: break;
2412: case XML_CHAR_ENCODING_UCS4_3412:
1.123 daniel 2413: ctxt->errNo = XML_ERR_UNSUPPORTED_ENCODING;
1.75 daniel 2414: if ((ctxt->sax != NULL) && (ctxt->sax->error != NULL))
2415: ctxt->sax->error(ctxt->userData,
2416: "char encoding UCS4 3412 not supported\n");
2417: break;
2418: case XML_CHAR_ENCODING_UCS2:
1.123 daniel 2419: ctxt->errNo = XML_ERR_UNSUPPORTED_ENCODING;
1.75 daniel 2420: if ((ctxt->sax != NULL) && (ctxt->sax->error != NULL))
2421: ctxt->sax->error(ctxt->userData,
2422: "char encoding UCS2 not supported\n");
2423: break;
2424: case XML_CHAR_ENCODING_8859_1:
1.123 daniel 2425: ctxt->errNo = XML_ERR_UNSUPPORTED_ENCODING;
1.75 daniel 2426: if ((ctxt->sax != NULL) && (ctxt->sax->error != NULL))
2427: ctxt->sax->error(ctxt->userData,
2428: "char encoding ISO_8859_1 ISO Latin 1 not supported\n");
2429: break;
2430: case XML_CHAR_ENCODING_8859_2:
1.123 daniel 2431: ctxt->errNo = XML_ERR_UNSUPPORTED_ENCODING;
1.75 daniel 2432: if ((ctxt->sax != NULL) && (ctxt->sax->error != NULL))
2433: ctxt->sax->error(ctxt->userData,
2434: "char encoding ISO_8859_2 ISO Latin 2 not supported\n");
2435: break;
2436: case XML_CHAR_ENCODING_8859_3:
1.123 daniel 2437: ctxt->errNo = XML_ERR_UNSUPPORTED_ENCODING;
1.75 daniel 2438: if ((ctxt->sax != NULL) && (ctxt->sax->error != NULL))
2439: ctxt->sax->error(ctxt->userData,
2440: "char encoding ISO_8859_3 not supported\n");
2441: break;
2442: case XML_CHAR_ENCODING_8859_4:
1.123 daniel 2443: ctxt->errNo = XML_ERR_UNSUPPORTED_ENCODING;
1.75 daniel 2444: if ((ctxt->sax != NULL) && (ctxt->sax->error != NULL))
2445: ctxt->sax->error(ctxt->userData,
2446: "char encoding ISO_8859_4 not supported\n");
2447: break;
2448: case XML_CHAR_ENCODING_8859_5:
1.123 daniel 2449: ctxt->errNo = XML_ERR_UNSUPPORTED_ENCODING;
1.75 daniel 2450: if ((ctxt->sax != NULL) && (ctxt->sax->error != NULL))
2451: ctxt->sax->error(ctxt->userData,
2452: "char encoding ISO_8859_5 not supported\n");
2453: break;
2454: case XML_CHAR_ENCODING_8859_6:
1.123 daniel 2455: ctxt->errNo = XML_ERR_UNSUPPORTED_ENCODING;
1.75 daniel 2456: if ((ctxt->sax != NULL) && (ctxt->sax->error != NULL))
2457: ctxt->sax->error(ctxt->userData,
2458: "char encoding ISO_8859_6 not supported\n");
2459: break;
2460: case XML_CHAR_ENCODING_8859_7:
1.123 daniel 2461: ctxt->errNo = XML_ERR_UNSUPPORTED_ENCODING;
1.75 daniel 2462: if ((ctxt->sax != NULL) && (ctxt->sax->error != NULL))
2463: ctxt->sax->error(ctxt->userData,
2464: "char encoding ISO_8859_7 not supported\n");
2465: break;
2466: case XML_CHAR_ENCODING_8859_8:
1.123 daniel 2467: ctxt->errNo = XML_ERR_UNSUPPORTED_ENCODING;
1.75 daniel 2468: if ((ctxt->sax != NULL) && (ctxt->sax->error != NULL))
2469: ctxt->sax->error(ctxt->userData,
2470: "char encoding ISO_8859_8 not supported\n");
2471: break;
2472: case XML_CHAR_ENCODING_8859_9:
1.123 daniel 2473: ctxt->errNo = XML_ERR_UNSUPPORTED_ENCODING;
1.75 daniel 2474: if ((ctxt->sax != NULL) && (ctxt->sax->error != NULL))
2475: ctxt->sax->error(ctxt->userData,
2476: "char encoding ISO_8859_9 not supported\n");
2477: break;
2478: case XML_CHAR_ENCODING_2022_JP:
1.123 daniel 2479: ctxt->errNo = XML_ERR_UNSUPPORTED_ENCODING;
1.75 daniel 2480: if ((ctxt->sax != NULL) && (ctxt->sax->error != NULL))
2481: ctxt->sax->error(ctxt->userData,
2482: "char encoding ISO-2022-JPnot supported\n");
2483: break;
2484: case XML_CHAR_ENCODING_SHIFT_JIS:
1.123 daniel 2485: ctxt->errNo = XML_ERR_UNSUPPORTED_ENCODING;
1.75 daniel 2486: if ((ctxt->sax != NULL) && (ctxt->sax->error != NULL))
2487: ctxt->sax->error(ctxt->userData,
2488: "char encoding Shift_JISnot supported\n");
2489: break;
2490: case XML_CHAR_ENCODING_EUC_JP:
1.123 daniel 2491: ctxt->errNo = XML_ERR_UNSUPPORTED_ENCODING;
1.75 daniel 2492: if ((ctxt->sax != NULL) && (ctxt->sax->error != NULL))
2493: ctxt->sax->error(ctxt->userData,
2494: "char encoding EUC-JPnot supported\n");
2495: break;
2496: }
2497: }
2498:
2499: /************************************************************************
2500: * *
1.123 daniel 2501: * Commodity functions to handle xmlChars *
1.28 daniel 2502: * *
2503: ************************************************************************/
2504:
1.50 daniel 2505: /**
2506: * xmlStrndup:
1.123 daniel 2507: * @cur: the input xmlChar *
1.50 daniel 2508: * @len: the len of @cur
2509: *
1.123 daniel 2510: * a strndup for array of xmlChar's
1.68 daniel 2511: *
1.123 daniel 2512: * Returns a new xmlChar * or NULL
1.1 veillard 2513: */
1.123 daniel 2514: xmlChar *
2515: xmlStrndup(const xmlChar *cur, int len) {
1.135 daniel 2516: xmlChar *ret;
2517:
2518: if ((cur == NULL) || (len < 0)) return(NULL);
2519: ret = xmlMalloc((len + 1) * sizeof(xmlChar));
1.1 veillard 2520: if (ret == NULL) {
1.86 daniel 2521: fprintf(stderr, "malloc of %ld byte failed\n",
1.123 daniel 2522: (len + 1) * (long)sizeof(xmlChar));
1.1 veillard 2523: return(NULL);
2524: }
1.123 daniel 2525: memcpy(ret, cur, len * sizeof(xmlChar));
1.1 veillard 2526: ret[len] = 0;
2527: return(ret);
2528: }
2529:
1.50 daniel 2530: /**
2531: * xmlStrdup:
1.123 daniel 2532: * @cur: the input xmlChar *
1.50 daniel 2533: *
1.152 daniel 2534: * a strdup for array of xmlChar's. Since they are supposed to be
2535: * encoded in UTF-8 or an encoding with 8bit based chars, we assume
2536: * a termination mark of '0'.
1.68 daniel 2537: *
1.123 daniel 2538: * Returns a new xmlChar * or NULL
1.1 veillard 2539: */
1.123 daniel 2540: xmlChar *
2541: xmlStrdup(const xmlChar *cur) {
2542: const xmlChar *p = cur;
1.1 veillard 2543:
1.135 daniel 2544: if (cur == NULL) return(NULL);
1.152 daniel 2545: while (*p != 0) p++;
1.1 veillard 2546: return(xmlStrndup(cur, p - cur));
2547: }
2548:
1.50 daniel 2549: /**
2550: * xmlCharStrndup:
2551: * @cur: the input char *
2552: * @len: the len of @cur
2553: *
1.123 daniel 2554: * a strndup for char's to xmlChar's
1.68 daniel 2555: *
1.123 daniel 2556: * Returns a new xmlChar * or NULL
1.45 daniel 2557: */
2558:
1.123 daniel 2559: xmlChar *
1.55 daniel 2560: xmlCharStrndup(const char *cur, int len) {
1.45 daniel 2561: int i;
1.135 daniel 2562: xmlChar *ret;
2563:
2564: if ((cur == NULL) || (len < 0)) return(NULL);
2565: ret = xmlMalloc((len + 1) * sizeof(xmlChar));
1.45 daniel 2566: if (ret == NULL) {
1.86 daniel 2567: fprintf(stderr, "malloc of %ld byte failed\n",
1.123 daniel 2568: (len + 1) * (long)sizeof(xmlChar));
1.45 daniel 2569: return(NULL);
2570: }
2571: for (i = 0;i < len;i++)
1.123 daniel 2572: ret[i] = (xmlChar) cur[i];
1.45 daniel 2573: ret[len] = 0;
2574: return(ret);
2575: }
2576:
1.50 daniel 2577: /**
2578: * xmlCharStrdup:
2579: * @cur: the input char *
2580: * @len: the len of @cur
2581: *
1.123 daniel 2582: * a strdup for char's to xmlChar's
1.68 daniel 2583: *
1.123 daniel 2584: * Returns a new xmlChar * or NULL
1.45 daniel 2585: */
2586:
1.123 daniel 2587: xmlChar *
1.55 daniel 2588: xmlCharStrdup(const char *cur) {
1.45 daniel 2589: const char *p = cur;
2590:
1.135 daniel 2591: if (cur == NULL) return(NULL);
1.45 daniel 2592: while (*p != '\0') p++;
2593: return(xmlCharStrndup(cur, p - cur));
2594: }
2595:
1.50 daniel 2596: /**
2597: * xmlStrcmp:
1.123 daniel 2598: * @str1: the first xmlChar *
2599: * @str2: the second xmlChar *
1.50 daniel 2600: *
1.123 daniel 2601: * a strcmp for xmlChar's
1.68 daniel 2602: *
2603: * Returns the integer result of the comparison
1.14 veillard 2604: */
2605:
1.55 daniel 2606: int
1.123 daniel 2607: xmlStrcmp(const xmlChar *str1, const xmlChar *str2) {
1.14 veillard 2608: register int tmp;
2609:
1.135 daniel 2610: if ((str1 == NULL) && (str2 == NULL)) return(0);
2611: if (str1 == NULL) return(-1);
2612: if (str2 == NULL) return(1);
1.14 veillard 2613: do {
2614: tmp = *str1++ - *str2++;
2615: if (tmp != 0) return(tmp);
2616: } while ((*str1 != 0) && (*str2 != 0));
2617: return (*str1 - *str2);
2618: }
2619:
1.50 daniel 2620: /**
2621: * xmlStrncmp:
1.123 daniel 2622: * @str1: the first xmlChar *
2623: * @str2: the second xmlChar *
1.50 daniel 2624: * @len: the max comparison length
2625: *
1.123 daniel 2626: * a strncmp for xmlChar's
1.68 daniel 2627: *
2628: * Returns the integer result of the comparison
1.14 veillard 2629: */
2630:
1.55 daniel 2631: int
1.123 daniel 2632: xmlStrncmp(const xmlChar *str1, const xmlChar *str2, int len) {
1.14 veillard 2633: register int tmp;
2634:
2635: if (len <= 0) return(0);
1.135 daniel 2636: if ((str1 == NULL) && (str2 == NULL)) return(0);
2637: if (str1 == NULL) return(-1);
2638: if (str2 == NULL) return(1);
1.14 veillard 2639: do {
2640: tmp = *str1++ - *str2++;
2641: if (tmp != 0) return(tmp);
2642: len--;
2643: if (len <= 0) return(0);
2644: } while ((*str1 != 0) && (*str2 != 0));
2645: return (*str1 - *str2);
2646: }
2647:
1.50 daniel 2648: /**
2649: * xmlStrchr:
1.123 daniel 2650: * @str: the xmlChar * array
2651: * @val: the xmlChar to search
1.50 daniel 2652: *
1.123 daniel 2653: * a strchr for xmlChar's
1.68 daniel 2654: *
1.123 daniel 2655: * Returns the xmlChar * for the first occurence or NULL.
1.14 veillard 2656: */
2657:
1.123 daniel 2658: const xmlChar *
2659: xmlStrchr(const xmlChar *str, xmlChar val) {
1.135 daniel 2660: if (str == NULL) return(NULL);
1.14 veillard 2661: while (*str != 0) {
1.123 daniel 2662: if (*str == val) return((xmlChar *) str);
1.14 veillard 2663: str++;
2664: }
2665: return(NULL);
1.89 daniel 2666: }
2667:
2668: /**
2669: * xmlStrstr:
1.123 daniel 2670: * @str: the xmlChar * array (haystack)
2671: * @val: the xmlChar to search (needle)
1.89 daniel 2672: *
1.123 daniel 2673: * a strstr for xmlChar's
1.89 daniel 2674: *
1.123 daniel 2675: * Returns the xmlChar * for the first occurence or NULL.
1.89 daniel 2676: */
2677:
1.123 daniel 2678: const xmlChar *
2679: xmlStrstr(const xmlChar *str, xmlChar *val) {
1.89 daniel 2680: int n;
2681:
2682: if (str == NULL) return(NULL);
2683: if (val == NULL) return(NULL);
2684: n = xmlStrlen(val);
2685:
2686: if (n == 0) return(str);
2687: while (*str != 0) {
2688: if (*str == *val) {
1.123 daniel 2689: if (!xmlStrncmp(str, val, n)) return((const xmlChar *) str);
1.89 daniel 2690: }
2691: str++;
2692: }
2693: return(NULL);
2694: }
2695:
2696: /**
2697: * xmlStrsub:
1.123 daniel 2698: * @str: the xmlChar * array (haystack)
1.89 daniel 2699: * @start: the index of the first char (zero based)
2700: * @len: the length of the substring
2701: *
2702: * Extract a substring of a given string
2703: *
1.123 daniel 2704: * Returns the xmlChar * for the first occurence or NULL.
1.89 daniel 2705: */
2706:
1.123 daniel 2707: xmlChar *
2708: xmlStrsub(const xmlChar *str, int start, int len) {
1.89 daniel 2709: int i;
2710:
2711: if (str == NULL) return(NULL);
2712: if (start < 0) return(NULL);
1.90 daniel 2713: if (len < 0) return(NULL);
1.89 daniel 2714:
2715: for (i = 0;i < start;i++) {
2716: if (*str == 0) return(NULL);
2717: str++;
2718: }
2719: if (*str == 0) return(NULL);
2720: return(xmlStrndup(str, len));
1.14 veillard 2721: }
1.28 daniel 2722:
1.50 daniel 2723: /**
2724: * xmlStrlen:
1.123 daniel 2725: * @str: the xmlChar * array
1.50 daniel 2726: *
1.127 daniel 2727: * length of a xmlChar's string
1.68 daniel 2728: *
1.123 daniel 2729: * Returns the number of xmlChar contained in the ARRAY.
1.45 daniel 2730: */
2731:
1.55 daniel 2732: int
1.123 daniel 2733: xmlStrlen(const xmlChar *str) {
1.45 daniel 2734: int len = 0;
2735:
2736: if (str == NULL) return(0);
2737: while (*str != 0) {
2738: str++;
2739: len++;
2740: }
2741: return(len);
2742: }
2743:
1.50 daniel 2744: /**
2745: * xmlStrncat:
1.123 daniel 2746: * @cur: the original xmlChar * array
2747: * @add: the xmlChar * array added
1.50 daniel 2748: * @len: the length of @add
2749: *
1.123 daniel 2750: * a strncat for array of xmlChar's
1.68 daniel 2751: *
1.123 daniel 2752: * Returns a new xmlChar * containing the concatenated string.
1.45 daniel 2753: */
2754:
1.123 daniel 2755: xmlChar *
2756: xmlStrncat(xmlChar *cur, const xmlChar *add, int len) {
1.45 daniel 2757: int size;
1.123 daniel 2758: xmlChar *ret;
1.45 daniel 2759:
2760: if ((add == NULL) || (len == 0))
2761: return(cur);
2762: if (cur == NULL)
2763: return(xmlStrndup(add, len));
2764:
2765: size = xmlStrlen(cur);
1.123 daniel 2766: ret = xmlRealloc(cur, (size + len + 1) * sizeof(xmlChar));
1.45 daniel 2767: if (ret == NULL) {
1.86 daniel 2768: fprintf(stderr, "xmlStrncat: realloc of %ld byte failed\n",
1.123 daniel 2769: (size + len + 1) * (long)sizeof(xmlChar));
1.45 daniel 2770: return(cur);
2771: }
1.123 daniel 2772: memcpy(&ret[size], add, len * sizeof(xmlChar));
1.45 daniel 2773: ret[size + len] = 0;
2774: return(ret);
2775: }
2776:
1.50 daniel 2777: /**
2778: * xmlStrcat:
1.123 daniel 2779: * @cur: the original xmlChar * array
2780: * @add: the xmlChar * array added
1.50 daniel 2781: *
1.152 daniel 2782: * a strcat for array of xmlChar's. Since they are supposed to be
2783: * encoded in UTF-8 or an encoding with 8bit based chars, we assume
2784: * a termination mark of '0'.
1.68 daniel 2785: *
1.123 daniel 2786: * Returns a new xmlChar * containing the concatenated string.
1.45 daniel 2787: */
1.123 daniel 2788: xmlChar *
2789: xmlStrcat(xmlChar *cur, const xmlChar *add) {
2790: const xmlChar *p = add;
1.45 daniel 2791:
2792: if (add == NULL) return(cur);
2793: if (cur == NULL)
2794: return(xmlStrdup(add));
2795:
1.152 daniel 2796: while (*p != 0) p++;
1.45 daniel 2797: return(xmlStrncat(cur, add, p - add));
2798: }
2799:
2800: /************************************************************************
2801: * *
2802: * Commodity functions, cleanup needed ? *
2803: * *
2804: ************************************************************************/
2805:
1.50 daniel 2806: /**
2807: * areBlanks:
2808: * @ctxt: an XML parser context
1.123 daniel 2809: * @str: a xmlChar *
1.50 daniel 2810: * @len: the size of @str
2811: *
1.45 daniel 2812: * Is this a sequence of blank chars that one can ignore ?
1.50 daniel 2813: *
1.68 daniel 2814: * Returns 1 if ignorable 0 otherwise.
1.45 daniel 2815: */
2816:
1.123 daniel 2817: static int areBlanks(xmlParserCtxtPtr ctxt, const xmlChar *str, int len) {
1.104 daniel 2818: int i, ret;
1.45 daniel 2819: xmlNodePtr lastChild;
2820:
1.176 daniel 2821: /*
2822: * Check for xml:space value.
2823: */
2824: if (*(ctxt->space) == 1)
2825: return(0);
2826:
2827: /*
2828: * Check that the string is made of blanks
2829: */
1.45 daniel 2830: for (i = 0;i < len;i++)
2831: if (!(IS_BLANK(str[i]))) return(0);
2832:
1.176 daniel 2833: /*
2834: * Look if the element is mixed content in the Dtd if available
2835: */
1.104 daniel 2836: if (ctxt->myDoc != NULL) {
2837: ret = xmlIsMixedElement(ctxt->myDoc, ctxt->node->name);
2838: if (ret == 0) return(1);
2839: if (ret == 1) return(0);
2840: }
1.176 daniel 2841:
1.104 daniel 2842: /*
1.176 daniel 2843: * Otherwise, heuristic :-\
1.104 daniel 2844: */
1.179 daniel 2845: if (ctxt->keepBlanks)
2846: return(0);
2847: if (RAW != '<') return(0);
2848: if (ctxt->node == NULL) return(0);
2849: if ((ctxt->node->children == NULL) &&
2850: (RAW == '<') && (NXT(1) == '/')) return(0);
2851:
1.45 daniel 2852: lastChild = xmlGetLastChild(ctxt->node);
2853: if (lastChild == NULL) {
2854: if (ctxt->node->content != NULL) return(0);
2855: } else if (xmlNodeIsText(lastChild))
2856: return(0);
1.157 daniel 2857: else if ((ctxt->node->children != NULL) &&
2858: (xmlNodeIsText(ctxt->node->children)))
1.104 daniel 2859: return(0);
1.45 daniel 2860: return(1);
2861: }
2862:
1.50 daniel 2863: /**
2864: * xmlHandleEntity:
2865: * @ctxt: an XML parser context
2866: * @entity: an XML entity pointer.
2867: *
2868: * Default handling of defined entities, when should we define a new input
1.45 daniel 2869: * stream ? When do we just handle that as a set of chars ?
1.99 daniel 2870: *
2871: * OBSOLETE: to be removed at some point.
1.45 daniel 2872: */
2873:
1.55 daniel 2874: void
2875: xmlHandleEntity(xmlParserCtxtPtr ctxt, xmlEntityPtr entity) {
1.45 daniel 2876: int len;
1.50 daniel 2877: xmlParserInputPtr input;
1.45 daniel 2878:
2879: if (entity->content == NULL) {
1.123 daniel 2880: ctxt->errNo = XML_ERR_INTERNAL_ERROR;
1.55 daniel 2881: if ((ctxt->sax != NULL) && (ctxt->sax->error != NULL))
1.74 daniel 2882: ctxt->sax->error(ctxt->userData, "xmlHandleEntity %s: content == NULL\n",
1.45 daniel 2883: entity->name);
1.59 daniel 2884: ctxt->wellFormed = 0;
1.180 daniel 2885: ctxt->disableSAX = 1;
1.45 daniel 2886: return;
2887: }
2888: len = xmlStrlen(entity->content);
2889: if (len <= 2) goto handle_as_char;
2890:
2891: /*
2892: * Redefine its content as an input stream.
2893: */
1.50 daniel 2894: input = xmlNewEntityInputStream(ctxt, entity);
2895: xmlPushInput(ctxt, input);
1.45 daniel 2896: return;
2897:
2898: handle_as_char:
2899: /*
2900: * Just handle the content as a set of chars.
2901: */
1.171 daniel 2902: if ((ctxt->sax != NULL) && (!ctxt->disableSAX) &&
2903: (ctxt->sax->characters != NULL))
1.74 daniel 2904: ctxt->sax->characters(ctxt->userData, entity->content, len);
1.45 daniel 2905:
2906: }
2907:
2908: /*
2909: * Forward declarations for recursive behaviour.
2910: */
1.77 daniel 2911: void xmlParsePEReference(xmlParserCtxtPtr ctxt);
2912: void xmlParseReference(xmlParserCtxtPtr ctxt);
1.45 daniel 2913:
1.28 daniel 2914: /************************************************************************
2915: * *
2916: * Extra stuff for namespace support *
2917: * Relates to http://www.w3.org/TR/WD-xml-names *
2918: * *
2919: ************************************************************************/
2920:
1.50 daniel 2921: /**
2922: * xmlNamespaceParseNCName:
2923: * @ctxt: an XML parser context
2924: *
2925: * parse an XML namespace name.
1.28 daniel 2926: *
2927: * [NS 3] NCName ::= (Letter | '_') (NCNameChar)*
2928: *
2929: * [NS 4] NCNameChar ::= Letter | Digit | '.' | '-' | '_' |
2930: * CombiningChar | Extender
1.68 daniel 2931: *
2932: * Returns the namespace name or NULL
1.28 daniel 2933: */
2934:
1.123 daniel 2935: xmlChar *
1.55 daniel 2936: xmlNamespaceParseNCName(xmlParserCtxtPtr ctxt) {
1.152 daniel 2937: xmlChar buf[XML_MAX_NAMELEN + 5];
2938: int len = 0, l;
2939: int cur = CUR_CHAR(l);
1.28 daniel 2940:
1.156 daniel 2941: /* load first the value of the char !!! */
1.152 daniel 2942: if (!IS_LETTER(cur) && (cur != '_')) return(NULL);
1.28 daniel 2943:
1.152 daniel 2944: while ((IS_LETTER(cur)) || (IS_DIGIT(cur)) ||
2945: (cur == '.') || (cur == '-') ||
2946: (cur == '_') ||
2947: (IS_COMBINING(cur)) ||
2948: (IS_EXTENDER(cur))) {
2949: COPY_BUF(l,buf,len,cur);
2950: NEXTL(l);
2951: cur = CUR_CHAR(l);
1.91 daniel 2952: if (len >= XML_MAX_NAMELEN) {
2953: fprintf(stderr,
2954: "xmlNamespaceParseNCName: reached XML_MAX_NAMELEN limit\n");
1.152 daniel 2955: while ((IS_LETTER(cur)) || (IS_DIGIT(cur)) ||
2956: (cur == '.') || (cur == '-') ||
2957: (cur == '_') ||
2958: (IS_COMBINING(cur)) ||
2959: (IS_EXTENDER(cur))) {
2960: NEXTL(l);
2961: cur = CUR_CHAR(l);
2962: }
1.91 daniel 2963: break;
2964: }
2965: }
2966: return(xmlStrndup(buf, len));
1.28 daniel 2967: }
2968:
1.50 daniel 2969: /**
2970: * xmlNamespaceParseQName:
2971: * @ctxt: an XML parser context
1.123 daniel 2972: * @prefix: a xmlChar **
1.50 daniel 2973: *
2974: * parse an XML qualified name
1.28 daniel 2975: *
2976: * [NS 5] QName ::= (Prefix ':')? LocalPart
2977: *
2978: * [NS 6] Prefix ::= NCName
2979: *
2980: * [NS 7] LocalPart ::= NCName
1.68 daniel 2981: *
1.127 daniel 2982: * Returns the local part, and prefix is updated
1.50 daniel 2983: * to get the Prefix if any.
1.28 daniel 2984: */
2985:
1.123 daniel 2986: xmlChar *
2987: xmlNamespaceParseQName(xmlParserCtxtPtr ctxt, xmlChar **prefix) {
2988: xmlChar *ret = NULL;
1.28 daniel 2989:
2990: *prefix = NULL;
2991: ret = xmlNamespaceParseNCName(ctxt);
1.152 daniel 2992: if (RAW == ':') {
1.28 daniel 2993: *prefix = ret;
1.40 daniel 2994: NEXT;
1.28 daniel 2995: ret = xmlNamespaceParseNCName(ctxt);
2996: }
2997:
2998: return(ret);
2999: }
3000:
1.50 daniel 3001: /**
1.72 daniel 3002: * xmlSplitQName:
1.162 daniel 3003: * @ctxt: an XML parser context
1.72 daniel 3004: * @name: an XML parser context
1.123 daniel 3005: * @prefix: a xmlChar **
1.72 daniel 3006: *
3007: * parse an XML qualified name string
3008: *
3009: * [NS 5] QName ::= (Prefix ':')? LocalPart
3010: *
3011: * [NS 6] Prefix ::= NCName
3012: *
3013: * [NS 7] LocalPart ::= NCName
3014: *
1.127 daniel 3015: * Returns the local part, and prefix is updated
1.72 daniel 3016: * to get the Prefix if any.
3017: */
3018:
1.123 daniel 3019: xmlChar *
1.162 daniel 3020: xmlSplitQName(xmlParserCtxtPtr ctxt, const xmlChar *name, xmlChar **prefix) {
3021: xmlChar buf[XML_MAX_NAMELEN + 5];
3022: int len = 0;
1.123 daniel 3023: xmlChar *ret = NULL;
3024: const xmlChar *cur = name;
1.162 daniel 3025: int c,l;
1.72 daniel 3026:
3027: *prefix = NULL;
1.113 daniel 3028:
3029: /* xml: prefix is not really a namespace */
3030: if ((cur[0] == 'x') && (cur[1] == 'm') &&
3031: (cur[2] == 'l') && (cur[3] == ':'))
3032: return(xmlStrdup(name));
3033:
1.162 daniel 3034: /* nasty but valid */
3035: if (cur[0] == ':')
3036: return(xmlStrdup(name));
3037:
3038: c = CUR_SCHAR(cur, l);
3039: if (!IS_LETTER(c) && (c != '_')) return(NULL);
1.72 daniel 3040:
1.162 daniel 3041: while ((IS_LETTER(c)) || (IS_DIGIT(c)) ||
3042: (c == '.') || (c == '-') ||
3043: (c == '_') ||
3044: (IS_COMBINING(c)) ||
3045: (IS_EXTENDER(c))) {
3046: COPY_BUF(l,buf,len,c);
3047: cur += l;
3048: c = CUR_SCHAR(cur, l);
3049: }
1.72 daniel 3050:
1.162 daniel 3051: ret = xmlStrndup(buf, len);
1.72 daniel 3052:
1.162 daniel 3053: if (c == ':') {
3054: cur += l;
1.163 daniel 3055: c = CUR_SCHAR(cur, l);
1.162 daniel 3056: if (!IS_LETTER(c) && (c != '_')) return(ret);
1.72 daniel 3057: *prefix = ret;
1.162 daniel 3058: len = 0;
1.72 daniel 3059:
1.162 daniel 3060: while ((IS_LETTER(c)) || (IS_DIGIT(c)) ||
3061: (c == '.') || (c == '-') ||
3062: (c == '_') ||
3063: (IS_COMBINING(c)) ||
3064: (IS_EXTENDER(c))) {
3065: COPY_BUF(l,buf,len,c);
3066: cur += l;
3067: c = CUR_SCHAR(cur, l);
3068: }
1.72 daniel 3069:
1.162 daniel 3070: ret = xmlStrndup(buf, len);
1.72 daniel 3071: }
3072:
3073: return(ret);
3074: }
3075: /**
1.50 daniel 3076: * xmlNamespaceParseNSDef:
3077: * @ctxt: an XML parser context
3078: *
3079: * parse a namespace prefix declaration
1.28 daniel 3080: *
3081: * [NS 1] NSDef ::= PrefixDef Eq SystemLiteral
3082: *
3083: * [NS 2] PrefixDef ::= 'xmlns' (':' NCName)?
1.68 daniel 3084: *
3085: * Returns the namespace name
1.28 daniel 3086: */
3087:
1.123 daniel 3088: xmlChar *
1.55 daniel 3089: xmlNamespaceParseNSDef(xmlParserCtxtPtr ctxt) {
1.123 daniel 3090: xmlChar *name = NULL;
1.28 daniel 3091:
1.152 daniel 3092: if ((RAW == 'x') && (NXT(1) == 'm') &&
1.40 daniel 3093: (NXT(2) == 'l') && (NXT(3) == 'n') &&
3094: (NXT(4) == 's')) {
3095: SKIP(5);
1.152 daniel 3096: if (RAW == ':') {
1.40 daniel 3097: NEXT;
1.28 daniel 3098: name = xmlNamespaceParseNCName(ctxt);
3099: }
3100: }
1.39 daniel 3101: return(name);
1.28 daniel 3102: }
3103:
1.50 daniel 3104: /**
3105: * xmlParseQuotedString:
3106: * @ctxt: an XML parser context
3107: *
1.45 daniel 3108: * [OLD] Parse and return a string between quotes or doublequotes
1.110 daniel 3109: * To be removed at next drop of binary compatibility
1.68 daniel 3110: *
3111: * Returns the string parser or NULL.
1.45 daniel 3112: */
1.123 daniel 3113: xmlChar *
1.55 daniel 3114: xmlParseQuotedString(xmlParserCtxtPtr ctxt) {
1.135 daniel 3115: xmlChar *buf = NULL;
1.152 daniel 3116: int len = 0,l;
1.140 daniel 3117: int size = XML_PARSER_BUFFER_SIZE;
1.152 daniel 3118: int c;
1.45 daniel 3119:
1.135 daniel 3120: buf = (xmlChar *) xmlMalloc(size * sizeof(xmlChar));
3121: if (buf == NULL) {
3122: fprintf(stderr, "malloc of %d byte failed\n", size);
3123: return(NULL);
3124: }
1.152 daniel 3125: if (RAW == '"') {
1.45 daniel 3126: NEXT;
1.152 daniel 3127: c = CUR_CHAR(l);
1.135 daniel 3128: while (IS_CHAR(c) && (c != '"')) {
1.152 daniel 3129: if (len + 5 >= size) {
1.135 daniel 3130: size *= 2;
3131: buf = xmlRealloc(buf, size * sizeof(xmlChar));
3132: if (buf == NULL) {
3133: fprintf(stderr, "realloc of %d byte failed\n", size);
3134: return(NULL);
3135: }
3136: }
1.152 daniel 3137: COPY_BUF(l,buf,len,c);
3138: NEXTL(l);
3139: c = CUR_CHAR(l);
1.135 daniel 3140: }
3141: if (c != '"') {
1.123 daniel 3142: ctxt->errNo = XML_ERR_STRING_NOT_CLOSED;
1.55 daniel 3143: if ((ctxt->sax != NULL) && (ctxt->sax->error != NULL))
1.135 daniel 3144: ctxt->sax->error(ctxt->userData,
3145: "String not closed \"%.50s\"\n", buf);
1.59 daniel 3146: ctxt->wellFormed = 0;
1.180 daniel 3147: ctxt->disableSAX = 1;
1.55 daniel 3148: } else {
1.45 daniel 3149: NEXT;
3150: }
1.152 daniel 3151: } else if (RAW == '\''){
1.45 daniel 3152: NEXT;
1.135 daniel 3153: c = CUR;
3154: while (IS_CHAR(c) && (c != '\'')) {
3155: if (len + 1 >= size) {
3156: size *= 2;
3157: buf = xmlRealloc(buf, size * sizeof(xmlChar));
3158: if (buf == NULL) {
3159: fprintf(stderr, "realloc of %d byte failed\n", size);
3160: return(NULL);
3161: }
3162: }
3163: buf[len++] = c;
3164: NEXT;
3165: c = CUR;
3166: }
1.152 daniel 3167: if (RAW != '\'') {
1.123 daniel 3168: ctxt->errNo = XML_ERR_STRING_NOT_CLOSED;
1.55 daniel 3169: if ((ctxt->sax != NULL) && (ctxt->sax->error != NULL))
1.135 daniel 3170: ctxt->sax->error(ctxt->userData,
3171: "String not closed \"%.50s\"\n", buf);
1.59 daniel 3172: ctxt->wellFormed = 0;
1.180 daniel 3173: ctxt->disableSAX = 1;
1.55 daniel 3174: } else {
1.45 daniel 3175: NEXT;
3176: }
3177: }
1.135 daniel 3178: return(buf);
1.45 daniel 3179: }
3180:
1.50 daniel 3181: /**
3182: * xmlParseNamespace:
3183: * @ctxt: an XML parser context
3184: *
1.45 daniel 3185: * [OLD] xmlParseNamespace: parse specific PI '<?namespace ...' constructs.
3186: *
3187: * This is what the older xml-name Working Draft specified, a bunch of
3188: * other stuff may still rely on it, so support is still here as
1.127 daniel 3189: * if it was declared on the root of the Tree:-(
1.110 daniel 3190: *
3191: * To be removed at next drop of binary compatibility
1.45 daniel 3192: */
3193:
1.55 daniel 3194: void
3195: xmlParseNamespace(xmlParserCtxtPtr ctxt) {
1.123 daniel 3196: xmlChar *href = NULL;
3197: xmlChar *prefix = NULL;
1.45 daniel 3198: int garbage = 0;
3199:
3200: /*
3201: * We just skipped "namespace" or "xml:namespace"
3202: */
3203: SKIP_BLANKS;
3204:
1.153 daniel 3205: while (IS_CHAR(RAW) && (RAW != '>')) {
1.45 daniel 3206: /*
3207: * We can have "ns" or "prefix" attributes
3208: * Old encoding as 'href' or 'AS' attributes is still supported
3209: */
1.152 daniel 3210: if ((RAW == 'n') && (NXT(1) == 's')) {
1.45 daniel 3211: garbage = 0;
3212: SKIP(2);
3213: SKIP_BLANKS;
3214:
1.152 daniel 3215: if (RAW != '=') continue;
1.45 daniel 3216: NEXT;
3217: SKIP_BLANKS;
3218:
3219: href = xmlParseQuotedString(ctxt);
3220: SKIP_BLANKS;
1.152 daniel 3221: } else if ((RAW == 'h') && (NXT(1) == 'r') &&
1.45 daniel 3222: (NXT(2) == 'e') && (NXT(3) == 'f')) {
3223: garbage = 0;
3224: SKIP(4);
3225: SKIP_BLANKS;
3226:
1.152 daniel 3227: if (RAW != '=') continue;
1.45 daniel 3228: NEXT;
3229: SKIP_BLANKS;
3230:
3231: href = xmlParseQuotedString(ctxt);
3232: SKIP_BLANKS;
1.152 daniel 3233: } else if ((RAW == 'p') && (NXT(1) == 'r') &&
1.45 daniel 3234: (NXT(2) == 'e') && (NXT(3) == 'f') &&
3235: (NXT(4) == 'i') && (NXT(5) == 'x')) {
3236: garbage = 0;
3237: SKIP(6);
3238: SKIP_BLANKS;
3239:
1.152 daniel 3240: if (RAW != '=') continue;
1.45 daniel 3241: NEXT;
3242: SKIP_BLANKS;
3243:
3244: prefix = xmlParseQuotedString(ctxt);
3245: SKIP_BLANKS;
1.152 daniel 3246: } else if ((RAW == 'A') && (NXT(1) == 'S')) {
1.45 daniel 3247: garbage = 0;
3248: SKIP(2);
3249: SKIP_BLANKS;
3250:
1.152 daniel 3251: if (RAW != '=') continue;
1.45 daniel 3252: NEXT;
3253: SKIP_BLANKS;
3254:
3255: prefix = xmlParseQuotedString(ctxt);
3256: SKIP_BLANKS;
1.152 daniel 3257: } else if ((RAW == '?') && (NXT(1) == '>')) {
1.45 daniel 3258: garbage = 0;
1.91 daniel 3259: NEXT;
1.45 daniel 3260: } else {
3261: /*
3262: * Found garbage when parsing the namespace
3263: */
1.122 daniel 3264: if (!garbage) {
1.55 daniel 3265: if ((ctxt->sax != NULL) && (ctxt->sax->error != NULL))
1.122 daniel 3266: ctxt->sax->error(ctxt->userData,
3267: "xmlParseNamespace found garbage\n");
3268: }
1.123 daniel 3269: ctxt->errNo = XML_ERR_NS_DECL_ERROR;
1.59 daniel 3270: ctxt->wellFormed = 0;
1.180 daniel 3271: ctxt->disableSAX = 1;
1.45 daniel 3272: NEXT;
3273: }
3274: }
3275:
3276: MOVETO_ENDTAG(CUR_PTR);
3277: NEXT;
3278:
3279: /*
3280: * Register the DTD.
1.72 daniel 3281: if (href != NULL)
3282: if ((ctxt->sax != NULL) && (ctxt->sax->globalNamespace != NULL))
1.74 daniel 3283: ctxt->sax->globalNamespace(ctxt->userData, href, prefix);
1.45 daniel 3284: */
3285:
1.119 daniel 3286: if (prefix != NULL) xmlFree(prefix);
3287: if (href != NULL) xmlFree(href);
1.45 daniel 3288: }
3289:
1.28 daniel 3290: /************************************************************************
3291: * *
3292: * The parser itself *
3293: * Relates to http://www.w3.org/TR/REC-xml *
3294: * *
3295: ************************************************************************/
1.14 veillard 3296:
1.50 daniel 3297: /**
1.97 daniel 3298: * xmlScanName:
3299: * @ctxt: an XML parser context
3300: *
3301: * Trickery: parse an XML name but without consuming the input flow
3302: * Needed for rollback cases.
3303: *
3304: * [4] NameChar ::= Letter | Digit | '.' | '-' | '_' | ':' |
3305: * CombiningChar | Extender
3306: *
3307: * [5] Name ::= (Letter | '_' | ':') (NameChar)*
3308: *
3309: * [6] Names ::= Name (S Name)*
3310: *
3311: * Returns the Name parsed or NULL
3312: */
3313:
1.123 daniel 3314: xmlChar *
1.97 daniel 3315: xmlScanName(xmlParserCtxtPtr ctxt) {
1.123 daniel 3316: xmlChar buf[XML_MAX_NAMELEN];
1.97 daniel 3317: int len = 0;
3318:
3319: GROW;
1.152 daniel 3320: if (!IS_LETTER(RAW) && (RAW != '_') &&
3321: (RAW != ':')) {
1.97 daniel 3322: return(NULL);
3323: }
3324:
3325: while ((IS_LETTER(NXT(len))) || (IS_DIGIT(NXT(len))) ||
3326: (NXT(len) == '.') || (NXT(len) == '-') ||
3327: (NXT(len) == '_') || (NXT(len) == ':') ||
3328: (IS_COMBINING(NXT(len))) ||
3329: (IS_EXTENDER(NXT(len)))) {
3330: buf[len] = NXT(len);
3331: len++;
3332: if (len >= XML_MAX_NAMELEN) {
3333: fprintf(stderr,
3334: "xmlScanName: reached XML_MAX_NAMELEN limit\n");
3335: while ((IS_LETTER(NXT(len))) || (IS_DIGIT(NXT(len))) ||
3336: (NXT(len) == '.') || (NXT(len) == '-') ||
3337: (NXT(len) == '_') || (NXT(len) == ':') ||
3338: (IS_COMBINING(NXT(len))) ||
3339: (IS_EXTENDER(NXT(len))))
3340: len++;
3341: break;
3342: }
3343: }
3344: return(xmlStrndup(buf, len));
3345: }
3346:
3347: /**
1.50 daniel 3348: * xmlParseName:
3349: * @ctxt: an XML parser context
3350: *
3351: * parse an XML name.
1.22 daniel 3352: *
3353: * [4] NameChar ::= Letter | Digit | '.' | '-' | '_' | ':' |
3354: * CombiningChar | Extender
3355: *
3356: * [5] Name ::= (Letter | '_' | ':') (NameChar)*
3357: *
3358: * [6] Names ::= Name (S Name)*
1.68 daniel 3359: *
3360: * Returns the Name parsed or NULL
1.1 veillard 3361: */
3362:
1.123 daniel 3363: xmlChar *
1.55 daniel 3364: xmlParseName(xmlParserCtxtPtr ctxt) {
1.160 daniel 3365: xmlChar buf[XML_MAX_NAMELEN + 5];
3366: int len = 0, l;
3367: int c;
1.1 veillard 3368:
1.91 daniel 3369: GROW;
1.160 daniel 3370: c = CUR_CHAR(l);
3371: if (!IS_LETTER(c) && (c != '_') &&
3372: (c != ':')) {
1.91 daniel 3373: return(NULL);
3374: }
1.40 daniel 3375:
1.160 daniel 3376: while ((IS_LETTER(c)) || (IS_DIGIT(c)) ||
3377: (c == '.') || (c == '-') ||
3378: (c == '_') || (c == ':') ||
3379: (IS_COMBINING(c)) ||
3380: (IS_EXTENDER(c))) {
3381: COPY_BUF(l,buf,len,c);
3382: NEXTL(l);
3383: c = CUR_CHAR(l);
1.91 daniel 3384: if (len >= XML_MAX_NAMELEN) {
3385: fprintf(stderr,
3386: "xmlParseName: reached XML_MAX_NAMELEN limit\n");
1.160 daniel 3387: while ((IS_LETTER(c)) || (IS_DIGIT(c)) ||
3388: (c == '.') || (c == '-') ||
3389: (c == '_') || (c == ':') ||
3390: (IS_COMBINING(c)) ||
3391: (IS_EXTENDER(c))) {
3392: NEXTL(l);
3393: c = CUR_CHAR(l);
1.97 daniel 3394: }
1.91 daniel 3395: break;
3396: }
3397: }
3398: return(xmlStrndup(buf, len));
1.22 daniel 3399: }
3400:
1.50 daniel 3401: /**
1.135 daniel 3402: * xmlParseStringName:
3403: * @ctxt: an XML parser context
3404: * @str: a pointer to an index in the string
3405: *
3406: * parse an XML name.
3407: *
3408: * [4] NameChar ::= Letter | Digit | '.' | '-' | '_' | ':' |
3409: * CombiningChar | Extender
3410: *
3411: * [5] Name ::= (Letter | '_' | ':') (NameChar)*
3412: *
3413: * [6] Names ::= Name (S Name)*
3414: *
3415: * Returns the Name parsed or NULL. The str pointer
3416: * is updated to the current location in the string.
3417: */
3418:
3419: xmlChar *
3420: xmlParseStringName(xmlParserCtxtPtr ctxt, const xmlChar** str) {
1.176 daniel 3421: xmlChar buf[XML_MAX_NAMELEN + 5];
3422: const xmlChar *cur = *str;
3423: int len = 0, l;
3424: int c;
1.135 daniel 3425:
1.176 daniel 3426: GROW;
3427: c = CUR_SCHAR(cur, l);
3428: if (!IS_LETTER(c) && (c != '_') &&
3429: (c != ':')) {
1.135 daniel 3430: return(NULL);
3431: }
3432:
1.176 daniel 3433: while ((IS_LETTER(c)) || (IS_DIGIT(c)) ||
3434: (c == '.') || (c == '-') ||
3435: (c == '_') || (c == ':') ||
3436: (IS_COMBINING(c)) ||
3437: (IS_EXTENDER(c))) {
3438: COPY_BUF(l,buf,len,c);
3439: cur += l;
3440: c = CUR_SCHAR(cur, l);
3441: if (len >= XML_MAX_NAMELEN) {
3442: fprintf(stderr,
3443: "xmlParseName: reached XML_MAX_NAMELEN limit\n");
3444: while ((IS_LETTER(c)) || (IS_DIGIT(c)) ||
3445: (c == '.') || (c == '-') ||
3446: (c == '_') || (c == ':') ||
3447: (IS_COMBINING(c)) ||
3448: (IS_EXTENDER(c))) {
3449: cur += l;
3450: c = CUR_SCHAR(cur, l);
3451: }
3452: break;
3453: }
1.135 daniel 3454: }
1.176 daniel 3455: *str = cur;
3456: return(xmlStrndup(buf, len));
1.135 daniel 3457: }
3458:
3459: /**
1.50 daniel 3460: * xmlParseNmtoken:
3461: * @ctxt: an XML parser context
3462: *
3463: * parse an XML Nmtoken.
1.22 daniel 3464: *
3465: * [7] Nmtoken ::= (NameChar)+
3466: *
3467: * [8] Nmtokens ::= Nmtoken (S Nmtoken)*
1.68 daniel 3468: *
3469: * Returns the Nmtoken parsed or NULL
1.22 daniel 3470: */
3471:
1.123 daniel 3472: xmlChar *
1.55 daniel 3473: xmlParseNmtoken(xmlParserCtxtPtr ctxt) {
1.123 daniel 3474: xmlChar buf[XML_MAX_NAMELEN];
1.91 daniel 3475: int len = 0;
1.160 daniel 3476: int c,l;
1.22 daniel 3477:
1.91 daniel 3478: GROW;
1.160 daniel 3479: c = CUR_CHAR(l);
3480: while ((IS_LETTER(c)) || (IS_DIGIT(c)) ||
3481: (c == '.') || (c == '-') ||
3482: (c == '_') || (c == ':') ||
3483: (IS_COMBINING(c)) ||
3484: (IS_EXTENDER(c))) {
3485: COPY_BUF(l,buf,len,c);
3486: NEXTL(l);
3487: c = CUR_CHAR(l);
1.91 daniel 3488: if (len >= XML_MAX_NAMELEN) {
3489: fprintf(stderr,
3490: "xmlParseNmtoken: reached XML_MAX_NAMELEN limit\n");
1.160 daniel 3491: while ((IS_LETTER(c)) || (IS_DIGIT(c)) ||
3492: (c == '.') || (c == '-') ||
3493: (c == '_') || (c == ':') ||
3494: (IS_COMBINING(c)) ||
3495: (IS_EXTENDER(c))) {
3496: NEXTL(l);
3497: c = CUR_CHAR(l);
3498: }
1.91 daniel 3499: break;
3500: }
3501: }
1.168 daniel 3502: if (len == 0)
3503: return(NULL);
1.91 daniel 3504: return(xmlStrndup(buf, len));
1.1 veillard 3505: }
3506:
1.50 daniel 3507: /**
3508: * xmlParseEntityValue:
3509: * @ctxt: an XML parser context
1.78 daniel 3510: * @orig: if non-NULL store a copy of the original entity value
1.50 daniel 3511: *
3512: * parse a value for ENTITY decl.
1.24 daniel 3513: *
3514: * [9] EntityValue ::= '"' ([^%&"] | PEReference | Reference)* '"' |
3515: * "'" ([^%&'] | PEReference | Reference)* "'"
1.68 daniel 3516: *
1.78 daniel 3517: * Returns the EntityValue parsed with reference substitued or NULL
1.24 daniel 3518: */
3519:
1.123 daniel 3520: xmlChar *
3521: xmlParseEntityValue(xmlParserCtxtPtr ctxt, xmlChar **orig) {
1.135 daniel 3522: xmlChar *buf = NULL;
3523: int len = 0;
1.140 daniel 3524: int size = XML_PARSER_BUFFER_SIZE;
1.152 daniel 3525: int c, l;
1.135 daniel 3526: xmlChar stop;
1.123 daniel 3527: xmlChar *ret = NULL;
1.176 daniel 3528: const xmlChar *cur = NULL;
1.98 daniel 3529: xmlParserInputPtr input;
1.24 daniel 3530:
1.152 daniel 3531: if (RAW == '"') stop = '"';
3532: else if (RAW == '\'') stop = '\'';
1.135 daniel 3533: else {
3534: ctxt->errNo = XML_ERR_ENTITY_NOT_STARTED;
3535: if ((ctxt->sax != NULL) && (ctxt->sax->error != NULL))
3536: ctxt->sax->error(ctxt->userData, "EntityValue: \" or ' expected\n");
3537: ctxt->wellFormed = 0;
1.180 daniel 3538: ctxt->disableSAX = 1;
1.135 daniel 3539: return(NULL);
3540: }
3541: buf = (xmlChar *) xmlMalloc(size * sizeof(xmlChar));
3542: if (buf == NULL) {
3543: fprintf(stderr, "malloc of %d byte failed\n", size);
3544: return(NULL);
3545: }
1.94 daniel 3546:
1.135 daniel 3547: /*
3548: * The content of the entity definition is copied in a buffer.
3549: */
1.94 daniel 3550:
1.135 daniel 3551: ctxt->instate = XML_PARSER_ENTITY_VALUE;
3552: input = ctxt->input;
3553: GROW;
3554: NEXT;
1.152 daniel 3555: c = CUR_CHAR(l);
1.135 daniel 3556: /*
3557: * NOTE: 4.4.5 Included in Literal
3558: * When a parameter entity reference appears in a literal entity
3559: * value, ... a single or double quote character in the replacement
3560: * text is always treated as a normal data character and will not
3561: * terminate the literal.
3562: * In practice it means we stop the loop only when back at parsing
3563: * the initial entity and the quote is found
3564: */
3565: while (IS_CHAR(c) && ((c != stop) || (ctxt->input != input))) {
1.152 daniel 3566: if (len + 5 >= size) {
1.135 daniel 3567: size *= 2;
3568: buf = xmlRealloc(buf, size * sizeof(xmlChar));
3569: if (buf == NULL) {
3570: fprintf(stderr, "realloc of %d byte failed\n", size);
3571: return(NULL);
1.94 daniel 3572: }
1.79 daniel 3573: }
1.152 daniel 3574: COPY_BUF(l,buf,len,c);
3575: NEXTL(l);
1.98 daniel 3576: /*
1.135 daniel 3577: * Pop-up of finished entities.
1.98 daniel 3578: */
1.152 daniel 3579: while ((RAW == 0) && (ctxt->inputNr > 1))
1.135 daniel 3580: xmlPopInput(ctxt);
1.152 daniel 3581:
3582: c = CUR_CHAR(l);
1.135 daniel 3583: if (c == 0) {
1.94 daniel 3584: GROW;
1.152 daniel 3585: c = CUR_CHAR(l);
1.79 daniel 3586: }
1.135 daniel 3587: }
3588: buf[len] = 0;
3589:
3590: /*
1.176 daniel 3591: * Raise problem w.r.t. '&' and '%' being used in non-entities
3592: * reference constructs. Note Charref will be handled in
3593: * xmlStringDecodeEntities()
3594: */
3595: cur = buf;
3596: while (*cur != 0) {
3597: if ((*cur == '%') || ((*cur == '&') && (cur[1] != '#'))) {
3598: xmlChar *name;
3599: xmlChar tmp = *cur;
3600:
3601: cur++;
3602: name = xmlParseStringName(ctxt, &cur);
3603: if ((name == NULL) || (*cur != ';')) {
3604: if ((ctxt->sax != NULL) && (ctxt->sax->error != NULL))
3605: ctxt->sax->error(ctxt->userData,
3606: "EntityValue: '%c' forbidden except for entities references\n",
3607: tmp);
3608: ctxt->wellFormed = 0;
1.180 daniel 3609: ctxt->disableSAX = 1;
1.176 daniel 3610: ctxt->errNo = XML_ERR_ENTITY_CHAR_ERROR;
3611: }
3612: if ((ctxt->inSubset == 1) && (tmp == '%')) {
3613: if ((ctxt->sax != NULL) && (ctxt->sax->error != NULL))
3614: ctxt->sax->error(ctxt->userData,
3615: "EntityValue: PEReferences forbidden in internal subset\n",
3616: tmp);
3617: ctxt->wellFormed = 0;
1.180 daniel 3618: ctxt->disableSAX = 1;
1.176 daniel 3619: ctxt->errNo = XML_ERR_ENTITY_PE_INTERNAL;
3620: }
3621: if (name != NULL)
3622: xmlFree(name);
3623: }
3624: cur++;
3625: }
3626:
3627: /*
1.135 daniel 3628: * Then PEReference entities are substituted.
3629: */
3630: if (c != stop) {
3631: ctxt->errNo = XML_ERR_ENTITY_NOT_FINISHED;
1.55 daniel 3632: if ((ctxt->sax != NULL) && (ctxt->sax->error != NULL))
1.135 daniel 3633: ctxt->sax->error(ctxt->userData, "EntityValue: \" expected\n");
1.59 daniel 3634: ctxt->wellFormed = 0;
1.180 daniel 3635: ctxt->disableSAX = 1;
1.170 daniel 3636: xmlFree(buf);
1.135 daniel 3637: } else {
3638: NEXT;
3639: /*
3640: * NOTE: 4.4.7 Bypassed
3641: * When a general entity reference appears in the EntityValue in
3642: * an entity declaration, it is bypassed and left as is.
1.176 daniel 3643: * so XML_SUBSTITUTE_REF is not set here.
1.135 daniel 3644: */
3645: ret = xmlStringDecodeEntities(ctxt, buf, XML_SUBSTITUTE_PEREF,
3646: 0, 0, 0);
3647: if (orig != NULL)
3648: *orig = buf;
3649: else
3650: xmlFree(buf);
1.24 daniel 3651: }
3652:
3653: return(ret);
3654: }
3655:
1.50 daniel 3656: /**
3657: * xmlParseAttValue:
3658: * @ctxt: an XML parser context
3659: *
3660: * parse a value for an attribute
1.78 daniel 3661: * Note: the parser won't do substitution of entities here, this
1.113 daniel 3662: * will be handled later in xmlStringGetNodeList
1.29 daniel 3663: *
3664: * [10] AttValue ::= '"' ([^<&"] | Reference)* '"' |
3665: * "'" ([^<&'] | Reference)* "'"
1.68 daniel 3666: *
1.129 daniel 3667: * 3.3.3 Attribute-Value Normalization:
3668: * Before the value of an attribute is passed to the application or
3669: * checked for validity, the XML processor must normalize it as follows:
3670: * - a character reference is processed by appending the referenced
3671: * character to the attribute value
3672: * - an entity reference is processed by recursively processing the
3673: * replacement text of the entity
3674: * - a whitespace character (#x20, #xD, #xA, #x9) is processed by
3675: * appending #x20 to the normalized value, except that only a single
3676: * #x20 is appended for a "#xD#xA" sequence that is part of an external
3677: * parsed entity or the literal entity value of an internal parsed entity
3678: * - other characters are processed by appending them to the normalized value
1.130 daniel 3679: * If the declared value is not CDATA, then the XML processor must further
3680: * process the normalized attribute value by discarding any leading and
3681: * trailing space (#x20) characters, and by replacing sequences of space
3682: * (#x20) characters by a single space (#x20) character.
3683: * All attributes for which no declaration has been read should be treated
3684: * by a non-validating parser as if declared CDATA.
1.129 daniel 3685: *
3686: * Returns the AttValue parsed or NULL. The value has to be freed by the caller.
1.29 daniel 3687: */
3688:
1.123 daniel 3689: xmlChar *
1.55 daniel 3690: xmlParseAttValue(xmlParserCtxtPtr ctxt) {
1.129 daniel 3691: xmlChar limit = 0;
3692: xmlChar *buffer = NULL;
3693: int buffer_size = 0;
3694: xmlChar *out = NULL;
3695:
3696: xmlChar *current = NULL;
3697: xmlEntityPtr ent;
3698: xmlChar cur;
3699:
1.29 daniel 3700:
1.91 daniel 3701: SHRINK;
1.151 daniel 3702: if (NXT(0) == '"') {
1.96 daniel 3703: ctxt->instate = XML_PARSER_ATTRIBUTE_VALUE;
1.129 daniel 3704: limit = '"';
1.40 daniel 3705: NEXT;
1.151 daniel 3706: } else if (NXT(0) == '\'') {
1.129 daniel 3707: limit = '\'';
1.96 daniel 3708: ctxt->instate = XML_PARSER_ATTRIBUTE_VALUE;
1.40 daniel 3709: NEXT;
1.29 daniel 3710: } else {
1.123 daniel 3711: ctxt->errNo = XML_ERR_ATTRIBUTE_NOT_STARTED;
1.55 daniel 3712: if ((ctxt->sax != NULL) && (ctxt->sax->error != NULL))
1.74 daniel 3713: ctxt->sax->error(ctxt->userData, "AttValue: \" or ' expected\n");
1.59 daniel 3714: ctxt->wellFormed = 0;
1.180 daniel 3715: ctxt->disableSAX = 1;
1.129 daniel 3716: return(NULL);
1.29 daniel 3717: }
3718:
1.129 daniel 3719: /*
3720: * allocate a translation buffer.
3721: */
1.140 daniel 3722: buffer_size = XML_PARSER_BUFFER_SIZE;
1.129 daniel 3723: buffer = (xmlChar *) xmlMalloc(buffer_size * sizeof(xmlChar));
3724: if (buffer == NULL) {
3725: perror("xmlParseAttValue: malloc failed");
3726: return(NULL);
3727: }
3728: out = buffer;
3729:
3730: /*
3731: * Ok loop until we reach one of the ending char or a size limit.
3732: */
3733: cur = CUR;
1.156 daniel 3734: while (((NXT(0) != limit) && (cur != '<')) || (ctxt->token != 0)) {
1.129 daniel 3735: if (cur == 0) break;
3736: if ((cur == '&') && (NXT(1) == '#')) {
3737: int val = xmlParseCharRef(ctxt);
3738: *out++ = val;
3739: } else if (cur == '&') {
3740: ent = xmlParseEntityRef(ctxt);
3741: if ((ent != NULL) &&
3742: (ctxt->replaceEntities != 0)) {
3743: current = ent->content;
3744: while (*current != 0) {
3745: *out++ = *current++;
3746: if (out - buffer > buffer_size - 10) {
3747: int index = out - buffer;
3748:
3749: growBuffer(buffer);
3750: out = &buffer[index];
3751: }
3752: }
3753: } else if (ent != NULL) {
3754: int i = xmlStrlen(ent->name);
3755: const xmlChar *cur = ent->name;
3756:
3757: *out++ = '&';
3758: if (out - buffer > buffer_size - i - 10) {
3759: int index = out - buffer;
3760:
3761: growBuffer(buffer);
3762: out = &buffer[index];
3763: }
3764: for (;i > 0;i--)
3765: *out++ = *cur++;
3766: *out++ = ';';
3767: }
3768: } else {
1.156 daniel 3769: /* invalid for UTF-8 , use COPY(out); !!! */
1.129 daniel 3770: if ((cur == 0x20) || (cur == 0xD) || (cur == 0xA) || (cur == 0x9)) {
1.130 daniel 3771: *out++ = 0x20;
3772: if (out - buffer > buffer_size - 10) {
3773: int index = out - buffer;
3774:
3775: growBuffer(buffer);
3776: out = &buffer[index];
1.129 daniel 3777: }
3778: } else {
3779: *out++ = cur;
3780: if (out - buffer > buffer_size - 10) {
3781: int index = out - buffer;
3782:
3783: growBuffer(buffer);
3784: out = &buffer[index];
3785: }
3786: }
3787: NEXT;
3788: }
3789: cur = CUR;
3790: }
3791: *out++ = 0;
1.152 daniel 3792: if (RAW == '<') {
1.129 daniel 3793: if ((ctxt->sax != NULL) && (ctxt->sax->error != NULL))
3794: ctxt->sax->error(ctxt->userData,
3795: "Unescaped '<' not allowed in attributes values\n");
3796: ctxt->errNo = XML_ERR_LT_IN_ATTRIBUTE;
3797: ctxt->wellFormed = 0;
1.180 daniel 3798: ctxt->disableSAX = 1;
1.152 daniel 3799: } else if (RAW != limit) {
1.129 daniel 3800: if ((ctxt->sax != NULL) && (ctxt->sax->error != NULL))
3801: ctxt->sax->error(ctxt->userData, "AttValue: ' expected\n");
3802: ctxt->errNo = XML_ERR_ATTRIBUTE_NOT_FINISHED;
3803: ctxt->wellFormed = 0;
1.180 daniel 3804: ctxt->disableSAX = 1;
1.129 daniel 3805: } else
3806: NEXT;
3807: return(buffer);
1.29 daniel 3808: }
3809:
1.50 daniel 3810: /**
3811: * xmlParseSystemLiteral:
3812: * @ctxt: an XML parser context
3813: *
3814: * parse an XML Literal
1.21 daniel 3815: *
1.22 daniel 3816: * [11] SystemLiteral ::= ('"' [^"]* '"') | ("'" [^']* "'")
1.68 daniel 3817: *
3818: * Returns the SystemLiteral parsed or NULL
1.21 daniel 3819: */
3820:
1.123 daniel 3821: xmlChar *
1.55 daniel 3822: xmlParseSystemLiteral(xmlParserCtxtPtr ctxt) {
1.135 daniel 3823: xmlChar *buf = NULL;
3824: int len = 0;
1.140 daniel 3825: int size = XML_PARSER_BUFFER_SIZE;
1.152 daniel 3826: int cur, l;
1.135 daniel 3827: xmlChar stop;
1.168 daniel 3828: int state = ctxt->instate;
1.21 daniel 3829:
1.91 daniel 3830: SHRINK;
1.152 daniel 3831: if (RAW == '"') {
1.40 daniel 3832: NEXT;
1.135 daniel 3833: stop = '"';
1.152 daniel 3834: } else if (RAW == '\'') {
1.40 daniel 3835: NEXT;
1.135 daniel 3836: stop = '\'';
1.21 daniel 3837: } else {
1.55 daniel 3838: if ((ctxt->sax != NULL) && (ctxt->sax->error != NULL))
1.122 daniel 3839: ctxt->sax->error(ctxt->userData,
3840: "SystemLiteral \" or ' expected\n");
1.123 daniel 3841: ctxt->errNo = XML_ERR_LITERAL_NOT_STARTED;
1.59 daniel 3842: ctxt->wellFormed = 0;
1.180 daniel 3843: ctxt->disableSAX = 1;
1.135 daniel 3844: return(NULL);
1.21 daniel 3845: }
3846:
1.135 daniel 3847: buf = (xmlChar *) xmlMalloc(size * sizeof(xmlChar));
3848: if (buf == NULL) {
3849: fprintf(stderr, "malloc of %d byte failed\n", size);
3850: return(NULL);
3851: }
1.168 daniel 3852: ctxt->instate = XML_PARSER_SYSTEM_LITERAL;
1.152 daniel 3853: cur = CUR_CHAR(l);
1.135 daniel 3854: while ((IS_CHAR(cur)) && (cur != stop)) {
1.152 daniel 3855: if (len + 5 >= size) {
1.135 daniel 3856: size *= 2;
3857: buf = xmlRealloc(buf, size * sizeof(xmlChar));
3858: if (buf == NULL) {
3859: fprintf(stderr, "realloc of %d byte failed\n", size);
1.168 daniel 3860: ctxt->instate = state;
1.135 daniel 3861: return(NULL);
3862: }
3863: }
1.152 daniel 3864: COPY_BUF(l,buf,len,cur);
3865: NEXTL(l);
3866: cur = CUR_CHAR(l);
1.135 daniel 3867: if (cur == 0) {
3868: GROW;
3869: SHRINK;
1.152 daniel 3870: cur = CUR_CHAR(l);
1.135 daniel 3871: }
3872: }
3873: buf[len] = 0;
1.168 daniel 3874: ctxt->instate = state;
1.135 daniel 3875: if (!IS_CHAR(cur)) {
3876: if ((ctxt->sax != NULL) && (ctxt->sax->error != NULL))
3877: ctxt->sax->error(ctxt->userData, "Unfinished SystemLiteral\n");
3878: ctxt->errNo = XML_ERR_LITERAL_NOT_FINISHED;
3879: ctxt->wellFormed = 0;
1.180 daniel 3880: ctxt->disableSAX = 1;
1.135 daniel 3881: } else {
3882: NEXT;
3883: }
3884: return(buf);
1.21 daniel 3885: }
3886:
1.50 daniel 3887: /**
3888: * xmlParsePubidLiteral:
3889: * @ctxt: an XML parser context
1.21 daniel 3890: *
1.50 daniel 3891: * parse an XML public literal
1.68 daniel 3892: *
3893: * [12] PubidLiteral ::= '"' PubidChar* '"' | "'" (PubidChar - "'")* "'"
3894: *
3895: * Returns the PubidLiteral parsed or NULL.
1.21 daniel 3896: */
3897:
1.123 daniel 3898: xmlChar *
1.55 daniel 3899: xmlParsePubidLiteral(xmlParserCtxtPtr ctxt) {
1.135 daniel 3900: xmlChar *buf = NULL;
3901: int len = 0;
1.140 daniel 3902: int size = XML_PARSER_BUFFER_SIZE;
1.135 daniel 3903: xmlChar cur;
3904: xmlChar stop;
1.125 daniel 3905:
1.91 daniel 3906: SHRINK;
1.152 daniel 3907: if (RAW == '"') {
1.40 daniel 3908: NEXT;
1.135 daniel 3909: stop = '"';
1.152 daniel 3910: } else if (RAW == '\'') {
1.40 daniel 3911: NEXT;
1.135 daniel 3912: stop = '\'';
1.21 daniel 3913: } else {
1.55 daniel 3914: if ((ctxt->sax != NULL) && (ctxt->sax->error != NULL))
1.122 daniel 3915: ctxt->sax->error(ctxt->userData,
3916: "SystemLiteral \" or ' expected\n");
1.123 daniel 3917: ctxt->errNo = XML_ERR_LITERAL_NOT_STARTED;
1.59 daniel 3918: ctxt->wellFormed = 0;
1.180 daniel 3919: ctxt->disableSAX = 1;
1.135 daniel 3920: return(NULL);
3921: }
3922: buf = (xmlChar *) xmlMalloc(size * sizeof(xmlChar));
3923: if (buf == NULL) {
3924: fprintf(stderr, "malloc of %d byte failed\n", size);
3925: return(NULL);
3926: }
3927: cur = CUR;
3928: while ((IS_PUBIDCHAR(cur)) && (cur != stop)) {
3929: if (len + 1 >= size) {
3930: size *= 2;
3931: buf = xmlRealloc(buf, size * sizeof(xmlChar));
3932: if (buf == NULL) {
3933: fprintf(stderr, "realloc of %d byte failed\n", size);
3934: return(NULL);
3935: }
3936: }
3937: buf[len++] = cur;
3938: NEXT;
3939: cur = CUR;
3940: if (cur == 0) {
3941: GROW;
3942: SHRINK;
3943: cur = CUR;
3944: }
3945: }
3946: buf[len] = 0;
3947: if (cur != stop) {
3948: if ((ctxt->sax != NULL) && (ctxt->sax->error != NULL))
3949: ctxt->sax->error(ctxt->userData, "Unfinished PubidLiteral\n");
3950: ctxt->errNo = XML_ERR_LITERAL_NOT_FINISHED;
3951: ctxt->wellFormed = 0;
1.180 daniel 3952: ctxt->disableSAX = 1;
1.135 daniel 3953: } else {
3954: NEXT;
1.21 daniel 3955: }
1.135 daniel 3956: return(buf);
1.21 daniel 3957: }
3958:
1.50 daniel 3959: /**
3960: * xmlParseCharData:
3961: * @ctxt: an XML parser context
3962: * @cdata: int indicating whether we are within a CDATA section
3963: *
3964: * parse a CharData section.
3965: * if we are within a CDATA section ']]>' marks an end of section.
1.27 daniel 3966: *
1.151 daniel 3967: * The right angle bracket (>) may be represented using the string ">",
3968: * and must, for compatibility, be escaped using ">" or a character
3969: * reference when it appears in the string "]]>" in content, when that
3970: * string is not marking the end of a CDATA section.
3971: *
1.27 daniel 3972: * [14] CharData ::= [^<&]* - ([^<&]* ']]>' [^<&]*)
3973: */
3974:
1.55 daniel 3975: void
3976: xmlParseCharData(xmlParserCtxtPtr ctxt, int cdata) {
1.152 daniel 3977: xmlChar buf[XML_PARSER_BIG_BUFFER_SIZE + 5];
1.91 daniel 3978: int nbchar = 0;
1.152 daniel 3979: int cur, l;
1.27 daniel 3980:
1.91 daniel 3981: SHRINK;
1.152 daniel 3982: cur = CUR_CHAR(l);
1.160 daniel 3983: while ((IS_CHAR(cur)) && ((cur != '<') || (ctxt->token == '<')) &&
1.153 daniel 3984: ((cur != '&') || (ctxt->token == '&'))) {
1.97 daniel 3985: if ((cur == ']') && (NXT(1) == ']') &&
1.59 daniel 3986: (NXT(2) == '>')) {
3987: if (cdata) break;
3988: else {
3989: if ((ctxt->sax != NULL) && (ctxt->sax->error != NULL))
1.151 daniel 3990: ctxt->sax->error(ctxt->userData,
1.59 daniel 3991: "Sequence ']]>' not allowed in content\n");
1.123 daniel 3992: ctxt->errNo = XML_ERR_MISPLACED_CDATA_END;
1.151 daniel 3993: /* Should this be relaxed ??? I see a "must here */
3994: ctxt->wellFormed = 0;
1.180 daniel 3995: ctxt->disableSAX = 1;
1.59 daniel 3996: }
3997: }
1.152 daniel 3998: COPY_BUF(l,buf,nbchar,cur);
3999: if (nbchar >= XML_PARSER_BIG_BUFFER_SIZE) {
1.91 daniel 4000: /*
4001: * Ok the segment is to be consumed as chars.
4002: */
1.171 daniel 4003: if ((ctxt->sax != NULL) && (!ctxt->disableSAX)) {
1.91 daniel 4004: if (areBlanks(ctxt, buf, nbchar)) {
4005: if (ctxt->sax->ignorableWhitespace != NULL)
4006: ctxt->sax->ignorableWhitespace(ctxt->userData,
4007: buf, nbchar);
4008: } else {
4009: if (ctxt->sax->characters != NULL)
4010: ctxt->sax->characters(ctxt->userData, buf, nbchar);
4011: }
4012: }
4013: nbchar = 0;
4014: }
1.152 daniel 4015: NEXTL(l);
4016: cur = CUR_CHAR(l);
1.27 daniel 4017: }
1.91 daniel 4018: if (nbchar != 0) {
4019: /*
4020: * Ok the segment is to be consumed as chars.
4021: */
1.171 daniel 4022: if ((ctxt->sax != NULL) && (!ctxt->disableSAX)) {
1.91 daniel 4023: if (areBlanks(ctxt, buf, nbchar)) {
4024: if (ctxt->sax->ignorableWhitespace != NULL)
4025: ctxt->sax->ignorableWhitespace(ctxt->userData, buf, nbchar);
4026: } else {
4027: if (ctxt->sax->characters != NULL)
4028: ctxt->sax->characters(ctxt->userData, buf, nbchar);
4029: }
4030: }
1.45 daniel 4031: }
1.27 daniel 4032: }
4033:
1.50 daniel 4034: /**
4035: * xmlParseExternalID:
4036: * @ctxt: an XML parser context
1.123 daniel 4037: * @publicID: a xmlChar** receiving PubidLiteral
1.67 daniel 4038: * @strict: indicate whether we should restrict parsing to only
4039: * production [75], see NOTE below
1.50 daniel 4040: *
1.67 daniel 4041: * Parse an External ID or a Public ID
4042: *
4043: * NOTE: Productions [75] and [83] interract badly since [75] can generate
4044: * 'PUBLIC' S PubidLiteral S SystemLiteral
1.22 daniel 4045: *
4046: * [75] ExternalID ::= 'SYSTEM' S SystemLiteral
4047: * | 'PUBLIC' S PubidLiteral S SystemLiteral
1.67 daniel 4048: *
4049: * [83] PublicID ::= 'PUBLIC' S PubidLiteral
4050: *
1.68 daniel 4051: * Returns the function returns SystemLiteral and in the second
1.67 daniel 4052: * case publicID receives PubidLiteral, is strict is off
4053: * it is possible to return NULL and have publicID set.
1.22 daniel 4054: */
4055:
1.123 daniel 4056: xmlChar *
4057: xmlParseExternalID(xmlParserCtxtPtr ctxt, xmlChar **publicID, int strict) {
4058: xmlChar *URI = NULL;
1.22 daniel 4059:
1.91 daniel 4060: SHRINK;
1.152 daniel 4061: if ((RAW == 'S') && (NXT(1) == 'Y') &&
1.40 daniel 4062: (NXT(2) == 'S') && (NXT(3) == 'T') &&
4063: (NXT(4) == 'E') && (NXT(5) == 'M')) {
4064: SKIP(6);
1.59 daniel 4065: if (!IS_BLANK(CUR)) {
4066: if ((ctxt->sax != NULL) && (ctxt->sax->error != NULL))
1.74 daniel 4067: ctxt->sax->error(ctxt->userData,
1.59 daniel 4068: "Space required after 'SYSTEM'\n");
1.123 daniel 4069: ctxt->errNo = XML_ERR_SPACE_REQUIRED;
1.59 daniel 4070: ctxt->wellFormed = 0;
1.180 daniel 4071: ctxt->disableSAX = 1;
1.59 daniel 4072: }
1.42 daniel 4073: SKIP_BLANKS;
1.39 daniel 4074: URI = xmlParseSystemLiteral(ctxt);
1.59 daniel 4075: if (URI == NULL) {
1.55 daniel 4076: if ((ctxt->sax != NULL) && (ctxt->sax->error != NULL))
1.74 daniel 4077: ctxt->sax->error(ctxt->userData,
1.39 daniel 4078: "xmlParseExternalID: SYSTEM, no URI\n");
1.123 daniel 4079: ctxt->errNo = XML_ERR_URI_REQUIRED;
1.59 daniel 4080: ctxt->wellFormed = 0;
1.180 daniel 4081: ctxt->disableSAX = 1;
1.59 daniel 4082: }
1.152 daniel 4083: } else if ((RAW == 'P') && (NXT(1) == 'U') &&
1.40 daniel 4084: (NXT(2) == 'B') && (NXT(3) == 'L') &&
4085: (NXT(4) == 'I') && (NXT(5) == 'C')) {
4086: SKIP(6);
1.59 daniel 4087: if (!IS_BLANK(CUR)) {
4088: if ((ctxt->sax != NULL) && (ctxt->sax->error != NULL))
1.74 daniel 4089: ctxt->sax->error(ctxt->userData,
1.59 daniel 4090: "Space required after 'PUBLIC'\n");
1.123 daniel 4091: ctxt->errNo = XML_ERR_SPACE_REQUIRED;
1.59 daniel 4092: ctxt->wellFormed = 0;
1.180 daniel 4093: ctxt->disableSAX = 1;
1.59 daniel 4094: }
1.42 daniel 4095: SKIP_BLANKS;
1.39 daniel 4096: *publicID = xmlParsePubidLiteral(ctxt);
1.59 daniel 4097: if (*publicID == NULL) {
1.55 daniel 4098: if ((ctxt->sax != NULL) && (ctxt->sax->error != NULL))
1.74 daniel 4099: ctxt->sax->error(ctxt->userData,
1.39 daniel 4100: "xmlParseExternalID: PUBLIC, no Public Identifier\n");
1.123 daniel 4101: ctxt->errNo = XML_ERR_PUBID_REQUIRED;
1.59 daniel 4102: ctxt->wellFormed = 0;
1.180 daniel 4103: ctxt->disableSAX = 1;
1.59 daniel 4104: }
1.67 daniel 4105: if (strict) {
4106: /*
4107: * We don't handle [83] so "S SystemLiteral" is required.
4108: */
4109: if (!IS_BLANK(CUR)) {
4110: if ((ctxt->sax != NULL) && (ctxt->sax->error != NULL))
1.74 daniel 4111: ctxt->sax->error(ctxt->userData,
1.67 daniel 4112: "Space required after the Public Identifier\n");
1.123 daniel 4113: ctxt->errNo = XML_ERR_SPACE_REQUIRED;
1.67 daniel 4114: ctxt->wellFormed = 0;
1.180 daniel 4115: ctxt->disableSAX = 1;
1.67 daniel 4116: }
4117: } else {
4118: /*
4119: * We handle [83] so we return immediately, if
4120: * "S SystemLiteral" is not detected. From a purely parsing
4121: * point of view that's a nice mess.
4122: */
1.135 daniel 4123: const xmlChar *ptr;
4124: GROW;
4125:
4126: ptr = CUR_PTR;
1.67 daniel 4127: if (!IS_BLANK(*ptr)) return(NULL);
4128:
4129: while (IS_BLANK(*ptr)) ptr++;
1.173 daniel 4130: if ((*ptr != '\'') && (*ptr != '"')) return(NULL);
1.59 daniel 4131: }
1.42 daniel 4132: SKIP_BLANKS;
1.39 daniel 4133: URI = xmlParseSystemLiteral(ctxt);
1.59 daniel 4134: if (URI == NULL) {
1.55 daniel 4135: if ((ctxt->sax != NULL) && (ctxt->sax->error != NULL))
1.74 daniel 4136: ctxt->sax->error(ctxt->userData,
1.39 daniel 4137: "xmlParseExternalID: PUBLIC, no URI\n");
1.123 daniel 4138: ctxt->errNo = XML_ERR_URI_REQUIRED;
1.59 daniel 4139: ctxt->wellFormed = 0;
1.180 daniel 4140: ctxt->disableSAX = 1;
1.59 daniel 4141: }
1.22 daniel 4142: }
1.39 daniel 4143: return(URI);
1.22 daniel 4144: }
4145:
1.50 daniel 4146: /**
4147: * xmlParseComment:
1.69 daniel 4148: * @ctxt: an XML parser context
1.50 daniel 4149: *
1.3 veillard 4150: * Skip an XML (SGML) comment <!-- .... -->
1.38 daniel 4151: * The spec says that "For compatibility, the string "--" (double-hyphen)
4152: * must not occur within comments. "
1.22 daniel 4153: *
4154: * [15] Comment ::= '<!--' ((Char - '-') | ('-' (Char - '-')))* '-->'
1.3 veillard 4155: */
1.72 daniel 4156: void
1.114 daniel 4157: xmlParseComment(xmlParserCtxtPtr ctxt) {
1.135 daniel 4158: xmlChar *buf = NULL;
4159: int len = 0;
1.140 daniel 4160: int size = XML_PARSER_BUFFER_SIZE;
1.152 daniel 4161: int q, ql;
4162: int r, rl;
4163: int cur, l;
1.140 daniel 4164: xmlParserInputState state;
1.3 veillard 4165:
4166: /*
1.22 daniel 4167: * Check that there is a comment right here.
1.3 veillard 4168: */
1.152 daniel 4169: if ((RAW != '<') || (NXT(1) != '!') ||
1.72 daniel 4170: (NXT(2) != '-') || (NXT(3) != '-')) return;
1.3 veillard 4171:
1.140 daniel 4172: state = ctxt->instate;
1.97 daniel 4173: ctxt->instate = XML_PARSER_COMMENT;
1.91 daniel 4174: SHRINK;
1.40 daniel 4175: SKIP(4);
1.135 daniel 4176: buf = (xmlChar *) xmlMalloc(size * sizeof(xmlChar));
4177: if (buf == NULL) {
4178: fprintf(stderr, "malloc of %d byte failed\n", size);
1.140 daniel 4179: ctxt->instate = state;
1.135 daniel 4180: return;
4181: }
1.152 daniel 4182: q = CUR_CHAR(ql);
4183: NEXTL(ql);
4184: r = CUR_CHAR(rl);
4185: NEXTL(rl);
4186: cur = CUR_CHAR(l);
1.135 daniel 4187: while (IS_CHAR(cur) &&
4188: ((cur != '>') ||
4189: (r != '-') || (q != '-'))) {
4190: if ((r == '-') && (q == '-')) {
1.55 daniel 4191: if ((ctxt->sax != NULL) && (ctxt->sax->error != NULL))
1.74 daniel 4192: ctxt->sax->error(ctxt->userData,
1.38 daniel 4193: "Comment must not contain '--' (double-hyphen)`\n");
1.123 daniel 4194: ctxt->errNo = XML_ERR_HYPHEN_IN_COMMENT;
1.59 daniel 4195: ctxt->wellFormed = 0;
1.180 daniel 4196: ctxt->disableSAX = 1;
1.59 daniel 4197: }
1.152 daniel 4198: if (len + 5 >= size) {
1.135 daniel 4199: size *= 2;
4200: buf = xmlRealloc(buf, size * sizeof(xmlChar));
4201: if (buf == NULL) {
4202: fprintf(stderr, "realloc of %d byte failed\n", size);
1.140 daniel 4203: ctxt->instate = state;
1.135 daniel 4204: return;
4205: }
4206: }
1.152 daniel 4207: COPY_BUF(ql,buf,len,q);
1.135 daniel 4208: q = r;
1.152 daniel 4209: ql = rl;
1.135 daniel 4210: r = cur;
1.152 daniel 4211: rl = l;
4212: NEXTL(l);
4213: cur = CUR_CHAR(l);
1.135 daniel 4214: if (cur == 0) {
4215: SHRINK;
4216: GROW;
1.152 daniel 4217: cur = CUR_CHAR(l);
1.135 daniel 4218: }
1.3 veillard 4219: }
1.135 daniel 4220: buf[len] = 0;
4221: if (!IS_CHAR(cur)) {
1.55 daniel 4222: if ((ctxt->sax != NULL) && (ctxt->sax->error != NULL))
1.122 daniel 4223: ctxt->sax->error(ctxt->userData,
1.135 daniel 4224: "Comment not terminated \n<!--%.50s\n", buf);
1.123 daniel 4225: ctxt->errNo = XML_ERR_COMMENT_NOT_FINISHED;
1.59 daniel 4226: ctxt->wellFormed = 0;
1.180 daniel 4227: ctxt->disableSAX = 1;
1.178 daniel 4228: xmlFree(buf);
1.3 veillard 4229: } else {
1.40 daniel 4230: NEXT;
1.171 daniel 4231: if ((ctxt->sax != NULL) && (ctxt->sax->comment != NULL) &&
4232: (!ctxt->disableSAX))
1.135 daniel 4233: ctxt->sax->comment(ctxt->userData, buf);
4234: xmlFree(buf);
1.3 veillard 4235: }
1.140 daniel 4236: ctxt->instate = state;
1.3 veillard 4237: }
4238:
1.50 daniel 4239: /**
4240: * xmlParsePITarget:
4241: * @ctxt: an XML parser context
4242: *
4243: * parse the name of a PI
1.22 daniel 4244: *
4245: * [17] PITarget ::= Name - (('X' | 'x') ('M' | 'm') ('L' | 'l'))
1.68 daniel 4246: *
4247: * Returns the PITarget name or NULL
1.22 daniel 4248: */
4249:
1.123 daniel 4250: xmlChar *
1.55 daniel 4251: xmlParsePITarget(xmlParserCtxtPtr ctxt) {
1.123 daniel 4252: xmlChar *name;
1.22 daniel 4253:
4254: name = xmlParseName(ctxt);
1.139 daniel 4255: if ((name != NULL) &&
1.22 daniel 4256: ((name[0] == 'x') || (name[0] == 'X')) &&
1.31 daniel 4257: ((name[1] == 'm') || (name[1] == 'M')) &&
4258: ((name[2] == 'l') || (name[2] == 'L'))) {
1.139 daniel 4259: int i;
1.177 daniel 4260: if ((name[0] == 'x') && (name[1] == 'm') &&
4261: (name[2] == 'l') && (name[3] == 0)) {
1.151 daniel 4262: if ((ctxt->sax != NULL) && (ctxt->sax->error != NULL))
4263: ctxt->sax->error(ctxt->userData,
4264: "XML declaration allowed only at the start of the document\n");
4265: ctxt->errNo = XML_ERR_RESERVED_XML_NAME;
4266: ctxt->wellFormed = 0;
1.180 daniel 4267: ctxt->disableSAX = 1;
1.151 daniel 4268: return(name);
4269: } else if (name[3] == 0) {
4270: if ((ctxt->sax != NULL) && (ctxt->sax->error != NULL))
4271: ctxt->sax->error(ctxt->userData, "Invalid PI name\n");
4272: ctxt->errNo = XML_ERR_RESERVED_XML_NAME;
4273: ctxt->wellFormed = 0;
1.180 daniel 4274: ctxt->disableSAX = 1;
1.151 daniel 4275: return(name);
4276: }
1.139 daniel 4277: for (i = 0;;i++) {
4278: if (xmlW3CPIs[i] == NULL) break;
4279: if (!xmlStrcmp(name, (const xmlChar *)xmlW3CPIs[i]))
4280: return(name);
4281: }
4282: if ((ctxt->sax != NULL) && (ctxt->sax->warning != NULL)) {
4283: ctxt->sax->warning(ctxt->userData,
1.122 daniel 4284: "xmlParsePItarget: invalid name prefix 'xml'\n");
1.123 daniel 4285: ctxt->errNo = XML_ERR_RESERVED_XML_NAME;
1.122 daniel 4286: }
1.22 daniel 4287: }
4288: return(name);
4289: }
4290:
1.50 daniel 4291: /**
4292: * xmlParsePI:
4293: * @ctxt: an XML parser context
4294: *
4295: * parse an XML Processing Instruction.
1.22 daniel 4296: *
4297: * [16] PI ::= '<?' PITarget (S (Char* - (Char* '?>' Char*)))? '?>'
1.68 daniel 4298: *
1.69 daniel 4299: * The processing is transfered to SAX once parsed.
1.3 veillard 4300: */
4301:
1.55 daniel 4302: void
4303: xmlParsePI(xmlParserCtxtPtr ctxt) {
1.135 daniel 4304: xmlChar *buf = NULL;
4305: int len = 0;
1.140 daniel 4306: int size = XML_PARSER_BUFFER_SIZE;
1.152 daniel 4307: int cur, l;
1.123 daniel 4308: xmlChar *target;
1.140 daniel 4309: xmlParserInputState state;
1.22 daniel 4310:
1.152 daniel 4311: if ((RAW == '<') && (NXT(1) == '?')) {
1.140 daniel 4312: state = ctxt->instate;
4313: ctxt->instate = XML_PARSER_PI;
1.3 veillard 4314: /*
4315: * this is a Processing Instruction.
4316: */
1.40 daniel 4317: SKIP(2);
1.91 daniel 4318: SHRINK;
1.3 veillard 4319:
4320: /*
1.22 daniel 4321: * Parse the target name and check for special support like
4322: * namespace.
1.3 veillard 4323: */
1.22 daniel 4324: target = xmlParsePITarget(ctxt);
4325: if (target != NULL) {
1.156 daniel 4326: if ((RAW == '?') && (NXT(1) == '>')) {
4327: SKIP(2);
4328:
4329: /*
4330: * SAX: PI detected.
4331: */
1.171 daniel 4332: if ((ctxt->sax) && (!ctxt->disableSAX) &&
1.156 daniel 4333: (ctxt->sax->processingInstruction != NULL))
4334: ctxt->sax->processingInstruction(ctxt->userData,
4335: target, NULL);
4336: ctxt->instate = state;
1.170 daniel 4337: xmlFree(target);
1.156 daniel 4338: return;
4339: }
1.135 daniel 4340: buf = (xmlChar *) xmlMalloc(size * sizeof(xmlChar));
4341: if (buf == NULL) {
4342: fprintf(stderr, "malloc of %d byte failed\n", size);
1.140 daniel 4343: ctxt->instate = state;
1.135 daniel 4344: return;
4345: }
4346: cur = CUR;
4347: if (!IS_BLANK(cur)) {
1.114 daniel 4348: if ((ctxt->sax != NULL) && (ctxt->sax->error != NULL))
4349: ctxt->sax->error(ctxt->userData,
4350: "xmlParsePI: PI %s space expected\n", target);
1.123 daniel 4351: ctxt->errNo = XML_ERR_SPACE_REQUIRED;
1.114 daniel 4352: ctxt->wellFormed = 0;
1.180 daniel 4353: ctxt->disableSAX = 1;
1.114 daniel 4354: }
4355: SKIP_BLANKS;
1.152 daniel 4356: cur = CUR_CHAR(l);
1.135 daniel 4357: while (IS_CHAR(cur) &&
4358: ((cur != '?') || (NXT(1) != '>'))) {
1.152 daniel 4359: if (len + 5 >= size) {
1.135 daniel 4360: size *= 2;
4361: buf = xmlRealloc(buf, size * sizeof(xmlChar));
4362: if (buf == NULL) {
4363: fprintf(stderr, "realloc of %d byte failed\n", size);
1.140 daniel 4364: ctxt->instate = state;
1.135 daniel 4365: return;
4366: }
4367: }
1.152 daniel 4368: COPY_BUF(l,buf,len,cur);
4369: NEXTL(l);
4370: cur = CUR_CHAR(l);
1.135 daniel 4371: if (cur == 0) {
4372: SHRINK;
4373: GROW;
1.152 daniel 4374: cur = CUR_CHAR(l);
1.135 daniel 4375: }
4376: }
4377: buf[len] = 0;
1.152 daniel 4378: if (cur != '?') {
1.72 daniel 4379: if ((ctxt->sax != NULL) && (ctxt->sax->error != NULL))
1.74 daniel 4380: ctxt->sax->error(ctxt->userData,
1.72 daniel 4381: "xmlParsePI: PI %s never end ...\n", target);
1.123 daniel 4382: ctxt->errNo = XML_ERR_PI_NOT_FINISHED;
1.72 daniel 4383: ctxt->wellFormed = 0;
1.180 daniel 4384: ctxt->disableSAX = 1;
1.22 daniel 4385: } else {
1.72 daniel 4386: SKIP(2);
1.44 daniel 4387:
1.72 daniel 4388: /*
4389: * SAX: PI detected.
4390: */
1.171 daniel 4391: if ((ctxt->sax) && (!ctxt->disableSAX) &&
1.72 daniel 4392: (ctxt->sax->processingInstruction != NULL))
1.99 daniel 4393: ctxt->sax->processingInstruction(ctxt->userData,
1.135 daniel 4394: target, buf);
1.22 daniel 4395: }
1.135 daniel 4396: xmlFree(buf);
1.119 daniel 4397: xmlFree(target);
1.3 veillard 4398: } else {
1.55 daniel 4399: if ((ctxt->sax != NULL) && (ctxt->sax->error != NULL))
1.99 daniel 4400: ctxt->sax->error(ctxt->userData,
4401: "xmlParsePI : no target name\n");
1.123 daniel 4402: ctxt->errNo = XML_ERR_PI_NOT_STARTED;
1.59 daniel 4403: ctxt->wellFormed = 0;
1.180 daniel 4404: ctxt->disableSAX = 1;
1.22 daniel 4405: }
1.140 daniel 4406: ctxt->instate = state;
1.22 daniel 4407: }
4408: }
4409:
1.50 daniel 4410: /**
4411: * xmlParseNotationDecl:
4412: * @ctxt: an XML parser context
4413: *
4414: * parse a notation declaration
1.22 daniel 4415: *
4416: * [82] NotationDecl ::= '<!NOTATION' S Name S (ExternalID | PublicID) S? '>'
4417: *
4418: * Hence there is actually 3 choices:
4419: * 'PUBLIC' S PubidLiteral
4420: * 'PUBLIC' S PubidLiteral S SystemLiteral
4421: * and 'SYSTEM' S SystemLiteral
1.50 daniel 4422: *
1.67 daniel 4423: * See the NOTE on xmlParseExternalID().
1.22 daniel 4424: */
4425:
1.55 daniel 4426: void
4427: xmlParseNotationDecl(xmlParserCtxtPtr ctxt) {
1.123 daniel 4428: xmlChar *name;
4429: xmlChar *Pubid;
4430: xmlChar *Systemid;
1.22 daniel 4431:
1.152 daniel 4432: if ((RAW == '<') && (NXT(1) == '!') &&
1.40 daniel 4433: (NXT(2) == 'N') && (NXT(3) == 'O') &&
4434: (NXT(4) == 'T') && (NXT(5) == 'A') &&
4435: (NXT(6) == 'T') && (NXT(7) == 'I') &&
1.67 daniel 4436: (NXT(8) == 'O') && (NXT(9) == 'N')) {
1.91 daniel 4437: SHRINK;
1.40 daniel 4438: SKIP(10);
1.67 daniel 4439: if (!IS_BLANK(CUR)) {
4440: if ((ctxt->sax != NULL) && (ctxt->sax->error != NULL))
1.122 daniel 4441: ctxt->sax->error(ctxt->userData,
4442: "Space required after '<!NOTATION'\n");
1.123 daniel 4443: ctxt->errNo = XML_ERR_SPACE_REQUIRED;
1.67 daniel 4444: ctxt->wellFormed = 0;
1.180 daniel 4445: ctxt->disableSAX = 1;
1.67 daniel 4446: return;
4447: }
4448: SKIP_BLANKS;
1.22 daniel 4449:
4450: name = xmlParseName(ctxt);
4451: if (name == NULL) {
1.55 daniel 4452: if ((ctxt->sax != NULL) && (ctxt->sax->error != NULL))
1.122 daniel 4453: ctxt->sax->error(ctxt->userData,
4454: "NOTATION: Name expected here\n");
1.123 daniel 4455: ctxt->errNo = XML_ERR_NOTATION_NOT_STARTED;
1.67 daniel 4456: ctxt->wellFormed = 0;
1.180 daniel 4457: ctxt->disableSAX = 1;
1.67 daniel 4458: return;
4459: }
4460: if (!IS_BLANK(CUR)) {
4461: if ((ctxt->sax != NULL) && (ctxt->sax->error != NULL))
1.74 daniel 4462: ctxt->sax->error(ctxt->userData,
1.67 daniel 4463: "Space required after the NOTATION name'\n");
1.123 daniel 4464: ctxt->errNo = XML_ERR_SPACE_REQUIRED;
1.59 daniel 4465: ctxt->wellFormed = 0;
1.180 daniel 4466: ctxt->disableSAX = 1;
1.22 daniel 4467: return;
4468: }
1.42 daniel 4469: SKIP_BLANKS;
1.67 daniel 4470:
1.22 daniel 4471: /*
1.67 daniel 4472: * Parse the IDs.
1.22 daniel 4473: */
1.160 daniel 4474: Systemid = xmlParseExternalID(ctxt, &Pubid, 0);
1.67 daniel 4475: SKIP_BLANKS;
4476:
1.152 daniel 4477: if (RAW == '>') {
1.40 daniel 4478: NEXT;
1.171 daniel 4479: if ((ctxt->sax != NULL) && (!ctxt->disableSAX) &&
4480: (ctxt->sax->notationDecl != NULL))
1.74 daniel 4481: ctxt->sax->notationDecl(ctxt->userData, name, Pubid, Systemid);
1.67 daniel 4482: } else {
4483: if ((ctxt->sax != NULL) && (ctxt->sax->error != NULL))
1.74 daniel 4484: ctxt->sax->error(ctxt->userData,
1.67 daniel 4485: "'>' required to close NOTATION declaration\n");
1.123 daniel 4486: ctxt->errNo = XML_ERR_NOTATION_NOT_FINISHED;
1.67 daniel 4487: ctxt->wellFormed = 0;
1.180 daniel 4488: ctxt->disableSAX = 1;
1.67 daniel 4489: }
1.119 daniel 4490: xmlFree(name);
4491: if (Systemid != NULL) xmlFree(Systemid);
4492: if (Pubid != NULL) xmlFree(Pubid);
1.22 daniel 4493: }
4494: }
4495:
1.50 daniel 4496: /**
4497: * xmlParseEntityDecl:
4498: * @ctxt: an XML parser context
4499: *
4500: * parse <!ENTITY declarations
1.22 daniel 4501: *
4502: * [70] EntityDecl ::= GEDecl | PEDecl
4503: *
4504: * [71] GEDecl ::= '<!ENTITY' S Name S EntityDef S? '>'
4505: *
4506: * [72] PEDecl ::= '<!ENTITY' S '%' S Name S PEDef S? '>'
4507: *
4508: * [73] EntityDef ::= EntityValue | (ExternalID NDataDecl?)
4509: *
4510: * [74] PEDef ::= EntityValue | ExternalID
1.24 daniel 4511: *
4512: * [76] NDataDecl ::= S 'NDATA' S Name
1.99 daniel 4513: *
4514: * [ VC: Notation Declared ]
1.116 daniel 4515: * The Name must match the declared name of a notation.
1.22 daniel 4516: */
4517:
1.55 daniel 4518: void
4519: xmlParseEntityDecl(xmlParserCtxtPtr ctxt) {
1.123 daniel 4520: xmlChar *name = NULL;
4521: xmlChar *value = NULL;
4522: xmlChar *URI = NULL, *literal = NULL;
4523: xmlChar *ndata = NULL;
1.39 daniel 4524: int isParameter = 0;
1.123 daniel 4525: xmlChar *orig = NULL;
1.22 daniel 4526:
1.94 daniel 4527: GROW;
1.152 daniel 4528: if ((RAW == '<') && (NXT(1) == '!') &&
1.40 daniel 4529: (NXT(2) == 'E') && (NXT(3) == 'N') &&
4530: (NXT(4) == 'T') && (NXT(5) == 'I') &&
1.59 daniel 4531: (NXT(6) == 'T') && (NXT(7) == 'Y')) {
1.96 daniel 4532: ctxt->instate = XML_PARSER_ENTITY_DECL;
1.91 daniel 4533: SHRINK;
1.40 daniel 4534: SKIP(8);
1.59 daniel 4535: if (!IS_BLANK(CUR)) {
4536: if ((ctxt->sax != NULL) && (ctxt->sax->error != NULL))
1.122 daniel 4537: ctxt->sax->error(ctxt->userData,
4538: "Space required after '<!ENTITY'\n");
1.123 daniel 4539: ctxt->errNo = XML_ERR_SPACE_REQUIRED;
1.59 daniel 4540: ctxt->wellFormed = 0;
1.180 daniel 4541: ctxt->disableSAX = 1;
1.59 daniel 4542: }
4543: SKIP_BLANKS;
1.40 daniel 4544:
1.152 daniel 4545: if (RAW == '%') {
1.40 daniel 4546: NEXT;
1.59 daniel 4547: if (!IS_BLANK(CUR)) {
4548: if ((ctxt->sax != NULL) && (ctxt->sax->error != NULL))
1.122 daniel 4549: ctxt->sax->error(ctxt->userData,
4550: "Space required after '%'\n");
1.123 daniel 4551: ctxt->errNo = XML_ERR_SPACE_REQUIRED;
1.59 daniel 4552: ctxt->wellFormed = 0;
1.180 daniel 4553: ctxt->disableSAX = 1;
1.59 daniel 4554: }
1.42 daniel 4555: SKIP_BLANKS;
1.39 daniel 4556: isParameter = 1;
1.22 daniel 4557: }
4558:
4559: name = xmlParseName(ctxt);
1.24 daniel 4560: if (name == NULL) {
1.55 daniel 4561: if ((ctxt->sax != NULL) && (ctxt->sax->error != NULL))
1.74 daniel 4562: ctxt->sax->error(ctxt->userData, "xmlParseEntityDecl: no name\n");
1.123 daniel 4563: ctxt->errNo = XML_ERR_NAME_REQUIRED;
1.59 daniel 4564: ctxt->wellFormed = 0;
1.180 daniel 4565: ctxt->disableSAX = 1;
1.24 daniel 4566: return;
4567: }
1.59 daniel 4568: if (!IS_BLANK(CUR)) {
4569: if ((ctxt->sax != NULL) && (ctxt->sax->error != NULL))
1.74 daniel 4570: ctxt->sax->error(ctxt->userData,
1.59 daniel 4571: "Space required after the entity name\n");
1.123 daniel 4572: ctxt->errNo = XML_ERR_SPACE_REQUIRED;
1.59 daniel 4573: ctxt->wellFormed = 0;
1.180 daniel 4574: ctxt->disableSAX = 1;
1.59 daniel 4575: }
1.42 daniel 4576: SKIP_BLANKS;
1.24 daniel 4577:
1.22 daniel 4578: /*
1.68 daniel 4579: * handle the various case of definitions...
1.22 daniel 4580: */
1.39 daniel 4581: if (isParameter) {
1.152 daniel 4582: if ((RAW == '"') || (RAW == '\''))
1.78 daniel 4583: value = xmlParseEntityValue(ctxt, &orig);
1.39 daniel 4584: if (value) {
1.171 daniel 4585: if ((ctxt->sax != NULL) &&
4586: (!ctxt->disableSAX) && (ctxt->sax->entityDecl != NULL))
1.74 daniel 4587: ctxt->sax->entityDecl(ctxt->userData, name,
1.39 daniel 4588: XML_INTERNAL_PARAMETER_ENTITY,
4589: NULL, NULL, value);
4590: }
1.24 daniel 4591: else {
1.67 daniel 4592: URI = xmlParseExternalID(ctxt, &literal, 1);
1.169 daniel 4593: if ((URI == NULL) && (literal == NULL)) {
4594: if ((ctxt->sax != NULL) && (ctxt->sax->error != NULL))
4595: ctxt->sax->error(ctxt->userData,
4596: "Entity value required\n");
4597: ctxt->errNo = XML_ERR_VALUE_REQUIRED;
4598: ctxt->wellFormed = 0;
1.180 daniel 4599: ctxt->disableSAX = 1;
1.169 daniel 4600: }
1.39 daniel 4601: if (URI) {
1.171 daniel 4602: if ((ctxt->sax != NULL) &&
4603: (!ctxt->disableSAX) && (ctxt->sax->entityDecl != NULL))
1.74 daniel 4604: ctxt->sax->entityDecl(ctxt->userData, name,
1.39 daniel 4605: XML_EXTERNAL_PARAMETER_ENTITY,
4606: literal, URI, NULL);
4607: }
1.24 daniel 4608: }
4609: } else {
1.152 daniel 4610: if ((RAW == '"') || (RAW == '\'')) {
1.78 daniel 4611: value = xmlParseEntityValue(ctxt, &orig);
1.171 daniel 4612: if ((ctxt->sax != NULL) &&
4613: (!ctxt->disableSAX) && (ctxt->sax->entityDecl != NULL))
1.74 daniel 4614: ctxt->sax->entityDecl(ctxt->userData, name,
1.39 daniel 4615: XML_INTERNAL_GENERAL_ENTITY,
4616: NULL, NULL, value);
4617: } else {
1.67 daniel 4618: URI = xmlParseExternalID(ctxt, &literal, 1);
1.169 daniel 4619: if ((URI == NULL) && (literal == NULL)) {
4620: if ((ctxt->sax != NULL) && (ctxt->sax->error != NULL))
4621: ctxt->sax->error(ctxt->userData,
4622: "Entity value required\n");
4623: ctxt->errNo = XML_ERR_VALUE_REQUIRED;
4624: ctxt->wellFormed = 0;
1.180 daniel 4625: ctxt->disableSAX = 1;
1.169 daniel 4626: }
1.152 daniel 4627: if ((RAW != '>') && (!IS_BLANK(CUR))) {
1.59 daniel 4628: if ((ctxt->sax != NULL) && (ctxt->sax->error != NULL))
1.74 daniel 4629: ctxt->sax->error(ctxt->userData,
1.59 daniel 4630: "Space required before 'NDATA'\n");
1.123 daniel 4631: ctxt->errNo = XML_ERR_SPACE_REQUIRED;
1.59 daniel 4632: ctxt->wellFormed = 0;
1.180 daniel 4633: ctxt->disableSAX = 1;
1.59 daniel 4634: }
1.42 daniel 4635: SKIP_BLANKS;
1.152 daniel 4636: if ((RAW == 'N') && (NXT(1) == 'D') &&
1.40 daniel 4637: (NXT(2) == 'A') && (NXT(3) == 'T') &&
4638: (NXT(4) == 'A')) {
4639: SKIP(5);
1.59 daniel 4640: if (!IS_BLANK(CUR)) {
4641: if ((ctxt->sax != NULL) && (ctxt->sax->error != NULL))
1.74 daniel 4642: ctxt->sax->error(ctxt->userData,
1.59 daniel 4643: "Space required after 'NDATA'\n");
1.123 daniel 4644: ctxt->errNo = XML_ERR_SPACE_REQUIRED;
1.59 daniel 4645: ctxt->wellFormed = 0;
1.180 daniel 4646: ctxt->disableSAX = 1;
1.59 daniel 4647: }
1.42 daniel 4648: SKIP_BLANKS;
1.24 daniel 4649: ndata = xmlParseName(ctxt);
1.171 daniel 4650: if ((ctxt->sax != NULL) && (!ctxt->disableSAX) &&
1.116 daniel 4651: (ctxt->sax->unparsedEntityDecl != NULL))
4652: ctxt->sax->unparsedEntityDecl(ctxt->userData, name,
1.39 daniel 4653: literal, URI, ndata);
4654: } else {
1.171 daniel 4655: if ((ctxt->sax != NULL) &&
4656: (!ctxt->disableSAX) && (ctxt->sax->entityDecl != NULL))
1.74 daniel 4657: ctxt->sax->entityDecl(ctxt->userData, name,
1.39 daniel 4658: XML_EXTERNAL_GENERAL_PARSED_ENTITY,
4659: literal, URI, NULL);
1.24 daniel 4660: }
4661: }
4662: }
1.42 daniel 4663: SKIP_BLANKS;
1.152 daniel 4664: if (RAW != '>') {
1.55 daniel 4665: if ((ctxt->sax != NULL) && (ctxt->sax->error != NULL))
1.74 daniel 4666: ctxt->sax->error(ctxt->userData,
1.31 daniel 4667: "xmlParseEntityDecl: entity %s not terminated\n", name);
1.123 daniel 4668: ctxt->errNo = XML_ERR_ENTITY_NOT_FINISHED;
1.59 daniel 4669: ctxt->wellFormed = 0;
1.180 daniel 4670: ctxt->disableSAX = 1;
1.24 daniel 4671: } else
1.40 daniel 4672: NEXT;
1.78 daniel 4673: if (orig != NULL) {
4674: /*
1.98 daniel 4675: * Ugly mechanism to save the raw entity value.
1.78 daniel 4676: */
4677: xmlEntityPtr cur = NULL;
4678:
1.98 daniel 4679: if (isParameter) {
4680: if ((ctxt->sax != NULL) &&
4681: (ctxt->sax->getParameterEntity != NULL))
1.120 daniel 4682: cur = ctxt->sax->getParameterEntity(ctxt->userData, name);
1.98 daniel 4683: } else {
4684: if ((ctxt->sax != NULL) &&
4685: (ctxt->sax->getEntity != NULL))
1.120 daniel 4686: cur = ctxt->sax->getEntity(ctxt->userData, name);
1.98 daniel 4687: }
4688: if (cur != NULL) {
4689: if (cur->orig != NULL)
1.119 daniel 4690: xmlFree(orig);
1.98 daniel 4691: else
4692: cur->orig = orig;
4693: } else
1.119 daniel 4694: xmlFree(orig);
1.78 daniel 4695: }
1.119 daniel 4696: if (name != NULL) xmlFree(name);
4697: if (value != NULL) xmlFree(value);
4698: if (URI != NULL) xmlFree(URI);
4699: if (literal != NULL) xmlFree(literal);
4700: if (ndata != NULL) xmlFree(ndata);
1.22 daniel 4701: }
4702: }
4703:
1.50 daniel 4704: /**
1.59 daniel 4705: * xmlParseDefaultDecl:
4706: * @ctxt: an XML parser context
4707: * @value: Receive a possible fixed default value for the attribute
4708: *
4709: * Parse an attribute default declaration
4710: *
4711: * [60] DefaultDecl ::= '#REQUIRED' | '#IMPLIED' | (('#FIXED' S)? AttValue)
4712: *
1.99 daniel 4713: * [ VC: Required Attribute ]
1.117 daniel 4714: * if the default declaration is the keyword #REQUIRED, then the
4715: * attribute must be specified for all elements of the type in the
4716: * attribute-list declaration.
1.99 daniel 4717: *
4718: * [ VC: Attribute Default Legal ]
1.102 daniel 4719: * The declared default value must meet the lexical constraints of
4720: * the declared attribute type c.f. xmlValidateAttributeDecl()
1.99 daniel 4721: *
4722: * [ VC: Fixed Attribute Default ]
1.117 daniel 4723: * if an attribute has a default value declared with the #FIXED
4724: * keyword, instances of that attribute must match the default value.
1.99 daniel 4725: *
4726: * [ WFC: No < in Attribute Values ]
4727: * handled in xmlParseAttValue()
4728: *
1.59 daniel 4729: * returns: XML_ATTRIBUTE_NONE, XML_ATTRIBUTE_REQUIRED, XML_ATTRIBUTE_IMPLIED
4730: * or XML_ATTRIBUTE_FIXED.
4731: */
4732:
4733: int
1.123 daniel 4734: xmlParseDefaultDecl(xmlParserCtxtPtr ctxt, xmlChar **value) {
1.59 daniel 4735: int val;
1.123 daniel 4736: xmlChar *ret;
1.59 daniel 4737:
4738: *value = NULL;
1.152 daniel 4739: if ((RAW == '#') && (NXT(1) == 'R') &&
1.59 daniel 4740: (NXT(2) == 'E') && (NXT(3) == 'Q') &&
4741: (NXT(4) == 'U') && (NXT(5) == 'I') &&
4742: (NXT(6) == 'R') && (NXT(7) == 'E') &&
4743: (NXT(8) == 'D')) {
4744: SKIP(9);
4745: return(XML_ATTRIBUTE_REQUIRED);
4746: }
1.152 daniel 4747: if ((RAW == '#') && (NXT(1) == 'I') &&
1.59 daniel 4748: (NXT(2) == 'M') && (NXT(3) == 'P') &&
4749: (NXT(4) == 'L') && (NXT(5) == 'I') &&
4750: (NXT(6) == 'E') && (NXT(7) == 'D')) {
4751: SKIP(8);
4752: return(XML_ATTRIBUTE_IMPLIED);
4753: }
4754: val = XML_ATTRIBUTE_NONE;
1.152 daniel 4755: if ((RAW == '#') && (NXT(1) == 'F') &&
1.59 daniel 4756: (NXT(2) == 'I') && (NXT(3) == 'X') &&
4757: (NXT(4) == 'E') && (NXT(5) == 'D')) {
4758: SKIP(6);
4759: val = XML_ATTRIBUTE_FIXED;
4760: if (!IS_BLANK(CUR)) {
4761: if ((ctxt->sax != NULL) && (ctxt->sax->error != NULL))
1.122 daniel 4762: ctxt->sax->error(ctxt->userData,
4763: "Space required after '#FIXED'\n");
1.123 daniel 4764: ctxt->errNo = XML_ERR_SPACE_REQUIRED;
1.59 daniel 4765: ctxt->wellFormed = 0;
1.180 daniel 4766: ctxt->disableSAX = 1;
1.59 daniel 4767: }
4768: SKIP_BLANKS;
4769: }
4770: ret = xmlParseAttValue(ctxt);
1.96 daniel 4771: ctxt->instate = XML_PARSER_DTD;
1.59 daniel 4772: if (ret == NULL) {
4773: if ((ctxt->sax != NULL) && (ctxt->sax->error != NULL))
1.74 daniel 4774: ctxt->sax->error(ctxt->userData,
1.59 daniel 4775: "Attribute default value declaration error\n");
4776: ctxt->wellFormed = 0;
1.180 daniel 4777: ctxt->disableSAX = 1;
1.59 daniel 4778: } else
4779: *value = ret;
4780: return(val);
4781: }
4782:
4783: /**
1.66 daniel 4784: * xmlParseNotationType:
4785: * @ctxt: an XML parser context
4786: *
4787: * parse an Notation attribute type.
4788: *
1.99 daniel 4789: * Note: the leading 'NOTATION' S part has already being parsed...
4790: *
1.66 daniel 4791: * [58] NotationType ::= 'NOTATION' S '(' S? Name (S? '|' S? Name)* S? ')'
4792: *
1.99 daniel 4793: * [ VC: Notation Attributes ]
1.117 daniel 4794: * Values of this type must match one of the notation names included
1.99 daniel 4795: * in the declaration; all notation names in the declaration must be declared.
1.66 daniel 4796: *
4797: * Returns: the notation attribute tree built while parsing
4798: */
4799:
4800: xmlEnumerationPtr
4801: xmlParseNotationType(xmlParserCtxtPtr ctxt) {
1.123 daniel 4802: xmlChar *name;
1.66 daniel 4803: xmlEnumerationPtr ret = NULL, last = NULL, cur;
4804:
1.152 daniel 4805: if (RAW != '(') {
1.66 daniel 4806: if ((ctxt->sax != NULL) && (ctxt->sax->error != NULL))
1.122 daniel 4807: ctxt->sax->error(ctxt->userData,
4808: "'(' required to start 'NOTATION'\n");
1.123 daniel 4809: ctxt->errNo = XML_ERR_NOTATION_NOT_STARTED;
1.66 daniel 4810: ctxt->wellFormed = 0;
1.180 daniel 4811: ctxt->disableSAX = 1;
1.66 daniel 4812: return(NULL);
4813: }
1.91 daniel 4814: SHRINK;
1.66 daniel 4815: do {
4816: NEXT;
4817: SKIP_BLANKS;
4818: name = xmlParseName(ctxt);
4819: if (name == NULL) {
4820: if ((ctxt->sax != NULL) && (ctxt->sax->error != NULL))
1.74 daniel 4821: ctxt->sax->error(ctxt->userData,
1.66 daniel 4822: "Name expected in NOTATION declaration\n");
1.123 daniel 4823: ctxt->errNo = XML_ERR_NAME_REQUIRED;
1.66 daniel 4824: ctxt->wellFormed = 0;
1.180 daniel 4825: ctxt->disableSAX = 1;
1.66 daniel 4826: return(ret);
4827: }
4828: cur = xmlCreateEnumeration(name);
1.119 daniel 4829: xmlFree(name);
1.66 daniel 4830: if (cur == NULL) return(ret);
4831: if (last == NULL) ret = last = cur;
4832: else {
4833: last->next = cur;
4834: last = cur;
4835: }
4836: SKIP_BLANKS;
1.152 daniel 4837: } while (RAW == '|');
4838: if (RAW != ')') {
1.66 daniel 4839: if ((ctxt->sax != NULL) && (ctxt->sax->error != NULL))
1.74 daniel 4840: ctxt->sax->error(ctxt->userData,
1.66 daniel 4841: "')' required to finish NOTATION declaration\n");
1.123 daniel 4842: ctxt->errNo = XML_ERR_NOTATION_NOT_FINISHED;
1.66 daniel 4843: ctxt->wellFormed = 0;
1.180 daniel 4844: ctxt->disableSAX = 1;
1.170 daniel 4845: if ((last != NULL) && (last != ret))
4846: xmlFreeEnumeration(last);
1.66 daniel 4847: return(ret);
4848: }
4849: NEXT;
4850: return(ret);
4851: }
4852:
4853: /**
4854: * xmlParseEnumerationType:
4855: * @ctxt: an XML parser context
4856: *
4857: * parse an Enumeration attribute type.
4858: *
4859: * [59] Enumeration ::= '(' S? Nmtoken (S? '|' S? Nmtoken)* S? ')'
4860: *
1.99 daniel 4861: * [ VC: Enumeration ]
1.117 daniel 4862: * Values of this type must match one of the Nmtoken tokens in
1.99 daniel 4863: * the declaration
4864: *
1.66 daniel 4865: * Returns: the enumeration attribute tree built while parsing
4866: */
4867:
4868: xmlEnumerationPtr
4869: xmlParseEnumerationType(xmlParserCtxtPtr ctxt) {
1.123 daniel 4870: xmlChar *name;
1.66 daniel 4871: xmlEnumerationPtr ret = NULL, last = NULL, cur;
4872:
1.152 daniel 4873: if (RAW != '(') {
1.66 daniel 4874: if ((ctxt->sax != NULL) && (ctxt->sax->error != NULL))
1.74 daniel 4875: ctxt->sax->error(ctxt->userData,
1.66 daniel 4876: "'(' required to start ATTLIST enumeration\n");
1.123 daniel 4877: ctxt->errNo = XML_ERR_ATTLIST_NOT_STARTED;
1.66 daniel 4878: ctxt->wellFormed = 0;
1.180 daniel 4879: ctxt->disableSAX = 1;
1.66 daniel 4880: return(NULL);
4881: }
1.91 daniel 4882: SHRINK;
1.66 daniel 4883: do {
4884: NEXT;
4885: SKIP_BLANKS;
4886: name = xmlParseNmtoken(ctxt);
4887: if (name == NULL) {
4888: if ((ctxt->sax != NULL) && (ctxt->sax->error != NULL))
1.74 daniel 4889: ctxt->sax->error(ctxt->userData,
1.66 daniel 4890: "NmToken expected in ATTLIST enumeration\n");
1.123 daniel 4891: ctxt->errNo = XML_ERR_NMTOKEN_REQUIRED;
1.66 daniel 4892: ctxt->wellFormed = 0;
1.180 daniel 4893: ctxt->disableSAX = 1;
1.66 daniel 4894: return(ret);
4895: }
4896: cur = xmlCreateEnumeration(name);
1.119 daniel 4897: xmlFree(name);
1.66 daniel 4898: if (cur == NULL) return(ret);
4899: if (last == NULL) ret = last = cur;
4900: else {
4901: last->next = cur;
4902: last = cur;
4903: }
4904: SKIP_BLANKS;
1.152 daniel 4905: } while (RAW == '|');
4906: if (RAW != ')') {
1.66 daniel 4907: if ((ctxt->sax != NULL) && (ctxt->sax->error != NULL))
1.74 daniel 4908: ctxt->sax->error(ctxt->userData,
1.66 daniel 4909: "')' required to finish ATTLIST enumeration\n");
1.123 daniel 4910: ctxt->errNo = XML_ERR_ATTLIST_NOT_FINISHED;
1.66 daniel 4911: ctxt->wellFormed = 0;
1.180 daniel 4912: ctxt->disableSAX = 1;
1.66 daniel 4913: return(ret);
4914: }
4915: NEXT;
4916: return(ret);
4917: }
4918:
4919: /**
1.50 daniel 4920: * xmlParseEnumeratedType:
4921: * @ctxt: an XML parser context
1.66 daniel 4922: * @tree: the enumeration tree built while parsing
1.50 daniel 4923: *
1.66 daniel 4924: * parse an Enumerated attribute type.
1.22 daniel 4925: *
4926: * [57] EnumeratedType ::= NotationType | Enumeration
4927: *
4928: * [58] NotationType ::= 'NOTATION' S '(' S? Name (S? '|' S? Name)* S? ')'
4929: *
1.50 daniel 4930: *
1.66 daniel 4931: * Returns: XML_ATTRIBUTE_ENUMERATION or XML_ATTRIBUTE_NOTATION
1.22 daniel 4932: */
4933:
1.66 daniel 4934: int
4935: xmlParseEnumeratedType(xmlParserCtxtPtr ctxt, xmlEnumerationPtr *tree) {
1.152 daniel 4936: if ((RAW == 'N') && (NXT(1) == 'O') &&
1.66 daniel 4937: (NXT(2) == 'T') && (NXT(3) == 'A') &&
4938: (NXT(4) == 'T') && (NXT(5) == 'I') &&
4939: (NXT(6) == 'O') && (NXT(7) == 'N')) {
4940: SKIP(8);
4941: if (!IS_BLANK(CUR)) {
4942: if ((ctxt->sax != NULL) && (ctxt->sax->error != NULL))
1.122 daniel 4943: ctxt->sax->error(ctxt->userData,
4944: "Space required after 'NOTATION'\n");
1.123 daniel 4945: ctxt->errNo = XML_ERR_SPACE_REQUIRED;
1.66 daniel 4946: ctxt->wellFormed = 0;
1.180 daniel 4947: ctxt->disableSAX = 1;
1.66 daniel 4948: return(0);
4949: }
4950: SKIP_BLANKS;
4951: *tree = xmlParseNotationType(ctxt);
4952: if (*tree == NULL) return(0);
4953: return(XML_ATTRIBUTE_NOTATION);
4954: }
4955: *tree = xmlParseEnumerationType(ctxt);
4956: if (*tree == NULL) return(0);
4957: return(XML_ATTRIBUTE_ENUMERATION);
1.22 daniel 4958: }
4959:
1.50 daniel 4960: /**
4961: * xmlParseAttributeType:
4962: * @ctxt: an XML parser context
1.66 daniel 4963: * @tree: the enumeration tree built while parsing
1.50 daniel 4964: *
1.59 daniel 4965: * parse the Attribute list def for an element
1.22 daniel 4966: *
4967: * [54] AttType ::= StringType | TokenizedType | EnumeratedType
4968: *
4969: * [55] StringType ::= 'CDATA'
4970: *
4971: * [56] TokenizedType ::= 'ID' | 'IDREF' | 'IDREFS' | 'ENTITY' |
4972: * 'ENTITIES' | 'NMTOKEN' | 'NMTOKENS'
1.50 daniel 4973: *
1.102 daniel 4974: * Validity constraints for attribute values syntax are checked in
4975: * xmlValidateAttributeValue()
4976: *
1.99 daniel 4977: * [ VC: ID ]
1.117 daniel 4978: * Values of type ID must match the Name production. A name must not
1.99 daniel 4979: * appear more than once in an XML document as a value of this type;
4980: * i.e., ID values must uniquely identify the elements which bear them.
4981: *
4982: * [ VC: One ID per Element Type ]
1.117 daniel 4983: * No element type may have more than one ID attribute specified.
1.99 daniel 4984: *
4985: * [ VC: ID Attribute Default ]
1.117 daniel 4986: * An ID attribute must have a declared default of #IMPLIED or #REQUIRED.
1.99 daniel 4987: *
4988: * [ VC: IDREF ]
1.102 daniel 4989: * Values of type IDREF must match the Name production, and values
1.140 daniel 4990: * of type IDREFS must match Names; each IDREF Name must match the value
1.117 daniel 4991: * of an ID attribute on some element in the XML document; i.e. IDREF
1.99 daniel 4992: * values must match the value of some ID attribute.
4993: *
4994: * [ VC: Entity Name ]
1.102 daniel 4995: * Values of type ENTITY must match the Name production, values
1.140 daniel 4996: * of type ENTITIES must match Names; each Entity Name must match the
1.117 daniel 4997: * name of an unparsed entity declared in the DTD.
1.99 daniel 4998: *
4999: * [ VC: Name Token ]
1.102 daniel 5000: * Values of type NMTOKEN must match the Nmtoken production; values
1.99 daniel 5001: * of type NMTOKENS must match Nmtokens.
5002: *
1.69 daniel 5003: * Returns the attribute type
1.22 daniel 5004: */
1.59 daniel 5005: int
1.66 daniel 5006: xmlParseAttributeType(xmlParserCtxtPtr ctxt, xmlEnumerationPtr *tree) {
1.91 daniel 5007: SHRINK;
1.152 daniel 5008: if ((RAW == 'C') && (NXT(1) == 'D') &&
1.40 daniel 5009: (NXT(2) == 'A') && (NXT(3) == 'T') &&
5010: (NXT(4) == 'A')) {
5011: SKIP(5);
1.66 daniel 5012: return(XML_ATTRIBUTE_CDATA);
1.152 daniel 5013: } else if ((RAW == 'I') && (NXT(1) == 'D') &&
1.40 daniel 5014: (NXT(2) == 'R') && (NXT(3) == 'E') &&
1.97 daniel 5015: (NXT(4) == 'F') && (NXT(5) == 'S')) {
5016: SKIP(6);
5017: return(XML_ATTRIBUTE_IDREFS);
1.152 daniel 5018: } else if ((RAW == 'I') && (NXT(1) == 'D') &&
1.97 daniel 5019: (NXT(2) == 'R') && (NXT(3) == 'E') &&
1.40 daniel 5020: (NXT(4) == 'F')) {
5021: SKIP(5);
1.59 daniel 5022: return(XML_ATTRIBUTE_IDREF);
1.152 daniel 5023: } else if ((RAW == 'I') && (NXT(1) == 'D')) {
1.66 daniel 5024: SKIP(2);
5025: return(XML_ATTRIBUTE_ID);
1.152 daniel 5026: } else if ((RAW == 'E') && (NXT(1) == 'N') &&
1.40 daniel 5027: (NXT(2) == 'T') && (NXT(3) == 'I') &&
5028: (NXT(4) == 'T') && (NXT(5) == 'Y')) {
5029: SKIP(6);
1.59 daniel 5030: return(XML_ATTRIBUTE_ENTITY);
1.152 daniel 5031: } else if ((RAW == 'E') && (NXT(1) == 'N') &&
1.40 daniel 5032: (NXT(2) == 'T') && (NXT(3) == 'I') &&
5033: (NXT(4) == 'T') && (NXT(5) == 'I') &&
5034: (NXT(6) == 'E') && (NXT(7) == 'S')) {
5035: SKIP(8);
1.59 daniel 5036: return(XML_ATTRIBUTE_ENTITIES);
1.152 daniel 5037: } else if ((RAW == 'N') && (NXT(1) == 'M') &&
1.40 daniel 5038: (NXT(2) == 'T') && (NXT(3) == 'O') &&
5039: (NXT(4) == 'K') && (NXT(5) == 'E') &&
1.66 daniel 5040: (NXT(6) == 'N') && (NXT(7) == 'S')) {
5041: SKIP(8);
5042: return(XML_ATTRIBUTE_NMTOKENS);
1.152 daniel 5043: } else if ((RAW == 'N') && (NXT(1) == 'M') &&
1.66 daniel 5044: (NXT(2) == 'T') && (NXT(3) == 'O') &&
5045: (NXT(4) == 'K') && (NXT(5) == 'E') &&
1.40 daniel 5046: (NXT(6) == 'N')) {
5047: SKIP(7);
1.59 daniel 5048: return(XML_ATTRIBUTE_NMTOKEN);
1.22 daniel 5049: }
1.66 daniel 5050: return(xmlParseEnumeratedType(ctxt, tree));
1.22 daniel 5051: }
5052:
1.50 daniel 5053: /**
5054: * xmlParseAttributeListDecl:
5055: * @ctxt: an XML parser context
5056: *
5057: * : parse the Attribute list def for an element
1.22 daniel 5058: *
5059: * [52] AttlistDecl ::= '<!ATTLIST' S Name AttDef* S? '>'
5060: *
5061: * [53] AttDef ::= S Name S AttType S DefaultDecl
1.50 daniel 5062: *
1.22 daniel 5063: */
1.55 daniel 5064: void
5065: xmlParseAttributeListDecl(xmlParserCtxtPtr ctxt) {
1.123 daniel 5066: xmlChar *elemName;
5067: xmlChar *attrName;
1.103 daniel 5068: xmlEnumerationPtr tree;
1.22 daniel 5069:
1.152 daniel 5070: if ((RAW == '<') && (NXT(1) == '!') &&
1.40 daniel 5071: (NXT(2) == 'A') && (NXT(3) == 'T') &&
5072: (NXT(4) == 'T') && (NXT(5) == 'L') &&
5073: (NXT(6) == 'I') && (NXT(7) == 'S') &&
1.59 daniel 5074: (NXT(8) == 'T')) {
1.40 daniel 5075: SKIP(9);
1.59 daniel 5076: if (!IS_BLANK(CUR)) {
5077: if ((ctxt->sax != NULL) && (ctxt->sax->error != NULL))
1.122 daniel 5078: ctxt->sax->error(ctxt->userData,
5079: "Space required after '<!ATTLIST'\n");
1.123 daniel 5080: ctxt->errNo = XML_ERR_SPACE_REQUIRED;
1.59 daniel 5081: ctxt->wellFormed = 0;
1.180 daniel 5082: ctxt->disableSAX = 1;
1.59 daniel 5083: }
1.42 daniel 5084: SKIP_BLANKS;
1.59 daniel 5085: elemName = xmlParseName(ctxt);
5086: if (elemName == NULL) {
1.55 daniel 5087: if ((ctxt->sax != NULL) && (ctxt->sax->error != NULL))
1.122 daniel 5088: ctxt->sax->error(ctxt->userData,
5089: "ATTLIST: no name for Element\n");
1.123 daniel 5090: ctxt->errNo = XML_ERR_NAME_REQUIRED;
1.59 daniel 5091: ctxt->wellFormed = 0;
1.180 daniel 5092: ctxt->disableSAX = 1;
1.22 daniel 5093: return;
5094: }
1.42 daniel 5095: SKIP_BLANKS;
1.152 daniel 5096: while (RAW != '>') {
1.123 daniel 5097: const xmlChar *check = CUR_PTR;
1.59 daniel 5098: int type;
5099: int def;
1.123 daniel 5100: xmlChar *defaultValue = NULL;
1.59 daniel 5101:
1.103 daniel 5102: tree = NULL;
1.59 daniel 5103: attrName = xmlParseName(ctxt);
5104: if (attrName == NULL) {
5105: if ((ctxt->sax != NULL) && (ctxt->sax->error != NULL))
1.122 daniel 5106: ctxt->sax->error(ctxt->userData,
5107: "ATTLIST: no name for Attribute\n");
1.123 daniel 5108: ctxt->errNo = XML_ERR_NAME_REQUIRED;
1.59 daniel 5109: ctxt->wellFormed = 0;
1.180 daniel 5110: ctxt->disableSAX = 1;
1.59 daniel 5111: break;
5112: }
1.97 daniel 5113: GROW;
1.59 daniel 5114: if (!IS_BLANK(CUR)) {
5115: if ((ctxt->sax != NULL) && (ctxt->sax->error != NULL))
1.74 daniel 5116: ctxt->sax->error(ctxt->userData,
1.59 daniel 5117: "Space required after the attribute name\n");
1.123 daniel 5118: ctxt->errNo = XML_ERR_SPACE_REQUIRED;
1.59 daniel 5119: ctxt->wellFormed = 0;
1.180 daniel 5120: ctxt->disableSAX = 1;
1.170 daniel 5121: if (attrName != NULL)
5122: xmlFree(attrName);
5123: if (defaultValue != NULL)
5124: xmlFree(defaultValue);
1.59 daniel 5125: break;
5126: }
5127: SKIP_BLANKS;
5128:
1.66 daniel 5129: type = xmlParseAttributeType(ctxt, &tree);
1.170 daniel 5130: if (type <= 0) {
5131: if (attrName != NULL)
5132: xmlFree(attrName);
5133: if (defaultValue != NULL)
5134: xmlFree(defaultValue);
5135: break;
5136: }
1.22 daniel 5137:
1.97 daniel 5138: GROW;
1.59 daniel 5139: if (!IS_BLANK(CUR)) {
5140: if ((ctxt->sax != NULL) && (ctxt->sax->error != NULL))
1.74 daniel 5141: ctxt->sax->error(ctxt->userData,
1.59 daniel 5142: "Space required after the attribute type\n");
1.123 daniel 5143: ctxt->errNo = XML_ERR_SPACE_REQUIRED;
1.59 daniel 5144: ctxt->wellFormed = 0;
1.180 daniel 5145: ctxt->disableSAX = 1;
1.170 daniel 5146: if (attrName != NULL)
5147: xmlFree(attrName);
5148: if (defaultValue != NULL)
5149: xmlFree(defaultValue);
5150: if (tree != NULL)
5151: xmlFreeEnumeration(tree);
1.59 daniel 5152: break;
5153: }
1.42 daniel 5154: SKIP_BLANKS;
1.59 daniel 5155:
5156: def = xmlParseDefaultDecl(ctxt, &defaultValue);
1.170 daniel 5157: if (def <= 0) {
5158: if (attrName != NULL)
5159: xmlFree(attrName);
5160: if (defaultValue != NULL)
5161: xmlFree(defaultValue);
5162: if (tree != NULL)
5163: xmlFreeEnumeration(tree);
5164: break;
5165: }
1.59 daniel 5166:
1.97 daniel 5167: GROW;
1.152 daniel 5168: if (RAW != '>') {
1.59 daniel 5169: if (!IS_BLANK(CUR)) {
5170: if ((ctxt->sax != NULL) && (ctxt->sax->error != NULL))
1.74 daniel 5171: ctxt->sax->error(ctxt->userData,
1.59 daniel 5172: "Space required after the attribute default value\n");
1.123 daniel 5173: ctxt->errNo = XML_ERR_SPACE_REQUIRED;
1.59 daniel 5174: ctxt->wellFormed = 0;
1.180 daniel 5175: ctxt->disableSAX = 1;
1.170 daniel 5176: if (attrName != NULL)
5177: xmlFree(attrName);
5178: if (defaultValue != NULL)
5179: xmlFree(defaultValue);
5180: if (tree != NULL)
5181: xmlFreeEnumeration(tree);
1.59 daniel 5182: break;
5183: }
5184: SKIP_BLANKS;
5185: }
1.40 daniel 5186: if (check == CUR_PTR) {
1.55 daniel 5187: if ((ctxt->sax != NULL) && (ctxt->sax->error != NULL))
1.74 daniel 5188: ctxt->sax->error(ctxt->userData,
1.59 daniel 5189: "xmlParseAttributeListDecl: detected internal error\n");
1.123 daniel 5190: ctxt->errNo = XML_ERR_INTERNAL_ERROR;
1.170 daniel 5191: if (attrName != NULL)
5192: xmlFree(attrName);
5193: if (defaultValue != NULL)
5194: xmlFree(defaultValue);
5195: if (tree != NULL)
5196: xmlFreeEnumeration(tree);
1.22 daniel 5197: break;
5198: }
1.171 daniel 5199: if ((ctxt->sax != NULL) && (!ctxt->disableSAX) &&
5200: (ctxt->sax->attributeDecl != NULL))
1.74 daniel 5201: ctxt->sax->attributeDecl(ctxt->userData, elemName, attrName,
1.66 daniel 5202: type, def, defaultValue, tree);
1.59 daniel 5203: if (attrName != NULL)
1.119 daniel 5204: xmlFree(attrName);
1.59 daniel 5205: if (defaultValue != NULL)
1.119 daniel 5206: xmlFree(defaultValue);
1.97 daniel 5207: GROW;
1.22 daniel 5208: }
1.152 daniel 5209: if (RAW == '>')
1.40 daniel 5210: NEXT;
1.22 daniel 5211:
1.119 daniel 5212: xmlFree(elemName);
1.22 daniel 5213: }
5214: }
5215:
1.50 daniel 5216: /**
1.61 daniel 5217: * xmlParseElementMixedContentDecl:
5218: * @ctxt: an XML parser context
5219: *
5220: * parse the declaration for a Mixed Element content
5221: * The leading '(' and spaces have been skipped in xmlParseElementContentDecl
5222: *
5223: * [51] Mixed ::= '(' S? '#PCDATA' (S? '|' S? Name)* S? ')*' |
5224: * '(' S? '#PCDATA' S? ')'
5225: *
1.99 daniel 5226: * [ VC: Proper Group/PE Nesting ] applies to [51] too (see [49])
5227: *
5228: * [ VC: No Duplicate Types ]
1.117 daniel 5229: * The same name must not appear more than once in a single
5230: * mixed-content declaration.
1.99 daniel 5231: *
1.61 daniel 5232: * returns: the list of the xmlElementContentPtr describing the element choices
5233: */
5234: xmlElementContentPtr
1.62 daniel 5235: xmlParseElementMixedContentDecl(xmlParserCtxtPtr ctxt) {
1.64 daniel 5236: xmlElementContentPtr ret = NULL, cur = NULL, n;
1.123 daniel 5237: xmlChar *elem = NULL;
1.61 daniel 5238:
1.97 daniel 5239: GROW;
1.152 daniel 5240: if ((RAW == '#') && (NXT(1) == 'P') &&
1.61 daniel 5241: (NXT(2) == 'C') && (NXT(3) == 'D') &&
5242: (NXT(4) == 'A') && (NXT(5) == 'T') &&
5243: (NXT(6) == 'A')) {
5244: SKIP(7);
5245: SKIP_BLANKS;
1.91 daniel 5246: SHRINK;
1.152 daniel 5247: if (RAW == ')') {
1.63 daniel 5248: NEXT;
5249: ret = xmlNewElementContent(NULL, XML_ELEMENT_CONTENT_PCDATA);
1.152 daniel 5250: if (RAW == '*') {
1.136 daniel 5251: ret->ocur = XML_ELEMENT_CONTENT_MULT;
5252: NEXT;
5253: }
1.63 daniel 5254: return(ret);
5255: }
1.152 daniel 5256: if ((RAW == '(') || (RAW == '|')) {
1.61 daniel 5257: ret = cur = xmlNewElementContent(NULL, XML_ELEMENT_CONTENT_PCDATA);
5258: if (ret == NULL) return(NULL);
1.99 daniel 5259: }
1.152 daniel 5260: while (RAW == '|') {
1.64 daniel 5261: NEXT;
1.61 daniel 5262: if (elem == NULL) {
5263: ret = xmlNewElementContent(NULL, XML_ELEMENT_CONTENT_OR);
5264: if (ret == NULL) return(NULL);
5265: ret->c1 = cur;
1.64 daniel 5266: cur = ret;
1.61 daniel 5267: } else {
1.64 daniel 5268: n = xmlNewElementContent(NULL, XML_ELEMENT_CONTENT_OR);
5269: if (n == NULL) return(NULL);
5270: n->c1 = xmlNewElementContent(elem, XML_ELEMENT_CONTENT_ELEMENT);
5271: cur->c2 = n;
5272: cur = n;
1.119 daniel 5273: xmlFree(elem);
1.61 daniel 5274: }
5275: SKIP_BLANKS;
5276: elem = xmlParseName(ctxt);
5277: if (elem == NULL) {
5278: if ((ctxt->sax != NULL) && (ctxt->sax->error != NULL))
1.74 daniel 5279: ctxt->sax->error(ctxt->userData,
1.61 daniel 5280: "xmlParseElementMixedContentDecl : Name expected\n");
1.123 daniel 5281: ctxt->errNo = XML_ERR_NAME_REQUIRED;
1.61 daniel 5282: ctxt->wellFormed = 0;
1.180 daniel 5283: ctxt->disableSAX = 1;
1.61 daniel 5284: xmlFreeElementContent(cur);
5285: return(NULL);
5286: }
5287: SKIP_BLANKS;
1.97 daniel 5288: GROW;
1.61 daniel 5289: }
1.152 daniel 5290: if ((RAW == ')') && (NXT(1) == '*')) {
1.66 daniel 5291: if (elem != NULL) {
1.61 daniel 5292: cur->c2 = xmlNewElementContent(elem,
5293: XML_ELEMENT_CONTENT_ELEMENT);
1.119 daniel 5294: xmlFree(elem);
1.66 daniel 5295: }
1.65 daniel 5296: ret->ocur = XML_ELEMENT_CONTENT_MULT;
1.64 daniel 5297: SKIP(2);
1.61 daniel 5298: } else {
1.119 daniel 5299: if (elem != NULL) xmlFree(elem);
1.61 daniel 5300: if ((ctxt->sax != NULL) && (ctxt->sax->error != NULL))
1.74 daniel 5301: ctxt->sax->error(ctxt->userData,
1.63 daniel 5302: "xmlParseElementMixedContentDecl : '|' or ')*' expected\n");
1.123 daniel 5303: ctxt->errNo = XML_ERR_MIXED_NOT_STARTED;
1.61 daniel 5304: ctxt->wellFormed = 0;
1.180 daniel 5305: ctxt->disableSAX = 1;
1.61 daniel 5306: xmlFreeElementContent(ret);
5307: return(NULL);
5308: }
5309:
5310: } else {
5311: if ((ctxt->sax != NULL) && (ctxt->sax->error != NULL))
1.74 daniel 5312: ctxt->sax->error(ctxt->userData,
1.61 daniel 5313: "xmlParseElementMixedContentDecl : '#PCDATA' expected\n");
1.123 daniel 5314: ctxt->errNo = XML_ERR_PCDATA_REQUIRED;
1.61 daniel 5315: ctxt->wellFormed = 0;
1.180 daniel 5316: ctxt->disableSAX = 1;
1.61 daniel 5317: }
5318: return(ret);
5319: }
5320:
5321: /**
5322: * xmlParseElementChildrenContentDecl:
1.50 daniel 5323: * @ctxt: an XML parser context
5324: *
1.61 daniel 5325: * parse the declaration for a Mixed Element content
5326: * The leading '(' and spaces have been skipped in xmlParseElementContentDecl
1.22 daniel 5327: *
1.61 daniel 5328: *
1.22 daniel 5329: * [47] children ::= (choice | seq) ('?' | '*' | '+')?
5330: *
5331: * [48] cp ::= (Name | choice | seq) ('?' | '*' | '+')?
5332: *
5333: * [49] choice ::= '(' S? cp ( S? '|' S? cp )* S? ')'
5334: *
5335: * [50] seq ::= '(' S? cp ( S? ',' S? cp )* S? ')'
5336: *
1.99 daniel 5337: * [ VC: Proper Group/PE Nesting ] applies to [49] and [50]
5338: * TODO Parameter-entity replacement text must be properly nested
5339: * with parenthetized groups. That is to say, if either of the
5340: * opening or closing parentheses in a choice, seq, or Mixed
5341: * construct is contained in the replacement text for a parameter
5342: * entity, both must be contained in the same replacement text. For
5343: * interoperability, if a parameter-entity reference appears in a
5344: * choice, seq, or Mixed construct, its replacement text should not
5345: * be empty, and neither the first nor last non-blank character of
5346: * the replacement text should be a connector (| or ,).
5347: *
1.62 daniel 5348: * returns: the tree of xmlElementContentPtr describing the element
1.61 daniel 5349: * hierarchy.
5350: */
5351: xmlElementContentPtr
1.62 daniel 5352: xmlParseElementChildrenContentDecl(xmlParserCtxtPtr ctxt) {
1.63 daniel 5353: xmlElementContentPtr ret = NULL, cur = NULL, last = NULL, op = NULL;
1.123 daniel 5354: xmlChar *elem;
5355: xmlChar type = 0;
1.62 daniel 5356:
5357: SKIP_BLANKS;
1.94 daniel 5358: GROW;
1.152 daniel 5359: if (RAW == '(') {
1.63 daniel 5360: /* Recurse on first child */
1.62 daniel 5361: NEXT;
5362: SKIP_BLANKS;
5363: cur = ret = xmlParseElementChildrenContentDecl(ctxt);
5364: SKIP_BLANKS;
1.101 daniel 5365: GROW;
1.62 daniel 5366: } else {
5367: elem = xmlParseName(ctxt);
5368: if (elem == NULL) {
5369: if ((ctxt->sax != NULL) && (ctxt->sax->error != NULL))
1.74 daniel 5370: ctxt->sax->error(ctxt->userData,
1.62 daniel 5371: "xmlParseElementChildrenContentDecl : Name or '(' expected\n");
1.123 daniel 5372: ctxt->errNo = XML_ERR_ELEMCONTENT_NOT_STARTED;
1.62 daniel 5373: ctxt->wellFormed = 0;
1.180 daniel 5374: ctxt->disableSAX = 1;
1.62 daniel 5375: return(NULL);
5376: }
5377: cur = ret = xmlNewElementContent(elem, XML_ELEMENT_CONTENT_ELEMENT);
1.101 daniel 5378: GROW;
1.152 daniel 5379: if (RAW == '?') {
1.104 daniel 5380: cur->ocur = XML_ELEMENT_CONTENT_OPT;
1.62 daniel 5381: NEXT;
1.152 daniel 5382: } else if (RAW == '*') {
1.104 daniel 5383: cur->ocur = XML_ELEMENT_CONTENT_MULT;
1.62 daniel 5384: NEXT;
1.152 daniel 5385: } else if (RAW == '+') {
1.104 daniel 5386: cur->ocur = XML_ELEMENT_CONTENT_PLUS;
1.62 daniel 5387: NEXT;
5388: } else {
1.104 daniel 5389: cur->ocur = XML_ELEMENT_CONTENT_ONCE;
1.62 daniel 5390: }
1.119 daniel 5391: xmlFree(elem);
1.101 daniel 5392: GROW;
1.62 daniel 5393: }
5394: SKIP_BLANKS;
1.91 daniel 5395: SHRINK;
1.152 daniel 5396: while (RAW != ')') {
1.63 daniel 5397: /*
5398: * Each loop we parse one separator and one element.
5399: */
1.152 daniel 5400: if (RAW == ',') {
1.62 daniel 5401: if (type == 0) type = CUR;
5402:
5403: /*
5404: * Detect "Name | Name , Name" error
5405: */
5406: else if (type != CUR) {
5407: if ((ctxt->sax != NULL) && (ctxt->sax->error != NULL))
1.74 daniel 5408: ctxt->sax->error(ctxt->userData,
1.62 daniel 5409: "xmlParseElementChildrenContentDecl : '%c' expected\n",
5410: type);
1.123 daniel 5411: ctxt->errNo = XML_ERR_SEPARATOR_REQUIRED;
1.62 daniel 5412: ctxt->wellFormed = 0;
1.180 daniel 5413: ctxt->disableSAX = 1;
1.170 daniel 5414: if ((op != NULL) && (op != ret))
5415: xmlFreeElementContent(op);
5416: if ((last != NULL) && (last != ret))
5417: xmlFreeElementContent(last);
5418: if (ret != NULL)
5419: xmlFreeElementContent(ret);
1.62 daniel 5420: return(NULL);
5421: }
1.64 daniel 5422: NEXT;
1.62 daniel 5423:
1.63 daniel 5424: op = xmlNewElementContent(NULL, XML_ELEMENT_CONTENT_SEQ);
5425: if (op == NULL) {
5426: xmlFreeElementContent(ret);
5427: return(NULL);
5428: }
5429: if (last == NULL) {
5430: op->c1 = ret;
1.65 daniel 5431: ret = cur = op;
1.63 daniel 5432: } else {
5433: cur->c2 = op;
5434: op->c1 = last;
5435: cur =op;
1.65 daniel 5436: last = NULL;
1.63 daniel 5437: }
1.152 daniel 5438: } else if (RAW == '|') {
1.62 daniel 5439: if (type == 0) type = CUR;
5440:
5441: /*
1.63 daniel 5442: * Detect "Name , Name | Name" error
1.62 daniel 5443: */
5444: else if (type != CUR) {
5445: if ((ctxt->sax != NULL) && (ctxt->sax->error != NULL))
1.74 daniel 5446: ctxt->sax->error(ctxt->userData,
1.62 daniel 5447: "xmlParseElementChildrenContentDecl : '%c' expected\n",
5448: type);
1.123 daniel 5449: ctxt->errNo = XML_ERR_SEPARATOR_REQUIRED;
1.62 daniel 5450: ctxt->wellFormed = 0;
1.180 daniel 5451: ctxt->disableSAX = 1;
1.170 daniel 5452: if ((op != NULL) && (op != ret))
5453: xmlFreeElementContent(op);
5454: if ((last != NULL) && (last != ret))
5455: xmlFreeElementContent(last);
5456: if (ret != NULL)
5457: xmlFreeElementContent(ret);
1.62 daniel 5458: return(NULL);
5459: }
1.64 daniel 5460: NEXT;
1.62 daniel 5461:
1.63 daniel 5462: op = xmlNewElementContent(NULL, XML_ELEMENT_CONTENT_OR);
5463: if (op == NULL) {
1.170 daniel 5464: if ((op != NULL) && (op != ret))
5465: xmlFreeElementContent(op);
5466: if ((last != NULL) && (last != ret))
5467: xmlFreeElementContent(last);
5468: if (ret != NULL)
5469: xmlFreeElementContent(ret);
1.63 daniel 5470: return(NULL);
5471: }
5472: if (last == NULL) {
5473: op->c1 = ret;
1.65 daniel 5474: ret = cur = op;
1.63 daniel 5475: } else {
5476: cur->c2 = op;
5477: op->c1 = last;
5478: cur =op;
1.65 daniel 5479: last = NULL;
1.63 daniel 5480: }
1.62 daniel 5481: } else {
5482: if ((ctxt->sax != NULL) && (ctxt->sax->error != NULL))
1.74 daniel 5483: ctxt->sax->error(ctxt->userData,
1.62 daniel 5484: "xmlParseElementChildrenContentDecl : ',' '|' or ')' expected\n");
5485: ctxt->wellFormed = 0;
1.180 daniel 5486: ctxt->disableSAX = 1;
1.123 daniel 5487: ctxt->errNo = XML_ERR_ELEMCONTENT_NOT_FINISHED;
1.170 daniel 5488: if ((op != NULL) && (op != ret))
5489: xmlFreeElementContent(op);
5490: if ((last != NULL) && (last != ret))
5491: xmlFreeElementContent(last);
5492: if (ret != NULL)
5493: xmlFreeElementContent(ret);
1.62 daniel 5494: return(NULL);
5495: }
1.101 daniel 5496: GROW;
1.62 daniel 5497: SKIP_BLANKS;
1.101 daniel 5498: GROW;
1.152 daniel 5499: if (RAW == '(') {
1.63 daniel 5500: /* Recurse on second child */
1.62 daniel 5501: NEXT;
5502: SKIP_BLANKS;
1.65 daniel 5503: last = xmlParseElementChildrenContentDecl(ctxt);
1.62 daniel 5504: SKIP_BLANKS;
5505: } else {
5506: elem = xmlParseName(ctxt);
5507: if (elem == NULL) {
5508: if ((ctxt->sax != NULL) && (ctxt->sax->error != NULL))
1.74 daniel 5509: ctxt->sax->error(ctxt->userData,
1.122 daniel 5510: "xmlParseElementChildrenContentDecl : Name or '(' expected\n");
1.123 daniel 5511: ctxt->errNo = XML_ERR_ELEMCONTENT_NOT_STARTED;
1.62 daniel 5512: ctxt->wellFormed = 0;
1.180 daniel 5513: ctxt->disableSAX = 1;
1.170 daniel 5514: if ((op != NULL) && (op != ret))
5515: xmlFreeElementContent(op);
5516: if ((last != NULL) && (last != ret))
5517: xmlFreeElementContent(last);
5518: if (ret != NULL)
5519: xmlFreeElementContent(ret);
1.62 daniel 5520: return(NULL);
5521: }
1.65 daniel 5522: last = xmlNewElementContent(elem, XML_ELEMENT_CONTENT_ELEMENT);
1.119 daniel 5523: xmlFree(elem);
1.152 daniel 5524: if (RAW == '?') {
1.105 daniel 5525: last->ocur = XML_ELEMENT_CONTENT_OPT;
5526: NEXT;
1.152 daniel 5527: } else if (RAW == '*') {
1.105 daniel 5528: last->ocur = XML_ELEMENT_CONTENT_MULT;
5529: NEXT;
1.152 daniel 5530: } else if (RAW == '+') {
1.105 daniel 5531: last->ocur = XML_ELEMENT_CONTENT_PLUS;
5532: NEXT;
5533: } else {
5534: last->ocur = XML_ELEMENT_CONTENT_ONCE;
5535: }
1.63 daniel 5536: }
5537: SKIP_BLANKS;
1.97 daniel 5538: GROW;
1.64 daniel 5539: }
1.65 daniel 5540: if ((cur != NULL) && (last != NULL)) {
5541: cur->c2 = last;
1.62 daniel 5542: }
5543: NEXT;
1.152 daniel 5544: if (RAW == '?') {
1.62 daniel 5545: ret->ocur = XML_ELEMENT_CONTENT_OPT;
5546: NEXT;
1.152 daniel 5547: } else if (RAW == '*') {
1.62 daniel 5548: ret->ocur = XML_ELEMENT_CONTENT_MULT;
5549: NEXT;
1.152 daniel 5550: } else if (RAW == '+') {
1.62 daniel 5551: ret->ocur = XML_ELEMENT_CONTENT_PLUS;
5552: NEXT;
5553: }
5554: return(ret);
1.61 daniel 5555: }
5556:
5557: /**
5558: * xmlParseElementContentDecl:
5559: * @ctxt: an XML parser context
5560: * @name: the name of the element being defined.
5561: * @result: the Element Content pointer will be stored here if any
1.22 daniel 5562: *
1.61 daniel 5563: * parse the declaration for an Element content either Mixed or Children,
5564: * the cases EMPTY and ANY are handled directly in xmlParseElementDecl
5565: *
5566: * [46] contentspec ::= 'EMPTY' | 'ANY' | Mixed | children
1.50 daniel 5567: *
1.61 daniel 5568: * returns: the type of element content XML_ELEMENT_TYPE_xxx
1.22 daniel 5569: */
5570:
1.61 daniel 5571: int
1.123 daniel 5572: xmlParseElementContentDecl(xmlParserCtxtPtr ctxt, xmlChar *name,
1.61 daniel 5573: xmlElementContentPtr *result) {
5574:
5575: xmlElementContentPtr tree = NULL;
5576: int res;
5577:
5578: *result = NULL;
5579:
1.152 daniel 5580: if (RAW != '(') {
1.61 daniel 5581: if ((ctxt->sax != NULL) && (ctxt->sax->error != NULL))
1.74 daniel 5582: ctxt->sax->error(ctxt->userData,
1.61 daniel 5583: "xmlParseElementContentDecl : '(' expected\n");
1.123 daniel 5584: ctxt->errNo = XML_ERR_ELEMCONTENT_NOT_STARTED;
1.61 daniel 5585: ctxt->wellFormed = 0;
1.180 daniel 5586: ctxt->disableSAX = 1;
1.61 daniel 5587: return(-1);
5588: }
5589: NEXT;
1.97 daniel 5590: GROW;
1.61 daniel 5591: SKIP_BLANKS;
1.152 daniel 5592: if ((RAW == '#') && (NXT(1) == 'P') &&
1.61 daniel 5593: (NXT(2) == 'C') && (NXT(3) == 'D') &&
5594: (NXT(4) == 'A') && (NXT(5) == 'T') &&
5595: (NXT(6) == 'A')) {
1.62 daniel 5596: tree = xmlParseElementMixedContentDecl(ctxt);
1.61 daniel 5597: res = XML_ELEMENT_TYPE_MIXED;
5598: } else {
1.62 daniel 5599: tree = xmlParseElementChildrenContentDecl(ctxt);
1.61 daniel 5600: res = XML_ELEMENT_TYPE_ELEMENT;
5601: }
5602: SKIP_BLANKS;
1.63 daniel 5603: /****************************
1.152 daniel 5604: if (RAW != ')') {
1.61 daniel 5605: if ((ctxt->sax != NULL) && (ctxt->sax->error != NULL))
1.74 daniel 5606: ctxt->sax->error(ctxt->userData,
1.61 daniel 5607: "xmlParseElementContentDecl : ')' expected\n");
5608: ctxt->wellFormed = 0;
1.180 daniel 5609: ctxt->disableSAX = 1;
1.61 daniel 5610: return(-1);
5611: }
1.63 daniel 5612: ****************************/
5613: *result = tree;
1.61 daniel 5614: return(res);
1.22 daniel 5615: }
5616:
1.50 daniel 5617: /**
5618: * xmlParseElementDecl:
5619: * @ctxt: an XML parser context
5620: *
5621: * parse an Element declaration.
1.22 daniel 5622: *
5623: * [45] elementdecl ::= '<!ELEMENT' S Name S contentspec S? '>'
5624: *
1.99 daniel 5625: * [ VC: Unique Element Type Declaration ]
1.117 daniel 5626: * No element type may be declared more than once
1.69 daniel 5627: *
5628: * Returns the type of the element, or -1 in case of error
1.22 daniel 5629: */
1.59 daniel 5630: int
1.55 daniel 5631: xmlParseElementDecl(xmlParserCtxtPtr ctxt) {
1.123 daniel 5632: xmlChar *name;
1.59 daniel 5633: int ret = -1;
1.61 daniel 5634: xmlElementContentPtr content = NULL;
1.22 daniel 5635:
1.97 daniel 5636: GROW;
1.152 daniel 5637: if ((RAW == '<') && (NXT(1) == '!') &&
1.40 daniel 5638: (NXT(2) == 'E') && (NXT(3) == 'L') &&
5639: (NXT(4) == 'E') && (NXT(5) == 'M') &&
5640: (NXT(6) == 'E') && (NXT(7) == 'N') &&
1.59 daniel 5641: (NXT(8) == 'T')) {
1.40 daniel 5642: SKIP(9);
1.59 daniel 5643: if (!IS_BLANK(CUR)) {
5644: if ((ctxt->sax != NULL) && (ctxt->sax->error != NULL))
1.74 daniel 5645: ctxt->sax->error(ctxt->userData,
1.59 daniel 5646: "Space required after 'ELEMENT'\n");
1.123 daniel 5647: ctxt->errNo = XML_ERR_SPACE_REQUIRED;
1.59 daniel 5648: ctxt->wellFormed = 0;
1.180 daniel 5649: ctxt->disableSAX = 1;
1.59 daniel 5650: }
1.42 daniel 5651: SKIP_BLANKS;
1.22 daniel 5652: name = xmlParseName(ctxt);
5653: if (name == NULL) {
1.55 daniel 5654: if ((ctxt->sax != NULL) && (ctxt->sax->error != NULL))
1.74 daniel 5655: ctxt->sax->error(ctxt->userData,
1.59 daniel 5656: "xmlParseElementDecl: no name for Element\n");
1.123 daniel 5657: ctxt->errNo = XML_ERR_NAME_REQUIRED;
1.59 daniel 5658: ctxt->wellFormed = 0;
1.180 daniel 5659: ctxt->disableSAX = 1;
1.59 daniel 5660: return(-1);
5661: }
5662: if (!IS_BLANK(CUR)) {
5663: if ((ctxt->sax != NULL) && (ctxt->sax->error != NULL))
1.74 daniel 5664: ctxt->sax->error(ctxt->userData,
1.59 daniel 5665: "Space required after the element name\n");
1.123 daniel 5666: ctxt->errNo = XML_ERR_SPACE_REQUIRED;
1.59 daniel 5667: ctxt->wellFormed = 0;
1.180 daniel 5668: ctxt->disableSAX = 1;
1.22 daniel 5669: }
1.42 daniel 5670: SKIP_BLANKS;
1.152 daniel 5671: if ((RAW == 'E') && (NXT(1) == 'M') &&
1.40 daniel 5672: (NXT(2) == 'P') && (NXT(3) == 'T') &&
5673: (NXT(4) == 'Y')) {
5674: SKIP(5);
1.22 daniel 5675: /*
5676: * Element must always be empty.
5677: */
1.59 daniel 5678: ret = XML_ELEMENT_TYPE_EMPTY;
1.152 daniel 5679: } else if ((RAW == 'A') && (NXT(1) == 'N') &&
1.40 daniel 5680: (NXT(2) == 'Y')) {
5681: SKIP(3);
1.22 daniel 5682: /*
5683: * Element is a generic container.
5684: */
1.59 daniel 5685: ret = XML_ELEMENT_TYPE_ANY;
1.152 daniel 5686: } else if (RAW == '(') {
1.61 daniel 5687: ret = xmlParseElementContentDecl(ctxt, name, &content);
1.22 daniel 5688: } else {
1.98 daniel 5689: /*
5690: * [ WFC: PEs in Internal Subset ] error handling.
5691: */
1.152 daniel 5692: if ((RAW == '%') && (ctxt->external == 0) &&
1.98 daniel 5693: (ctxt->inputNr == 1)) {
5694: if ((ctxt->sax != NULL) && (ctxt->sax->error != NULL))
5695: ctxt->sax->error(ctxt->userData,
5696: "PEReference: forbidden within markup decl in internal subset\n");
1.123 daniel 5697: ctxt->errNo = XML_ERR_PEREF_IN_INT_SUBSET;
1.98 daniel 5698: } else {
5699: if ((ctxt->sax != NULL) && (ctxt->sax->error != NULL))
5700: ctxt->sax->error(ctxt->userData,
5701: "xmlParseElementDecl: 'EMPTY', 'ANY' or '(' expected\n");
1.123 daniel 5702: ctxt->errNo = XML_ERR_ELEMCONTENT_NOT_STARTED;
1.98 daniel 5703: }
1.61 daniel 5704: ctxt->wellFormed = 0;
1.180 daniel 5705: ctxt->disableSAX = 1;
1.119 daniel 5706: if (name != NULL) xmlFree(name);
1.61 daniel 5707: return(-1);
1.22 daniel 5708: }
1.142 daniel 5709:
5710: SKIP_BLANKS;
5711: /*
5712: * Pop-up of finished entities.
5713: */
1.152 daniel 5714: while ((RAW == 0) && (ctxt->inputNr > 1))
1.142 daniel 5715: xmlPopInput(ctxt);
1.42 daniel 5716: SKIP_BLANKS;
1.142 daniel 5717:
1.152 daniel 5718: if (RAW != '>') {
1.55 daniel 5719: if ((ctxt->sax != NULL) && (ctxt->sax->error != NULL))
1.74 daniel 5720: ctxt->sax->error(ctxt->userData,
1.31 daniel 5721: "xmlParseElementDecl: expected '>' at the end\n");
1.123 daniel 5722: ctxt->errNo = XML_ERR_GT_REQUIRED;
1.59 daniel 5723: ctxt->wellFormed = 0;
1.180 daniel 5724: ctxt->disableSAX = 1;
1.61 daniel 5725: } else {
1.40 daniel 5726: NEXT;
1.171 daniel 5727: if ((ctxt->sax != NULL) && (!ctxt->disableSAX) &&
5728: (ctxt->sax->elementDecl != NULL))
1.76 daniel 5729: ctxt->sax->elementDecl(ctxt->userData, name, ret,
5730: content);
1.61 daniel 5731: }
1.84 daniel 5732: if (content != NULL) {
5733: xmlFreeElementContent(content);
5734: }
1.61 daniel 5735: if (name != NULL) {
1.119 daniel 5736: xmlFree(name);
1.61 daniel 5737: }
1.22 daniel 5738: }
1.59 daniel 5739: return(ret);
1.22 daniel 5740: }
5741:
1.50 daniel 5742: /**
5743: * xmlParseMarkupDecl:
5744: * @ctxt: an XML parser context
5745: *
5746: * parse Markup declarations
1.22 daniel 5747: *
5748: * [29] markupdecl ::= elementdecl | AttlistDecl | EntityDecl |
5749: * NotationDecl | PI | Comment
5750: *
1.98 daniel 5751: * [ VC: Proper Declaration/PE Nesting ]
5752: * TODO Parameter-entity replacement text must be properly nested with
5753: * markup declarations. That is to say, if either the first character
5754: * or the last character of a markup declaration (markupdecl above) is
5755: * contained in the replacement text for a parameter-entity reference,
5756: * both must be contained in the same replacement text.
5757: *
5758: * [ WFC: PEs in Internal Subset ]
5759: * In the internal DTD subset, parameter-entity references can occur
5760: * only where markup declarations can occur, not within markup declarations.
5761: * (This does not apply to references that occur in external parameter
5762: * entities or to the external subset.)
1.22 daniel 5763: */
1.55 daniel 5764: void
5765: xmlParseMarkupDecl(xmlParserCtxtPtr ctxt) {
1.94 daniel 5766: GROW;
1.22 daniel 5767: xmlParseElementDecl(ctxt);
5768: xmlParseAttributeListDecl(ctxt);
5769: xmlParseEntityDecl(ctxt);
5770: xmlParseNotationDecl(ctxt);
5771: xmlParsePI(ctxt);
1.114 daniel 5772: xmlParseComment(ctxt);
1.98 daniel 5773: /*
5774: * This is only for internal subset. On external entities,
5775: * the replacement is done before parsing stage
5776: */
5777: if ((ctxt->external == 0) && (ctxt->inputNr == 1))
5778: xmlParsePEReference(ctxt);
1.97 daniel 5779: ctxt->instate = XML_PARSER_DTD;
1.22 daniel 5780: }
5781:
1.50 daniel 5782: /**
1.76 daniel 5783: * xmlParseTextDecl:
5784: * @ctxt: an XML parser context
5785: *
5786: * parse an XML declaration header for external entities
5787: *
5788: * [77] TextDecl ::= '<?xml' VersionInfo? EncodingDecl S? '?>'
1.176 daniel 5789: *
5790: * Question: Seems that EncodingDecl is mandatory ? Is that a typo ?
1.76 daniel 5791: */
5792:
1.172 daniel 5793: void
1.76 daniel 5794: xmlParseTextDecl(xmlParserCtxtPtr ctxt) {
1.123 daniel 5795: xmlChar *version;
1.76 daniel 5796:
5797: /*
5798: * We know that '<?xml' is here.
5799: */
5800: SKIP(5);
5801:
5802: if (!IS_BLANK(CUR)) {
5803: if ((ctxt->sax != NULL) && (ctxt->sax->error != NULL))
1.122 daniel 5804: ctxt->sax->error(ctxt->userData,
5805: "Space needed after '<?xml'\n");
1.123 daniel 5806: ctxt->errNo = XML_ERR_SPACE_REQUIRED;
1.76 daniel 5807: ctxt->wellFormed = 0;
1.180 daniel 5808: ctxt->disableSAX = 1;
1.76 daniel 5809: }
5810: SKIP_BLANKS;
5811:
5812: /*
5813: * We may have the VersionInfo here.
5814: */
5815: version = xmlParseVersionInfo(ctxt);
5816: if (version == NULL)
5817: version = xmlCharStrdup(XML_DEFAULT_VERSION);
1.172 daniel 5818: ctxt->input->version = version;
1.76 daniel 5819:
5820: /*
5821: * We must have the encoding declaration
5822: */
5823: if (!IS_BLANK(CUR)) {
5824: if ((ctxt->sax != NULL) && (ctxt->sax->error != NULL))
1.122 daniel 5825: ctxt->sax->error(ctxt->userData, "Space needed here\n");
1.123 daniel 5826: ctxt->errNo = XML_ERR_SPACE_REQUIRED;
1.76 daniel 5827: ctxt->wellFormed = 0;
1.180 daniel 5828: ctxt->disableSAX = 1;
1.76 daniel 5829: }
1.172 daniel 5830: ctxt->input->encoding = xmlParseEncodingDecl(ctxt);
1.76 daniel 5831:
5832: SKIP_BLANKS;
1.152 daniel 5833: if ((RAW == '?') && (NXT(1) == '>')) {
1.76 daniel 5834: SKIP(2);
1.152 daniel 5835: } else if (RAW == '>') {
1.76 daniel 5836: /* Deprecated old WD ... */
5837: if ((ctxt->sax != NULL) && (ctxt->sax->error != NULL))
1.122 daniel 5838: ctxt->sax->error(ctxt->userData,
5839: "XML declaration must end-up with '?>'\n");
1.123 daniel 5840: ctxt->errNo = XML_ERR_XMLDECL_NOT_FINISHED;
1.76 daniel 5841: ctxt->wellFormed = 0;
1.180 daniel 5842: ctxt->disableSAX = 1;
1.76 daniel 5843: NEXT;
5844: } else {
5845: if ((ctxt->sax != NULL) && (ctxt->sax->error != NULL))
1.122 daniel 5846: ctxt->sax->error(ctxt->userData,
5847: "parsing XML declaration: '?>' expected\n");
1.123 daniel 5848: ctxt->errNo = XML_ERR_XMLDECL_NOT_FINISHED;
1.76 daniel 5849: ctxt->wellFormed = 0;
1.180 daniel 5850: ctxt->disableSAX = 1;
1.76 daniel 5851: MOVETO_ENDTAG(CUR_PTR);
5852: NEXT;
5853: }
5854: }
5855:
5856: /*
5857: * xmlParseConditionalSections
5858: * @ctxt: an XML parser context
5859: *
5860: * TODO : Conditionnal section are not yet supported !
5861: *
5862: * [61] conditionalSect ::= includeSect | ignoreSect
5863: * [62] includeSect ::= '<![' S? 'INCLUDE' S? '[' extSubsetDecl ']]>'
5864: * [63] ignoreSect ::= '<![' S? 'IGNORE' S? '[' ignoreSectContents* ']]>'
5865: * [64] ignoreSectContents ::= Ignore ('<![' ignoreSectContents ']]>' Ignore)*
5866: * [65] Ignore ::= Char* - (Char* ('<![' | ']]>') Char*)
5867: */
5868:
5869: void
5870: xmlParseConditionalSections(xmlParserCtxtPtr ctxt) {
1.165 daniel 5871: SKIP(3);
5872: SKIP_BLANKS;
1.168 daniel 5873: if ((RAW == 'I') && (NXT(1) == 'N') && (NXT(2) == 'C') &&
5874: (NXT(3) == 'L') && (NXT(4) == 'U') && (NXT(5) == 'D') &&
5875: (NXT(6) == 'E')) {
1.165 daniel 5876: SKIP(7);
1.168 daniel 5877: SKIP_BLANKS;
5878: if (RAW != '[') {
5879: if ((ctxt->sax != NULL) && (ctxt->sax->error != NULL))
5880: ctxt->sax->error(ctxt->userData,
5881: "XML conditional section '[' expected\n");
5882: ctxt->errNo = XML_ERR_CONDSEC_INVALID;
5883: ctxt->wellFormed = 0;
1.180 daniel 5884: ctxt->disableSAX = 1;
1.168 daniel 5885: } else {
5886: NEXT;
5887: }
1.165 daniel 5888: while ((RAW != 0) && ((RAW != ']') || (NXT(1) != ']') ||
5889: (NXT(2) != '>'))) {
5890: const xmlChar *check = CUR_PTR;
5891: int cons = ctxt->input->consumed;
5892: int tok = ctxt->token;
5893:
5894: if ((RAW == '<') && (NXT(1) == '!') && (NXT(2) == '[')) {
5895: xmlParseConditionalSections(ctxt);
5896: } else if (IS_BLANK(CUR)) {
5897: NEXT;
5898: } else if (RAW == '%') {
5899: xmlParsePEReference(ctxt);
5900: } else
5901: xmlParseMarkupDecl(ctxt);
5902:
5903: /*
5904: * Pop-up of finished entities.
5905: */
5906: while ((RAW == 0) && (ctxt->inputNr > 1))
5907: xmlPopInput(ctxt);
5908:
5909: if ((CUR_PTR == check) && (cons == ctxt->input->consumed) &&
5910: (tok == ctxt->token)) {
5911: if ((ctxt->sax != NULL) && (ctxt->sax->error != NULL))
5912: ctxt->sax->error(ctxt->userData,
5913: "Content error in the external subset\n");
5914: ctxt->wellFormed = 0;
1.180 daniel 5915: ctxt->disableSAX = 1;
1.165 daniel 5916: ctxt->errNo = XML_ERR_EXT_SUBSET_NOT_FINISHED;
5917: break;
5918: }
5919: }
1.168 daniel 5920: } else if ((RAW == 'I') && (NXT(1) == 'G') && (NXT(2) == 'N') &&
5921: (NXT(3) == 'O') && (NXT(4) == 'R') && (NXT(5) == 'E')) {
1.171 daniel 5922: int state;
5923:
1.168 daniel 5924: SKIP(6);
5925: SKIP_BLANKS;
5926: if (RAW != '[') {
5927: if ((ctxt->sax != NULL) && (ctxt->sax->error != NULL))
5928: ctxt->sax->error(ctxt->userData,
5929: "XML conditional section '[' expected\n");
5930: ctxt->errNo = XML_ERR_CONDSEC_INVALID;
5931: ctxt->wellFormed = 0;
1.180 daniel 5932: ctxt->disableSAX = 1;
1.168 daniel 5933: } else {
5934: NEXT;
5935: }
1.171 daniel 5936:
1.143 daniel 5937: /*
1.171 daniel 5938: * Parse up to the end of the conditionnal section
5939: * But disable SAX event generating DTD building in the meantime
1.143 daniel 5940: */
1.171 daniel 5941: state = ctxt->disableSAX;
1.165 daniel 5942: while ((RAW != 0) && ((RAW != ']') || (NXT(1) != ']') ||
5943: (NXT(2) != '>'))) {
1.171 daniel 5944: const xmlChar *check = CUR_PTR;
5945: int cons = ctxt->input->consumed;
5946: int tok = ctxt->token;
5947:
5948: if ((RAW == '<') && (NXT(1) == '!') && (NXT(2) == '[')) {
5949: xmlParseConditionalSections(ctxt);
5950: } else if (IS_BLANK(CUR)) {
5951: NEXT;
5952: } else if (RAW == '%') {
5953: xmlParsePEReference(ctxt);
5954: } else
5955: xmlParseMarkupDecl(ctxt);
5956:
1.165 daniel 5957: /*
5958: * Pop-up of finished entities.
5959: */
5960: while ((RAW == 0) && (ctxt->inputNr > 1))
5961: xmlPopInput(ctxt);
1.143 daniel 5962:
1.171 daniel 5963: if ((CUR_PTR == check) && (cons == ctxt->input->consumed) &&
5964: (tok == ctxt->token)) {
5965: if ((ctxt->sax != NULL) && (ctxt->sax->error != NULL))
5966: ctxt->sax->error(ctxt->userData,
5967: "Content error in the external subset\n");
5968: ctxt->wellFormed = 0;
1.180 daniel 5969: ctxt->disableSAX = 1;
1.171 daniel 5970: ctxt->errNo = XML_ERR_EXT_SUBSET_NOT_FINISHED;
5971: break;
5972: }
1.165 daniel 5973: }
1.171 daniel 5974: ctxt->disableSAX = state;
1.168 daniel 5975: } else {
5976: if ((ctxt->sax != NULL) && (ctxt->sax->error != NULL))
5977: ctxt->sax->error(ctxt->userData,
5978: "XML conditional section INCLUDE or IGNORE keyword expected\n");
5979: ctxt->errNo = XML_ERR_CONDSEC_INVALID;
5980: ctxt->wellFormed = 0;
1.180 daniel 5981: ctxt->disableSAX = 1;
1.143 daniel 5982: }
5983:
1.152 daniel 5984: if (RAW == 0)
1.143 daniel 5985: SHRINK;
5986:
1.152 daniel 5987: if (RAW == 0) {
1.76 daniel 5988: if ((ctxt->sax != NULL) && (ctxt->sax->error != NULL))
5989: ctxt->sax->error(ctxt->userData,
5990: "XML conditional section not closed\n");
1.123 daniel 5991: ctxt->errNo = XML_ERR_CONDSEC_NOT_FINISHED;
1.76 daniel 5992: ctxt->wellFormed = 0;
1.180 daniel 5993: ctxt->disableSAX = 1;
1.143 daniel 5994: } else {
5995: SKIP(3);
1.76 daniel 5996: }
5997: }
5998:
5999: /**
1.124 daniel 6000: * xmlParseExternalSubset:
1.76 daniel 6001: * @ctxt: an XML parser context
1.124 daniel 6002: * @ExternalID: the external identifier
6003: * @SystemID: the system identifier (or URL)
1.76 daniel 6004: *
6005: * parse Markup declarations from an external subset
6006: *
6007: * [30] extSubset ::= textDecl? extSubsetDecl
6008: *
6009: * [31] extSubsetDecl ::= (markupdecl | conditionalSect | PEReference | S) *
6010: */
6011: void
1.123 daniel 6012: xmlParseExternalSubset(xmlParserCtxtPtr ctxt, const xmlChar *ExternalID,
6013: const xmlChar *SystemID) {
1.132 daniel 6014: GROW;
1.152 daniel 6015: if ((RAW == '<') && (NXT(1) == '?') &&
1.76 daniel 6016: (NXT(2) == 'x') && (NXT(3) == 'm') &&
6017: (NXT(4) == 'l')) {
1.172 daniel 6018: xmlParseTextDecl(ctxt);
1.76 daniel 6019: }
1.79 daniel 6020: if (ctxt->myDoc == NULL) {
1.116 daniel 6021: ctxt->myDoc = xmlNewDoc(BAD_CAST "1.0");
1.79 daniel 6022: }
6023: if ((ctxt->myDoc != NULL) && (ctxt->myDoc->intSubset == NULL))
6024: xmlCreateIntSubset(ctxt->myDoc, NULL, ExternalID, SystemID);
6025:
1.96 daniel 6026: ctxt->instate = XML_PARSER_DTD;
1.101 daniel 6027: ctxt->external = 1;
1.152 daniel 6028: while (((RAW == '<') && (NXT(1) == '?')) ||
6029: ((RAW == '<') && (NXT(1) == '!')) ||
1.164 daniel 6030: IS_BLANK(CUR)) {
1.123 daniel 6031: const xmlChar *check = CUR_PTR;
1.115 daniel 6032: int cons = ctxt->input->consumed;
1.164 daniel 6033: int tok = ctxt->token;
1.115 daniel 6034:
1.152 daniel 6035: if ((RAW == '<') && (NXT(1) == '!') && (NXT(2) == '[')) {
1.76 daniel 6036: xmlParseConditionalSections(ctxt);
6037: } else if (IS_BLANK(CUR)) {
6038: NEXT;
1.152 daniel 6039: } else if (RAW == '%') {
1.76 daniel 6040: xmlParsePEReference(ctxt);
6041: } else
6042: xmlParseMarkupDecl(ctxt);
1.77 daniel 6043:
6044: /*
6045: * Pop-up of finished entities.
6046: */
1.166 daniel 6047: while ((RAW == 0) && (ctxt->inputNr > 1))
1.77 daniel 6048: xmlPopInput(ctxt);
6049:
1.164 daniel 6050: if ((CUR_PTR == check) && (cons == ctxt->input->consumed) &&
6051: (tok == ctxt->token)) {
1.115 daniel 6052: if ((ctxt->sax != NULL) && (ctxt->sax->error != NULL))
6053: ctxt->sax->error(ctxt->userData,
6054: "Content error in the external subset\n");
6055: ctxt->wellFormed = 0;
1.180 daniel 6056: ctxt->disableSAX = 1;
1.123 daniel 6057: ctxt->errNo = XML_ERR_EXT_SUBSET_NOT_FINISHED;
1.115 daniel 6058: break;
6059: }
1.76 daniel 6060: }
6061:
1.152 daniel 6062: if (RAW != 0) {
1.76 daniel 6063: if ((ctxt->sax != NULL) && (ctxt->sax->error != NULL))
6064: ctxt->sax->error(ctxt->userData,
6065: "Extra content at the end of the document\n");
1.123 daniel 6066: ctxt->errNo = XML_ERR_EXT_SUBSET_NOT_FINISHED;
1.76 daniel 6067: ctxt->wellFormed = 0;
1.180 daniel 6068: ctxt->disableSAX = 1;
1.76 daniel 6069: }
6070:
6071: }
6072:
6073: /**
1.77 daniel 6074: * xmlParseReference:
6075: * @ctxt: an XML parser context
6076: *
6077: * parse and handle entity references in content, depending on the SAX
6078: * interface, this may end-up in a call to character() if this is a
1.79 daniel 6079: * CharRef, a predefined entity, if there is no reference() callback.
6080: * or if the parser was asked to switch to that mode.
1.77 daniel 6081: *
6082: * [67] Reference ::= EntityRef | CharRef
6083: */
6084: void
6085: xmlParseReference(xmlParserCtxtPtr ctxt) {
6086: xmlEntityPtr ent;
1.123 daniel 6087: xmlChar *val;
1.152 daniel 6088: if (RAW != '&') return;
1.77 daniel 6089:
1.113 daniel 6090: if (ctxt->inputNr > 1) {
1.123 daniel 6091: xmlChar cur[2] = { '&' , 0 } ;
1.113 daniel 6092:
1.171 daniel 6093: if ((ctxt->sax != NULL) && (ctxt->sax->characters != NULL) &&
6094: (!ctxt->disableSAX))
1.113 daniel 6095: ctxt->sax->characters(ctxt->userData, cur, 1);
6096: if (ctxt->token == '&')
6097: ctxt->token = 0;
6098: else {
6099: SKIP(1);
6100: }
6101: return;
6102: }
1.77 daniel 6103: if (NXT(1) == '#') {
1.152 daniel 6104: int i = 0;
1.153 daniel 6105: xmlChar out[10];
6106: int hex = NXT(2);
1.77 daniel 6107: int val = xmlParseCharRef(ctxt);
1.152 daniel 6108:
1.153 daniel 6109: if (ctxt->encoding != NULL) {
6110: /*
6111: * So we are using non-UTF-8 buffers
6112: * Check that the char fit on 8bits, if not
6113: * generate a CharRef.
6114: */
6115: if (val <= 0xFF) {
6116: out[0] = val;
6117: out[1] = 0;
1.171 daniel 6118: if ((ctxt->sax != NULL) && (ctxt->sax->characters != NULL) &&
6119: (!ctxt->disableSAX))
1.153 daniel 6120: ctxt->sax->characters(ctxt->userData, out, 1);
6121: } else {
6122: if ((hex == 'x') || (hex == 'X'))
6123: sprintf((char *)out, "#x%X", val);
6124: else
6125: sprintf((char *)out, "#%d", val);
1.171 daniel 6126: if ((ctxt->sax != NULL) && (ctxt->sax->reference != NULL) &&
6127: (!ctxt->disableSAX))
1.153 daniel 6128: ctxt->sax->reference(ctxt->userData, out);
6129: }
6130: } else {
6131: /*
6132: * Just encode the value in UTF-8
6133: */
6134: COPY_BUF(0 ,out, i, val);
6135: out[i] = 0;
1.171 daniel 6136: if ((ctxt->sax != NULL) && (ctxt->sax->characters != NULL) &&
6137: (!ctxt->disableSAX))
1.153 daniel 6138: ctxt->sax->characters(ctxt->userData, out, i);
6139: }
1.77 daniel 6140: } else {
6141: ent = xmlParseEntityRef(ctxt);
6142: if (ent == NULL) return;
6143: if ((ent->name != NULL) &&
1.159 daniel 6144: (ent->etype != XML_INTERNAL_PREDEFINED_ENTITY)) {
1.180 daniel 6145: xmlNodePtr list = NULL;
6146: int ret;
6147:
6148:
6149: /*
6150: * The first reference to the entity trigger a parsing phase
6151: * where the ent->children is filled with the result from
6152: * the parsing.
6153: */
6154: if (ent->children == NULL) {
6155: xmlChar *value;
6156: value = ent->content;
6157:
6158: /*
6159: * Check that this entity is well formed
6160: */
6161: if ((value != NULL) &&
6162: (value[1] == 0) && (value[0] == '<') &&
6163: (!xmlStrcmp(ent->name, BAD_CAST "lt"))) {
6164: /*
6165: * TODO: get definite answer on this !!!
6166: * Lots of entity decls are used to declare a single
6167: * char
6168: * <!ENTITY lt "<">
6169: * Which seems to be valid since
6170: * 2.4: The ampersand character (&) and the left angle
6171: * bracket (<) may appear in their literal form only
6172: * when used ... They are also legal within the literal
6173: * entity value of an internal entity declaration;i
6174: * see "4.3.2 Well-Formed Parsed Entities".
6175: * IMHO 2.4 and 4.3.2 are directly in contradiction.
6176: * Looking at the OASIS test suite and James Clark
6177: * tests, this is broken. However the XML REC uses
6178: * it. Is the XML REC not well-formed ????
6179: * This is a hack to avoid this problem
6180: */
6181: list = xmlNewDocText(ctxt->myDoc, value);
6182: if (list != NULL) {
6183: if ((ent->etype == XML_INTERNAL_GENERAL_ENTITY) &&
6184: (ent->children == NULL)) {
6185: ent->children = list;
6186: ent->last = list;
6187: list->parent = (xmlNodePtr) ent;
6188: } else {
6189: xmlFreeNodeList(list);
6190: }
6191: } else if (list != NULL) {
6192: xmlFreeNodeList(list);
6193: }
1.181 ! daniel 6194: } else {
1.180 daniel 6195: /*
6196: * 4.3.2: An internal general parsed entity is well-formed
6197: * if its replacement text matches the production labeled
6198: * content.
6199: */
6200: if (ent->etype == XML_INTERNAL_GENERAL_ENTITY)
6201: ret = xmlParseBalancedChunkMemory(ctxt->myDoc,
6202: ctxt->sax, NULL, value, &list);
6203: else if (ent->etype == XML_EXTERNAL_GENERAL_PARSED_ENTITY)
6204: ret = xmlParseExternalEntity(ctxt->myDoc,
1.181 ! daniel 6205: ctxt->sax, NULL, (char *) ent->SystemID,
! 6206: (char *) ent->ExternalID, &list);
1.180 daniel 6207: else {
6208: ret = -1;
6209: if ((ctxt->sax != NULL) && (ctxt->sax->error != NULL))
6210: ctxt->sax->error(ctxt->userData,
6211: "Internal: invalid entity type\n");
6212: }
6213: if ((ret == 0) && (list != NULL)) {
6214: if ((ent->etype == XML_INTERNAL_GENERAL_ENTITY) &&
6215: (ent->children == NULL)) {
6216: ent->children = list;
6217: while (list != NULL) {
6218: list->parent = (xmlNodePtr) ent;
6219: if (list->next == NULL)
6220: ent->last = list;
6221: list = list->next;
6222: }
6223: } else {
6224: xmlFreeNodeList(list);
6225: }
6226: } else if (ret > 0) {
6227: if ((ctxt->sax != NULL) && (ctxt->sax->error != NULL))
6228: ctxt->sax->error(ctxt->userData,
6229: "Entity value required\n");
6230: ctxt->errNo = ret;
6231: ctxt->wellFormed = 0;
6232: ctxt->disableSAX = 1;
6233: } else if (list != NULL) {
6234: xmlFreeNodeList(list);
6235: }
6236: }
6237: }
1.113 daniel 6238: if ((ctxt->sax != NULL) && (ctxt->sax->reference != NULL) &&
1.171 daniel 6239: (ctxt->replaceEntities == 0) && (!ctxt->disableSAX)) {
1.113 daniel 6240: /*
6241: * Create a node.
6242: */
6243: ctxt->sax->reference(ctxt->userData, ent->name);
6244: return;
6245: } else if (ctxt->replaceEntities) {
6246: xmlParserInputPtr input;
1.79 daniel 6247:
1.113 daniel 6248: input = xmlNewEntityInputStream(ctxt, ent);
6249: xmlPushInput(ctxt, input);
1.167 daniel 6250: if ((ent->etype == XML_EXTERNAL_GENERAL_PARSED_ENTITY) &&
6251: (RAW == '<') && (NXT(1) == '?') &&
6252: (NXT(2) == 'x') && (NXT(3) == 'm') &&
6253: (NXT(4) == 'l') && (IS_BLANK(NXT(5)))) {
1.172 daniel 6254: xmlParseTextDecl(ctxt);
1.167 daniel 6255: if (input->standalone) {
6256: if ((ctxt->sax != NULL) && (ctxt->sax->error != NULL))
6257: ctxt->sax->error(ctxt->userData,
6258: "external parsed entities cannot be standalone\n");
6259: ctxt->errNo = XML_ERR_EXT_ENTITY_STANDALONE;
6260: ctxt->wellFormed = 0;
1.180 daniel 6261: ctxt->disableSAX = 1;
1.167 daniel 6262: }
6263: }
1.179 daniel 6264: /*
6265: * !!! TODO: build the tree under the entity first
6266: * 1234
6267: */
1.113 daniel 6268: return;
6269: }
1.77 daniel 6270: }
6271: val = ent->content;
6272: if (val == NULL) return;
6273: /*
6274: * inline the entity.
6275: */
1.171 daniel 6276: if ((ctxt->sax != NULL) && (ctxt->sax->characters != NULL) &&
6277: (!ctxt->disableSAX))
1.77 daniel 6278: ctxt->sax->characters(ctxt->userData, val, xmlStrlen(val));
6279: }
1.24 daniel 6280: }
6281:
1.50 daniel 6282: /**
6283: * xmlParseEntityRef:
6284: * @ctxt: an XML parser context
6285: *
6286: * parse ENTITY references declarations
1.24 daniel 6287: *
6288: * [68] EntityRef ::= '&' Name ';'
1.68 daniel 6289: *
1.98 daniel 6290: * [ WFC: Entity Declared ]
6291: * In a document without any DTD, a document with only an internal DTD
6292: * subset which contains no parameter entity references, or a document
6293: * with "standalone='yes'", the Name given in the entity reference
6294: * must match that in an entity declaration, except that well-formed
6295: * documents need not declare any of the following entities: amp, lt,
6296: * gt, apos, quot. The declaration of a parameter entity must precede
6297: * any reference to it. Similarly, the declaration of a general entity
6298: * must precede any reference to it which appears in a default value in an
6299: * attribute-list declaration. Note that if entities are declared in the
6300: * external subset or in external parameter entities, a non-validating
6301: * processor is not obligated to read and process their declarations;
6302: * for such documents, the rule that an entity must be declared is a
6303: * well-formedness constraint only if standalone='yes'.
6304: *
6305: * [ WFC: Parsed Entity ]
6306: * An entity reference must not contain the name of an unparsed entity
6307: *
1.77 daniel 6308: * Returns the xmlEntityPtr if found, or NULL otherwise.
1.24 daniel 6309: */
1.77 daniel 6310: xmlEntityPtr
1.55 daniel 6311: xmlParseEntityRef(xmlParserCtxtPtr ctxt) {
1.123 daniel 6312: xmlChar *name;
1.72 daniel 6313: xmlEntityPtr ent = NULL;
1.24 daniel 6314:
1.91 daniel 6315: GROW;
1.111 daniel 6316:
1.152 daniel 6317: if (RAW == '&') {
1.40 daniel 6318: NEXT;
1.24 daniel 6319: name = xmlParseName(ctxt);
6320: if (name == NULL) {
1.55 daniel 6321: if ((ctxt->sax != NULL) && (ctxt->sax->error != NULL))
1.98 daniel 6322: ctxt->sax->error(ctxt->userData,
6323: "xmlParseEntityRef: no name\n");
1.123 daniel 6324: ctxt->errNo = XML_ERR_NAME_REQUIRED;
1.59 daniel 6325: ctxt->wellFormed = 0;
1.180 daniel 6326: ctxt->disableSAX = 1;
1.24 daniel 6327: } else {
1.152 daniel 6328: if (RAW == ';') {
1.40 daniel 6329: NEXT;
1.24 daniel 6330: /*
1.77 daniel 6331: * Ask first SAX for entity resolution, otherwise try the
6332: * predefined set.
6333: */
6334: if (ctxt->sax != NULL) {
6335: if (ctxt->sax->getEntity != NULL)
6336: ent = ctxt->sax->getEntity(ctxt->userData, name);
6337: if (ent == NULL)
6338: ent = xmlGetPredefinedEntity(name);
6339: }
6340: /*
1.98 daniel 6341: * [ WFC: Entity Declared ]
6342: * In a document without any DTD, a document with only an
6343: * internal DTD subset which contains no parameter entity
6344: * references, or a document with "standalone='yes'", the
6345: * Name given in the entity reference must match that in an
6346: * entity declaration, except that well-formed documents
6347: * need not declare any of the following entities: amp, lt,
6348: * gt, apos, quot.
6349: * The declaration of a parameter entity must precede any
6350: * reference to it.
6351: * Similarly, the declaration of a general entity must
6352: * precede any reference to it which appears in a default
6353: * value in an attribute-list declaration. Note that if
6354: * entities are declared in the external subset or in
6355: * external parameter entities, a non-validating processor
6356: * is not obligated to read and process their declarations;
6357: * for such documents, the rule that an entity must be
6358: * declared is a well-formedness constraint only if
6359: * standalone='yes'.
1.59 daniel 6360: */
1.77 daniel 6361: if (ent == NULL) {
1.98 daniel 6362: if ((ctxt->standalone == 1) ||
6363: ((ctxt->hasExternalSubset == 0) &&
6364: (ctxt->hasPErefs == 0))) {
6365: if ((ctxt->sax != NULL) && (ctxt->sax->error != NULL))
1.77 daniel 6366: ctxt->sax->error(ctxt->userData,
6367: "Entity '%s' not defined\n", name);
1.123 daniel 6368: ctxt->errNo = XML_ERR_UNDECLARED_ENTITY;
1.77 daniel 6369: ctxt->wellFormed = 0;
1.180 daniel 6370: ctxt->disableSAX = 1;
1.77 daniel 6371: } else {
1.98 daniel 6372: if ((ctxt->sax != NULL) && (ctxt->sax->warning != NULL))
6373: ctxt->sax->warning(ctxt->userData,
6374: "Entity '%s' not defined\n", name);
1.123 daniel 6375: ctxt->errNo = XML_WAR_UNDECLARED_ENTITY;
1.59 daniel 6376: }
1.77 daniel 6377: }
1.59 daniel 6378:
6379: /*
1.98 daniel 6380: * [ WFC: Parsed Entity ]
6381: * An entity reference must not contain the name of an
6382: * unparsed entity
6383: */
1.159 daniel 6384: else if (ent->etype == XML_EXTERNAL_GENERAL_UNPARSED_ENTITY) {
1.98 daniel 6385: if ((ctxt->sax != NULL) && (ctxt->sax->error != NULL))
6386: ctxt->sax->error(ctxt->userData,
6387: "Entity reference to unparsed entity %s\n", name);
1.123 daniel 6388: ctxt->errNo = XML_ERR_UNPARSED_ENTITY;
1.98 daniel 6389: ctxt->wellFormed = 0;
1.180 daniel 6390: ctxt->disableSAX = 1;
1.98 daniel 6391: }
6392:
6393: /*
6394: * [ WFC: No External Entity References ]
6395: * Attribute values cannot contain direct or indirect
6396: * entity references to external entities.
6397: */
6398: else if ((ctxt->instate == XML_PARSER_ATTRIBUTE_VALUE) &&
1.159 daniel 6399: (ent->etype == XML_EXTERNAL_GENERAL_PARSED_ENTITY)) {
1.98 daniel 6400: if ((ctxt->sax != NULL) && (ctxt->sax->error != NULL))
6401: ctxt->sax->error(ctxt->userData,
6402: "Attribute references external entity '%s'\n", name);
1.123 daniel 6403: ctxt->errNo = XML_ERR_ENTITY_IS_EXTERNAL;
1.98 daniel 6404: ctxt->wellFormed = 0;
1.180 daniel 6405: ctxt->disableSAX = 1;
1.98 daniel 6406: }
6407: /*
6408: * [ WFC: No < in Attribute Values ]
6409: * The replacement text of any entity referred to directly or
6410: * indirectly in an attribute value (other than "<") must
6411: * not contain a <.
1.59 daniel 6412: */
1.98 daniel 6413: else if ((ctxt->instate == XML_PARSER_ATTRIBUTE_VALUE) &&
1.116 daniel 6414: (ent != NULL) &&
6415: (xmlStrcmp(ent->name, BAD_CAST "lt")) &&
1.98 daniel 6416: (ent->content != NULL) &&
6417: (xmlStrchr(ent->content, '<'))) {
6418: if ((ctxt->sax != NULL) && (ctxt->sax->error != NULL))
6419: ctxt->sax->error(ctxt->userData,
6420: "'<' in entity '%s' is not allowed in attributes values\n", name);
1.123 daniel 6421: ctxt->errNo = XML_ERR_LT_IN_ATTRIBUTE;
1.98 daniel 6422: ctxt->wellFormed = 0;
1.180 daniel 6423: ctxt->disableSAX = 1;
1.98 daniel 6424: }
6425:
6426: /*
6427: * Internal check, no parameter entities here ...
6428: */
6429: else {
1.159 daniel 6430: switch (ent->etype) {
1.59 daniel 6431: case XML_INTERNAL_PARAMETER_ENTITY:
6432: case XML_EXTERNAL_PARAMETER_ENTITY:
6433: if ((ctxt->sax != NULL) && (ctxt->sax->error != NULL))
1.74 daniel 6434: ctxt->sax->error(ctxt->userData,
1.59 daniel 6435: "Attempt to reference the parameter entity '%s'\n", name);
1.123 daniel 6436: ctxt->errNo = XML_ERR_ENTITY_IS_PARAMETER;
1.59 daniel 6437: ctxt->wellFormed = 0;
1.180 daniel 6438: ctxt->disableSAX = 1;
6439: break;
6440: default:
1.59 daniel 6441: break;
6442: }
6443: }
6444:
6445: /*
1.98 daniel 6446: * [ WFC: No Recursion ]
1.117 daniel 6447: * TODO A parsed entity must not contain a recursive reference
6448: * to itself, either directly or indirectly.
1.59 daniel 6449: */
1.77 daniel 6450:
1.24 daniel 6451: } else {
1.55 daniel 6452: if ((ctxt->sax != NULL) && (ctxt->sax->error != NULL))
1.74 daniel 6453: ctxt->sax->error(ctxt->userData,
1.59 daniel 6454: "xmlParseEntityRef: expecting ';'\n");
1.123 daniel 6455: ctxt->errNo = XML_ERR_ENTITYREF_SEMICOL_MISSING;
1.59 daniel 6456: ctxt->wellFormed = 0;
1.180 daniel 6457: ctxt->disableSAX = 1;
1.24 daniel 6458: }
1.119 daniel 6459: xmlFree(name);
1.24 daniel 6460: }
6461: }
1.77 daniel 6462: return(ent);
1.24 daniel 6463: }
1.135 daniel 6464: /**
6465: * xmlParseStringEntityRef:
6466: * @ctxt: an XML parser context
6467: * @str: a pointer to an index in the string
6468: *
6469: * parse ENTITY references declarations, but this version parses it from
6470: * a string value.
6471: *
6472: * [68] EntityRef ::= '&' Name ';'
6473: *
6474: * [ WFC: Entity Declared ]
6475: * In a document without any DTD, a document with only an internal DTD
6476: * subset which contains no parameter entity references, or a document
6477: * with "standalone='yes'", the Name given in the entity reference
6478: * must match that in an entity declaration, except that well-formed
6479: * documents need not declare any of the following entities: amp, lt,
6480: * gt, apos, quot. The declaration of a parameter entity must precede
6481: * any reference to it. Similarly, the declaration of a general entity
6482: * must precede any reference to it which appears in a default value in an
6483: * attribute-list declaration. Note that if entities are declared in the
6484: * external subset or in external parameter entities, a non-validating
6485: * processor is not obligated to read and process their declarations;
6486: * for such documents, the rule that an entity must be declared is a
6487: * well-formedness constraint only if standalone='yes'.
6488: *
6489: * [ WFC: Parsed Entity ]
6490: * An entity reference must not contain the name of an unparsed entity
6491: *
6492: * Returns the xmlEntityPtr if found, or NULL otherwise. The str pointer
6493: * is updated to the current location in the string.
6494: */
6495: xmlEntityPtr
6496: xmlParseStringEntityRef(xmlParserCtxtPtr ctxt, const xmlChar ** str) {
6497: xmlChar *name;
6498: const xmlChar *ptr;
6499: xmlChar cur;
6500: xmlEntityPtr ent = NULL;
6501:
6502: GROW;
6503:
1.156 daniel 6504: if ((str == NULL) || (*str == NULL))
6505: return(NULL);
1.135 daniel 6506: ptr = *str;
6507: cur = *ptr;
6508: if (cur == '&') {
6509: ptr++;
6510: cur = *ptr;
6511: name = xmlParseStringName(ctxt, &ptr);
6512: if (name == NULL) {
6513: if ((ctxt->sax != NULL) && (ctxt->sax->error != NULL))
6514: ctxt->sax->error(ctxt->userData,
6515: "xmlParseEntityRef: no name\n");
6516: ctxt->errNo = XML_ERR_NAME_REQUIRED;
6517: ctxt->wellFormed = 0;
1.180 daniel 6518: ctxt->disableSAX = 1;
1.135 daniel 6519: } else {
1.152 daniel 6520: if (RAW == ';') {
1.135 daniel 6521: NEXT;
6522: /*
6523: * Ask first SAX for entity resolution, otherwise try the
6524: * predefined set.
6525: */
6526: if (ctxt->sax != NULL) {
6527: if (ctxt->sax->getEntity != NULL)
6528: ent = ctxt->sax->getEntity(ctxt->userData, name);
6529: if (ent == NULL)
6530: ent = xmlGetPredefinedEntity(name);
6531: }
6532: /*
6533: * [ WFC: Entity Declared ]
6534: * In a document without any DTD, a document with only an
6535: * internal DTD subset which contains no parameter entity
6536: * references, or a document with "standalone='yes'", the
6537: * Name given in the entity reference must match that in an
6538: * entity declaration, except that well-formed documents
6539: * need not declare any of the following entities: amp, lt,
6540: * gt, apos, quot.
6541: * The declaration of a parameter entity must precede any
6542: * reference to it.
6543: * Similarly, the declaration of a general entity must
6544: * precede any reference to it which appears in a default
6545: * value in an attribute-list declaration. Note that if
6546: * entities are declared in the external subset or in
6547: * external parameter entities, a non-validating processor
6548: * is not obligated to read and process their declarations;
6549: * for such documents, the rule that an entity must be
6550: * declared is a well-formedness constraint only if
6551: * standalone='yes'.
6552: */
6553: if (ent == NULL) {
6554: if ((ctxt->standalone == 1) ||
6555: ((ctxt->hasExternalSubset == 0) &&
6556: (ctxt->hasPErefs == 0))) {
6557: if ((ctxt->sax != NULL) && (ctxt->sax->error != NULL))
6558: ctxt->sax->error(ctxt->userData,
6559: "Entity '%s' not defined\n", name);
6560: ctxt->errNo = XML_ERR_UNDECLARED_ENTITY;
6561: ctxt->wellFormed = 0;
1.180 daniel 6562: ctxt->disableSAX = 1;
1.135 daniel 6563: } else {
6564: if ((ctxt->sax != NULL) && (ctxt->sax->warning != NULL))
6565: ctxt->sax->warning(ctxt->userData,
6566: "Entity '%s' not defined\n", name);
6567: ctxt->errNo = XML_WAR_UNDECLARED_ENTITY;
6568: }
6569: }
6570:
6571: /*
6572: * [ WFC: Parsed Entity ]
6573: * An entity reference must not contain the name of an
6574: * unparsed entity
6575: */
1.159 daniel 6576: else if (ent->etype == XML_EXTERNAL_GENERAL_UNPARSED_ENTITY) {
1.135 daniel 6577: if ((ctxt->sax != NULL) && (ctxt->sax->error != NULL))
6578: ctxt->sax->error(ctxt->userData,
6579: "Entity reference to unparsed entity %s\n", name);
6580: ctxt->errNo = XML_ERR_UNPARSED_ENTITY;
6581: ctxt->wellFormed = 0;
1.180 daniel 6582: ctxt->disableSAX = 1;
1.135 daniel 6583: }
6584:
6585: /*
6586: * [ WFC: No External Entity References ]
6587: * Attribute values cannot contain direct or indirect
6588: * entity references to external entities.
6589: */
6590: else if ((ctxt->instate == XML_PARSER_ATTRIBUTE_VALUE) &&
1.159 daniel 6591: (ent->etype == XML_EXTERNAL_GENERAL_PARSED_ENTITY)) {
1.135 daniel 6592: if ((ctxt->sax != NULL) && (ctxt->sax->error != NULL))
6593: ctxt->sax->error(ctxt->userData,
6594: "Attribute references external entity '%s'\n", name);
6595: ctxt->errNo = XML_ERR_ENTITY_IS_EXTERNAL;
6596: ctxt->wellFormed = 0;
1.180 daniel 6597: ctxt->disableSAX = 1;
1.135 daniel 6598: }
6599: /*
6600: * [ WFC: No < in Attribute Values ]
6601: * The replacement text of any entity referred to directly or
6602: * indirectly in an attribute value (other than "<") must
6603: * not contain a <.
6604: */
6605: else if ((ctxt->instate == XML_PARSER_ATTRIBUTE_VALUE) &&
6606: (ent != NULL) &&
6607: (xmlStrcmp(ent->name, BAD_CAST "lt")) &&
6608: (ent->content != NULL) &&
6609: (xmlStrchr(ent->content, '<'))) {
6610: if ((ctxt->sax != NULL) && (ctxt->sax->error != NULL))
6611: ctxt->sax->error(ctxt->userData,
6612: "'<' in entity '%s' is not allowed in attributes values\n", name);
6613: ctxt->errNo = XML_ERR_LT_IN_ATTRIBUTE;
6614: ctxt->wellFormed = 0;
1.180 daniel 6615: ctxt->disableSAX = 1;
1.135 daniel 6616: }
6617:
6618: /*
6619: * Internal check, no parameter entities here ...
6620: */
6621: else {
1.159 daniel 6622: switch (ent->etype) {
1.135 daniel 6623: case XML_INTERNAL_PARAMETER_ENTITY:
6624: case XML_EXTERNAL_PARAMETER_ENTITY:
6625: if ((ctxt->sax != NULL) && (ctxt->sax->error != NULL))
6626: ctxt->sax->error(ctxt->userData,
6627: "Attempt to reference the parameter entity '%s'\n", name);
6628: ctxt->errNo = XML_ERR_ENTITY_IS_PARAMETER;
6629: ctxt->wellFormed = 0;
1.180 daniel 6630: ctxt->disableSAX = 1;
6631: break;
6632: default:
1.135 daniel 6633: break;
6634: }
6635: }
6636:
6637: /*
6638: * [ WFC: No Recursion ]
6639: * TODO A parsed entity must not contain a recursive reference
6640: * to itself, either directly or indirectly.
6641: */
6642:
6643: } else {
6644: if ((ctxt->sax != NULL) && (ctxt->sax->error != NULL))
6645: ctxt->sax->error(ctxt->userData,
6646: "xmlParseEntityRef: expecting ';'\n");
6647: ctxt->errNo = XML_ERR_ENTITYREF_SEMICOL_MISSING;
6648: ctxt->wellFormed = 0;
1.180 daniel 6649: ctxt->disableSAX = 1;
1.135 daniel 6650: }
6651: xmlFree(name);
6652: }
6653: }
6654: return(ent);
6655: }
1.24 daniel 6656:
1.50 daniel 6657: /**
6658: * xmlParsePEReference:
6659: * @ctxt: an XML parser context
6660: *
6661: * parse PEReference declarations
1.77 daniel 6662: * The entity content is handled directly by pushing it's content as
6663: * a new input stream.
1.22 daniel 6664: *
6665: * [69] PEReference ::= '%' Name ';'
1.68 daniel 6666: *
1.98 daniel 6667: * [ WFC: No Recursion ]
6668: * TODO A parsed entity must not contain a recursive
6669: * reference to itself, either directly or indirectly.
6670: *
6671: * [ WFC: Entity Declared ]
6672: * In a document without any DTD, a document with only an internal DTD
6673: * subset which contains no parameter entity references, or a document
6674: * with "standalone='yes'", ... ... The declaration of a parameter
6675: * entity must precede any reference to it...
6676: *
6677: * [ VC: Entity Declared ]
6678: * In a document with an external subset or external parameter entities
6679: * with "standalone='no'", ... ... The declaration of a parameter entity
6680: * must precede any reference to it...
6681: *
6682: * [ WFC: In DTD ]
6683: * Parameter-entity references may only appear in the DTD.
6684: * NOTE: misleading but this is handled.
1.22 daniel 6685: */
1.77 daniel 6686: void
1.55 daniel 6687: xmlParsePEReference(xmlParserCtxtPtr ctxt) {
1.123 daniel 6688: xmlChar *name;
1.72 daniel 6689: xmlEntityPtr entity = NULL;
1.50 daniel 6690: xmlParserInputPtr input;
1.22 daniel 6691:
1.152 daniel 6692: if (RAW == '%') {
1.40 daniel 6693: NEXT;
1.22 daniel 6694: name = xmlParseName(ctxt);
6695: if (name == NULL) {
1.55 daniel 6696: if ((ctxt->sax != NULL) && (ctxt->sax->error != NULL))
1.122 daniel 6697: ctxt->sax->error(ctxt->userData,
6698: "xmlParsePEReference: no name\n");
1.123 daniel 6699: ctxt->errNo = XML_ERR_NAME_REQUIRED;
1.59 daniel 6700: ctxt->wellFormed = 0;
1.180 daniel 6701: ctxt->disableSAX = 1;
1.22 daniel 6702: } else {
1.152 daniel 6703: if (RAW == ';') {
1.40 daniel 6704: NEXT;
1.98 daniel 6705: if ((ctxt->sax != NULL) &&
6706: (ctxt->sax->getParameterEntity != NULL))
6707: entity = ctxt->sax->getParameterEntity(ctxt->userData,
6708: name);
1.45 daniel 6709: if (entity == NULL) {
1.98 daniel 6710: /*
6711: * [ WFC: Entity Declared ]
6712: * In a document without any DTD, a document with only an
6713: * internal DTD subset which contains no parameter entity
6714: * references, or a document with "standalone='yes'", ...
6715: * ... The declaration of a parameter entity must precede
6716: * any reference to it...
6717: */
6718: if ((ctxt->standalone == 1) ||
6719: ((ctxt->hasExternalSubset == 0) &&
6720: (ctxt->hasPErefs == 0))) {
6721: if ((ctxt->sax != NULL) && (ctxt->sax->error != NULL))
6722: ctxt->sax->error(ctxt->userData,
6723: "PEReference: %%%s; not found\n", name);
1.123 daniel 6724: ctxt->errNo = XML_ERR_UNDECLARED_ENTITY;
1.98 daniel 6725: ctxt->wellFormed = 0;
1.180 daniel 6726: ctxt->disableSAX = 1;
1.98 daniel 6727: } else {
6728: /*
6729: * [ VC: Entity Declared ]
6730: * In a document with an external subset or external
6731: * parameter entities with "standalone='no'", ...
6732: * ... The declaration of a parameter entity must precede
6733: * any reference to it...
6734: */
6735: if ((ctxt->sax != NULL) && (ctxt->sax->warning != NULL))
6736: ctxt->sax->warning(ctxt->userData,
6737: "PEReference: %%%s; not found\n", name);
6738: ctxt->valid = 0;
6739: }
1.50 daniel 6740: } else {
1.98 daniel 6741: /*
6742: * Internal checking in case the entity quest barfed
6743: */
1.159 daniel 6744: if ((entity->etype != XML_INTERNAL_PARAMETER_ENTITY) &&
6745: (entity->etype != XML_EXTERNAL_PARAMETER_ENTITY)) {
1.98 daniel 6746: if ((ctxt->sax != NULL) && (ctxt->sax->warning != NULL))
6747: ctxt->sax->warning(ctxt->userData,
6748: "Internal: %%%s; is not a parameter entity\n", name);
6749: } else {
1.164 daniel 6750: /*
6751: * TODO !!!
6752: * handle the extra spaces added before and after
6753: * c.f. http://www.w3.org/TR/REC-xml#as-PE
6754: */
1.98 daniel 6755: input = xmlNewEntityInputStream(ctxt, entity);
6756: xmlPushInput(ctxt, input);
1.164 daniel 6757: if ((entity->etype == XML_EXTERNAL_PARAMETER_ENTITY) &&
6758: (RAW == '<') && (NXT(1) == '?') &&
6759: (NXT(2) == 'x') && (NXT(3) == 'm') &&
6760: (NXT(4) == 'l') && (IS_BLANK(NXT(5)))) {
1.172 daniel 6761: xmlParseTextDecl(ctxt);
1.164 daniel 6762: }
6763: if (ctxt->token == 0)
6764: ctxt->token = ' ';
1.98 daniel 6765: }
1.45 daniel 6766: }
1.98 daniel 6767: ctxt->hasPErefs = 1;
1.22 daniel 6768: } else {
1.55 daniel 6769: if ((ctxt->sax != NULL) && (ctxt->sax->error != NULL))
1.74 daniel 6770: ctxt->sax->error(ctxt->userData,
1.59 daniel 6771: "xmlParsePEReference: expecting ';'\n");
1.123 daniel 6772: ctxt->errNo = XML_ERR_ENTITYREF_SEMICOL_MISSING;
1.59 daniel 6773: ctxt->wellFormed = 0;
1.180 daniel 6774: ctxt->disableSAX = 1;
1.22 daniel 6775: }
1.119 daniel 6776: xmlFree(name);
1.3 veillard 6777: }
6778: }
6779: }
6780:
1.50 daniel 6781: /**
1.135 daniel 6782: * xmlParseStringPEReference:
6783: * @ctxt: an XML parser context
6784: * @str: a pointer to an index in the string
6785: *
6786: * parse PEReference declarations
6787: *
6788: * [69] PEReference ::= '%' Name ';'
6789: *
6790: * [ WFC: No Recursion ]
6791: * TODO A parsed entity must not contain a recursive
6792: * reference to itself, either directly or indirectly.
6793: *
6794: * [ WFC: Entity Declared ]
6795: * In a document without any DTD, a document with only an internal DTD
6796: * subset which contains no parameter entity references, or a document
6797: * with "standalone='yes'", ... ... The declaration of a parameter
6798: * entity must precede any reference to it...
6799: *
6800: * [ VC: Entity Declared ]
6801: * In a document with an external subset or external parameter entities
6802: * with "standalone='no'", ... ... The declaration of a parameter entity
6803: * must precede any reference to it...
6804: *
6805: * [ WFC: In DTD ]
6806: * Parameter-entity references may only appear in the DTD.
6807: * NOTE: misleading but this is handled.
6808: *
6809: * Returns the string of the entity content.
6810: * str is updated to the current value of the index
6811: */
6812: xmlEntityPtr
6813: xmlParseStringPEReference(xmlParserCtxtPtr ctxt, const xmlChar **str) {
6814: const xmlChar *ptr;
6815: xmlChar cur;
6816: xmlChar *name;
6817: xmlEntityPtr entity = NULL;
6818:
6819: if ((str == NULL) || (*str == NULL)) return(NULL);
6820: ptr = *str;
6821: cur = *ptr;
6822: if (cur == '%') {
6823: ptr++;
6824: cur = *ptr;
6825: name = xmlParseStringName(ctxt, &ptr);
6826: if (name == NULL) {
6827: if ((ctxt->sax != NULL) && (ctxt->sax->error != NULL))
6828: ctxt->sax->error(ctxt->userData,
6829: "xmlParseStringPEReference: no name\n");
6830: ctxt->errNo = XML_ERR_NAME_REQUIRED;
6831: ctxt->wellFormed = 0;
1.180 daniel 6832: ctxt->disableSAX = 1;
1.135 daniel 6833: } else {
6834: cur = *ptr;
6835: if (cur == ';') {
6836: ptr++;
6837: cur = *ptr;
6838: if ((ctxt->sax != NULL) &&
6839: (ctxt->sax->getParameterEntity != NULL))
6840: entity = ctxt->sax->getParameterEntity(ctxt->userData,
6841: name);
6842: if (entity == NULL) {
6843: /*
6844: * [ WFC: Entity Declared ]
6845: * In a document without any DTD, a document with only an
6846: * internal DTD subset which contains no parameter entity
6847: * references, or a document with "standalone='yes'", ...
6848: * ... The declaration of a parameter entity must precede
6849: * any reference to it...
6850: */
6851: if ((ctxt->standalone == 1) ||
6852: ((ctxt->hasExternalSubset == 0) &&
6853: (ctxt->hasPErefs == 0))) {
6854: if ((ctxt->sax != NULL) && (ctxt->sax->error != NULL))
6855: ctxt->sax->error(ctxt->userData,
6856: "PEReference: %%%s; not found\n", name);
6857: ctxt->errNo = XML_ERR_UNDECLARED_ENTITY;
6858: ctxt->wellFormed = 0;
1.180 daniel 6859: ctxt->disableSAX = 1;
1.135 daniel 6860: } else {
6861: /*
6862: * [ VC: Entity Declared ]
6863: * In a document with an external subset or external
6864: * parameter entities with "standalone='no'", ...
6865: * ... The declaration of a parameter entity must
6866: * precede any reference to it...
6867: */
6868: if ((ctxt->sax != NULL) && (ctxt->sax->warning != NULL))
6869: ctxt->sax->warning(ctxt->userData,
6870: "PEReference: %%%s; not found\n", name);
6871: ctxt->valid = 0;
6872: }
6873: } else {
6874: /*
6875: * Internal checking in case the entity quest barfed
6876: */
1.159 daniel 6877: if ((entity->etype != XML_INTERNAL_PARAMETER_ENTITY) &&
6878: (entity->etype != XML_EXTERNAL_PARAMETER_ENTITY)) {
1.135 daniel 6879: if ((ctxt->sax != NULL) && (ctxt->sax->warning != NULL))
6880: ctxt->sax->warning(ctxt->userData,
6881: "Internal: %%%s; is not a parameter entity\n", name);
6882: }
6883: }
6884: ctxt->hasPErefs = 1;
6885: } else {
6886: if ((ctxt->sax != NULL) && (ctxt->sax->error != NULL))
6887: ctxt->sax->error(ctxt->userData,
6888: "xmlParseStringPEReference: expecting ';'\n");
6889: ctxt->errNo = XML_ERR_ENTITYREF_SEMICOL_MISSING;
6890: ctxt->wellFormed = 0;
1.180 daniel 6891: ctxt->disableSAX = 1;
1.135 daniel 6892: }
6893: xmlFree(name);
6894: }
6895: }
6896: *str = ptr;
6897: return(entity);
6898: }
6899:
6900: /**
1.181 ! daniel 6901: * xmlParseDocTypeDecl:
1.50 daniel 6902: * @ctxt: an XML parser context
6903: *
6904: * parse a DOCTYPE declaration
1.21 daniel 6905: *
1.22 daniel 6906: * [28] doctypedecl ::= '<!DOCTYPE' S Name (S ExternalID)? S?
6907: * ('[' (markupdecl | PEReference | S)* ']' S?)? '>'
1.98 daniel 6908: *
6909: * [ VC: Root Element Type ]
1.99 daniel 6910: * The Name in the document type declaration must match the element
1.98 daniel 6911: * type of the root element.
1.21 daniel 6912: */
6913:
1.55 daniel 6914: void
6915: xmlParseDocTypeDecl(xmlParserCtxtPtr ctxt) {
1.165 daniel 6916: xmlChar *name = NULL;
1.123 daniel 6917: xmlChar *ExternalID = NULL;
6918: xmlChar *URI = NULL;
1.21 daniel 6919:
6920: /*
6921: * We know that '<!DOCTYPE' has been detected.
6922: */
1.40 daniel 6923: SKIP(9);
1.21 daniel 6924:
1.42 daniel 6925: SKIP_BLANKS;
1.21 daniel 6926:
6927: /*
6928: * Parse the DOCTYPE name.
6929: */
6930: name = xmlParseName(ctxt);
6931: if (name == NULL) {
1.55 daniel 6932: if ((ctxt->sax != NULL) && (ctxt->sax->error != NULL))
1.122 daniel 6933: ctxt->sax->error(ctxt->userData,
6934: "xmlParseDocTypeDecl : no DOCTYPE name !\n");
1.59 daniel 6935: ctxt->wellFormed = 0;
1.180 daniel 6936: ctxt->disableSAX = 1;
1.123 daniel 6937: ctxt->errNo = XML_ERR_NAME_REQUIRED;
1.21 daniel 6938: }
1.165 daniel 6939: ctxt->intSubName = name;
1.21 daniel 6940:
1.42 daniel 6941: SKIP_BLANKS;
1.21 daniel 6942:
6943: /*
1.22 daniel 6944: * Check for SystemID and ExternalID
6945: */
1.67 daniel 6946: URI = xmlParseExternalID(ctxt, &ExternalID, 1);
1.98 daniel 6947:
6948: if ((URI != NULL) || (ExternalID != NULL)) {
6949: ctxt->hasExternalSubset = 1;
6950: }
1.165 daniel 6951: ctxt->extSubURI = URI;
6952: ctxt->extSubSystem = ExternalID;
1.98 daniel 6953:
1.42 daniel 6954: SKIP_BLANKS;
1.36 daniel 6955:
1.76 daniel 6956: /*
1.165 daniel 6957: * Create and update the internal subset.
1.76 daniel 6958: */
1.171 daniel 6959: if ((ctxt->sax != NULL) && (ctxt->sax->internalSubset != NULL) &&
6960: (!ctxt->disableSAX))
1.74 daniel 6961: ctxt->sax->internalSubset(ctxt->userData, name, ExternalID, URI);
1.22 daniel 6962:
6963: /*
1.140 daniel 6964: * Is there any internal subset declarations ?
6965: * they are handled separately in xmlParseInternalSubset()
6966: */
1.152 daniel 6967: if (RAW == '[')
1.140 daniel 6968: return;
6969:
6970: /*
6971: * We should be at the end of the DOCTYPE declaration.
6972: */
1.152 daniel 6973: if (RAW != '>') {
1.140 daniel 6974: if ((ctxt->sax != NULL) && (ctxt->sax->error != NULL))
6975: ctxt->sax->error(ctxt->userData, "DOCTYPE unproperly terminated\n");
6976: ctxt->wellFormed = 0;
1.180 daniel 6977: ctxt->disableSAX = 1;
1.140 daniel 6978: ctxt->errNo = XML_ERR_DOCTYPE_NOT_FINISHED;
6979: }
6980: NEXT;
6981: }
6982:
6983: /**
1.181 ! daniel 6984: * xmlParseInternalsubset:
1.140 daniel 6985: * @ctxt: an XML parser context
6986: *
6987: * parse the internal subset declaration
6988: *
6989: * [28 end] ('[' (markupdecl | PEReference | S)* ']' S?)? '>'
6990: */
6991:
6992: void
6993: xmlParseInternalSubset(xmlParserCtxtPtr ctxt) {
6994: /*
1.22 daniel 6995: * Is there any DTD definition ?
6996: */
1.152 daniel 6997: if (RAW == '[') {
1.96 daniel 6998: ctxt->instate = XML_PARSER_DTD;
1.40 daniel 6999: NEXT;
1.22 daniel 7000: /*
7001: * Parse the succession of Markup declarations and
7002: * PEReferences.
7003: * Subsequence (markupdecl | PEReference | S)*
7004: */
1.152 daniel 7005: while (RAW != ']') {
1.123 daniel 7006: const xmlChar *check = CUR_PTR;
1.115 daniel 7007: int cons = ctxt->input->consumed;
1.22 daniel 7008:
1.42 daniel 7009: SKIP_BLANKS;
1.22 daniel 7010: xmlParseMarkupDecl(ctxt);
1.50 daniel 7011: xmlParsePEReference(ctxt);
1.22 daniel 7012:
1.115 daniel 7013: /*
7014: * Pop-up of finished entities.
7015: */
1.152 daniel 7016: while ((RAW == 0) && (ctxt->inputNr > 1))
1.115 daniel 7017: xmlPopInput(ctxt);
7018:
1.118 daniel 7019: if ((CUR_PTR == check) && (cons == ctxt->input->consumed)) {
1.55 daniel 7020: if ((ctxt->sax != NULL) && (ctxt->sax->error != NULL))
1.74 daniel 7021: ctxt->sax->error(ctxt->userData,
1.140 daniel 7022: "xmlParseInternalSubset: error detected in Markup declaration\n");
1.59 daniel 7023: ctxt->wellFormed = 0;
1.180 daniel 7024: ctxt->disableSAX = 1;
1.123 daniel 7025: ctxt->errNo = XML_ERR_INTERNAL_ERROR;
1.22 daniel 7026: break;
7027: }
7028: }
1.152 daniel 7029: if (RAW == ']') NEXT;
1.22 daniel 7030: }
7031:
7032: /*
7033: * We should be at the end of the DOCTYPE declaration.
1.21 daniel 7034: */
1.152 daniel 7035: if (RAW != '>') {
1.55 daniel 7036: if ((ctxt->sax != NULL) && (ctxt->sax->error != NULL))
1.74 daniel 7037: ctxt->sax->error(ctxt->userData, "DOCTYPE unproperly terminated\n");
1.59 daniel 7038: ctxt->wellFormed = 0;
1.180 daniel 7039: ctxt->disableSAX = 1;
1.123 daniel 7040: ctxt->errNo = XML_ERR_DOCTYPE_NOT_FINISHED;
1.21 daniel 7041: }
1.40 daniel 7042: NEXT;
1.21 daniel 7043: }
7044:
1.50 daniel 7045: /**
7046: * xmlParseAttribute:
7047: * @ctxt: an XML parser context
1.123 daniel 7048: * @value: a xmlChar ** used to store the value of the attribute
1.50 daniel 7049: *
7050: * parse an attribute
1.3 veillard 7051: *
1.22 daniel 7052: * [41] Attribute ::= Name Eq AttValue
7053: *
1.98 daniel 7054: * [ WFC: No External Entity References ]
7055: * Attribute values cannot contain direct or indirect entity references
7056: * to external entities.
7057: *
7058: * [ WFC: No < in Attribute Values ]
7059: * The replacement text of any entity referred to directly or indirectly in
7060: * an attribute value (other than "<") must not contain a <.
7061: *
7062: * [ VC: Attribute Value Type ]
1.117 daniel 7063: * The attribute must have been declared; the value must be of the type
1.99 daniel 7064: * declared for it.
1.98 daniel 7065: *
1.22 daniel 7066: * [25] Eq ::= S? '=' S?
7067: *
1.29 daniel 7068: * With namespace:
7069: *
7070: * [NS 11] Attribute ::= QName Eq AttValue
1.43 daniel 7071: *
7072: * Also the case QName == xmlns:??? is handled independently as a namespace
7073: * definition.
1.69 daniel 7074: *
1.72 daniel 7075: * Returns the attribute name, and the value in *value.
1.3 veillard 7076: */
7077:
1.123 daniel 7078: xmlChar *
7079: xmlParseAttribute(xmlParserCtxtPtr ctxt, xmlChar **value) {
7080: xmlChar *name, *val;
1.3 veillard 7081:
1.72 daniel 7082: *value = NULL;
7083: name = xmlParseName(ctxt);
1.22 daniel 7084: if (name == NULL) {
1.55 daniel 7085: if ((ctxt->sax != NULL) && (ctxt->sax->error != NULL))
1.74 daniel 7086: ctxt->sax->error(ctxt->userData, "error parsing attribute name\n");
1.59 daniel 7087: ctxt->wellFormed = 0;
1.180 daniel 7088: ctxt->disableSAX = 1;
1.123 daniel 7089: ctxt->errNo = XML_ERR_NAME_REQUIRED;
1.52 daniel 7090: return(NULL);
1.3 veillard 7091: }
7092:
7093: /*
1.29 daniel 7094: * read the value
1.3 veillard 7095: */
1.42 daniel 7096: SKIP_BLANKS;
1.152 daniel 7097: if (RAW == '=') {
1.40 daniel 7098: NEXT;
1.42 daniel 7099: SKIP_BLANKS;
1.72 daniel 7100: val = xmlParseAttValue(ctxt);
1.96 daniel 7101: ctxt->instate = XML_PARSER_CONTENT;
1.29 daniel 7102: } else {
1.55 daniel 7103: if ((ctxt->sax != NULL) && (ctxt->sax->error != NULL))
1.74 daniel 7104: ctxt->sax->error(ctxt->userData,
1.59 daniel 7105: "Specification mandate value for attribute %s\n", name);
1.123 daniel 7106: ctxt->errNo = XML_ERR_ATTRIBUTE_WITHOUT_VALUE;
1.59 daniel 7107: ctxt->wellFormed = 0;
1.180 daniel 7108: ctxt->disableSAX = 1;
1.170 daniel 7109: xmlFree(name);
1.52 daniel 7110: return(NULL);
1.43 daniel 7111: }
7112:
1.172 daniel 7113: /*
7114: * Check that xml:lang conforms to the specification
7115: */
7116: if (!xmlStrcmp(name, BAD_CAST "xml:lang")) {
7117: if (!xmlCheckLanguageID(val)) {
7118: if ((ctxt->sax != NULL) && (ctxt->sax->error != NULL))
7119: ctxt->sax->error(ctxt->userData,
7120: "Invalid value for xml:lang : %s\n", val);
7121: ctxt->errNo = XML_ERR_ATTRIBUTE_WITHOUT_VALUE;
7122: ctxt->wellFormed = 0;
1.180 daniel 7123: ctxt->disableSAX = 1;
1.172 daniel 7124: }
7125: }
7126:
1.176 daniel 7127: /*
7128: * Check that xml:space conforms to the specification
7129: */
7130: if (!xmlStrcmp(name, BAD_CAST "xml:space")) {
7131: if (!xmlStrcmp(val, BAD_CAST "default"))
7132: *(ctxt->space) = 0;
7133: else if (!xmlStrcmp(val, BAD_CAST "preserve"))
7134: *(ctxt->space) = 1;
7135: else {
7136: if ((ctxt->sax != NULL) && (ctxt->sax->error != NULL))
7137: ctxt->sax->error(ctxt->userData,
7138: "Invalid value for xml:space : \"%s\", \"default\" or \"preserve\" expected\n",
7139: val);
7140: ctxt->errNo = XML_ERR_ATTRIBUTE_WITHOUT_VALUE;
7141: ctxt->wellFormed = 0;
1.180 daniel 7142: ctxt->disableSAX = 1;
1.176 daniel 7143: }
7144: }
7145:
1.72 daniel 7146: *value = val;
7147: return(name);
1.3 veillard 7148: }
7149:
1.50 daniel 7150: /**
7151: * xmlParseStartTag:
7152: * @ctxt: an XML parser context
7153: *
7154: * parse a start of tag either for rule element or
7155: * EmptyElement. In both case we don't parse the tag closing chars.
1.27 daniel 7156: *
7157: * [40] STag ::= '<' Name (S Attribute)* S? '>'
7158: *
1.98 daniel 7159: * [ WFC: Unique Att Spec ]
7160: * No attribute name may appear more than once in the same start-tag or
7161: * empty-element tag.
7162: *
1.29 daniel 7163: * [44] EmptyElemTag ::= '<' Name (S Attribute)* S? '/>'
7164: *
1.98 daniel 7165: * [ WFC: Unique Att Spec ]
7166: * No attribute name may appear more than once in the same start-tag or
7167: * empty-element tag.
7168: *
1.29 daniel 7169: * With namespace:
7170: *
7171: * [NS 8] STag ::= '<' QName (S Attribute)* S? '>'
7172: *
7173: * [NS 10] EmptyElement ::= '<' QName (S Attribute)* S? '/>'
1.83 daniel 7174: *
1.129 daniel 7175: * Returns the element name parsed
1.2 veillard 7176: */
7177:
1.123 daniel 7178: xmlChar *
1.69 daniel 7179: xmlParseStartTag(xmlParserCtxtPtr ctxt) {
1.123 daniel 7180: xmlChar *name;
7181: xmlChar *attname;
7182: xmlChar *attvalue;
7183: const xmlChar **atts = NULL;
1.72 daniel 7184: int nbatts = 0;
7185: int maxatts = 0;
7186: int i;
1.2 veillard 7187:
1.152 daniel 7188: if (RAW != '<') return(NULL);
1.40 daniel 7189: NEXT;
1.3 veillard 7190:
1.72 daniel 7191: name = xmlParseName(ctxt);
1.59 daniel 7192: if (name == NULL) {
7193: if ((ctxt->sax != NULL) && (ctxt->sax->error != NULL))
1.74 daniel 7194: ctxt->sax->error(ctxt->userData,
1.59 daniel 7195: "xmlParseStartTag: invalid element name\n");
1.123 daniel 7196: ctxt->errNo = XML_ERR_NAME_REQUIRED;
1.59 daniel 7197: ctxt->wellFormed = 0;
1.180 daniel 7198: ctxt->disableSAX = 1;
1.83 daniel 7199: return(NULL);
1.50 daniel 7200: }
7201:
7202: /*
1.3 veillard 7203: * Now parse the attributes, it ends up with the ending
7204: *
7205: * (S Attribute)* S?
7206: */
1.42 daniel 7207: SKIP_BLANKS;
1.91 daniel 7208: GROW;
1.168 daniel 7209:
1.153 daniel 7210: while ((IS_CHAR(RAW)) &&
1.152 daniel 7211: (RAW != '>') &&
7212: ((RAW != '/') || (NXT(1) != '>'))) {
1.123 daniel 7213: const xmlChar *q = CUR_PTR;
1.91 daniel 7214: int cons = ctxt->input->consumed;
1.29 daniel 7215:
1.72 daniel 7216: attname = xmlParseAttribute(ctxt, &attvalue);
7217: if ((attname != NULL) && (attvalue != NULL)) {
7218: /*
1.98 daniel 7219: * [ WFC: Unique Att Spec ]
7220: * No attribute name may appear more than once in the same
7221: * start-tag or empty-element tag.
1.72 daniel 7222: */
7223: for (i = 0; i < nbatts;i += 2) {
7224: if (!xmlStrcmp(atts[i], attname)) {
7225: if ((ctxt->sax != NULL) && (ctxt->sax->error != NULL))
1.98 daniel 7226: ctxt->sax->error(ctxt->userData,
7227: "Attribute %s redefined\n",
7228: attname);
1.72 daniel 7229: ctxt->wellFormed = 0;
1.180 daniel 7230: ctxt->disableSAX = 1;
1.123 daniel 7231: ctxt->errNo = XML_ERR_ATTRIBUTE_REDEFINED;
1.119 daniel 7232: xmlFree(attname);
7233: xmlFree(attvalue);
1.98 daniel 7234: goto failed;
1.72 daniel 7235: }
7236: }
7237:
7238: /*
7239: * Add the pair to atts
7240: */
7241: if (atts == NULL) {
7242: maxatts = 10;
1.123 daniel 7243: atts = (const xmlChar **) xmlMalloc(maxatts * sizeof(xmlChar *));
1.72 daniel 7244: if (atts == NULL) {
1.86 daniel 7245: fprintf(stderr, "malloc of %ld byte failed\n",
1.123 daniel 7246: maxatts * (long)sizeof(xmlChar *));
1.83 daniel 7247: return(NULL);
1.72 daniel 7248: }
1.127 daniel 7249: } else if (nbatts + 4 > maxatts) {
1.72 daniel 7250: maxatts *= 2;
1.123 daniel 7251: atts = (const xmlChar **) xmlRealloc(atts,
7252: maxatts * sizeof(xmlChar *));
1.72 daniel 7253: if (atts == NULL) {
1.86 daniel 7254: fprintf(stderr, "realloc of %ld byte failed\n",
1.123 daniel 7255: maxatts * (long)sizeof(xmlChar *));
1.83 daniel 7256: return(NULL);
1.72 daniel 7257: }
7258: }
7259: atts[nbatts++] = attname;
7260: atts[nbatts++] = attvalue;
7261: atts[nbatts] = NULL;
7262: atts[nbatts + 1] = NULL;
1.176 daniel 7263: } else {
7264: if (attname != NULL)
7265: xmlFree(attname);
7266: if (attvalue != NULL)
7267: xmlFree(attvalue);
1.72 daniel 7268: }
7269:
1.116 daniel 7270: failed:
1.168 daniel 7271:
7272: if ((RAW == '>') || (((RAW == '/') && (NXT(1) == '>'))))
7273: break;
7274: if (!IS_BLANK(RAW)) {
7275: if ((ctxt->sax != NULL) && (ctxt->sax->error != NULL))
7276: ctxt->sax->error(ctxt->userData,
7277: "attributes construct error\n");
7278: ctxt->errNo = XML_ERR_SPACE_REQUIRED;
7279: ctxt->wellFormed = 0;
1.180 daniel 7280: ctxt->disableSAX = 1;
1.168 daniel 7281: }
1.42 daniel 7282: SKIP_BLANKS;
1.91 daniel 7283: if ((cons == ctxt->input->consumed) && (q == CUR_PTR)) {
1.55 daniel 7284: if ((ctxt->sax != NULL) && (ctxt->sax->error != NULL))
1.74 daniel 7285: ctxt->sax->error(ctxt->userData,
1.31 daniel 7286: "xmlParseStartTag: problem parsing attributes\n");
1.123 daniel 7287: ctxt->errNo = XML_ERR_INTERNAL_ERROR;
1.59 daniel 7288: ctxt->wellFormed = 0;
1.180 daniel 7289: ctxt->disableSAX = 1;
1.29 daniel 7290: break;
1.3 veillard 7291: }
1.91 daniel 7292: GROW;
1.3 veillard 7293: }
7294:
1.43 daniel 7295: /*
1.72 daniel 7296: * SAX: Start of Element !
1.43 daniel 7297: */
1.171 daniel 7298: if ((ctxt->sax != NULL) && (ctxt->sax->startElement != NULL) &&
7299: (!ctxt->disableSAX))
1.74 daniel 7300: ctxt->sax->startElement(ctxt->userData, name, atts);
1.43 daniel 7301:
1.72 daniel 7302: if (atts != NULL) {
1.123 daniel 7303: for (i = 0;i < nbatts;i++) xmlFree((xmlChar *) atts[i]);
1.119 daniel 7304: xmlFree(atts);
1.72 daniel 7305: }
1.83 daniel 7306: return(name);
1.3 veillard 7307: }
7308:
1.50 daniel 7309: /**
7310: * xmlParseEndTag:
7311: * @ctxt: an XML parser context
7312: *
7313: * parse an end of tag
1.27 daniel 7314: *
7315: * [42] ETag ::= '</' Name S? '>'
1.29 daniel 7316: *
7317: * With namespace
7318: *
1.72 daniel 7319: * [NS 9] ETag ::= '</' QName S? '>'
1.7 veillard 7320: */
7321:
1.55 daniel 7322: void
1.140 daniel 7323: xmlParseEndTag(xmlParserCtxtPtr ctxt) {
1.123 daniel 7324: xmlChar *name;
1.140 daniel 7325: xmlChar *oldname;
1.7 veillard 7326:
1.91 daniel 7327: GROW;
1.152 daniel 7328: if ((RAW != '<') || (NXT(1) != '/')) {
1.55 daniel 7329: if ((ctxt->sax != NULL) && (ctxt->sax->error != NULL))
1.74 daniel 7330: ctxt->sax->error(ctxt->userData, "xmlParseEndTag: '</' not found\n");
1.59 daniel 7331: ctxt->wellFormed = 0;
1.180 daniel 7332: ctxt->disableSAX = 1;
1.123 daniel 7333: ctxt->errNo = XML_ERR_LTSLASH_REQUIRED;
1.27 daniel 7334: return;
7335: }
1.40 daniel 7336: SKIP(2);
1.7 veillard 7337:
1.72 daniel 7338: name = xmlParseName(ctxt);
1.7 veillard 7339:
7340: /*
7341: * We should definitely be at the ending "S? '>'" part
7342: */
1.91 daniel 7343: GROW;
1.42 daniel 7344: SKIP_BLANKS;
1.153 daniel 7345: if ((!IS_CHAR(RAW)) || (RAW != '>')) {
1.55 daniel 7346: if ((ctxt->sax != NULL) && (ctxt->sax->error != NULL))
1.74 daniel 7347: ctxt->sax->error(ctxt->userData, "End tag : expected '>'\n");
1.123 daniel 7348: ctxt->errNo = XML_ERR_GT_REQUIRED;
1.59 daniel 7349: ctxt->wellFormed = 0;
1.180 daniel 7350: ctxt->disableSAX = 1;
1.7 veillard 7351: } else
1.40 daniel 7352: NEXT;
1.7 veillard 7353:
1.72 daniel 7354: /*
1.98 daniel 7355: * [ WFC: Element Type Match ]
7356: * The Name in an element's end-tag must match the element type in the
7357: * start-tag.
7358: *
1.83 daniel 7359: */
1.147 daniel 7360: if ((name == NULL) || (ctxt->name == NULL) ||
7361: (xmlStrcmp(name, ctxt->name))) {
7362: if ((ctxt->sax != NULL) && (ctxt->sax->error != NULL)) {
7363: if ((name != NULL) && (ctxt->name != NULL)) {
7364: ctxt->sax->error(ctxt->userData,
7365: "Opening and ending tag mismatch: %s and %s\n",
7366: ctxt->name, name);
7367: } else if (ctxt->name != NULL) {
7368: ctxt->sax->error(ctxt->userData,
7369: "Ending tag eror for: %s\n", ctxt->name);
7370: } else {
7371: ctxt->sax->error(ctxt->userData,
7372: "Ending tag error: internal error ???\n");
7373: }
1.122 daniel 7374:
1.147 daniel 7375: }
1.123 daniel 7376: ctxt->errNo = XML_ERR_TAG_NAME_MISMATCH;
1.83 daniel 7377: ctxt->wellFormed = 0;
1.180 daniel 7378: ctxt->disableSAX = 1;
1.83 daniel 7379: }
7380:
7381: /*
1.72 daniel 7382: * SAX: End of Tag
7383: */
1.171 daniel 7384: if ((ctxt->sax != NULL) && (ctxt->sax->endElement != NULL) &&
7385: (!ctxt->disableSAX))
1.74 daniel 7386: ctxt->sax->endElement(ctxt->userData, name);
1.72 daniel 7387:
7388: if (name != NULL)
1.119 daniel 7389: xmlFree(name);
1.140 daniel 7390: oldname = namePop(ctxt);
1.176 daniel 7391: spacePop(ctxt);
1.140 daniel 7392: if (oldname != NULL) {
7393: #ifdef DEBUG_STACK
7394: fprintf(stderr,"Close: popped %s\n", oldname);
7395: #endif
7396: xmlFree(oldname);
7397: }
1.7 veillard 7398: return;
7399: }
7400:
1.50 daniel 7401: /**
7402: * xmlParseCDSect:
7403: * @ctxt: an XML parser context
7404: *
7405: * Parse escaped pure raw content.
1.29 daniel 7406: *
7407: * [18] CDSect ::= CDStart CData CDEnd
7408: *
7409: * [19] CDStart ::= '<![CDATA['
7410: *
7411: * [20] Data ::= (Char* - (Char* ']]>' Char*))
7412: *
7413: * [21] CDEnd ::= ']]>'
1.3 veillard 7414: */
1.55 daniel 7415: void
7416: xmlParseCDSect(xmlParserCtxtPtr ctxt) {
1.135 daniel 7417: xmlChar *buf = NULL;
7418: int len = 0;
1.140 daniel 7419: int size = XML_PARSER_BUFFER_SIZE;
1.152 daniel 7420: int r, rl;
7421: int s, sl;
7422: int cur, l;
1.3 veillard 7423:
1.106 daniel 7424: if ((NXT(0) == '<') && (NXT(1) == '!') &&
1.40 daniel 7425: (NXT(2) == '[') && (NXT(3) == 'C') &&
7426: (NXT(4) == 'D') && (NXT(5) == 'A') &&
7427: (NXT(6) == 'T') && (NXT(7) == 'A') &&
7428: (NXT(8) == '[')) {
7429: SKIP(9);
1.29 daniel 7430: } else
1.45 daniel 7431: return;
1.109 daniel 7432:
7433: ctxt->instate = XML_PARSER_CDATA_SECTION;
1.152 daniel 7434: r = CUR_CHAR(rl);
7435: if (!IS_CHAR(r)) {
1.55 daniel 7436: if ((ctxt->sax != NULL) && (ctxt->sax->error != NULL))
1.122 daniel 7437: ctxt->sax->error(ctxt->userData,
1.135 daniel 7438: "CData section not finished\n");
1.59 daniel 7439: ctxt->wellFormed = 0;
1.180 daniel 7440: ctxt->disableSAX = 1;
1.123 daniel 7441: ctxt->errNo = XML_ERR_CDATA_NOT_FINISHED;
1.109 daniel 7442: ctxt->instate = XML_PARSER_CONTENT;
1.45 daniel 7443: return;
1.3 veillard 7444: }
1.152 daniel 7445: NEXTL(rl);
7446: s = CUR_CHAR(sl);
7447: if (!IS_CHAR(s)) {
1.55 daniel 7448: if ((ctxt->sax != NULL) && (ctxt->sax->error != NULL))
1.122 daniel 7449: ctxt->sax->error(ctxt->userData,
1.135 daniel 7450: "CData section not finished\n");
1.123 daniel 7451: ctxt->errNo = XML_ERR_CDATA_NOT_FINISHED;
1.59 daniel 7452: ctxt->wellFormed = 0;
1.180 daniel 7453: ctxt->disableSAX = 1;
1.109 daniel 7454: ctxt->instate = XML_PARSER_CONTENT;
1.45 daniel 7455: return;
1.3 veillard 7456: }
1.152 daniel 7457: NEXTL(sl);
7458: cur = CUR_CHAR(l);
1.135 daniel 7459: buf = (xmlChar *) xmlMalloc(size * sizeof(xmlChar));
7460: if (buf == NULL) {
7461: fprintf(stderr, "malloc of %d byte failed\n", size);
7462: return;
7463: }
1.108 veillard 7464: while (IS_CHAR(cur) &&
1.110 daniel 7465: ((r != ']') || (s != ']') || (cur != '>'))) {
1.152 daniel 7466: if (len + 5 >= size) {
1.135 daniel 7467: size *= 2;
7468: buf = xmlRealloc(buf, size * sizeof(xmlChar));
7469: if (buf == NULL) {
7470: fprintf(stderr, "realloc of %d byte failed\n", size);
7471: return;
7472: }
7473: }
1.152 daniel 7474: COPY_BUF(rl,buf,len,r);
1.110 daniel 7475: r = s;
1.152 daniel 7476: rl = sl;
1.110 daniel 7477: s = cur;
1.152 daniel 7478: sl = l;
7479: NEXTL(l);
7480: cur = CUR_CHAR(l);
1.3 veillard 7481: }
1.135 daniel 7482: buf[len] = 0;
1.109 daniel 7483: ctxt->instate = XML_PARSER_CONTENT;
1.152 daniel 7484: if (cur != '>') {
1.55 daniel 7485: if ((ctxt->sax != NULL) && (ctxt->sax->error != NULL))
1.122 daniel 7486: ctxt->sax->error(ctxt->userData,
1.135 daniel 7487: "CData section not finished\n%.50s\n", buf);
1.123 daniel 7488: ctxt->errNo = XML_ERR_CDATA_NOT_FINISHED;
1.59 daniel 7489: ctxt->wellFormed = 0;
1.180 daniel 7490: ctxt->disableSAX = 1;
1.135 daniel 7491: xmlFree(buf);
1.45 daniel 7492: return;
1.3 veillard 7493: }
1.152 daniel 7494: NEXTL(l);
1.16 daniel 7495:
1.45 daniel 7496: /*
1.135 daniel 7497: * Ok the buffer is to be consumed as cdata.
1.45 daniel 7498: */
1.171 daniel 7499: if ((ctxt->sax != NULL) && (!ctxt->disableSAX)) {
1.107 daniel 7500: if (ctxt->sax->cdataBlock != NULL)
1.135 daniel 7501: ctxt->sax->cdataBlock(ctxt->userData, buf, len);
1.45 daniel 7502: }
1.135 daniel 7503: xmlFree(buf);
1.2 veillard 7504: }
7505:
1.50 daniel 7506: /**
7507: * xmlParseContent:
7508: * @ctxt: an XML parser context
7509: *
7510: * Parse a content:
1.2 veillard 7511: *
1.27 daniel 7512: * [43] content ::= (element | CharData | Reference | CDSect | PI | Comment)*
1.2 veillard 7513: */
7514:
1.55 daniel 7515: void
7516: xmlParseContent(xmlParserCtxtPtr ctxt) {
1.97 daniel 7517: GROW;
1.176 daniel 7518: while (((RAW != 0) || (ctxt->token != 0)) &&
7519: ((RAW != '<') || (NXT(1) != '/'))) {
1.123 daniel 7520: const xmlChar *test = CUR_PTR;
1.91 daniel 7521: int cons = ctxt->input->consumed;
1.123 daniel 7522: xmlChar tok = ctxt->token;
1.27 daniel 7523:
7524: /*
1.152 daniel 7525: * Handle possible processed charrefs.
7526: */
7527: if (ctxt->token != 0) {
7528: xmlParseCharData(ctxt, 0);
7529: }
7530: /*
1.27 daniel 7531: * First case : a Processing Instruction.
7532: */
1.152 daniel 7533: else if ((RAW == '<') && (NXT(1) == '?')) {
1.27 daniel 7534: xmlParsePI(ctxt);
7535: }
1.72 daniel 7536:
1.27 daniel 7537: /*
7538: * Second case : a CDSection
7539: */
1.152 daniel 7540: else if ((RAW == '<') && (NXT(1) == '!') &&
1.40 daniel 7541: (NXT(2) == '[') && (NXT(3) == 'C') &&
7542: (NXT(4) == 'D') && (NXT(5) == 'A') &&
7543: (NXT(6) == 'T') && (NXT(7) == 'A') &&
7544: (NXT(8) == '[')) {
1.45 daniel 7545: xmlParseCDSect(ctxt);
1.27 daniel 7546: }
1.72 daniel 7547:
1.27 daniel 7548: /*
7549: * Third case : a comment
7550: */
1.152 daniel 7551: else if ((RAW == '<') && (NXT(1) == '!') &&
1.40 daniel 7552: (NXT(2) == '-') && (NXT(3) == '-')) {
1.114 daniel 7553: xmlParseComment(ctxt);
1.97 daniel 7554: ctxt->instate = XML_PARSER_CONTENT;
1.27 daniel 7555: }
1.72 daniel 7556:
1.27 daniel 7557: /*
7558: * Fourth case : a sub-element.
7559: */
1.152 daniel 7560: else if (RAW == '<') {
1.72 daniel 7561: xmlParseElement(ctxt);
1.45 daniel 7562: }
1.72 daniel 7563:
1.45 daniel 7564: /*
1.50 daniel 7565: * Fifth case : a reference. If if has not been resolved,
7566: * parsing returns it's Name, create the node
1.45 daniel 7567: */
1.97 daniel 7568:
1.152 daniel 7569: else if (RAW == '&') {
1.77 daniel 7570: xmlParseReference(ctxt);
1.27 daniel 7571: }
1.72 daniel 7572:
1.27 daniel 7573: /*
7574: * Last case, text. Note that References are handled directly.
7575: */
7576: else {
1.45 daniel 7577: xmlParseCharData(ctxt, 0);
1.3 veillard 7578: }
1.14 veillard 7579:
1.91 daniel 7580: GROW;
1.14 veillard 7581: /*
1.45 daniel 7582: * Pop-up of finished entities.
1.14 veillard 7583: */
1.152 daniel 7584: while ((RAW == 0) && (ctxt->inputNr > 1))
1.69 daniel 7585: xmlPopInput(ctxt);
1.135 daniel 7586: SHRINK;
1.45 daniel 7587:
1.113 daniel 7588: if ((cons == ctxt->input->consumed) && (test == CUR_PTR) &&
7589: (tok == ctxt->token)) {
1.55 daniel 7590: if ((ctxt->sax != NULL) && (ctxt->sax->error != NULL))
1.74 daniel 7591: ctxt->sax->error(ctxt->userData,
1.59 daniel 7592: "detected an error in element content\n");
1.123 daniel 7593: ctxt->errNo = XML_ERR_INTERNAL_ERROR;
1.59 daniel 7594: ctxt->wellFormed = 0;
1.180 daniel 7595: ctxt->disableSAX = 1;
1.29 daniel 7596: break;
7597: }
1.3 veillard 7598: }
1.2 veillard 7599: }
7600:
1.50 daniel 7601: /**
7602: * xmlParseElement:
7603: * @ctxt: an XML parser context
7604: *
7605: * parse an XML element, this is highly recursive
1.26 daniel 7606: *
7607: * [39] element ::= EmptyElemTag | STag content ETag
7608: *
1.98 daniel 7609: * [ WFC: Element Type Match ]
7610: * The Name in an element's end-tag must match the element type in the
7611: * start-tag.
7612: *
7613: * [ VC: Element Valid ]
1.117 daniel 7614: * An element is valid if there is a declaration matching elementdecl
1.99 daniel 7615: * where the Name matches the element type and one of the following holds:
7616: * - The declaration matches EMPTY and the element has no content.
7617: * - The declaration matches children and the sequence of child elements
7618: * belongs to the language generated by the regular expression in the
7619: * content model, with optional white space (characters matching the
7620: * nonterminal S) between each pair of child elements.
7621: * - The declaration matches Mixed and the content consists of character
7622: * data and child elements whose types match names in the content model.
7623: * - The declaration matches ANY, and the types of any child elements have
7624: * been declared.
1.2 veillard 7625: */
1.26 daniel 7626:
1.72 daniel 7627: void
1.69 daniel 7628: xmlParseElement(xmlParserCtxtPtr ctxt) {
1.123 daniel 7629: const xmlChar *openTag = CUR_PTR;
7630: xmlChar *name;
1.140 daniel 7631: xmlChar *oldname;
1.32 daniel 7632: xmlParserNodeInfo node_info;
1.118 daniel 7633: xmlNodePtr ret;
1.2 veillard 7634:
1.32 daniel 7635: /* Capture start position */
1.118 daniel 7636: if (ctxt->record_info) {
7637: node_info.begin_pos = ctxt->input->consumed +
7638: (CUR_PTR - ctxt->input->base);
7639: node_info.begin_line = ctxt->input->line;
7640: }
1.32 daniel 7641:
1.176 daniel 7642: if (ctxt->spaceNr == 0)
7643: spacePush(ctxt, -1);
7644: else
7645: spacePush(ctxt, *ctxt->space);
7646:
1.83 daniel 7647: name = xmlParseStartTag(ctxt);
7648: if (name == NULL) {
1.176 daniel 7649: spacePop(ctxt);
1.83 daniel 7650: return;
7651: }
1.140 daniel 7652: namePush(ctxt, name);
1.118 daniel 7653: ret = ctxt->node;
1.2 veillard 7654:
7655: /*
1.99 daniel 7656: * [ VC: Root Element Type ]
7657: * The Name in the document type declaration must match the element
7658: * type of the root element.
7659: */
1.105 daniel 7660: if (ctxt->validate && ctxt->wellFormed && ctxt->myDoc &&
1.157 daniel 7661: ctxt->node && (ctxt->node == ctxt->myDoc->children))
1.102 daniel 7662: ctxt->valid &= xmlValidateRoot(&ctxt->vctxt, ctxt->myDoc);
1.99 daniel 7663:
7664: /*
1.2 veillard 7665: * Check for an Empty Element.
7666: */
1.152 daniel 7667: if ((RAW == '/') && (NXT(1) == '>')) {
1.40 daniel 7668: SKIP(2);
1.171 daniel 7669: if ((ctxt->sax != NULL) && (ctxt->sax->endElement != NULL) &&
7670: (!ctxt->disableSAX))
1.83 daniel 7671: ctxt->sax->endElement(ctxt->userData, name);
1.140 daniel 7672: oldname = namePop(ctxt);
1.176 daniel 7673: spacePop(ctxt);
1.140 daniel 7674: if (oldname != NULL) {
7675: #ifdef DEBUG_STACK
7676: fprintf(stderr,"Close: popped %s\n", oldname);
7677: #endif
7678: xmlFree(oldname);
7679: }
1.72 daniel 7680: return;
1.2 veillard 7681: }
1.152 daniel 7682: if (RAW == '>') {
1.91 daniel 7683: NEXT;
7684: } else {
1.55 daniel 7685: if ((ctxt->sax != NULL) && (ctxt->sax->error != NULL))
1.122 daniel 7686: ctxt->sax->error(ctxt->userData,
7687: "Couldn't find end of Start Tag\n%.30s\n",
1.57 daniel 7688: openTag);
1.59 daniel 7689: ctxt->wellFormed = 0;
1.180 daniel 7690: ctxt->disableSAX = 1;
1.123 daniel 7691: ctxt->errNo = XML_ERR_GT_REQUIRED;
1.45 daniel 7692:
7693: /*
7694: * end of parsing of this node.
7695: */
7696: nodePop(ctxt);
1.140 daniel 7697: oldname = namePop(ctxt);
1.176 daniel 7698: spacePop(ctxt);
1.140 daniel 7699: if (oldname != NULL) {
7700: #ifdef DEBUG_STACK
7701: fprintf(stderr,"Close: popped %s\n", oldname);
7702: #endif
7703: xmlFree(oldname);
7704: }
1.118 daniel 7705:
7706: /*
7707: * Capture end position and add node
7708: */
7709: if ( ret != NULL && ctxt->record_info ) {
7710: node_info.end_pos = ctxt->input->consumed +
7711: (CUR_PTR - ctxt->input->base);
7712: node_info.end_line = ctxt->input->line;
7713: node_info.node = ret;
7714: xmlParserAddNodeInfo(ctxt, &node_info);
7715: }
1.72 daniel 7716: return;
1.2 veillard 7717: }
7718:
7719: /*
7720: * Parse the content of the element:
7721: */
1.45 daniel 7722: xmlParseContent(ctxt);
1.153 daniel 7723: if (!IS_CHAR(RAW)) {
1.55 daniel 7724: if ((ctxt->sax != NULL) && (ctxt->sax->error != NULL))
1.74 daniel 7725: ctxt->sax->error(ctxt->userData,
1.57 daniel 7726: "Premature end of data in tag %.30s\n", openTag);
1.59 daniel 7727: ctxt->wellFormed = 0;
1.180 daniel 7728: ctxt->disableSAX = 1;
1.123 daniel 7729: ctxt->errNo = XML_ERR_TAG_NOT_FINISED;
1.45 daniel 7730:
7731: /*
7732: * end of parsing of this node.
7733: */
7734: nodePop(ctxt);
1.140 daniel 7735: oldname = namePop(ctxt);
1.176 daniel 7736: spacePop(ctxt);
1.140 daniel 7737: if (oldname != NULL) {
7738: #ifdef DEBUG_STACK
7739: fprintf(stderr,"Close: popped %s\n", oldname);
7740: #endif
7741: xmlFree(oldname);
7742: }
1.72 daniel 7743: return;
1.2 veillard 7744: }
7745:
7746: /*
1.27 daniel 7747: * parse the end of tag: '</' should be here.
1.2 veillard 7748: */
1.140 daniel 7749: xmlParseEndTag(ctxt);
1.118 daniel 7750:
7751: /*
7752: * Capture end position and add node
7753: */
7754: if ( ret != NULL && ctxt->record_info ) {
7755: node_info.end_pos = ctxt->input->consumed +
7756: (CUR_PTR - ctxt->input->base);
7757: node_info.end_line = ctxt->input->line;
7758: node_info.node = ret;
7759: xmlParserAddNodeInfo(ctxt, &node_info);
7760: }
1.2 veillard 7761: }
7762:
1.50 daniel 7763: /**
7764: * xmlParseVersionNum:
7765: * @ctxt: an XML parser context
7766: *
7767: * parse the XML version value.
1.29 daniel 7768: *
7769: * [26] VersionNum ::= ([a-zA-Z0-9_.:] | '-')+
1.68 daniel 7770: *
7771: * Returns the string giving the XML version number, or NULL
1.29 daniel 7772: */
1.123 daniel 7773: xmlChar *
1.55 daniel 7774: xmlParseVersionNum(xmlParserCtxtPtr ctxt) {
1.135 daniel 7775: xmlChar *buf = NULL;
7776: int len = 0;
7777: int size = 10;
7778: xmlChar cur;
1.29 daniel 7779:
1.135 daniel 7780: buf = (xmlChar *) xmlMalloc(size * sizeof(xmlChar));
7781: if (buf == NULL) {
7782: fprintf(stderr, "malloc of %d byte failed\n", size);
7783: return(NULL);
7784: }
7785: cur = CUR;
1.152 daniel 7786: while (((cur >= 'a') && (cur <= 'z')) ||
7787: ((cur >= 'A') && (cur <= 'Z')) ||
7788: ((cur >= '0') && (cur <= '9')) ||
7789: (cur == '_') || (cur == '.') ||
7790: (cur == ':') || (cur == '-')) {
1.135 daniel 7791: if (len + 1 >= size) {
7792: size *= 2;
7793: buf = xmlRealloc(buf, size * sizeof(xmlChar));
7794: if (buf == NULL) {
7795: fprintf(stderr, "realloc of %d byte failed\n", size);
7796: return(NULL);
7797: }
7798: }
7799: buf[len++] = cur;
7800: NEXT;
7801: cur=CUR;
7802: }
7803: buf[len] = 0;
7804: return(buf);
1.29 daniel 7805: }
7806:
1.50 daniel 7807: /**
7808: * xmlParseVersionInfo:
7809: * @ctxt: an XML parser context
7810: *
7811: * parse the XML version.
1.29 daniel 7812: *
7813: * [24] VersionInfo ::= S 'version' Eq (' VersionNum ' | " VersionNum ")
7814: *
7815: * [25] Eq ::= S? '=' S?
1.50 daniel 7816: *
1.68 daniel 7817: * Returns the version string, e.g. "1.0"
1.29 daniel 7818: */
7819:
1.123 daniel 7820: xmlChar *
1.55 daniel 7821: xmlParseVersionInfo(xmlParserCtxtPtr ctxt) {
1.123 daniel 7822: xmlChar *version = NULL;
7823: const xmlChar *q;
1.29 daniel 7824:
1.152 daniel 7825: if ((RAW == 'v') && (NXT(1) == 'e') &&
1.40 daniel 7826: (NXT(2) == 'r') && (NXT(3) == 's') &&
7827: (NXT(4) == 'i') && (NXT(5) == 'o') &&
7828: (NXT(6) == 'n')) {
7829: SKIP(7);
1.42 daniel 7830: SKIP_BLANKS;
1.152 daniel 7831: if (RAW != '=') {
1.55 daniel 7832: if ((ctxt->sax != NULL) && (ctxt->sax->error != NULL))
1.122 daniel 7833: ctxt->sax->error(ctxt->userData,
7834: "xmlParseVersionInfo : expected '='\n");
1.59 daniel 7835: ctxt->wellFormed = 0;
1.180 daniel 7836: ctxt->disableSAX = 1;
1.123 daniel 7837: ctxt->errNo = XML_ERR_EQUAL_REQUIRED;
1.31 daniel 7838: return(NULL);
7839: }
1.40 daniel 7840: NEXT;
1.42 daniel 7841: SKIP_BLANKS;
1.152 daniel 7842: if (RAW == '"') {
1.40 daniel 7843: NEXT;
7844: q = CUR_PTR;
1.29 daniel 7845: version = xmlParseVersionNum(ctxt);
1.152 daniel 7846: if (RAW != '"') {
1.55 daniel 7847: if ((ctxt->sax != NULL) && (ctxt->sax->error != NULL))
1.122 daniel 7848: ctxt->sax->error(ctxt->userData,
7849: "String not closed\n%.50s\n", q);
1.59 daniel 7850: ctxt->wellFormed = 0;
1.180 daniel 7851: ctxt->disableSAX = 1;
1.123 daniel 7852: ctxt->errNo = XML_ERR_STRING_NOT_CLOSED;
1.55 daniel 7853: } else
1.40 daniel 7854: NEXT;
1.152 daniel 7855: } else if (RAW == '\''){
1.40 daniel 7856: NEXT;
7857: q = CUR_PTR;
1.29 daniel 7858: version = xmlParseVersionNum(ctxt);
1.152 daniel 7859: if (RAW != '\'') {
1.55 daniel 7860: if ((ctxt->sax != NULL) && (ctxt->sax->error != NULL))
1.122 daniel 7861: ctxt->sax->error(ctxt->userData,
7862: "String not closed\n%.50s\n", q);
1.123 daniel 7863: ctxt->errNo = XML_ERR_STRING_NOT_CLOSED;
1.59 daniel 7864: ctxt->wellFormed = 0;
1.180 daniel 7865: ctxt->disableSAX = 1;
1.55 daniel 7866: } else
1.40 daniel 7867: NEXT;
1.31 daniel 7868: } else {
1.55 daniel 7869: if ((ctxt->sax != NULL) && (ctxt->sax->error != NULL))
1.74 daniel 7870: ctxt->sax->error(ctxt->userData,
1.59 daniel 7871: "xmlParseVersionInfo : expected ' or \"\n");
1.122 daniel 7872: ctxt->wellFormed = 0;
1.180 daniel 7873: ctxt->disableSAX = 1;
1.123 daniel 7874: ctxt->errNo = XML_ERR_STRING_NOT_STARTED;
1.29 daniel 7875: }
7876: }
7877: return(version);
7878: }
7879:
1.50 daniel 7880: /**
7881: * xmlParseEncName:
7882: * @ctxt: an XML parser context
7883: *
7884: * parse the XML encoding name
1.29 daniel 7885: *
7886: * [81] EncName ::= [A-Za-z] ([A-Za-z0-9._] | '-')*
1.50 daniel 7887: *
1.68 daniel 7888: * Returns the encoding name value or NULL
1.29 daniel 7889: */
1.123 daniel 7890: xmlChar *
1.55 daniel 7891: xmlParseEncName(xmlParserCtxtPtr ctxt) {
1.135 daniel 7892: xmlChar *buf = NULL;
7893: int len = 0;
7894: int size = 10;
7895: xmlChar cur;
1.29 daniel 7896:
1.135 daniel 7897: cur = CUR;
7898: if (((cur >= 'a') && (cur <= 'z')) ||
7899: ((cur >= 'A') && (cur <= 'Z'))) {
7900: buf = (xmlChar *) xmlMalloc(size * sizeof(xmlChar));
7901: if (buf == NULL) {
7902: fprintf(stderr, "malloc of %d byte failed\n", size);
7903: return(NULL);
7904: }
7905:
7906: buf[len++] = cur;
1.40 daniel 7907: NEXT;
1.135 daniel 7908: cur = CUR;
1.152 daniel 7909: while (((cur >= 'a') && (cur <= 'z')) ||
7910: ((cur >= 'A') && (cur <= 'Z')) ||
7911: ((cur >= '0') && (cur <= '9')) ||
7912: (cur == '.') || (cur == '_') ||
7913: (cur == '-')) {
1.135 daniel 7914: if (len + 1 >= size) {
7915: size *= 2;
7916: buf = xmlRealloc(buf, size * sizeof(xmlChar));
7917: if (buf == NULL) {
7918: fprintf(stderr, "realloc of %d byte failed\n", size);
7919: return(NULL);
7920: }
7921: }
7922: buf[len++] = cur;
7923: NEXT;
7924: cur = CUR;
7925: if (cur == 0) {
7926: SHRINK;
7927: GROW;
7928: cur = CUR;
7929: }
7930: }
7931: buf[len] = 0;
1.29 daniel 7932: } else {
1.55 daniel 7933: if ((ctxt->sax != NULL) && (ctxt->sax->error != NULL))
1.74 daniel 7934: ctxt->sax->error(ctxt->userData, "Invalid XML encoding name\n");
1.59 daniel 7935: ctxt->wellFormed = 0;
1.180 daniel 7936: ctxt->disableSAX = 1;
1.123 daniel 7937: ctxt->errNo = XML_ERR_ENCODING_NAME;
1.29 daniel 7938: }
1.135 daniel 7939: return(buf);
1.29 daniel 7940: }
7941:
1.50 daniel 7942: /**
7943: * xmlParseEncodingDecl:
7944: * @ctxt: an XML parser context
7945: *
7946: * parse the XML encoding declaration
1.29 daniel 7947: *
7948: * [80] EncodingDecl ::= S 'encoding' Eq ('"' EncName '"' | "'" EncName "'")
1.50 daniel 7949: *
7950: * TODO: this should setup the conversion filters.
7951: *
1.68 daniel 7952: * Returns the encoding value or NULL
1.29 daniel 7953: */
7954:
1.123 daniel 7955: xmlChar *
1.55 daniel 7956: xmlParseEncodingDecl(xmlParserCtxtPtr ctxt) {
1.123 daniel 7957: xmlChar *encoding = NULL;
7958: const xmlChar *q;
1.29 daniel 7959:
1.42 daniel 7960: SKIP_BLANKS;
1.152 daniel 7961: if ((RAW == 'e') && (NXT(1) == 'n') &&
1.40 daniel 7962: (NXT(2) == 'c') && (NXT(3) == 'o') &&
7963: (NXT(4) == 'd') && (NXT(5) == 'i') &&
7964: (NXT(6) == 'n') && (NXT(7) == 'g')) {
7965: SKIP(8);
1.42 daniel 7966: SKIP_BLANKS;
1.152 daniel 7967: if (RAW != '=') {
1.55 daniel 7968: if ((ctxt->sax != NULL) && (ctxt->sax->error != NULL))
1.122 daniel 7969: ctxt->sax->error(ctxt->userData,
7970: "xmlParseEncodingDecl : expected '='\n");
1.59 daniel 7971: ctxt->wellFormed = 0;
1.180 daniel 7972: ctxt->disableSAX = 1;
1.123 daniel 7973: ctxt->errNo = XML_ERR_EQUAL_REQUIRED;
1.31 daniel 7974: return(NULL);
7975: }
1.40 daniel 7976: NEXT;
1.42 daniel 7977: SKIP_BLANKS;
1.152 daniel 7978: if (RAW == '"') {
1.40 daniel 7979: NEXT;
7980: q = CUR_PTR;
1.29 daniel 7981: encoding = xmlParseEncName(ctxt);
1.152 daniel 7982: if (RAW != '"') {
1.55 daniel 7983: if ((ctxt->sax != NULL) && (ctxt->sax->error != NULL))
1.122 daniel 7984: ctxt->sax->error(ctxt->userData,
7985: "String not closed\n%.50s\n", q);
1.59 daniel 7986: ctxt->wellFormed = 0;
1.180 daniel 7987: ctxt->disableSAX = 1;
1.123 daniel 7988: ctxt->errNo = XML_ERR_STRING_NOT_CLOSED;
1.55 daniel 7989: } else
1.40 daniel 7990: NEXT;
1.152 daniel 7991: } else if (RAW == '\''){
1.40 daniel 7992: NEXT;
7993: q = CUR_PTR;
1.29 daniel 7994: encoding = xmlParseEncName(ctxt);
1.152 daniel 7995: if (RAW != '\'') {
1.55 daniel 7996: if ((ctxt->sax != NULL) && (ctxt->sax->error != NULL))
1.122 daniel 7997: ctxt->sax->error(ctxt->userData,
7998: "String not closed\n%.50s\n", q);
1.59 daniel 7999: ctxt->wellFormed = 0;
1.180 daniel 8000: ctxt->disableSAX = 1;
1.123 daniel 8001: ctxt->errNo = XML_ERR_STRING_NOT_CLOSED;
1.55 daniel 8002: } else
1.40 daniel 8003: NEXT;
1.152 daniel 8004: } else if (RAW == '"'){
1.55 daniel 8005: if ((ctxt->sax != NULL) && (ctxt->sax->error != NULL))
1.74 daniel 8006: ctxt->sax->error(ctxt->userData,
1.59 daniel 8007: "xmlParseEncodingDecl : expected ' or \"\n");
8008: ctxt->wellFormed = 0;
1.180 daniel 8009: ctxt->disableSAX = 1;
1.123 daniel 8010: ctxt->errNo = XML_ERR_STRING_NOT_STARTED;
1.29 daniel 8011: }
8012: }
8013: return(encoding);
8014: }
8015:
1.50 daniel 8016: /**
8017: * xmlParseSDDecl:
8018: * @ctxt: an XML parser context
8019: *
8020: * parse the XML standalone declaration
1.29 daniel 8021: *
8022: * [32] SDDecl ::= S 'standalone' Eq
8023: * (("'" ('yes' | 'no') "'") | ('"' ('yes' | 'no')'"'))
1.99 daniel 8024: *
8025: * [ VC: Standalone Document Declaration ]
8026: * TODO The standalone document declaration must have the value "no"
8027: * if any external markup declarations contain declarations of:
8028: * - attributes with default values, if elements to which these
8029: * attributes apply appear in the document without specifications
8030: * of values for these attributes, or
8031: * - entities (other than amp, lt, gt, apos, quot), if references
8032: * to those entities appear in the document, or
8033: * - attributes with values subject to normalization, where the
8034: * attribute appears in the document with a value which will change
8035: * as a result of normalization, or
8036: * - element types with element content, if white space occurs directly
8037: * within any instance of those types.
1.68 daniel 8038: *
8039: * Returns 1 if standalone, 0 otherwise
1.29 daniel 8040: */
8041:
1.55 daniel 8042: int
8043: xmlParseSDDecl(xmlParserCtxtPtr ctxt) {
1.29 daniel 8044: int standalone = -1;
8045:
1.42 daniel 8046: SKIP_BLANKS;
1.152 daniel 8047: if ((RAW == 's') && (NXT(1) == 't') &&
1.40 daniel 8048: (NXT(2) == 'a') && (NXT(3) == 'n') &&
8049: (NXT(4) == 'd') && (NXT(5) == 'a') &&
8050: (NXT(6) == 'l') && (NXT(7) == 'o') &&
8051: (NXT(8) == 'n') && (NXT(9) == 'e')) {
8052: SKIP(10);
1.81 daniel 8053: SKIP_BLANKS;
1.152 daniel 8054: if (RAW != '=') {
1.55 daniel 8055: if ((ctxt->sax != NULL) && (ctxt->sax->error != NULL))
1.74 daniel 8056: ctxt->sax->error(ctxt->userData,
1.59 daniel 8057: "XML standalone declaration : expected '='\n");
1.123 daniel 8058: ctxt->errNo = XML_ERR_EQUAL_REQUIRED;
1.59 daniel 8059: ctxt->wellFormed = 0;
1.180 daniel 8060: ctxt->disableSAX = 1;
1.32 daniel 8061: return(standalone);
8062: }
1.40 daniel 8063: NEXT;
1.42 daniel 8064: SKIP_BLANKS;
1.152 daniel 8065: if (RAW == '\''){
1.40 daniel 8066: NEXT;
1.152 daniel 8067: if ((RAW == 'n') && (NXT(1) == 'o')) {
1.29 daniel 8068: standalone = 0;
1.40 daniel 8069: SKIP(2);
1.152 daniel 8070: } else if ((RAW == 'y') && (NXT(1) == 'e') &&
1.40 daniel 8071: (NXT(2) == 's')) {
1.29 daniel 8072: standalone = 1;
1.40 daniel 8073: SKIP(3);
1.29 daniel 8074: } else {
1.55 daniel 8075: if ((ctxt->sax != NULL) && (ctxt->sax->error != NULL))
1.122 daniel 8076: ctxt->sax->error(ctxt->userData,
8077: "standalone accepts only 'yes' or 'no'\n");
1.123 daniel 8078: ctxt->errNo = XML_ERR_STANDALONE_VALUE;
1.59 daniel 8079: ctxt->wellFormed = 0;
1.180 daniel 8080: ctxt->disableSAX = 1;
1.29 daniel 8081: }
1.152 daniel 8082: if (RAW != '\'') {
1.55 daniel 8083: if ((ctxt->sax != NULL) && (ctxt->sax->error != NULL))
1.74 daniel 8084: ctxt->sax->error(ctxt->userData, "String not closed\n");
1.123 daniel 8085: ctxt->errNo = XML_ERR_STRING_NOT_CLOSED;
1.59 daniel 8086: ctxt->wellFormed = 0;
1.180 daniel 8087: ctxt->disableSAX = 1;
1.55 daniel 8088: } else
1.40 daniel 8089: NEXT;
1.152 daniel 8090: } else if (RAW == '"'){
1.40 daniel 8091: NEXT;
1.152 daniel 8092: if ((RAW == 'n') && (NXT(1) == 'o')) {
1.29 daniel 8093: standalone = 0;
1.40 daniel 8094: SKIP(2);
1.152 daniel 8095: } else if ((RAW == 'y') && (NXT(1) == 'e') &&
1.40 daniel 8096: (NXT(2) == 's')) {
1.29 daniel 8097: standalone = 1;
1.40 daniel 8098: SKIP(3);
1.29 daniel 8099: } else {
1.55 daniel 8100: if ((ctxt->sax != NULL) && (ctxt->sax->error != NULL))
1.74 daniel 8101: ctxt->sax->error(ctxt->userData,
1.59 daniel 8102: "standalone accepts only 'yes' or 'no'\n");
1.123 daniel 8103: ctxt->errNo = XML_ERR_STANDALONE_VALUE;
1.59 daniel 8104: ctxt->wellFormed = 0;
1.180 daniel 8105: ctxt->disableSAX = 1;
1.29 daniel 8106: }
1.152 daniel 8107: if (RAW != '"') {
1.55 daniel 8108: if ((ctxt->sax != NULL) && (ctxt->sax->error != NULL))
1.74 daniel 8109: ctxt->sax->error(ctxt->userData, "String not closed\n");
1.59 daniel 8110: ctxt->wellFormed = 0;
1.180 daniel 8111: ctxt->disableSAX = 1;
1.123 daniel 8112: ctxt->errNo = XML_ERR_STRING_NOT_CLOSED;
1.55 daniel 8113: } else
1.40 daniel 8114: NEXT;
1.37 daniel 8115: } else {
1.55 daniel 8116: if ((ctxt->sax != NULL) && (ctxt->sax->error != NULL))
1.122 daniel 8117: ctxt->sax->error(ctxt->userData,
8118: "Standalone value not found\n");
1.59 daniel 8119: ctxt->wellFormed = 0;
1.180 daniel 8120: ctxt->disableSAX = 1;
1.123 daniel 8121: ctxt->errNo = XML_ERR_STRING_NOT_STARTED;
1.37 daniel 8122: }
1.29 daniel 8123: }
8124: return(standalone);
8125: }
8126:
1.50 daniel 8127: /**
8128: * xmlParseXMLDecl:
8129: * @ctxt: an XML parser context
8130: *
8131: * parse an XML declaration header
1.29 daniel 8132: *
8133: * [23] XMLDecl ::= '<?xml' VersionInfo EncodingDecl? SDDecl? S? '?>'
1.1 veillard 8134: */
8135:
1.55 daniel 8136: void
8137: xmlParseXMLDecl(xmlParserCtxtPtr ctxt) {
1.123 daniel 8138: xmlChar *version;
1.1 veillard 8139:
8140: /*
1.19 daniel 8141: * We know that '<?xml' is here.
1.1 veillard 8142: */
1.40 daniel 8143: SKIP(5);
1.1 veillard 8144:
1.153 daniel 8145: if (!IS_BLANK(RAW)) {
1.59 daniel 8146: if ((ctxt->sax != NULL) && (ctxt->sax->error != NULL))
1.74 daniel 8147: ctxt->sax->error(ctxt->userData, "Blank needed after '<?xml'\n");
1.123 daniel 8148: ctxt->errNo = XML_ERR_SPACE_REQUIRED;
1.59 daniel 8149: ctxt->wellFormed = 0;
1.180 daniel 8150: ctxt->disableSAX = 1;
1.59 daniel 8151: }
1.42 daniel 8152: SKIP_BLANKS;
1.1 veillard 8153:
8154: /*
1.29 daniel 8155: * We should have the VersionInfo here.
1.1 veillard 8156: */
1.29 daniel 8157: version = xmlParseVersionInfo(ctxt);
8158: if (version == NULL)
1.45 daniel 8159: version = xmlCharStrdup(XML_DEFAULT_VERSION);
1.72 daniel 8160: ctxt->version = xmlStrdup(version);
1.119 daniel 8161: xmlFree(version);
1.29 daniel 8162:
8163: /*
8164: * We may have the encoding declaration
8165: */
1.153 daniel 8166: if (!IS_BLANK(RAW)) {
1.152 daniel 8167: if ((RAW == '?') && (NXT(1) == '>')) {
1.59 daniel 8168: SKIP(2);
8169: return;
8170: }
8171: if ((ctxt->sax != NULL) && (ctxt->sax->error != NULL))
1.74 daniel 8172: ctxt->sax->error(ctxt->userData, "Blank needed here\n");
1.123 daniel 8173: ctxt->errNo = XML_ERR_SPACE_REQUIRED;
1.59 daniel 8174: ctxt->wellFormed = 0;
1.180 daniel 8175: ctxt->disableSAX = 1;
1.59 daniel 8176: }
1.164 daniel 8177: ctxt->input->encoding = xmlParseEncodingDecl(ctxt);
1.1 veillard 8178:
8179: /*
1.29 daniel 8180: * We may have the standalone status.
1.1 veillard 8181: */
1.164 daniel 8182: if ((ctxt->input->encoding != NULL) && (!IS_BLANK(RAW))) {
1.152 daniel 8183: if ((RAW == '?') && (NXT(1) == '>')) {
1.59 daniel 8184: SKIP(2);
8185: return;
8186: }
8187: if ((ctxt->sax != NULL) && (ctxt->sax->error != NULL))
1.74 daniel 8188: ctxt->sax->error(ctxt->userData, "Blank needed here\n");
1.59 daniel 8189: ctxt->wellFormed = 0;
1.180 daniel 8190: ctxt->disableSAX = 1;
1.123 daniel 8191: ctxt->errNo = XML_ERR_SPACE_REQUIRED;
1.59 daniel 8192: }
8193: SKIP_BLANKS;
1.167 daniel 8194: ctxt->input->standalone = xmlParseSDDecl(ctxt);
1.1 veillard 8195:
1.42 daniel 8196: SKIP_BLANKS;
1.152 daniel 8197: if ((RAW == '?') && (NXT(1) == '>')) {
1.40 daniel 8198: SKIP(2);
1.152 daniel 8199: } else if (RAW == '>') {
1.31 daniel 8200: /* Deprecated old WD ... */
1.55 daniel 8201: if ((ctxt->sax != NULL) && (ctxt->sax->error != NULL))
1.122 daniel 8202: ctxt->sax->error(ctxt->userData,
8203: "XML declaration must end-up with '?>'\n");
1.59 daniel 8204: ctxt->wellFormed = 0;
1.180 daniel 8205: ctxt->disableSAX = 1;
1.123 daniel 8206: ctxt->errNo = XML_ERR_XMLDECL_NOT_FINISHED;
1.40 daniel 8207: NEXT;
1.29 daniel 8208: } else {
1.55 daniel 8209: if ((ctxt->sax != NULL) && (ctxt->sax->error != NULL))
1.122 daniel 8210: ctxt->sax->error(ctxt->userData,
8211: "parsing XML declaration: '?>' expected\n");
1.59 daniel 8212: ctxt->wellFormed = 0;
1.180 daniel 8213: ctxt->disableSAX = 1;
1.123 daniel 8214: ctxt->errNo = XML_ERR_XMLDECL_NOT_FINISHED;
1.40 daniel 8215: MOVETO_ENDTAG(CUR_PTR);
8216: NEXT;
1.29 daniel 8217: }
1.1 veillard 8218: }
8219:
1.50 daniel 8220: /**
8221: * xmlParseMisc:
8222: * @ctxt: an XML parser context
8223: *
8224: * parse an XML Misc* optionnal field.
1.21 daniel 8225: *
1.22 daniel 8226: * [27] Misc ::= Comment | PI | S
1.1 veillard 8227: */
8228:
1.55 daniel 8229: void
8230: xmlParseMisc(xmlParserCtxtPtr ctxt) {
1.152 daniel 8231: while (((RAW == '<') && (NXT(1) == '?')) ||
8232: ((RAW == '<') && (NXT(1) == '!') &&
1.40 daniel 8233: (NXT(2) == '-') && (NXT(3) == '-')) ||
8234: IS_BLANK(CUR)) {
1.152 daniel 8235: if ((RAW == '<') && (NXT(1) == '?')) {
1.16 daniel 8236: xmlParsePI(ctxt);
1.40 daniel 8237: } else if (IS_BLANK(CUR)) {
8238: NEXT;
1.1 veillard 8239: } else
1.114 daniel 8240: xmlParseComment(ctxt);
1.1 veillard 8241: }
8242: }
8243:
1.50 daniel 8244: /**
1.181 ! daniel 8245: * xmlParseDocument:
1.50 daniel 8246: * @ctxt: an XML parser context
8247: *
8248: * parse an XML document (and build a tree if using the standard SAX
8249: * interface).
1.21 daniel 8250: *
1.22 daniel 8251: * [1] document ::= prolog element Misc*
1.29 daniel 8252: *
8253: * [22] prolog ::= XMLDecl? Misc* (doctypedecl Misc*)?
1.50 daniel 8254: *
1.68 daniel 8255: * Returns 0, -1 in case of error. the parser context is augmented
1.50 daniel 8256: * as a result of the parsing.
1.1 veillard 8257: */
8258:
1.55 daniel 8259: int
8260: xmlParseDocument(xmlParserCtxtPtr ctxt) {
1.156 daniel 8261: xmlChar start[4];
8262: xmlCharEncoding enc;
8263:
1.45 daniel 8264: xmlDefaultSAXHandlerInit();
8265:
1.91 daniel 8266: GROW;
8267:
1.14 veillard 8268: /*
1.44 daniel 8269: * SAX: beginning of the document processing.
8270: */
1.72 daniel 8271: if ((ctxt->sax) && (ctxt->sax->setDocumentLocator))
1.74 daniel 8272: ctxt->sax->setDocumentLocator(ctxt->userData, &xmlDefaultSAXLocator);
1.44 daniel 8273:
1.156 daniel 8274: /*
8275: * Get the 4 first bytes and decode the charset
8276: * if enc != XML_CHAR_ENCODING_NONE
8277: * plug some encoding conversion routines.
8278: */
8279: start[0] = RAW;
8280: start[1] = NXT(1);
8281: start[2] = NXT(2);
8282: start[3] = NXT(3);
8283: enc = xmlDetectCharEncoding(start, 4);
8284: if (enc != XML_CHAR_ENCODING_NONE) {
8285: xmlSwitchEncoding(ctxt, enc);
8286: }
8287:
1.1 veillard 8288:
1.59 daniel 8289: if (CUR == 0) {
8290: if ((ctxt->sax != NULL) && (ctxt->sax->error != NULL))
1.74 daniel 8291: ctxt->sax->error(ctxt->userData, "Document is empty\n");
1.123 daniel 8292: ctxt->errNo = XML_ERR_DOCUMENT_EMPTY;
1.59 daniel 8293: ctxt->wellFormed = 0;
1.180 daniel 8294: ctxt->disableSAX = 1;
1.59 daniel 8295: }
1.1 veillard 8296:
8297: /*
8298: * Check for the XMLDecl in the Prolog.
8299: */
1.91 daniel 8300: GROW;
1.152 daniel 8301: if ((RAW == '<') && (NXT(1) == '?') &&
1.40 daniel 8302: (NXT(2) == 'x') && (NXT(3) == 'm') &&
1.142 daniel 8303: (NXT(4) == 'l') && (IS_BLANK(NXT(5)))) {
1.19 daniel 8304: xmlParseXMLDecl(ctxt);
1.167 daniel 8305: ctxt->standalone = ctxt->input->standalone;
1.42 daniel 8306: SKIP_BLANKS;
1.164 daniel 8307: if ((ctxt->encoding == NULL) && (ctxt->input->encoding != NULL))
8308: ctxt->encoding = xmlStrdup(ctxt->input->encoding);
8309:
1.1 veillard 8310: } else {
1.72 daniel 8311: ctxt->version = xmlCharStrdup(XML_DEFAULT_VERSION);
1.1 veillard 8312: }
1.171 daniel 8313: if ((ctxt->sax) && (ctxt->sax->startDocument) && (!ctxt->disableSAX))
1.74 daniel 8314: ctxt->sax->startDocument(ctxt->userData);
1.1 veillard 8315:
8316: /*
8317: * The Misc part of the Prolog
8318: */
1.91 daniel 8319: GROW;
1.16 daniel 8320: xmlParseMisc(ctxt);
1.1 veillard 8321:
8322: /*
1.29 daniel 8323: * Then possibly doc type declaration(s) and more Misc
1.21 daniel 8324: * (doctypedecl Misc*)?
8325: */
1.91 daniel 8326: GROW;
1.152 daniel 8327: if ((RAW == '<') && (NXT(1) == '!') &&
1.40 daniel 8328: (NXT(2) == 'D') && (NXT(3) == 'O') &&
8329: (NXT(4) == 'C') && (NXT(5) == 'T') &&
8330: (NXT(6) == 'Y') && (NXT(7) == 'P') &&
8331: (NXT(8) == 'E')) {
1.165 daniel 8332:
1.166 daniel 8333: ctxt->inSubset = 1;
1.22 daniel 8334: xmlParseDocTypeDecl(ctxt);
1.152 daniel 8335: if (RAW == '[') {
1.140 daniel 8336: ctxt->instate = XML_PARSER_DTD;
8337: xmlParseInternalSubset(ctxt);
8338: }
1.165 daniel 8339:
8340: /*
8341: * Create and update the external subset.
8342: */
1.166 daniel 8343: ctxt->inSubset = 2;
1.171 daniel 8344: if ((ctxt->sax != NULL) && (ctxt->sax->externalSubset != NULL) &&
8345: (!ctxt->disableSAX))
1.165 daniel 8346: ctxt->sax->externalSubset(ctxt->userData, ctxt->intSubName,
8347: ctxt->extSubSystem, ctxt->extSubURI);
1.166 daniel 8348: ctxt->inSubset = 0;
1.165 daniel 8349:
8350:
1.96 daniel 8351: ctxt->instate = XML_PARSER_PROLOG;
1.22 daniel 8352: xmlParseMisc(ctxt);
1.21 daniel 8353: }
8354:
8355: /*
8356: * Time to start parsing the tree itself
1.1 veillard 8357: */
1.91 daniel 8358: GROW;
1.152 daniel 8359: if (RAW != '<') {
1.59 daniel 8360: if ((ctxt->sax != NULL) && (ctxt->sax->error != NULL))
1.74 daniel 8361: ctxt->sax->error(ctxt->userData,
1.151 daniel 8362: "Start tag expected, '<' not found\n");
1.140 daniel 8363: ctxt->errNo = XML_ERR_DOCUMENT_EMPTY;
1.59 daniel 8364: ctxt->wellFormed = 0;
1.180 daniel 8365: ctxt->disableSAX = 1;
1.140 daniel 8366: ctxt->instate = XML_PARSER_EOF;
8367: } else {
8368: ctxt->instate = XML_PARSER_CONTENT;
8369: xmlParseElement(ctxt);
8370: ctxt->instate = XML_PARSER_EPILOG;
8371:
8372:
8373: /*
8374: * The Misc part at the end
8375: */
8376: xmlParseMisc(ctxt);
8377:
1.152 daniel 8378: if (RAW != 0) {
1.140 daniel 8379: if ((ctxt->sax != NULL) && (ctxt->sax->error != NULL))
8380: ctxt->sax->error(ctxt->userData,
8381: "Extra content at the end of the document\n");
8382: ctxt->wellFormed = 0;
1.180 daniel 8383: ctxt->disableSAX = 1;
1.140 daniel 8384: ctxt->errNo = XML_ERR_DOCUMENT_END;
8385: }
8386: ctxt->instate = XML_PARSER_EOF;
1.59 daniel 8387: }
8388:
1.44 daniel 8389: /*
8390: * SAX: end of the document processing.
8391: */
1.171 daniel 8392: if ((ctxt->sax) && (ctxt->sax->endDocument != NULL) &&
8393: (!ctxt->disableSAX))
1.74 daniel 8394: ctxt->sax->endDocument(ctxt->userData);
1.151 daniel 8395:
8396: /*
8397: * Grab the encoding if it was added on-the-fly
8398: */
8399: if ((ctxt->encoding != NULL) && (ctxt->myDoc != NULL) &&
8400: (ctxt->myDoc->encoding == NULL)) {
8401: ctxt->myDoc->encoding = ctxt->encoding;
8402: ctxt->encoding = NULL;
8403: }
1.59 daniel 8404: if (! ctxt->wellFormed) return(-1);
1.16 daniel 8405: return(0);
8406: }
8407:
1.98 daniel 8408: /************************************************************************
8409: * *
1.128 daniel 8410: * Progressive parsing interfaces *
8411: * *
8412: ************************************************************************/
8413:
8414: /**
8415: * xmlParseLookupSequence:
8416: * @ctxt: an XML parser context
8417: * @first: the first char to lookup
1.140 daniel 8418: * @next: the next char to lookup or zero
8419: * @third: the next char to lookup or zero
1.128 daniel 8420: *
1.140 daniel 8421: * Try to find if a sequence (first, next, third) or just (first next) or
8422: * (first) is available in the input stream.
8423: * This function has a side effect of (possibly) incrementing ctxt->checkIndex
8424: * to avoid rescanning sequences of bytes, it DOES change the state of the
8425: * parser, do not use liberally.
1.128 daniel 8426: *
1.140 daniel 8427: * Returns the index to the current parsing point if the full sequence
8428: * is available, -1 otherwise.
1.128 daniel 8429: */
8430: int
1.140 daniel 8431: xmlParseLookupSequence(xmlParserCtxtPtr ctxt, xmlChar first,
8432: xmlChar next, xmlChar third) {
8433: int base, len;
8434: xmlParserInputPtr in;
8435: const xmlChar *buf;
8436:
8437: in = ctxt->input;
8438: if (in == NULL) return(-1);
8439: base = in->cur - in->base;
8440: if (base < 0) return(-1);
8441: if (ctxt->checkIndex > base)
8442: base = ctxt->checkIndex;
8443: if (in->buf == NULL) {
8444: buf = in->base;
8445: len = in->length;
8446: } else {
8447: buf = in->buf->buffer->content;
8448: len = in->buf->buffer->use;
8449: }
8450: /* take into account the sequence length */
8451: if (third) len -= 2;
8452: else if (next) len --;
8453: for (;base < len;base++) {
8454: if (buf[base] == first) {
8455: if (third != 0) {
8456: if ((buf[base + 1] != next) ||
8457: (buf[base + 2] != third)) continue;
8458: } else if (next != 0) {
8459: if (buf[base + 1] != next) continue;
8460: }
8461: ctxt->checkIndex = 0;
8462: #ifdef DEBUG_PUSH
8463: if (next == 0)
8464: fprintf(stderr, "PP: lookup '%c' found at %d\n",
8465: first, base);
8466: else if (third == 0)
8467: fprintf(stderr, "PP: lookup '%c%c' found at %d\n",
8468: first, next, base);
8469: else
8470: fprintf(stderr, "PP: lookup '%c%c%c' found at %d\n",
8471: first, next, third, base);
8472: #endif
8473: return(base - (in->cur - in->base));
8474: }
8475: }
8476: ctxt->checkIndex = base;
8477: #ifdef DEBUG_PUSH
8478: if (next == 0)
8479: fprintf(stderr, "PP: lookup '%c' failed\n", first);
8480: else if (third == 0)
8481: fprintf(stderr, "PP: lookup '%c%c' failed\n", first, next);
8482: else
8483: fprintf(stderr, "PP: lookup '%c%c%c' failed\n", first, next, third);
8484: #endif
8485: return(-1);
1.128 daniel 8486: }
8487:
8488: /**
1.143 daniel 8489: * xmlParseTryOrFinish:
1.128 daniel 8490: * @ctxt: an XML parser context
1.143 daniel 8491: * @terminate: last chunk indicator
1.128 daniel 8492: *
8493: * Try to progress on parsing
8494: *
8495: * Returns zero if no parsing was possible
8496: */
8497: int
1.143 daniel 8498: xmlParseTryOrFinish(xmlParserCtxtPtr ctxt, int terminate) {
1.128 daniel 8499: int ret = 0;
1.140 daniel 8500: xmlParserInputPtr in;
8501: int avail;
8502: xmlChar cur, next;
8503:
8504: #ifdef DEBUG_PUSH
8505: switch (ctxt->instate) {
8506: case XML_PARSER_EOF:
8507: fprintf(stderr, "PP: try EOF\n"); break;
8508: case XML_PARSER_START:
8509: fprintf(stderr, "PP: try START\n"); break;
8510: case XML_PARSER_MISC:
8511: fprintf(stderr, "PP: try MISC\n");break;
8512: case XML_PARSER_COMMENT:
8513: fprintf(stderr, "PP: try COMMENT\n");break;
8514: case XML_PARSER_PROLOG:
8515: fprintf(stderr, "PP: try PROLOG\n");break;
8516: case XML_PARSER_START_TAG:
8517: fprintf(stderr, "PP: try START_TAG\n");break;
8518: case XML_PARSER_CONTENT:
8519: fprintf(stderr, "PP: try CONTENT\n");break;
8520: case XML_PARSER_CDATA_SECTION:
8521: fprintf(stderr, "PP: try CDATA_SECTION\n");break;
8522: case XML_PARSER_END_TAG:
8523: fprintf(stderr, "PP: try END_TAG\n");break;
8524: case XML_PARSER_ENTITY_DECL:
8525: fprintf(stderr, "PP: try ENTITY_DECL\n");break;
8526: case XML_PARSER_ENTITY_VALUE:
8527: fprintf(stderr, "PP: try ENTITY_VALUE\n");break;
8528: case XML_PARSER_ATTRIBUTE_VALUE:
8529: fprintf(stderr, "PP: try ATTRIBUTE_VALUE\n");break;
8530: case XML_PARSER_DTD:
8531: fprintf(stderr, "PP: try DTD\n");break;
8532: case XML_PARSER_EPILOG:
8533: fprintf(stderr, "PP: try EPILOG\n");break;
8534: case XML_PARSER_PI:
8535: fprintf(stderr, "PP: try PI\n");break;
8536: }
8537: #endif
1.128 daniel 8538:
8539: while (1) {
1.140 daniel 8540: /*
8541: * Pop-up of finished entities.
8542: */
1.152 daniel 8543: while ((RAW == 0) && (ctxt->inputNr > 1))
1.140 daniel 8544: xmlPopInput(ctxt);
8545:
8546: in = ctxt->input;
8547: if (in == NULL) break;
8548: if (in->buf == NULL)
8549: avail = in->length - (in->cur - in->base);
8550: else
8551: avail = in->buf->buffer->use - (in->cur - in->base);
8552: if (avail < 1)
8553: goto done;
1.128 daniel 8554: switch (ctxt->instate) {
8555: case XML_PARSER_EOF:
1.140 daniel 8556: /*
8557: * Document parsing is done !
8558: */
8559: goto done;
8560: case XML_PARSER_START:
8561: /*
8562: * Very first chars read from the document flow.
8563: */
8564: cur = in->cur[0];
8565: if (IS_BLANK(cur)) {
8566: if ((ctxt->sax) && (ctxt->sax->setDocumentLocator))
8567: ctxt->sax->setDocumentLocator(ctxt->userData,
8568: &xmlDefaultSAXLocator);
8569: if ((ctxt->sax != NULL) && (ctxt->sax->error != NULL))
8570: ctxt->sax->error(ctxt->userData,
8571: "Extra spaces at the beginning of the document are not allowed\n");
8572: ctxt->errNo = XML_ERR_DOCUMENT_START;
8573: ctxt->wellFormed = 0;
1.180 daniel 8574: ctxt->disableSAX = 1;
1.140 daniel 8575: SKIP_BLANKS;
8576: ret++;
8577: if (in->buf == NULL)
8578: avail = in->length - (in->cur - in->base);
8579: else
8580: avail = in->buf->buffer->use - (in->cur - in->base);
8581: }
8582: if (avail < 2)
8583: goto done;
8584:
8585: cur = in->cur[0];
8586: next = in->cur[1];
8587: if (cur == 0) {
8588: if ((ctxt->sax) && (ctxt->sax->setDocumentLocator))
8589: ctxt->sax->setDocumentLocator(ctxt->userData,
8590: &xmlDefaultSAXLocator);
8591: if ((ctxt->sax != NULL) && (ctxt->sax->error != NULL))
8592: ctxt->sax->error(ctxt->userData, "Document is empty\n");
8593: ctxt->errNo = XML_ERR_DOCUMENT_EMPTY;
8594: ctxt->wellFormed = 0;
1.180 daniel 8595: ctxt->disableSAX = 1;
1.140 daniel 8596: ctxt->instate = XML_PARSER_EOF;
8597: #ifdef DEBUG_PUSH
8598: fprintf(stderr, "PP: entering EOF\n");
8599: #endif
8600: if ((ctxt->sax) && (ctxt->sax->endDocument != NULL))
8601: ctxt->sax->endDocument(ctxt->userData);
8602: goto done;
8603: }
8604: if ((cur == '<') && (next == '?')) {
8605: /* PI or XML decl */
8606: if (avail < 5) return(ret);
1.143 daniel 8607: if ((!terminate) &&
8608: (xmlParseLookupSequence(ctxt, '?', '>', 0) < 0))
1.140 daniel 8609: return(ret);
8610: if ((ctxt->sax) && (ctxt->sax->setDocumentLocator))
8611: ctxt->sax->setDocumentLocator(ctxt->userData,
8612: &xmlDefaultSAXLocator);
8613: if ((in->cur[2] == 'x') &&
8614: (in->cur[3] == 'm') &&
1.142 daniel 8615: (in->cur[4] == 'l') &&
8616: (IS_BLANK(in->cur[5]))) {
1.140 daniel 8617: ret += 5;
8618: #ifdef DEBUG_PUSH
8619: fprintf(stderr, "PP: Parsing XML Decl\n");
8620: #endif
8621: xmlParseXMLDecl(ctxt);
1.167 daniel 8622: ctxt->standalone = ctxt->input->standalone;
1.164 daniel 8623: if ((ctxt->encoding == NULL) &&
8624: (ctxt->input->encoding != NULL))
8625: ctxt->encoding = xmlStrdup(ctxt->input->encoding);
1.171 daniel 8626: if ((ctxt->sax) && (ctxt->sax->startDocument) &&
8627: (!ctxt->disableSAX))
1.140 daniel 8628: ctxt->sax->startDocument(ctxt->userData);
8629: ctxt->instate = XML_PARSER_MISC;
8630: #ifdef DEBUG_PUSH
8631: fprintf(stderr, "PP: entering MISC\n");
8632: #endif
8633: } else {
8634: ctxt->version = xmlCharStrdup(XML_DEFAULT_VERSION);
1.171 daniel 8635: if ((ctxt->sax) && (ctxt->sax->startDocument) &&
8636: (!ctxt->disableSAX))
1.140 daniel 8637: ctxt->sax->startDocument(ctxt->userData);
8638: ctxt->instate = XML_PARSER_MISC;
8639: #ifdef DEBUG_PUSH
8640: fprintf(stderr, "PP: entering MISC\n");
8641: #endif
8642: }
8643: } else {
8644: if ((ctxt->sax) && (ctxt->sax->setDocumentLocator))
8645: ctxt->sax->setDocumentLocator(ctxt->userData,
8646: &xmlDefaultSAXLocator);
8647: ctxt->version = xmlCharStrdup(XML_DEFAULT_VERSION);
1.171 daniel 8648: if ((ctxt->sax) && (ctxt->sax->startDocument) &&
8649: (!ctxt->disableSAX))
1.140 daniel 8650: ctxt->sax->startDocument(ctxt->userData);
8651: ctxt->instate = XML_PARSER_MISC;
8652: #ifdef DEBUG_PUSH
8653: fprintf(stderr, "PP: entering MISC\n");
8654: #endif
8655: }
8656: break;
8657: case XML_PARSER_MISC:
8658: SKIP_BLANKS;
8659: if (in->buf == NULL)
8660: avail = in->length - (in->cur - in->base);
8661: else
8662: avail = in->buf->buffer->use - (in->cur - in->base);
8663: if (avail < 2)
8664: goto done;
8665: cur = in->cur[0];
8666: next = in->cur[1];
8667: if ((cur == '<') && (next == '?')) {
1.143 daniel 8668: if ((!terminate) &&
8669: (xmlParseLookupSequence(ctxt, '?', '>', 0) < 0))
1.140 daniel 8670: goto done;
8671: #ifdef DEBUG_PUSH
8672: fprintf(stderr, "PP: Parsing PI\n");
8673: #endif
8674: xmlParsePI(ctxt);
8675: } else if ((cur == '<') && (next == '!') &&
8676: (in->cur[2] == '-') && (in->cur[3] == '-')) {
1.143 daniel 8677: if ((!terminate) &&
8678: (xmlParseLookupSequence(ctxt, '-', '-', '>') < 0))
1.140 daniel 8679: goto done;
8680: #ifdef DEBUG_PUSH
8681: fprintf(stderr, "PP: Parsing Comment\n");
8682: #endif
8683: xmlParseComment(ctxt);
8684: ctxt->instate = XML_PARSER_MISC;
8685: } else if ((cur == '<') && (next == '!') &&
8686: (in->cur[2] == 'D') && (in->cur[3] == 'O') &&
8687: (in->cur[4] == 'C') && (in->cur[5] == 'T') &&
8688: (in->cur[6] == 'Y') && (in->cur[7] == 'P') &&
8689: (in->cur[8] == 'E')) {
1.143 daniel 8690: if ((!terminate) &&
8691: (xmlParseLookupSequence(ctxt, '>', 0, 0) < 0))
1.140 daniel 8692: goto done;
8693: #ifdef DEBUG_PUSH
8694: fprintf(stderr, "PP: Parsing internal subset\n");
8695: #endif
1.166 daniel 8696: ctxt->inSubset = 1;
1.140 daniel 8697: xmlParseDocTypeDecl(ctxt);
1.152 daniel 8698: if (RAW == '[') {
1.140 daniel 8699: ctxt->instate = XML_PARSER_DTD;
8700: #ifdef DEBUG_PUSH
8701: fprintf(stderr, "PP: entering DTD\n");
8702: #endif
8703: } else {
1.166 daniel 8704: /*
8705: * Create and update the external subset.
8706: */
8707: ctxt->inSubset = 2;
1.171 daniel 8708: if ((ctxt->sax != NULL) && (!ctxt->disableSAX) &&
1.166 daniel 8709: (ctxt->sax->externalSubset != NULL))
8710: ctxt->sax->externalSubset(ctxt->userData,
8711: ctxt->intSubName, ctxt->extSubSystem,
8712: ctxt->extSubURI);
8713: ctxt->inSubset = 0;
1.140 daniel 8714: ctxt->instate = XML_PARSER_PROLOG;
8715: #ifdef DEBUG_PUSH
8716: fprintf(stderr, "PP: entering PROLOG\n");
8717: #endif
8718: }
8719: } else if ((cur == '<') && (next == '!') &&
8720: (avail < 9)) {
8721: goto done;
8722: } else {
8723: ctxt->instate = XML_PARSER_START_TAG;
8724: #ifdef DEBUG_PUSH
8725: fprintf(stderr, "PP: entering START_TAG\n");
8726: #endif
8727: }
8728: break;
1.128 daniel 8729: case XML_PARSER_PROLOG:
1.140 daniel 8730: SKIP_BLANKS;
8731: if (in->buf == NULL)
8732: avail = in->length - (in->cur - in->base);
8733: else
8734: avail = in->buf->buffer->use - (in->cur - in->base);
8735: if (avail < 2)
8736: goto done;
8737: cur = in->cur[0];
8738: next = in->cur[1];
8739: if ((cur == '<') && (next == '?')) {
1.143 daniel 8740: if ((!terminate) &&
8741: (xmlParseLookupSequence(ctxt, '?', '>', 0) < 0))
1.140 daniel 8742: goto done;
8743: #ifdef DEBUG_PUSH
8744: fprintf(stderr, "PP: Parsing PI\n");
8745: #endif
8746: xmlParsePI(ctxt);
8747: } else if ((cur == '<') && (next == '!') &&
8748: (in->cur[2] == '-') && (in->cur[3] == '-')) {
1.143 daniel 8749: if ((!terminate) &&
8750: (xmlParseLookupSequence(ctxt, '-', '-', '>') < 0))
1.140 daniel 8751: goto done;
8752: #ifdef DEBUG_PUSH
8753: fprintf(stderr, "PP: Parsing Comment\n");
8754: #endif
8755: xmlParseComment(ctxt);
8756: ctxt->instate = XML_PARSER_PROLOG;
8757: } else if ((cur == '<') && (next == '!') &&
8758: (avail < 4)) {
8759: goto done;
8760: } else {
8761: ctxt->instate = XML_PARSER_START_TAG;
8762: #ifdef DEBUG_PUSH
8763: fprintf(stderr, "PP: entering START_TAG\n");
8764: #endif
8765: }
8766: break;
8767: case XML_PARSER_EPILOG:
8768: SKIP_BLANKS;
8769: if (in->buf == NULL)
8770: avail = in->length - (in->cur - in->base);
8771: else
8772: avail = in->buf->buffer->use - (in->cur - in->base);
8773: if (avail < 2)
8774: goto done;
8775: cur = in->cur[0];
8776: next = in->cur[1];
8777: if ((cur == '<') && (next == '?')) {
1.143 daniel 8778: if ((!terminate) &&
8779: (xmlParseLookupSequence(ctxt, '?', '>', 0) < 0))
1.140 daniel 8780: goto done;
8781: #ifdef DEBUG_PUSH
8782: fprintf(stderr, "PP: Parsing PI\n");
8783: #endif
8784: xmlParsePI(ctxt);
8785: ctxt->instate = XML_PARSER_EPILOG;
8786: } else if ((cur == '<') && (next == '!') &&
8787: (in->cur[2] == '-') && (in->cur[3] == '-')) {
1.143 daniel 8788: if ((!terminate) &&
8789: (xmlParseLookupSequence(ctxt, '-', '-', '>') < 0))
1.140 daniel 8790: goto done;
8791: #ifdef DEBUG_PUSH
8792: fprintf(stderr, "PP: Parsing Comment\n");
8793: #endif
8794: xmlParseComment(ctxt);
8795: ctxt->instate = XML_PARSER_EPILOG;
8796: } else if ((cur == '<') && (next == '!') &&
8797: (avail < 4)) {
8798: goto done;
8799: } else {
8800: if ((ctxt->sax != NULL) && (ctxt->sax->error != NULL))
8801: ctxt->sax->error(ctxt->userData,
8802: "Extra content at the end of the document\n");
8803: ctxt->wellFormed = 0;
1.180 daniel 8804: ctxt->disableSAX = 1;
1.140 daniel 8805: ctxt->errNo = XML_ERR_DOCUMENT_END;
8806: ctxt->instate = XML_PARSER_EOF;
8807: #ifdef DEBUG_PUSH
8808: fprintf(stderr, "PP: entering EOF\n");
8809: #endif
1.171 daniel 8810: if ((ctxt->sax) && (ctxt->sax->endDocument != NULL) &&
8811: (!ctxt->disableSAX))
1.140 daniel 8812: ctxt->sax->endDocument(ctxt->userData);
8813: goto done;
8814: }
8815: break;
8816: case XML_PARSER_START_TAG: {
8817: xmlChar *name, *oldname;
8818:
8819: if (avail < 2)
8820: goto done;
8821: cur = in->cur[0];
8822: if (cur != '<') {
8823: if ((ctxt->sax != NULL) && (ctxt->sax->error != NULL))
8824: ctxt->sax->error(ctxt->userData,
8825: "Start tag expect, '<' not found\n");
8826: ctxt->errNo = XML_ERR_DOCUMENT_EMPTY;
8827: ctxt->wellFormed = 0;
1.180 daniel 8828: ctxt->disableSAX = 1;
1.140 daniel 8829: ctxt->instate = XML_PARSER_EOF;
8830: #ifdef DEBUG_PUSH
8831: fprintf(stderr, "PP: entering EOF\n");
8832: #endif
1.171 daniel 8833: if ((ctxt->sax) && (ctxt->sax->endDocument != NULL) &&
8834: (!ctxt->disableSAX))
1.140 daniel 8835: ctxt->sax->endDocument(ctxt->userData);
8836: goto done;
8837: }
1.143 daniel 8838: if ((!terminate) &&
8839: (xmlParseLookupSequence(ctxt, '>', 0, 0) < 0))
1.140 daniel 8840: goto done;
1.176 daniel 8841: if (ctxt->spaceNr == 0)
8842: spacePush(ctxt, -1);
8843: else
8844: spacePush(ctxt, *ctxt->space);
1.140 daniel 8845: name = xmlParseStartTag(ctxt);
8846: if (name == NULL) {
1.176 daniel 8847: spacePop(ctxt);
1.140 daniel 8848: ctxt->instate = XML_PARSER_EOF;
8849: #ifdef DEBUG_PUSH
8850: fprintf(stderr, "PP: entering EOF\n");
8851: #endif
1.171 daniel 8852: if ((ctxt->sax) && (ctxt->sax->endDocument != NULL) &&
8853: (!ctxt->disableSAX))
1.140 daniel 8854: ctxt->sax->endDocument(ctxt->userData);
8855: goto done;
8856: }
8857: namePush(ctxt, xmlStrdup(name));
8858:
8859: /*
8860: * [ VC: Root Element Type ]
8861: * The Name in the document type declaration must match
8862: * the element type of the root element.
8863: */
8864: if (ctxt->validate && ctxt->wellFormed && ctxt->myDoc &&
1.157 daniel 8865: ctxt->node && (ctxt->node == ctxt->myDoc->children))
1.140 daniel 8866: ctxt->valid &= xmlValidateRoot(&ctxt->vctxt, ctxt->myDoc);
8867:
8868: /*
8869: * Check for an Empty Element.
8870: */
1.152 daniel 8871: if ((RAW == '/') && (NXT(1) == '>')) {
1.140 daniel 8872: SKIP(2);
1.171 daniel 8873: if ((ctxt->sax != NULL) &&
8874: (ctxt->sax->endElement != NULL) && (!ctxt->disableSAX))
1.140 daniel 8875: ctxt->sax->endElement(ctxt->userData, name);
8876: xmlFree(name);
8877: oldname = namePop(ctxt);
1.176 daniel 8878: spacePop(ctxt);
1.140 daniel 8879: if (oldname != NULL) {
8880: #ifdef DEBUG_STACK
8881: fprintf(stderr,"Close: popped %s\n", oldname);
8882: #endif
8883: xmlFree(oldname);
8884: }
8885: if (ctxt->name == NULL) {
8886: ctxt->instate = XML_PARSER_EPILOG;
8887: #ifdef DEBUG_PUSH
8888: fprintf(stderr, "PP: entering EPILOG\n");
8889: #endif
8890: } else {
8891: ctxt->instate = XML_PARSER_CONTENT;
8892: #ifdef DEBUG_PUSH
8893: fprintf(stderr, "PP: entering CONTENT\n");
8894: #endif
8895: }
8896: break;
8897: }
1.152 daniel 8898: if (RAW == '>') {
1.140 daniel 8899: NEXT;
8900: } else {
8901: if ((ctxt->sax != NULL) && (ctxt->sax->error != NULL))
8902: ctxt->sax->error(ctxt->userData,
8903: "Couldn't find end of Start Tag %s\n",
8904: name);
8905: ctxt->wellFormed = 0;
1.180 daniel 8906: ctxt->disableSAX = 1;
1.140 daniel 8907: ctxt->errNo = XML_ERR_GT_REQUIRED;
8908:
8909: /*
8910: * end of parsing of this node.
8911: */
8912: nodePop(ctxt);
8913: oldname = namePop(ctxt);
1.176 daniel 8914: spacePop(ctxt);
1.140 daniel 8915: if (oldname != NULL) {
8916: #ifdef DEBUG_STACK
8917: fprintf(stderr,"Close: popped %s\n", oldname);
8918: #endif
8919: xmlFree(oldname);
8920: }
8921: }
8922: xmlFree(name);
8923: ctxt->instate = XML_PARSER_CONTENT;
8924: #ifdef DEBUG_PUSH
8925: fprintf(stderr, "PP: entering CONTENT\n");
8926: #endif
8927: break;
8928: }
1.128 daniel 8929: case XML_PARSER_CONTENT:
1.140 daniel 8930: /*
8931: * Handle preparsed entities and charRef
8932: */
8933: if (ctxt->token != 0) {
8934: xmlChar cur[2] = { 0 , 0 } ;
8935:
8936: cur[0] = (xmlChar) ctxt->token;
1.171 daniel 8937: if ((ctxt->sax != NULL) && (!ctxt->disableSAX) &&
8938: (ctxt->sax->characters != NULL))
1.140 daniel 8939: ctxt->sax->characters(ctxt->userData, cur, 1);
8940: ctxt->token = 0;
8941: }
8942: if (avail < 2)
8943: goto done;
8944: cur = in->cur[0];
8945: next = in->cur[1];
8946: if ((cur == '<') && (next == '?')) {
1.143 daniel 8947: if ((!terminate) &&
8948: (xmlParseLookupSequence(ctxt, '?', '>', 0) < 0))
1.140 daniel 8949: goto done;
8950: #ifdef DEBUG_PUSH
8951: fprintf(stderr, "PP: Parsing PI\n");
8952: #endif
8953: xmlParsePI(ctxt);
8954: } else if ((cur == '<') && (next == '!') &&
8955: (in->cur[2] == '-') && (in->cur[3] == '-')) {
1.143 daniel 8956: if ((!terminate) &&
8957: (xmlParseLookupSequence(ctxt, '-', '-', '>') < 0))
1.140 daniel 8958: goto done;
8959: #ifdef DEBUG_PUSH
8960: fprintf(stderr, "PP: Parsing Comment\n");
8961: #endif
8962: xmlParseComment(ctxt);
8963: ctxt->instate = XML_PARSER_CONTENT;
8964: } else if ((cur == '<') && (in->cur[1] == '!') &&
8965: (in->cur[2] == '[') && (NXT(3) == 'C') &&
8966: (in->cur[4] == 'D') && (NXT(5) == 'A') &&
8967: (in->cur[6] == 'T') && (NXT(7) == 'A') &&
8968: (in->cur[8] == '[')) {
8969: SKIP(9);
8970: ctxt->instate = XML_PARSER_CDATA_SECTION;
8971: #ifdef DEBUG_PUSH
8972: fprintf(stderr, "PP: entering CDATA_SECTION\n");
8973: #endif
8974: break;
8975: } else if ((cur == '<') && (next == '!') &&
8976: (avail < 9)) {
8977: goto done;
8978: } else if ((cur == '<') && (next == '/')) {
8979: ctxt->instate = XML_PARSER_END_TAG;
8980: #ifdef DEBUG_PUSH
8981: fprintf(stderr, "PP: entering END_TAG\n");
8982: #endif
8983: break;
8984: } else if (cur == '<') {
8985: ctxt->instate = XML_PARSER_START_TAG;
8986: #ifdef DEBUG_PUSH
8987: fprintf(stderr, "PP: entering START_TAG\n");
8988: #endif
8989: break;
8990: } else if (cur == '&') {
1.143 daniel 8991: if ((!terminate) &&
8992: (xmlParseLookupSequence(ctxt, ';', 0, 0) < 0))
1.140 daniel 8993: goto done;
8994: #ifdef DEBUG_PUSH
8995: fprintf(stderr, "PP: Parsing Reference\n");
8996: #endif
8997: /* TODO: check generation of subtrees if noent !!! */
8998: xmlParseReference(ctxt);
8999: } else {
1.156 daniel 9000: /* TODO Avoid the extra copy, handle directly !!! */
1.140 daniel 9001: /*
1.181 ! daniel 9002: * Goal of the following test is:
1.140 daniel 9003: * - minimize calls to the SAX 'character' callback
9004: * when they are mergeable
9005: * - handle an problem for isBlank when we only parse
9006: * a sequence of blank chars and the next one is
9007: * not available to check against '<' presence.
9008: * - tries to homogenize the differences in SAX
9009: * callbacks beween the push and pull versions
9010: * of the parser.
9011: */
9012: if ((ctxt->inputNr == 1) &&
9013: (avail < XML_PARSER_BIG_BUFFER_SIZE)) {
1.143 daniel 9014: if ((!terminate) &&
9015: (xmlParseLookupSequence(ctxt, '<', 0, 0) < 0))
1.140 daniel 9016: goto done;
9017: }
9018: ctxt->checkIndex = 0;
9019: #ifdef DEBUG_PUSH
9020: fprintf(stderr, "PP: Parsing char data\n");
9021: #endif
9022: xmlParseCharData(ctxt, 0);
9023: }
9024: /*
9025: * Pop-up of finished entities.
9026: */
1.152 daniel 9027: while ((RAW == 0) && (ctxt->inputNr > 1))
1.140 daniel 9028: xmlPopInput(ctxt);
9029: break;
9030: case XML_PARSER_CDATA_SECTION: {
9031: /*
9032: * The Push mode need to have the SAX callback for
9033: * cdataBlock merge back contiguous callbacks.
9034: */
9035: int base;
9036:
9037: in = ctxt->input;
9038: base = xmlParseLookupSequence(ctxt, ']', ']', '>');
9039: if (base < 0) {
9040: if (avail >= XML_PARSER_BIG_BUFFER_SIZE + 2) {
1.171 daniel 9041: if ((ctxt->sax != NULL) && (!ctxt->disableSAX)) {
1.140 daniel 9042: if (ctxt->sax->cdataBlock != NULL)
9043: ctxt->sax->cdataBlock(ctxt->userData, in->cur,
9044: XML_PARSER_BIG_BUFFER_SIZE);
9045: }
9046: SKIP(XML_PARSER_BIG_BUFFER_SIZE);
9047: ctxt->checkIndex = 0;
9048: }
9049: goto done;
9050: } else {
1.171 daniel 9051: if ((ctxt->sax != NULL) && (base > 0) &&
9052: (!ctxt->disableSAX)) {
1.140 daniel 9053: if (ctxt->sax->cdataBlock != NULL)
9054: ctxt->sax->cdataBlock(ctxt->userData,
9055: in->cur, base);
9056: }
9057: SKIP(base + 3);
9058: ctxt->checkIndex = 0;
9059: ctxt->instate = XML_PARSER_CONTENT;
9060: #ifdef DEBUG_PUSH
9061: fprintf(stderr, "PP: entering CONTENT\n");
9062: #endif
9063: }
9064: break;
9065: }
1.141 daniel 9066: case XML_PARSER_END_TAG:
1.140 daniel 9067: if (avail < 2)
9068: goto done;
1.143 daniel 9069: if ((!terminate) &&
9070: (xmlParseLookupSequence(ctxt, '>', 0, 0) < 0))
1.140 daniel 9071: goto done;
9072: xmlParseEndTag(ctxt);
9073: if (ctxt->name == NULL) {
9074: ctxt->instate = XML_PARSER_EPILOG;
9075: #ifdef DEBUG_PUSH
9076: fprintf(stderr, "PP: entering EPILOG\n");
9077: #endif
9078: } else {
9079: ctxt->instate = XML_PARSER_CONTENT;
9080: #ifdef DEBUG_PUSH
9081: fprintf(stderr, "PP: entering CONTENT\n");
9082: #endif
9083: }
9084: break;
9085: case XML_PARSER_DTD: {
9086: /*
9087: * Sorry but progressive parsing of the internal subset
9088: * is not expected to be supported. We first check that
9089: * the full content of the internal subset is available and
9090: * the parsing is launched only at that point.
9091: * Internal subset ends up with "']' S? '>'" in an unescaped
9092: * section and not in a ']]>' sequence which are conditional
9093: * sections (whoever argued to keep that crap in XML deserve
9094: * a place in hell !).
9095: */
9096: int base, i;
9097: xmlChar *buf;
9098: xmlChar quote = 0;
9099:
9100: base = in->cur - in->base;
9101: if (base < 0) return(0);
9102: if (ctxt->checkIndex > base)
9103: base = ctxt->checkIndex;
9104: buf = in->buf->buffer->content;
9105: for (;base < in->buf->buffer->use;base++) {
9106: if (quote != 0) {
9107: if (buf[base] == quote)
9108: quote = 0;
9109: continue;
9110: }
9111: if (buf[base] == '"') {
9112: quote = '"';
9113: continue;
9114: }
9115: if (buf[base] == '\'') {
9116: quote = '\'';
9117: continue;
9118: }
9119: if (buf[base] == ']') {
9120: if (base +1 >= in->buf->buffer->use)
9121: break;
9122: if (buf[base + 1] == ']') {
9123: /* conditional crap, skip both ']' ! */
9124: base++;
9125: continue;
9126: }
9127: for (i = 0;base + i < in->buf->buffer->use;i++) {
9128: if (buf[base + i] == '>')
9129: goto found_end_int_subset;
9130: }
9131: break;
9132: }
9133: }
9134: /*
9135: * We didn't found the end of the Internal subset
9136: */
9137: if (quote == 0)
9138: ctxt->checkIndex = base;
9139: #ifdef DEBUG_PUSH
9140: if (next == 0)
9141: fprintf(stderr, "PP: lookup of int subset end filed\n");
9142: #endif
9143: goto done;
9144:
9145: found_end_int_subset:
9146: xmlParseInternalSubset(ctxt);
1.166 daniel 9147: ctxt->inSubset = 2;
1.171 daniel 9148: if ((ctxt->sax != NULL) && (!ctxt->disableSAX) &&
1.166 daniel 9149: (ctxt->sax->externalSubset != NULL))
9150: ctxt->sax->externalSubset(ctxt->userData, ctxt->intSubName,
9151: ctxt->extSubSystem, ctxt->extSubURI);
9152: ctxt->inSubset = 0;
1.140 daniel 9153: ctxt->instate = XML_PARSER_PROLOG;
9154: ctxt->checkIndex = 0;
9155: #ifdef DEBUG_PUSH
9156: fprintf(stderr, "PP: entering PROLOG\n");
9157: #endif
9158: break;
9159: }
9160: case XML_PARSER_COMMENT:
9161: fprintf(stderr, "PP: internal error, state == COMMENT\n");
9162: ctxt->instate = XML_PARSER_CONTENT;
9163: #ifdef DEBUG_PUSH
9164: fprintf(stderr, "PP: entering CONTENT\n");
9165: #endif
9166: break;
9167: case XML_PARSER_PI:
9168: fprintf(stderr, "PP: internal error, state == PI\n");
9169: ctxt->instate = XML_PARSER_CONTENT;
9170: #ifdef DEBUG_PUSH
9171: fprintf(stderr, "PP: entering CONTENT\n");
9172: #endif
9173: break;
1.128 daniel 9174: case XML_PARSER_ENTITY_DECL:
1.140 daniel 9175: fprintf(stderr, "PP: internal error, state == ENTITY_DECL\n");
9176: ctxt->instate = XML_PARSER_DTD;
9177: #ifdef DEBUG_PUSH
9178: fprintf(stderr, "PP: entering DTD\n");
9179: #endif
9180: break;
1.128 daniel 9181: case XML_PARSER_ENTITY_VALUE:
1.140 daniel 9182: fprintf(stderr, "PP: internal error, state == ENTITY_VALUE\n");
9183: ctxt->instate = XML_PARSER_CONTENT;
9184: #ifdef DEBUG_PUSH
9185: fprintf(stderr, "PP: entering DTD\n");
9186: #endif
9187: break;
1.128 daniel 9188: case XML_PARSER_ATTRIBUTE_VALUE:
1.140 daniel 9189: fprintf(stderr, "PP: internal error, state == ATTRIBUTE_VALUE\n");
1.168 daniel 9190: ctxt->instate = XML_PARSER_START_TAG;
9191: #ifdef DEBUG_PUSH
9192: fprintf(stderr, "PP: entering START_TAG\n");
9193: #endif
9194: break;
9195: case XML_PARSER_SYSTEM_LITERAL:
9196: fprintf(stderr, "PP: internal error, state == SYSTEM_LITERAL\n");
1.140 daniel 9197: ctxt->instate = XML_PARSER_START_TAG;
9198: #ifdef DEBUG_PUSH
9199: fprintf(stderr, "PP: entering START_TAG\n");
9200: #endif
9201: break;
1.128 daniel 9202: }
9203: }
1.140 daniel 9204: done:
9205: #ifdef DEBUG_PUSH
9206: fprintf(stderr, "PP: done %d\n", ret);
9207: #endif
1.128 daniel 9208: return(ret);
9209: }
9210:
9211: /**
1.143 daniel 9212: * xmlParseTry:
9213: * @ctxt: an XML parser context
9214: *
9215: * Try to progress on parsing
9216: *
9217: * Returns zero if no parsing was possible
9218: */
9219: int
9220: xmlParseTry(xmlParserCtxtPtr ctxt) {
9221: return(xmlParseTryOrFinish(ctxt, 0));
9222: }
9223:
9224: /**
1.128 daniel 9225: * xmlParseChunk:
9226: * @ctxt: an XML parser context
9227: * @chunk: an char array
9228: * @size: the size in byte of the chunk
9229: * @terminate: last chunk indicator
9230: *
9231: * Parse a Chunk of memory
9232: *
9233: * Returns zero if no error, the xmlParserErrors otherwise.
9234: */
1.140 daniel 9235: int
1.128 daniel 9236: xmlParseChunk(xmlParserCtxtPtr ctxt, const char *chunk, int size,
9237: int terminate) {
1.132 daniel 9238: if ((size > 0) && (chunk != NULL) && (ctxt->input != NULL) &&
1.140 daniel 9239: (ctxt->input->buf != NULL) && (ctxt->instate != XML_PARSER_EOF)) {
9240: int base = ctxt->input->base - ctxt->input->buf->buffer->content;
9241: int cur = ctxt->input->cur - ctxt->input->base;
9242:
1.132 daniel 9243: xmlParserInputBufferPush(ctxt->input->buf, size, chunk);
1.140 daniel 9244: ctxt->input->base = ctxt->input->buf->buffer->content + base;
9245: ctxt->input->cur = ctxt->input->base + cur;
9246: #ifdef DEBUG_PUSH
9247: fprintf(stderr, "PP: pushed %d\n", size);
9248: #endif
9249:
1.150 daniel 9250: if ((terminate) || (ctxt->input->buf->buffer->use > 80))
9251: xmlParseTryOrFinish(ctxt, terminate);
1.140 daniel 9252: } else if (ctxt->instate != XML_PARSER_EOF)
1.143 daniel 9253: xmlParseTryOrFinish(ctxt, terminate);
1.140 daniel 9254: if (terminate) {
1.151 daniel 9255: /*
9256: * Grab the encoding if it was added on-the-fly
9257: */
9258: if ((ctxt->encoding != NULL) && (ctxt->myDoc != NULL) &&
9259: (ctxt->myDoc->encoding == NULL)) {
9260: ctxt->myDoc->encoding = ctxt->encoding;
9261: ctxt->encoding = NULL;
9262: }
9263:
9264: /*
9265: * Check for termination
9266: */
1.140 daniel 9267: if ((ctxt->instate != XML_PARSER_EOF) &&
9268: (ctxt->instate != XML_PARSER_EPILOG)) {
9269: if ((ctxt->sax != NULL) && (ctxt->sax->error != NULL))
9270: ctxt->sax->error(ctxt->userData,
9271: "Extra content at the end of the document\n");
9272: ctxt->wellFormed = 0;
1.180 daniel 9273: ctxt->disableSAX = 1;
1.140 daniel 9274: ctxt->errNo = XML_ERR_DOCUMENT_END;
9275: }
9276: if (ctxt->instate != XML_PARSER_EOF) {
1.171 daniel 9277: if ((ctxt->sax) && (ctxt->sax->endDocument != NULL) &&
9278: (!ctxt->disableSAX))
1.140 daniel 9279: ctxt->sax->endDocument(ctxt->userData);
9280: }
9281: ctxt->instate = XML_PARSER_EOF;
1.128 daniel 9282: }
9283: return((xmlParserErrors) ctxt->errNo);
9284: }
9285:
9286: /************************************************************************
9287: * *
1.98 daniel 9288: * I/O front end functions to the parser *
9289: * *
9290: ************************************************************************/
9291:
1.50 daniel 9292: /**
1.181 ! daniel 9293: * xmlCreatePushParserCtxt:
1.140 daniel 9294: * @sax: a SAX handler
9295: * @user_data: The user data returned on SAX callbacks
9296: * @chunk: a pointer to an array of chars
9297: * @size: number of chars in the array
9298: * @filename: an optional file name or URI
9299: *
9300: * Create a parser context for using the XML parser in push mode
9301: * To allow content encoding detection, @size should be >= 4
9302: * The value of @filename is used for fetching external entities
9303: * and error/warning reports.
9304: *
9305: * Returns the new parser context or NULL
9306: */
9307: xmlParserCtxtPtr
9308: xmlCreatePushParserCtxt(xmlSAXHandlerPtr sax, void *user_data,
9309: const char *chunk, int size, const char *filename) {
9310: xmlParserCtxtPtr ctxt;
9311: xmlParserInputPtr inputStream;
9312: xmlParserInputBufferPtr buf;
9313: xmlCharEncoding enc = XML_CHAR_ENCODING_NONE;
9314:
9315: /*
1.156 daniel 9316: * plug some encoding conversion routines
1.140 daniel 9317: */
9318: if ((chunk != NULL) && (size >= 4))
1.156 daniel 9319: enc = xmlDetectCharEncoding((const xmlChar *) chunk, size);
1.140 daniel 9320:
9321: buf = xmlAllocParserInputBuffer(enc);
9322: if (buf == NULL) return(NULL);
9323:
9324: ctxt = xmlNewParserCtxt();
9325: if (ctxt == NULL) {
9326: xmlFree(buf);
9327: return(NULL);
9328: }
9329: if (sax != NULL) {
9330: if (ctxt->sax != &xmlDefaultSAXHandler)
9331: xmlFree(ctxt->sax);
9332: ctxt->sax = (xmlSAXHandlerPtr) xmlMalloc(sizeof(xmlSAXHandler));
9333: if (ctxt->sax == NULL) {
9334: xmlFree(buf);
9335: xmlFree(ctxt);
9336: return(NULL);
9337: }
9338: memcpy(ctxt->sax, sax, sizeof(xmlSAXHandler));
9339: if (user_data != NULL)
9340: ctxt->userData = user_data;
9341: }
9342: if (filename == NULL) {
9343: ctxt->directory = NULL;
9344: } else {
9345: ctxt->directory = xmlParserGetDirectory(filename);
9346: }
9347:
9348: inputStream = xmlNewInputStream(ctxt);
9349: if (inputStream == NULL) {
9350: xmlFreeParserCtxt(ctxt);
9351: return(NULL);
9352: }
9353:
9354: if (filename == NULL)
9355: inputStream->filename = NULL;
9356: else
9357: inputStream->filename = xmlMemStrdup(filename);
9358: inputStream->buf = buf;
9359: inputStream->base = inputStream->buf->buffer->content;
9360: inputStream->cur = inputStream->buf->buffer->content;
1.156 daniel 9361: if (enc != XML_CHAR_ENCODING_NONE) {
9362: xmlSwitchEncoding(ctxt, enc);
9363: }
1.140 daniel 9364:
9365: inputPush(ctxt, inputStream);
9366:
9367: if ((size > 0) && (chunk != NULL) && (ctxt->input != NULL) &&
9368: (ctxt->input->buf != NULL)) {
9369: xmlParserInputBufferPush(ctxt->input->buf, size, chunk);
9370: #ifdef DEBUG_PUSH
9371: fprintf(stderr, "PP: pushed %d\n", size);
9372: #endif
9373: }
9374:
9375: return(ctxt);
9376: }
9377:
9378: /**
1.181 ! daniel 9379: * xmlCreateDocParserCtxt:
1.123 daniel 9380: * @cur: a pointer to an array of xmlChar
1.50 daniel 9381: *
1.69 daniel 9382: * Create a parser context for an XML in-memory document.
9383: *
9384: * Returns the new parser context or NULL
1.16 daniel 9385: */
1.69 daniel 9386: xmlParserCtxtPtr
1.123 daniel 9387: xmlCreateDocParserCtxt(xmlChar *cur) {
1.16 daniel 9388: xmlParserCtxtPtr ctxt;
1.40 daniel 9389: xmlParserInputPtr input;
1.16 daniel 9390:
1.97 daniel 9391: ctxt = xmlNewParserCtxt();
1.16 daniel 9392: if (ctxt == NULL) {
9393: return(NULL);
9394: }
1.96 daniel 9395: input = xmlNewInputStream(ctxt);
1.40 daniel 9396: if (input == NULL) {
1.97 daniel 9397: xmlFreeParserCtxt(ctxt);
1.40 daniel 9398: return(NULL);
9399: }
9400:
9401: input->base = cur;
9402: input->cur = cur;
9403:
9404: inputPush(ctxt, input);
1.69 daniel 9405: return(ctxt);
9406: }
9407:
9408: /**
1.181 ! daniel 9409: * xmlSAXParseDoc:
1.69 daniel 9410: * @sax: the SAX handler block
1.123 daniel 9411: * @cur: a pointer to an array of xmlChar
1.69 daniel 9412: * @recovery: work in recovery mode, i.e. tries to read no Well Formed
9413: * documents
9414: *
9415: * parse an XML in-memory document and build a tree.
9416: * It use the given SAX function block to handle the parsing callback.
9417: * If sax is NULL, fallback to the default DOM tree building routines.
9418: *
9419: * Returns the resulting document tree
9420: */
9421:
9422: xmlDocPtr
1.123 daniel 9423: xmlSAXParseDoc(xmlSAXHandlerPtr sax, xmlChar *cur, int recovery) {
1.69 daniel 9424: xmlDocPtr ret;
9425: xmlParserCtxtPtr ctxt;
9426:
9427: if (cur == NULL) return(NULL);
1.16 daniel 9428:
9429:
1.69 daniel 9430: ctxt = xmlCreateDocParserCtxt(cur);
9431: if (ctxt == NULL) return(NULL);
1.74 daniel 9432: if (sax != NULL) {
9433: ctxt->sax = sax;
9434: ctxt->userData = NULL;
9435: }
1.69 daniel 9436:
1.16 daniel 9437: xmlParseDocument(ctxt);
1.72 daniel 9438: if ((ctxt->wellFormed) || recovery) ret = ctxt->myDoc;
1.59 daniel 9439: else {
9440: ret = NULL;
1.72 daniel 9441: xmlFreeDoc(ctxt->myDoc);
9442: ctxt->myDoc = NULL;
1.59 daniel 9443: }
1.86 daniel 9444: if (sax != NULL)
9445: ctxt->sax = NULL;
1.69 daniel 9446: xmlFreeParserCtxt(ctxt);
1.16 daniel 9447:
1.1 veillard 9448: return(ret);
9449: }
9450:
1.50 daniel 9451: /**
1.181 ! daniel 9452: * xmlParseDoc:
1.123 daniel 9453: * @cur: a pointer to an array of xmlChar
1.55 daniel 9454: *
9455: * parse an XML in-memory document and build a tree.
9456: *
1.68 daniel 9457: * Returns the resulting document tree
1.55 daniel 9458: */
9459:
1.69 daniel 9460: xmlDocPtr
1.123 daniel 9461: xmlParseDoc(xmlChar *cur) {
1.59 daniel 9462: return(xmlSAXParseDoc(NULL, cur, 0));
1.76 daniel 9463: }
9464:
9465: /**
1.181 ! daniel 9466: * xmlSAXParseDTD:
1.76 daniel 9467: * @sax: the SAX handler block
9468: * @ExternalID: a NAME* containing the External ID of the DTD
9469: * @SystemID: a NAME* containing the URL to the DTD
9470: *
9471: * Load and parse an external subset.
9472: *
9473: * Returns the resulting xmlDtdPtr or NULL in case of error.
9474: */
9475:
9476: xmlDtdPtr
1.123 daniel 9477: xmlSAXParseDTD(xmlSAXHandlerPtr sax, const xmlChar *ExternalID,
9478: const xmlChar *SystemID) {
1.76 daniel 9479: xmlDtdPtr ret = NULL;
9480: xmlParserCtxtPtr ctxt;
1.83 daniel 9481: xmlParserInputPtr input = NULL;
1.76 daniel 9482: xmlCharEncoding enc;
9483:
9484: if ((ExternalID == NULL) && (SystemID == NULL)) return(NULL);
9485:
1.97 daniel 9486: ctxt = xmlNewParserCtxt();
1.76 daniel 9487: if (ctxt == NULL) {
9488: return(NULL);
9489: }
9490:
9491: /*
9492: * Set-up the SAX context
9493: */
9494: if (ctxt == NULL) return(NULL);
9495: if (sax != NULL) {
1.93 veillard 9496: if (ctxt->sax != NULL)
1.119 daniel 9497: xmlFree(ctxt->sax);
1.76 daniel 9498: ctxt->sax = sax;
9499: ctxt->userData = NULL;
9500: }
9501:
9502: /*
9503: * Ask the Entity resolver to load the damn thing
9504: */
9505:
9506: if ((ctxt->sax != NULL) && (ctxt->sax->resolveEntity != NULL))
9507: input = ctxt->sax->resolveEntity(ctxt->userData, ExternalID, SystemID);
9508: if (input == NULL) {
1.86 daniel 9509: if (sax != NULL) ctxt->sax = NULL;
1.76 daniel 9510: xmlFreeParserCtxt(ctxt);
9511: return(NULL);
9512: }
9513:
9514: /*
1.156 daniel 9515: * plug some encoding conversion routines here.
1.76 daniel 9516: */
9517: xmlPushInput(ctxt, input);
1.156 daniel 9518: enc = xmlDetectCharEncoding(ctxt->input->cur, 4);
1.76 daniel 9519: xmlSwitchEncoding(ctxt, enc);
9520:
1.95 veillard 9521: if (input->filename == NULL)
1.156 daniel 9522: input->filename = (char *) xmlStrdup(SystemID);
1.76 daniel 9523: input->line = 1;
9524: input->col = 1;
9525: input->base = ctxt->input->cur;
9526: input->cur = ctxt->input->cur;
9527: input->free = NULL;
9528:
9529: /*
9530: * let's parse that entity knowing it's an external subset.
9531: */
1.79 daniel 9532: xmlParseExternalSubset(ctxt, ExternalID, SystemID);
1.76 daniel 9533:
9534: if (ctxt->myDoc != NULL) {
9535: if (ctxt->wellFormed) {
9536: ret = ctxt->myDoc->intSubset;
9537: ctxt->myDoc->intSubset = NULL;
9538: } else {
9539: ret = NULL;
9540: }
9541: xmlFreeDoc(ctxt->myDoc);
9542: ctxt->myDoc = NULL;
9543: }
1.86 daniel 9544: if (sax != NULL) ctxt->sax = NULL;
1.76 daniel 9545: xmlFreeParserCtxt(ctxt);
9546:
9547: return(ret);
9548: }
9549:
9550: /**
1.181 ! daniel 9551: * xmlParseDTD:
1.76 daniel 9552: * @ExternalID: a NAME* containing the External ID of the DTD
9553: * @SystemID: a NAME* containing the URL to the DTD
9554: *
9555: * Load and parse an external subset.
9556: *
9557: * Returns the resulting xmlDtdPtr or NULL in case of error.
9558: */
9559:
9560: xmlDtdPtr
1.123 daniel 9561: xmlParseDTD(const xmlChar *ExternalID, const xmlChar *SystemID) {
1.76 daniel 9562: return(xmlSAXParseDTD(NULL, ExternalID, SystemID));
1.59 daniel 9563: }
9564:
9565: /**
1.181 ! daniel 9566: * xmlSAXParseBalancedChunk:
1.144 daniel 9567: * @ctx: an XML parser context (possibly NULL)
9568: * @sax: the SAX handler bloc (possibly NULL)
9569: * @user_data: The user data returned on SAX callbacks (possibly NULL)
9570: * @input: a parser input stream
9571: * @enc: the encoding
9572: *
9573: * Parse a well-balanced chunk of an XML document
9574: * The user has to provide SAX callback block whose routines will be
9575: * called by the parser
9576: * The allowed sequence for the Well Balanced Chunk is the one defined by
9577: * the content production in the XML grammar:
9578: *
9579: * [43] content ::= (element | CharData | Reference | CDSect | PI | Comment)*
9580: *
1.176 daniel 9581: * Returns 0 if the chunk is well balanced, -1 in case of args problem and
1.144 daniel 9582: * the error code otherwise
9583: */
9584:
9585: int
9586: xmlSAXParseBalancedChunk(xmlParserCtxtPtr ctx, xmlSAXHandlerPtr sax,
9587: void *user_data, xmlParserInputPtr input,
9588: xmlCharEncoding enc) {
9589: xmlParserCtxtPtr ctxt;
9590: int ret;
9591:
9592: if (input == NULL) return(-1);
9593:
9594: if (ctx != NULL)
9595: ctxt = ctx;
9596: else {
9597: ctxt = xmlNewParserCtxt();
9598: if (ctxt == NULL)
9599: return(-1);
9600: if (sax == NULL)
9601: ctxt->myDoc = xmlNewDoc(BAD_CAST "1.0");
9602: }
9603:
9604: /*
9605: * Set-up the SAX context
9606: */
9607: if (sax != NULL) {
9608: if (ctxt->sax != NULL)
9609: xmlFree(ctxt->sax);
9610: ctxt->sax = sax;
9611: ctxt->userData = user_data;
9612: }
9613:
9614: /*
9615: * plug some encoding conversion routines here.
9616: */
9617: xmlPushInput(ctxt, input);
9618: if (enc != XML_CHAR_ENCODING_NONE)
9619: xmlSwitchEncoding(ctxt, enc);
9620:
9621: /*
9622: * let's parse that entity knowing it's an external subset.
9623: */
9624: xmlParseContent(ctxt);
9625: ret = ctxt->errNo;
9626:
9627: if (ctx == NULL) {
9628: if (sax != NULL)
9629: ctxt->sax = NULL;
9630: else
9631: xmlFreeDoc(ctxt->myDoc);
9632: xmlFreeParserCtxt(ctxt);
9633: }
9634: return(ret);
9635: }
9636:
9637: /**
1.181 ! daniel 9638: * xmlParseExternalEntity:
! 9639: * @doc: the document the chunk pertains to
! 9640: * @sax: the SAX handler bloc (possibly NULL)
! 9641: * @user_data: The user data returned on SAX callbacks (possibly NULL)
! 9642: * @URL: the URL for the entity to load
! 9643: * @ID: the System ID for the entity to load
! 9644: * @list: the return value for the set of parsed nodes
! 9645: *
! 9646: * Parse an external general entity
! 9647: * An external general parsed entity is well-formed if it matches the
! 9648: * production labeled extParsedEnt.
! 9649: *
! 9650: * [78] extParsedEnt ::= TextDecl? content
! 9651: *
! 9652: * Returns 0 if the entity is well formed, -1 in case of args problem and
! 9653: * the parser error code otherwise
! 9654: */
! 9655:
! 9656: int
! 9657: xmlParseExternalEntity(xmlDocPtr doc, xmlSAXHandlerPtr sax, void *user_data,
! 9658: const char *URL, const char *ID, xmlNodePtr *list) {
! 9659: xmlParserCtxtPtr ctxt;
! 9660: xmlDocPtr newDoc;
! 9661: xmlSAXHandlerPtr oldsax = NULL;
! 9662: int ret = 0;
! 9663:
! 9664:
! 9665: if (list != NULL)
! 9666: *list = NULL;
! 9667: if ((URL == NULL) && (ID == NULL))
! 9668: return(-1);
! 9669:
! 9670:
! 9671: ctxt = xmlCreateEntityParserCtxt(URL, ID, doc->URL);
! 9672: if (ctxt == NULL) return(-1);
! 9673: ctxt->userData = ctxt;
! 9674: if (sax != NULL) {
! 9675: oldsax = ctxt->sax;
! 9676: ctxt->sax = sax;
! 9677: if (user_data != NULL)
! 9678: ctxt->userData = user_data;
! 9679: }
! 9680: newDoc = xmlNewDoc(BAD_CAST "1.0");
! 9681: if (newDoc == NULL) {
! 9682: xmlFreeParserCtxt(ctxt);
! 9683: return(-1);
! 9684: }
! 9685: if (doc != NULL) {
! 9686: newDoc->intSubset = doc->intSubset;
! 9687: newDoc->extSubset = doc->extSubset;
! 9688: }
! 9689: if (doc->URL != NULL) {
! 9690: newDoc->URL = xmlStrdup(doc->URL);
! 9691: }
! 9692: newDoc->children = xmlNewDocNode(newDoc, NULL, BAD_CAST "pseudoroot", NULL);
! 9693: if (newDoc->children == NULL) {
! 9694: if (sax != NULL)
! 9695: ctxt->sax = oldsax;
! 9696: xmlFreeParserCtxt(ctxt);
! 9697: newDoc->intSubset = NULL;
! 9698: newDoc->extSubset = NULL;
! 9699: xmlFreeDoc(newDoc);
! 9700: return(-1);
! 9701: }
! 9702: nodePush(ctxt, newDoc->children);
! 9703: if (doc == NULL) {
! 9704: ctxt->myDoc = newDoc;
! 9705: } else {
! 9706: ctxt->myDoc = doc;
! 9707: newDoc->children->doc = doc;
! 9708: }
! 9709:
! 9710: /*
! 9711: * Parse a possible text declaration first
! 9712: */
! 9713: GROW;
! 9714: if ((RAW == '<') && (NXT(1) == '?') &&
! 9715: (NXT(2) == 'x') && (NXT(3) == 'm') &&
! 9716: (NXT(4) == 'l') && (IS_BLANK(NXT(5)))) {
! 9717: xmlParseTextDecl(ctxt);
! 9718: }
! 9719:
! 9720: /*
! 9721: * Doing validity checking on chunk doesn't make sense
! 9722: */
! 9723: ctxt->instate = XML_PARSER_CONTENT;
! 9724: ctxt->validate = 0;
! 9725:
! 9726: xmlParseContent(ctxt);
! 9727:
! 9728: if ((RAW == '<') && (NXT(1) == '/')) {
! 9729: if ((ctxt->sax != NULL) && (ctxt->sax->error != NULL))
! 9730: ctxt->sax->error(ctxt->userData,
! 9731: "chunk is not well balanced\n");
! 9732: ctxt->wellFormed = 0;
! 9733: ctxt->disableSAX = 1;
! 9734: ctxt->errNo = XML_ERR_NOT_WELL_BALANCED;
! 9735: } else if (RAW != 0) {
! 9736: if ((ctxt->sax != NULL) && (ctxt->sax->error != NULL))
! 9737: ctxt->sax->error(ctxt->userData,
! 9738: "extra content at the end of well balanced chunk\n");
! 9739: ctxt->wellFormed = 0;
! 9740: ctxt->disableSAX = 1;
! 9741: ctxt->errNo = XML_ERR_EXTRA_CONTENT;
! 9742: }
! 9743: if (ctxt->node != newDoc->children) {
! 9744: if ((ctxt->sax != NULL) && (ctxt->sax->error != NULL))
! 9745: ctxt->sax->error(ctxt->userData,
! 9746: "chunk is not well balanced\n");
! 9747: ctxt->wellFormed = 0;
! 9748: ctxt->disableSAX = 1;
! 9749: ctxt->errNo = XML_ERR_NOT_WELL_BALANCED;
! 9750: }
! 9751:
! 9752: if (!ctxt->wellFormed) {
! 9753: if (ctxt->errNo == 0)
! 9754: ret = 1;
! 9755: else
! 9756: ret = ctxt->errNo;
! 9757: } else {
! 9758: if (list != NULL) {
! 9759: xmlNodePtr cur;
! 9760:
! 9761: /*
! 9762: * Return the newly created nodeset after unlinking it from
! 9763: * they pseudo parent.
! 9764: */
! 9765: cur = newDoc->children->children;
! 9766: *list = cur;
! 9767: while (cur != NULL) {
! 9768: cur->parent = NULL;
! 9769: cur = cur->next;
! 9770: }
! 9771: newDoc->children->children = NULL;
! 9772: }
! 9773: ret = 0;
! 9774: }
! 9775: if (sax != NULL)
! 9776: ctxt->sax = oldsax;
! 9777: xmlFreeParserCtxt(ctxt);
! 9778: newDoc->intSubset = NULL;
! 9779: newDoc->extSubset = NULL;
! 9780: xmlFreeDoc(newDoc);
! 9781:
! 9782: return(ret);
! 9783: }
! 9784:
! 9785: /**
! 9786: * xmlParseBalancedChunk:
1.176 daniel 9787: * @doc: the document the chunk pertains to
9788: * @sax: the SAX handler bloc (possibly NULL)
9789: * @user_data: The user data returned on SAX callbacks (possibly NULL)
9790: * @string: the input string in UTF8 or ISO-Latin (zero terminated)
9791: * @list: the return value for the set of parsed nodes
9792: *
9793: * Parse a well-balanced chunk of an XML document
9794: * called by the parser
9795: * The allowed sequence for the Well Balanced Chunk is the one defined by
9796: * the content production in the XML grammar:
1.144 daniel 9797: *
1.175 daniel 9798: * [43] content ::= (element | CharData | Reference | CDSect | PI | Comment)*
9799: *
1.176 daniel 9800: * Returns 0 if the chunk is well balanced, -1 in case of args problem and
9801: * the parser error code otherwise
1.144 daniel 9802: */
9803:
1.175 daniel 9804: int
9805: xmlParseBalancedChunkMemory(xmlDocPtr doc, xmlSAXHandlerPtr sax,
1.176 daniel 9806: void *user_data, const xmlChar *string, xmlNodePtr *list) {
9807: xmlParserCtxtPtr ctxt;
1.175 daniel 9808: xmlDocPtr newDoc;
1.181 ! daniel 9809: xmlSAXHandlerPtr oldsax = NULL;
1.175 daniel 9810: int size;
1.176 daniel 9811: int ret = 0;
1.175 daniel 9812:
9813:
1.176 daniel 9814: if (list != NULL)
9815: *list = NULL;
9816: if (string == NULL)
9817: return(-1);
9818:
9819: size = xmlStrlen(string);
9820:
9821: ctxt = xmlCreateMemoryParserCtxt((char *) string, size + 1);
9822: if (ctxt == NULL) return(-1);
9823: ctxt->userData = ctxt;
1.175 daniel 9824: if (sax != NULL) {
1.176 daniel 9825: oldsax = ctxt->sax;
9826: ctxt->sax = sax;
9827: if (user_data != NULL)
9828: ctxt->userData = user_data;
1.175 daniel 9829: }
9830: newDoc = xmlNewDoc(BAD_CAST "1.0");
1.176 daniel 9831: if (newDoc == NULL) {
9832: xmlFreeParserCtxt(ctxt);
9833: return(-1);
9834: }
1.175 daniel 9835: if (doc != NULL) {
9836: newDoc->intSubset = doc->intSubset;
9837: newDoc->extSubset = doc->extSubset;
9838: }
1.176 daniel 9839: newDoc->children = xmlNewDocNode(newDoc, NULL, BAD_CAST "pseudoroot", NULL);
9840: if (newDoc->children == NULL) {
9841: if (sax != NULL)
9842: ctxt->sax = oldsax;
9843: xmlFreeParserCtxt(ctxt);
9844: newDoc->intSubset = NULL;
9845: newDoc->extSubset = NULL;
9846: xmlFreeDoc(newDoc);
9847: return(-1);
9848: }
9849: nodePush(ctxt, newDoc->children);
9850: if (doc == NULL) {
9851: ctxt->myDoc = newDoc;
9852: } else {
9853: ctxt->myDoc = doc;
9854: newDoc->children->doc = doc;
9855: }
9856: ctxt->instate = XML_PARSER_CONTENT;
9857:
9858: /*
9859: * Doing validity checking on chunk doesn't make sense
9860: */
9861: ctxt->validate = 0;
9862:
1.175 daniel 9863: xmlParseContent(ctxt);
1.176 daniel 9864:
9865: if ((RAW == '<') && (NXT(1) == '/')) {
9866: if ((ctxt->sax != NULL) && (ctxt->sax->error != NULL))
9867: ctxt->sax->error(ctxt->userData,
9868: "chunk is not well balanced\n");
9869: ctxt->wellFormed = 0;
1.180 daniel 9870: ctxt->disableSAX = 1;
1.176 daniel 9871: ctxt->errNo = XML_ERR_NOT_WELL_BALANCED;
9872: } else if (RAW != 0) {
9873: if ((ctxt->sax != NULL) && (ctxt->sax->error != NULL))
9874: ctxt->sax->error(ctxt->userData,
9875: "extra content at the end of well balanced chunk\n");
9876: ctxt->wellFormed = 0;
1.180 daniel 9877: ctxt->disableSAX = 1;
1.176 daniel 9878: ctxt->errNo = XML_ERR_EXTRA_CONTENT;
9879: }
9880: if (ctxt->node != newDoc->children) {
9881: if ((ctxt->sax != NULL) && (ctxt->sax->error != NULL))
9882: ctxt->sax->error(ctxt->userData,
9883: "chunk is not well balanced\n");
9884: ctxt->wellFormed = 0;
1.180 daniel 9885: ctxt->disableSAX = 1;
1.176 daniel 9886: ctxt->errNo = XML_ERR_NOT_WELL_BALANCED;
9887: }
1.175 daniel 9888:
1.176 daniel 9889: if (!ctxt->wellFormed) {
9890: if (ctxt->errNo == 0)
9891: ret = 1;
9892: else
9893: ret = ctxt->errNo;
9894: } else {
9895: if (list != NULL) {
9896: xmlNodePtr cur;
1.175 daniel 9897:
1.176 daniel 9898: /*
9899: * Return the newly created nodeset after unlinking it from
9900: * they pseudo parent.
9901: */
9902: cur = newDoc->children->children;
9903: *list = cur;
9904: while (cur != NULL) {
9905: cur->parent = NULL;
9906: cur = cur->next;
9907: }
9908: newDoc->children->children = NULL;
9909: }
9910: ret = 0;
1.175 daniel 9911: }
1.176 daniel 9912: if (sax != NULL)
9913: ctxt->sax = oldsax;
1.175 daniel 9914: xmlFreeParserCtxt(ctxt);
9915: newDoc->intSubset = NULL;
9916: newDoc->extSubset = NULL;
1.176 daniel 9917: xmlFreeDoc(newDoc);
1.175 daniel 9918:
1.176 daniel 9919: return(ret);
1.144 daniel 9920: }
9921:
9922: /**
1.181 ! daniel 9923: * xmlParseBalancedChunkFile:
1.144 daniel 9924: * @doc: the document the chunk pertains to
9925: *
9926: * Parse a well-balanced chunk of an XML document contained in a file
9927: *
9928: * Returns the resulting list of nodes resulting from the parsing,
9929: * they are not added to @node
9930: */
9931:
9932: xmlNodePtr
9933: xmlParseBalancedChunkFile(xmlDocPtr doc, xmlNodePtr node) {
1.156 daniel 9934: /* TODO !!! */
9935: return(NULL);
1.144 daniel 9936: }
9937:
9938: /**
1.181 ! daniel 9939: * xmlRecoverDoc:
1.123 daniel 9940: * @cur: a pointer to an array of xmlChar
1.59 daniel 9941: *
9942: * parse an XML in-memory document and build a tree.
9943: * In the case the document is not Well Formed, a tree is built anyway
9944: *
1.68 daniel 9945: * Returns the resulting document tree
1.59 daniel 9946: */
9947:
1.69 daniel 9948: xmlDocPtr
1.123 daniel 9949: xmlRecoverDoc(xmlChar *cur) {
1.59 daniel 9950: return(xmlSAXParseDoc(NULL, cur, 1));
1.55 daniel 9951: }
9952:
9953: /**
1.181 ! daniel 9954: * xmlCreateEntityParserCtxt:
! 9955: * @URL: the entity URL
! 9956: * @ID: the entity PUBLIC ID
! 9957: * @base: a posible base for the target URI
! 9958: *
! 9959: * Create a parser context for an external entity
! 9960: * Automatic support for ZLIB/Compress compressed document is provided
! 9961: * by default if found at compile-time.
! 9962: *
! 9963: * Returns the new parser context or NULL
! 9964: */
! 9965: xmlParserCtxtPtr
! 9966: xmlCreateEntityParserCtxt(const xmlChar *URL, const xmlChar *ID,
! 9967: const xmlChar *base) {
! 9968: xmlParserCtxtPtr ctxt;
! 9969: xmlParserInputPtr inputStream;
! 9970: char *directory = NULL;
! 9971:
! 9972: ctxt = xmlNewParserCtxt();
! 9973: if (ctxt == NULL) {
! 9974: return(NULL);
! 9975: }
! 9976: ctxt->directory = xmlParserGetDirectory
! 9977:
! 9978: inputStream = xmlLoadExternalEntity(URL, ID, ctxt);
! 9979: if (inputStream == NULL) {
! 9980: xmlFreeParserCtxt(ctxt);
! 9981: return(NULL);
! 9982: }
! 9983:
! 9984: inputPush(ctxt, inputStream);
! 9985:
! 9986: if ((ctxt->directory == NULL) && (directory == NULL))
! 9987: directory = xmlParserGetDirectory(URL);
! 9988: if ((ctxt->directory == NULL) && (directory != NULL))
! 9989: ctxt->directory = directory;
! 9990:
! 9991: return(ctxt);
! 9992: }
! 9993:
! 9994: /**
! 9995: * xmlCreateFileParserCtxt:
1.50 daniel 9996: * @filename: the filename
9997: *
1.69 daniel 9998: * Create a parser context for a file content.
9999: * Automatic support for ZLIB/Compress compressed document is provided
10000: * by default if found at compile-time.
1.50 daniel 10001: *
1.69 daniel 10002: * Returns the new parser context or NULL
1.9 httpng 10003: */
1.69 daniel 10004: xmlParserCtxtPtr
10005: xmlCreateFileParserCtxt(const char *filename)
10006: {
10007: xmlParserCtxtPtr ctxt;
1.40 daniel 10008: xmlParserInputPtr inputStream;
1.91 daniel 10009: xmlParserInputBufferPtr buf;
1.111 daniel 10010: char *directory = NULL;
1.9 httpng 10011:
1.91 daniel 10012: buf = xmlParserInputBufferCreateFilename(filename, XML_CHAR_ENCODING_NONE);
10013: if (buf == NULL) return(NULL);
1.9 httpng 10014:
1.97 daniel 10015: ctxt = xmlNewParserCtxt();
1.16 daniel 10016: if (ctxt == NULL) {
10017: return(NULL);
10018: }
1.97 daniel 10019:
1.96 daniel 10020: inputStream = xmlNewInputStream(ctxt);
1.40 daniel 10021: if (inputStream == NULL) {
1.97 daniel 10022: xmlFreeParserCtxt(ctxt);
1.40 daniel 10023: return(NULL);
10024: }
10025:
1.119 daniel 10026: inputStream->filename = xmlMemStrdup(filename);
1.91 daniel 10027: inputStream->buf = buf;
10028: inputStream->base = inputStream->buf->buffer->content;
10029: inputStream->cur = inputStream->buf->buffer->content;
1.16 daniel 10030:
1.40 daniel 10031: inputPush(ctxt, inputStream);
1.110 daniel 10032: if ((ctxt->directory == NULL) && (directory == NULL))
1.106 daniel 10033: directory = xmlParserGetDirectory(filename);
10034: if ((ctxt->directory == NULL) && (directory != NULL))
1.110 daniel 10035: ctxt->directory = directory;
1.106 daniel 10036:
1.69 daniel 10037: return(ctxt);
10038: }
10039:
10040: /**
1.181 ! daniel 10041: * xmlSAXParseFile:
1.69 daniel 10042: * @sax: the SAX handler block
10043: * @filename: the filename
10044: * @recovery: work in recovery mode, i.e. tries to read no Well Formed
10045: * documents
10046: *
10047: * parse an XML file and build a tree. Automatic support for ZLIB/Compress
10048: * compressed document is provided by default if found at compile-time.
10049: * It use the given SAX function block to handle the parsing callback.
10050: * If sax is NULL, fallback to the default DOM tree building routines.
10051: *
10052: * Returns the resulting document tree
10053: */
10054:
1.79 daniel 10055: xmlDocPtr
10056: xmlSAXParseFile(xmlSAXHandlerPtr sax, const char *filename,
1.69 daniel 10057: int recovery) {
10058: xmlDocPtr ret;
10059: xmlParserCtxtPtr ctxt;
1.111 daniel 10060: char *directory = NULL;
1.69 daniel 10061:
10062: ctxt = xmlCreateFileParserCtxt(filename);
10063: if (ctxt == NULL) return(NULL);
1.74 daniel 10064: if (sax != NULL) {
1.93 veillard 10065: if (ctxt->sax != NULL)
1.119 daniel 10066: xmlFree(ctxt->sax);
1.74 daniel 10067: ctxt->sax = sax;
10068: ctxt->userData = NULL;
10069: }
1.106 daniel 10070:
1.110 daniel 10071: if ((ctxt->directory == NULL) && (directory == NULL))
1.106 daniel 10072: directory = xmlParserGetDirectory(filename);
10073: if ((ctxt->directory == NULL) && (directory != NULL))
1.156 daniel 10074: ctxt->directory = (char *) xmlStrdup((xmlChar *) directory);
1.16 daniel 10075:
10076: xmlParseDocument(ctxt);
1.40 daniel 10077:
1.72 daniel 10078: if ((ctxt->wellFormed) || recovery) ret = ctxt->myDoc;
1.59 daniel 10079: else {
10080: ret = NULL;
1.72 daniel 10081: xmlFreeDoc(ctxt->myDoc);
10082: ctxt->myDoc = NULL;
1.59 daniel 10083: }
1.86 daniel 10084: if (sax != NULL)
10085: ctxt->sax = NULL;
1.69 daniel 10086: xmlFreeParserCtxt(ctxt);
1.20 daniel 10087:
10088: return(ret);
10089: }
10090:
1.55 daniel 10091: /**
1.181 ! daniel 10092: * xmlParseFile:
1.55 daniel 10093: * @filename: the filename
10094: *
10095: * parse an XML file and build a tree. Automatic support for ZLIB/Compress
10096: * compressed document is provided by default if found at compile-time.
10097: *
1.68 daniel 10098: * Returns the resulting document tree
1.55 daniel 10099: */
10100:
1.79 daniel 10101: xmlDocPtr
10102: xmlParseFile(const char *filename) {
1.59 daniel 10103: return(xmlSAXParseFile(NULL, filename, 0));
10104: }
10105:
10106: /**
1.181 ! daniel 10107: * xmlRecoverFile:
1.59 daniel 10108: * @filename: the filename
10109: *
10110: * parse an XML file and build a tree. Automatic support for ZLIB/Compress
10111: * compressed document is provided by default if found at compile-time.
10112: * In the case the document is not Well Formed, a tree is built anyway
10113: *
1.68 daniel 10114: * Returns the resulting document tree
1.59 daniel 10115: */
10116:
1.79 daniel 10117: xmlDocPtr
10118: xmlRecoverFile(const char *filename) {
1.59 daniel 10119: return(xmlSAXParseFile(NULL, filename, 1));
1.55 daniel 10120: }
1.32 daniel 10121:
1.50 daniel 10122: /**
1.181 ! daniel 10123: * xmlCreateMemoryParserCtxt:
! 10124: * @buffer: a pointer to a zero terminated char array
! 10125: * @size: the size of the array (without the trailing 0)
1.50 daniel 10126: *
1.69 daniel 10127: * Create a parser context for an XML in-memory document.
1.50 daniel 10128: *
1.69 daniel 10129: * Returns the new parser context or NULL
1.20 daniel 10130: */
1.69 daniel 10131: xmlParserCtxtPtr
10132: xmlCreateMemoryParserCtxt(char *buffer, int size) {
1.20 daniel 10133: xmlParserCtxtPtr ctxt;
1.40 daniel 10134: xmlParserInputPtr input;
10135:
1.179 daniel 10136: if (buffer[size] != 0)
1.181 ! daniel 10137: return(NULL);
1.40 daniel 10138:
1.97 daniel 10139: ctxt = xmlNewParserCtxt();
1.181 ! daniel 10140: if (ctxt == NULL)
1.20 daniel 10141: return(NULL);
1.97 daniel 10142:
1.96 daniel 10143: input = xmlNewInputStream(ctxt);
1.40 daniel 10144: if (input == NULL) {
1.97 daniel 10145: xmlFreeParserCtxt(ctxt);
1.40 daniel 10146: return(NULL);
10147: }
1.20 daniel 10148:
1.40 daniel 10149: input->filename = NULL;
10150: input->line = 1;
10151: input->col = 1;
1.96 daniel 10152: input->buf = NULL;
1.91 daniel 10153: input->consumed = 0;
1.75 daniel 10154:
1.116 daniel 10155: input->base = BAD_CAST buffer;
10156: input->cur = BAD_CAST buffer;
1.69 daniel 10157: input->free = NULL;
1.20 daniel 10158:
1.40 daniel 10159: inputPush(ctxt, input);
1.69 daniel 10160: return(ctxt);
10161: }
10162:
10163: /**
1.181 ! daniel 10164: * xmlSAXParseMemory:
1.69 daniel 10165: * @sax: the SAX handler block
10166: * @buffer: an pointer to a char array
1.127 daniel 10167: * @size: the size of the array
10168: * @recovery: work in recovery mode, i.e. tries to read not Well Formed
1.69 daniel 10169: * documents
10170: *
10171: * parse an XML in-memory block and use the given SAX function block
10172: * to handle the parsing callback. If sax is NULL, fallback to the default
10173: * DOM tree building routines.
10174: *
10175: * Returns the resulting document tree
10176: */
10177: xmlDocPtr
10178: xmlSAXParseMemory(xmlSAXHandlerPtr sax, char *buffer, int size, int recovery) {
10179: xmlDocPtr ret;
10180: xmlParserCtxtPtr ctxt;
10181:
10182: ctxt = xmlCreateMemoryParserCtxt(buffer, size);
10183: if (ctxt == NULL) return(NULL);
1.74 daniel 10184: if (sax != NULL) {
10185: ctxt->sax = sax;
10186: ctxt->userData = NULL;
10187: }
1.20 daniel 10188:
10189: xmlParseDocument(ctxt);
1.40 daniel 10190:
1.72 daniel 10191: if ((ctxt->wellFormed) || recovery) ret = ctxt->myDoc;
1.59 daniel 10192: else {
10193: ret = NULL;
1.72 daniel 10194: xmlFreeDoc(ctxt->myDoc);
10195: ctxt->myDoc = NULL;
1.59 daniel 10196: }
1.86 daniel 10197: if (sax != NULL)
10198: ctxt->sax = NULL;
1.69 daniel 10199: xmlFreeParserCtxt(ctxt);
1.16 daniel 10200:
1.9 httpng 10201: return(ret);
1.17 daniel 10202: }
10203:
1.55 daniel 10204: /**
1.181 ! daniel 10205: * xmlParseMemory:
1.68 daniel 10206: * @buffer: an pointer to a char array
1.55 daniel 10207: * @size: the size of the array
10208: *
10209: * parse an XML in-memory block and build a tree.
10210: *
1.68 daniel 10211: * Returns the resulting document tree
1.55 daniel 10212: */
10213:
10214: xmlDocPtr xmlParseMemory(char *buffer, int size) {
1.59 daniel 10215: return(xmlSAXParseMemory(NULL, buffer, size, 0));
10216: }
10217:
10218: /**
1.181 ! daniel 10219: * xmlRecoverMemory:
1.68 daniel 10220: * @buffer: an pointer to a char array
1.59 daniel 10221: * @size: the size of the array
10222: *
10223: * parse an XML in-memory block and build a tree.
10224: * In the case the document is not Well Formed, a tree is built anyway
10225: *
1.68 daniel 10226: * Returns the resulting document tree
1.59 daniel 10227: */
10228:
10229: xmlDocPtr xmlRecoverMemory(char *buffer, int size) {
10230: return(xmlSAXParseMemory(NULL, buffer, size, 1));
1.17 daniel 10231: }
10232:
10233:
1.50 daniel 10234: /**
10235: * xmlSetupParserForBuffer:
10236: * @ctxt: an XML parser context
1.123 daniel 10237: * @buffer: a xmlChar * buffer
1.50 daniel 10238: * @filename: a file name
10239: *
1.19 daniel 10240: * Setup the parser context to parse a new buffer; Clears any prior
10241: * contents from the parser context. The buffer parameter must not be
10242: * NULL, but the filename parameter can be
10243: */
1.55 daniel 10244: void
1.123 daniel 10245: xmlSetupParserForBuffer(xmlParserCtxtPtr ctxt, const xmlChar* buffer,
1.17 daniel 10246: const char* filename)
10247: {
1.96 daniel 10248: xmlParserInputPtr input;
1.40 daniel 10249:
1.96 daniel 10250: input = xmlNewInputStream(ctxt);
10251: if (input == NULL) {
10252: perror("malloc");
1.119 daniel 10253: xmlFree(ctxt);
1.145 daniel 10254: return;
1.96 daniel 10255: }
10256:
10257: xmlClearParserCtxt(ctxt);
10258: if (filename != NULL)
1.119 daniel 10259: input->filename = xmlMemStrdup(filename);
1.96 daniel 10260: input->base = buffer;
10261: input->cur = buffer;
10262: inputPush(ctxt, input);
1.17 daniel 10263: }
10264:
1.123 daniel 10265: /**
10266: * xmlSAXUserParseFile:
10267: * @sax: a SAX handler
10268: * @user_data: The user data returned on SAX callbacks
10269: * @filename: a file name
10270: *
10271: * parse an XML file and call the given SAX handler routines.
10272: * Automatic support for ZLIB/Compress compressed document is provided
10273: *
10274: * Returns 0 in case of success or a error number otherwise
10275: */
1.131 daniel 10276: int
10277: xmlSAXUserParseFile(xmlSAXHandlerPtr sax, void *user_data,
10278: const char *filename) {
1.123 daniel 10279: int ret = 0;
10280: xmlParserCtxtPtr ctxt;
10281:
10282: ctxt = xmlCreateFileParserCtxt(filename);
10283: if (ctxt == NULL) return -1;
1.134 daniel 10284: if (ctxt->sax != &xmlDefaultSAXHandler)
10285: xmlFree(ctxt->sax);
1.123 daniel 10286: ctxt->sax = sax;
1.140 daniel 10287: if (user_data != NULL)
10288: ctxt->userData = user_data;
1.123 daniel 10289:
10290: xmlParseDocument(ctxt);
10291:
10292: if (ctxt->wellFormed)
10293: ret = 0;
10294: else {
10295: if (ctxt->errNo != 0)
10296: ret = ctxt->errNo;
10297: else
10298: ret = -1;
10299: }
10300: if (sax != NULL)
10301: ctxt->sax = NULL;
10302: xmlFreeParserCtxt(ctxt);
10303:
10304: return ret;
10305: }
10306:
10307: /**
10308: * xmlSAXUserParseMemory:
10309: * @sax: a SAX handler
10310: * @user_data: The user data returned on SAX callbacks
10311: * @buffer: an in-memory XML document input
1.127 daniel 10312: * @size: the length of the XML document in bytes
1.123 daniel 10313: *
10314: * A better SAX parsing routine.
10315: * parse an XML in-memory buffer and call the given SAX handler routines.
10316: *
10317: * Returns 0 in case of success or a error number otherwise
10318: */
10319: int xmlSAXUserParseMemory(xmlSAXHandlerPtr sax, void *user_data,
10320: char *buffer, int size) {
10321: int ret = 0;
10322: xmlParserCtxtPtr ctxt;
10323:
10324: ctxt = xmlCreateMemoryParserCtxt(buffer, size);
10325: if (ctxt == NULL) return -1;
10326: ctxt->sax = sax;
10327: ctxt->userData = user_data;
10328:
10329: xmlParseDocument(ctxt);
10330:
10331: if (ctxt->wellFormed)
10332: ret = 0;
10333: else {
10334: if (ctxt->errNo != 0)
10335: ret = ctxt->errNo;
10336: else
10337: ret = -1;
10338: }
10339: if (sax != NULL)
10340: ctxt->sax = NULL;
10341: xmlFreeParserCtxt(ctxt);
10342:
10343: return ret;
10344: }
10345:
1.32 daniel 10346:
1.98 daniel 10347: /************************************************************************
10348: * *
1.127 daniel 10349: * Miscellaneous *
1.98 daniel 10350: * *
10351: ************************************************************************/
10352:
/**
 * xmlCleanupParser:
 *
 * Cleanup function for the XML parser. It is meant to reclaim the
 * parsing related global memory allocated for the parser processing,
 * by calling the cleanup routines of the character encoding handlers
 * and of the predefined entities, in that order.
 * It doesn't deallocate any document related memory. Calling this
 * function should not prevent reusing the parser.
 */

void
xmlCleanupParser(void)
{
    xmlCleanupCharEncodingHandlers();
    xmlCleanupPredefinedEntities();
}
1.98 daniel 10367:
1.50 daniel 10368: /**
10369: * xmlParserFindNodeInfo:
10370: * @ctxt: an XML parser context
10371: * @node: an XML node within the tree
10372: *
10373: * Find the parser node info struct for a given node
10374: *
1.68 daniel 10375: * Returns an xmlParserNodeInfo block pointer or NULL
1.32 daniel 10376: */
10377: const xmlParserNodeInfo* xmlParserFindNodeInfo(const xmlParserCtxt* ctx,
10378: const xmlNode* node)
10379: {
10380: unsigned long pos;
10381:
10382: /* Find position where node should be at */
10383: pos = xmlParserFindNodeInfoIndex(&ctx->node_seq, node);
10384: if ( ctx->node_seq.buffer[pos].node == node )
10385: return &ctx->node_seq.buffer[pos];
10386: else
10387: return NULL;
10388: }
10389:
10390:
1.50 daniel 10391: /**
1.181 ! daniel 10392: * xmlInitNodeInfoSeq:
1.50 daniel 10393: * @seq: a node info sequence pointer
10394: *
10395: * -- Initialize (set to initial state) node info sequence
1.32 daniel 10396: */
1.55 daniel 10397: void
10398: xmlInitNodeInfoSeq(xmlParserNodeInfoSeqPtr seq)
1.32 daniel 10399: {
10400: seq->length = 0;
10401: seq->maximum = 0;
10402: seq->buffer = NULL;
10403: }
10404:
1.50 daniel 10405: /**
1.181 ! daniel 10406: * xmlClearNodeInfoSeq:
1.50 daniel 10407: * @seq: a node info sequence pointer
10408: *
10409: * -- Clear (release memory and reinitialize) node
1.32 daniel 10410: * info sequence
10411: */
1.55 daniel 10412: void
10413: xmlClearNodeInfoSeq(xmlParserNodeInfoSeqPtr seq)
1.32 daniel 10414: {
10415: if ( seq->buffer != NULL )
1.119 daniel 10416: xmlFree(seq->buffer);
1.32 daniel 10417: xmlInitNodeInfoSeq(seq);
10418: }
10419:
10420:
1.50 daniel 10421: /**
10422: * xmlParserFindNodeInfoIndex:
10423: * @seq: a node info sequence pointer
10424: * @node: an XML node pointer
10425: *
10426: *
1.32 daniel 10427: * xmlParserFindNodeInfoIndex : Find the index that the info record for
10428: * the given node is or should be at in a sorted sequence
1.68 daniel 10429: *
10430: * Returns a long indicating the position of the record
1.32 daniel 10431: */
10432: unsigned long xmlParserFindNodeInfoIndex(const xmlParserNodeInfoSeq* seq,
10433: const xmlNode* node)
10434: {
10435: unsigned long upper, lower, middle;
10436: int found = 0;
10437:
10438: /* Do a binary search for the key */
10439: lower = 1;
10440: upper = seq->length;
10441: middle = 0;
10442: while ( lower <= upper && !found) {
10443: middle = lower + (upper - lower) / 2;
10444: if ( node == seq->buffer[middle - 1].node )
10445: found = 1;
10446: else if ( node < seq->buffer[middle - 1].node )
10447: upper = middle - 1;
10448: else
10449: lower = middle + 1;
10450: }
10451:
10452: /* Return position */
10453: if ( middle == 0 || seq->buffer[middle - 1].node < node )
10454: return middle;
10455: else
10456: return middle - 1;
10457: }
10458:
10459:
1.50 daniel 10460: /**
10461: * xmlParserAddNodeInfo:
10462: * @ctxt: an XML parser context
1.68 daniel 10463: * @info: a node info sequence pointer
1.50 daniel 10464: *
10465: * Insert node info record into the sorted sequence
1.32 daniel 10466: */
1.55 daniel 10467: void
10468: xmlParserAddNodeInfo(xmlParserCtxtPtr ctxt,
1.68 daniel 10469: const xmlParserNodeInfo* info)
1.32 daniel 10470: {
10471: unsigned long pos;
10472: static unsigned int block_size = 5;
10473:
10474: /* Find pos and check to see if node is already in the sequence */
1.55 daniel 10475: pos = xmlParserFindNodeInfoIndex(&ctxt->node_seq, info->node);
10476: if ( pos < ctxt->node_seq.length
10477: && ctxt->node_seq.buffer[pos].node == info->node ) {
10478: ctxt->node_seq.buffer[pos] = *info;
1.32 daniel 10479: }
10480:
10481: /* Otherwise, we need to add new node to buffer */
10482: else {
10483: /* Expand buffer by 5 if needed */
1.55 daniel 10484: if ( ctxt->node_seq.length + 1 > ctxt->node_seq.maximum ) {
1.32 daniel 10485: xmlParserNodeInfo* tmp_buffer;
1.55 daniel 10486: unsigned int byte_size = (sizeof(*ctxt->node_seq.buffer)
10487: *(ctxt->node_seq.maximum + block_size));
1.32 daniel 10488:
1.55 daniel 10489: if ( ctxt->node_seq.buffer == NULL )
1.119 daniel 10490: tmp_buffer = (xmlParserNodeInfo*) xmlMalloc(byte_size);
1.32 daniel 10491: else
1.119 daniel 10492: tmp_buffer = (xmlParserNodeInfo*) xmlRealloc(ctxt->node_seq.buffer, byte_size);
1.32 daniel 10493:
10494: if ( tmp_buffer == NULL ) {
1.55 daniel 10495: if ((ctxt->sax != NULL) && (ctxt->sax->error != NULL))
1.74 daniel 10496: ctxt->sax->error(ctxt->userData, "Out of memory\n");
1.123 daniel 10497: ctxt->errNo = XML_ERR_NO_MEMORY;
1.32 daniel 10498: return;
10499: }
1.55 daniel 10500: ctxt->node_seq.buffer = tmp_buffer;
10501: ctxt->node_seq.maximum += block_size;
1.32 daniel 10502: }
10503:
10504: /* If position is not at end, move elements out of the way */
1.55 daniel 10505: if ( pos != ctxt->node_seq.length ) {
1.32 daniel 10506: unsigned long i;
10507:
1.55 daniel 10508: for ( i = ctxt->node_seq.length; i > pos; i-- )
10509: ctxt->node_seq.buffer[i] = ctxt->node_seq.buffer[i - 1];
1.32 daniel 10510: }
10511:
10512: /* Copy element and increase length */
1.55 daniel 10513: ctxt->node_seq.buffer[pos] = *info;
10514: ctxt->node_seq.length++;
1.32 daniel 10515: }
10516: }
1.77 daniel 10517:
1.98 daniel 10518:
10519: /**
1.181 ! daniel 10520: * xmlSubstituteEntitiesDefault:
1.98 daniel 10521: * @val: int 0 or 1
10522: *
10523: * Set and return the previous value for default entity support.
10524: * Initially the parser always keep entity references instead of substituting
10525: * entity values in the output. This function has to be used to change the
10526: * default parser behaviour
10527: * SAX::subtituteEntities() has to be used for changing that on a file by
10528: * file basis.
10529: *
10530: * Returns the last value for 0 for no substitution, 1 for substitution.
10531: */
10532:
10533: int
10534: xmlSubstituteEntitiesDefault(int val) {
10535: int old = xmlSubstituteEntitiesDefaultValue;
10536:
10537: xmlSubstituteEntitiesDefaultValue = val;
1.180 daniel 10538: return(old);
10539: }
10540:
10541: /**
10542: * xmlKeepBlanksDefault:
10543: * @val: int 0 or 1
10544: *
10545: * Set and return the previous value for default blanks text nodes support.
10546: * The 1.x version of the parser used an heuristic to try to detect
10547: * ignorable white spaces. As a result the SAX callback was generating
10548: * ignorableWhitespace() callbacks instead of characters() one, and when
10549: * using the DOM output text nodes containing those blanks were not generated.
10550: * The 2.x and later version will switch to the XML standard way and
10551: * ignorableWhitespace() are only generated when running the parser in
10552: * validating mode and when the current element doesn't allow CDATA or
10553: * mixed content.
10554: * This function is provided as a way to force the standard behaviour
10555: * on 1.X libs and to switch back to the old mode for compatibility when
10556: * running 1.X client code on 2.X . Upgrade of 1.X code should be done
10557: * by using xmlIsBlankNode() commodity function to detect the "empty"
10558: * nodes generated.
10559: * This value also affect autogeneration of indentation when saving code
10560: * if blanks sections are kept, indentation is not generated.
10561: *
10562: * Returns the last value for 0 for no substitution, 1 for substitution.
10563: */
10564:
10565: int
10566: xmlKeepBlanksDefault(int val) {
10567: int old = xmlKeepBlanksDefaultValue;
10568:
10569: xmlKeepBlanksDefaultValue = val;
10570: xmlIndentTreeOutput = !val;
1.98 daniel 10571: return(old);
10572: }
1.77 daniel 10573:
Webmaster