Annotation of XML/parser.c, revision 1.187
1.1 veillard 1: /*
1.3 veillard 2: * parser.c : an XML 1.0 non-verifying parser
1.15 veillard 3: *
4: * See Copyright for the status of this software.
5: *
1.60 daniel 6: * Daniel.Veillard@w3.org
1.1 veillard 7: */
8:
1.26 daniel 9: #ifdef WIN32
1.138 daniel 10: #include "win32config.h"
1.26 daniel 11: #else
1.121 daniel 12: #include "config.h"
1.26 daniel 13: #endif
1.121 daniel 14:
1.1 veillard 15: #include <stdio.h>
1.121 daniel 16: #include <string.h> /* for memset() only */
17: #ifdef HAVE_CTYPE_H
1.1 veillard 18: #include <ctype.h>
1.121 daniel 19: #endif
20: #ifdef HAVE_STDLIB_H
1.50 daniel 21: #include <stdlib.h>
1.121 daniel 22: #endif
23: #ifdef HAVE_SYS_STAT_H
1.9 httpng 24: #include <sys/stat.h>
1.121 daniel 25: #endif
1.9 httpng 26: #ifdef HAVE_FCNTL_H
27: #include <fcntl.h>
28: #endif
1.10 httpng 29: #ifdef HAVE_UNISTD_H
30: #include <unistd.h>
31: #endif
1.20 daniel 32: #ifdef HAVE_ZLIB_H
33: #include <zlib.h>
34: #endif
1.1 veillard 35:
1.119 daniel 36: #include "xmlmemory.h"
1.14 veillard 37: #include "tree.h"
1.1 veillard 38: #include "parser.h"
1.14 veillard 39: #include "entities.h"
1.75 daniel 40: #include "encoding.h"
1.61 daniel 41: #include "valid.h"
1.69 daniel 42: #include "parserInternals.h"
1.91 daniel 43: #include "xmlIO.h"
1.122 daniel 44: #include "xml-error.h"
1.1 veillard 45:
1.140 daniel 46: #define XML_PARSER_BIG_BUFFER_SIZE 1000
47: #define XML_PARSER_BUFFER_SIZE 100
48:
1.86 daniel 49: const char *xmlParserVersion = LIBXML_VERSION;
1.160 daniel 50: int xmlGetWarningsDefaultValue = 1;
1.86 daniel 51:
1.139 daniel 52: /*
53: * List of XML prefixed PI allowed by W3C specs
54: */
55:
56: const char *xmlW3CPIs[] = {
57: "xml-stylesheet",
58: NULL
59: };
1.91 daniel 60:
1.151 daniel 61: void xmlParserHandleReference(xmlParserCtxtPtr ctxt);
62: void xmlParserHandlePEReference(xmlParserCtxtPtr ctxt);
63: xmlEntityPtr xmlParseStringPEReference(xmlParserCtxtPtr ctxt,
64: const xmlChar **str);
1.91 daniel 65: /************************************************************************
66: * *
67: * Input handling functions for progressive parsing *
68: * *
69: ************************************************************************/
70:
71: /* #define DEBUG_INPUT */
1.140 daniel 72: /* #define DEBUG_STACK */
73: /* #define DEBUG_PUSH */
74:
1.91 daniel 75:
1.110 daniel 76: #define INPUT_CHUNK 250
77: /* we need to keep enough input to show errors in context */
78: #define LINE_LEN 80
1.91 daniel 79:
80: #ifdef DEBUG_INPUT
81: #define CHECK_BUFFER(in) check_buffer(in)
82:
83: void check_buffer(xmlParserInputPtr in) {
84: if (in->base != in->buf->buffer->content) {
85: fprintf(stderr, "xmlParserInput: base mismatch problem\n");
86: }
87: if (in->cur < in->base) {
88: fprintf(stderr, "xmlParserInput: cur < base problem\n");
89: }
90: if (in->cur > in->base + in->buf->buffer->use) {
91: fprintf(stderr, "xmlParserInput: cur > base + use problem\n");
92: }
93: fprintf(stderr,"buffer %x : content %x, cur %d, use %d, size %d\n",
94: (int) in, (int) in->buf->buffer->content, in->cur - in->base,
95: in->buf->buffer->use, in->buf->buffer->size);
96: }
97:
1.110 daniel 98: #else
99: #define CHECK_BUFFER(in)
100: #endif
101:
1.91 daniel 102:
103: /**
104: * xmlParserInputRead:
105: * @in: an XML parser input
106: * @len: an indicative size for the lookahead
107: *
108: * This function refresh the input for the parser. It doesn't try to
109: * preserve pointers to the input buffer, and discard already read data
110: *
1.123 daniel 111: * Returns the number of xmlChars read, or -1 in case of error, 0 indicate the
1.91 daniel 112: * end of this entity
113: */
114: int
115: xmlParserInputRead(xmlParserInputPtr in, int len) {
116: int ret;
117: int used;
118: int index;
119:
120: #ifdef DEBUG_INPUT
121: fprintf(stderr, "Read\n");
122: #endif
123: if (in->buf == NULL) return(-1);
124: if (in->base == NULL) return(-1);
125: if (in->cur == NULL) return(-1);
126: if (in->buf->buffer == NULL) return(-1);
127:
128: CHECK_BUFFER(in);
129:
130: used = in->cur - in->buf->buffer->content;
131: ret = xmlBufferShrink(in->buf->buffer, used);
132: if (ret > 0) {
133: in->cur -= ret;
134: in->consumed += ret;
135: }
136: ret = xmlParserInputBufferRead(in->buf, len);
137: if (in->base != in->buf->buffer->content) {
138: /*
139: * the buffer has been realloced
140: */
141: index = in->cur - in->base;
142: in->base = in->buf->buffer->content;
143: in->cur = &in->buf->buffer->content[index];
144: }
145:
146: CHECK_BUFFER(in);
147:
148: return(ret);
149: }
150:
151: /**
152: * xmlParserInputGrow:
153: * @in: an XML parser input
154: * @len: an indicative size for the lookahead
155: *
156: * This function increase the input for the parser. It tries to
157: * preserve pointers to the input buffer, and keep already read data
158: *
1.123 daniel 159: * Returns the number of xmlChars read, or -1 in case of error, 0 indicate the
1.91 daniel 160: * end of this entity
161: */
162: int
163: xmlParserInputGrow(xmlParserInputPtr in, int len) {
164: int ret;
165: int index;
166:
167: #ifdef DEBUG_INPUT
168: fprintf(stderr, "Grow\n");
169: #endif
170: if (in->buf == NULL) return(-1);
171: if (in->base == NULL) return(-1);
172: if (in->cur == NULL) return(-1);
173: if (in->buf->buffer == NULL) return(-1);
174:
175: CHECK_BUFFER(in);
176:
177: index = in->cur - in->base;
178: if (in->buf->buffer->use > index + INPUT_CHUNK) {
179:
180: CHECK_BUFFER(in);
181:
182: return(0);
183: }
1.148 daniel 184: if ((in->buf->httpIO != NULL) || (in->buf->ftpIO != NULL) ||
185: (in->buf->file != NULL) ||
1.140 daniel 186: #ifdef HAVE_ZLIB_H
187: (in->buf->gzfile != NULL) ||
188: #endif
189: (in->buf->fd >= 0))
190: ret = xmlParserInputBufferGrow(in->buf, len);
191: else
192: return(0);
1.135 daniel 193:
194: /*
195: * NOTE : in->base may be a "dandling" i.e. freed pointer in this
196: * block, but we use it really as an integer to do some
197: * pointer arithmetic. Insure will raise it as a bug but in
198: * that specific case, that's not !
199: */
1.91 daniel 200: if (in->base != in->buf->buffer->content) {
201: /*
202: * the buffer has been realloced
203: */
204: index = in->cur - in->base;
205: in->base = in->buf->buffer->content;
206: in->cur = &in->buf->buffer->content[index];
207: }
208:
209: CHECK_BUFFER(in);
210:
211: return(ret);
212: }
213:
214: /**
215: * xmlParserInputShrink:
216: * @in: an XML parser input
217: *
218: * This function removes used input for the parser.
219: */
220: void
221: xmlParserInputShrink(xmlParserInputPtr in) {
222: int used;
223: int ret;
224: int index;
225:
226: #ifdef DEBUG_INPUT
227: fprintf(stderr, "Shrink\n");
228: #endif
229: if (in->buf == NULL) return;
230: if (in->base == NULL) return;
231: if (in->cur == NULL) return;
232: if (in->buf->buffer == NULL) return;
233:
234: CHECK_BUFFER(in);
235:
236: used = in->cur - in->buf->buffer->content;
237: if (used > INPUT_CHUNK) {
1.110 daniel 238: ret = xmlBufferShrink(in->buf->buffer, used - LINE_LEN);
1.91 daniel 239: if (ret > 0) {
240: in->cur -= ret;
241: in->consumed += ret;
242: }
243: }
244:
245: CHECK_BUFFER(in);
246:
247: if (in->buf->buffer->use > INPUT_CHUNK) {
248: return;
249: }
250: xmlParserInputBufferRead(in->buf, 2 * INPUT_CHUNK);
251: if (in->base != in->buf->buffer->content) {
252: /*
253: * the buffer has been realloced
254: */
255: index = in->cur - in->base;
256: in->base = in->buf->buffer->content;
257: in->cur = &in->buf->buffer->content[index];
258: }
259:
260: CHECK_BUFFER(in);
261: }
262:
1.45 daniel 263: /************************************************************************
264: * *
265: * Parser stacks related functions and macros *
266: * *
267: ************************************************************************/
1.79 daniel 268:
269: int xmlSubstituteEntitiesDefaultValue = 0;
1.100 daniel 270: int xmlDoValidityCheckingDefaultValue = 0;
1.180 daniel 271: int xmlKeepBlanksDefaultValue = 1;
1.135 daniel 272: xmlEntityPtr xmlParseStringEntityRef(xmlParserCtxtPtr ctxt,
273: const xmlChar ** str);
1.79 daniel 274:
1.1 veillard 275: /*
1.40 daniel 276: * Generic function for accessing stacks in the Parser Context
1.1 veillard 277: */
278:
1.140 daniel 279: #define PUSH_AND_POP(scope, type, name) \
280: scope int name##Push(xmlParserCtxtPtr ctxt, type value) { \
1.31 daniel 281: if (ctxt->name##Nr >= ctxt->name##Max) { \
282: ctxt->name##Max *= 2; \
1.119 daniel 283: ctxt->name##Tab = (void *) xmlRealloc(ctxt->name##Tab, \
1.40 daniel 284: ctxt->name##Max * sizeof(ctxt->name##Tab[0])); \
285: if (ctxt->name##Tab == NULL) { \
1.31 daniel 286: fprintf(stderr, "realloc failed !\n"); \
1.145 daniel 287: return(0); \
1.31 daniel 288: } \
289: } \
1.40 daniel 290: ctxt->name##Tab[ctxt->name##Nr] = value; \
291: ctxt->name = value; \
292: return(ctxt->name##Nr++); \
1.31 daniel 293: } \
1.140 daniel 294: scope type name##Pop(xmlParserCtxtPtr ctxt) { \
1.69 daniel 295: type ret; \
1.40 daniel 296: if (ctxt->name##Nr <= 0) return(0); \
297: ctxt->name##Nr--; \
1.50 daniel 298: if (ctxt->name##Nr > 0) \
299: ctxt->name = ctxt->name##Tab[ctxt->name##Nr - 1]; \
300: else \
301: ctxt->name = NULL; \
1.69 daniel 302: ret = ctxt->name##Tab[ctxt->name##Nr]; \
303: ctxt->name##Tab[ctxt->name##Nr] = 0; \
304: return(ret); \
1.31 daniel 305: } \
306:
1.140 daniel 307: PUSH_AND_POP(extern, xmlParserInputPtr, input)
308: PUSH_AND_POP(extern, xmlNodePtr, node)
309: PUSH_AND_POP(extern, xmlChar*, name)
1.40 daniel 310:
1.176 daniel 311: int spacePush(xmlParserCtxtPtr ctxt, int val) {
312: if (ctxt->spaceNr >= ctxt->spaceMax) {
313: ctxt->spaceMax *= 2;
314: ctxt->spaceTab = (void *) xmlRealloc(ctxt->spaceTab,
315: ctxt->spaceMax * sizeof(ctxt->spaceTab[0]));
316: if (ctxt->spaceTab == NULL) {
317: fprintf(stderr, "realloc failed !\n");
318: return(0);
319: }
320: }
321: ctxt->spaceTab[ctxt->spaceNr] = val;
322: ctxt->space = &ctxt->spaceTab[ctxt->spaceNr];
323: return(ctxt->spaceNr++);
324: }
325:
326: int spacePop(xmlParserCtxtPtr ctxt) {
327: int ret;
328: if (ctxt->spaceNr <= 0) return(0);
329: ctxt->spaceNr--;
330: if (ctxt->spaceNr > 0)
331: ctxt->space = &ctxt->spaceTab[ctxt->spaceNr - 1];
332: else
333: ctxt->space = NULL;
334: ret = ctxt->spaceTab[ctxt->spaceNr];
335: ctxt->spaceTab[ctxt->spaceNr] = -1;
336: return(ret);
337: }
338:
/*
 * Macros for accessing the content. Those should be used only by the parser,
 * and not exported.
 *
 * Dirty macros, i.e. one needs to make assumptions on the context to use them
 *
 *   CUR_PTR return the current pointer to the xmlChar to be parsed.
 *           To be used with extreme caution since operations consuming
 *           characters may move the input buffer to a different location !
 *   CUR     returns the current xmlChar value, i.e. an 8 bit value if
 *           compiled in ISO-Latin or UTF-8.
 *           This should be used internally by the parser
 *           only to compare to ASCII values otherwise it would break when
 *           running with UTF-8 encoding.
 *   NXT(n)  returns the n'th next xmlChar. Same as CUR, it should be used
 *           only to compare on ASCII based substrings.
 *   SKIP(n) Skip n xmlChars, and must also be used only to skip ASCII
 *           defined strings within the parser.
 *
 * Clean macros, not dependent on an ASCII context, expect UTF-8 encoding
 *
 *   NEXT    Skip to the next character, this does the proper decoding
 *           in UTF-8 mode. It also pops finished entities on the fly.
 *   COPY(to) copy one char to *to, increment CUR_PTR and to accordingly
 *   CUR_CHAR Return the current char as an int as well as its length.
 */
1.45 daniel 365:
/*
 * RAW is the current xmlChar of the input, or -1 while a token is
 * pending; CUR is the pending token if any, else the current xmlChar.
 */
#define RAW (ctxt->token ? -1 : (*ctxt->input->cur))
#define CUR (ctxt->token ? ctxt->token : (*ctxt->input->cur))
#define NXT(val) ctxt->input->cur[(val)]
#define CUR_PTR ctxt->input->cur

/*
 * SKIP, SHRINK and GROW expand to several statements. They are wrapped
 * in do { } while (0) so that the whole expansion behaves as a single
 * statement, e.g. as the unbraced body of an if or else.
 */
#define SKIP(val) do {                                                  \
    ctxt->nbChars += (val),ctxt->input->cur += (val);                   \
    if (*ctxt->input->cur == '%') xmlParserHandlePEReference(ctxt);     \
    if (*ctxt->input->cur == '&') xmlParserHandleReference(ctxt);       \
    if ((*ctxt->input->cur == 0) &&                                     \
        (xmlParserInputGrow(ctxt->input, INPUT_CHUNK) <= 0))            \
            xmlPopInput(ctxt);                                          \
  } while (0)

#define SHRINK do {                                                     \
    xmlParserInputShrink(ctxt->input);                                  \
    if ((*ctxt->input->cur == 0) &&                                     \
        (xmlParserInputGrow(ctxt->input, INPUT_CHUNK) <= 0))            \
            xmlPopInput(ctxt);                                          \
  } while (0)

#define GROW do {                                                       \
    xmlParserInputGrow(ctxt->input, INPUT_CHUNK);                       \
    if ((*ctxt->input->cur == 0) &&                                     \
        (xmlParserInputGrow(ctxt->input, INPUT_CHUNK) <= 0))            \
            xmlPopInput(ctxt);                                          \
  } while (0)

/*
 * The macros below keep their historical trailing semicolon, since
 * code using them may have been written without one.
 */
#define SKIP_BLANKS xmlSkipBlankChars(ctxt);

#define NEXT xmlNextChar(ctxt);

/*
 * NEXTL(l) advances the input by l xmlChars: it updates the line/col
 * counters from the current xmlChar, clears the pending token and then
 * hands over to the PE reference / reference handlers if the new
 * current xmlChar is '%' or '&'.
 */
#define NEXTL(l)                                                        \
    if (*(ctxt->input->cur) == '\n') {                                  \
        ctxt->input->line++; ctxt->input->col = 1;                      \
    } else ctxt->input->col++;                                          \
    ctxt->token = 0; ctxt->input->cur += l;                             \
    if (*ctxt->input->cur == '%') xmlParserHandlePEReference(ctxt);     \
    if (*ctxt->input->cur == '&') xmlParserHandleReference(ctxt);

#define CUR_CHAR(l) xmlCurrentChar(ctxt, &l);
#define CUR_SCHAR(s, l) xmlStringCurrentChar(ctxt, s, &l);

/*
 * COPY_BUF(l,b,i,v) appends the char v, of encoded length l, to the
 * buffer b at index i and advances i by the number of xmlChars written.
 */
#define COPY_BUF(l,b,i,v)                                               \
    if (l == 1) b[i++] = (xmlChar) v;                                   \
    else i += xmlCopyChar(l,&b[i],v);
1.151 daniel 406:
407: /**
408: * xmlNextChar:
409: * @ctxt: the XML parser context
410: *
411: * Skip to the next char input char.
412: */
1.55 daniel 413:
1.151 daniel 414: void
415: xmlNextChar(xmlParserCtxtPtr ctxt) {
1.176 daniel 416: /*
417: * TODO: 2.11 End-of-Line Handling
418: * the literal two-character sequence "#xD#xA" or a standalone
419: * literal #xD, an XML processor must pass to the application
420: * the single character #xA.
421: */
1.151 daniel 422: if (ctxt->token != 0) ctxt->token = 0;
423: else {
424: if ((*ctxt->input->cur == 0) &&
425: (xmlParserInputGrow(ctxt->input, INPUT_CHUNK) <= 0) &&
426: (ctxt->instate != XML_PARSER_COMMENT)) {
427: /*
428: * If we are at the end of the current entity and
429: * the context allows it, we pop consumed entities
430: * automatically.
431: * TODO: the auto closing should be blocked in other cases
432: */
433: xmlPopInput(ctxt);
434: } else {
435: if (*(ctxt->input->cur) == '\n') {
436: ctxt->input->line++; ctxt->input->col = 1;
437: } else ctxt->input->col++;
438: if (ctxt->encoding == NULL) {
439: /*
440: * We are supposed to handle UTF8, check it's valid
441: * From rfc2044: encoding of the Unicode values on UTF-8:
442: *
443: * UCS-4 range (hex.) UTF-8 octet sequence (binary)
444: * 0000 0000-0000 007F 0xxxxxxx
445: * 0000 0080-0000 07FF 110xxxxx 10xxxxxx
446: * 0000 0800-0000 FFFF 1110xxxx 10xxxxxx 10xxxxxx
447: *
1.160 daniel 448: * Check for the 0x110000 limit too
1.151 daniel 449: */
450: const unsigned char *cur = ctxt->input->cur;
451: unsigned char c;
1.91 daniel 452:
1.151 daniel 453: c = *cur;
454: if (c & 0x80) {
455: if (cur[1] == 0)
456: xmlParserInputGrow(ctxt->input, INPUT_CHUNK);
457: if ((cur[1] & 0xc0) != 0x80)
458: goto encoding_error;
459: if ((c & 0xe0) == 0xe0) {
460: unsigned int val;
461:
462: if (cur[2] == 0)
463: xmlParserInputGrow(ctxt->input, INPUT_CHUNK);
464: if ((cur[2] & 0xc0) != 0x80)
465: goto encoding_error;
466: if ((c & 0xf0) == 0xf0) {
467: if (cur[3] == 0)
468: xmlParserInputGrow(ctxt->input, INPUT_CHUNK);
469: if (((c & 0xf8) != 0xf0) ||
470: ((cur[3] & 0xc0) != 0x80))
471: goto encoding_error;
472: /* 4-byte code */
473: ctxt->input->cur += 4;
474: val = (cur[0] & 0x7) << 18;
475: val |= (cur[1] & 0x3f) << 12;
476: val |= (cur[2] & 0x3f) << 6;
477: val |= cur[3] & 0x3f;
478: } else {
479: /* 3-byte code */
480: ctxt->input->cur += 3;
481: val = (cur[0] & 0xf) << 12;
482: val |= (cur[1] & 0x3f) << 6;
483: val |= cur[2] & 0x3f;
484: }
485: if (((val > 0xd7ff) && (val < 0xe000)) ||
486: ((val > 0xfffd) && (val < 0x10000)) ||
1.160 daniel 487: (val >= 0x110000)) {
1.151 daniel 488: if ((ctxt->sax != NULL) &&
489: (ctxt->sax->error != NULL))
490: ctxt->sax->error(ctxt->userData,
491: "Char out of allowed range\n");
492: ctxt->errNo = XML_ERR_INVALID_ENCODING;
493: ctxt->wellFormed = 0;
1.180 daniel 494: ctxt->disableSAX = 1;
1.151 daniel 495: }
496: } else
497: /* 2-byte code */
498: ctxt->input->cur += 2;
499: } else
500: /* 1-byte code */
501: ctxt->input->cur++;
502: } else {
503: /*
504: * Assume it's a fixed lenght encoding (1) with
505: * a compatibke encoding for the ASCII set, since
506: * XML constructs only use < 128 chars
507: */
508: ctxt->input->cur++;
509: }
510: ctxt->nbChars++;
511: if (*ctxt->input->cur == 0)
512: xmlParserInputGrow(ctxt->input, INPUT_CHUNK);
513: }
514: }
1.154 daniel 515: if (*ctxt->input->cur == '%') xmlParserHandlePEReference(ctxt);
516: if (*ctxt->input->cur == '&') xmlParserHandleReference(ctxt);
1.168 daniel 517: if ((*ctxt->input->cur == 0) &&
518: (xmlParserInputGrow(ctxt->input, INPUT_CHUNK) <= 0))
519: xmlPopInput(ctxt);
1.151 daniel 520: return;
521: encoding_error:
522: /*
523: * If we detect an UTF8 error that probably mean that the
524: * input encoding didn't get properly advertized in the
525: * declaration header. Report the error and switch the encoding
526: * to ISO-Latin-1 (if you don't like this policy, just declare the
527: * encoding !)
528: */
529: if ((ctxt->sax != NULL) && (ctxt->sax->error != NULL))
530: ctxt->sax->error(ctxt->userData,
531: "Input is not proper UTF-8, indicate encoding !\n");
532: ctxt->errNo = XML_ERR_INVALID_ENCODING;
533:
534: ctxt->encoding = xmlStrdup(BAD_CAST "ISO-8859-1");
535: ctxt->input->cur++;
536: return;
537: }
1.42 daniel 538:
1.152 daniel 539: /**
540: * xmlCurrentChar:
541: * @ctxt: the XML parser context
542: * @len: pointer to the length of the char read
543: *
544: * The current char value, if using UTF-8 this may actaully span multiple
1.180 daniel 545: * bytes in the input buffer. Implement the end of line normalization:
546: * 2.11 End-of-Line Handling
547: * Wherever an external parsed entity or the literal entity value
548: * of an internal parsed entity contains either the literal two-character
549: * sequence "#xD#xA" or a standalone literal #xD, an XML processor
550: * must pass to the application the single character #xA.
551: * This behavior can conveniently be produced by normalizing all
552: * line breaks to #xA on input, before parsing.)
1.152 daniel 553: *
554: * Returns the current char value and its lenght
555: */
556:
557: int
558: xmlCurrentChar(xmlParserCtxtPtr ctxt, int *len) {
559: if (ctxt->token != 0) {
560: *len = 0;
561: return(ctxt->token);
562: }
563: if (ctxt->encoding == NULL) {
564: /*
565: * We are supposed to handle UTF8, check it's valid
566: * From rfc2044: encoding of the Unicode values on UTF-8:
567: *
568: * UCS-4 range (hex.) UTF-8 octet sequence (binary)
569: * 0000 0000-0000 007F 0xxxxxxx
570: * 0000 0080-0000 07FF 110xxxxx 10xxxxxx
571: * 0000 0800-0000 FFFF 1110xxxx 10xxxxxx 10xxxxxx
572: *
1.160 daniel 573: * Check for the 0x110000 limit too
1.152 daniel 574: */
575: const unsigned char *cur = ctxt->input->cur;
576: unsigned char c;
577: unsigned int val;
578:
579: c = *cur;
580: if (c & 0x80) {
581: if (cur[1] == 0)
582: xmlParserInputGrow(ctxt->input, INPUT_CHUNK);
583: if ((cur[1] & 0xc0) != 0x80)
584: goto encoding_error;
585: if ((c & 0xe0) == 0xe0) {
586:
587: if (cur[2] == 0)
588: xmlParserInputGrow(ctxt->input, INPUT_CHUNK);
589: if ((cur[2] & 0xc0) != 0x80)
590: goto encoding_error;
591: if ((c & 0xf0) == 0xf0) {
592: if (cur[3] == 0)
593: xmlParserInputGrow(ctxt->input, INPUT_CHUNK);
594: if (((c & 0xf8) != 0xf0) ||
595: ((cur[3] & 0xc0) != 0x80))
596: goto encoding_error;
597: /* 4-byte code */
598: *len = 4;
599: val = (cur[0] & 0x7) << 18;
600: val |= (cur[1] & 0x3f) << 12;
601: val |= (cur[2] & 0x3f) << 6;
602: val |= cur[3] & 0x3f;
603: } else {
604: /* 3-byte code */
605: *len = 3;
606: val = (cur[0] & 0xf) << 12;
607: val |= (cur[1] & 0x3f) << 6;
608: val |= cur[2] & 0x3f;
609: }
610: } else {
611: /* 2-byte code */
612: *len = 2;
613: val = (cur[0] & 0x1f) << 6;
1.168 daniel 614: val |= cur[1] & 0x3f;
1.152 daniel 615: }
616: if (!IS_CHAR(val)) {
617: if ((ctxt->sax != NULL) &&
618: (ctxt->sax->error != NULL))
619: ctxt->sax->error(ctxt->userData,
620: "Char out of allowed range\n");
621: ctxt->errNo = XML_ERR_INVALID_ENCODING;
622: ctxt->wellFormed = 0;
1.180 daniel 623: ctxt->disableSAX = 1;
1.152 daniel 624: }
625: return(val);
626: } else {
627: /* 1-byte code */
628: *len = 1;
1.180 daniel 629: if (*ctxt->input->cur == 0xD) {
630: if (ctxt->input->cur[1] == 0xA) {
631: ctxt->nbChars++;
632: ctxt->input->cur++;
633: }
634: return(0xA);
635: }
1.152 daniel 636: return((int) *ctxt->input->cur);
637: }
638: }
639: /*
640: * Assume it's a fixed lenght encoding (1) with
641: * a compatibke encoding for the ASCII set, since
642: * XML constructs only use < 128 chars
643: */
644: *len = 1;
1.180 daniel 645: if (*ctxt->input->cur == 0xD) {
646: if (ctxt->input->cur[1] == 0xA) {
647: ctxt->nbChars++;
648: ctxt->input->cur++;
649: }
650: return(0xA);
651: }
1.152 daniel 652: return((int) *ctxt->input->cur);
653: encoding_error:
654: /*
655: * If we detect an UTF8 error that probably mean that the
656: * input encoding didn't get properly advertized in the
657: * declaration header. Report the error and switch the encoding
658: * to ISO-Latin-1 (if you don't like this policy, just declare the
659: * encoding !)
660: */
661: if ((ctxt->sax != NULL) && (ctxt->sax->error != NULL))
662: ctxt->sax->error(ctxt->userData,
663: "Input is not proper UTF-8, indicate encoding !\n");
664: ctxt->errNo = XML_ERR_INVALID_ENCODING;
665:
666: ctxt->encoding = xmlStrdup(BAD_CAST "ISO-8859-1");
667: *len = 1;
668: return((int) *ctxt->input->cur);
669: }
670:
671: /**
1.162 daniel 672: * xmlStringCurrentChar:
673: * @ctxt: the XML parser context
674: * @cur: pointer to the beginning of the char
675: * @len: pointer to the length of the char read
676: *
677: * The current char value, if using UTF-8 this may actaully span multiple
678: * bytes in the input buffer.
679: *
680: * Returns the current char value and its lenght
681: */
682:
683: int
684: xmlStringCurrentChar(xmlParserCtxtPtr ctxt, const xmlChar *cur, int *len) {
685: if (ctxt->encoding == NULL) {
686: /*
687: * We are supposed to handle UTF8, check it's valid
688: * From rfc2044: encoding of the Unicode values on UTF-8:
689: *
690: * UCS-4 range (hex.) UTF-8 octet sequence (binary)
691: * 0000 0000-0000 007F 0xxxxxxx
692: * 0000 0080-0000 07FF 110xxxxx 10xxxxxx
693: * 0000 0800-0000 FFFF 1110xxxx 10xxxxxx 10xxxxxx
694: *
695: * Check for the 0x110000 limit too
696: */
697: unsigned char c;
698: unsigned int val;
699:
700: c = *cur;
701: if (c & 0x80) {
702: if ((cur[1] & 0xc0) != 0x80)
703: goto encoding_error;
704: if ((c & 0xe0) == 0xe0) {
705:
706: if ((cur[2] & 0xc0) != 0x80)
707: goto encoding_error;
708: if ((c & 0xf0) == 0xf0) {
709: if (((c & 0xf8) != 0xf0) ||
710: ((cur[3] & 0xc0) != 0x80))
711: goto encoding_error;
712: /* 4-byte code */
713: *len = 4;
714: val = (cur[0] & 0x7) << 18;
715: val |= (cur[1] & 0x3f) << 12;
716: val |= (cur[2] & 0x3f) << 6;
717: val |= cur[3] & 0x3f;
718: } else {
719: /* 3-byte code */
720: *len = 3;
721: val = (cur[0] & 0xf) << 12;
722: val |= (cur[1] & 0x3f) << 6;
723: val |= cur[2] & 0x3f;
724: }
725: } else {
726: /* 2-byte code */
727: *len = 2;
728: val = (cur[0] & 0x1f) << 6;
729: val |= cur[2] & 0x3f;
730: }
731: if (!IS_CHAR(val)) {
732: if ((ctxt->sax != NULL) &&
733: (ctxt->sax->error != NULL))
734: ctxt->sax->error(ctxt->userData,
735: "Char out of allowed range\n");
736: ctxt->errNo = XML_ERR_INVALID_ENCODING;
737: ctxt->wellFormed = 0;
1.180 daniel 738: ctxt->disableSAX = 1;
1.162 daniel 739: }
740: return(val);
741: } else {
742: /* 1-byte code */
743: *len = 1;
744: return((int) *cur);
745: }
746: }
747: /*
748: * Assume it's a fixed lenght encoding (1) with
749: * a compatibke encoding for the ASCII set, since
750: * XML constructs only use < 128 chars
751: */
752: *len = 1;
753: return((int) *cur);
754: encoding_error:
755: /*
756: * If we detect an UTF8 error that probably mean that the
757: * input encoding didn't get properly advertized in the
758: * declaration header. Report the error and switch the encoding
759: * to ISO-Latin-1 (if you don't like this policy, just declare the
760: * encoding !)
761: */
762: if ((ctxt->sax != NULL) && (ctxt->sax->error != NULL))
763: ctxt->sax->error(ctxt->userData,
764: "Input is not proper UTF-8, indicate encoding !\n");
765: ctxt->errNo = XML_ERR_INVALID_ENCODING;
766:
767: *len = 1;
768: return((int) *cur);
769: }
770:
771: /**
1.152 daniel 772: * xmlCopyChar:
773: * @len: pointer to the length of the char read (or zero)
774: * @array: pointer to an arry of xmlChar
775: * @val: the char value
776: *
777: * append the char value in the array
778: *
779: * Returns the number of xmlChar written
780: */
781:
782: int
783: xmlCopyChar(int len, xmlChar *out, int val) {
784: /*
785: * We are supposed to handle UTF8, check it's valid
786: * From rfc2044: encoding of the Unicode values on UTF-8:
787: *
788: * UCS-4 range (hex.) UTF-8 octet sequence (binary)
789: * 0000 0000-0000 007F 0xxxxxxx
790: * 0000 0080-0000 07FF 110xxxxx 10xxxxxx
791: * 0000 0800-0000 FFFF 1110xxxx 10xxxxxx 10xxxxxx
792: */
793: if (len == 0) {
794: if (val < 0) len = 0;
1.160 daniel 795: else if (val < 0x80) len = 1;
796: else if (val < 0x800) len = 2;
797: else if (val < 0x10000) len = 3;
798: else if (val < 0x110000) len = 4;
1.152 daniel 799: if (len == 0) {
800: fprintf(stderr, "Internal error, xmlCopyChar 0x%X out of bound\n",
801: val);
802: return(0);
803: }
804: }
805: if (len > 1) {
806: int bits;
807:
808: if (val < 0x80) { *out++= val; bits= -6; }
809: else if (val < 0x800) { *out++= (val >> 6) | 0xC0; bits= 0; }
810: else if (val < 0x10000) { *out++= (val >> 12) | 0xE0; bits= 6; }
811: else { *out++= (val >> 18) | 0xF0; bits= 12; }
812:
813: for ( ; bits >= 0; bits-= 6)
814: *out++= ((val >> bits) & 0x3F) | 0x80 ;
815:
816: return(len);
817: }
818: *out = (xmlChar) val;
819: return(1);
1.155 daniel 820: }
821:
822: /**
823: * xmlSkipBlankChars:
824: * @ctxt: the XML parser context
825: *
826: * skip all blanks character found at that point in the input streams.
827: * It pops up finished entities in the process if allowable at that point.
828: *
829: * Returns the number of space chars skipped
830: */
831:
832: int
833: xmlSkipBlankChars(xmlParserCtxtPtr ctxt) {
834: int cur, res = 0;
835:
836: do {
837: cur = CUR;
838: while (IS_BLANK(cur)) {
839: NEXT;
840: cur = CUR;
841: res++;
842: }
843: while ((cur == 0) && (ctxt->inputNr > 1) &&
844: (ctxt->instate != XML_PARSER_COMMENT)) {
845: xmlPopInput(ctxt);
846: cur = CUR;
847: }
848: if (*ctxt->input->cur == '%') xmlParserHandlePEReference(ctxt);
849: if (*ctxt->input->cur == '&') xmlParserHandleReference(ctxt);
850: } while (IS_BLANK(cur));
851: return(res);
1.152 daniel 852: }
853:
1.97 daniel 854: /************************************************************************
855: * *
856: * Commodity functions to handle entities processing *
857: * *
858: ************************************************************************/
1.40 daniel 859:
1.50 daniel 860: /**
861: * xmlPopInput:
862: * @ctxt: an XML parser context
863: *
1.40 daniel 864: * xmlPopInput: the current input pointed by ctxt->input came to an end
865: * pop it and return the next char.
1.45 daniel 866: *
1.123 daniel 867: * Returns the current xmlChar in the parser context
1.40 daniel 868: */
1.123 daniel 869: xmlChar
1.55 daniel 870: xmlPopInput(xmlParserCtxtPtr ctxt) {
1.40 daniel 871: if (ctxt->inputNr == 1) return(0); /* End of main Input */
1.69 daniel 872: xmlFreeInputStream(inputPop(ctxt));
1.97 daniel 873: if ((*ctxt->input->cur == 0) &&
874: (xmlParserInputGrow(ctxt->input, INPUT_CHUNK) <= 0))
875: return(xmlPopInput(ctxt));
1.40 daniel 876: return(CUR);
877: }
878:
1.50 daniel 879: /**
880: * xmlPushInput:
881: * @ctxt: an XML parser context
882: * @input: an XML parser input fragment (entity, XML fragment ...).
883: *
1.40 daniel 884: * xmlPushInput: switch to a new input stream which is stacked on top
885: * of the previous one(s).
886: */
1.55 daniel 887: void
888: xmlPushInput(xmlParserCtxtPtr ctxt, xmlParserInputPtr input) {
1.40 daniel 889: if (input == NULL) return;
890: inputPush(ctxt, input);
1.164 daniel 891: GROW;
1.40 daniel 892: }
893:
1.50 daniel 894: /**
1.69 daniel 895: * xmlFreeInputStream:
1.127 daniel 896: * @input: an xmlParserInputPtr
1.69 daniel 897: *
898: * Free up an input stream.
899: */
900: void
901: xmlFreeInputStream(xmlParserInputPtr input) {
902: if (input == NULL) return;
903:
1.119 daniel 904: if (input->filename != NULL) xmlFree((char *) input->filename);
905: if (input->directory != NULL) xmlFree((char *) input->directory);
1.164 daniel 906: if (input->encoding != NULL) xmlFree((char *) input->encoding);
1.165 daniel 907: if (input->version != NULL) xmlFree((char *) input->version);
1.69 daniel 908: if ((input->free != NULL) && (input->base != NULL))
1.123 daniel 909: input->free((xmlChar *) input->base);
1.93 veillard 910: if (input->buf != NULL)
911: xmlFreeParserInputBuffer(input->buf);
1.69 daniel 912: memset(input, -1, sizeof(xmlParserInput));
1.119 daniel 913: xmlFree(input);
1.69 daniel 914: }
915:
916: /**
1.96 daniel 917: * xmlNewInputStream:
918: * @ctxt: an XML parser context
919: *
920: * Create a new input stream structure
921: * Returns the new input stream or NULL
922: */
923: xmlParserInputPtr
924: xmlNewInputStream(xmlParserCtxtPtr ctxt) {
925: xmlParserInputPtr input;
926:
1.119 daniel 927: input = (xmlParserInputPtr) xmlMalloc(sizeof(xmlParserInput));
1.96 daniel 928: if (input == NULL) {
1.123 daniel 929: ctxt->errNo = XML_ERR_NO_MEMORY;
1.96 daniel 930: if ((ctxt->sax != NULL) && (ctxt->sax->error != NULL))
1.122 daniel 931: ctxt->sax->error(ctxt->userData,
932: "malloc: couldn't allocate a new input stream\n");
1.123 daniel 933: ctxt->errNo = XML_ERR_NO_MEMORY;
1.96 daniel 934: return(NULL);
935: }
1.165 daniel 936: memset(input, 0, sizeof(xmlParserInput));
1.96 daniel 937: input->line = 1;
938: input->col = 1;
1.167 daniel 939: input->standalone = -1;
1.96 daniel 940: return(input);
941: }
942:
943: /**
1.50 daniel 944: * xmlNewEntityInputStream:
945: * @ctxt: an XML parser context
946: * @entity: an Entity pointer
947: *
1.82 daniel 948: * Create a new input stream based on an xmlEntityPtr
1.113 daniel 949: *
950: * Returns the new input stream or NULL
1.45 daniel 951: */
1.50 daniel 952: xmlParserInputPtr
953: xmlNewEntityInputStream(xmlParserCtxtPtr ctxt, xmlEntityPtr entity) {
1.45 daniel 954: xmlParserInputPtr input;
955:
956: if (entity == NULL) {
1.123 daniel 957: ctxt->errNo = XML_ERR_INTERNAL_ERROR;
1.55 daniel 958: if ((ctxt->sax != NULL) && (ctxt->sax->error != NULL))
1.74 daniel 959: ctxt->sax->error(ctxt->userData,
1.45 daniel 960: "internal: xmlNewEntityInputStream entity = NULL\n");
1.123 daniel 961: ctxt->errNo = XML_ERR_INTERNAL_ERROR;
1.50 daniel 962: return(NULL);
1.45 daniel 963: }
964: if (entity->content == NULL) {
1.159 daniel 965: switch (entity->etype) {
1.113 daniel 966: case XML_EXTERNAL_GENERAL_UNPARSED_ENTITY:
1.123 daniel 967: ctxt->errNo = XML_ERR_UNPARSED_ENTITY;
1.113 daniel 968: if ((ctxt->sax != NULL) && (ctxt->sax->error != NULL))
969: ctxt->sax->error(ctxt->userData,
970: "xmlNewEntityInputStream unparsed entity !\n");
971: break;
972: case XML_EXTERNAL_GENERAL_PARSED_ENTITY:
973: case XML_EXTERNAL_PARAMETER_ENTITY:
1.116 daniel 974: return(xmlLoadExternalEntity((char *) entity->SystemID,
1.142 daniel 975: (char *) entity->ExternalID, ctxt));
1.113 daniel 976: case XML_INTERNAL_GENERAL_ENTITY:
977: if ((ctxt->sax != NULL) && (ctxt->sax->error != NULL))
978: ctxt->sax->error(ctxt->userData,
979: "Internal entity %s without content !\n", entity->name);
980: break;
981: case XML_INTERNAL_PARAMETER_ENTITY:
1.123 daniel 982: ctxt->errNo = XML_ERR_INTERNAL_ERROR;
1.113 daniel 983: if ((ctxt->sax != NULL) && (ctxt->sax->error != NULL))
984: ctxt->sax->error(ctxt->userData,
985: "Internal parameter entity %s without content !\n", entity->name);
986: break;
987: case XML_INTERNAL_PREDEFINED_ENTITY:
1.123 daniel 988: ctxt->errNo = XML_ERR_INTERNAL_ERROR;
1.113 daniel 989: if ((ctxt->sax != NULL) && (ctxt->sax->error != NULL))
990: ctxt->sax->error(ctxt->userData,
991: "Predefined entity %s without content !\n", entity->name);
992: break;
993: }
1.50 daniel 994: return(NULL);
1.45 daniel 995: }
1.96 daniel 996: input = xmlNewInputStream(ctxt);
1.45 daniel 997: if (input == NULL) {
1.50 daniel 998: return(NULL);
1.45 daniel 999: }
1.156 daniel 1000: input->filename = (char *) entity->SystemID;
1.45 daniel 1001: input->base = entity->content;
1002: input->cur = entity->content;
1.140 daniel 1003: input->length = entity->length;
1.50 daniel 1004: return(input);
1.45 daniel 1005: }
1006:
1.59 daniel 1007: /**
1008: * xmlNewStringInputStream:
1009: * @ctxt: an XML parser context
1.96 daniel 1010: * @buffer: an memory buffer
1.59 daniel 1011: *
1012: * Create a new input stream based on a memory buffer.
1.68 daniel 1013: * Returns the new input stream
1.59 daniel 1014: */
1015: xmlParserInputPtr
1.123 daniel 1016: xmlNewStringInputStream(xmlParserCtxtPtr ctxt, const xmlChar *buffer) {
1.59 daniel 1017: xmlParserInputPtr input;
1018:
1.96 daniel 1019: if (buffer == NULL) {
1.123 daniel 1020: ctxt->errNo = XML_ERR_INTERNAL_ERROR;
1.59 daniel 1021: if ((ctxt->sax != NULL) && (ctxt->sax->error != NULL))
1.74 daniel 1022: ctxt->sax->error(ctxt->userData,
1.59 daniel 1023: "internal: xmlNewStringInputStream string = NULL\n");
1024: return(NULL);
1025: }
1.96 daniel 1026: input = xmlNewInputStream(ctxt);
1.59 daniel 1027: if (input == NULL) {
1028: return(NULL);
1029: }
1.96 daniel 1030: input->base = buffer;
1031: input->cur = buffer;
1.140 daniel 1032: input->length = xmlStrlen(buffer);
1.59 daniel 1033: return(input);
1034: }
1035:
1.76 daniel 1036: /**
1037: * xmlNewInputFromFile:
1038: * @ctxt: an XML parser context
1039: * @filename: the filename to use as entity
1040: *
1041: * Create a new input stream based on a file.
1042: *
1043: * Returns the new input stream or NULL in case of error
1044: */
1045: xmlParserInputPtr
1.79 daniel 1046: xmlNewInputFromFile(xmlParserCtxtPtr ctxt, const char *filename) {
1.91 daniel 1047: xmlParserInputBufferPtr buf;
1.76 daniel 1048: xmlParserInputPtr inputStream;
1.111 daniel 1049: char *directory = NULL;
1.76 daniel 1050:
1.96 daniel 1051: if (ctxt == NULL) return(NULL);
1.91 daniel 1052: buf = xmlParserInputBufferCreateFilename(filename, XML_CHAR_ENCODING_NONE);
1.94 daniel 1053: if (buf == NULL) {
1.140 daniel 1054: char name[XML_PARSER_BIG_BUFFER_SIZE];
1.106 daniel 1055:
1.94 daniel 1056: if ((ctxt->input != NULL) && (ctxt->input->directory != NULL)) {
1057: #ifdef WIN32
1058: sprintf(name, "%s\\%s", ctxt->input->directory, filename);
1059: #else
1060: sprintf(name, "%s/%s", ctxt->input->directory, filename);
1061: #endif
1062: buf = xmlParserInputBufferCreateFilename(name,
1063: XML_CHAR_ENCODING_NONE);
1.106 daniel 1064: if (buf != NULL)
1.142 daniel 1065: directory = xmlParserGetDirectory(name);
1.106 daniel 1066: }
1067: if ((buf == NULL) && (ctxt->directory != NULL)) {
1068: #ifdef WIN32
1069: sprintf(name, "%s\\%s", ctxt->directory, filename);
1070: #else
1071: sprintf(name, "%s/%s", ctxt->directory, filename);
1072: #endif
1073: buf = xmlParserInputBufferCreateFilename(name,
1074: XML_CHAR_ENCODING_NONE);
1075: if (buf != NULL)
1.142 daniel 1076: directory = xmlParserGetDirectory(name);
1.106 daniel 1077: }
1078: if (buf == NULL)
1.94 daniel 1079: return(NULL);
1080: }
1081: if (directory == NULL)
1082: directory = xmlParserGetDirectory(filename);
1.76 daniel 1083:
1.96 daniel 1084: inputStream = xmlNewInputStream(ctxt);
1.76 daniel 1085: if (inputStream == NULL) {
1.119 daniel 1086: if (directory != NULL) xmlFree((char *) directory);
1.76 daniel 1087: return(NULL);
1088: }
1089:
1.119 daniel 1090: inputStream->filename = xmlMemStrdup(filename);
1.94 daniel 1091: inputStream->directory = directory;
1.91 daniel 1092: inputStream->buf = buf;
1.76 daniel 1093:
1.91 daniel 1094: inputStream->base = inputStream->buf->buffer->content;
1095: inputStream->cur = inputStream->buf->buffer->content;
1.106 daniel 1096: if ((ctxt->directory == NULL) && (directory != NULL))
1.134 daniel 1097: ctxt->directory = (char *) xmlStrdup((const xmlChar *) directory);
1.76 daniel 1098: return(inputStream);
1099: }
1100:
1.77 daniel 1101: /************************************************************************
1102: * *
1.97 daniel 1103: * Commodity functions to handle parser contexts *
1104: * *
1105: ************************************************************************/
1106:
1107: /**
1108: * xmlInitParserCtxt:
1109: * @ctxt: an XML parser context
1110: *
1111: * Initialize a parser context
1112: */
1113:
1114: void
1115: xmlInitParserCtxt(xmlParserCtxtPtr ctxt)
1116: {
1117: xmlSAXHandler *sax;
1118:
1.168 daniel 1119: xmlDefaultSAXHandlerInit();
1120:
1.119 daniel 1121: sax = (xmlSAXHandler *) xmlMalloc(sizeof(xmlSAXHandler));
1.97 daniel 1122: if (sax == NULL) {
1123: fprintf(stderr, "xmlInitParserCtxt: out of memory\n");
1124: }
1.180 daniel 1125: memset(sax, 0, sizeof(xmlSAXHandler));
1.97 daniel 1126:
1127: /* Allocate the Input stack */
1.119 daniel 1128: ctxt->inputTab = (xmlParserInputPtr *) xmlMalloc(5 * sizeof(xmlParserInputPtr));
1.97 daniel 1129: ctxt->inputNr = 0;
1130: ctxt->inputMax = 5;
1131: ctxt->input = NULL;
1.165 daniel 1132:
1.97 daniel 1133: ctxt->version = NULL;
1134: ctxt->encoding = NULL;
1135: ctxt->standalone = -1;
1.98 daniel 1136: ctxt->hasExternalSubset = 0;
1137: ctxt->hasPErefs = 0;
1.97 daniel 1138: ctxt->html = 0;
1.98 daniel 1139: ctxt->external = 0;
1.140 daniel 1140: ctxt->instate = XML_PARSER_START;
1.97 daniel 1141: ctxt->token = 0;
1.106 daniel 1142: ctxt->directory = NULL;
1.97 daniel 1143:
1144: /* Allocate the Node stack */
1.119 daniel 1145: ctxt->nodeTab = (xmlNodePtr *) xmlMalloc(10 * sizeof(xmlNodePtr));
1.97 daniel 1146: ctxt->nodeNr = 0;
1147: ctxt->nodeMax = 10;
1148: ctxt->node = NULL;
1149:
1.140 daniel 1150: /* Allocate the Name stack */
1151: ctxt->nameTab = (xmlChar **) xmlMalloc(10 * sizeof(xmlChar *));
1152: ctxt->nameNr = 0;
1153: ctxt->nameMax = 10;
1154: ctxt->name = NULL;
1155:
1.176 daniel 1156: /* Allocate the space stack */
1157: ctxt->spaceTab = (int *) xmlMalloc(10 * sizeof(int));
1158: ctxt->spaceNr = 1;
1159: ctxt->spaceMax = 10;
1160: ctxt->spaceTab[0] = -1;
1161: ctxt->space = &ctxt->spaceTab[0];
1162:
1.160 daniel 1163: if (sax == NULL) {
1164: ctxt->sax = &xmlDefaultSAXHandler;
1165: } else {
1.97 daniel 1166: ctxt->sax = sax;
1167: memcpy(sax, &xmlDefaultSAXHandler, sizeof(xmlSAXHandler));
1168: }
1169: ctxt->userData = ctxt;
1170: ctxt->myDoc = NULL;
1171: ctxt->wellFormed = 1;
1.99 daniel 1172: ctxt->valid = 1;
1.100 daniel 1173: ctxt->validate = xmlDoValidityCheckingDefaultValue;
1.179 daniel 1174: ctxt->keepBlanks = xmlKeepBlanksDefaultValue;
1.100 daniel 1175: ctxt->vctxt.userData = ctxt;
1.149 daniel 1176: if (ctxt->validate) {
1177: ctxt->vctxt.error = xmlParserValidityError;
1.160 daniel 1178: if (xmlGetWarningsDefaultValue == 0)
1179: ctxt->vctxt.warning = NULL;
1180: else
1181: ctxt->vctxt.warning = xmlParserValidityWarning;
1.180 daniel 1182: /* Allocate the Node stack */
1183: ctxt->vctxt.nodeTab = (xmlNodePtr *) xmlMalloc(4 * sizeof(xmlNodePtr));
1184: ctxt->vctxt.nodeNr = 0;
1185: ctxt->vctxt.nodeMax = 4;
1186: ctxt->vctxt.node = NULL;
1.149 daniel 1187: } else {
1188: ctxt->vctxt.error = NULL;
1189: ctxt->vctxt.warning = NULL;
1190: }
1.97 daniel 1191: ctxt->replaceEntities = xmlSubstituteEntitiesDefaultValue;
1192: ctxt->record_info = 0;
1.135 daniel 1193: ctxt->nbChars = 0;
1.140 daniel 1194: ctxt->checkIndex = 0;
1.180 daniel 1195: ctxt->inSubset = 0;
1.140 daniel 1196: ctxt->errNo = XML_ERR_OK;
1.185 daniel 1197: ctxt->depth = 0;
1.97 daniel 1198: xmlInitNodeInfoSeq(&ctxt->node_seq);
1199: }
1200:
1201: /**
1202: * xmlFreeParserCtxt:
1203: * @ctxt: an XML parser context
1204: *
1205: * Free all the memory used by a parser context. However the parsed
1206: * document in ctxt->myDoc is not freed.
1207: */
1208:
1209: void
1210: xmlFreeParserCtxt(xmlParserCtxtPtr ctxt)
1211: {
1212: xmlParserInputPtr input;
1.140 daniel 1213: xmlChar *oldname;
1.97 daniel 1214:
1215: if (ctxt == NULL) return;
1216:
1217: while ((input = inputPop(ctxt)) != NULL) {
1218: xmlFreeInputStream(input);
1219: }
1.140 daniel 1220: while ((oldname = namePop(ctxt)) != NULL) {
1221: xmlFree(oldname);
1222: }
1.176 daniel 1223: if (ctxt->spaceTab != NULL) xmlFree(ctxt->spaceTab);
1.140 daniel 1224: if (ctxt->nameTab != NULL) xmlFree(ctxt->nameTab);
1.119 daniel 1225: if (ctxt->nodeTab != NULL) xmlFree(ctxt->nodeTab);
1226: if (ctxt->inputTab != NULL) xmlFree(ctxt->inputTab);
1227: if (ctxt->version != NULL) xmlFree((char *) ctxt->version);
1228: if (ctxt->encoding != NULL) xmlFree((char *) ctxt->encoding);
1.165 daniel 1229: if (ctxt->intSubName != NULL) xmlFree((char *) ctxt->intSubName);
1230: if (ctxt->extSubURI != NULL) xmlFree((char *) ctxt->extSubURI);
1231: if (ctxt->extSubSystem != NULL) xmlFree((char *) ctxt->extSubSystem);
1.180 daniel 1232: if (ctxt->vctxt.nodeTab != NULL) xmlFree(ctxt->vctxt.nodeTab);
1.97 daniel 1233: if ((ctxt->sax != NULL) && (ctxt->sax != &xmlDefaultSAXHandler))
1.119 daniel 1234: xmlFree(ctxt->sax);
1235: if (ctxt->directory != NULL) xmlFree((char *) ctxt->directory);
1236: xmlFree(ctxt);
1.97 daniel 1237: }
1238:
1239: /**
1240: * xmlNewParserCtxt:
1241: *
1242: * Allocate and initialize a new parser context.
1243: *
1244: * Returns the xmlParserCtxtPtr or NULL
1245: */
1246:
1247: xmlParserCtxtPtr
1248: xmlNewParserCtxt()
1249: {
1250: xmlParserCtxtPtr ctxt;
1251:
1.119 daniel 1252: ctxt = (xmlParserCtxtPtr) xmlMalloc(sizeof(xmlParserCtxt));
1.97 daniel 1253: if (ctxt == NULL) {
1254: fprintf(stderr, "xmlNewParserCtxt : cannot allocate context\n");
1255: perror("malloc");
1256: return(NULL);
1257: }
1.165 daniel 1258: memset(ctxt, 0, sizeof(xmlParserCtxt));
1.97 daniel 1259: xmlInitParserCtxt(ctxt);
1260: return(ctxt);
1261: }
1262:
1263: /**
1264: * xmlClearParserCtxt:
1265: * @ctxt: an XML parser context
1266: *
1267: * Clear (release owned resources) and reinitialize a parser context
1268: */
1269:
1270: void
1271: xmlClearParserCtxt(xmlParserCtxtPtr ctxt)
1272: {
1273: xmlClearNodeInfoSeq(&ctxt->node_seq);
1274: xmlInitParserCtxt(ctxt);
1275: }
1276:
1277: /************************************************************************
1278: * *
1.77 daniel 1279: * Commodity functions to handle entities *
1280: * *
1281: ************************************************************************/
1282:
1.174 daniel 1283: /**
1284: * xmlCheckEntity:
1285: * @ctxt: an XML parser context
1286: * @content: the entity content string
1287: *
1288: * Parse an entity content and checks the WF constraints
1289: *
1290: */
1291:
1292: void
1293: xmlCheckEntity(xmlParserCtxtPtr ctxt, const xmlChar *content) {
1294: }
1.97 daniel 1295:
1296: /**
1297: * xmlParseCharRef:
1298: * @ctxt: an XML parser context
1299: *
1300: * parse Reference declarations
1301: *
1302: * [66] CharRef ::= '&#' [0-9]+ ';' |
1303: * '&#x' [0-9a-fA-F]+ ';'
1304: *
1.98 daniel 1305: * [ WFC: Legal Character ]
1306: * Characters referred to using character references must match the
1307: * production for Char.
1308: *
1.135 daniel 1309: * Returns the value parsed (as an int), 0 in case of error
1.77 daniel 1310: */
1.97 daniel 1311: int
1312: xmlParseCharRef(xmlParserCtxtPtr ctxt) {
1313: int val = 0;
1314:
1.111 daniel 1315: if (ctxt->token != 0) {
1316: val = ctxt->token;
1317: ctxt->token = 0;
1318: return(val);
1319: }
1.152 daniel 1320: if ((RAW == '&') && (NXT(1) == '#') &&
1.97 daniel 1321: (NXT(2) == 'x')) {
1322: SKIP(3);
1.152 daniel 1323: while (RAW != ';') {
1324: if ((RAW >= '0') && (RAW <= '9'))
1.97 daniel 1325: val = val * 16 + (CUR - '0');
1.152 daniel 1326: else if ((RAW >= 'a') && (RAW <= 'f'))
1.97 daniel 1327: val = val * 16 + (CUR - 'a') + 10;
1.152 daniel 1328: else if ((RAW >= 'A') && (RAW <= 'F'))
1.97 daniel 1329: val = val * 16 + (CUR - 'A') + 10;
1330: else {
1.123 daniel 1331: ctxt->errNo = XML_ERR_INVALID_HEX_CHARREF;
1.97 daniel 1332: if ((ctxt->sax != NULL) && (ctxt->sax->error != NULL))
1333: ctxt->sax->error(ctxt->userData,
1334: "xmlParseCharRef: invalid hexadecimal value\n");
1335: ctxt->wellFormed = 0;
1.180 daniel 1336: ctxt->disableSAX = 1;
1.97 daniel 1337: val = 0;
1338: break;
1339: }
1340: NEXT;
1341: }
1.164 daniel 1342: if (RAW == ';') {
1343: /* on purpose to avoid reentrancy problems with NEXT and SKIP */
1344: ctxt->nbChars ++;
1345: ctxt->input->cur++;
1346: }
1.152 daniel 1347: } else if ((RAW == '&') && (NXT(1) == '#')) {
1.97 daniel 1348: SKIP(2);
1.152 daniel 1349: while (RAW != ';') {
1350: if ((RAW >= '0') && (RAW <= '9'))
1.97 daniel 1351: val = val * 10 + (CUR - '0');
1352: else {
1.123 daniel 1353: ctxt->errNo = XML_ERR_INVALID_DEC_CHARREF;
1.97 daniel 1354: if ((ctxt->sax != NULL) && (ctxt->sax->error != NULL))
1355: ctxt->sax->error(ctxt->userData,
1356: "xmlParseCharRef: invalid decimal value\n");
1357: ctxt->wellFormed = 0;
1.180 daniel 1358: ctxt->disableSAX = 1;
1.97 daniel 1359: val = 0;
1360: break;
1361: }
1362: NEXT;
1363: }
1.164 daniel 1364: if (RAW == ';') {
1365: /* on purpose to avoid reentrancy problems with NEXT and SKIP */
1366: ctxt->nbChars ++;
1367: ctxt->input->cur++;
1368: }
1.97 daniel 1369: } else {
1.123 daniel 1370: ctxt->errNo = XML_ERR_INVALID_CHARREF;
1.97 daniel 1371: if ((ctxt->sax != NULL) && (ctxt->sax->error != NULL))
1.98 daniel 1372: ctxt->sax->error(ctxt->userData,
1373: "xmlParseCharRef: invalid value\n");
1.97 daniel 1374: ctxt->wellFormed = 0;
1.180 daniel 1375: ctxt->disableSAX = 1;
1.97 daniel 1376: }
1.98 daniel 1377:
1.97 daniel 1378: /*
1.98 daniel 1379: * [ WFC: Legal Character ]
1380: * Characters referred to using character references must match the
1381: * production for Char.
1.97 daniel 1382: */
1383: if (IS_CHAR(val)) {
1384: return(val);
1385: } else {
1.123 daniel 1386: ctxt->errNo = XML_ERR_INVALID_CHAR;
1.97 daniel 1387: if ((ctxt->sax != NULL) && (ctxt->sax->error != NULL))
1.123 daniel 1388: ctxt->sax->error(ctxt->userData, "CharRef: invalid xmlChar value %d\n",
1.97 daniel 1389: val);
1390: ctxt->wellFormed = 0;
1.180 daniel 1391: ctxt->disableSAX = 1;
1.97 daniel 1392: }
1393: return(0);
1.77 daniel 1394: }
1395:
1.96 daniel 1396: /**
1.135 daniel 1397: * xmlParseStringCharRef:
1398: * @ctxt: an XML parser context
1399: * @str: a pointer to an index in the string
1400: *
1401: * parse Reference declarations, variant parsing from a string rather
1402: * than an an input flow.
1403: *
1404: * [66] CharRef ::= '&#' [0-9]+ ';' |
1405: * '&#x' [0-9a-fA-F]+ ';'
1406: *
1407: * [ WFC: Legal Character ]
1408: * Characters referred to using character references must match the
1409: * production for Char.
1410: *
1411: * Returns the value parsed (as an int), 0 in case of error, str will be
1412: * updated to the current value of the index
1413: */
1414: int
1415: xmlParseStringCharRef(xmlParserCtxtPtr ctxt, const xmlChar **str) {
1416: const xmlChar *ptr;
1417: xmlChar cur;
1418: int val = 0;
1419:
1420: if ((str == NULL) || (*str == NULL)) return(0);
1421: ptr = *str;
1422: cur = *ptr;
1.137 daniel 1423: if ((cur == '&') && (ptr[1] == '#') && (ptr[2] == 'x')) {
1.135 daniel 1424: ptr += 3;
1425: cur = *ptr;
1426: while (cur != ';') {
1427: if ((cur >= '0') && (cur <= '9'))
1428: val = val * 16 + (cur - '0');
1429: else if ((cur >= 'a') && (cur <= 'f'))
1430: val = val * 16 + (cur - 'a') + 10;
1431: else if ((cur >= 'A') && (cur <= 'F'))
1432: val = val * 16 + (cur - 'A') + 10;
1433: else {
1434: ctxt->errNo = XML_ERR_INVALID_HEX_CHARREF;
1435: if ((ctxt->sax != NULL) && (ctxt->sax->error != NULL))
1436: ctxt->sax->error(ctxt->userData,
1437: "xmlParseCharRef: invalid hexadecimal value\n");
1438: ctxt->wellFormed = 0;
1.180 daniel 1439: ctxt->disableSAX = 1;
1.135 daniel 1440: val = 0;
1441: break;
1442: }
1443: ptr++;
1444: cur = *ptr;
1445: }
1446: if (cur == ';')
1447: ptr++;
1.145 daniel 1448: } else if ((cur == '&') && (ptr[1] == '#')){
1.135 daniel 1449: ptr += 2;
1450: cur = *ptr;
1451: while (cur != ';') {
1452: if ((cur >= '0') && (cur <= '9'))
1453: val = val * 10 + (cur - '0');
1454: else {
1455: ctxt->errNo = XML_ERR_INVALID_DEC_CHARREF;
1456: if ((ctxt->sax != NULL) && (ctxt->sax->error != NULL))
1457: ctxt->sax->error(ctxt->userData,
1458: "xmlParseCharRef: invalid decimal value\n");
1459: ctxt->wellFormed = 0;
1.180 daniel 1460: ctxt->disableSAX = 1;
1.135 daniel 1461: val = 0;
1462: break;
1463: }
1464: ptr++;
1465: cur = *ptr;
1466: }
1467: if (cur == ';')
1468: ptr++;
1469: } else {
1470: ctxt->errNo = XML_ERR_INVALID_CHARREF;
1471: if ((ctxt->sax != NULL) && (ctxt->sax->error != NULL))
1472: ctxt->sax->error(ctxt->userData,
1473: "xmlParseCharRef: invalid value\n");
1474: ctxt->wellFormed = 0;
1.180 daniel 1475: ctxt->disableSAX = 1;
1.135 daniel 1476: return(0);
1477: }
1478: *str = ptr;
1479:
1480: /*
1481: * [ WFC: Legal Character ]
1482: * Characters referred to using character references must match the
1483: * production for Char.
1484: */
1485: if (IS_CHAR(val)) {
1486: return(val);
1487: } else {
1488: ctxt->errNo = XML_ERR_INVALID_CHAR;
1489: if ((ctxt->sax != NULL) && (ctxt->sax->error != NULL))
1490: ctxt->sax->error(ctxt->userData,
1491: "CharRef: invalid xmlChar value %d\n", val);
1492: ctxt->wellFormed = 0;
1.180 daniel 1493: ctxt->disableSAX = 1;
1.135 daniel 1494: }
1495: return(0);
1496: }
1497:
1498: /**
1.96 daniel 1499: * xmlParserHandleReference:
1500: * @ctxt: the parser context
1501: *
1.97 daniel 1502: * [67] Reference ::= EntityRef | CharRef
1503: *
1.96 daniel 1504: * [68] EntityRef ::= '&' Name ';'
1505: *
1.98 daniel 1506: * [ WFC: Entity Declared ]
1507: * the Name given in the entity reference must match that in an entity
1508: * declaration, except that well-formed documents need not declare any
1509: * of the following entities: amp, lt, gt, apos, quot.
1510: *
1511: * [ WFC: Parsed Entity ]
1512: * An entity reference must not contain the name of an unparsed entity
1513: *
1.97 daniel 1514: * [66] CharRef ::= '&#' [0-9]+ ';' |
1515: * '&#x' [0-9a-fA-F]+ ';'
1516: *
1.96 daniel 1517: * A PEReference may have been detectect in the current input stream
1518: * the handling is done accordingly to
1519: * http://www.w3.org/TR/REC-xml#entproc
1520: */
1521: void
1522: xmlParserHandleReference(xmlParserCtxtPtr ctxt) {
1.97 daniel 1523: xmlParserInputPtr input;
1.123 daniel 1524: xmlChar *name;
1.97 daniel 1525: xmlEntityPtr ent = NULL;
1526:
1.126 daniel 1527: if (ctxt->token != 0) {
1528: return;
1529: }
1.152 daniel 1530: if (RAW != '&') return;
1.97 daniel 1531: GROW;
1.152 daniel 1532: if ((RAW == '&') && (NXT(1) == '#')) {
1.97 daniel 1533: switch(ctxt->instate) {
1.140 daniel 1534: case XML_PARSER_ENTITY_DECL:
1535: case XML_PARSER_PI:
1.109 daniel 1536: case XML_PARSER_CDATA_SECTION:
1.140 daniel 1537: case XML_PARSER_COMMENT:
1.168 daniel 1538: case XML_PARSER_SYSTEM_LITERAL:
1.140 daniel 1539: /* we just ignore it there */
1540: return;
1541: case XML_PARSER_START_TAG:
1.109 daniel 1542: return;
1.140 daniel 1543: case XML_PARSER_END_TAG:
1.97 daniel 1544: return;
1545: case XML_PARSER_EOF:
1.123 daniel 1546: ctxt->errNo = XML_ERR_CHARREF_AT_EOF;
1.97 daniel 1547: if ((ctxt->sax != NULL) && (ctxt->sax->error != NULL))
1548: ctxt->sax->error(ctxt->userData, "CharRef at EOF\n");
1549: ctxt->wellFormed = 0;
1.180 daniel 1550: ctxt->disableSAX = 1;
1.97 daniel 1551: return;
1552: case XML_PARSER_PROLOG:
1.140 daniel 1553: case XML_PARSER_START:
1554: case XML_PARSER_MISC:
1.123 daniel 1555: ctxt->errNo = XML_ERR_CHARREF_IN_PROLOG;
1.97 daniel 1556: if ((ctxt->sax != NULL) && (ctxt->sax->error != NULL))
1557: ctxt->sax->error(ctxt->userData, "CharRef in prolog!\n");
1558: ctxt->wellFormed = 0;
1.180 daniel 1559: ctxt->disableSAX = 1;
1.97 daniel 1560: return;
1561: case XML_PARSER_EPILOG:
1.123 daniel 1562: ctxt->errNo = XML_ERR_CHARREF_IN_EPILOG;
1.97 daniel 1563: if ((ctxt->sax != NULL) && (ctxt->sax->error != NULL))
1564: ctxt->sax->error(ctxt->userData, "CharRef in epilog!\n");
1565: ctxt->wellFormed = 0;
1.180 daniel 1566: ctxt->disableSAX = 1;
1.97 daniel 1567: return;
1568: case XML_PARSER_DTD:
1.123 daniel 1569: ctxt->errNo = XML_ERR_CHARREF_IN_DTD;
1.97 daniel 1570: if ((ctxt->sax != NULL) && (ctxt->sax->error != NULL))
1571: ctxt->sax->error(ctxt->userData,
1572: "CharRef are forbiden in DTDs!\n");
1573: ctxt->wellFormed = 0;
1.180 daniel 1574: ctxt->disableSAX = 1;
1.97 daniel 1575: return;
1576: case XML_PARSER_ENTITY_VALUE:
1577: /*
1578: * NOTE: in the case of entity values, we don't do the
1.127 daniel 1579: * substitution here since we need the literal
1.97 daniel 1580: * entity value to be able to save the internal
1581: * subset of the document.
1582: * This will be handled by xmlDecodeEntities
1583: */
1584: return;
1585: case XML_PARSER_CONTENT:
1586: case XML_PARSER_ATTRIBUTE_VALUE:
1587: ctxt->token = xmlParseCharRef(ctxt);
1588: return;
1589: }
1590: return;
1591: }
1592:
1593: switch(ctxt->instate) {
1.109 daniel 1594: case XML_PARSER_CDATA_SECTION:
1595: return;
1.140 daniel 1596: case XML_PARSER_PI:
1.97 daniel 1597: case XML_PARSER_COMMENT:
1.168 daniel 1598: case XML_PARSER_SYSTEM_LITERAL:
1599: case XML_PARSER_CONTENT:
1.97 daniel 1600: return;
1.140 daniel 1601: case XML_PARSER_START_TAG:
1602: return;
1603: case XML_PARSER_END_TAG:
1604: return;
1.97 daniel 1605: case XML_PARSER_EOF:
1.123 daniel 1606: ctxt->errNo = XML_ERR_ENTITYREF_AT_EOF;
1.97 daniel 1607: if ((ctxt->sax != NULL) && (ctxt->sax->error != NULL))
1608: ctxt->sax->error(ctxt->userData, "Reference at EOF\n");
1609: ctxt->wellFormed = 0;
1.180 daniel 1610: ctxt->disableSAX = 1;
1.97 daniel 1611: return;
1612: case XML_PARSER_PROLOG:
1.140 daniel 1613: case XML_PARSER_START:
1614: case XML_PARSER_MISC:
1.123 daniel 1615: ctxt->errNo = XML_ERR_ENTITYREF_IN_PROLOG;
1.97 daniel 1616: if ((ctxt->sax != NULL) && (ctxt->sax->error != NULL))
1617: ctxt->sax->error(ctxt->userData, "Reference in prolog!\n");
1618: ctxt->wellFormed = 0;
1.180 daniel 1619: ctxt->disableSAX = 1;
1.97 daniel 1620: return;
1621: case XML_PARSER_EPILOG:
1.123 daniel 1622: ctxt->errNo = XML_ERR_ENTITYREF_IN_EPILOG;
1.97 daniel 1623: if ((ctxt->sax != NULL) && (ctxt->sax->error != NULL))
1624: ctxt->sax->error(ctxt->userData, "Reference in epilog!\n");
1625: ctxt->wellFormed = 0;
1.180 daniel 1626: ctxt->disableSAX = 1;
1.97 daniel 1627: return;
1628: case XML_PARSER_ENTITY_VALUE:
1629: /*
1630: * NOTE: in the case of entity values, we don't do the
1.127 daniel 1631: * substitution here since we need the literal
1.97 daniel 1632: * entity value to be able to save the internal
1633: * subset of the document.
1634: * This will be handled by xmlDecodeEntities
1635: */
1636: return;
1637: case XML_PARSER_ATTRIBUTE_VALUE:
1638: /*
1639: * NOTE: in the case of attributes values, we don't do the
1640: * substitution here unless we are in a mode where
1641: * the parser is explicitely asked to substitute
1642: * entities. The SAX callback is called with values
1643: * without entity substitution.
1644: * This will then be handled by xmlDecodeEntities
1645: */
1.113 daniel 1646: return;
1.97 daniel 1647: case XML_PARSER_ENTITY_DECL:
1648: /*
1649: * we just ignore it there
1650: * the substitution will be done once the entity is referenced
1651: */
1652: return;
1653: case XML_PARSER_DTD:
1.123 daniel 1654: ctxt->errNo = XML_ERR_ENTITYREF_IN_DTD;
1.97 daniel 1655: if ((ctxt->sax != NULL) && (ctxt->sax->error != NULL))
1656: ctxt->sax->error(ctxt->userData,
1657: "Entity references are forbiden in DTDs!\n");
1658: ctxt->wellFormed = 0;
1.180 daniel 1659: ctxt->disableSAX = 1;
1.97 daniel 1660: return;
1661: }
1662:
1663: NEXT;
1664: name = xmlScanName(ctxt);
1665: if (name == NULL) {
1.123 daniel 1666: ctxt->errNo = XML_ERR_ENTITYREF_NO_NAME;
1.97 daniel 1667: if ((ctxt->sax != NULL) && (ctxt->sax->error != NULL))
1668: ctxt->sax->error(ctxt->userData, "Entity reference: no name\n");
1669: ctxt->wellFormed = 0;
1.180 daniel 1670: ctxt->disableSAX = 1;
1.97 daniel 1671: ctxt->token = '&';
1672: return;
1673: }
1674: if (NXT(xmlStrlen(name)) != ';') {
1.123 daniel 1675: ctxt->errNo = XML_ERR_ENTITYREF_SEMICOL_MISSING;
1.97 daniel 1676: if ((ctxt->sax != NULL) && (ctxt->sax->error != NULL))
1677: ctxt->sax->error(ctxt->userData,
1678: "Entity reference: ';' expected\n");
1679: ctxt->wellFormed = 0;
1.180 daniel 1680: ctxt->disableSAX = 1;
1.97 daniel 1681: ctxt->token = '&';
1.119 daniel 1682: xmlFree(name);
1.97 daniel 1683: return;
1684: }
1685: SKIP(xmlStrlen(name) + 1);
1686: if (ctxt->sax != NULL) {
1687: if (ctxt->sax->getEntity != NULL)
1688: ent = ctxt->sax->getEntity(ctxt->userData, name);
1689: }
1.98 daniel 1690:
1691: /*
1692: * [ WFC: Entity Declared ]
1693: * the Name given in the entity reference must match that in an entity
1694: * declaration, except that well-formed documents need not declare any
1695: * of the following entities: amp, lt, gt, apos, quot.
1696: */
1.97 daniel 1697: if (ent == NULL)
1698: ent = xmlGetPredefinedEntity(name);
1699: if (ent == NULL) {
1.123 daniel 1700: ctxt->errNo = XML_ERR_UNDECLARED_ENTITY;
1.97 daniel 1701: if ((ctxt->sax != NULL) && (ctxt->sax->error != NULL))
1702: ctxt->sax->error(ctxt->userData,
1.98 daniel 1703: "Entity reference: entity %s not declared\n",
1704: name);
1.97 daniel 1705: ctxt->wellFormed = 0;
1.180 daniel 1706: ctxt->disableSAX = 1;
1.119 daniel 1707: xmlFree(name);
1.97 daniel 1708: return;
1709: }
1.98 daniel 1710:
1711: /*
1712: * [ WFC: Parsed Entity ]
1713: * An entity reference must not contain the name of an unparsed entity
1714: */
1.159 daniel 1715: if (ent->etype == XML_EXTERNAL_GENERAL_UNPARSED_ENTITY) {
1.123 daniel 1716: ctxt->errNo = XML_ERR_UNPARSED_ENTITY;
1.98 daniel 1717: if ((ctxt->sax != NULL) && (ctxt->sax->error != NULL))
1718: ctxt->sax->error(ctxt->userData,
1719: "Entity reference to unparsed entity %s\n", name);
1720: ctxt->wellFormed = 0;
1.180 daniel 1721: ctxt->disableSAX = 1;
1.98 daniel 1722: }
1723:
1.159 daniel 1724: if (ent->etype == XML_INTERNAL_PREDEFINED_ENTITY) {
1.97 daniel 1725: ctxt->token = ent->content[0];
1.119 daniel 1726: xmlFree(name);
1.97 daniel 1727: return;
1728: }
1729: input = xmlNewEntityInputStream(ctxt, ent);
1730: xmlPushInput(ctxt, input);
1.119 daniel 1731: xmlFree(name);
1.96 daniel 1732: return;
1733: }
1734:
1735: /**
1736: * xmlParserHandlePEReference:
1737: * @ctxt: the parser context
1738: *
1739: * [69] PEReference ::= '%' Name ';'
1740: *
1.98 daniel 1741: * [ WFC: No Recursion ]
1742: * TODO A parsed entity must not contain a recursive
1743: * reference to itself, either directly or indirectly.
1744: *
1745: * [ WFC: Entity Declared ]
1746: * In a document without any DTD, a document with only an internal DTD
1747: * subset which contains no parameter entity references, or a document
1748: * with "standalone='yes'", ... ... The declaration of a parameter
1749: * entity must precede any reference to it...
1750: *
1751: * [ VC: Entity Declared ]
1752: * In a document with an external subset or external parameter entities
1753: * with "standalone='no'", ... ... The declaration of a parameter entity
1754: * must precede any reference to it...
1755: *
1756: * [ WFC: In DTD ]
1757: * Parameter-entity references may only appear in the DTD.
1758: * NOTE: misleading but this is handled.
1759: *
1760: * A PEReference may have been detected in the current input stream
1.96 daniel 1761: * the handling is done accordingly to
1762: * http://www.w3.org/TR/REC-xml#entproc
1763: * i.e.
1764: * - Included in literal in entity values
1765: * - Included as Parameter Entity reference within DTDs
1766: */
1767: void
1768: xmlParserHandlePEReference(xmlParserCtxtPtr ctxt) {
1.123 daniel 1769: xmlChar *name;
1.96 daniel 1770: xmlEntityPtr entity = NULL;
1771: xmlParserInputPtr input;
1772:
1.126 daniel 1773: if (ctxt->token != 0) {
1774: return;
1775: }
1.152 daniel 1776: if (RAW != '%') return;
1.96 daniel 1777: switch(ctxt->instate) {
1.109 daniel 1778: case XML_PARSER_CDATA_SECTION:
1779: return;
1.97 daniel 1780: case XML_PARSER_COMMENT:
1781: return;
1.140 daniel 1782: case XML_PARSER_START_TAG:
1783: return;
1784: case XML_PARSER_END_TAG:
1785: return;
1.96 daniel 1786: case XML_PARSER_EOF:
1.123 daniel 1787: ctxt->errNo = XML_ERR_PEREF_AT_EOF;
1.96 daniel 1788: if ((ctxt->sax != NULL) && (ctxt->sax->error != NULL))
1789: ctxt->sax->error(ctxt->userData, "PEReference at EOF\n");
1790: ctxt->wellFormed = 0;
1.180 daniel 1791: ctxt->disableSAX = 1;
1.96 daniel 1792: return;
1793: case XML_PARSER_PROLOG:
1.140 daniel 1794: case XML_PARSER_START:
1795: case XML_PARSER_MISC:
1.123 daniel 1796: ctxt->errNo = XML_ERR_PEREF_IN_PROLOG;
1.96 daniel 1797: if ((ctxt->sax != NULL) && (ctxt->sax->error != NULL))
1798: ctxt->sax->error(ctxt->userData, "PEReference in prolog!\n");
1799: ctxt->wellFormed = 0;
1.180 daniel 1800: ctxt->disableSAX = 1;
1.96 daniel 1801: return;
1.97 daniel 1802: case XML_PARSER_ENTITY_DECL:
1.96 daniel 1803: case XML_PARSER_CONTENT:
1804: case XML_PARSER_ATTRIBUTE_VALUE:
1.140 daniel 1805: case XML_PARSER_PI:
1.168 daniel 1806: case XML_PARSER_SYSTEM_LITERAL:
1.96 daniel 1807: /* we just ignore it there */
1808: return;
1809: case XML_PARSER_EPILOG:
1.123 daniel 1810: ctxt->errNo = XML_ERR_PEREF_IN_EPILOG;
1.96 daniel 1811: if ((ctxt->sax != NULL) && (ctxt->sax->error != NULL))
1.97 daniel 1812: ctxt->sax->error(ctxt->userData, "PEReference in epilog!\n");
1.96 daniel 1813: ctxt->wellFormed = 0;
1.180 daniel 1814: ctxt->disableSAX = 1;
1.96 daniel 1815: return;
1.97 daniel 1816: case XML_PARSER_ENTITY_VALUE:
1817: /*
1818: * NOTE: in the case of entity values, we don't do the
1.127 daniel 1819: * substitution here since we need the literal
1.97 daniel 1820: * entity value to be able to save the internal
1821: * subset of the document.
1822: * This will be handled by xmlDecodeEntities
1823: */
1824: return;
1.96 daniel 1825: case XML_PARSER_DTD:
1.98 daniel 1826: /*
1827: * [WFC: Well-Formedness Constraint: PEs in Internal Subset]
1828: * In the internal DTD subset, parameter-entity references
1829: * can occur only where markup declarations can occur, not
1830: * within markup declarations.
1831: * In that case this is handled in xmlParseMarkupDecl
1832: */
1833: if ((ctxt->external == 0) && (ctxt->inputNr == 1))
1834: return;
1.96 daniel 1835: }
1836:
1837: NEXT;
1838: name = xmlParseName(ctxt);
1839: if (name == NULL) {
1.123 daniel 1840: ctxt->errNo = XML_ERR_PEREF_NO_NAME;
1.96 daniel 1841: if ((ctxt->sax != NULL) && (ctxt->sax->error != NULL))
1842: ctxt->sax->error(ctxt->userData, "xmlHandlePEReference: no name\n");
1843: ctxt->wellFormed = 0;
1.180 daniel 1844: ctxt->disableSAX = 1;
1.96 daniel 1845: } else {
1.152 daniel 1846: if (RAW == ';') {
1.96 daniel 1847: NEXT;
1.98 daniel 1848: if ((ctxt->sax != NULL) && (ctxt->sax->getParameterEntity != NULL))
1849: entity = ctxt->sax->getParameterEntity(ctxt->userData, name);
1.96 daniel 1850: if (entity == NULL) {
1.98 daniel 1851:
1852: /*
1853: * [ WFC: Entity Declared ]
1854: * In a document without any DTD, a document with only an
1855: * internal DTD subset which contains no parameter entity
1856: * references, or a document with "standalone='yes'", ...
1857: * ... The declaration of a parameter entity must precede
1858: * any reference to it...
1859: */
1860: if ((ctxt->standalone == 1) ||
1861: ((ctxt->hasExternalSubset == 0) &&
1862: (ctxt->hasPErefs == 0))) {
1863: if ((ctxt->sax != NULL) && (ctxt->sax->error != NULL))
1864: ctxt->sax->error(ctxt->userData,
1865: "PEReference: %%%s; not found\n", name);
1866: ctxt->wellFormed = 0;
1.180 daniel 1867: ctxt->disableSAX = 1;
1.98 daniel 1868: } else {
1869: /*
1870: * [ VC: Entity Declared ]
1871: * In a document with an external subset or external
1872: * parameter entities with "standalone='no'", ...
1873: * ... The declaration of a parameter entity must precede
1874: * any reference to it...
1875: */
1876: if ((ctxt->sax != NULL) && (ctxt->sax->warning != NULL))
1877: ctxt->sax->warning(ctxt->userData,
1878: "PEReference: %%%s; not found\n", name);
1879: ctxt->valid = 0;
1880: }
1.96 daniel 1881: } else {
1.159 daniel 1882: if ((entity->etype == XML_INTERNAL_PARAMETER_ENTITY) ||
1883: (entity->etype == XML_EXTERNAL_PARAMETER_ENTITY)) {
1.96 daniel 1884: /*
1.156 daniel 1885: * TODO !!! handle the extra spaces added before and after
1.96 daniel 1886: * c.f. http://www.w3.org/TR/REC-xml#as-PE
1887: */
1888: input = xmlNewEntityInputStream(ctxt, entity);
1889: xmlPushInput(ctxt, input);
1.164 daniel 1890: if ((entity->etype == XML_EXTERNAL_PARAMETER_ENTITY) &&
1891: (RAW == '<') && (NXT(1) == '?') &&
1892: (NXT(2) == 'x') && (NXT(3) == 'm') &&
1893: (NXT(4) == 'l') && (IS_BLANK(NXT(5)))) {
1.172 daniel 1894: xmlParseTextDecl(ctxt);
1.164 daniel 1895: }
1896: if (ctxt->token == 0)
1897: ctxt->token = ' ';
1.96 daniel 1898: } else {
1899: if ((ctxt->sax != NULL) && (ctxt->sax->error != NULL))
1900: ctxt->sax->error(ctxt->userData,
1901: "xmlHandlePEReference: %s is not a parameter entity\n",
1902: name);
1903: ctxt->wellFormed = 0;
1.180 daniel 1904: ctxt->disableSAX = 1;
1.96 daniel 1905: }
1906: }
1907: } else {
1.123 daniel 1908: ctxt->errNo = XML_ERR_PEREF_SEMICOL_MISSING;
1.96 daniel 1909: if ((ctxt->sax != NULL) && (ctxt->sax->error != NULL))
1910: ctxt->sax->error(ctxt->userData,
1911: "xmlHandlePEReference: expecting ';'\n");
1912: ctxt->wellFormed = 0;
1.180 daniel 1913: ctxt->disableSAX = 1;
1.96 daniel 1914: }
1.119 daniel 1915: xmlFree(name);
1.97 daniel 1916: }
1917: }
1918:
/*
 * Macro used to grow the current buffer.
 *
 * Doubles <buffer>_size and reallocates <buffer> to that many xmlChar.
 * The argument must be the plain name of an xmlChar * variable that has
 * a companion integer variable named <buffer>_size in scope.
 *
 * On allocation failure the previous allocation is released (it used to
 * be leaked because the realloc result overwrote the only pointer to it)
 * and the ENCLOSING FUNCTION returns NULL, so the macro can only be used
 * inside functions returning a pointer.
 */
#define growBuffer(buffer) {						\
    xmlChar *growBuffer_tmp;						\
    buffer##_size *= 2;							\
    growBuffer_tmp = (xmlChar *)					\
		xmlRealloc(buffer, buffer##_size * sizeof(xmlChar));	\
    if (growBuffer_tmp == NULL) {					\
	perror("realloc failed");					\
	xmlFree(buffer);						\
	return(NULL);							\
    }									\
    buffer = growBuffer_tmp;						\
}
1.77 daniel 1931:
1932: /**
1933: * xmlDecodeEntities:
1934: * @ctxt: the parser context
1935: * @what: combination of XML_SUBSTITUTE_REF and XML_SUBSTITUTE_PEREF
1936: * @len: the len to decode (in bytes !), -1 for no size limit
1.123 daniel 1937: * @end: an end marker xmlChar, 0 if none
1938: * @end2: an end marker xmlChar, 0 if none
1939: * @end3: an end marker xmlChar, 0 if none
1.77 daniel 1940: *
1941: * [67] Reference ::= EntityRef | CharRef
1942: *
1943: * [69] PEReference ::= '%' Name ';'
1944: *
1945: * Returns A newly allocated string with the substitution done. The caller
1946: * must deallocate it !
1947: */
1.123 daniel 1948: xmlChar *
1.77 daniel 1949: xmlDecodeEntities(xmlParserCtxtPtr ctxt, int len, int what,
1.123 daniel 1950: xmlChar end, xmlChar end2, xmlChar end3) {
1951: xmlChar *buffer = NULL;
1.78 daniel 1952: int buffer_size = 0;
1.161 daniel 1953: int nbchars = 0;
1.78 daniel 1954:
1.123 daniel 1955: xmlChar *current = NULL;
1.77 daniel 1956: xmlEntityPtr ent;
1957: unsigned int max = (unsigned int) len;
1.161 daniel 1958: int c,l;
1.77 daniel 1959:
1.185 daniel 1960: if (ctxt->depth > 40) {
1961: if ((ctxt->sax != NULL) && (ctxt->sax->error != NULL))
1962: ctxt->sax->error(ctxt->userData,
1963: "Detected entity reference loop\n");
1964: ctxt->wellFormed = 0;
1965: ctxt->disableSAX = 1;
1966: ctxt->errNo = XML_ERR_ENTITY_LOOP;
1967: return(NULL);
1968: }
1969:
1.77 daniel 1970: /*
1971: * allocate a translation buffer.
1972: */
1.140 daniel 1973: buffer_size = XML_PARSER_BIG_BUFFER_SIZE;
1.123 daniel 1974: buffer = (xmlChar *) xmlMalloc(buffer_size * sizeof(xmlChar));
1.77 daniel 1975: if (buffer == NULL) {
1976: perror("xmlDecodeEntities: malloc failed");
1977: return(NULL);
1978: }
1979:
1.78 daniel 1980: /*
1981: * Ok loop until we reach one of the ending char or a size limit.
1982: */
1.161 daniel 1983: c = CUR_CHAR(l);
1984: while ((nbchars < max) && (c != end) &&
1985: (c != end2) && (c != end3)) {
1.77 daniel 1986:
1.161 daniel 1987: if (c == 0) break;
1988: if (((c == '&') && (ctxt->token != '&')) && (NXT(1) == '#')) {
1.98 daniel 1989: int val = xmlParseCharRef(ctxt);
1.161 daniel 1990: COPY_BUF(0,buffer,nbchars,val);
1991: NEXTL(l);
1992: } else if ((c == '&') && (ctxt->token != '&') &&
1993: (what & XML_SUBSTITUTE_REF)) {
1.98 daniel 1994: ent = xmlParseEntityRef(ctxt);
1995: if ((ent != NULL) &&
1996: (ctxt->replaceEntities != 0)) {
1997: current = ent->content;
1998: while (*current != 0) {
1.161 daniel 1999: buffer[nbchars++] = *current++;
2000: if (nbchars > buffer_size - XML_PARSER_BUFFER_SIZE) {
1.98 daniel 2001: growBuffer(buffer);
1.77 daniel 2002: }
2003: }
1.98 daniel 2004: } else if (ent != NULL) {
1.123 daniel 2005: const xmlChar *cur = ent->name;
1.98 daniel 2006:
1.161 daniel 2007: buffer[nbchars++] = '&';
2008: if (nbchars > buffer_size - XML_PARSER_BUFFER_SIZE) {
1.98 daniel 2009: growBuffer(buffer);
2010: }
1.161 daniel 2011: while (*cur != 0) {
2012: buffer[nbchars++] = *cur++;
2013: }
2014: buffer[nbchars++] = ';';
1.77 daniel 2015: }
1.161 daniel 2016: } else if (c == '%' && (what & XML_SUBSTITUTE_PEREF)) {
1.97 daniel 2017: /*
1.77 daniel 2018: * a PEReference induce to switch the entity flow,
2019: * we break here to flush the current set of chars
2020: * parsed if any. We will be called back later.
1.97 daniel 2021: */
1.91 daniel 2022: if (nbchars != 0) break;
1.77 daniel 2023:
2024: xmlParsePEReference(ctxt);
1.79 daniel 2025:
1.97 daniel 2026: /*
1.79 daniel 2027: * Pop-up of finished entities.
1.97 daniel 2028: */
1.152 daniel 2029: while ((RAW == 0) && (ctxt->inputNr > 1))
1.79 daniel 2030: xmlPopInput(ctxt);
2031:
1.98 daniel 2032: break;
1.77 daniel 2033: } else {
1.161 daniel 2034: COPY_BUF(l,buffer,nbchars,c);
2035: NEXTL(l);
2036: if (nbchars > buffer_size - XML_PARSER_BUFFER_SIZE) {
1.86 daniel 2037: growBuffer(buffer);
2038: }
1.77 daniel 2039: }
1.161 daniel 2040: c = CUR_CHAR(l);
1.77 daniel 2041: }
1.161 daniel 2042: buffer[nbchars++] = 0;
1.77 daniel 2043: return(buffer);
2044: }
2045:
1.135 daniel 2046: /**
2047: * xmlStringDecodeEntities:
2048: * @ctxt: the parser context
2049: * @str: the input string
2050: * @what: combination of XML_SUBSTITUTE_REF and XML_SUBSTITUTE_PEREF
2051: * @end: an end marker xmlChar, 0 if none
2052: * @end2: an end marker xmlChar, 0 if none
2053: * @end3: an end marker xmlChar, 0 if none
2054: *
2055: * [67] Reference ::= EntityRef | CharRef
2056: *
2057: * [69] PEReference ::= '%' Name ';'
2058: *
2059: * Returns A newly allocated string with the substitution done. The caller
2060: * must deallocate it !
2061: */
2062: xmlChar *
2063: xmlStringDecodeEntities(xmlParserCtxtPtr ctxt, const xmlChar *str, int what,
2064: xmlChar end, xmlChar end2, xmlChar end3) {
2065: xmlChar *buffer = NULL;
2066: int buffer_size = 0;
2067:
2068: xmlChar *current = NULL;
2069: xmlEntityPtr ent;
1.176 daniel 2070: int c,l;
2071: int nbchars = 0;
1.135 daniel 2072:
1.185 daniel 2073: if (ctxt->depth > 40) {
2074: if ((ctxt->sax != NULL) && (ctxt->sax->error != NULL))
2075: ctxt->sax->error(ctxt->userData,
2076: "Detected entity reference loop\n");
2077: ctxt->wellFormed = 0;
2078: ctxt->disableSAX = 1;
2079: ctxt->errNo = XML_ERR_ENTITY_LOOP;
2080: return(NULL);
2081: }
2082:
1.135 daniel 2083: /*
2084: * allocate a translation buffer.
2085: */
1.140 daniel 2086: buffer_size = XML_PARSER_BIG_BUFFER_SIZE;
1.135 daniel 2087: buffer = (xmlChar *) xmlMalloc(buffer_size * sizeof(xmlChar));
2088: if (buffer == NULL) {
2089: perror("xmlDecodeEntities: malloc failed");
2090: return(NULL);
2091: }
2092:
2093: /*
2094: * Ok loop until we reach one of the ending char or a size limit.
2095: */
1.176 daniel 2096: c = CUR_SCHAR(str, l);
2097: while ((c != 0) && (c != end) && (c != end2) && (c != end3)) {
1.135 daniel 2098:
1.176 daniel 2099: if (c == 0) break;
2100: if ((c == '&') && (str[1] == '#')) {
1.135 daniel 2101: int val = xmlParseStringCharRef(ctxt, &str);
1.176 daniel 2102: if (val != 0) {
2103: COPY_BUF(0,buffer,nbchars,val);
2104: }
2105: } else if ((c == '&') && (what & XML_SUBSTITUTE_REF)) {
1.135 daniel 2106: ent = xmlParseStringEntityRef(ctxt, &str);
1.185 daniel 2107: if ((ent != NULL) && (ent->content != NULL)) {
2108: xmlChar *rep;
2109:
2110: ctxt->depth++;
2111: rep = xmlStringDecodeEntities(ctxt, ent->content, what,
2112: 0, 0, 0);
2113: ctxt->depth--;
2114: if (rep != NULL) {
2115: current = rep;
2116: while (*current != 0) {
2117: buffer[nbchars++] = *current++;
2118: if (nbchars >
2119: buffer_size - XML_PARSER_BUFFER_SIZE) {
2120: growBuffer(buffer);
2121: }
1.135 daniel 2122: }
1.185 daniel 2123: xmlFree(rep);
1.135 daniel 2124: }
2125: } else if (ent != NULL) {
2126: int i = xmlStrlen(ent->name);
2127: const xmlChar *cur = ent->name;
2128:
1.176 daniel 2129: buffer[nbchars++] = '&';
2130: if (nbchars > buffer_size - i - XML_PARSER_BUFFER_SIZE) {
1.135 daniel 2131: growBuffer(buffer);
2132: }
2133: for (;i > 0;i--)
1.176 daniel 2134: buffer[nbchars++] = *cur++;
2135: buffer[nbchars++] = ';';
1.135 daniel 2136: }
1.176 daniel 2137: } else if (c == '%' && (what & XML_SUBSTITUTE_PEREF)) {
1.135 daniel 2138: ent = xmlParseStringPEReference(ctxt, &str);
2139: if (ent != NULL) {
1.185 daniel 2140: xmlChar *rep;
2141:
2142: ctxt->depth++;
2143: rep = xmlStringDecodeEntities(ctxt, ent->content, what,
2144: 0, 0, 0);
2145: ctxt->depth--;
2146: if (rep != NULL) {
2147: current = rep;
2148: while (*current != 0) {
2149: buffer[nbchars++] = *current++;
2150: if (nbchars >
2151: buffer_size - XML_PARSER_BUFFER_SIZE) {
2152: growBuffer(buffer);
2153: }
1.135 daniel 2154: }
1.185 daniel 2155: xmlFree(rep);
1.135 daniel 2156: }
2157: }
2158: } else {
1.176 daniel 2159: COPY_BUF(l,buffer,nbchars,c);
2160: str += l;
2161: if (nbchars > buffer_size - XML_PARSER_BUFFER_SIZE) {
1.135 daniel 2162: growBuffer(buffer);
2163: }
2164: }
1.176 daniel 2165: c = CUR_SCHAR(str, l);
1.135 daniel 2166: }
1.176 daniel 2167: buffer[nbchars++] = 0;
1.135 daniel 2168: return(buffer);
2169: }
2170:
1.1 veillard 2171:
1.28 daniel 2172: /************************************************************************
2173: * *
1.75 daniel 2174: * Commodity functions to handle encodings *
2175: * *
2176: ************************************************************************/
2177:
1.172 daniel 2178: /*
2179: * xmlCheckLanguageID
2180: * @lang: pointer to the string value
2181: *
2182: * Checks that the value conforms to the LanguageID production:
2183: *
2184: * [33] LanguageID ::= Langcode ('-' Subcode)*
2185: * [34] Langcode ::= ISO639Code | IanaCode | UserCode
2186: * [35] ISO639Code ::= ([a-z] | [A-Z]) ([a-z] | [A-Z])
2187: * [36] IanaCode ::= ('i' | 'I') '-' ([a-z] | [A-Z])+
2188: * [37] UserCode ::= ('x' | 'X') '-' ([a-z] | [A-Z])+
2189: * [38] Subcode ::= ([a-z] | [A-Z])+
2190: *
2191: * Returns 1 if correct 0 otherwise
2192: **/
2193: int
2194: xmlCheckLanguageID(const xmlChar *lang) {
2195: const xmlChar *cur = lang;
2196:
2197: if (cur == NULL)
2198: return(0);
2199: if (((cur[0] == 'i') && (cur[1] == '-')) ||
2200: ((cur[0] == 'I') && (cur[1] == '-'))) {
2201: /*
2202: * IANA code
2203: */
2204: cur += 2;
2205: while (((cur[0] >= 'A') && (cur[0] <= 'Z')) ||
2206: ((cur[0] >= 'a') && (cur[0] <= 'z')))
2207: cur++;
2208: } else if (((cur[0] == 'x') && (cur[1] == '-')) ||
2209: ((cur[0] == 'X') && (cur[1] == '-'))) {
2210: /*
2211: * User code
2212: */
2213: cur += 2;
2214: while (((cur[0] >= 'A') && (cur[0] <= 'Z')) ||
2215: ((cur[0] >= 'a') && (cur[0] <= 'z')))
2216: cur++;
2217: } else if (((cur[0] >= 'A') && (cur[0] <= 'Z')) ||
2218: ((cur[0] >= 'a') && (cur[0] <= 'z'))) {
2219: /*
2220: * ISO639
2221: */
2222: cur++;
2223: if (((cur[0] >= 'A') && (cur[0] <= 'Z')) ||
2224: ((cur[0] >= 'a') && (cur[0] <= 'z')))
2225: cur++;
2226: else
2227: return(0);
2228: } else
2229: return(0);
2230: while (cur[0] != 0) {
2231: if (cur[0] != '-')
2232: return(0);
2233: cur++;
2234: if (((cur[0] >= 'A') && (cur[0] <= 'Z')) ||
2235: ((cur[0] >= 'a') && (cur[0] <= 'z')))
2236: cur++;
2237: else
2238: return(0);
2239: while (((cur[0] >= 'A') && (cur[0] <= 'Z')) ||
2240: ((cur[0] >= 'a') && (cur[0] <= 'z')))
2241: cur++;
2242: }
2243: return(1);
2244: }
2245:
1.75 daniel 2246: /**
2247: * xmlSwitchEncoding:
2248: * @ctxt: the parser context
1.124 daniel 2249: * @enc: the encoding value (number)
1.75 daniel 2250: *
2251: * change the input functions when discovering the character encoding
2252: * of a given entity.
2253: */
2254: void
2255: xmlSwitchEncoding(xmlParserCtxtPtr ctxt, xmlCharEncoding enc)
2256: {
1.156 daniel 2257: xmlCharEncodingHandlerPtr handler;
2258:
2259: handler = xmlGetCharEncodingHandler(enc);
2260: if (handler != NULL) {
2261: if (ctxt->input != NULL) {
2262: if (ctxt->input->buf != NULL) {
2263: if (ctxt->input->buf->encoder != NULL) {
2264: if ((ctxt->sax != NULL) && (ctxt->sax->error != NULL))
2265: ctxt->sax->error(ctxt->userData,
2266: "xmlSwitchEncoding : encoder already regitered\n");
2267: return;
2268: }
2269: ctxt->input->buf->encoder = handler;
2270:
2271: /*
2272: * Is there already some content down the pipe to convert
2273: */
2274: if ((ctxt->input->buf->buffer != NULL) &&
2275: (ctxt->input->buf->buffer->use > 0)) {
2276: xmlChar *buf;
2277: int res, len, size;
2278: int processed;
2279:
2280: /*
2281: * Specific handling of the Byte Order Mark for
2282: * UTF-16
2283: */
2284: if ((enc == XML_CHAR_ENCODING_UTF16LE) &&
2285: (ctxt->input->cur[0] == 0xFF) &&
2286: (ctxt->input->cur[1] == 0xFE)) {
2287: SKIP(2);
2288: }
2289: if ((enc == XML_CHAR_ENCODING_UTF16BE) &&
2290: (ctxt->input->cur[0] == 0xFE) &&
2291: (ctxt->input->cur[1] == 0xFF)) {
2292: SKIP(2);
2293: }
2294:
2295: /*
2296: * convert the non processed part
2297: */
2298: processed = ctxt->input->cur - ctxt->input->base;
2299: len = ctxt->input->buf->buffer->use - processed;
2300:
2301: if (len <= 0) {
2302: return;
2303: }
2304: size = ctxt->input->buf->buffer->use * 4;
2305: if (size < 4000)
2306: size = 4000;
1.167 daniel 2307: retry_larger:
1.160 daniel 2308: buf = (xmlChar *) xmlMalloc(size + 1);
1.156 daniel 2309: if (buf == NULL) {
2310: if ((ctxt->sax != NULL) && (ctxt->sax->error != NULL))
2311: ctxt->sax->error(ctxt->userData,
2312: "xmlSwitchEncoding : out of memory\n");
2313: return;
2314: }
1.160 daniel 2315: /* TODO !!! Handling of buf too small */
1.156 daniel 2316: res = handler->input(buf, size, ctxt->input->cur, &len);
1.167 daniel 2317: if (res == -1) {
2318: size *= 2;
2319: xmlFree(buf);
2320: goto retry_larger;
2321: }
1.156 daniel 2322: if ((res < 0) ||
2323: (len != ctxt->input->buf->buffer->use - processed)) {
2324: if ((ctxt->sax != NULL) && (ctxt->sax->error != NULL))
2325: ctxt->sax->error(ctxt->userData,
2326: "xmlSwitchEncoding : conversion failed\n");
2327: xmlFree(buf);
2328: return;
2329: }
1.167 daniel 2330:
1.156 daniel 2331: /*
2332: * Conversion succeeded, get rid of the old buffer
2333: */
2334: xmlFree(ctxt->input->buf->buffer->content);
2335: ctxt->input->buf->buffer->content = buf;
2336: ctxt->input->base = buf;
2337: ctxt->input->cur = buf;
2338: ctxt->input->buf->buffer->size = size;
2339: ctxt->input->buf->buffer->use = res;
1.160 daniel 2340: buf[res] = 0;
1.156 daniel 2341: }
2342: return;
2343: } else {
2344: if (ctxt->input->length == 0) {
2345: /*
2346: * When parsing a static memory array one must know the
2347: * size to be able to convert the buffer.
2348: */
2349: if ((ctxt->sax != NULL) && (ctxt->sax->error != NULL))
2350: ctxt->sax->error(ctxt->userData,
2351: "xmlSwitchEncoding : no input\n");
2352: return;
2353: } else {
2354: xmlChar *buf;
2355: int res, len;
2356: int processed = ctxt->input->cur - ctxt->input->base;
2357:
2358: /*
2359: * convert the non processed part
2360: */
2361: len = ctxt->input->length - processed;
2362: if (len <= 0) {
2363: if ((ctxt->sax != NULL) && (ctxt->sax->error != NULL))
2364: ctxt->sax->error(ctxt->userData,
2365: "xmlSwitchEncoding : input fully consumed?\n");
2366: return;
2367: }
2368: buf = (xmlChar *) xmlMalloc(ctxt->input->length * 4);
2369: if (buf == NULL) {
2370: if ((ctxt->sax != NULL) && (ctxt->sax->error != NULL))
2371: ctxt->sax->error(ctxt->userData,
2372: "xmlSwitchEncoding : out of memory\n");
2373: return;
2374: }
2375: res = handler->input(buf, ctxt->input->length * 4,
2376: ctxt->input->cur, &len);
2377: if ((res < 0) ||
2378: (len != ctxt->input->length - processed)) {
2379: if ((ctxt->sax != NULL) && (ctxt->sax->error != NULL))
2380: ctxt->sax->error(ctxt->userData,
2381: "xmlSwitchEncoding : conversion failed\n");
2382: xmlFree(buf);
2383: return;
2384: }
2385: /*
2386: * Conversion succeeded, get rid of the old buffer
2387: */
2388: if ((ctxt->input->free != NULL) &&
2389: (ctxt->input->base != NULL))
2390: ctxt->input->free((xmlChar *) ctxt->input->base);
2391: ctxt->input->base = ctxt->input->cur = buf;
2392: ctxt->input->length = res;
2393: }
2394: }
2395: } else {
2396: if ((ctxt->sax != NULL) && (ctxt->sax->error != NULL))
2397: ctxt->sax->error(ctxt->userData,
2398: "xmlSwitchEncoding : no input\n");
2399: }
2400: }
2401:
1.75 daniel 2402: switch (enc) {
2403: case XML_CHAR_ENCODING_ERROR:
1.123 daniel 2404: ctxt->errNo = XML_ERR_UNKNOWN_ENCODING;
1.75 daniel 2405: if ((ctxt->sax != NULL) && (ctxt->sax->error != NULL))
2406: ctxt->sax->error(ctxt->userData, "encoding unknown\n");
2407: ctxt->wellFormed = 0;
1.180 daniel 2408: ctxt->disableSAX = 1;
1.75 daniel 2409: break;
2410: case XML_CHAR_ENCODING_NONE:
2411: /* let's assume it's UTF-8 without the XML decl */
2412: return;
2413: case XML_CHAR_ENCODING_UTF8:
2414: /* default encoding, no conversion should be needed */
2415: return;
2416: case XML_CHAR_ENCODING_UTF16LE:
1.123 daniel 2417: ctxt->errNo = XML_ERR_UNSUPPORTED_ENCODING;
1.75 daniel 2418: if ((ctxt->sax != NULL) && (ctxt->sax->error != NULL))
2419: ctxt->sax->error(ctxt->userData,
2420: "char encoding UTF16 little endian not supported\n");
2421: break;
2422: case XML_CHAR_ENCODING_UTF16BE:
1.123 daniel 2423: ctxt->errNo = XML_ERR_UNSUPPORTED_ENCODING;
1.75 daniel 2424: if ((ctxt->sax != NULL) && (ctxt->sax->error != NULL))
2425: ctxt->sax->error(ctxt->userData,
2426: "char encoding UTF16 big endian not supported\n");
2427: break;
2428: case XML_CHAR_ENCODING_UCS4LE:
1.123 daniel 2429: ctxt->errNo = XML_ERR_UNSUPPORTED_ENCODING;
1.75 daniel 2430: if ((ctxt->sax != NULL) && (ctxt->sax->error != NULL))
2431: ctxt->sax->error(ctxt->userData,
2432: "char encoding USC4 little endian not supported\n");
2433: break;
2434: case XML_CHAR_ENCODING_UCS4BE:
1.123 daniel 2435: ctxt->errNo = XML_ERR_UNSUPPORTED_ENCODING;
1.75 daniel 2436: if ((ctxt->sax != NULL) && (ctxt->sax->error != NULL))
2437: ctxt->sax->error(ctxt->userData,
2438: "char encoding USC4 big endian not supported\n");
2439: break;
2440: case XML_CHAR_ENCODING_EBCDIC:
1.123 daniel 2441: ctxt->errNo = XML_ERR_UNSUPPORTED_ENCODING;
1.75 daniel 2442: if ((ctxt->sax != NULL) && (ctxt->sax->error != NULL))
2443: ctxt->sax->error(ctxt->userData,
2444: "char encoding EBCDIC not supported\n");
2445: break;
2446: case XML_CHAR_ENCODING_UCS4_2143:
1.123 daniel 2447: ctxt->errNo = XML_ERR_UNSUPPORTED_ENCODING;
1.75 daniel 2448: if ((ctxt->sax != NULL) && (ctxt->sax->error != NULL))
2449: ctxt->sax->error(ctxt->userData,
2450: "char encoding UCS4 2143 not supported\n");
2451: break;
2452: case XML_CHAR_ENCODING_UCS4_3412:
1.123 daniel 2453: ctxt->errNo = XML_ERR_UNSUPPORTED_ENCODING;
1.75 daniel 2454: if ((ctxt->sax != NULL) && (ctxt->sax->error != NULL))
2455: ctxt->sax->error(ctxt->userData,
2456: "char encoding UCS4 3412 not supported\n");
2457: break;
2458: case XML_CHAR_ENCODING_UCS2:
1.123 daniel 2459: ctxt->errNo = XML_ERR_UNSUPPORTED_ENCODING;
1.75 daniel 2460: if ((ctxt->sax != NULL) && (ctxt->sax->error != NULL))
2461: ctxt->sax->error(ctxt->userData,
2462: "char encoding UCS2 not supported\n");
2463: break;
2464: case XML_CHAR_ENCODING_8859_1:
1.123 daniel 2465: ctxt->errNo = XML_ERR_UNSUPPORTED_ENCODING;
1.75 daniel 2466: if ((ctxt->sax != NULL) && (ctxt->sax->error != NULL))
2467: ctxt->sax->error(ctxt->userData,
2468: "char encoding ISO_8859_1 ISO Latin 1 not supported\n");
2469: break;
2470: case XML_CHAR_ENCODING_8859_2:
1.123 daniel 2471: ctxt->errNo = XML_ERR_UNSUPPORTED_ENCODING;
1.75 daniel 2472: if ((ctxt->sax != NULL) && (ctxt->sax->error != NULL))
2473: ctxt->sax->error(ctxt->userData,
2474: "char encoding ISO_8859_2 ISO Latin 2 not supported\n");
2475: break;
2476: case XML_CHAR_ENCODING_8859_3:
1.123 daniel 2477: ctxt->errNo = XML_ERR_UNSUPPORTED_ENCODING;
1.75 daniel 2478: if ((ctxt->sax != NULL) && (ctxt->sax->error != NULL))
2479: ctxt->sax->error(ctxt->userData,
2480: "char encoding ISO_8859_3 not supported\n");
2481: break;
2482: case XML_CHAR_ENCODING_8859_4:
1.123 daniel 2483: ctxt->errNo = XML_ERR_UNSUPPORTED_ENCODING;
1.75 daniel 2484: if ((ctxt->sax != NULL) && (ctxt->sax->error != NULL))
2485: ctxt->sax->error(ctxt->userData,
2486: "char encoding ISO_8859_4 not supported\n");
2487: break;
2488: case XML_CHAR_ENCODING_8859_5:
1.123 daniel 2489: ctxt->errNo = XML_ERR_UNSUPPORTED_ENCODING;
1.75 daniel 2490: if ((ctxt->sax != NULL) && (ctxt->sax->error != NULL))
2491: ctxt->sax->error(ctxt->userData,
2492: "char encoding ISO_8859_5 not supported\n");
2493: break;
2494: case XML_CHAR_ENCODING_8859_6:
1.123 daniel 2495: ctxt->errNo = XML_ERR_UNSUPPORTED_ENCODING;
1.75 daniel 2496: if ((ctxt->sax != NULL) && (ctxt->sax->error != NULL))
2497: ctxt->sax->error(ctxt->userData,
2498: "char encoding ISO_8859_6 not supported\n");
2499: break;
2500: case XML_CHAR_ENCODING_8859_7:
1.123 daniel 2501: ctxt->errNo = XML_ERR_UNSUPPORTED_ENCODING;
1.75 daniel 2502: if ((ctxt->sax != NULL) && (ctxt->sax->error != NULL))
2503: ctxt->sax->error(ctxt->userData,
2504: "char encoding ISO_8859_7 not supported\n");
2505: break;
2506: case XML_CHAR_ENCODING_8859_8:
1.123 daniel 2507: ctxt->errNo = XML_ERR_UNSUPPORTED_ENCODING;
1.75 daniel 2508: if ((ctxt->sax != NULL) && (ctxt->sax->error != NULL))
2509: ctxt->sax->error(ctxt->userData,
2510: "char encoding ISO_8859_8 not supported\n");
2511: break;
2512: case XML_CHAR_ENCODING_8859_9:
1.123 daniel 2513: ctxt->errNo = XML_ERR_UNSUPPORTED_ENCODING;
1.75 daniel 2514: if ((ctxt->sax != NULL) && (ctxt->sax->error != NULL))
2515: ctxt->sax->error(ctxt->userData,
2516: "char encoding ISO_8859_9 not supported\n");
2517: break;
2518: case XML_CHAR_ENCODING_2022_JP:
1.123 daniel 2519: ctxt->errNo = XML_ERR_UNSUPPORTED_ENCODING;
1.75 daniel 2520: if ((ctxt->sax != NULL) && (ctxt->sax->error != NULL))
2521: ctxt->sax->error(ctxt->userData,
2522: "char encoding ISO-2022-JPnot supported\n");
2523: break;
2524: case XML_CHAR_ENCODING_SHIFT_JIS:
1.123 daniel 2525: ctxt->errNo = XML_ERR_UNSUPPORTED_ENCODING;
1.75 daniel 2526: if ((ctxt->sax != NULL) && (ctxt->sax->error != NULL))
2527: ctxt->sax->error(ctxt->userData,
2528: "char encoding Shift_JISnot supported\n");
2529: break;
2530: case XML_CHAR_ENCODING_EUC_JP:
1.123 daniel 2531: ctxt->errNo = XML_ERR_UNSUPPORTED_ENCODING;
1.75 daniel 2532: if ((ctxt->sax != NULL) && (ctxt->sax->error != NULL))
2533: ctxt->sax->error(ctxt->userData,
2534: "char encoding EUC-JPnot supported\n");
2535: break;
2536: }
2537: }
2538:
2539: /************************************************************************
2540: * *
1.123 daniel 2541: * Commodity functions to handle xmlChars *
1.28 daniel 2542: * *
2543: ************************************************************************/
2544:
1.50 daniel 2545: /**
2546: * xmlStrndup:
1.123 daniel 2547: * @cur: the input xmlChar *
1.50 daniel 2548: * @len: the len of @cur
2549: *
1.123 daniel 2550: * a strndup for array of xmlChar's
1.68 daniel 2551: *
1.123 daniel 2552: * Returns a new xmlChar * or NULL
1.1 veillard 2553: */
1.123 daniel 2554: xmlChar *
2555: xmlStrndup(const xmlChar *cur, int len) {
1.135 daniel 2556: xmlChar *ret;
2557:
2558: if ((cur == NULL) || (len < 0)) return(NULL);
2559: ret = xmlMalloc((len + 1) * sizeof(xmlChar));
1.1 veillard 2560: if (ret == NULL) {
1.86 daniel 2561: fprintf(stderr, "malloc of %ld byte failed\n",
1.123 daniel 2562: (len + 1) * (long)sizeof(xmlChar));
1.1 veillard 2563: return(NULL);
2564: }
1.123 daniel 2565: memcpy(ret, cur, len * sizeof(xmlChar));
1.1 veillard 2566: ret[len] = 0;
2567: return(ret);
2568: }
2569:
1.50 daniel 2570: /**
2571: * xmlStrdup:
1.123 daniel 2572: * @cur: the input xmlChar *
1.50 daniel 2573: *
1.152 daniel 2574: * a strdup for array of xmlChar's. Since they are supposed to be
2575: * encoded in UTF-8 or an encoding with 8bit based chars, we assume
2576: * a termination mark of '0'.
1.68 daniel 2577: *
1.123 daniel 2578: * Returns a new xmlChar * or NULL
1.1 veillard 2579: */
1.123 daniel 2580: xmlChar *
2581: xmlStrdup(const xmlChar *cur) {
2582: const xmlChar *p = cur;
1.1 veillard 2583:
1.135 daniel 2584: if (cur == NULL) return(NULL);
1.152 daniel 2585: while (*p != 0) p++;
1.1 veillard 2586: return(xmlStrndup(cur, p - cur));
2587: }
2588:
1.50 daniel 2589: /**
2590: * xmlCharStrndup:
2591: * @cur: the input char *
2592: * @len: the len of @cur
2593: *
1.123 daniel 2594: * a strndup for char's to xmlChar's
1.68 daniel 2595: *
1.123 daniel 2596: * Returns a new xmlChar * or NULL
1.45 daniel 2597: */
2598:
1.123 daniel 2599: xmlChar *
1.55 daniel 2600: xmlCharStrndup(const char *cur, int len) {
1.45 daniel 2601: int i;
1.135 daniel 2602: xmlChar *ret;
2603:
2604: if ((cur == NULL) || (len < 0)) return(NULL);
2605: ret = xmlMalloc((len + 1) * sizeof(xmlChar));
1.45 daniel 2606: if (ret == NULL) {
1.86 daniel 2607: fprintf(stderr, "malloc of %ld byte failed\n",
1.123 daniel 2608: (len + 1) * (long)sizeof(xmlChar));
1.45 daniel 2609: return(NULL);
2610: }
2611: for (i = 0;i < len;i++)
1.123 daniel 2612: ret[i] = (xmlChar) cur[i];
1.45 daniel 2613: ret[len] = 0;
2614: return(ret);
2615: }
2616:
1.50 daniel 2617: /**
2618: * xmlCharStrdup:
2619: * @cur: the input char *
2620: * @len: the len of @cur
2621: *
1.123 daniel 2622: * a strdup for char's to xmlChar's
1.68 daniel 2623: *
1.123 daniel 2624: * Returns a new xmlChar * or NULL
1.45 daniel 2625: */
2626:
1.123 daniel 2627: xmlChar *
1.55 daniel 2628: xmlCharStrdup(const char *cur) {
1.45 daniel 2629: const char *p = cur;
2630:
1.135 daniel 2631: if (cur == NULL) return(NULL);
1.45 daniel 2632: while (*p != '\0') p++;
2633: return(xmlCharStrndup(cur, p - cur));
2634: }
2635:
1.50 daniel 2636: /**
2637: * xmlStrcmp:
1.123 daniel 2638: * @str1: the first xmlChar *
2639: * @str2: the second xmlChar *
1.50 daniel 2640: *
1.123 daniel 2641: * a strcmp for xmlChar's
1.68 daniel 2642: *
2643: * Returns the integer result of the comparison
1.14 veillard 2644: */
2645:
1.55 daniel 2646: int
1.123 daniel 2647: xmlStrcmp(const xmlChar *str1, const xmlChar *str2) {
1.14 veillard 2648: register int tmp;
2649:
1.135 daniel 2650: if ((str1 == NULL) && (str2 == NULL)) return(0);
2651: if (str1 == NULL) return(-1);
2652: if (str2 == NULL) return(1);
1.14 veillard 2653: do {
2654: tmp = *str1++ - *str2++;
2655: if (tmp != 0) return(tmp);
2656: } while ((*str1 != 0) && (*str2 != 0));
2657: return (*str1 - *str2);
2658: }
2659:
1.50 daniel 2660: /**
2661: * xmlStrncmp:
1.123 daniel 2662: * @str1: the first xmlChar *
2663: * @str2: the second xmlChar *
1.50 daniel 2664: * @len: the max comparison length
2665: *
1.123 daniel 2666: * a strncmp for xmlChar's
1.68 daniel 2667: *
2668: * Returns the integer result of the comparison
1.14 veillard 2669: */
2670:
1.55 daniel 2671: int
1.123 daniel 2672: xmlStrncmp(const xmlChar *str1, const xmlChar *str2, int len) {
1.14 veillard 2673: register int tmp;
2674:
2675: if (len <= 0) return(0);
1.135 daniel 2676: if ((str1 == NULL) && (str2 == NULL)) return(0);
2677: if (str1 == NULL) return(-1);
2678: if (str2 == NULL) return(1);
1.14 veillard 2679: do {
2680: tmp = *str1++ - *str2++;
2681: if (tmp != 0) return(tmp);
2682: len--;
2683: if (len <= 0) return(0);
2684: } while ((*str1 != 0) && (*str2 != 0));
2685: return (*str1 - *str2);
2686: }
2687:
1.50 daniel 2688: /**
2689: * xmlStrchr:
1.123 daniel 2690: * @str: the xmlChar * array
2691: * @val: the xmlChar to search
1.50 daniel 2692: *
1.123 daniel 2693: * a strchr for xmlChar's
1.68 daniel 2694: *
1.123 daniel 2695: * Returns the xmlChar * for the first occurence or NULL.
1.14 veillard 2696: */
2697:
1.123 daniel 2698: const xmlChar *
2699: xmlStrchr(const xmlChar *str, xmlChar val) {
1.135 daniel 2700: if (str == NULL) return(NULL);
1.14 veillard 2701: while (*str != 0) {
1.123 daniel 2702: if (*str == val) return((xmlChar *) str);
1.14 veillard 2703: str++;
2704: }
2705: return(NULL);
1.89 daniel 2706: }
2707:
2708: /**
2709: * xmlStrstr:
1.123 daniel 2710: * @str: the xmlChar * array (haystack)
2711: * @val: the xmlChar to search (needle)
1.89 daniel 2712: *
1.123 daniel 2713: * a strstr for xmlChar's
1.89 daniel 2714: *
1.123 daniel 2715: * Returns the xmlChar * for the first occurence or NULL.
1.89 daniel 2716: */
2717:
1.123 daniel 2718: const xmlChar *
2719: xmlStrstr(const xmlChar *str, xmlChar *val) {
1.89 daniel 2720: int n;
2721:
2722: if (str == NULL) return(NULL);
2723: if (val == NULL) return(NULL);
2724: n = xmlStrlen(val);
2725:
2726: if (n == 0) return(str);
2727: while (*str != 0) {
2728: if (*str == *val) {
1.123 daniel 2729: if (!xmlStrncmp(str, val, n)) return((const xmlChar *) str);
1.89 daniel 2730: }
2731: str++;
2732: }
2733: return(NULL);
2734: }
2735:
2736: /**
2737: * xmlStrsub:
1.123 daniel 2738: * @str: the xmlChar * array (haystack)
1.89 daniel 2739: * @start: the index of the first char (zero based)
2740: * @len: the length of the substring
2741: *
2742: * Extract a substring of a given string
2743: *
1.123 daniel 2744: * Returns the xmlChar * for the first occurence or NULL.
1.89 daniel 2745: */
2746:
1.123 daniel 2747: xmlChar *
2748: xmlStrsub(const xmlChar *str, int start, int len) {
1.89 daniel 2749: int i;
2750:
2751: if (str == NULL) return(NULL);
2752: if (start < 0) return(NULL);
1.90 daniel 2753: if (len < 0) return(NULL);
1.89 daniel 2754:
2755: for (i = 0;i < start;i++) {
2756: if (*str == 0) return(NULL);
2757: str++;
2758: }
2759: if (*str == 0) return(NULL);
2760: return(xmlStrndup(str, len));
1.14 veillard 2761: }
1.28 daniel 2762:
1.50 daniel 2763: /**
2764: * xmlStrlen:
1.123 daniel 2765: * @str: the xmlChar * array
1.50 daniel 2766: *
1.127 daniel 2767: * length of a xmlChar's string
1.68 daniel 2768: *
1.123 daniel 2769: * Returns the number of xmlChar contained in the ARRAY.
1.45 daniel 2770: */
2771:
1.55 daniel 2772: int
1.123 daniel 2773: xmlStrlen(const xmlChar *str) {
1.45 daniel 2774: int len = 0;
2775:
2776: if (str == NULL) return(0);
2777: while (*str != 0) {
2778: str++;
2779: len++;
2780: }
2781: return(len);
2782: }
2783:
1.50 daniel 2784: /**
2785: * xmlStrncat:
1.123 daniel 2786: * @cur: the original xmlChar * array
2787: * @add: the xmlChar * array added
1.50 daniel 2788: * @len: the length of @add
2789: *
1.123 daniel 2790: * a strncat for array of xmlChar's
1.68 daniel 2791: *
1.123 daniel 2792: * Returns a new xmlChar * containing the concatenated string.
1.45 daniel 2793: */
2794:
1.123 daniel 2795: xmlChar *
2796: xmlStrncat(xmlChar *cur, const xmlChar *add, int len) {
1.45 daniel 2797: int size;
1.123 daniel 2798: xmlChar *ret;
1.45 daniel 2799:
2800: if ((add == NULL) || (len == 0))
2801: return(cur);
2802: if (cur == NULL)
2803: return(xmlStrndup(add, len));
2804:
2805: size = xmlStrlen(cur);
1.123 daniel 2806: ret = xmlRealloc(cur, (size + len + 1) * sizeof(xmlChar));
1.45 daniel 2807: if (ret == NULL) {
1.86 daniel 2808: fprintf(stderr, "xmlStrncat: realloc of %ld byte failed\n",
1.123 daniel 2809: (size + len + 1) * (long)sizeof(xmlChar));
1.45 daniel 2810: return(cur);
2811: }
1.123 daniel 2812: memcpy(&ret[size], add, len * sizeof(xmlChar));
1.45 daniel 2813: ret[size + len] = 0;
2814: return(ret);
2815: }
2816:
1.50 daniel 2817: /**
2818: * xmlStrcat:
1.123 daniel 2819: * @cur: the original xmlChar * array
2820: * @add: the xmlChar * array added
1.50 daniel 2821: *
1.152 daniel 2822: * a strcat for array of xmlChar's. Since they are supposed to be
2823: * encoded in UTF-8 or an encoding with 8bit based chars, we assume
2824: * a termination mark of '0'.
1.68 daniel 2825: *
1.123 daniel 2826: * Returns a new xmlChar * containing the concatenated string.
1.45 daniel 2827: */
1.123 daniel 2828: xmlChar *
2829: xmlStrcat(xmlChar *cur, const xmlChar *add) {
2830: const xmlChar *p = add;
1.45 daniel 2831:
2832: if (add == NULL) return(cur);
2833: if (cur == NULL)
2834: return(xmlStrdup(add));
2835:
1.152 daniel 2836: while (*p != 0) p++;
1.45 daniel 2837: return(xmlStrncat(cur, add, p - add));
2838: }
2839:
2840: /************************************************************************
2841: * *
2842: * Commodity functions, cleanup needed ? *
2843: * *
2844: ************************************************************************/
2845:
1.50 daniel 2846: /**
2847: * areBlanks:
2848: * @ctxt: an XML parser context
1.123 daniel 2849: * @str: a xmlChar *
1.50 daniel 2850: * @len: the size of @str
2851: *
1.45 daniel 2852: * Is this a sequence of blank chars that one can ignore ?
1.50 daniel 2853: *
1.68 daniel 2854: * Returns 1 if ignorable 0 otherwise.
1.45 daniel 2855: */
2856:
1.123 daniel 2857: static int areBlanks(xmlParserCtxtPtr ctxt, const xmlChar *str, int len) {
1.104 daniel 2858: int i, ret;
1.45 daniel 2859: xmlNodePtr lastChild;
2860:
1.176 daniel 2861: /*
2862: * Check for xml:space value.
2863: */
2864: if (*(ctxt->space) == 1)
2865: return(0);
2866:
2867: /*
2868: * Check that the string is made of blanks
2869: */
1.45 daniel 2870: for (i = 0;i < len;i++)
2871: if (!(IS_BLANK(str[i]))) return(0);
2872:
1.176 daniel 2873: /*
2874: * Look if the element is mixed content in the Dtd if available
2875: */
1.104 daniel 2876: if (ctxt->myDoc != NULL) {
2877: ret = xmlIsMixedElement(ctxt->myDoc, ctxt->node->name);
2878: if (ret == 0) return(1);
2879: if (ret == 1) return(0);
2880: }
1.176 daniel 2881:
1.104 daniel 2882: /*
1.176 daniel 2883: * Otherwise, heuristic :-\
1.104 daniel 2884: */
1.179 daniel 2885: if (ctxt->keepBlanks)
2886: return(0);
2887: if (RAW != '<') return(0);
2888: if (ctxt->node == NULL) return(0);
2889: if ((ctxt->node->children == NULL) &&
2890: (RAW == '<') && (NXT(1) == '/')) return(0);
2891:
1.45 daniel 2892: lastChild = xmlGetLastChild(ctxt->node);
2893: if (lastChild == NULL) {
2894: if (ctxt->node->content != NULL) return(0);
2895: } else if (xmlNodeIsText(lastChild))
2896: return(0);
1.157 daniel 2897: else if ((ctxt->node->children != NULL) &&
2898: (xmlNodeIsText(ctxt->node->children)))
1.104 daniel 2899: return(0);
1.45 daniel 2900: return(1);
2901: }
2902:
1.50 daniel 2903: /**
2904: * xmlHandleEntity:
2905: * @ctxt: an XML parser context
2906: * @entity: an XML entity pointer.
2907: *
2908: * Default handling of defined entities, when should we define a new input
1.45 daniel 2909: * stream ? When do we just handle that as a set of chars ?
1.99 daniel 2910: *
2911: * OBSOLETE: to be removed at some point.
1.45 daniel 2912: */
2913:
1.55 daniel 2914: void
2915: xmlHandleEntity(xmlParserCtxtPtr ctxt, xmlEntityPtr entity) {
1.45 daniel 2916: int len;
1.50 daniel 2917: xmlParserInputPtr input;
1.45 daniel 2918:
2919: if (entity->content == NULL) {
1.123 daniel 2920: ctxt->errNo = XML_ERR_INTERNAL_ERROR;
1.55 daniel 2921: if ((ctxt->sax != NULL) && (ctxt->sax->error != NULL))
1.74 daniel 2922: ctxt->sax->error(ctxt->userData, "xmlHandleEntity %s: content == NULL\n",
1.45 daniel 2923: entity->name);
1.59 daniel 2924: ctxt->wellFormed = 0;
1.180 daniel 2925: ctxt->disableSAX = 1;
1.45 daniel 2926: return;
2927: }
2928: len = xmlStrlen(entity->content);
2929: if (len <= 2) goto handle_as_char;
2930:
2931: /*
2932: * Redefine its content as an input stream.
2933: */
1.50 daniel 2934: input = xmlNewEntityInputStream(ctxt, entity);
2935: xmlPushInput(ctxt, input);
1.45 daniel 2936: return;
2937:
2938: handle_as_char:
2939: /*
2940: * Just handle the content as a set of chars.
2941: */
1.171 daniel 2942: if ((ctxt->sax != NULL) && (!ctxt->disableSAX) &&
2943: (ctxt->sax->characters != NULL))
1.74 daniel 2944: ctxt->sax->characters(ctxt->userData, entity->content, len);
1.45 daniel 2945:
2946: }
2947:
2948: /*
2949: * Forward definition for recursive behaviour.
2950: */
1.77 daniel 2951: void xmlParsePEReference(xmlParserCtxtPtr ctxt);
2952: void xmlParseReference(xmlParserCtxtPtr ctxt);
1.45 daniel 2953:
1.28 daniel 2954: /************************************************************************
2955: * *
2956: * Extra stuff for namespace support *
2957: * Relates to http://www.w3.org/TR/WD-xml-names *
2958: * *
2959: ************************************************************************/
2960:
1.50 daniel 2961: /**
2962: * xmlNamespaceParseNCName:
2963: * @ctxt: an XML parser context
2964: *
2965: * parse an XML namespace name.
1.28 daniel 2966: *
2967: * [NS 3] NCName ::= (Letter | '_') (NCNameChar)*
2968: *
2969: * [NS 4] NCNameChar ::= Letter | Digit | '.' | '-' | '_' |
2970: * CombiningChar | Extender
1.68 daniel 2971: *
2972: * Returns the namespace name or NULL
1.28 daniel 2973: */
2974:
1.123 daniel 2975: xmlChar *
1.55 daniel 2976: xmlNamespaceParseNCName(xmlParserCtxtPtr ctxt) {
1.152 daniel 2977: xmlChar buf[XML_MAX_NAMELEN + 5];
2978: int len = 0, l;
2979: int cur = CUR_CHAR(l);
1.28 daniel 2980:
1.156 daniel 2981: /* load first the value of the char !!! */
1.152 daniel 2982: if (!IS_LETTER(cur) && (cur != '_')) return(NULL);
1.28 daniel 2983:
1.152 daniel 2984: while ((IS_LETTER(cur)) || (IS_DIGIT(cur)) ||
2985: (cur == '.') || (cur == '-') ||
2986: (cur == '_') ||
2987: (IS_COMBINING(cur)) ||
2988: (IS_EXTENDER(cur))) {
2989: COPY_BUF(l,buf,len,cur);
2990: NEXTL(l);
2991: cur = CUR_CHAR(l);
1.91 daniel 2992: if (len >= XML_MAX_NAMELEN) {
2993: fprintf(stderr,
2994: "xmlNamespaceParseNCName: reached XML_MAX_NAMELEN limit\n");
1.152 daniel 2995: while ((IS_LETTER(cur)) || (IS_DIGIT(cur)) ||
2996: (cur == '.') || (cur == '-') ||
2997: (cur == '_') ||
2998: (IS_COMBINING(cur)) ||
2999: (IS_EXTENDER(cur))) {
3000: NEXTL(l);
3001: cur = CUR_CHAR(l);
3002: }
1.91 daniel 3003: break;
3004: }
3005: }
3006: return(xmlStrndup(buf, len));
1.28 daniel 3007: }
3008:
1.50 daniel 3009: /**
3010: * xmlNamespaceParseQName:
3011: * @ctxt: an XML parser context
1.123 daniel 3012: * @prefix: a xmlChar **
1.50 daniel 3013: *
3014: * parse an XML qualified name
1.28 daniel 3015: *
3016: * [NS 5] QName ::= (Prefix ':')? LocalPart
3017: *
3018: * [NS 6] Prefix ::= NCName
3019: *
3020: * [NS 7] LocalPart ::= NCName
1.68 daniel 3021: *
1.127 daniel 3022: * Returns the local part, and prefix is updated
1.50 daniel 3023: * to get the Prefix if any.
1.28 daniel 3024: */
3025:
1.123 daniel 3026: xmlChar *
3027: xmlNamespaceParseQName(xmlParserCtxtPtr ctxt, xmlChar **prefix) {
3028: xmlChar *ret = NULL;
1.28 daniel 3029:
3030: *prefix = NULL;
3031: ret = xmlNamespaceParseNCName(ctxt);
1.152 daniel 3032: if (RAW == ':') {
1.28 daniel 3033: *prefix = ret;
1.40 daniel 3034: NEXT;
1.28 daniel 3035: ret = xmlNamespaceParseNCName(ctxt);
3036: }
3037:
3038: return(ret);
3039: }
3040:
1.50 daniel 3041: /**
1.72 daniel 3042: * xmlSplitQName:
1.162 daniel 3043: * @ctxt: an XML parser context
1.72 daniel 3044: * @name: an XML parser context
1.123 daniel 3045: * @prefix: a xmlChar **
1.72 daniel 3046: *
3047: * parse an XML qualified name string
3048: *
3049: * [NS 5] QName ::= (Prefix ':')? LocalPart
3050: *
3051: * [NS 6] Prefix ::= NCName
3052: *
3053: * [NS 7] LocalPart ::= NCName
3054: *
1.127 daniel 3055: * Returns the local part, and prefix is updated
1.72 daniel 3056: * to get the Prefix if any.
3057: */
3058:
1.123 daniel 3059: xmlChar *
1.162 daniel 3060: xmlSplitQName(xmlParserCtxtPtr ctxt, const xmlChar *name, xmlChar **prefix) {
3061: xmlChar buf[XML_MAX_NAMELEN + 5];
3062: int len = 0;
1.123 daniel 3063: xmlChar *ret = NULL;
3064: const xmlChar *cur = name;
1.162 daniel 3065: int c,l;
1.72 daniel 3066:
3067: *prefix = NULL;
1.113 daniel 3068:
3069: /* xml: prefix is not really a namespace */
3070: if ((cur[0] == 'x') && (cur[1] == 'm') &&
3071: (cur[2] == 'l') && (cur[3] == ':'))
3072: return(xmlStrdup(name));
3073:
1.162 daniel 3074: /* nasty but valid */
3075: if (cur[0] == ':')
3076: return(xmlStrdup(name));
3077:
3078: c = CUR_SCHAR(cur, l);
3079: if (!IS_LETTER(c) && (c != '_')) return(NULL);
1.72 daniel 3080:
1.162 daniel 3081: while ((IS_LETTER(c)) || (IS_DIGIT(c)) ||
3082: (c == '.') || (c == '-') ||
3083: (c == '_') ||
3084: (IS_COMBINING(c)) ||
3085: (IS_EXTENDER(c))) {
3086: COPY_BUF(l,buf,len,c);
3087: cur += l;
3088: c = CUR_SCHAR(cur, l);
3089: }
1.72 daniel 3090:
1.162 daniel 3091: ret = xmlStrndup(buf, len);
1.72 daniel 3092:
1.162 daniel 3093: if (c == ':') {
3094: cur += l;
1.163 daniel 3095: c = CUR_SCHAR(cur, l);
1.162 daniel 3096: if (!IS_LETTER(c) && (c != '_')) return(ret);
1.72 daniel 3097: *prefix = ret;
1.162 daniel 3098: len = 0;
1.72 daniel 3099:
1.162 daniel 3100: while ((IS_LETTER(c)) || (IS_DIGIT(c)) ||
3101: (c == '.') || (c == '-') ||
3102: (c == '_') ||
3103: (IS_COMBINING(c)) ||
3104: (IS_EXTENDER(c))) {
3105: COPY_BUF(l,buf,len,c);
3106: cur += l;
3107: c = CUR_SCHAR(cur, l);
3108: }
1.72 daniel 3109:
1.162 daniel 3110: ret = xmlStrndup(buf, len);
1.72 daniel 3111: }
3112:
3113: return(ret);
3114: }
3115: /**
1.50 daniel 3116: * xmlNamespaceParseNSDef:
3117: * @ctxt: an XML parser context
3118: *
3119: * parse a namespace prefix declaration
1.28 daniel 3120: *
3121: * [NS 1] NSDef ::= PrefixDef Eq SystemLiteral
3122: *
3123: * [NS 2] PrefixDef ::= 'xmlns' (':' NCName)?
1.68 daniel 3124: *
3125: * Returns the namespace name
1.28 daniel 3126: */
3127:
1.123 daniel 3128: xmlChar *
1.55 daniel 3129: xmlNamespaceParseNSDef(xmlParserCtxtPtr ctxt) {
1.123 daniel 3130: xmlChar *name = NULL;
1.28 daniel 3131:
1.152 daniel 3132: if ((RAW == 'x') && (NXT(1) == 'm') &&
1.40 daniel 3133: (NXT(2) == 'l') && (NXT(3) == 'n') &&
3134: (NXT(4) == 's')) {
3135: SKIP(5);
1.152 daniel 3136: if (RAW == ':') {
1.40 daniel 3137: NEXT;
1.28 daniel 3138: name = xmlNamespaceParseNCName(ctxt);
3139: }
3140: }
1.39 daniel 3141: return(name);
1.28 daniel 3142: }
3143:
1.50 daniel 3144: /**
3145: * xmlParseQuotedString:
3146: * @ctxt: an XML parser context
3147: *
1.45 daniel 3148: * [OLD] Parse and return a string between quotes or doublequotes
1.110 daniel 3149: * To be removed at next drop of binary compatibility
1.68 daniel 3150: *
3151: * Returns the string parser or NULL.
1.45 daniel 3152: */
1.123 daniel 3153: xmlChar *
1.55 daniel 3154: xmlParseQuotedString(xmlParserCtxtPtr ctxt) {
1.135 daniel 3155: xmlChar *buf = NULL;
1.152 daniel 3156: int len = 0,l;
1.140 daniel 3157: int size = XML_PARSER_BUFFER_SIZE;
1.152 daniel 3158: int c;
1.45 daniel 3159:
1.135 daniel 3160: buf = (xmlChar *) xmlMalloc(size * sizeof(xmlChar));
3161: if (buf == NULL) {
3162: fprintf(stderr, "malloc of %d byte failed\n", size);
3163: return(NULL);
3164: }
1.152 daniel 3165: if (RAW == '"') {
1.45 daniel 3166: NEXT;
1.152 daniel 3167: c = CUR_CHAR(l);
1.135 daniel 3168: while (IS_CHAR(c) && (c != '"')) {
1.152 daniel 3169: if (len + 5 >= size) {
1.135 daniel 3170: size *= 2;
3171: buf = xmlRealloc(buf, size * sizeof(xmlChar));
3172: if (buf == NULL) {
3173: fprintf(stderr, "realloc of %d byte failed\n", size);
3174: return(NULL);
3175: }
3176: }
1.152 daniel 3177: COPY_BUF(l,buf,len,c);
3178: NEXTL(l);
3179: c = CUR_CHAR(l);
1.135 daniel 3180: }
3181: if (c != '"') {
1.123 daniel 3182: ctxt->errNo = XML_ERR_STRING_NOT_CLOSED;
1.55 daniel 3183: if ((ctxt->sax != NULL) && (ctxt->sax->error != NULL))
1.135 daniel 3184: ctxt->sax->error(ctxt->userData,
3185: "String not closed \"%.50s\"\n", buf);
1.59 daniel 3186: ctxt->wellFormed = 0;
1.180 daniel 3187: ctxt->disableSAX = 1;
1.55 daniel 3188: } else {
1.45 daniel 3189: NEXT;
3190: }
1.152 daniel 3191: } else if (RAW == '\''){
1.45 daniel 3192: NEXT;
1.135 daniel 3193: c = CUR;
3194: while (IS_CHAR(c) && (c != '\'')) {
3195: if (len + 1 >= size) {
3196: size *= 2;
3197: buf = xmlRealloc(buf, size * sizeof(xmlChar));
3198: if (buf == NULL) {
3199: fprintf(stderr, "realloc of %d byte failed\n", size);
3200: return(NULL);
3201: }
3202: }
3203: buf[len++] = c;
3204: NEXT;
3205: c = CUR;
3206: }
1.152 daniel 3207: if (RAW != '\'') {
1.123 daniel 3208: ctxt->errNo = XML_ERR_STRING_NOT_CLOSED;
1.55 daniel 3209: if ((ctxt->sax != NULL) && (ctxt->sax->error != NULL))
1.135 daniel 3210: ctxt->sax->error(ctxt->userData,
3211: "String not closed \"%.50s\"\n", buf);
1.59 daniel 3212: ctxt->wellFormed = 0;
1.180 daniel 3213: ctxt->disableSAX = 1;
1.55 daniel 3214: } else {
1.45 daniel 3215: NEXT;
3216: }
3217: }
1.135 daniel 3218: return(buf);
1.45 daniel 3219: }
3220:
1.50 daniel 3221: /**
3222: * xmlParseNamespace:
3223: * @ctxt: an XML parser context
3224: *
1.45 daniel 3225: * [OLD] xmlParseNamespace: parse specific PI '<?namespace ...' constructs.
3226: *
3227: * This is what the older xml-name Working Draft specified, a bunch of
3228: * other stuff may still rely on it, so support is still here as
1.127 daniel 3229: * if it was declared on the root of the Tree:-(
1.110 daniel 3230: *
3231: * To be removed at next drop of binary compatibility
1.45 daniel 3232: */
3233:
1.55 daniel 3234: void
3235: xmlParseNamespace(xmlParserCtxtPtr ctxt) {
1.123 daniel 3236: xmlChar *href = NULL;
3237: xmlChar *prefix = NULL;
1.45 daniel 3238: int garbage = 0;
3239:
3240: /*
3241: * We just skipped "namespace" or "xml:namespace"
3242: */
3243: SKIP_BLANKS;
3244:
1.153 daniel 3245: while (IS_CHAR(RAW) && (RAW != '>')) {
1.45 daniel 3246: /*
3247: * We can have "ns" or "prefix" attributes
3248: * Old encoding as 'href' or 'AS' attributes is still supported
3249: */
1.152 daniel 3250: if ((RAW == 'n') && (NXT(1) == 's')) {
1.45 daniel 3251: garbage = 0;
3252: SKIP(2);
3253: SKIP_BLANKS;
3254:
1.152 daniel 3255: if (RAW != '=') continue;
1.45 daniel 3256: NEXT;
3257: SKIP_BLANKS;
3258:
3259: href = xmlParseQuotedString(ctxt);
3260: SKIP_BLANKS;
1.152 daniel 3261: } else if ((RAW == 'h') && (NXT(1) == 'r') &&
1.45 daniel 3262: (NXT(2) == 'e') && (NXT(3) == 'f')) {
3263: garbage = 0;
3264: SKIP(4);
3265: SKIP_BLANKS;
3266:
1.152 daniel 3267: if (RAW != '=') continue;
1.45 daniel 3268: NEXT;
3269: SKIP_BLANKS;
3270:
3271: href = xmlParseQuotedString(ctxt);
3272: SKIP_BLANKS;
1.152 daniel 3273: } else if ((RAW == 'p') && (NXT(1) == 'r') &&
1.45 daniel 3274: (NXT(2) == 'e') && (NXT(3) == 'f') &&
3275: (NXT(4) == 'i') && (NXT(5) == 'x')) {
3276: garbage = 0;
3277: SKIP(6);
3278: SKIP_BLANKS;
3279:
1.152 daniel 3280: if (RAW != '=') continue;
1.45 daniel 3281: NEXT;
3282: SKIP_BLANKS;
3283:
3284: prefix = xmlParseQuotedString(ctxt);
3285: SKIP_BLANKS;
1.152 daniel 3286: } else if ((RAW == 'A') && (NXT(1) == 'S')) {
1.45 daniel 3287: garbage = 0;
3288: SKIP(2);
3289: SKIP_BLANKS;
3290:
1.152 daniel 3291: if (RAW != '=') continue;
1.45 daniel 3292: NEXT;
3293: SKIP_BLANKS;
3294:
3295: prefix = xmlParseQuotedString(ctxt);
3296: SKIP_BLANKS;
1.152 daniel 3297: } else if ((RAW == '?') && (NXT(1) == '>')) {
1.45 daniel 3298: garbage = 0;
1.91 daniel 3299: NEXT;
1.45 daniel 3300: } else {
3301: /*
3302: * Found garbage when parsing the namespace
3303: */
1.122 daniel 3304: if (!garbage) {
1.55 daniel 3305: if ((ctxt->sax != NULL) && (ctxt->sax->error != NULL))
1.122 daniel 3306: ctxt->sax->error(ctxt->userData,
3307: "xmlParseNamespace found garbage\n");
3308: }
1.123 daniel 3309: ctxt->errNo = XML_ERR_NS_DECL_ERROR;
1.59 daniel 3310: ctxt->wellFormed = 0;
1.180 daniel 3311: ctxt->disableSAX = 1;
1.45 daniel 3312: NEXT;
3313: }
3314: }
3315:
3316: MOVETO_ENDTAG(CUR_PTR);
3317: NEXT;
3318:
3319: /*
3320: * Register the DTD.
1.72 daniel 3321: if (href != NULL)
3322: if ((ctxt->sax != NULL) && (ctxt->sax->globalNamespace != NULL))
1.74 daniel 3323: ctxt->sax->globalNamespace(ctxt->userData, href, prefix);
1.45 daniel 3324: */
3325:
1.119 daniel 3326: if (prefix != NULL) xmlFree(prefix);
3327: if (href != NULL) xmlFree(href);
1.45 daniel 3328: }
3329:
1.28 daniel 3330: /************************************************************************
3331: * *
3332: * The parser itself *
3333: * Relates to http://www.w3.org/TR/REC-xml *
3334: * *
3335: ************************************************************************/
1.14 veillard 3336:
1.50 daniel 3337: /**
1.97 daniel 3338: * xmlScanName:
3339: * @ctxt: an XML parser context
3340: *
3341: * Trickery: parse an XML name but without consuming the input flow
3342: * Needed for rollback cases.
3343: *
3344: * [4] NameChar ::= Letter | Digit | '.' | '-' | '_' | ':' |
3345: * CombiningChar | Extender
3346: *
3347: * [5] Name ::= (Letter | '_' | ':') (NameChar)*
3348: *
3349: * [6] Names ::= Name (S Name)*
3350: *
3351: * Returns the Name parsed or NULL
3352: */
3353:
1.123 daniel 3354: xmlChar *
1.97 daniel 3355: xmlScanName(xmlParserCtxtPtr ctxt) {
1.123 daniel 3356: xmlChar buf[XML_MAX_NAMELEN];
1.97 daniel 3357: int len = 0;
3358:
3359: GROW;
1.152 daniel 3360: if (!IS_LETTER(RAW) && (RAW != '_') &&
3361: (RAW != ':')) {
1.97 daniel 3362: return(NULL);
3363: }
3364:
3365: while ((IS_LETTER(NXT(len))) || (IS_DIGIT(NXT(len))) ||
3366: (NXT(len) == '.') || (NXT(len) == '-') ||
3367: (NXT(len) == '_') || (NXT(len) == ':') ||
3368: (IS_COMBINING(NXT(len))) ||
3369: (IS_EXTENDER(NXT(len)))) {
3370: buf[len] = NXT(len);
3371: len++;
3372: if (len >= XML_MAX_NAMELEN) {
3373: fprintf(stderr,
3374: "xmlScanName: reached XML_MAX_NAMELEN limit\n");
3375: while ((IS_LETTER(NXT(len))) || (IS_DIGIT(NXT(len))) ||
3376: (NXT(len) == '.') || (NXT(len) == '-') ||
3377: (NXT(len) == '_') || (NXT(len) == ':') ||
3378: (IS_COMBINING(NXT(len))) ||
3379: (IS_EXTENDER(NXT(len))))
3380: len++;
3381: break;
3382: }
3383: }
3384: return(xmlStrndup(buf, len));
3385: }
3386:
3387: /**
1.50 daniel 3388: * xmlParseName:
3389: * @ctxt: an XML parser context
3390: *
3391: * parse an XML name.
1.22 daniel 3392: *
3393: * [4] NameChar ::= Letter | Digit | '.' | '-' | '_' | ':' |
3394: * CombiningChar | Extender
3395: *
3396: * [5] Name ::= (Letter | '_' | ':') (NameChar)*
3397: *
3398: * [6] Names ::= Name (S Name)*
1.68 daniel 3399: *
3400: * Returns the Name parsed or NULL
1.1 veillard 3401: */
3402:
1.123 daniel 3403: xmlChar *
1.55 daniel 3404: xmlParseName(xmlParserCtxtPtr ctxt) {
1.160 daniel 3405: xmlChar buf[XML_MAX_NAMELEN + 5];
3406: int len = 0, l;
3407: int c;
1.1 veillard 3408:
1.91 daniel 3409: GROW;
1.160 daniel 3410: c = CUR_CHAR(l);
3411: if (!IS_LETTER(c) && (c != '_') &&
3412: (c != ':')) {
1.91 daniel 3413: return(NULL);
3414: }
1.40 daniel 3415:
1.160 daniel 3416: while ((IS_LETTER(c)) || (IS_DIGIT(c)) ||
3417: (c == '.') || (c == '-') ||
3418: (c == '_') || (c == ':') ||
3419: (IS_COMBINING(c)) ||
3420: (IS_EXTENDER(c))) {
3421: COPY_BUF(l,buf,len,c);
3422: NEXTL(l);
3423: c = CUR_CHAR(l);
1.91 daniel 3424: if (len >= XML_MAX_NAMELEN) {
3425: fprintf(stderr,
3426: "xmlParseName: reached XML_MAX_NAMELEN limit\n");
1.160 daniel 3427: while ((IS_LETTER(c)) || (IS_DIGIT(c)) ||
3428: (c == '.') || (c == '-') ||
3429: (c == '_') || (c == ':') ||
3430: (IS_COMBINING(c)) ||
3431: (IS_EXTENDER(c))) {
3432: NEXTL(l);
3433: c = CUR_CHAR(l);
1.97 daniel 3434: }
1.91 daniel 3435: break;
3436: }
3437: }
3438: return(xmlStrndup(buf, len));
1.22 daniel 3439: }
3440:
1.50 daniel 3441: /**
1.135 daniel 3442: * xmlParseStringName:
3443: * @ctxt: an XML parser context
3444: * @str: a pointer to an index in the string
3445: *
3446: * parse an XML name.
3447: *
3448: * [4] NameChar ::= Letter | Digit | '.' | '-' | '_' | ':' |
3449: * CombiningChar | Extender
3450: *
3451: * [5] Name ::= (Letter | '_' | ':') (NameChar)*
3452: *
3453: * [6] Names ::= Name (S Name)*
3454: *
3455: * Returns the Name parsed or NULL. The str pointer
3456: * is updated to the current location in the string.
3457: */
3458:
3459: xmlChar *
3460: xmlParseStringName(xmlParserCtxtPtr ctxt, const xmlChar** str) {
1.176 daniel 3461: xmlChar buf[XML_MAX_NAMELEN + 5];
3462: const xmlChar *cur = *str;
3463: int len = 0, l;
3464: int c;
1.135 daniel 3465:
1.176 daniel 3466: c = CUR_SCHAR(cur, l);
3467: if (!IS_LETTER(c) && (c != '_') &&
3468: (c != ':')) {
1.135 daniel 3469: return(NULL);
3470: }
3471:
1.176 daniel 3472: while ((IS_LETTER(c)) || (IS_DIGIT(c)) ||
3473: (c == '.') || (c == '-') ||
3474: (c == '_') || (c == ':') ||
3475: (IS_COMBINING(c)) ||
3476: (IS_EXTENDER(c))) {
3477: COPY_BUF(l,buf,len,c);
3478: cur += l;
3479: c = CUR_SCHAR(cur, l);
3480: if (len >= XML_MAX_NAMELEN) {
3481: fprintf(stderr,
3482: "xmlParseName: reached XML_MAX_NAMELEN limit\n");
3483: while ((IS_LETTER(c)) || (IS_DIGIT(c)) ||
3484: (c == '.') || (c == '-') ||
3485: (c == '_') || (c == ':') ||
3486: (IS_COMBINING(c)) ||
3487: (IS_EXTENDER(c))) {
3488: cur += l;
3489: c = CUR_SCHAR(cur, l);
3490: }
3491: break;
3492: }
1.135 daniel 3493: }
1.176 daniel 3494: *str = cur;
3495: return(xmlStrndup(buf, len));
1.135 daniel 3496: }
3497:
3498: /**
1.50 daniel 3499: * xmlParseNmtoken:
3500: * @ctxt: an XML parser context
3501: *
3502: * parse an XML Nmtoken.
1.22 daniel 3503: *
3504: * [7] Nmtoken ::= (NameChar)+
3505: *
3506: * [8] Nmtokens ::= Nmtoken (S Nmtoken)*
1.68 daniel 3507: *
3508: * Returns the Nmtoken parsed or NULL
1.22 daniel 3509: */
3510:
1.123 daniel 3511: xmlChar *
1.55 daniel 3512: xmlParseNmtoken(xmlParserCtxtPtr ctxt) {
1.123 daniel 3513: xmlChar buf[XML_MAX_NAMELEN];
1.91 daniel 3514: int len = 0;
1.160 daniel 3515: int c,l;
1.22 daniel 3516:
1.91 daniel 3517: GROW;
1.160 daniel 3518: c = CUR_CHAR(l);
3519: while ((IS_LETTER(c)) || (IS_DIGIT(c)) ||
3520: (c == '.') || (c == '-') ||
3521: (c == '_') || (c == ':') ||
3522: (IS_COMBINING(c)) ||
3523: (IS_EXTENDER(c))) {
3524: COPY_BUF(l,buf,len,c);
3525: NEXTL(l);
3526: c = CUR_CHAR(l);
1.91 daniel 3527: if (len >= XML_MAX_NAMELEN) {
3528: fprintf(stderr,
3529: "xmlParseNmtoken: reached XML_MAX_NAMELEN limit\n");
1.160 daniel 3530: while ((IS_LETTER(c)) || (IS_DIGIT(c)) ||
3531: (c == '.') || (c == '-') ||
3532: (c == '_') || (c == ':') ||
3533: (IS_COMBINING(c)) ||
3534: (IS_EXTENDER(c))) {
3535: NEXTL(l);
3536: c = CUR_CHAR(l);
3537: }
1.91 daniel 3538: break;
3539: }
3540: }
1.168 daniel 3541: if (len == 0)
3542: return(NULL);
1.91 daniel 3543: return(xmlStrndup(buf, len));
1.1 veillard 3544: }
3545:
1.50 daniel 3546: /**
3547: * xmlParseEntityValue:
3548: * @ctxt: an XML parser context
1.78 daniel 3549: * @orig: if non-NULL store a copy of the original entity value
1.50 daniel 3550: *
3551: * parse a value for ENTITY decl.
1.24 daniel 3552: *
3553: * [9] EntityValue ::= '"' ([^%&"] | PEReference | Reference)* '"' |
3554: * "'" ([^%&'] | PEReference | Reference)* "'"
1.68 daniel 3555: *
1.78 daniel 3556: * Returns the EntityValue parsed with reference substitued or NULL
1.24 daniel 3557: */
3558:
1.123 daniel 3559: xmlChar *
3560: xmlParseEntityValue(xmlParserCtxtPtr ctxt, xmlChar **orig) {
1.135 daniel 3561: xmlChar *buf = NULL;
3562: int len = 0;
1.140 daniel 3563: int size = XML_PARSER_BUFFER_SIZE;
1.152 daniel 3564: int c, l;
1.135 daniel 3565: xmlChar stop;
1.123 daniel 3566: xmlChar *ret = NULL;
1.176 daniel 3567: const xmlChar *cur = NULL;
1.98 daniel 3568: xmlParserInputPtr input;
1.24 daniel 3569:
1.152 daniel 3570: if (RAW == '"') stop = '"';
3571: else if (RAW == '\'') stop = '\'';
1.135 daniel 3572: else {
3573: ctxt->errNo = XML_ERR_ENTITY_NOT_STARTED;
3574: if ((ctxt->sax != NULL) && (ctxt->sax->error != NULL))
3575: ctxt->sax->error(ctxt->userData, "EntityValue: \" or ' expected\n");
3576: ctxt->wellFormed = 0;
1.180 daniel 3577: ctxt->disableSAX = 1;
1.135 daniel 3578: return(NULL);
3579: }
3580: buf = (xmlChar *) xmlMalloc(size * sizeof(xmlChar));
3581: if (buf == NULL) {
3582: fprintf(stderr, "malloc of %d byte failed\n", size);
3583: return(NULL);
3584: }
1.94 daniel 3585:
1.135 daniel 3586: /*
3587: * The content of the entity definition is copied in a buffer.
3588: */
1.94 daniel 3589:
1.135 daniel 3590: ctxt->instate = XML_PARSER_ENTITY_VALUE;
3591: input = ctxt->input;
3592: GROW;
3593: NEXT;
1.152 daniel 3594: c = CUR_CHAR(l);
1.135 daniel 3595: /*
3596: * NOTE: 4.4.5 Included in Literal
3597: * When a parameter entity reference appears in a literal entity
3598: * value, ... a single or double quote character in the replacement
3599: * text is always treated as a normal data character and will not
3600: * terminate the literal.
3601: * In practice it means we stop the loop only when back at parsing
3602: * the initial entity and the quote is found
3603: */
3604: while (IS_CHAR(c) && ((c != stop) || (ctxt->input != input))) {
1.152 daniel 3605: if (len + 5 >= size) {
1.135 daniel 3606: size *= 2;
3607: buf = xmlRealloc(buf, size * sizeof(xmlChar));
3608: if (buf == NULL) {
3609: fprintf(stderr, "realloc of %d byte failed\n", size);
3610: return(NULL);
1.94 daniel 3611: }
1.79 daniel 3612: }
1.152 daniel 3613: COPY_BUF(l,buf,len,c);
3614: NEXTL(l);
1.98 daniel 3615: /*
1.135 daniel 3616: * Pop-up of finished entities.
1.98 daniel 3617: */
1.152 daniel 3618: while ((RAW == 0) && (ctxt->inputNr > 1))
1.135 daniel 3619: xmlPopInput(ctxt);
1.152 daniel 3620:
3621: c = CUR_CHAR(l);
1.135 daniel 3622: if (c == 0) {
1.94 daniel 3623: GROW;
1.152 daniel 3624: c = CUR_CHAR(l);
1.79 daniel 3625: }
1.135 daniel 3626: }
3627: buf[len] = 0;
3628:
3629: /*
1.176 daniel 3630: * Raise problem w.r.t. '&' and '%' being used in non-entities
3631: * reference constructs. Note Charref will be handled in
3632: * xmlStringDecodeEntities()
3633: */
3634: cur = buf;
3635: while (*cur != 0) {
3636: if ((*cur == '%') || ((*cur == '&') && (cur[1] != '#'))) {
3637: xmlChar *name;
3638: xmlChar tmp = *cur;
3639:
3640: cur++;
3641: name = xmlParseStringName(ctxt, &cur);
3642: if ((name == NULL) || (*cur != ';')) {
3643: if ((ctxt->sax != NULL) && (ctxt->sax->error != NULL))
3644: ctxt->sax->error(ctxt->userData,
3645: "EntityValue: '%c' forbidden except for entities references\n",
3646: tmp);
3647: ctxt->wellFormed = 0;
1.180 daniel 3648: ctxt->disableSAX = 1;
1.176 daniel 3649: ctxt->errNo = XML_ERR_ENTITY_CHAR_ERROR;
3650: }
3651: if ((ctxt->inSubset == 1) && (tmp == '%')) {
3652: if ((ctxt->sax != NULL) && (ctxt->sax->error != NULL))
3653: ctxt->sax->error(ctxt->userData,
3654: "EntityValue: PEReferences forbidden in internal subset\n",
3655: tmp);
3656: ctxt->wellFormed = 0;
1.180 daniel 3657: ctxt->disableSAX = 1;
1.176 daniel 3658: ctxt->errNo = XML_ERR_ENTITY_PE_INTERNAL;
3659: }
3660: if (name != NULL)
3661: xmlFree(name);
3662: }
3663: cur++;
3664: }
3665:
3666: /*
1.135 daniel 3667: * Then PEReference entities are substituted.
3668: */
3669: if (c != stop) {
3670: ctxt->errNo = XML_ERR_ENTITY_NOT_FINISHED;
1.55 daniel 3671: if ((ctxt->sax != NULL) && (ctxt->sax->error != NULL))
1.135 daniel 3672: ctxt->sax->error(ctxt->userData, "EntityValue: \" expected\n");
1.59 daniel 3673: ctxt->wellFormed = 0;
1.180 daniel 3674: ctxt->disableSAX = 1;
1.170 daniel 3675: xmlFree(buf);
1.135 daniel 3676: } else {
3677: NEXT;
3678: /*
3679: * NOTE: 4.4.7 Bypassed
3680: * When a general entity reference appears in the EntityValue in
3681: * an entity declaration, it is bypassed and left as is.
1.176 daniel 3682: * so XML_SUBSTITUTE_REF is not set here.
1.135 daniel 3683: */
3684: ret = xmlStringDecodeEntities(ctxt, buf, XML_SUBSTITUTE_PEREF,
3685: 0, 0, 0);
3686: if (orig != NULL)
3687: *orig = buf;
3688: else
3689: xmlFree(buf);
1.24 daniel 3690: }
3691:
3692: return(ret);
3693: }
3694:
1.50 daniel 3695: /**
3696: * xmlParseAttValue:
3697: * @ctxt: an XML parser context
3698: *
3699: * parse a value for an attribute
1.78 daniel 3700: * Note: the parser won't do substitution of entities here, this
1.113 daniel 3701: * will be handled later in xmlStringGetNodeList
1.29 daniel 3702: *
3703: * [10] AttValue ::= '"' ([^<&"] | Reference)* '"' |
3704: * "'" ([^<&'] | Reference)* "'"
1.68 daniel 3705: *
1.129 daniel 3706: * 3.3.3 Attribute-Value Normalization:
3707: * Before the value of an attribute is passed to the application or
3708: * checked for validity, the XML processor must normalize it as follows:
3709: * - a character reference is processed by appending the referenced
3710: * character to the attribute value
3711: * - an entity reference is processed by recursively processing the
3712: * replacement text of the entity
3713: * - a whitespace character (#x20, #xD, #xA, #x9) is processed by
3714: * appending #x20 to the normalized value, except that only a single
3715: * #x20 is appended for a "#xD#xA" sequence that is part of an external
3716: * parsed entity or the literal entity value of an internal parsed entity
3717: * - other characters are processed by appending them to the normalized value
1.130 daniel 3718: * If the declared value is not CDATA, then the XML processor must further
3719: * process the normalized attribute value by discarding any leading and
3720: * trailing space (#x20) characters, and by replacing sequences of space
3721: * (#x20) characters by a single space (#x20) character.
3722: * All attributes for which no declaration has been read should be treated
3723: * by a non-validating parser as if declared CDATA.
1.129 daniel 3724: *
3725: * Returns the AttValue parsed or NULL. The value has to be freed by the caller.
1.29 daniel 3726: */
3727:
1.123 daniel 3728: xmlChar *
1.55 daniel 3729: xmlParseAttValue(xmlParserCtxtPtr ctxt) {
1.129 daniel 3730: xmlChar limit = 0;
3731: xmlChar *buffer = NULL;
3732: int buffer_size = 0;
3733: xmlChar *out = NULL;
3734:
3735: xmlChar *current = NULL;
3736: xmlEntityPtr ent;
3737: xmlChar cur;
3738:
1.29 daniel 3739:
1.91 daniel 3740: SHRINK;
1.151 daniel 3741: if (NXT(0) == '"') {
1.96 daniel 3742: ctxt->instate = XML_PARSER_ATTRIBUTE_VALUE;
1.129 daniel 3743: limit = '"';
1.40 daniel 3744: NEXT;
1.151 daniel 3745: } else if (NXT(0) == '\'') {
1.129 daniel 3746: limit = '\'';
1.96 daniel 3747: ctxt->instate = XML_PARSER_ATTRIBUTE_VALUE;
1.40 daniel 3748: NEXT;
1.29 daniel 3749: } else {
1.123 daniel 3750: ctxt->errNo = XML_ERR_ATTRIBUTE_NOT_STARTED;
1.55 daniel 3751: if ((ctxt->sax != NULL) && (ctxt->sax->error != NULL))
1.74 daniel 3752: ctxt->sax->error(ctxt->userData, "AttValue: \" or ' expected\n");
1.59 daniel 3753: ctxt->wellFormed = 0;
1.180 daniel 3754: ctxt->disableSAX = 1;
1.129 daniel 3755: return(NULL);
1.29 daniel 3756: }
3757:
1.129 daniel 3758: /*
3759: * allocate a translation buffer.
3760: */
1.140 daniel 3761: buffer_size = XML_PARSER_BUFFER_SIZE;
1.129 daniel 3762: buffer = (xmlChar *) xmlMalloc(buffer_size * sizeof(xmlChar));
3763: if (buffer == NULL) {
3764: perror("xmlParseAttValue: malloc failed");
3765: return(NULL);
3766: }
3767: out = buffer;
3768:
3769: /*
3770: * Ok loop until we reach one of the ending char or a size limit.
3771: */
3772: cur = CUR;
1.156 daniel 3773: while (((NXT(0) != limit) && (cur != '<')) || (ctxt->token != 0)) {
1.129 daniel 3774: if (cur == 0) break;
3775: if ((cur == '&') && (NXT(1) == '#')) {
3776: int val = xmlParseCharRef(ctxt);
3777: *out++ = val;
3778: } else if (cur == '&') {
3779: ent = xmlParseEntityRef(ctxt);
3780: if ((ent != NULL) &&
3781: (ctxt->replaceEntities != 0)) {
1.185 daniel 3782: xmlChar *rep;
3783:
1.186 daniel 3784: if (ent->etype != XML_INTERNAL_PREDEFINED_ENTITY) {
3785: rep = xmlStringDecodeEntities(ctxt, ent->content,
1.185 daniel 3786: XML_SUBSTITUTE_REF, 0, 0, 0);
1.186 daniel 3787: if (rep != NULL) {
3788: current = rep;
3789: while (*current != 0) {
3790: *out++ = *current++;
3791: if (out - buffer > buffer_size - 10) {
3792: int index = out - buffer;
1.129 daniel 3793:
1.186 daniel 3794: growBuffer(buffer);
3795: out = &buffer[index];
3796: }
1.185 daniel 3797: }
1.186 daniel 3798: xmlFree(rep);
1.129 daniel 3799: }
1.186 daniel 3800: } else {
3801: if (ent->content != NULL)
3802: *out++ = ent->content[0];
1.129 daniel 3803: }
3804: } else if (ent != NULL) {
3805: int i = xmlStrlen(ent->name);
3806: const xmlChar *cur = ent->name;
3807:
1.186 daniel 3808: /*
3809: * This may look absurd but is needed to detect
3810: * entities problems
3811: */
3812: if (ent->etype != XML_INTERNAL_PREDEFINED_ENTITY) {
3813: xmlChar *rep;
3814: rep = xmlStringDecodeEntities(ctxt, ent->content,
3815: XML_SUBSTITUTE_REF, 0, 0, 0);
3816: if (rep != NULL)
3817: xmlFree(rep);
3818: }
3819:
3820: /*
3821: * Just output the reference
3822: */
1.129 daniel 3823: *out++ = '&';
3824: if (out - buffer > buffer_size - i - 10) {
3825: int index = out - buffer;
3826:
3827: growBuffer(buffer);
3828: out = &buffer[index];
3829: }
3830: for (;i > 0;i--)
3831: *out++ = *cur++;
3832: *out++ = ';';
3833: }
3834: } else {
1.156 daniel 3835: /* invalid for UTF-8 , use COPY(out); !!! */
1.129 daniel 3836: if ((cur == 0x20) || (cur == 0xD) || (cur == 0xA) || (cur == 0x9)) {
1.130 daniel 3837: *out++ = 0x20;
3838: if (out - buffer > buffer_size - 10) {
3839: int index = out - buffer;
3840:
3841: growBuffer(buffer);
3842: out = &buffer[index];
1.129 daniel 3843: }
3844: } else {
3845: *out++ = cur;
3846: if (out - buffer > buffer_size - 10) {
3847: int index = out - buffer;
3848:
3849: growBuffer(buffer);
3850: out = &buffer[index];
3851: }
3852: }
3853: NEXT;
3854: }
3855: cur = CUR;
3856: }
3857: *out++ = 0;
1.152 daniel 3858: if (RAW == '<') {
1.129 daniel 3859: if ((ctxt->sax != NULL) && (ctxt->sax->error != NULL))
3860: ctxt->sax->error(ctxt->userData,
3861: "Unescaped '<' not allowed in attributes values\n");
3862: ctxt->errNo = XML_ERR_LT_IN_ATTRIBUTE;
3863: ctxt->wellFormed = 0;
1.180 daniel 3864: ctxt->disableSAX = 1;
1.152 daniel 3865: } else if (RAW != limit) {
1.129 daniel 3866: if ((ctxt->sax != NULL) && (ctxt->sax->error != NULL))
3867: ctxt->sax->error(ctxt->userData, "AttValue: ' expected\n");
3868: ctxt->errNo = XML_ERR_ATTRIBUTE_NOT_FINISHED;
3869: ctxt->wellFormed = 0;
1.180 daniel 3870: ctxt->disableSAX = 1;
1.129 daniel 3871: } else
3872: NEXT;
3873: return(buffer);
1.29 daniel 3874: }
3875:
1.50 daniel 3876: /**
3877: * xmlParseSystemLiteral:
3878: * @ctxt: an XML parser context
3879: *
3880: * parse an XML Literal
1.21 daniel 3881: *
1.22 daniel 3882: * [11] SystemLiteral ::= ('"' [^"]* '"') | ("'" [^']* "'")
1.68 daniel 3883: *
3884: * Returns the SystemLiteral parsed or NULL
1.21 daniel 3885: */
3886:
1.123 daniel 3887: xmlChar *
1.55 daniel 3888: xmlParseSystemLiteral(xmlParserCtxtPtr ctxt) {
1.135 daniel 3889: xmlChar *buf = NULL;
3890: int len = 0;
1.140 daniel 3891: int size = XML_PARSER_BUFFER_SIZE;
1.152 daniel 3892: int cur, l;
1.135 daniel 3893: xmlChar stop;
1.168 daniel 3894: int state = ctxt->instate;
1.21 daniel 3895:
1.91 daniel 3896: SHRINK;
1.152 daniel 3897: if (RAW == '"') {
1.40 daniel 3898: NEXT;
1.135 daniel 3899: stop = '"';
1.152 daniel 3900: } else if (RAW == '\'') {
1.40 daniel 3901: NEXT;
1.135 daniel 3902: stop = '\'';
1.21 daniel 3903: } else {
1.55 daniel 3904: if ((ctxt->sax != NULL) && (ctxt->sax->error != NULL))
1.122 daniel 3905: ctxt->sax->error(ctxt->userData,
3906: "SystemLiteral \" or ' expected\n");
1.123 daniel 3907: ctxt->errNo = XML_ERR_LITERAL_NOT_STARTED;
1.59 daniel 3908: ctxt->wellFormed = 0;
1.180 daniel 3909: ctxt->disableSAX = 1;
1.135 daniel 3910: return(NULL);
1.21 daniel 3911: }
3912:
1.135 daniel 3913: buf = (xmlChar *) xmlMalloc(size * sizeof(xmlChar));
3914: if (buf == NULL) {
3915: fprintf(stderr, "malloc of %d byte failed\n", size);
3916: return(NULL);
3917: }
1.168 daniel 3918: ctxt->instate = XML_PARSER_SYSTEM_LITERAL;
1.152 daniel 3919: cur = CUR_CHAR(l);
1.135 daniel 3920: while ((IS_CHAR(cur)) && (cur != stop)) {
1.152 daniel 3921: if (len + 5 >= size) {
1.135 daniel 3922: size *= 2;
3923: buf = xmlRealloc(buf, size * sizeof(xmlChar));
3924: if (buf == NULL) {
3925: fprintf(stderr, "realloc of %d byte failed\n", size);
1.168 daniel 3926: ctxt->instate = state;
1.135 daniel 3927: return(NULL);
3928: }
3929: }
1.152 daniel 3930: COPY_BUF(l,buf,len,cur);
3931: NEXTL(l);
3932: cur = CUR_CHAR(l);
1.135 daniel 3933: if (cur == 0) {
3934: GROW;
3935: SHRINK;
1.152 daniel 3936: cur = CUR_CHAR(l);
1.135 daniel 3937: }
3938: }
3939: buf[len] = 0;
1.168 daniel 3940: ctxt->instate = state;
1.135 daniel 3941: if (!IS_CHAR(cur)) {
3942: if ((ctxt->sax != NULL) && (ctxt->sax->error != NULL))
3943: ctxt->sax->error(ctxt->userData, "Unfinished SystemLiteral\n");
3944: ctxt->errNo = XML_ERR_LITERAL_NOT_FINISHED;
3945: ctxt->wellFormed = 0;
1.180 daniel 3946: ctxt->disableSAX = 1;
1.135 daniel 3947: } else {
3948: NEXT;
3949: }
3950: return(buf);
1.21 daniel 3951: }
3952:
1.50 daniel 3953: /**
3954: * xmlParsePubidLiteral:
3955: * @ctxt: an XML parser context
1.21 daniel 3956: *
1.50 daniel 3957: * parse an XML public literal
1.68 daniel 3958: *
3959: * [12] PubidLiteral ::= '"' PubidChar* '"' | "'" (PubidChar - "'")* "'"
3960: *
3961: * Returns the PubidLiteral parsed or NULL.
1.21 daniel 3962: */
3963:
1.123 daniel 3964: xmlChar *
1.55 daniel 3965: xmlParsePubidLiteral(xmlParserCtxtPtr ctxt) {
1.135 daniel 3966: xmlChar *buf = NULL;
3967: int len = 0;
1.140 daniel 3968: int size = XML_PARSER_BUFFER_SIZE;
1.135 daniel 3969: xmlChar cur;
3970: xmlChar stop;
1.125 daniel 3971:
1.91 daniel 3972: SHRINK;
1.152 daniel 3973: if (RAW == '"') {
1.40 daniel 3974: NEXT;
1.135 daniel 3975: stop = '"';
1.152 daniel 3976: } else if (RAW == '\'') {
1.40 daniel 3977: NEXT;
1.135 daniel 3978: stop = '\'';
1.21 daniel 3979: } else {
1.55 daniel 3980: if ((ctxt->sax != NULL) && (ctxt->sax->error != NULL))
1.122 daniel 3981: ctxt->sax->error(ctxt->userData,
3982: "SystemLiteral \" or ' expected\n");
1.123 daniel 3983: ctxt->errNo = XML_ERR_LITERAL_NOT_STARTED;
1.59 daniel 3984: ctxt->wellFormed = 0;
1.180 daniel 3985: ctxt->disableSAX = 1;
1.135 daniel 3986: return(NULL);
3987: }
3988: buf = (xmlChar *) xmlMalloc(size * sizeof(xmlChar));
3989: if (buf == NULL) {
3990: fprintf(stderr, "malloc of %d byte failed\n", size);
3991: return(NULL);
3992: }
3993: cur = CUR;
3994: while ((IS_PUBIDCHAR(cur)) && (cur != stop)) {
3995: if (len + 1 >= size) {
3996: size *= 2;
3997: buf = xmlRealloc(buf, size * sizeof(xmlChar));
3998: if (buf == NULL) {
3999: fprintf(stderr, "realloc of %d byte failed\n", size);
4000: return(NULL);
4001: }
4002: }
4003: buf[len++] = cur;
4004: NEXT;
4005: cur = CUR;
4006: if (cur == 0) {
4007: GROW;
4008: SHRINK;
4009: cur = CUR;
4010: }
4011: }
4012: buf[len] = 0;
4013: if (cur != stop) {
4014: if ((ctxt->sax != NULL) && (ctxt->sax->error != NULL))
4015: ctxt->sax->error(ctxt->userData, "Unfinished PubidLiteral\n");
4016: ctxt->errNo = XML_ERR_LITERAL_NOT_FINISHED;
4017: ctxt->wellFormed = 0;
1.180 daniel 4018: ctxt->disableSAX = 1;
1.135 daniel 4019: } else {
4020: NEXT;
1.21 daniel 4021: }
1.135 daniel 4022: return(buf);
1.21 daniel 4023: }
4024:
1.50 daniel 4025: /**
4026: * xmlParseCharData:
4027: * @ctxt: an XML parser context
4028: * @cdata: int indicating whether we are within a CDATA section
4029: *
4030: * parse a CharData section.
4031: * if we are within a CDATA section ']]>' marks an end of section.
1.27 daniel 4032: *
1.151 daniel 4033: * The right angle bracket (>) may be represented using the string ">",
4034: * and must, for compatibility, be escaped using ">" or a character
4035: * reference when it appears in the string "]]>" in content, when that
4036: * string is not marking the end of a CDATA section.
4037: *
1.27 daniel 4038: * [14] CharData ::= [^<&]* - ([^<&]* ']]>' [^<&]*)
4039: */
4040:
1.55 daniel 4041: void
4042: xmlParseCharData(xmlParserCtxtPtr ctxt, int cdata) {
1.152 daniel 4043: xmlChar buf[XML_PARSER_BIG_BUFFER_SIZE + 5];
1.91 daniel 4044: int nbchar = 0;
1.152 daniel 4045: int cur, l;
1.27 daniel 4046:
1.91 daniel 4047: SHRINK;
1.152 daniel 4048: cur = CUR_CHAR(l);
1.160 daniel 4049: while ((IS_CHAR(cur)) && ((cur != '<') || (ctxt->token == '<')) &&
1.153 daniel 4050: ((cur != '&') || (ctxt->token == '&'))) {
1.97 daniel 4051: if ((cur == ']') && (NXT(1) == ']') &&
1.59 daniel 4052: (NXT(2) == '>')) {
4053: if (cdata) break;
4054: else {
4055: if ((ctxt->sax != NULL) && (ctxt->sax->error != NULL))
1.151 daniel 4056: ctxt->sax->error(ctxt->userData,
1.59 daniel 4057: "Sequence ']]>' not allowed in content\n");
1.123 daniel 4058: ctxt->errNo = XML_ERR_MISPLACED_CDATA_END;
1.151 daniel 4059: /* Should this be relaxed ??? I see a "must here */
4060: ctxt->wellFormed = 0;
1.180 daniel 4061: ctxt->disableSAX = 1;
1.59 daniel 4062: }
4063: }
1.152 daniel 4064: COPY_BUF(l,buf,nbchar,cur);
4065: if (nbchar >= XML_PARSER_BIG_BUFFER_SIZE) {
1.91 daniel 4066: /*
4067: * Ok the segment is to be consumed as chars.
4068: */
1.171 daniel 4069: if ((ctxt->sax != NULL) && (!ctxt->disableSAX)) {
1.91 daniel 4070: if (areBlanks(ctxt, buf, nbchar)) {
4071: if (ctxt->sax->ignorableWhitespace != NULL)
4072: ctxt->sax->ignorableWhitespace(ctxt->userData,
4073: buf, nbchar);
4074: } else {
4075: if (ctxt->sax->characters != NULL)
4076: ctxt->sax->characters(ctxt->userData, buf, nbchar);
4077: }
4078: }
4079: nbchar = 0;
4080: }
1.152 daniel 4081: NEXTL(l);
4082: cur = CUR_CHAR(l);
1.27 daniel 4083: }
1.91 daniel 4084: if (nbchar != 0) {
4085: /*
4086: * Ok the segment is to be consumed as chars.
4087: */
1.171 daniel 4088: if ((ctxt->sax != NULL) && (!ctxt->disableSAX)) {
1.91 daniel 4089: if (areBlanks(ctxt, buf, nbchar)) {
4090: if (ctxt->sax->ignorableWhitespace != NULL)
4091: ctxt->sax->ignorableWhitespace(ctxt->userData, buf, nbchar);
4092: } else {
4093: if (ctxt->sax->characters != NULL)
4094: ctxt->sax->characters(ctxt->userData, buf, nbchar);
4095: }
4096: }
1.45 daniel 4097: }
1.27 daniel 4098: }
4099:
1.50 daniel 4100: /**
4101: * xmlParseExternalID:
4102: * @ctxt: an XML parser context
1.123 daniel 4103: * @publicID: a xmlChar** receiving PubidLiteral
1.67 daniel 4104: * @strict: indicate whether we should restrict parsing to only
4105: * production [75], see NOTE below
1.50 daniel 4106: *
1.67 daniel 4107: * Parse an External ID or a Public ID
4108: *
4109: * NOTE: Productions [75] and [83] interract badly since [75] can generate
4110: * 'PUBLIC' S PubidLiteral S SystemLiteral
1.22 daniel 4111: *
4112: * [75] ExternalID ::= 'SYSTEM' S SystemLiteral
4113: * | 'PUBLIC' S PubidLiteral S SystemLiteral
1.67 daniel 4114: *
4115: * [83] PublicID ::= 'PUBLIC' S PubidLiteral
4116: *
1.68 daniel 4117: * Returns the function returns SystemLiteral and in the second
1.67 daniel 4118: * case publicID receives PubidLiteral, is strict is off
4119: * it is possible to return NULL and have publicID set.
1.22 daniel 4120: */
4121:
1.123 daniel 4122: xmlChar *
4123: xmlParseExternalID(xmlParserCtxtPtr ctxt, xmlChar **publicID, int strict) {
4124: xmlChar *URI = NULL;
1.22 daniel 4125:
1.91 daniel 4126: SHRINK;
1.152 daniel 4127: if ((RAW == 'S') && (NXT(1) == 'Y') &&
1.40 daniel 4128: (NXT(2) == 'S') && (NXT(3) == 'T') &&
4129: (NXT(4) == 'E') && (NXT(5) == 'M')) {
4130: SKIP(6);
1.59 daniel 4131: if (!IS_BLANK(CUR)) {
4132: if ((ctxt->sax != NULL) && (ctxt->sax->error != NULL))
1.74 daniel 4133: ctxt->sax->error(ctxt->userData,
1.59 daniel 4134: "Space required after 'SYSTEM'\n");
1.123 daniel 4135: ctxt->errNo = XML_ERR_SPACE_REQUIRED;
1.59 daniel 4136: ctxt->wellFormed = 0;
1.180 daniel 4137: ctxt->disableSAX = 1;
1.59 daniel 4138: }
1.42 daniel 4139: SKIP_BLANKS;
1.39 daniel 4140: URI = xmlParseSystemLiteral(ctxt);
1.59 daniel 4141: if (URI == NULL) {
1.55 daniel 4142: if ((ctxt->sax != NULL) && (ctxt->sax->error != NULL))
1.74 daniel 4143: ctxt->sax->error(ctxt->userData,
1.39 daniel 4144: "xmlParseExternalID: SYSTEM, no URI\n");
1.123 daniel 4145: ctxt->errNo = XML_ERR_URI_REQUIRED;
1.59 daniel 4146: ctxt->wellFormed = 0;
1.180 daniel 4147: ctxt->disableSAX = 1;
1.59 daniel 4148: }
1.152 daniel 4149: } else if ((RAW == 'P') && (NXT(1) == 'U') &&
1.40 daniel 4150: (NXT(2) == 'B') && (NXT(3) == 'L') &&
4151: (NXT(4) == 'I') && (NXT(5) == 'C')) {
4152: SKIP(6);
1.59 daniel 4153: if (!IS_BLANK(CUR)) {
4154: if ((ctxt->sax != NULL) && (ctxt->sax->error != NULL))
1.74 daniel 4155: ctxt->sax->error(ctxt->userData,
1.59 daniel 4156: "Space required after 'PUBLIC'\n");
1.123 daniel 4157: ctxt->errNo = XML_ERR_SPACE_REQUIRED;
1.59 daniel 4158: ctxt->wellFormed = 0;
1.180 daniel 4159: ctxt->disableSAX = 1;
1.59 daniel 4160: }
1.42 daniel 4161: SKIP_BLANKS;
1.39 daniel 4162: *publicID = xmlParsePubidLiteral(ctxt);
1.59 daniel 4163: if (*publicID == NULL) {
1.55 daniel 4164: if ((ctxt->sax != NULL) && (ctxt->sax->error != NULL))
1.74 daniel 4165: ctxt->sax->error(ctxt->userData,
1.39 daniel 4166: "xmlParseExternalID: PUBLIC, no Public Identifier\n");
1.123 daniel 4167: ctxt->errNo = XML_ERR_PUBID_REQUIRED;
1.59 daniel 4168: ctxt->wellFormed = 0;
1.180 daniel 4169: ctxt->disableSAX = 1;
1.59 daniel 4170: }
1.67 daniel 4171: if (strict) {
4172: /*
4173: * We don't handle [83] so "S SystemLiteral" is required.
4174: */
4175: if (!IS_BLANK(CUR)) {
4176: if ((ctxt->sax != NULL) && (ctxt->sax->error != NULL))
1.74 daniel 4177: ctxt->sax->error(ctxt->userData,
1.67 daniel 4178: "Space required after the Public Identifier\n");
1.123 daniel 4179: ctxt->errNo = XML_ERR_SPACE_REQUIRED;
1.67 daniel 4180: ctxt->wellFormed = 0;
1.180 daniel 4181: ctxt->disableSAX = 1;
1.67 daniel 4182: }
4183: } else {
4184: /*
4185: * We handle [83] so we return immediately, if
4186: * "S SystemLiteral" is not detected. From a purely parsing
4187: * point of view that's a nice mess.
4188: */
1.135 daniel 4189: const xmlChar *ptr;
4190: GROW;
4191:
4192: ptr = CUR_PTR;
1.67 daniel 4193: if (!IS_BLANK(*ptr)) return(NULL);
4194:
4195: while (IS_BLANK(*ptr)) ptr++;
1.173 daniel 4196: if ((*ptr != '\'') && (*ptr != '"')) return(NULL);
1.59 daniel 4197: }
1.42 daniel 4198: SKIP_BLANKS;
1.39 daniel 4199: URI = xmlParseSystemLiteral(ctxt);
1.59 daniel 4200: if (URI == NULL) {
1.55 daniel 4201: if ((ctxt->sax != NULL) && (ctxt->sax->error != NULL))
1.74 daniel 4202: ctxt->sax->error(ctxt->userData,
1.39 daniel 4203: "xmlParseExternalID: PUBLIC, no URI\n");
1.123 daniel 4204: ctxt->errNo = XML_ERR_URI_REQUIRED;
1.59 daniel 4205: ctxt->wellFormed = 0;
1.180 daniel 4206: ctxt->disableSAX = 1;
1.59 daniel 4207: }
1.22 daniel 4208: }
1.39 daniel 4209: return(URI);
1.22 daniel 4210: }
4211:
1.50 daniel 4212: /**
4213: * xmlParseComment:
1.69 daniel 4214: * @ctxt: an XML parser context
1.50 daniel 4215: *
1.3 veillard 4216: * Skip an XML (SGML) comment <!-- .... -->
1.38 daniel 4217: * The spec says that "For compatibility, the string "--" (double-hyphen)
4218: * must not occur within comments. "
1.22 daniel 4219: *
4220: * [15] Comment ::= '<!--' ((Char - '-') | ('-' (Char - '-')))* '-->'
1.3 veillard 4221: */
1.72 daniel 4222: void
1.114 daniel 4223: xmlParseComment(xmlParserCtxtPtr ctxt) {
1.135 daniel 4224: xmlChar *buf = NULL;
4225: int len = 0;
1.140 daniel 4226: int size = XML_PARSER_BUFFER_SIZE;
1.152 daniel 4227: int q, ql;
4228: int r, rl;
4229: int cur, l;
1.140 daniel 4230: xmlParserInputState state;
1.187 ! daniel 4231: xmlParserInputPtr input = ctxt->input;
1.3 veillard 4232:
4233: /*
1.22 daniel 4234: * Check that there is a comment right here.
1.3 veillard 4235: */
1.152 daniel 4236: if ((RAW != '<') || (NXT(1) != '!') ||
1.72 daniel 4237: (NXT(2) != '-') || (NXT(3) != '-')) return;
1.3 veillard 4238:
1.140 daniel 4239: state = ctxt->instate;
1.97 daniel 4240: ctxt->instate = XML_PARSER_COMMENT;
1.91 daniel 4241: SHRINK;
1.40 daniel 4242: SKIP(4);
1.135 daniel 4243: buf = (xmlChar *) xmlMalloc(size * sizeof(xmlChar));
4244: if (buf == NULL) {
4245: fprintf(stderr, "malloc of %d byte failed\n", size);
1.140 daniel 4246: ctxt->instate = state;
1.135 daniel 4247: return;
4248: }
1.152 daniel 4249: q = CUR_CHAR(ql);
4250: NEXTL(ql);
4251: r = CUR_CHAR(rl);
4252: NEXTL(rl);
4253: cur = CUR_CHAR(l);
1.135 daniel 4254: while (IS_CHAR(cur) &&
4255: ((cur != '>') ||
4256: (r != '-') || (q != '-'))) {
4257: if ((r == '-') && (q == '-')) {
1.55 daniel 4258: if ((ctxt->sax != NULL) && (ctxt->sax->error != NULL))
1.74 daniel 4259: ctxt->sax->error(ctxt->userData,
1.38 daniel 4260: "Comment must not contain '--' (double-hyphen)`\n");
1.123 daniel 4261: ctxt->errNo = XML_ERR_HYPHEN_IN_COMMENT;
1.59 daniel 4262: ctxt->wellFormed = 0;
1.180 daniel 4263: ctxt->disableSAX = 1;
1.59 daniel 4264: }
1.152 daniel 4265: if (len + 5 >= size) {
1.135 daniel 4266: size *= 2;
4267: buf = xmlRealloc(buf, size * sizeof(xmlChar));
4268: if (buf == NULL) {
4269: fprintf(stderr, "realloc of %d byte failed\n", size);
1.140 daniel 4270: ctxt->instate = state;
1.135 daniel 4271: return;
4272: }
4273: }
1.152 daniel 4274: COPY_BUF(ql,buf,len,q);
1.135 daniel 4275: q = r;
1.152 daniel 4276: ql = rl;
1.135 daniel 4277: r = cur;
1.152 daniel 4278: rl = l;
4279: NEXTL(l);
4280: cur = CUR_CHAR(l);
1.135 daniel 4281: if (cur == 0) {
4282: SHRINK;
4283: GROW;
1.152 daniel 4284: cur = CUR_CHAR(l);
1.135 daniel 4285: }
1.3 veillard 4286: }
1.135 daniel 4287: buf[len] = 0;
4288: if (!IS_CHAR(cur)) {
1.55 daniel 4289: if ((ctxt->sax != NULL) && (ctxt->sax->error != NULL))
1.122 daniel 4290: ctxt->sax->error(ctxt->userData,
1.135 daniel 4291: "Comment not terminated \n<!--%.50s\n", buf);
1.123 daniel 4292: ctxt->errNo = XML_ERR_COMMENT_NOT_FINISHED;
1.59 daniel 4293: ctxt->wellFormed = 0;
1.180 daniel 4294: ctxt->disableSAX = 1;
1.178 daniel 4295: xmlFree(buf);
1.3 veillard 4296: } else {
1.187 ! daniel 4297: if (input != ctxt->input) {
! 4298: if ((ctxt->sax != NULL) && (ctxt->sax->error != NULL))
! 4299: ctxt->sax->error(ctxt->userData,
! 4300: "Comment doesn't start and stop in the same entity\n");
! 4301: ctxt->errNo = XML_ERR_ENTITY_BOUNDARY;
! 4302: ctxt->wellFormed = 0;
! 4303: ctxt->disableSAX = 1;
! 4304: }
1.40 daniel 4305: NEXT;
1.171 daniel 4306: if ((ctxt->sax != NULL) && (ctxt->sax->comment != NULL) &&
4307: (!ctxt->disableSAX))
1.135 daniel 4308: ctxt->sax->comment(ctxt->userData, buf);
4309: xmlFree(buf);
1.3 veillard 4310: }
1.140 daniel 4311: ctxt->instate = state;
1.3 veillard 4312: }
4313:
1.50 daniel 4314: /**
4315: * xmlParsePITarget:
4316: * @ctxt: an XML parser context
4317: *
4318: * parse the name of a PI
1.22 daniel 4319: *
4320: * [17] PITarget ::= Name - (('X' | 'x') ('M' | 'm') ('L' | 'l'))
1.68 daniel 4321: *
4322: * Returns the PITarget name or NULL
1.22 daniel 4323: */
4324:
1.123 daniel 4325: xmlChar *
1.55 daniel 4326: xmlParsePITarget(xmlParserCtxtPtr ctxt) {
1.123 daniel 4327: xmlChar *name;
1.22 daniel 4328:
4329: name = xmlParseName(ctxt);
1.139 daniel 4330: if ((name != NULL) &&
1.22 daniel 4331: ((name[0] == 'x') || (name[0] == 'X')) &&
1.31 daniel 4332: ((name[1] == 'm') || (name[1] == 'M')) &&
4333: ((name[2] == 'l') || (name[2] == 'L'))) {
1.139 daniel 4334: int i;
1.177 daniel 4335: if ((name[0] == 'x') && (name[1] == 'm') &&
4336: (name[2] == 'l') && (name[3] == 0)) {
1.151 daniel 4337: if ((ctxt->sax != NULL) && (ctxt->sax->error != NULL))
4338: ctxt->sax->error(ctxt->userData,
4339: "XML declaration allowed only at the start of the document\n");
4340: ctxt->errNo = XML_ERR_RESERVED_XML_NAME;
4341: ctxt->wellFormed = 0;
1.180 daniel 4342: ctxt->disableSAX = 1;
1.151 daniel 4343: return(name);
4344: } else if (name[3] == 0) {
4345: if ((ctxt->sax != NULL) && (ctxt->sax->error != NULL))
4346: ctxt->sax->error(ctxt->userData, "Invalid PI name\n");
4347: ctxt->errNo = XML_ERR_RESERVED_XML_NAME;
4348: ctxt->wellFormed = 0;
1.180 daniel 4349: ctxt->disableSAX = 1;
1.151 daniel 4350: return(name);
4351: }
1.139 daniel 4352: for (i = 0;;i++) {
4353: if (xmlW3CPIs[i] == NULL) break;
4354: if (!xmlStrcmp(name, (const xmlChar *)xmlW3CPIs[i]))
4355: return(name);
4356: }
4357: if ((ctxt->sax != NULL) && (ctxt->sax->warning != NULL)) {
4358: ctxt->sax->warning(ctxt->userData,
1.122 daniel 4359: "xmlParsePItarget: invalid name prefix 'xml'\n");
1.123 daniel 4360: ctxt->errNo = XML_ERR_RESERVED_XML_NAME;
1.122 daniel 4361: }
1.22 daniel 4362: }
4363: return(name);
4364: }
4365:
1.50 daniel 4366: /**
4367: * xmlParsePI:
4368: * @ctxt: an XML parser context
4369: *
4370: * parse an XML Processing Instruction.
1.22 daniel 4371: *
4372: * [16] PI ::= '<?' PITarget (S (Char* - (Char* '?>' Char*)))? '?>'
1.68 daniel 4373: *
1.69 daniel 4374: * The processing is transfered to SAX once parsed.
1.3 veillard 4375: */
4376:
1.55 daniel 4377: void
4378: xmlParsePI(xmlParserCtxtPtr ctxt) {
1.135 daniel 4379: xmlChar *buf = NULL;
4380: int len = 0;
1.140 daniel 4381: int size = XML_PARSER_BUFFER_SIZE;
1.152 daniel 4382: int cur, l;
1.123 daniel 4383: xmlChar *target;
1.140 daniel 4384: xmlParserInputState state;
1.22 daniel 4385:
1.152 daniel 4386: if ((RAW == '<') && (NXT(1) == '?')) {
1.187 ! daniel 4387: xmlParserInputPtr input = ctxt->input;
1.140 daniel 4388: state = ctxt->instate;
4389: ctxt->instate = XML_PARSER_PI;
1.3 veillard 4390: /*
4391: * this is a Processing Instruction.
4392: */
1.40 daniel 4393: SKIP(2);
1.91 daniel 4394: SHRINK;
1.3 veillard 4395:
4396: /*
1.22 daniel 4397: * Parse the target name and check for special support like
4398: * namespace.
1.3 veillard 4399: */
1.22 daniel 4400: target = xmlParsePITarget(ctxt);
4401: if (target != NULL) {
1.156 daniel 4402: if ((RAW == '?') && (NXT(1) == '>')) {
1.187 ! daniel 4403: if (input != ctxt->input) {
! 4404: if ((ctxt->sax != NULL) && (ctxt->sax->error != NULL))
! 4405: ctxt->sax->error(ctxt->userData,
! 4406: "PI declaration doesn't start and stop in the same entity\n");
! 4407: ctxt->errNo = XML_ERR_ENTITY_BOUNDARY;
! 4408: ctxt->wellFormed = 0;
! 4409: ctxt->disableSAX = 1;
! 4410: }
1.156 daniel 4411: SKIP(2);
4412:
4413: /*
4414: * SAX: PI detected.
4415: */
1.171 daniel 4416: if ((ctxt->sax) && (!ctxt->disableSAX) &&
1.156 daniel 4417: (ctxt->sax->processingInstruction != NULL))
4418: ctxt->sax->processingInstruction(ctxt->userData,
4419: target, NULL);
4420: ctxt->instate = state;
1.170 daniel 4421: xmlFree(target);
1.156 daniel 4422: return;
4423: }
1.135 daniel 4424: buf = (xmlChar *) xmlMalloc(size * sizeof(xmlChar));
4425: if (buf == NULL) {
4426: fprintf(stderr, "malloc of %d byte failed\n", size);
1.140 daniel 4427: ctxt->instate = state;
1.135 daniel 4428: return;
4429: }
4430: cur = CUR;
4431: if (!IS_BLANK(cur)) {
1.114 daniel 4432: if ((ctxt->sax != NULL) && (ctxt->sax->error != NULL))
4433: ctxt->sax->error(ctxt->userData,
4434: "xmlParsePI: PI %s space expected\n", target);
1.123 daniel 4435: ctxt->errNo = XML_ERR_SPACE_REQUIRED;
1.114 daniel 4436: ctxt->wellFormed = 0;
1.180 daniel 4437: ctxt->disableSAX = 1;
1.114 daniel 4438: }
4439: SKIP_BLANKS;
1.152 daniel 4440: cur = CUR_CHAR(l);
1.135 daniel 4441: while (IS_CHAR(cur) &&
4442: ((cur != '?') || (NXT(1) != '>'))) {
1.152 daniel 4443: if (len + 5 >= size) {
1.135 daniel 4444: size *= 2;
4445: buf = xmlRealloc(buf, size * sizeof(xmlChar));
4446: if (buf == NULL) {
4447: fprintf(stderr, "realloc of %d byte failed\n", size);
1.140 daniel 4448: ctxt->instate = state;
1.135 daniel 4449: return;
4450: }
4451: }
1.152 daniel 4452: COPY_BUF(l,buf,len,cur);
4453: NEXTL(l);
4454: cur = CUR_CHAR(l);
1.135 daniel 4455: if (cur == 0) {
4456: SHRINK;
4457: GROW;
1.152 daniel 4458: cur = CUR_CHAR(l);
1.135 daniel 4459: }
4460: }
4461: buf[len] = 0;
1.152 daniel 4462: if (cur != '?') {
1.72 daniel 4463: if ((ctxt->sax != NULL) && (ctxt->sax->error != NULL))
1.74 daniel 4464: ctxt->sax->error(ctxt->userData,
1.72 daniel 4465: "xmlParsePI: PI %s never end ...\n", target);
1.123 daniel 4466: ctxt->errNo = XML_ERR_PI_NOT_FINISHED;
1.72 daniel 4467: ctxt->wellFormed = 0;
1.180 daniel 4468: ctxt->disableSAX = 1;
1.22 daniel 4469: } else {
1.187 ! daniel 4470: if (input != ctxt->input) {
! 4471: if ((ctxt->sax != NULL) && (ctxt->sax->error != NULL))
! 4472: ctxt->sax->error(ctxt->userData,
! 4473: "PI declaration doesn't start and stop in the same entity\n");
! 4474: ctxt->errNo = XML_ERR_ENTITY_BOUNDARY;
! 4475: ctxt->wellFormed = 0;
! 4476: ctxt->disableSAX = 1;
! 4477: }
1.72 daniel 4478: SKIP(2);
1.44 daniel 4479:
1.72 daniel 4480: /*
4481: * SAX: PI detected.
4482: */
1.171 daniel 4483: if ((ctxt->sax) && (!ctxt->disableSAX) &&
1.72 daniel 4484: (ctxt->sax->processingInstruction != NULL))
1.99 daniel 4485: ctxt->sax->processingInstruction(ctxt->userData,
1.135 daniel 4486: target, buf);
1.22 daniel 4487: }
1.135 daniel 4488: xmlFree(buf);
1.119 daniel 4489: xmlFree(target);
1.3 veillard 4490: } else {
1.55 daniel 4491: if ((ctxt->sax != NULL) && (ctxt->sax->error != NULL))
1.99 daniel 4492: ctxt->sax->error(ctxt->userData,
4493: "xmlParsePI : no target name\n");
1.123 daniel 4494: ctxt->errNo = XML_ERR_PI_NOT_STARTED;
1.59 daniel 4495: ctxt->wellFormed = 0;
1.180 daniel 4496: ctxt->disableSAX = 1;
1.22 daniel 4497: }
1.140 daniel 4498: ctxt->instate = state;
1.22 daniel 4499: }
4500: }
4501:
1.50 daniel 4502: /**
4503: * xmlParseNotationDecl:
4504: * @ctxt: an XML parser context
4505: *
4506: * parse a notation declaration
1.22 daniel 4507: *
4508: * [82] NotationDecl ::= '<!NOTATION' S Name S (ExternalID | PublicID) S? '>'
4509: *
4510: * Hence there is actually 3 choices:
4511: * 'PUBLIC' S PubidLiteral
4512: * 'PUBLIC' S PubidLiteral S SystemLiteral
4513: * and 'SYSTEM' S SystemLiteral
1.50 daniel 4514: *
1.67 daniel 4515: * See the NOTE on xmlParseExternalID().
1.22 daniel 4516: */
4517:
1.55 daniel 4518: void
4519: xmlParseNotationDecl(xmlParserCtxtPtr ctxt) {
1.123 daniel 4520: xmlChar *name;
4521: xmlChar *Pubid;
4522: xmlChar *Systemid;
1.22 daniel 4523:
1.152 daniel 4524: if ((RAW == '<') && (NXT(1) == '!') &&
1.40 daniel 4525: (NXT(2) == 'N') && (NXT(3) == 'O') &&
4526: (NXT(4) == 'T') && (NXT(5) == 'A') &&
4527: (NXT(6) == 'T') && (NXT(7) == 'I') &&
1.67 daniel 4528: (NXT(8) == 'O') && (NXT(9) == 'N')) {
1.187 ! daniel 4529: xmlParserInputPtr input = ctxt->input;
1.91 daniel 4530: SHRINK;
1.40 daniel 4531: SKIP(10);
1.67 daniel 4532: if (!IS_BLANK(CUR)) {
4533: if ((ctxt->sax != NULL) && (ctxt->sax->error != NULL))
1.122 daniel 4534: ctxt->sax->error(ctxt->userData,
4535: "Space required after '<!NOTATION'\n");
1.123 daniel 4536: ctxt->errNo = XML_ERR_SPACE_REQUIRED;
1.67 daniel 4537: ctxt->wellFormed = 0;
1.180 daniel 4538: ctxt->disableSAX = 1;
1.67 daniel 4539: return;
4540: }
4541: SKIP_BLANKS;
1.22 daniel 4542:
4543: name = xmlParseName(ctxt);
4544: if (name == NULL) {
1.55 daniel 4545: if ((ctxt->sax != NULL) && (ctxt->sax->error != NULL))
1.122 daniel 4546: ctxt->sax->error(ctxt->userData,
4547: "NOTATION: Name expected here\n");
1.123 daniel 4548: ctxt->errNo = XML_ERR_NOTATION_NOT_STARTED;
1.67 daniel 4549: ctxt->wellFormed = 0;
1.180 daniel 4550: ctxt->disableSAX = 1;
1.67 daniel 4551: return;
4552: }
4553: if (!IS_BLANK(CUR)) {
4554: if ((ctxt->sax != NULL) && (ctxt->sax->error != NULL))
1.74 daniel 4555: ctxt->sax->error(ctxt->userData,
1.67 daniel 4556: "Space required after the NOTATION name'\n");
1.123 daniel 4557: ctxt->errNo = XML_ERR_SPACE_REQUIRED;
1.59 daniel 4558: ctxt->wellFormed = 0;
1.180 daniel 4559: ctxt->disableSAX = 1;
1.22 daniel 4560: return;
4561: }
1.42 daniel 4562: SKIP_BLANKS;
1.67 daniel 4563:
1.22 daniel 4564: /*
1.67 daniel 4565: * Parse the IDs.
1.22 daniel 4566: */
1.160 daniel 4567: Systemid = xmlParseExternalID(ctxt, &Pubid, 0);
1.67 daniel 4568: SKIP_BLANKS;
4569:
1.152 daniel 4570: if (RAW == '>') {
1.187 ! daniel 4571: if (input != ctxt->input) {
! 4572: if ((ctxt->sax != NULL) && (ctxt->sax->error != NULL))
! 4573: ctxt->sax->error(ctxt->userData,
! 4574: "Notation declaration doesn't start and stop in the same entity\n");
! 4575: ctxt->errNo = XML_ERR_ENTITY_BOUNDARY;
! 4576: ctxt->wellFormed = 0;
! 4577: ctxt->disableSAX = 1;
! 4578: }
1.40 daniel 4579: NEXT;
1.171 daniel 4580: if ((ctxt->sax != NULL) && (!ctxt->disableSAX) &&
4581: (ctxt->sax->notationDecl != NULL))
1.74 daniel 4582: ctxt->sax->notationDecl(ctxt->userData, name, Pubid, Systemid);
1.67 daniel 4583: } else {
4584: if ((ctxt->sax != NULL) && (ctxt->sax->error != NULL))
1.74 daniel 4585: ctxt->sax->error(ctxt->userData,
1.67 daniel 4586: "'>' required to close NOTATION declaration\n");
1.123 daniel 4587: ctxt->errNo = XML_ERR_NOTATION_NOT_FINISHED;
1.67 daniel 4588: ctxt->wellFormed = 0;
1.180 daniel 4589: ctxt->disableSAX = 1;
1.67 daniel 4590: }
1.119 daniel 4591: xmlFree(name);
4592: if (Systemid != NULL) xmlFree(Systemid);
4593: if (Pubid != NULL) xmlFree(Pubid);
1.22 daniel 4594: }
4595: }
4596:
1.50 daniel 4597: /**
4598: * xmlParseEntityDecl:
4599: * @ctxt: an XML parser context
4600: *
4601: * parse <!ENTITY declarations
1.22 daniel 4602: *
4603: * [70] EntityDecl ::= GEDecl | PEDecl
4604: *
4605: * [71] GEDecl ::= '<!ENTITY' S Name S EntityDef S? '>'
4606: *
4607: * [72] PEDecl ::= '<!ENTITY' S '%' S Name S PEDef S? '>'
4608: *
4609: * [73] EntityDef ::= EntityValue | (ExternalID NDataDecl?)
4610: *
4611: * [74] PEDef ::= EntityValue | ExternalID
1.24 daniel 4612: *
4613: * [76] NDataDecl ::= S 'NDATA' S Name
1.99 daniel 4614: *
4615: * [ VC: Notation Declared ]
1.116 daniel 4616: * The Name must match the declared name of a notation.
1.22 daniel 4617: */
4618:
1.55 daniel 4619: void
4620: xmlParseEntityDecl(xmlParserCtxtPtr ctxt) {
1.123 daniel 4621: xmlChar *name = NULL;
4622: xmlChar *value = NULL;
4623: xmlChar *URI = NULL, *literal = NULL;
4624: xmlChar *ndata = NULL;
1.39 daniel 4625: int isParameter = 0;
1.123 daniel 4626: xmlChar *orig = NULL;
1.22 daniel 4627:
1.94 daniel 4628: GROW;
1.152 daniel 4629: if ((RAW == '<') && (NXT(1) == '!') &&
1.40 daniel 4630: (NXT(2) == 'E') && (NXT(3) == 'N') &&
4631: (NXT(4) == 'T') && (NXT(5) == 'I') &&
1.59 daniel 4632: (NXT(6) == 'T') && (NXT(7) == 'Y')) {
1.187 ! daniel 4633: xmlParserInputPtr input = ctxt->input;
1.96 daniel 4634: ctxt->instate = XML_PARSER_ENTITY_DECL;
1.91 daniel 4635: SHRINK;
1.40 daniel 4636: SKIP(8);
1.59 daniel 4637: if (!IS_BLANK(CUR)) {
4638: if ((ctxt->sax != NULL) && (ctxt->sax->error != NULL))
1.122 daniel 4639: ctxt->sax->error(ctxt->userData,
4640: "Space required after '<!ENTITY'\n");
1.123 daniel 4641: ctxt->errNo = XML_ERR_SPACE_REQUIRED;
1.59 daniel 4642: ctxt->wellFormed = 0;
1.180 daniel 4643: ctxt->disableSAX = 1;
1.59 daniel 4644: }
4645: SKIP_BLANKS;
1.40 daniel 4646:
1.152 daniel 4647: if (RAW == '%') {
1.40 daniel 4648: NEXT;
1.59 daniel 4649: if (!IS_BLANK(CUR)) {
4650: if ((ctxt->sax != NULL) && (ctxt->sax->error != NULL))
1.122 daniel 4651: ctxt->sax->error(ctxt->userData,
4652: "Space required after '%'\n");
1.123 daniel 4653: ctxt->errNo = XML_ERR_SPACE_REQUIRED;
1.59 daniel 4654: ctxt->wellFormed = 0;
1.180 daniel 4655: ctxt->disableSAX = 1;
1.59 daniel 4656: }
1.42 daniel 4657: SKIP_BLANKS;
1.39 daniel 4658: isParameter = 1;
1.22 daniel 4659: }
4660:
4661: name = xmlParseName(ctxt);
1.24 daniel 4662: if (name == NULL) {
1.55 daniel 4663: if ((ctxt->sax != NULL) && (ctxt->sax->error != NULL))
1.74 daniel 4664: ctxt->sax->error(ctxt->userData, "xmlParseEntityDecl: no name\n");
1.123 daniel 4665: ctxt->errNo = XML_ERR_NAME_REQUIRED;
1.59 daniel 4666: ctxt->wellFormed = 0;
1.180 daniel 4667: ctxt->disableSAX = 1;
1.24 daniel 4668: return;
4669: }
1.59 daniel 4670: if (!IS_BLANK(CUR)) {
4671: if ((ctxt->sax != NULL) && (ctxt->sax->error != NULL))
1.74 daniel 4672: ctxt->sax->error(ctxt->userData,
1.59 daniel 4673: "Space required after the entity name\n");
1.123 daniel 4674: ctxt->errNo = XML_ERR_SPACE_REQUIRED;
1.59 daniel 4675: ctxt->wellFormed = 0;
1.180 daniel 4676: ctxt->disableSAX = 1;
1.59 daniel 4677: }
1.42 daniel 4678: SKIP_BLANKS;
1.24 daniel 4679:
1.22 daniel 4680: /*
1.68 daniel 4681: * handle the various case of definitions...
1.22 daniel 4682: */
1.39 daniel 4683: if (isParameter) {
1.152 daniel 4684: if ((RAW == '"') || (RAW == '\''))
1.78 daniel 4685: value = xmlParseEntityValue(ctxt, &orig);
1.39 daniel 4686: if (value) {
1.171 daniel 4687: if ((ctxt->sax != NULL) &&
4688: (!ctxt->disableSAX) && (ctxt->sax->entityDecl != NULL))
1.74 daniel 4689: ctxt->sax->entityDecl(ctxt->userData, name,
1.39 daniel 4690: XML_INTERNAL_PARAMETER_ENTITY,
4691: NULL, NULL, value);
4692: }
1.24 daniel 4693: else {
1.67 daniel 4694: URI = xmlParseExternalID(ctxt, &literal, 1);
1.169 daniel 4695: if ((URI == NULL) && (literal == NULL)) {
4696: if ((ctxt->sax != NULL) && (ctxt->sax->error != NULL))
4697: ctxt->sax->error(ctxt->userData,
4698: "Entity value required\n");
4699: ctxt->errNo = XML_ERR_VALUE_REQUIRED;
4700: ctxt->wellFormed = 0;
1.180 daniel 4701: ctxt->disableSAX = 1;
1.169 daniel 4702: }
1.39 daniel 4703: if (URI) {
1.171 daniel 4704: if ((ctxt->sax != NULL) &&
4705: (!ctxt->disableSAX) && (ctxt->sax->entityDecl != NULL))
1.74 daniel 4706: ctxt->sax->entityDecl(ctxt->userData, name,
1.39 daniel 4707: XML_EXTERNAL_PARAMETER_ENTITY,
4708: literal, URI, NULL);
4709: }
1.24 daniel 4710: }
4711: } else {
1.152 daniel 4712: if ((RAW == '"') || (RAW == '\'')) {
1.78 daniel 4713: value = xmlParseEntityValue(ctxt, &orig);
1.171 daniel 4714: if ((ctxt->sax != NULL) &&
4715: (!ctxt->disableSAX) && (ctxt->sax->entityDecl != NULL))
1.74 daniel 4716: ctxt->sax->entityDecl(ctxt->userData, name,
1.39 daniel 4717: XML_INTERNAL_GENERAL_ENTITY,
4718: NULL, NULL, value);
4719: } else {
1.67 daniel 4720: URI = xmlParseExternalID(ctxt, &literal, 1);
1.169 daniel 4721: if ((URI == NULL) && (literal == NULL)) {
4722: if ((ctxt->sax != NULL) && (ctxt->sax->error != NULL))
4723: ctxt->sax->error(ctxt->userData,
4724: "Entity value required\n");
4725: ctxt->errNo = XML_ERR_VALUE_REQUIRED;
4726: ctxt->wellFormed = 0;
1.180 daniel 4727: ctxt->disableSAX = 1;
1.169 daniel 4728: }
1.152 daniel 4729: if ((RAW != '>') && (!IS_BLANK(CUR))) {
1.59 daniel 4730: if ((ctxt->sax != NULL) && (ctxt->sax->error != NULL))
1.74 daniel 4731: ctxt->sax->error(ctxt->userData,
1.59 daniel 4732: "Space required before 'NDATA'\n");
1.123 daniel 4733: ctxt->errNo = XML_ERR_SPACE_REQUIRED;
1.59 daniel 4734: ctxt->wellFormed = 0;
1.180 daniel 4735: ctxt->disableSAX = 1;
1.59 daniel 4736: }
1.42 daniel 4737: SKIP_BLANKS;
1.152 daniel 4738: if ((RAW == 'N') && (NXT(1) == 'D') &&
1.40 daniel 4739: (NXT(2) == 'A') && (NXT(3) == 'T') &&
4740: (NXT(4) == 'A')) {
4741: SKIP(5);
1.59 daniel 4742: if (!IS_BLANK(CUR)) {
4743: if ((ctxt->sax != NULL) && (ctxt->sax->error != NULL))
1.74 daniel 4744: ctxt->sax->error(ctxt->userData,
1.59 daniel 4745: "Space required after 'NDATA'\n");
1.123 daniel 4746: ctxt->errNo = XML_ERR_SPACE_REQUIRED;
1.59 daniel 4747: ctxt->wellFormed = 0;
1.180 daniel 4748: ctxt->disableSAX = 1;
1.59 daniel 4749: }
1.42 daniel 4750: SKIP_BLANKS;
1.24 daniel 4751: ndata = xmlParseName(ctxt);
1.171 daniel 4752: if ((ctxt->sax != NULL) && (!ctxt->disableSAX) &&
1.116 daniel 4753: (ctxt->sax->unparsedEntityDecl != NULL))
4754: ctxt->sax->unparsedEntityDecl(ctxt->userData, name,
1.39 daniel 4755: literal, URI, ndata);
4756: } else {
1.171 daniel 4757: if ((ctxt->sax != NULL) &&
4758: (!ctxt->disableSAX) && (ctxt->sax->entityDecl != NULL))
1.74 daniel 4759: ctxt->sax->entityDecl(ctxt->userData, name,
1.39 daniel 4760: XML_EXTERNAL_GENERAL_PARSED_ENTITY,
4761: literal, URI, NULL);
1.24 daniel 4762: }
4763: }
4764: }
1.42 daniel 4765: SKIP_BLANKS;
1.152 daniel 4766: if (RAW != '>') {
1.55 daniel 4767: if ((ctxt->sax != NULL) && (ctxt->sax->error != NULL))
1.74 daniel 4768: ctxt->sax->error(ctxt->userData,
1.31 daniel 4769: "xmlParseEntityDecl: entity %s not terminated\n", name);
1.123 daniel 4770: ctxt->errNo = XML_ERR_ENTITY_NOT_FINISHED;
1.59 daniel 4771: ctxt->wellFormed = 0;
1.180 daniel 4772: ctxt->disableSAX = 1;
1.187 ! daniel 4773: } else {
! 4774: if (input != ctxt->input) {
! 4775: if ((ctxt->sax != NULL) && (ctxt->sax->error != NULL))
! 4776: ctxt->sax->error(ctxt->userData,
! 4777: "Entity declaration doesn't start and stop in the same entity\n");
! 4778: ctxt->errNo = XML_ERR_ENTITY_BOUNDARY;
! 4779: ctxt->wellFormed = 0;
! 4780: ctxt->disableSAX = 1;
! 4781: }
1.40 daniel 4782: NEXT;
1.187 ! daniel 4783: }
1.78 daniel 4784: if (orig != NULL) {
4785: /*
1.98 daniel 4786: * Ugly mechanism to save the raw entity value.
1.78 daniel 4787: */
4788: xmlEntityPtr cur = NULL;
4789:
1.98 daniel 4790: if (isParameter) {
4791: if ((ctxt->sax != NULL) &&
4792: (ctxt->sax->getParameterEntity != NULL))
1.120 daniel 4793: cur = ctxt->sax->getParameterEntity(ctxt->userData, name);
1.98 daniel 4794: } else {
4795: if ((ctxt->sax != NULL) &&
4796: (ctxt->sax->getEntity != NULL))
1.120 daniel 4797: cur = ctxt->sax->getEntity(ctxt->userData, name);
1.98 daniel 4798: }
4799: if (cur != NULL) {
4800: if (cur->orig != NULL)
1.119 daniel 4801: xmlFree(orig);
1.98 daniel 4802: else
4803: cur->orig = orig;
4804: } else
1.119 daniel 4805: xmlFree(orig);
1.78 daniel 4806: }
1.119 daniel 4807: if (name != NULL) xmlFree(name);
4808: if (value != NULL) xmlFree(value);
4809: if (URI != NULL) xmlFree(URI);
4810: if (literal != NULL) xmlFree(literal);
4811: if (ndata != NULL) xmlFree(ndata);
1.22 daniel 4812: }
4813: }
4814:
1.50 daniel 4815: /**
1.59 daniel 4816: * xmlParseDefaultDecl:
4817: * @ctxt: an XML parser context
4818: * @value: Receive a possible fixed default value for the attribute
4819: *
4820: * Parse an attribute default declaration
4821: *
4822: * [60] DefaultDecl ::= '#REQUIRED' | '#IMPLIED' | (('#FIXED' S)? AttValue)
4823: *
1.99 daniel 4824: * [ VC: Required Attribute ]
1.117 daniel 4825: * if the default declaration is the keyword #REQUIRED, then the
4826: * attribute must be specified for all elements of the type in the
4827: * attribute-list declaration.
1.99 daniel 4828: *
4829: * [ VC: Attribute Default Legal ]
1.102 daniel 4830: * The declared default value must meet the lexical constraints of
4831: * the declared attribute type c.f. xmlValidateAttributeDecl()
1.99 daniel 4832: *
4833: * [ VC: Fixed Attribute Default ]
1.117 daniel 4834: * if an attribute has a default value declared with the #FIXED
4835: * keyword, instances of that attribute must match the default value.
1.99 daniel 4836: *
4837: * [ WFC: No < in Attribute Values ]
4838: * handled in xmlParseAttValue()
4839: *
1.59 daniel 4840: * returns: XML_ATTRIBUTE_NONE, XML_ATTRIBUTE_REQUIRED, XML_ATTRIBUTE_IMPLIED
4841: * or XML_ATTRIBUTE_FIXED.
4842: */
4843:
4844: int
1.123 daniel 4845: xmlParseDefaultDecl(xmlParserCtxtPtr ctxt, xmlChar **value) {
1.59 daniel 4846: int val;
1.123 daniel 4847: xmlChar *ret;
1.59 daniel 4848:
4849: *value = NULL;
1.152 daniel 4850: if ((RAW == '#') && (NXT(1) == 'R') &&
1.59 daniel 4851: (NXT(2) == 'E') && (NXT(3) == 'Q') &&
4852: (NXT(4) == 'U') && (NXT(5) == 'I') &&
4853: (NXT(6) == 'R') && (NXT(7) == 'E') &&
4854: (NXT(8) == 'D')) {
4855: SKIP(9);
4856: return(XML_ATTRIBUTE_REQUIRED);
4857: }
1.152 daniel 4858: if ((RAW == '#') && (NXT(1) == 'I') &&
1.59 daniel 4859: (NXT(2) == 'M') && (NXT(3) == 'P') &&
4860: (NXT(4) == 'L') && (NXT(5) == 'I') &&
4861: (NXT(6) == 'E') && (NXT(7) == 'D')) {
4862: SKIP(8);
4863: return(XML_ATTRIBUTE_IMPLIED);
4864: }
4865: val = XML_ATTRIBUTE_NONE;
1.152 daniel 4866: if ((RAW == '#') && (NXT(1) == 'F') &&
1.59 daniel 4867: (NXT(2) == 'I') && (NXT(3) == 'X') &&
4868: (NXT(4) == 'E') && (NXT(5) == 'D')) {
4869: SKIP(6);
4870: val = XML_ATTRIBUTE_FIXED;
4871: if (!IS_BLANK(CUR)) {
4872: if ((ctxt->sax != NULL) && (ctxt->sax->error != NULL))
1.122 daniel 4873: ctxt->sax->error(ctxt->userData,
4874: "Space required after '#FIXED'\n");
1.123 daniel 4875: ctxt->errNo = XML_ERR_SPACE_REQUIRED;
1.59 daniel 4876: ctxt->wellFormed = 0;
1.180 daniel 4877: ctxt->disableSAX = 1;
1.59 daniel 4878: }
4879: SKIP_BLANKS;
4880: }
4881: ret = xmlParseAttValue(ctxt);
1.96 daniel 4882: ctxt->instate = XML_PARSER_DTD;
1.59 daniel 4883: if (ret == NULL) {
4884: if ((ctxt->sax != NULL) && (ctxt->sax->error != NULL))
1.74 daniel 4885: ctxt->sax->error(ctxt->userData,
1.59 daniel 4886: "Attribute default value declaration error\n");
4887: ctxt->wellFormed = 0;
1.180 daniel 4888: ctxt->disableSAX = 1;
1.59 daniel 4889: } else
4890: *value = ret;
4891: return(val);
4892: }
4893:
4894: /**
1.66 daniel 4895: * xmlParseNotationType:
4896: * @ctxt: an XML parser context
4897: *
4898: * parse an Notation attribute type.
4899: *
1.99 daniel 4900: * Note: the leading 'NOTATION' S part has already being parsed...
4901: *
1.66 daniel 4902: * [58] NotationType ::= 'NOTATION' S '(' S? Name (S? '|' S? Name)* S? ')'
4903: *
1.99 daniel 4904: * [ VC: Notation Attributes ]
1.117 daniel 4905: * Values of this type must match one of the notation names included
1.99 daniel 4906: * in the declaration; all notation names in the declaration must be declared.
1.66 daniel 4907: *
4908: * Returns: the notation attribute tree built while parsing
4909: */
4910:
4911: xmlEnumerationPtr
4912: xmlParseNotationType(xmlParserCtxtPtr ctxt) {
1.123 daniel 4913: xmlChar *name;
1.66 daniel 4914: xmlEnumerationPtr ret = NULL, last = NULL, cur;
4915:
1.152 daniel 4916: if (RAW != '(') {
1.66 daniel 4917: if ((ctxt->sax != NULL) && (ctxt->sax->error != NULL))
1.122 daniel 4918: ctxt->sax->error(ctxt->userData,
4919: "'(' required to start 'NOTATION'\n");
1.123 daniel 4920: ctxt->errNo = XML_ERR_NOTATION_NOT_STARTED;
1.66 daniel 4921: ctxt->wellFormed = 0;
1.180 daniel 4922: ctxt->disableSAX = 1;
1.66 daniel 4923: return(NULL);
4924: }
1.91 daniel 4925: SHRINK;
1.66 daniel 4926: do {
4927: NEXT;
4928: SKIP_BLANKS;
4929: name = xmlParseName(ctxt);
4930: if (name == NULL) {
4931: if ((ctxt->sax != NULL) && (ctxt->sax->error != NULL))
1.74 daniel 4932: ctxt->sax->error(ctxt->userData,
1.66 daniel 4933: "Name expected in NOTATION declaration\n");
1.123 daniel 4934: ctxt->errNo = XML_ERR_NAME_REQUIRED;
1.66 daniel 4935: ctxt->wellFormed = 0;
1.180 daniel 4936: ctxt->disableSAX = 1;
1.66 daniel 4937: return(ret);
4938: }
4939: cur = xmlCreateEnumeration(name);
1.119 daniel 4940: xmlFree(name);
1.66 daniel 4941: if (cur == NULL) return(ret);
4942: if (last == NULL) ret = last = cur;
4943: else {
4944: last->next = cur;
4945: last = cur;
4946: }
4947: SKIP_BLANKS;
1.152 daniel 4948: } while (RAW == '|');
4949: if (RAW != ')') {
1.66 daniel 4950: if ((ctxt->sax != NULL) && (ctxt->sax->error != NULL))
1.74 daniel 4951: ctxt->sax->error(ctxt->userData,
1.66 daniel 4952: "')' required to finish NOTATION declaration\n");
1.123 daniel 4953: ctxt->errNo = XML_ERR_NOTATION_NOT_FINISHED;
1.66 daniel 4954: ctxt->wellFormed = 0;
1.180 daniel 4955: ctxt->disableSAX = 1;
1.170 daniel 4956: if ((last != NULL) && (last != ret))
4957: xmlFreeEnumeration(last);
1.66 daniel 4958: return(ret);
4959: }
4960: NEXT;
4961: return(ret);
4962: }
4963:
4964: /**
4965: * xmlParseEnumerationType:
4966: * @ctxt: an XML parser context
4967: *
4968: * parse an Enumeration attribute type.
4969: *
4970: * [59] Enumeration ::= '(' S? Nmtoken (S? '|' S? Nmtoken)* S? ')'
4971: *
1.99 daniel 4972: * [ VC: Enumeration ]
1.117 daniel 4973: * Values of this type must match one of the Nmtoken tokens in
1.99 daniel 4974: * the declaration
4975: *
1.66 daniel 4976: * Returns: the enumeration attribute tree built while parsing
4977: */
4978:
4979: xmlEnumerationPtr
4980: xmlParseEnumerationType(xmlParserCtxtPtr ctxt) {
1.123 daniel 4981: xmlChar *name;
1.66 daniel 4982: xmlEnumerationPtr ret = NULL, last = NULL, cur;
4983:
1.152 daniel 4984: if (RAW != '(') {
1.66 daniel 4985: if ((ctxt->sax != NULL) && (ctxt->sax->error != NULL))
1.74 daniel 4986: ctxt->sax->error(ctxt->userData,
1.66 daniel 4987: "'(' required to start ATTLIST enumeration\n");
1.123 daniel 4988: ctxt->errNo = XML_ERR_ATTLIST_NOT_STARTED;
1.66 daniel 4989: ctxt->wellFormed = 0;
1.180 daniel 4990: ctxt->disableSAX = 1;
1.66 daniel 4991: return(NULL);
4992: }
1.91 daniel 4993: SHRINK;
1.66 daniel 4994: do {
4995: NEXT;
4996: SKIP_BLANKS;
4997: name = xmlParseNmtoken(ctxt);
4998: if (name == NULL) {
4999: if ((ctxt->sax != NULL) && (ctxt->sax->error != NULL))
1.74 daniel 5000: ctxt->sax->error(ctxt->userData,
1.66 daniel 5001: "NmToken expected in ATTLIST enumeration\n");
1.123 daniel 5002: ctxt->errNo = XML_ERR_NMTOKEN_REQUIRED;
1.66 daniel 5003: ctxt->wellFormed = 0;
1.180 daniel 5004: ctxt->disableSAX = 1;
1.66 daniel 5005: return(ret);
5006: }
5007: cur = xmlCreateEnumeration(name);
1.119 daniel 5008: xmlFree(name);
1.66 daniel 5009: if (cur == NULL) return(ret);
5010: if (last == NULL) ret = last = cur;
5011: else {
5012: last->next = cur;
5013: last = cur;
5014: }
5015: SKIP_BLANKS;
1.152 daniel 5016: } while (RAW == '|');
5017: if (RAW != ')') {
1.66 daniel 5018: if ((ctxt->sax != NULL) && (ctxt->sax->error != NULL))
1.74 daniel 5019: ctxt->sax->error(ctxt->userData,
1.66 daniel 5020: "')' required to finish ATTLIST enumeration\n");
1.123 daniel 5021: ctxt->errNo = XML_ERR_ATTLIST_NOT_FINISHED;
1.66 daniel 5022: ctxt->wellFormed = 0;
1.180 daniel 5023: ctxt->disableSAX = 1;
1.66 daniel 5024: return(ret);
5025: }
5026: NEXT;
5027: return(ret);
5028: }
5029:
5030: /**
1.50 daniel 5031: * xmlParseEnumeratedType:
5032: * @ctxt: an XML parser context
1.66 daniel 5033: * @tree: the enumeration tree built while parsing
1.50 daniel 5034: *
1.66 daniel 5035: * parse an Enumerated attribute type.
1.22 daniel 5036: *
5037: * [57] EnumeratedType ::= NotationType | Enumeration
5038: *
5039: * [58] NotationType ::= 'NOTATION' S '(' S? Name (S? '|' S? Name)* S? ')'
5040: *
1.50 daniel 5041: *
1.66 daniel 5042: * Returns: XML_ATTRIBUTE_ENUMERATION or XML_ATTRIBUTE_NOTATION
1.22 daniel 5043: */
5044:
1.66 daniel 5045: int
5046: xmlParseEnumeratedType(xmlParserCtxtPtr ctxt, xmlEnumerationPtr *tree) {
1.152 daniel 5047: if ((RAW == 'N') && (NXT(1) == 'O') &&
1.66 daniel 5048: (NXT(2) == 'T') && (NXT(3) == 'A') &&
5049: (NXT(4) == 'T') && (NXT(5) == 'I') &&
5050: (NXT(6) == 'O') && (NXT(7) == 'N')) {
5051: SKIP(8);
5052: if (!IS_BLANK(CUR)) {
5053: if ((ctxt->sax != NULL) && (ctxt->sax->error != NULL))
1.122 daniel 5054: ctxt->sax->error(ctxt->userData,
5055: "Space required after 'NOTATION'\n");
1.123 daniel 5056: ctxt->errNo = XML_ERR_SPACE_REQUIRED;
1.66 daniel 5057: ctxt->wellFormed = 0;
1.180 daniel 5058: ctxt->disableSAX = 1;
1.66 daniel 5059: return(0);
5060: }
5061: SKIP_BLANKS;
5062: *tree = xmlParseNotationType(ctxt);
5063: if (*tree == NULL) return(0);
5064: return(XML_ATTRIBUTE_NOTATION);
5065: }
5066: *tree = xmlParseEnumerationType(ctxt);
5067: if (*tree == NULL) return(0);
5068: return(XML_ATTRIBUTE_ENUMERATION);
1.22 daniel 5069: }
5070:
1.50 daniel 5071: /**
5072: * xmlParseAttributeType:
5073: * @ctxt: an XML parser context
1.66 daniel 5074: * @tree: the enumeration tree built while parsing
1.50 daniel 5075: *
1.59 daniel 5076: * parse the Attribute list def for an element
1.22 daniel 5077: *
5078: * [54] AttType ::= StringType | TokenizedType | EnumeratedType
5079: *
5080: * [55] StringType ::= 'CDATA'
5081: *
5082: * [56] TokenizedType ::= 'ID' | 'IDREF' | 'IDREFS' | 'ENTITY' |
5083: * 'ENTITIES' | 'NMTOKEN' | 'NMTOKENS'
1.50 daniel 5084: *
1.102 daniel 5085: * Validity constraints for attribute values syntax are checked in
5086: * xmlValidateAttributeValue()
5087: *
1.99 daniel 5088: * [ VC: ID ]
1.117 daniel 5089: * Values of type ID must match the Name production. A name must not
1.99 daniel 5090: * appear more than once in an XML document as a value of this type;
5091: * i.e., ID values must uniquely identify the elements which bear them.
5092: *
5093: * [ VC: One ID per Element Type ]
1.117 daniel 5094: * No element type may have more than one ID attribute specified.
1.99 daniel 5095: *
5096: * [ VC: ID Attribute Default ]
1.117 daniel 5097: * An ID attribute must have a declared default of #IMPLIED or #REQUIRED.
1.99 daniel 5098: *
5099: * [ VC: IDREF ]
1.102 daniel 5100: * Values of type IDREF must match the Name production, and values
1.140 daniel 5101: * of type IDREFS must match Names; each IDREF Name must match the value
1.117 daniel 5102: * of an ID attribute on some element in the XML document; i.e. IDREF
1.99 daniel 5103: * values must match the value of some ID attribute.
5104: *
5105: * [ VC: Entity Name ]
1.102 daniel 5106: * Values of type ENTITY must match the Name production, values
1.140 daniel 5107: * of type ENTITIES must match Names; each Entity Name must match the
1.117 daniel 5108: * name of an unparsed entity declared in the DTD.
1.99 daniel 5109: *
5110: * [ VC: Name Token ]
1.102 daniel 5111: * Values of type NMTOKEN must match the Nmtoken production; values
1.99 daniel 5112: * of type NMTOKENS must match Nmtokens.
5113: *
1.69 daniel 5114: * Returns the attribute type
1.22 daniel 5115: */
1.59 daniel 5116: int
1.66 daniel 5117: xmlParseAttributeType(xmlParserCtxtPtr ctxt, xmlEnumerationPtr *tree) {
1.91 daniel 5118: SHRINK;
1.152 daniel 5119: if ((RAW == 'C') && (NXT(1) == 'D') &&
1.40 daniel 5120: (NXT(2) == 'A') && (NXT(3) == 'T') &&
5121: (NXT(4) == 'A')) {
5122: SKIP(5);
1.66 daniel 5123: return(XML_ATTRIBUTE_CDATA);
1.152 daniel 5124: } else if ((RAW == 'I') && (NXT(1) == 'D') &&
1.40 daniel 5125: (NXT(2) == 'R') && (NXT(3) == 'E') &&
1.97 daniel 5126: (NXT(4) == 'F') && (NXT(5) == 'S')) {
5127: SKIP(6);
5128: return(XML_ATTRIBUTE_IDREFS);
1.152 daniel 5129: } else if ((RAW == 'I') && (NXT(1) == 'D') &&
1.97 daniel 5130: (NXT(2) == 'R') && (NXT(3) == 'E') &&
1.40 daniel 5131: (NXT(4) == 'F')) {
5132: SKIP(5);
1.59 daniel 5133: return(XML_ATTRIBUTE_IDREF);
1.152 daniel 5134: } else if ((RAW == 'I') && (NXT(1) == 'D')) {
1.66 daniel 5135: SKIP(2);
5136: return(XML_ATTRIBUTE_ID);
1.152 daniel 5137: } else if ((RAW == 'E') && (NXT(1) == 'N') &&
1.40 daniel 5138: (NXT(2) == 'T') && (NXT(3) == 'I') &&
5139: (NXT(4) == 'T') && (NXT(5) == 'Y')) {
5140: SKIP(6);
1.59 daniel 5141: return(XML_ATTRIBUTE_ENTITY);
1.152 daniel 5142: } else if ((RAW == 'E') && (NXT(1) == 'N') &&
1.40 daniel 5143: (NXT(2) == 'T') && (NXT(3) == 'I') &&
5144: (NXT(4) == 'T') && (NXT(5) == 'I') &&
5145: (NXT(6) == 'E') && (NXT(7) == 'S')) {
5146: SKIP(8);
1.59 daniel 5147: return(XML_ATTRIBUTE_ENTITIES);
1.152 daniel 5148: } else if ((RAW == 'N') && (NXT(1) == 'M') &&
1.40 daniel 5149: (NXT(2) == 'T') && (NXT(3) == 'O') &&
5150: (NXT(4) == 'K') && (NXT(5) == 'E') &&
1.66 daniel 5151: (NXT(6) == 'N') && (NXT(7) == 'S')) {
5152: SKIP(8);
5153: return(XML_ATTRIBUTE_NMTOKENS);
1.152 daniel 5154: } else if ((RAW == 'N') && (NXT(1) == 'M') &&
1.66 daniel 5155: (NXT(2) == 'T') && (NXT(3) == 'O') &&
5156: (NXT(4) == 'K') && (NXT(5) == 'E') &&
1.40 daniel 5157: (NXT(6) == 'N')) {
5158: SKIP(7);
1.59 daniel 5159: return(XML_ATTRIBUTE_NMTOKEN);
1.22 daniel 5160: }
1.66 daniel 5161: return(xmlParseEnumeratedType(ctxt, tree));
1.22 daniel 5162: }
5163:
1.50 daniel 5164: /**
5165: * xmlParseAttributeListDecl:
5166: * @ctxt: an XML parser context
5167: *
5168: * : parse the Attribute list def for an element
1.22 daniel 5169: *
5170: * [52] AttlistDecl ::= '<!ATTLIST' S Name AttDef* S? '>'
5171: *
5172: * [53] AttDef ::= S Name S AttType S DefaultDecl
1.50 daniel 5173: *
1.22 daniel 5174: */
1.55 daniel 5175: void
5176: xmlParseAttributeListDecl(xmlParserCtxtPtr ctxt) {
1.123 daniel 5177: xmlChar *elemName;
5178: xmlChar *attrName;
1.103 daniel 5179: xmlEnumerationPtr tree;
1.22 daniel 5180:
1.152 daniel 5181: if ((RAW == '<') && (NXT(1) == '!') &&
1.40 daniel 5182: (NXT(2) == 'A') && (NXT(3) == 'T') &&
5183: (NXT(4) == 'T') && (NXT(5) == 'L') &&
5184: (NXT(6) == 'I') && (NXT(7) == 'S') &&
1.59 daniel 5185: (NXT(8) == 'T')) {
1.187 ! daniel 5186: xmlParserInputPtr input = ctxt->input;
! 5187:
1.40 daniel 5188: SKIP(9);
1.59 daniel 5189: if (!IS_BLANK(CUR)) {
5190: if ((ctxt->sax != NULL) && (ctxt->sax->error != NULL))
1.122 daniel 5191: ctxt->sax->error(ctxt->userData,
5192: "Space required after '<!ATTLIST'\n");
1.123 daniel 5193: ctxt->errNo = XML_ERR_SPACE_REQUIRED;
1.59 daniel 5194: ctxt->wellFormed = 0;
1.180 daniel 5195: ctxt->disableSAX = 1;
1.59 daniel 5196: }
1.42 daniel 5197: SKIP_BLANKS;
1.59 daniel 5198: elemName = xmlParseName(ctxt);
5199: if (elemName == NULL) {
1.55 daniel 5200: if ((ctxt->sax != NULL) && (ctxt->sax->error != NULL))
1.122 daniel 5201: ctxt->sax->error(ctxt->userData,
5202: "ATTLIST: no name for Element\n");
1.123 daniel 5203: ctxt->errNo = XML_ERR_NAME_REQUIRED;
1.59 daniel 5204: ctxt->wellFormed = 0;
1.180 daniel 5205: ctxt->disableSAX = 1;
1.22 daniel 5206: return;
5207: }
1.42 daniel 5208: SKIP_BLANKS;
1.152 daniel 5209: while (RAW != '>') {
1.123 daniel 5210: const xmlChar *check = CUR_PTR;
1.59 daniel 5211: int type;
5212: int def;
1.123 daniel 5213: xmlChar *defaultValue = NULL;
1.59 daniel 5214:
1.103 daniel 5215: tree = NULL;
1.59 daniel 5216: attrName = xmlParseName(ctxt);
5217: if (attrName == NULL) {
5218: if ((ctxt->sax != NULL) && (ctxt->sax->error != NULL))
1.122 daniel 5219: ctxt->sax->error(ctxt->userData,
5220: "ATTLIST: no name for Attribute\n");
1.123 daniel 5221: ctxt->errNo = XML_ERR_NAME_REQUIRED;
1.59 daniel 5222: ctxt->wellFormed = 0;
1.180 daniel 5223: ctxt->disableSAX = 1;
1.59 daniel 5224: break;
5225: }
1.97 daniel 5226: GROW;
1.59 daniel 5227: if (!IS_BLANK(CUR)) {
5228: if ((ctxt->sax != NULL) && (ctxt->sax->error != NULL))
1.74 daniel 5229: ctxt->sax->error(ctxt->userData,
1.59 daniel 5230: "Space required after the attribute name\n");
1.123 daniel 5231: ctxt->errNo = XML_ERR_SPACE_REQUIRED;
1.59 daniel 5232: ctxt->wellFormed = 0;
1.180 daniel 5233: ctxt->disableSAX = 1;
1.170 daniel 5234: if (attrName != NULL)
5235: xmlFree(attrName);
5236: if (defaultValue != NULL)
5237: xmlFree(defaultValue);
1.59 daniel 5238: break;
5239: }
5240: SKIP_BLANKS;
5241:
1.66 daniel 5242: type = xmlParseAttributeType(ctxt, &tree);
1.170 daniel 5243: if (type <= 0) {
5244: if (attrName != NULL)
5245: xmlFree(attrName);
5246: if (defaultValue != NULL)
5247: xmlFree(defaultValue);
5248: break;
5249: }
1.22 daniel 5250:
1.97 daniel 5251: GROW;
1.59 daniel 5252: if (!IS_BLANK(CUR)) {
5253: if ((ctxt->sax != NULL) && (ctxt->sax->error != NULL))
1.74 daniel 5254: ctxt->sax->error(ctxt->userData,
1.59 daniel 5255: "Space required after the attribute type\n");
1.123 daniel 5256: ctxt->errNo = XML_ERR_SPACE_REQUIRED;
1.59 daniel 5257: ctxt->wellFormed = 0;
1.180 daniel 5258: ctxt->disableSAX = 1;
1.170 daniel 5259: if (attrName != NULL)
5260: xmlFree(attrName);
5261: if (defaultValue != NULL)
5262: xmlFree(defaultValue);
5263: if (tree != NULL)
5264: xmlFreeEnumeration(tree);
1.59 daniel 5265: break;
5266: }
1.42 daniel 5267: SKIP_BLANKS;
1.59 daniel 5268:
5269: def = xmlParseDefaultDecl(ctxt, &defaultValue);
1.170 daniel 5270: if (def <= 0) {
5271: if (attrName != NULL)
5272: xmlFree(attrName);
5273: if (defaultValue != NULL)
5274: xmlFree(defaultValue);
5275: if (tree != NULL)
5276: xmlFreeEnumeration(tree);
5277: break;
5278: }
1.59 daniel 5279:
1.97 daniel 5280: GROW;
1.152 daniel 5281: if (RAW != '>') {
1.59 daniel 5282: if (!IS_BLANK(CUR)) {
5283: if ((ctxt->sax != NULL) && (ctxt->sax->error != NULL))
1.74 daniel 5284: ctxt->sax->error(ctxt->userData,
1.59 daniel 5285: "Space required after the attribute default value\n");
1.123 daniel 5286: ctxt->errNo = XML_ERR_SPACE_REQUIRED;
1.59 daniel 5287: ctxt->wellFormed = 0;
1.180 daniel 5288: ctxt->disableSAX = 1;
1.170 daniel 5289: if (attrName != NULL)
5290: xmlFree(attrName);
5291: if (defaultValue != NULL)
5292: xmlFree(defaultValue);
5293: if (tree != NULL)
5294: xmlFreeEnumeration(tree);
1.59 daniel 5295: break;
5296: }
5297: SKIP_BLANKS;
5298: }
1.40 daniel 5299: if (check == CUR_PTR) {
1.55 daniel 5300: if ((ctxt->sax != NULL) && (ctxt->sax->error != NULL))
1.74 daniel 5301: ctxt->sax->error(ctxt->userData,
1.59 daniel 5302: "xmlParseAttributeListDecl: detected internal error\n");
1.123 daniel 5303: ctxt->errNo = XML_ERR_INTERNAL_ERROR;
1.170 daniel 5304: if (attrName != NULL)
5305: xmlFree(attrName);
5306: if (defaultValue != NULL)
5307: xmlFree(defaultValue);
5308: if (tree != NULL)
5309: xmlFreeEnumeration(tree);
1.22 daniel 5310: break;
5311: }
1.171 daniel 5312: if ((ctxt->sax != NULL) && (!ctxt->disableSAX) &&
5313: (ctxt->sax->attributeDecl != NULL))
1.74 daniel 5314: ctxt->sax->attributeDecl(ctxt->userData, elemName, attrName,
1.66 daniel 5315: type, def, defaultValue, tree);
1.59 daniel 5316: if (attrName != NULL)
1.119 daniel 5317: xmlFree(attrName);
1.59 daniel 5318: if (defaultValue != NULL)
1.119 daniel 5319: xmlFree(defaultValue);
1.97 daniel 5320: GROW;
1.22 daniel 5321: }
1.187 ! daniel 5322: if (RAW == '>') {
! 5323: if (input != ctxt->input) {
! 5324: if ((ctxt->sax != NULL) && (ctxt->sax->error != NULL))
! 5325: ctxt->sax->error(ctxt->userData,
! 5326: "Attribute list declaration doesn't start and stop in the same entity\n");
! 5327: ctxt->errNo = XML_ERR_ENTITY_BOUNDARY;
! 5328: ctxt->wellFormed = 0;
! 5329: ctxt->disableSAX = 1;
! 5330: }
1.40 daniel 5331: NEXT;
1.187 ! daniel 5332: }
1.22 daniel 5333:
1.119 daniel 5334: xmlFree(elemName);
1.22 daniel 5335: }
5336: }
5337:
1.50 daniel 5338: /**
1.61 daniel 5339: * xmlParseElementMixedContentDecl:
5340: * @ctxt: an XML parser context
5341: *
5342: * parse the declaration for a Mixed Element content
5343: * The leading '(' and spaces have been skipped in xmlParseElementContentDecl
5344: *
5345: * [51] Mixed ::= '(' S? '#PCDATA' (S? '|' S? Name)* S? ')*' |
5346: * '(' S? '#PCDATA' S? ')'
5347: *
1.99 daniel 5348: * [ VC: Proper Group/PE Nesting ] applies to [51] too (see [49])
5349: *
5350: * [ VC: No Duplicate Types ]
1.117 daniel 5351: * The same name must not appear more than once in a single
5352: * mixed-content declaration.
1.99 daniel 5353: *
1.61 daniel 5354: * returns: the list of the xmlElementContentPtr describing the element choices
5355: */
5356: xmlElementContentPtr
1.62 daniel 5357: xmlParseElementMixedContentDecl(xmlParserCtxtPtr ctxt) {
1.64 daniel 5358: xmlElementContentPtr ret = NULL, cur = NULL, n;
1.123 daniel 5359: xmlChar *elem = NULL;
1.61 daniel 5360:
1.97 daniel 5361: GROW;
1.152 daniel 5362: if ((RAW == '#') && (NXT(1) == 'P') &&
1.61 daniel 5363: (NXT(2) == 'C') && (NXT(3) == 'D') &&
5364: (NXT(4) == 'A') && (NXT(5) == 'T') &&
5365: (NXT(6) == 'A')) {
5366: SKIP(7);
5367: SKIP_BLANKS;
1.91 daniel 5368: SHRINK;
1.152 daniel 5369: if (RAW == ')') {
1.187 ! daniel 5370: ctxt->entity = ctxt->input;
1.63 daniel 5371: NEXT;
5372: ret = xmlNewElementContent(NULL, XML_ELEMENT_CONTENT_PCDATA);
1.152 daniel 5373: if (RAW == '*') {
1.136 daniel 5374: ret->ocur = XML_ELEMENT_CONTENT_MULT;
5375: NEXT;
5376: }
1.63 daniel 5377: return(ret);
5378: }
1.152 daniel 5379: if ((RAW == '(') || (RAW == '|')) {
1.61 daniel 5380: ret = cur = xmlNewElementContent(NULL, XML_ELEMENT_CONTENT_PCDATA);
5381: if (ret == NULL) return(NULL);
1.99 daniel 5382: }
1.152 daniel 5383: while (RAW == '|') {
1.64 daniel 5384: NEXT;
1.61 daniel 5385: if (elem == NULL) {
5386: ret = xmlNewElementContent(NULL, XML_ELEMENT_CONTENT_OR);
5387: if (ret == NULL) return(NULL);
5388: ret->c1 = cur;
1.64 daniel 5389: cur = ret;
1.61 daniel 5390: } else {
1.64 daniel 5391: n = xmlNewElementContent(NULL, XML_ELEMENT_CONTENT_OR);
5392: if (n == NULL) return(NULL);
5393: n->c1 = xmlNewElementContent(elem, XML_ELEMENT_CONTENT_ELEMENT);
5394: cur->c2 = n;
5395: cur = n;
1.119 daniel 5396: xmlFree(elem);
1.61 daniel 5397: }
5398: SKIP_BLANKS;
5399: elem = xmlParseName(ctxt);
5400: if (elem == NULL) {
5401: if ((ctxt->sax != NULL) && (ctxt->sax->error != NULL))
1.74 daniel 5402: ctxt->sax->error(ctxt->userData,
1.61 daniel 5403: "xmlParseElementMixedContentDecl : Name expected\n");
1.123 daniel 5404: ctxt->errNo = XML_ERR_NAME_REQUIRED;
1.61 daniel 5405: ctxt->wellFormed = 0;
1.180 daniel 5406: ctxt->disableSAX = 1;
1.61 daniel 5407: xmlFreeElementContent(cur);
5408: return(NULL);
5409: }
5410: SKIP_BLANKS;
1.97 daniel 5411: GROW;
1.61 daniel 5412: }
1.152 daniel 5413: if ((RAW == ')') && (NXT(1) == '*')) {
1.66 daniel 5414: if (elem != NULL) {
1.61 daniel 5415: cur->c2 = xmlNewElementContent(elem,
5416: XML_ELEMENT_CONTENT_ELEMENT);
1.119 daniel 5417: xmlFree(elem);
1.66 daniel 5418: }
1.65 daniel 5419: ret->ocur = XML_ELEMENT_CONTENT_MULT;
1.187 ! daniel 5420: ctxt->entity = ctxt->input;
1.64 daniel 5421: SKIP(2);
1.61 daniel 5422: } else {
1.119 daniel 5423: if (elem != NULL) xmlFree(elem);
1.61 daniel 5424: if ((ctxt->sax != NULL) && (ctxt->sax->error != NULL))
1.74 daniel 5425: ctxt->sax->error(ctxt->userData,
1.63 daniel 5426: "xmlParseElementMixedContentDecl : '|' or ')*' expected\n");
1.123 daniel 5427: ctxt->errNo = XML_ERR_MIXED_NOT_STARTED;
1.61 daniel 5428: ctxt->wellFormed = 0;
1.180 daniel 5429: ctxt->disableSAX = 1;
1.61 daniel 5430: xmlFreeElementContent(ret);
5431: return(NULL);
5432: }
5433:
5434: } else {
5435: if ((ctxt->sax != NULL) && (ctxt->sax->error != NULL))
1.74 daniel 5436: ctxt->sax->error(ctxt->userData,
1.61 daniel 5437: "xmlParseElementMixedContentDecl : '#PCDATA' expected\n");
1.123 daniel 5438: ctxt->errNo = XML_ERR_PCDATA_REQUIRED;
1.61 daniel 5439: ctxt->wellFormed = 0;
1.180 daniel 5440: ctxt->disableSAX = 1;
1.61 daniel 5441: }
5442: return(ret);
5443: }
5444:
5445: /**
5446: * xmlParseElementChildrenContentDecl:
1.50 daniel 5447: * @ctxt: an XML parser context
5448: *
1.61 daniel 5449: * parse the declaration for an Element children content
5450: * The leading '(' and spaces have been skipped in xmlParseElementContentDecl
1.22 daniel 5451: *
1.61 daniel 5452: *
1.22 daniel 5453: * [47] children ::= (choice | seq) ('?' | '*' | '+')?
5454: *
5455: * [48] cp ::= (Name | choice | seq) ('?' | '*' | '+')?
5456: *
5457: * [49] choice ::= '(' S? cp ( S? '|' S? cp )* S? ')'
5458: *
5459: * [50] seq ::= '(' S? cp ( S? ',' S? cp )* S? ')'
5460: *
1.99 daniel 5461: * [ VC: Proper Group/PE Nesting ] applies to [49] and [50]
5462: * TODO Parameter-entity replacement text must be properly nested
5463: * with parenthesized groups. That is to say, if either of the
5464: * opening or closing parentheses in a choice, seq, or Mixed
5465: * construct is contained in the replacement text for a parameter
5466: * entity, both must be contained in the same replacement text. For
5467: * interoperability, if a parameter-entity reference appears in a
5468: * choice, seq, or Mixed construct, its replacement text should not
5469: * be empty, and neither the first nor last non-blank character of
5470: * the replacement text should be a connector (| or ,).
5471: *
1.62 daniel 5472: * returns: the tree of xmlElementContentPtr describing the element
1.61 daniel 5473: * hierarchy.
5474: */
5475: xmlElementContentPtr
1.62 daniel 5476: xmlParseElementChildrenContentDecl(xmlParserCtxtPtr ctxt) {
1.63 daniel 5477: xmlElementContentPtr ret = NULL, cur = NULL, last = NULL, op = NULL;
1.123 daniel 5478: xmlChar *elem;
5479: xmlChar type = 0;
1.62 daniel 5480:
5481: SKIP_BLANKS;
1.94 daniel 5482: GROW;
1.152 daniel 5483: if (RAW == '(') {
1.63 daniel 5484: /* Recurse on first child */
1.62 daniel 5485: NEXT;
5486: SKIP_BLANKS;
5487: cur = ret = xmlParseElementChildrenContentDecl(ctxt);
5488: SKIP_BLANKS;
1.101 daniel 5489: GROW;
1.62 daniel 5490: } else {
5491: elem = xmlParseName(ctxt);
5492: if (elem == NULL) {
5493: if ((ctxt->sax != NULL) && (ctxt->sax->error != NULL))
1.74 daniel 5494: ctxt->sax->error(ctxt->userData,
1.62 daniel 5495: "xmlParseElementChildrenContentDecl : Name or '(' expected\n");
1.123 daniel 5496: ctxt->errNo = XML_ERR_ELEMCONTENT_NOT_STARTED;
1.62 daniel 5497: ctxt->wellFormed = 0;
1.180 daniel 5498: ctxt->disableSAX = 1;
1.62 daniel 5499: return(NULL);
5500: }
5501: cur = ret = xmlNewElementContent(elem, XML_ELEMENT_CONTENT_ELEMENT);
1.101 daniel 5502: GROW;
1.152 daniel 5503: if (RAW == '?') {
1.104 daniel 5504: cur->ocur = XML_ELEMENT_CONTENT_OPT;
1.62 daniel 5505: NEXT;
1.152 daniel 5506: } else if (RAW == '*') {
1.104 daniel 5507: cur->ocur = XML_ELEMENT_CONTENT_MULT;
1.62 daniel 5508: NEXT;
1.152 daniel 5509: } else if (RAW == '+') {
1.104 daniel 5510: cur->ocur = XML_ELEMENT_CONTENT_PLUS;
1.62 daniel 5511: NEXT;
5512: } else {
1.104 daniel 5513: cur->ocur = XML_ELEMENT_CONTENT_ONCE;
1.62 daniel 5514: }
1.119 daniel 5515: xmlFree(elem);
1.101 daniel 5516: GROW;
1.62 daniel 5517: }
5518: SKIP_BLANKS;
1.91 daniel 5519: SHRINK;
1.152 daniel 5520: while (RAW != ')') {
1.63 daniel 5521: /*
5522: * Each loop we parse one separator and one element.
5523: */
1.152 daniel 5524: if (RAW == ',') {
1.62 daniel 5525: if (type == 0) type = CUR;
5526:
5527: /*
5528: * Detect "Name | Name , Name" error
5529: */
5530: else if (type != CUR) {
5531: if ((ctxt->sax != NULL) && (ctxt->sax->error != NULL))
1.74 daniel 5532: ctxt->sax->error(ctxt->userData,
1.62 daniel 5533: "xmlParseElementChildrenContentDecl : '%c' expected\n",
5534: type);
1.123 daniel 5535: ctxt->errNo = XML_ERR_SEPARATOR_REQUIRED;
1.62 daniel 5536: ctxt->wellFormed = 0;
1.180 daniel 5537: ctxt->disableSAX = 1;
1.170 daniel 5538: if ((op != NULL) && (op != ret))
5539: xmlFreeElementContent(op);
5540: if ((last != NULL) && (last != ret))
5541: xmlFreeElementContent(last);
5542: if (ret != NULL)
5543: xmlFreeElementContent(ret);
1.62 daniel 5544: return(NULL);
5545: }
1.64 daniel 5546: NEXT;
1.62 daniel 5547:
1.63 daniel 5548: op = xmlNewElementContent(NULL, XML_ELEMENT_CONTENT_SEQ);
5549: if (op == NULL) {
5550: xmlFreeElementContent(ret);
5551: return(NULL);
5552: }
5553: if (last == NULL) {
5554: op->c1 = ret;
1.65 daniel 5555: ret = cur = op;
1.63 daniel 5556: } else {
5557: cur->c2 = op;
5558: op->c1 = last;
5559: cur =op;
1.65 daniel 5560: last = NULL;
1.63 daniel 5561: }
1.152 daniel 5562: } else if (RAW == '|') {
1.62 daniel 5563: if (type == 0) type = CUR;
5564:
5565: /*
1.63 daniel 5566: * Detect "Name , Name | Name" error
1.62 daniel 5567: */
5568: else if (type != CUR) {
5569: if ((ctxt->sax != NULL) && (ctxt->sax->error != NULL))
1.74 daniel 5570: ctxt->sax->error(ctxt->userData,
1.62 daniel 5571: "xmlParseElementChildrenContentDecl : '%c' expected\n",
5572: type);
1.123 daniel 5573: ctxt->errNo = XML_ERR_SEPARATOR_REQUIRED;
1.62 daniel 5574: ctxt->wellFormed = 0;
1.180 daniel 5575: ctxt->disableSAX = 1;
1.170 daniel 5576: if ((op != NULL) && (op != ret))
5577: xmlFreeElementContent(op);
5578: if ((last != NULL) && (last != ret))
5579: xmlFreeElementContent(last);
5580: if (ret != NULL)
5581: xmlFreeElementContent(ret);
1.62 daniel 5582: return(NULL);
5583: }
1.64 daniel 5584: NEXT;
1.62 daniel 5585:
1.63 daniel 5586: op = xmlNewElementContent(NULL, XML_ELEMENT_CONTENT_OR);
5587: if (op == NULL) {
1.170 daniel 5588: if ((op != NULL) && (op != ret))
5589: xmlFreeElementContent(op);
5590: if ((last != NULL) && (last != ret))
5591: xmlFreeElementContent(last);
5592: if (ret != NULL)
5593: xmlFreeElementContent(ret);
1.63 daniel 5594: return(NULL);
5595: }
5596: if (last == NULL) {
5597: op->c1 = ret;
1.65 daniel 5598: ret = cur = op;
1.63 daniel 5599: } else {
5600: cur->c2 = op;
5601: op->c1 = last;
5602: cur =op;
1.65 daniel 5603: last = NULL;
1.63 daniel 5604: }
1.62 daniel 5605: } else {
5606: if ((ctxt->sax != NULL) && (ctxt->sax->error != NULL))
1.74 daniel 5607: ctxt->sax->error(ctxt->userData,
1.62 daniel 5608: "xmlParseElementChildrenContentDecl : ',' '|' or ')' expected\n");
5609: ctxt->wellFormed = 0;
1.180 daniel 5610: ctxt->disableSAX = 1;
1.123 daniel 5611: ctxt->errNo = XML_ERR_ELEMCONTENT_NOT_FINISHED;
1.170 daniel 5612: if ((op != NULL) && (op != ret))
5613: xmlFreeElementContent(op);
5614: if ((last != NULL) && (last != ret))
5615: xmlFreeElementContent(last);
5616: if (ret != NULL)
5617: xmlFreeElementContent(ret);
1.62 daniel 5618: return(NULL);
5619: }
1.101 daniel 5620: GROW;
1.62 daniel 5621: SKIP_BLANKS;
1.101 daniel 5622: GROW;
1.152 daniel 5623: if (RAW == '(') {
1.63 daniel 5624: /* Recurse on second child */
1.62 daniel 5625: NEXT;
5626: SKIP_BLANKS;
1.65 daniel 5627: last = xmlParseElementChildrenContentDecl(ctxt);
1.62 daniel 5628: SKIP_BLANKS;
5629: } else {
5630: elem = xmlParseName(ctxt);
5631: if (elem == NULL) {
5632: if ((ctxt->sax != NULL) && (ctxt->sax->error != NULL))
1.74 daniel 5633: ctxt->sax->error(ctxt->userData,
1.122 daniel 5634: "xmlParseElementChildrenContentDecl : Name or '(' expected\n");
1.123 daniel 5635: ctxt->errNo = XML_ERR_ELEMCONTENT_NOT_STARTED;
1.62 daniel 5636: ctxt->wellFormed = 0;
1.180 daniel 5637: ctxt->disableSAX = 1;
1.170 daniel 5638: if ((op != NULL) && (op != ret))
5639: xmlFreeElementContent(op);
5640: if ((last != NULL) && (last != ret))
5641: xmlFreeElementContent(last);
5642: if (ret != NULL)
5643: xmlFreeElementContent(ret);
1.62 daniel 5644: return(NULL);
5645: }
1.65 daniel 5646: last = xmlNewElementContent(elem, XML_ELEMENT_CONTENT_ELEMENT);
1.119 daniel 5647: xmlFree(elem);
1.152 daniel 5648: if (RAW == '?') {
1.105 daniel 5649: last->ocur = XML_ELEMENT_CONTENT_OPT;
5650: NEXT;
1.152 daniel 5651: } else if (RAW == '*') {
1.105 daniel 5652: last->ocur = XML_ELEMENT_CONTENT_MULT;
5653: NEXT;
1.152 daniel 5654: } else if (RAW == '+') {
1.105 daniel 5655: last->ocur = XML_ELEMENT_CONTENT_PLUS;
5656: NEXT;
5657: } else {
5658: last->ocur = XML_ELEMENT_CONTENT_ONCE;
5659: }
1.63 daniel 5660: }
5661: SKIP_BLANKS;
1.97 daniel 5662: GROW;
1.64 daniel 5663: }
1.65 daniel 5664: if ((cur != NULL) && (last != NULL)) {
5665: cur->c2 = last;
1.62 daniel 5666: }
1.187 ! daniel 5667: ctxt->entity = ctxt->input;
1.62 daniel 5668: NEXT;
1.152 daniel 5669: if (RAW == '?') {
1.62 daniel 5670: ret->ocur = XML_ELEMENT_CONTENT_OPT;
5671: NEXT;
1.152 daniel 5672: } else if (RAW == '*') {
1.62 daniel 5673: ret->ocur = XML_ELEMENT_CONTENT_MULT;
5674: NEXT;
1.152 daniel 5675: } else if (RAW == '+') {
1.62 daniel 5676: ret->ocur = XML_ELEMENT_CONTENT_PLUS;
5677: NEXT;
5678: }
5679: return(ret);
1.61 daniel 5680: }
5681:
5682: /**
5683: * xmlParseElementContentDecl:
5684: * @ctxt: an XML parser context
5685: * @name: the name of the element being defined.
5686: * @result: the Element Content pointer will be stored here if any
1.22 daniel 5687: *
1.61 daniel 5688: * parse the declaration for an Element content either Mixed or Children,
5689: * the cases EMPTY and ANY are handled directly in xmlParseElementDecl
5690: *
5691: * [46] contentspec ::= 'EMPTY' | 'ANY' | Mixed | children
1.50 daniel 5692: *
1.61 daniel 5693: * returns: the type of element content XML_ELEMENT_TYPE_xxx
1.22 daniel 5694: */
5695:
1.61 daniel 5696: int
1.123 daniel 5697: xmlParseElementContentDecl(xmlParserCtxtPtr ctxt, xmlChar *name,
1.61 daniel 5698: xmlElementContentPtr *result) {
5699:
5700: xmlElementContentPtr tree = NULL;
1.187 ! daniel 5701: xmlParserInputPtr input = ctxt->input;
1.61 daniel 5702: int res;
5703:
5704: *result = NULL;
5705:
1.152 daniel 5706: if (RAW != '(') {
1.61 daniel 5707: if ((ctxt->sax != NULL) && (ctxt->sax->error != NULL))
1.74 daniel 5708: ctxt->sax->error(ctxt->userData,
1.61 daniel 5709: "xmlParseElementContentDecl : '(' expected\n");
1.123 daniel 5710: ctxt->errNo = XML_ERR_ELEMCONTENT_NOT_STARTED;
1.61 daniel 5711: ctxt->wellFormed = 0;
1.180 daniel 5712: ctxt->disableSAX = 1;
1.61 daniel 5713: return(-1);
5714: }
5715: NEXT;
1.97 daniel 5716: GROW;
1.61 daniel 5717: SKIP_BLANKS;
1.152 daniel 5718: if ((RAW == '#') && (NXT(1) == 'P') &&
1.61 daniel 5719: (NXT(2) == 'C') && (NXT(3) == 'D') &&
5720: (NXT(4) == 'A') && (NXT(5) == 'T') &&
5721: (NXT(6) == 'A')) {
1.62 daniel 5722: tree = xmlParseElementMixedContentDecl(ctxt);
1.61 daniel 5723: res = XML_ELEMENT_TYPE_MIXED;
5724: } else {
1.62 daniel 5725: tree = xmlParseElementChildrenContentDecl(ctxt);
1.61 daniel 5726: res = XML_ELEMENT_TYPE_ELEMENT;
5727: }
1.187 ! daniel 5728: if ((ctxt->entity != NULL) && (input != ctxt->entity)) {
! 5729: if ((ctxt->sax != NULL) && (ctxt->sax->error != NULL))
! 5730: ctxt->sax->error(ctxt->userData,
! 5731: "Element content declaration doesn't start and stop in the same entity\n");
! 5732: ctxt->errNo = XML_ERR_ENTITY_BOUNDARY;
! 5733: ctxt->wellFormed = 0;
! 5734: ctxt->disableSAX = 1;
! 5735: }
1.61 daniel 5736: SKIP_BLANKS;
1.63 daniel 5737: /****************************
1.152 daniel 5738: if (RAW != ')') {
1.61 daniel 5739: if ((ctxt->sax != NULL) && (ctxt->sax->error != NULL))
1.74 daniel 5740: ctxt->sax->error(ctxt->userData,
1.61 daniel 5741: "xmlParseElementContentDecl : ')' expected\n");
5742: ctxt->wellFormed = 0;
1.180 daniel 5743: ctxt->disableSAX = 1;
1.61 daniel 5744: return(-1);
5745: }
1.63 daniel 5746: ****************************/
5747: *result = tree;
1.61 daniel 5748: return(res);
1.22 daniel 5749: }
5750:
1.50 daniel 5751: /**
5752: * xmlParseElementDecl:
5753: * @ctxt: an XML parser context
5754: *
5755: * parse an Element declaration.
1.22 daniel 5756: *
5757: * [45] elementdecl ::= '<!ELEMENT' S Name S contentspec S? '>'
5758: *
1.99 daniel 5759: * [ VC: Unique Element Type Declaration ]
1.117 daniel 5760: * No element type may be declared more than once
1.69 daniel 5761: *
5762: * Returns the type of the element, or -1 in case of error
1.22 daniel 5763: */
1.59 daniel 5764: int
1.55 daniel 5765: xmlParseElementDecl(xmlParserCtxtPtr ctxt) {
1.123 daniel 5766: xmlChar *name;
1.59 daniel 5767: int ret = -1;
1.61 daniel 5768: xmlElementContentPtr content = NULL;
1.22 daniel 5769:
1.97 daniel 5770: GROW;
1.152 daniel 5771: if ((RAW == '<') && (NXT(1) == '!') &&
1.40 daniel 5772: (NXT(2) == 'E') && (NXT(3) == 'L') &&
5773: (NXT(4) == 'E') && (NXT(5) == 'M') &&
5774: (NXT(6) == 'E') && (NXT(7) == 'N') &&
1.59 daniel 5775: (NXT(8) == 'T')) {
1.187 ! daniel 5776: xmlParserInputPtr input = ctxt->input;
! 5777:
1.40 daniel 5778: SKIP(9);
1.59 daniel 5779: if (!IS_BLANK(CUR)) {
5780: if ((ctxt->sax != NULL) && (ctxt->sax->error != NULL))
1.74 daniel 5781: ctxt->sax->error(ctxt->userData,
1.59 daniel 5782: "Space required after 'ELEMENT'\n");
1.123 daniel 5783: ctxt->errNo = XML_ERR_SPACE_REQUIRED;
1.59 daniel 5784: ctxt->wellFormed = 0;
1.180 daniel 5785: ctxt->disableSAX = 1;
1.59 daniel 5786: }
1.42 daniel 5787: SKIP_BLANKS;
1.22 daniel 5788: name = xmlParseName(ctxt);
5789: if (name == NULL) {
1.55 daniel 5790: if ((ctxt->sax != NULL) && (ctxt->sax->error != NULL))
1.74 daniel 5791: ctxt->sax->error(ctxt->userData,
1.59 daniel 5792: "xmlParseElementDecl: no name for Element\n");
1.123 daniel 5793: ctxt->errNo = XML_ERR_NAME_REQUIRED;
1.59 daniel 5794: ctxt->wellFormed = 0;
1.180 daniel 5795: ctxt->disableSAX = 1;
1.59 daniel 5796: return(-1);
5797: }
5798: if (!IS_BLANK(CUR)) {
5799: if ((ctxt->sax != NULL) && (ctxt->sax->error != NULL))
1.74 daniel 5800: ctxt->sax->error(ctxt->userData,
1.59 daniel 5801: "Space required after the element name\n");
1.123 daniel 5802: ctxt->errNo = XML_ERR_SPACE_REQUIRED;
1.59 daniel 5803: ctxt->wellFormed = 0;
1.180 daniel 5804: ctxt->disableSAX = 1;
1.22 daniel 5805: }
1.42 daniel 5806: SKIP_BLANKS;
1.152 daniel 5807: if ((RAW == 'E') && (NXT(1) == 'M') &&
1.40 daniel 5808: (NXT(2) == 'P') && (NXT(3) == 'T') &&
5809: (NXT(4) == 'Y')) {
5810: SKIP(5);
1.22 daniel 5811: /*
5812: * Element must always be empty.
5813: */
1.59 daniel 5814: ret = XML_ELEMENT_TYPE_EMPTY;
1.152 daniel 5815: } else if ((RAW == 'A') && (NXT(1) == 'N') &&
1.40 daniel 5816: (NXT(2) == 'Y')) {
5817: SKIP(3);
1.22 daniel 5818: /*
5819: * Element is a generic container.
5820: */
1.59 daniel 5821: ret = XML_ELEMENT_TYPE_ANY;
1.152 daniel 5822: } else if (RAW == '(') {
1.61 daniel 5823: ret = xmlParseElementContentDecl(ctxt, name, &content);
1.22 daniel 5824: } else {
1.98 daniel 5825: /*
5826: * [ WFC: PEs in Internal Subset ] error handling.
5827: */
1.152 daniel 5828: if ((RAW == '%') && (ctxt->external == 0) &&
1.98 daniel 5829: (ctxt->inputNr == 1)) {
5830: if ((ctxt->sax != NULL) && (ctxt->sax->error != NULL))
5831: ctxt->sax->error(ctxt->userData,
5832: "PEReference: forbidden within markup decl in internal subset\n");
1.123 daniel 5833: ctxt->errNo = XML_ERR_PEREF_IN_INT_SUBSET;
1.98 daniel 5834: } else {
5835: if ((ctxt->sax != NULL) && (ctxt->sax->error != NULL))
5836: ctxt->sax->error(ctxt->userData,
5837: "xmlParseElementDecl: 'EMPTY', 'ANY' or '(' expected\n");
1.123 daniel 5838: ctxt->errNo = XML_ERR_ELEMCONTENT_NOT_STARTED;
1.98 daniel 5839: }
1.61 daniel 5840: ctxt->wellFormed = 0;
1.180 daniel 5841: ctxt->disableSAX = 1;
1.119 daniel 5842: if (name != NULL) xmlFree(name);
1.61 daniel 5843: return(-1);
1.22 daniel 5844: }
1.142 daniel 5845:
5846: SKIP_BLANKS;
5847: /*
5848: * Pop-up of finished entities.
5849: */
1.152 daniel 5850: while ((RAW == 0) && (ctxt->inputNr > 1))
1.142 daniel 5851: xmlPopInput(ctxt);
1.42 daniel 5852: SKIP_BLANKS;
1.142 daniel 5853:
1.152 daniel 5854: if (RAW != '>') {
1.55 daniel 5855: if ((ctxt->sax != NULL) && (ctxt->sax->error != NULL))
1.74 daniel 5856: ctxt->sax->error(ctxt->userData,
1.31 daniel 5857: "xmlParseElementDecl: expected '>' at the end\n");
1.123 daniel 5858: ctxt->errNo = XML_ERR_GT_REQUIRED;
1.59 daniel 5859: ctxt->wellFormed = 0;
1.180 daniel 5860: ctxt->disableSAX = 1;
1.61 daniel 5861: } else {
1.187 ! daniel 5862: if (input != ctxt->input) {
! 5863: if ((ctxt->sax != NULL) && (ctxt->sax->error != NULL))
! 5864: ctxt->sax->error(ctxt->userData,
! 5865: "Element declaration doesn't start and stop in the same entity\n");
! 5866: ctxt->errNo = XML_ERR_ENTITY_BOUNDARY;
! 5867: ctxt->wellFormed = 0;
! 5868: ctxt->disableSAX = 1;
! 5869: }
! 5870:
1.40 daniel 5871: NEXT;
1.171 daniel 5872: if ((ctxt->sax != NULL) && (!ctxt->disableSAX) &&
5873: (ctxt->sax->elementDecl != NULL))
1.76 daniel 5874: ctxt->sax->elementDecl(ctxt->userData, name, ret,
5875: content);
1.61 daniel 5876: }
1.84 daniel 5877: if (content != NULL) {
5878: xmlFreeElementContent(content);
5879: }
1.61 daniel 5880: if (name != NULL) {
1.119 daniel 5881: xmlFree(name);
1.61 daniel 5882: }
1.22 daniel 5883: }
1.59 daniel 5884: return(ret);
1.22 daniel 5885: }
5886:
1.50 daniel 5887: /**
5888: * xmlParseMarkupDecl:
5889: * @ctxt: an XML parser context
5890: *
5891: * parse Markup declarations
1.22 daniel 5892: *
5893: * [29] markupdecl ::= elementdecl | AttlistDecl | EntityDecl |
5894: * NotationDecl | PI | Comment
5895: *
1.98 daniel 5896: * [ VC: Proper Declaration/PE Nesting ]
5897: * TODO Parameter-entity replacement text must be properly nested with
5898: * markup declarations. That is to say, if either the first character
5899: * or the last character of a markup declaration (markupdecl above) is
5900: * contained in the replacement text for a parameter-entity reference,
5901: * both must be contained in the same replacement text.
5902: *
5903: * [ WFC: PEs in Internal Subset ]
5904: * In the internal DTD subset, parameter-entity references can occur
5905: * only where markup declarations can occur, not within markup declarations.
5906: * (This does not apply to references that occur in external parameter
5907: * entities or to the external subset.)
1.22 daniel 5908: */
1.55 daniel 5909: void
5910: xmlParseMarkupDecl(xmlParserCtxtPtr ctxt) {
1.94 daniel 5911: GROW;
1.22 daniel 5912: xmlParseElementDecl(ctxt);
5913: xmlParseAttributeListDecl(ctxt);
5914: xmlParseEntityDecl(ctxt);
5915: xmlParseNotationDecl(ctxt);
5916: xmlParsePI(ctxt);
1.114 daniel 5917: xmlParseComment(ctxt);
1.98 daniel 5918: /*
5919: * This is only for internal subset. On external entities,
5920: * the replacement is done before parsing stage
5921: */
5922: if ((ctxt->external == 0) && (ctxt->inputNr == 1))
5923: xmlParsePEReference(ctxt);
1.97 daniel 5924: ctxt->instate = XML_PARSER_DTD;
1.22 daniel 5925: }
5926:
1.50 daniel 5927: /**
1.76 daniel 5928: * xmlParseTextDecl:
5929: * @ctxt: an XML parser context
5930: *
5931: * parse an XML declaration header for external entities
5932: *
5933: * [77] TextDecl ::= '<?xml' VersionInfo? EncodingDecl S? '?>'
1.176 daniel 5934: *
5935: * Question: Seems that EncodingDecl is mandatory ? Is that a typo ?
1.76 daniel 5936: */
5937:
1.172 daniel 5938: void
1.76 daniel 5939: xmlParseTextDecl(xmlParserCtxtPtr ctxt) {
1.123 daniel 5940: xmlChar *version;
1.76 daniel 5941:
5942: /*
5943: * We know that '<?xml' is here.
5944: */
5945: SKIP(5);
5946:
5947: if (!IS_BLANK(CUR)) {
5948: if ((ctxt->sax != NULL) && (ctxt->sax->error != NULL))
1.122 daniel 5949: ctxt->sax->error(ctxt->userData,
5950: "Space needed after '<?xml'\n");
1.123 daniel 5951: ctxt->errNo = XML_ERR_SPACE_REQUIRED;
1.76 daniel 5952: ctxt->wellFormed = 0;
1.180 daniel 5953: ctxt->disableSAX = 1;
1.76 daniel 5954: }
5955: SKIP_BLANKS;
5956:
5957: /*
5958: * We may have the VersionInfo here.
5959: */
5960: version = xmlParseVersionInfo(ctxt);
5961: if (version == NULL)
5962: version = xmlCharStrdup(XML_DEFAULT_VERSION);
1.172 daniel 5963: ctxt->input->version = version;
1.76 daniel 5964:
5965: /*
5966: * We must have the encoding declaration
5967: */
5968: if (!IS_BLANK(CUR)) {
5969: if ((ctxt->sax != NULL) && (ctxt->sax->error != NULL))
1.122 daniel 5970: ctxt->sax->error(ctxt->userData, "Space needed here\n");
1.123 daniel 5971: ctxt->errNo = XML_ERR_SPACE_REQUIRED;
1.76 daniel 5972: ctxt->wellFormed = 0;
1.180 daniel 5973: ctxt->disableSAX = 1;
1.76 daniel 5974: }
1.172 daniel 5975: ctxt->input->encoding = xmlParseEncodingDecl(ctxt);
1.76 daniel 5976:
5977: SKIP_BLANKS;
1.152 daniel 5978: if ((RAW == '?') && (NXT(1) == '>')) {
1.76 daniel 5979: SKIP(2);
1.152 daniel 5980: } else if (RAW == '>') {
1.76 daniel 5981: /* Deprecated old WD ... */
5982: if ((ctxt->sax != NULL) && (ctxt->sax->error != NULL))
1.122 daniel 5983: ctxt->sax->error(ctxt->userData,
5984: "XML declaration must end-up with '?>'\n");
1.123 daniel 5985: ctxt->errNo = XML_ERR_XMLDECL_NOT_FINISHED;
1.76 daniel 5986: ctxt->wellFormed = 0;
1.180 daniel 5987: ctxt->disableSAX = 1;
1.76 daniel 5988: NEXT;
5989: } else {
5990: if ((ctxt->sax != NULL) && (ctxt->sax->error != NULL))
1.122 daniel 5991: ctxt->sax->error(ctxt->userData,
5992: "parsing XML declaration: '?>' expected\n");
1.123 daniel 5993: ctxt->errNo = XML_ERR_XMLDECL_NOT_FINISHED;
1.76 daniel 5994: ctxt->wellFormed = 0;
1.180 daniel 5995: ctxt->disableSAX = 1;
1.76 daniel 5996: MOVETO_ENDTAG(CUR_PTR);
5997: NEXT;
5998: }
5999: }
6000:
6001: /*
6002: * xmlParseConditionalSections
6003: * @ctxt: an XML parser context
6004: *
6005: * TODO : Conditionnal section are not yet supported !
6006: *
6007: * [61] conditionalSect ::= includeSect | ignoreSect
6008: * [62] includeSect ::= '<![' S? 'INCLUDE' S? '[' extSubsetDecl ']]>'
6009: * [63] ignoreSect ::= '<![' S? 'IGNORE' S? '[' ignoreSectContents* ']]>'
6010: * [64] ignoreSectContents ::= Ignore ('<![' ignoreSectContents ']]>' Ignore)*
6011: * [65] Ignore ::= Char* - (Char* ('<![' | ']]>') Char*)
6012: */
6013:
6014: void
6015: xmlParseConditionalSections(xmlParserCtxtPtr ctxt) {
1.165 daniel 6016: SKIP(3);
6017: SKIP_BLANKS;
1.168 daniel 6018: if ((RAW == 'I') && (NXT(1) == 'N') && (NXT(2) == 'C') &&
6019: (NXT(3) == 'L') && (NXT(4) == 'U') && (NXT(5) == 'D') &&
6020: (NXT(6) == 'E')) {
1.165 daniel 6021: SKIP(7);
1.168 daniel 6022: SKIP_BLANKS;
6023: if (RAW != '[') {
6024: if ((ctxt->sax != NULL) && (ctxt->sax->error != NULL))
6025: ctxt->sax->error(ctxt->userData,
6026: "XML conditional section '[' expected\n");
6027: ctxt->errNo = XML_ERR_CONDSEC_INVALID;
6028: ctxt->wellFormed = 0;
1.180 daniel 6029: ctxt->disableSAX = 1;
1.168 daniel 6030: } else {
6031: NEXT;
6032: }
1.165 daniel 6033: while ((RAW != 0) && ((RAW != ']') || (NXT(1) != ']') ||
6034: (NXT(2) != '>'))) {
6035: const xmlChar *check = CUR_PTR;
6036: int cons = ctxt->input->consumed;
6037: int tok = ctxt->token;
6038:
6039: if ((RAW == '<') && (NXT(1) == '!') && (NXT(2) == '[')) {
6040: xmlParseConditionalSections(ctxt);
6041: } else if (IS_BLANK(CUR)) {
6042: NEXT;
6043: } else if (RAW == '%') {
6044: xmlParsePEReference(ctxt);
6045: } else
6046: xmlParseMarkupDecl(ctxt);
6047:
6048: /*
6049: * Pop-up of finished entities.
6050: */
6051: while ((RAW == 0) && (ctxt->inputNr > 1))
6052: xmlPopInput(ctxt);
6053:
6054: if ((CUR_PTR == check) && (cons == ctxt->input->consumed) &&
6055: (tok == ctxt->token)) {
6056: if ((ctxt->sax != NULL) && (ctxt->sax->error != NULL))
6057: ctxt->sax->error(ctxt->userData,
6058: "Content error in the external subset\n");
6059: ctxt->wellFormed = 0;
1.180 daniel 6060: ctxt->disableSAX = 1;
1.165 daniel 6061: ctxt->errNo = XML_ERR_EXT_SUBSET_NOT_FINISHED;
6062: break;
6063: }
6064: }
1.168 daniel 6065: } else if ((RAW == 'I') && (NXT(1) == 'G') && (NXT(2) == 'N') &&
6066: (NXT(3) == 'O') && (NXT(4) == 'R') && (NXT(5) == 'E')) {
1.171 daniel 6067: int state;
6068:
1.168 daniel 6069: SKIP(6);
6070: SKIP_BLANKS;
6071: if (RAW != '[') {
6072: if ((ctxt->sax != NULL) && (ctxt->sax->error != NULL))
6073: ctxt->sax->error(ctxt->userData,
6074: "XML conditional section '[' expected\n");
6075: ctxt->errNo = XML_ERR_CONDSEC_INVALID;
6076: ctxt->wellFormed = 0;
1.180 daniel 6077: ctxt->disableSAX = 1;
1.168 daniel 6078: } else {
6079: NEXT;
6080: }
1.171 daniel 6081:
1.143 daniel 6082: /*
1.171 daniel 6083: * Parse up to the end of the conditionnal section
6084: * But disable SAX event generating DTD building in the meantime
1.143 daniel 6085: */
1.171 daniel 6086: state = ctxt->disableSAX;
1.165 daniel 6087: while ((RAW != 0) && ((RAW != ']') || (NXT(1) != ']') ||
6088: (NXT(2) != '>'))) {
1.171 daniel 6089: const xmlChar *check = CUR_PTR;
6090: int cons = ctxt->input->consumed;
6091: int tok = ctxt->token;
6092:
6093: if ((RAW == '<') && (NXT(1) == '!') && (NXT(2) == '[')) {
6094: xmlParseConditionalSections(ctxt);
6095: } else if (IS_BLANK(CUR)) {
6096: NEXT;
6097: } else if (RAW == '%') {
6098: xmlParsePEReference(ctxt);
6099: } else
6100: xmlParseMarkupDecl(ctxt);
6101:
1.165 daniel 6102: /*
6103: * Pop-up of finished entities.
6104: */
6105: while ((RAW == 0) && (ctxt->inputNr > 1))
6106: xmlPopInput(ctxt);
1.143 daniel 6107:
1.171 daniel 6108: if ((CUR_PTR == check) && (cons == ctxt->input->consumed) &&
6109: (tok == ctxt->token)) {
6110: if ((ctxt->sax != NULL) && (ctxt->sax->error != NULL))
6111: ctxt->sax->error(ctxt->userData,
6112: "Content error in the external subset\n");
6113: ctxt->wellFormed = 0;
1.180 daniel 6114: ctxt->disableSAX = 1;
1.171 daniel 6115: ctxt->errNo = XML_ERR_EXT_SUBSET_NOT_FINISHED;
6116: break;
6117: }
1.165 daniel 6118: }
1.171 daniel 6119: ctxt->disableSAX = state;
1.168 daniel 6120: } else {
6121: if ((ctxt->sax != NULL) && (ctxt->sax->error != NULL))
6122: ctxt->sax->error(ctxt->userData,
6123: "XML conditional section INCLUDE or IGNORE keyword expected\n");
6124: ctxt->errNo = XML_ERR_CONDSEC_INVALID;
6125: ctxt->wellFormed = 0;
1.180 daniel 6126: ctxt->disableSAX = 1;
1.143 daniel 6127: }
6128:
1.152 daniel 6129: if (RAW == 0)
1.143 daniel 6130: SHRINK;
6131:
1.152 daniel 6132: if (RAW == 0) {
1.76 daniel 6133: if ((ctxt->sax != NULL) && (ctxt->sax->error != NULL))
6134: ctxt->sax->error(ctxt->userData,
6135: "XML conditional section not closed\n");
1.123 daniel 6136: ctxt->errNo = XML_ERR_CONDSEC_NOT_FINISHED;
1.76 daniel 6137: ctxt->wellFormed = 0;
1.180 daniel 6138: ctxt->disableSAX = 1;
1.143 daniel 6139: } else {
6140: SKIP(3);
1.76 daniel 6141: }
6142: }
6143:
6144: /**
1.124 daniel 6145: * xmlParseExternalSubset:
1.76 daniel 6146: * @ctxt: an XML parser context
1.124 daniel 6147: * @ExternalID: the external identifier
6148: * @SystemID: the system identifier (or URL)
1.76 daniel 6149: *
6150: * parse Markup declarations from an external subset
6151: *
6152: * [30] extSubset ::= textDecl? extSubsetDecl
6153: *
6154: * [31] extSubsetDecl ::= (markupdecl | conditionalSect | PEReference | S) *
6155: */
6156: void
1.123 daniel 6157: xmlParseExternalSubset(xmlParserCtxtPtr ctxt, const xmlChar *ExternalID,
6158: const xmlChar *SystemID) {
1.132 daniel 6159: GROW;
1.152 daniel 6160: if ((RAW == '<') && (NXT(1) == '?') &&
1.76 daniel 6161: (NXT(2) == 'x') && (NXT(3) == 'm') &&
6162: (NXT(4) == 'l')) {
1.172 daniel 6163: xmlParseTextDecl(ctxt);
1.76 daniel 6164: }
1.79 daniel 6165: if (ctxt->myDoc == NULL) {
1.116 daniel 6166: ctxt->myDoc = xmlNewDoc(BAD_CAST "1.0");
1.79 daniel 6167: }
6168: if ((ctxt->myDoc != NULL) && (ctxt->myDoc->intSubset == NULL))
6169: xmlCreateIntSubset(ctxt->myDoc, NULL, ExternalID, SystemID);
6170:
1.96 daniel 6171: ctxt->instate = XML_PARSER_DTD;
1.101 daniel 6172: ctxt->external = 1;
1.152 daniel 6173: while (((RAW == '<') && (NXT(1) == '?')) ||
6174: ((RAW == '<') && (NXT(1) == '!')) ||
1.164 daniel 6175: IS_BLANK(CUR)) {
1.123 daniel 6176: const xmlChar *check = CUR_PTR;
1.115 daniel 6177: int cons = ctxt->input->consumed;
1.164 daniel 6178: int tok = ctxt->token;
1.115 daniel 6179:
1.152 daniel 6180: if ((RAW == '<') && (NXT(1) == '!') && (NXT(2) == '[')) {
1.76 daniel 6181: xmlParseConditionalSections(ctxt);
6182: } else if (IS_BLANK(CUR)) {
6183: NEXT;
1.152 daniel 6184: } else if (RAW == '%') {
1.76 daniel 6185: xmlParsePEReference(ctxt);
6186: } else
6187: xmlParseMarkupDecl(ctxt);
1.77 daniel 6188:
6189: /*
6190: * Pop-up of finished entities.
6191: */
1.166 daniel 6192: while ((RAW == 0) && (ctxt->inputNr > 1))
1.77 daniel 6193: xmlPopInput(ctxt);
6194:
1.164 daniel 6195: if ((CUR_PTR == check) && (cons == ctxt->input->consumed) &&
6196: (tok == ctxt->token)) {
1.115 daniel 6197: if ((ctxt->sax != NULL) && (ctxt->sax->error != NULL))
6198: ctxt->sax->error(ctxt->userData,
6199: "Content error in the external subset\n");
6200: ctxt->wellFormed = 0;
1.180 daniel 6201: ctxt->disableSAX = 1;
1.123 daniel 6202: ctxt->errNo = XML_ERR_EXT_SUBSET_NOT_FINISHED;
1.115 daniel 6203: break;
6204: }
1.76 daniel 6205: }
6206:
1.152 daniel 6207: if (RAW != 0) {
1.76 daniel 6208: if ((ctxt->sax != NULL) && (ctxt->sax->error != NULL))
6209: ctxt->sax->error(ctxt->userData,
6210: "Extra content at the end of the document\n");
1.123 daniel 6211: ctxt->errNo = XML_ERR_EXT_SUBSET_NOT_FINISHED;
1.76 daniel 6212: ctxt->wellFormed = 0;
1.180 daniel 6213: ctxt->disableSAX = 1;
1.76 daniel 6214: }
6215:
6216: }
6217:
6218: /**
1.77 daniel 6219: * xmlParseReference:
6220: * @ctxt: an XML parser context
6221: *
6222: * parse and handle entity references in content, depending on the SAX
6223: * interface, this may end-up in a call to character() if this is a
1.79 daniel 6224: * CharRef, a predefined entity, if there is no reference() callback.
6225: * or if the parser was asked to switch to that mode.
1.77 daniel 6226: *
6227: * [67] Reference ::= EntityRef | CharRef
6228: */
6229: void
6230: xmlParseReference(xmlParserCtxtPtr ctxt) {
6231: xmlEntityPtr ent;
1.123 daniel 6232: xmlChar *val;
1.152 daniel 6233: if (RAW != '&') return;
1.77 daniel 6234:
1.113 daniel 6235: if (ctxt->inputNr > 1) {
1.123 daniel 6236: xmlChar cur[2] = { '&' , 0 } ;
1.113 daniel 6237:
1.171 daniel 6238: if ((ctxt->sax != NULL) && (ctxt->sax->characters != NULL) &&
6239: (!ctxt->disableSAX))
1.113 daniel 6240: ctxt->sax->characters(ctxt->userData, cur, 1);
6241: if (ctxt->token == '&')
6242: ctxt->token = 0;
6243: else {
6244: SKIP(1);
6245: }
6246: return;
6247: }
1.77 daniel 6248: if (NXT(1) == '#') {
1.152 daniel 6249: int i = 0;
1.153 daniel 6250: xmlChar out[10];
6251: int hex = NXT(2);
1.77 daniel 6252: int val = xmlParseCharRef(ctxt);
1.152 daniel 6253:
1.153 daniel 6254: if (ctxt->encoding != NULL) {
6255: /*
6256: * So we are using non-UTF-8 buffers
6257: * Check that the char fit on 8bits, if not
6258: * generate a CharRef.
6259: */
6260: if (val <= 0xFF) {
6261: out[0] = val;
6262: out[1] = 0;
1.171 daniel 6263: if ((ctxt->sax != NULL) && (ctxt->sax->characters != NULL) &&
6264: (!ctxt->disableSAX))
1.153 daniel 6265: ctxt->sax->characters(ctxt->userData, out, 1);
6266: } else {
6267: if ((hex == 'x') || (hex == 'X'))
6268: sprintf((char *)out, "#x%X", val);
6269: else
6270: sprintf((char *)out, "#%d", val);
1.171 daniel 6271: if ((ctxt->sax != NULL) && (ctxt->sax->reference != NULL) &&
6272: (!ctxt->disableSAX))
1.153 daniel 6273: ctxt->sax->reference(ctxt->userData, out);
6274: }
6275: } else {
6276: /*
6277: * Just encode the value in UTF-8
6278: */
6279: COPY_BUF(0 ,out, i, val);
6280: out[i] = 0;
1.171 daniel 6281: if ((ctxt->sax != NULL) && (ctxt->sax->characters != NULL) &&
6282: (!ctxt->disableSAX))
1.153 daniel 6283: ctxt->sax->characters(ctxt->userData, out, i);
6284: }
1.77 daniel 6285: } else {
6286: ent = xmlParseEntityRef(ctxt);
6287: if (ent == NULL) return;
6288: if ((ent->name != NULL) &&
1.159 daniel 6289: (ent->etype != XML_INTERNAL_PREDEFINED_ENTITY)) {
1.180 daniel 6290: xmlNodePtr list = NULL;
6291: int ret;
6292:
6293:
6294: /*
6295: * The first reference to the entity trigger a parsing phase
6296: * where the ent->children is filled with the result from
6297: * the parsing.
6298: */
6299: if (ent->children == NULL) {
6300: xmlChar *value;
6301: value = ent->content;
6302:
6303: /*
6304: * Check that this entity is well formed
6305: */
6306: if ((value != NULL) &&
6307: (value[1] == 0) && (value[0] == '<') &&
6308: (!xmlStrcmp(ent->name, BAD_CAST "lt"))) {
6309: /*
6310: * TODO: get definite answer on this !!!
6311: * Lots of entity decls are used to declare a single
6312: * char
6313: * <!ENTITY lt "<">
6314: * Which seems to be valid since
6315: * 2.4: The ampersand character (&) and the left angle
6316: * bracket (<) may appear in their literal form only
6317: * when used ... They are also legal within the literal
6318: * entity value of an internal entity declaration;i
6319: * see "4.3.2 Well-Formed Parsed Entities".
6320: * IMHO 2.4 and 4.3.2 are directly in contradiction.
6321: * Looking at the OASIS test suite and James Clark
6322: * tests, this is broken. However the XML REC uses
6323: * it. Is the XML REC not well-formed ????
6324: * This is a hack to avoid this problem
6325: */
6326: list = xmlNewDocText(ctxt->myDoc, value);
6327: if (list != NULL) {
6328: if ((ent->etype == XML_INTERNAL_GENERAL_ENTITY) &&
6329: (ent->children == NULL)) {
6330: ent->children = list;
6331: ent->last = list;
6332: list->parent = (xmlNodePtr) ent;
6333: } else {
6334: xmlFreeNodeList(list);
6335: }
6336: } else if (list != NULL) {
6337: xmlFreeNodeList(list);
6338: }
1.181 daniel 6339: } else {
1.180 daniel 6340: /*
6341: * 4.3.2: An internal general parsed entity is well-formed
6342: * if its replacement text matches the production labeled
6343: * content.
6344: */
1.185 daniel 6345: if (ent->etype == XML_INTERNAL_GENERAL_ENTITY) {
6346: ctxt->depth++;
1.180 daniel 6347: ret = xmlParseBalancedChunkMemory(ctxt->myDoc,
1.185 daniel 6348: ctxt->sax, NULL, ctxt->depth,
6349: value, &list);
6350: ctxt->depth--;
6351: } else if (ent->etype ==
6352: XML_EXTERNAL_GENERAL_PARSED_ENTITY) {
6353: ctxt->depth++;
1.180 daniel 6354: ret = xmlParseExternalEntity(ctxt->myDoc,
1.185 daniel 6355: ctxt->sax, NULL, ctxt->depth,
6356: ent->SystemID, ent->ExternalID, &list);
6357: ctxt->depth--;
6358: } else {
1.180 daniel 6359: ret = -1;
6360: if ((ctxt->sax != NULL) && (ctxt->sax->error != NULL))
6361: ctxt->sax->error(ctxt->userData,
6362: "Internal: invalid entity type\n");
6363: }
1.185 daniel 6364: if (ret == XML_ERR_ENTITY_LOOP) {
6365: if ((ctxt->sax != NULL) && (ctxt->sax->error != NULL))
6366: ctxt->sax->error(ctxt->userData,
6367: "Detected entity reference loop\n");
6368: ctxt->wellFormed = 0;
6369: ctxt->disableSAX = 1;
6370: ctxt->errNo = XML_ERR_ENTITY_LOOP;
6371: } else if ((ret == 0) && (list != NULL)) {
1.180 daniel 6372: if ((ent->etype == XML_INTERNAL_GENERAL_ENTITY) &&
6373: (ent->children == NULL)) {
6374: ent->children = list;
6375: while (list != NULL) {
6376: list->parent = (xmlNodePtr) ent;
6377: if (list->next == NULL)
6378: ent->last = list;
6379: list = list->next;
6380: }
6381: } else {
6382: xmlFreeNodeList(list);
6383: }
6384: } else if (ret > 0) {
6385: if ((ctxt->sax != NULL) && (ctxt->sax->error != NULL))
6386: ctxt->sax->error(ctxt->userData,
6387: "Entity value required\n");
6388: ctxt->errNo = ret;
6389: ctxt->wellFormed = 0;
6390: ctxt->disableSAX = 1;
6391: } else if (list != NULL) {
6392: xmlFreeNodeList(list);
6393: }
6394: }
6395: }
1.113 daniel 6396: if ((ctxt->sax != NULL) && (ctxt->sax->reference != NULL) &&
1.171 daniel 6397: (ctxt->replaceEntities == 0) && (!ctxt->disableSAX)) {
1.113 daniel 6398: /*
6399: * Create a node.
6400: */
6401: ctxt->sax->reference(ctxt->userData, ent->name);
6402: return;
6403: } else if (ctxt->replaceEntities) {
6404: xmlParserInputPtr input;
1.79 daniel 6405:
1.113 daniel 6406: input = xmlNewEntityInputStream(ctxt, ent);
6407: xmlPushInput(ctxt, input);
1.167 daniel 6408: if ((ent->etype == XML_EXTERNAL_GENERAL_PARSED_ENTITY) &&
6409: (RAW == '<') && (NXT(1) == '?') &&
6410: (NXT(2) == 'x') && (NXT(3) == 'm') &&
6411: (NXT(4) == 'l') && (IS_BLANK(NXT(5)))) {
1.172 daniel 6412: xmlParseTextDecl(ctxt);
1.167 daniel 6413: if (input->standalone) {
6414: if ((ctxt->sax != NULL) && (ctxt->sax->error != NULL))
6415: ctxt->sax->error(ctxt->userData,
6416: "external parsed entities cannot be standalone\n");
6417: ctxt->errNo = XML_ERR_EXT_ENTITY_STANDALONE;
6418: ctxt->wellFormed = 0;
1.180 daniel 6419: ctxt->disableSAX = 1;
1.167 daniel 6420: }
6421: }
1.179 daniel 6422: /*
6423: * !!! TODO: build the tree under the entity first
6424: * 1234
6425: */
1.113 daniel 6426: return;
6427: }
1.77 daniel 6428: }
6429: val = ent->content;
6430: if (val == NULL) return;
6431: /*
6432: * inline the entity.
6433: */
1.171 daniel 6434: if ((ctxt->sax != NULL) && (ctxt->sax->characters != NULL) &&
6435: (!ctxt->disableSAX))
1.77 daniel 6436: ctxt->sax->characters(ctxt->userData, val, xmlStrlen(val));
6437: }
1.24 daniel 6438: }
6439:
1.50 daniel 6440: /**
6441: * xmlParseEntityRef:
6442: * @ctxt: an XML parser context
6443: *
6444: * parse ENTITY references declarations
1.24 daniel 6445: *
6446: * [68] EntityRef ::= '&' Name ';'
1.68 daniel 6447: *
1.98 daniel 6448: * [ WFC: Entity Declared ]
6449: * In a document without any DTD, a document with only an internal DTD
6450: * subset which contains no parameter entity references, or a document
6451: * with "standalone='yes'", the Name given in the entity reference
6452: * must match that in an entity declaration, except that well-formed
6453: * documents need not declare any of the following entities: amp, lt,
6454: * gt, apos, quot. The declaration of a parameter entity must precede
6455: * any reference to it. Similarly, the declaration of a general entity
6456: * must precede any reference to it which appears in a default value in an
6457: * attribute-list declaration. Note that if entities are declared in the
6458: * external subset or in external parameter entities, a non-validating
6459: * processor is not obligated to read and process their declarations;
6460: * for such documents, the rule that an entity must be declared is a
6461: * well-formedness constraint only if standalone='yes'.
6462: *
6463: * [ WFC: Parsed Entity ]
6464: * An entity reference must not contain the name of an unparsed entity
6465: *
1.77 daniel 6466: * Returns the xmlEntityPtr if found, or NULL otherwise.
1.24 daniel 6467: */
1.77 daniel 6468: xmlEntityPtr
1.55 daniel 6469: xmlParseEntityRef(xmlParserCtxtPtr ctxt) {
1.123 daniel 6470: xmlChar *name;
1.72 daniel 6471: xmlEntityPtr ent = NULL;
1.24 daniel 6472:
1.91 daniel 6473: GROW;
1.111 daniel 6474:
1.152 daniel 6475: if (RAW == '&') {
1.40 daniel 6476: NEXT;
1.24 daniel 6477: name = xmlParseName(ctxt);
6478: if (name == NULL) {
1.55 daniel 6479: if ((ctxt->sax != NULL) && (ctxt->sax->error != NULL))
1.98 daniel 6480: ctxt->sax->error(ctxt->userData,
6481: "xmlParseEntityRef: no name\n");
1.123 daniel 6482: ctxt->errNo = XML_ERR_NAME_REQUIRED;
1.59 daniel 6483: ctxt->wellFormed = 0;
1.180 daniel 6484: ctxt->disableSAX = 1;
1.24 daniel 6485: } else {
1.152 daniel 6486: if (RAW == ';') {
1.40 daniel 6487: NEXT;
1.24 daniel 6488: /*
1.77 daniel 6489: * Ask first SAX for entity resolution, otherwise try the
6490: * predefined set.
6491: */
6492: if (ctxt->sax != NULL) {
6493: if (ctxt->sax->getEntity != NULL)
6494: ent = ctxt->sax->getEntity(ctxt->userData, name);
6495: if (ent == NULL)
6496: ent = xmlGetPredefinedEntity(name);
6497: }
6498: /*
1.98 daniel 6499: * [ WFC: Entity Declared ]
6500: * In a document without any DTD, a document with only an
6501: * internal DTD subset which contains no parameter entity
6502: * references, or a document with "standalone='yes'", the
6503: * Name given in the entity reference must match that in an
6504: * entity declaration, except that well-formed documents
6505: * need not declare any of the following entities: amp, lt,
6506: * gt, apos, quot.
6507: * The declaration of a parameter entity must precede any
6508: * reference to it.
6509: * Similarly, the declaration of a general entity must
6510: * precede any reference to it which appears in a default
6511: * value in an attribute-list declaration. Note that if
6512: * entities are declared in the external subset or in
6513: * external parameter entities, a non-validating processor
6514: * is not obligated to read and process their declarations;
6515: * for such documents, the rule that an entity must be
6516: * declared is a well-formedness constraint only if
6517: * standalone='yes'.
1.59 daniel 6518: */
1.77 daniel 6519: if (ent == NULL) {
1.98 daniel 6520: if ((ctxt->standalone == 1) ||
6521: ((ctxt->hasExternalSubset == 0) &&
6522: (ctxt->hasPErefs == 0))) {
6523: if ((ctxt->sax != NULL) && (ctxt->sax->error != NULL))
1.77 daniel 6524: ctxt->sax->error(ctxt->userData,
6525: "Entity '%s' not defined\n", name);
1.123 daniel 6526: ctxt->errNo = XML_ERR_UNDECLARED_ENTITY;
1.77 daniel 6527: ctxt->wellFormed = 0;
1.180 daniel 6528: ctxt->disableSAX = 1;
1.77 daniel 6529: } else {
1.98 daniel 6530: if ((ctxt->sax != NULL) && (ctxt->sax->warning != NULL))
6531: ctxt->sax->warning(ctxt->userData,
6532: "Entity '%s' not defined\n", name);
1.123 daniel 6533: ctxt->errNo = XML_WAR_UNDECLARED_ENTITY;
1.59 daniel 6534: }
1.77 daniel 6535: }
1.59 daniel 6536:
6537: /*
1.98 daniel 6538: * [ WFC: Parsed Entity ]
6539: * An entity reference must not contain the name of an
6540: * unparsed entity
6541: */
1.159 daniel 6542: else if (ent->etype == XML_EXTERNAL_GENERAL_UNPARSED_ENTITY) {
1.98 daniel 6543: if ((ctxt->sax != NULL) && (ctxt->sax->error != NULL))
6544: ctxt->sax->error(ctxt->userData,
6545: "Entity reference to unparsed entity %s\n", name);
1.123 daniel 6546: ctxt->errNo = XML_ERR_UNPARSED_ENTITY;
1.98 daniel 6547: ctxt->wellFormed = 0;
1.180 daniel 6548: ctxt->disableSAX = 1;
1.98 daniel 6549: }
6550:
6551: /*
6552: * [ WFC: No External Entity References ]
6553: * Attribute values cannot contain direct or indirect
6554: * entity references to external entities.
6555: */
6556: else if ((ctxt->instate == XML_PARSER_ATTRIBUTE_VALUE) &&
1.159 daniel 6557: (ent->etype == XML_EXTERNAL_GENERAL_PARSED_ENTITY)) {
1.98 daniel 6558: if ((ctxt->sax != NULL) && (ctxt->sax->error != NULL))
6559: ctxt->sax->error(ctxt->userData,
6560: "Attribute references external entity '%s'\n", name);
1.123 daniel 6561: ctxt->errNo = XML_ERR_ENTITY_IS_EXTERNAL;
1.98 daniel 6562: ctxt->wellFormed = 0;
1.180 daniel 6563: ctxt->disableSAX = 1;
1.98 daniel 6564: }
6565: /*
6566: * [ WFC: No < in Attribute Values ]
6567: * The replacement text of any entity referred to directly or
6568: * indirectly in an attribute value (other than "<") must
6569: * not contain a <.
1.59 daniel 6570: */
1.98 daniel 6571: else if ((ctxt->instate == XML_PARSER_ATTRIBUTE_VALUE) &&
1.116 daniel 6572: (ent != NULL) &&
6573: (xmlStrcmp(ent->name, BAD_CAST "lt")) &&
1.98 daniel 6574: (ent->content != NULL) &&
6575: (xmlStrchr(ent->content, '<'))) {
6576: if ((ctxt->sax != NULL) && (ctxt->sax->error != NULL))
6577: ctxt->sax->error(ctxt->userData,
6578: "'<' in entity '%s' is not allowed in attributes values\n", name);
1.123 daniel 6579: ctxt->errNo = XML_ERR_LT_IN_ATTRIBUTE;
1.98 daniel 6580: ctxt->wellFormed = 0;
1.180 daniel 6581: ctxt->disableSAX = 1;
1.98 daniel 6582: }
6583:
6584: /*
6585: * Internal check, no parameter entities here ...
6586: */
6587: else {
1.159 daniel 6588: switch (ent->etype) {
1.59 daniel 6589: case XML_INTERNAL_PARAMETER_ENTITY:
6590: case XML_EXTERNAL_PARAMETER_ENTITY:
6591: if ((ctxt->sax != NULL) && (ctxt->sax->error != NULL))
1.74 daniel 6592: ctxt->sax->error(ctxt->userData,
1.59 daniel 6593: "Attempt to reference the parameter entity '%s'\n", name);
1.123 daniel 6594: ctxt->errNo = XML_ERR_ENTITY_IS_PARAMETER;
1.59 daniel 6595: ctxt->wellFormed = 0;
1.180 daniel 6596: ctxt->disableSAX = 1;
6597: break;
6598: default:
1.59 daniel 6599: break;
6600: }
6601: }
6602:
6603: /*
1.98 daniel 6604: * [ WFC: No Recursion ]
1.117 daniel 6605: * TODO A parsed entity must not contain a recursive reference
6606: * to itself, either directly or indirectly.
1.59 daniel 6607: */
1.77 daniel 6608:
1.24 daniel 6609: } else {
1.55 daniel 6610: if ((ctxt->sax != NULL) && (ctxt->sax->error != NULL))
1.74 daniel 6611: ctxt->sax->error(ctxt->userData,
1.59 daniel 6612: "xmlParseEntityRef: expecting ';'\n");
1.123 daniel 6613: ctxt->errNo = XML_ERR_ENTITYREF_SEMICOL_MISSING;
1.59 daniel 6614: ctxt->wellFormed = 0;
1.180 daniel 6615: ctxt->disableSAX = 1;
1.24 daniel 6616: }
1.119 daniel 6617: xmlFree(name);
1.24 daniel 6618: }
6619: }
1.77 daniel 6620: return(ent);
1.24 daniel 6621: }
1.135 daniel 6622: /**
6623: * xmlParseStringEntityRef:
6624: * @ctxt: an XML parser context
6625: * @str: a pointer to an index in the string
6626: *
6627: * parse ENTITY references declarations, but this version parses it from
6628: * a string value.
6629: *
6630: * [68] EntityRef ::= '&' Name ';'
6631: *
6632: * [ WFC: Entity Declared ]
6633: * In a document without any DTD, a document with only an internal DTD
6634: * subset which contains no parameter entity references, or a document
6635: * with "standalone='yes'", the Name given in the entity reference
6636: * must match that in an entity declaration, except that well-formed
6637: * documents need not declare any of the following entities: amp, lt,
6638: * gt, apos, quot. The declaration of a parameter entity must precede
6639: * any reference to it. Similarly, the declaration of a general entity
6640: * must precede any reference to it which appears in a default value in an
6641: * attribute-list declaration. Note that if entities are declared in the
6642: * external subset or in external parameter entities, a non-validating
6643: * processor is not obligated to read and process their declarations;
6644: * for such documents, the rule that an entity must be declared is a
6645: * well-formedness constraint only if standalone='yes'.
6646: *
6647: * [ WFC: Parsed Entity ]
6648: * An entity reference must not contain the name of an unparsed entity
6649: *
6650: * Returns the xmlEntityPtr if found, or NULL otherwise. The str pointer
6651: * is updated to the current location in the string.
6652: */
6653: xmlEntityPtr
6654: xmlParseStringEntityRef(xmlParserCtxtPtr ctxt, const xmlChar ** str) {
6655: xmlChar *name;
6656: const xmlChar *ptr;
6657: xmlChar cur;
6658: xmlEntityPtr ent = NULL;
6659:
1.156 daniel 6660: if ((str == NULL) || (*str == NULL))
6661: return(NULL);
1.135 daniel 6662: ptr = *str;
6663: cur = *ptr;
6664: if (cur == '&') {
6665: ptr++;
6666: cur = *ptr;
6667: name = xmlParseStringName(ctxt, &ptr);
6668: if (name == NULL) {
6669: if ((ctxt->sax != NULL) && (ctxt->sax->error != NULL))
6670: ctxt->sax->error(ctxt->userData,
6671: "xmlParseEntityRef: no name\n");
6672: ctxt->errNo = XML_ERR_NAME_REQUIRED;
6673: ctxt->wellFormed = 0;
1.180 daniel 6674: ctxt->disableSAX = 1;
1.135 daniel 6675: } else {
1.185 daniel 6676: if (*ptr == ';') {
6677: ptr++;
1.135 daniel 6678: /*
6679: * Ask first SAX for entity resolution, otherwise try the
6680: * predefined set.
6681: */
6682: if (ctxt->sax != NULL) {
6683: if (ctxt->sax->getEntity != NULL)
6684: ent = ctxt->sax->getEntity(ctxt->userData, name);
6685: if (ent == NULL)
6686: ent = xmlGetPredefinedEntity(name);
6687: }
6688: /*
6689: * [ WFC: Entity Declared ]
6690: * In a document without any DTD, a document with only an
6691: * internal DTD subset which contains no parameter entity
6692: * references, or a document with "standalone='yes'", the
6693: * Name given in the entity reference must match that in an
6694: * entity declaration, except that well-formed documents
6695: * need not declare any of the following entities: amp, lt,
6696: * gt, apos, quot.
6697: * The declaration of a parameter entity must precede any
6698: * reference to it.
6699: * Similarly, the declaration of a general entity must
6700: * precede any reference to it which appears in a default
6701: * value in an attribute-list declaration. Note that if
6702: * entities are declared in the external subset or in
6703: * external parameter entities, a non-validating processor
6704: * is not obligated to read and process their declarations;
6705: * for such documents, the rule that an entity must be
6706: * declared is a well-formedness constraint only if
6707: * standalone='yes'.
6708: */
6709: if (ent == NULL) {
6710: if ((ctxt->standalone == 1) ||
6711: ((ctxt->hasExternalSubset == 0) &&
6712: (ctxt->hasPErefs == 0))) {
6713: if ((ctxt->sax != NULL) && (ctxt->sax->error != NULL))
6714: ctxt->sax->error(ctxt->userData,
6715: "Entity '%s' not defined\n", name);
6716: ctxt->errNo = XML_ERR_UNDECLARED_ENTITY;
6717: ctxt->wellFormed = 0;
1.180 daniel 6718: ctxt->disableSAX = 1;
1.135 daniel 6719: } else {
6720: if ((ctxt->sax != NULL) && (ctxt->sax->warning != NULL))
6721: ctxt->sax->warning(ctxt->userData,
6722: "Entity '%s' not defined\n", name);
6723: ctxt->errNo = XML_WAR_UNDECLARED_ENTITY;
6724: }
6725: }
6726:
6727: /*
6728: * [ WFC: Parsed Entity ]
6729: * An entity reference must not contain the name of an
6730: * unparsed entity
6731: */
1.159 daniel 6732: else if (ent->etype == XML_EXTERNAL_GENERAL_UNPARSED_ENTITY) {
1.135 daniel 6733: if ((ctxt->sax != NULL) && (ctxt->sax->error != NULL))
6734: ctxt->sax->error(ctxt->userData,
6735: "Entity reference to unparsed entity %s\n", name);
6736: ctxt->errNo = XML_ERR_UNPARSED_ENTITY;
6737: ctxt->wellFormed = 0;
1.180 daniel 6738: ctxt->disableSAX = 1;
1.135 daniel 6739: }
6740:
6741: /*
6742: * [ WFC: No External Entity References ]
6743: * Attribute values cannot contain direct or indirect
6744: * entity references to external entities.
6745: */
6746: else if ((ctxt->instate == XML_PARSER_ATTRIBUTE_VALUE) &&
1.159 daniel 6747: (ent->etype == XML_EXTERNAL_GENERAL_PARSED_ENTITY)) {
1.135 daniel 6748: if ((ctxt->sax != NULL) && (ctxt->sax->error != NULL))
6749: ctxt->sax->error(ctxt->userData,
6750: "Attribute references external entity '%s'\n", name);
6751: ctxt->errNo = XML_ERR_ENTITY_IS_EXTERNAL;
6752: ctxt->wellFormed = 0;
1.180 daniel 6753: ctxt->disableSAX = 1;
1.135 daniel 6754: }
6755: /*
6756: * [ WFC: No < in Attribute Values ]
6757: * The replacement text of any entity referred to directly or
6758: * indirectly in an attribute value (other than "<") must
6759: * not contain a <.
6760: */
6761: else if ((ctxt->instate == XML_PARSER_ATTRIBUTE_VALUE) &&
6762: (ent != NULL) &&
6763: (xmlStrcmp(ent->name, BAD_CAST "lt")) &&
6764: (ent->content != NULL) &&
6765: (xmlStrchr(ent->content, '<'))) {
6766: if ((ctxt->sax != NULL) && (ctxt->sax->error != NULL))
6767: ctxt->sax->error(ctxt->userData,
6768: "'<' in entity '%s' is not allowed in attributes values\n", name);
6769: ctxt->errNo = XML_ERR_LT_IN_ATTRIBUTE;
6770: ctxt->wellFormed = 0;
1.180 daniel 6771: ctxt->disableSAX = 1;
1.135 daniel 6772: }
6773:
6774: /*
6775: * Internal check, no parameter entities here ...
6776: */
6777: else {
1.159 daniel 6778: switch (ent->etype) {
1.135 daniel 6779: case XML_INTERNAL_PARAMETER_ENTITY:
6780: case XML_EXTERNAL_PARAMETER_ENTITY:
6781: if ((ctxt->sax != NULL) && (ctxt->sax->error != NULL))
6782: ctxt->sax->error(ctxt->userData,
6783: "Attempt to reference the parameter entity '%s'\n", name);
6784: ctxt->errNo = XML_ERR_ENTITY_IS_PARAMETER;
6785: ctxt->wellFormed = 0;
1.180 daniel 6786: ctxt->disableSAX = 1;
6787: break;
6788: default:
1.135 daniel 6789: break;
6790: }
6791: }
6792:
6793: /*
6794: * [ WFC: No Recursion ]
6795: * TODO A parsed entity must not contain a recursive reference
6796: * to itself, either directly or indirectly.
6797: */
6798:
6799: } else {
6800: if ((ctxt->sax != NULL) && (ctxt->sax->error != NULL))
6801: ctxt->sax->error(ctxt->userData,
6802: "xmlParseEntityRef: expecting ';'\n");
6803: ctxt->errNo = XML_ERR_ENTITYREF_SEMICOL_MISSING;
6804: ctxt->wellFormed = 0;
1.180 daniel 6805: ctxt->disableSAX = 1;
1.135 daniel 6806: }
6807: xmlFree(name);
6808: }
6809: }
1.185 daniel 6810: *str = ptr;
1.135 daniel 6811: return(ent);
6812: }
1.24 daniel 6813:
1.50 daniel 6814: /**
6815: * xmlParsePEReference:
6816: * @ctxt: an XML parser context
6817: *
6818: * parse PEReference declarations
1.77 daniel 6819: * The entity content is handled directly by pushing it's content as
6820: * a new input stream.
1.22 daniel 6821: *
6822: * [69] PEReference ::= '%' Name ';'
1.68 daniel 6823: *
1.98 daniel 6824: * [ WFC: No Recursion ]
6825: * TODO A parsed entity must not contain a recursive
6826: * reference to itself, either directly or indirectly.
6827: *
6828: * [ WFC: Entity Declared ]
6829: * In a document without any DTD, a document with only an internal DTD
6830: * subset which contains no parameter entity references, or a document
6831: * with "standalone='yes'", ... ... The declaration of a parameter
6832: * entity must precede any reference to it...
6833: *
6834: * [ VC: Entity Declared ]
6835: * In a document with an external subset or external parameter entities
6836: * with "standalone='no'", ... ... The declaration of a parameter entity
6837: * must precede any reference to it...
6838: *
6839: * [ WFC: In DTD ]
6840: * Parameter-entity references may only appear in the DTD.
6841: * NOTE: misleading but this is handled.
1.22 daniel 6842: */
1.77 daniel 6843: void
1.55 daniel 6844: xmlParsePEReference(xmlParserCtxtPtr ctxt) {
1.123 daniel 6845: xmlChar *name;
1.72 daniel 6846: xmlEntityPtr entity = NULL;
1.50 daniel 6847: xmlParserInputPtr input;
1.22 daniel 6848:
1.152 daniel 6849: if (RAW == '%') {
1.40 daniel 6850: NEXT;
1.22 daniel 6851: name = xmlParseName(ctxt);
6852: if (name == NULL) {
1.55 daniel 6853: if ((ctxt->sax != NULL) && (ctxt->sax->error != NULL))
1.122 daniel 6854: ctxt->sax->error(ctxt->userData,
6855: "xmlParsePEReference: no name\n");
1.123 daniel 6856: ctxt->errNo = XML_ERR_NAME_REQUIRED;
1.59 daniel 6857: ctxt->wellFormed = 0;
1.180 daniel 6858: ctxt->disableSAX = 1;
1.22 daniel 6859: } else {
1.152 daniel 6860: if (RAW == ';') {
1.40 daniel 6861: NEXT;
1.98 daniel 6862: if ((ctxt->sax != NULL) &&
6863: (ctxt->sax->getParameterEntity != NULL))
6864: entity = ctxt->sax->getParameterEntity(ctxt->userData,
6865: name);
1.45 daniel 6866: if (entity == NULL) {
1.98 daniel 6867: /*
6868: * [ WFC: Entity Declared ]
6869: * In a document without any DTD, a document with only an
6870: * internal DTD subset which contains no parameter entity
6871: * references, or a document with "standalone='yes'", ...
6872: * ... The declaration of a parameter entity must precede
6873: * any reference to it...
6874: */
6875: if ((ctxt->standalone == 1) ||
6876: ((ctxt->hasExternalSubset == 0) &&
6877: (ctxt->hasPErefs == 0))) {
6878: if ((ctxt->sax != NULL) && (ctxt->sax->error != NULL))
6879: ctxt->sax->error(ctxt->userData,
6880: "PEReference: %%%s; not found\n", name);
1.123 daniel 6881: ctxt->errNo = XML_ERR_UNDECLARED_ENTITY;
1.98 daniel 6882: ctxt->wellFormed = 0;
1.180 daniel 6883: ctxt->disableSAX = 1;
1.98 daniel 6884: } else {
6885: /*
6886: * [ VC: Entity Declared ]
6887: * In a document with an external subset or external
6888: * parameter entities with "standalone='no'", ...
6889: * ... The declaration of a parameter entity must precede
6890: * any reference to it...
6891: */
6892: if ((ctxt->sax != NULL) && (ctxt->sax->warning != NULL))
6893: ctxt->sax->warning(ctxt->userData,
6894: "PEReference: %%%s; not found\n", name);
6895: ctxt->valid = 0;
6896: }
1.50 daniel 6897: } else {
1.98 daniel 6898: /*
6899: * Internal checking in case the entity quest barfed
6900: */
1.159 daniel 6901: if ((entity->etype != XML_INTERNAL_PARAMETER_ENTITY) &&
6902: (entity->etype != XML_EXTERNAL_PARAMETER_ENTITY)) {
1.98 daniel 6903: if ((ctxt->sax != NULL) && (ctxt->sax->warning != NULL))
6904: ctxt->sax->warning(ctxt->userData,
6905: "Internal: %%%s; is not a parameter entity\n", name);
6906: } else {
1.164 daniel 6907: /*
6908: * TODO !!!
6909: * handle the extra spaces added before and after
6910: * c.f. http://www.w3.org/TR/REC-xml#as-PE
6911: */
1.98 daniel 6912: input = xmlNewEntityInputStream(ctxt, entity);
6913: xmlPushInput(ctxt, input);
1.164 daniel 6914: if ((entity->etype == XML_EXTERNAL_PARAMETER_ENTITY) &&
6915: (RAW == '<') && (NXT(1) == '?') &&
6916: (NXT(2) == 'x') && (NXT(3) == 'm') &&
6917: (NXT(4) == 'l') && (IS_BLANK(NXT(5)))) {
1.172 daniel 6918: xmlParseTextDecl(ctxt);
1.164 daniel 6919: }
6920: if (ctxt->token == 0)
6921: ctxt->token = ' ';
1.98 daniel 6922: }
1.45 daniel 6923: }
1.98 daniel 6924: ctxt->hasPErefs = 1;
1.22 daniel 6925: } else {
1.55 daniel 6926: if ((ctxt->sax != NULL) && (ctxt->sax->error != NULL))
1.74 daniel 6927: ctxt->sax->error(ctxt->userData,
1.59 daniel 6928: "xmlParsePEReference: expecting ';'\n");
1.123 daniel 6929: ctxt->errNo = XML_ERR_ENTITYREF_SEMICOL_MISSING;
1.59 daniel 6930: ctxt->wellFormed = 0;
1.180 daniel 6931: ctxt->disableSAX = 1;
1.22 daniel 6932: }
1.119 daniel 6933: xmlFree(name);
1.3 veillard 6934: }
6935: }
6936: }
6937:
1.50 daniel 6938: /**
1.135 daniel 6939: * xmlParseStringPEReference:
6940: * @ctxt: an XML parser context
6941: * @str: a pointer to an index in the string
6942: *
6943: * parse PEReference declarations
6944: *
6945: * [69] PEReference ::= '%' Name ';'
6946: *
6947: * [ WFC: No Recursion ]
6948: * TODO A parsed entity must not contain a recursive
6949: * reference to itself, either directly or indirectly.
6950: *
6951: * [ WFC: Entity Declared ]
6952: * In a document without any DTD, a document with only an internal DTD
6953: * subset which contains no parameter entity references, or a document
6954: * with "standalone='yes'", ... ... The declaration of a parameter
6955: * entity must precede any reference to it...
6956: *
6957: * [ VC: Entity Declared ]
6958: * In a document with an external subset or external parameter entities
6959: * with "standalone='no'", ... ... The declaration of a parameter entity
6960: * must precede any reference to it...
6961: *
6962: * [ WFC: In DTD ]
6963: * Parameter-entity references may only appear in the DTD.
6964: * NOTE: misleading but this is handled.
6965: *
6966: * Returns the string of the entity content.
6967: * str is updated to the current value of the index
6968: */
6969: xmlEntityPtr
6970: xmlParseStringPEReference(xmlParserCtxtPtr ctxt, const xmlChar **str) {
6971: const xmlChar *ptr;
6972: xmlChar cur;
6973: xmlChar *name;
6974: xmlEntityPtr entity = NULL;
6975:
6976: if ((str == NULL) || (*str == NULL)) return(NULL);
6977: ptr = *str;
6978: cur = *ptr;
6979: if (cur == '%') {
6980: ptr++;
6981: cur = *ptr;
6982: name = xmlParseStringName(ctxt, &ptr);
6983: if (name == NULL) {
6984: if ((ctxt->sax != NULL) && (ctxt->sax->error != NULL))
6985: ctxt->sax->error(ctxt->userData,
6986: "xmlParseStringPEReference: no name\n");
6987: ctxt->errNo = XML_ERR_NAME_REQUIRED;
6988: ctxt->wellFormed = 0;
1.180 daniel 6989: ctxt->disableSAX = 1;
1.135 daniel 6990: } else {
6991: cur = *ptr;
6992: if (cur == ';') {
6993: ptr++;
6994: cur = *ptr;
6995: if ((ctxt->sax != NULL) &&
6996: (ctxt->sax->getParameterEntity != NULL))
6997: entity = ctxt->sax->getParameterEntity(ctxt->userData,
6998: name);
6999: if (entity == NULL) {
7000: /*
7001: * [ WFC: Entity Declared ]
7002: * In a document without any DTD, a document with only an
7003: * internal DTD subset which contains no parameter entity
7004: * references, or a document with "standalone='yes'", ...
7005: * ... The declaration of a parameter entity must precede
7006: * any reference to it...
7007: */
7008: if ((ctxt->standalone == 1) ||
7009: ((ctxt->hasExternalSubset == 0) &&
7010: (ctxt->hasPErefs == 0))) {
7011: if ((ctxt->sax != NULL) && (ctxt->sax->error != NULL))
7012: ctxt->sax->error(ctxt->userData,
7013: "PEReference: %%%s; not found\n", name);
7014: ctxt->errNo = XML_ERR_UNDECLARED_ENTITY;
7015: ctxt->wellFormed = 0;
1.180 daniel 7016: ctxt->disableSAX = 1;
1.135 daniel 7017: } else {
7018: /*
7019: * [ VC: Entity Declared ]
7020: * In a document with an external subset or external
7021: * parameter entities with "standalone='no'", ...
7022: * ... The declaration of a parameter entity must
7023: * precede any reference to it...
7024: */
7025: if ((ctxt->sax != NULL) && (ctxt->sax->warning != NULL))
7026: ctxt->sax->warning(ctxt->userData,
7027: "PEReference: %%%s; not found\n", name);
7028: ctxt->valid = 0;
7029: }
7030: } else {
7031: /*
7032: * Internal checking in case the entity quest barfed
7033: */
1.159 daniel 7034: if ((entity->etype != XML_INTERNAL_PARAMETER_ENTITY) &&
7035: (entity->etype != XML_EXTERNAL_PARAMETER_ENTITY)) {
1.135 daniel 7036: if ((ctxt->sax != NULL) && (ctxt->sax->warning != NULL))
7037: ctxt->sax->warning(ctxt->userData,
7038: "Internal: %%%s; is not a parameter entity\n", name);
7039: }
7040: }
7041: ctxt->hasPErefs = 1;
7042: } else {
7043: if ((ctxt->sax != NULL) && (ctxt->sax->error != NULL))
7044: ctxt->sax->error(ctxt->userData,
7045: "xmlParseStringPEReference: expecting ';'\n");
7046: ctxt->errNo = XML_ERR_ENTITYREF_SEMICOL_MISSING;
7047: ctxt->wellFormed = 0;
1.180 daniel 7048: ctxt->disableSAX = 1;
1.135 daniel 7049: }
7050: xmlFree(name);
7051: }
7052: }
7053: *str = ptr;
7054: return(entity);
7055: }
7056:
7057: /**
1.181 daniel 7058: * xmlParseDocTypeDecl:
1.50 daniel 7059: * @ctxt: an XML parser context
7060: *
7061: * parse a DOCTYPE declaration
1.21 daniel 7062: *
1.22 daniel 7063: * [28] doctypedecl ::= '<!DOCTYPE' S Name (S ExternalID)? S?
7064: * ('[' (markupdecl | PEReference | S)* ']' S?)? '>'
1.98 daniel 7065: *
7066: * [ VC: Root Element Type ]
1.99 daniel 7067: * The Name in the document type declaration must match the element
1.98 daniel 7068: * type of the root element.
1.21 daniel 7069: */
7070:
1.55 daniel 7071: void
7072: xmlParseDocTypeDecl(xmlParserCtxtPtr ctxt) {
1.165 daniel 7073: xmlChar *name = NULL;
1.123 daniel 7074: xmlChar *ExternalID = NULL;
7075: xmlChar *URI = NULL;
1.21 daniel 7076:
7077: /*
7078: * We know that '<!DOCTYPE' has been detected.
7079: */
1.40 daniel 7080: SKIP(9);
1.21 daniel 7081:
1.42 daniel 7082: SKIP_BLANKS;
1.21 daniel 7083:
7084: /*
7085: * Parse the DOCTYPE name.
7086: */
7087: name = xmlParseName(ctxt);
7088: if (name == NULL) {
1.55 daniel 7089: if ((ctxt->sax != NULL) && (ctxt->sax->error != NULL))
1.122 daniel 7090: ctxt->sax->error(ctxt->userData,
7091: "xmlParseDocTypeDecl : no DOCTYPE name !\n");
1.59 daniel 7092: ctxt->wellFormed = 0;
1.180 daniel 7093: ctxt->disableSAX = 1;
1.123 daniel 7094: ctxt->errNo = XML_ERR_NAME_REQUIRED;
1.21 daniel 7095: }
1.165 daniel 7096: ctxt->intSubName = name;
1.21 daniel 7097:
1.42 daniel 7098: SKIP_BLANKS;
1.21 daniel 7099:
7100: /*
1.22 daniel 7101: * Check for SystemID and ExternalID
7102: */
1.67 daniel 7103: URI = xmlParseExternalID(ctxt, &ExternalID, 1);
1.98 daniel 7104:
7105: if ((URI != NULL) || (ExternalID != NULL)) {
7106: ctxt->hasExternalSubset = 1;
7107: }
1.165 daniel 7108: ctxt->extSubURI = URI;
7109: ctxt->extSubSystem = ExternalID;
1.98 daniel 7110:
1.42 daniel 7111: SKIP_BLANKS;
1.36 daniel 7112:
1.76 daniel 7113: /*
1.165 daniel 7114: * Create and update the internal subset.
1.76 daniel 7115: */
1.171 daniel 7116: if ((ctxt->sax != NULL) && (ctxt->sax->internalSubset != NULL) &&
7117: (!ctxt->disableSAX))
1.74 daniel 7118: ctxt->sax->internalSubset(ctxt->userData, name, ExternalID, URI);
1.22 daniel 7119:
7120: /*
1.140 daniel 7121: * Is there any internal subset declarations ?
7122: * they are handled separately in xmlParseInternalSubset()
7123: */
1.152 daniel 7124: if (RAW == '[')
1.140 daniel 7125: return;
7126:
7127: /*
7128: * We should be at the end of the DOCTYPE declaration.
7129: */
1.152 daniel 7130: if (RAW != '>') {
1.140 daniel 7131: if ((ctxt->sax != NULL) && (ctxt->sax->error != NULL))
7132: ctxt->sax->error(ctxt->userData, "DOCTYPE unproperly terminated\n");
7133: ctxt->wellFormed = 0;
1.180 daniel 7134: ctxt->disableSAX = 1;
1.140 daniel 7135: ctxt->errNo = XML_ERR_DOCTYPE_NOT_FINISHED;
7136: }
7137: NEXT;
7138: }
7139:
7140: /**
1.181 daniel 7141: * xmlParseInternalsubset:
1.140 daniel 7142: * @ctxt: an XML parser context
7143: *
7144: * parse the internal subset declaration
7145: *
7146: * [28 end] ('[' (markupdecl | PEReference | S)* ']' S?)? '>'
7147: */
7148:
7149: void
7150: xmlParseInternalSubset(xmlParserCtxtPtr ctxt) {
7151: /*
1.22 daniel 7152: * Is there any DTD definition ?
7153: */
1.152 daniel 7154: if (RAW == '[') {
1.96 daniel 7155: ctxt->instate = XML_PARSER_DTD;
1.40 daniel 7156: NEXT;
1.22 daniel 7157: /*
7158: * Parse the succession of Markup declarations and
7159: * PEReferences.
7160: * Subsequence (markupdecl | PEReference | S)*
7161: */
1.152 daniel 7162: while (RAW != ']') {
1.123 daniel 7163: const xmlChar *check = CUR_PTR;
1.115 daniel 7164: int cons = ctxt->input->consumed;
1.22 daniel 7165:
1.42 daniel 7166: SKIP_BLANKS;
1.22 daniel 7167: xmlParseMarkupDecl(ctxt);
1.50 daniel 7168: xmlParsePEReference(ctxt);
1.22 daniel 7169:
1.115 daniel 7170: /*
7171: * Pop-up of finished entities.
7172: */
1.152 daniel 7173: while ((RAW == 0) && (ctxt->inputNr > 1))
1.115 daniel 7174: xmlPopInput(ctxt);
7175:
1.118 daniel 7176: if ((CUR_PTR == check) && (cons == ctxt->input->consumed)) {
1.55 daniel 7177: if ((ctxt->sax != NULL) && (ctxt->sax->error != NULL))
1.74 daniel 7178: ctxt->sax->error(ctxt->userData,
1.140 daniel 7179: "xmlParseInternalSubset: error detected in Markup declaration\n");
1.59 daniel 7180: ctxt->wellFormed = 0;
1.180 daniel 7181: ctxt->disableSAX = 1;
1.123 daniel 7182: ctxt->errNo = XML_ERR_INTERNAL_ERROR;
1.22 daniel 7183: break;
7184: }
7185: }
1.152 daniel 7186: if (RAW == ']') NEXT;
1.22 daniel 7187: }
7188:
7189: /*
7190: * We should be at the end of the DOCTYPE declaration.
1.21 daniel 7191: */
1.152 daniel 7192: if (RAW != '>') {
1.55 daniel 7193: if ((ctxt->sax != NULL) && (ctxt->sax->error != NULL))
1.74 daniel 7194: ctxt->sax->error(ctxt->userData, "DOCTYPE unproperly terminated\n");
1.59 daniel 7195: ctxt->wellFormed = 0;
1.180 daniel 7196: ctxt->disableSAX = 1;
1.123 daniel 7197: ctxt->errNo = XML_ERR_DOCTYPE_NOT_FINISHED;
1.21 daniel 7198: }
1.40 daniel 7199: NEXT;
1.21 daniel 7200: }
7201:
1.50 daniel 7202: /**
7203: * xmlParseAttribute:
7204: * @ctxt: an XML parser context
1.123 daniel 7205: * @value: a xmlChar ** used to store the value of the attribute
1.50 daniel 7206: *
7207: * parse an attribute
1.3 veillard 7208: *
1.22 daniel 7209: * [41] Attribute ::= Name Eq AttValue
7210: *
1.98 daniel 7211: * [ WFC: No External Entity References ]
7212: * Attribute values cannot contain direct or indirect entity references
7213: * to external entities.
7214: *
7215: * [ WFC: No < in Attribute Values ]
7216: * The replacement text of any entity referred to directly or indirectly in
7217: * an attribute value (other than "<") must not contain a <.
7218: *
7219: * [ VC: Attribute Value Type ]
1.117 daniel 7220: * The attribute must have been declared; the value must be of the type
1.99 daniel 7221: * declared for it.
1.98 daniel 7222: *
1.22 daniel 7223: * [25] Eq ::= S? '=' S?
7224: *
1.29 daniel 7225: * With namespace:
7226: *
7227: * [NS 11] Attribute ::= QName Eq AttValue
1.43 daniel 7228: *
7229: * Also the case QName == xmlns:??? is handled independently as a namespace
7230: * definition.
1.69 daniel 7231: *
1.72 daniel 7232: * Returns the attribute name, and the value in *value.
1.3 veillard 7233: */
7234:
1.123 daniel 7235: xmlChar *
7236: xmlParseAttribute(xmlParserCtxtPtr ctxt, xmlChar **value) {
7237: xmlChar *name, *val;
1.3 veillard 7238:
1.72 daniel 7239: *value = NULL;
7240: name = xmlParseName(ctxt);
1.22 daniel 7241: if (name == NULL) {
1.55 daniel 7242: if ((ctxt->sax != NULL) && (ctxt->sax->error != NULL))
1.74 daniel 7243: ctxt->sax->error(ctxt->userData, "error parsing attribute name\n");
1.59 daniel 7244: ctxt->wellFormed = 0;
1.180 daniel 7245: ctxt->disableSAX = 1;
1.123 daniel 7246: ctxt->errNo = XML_ERR_NAME_REQUIRED;
1.52 daniel 7247: return(NULL);
1.3 veillard 7248: }
7249:
7250: /*
1.29 daniel 7251: * read the value
1.3 veillard 7252: */
1.42 daniel 7253: SKIP_BLANKS;
1.152 daniel 7254: if (RAW == '=') {
1.40 daniel 7255: NEXT;
1.42 daniel 7256: SKIP_BLANKS;
1.72 daniel 7257: val = xmlParseAttValue(ctxt);
1.96 daniel 7258: ctxt->instate = XML_PARSER_CONTENT;
1.29 daniel 7259: } else {
1.55 daniel 7260: if ((ctxt->sax != NULL) && (ctxt->sax->error != NULL))
1.74 daniel 7261: ctxt->sax->error(ctxt->userData,
1.59 daniel 7262: "Specification mandate value for attribute %s\n", name);
1.123 daniel 7263: ctxt->errNo = XML_ERR_ATTRIBUTE_WITHOUT_VALUE;
1.59 daniel 7264: ctxt->wellFormed = 0;
1.180 daniel 7265: ctxt->disableSAX = 1;
1.170 daniel 7266: xmlFree(name);
1.52 daniel 7267: return(NULL);
1.43 daniel 7268: }
7269:
1.172 daniel 7270: /*
7271: * Check that xml:lang conforms to the specification
7272: */
7273: if (!xmlStrcmp(name, BAD_CAST "xml:lang")) {
7274: if (!xmlCheckLanguageID(val)) {
7275: if ((ctxt->sax != NULL) && (ctxt->sax->error != NULL))
7276: ctxt->sax->error(ctxt->userData,
7277: "Invalid value for xml:lang : %s\n", val);
7278: ctxt->errNo = XML_ERR_ATTRIBUTE_WITHOUT_VALUE;
7279: ctxt->wellFormed = 0;
1.180 daniel 7280: ctxt->disableSAX = 1;
1.172 daniel 7281: }
7282: }
7283:
1.176 daniel 7284: /*
7285: * Check that xml:space conforms to the specification
7286: */
7287: if (!xmlStrcmp(name, BAD_CAST "xml:space")) {
7288: if (!xmlStrcmp(val, BAD_CAST "default"))
7289: *(ctxt->space) = 0;
7290: else if (!xmlStrcmp(val, BAD_CAST "preserve"))
7291: *(ctxt->space) = 1;
7292: else {
7293: if ((ctxt->sax != NULL) && (ctxt->sax->error != NULL))
7294: ctxt->sax->error(ctxt->userData,
7295: "Invalid value for xml:space : \"%s\", \"default\" or \"preserve\" expected\n",
7296: val);
7297: ctxt->errNo = XML_ERR_ATTRIBUTE_WITHOUT_VALUE;
7298: ctxt->wellFormed = 0;
1.180 daniel 7299: ctxt->disableSAX = 1;
1.176 daniel 7300: }
7301: }
7302:
1.72 daniel 7303: *value = val;
7304: return(name);
1.3 veillard 7305: }
7306:
1.50 daniel 7307: /**
7308: * xmlParseStartTag:
7309: * @ctxt: an XML parser context
7310: *
7311: * parse a start of tag either for rule element or
7312: * EmptyElement. In both case we don't parse the tag closing chars.
1.27 daniel 7313: *
7314: * [40] STag ::= '<' Name (S Attribute)* S? '>'
7315: *
1.98 daniel 7316: * [ WFC: Unique Att Spec ]
7317: * No attribute name may appear more than once in the same start-tag or
7318: * empty-element tag.
7319: *
1.29 daniel 7320: * [44] EmptyElemTag ::= '<' Name (S Attribute)* S? '/>'
7321: *
1.98 daniel 7322: * [ WFC: Unique Att Spec ]
7323: * No attribute name may appear more than once in the same start-tag or
7324: * empty-element tag.
7325: *
1.29 daniel 7326: * With namespace:
7327: *
7328: * [NS 8] STag ::= '<' QName (S Attribute)* S? '>'
7329: *
7330: * [NS 10] EmptyElement ::= '<' QName (S Attribute)* S? '/>'
1.83 daniel 7331: *
1.129 daniel 7332: * Returne the element name parsed
1.2 veillard 7333: */
7334:
1.123 daniel 7335: xmlChar *
1.69 daniel 7336: xmlParseStartTag(xmlParserCtxtPtr ctxt) {
1.123 daniel 7337: xmlChar *name;
7338: xmlChar *attname;
7339: xmlChar *attvalue;
7340: const xmlChar **atts = NULL;
1.72 daniel 7341: int nbatts = 0;
7342: int maxatts = 0;
7343: int i;
1.2 veillard 7344:
1.152 daniel 7345: if (RAW != '<') return(NULL);
1.40 daniel 7346: NEXT;
1.3 veillard 7347:
1.72 daniel 7348: name = xmlParseName(ctxt);
1.59 daniel 7349: if (name == NULL) {
7350: if ((ctxt->sax != NULL) && (ctxt->sax->error != NULL))
1.74 daniel 7351: ctxt->sax->error(ctxt->userData,
1.59 daniel 7352: "xmlParseStartTag: invalid element name\n");
1.123 daniel 7353: ctxt->errNo = XML_ERR_NAME_REQUIRED;
1.59 daniel 7354: ctxt->wellFormed = 0;
1.180 daniel 7355: ctxt->disableSAX = 1;
1.83 daniel 7356: return(NULL);
1.50 daniel 7357: }
7358:
7359: /*
1.3 veillard 7360: * Now parse the attributes, it ends up with the ending
7361: *
7362: * (S Attribute)* S?
7363: */
1.42 daniel 7364: SKIP_BLANKS;
1.91 daniel 7365: GROW;
1.168 daniel 7366:
1.153 daniel 7367: while ((IS_CHAR(RAW)) &&
1.152 daniel 7368: (RAW != '>') &&
7369: ((RAW != '/') || (NXT(1) != '>'))) {
1.123 daniel 7370: const xmlChar *q = CUR_PTR;
1.91 daniel 7371: int cons = ctxt->input->consumed;
1.29 daniel 7372:
1.72 daniel 7373: attname = xmlParseAttribute(ctxt, &attvalue);
7374: if ((attname != NULL) && (attvalue != NULL)) {
7375: /*
1.98 daniel 7376: * [ WFC: Unique Att Spec ]
7377: * No attribute name may appear more than once in the same
7378: * start-tag or empty-element tag.
1.72 daniel 7379: */
7380: for (i = 0; i < nbatts;i += 2) {
7381: if (!xmlStrcmp(atts[i], attname)) {
7382: if ((ctxt->sax != NULL) && (ctxt->sax->error != NULL))
1.98 daniel 7383: ctxt->sax->error(ctxt->userData,
7384: "Attribute %s redefined\n",
7385: attname);
1.72 daniel 7386: ctxt->wellFormed = 0;
1.180 daniel 7387: ctxt->disableSAX = 1;
1.123 daniel 7388: ctxt->errNo = XML_ERR_ATTRIBUTE_REDEFINED;
1.119 daniel 7389: xmlFree(attname);
7390: xmlFree(attvalue);
1.98 daniel 7391: goto failed;
1.72 daniel 7392: }
7393: }
7394:
7395: /*
7396: * Add the pair to atts
7397: */
7398: if (atts == NULL) {
7399: maxatts = 10;
1.123 daniel 7400: atts = (const xmlChar **) xmlMalloc(maxatts * sizeof(xmlChar *));
1.72 daniel 7401: if (atts == NULL) {
1.86 daniel 7402: fprintf(stderr, "malloc of %ld byte failed\n",
1.123 daniel 7403: maxatts * (long)sizeof(xmlChar *));
1.83 daniel 7404: return(NULL);
1.72 daniel 7405: }
1.127 daniel 7406: } else if (nbatts + 4 > maxatts) {
1.72 daniel 7407: maxatts *= 2;
1.123 daniel 7408: atts = (const xmlChar **) xmlRealloc(atts,
7409: maxatts * sizeof(xmlChar *));
1.72 daniel 7410: if (atts == NULL) {
1.86 daniel 7411: fprintf(stderr, "realloc of %ld byte failed\n",
1.123 daniel 7412: maxatts * (long)sizeof(xmlChar *));
1.83 daniel 7413: return(NULL);
1.72 daniel 7414: }
7415: }
7416: atts[nbatts++] = attname;
7417: atts[nbatts++] = attvalue;
7418: atts[nbatts] = NULL;
7419: atts[nbatts + 1] = NULL;
1.176 daniel 7420: } else {
7421: if (attname != NULL)
7422: xmlFree(attname);
7423: if (attvalue != NULL)
7424: xmlFree(attvalue);
1.72 daniel 7425: }
7426:
1.116 daniel 7427: failed:
1.168 daniel 7428:
7429: if ((RAW == '>') || (((RAW == '/') && (NXT(1) == '>'))))
7430: break;
7431: if (!IS_BLANK(RAW)) {
7432: if ((ctxt->sax != NULL) && (ctxt->sax->error != NULL))
7433: ctxt->sax->error(ctxt->userData,
7434: "attributes construct error\n");
7435: ctxt->errNo = XML_ERR_SPACE_REQUIRED;
7436: ctxt->wellFormed = 0;
1.180 daniel 7437: ctxt->disableSAX = 1;
1.168 daniel 7438: }
1.42 daniel 7439: SKIP_BLANKS;
1.91 daniel 7440: if ((cons == ctxt->input->consumed) && (q == CUR_PTR)) {
1.55 daniel 7441: if ((ctxt->sax != NULL) && (ctxt->sax->error != NULL))
1.74 daniel 7442: ctxt->sax->error(ctxt->userData,
1.31 daniel 7443: "xmlParseStartTag: problem parsing attributes\n");
1.123 daniel 7444: ctxt->errNo = XML_ERR_INTERNAL_ERROR;
1.59 daniel 7445: ctxt->wellFormed = 0;
1.180 daniel 7446: ctxt->disableSAX = 1;
1.29 daniel 7447: break;
1.3 veillard 7448: }
1.91 daniel 7449: GROW;
1.3 veillard 7450: }
7451:
1.43 daniel 7452: /*
1.72 daniel 7453: * SAX: Start of Element !
1.43 daniel 7454: */
1.171 daniel 7455: if ((ctxt->sax != NULL) && (ctxt->sax->startElement != NULL) &&
7456: (!ctxt->disableSAX))
1.74 daniel 7457: ctxt->sax->startElement(ctxt->userData, name, atts);
1.43 daniel 7458:
1.72 daniel 7459: if (atts != NULL) {
1.123 daniel 7460: for (i = 0;i < nbatts;i++) xmlFree((xmlChar *) atts[i]);
1.119 daniel 7461: xmlFree(atts);
1.72 daniel 7462: }
1.83 daniel 7463: return(name);
1.3 veillard 7464: }
7465:
1.50 daniel 7466: /**
7467: * xmlParseEndTag:
7468: * @ctxt: an XML parser context
7469: *
7470: * parse an end of tag
1.27 daniel 7471: *
7472: * [42] ETag ::= '</' Name S? '>'
1.29 daniel 7473: *
7474: * With namespace
7475: *
1.72 daniel 7476: * [NS 9] ETag ::= '</' QName S? '>'
1.7 veillard 7477: */
7478:
1.55 daniel 7479: void
1.140 daniel 7480: xmlParseEndTag(xmlParserCtxtPtr ctxt) {
1.123 daniel 7481: xmlChar *name;
1.140 daniel 7482: xmlChar *oldname;
1.7 veillard 7483:
1.91 daniel 7484: GROW;
1.152 daniel 7485: if ((RAW != '<') || (NXT(1) != '/')) {
1.55 daniel 7486: if ((ctxt->sax != NULL) && (ctxt->sax->error != NULL))
1.74 daniel 7487: ctxt->sax->error(ctxt->userData, "xmlParseEndTag: '</' not found\n");
1.59 daniel 7488: ctxt->wellFormed = 0;
1.180 daniel 7489: ctxt->disableSAX = 1;
1.123 daniel 7490: ctxt->errNo = XML_ERR_LTSLASH_REQUIRED;
1.27 daniel 7491: return;
7492: }
1.40 daniel 7493: SKIP(2);
1.7 veillard 7494:
1.72 daniel 7495: name = xmlParseName(ctxt);
1.7 veillard 7496:
7497: /*
7498: * We should definitely be at the ending "S? '>'" part
7499: */
1.91 daniel 7500: GROW;
1.42 daniel 7501: SKIP_BLANKS;
1.153 daniel 7502: if ((!IS_CHAR(RAW)) || (RAW != '>')) {
1.55 daniel 7503: if ((ctxt->sax != NULL) && (ctxt->sax->error != NULL))
1.74 daniel 7504: ctxt->sax->error(ctxt->userData, "End tag : expected '>'\n");
1.123 daniel 7505: ctxt->errNo = XML_ERR_GT_REQUIRED;
1.59 daniel 7506: ctxt->wellFormed = 0;
1.180 daniel 7507: ctxt->disableSAX = 1;
1.7 veillard 7508: } else
1.40 daniel 7509: NEXT;
1.7 veillard 7510:
1.72 daniel 7511: /*
1.98 daniel 7512: * [ WFC: Element Type Match ]
7513: * The Name in an element's end-tag must match the element type in the
7514: * start-tag.
7515: *
1.83 daniel 7516: */
1.147 daniel 7517: if ((name == NULL) || (ctxt->name == NULL) ||
7518: (xmlStrcmp(name, ctxt->name))) {
7519: if ((ctxt->sax != NULL) && (ctxt->sax->error != NULL)) {
7520: if ((name != NULL) && (ctxt->name != NULL)) {
7521: ctxt->sax->error(ctxt->userData,
7522: "Opening and ending tag mismatch: %s and %s\n",
7523: ctxt->name, name);
7524: } else if (ctxt->name != NULL) {
7525: ctxt->sax->error(ctxt->userData,
7526: "Ending tag eror for: %s\n", ctxt->name);
7527: } else {
7528: ctxt->sax->error(ctxt->userData,
7529: "Ending tag error: internal error ???\n");
7530: }
1.122 daniel 7531:
1.147 daniel 7532: }
1.123 daniel 7533: ctxt->errNo = XML_ERR_TAG_NAME_MISMATCH;
1.83 daniel 7534: ctxt->wellFormed = 0;
1.180 daniel 7535: ctxt->disableSAX = 1;
1.83 daniel 7536: }
7537:
7538: /*
1.72 daniel 7539: * SAX: End of Tag
7540: */
1.171 daniel 7541: if ((ctxt->sax != NULL) && (ctxt->sax->endElement != NULL) &&
7542: (!ctxt->disableSAX))
1.74 daniel 7543: ctxt->sax->endElement(ctxt->userData, name);
1.72 daniel 7544:
7545: if (name != NULL)
1.119 daniel 7546: xmlFree(name);
1.140 daniel 7547: oldname = namePop(ctxt);
1.176 daniel 7548: spacePop(ctxt);
1.140 daniel 7549: if (oldname != NULL) {
7550: #ifdef DEBUG_STACK
7551: fprintf(stderr,"Close: popped %s\n", oldname);
7552: #endif
7553: xmlFree(oldname);
7554: }
1.7 veillard 7555: return;
7556: }
7557:
1.50 daniel 7558: /**
7559: * xmlParseCDSect:
7560: * @ctxt: an XML parser context
7561: *
7562: * Parse escaped pure raw content.
1.29 daniel 7563: *
7564: * [18] CDSect ::= CDStart CData CDEnd
7565: *
7566: * [19] CDStart ::= '<![CDATA['
7567: *
7568: * [20] Data ::= (Char* - (Char* ']]>' Char*))
7569: *
7570: * [21] CDEnd ::= ']]>'
1.3 veillard 7571: */
1.55 daniel 7572: void
7573: xmlParseCDSect(xmlParserCtxtPtr ctxt) {
1.135 daniel 7574: xmlChar *buf = NULL;
7575: int len = 0;
1.140 daniel 7576: int size = XML_PARSER_BUFFER_SIZE;
1.152 daniel 7577: int r, rl;
7578: int s, sl;
7579: int cur, l;
1.3 veillard 7580:
1.106 daniel 7581: if ((NXT(0) == '<') && (NXT(1) == '!') &&
1.40 daniel 7582: (NXT(2) == '[') && (NXT(3) == 'C') &&
7583: (NXT(4) == 'D') && (NXT(5) == 'A') &&
7584: (NXT(6) == 'T') && (NXT(7) == 'A') &&
7585: (NXT(8) == '[')) {
7586: SKIP(9);
1.29 daniel 7587: } else
1.45 daniel 7588: return;
1.109 daniel 7589:
7590: ctxt->instate = XML_PARSER_CDATA_SECTION;
1.152 daniel 7591: r = CUR_CHAR(rl);
7592: if (!IS_CHAR(r)) {
1.55 daniel 7593: if ((ctxt->sax != NULL) && (ctxt->sax->error != NULL))
1.122 daniel 7594: ctxt->sax->error(ctxt->userData,
1.135 daniel 7595: "CData section not finished\n");
1.59 daniel 7596: ctxt->wellFormed = 0;
1.180 daniel 7597: ctxt->disableSAX = 1;
1.123 daniel 7598: ctxt->errNo = XML_ERR_CDATA_NOT_FINISHED;
1.109 daniel 7599: ctxt->instate = XML_PARSER_CONTENT;
1.45 daniel 7600: return;
1.3 veillard 7601: }
1.152 daniel 7602: NEXTL(rl);
7603: s = CUR_CHAR(sl);
7604: if (!IS_CHAR(s)) {
1.55 daniel 7605: if ((ctxt->sax != NULL) && (ctxt->sax->error != NULL))
1.122 daniel 7606: ctxt->sax->error(ctxt->userData,
1.135 daniel 7607: "CData section not finished\n");
1.123 daniel 7608: ctxt->errNo = XML_ERR_CDATA_NOT_FINISHED;
1.59 daniel 7609: ctxt->wellFormed = 0;
1.180 daniel 7610: ctxt->disableSAX = 1;
1.109 daniel 7611: ctxt->instate = XML_PARSER_CONTENT;
1.45 daniel 7612: return;
1.3 veillard 7613: }
1.152 daniel 7614: NEXTL(sl);
7615: cur = CUR_CHAR(l);
1.135 daniel 7616: buf = (xmlChar *) xmlMalloc(size * sizeof(xmlChar));
7617: if (buf == NULL) {
7618: fprintf(stderr, "malloc of %d byte failed\n", size);
7619: return;
7620: }
1.108 veillard 7621: while (IS_CHAR(cur) &&
1.110 daniel 7622: ((r != ']') || (s != ']') || (cur != '>'))) {
1.152 daniel 7623: if (len + 5 >= size) {
1.135 daniel 7624: size *= 2;
7625: buf = xmlRealloc(buf, size * sizeof(xmlChar));
7626: if (buf == NULL) {
7627: fprintf(stderr, "realloc of %d byte failed\n", size);
7628: return;
7629: }
7630: }
1.152 daniel 7631: COPY_BUF(rl,buf,len,r);
1.110 daniel 7632: r = s;
1.152 daniel 7633: rl = sl;
1.110 daniel 7634: s = cur;
1.152 daniel 7635: sl = l;
7636: NEXTL(l);
7637: cur = CUR_CHAR(l);
1.3 veillard 7638: }
1.135 daniel 7639: buf[len] = 0;
1.109 daniel 7640: ctxt->instate = XML_PARSER_CONTENT;
1.152 daniel 7641: if (cur != '>') {
1.55 daniel 7642: if ((ctxt->sax != NULL) && (ctxt->sax->error != NULL))
1.122 daniel 7643: ctxt->sax->error(ctxt->userData,
1.135 daniel 7644: "CData section not finished\n%.50s\n", buf);
1.123 daniel 7645: ctxt->errNo = XML_ERR_CDATA_NOT_FINISHED;
1.59 daniel 7646: ctxt->wellFormed = 0;
1.180 daniel 7647: ctxt->disableSAX = 1;
1.135 daniel 7648: xmlFree(buf);
1.45 daniel 7649: return;
1.3 veillard 7650: }
1.152 daniel 7651: NEXTL(l);
1.16 daniel 7652:
1.45 daniel 7653: /*
1.135 daniel 7654: * Ok the buffer is to be consumed as cdata.
1.45 daniel 7655: */
1.171 daniel 7656: if ((ctxt->sax != NULL) && (!ctxt->disableSAX)) {
1.107 daniel 7657: if (ctxt->sax->cdataBlock != NULL)
1.135 daniel 7658: ctxt->sax->cdataBlock(ctxt->userData, buf, len);
1.45 daniel 7659: }
1.135 daniel 7660: xmlFree(buf);
1.2 veillard 7661: }
7662:
1.50 daniel 7663: /**
7664: * xmlParseContent:
7665: * @ctxt: an XML parser context
7666: *
7667: * Parse a content:
1.2 veillard 7668: *
1.27 daniel 7669: * [43] content ::= (element | CharData | Reference | CDSect | PI | Comment)*
1.2 veillard 7670: */
7671:
1.55 daniel 7672: void
7673: xmlParseContent(xmlParserCtxtPtr ctxt) {
1.97 daniel 7674: GROW;
1.176 daniel 7675: while (((RAW != 0) || (ctxt->token != 0)) &&
7676: ((RAW != '<') || (NXT(1) != '/'))) {
1.123 daniel 7677: const xmlChar *test = CUR_PTR;
1.91 daniel 7678: int cons = ctxt->input->consumed;
1.123 daniel 7679: xmlChar tok = ctxt->token;
1.27 daniel 7680:
7681: /*
1.152 daniel 7682: * Handle possible processed charrefs.
7683: */
7684: if (ctxt->token != 0) {
7685: xmlParseCharData(ctxt, 0);
7686: }
7687: /*
1.27 daniel 7688: * First case : a Processing Instruction.
7689: */
1.152 daniel 7690: else if ((RAW == '<') && (NXT(1) == '?')) {
1.27 daniel 7691: xmlParsePI(ctxt);
7692: }
1.72 daniel 7693:
1.27 daniel 7694: /*
7695: * Second case : a CDSection
7696: */
1.152 daniel 7697: else if ((RAW == '<') && (NXT(1) == '!') &&
1.40 daniel 7698: (NXT(2) == '[') && (NXT(3) == 'C') &&
7699: (NXT(4) == 'D') && (NXT(5) == 'A') &&
7700: (NXT(6) == 'T') && (NXT(7) == 'A') &&
7701: (NXT(8) == '[')) {
1.45 daniel 7702: xmlParseCDSect(ctxt);
1.27 daniel 7703: }
1.72 daniel 7704:
1.27 daniel 7705: /*
7706: * Third case : a comment
7707: */
1.152 daniel 7708: else if ((RAW == '<') && (NXT(1) == '!') &&
1.40 daniel 7709: (NXT(2) == '-') && (NXT(3) == '-')) {
1.114 daniel 7710: xmlParseComment(ctxt);
1.97 daniel 7711: ctxt->instate = XML_PARSER_CONTENT;
1.27 daniel 7712: }
1.72 daniel 7713:
1.27 daniel 7714: /*
7715: * Fourth case : a sub-element.
7716: */
1.152 daniel 7717: else if (RAW == '<') {
1.72 daniel 7718: xmlParseElement(ctxt);
1.45 daniel 7719: }
1.72 daniel 7720:
1.45 daniel 7721: /*
1.50 daniel 7722: * Fifth case : a reference. If if has not been resolved,
7723: * parsing returns it's Name, create the node
1.45 daniel 7724: */
1.97 daniel 7725:
1.152 daniel 7726: else if (RAW == '&') {
1.77 daniel 7727: xmlParseReference(ctxt);
1.27 daniel 7728: }
1.72 daniel 7729:
1.27 daniel 7730: /*
7731: * Last case, text. Note that References are handled directly.
7732: */
7733: else {
1.45 daniel 7734: xmlParseCharData(ctxt, 0);
1.3 veillard 7735: }
1.14 veillard 7736:
1.91 daniel 7737: GROW;
1.14 veillard 7738: /*
1.45 daniel 7739: * Pop-up of finished entities.
1.14 veillard 7740: */
1.152 daniel 7741: while ((RAW == 0) && (ctxt->inputNr > 1))
1.69 daniel 7742: xmlPopInput(ctxt);
1.135 daniel 7743: SHRINK;
1.45 daniel 7744:
1.113 daniel 7745: if ((cons == ctxt->input->consumed) && (test == CUR_PTR) &&
7746: (tok == ctxt->token)) {
1.55 daniel 7747: if ((ctxt->sax != NULL) && (ctxt->sax->error != NULL))
1.74 daniel 7748: ctxt->sax->error(ctxt->userData,
1.59 daniel 7749: "detected an error in element content\n");
1.123 daniel 7750: ctxt->errNo = XML_ERR_INTERNAL_ERROR;
1.59 daniel 7751: ctxt->wellFormed = 0;
1.180 daniel 7752: ctxt->disableSAX = 1;
1.29 daniel 7753: break;
7754: }
1.3 veillard 7755: }
1.2 veillard 7756: }
7757:
1.50 daniel 7758: /**
7759: * xmlParseElement:
7760: * @ctxt: an XML parser context
7761: *
7762: * parse an XML element, this is highly recursive
1.26 daniel 7763: *
7764: * [39] element ::= EmptyElemTag | STag content ETag
7765: *
1.98 daniel 7766: * [ WFC: Element Type Match ]
7767: * The Name in an element's end-tag must match the element type in the
7768: * start-tag.
7769: *
7770: * [ VC: Element Valid ]
1.117 daniel 7771: * An element is valid if there is a declaration matching elementdecl
1.99 daniel 7772: * where the Name matches the element type and one of the following holds:
7773: * - The declaration matches EMPTY and the element has no content.
7774: * - The declaration matches children and the sequence of child elements
7775: * belongs to the language generated by the regular expression in the
7776: * content model, with optional white space (characters matching the
7777: * nonterminal S) between each pair of child elements.
7778: * - The declaration matches Mixed and the content consists of character
7779: * data and child elements whose types match names in the content model.
7780: * - The declaration matches ANY, and the types of any child elements have
7781: * been declared.
1.2 veillard 7782: */
1.26 daniel 7783:
1.72 daniel 7784: void
1.69 daniel 7785: xmlParseElement(xmlParserCtxtPtr ctxt) {
1.123 daniel 7786: const xmlChar *openTag = CUR_PTR;
7787: xmlChar *name;
1.140 daniel 7788: xmlChar *oldname;
1.32 daniel 7789: xmlParserNodeInfo node_info;
1.118 daniel 7790: xmlNodePtr ret;
1.2 veillard 7791:
1.32 daniel 7792: /* Capture start position */
1.118 daniel 7793: if (ctxt->record_info) {
7794: node_info.begin_pos = ctxt->input->consumed +
7795: (CUR_PTR - ctxt->input->base);
7796: node_info.begin_line = ctxt->input->line;
7797: }
1.32 daniel 7798:
1.176 daniel 7799: if (ctxt->spaceNr == 0)
7800: spacePush(ctxt, -1);
7801: else
7802: spacePush(ctxt, *ctxt->space);
7803:
1.83 daniel 7804: name = xmlParseStartTag(ctxt);
7805: if (name == NULL) {
1.176 daniel 7806: spacePop(ctxt);
1.83 daniel 7807: return;
7808: }
1.140 daniel 7809: namePush(ctxt, name);
1.118 daniel 7810: ret = ctxt->node;
1.2 veillard 7811:
7812: /*
1.99 daniel 7813: * [ VC: Root Element Type ]
7814: * The Name in the document type declaration must match the element
7815: * type of the root element.
7816: */
1.105 daniel 7817: if (ctxt->validate && ctxt->wellFormed && ctxt->myDoc &&
1.157 daniel 7818: ctxt->node && (ctxt->node == ctxt->myDoc->children))
1.102 daniel 7819: ctxt->valid &= xmlValidateRoot(&ctxt->vctxt, ctxt->myDoc);
1.99 daniel 7820:
7821: /*
1.2 veillard 7822: * Check for an Empty Element.
7823: */
1.152 daniel 7824: if ((RAW == '/') && (NXT(1) == '>')) {
1.40 daniel 7825: SKIP(2);
1.171 daniel 7826: if ((ctxt->sax != NULL) && (ctxt->sax->endElement != NULL) &&
7827: (!ctxt->disableSAX))
1.83 daniel 7828: ctxt->sax->endElement(ctxt->userData, name);
1.140 daniel 7829: oldname = namePop(ctxt);
1.176 daniel 7830: spacePop(ctxt);
1.140 daniel 7831: if (oldname != NULL) {
7832: #ifdef DEBUG_STACK
7833: fprintf(stderr,"Close: popped %s\n", oldname);
7834: #endif
7835: xmlFree(oldname);
7836: }
1.72 daniel 7837: return;
1.2 veillard 7838: }
1.152 daniel 7839: if (RAW == '>') {
1.91 daniel 7840: NEXT;
7841: } else {
1.55 daniel 7842: if ((ctxt->sax != NULL) && (ctxt->sax->error != NULL))
1.122 daniel 7843: ctxt->sax->error(ctxt->userData,
7844: "Couldn't find end of Start Tag\n%.30s\n",
1.57 daniel 7845: openTag);
1.59 daniel 7846: ctxt->wellFormed = 0;
1.180 daniel 7847: ctxt->disableSAX = 1;
1.123 daniel 7848: ctxt->errNo = XML_ERR_GT_REQUIRED;
1.45 daniel 7849:
7850: /*
7851: * end of parsing of this node.
7852: */
7853: nodePop(ctxt);
1.140 daniel 7854: oldname = namePop(ctxt);
1.176 daniel 7855: spacePop(ctxt);
1.140 daniel 7856: if (oldname != NULL) {
7857: #ifdef DEBUG_STACK
7858: fprintf(stderr,"Close: popped %s\n", oldname);
7859: #endif
7860: xmlFree(oldname);
7861: }
1.118 daniel 7862:
7863: /*
7864: * Capture end position and add node
7865: */
7866: if ( ret != NULL && ctxt->record_info ) {
7867: node_info.end_pos = ctxt->input->consumed +
7868: (CUR_PTR - ctxt->input->base);
7869: node_info.end_line = ctxt->input->line;
7870: node_info.node = ret;
7871: xmlParserAddNodeInfo(ctxt, &node_info);
7872: }
1.72 daniel 7873: return;
1.2 veillard 7874: }
7875:
7876: /*
7877: * Parse the content of the element:
7878: */
1.45 daniel 7879: xmlParseContent(ctxt);
1.153 daniel 7880: if (!IS_CHAR(RAW)) {
1.55 daniel 7881: if ((ctxt->sax != NULL) && (ctxt->sax->error != NULL))
1.74 daniel 7882: ctxt->sax->error(ctxt->userData,
1.57 daniel 7883: "Premature end of data in tag %.30s\n", openTag);
1.59 daniel 7884: ctxt->wellFormed = 0;
1.180 daniel 7885: ctxt->disableSAX = 1;
1.123 daniel 7886: ctxt->errNo = XML_ERR_TAG_NOT_FINISED;
1.45 daniel 7887:
7888: /*
7889: * end of parsing of this node.
7890: */
7891: nodePop(ctxt);
1.140 daniel 7892: oldname = namePop(ctxt);
1.176 daniel 7893: spacePop(ctxt);
1.140 daniel 7894: if (oldname != NULL) {
7895: #ifdef DEBUG_STACK
7896: fprintf(stderr,"Close: popped %s\n", oldname);
7897: #endif
7898: xmlFree(oldname);
7899: }
1.72 daniel 7900: return;
1.2 veillard 7901: }
7902:
7903: /*
1.27 daniel 7904: * parse the end of tag: '</' should be here.
1.2 veillard 7905: */
1.140 daniel 7906: xmlParseEndTag(ctxt);
1.118 daniel 7907:
7908: /*
7909: * Capture end position and add node
7910: */
7911: if ( ret != NULL && ctxt->record_info ) {
7912: node_info.end_pos = ctxt->input->consumed +
7913: (CUR_PTR - ctxt->input->base);
7914: node_info.end_line = ctxt->input->line;
7915: node_info.node = ret;
7916: xmlParserAddNodeInfo(ctxt, &node_info);
7917: }
1.2 veillard 7918: }
7919:
1.50 daniel 7920: /**
7921: * xmlParseVersionNum:
7922: * @ctxt: an XML parser context
7923: *
7924: * parse the XML version value.
1.29 daniel 7925: *
7926: * [26] VersionNum ::= ([a-zA-Z0-9_.:] | '-')+
1.68 daniel 7927: *
7928: * Returns the string giving the XML version number, or NULL
1.29 daniel 7929: */
1.123 daniel 7930: xmlChar *
1.55 daniel 7931: xmlParseVersionNum(xmlParserCtxtPtr ctxt) {
1.135 daniel 7932: xmlChar *buf = NULL;
7933: int len = 0;
7934: int size = 10;
7935: xmlChar cur;
1.29 daniel 7936:
1.135 daniel 7937: buf = (xmlChar *) xmlMalloc(size * sizeof(xmlChar));
7938: if (buf == NULL) {
7939: fprintf(stderr, "malloc of %d byte failed\n", size);
7940: return(NULL);
7941: }
7942: cur = CUR;
1.152 daniel 7943: while (((cur >= 'a') && (cur <= 'z')) ||
7944: ((cur >= 'A') && (cur <= 'Z')) ||
7945: ((cur >= '0') && (cur <= '9')) ||
7946: (cur == '_') || (cur == '.') ||
7947: (cur == ':') || (cur == '-')) {
1.135 daniel 7948: if (len + 1 >= size) {
7949: size *= 2;
7950: buf = xmlRealloc(buf, size * sizeof(xmlChar));
7951: if (buf == NULL) {
7952: fprintf(stderr, "realloc of %d byte failed\n", size);
7953: return(NULL);
7954: }
7955: }
7956: buf[len++] = cur;
7957: NEXT;
7958: cur=CUR;
7959: }
7960: buf[len] = 0;
7961: return(buf);
1.29 daniel 7962: }
7963:
1.50 daniel 7964: /**
7965: * xmlParseVersionInfo:
7966: * @ctxt: an XML parser context
7967: *
7968: * parse the XML version.
1.29 daniel 7969: *
7970: * [24] VersionInfo ::= S 'version' Eq (' VersionNum ' | " VersionNum ")
7971: *
7972: * [25] Eq ::= S? '=' S?
1.50 daniel 7973: *
1.68 daniel 7974: * Returns the version string, e.g. "1.0"
1.29 daniel 7975: */
7976:
1.123 daniel 7977: xmlChar *
1.55 daniel 7978: xmlParseVersionInfo(xmlParserCtxtPtr ctxt) {
1.123 daniel 7979: xmlChar *version = NULL;
7980: const xmlChar *q;
1.29 daniel 7981:
1.152 daniel 7982: if ((RAW == 'v') && (NXT(1) == 'e') &&
1.40 daniel 7983: (NXT(2) == 'r') && (NXT(3) == 's') &&
7984: (NXT(4) == 'i') && (NXT(5) == 'o') &&
7985: (NXT(6) == 'n')) {
7986: SKIP(7);
1.42 daniel 7987: SKIP_BLANKS;
1.152 daniel 7988: if (RAW != '=') {
1.55 daniel 7989: if ((ctxt->sax != NULL) && (ctxt->sax->error != NULL))
1.122 daniel 7990: ctxt->sax->error(ctxt->userData,
7991: "xmlParseVersionInfo : expected '='\n");
1.59 daniel 7992: ctxt->wellFormed = 0;
1.180 daniel 7993: ctxt->disableSAX = 1;
1.123 daniel 7994: ctxt->errNo = XML_ERR_EQUAL_REQUIRED;
1.31 daniel 7995: return(NULL);
7996: }
1.40 daniel 7997: NEXT;
1.42 daniel 7998: SKIP_BLANKS;
1.152 daniel 7999: if (RAW == '"') {
1.40 daniel 8000: NEXT;
8001: q = CUR_PTR;
1.29 daniel 8002: version = xmlParseVersionNum(ctxt);
1.152 daniel 8003: if (RAW != '"') {
1.55 daniel 8004: if ((ctxt->sax != NULL) && (ctxt->sax->error != NULL))
1.122 daniel 8005: ctxt->sax->error(ctxt->userData,
8006: "String not closed\n%.50s\n", q);
1.59 daniel 8007: ctxt->wellFormed = 0;
1.180 daniel 8008: ctxt->disableSAX = 1;
1.123 daniel 8009: ctxt->errNo = XML_ERR_STRING_NOT_CLOSED;
1.55 daniel 8010: } else
1.40 daniel 8011: NEXT;
1.152 daniel 8012: } else if (RAW == '\''){
1.40 daniel 8013: NEXT;
8014: q = CUR_PTR;
1.29 daniel 8015: version = xmlParseVersionNum(ctxt);
1.152 daniel 8016: if (RAW != '\'') {
1.55 daniel 8017: if ((ctxt->sax != NULL) && (ctxt->sax->error != NULL))
1.122 daniel 8018: ctxt->sax->error(ctxt->userData,
8019: "String not closed\n%.50s\n", q);
1.123 daniel 8020: ctxt->errNo = XML_ERR_STRING_NOT_CLOSED;
1.59 daniel 8021: ctxt->wellFormed = 0;
1.180 daniel 8022: ctxt->disableSAX = 1;
1.55 daniel 8023: } else
1.40 daniel 8024: NEXT;
1.31 daniel 8025: } else {
1.55 daniel 8026: if ((ctxt->sax != NULL) && (ctxt->sax->error != NULL))
1.74 daniel 8027: ctxt->sax->error(ctxt->userData,
1.59 daniel 8028: "xmlParseVersionInfo : expected ' or \"\n");
1.122 daniel 8029: ctxt->wellFormed = 0;
1.180 daniel 8030: ctxt->disableSAX = 1;
1.123 daniel 8031: ctxt->errNo = XML_ERR_STRING_NOT_STARTED;
1.29 daniel 8032: }
8033: }
8034: return(version);
8035: }
8036:
1.50 daniel 8037: /**
8038: * xmlParseEncName:
8039: * @ctxt: an XML parser context
8040: *
8041: * parse the XML encoding name
1.29 daniel 8042: *
8043: * [81] EncName ::= [A-Za-z] ([A-Za-z0-9._] | '-')*
1.50 daniel 8044: *
1.68 daniel 8045: * Returns the encoding name value or NULL
1.29 daniel 8046: */
1.123 daniel 8047: xmlChar *
1.55 daniel 8048: xmlParseEncName(xmlParserCtxtPtr ctxt) {
1.135 daniel 8049: xmlChar *buf = NULL;
8050: int len = 0;
8051: int size = 10;
8052: xmlChar cur;
1.29 daniel 8053:
1.135 daniel 8054: cur = CUR;
8055: if (((cur >= 'a') && (cur <= 'z')) ||
8056: ((cur >= 'A') && (cur <= 'Z'))) {
8057: buf = (xmlChar *) xmlMalloc(size * sizeof(xmlChar));
8058: if (buf == NULL) {
8059: fprintf(stderr, "malloc of %d byte failed\n", size);
8060: return(NULL);
8061: }
8062:
8063: buf[len++] = cur;
1.40 daniel 8064: NEXT;
1.135 daniel 8065: cur = CUR;
1.152 daniel 8066: while (((cur >= 'a') && (cur <= 'z')) ||
8067: ((cur >= 'A') && (cur <= 'Z')) ||
8068: ((cur >= '0') && (cur <= '9')) ||
8069: (cur == '.') || (cur == '_') ||
8070: (cur == '-')) {
1.135 daniel 8071: if (len + 1 >= size) {
8072: size *= 2;
8073: buf = xmlRealloc(buf, size * sizeof(xmlChar));
8074: if (buf == NULL) {
8075: fprintf(stderr, "realloc of %d byte failed\n", size);
8076: return(NULL);
8077: }
8078: }
8079: buf[len++] = cur;
8080: NEXT;
8081: cur = CUR;
8082: if (cur == 0) {
8083: SHRINK;
8084: GROW;
8085: cur = CUR;
8086: }
8087: }
8088: buf[len] = 0;
1.29 daniel 8089: } else {
1.55 daniel 8090: if ((ctxt->sax != NULL) && (ctxt->sax->error != NULL))
1.74 daniel 8091: ctxt->sax->error(ctxt->userData, "Invalid XML encoding name\n");
1.59 daniel 8092: ctxt->wellFormed = 0;
1.180 daniel 8093: ctxt->disableSAX = 1;
1.123 daniel 8094: ctxt->errNo = XML_ERR_ENCODING_NAME;
1.29 daniel 8095: }
1.135 daniel 8096: return(buf);
1.29 daniel 8097: }
8098:
1.50 daniel 8099: /**
8100: * xmlParseEncodingDecl:
8101: * @ctxt: an XML parser context
8102: *
8103: * parse the XML encoding declaration
1.29 daniel 8104: *
8105: * [80] EncodingDecl ::= S 'encoding' Eq ('"' EncName '"' | "'" EncName "'")
1.50 daniel 8106: *
8107: * TODO: this should setup the conversion filters.
8108: *
1.68 daniel 8109: * Returns the encoding value or NULL
1.29 daniel 8110: */
8111:
1.123 daniel 8112: xmlChar *
1.55 daniel 8113: xmlParseEncodingDecl(xmlParserCtxtPtr ctxt) {
1.123 daniel 8114: xmlChar *encoding = NULL;
8115: const xmlChar *q;
1.29 daniel 8116:
1.42 daniel 8117: SKIP_BLANKS;
1.152 daniel 8118: if ((RAW == 'e') && (NXT(1) == 'n') &&
1.40 daniel 8119: (NXT(2) == 'c') && (NXT(3) == 'o') &&
8120: (NXT(4) == 'd') && (NXT(5) == 'i') &&
8121: (NXT(6) == 'n') && (NXT(7) == 'g')) {
8122: SKIP(8);
1.42 daniel 8123: SKIP_BLANKS;
1.152 daniel 8124: if (RAW != '=') {
1.55 daniel 8125: if ((ctxt->sax != NULL) && (ctxt->sax->error != NULL))
1.122 daniel 8126: ctxt->sax->error(ctxt->userData,
8127: "xmlParseEncodingDecl : expected '='\n");
1.59 daniel 8128: ctxt->wellFormed = 0;
1.180 daniel 8129: ctxt->disableSAX = 1;
1.123 daniel 8130: ctxt->errNo = XML_ERR_EQUAL_REQUIRED;
1.31 daniel 8131: return(NULL);
8132: }
1.40 daniel 8133: NEXT;
1.42 daniel 8134: SKIP_BLANKS;
1.152 daniel 8135: if (RAW == '"') {
1.40 daniel 8136: NEXT;
8137: q = CUR_PTR;
1.29 daniel 8138: encoding = xmlParseEncName(ctxt);
1.152 daniel 8139: if (RAW != '"') {
1.55 daniel 8140: if ((ctxt->sax != NULL) && (ctxt->sax->error != NULL))
1.122 daniel 8141: ctxt->sax->error(ctxt->userData,
8142: "String not closed\n%.50s\n", q);
1.59 daniel 8143: ctxt->wellFormed = 0;
1.180 daniel 8144: ctxt->disableSAX = 1;
1.123 daniel 8145: ctxt->errNo = XML_ERR_STRING_NOT_CLOSED;
1.55 daniel 8146: } else
1.40 daniel 8147: NEXT;
1.152 daniel 8148: } else if (RAW == '\''){
1.40 daniel 8149: NEXT;
8150: q = CUR_PTR;
1.29 daniel 8151: encoding = xmlParseEncName(ctxt);
1.152 daniel 8152: if (RAW != '\'') {
1.55 daniel 8153: if ((ctxt->sax != NULL) && (ctxt->sax->error != NULL))
1.122 daniel 8154: ctxt->sax->error(ctxt->userData,
8155: "String not closed\n%.50s\n", q);
1.59 daniel 8156: ctxt->wellFormed = 0;
1.180 daniel 8157: ctxt->disableSAX = 1;
1.123 daniel 8158: ctxt->errNo = XML_ERR_STRING_NOT_CLOSED;
1.55 daniel 8159: } else
1.40 daniel 8160: NEXT;
1.152 daniel 8161: } else if (RAW == '"'){
1.55 daniel 8162: if ((ctxt->sax != NULL) && (ctxt->sax->error != NULL))
1.74 daniel 8163: ctxt->sax->error(ctxt->userData,
1.59 daniel 8164: "xmlParseEncodingDecl : expected ' or \"\n");
8165: ctxt->wellFormed = 0;
1.180 daniel 8166: ctxt->disableSAX = 1;
1.123 daniel 8167: ctxt->errNo = XML_ERR_STRING_NOT_STARTED;
1.29 daniel 8168: }
8169: }
8170: return(encoding);
8171: }
8172:
1.50 daniel 8173: /**
8174: * xmlParseSDDecl:
8175: * @ctxt: an XML parser context
8176: *
8177: * parse the XML standalone declaration
1.29 daniel 8178: *
8179: * [32] SDDecl ::= S 'standalone' Eq
8180: * (("'" ('yes' | 'no') "'") | ('"' ('yes' | 'no')'"'))
1.99 daniel 8181: *
8182: * [ VC: Standalone Document Declaration ]
8183: * TODO The standalone document declaration must have the value "no"
8184: * if any external markup declarations contain declarations of:
8185: * - attributes with default values, if elements to which these
8186: * attributes apply appear in the document without specifications
8187: * of values for these attributes, or
8188: * - entities (other than amp, lt, gt, apos, quot), if references
8189: * to those entities appear in the document, or
8190: * - attributes with values subject to normalization, where the
8191: * attribute appears in the document with a value which will change
8192: * as a result of normalization, or
8193: * - element types with element content, if white space occurs directly
8194: * within any instance of those types.
1.68 daniel 8195: *
8196: * Returns 1 if standalone, 0 otherwise
1.29 daniel 8197: */
8198:
1.55 daniel 8199: int
8200: xmlParseSDDecl(xmlParserCtxtPtr ctxt) {
1.29 daniel 8201: int standalone = -1;
8202:
1.42 daniel 8203: SKIP_BLANKS;
1.152 daniel 8204: if ((RAW == 's') && (NXT(1) == 't') &&
1.40 daniel 8205: (NXT(2) == 'a') && (NXT(3) == 'n') &&
8206: (NXT(4) == 'd') && (NXT(5) == 'a') &&
8207: (NXT(6) == 'l') && (NXT(7) == 'o') &&
8208: (NXT(8) == 'n') && (NXT(9) == 'e')) {
8209: SKIP(10);
1.81 daniel 8210: SKIP_BLANKS;
1.152 daniel 8211: if (RAW != '=') {
1.55 daniel 8212: if ((ctxt->sax != NULL) && (ctxt->sax->error != NULL))
1.74 daniel 8213: ctxt->sax->error(ctxt->userData,
1.59 daniel 8214: "XML standalone declaration : expected '='\n");
1.123 daniel 8215: ctxt->errNo = XML_ERR_EQUAL_REQUIRED;
1.59 daniel 8216: ctxt->wellFormed = 0;
1.180 daniel 8217: ctxt->disableSAX = 1;
1.32 daniel 8218: return(standalone);
8219: }
1.40 daniel 8220: NEXT;
1.42 daniel 8221: SKIP_BLANKS;
1.152 daniel 8222: if (RAW == '\''){
1.40 daniel 8223: NEXT;
1.152 daniel 8224: if ((RAW == 'n') && (NXT(1) == 'o')) {
1.29 daniel 8225: standalone = 0;
1.40 daniel 8226: SKIP(2);
1.152 daniel 8227: } else if ((RAW == 'y') && (NXT(1) == 'e') &&
1.40 daniel 8228: (NXT(2) == 's')) {
1.29 daniel 8229: standalone = 1;
1.40 daniel 8230: SKIP(3);
1.29 daniel 8231: } else {
1.55 daniel 8232: if ((ctxt->sax != NULL) && (ctxt->sax->error != NULL))
1.122 daniel 8233: ctxt->sax->error(ctxt->userData,
8234: "standalone accepts only 'yes' or 'no'\n");
1.123 daniel 8235: ctxt->errNo = XML_ERR_STANDALONE_VALUE;
1.59 daniel 8236: ctxt->wellFormed = 0;
1.180 daniel 8237: ctxt->disableSAX = 1;
1.29 daniel 8238: }
1.152 daniel 8239: if (RAW != '\'') {
1.55 daniel 8240: if ((ctxt->sax != NULL) && (ctxt->sax->error != NULL))
1.74 daniel 8241: ctxt->sax->error(ctxt->userData, "String not closed\n");
1.123 daniel 8242: ctxt->errNo = XML_ERR_STRING_NOT_CLOSED;
1.59 daniel 8243: ctxt->wellFormed = 0;
1.180 daniel 8244: ctxt->disableSAX = 1;
1.55 daniel 8245: } else
1.40 daniel 8246: NEXT;
1.152 daniel 8247: } else if (RAW == '"'){
1.40 daniel 8248: NEXT;
1.152 daniel 8249: if ((RAW == 'n') && (NXT(1) == 'o')) {
1.29 daniel 8250: standalone = 0;
1.40 daniel 8251: SKIP(2);
1.152 daniel 8252: } else if ((RAW == 'y') && (NXT(1) == 'e') &&
1.40 daniel 8253: (NXT(2) == 's')) {
1.29 daniel 8254: standalone = 1;
1.40 daniel 8255: SKIP(3);
1.29 daniel 8256: } else {
1.55 daniel 8257: if ((ctxt->sax != NULL) && (ctxt->sax->error != NULL))
1.74 daniel 8258: ctxt->sax->error(ctxt->userData,
1.59 daniel 8259: "standalone accepts only 'yes' or 'no'\n");
1.123 daniel 8260: ctxt->errNo = XML_ERR_STANDALONE_VALUE;
1.59 daniel 8261: ctxt->wellFormed = 0;
1.180 daniel 8262: ctxt->disableSAX = 1;
1.29 daniel 8263: }
1.152 daniel 8264: if (RAW != '"') {
1.55 daniel 8265: if ((ctxt->sax != NULL) && (ctxt->sax->error != NULL))
1.74 daniel 8266: ctxt->sax->error(ctxt->userData, "String not closed\n");
1.59 daniel 8267: ctxt->wellFormed = 0;
1.180 daniel 8268: ctxt->disableSAX = 1;
1.123 daniel 8269: ctxt->errNo = XML_ERR_STRING_NOT_CLOSED;
1.55 daniel 8270: } else
1.40 daniel 8271: NEXT;
1.37 daniel 8272: } else {
1.55 daniel 8273: if ((ctxt->sax != NULL) && (ctxt->sax->error != NULL))
1.122 daniel 8274: ctxt->sax->error(ctxt->userData,
8275: "Standalone value not found\n");
1.59 daniel 8276: ctxt->wellFormed = 0;
1.180 daniel 8277: ctxt->disableSAX = 1;
1.123 daniel 8278: ctxt->errNo = XML_ERR_STRING_NOT_STARTED;
1.37 daniel 8279: }
1.29 daniel 8280: }
8281: return(standalone);
8282: }
8283:
1.50 daniel 8284: /**
8285: * xmlParseXMLDecl:
8286: * @ctxt: an XML parser context
8287: *
8288: * parse an XML declaration header
1.29 daniel 8289: *
8290: * [23] XMLDecl ::= '<?xml' VersionInfo EncodingDecl? SDDecl? S? '?>'
1.1 veillard 8291: */
8292:
1.55 daniel 8293: void
8294: xmlParseXMLDecl(xmlParserCtxtPtr ctxt) {
1.123 daniel 8295: xmlChar *version;
1.1 veillard 8296:
8297: /*
1.19 daniel 8298: * We know that '<?xml' is here.
1.1 veillard 8299: */
1.40 daniel 8300: SKIP(5);
1.1 veillard 8301:
1.153 daniel 8302: if (!IS_BLANK(RAW)) {
1.59 daniel 8303: if ((ctxt->sax != NULL) && (ctxt->sax->error != NULL))
1.74 daniel 8304: ctxt->sax->error(ctxt->userData, "Blank needed after '<?xml'\n");
1.123 daniel 8305: ctxt->errNo = XML_ERR_SPACE_REQUIRED;
1.59 daniel 8306: ctxt->wellFormed = 0;
1.180 daniel 8307: ctxt->disableSAX = 1;
1.59 daniel 8308: }
1.42 daniel 8309: SKIP_BLANKS;
1.1 veillard 8310:
8311: /*
1.29 daniel 8312: * We should have the VersionInfo here.
1.1 veillard 8313: */
1.29 daniel 8314: version = xmlParseVersionInfo(ctxt);
8315: if (version == NULL)
1.45 daniel 8316: version = xmlCharStrdup(XML_DEFAULT_VERSION);
1.72 daniel 8317: ctxt->version = xmlStrdup(version);
1.119 daniel 8318: xmlFree(version);
1.29 daniel 8319:
8320: /*
8321: * We may have the encoding declaration
8322: */
1.153 daniel 8323: if (!IS_BLANK(RAW)) {
1.152 daniel 8324: if ((RAW == '?') && (NXT(1) == '>')) {
1.59 daniel 8325: SKIP(2);
8326: return;
8327: }
8328: if ((ctxt->sax != NULL) && (ctxt->sax->error != NULL))
1.74 daniel 8329: ctxt->sax->error(ctxt->userData, "Blank needed here\n");
1.123 daniel 8330: ctxt->errNo = XML_ERR_SPACE_REQUIRED;
1.59 daniel 8331: ctxt->wellFormed = 0;
1.180 daniel 8332: ctxt->disableSAX = 1;
1.59 daniel 8333: }
1.164 daniel 8334: ctxt->input->encoding = xmlParseEncodingDecl(ctxt);
1.1 veillard 8335:
8336: /*
1.29 daniel 8337: * We may have the standalone status.
1.1 veillard 8338: */
1.164 daniel 8339: if ((ctxt->input->encoding != NULL) && (!IS_BLANK(RAW))) {
1.152 daniel 8340: if ((RAW == '?') && (NXT(1) == '>')) {
1.59 daniel 8341: SKIP(2);
8342: return;
8343: }
8344: if ((ctxt->sax != NULL) && (ctxt->sax->error != NULL))
1.74 daniel 8345: ctxt->sax->error(ctxt->userData, "Blank needed here\n");
1.59 daniel 8346: ctxt->wellFormed = 0;
1.180 daniel 8347: ctxt->disableSAX = 1;
1.123 daniel 8348: ctxt->errNo = XML_ERR_SPACE_REQUIRED;
1.59 daniel 8349: }
8350: SKIP_BLANKS;
1.167 daniel 8351: ctxt->input->standalone = xmlParseSDDecl(ctxt);
1.1 veillard 8352:
1.42 daniel 8353: SKIP_BLANKS;
1.152 daniel 8354: if ((RAW == '?') && (NXT(1) == '>')) {
1.40 daniel 8355: SKIP(2);
1.152 daniel 8356: } else if (RAW == '>') {
1.31 daniel 8357: /* Deprecated old WD ... */
1.55 daniel 8358: if ((ctxt->sax != NULL) && (ctxt->sax->error != NULL))
1.122 daniel 8359: ctxt->sax->error(ctxt->userData,
8360: "XML declaration must end-up with '?>'\n");
1.59 daniel 8361: ctxt->wellFormed = 0;
1.180 daniel 8362: ctxt->disableSAX = 1;
1.123 daniel 8363: ctxt->errNo = XML_ERR_XMLDECL_NOT_FINISHED;
1.40 daniel 8364: NEXT;
1.29 daniel 8365: } else {
1.55 daniel 8366: if ((ctxt->sax != NULL) && (ctxt->sax->error != NULL))
1.122 daniel 8367: ctxt->sax->error(ctxt->userData,
8368: "parsing XML declaration: '?>' expected\n");
1.59 daniel 8369: ctxt->wellFormed = 0;
1.180 daniel 8370: ctxt->disableSAX = 1;
1.123 daniel 8371: ctxt->errNo = XML_ERR_XMLDECL_NOT_FINISHED;
1.40 daniel 8372: MOVETO_ENDTAG(CUR_PTR);
8373: NEXT;
1.29 daniel 8374: }
1.1 veillard 8375: }
8376:
1.50 daniel 8377: /**
8378: * xmlParseMisc:
8379: * @ctxt: an XML parser context
8380: *
8381: * parse an XML Misc* optionnal field.
1.21 daniel 8382: *
1.22 daniel 8383: * [27] Misc ::= Comment | PI | S
1.1 veillard 8384: */
8385:
1.55 daniel 8386: void
8387: xmlParseMisc(xmlParserCtxtPtr ctxt) {
1.152 daniel 8388: while (((RAW == '<') && (NXT(1) == '?')) ||
8389: ((RAW == '<') && (NXT(1) == '!') &&
1.40 daniel 8390: (NXT(2) == '-') && (NXT(3) == '-')) ||
8391: IS_BLANK(CUR)) {
1.152 daniel 8392: if ((RAW == '<') && (NXT(1) == '?')) {
1.16 daniel 8393: xmlParsePI(ctxt);
1.40 daniel 8394: } else if (IS_BLANK(CUR)) {
8395: NEXT;
1.1 veillard 8396: } else
1.114 daniel 8397: xmlParseComment(ctxt);
1.1 veillard 8398: }
8399: }
8400:
1.50 daniel 8401: /**
1.181 daniel 8402: * xmlParseDocument:
1.50 daniel 8403: * @ctxt: an XML parser context
8404: *
8405: * parse an XML document (and build a tree if using the standard SAX
8406: * interface).
1.21 daniel 8407: *
1.22 daniel 8408: * [1] document ::= prolog element Misc*
1.29 daniel 8409: *
8410: * [22] prolog ::= XMLDecl? Misc* (doctypedecl Misc*)?
1.50 daniel 8411: *
1.68 daniel 8412: * Returns 0, -1 in case of error. the parser context is augmented
1.50 daniel 8413: * as a result of the parsing.
1.1 veillard 8414: */
8415:
1.55 daniel 8416: int
8417: xmlParseDocument(xmlParserCtxtPtr ctxt) {
1.156 daniel 8418: xmlChar start[4];
8419: xmlCharEncoding enc;
8420:
1.45 daniel 8421: xmlDefaultSAXHandlerInit();
8422:
1.91 daniel 8423: GROW;
8424:
1.14 veillard 8425: /*
1.44 daniel 8426: * SAX: beginning of the document processing.
8427: */
1.72 daniel 8428: if ((ctxt->sax) && (ctxt->sax->setDocumentLocator))
1.74 daniel 8429: ctxt->sax->setDocumentLocator(ctxt->userData, &xmlDefaultSAXLocator);
1.44 daniel 8430:
1.156 daniel 8431: /*
8432: * Get the 4 first bytes and decode the charset
8433: * if enc != XML_CHAR_ENCODING_NONE
8434: * plug some encoding conversion routines.
8435: */
8436: start[0] = RAW;
8437: start[1] = NXT(1);
8438: start[2] = NXT(2);
8439: start[3] = NXT(3);
8440: enc = xmlDetectCharEncoding(start, 4);
8441: if (enc != XML_CHAR_ENCODING_NONE) {
8442: xmlSwitchEncoding(ctxt, enc);
8443: }
8444:
1.1 veillard 8445:
1.59 daniel 8446: if (CUR == 0) {
8447: if ((ctxt->sax != NULL) && (ctxt->sax->error != NULL))
1.74 daniel 8448: ctxt->sax->error(ctxt->userData, "Document is empty\n");
1.123 daniel 8449: ctxt->errNo = XML_ERR_DOCUMENT_EMPTY;
1.59 daniel 8450: ctxt->wellFormed = 0;
1.180 daniel 8451: ctxt->disableSAX = 1;
1.59 daniel 8452: }
1.1 veillard 8453:
8454: /*
8455: * Check for the XMLDecl in the Prolog.
8456: */
1.91 daniel 8457: GROW;
1.152 daniel 8458: if ((RAW == '<') && (NXT(1) == '?') &&
1.40 daniel 8459: (NXT(2) == 'x') && (NXT(3) == 'm') &&
1.142 daniel 8460: (NXT(4) == 'l') && (IS_BLANK(NXT(5)))) {
1.19 daniel 8461: xmlParseXMLDecl(ctxt);
1.167 daniel 8462: ctxt->standalone = ctxt->input->standalone;
1.42 daniel 8463: SKIP_BLANKS;
1.164 daniel 8464: if ((ctxt->encoding == NULL) && (ctxt->input->encoding != NULL))
8465: ctxt->encoding = xmlStrdup(ctxt->input->encoding);
8466:
1.1 veillard 8467: } else {
1.72 daniel 8468: ctxt->version = xmlCharStrdup(XML_DEFAULT_VERSION);
1.1 veillard 8469: }
1.171 daniel 8470: if ((ctxt->sax) && (ctxt->sax->startDocument) && (!ctxt->disableSAX))
1.74 daniel 8471: ctxt->sax->startDocument(ctxt->userData);
1.1 veillard 8472:
8473: /*
8474: * The Misc part of the Prolog
8475: */
1.91 daniel 8476: GROW;
1.16 daniel 8477: xmlParseMisc(ctxt);
1.1 veillard 8478:
8479: /*
1.29 daniel 8480: * Then possibly doc type declaration(s) and more Misc
1.21 daniel 8481: * (doctypedecl Misc*)?
8482: */
1.91 daniel 8483: GROW;
1.152 daniel 8484: if ((RAW == '<') && (NXT(1) == '!') &&
1.40 daniel 8485: (NXT(2) == 'D') && (NXT(3) == 'O') &&
8486: (NXT(4) == 'C') && (NXT(5) == 'T') &&
8487: (NXT(6) == 'Y') && (NXT(7) == 'P') &&
8488: (NXT(8) == 'E')) {
1.165 daniel 8489:
1.166 daniel 8490: ctxt->inSubset = 1;
1.22 daniel 8491: xmlParseDocTypeDecl(ctxt);
1.152 daniel 8492: if (RAW == '[') {
1.140 daniel 8493: ctxt->instate = XML_PARSER_DTD;
8494: xmlParseInternalSubset(ctxt);
8495: }
1.165 daniel 8496:
8497: /*
8498: * Create and update the external subset.
8499: */
1.166 daniel 8500: ctxt->inSubset = 2;
1.171 daniel 8501: if ((ctxt->sax != NULL) && (ctxt->sax->externalSubset != NULL) &&
8502: (!ctxt->disableSAX))
1.165 daniel 8503: ctxt->sax->externalSubset(ctxt->userData, ctxt->intSubName,
8504: ctxt->extSubSystem, ctxt->extSubURI);
1.166 daniel 8505: ctxt->inSubset = 0;
1.165 daniel 8506:
8507:
1.96 daniel 8508: ctxt->instate = XML_PARSER_PROLOG;
1.22 daniel 8509: xmlParseMisc(ctxt);
1.21 daniel 8510: }
8511:
8512: /*
8513: * Time to start parsing the tree itself
1.1 veillard 8514: */
1.91 daniel 8515: GROW;
1.152 daniel 8516: if (RAW != '<') {
1.59 daniel 8517: if ((ctxt->sax != NULL) && (ctxt->sax->error != NULL))
1.74 daniel 8518: ctxt->sax->error(ctxt->userData,
1.151 daniel 8519: "Start tag expected, '<' not found\n");
1.140 daniel 8520: ctxt->errNo = XML_ERR_DOCUMENT_EMPTY;
1.59 daniel 8521: ctxt->wellFormed = 0;
1.180 daniel 8522: ctxt->disableSAX = 1;
1.140 daniel 8523: ctxt->instate = XML_PARSER_EOF;
8524: } else {
8525: ctxt->instate = XML_PARSER_CONTENT;
8526: xmlParseElement(ctxt);
8527: ctxt->instate = XML_PARSER_EPILOG;
8528:
8529:
8530: /*
8531: * The Misc part at the end
8532: */
8533: xmlParseMisc(ctxt);
8534:
1.152 daniel 8535: if (RAW != 0) {
1.140 daniel 8536: if ((ctxt->sax != NULL) && (ctxt->sax->error != NULL))
8537: ctxt->sax->error(ctxt->userData,
8538: "Extra content at the end of the document\n");
8539: ctxt->wellFormed = 0;
1.180 daniel 8540: ctxt->disableSAX = 1;
1.140 daniel 8541: ctxt->errNo = XML_ERR_DOCUMENT_END;
8542: }
8543: ctxt->instate = XML_PARSER_EOF;
1.59 daniel 8544: }
8545:
1.44 daniel 8546: /*
8547: * SAX: end of the document processing.
8548: */
1.171 daniel 8549: if ((ctxt->sax) && (ctxt->sax->endDocument != NULL) &&
8550: (!ctxt->disableSAX))
1.74 daniel 8551: ctxt->sax->endDocument(ctxt->userData);
1.151 daniel 8552:
8553: /*
8554: * Grab the encoding if it was added on-the-fly
8555: */
8556: if ((ctxt->encoding != NULL) && (ctxt->myDoc != NULL) &&
8557: (ctxt->myDoc->encoding == NULL)) {
8558: ctxt->myDoc->encoding = ctxt->encoding;
8559: ctxt->encoding = NULL;
8560: }
1.59 daniel 8561: if (! ctxt->wellFormed) return(-1);
1.16 daniel 8562: return(0);
8563: }
8564:
1.98 daniel 8565: /************************************************************************
8566: * *
1.128 daniel 8567: * Progressive parsing interfaces *
8568: * *
8569: ************************************************************************/
8570:
8571: /**
8572: * xmlParseLookupSequence:
8573: * @ctxt: an XML parser context
8574: * @first: the first char to lookup
1.140 daniel 8575: * @next: the next char to lookup or zero
8576: * @third: the next char to lookup or zero
1.128 daniel 8577: *
1.140 daniel 8578: * Try to find if a sequence (first, next, third) or just (first next) or
8579: * (first) is available in the input stream.
8580: * This function has a side effect of (possibly) incrementing ctxt->checkIndex
8581: * to avoid rescanning sequences of bytes, it DOES change the state of the
8582: * parser, do not use liberally.
1.128 daniel 8583: *
1.140 daniel 8584: * Returns the index to the current parsing point if the full sequence
8585: * is available, -1 otherwise.
1.128 daniel 8586: */
8587: int
1.140 daniel 8588: xmlParseLookupSequence(xmlParserCtxtPtr ctxt, xmlChar first,
8589: xmlChar next, xmlChar third) {
8590: int base, len;
8591: xmlParserInputPtr in;
8592: const xmlChar *buf;
8593:
8594: in = ctxt->input;
8595: if (in == NULL) return(-1);
8596: base = in->cur - in->base;
8597: if (base < 0) return(-1);
8598: if (ctxt->checkIndex > base)
8599: base = ctxt->checkIndex;
8600: if (in->buf == NULL) {
8601: buf = in->base;
8602: len = in->length;
8603: } else {
8604: buf = in->buf->buffer->content;
8605: len = in->buf->buffer->use;
8606: }
8607: /* take into account the sequence length */
8608: if (third) len -= 2;
8609: else if (next) len --;
8610: for (;base < len;base++) {
8611: if (buf[base] == first) {
8612: if (third != 0) {
8613: if ((buf[base + 1] != next) ||
8614: (buf[base + 2] != third)) continue;
8615: } else if (next != 0) {
8616: if (buf[base + 1] != next) continue;
8617: }
8618: ctxt->checkIndex = 0;
8619: #ifdef DEBUG_PUSH
8620: if (next == 0)
8621: fprintf(stderr, "PP: lookup '%c' found at %d\n",
8622: first, base);
8623: else if (third == 0)
8624: fprintf(stderr, "PP: lookup '%c%c' found at %d\n",
8625: first, next, base);
8626: else
8627: fprintf(stderr, "PP: lookup '%c%c%c' found at %d\n",
8628: first, next, third, base);
8629: #endif
8630: return(base - (in->cur - in->base));
8631: }
8632: }
8633: ctxt->checkIndex = base;
8634: #ifdef DEBUG_PUSH
8635: if (next == 0)
8636: fprintf(stderr, "PP: lookup '%c' failed\n", first);
8637: else if (third == 0)
8638: fprintf(stderr, "PP: lookup '%c%c' failed\n", first, next);
8639: else
8640: fprintf(stderr, "PP: lookup '%c%c%c' failed\n", first, next, third);
8641: #endif
8642: return(-1);
1.128 daniel 8643: }
8644:
8645: /**
1.143 daniel 8646: * xmlParseTryOrFinish:
1.128 daniel 8647: * @ctxt: an XML parser context
1.143 daniel 8648: * @terminate: last chunk indicator
1.128 daniel 8649: *
8650: * Try to progress on parsing
8651: *
8652: * Returns zero if no parsing was possible
8653: */
8654: int
1.143 daniel 8655: xmlParseTryOrFinish(xmlParserCtxtPtr ctxt, int terminate) {
1.128 daniel 8656: int ret = 0;
1.140 daniel 8657: int avail;
8658: xmlChar cur, next;
8659:
8660: #ifdef DEBUG_PUSH
8661: switch (ctxt->instate) {
8662: case XML_PARSER_EOF:
8663: fprintf(stderr, "PP: try EOF\n"); break;
8664: case XML_PARSER_START:
8665: fprintf(stderr, "PP: try START\n"); break;
8666: case XML_PARSER_MISC:
8667: fprintf(stderr, "PP: try MISC\n");break;
8668: case XML_PARSER_COMMENT:
8669: fprintf(stderr, "PP: try COMMENT\n");break;
8670: case XML_PARSER_PROLOG:
8671: fprintf(stderr, "PP: try PROLOG\n");break;
8672: case XML_PARSER_START_TAG:
8673: fprintf(stderr, "PP: try START_TAG\n");break;
8674: case XML_PARSER_CONTENT:
8675: fprintf(stderr, "PP: try CONTENT\n");break;
8676: case XML_PARSER_CDATA_SECTION:
8677: fprintf(stderr, "PP: try CDATA_SECTION\n");break;
8678: case XML_PARSER_END_TAG:
8679: fprintf(stderr, "PP: try END_TAG\n");break;
8680: case XML_PARSER_ENTITY_DECL:
8681: fprintf(stderr, "PP: try ENTITY_DECL\n");break;
8682: case XML_PARSER_ENTITY_VALUE:
8683: fprintf(stderr, "PP: try ENTITY_VALUE\n");break;
8684: case XML_PARSER_ATTRIBUTE_VALUE:
8685: fprintf(stderr, "PP: try ATTRIBUTE_VALUE\n");break;
8686: case XML_PARSER_DTD:
8687: fprintf(stderr, "PP: try DTD\n");break;
8688: case XML_PARSER_EPILOG:
8689: fprintf(stderr, "PP: try EPILOG\n");break;
8690: case XML_PARSER_PI:
8691: fprintf(stderr, "PP: try PI\n");break;
8692: }
8693: #endif
1.128 daniel 8694:
8695: while (1) {
1.140 daniel 8696: /*
8697: * Pop-up of finished entities.
8698: */
1.152 daniel 8699: while ((RAW == 0) && (ctxt->inputNr > 1))
1.140 daniel 8700: xmlPopInput(ctxt);
8701:
1.184 daniel 8702: if (ctxt->input ==NULL) break;
8703: if (ctxt->input->buf == NULL)
8704: avail = ctxt->input->length - (ctxt->input->cur - ctxt->input->base);
1.140 daniel 8705: else
1.184 daniel 8706: avail = ctxt->input->buf->buffer->use - (ctxt->input->cur - ctxt->input->base);
1.140 daniel 8707: if (avail < 1)
8708: goto done;
1.128 daniel 8709: switch (ctxt->instate) {
8710: case XML_PARSER_EOF:
1.140 daniel 8711: /*
8712: * Document parsing is done !
8713: */
8714: goto done;
8715: case XML_PARSER_START:
8716: /*
8717: * Very first chars read from the document flow.
8718: */
1.184 daniel 8719: cur = ctxt->input->cur[0];
1.140 daniel 8720: if (IS_BLANK(cur)) {
8721: if ((ctxt->sax) && (ctxt->sax->setDocumentLocator))
8722: ctxt->sax->setDocumentLocator(ctxt->userData,
8723: &xmlDefaultSAXLocator);
8724: if ((ctxt->sax != NULL) && (ctxt->sax->error != NULL))
8725: ctxt->sax->error(ctxt->userData,
8726: "Extra spaces at the beginning of the document are not allowed\n");
8727: ctxt->errNo = XML_ERR_DOCUMENT_START;
8728: ctxt->wellFormed = 0;
1.180 daniel 8729: ctxt->disableSAX = 1;
1.140 daniel 8730: SKIP_BLANKS;
8731: ret++;
1.184 daniel 8732: if (ctxt->input->buf == NULL)
8733: avail = ctxt->input->length - (ctxt->input->cur - ctxt->input->base);
1.140 daniel 8734: else
1.184 daniel 8735: avail = ctxt->input->buf->buffer->use - (ctxt->input->cur - ctxt->input->base);
1.140 daniel 8736: }
8737: if (avail < 2)
8738: goto done;
8739:
1.184 daniel 8740: cur = ctxt->input->cur[0];
8741: next = ctxt->input->cur[1];
1.140 daniel 8742: if (cur == 0) {
8743: if ((ctxt->sax) && (ctxt->sax->setDocumentLocator))
8744: ctxt->sax->setDocumentLocator(ctxt->userData,
8745: &xmlDefaultSAXLocator);
8746: if ((ctxt->sax != NULL) && (ctxt->sax->error != NULL))
8747: ctxt->sax->error(ctxt->userData, "Document is empty\n");
8748: ctxt->errNo = XML_ERR_DOCUMENT_EMPTY;
8749: ctxt->wellFormed = 0;
1.180 daniel 8750: ctxt->disableSAX = 1;
1.140 daniel 8751: ctxt->instate = XML_PARSER_EOF;
8752: #ifdef DEBUG_PUSH
8753: fprintf(stderr, "PP: entering EOF\n");
8754: #endif
8755: if ((ctxt->sax) && (ctxt->sax->endDocument != NULL))
8756: ctxt->sax->endDocument(ctxt->userData);
8757: goto done;
8758: }
8759: if ((cur == '<') && (next == '?')) {
8760: /* PI or XML decl */
8761: if (avail < 5) return(ret);
1.143 daniel 8762: if ((!terminate) &&
8763: (xmlParseLookupSequence(ctxt, '?', '>', 0) < 0))
1.140 daniel 8764: return(ret);
8765: if ((ctxt->sax) && (ctxt->sax->setDocumentLocator))
8766: ctxt->sax->setDocumentLocator(ctxt->userData,
8767: &xmlDefaultSAXLocator);
1.184 daniel 8768: if ((ctxt->input->cur[2] == 'x') &&
8769: (ctxt->input->cur[3] == 'm') &&
8770: (ctxt->input->cur[4] == 'l') &&
8771: (IS_BLANK(ctxt->input->cur[5]))) {
1.140 daniel 8772: ret += 5;
8773: #ifdef DEBUG_PUSH
8774: fprintf(stderr, "PP: Parsing XML Decl\n");
8775: #endif
8776: xmlParseXMLDecl(ctxt);
1.167 daniel 8777: ctxt->standalone = ctxt->input->standalone;
1.164 daniel 8778: if ((ctxt->encoding == NULL) &&
8779: (ctxt->input->encoding != NULL))
8780: ctxt->encoding = xmlStrdup(ctxt->input->encoding);
1.171 daniel 8781: if ((ctxt->sax) && (ctxt->sax->startDocument) &&
8782: (!ctxt->disableSAX))
1.140 daniel 8783: ctxt->sax->startDocument(ctxt->userData);
8784: ctxt->instate = XML_PARSER_MISC;
8785: #ifdef DEBUG_PUSH
8786: fprintf(stderr, "PP: entering MISC\n");
8787: #endif
8788: } else {
8789: ctxt->version = xmlCharStrdup(XML_DEFAULT_VERSION);
1.171 daniel 8790: if ((ctxt->sax) && (ctxt->sax->startDocument) &&
8791: (!ctxt->disableSAX))
1.140 daniel 8792: ctxt->sax->startDocument(ctxt->userData);
8793: ctxt->instate = XML_PARSER_MISC;
8794: #ifdef DEBUG_PUSH
8795: fprintf(stderr, "PP: entering MISC\n");
8796: #endif
8797: }
8798: } else {
8799: if ((ctxt->sax) && (ctxt->sax->setDocumentLocator))
8800: ctxt->sax->setDocumentLocator(ctxt->userData,
8801: &xmlDefaultSAXLocator);
8802: ctxt->version = xmlCharStrdup(XML_DEFAULT_VERSION);
1.171 daniel 8803: if ((ctxt->sax) && (ctxt->sax->startDocument) &&
8804: (!ctxt->disableSAX))
1.140 daniel 8805: ctxt->sax->startDocument(ctxt->userData);
8806: ctxt->instate = XML_PARSER_MISC;
8807: #ifdef DEBUG_PUSH
8808: fprintf(stderr, "PP: entering MISC\n");
8809: #endif
8810: }
8811: break;
8812: case XML_PARSER_MISC:
8813: SKIP_BLANKS;
1.184 daniel 8814: if (ctxt->input->buf == NULL)
8815: avail = ctxt->input->length - (ctxt->input->cur - ctxt->input->base);
1.140 daniel 8816: else
1.184 daniel 8817: avail = ctxt->input->buf->buffer->use - (ctxt->input->cur - ctxt->input->base);
1.140 daniel 8818: if (avail < 2)
8819: goto done;
1.184 daniel 8820: cur = ctxt->input->cur[0];
8821: next = ctxt->input->cur[1];
1.140 daniel 8822: if ((cur == '<') && (next == '?')) {
1.143 daniel 8823: if ((!terminate) &&
8824: (xmlParseLookupSequence(ctxt, '?', '>', 0) < 0))
1.140 daniel 8825: goto done;
8826: #ifdef DEBUG_PUSH
8827: fprintf(stderr, "PP: Parsing PI\n");
8828: #endif
8829: xmlParsePI(ctxt);
8830: } else if ((cur == '<') && (next == '!') &&
1.184 daniel 8831: (ctxt->input->cur[2] == '-') && (ctxt->input->cur[3] == '-')) {
1.143 daniel 8832: if ((!terminate) &&
8833: (xmlParseLookupSequence(ctxt, '-', '-', '>') < 0))
1.140 daniel 8834: goto done;
8835: #ifdef DEBUG_PUSH
8836: fprintf(stderr, "PP: Parsing Comment\n");
8837: #endif
8838: xmlParseComment(ctxt);
8839: ctxt->instate = XML_PARSER_MISC;
8840: } else if ((cur == '<') && (next == '!') &&
1.184 daniel 8841: (ctxt->input->cur[2] == 'D') && (ctxt->input->cur[3] == 'O') &&
8842: (ctxt->input->cur[4] == 'C') && (ctxt->input->cur[5] == 'T') &&
8843: (ctxt->input->cur[6] == 'Y') && (ctxt->input->cur[7] == 'P') &&
8844: (ctxt->input->cur[8] == 'E')) {
1.143 daniel 8845: if ((!terminate) &&
8846: (xmlParseLookupSequence(ctxt, '>', 0, 0) < 0))
1.140 daniel 8847: goto done;
8848: #ifdef DEBUG_PUSH
8849: fprintf(stderr, "PP: Parsing internal subset\n");
8850: #endif
1.166 daniel 8851: ctxt->inSubset = 1;
1.140 daniel 8852: xmlParseDocTypeDecl(ctxt);
1.152 daniel 8853: if (RAW == '[') {
1.140 daniel 8854: ctxt->instate = XML_PARSER_DTD;
8855: #ifdef DEBUG_PUSH
8856: fprintf(stderr, "PP: entering DTD\n");
8857: #endif
8858: } else {
1.166 daniel 8859: /*
8860: * Create and update the external subset.
8861: */
8862: ctxt->inSubset = 2;
1.171 daniel 8863: if ((ctxt->sax != NULL) && (!ctxt->disableSAX) &&
1.166 daniel 8864: (ctxt->sax->externalSubset != NULL))
8865: ctxt->sax->externalSubset(ctxt->userData,
8866: ctxt->intSubName, ctxt->extSubSystem,
8867: ctxt->extSubURI);
8868: ctxt->inSubset = 0;
1.140 daniel 8869: ctxt->instate = XML_PARSER_PROLOG;
8870: #ifdef DEBUG_PUSH
8871: fprintf(stderr, "PP: entering PROLOG\n");
8872: #endif
8873: }
8874: } else if ((cur == '<') && (next == '!') &&
8875: (avail < 9)) {
8876: goto done;
8877: } else {
8878: ctxt->instate = XML_PARSER_START_TAG;
8879: #ifdef DEBUG_PUSH
8880: fprintf(stderr, "PP: entering START_TAG\n");
8881: #endif
8882: }
8883: break;
1.128 daniel 8884: case XML_PARSER_PROLOG:
1.140 daniel 8885: SKIP_BLANKS;
1.184 daniel 8886: if (ctxt->input->buf == NULL)
8887: avail = ctxt->input->length - (ctxt->input->cur - ctxt->input->base);
1.140 daniel 8888: else
1.184 daniel 8889: avail = ctxt->input->buf->buffer->use - (ctxt->input->cur - ctxt->input->base);
1.140 daniel 8890: if (avail < 2)
8891: goto done;
1.184 daniel 8892: cur = ctxt->input->cur[0];
8893: next = ctxt->input->cur[1];
1.140 daniel 8894: if ((cur == '<') && (next == '?')) {
1.143 daniel 8895: if ((!terminate) &&
8896: (xmlParseLookupSequence(ctxt, '?', '>', 0) < 0))
1.140 daniel 8897: goto done;
8898: #ifdef DEBUG_PUSH
8899: fprintf(stderr, "PP: Parsing PI\n");
8900: #endif
8901: xmlParsePI(ctxt);
8902: } else if ((cur == '<') && (next == '!') &&
1.184 daniel 8903: (ctxt->input->cur[2] == '-') && (ctxt->input->cur[3] == '-')) {
1.143 daniel 8904: if ((!terminate) &&
8905: (xmlParseLookupSequence(ctxt, '-', '-', '>') < 0))
1.140 daniel 8906: goto done;
8907: #ifdef DEBUG_PUSH
8908: fprintf(stderr, "PP: Parsing Comment\n");
8909: #endif
8910: xmlParseComment(ctxt);
8911: ctxt->instate = XML_PARSER_PROLOG;
8912: } else if ((cur == '<') && (next == '!') &&
8913: (avail < 4)) {
8914: goto done;
8915: } else {
8916: ctxt->instate = XML_PARSER_START_TAG;
8917: #ifdef DEBUG_PUSH
8918: fprintf(stderr, "PP: entering START_TAG\n");
8919: #endif
8920: }
8921: break;
8922: case XML_PARSER_EPILOG:
8923: SKIP_BLANKS;
1.184 daniel 8924: if (ctxt->input->buf == NULL)
8925: avail = ctxt->input->length - (ctxt->input->cur - ctxt->input->base);
1.140 daniel 8926: else
1.184 daniel 8927: avail = ctxt->input->buf->buffer->use - (ctxt->input->cur - ctxt->input->base);
1.140 daniel 8928: if (avail < 2)
8929: goto done;
1.184 daniel 8930: cur = ctxt->input->cur[0];
8931: next = ctxt->input->cur[1];
1.140 daniel 8932: if ((cur == '<') && (next == '?')) {
1.143 daniel 8933: if ((!terminate) &&
8934: (xmlParseLookupSequence(ctxt, '?', '>', 0) < 0))
1.140 daniel 8935: goto done;
8936: #ifdef DEBUG_PUSH
8937: fprintf(stderr, "PP: Parsing PI\n");
8938: #endif
8939: xmlParsePI(ctxt);
8940: ctxt->instate = XML_PARSER_EPILOG;
8941: } else if ((cur == '<') && (next == '!') &&
1.184 daniel 8942: (ctxt->input->cur[2] == '-') && (ctxt->input->cur[3] == '-')) {
1.143 daniel 8943: if ((!terminate) &&
8944: (xmlParseLookupSequence(ctxt, '-', '-', '>') < 0))
1.140 daniel 8945: goto done;
8946: #ifdef DEBUG_PUSH
8947: fprintf(stderr, "PP: Parsing Comment\n");
8948: #endif
8949: xmlParseComment(ctxt);
8950: ctxt->instate = XML_PARSER_EPILOG;
8951: } else if ((cur == '<') && (next == '!') &&
8952: (avail < 4)) {
8953: goto done;
8954: } else {
8955: if ((ctxt->sax != NULL) && (ctxt->sax->error != NULL))
8956: ctxt->sax->error(ctxt->userData,
8957: "Extra content at the end of the document\n");
8958: ctxt->wellFormed = 0;
1.180 daniel 8959: ctxt->disableSAX = 1;
1.140 daniel 8960: ctxt->errNo = XML_ERR_DOCUMENT_END;
8961: ctxt->instate = XML_PARSER_EOF;
8962: #ifdef DEBUG_PUSH
8963: fprintf(stderr, "PP: entering EOF\n");
8964: #endif
1.171 daniel 8965: if ((ctxt->sax) && (ctxt->sax->endDocument != NULL) &&
8966: (!ctxt->disableSAX))
1.140 daniel 8967: ctxt->sax->endDocument(ctxt->userData);
8968: goto done;
8969: }
8970: break;
8971: case XML_PARSER_START_TAG: {
8972: xmlChar *name, *oldname;
8973:
1.184 daniel 8974: if ((avail < 2) && (ctxt->inputNr == 1))
1.140 daniel 8975: goto done;
1.184 daniel 8976: cur = ctxt->input->cur[0];
1.140 daniel 8977: if (cur != '<') {
8978: if ((ctxt->sax != NULL) && (ctxt->sax->error != NULL))
8979: ctxt->sax->error(ctxt->userData,
8980: "Start tag expect, '<' not found\n");
8981: ctxt->errNo = XML_ERR_DOCUMENT_EMPTY;
8982: ctxt->wellFormed = 0;
1.180 daniel 8983: ctxt->disableSAX = 1;
1.140 daniel 8984: ctxt->instate = XML_PARSER_EOF;
8985: #ifdef DEBUG_PUSH
8986: fprintf(stderr, "PP: entering EOF\n");
8987: #endif
1.171 daniel 8988: if ((ctxt->sax) && (ctxt->sax->endDocument != NULL) &&
8989: (!ctxt->disableSAX))
1.140 daniel 8990: ctxt->sax->endDocument(ctxt->userData);
8991: goto done;
8992: }
1.143 daniel 8993: if ((!terminate) &&
8994: (xmlParseLookupSequence(ctxt, '>', 0, 0) < 0))
1.140 daniel 8995: goto done;
1.176 daniel 8996: if (ctxt->spaceNr == 0)
8997: spacePush(ctxt, -1);
8998: else
8999: spacePush(ctxt, *ctxt->space);
1.140 daniel 9000: name = xmlParseStartTag(ctxt);
9001: if (name == NULL) {
1.176 daniel 9002: spacePop(ctxt);
1.140 daniel 9003: ctxt->instate = XML_PARSER_EOF;
9004: #ifdef DEBUG_PUSH
9005: fprintf(stderr, "PP: entering EOF\n");
9006: #endif
1.171 daniel 9007: if ((ctxt->sax) && (ctxt->sax->endDocument != NULL) &&
9008: (!ctxt->disableSAX))
1.140 daniel 9009: ctxt->sax->endDocument(ctxt->userData);
9010: goto done;
9011: }
9012: namePush(ctxt, xmlStrdup(name));
9013:
9014: /*
9015: * [ VC: Root Element Type ]
9016: * The Name in the document type declaration must match
9017: * the element type of the root element.
9018: */
9019: if (ctxt->validate && ctxt->wellFormed && ctxt->myDoc &&
1.157 daniel 9020: ctxt->node && (ctxt->node == ctxt->myDoc->children))
1.140 daniel 9021: ctxt->valid &= xmlValidateRoot(&ctxt->vctxt, ctxt->myDoc);
9022:
9023: /*
9024: * Check for an Empty Element.
9025: */
1.152 daniel 9026: if ((RAW == '/') && (NXT(1) == '>')) {
1.140 daniel 9027: SKIP(2);
1.171 daniel 9028: if ((ctxt->sax != NULL) &&
9029: (ctxt->sax->endElement != NULL) && (!ctxt->disableSAX))
1.140 daniel 9030: ctxt->sax->endElement(ctxt->userData, name);
9031: xmlFree(name);
9032: oldname = namePop(ctxt);
1.176 daniel 9033: spacePop(ctxt);
1.140 daniel 9034: if (oldname != NULL) {
9035: #ifdef DEBUG_STACK
9036: fprintf(stderr,"Close: popped %s\n", oldname);
9037: #endif
9038: xmlFree(oldname);
9039: }
9040: if (ctxt->name == NULL) {
9041: ctxt->instate = XML_PARSER_EPILOG;
9042: #ifdef DEBUG_PUSH
9043: fprintf(stderr, "PP: entering EPILOG\n");
9044: #endif
9045: } else {
9046: ctxt->instate = XML_PARSER_CONTENT;
9047: #ifdef DEBUG_PUSH
9048: fprintf(stderr, "PP: entering CONTENT\n");
9049: #endif
9050: }
9051: break;
9052: }
1.152 daniel 9053: if (RAW == '>') {
1.140 daniel 9054: NEXT;
9055: } else {
9056: if ((ctxt->sax != NULL) && (ctxt->sax->error != NULL))
9057: ctxt->sax->error(ctxt->userData,
9058: "Couldn't find end of Start Tag %s\n",
9059: name);
9060: ctxt->wellFormed = 0;
1.180 daniel 9061: ctxt->disableSAX = 1;
1.140 daniel 9062: ctxt->errNo = XML_ERR_GT_REQUIRED;
9063:
9064: /*
9065: * end of parsing of this node.
9066: */
9067: nodePop(ctxt);
9068: oldname = namePop(ctxt);
1.176 daniel 9069: spacePop(ctxt);
1.140 daniel 9070: if (oldname != NULL) {
9071: #ifdef DEBUG_STACK
9072: fprintf(stderr,"Close: popped %s\n", oldname);
9073: #endif
9074: xmlFree(oldname);
9075: }
9076: }
9077: xmlFree(name);
9078: ctxt->instate = XML_PARSER_CONTENT;
9079: #ifdef DEBUG_PUSH
9080: fprintf(stderr, "PP: entering CONTENT\n");
9081: #endif
9082: break;
9083: }
1.128 daniel 9084: case XML_PARSER_CONTENT:
1.140 daniel 9085: /*
9086: * Handle preparsed entities and charRef
9087: */
9088: if (ctxt->token != 0) {
9089: xmlChar cur[2] = { 0 , 0 } ;
9090:
9091: cur[0] = (xmlChar) ctxt->token;
1.171 daniel 9092: if ((ctxt->sax != NULL) && (!ctxt->disableSAX) &&
9093: (ctxt->sax->characters != NULL))
1.140 daniel 9094: ctxt->sax->characters(ctxt->userData, cur, 1);
9095: ctxt->token = 0;
9096: }
1.184 daniel 9097: if ((avail < 2) && (ctxt->inputNr == 1))
1.140 daniel 9098: goto done;
1.184 daniel 9099: cur = ctxt->input->cur[0];
9100: next = ctxt->input->cur[1];
1.140 daniel 9101: if ((cur == '<') && (next == '?')) {
1.143 daniel 9102: if ((!terminate) &&
9103: (xmlParseLookupSequence(ctxt, '?', '>', 0) < 0))
1.140 daniel 9104: goto done;
9105: #ifdef DEBUG_PUSH
9106: fprintf(stderr, "PP: Parsing PI\n");
9107: #endif
9108: xmlParsePI(ctxt);
9109: } else if ((cur == '<') && (next == '!') &&
1.184 daniel 9110: (ctxt->input->cur[2] == '-') && (ctxt->input->cur[3] == '-')) {
1.143 daniel 9111: if ((!terminate) &&
9112: (xmlParseLookupSequence(ctxt, '-', '-', '>') < 0))
1.140 daniel 9113: goto done;
9114: #ifdef DEBUG_PUSH
9115: fprintf(stderr, "PP: Parsing Comment\n");
9116: #endif
9117: xmlParseComment(ctxt);
9118: ctxt->instate = XML_PARSER_CONTENT;
1.184 daniel 9119: } else if ((cur == '<') && (ctxt->input->cur[1] == '!') &&
9120: (ctxt->input->cur[2] == '[') && (NXT(3) == 'C') &&
9121: (ctxt->input->cur[4] == 'D') && (NXT(5) == 'A') &&
9122: (ctxt->input->cur[6] == 'T') && (NXT(7) == 'A') &&
9123: (ctxt->input->cur[8] == '[')) {
1.140 daniel 9124: SKIP(9);
9125: ctxt->instate = XML_PARSER_CDATA_SECTION;
9126: #ifdef DEBUG_PUSH
9127: fprintf(stderr, "PP: entering CDATA_SECTION\n");
9128: #endif
9129: break;
9130: } else if ((cur == '<') && (next == '!') &&
9131: (avail < 9)) {
9132: goto done;
9133: } else if ((cur == '<') && (next == '/')) {
9134: ctxt->instate = XML_PARSER_END_TAG;
9135: #ifdef DEBUG_PUSH
9136: fprintf(stderr, "PP: entering END_TAG\n");
9137: #endif
9138: break;
9139: } else if (cur == '<') {
9140: ctxt->instate = XML_PARSER_START_TAG;
9141: #ifdef DEBUG_PUSH
9142: fprintf(stderr, "PP: entering START_TAG\n");
9143: #endif
9144: break;
9145: } else if (cur == '&') {
1.143 daniel 9146: if ((!terminate) &&
9147: (xmlParseLookupSequence(ctxt, ';', 0, 0) < 0))
1.140 daniel 9148: goto done;
9149: #ifdef DEBUG_PUSH
9150: fprintf(stderr, "PP: Parsing Reference\n");
9151: #endif
9152: /* TODO: check generation of subtrees if noent !!! */
9153: xmlParseReference(ctxt);
9154: } else {
1.156 daniel 9155: /* TODO Avoid the extra copy, handle directly !!! */
1.140 daniel 9156: /*
1.181 daniel 9157: * Goal of the following test is:
1.140 daniel 9158: * - minimize calls to the SAX 'character' callback
9159: * when they are mergeable
9160: * - handle an problem for isBlank when we only parse
9161: * a sequence of blank chars and the next one is
9162: * not available to check against '<' presence.
9163: * - tries to homogenize the differences in SAX
9164: * callbacks beween the push and pull versions
9165: * of the parser.
9166: */
9167: if ((ctxt->inputNr == 1) &&
9168: (avail < XML_PARSER_BIG_BUFFER_SIZE)) {
1.143 daniel 9169: if ((!terminate) &&
9170: (xmlParseLookupSequence(ctxt, '<', 0, 0) < 0))
1.140 daniel 9171: goto done;
9172: }
9173: ctxt->checkIndex = 0;
9174: #ifdef DEBUG_PUSH
9175: fprintf(stderr, "PP: Parsing char data\n");
9176: #endif
9177: xmlParseCharData(ctxt, 0);
9178: }
9179: /*
9180: * Pop-up of finished entities.
9181: */
1.152 daniel 9182: while ((RAW == 0) && (ctxt->inputNr > 1))
1.140 daniel 9183: xmlPopInput(ctxt);
9184: break;
9185: case XML_PARSER_CDATA_SECTION: {
9186: /*
9187: * The Push mode need to have the SAX callback for
9188: * cdataBlock merge back contiguous callbacks.
9189: */
9190: int base;
9191:
9192: base = xmlParseLookupSequence(ctxt, ']', ']', '>');
9193: if (base < 0) {
9194: if (avail >= XML_PARSER_BIG_BUFFER_SIZE + 2) {
1.171 daniel 9195: if ((ctxt->sax != NULL) && (!ctxt->disableSAX)) {
1.140 daniel 9196: if (ctxt->sax->cdataBlock != NULL)
1.184 daniel 9197: ctxt->sax->cdataBlock(ctxt->userData, ctxt->input->cur,
1.140 daniel 9198: XML_PARSER_BIG_BUFFER_SIZE);
9199: }
9200: SKIP(XML_PARSER_BIG_BUFFER_SIZE);
9201: ctxt->checkIndex = 0;
9202: }
9203: goto done;
9204: } else {
1.171 daniel 9205: if ((ctxt->sax != NULL) && (base > 0) &&
9206: (!ctxt->disableSAX)) {
1.140 daniel 9207: if (ctxt->sax->cdataBlock != NULL)
9208: ctxt->sax->cdataBlock(ctxt->userData,
1.184 daniel 9209: ctxt->input->cur, base);
1.140 daniel 9210: }
9211: SKIP(base + 3);
9212: ctxt->checkIndex = 0;
9213: ctxt->instate = XML_PARSER_CONTENT;
9214: #ifdef DEBUG_PUSH
9215: fprintf(stderr, "PP: entering CONTENT\n");
9216: #endif
9217: }
9218: break;
9219: }
1.141 daniel 9220: case XML_PARSER_END_TAG:
1.140 daniel 9221: if (avail < 2)
9222: goto done;
1.143 daniel 9223: if ((!terminate) &&
9224: (xmlParseLookupSequence(ctxt, '>', 0, 0) < 0))
1.140 daniel 9225: goto done;
9226: xmlParseEndTag(ctxt);
9227: if (ctxt->name == NULL) {
9228: ctxt->instate = XML_PARSER_EPILOG;
9229: #ifdef DEBUG_PUSH
9230: fprintf(stderr, "PP: entering EPILOG\n");
9231: #endif
9232: } else {
9233: ctxt->instate = XML_PARSER_CONTENT;
9234: #ifdef DEBUG_PUSH
9235: fprintf(stderr, "PP: entering CONTENT\n");
9236: #endif
9237: }
9238: break;
9239: case XML_PARSER_DTD: {
9240: /*
9241: * Sorry but progressive parsing of the internal subset
9242: * is not expected to be supported. We first check that
9243: * the full content of the internal subset is available and
9244: * the parsing is launched only at that point.
9245: * Internal subset ends up with "']' S? '>'" in an unescaped
9246: * section and not in a ']]>' sequence which are conditional
9247: * sections (whoever argued to keep that crap in XML deserve
9248: * a place in hell !).
9249: */
9250: int base, i;
9251: xmlChar *buf;
9252: xmlChar quote = 0;
9253:
1.184 daniel 9254: base = ctxt->input->cur - ctxt->input->base;
1.140 daniel 9255: if (base < 0) return(0);
9256: if (ctxt->checkIndex > base)
9257: base = ctxt->checkIndex;
1.184 daniel 9258: buf = ctxt->input->buf->buffer->content;
9259: for (;base < ctxt->input->buf->buffer->use;base++) {
1.140 daniel 9260: if (quote != 0) {
9261: if (buf[base] == quote)
9262: quote = 0;
9263: continue;
9264: }
9265: if (buf[base] == '"') {
9266: quote = '"';
9267: continue;
9268: }
9269: if (buf[base] == '\'') {
9270: quote = '\'';
9271: continue;
9272: }
9273: if (buf[base] == ']') {
1.184 daniel 9274: if (base +1 >= ctxt->input->buf->buffer->use)
1.140 daniel 9275: break;
9276: if (buf[base + 1] == ']') {
9277: /* conditional crap, skip both ']' ! */
9278: base++;
9279: continue;
9280: }
1.184 daniel 9281: for (i = 0;base + i < ctxt->input->buf->buffer->use;i++) {
1.140 daniel 9282: if (buf[base + i] == '>')
9283: goto found_end_int_subset;
9284: }
9285: break;
9286: }
9287: }
9288: /*
9289: * We didn't found the end of the Internal subset
9290: */
9291: if (quote == 0)
9292: ctxt->checkIndex = base;
9293: #ifdef DEBUG_PUSH
9294: if (next == 0)
9295: fprintf(stderr, "PP: lookup of int subset end filed\n");
9296: #endif
9297: goto done;
9298:
9299: found_end_int_subset:
9300: xmlParseInternalSubset(ctxt);
1.166 daniel 9301: ctxt->inSubset = 2;
1.171 daniel 9302: if ((ctxt->sax != NULL) && (!ctxt->disableSAX) &&
1.166 daniel 9303: (ctxt->sax->externalSubset != NULL))
9304: ctxt->sax->externalSubset(ctxt->userData, ctxt->intSubName,
9305: ctxt->extSubSystem, ctxt->extSubURI);
9306: ctxt->inSubset = 0;
1.140 daniel 9307: ctxt->instate = XML_PARSER_PROLOG;
9308: ctxt->checkIndex = 0;
9309: #ifdef DEBUG_PUSH
9310: fprintf(stderr, "PP: entering PROLOG\n");
9311: #endif
9312: break;
9313: }
9314: case XML_PARSER_COMMENT:
9315: fprintf(stderr, "PP: internal error, state == COMMENT\n");
9316: ctxt->instate = XML_PARSER_CONTENT;
9317: #ifdef DEBUG_PUSH
9318: fprintf(stderr, "PP: entering CONTENT\n");
9319: #endif
9320: break;
9321: case XML_PARSER_PI:
9322: fprintf(stderr, "PP: internal error, state == PI\n");
9323: ctxt->instate = XML_PARSER_CONTENT;
9324: #ifdef DEBUG_PUSH
9325: fprintf(stderr, "PP: entering CONTENT\n");
9326: #endif
9327: break;
1.128 daniel 9328: case XML_PARSER_ENTITY_DECL:
1.140 daniel 9329: fprintf(stderr, "PP: internal error, state == ENTITY_DECL\n");
9330: ctxt->instate = XML_PARSER_DTD;
9331: #ifdef DEBUG_PUSH
9332: fprintf(stderr, "PP: entering DTD\n");
9333: #endif
9334: break;
1.128 daniel 9335: case XML_PARSER_ENTITY_VALUE:
1.140 daniel 9336: fprintf(stderr, "PP: internal error, state == ENTITY_VALUE\n");
9337: ctxt->instate = XML_PARSER_CONTENT;
9338: #ifdef DEBUG_PUSH
9339: fprintf(stderr, "PP: entering DTD\n");
9340: #endif
9341: break;
1.128 daniel 9342: case XML_PARSER_ATTRIBUTE_VALUE:
1.140 daniel 9343: fprintf(stderr, "PP: internal error, state == ATTRIBUTE_VALUE\n");
1.168 daniel 9344: ctxt->instate = XML_PARSER_START_TAG;
9345: #ifdef DEBUG_PUSH
9346: fprintf(stderr, "PP: entering START_TAG\n");
9347: #endif
9348: break;
9349: case XML_PARSER_SYSTEM_LITERAL:
9350: fprintf(stderr, "PP: internal error, state == SYSTEM_LITERAL\n");
1.140 daniel 9351: ctxt->instate = XML_PARSER_START_TAG;
9352: #ifdef DEBUG_PUSH
9353: fprintf(stderr, "PP: entering START_TAG\n");
9354: #endif
9355: break;
1.128 daniel 9356: }
9357: }
1.140 daniel 9358: done:
9359: #ifdef DEBUG_PUSH
9360: fprintf(stderr, "PP: done %d\n", ret);
9361: #endif
1.128 daniel 9362: return(ret);
9363: }
9364:
9365: /**
1.143 daniel 9366: * xmlParseTry:
9367: * @ctxt: an XML parser context
9368: *
9369: * Try to progress on parsing
9370: *
9371: * Returns zero if no parsing was possible
9372: */
9373: int
9374: xmlParseTry(xmlParserCtxtPtr ctxt) {
9375: return(xmlParseTryOrFinish(ctxt, 0));
9376: }
9377:
9378: /**
1.128 daniel 9379: * xmlParseChunk:
9380: * @ctxt: an XML parser context
9381: * @chunk: an char array
9382: * @size: the size in byte of the chunk
9383: * @terminate: last chunk indicator
9384: *
9385: * Parse a Chunk of memory
9386: *
9387: * Returns zero if no error, the xmlParserErrors otherwise.
9388: */
1.140 daniel 9389: int
1.128 daniel 9390: xmlParseChunk(xmlParserCtxtPtr ctxt, const char *chunk, int size,
9391: int terminate) {
1.132 daniel 9392: if ((size > 0) && (chunk != NULL) && (ctxt->input != NULL) &&
1.140 daniel 9393: (ctxt->input->buf != NULL) && (ctxt->instate != XML_PARSER_EOF)) {
9394: int base = ctxt->input->base - ctxt->input->buf->buffer->content;
9395: int cur = ctxt->input->cur - ctxt->input->base;
9396:
1.132 daniel 9397: xmlParserInputBufferPush(ctxt->input->buf, size, chunk);
1.140 daniel 9398: ctxt->input->base = ctxt->input->buf->buffer->content + base;
9399: ctxt->input->cur = ctxt->input->base + cur;
9400: #ifdef DEBUG_PUSH
9401: fprintf(stderr, "PP: pushed %d\n", size);
9402: #endif
9403:
1.150 daniel 9404: if ((terminate) || (ctxt->input->buf->buffer->use > 80))
9405: xmlParseTryOrFinish(ctxt, terminate);
1.140 daniel 9406: } else if (ctxt->instate != XML_PARSER_EOF)
1.143 daniel 9407: xmlParseTryOrFinish(ctxt, terminate);
1.140 daniel 9408: if (terminate) {
1.151 daniel 9409: /*
9410: * Grab the encoding if it was added on-the-fly
9411: */
9412: if ((ctxt->encoding != NULL) && (ctxt->myDoc != NULL) &&
9413: (ctxt->myDoc->encoding == NULL)) {
9414: ctxt->myDoc->encoding = ctxt->encoding;
9415: ctxt->encoding = NULL;
9416: }
9417:
9418: /*
9419: * Check for termination
9420: */
1.140 daniel 9421: if ((ctxt->instate != XML_PARSER_EOF) &&
9422: (ctxt->instate != XML_PARSER_EPILOG)) {
9423: if ((ctxt->sax != NULL) && (ctxt->sax->error != NULL))
9424: ctxt->sax->error(ctxt->userData,
9425: "Extra content at the end of the document\n");
9426: ctxt->wellFormed = 0;
1.180 daniel 9427: ctxt->disableSAX = 1;
1.140 daniel 9428: ctxt->errNo = XML_ERR_DOCUMENT_END;
9429: }
9430: if (ctxt->instate != XML_PARSER_EOF) {
1.171 daniel 9431: if ((ctxt->sax) && (ctxt->sax->endDocument != NULL) &&
9432: (!ctxt->disableSAX))
1.140 daniel 9433: ctxt->sax->endDocument(ctxt->userData);
9434: }
9435: ctxt->instate = XML_PARSER_EOF;
1.128 daniel 9436: }
9437: return((xmlParserErrors) ctxt->errNo);
9438: }
9439:
9440: /************************************************************************
9441: * *
1.98 daniel 9442: * I/O front end functions to the parser *
9443: * *
9444: ************************************************************************/
9445:
1.50 daniel 9446: /**
1.181 daniel 9447: * xmlCreatePushParserCtxt:
1.140 daniel 9448: * @sax: a SAX handler
9449: * @user_data: The user data returned on SAX callbacks
9450: * @chunk: a pointer to an array of chars
9451: * @size: number of chars in the array
9452: * @filename: an optional file name or URI
9453: *
9454: * Create a parser context for using the XML parser in push mode
9455: * To allow content encoding detection, @size should be >= 4
9456: * The value of @filename is used for fetching external entities
9457: * and error/warning reports.
9458: *
9459: * Returns the new parser context or NULL
9460: */
9461: xmlParserCtxtPtr
9462: xmlCreatePushParserCtxt(xmlSAXHandlerPtr sax, void *user_data,
9463: const char *chunk, int size, const char *filename) {
9464: xmlParserCtxtPtr ctxt;
9465: xmlParserInputPtr inputStream;
9466: xmlParserInputBufferPtr buf;
9467: xmlCharEncoding enc = XML_CHAR_ENCODING_NONE;
9468:
9469: /*
1.156 daniel 9470: * plug some encoding conversion routines
1.140 daniel 9471: */
9472: if ((chunk != NULL) && (size >= 4))
1.156 daniel 9473: enc = xmlDetectCharEncoding((const xmlChar *) chunk, size);
1.140 daniel 9474:
9475: buf = xmlAllocParserInputBuffer(enc);
9476: if (buf == NULL) return(NULL);
9477:
9478: ctxt = xmlNewParserCtxt();
9479: if (ctxt == NULL) {
9480: xmlFree(buf);
9481: return(NULL);
9482: }
9483: if (sax != NULL) {
9484: if (ctxt->sax != &xmlDefaultSAXHandler)
9485: xmlFree(ctxt->sax);
9486: ctxt->sax = (xmlSAXHandlerPtr) xmlMalloc(sizeof(xmlSAXHandler));
9487: if (ctxt->sax == NULL) {
9488: xmlFree(buf);
9489: xmlFree(ctxt);
9490: return(NULL);
9491: }
9492: memcpy(ctxt->sax, sax, sizeof(xmlSAXHandler));
9493: if (user_data != NULL)
9494: ctxt->userData = user_data;
9495: }
9496: if (filename == NULL) {
9497: ctxt->directory = NULL;
9498: } else {
9499: ctxt->directory = xmlParserGetDirectory(filename);
9500: }
9501:
9502: inputStream = xmlNewInputStream(ctxt);
9503: if (inputStream == NULL) {
9504: xmlFreeParserCtxt(ctxt);
9505: return(NULL);
9506: }
9507:
9508: if (filename == NULL)
9509: inputStream->filename = NULL;
9510: else
9511: inputStream->filename = xmlMemStrdup(filename);
9512: inputStream->buf = buf;
9513: inputStream->base = inputStream->buf->buffer->content;
9514: inputStream->cur = inputStream->buf->buffer->content;
1.156 daniel 9515: if (enc != XML_CHAR_ENCODING_NONE) {
9516: xmlSwitchEncoding(ctxt, enc);
9517: }
1.140 daniel 9518:
9519: inputPush(ctxt, inputStream);
9520:
9521: if ((size > 0) && (chunk != NULL) && (ctxt->input != NULL) &&
9522: (ctxt->input->buf != NULL)) {
9523: xmlParserInputBufferPush(ctxt->input->buf, size, chunk);
9524: #ifdef DEBUG_PUSH
9525: fprintf(stderr, "PP: pushed %d\n", size);
9526: #endif
9527: }
9528:
9529: return(ctxt);
9530: }
9531:
9532: /**
1.181 daniel 9533: * xmlCreateDocParserCtxt:
1.123 daniel 9534: * @cur: a pointer to an array of xmlChar
1.50 daniel 9535: *
1.69 daniel 9536: * Create a parser context for an XML in-memory document.
9537: *
9538: * Returns the new parser context or NULL
1.16 daniel 9539: */
1.69 daniel 9540: xmlParserCtxtPtr
1.123 daniel 9541: xmlCreateDocParserCtxt(xmlChar *cur) {
1.16 daniel 9542: xmlParserCtxtPtr ctxt;
1.40 daniel 9543: xmlParserInputPtr input;
1.16 daniel 9544:
1.97 daniel 9545: ctxt = xmlNewParserCtxt();
1.16 daniel 9546: if (ctxt == NULL) {
9547: return(NULL);
9548: }
1.96 daniel 9549: input = xmlNewInputStream(ctxt);
1.40 daniel 9550: if (input == NULL) {
1.97 daniel 9551: xmlFreeParserCtxt(ctxt);
1.40 daniel 9552: return(NULL);
9553: }
9554:
9555: input->base = cur;
9556: input->cur = cur;
9557:
9558: inputPush(ctxt, input);
1.69 daniel 9559: return(ctxt);
9560: }
9561:
9562: /**
1.181 daniel 9563: * xmlSAXParseDoc:
1.69 daniel 9564: * @sax: the SAX handler block
1.123 daniel 9565: * @cur: a pointer to an array of xmlChar
1.69 daniel 9566: * @recovery: work in recovery mode, i.e. tries to read no Well Formed
9567: * documents
9568: *
9569: * parse an XML in-memory document and build a tree.
9570: * It use the given SAX function block to handle the parsing callback.
9571: * If sax is NULL, fallback to the default DOM tree building routines.
9572: *
9573: * Returns the resulting document tree
9574: */
9575:
9576: xmlDocPtr
1.123 daniel 9577: xmlSAXParseDoc(xmlSAXHandlerPtr sax, xmlChar *cur, int recovery) {
1.69 daniel 9578: xmlDocPtr ret;
9579: xmlParserCtxtPtr ctxt;
9580:
9581: if (cur == NULL) return(NULL);
1.16 daniel 9582:
9583:
1.69 daniel 9584: ctxt = xmlCreateDocParserCtxt(cur);
9585: if (ctxt == NULL) return(NULL);
1.74 daniel 9586: if (sax != NULL) {
9587: ctxt->sax = sax;
9588: ctxt->userData = NULL;
9589: }
1.69 daniel 9590:
1.16 daniel 9591: xmlParseDocument(ctxt);
1.72 daniel 9592: if ((ctxt->wellFormed) || recovery) ret = ctxt->myDoc;
1.59 daniel 9593: else {
9594: ret = NULL;
1.72 daniel 9595: xmlFreeDoc(ctxt->myDoc);
9596: ctxt->myDoc = NULL;
1.59 daniel 9597: }
1.86 daniel 9598: if (sax != NULL)
9599: ctxt->sax = NULL;
1.69 daniel 9600: xmlFreeParserCtxt(ctxt);
1.16 daniel 9601:
1.1 veillard 9602: return(ret);
9603: }
9604:
1.50 daniel 9605: /**
1.181 daniel 9606: * xmlParseDoc:
1.123 daniel 9607: * @cur: a pointer to an array of xmlChar
1.55 daniel 9608: *
9609: * parse an XML in-memory document and build a tree.
9610: *
1.68 daniel 9611: * Returns the resulting document tree
1.55 daniel 9612: */
9613:
1.69 daniel 9614: xmlDocPtr
1.123 daniel 9615: xmlParseDoc(xmlChar *cur) {
1.59 daniel 9616: return(xmlSAXParseDoc(NULL, cur, 0));
1.76 daniel 9617: }
9618:
9619: /**
1.181 daniel 9620: * xmlSAXParseDTD:
1.76 daniel 9621: * @sax: the SAX handler block
9622: * @ExternalID: a NAME* containing the External ID of the DTD
9623: * @SystemID: a NAME* containing the URL to the DTD
9624: *
9625: * Load and parse an external subset.
9626: *
9627: * Returns the resulting xmlDtdPtr or NULL in case of error.
9628: */
9629:
9630: xmlDtdPtr
1.123 daniel 9631: xmlSAXParseDTD(xmlSAXHandlerPtr sax, const xmlChar *ExternalID,
9632: const xmlChar *SystemID) {
1.76 daniel 9633: xmlDtdPtr ret = NULL;
9634: xmlParserCtxtPtr ctxt;
1.83 daniel 9635: xmlParserInputPtr input = NULL;
1.76 daniel 9636: xmlCharEncoding enc;
9637:
9638: if ((ExternalID == NULL) && (SystemID == NULL)) return(NULL);
9639:
1.97 daniel 9640: ctxt = xmlNewParserCtxt();
1.76 daniel 9641: if (ctxt == NULL) {
9642: return(NULL);
9643: }
9644:
9645: /*
9646: * Set-up the SAX context
9647: */
9648: if (ctxt == NULL) return(NULL);
9649: if (sax != NULL) {
1.93 veillard 9650: if (ctxt->sax != NULL)
1.119 daniel 9651: xmlFree(ctxt->sax);
1.76 daniel 9652: ctxt->sax = sax;
9653: ctxt->userData = NULL;
9654: }
9655:
9656: /*
9657: * Ask the Entity resolver to load the damn thing
9658: */
9659:
9660: if ((ctxt->sax != NULL) && (ctxt->sax->resolveEntity != NULL))
9661: input = ctxt->sax->resolveEntity(ctxt->userData, ExternalID, SystemID);
9662: if (input == NULL) {
1.86 daniel 9663: if (sax != NULL) ctxt->sax = NULL;
1.76 daniel 9664: xmlFreeParserCtxt(ctxt);
9665: return(NULL);
9666: }
9667:
9668: /*
1.156 daniel 9669: * plug some encoding conversion routines here.
1.76 daniel 9670: */
9671: xmlPushInput(ctxt, input);
1.156 daniel 9672: enc = xmlDetectCharEncoding(ctxt->input->cur, 4);
1.76 daniel 9673: xmlSwitchEncoding(ctxt, enc);
9674:
1.95 veillard 9675: if (input->filename == NULL)
1.156 daniel 9676: input->filename = (char *) xmlStrdup(SystemID);
1.76 daniel 9677: input->line = 1;
9678: input->col = 1;
9679: input->base = ctxt->input->cur;
9680: input->cur = ctxt->input->cur;
9681: input->free = NULL;
9682:
9683: /*
9684: * let's parse that entity knowing it's an external subset.
9685: */
1.79 daniel 9686: xmlParseExternalSubset(ctxt, ExternalID, SystemID);
1.76 daniel 9687:
9688: if (ctxt->myDoc != NULL) {
9689: if (ctxt->wellFormed) {
9690: ret = ctxt->myDoc->intSubset;
9691: ctxt->myDoc->intSubset = NULL;
9692: } else {
9693: ret = NULL;
9694: }
9695: xmlFreeDoc(ctxt->myDoc);
9696: ctxt->myDoc = NULL;
9697: }
1.86 daniel 9698: if (sax != NULL) ctxt->sax = NULL;
1.76 daniel 9699: xmlFreeParserCtxt(ctxt);
9700:
9701: return(ret);
9702: }
9703:
9704: /**
1.181 daniel 9705: * xmlParseDTD:
1.76 daniel 9706: * @ExternalID: a NAME* containing the External ID of the DTD
9707: * @SystemID: a NAME* containing the URL to the DTD
9708: *
9709: * Load and parse an external subset.
9710: *
9711: * Returns the resulting xmlDtdPtr or NULL in case of error.
9712: */
9713:
9714: xmlDtdPtr
1.123 daniel 9715: xmlParseDTD(const xmlChar *ExternalID, const xmlChar *SystemID) {
1.76 daniel 9716: return(xmlSAXParseDTD(NULL, ExternalID, SystemID));
1.59 daniel 9717: }
9718:
9719: /**
1.181 daniel 9720: * xmlSAXParseBalancedChunk:
1.144 daniel 9721: * @ctx: an XML parser context (possibly NULL)
9722: * @sax: the SAX handler bloc (possibly NULL)
9723: * @user_data: The user data returned on SAX callbacks (possibly NULL)
9724: * @input: a parser input stream
9725: * @enc: the encoding
9726: *
9727: * Parse a well-balanced chunk of an XML document
9728: * The user has to provide SAX callback block whose routines will be
9729: * called by the parser
9730: * The allowed sequence for the Well Balanced Chunk is the one defined by
9731: * the content production in the XML grammar:
9732: *
9733: * [43] content ::= (element | CharData | Reference | CDSect | PI | Comment)*
9734: *
1.176 daniel 9735: * Returns 0 if the chunk is well balanced, -1 in case of args problem and
1.144 daniel 9736: * the error code otherwise
9737: */
9738:
9739: int
9740: xmlSAXParseBalancedChunk(xmlParserCtxtPtr ctx, xmlSAXHandlerPtr sax,
9741: void *user_data, xmlParserInputPtr input,
9742: xmlCharEncoding enc) {
9743: xmlParserCtxtPtr ctxt;
9744: int ret;
9745:
9746: if (input == NULL) return(-1);
9747:
9748: if (ctx != NULL)
9749: ctxt = ctx;
9750: else {
9751: ctxt = xmlNewParserCtxt();
9752: if (ctxt == NULL)
9753: return(-1);
9754: if (sax == NULL)
9755: ctxt->myDoc = xmlNewDoc(BAD_CAST "1.0");
9756: }
9757:
9758: /*
9759: * Set-up the SAX context
9760: */
9761: if (sax != NULL) {
9762: if (ctxt->sax != NULL)
9763: xmlFree(ctxt->sax);
9764: ctxt->sax = sax;
9765: ctxt->userData = user_data;
9766: }
9767:
9768: /*
9769: * plug some encoding conversion routines here.
9770: */
9771: xmlPushInput(ctxt, input);
9772: if (enc != XML_CHAR_ENCODING_NONE)
9773: xmlSwitchEncoding(ctxt, enc);
9774:
9775: /*
9776: * let's parse that entity knowing it's an external subset.
9777: */
9778: xmlParseContent(ctxt);
9779: ret = ctxt->errNo;
9780:
9781: if (ctx == NULL) {
9782: if (sax != NULL)
9783: ctxt->sax = NULL;
9784: else
9785: xmlFreeDoc(ctxt->myDoc);
9786: xmlFreeParserCtxt(ctxt);
9787: }
9788: return(ret);
9789: }
9790:
9791: /**
1.181 daniel 9792: * xmlParseExternalEntity:
9793: * @doc: the document the chunk pertains to
9794: * @sax: the SAX handler bloc (possibly NULL)
9795: * @user_data: The user data returned on SAX callbacks (possibly NULL)
1.185 daniel 9796: * @depth: Used for loop detection, use 0
1.181 daniel 9797: * @URL: the URL for the entity to load
9798: * @ID: the System ID for the entity to load
9799: * @list: the return value for the set of parsed nodes
9800: *
9801: * Parse an external general entity
9802: * An external general parsed entity is well-formed if it matches the
9803: * production labeled extParsedEnt.
9804: *
9805: * [78] extParsedEnt ::= TextDecl? content
9806: *
9807: * Returns 0 if the entity is well formed, -1 in case of args problem and
9808: * the parser error code otherwise
9809: */
9810:
9811: int
9812: xmlParseExternalEntity(xmlDocPtr doc, xmlSAXHandlerPtr sax, void *user_data,
1.185 daniel 9813: int depth, const xmlChar *URL, const xmlChar *ID, xmlNodePtr *list) {
1.181 daniel 9814: xmlParserCtxtPtr ctxt;
9815: xmlDocPtr newDoc;
9816: xmlSAXHandlerPtr oldsax = NULL;
9817: int ret = 0;
9818:
1.185 daniel 9819: if (depth > 40) {
9820: return(XML_ERR_ENTITY_LOOP);
9821: }
9822:
9823:
1.181 daniel 9824:
9825: if (list != NULL)
9826: *list = NULL;
9827: if ((URL == NULL) && (ID == NULL))
9828: return(-1);
9829:
9830:
9831: ctxt = xmlCreateEntityParserCtxt(URL, ID, doc->URL);
9832: if (ctxt == NULL) return(-1);
9833: ctxt->userData = ctxt;
9834: if (sax != NULL) {
9835: oldsax = ctxt->sax;
9836: ctxt->sax = sax;
9837: if (user_data != NULL)
9838: ctxt->userData = user_data;
9839: }
9840: newDoc = xmlNewDoc(BAD_CAST "1.0");
9841: if (newDoc == NULL) {
9842: xmlFreeParserCtxt(ctxt);
9843: return(-1);
9844: }
9845: if (doc != NULL) {
9846: newDoc->intSubset = doc->intSubset;
9847: newDoc->extSubset = doc->extSubset;
9848: }
9849: if (doc->URL != NULL) {
9850: newDoc->URL = xmlStrdup(doc->URL);
9851: }
9852: newDoc->children = xmlNewDocNode(newDoc, NULL, BAD_CAST "pseudoroot", NULL);
9853: if (newDoc->children == NULL) {
9854: if (sax != NULL)
9855: ctxt->sax = oldsax;
9856: xmlFreeParserCtxt(ctxt);
9857: newDoc->intSubset = NULL;
9858: newDoc->extSubset = NULL;
9859: xmlFreeDoc(newDoc);
9860: return(-1);
9861: }
9862: nodePush(ctxt, newDoc->children);
9863: if (doc == NULL) {
9864: ctxt->myDoc = newDoc;
9865: } else {
9866: ctxt->myDoc = doc;
9867: newDoc->children->doc = doc;
9868: }
9869:
9870: /*
9871: * Parse a possible text declaration first
9872: */
9873: GROW;
9874: if ((RAW == '<') && (NXT(1) == '?') &&
9875: (NXT(2) == 'x') && (NXT(3) == 'm') &&
9876: (NXT(4) == 'l') && (IS_BLANK(NXT(5)))) {
9877: xmlParseTextDecl(ctxt);
9878: }
9879:
9880: /*
9881: * Doing validity checking on chunk doesn't make sense
9882: */
9883: ctxt->instate = XML_PARSER_CONTENT;
9884: ctxt->validate = 0;
1.185 daniel 9885: ctxt->depth = depth;
1.181 daniel 9886:
9887: xmlParseContent(ctxt);
9888:
9889: if ((RAW == '<') && (NXT(1) == '/')) {
9890: if ((ctxt->sax != NULL) && (ctxt->sax->error != NULL))
9891: ctxt->sax->error(ctxt->userData,
9892: "chunk is not well balanced\n");
9893: ctxt->wellFormed = 0;
9894: ctxt->disableSAX = 1;
9895: ctxt->errNo = XML_ERR_NOT_WELL_BALANCED;
9896: } else if (RAW != 0) {
9897: if ((ctxt->sax != NULL) && (ctxt->sax->error != NULL))
9898: ctxt->sax->error(ctxt->userData,
9899: "extra content at the end of well balanced chunk\n");
9900: ctxt->wellFormed = 0;
9901: ctxt->disableSAX = 1;
9902: ctxt->errNo = XML_ERR_EXTRA_CONTENT;
9903: }
9904: if (ctxt->node != newDoc->children) {
9905: if ((ctxt->sax != NULL) && (ctxt->sax->error != NULL))
9906: ctxt->sax->error(ctxt->userData,
9907: "chunk is not well balanced\n");
9908: ctxt->wellFormed = 0;
9909: ctxt->disableSAX = 1;
9910: ctxt->errNo = XML_ERR_NOT_WELL_BALANCED;
9911: }
9912:
9913: if (!ctxt->wellFormed) {
9914: if (ctxt->errNo == 0)
9915: ret = 1;
9916: else
9917: ret = ctxt->errNo;
9918: } else {
9919: if (list != NULL) {
9920: xmlNodePtr cur;
9921:
9922: /*
9923: * Return the newly created nodeset after unlinking it from
9924: * they pseudo parent.
9925: */
9926: cur = newDoc->children->children;
9927: *list = cur;
9928: while (cur != NULL) {
9929: cur->parent = NULL;
9930: cur = cur->next;
9931: }
9932: newDoc->children->children = NULL;
9933: }
9934: ret = 0;
9935: }
9936: if (sax != NULL)
9937: ctxt->sax = oldsax;
9938: xmlFreeParserCtxt(ctxt);
9939: newDoc->intSubset = NULL;
9940: newDoc->extSubset = NULL;
9941: xmlFreeDoc(newDoc);
9942:
9943: return(ret);
9944: }
9945:
9946: /**
9947: * xmlParseBalancedChunkMemory:
1.176 daniel 9948: * @doc: the document the chunk pertains to
9949: * @sax: the SAX handler bloc (possibly NULL)
9950: * @user_data: The user data returned on SAX callbacks (possibly NULL)
1.185 daniel 9951: * @depth: Used for loop detection, use 0
1.176 daniel 9952: * @string: the input string in UTF8 or ISO-Latin (zero terminated)
9953: * @list: the return value for the set of parsed nodes
9954: *
9955: * Parse a well-balanced chunk of an XML document
9956: * called by the parser
9957: * The allowed sequence for the Well Balanced Chunk is the one defined by
9958: * the content production in the XML grammar:
1.144 daniel 9959: *
1.175 daniel 9960: * [43] content ::= (element | CharData | Reference | CDSect | PI | Comment)*
9961: *
1.176 daniel 9962: * Returns 0 if the chunk is well balanced, -1 in case of args problem and
9963: * the parser error code otherwise
1.144 daniel 9964: */
9965:
1.175 daniel 9966: int
9967: xmlParseBalancedChunkMemory(xmlDocPtr doc, xmlSAXHandlerPtr sax,
1.185 daniel 9968: void *user_data, int depth, const xmlChar *string, xmlNodePtr *list) {
1.176 daniel 9969: xmlParserCtxtPtr ctxt;
1.175 daniel 9970: xmlDocPtr newDoc;
1.181 daniel 9971: xmlSAXHandlerPtr oldsax = NULL;
1.175 daniel 9972: int size;
1.176 daniel 9973: int ret = 0;
1.175 daniel 9974:
1.185 daniel 9975: if (depth > 40) {
9976: return(XML_ERR_ENTITY_LOOP);
9977: }
9978:
1.175 daniel 9979:
1.176 daniel 9980: if (list != NULL)
9981: *list = NULL;
9982: if (string == NULL)
9983: return(-1);
9984:
9985: size = xmlStrlen(string);
9986:
1.183 daniel 9987: ctxt = xmlCreateMemoryParserCtxt((char *) string, size);
1.176 daniel 9988: if (ctxt == NULL) return(-1);
9989: ctxt->userData = ctxt;
1.175 daniel 9990: if (sax != NULL) {
1.176 daniel 9991: oldsax = ctxt->sax;
9992: ctxt->sax = sax;
9993: if (user_data != NULL)
9994: ctxt->userData = user_data;
1.175 daniel 9995: }
9996: newDoc = xmlNewDoc(BAD_CAST "1.0");
1.176 daniel 9997: if (newDoc == NULL) {
9998: xmlFreeParserCtxt(ctxt);
9999: return(-1);
10000: }
1.175 daniel 10001: if (doc != NULL) {
10002: newDoc->intSubset = doc->intSubset;
10003: newDoc->extSubset = doc->extSubset;
10004: }
1.176 daniel 10005: newDoc->children = xmlNewDocNode(newDoc, NULL, BAD_CAST "pseudoroot", NULL);
10006: if (newDoc->children == NULL) {
10007: if (sax != NULL)
10008: ctxt->sax = oldsax;
10009: xmlFreeParserCtxt(ctxt);
10010: newDoc->intSubset = NULL;
10011: newDoc->extSubset = NULL;
10012: xmlFreeDoc(newDoc);
10013: return(-1);
10014: }
10015: nodePush(ctxt, newDoc->children);
10016: if (doc == NULL) {
10017: ctxt->myDoc = newDoc;
10018: } else {
10019: ctxt->myDoc = doc;
10020: newDoc->children->doc = doc;
10021: }
10022: ctxt->instate = XML_PARSER_CONTENT;
1.185 daniel 10023: ctxt->depth = depth;
1.176 daniel 10024:
10025: /*
10026: * Doing validity checking on chunk doesn't make sense
10027: */
10028: ctxt->validate = 0;
10029:
1.175 daniel 10030: xmlParseContent(ctxt);
1.176 daniel 10031:
10032: if ((RAW == '<') && (NXT(1) == '/')) {
10033: if ((ctxt->sax != NULL) && (ctxt->sax->error != NULL))
10034: ctxt->sax->error(ctxt->userData,
10035: "chunk is not well balanced\n");
10036: ctxt->wellFormed = 0;
1.180 daniel 10037: ctxt->disableSAX = 1;
1.176 daniel 10038: ctxt->errNo = XML_ERR_NOT_WELL_BALANCED;
10039: } else if (RAW != 0) {
10040: if ((ctxt->sax != NULL) && (ctxt->sax->error != NULL))
10041: ctxt->sax->error(ctxt->userData,
10042: "extra content at the end of well balanced chunk\n");
10043: ctxt->wellFormed = 0;
1.180 daniel 10044: ctxt->disableSAX = 1;
1.176 daniel 10045: ctxt->errNo = XML_ERR_EXTRA_CONTENT;
10046: }
10047: if (ctxt->node != newDoc->children) {
10048: if ((ctxt->sax != NULL) && (ctxt->sax->error != NULL))
10049: ctxt->sax->error(ctxt->userData,
10050: "chunk is not well balanced\n");
10051: ctxt->wellFormed = 0;
1.180 daniel 10052: ctxt->disableSAX = 1;
1.176 daniel 10053: ctxt->errNo = XML_ERR_NOT_WELL_BALANCED;
10054: }
1.175 daniel 10055:
1.176 daniel 10056: if (!ctxt->wellFormed) {
10057: if (ctxt->errNo == 0)
10058: ret = 1;
10059: else
10060: ret = ctxt->errNo;
10061: } else {
10062: if (list != NULL) {
10063: xmlNodePtr cur;
1.175 daniel 10064:
1.176 daniel 10065: /*
10066: * Return the newly created nodeset after unlinking it from
10067: * they pseudo parent.
10068: */
10069: cur = newDoc->children->children;
10070: *list = cur;
10071: while (cur != NULL) {
10072: cur->parent = NULL;
10073: cur = cur->next;
10074: }
10075: newDoc->children->children = NULL;
10076: }
10077: ret = 0;
1.175 daniel 10078: }
1.176 daniel 10079: if (sax != NULL)
10080: ctxt->sax = oldsax;
1.175 daniel 10081: xmlFreeParserCtxt(ctxt);
10082: newDoc->intSubset = NULL;
10083: newDoc->extSubset = NULL;
1.176 daniel 10084: xmlFreeDoc(newDoc);
1.175 daniel 10085:
1.176 daniel 10086: return(ret);
1.144 daniel 10087: }
10088:
10089: /**
1.181 daniel 10090: * xmlParseBalancedChunkFile:
1.144 daniel 10091: * @doc: the document the chunk pertains to
10092: *
10093: * Parse a well-balanced chunk of an XML document contained in a file
10094: *
10095: * Returns the resulting list of nodes resulting from the parsing,
10096: * they are not added to @node
10097: */
10098:
10099: xmlNodePtr
10100: xmlParseBalancedChunkFile(xmlDocPtr doc, xmlNodePtr node) {
1.156 daniel 10101: /* TODO !!! */
10102: return(NULL);
1.144 daniel 10103: }
10104:
10105: /**
1.181 daniel 10106: * xmlRecoverDoc:
1.123 daniel 10107: * @cur: a pointer to an array of xmlChar
1.59 daniel 10108: *
10109: * parse an XML in-memory document and build a tree.
10110: * In the case the document is not Well Formed, a tree is built anyway
10111: *
1.68 daniel 10112: * Returns the resulting document tree
1.59 daniel 10113: */
10114:
1.69 daniel 10115: xmlDocPtr
1.123 daniel 10116: xmlRecoverDoc(xmlChar *cur) {
1.59 daniel 10117: return(xmlSAXParseDoc(NULL, cur, 1));
1.55 daniel 10118: }
10119:
10120: /**
1.181 daniel 10121: * xmlCreateEntityParserCtxt:
10122: * @URL: the entity URL
10123: * @ID: the entity PUBLIC ID
10124: * @base: a posible base for the target URI
10125: *
10126: * Create a parser context for an external entity
10127: * Automatic support for ZLIB/Compress compressed document is provided
10128: * by default if found at compile-time.
10129: *
10130: * Returns the new parser context or NULL
10131: */
10132: xmlParserCtxtPtr
10133: xmlCreateEntityParserCtxt(const xmlChar *URL, const xmlChar *ID,
10134: const xmlChar *base) {
10135: xmlParserCtxtPtr ctxt;
10136: xmlParserInputPtr inputStream;
10137: char *directory = NULL;
10138:
10139: ctxt = xmlNewParserCtxt();
10140: if (ctxt == NULL) {
10141: return(NULL);
10142: }
10143:
1.182 daniel 10144: inputStream = xmlLoadExternalEntity((char *)URL, (char *)ID, ctxt);
1.181 daniel 10145: if (inputStream == NULL) {
10146: xmlFreeParserCtxt(ctxt);
10147: return(NULL);
10148: }
10149:
10150: inputPush(ctxt, inputStream);
10151:
10152: if ((ctxt->directory == NULL) && (directory == NULL))
1.182 daniel 10153: directory = xmlParserGetDirectory((char *)URL);
1.181 daniel 10154: if ((ctxt->directory == NULL) && (directory != NULL))
10155: ctxt->directory = directory;
10156:
10157: return(ctxt);
10158: }
10159:
10160: /**
10161: * xmlCreateFileParserCtxt:
1.50 daniel 10162: * @filename: the filename
10163: *
1.69 daniel 10164: * Create a parser context for a file content.
10165: * Automatic support for ZLIB/Compress compressed document is provided
10166: * by default if found at compile-time.
1.50 daniel 10167: *
1.69 daniel 10168: * Returns the new parser context or NULL
1.9 httpng 10169: */
1.69 daniel 10170: xmlParserCtxtPtr
10171: xmlCreateFileParserCtxt(const char *filename)
10172: {
10173: xmlParserCtxtPtr ctxt;
1.40 daniel 10174: xmlParserInputPtr inputStream;
1.91 daniel 10175: xmlParserInputBufferPtr buf;
1.111 daniel 10176: char *directory = NULL;
1.9 httpng 10177:
1.91 daniel 10178: buf = xmlParserInputBufferCreateFilename(filename, XML_CHAR_ENCODING_NONE);
10179: if (buf == NULL) return(NULL);
1.9 httpng 10180:
1.97 daniel 10181: ctxt = xmlNewParserCtxt();
1.16 daniel 10182: if (ctxt == NULL) {
10183: return(NULL);
10184: }
1.97 daniel 10185:
1.96 daniel 10186: inputStream = xmlNewInputStream(ctxt);
1.40 daniel 10187: if (inputStream == NULL) {
1.97 daniel 10188: xmlFreeParserCtxt(ctxt);
1.40 daniel 10189: return(NULL);
10190: }
10191:
1.119 daniel 10192: inputStream->filename = xmlMemStrdup(filename);
1.91 daniel 10193: inputStream->buf = buf;
10194: inputStream->base = inputStream->buf->buffer->content;
10195: inputStream->cur = inputStream->buf->buffer->content;
1.16 daniel 10196:
1.40 daniel 10197: inputPush(ctxt, inputStream);
1.110 daniel 10198: if ((ctxt->directory == NULL) && (directory == NULL))
1.106 daniel 10199: directory = xmlParserGetDirectory(filename);
10200: if ((ctxt->directory == NULL) && (directory != NULL))
1.110 daniel 10201: ctxt->directory = directory;
1.106 daniel 10202:
1.69 daniel 10203: return(ctxt);
10204: }
10205:
10206: /**
1.181 daniel 10207: * xmlSAXParseFile:
1.69 daniel 10208: * @sax: the SAX handler block
10209: * @filename: the filename
10210: * @recovery: work in recovery mode, i.e. tries to read no Well Formed
10211: * documents
10212: *
10213: * parse an XML file and build a tree. Automatic support for ZLIB/Compress
10214: * compressed document is provided by default if found at compile-time.
10215: * It use the given SAX function block to handle the parsing callback.
10216: * If sax is NULL, fallback to the default DOM tree building routines.
10217: *
10218: * Returns the resulting document tree
10219: */
10220:
1.79 daniel 10221: xmlDocPtr
10222: xmlSAXParseFile(xmlSAXHandlerPtr sax, const char *filename,
1.69 daniel 10223: int recovery) {
10224: xmlDocPtr ret;
10225: xmlParserCtxtPtr ctxt;
1.111 daniel 10226: char *directory = NULL;
1.69 daniel 10227:
10228: ctxt = xmlCreateFileParserCtxt(filename);
10229: if (ctxt == NULL) return(NULL);
1.74 daniel 10230: if (sax != NULL) {
1.93 veillard 10231: if (ctxt->sax != NULL)
1.119 daniel 10232: xmlFree(ctxt->sax);
1.74 daniel 10233: ctxt->sax = sax;
10234: ctxt->userData = NULL;
10235: }
1.106 daniel 10236:
1.110 daniel 10237: if ((ctxt->directory == NULL) && (directory == NULL))
1.106 daniel 10238: directory = xmlParserGetDirectory(filename);
10239: if ((ctxt->directory == NULL) && (directory != NULL))
1.156 daniel 10240: ctxt->directory = (char *) xmlStrdup((xmlChar *) directory);
1.16 daniel 10241:
10242: xmlParseDocument(ctxt);
1.40 daniel 10243:
1.72 daniel 10244: if ((ctxt->wellFormed) || recovery) ret = ctxt->myDoc;
1.59 daniel 10245: else {
10246: ret = NULL;
1.72 daniel 10247: xmlFreeDoc(ctxt->myDoc);
10248: ctxt->myDoc = NULL;
1.59 daniel 10249: }
1.86 daniel 10250: if (sax != NULL)
10251: ctxt->sax = NULL;
1.69 daniel 10252: xmlFreeParserCtxt(ctxt);
1.20 daniel 10253:
10254: return(ret);
10255: }
10256:
1.55 daniel 10257: /**
1.181 daniel 10258: * xmlParseFile:
1.55 daniel 10259: * @filename: the filename
10260: *
10261: * parse an XML file and build a tree. Automatic support for ZLIB/Compress
10262: * compressed document is provided by default if found at compile-time.
10263: *
1.68 daniel 10264: * Returns the resulting document tree
1.55 daniel 10265: */
10266:
1.79 daniel 10267: xmlDocPtr
10268: xmlParseFile(const char *filename) {
1.59 daniel 10269: return(xmlSAXParseFile(NULL, filename, 0));
10270: }
10271:
10272: /**
1.181 daniel 10273: * xmlRecoverFile:
1.59 daniel 10274: * @filename: the filename
10275: *
10276: * parse an XML file and build a tree. Automatic support for ZLIB/Compress
10277: * compressed document is provided by default if found at compile-time.
10278: * In the case the document is not Well Formed, a tree is built anyway
10279: *
1.68 daniel 10280: * Returns the resulting document tree
1.59 daniel 10281: */
10282:
1.79 daniel 10283: xmlDocPtr
10284: xmlRecoverFile(const char *filename) {
1.59 daniel 10285: return(xmlSAXParseFile(NULL, filename, 1));
1.55 daniel 10286: }
1.32 daniel 10287:
1.50 daniel 10288: /**
1.181 daniel 10289: * xmlCreateMemoryParserCtxt:
10290: * @buffer: a pointer to a zero terminated char array
10291: * @size: the size of the array (without the trailing 0)
1.50 daniel 10292: *
1.69 daniel 10293: * Create a parser context for an XML in-memory document.
1.50 daniel 10294: *
1.69 daniel 10295: * Returns the new parser context or NULL
1.20 daniel 10296: */
1.69 daniel 10297: xmlParserCtxtPtr
10298: xmlCreateMemoryParserCtxt(char *buffer, int size) {
1.20 daniel 10299: xmlParserCtxtPtr ctxt;
1.40 daniel 10300: xmlParserInputPtr input;
10301:
1.179 daniel 10302: if (buffer[size] != 0)
1.181 daniel 10303: return(NULL);
1.40 daniel 10304:
1.97 daniel 10305: ctxt = xmlNewParserCtxt();
1.181 daniel 10306: if (ctxt == NULL)
1.20 daniel 10307: return(NULL);
1.97 daniel 10308:
1.96 daniel 10309: input = xmlNewInputStream(ctxt);
1.40 daniel 10310: if (input == NULL) {
1.97 daniel 10311: xmlFreeParserCtxt(ctxt);
1.40 daniel 10312: return(NULL);
10313: }
1.20 daniel 10314:
1.40 daniel 10315: input->filename = NULL;
10316: input->line = 1;
10317: input->col = 1;
1.96 daniel 10318: input->buf = NULL;
1.91 daniel 10319: input->consumed = 0;
1.75 daniel 10320:
1.116 daniel 10321: input->base = BAD_CAST buffer;
10322: input->cur = BAD_CAST buffer;
1.69 daniel 10323: input->free = NULL;
1.20 daniel 10324:
1.40 daniel 10325: inputPush(ctxt, input);
1.69 daniel 10326: return(ctxt);
10327: }
10328:
10329: /**
1.181 daniel 10330: * xmlSAXParseMemory:
1.69 daniel 10331: * @sax: the SAX handler block
10332: * @buffer: an pointer to a char array
1.127 daniel 10333: * @size: the size of the array
10334: * @recovery: work in recovery mode, i.e. tries to read not Well Formed
1.69 daniel 10335: * documents
10336: *
10337: * parse an XML in-memory block and use the given SAX function block
10338: * to handle the parsing callback. If sax is NULL, fallback to the default
10339: * DOM tree building routines.
10340: *
10341: * Returns the resulting document tree
10342: */
10343: xmlDocPtr
10344: xmlSAXParseMemory(xmlSAXHandlerPtr sax, char *buffer, int size, int recovery) {
10345: xmlDocPtr ret;
10346: xmlParserCtxtPtr ctxt;
10347:
10348: ctxt = xmlCreateMemoryParserCtxt(buffer, size);
10349: if (ctxt == NULL) return(NULL);
1.74 daniel 10350: if (sax != NULL) {
10351: ctxt->sax = sax;
10352: ctxt->userData = NULL;
10353: }
1.20 daniel 10354:
10355: xmlParseDocument(ctxt);
1.40 daniel 10356:
1.72 daniel 10357: if ((ctxt->wellFormed) || recovery) ret = ctxt->myDoc;
1.59 daniel 10358: else {
10359: ret = NULL;
1.72 daniel 10360: xmlFreeDoc(ctxt->myDoc);
10361: ctxt->myDoc = NULL;
1.59 daniel 10362: }
1.86 daniel 10363: if (sax != NULL)
10364: ctxt->sax = NULL;
1.69 daniel 10365: xmlFreeParserCtxt(ctxt);
1.16 daniel 10366:
1.9 httpng 10367: return(ret);
1.17 daniel 10368: }
10369:
1.55 daniel 10370: /**
1.181 daniel 10371: * xmlParseMemory:
1.68 daniel 10372: * @buffer: an pointer to a char array
1.55 daniel 10373: * @size: the size of the array
10374: *
10375: * parse an XML in-memory block and build a tree.
10376: *
1.68 daniel 10377: * Returns the resulting document tree
1.55 daniel 10378: */
10379:
10380: xmlDocPtr xmlParseMemory(char *buffer, int size) {
1.59 daniel 10381: return(xmlSAXParseMemory(NULL, buffer, size, 0));
10382: }
10383:
10384: /**
1.181 daniel 10385: * xmlRecoverMemory:
1.68 daniel 10386: * @buffer: an pointer to a char array
1.59 daniel 10387: * @size: the size of the array
10388: *
10389: * parse an XML in-memory block and build a tree.
10390: * In the case the document is not Well Formed, a tree is built anyway
10391: *
1.68 daniel 10392: * Returns the resulting document tree
1.59 daniel 10393: */
10394:
10395: xmlDocPtr xmlRecoverMemory(char *buffer, int size) {
10396: return(xmlSAXParseMemory(NULL, buffer, size, 1));
1.17 daniel 10397: }
10398:
10399:
1.50 daniel 10400: /**
10401: * xmlSetupParserForBuffer:
10402: * @ctxt: an XML parser context
1.123 daniel 10403: * @buffer: a xmlChar * buffer
1.50 daniel 10404: * @filename: a file name
10405: *
1.19 daniel 10406: * Setup the parser context to parse a new buffer; Clears any prior
10407: * contents from the parser context. The buffer parameter must not be
10408: * NULL, but the filename parameter can be
10409: */
1.55 daniel 10410: void
1.123 daniel 10411: xmlSetupParserForBuffer(xmlParserCtxtPtr ctxt, const xmlChar* buffer,
1.17 daniel 10412: const char* filename)
10413: {
1.96 daniel 10414: xmlParserInputPtr input;
1.40 daniel 10415:
1.96 daniel 10416: input = xmlNewInputStream(ctxt);
10417: if (input == NULL) {
10418: perror("malloc");
1.119 daniel 10419: xmlFree(ctxt);
1.145 daniel 10420: return;
1.96 daniel 10421: }
10422:
10423: xmlClearParserCtxt(ctxt);
10424: if (filename != NULL)
1.119 daniel 10425: input->filename = xmlMemStrdup(filename);
1.96 daniel 10426: input->base = buffer;
10427: input->cur = buffer;
10428: inputPush(ctxt, input);
1.17 daniel 10429: }
10430:
1.123 daniel 10431: /**
10432: * xmlSAXUserParseFile:
10433: * @sax: a SAX handler
10434: * @user_data: The user data returned on SAX callbacks
10435: * @filename: a file name
10436: *
10437: * parse an XML file and call the given SAX handler routines.
10438: * Automatic support for ZLIB/Compress compressed document is provided
10439: *
10440: * Returns 0 in case of success or a error number otherwise
10441: */
1.131 daniel 10442: int
10443: xmlSAXUserParseFile(xmlSAXHandlerPtr sax, void *user_data,
10444: const char *filename) {
1.123 daniel 10445: int ret = 0;
10446: xmlParserCtxtPtr ctxt;
10447:
10448: ctxt = xmlCreateFileParserCtxt(filename);
10449: if (ctxt == NULL) return -1;
1.134 daniel 10450: if (ctxt->sax != &xmlDefaultSAXHandler)
10451: xmlFree(ctxt->sax);
1.123 daniel 10452: ctxt->sax = sax;
1.140 daniel 10453: if (user_data != NULL)
10454: ctxt->userData = user_data;
1.123 daniel 10455:
10456: xmlParseDocument(ctxt);
10457:
10458: if (ctxt->wellFormed)
10459: ret = 0;
10460: else {
10461: if (ctxt->errNo != 0)
10462: ret = ctxt->errNo;
10463: else
10464: ret = -1;
10465: }
10466: if (sax != NULL)
10467: ctxt->sax = NULL;
10468: xmlFreeParserCtxt(ctxt);
10469:
10470: return ret;
10471: }
10472:
10473: /**
10474: * xmlSAXUserParseMemory:
10475: * @sax: a SAX handler
10476: * @user_data: The user data returned on SAX callbacks
10477: * @buffer: an in-memory XML document input
1.127 daniel 10478: * @size: the length of the XML document in bytes
1.123 daniel 10479: *
10480: * A better SAX parsing routine.
10481: * parse an XML in-memory buffer and call the given SAX handler routines.
10482: *
10483: * Returns 0 in case of success or a error number otherwise
10484: */
10485: int xmlSAXUserParseMemory(xmlSAXHandlerPtr sax, void *user_data,
10486: char *buffer, int size) {
10487: int ret = 0;
10488: xmlParserCtxtPtr ctxt;
10489:
10490: ctxt = xmlCreateMemoryParserCtxt(buffer, size);
10491: if (ctxt == NULL) return -1;
10492: ctxt->sax = sax;
10493: ctxt->userData = user_data;
10494:
10495: xmlParseDocument(ctxt);
10496:
10497: if (ctxt->wellFormed)
10498: ret = 0;
10499: else {
10500: if (ctxt->errNo != 0)
10501: ret = ctxt->errNo;
10502: else
10503: ret = -1;
10504: }
10505: if (sax != NULL)
10506: ctxt->sax = NULL;
10507: xmlFreeParserCtxt(ctxt);
10508:
10509: return ret;
10510: }
10511:
1.32 daniel 10512:
1.98 daniel 10513: /************************************************************************
10514: * *
1.127 daniel 10515: * Miscellaneous *
1.98 daniel 10516: * *
10517: ************************************************************************/
10518:
/**
 * xmlCleanupParser:
 *
 * Cleanup function for the XML parser. It releases the parser-wide
 * state held by the character encoding handlers and by the predefined
 * entities, in that order. It doesn't deallocate any document related
 * memory. Calling this function should not prevent reusing the parser.
 */

void
xmlCleanupParser(void)
{
    /* encoding handlers first, then the predefined entities */
    xmlCleanupCharEncodingHandlers();
    xmlCleanupPredefinedEntities();
}
1.98 daniel 10533:
1.50 daniel 10534: /**
10535: * xmlParserFindNodeInfo:
10536: * @ctxt: an XML parser context
10537: * @node: an XML node within the tree
10538: *
10539: * Find the parser node info struct for a given node
10540: *
1.68 daniel 10541: * Returns an xmlParserNodeInfo block pointer or NULL
1.32 daniel 10542: */
10543: const xmlParserNodeInfo* xmlParserFindNodeInfo(const xmlParserCtxt* ctx,
10544: const xmlNode* node)
10545: {
10546: unsigned long pos;
10547:
10548: /* Find position where node should be at */
10549: pos = xmlParserFindNodeInfoIndex(&ctx->node_seq, node);
10550: if ( ctx->node_seq.buffer[pos].node == node )
10551: return &ctx->node_seq.buffer[pos];
10552: else
10553: return NULL;
10554: }
10555:
10556:
1.50 daniel 10557: /**
1.181 daniel 10558: * xmlInitNodeInfoSeq:
1.50 daniel 10559: * @seq: a node info sequence pointer
10560: *
10561: * -- Initialize (set to initial state) node info sequence
1.32 daniel 10562: */
1.55 daniel 10563: void
10564: xmlInitNodeInfoSeq(xmlParserNodeInfoSeqPtr seq)
1.32 daniel 10565: {
10566: seq->length = 0;
10567: seq->maximum = 0;
10568: seq->buffer = NULL;
10569: }
10570:
1.50 daniel 10571: /**
1.181 daniel 10572: * xmlClearNodeInfoSeq:
1.50 daniel 10573: * @seq: a node info sequence pointer
10574: *
10575: * -- Clear (release memory and reinitialize) node
1.32 daniel 10576: * info sequence
10577: */
1.55 daniel 10578: void
10579: xmlClearNodeInfoSeq(xmlParserNodeInfoSeqPtr seq)
1.32 daniel 10580: {
10581: if ( seq->buffer != NULL )
1.119 daniel 10582: xmlFree(seq->buffer);
1.32 daniel 10583: xmlInitNodeInfoSeq(seq);
10584: }
10585:
10586:
1.50 daniel 10587: /**
10588: * xmlParserFindNodeInfoIndex:
10589: * @seq: a node info sequence pointer
10590: * @node: an XML node pointer
10591: *
10592: *
1.32 daniel 10593: * xmlParserFindNodeInfoIndex : Find the index that the info record for
10594: * the given node is or should be at in a sorted sequence
1.68 daniel 10595: *
10596: * Returns a long indicating the position of the record
1.32 daniel 10597: */
10598: unsigned long xmlParserFindNodeInfoIndex(const xmlParserNodeInfoSeq* seq,
10599: const xmlNode* node)
10600: {
10601: unsigned long upper, lower, middle;
10602: int found = 0;
10603:
10604: /* Do a binary search for the key */
10605: lower = 1;
10606: upper = seq->length;
10607: middle = 0;
10608: while ( lower <= upper && !found) {
10609: middle = lower + (upper - lower) / 2;
10610: if ( node == seq->buffer[middle - 1].node )
10611: found = 1;
10612: else if ( node < seq->buffer[middle - 1].node )
10613: upper = middle - 1;
10614: else
10615: lower = middle + 1;
10616: }
10617:
10618: /* Return position */
10619: if ( middle == 0 || seq->buffer[middle - 1].node < node )
10620: return middle;
10621: else
10622: return middle - 1;
10623: }
10624:
10625:
1.50 daniel 10626: /**
10627: * xmlParserAddNodeInfo:
10628: * @ctxt: an XML parser context
1.68 daniel 10629: * @info: a node info sequence pointer
1.50 daniel 10630: *
10631: * Insert node info record into the sorted sequence
1.32 daniel 10632: */
1.55 daniel 10633: void
10634: xmlParserAddNodeInfo(xmlParserCtxtPtr ctxt,
1.68 daniel 10635: const xmlParserNodeInfo* info)
1.32 daniel 10636: {
10637: unsigned long pos;
10638: static unsigned int block_size = 5;
10639:
10640: /* Find pos and check to see if node is already in the sequence */
1.55 daniel 10641: pos = xmlParserFindNodeInfoIndex(&ctxt->node_seq, info->node);
10642: if ( pos < ctxt->node_seq.length
10643: && ctxt->node_seq.buffer[pos].node == info->node ) {
10644: ctxt->node_seq.buffer[pos] = *info;
1.32 daniel 10645: }
10646:
10647: /* Otherwise, we need to add new node to buffer */
10648: else {
10649: /* Expand buffer by 5 if needed */
1.55 daniel 10650: if ( ctxt->node_seq.length + 1 > ctxt->node_seq.maximum ) {
1.32 daniel 10651: xmlParserNodeInfo* tmp_buffer;
1.55 daniel 10652: unsigned int byte_size = (sizeof(*ctxt->node_seq.buffer)
10653: *(ctxt->node_seq.maximum + block_size));
1.32 daniel 10654:
1.55 daniel 10655: if ( ctxt->node_seq.buffer == NULL )
1.119 daniel 10656: tmp_buffer = (xmlParserNodeInfo*) xmlMalloc(byte_size);
1.32 daniel 10657: else
1.119 daniel 10658: tmp_buffer = (xmlParserNodeInfo*) xmlRealloc(ctxt->node_seq.buffer, byte_size);
1.32 daniel 10659:
10660: if ( tmp_buffer == NULL ) {
1.55 daniel 10661: if ((ctxt->sax != NULL) && (ctxt->sax->error != NULL))
1.74 daniel 10662: ctxt->sax->error(ctxt->userData, "Out of memory\n");
1.123 daniel 10663: ctxt->errNo = XML_ERR_NO_MEMORY;
1.32 daniel 10664: return;
10665: }
1.55 daniel 10666: ctxt->node_seq.buffer = tmp_buffer;
10667: ctxt->node_seq.maximum += block_size;
1.32 daniel 10668: }
10669:
10670: /* If position is not at end, move elements out of the way */
1.55 daniel 10671: if ( pos != ctxt->node_seq.length ) {
1.32 daniel 10672: unsigned long i;
10673:
1.55 daniel 10674: for ( i = ctxt->node_seq.length; i > pos; i-- )
10675: ctxt->node_seq.buffer[i] = ctxt->node_seq.buffer[i - 1];
1.32 daniel 10676: }
10677:
10678: /* Copy element and increase length */
1.55 daniel 10679: ctxt->node_seq.buffer[pos] = *info;
10680: ctxt->node_seq.length++;
1.32 daniel 10681: }
10682: }
1.77 daniel 10683:
1.98 daniel 10684:
10685: /**
1.181 daniel 10686: * xmlSubstituteEntitiesDefault:
1.98 daniel 10687: * @val: int 0 or 1
10688: *
10689: * Set and return the previous value for default entity support.
10690: * Initially the parser always keep entity references instead of substituting
10691: * entity values in the output. This function has to be used to change the
10692: * default parser behaviour
10693: * SAX::subtituteEntities() has to be used for changing that on a file by
10694: * file basis.
10695: *
10696: * Returns the last value for 0 for no substitution, 1 for substitution.
10697: */
10698:
10699: int
10700: xmlSubstituteEntitiesDefault(int val) {
10701: int old = xmlSubstituteEntitiesDefaultValue;
10702:
10703: xmlSubstituteEntitiesDefaultValue = val;
1.180 daniel 10704: return(old);
10705: }
10706:
10707: /**
10708: * xmlKeepBlanksDefault:
10709: * @val: int 0 or 1
10710: *
10711: * Set and return the previous value for default blanks text nodes support.
10712: * The 1.x version of the parser used an heuristic to try to detect
10713: * ignorable white spaces. As a result the SAX callback was generating
10714: * ignorableWhitespace() callbacks instead of characters() one, and when
10715: * using the DOM output text nodes containing those blanks were not generated.
10716: * The 2.x and later version will switch to the XML standard way and
10717: * ignorableWhitespace() are only generated when running the parser in
10718: * validating mode and when the current element doesn't allow CDATA or
10719: * mixed content.
10720: * This function is provided as a way to force the standard behaviour
10721: * on 1.X libs and to switch back to the old mode for compatibility when
10722: * running 1.X client code on 2.X . Upgrade of 1.X code should be done
10723: * by using xmlIsBlankNode() commodity function to detect the "empty"
10724: * nodes generated.
10725: * This value also affect autogeneration of indentation when saving code
10726: * if blanks sections are kept, indentation is not generated.
10727: *
10728: * Returns the last value for 0 for no substitution, 1 for substitution.
10729: */
10730:
10731: int
10732: xmlKeepBlanksDefault(int val) {
10733: int old = xmlKeepBlanksDefaultValue;
10734:
10735: xmlKeepBlanksDefaultValue = val;
10736: xmlIndentTreeOutput = !val;
1.98 daniel 10737: return(old);
10738: }
1.77 daniel 10739:
Webmaster