Annotation of XML/parser.c, revision 1.177
1.1 veillard 1: /*
1.3 veillard 2: * parser.c : an XML 1.0 non-verifying parser
1.15 veillard 3: *
4: * See Copyright for the status of this software.
5: *
1.60 daniel 6: * Daniel.Veillard@w3.org
1.1 veillard 7: */
8:
1.26 daniel 9: #ifdef WIN32
1.138 daniel 10: #include "win32config.h"
1.26 daniel 11: #else
1.121 daniel 12: #include "config.h"
1.26 daniel 13: #endif
1.121 daniel 14:
1.1 veillard 15: #include <stdio.h>
1.121 daniel 16: #include <string.h> /* for memset() only */
17: #ifdef HAVE_CTYPE_H
1.1 veillard 18: #include <ctype.h>
1.121 daniel 19: #endif
20: #ifdef HAVE_STDLIB_H
1.50 daniel 21: #include <stdlib.h>
1.121 daniel 22: #endif
23: #ifdef HAVE_SYS_STAT_H
1.9 httpng 24: #include <sys/stat.h>
1.121 daniel 25: #endif
1.9 httpng 26: #ifdef HAVE_FCNTL_H
27: #include <fcntl.h>
28: #endif
1.10 httpng 29: #ifdef HAVE_UNISTD_H
30: #include <unistd.h>
31: #endif
1.20 daniel 32: #ifdef HAVE_ZLIB_H
33: #include <zlib.h>
34: #endif
1.1 veillard 35:
1.119 daniel 36: #include "xmlmemory.h"
1.14 veillard 37: #include "tree.h"
1.1 veillard 38: #include "parser.h"
1.14 veillard 39: #include "entities.h"
1.75 daniel 40: #include "encoding.h"
1.61 daniel 41: #include "valid.h"
1.69 daniel 42: #include "parserInternals.h"
1.91 daniel 43: #include "xmlIO.h"
1.122 daniel 44: #include "xml-error.h"
1.1 veillard 45:
/*
 * Buffer size constants. Their users are not in this part of the file;
 * presumably they size temporary parsing buffers -- TODO confirm.
 */
#define XML_PARSER_BIG_BUFFER_SIZE 1000
#define XML_PARSER_BUFFER_SIZE 100

/* Library version string, taken from the LIBXML_VERSION build macro. */
const char *xmlParserVersion = LIBXML_VERSION;
/* Global default, initialised to 1 (presumably "emit warnings"). */
int xmlGetWarningsDefaultValue = 1;

/*
 * List of XML prefixed PI allowed by W3C specs.
 * The array is terminated by a NULL entry.
 */

const char *xmlW3CPIs[] = {
    "xml-stylesheet",
    NULL
};

/*
 * Forward declarations: the access macros and xmlNextChar() below call the
 * two reference handlers before their definitions appear.
 */
void xmlParserHandleReference(xmlParserCtxtPtr ctxt);
void xmlParserHandlePEReference(xmlParserCtxtPtr ctxt);
xmlEntityPtr xmlParseStringPEReference(xmlParserCtxtPtr ctxt,
                                       const xmlChar **str);
1.91 daniel 65: /************************************************************************
66: * *
67: * Input handling functions for progressive parsing *
68: * *
69: ************************************************************************/
70:
71: /* #define DEBUG_INPUT */
1.140 daniel 72: /* #define DEBUG_STACK */
73: /* #define DEBUG_PUSH */
74:
1.91 daniel 75:
/*
 * INPUT_CHUNK is the lookahead size requested from the input layer and the
 * threshold used by xmlParserInputGrow()/xmlParserInputShrink().
 */
#define INPUT_CHUNK	250
/* we need to keep enough input to show errors in context */
#define LINE_LEN        80
1.91 daniel 79:
#ifdef DEBUG_INPUT
#define CHECK_BUFFER(in) check_buffer(in)

/*
 * check_buffer:
 * @in:  an XML parser input
 *
 * Debug-only consistency check (compiled when DEBUG_INPUT is defined).
 * Reports on stderr when @in->base no longer matches the buffer content,
 * or when @in->cur lies outside [base, base + use], then dumps the buffer
 * state.
 *
 * Pointers are printed with %p and the offset with %ld: casting them to
 * int and using %x/%d truncates on 64-bit targets and mismatches the
 * format specifiers.
 */
void check_buffer(xmlParserInputPtr in) {
    if (in->base != in->buf->buffer->content) {
        fprintf(stderr, "xmlParserInput: base mismatch problem\n");
    }
    if (in->cur < in->base) {
        fprintf(stderr, "xmlParserInput: cur < base problem\n");
    }
    if (in->cur > in->base + in->buf->buffer->use) {
        fprintf(stderr, "xmlParserInput: cur > base + use problem\n");
    }
    fprintf(stderr,"buffer %p : content %p, cur %ld, use %u, size %u\n",
            (void *) in, (void *) in->buf->buffer->content,
            (long) (in->cur - in->base),
            (unsigned int) in->buf->buffer->use,
            (unsigned int) in->buf->buffer->size);
}

#else
#define CHECK_BUFFER(in)
#endif
101:
1.91 daniel 102:
103: /**
104: * xmlParserInputRead:
105: * @in: an XML parser input
106: * @len: an indicative size for the lookahead
107: *
108: * This function refresh the input for the parser. It doesn't try to
109: * preserve pointers to the input buffer, and discard already read data
110: *
1.123 daniel 111: * Returns the number of xmlChars read, or -1 in case of error, 0 indicate the
1.91 daniel 112: * end of this entity
113: */
114: int
115: xmlParserInputRead(xmlParserInputPtr in, int len) {
116: int ret;
117: int used;
118: int index;
119:
120: #ifdef DEBUG_INPUT
121: fprintf(stderr, "Read\n");
122: #endif
123: if (in->buf == NULL) return(-1);
124: if (in->base == NULL) return(-1);
125: if (in->cur == NULL) return(-1);
126: if (in->buf->buffer == NULL) return(-1);
127:
128: CHECK_BUFFER(in);
129:
130: used = in->cur - in->buf->buffer->content;
131: ret = xmlBufferShrink(in->buf->buffer, used);
132: if (ret > 0) {
133: in->cur -= ret;
134: in->consumed += ret;
135: }
136: ret = xmlParserInputBufferRead(in->buf, len);
137: if (in->base != in->buf->buffer->content) {
138: /*
139: * the buffer has been realloced
140: */
141: index = in->cur - in->base;
142: in->base = in->buf->buffer->content;
143: in->cur = &in->buf->buffer->content[index];
144: }
145:
146: CHECK_BUFFER(in);
147:
148: return(ret);
149: }
150:
151: /**
152: * xmlParserInputGrow:
153: * @in: an XML parser input
154: * @len: an indicative size for the lookahead
155: *
156: * This function increase the input for the parser. It tries to
157: * preserve pointers to the input buffer, and keep already read data
158: *
1.123 daniel 159: * Returns the number of xmlChars read, or -1 in case of error, 0 indicate the
1.91 daniel 160: * end of this entity
161: */
162: int
163: xmlParserInputGrow(xmlParserInputPtr in, int len) {
164: int ret;
165: int index;
166:
167: #ifdef DEBUG_INPUT
168: fprintf(stderr, "Grow\n");
169: #endif
170: if (in->buf == NULL) return(-1);
171: if (in->base == NULL) return(-1);
172: if (in->cur == NULL) return(-1);
173: if (in->buf->buffer == NULL) return(-1);
174:
175: CHECK_BUFFER(in);
176:
177: index = in->cur - in->base;
178: if (in->buf->buffer->use > index + INPUT_CHUNK) {
179:
180: CHECK_BUFFER(in);
181:
182: return(0);
183: }
1.148 daniel 184: if ((in->buf->httpIO != NULL) || (in->buf->ftpIO != NULL) ||
185: (in->buf->file != NULL) ||
1.140 daniel 186: #ifdef HAVE_ZLIB_H
187: (in->buf->gzfile != NULL) ||
188: #endif
189: (in->buf->fd >= 0))
190: ret = xmlParserInputBufferGrow(in->buf, len);
191: else
192: return(0);
1.135 daniel 193:
194: /*
195: * NOTE : in->base may be a "dandling" i.e. freed pointer in this
196: * block, but we use it really as an integer to do some
197: * pointer arithmetic. Insure will raise it as a bug but in
198: * that specific case, that's not !
199: */
1.91 daniel 200: if (in->base != in->buf->buffer->content) {
201: /*
202: * the buffer has been realloced
203: */
204: index = in->cur - in->base;
205: in->base = in->buf->buffer->content;
206: in->cur = &in->buf->buffer->content[index];
207: }
208:
209: CHECK_BUFFER(in);
210:
211: return(ret);
212: }
213:
214: /**
215: * xmlParserInputShrink:
216: * @in: an XML parser input
217: *
218: * This function removes used input for the parser.
219: */
220: void
221: xmlParserInputShrink(xmlParserInputPtr in) {
222: int used;
223: int ret;
224: int index;
225:
226: #ifdef DEBUG_INPUT
227: fprintf(stderr, "Shrink\n");
228: #endif
229: if (in->buf == NULL) return;
230: if (in->base == NULL) return;
231: if (in->cur == NULL) return;
232: if (in->buf->buffer == NULL) return;
233:
234: CHECK_BUFFER(in);
235:
236: used = in->cur - in->buf->buffer->content;
237: if (used > INPUT_CHUNK) {
1.110 daniel 238: ret = xmlBufferShrink(in->buf->buffer, used - LINE_LEN);
1.91 daniel 239: if (ret > 0) {
240: in->cur -= ret;
241: in->consumed += ret;
242: }
243: }
244:
245: CHECK_BUFFER(in);
246:
247: if (in->buf->buffer->use > INPUT_CHUNK) {
248: return;
249: }
250: xmlParserInputBufferRead(in->buf, 2 * INPUT_CHUNK);
251: if (in->base != in->buf->buffer->content) {
252: /*
253: * the buffer has been realloced
254: */
255: index = in->cur - in->base;
256: in->base = in->buf->buffer->content;
257: in->cur = &in->buf->buffer->content[index];
258: }
259:
260: CHECK_BUFFER(in);
261: }
262:
1.45 daniel 263: /************************************************************************
264: * *
265: * Parser stacks related functions and macros *
266: * *
267: ************************************************************************/
1.79 daniel 268:
/*
 * Global defaults, both initialised to 0. Presumably copied into new parser
 * contexts to disable entity substitution and validation by default --
 * TODO confirm against the context initialisation code.
 */
int xmlSubstituteEntitiesDefaultValue = 0;
int xmlDoValidityCheckingDefaultValue = 0;
/* Forward declaration; the definition is not in this part of the file. */
xmlEntityPtr xmlParseStringEntityRef(xmlParserCtxtPtr ctxt,
                                     const xmlChar ** str);
1.79 daniel 273:
1.1 veillard 274: /*
1.40 daniel 275: * Generic function for accessing stacks in the Parser Context
1.1 veillard 276: */
277:
1.140 daniel 278: #define PUSH_AND_POP(scope, type, name) \
279: scope int name##Push(xmlParserCtxtPtr ctxt, type value) { \
1.31 daniel 280: if (ctxt->name##Nr >= ctxt->name##Max) { \
281: ctxt->name##Max *= 2; \
1.119 daniel 282: ctxt->name##Tab = (void *) xmlRealloc(ctxt->name##Tab, \
1.40 daniel 283: ctxt->name##Max * sizeof(ctxt->name##Tab[0])); \
284: if (ctxt->name##Tab == NULL) { \
1.31 daniel 285: fprintf(stderr, "realloc failed !\n"); \
1.145 daniel 286: return(0); \
1.31 daniel 287: } \
288: } \
1.40 daniel 289: ctxt->name##Tab[ctxt->name##Nr] = value; \
290: ctxt->name = value; \
291: return(ctxt->name##Nr++); \
1.31 daniel 292: } \
1.140 daniel 293: scope type name##Pop(xmlParserCtxtPtr ctxt) { \
1.69 daniel 294: type ret; \
1.40 daniel 295: if (ctxt->name##Nr <= 0) return(0); \
296: ctxt->name##Nr--; \
1.50 daniel 297: if (ctxt->name##Nr > 0) \
298: ctxt->name = ctxt->name##Tab[ctxt->name##Nr - 1]; \
299: else \
300: ctxt->name = NULL; \
1.69 daniel 301: ret = ctxt->name##Tab[ctxt->name##Nr]; \
302: ctxt->name##Tab[ctxt->name##Nr] = 0; \
303: return(ret); \
1.31 daniel 304: } \
305:
1.140 daniel 306: PUSH_AND_POP(extern, xmlParserInputPtr, input)
307: PUSH_AND_POP(extern, xmlNodePtr, node)
308: PUSH_AND_POP(extern, xmlChar*, name)
1.40 daniel 309:
1.176 daniel 310: int spacePush(xmlParserCtxtPtr ctxt, int val) {
311: if (ctxt->spaceNr >= ctxt->spaceMax) {
312: ctxt->spaceMax *= 2;
313: ctxt->spaceTab = (void *) xmlRealloc(ctxt->spaceTab,
314: ctxt->spaceMax * sizeof(ctxt->spaceTab[0]));
315: if (ctxt->spaceTab == NULL) {
316: fprintf(stderr, "realloc failed !\n");
317: return(0);
318: }
319: }
320: ctxt->spaceTab[ctxt->spaceNr] = val;
321: ctxt->space = &ctxt->spaceTab[ctxt->spaceNr];
322: return(ctxt->spaceNr++);
323: }
324:
325: int spacePop(xmlParserCtxtPtr ctxt) {
326: int ret;
327: if (ctxt->spaceNr <= 0) return(0);
328: ctxt->spaceNr--;
329: if (ctxt->spaceNr > 0)
330: ctxt->space = &ctxt->spaceTab[ctxt->spaceNr - 1];
331: else
332: ctxt->space = NULL;
333: ret = ctxt->spaceTab[ctxt->spaceNr];
334: ctxt->spaceTab[ctxt->spaceNr] = -1;
335: return(ret);
336: }
337:
1.55 daniel 338: /*
339: * Macros for accessing the content. Those should be used only by the parser,
340: * and not exported.
341: *
342: * Dirty macros, i.e. one need to make assumption on the context to use them
343: *
1.123 daniel 344: * CUR_PTR return the current pointer to the xmlChar to be parsed.
1.151 daniel 345: * To be used with extreme caution since operations consuming
346: * characters may move the input buffer to a different location !
1.123 daniel 347: * CUR returns the current xmlChar value, i.e. a 8 bit value if compiled
1.152 daniel 348: * in ISO-Latin or UTF-8.
1.151 daniel 349: * This should be used internally by the parser
1.55 daniel 350: * only to compare to ASCII values otherwise it would break when
351: * running with UTF-8 encoding.
1.123 daniel 352: * NXT(n) returns the n'th next xmlChar. Same as CUR, it should be used only
1.55 daniel 353: * to compare on ASCII based substring.
1.123 daniel 354: * SKIP(n) Skip n xmlChar, and must also be used only to skip ASCII defined
1.55 daniel 355: * strings within the parser.
356: *
1.77 daniel 357: * Clean macros, not dependent of an ASCII context, expect UTF-8 encoding
1.55 daniel 358: *
359: * NEXT Skip to the next character, this does the proper decoding
360: * in UTF-8 mode. It also pop-up unfinished entities on the fly.
1.77 daniel 361: * COPY(to) copy one char to *to, increment CUR_PTR and to accordingly
1.155 daniel 362: * CUR_CHAR Return the current char as an int as well as its length.
1.55 daniel 363: */
1.45 daniel 364:
/* RAW: -1 while a token is pending in ctxt->token, else the current byte. */
#define RAW (ctxt->token ? -1 : (*ctxt->input->cur))
/* CUR: the pending token if there is one, else the current byte. */
#define CUR (ctxt->token ? ctxt->token : (*ctxt->input->cur))
/* NXT(val): the byte located val positions after the current one. */
#define NXT(val) ctxt->input->cur[(val)]
/* CUR_PTR: the current input pointer itself. */
#define CUR_PTR ctxt->input->cur

/*
 * SKIP(val): advance the input (and the nbChars counter) by val bytes, then
 * run the PE reference / reference handlers if the new current byte is '%'
 * or '&', and pop the input if it is exhausted and cannot be grown.
 * NOTE: expands to several statements without a do { } while (0) wrapper;
 * only the first one is governed by an unbraced if/else at the call site.
 */
#define SKIP(val) ctxt->nbChars += (val),ctxt->input->cur += (val);	\
    if (*ctxt->input->cur == '%') xmlParserHandlePEReference(ctxt);	\
    if (*ctxt->input->cur == '&') xmlParserHandleReference(ctxt);	\
    if ((*ctxt->input->cur == 0) &&					\
        (xmlParserInputGrow(ctxt->input, INPUT_CHUNK) <= 0))		\
	    xmlPopInput(ctxt)

/*
 * SHRINK: drop consumed input, then pop the input if it is exhausted and
 * cannot be grown. Multi-statement, same caveat as SKIP.
 */
#define SHRINK xmlParserInputShrink(ctxt->input);			\
    if ((*ctxt->input->cur == 0) &&					\
        (xmlParserInputGrow(ctxt->input, INPUT_CHUNK) <= 0))		\
	    xmlPopInput(ctxt)

/*
 * GROW: request INPUT_CHUNK more lookahead, then pop the input if it is
 * exhausted and cannot be grown. Multi-statement, same caveat as SKIP.
 */
#define GROW xmlParserInputGrow(ctxt->input, INPUT_CHUNK);		\
    if ((*ctxt->input->cur == 0) &&					\
        (xmlParserInputGrow(ctxt->input, INPUT_CHUNK) <= 0))		\
	    xmlPopInput(ctxt)

/* SKIP_BLANKS: call xmlSkipBlankChars(); the expansion ends with ';'. */
#define SKIP_BLANKS xmlSkipBlankChars(ctxt);

/* NEXT: call xmlNextChar(); the expansion ends with ';'. */
#define NEXT xmlNextChar(ctxt);

/*
 * NEXTL(l): update line/col for the current byte, clear the pending token,
 * advance the input by l bytes and run the PE reference / reference
 * handlers if the new current byte is '%' or '&'.
 * Multi-statement, same caveat as SKIP.
 */
#define NEXTL(l)							\
    if (*(ctxt->input->cur) == '\n') {					\
	ctxt->input->line++; ctxt->input->col = 1;			\
    } else ctxt->input->col++;						\
    ctxt->token = 0; ctxt->input->cur += l;				\
    if (*ctxt->input->cur == '%') xmlParserHandlePEReference(ctxt);	\
    if (*ctxt->input->cur == '&') xmlParserHandleReference(ctxt);

/* CUR_CHAR(l): current char from the input, its byte length stored in l. */
#define CUR_CHAR(l) xmlCurrentChar(ctxt, &l);
/* CUR_SCHAR(s, l): same as CUR_CHAR but reading from the string s. */
#define CUR_SCHAR(s, l) xmlStringCurrentChar(ctxt, s, &l);

/*
 * COPY_BUF(l,b,i,v): append char v to buffer b at index i and advance i;
 * a length l of 1 is stored directly, anything else goes through
 * xmlCopyChar(). Expands to an unwrapped if/else.
 */
#define COPY_BUF(l,b,i,v)						\
    if (l == 1) b[i++] = (xmlChar) v;					\
    else i += xmlCopyChar(l,&b[i],v);
1.151 daniel 405:
406: /**
407: * xmlNextChar:
408: * @ctxt: the XML parser context
409: *
410: * Skip to the next char input char.
411: */
1.55 daniel 412:
1.151 daniel 413: void
414: xmlNextChar(xmlParserCtxtPtr ctxt) {
1.176 daniel 415: /*
416: * TODO: 2.11 End-of-Line Handling
417: * the literal two-character sequence "#xD#xA" or a standalone
418: * literal #xD, an XML processor must pass to the application
419: * the single character #xA.
420: */
1.151 daniel 421: if (ctxt->token != 0) ctxt->token = 0;
422: else {
423: if ((*ctxt->input->cur == 0) &&
424: (xmlParserInputGrow(ctxt->input, INPUT_CHUNK) <= 0) &&
425: (ctxt->instate != XML_PARSER_COMMENT)) {
426: /*
427: * If we are at the end of the current entity and
428: * the context allows it, we pop consumed entities
429: * automatically.
430: * TODO: the auto closing should be blocked in other cases
431: */
432: xmlPopInput(ctxt);
433: } else {
434: if (*(ctxt->input->cur) == '\n') {
435: ctxt->input->line++; ctxt->input->col = 1;
436: } else ctxt->input->col++;
437: if (ctxt->encoding == NULL) {
438: /*
439: * We are supposed to handle UTF8, check it's valid
440: * From rfc2044: encoding of the Unicode values on UTF-8:
441: *
442: * UCS-4 range (hex.) UTF-8 octet sequence (binary)
443: * 0000 0000-0000 007F 0xxxxxxx
444: * 0000 0080-0000 07FF 110xxxxx 10xxxxxx
445: * 0000 0800-0000 FFFF 1110xxxx 10xxxxxx 10xxxxxx
446: *
1.160 daniel 447: * Check for the 0x110000 limit too
1.151 daniel 448: */
449: const unsigned char *cur = ctxt->input->cur;
450: unsigned char c;
1.91 daniel 451:
1.151 daniel 452: c = *cur;
453: if (c & 0x80) {
454: if (cur[1] == 0)
455: xmlParserInputGrow(ctxt->input, INPUT_CHUNK);
456: if ((cur[1] & 0xc0) != 0x80)
457: goto encoding_error;
458: if ((c & 0xe0) == 0xe0) {
459: unsigned int val;
460:
461: if (cur[2] == 0)
462: xmlParserInputGrow(ctxt->input, INPUT_CHUNK);
463: if ((cur[2] & 0xc0) != 0x80)
464: goto encoding_error;
465: if ((c & 0xf0) == 0xf0) {
466: if (cur[3] == 0)
467: xmlParserInputGrow(ctxt->input, INPUT_CHUNK);
468: if (((c & 0xf8) != 0xf0) ||
469: ((cur[3] & 0xc0) != 0x80))
470: goto encoding_error;
471: /* 4-byte code */
472: ctxt->input->cur += 4;
473: val = (cur[0] & 0x7) << 18;
474: val |= (cur[1] & 0x3f) << 12;
475: val |= (cur[2] & 0x3f) << 6;
476: val |= cur[3] & 0x3f;
477: } else {
478: /* 3-byte code */
479: ctxt->input->cur += 3;
480: val = (cur[0] & 0xf) << 12;
481: val |= (cur[1] & 0x3f) << 6;
482: val |= cur[2] & 0x3f;
483: }
484: if (((val > 0xd7ff) && (val < 0xe000)) ||
485: ((val > 0xfffd) && (val < 0x10000)) ||
1.160 daniel 486: (val >= 0x110000)) {
1.151 daniel 487: if ((ctxt->sax != NULL) &&
488: (ctxt->sax->error != NULL))
489: ctxt->sax->error(ctxt->userData,
490: "Char out of allowed range\n");
491: ctxt->errNo = XML_ERR_INVALID_ENCODING;
492: ctxt->wellFormed = 0;
493: }
494: } else
495: /* 2-byte code */
496: ctxt->input->cur += 2;
497: } else
498: /* 1-byte code */
499: ctxt->input->cur++;
500: } else {
501: /*
502: * Assume it's a fixed lenght encoding (1) with
503: * a compatibke encoding for the ASCII set, since
504: * XML constructs only use < 128 chars
505: */
506: ctxt->input->cur++;
507: }
508: ctxt->nbChars++;
509: if (*ctxt->input->cur == 0)
510: xmlParserInputGrow(ctxt->input, INPUT_CHUNK);
511: }
512: }
1.154 daniel 513: if (*ctxt->input->cur == '%') xmlParserHandlePEReference(ctxt);
514: if (*ctxt->input->cur == '&') xmlParserHandleReference(ctxt);
1.168 daniel 515: if ((*ctxt->input->cur == 0) &&
516: (xmlParserInputGrow(ctxt->input, INPUT_CHUNK) <= 0))
517: xmlPopInput(ctxt);
1.151 daniel 518: return;
519: encoding_error:
520: /*
521: * If we detect an UTF8 error that probably mean that the
522: * input encoding didn't get properly advertized in the
523: * declaration header. Report the error and switch the encoding
524: * to ISO-Latin-1 (if you don't like this policy, just declare the
525: * encoding !)
526: */
527: if ((ctxt->sax != NULL) && (ctxt->sax->error != NULL))
528: ctxt->sax->error(ctxt->userData,
529: "Input is not proper UTF-8, indicate encoding !\n");
530: ctxt->errNo = XML_ERR_INVALID_ENCODING;
531:
532: ctxt->encoding = xmlStrdup(BAD_CAST "ISO-8859-1");
533: ctxt->input->cur++;
534: return;
535: }
1.42 daniel 536:
1.152 daniel 537: /**
538: * xmlCurrentChar:
539: * @ctxt: the XML parser context
540: * @len: pointer to the length of the char read
541: *
542: * The current char value, if using UTF-8 this may actaully span multiple
543: * bytes in the input buffer.
544: *
545: * Returns the current char value and its lenght
546: */
547:
548: int
549: xmlCurrentChar(xmlParserCtxtPtr ctxt, int *len) {
550: if (ctxt->token != 0) {
551: *len = 0;
552: return(ctxt->token);
553: }
554: if (ctxt->encoding == NULL) {
555: /*
556: * We are supposed to handle UTF8, check it's valid
557: * From rfc2044: encoding of the Unicode values on UTF-8:
558: *
559: * UCS-4 range (hex.) UTF-8 octet sequence (binary)
560: * 0000 0000-0000 007F 0xxxxxxx
561: * 0000 0080-0000 07FF 110xxxxx 10xxxxxx
562: * 0000 0800-0000 FFFF 1110xxxx 10xxxxxx 10xxxxxx
563: *
1.160 daniel 564: * Check for the 0x110000 limit too
1.152 daniel 565: */
566: const unsigned char *cur = ctxt->input->cur;
567: unsigned char c;
568: unsigned int val;
569:
570: c = *cur;
571: if (c & 0x80) {
572: if (cur[1] == 0)
573: xmlParserInputGrow(ctxt->input, INPUT_CHUNK);
574: if ((cur[1] & 0xc0) != 0x80)
575: goto encoding_error;
576: if ((c & 0xe0) == 0xe0) {
577:
578: if (cur[2] == 0)
579: xmlParserInputGrow(ctxt->input, INPUT_CHUNK);
580: if ((cur[2] & 0xc0) != 0x80)
581: goto encoding_error;
582: if ((c & 0xf0) == 0xf0) {
583: if (cur[3] == 0)
584: xmlParserInputGrow(ctxt->input, INPUT_CHUNK);
585: if (((c & 0xf8) != 0xf0) ||
586: ((cur[3] & 0xc0) != 0x80))
587: goto encoding_error;
588: /* 4-byte code */
589: *len = 4;
590: val = (cur[0] & 0x7) << 18;
591: val |= (cur[1] & 0x3f) << 12;
592: val |= (cur[2] & 0x3f) << 6;
593: val |= cur[3] & 0x3f;
594: } else {
595: /* 3-byte code */
596: *len = 3;
597: val = (cur[0] & 0xf) << 12;
598: val |= (cur[1] & 0x3f) << 6;
599: val |= cur[2] & 0x3f;
600: }
601: } else {
602: /* 2-byte code */
603: *len = 2;
604: val = (cur[0] & 0x1f) << 6;
1.168 daniel 605: val |= cur[1] & 0x3f;
1.152 daniel 606: }
607: if (!IS_CHAR(val)) {
608: if ((ctxt->sax != NULL) &&
609: (ctxt->sax->error != NULL))
610: ctxt->sax->error(ctxt->userData,
611: "Char out of allowed range\n");
612: ctxt->errNo = XML_ERR_INVALID_ENCODING;
613: ctxt->wellFormed = 0;
614: }
615: return(val);
616: } else {
617: /* 1-byte code */
618: *len = 1;
619: return((int) *ctxt->input->cur);
620: }
621: }
622: /*
623: * Assume it's a fixed lenght encoding (1) with
624: * a compatibke encoding for the ASCII set, since
625: * XML constructs only use < 128 chars
626: */
627: *len = 1;
628: return((int) *ctxt->input->cur);
629: encoding_error:
630: /*
631: * If we detect an UTF8 error that probably mean that the
632: * input encoding didn't get properly advertized in the
633: * declaration header. Report the error and switch the encoding
634: * to ISO-Latin-1 (if you don't like this policy, just declare the
635: * encoding !)
636: */
637: if ((ctxt->sax != NULL) && (ctxt->sax->error != NULL))
638: ctxt->sax->error(ctxt->userData,
639: "Input is not proper UTF-8, indicate encoding !\n");
640: ctxt->errNo = XML_ERR_INVALID_ENCODING;
641:
642: ctxt->encoding = xmlStrdup(BAD_CAST "ISO-8859-1");
643: *len = 1;
644: return((int) *ctxt->input->cur);
645: }
646:
647: /**
1.162 daniel 648: * xmlStringCurrentChar:
649: * @ctxt: the XML parser context
650: * @cur: pointer to the beginning of the char
651: * @len: pointer to the length of the char read
652: *
653: * The current char value, if using UTF-8 this may actaully span multiple
654: * bytes in the input buffer.
655: *
656: * Returns the current char value and its lenght
657: */
658:
659: int
660: xmlStringCurrentChar(xmlParserCtxtPtr ctxt, const xmlChar *cur, int *len) {
661: if (ctxt->encoding == NULL) {
662: /*
663: * We are supposed to handle UTF8, check it's valid
664: * From rfc2044: encoding of the Unicode values on UTF-8:
665: *
666: * UCS-4 range (hex.) UTF-8 octet sequence (binary)
667: * 0000 0000-0000 007F 0xxxxxxx
668: * 0000 0080-0000 07FF 110xxxxx 10xxxxxx
669: * 0000 0800-0000 FFFF 1110xxxx 10xxxxxx 10xxxxxx
670: *
671: * Check for the 0x110000 limit too
672: */
673: unsigned char c;
674: unsigned int val;
675:
676: c = *cur;
677: if (c & 0x80) {
678: if ((cur[1] & 0xc0) != 0x80)
679: goto encoding_error;
680: if ((c & 0xe0) == 0xe0) {
681:
682: if ((cur[2] & 0xc0) != 0x80)
683: goto encoding_error;
684: if ((c & 0xf0) == 0xf0) {
685: if (((c & 0xf8) != 0xf0) ||
686: ((cur[3] & 0xc0) != 0x80))
687: goto encoding_error;
688: /* 4-byte code */
689: *len = 4;
690: val = (cur[0] & 0x7) << 18;
691: val |= (cur[1] & 0x3f) << 12;
692: val |= (cur[2] & 0x3f) << 6;
693: val |= cur[3] & 0x3f;
694: } else {
695: /* 3-byte code */
696: *len = 3;
697: val = (cur[0] & 0xf) << 12;
698: val |= (cur[1] & 0x3f) << 6;
699: val |= cur[2] & 0x3f;
700: }
701: } else {
702: /* 2-byte code */
703: *len = 2;
704: val = (cur[0] & 0x1f) << 6;
705: val |= cur[2] & 0x3f;
706: }
707: if (!IS_CHAR(val)) {
708: if ((ctxt->sax != NULL) &&
709: (ctxt->sax->error != NULL))
710: ctxt->sax->error(ctxt->userData,
711: "Char out of allowed range\n");
712: ctxt->errNo = XML_ERR_INVALID_ENCODING;
713: ctxt->wellFormed = 0;
714: }
715: return(val);
716: } else {
717: /* 1-byte code */
718: *len = 1;
719: return((int) *cur);
720: }
721: }
722: /*
723: * Assume it's a fixed lenght encoding (1) with
724: * a compatibke encoding for the ASCII set, since
725: * XML constructs only use < 128 chars
726: */
727: *len = 1;
728: return((int) *cur);
729: encoding_error:
730: /*
731: * If we detect an UTF8 error that probably mean that the
732: * input encoding didn't get properly advertized in the
733: * declaration header. Report the error and switch the encoding
734: * to ISO-Latin-1 (if you don't like this policy, just declare the
735: * encoding !)
736: */
737: if ((ctxt->sax != NULL) && (ctxt->sax->error != NULL))
738: ctxt->sax->error(ctxt->userData,
739: "Input is not proper UTF-8, indicate encoding !\n");
740: ctxt->errNo = XML_ERR_INVALID_ENCODING;
741:
742: *len = 1;
743: return((int) *cur);
744: }
745:
746: /**
1.152 daniel 747: * xmlCopyChar:
748: * @len: pointer to the length of the char read (or zero)
749: * @array: pointer to an arry of xmlChar
750: * @val: the char value
751: *
752: * append the char value in the array
753: *
754: * Returns the number of xmlChar written
755: */
756:
757: int
758: xmlCopyChar(int len, xmlChar *out, int val) {
759: /*
760: * We are supposed to handle UTF8, check it's valid
761: * From rfc2044: encoding of the Unicode values on UTF-8:
762: *
763: * UCS-4 range (hex.) UTF-8 octet sequence (binary)
764: * 0000 0000-0000 007F 0xxxxxxx
765: * 0000 0080-0000 07FF 110xxxxx 10xxxxxx
766: * 0000 0800-0000 FFFF 1110xxxx 10xxxxxx 10xxxxxx
767: */
768: if (len == 0) {
769: if (val < 0) len = 0;
1.160 daniel 770: else if (val < 0x80) len = 1;
771: else if (val < 0x800) len = 2;
772: else if (val < 0x10000) len = 3;
773: else if (val < 0x110000) len = 4;
1.152 daniel 774: if (len == 0) {
775: fprintf(stderr, "Internal error, xmlCopyChar 0x%X out of bound\n",
776: val);
777: return(0);
778: }
779: }
780: if (len > 1) {
781: int bits;
782:
783: if (val < 0x80) { *out++= val; bits= -6; }
784: else if (val < 0x800) { *out++= (val >> 6) | 0xC0; bits= 0; }
785: else if (val < 0x10000) { *out++= (val >> 12) | 0xE0; bits= 6; }
786: else { *out++= (val >> 18) | 0xF0; bits= 12; }
787:
788: for ( ; bits >= 0; bits-= 6)
789: *out++= ((val >> bits) & 0x3F) | 0x80 ;
790:
791: return(len);
792: }
793: *out = (xmlChar) val;
794: return(1);
1.155 daniel 795: }
796:
797: /**
798: * xmlSkipBlankChars:
799: * @ctxt: the XML parser context
800: *
801: * skip all blanks character found at that point in the input streams.
802: * It pops up finished entities in the process if allowable at that point.
803: *
804: * Returns the number of space chars skipped
805: */
806:
807: int
808: xmlSkipBlankChars(xmlParserCtxtPtr ctxt) {
809: int cur, res = 0;
810:
811: do {
812: cur = CUR;
813: while (IS_BLANK(cur)) {
814: NEXT;
815: cur = CUR;
816: res++;
817: }
818: while ((cur == 0) && (ctxt->inputNr > 1) &&
819: (ctxt->instate != XML_PARSER_COMMENT)) {
820: xmlPopInput(ctxt);
821: cur = CUR;
822: }
823: if (*ctxt->input->cur == '%') xmlParserHandlePEReference(ctxt);
824: if (*ctxt->input->cur == '&') xmlParserHandleReference(ctxt);
825: } while (IS_BLANK(cur));
826: return(res);
1.152 daniel 827: }
828:
1.97 daniel 829: /************************************************************************
830: * *
831: * Commodity functions to handle entities processing *
832: * *
833: ************************************************************************/
1.40 daniel 834:
1.50 daniel 835: /**
836: * xmlPopInput:
837: * @ctxt: an XML parser context
838: *
1.40 daniel 839: * xmlPopInput: the current input pointed by ctxt->input came to an end
840: * pop it and return the next char.
1.45 daniel 841: *
1.123 daniel 842: * Returns the current xmlChar in the parser context
1.40 daniel 843: */
1.123 daniel 844: xmlChar
1.55 daniel 845: xmlPopInput(xmlParserCtxtPtr ctxt) {
1.40 daniel 846: if (ctxt->inputNr == 1) return(0); /* End of main Input */
1.69 daniel 847: xmlFreeInputStream(inputPop(ctxt));
1.97 daniel 848: if ((*ctxt->input->cur == 0) &&
849: (xmlParserInputGrow(ctxt->input, INPUT_CHUNK) <= 0))
850: return(xmlPopInput(ctxt));
1.40 daniel 851: return(CUR);
852: }
853:
1.50 daniel 854: /**
855: * xmlPushInput:
856: * @ctxt: an XML parser context
857: * @input: an XML parser input fragment (entity, XML fragment ...).
858: *
1.40 daniel 859: * xmlPushInput: switch to a new input stream which is stacked on top
860: * of the previous one(s).
861: */
1.55 daniel 862: void
863: xmlPushInput(xmlParserCtxtPtr ctxt, xmlParserInputPtr input) {
1.40 daniel 864: if (input == NULL) return;
865: inputPush(ctxt, input);
1.164 daniel 866: GROW;
1.40 daniel 867: }
868:
1.50 daniel 869: /**
1.69 daniel 870: * xmlFreeInputStream:
1.127 daniel 871: * @input: an xmlParserInputPtr
1.69 daniel 872: *
873: * Free up an input stream.
874: */
875: void
876: xmlFreeInputStream(xmlParserInputPtr input) {
877: if (input == NULL) return;
878:
1.119 daniel 879: if (input->filename != NULL) xmlFree((char *) input->filename);
880: if (input->directory != NULL) xmlFree((char *) input->directory);
1.164 daniel 881: if (input->encoding != NULL) xmlFree((char *) input->encoding);
1.165 daniel 882: if (input->version != NULL) xmlFree((char *) input->version);
1.69 daniel 883: if ((input->free != NULL) && (input->base != NULL))
1.123 daniel 884: input->free((xmlChar *) input->base);
1.93 veillard 885: if (input->buf != NULL)
886: xmlFreeParserInputBuffer(input->buf);
1.69 daniel 887: memset(input, -1, sizeof(xmlParserInput));
1.119 daniel 888: xmlFree(input);
1.69 daniel 889: }
890:
891: /**
1.96 daniel 892: * xmlNewInputStream:
893: * @ctxt: an XML parser context
894: *
895: * Create a new input stream structure
896: * Returns the new input stream or NULL
897: */
898: xmlParserInputPtr
899: xmlNewInputStream(xmlParserCtxtPtr ctxt) {
900: xmlParserInputPtr input;
901:
1.119 daniel 902: input = (xmlParserInputPtr) xmlMalloc(sizeof(xmlParserInput));
1.96 daniel 903: if (input == NULL) {
1.123 daniel 904: ctxt->errNo = XML_ERR_NO_MEMORY;
1.96 daniel 905: if ((ctxt->sax != NULL) && (ctxt->sax->error != NULL))
1.122 daniel 906: ctxt->sax->error(ctxt->userData,
907: "malloc: couldn't allocate a new input stream\n");
1.123 daniel 908: ctxt->errNo = XML_ERR_NO_MEMORY;
1.96 daniel 909: return(NULL);
910: }
1.165 daniel 911: memset(input, 0, sizeof(xmlParserInput));
1.96 daniel 912: input->line = 1;
913: input->col = 1;
1.167 daniel 914: input->standalone = -1;
1.96 daniel 915: return(input);
916: }
917:
918: /**
1.50 daniel 919: * xmlNewEntityInputStream:
920: * @ctxt: an XML parser context
921: * @entity: an Entity pointer
922: *
1.82 daniel 923: * Create a new input stream based on an xmlEntityPtr
1.113 daniel 924: *
925: * Returns the new input stream or NULL
1.45 daniel 926: */
1.50 daniel 927: xmlParserInputPtr
928: xmlNewEntityInputStream(xmlParserCtxtPtr ctxt, xmlEntityPtr entity) {
1.45 daniel 929: xmlParserInputPtr input;
930:
931: if (entity == NULL) {
1.123 daniel 932: ctxt->errNo = XML_ERR_INTERNAL_ERROR;
1.55 daniel 933: if ((ctxt->sax != NULL) && (ctxt->sax->error != NULL))
1.74 daniel 934: ctxt->sax->error(ctxt->userData,
1.45 daniel 935: "internal: xmlNewEntityInputStream entity = NULL\n");
1.123 daniel 936: ctxt->errNo = XML_ERR_INTERNAL_ERROR;
1.50 daniel 937: return(NULL);
1.45 daniel 938: }
939: if (entity->content == NULL) {
1.159 daniel 940: switch (entity->etype) {
1.113 daniel 941: case XML_EXTERNAL_GENERAL_UNPARSED_ENTITY:
1.123 daniel 942: ctxt->errNo = XML_ERR_UNPARSED_ENTITY;
1.113 daniel 943: if ((ctxt->sax != NULL) && (ctxt->sax->error != NULL))
944: ctxt->sax->error(ctxt->userData,
945: "xmlNewEntityInputStream unparsed entity !\n");
946: break;
947: case XML_EXTERNAL_GENERAL_PARSED_ENTITY:
948: case XML_EXTERNAL_PARAMETER_ENTITY:
1.116 daniel 949: return(xmlLoadExternalEntity((char *) entity->SystemID,
1.142 daniel 950: (char *) entity->ExternalID, ctxt));
1.113 daniel 951: case XML_INTERNAL_GENERAL_ENTITY:
952: if ((ctxt->sax != NULL) && (ctxt->sax->error != NULL))
953: ctxt->sax->error(ctxt->userData,
954: "Internal entity %s without content !\n", entity->name);
955: break;
956: case XML_INTERNAL_PARAMETER_ENTITY:
1.123 daniel 957: ctxt->errNo = XML_ERR_INTERNAL_ERROR;
1.113 daniel 958: if ((ctxt->sax != NULL) && (ctxt->sax->error != NULL))
959: ctxt->sax->error(ctxt->userData,
960: "Internal parameter entity %s without content !\n", entity->name);
961: break;
962: case XML_INTERNAL_PREDEFINED_ENTITY:
1.123 daniel 963: ctxt->errNo = XML_ERR_INTERNAL_ERROR;
1.113 daniel 964: if ((ctxt->sax != NULL) && (ctxt->sax->error != NULL))
965: ctxt->sax->error(ctxt->userData,
966: "Predefined entity %s without content !\n", entity->name);
967: break;
968: }
1.50 daniel 969: return(NULL);
1.45 daniel 970: }
1.96 daniel 971: input = xmlNewInputStream(ctxt);
1.45 daniel 972: if (input == NULL) {
1.50 daniel 973: return(NULL);
1.45 daniel 974: }
1.156 daniel 975: input->filename = (char *) entity->SystemID;
1.45 daniel 976: input->base = entity->content;
977: input->cur = entity->content;
1.140 daniel 978: input->length = entity->length;
1.50 daniel 979: return(input);
1.45 daniel 980: }
981:
1.59 daniel 982: /**
983: * xmlNewStringInputStream:
984: * @ctxt: an XML parser context
1.96 daniel 985: * @buffer: an memory buffer
1.59 daniel 986: *
987: * Create a new input stream based on a memory buffer.
1.68 daniel 988: * Returns the new input stream
1.59 daniel 989: */
990: xmlParserInputPtr
1.123 daniel 991: xmlNewStringInputStream(xmlParserCtxtPtr ctxt, const xmlChar *buffer) {
1.59 daniel 992: xmlParserInputPtr input;
993:
1.96 daniel 994: if (buffer == NULL) {
1.123 daniel 995: ctxt->errNo = XML_ERR_INTERNAL_ERROR;
1.59 daniel 996: if ((ctxt->sax != NULL) && (ctxt->sax->error != NULL))
1.74 daniel 997: ctxt->sax->error(ctxt->userData,
1.59 daniel 998: "internal: xmlNewStringInputStream string = NULL\n");
999: return(NULL);
1000: }
1.96 daniel 1001: input = xmlNewInputStream(ctxt);
1.59 daniel 1002: if (input == NULL) {
1003: return(NULL);
1004: }
1.96 daniel 1005: input->base = buffer;
1006: input->cur = buffer;
1.140 daniel 1007: input->length = xmlStrlen(buffer);
1.59 daniel 1008: return(input);
1009: }
1010:
1.76 daniel 1011: /**
1012: * xmlNewInputFromFile:
1013: * @ctxt: an XML parser context
1014: * @filename: the filename to use as entity
1015: *
1016: * Create a new input stream based on a file.
1017: *
1018: * Returns the new input stream or NULL in case of error
1019: */
1020: xmlParserInputPtr
1.79 daniel 1021: xmlNewInputFromFile(xmlParserCtxtPtr ctxt, const char *filename) {
1.91 daniel 1022: xmlParserInputBufferPtr buf;
1.76 daniel 1023: xmlParserInputPtr inputStream;
1.111 daniel 1024: char *directory = NULL;
1.76 daniel 1025:
1.96 daniel 1026: if (ctxt == NULL) return(NULL);
1.91 daniel 1027: buf = xmlParserInputBufferCreateFilename(filename, XML_CHAR_ENCODING_NONE);
1.94 daniel 1028: if (buf == NULL) {
1.140 daniel 1029: char name[XML_PARSER_BIG_BUFFER_SIZE];
1.106 daniel 1030:
1.94 daniel 1031: if ((ctxt->input != NULL) && (ctxt->input->directory != NULL)) {
1032: #ifdef WIN32
1033: sprintf(name, "%s\\%s", ctxt->input->directory, filename);
1034: #else
1035: sprintf(name, "%s/%s", ctxt->input->directory, filename);
1036: #endif
1037: buf = xmlParserInputBufferCreateFilename(name,
1038: XML_CHAR_ENCODING_NONE);
1.106 daniel 1039: if (buf != NULL)
1.142 daniel 1040: directory = xmlParserGetDirectory(name);
1.106 daniel 1041: }
1042: if ((buf == NULL) && (ctxt->directory != NULL)) {
1043: #ifdef WIN32
1044: sprintf(name, "%s\\%s", ctxt->directory, filename);
1045: #else
1046: sprintf(name, "%s/%s", ctxt->directory, filename);
1047: #endif
1048: buf = xmlParserInputBufferCreateFilename(name,
1049: XML_CHAR_ENCODING_NONE);
1050: if (buf != NULL)
1.142 daniel 1051: directory = xmlParserGetDirectory(name);
1.106 daniel 1052: }
1053: if (buf == NULL)
1.94 daniel 1054: return(NULL);
1055: }
1056: if (directory == NULL)
1057: directory = xmlParserGetDirectory(filename);
1.76 daniel 1058:
1.96 daniel 1059: inputStream = xmlNewInputStream(ctxt);
1.76 daniel 1060: if (inputStream == NULL) {
1.119 daniel 1061: if (directory != NULL) xmlFree((char *) directory);
1.76 daniel 1062: return(NULL);
1063: }
1064:
1.119 daniel 1065: inputStream->filename = xmlMemStrdup(filename);
1.94 daniel 1066: inputStream->directory = directory;
1.91 daniel 1067: inputStream->buf = buf;
1.76 daniel 1068:
1.91 daniel 1069: inputStream->base = inputStream->buf->buffer->content;
1070: inputStream->cur = inputStream->buf->buffer->content;
1.106 daniel 1071: if ((ctxt->directory == NULL) && (directory != NULL))
1.134 daniel 1072: ctxt->directory = (char *) xmlStrdup((const xmlChar *) directory);
1.76 daniel 1073: return(inputStream);
1074: }
1075:
1.77 daniel 1076: /************************************************************************
1077: * *
1.97 daniel 1078: * Commodity functions to handle parser contexts *
1079: * *
1080: ************************************************************************/
1081:
1082: /**
1083: * xmlInitParserCtxt:
1084: * @ctxt: an XML parser context
1085: *
1086: * Initialize a parser context
1087: */
1088:
1089: void
1090: xmlInitParserCtxt(xmlParserCtxtPtr ctxt)
1091: {
1092: xmlSAXHandler *sax;
1093:
1.168 daniel 1094: xmlDefaultSAXHandlerInit();
1095:
1.119 daniel 1096: sax = (xmlSAXHandler *) xmlMalloc(sizeof(xmlSAXHandler));
1.97 daniel 1097: if (sax == NULL) {
1098: fprintf(stderr, "xmlInitParserCtxt: out of memory\n");
1099: }
1100:
1101: /* Allocate the Input stack */
1.119 daniel 1102: ctxt->inputTab = (xmlParserInputPtr *) xmlMalloc(5 * sizeof(xmlParserInputPtr));
1.97 daniel 1103: ctxt->inputNr = 0;
1104: ctxt->inputMax = 5;
1105: ctxt->input = NULL;
1.165 daniel 1106:
1.97 daniel 1107: ctxt->version = NULL;
1108: ctxt->encoding = NULL;
1109: ctxt->standalone = -1;
1.98 daniel 1110: ctxt->hasExternalSubset = 0;
1111: ctxt->hasPErefs = 0;
1.97 daniel 1112: ctxt->html = 0;
1.98 daniel 1113: ctxt->external = 0;
1.140 daniel 1114: ctxt->instate = XML_PARSER_START;
1.97 daniel 1115: ctxt->token = 0;
1.106 daniel 1116: ctxt->directory = NULL;
1.97 daniel 1117:
1118: /* Allocate the Node stack */
1.119 daniel 1119: ctxt->nodeTab = (xmlNodePtr *) xmlMalloc(10 * sizeof(xmlNodePtr));
1.97 daniel 1120: ctxt->nodeNr = 0;
1121: ctxt->nodeMax = 10;
1122: ctxt->node = NULL;
1123:
1.140 daniel 1124: /* Allocate the Name stack */
1125: ctxt->nameTab = (xmlChar **) xmlMalloc(10 * sizeof(xmlChar *));
1126: ctxt->nameNr = 0;
1127: ctxt->nameMax = 10;
1128: ctxt->name = NULL;
1129:
1.176 daniel 1130: /* Allocate the space stack */
1131: ctxt->spaceTab = (int *) xmlMalloc(10 * sizeof(int));
1132: ctxt->spaceNr = 1;
1133: ctxt->spaceMax = 10;
1134: ctxt->spaceTab[0] = -1;
1135: ctxt->space = &ctxt->spaceTab[0];
1136:
1.160 daniel 1137: if (sax == NULL) {
1138: ctxt->sax = &xmlDefaultSAXHandler;
1139: } else {
1.97 daniel 1140: ctxt->sax = sax;
1141: memcpy(sax, &xmlDefaultSAXHandler, sizeof(xmlSAXHandler));
1142: }
1143: ctxt->userData = ctxt;
1144: ctxt->myDoc = NULL;
1145: ctxt->wellFormed = 1;
1.99 daniel 1146: ctxt->valid = 1;
1.100 daniel 1147: ctxt->validate = xmlDoValidityCheckingDefaultValue;
1148: ctxt->vctxt.userData = ctxt;
1.149 daniel 1149: if (ctxt->validate) {
1150: ctxt->vctxt.error = xmlParserValidityError;
1.160 daniel 1151: if (xmlGetWarningsDefaultValue == 0)
1152: ctxt->vctxt.warning = NULL;
1153: else
1154: ctxt->vctxt.warning = xmlParserValidityWarning;
1.149 daniel 1155: } else {
1156: ctxt->vctxt.error = NULL;
1157: ctxt->vctxt.warning = NULL;
1158: }
1.97 daniel 1159: ctxt->replaceEntities = xmlSubstituteEntitiesDefaultValue;
1160: ctxt->record_info = 0;
1.135 daniel 1161: ctxt->nbChars = 0;
1.140 daniel 1162: ctxt->checkIndex = 0;
1163: ctxt->errNo = XML_ERR_OK;
1.97 daniel 1164: xmlInitNodeInfoSeq(&ctxt->node_seq);
1165: }
1166:
1167: /**
1168: * xmlFreeParserCtxt:
1169: * @ctxt: an XML parser context
1170: *
1171: * Free all the memory used by a parser context. However the parsed
1172: * document in ctxt->myDoc is not freed.
1173: */
1174:
1175: void
1176: xmlFreeParserCtxt(xmlParserCtxtPtr ctxt)
1177: {
1178: xmlParserInputPtr input;
1.140 daniel 1179: xmlChar *oldname;
1.97 daniel 1180:
1181: if (ctxt == NULL) return;
1182:
1183: while ((input = inputPop(ctxt)) != NULL) {
1184: xmlFreeInputStream(input);
1185: }
1.140 daniel 1186: while ((oldname = namePop(ctxt)) != NULL) {
1187: xmlFree(oldname);
1188: }
1.176 daniel 1189: if (ctxt->spaceTab != NULL) xmlFree(ctxt->spaceTab);
1.140 daniel 1190: if (ctxt->nameTab != NULL) xmlFree(ctxt->nameTab);
1.119 daniel 1191: if (ctxt->nodeTab != NULL) xmlFree(ctxt->nodeTab);
1192: if (ctxt->inputTab != NULL) xmlFree(ctxt->inputTab);
1193: if (ctxt->version != NULL) xmlFree((char *) ctxt->version);
1194: if (ctxt->encoding != NULL) xmlFree((char *) ctxt->encoding);
1.165 daniel 1195: if (ctxt->intSubName != NULL) xmlFree((char *) ctxt->intSubName);
1196: if (ctxt->extSubURI != NULL) xmlFree((char *) ctxt->extSubURI);
1197: if (ctxt->extSubSystem != NULL) xmlFree((char *) ctxt->extSubSystem);
1.97 daniel 1198: if ((ctxt->sax != NULL) && (ctxt->sax != &xmlDefaultSAXHandler))
1.119 daniel 1199: xmlFree(ctxt->sax);
1200: if (ctxt->directory != NULL) xmlFree((char *) ctxt->directory);
1201: xmlFree(ctxt);
1.97 daniel 1202: }
1203:
1204: /**
1205: * xmlNewParserCtxt:
1206: *
1207: * Allocate and initialize a new parser context.
1208: *
1209: * Returns the xmlParserCtxtPtr or NULL
1210: */
1211:
1212: xmlParserCtxtPtr
1213: xmlNewParserCtxt()
1214: {
1215: xmlParserCtxtPtr ctxt;
1216:
1.119 daniel 1217: ctxt = (xmlParserCtxtPtr) xmlMalloc(sizeof(xmlParserCtxt));
1.97 daniel 1218: if (ctxt == NULL) {
1219: fprintf(stderr, "xmlNewParserCtxt : cannot allocate context\n");
1220: perror("malloc");
1221: return(NULL);
1222: }
1.165 daniel 1223: memset(ctxt, 0, sizeof(xmlParserCtxt));
1.97 daniel 1224: xmlInitParserCtxt(ctxt);
1225: return(ctxt);
1226: }
1227:
1228: /**
1229: * xmlClearParserCtxt:
1230: * @ctxt: an XML parser context
1231: *
1232: * Clear (release owned resources) and reinitialize a parser context
1233: */
1234:
1235: void
1236: xmlClearParserCtxt(xmlParserCtxtPtr ctxt)
1237: {
1238: xmlClearNodeInfoSeq(&ctxt->node_seq);
1239: xmlInitParserCtxt(ctxt);
1240: }
1241:
1242: /************************************************************************
1243: * *
1.77 daniel 1244: * Commodity functions to handle entities *
1245: * *
1246: ************************************************************************/
1247:
1.174 daniel 1248: /**
1249: * xmlCheckEntity:
1250: * @ctxt: an XML parser context
1251: * @content: the entity content string
1252: *
1253: * Parse an entity content and checks the WF constraints
1254: *
1255: */
1256:
1257: void
1258: xmlCheckEntity(xmlParserCtxtPtr ctxt, const xmlChar *content) {
1259: }
1.97 daniel 1260:
1261: /**
1262: * xmlParseCharRef:
1263: * @ctxt: an XML parser context
1264: *
1265: * parse Reference declarations
1266: *
1267: * [66] CharRef ::= '&#' [0-9]+ ';' |
1268: * '&#x' [0-9a-fA-F]+ ';'
1269: *
1.98 daniel 1270: * [ WFC: Legal Character ]
1271: * Characters referred to using character references must match the
1272: * production for Char.
1273: *
1.135 daniel 1274: * Returns the value parsed (as an int), 0 in case of error
1.77 daniel 1275: */
1.97 daniel 1276: int
1277: xmlParseCharRef(xmlParserCtxtPtr ctxt) {
1278: int val = 0;
1279:
1.111 daniel 1280: if (ctxt->token != 0) {
1281: val = ctxt->token;
1282: ctxt->token = 0;
1283: return(val);
1284: }
1.152 daniel 1285: if ((RAW == '&') && (NXT(1) == '#') &&
1.97 daniel 1286: (NXT(2) == 'x')) {
1287: SKIP(3);
1.152 daniel 1288: while (RAW != ';') {
1289: if ((RAW >= '0') && (RAW <= '9'))
1.97 daniel 1290: val = val * 16 + (CUR - '0');
1.152 daniel 1291: else if ((RAW >= 'a') && (RAW <= 'f'))
1.97 daniel 1292: val = val * 16 + (CUR - 'a') + 10;
1.152 daniel 1293: else if ((RAW >= 'A') && (RAW <= 'F'))
1.97 daniel 1294: val = val * 16 + (CUR - 'A') + 10;
1295: else {
1.123 daniel 1296: ctxt->errNo = XML_ERR_INVALID_HEX_CHARREF;
1.97 daniel 1297: if ((ctxt->sax != NULL) && (ctxt->sax->error != NULL))
1298: ctxt->sax->error(ctxt->userData,
1299: "xmlParseCharRef: invalid hexadecimal value\n");
1300: ctxt->wellFormed = 0;
1301: val = 0;
1302: break;
1303: }
1304: NEXT;
1305: }
1.164 daniel 1306: if (RAW == ';') {
1307: /* on purpose to avoid reentrancy problems with NEXT and SKIP */
1308: ctxt->nbChars ++;
1309: ctxt->input->cur++;
1310: }
1.152 daniel 1311: } else if ((RAW == '&') && (NXT(1) == '#')) {
1.97 daniel 1312: SKIP(2);
1.152 daniel 1313: while (RAW != ';') {
1314: if ((RAW >= '0') && (RAW <= '9'))
1.97 daniel 1315: val = val * 10 + (CUR - '0');
1316: else {
1.123 daniel 1317: ctxt->errNo = XML_ERR_INVALID_DEC_CHARREF;
1.97 daniel 1318: if ((ctxt->sax != NULL) && (ctxt->sax->error != NULL))
1319: ctxt->sax->error(ctxt->userData,
1320: "xmlParseCharRef: invalid decimal value\n");
1321: ctxt->wellFormed = 0;
1322: val = 0;
1323: break;
1324: }
1325: NEXT;
1326: }
1.164 daniel 1327: if (RAW == ';') {
1328: /* on purpose to avoid reentrancy problems with NEXT and SKIP */
1329: ctxt->nbChars ++;
1330: ctxt->input->cur++;
1331: }
1.97 daniel 1332: } else {
1.123 daniel 1333: ctxt->errNo = XML_ERR_INVALID_CHARREF;
1.97 daniel 1334: if ((ctxt->sax != NULL) && (ctxt->sax->error != NULL))
1.98 daniel 1335: ctxt->sax->error(ctxt->userData,
1336: "xmlParseCharRef: invalid value\n");
1.97 daniel 1337: ctxt->wellFormed = 0;
1338: }
1.98 daniel 1339:
1.97 daniel 1340: /*
1.98 daniel 1341: * [ WFC: Legal Character ]
1342: * Characters referred to using character references must match the
1343: * production for Char.
1.97 daniel 1344: */
1345: if (IS_CHAR(val)) {
1346: return(val);
1347: } else {
1.123 daniel 1348: ctxt->errNo = XML_ERR_INVALID_CHAR;
1.97 daniel 1349: if ((ctxt->sax != NULL) && (ctxt->sax->error != NULL))
1.123 daniel 1350: ctxt->sax->error(ctxt->userData, "CharRef: invalid xmlChar value %d\n",
1.97 daniel 1351: val);
1352: ctxt->wellFormed = 0;
1353: }
1354: return(0);
1.77 daniel 1355: }
1356:
1.96 daniel 1357: /**
1.135 daniel 1358: * xmlParseStringCharRef:
1359: * @ctxt: an XML parser context
1360: * @str: a pointer to an index in the string
1361: *
1362: * parse Reference declarations, variant parsing from a string rather
1363: * than an an input flow.
1364: *
1365: * [66] CharRef ::= '&#' [0-9]+ ';' |
1366: * '&#x' [0-9a-fA-F]+ ';'
1367: *
1368: * [ WFC: Legal Character ]
1369: * Characters referred to using character references must match the
1370: * production for Char.
1371: *
1372: * Returns the value parsed (as an int), 0 in case of error, str will be
1373: * updated to the current value of the index
1374: */
1375: int
1376: xmlParseStringCharRef(xmlParserCtxtPtr ctxt, const xmlChar **str) {
1377: const xmlChar *ptr;
1378: xmlChar cur;
1379: int val = 0;
1380:
1381: if ((str == NULL) || (*str == NULL)) return(0);
1382: ptr = *str;
1383: cur = *ptr;
1.137 daniel 1384: if ((cur == '&') && (ptr[1] == '#') && (ptr[2] == 'x')) {
1.135 daniel 1385: ptr += 3;
1386: cur = *ptr;
1387: while (cur != ';') {
1388: if ((cur >= '0') && (cur <= '9'))
1389: val = val * 16 + (cur - '0');
1390: else if ((cur >= 'a') && (cur <= 'f'))
1391: val = val * 16 + (cur - 'a') + 10;
1392: else if ((cur >= 'A') && (cur <= 'F'))
1393: val = val * 16 + (cur - 'A') + 10;
1394: else {
1395: ctxt->errNo = XML_ERR_INVALID_HEX_CHARREF;
1396: if ((ctxt->sax != NULL) && (ctxt->sax->error != NULL))
1397: ctxt->sax->error(ctxt->userData,
1398: "xmlParseCharRef: invalid hexadecimal value\n");
1399: ctxt->wellFormed = 0;
1400: val = 0;
1401: break;
1402: }
1403: ptr++;
1404: cur = *ptr;
1405: }
1406: if (cur == ';')
1407: ptr++;
1.145 daniel 1408: } else if ((cur == '&') && (ptr[1] == '#')){
1.135 daniel 1409: ptr += 2;
1410: cur = *ptr;
1411: while (cur != ';') {
1412: if ((cur >= '0') && (cur <= '9'))
1413: val = val * 10 + (cur - '0');
1414: else {
1415: ctxt->errNo = XML_ERR_INVALID_DEC_CHARREF;
1416: if ((ctxt->sax != NULL) && (ctxt->sax->error != NULL))
1417: ctxt->sax->error(ctxt->userData,
1418: "xmlParseCharRef: invalid decimal value\n");
1419: ctxt->wellFormed = 0;
1420: val = 0;
1421: break;
1422: }
1423: ptr++;
1424: cur = *ptr;
1425: }
1426: if (cur == ';')
1427: ptr++;
1428: } else {
1429: ctxt->errNo = XML_ERR_INVALID_CHARREF;
1430: if ((ctxt->sax != NULL) && (ctxt->sax->error != NULL))
1431: ctxt->sax->error(ctxt->userData,
1432: "xmlParseCharRef: invalid value\n");
1433: ctxt->wellFormed = 0;
1434: return(0);
1435: }
1436: *str = ptr;
1437:
1438: /*
1439: * [ WFC: Legal Character ]
1440: * Characters referred to using character references must match the
1441: * production for Char.
1442: */
1443: if (IS_CHAR(val)) {
1444: return(val);
1445: } else {
1446: ctxt->errNo = XML_ERR_INVALID_CHAR;
1447: if ((ctxt->sax != NULL) && (ctxt->sax->error != NULL))
1448: ctxt->sax->error(ctxt->userData,
1449: "CharRef: invalid xmlChar value %d\n", val);
1450: ctxt->wellFormed = 0;
1451: }
1452: return(0);
1453: }
1454:
1455: /**
1.96 daniel 1456: * xmlParserHandleReference:
1457: * @ctxt: the parser context
1458: *
1.97 daniel 1459: * [67] Reference ::= EntityRef | CharRef
1460: *
1.96 daniel 1461: * [68] EntityRef ::= '&' Name ';'
1462: *
1.98 daniel 1463: * [ WFC: Entity Declared ]
1464: * the Name given in the entity reference must match that in an entity
1465: * declaration, except that well-formed documents need not declare any
1466: * of the following entities: amp, lt, gt, apos, quot.
1467: *
1468: * [ WFC: Parsed Entity ]
1469: * An entity reference must not contain the name of an unparsed entity
1470: *
1.97 daniel 1471: * [66] CharRef ::= '&#' [0-9]+ ';' |
1472: * '&#x' [0-9a-fA-F]+ ';'
1473: *
1.96 daniel 1474: * A PEReference may have been detectect in the current input stream
1475: * the handling is done accordingly to
1476: * http://www.w3.org/TR/REC-xml#entproc
1477: */
1478: void
1479: xmlParserHandleReference(xmlParserCtxtPtr ctxt) {
1.97 daniel 1480: xmlParserInputPtr input;
1.123 daniel 1481: xmlChar *name;
1.97 daniel 1482: xmlEntityPtr ent = NULL;
1483:
1.126 daniel 1484: if (ctxt->token != 0) {
1485: return;
1486: }
1.152 daniel 1487: if (RAW != '&') return;
1.97 daniel 1488: GROW;
1.152 daniel 1489: if ((RAW == '&') && (NXT(1) == '#')) {
1.97 daniel 1490: switch(ctxt->instate) {
1.140 daniel 1491: case XML_PARSER_ENTITY_DECL:
1492: case XML_PARSER_PI:
1.109 daniel 1493: case XML_PARSER_CDATA_SECTION:
1.140 daniel 1494: case XML_PARSER_COMMENT:
1.168 daniel 1495: case XML_PARSER_SYSTEM_LITERAL:
1.140 daniel 1496: /* we just ignore it there */
1497: return;
1498: case XML_PARSER_START_TAG:
1.109 daniel 1499: return;
1.140 daniel 1500: case XML_PARSER_END_TAG:
1.97 daniel 1501: return;
1502: case XML_PARSER_EOF:
1.123 daniel 1503: ctxt->errNo = XML_ERR_CHARREF_AT_EOF;
1.97 daniel 1504: if ((ctxt->sax != NULL) && (ctxt->sax->error != NULL))
1505: ctxt->sax->error(ctxt->userData, "CharRef at EOF\n");
1506: ctxt->wellFormed = 0;
1507: return;
1508: case XML_PARSER_PROLOG:
1.140 daniel 1509: case XML_PARSER_START:
1510: case XML_PARSER_MISC:
1.123 daniel 1511: ctxt->errNo = XML_ERR_CHARREF_IN_PROLOG;
1.97 daniel 1512: if ((ctxt->sax != NULL) && (ctxt->sax->error != NULL))
1513: ctxt->sax->error(ctxt->userData, "CharRef in prolog!\n");
1514: ctxt->wellFormed = 0;
1515: return;
1516: case XML_PARSER_EPILOG:
1.123 daniel 1517: ctxt->errNo = XML_ERR_CHARREF_IN_EPILOG;
1.97 daniel 1518: if ((ctxt->sax != NULL) && (ctxt->sax->error != NULL))
1519: ctxt->sax->error(ctxt->userData, "CharRef in epilog!\n");
1520: ctxt->wellFormed = 0;
1521: return;
1522: case XML_PARSER_DTD:
1.123 daniel 1523: ctxt->errNo = XML_ERR_CHARREF_IN_DTD;
1.97 daniel 1524: if ((ctxt->sax != NULL) && (ctxt->sax->error != NULL))
1525: ctxt->sax->error(ctxt->userData,
1526: "CharRef are forbiden in DTDs!\n");
1527: ctxt->wellFormed = 0;
1528: return;
1529: case XML_PARSER_ENTITY_VALUE:
1530: /*
1531: * NOTE: in the case of entity values, we don't do the
1.127 daniel 1532: * substitution here since we need the literal
1.97 daniel 1533: * entity value to be able to save the internal
1534: * subset of the document.
1535: * This will be handled by xmlDecodeEntities
1536: */
1537: return;
1538: case XML_PARSER_CONTENT:
1539: case XML_PARSER_ATTRIBUTE_VALUE:
1540: ctxt->token = xmlParseCharRef(ctxt);
1541: return;
1542: }
1543: return;
1544: }
1545:
1546: switch(ctxt->instate) {
1.109 daniel 1547: case XML_PARSER_CDATA_SECTION:
1548: return;
1.140 daniel 1549: case XML_PARSER_PI:
1.97 daniel 1550: case XML_PARSER_COMMENT:
1.168 daniel 1551: case XML_PARSER_SYSTEM_LITERAL:
1552: case XML_PARSER_CONTENT:
1.97 daniel 1553: return;
1.140 daniel 1554: case XML_PARSER_START_TAG:
1555: return;
1556: case XML_PARSER_END_TAG:
1557: return;
1.97 daniel 1558: case XML_PARSER_EOF:
1.123 daniel 1559: ctxt->errNo = XML_ERR_ENTITYREF_AT_EOF;
1.97 daniel 1560: if ((ctxt->sax != NULL) && (ctxt->sax->error != NULL))
1561: ctxt->sax->error(ctxt->userData, "Reference at EOF\n");
1562: ctxt->wellFormed = 0;
1563: return;
1564: case XML_PARSER_PROLOG:
1.140 daniel 1565: case XML_PARSER_START:
1566: case XML_PARSER_MISC:
1.123 daniel 1567: ctxt->errNo = XML_ERR_ENTITYREF_IN_PROLOG;
1.97 daniel 1568: if ((ctxt->sax != NULL) && (ctxt->sax->error != NULL))
1569: ctxt->sax->error(ctxt->userData, "Reference in prolog!\n");
1570: ctxt->wellFormed = 0;
1571: return;
1572: case XML_PARSER_EPILOG:
1.123 daniel 1573: ctxt->errNo = XML_ERR_ENTITYREF_IN_EPILOG;
1.97 daniel 1574: if ((ctxt->sax != NULL) && (ctxt->sax->error != NULL))
1575: ctxt->sax->error(ctxt->userData, "Reference in epilog!\n");
1576: ctxt->wellFormed = 0;
1577: return;
1578: case XML_PARSER_ENTITY_VALUE:
1579: /*
1580: * NOTE: in the case of entity values, we don't do the
1.127 daniel 1581: * substitution here since we need the literal
1.97 daniel 1582: * entity value to be able to save the internal
1583: * subset of the document.
1584: * This will be handled by xmlDecodeEntities
1585: */
1586: return;
1587: case XML_PARSER_ATTRIBUTE_VALUE:
1588: /*
1589: * NOTE: in the case of attributes values, we don't do the
1590: * substitution here unless we are in a mode where
1591: * the parser is explicitely asked to substitute
1592: * entities. The SAX callback is called with values
1593: * without entity substitution.
1594: * This will then be handled by xmlDecodeEntities
1595: */
1.113 daniel 1596: return;
1.97 daniel 1597: case XML_PARSER_ENTITY_DECL:
1598: /*
1599: * we just ignore it there
1600: * the substitution will be done once the entity is referenced
1601: */
1602: return;
1603: case XML_PARSER_DTD:
1.123 daniel 1604: ctxt->errNo = XML_ERR_ENTITYREF_IN_DTD;
1.97 daniel 1605: if ((ctxt->sax != NULL) && (ctxt->sax->error != NULL))
1606: ctxt->sax->error(ctxt->userData,
1607: "Entity references are forbiden in DTDs!\n");
1608: ctxt->wellFormed = 0;
1609: return;
1610: }
1611:
1612: NEXT;
1613: name = xmlScanName(ctxt);
1614: if (name == NULL) {
1.123 daniel 1615: ctxt->errNo = XML_ERR_ENTITYREF_NO_NAME;
1.97 daniel 1616: if ((ctxt->sax != NULL) && (ctxt->sax->error != NULL))
1617: ctxt->sax->error(ctxt->userData, "Entity reference: no name\n");
1618: ctxt->wellFormed = 0;
1619: ctxt->token = '&';
1620: return;
1621: }
1622: if (NXT(xmlStrlen(name)) != ';') {
1.123 daniel 1623: ctxt->errNo = XML_ERR_ENTITYREF_SEMICOL_MISSING;
1.97 daniel 1624: if ((ctxt->sax != NULL) && (ctxt->sax->error != NULL))
1625: ctxt->sax->error(ctxt->userData,
1626: "Entity reference: ';' expected\n");
1627: ctxt->wellFormed = 0;
1628: ctxt->token = '&';
1.119 daniel 1629: xmlFree(name);
1.97 daniel 1630: return;
1631: }
1632: SKIP(xmlStrlen(name) + 1);
1633: if (ctxt->sax != NULL) {
1634: if (ctxt->sax->getEntity != NULL)
1635: ent = ctxt->sax->getEntity(ctxt->userData, name);
1636: }
1.98 daniel 1637:
1638: /*
1639: * [ WFC: Entity Declared ]
1640: * the Name given in the entity reference must match that in an entity
1641: * declaration, except that well-formed documents need not declare any
1642: * of the following entities: amp, lt, gt, apos, quot.
1643: */
1.97 daniel 1644: if (ent == NULL)
1645: ent = xmlGetPredefinedEntity(name);
1646: if (ent == NULL) {
1.123 daniel 1647: ctxt->errNo = XML_ERR_UNDECLARED_ENTITY;
1.97 daniel 1648: if ((ctxt->sax != NULL) && (ctxt->sax->error != NULL))
1649: ctxt->sax->error(ctxt->userData,
1.98 daniel 1650: "Entity reference: entity %s not declared\n",
1651: name);
1.97 daniel 1652: ctxt->wellFormed = 0;
1.119 daniel 1653: xmlFree(name);
1.97 daniel 1654: return;
1655: }
1.98 daniel 1656:
1657: /*
1658: * [ WFC: Parsed Entity ]
1659: * An entity reference must not contain the name of an unparsed entity
1660: */
1.159 daniel 1661: if (ent->etype == XML_EXTERNAL_GENERAL_UNPARSED_ENTITY) {
1.123 daniel 1662: ctxt->errNo = XML_ERR_UNPARSED_ENTITY;
1.98 daniel 1663: if ((ctxt->sax != NULL) && (ctxt->sax->error != NULL))
1664: ctxt->sax->error(ctxt->userData,
1665: "Entity reference to unparsed entity %s\n", name);
1666: ctxt->wellFormed = 0;
1667: }
1668:
1.159 daniel 1669: if (ent->etype == XML_INTERNAL_PREDEFINED_ENTITY) {
1.97 daniel 1670: ctxt->token = ent->content[0];
1.119 daniel 1671: xmlFree(name);
1.97 daniel 1672: return;
1673: }
1674: input = xmlNewEntityInputStream(ctxt, ent);
1675: xmlPushInput(ctxt, input);
1.119 daniel 1676: xmlFree(name);
1.96 daniel 1677: return;
1678: }
1679:
1680: /**
1681: * xmlParserHandlePEReference:
1682: * @ctxt: the parser context
1683: *
1684: * [69] PEReference ::= '%' Name ';'
1685: *
1.98 daniel 1686: * [ WFC: No Recursion ]
1687: * TODO A parsed entity must not contain a recursive
1688: * reference to itself, either directly or indirectly.
1689: *
1690: * [ WFC: Entity Declared ]
1691: * In a document without any DTD, a document with only an internal DTD
1692: * subset which contains no parameter entity references, or a document
1693: * with "standalone='yes'", ... ... The declaration of a parameter
1694: * entity must precede any reference to it...
1695: *
1696: * [ VC: Entity Declared ]
1697: * In a document with an external subset or external parameter entities
1698: * with "standalone='no'", ... ... The declaration of a parameter entity
1699: * must precede any reference to it...
1700: *
1701: * [ WFC: In DTD ]
1702: * Parameter-entity references may only appear in the DTD.
1703: * NOTE: misleading but this is handled.
1704: *
1705: * A PEReference may have been detected in the current input stream
1.96 daniel 1706: * the handling is done accordingly to
1707: * http://www.w3.org/TR/REC-xml#entproc
1708: * i.e.
1709: * - Included in literal in entity values
1710: * - Included as Paraemeter Entity reference within DTDs
1711: */
1712: void
1713: xmlParserHandlePEReference(xmlParserCtxtPtr ctxt) {
1.123 daniel 1714: xmlChar *name;
1.96 daniel 1715: xmlEntityPtr entity = NULL;
1716: xmlParserInputPtr input;
1717:
1.126 daniel 1718: if (ctxt->token != 0) {
1719: return;
1720: }
1.152 daniel 1721: if (RAW != '%') return;
1.96 daniel 1722: switch(ctxt->instate) {
1.109 daniel 1723: case XML_PARSER_CDATA_SECTION:
1724: return;
1.97 daniel 1725: case XML_PARSER_COMMENT:
1726: return;
1.140 daniel 1727: case XML_PARSER_START_TAG:
1728: return;
1729: case XML_PARSER_END_TAG:
1730: return;
1.96 daniel 1731: case XML_PARSER_EOF:
1.123 daniel 1732: ctxt->errNo = XML_ERR_PEREF_AT_EOF;
1.96 daniel 1733: if ((ctxt->sax != NULL) && (ctxt->sax->error != NULL))
1734: ctxt->sax->error(ctxt->userData, "PEReference at EOF\n");
1735: ctxt->wellFormed = 0;
1736: return;
1737: case XML_PARSER_PROLOG:
1.140 daniel 1738: case XML_PARSER_START:
1739: case XML_PARSER_MISC:
1.123 daniel 1740: ctxt->errNo = XML_ERR_PEREF_IN_PROLOG;
1.96 daniel 1741: if ((ctxt->sax != NULL) && (ctxt->sax->error != NULL))
1742: ctxt->sax->error(ctxt->userData, "PEReference in prolog!\n");
1743: ctxt->wellFormed = 0;
1744: return;
1.97 daniel 1745: case XML_PARSER_ENTITY_DECL:
1.96 daniel 1746: case XML_PARSER_CONTENT:
1747: case XML_PARSER_ATTRIBUTE_VALUE:
1.140 daniel 1748: case XML_PARSER_PI:
1.168 daniel 1749: case XML_PARSER_SYSTEM_LITERAL:
1.96 daniel 1750: /* we just ignore it there */
1751: return;
1752: case XML_PARSER_EPILOG:
1.123 daniel 1753: ctxt->errNo = XML_ERR_PEREF_IN_EPILOG;
1.96 daniel 1754: if ((ctxt->sax != NULL) && (ctxt->sax->error != NULL))
1.97 daniel 1755: ctxt->sax->error(ctxt->userData, "PEReference in epilog!\n");
1.96 daniel 1756: ctxt->wellFormed = 0;
1757: return;
1.97 daniel 1758: case XML_PARSER_ENTITY_VALUE:
1759: /*
1760: * NOTE: in the case of entity values, we don't do the
1.127 daniel 1761: * substitution here since we need the literal
1.97 daniel 1762: * entity value to be able to save the internal
1763: * subset of the document.
1764: * This will be handled by xmlDecodeEntities
1765: */
1766: return;
1.96 daniel 1767: case XML_PARSER_DTD:
1.98 daniel 1768: /*
1769: * [WFC: Well-Formedness Constraint: PEs in Internal Subset]
1770: * In the internal DTD subset, parameter-entity references
1771: * can occur only where markup declarations can occur, not
1772: * within markup declarations.
1773: * In that case this is handled in xmlParseMarkupDecl
1774: */
1775: if ((ctxt->external == 0) && (ctxt->inputNr == 1))
1776: return;
1.96 daniel 1777: }
1778:
1779: NEXT;
1780: name = xmlParseName(ctxt);
1781: if (name == NULL) {
1.123 daniel 1782: ctxt->errNo = XML_ERR_PEREF_NO_NAME;
1.96 daniel 1783: if ((ctxt->sax != NULL) && (ctxt->sax->error != NULL))
1784: ctxt->sax->error(ctxt->userData, "xmlHandlePEReference: no name\n");
1785: ctxt->wellFormed = 0;
1786: } else {
1.152 daniel 1787: if (RAW == ';') {
1.96 daniel 1788: NEXT;
1.98 daniel 1789: if ((ctxt->sax != NULL) && (ctxt->sax->getParameterEntity != NULL))
1790: entity = ctxt->sax->getParameterEntity(ctxt->userData, name);
1.96 daniel 1791: if (entity == NULL) {
1.98 daniel 1792:
1793: /*
1794: * [ WFC: Entity Declared ]
1795: * In a document without any DTD, a document with only an
1796: * internal DTD subset which contains no parameter entity
1797: * references, or a document with "standalone='yes'", ...
1798: * ... The declaration of a parameter entity must precede
1799: * any reference to it...
1800: */
1801: if ((ctxt->standalone == 1) ||
1802: ((ctxt->hasExternalSubset == 0) &&
1803: (ctxt->hasPErefs == 0))) {
1804: if ((ctxt->sax != NULL) && (ctxt->sax->error != NULL))
1805: ctxt->sax->error(ctxt->userData,
1806: "PEReference: %%%s; not found\n", name);
1807: ctxt->wellFormed = 0;
1808: } else {
1809: /*
1810: * [ VC: Entity Declared ]
1811: * In a document with an external subset or external
1812: * parameter entities with "standalone='no'", ...
1813: * ... The declaration of a parameter entity must precede
1814: * any reference to it...
1815: */
1816: if ((ctxt->sax != NULL) && (ctxt->sax->warning != NULL))
1817: ctxt->sax->warning(ctxt->userData,
1818: "PEReference: %%%s; not found\n", name);
1819: ctxt->valid = 0;
1820: }
1.96 daniel 1821: } else {
1.159 daniel 1822: if ((entity->etype == XML_INTERNAL_PARAMETER_ENTITY) ||
1823: (entity->etype == XML_EXTERNAL_PARAMETER_ENTITY)) {
1.96 daniel 1824: /*
1.156 daniel 1825: * TODO !!! handle the extra spaces added before and after
1.96 daniel 1826: * c.f. http://www.w3.org/TR/REC-xml#as-PE
1827: */
1828: input = xmlNewEntityInputStream(ctxt, entity);
1829: xmlPushInput(ctxt, input);
1.164 daniel 1830: if ((entity->etype == XML_EXTERNAL_PARAMETER_ENTITY) &&
1831: (RAW == '<') && (NXT(1) == '?') &&
1832: (NXT(2) == 'x') && (NXT(3) == 'm') &&
1833: (NXT(4) == 'l') && (IS_BLANK(NXT(5)))) {
1.172 daniel 1834: xmlParseTextDecl(ctxt);
1.164 daniel 1835: }
1836: if (ctxt->token == 0)
1837: ctxt->token = ' ';
1.96 daniel 1838: } else {
1839: if ((ctxt->sax != NULL) && (ctxt->sax->error != NULL))
1840: ctxt->sax->error(ctxt->userData,
1841: "xmlHandlePEReference: %s is not a parameter entity\n",
1842: name);
1843: ctxt->wellFormed = 0;
1844: }
1845: }
1846: } else {
1.123 daniel 1847: ctxt->errNo = XML_ERR_PEREF_SEMICOL_MISSING;
1.96 daniel 1848: if ((ctxt->sax != NULL) && (ctxt->sax->error != NULL))
1849: ctxt->sax->error(ctxt->userData,
1850: "xmlHandlePEReference: expecting ';'\n");
1851: ctxt->wellFormed = 0;
1852: }
1.119 daniel 1853: xmlFree(name);
1.97 daniel 1854: }
1855: }
1856:
/*
 * Macro used to grow the current buffer.
 *
 * Doubles buffer##_size and reallocates `buffer` to that many xmlChar.
 * It must be used inside a function returning a pointer: if the
 * reallocation fails the error is reported with perror(), the old
 * buffer is freed (so it is not leaked) and the enclosing function
 * returns NULL.
 */
#define growBuffer(buffer) {						\
    xmlChar *growBuffer_tmp;						\
    buffer##_size *= 2;							\
    growBuffer_tmp = (xmlChar *)					\
		xmlRealloc(buffer, buffer##_size * sizeof(xmlChar));	\
    if (growBuffer_tmp == NULL) {					\
	perror("realloc failed");					\
	xmlFree(buffer);						\
	return(NULL);							\
    }									\
    buffer = growBuffer_tmp;						\
}
1.77 daniel 1869:
1870: /**
1871: * xmlDecodeEntities:
1872: * @ctxt: the parser context
1873: * @what: combination of XML_SUBSTITUTE_REF and XML_SUBSTITUTE_PEREF
1874: * @len: the len to decode (in bytes !), -1 for no size limit
1.123 daniel 1875: * @end: an end marker xmlChar, 0 if none
1876: * @end2: an end marker xmlChar, 0 if none
1877: * @end3: an end marker xmlChar, 0 if none
1.77 daniel 1878: *
1879: * [67] Reference ::= EntityRef | CharRef
1880: *
1881: * [69] PEReference ::= '%' Name ';'
1882: *
1883: * Returns A newly allocated string with the substitution done. The caller
1884: * must deallocate it !
1885: */
1.123 daniel 1886: xmlChar *
1.77 daniel 1887: xmlDecodeEntities(xmlParserCtxtPtr ctxt, int len, int what,
1.123 daniel 1888: xmlChar end, xmlChar end2, xmlChar end3) {
1889: xmlChar *buffer = NULL;
1.78 daniel 1890: int buffer_size = 0;
1.161 daniel 1891: int nbchars = 0;
1.78 daniel 1892:
1.123 daniel 1893: xmlChar *current = NULL;
1.77 daniel 1894: xmlEntityPtr ent;
1895: unsigned int max = (unsigned int) len;
1.161 daniel 1896: int c,l;
1.77 daniel 1897:
1898: /*
1899: * allocate a translation buffer.
1900: */
1.140 daniel 1901: buffer_size = XML_PARSER_BIG_BUFFER_SIZE;
1.123 daniel 1902: buffer = (xmlChar *) xmlMalloc(buffer_size * sizeof(xmlChar));
1.77 daniel 1903: if (buffer == NULL) {
1904: perror("xmlDecodeEntities: malloc failed");
1905: return(NULL);
1906: }
1907:
1.78 daniel 1908: /*
1909: * Ok loop until we reach one of the ending char or a size limit.
1910: */
1.161 daniel 1911: c = CUR_CHAR(l);
1912: while ((nbchars < max) && (c != end) &&
1913: (c != end2) && (c != end3)) {
1.77 daniel 1914:
1.161 daniel 1915: if (c == 0) break;
1916: if (((c == '&') && (ctxt->token != '&')) && (NXT(1) == '#')) {
1.98 daniel 1917: int val = xmlParseCharRef(ctxt);
1.161 daniel 1918: COPY_BUF(0,buffer,nbchars,val);
1919: NEXTL(l);
1920: } else if ((c == '&') && (ctxt->token != '&') &&
1921: (what & XML_SUBSTITUTE_REF)) {
1.98 daniel 1922: ent = xmlParseEntityRef(ctxt);
1923: if ((ent != NULL) &&
1924: (ctxt->replaceEntities != 0)) {
1925: current = ent->content;
1926: while (*current != 0) {
1.161 daniel 1927: buffer[nbchars++] = *current++;
1928: if (nbchars > buffer_size - XML_PARSER_BUFFER_SIZE) {
1.98 daniel 1929: growBuffer(buffer);
1.77 daniel 1930: }
1931: }
1.98 daniel 1932: } else if (ent != NULL) {
1.123 daniel 1933: const xmlChar *cur = ent->name;
1.98 daniel 1934:
1.161 daniel 1935: buffer[nbchars++] = '&';
1936: if (nbchars > buffer_size - XML_PARSER_BUFFER_SIZE) {
1.98 daniel 1937: growBuffer(buffer);
1938: }
1.161 daniel 1939: while (*cur != 0) {
1940: buffer[nbchars++] = *cur++;
1941: }
1942: buffer[nbchars++] = ';';
1.77 daniel 1943: }
1.161 daniel 1944: } else if (c == '%' && (what & XML_SUBSTITUTE_PEREF)) {
1.97 daniel 1945: /*
1.77 daniel 1946: * a PEReference induce to switch the entity flow,
1947: * we break here to flush the current set of chars
1948: * parsed if any. We will be called back later.
1.97 daniel 1949: */
1.91 daniel 1950: if (nbchars != 0) break;
1.77 daniel 1951:
1952: xmlParsePEReference(ctxt);
1.79 daniel 1953:
1.97 daniel 1954: /*
1.79 daniel 1955: * Pop-up of finished entities.
1.97 daniel 1956: */
1.152 daniel 1957: while ((RAW == 0) && (ctxt->inputNr > 1))
1.79 daniel 1958: xmlPopInput(ctxt);
1959:
1.98 daniel 1960: break;
1.77 daniel 1961: } else {
1.161 daniel 1962: COPY_BUF(l,buffer,nbchars,c);
1963: NEXTL(l);
1964: if (nbchars > buffer_size - XML_PARSER_BUFFER_SIZE) {
1.86 daniel 1965: growBuffer(buffer);
1966: }
1.77 daniel 1967: }
1.161 daniel 1968: c = CUR_CHAR(l);
1.77 daniel 1969: }
1.161 daniel 1970: buffer[nbchars++] = 0;
1.77 daniel 1971: return(buffer);
1972: }
1973:
1.135 daniel 1974: /**
1975: * xmlStringDecodeEntities:
1976: * @ctxt: the parser context
1977: * @str: the input string
1978: * @what: combination of XML_SUBSTITUTE_REF and XML_SUBSTITUTE_PEREF
1979: * @end: an end marker xmlChar, 0 if none
1980: * @end2: an end marker xmlChar, 0 if none
1981: * @end3: an end marker xmlChar, 0 if none
1982: *
1983: * [67] Reference ::= EntityRef | CharRef
1984: *
1985: * [69] PEReference ::= '%' Name ';'
1986: *
1987: * Returns A newly allocated string with the substitution done. The caller
1988: * must deallocate it !
1989: */
1990: xmlChar *
1991: xmlStringDecodeEntities(xmlParserCtxtPtr ctxt, const xmlChar *str, int what,
1992: xmlChar end, xmlChar end2, xmlChar end3) {
1993: xmlChar *buffer = NULL;
1994: int buffer_size = 0;
1995:
1996: xmlChar *current = NULL;
1997: xmlEntityPtr ent;
1.176 daniel 1998: int c,l;
1999: int nbchars = 0;
1.135 daniel 2000:
2001: /*
2002: * allocate a translation buffer.
2003: */
1.140 daniel 2004: buffer_size = XML_PARSER_BIG_BUFFER_SIZE;
1.135 daniel 2005: buffer = (xmlChar *) xmlMalloc(buffer_size * sizeof(xmlChar));
2006: if (buffer == NULL) {
2007: perror("xmlDecodeEntities: malloc failed");
2008: return(NULL);
2009: }
2010:
2011: /*
2012: * Ok loop until we reach one of the ending char or a size limit.
2013: */
1.176 daniel 2014: c = CUR_SCHAR(str, l);
2015: while ((c != 0) && (c != end) && (c != end2) && (c != end3)) {
1.135 daniel 2016:
1.176 daniel 2017: if (c == 0) break;
2018: if ((c == '&') && (str[1] == '#')) {
1.135 daniel 2019: int val = xmlParseStringCharRef(ctxt, &str);
1.176 daniel 2020: if (val != 0) {
2021: COPY_BUF(0,buffer,nbchars,val);
2022: }
2023: } else if ((c == '&') && (what & XML_SUBSTITUTE_REF)) {
1.135 daniel 2024: ent = xmlParseStringEntityRef(ctxt, &str);
2025: if ((ent != NULL) &&
2026: (ctxt->replaceEntities != 0)) {
2027: current = ent->content;
2028: while (*current != 0) {
1.176 daniel 2029: buffer[nbchars++] = *current++;
2030: if (nbchars > buffer_size - XML_PARSER_BUFFER_SIZE) {
1.135 daniel 2031: growBuffer(buffer);
2032: }
2033: }
2034: } else if (ent != NULL) {
2035: int i = xmlStrlen(ent->name);
2036: const xmlChar *cur = ent->name;
2037:
1.176 daniel 2038: buffer[nbchars++] = '&';
2039: if (nbchars > buffer_size - i - XML_PARSER_BUFFER_SIZE) {
1.135 daniel 2040: growBuffer(buffer);
2041: }
2042: for (;i > 0;i--)
1.176 daniel 2043: buffer[nbchars++] = *cur++;
2044: buffer[nbchars++] = ';';
1.135 daniel 2045: }
1.176 daniel 2046: } else if (c == '%' && (what & XML_SUBSTITUTE_PEREF)) {
1.135 daniel 2047: ent = xmlParseStringPEReference(ctxt, &str);
2048: if (ent != NULL) {
2049: current = ent->content;
2050: while (*current != 0) {
1.176 daniel 2051: buffer[nbchars++] = *current++;
2052: if (nbchars > buffer_size - XML_PARSER_BUFFER_SIZE) {
1.135 daniel 2053: growBuffer(buffer);
2054: }
2055: }
2056: }
2057: } else {
1.176 daniel 2058: COPY_BUF(l,buffer,nbchars,c);
2059: str += l;
2060: if (nbchars > buffer_size - XML_PARSER_BUFFER_SIZE) {
1.135 daniel 2061: growBuffer(buffer);
2062: }
2063: }
1.176 daniel 2064: c = CUR_SCHAR(str, l);
1.135 daniel 2065: }
1.176 daniel 2066: buffer[nbchars++] = 0;
1.135 daniel 2067: return(buffer);
2068: }
2069:
1.1 veillard 2070:
1.28 daniel 2071: /************************************************************************
2072: * *
1.75 daniel 2073: * Commodity functions to handle encodings *
2074: * *
2075: ************************************************************************/
2076:
1.172 daniel 2077: /*
2078: * xmlCheckLanguageID
2079: * @lang: pointer to the string value
2080: *
2081: * Checks that the value conforms to the LanguageID production:
2082: *
2083: * [33] LanguageID ::= Langcode ('-' Subcode)*
2084: * [34] Langcode ::= ISO639Code | IanaCode | UserCode
2085: * [35] ISO639Code ::= ([a-z] | [A-Z]) ([a-z] | [A-Z])
2086: * [36] IanaCode ::= ('i' | 'I') '-' ([a-z] | [A-Z])+
2087: * [37] UserCode ::= ('x' | 'X') '-' ([a-z] | [A-Z])+
2088: * [38] Subcode ::= ([a-z] | [A-Z])+
2089: *
2090: * Returns 1 if correct 0 otherwise
2091: **/
2092: int
2093: xmlCheckLanguageID(const xmlChar *lang) {
2094: const xmlChar *cur = lang;
2095:
2096: if (cur == NULL)
2097: return(0);
2098: if (((cur[0] == 'i') && (cur[1] == '-')) ||
2099: ((cur[0] == 'I') && (cur[1] == '-'))) {
2100: /*
2101: * IANA code
2102: */
2103: cur += 2;
2104: while (((cur[0] >= 'A') && (cur[0] <= 'Z')) ||
2105: ((cur[0] >= 'a') && (cur[0] <= 'z')))
2106: cur++;
2107: } else if (((cur[0] == 'x') && (cur[1] == '-')) ||
2108: ((cur[0] == 'X') && (cur[1] == '-'))) {
2109: /*
2110: * User code
2111: */
2112: cur += 2;
2113: while (((cur[0] >= 'A') && (cur[0] <= 'Z')) ||
2114: ((cur[0] >= 'a') && (cur[0] <= 'z')))
2115: cur++;
2116: } else if (((cur[0] >= 'A') && (cur[0] <= 'Z')) ||
2117: ((cur[0] >= 'a') && (cur[0] <= 'z'))) {
2118: /*
2119: * ISO639
2120: */
2121: cur++;
2122: if (((cur[0] >= 'A') && (cur[0] <= 'Z')) ||
2123: ((cur[0] >= 'a') && (cur[0] <= 'z')))
2124: cur++;
2125: else
2126: return(0);
2127: } else
2128: return(0);
2129: while (cur[0] != 0) {
2130: if (cur[0] != '-')
2131: return(0);
2132: cur++;
2133: if (((cur[0] >= 'A') && (cur[0] <= 'Z')) ||
2134: ((cur[0] >= 'a') && (cur[0] <= 'z')))
2135: cur++;
2136: else
2137: return(0);
2138: while (((cur[0] >= 'A') && (cur[0] <= 'Z')) ||
2139: ((cur[0] >= 'a') && (cur[0] <= 'z')))
2140: cur++;
2141: }
2142: return(1);
2143: }
2144:
1.75 daniel 2145: /**
2146: * xmlSwitchEncoding:
2147: * @ctxt: the parser context
1.124 daniel 2148: * @enc: the encoding value (number)
1.75 daniel 2149: *
2150: * change the input functions when discovering the character encoding
2151: * of a given entity.
 *
 * When xmlGetCharEncodingHandler() returns a handler for @enc, the part of
 * the current input which was not consumed yet is converted through
 * handler->input() and the input is switched to the converted copy:
 *  - input with an I/O buffer (ctxt->input->buf): the handler is stored as
 *    the buffer encoder (an error is reported if one is already set) and
 *    the buffer content is replaced; the function then returns.
 *  - input without I/O buffer (static memory array): ctxt->input->length
 *    must be non zero; base, cur and length are replaced.
 * When the function has not returned by then, the final switch reports
 * XML_CHAR_ENCODING_ERROR as an unknown encoding (clearing wellFormed),
 * silently accepts NONE and UTF8, and reports the other listed encodings
 * as unsupported.
2152: */
2153: void
2154: xmlSwitchEncoding(xmlParserCtxtPtr ctxt, xmlCharEncoding enc)
2155: {
1.156 daniel 2156: xmlCharEncodingHandlerPtr handler;
2157:
2158: handler = xmlGetCharEncodingHandler(enc);
2159: if (handler != NULL) {
2160: if (ctxt->input != NULL) {
2161: if (ctxt->input->buf != NULL) {
/* only one encoder per I/O buffer: refuse to replace an existing one */
2162: if (ctxt->input->buf->encoder != NULL) {
2163: if ((ctxt->sax != NULL) && (ctxt->sax->error != NULL))
2164: ctxt->sax->error(ctxt->userData,
2165: "xmlSwitchEncoding : encoder already regitered\n");
2166: return;
2167: }
2168: ctxt->input->buf->encoder = handler;
2169:
2170: /*
2171: * Is there already some content down the pipe to convert
2172: */
2173: if ((ctxt->input->buf->buffer != NULL) &&
2174: (ctxt->input->buf->buffer->use > 0)) {
2175: xmlChar *buf;
2176: int res, len, size;
2177: int processed;
2178:
2179: /*
2180: * Specific handling of the Byte Order Mark for
2181: * UTF-16
2182: */
2183: if ((enc == XML_CHAR_ENCODING_UTF16LE) &&
2184: (ctxt->input->cur[0] == 0xFF) &&
2185: (ctxt->input->cur[1] == 0xFE)) {
2186: SKIP(2);
2187: }
2188: if ((enc == XML_CHAR_ENCODING_UTF16BE) &&
2189: (ctxt->input->cur[0] == 0xFE) &&
2190: (ctxt->input->cur[1] == 0xFF)) {
2191: SKIP(2);
2192: }
2193:
2194: /*
2195: * convert the non processed part
2196: */
/* computed after the BOM test above, so a skipped BOM counts as processed */
2197: processed = ctxt->input->cur - ctxt->input->base;
2198: len = ctxt->input->buf->buffer->use - processed;
2199:
/* nothing left to convert: the encoder stays installed for later data */
2200: if (len <= 0) {
2201: return;
2202: }
/* first output size guess: 4 times the buffered bytes, at least 4000 */
2203: size = ctxt->input->buf->buffer->use * 4;
2204: if (size < 4000)
2205: size = 4000;
1.167 daniel 2206: retry_larger:
/* one extra byte is allocated for the terminating 0 written below */
1.160 daniel 2207: buf = (xmlChar *) xmlMalloc(size + 1);
1.156 daniel 2208: if (buf == NULL) {
2209: if ((ctxt->sax != NULL) && (ctxt->sax->error != NULL))
2210: ctxt->sax->error(ctxt->userData,
2211: "xmlSwitchEncoding : out of memory\n");
2212: return;
2213: }
1.160 daniel 2214: /* TODO !!! Handling of buf too small */
1.156 daniel 2215: res = handler->input(buf, size, ctxt->input->cur, &len);
/*
 * A result of -1 is treated as "output too small": retry with a buffer
 * twice as large.
 * NOTE(review): len is passed by address and is not reset before the
 * retry, and nothing bounds the number of retries - confirm against the
 * handler->input() contract.
 */
1.167 daniel 2216: if (res == -1) {
2217: size *= 2;
2218: xmlFree(buf);
2219: goto retry_larger;
2220: }
/* failure: negative result, or len no longer equals the unprocessed size */
1.156 daniel 2221: if ((res < 0) ||
2222: (len != ctxt->input->buf->buffer->use - processed)) {
2223: if ((ctxt->sax != NULL) && (ctxt->sax->error != NULL))
2224: ctxt->sax->error(ctxt->userData,
2225: "xmlSwitchEncoding : conversion failed\n");
2226: xmlFree(buf);
2227: return;
2228: }
1.167 daniel 2229:
1.156 daniel 2230: /*
2231: * Conversion succeeded, get rid of the old buffer
2232: */
2233: xmlFree(ctxt->input->buf->buffer->content);
2234: ctxt->input->buf->buffer->content = buf;
2235: ctxt->input->base = buf;
2236: ctxt->input->cur = buf;
2237: ctxt->input->buf->buffer->size = size;
2238: ctxt->input->buf->buffer->use = res;
1.160 daniel 2239: buf[res] = 0;
1.156 daniel 2240: }
2241: return;
2242: } else {
2243: if (ctxt->input->length == 0) {
2244: /*
2245: * When parsing a static memory array one must know the
2246: * size to be able to convert the buffer.
2247: */
2248: if ((ctxt->sax != NULL) && (ctxt->sax->error != NULL))
2249: ctxt->sax->error(ctxt->userData,
2250: "xmlSwitchEncoding : no input\n");
2251: return;
2252: } else {
2253: xmlChar *buf;
2254: int res, len;
2255: int processed = ctxt->input->cur - ctxt->input->base;
2256:
2257: /*
2258: * convert the non processed part
2259: */
2260: len = ctxt->input->length - processed;
2261: if (len <= 0) {
2262: if ((ctxt->sax != NULL) && (ctxt->sax->error != NULL))
2263: ctxt->sax->error(ctxt->userData,
2264: "xmlSwitchEncoding : input fully consumed?\n");
2265: return;
2266: }
/*
 * Output sized at 4 times the input length, with no retry on failure.
 * NOTE(review): unlike the buffered case no extra byte is reserved and no
 * terminating 0 is written here - confirm callers do not rely on one.
 */
2267: buf = (xmlChar *) xmlMalloc(ctxt->input->length * 4);
2268: if (buf == NULL) {
2269: if ((ctxt->sax != NULL) && (ctxt->sax->error != NULL))
2270: ctxt->sax->error(ctxt->userData,
2271: "xmlSwitchEncoding : out of memory\n");
2272: return;
2273: }
2274: res = handler->input(buf, ctxt->input->length * 4,
2275: ctxt->input->cur, &len);
2276: if ((res < 0) ||
2277: (len != ctxt->input->length - processed)) {
2278: if ((ctxt->sax != NULL) && (ctxt->sax->error != NULL))
2279: ctxt->sax->error(ctxt->userData,
2280: "xmlSwitchEncoding : conversion failed\n");
2281: xmlFree(buf);
2282: return;
2283: }
2284: /*
2285: * Conversion succeeded, get rid of the old buffer
2286: */
/* the old array is released only through the input's own free callback */
2287: if ((ctxt->input->free != NULL) &&
2288: (ctxt->input->base != NULL))
2289: ctxt->input->free((xmlChar *) ctxt->input->base);
2290: ctxt->input->base = ctxt->input->cur = buf;
2291: ctxt->input->length = res;
/*
 * NOTE(review): there is no return here, so after a successful conversion
 * control still reaches the switch below, which reports most encodings as
 * "not supported" - confirm whether this fall-through is intended.
 */
2292: }
2293: }
2294: } else {
2295: if ((ctxt->sax != NULL) && (ctxt->sax->error != NULL))
2296: ctxt->sax->error(ctxt->userData,
2297: "xmlSwitchEncoding : no input\n");
2298: }
2299: }
2300:
/*
 * Reached when no handler was found, when there is no current input, or
 * after the static array conversion above. Only the unknown encoding case
 * clears wellFormed; the "not supported" cases just set errNo and report.
 * An @enc value not listed here is ignored silently (no default case).
 */
1.75 daniel 2301: switch (enc) {
2302: case XML_CHAR_ENCODING_ERROR:
1.123 daniel 2303: ctxt->errNo = XML_ERR_UNKNOWN_ENCODING;
1.75 daniel 2304: if ((ctxt->sax != NULL) && (ctxt->sax->error != NULL))
2305: ctxt->sax->error(ctxt->userData, "encoding unknown\n");
2306: ctxt->wellFormed = 0;
2307: break;
2308: case XML_CHAR_ENCODING_NONE:
2309: /* let's assume it's UTF-8 without the XML decl */
2310: return;
2311: case XML_CHAR_ENCODING_UTF8:
2312: /* default encoding, no conversion should be needed */
2313: return;
2314: case XML_CHAR_ENCODING_UTF16LE:
1.123 daniel 2315: ctxt->errNo = XML_ERR_UNSUPPORTED_ENCODING;
1.75 daniel 2316: if ((ctxt->sax != NULL) && (ctxt->sax->error != NULL))
2317: ctxt->sax->error(ctxt->userData,
2318: "char encoding UTF16 little endian not supported\n");
2319: break;
2320: case XML_CHAR_ENCODING_UTF16BE:
1.123 daniel 2321: ctxt->errNo = XML_ERR_UNSUPPORTED_ENCODING;
1.75 daniel 2322: if ((ctxt->sax != NULL) && (ctxt->sax->error != NULL))
2323: ctxt->sax->error(ctxt->userData,
2324: "char encoding UTF16 big endian not supported\n");
2325: break;
2326: case XML_CHAR_ENCODING_UCS4LE:
1.123 daniel 2327: ctxt->errNo = XML_ERR_UNSUPPORTED_ENCODING;
1.75 daniel 2328: if ((ctxt->sax != NULL) && (ctxt->sax->error != NULL))
2329: ctxt->sax->error(ctxt->userData,
2330: "char encoding USC4 little endian not supported\n");
2331: break;
2332: case XML_CHAR_ENCODING_UCS4BE:
1.123 daniel 2333: ctxt->errNo = XML_ERR_UNSUPPORTED_ENCODING;
1.75 daniel 2334: if ((ctxt->sax != NULL) && (ctxt->sax->error != NULL))
2335: ctxt->sax->error(ctxt->userData,
2336: "char encoding USC4 big endian not supported\n");
2337: break;
2338: case XML_CHAR_ENCODING_EBCDIC:
1.123 daniel 2339: ctxt->errNo = XML_ERR_UNSUPPORTED_ENCODING;
1.75 daniel 2340: if ((ctxt->sax != NULL) && (ctxt->sax->error != NULL))
2341: ctxt->sax->error(ctxt->userData,
2342: "char encoding EBCDIC not supported\n");
2343: break;
2344: case XML_CHAR_ENCODING_UCS4_2143:
1.123 daniel 2345: ctxt->errNo = XML_ERR_UNSUPPORTED_ENCODING;
1.75 daniel 2346: if ((ctxt->sax != NULL) && (ctxt->sax->error != NULL))
2347: ctxt->sax->error(ctxt->userData,
2348: "char encoding UCS4 2143 not supported\n");
2349: break;
2350: case XML_CHAR_ENCODING_UCS4_3412:
1.123 daniel 2351: ctxt->errNo = XML_ERR_UNSUPPORTED_ENCODING;
1.75 daniel 2352: if ((ctxt->sax != NULL) && (ctxt->sax->error != NULL))
2353: ctxt->sax->error(ctxt->userData,
2354: "char encoding UCS4 3412 not supported\n");
2355: break;
2356: case XML_CHAR_ENCODING_UCS2:
1.123 daniel 2357: ctxt->errNo = XML_ERR_UNSUPPORTED_ENCODING;
1.75 daniel 2358: if ((ctxt->sax != NULL) && (ctxt->sax->error != NULL))
2359: ctxt->sax->error(ctxt->userData,
2360: "char encoding UCS2 not supported\n");
2361: break;
2362: case XML_CHAR_ENCODING_8859_1:
1.123 daniel 2363: ctxt->errNo = XML_ERR_UNSUPPORTED_ENCODING;
1.75 daniel 2364: if ((ctxt->sax != NULL) && (ctxt->sax->error != NULL))
2365: ctxt->sax->error(ctxt->userData,
2366: "char encoding ISO_8859_1 ISO Latin 1 not supported\n");
2367: break;
2368: case XML_CHAR_ENCODING_8859_2:
1.123 daniel 2369: ctxt->errNo = XML_ERR_UNSUPPORTED_ENCODING;
1.75 daniel 2370: if ((ctxt->sax != NULL) && (ctxt->sax->error != NULL))
2371: ctxt->sax->error(ctxt->userData,
2372: "char encoding ISO_8859_2 ISO Latin 2 not supported\n");
2373: break;
2374: case XML_CHAR_ENCODING_8859_3:
1.123 daniel 2375: ctxt->errNo = XML_ERR_UNSUPPORTED_ENCODING;
1.75 daniel 2376: if ((ctxt->sax != NULL) && (ctxt->sax->error != NULL))
2377: ctxt->sax->error(ctxt->userData,
2378: "char encoding ISO_8859_3 not supported\n");
2379: break;
2380: case XML_CHAR_ENCODING_8859_4:
1.123 daniel 2381: ctxt->errNo = XML_ERR_UNSUPPORTED_ENCODING;
1.75 daniel 2382: if ((ctxt->sax != NULL) && (ctxt->sax->error != NULL))
2383: ctxt->sax->error(ctxt->userData,
2384: "char encoding ISO_8859_4 not supported\n");
2385: break;
2386: case XML_CHAR_ENCODING_8859_5:
1.123 daniel 2387: ctxt->errNo = XML_ERR_UNSUPPORTED_ENCODING;
1.75 daniel 2388: if ((ctxt->sax != NULL) && (ctxt->sax->error != NULL))
2389: ctxt->sax->error(ctxt->userData,
2390: "char encoding ISO_8859_5 not supported\n");
2391: break;
2392: case XML_CHAR_ENCODING_8859_6:
1.123 daniel 2393: ctxt->errNo = XML_ERR_UNSUPPORTED_ENCODING;
1.75 daniel 2394: if ((ctxt->sax != NULL) && (ctxt->sax->error != NULL))
2395: ctxt->sax->error(ctxt->userData,
2396: "char encoding ISO_8859_6 not supported\n");
2397: break;
2398: case XML_CHAR_ENCODING_8859_7:
1.123 daniel 2399: ctxt->errNo = XML_ERR_UNSUPPORTED_ENCODING;
1.75 daniel 2400: if ((ctxt->sax != NULL) && (ctxt->sax->error != NULL))
2401: ctxt->sax->error(ctxt->userData,
2402: "char encoding ISO_8859_7 not supported\n");
2403: break;
2404: case XML_CHAR_ENCODING_8859_8:
1.123 daniel 2405: ctxt->errNo = XML_ERR_UNSUPPORTED_ENCODING;
1.75 daniel 2406: if ((ctxt->sax != NULL) && (ctxt->sax->error != NULL))
2407: ctxt->sax->error(ctxt->userData,
2408: "char encoding ISO_8859_8 not supported\n");
2409: break;
2410: case XML_CHAR_ENCODING_8859_9:
1.123 daniel 2411: ctxt->errNo = XML_ERR_UNSUPPORTED_ENCODING;
1.75 daniel 2412: if ((ctxt->sax != NULL) && (ctxt->sax->error != NULL))
2413: ctxt->sax->error(ctxt->userData,
2414: "char encoding ISO_8859_9 not supported\n");
2415: break;
2416: case XML_CHAR_ENCODING_2022_JP:
1.123 daniel 2417: ctxt->errNo = XML_ERR_UNSUPPORTED_ENCODING;
1.75 daniel 2418: if ((ctxt->sax != NULL) && (ctxt->sax->error != NULL))
2419: ctxt->sax->error(ctxt->userData,
2420: "char encoding ISO-2022-JPnot supported\n");
2421: break;
2422: case XML_CHAR_ENCODING_SHIFT_JIS:
1.123 daniel 2423: ctxt->errNo = XML_ERR_UNSUPPORTED_ENCODING;
1.75 daniel 2424: if ((ctxt->sax != NULL) && (ctxt->sax->error != NULL))
2425: ctxt->sax->error(ctxt->userData,
2426: "char encoding Shift_JISnot supported\n");
2427: break;
2428: case XML_CHAR_ENCODING_EUC_JP:
1.123 daniel 2429: ctxt->errNo = XML_ERR_UNSUPPORTED_ENCODING;
1.75 daniel 2430: if ((ctxt->sax != NULL) && (ctxt->sax->error != NULL))
2431: ctxt->sax->error(ctxt->userData,
2432: "char encoding EUC-JPnot supported\n");
2433: break;
2434: }
2435: }
2436:
2437: /************************************************************************
2438: * *
1.123 daniel 2439: * Commodity functions to handle xmlChars *
1.28 daniel 2440: * *
2441: ************************************************************************/
2442:
1.50 daniel 2443: /**
2444: * xmlStrndup:
1.123 daniel 2445: * @cur: the input xmlChar *
1.50 daniel 2446: * @len: the len of @cur
2447: *
1.123 daniel 2448: * a strndup for array of xmlChar's
1.68 daniel 2449: *
1.123 daniel 2450: * Returns a new xmlChar * or NULL
1.1 veillard 2451: */
1.123 daniel 2452: xmlChar *
2453: xmlStrndup(const xmlChar *cur, int len) {
1.135 daniel 2454: xmlChar *ret;
2455:
2456: if ((cur == NULL) || (len < 0)) return(NULL);
2457: ret = xmlMalloc((len + 1) * sizeof(xmlChar));
1.1 veillard 2458: if (ret == NULL) {
1.86 daniel 2459: fprintf(stderr, "malloc of %ld byte failed\n",
1.123 daniel 2460: (len + 1) * (long)sizeof(xmlChar));
1.1 veillard 2461: return(NULL);
2462: }
1.123 daniel 2463: memcpy(ret, cur, len * sizeof(xmlChar));
1.1 veillard 2464: ret[len] = 0;
2465: return(ret);
2466: }
2467:
1.50 daniel 2468: /**
2469: * xmlStrdup:
1.123 daniel 2470: * @cur: the input xmlChar *
1.50 daniel 2471: *
1.152 daniel 2472: * a strdup for array of xmlChar's. Since they are supposed to be
2473: * encoded in UTF-8 or an encoding with 8bit based chars, we assume
2474: * a termination mark of '0'.
1.68 daniel 2475: *
1.123 daniel 2476: * Returns a new xmlChar * or NULL
1.1 veillard 2477: */
1.123 daniel 2478: xmlChar *
2479: xmlStrdup(const xmlChar *cur) {
2480: const xmlChar *p = cur;
1.1 veillard 2481:
1.135 daniel 2482: if (cur == NULL) return(NULL);
1.152 daniel 2483: while (*p != 0) p++;
1.1 veillard 2484: return(xmlStrndup(cur, p - cur));
2485: }
2486:
1.50 daniel 2487: /**
2488: * xmlCharStrndup:
2489: * @cur: the input char *
2490: * @len: the len of @cur
2491: *
1.123 daniel 2492: * a strndup for char's to xmlChar's
1.68 daniel 2493: *
1.123 daniel 2494: * Returns a new xmlChar * or NULL
1.45 daniel 2495: */
2496:
1.123 daniel 2497: xmlChar *
1.55 daniel 2498: xmlCharStrndup(const char *cur, int len) {
1.45 daniel 2499: int i;
1.135 daniel 2500: xmlChar *ret;
2501:
2502: if ((cur == NULL) || (len < 0)) return(NULL);
2503: ret = xmlMalloc((len + 1) * sizeof(xmlChar));
1.45 daniel 2504: if (ret == NULL) {
1.86 daniel 2505: fprintf(stderr, "malloc of %ld byte failed\n",
1.123 daniel 2506: (len + 1) * (long)sizeof(xmlChar));
1.45 daniel 2507: return(NULL);
2508: }
2509: for (i = 0;i < len;i++)
1.123 daniel 2510: ret[i] = (xmlChar) cur[i];
1.45 daniel 2511: ret[len] = 0;
2512: return(ret);
2513: }
2514:
1.50 daniel 2515: /**
2516: * xmlCharStrdup:
2517: * @cur: the input char *
2518: * @len: the len of @cur
2519: *
1.123 daniel 2520: * a strdup for char's to xmlChar's
1.68 daniel 2521: *
1.123 daniel 2522: * Returns a new xmlChar * or NULL
1.45 daniel 2523: */
2524:
1.123 daniel 2525: xmlChar *
1.55 daniel 2526: xmlCharStrdup(const char *cur) {
1.45 daniel 2527: const char *p = cur;
2528:
1.135 daniel 2529: if (cur == NULL) return(NULL);
1.45 daniel 2530: while (*p != '\0') p++;
2531: return(xmlCharStrndup(cur, p - cur));
2532: }
2533:
1.50 daniel 2534: /**
2535: * xmlStrcmp:
1.123 daniel 2536: * @str1: the first xmlChar *
2537: * @str2: the second xmlChar *
1.50 daniel 2538: *
1.123 daniel 2539: * a strcmp for xmlChar's
1.68 daniel 2540: *
2541: * Returns the integer result of the comparison
1.14 veillard 2542: */
2543:
1.55 daniel 2544: int
1.123 daniel 2545: xmlStrcmp(const xmlChar *str1, const xmlChar *str2) {
1.14 veillard 2546: register int tmp;
2547:
1.135 daniel 2548: if ((str1 == NULL) && (str2 == NULL)) return(0);
2549: if (str1 == NULL) return(-1);
2550: if (str2 == NULL) return(1);
1.14 veillard 2551: do {
2552: tmp = *str1++ - *str2++;
2553: if (tmp != 0) return(tmp);
2554: } while ((*str1 != 0) && (*str2 != 0));
2555: return (*str1 - *str2);
2556: }
2557:
1.50 daniel 2558: /**
2559: * xmlStrncmp:
1.123 daniel 2560: * @str1: the first xmlChar *
2561: * @str2: the second xmlChar *
1.50 daniel 2562: * @len: the max comparison length
2563: *
1.123 daniel 2564: * a strncmp for xmlChar's
1.68 daniel 2565: *
2566: * Returns the integer result of the comparison
1.14 veillard 2567: */
2568:
1.55 daniel 2569: int
1.123 daniel 2570: xmlStrncmp(const xmlChar *str1, const xmlChar *str2, int len) {
1.14 veillard 2571: register int tmp;
2572:
2573: if (len <= 0) return(0);
1.135 daniel 2574: if ((str1 == NULL) && (str2 == NULL)) return(0);
2575: if (str1 == NULL) return(-1);
2576: if (str2 == NULL) return(1);
1.14 veillard 2577: do {
2578: tmp = *str1++ - *str2++;
2579: if (tmp != 0) return(tmp);
2580: len--;
2581: if (len <= 0) return(0);
2582: } while ((*str1 != 0) && (*str2 != 0));
2583: return (*str1 - *str2);
2584: }
2585:
1.50 daniel 2586: /**
2587: * xmlStrchr:
1.123 daniel 2588: * @str: the xmlChar * array
2589: * @val: the xmlChar to search
1.50 daniel 2590: *
1.123 daniel 2591: * a strchr for xmlChar's
1.68 daniel 2592: *
1.123 daniel 2593: * Returns the xmlChar * for the first occurence or NULL.
1.14 veillard 2594: */
2595:
1.123 daniel 2596: const xmlChar *
2597: xmlStrchr(const xmlChar *str, xmlChar val) {
1.135 daniel 2598: if (str == NULL) return(NULL);
1.14 veillard 2599: while (*str != 0) {
1.123 daniel 2600: if (*str == val) return((xmlChar *) str);
1.14 veillard 2601: str++;
2602: }
2603: return(NULL);
1.89 daniel 2604: }
2605:
2606: /**
2607: * xmlStrstr:
1.123 daniel 2608: * @str: the xmlChar * array (haystack)
2609: * @val: the xmlChar to search (needle)
1.89 daniel 2610: *
1.123 daniel 2611: * a strstr for xmlChar's
1.89 daniel 2612: *
1.123 daniel 2613: * Returns the xmlChar * for the first occurence or NULL.
1.89 daniel 2614: */
2615:
1.123 daniel 2616: const xmlChar *
2617: xmlStrstr(const xmlChar *str, xmlChar *val) {
1.89 daniel 2618: int n;
2619:
2620: if (str == NULL) return(NULL);
2621: if (val == NULL) return(NULL);
2622: n = xmlStrlen(val);
2623:
2624: if (n == 0) return(str);
2625: while (*str != 0) {
2626: if (*str == *val) {
1.123 daniel 2627: if (!xmlStrncmp(str, val, n)) return((const xmlChar *) str);
1.89 daniel 2628: }
2629: str++;
2630: }
2631: return(NULL);
2632: }
2633:
2634: /**
2635: * xmlStrsub:
1.123 daniel 2636: * @str: the xmlChar * array (haystack)
1.89 daniel 2637: * @start: the index of the first char (zero based)
2638: * @len: the length of the substring
2639: *
2640: * Extract a substring of a given string
2641: *
1.123 daniel 2642: * Returns the xmlChar * for the first occurence or NULL.
1.89 daniel 2643: */
2644:
1.123 daniel 2645: xmlChar *
2646: xmlStrsub(const xmlChar *str, int start, int len) {
1.89 daniel 2647: int i;
2648:
2649: if (str == NULL) return(NULL);
2650: if (start < 0) return(NULL);
1.90 daniel 2651: if (len < 0) return(NULL);
1.89 daniel 2652:
2653: for (i = 0;i < start;i++) {
2654: if (*str == 0) return(NULL);
2655: str++;
2656: }
2657: if (*str == 0) return(NULL);
2658: return(xmlStrndup(str, len));
1.14 veillard 2659: }
1.28 daniel 2660:
1.50 daniel 2661: /**
2662: * xmlStrlen:
1.123 daniel 2663: * @str: the xmlChar * array
1.50 daniel 2664: *
1.127 daniel 2665: * length of a xmlChar's string
1.68 daniel 2666: *
1.123 daniel 2667: * Returns the number of xmlChar contained in the ARRAY.
1.45 daniel 2668: */
2669:
1.55 daniel 2670: int
1.123 daniel 2671: xmlStrlen(const xmlChar *str) {
1.45 daniel 2672: int len = 0;
2673:
2674: if (str == NULL) return(0);
2675: while (*str != 0) {
2676: str++;
2677: len++;
2678: }
2679: return(len);
2680: }
2681:
1.50 daniel 2682: /**
2683: * xmlStrncat:
1.123 daniel 2684: * @cur: the original xmlChar * array
2685: * @add: the xmlChar * array added
1.50 daniel 2686: * @len: the length of @add
2687: *
1.123 daniel 2688: * a strncat for array of xmlChar's
1.68 daniel 2689: *
1.123 daniel 2690: * Returns a new xmlChar * containing the concatenated string.
1.45 daniel 2691: */
2692:
1.123 daniel 2693: xmlChar *
2694: xmlStrncat(xmlChar *cur, const xmlChar *add, int len) {
1.45 daniel 2695: int size;
1.123 daniel 2696: xmlChar *ret;
1.45 daniel 2697:
2698: if ((add == NULL) || (len == 0))
2699: return(cur);
2700: if (cur == NULL)
2701: return(xmlStrndup(add, len));
2702:
2703: size = xmlStrlen(cur);
1.123 daniel 2704: ret = xmlRealloc(cur, (size + len + 1) * sizeof(xmlChar));
1.45 daniel 2705: if (ret == NULL) {
1.86 daniel 2706: fprintf(stderr, "xmlStrncat: realloc of %ld byte failed\n",
1.123 daniel 2707: (size + len + 1) * (long)sizeof(xmlChar));
1.45 daniel 2708: return(cur);
2709: }
1.123 daniel 2710: memcpy(&ret[size], add, len * sizeof(xmlChar));
1.45 daniel 2711: ret[size + len] = 0;
2712: return(ret);
2713: }
2714:
1.50 daniel 2715: /**
2716: * xmlStrcat:
1.123 daniel 2717: * @cur: the original xmlChar * array
2718: * @add: the xmlChar * array added
1.50 daniel 2719: *
1.152 daniel 2720: * a strcat for array of xmlChar's. Since they are supposed to be
2721: * encoded in UTF-8 or an encoding with 8bit based chars, we assume
2722: * a termination mark of '0'.
1.68 daniel 2723: *
1.123 daniel 2724: * Returns a new xmlChar * containing the concatenated string.
1.45 daniel 2725: */
1.123 daniel 2726: xmlChar *
2727: xmlStrcat(xmlChar *cur, const xmlChar *add) {
2728: const xmlChar *p = add;
1.45 daniel 2729:
2730: if (add == NULL) return(cur);
2731: if (cur == NULL)
2732: return(xmlStrdup(add));
2733:
1.152 daniel 2734: while (*p != 0) p++;
1.45 daniel 2735: return(xmlStrncat(cur, add, p - add));
2736: }
2737:
2738: /************************************************************************
2739: * *
2740: * Commodity functions, cleanup needed ? *
2741: * *
2742: ************************************************************************/
2743:
1.50 daniel 2744: /**
2745: * areBlanks:
2746: * @ctxt: an XML parser context
1.123 daniel 2747: * @str: a xmlChar *
1.50 daniel 2748: * @len: the size of @str
2749: *
1.45 daniel 2750: * Is this a sequence of blank chars that one can ignore ?
1.50 daniel 2751: *
1.68 daniel 2752: * Returns 1 if ignorable 0 otherwise.
1.45 daniel 2753: */
2754:
1.123 daniel 2755: static int areBlanks(xmlParserCtxtPtr ctxt, const xmlChar *str, int len) {
1.104 daniel 2756: int i, ret;
1.45 daniel 2757: xmlNodePtr lastChild;
2758:
1.176 daniel 2759: /*
2760: * Check for xml:space value.
2761: */
2762: if (*(ctxt->space) == 1)
2763: return(0);
2764:
2765: /*
2766: * Check that the string is made of blanks
2767: */
1.45 daniel 2768: for (i = 0;i < len;i++)
2769: if (!(IS_BLANK(str[i]))) return(0);
2770:
1.176 daniel 2771: /*
2772: * Look if the element is mixed content in the Dtd if available
2773: */
1.152 daniel 2774: if (RAW != '<') return(0);
1.72 daniel 2775: if (ctxt->node == NULL) return(0);
1.104 daniel 2776: if (ctxt->myDoc != NULL) {
2777: ret = xmlIsMixedElement(ctxt->myDoc, ctxt->node->name);
2778: if (ret == 0) return(1);
2779: if (ret == 1) return(0);
2780: }
1.176 daniel 2781:
1.104 daniel 2782: /*
1.176 daniel 2783: * Otherwise, heuristic :-\
1.104 daniel 2784: */
1.45 daniel 2785: lastChild = xmlGetLastChild(ctxt->node);
2786: if (lastChild == NULL) {
2787: if (ctxt->node->content != NULL) return(0);
2788: } else if (xmlNodeIsText(lastChild))
2789: return(0);
1.157 daniel 2790: else if ((ctxt->node->children != NULL) &&
2791: (xmlNodeIsText(ctxt->node->children)))
1.104 daniel 2792: return(0);
1.45 daniel 2793: return(1);
2794: }
2795:
1.50 daniel 2796: /**
2797: * xmlHandleEntity:
2798: * @ctxt: an XML parser context
2799: * @entity: an XML entity pointer.
2800: *
2801: * Default handling of defined entities, when should we define a new input
1.45 daniel 2802: * stream ? When do we just handle that as a set of chars ?
1.99 daniel 2803: *
2804: * OBSOLETE: to be removed at some point.
1.45 daniel 2805: */
2806:
1.55 daniel 2807: void
2808: xmlHandleEntity(xmlParserCtxtPtr ctxt, xmlEntityPtr entity) {
1.45 daniel 2809: int len;
1.50 daniel 2810: xmlParserInputPtr input;
1.45 daniel 2811:
2812: if (entity->content == NULL) {
1.123 daniel 2813: ctxt->errNo = XML_ERR_INTERNAL_ERROR;
1.55 daniel 2814: if ((ctxt->sax != NULL) && (ctxt->sax->error != NULL))
1.74 daniel 2815: ctxt->sax->error(ctxt->userData, "xmlHandleEntity %s: content == NULL\n",
1.45 daniel 2816: entity->name);
1.59 daniel 2817: ctxt->wellFormed = 0;
1.45 daniel 2818: return;
2819: }
2820: len = xmlStrlen(entity->content);
2821: if (len <= 2) goto handle_as_char;
2822:
2823: /*
2824: * Redefine its content as an input stream.
2825: */
1.50 daniel 2826: input = xmlNewEntityInputStream(ctxt, entity);
2827: xmlPushInput(ctxt, input);
1.45 daniel 2828: return;
2829:
2830: handle_as_char:
2831: /*
2832: * Just handle the content as a set of chars.
2833: */
1.171 daniel 2834: if ((ctxt->sax != NULL) && (!ctxt->disableSAX) &&
2835: (ctxt->sax->characters != NULL))
1.74 daniel 2836: ctxt->sax->characters(ctxt->userData, entity->content, len);
1.45 daniel 2837:
2838: }
2839:
2840: /*
2841: * Forward definitions for recursive behaviour.
2842: */
1.77 daniel 2843: void xmlParsePEReference(xmlParserCtxtPtr ctxt);
2844: void xmlParseReference(xmlParserCtxtPtr ctxt);
1.45 daniel 2845:
1.28 daniel 2846: /************************************************************************
2847: * *
2848: * Extra stuff for namespace support *
2849: * Relates to http://www.w3.org/TR/WD-xml-names *
2850: * *
2851: ************************************************************************/
2852:
1.50 daniel 2853: /**
2854: * xmlNamespaceParseNCName:
2855: * @ctxt: an XML parser context
2856: *
2857: * parse an XML namespace name.
1.28 daniel 2858: *
2859: * [NS 3] NCName ::= (Letter | '_') (NCNameChar)*
2860: *
2861: * [NS 4] NCNameChar ::= Letter | Digit | '.' | '-' | '_' |
2862: * CombiningChar | Extender
1.68 daniel 2863: *
2864: * Returns the namespace name or NULL
1.28 daniel 2865: */
2866:
1.123 daniel 2867: xmlChar *
1.55 daniel 2868: xmlNamespaceParseNCName(xmlParserCtxtPtr ctxt) {
1.152 daniel 2869: xmlChar buf[XML_MAX_NAMELEN + 5];
2870: int len = 0, l;
2871: int cur = CUR_CHAR(l);
1.28 daniel 2872:
1.156 daniel 2873: /* load first the value of the char !!! */
1.152 daniel 2874: if (!IS_LETTER(cur) && (cur != '_')) return(NULL);
1.28 daniel 2875:
1.152 daniel 2876: while ((IS_LETTER(cur)) || (IS_DIGIT(cur)) ||
2877: (cur == '.') || (cur == '-') ||
2878: (cur == '_') ||
2879: (IS_COMBINING(cur)) ||
2880: (IS_EXTENDER(cur))) {
2881: COPY_BUF(l,buf,len,cur);
2882: NEXTL(l);
2883: cur = CUR_CHAR(l);
1.91 daniel 2884: if (len >= XML_MAX_NAMELEN) {
2885: fprintf(stderr,
2886: "xmlNamespaceParseNCName: reached XML_MAX_NAMELEN limit\n");
1.152 daniel 2887: while ((IS_LETTER(cur)) || (IS_DIGIT(cur)) ||
2888: (cur == '.') || (cur == '-') ||
2889: (cur == '_') ||
2890: (IS_COMBINING(cur)) ||
2891: (IS_EXTENDER(cur))) {
2892: NEXTL(l);
2893: cur = CUR_CHAR(l);
2894: }
1.91 daniel 2895: break;
2896: }
2897: }
2898: return(xmlStrndup(buf, len));
1.28 daniel 2899: }
2900:
1.50 daniel 2901: /**
2902: * xmlNamespaceParseQName:
2903: * @ctxt: an XML parser context
1.123 daniel 2904: * @prefix: a xmlChar **
1.50 daniel 2905: *
2906: * parse an XML qualified name
1.28 daniel 2907: *
2908: * [NS 5] QName ::= (Prefix ':')? LocalPart
2909: *
2910: * [NS 6] Prefix ::= NCName
2911: *
2912: * [NS 7] LocalPart ::= NCName
1.68 daniel 2913: *
1.127 daniel 2914: * Returns the local part, and prefix is updated
1.50 daniel 2915: * to get the Prefix if any.
1.28 daniel 2916: */
2917:
1.123 daniel 2918: xmlChar *
2919: xmlNamespaceParseQName(xmlParserCtxtPtr ctxt, xmlChar **prefix) {
2920: xmlChar *ret = NULL;
1.28 daniel 2921:
2922: *prefix = NULL;
2923: ret = xmlNamespaceParseNCName(ctxt);
1.152 daniel 2924: if (RAW == ':') {
1.28 daniel 2925: *prefix = ret;
1.40 daniel 2926: NEXT;
1.28 daniel 2927: ret = xmlNamespaceParseNCName(ctxt);
2928: }
2929:
2930: return(ret);
2931: }
2932:
1.50 daniel 2933: /**
1.72 daniel 2934: * xmlSplitQName:
1.162 daniel 2935: * @ctxt: an XML parser context
1.72 daniel 2936: * @name: an XML parser context
1.123 daniel 2937: * @prefix: a xmlChar **
1.72 daniel 2938: *
2939: * parse an XML qualified name string
2940: *
2941: * [NS 5] QName ::= (Prefix ':')? LocalPart
2942: *
2943: * [NS 6] Prefix ::= NCName
2944: *
2945: * [NS 7] LocalPart ::= NCName
2946: *
1.127 daniel 2947: * Returns the local part, and prefix is updated
1.72 daniel 2948: * to get the Prefix if any.
2949: */
2950:
1.123 daniel 2951: xmlChar *
1.162 daniel 2952: xmlSplitQName(xmlParserCtxtPtr ctxt, const xmlChar *name, xmlChar **prefix) {
2953: xmlChar buf[XML_MAX_NAMELEN + 5];
2954: int len = 0;
1.123 daniel 2955: xmlChar *ret = NULL;
2956: const xmlChar *cur = name;
1.162 daniel 2957: int c,l;
1.72 daniel 2958:
2959: *prefix = NULL;
1.113 daniel 2960:
2961: /* xml: prefix is not really a namespace */
2962: if ((cur[0] == 'x') && (cur[1] == 'm') &&
2963: (cur[2] == 'l') && (cur[3] == ':'))
2964: return(xmlStrdup(name));
2965:
1.162 daniel 2966: /* nasty but valid */
2967: if (cur[0] == ':')
2968: return(xmlStrdup(name));
2969:
2970: c = CUR_SCHAR(cur, l);
2971: if (!IS_LETTER(c) && (c != '_')) return(NULL);
1.72 daniel 2972:
1.162 daniel 2973: while ((IS_LETTER(c)) || (IS_DIGIT(c)) ||
2974: (c == '.') || (c == '-') ||
2975: (c == '_') ||
2976: (IS_COMBINING(c)) ||
2977: (IS_EXTENDER(c))) {
2978: COPY_BUF(l,buf,len,c);
2979: cur += l;
2980: c = CUR_SCHAR(cur, l);
2981: }
1.72 daniel 2982:
1.162 daniel 2983: ret = xmlStrndup(buf, len);
1.72 daniel 2984:
1.162 daniel 2985: if (c == ':') {
2986: cur += l;
1.163 daniel 2987: c = CUR_SCHAR(cur, l);
1.162 daniel 2988: if (!IS_LETTER(c) && (c != '_')) return(ret);
1.72 daniel 2989: *prefix = ret;
1.162 daniel 2990: len = 0;
1.72 daniel 2991:
1.162 daniel 2992: while ((IS_LETTER(c)) || (IS_DIGIT(c)) ||
2993: (c == '.') || (c == '-') ||
2994: (c == '_') ||
2995: (IS_COMBINING(c)) ||
2996: (IS_EXTENDER(c))) {
2997: COPY_BUF(l,buf,len,c);
2998: cur += l;
2999: c = CUR_SCHAR(cur, l);
3000: }
1.72 daniel 3001:
1.162 daniel 3002: ret = xmlStrndup(buf, len);
1.72 daniel 3003: }
3004:
3005: return(ret);
3006: }
3007: /**
1.50 daniel 3008: * xmlNamespaceParseNSDef:
3009: * @ctxt: an XML parser context
3010: *
3011: * parse a namespace prefix declaration
1.28 daniel 3012: *
3013: * [NS 1] NSDef ::= PrefixDef Eq SystemLiteral
3014: *
3015: * [NS 2] PrefixDef ::= 'xmlns' (':' NCName)?
1.68 daniel 3016: *
3017: * Returns the namespace name
1.28 daniel 3018: */
3019:
1.123 daniel 3020: xmlChar *
1.55 daniel 3021: xmlNamespaceParseNSDef(xmlParserCtxtPtr ctxt) {
1.123 daniel 3022: xmlChar *name = NULL;
1.28 daniel 3023:
1.152 daniel 3024: if ((RAW == 'x') && (NXT(1) == 'm') &&
1.40 daniel 3025: (NXT(2) == 'l') && (NXT(3) == 'n') &&
3026: (NXT(4) == 's')) {
3027: SKIP(5);
1.152 daniel 3028: if (RAW == ':') {
1.40 daniel 3029: NEXT;
1.28 daniel 3030: name = xmlNamespaceParseNCName(ctxt);
3031: }
3032: }
1.39 daniel 3033: return(name);
1.28 daniel 3034: }
3035:
1.50 daniel 3036: /**
3037: * xmlParseQuotedString:
3038: * @ctxt: an XML parser context
3039: *
1.45 daniel 3040: * [OLD] Parse and return a string between quotes or doublequotes
1.110 daniel 3041: * To be removed at next drop of binary compatibility
1.68 daniel 3042: *
3043: * Returns the string parser or NULL.
1.45 daniel 3044: */
1.123 daniel 3045: xmlChar *
1.55 daniel 3046: xmlParseQuotedString(xmlParserCtxtPtr ctxt) {
1.135 daniel 3047: xmlChar *buf = NULL;
1.152 daniel 3048: int len = 0,l;
1.140 daniel 3049: int size = XML_PARSER_BUFFER_SIZE;
1.152 daniel 3050: int c;
1.45 daniel 3051:
1.135 daniel 3052: buf = (xmlChar *) xmlMalloc(size * sizeof(xmlChar));
3053: if (buf == NULL) {
3054: fprintf(stderr, "malloc of %d byte failed\n", size);
3055: return(NULL);
3056: }
1.152 daniel 3057: if (RAW == '"') {
1.45 daniel 3058: NEXT;
1.152 daniel 3059: c = CUR_CHAR(l);
1.135 daniel 3060: while (IS_CHAR(c) && (c != '"')) {
1.152 daniel 3061: if (len + 5 >= size) {
1.135 daniel 3062: size *= 2;
3063: buf = xmlRealloc(buf, size * sizeof(xmlChar));
3064: if (buf == NULL) {
3065: fprintf(stderr, "realloc of %d byte failed\n", size);
3066: return(NULL);
3067: }
3068: }
1.152 daniel 3069: COPY_BUF(l,buf,len,c);
3070: NEXTL(l);
3071: c = CUR_CHAR(l);
1.135 daniel 3072: }
3073: if (c != '"') {
1.123 daniel 3074: ctxt->errNo = XML_ERR_STRING_NOT_CLOSED;
1.55 daniel 3075: if ((ctxt->sax != NULL) && (ctxt->sax->error != NULL))
1.135 daniel 3076: ctxt->sax->error(ctxt->userData,
3077: "String not closed \"%.50s\"\n", buf);
1.59 daniel 3078: ctxt->wellFormed = 0;
1.55 daniel 3079: } else {
1.45 daniel 3080: NEXT;
3081: }
1.152 daniel 3082: } else if (RAW == '\''){
1.45 daniel 3083: NEXT;
1.135 daniel 3084: c = CUR;
3085: while (IS_CHAR(c) && (c != '\'')) {
3086: if (len + 1 >= size) {
3087: size *= 2;
3088: buf = xmlRealloc(buf, size * sizeof(xmlChar));
3089: if (buf == NULL) {
3090: fprintf(stderr, "realloc of %d byte failed\n", size);
3091: return(NULL);
3092: }
3093: }
3094: buf[len++] = c;
3095: NEXT;
3096: c = CUR;
3097: }
1.152 daniel 3098: if (RAW != '\'') {
1.123 daniel 3099: ctxt->errNo = XML_ERR_STRING_NOT_CLOSED;
1.55 daniel 3100: if ((ctxt->sax != NULL) && (ctxt->sax->error != NULL))
1.135 daniel 3101: ctxt->sax->error(ctxt->userData,
3102: "String not closed \"%.50s\"\n", buf);
1.59 daniel 3103: ctxt->wellFormed = 0;
1.55 daniel 3104: } else {
1.45 daniel 3105: NEXT;
3106: }
3107: }
1.135 daniel 3108: return(buf);
1.45 daniel 3109: }
3110:
1.50 daniel 3111: /**
3112: * xmlParseNamespace:
3113: * @ctxt: an XML parser context
3114: *
1.45 daniel 3115: * [OLD] xmlParseNamespace: parse specific PI '<?namespace ...' constructs.
3116: *
3117: * This is what the older xml-name Working Draft specified, a bunch of
3118: * other stuff may still rely on it, so support is still here as
1.127 daniel 3119: * if it was declared on the root of the Tree:-(
1.110 daniel 3120: *
3121: * To be removed at next drop of binary compatibility
1.45 daniel 3122: */
3123:
1.55 daniel 3124: void
3125: xmlParseNamespace(xmlParserCtxtPtr ctxt) {
1.123 daniel 3126: xmlChar *href = NULL;
3127: xmlChar *prefix = NULL;
1.45 daniel 3128: int garbage = 0;
3129:
3130: /*
3131: * We just skipped "namespace" or "xml:namespace"
3132: */
3133: SKIP_BLANKS;
3134:
1.153 daniel 3135: while (IS_CHAR(RAW) && (RAW != '>')) {
1.45 daniel 3136: /*
3137: * We can have "ns" or "prefix" attributes
3138: * Old encoding as 'href' or 'AS' attributes is still supported
3139: */
1.152 daniel 3140: if ((RAW == 'n') && (NXT(1) == 's')) {
1.45 daniel 3141: garbage = 0;
3142: SKIP(2);
3143: SKIP_BLANKS;
3144:
1.152 daniel 3145: if (RAW != '=') continue;
1.45 daniel 3146: NEXT;
3147: SKIP_BLANKS;
3148:
3149: href = xmlParseQuotedString(ctxt);
3150: SKIP_BLANKS;
1.152 daniel 3151: } else if ((RAW == 'h') && (NXT(1) == 'r') &&
1.45 daniel 3152: (NXT(2) == 'e') && (NXT(3) == 'f')) {
3153: garbage = 0;
3154: SKIP(4);
3155: SKIP_BLANKS;
3156:
1.152 daniel 3157: if (RAW != '=') continue;
1.45 daniel 3158: NEXT;
3159: SKIP_BLANKS;
3160:
3161: href = xmlParseQuotedString(ctxt);
3162: SKIP_BLANKS;
1.152 daniel 3163: } else if ((RAW == 'p') && (NXT(1) == 'r') &&
1.45 daniel 3164: (NXT(2) == 'e') && (NXT(3) == 'f') &&
3165: (NXT(4) == 'i') && (NXT(5) == 'x')) {
3166: garbage = 0;
3167: SKIP(6);
3168: SKIP_BLANKS;
3169:
1.152 daniel 3170: if (RAW != '=') continue;
1.45 daniel 3171: NEXT;
3172: SKIP_BLANKS;
3173:
3174: prefix = xmlParseQuotedString(ctxt);
3175: SKIP_BLANKS;
1.152 daniel 3176: } else if ((RAW == 'A') && (NXT(1) == 'S')) {
1.45 daniel 3177: garbage = 0;
3178: SKIP(2);
3179: SKIP_BLANKS;
3180:
1.152 daniel 3181: if (RAW != '=') continue;
1.45 daniel 3182: NEXT;
3183: SKIP_BLANKS;
3184:
3185: prefix = xmlParseQuotedString(ctxt);
3186: SKIP_BLANKS;
1.152 daniel 3187: } else if ((RAW == '?') && (NXT(1) == '>')) {
1.45 daniel 3188: garbage = 0;
1.91 daniel 3189: NEXT;
1.45 daniel 3190: } else {
3191: /*
3192: * Found garbage when parsing the namespace
3193: */
1.122 daniel 3194: if (!garbage) {
1.55 daniel 3195: if ((ctxt->sax != NULL) && (ctxt->sax->error != NULL))
1.122 daniel 3196: ctxt->sax->error(ctxt->userData,
3197: "xmlParseNamespace found garbage\n");
3198: }
1.123 daniel 3199: ctxt->errNo = XML_ERR_NS_DECL_ERROR;
1.59 daniel 3200: ctxt->wellFormed = 0;
1.45 daniel 3201: NEXT;
3202: }
3203: }
3204:
3205: MOVETO_ENDTAG(CUR_PTR);
3206: NEXT;
3207:
3208: /*
3209: * Register the DTD.
1.72 daniel 3210: if (href != NULL)
3211: if ((ctxt->sax != NULL) && (ctxt->sax->globalNamespace != NULL))
1.74 daniel 3212: ctxt->sax->globalNamespace(ctxt->userData, href, prefix);
1.45 daniel 3213: */
3214:
1.119 daniel 3215: if (prefix != NULL) xmlFree(prefix);
3216: if (href != NULL) xmlFree(href);
1.45 daniel 3217: }
3218:
1.28 daniel 3219: /************************************************************************
3220: * *
3221: * The parser itself *
3222: * Relates to http://www.w3.org/TR/REC-xml *
3223: * *
3224: ************************************************************************/
1.14 veillard 3225:
1.50 daniel 3226: /**
1.97 daniel 3227: * xmlScanName:
3228: * @ctxt: an XML parser context
3229: *
3230: * Trickery: parse an XML name but without consuming the input flow
3231: * Needed for rollback cases.
3232: *
3233: * [4] NameChar ::= Letter | Digit | '.' | '-' | '_' | ':' |
3234: * CombiningChar | Extender
3235: *
3236: * [5] Name ::= (Letter | '_' | ':') (NameChar)*
3237: *
3238: * [6] Names ::= Name (S Name)*
3239: *
3240: * Returns the Name parsed or NULL
3241: */
3242:
1.123 daniel 3243: xmlChar *
1.97 daniel 3244: xmlScanName(xmlParserCtxtPtr ctxt) {
1.123 daniel 3245: xmlChar buf[XML_MAX_NAMELEN];
1.97 daniel 3246: int len = 0;
3247:
3248: GROW;
1.152 daniel 3249: if (!IS_LETTER(RAW) && (RAW != '_') &&
3250: (RAW != ':')) {
1.97 daniel 3251: return(NULL);
3252: }
3253:
3254: while ((IS_LETTER(NXT(len))) || (IS_DIGIT(NXT(len))) ||
3255: (NXT(len) == '.') || (NXT(len) == '-') ||
3256: (NXT(len) == '_') || (NXT(len) == ':') ||
3257: (IS_COMBINING(NXT(len))) ||
3258: (IS_EXTENDER(NXT(len)))) {
3259: buf[len] = NXT(len);
3260: len++;
3261: if (len >= XML_MAX_NAMELEN) {
3262: fprintf(stderr,
3263: "xmlScanName: reached XML_MAX_NAMELEN limit\n");
3264: while ((IS_LETTER(NXT(len))) || (IS_DIGIT(NXT(len))) ||
3265: (NXT(len) == '.') || (NXT(len) == '-') ||
3266: (NXT(len) == '_') || (NXT(len) == ':') ||
3267: (IS_COMBINING(NXT(len))) ||
3268: (IS_EXTENDER(NXT(len))))
3269: len++;
3270: break;
3271: }
3272: }
3273: return(xmlStrndup(buf, len));
3274: }
3275:
3276: /**
1.50 daniel 3277: * xmlParseName:
3278: * @ctxt: an XML parser context
3279: *
3280: * parse an XML name.
1.22 daniel 3281: *
3282: * [4] NameChar ::= Letter | Digit | '.' | '-' | '_' | ':' |
3283: * CombiningChar | Extender
3284: *
3285: * [5] Name ::= (Letter | '_' | ':') (NameChar)*
3286: *
3287: * [6] Names ::= Name (S Name)*
1.68 daniel 3288: *
3289: * Returns the Name parsed or NULL
1.1 veillard 3290: */
3291:
1.123 daniel 3292: xmlChar *
1.55 daniel 3293: xmlParseName(xmlParserCtxtPtr ctxt) {
1.160 daniel 3294: xmlChar buf[XML_MAX_NAMELEN + 5];
3295: int len = 0, l;
3296: int c;
1.1 veillard 3297:
1.91 daniel 3298: GROW;
1.160 daniel 3299: c = CUR_CHAR(l);
3300: if (!IS_LETTER(c) && (c != '_') &&
3301: (c != ':')) {
1.91 daniel 3302: return(NULL);
3303: }
1.40 daniel 3304:
1.160 daniel 3305: while ((IS_LETTER(c)) || (IS_DIGIT(c)) ||
3306: (c == '.') || (c == '-') ||
3307: (c == '_') || (c == ':') ||
3308: (IS_COMBINING(c)) ||
3309: (IS_EXTENDER(c))) {
3310: COPY_BUF(l,buf,len,c);
3311: NEXTL(l);
3312: c = CUR_CHAR(l);
1.91 daniel 3313: if (len >= XML_MAX_NAMELEN) {
3314: fprintf(stderr,
3315: "xmlParseName: reached XML_MAX_NAMELEN limit\n");
1.160 daniel 3316: while ((IS_LETTER(c)) || (IS_DIGIT(c)) ||
3317: (c == '.') || (c == '-') ||
3318: (c == '_') || (c == ':') ||
3319: (IS_COMBINING(c)) ||
3320: (IS_EXTENDER(c))) {
3321: NEXTL(l);
3322: c = CUR_CHAR(l);
1.97 daniel 3323: }
1.91 daniel 3324: break;
3325: }
3326: }
3327: return(xmlStrndup(buf, len));
1.22 daniel 3328: }
3329:
1.50 daniel 3330: /**
1.135 daniel 3331: * xmlParseStringName:
3332: * @ctxt: an XML parser context
3333: * @str: a pointer to an index in the string
3334: *
3335: * parse an XML name.
3336: *
3337: * [4] NameChar ::= Letter | Digit | '.' | '-' | '_' | ':' |
3338: * CombiningChar | Extender
3339: *
3340: * [5] Name ::= (Letter | '_' | ':') (NameChar)*
3341: *
3342: * [6] Names ::= Name (S Name)*
3343: *
3344: * Returns the Name parsed or NULL. The str pointer
3345: * is updated to the current location in the string.
3346: */
3347:
3348: xmlChar *
3349: xmlParseStringName(xmlParserCtxtPtr ctxt, const xmlChar** str) {
1.176 daniel 3350: xmlChar buf[XML_MAX_NAMELEN + 5];
3351: const xmlChar *cur = *str;
3352: int len = 0, l;
3353: int c;
1.135 daniel 3354:
1.176 daniel 3355: GROW;
3356: c = CUR_SCHAR(cur, l);
3357: if (!IS_LETTER(c) && (c != '_') &&
3358: (c != ':')) {
1.135 daniel 3359: return(NULL);
3360: }
3361:
1.176 daniel 3362: while ((IS_LETTER(c)) || (IS_DIGIT(c)) ||
3363: (c == '.') || (c == '-') ||
3364: (c == '_') || (c == ':') ||
3365: (IS_COMBINING(c)) ||
3366: (IS_EXTENDER(c))) {
3367: COPY_BUF(l,buf,len,c);
3368: cur += l;
3369: c = CUR_SCHAR(cur, l);
3370: if (len >= XML_MAX_NAMELEN) {
3371: fprintf(stderr,
3372: "xmlParseName: reached XML_MAX_NAMELEN limit\n");
3373: while ((IS_LETTER(c)) || (IS_DIGIT(c)) ||
3374: (c == '.') || (c == '-') ||
3375: (c == '_') || (c == ':') ||
3376: (IS_COMBINING(c)) ||
3377: (IS_EXTENDER(c))) {
3378: cur += l;
3379: c = CUR_SCHAR(cur, l);
3380: }
3381: break;
3382: }
1.135 daniel 3383: }
1.176 daniel 3384: *str = cur;
3385: return(xmlStrndup(buf, len));
1.135 daniel 3386: }
3387:
3388: /**
1.50 daniel 3389: * xmlParseNmtoken:
3390: * @ctxt: an XML parser context
3391: *
3392: * parse an XML Nmtoken.
1.22 daniel 3393: *
3394: * [7] Nmtoken ::= (NameChar)+
3395: *
3396: * [8] Nmtokens ::= Nmtoken (S Nmtoken)*
1.68 daniel 3397: *
3398: * Returns the Nmtoken parsed or NULL
1.22 daniel 3399: */
3400:
1.123 daniel 3401: xmlChar *
1.55 daniel 3402: xmlParseNmtoken(xmlParserCtxtPtr ctxt) {
1.123 daniel 3403: xmlChar buf[XML_MAX_NAMELEN];
1.91 daniel 3404: int len = 0;
1.160 daniel 3405: int c,l;
1.22 daniel 3406:
1.91 daniel 3407: GROW;
1.160 daniel 3408: c = CUR_CHAR(l);
3409: while ((IS_LETTER(c)) || (IS_DIGIT(c)) ||
3410: (c == '.') || (c == '-') ||
3411: (c == '_') || (c == ':') ||
3412: (IS_COMBINING(c)) ||
3413: (IS_EXTENDER(c))) {
3414: COPY_BUF(l,buf,len,c);
3415: NEXTL(l);
3416: c = CUR_CHAR(l);
1.91 daniel 3417: if (len >= XML_MAX_NAMELEN) {
3418: fprintf(stderr,
3419: "xmlParseNmtoken: reached XML_MAX_NAMELEN limit\n");
1.160 daniel 3420: while ((IS_LETTER(c)) || (IS_DIGIT(c)) ||
3421: (c == '.') || (c == '-') ||
3422: (c == '_') || (c == ':') ||
3423: (IS_COMBINING(c)) ||
3424: (IS_EXTENDER(c))) {
3425: NEXTL(l);
3426: c = CUR_CHAR(l);
3427: }
1.91 daniel 3428: break;
3429: }
3430: }
1.168 daniel 3431: if (len == 0)
3432: return(NULL);
1.91 daniel 3433: return(xmlStrndup(buf, len));
1.1 veillard 3434: }
3435:
1.50 daniel 3436: /**
3437: * xmlParseEntityValue:
3438: * @ctxt: an XML parser context
1.78 daniel 3439: * @orig: if non-NULL store a copy of the original entity value
1.50 daniel 3440: *
3441: * parse a value for ENTITY decl.
1.24 daniel 3442: *
3443: * [9] EntityValue ::= '"' ([^%&"] | PEReference | Reference)* '"' |
3444: * "'" ([^%&'] | PEReference | Reference)* "'"
1.68 daniel 3445: *
1.78 daniel 3446: * Returns the EntityValue parsed with reference substitued or NULL
1.24 daniel 3447: */
3448:
1.123 daniel 3449: xmlChar *
3450: xmlParseEntityValue(xmlParserCtxtPtr ctxt, xmlChar **orig) {
1.135 daniel 3451: xmlChar *buf = NULL;
3452: int len = 0;
1.140 daniel 3453: int size = XML_PARSER_BUFFER_SIZE;
1.152 daniel 3454: int c, l;
1.135 daniel 3455: xmlChar stop;
1.123 daniel 3456: xmlChar *ret = NULL;
1.176 daniel 3457: const xmlChar *cur = NULL;
1.98 daniel 3458: xmlParserInputPtr input;
1.24 daniel 3459:
1.152 daniel 3460: if (RAW == '"') stop = '"';
3461: else if (RAW == '\'') stop = '\'';
1.135 daniel 3462: else {
3463: ctxt->errNo = XML_ERR_ENTITY_NOT_STARTED;
3464: if ((ctxt->sax != NULL) && (ctxt->sax->error != NULL))
3465: ctxt->sax->error(ctxt->userData, "EntityValue: \" or ' expected\n");
3466: ctxt->wellFormed = 0;
3467: return(NULL);
3468: }
3469: buf = (xmlChar *) xmlMalloc(size * sizeof(xmlChar));
3470: if (buf == NULL) {
3471: fprintf(stderr, "malloc of %d byte failed\n", size);
3472: return(NULL);
3473: }
1.94 daniel 3474:
1.135 daniel 3475: /*
3476: * The content of the entity definition is copied in a buffer.
3477: */
1.94 daniel 3478:
1.135 daniel 3479: ctxt->instate = XML_PARSER_ENTITY_VALUE;
3480: input = ctxt->input;
3481: GROW;
3482: NEXT;
1.152 daniel 3483: c = CUR_CHAR(l);
1.135 daniel 3484: /*
3485: * NOTE: 4.4.5 Included in Literal
3486: * When a parameter entity reference appears in a literal entity
3487: * value, ... a single or double quote character in the replacement
3488: * text is always treated as a normal data character and will not
3489: * terminate the literal.
3490: * In practice it means we stop the loop only when back at parsing
3491: * the initial entity and the quote is found
3492: */
3493: while (IS_CHAR(c) && ((c != stop) || (ctxt->input != input))) {
1.152 daniel 3494: if (len + 5 >= size) {
1.135 daniel 3495: size *= 2;
3496: buf = xmlRealloc(buf, size * sizeof(xmlChar));
3497: if (buf == NULL) {
3498: fprintf(stderr, "realloc of %d byte failed\n", size);
3499: return(NULL);
1.94 daniel 3500: }
1.79 daniel 3501: }
1.152 daniel 3502: COPY_BUF(l,buf,len,c);
3503: NEXTL(l);
1.98 daniel 3504: /*
1.135 daniel 3505: * Pop-up of finished entities.
1.98 daniel 3506: */
1.152 daniel 3507: while ((RAW == 0) && (ctxt->inputNr > 1))
1.135 daniel 3508: xmlPopInput(ctxt);
1.152 daniel 3509:
3510: c = CUR_CHAR(l);
1.135 daniel 3511: if (c == 0) {
1.94 daniel 3512: GROW;
1.152 daniel 3513: c = CUR_CHAR(l);
1.79 daniel 3514: }
1.135 daniel 3515: }
3516: buf[len] = 0;
3517:
3518: /*
1.176 daniel 3519: * Raise problem w.r.t. '&' and '%' being used in non-entities
3520: * reference constructs. Note Charref will be handled in
3521: * xmlStringDecodeEntities()
3522: */
3523: cur = buf;
3524: while (*cur != 0) {
3525: if ((*cur == '%') || ((*cur == '&') && (cur[1] != '#'))) {
3526: xmlChar *name;
3527: xmlChar tmp = *cur;
3528:
3529: cur++;
3530: name = xmlParseStringName(ctxt, &cur);
3531: if ((name == NULL) || (*cur != ';')) {
3532: if ((ctxt->sax != NULL) && (ctxt->sax->error != NULL))
3533: ctxt->sax->error(ctxt->userData,
3534: "EntityValue: '%c' forbidden except for entities references\n",
3535: tmp);
3536: ctxt->wellFormed = 0;
3537: ctxt->errNo = XML_ERR_ENTITY_CHAR_ERROR;
3538: }
3539: if ((ctxt->inSubset == 1) && (tmp == '%')) {
3540: if ((ctxt->sax != NULL) && (ctxt->sax->error != NULL))
3541: ctxt->sax->error(ctxt->userData,
3542: "EntityValue: PEReferences forbidden in internal subset\n",
3543: tmp);
3544: ctxt->wellFormed = 0;
3545: ctxt->errNo = XML_ERR_ENTITY_PE_INTERNAL;
3546: }
3547: if (name != NULL)
3548: xmlFree(name);
3549: }
3550: cur++;
3551: }
3552:
3553: /*
1.135 daniel 3554: * Then PEReference entities are substituted.
3555: */
3556: if (c != stop) {
3557: ctxt->errNo = XML_ERR_ENTITY_NOT_FINISHED;
1.55 daniel 3558: if ((ctxt->sax != NULL) && (ctxt->sax->error != NULL))
1.135 daniel 3559: ctxt->sax->error(ctxt->userData, "EntityValue: \" expected\n");
1.59 daniel 3560: ctxt->wellFormed = 0;
1.170 daniel 3561: xmlFree(buf);
1.135 daniel 3562: } else {
3563: NEXT;
3564: /*
3565: * NOTE: 4.4.7 Bypassed
3566: * When a general entity reference appears in the EntityValue in
3567: * an entity declaration, it is bypassed and left as is.
1.176 daniel 3568: * so XML_SUBSTITUTE_REF is not set here.
1.135 daniel 3569: */
3570: ret = xmlStringDecodeEntities(ctxt, buf, XML_SUBSTITUTE_PEREF,
3571: 0, 0, 0);
3572: if (orig != NULL)
3573: *orig = buf;
3574: else
3575: xmlFree(buf);
1.24 daniel 3576: }
3577:
3578: return(ret);
3579: }
3580:
1.50 daniel 3581: /**
3582: * xmlParseAttValue:
3583: * @ctxt: an XML parser context
3584: *
3585: * parse a value for an attribute
1.78 daniel 3586: * Note: the parser won't do substitution of entities here, this
1.113 daniel 3587: * will be handled later in xmlStringGetNodeList
1.29 daniel 3588: *
3589: * [10] AttValue ::= '"' ([^<&"] | Reference)* '"' |
3590: * "'" ([^<&'] | Reference)* "'"
1.68 daniel 3591: *
1.129 daniel 3592: * 3.3.3 Attribute-Value Normalization:
3593: * Before the value of an attribute is passed to the application or
3594: * checked for validity, the XML processor must normalize it as follows:
3595: * - a character reference is processed by appending the referenced
3596: * character to the attribute value
3597: * - an entity reference is processed by recursively processing the
3598: * replacement text of the entity
3599: * - a whitespace character (#x20, #xD, #xA, #x9) is processed by
3600: * appending #x20 to the normalized value, except that only a single
3601: * #x20 is appended for a "#xD#xA" sequence that is part of an external
3602: * parsed entity or the literal entity value of an internal parsed entity
3603: * - other characters are processed by appending them to the normalized value
1.130 daniel 3604: * If the declared value is not CDATA, then the XML processor must further
3605: * process the normalized attribute value by discarding any leading and
3606: * trailing space (#x20) characters, and by replacing sequences of space
3607: * (#x20) characters by a single space (#x20) character.
3608: * All attributes for which no declaration has been read should be treated
3609: * by a non-validating parser as if declared CDATA.
1.129 daniel 3610: *
3611: * Returns the AttValue parsed or NULL. The value has to be freed by the caller.
1.29 daniel 3612: */
3613:
1.123 daniel 3614: xmlChar *
1.55 daniel 3615: xmlParseAttValue(xmlParserCtxtPtr ctxt) {
1.129 daniel 3616: xmlChar limit = 0;
3617: xmlChar *buffer = NULL;
3618: int buffer_size = 0;
3619: xmlChar *out = NULL;
3620:
3621: xmlChar *current = NULL;
3622: xmlEntityPtr ent;
3623: xmlChar cur;
3624:
1.29 daniel 3625:
1.91 daniel 3626: SHRINK;
1.151 daniel 3627: if (NXT(0) == '"') {
1.96 daniel 3628: ctxt->instate = XML_PARSER_ATTRIBUTE_VALUE;
1.129 daniel 3629: limit = '"';
1.40 daniel 3630: NEXT;
1.151 daniel 3631: } else if (NXT(0) == '\'') {
1.129 daniel 3632: limit = '\'';
1.96 daniel 3633: ctxt->instate = XML_PARSER_ATTRIBUTE_VALUE;
1.40 daniel 3634: NEXT;
1.29 daniel 3635: } else {
1.123 daniel 3636: ctxt->errNo = XML_ERR_ATTRIBUTE_NOT_STARTED;
1.55 daniel 3637: if ((ctxt->sax != NULL) && (ctxt->sax->error != NULL))
1.74 daniel 3638: ctxt->sax->error(ctxt->userData, "AttValue: \" or ' expected\n");
1.59 daniel 3639: ctxt->wellFormed = 0;
1.129 daniel 3640: return(NULL);
1.29 daniel 3641: }
3642:
1.129 daniel 3643: /*
3644: * allocate a translation buffer.
3645: */
1.140 daniel 3646: buffer_size = XML_PARSER_BUFFER_SIZE;
1.129 daniel 3647: buffer = (xmlChar *) xmlMalloc(buffer_size * sizeof(xmlChar));
3648: if (buffer == NULL) {
3649: perror("xmlParseAttValue: malloc failed");
3650: return(NULL);
3651: }
3652: out = buffer;
3653:
3654: /*
3655: * Ok loop until we reach one of the ending char or a size limit.
3656: */
3657: cur = CUR;
1.156 daniel 3658: while (((NXT(0) != limit) && (cur != '<')) || (ctxt->token != 0)) {
1.129 daniel 3659: if (cur == 0) break;
3660: if ((cur == '&') && (NXT(1) == '#')) {
3661: int val = xmlParseCharRef(ctxt);
3662: *out++ = val;
3663: } else if (cur == '&') {
3664: ent = xmlParseEntityRef(ctxt);
3665: if ((ent != NULL) &&
3666: (ctxt->replaceEntities != 0)) {
3667: current = ent->content;
3668: while (*current != 0) {
3669: *out++ = *current++;
3670: if (out - buffer > buffer_size - 10) {
3671: int index = out - buffer;
3672:
3673: growBuffer(buffer);
3674: out = &buffer[index];
3675: }
3676: }
3677: } else if (ent != NULL) {
3678: int i = xmlStrlen(ent->name);
3679: const xmlChar *cur = ent->name;
3680:
3681: *out++ = '&';
3682: if (out - buffer > buffer_size - i - 10) {
3683: int index = out - buffer;
3684:
3685: growBuffer(buffer);
3686: out = &buffer[index];
3687: }
3688: for (;i > 0;i--)
3689: *out++ = *cur++;
3690: *out++ = ';';
3691: }
3692: } else {
1.156 daniel 3693: /* invalid for UTF-8 , use COPY(out); !!! */
1.129 daniel 3694: if ((cur == 0x20) || (cur == 0xD) || (cur == 0xA) || (cur == 0x9)) {
1.130 daniel 3695: *out++ = 0x20;
3696: if (out - buffer > buffer_size - 10) {
3697: int index = out - buffer;
3698:
3699: growBuffer(buffer);
3700: out = &buffer[index];
1.129 daniel 3701: }
3702: } else {
3703: *out++ = cur;
3704: if (out - buffer > buffer_size - 10) {
3705: int index = out - buffer;
3706:
3707: growBuffer(buffer);
3708: out = &buffer[index];
3709: }
3710: }
3711: NEXT;
3712: }
3713: cur = CUR;
3714: }
3715: *out++ = 0;
1.152 daniel 3716: if (RAW == '<') {
1.129 daniel 3717: if ((ctxt->sax != NULL) && (ctxt->sax->error != NULL))
3718: ctxt->sax->error(ctxt->userData,
3719: "Unescaped '<' not allowed in attributes values\n");
3720: ctxt->errNo = XML_ERR_LT_IN_ATTRIBUTE;
3721: ctxt->wellFormed = 0;
1.152 daniel 3722: } else if (RAW != limit) {
1.129 daniel 3723: if ((ctxt->sax != NULL) && (ctxt->sax->error != NULL))
3724: ctxt->sax->error(ctxt->userData, "AttValue: ' expected\n");
3725: ctxt->errNo = XML_ERR_ATTRIBUTE_NOT_FINISHED;
3726: ctxt->wellFormed = 0;
3727: } else
3728: NEXT;
3729: return(buffer);
1.29 daniel 3730: }
3731:
1.50 daniel 3732: /**
3733: * xmlParseSystemLiteral:
3734: * @ctxt: an XML parser context
3735: *
3736: * parse an XML Literal
1.21 daniel 3737: *
1.22 daniel 3738: * [11] SystemLiteral ::= ('"' [^"]* '"') | ("'" [^']* "'")
1.68 daniel 3739: *
3740: * Returns the SystemLiteral parsed or NULL
1.21 daniel 3741: */
3742:
1.123 daniel 3743: xmlChar *
1.55 daniel 3744: xmlParseSystemLiteral(xmlParserCtxtPtr ctxt) {
1.135 daniel 3745: xmlChar *buf = NULL;
3746: int len = 0;
1.140 daniel 3747: int size = XML_PARSER_BUFFER_SIZE;
1.152 daniel 3748: int cur, l;
1.135 daniel 3749: xmlChar stop;
1.168 daniel 3750: int state = ctxt->instate;
1.21 daniel 3751:
1.91 daniel 3752: SHRINK;
1.152 daniel 3753: if (RAW == '"') {
1.40 daniel 3754: NEXT;
1.135 daniel 3755: stop = '"';
1.152 daniel 3756: } else if (RAW == '\'') {
1.40 daniel 3757: NEXT;
1.135 daniel 3758: stop = '\'';
1.21 daniel 3759: } else {
1.55 daniel 3760: if ((ctxt->sax != NULL) && (ctxt->sax->error != NULL))
1.122 daniel 3761: ctxt->sax->error(ctxt->userData,
3762: "SystemLiteral \" or ' expected\n");
1.123 daniel 3763: ctxt->errNo = XML_ERR_LITERAL_NOT_STARTED;
1.59 daniel 3764: ctxt->wellFormed = 0;
1.135 daniel 3765: return(NULL);
1.21 daniel 3766: }
3767:
1.135 daniel 3768: buf = (xmlChar *) xmlMalloc(size * sizeof(xmlChar));
3769: if (buf == NULL) {
3770: fprintf(stderr, "malloc of %d byte failed\n", size);
3771: return(NULL);
3772: }
1.168 daniel 3773: ctxt->instate = XML_PARSER_SYSTEM_LITERAL;
1.152 daniel 3774: cur = CUR_CHAR(l);
1.135 daniel 3775: while ((IS_CHAR(cur)) && (cur != stop)) {
1.152 daniel 3776: if (len + 5 >= size) {
1.135 daniel 3777: size *= 2;
3778: buf = xmlRealloc(buf, size * sizeof(xmlChar));
3779: if (buf == NULL) {
3780: fprintf(stderr, "realloc of %d byte failed\n", size);
1.168 daniel 3781: ctxt->instate = state;
1.135 daniel 3782: return(NULL);
3783: }
3784: }
1.152 daniel 3785: COPY_BUF(l,buf,len,cur);
3786: NEXTL(l);
3787: cur = CUR_CHAR(l);
1.135 daniel 3788: if (cur == 0) {
3789: GROW;
3790: SHRINK;
1.152 daniel 3791: cur = CUR_CHAR(l);
1.135 daniel 3792: }
3793: }
3794: buf[len] = 0;
1.168 daniel 3795: ctxt->instate = state;
1.135 daniel 3796: if (!IS_CHAR(cur)) {
3797: if ((ctxt->sax != NULL) && (ctxt->sax->error != NULL))
3798: ctxt->sax->error(ctxt->userData, "Unfinished SystemLiteral\n");
3799: ctxt->errNo = XML_ERR_LITERAL_NOT_FINISHED;
3800: ctxt->wellFormed = 0;
3801: } else {
3802: NEXT;
3803: }
3804: return(buf);
1.21 daniel 3805: }
3806:
1.50 daniel 3807: /**
3808: * xmlParsePubidLiteral:
3809: * @ctxt: an XML parser context
1.21 daniel 3810: *
1.50 daniel 3811: * parse an XML public literal
1.68 daniel 3812: *
3813: * [12] PubidLiteral ::= '"' PubidChar* '"' | "'" (PubidChar - "'")* "'"
3814: *
3815: * Returns the PubidLiteral parsed or NULL.
1.21 daniel 3816: */
3817:
1.123 daniel 3818: xmlChar *
1.55 daniel 3819: xmlParsePubidLiteral(xmlParserCtxtPtr ctxt) {
1.135 daniel 3820: xmlChar *buf = NULL;
3821: int len = 0;
1.140 daniel 3822: int size = XML_PARSER_BUFFER_SIZE;
1.135 daniel 3823: xmlChar cur;
3824: xmlChar stop;
1.125 daniel 3825:
1.91 daniel 3826: SHRINK;
1.152 daniel 3827: if (RAW == '"') {
1.40 daniel 3828: NEXT;
1.135 daniel 3829: stop = '"';
1.152 daniel 3830: } else if (RAW == '\'') {
1.40 daniel 3831: NEXT;
1.135 daniel 3832: stop = '\'';
1.21 daniel 3833: } else {
1.55 daniel 3834: if ((ctxt->sax != NULL) && (ctxt->sax->error != NULL))
1.122 daniel 3835: ctxt->sax->error(ctxt->userData,
3836: "SystemLiteral \" or ' expected\n");
1.123 daniel 3837: ctxt->errNo = XML_ERR_LITERAL_NOT_STARTED;
1.59 daniel 3838: ctxt->wellFormed = 0;
1.135 daniel 3839: return(NULL);
3840: }
3841: buf = (xmlChar *) xmlMalloc(size * sizeof(xmlChar));
3842: if (buf == NULL) {
3843: fprintf(stderr, "malloc of %d byte failed\n", size);
3844: return(NULL);
3845: }
3846: cur = CUR;
3847: while ((IS_PUBIDCHAR(cur)) && (cur != stop)) {
3848: if (len + 1 >= size) {
3849: size *= 2;
3850: buf = xmlRealloc(buf, size * sizeof(xmlChar));
3851: if (buf == NULL) {
3852: fprintf(stderr, "realloc of %d byte failed\n", size);
3853: return(NULL);
3854: }
3855: }
3856: buf[len++] = cur;
3857: NEXT;
3858: cur = CUR;
3859: if (cur == 0) {
3860: GROW;
3861: SHRINK;
3862: cur = CUR;
3863: }
3864: }
3865: buf[len] = 0;
3866: if (cur != stop) {
3867: if ((ctxt->sax != NULL) && (ctxt->sax->error != NULL))
3868: ctxt->sax->error(ctxt->userData, "Unfinished PubidLiteral\n");
3869: ctxt->errNo = XML_ERR_LITERAL_NOT_FINISHED;
3870: ctxt->wellFormed = 0;
3871: } else {
3872: NEXT;
1.21 daniel 3873: }
1.135 daniel 3874: return(buf);
1.21 daniel 3875: }
3876:
1.50 daniel 3877: /**
3878: * xmlParseCharData:
3879: * @ctxt: an XML parser context
3880: * @cdata: int indicating whether we are within a CDATA section
3881: *
3882: * parse a CharData section.
3883: * if we are within a CDATA section ']]>' marks an end of section.
1.27 daniel 3884: *
1.151 daniel 3885: * The right angle bracket (>) may be represented using the string ">",
3886: * and must, for compatibility, be escaped using ">" or a character
3887: * reference when it appears in the string "]]>" in content, when that
3888: * string is not marking the end of a CDATA section.
3889: *
1.27 daniel 3890: * [14] CharData ::= [^<&]* - ([^<&]* ']]>' [^<&]*)
3891: */
3892:
1.55 daniel 3893: void
3894: xmlParseCharData(xmlParserCtxtPtr ctxt, int cdata) {
1.152 daniel 3895: xmlChar buf[XML_PARSER_BIG_BUFFER_SIZE + 5];
1.91 daniel 3896: int nbchar = 0;
1.152 daniel 3897: int cur, l;
1.27 daniel 3898:
1.91 daniel 3899: SHRINK;
1.152 daniel 3900: cur = CUR_CHAR(l);
1.160 daniel 3901: while ((IS_CHAR(cur)) && ((cur != '<') || (ctxt->token == '<')) &&
1.153 daniel 3902: ((cur != '&') || (ctxt->token == '&'))) {
1.97 daniel 3903: if ((cur == ']') && (NXT(1) == ']') &&
1.59 daniel 3904: (NXT(2) == '>')) {
3905: if (cdata) break;
3906: else {
3907: if ((ctxt->sax != NULL) && (ctxt->sax->error != NULL))
1.151 daniel 3908: ctxt->sax->error(ctxt->userData,
1.59 daniel 3909: "Sequence ']]>' not allowed in content\n");
1.123 daniel 3910: ctxt->errNo = XML_ERR_MISPLACED_CDATA_END;
1.151 daniel 3911: /* Should this be relaxed ??? I see a "must here */
3912: ctxt->wellFormed = 0;
1.59 daniel 3913: }
3914: }
1.152 daniel 3915: COPY_BUF(l,buf,nbchar,cur);
3916: if (nbchar >= XML_PARSER_BIG_BUFFER_SIZE) {
1.91 daniel 3917: /*
3918: * Ok the segment is to be consumed as chars.
3919: */
1.171 daniel 3920: if ((ctxt->sax != NULL) && (!ctxt->disableSAX)) {
1.91 daniel 3921: if (areBlanks(ctxt, buf, nbchar)) {
3922: if (ctxt->sax->ignorableWhitespace != NULL)
3923: ctxt->sax->ignorableWhitespace(ctxt->userData,
3924: buf, nbchar);
3925: } else {
3926: if (ctxt->sax->characters != NULL)
3927: ctxt->sax->characters(ctxt->userData, buf, nbchar);
3928: }
3929: }
3930: nbchar = 0;
3931: }
1.152 daniel 3932: NEXTL(l);
3933: cur = CUR_CHAR(l);
1.27 daniel 3934: }
1.91 daniel 3935: if (nbchar != 0) {
3936: /*
3937: * Ok the segment is to be consumed as chars.
3938: */
1.171 daniel 3939: if ((ctxt->sax != NULL) && (!ctxt->disableSAX)) {
1.91 daniel 3940: if (areBlanks(ctxt, buf, nbchar)) {
3941: if (ctxt->sax->ignorableWhitespace != NULL)
3942: ctxt->sax->ignorableWhitespace(ctxt->userData, buf, nbchar);
3943: } else {
3944: if (ctxt->sax->characters != NULL)
3945: ctxt->sax->characters(ctxt->userData, buf, nbchar);
3946: }
3947: }
1.45 daniel 3948: }
1.27 daniel 3949: }
3950:
1.50 daniel 3951: /**
3952: * xmlParseExternalID:
3953: * @ctxt: an XML parser context
1.123 daniel 3954: * @publicID: a xmlChar** receiving PubidLiteral
1.67 daniel 3955: * @strict: indicate whether we should restrict parsing to only
3956: * production [75], see NOTE below
1.50 daniel 3957: *
1.67 daniel 3958: * Parse an External ID or a Public ID
3959: *
3960: * NOTE: Productions [75] and [83] interract badly since [75] can generate
3961: * 'PUBLIC' S PubidLiteral S SystemLiteral
1.22 daniel 3962: *
3963: * [75] ExternalID ::= 'SYSTEM' S SystemLiteral
3964: * | 'PUBLIC' S PubidLiteral S SystemLiteral
1.67 daniel 3965: *
3966: * [83] PublicID ::= 'PUBLIC' S PubidLiteral
3967: *
1.68 daniel 3968: * Returns the function returns SystemLiteral and in the second
1.67 daniel 3969: * case publicID receives PubidLiteral, is strict is off
3970: * it is possible to return NULL and have publicID set.
1.22 daniel 3971: */
3972:
1.123 daniel 3973: xmlChar *
3974: xmlParseExternalID(xmlParserCtxtPtr ctxt, xmlChar **publicID, int strict) {
3975: xmlChar *URI = NULL;
1.22 daniel 3976:
1.91 daniel 3977: SHRINK;
1.152 daniel 3978: if ((RAW == 'S') && (NXT(1) == 'Y') &&
1.40 daniel 3979: (NXT(2) == 'S') && (NXT(3) == 'T') &&
3980: (NXT(4) == 'E') && (NXT(5) == 'M')) {
3981: SKIP(6);
1.59 daniel 3982: if (!IS_BLANK(CUR)) {
3983: if ((ctxt->sax != NULL) && (ctxt->sax->error != NULL))
1.74 daniel 3984: ctxt->sax->error(ctxt->userData,
1.59 daniel 3985: "Space required after 'SYSTEM'\n");
1.123 daniel 3986: ctxt->errNo = XML_ERR_SPACE_REQUIRED;
1.59 daniel 3987: ctxt->wellFormed = 0;
3988: }
1.42 daniel 3989: SKIP_BLANKS;
1.39 daniel 3990: URI = xmlParseSystemLiteral(ctxt);
1.59 daniel 3991: if (URI == NULL) {
1.55 daniel 3992: if ((ctxt->sax != NULL) && (ctxt->sax->error != NULL))
1.74 daniel 3993: ctxt->sax->error(ctxt->userData,
1.39 daniel 3994: "xmlParseExternalID: SYSTEM, no URI\n");
1.123 daniel 3995: ctxt->errNo = XML_ERR_URI_REQUIRED;
1.59 daniel 3996: ctxt->wellFormed = 0;
3997: }
1.152 daniel 3998: } else if ((RAW == 'P') && (NXT(1) == 'U') &&
1.40 daniel 3999: (NXT(2) == 'B') && (NXT(3) == 'L') &&
4000: (NXT(4) == 'I') && (NXT(5) == 'C')) {
4001: SKIP(6);
1.59 daniel 4002: if (!IS_BLANK(CUR)) {
4003: if ((ctxt->sax != NULL) && (ctxt->sax->error != NULL))
1.74 daniel 4004: ctxt->sax->error(ctxt->userData,
1.59 daniel 4005: "Space required after 'PUBLIC'\n");
1.123 daniel 4006: ctxt->errNo = XML_ERR_SPACE_REQUIRED;
1.59 daniel 4007: ctxt->wellFormed = 0;
4008: }
1.42 daniel 4009: SKIP_BLANKS;
1.39 daniel 4010: *publicID = xmlParsePubidLiteral(ctxt);
1.59 daniel 4011: if (*publicID == NULL) {
1.55 daniel 4012: if ((ctxt->sax != NULL) && (ctxt->sax->error != NULL))
1.74 daniel 4013: ctxt->sax->error(ctxt->userData,
1.39 daniel 4014: "xmlParseExternalID: PUBLIC, no Public Identifier\n");
1.123 daniel 4015: ctxt->errNo = XML_ERR_PUBID_REQUIRED;
1.59 daniel 4016: ctxt->wellFormed = 0;
4017: }
1.67 daniel 4018: if (strict) {
4019: /*
4020: * We don't handle [83] so "S SystemLiteral" is required.
4021: */
4022: if (!IS_BLANK(CUR)) {
4023: if ((ctxt->sax != NULL) && (ctxt->sax->error != NULL))
1.74 daniel 4024: ctxt->sax->error(ctxt->userData,
1.67 daniel 4025: "Space required after the Public Identifier\n");
1.123 daniel 4026: ctxt->errNo = XML_ERR_SPACE_REQUIRED;
1.67 daniel 4027: ctxt->wellFormed = 0;
4028: }
4029: } else {
4030: /*
4031: * We handle [83] so we return immediately, if
4032: * "S SystemLiteral" is not detected. From a purely parsing
4033: * point of view that's a nice mess.
4034: */
1.135 daniel 4035: const xmlChar *ptr;
4036: GROW;
4037:
4038: ptr = CUR_PTR;
1.67 daniel 4039: if (!IS_BLANK(*ptr)) return(NULL);
4040:
4041: while (IS_BLANK(*ptr)) ptr++;
1.173 daniel 4042: if ((*ptr != '\'') && (*ptr != '"')) return(NULL);
1.59 daniel 4043: }
1.42 daniel 4044: SKIP_BLANKS;
1.39 daniel 4045: URI = xmlParseSystemLiteral(ctxt);
1.59 daniel 4046: if (URI == NULL) {
1.55 daniel 4047: if ((ctxt->sax != NULL) && (ctxt->sax->error != NULL))
1.74 daniel 4048: ctxt->sax->error(ctxt->userData,
1.39 daniel 4049: "xmlParseExternalID: PUBLIC, no URI\n");
1.123 daniel 4050: ctxt->errNo = XML_ERR_URI_REQUIRED;
1.59 daniel 4051: ctxt->wellFormed = 0;
4052: }
1.22 daniel 4053: }
1.39 daniel 4054: return(URI);
1.22 daniel 4055: }
4056:
1.50 daniel 4057: /**
4058: * xmlParseComment:
1.69 daniel 4059: * @ctxt: an XML parser context
1.50 daniel 4060: *
1.3 veillard 4061: * Skip an XML (SGML) comment <!-- .... -->
1.38 daniel 4062: * The spec says that "For compatibility, the string "--" (double-hyphen)
4063: * must not occur within comments. "
1.22 daniel 4064: *
4065: * [15] Comment ::= '<!--' ((Char - '-') | ('-' (Char - '-')))* '-->'
1.3 veillard 4066: */
1.72 daniel 4067: void
1.114 daniel 4068: xmlParseComment(xmlParserCtxtPtr ctxt) {
1.135 daniel 4069: xmlChar *buf = NULL;
4070: int len = 0;
1.140 daniel 4071: int size = XML_PARSER_BUFFER_SIZE;
1.152 daniel 4072: int q, ql;
4073: int r, rl;
4074: int cur, l;
1.140 daniel 4075: xmlParserInputState state;
1.3 veillard 4076:
4077: /*
1.22 daniel 4078: * Check that there is a comment right here.
1.3 veillard 4079: */
1.152 daniel 4080: if ((RAW != '<') || (NXT(1) != '!') ||
1.72 daniel 4081: (NXT(2) != '-') || (NXT(3) != '-')) return;
1.3 veillard 4082:
1.140 daniel 4083: state = ctxt->instate;
1.97 daniel 4084: ctxt->instate = XML_PARSER_COMMENT;
1.91 daniel 4085: SHRINK;
1.40 daniel 4086: SKIP(4);
1.135 daniel 4087: buf = (xmlChar *) xmlMalloc(size * sizeof(xmlChar));
4088: if (buf == NULL) {
4089: fprintf(stderr, "malloc of %d byte failed\n", size);
1.140 daniel 4090: ctxt->instate = state;
1.135 daniel 4091: return;
4092: }
1.152 daniel 4093: q = CUR_CHAR(ql);
4094: NEXTL(ql);
4095: r = CUR_CHAR(rl);
4096: NEXTL(rl);
4097: cur = CUR_CHAR(l);
1.135 daniel 4098: while (IS_CHAR(cur) &&
4099: ((cur != '>') ||
4100: (r != '-') || (q != '-'))) {
4101: if ((r == '-') && (q == '-')) {
1.55 daniel 4102: if ((ctxt->sax != NULL) && (ctxt->sax->error != NULL))
1.74 daniel 4103: ctxt->sax->error(ctxt->userData,
1.38 daniel 4104: "Comment must not contain '--' (double-hyphen)`\n");
1.123 daniel 4105: ctxt->errNo = XML_ERR_HYPHEN_IN_COMMENT;
1.59 daniel 4106: ctxt->wellFormed = 0;
4107: }
1.152 daniel 4108: if (len + 5 >= size) {
1.135 daniel 4109: size *= 2;
4110: buf = xmlRealloc(buf, size * sizeof(xmlChar));
4111: if (buf == NULL) {
4112: fprintf(stderr, "realloc of %d byte failed\n", size);
1.140 daniel 4113: ctxt->instate = state;
1.135 daniel 4114: return;
4115: }
4116: }
1.152 daniel 4117: COPY_BUF(ql,buf,len,q);
1.135 daniel 4118: q = r;
1.152 daniel 4119: ql = rl;
1.135 daniel 4120: r = cur;
1.152 daniel 4121: rl = l;
4122: NEXTL(l);
4123: cur = CUR_CHAR(l);
1.135 daniel 4124: if (cur == 0) {
4125: SHRINK;
4126: GROW;
1.152 daniel 4127: cur = CUR_CHAR(l);
1.135 daniel 4128: }
1.3 veillard 4129: }
1.135 daniel 4130: buf[len] = 0;
4131: if (!IS_CHAR(cur)) {
1.55 daniel 4132: if ((ctxt->sax != NULL) && (ctxt->sax->error != NULL))
1.122 daniel 4133: ctxt->sax->error(ctxt->userData,
1.135 daniel 4134: "Comment not terminated \n<!--%.50s\n", buf);
1.123 daniel 4135: ctxt->errNo = XML_ERR_COMMENT_NOT_FINISHED;
1.59 daniel 4136: ctxt->wellFormed = 0;
1.3 veillard 4137: } else {
1.40 daniel 4138: NEXT;
1.171 daniel 4139: if ((ctxt->sax != NULL) && (ctxt->sax->comment != NULL) &&
4140: (!ctxt->disableSAX))
1.135 daniel 4141: ctxt->sax->comment(ctxt->userData, buf);
4142: xmlFree(buf);
1.3 veillard 4143: }
1.140 daniel 4144: ctxt->instate = state;
1.3 veillard 4145: }
4146:
1.50 daniel 4147: /**
4148: * xmlParsePITarget:
4149: * @ctxt: an XML parser context
4150: *
4151: * parse the name of a PI
1.22 daniel 4152: *
4153: * [17] PITarget ::= Name - (('X' | 'x') ('M' | 'm') ('L' | 'l'))
1.68 daniel 4154: *
4155: * Returns the PITarget name or NULL
1.22 daniel 4156: */
4157:
1.123 daniel 4158: xmlChar *
1.55 daniel 4159: xmlParsePITarget(xmlParserCtxtPtr ctxt) {
1.123 daniel 4160: xmlChar *name;
1.22 daniel 4161:
4162: name = xmlParseName(ctxt);
1.139 daniel 4163: if ((name != NULL) &&
1.22 daniel 4164: ((name[0] == 'x') || (name[0] == 'X')) &&
1.31 daniel 4165: ((name[1] == 'm') || (name[1] == 'M')) &&
4166: ((name[2] == 'l') || (name[2] == 'L'))) {
1.139 daniel 4167: int i;
1.177 ! daniel 4168: if ((name[0] == 'x') && (name[1] == 'm') &&
! 4169: (name[2] == 'l') && (name[3] == 0)) {
1.151 daniel 4170: if ((ctxt->sax != NULL) && (ctxt->sax->error != NULL))
4171: ctxt->sax->error(ctxt->userData,
4172: "XML declaration allowed only at the start of the document\n");
4173: ctxt->errNo = XML_ERR_RESERVED_XML_NAME;
4174: ctxt->wellFormed = 0;
4175: return(name);
4176: } else if (name[3] == 0) {
4177: if ((ctxt->sax != NULL) && (ctxt->sax->error != NULL))
4178: ctxt->sax->error(ctxt->userData, "Invalid PI name\n");
4179: ctxt->errNo = XML_ERR_RESERVED_XML_NAME;
4180: ctxt->wellFormed = 0;
4181: return(name);
4182: }
1.139 daniel 4183: for (i = 0;;i++) {
4184: if (xmlW3CPIs[i] == NULL) break;
4185: if (!xmlStrcmp(name, (const xmlChar *)xmlW3CPIs[i]))
4186: return(name);
4187: }
4188: if ((ctxt->sax != NULL) && (ctxt->sax->warning != NULL)) {
4189: ctxt->sax->warning(ctxt->userData,
1.122 daniel 4190: "xmlParsePItarget: invalid name prefix 'xml'\n");
1.123 daniel 4191: ctxt->errNo = XML_ERR_RESERVED_XML_NAME;
1.122 daniel 4192: }
1.22 daniel 4193: }
4194: return(name);
4195: }
4196:
1.50 daniel 4197: /**
4198: * xmlParsePI:
4199: * @ctxt: an XML parser context
4200: *
4201: * parse an XML Processing Instruction.
1.22 daniel 4202: *
4203: * [16] PI ::= '<?' PITarget (S (Char* - (Char* '?>' Char*)))? '?>'
1.68 daniel 4204: *
1.69 daniel 4205: * The processing is transfered to SAX once parsed.
1.3 veillard 4206: */
4207:
1.55 daniel 4208: void
4209: xmlParsePI(xmlParserCtxtPtr ctxt) {
1.135 daniel 4210: xmlChar *buf = NULL;
4211: int len = 0;
1.140 daniel 4212: int size = XML_PARSER_BUFFER_SIZE;
1.152 daniel 4213: int cur, l;
1.123 daniel 4214: xmlChar *target;
1.140 daniel 4215: xmlParserInputState state;
1.22 daniel 4216:
1.152 daniel 4217: if ((RAW == '<') && (NXT(1) == '?')) {
1.140 daniel 4218: state = ctxt->instate;
4219: ctxt->instate = XML_PARSER_PI;
1.3 veillard 4220: /*
4221: * this is a Processing Instruction.
4222: */
1.40 daniel 4223: SKIP(2);
1.91 daniel 4224: SHRINK;
1.3 veillard 4225:
4226: /*
1.22 daniel 4227: * Parse the target name and check for special support like
4228: * namespace.
1.3 veillard 4229: */
1.22 daniel 4230: target = xmlParsePITarget(ctxt);
4231: if (target != NULL) {
1.156 daniel 4232: if ((RAW == '?') && (NXT(1) == '>')) {
4233: SKIP(2);
4234:
4235: /*
4236: * SAX: PI detected.
4237: */
1.171 daniel 4238: if ((ctxt->sax) && (!ctxt->disableSAX) &&
1.156 daniel 4239: (ctxt->sax->processingInstruction != NULL))
4240: ctxt->sax->processingInstruction(ctxt->userData,
4241: target, NULL);
4242: ctxt->instate = state;
1.170 daniel 4243: xmlFree(target);
1.156 daniel 4244: return;
4245: }
1.135 daniel 4246: buf = (xmlChar *) xmlMalloc(size * sizeof(xmlChar));
4247: if (buf == NULL) {
4248: fprintf(stderr, "malloc of %d byte failed\n", size);
1.140 daniel 4249: ctxt->instate = state;
1.135 daniel 4250: return;
4251: }
4252: cur = CUR;
4253: if (!IS_BLANK(cur)) {
1.114 daniel 4254: if ((ctxt->sax != NULL) && (ctxt->sax->error != NULL))
4255: ctxt->sax->error(ctxt->userData,
4256: "xmlParsePI: PI %s space expected\n", target);
1.123 daniel 4257: ctxt->errNo = XML_ERR_SPACE_REQUIRED;
1.114 daniel 4258: ctxt->wellFormed = 0;
4259: }
4260: SKIP_BLANKS;
1.152 daniel 4261: cur = CUR_CHAR(l);
1.135 daniel 4262: while (IS_CHAR(cur) &&
4263: ((cur != '?') || (NXT(1) != '>'))) {
1.152 daniel 4264: if (len + 5 >= size) {
1.135 daniel 4265: size *= 2;
4266: buf = xmlRealloc(buf, size * sizeof(xmlChar));
4267: if (buf == NULL) {
4268: fprintf(stderr, "realloc of %d byte failed\n", size);
1.140 daniel 4269: ctxt->instate = state;
1.135 daniel 4270: return;
4271: }
4272: }
1.152 daniel 4273: COPY_BUF(l,buf,len,cur);
4274: NEXTL(l);
4275: cur = CUR_CHAR(l);
1.135 daniel 4276: if (cur == 0) {
4277: SHRINK;
4278: GROW;
1.152 daniel 4279: cur = CUR_CHAR(l);
1.135 daniel 4280: }
4281: }
4282: buf[len] = 0;
1.152 daniel 4283: if (cur != '?') {
1.72 daniel 4284: if ((ctxt->sax != NULL) && (ctxt->sax->error != NULL))
1.74 daniel 4285: ctxt->sax->error(ctxt->userData,
1.72 daniel 4286: "xmlParsePI: PI %s never end ...\n", target);
1.123 daniel 4287: ctxt->errNo = XML_ERR_PI_NOT_FINISHED;
1.72 daniel 4288: ctxt->wellFormed = 0;
1.22 daniel 4289: } else {
1.72 daniel 4290: SKIP(2);
1.44 daniel 4291:
1.72 daniel 4292: /*
4293: * SAX: PI detected.
4294: */
1.171 daniel 4295: if ((ctxt->sax) && (!ctxt->disableSAX) &&
1.72 daniel 4296: (ctxt->sax->processingInstruction != NULL))
1.99 daniel 4297: ctxt->sax->processingInstruction(ctxt->userData,
1.135 daniel 4298: target, buf);
1.22 daniel 4299: }
1.135 daniel 4300: xmlFree(buf);
1.119 daniel 4301: xmlFree(target);
1.3 veillard 4302: } else {
1.55 daniel 4303: if ((ctxt->sax != NULL) && (ctxt->sax->error != NULL))
1.99 daniel 4304: ctxt->sax->error(ctxt->userData,
4305: "xmlParsePI : no target name\n");
1.123 daniel 4306: ctxt->errNo = XML_ERR_PI_NOT_STARTED;
1.59 daniel 4307: ctxt->wellFormed = 0;
1.22 daniel 4308: }
1.140 daniel 4309: ctxt->instate = state;
1.22 daniel 4310: }
4311: }
4312:
1.50 daniel 4313: /**
4314: * xmlParseNotationDecl:
4315: * @ctxt: an XML parser context
4316: *
4317: * parse a notation declaration
1.22 daniel 4318: *
4319: * [82] NotationDecl ::= '<!NOTATION' S Name S (ExternalID | PublicID) S? '>'
4320: *
4321: * Hence there is actually 3 choices:
4322: * 'PUBLIC' S PubidLiteral
4323: * 'PUBLIC' S PubidLiteral S SystemLiteral
4324: * and 'SYSTEM' S SystemLiteral
1.50 daniel 4325: *
1.67 daniel 4326: * See the NOTE on xmlParseExternalID().
1.22 daniel 4327: */
4328:
1.55 daniel 4329: void
4330: xmlParseNotationDecl(xmlParserCtxtPtr ctxt) {
1.123 daniel 4331: xmlChar *name;
4332: xmlChar *Pubid;
4333: xmlChar *Systemid;
1.22 daniel 4334:
1.152 daniel 4335: if ((RAW == '<') && (NXT(1) == '!') &&
1.40 daniel 4336: (NXT(2) == 'N') && (NXT(3) == 'O') &&
4337: (NXT(4) == 'T') && (NXT(5) == 'A') &&
4338: (NXT(6) == 'T') && (NXT(7) == 'I') &&
1.67 daniel 4339: (NXT(8) == 'O') && (NXT(9) == 'N')) {
1.91 daniel 4340: SHRINK;
1.40 daniel 4341: SKIP(10);
1.67 daniel 4342: if (!IS_BLANK(CUR)) {
4343: if ((ctxt->sax != NULL) && (ctxt->sax->error != NULL))
1.122 daniel 4344: ctxt->sax->error(ctxt->userData,
4345: "Space required after '<!NOTATION'\n");
1.123 daniel 4346: ctxt->errNo = XML_ERR_SPACE_REQUIRED;
1.67 daniel 4347: ctxt->wellFormed = 0;
4348: return;
4349: }
4350: SKIP_BLANKS;
1.22 daniel 4351:
4352: name = xmlParseName(ctxt);
4353: if (name == NULL) {
1.55 daniel 4354: if ((ctxt->sax != NULL) && (ctxt->sax->error != NULL))
1.122 daniel 4355: ctxt->sax->error(ctxt->userData,
4356: "NOTATION: Name expected here\n");
1.123 daniel 4357: ctxt->errNo = XML_ERR_NOTATION_NOT_STARTED;
1.67 daniel 4358: ctxt->wellFormed = 0;
4359: return;
4360: }
4361: if (!IS_BLANK(CUR)) {
4362: if ((ctxt->sax != NULL) && (ctxt->sax->error != NULL))
1.74 daniel 4363: ctxt->sax->error(ctxt->userData,
1.67 daniel 4364: "Space required after the NOTATION name'\n");
1.123 daniel 4365: ctxt->errNo = XML_ERR_SPACE_REQUIRED;
1.59 daniel 4366: ctxt->wellFormed = 0;
1.22 daniel 4367: return;
4368: }
1.42 daniel 4369: SKIP_BLANKS;
1.67 daniel 4370:
1.22 daniel 4371: /*
1.67 daniel 4372: * Parse the IDs.
1.22 daniel 4373: */
1.160 daniel 4374: Systemid = xmlParseExternalID(ctxt, &Pubid, 0);
1.67 daniel 4375: SKIP_BLANKS;
4376:
1.152 daniel 4377: if (RAW == '>') {
1.40 daniel 4378: NEXT;
1.171 daniel 4379: if ((ctxt->sax != NULL) && (!ctxt->disableSAX) &&
4380: (ctxt->sax->notationDecl != NULL))
1.74 daniel 4381: ctxt->sax->notationDecl(ctxt->userData, name, Pubid, Systemid);
1.67 daniel 4382: } else {
4383: if ((ctxt->sax != NULL) && (ctxt->sax->error != NULL))
1.74 daniel 4384: ctxt->sax->error(ctxt->userData,
1.67 daniel 4385: "'>' required to close NOTATION declaration\n");
1.123 daniel 4386: ctxt->errNo = XML_ERR_NOTATION_NOT_FINISHED;
1.67 daniel 4387: ctxt->wellFormed = 0;
4388: }
1.119 daniel 4389: xmlFree(name);
4390: if (Systemid != NULL) xmlFree(Systemid);
4391: if (Pubid != NULL) xmlFree(Pubid);
1.22 daniel 4392: }
4393: }
4394:
1.50 daniel 4395: /**
4396: * xmlParseEntityDecl:
4397: * @ctxt: an XML parser context
4398: *
4399: * parse <!ENTITY declarations
1.22 daniel 4400: *
4401: * [70] EntityDecl ::= GEDecl | PEDecl
4402: *
4403: * [71] GEDecl ::= '<!ENTITY' S Name S EntityDef S? '>'
4404: *
4405: * [72] PEDecl ::= '<!ENTITY' S '%' S Name S PEDef S? '>'
4406: *
4407: * [73] EntityDef ::= EntityValue | (ExternalID NDataDecl?)
4408: *
4409: * [74] PEDef ::= EntityValue | ExternalID
1.24 daniel 4410: *
4411: * [76] NDataDecl ::= S 'NDATA' S Name
1.99 daniel 4412: *
4413: * [ VC: Notation Declared ]
1.116 daniel 4414: * The Name must match the declared name of a notation.
1.22 daniel 4415: */
4416:
1.55 daniel 4417: void
4418: xmlParseEntityDecl(xmlParserCtxtPtr ctxt) {
1.123 daniel 4419: xmlChar *name = NULL;
4420: xmlChar *value = NULL;
4421: xmlChar *URI = NULL, *literal = NULL;
4422: xmlChar *ndata = NULL;
1.39 daniel 4423: int isParameter = 0;
1.123 daniel 4424: xmlChar *orig = NULL;
1.22 daniel 4425:
1.94 daniel 4426: GROW;
1.152 daniel 4427: if ((RAW == '<') && (NXT(1) == '!') &&
1.40 daniel 4428: (NXT(2) == 'E') && (NXT(3) == 'N') &&
4429: (NXT(4) == 'T') && (NXT(5) == 'I') &&
1.59 daniel 4430: (NXT(6) == 'T') && (NXT(7) == 'Y')) {
1.96 daniel 4431: ctxt->instate = XML_PARSER_ENTITY_DECL;
1.91 daniel 4432: SHRINK;
1.40 daniel 4433: SKIP(8);
1.59 daniel 4434: if (!IS_BLANK(CUR)) {
4435: if ((ctxt->sax != NULL) && (ctxt->sax->error != NULL))
1.122 daniel 4436: ctxt->sax->error(ctxt->userData,
4437: "Space required after '<!ENTITY'\n");
1.123 daniel 4438: ctxt->errNo = XML_ERR_SPACE_REQUIRED;
1.59 daniel 4439: ctxt->wellFormed = 0;
4440: }
4441: SKIP_BLANKS;
1.40 daniel 4442:
1.152 daniel 4443: if (RAW == '%') {
1.40 daniel 4444: NEXT;
1.59 daniel 4445: if (!IS_BLANK(CUR)) {
4446: if ((ctxt->sax != NULL) && (ctxt->sax->error != NULL))
1.122 daniel 4447: ctxt->sax->error(ctxt->userData,
4448: "Space required after '%'\n");
1.123 daniel 4449: ctxt->errNo = XML_ERR_SPACE_REQUIRED;
1.59 daniel 4450: ctxt->wellFormed = 0;
4451: }
1.42 daniel 4452: SKIP_BLANKS;
1.39 daniel 4453: isParameter = 1;
1.22 daniel 4454: }
4455:
4456: name = xmlParseName(ctxt);
1.24 daniel 4457: if (name == NULL) {
1.55 daniel 4458: if ((ctxt->sax != NULL) && (ctxt->sax->error != NULL))
1.74 daniel 4459: ctxt->sax->error(ctxt->userData, "xmlParseEntityDecl: no name\n");
1.123 daniel 4460: ctxt->errNo = XML_ERR_NAME_REQUIRED;
1.59 daniel 4461: ctxt->wellFormed = 0;
1.24 daniel 4462: return;
4463: }
1.59 daniel 4464: if (!IS_BLANK(CUR)) {
4465: if ((ctxt->sax != NULL) && (ctxt->sax->error != NULL))
1.74 daniel 4466: ctxt->sax->error(ctxt->userData,
1.59 daniel 4467: "Space required after the entity name\n");
1.123 daniel 4468: ctxt->errNo = XML_ERR_SPACE_REQUIRED;
1.59 daniel 4469: ctxt->wellFormed = 0;
4470: }
1.42 daniel 4471: SKIP_BLANKS;
1.24 daniel 4472:
1.22 daniel 4473: /*
1.68 daniel 4474: * handle the various case of definitions...
1.22 daniel 4475: */
1.39 daniel 4476: if (isParameter) {
1.152 daniel 4477: if ((RAW == '"') || (RAW == '\''))
1.78 daniel 4478: value = xmlParseEntityValue(ctxt, &orig);
1.39 daniel 4479: if (value) {
1.171 daniel 4480: if ((ctxt->sax != NULL) &&
4481: (!ctxt->disableSAX) && (ctxt->sax->entityDecl != NULL))
1.74 daniel 4482: ctxt->sax->entityDecl(ctxt->userData, name,
1.39 daniel 4483: XML_INTERNAL_PARAMETER_ENTITY,
4484: NULL, NULL, value);
4485: }
1.24 daniel 4486: else {
1.67 daniel 4487: URI = xmlParseExternalID(ctxt, &literal, 1);
1.169 daniel 4488: if ((URI == NULL) && (literal == NULL)) {
4489: if ((ctxt->sax != NULL) && (ctxt->sax->error != NULL))
4490: ctxt->sax->error(ctxt->userData,
4491: "Entity value required\n");
4492: ctxt->errNo = XML_ERR_VALUE_REQUIRED;
4493: ctxt->wellFormed = 0;
4494: }
1.39 daniel 4495: if (URI) {
1.171 daniel 4496: if ((ctxt->sax != NULL) &&
4497: (!ctxt->disableSAX) && (ctxt->sax->entityDecl != NULL))
1.74 daniel 4498: ctxt->sax->entityDecl(ctxt->userData, name,
1.39 daniel 4499: XML_EXTERNAL_PARAMETER_ENTITY,
4500: literal, URI, NULL);
4501: }
1.24 daniel 4502: }
4503: } else {
1.152 daniel 4504: if ((RAW == '"') || (RAW == '\'')) {
1.176 daniel 4505: xmlNodePtr list = NULL;
4506: xmlEntityPtr ent;
4507: int ret;
4508:
1.78 daniel 4509: value = xmlParseEntityValue(ctxt, &orig);
1.171 daniel 4510: if ((ctxt->sax != NULL) &&
4511: (!ctxt->disableSAX) && (ctxt->sax->entityDecl != NULL))
1.74 daniel 4512: ctxt->sax->entityDecl(ctxt->userData, name,
1.39 daniel 4513: XML_INTERNAL_GENERAL_ENTITY,
4514: NULL, NULL, value);
1.176 daniel 4515:
4516: /*
4517: * Check that this entity is well formed
4518: */
4519: if ((value[1] == 0) &&
4520: (value[0] == '<') && (!xmlStrcmp(name, BAD_CAST "lt"))) {
4521: /*
4522: * TODO: get definite answer on this !!!
4523: * Lots of entity decls are used to declare a single
4524: * char
4525: * <!ENTITY lt "<">
4526: * Which seems to be valid since
4527: * 2.4: The ampersand character (&) and the left angle
4528: * bracket (<) may appear in their literal form only
4529: * when used ... They are also legal within the literal
4530: * entity value of an internal entity declaration;i
4531: * see "4.3.2 Well-Formed Parsed Entities".
4532: * IMHO 2.4 and 4.3.2 are directly in contradiction.
4533: * Looking at the OASIS test suite and James Clark
4534: * tests, this is broken. However the XML REC uses
4535: * it. Is the XML REC not well-formed ????
4536: * This is a hack to avoid this problem
4537: */
4538: list = xmlNewDocText(ctxt->myDoc, value);
4539: if ((ctxt->sax != NULL) &&
4540: (ctxt->sax->getEntity != NULL) && (list != NULL)) {
4541:
4542: ent = ctxt->sax->getEntity(ctxt->userData, name);
4543: if ((ent->etype == XML_INTERNAL_GENERAL_ENTITY) &&
4544: (ent->children == NULL)) {
4545: ent->children = list;
4546: ent->last = list;
4547: list->parent = (xmlNodePtr) ent;
4548: } else {
4549: xmlFreeNodeList(list);
4550: }
4551: } else if (list != NULL) {
4552: xmlFreeNodeList(list);
4553: }
4554: } else {
4555: /*
4556: * 4.3.2: An internal general parsed entity is well-formed
4557: * if its replacement text matches the production labeled
4558: * content.
4559: */
4560: ret = xmlParseBalancedChunkMemory(ctxt->myDoc, ctxt->sax,
4561: NULL, value, &list);
4562: if ((ret == 0) && (ctxt->sax != NULL) &&
4563: (ctxt->sax->getEntity != NULL) && (list != NULL)) {
4564:
4565: ent = ctxt->sax->getEntity(ctxt->userData, name);
4566: if ((ent->etype == XML_INTERNAL_GENERAL_ENTITY) &&
4567: (ent->children == NULL)) {
4568: ent->children = list;
4569: while (list != NULL) {
4570: list->parent = (xmlNodePtr) ent;
4571: if (list->next == NULL)
4572: ent->last = list;
4573: list = list->next;
4574: }
4575: } else {
4576: xmlFreeNodeList(list);
4577: }
4578: } else if (ret > 0) {
4579: if ((ctxt->sax != NULL) && (ctxt->sax->error != NULL))
4580: ctxt->sax->error(ctxt->userData,
4581: "Entity value required\n");
4582: ctxt->errNo = ret;
4583: ctxt->wellFormed = 0;
4584: } else if (list != NULL) {
4585: xmlFreeNodeList(list);
4586: }
4587: }
1.39 daniel 4588: } else {
1.67 daniel 4589: URI = xmlParseExternalID(ctxt, &literal, 1);
1.169 daniel 4590: if ((URI == NULL) && (literal == NULL)) {
4591: if ((ctxt->sax != NULL) && (ctxt->sax->error != NULL))
4592: ctxt->sax->error(ctxt->userData,
4593: "Entity value required\n");
4594: ctxt->errNo = XML_ERR_VALUE_REQUIRED;
4595: ctxt->wellFormed = 0;
4596: }
1.152 daniel 4597: if ((RAW != '>') && (!IS_BLANK(CUR))) {
1.59 daniel 4598: if ((ctxt->sax != NULL) && (ctxt->sax->error != NULL))
1.74 daniel 4599: ctxt->sax->error(ctxt->userData,
1.59 daniel 4600: "Space required before 'NDATA'\n");
1.123 daniel 4601: ctxt->errNo = XML_ERR_SPACE_REQUIRED;
1.59 daniel 4602: ctxt->wellFormed = 0;
4603: }
1.42 daniel 4604: SKIP_BLANKS;
1.152 daniel 4605: if ((RAW == 'N') && (NXT(1) == 'D') &&
1.40 daniel 4606: (NXT(2) == 'A') && (NXT(3) == 'T') &&
4607: (NXT(4) == 'A')) {
4608: SKIP(5);
1.59 daniel 4609: if (!IS_BLANK(CUR)) {
4610: if ((ctxt->sax != NULL) && (ctxt->sax->error != NULL))
1.74 daniel 4611: ctxt->sax->error(ctxt->userData,
1.59 daniel 4612: "Space required after 'NDATA'\n");
1.123 daniel 4613: ctxt->errNo = XML_ERR_SPACE_REQUIRED;
1.59 daniel 4614: ctxt->wellFormed = 0;
4615: }
1.42 daniel 4616: SKIP_BLANKS;
1.24 daniel 4617: ndata = xmlParseName(ctxt);
1.171 daniel 4618: if ((ctxt->sax != NULL) && (!ctxt->disableSAX) &&
1.116 daniel 4619: (ctxt->sax->unparsedEntityDecl != NULL))
4620: ctxt->sax->unparsedEntityDecl(ctxt->userData, name,
1.39 daniel 4621: literal, URI, ndata);
4622: } else {
1.171 daniel 4623: if ((ctxt->sax != NULL) &&
4624: (!ctxt->disableSAX) && (ctxt->sax->entityDecl != NULL))
1.74 daniel 4625: ctxt->sax->entityDecl(ctxt->userData, name,
1.39 daniel 4626: XML_EXTERNAL_GENERAL_PARSED_ENTITY,
4627: literal, URI, NULL);
1.24 daniel 4628: }
4629: }
4630: }
1.42 daniel 4631: SKIP_BLANKS;
1.152 daniel 4632: if (RAW != '>') {
1.55 daniel 4633: if ((ctxt->sax != NULL) && (ctxt->sax->error != NULL))
1.74 daniel 4634: ctxt->sax->error(ctxt->userData,
1.31 daniel 4635: "xmlParseEntityDecl: entity %s not terminated\n", name);
1.123 daniel 4636: ctxt->errNo = XML_ERR_ENTITY_NOT_FINISHED;
1.59 daniel 4637: ctxt->wellFormed = 0;
1.24 daniel 4638: } else
1.40 daniel 4639: NEXT;
1.78 daniel 4640: if (orig != NULL) {
4641: /*
1.98 daniel 4642: * Ugly mechanism to save the raw entity value.
1.78 daniel 4643: */
4644: xmlEntityPtr cur = NULL;
4645:
1.98 daniel 4646: if (isParameter) {
4647: if ((ctxt->sax != NULL) &&
4648: (ctxt->sax->getParameterEntity != NULL))
1.120 daniel 4649: cur = ctxt->sax->getParameterEntity(ctxt->userData, name);
1.98 daniel 4650: } else {
4651: if ((ctxt->sax != NULL) &&
4652: (ctxt->sax->getEntity != NULL))
1.120 daniel 4653: cur = ctxt->sax->getEntity(ctxt->userData, name);
1.98 daniel 4654: }
4655: if (cur != NULL) {
4656: if (cur->orig != NULL)
1.119 daniel 4657: xmlFree(orig);
1.98 daniel 4658: else
4659: cur->orig = orig;
4660: } else
1.119 daniel 4661: xmlFree(orig);
1.78 daniel 4662: }
1.119 daniel 4663: if (name != NULL) xmlFree(name);
4664: if (value != NULL) xmlFree(value);
4665: if (URI != NULL) xmlFree(URI);
4666: if (literal != NULL) xmlFree(literal);
4667: if (ndata != NULL) xmlFree(ndata);
1.22 daniel 4668: }
4669: }
4670:
1.50 daniel 4671: /**
1.59 daniel 4672: * xmlParseDefaultDecl:
4673: * @ctxt: an XML parser context
4674: * @value: Receive a possible fixed default value for the attribute
4675: *
4676: * Parse an attribute default declaration
4677: *
4678: * [60] DefaultDecl ::= '#REQUIRED' | '#IMPLIED' | (('#FIXED' S)? AttValue)
4679: *
1.99 daniel 4680: * [ VC: Required Attribute ]
1.117 daniel 4681: * if the default declaration is the keyword #REQUIRED, then the
4682: * attribute must be specified for all elements of the type in the
4683: * attribute-list declaration.
1.99 daniel 4684: *
4685: * [ VC: Attribute Default Legal ]
1.102 daniel 4686: * The declared default value must meet the lexical constraints of
4687: * the declared attribute type c.f. xmlValidateAttributeDecl()
1.99 daniel 4688: *
4689: * [ VC: Fixed Attribute Default ]
1.117 daniel 4690: * if an attribute has a default value declared with the #FIXED
4691: * keyword, instances of that attribute must match the default value.
1.99 daniel 4692: *
4693: * [ WFC: No < in Attribute Values ]
4694: * handled in xmlParseAttValue()
4695: *
1.59 daniel 4696: * returns: XML_ATTRIBUTE_NONE, XML_ATTRIBUTE_REQUIRED, XML_ATTRIBUTE_IMPLIED
4697: * or XML_ATTRIBUTE_FIXED.
4698: */
4699:
4700: int
1.123 daniel 4701: xmlParseDefaultDecl(xmlParserCtxtPtr ctxt, xmlChar **value) {
1.59 daniel 4702: int val;
1.123 daniel 4703: xmlChar *ret;
1.59 daniel 4704:
4705: *value = NULL;
1.152 daniel 4706: if ((RAW == '#') && (NXT(1) == 'R') &&
1.59 daniel 4707: (NXT(2) == 'E') && (NXT(3) == 'Q') &&
4708: (NXT(4) == 'U') && (NXT(5) == 'I') &&
4709: (NXT(6) == 'R') && (NXT(7) == 'E') &&
4710: (NXT(8) == 'D')) {
4711: SKIP(9);
4712: return(XML_ATTRIBUTE_REQUIRED);
4713: }
1.152 daniel 4714: if ((RAW == '#') && (NXT(1) == 'I') &&
1.59 daniel 4715: (NXT(2) == 'M') && (NXT(3) == 'P') &&
4716: (NXT(4) == 'L') && (NXT(5) == 'I') &&
4717: (NXT(6) == 'E') && (NXT(7) == 'D')) {
4718: SKIP(8);
4719: return(XML_ATTRIBUTE_IMPLIED);
4720: }
4721: val = XML_ATTRIBUTE_NONE;
1.152 daniel 4722: if ((RAW == '#') && (NXT(1) == 'F') &&
1.59 daniel 4723: (NXT(2) == 'I') && (NXT(3) == 'X') &&
4724: (NXT(4) == 'E') && (NXT(5) == 'D')) {
4725: SKIP(6);
4726: val = XML_ATTRIBUTE_FIXED;
4727: if (!IS_BLANK(CUR)) {
4728: if ((ctxt->sax != NULL) && (ctxt->sax->error != NULL))
1.122 daniel 4729: ctxt->sax->error(ctxt->userData,
4730: "Space required after '#FIXED'\n");
1.123 daniel 4731: ctxt->errNo = XML_ERR_SPACE_REQUIRED;
1.59 daniel 4732: ctxt->wellFormed = 0;
4733: }
4734: SKIP_BLANKS;
4735: }
4736: ret = xmlParseAttValue(ctxt);
1.96 daniel 4737: ctxt->instate = XML_PARSER_DTD;
1.59 daniel 4738: if (ret == NULL) {
4739: if ((ctxt->sax != NULL) && (ctxt->sax->error != NULL))
1.74 daniel 4740: ctxt->sax->error(ctxt->userData,
1.59 daniel 4741: "Attribute default value declaration error\n");
4742: ctxt->wellFormed = 0;
4743: } else
4744: *value = ret;
4745: return(val);
4746: }
4747:
4748: /**
1.66 daniel 4749: * xmlParseNotationType:
4750: * @ctxt: an XML parser context
4751: *
4752: * parse an Notation attribute type.
4753: *
1.99 daniel 4754: * Note: the leading 'NOTATION' S part has already being parsed...
4755: *
1.66 daniel 4756: * [58] NotationType ::= 'NOTATION' S '(' S? Name (S? '|' S? Name)* S? ')'
4757: *
1.99 daniel 4758: * [ VC: Notation Attributes ]
1.117 daniel 4759: * Values of this type must match one of the notation names included
1.99 daniel 4760: * in the declaration; all notation names in the declaration must be declared.
1.66 daniel 4761: *
4762: * Returns: the notation attribute tree built while parsing
4763: */
4764:
4765: xmlEnumerationPtr
4766: xmlParseNotationType(xmlParserCtxtPtr ctxt) {
1.123 daniel 4767: xmlChar *name;
1.66 daniel 4768: xmlEnumerationPtr ret = NULL, last = NULL, cur;
4769:
1.152 daniel 4770: if (RAW != '(') {
1.66 daniel 4771: if ((ctxt->sax != NULL) && (ctxt->sax->error != NULL))
1.122 daniel 4772: ctxt->sax->error(ctxt->userData,
4773: "'(' required to start 'NOTATION'\n");
1.123 daniel 4774: ctxt->errNo = XML_ERR_NOTATION_NOT_STARTED;
1.66 daniel 4775: ctxt->wellFormed = 0;
4776: return(NULL);
4777: }
1.91 daniel 4778: SHRINK;
1.66 daniel 4779: do {
4780: NEXT;
4781: SKIP_BLANKS;
4782: name = xmlParseName(ctxt);
4783: if (name == NULL) {
4784: if ((ctxt->sax != NULL) && (ctxt->sax->error != NULL))
1.74 daniel 4785: ctxt->sax->error(ctxt->userData,
1.66 daniel 4786: "Name expected in NOTATION declaration\n");
1.123 daniel 4787: ctxt->errNo = XML_ERR_NAME_REQUIRED;
1.66 daniel 4788: ctxt->wellFormed = 0;
4789: return(ret);
4790: }
4791: cur = xmlCreateEnumeration(name);
1.119 daniel 4792: xmlFree(name);
1.66 daniel 4793: if (cur == NULL) return(ret);
4794: if (last == NULL) ret = last = cur;
4795: else {
4796: last->next = cur;
4797: last = cur;
4798: }
4799: SKIP_BLANKS;
1.152 daniel 4800: } while (RAW == '|');
4801: if (RAW != ')') {
1.66 daniel 4802: if ((ctxt->sax != NULL) && (ctxt->sax->error != NULL))
1.74 daniel 4803: ctxt->sax->error(ctxt->userData,
1.66 daniel 4804: "')' required to finish NOTATION declaration\n");
1.123 daniel 4805: ctxt->errNo = XML_ERR_NOTATION_NOT_FINISHED;
1.66 daniel 4806: ctxt->wellFormed = 0;
1.170 daniel 4807: if ((last != NULL) && (last != ret))
4808: xmlFreeEnumeration(last);
1.66 daniel 4809: return(ret);
4810: }
4811: NEXT;
4812: return(ret);
4813: }
4814:
4815: /**
4816: * xmlParseEnumerationType:
4817: * @ctxt: an XML parser context
4818: *
4819: * parse an Enumeration attribute type.
4820: *
4821: * [59] Enumeration ::= '(' S? Nmtoken (S? '|' S? Nmtoken)* S? ')'
4822: *
1.99 daniel 4823: * [ VC: Enumeration ]
1.117 daniel 4824: * Values of this type must match one of the Nmtoken tokens in
1.99 daniel 4825: * the declaration
4826: *
1.66 daniel 4827: * Returns: the enumeration attribute tree built while parsing
4828: */
4829:
4830: xmlEnumerationPtr
4831: xmlParseEnumerationType(xmlParserCtxtPtr ctxt) {
1.123 daniel 4832: xmlChar *name;
1.66 daniel 4833: xmlEnumerationPtr ret = NULL, last = NULL, cur;
4834:
1.152 daniel 4835: if (RAW != '(') {
1.66 daniel 4836: if ((ctxt->sax != NULL) && (ctxt->sax->error != NULL))
1.74 daniel 4837: ctxt->sax->error(ctxt->userData,
1.66 daniel 4838: "'(' required to start ATTLIST enumeration\n");
1.123 daniel 4839: ctxt->errNo = XML_ERR_ATTLIST_NOT_STARTED;
1.66 daniel 4840: ctxt->wellFormed = 0;
4841: return(NULL);
4842: }
1.91 daniel 4843: SHRINK;
1.66 daniel 4844: do {
4845: NEXT;
4846: SKIP_BLANKS;
4847: name = xmlParseNmtoken(ctxt);
4848: if (name == NULL) {
4849: if ((ctxt->sax != NULL) && (ctxt->sax->error != NULL))
1.74 daniel 4850: ctxt->sax->error(ctxt->userData,
1.66 daniel 4851: "NmToken expected in ATTLIST enumeration\n");
1.123 daniel 4852: ctxt->errNo = XML_ERR_NMTOKEN_REQUIRED;
1.66 daniel 4853: ctxt->wellFormed = 0;
4854: return(ret);
4855: }
4856: cur = xmlCreateEnumeration(name);
1.119 daniel 4857: xmlFree(name);
1.66 daniel 4858: if (cur == NULL) return(ret);
4859: if (last == NULL) ret = last = cur;
4860: else {
4861: last->next = cur;
4862: last = cur;
4863: }
4864: SKIP_BLANKS;
1.152 daniel 4865: } while (RAW == '|');
4866: if (RAW != ')') {
1.66 daniel 4867: if ((ctxt->sax != NULL) && (ctxt->sax->error != NULL))
1.74 daniel 4868: ctxt->sax->error(ctxt->userData,
1.66 daniel 4869: "')' required to finish ATTLIST enumeration\n");
1.123 daniel 4870: ctxt->errNo = XML_ERR_ATTLIST_NOT_FINISHED;
1.66 daniel 4871: ctxt->wellFormed = 0;
4872: return(ret);
4873: }
4874: NEXT;
4875: return(ret);
4876: }
4877:
4878: /**
1.50 daniel 4879: * xmlParseEnumeratedType:
4880: * @ctxt: an XML parser context
1.66 daniel 4881: * @tree: the enumeration tree built while parsing
1.50 daniel 4882: *
1.66 daniel 4883: * parse an Enumerated attribute type.
1.22 daniel 4884: *
4885: * [57] EnumeratedType ::= NotationType | Enumeration
4886: *
4887: * [58] NotationType ::= 'NOTATION' S '(' S? Name (S? '|' S? Name)* S? ')'
4888: *
1.50 daniel 4889: *
1.66 daniel 4890: * Returns: XML_ATTRIBUTE_ENUMERATION or XML_ATTRIBUTE_NOTATION
1.22 daniel 4891: */
4892:
1.66 daniel 4893: int
4894: xmlParseEnumeratedType(xmlParserCtxtPtr ctxt, xmlEnumerationPtr *tree) {
1.152 daniel 4895: if ((RAW == 'N') && (NXT(1) == 'O') &&
1.66 daniel 4896: (NXT(2) == 'T') && (NXT(3) == 'A') &&
4897: (NXT(4) == 'T') && (NXT(5) == 'I') &&
4898: (NXT(6) == 'O') && (NXT(7) == 'N')) {
4899: SKIP(8);
4900: if (!IS_BLANK(CUR)) {
4901: if ((ctxt->sax != NULL) && (ctxt->sax->error != NULL))
1.122 daniel 4902: ctxt->sax->error(ctxt->userData,
4903: "Space required after 'NOTATION'\n");
1.123 daniel 4904: ctxt->errNo = XML_ERR_SPACE_REQUIRED;
1.66 daniel 4905: ctxt->wellFormed = 0;
4906: return(0);
4907: }
4908: SKIP_BLANKS;
4909: *tree = xmlParseNotationType(ctxt);
4910: if (*tree == NULL) return(0);
4911: return(XML_ATTRIBUTE_NOTATION);
4912: }
4913: *tree = xmlParseEnumerationType(ctxt);
4914: if (*tree == NULL) return(0);
4915: return(XML_ATTRIBUTE_ENUMERATION);
1.22 daniel 4916: }
4917:
1.50 daniel 4918: /**
4919: * xmlParseAttributeType:
4920: * @ctxt: an XML parser context
1.66 daniel 4921: * @tree: the enumeration tree built while parsing
1.50 daniel 4922: *
1.59 daniel 4923: * parse the Attribute list def for an element
1.22 daniel 4924: *
4925: * [54] AttType ::= StringType | TokenizedType | EnumeratedType
4926: *
4927: * [55] StringType ::= 'CDATA'
4928: *
4929: * [56] TokenizedType ::= 'ID' | 'IDREF' | 'IDREFS' | 'ENTITY' |
4930: * 'ENTITIES' | 'NMTOKEN' | 'NMTOKENS'
1.50 daniel 4931: *
1.102 daniel 4932: * Validity constraints for attribute values syntax are checked in
4933: * xmlValidateAttributeValue()
4934: *
1.99 daniel 4935: * [ VC: ID ]
1.117 daniel 4936: * Values of type ID must match the Name production. A name must not
1.99 daniel 4937: * appear more than once in an XML document as a value of this type;
4938: * i.e., ID values must uniquely identify the elements which bear them.
4939: *
4940: * [ VC: One ID per Element Type ]
1.117 daniel 4941: * No element type may have more than one ID attribute specified.
1.99 daniel 4942: *
4943: * [ VC: ID Attribute Default ]
1.117 daniel 4944: * An ID attribute must have a declared default of #IMPLIED or #REQUIRED.
1.99 daniel 4945: *
4946: * [ VC: IDREF ]
1.102 daniel 4947: * Values of type IDREF must match the Name production, and values
1.140 daniel 4948: * of type IDREFS must match Names; each IDREF Name must match the value
1.117 daniel 4949: * of an ID attribute on some element in the XML document; i.e. IDREF
1.99 daniel 4950: * values must match the value of some ID attribute.
4951: *
4952: * [ VC: Entity Name ]
1.102 daniel 4953: * Values of type ENTITY must match the Name production, values
1.140 daniel 4954: * of type ENTITIES must match Names; each Entity Name must match the
1.117 daniel 4955: * name of an unparsed entity declared in the DTD.
1.99 daniel 4956: *
4957: * [ VC: Name Token ]
1.102 daniel 4958: * Values of type NMTOKEN must match the Nmtoken production; values
1.99 daniel 4959: * of type NMTOKENS must match Nmtokens.
4960: *
1.69 daniel 4961: * Returns the attribute type
1.22 daniel 4962: */
1.59 daniel 4963: int
1.66 daniel 4964: xmlParseAttributeType(xmlParserCtxtPtr ctxt, xmlEnumerationPtr *tree) {
1.91 daniel 4965: SHRINK;
1.152 daniel 4966: if ((RAW == 'C') && (NXT(1) == 'D') &&
1.40 daniel 4967: (NXT(2) == 'A') && (NXT(3) == 'T') &&
4968: (NXT(4) == 'A')) {
4969: SKIP(5);
1.66 daniel 4970: return(XML_ATTRIBUTE_CDATA);
1.152 daniel 4971: } else if ((RAW == 'I') && (NXT(1) == 'D') &&
1.40 daniel 4972: (NXT(2) == 'R') && (NXT(3) == 'E') &&
1.97 daniel 4973: (NXT(4) == 'F') && (NXT(5) == 'S')) {
4974: SKIP(6);
4975: return(XML_ATTRIBUTE_IDREFS);
1.152 daniel 4976: } else if ((RAW == 'I') && (NXT(1) == 'D') &&
1.97 daniel 4977: (NXT(2) == 'R') && (NXT(3) == 'E') &&
1.40 daniel 4978: (NXT(4) == 'F')) {
4979: SKIP(5);
1.59 daniel 4980: return(XML_ATTRIBUTE_IDREF);
1.152 daniel 4981: } else if ((RAW == 'I') && (NXT(1) == 'D')) {
1.66 daniel 4982: SKIP(2);
4983: return(XML_ATTRIBUTE_ID);
1.152 daniel 4984: } else if ((RAW == 'E') && (NXT(1) == 'N') &&
1.40 daniel 4985: (NXT(2) == 'T') && (NXT(3) == 'I') &&
4986: (NXT(4) == 'T') && (NXT(5) == 'Y')) {
4987: SKIP(6);
1.59 daniel 4988: return(XML_ATTRIBUTE_ENTITY);
1.152 daniel 4989: } else if ((RAW == 'E') && (NXT(1) == 'N') &&
1.40 daniel 4990: (NXT(2) == 'T') && (NXT(3) == 'I') &&
4991: (NXT(4) == 'T') && (NXT(5) == 'I') &&
4992: (NXT(6) == 'E') && (NXT(7) == 'S')) {
4993: SKIP(8);
1.59 daniel 4994: return(XML_ATTRIBUTE_ENTITIES);
1.152 daniel 4995: } else if ((RAW == 'N') && (NXT(1) == 'M') &&
1.40 daniel 4996: (NXT(2) == 'T') && (NXT(3) == 'O') &&
4997: (NXT(4) == 'K') && (NXT(5) == 'E') &&
1.66 daniel 4998: (NXT(6) == 'N') && (NXT(7) == 'S')) {
4999: SKIP(8);
5000: return(XML_ATTRIBUTE_NMTOKENS);
1.152 daniel 5001: } else if ((RAW == 'N') && (NXT(1) == 'M') &&
1.66 daniel 5002: (NXT(2) == 'T') && (NXT(3) == 'O') &&
5003: (NXT(4) == 'K') && (NXT(5) == 'E') &&
1.40 daniel 5004: (NXT(6) == 'N')) {
5005: SKIP(7);
1.59 daniel 5006: return(XML_ATTRIBUTE_NMTOKEN);
1.22 daniel 5007: }
1.66 daniel 5008: return(xmlParseEnumeratedType(ctxt, tree));
1.22 daniel 5009: }
5010:
1.50 daniel 5011: /**
5012: * xmlParseAttributeListDecl:
5013: * @ctxt: an XML parser context
5014: *
5015: * : parse the Attribute list def for an element
1.22 daniel 5016: *
5017: * [52] AttlistDecl ::= '<!ATTLIST' S Name AttDef* S? '>'
5018: *
5019: * [53] AttDef ::= S Name S AttType S DefaultDecl
1.50 daniel 5020: *
1.22 daniel 5021: */
1.55 daniel 5022: void
5023: xmlParseAttributeListDecl(xmlParserCtxtPtr ctxt) {
1.123 daniel 5024: xmlChar *elemName;
5025: xmlChar *attrName;
1.103 daniel 5026: xmlEnumerationPtr tree;
1.22 daniel 5027:
1.152 daniel 5028: if ((RAW == '<') && (NXT(1) == '!') &&
1.40 daniel 5029: (NXT(2) == 'A') && (NXT(3) == 'T') &&
5030: (NXT(4) == 'T') && (NXT(5) == 'L') &&
5031: (NXT(6) == 'I') && (NXT(7) == 'S') &&
1.59 daniel 5032: (NXT(8) == 'T')) {
1.40 daniel 5033: SKIP(9);
1.59 daniel 5034: if (!IS_BLANK(CUR)) {
5035: if ((ctxt->sax != NULL) && (ctxt->sax->error != NULL))
1.122 daniel 5036: ctxt->sax->error(ctxt->userData,
5037: "Space required after '<!ATTLIST'\n");
1.123 daniel 5038: ctxt->errNo = XML_ERR_SPACE_REQUIRED;
1.59 daniel 5039: ctxt->wellFormed = 0;
5040: }
1.42 daniel 5041: SKIP_BLANKS;
1.59 daniel 5042: elemName = xmlParseName(ctxt);
5043: if (elemName == NULL) {
1.55 daniel 5044: if ((ctxt->sax != NULL) && (ctxt->sax->error != NULL))
1.122 daniel 5045: ctxt->sax->error(ctxt->userData,
5046: "ATTLIST: no name for Element\n");
1.123 daniel 5047: ctxt->errNo = XML_ERR_NAME_REQUIRED;
1.59 daniel 5048: ctxt->wellFormed = 0;
1.22 daniel 5049: return;
5050: }
1.42 daniel 5051: SKIP_BLANKS;
1.152 daniel 5052: while (RAW != '>') {
1.123 daniel 5053: const xmlChar *check = CUR_PTR;
1.59 daniel 5054: int type;
5055: int def;
1.123 daniel 5056: xmlChar *defaultValue = NULL;
1.59 daniel 5057:
1.103 daniel 5058: tree = NULL;
1.59 daniel 5059: attrName = xmlParseName(ctxt);
5060: if (attrName == NULL) {
5061: if ((ctxt->sax != NULL) && (ctxt->sax->error != NULL))
1.122 daniel 5062: ctxt->sax->error(ctxt->userData,
5063: "ATTLIST: no name for Attribute\n");
1.123 daniel 5064: ctxt->errNo = XML_ERR_NAME_REQUIRED;
1.59 daniel 5065: ctxt->wellFormed = 0;
5066: break;
5067: }
1.97 daniel 5068: GROW;
1.59 daniel 5069: if (!IS_BLANK(CUR)) {
5070: if ((ctxt->sax != NULL) && (ctxt->sax->error != NULL))
1.74 daniel 5071: ctxt->sax->error(ctxt->userData,
1.59 daniel 5072: "Space required after the attribute name\n");
1.123 daniel 5073: ctxt->errNo = XML_ERR_SPACE_REQUIRED;
1.59 daniel 5074: ctxt->wellFormed = 0;
1.170 daniel 5075: if (attrName != NULL)
5076: xmlFree(attrName);
5077: if (defaultValue != NULL)
5078: xmlFree(defaultValue);
1.59 daniel 5079: break;
5080: }
5081: SKIP_BLANKS;
5082:
1.66 daniel 5083: type = xmlParseAttributeType(ctxt, &tree);
1.170 daniel 5084: if (type <= 0) {
5085: if (attrName != NULL)
5086: xmlFree(attrName);
5087: if (defaultValue != NULL)
5088: xmlFree(defaultValue);
5089: break;
5090: }
1.22 daniel 5091:
1.97 daniel 5092: GROW;
1.59 daniel 5093: if (!IS_BLANK(CUR)) {
5094: if ((ctxt->sax != NULL) && (ctxt->sax->error != NULL))
1.74 daniel 5095: ctxt->sax->error(ctxt->userData,
1.59 daniel 5096: "Space required after the attribute type\n");
1.123 daniel 5097: ctxt->errNo = XML_ERR_SPACE_REQUIRED;
1.59 daniel 5098: ctxt->wellFormed = 0;
1.170 daniel 5099: if (attrName != NULL)
5100: xmlFree(attrName);
5101: if (defaultValue != NULL)
5102: xmlFree(defaultValue);
5103: if (tree != NULL)
5104: xmlFreeEnumeration(tree);
1.59 daniel 5105: break;
5106: }
1.42 daniel 5107: SKIP_BLANKS;
1.59 daniel 5108:
5109: def = xmlParseDefaultDecl(ctxt, &defaultValue);
1.170 daniel 5110: if (def <= 0) {
5111: if (attrName != NULL)
5112: xmlFree(attrName);
5113: if (defaultValue != NULL)
5114: xmlFree(defaultValue);
5115: if (tree != NULL)
5116: xmlFreeEnumeration(tree);
5117: break;
5118: }
1.59 daniel 5119:
1.97 daniel 5120: GROW;
1.152 daniel 5121: if (RAW != '>') {
1.59 daniel 5122: if (!IS_BLANK(CUR)) {
5123: if ((ctxt->sax != NULL) && (ctxt->sax->error != NULL))
1.74 daniel 5124: ctxt->sax->error(ctxt->userData,
1.59 daniel 5125: "Space required after the attribute default value\n");
1.123 daniel 5126: ctxt->errNo = XML_ERR_SPACE_REQUIRED;
1.59 daniel 5127: ctxt->wellFormed = 0;
1.170 daniel 5128: if (attrName != NULL)
5129: xmlFree(attrName);
5130: if (defaultValue != NULL)
5131: xmlFree(defaultValue);
5132: if (tree != NULL)
5133: xmlFreeEnumeration(tree);
1.59 daniel 5134: break;
5135: }
5136: SKIP_BLANKS;
5137: }
1.40 daniel 5138: if (check == CUR_PTR) {
1.55 daniel 5139: if ((ctxt->sax != NULL) && (ctxt->sax->error != NULL))
1.74 daniel 5140: ctxt->sax->error(ctxt->userData,
1.59 daniel 5141: "xmlParseAttributeListDecl: detected internal error\n");
1.123 daniel 5142: ctxt->errNo = XML_ERR_INTERNAL_ERROR;
1.170 daniel 5143: if (attrName != NULL)
5144: xmlFree(attrName);
5145: if (defaultValue != NULL)
5146: xmlFree(defaultValue);
5147: if (tree != NULL)
5148: xmlFreeEnumeration(tree);
1.22 daniel 5149: break;
5150: }
1.171 daniel 5151: if ((ctxt->sax != NULL) && (!ctxt->disableSAX) &&
5152: (ctxt->sax->attributeDecl != NULL))
1.74 daniel 5153: ctxt->sax->attributeDecl(ctxt->userData, elemName, attrName,
1.66 daniel 5154: type, def, defaultValue, tree);
1.59 daniel 5155: if (attrName != NULL)
1.119 daniel 5156: xmlFree(attrName);
1.59 daniel 5157: if (defaultValue != NULL)
1.119 daniel 5158: xmlFree(defaultValue);
1.97 daniel 5159: GROW;
1.22 daniel 5160: }
1.152 daniel 5161: if (RAW == '>')
1.40 daniel 5162: NEXT;
1.22 daniel 5163:
1.119 daniel 5164: xmlFree(elemName);
1.22 daniel 5165: }
5166: }
5167:
1.50 daniel 5168: /**
1.61 daniel 5169: * xmlParseElementMixedContentDecl:
5170: * @ctxt: an XML parser context
5171: *
5172: * parse the declaration for a Mixed Element content
5173: * The leading '(' and spaces have been skipped in xmlParseElementContentDecl
5174: *
5175: * [51] Mixed ::= '(' S? '#PCDATA' (S? '|' S? Name)* S? ')*' |
5176: * '(' S? '#PCDATA' S? ')'
5177: *
1.99 daniel 5178: * [ VC: Proper Group/PE Nesting ] applies to [51] too (see [49])
5179: *
5180: * [ VC: No Duplicate Types ]
1.117 daniel 5181: * The same name must not appear more than once in a single
5182: * mixed-content declaration.
1.99 daniel 5183: *
1.61 daniel 5184: * returns: the list of the xmlElementContentPtr describing the element choices
5185: */
5186: xmlElementContentPtr
1.62 daniel 5187: xmlParseElementMixedContentDecl(xmlParserCtxtPtr ctxt) {
1.64 daniel 5188: xmlElementContentPtr ret = NULL, cur = NULL, n;
1.123 daniel 5189: xmlChar *elem = NULL;
1.61 daniel 5190:
1.97 daniel 5191: GROW;
1.152 daniel 5192: if ((RAW == '#') && (NXT(1) == 'P') &&
1.61 daniel 5193: (NXT(2) == 'C') && (NXT(3) == 'D') &&
5194: (NXT(4) == 'A') && (NXT(5) == 'T') &&
5195: (NXT(6) == 'A')) {
5196: SKIP(7);
5197: SKIP_BLANKS;
1.91 daniel 5198: SHRINK;
1.152 daniel 5199: if (RAW == ')') {
1.63 daniel 5200: NEXT;
5201: ret = xmlNewElementContent(NULL, XML_ELEMENT_CONTENT_PCDATA);
1.152 daniel 5202: if (RAW == '*') {
1.136 daniel 5203: ret->ocur = XML_ELEMENT_CONTENT_MULT;
5204: NEXT;
5205: }
1.63 daniel 5206: return(ret);
5207: }
1.152 daniel 5208: if ((RAW == '(') || (RAW == '|')) {
1.61 daniel 5209: ret = cur = xmlNewElementContent(NULL, XML_ELEMENT_CONTENT_PCDATA);
5210: if (ret == NULL) return(NULL);
1.99 daniel 5211: }
1.152 daniel 5212: while (RAW == '|') {
1.64 daniel 5213: NEXT;
1.61 daniel 5214: if (elem == NULL) {
5215: ret = xmlNewElementContent(NULL, XML_ELEMENT_CONTENT_OR);
5216: if (ret == NULL) return(NULL);
5217: ret->c1 = cur;
1.64 daniel 5218: cur = ret;
1.61 daniel 5219: } else {
1.64 daniel 5220: n = xmlNewElementContent(NULL, XML_ELEMENT_CONTENT_OR);
5221: if (n == NULL) return(NULL);
5222: n->c1 = xmlNewElementContent(elem, XML_ELEMENT_CONTENT_ELEMENT);
5223: cur->c2 = n;
5224: cur = n;
1.119 daniel 5225: xmlFree(elem);
1.61 daniel 5226: }
5227: SKIP_BLANKS;
5228: elem = xmlParseName(ctxt);
5229: if (elem == NULL) {
5230: if ((ctxt->sax != NULL) && (ctxt->sax->error != NULL))
1.74 daniel 5231: ctxt->sax->error(ctxt->userData,
1.61 daniel 5232: "xmlParseElementMixedContentDecl : Name expected\n");
1.123 daniel 5233: ctxt->errNo = XML_ERR_NAME_REQUIRED;
1.61 daniel 5234: ctxt->wellFormed = 0;
5235: xmlFreeElementContent(cur);
5236: return(NULL);
5237: }
5238: SKIP_BLANKS;
1.97 daniel 5239: GROW;
1.61 daniel 5240: }
1.152 daniel 5241: if ((RAW == ')') && (NXT(1) == '*')) {
1.66 daniel 5242: if (elem != NULL) {
1.61 daniel 5243: cur->c2 = xmlNewElementContent(elem,
5244: XML_ELEMENT_CONTENT_ELEMENT);
1.119 daniel 5245: xmlFree(elem);
1.66 daniel 5246: }
1.65 daniel 5247: ret->ocur = XML_ELEMENT_CONTENT_MULT;
1.64 daniel 5248: SKIP(2);
1.61 daniel 5249: } else {
1.119 daniel 5250: if (elem != NULL) xmlFree(elem);
1.61 daniel 5251: if ((ctxt->sax != NULL) && (ctxt->sax->error != NULL))
1.74 daniel 5252: ctxt->sax->error(ctxt->userData,
1.63 daniel 5253: "xmlParseElementMixedContentDecl : '|' or ')*' expected\n");
1.123 daniel 5254: ctxt->errNo = XML_ERR_MIXED_NOT_STARTED;
1.61 daniel 5255: ctxt->wellFormed = 0;
5256: xmlFreeElementContent(ret);
5257: return(NULL);
5258: }
5259:
5260: } else {
5261: if ((ctxt->sax != NULL) && (ctxt->sax->error != NULL))
1.74 daniel 5262: ctxt->sax->error(ctxt->userData,
1.61 daniel 5263: "xmlParseElementMixedContentDecl : '#PCDATA' expected\n");
1.123 daniel 5264: ctxt->errNo = XML_ERR_PCDATA_REQUIRED;
1.61 daniel 5265: ctxt->wellFormed = 0;
5266: }
5267: return(ret);
5268: }
5269:
5270: /**
5271: * xmlParseElementChildrenContentDecl:
1.50 daniel 5272: * @ctxt: an XML parser context
5273: *
1.61 daniel 5274: * parse the declaration for a Mixed Element content
5275: * The leading '(' and spaces have been skipped in xmlParseElementContentDecl
1.22 daniel 5276: *
1.61 daniel 5277: *
1.22 daniel 5278: * [47] children ::= (choice | seq) ('?' | '*' | '+')?
5279: *
5280: * [48] cp ::= (Name | choice | seq) ('?' | '*' | '+')?
5281: *
5282: * [49] choice ::= '(' S? cp ( S? '|' S? cp )* S? ')'
5283: *
5284: * [50] seq ::= '(' S? cp ( S? ',' S? cp )* S? ')'
5285: *
1.99 daniel 5286: * [ VC: Proper Group/PE Nesting ] applies to [49] and [50]
5287: * TODO Parameter-entity replacement text must be properly nested
5288: * with parenthetized groups. That is to say, if either of the
5289: * opening or closing parentheses in a choice, seq, or Mixed
5290: * construct is contained in the replacement text for a parameter
5291: * entity, both must be contained in the same replacement text. For
5292: * interoperability, if a parameter-entity reference appears in a
5293: * choice, seq, or Mixed construct, its replacement text should not
5294: * be empty, and neither the first nor last non-blank character of
5295: * the replacement text should be a connector (| or ,).
5296: *
1.62 daniel 5297: * returns: the tree of xmlElementContentPtr describing the element
1.61 daniel 5298: * hierarchy.
5299: */
5300: xmlElementContentPtr
1.62 daniel 5301: xmlParseElementChildrenContentDecl(xmlParserCtxtPtr ctxt) {
1.63 daniel 5302: xmlElementContentPtr ret = NULL, cur = NULL, last = NULL, op = NULL;
1.123 daniel 5303: xmlChar *elem;
5304: xmlChar type = 0;
1.62 daniel 5305:
5306: SKIP_BLANKS;
1.94 daniel 5307: GROW;
1.152 daniel 5308: if (RAW == '(') {
1.63 daniel 5309: /* Recurse on first child */
1.62 daniel 5310: NEXT;
5311: SKIP_BLANKS;
5312: cur = ret = xmlParseElementChildrenContentDecl(ctxt);
5313: SKIP_BLANKS;
1.101 daniel 5314: GROW;
1.62 daniel 5315: } else {
5316: elem = xmlParseName(ctxt);
5317: if (elem == NULL) {
5318: if ((ctxt->sax != NULL) && (ctxt->sax->error != NULL))
1.74 daniel 5319: ctxt->sax->error(ctxt->userData,
1.62 daniel 5320: "xmlParseElementChildrenContentDecl : Name or '(' expected\n");
1.123 daniel 5321: ctxt->errNo = XML_ERR_ELEMCONTENT_NOT_STARTED;
1.62 daniel 5322: ctxt->wellFormed = 0;
5323: return(NULL);
5324: }
5325: cur = ret = xmlNewElementContent(elem, XML_ELEMENT_CONTENT_ELEMENT);
1.101 daniel 5326: GROW;
1.152 daniel 5327: if (RAW == '?') {
1.104 daniel 5328: cur->ocur = XML_ELEMENT_CONTENT_OPT;
1.62 daniel 5329: NEXT;
1.152 daniel 5330: } else if (RAW == '*') {
1.104 daniel 5331: cur->ocur = XML_ELEMENT_CONTENT_MULT;
1.62 daniel 5332: NEXT;
1.152 daniel 5333: } else if (RAW == '+') {
1.104 daniel 5334: cur->ocur = XML_ELEMENT_CONTENT_PLUS;
1.62 daniel 5335: NEXT;
5336: } else {
1.104 daniel 5337: cur->ocur = XML_ELEMENT_CONTENT_ONCE;
1.62 daniel 5338: }
1.119 daniel 5339: xmlFree(elem);
1.101 daniel 5340: GROW;
1.62 daniel 5341: }
5342: SKIP_BLANKS;
1.91 daniel 5343: SHRINK;
1.152 daniel 5344: while (RAW != ')') {
1.63 daniel 5345: /*
5346: * Each loop we parse one separator and one element.
5347: */
1.152 daniel 5348: if (RAW == ',') {
1.62 daniel 5349: if (type == 0) type = CUR;
5350:
5351: /*
5352: * Detect "Name | Name , Name" error
5353: */
5354: else if (type != CUR) {
5355: if ((ctxt->sax != NULL) && (ctxt->sax->error != NULL))
1.74 daniel 5356: ctxt->sax->error(ctxt->userData,
1.62 daniel 5357: "xmlParseElementChildrenContentDecl : '%c' expected\n",
5358: type);
1.123 daniel 5359: ctxt->errNo = XML_ERR_SEPARATOR_REQUIRED;
1.62 daniel 5360: ctxt->wellFormed = 0;
1.170 daniel 5361: if ((op != NULL) && (op != ret))
5362: xmlFreeElementContent(op);
5363: if ((last != NULL) && (last != ret))
5364: xmlFreeElementContent(last);
5365: if (ret != NULL)
5366: xmlFreeElementContent(ret);
1.62 daniel 5367: return(NULL);
5368: }
1.64 daniel 5369: NEXT;
1.62 daniel 5370:
1.63 daniel 5371: op = xmlNewElementContent(NULL, XML_ELEMENT_CONTENT_SEQ);
5372: if (op == NULL) {
5373: xmlFreeElementContent(ret);
5374: return(NULL);
5375: }
5376: if (last == NULL) {
5377: op->c1 = ret;
1.65 daniel 5378: ret = cur = op;
1.63 daniel 5379: } else {
5380: cur->c2 = op;
5381: op->c1 = last;
5382: cur =op;
1.65 daniel 5383: last = NULL;
1.63 daniel 5384: }
1.152 daniel 5385: } else if (RAW == '|') {
1.62 daniel 5386: if (type == 0) type = CUR;
5387:
5388: /*
1.63 daniel 5389: * Detect "Name , Name | Name" error
1.62 daniel 5390: */
5391: else if (type != CUR) {
5392: if ((ctxt->sax != NULL) && (ctxt->sax->error != NULL))
1.74 daniel 5393: ctxt->sax->error(ctxt->userData,
1.62 daniel 5394: "xmlParseElementChildrenContentDecl : '%c' expected\n",
5395: type);
1.123 daniel 5396: ctxt->errNo = XML_ERR_SEPARATOR_REQUIRED;
1.62 daniel 5397: ctxt->wellFormed = 0;
1.170 daniel 5398: if ((op != NULL) && (op != ret))
5399: xmlFreeElementContent(op);
5400: if ((last != NULL) && (last != ret))
5401: xmlFreeElementContent(last);
5402: if (ret != NULL)
5403: xmlFreeElementContent(ret);
1.62 daniel 5404: return(NULL);
5405: }
1.64 daniel 5406: NEXT;
1.62 daniel 5407:
1.63 daniel 5408: op = xmlNewElementContent(NULL, XML_ELEMENT_CONTENT_OR);
5409: if (op == NULL) {
1.170 daniel 5410: if ((op != NULL) && (op != ret))
5411: xmlFreeElementContent(op);
5412: if ((last != NULL) && (last != ret))
5413: xmlFreeElementContent(last);
5414: if (ret != NULL)
5415: xmlFreeElementContent(ret);
1.63 daniel 5416: return(NULL);
5417: }
5418: if (last == NULL) {
5419: op->c1 = ret;
1.65 daniel 5420: ret = cur = op;
1.63 daniel 5421: } else {
5422: cur->c2 = op;
5423: op->c1 = last;
5424: cur =op;
1.65 daniel 5425: last = NULL;
1.63 daniel 5426: }
1.62 daniel 5427: } else {
5428: if ((ctxt->sax != NULL) && (ctxt->sax->error != NULL))
1.74 daniel 5429: ctxt->sax->error(ctxt->userData,
1.62 daniel 5430: "xmlParseElementChildrenContentDecl : ',' '|' or ')' expected\n");
5431: ctxt->wellFormed = 0;
1.123 daniel 5432: ctxt->errNo = XML_ERR_ELEMCONTENT_NOT_FINISHED;
1.170 daniel 5433: if ((op != NULL) && (op != ret))
5434: xmlFreeElementContent(op);
5435: if ((last != NULL) && (last != ret))
5436: xmlFreeElementContent(last);
5437: if (ret != NULL)
5438: xmlFreeElementContent(ret);
1.62 daniel 5439: return(NULL);
5440: }
1.101 daniel 5441: GROW;
1.62 daniel 5442: SKIP_BLANKS;
1.101 daniel 5443: GROW;
1.152 daniel 5444: if (RAW == '(') {
1.63 daniel 5445: /* Recurse on second child */
1.62 daniel 5446: NEXT;
5447: SKIP_BLANKS;
1.65 daniel 5448: last = xmlParseElementChildrenContentDecl(ctxt);
1.62 daniel 5449: SKIP_BLANKS;
5450: } else {
5451: elem = xmlParseName(ctxt);
5452: if (elem == NULL) {
5453: if ((ctxt->sax != NULL) && (ctxt->sax->error != NULL))
1.74 daniel 5454: ctxt->sax->error(ctxt->userData,
1.122 daniel 5455: "xmlParseElementChildrenContentDecl : Name or '(' expected\n");
1.123 daniel 5456: ctxt->errNo = XML_ERR_ELEMCONTENT_NOT_STARTED;
1.62 daniel 5457: ctxt->wellFormed = 0;
1.170 daniel 5458: if ((op != NULL) && (op != ret))
5459: xmlFreeElementContent(op);
5460: if ((last != NULL) && (last != ret))
5461: xmlFreeElementContent(last);
5462: if (ret != NULL)
5463: xmlFreeElementContent(ret);
1.62 daniel 5464: return(NULL);
5465: }
1.65 daniel 5466: last = xmlNewElementContent(elem, XML_ELEMENT_CONTENT_ELEMENT);
1.119 daniel 5467: xmlFree(elem);
1.152 daniel 5468: if (RAW == '?') {
1.105 daniel 5469: last->ocur = XML_ELEMENT_CONTENT_OPT;
5470: NEXT;
1.152 daniel 5471: } else if (RAW == '*') {
1.105 daniel 5472: last->ocur = XML_ELEMENT_CONTENT_MULT;
5473: NEXT;
1.152 daniel 5474: } else if (RAW == '+') {
1.105 daniel 5475: last->ocur = XML_ELEMENT_CONTENT_PLUS;
5476: NEXT;
5477: } else {
5478: last->ocur = XML_ELEMENT_CONTENT_ONCE;
5479: }
1.63 daniel 5480: }
5481: SKIP_BLANKS;
1.97 daniel 5482: GROW;
1.64 daniel 5483: }
1.65 daniel 5484: if ((cur != NULL) && (last != NULL)) {
5485: cur->c2 = last;
1.62 daniel 5486: }
5487: NEXT;
1.152 daniel 5488: if (RAW == '?') {
1.62 daniel 5489: ret->ocur = XML_ELEMENT_CONTENT_OPT;
5490: NEXT;
1.152 daniel 5491: } else if (RAW == '*') {
1.62 daniel 5492: ret->ocur = XML_ELEMENT_CONTENT_MULT;
5493: NEXT;
1.152 daniel 5494: } else if (RAW == '+') {
1.62 daniel 5495: ret->ocur = XML_ELEMENT_CONTENT_PLUS;
5496: NEXT;
5497: }
5498: return(ret);
1.61 daniel 5499: }
5500:
5501: /**
5502: * xmlParseElementContentDecl:
5503: * @ctxt: an XML parser context
5504: * @name: the name of the element being defined.
5505: * @result: the Element Content pointer will be stored here if any
1.22 daniel 5506: *
1.61 daniel 5507: * parse the declaration for an Element content either Mixed or Children,
5508: * the cases EMPTY and ANY are handled directly in xmlParseElementDecl
5509: *
5510: * [46] contentspec ::= 'EMPTY' | 'ANY' | Mixed | children
1.50 daniel 5511: *
1.61 daniel 5512: * returns: the type of element content XML_ELEMENT_TYPE_xxx
1.22 daniel 5513: */
5514:
1.61 daniel 5515: int
1.123 daniel 5516: xmlParseElementContentDecl(xmlParserCtxtPtr ctxt, xmlChar *name,
1.61 daniel 5517: xmlElementContentPtr *result) {
5518:
5519: xmlElementContentPtr tree = NULL;
5520: int res;
5521:
5522: *result = NULL;
5523:
1.152 daniel 5524: if (RAW != '(') {
1.61 daniel 5525: if ((ctxt->sax != NULL) && (ctxt->sax->error != NULL))
1.74 daniel 5526: ctxt->sax->error(ctxt->userData,
1.61 daniel 5527: "xmlParseElementContentDecl : '(' expected\n");
1.123 daniel 5528: ctxt->errNo = XML_ERR_ELEMCONTENT_NOT_STARTED;
1.61 daniel 5529: ctxt->wellFormed = 0;
5530: return(-1);
5531: }
5532: NEXT;
1.97 daniel 5533: GROW;
1.61 daniel 5534: SKIP_BLANKS;
1.152 daniel 5535: if ((RAW == '#') && (NXT(1) == 'P') &&
1.61 daniel 5536: (NXT(2) == 'C') && (NXT(3) == 'D') &&
5537: (NXT(4) == 'A') && (NXT(5) == 'T') &&
5538: (NXT(6) == 'A')) {
1.62 daniel 5539: tree = xmlParseElementMixedContentDecl(ctxt);
1.61 daniel 5540: res = XML_ELEMENT_TYPE_MIXED;
5541: } else {
1.62 daniel 5542: tree = xmlParseElementChildrenContentDecl(ctxt);
1.61 daniel 5543: res = XML_ELEMENT_TYPE_ELEMENT;
5544: }
5545: SKIP_BLANKS;
1.63 daniel 5546: /****************************
1.152 daniel 5547: if (RAW != ')') {
1.61 daniel 5548: if ((ctxt->sax != NULL) && (ctxt->sax->error != NULL))
1.74 daniel 5549: ctxt->sax->error(ctxt->userData,
1.61 daniel 5550: "xmlParseElementContentDecl : ')' expected\n");
5551: ctxt->wellFormed = 0;
5552: return(-1);
5553: }
1.63 daniel 5554: ****************************/
5555: *result = tree;
1.61 daniel 5556: return(res);
1.22 daniel 5557: }
5558:
1.50 daniel 5559: /**
5560: * xmlParseElementDecl:
5561: * @ctxt: an XML parser context
5562: *
5563: * parse an Element declaration.
1.22 daniel 5564: *
5565: * [45] elementdecl ::= '<!ELEMENT' S Name S contentspec S? '>'
5566: *
1.99 daniel 5567: * [ VC: Unique Element Type Declaration ]
1.117 daniel 5568: * No element type may be declared more than once
1.69 daniel 5569: *
5570: * Returns the type of the element, or -1 in case of error
1.22 daniel 5571: */
1.59 daniel 5572: int
1.55 daniel 5573: xmlParseElementDecl(xmlParserCtxtPtr ctxt) {
1.123 daniel 5574: xmlChar *name;
1.59 daniel 5575: int ret = -1;
1.61 daniel 5576: xmlElementContentPtr content = NULL;
1.22 daniel 5577:
1.97 daniel 5578: GROW;
1.152 daniel 5579: if ((RAW == '<') && (NXT(1) == '!') &&
1.40 daniel 5580: (NXT(2) == 'E') && (NXT(3) == 'L') &&
5581: (NXT(4) == 'E') && (NXT(5) == 'M') &&
5582: (NXT(6) == 'E') && (NXT(7) == 'N') &&
1.59 daniel 5583: (NXT(8) == 'T')) {
1.40 daniel 5584: SKIP(9);
1.59 daniel 5585: if (!IS_BLANK(CUR)) {
5586: if ((ctxt->sax != NULL) && (ctxt->sax->error != NULL))
1.74 daniel 5587: ctxt->sax->error(ctxt->userData,
1.59 daniel 5588: "Space required after 'ELEMENT'\n");
1.123 daniel 5589: ctxt->errNo = XML_ERR_SPACE_REQUIRED;
1.59 daniel 5590: ctxt->wellFormed = 0;
5591: }
1.42 daniel 5592: SKIP_BLANKS;
1.22 daniel 5593: name = xmlParseName(ctxt);
5594: if (name == NULL) {
1.55 daniel 5595: if ((ctxt->sax != NULL) && (ctxt->sax->error != NULL))
1.74 daniel 5596: ctxt->sax->error(ctxt->userData,
1.59 daniel 5597: "xmlParseElementDecl: no name for Element\n");
1.123 daniel 5598: ctxt->errNo = XML_ERR_NAME_REQUIRED;
1.59 daniel 5599: ctxt->wellFormed = 0;
5600: return(-1);
5601: }
5602: if (!IS_BLANK(CUR)) {
5603: if ((ctxt->sax != NULL) && (ctxt->sax->error != NULL))
1.74 daniel 5604: ctxt->sax->error(ctxt->userData,
1.59 daniel 5605: "Space required after the element name\n");
1.123 daniel 5606: ctxt->errNo = XML_ERR_SPACE_REQUIRED;
1.59 daniel 5607: ctxt->wellFormed = 0;
1.22 daniel 5608: }
1.42 daniel 5609: SKIP_BLANKS;
1.152 daniel 5610: if ((RAW == 'E') && (NXT(1) == 'M') &&
1.40 daniel 5611: (NXT(2) == 'P') && (NXT(3) == 'T') &&
5612: (NXT(4) == 'Y')) {
5613: SKIP(5);
1.22 daniel 5614: /*
5615: * Element must always be empty.
5616: */
1.59 daniel 5617: ret = XML_ELEMENT_TYPE_EMPTY;
1.152 daniel 5618: } else if ((RAW == 'A') && (NXT(1) == 'N') &&
1.40 daniel 5619: (NXT(2) == 'Y')) {
5620: SKIP(3);
1.22 daniel 5621: /*
5622: * Element is a generic container.
5623: */
1.59 daniel 5624: ret = XML_ELEMENT_TYPE_ANY;
1.152 daniel 5625: } else if (RAW == '(') {
1.61 daniel 5626: ret = xmlParseElementContentDecl(ctxt, name, &content);
1.22 daniel 5627: } else {
1.98 daniel 5628: /*
5629: * [ WFC: PEs in Internal Subset ] error handling.
5630: */
1.152 daniel 5631: if ((RAW == '%') && (ctxt->external == 0) &&
1.98 daniel 5632: (ctxt->inputNr == 1)) {
5633: if ((ctxt->sax != NULL) && (ctxt->sax->error != NULL))
5634: ctxt->sax->error(ctxt->userData,
5635: "PEReference: forbidden within markup decl in internal subset\n");
1.123 daniel 5636: ctxt->errNo = XML_ERR_PEREF_IN_INT_SUBSET;
1.98 daniel 5637: } else {
5638: if ((ctxt->sax != NULL) && (ctxt->sax->error != NULL))
5639: ctxt->sax->error(ctxt->userData,
5640: "xmlParseElementDecl: 'EMPTY', 'ANY' or '(' expected\n");
1.123 daniel 5641: ctxt->errNo = XML_ERR_ELEMCONTENT_NOT_STARTED;
1.98 daniel 5642: }
1.61 daniel 5643: ctxt->wellFormed = 0;
1.119 daniel 5644: if (name != NULL) xmlFree(name);
1.61 daniel 5645: return(-1);
1.22 daniel 5646: }
1.142 daniel 5647:
5648: SKIP_BLANKS;
5649: /*
5650: * Pop-up of finished entities.
5651: */
1.152 daniel 5652: while ((RAW == 0) && (ctxt->inputNr > 1))
1.142 daniel 5653: xmlPopInput(ctxt);
1.42 daniel 5654: SKIP_BLANKS;
1.142 daniel 5655:
1.152 daniel 5656: if (RAW != '>') {
1.55 daniel 5657: if ((ctxt->sax != NULL) && (ctxt->sax->error != NULL))
1.74 daniel 5658: ctxt->sax->error(ctxt->userData,
1.31 daniel 5659: "xmlParseElementDecl: expected '>' at the end\n");
1.123 daniel 5660: ctxt->errNo = XML_ERR_GT_REQUIRED;
1.59 daniel 5661: ctxt->wellFormed = 0;
1.61 daniel 5662: } else {
1.40 daniel 5663: NEXT;
1.171 daniel 5664: if ((ctxt->sax != NULL) && (!ctxt->disableSAX) &&
5665: (ctxt->sax->elementDecl != NULL))
1.76 daniel 5666: ctxt->sax->elementDecl(ctxt->userData, name, ret,
5667: content);
1.61 daniel 5668: }
1.84 daniel 5669: if (content != NULL) {
5670: xmlFreeElementContent(content);
5671: }
1.61 daniel 5672: if (name != NULL) {
1.119 daniel 5673: xmlFree(name);
1.61 daniel 5674: }
1.22 daniel 5675: }
1.59 daniel 5676: return(ret);
1.22 daniel 5677: }
5678:
1.50 daniel 5679: /**
5680: * xmlParseMarkupDecl:
5681: * @ctxt: an XML parser context
5682: *
5683: * parse Markup declarations
1.22 daniel 5684: *
5685: * [29] markupdecl ::= elementdecl | AttlistDecl | EntityDecl |
5686: * NotationDecl | PI | Comment
5687: *
1.98 daniel 5688: * [ VC: Proper Declaration/PE Nesting ]
5689: * TODO Parameter-entity replacement text must be properly nested with
5690: * markup declarations. That is to say, if either the first character
5691: * or the last character of a markup declaration (markupdecl above) is
5692: * contained in the replacement text for a parameter-entity reference,
5693: * both must be contained in the same replacement text.
5694: *
5695: * [ WFC: PEs in Internal Subset ]
5696: * In the internal DTD subset, parameter-entity references can occur
5697: * only where markup declarations can occur, not within markup declarations.
5698: * (This does not apply to references that occur in external parameter
5699: * entities or to the external subset.)
1.22 daniel 5700: */
1.55 daniel 5701: void
5702: xmlParseMarkupDecl(xmlParserCtxtPtr ctxt) {
1.94 daniel 5703: GROW;
1.22 daniel 5704: xmlParseElementDecl(ctxt);
5705: xmlParseAttributeListDecl(ctxt);
5706: xmlParseEntityDecl(ctxt);
5707: xmlParseNotationDecl(ctxt);
5708: xmlParsePI(ctxt);
1.114 daniel 5709: xmlParseComment(ctxt);
1.98 daniel 5710: /*
5711: * This is only for internal subset. On external entities,
5712: * the replacement is done before parsing stage
5713: */
5714: if ((ctxt->external == 0) && (ctxt->inputNr == 1))
5715: xmlParsePEReference(ctxt);
1.97 daniel 5716: ctxt->instate = XML_PARSER_DTD;
1.22 daniel 5717: }
5718:
1.50 daniel 5719: /**
1.76 daniel 5720: * xmlParseTextDecl:
5721: * @ctxt: an XML parser context
5722: *
5723: * parse an XML declaration header for external entities
5724: *
5725: * [77] TextDecl ::= '<?xml' VersionInfo? EncodingDecl S? '?>'
1.176 daniel 5726: *
5727: * Question: Seems that EncodingDecl is mandatory ? Is that a typo ?
1.76 daniel 5728: */
5729:
1.172 daniel 5730: void
1.76 daniel 5731: xmlParseTextDecl(xmlParserCtxtPtr ctxt) {
1.123 daniel 5732: xmlChar *version;
1.76 daniel 5733:
5734: /*
5735: * We know that '<?xml' is here.
5736: */
5737: SKIP(5);
5738:
5739: if (!IS_BLANK(CUR)) {
5740: if ((ctxt->sax != NULL) && (ctxt->sax->error != NULL))
1.122 daniel 5741: ctxt->sax->error(ctxt->userData,
5742: "Space needed after '<?xml'\n");
1.123 daniel 5743: ctxt->errNo = XML_ERR_SPACE_REQUIRED;
1.76 daniel 5744: ctxt->wellFormed = 0;
5745: }
5746: SKIP_BLANKS;
5747:
5748: /*
5749: * We may have the VersionInfo here.
5750: */
5751: version = xmlParseVersionInfo(ctxt);
5752: if (version == NULL)
5753: version = xmlCharStrdup(XML_DEFAULT_VERSION);
1.172 daniel 5754: ctxt->input->version = version;
1.76 daniel 5755:
5756: /*
5757: * We must have the encoding declaration
5758: */
5759: if (!IS_BLANK(CUR)) {
5760: if ((ctxt->sax != NULL) && (ctxt->sax->error != NULL))
1.122 daniel 5761: ctxt->sax->error(ctxt->userData, "Space needed here\n");
1.123 daniel 5762: ctxt->errNo = XML_ERR_SPACE_REQUIRED;
1.76 daniel 5763: ctxt->wellFormed = 0;
5764: }
1.172 daniel 5765: ctxt->input->encoding = xmlParseEncodingDecl(ctxt);
1.76 daniel 5766:
5767: SKIP_BLANKS;
1.152 daniel 5768: if ((RAW == '?') && (NXT(1) == '>')) {
1.76 daniel 5769: SKIP(2);
1.152 daniel 5770: } else if (RAW == '>') {
1.76 daniel 5771: /* Deprecated old WD ... */
5772: if ((ctxt->sax != NULL) && (ctxt->sax->error != NULL))
1.122 daniel 5773: ctxt->sax->error(ctxt->userData,
5774: "XML declaration must end-up with '?>'\n");
1.123 daniel 5775: ctxt->errNo = XML_ERR_XMLDECL_NOT_FINISHED;
1.76 daniel 5776: ctxt->wellFormed = 0;
5777: NEXT;
5778: } else {
5779: if ((ctxt->sax != NULL) && (ctxt->sax->error != NULL))
1.122 daniel 5780: ctxt->sax->error(ctxt->userData,
5781: "parsing XML declaration: '?>' expected\n");
1.123 daniel 5782: ctxt->errNo = XML_ERR_XMLDECL_NOT_FINISHED;
1.76 daniel 5783: ctxt->wellFormed = 0;
5784: MOVETO_ENDTAG(CUR_PTR);
5785: NEXT;
5786: }
5787: }
5788:
5789: /*
5790: * xmlParseConditionalSections
5791: * @ctxt: an XML parser context
5792: *
5793: * TODO : Conditionnal section are not yet supported !
5794: *
5795: * [61] conditionalSect ::= includeSect | ignoreSect
5796: * [62] includeSect ::= '<![' S? 'INCLUDE' S? '[' extSubsetDecl ']]>'
5797: * [63] ignoreSect ::= '<![' S? 'IGNORE' S? '[' ignoreSectContents* ']]>'
5798: * [64] ignoreSectContents ::= Ignore ('<![' ignoreSectContents ']]>' Ignore)*
5799: * [65] Ignore ::= Char* - (Char* ('<![' | ']]>') Char*)
5800: */
5801:
5802: void
5803: xmlParseConditionalSections(xmlParserCtxtPtr ctxt) {
1.165 daniel 5804: SKIP(3);
5805: SKIP_BLANKS;
1.168 daniel 5806: if ((RAW == 'I') && (NXT(1) == 'N') && (NXT(2) == 'C') &&
5807: (NXT(3) == 'L') && (NXT(4) == 'U') && (NXT(5) == 'D') &&
5808: (NXT(6) == 'E')) {
1.165 daniel 5809: SKIP(7);
1.168 daniel 5810: SKIP_BLANKS;
5811: if (RAW != '[') {
5812: if ((ctxt->sax != NULL) && (ctxt->sax->error != NULL))
5813: ctxt->sax->error(ctxt->userData,
5814: "XML conditional section '[' expected\n");
5815: ctxt->errNo = XML_ERR_CONDSEC_INVALID;
5816: ctxt->wellFormed = 0;
5817: } else {
5818: NEXT;
5819: }
1.165 daniel 5820: while ((RAW != 0) && ((RAW != ']') || (NXT(1) != ']') ||
5821: (NXT(2) != '>'))) {
5822: const xmlChar *check = CUR_PTR;
5823: int cons = ctxt->input->consumed;
5824: int tok = ctxt->token;
5825:
5826: if ((RAW == '<') && (NXT(1) == '!') && (NXT(2) == '[')) {
5827: xmlParseConditionalSections(ctxt);
5828: } else if (IS_BLANK(CUR)) {
5829: NEXT;
5830: } else if (RAW == '%') {
5831: xmlParsePEReference(ctxt);
5832: } else
5833: xmlParseMarkupDecl(ctxt);
5834:
5835: /*
5836: * Pop-up of finished entities.
5837: */
5838: while ((RAW == 0) && (ctxt->inputNr > 1))
5839: xmlPopInput(ctxt);
5840:
5841: if ((CUR_PTR == check) && (cons == ctxt->input->consumed) &&
5842: (tok == ctxt->token)) {
5843: if ((ctxt->sax != NULL) && (ctxt->sax->error != NULL))
5844: ctxt->sax->error(ctxt->userData,
5845: "Content error in the external subset\n");
5846: ctxt->wellFormed = 0;
5847: ctxt->errNo = XML_ERR_EXT_SUBSET_NOT_FINISHED;
5848: break;
5849: }
5850: }
1.168 daniel 5851: } else if ((RAW == 'I') && (NXT(1) == 'G') && (NXT(2) == 'N') &&
5852: (NXT(3) == 'O') && (NXT(4) == 'R') && (NXT(5) == 'E')) {
1.171 daniel 5853: int state;
5854:
1.168 daniel 5855: SKIP(6);
5856: SKIP_BLANKS;
5857: if (RAW != '[') {
5858: if ((ctxt->sax != NULL) && (ctxt->sax->error != NULL))
5859: ctxt->sax->error(ctxt->userData,
5860: "XML conditional section '[' expected\n");
5861: ctxt->errNo = XML_ERR_CONDSEC_INVALID;
5862: ctxt->wellFormed = 0;
5863: } else {
5864: NEXT;
5865: }
1.171 daniel 5866:
1.143 daniel 5867: /*
1.171 daniel 5868: * Parse up to the end of the conditionnal section
5869: * But disable SAX event generating DTD building in the meantime
1.143 daniel 5870: */
1.171 daniel 5871: state = ctxt->disableSAX;
1.165 daniel 5872: while ((RAW != 0) && ((RAW != ']') || (NXT(1) != ']') ||
5873: (NXT(2) != '>'))) {
1.171 daniel 5874: const xmlChar *check = CUR_PTR;
5875: int cons = ctxt->input->consumed;
5876: int tok = ctxt->token;
5877:
5878: if ((RAW == '<') && (NXT(1) == '!') && (NXT(2) == '[')) {
5879: xmlParseConditionalSections(ctxt);
5880: } else if (IS_BLANK(CUR)) {
5881: NEXT;
5882: } else if (RAW == '%') {
5883: xmlParsePEReference(ctxt);
5884: } else
5885: xmlParseMarkupDecl(ctxt);
5886:
1.165 daniel 5887: /*
5888: * Pop-up of finished entities.
5889: */
5890: while ((RAW == 0) && (ctxt->inputNr > 1))
5891: xmlPopInput(ctxt);
1.143 daniel 5892:
1.171 daniel 5893: if ((CUR_PTR == check) && (cons == ctxt->input->consumed) &&
5894: (tok == ctxt->token)) {
5895: if ((ctxt->sax != NULL) && (ctxt->sax->error != NULL))
5896: ctxt->sax->error(ctxt->userData,
5897: "Content error in the external subset\n");
5898: ctxt->wellFormed = 0;
5899: ctxt->errNo = XML_ERR_EXT_SUBSET_NOT_FINISHED;
5900: break;
5901: }
1.165 daniel 5902: }
1.171 daniel 5903: ctxt->disableSAX = state;
1.168 daniel 5904: } else {
5905: if ((ctxt->sax != NULL) && (ctxt->sax->error != NULL))
5906: ctxt->sax->error(ctxt->userData,
5907: "XML conditional section INCLUDE or IGNORE keyword expected\n");
5908: ctxt->errNo = XML_ERR_CONDSEC_INVALID;
5909: ctxt->wellFormed = 0;
1.143 daniel 5910: }
5911:
1.152 daniel 5912: if (RAW == 0)
1.143 daniel 5913: SHRINK;
5914:
1.152 daniel 5915: if (RAW == 0) {
1.76 daniel 5916: if ((ctxt->sax != NULL) && (ctxt->sax->error != NULL))
5917: ctxt->sax->error(ctxt->userData,
5918: "XML conditional section not closed\n");
1.123 daniel 5919: ctxt->errNo = XML_ERR_CONDSEC_NOT_FINISHED;
1.76 daniel 5920: ctxt->wellFormed = 0;
1.143 daniel 5921: } else {
5922: SKIP(3);
1.76 daniel 5923: }
5924: }
5925:
5926: /**
1.124 daniel 5927: * xmlParseExternalSubset:
1.76 daniel 5928: * @ctxt: an XML parser context
1.124 daniel 5929: * @ExternalID: the external identifier
5930: * @SystemID: the system identifier (or URL)
1.76 daniel 5931: *
5932: * parse Markup declarations from an external subset
5933: *
5934: * [30] extSubset ::= textDecl? extSubsetDecl
5935: *
5936: * [31] extSubsetDecl ::= (markupdecl | conditionalSect | PEReference | S) *
5937: */
5938: void
1.123 daniel 5939: xmlParseExternalSubset(xmlParserCtxtPtr ctxt, const xmlChar *ExternalID,
5940: const xmlChar *SystemID) {
1.132 daniel 5941: GROW;
1.152 daniel 5942: if ((RAW == '<') && (NXT(1) == '?') &&
1.76 daniel 5943: (NXT(2) == 'x') && (NXT(3) == 'm') &&
5944: (NXT(4) == 'l')) {
1.172 daniel 5945: xmlParseTextDecl(ctxt);
1.76 daniel 5946: }
1.79 daniel 5947: if (ctxt->myDoc == NULL) {
1.116 daniel 5948: ctxt->myDoc = xmlNewDoc(BAD_CAST "1.0");
1.79 daniel 5949: }
5950: if ((ctxt->myDoc != NULL) && (ctxt->myDoc->intSubset == NULL))
5951: xmlCreateIntSubset(ctxt->myDoc, NULL, ExternalID, SystemID);
5952:
1.96 daniel 5953: ctxt->instate = XML_PARSER_DTD;
1.101 daniel 5954: ctxt->external = 1;
1.152 daniel 5955: while (((RAW == '<') && (NXT(1) == '?')) ||
5956: ((RAW == '<') && (NXT(1) == '!')) ||
1.164 daniel 5957: IS_BLANK(CUR)) {
1.123 daniel 5958: const xmlChar *check = CUR_PTR;
1.115 daniel 5959: int cons = ctxt->input->consumed;
1.164 daniel 5960: int tok = ctxt->token;
1.115 daniel 5961:
1.152 daniel 5962: if ((RAW == '<') && (NXT(1) == '!') && (NXT(2) == '[')) {
1.76 daniel 5963: xmlParseConditionalSections(ctxt);
5964: } else if (IS_BLANK(CUR)) {
5965: NEXT;
1.152 daniel 5966: } else if (RAW == '%') {
1.76 daniel 5967: xmlParsePEReference(ctxt);
5968: } else
5969: xmlParseMarkupDecl(ctxt);
1.77 daniel 5970:
5971: /*
5972: * Pop-up of finished entities.
5973: */
1.166 daniel 5974: while ((RAW == 0) && (ctxt->inputNr > 1))
1.77 daniel 5975: xmlPopInput(ctxt);
5976:
1.164 daniel 5977: if ((CUR_PTR == check) && (cons == ctxt->input->consumed) &&
5978: (tok == ctxt->token)) {
1.115 daniel 5979: if ((ctxt->sax != NULL) && (ctxt->sax->error != NULL))
5980: ctxt->sax->error(ctxt->userData,
5981: "Content error in the external subset\n");
5982: ctxt->wellFormed = 0;
1.123 daniel 5983: ctxt->errNo = XML_ERR_EXT_SUBSET_NOT_FINISHED;
1.115 daniel 5984: break;
5985: }
1.76 daniel 5986: }
5987:
1.152 daniel 5988: if (RAW != 0) {
1.76 daniel 5989: if ((ctxt->sax != NULL) && (ctxt->sax->error != NULL))
5990: ctxt->sax->error(ctxt->userData,
5991: "Extra content at the end of the document\n");
1.123 daniel 5992: ctxt->errNo = XML_ERR_EXT_SUBSET_NOT_FINISHED;
1.76 daniel 5993: ctxt->wellFormed = 0;
5994: }
5995:
5996: }
5997:
5998: /**
1.77 daniel 5999: * xmlParseReference:
6000: * @ctxt: an XML parser context
6001: *
6002: * parse and handle entity references in content, depending on the SAX
6003: * interface, this may end-up in a call to character() if this is a
1.79 daniel 6004: * CharRef, a predefined entity, if there is no reference() callback.
6005: * or if the parser was asked to switch to that mode.
1.77 daniel 6006: *
6007: * [67] Reference ::= EntityRef | CharRef
6008: */
6009: void
6010: xmlParseReference(xmlParserCtxtPtr ctxt) {
6011: xmlEntityPtr ent;
1.123 daniel 6012: xmlChar *val;
1.152 daniel 6013: if (RAW != '&') return;
1.77 daniel 6014:
1.113 daniel 6015: if (ctxt->inputNr > 1) {
1.123 daniel 6016: xmlChar cur[2] = { '&' , 0 } ;
1.113 daniel 6017:
1.171 daniel 6018: if ((ctxt->sax != NULL) && (ctxt->sax->characters != NULL) &&
6019: (!ctxt->disableSAX))
1.113 daniel 6020: ctxt->sax->characters(ctxt->userData, cur, 1);
6021: if (ctxt->token == '&')
6022: ctxt->token = 0;
6023: else {
6024: SKIP(1);
6025: }
6026: return;
6027: }
1.77 daniel 6028: if (NXT(1) == '#') {
1.152 daniel 6029: int i = 0;
1.153 daniel 6030: xmlChar out[10];
6031: int hex = NXT(2);
1.77 daniel 6032: int val = xmlParseCharRef(ctxt);
1.152 daniel 6033:
1.153 daniel 6034: if (ctxt->encoding != NULL) {
6035: /*
6036: * So we are using non-UTF-8 buffers
6037: * Check that the char fit on 8bits, if not
6038: * generate a CharRef.
6039: */
6040: if (val <= 0xFF) {
6041: out[0] = val;
6042: out[1] = 0;
1.171 daniel 6043: if ((ctxt->sax != NULL) && (ctxt->sax->characters != NULL) &&
6044: (!ctxt->disableSAX))
1.153 daniel 6045: ctxt->sax->characters(ctxt->userData, out, 1);
6046: } else {
6047: if ((hex == 'x') || (hex == 'X'))
6048: sprintf((char *)out, "#x%X", val);
6049: else
6050: sprintf((char *)out, "#%d", val);
1.171 daniel 6051: if ((ctxt->sax != NULL) && (ctxt->sax->reference != NULL) &&
6052: (!ctxt->disableSAX))
1.153 daniel 6053: ctxt->sax->reference(ctxt->userData, out);
6054: }
6055: } else {
6056: /*
6057: * Just encode the value in UTF-8
6058: */
6059: COPY_BUF(0 ,out, i, val);
6060: out[i] = 0;
1.171 daniel 6061: if ((ctxt->sax != NULL) && (ctxt->sax->characters != NULL) &&
6062: (!ctxt->disableSAX))
1.153 daniel 6063: ctxt->sax->characters(ctxt->userData, out, i);
6064: }
1.77 daniel 6065: } else {
6066: ent = xmlParseEntityRef(ctxt);
6067: if (ent == NULL) return;
6068: if ((ent->name != NULL) &&
1.159 daniel 6069: (ent->etype != XML_INTERNAL_PREDEFINED_ENTITY)) {
1.113 daniel 6070: if ((ctxt->sax != NULL) && (ctxt->sax->reference != NULL) &&
1.171 daniel 6071: (ctxt->replaceEntities == 0) && (!ctxt->disableSAX)) {
1.113 daniel 6072: /*
6073: * Create a node.
6074: */
6075: ctxt->sax->reference(ctxt->userData, ent->name);
6076: return;
6077: } else if (ctxt->replaceEntities) {
6078: xmlParserInputPtr input;
1.79 daniel 6079:
1.113 daniel 6080: input = xmlNewEntityInputStream(ctxt, ent);
6081: xmlPushInput(ctxt, input);
1.167 daniel 6082: if ((ent->etype == XML_EXTERNAL_GENERAL_PARSED_ENTITY) &&
6083: (RAW == '<') && (NXT(1) == '?') &&
6084: (NXT(2) == 'x') && (NXT(3) == 'm') &&
6085: (NXT(4) == 'l') && (IS_BLANK(NXT(5)))) {
1.172 daniel 6086: xmlParseTextDecl(ctxt);
1.167 daniel 6087: if (input->standalone) {
6088: if ((ctxt->sax != NULL) && (ctxt->sax->error != NULL))
6089: ctxt->sax->error(ctxt->userData,
6090: "external parsed entities cannot be standalone\n");
6091: ctxt->errNo = XML_ERR_EXT_ENTITY_STANDALONE;
6092: ctxt->wellFormed = 0;
6093: }
6094: }
1.113 daniel 6095: return;
6096: }
1.77 daniel 6097: }
6098: val = ent->content;
6099: if (val == NULL) return;
6100: /*
6101: * inline the entity.
6102: */
1.171 daniel 6103: if ((ctxt->sax != NULL) && (ctxt->sax->characters != NULL) &&
6104: (!ctxt->disableSAX))
1.77 daniel 6105: ctxt->sax->characters(ctxt->userData, val, xmlStrlen(val));
6106: }
1.24 daniel 6107: }
6108:
1.50 daniel 6109: /**
6110: * xmlParseEntityRef:
6111: * @ctxt: an XML parser context
6112: *
6113: * parse ENTITY references declarations
1.24 daniel 6114: *
6115: * [68] EntityRef ::= '&' Name ';'
1.68 daniel 6116: *
1.98 daniel 6117: * [ WFC: Entity Declared ]
6118: * In a document without any DTD, a document with only an internal DTD
6119: * subset which contains no parameter entity references, or a document
6120: * with "standalone='yes'", the Name given in the entity reference
6121: * must match that in an entity declaration, except that well-formed
6122: * documents need not declare any of the following entities: amp, lt,
6123: * gt, apos, quot. The declaration of a parameter entity must precede
6124: * any reference to it. Similarly, the declaration of a general entity
6125: * must precede any reference to it which appears in a default value in an
6126: * attribute-list declaration. Note that if entities are declared in the
6127: * external subset or in external parameter entities, a non-validating
6128: * processor is not obligated to read and process their declarations;
6129: * for such documents, the rule that an entity must be declared is a
6130: * well-formedness constraint only if standalone='yes'.
6131: *
6132: * [ WFC: Parsed Entity ]
6133: * An entity reference must not contain the name of an unparsed entity
6134: *
1.77 daniel 6135: * Returns the xmlEntityPtr if found, or NULL otherwise.
1.24 daniel 6136: */
1.77 daniel 6137: xmlEntityPtr
1.55 daniel 6138: xmlParseEntityRef(xmlParserCtxtPtr ctxt) {
1.123 daniel 6139: xmlChar *name;
1.72 daniel 6140: xmlEntityPtr ent = NULL;
1.24 daniel 6141:
1.91 daniel 6142: GROW;
1.111 daniel 6143:
1.152 daniel 6144: if (RAW == '&') {
1.40 daniel 6145: NEXT;
1.24 daniel 6146: name = xmlParseName(ctxt);
6147: if (name == NULL) {
1.55 daniel 6148: if ((ctxt->sax != NULL) && (ctxt->sax->error != NULL))
1.98 daniel 6149: ctxt->sax->error(ctxt->userData,
6150: "xmlParseEntityRef: no name\n");
1.123 daniel 6151: ctxt->errNo = XML_ERR_NAME_REQUIRED;
1.59 daniel 6152: ctxt->wellFormed = 0;
1.24 daniel 6153: } else {
1.152 daniel 6154: if (RAW == ';') {
1.40 daniel 6155: NEXT;
1.24 daniel 6156: /*
1.77 daniel 6157: * Ask first SAX for entity resolution, otherwise try the
6158: * predefined set.
6159: */
6160: if (ctxt->sax != NULL) {
6161: if (ctxt->sax->getEntity != NULL)
6162: ent = ctxt->sax->getEntity(ctxt->userData, name);
6163: if (ent == NULL)
6164: ent = xmlGetPredefinedEntity(name);
6165: }
6166: /*
1.98 daniel 6167: * [ WFC: Entity Declared ]
6168: * In a document without any DTD, a document with only an
6169: * internal DTD subset which contains no parameter entity
6170: * references, or a document with "standalone='yes'", the
6171: * Name given in the entity reference must match that in an
6172: * entity declaration, except that well-formed documents
6173: * need not declare any of the following entities: amp, lt,
6174: * gt, apos, quot.
6175: * The declaration of a parameter entity must precede any
6176: * reference to it.
6177: * Similarly, the declaration of a general entity must
6178: * precede any reference to it which appears in a default
6179: * value in an attribute-list declaration. Note that if
6180: * entities are declared in the external subset or in
6181: * external parameter entities, a non-validating processor
6182: * is not obligated to read and process their declarations;
6183: * for such documents, the rule that an entity must be
6184: * declared is a well-formedness constraint only if
6185: * standalone='yes'.
1.59 daniel 6186: */
1.77 daniel 6187: if (ent == NULL) {
1.98 daniel 6188: if ((ctxt->standalone == 1) ||
6189: ((ctxt->hasExternalSubset == 0) &&
6190: (ctxt->hasPErefs == 0))) {
6191: if ((ctxt->sax != NULL) && (ctxt->sax->error != NULL))
1.77 daniel 6192: ctxt->sax->error(ctxt->userData,
6193: "Entity '%s' not defined\n", name);
1.123 daniel 6194: ctxt->errNo = XML_ERR_UNDECLARED_ENTITY;
1.77 daniel 6195: ctxt->wellFormed = 0;
6196: } else {
1.98 daniel 6197: if ((ctxt->sax != NULL) && (ctxt->sax->warning != NULL))
6198: ctxt->sax->warning(ctxt->userData,
6199: "Entity '%s' not defined\n", name);
1.123 daniel 6200: ctxt->errNo = XML_WAR_UNDECLARED_ENTITY;
1.59 daniel 6201: }
1.77 daniel 6202: }
1.59 daniel 6203:
6204: /*
1.98 daniel 6205: * [ WFC: Parsed Entity ]
6206: * An entity reference must not contain the name of an
6207: * unparsed entity
6208: */
1.159 daniel 6209: else if (ent->etype == XML_EXTERNAL_GENERAL_UNPARSED_ENTITY) {
1.98 daniel 6210: if ((ctxt->sax != NULL) && (ctxt->sax->error != NULL))
6211: ctxt->sax->error(ctxt->userData,
6212: "Entity reference to unparsed entity %s\n", name);
1.123 daniel 6213: ctxt->errNo = XML_ERR_UNPARSED_ENTITY;
1.98 daniel 6214: ctxt->wellFormed = 0;
6215: }
6216:
6217: /*
6218: * [ WFC: No External Entity References ]
6219: * Attribute values cannot contain direct or indirect
6220: * entity references to external entities.
6221: */
6222: else if ((ctxt->instate == XML_PARSER_ATTRIBUTE_VALUE) &&
1.159 daniel 6223: (ent->etype == XML_EXTERNAL_GENERAL_PARSED_ENTITY)) {
1.98 daniel 6224: if ((ctxt->sax != NULL) && (ctxt->sax->error != NULL))
6225: ctxt->sax->error(ctxt->userData,
6226: "Attribute references external entity '%s'\n", name);
1.123 daniel 6227: ctxt->errNo = XML_ERR_ENTITY_IS_EXTERNAL;
1.98 daniel 6228: ctxt->wellFormed = 0;
6229: }
6230: /*
6231: * [ WFC: No < in Attribute Values ]
6232: * The replacement text of any entity referred to directly or
6233: * indirectly in an attribute value (other than "<") must
6234: * not contain a <.
1.59 daniel 6235: */
1.98 daniel 6236: else if ((ctxt->instate == XML_PARSER_ATTRIBUTE_VALUE) &&
1.116 daniel 6237: (ent != NULL) &&
6238: (xmlStrcmp(ent->name, BAD_CAST "lt")) &&
1.98 daniel 6239: (ent->content != NULL) &&
6240: (xmlStrchr(ent->content, '<'))) {
6241: if ((ctxt->sax != NULL) && (ctxt->sax->error != NULL))
6242: ctxt->sax->error(ctxt->userData,
6243: "'<' in entity '%s' is not allowed in attributes values\n", name);
1.123 daniel 6244: ctxt->errNo = XML_ERR_LT_IN_ATTRIBUTE;
1.98 daniel 6245: ctxt->wellFormed = 0;
6246: }
6247:
6248: /*
6249: * Internal check, no parameter entities here ...
6250: */
6251: else {
1.159 daniel 6252: switch (ent->etype) {
1.59 daniel 6253: case XML_INTERNAL_PARAMETER_ENTITY:
6254: case XML_EXTERNAL_PARAMETER_ENTITY:
6255: if ((ctxt->sax != NULL) && (ctxt->sax->error != NULL))
1.74 daniel 6256: ctxt->sax->error(ctxt->userData,
1.59 daniel 6257: "Attempt to reference the parameter entity '%s'\n", name);
1.123 daniel 6258: ctxt->errNo = XML_ERR_ENTITY_IS_PARAMETER;
1.59 daniel 6259: ctxt->wellFormed = 0;
6260: break;
6261: }
6262: }
6263:
6264: /*
1.98 daniel 6265: * [ WFC: No Recursion ]
1.117 daniel 6266: * TODO A parsed entity must not contain a recursive reference
6267: * to itself, either directly or indirectly.
1.59 daniel 6268: */
1.77 daniel 6269:
1.24 daniel 6270: } else {
1.55 daniel 6271: if ((ctxt->sax != NULL) && (ctxt->sax->error != NULL))
1.74 daniel 6272: ctxt->sax->error(ctxt->userData,
1.59 daniel 6273: "xmlParseEntityRef: expecting ';'\n");
1.123 daniel 6274: ctxt->errNo = XML_ERR_ENTITYREF_SEMICOL_MISSING;
1.59 daniel 6275: ctxt->wellFormed = 0;
1.24 daniel 6276: }
1.119 daniel 6277: xmlFree(name);
1.24 daniel 6278: }
6279: }
1.77 daniel 6280: return(ent);
1.24 daniel 6281: }
1.135 daniel 6282: /**
6283: * xmlParseStringEntityRef:
6284: * @ctxt: an XML parser context
6285: * @str: a pointer to an index in the string
6286: *
6287: * parse ENTITY references declarations, but this version parses it from
6288: * a string value.
6289: *
6290: * [68] EntityRef ::= '&' Name ';'
6291: *
6292: * [ WFC: Entity Declared ]
6293: * In a document without any DTD, a document with only an internal DTD
6294: * subset which contains no parameter entity references, or a document
6295: * with "standalone='yes'", the Name given in the entity reference
6296: * must match that in an entity declaration, except that well-formed
6297: * documents need not declare any of the following entities: amp, lt,
6298: * gt, apos, quot. The declaration of a parameter entity must precede
6299: * any reference to it. Similarly, the declaration of a general entity
6300: * must precede any reference to it which appears in a default value in an
6301: * attribute-list declaration. Note that if entities are declared in the
6302: * external subset or in external parameter entities, a non-validating
6303: * processor is not obligated to read and process their declarations;
6304: * for such documents, the rule that an entity must be declared is a
6305: * well-formedness constraint only if standalone='yes'.
6306: *
6307: * [ WFC: Parsed Entity ]
6308: * An entity reference must not contain the name of an unparsed entity
6309: *
6310: * Returns the xmlEntityPtr if found, or NULL otherwise. The str pointer
6311: * is updated to the current location in the string.
6312: */
6313: xmlEntityPtr
6314: xmlParseStringEntityRef(xmlParserCtxtPtr ctxt, const xmlChar ** str) {
6315: xmlChar *name;
6316: const xmlChar *ptr;
6317: xmlChar cur;
6318: xmlEntityPtr ent = NULL;
6319:
6320: GROW;
6321:
1.156 daniel 6322: if ((str == NULL) || (*str == NULL))
6323: return(NULL);
1.135 daniel 6324: ptr = *str;
6325: cur = *ptr;
6326: if (cur == '&') {
6327: ptr++;
6328: cur = *ptr;
6329: name = xmlParseStringName(ctxt, &ptr);
6330: if (name == NULL) {
6331: if ((ctxt->sax != NULL) && (ctxt->sax->error != NULL))
6332: ctxt->sax->error(ctxt->userData,
6333: "xmlParseEntityRef: no name\n");
6334: ctxt->errNo = XML_ERR_NAME_REQUIRED;
6335: ctxt->wellFormed = 0;
6336: } else {
1.152 daniel 6337: if (RAW == ';') {
1.135 daniel 6338: NEXT;
6339: /*
6340: * Ask first SAX for entity resolution, otherwise try the
6341: * predefined set.
6342: */
6343: if (ctxt->sax != NULL) {
6344: if (ctxt->sax->getEntity != NULL)
6345: ent = ctxt->sax->getEntity(ctxt->userData, name);
6346: if (ent == NULL)
6347: ent = xmlGetPredefinedEntity(name);
6348: }
6349: /*
6350: * [ WFC: Entity Declared ]
6351: * In a document without any DTD, a document with only an
6352: * internal DTD subset which contains no parameter entity
6353: * references, or a document with "standalone='yes'", the
6354: * Name given in the entity reference must match that in an
6355: * entity declaration, except that well-formed documents
6356: * need not declare any of the following entities: amp, lt,
6357: * gt, apos, quot.
6358: * The declaration of a parameter entity must precede any
6359: * reference to it.
6360: * Similarly, the declaration of a general entity must
6361: * precede any reference to it which appears in a default
6362: * value in an attribute-list declaration. Note that if
6363: * entities are declared in the external subset or in
6364: * external parameter entities, a non-validating processor
6365: * is not obligated to read and process their declarations;
6366: * for such documents, the rule that an entity must be
6367: * declared is a well-formedness constraint only if
6368: * standalone='yes'.
6369: */
6370: if (ent == NULL) {
6371: if ((ctxt->standalone == 1) ||
6372: ((ctxt->hasExternalSubset == 0) &&
6373: (ctxt->hasPErefs == 0))) {
6374: if ((ctxt->sax != NULL) && (ctxt->sax->error != NULL))
6375: ctxt->sax->error(ctxt->userData,
6376: "Entity '%s' not defined\n", name);
6377: ctxt->errNo = XML_ERR_UNDECLARED_ENTITY;
6378: ctxt->wellFormed = 0;
6379: } else {
6380: if ((ctxt->sax != NULL) && (ctxt->sax->warning != NULL))
6381: ctxt->sax->warning(ctxt->userData,
6382: "Entity '%s' not defined\n", name);
6383: ctxt->errNo = XML_WAR_UNDECLARED_ENTITY;
6384: }
6385: }
6386:
6387: /*
6388: * [ WFC: Parsed Entity ]
6389: * An entity reference must not contain the name of an
6390: * unparsed entity
6391: */
1.159 daniel 6392: else if (ent->etype == XML_EXTERNAL_GENERAL_UNPARSED_ENTITY) {
1.135 daniel 6393: if ((ctxt->sax != NULL) && (ctxt->sax->error != NULL))
6394: ctxt->sax->error(ctxt->userData,
6395: "Entity reference to unparsed entity %s\n", name);
6396: ctxt->errNo = XML_ERR_UNPARSED_ENTITY;
6397: ctxt->wellFormed = 0;
6398: }
6399:
6400: /*
6401: * [ WFC: No External Entity References ]
6402: * Attribute values cannot contain direct or indirect
6403: * entity references to external entities.
6404: */
6405: else if ((ctxt->instate == XML_PARSER_ATTRIBUTE_VALUE) &&
1.159 daniel 6406: (ent->etype == XML_EXTERNAL_GENERAL_PARSED_ENTITY)) {
1.135 daniel 6407: if ((ctxt->sax != NULL) && (ctxt->sax->error != NULL))
6408: ctxt->sax->error(ctxt->userData,
6409: "Attribute references external entity '%s'\n", name);
6410: ctxt->errNo = XML_ERR_ENTITY_IS_EXTERNAL;
6411: ctxt->wellFormed = 0;
6412: }
6413: /*
6414: * [ WFC: No < in Attribute Values ]
6415: * The replacement text of any entity referred to directly or
6416: * indirectly in an attribute value (other than "<") must
6417: * not contain a <.
6418: */
6419: else if ((ctxt->instate == XML_PARSER_ATTRIBUTE_VALUE) &&
6420: (ent != NULL) &&
6421: (xmlStrcmp(ent->name, BAD_CAST "lt")) &&
6422: (ent->content != NULL) &&
6423: (xmlStrchr(ent->content, '<'))) {
6424: if ((ctxt->sax != NULL) && (ctxt->sax->error != NULL))
6425: ctxt->sax->error(ctxt->userData,
6426: "'<' in entity '%s' is not allowed in attributes values\n", name);
6427: ctxt->errNo = XML_ERR_LT_IN_ATTRIBUTE;
6428: ctxt->wellFormed = 0;
6429: }
6430:
6431: /*
6432: * Internal check, no parameter entities here ...
6433: */
6434: else {
1.159 daniel 6435: switch (ent->etype) {
1.135 daniel 6436: case XML_INTERNAL_PARAMETER_ENTITY:
6437: case XML_EXTERNAL_PARAMETER_ENTITY:
6438: if ((ctxt->sax != NULL) && (ctxt->sax->error != NULL))
6439: ctxt->sax->error(ctxt->userData,
6440: "Attempt to reference the parameter entity '%s'\n", name);
6441: ctxt->errNo = XML_ERR_ENTITY_IS_PARAMETER;
6442: ctxt->wellFormed = 0;
6443: break;
6444: }
6445: }
6446:
6447: /*
6448: * [ WFC: No Recursion ]
6449: * TODO A parsed entity must not contain a recursive reference
6450: * to itself, either directly or indirectly.
6451: */
6452:
6453: } else {
6454: if ((ctxt->sax != NULL) && (ctxt->sax->error != NULL))
6455: ctxt->sax->error(ctxt->userData,
6456: "xmlParseEntityRef: expecting ';'\n");
6457: ctxt->errNo = XML_ERR_ENTITYREF_SEMICOL_MISSING;
6458: ctxt->wellFormed = 0;
6459: }
6460: xmlFree(name);
6461: }
6462: }
6463: return(ent);
6464: }
1.24 daniel 6465:
1.50 daniel 6466: /**
6467: * xmlParsePEReference:
6468: * @ctxt: an XML parser context
6469: *
6470: * parse PEReference declarations
1.77 daniel 6471: * The entity content is handled directly by pushing it's content as
6472: * a new input stream.
1.22 daniel 6473: *
6474: * [69] PEReference ::= '%' Name ';'
1.68 daniel 6475: *
1.98 daniel 6476: * [ WFC: No Recursion ]
6477: * TODO A parsed entity must not contain a recursive
6478: * reference to itself, either directly or indirectly.
6479: *
6480: * [ WFC: Entity Declared ]
6481: * In a document without any DTD, a document with only an internal DTD
6482: * subset which contains no parameter entity references, or a document
6483: * with "standalone='yes'", ... ... The declaration of a parameter
6484: * entity must precede any reference to it...
6485: *
6486: * [ VC: Entity Declared ]
6487: * In a document with an external subset or external parameter entities
6488: * with "standalone='no'", ... ... The declaration of a parameter entity
6489: * must precede any reference to it...
6490: *
6491: * [ WFC: In DTD ]
6492: * Parameter-entity references may only appear in the DTD.
6493: * NOTE: misleading but this is handled.
1.22 daniel 6494: */
1.77 daniel 6495: void
1.55 daniel 6496: xmlParsePEReference(xmlParserCtxtPtr ctxt) {
1.123 daniel 6497: xmlChar *name;
1.72 daniel 6498: xmlEntityPtr entity = NULL;
1.50 daniel 6499: xmlParserInputPtr input;
1.22 daniel 6500:
1.152 daniel 6501: if (RAW == '%') {
1.40 daniel 6502: NEXT;
1.22 daniel 6503: name = xmlParseName(ctxt);
6504: if (name == NULL) {
1.55 daniel 6505: if ((ctxt->sax != NULL) && (ctxt->sax->error != NULL))
1.122 daniel 6506: ctxt->sax->error(ctxt->userData,
6507: "xmlParsePEReference: no name\n");
1.123 daniel 6508: ctxt->errNo = XML_ERR_NAME_REQUIRED;
1.59 daniel 6509: ctxt->wellFormed = 0;
1.22 daniel 6510: } else {
1.152 daniel 6511: if (RAW == ';') {
1.40 daniel 6512: NEXT;
1.98 daniel 6513: if ((ctxt->sax != NULL) &&
6514: (ctxt->sax->getParameterEntity != NULL))
6515: entity = ctxt->sax->getParameterEntity(ctxt->userData,
6516: name);
1.45 daniel 6517: if (entity == NULL) {
1.98 daniel 6518: /*
6519: * [ WFC: Entity Declared ]
6520: * In a document without any DTD, a document with only an
6521: * internal DTD subset which contains no parameter entity
6522: * references, or a document with "standalone='yes'", ...
6523: * ... The declaration of a parameter entity must precede
6524: * any reference to it...
6525: */
6526: if ((ctxt->standalone == 1) ||
6527: ((ctxt->hasExternalSubset == 0) &&
6528: (ctxt->hasPErefs == 0))) {
6529: if ((ctxt->sax != NULL) && (ctxt->sax->error != NULL))
6530: ctxt->sax->error(ctxt->userData,
6531: "PEReference: %%%s; not found\n", name);
1.123 daniel 6532: ctxt->errNo = XML_ERR_UNDECLARED_ENTITY;
1.98 daniel 6533: ctxt->wellFormed = 0;
6534: } else {
6535: /*
6536: * [ VC: Entity Declared ]
6537: * In a document with an external subset or external
6538: * parameter entities with "standalone='no'", ...
6539: * ... The declaration of a parameter entity must precede
6540: * any reference to it...
6541: */
6542: if ((ctxt->sax != NULL) && (ctxt->sax->warning != NULL))
6543: ctxt->sax->warning(ctxt->userData,
6544: "PEReference: %%%s; not found\n", name);
6545: ctxt->valid = 0;
6546: }
1.50 daniel 6547: } else {
1.98 daniel 6548: /*
6549: * Internal checking in case the entity quest barfed
6550: */
1.159 daniel 6551: if ((entity->etype != XML_INTERNAL_PARAMETER_ENTITY) &&
6552: (entity->etype != XML_EXTERNAL_PARAMETER_ENTITY)) {
1.98 daniel 6553: if ((ctxt->sax != NULL) && (ctxt->sax->warning != NULL))
6554: ctxt->sax->warning(ctxt->userData,
6555: "Internal: %%%s; is not a parameter entity\n", name);
6556: } else {
1.164 daniel 6557: /*
6558: * TODO !!!
6559: * handle the extra spaces added before and after
6560: * c.f. http://www.w3.org/TR/REC-xml#as-PE
6561: */
1.98 daniel 6562: input = xmlNewEntityInputStream(ctxt, entity);
6563: xmlPushInput(ctxt, input);
1.164 daniel 6564: if ((entity->etype == XML_EXTERNAL_PARAMETER_ENTITY) &&
6565: (RAW == '<') && (NXT(1) == '?') &&
6566: (NXT(2) == 'x') && (NXT(3) == 'm') &&
6567: (NXT(4) == 'l') && (IS_BLANK(NXT(5)))) {
1.172 daniel 6568: xmlParseTextDecl(ctxt);
1.164 daniel 6569: }
6570: if (ctxt->token == 0)
6571: ctxt->token = ' ';
1.98 daniel 6572: }
1.45 daniel 6573: }
1.98 daniel 6574: ctxt->hasPErefs = 1;
1.22 daniel 6575: } else {
1.55 daniel 6576: if ((ctxt->sax != NULL) && (ctxt->sax->error != NULL))
1.74 daniel 6577: ctxt->sax->error(ctxt->userData,
1.59 daniel 6578: "xmlParsePEReference: expecting ';'\n");
1.123 daniel 6579: ctxt->errNo = XML_ERR_ENTITYREF_SEMICOL_MISSING;
1.59 daniel 6580: ctxt->wellFormed = 0;
1.22 daniel 6581: }
1.119 daniel 6582: xmlFree(name);
1.3 veillard 6583: }
6584: }
6585: }
6586:
1.50 daniel 6587: /**
1.135 daniel 6588: * xmlParseStringPEReference:
6589: * @ctxt: an XML parser context
6590: * @str: a pointer to an index in the string
6591: *
6592: * parse PEReference declarations
6593: *
6594: * [69] PEReference ::= '%' Name ';'
6595: *
6596: * [ WFC: No Recursion ]
6597: * TODO A parsed entity must not contain a recursive
6598: * reference to itself, either directly or indirectly.
6599: *
6600: * [ WFC: Entity Declared ]
6601: * In a document without any DTD, a document with only an internal DTD
6602: * subset which contains no parameter entity references, or a document
6603: * with "standalone='yes'", ... ... The declaration of a parameter
6604: * entity must precede any reference to it...
6605: *
6606: * [ VC: Entity Declared ]
6607: * In a document with an external subset or external parameter entities
6608: * with "standalone='no'", ... ... The declaration of a parameter entity
6609: * must precede any reference to it...
6610: *
6611: * [ WFC: In DTD ]
6612: * Parameter-entity references may only appear in the DTD.
6613: * NOTE: misleading but this is handled.
6614: *
6615: * Returns the string of the entity content.
6616: * str is updated to the current value of the index
6617: */
6618: xmlEntityPtr
6619: xmlParseStringPEReference(xmlParserCtxtPtr ctxt, const xmlChar **str) {
6620: const xmlChar *ptr;
6621: xmlChar cur;
6622: xmlChar *name;
6623: xmlEntityPtr entity = NULL;
6624:
6625: if ((str == NULL) || (*str == NULL)) return(NULL);
6626: ptr = *str;
6627: cur = *ptr;
6628: if (cur == '%') {
6629: ptr++;
6630: cur = *ptr;
6631: name = xmlParseStringName(ctxt, &ptr);
6632: if (name == NULL) {
6633: if ((ctxt->sax != NULL) && (ctxt->sax->error != NULL))
6634: ctxt->sax->error(ctxt->userData,
6635: "xmlParseStringPEReference: no name\n");
6636: ctxt->errNo = XML_ERR_NAME_REQUIRED;
6637: ctxt->wellFormed = 0;
6638: } else {
6639: cur = *ptr;
6640: if (cur == ';') {
6641: ptr++;
6642: cur = *ptr;
6643: if ((ctxt->sax != NULL) &&
6644: (ctxt->sax->getParameterEntity != NULL))
6645: entity = ctxt->sax->getParameterEntity(ctxt->userData,
6646: name);
6647: if (entity == NULL) {
6648: /*
6649: * [ WFC: Entity Declared ]
6650: * In a document without any DTD, a document with only an
6651: * internal DTD subset which contains no parameter entity
6652: * references, or a document with "standalone='yes'", ...
6653: * ... The declaration of a parameter entity must precede
6654: * any reference to it...
6655: */
6656: if ((ctxt->standalone == 1) ||
6657: ((ctxt->hasExternalSubset == 0) &&
6658: (ctxt->hasPErefs == 0))) {
6659: if ((ctxt->sax != NULL) && (ctxt->sax->error != NULL))
6660: ctxt->sax->error(ctxt->userData,
6661: "PEReference: %%%s; not found\n", name);
6662: ctxt->errNo = XML_ERR_UNDECLARED_ENTITY;
6663: ctxt->wellFormed = 0;
6664: } else {
6665: /*
6666: * [ VC: Entity Declared ]
6667: * In a document with an external subset or external
6668: * parameter entities with "standalone='no'", ...
6669: * ... The declaration of a parameter entity must
6670: * precede any reference to it...
6671: */
6672: if ((ctxt->sax != NULL) && (ctxt->sax->warning != NULL))
6673: ctxt->sax->warning(ctxt->userData,
6674: "PEReference: %%%s; not found\n", name);
6675: ctxt->valid = 0;
6676: }
6677: } else {
6678: /*
6679: * Internal checking in case the entity quest barfed
6680: */
1.159 daniel 6681: if ((entity->etype != XML_INTERNAL_PARAMETER_ENTITY) &&
6682: (entity->etype != XML_EXTERNAL_PARAMETER_ENTITY)) {
1.135 daniel 6683: if ((ctxt->sax != NULL) && (ctxt->sax->warning != NULL))
6684: ctxt->sax->warning(ctxt->userData,
6685: "Internal: %%%s; is not a parameter entity\n", name);
6686: }
6687: }
6688: ctxt->hasPErefs = 1;
6689: } else {
6690: if ((ctxt->sax != NULL) && (ctxt->sax->error != NULL))
6691: ctxt->sax->error(ctxt->userData,
6692: "xmlParseStringPEReference: expecting ';'\n");
6693: ctxt->errNo = XML_ERR_ENTITYREF_SEMICOL_MISSING;
6694: ctxt->wellFormed = 0;
6695: }
6696: xmlFree(name);
6697: }
6698: }
6699: *str = ptr;
6700: return(entity);
6701: }
6702:
6703: /**
1.50 daniel 6704: * xmlParseDocTypeDecl :
6705: * @ctxt: an XML parser context
6706: *
6707: * parse a DOCTYPE declaration
1.21 daniel 6708: *
1.22 daniel 6709: * [28] doctypedecl ::= '<!DOCTYPE' S Name (S ExternalID)? S?
6710: * ('[' (markupdecl | PEReference | S)* ']' S?)? '>'
1.98 daniel 6711: *
6712: * [ VC: Root Element Type ]
1.99 daniel 6713: * The Name in the document type declaration must match the element
1.98 daniel 6714: * type of the root element.
1.21 daniel 6715: */
6716:
1.55 daniel 6717: void
6718: xmlParseDocTypeDecl(xmlParserCtxtPtr ctxt) {
1.165 daniel 6719: xmlChar *name = NULL;
1.123 daniel 6720: xmlChar *ExternalID = NULL;
6721: xmlChar *URI = NULL;
1.21 daniel 6722:
6723: /*
6724: * We know that '<!DOCTYPE' has been detected.
6725: */
1.40 daniel 6726: SKIP(9);
1.21 daniel 6727:
1.42 daniel 6728: SKIP_BLANKS;
1.21 daniel 6729:
6730: /*
6731: * Parse the DOCTYPE name.
6732: */
6733: name = xmlParseName(ctxt);
6734: if (name == NULL) {
1.55 daniel 6735: if ((ctxt->sax != NULL) && (ctxt->sax->error != NULL))
1.122 daniel 6736: ctxt->sax->error(ctxt->userData,
6737: "xmlParseDocTypeDecl : no DOCTYPE name !\n");
1.59 daniel 6738: ctxt->wellFormed = 0;
1.123 daniel 6739: ctxt->errNo = XML_ERR_NAME_REQUIRED;
1.21 daniel 6740: }
1.165 daniel 6741: ctxt->intSubName = name;
1.21 daniel 6742:
1.42 daniel 6743: SKIP_BLANKS;
1.21 daniel 6744:
6745: /*
1.22 daniel 6746: * Check for SystemID and ExternalID
6747: */
1.67 daniel 6748: URI = xmlParseExternalID(ctxt, &ExternalID, 1);
1.98 daniel 6749:
6750: if ((URI != NULL) || (ExternalID != NULL)) {
6751: ctxt->hasExternalSubset = 1;
6752: }
1.165 daniel 6753: ctxt->extSubURI = URI;
6754: ctxt->extSubSystem = ExternalID;
1.98 daniel 6755:
1.42 daniel 6756: SKIP_BLANKS;
1.36 daniel 6757:
1.76 daniel 6758: /*
1.165 daniel 6759: * Create and update the internal subset.
1.76 daniel 6760: */
1.171 daniel 6761: if ((ctxt->sax != NULL) && (ctxt->sax->internalSubset != NULL) &&
6762: (!ctxt->disableSAX))
1.74 daniel 6763: ctxt->sax->internalSubset(ctxt->userData, name, ExternalID, URI);
1.22 daniel 6764:
6765: /*
1.140 daniel 6766: * Is there any internal subset declarations ?
6767: * they are handled separately in xmlParseInternalSubset()
6768: */
1.152 daniel 6769: if (RAW == '[')
1.140 daniel 6770: return;
6771:
6772: /*
6773: * We should be at the end of the DOCTYPE declaration.
6774: */
1.152 daniel 6775: if (RAW != '>') {
1.140 daniel 6776: if ((ctxt->sax != NULL) && (ctxt->sax->error != NULL))
6777: ctxt->sax->error(ctxt->userData, "DOCTYPE unproperly terminated\n");
6778: ctxt->wellFormed = 0;
6779: ctxt->errNo = XML_ERR_DOCTYPE_NOT_FINISHED;
6780: }
6781: NEXT;
6782: }
6783:
6784: /**
6785: * xmlParseInternalsubset :
6786: * @ctxt: an XML parser context
6787: *
6788: * parse the internal subset declaration
6789: *
6790: * [28 end] ('[' (markupdecl | PEReference | S)* ']' S?)? '>'
6791: */
6792:
6793: void
6794: xmlParseInternalSubset(xmlParserCtxtPtr ctxt) {
6795: /*
1.22 daniel 6796: * Is there any DTD definition ?
6797: */
1.152 daniel 6798: if (RAW == '[') {
1.96 daniel 6799: ctxt->instate = XML_PARSER_DTD;
1.40 daniel 6800: NEXT;
1.22 daniel 6801: /*
6802: * Parse the succession of Markup declarations and
6803: * PEReferences.
6804: * Subsequence (markupdecl | PEReference | S)*
6805: */
1.152 daniel 6806: while (RAW != ']') {
1.123 daniel 6807: const xmlChar *check = CUR_PTR;
1.115 daniel 6808: int cons = ctxt->input->consumed;
1.22 daniel 6809:
1.42 daniel 6810: SKIP_BLANKS;
1.22 daniel 6811: xmlParseMarkupDecl(ctxt);
1.50 daniel 6812: xmlParsePEReference(ctxt);
1.22 daniel 6813:
1.115 daniel 6814: /*
6815: * Pop-up of finished entities.
6816: */
1.152 daniel 6817: while ((RAW == 0) && (ctxt->inputNr > 1))
1.115 daniel 6818: xmlPopInput(ctxt);
6819:
1.118 daniel 6820: if ((CUR_PTR == check) && (cons == ctxt->input->consumed)) {
1.55 daniel 6821: if ((ctxt->sax != NULL) && (ctxt->sax->error != NULL))
1.74 daniel 6822: ctxt->sax->error(ctxt->userData,
1.140 daniel 6823: "xmlParseInternalSubset: error detected in Markup declaration\n");
1.59 daniel 6824: ctxt->wellFormed = 0;
1.123 daniel 6825: ctxt->errNo = XML_ERR_INTERNAL_ERROR;
1.22 daniel 6826: break;
6827: }
6828: }
1.152 daniel 6829: if (RAW == ']') NEXT;
1.22 daniel 6830: }
6831:
6832: /*
6833: * We should be at the end of the DOCTYPE declaration.
1.21 daniel 6834: */
1.152 daniel 6835: if (RAW != '>') {
1.55 daniel 6836: if ((ctxt->sax != NULL) && (ctxt->sax->error != NULL))
1.74 daniel 6837: ctxt->sax->error(ctxt->userData, "DOCTYPE unproperly terminated\n");
1.59 daniel 6838: ctxt->wellFormed = 0;
1.123 daniel 6839: ctxt->errNo = XML_ERR_DOCTYPE_NOT_FINISHED;
1.21 daniel 6840: }
1.40 daniel 6841: NEXT;
1.21 daniel 6842: }
6843:
1.50 daniel 6844: /**
6845: * xmlParseAttribute:
6846: * @ctxt: an XML parser context
1.123 daniel 6847: * @value: a xmlChar ** used to store the value of the attribute
1.50 daniel 6848: *
6849: * parse an attribute
1.3 veillard 6850: *
1.22 daniel 6851: * [41] Attribute ::= Name Eq AttValue
6852: *
1.98 daniel 6853: * [ WFC: No External Entity References ]
6854: * Attribute values cannot contain direct or indirect entity references
6855: * to external entities.
6856: *
6857: * [ WFC: No < in Attribute Values ]
6858: * The replacement text of any entity referred to directly or indirectly in
6859: * an attribute value (other than "<") must not contain a <.
6860: *
6861: * [ VC: Attribute Value Type ]
1.117 daniel 6862: * The attribute must have been declared; the value must be of the type
1.99 daniel 6863: * declared for it.
1.98 daniel 6864: *
1.22 daniel 6865: * [25] Eq ::= S? '=' S?
6866: *
1.29 daniel 6867: * With namespace:
6868: *
6869: * [NS 11] Attribute ::= QName Eq AttValue
1.43 daniel 6870: *
6871: * Also the case QName == xmlns:??? is handled independently as a namespace
6872: * definition.
1.69 daniel 6873: *
1.72 daniel 6874: * Returns the attribute name, and the value in *value.
1.3 veillard 6875: */
6876:
1.123 daniel 6877: xmlChar *
6878: xmlParseAttribute(xmlParserCtxtPtr ctxt, xmlChar **value) {
6879: xmlChar *name, *val;
1.3 veillard 6880:
1.72 daniel 6881: *value = NULL;
6882: name = xmlParseName(ctxt);
1.22 daniel 6883: if (name == NULL) {
1.55 daniel 6884: if ((ctxt->sax != NULL) && (ctxt->sax->error != NULL))
1.74 daniel 6885: ctxt->sax->error(ctxt->userData, "error parsing attribute name\n");
1.59 daniel 6886: ctxt->wellFormed = 0;
1.123 daniel 6887: ctxt->errNo = XML_ERR_NAME_REQUIRED;
1.52 daniel 6888: return(NULL);
1.3 veillard 6889: }
6890:
6891: /*
1.29 daniel 6892: * read the value
1.3 veillard 6893: */
1.42 daniel 6894: SKIP_BLANKS;
1.152 daniel 6895: if (RAW == '=') {
1.40 daniel 6896: NEXT;
1.42 daniel 6897: SKIP_BLANKS;
1.72 daniel 6898: val = xmlParseAttValue(ctxt);
1.96 daniel 6899: ctxt->instate = XML_PARSER_CONTENT;
1.29 daniel 6900: } else {
1.55 daniel 6901: if ((ctxt->sax != NULL) && (ctxt->sax->error != NULL))
1.74 daniel 6902: ctxt->sax->error(ctxt->userData,
1.59 daniel 6903: "Specification mandate value for attribute %s\n", name);
1.123 daniel 6904: ctxt->errNo = XML_ERR_ATTRIBUTE_WITHOUT_VALUE;
1.59 daniel 6905: ctxt->wellFormed = 0;
1.170 daniel 6906: xmlFree(name);
1.52 daniel 6907: return(NULL);
1.43 daniel 6908: }
6909:
1.172 daniel 6910: /*
6911: * Check that xml:lang conforms to the specification
6912: */
6913: if (!xmlStrcmp(name, BAD_CAST "xml:lang")) {
6914: if (!xmlCheckLanguageID(val)) {
6915: if ((ctxt->sax != NULL) && (ctxt->sax->error != NULL))
6916: ctxt->sax->error(ctxt->userData,
6917: "Invalid value for xml:lang : %s\n", val);
6918: ctxt->errNo = XML_ERR_ATTRIBUTE_WITHOUT_VALUE;
6919: ctxt->wellFormed = 0;
6920: }
6921: }
6922:
1.176 daniel 6923: /*
6924: * Check that xml:space conforms to the specification
6925: */
6926: if (!xmlStrcmp(name, BAD_CAST "xml:space")) {
6927: if (!xmlStrcmp(val, BAD_CAST "default"))
6928: *(ctxt->space) = 0;
6929: else if (!xmlStrcmp(val, BAD_CAST "preserve"))
6930: *(ctxt->space) = 1;
6931: else {
6932: if ((ctxt->sax != NULL) && (ctxt->sax->error != NULL))
6933: ctxt->sax->error(ctxt->userData,
6934: "Invalid value for xml:space : \"%s\", \"default\" or \"preserve\" expected\n",
6935: val);
6936: ctxt->errNo = XML_ERR_ATTRIBUTE_WITHOUT_VALUE;
6937: ctxt->wellFormed = 0;
6938: }
6939: }
6940:
1.72 daniel 6941: *value = val;
6942: return(name);
1.3 veillard 6943: }
6944:
1.50 daniel 6945: /**
6946: * xmlParseStartTag:
6947: * @ctxt: an XML parser context
6948: *
6949: * parse a start of tag either for rule element or
6950: * EmptyElement. In both case we don't parse the tag closing chars.
1.27 daniel 6951: *
6952: * [40] STag ::= '<' Name (S Attribute)* S? '>'
6953: *
1.98 daniel 6954: * [ WFC: Unique Att Spec ]
6955: * No attribute name may appear more than once in the same start-tag or
6956: * empty-element tag.
6957: *
1.29 daniel 6958: * [44] EmptyElemTag ::= '<' Name (S Attribute)* S? '/>'
6959: *
1.98 daniel 6960: * [ WFC: Unique Att Spec ]
6961: * No attribute name may appear more than once in the same start-tag or
6962: * empty-element tag.
6963: *
1.29 daniel 6964: * With namespace:
6965: *
6966: * [NS 8] STag ::= '<' QName (S Attribute)* S? '>'
6967: *
6968: * [NS 10] EmptyElement ::= '<' QName (S Attribute)* S? '/>'
1.83 daniel 6969: *
1.129 daniel 6970: * Returne the element name parsed
1.2 veillard 6971: */
6972:
1.123 daniel 6973: xmlChar *
1.69 daniel 6974: xmlParseStartTag(xmlParserCtxtPtr ctxt) {
1.123 daniel 6975: xmlChar *name;
6976: xmlChar *attname;
6977: xmlChar *attvalue;
6978: const xmlChar **atts = NULL;
1.72 daniel 6979: int nbatts = 0;
6980: int maxatts = 0;
6981: int i;
1.2 veillard 6982:
1.152 daniel 6983: if (RAW != '<') return(NULL);
1.40 daniel 6984: NEXT;
1.3 veillard 6985:
1.72 daniel 6986: name = xmlParseName(ctxt);
1.59 daniel 6987: if (name == NULL) {
6988: if ((ctxt->sax != NULL) && (ctxt->sax->error != NULL))
1.74 daniel 6989: ctxt->sax->error(ctxt->userData,
1.59 daniel 6990: "xmlParseStartTag: invalid element name\n");
1.123 daniel 6991: ctxt->errNo = XML_ERR_NAME_REQUIRED;
1.59 daniel 6992: ctxt->wellFormed = 0;
1.83 daniel 6993: return(NULL);
1.50 daniel 6994: }
6995:
6996: /*
1.3 veillard 6997: * Now parse the attributes, it ends up with the ending
6998: *
6999: * (S Attribute)* S?
7000: */
1.42 daniel 7001: SKIP_BLANKS;
1.91 daniel 7002: GROW;
1.168 daniel 7003:
1.153 daniel 7004: while ((IS_CHAR(RAW)) &&
1.152 daniel 7005: (RAW != '>') &&
7006: ((RAW != '/') || (NXT(1) != '>'))) {
1.123 daniel 7007: const xmlChar *q = CUR_PTR;
1.91 daniel 7008: int cons = ctxt->input->consumed;
1.29 daniel 7009:
1.72 daniel 7010: attname = xmlParseAttribute(ctxt, &attvalue);
7011: if ((attname != NULL) && (attvalue != NULL)) {
7012: /*
1.98 daniel 7013: * [ WFC: Unique Att Spec ]
7014: * No attribute name may appear more than once in the same
7015: * start-tag or empty-element tag.
1.72 daniel 7016: */
7017: for (i = 0; i < nbatts;i += 2) {
7018: if (!xmlStrcmp(atts[i], attname)) {
7019: if ((ctxt->sax != NULL) && (ctxt->sax->error != NULL))
1.98 daniel 7020: ctxt->sax->error(ctxt->userData,
7021: "Attribute %s redefined\n",
7022: attname);
1.72 daniel 7023: ctxt->wellFormed = 0;
1.123 daniel 7024: ctxt->errNo = XML_ERR_ATTRIBUTE_REDEFINED;
1.119 daniel 7025: xmlFree(attname);
7026: xmlFree(attvalue);
1.98 daniel 7027: goto failed;
1.72 daniel 7028: }
7029: }
7030:
7031: /*
7032: * Add the pair to atts
7033: */
7034: if (atts == NULL) {
7035: maxatts = 10;
1.123 daniel 7036: atts = (const xmlChar **) xmlMalloc(maxatts * sizeof(xmlChar *));
1.72 daniel 7037: if (atts == NULL) {
1.86 daniel 7038: fprintf(stderr, "malloc of %ld byte failed\n",
1.123 daniel 7039: maxatts * (long)sizeof(xmlChar *));
1.83 daniel 7040: return(NULL);
1.72 daniel 7041: }
1.127 daniel 7042: } else if (nbatts + 4 > maxatts) {
1.72 daniel 7043: maxatts *= 2;
1.123 daniel 7044: atts = (const xmlChar **) xmlRealloc(atts,
7045: maxatts * sizeof(xmlChar *));
1.72 daniel 7046: if (atts == NULL) {
1.86 daniel 7047: fprintf(stderr, "realloc of %ld byte failed\n",
1.123 daniel 7048: maxatts * (long)sizeof(xmlChar *));
1.83 daniel 7049: return(NULL);
1.72 daniel 7050: }
7051: }
7052: atts[nbatts++] = attname;
7053: atts[nbatts++] = attvalue;
7054: atts[nbatts] = NULL;
7055: atts[nbatts + 1] = NULL;
1.176 daniel 7056: } else {
7057: if (attname != NULL)
7058: xmlFree(attname);
7059: if (attvalue != NULL)
7060: xmlFree(attvalue);
1.72 daniel 7061: }
7062:
1.116 daniel 7063: failed:
1.168 daniel 7064:
7065: if ((RAW == '>') || (((RAW == '/') && (NXT(1) == '>'))))
7066: break;
7067: if (!IS_BLANK(RAW)) {
7068: if ((ctxt->sax != NULL) && (ctxt->sax->error != NULL))
7069: ctxt->sax->error(ctxt->userData,
7070: "attributes construct error\n");
7071: ctxt->errNo = XML_ERR_SPACE_REQUIRED;
7072: ctxt->wellFormed = 0;
7073: }
1.42 daniel 7074: SKIP_BLANKS;
1.91 daniel 7075: if ((cons == ctxt->input->consumed) && (q == CUR_PTR)) {
1.55 daniel 7076: if ((ctxt->sax != NULL) && (ctxt->sax->error != NULL))
1.74 daniel 7077: ctxt->sax->error(ctxt->userData,
1.31 daniel 7078: "xmlParseStartTag: problem parsing attributes\n");
1.123 daniel 7079: ctxt->errNo = XML_ERR_INTERNAL_ERROR;
1.59 daniel 7080: ctxt->wellFormed = 0;
1.29 daniel 7081: break;
1.3 veillard 7082: }
1.91 daniel 7083: GROW;
1.3 veillard 7084: }
7085:
1.43 daniel 7086: /*
1.72 daniel 7087: * SAX: Start of Element !
1.43 daniel 7088: */
1.171 daniel 7089: if ((ctxt->sax != NULL) && (ctxt->sax->startElement != NULL) &&
7090: (!ctxt->disableSAX))
1.74 daniel 7091: ctxt->sax->startElement(ctxt->userData, name, atts);
1.43 daniel 7092:
1.72 daniel 7093: if (atts != NULL) {
1.123 daniel 7094: for (i = 0;i < nbatts;i++) xmlFree((xmlChar *) atts[i]);
1.119 daniel 7095: xmlFree(atts);
1.72 daniel 7096: }
1.83 daniel 7097: return(name);
1.3 veillard 7098: }
7099:
1.50 daniel 7100: /**
7101: * xmlParseEndTag:
7102: * @ctxt: an XML parser context
7103: *
7104: * parse an end of tag
1.27 daniel 7105: *
7106: * [42] ETag ::= '</' Name S? '>'
1.29 daniel 7107: *
7108: * With namespace
7109: *
1.72 daniel 7110: * [NS 9] ETag ::= '</' QName S? '>'
1.7 veillard 7111: */
7112:
1.55 daniel 7113: void
1.140 daniel 7114: xmlParseEndTag(xmlParserCtxtPtr ctxt) {
1.123 daniel 7115: xmlChar *name;
1.140 daniel 7116: xmlChar *oldname;
1.7 veillard 7117:
1.91 daniel 7118: GROW;
1.152 daniel 7119: if ((RAW != '<') || (NXT(1) != '/')) {
1.55 daniel 7120: if ((ctxt->sax != NULL) && (ctxt->sax->error != NULL))
1.74 daniel 7121: ctxt->sax->error(ctxt->userData, "xmlParseEndTag: '</' not found\n");
1.59 daniel 7122: ctxt->wellFormed = 0;
1.123 daniel 7123: ctxt->errNo = XML_ERR_LTSLASH_REQUIRED;
1.27 daniel 7124: return;
7125: }
1.40 daniel 7126: SKIP(2);
1.7 veillard 7127:
1.72 daniel 7128: name = xmlParseName(ctxt);
1.7 veillard 7129:
7130: /*
7131: * We should definitely be at the ending "S? '>'" part
7132: */
1.91 daniel 7133: GROW;
1.42 daniel 7134: SKIP_BLANKS;
1.153 daniel 7135: if ((!IS_CHAR(RAW)) || (RAW != '>')) {
1.55 daniel 7136: if ((ctxt->sax != NULL) && (ctxt->sax->error != NULL))
1.74 daniel 7137: ctxt->sax->error(ctxt->userData, "End tag : expected '>'\n");
1.123 daniel 7138: ctxt->errNo = XML_ERR_GT_REQUIRED;
1.59 daniel 7139: ctxt->wellFormed = 0;
1.7 veillard 7140: } else
1.40 daniel 7141: NEXT;
1.7 veillard 7142:
1.72 daniel 7143: /*
1.98 daniel 7144: * [ WFC: Element Type Match ]
7145: * The Name in an element's end-tag must match the element type in the
7146: * start-tag.
7147: *
1.83 daniel 7148: */
1.147 daniel 7149: if ((name == NULL) || (ctxt->name == NULL) ||
7150: (xmlStrcmp(name, ctxt->name))) {
7151: if ((ctxt->sax != NULL) && (ctxt->sax->error != NULL)) {
7152: if ((name != NULL) && (ctxt->name != NULL)) {
7153: ctxt->sax->error(ctxt->userData,
7154: "Opening and ending tag mismatch: %s and %s\n",
7155: ctxt->name, name);
7156: } else if (ctxt->name != NULL) {
7157: ctxt->sax->error(ctxt->userData,
7158: "Ending tag eror for: %s\n", ctxt->name);
7159: } else {
7160: ctxt->sax->error(ctxt->userData,
7161: "Ending tag error: internal error ???\n");
7162: }
1.122 daniel 7163:
1.147 daniel 7164: }
1.123 daniel 7165: ctxt->errNo = XML_ERR_TAG_NAME_MISMATCH;
1.83 daniel 7166: ctxt->wellFormed = 0;
7167: }
7168:
7169: /*
1.72 daniel 7170: * SAX: End of Tag
7171: */
1.171 daniel 7172: if ((ctxt->sax != NULL) && (ctxt->sax->endElement != NULL) &&
7173: (!ctxt->disableSAX))
1.74 daniel 7174: ctxt->sax->endElement(ctxt->userData, name);
1.72 daniel 7175:
7176: if (name != NULL)
1.119 daniel 7177: xmlFree(name);
1.140 daniel 7178: oldname = namePop(ctxt);
1.176 daniel 7179: spacePop(ctxt);
1.140 daniel 7180: if (oldname != NULL) {
7181: #ifdef DEBUG_STACK
7182: fprintf(stderr,"Close: popped %s\n", oldname);
7183: #endif
7184: xmlFree(oldname);
7185: }
1.7 veillard 7186: return;
7187: }
7188:
1.50 daniel 7189: /**
7190: * xmlParseCDSect:
7191: * @ctxt: an XML parser context
7192: *
7193: * Parse escaped pure raw content.
1.29 daniel 7194: *
7195: * [18] CDSect ::= CDStart CData CDEnd
7196: *
7197: * [19] CDStart ::= '<![CDATA['
7198: *
7199: * [20] Data ::= (Char* - (Char* ']]>' Char*))
7200: *
7201: * [21] CDEnd ::= ']]>'
1.3 veillard 7202: */
1.55 daniel 7203: void
7204: xmlParseCDSect(xmlParserCtxtPtr ctxt) {
1.135 daniel 7205: xmlChar *buf = NULL;
7206: int len = 0;
1.140 daniel 7207: int size = XML_PARSER_BUFFER_SIZE;
1.152 daniel 7208: int r, rl;
7209: int s, sl;
7210: int cur, l;
1.3 veillard 7211:
1.106 daniel 7212: if ((NXT(0) == '<') && (NXT(1) == '!') &&
1.40 daniel 7213: (NXT(2) == '[') && (NXT(3) == 'C') &&
7214: (NXT(4) == 'D') && (NXT(5) == 'A') &&
7215: (NXT(6) == 'T') && (NXT(7) == 'A') &&
7216: (NXT(8) == '[')) {
7217: SKIP(9);
1.29 daniel 7218: } else
1.45 daniel 7219: return;
1.109 daniel 7220:
7221: ctxt->instate = XML_PARSER_CDATA_SECTION;
1.152 daniel 7222: r = CUR_CHAR(rl);
7223: if (!IS_CHAR(r)) {
1.55 daniel 7224: if ((ctxt->sax != NULL) && (ctxt->sax->error != NULL))
1.122 daniel 7225: ctxt->sax->error(ctxt->userData,
1.135 daniel 7226: "CData section not finished\n");
1.59 daniel 7227: ctxt->wellFormed = 0;
1.123 daniel 7228: ctxt->errNo = XML_ERR_CDATA_NOT_FINISHED;
1.109 daniel 7229: ctxt->instate = XML_PARSER_CONTENT;
1.45 daniel 7230: return;
1.3 veillard 7231: }
1.152 daniel 7232: NEXTL(rl);
7233: s = CUR_CHAR(sl);
7234: if (!IS_CHAR(s)) {
1.55 daniel 7235: if ((ctxt->sax != NULL) && (ctxt->sax->error != NULL))
1.122 daniel 7236: ctxt->sax->error(ctxt->userData,
1.135 daniel 7237: "CData section not finished\n");
1.123 daniel 7238: ctxt->errNo = XML_ERR_CDATA_NOT_FINISHED;
1.59 daniel 7239: ctxt->wellFormed = 0;
1.109 daniel 7240: ctxt->instate = XML_PARSER_CONTENT;
1.45 daniel 7241: return;
1.3 veillard 7242: }
1.152 daniel 7243: NEXTL(sl);
7244: cur = CUR_CHAR(l);
1.135 daniel 7245: buf = (xmlChar *) xmlMalloc(size * sizeof(xmlChar));
7246: if (buf == NULL) {
7247: fprintf(stderr, "malloc of %d byte failed\n", size);
7248: return;
7249: }
1.108 veillard 7250: while (IS_CHAR(cur) &&
1.110 daniel 7251: ((r != ']') || (s != ']') || (cur != '>'))) {
1.152 daniel 7252: if (len + 5 >= size) {
1.135 daniel 7253: size *= 2;
7254: buf = xmlRealloc(buf, size * sizeof(xmlChar));
7255: if (buf == NULL) {
7256: fprintf(stderr, "realloc of %d byte failed\n", size);
7257: return;
7258: }
7259: }
1.152 daniel 7260: COPY_BUF(rl,buf,len,r);
1.110 daniel 7261: r = s;
1.152 daniel 7262: rl = sl;
1.110 daniel 7263: s = cur;
1.152 daniel 7264: sl = l;
7265: NEXTL(l);
7266: cur = CUR_CHAR(l);
1.3 veillard 7267: }
1.135 daniel 7268: buf[len] = 0;
1.109 daniel 7269: ctxt->instate = XML_PARSER_CONTENT;
1.152 daniel 7270: if (cur != '>') {
1.55 daniel 7271: if ((ctxt->sax != NULL) && (ctxt->sax->error != NULL))
1.122 daniel 7272: ctxt->sax->error(ctxt->userData,
1.135 daniel 7273: "CData section not finished\n%.50s\n", buf);
1.123 daniel 7274: ctxt->errNo = XML_ERR_CDATA_NOT_FINISHED;
1.59 daniel 7275: ctxt->wellFormed = 0;
1.135 daniel 7276: xmlFree(buf);
1.45 daniel 7277: return;
1.3 veillard 7278: }
1.152 daniel 7279: NEXTL(l);
1.16 daniel 7280:
1.45 daniel 7281: /*
1.135 daniel 7282: * Ok the buffer is to be consumed as cdata.
1.45 daniel 7283: */
1.171 daniel 7284: if ((ctxt->sax != NULL) && (!ctxt->disableSAX)) {
1.107 daniel 7285: if (ctxt->sax->cdataBlock != NULL)
1.135 daniel 7286: ctxt->sax->cdataBlock(ctxt->userData, buf, len);
1.45 daniel 7287: }
1.135 daniel 7288: xmlFree(buf);
1.2 veillard 7289: }
7290:
1.50 daniel 7291: /**
7292: * xmlParseContent:
7293: * @ctxt: an XML parser context
7294: *
7295: * Parse a content:
1.2 veillard 7296: *
1.27 daniel 7297: * [43] content ::= (element | CharData | Reference | CDSect | PI | Comment)*
1.2 veillard 7298: */
7299:
1.55 daniel 7300: void
7301: xmlParseContent(xmlParserCtxtPtr ctxt) {
1.97 daniel 7302: GROW;
1.176 daniel 7303: while (((RAW != 0) || (ctxt->token != 0)) &&
7304: ((RAW != '<') || (NXT(1) != '/'))) {
1.123 daniel 7305: const xmlChar *test = CUR_PTR;
1.91 daniel 7306: int cons = ctxt->input->consumed;
1.123 daniel 7307: xmlChar tok = ctxt->token;
1.27 daniel 7308:
7309: /*
1.152 daniel 7310: * Handle possible processed charrefs.
7311: */
7312: if (ctxt->token != 0) {
7313: xmlParseCharData(ctxt, 0);
7314: }
7315: /*
1.27 daniel 7316: * First case : a Processing Instruction.
7317: */
1.152 daniel 7318: else if ((RAW == '<') && (NXT(1) == '?')) {
1.27 daniel 7319: xmlParsePI(ctxt);
7320: }
1.72 daniel 7321:
1.27 daniel 7322: /*
7323: * Second case : a CDSection
7324: */
1.152 daniel 7325: else if ((RAW == '<') && (NXT(1) == '!') &&
1.40 daniel 7326: (NXT(2) == '[') && (NXT(3) == 'C') &&
7327: (NXT(4) == 'D') && (NXT(5) == 'A') &&
7328: (NXT(6) == 'T') && (NXT(7) == 'A') &&
7329: (NXT(8) == '[')) {
1.45 daniel 7330: xmlParseCDSect(ctxt);
1.27 daniel 7331: }
1.72 daniel 7332:
1.27 daniel 7333: /*
7334: * Third case : a comment
7335: */
1.152 daniel 7336: else if ((RAW == '<') && (NXT(1) == '!') &&
1.40 daniel 7337: (NXT(2) == '-') && (NXT(3) == '-')) {
1.114 daniel 7338: xmlParseComment(ctxt);
1.97 daniel 7339: ctxt->instate = XML_PARSER_CONTENT;
1.27 daniel 7340: }
1.72 daniel 7341:
1.27 daniel 7342: /*
7343: * Fourth case : a sub-element.
7344: */
1.152 daniel 7345: else if (RAW == '<') {
1.72 daniel 7346: xmlParseElement(ctxt);
1.45 daniel 7347: }
1.72 daniel 7348:
1.45 daniel 7349: /*
1.50 daniel 7350: * Fifth case : a reference. If if has not been resolved,
7351: * parsing returns it's Name, create the node
1.45 daniel 7352: */
1.97 daniel 7353:
1.152 daniel 7354: else if (RAW == '&') {
1.77 daniel 7355: xmlParseReference(ctxt);
1.27 daniel 7356: }
1.72 daniel 7357:
1.27 daniel 7358: /*
7359: * Last case, text. Note that References are handled directly.
7360: */
7361: else {
1.45 daniel 7362: xmlParseCharData(ctxt, 0);
1.3 veillard 7363: }
1.14 veillard 7364:
1.91 daniel 7365: GROW;
1.14 veillard 7366: /*
1.45 daniel 7367: * Pop-up of finished entities.
1.14 veillard 7368: */
1.152 daniel 7369: while ((RAW == 0) && (ctxt->inputNr > 1))
1.69 daniel 7370: xmlPopInput(ctxt);
1.135 daniel 7371: SHRINK;
1.45 daniel 7372:
1.113 daniel 7373: if ((cons == ctxt->input->consumed) && (test == CUR_PTR) &&
7374: (tok == ctxt->token)) {
1.55 daniel 7375: if ((ctxt->sax != NULL) && (ctxt->sax->error != NULL))
1.74 daniel 7376: ctxt->sax->error(ctxt->userData,
1.59 daniel 7377: "detected an error in element content\n");
1.123 daniel 7378: ctxt->errNo = XML_ERR_INTERNAL_ERROR;
1.59 daniel 7379: ctxt->wellFormed = 0;
1.29 daniel 7380: break;
7381: }
1.3 veillard 7382: }
1.2 veillard 7383: }
7384:
1.50 daniel 7385: /**
7386: * xmlParseElement:
7387: * @ctxt: an XML parser context
7388: *
7389: * parse an XML element, this is highly recursive
1.26 daniel 7390: *
7391: * [39] element ::= EmptyElemTag | STag content ETag
7392: *
1.98 daniel 7393: * [ WFC: Element Type Match ]
7394: * The Name in an element's end-tag must match the element type in the
7395: * start-tag.
7396: *
7397: * [ VC: Element Valid ]
1.117 daniel 7398: * An element is valid if there is a declaration matching elementdecl
1.99 daniel 7399: * where the Name matches the element type and one of the following holds:
7400: * - The declaration matches EMPTY and the element has no content.
7401: * - The declaration matches children and the sequence of child elements
7402: * belongs to the language generated by the regular expression in the
7403: * content model, with optional white space (characters matching the
7404: * nonterminal S) between each pair of child elements.
7405: * - The declaration matches Mixed and the content consists of character
7406: * data and child elements whose types match names in the content model.
7407: * - The declaration matches ANY, and the types of any child elements have
7408: * been declared.
1.2 veillard 7409: */
1.26 daniel 7410:
1.72 daniel 7411: void
1.69 daniel 7412: xmlParseElement(xmlParserCtxtPtr ctxt) {
1.123 daniel 7413: const xmlChar *openTag = CUR_PTR;
7414: xmlChar *name;
1.140 daniel 7415: xmlChar *oldname;
1.32 daniel 7416: xmlParserNodeInfo node_info;
1.118 daniel 7417: xmlNodePtr ret;
1.2 veillard 7418:
1.32 daniel 7419: /* Capture start position */
1.118 daniel 7420: if (ctxt->record_info) {
7421: node_info.begin_pos = ctxt->input->consumed +
7422: (CUR_PTR - ctxt->input->base);
7423: node_info.begin_line = ctxt->input->line;
7424: }
1.32 daniel 7425:
1.176 daniel 7426: if (ctxt->spaceNr == 0)
7427: spacePush(ctxt, -1);
7428: else
7429: spacePush(ctxt, *ctxt->space);
7430:
1.83 daniel 7431: name = xmlParseStartTag(ctxt);
7432: if (name == NULL) {
1.176 daniel 7433: spacePop(ctxt);
1.83 daniel 7434: return;
7435: }
1.140 daniel 7436: namePush(ctxt, name);
1.118 daniel 7437: ret = ctxt->node;
1.2 veillard 7438:
7439: /*
1.99 daniel 7440: * [ VC: Root Element Type ]
7441: * The Name in the document type declaration must match the element
7442: * type of the root element.
7443: */
1.105 daniel 7444: if (ctxt->validate && ctxt->wellFormed && ctxt->myDoc &&
1.157 daniel 7445: ctxt->node && (ctxt->node == ctxt->myDoc->children))
1.102 daniel 7446: ctxt->valid &= xmlValidateRoot(&ctxt->vctxt, ctxt->myDoc);
1.99 daniel 7447:
7448: /*
1.2 veillard 7449: * Check for an Empty Element.
7450: */
1.152 daniel 7451: if ((RAW == '/') && (NXT(1) == '>')) {
1.40 daniel 7452: SKIP(2);
1.171 daniel 7453: if ((ctxt->sax != NULL) && (ctxt->sax->endElement != NULL) &&
7454: (!ctxt->disableSAX))
1.83 daniel 7455: ctxt->sax->endElement(ctxt->userData, name);
1.140 daniel 7456: oldname = namePop(ctxt);
1.176 daniel 7457: spacePop(ctxt);
1.140 daniel 7458: if (oldname != NULL) {
7459: #ifdef DEBUG_STACK
7460: fprintf(stderr,"Close: popped %s\n", oldname);
7461: #endif
7462: xmlFree(oldname);
7463: }
1.72 daniel 7464: return;
1.2 veillard 7465: }
1.152 daniel 7466: if (RAW == '>') {
1.91 daniel 7467: NEXT;
7468: } else {
1.55 daniel 7469: if ((ctxt->sax != NULL) && (ctxt->sax->error != NULL))
1.122 daniel 7470: ctxt->sax->error(ctxt->userData,
7471: "Couldn't find end of Start Tag\n%.30s\n",
1.57 daniel 7472: openTag);
1.59 daniel 7473: ctxt->wellFormed = 0;
1.123 daniel 7474: ctxt->errNo = XML_ERR_GT_REQUIRED;
1.45 daniel 7475:
7476: /*
7477: * end of parsing of this node.
7478: */
7479: nodePop(ctxt);
1.140 daniel 7480: oldname = namePop(ctxt);
1.176 daniel 7481: spacePop(ctxt);
1.140 daniel 7482: if (oldname != NULL) {
7483: #ifdef DEBUG_STACK
7484: fprintf(stderr,"Close: popped %s\n", oldname);
7485: #endif
7486: xmlFree(oldname);
7487: }
1.118 daniel 7488:
7489: /*
7490: * Capture end position and add node
7491: */
7492: if ( ret != NULL && ctxt->record_info ) {
7493: node_info.end_pos = ctxt->input->consumed +
7494: (CUR_PTR - ctxt->input->base);
7495: node_info.end_line = ctxt->input->line;
7496: node_info.node = ret;
7497: xmlParserAddNodeInfo(ctxt, &node_info);
7498: }
1.72 daniel 7499: return;
1.2 veillard 7500: }
7501:
7502: /*
7503: * Parse the content of the element:
7504: */
1.45 daniel 7505: xmlParseContent(ctxt);
1.153 daniel 7506: if (!IS_CHAR(RAW)) {
1.55 daniel 7507: if ((ctxt->sax != NULL) && (ctxt->sax->error != NULL))
1.74 daniel 7508: ctxt->sax->error(ctxt->userData,
1.57 daniel 7509: "Premature end of data in tag %.30s\n", openTag);
1.59 daniel 7510: ctxt->wellFormed = 0;
1.123 daniel 7511: ctxt->errNo = XML_ERR_TAG_NOT_FINISED;
1.45 daniel 7512:
7513: /*
7514: * end of parsing of this node.
7515: */
7516: nodePop(ctxt);
1.140 daniel 7517: oldname = namePop(ctxt);
1.176 daniel 7518: spacePop(ctxt);
1.140 daniel 7519: if (oldname != NULL) {
7520: #ifdef DEBUG_STACK
7521: fprintf(stderr,"Close: popped %s\n", oldname);
7522: #endif
7523: xmlFree(oldname);
7524: }
1.72 daniel 7525: return;
1.2 veillard 7526: }
7527:
7528: /*
1.27 daniel 7529: * parse the end of tag: '</' should be here.
1.2 veillard 7530: */
1.140 daniel 7531: xmlParseEndTag(ctxt);
1.118 daniel 7532:
7533: /*
7534: * Capture end position and add node
7535: */
7536: if ( ret != NULL && ctxt->record_info ) {
7537: node_info.end_pos = ctxt->input->consumed +
7538: (CUR_PTR - ctxt->input->base);
7539: node_info.end_line = ctxt->input->line;
7540: node_info.node = ret;
7541: xmlParserAddNodeInfo(ctxt, &node_info);
7542: }
1.2 veillard 7543: }
7544:
1.50 daniel 7545: /**
7546: * xmlParseVersionNum:
7547: * @ctxt: an XML parser context
7548: *
7549: * parse the XML version value.
1.29 daniel 7550: *
7551: * [26] VersionNum ::= ([a-zA-Z0-9_.:] | '-')+
1.68 daniel 7552: *
7553: * Returns the string giving the XML version number, or NULL
1.29 daniel 7554: */
1.123 daniel 7555: xmlChar *
1.55 daniel 7556: xmlParseVersionNum(xmlParserCtxtPtr ctxt) {
1.135 daniel 7557: xmlChar *buf = NULL;
7558: int len = 0;
7559: int size = 10;
7560: xmlChar cur;
1.29 daniel 7561:
1.135 daniel 7562: buf = (xmlChar *) xmlMalloc(size * sizeof(xmlChar));
7563: if (buf == NULL) {
7564: fprintf(stderr, "malloc of %d byte failed\n", size);
7565: return(NULL);
7566: }
7567: cur = CUR;
1.152 daniel 7568: while (((cur >= 'a') && (cur <= 'z')) ||
7569: ((cur >= 'A') && (cur <= 'Z')) ||
7570: ((cur >= '0') && (cur <= '9')) ||
7571: (cur == '_') || (cur == '.') ||
7572: (cur == ':') || (cur == '-')) {
1.135 daniel 7573: if (len + 1 >= size) {
7574: size *= 2;
7575: buf = xmlRealloc(buf, size * sizeof(xmlChar));
7576: if (buf == NULL) {
7577: fprintf(stderr, "realloc of %d byte failed\n", size);
7578: return(NULL);
7579: }
7580: }
7581: buf[len++] = cur;
7582: NEXT;
7583: cur=CUR;
7584: }
7585: buf[len] = 0;
7586: return(buf);
1.29 daniel 7587: }
7588:
1.50 daniel 7589: /**
7590: * xmlParseVersionInfo:
7591: * @ctxt: an XML parser context
7592: *
7593: * parse the XML version.
1.29 daniel 7594: *
7595: * [24] VersionInfo ::= S 'version' Eq (' VersionNum ' | " VersionNum ")
7596: *
7597: * [25] Eq ::= S? '=' S?
1.50 daniel 7598: *
1.68 daniel 7599: * Returns the version string, e.g. "1.0"
1.29 daniel 7600: */
7601:
1.123 daniel 7602: xmlChar *
1.55 daniel 7603: xmlParseVersionInfo(xmlParserCtxtPtr ctxt) {
1.123 daniel 7604: xmlChar *version = NULL;
7605: const xmlChar *q;
1.29 daniel 7606:
1.152 daniel 7607: if ((RAW == 'v') && (NXT(1) == 'e') &&
1.40 daniel 7608: (NXT(2) == 'r') && (NXT(3) == 's') &&
7609: (NXT(4) == 'i') && (NXT(5) == 'o') &&
7610: (NXT(6) == 'n')) {
7611: SKIP(7);
1.42 daniel 7612: SKIP_BLANKS;
1.152 daniel 7613: if (RAW != '=') {
1.55 daniel 7614: if ((ctxt->sax != NULL) && (ctxt->sax->error != NULL))
1.122 daniel 7615: ctxt->sax->error(ctxt->userData,
7616: "xmlParseVersionInfo : expected '='\n");
1.59 daniel 7617: ctxt->wellFormed = 0;
1.123 daniel 7618: ctxt->errNo = XML_ERR_EQUAL_REQUIRED;
1.31 daniel 7619: return(NULL);
7620: }
1.40 daniel 7621: NEXT;
1.42 daniel 7622: SKIP_BLANKS;
1.152 daniel 7623: if (RAW == '"') {
1.40 daniel 7624: NEXT;
7625: q = CUR_PTR;
1.29 daniel 7626: version = xmlParseVersionNum(ctxt);
1.152 daniel 7627: if (RAW != '"') {
1.55 daniel 7628: if ((ctxt->sax != NULL) && (ctxt->sax->error != NULL))
1.122 daniel 7629: ctxt->sax->error(ctxt->userData,
7630: "String not closed\n%.50s\n", q);
1.59 daniel 7631: ctxt->wellFormed = 0;
1.123 daniel 7632: ctxt->errNo = XML_ERR_STRING_NOT_CLOSED;
1.55 daniel 7633: } else
1.40 daniel 7634: NEXT;
1.152 daniel 7635: } else if (RAW == '\''){
1.40 daniel 7636: NEXT;
7637: q = CUR_PTR;
1.29 daniel 7638: version = xmlParseVersionNum(ctxt);
1.152 daniel 7639: if (RAW != '\'') {
1.55 daniel 7640: if ((ctxt->sax != NULL) && (ctxt->sax->error != NULL))
1.122 daniel 7641: ctxt->sax->error(ctxt->userData,
7642: "String not closed\n%.50s\n", q);
1.123 daniel 7643: ctxt->errNo = XML_ERR_STRING_NOT_CLOSED;
1.59 daniel 7644: ctxt->wellFormed = 0;
1.55 daniel 7645: } else
1.40 daniel 7646: NEXT;
1.31 daniel 7647: } else {
1.55 daniel 7648: if ((ctxt->sax != NULL) && (ctxt->sax->error != NULL))
1.74 daniel 7649: ctxt->sax->error(ctxt->userData,
1.59 daniel 7650: "xmlParseVersionInfo : expected ' or \"\n");
1.122 daniel 7651: ctxt->wellFormed = 0;
1.123 daniel 7652: ctxt->errNo = XML_ERR_STRING_NOT_STARTED;
1.29 daniel 7653: }
7654: }
7655: return(version);
7656: }
7657:
1.50 daniel 7658: /**
7659: * xmlParseEncName:
7660: * @ctxt: an XML parser context
7661: *
7662: * parse the XML encoding name
1.29 daniel 7663: *
7664: * [81] EncName ::= [A-Za-z] ([A-Za-z0-9._] | '-')*
1.50 daniel 7665: *
1.68 daniel 7666: * Returns the encoding name value or NULL
1.29 daniel 7667: */
1.123 daniel 7668: xmlChar *
1.55 daniel 7669: xmlParseEncName(xmlParserCtxtPtr ctxt) {
1.135 daniel 7670: xmlChar *buf = NULL;
7671: int len = 0;
7672: int size = 10;
7673: xmlChar cur;
1.29 daniel 7674:
1.135 daniel 7675: cur = CUR;
7676: if (((cur >= 'a') && (cur <= 'z')) ||
7677: ((cur >= 'A') && (cur <= 'Z'))) {
7678: buf = (xmlChar *) xmlMalloc(size * sizeof(xmlChar));
7679: if (buf == NULL) {
7680: fprintf(stderr, "malloc of %d byte failed\n", size);
7681: return(NULL);
7682: }
7683:
7684: buf[len++] = cur;
1.40 daniel 7685: NEXT;
1.135 daniel 7686: cur = CUR;
1.152 daniel 7687: while (((cur >= 'a') && (cur <= 'z')) ||
7688: ((cur >= 'A') && (cur <= 'Z')) ||
7689: ((cur >= '0') && (cur <= '9')) ||
7690: (cur == '.') || (cur == '_') ||
7691: (cur == '-')) {
1.135 daniel 7692: if (len + 1 >= size) {
7693: size *= 2;
7694: buf = xmlRealloc(buf, size * sizeof(xmlChar));
7695: if (buf == NULL) {
7696: fprintf(stderr, "realloc of %d byte failed\n", size);
7697: return(NULL);
7698: }
7699: }
7700: buf[len++] = cur;
7701: NEXT;
7702: cur = CUR;
7703: if (cur == 0) {
7704: SHRINK;
7705: GROW;
7706: cur = CUR;
7707: }
7708: }
7709: buf[len] = 0;
1.29 daniel 7710: } else {
1.55 daniel 7711: if ((ctxt->sax != NULL) && (ctxt->sax->error != NULL))
1.74 daniel 7712: ctxt->sax->error(ctxt->userData, "Invalid XML encoding name\n");
1.59 daniel 7713: ctxt->wellFormed = 0;
1.123 daniel 7714: ctxt->errNo = XML_ERR_ENCODING_NAME;
1.29 daniel 7715: }
1.135 daniel 7716: return(buf);
1.29 daniel 7717: }
7718:
1.50 daniel 7719: /**
7720: * xmlParseEncodingDecl:
7721: * @ctxt: an XML parser context
7722: *
7723: * parse the XML encoding declaration
1.29 daniel 7724: *
7725: * [80] EncodingDecl ::= S 'encoding' Eq ('"' EncName '"' | "'" EncName "'")
1.50 daniel 7726: *
7727: * TODO: this should setup the conversion filters.
7728: *
1.68 daniel 7729: * Returns the encoding value or NULL
1.29 daniel 7730: */
7731:
1.123 daniel 7732: xmlChar *
1.55 daniel 7733: xmlParseEncodingDecl(xmlParserCtxtPtr ctxt) {
1.123 daniel 7734: xmlChar *encoding = NULL;
7735: const xmlChar *q;
1.29 daniel 7736:
1.42 daniel 7737: SKIP_BLANKS;
1.152 daniel 7738: if ((RAW == 'e') && (NXT(1) == 'n') &&
1.40 daniel 7739: (NXT(2) == 'c') && (NXT(3) == 'o') &&
7740: (NXT(4) == 'd') && (NXT(5) == 'i') &&
7741: (NXT(6) == 'n') && (NXT(7) == 'g')) {
7742: SKIP(8);
1.42 daniel 7743: SKIP_BLANKS;
1.152 daniel 7744: if (RAW != '=') {
1.55 daniel 7745: if ((ctxt->sax != NULL) && (ctxt->sax->error != NULL))
1.122 daniel 7746: ctxt->sax->error(ctxt->userData,
7747: "xmlParseEncodingDecl : expected '='\n");
1.59 daniel 7748: ctxt->wellFormed = 0;
1.123 daniel 7749: ctxt->errNo = XML_ERR_EQUAL_REQUIRED;
1.31 daniel 7750: return(NULL);
7751: }
1.40 daniel 7752: NEXT;
1.42 daniel 7753: SKIP_BLANKS;
1.152 daniel 7754: if (RAW == '"') {
1.40 daniel 7755: NEXT;
7756: q = CUR_PTR;
1.29 daniel 7757: encoding = xmlParseEncName(ctxt);
1.152 daniel 7758: if (RAW != '"') {
1.55 daniel 7759: if ((ctxt->sax != NULL) && (ctxt->sax->error != NULL))
1.122 daniel 7760: ctxt->sax->error(ctxt->userData,
7761: "String not closed\n%.50s\n", q);
1.59 daniel 7762: ctxt->wellFormed = 0;
1.123 daniel 7763: ctxt->errNo = XML_ERR_STRING_NOT_CLOSED;
1.55 daniel 7764: } else
1.40 daniel 7765: NEXT;
1.152 daniel 7766: } else if (RAW == '\''){
1.40 daniel 7767: NEXT;
7768: q = CUR_PTR;
1.29 daniel 7769: encoding = xmlParseEncName(ctxt);
1.152 daniel 7770: if (RAW != '\'') {
1.55 daniel 7771: if ((ctxt->sax != NULL) && (ctxt->sax->error != NULL))
1.122 daniel 7772: ctxt->sax->error(ctxt->userData,
7773: "String not closed\n%.50s\n", q);
1.59 daniel 7774: ctxt->wellFormed = 0;
1.123 daniel 7775: ctxt->errNo = XML_ERR_STRING_NOT_CLOSED;
1.55 daniel 7776: } else
1.40 daniel 7777: NEXT;
1.152 daniel 7778: } else if (RAW == '"'){
1.55 daniel 7779: if ((ctxt->sax != NULL) && (ctxt->sax->error != NULL))
1.74 daniel 7780: ctxt->sax->error(ctxt->userData,
1.59 daniel 7781: "xmlParseEncodingDecl : expected ' or \"\n");
7782: ctxt->wellFormed = 0;
1.123 daniel 7783: ctxt->errNo = XML_ERR_STRING_NOT_STARTED;
1.29 daniel 7784: }
7785: }
7786: return(encoding);
7787: }
7788:
1.50 daniel 7789: /**
7790: * xmlParseSDDecl:
7791: * @ctxt: an XML parser context
7792: *
7793: * parse the XML standalone declaration
1.29 daniel 7794: *
7795: * [32] SDDecl ::= S 'standalone' Eq
7796: * (("'" ('yes' | 'no') "'") | ('"' ('yes' | 'no')'"'))
1.99 daniel 7797: *
7798: * [ VC: Standalone Document Declaration ]
7799: * TODO The standalone document declaration must have the value "no"
7800: * if any external markup declarations contain declarations of:
7801: * - attributes with default values, if elements to which these
7802: * attributes apply appear in the document without specifications
7803: * of values for these attributes, or
7804: * - entities (other than amp, lt, gt, apos, quot), if references
7805: * to those entities appear in the document, or
7806: * - attributes with values subject to normalization, where the
7807: * attribute appears in the document with a value which will change
7808: * as a result of normalization, or
7809: * - element types with element content, if white space occurs directly
7810: * within any instance of those types.
1.68 daniel 7811: *
7812: * Returns 1 if standalone, 0 otherwise
1.29 daniel 7813: */
7814:
1.55 daniel 7815: int
7816: xmlParseSDDecl(xmlParserCtxtPtr ctxt) {
1.29 daniel 7817: int standalone = -1;
7818:
1.42 daniel 7819: SKIP_BLANKS;
1.152 daniel 7820: if ((RAW == 's') && (NXT(1) == 't') &&
1.40 daniel 7821: (NXT(2) == 'a') && (NXT(3) == 'n') &&
7822: (NXT(4) == 'd') && (NXT(5) == 'a') &&
7823: (NXT(6) == 'l') && (NXT(7) == 'o') &&
7824: (NXT(8) == 'n') && (NXT(9) == 'e')) {
7825: SKIP(10);
1.81 daniel 7826: SKIP_BLANKS;
1.152 daniel 7827: if (RAW != '=') {
1.55 daniel 7828: if ((ctxt->sax != NULL) && (ctxt->sax->error != NULL))
1.74 daniel 7829: ctxt->sax->error(ctxt->userData,
1.59 daniel 7830: "XML standalone declaration : expected '='\n");
1.123 daniel 7831: ctxt->errNo = XML_ERR_EQUAL_REQUIRED;
1.59 daniel 7832: ctxt->wellFormed = 0;
1.32 daniel 7833: return(standalone);
7834: }
1.40 daniel 7835: NEXT;
1.42 daniel 7836: SKIP_BLANKS;
1.152 daniel 7837: if (RAW == '\''){
1.40 daniel 7838: NEXT;
1.152 daniel 7839: if ((RAW == 'n') && (NXT(1) == 'o')) {
1.29 daniel 7840: standalone = 0;
1.40 daniel 7841: SKIP(2);
1.152 daniel 7842: } else if ((RAW == 'y') && (NXT(1) == 'e') &&
1.40 daniel 7843: (NXT(2) == 's')) {
1.29 daniel 7844: standalone = 1;
1.40 daniel 7845: SKIP(3);
1.29 daniel 7846: } else {
1.55 daniel 7847: if ((ctxt->sax != NULL) && (ctxt->sax->error != NULL))
1.122 daniel 7848: ctxt->sax->error(ctxt->userData,
7849: "standalone accepts only 'yes' or 'no'\n");
1.123 daniel 7850: ctxt->errNo = XML_ERR_STANDALONE_VALUE;
1.59 daniel 7851: ctxt->wellFormed = 0;
1.29 daniel 7852: }
1.152 daniel 7853: if (RAW != '\'') {
1.55 daniel 7854: if ((ctxt->sax != NULL) && (ctxt->sax->error != NULL))
1.74 daniel 7855: ctxt->sax->error(ctxt->userData, "String not closed\n");
1.123 daniel 7856: ctxt->errNo = XML_ERR_STRING_NOT_CLOSED;
1.59 daniel 7857: ctxt->wellFormed = 0;
1.55 daniel 7858: } else
1.40 daniel 7859: NEXT;
1.152 daniel 7860: } else if (RAW == '"'){
1.40 daniel 7861: NEXT;
1.152 daniel 7862: if ((RAW == 'n') && (NXT(1) == 'o')) {
1.29 daniel 7863: standalone = 0;
1.40 daniel 7864: SKIP(2);
1.152 daniel 7865: } else if ((RAW == 'y') && (NXT(1) == 'e') &&
1.40 daniel 7866: (NXT(2) == 's')) {
1.29 daniel 7867: standalone = 1;
1.40 daniel 7868: SKIP(3);
1.29 daniel 7869: } else {
1.55 daniel 7870: if ((ctxt->sax != NULL) && (ctxt->sax->error != NULL))
1.74 daniel 7871: ctxt->sax->error(ctxt->userData,
1.59 daniel 7872: "standalone accepts only 'yes' or 'no'\n");
1.123 daniel 7873: ctxt->errNo = XML_ERR_STANDALONE_VALUE;
1.59 daniel 7874: ctxt->wellFormed = 0;
1.29 daniel 7875: }
1.152 daniel 7876: if (RAW != '"') {
1.55 daniel 7877: if ((ctxt->sax != NULL) && (ctxt->sax->error != NULL))
1.74 daniel 7878: ctxt->sax->error(ctxt->userData, "String not closed\n");
1.59 daniel 7879: ctxt->wellFormed = 0;
1.123 daniel 7880: ctxt->errNo = XML_ERR_STRING_NOT_CLOSED;
1.55 daniel 7881: } else
1.40 daniel 7882: NEXT;
1.37 daniel 7883: } else {
1.55 daniel 7884: if ((ctxt->sax != NULL) && (ctxt->sax->error != NULL))
1.122 daniel 7885: ctxt->sax->error(ctxt->userData,
7886: "Standalone value not found\n");
1.59 daniel 7887: ctxt->wellFormed = 0;
1.123 daniel 7888: ctxt->errNo = XML_ERR_STRING_NOT_STARTED;
1.37 daniel 7889: }
1.29 daniel 7890: }
7891: return(standalone);
7892: }
7893:
1.50 daniel 7894: /**
7895: * xmlParseXMLDecl:
7896: * @ctxt: an XML parser context
7897: *
7898: * parse an XML declaration header
1.29 daniel 7899: *
7900: * [23] XMLDecl ::= '<?xml' VersionInfo EncodingDecl? SDDecl? S? '?>'
1.1 veillard 7901: */
7902:
1.55 daniel 7903: void
7904: xmlParseXMLDecl(xmlParserCtxtPtr ctxt) {
1.123 daniel 7905: xmlChar *version;
1.1 veillard 7906:
7907: /*
1.19 daniel 7908: * We know that '<?xml' is here.
1.1 veillard 7909: */
1.40 daniel 7910: SKIP(5);
1.1 veillard 7911:
1.153 daniel 7912: if (!IS_BLANK(RAW)) {
1.59 daniel 7913: if ((ctxt->sax != NULL) && (ctxt->sax->error != NULL))
1.74 daniel 7914: ctxt->sax->error(ctxt->userData, "Blank needed after '<?xml'\n");
1.123 daniel 7915: ctxt->errNo = XML_ERR_SPACE_REQUIRED;
1.59 daniel 7916: ctxt->wellFormed = 0;
7917: }
1.42 daniel 7918: SKIP_BLANKS;
1.1 veillard 7919:
7920: /*
1.29 daniel 7921: * We should have the VersionInfo here.
1.1 veillard 7922: */
1.29 daniel 7923: version = xmlParseVersionInfo(ctxt);
7924: if (version == NULL)
1.45 daniel 7925: version = xmlCharStrdup(XML_DEFAULT_VERSION);
1.72 daniel 7926: ctxt->version = xmlStrdup(version);
1.119 daniel 7927: xmlFree(version);
1.29 daniel 7928:
7929: /*
7930: * We may have the encoding declaration
7931: */
1.153 daniel 7932: if (!IS_BLANK(RAW)) {
1.152 daniel 7933: if ((RAW == '?') && (NXT(1) == '>')) {
1.59 daniel 7934: SKIP(2);
7935: return;
7936: }
7937: if ((ctxt->sax != NULL) && (ctxt->sax->error != NULL))
1.74 daniel 7938: ctxt->sax->error(ctxt->userData, "Blank needed here\n");
1.123 daniel 7939: ctxt->errNo = XML_ERR_SPACE_REQUIRED;
1.59 daniel 7940: ctxt->wellFormed = 0;
7941: }
1.164 daniel 7942: ctxt->input->encoding = xmlParseEncodingDecl(ctxt);
1.1 veillard 7943:
7944: /*
1.29 daniel 7945: * We may have the standalone status.
1.1 veillard 7946: */
1.164 daniel 7947: if ((ctxt->input->encoding != NULL) && (!IS_BLANK(RAW))) {
1.152 daniel 7948: if ((RAW == '?') && (NXT(1) == '>')) {
1.59 daniel 7949: SKIP(2);
7950: return;
7951: }
7952: if ((ctxt->sax != NULL) && (ctxt->sax->error != NULL))
1.74 daniel 7953: ctxt->sax->error(ctxt->userData, "Blank needed here\n");
1.59 daniel 7954: ctxt->wellFormed = 0;
1.123 daniel 7955: ctxt->errNo = XML_ERR_SPACE_REQUIRED;
1.59 daniel 7956: }
7957: SKIP_BLANKS;
1.167 daniel 7958: ctxt->input->standalone = xmlParseSDDecl(ctxt);
1.1 veillard 7959:
1.42 daniel 7960: SKIP_BLANKS;
1.152 daniel 7961: if ((RAW == '?') && (NXT(1) == '>')) {
1.40 daniel 7962: SKIP(2);
1.152 daniel 7963: } else if (RAW == '>') {
1.31 daniel 7964: /* Deprecated old WD ... */
1.55 daniel 7965: if ((ctxt->sax != NULL) && (ctxt->sax->error != NULL))
1.122 daniel 7966: ctxt->sax->error(ctxt->userData,
7967: "XML declaration must end-up with '?>'\n");
1.59 daniel 7968: ctxt->wellFormed = 0;
1.123 daniel 7969: ctxt->errNo = XML_ERR_XMLDECL_NOT_FINISHED;
1.40 daniel 7970: NEXT;
1.29 daniel 7971: } else {
1.55 daniel 7972: if ((ctxt->sax != NULL) && (ctxt->sax->error != NULL))
1.122 daniel 7973: ctxt->sax->error(ctxt->userData,
7974: "parsing XML declaration: '?>' expected\n");
1.59 daniel 7975: ctxt->wellFormed = 0;
1.123 daniel 7976: ctxt->errNo = XML_ERR_XMLDECL_NOT_FINISHED;
1.40 daniel 7977: MOVETO_ENDTAG(CUR_PTR);
7978: NEXT;
1.29 daniel 7979: }
1.1 veillard 7980: }
7981:
1.50 daniel 7982: /**
7983: * xmlParseMisc:
7984: * @ctxt: an XML parser context
7985: *
7986: * parse an XML Misc* optionnal field.
1.21 daniel 7987: *
1.22 daniel 7988: * [27] Misc ::= Comment | PI | S
1.1 veillard 7989: */
7990:
1.55 daniel 7991: void
7992: xmlParseMisc(xmlParserCtxtPtr ctxt) {
1.152 daniel 7993: while (((RAW == '<') && (NXT(1) == '?')) ||
7994: ((RAW == '<') && (NXT(1) == '!') &&
1.40 daniel 7995: (NXT(2) == '-') && (NXT(3) == '-')) ||
7996: IS_BLANK(CUR)) {
1.152 daniel 7997: if ((RAW == '<') && (NXT(1) == '?')) {
1.16 daniel 7998: xmlParsePI(ctxt);
1.40 daniel 7999: } else if (IS_BLANK(CUR)) {
8000: NEXT;
1.1 veillard 8001: } else
1.114 daniel 8002: xmlParseComment(ctxt);
1.1 veillard 8003: }
8004: }
8005:
1.50 daniel 8006: /**
8007: * xmlParseDocument :
8008: * @ctxt: an XML parser context
8009: *
8010: * parse an XML document (and build a tree if using the standard SAX
8011: * interface).
1.21 daniel 8012: *
1.22 daniel 8013: * [1] document ::= prolog element Misc*
1.29 daniel 8014: *
8015: * [22] prolog ::= XMLDecl? Misc* (doctypedecl Misc*)?
1.50 daniel 8016: *
1.68 daniel 8017: * Returns 0, -1 in case of error. the parser context is augmented
1.50 daniel 8018: * as a result of the parsing.
1.1 veillard 8019: */
8020:
1.55 daniel 8021: int
8022: xmlParseDocument(xmlParserCtxtPtr ctxt) {
1.156 daniel 8023: xmlChar start[4];
8024: xmlCharEncoding enc;
8025:
1.45 daniel 8026: xmlDefaultSAXHandlerInit();
8027:
1.91 daniel 8028: GROW;
8029:
1.14 veillard 8030: /*
1.44 daniel 8031: * SAX: beginning of the document processing.
8032: */
1.72 daniel 8033: if ((ctxt->sax) && (ctxt->sax->setDocumentLocator))
1.74 daniel 8034: ctxt->sax->setDocumentLocator(ctxt->userData, &xmlDefaultSAXLocator);
1.44 daniel 8035:
1.156 daniel 8036: /*
8037: * Get the 4 first bytes and decode the charset
8038: * if enc != XML_CHAR_ENCODING_NONE
8039: * plug some encoding conversion routines.
8040: */
8041: start[0] = RAW;
8042: start[1] = NXT(1);
8043: start[2] = NXT(2);
8044: start[3] = NXT(3);
8045: enc = xmlDetectCharEncoding(start, 4);
8046: if (enc != XML_CHAR_ENCODING_NONE) {
8047: xmlSwitchEncoding(ctxt, enc);
8048: }
8049:
1.1 veillard 8050:
1.59 daniel 8051: if (CUR == 0) {
8052: if ((ctxt->sax != NULL) && (ctxt->sax->error != NULL))
1.74 daniel 8053: ctxt->sax->error(ctxt->userData, "Document is empty\n");
1.123 daniel 8054: ctxt->errNo = XML_ERR_DOCUMENT_EMPTY;
1.59 daniel 8055: ctxt->wellFormed = 0;
8056: }
1.1 veillard 8057:
8058: /*
8059: * Check for the XMLDecl in the Prolog.
8060: */
1.91 daniel 8061: GROW;
1.152 daniel 8062: if ((RAW == '<') && (NXT(1) == '?') &&
1.40 daniel 8063: (NXT(2) == 'x') && (NXT(3) == 'm') &&
1.142 daniel 8064: (NXT(4) == 'l') && (IS_BLANK(NXT(5)))) {
1.19 daniel 8065: xmlParseXMLDecl(ctxt);
1.167 daniel 8066: ctxt->standalone = ctxt->input->standalone;
1.42 daniel 8067: SKIP_BLANKS;
1.164 daniel 8068: if ((ctxt->encoding == NULL) && (ctxt->input->encoding != NULL))
8069: ctxt->encoding = xmlStrdup(ctxt->input->encoding);
8070:
1.1 veillard 8071: } else {
1.72 daniel 8072: ctxt->version = xmlCharStrdup(XML_DEFAULT_VERSION);
1.1 veillard 8073: }
1.171 daniel 8074: if ((ctxt->sax) && (ctxt->sax->startDocument) && (!ctxt->disableSAX))
1.74 daniel 8075: ctxt->sax->startDocument(ctxt->userData);
1.1 veillard 8076:
8077: /*
8078: * The Misc part of the Prolog
8079: */
1.91 daniel 8080: GROW;
1.16 daniel 8081: xmlParseMisc(ctxt);
1.1 veillard 8082:
8083: /*
1.29 daniel 8084: * Then possibly doc type declaration(s) and more Misc
1.21 daniel 8085: * (doctypedecl Misc*)?
8086: */
1.91 daniel 8087: GROW;
1.152 daniel 8088: if ((RAW == '<') && (NXT(1) == '!') &&
1.40 daniel 8089: (NXT(2) == 'D') && (NXT(3) == 'O') &&
8090: (NXT(4) == 'C') && (NXT(5) == 'T') &&
8091: (NXT(6) == 'Y') && (NXT(7) == 'P') &&
8092: (NXT(8) == 'E')) {
1.165 daniel 8093:
1.166 daniel 8094: ctxt->inSubset = 1;
1.22 daniel 8095: xmlParseDocTypeDecl(ctxt);
1.152 daniel 8096: if (RAW == '[') {
1.140 daniel 8097: ctxt->instate = XML_PARSER_DTD;
8098: xmlParseInternalSubset(ctxt);
8099: }
1.165 daniel 8100:
8101: /*
8102: * Create and update the external subset.
8103: */
1.166 daniel 8104: ctxt->inSubset = 2;
1.171 daniel 8105: if ((ctxt->sax != NULL) && (ctxt->sax->externalSubset != NULL) &&
8106: (!ctxt->disableSAX))
1.165 daniel 8107: ctxt->sax->externalSubset(ctxt->userData, ctxt->intSubName,
8108: ctxt->extSubSystem, ctxt->extSubURI);
1.166 daniel 8109: ctxt->inSubset = 0;
1.165 daniel 8110:
8111:
1.96 daniel 8112: ctxt->instate = XML_PARSER_PROLOG;
1.22 daniel 8113: xmlParseMisc(ctxt);
1.21 daniel 8114: }
8115:
8116: /*
8117: * Time to start parsing the tree itself
1.1 veillard 8118: */
1.91 daniel 8119: GROW;
1.152 daniel 8120: if (RAW != '<') {
1.59 daniel 8121: if ((ctxt->sax != NULL) && (ctxt->sax->error != NULL))
1.74 daniel 8122: ctxt->sax->error(ctxt->userData,
1.151 daniel 8123: "Start tag expected, '<' not found\n");
1.140 daniel 8124: ctxt->errNo = XML_ERR_DOCUMENT_EMPTY;
1.59 daniel 8125: ctxt->wellFormed = 0;
1.140 daniel 8126: ctxt->instate = XML_PARSER_EOF;
8127: } else {
8128: ctxt->instate = XML_PARSER_CONTENT;
8129: xmlParseElement(ctxt);
8130: ctxt->instate = XML_PARSER_EPILOG;
8131:
8132:
8133: /*
8134: * The Misc part at the end
8135: */
8136: xmlParseMisc(ctxt);
8137:
1.152 daniel 8138: if (RAW != 0) {
1.140 daniel 8139: if ((ctxt->sax != NULL) && (ctxt->sax->error != NULL))
8140: ctxt->sax->error(ctxt->userData,
8141: "Extra content at the end of the document\n");
8142: ctxt->wellFormed = 0;
8143: ctxt->errNo = XML_ERR_DOCUMENT_END;
8144: }
8145: ctxt->instate = XML_PARSER_EOF;
1.59 daniel 8146: }
8147:
1.44 daniel 8148: /*
8149: * SAX: end of the document processing.
8150: */
1.171 daniel 8151: if ((ctxt->sax) && (ctxt->sax->endDocument != NULL) &&
8152: (!ctxt->disableSAX))
1.74 daniel 8153: ctxt->sax->endDocument(ctxt->userData);
1.151 daniel 8154:
8155: /*
8156: * Grab the encoding if it was added on-the-fly
8157: */
8158: if ((ctxt->encoding != NULL) && (ctxt->myDoc != NULL) &&
8159: (ctxt->myDoc->encoding == NULL)) {
8160: ctxt->myDoc->encoding = ctxt->encoding;
8161: ctxt->encoding = NULL;
8162: }
1.59 daniel 8163: if (! ctxt->wellFormed) return(-1);
1.16 daniel 8164: return(0);
8165: }
8166:
1.98 daniel 8167: /************************************************************************
8168: * *
1.128 daniel 8169: * Progressive parsing interfaces *
8170: * *
8171: ************************************************************************/
8172:
8173: /**
8174: * xmlParseLookupSequence:
8175: * @ctxt: an XML parser context
8176: * @first: the first char to lookup
1.140 daniel 8177: * @next: the next char to lookup or zero
8178: * @third: the next char to lookup or zero
1.128 daniel 8179: *
1.140 daniel 8180: * Try to find if a sequence (first, next, third) or just (first next) or
8181: * (first) is available in the input stream.
8182: * This function has a side effect of (possibly) incrementing ctxt->checkIndex
8183: * to avoid rescanning sequences of bytes, it DOES change the state of the
8184: * parser, do not use liberally.
1.128 daniel 8185: *
1.140 daniel 8186: * Returns the index to the current parsing point if the full sequence
8187: * is available, -1 otherwise.
1.128 daniel 8188: */
8189: int
1.140 daniel 8190: xmlParseLookupSequence(xmlParserCtxtPtr ctxt, xmlChar first,
8191: xmlChar next, xmlChar third) {
8192: int base, len;
8193: xmlParserInputPtr in;
8194: const xmlChar *buf;
8195:
8196: in = ctxt->input;
8197: if (in == NULL) return(-1);
8198: base = in->cur - in->base;
8199: if (base < 0) return(-1);
8200: if (ctxt->checkIndex > base)
8201: base = ctxt->checkIndex;
8202: if (in->buf == NULL) {
8203: buf = in->base;
8204: len = in->length;
8205: } else {
8206: buf = in->buf->buffer->content;
8207: len = in->buf->buffer->use;
8208: }
8209: /* take into account the sequence length */
8210: if (third) len -= 2;
8211: else if (next) len --;
8212: for (;base < len;base++) {
8213: if (buf[base] == first) {
8214: if (third != 0) {
8215: if ((buf[base + 1] != next) ||
8216: (buf[base + 2] != third)) continue;
8217: } else if (next != 0) {
8218: if (buf[base + 1] != next) continue;
8219: }
8220: ctxt->checkIndex = 0;
8221: #ifdef DEBUG_PUSH
8222: if (next == 0)
8223: fprintf(stderr, "PP: lookup '%c' found at %d\n",
8224: first, base);
8225: else if (third == 0)
8226: fprintf(stderr, "PP: lookup '%c%c' found at %d\n",
8227: first, next, base);
8228: else
8229: fprintf(stderr, "PP: lookup '%c%c%c' found at %d\n",
8230: first, next, third, base);
8231: #endif
8232: return(base - (in->cur - in->base));
8233: }
8234: }
8235: ctxt->checkIndex = base;
8236: #ifdef DEBUG_PUSH
8237: if (next == 0)
8238: fprintf(stderr, "PP: lookup '%c' failed\n", first);
8239: else if (third == 0)
8240: fprintf(stderr, "PP: lookup '%c%c' failed\n", first, next);
8241: else
8242: fprintf(stderr, "PP: lookup '%c%c%c' failed\n", first, next, third);
8243: #endif
8244: return(-1);
1.128 daniel 8245: }
8246:
8247: /**
1.143 daniel 8248: * xmlParseTryOrFinish:
1.128 daniel 8249: * @ctxt: an XML parser context
1.143 daniel 8250: * @terminate: last chunk indicator
1.128 daniel 8251: *
8252: * Try to progress on parsing
8253: *
8254: * Returns zero if no parsing was possible
8255: */
8256: int
1.143 daniel 8257: xmlParseTryOrFinish(xmlParserCtxtPtr ctxt, int terminate) {
1.128 daniel 8258: int ret = 0;
1.140 daniel 8259: xmlParserInputPtr in;
8260: int avail;
8261: xmlChar cur, next;
8262:
8263: #ifdef DEBUG_PUSH
8264: switch (ctxt->instate) {
8265: case XML_PARSER_EOF:
8266: fprintf(stderr, "PP: try EOF\n"); break;
8267: case XML_PARSER_START:
8268: fprintf(stderr, "PP: try START\n"); break;
8269: case XML_PARSER_MISC:
8270: fprintf(stderr, "PP: try MISC\n");break;
8271: case XML_PARSER_COMMENT:
8272: fprintf(stderr, "PP: try COMMENT\n");break;
8273: case XML_PARSER_PROLOG:
8274: fprintf(stderr, "PP: try PROLOG\n");break;
8275: case XML_PARSER_START_TAG:
8276: fprintf(stderr, "PP: try START_TAG\n");break;
8277: case XML_PARSER_CONTENT:
8278: fprintf(stderr, "PP: try CONTENT\n");break;
8279: case XML_PARSER_CDATA_SECTION:
8280: fprintf(stderr, "PP: try CDATA_SECTION\n");break;
8281: case XML_PARSER_END_TAG:
8282: fprintf(stderr, "PP: try END_TAG\n");break;
8283: case XML_PARSER_ENTITY_DECL:
8284: fprintf(stderr, "PP: try ENTITY_DECL\n");break;
8285: case XML_PARSER_ENTITY_VALUE:
8286: fprintf(stderr, "PP: try ENTITY_VALUE\n");break;
8287: case XML_PARSER_ATTRIBUTE_VALUE:
8288: fprintf(stderr, "PP: try ATTRIBUTE_VALUE\n");break;
8289: case XML_PARSER_DTD:
8290: fprintf(stderr, "PP: try DTD\n");break;
8291: case XML_PARSER_EPILOG:
8292: fprintf(stderr, "PP: try EPILOG\n");break;
8293: case XML_PARSER_PI:
8294: fprintf(stderr, "PP: try PI\n");break;
8295: }
8296: #endif
1.128 daniel 8297:
8298: while (1) {
1.140 daniel 8299: /*
8300: * Pop-up of finished entities.
8301: */
1.152 daniel 8302: while ((RAW == 0) && (ctxt->inputNr > 1))
1.140 daniel 8303: xmlPopInput(ctxt);
8304:
8305: in = ctxt->input;
8306: if (in == NULL) break;
8307: if (in->buf == NULL)
8308: avail = in->length - (in->cur - in->base);
8309: else
8310: avail = in->buf->buffer->use - (in->cur - in->base);
8311: if (avail < 1)
8312: goto done;
1.128 daniel 8313: switch (ctxt->instate) {
8314: case XML_PARSER_EOF:
1.140 daniel 8315: /*
8316: * Document parsing is done !
8317: */
8318: goto done;
8319: case XML_PARSER_START:
8320: /*
8321: * Very first chars read from the document flow.
8322: */
8323: cur = in->cur[0];
8324: if (IS_BLANK(cur)) {
8325: if ((ctxt->sax) && (ctxt->sax->setDocumentLocator))
8326: ctxt->sax->setDocumentLocator(ctxt->userData,
8327: &xmlDefaultSAXLocator);
8328: if ((ctxt->sax != NULL) && (ctxt->sax->error != NULL))
8329: ctxt->sax->error(ctxt->userData,
8330: "Extra spaces at the beginning of the document are not allowed\n");
8331: ctxt->errNo = XML_ERR_DOCUMENT_START;
8332: ctxt->wellFormed = 0;
8333: SKIP_BLANKS;
8334: ret++;
8335: if (in->buf == NULL)
8336: avail = in->length - (in->cur - in->base);
8337: else
8338: avail = in->buf->buffer->use - (in->cur - in->base);
8339: }
8340: if (avail < 2)
8341: goto done;
8342:
8343: cur = in->cur[0];
8344: next = in->cur[1];
8345: if (cur == 0) {
8346: if ((ctxt->sax) && (ctxt->sax->setDocumentLocator))
8347: ctxt->sax->setDocumentLocator(ctxt->userData,
8348: &xmlDefaultSAXLocator);
8349: if ((ctxt->sax != NULL) && (ctxt->sax->error != NULL))
8350: ctxt->sax->error(ctxt->userData, "Document is empty\n");
8351: ctxt->errNo = XML_ERR_DOCUMENT_EMPTY;
8352: ctxt->wellFormed = 0;
8353: ctxt->instate = XML_PARSER_EOF;
8354: #ifdef DEBUG_PUSH
8355: fprintf(stderr, "PP: entering EOF\n");
8356: #endif
8357: if ((ctxt->sax) && (ctxt->sax->endDocument != NULL))
8358: ctxt->sax->endDocument(ctxt->userData);
8359: goto done;
8360: }
8361: if ((cur == '<') && (next == '?')) {
8362: /* PI or XML decl */
8363: if (avail < 5) return(ret);
1.143 daniel 8364: if ((!terminate) &&
8365: (xmlParseLookupSequence(ctxt, '?', '>', 0) < 0))
1.140 daniel 8366: return(ret);
8367: if ((ctxt->sax) && (ctxt->sax->setDocumentLocator))
8368: ctxt->sax->setDocumentLocator(ctxt->userData,
8369: &xmlDefaultSAXLocator);
8370: if ((in->cur[2] == 'x') &&
8371: (in->cur[3] == 'm') &&
1.142 daniel 8372: (in->cur[4] == 'l') &&
8373: (IS_BLANK(in->cur[5]))) {
1.140 daniel 8374: ret += 5;
8375: #ifdef DEBUG_PUSH
8376: fprintf(stderr, "PP: Parsing XML Decl\n");
8377: #endif
8378: xmlParseXMLDecl(ctxt);
1.167 daniel 8379: ctxt->standalone = ctxt->input->standalone;
1.164 daniel 8380: if ((ctxt->encoding == NULL) &&
8381: (ctxt->input->encoding != NULL))
8382: ctxt->encoding = xmlStrdup(ctxt->input->encoding);
1.171 daniel 8383: if ((ctxt->sax) && (ctxt->sax->startDocument) &&
8384: (!ctxt->disableSAX))
1.140 daniel 8385: ctxt->sax->startDocument(ctxt->userData);
8386: ctxt->instate = XML_PARSER_MISC;
8387: #ifdef DEBUG_PUSH
8388: fprintf(stderr, "PP: entering MISC\n");
8389: #endif
8390: } else {
8391: ctxt->version = xmlCharStrdup(XML_DEFAULT_VERSION);
1.171 daniel 8392: if ((ctxt->sax) && (ctxt->sax->startDocument) &&
8393: (!ctxt->disableSAX))
1.140 daniel 8394: ctxt->sax->startDocument(ctxt->userData);
8395: ctxt->instate = XML_PARSER_MISC;
8396: #ifdef DEBUG_PUSH
8397: fprintf(stderr, "PP: entering MISC\n");
8398: #endif
8399: }
8400: } else {
8401: if ((ctxt->sax) && (ctxt->sax->setDocumentLocator))
8402: ctxt->sax->setDocumentLocator(ctxt->userData,
8403: &xmlDefaultSAXLocator);
8404: ctxt->version = xmlCharStrdup(XML_DEFAULT_VERSION);
1.171 daniel 8405: if ((ctxt->sax) && (ctxt->sax->startDocument) &&
8406: (!ctxt->disableSAX))
1.140 daniel 8407: ctxt->sax->startDocument(ctxt->userData);
8408: ctxt->instate = XML_PARSER_MISC;
8409: #ifdef DEBUG_PUSH
8410: fprintf(stderr, "PP: entering MISC\n");
8411: #endif
8412: }
8413: break;
8414: case XML_PARSER_MISC:
8415: SKIP_BLANKS;
8416: if (in->buf == NULL)
8417: avail = in->length - (in->cur - in->base);
8418: else
8419: avail = in->buf->buffer->use - (in->cur - in->base);
8420: if (avail < 2)
8421: goto done;
8422: cur = in->cur[0];
8423: next = in->cur[1];
8424: if ((cur == '<') && (next == '?')) {
1.143 daniel 8425: if ((!terminate) &&
8426: (xmlParseLookupSequence(ctxt, '?', '>', 0) < 0))
1.140 daniel 8427: goto done;
8428: #ifdef DEBUG_PUSH
8429: fprintf(stderr, "PP: Parsing PI\n");
8430: #endif
8431: xmlParsePI(ctxt);
8432: } else if ((cur == '<') && (next == '!') &&
8433: (in->cur[2] == '-') && (in->cur[3] == '-')) {
1.143 daniel 8434: if ((!terminate) &&
8435: (xmlParseLookupSequence(ctxt, '-', '-', '>') < 0))
1.140 daniel 8436: goto done;
8437: #ifdef DEBUG_PUSH
8438: fprintf(stderr, "PP: Parsing Comment\n");
8439: #endif
8440: xmlParseComment(ctxt);
8441: ctxt->instate = XML_PARSER_MISC;
8442: } else if ((cur == '<') && (next == '!') &&
8443: (in->cur[2] == 'D') && (in->cur[3] == 'O') &&
8444: (in->cur[4] == 'C') && (in->cur[5] == 'T') &&
8445: (in->cur[6] == 'Y') && (in->cur[7] == 'P') &&
8446: (in->cur[8] == 'E')) {
1.143 daniel 8447: if ((!terminate) &&
8448: (xmlParseLookupSequence(ctxt, '>', 0, 0) < 0))
1.140 daniel 8449: goto done;
8450: #ifdef DEBUG_PUSH
8451: fprintf(stderr, "PP: Parsing internal subset\n");
8452: #endif
1.166 daniel 8453: ctxt->inSubset = 1;
1.140 daniel 8454: xmlParseDocTypeDecl(ctxt);
1.152 daniel 8455: if (RAW == '[') {
1.140 daniel 8456: ctxt->instate = XML_PARSER_DTD;
8457: #ifdef DEBUG_PUSH
8458: fprintf(stderr, "PP: entering DTD\n");
8459: #endif
8460: } else {
1.166 daniel 8461: /*
8462: * Create and update the external subset.
8463: */
8464: ctxt->inSubset = 2;
1.171 daniel 8465: if ((ctxt->sax != NULL) && (!ctxt->disableSAX) &&
1.166 daniel 8466: (ctxt->sax->externalSubset != NULL))
8467: ctxt->sax->externalSubset(ctxt->userData,
8468: ctxt->intSubName, ctxt->extSubSystem,
8469: ctxt->extSubURI);
8470: ctxt->inSubset = 0;
1.140 daniel 8471: ctxt->instate = XML_PARSER_PROLOG;
8472: #ifdef DEBUG_PUSH
8473: fprintf(stderr, "PP: entering PROLOG\n");
8474: #endif
8475: }
8476: } else if ((cur == '<') && (next == '!') &&
8477: (avail < 9)) {
8478: goto done;
8479: } else {
8480: ctxt->instate = XML_PARSER_START_TAG;
8481: #ifdef DEBUG_PUSH
8482: fprintf(stderr, "PP: entering START_TAG\n");
8483: #endif
8484: }
8485: break;
1.128 daniel 8486: case XML_PARSER_PROLOG:
1.140 daniel 8487: SKIP_BLANKS;
8488: if (in->buf == NULL)
8489: avail = in->length - (in->cur - in->base);
8490: else
8491: avail = in->buf->buffer->use - (in->cur - in->base);
8492: if (avail < 2)
8493: goto done;
8494: cur = in->cur[0];
8495: next = in->cur[1];
8496: if ((cur == '<') && (next == '?')) {
1.143 daniel 8497: if ((!terminate) &&
8498: (xmlParseLookupSequence(ctxt, '?', '>', 0) < 0))
1.140 daniel 8499: goto done;
8500: #ifdef DEBUG_PUSH
8501: fprintf(stderr, "PP: Parsing PI\n");
8502: #endif
8503: xmlParsePI(ctxt);
8504: } else if ((cur == '<') && (next == '!') &&
8505: (in->cur[2] == '-') && (in->cur[3] == '-')) {
1.143 daniel 8506: if ((!terminate) &&
8507: (xmlParseLookupSequence(ctxt, '-', '-', '>') < 0))
1.140 daniel 8508: goto done;
8509: #ifdef DEBUG_PUSH
8510: fprintf(stderr, "PP: Parsing Comment\n");
8511: #endif
8512: xmlParseComment(ctxt);
8513: ctxt->instate = XML_PARSER_PROLOG;
8514: } else if ((cur == '<') && (next == '!') &&
8515: (avail < 4)) {
8516: goto done;
8517: } else {
8518: ctxt->instate = XML_PARSER_START_TAG;
8519: #ifdef DEBUG_PUSH
8520: fprintf(stderr, "PP: entering START_TAG\n");
8521: #endif
8522: }
8523: break;
8524: case XML_PARSER_EPILOG:
8525: SKIP_BLANKS;
8526: if (in->buf == NULL)
8527: avail = in->length - (in->cur - in->base);
8528: else
8529: avail = in->buf->buffer->use - (in->cur - in->base);
8530: if (avail < 2)
8531: goto done;
8532: cur = in->cur[0];
8533: next = in->cur[1];
8534: if ((cur == '<') && (next == '?')) {
1.143 daniel 8535: if ((!terminate) &&
8536: (xmlParseLookupSequence(ctxt, '?', '>', 0) < 0))
1.140 daniel 8537: goto done;
8538: #ifdef DEBUG_PUSH
8539: fprintf(stderr, "PP: Parsing PI\n");
8540: #endif
8541: xmlParsePI(ctxt);
8542: ctxt->instate = XML_PARSER_EPILOG;
8543: } else if ((cur == '<') && (next == '!') &&
8544: (in->cur[2] == '-') && (in->cur[3] == '-')) {
1.143 daniel 8545: if ((!terminate) &&
8546: (xmlParseLookupSequence(ctxt, '-', '-', '>') < 0))
1.140 daniel 8547: goto done;
8548: #ifdef DEBUG_PUSH
8549: fprintf(stderr, "PP: Parsing Comment\n");
8550: #endif
8551: xmlParseComment(ctxt);
8552: ctxt->instate = XML_PARSER_EPILOG;
8553: } else if ((cur == '<') && (next == '!') &&
8554: (avail < 4)) {
8555: goto done;
8556: } else {
8557: if ((ctxt->sax != NULL) && (ctxt->sax->error != NULL))
8558: ctxt->sax->error(ctxt->userData,
8559: "Extra content at the end of the document\n");
8560: ctxt->wellFormed = 0;
8561: ctxt->errNo = XML_ERR_DOCUMENT_END;
8562: ctxt->instate = XML_PARSER_EOF;
8563: #ifdef DEBUG_PUSH
8564: fprintf(stderr, "PP: entering EOF\n");
8565: #endif
1.171 daniel 8566: if ((ctxt->sax) && (ctxt->sax->endDocument != NULL) &&
8567: (!ctxt->disableSAX))
1.140 daniel 8568: ctxt->sax->endDocument(ctxt->userData);
8569: goto done;
8570: }
8571: break;
8572: case XML_PARSER_START_TAG: {
8573: xmlChar *name, *oldname;
8574:
8575: if (avail < 2)
8576: goto done;
8577: cur = in->cur[0];
8578: if (cur != '<') {
8579: if ((ctxt->sax != NULL) && (ctxt->sax->error != NULL))
8580: ctxt->sax->error(ctxt->userData,
8581: "Start tag expect, '<' not found\n");
8582: ctxt->errNo = XML_ERR_DOCUMENT_EMPTY;
8583: ctxt->wellFormed = 0;
8584: ctxt->instate = XML_PARSER_EOF;
8585: #ifdef DEBUG_PUSH
8586: fprintf(stderr, "PP: entering EOF\n");
8587: #endif
1.171 daniel 8588: if ((ctxt->sax) && (ctxt->sax->endDocument != NULL) &&
8589: (!ctxt->disableSAX))
1.140 daniel 8590: ctxt->sax->endDocument(ctxt->userData);
8591: goto done;
8592: }
1.143 daniel 8593: if ((!terminate) &&
8594: (xmlParseLookupSequence(ctxt, '>', 0, 0) < 0))
1.140 daniel 8595: goto done;
1.176 daniel 8596: if (ctxt->spaceNr == 0)
8597: spacePush(ctxt, -1);
8598: else
8599: spacePush(ctxt, *ctxt->space);
1.140 daniel 8600: name = xmlParseStartTag(ctxt);
8601: if (name == NULL) {
1.176 daniel 8602: spacePop(ctxt);
1.140 daniel 8603: ctxt->instate = XML_PARSER_EOF;
8604: #ifdef DEBUG_PUSH
8605: fprintf(stderr, "PP: entering EOF\n");
8606: #endif
1.171 daniel 8607: if ((ctxt->sax) && (ctxt->sax->endDocument != NULL) &&
8608: (!ctxt->disableSAX))
1.140 daniel 8609: ctxt->sax->endDocument(ctxt->userData);
8610: goto done;
8611: }
8612: namePush(ctxt, xmlStrdup(name));
8613:
8614: /*
8615: * [ VC: Root Element Type ]
8616: * The Name in the document type declaration must match
8617: * the element type of the root element.
8618: */
8619: if (ctxt->validate && ctxt->wellFormed && ctxt->myDoc &&
1.157 daniel 8620: ctxt->node && (ctxt->node == ctxt->myDoc->children))
1.140 daniel 8621: ctxt->valid &= xmlValidateRoot(&ctxt->vctxt, ctxt->myDoc);
8622:
8623: /*
8624: * Check for an Empty Element.
8625: */
1.152 daniel 8626: if ((RAW == '/') && (NXT(1) == '>')) {
1.140 daniel 8627: SKIP(2);
1.171 daniel 8628: if ((ctxt->sax != NULL) &&
8629: (ctxt->sax->endElement != NULL) && (!ctxt->disableSAX))
1.140 daniel 8630: ctxt->sax->endElement(ctxt->userData, name);
8631: xmlFree(name);
8632: oldname = namePop(ctxt);
1.176 daniel 8633: spacePop(ctxt);
1.140 daniel 8634: if (oldname != NULL) {
8635: #ifdef DEBUG_STACK
8636: fprintf(stderr,"Close: popped %s\n", oldname);
8637: #endif
8638: xmlFree(oldname);
8639: }
8640: if (ctxt->name == NULL) {
8641: ctxt->instate = XML_PARSER_EPILOG;
8642: #ifdef DEBUG_PUSH
8643: fprintf(stderr, "PP: entering EPILOG\n");
8644: #endif
8645: } else {
8646: ctxt->instate = XML_PARSER_CONTENT;
8647: #ifdef DEBUG_PUSH
8648: fprintf(stderr, "PP: entering CONTENT\n");
8649: #endif
8650: }
8651: break;
8652: }
1.152 daniel 8653: if (RAW == '>') {
1.140 daniel 8654: NEXT;
8655: } else {
8656: if ((ctxt->sax != NULL) && (ctxt->sax->error != NULL))
8657: ctxt->sax->error(ctxt->userData,
8658: "Couldn't find end of Start Tag %s\n",
8659: name);
8660: ctxt->wellFormed = 0;
8661: ctxt->errNo = XML_ERR_GT_REQUIRED;
8662:
8663: /*
8664: * end of parsing of this node.
8665: */
8666: nodePop(ctxt);
8667: oldname = namePop(ctxt);
1.176 daniel 8668: spacePop(ctxt);
1.140 daniel 8669: if (oldname != NULL) {
8670: #ifdef DEBUG_STACK
8671: fprintf(stderr,"Close: popped %s\n", oldname);
8672: #endif
8673: xmlFree(oldname);
8674: }
8675: }
8676: xmlFree(name);
8677: ctxt->instate = XML_PARSER_CONTENT;
8678: #ifdef DEBUG_PUSH
8679: fprintf(stderr, "PP: entering CONTENT\n");
8680: #endif
8681: break;
8682: }
1.128 daniel 8683: case XML_PARSER_CONTENT:
1.140 daniel 8684: /*
8685: * Handle preparsed entities and charRef
8686: */
8687: if (ctxt->token != 0) {
8688: xmlChar cur[2] = { 0 , 0 } ;
8689:
8690: cur[0] = (xmlChar) ctxt->token;
1.171 daniel 8691: if ((ctxt->sax != NULL) && (!ctxt->disableSAX) &&
8692: (ctxt->sax->characters != NULL))
1.140 daniel 8693: ctxt->sax->characters(ctxt->userData, cur, 1);
8694: ctxt->token = 0;
8695: }
8696: if (avail < 2)
8697: goto done;
8698: cur = in->cur[0];
8699: next = in->cur[1];
8700: if ((cur == '<') && (next == '?')) {
1.143 daniel 8701: if ((!terminate) &&
8702: (xmlParseLookupSequence(ctxt, '?', '>', 0) < 0))
1.140 daniel 8703: goto done;
8704: #ifdef DEBUG_PUSH
8705: fprintf(stderr, "PP: Parsing PI\n");
8706: #endif
8707: xmlParsePI(ctxt);
8708: } else if ((cur == '<') && (next == '!') &&
8709: (in->cur[2] == '-') && (in->cur[3] == '-')) {
1.143 daniel 8710: if ((!terminate) &&
8711: (xmlParseLookupSequence(ctxt, '-', '-', '>') < 0))
1.140 daniel 8712: goto done;
8713: #ifdef DEBUG_PUSH
8714: fprintf(stderr, "PP: Parsing Comment\n");
8715: #endif
8716: xmlParseComment(ctxt);
8717: ctxt->instate = XML_PARSER_CONTENT;
8718: } else if ((cur == '<') && (in->cur[1] == '!') &&
8719: (in->cur[2] == '[') && (NXT(3) == 'C') &&
8720: (in->cur[4] == 'D') && (NXT(5) == 'A') &&
8721: (in->cur[6] == 'T') && (NXT(7) == 'A') &&
8722: (in->cur[8] == '[')) {
8723: SKIP(9);
8724: ctxt->instate = XML_PARSER_CDATA_SECTION;
8725: #ifdef DEBUG_PUSH
8726: fprintf(stderr, "PP: entering CDATA_SECTION\n");
8727: #endif
8728: break;
8729: } else if ((cur == '<') && (next == '!') &&
8730: (avail < 9)) {
8731: goto done;
8732: } else if ((cur == '<') && (next == '/')) {
8733: ctxt->instate = XML_PARSER_END_TAG;
8734: #ifdef DEBUG_PUSH
8735: fprintf(stderr, "PP: entering END_TAG\n");
8736: #endif
8737: break;
8738: } else if (cur == '<') {
8739: ctxt->instate = XML_PARSER_START_TAG;
8740: #ifdef DEBUG_PUSH
8741: fprintf(stderr, "PP: entering START_TAG\n");
8742: #endif
8743: break;
8744: } else if (cur == '&') {
1.143 daniel 8745: if ((!terminate) &&
8746: (xmlParseLookupSequence(ctxt, ';', 0, 0) < 0))
1.140 daniel 8747: goto done;
8748: #ifdef DEBUG_PUSH
8749: fprintf(stderr, "PP: Parsing Reference\n");
8750: #endif
8751: /* TODO: check generation of subtrees if noent !!! */
8752: xmlParseReference(ctxt);
8753: } else {
1.156 daniel 8754: /* TODO Avoid the extra copy, handle directly !!! */
1.140 daniel 8755: /*
8756: * Goal of the following test is :
8757: * - minimize calls to the SAX 'character' callback
8758: * when they are mergeable
8759: * - handle an problem for isBlank when we only parse
8760: * a sequence of blank chars and the next one is
8761: * not available to check against '<' presence.
8762: * - tries to homogenize the differences in SAX
8763: * callbacks beween the push and pull versions
8764: * of the parser.
8765: */
8766: if ((ctxt->inputNr == 1) &&
8767: (avail < XML_PARSER_BIG_BUFFER_SIZE)) {
1.143 daniel 8768: if ((!terminate) &&
8769: (xmlParseLookupSequence(ctxt, '<', 0, 0) < 0))
1.140 daniel 8770: goto done;
8771: }
8772: ctxt->checkIndex = 0;
8773: #ifdef DEBUG_PUSH
8774: fprintf(stderr, "PP: Parsing char data\n");
8775: #endif
8776: xmlParseCharData(ctxt, 0);
8777: }
8778: /*
8779: * Pop-up of finished entities.
8780: */
1.152 daniel 8781: while ((RAW == 0) && (ctxt->inputNr > 1))
1.140 daniel 8782: xmlPopInput(ctxt);
8783: break;
8784: case XML_PARSER_CDATA_SECTION: {
8785: /*
8786: * The Push mode need to have the SAX callback for
8787: * cdataBlock merge back contiguous callbacks.
8788: */
8789: int base;
8790:
8791: in = ctxt->input;
8792: base = xmlParseLookupSequence(ctxt, ']', ']', '>');
8793: if (base < 0) {
8794: if (avail >= XML_PARSER_BIG_BUFFER_SIZE + 2) {
1.171 daniel 8795: if ((ctxt->sax != NULL) && (!ctxt->disableSAX)) {
1.140 daniel 8796: if (ctxt->sax->cdataBlock != NULL)
8797: ctxt->sax->cdataBlock(ctxt->userData, in->cur,
8798: XML_PARSER_BIG_BUFFER_SIZE);
8799: }
8800: SKIP(XML_PARSER_BIG_BUFFER_SIZE);
8801: ctxt->checkIndex = 0;
8802: }
8803: goto done;
8804: } else {
1.171 daniel 8805: if ((ctxt->sax != NULL) && (base > 0) &&
8806: (!ctxt->disableSAX)) {
1.140 daniel 8807: if (ctxt->sax->cdataBlock != NULL)
8808: ctxt->sax->cdataBlock(ctxt->userData,
8809: in->cur, base);
8810: }
8811: SKIP(base + 3);
8812: ctxt->checkIndex = 0;
8813: ctxt->instate = XML_PARSER_CONTENT;
8814: #ifdef DEBUG_PUSH
8815: fprintf(stderr, "PP: entering CONTENT\n");
8816: #endif
8817: }
8818: break;
8819: }
1.141 daniel 8820: case XML_PARSER_END_TAG:
1.140 daniel 8821: if (avail < 2)
8822: goto done;
1.143 daniel 8823: if ((!terminate) &&
8824: (xmlParseLookupSequence(ctxt, '>', 0, 0) < 0))
1.140 daniel 8825: goto done;
8826: xmlParseEndTag(ctxt);
8827: if (ctxt->name == NULL) {
8828: ctxt->instate = XML_PARSER_EPILOG;
8829: #ifdef DEBUG_PUSH
8830: fprintf(stderr, "PP: entering EPILOG\n");
8831: #endif
8832: } else {
8833: ctxt->instate = XML_PARSER_CONTENT;
8834: #ifdef DEBUG_PUSH
8835: fprintf(stderr, "PP: entering CONTENT\n");
8836: #endif
8837: }
8838: break;
8839: case XML_PARSER_DTD: {
8840: /*
8841: * Sorry but progressive parsing of the internal subset
8842: * is not expected to be supported. We first check that
8843: * the full content of the internal subset is available and
8844: * the parsing is launched only at that point.
8845: * Internal subset ends up with "']' S? '>'" in an unescaped
8846: * section and not in a ']]>' sequence which are conditional
8847: * sections (whoever argued to keep that crap in XML deserve
8848: * a place in hell !).
8849: */
8850: int base, i;
8851: xmlChar *buf;
8852: xmlChar quote = 0;
8853:
8854: base = in->cur - in->base;
8855: if (base < 0) return(0);
8856: if (ctxt->checkIndex > base)
8857: base = ctxt->checkIndex;
8858: buf = in->buf->buffer->content;
8859: for (;base < in->buf->buffer->use;base++) {
8860: if (quote != 0) {
8861: if (buf[base] == quote)
8862: quote = 0;
8863: continue;
8864: }
8865: if (buf[base] == '"') {
8866: quote = '"';
8867: continue;
8868: }
8869: if (buf[base] == '\'') {
8870: quote = '\'';
8871: continue;
8872: }
8873: if (buf[base] == ']') {
8874: if (base +1 >= in->buf->buffer->use)
8875: break;
8876: if (buf[base + 1] == ']') {
8877: /* conditional crap, skip both ']' ! */
8878: base++;
8879: continue;
8880: }
8881: for (i = 0;base + i < in->buf->buffer->use;i++) {
8882: if (buf[base + i] == '>')
8883: goto found_end_int_subset;
8884: }
8885: break;
8886: }
8887: }
8888: /*
8889: * We didn't found the end of the Internal subset
8890: */
8891: if (quote == 0)
8892: ctxt->checkIndex = base;
8893: #ifdef DEBUG_PUSH
8894: if (next == 0)
8895: fprintf(stderr, "PP: lookup of int subset end filed\n");
8896: #endif
8897: goto done;
8898:
8899: found_end_int_subset:
8900: xmlParseInternalSubset(ctxt);
1.166 daniel 8901: ctxt->inSubset = 2;
1.171 daniel 8902: if ((ctxt->sax != NULL) && (!ctxt->disableSAX) &&
1.166 daniel 8903: (ctxt->sax->externalSubset != NULL))
8904: ctxt->sax->externalSubset(ctxt->userData, ctxt->intSubName,
8905: ctxt->extSubSystem, ctxt->extSubURI);
8906: ctxt->inSubset = 0;
1.140 daniel 8907: ctxt->instate = XML_PARSER_PROLOG;
8908: ctxt->checkIndex = 0;
8909: #ifdef DEBUG_PUSH
8910: fprintf(stderr, "PP: entering PROLOG\n");
8911: #endif
8912: break;
8913: }
8914: case XML_PARSER_COMMENT:
8915: fprintf(stderr, "PP: internal error, state == COMMENT\n");
8916: ctxt->instate = XML_PARSER_CONTENT;
8917: #ifdef DEBUG_PUSH
8918: fprintf(stderr, "PP: entering CONTENT\n");
8919: #endif
8920: break;
8921: case XML_PARSER_PI:
8922: fprintf(stderr, "PP: internal error, state == PI\n");
8923: ctxt->instate = XML_PARSER_CONTENT;
8924: #ifdef DEBUG_PUSH
8925: fprintf(stderr, "PP: entering CONTENT\n");
8926: #endif
8927: break;
1.128 daniel 8928: case XML_PARSER_ENTITY_DECL:
1.140 daniel 8929: fprintf(stderr, "PP: internal error, state == ENTITY_DECL\n");
8930: ctxt->instate = XML_PARSER_DTD;
8931: #ifdef DEBUG_PUSH
8932: fprintf(stderr, "PP: entering DTD\n");
8933: #endif
8934: break;
1.128 daniel 8935: case XML_PARSER_ENTITY_VALUE:
1.140 daniel 8936: fprintf(stderr, "PP: internal error, state == ENTITY_VALUE\n");
8937: ctxt->instate = XML_PARSER_CONTENT;
8938: #ifdef DEBUG_PUSH
8939: fprintf(stderr, "PP: entering DTD\n");
8940: #endif
8941: break;
1.128 daniel 8942: case XML_PARSER_ATTRIBUTE_VALUE:
1.140 daniel 8943: fprintf(stderr, "PP: internal error, state == ATTRIBUTE_VALUE\n");
1.168 daniel 8944: ctxt->instate = XML_PARSER_START_TAG;
8945: #ifdef DEBUG_PUSH
8946: fprintf(stderr, "PP: entering START_TAG\n");
8947: #endif
8948: break;
8949: case XML_PARSER_SYSTEM_LITERAL:
8950: fprintf(stderr, "PP: internal error, state == SYSTEM_LITERAL\n");
1.140 daniel 8951: ctxt->instate = XML_PARSER_START_TAG;
8952: #ifdef DEBUG_PUSH
8953: fprintf(stderr, "PP: entering START_TAG\n");
8954: #endif
8955: break;
1.128 daniel 8956: }
8957: }
1.140 daniel 8958: done:
8959: #ifdef DEBUG_PUSH
8960: fprintf(stderr, "PP: done %d\n", ret);
8961: #endif
1.128 daniel 8962: return(ret);
8963: }
8964:
8965: /**
1.143 daniel 8966: * xmlParseTry:
8967: * @ctxt: an XML parser context
8968: *
8969: * Try to progress on parsing
8970: *
8971: * Returns zero if no parsing was possible
8972: */
8973: int
8974: xmlParseTry(xmlParserCtxtPtr ctxt) {
8975: return(xmlParseTryOrFinish(ctxt, 0));
8976: }
8977:
8978: /**
1.128 daniel 8979: * xmlParseChunk:
8980: * @ctxt: an XML parser context
8981: * @chunk: an char array
8982: * @size: the size in byte of the chunk
8983: * @terminate: last chunk indicator
8984: *
8985: * Parse a Chunk of memory
8986: *
8987: * Returns zero if no error, the xmlParserErrors otherwise.
8988: */
1.140 daniel 8989: int
1.128 daniel 8990: xmlParseChunk(xmlParserCtxtPtr ctxt, const char *chunk, int size,
8991: int terminate) {
1.132 daniel 8992: if ((size > 0) && (chunk != NULL) && (ctxt->input != NULL) &&
1.140 daniel 8993: (ctxt->input->buf != NULL) && (ctxt->instate != XML_PARSER_EOF)) {
8994: int base = ctxt->input->base - ctxt->input->buf->buffer->content;
8995: int cur = ctxt->input->cur - ctxt->input->base;
8996:
1.132 daniel 8997: xmlParserInputBufferPush(ctxt->input->buf, size, chunk);
1.140 daniel 8998: ctxt->input->base = ctxt->input->buf->buffer->content + base;
8999: ctxt->input->cur = ctxt->input->base + cur;
9000: #ifdef DEBUG_PUSH
9001: fprintf(stderr, "PP: pushed %d\n", size);
9002: #endif
9003:
1.150 daniel 9004: if ((terminate) || (ctxt->input->buf->buffer->use > 80))
9005: xmlParseTryOrFinish(ctxt, terminate);
1.140 daniel 9006: } else if (ctxt->instate != XML_PARSER_EOF)
1.143 daniel 9007: xmlParseTryOrFinish(ctxt, terminate);
1.140 daniel 9008: if (terminate) {
1.151 daniel 9009: /*
9010: * Grab the encoding if it was added on-the-fly
9011: */
9012: if ((ctxt->encoding != NULL) && (ctxt->myDoc != NULL) &&
9013: (ctxt->myDoc->encoding == NULL)) {
9014: ctxt->myDoc->encoding = ctxt->encoding;
9015: ctxt->encoding = NULL;
9016: }
9017:
9018: /*
9019: * Check for termination
9020: */
1.140 daniel 9021: if ((ctxt->instate != XML_PARSER_EOF) &&
9022: (ctxt->instate != XML_PARSER_EPILOG)) {
9023: if ((ctxt->sax != NULL) && (ctxt->sax->error != NULL))
9024: ctxt->sax->error(ctxt->userData,
9025: "Extra content at the end of the document\n");
9026: ctxt->wellFormed = 0;
9027: ctxt->errNo = XML_ERR_DOCUMENT_END;
9028: }
9029: if (ctxt->instate != XML_PARSER_EOF) {
1.171 daniel 9030: if ((ctxt->sax) && (ctxt->sax->endDocument != NULL) &&
9031: (!ctxt->disableSAX))
1.140 daniel 9032: ctxt->sax->endDocument(ctxt->userData);
9033: }
9034: ctxt->instate = XML_PARSER_EOF;
1.128 daniel 9035: }
9036: return((xmlParserErrors) ctxt->errNo);
9037: }
9038:
9039: /************************************************************************
9040: * *
1.98 daniel 9041: * I/O front end functions to the parser *
9042: * *
9043: ************************************************************************/
9044:
1.50 daniel 9045: /**
1.140 daniel 9046: * xmlCreatePushParserCtxt :
9047: * @sax: a SAX handler
9048: * @user_data: The user data returned on SAX callbacks
9049: * @chunk: a pointer to an array of chars
9050: * @size: number of chars in the array
9051: * @filename: an optional file name or URI
9052: *
9053: * Create a parser context for using the XML parser in push mode
9054: * To allow content encoding detection, @size should be >= 4
9055: * The value of @filename is used for fetching external entities
9056: * and error/warning reports.
9057: *
9058: * Returns the new parser context or NULL
9059: */
9060: xmlParserCtxtPtr
9061: xmlCreatePushParserCtxt(xmlSAXHandlerPtr sax, void *user_data,
9062: const char *chunk, int size, const char *filename) {
9063: xmlParserCtxtPtr ctxt;
9064: xmlParserInputPtr inputStream;
9065: xmlParserInputBufferPtr buf;
9066: xmlCharEncoding enc = XML_CHAR_ENCODING_NONE;
9067:
9068: /*
1.156 daniel 9069: * plug some encoding conversion routines
1.140 daniel 9070: */
9071: if ((chunk != NULL) && (size >= 4))
1.156 daniel 9072: enc = xmlDetectCharEncoding((const xmlChar *) chunk, size);
1.140 daniel 9073:
9074: buf = xmlAllocParserInputBuffer(enc);
9075: if (buf == NULL) return(NULL);
9076:
9077: ctxt = xmlNewParserCtxt();
9078: if (ctxt == NULL) {
9079: xmlFree(buf);
9080: return(NULL);
9081: }
9082: if (sax != NULL) {
9083: if (ctxt->sax != &xmlDefaultSAXHandler)
9084: xmlFree(ctxt->sax);
9085: ctxt->sax = (xmlSAXHandlerPtr) xmlMalloc(sizeof(xmlSAXHandler));
9086: if (ctxt->sax == NULL) {
9087: xmlFree(buf);
9088: xmlFree(ctxt);
9089: return(NULL);
9090: }
9091: memcpy(ctxt->sax, sax, sizeof(xmlSAXHandler));
9092: if (user_data != NULL)
9093: ctxt->userData = user_data;
9094: }
9095: if (filename == NULL) {
9096: ctxt->directory = NULL;
9097: } else {
9098: ctxt->directory = xmlParserGetDirectory(filename);
9099: }
9100:
9101: inputStream = xmlNewInputStream(ctxt);
9102: if (inputStream == NULL) {
9103: xmlFreeParserCtxt(ctxt);
9104: return(NULL);
9105: }
9106:
9107: if (filename == NULL)
9108: inputStream->filename = NULL;
9109: else
9110: inputStream->filename = xmlMemStrdup(filename);
9111: inputStream->buf = buf;
9112: inputStream->base = inputStream->buf->buffer->content;
9113: inputStream->cur = inputStream->buf->buffer->content;
1.156 daniel 9114: if (enc != XML_CHAR_ENCODING_NONE) {
9115: xmlSwitchEncoding(ctxt, enc);
9116: }
1.140 daniel 9117:
9118: inputPush(ctxt, inputStream);
9119:
9120: if ((size > 0) && (chunk != NULL) && (ctxt->input != NULL) &&
9121: (ctxt->input->buf != NULL)) {
9122: xmlParserInputBufferPush(ctxt->input->buf, size, chunk);
9123: #ifdef DEBUG_PUSH
9124: fprintf(stderr, "PP: pushed %d\n", size);
9125: #endif
9126: }
9127:
9128: return(ctxt);
9129: }
9130:
9131: /**
1.86 daniel 9132: * xmlCreateDocParserCtxt :
1.123 daniel 9133: * @cur: a pointer to an array of xmlChar
1.50 daniel 9134: *
1.69 daniel 9135: * Create a parser context for an XML in-memory document.
9136: *
9137: * Returns the new parser context or NULL
1.16 daniel 9138: */
1.69 daniel 9139: xmlParserCtxtPtr
1.123 daniel 9140: xmlCreateDocParserCtxt(xmlChar *cur) {
1.16 daniel 9141: xmlParserCtxtPtr ctxt;
1.40 daniel 9142: xmlParserInputPtr input;
1.16 daniel 9143:
1.97 daniel 9144: ctxt = xmlNewParserCtxt();
1.16 daniel 9145: if (ctxt == NULL) {
9146: return(NULL);
9147: }
1.96 daniel 9148: input = xmlNewInputStream(ctxt);
1.40 daniel 9149: if (input == NULL) {
1.97 daniel 9150: xmlFreeParserCtxt(ctxt);
1.40 daniel 9151: return(NULL);
9152: }
9153:
9154: input->base = cur;
9155: input->cur = cur;
9156:
9157: inputPush(ctxt, input);
1.69 daniel 9158: return(ctxt);
9159: }
9160:
9161: /**
9162: * xmlSAXParseDoc :
9163: * @sax: the SAX handler block
1.123 daniel 9164: * @cur: a pointer to an array of xmlChar
1.69 daniel 9165: * @recovery: work in recovery mode, i.e. tries to read no Well Formed
9166: * documents
9167: *
9168: * parse an XML in-memory document and build a tree.
9169: * It use the given SAX function block to handle the parsing callback.
9170: * If sax is NULL, fallback to the default DOM tree building routines.
9171: *
9172: * Returns the resulting document tree
9173: */
9174:
9175: xmlDocPtr
1.123 daniel 9176: xmlSAXParseDoc(xmlSAXHandlerPtr sax, xmlChar *cur, int recovery) {
1.69 daniel 9177: xmlDocPtr ret;
9178: xmlParserCtxtPtr ctxt;
9179:
9180: if (cur == NULL) return(NULL);
1.16 daniel 9181:
9182:
1.69 daniel 9183: ctxt = xmlCreateDocParserCtxt(cur);
9184: if (ctxt == NULL) return(NULL);
1.74 daniel 9185: if (sax != NULL) {
9186: ctxt->sax = sax;
9187: ctxt->userData = NULL;
9188: }
1.69 daniel 9189:
1.16 daniel 9190: xmlParseDocument(ctxt);
1.72 daniel 9191: if ((ctxt->wellFormed) || recovery) ret = ctxt->myDoc;
1.59 daniel 9192: else {
9193: ret = NULL;
1.72 daniel 9194: xmlFreeDoc(ctxt->myDoc);
9195: ctxt->myDoc = NULL;
1.59 daniel 9196: }
1.86 daniel 9197: if (sax != NULL)
9198: ctxt->sax = NULL;
1.69 daniel 9199: xmlFreeParserCtxt(ctxt);
1.16 daniel 9200:
1.1 veillard 9201: return(ret);
9202: }
9203:
1.50 daniel 9204: /**
1.55 daniel 9205: * xmlParseDoc :
1.123 daniel 9206: * @cur: a pointer to an array of xmlChar
1.55 daniel 9207: *
9208: * parse an XML in-memory document and build a tree.
9209: *
1.68 daniel 9210: * Returns the resulting document tree
1.55 daniel 9211: */
9212:
1.69 daniel 9213: xmlDocPtr
1.123 daniel 9214: xmlParseDoc(xmlChar *cur) {
1.59 daniel 9215: return(xmlSAXParseDoc(NULL, cur, 0));
1.76 daniel 9216: }
9217:
9218: /**
9219: * xmlSAXParseDTD :
9220: * @sax: the SAX handler block
9221: * @ExternalID: a NAME* containing the External ID of the DTD
9222: * @SystemID: a NAME* containing the URL to the DTD
9223: *
9224: * Load and parse an external subset.
9225: *
9226: * Returns the resulting xmlDtdPtr or NULL in case of error.
9227: */
9228:
9229: xmlDtdPtr
1.123 daniel 9230: xmlSAXParseDTD(xmlSAXHandlerPtr sax, const xmlChar *ExternalID,
9231: const xmlChar *SystemID) {
1.76 daniel 9232: xmlDtdPtr ret = NULL;
9233: xmlParserCtxtPtr ctxt;
1.83 daniel 9234: xmlParserInputPtr input = NULL;
1.76 daniel 9235: xmlCharEncoding enc;
9236:
9237: if ((ExternalID == NULL) && (SystemID == NULL)) return(NULL);
9238:
1.97 daniel 9239: ctxt = xmlNewParserCtxt();
1.76 daniel 9240: if (ctxt == NULL) {
9241: return(NULL);
9242: }
9243:
9244: /*
9245: * Set-up the SAX context
9246: */
9247: if (ctxt == NULL) return(NULL);
9248: if (sax != NULL) {
1.93 veillard 9249: if (ctxt->sax != NULL)
1.119 daniel 9250: xmlFree(ctxt->sax);
1.76 daniel 9251: ctxt->sax = sax;
9252: ctxt->userData = NULL;
9253: }
9254:
9255: /*
9256: * Ask the Entity resolver to load the damn thing
9257: */
9258:
9259: if ((ctxt->sax != NULL) && (ctxt->sax->resolveEntity != NULL))
9260: input = ctxt->sax->resolveEntity(ctxt->userData, ExternalID, SystemID);
9261: if (input == NULL) {
1.86 daniel 9262: if (sax != NULL) ctxt->sax = NULL;
1.76 daniel 9263: xmlFreeParserCtxt(ctxt);
9264: return(NULL);
9265: }
9266:
9267: /*
1.156 daniel 9268: * plug some encoding conversion routines here.
1.76 daniel 9269: */
9270: xmlPushInput(ctxt, input);
1.156 daniel 9271: enc = xmlDetectCharEncoding(ctxt->input->cur, 4);
1.76 daniel 9272: xmlSwitchEncoding(ctxt, enc);
9273:
1.95 veillard 9274: if (input->filename == NULL)
1.156 daniel 9275: input->filename = (char *) xmlStrdup(SystemID);
1.76 daniel 9276: input->line = 1;
9277: input->col = 1;
9278: input->base = ctxt->input->cur;
9279: input->cur = ctxt->input->cur;
9280: input->free = NULL;
9281:
9282: /*
9283: * let's parse that entity knowing it's an external subset.
9284: */
1.79 daniel 9285: xmlParseExternalSubset(ctxt, ExternalID, SystemID);
1.76 daniel 9286:
9287: if (ctxt->myDoc != NULL) {
9288: if (ctxt->wellFormed) {
9289: ret = ctxt->myDoc->intSubset;
9290: ctxt->myDoc->intSubset = NULL;
9291: } else {
9292: ret = NULL;
9293: }
9294: xmlFreeDoc(ctxt->myDoc);
9295: ctxt->myDoc = NULL;
9296: }
1.86 daniel 9297: if (sax != NULL) ctxt->sax = NULL;
1.76 daniel 9298: xmlFreeParserCtxt(ctxt);
9299:
9300: return(ret);
9301: }
9302:
9303: /**
9304: * xmlParseDTD :
9305: * @ExternalID: a NAME* containing the External ID of the DTD
9306: * @SystemID: a NAME* containing the URL to the DTD
9307: *
9308: * Load and parse an external subset.
9309: *
9310: * Returns the resulting xmlDtdPtr or NULL in case of error.
9311: */
9312:
9313: xmlDtdPtr
1.123 daniel 9314: xmlParseDTD(const xmlChar *ExternalID, const xmlChar *SystemID) {
1.76 daniel 9315: return(xmlSAXParseDTD(NULL, ExternalID, SystemID));
1.59 daniel 9316: }
9317:
9318: /**
1.144 daniel 9319: * xmlSAXParseBalancedChunk :
9320: * @ctx: an XML parser context (possibly NULL)
9321: * @sax: the SAX handler bloc (possibly NULL)
9322: * @user_data: The user data returned on SAX callbacks (possibly NULL)
9323: * @input: a parser input stream
9324: * @enc: the encoding
9325: *
9326: * Parse a well-balanced chunk of an XML document
9327: * The user has to provide SAX callback block whose routines will be
9328: * called by the parser
9329: * The allowed sequence for the Well Balanced Chunk is the one defined by
9330: * the content production in the XML grammar:
9331: *
9332: * [43] content ::= (element | CharData | Reference | CDSect | PI | Comment)*
9333: *
1.176 daniel 9334: * Returns 0 if the chunk is well balanced, -1 in case of args problem and
1.144 daniel 9335: * the error code otherwise
9336: */
9337:
9338: int
9339: xmlSAXParseBalancedChunk(xmlParserCtxtPtr ctx, xmlSAXHandlerPtr sax,
9340: void *user_data, xmlParserInputPtr input,
9341: xmlCharEncoding enc) {
9342: xmlParserCtxtPtr ctxt;
9343: int ret;
9344:
9345: if (input == NULL) return(-1);
9346:
9347: if (ctx != NULL)
9348: ctxt = ctx;
9349: else {
9350: ctxt = xmlNewParserCtxt();
9351: if (ctxt == NULL)
9352: return(-1);
9353: if (sax == NULL)
9354: ctxt->myDoc = xmlNewDoc(BAD_CAST "1.0");
9355: }
9356:
9357: /*
9358: * Set-up the SAX context
9359: */
9360: if (sax != NULL) {
9361: if (ctxt->sax != NULL)
9362: xmlFree(ctxt->sax);
9363: ctxt->sax = sax;
9364: ctxt->userData = user_data;
9365: }
9366:
9367: /*
9368: * plug some encoding conversion routines here.
9369: */
9370: xmlPushInput(ctxt, input);
9371: if (enc != XML_CHAR_ENCODING_NONE)
9372: xmlSwitchEncoding(ctxt, enc);
9373:
9374: /*
9375: * let's parse that entity knowing it's an external subset.
9376: */
9377: xmlParseContent(ctxt);
9378: ret = ctxt->errNo;
9379:
9380: if (ctx == NULL) {
9381: if (sax != NULL)
9382: ctxt->sax = NULL;
9383: else
9384: xmlFreeDoc(ctxt->myDoc);
9385: xmlFreeParserCtxt(ctxt);
9386: }
9387: return(ret);
9388: }
9389:
9390: /**
9391: * xmlParseBalancedChunk :
1.176 daniel 9392: * @doc: the document the chunk pertains to
9393: * @sax: the SAX handler bloc (possibly NULL)
9394: * @user_data: The user data returned on SAX callbacks (possibly NULL)
9395: * @string: the input string in UTF8 or ISO-Latin (zero terminated)
9396: * @list: the return value for the set of parsed nodes
9397: *
9398: * Parse a well-balanced chunk of an XML document
9399: * called by the parser
9400: * The allowed sequence for the Well Balanced Chunk is the one defined by
9401: * the content production in the XML grammar:
1.144 daniel 9402: *
1.175 daniel 9403: * [43] content ::= (element | CharData | Reference | CDSect | PI | Comment)*
9404: *
1.176 daniel 9405: * Returns 0 if the chunk is well balanced, -1 in case of args problem and
9406: * the parser error code otherwise
1.144 daniel 9407: */
9408:
1.175 daniel 9409: int
9410: xmlParseBalancedChunkMemory(xmlDocPtr doc, xmlSAXHandlerPtr sax,
1.176 daniel 9411: void *user_data, const xmlChar *string, xmlNodePtr *list) {
9412: xmlParserCtxtPtr ctxt;
1.175 daniel 9413: xmlDocPtr newDoc;
1.176 daniel 9414: xmlSAXHandlerPtr oldsax;
1.175 daniel 9415: int size;
1.176 daniel 9416: int ret = 0;
1.175 daniel 9417:
9418:
1.176 daniel 9419: if (list != NULL)
9420: *list = NULL;
9421: if (string == NULL)
9422: return(-1);
9423:
9424: size = xmlStrlen(string);
9425:
9426: ctxt = xmlCreateMemoryParserCtxt((char *) string, size + 1);
9427: if (ctxt == NULL) return(-1);
9428: ctxt->userData = ctxt;
1.175 daniel 9429: if (sax != NULL) {
1.176 daniel 9430: oldsax = ctxt->sax;
9431: ctxt->sax = sax;
9432: if (user_data != NULL)
9433: ctxt->userData = user_data;
1.175 daniel 9434: }
9435: newDoc = xmlNewDoc(BAD_CAST "1.0");
1.176 daniel 9436: if (newDoc == NULL) {
9437: xmlFreeParserCtxt(ctxt);
9438: return(-1);
9439: }
1.175 daniel 9440: if (doc != NULL) {
9441: newDoc->intSubset = doc->intSubset;
9442: newDoc->extSubset = doc->extSubset;
9443: }
1.176 daniel 9444: newDoc->children = xmlNewDocNode(newDoc, NULL, BAD_CAST "pseudoroot", NULL);
9445: if (newDoc->children == NULL) {
9446: if (sax != NULL)
9447: ctxt->sax = oldsax;
9448: xmlFreeParserCtxt(ctxt);
9449: newDoc->intSubset = NULL;
9450: newDoc->extSubset = NULL;
9451: xmlFreeDoc(newDoc);
9452: return(-1);
9453: }
9454: nodePush(ctxt, newDoc->children);
9455: if (doc == NULL) {
9456: ctxt->myDoc = newDoc;
9457: } else {
9458: ctxt->myDoc = doc;
9459: newDoc->children->doc = doc;
9460: }
9461: ctxt->instate = XML_PARSER_CONTENT;
9462:
9463: /*
9464: * Doing validity checking on chunk doesn't make sense
9465: */
9466: ctxt->validate = 0;
9467:
1.175 daniel 9468: xmlParseContent(ctxt);
1.176 daniel 9469:
9470: if ((RAW == '<') && (NXT(1) == '/')) {
9471: if ((ctxt->sax != NULL) && (ctxt->sax->error != NULL))
9472: ctxt->sax->error(ctxt->userData,
9473: "chunk is not well balanced\n");
9474: ctxt->wellFormed = 0;
9475: ctxt->errNo = XML_ERR_NOT_WELL_BALANCED;
9476: } else if (RAW != 0) {
9477: if ((ctxt->sax != NULL) && (ctxt->sax->error != NULL))
9478: ctxt->sax->error(ctxt->userData,
9479: "extra content at the end of well balanced chunk\n");
9480: ctxt->wellFormed = 0;
9481: ctxt->errNo = XML_ERR_EXTRA_CONTENT;
9482: }
9483: if (ctxt->node != newDoc->children) {
9484: if ((ctxt->sax != NULL) && (ctxt->sax->error != NULL))
9485: ctxt->sax->error(ctxt->userData,
9486: "chunk is not well balanced\n");
9487: ctxt->wellFormed = 0;
9488: ctxt->errNo = XML_ERR_NOT_WELL_BALANCED;
9489: }
1.175 daniel 9490:
1.176 daniel 9491: if (!ctxt->wellFormed) {
9492: if (ctxt->errNo == 0)
9493: ret = 1;
9494: else
9495: ret = ctxt->errNo;
9496: } else {
9497: if (list != NULL) {
9498: xmlNodePtr cur;
1.175 daniel 9499:
1.176 daniel 9500: /*
9501: * Return the newly created nodeset after unlinking it from
9502: * they pseudo parent.
9503: */
9504: cur = newDoc->children->children;
9505: *list = cur;
9506: while (cur != NULL) {
9507: cur->parent = NULL;
9508: cur = cur->next;
9509: }
9510: newDoc->children->children = NULL;
9511: }
9512: ret = 0;
1.175 daniel 9513: }
1.176 daniel 9514: if (sax != NULL)
9515: ctxt->sax = oldsax;
1.175 daniel 9516: xmlFreeParserCtxt(ctxt);
9517: newDoc->intSubset = NULL;
9518: newDoc->extSubset = NULL;
1.176 daniel 9519: xmlFreeDoc(newDoc);
1.175 daniel 9520:
1.176 daniel 9521: return(ret);
1.144 daniel 9522: }
9523:
9524: /**
9525: * xmlParseBalancedChunkFile :
9526: * @doc: the document the chunk pertains to
9527: *
9528: * Parse a well-balanced chunk of an XML document contained in a file
9529: *
9530: * Returns the resulting list of nodes resulting from the parsing,
9531: * they are not added to @node
9532: */
9533:
9534: xmlNodePtr
9535: xmlParseBalancedChunkFile(xmlDocPtr doc, xmlNodePtr node) {
1.156 daniel 9536: /* TODO !!! */
9537: return(NULL);
1.144 daniel 9538: }
9539:
9540: /**
1.59 daniel 9541: * xmlRecoverDoc :
1.123 daniel 9542: * @cur: a pointer to an array of xmlChar
1.59 daniel 9543: *
9544: * parse an XML in-memory document and build a tree.
9545: * In the case the document is not Well Formed, a tree is built anyway
9546: *
1.68 daniel 9547: * Returns the resulting document tree
1.59 daniel 9548: */
9549:
1.69 daniel 9550: xmlDocPtr
1.123 daniel 9551: xmlRecoverDoc(xmlChar *cur) {
1.59 daniel 9552: return(xmlSAXParseDoc(NULL, cur, 1));
1.55 daniel 9553: }
9554:
9555: /**
1.69 daniel 9556: * xmlCreateFileParserCtxt :
1.50 daniel 9557: * @filename: the filename
9558: *
1.69 daniel 9559: * Create a parser context for a file content.
9560: * Automatic support for ZLIB/Compress compressed document is provided
9561: * by default if found at compile-time.
1.50 daniel 9562: *
1.69 daniel 9563: * Returns the new parser context or NULL
1.9 httpng 9564: */
1.69 daniel 9565: xmlParserCtxtPtr
9566: xmlCreateFileParserCtxt(const char *filename)
9567: {
9568: xmlParserCtxtPtr ctxt;
1.40 daniel 9569: xmlParserInputPtr inputStream;
1.91 daniel 9570: xmlParserInputBufferPtr buf;
1.111 daniel 9571: char *directory = NULL;
1.9 httpng 9572:
1.91 daniel 9573: buf = xmlParserInputBufferCreateFilename(filename, XML_CHAR_ENCODING_NONE);
9574: if (buf == NULL) return(NULL);
1.9 httpng 9575:
1.97 daniel 9576: ctxt = xmlNewParserCtxt();
1.16 daniel 9577: if (ctxt == NULL) {
9578: return(NULL);
9579: }
1.97 daniel 9580:
1.96 daniel 9581: inputStream = xmlNewInputStream(ctxt);
1.40 daniel 9582: if (inputStream == NULL) {
1.97 daniel 9583: xmlFreeParserCtxt(ctxt);
1.40 daniel 9584: return(NULL);
9585: }
9586:
1.119 daniel 9587: inputStream->filename = xmlMemStrdup(filename);
1.91 daniel 9588: inputStream->buf = buf;
9589: inputStream->base = inputStream->buf->buffer->content;
9590: inputStream->cur = inputStream->buf->buffer->content;
1.16 daniel 9591:
1.40 daniel 9592: inputPush(ctxt, inputStream);
1.110 daniel 9593: if ((ctxt->directory == NULL) && (directory == NULL))
1.106 daniel 9594: directory = xmlParserGetDirectory(filename);
9595: if ((ctxt->directory == NULL) && (directory != NULL))
1.110 daniel 9596: ctxt->directory = directory;
1.106 daniel 9597:
1.69 daniel 9598: return(ctxt);
9599: }
9600:
9601: /**
9602: * xmlSAXParseFile :
9603: * @sax: the SAX handler block
9604: * @filename: the filename
9605: * @recovery: work in recovery mode, i.e. tries to read no Well Formed
9606: * documents
9607: *
9608: * parse an XML file and build a tree. Automatic support for ZLIB/Compress
9609: * compressed document is provided by default if found at compile-time.
9610: * It use the given SAX function block to handle the parsing callback.
9611: * If sax is NULL, fallback to the default DOM tree building routines.
9612: *
9613: * Returns the resulting document tree
9614: */
9615:
1.79 daniel 9616: xmlDocPtr
9617: xmlSAXParseFile(xmlSAXHandlerPtr sax, const char *filename,
1.69 daniel 9618: int recovery) {
9619: xmlDocPtr ret;
9620: xmlParserCtxtPtr ctxt;
1.111 daniel 9621: char *directory = NULL;
1.69 daniel 9622:
9623: ctxt = xmlCreateFileParserCtxt(filename);
9624: if (ctxt == NULL) return(NULL);
1.74 daniel 9625: if (sax != NULL) {
1.93 veillard 9626: if (ctxt->sax != NULL)
1.119 daniel 9627: xmlFree(ctxt->sax);
1.74 daniel 9628: ctxt->sax = sax;
9629: ctxt->userData = NULL;
9630: }
1.106 daniel 9631:
1.110 daniel 9632: if ((ctxt->directory == NULL) && (directory == NULL))
1.106 daniel 9633: directory = xmlParserGetDirectory(filename);
9634: if ((ctxt->directory == NULL) && (directory != NULL))
1.156 daniel 9635: ctxt->directory = (char *) xmlStrdup((xmlChar *) directory);
1.16 daniel 9636:
9637: xmlParseDocument(ctxt);
1.40 daniel 9638:
1.72 daniel 9639: if ((ctxt->wellFormed) || recovery) ret = ctxt->myDoc;
1.59 daniel 9640: else {
9641: ret = NULL;
1.72 daniel 9642: xmlFreeDoc(ctxt->myDoc);
9643: ctxt->myDoc = NULL;
1.59 daniel 9644: }
1.86 daniel 9645: if (sax != NULL)
9646: ctxt->sax = NULL;
1.69 daniel 9647: xmlFreeParserCtxt(ctxt);
1.20 daniel 9648:
9649: return(ret);
9650: }
9651:
1.55 daniel 9652: /**
9653: * xmlParseFile :
9654: * @filename: the filename
9655: *
9656: * parse an XML file and build a tree. Automatic support for ZLIB/Compress
9657: * compressed document is provided by default if found at compile-time.
9658: *
1.68 daniel 9659: * Returns the resulting document tree
1.55 daniel 9660: */
9661:
1.79 daniel 9662: xmlDocPtr
9663: xmlParseFile(const char *filename) {
1.59 daniel 9664: return(xmlSAXParseFile(NULL, filename, 0));
9665: }
9666:
9667: /**
9668: * xmlRecoverFile :
9669: * @filename: the filename
9670: *
9671: * parse an XML file and build a tree. Automatic support for ZLIB/Compress
9672: * compressed document is provided by default if found at compile-time.
9673: * In the case the document is not Well Formed, a tree is built anyway
9674: *
1.68 daniel 9675: * Returns the resulting document tree
1.59 daniel 9676: */
9677:
1.79 daniel 9678: xmlDocPtr
9679: xmlRecoverFile(const char *filename) {
1.59 daniel 9680: return(xmlSAXParseFile(NULL, filename, 1));
1.55 daniel 9681: }
1.32 daniel 9682:
1.50 daniel 9683: /**
1.69 daniel 9684: * xmlCreateMemoryParserCtxt :
1.68 daniel 9685: * @buffer: an pointer to a char array
1.127 daniel 9686: * @size: the size of the array
1.50 daniel 9687: *
1.69 daniel 9688: * Create a parser context for an XML in-memory document.
1.50 daniel 9689: *
1.69 daniel 9690: * Returns the new parser context or NULL
1.20 daniel 9691: */
1.69 daniel 9692: xmlParserCtxtPtr
9693: xmlCreateMemoryParserCtxt(char *buffer, int size) {
1.20 daniel 9694: xmlParserCtxtPtr ctxt;
1.40 daniel 9695: xmlParserInputPtr input;
9696:
1.158 daniel 9697: if (buffer[size - 1] != 0)
9698: buffer[size - 1] = '\0';
1.40 daniel 9699:
1.97 daniel 9700: ctxt = xmlNewParserCtxt();
1.20 daniel 9701: if (ctxt == NULL) {
9702: return(NULL);
9703: }
1.97 daniel 9704:
1.96 daniel 9705: input = xmlNewInputStream(ctxt);
1.40 daniel 9706: if (input == NULL) {
1.97 daniel 9707: xmlFreeParserCtxt(ctxt);
1.40 daniel 9708: return(NULL);
9709: }
1.20 daniel 9710:
1.40 daniel 9711: input->filename = NULL;
9712: input->line = 1;
9713: input->col = 1;
1.96 daniel 9714: input->buf = NULL;
1.91 daniel 9715: input->consumed = 0;
1.75 daniel 9716:
1.116 daniel 9717: input->base = BAD_CAST buffer;
9718: input->cur = BAD_CAST buffer;
1.69 daniel 9719: input->free = NULL;
1.20 daniel 9720:
1.40 daniel 9721: inputPush(ctxt, input);
1.69 daniel 9722: return(ctxt);
9723: }
9724:
9725: /**
9726: * xmlSAXParseMemory :
9727: * @sax: the SAX handler block
9728: * @buffer: an pointer to a char array
1.127 daniel 9729: * @size: the size of the array
9730: * @recovery: work in recovery mode, i.e. tries to read not Well Formed
1.69 daniel 9731: * documents
9732: *
9733: * parse an XML in-memory block and use the given SAX function block
9734: * to handle the parsing callback. If sax is NULL, fallback to the default
9735: * DOM tree building routines.
9736: *
9737: * Returns the resulting document tree
9738: */
9739: xmlDocPtr
9740: xmlSAXParseMemory(xmlSAXHandlerPtr sax, char *buffer, int size, int recovery) {
9741: xmlDocPtr ret;
9742: xmlParserCtxtPtr ctxt;
9743:
9744: ctxt = xmlCreateMemoryParserCtxt(buffer, size);
9745: if (ctxt == NULL) return(NULL);
1.74 daniel 9746: if (sax != NULL) {
9747: ctxt->sax = sax;
9748: ctxt->userData = NULL;
9749: }
1.20 daniel 9750:
9751: xmlParseDocument(ctxt);
1.40 daniel 9752:
1.72 daniel 9753: if ((ctxt->wellFormed) || recovery) ret = ctxt->myDoc;
1.59 daniel 9754: else {
9755: ret = NULL;
1.72 daniel 9756: xmlFreeDoc(ctxt->myDoc);
9757: ctxt->myDoc = NULL;
1.59 daniel 9758: }
1.86 daniel 9759: if (sax != NULL)
9760: ctxt->sax = NULL;
1.69 daniel 9761: xmlFreeParserCtxt(ctxt);
1.16 daniel 9762:
1.9 httpng 9763: return(ret);
1.17 daniel 9764: }
9765:
1.55 daniel 9766: /**
9767: * xmlParseMemory :
1.68 daniel 9768: * @buffer: an pointer to a char array
1.55 daniel 9769: * @size: the size of the array
9770: *
9771: * parse an XML in-memory block and build a tree.
9772: *
1.68 daniel 9773: * Returns the resulting document tree
1.55 daniel 9774: */
9775:
9776: xmlDocPtr xmlParseMemory(char *buffer, int size) {
1.59 daniel 9777: return(xmlSAXParseMemory(NULL, buffer, size, 0));
9778: }
9779:
9780: /**
9781: * xmlRecoverMemory :
1.68 daniel 9782: * @buffer: an pointer to a char array
1.59 daniel 9783: * @size: the size of the array
9784: *
9785: * parse an XML in-memory block and build a tree.
9786: * In the case the document is not Well Formed, a tree is built anyway
9787: *
1.68 daniel 9788: * Returns the resulting document tree
1.59 daniel 9789: */
9790:
9791: xmlDocPtr xmlRecoverMemory(char *buffer, int size) {
9792: return(xmlSAXParseMemory(NULL, buffer, size, 1));
1.17 daniel 9793: }
9794:
9795:
1.50 daniel 9796: /**
9797: * xmlSetupParserForBuffer:
9798: * @ctxt: an XML parser context
1.123 daniel 9799: * @buffer: a xmlChar * buffer
1.50 daniel 9800: * @filename: a file name
9801: *
1.19 daniel 9802: * Setup the parser context to parse a new buffer; Clears any prior
9803: * contents from the parser context. The buffer parameter must not be
9804: * NULL, but the filename parameter can be
9805: */
1.55 daniel 9806: void
1.123 daniel 9807: xmlSetupParserForBuffer(xmlParserCtxtPtr ctxt, const xmlChar* buffer,
1.17 daniel 9808: const char* filename)
9809: {
1.96 daniel 9810: xmlParserInputPtr input;
1.40 daniel 9811:
1.96 daniel 9812: input = xmlNewInputStream(ctxt);
9813: if (input == NULL) {
9814: perror("malloc");
1.119 daniel 9815: xmlFree(ctxt);
1.145 daniel 9816: return;
1.96 daniel 9817: }
9818:
9819: xmlClearParserCtxt(ctxt);
9820: if (filename != NULL)
1.119 daniel 9821: input->filename = xmlMemStrdup(filename);
1.96 daniel 9822: input->base = buffer;
9823: input->cur = buffer;
9824: inputPush(ctxt, input);
1.17 daniel 9825: }
9826:
1.123 daniel 9827: /**
9828: * xmlSAXUserParseFile:
9829: * @sax: a SAX handler
9830: * @user_data: The user data returned on SAX callbacks
9831: * @filename: a file name
9832: *
9833: * parse an XML file and call the given SAX handler routines.
9834: * Automatic support for ZLIB/Compress compressed document is provided
9835: *
9836: * Returns 0 in case of success or a error number otherwise
9837: */
1.131 daniel 9838: int
9839: xmlSAXUserParseFile(xmlSAXHandlerPtr sax, void *user_data,
9840: const char *filename) {
1.123 daniel 9841: int ret = 0;
9842: xmlParserCtxtPtr ctxt;
9843:
9844: ctxt = xmlCreateFileParserCtxt(filename);
9845: if (ctxt == NULL) return -1;
1.134 daniel 9846: if (ctxt->sax != &xmlDefaultSAXHandler)
9847: xmlFree(ctxt->sax);
1.123 daniel 9848: ctxt->sax = sax;
1.140 daniel 9849: if (user_data != NULL)
9850: ctxt->userData = user_data;
1.123 daniel 9851:
9852: xmlParseDocument(ctxt);
9853:
9854: if (ctxt->wellFormed)
9855: ret = 0;
9856: else {
9857: if (ctxt->errNo != 0)
9858: ret = ctxt->errNo;
9859: else
9860: ret = -1;
9861: }
9862: if (sax != NULL)
9863: ctxt->sax = NULL;
9864: xmlFreeParserCtxt(ctxt);
9865:
9866: return ret;
9867: }
9868:
9869: /**
9870: * xmlSAXUserParseMemory:
9871: * @sax: a SAX handler
9872: * @user_data: The user data returned on SAX callbacks
9873: * @buffer: an in-memory XML document input
1.127 daniel 9874: * @size: the length of the XML document in bytes
1.123 daniel 9875: *
9876: * A better SAX parsing routine.
9877: * parse an XML in-memory buffer and call the given SAX handler routines.
9878: *
9879: * Returns 0 in case of success or a error number otherwise
9880: */
9881: int xmlSAXUserParseMemory(xmlSAXHandlerPtr sax, void *user_data,
9882: char *buffer, int size) {
9883: int ret = 0;
9884: xmlParserCtxtPtr ctxt;
9885:
9886: ctxt = xmlCreateMemoryParserCtxt(buffer, size);
9887: if (ctxt == NULL) return -1;
9888: ctxt->sax = sax;
9889: ctxt->userData = user_data;
9890:
9891: xmlParseDocument(ctxt);
9892:
9893: if (ctxt->wellFormed)
9894: ret = 0;
9895: else {
9896: if (ctxt->errNo != 0)
9897: ret = ctxt->errNo;
9898: else
9899: ret = -1;
9900: }
9901: if (sax != NULL)
9902: ctxt->sax = NULL;
9903: xmlFreeParserCtxt(ctxt);
9904:
9905: return ret;
9906: }
9907:
1.32 daniel 9908:
1.98 daniel 9909: /************************************************************************
9910: * *
1.127 daniel 9911: * Miscellaneous *
1.98 daniel 9912: * *
9913: ************************************************************************/
9914:
/**
 * xmlCleanupParser:
 *
 * Cleanup function for the XML parser. It tries to reclaim all
 * parsing related global memory allocated for the parser processing:
 * the character encoding handlers and the predefined entities.
 * It doesn't deallocate any document related memory. Calling this
 * function should not prevent reusing the parser.
 */

void
xmlCleanupParser(void)
{
    /* encoding handlers first, then the predefined entities */
    xmlCleanupCharEncodingHandlers();
    xmlCleanupPredefinedEntities();
}
1.98 daniel 9929:
1.50 daniel 9930: /**
9931: * xmlParserFindNodeInfo:
9932: * @ctxt: an XML parser context
9933: * @node: an XML node within the tree
9934: *
9935: * Find the parser node info struct for a given node
9936: *
1.68 daniel 9937: * Returns an xmlParserNodeInfo block pointer or NULL
1.32 daniel 9938: */
9939: const xmlParserNodeInfo* xmlParserFindNodeInfo(const xmlParserCtxt* ctx,
9940: const xmlNode* node)
9941: {
9942: unsigned long pos;
9943:
9944: /* Find position where node should be at */
9945: pos = xmlParserFindNodeInfoIndex(&ctx->node_seq, node);
9946: if ( ctx->node_seq.buffer[pos].node == node )
9947: return &ctx->node_seq.buffer[pos];
9948: else
9949: return NULL;
9950: }
9951:
9952:
1.50 daniel 9953: /**
9954: * xmlInitNodeInfoSeq :
9955: * @seq: a node info sequence pointer
9956: *
9957: * -- Initialize (set to initial state) node info sequence
1.32 daniel 9958: */
1.55 daniel 9959: void
9960: xmlInitNodeInfoSeq(xmlParserNodeInfoSeqPtr seq)
1.32 daniel 9961: {
9962: seq->length = 0;
9963: seq->maximum = 0;
9964: seq->buffer = NULL;
9965: }
9966:
1.50 daniel 9967: /**
9968: * xmlClearNodeInfoSeq :
9969: * @seq: a node info sequence pointer
9970: *
9971: * -- Clear (release memory and reinitialize) node
1.32 daniel 9972: * info sequence
9973: */
1.55 daniel 9974: void
9975: xmlClearNodeInfoSeq(xmlParserNodeInfoSeqPtr seq)
1.32 daniel 9976: {
9977: if ( seq->buffer != NULL )
1.119 daniel 9978: xmlFree(seq->buffer);
1.32 daniel 9979: xmlInitNodeInfoSeq(seq);
9980: }
9981:
9982:
1.50 daniel 9983: /**
9984: * xmlParserFindNodeInfoIndex:
9985: * @seq: a node info sequence pointer
9986: * @node: an XML node pointer
9987: *
9988: *
1.32 daniel 9989: * xmlParserFindNodeInfoIndex : Find the index that the info record for
9990: * the given node is or should be at in a sorted sequence
1.68 daniel 9991: *
9992: * Returns a long indicating the position of the record
1.32 daniel 9993: */
9994: unsigned long xmlParserFindNodeInfoIndex(const xmlParserNodeInfoSeq* seq,
9995: const xmlNode* node)
9996: {
9997: unsigned long upper, lower, middle;
9998: int found = 0;
9999:
10000: /* Do a binary search for the key */
10001: lower = 1;
10002: upper = seq->length;
10003: middle = 0;
10004: while ( lower <= upper && !found) {
10005: middle = lower + (upper - lower) / 2;
10006: if ( node == seq->buffer[middle - 1].node )
10007: found = 1;
10008: else if ( node < seq->buffer[middle - 1].node )
10009: upper = middle - 1;
10010: else
10011: lower = middle + 1;
10012: }
10013:
10014: /* Return position */
10015: if ( middle == 0 || seq->buffer[middle - 1].node < node )
10016: return middle;
10017: else
10018: return middle - 1;
10019: }
10020:
10021:
1.50 daniel 10022: /**
10023: * xmlParserAddNodeInfo:
10024: * @ctxt: an XML parser context
1.68 daniel 10025: * @info: a node info sequence pointer
1.50 daniel 10026: *
10027: * Insert node info record into the sorted sequence
1.32 daniel 10028: */
1.55 daniel 10029: void
10030: xmlParserAddNodeInfo(xmlParserCtxtPtr ctxt,
1.68 daniel 10031: const xmlParserNodeInfo* info)
1.32 daniel 10032: {
10033: unsigned long pos;
10034: static unsigned int block_size = 5;
10035:
10036: /* Find pos and check to see if node is already in the sequence */
1.55 daniel 10037: pos = xmlParserFindNodeInfoIndex(&ctxt->node_seq, info->node);
10038: if ( pos < ctxt->node_seq.length
10039: && ctxt->node_seq.buffer[pos].node == info->node ) {
10040: ctxt->node_seq.buffer[pos] = *info;
1.32 daniel 10041: }
10042:
10043: /* Otherwise, we need to add new node to buffer */
10044: else {
10045: /* Expand buffer by 5 if needed */
1.55 daniel 10046: if ( ctxt->node_seq.length + 1 > ctxt->node_seq.maximum ) {
1.32 daniel 10047: xmlParserNodeInfo* tmp_buffer;
1.55 daniel 10048: unsigned int byte_size = (sizeof(*ctxt->node_seq.buffer)
10049: *(ctxt->node_seq.maximum + block_size));
1.32 daniel 10050:
1.55 daniel 10051: if ( ctxt->node_seq.buffer == NULL )
1.119 daniel 10052: tmp_buffer = (xmlParserNodeInfo*) xmlMalloc(byte_size);
1.32 daniel 10053: else
1.119 daniel 10054: tmp_buffer = (xmlParserNodeInfo*) xmlRealloc(ctxt->node_seq.buffer, byte_size);
1.32 daniel 10055:
10056: if ( tmp_buffer == NULL ) {
1.55 daniel 10057: if ((ctxt->sax != NULL) && (ctxt->sax->error != NULL))
1.74 daniel 10058: ctxt->sax->error(ctxt->userData, "Out of memory\n");
1.123 daniel 10059: ctxt->errNo = XML_ERR_NO_MEMORY;
1.32 daniel 10060: return;
10061: }
1.55 daniel 10062: ctxt->node_seq.buffer = tmp_buffer;
10063: ctxt->node_seq.maximum += block_size;
1.32 daniel 10064: }
10065:
10066: /* If position is not at end, move elements out of the way */
1.55 daniel 10067: if ( pos != ctxt->node_seq.length ) {
1.32 daniel 10068: unsigned long i;
10069:
1.55 daniel 10070: for ( i = ctxt->node_seq.length; i > pos; i-- )
10071: ctxt->node_seq.buffer[i] = ctxt->node_seq.buffer[i - 1];
1.32 daniel 10072: }
10073:
10074: /* Copy element and increase length */
1.55 daniel 10075: ctxt->node_seq.buffer[pos] = *info;
10076: ctxt->node_seq.length++;
1.32 daniel 10077: }
10078: }
1.77 daniel 10079:
1.98 daniel 10080:
10081: /**
10082: * xmlSubstituteEntitiesDefault :
10083: * @val: int 0 or 1
10084: *
10085: * Set and return the previous value for default entity support.
10086: * Initially the parser always keep entity references instead of substituting
10087: * entity values in the output. This function has to be used to change the
10088: * default parser behaviour
10089: * SAX::subtituteEntities() has to be used for changing that on a file by
10090: * file basis.
10091: *
10092: * Returns the last value for 0 for no substitution, 1 for substitution.
10093: */
10094:
10095: int
10096: xmlSubstituteEntitiesDefault(int val) {
10097: int old = xmlSubstituteEntitiesDefaultValue;
10098:
10099: xmlSubstituteEntitiesDefaultValue = val;
10100: return(old);
10101: }
1.77 daniel 10102:
Webmaster