Annotation of XML/parser.c, revision 1.161
1.1 veillard 1: /*
1.3 veillard 2: * parser.c : an XML 1.0 non-verifying parser
1.15 veillard 3: *
4: * See Copyright for the status of this software.
5: *
1.60 daniel 6: * Daniel.Veillard@w3.org
1.1 veillard 7: */
8:
1.26 daniel 9: #ifdef WIN32
1.138 daniel 10: #include "win32config.h"
1.26 daniel 11: #else
1.121 daniel 12: #include "config.h"
1.26 daniel 13: #endif
1.121 daniel 14:
1.1 veillard 15: #include <stdio.h>
1.121 daniel 16: #include <string.h> /* for memset() only */
17: #ifdef HAVE_CTYPE_H
1.1 veillard 18: #include <ctype.h>
1.121 daniel 19: #endif
20: #ifdef HAVE_STDLIB_H
1.50 daniel 21: #include <stdlib.h>
1.121 daniel 22: #endif
23: #ifdef HAVE_SYS_STAT_H
1.9 httpng 24: #include <sys/stat.h>
1.121 daniel 25: #endif
1.9 httpng 26: #ifdef HAVE_FCNTL_H
27: #include <fcntl.h>
28: #endif
1.10 httpng 29: #ifdef HAVE_UNISTD_H
30: #include <unistd.h>
31: #endif
1.20 daniel 32: #ifdef HAVE_ZLIB_H
33: #include <zlib.h>
34: #endif
1.1 veillard 35:
1.119 daniel 36: #include "xmlmemory.h"
1.14 veillard 37: #include "tree.h"
1.1 veillard 38: #include "parser.h"
1.14 veillard 39: #include "entities.h"
1.75 daniel 40: #include "encoding.h"
1.61 daniel 41: #include "valid.h"
1.69 daniel 42: #include "parserInternals.h"
1.91 daniel 43: #include "xmlIO.h"
1.122 daniel 44: #include "xml-error.h"
1.1 veillard 45:
1.140 daniel 46: #define XML_PARSER_BIG_BUFFER_SIZE 1000
47: #define XML_PARSER_BUFFER_SIZE 100
48:
1.86 daniel 49: const char *xmlParserVersion = LIBXML_VERSION;
1.160 daniel 50: int xmlGetWarningsDefaultValue = 1;
1.86 daniel 51:
1.139 daniel 52: /*
53: * List of XML prefixed PI allowed by W3C specs
54: */
55:
56: const char *xmlW3CPIs[] = {
57: "xml-stylesheet",
58: NULL
59: };
1.91 daniel 60:
1.151 daniel 61: void xmlParserHandleReference(xmlParserCtxtPtr ctxt);
62: void xmlParserHandlePEReference(xmlParserCtxtPtr ctxt);
63: xmlEntityPtr xmlParseStringPEReference(xmlParserCtxtPtr ctxt,
64: const xmlChar **str);
1.91 daniel 65: /************************************************************************
66: * *
67: * Input handling functions for progressive parsing *
68: * *
69: ************************************************************************/
70:
/* #define DEBUG_INPUT */
/* #define DEBUG_STACK */
/* #define DEBUG_PUSH */


/* Amount of lookahead requested each time the input is grown. */
#define INPUT_CHUNK	250
/* we need to keep enough input to show errors in context */
#define LINE_LEN        80

#ifdef DEBUG_INPUT
#define CHECK_BUFFER(in) check_buffer(in)

/**
 * check_buffer:
 * @in:  an XML parser input
 *
 * Debug helper, only compiled when DEBUG_INPUT is defined. It checks
 * that @in->base still matches the start of the underlying buffer and
 * that @in->cur lies within [base, base + use], reporting any mismatch
 * on stderr, then dumps the buffer state.
 */
void check_buffer(xmlParserInputPtr in) {
    if (in->base != in->buf->buffer->content) {
        fprintf(stderr, "xmlParserInput: base mismatch problem\n");
    }
    if (in->cur < in->base) {
        fprintf(stderr, "xmlParserInput: cur < base problem\n");
    }
    if (in->cur > in->base + in->buf->buffer->use) {
        fprintf(stderr, "xmlParserInput: cur > base + use problem\n");
    }
    /*
     * Pointers are printed with %p and the pointer difference is
     * converted explicitly: squeezing them through (int) and %x/%d is
     * a format mismatch wherever pointers are wider than int.
     */
    fprintf(stderr,"buffer %p : content %p, cur %ld, use %d, size %d\n",
            (void *) in, (void *) in->buf->buffer->content,
            (long) (in->cur - in->base),
            (int) in->buf->buffer->use, (int) in->buf->buffer->size);
}

#else
#define CHECK_BUFFER(in) 
#endif
101:
1.91 daniel 102:
103: /**
104: * xmlParserInputRead:
105: * @in: an XML parser input
106: * @len: an indicative size for the lookahead
107: *
108: * This function refresh the input for the parser. It doesn't try to
109: * preserve pointers to the input buffer, and discard already read data
110: *
1.123 daniel 111: * Returns the number of xmlChars read, or -1 in case of error, 0 indicate the
1.91 daniel 112: * end of this entity
113: */
114: int
115: xmlParserInputRead(xmlParserInputPtr in, int len) {
116: int ret;
117: int used;
118: int index;
119:
120: #ifdef DEBUG_INPUT
121: fprintf(stderr, "Read\n");
122: #endif
123: if (in->buf == NULL) return(-1);
124: if (in->base == NULL) return(-1);
125: if (in->cur == NULL) return(-1);
126: if (in->buf->buffer == NULL) return(-1);
127:
128: CHECK_BUFFER(in);
129:
130: used = in->cur - in->buf->buffer->content;
131: ret = xmlBufferShrink(in->buf->buffer, used);
132: if (ret > 0) {
133: in->cur -= ret;
134: in->consumed += ret;
135: }
136: ret = xmlParserInputBufferRead(in->buf, len);
137: if (in->base != in->buf->buffer->content) {
138: /*
139: * the buffer has been realloced
140: */
141: index = in->cur - in->base;
142: in->base = in->buf->buffer->content;
143: in->cur = &in->buf->buffer->content[index];
144: }
145:
146: CHECK_BUFFER(in);
147:
148: return(ret);
149: }
150:
151: /**
152: * xmlParserInputGrow:
153: * @in: an XML parser input
154: * @len: an indicative size for the lookahead
155: *
156: * This function increase the input for the parser. It tries to
157: * preserve pointers to the input buffer, and keep already read data
158: *
1.123 daniel 159: * Returns the number of xmlChars read, or -1 in case of error, 0 indicate the
1.91 daniel 160: * end of this entity
161: */
162: int
163: xmlParserInputGrow(xmlParserInputPtr in, int len) {
164: int ret;
165: int index;
166:
167: #ifdef DEBUG_INPUT
168: fprintf(stderr, "Grow\n");
169: #endif
170: if (in->buf == NULL) return(-1);
171: if (in->base == NULL) return(-1);
172: if (in->cur == NULL) return(-1);
173: if (in->buf->buffer == NULL) return(-1);
174:
175: CHECK_BUFFER(in);
176:
177: index = in->cur - in->base;
178: if (in->buf->buffer->use > index + INPUT_CHUNK) {
179:
180: CHECK_BUFFER(in);
181:
182: return(0);
183: }
1.148 daniel 184: if ((in->buf->httpIO != NULL) || (in->buf->ftpIO != NULL) ||
185: (in->buf->file != NULL) ||
1.140 daniel 186: #ifdef HAVE_ZLIB_H
187: (in->buf->gzfile != NULL) ||
188: #endif
189: (in->buf->fd >= 0))
190: ret = xmlParserInputBufferGrow(in->buf, len);
191: else
192: return(0);
1.135 daniel 193:
194: /*
195: * NOTE : in->base may be a "dandling" i.e. freed pointer in this
196: * block, but we use it really as an integer to do some
197: * pointer arithmetic. Insure will raise it as a bug but in
198: * that specific case, that's not !
199: */
1.91 daniel 200: if (in->base != in->buf->buffer->content) {
201: /*
202: * the buffer has been realloced
203: */
204: index = in->cur - in->base;
205: in->base = in->buf->buffer->content;
206: in->cur = &in->buf->buffer->content[index];
207: }
208:
209: CHECK_BUFFER(in);
210:
211: return(ret);
212: }
213:
214: /**
215: * xmlParserInputShrink:
216: * @in: an XML parser input
217: *
218: * This function removes used input for the parser.
219: */
220: void
221: xmlParserInputShrink(xmlParserInputPtr in) {
222: int used;
223: int ret;
224: int index;
225:
226: #ifdef DEBUG_INPUT
227: fprintf(stderr, "Shrink\n");
228: #endif
229: if (in->buf == NULL) return;
230: if (in->base == NULL) return;
231: if (in->cur == NULL) return;
232: if (in->buf->buffer == NULL) return;
233:
234: CHECK_BUFFER(in);
235:
236: used = in->cur - in->buf->buffer->content;
237: if (used > INPUT_CHUNK) {
1.110 daniel 238: ret = xmlBufferShrink(in->buf->buffer, used - LINE_LEN);
1.91 daniel 239: if (ret > 0) {
240: in->cur -= ret;
241: in->consumed += ret;
242: }
243: }
244:
245: CHECK_BUFFER(in);
246:
247: if (in->buf->buffer->use > INPUT_CHUNK) {
248: return;
249: }
250: xmlParserInputBufferRead(in->buf, 2 * INPUT_CHUNK);
251: if (in->base != in->buf->buffer->content) {
252: /*
253: * the buffer has been realloced
254: */
255: index = in->cur - in->base;
256: in->base = in->buf->buffer->content;
257: in->cur = &in->buf->buffer->content[index];
258: }
259:
260: CHECK_BUFFER(in);
261: }
262:
1.45 daniel 263: /************************************************************************
264: * *
265: * Parser stacks related functions and macros *
266: * *
267: ************************************************************************/
1.79 daniel 268:
269: int xmlSubstituteEntitiesDefaultValue = 0;
1.100 daniel 270: int xmlDoValidityCheckingDefaultValue = 0;
1.135 daniel 271: xmlEntityPtr xmlParseStringEntityRef(xmlParserCtxtPtr ctxt,
272: const xmlChar ** str);
1.79 daniel 273:
1.1 veillard 274: /*
1.40 daniel 275: * Generic function for accessing stacks in the Parser Context
1.1 veillard 276: */
277:
1.140 daniel 278: #define PUSH_AND_POP(scope, type, name) \
279: scope int name##Push(xmlParserCtxtPtr ctxt, type value) { \
1.31 daniel 280: if (ctxt->name##Nr >= ctxt->name##Max) { \
281: ctxt->name##Max *= 2; \
1.119 daniel 282: ctxt->name##Tab = (void *) xmlRealloc(ctxt->name##Tab, \
1.40 daniel 283: ctxt->name##Max * sizeof(ctxt->name##Tab[0])); \
284: if (ctxt->name##Tab == NULL) { \
1.31 daniel 285: fprintf(stderr, "realloc failed !\n"); \
1.145 daniel 286: return(0); \
1.31 daniel 287: } \
288: } \
1.40 daniel 289: ctxt->name##Tab[ctxt->name##Nr] = value; \
290: ctxt->name = value; \
291: return(ctxt->name##Nr++); \
1.31 daniel 292: } \
1.140 daniel 293: scope type name##Pop(xmlParserCtxtPtr ctxt) { \
1.69 daniel 294: type ret; \
1.40 daniel 295: if (ctxt->name##Nr <= 0) return(0); \
296: ctxt->name##Nr--; \
1.50 daniel 297: if (ctxt->name##Nr > 0) \
298: ctxt->name = ctxt->name##Tab[ctxt->name##Nr - 1]; \
299: else \
300: ctxt->name = NULL; \
1.69 daniel 301: ret = ctxt->name##Tab[ctxt->name##Nr]; \
302: ctxt->name##Tab[ctxt->name##Nr] = 0; \
303: return(ret); \
1.31 daniel 304: } \
305:
1.140 daniel 306: PUSH_AND_POP(extern, xmlParserInputPtr, input)
307: PUSH_AND_POP(extern, xmlNodePtr, node)
308: PUSH_AND_POP(extern, xmlChar*, name)
1.40 daniel 309:
/*
 * Macros for accessing the content. Those should be used only by the parser,
 * and not exported. They all expect a variable named ctxt holding the
 * parser context to be in scope.
 *
 * Dirty macros, i.e. one needs to make assumptions on the context to use them
 *
 *   CUR_PTR return the current pointer to the xmlChar to be parsed.
 *           To be used with extreme caution since operations consuming
 *           characters may move the input buffer to a different location !
 *   RAW     returns the current xmlChar value, or -1 when a token is
 *           pending in ctxt->token.
 *   CUR     returns the pending token if any, else the current xmlChar
 *           value, i.e. an 8 bit value if compiled in ISO-Latin or UTF-8.
 *           This should be used internally by the parser
 *           only to compare to ASCII values otherwise it would break when
 *           running with UTF-8 encoding.
 *   NXT(n)  returns the n'th next xmlChar. Same as CUR it should be used only
 *           to compare on ASCII based substring.
 *   SKIP(n) Skip n xmlChar, and must also be used only to skip ASCII defined
 *           strings within the parser.
 *
 * Clean macros, not dependent of an ASCII context, expect UTF-8 encoding
 *
 *   NEXT    Skip to the next character, this does the proper decoding
 *           in UTF-8 mode. It also pops finished entities on the fly.
 *   NEXTL(l) Skip the current character, l being its length in xmlChars.
 *   COPY_BUF(l,b,i,v) append the char of value v and length l to the
 *           buffer b at index i, and advance i accordingly.
 *   CUR_CHAR(l) Return the current char as an int and store its length in l.
 *
 * SHRINK, GROW, SKIP_BLANKS, NEXT, NEXTL and COPY_BUF expand to one or
 * several complete statements, not to an expression: do not use them as
 * the unbraced body of an if/else or of a loop.
 */

#define RAW (ctxt->token ? -1 : (*ctxt->input->cur))
#define CUR (ctxt->token ? ctxt->token : (*ctxt->input->cur))
#define SKIP(val) (ctxt->nbChars += (val), ctxt->input->cur += (val))
#define NXT(val) ctxt->input->cur[(val)]
#define CUR_PTR ctxt->input->cur

#define SHRINK  xmlParserInputShrink(ctxt->input);			\
    if ((*ctxt->input->cur == 0) &&					\
        (xmlParserInputGrow(ctxt->input, INPUT_CHUNK) <= 0))		\
	    xmlPopInput(ctxt)

#define GROW  xmlParserInputGrow(ctxt->input, INPUT_CHUNK);		\
    if ((*ctxt->input->cur == 0) &&					\
        (xmlParserInputGrow(ctxt->input, INPUT_CHUNK) <= 0))		\
	    xmlPopInput(ctxt)

#define SKIP_BLANKS xmlSkipBlankChars(ctxt);

#define NEXT xmlNextChar(ctxt);

#define NEXTL(l)							\
    if (*(ctxt->input->cur) == '\n') {					\
	ctxt->input->line++; ctxt->input->col = 1;			\
    } else ctxt->input->col++;						\
    ctxt->token = 0; ctxt->input->cur += (l);				\
    if (*ctxt->input->cur == '%') xmlParserHandlePEReference(ctxt);	\
    if (*ctxt->input->cur == '&') xmlParserHandleReference(ctxt);

#define CUR_CHAR(l) xmlCurrentChar(ctxt, &(l));

#define COPY_BUF(l,b,i,v)						\
    if ((l) == 1) (b)[(i)++] = (xmlChar) (v);				\
    else (i) += xmlCopyChar((l),&(b)[(i)],(v));
1.151 daniel 370:
371: /**
372: * xmlNextChar:
373: * @ctxt: the XML parser context
374: *
375: * Skip to the next char input char.
376: */
1.55 daniel 377:
1.151 daniel 378: void
379: xmlNextChar(xmlParserCtxtPtr ctxt) {
380: if (ctxt->token != 0) ctxt->token = 0;
381: else {
382: if ((*ctxt->input->cur == 0) &&
383: (xmlParserInputGrow(ctxt->input, INPUT_CHUNK) <= 0) &&
384: (ctxt->instate != XML_PARSER_COMMENT)) {
385: /*
386: * If we are at the end of the current entity and
387: * the context allows it, we pop consumed entities
388: * automatically.
389: * TODO: the auto closing should be blocked in other cases
390: */
391: xmlPopInput(ctxt);
392: } else {
393: if (*(ctxt->input->cur) == '\n') {
394: ctxt->input->line++; ctxt->input->col = 1;
395: } else ctxt->input->col++;
396: if (ctxt->encoding == NULL) {
397: /*
398: * We are supposed to handle UTF8, check it's valid
399: * From rfc2044: encoding of the Unicode values on UTF-8:
400: *
401: * UCS-4 range (hex.) UTF-8 octet sequence (binary)
402: * 0000 0000-0000 007F 0xxxxxxx
403: * 0000 0080-0000 07FF 110xxxxx 10xxxxxx
404: * 0000 0800-0000 FFFF 1110xxxx 10xxxxxx 10xxxxxx
405: *
1.160 daniel 406: * Check for the 0x110000 limit too
1.151 daniel 407: */
408: const unsigned char *cur = ctxt->input->cur;
409: unsigned char c;
1.91 daniel 410:
1.151 daniel 411: c = *cur;
412: if (c & 0x80) {
413: if (cur[1] == 0)
414: xmlParserInputGrow(ctxt->input, INPUT_CHUNK);
415: if ((cur[1] & 0xc0) != 0x80)
416: goto encoding_error;
417: if ((c & 0xe0) == 0xe0) {
418: unsigned int val;
419:
420: if (cur[2] == 0)
421: xmlParserInputGrow(ctxt->input, INPUT_CHUNK);
422: if ((cur[2] & 0xc0) != 0x80)
423: goto encoding_error;
424: if ((c & 0xf0) == 0xf0) {
425: if (cur[3] == 0)
426: xmlParserInputGrow(ctxt->input, INPUT_CHUNK);
427: if (((c & 0xf8) != 0xf0) ||
428: ((cur[3] & 0xc0) != 0x80))
429: goto encoding_error;
430: /* 4-byte code */
431: ctxt->input->cur += 4;
432: val = (cur[0] & 0x7) << 18;
433: val |= (cur[1] & 0x3f) << 12;
434: val |= (cur[2] & 0x3f) << 6;
435: val |= cur[3] & 0x3f;
436: } else {
437: /* 3-byte code */
438: ctxt->input->cur += 3;
439: val = (cur[0] & 0xf) << 12;
440: val |= (cur[1] & 0x3f) << 6;
441: val |= cur[2] & 0x3f;
442: }
443: if (((val > 0xd7ff) && (val < 0xe000)) ||
444: ((val > 0xfffd) && (val < 0x10000)) ||
1.160 daniel 445: (val >= 0x110000)) {
1.151 daniel 446: if ((ctxt->sax != NULL) &&
447: (ctxt->sax->error != NULL))
448: ctxt->sax->error(ctxt->userData,
449: "Char out of allowed range\n");
450: ctxt->errNo = XML_ERR_INVALID_ENCODING;
451: ctxt->wellFormed = 0;
452: }
453: } else
454: /* 2-byte code */
455: ctxt->input->cur += 2;
456: } else
457: /* 1-byte code */
458: ctxt->input->cur++;
459: } else {
460: /*
461: * Assume it's a fixed lenght encoding (1) with
462: * a compatibke encoding for the ASCII set, since
463: * XML constructs only use < 128 chars
464: */
465: ctxt->input->cur++;
466: }
467: ctxt->nbChars++;
468: if (*ctxt->input->cur == 0)
469: xmlParserInputGrow(ctxt->input, INPUT_CHUNK);
470: }
471: }
1.154 daniel 472: if (*ctxt->input->cur == '%') xmlParserHandlePEReference(ctxt);
473: if (*ctxt->input->cur == '&') xmlParserHandleReference(ctxt);
1.151 daniel 474: return;
475: encoding_error:
476: /*
477: * If we detect an UTF8 error that probably mean that the
478: * input encoding didn't get properly advertized in the
479: * declaration header. Report the error and switch the encoding
480: * to ISO-Latin-1 (if you don't like this policy, just declare the
481: * encoding !)
482: */
483: if ((ctxt->sax != NULL) && (ctxt->sax->error != NULL))
484: ctxt->sax->error(ctxt->userData,
485: "Input is not proper UTF-8, indicate encoding !\n");
486: ctxt->errNo = XML_ERR_INVALID_ENCODING;
487:
488: ctxt->encoding = xmlStrdup(BAD_CAST "ISO-8859-1");
489: ctxt->input->cur++;
490: return;
491: }
1.42 daniel 492:
1.152 daniel 493: /**
494: * xmlCurrentChar:
495: * @ctxt: the XML parser context
496: * @len: pointer to the length of the char read
497: *
498: * The current char value, if using UTF-8 this may actaully span multiple
499: * bytes in the input buffer.
500: *
501: * Returns the current char value and its lenght
502: */
503:
504: int
505: xmlCurrentChar(xmlParserCtxtPtr ctxt, int *len) {
506: if (ctxt->token != 0) {
507: *len = 0;
508: return(ctxt->token);
509: }
510: if (ctxt->encoding == NULL) {
511: /*
512: * We are supposed to handle UTF8, check it's valid
513: * From rfc2044: encoding of the Unicode values on UTF-8:
514: *
515: * UCS-4 range (hex.) UTF-8 octet sequence (binary)
516: * 0000 0000-0000 007F 0xxxxxxx
517: * 0000 0080-0000 07FF 110xxxxx 10xxxxxx
518: * 0000 0800-0000 FFFF 1110xxxx 10xxxxxx 10xxxxxx
519: *
1.160 daniel 520: * Check for the 0x110000 limit too
1.152 daniel 521: */
522: const unsigned char *cur = ctxt->input->cur;
523: unsigned char c;
524: unsigned int val;
525:
526: c = *cur;
527: if (c & 0x80) {
528: if (cur[1] == 0)
529: xmlParserInputGrow(ctxt->input, INPUT_CHUNK);
530: if ((cur[1] & 0xc0) != 0x80)
531: goto encoding_error;
532: if ((c & 0xe0) == 0xe0) {
533:
534: if (cur[2] == 0)
535: xmlParserInputGrow(ctxt->input, INPUT_CHUNK);
536: if ((cur[2] & 0xc0) != 0x80)
537: goto encoding_error;
538: if ((c & 0xf0) == 0xf0) {
539: if (cur[3] == 0)
540: xmlParserInputGrow(ctxt->input, INPUT_CHUNK);
541: if (((c & 0xf8) != 0xf0) ||
542: ((cur[3] & 0xc0) != 0x80))
543: goto encoding_error;
544: /* 4-byte code */
545: *len = 4;
546: val = (cur[0] & 0x7) << 18;
547: val |= (cur[1] & 0x3f) << 12;
548: val |= (cur[2] & 0x3f) << 6;
549: val |= cur[3] & 0x3f;
550: } else {
551: /* 3-byte code */
552: *len = 3;
553: val = (cur[0] & 0xf) << 12;
554: val |= (cur[1] & 0x3f) << 6;
555: val |= cur[2] & 0x3f;
556: }
557: } else {
558: /* 2-byte code */
559: *len = 2;
560: val = (cur[0] & 0x1f) << 6;
561: val |= cur[2] & 0x3f;
562: }
563: if (!IS_CHAR(val)) {
564: if ((ctxt->sax != NULL) &&
565: (ctxt->sax->error != NULL))
566: ctxt->sax->error(ctxt->userData,
567: "Char out of allowed range\n");
568: ctxt->errNo = XML_ERR_INVALID_ENCODING;
569: ctxt->wellFormed = 0;
570: }
571: return(val);
572: } else {
573: /* 1-byte code */
574: *len = 1;
575: return((int) *ctxt->input->cur);
576: }
577: }
578: /*
579: * Assume it's a fixed lenght encoding (1) with
580: * a compatibke encoding for the ASCII set, since
581: * XML constructs only use < 128 chars
582: */
583: *len = 1;
584: return((int) *ctxt->input->cur);
585: encoding_error:
586: /*
587: * If we detect an UTF8 error that probably mean that the
588: * input encoding didn't get properly advertized in the
589: * declaration header. Report the error and switch the encoding
590: * to ISO-Latin-1 (if you don't like this policy, just declare the
591: * encoding !)
592: */
593: if ((ctxt->sax != NULL) && (ctxt->sax->error != NULL))
594: ctxt->sax->error(ctxt->userData,
595: "Input is not proper UTF-8, indicate encoding !\n");
596: ctxt->errNo = XML_ERR_INVALID_ENCODING;
597:
598: ctxt->encoding = xmlStrdup(BAD_CAST "ISO-8859-1");
599: *len = 1;
600: return((int) *ctxt->input->cur);
601: }
602:
603: /**
604: * xmlCopyChar:
605: * @len: pointer to the length of the char read (or zero)
606: * @array: pointer to an arry of xmlChar
607: * @val: the char value
608: *
609: * append the char value in the array
610: *
611: * Returns the number of xmlChar written
612: */
613:
614: int
615: xmlCopyChar(int len, xmlChar *out, int val) {
616: /*
617: * We are supposed to handle UTF8, check it's valid
618: * From rfc2044: encoding of the Unicode values on UTF-8:
619: *
620: * UCS-4 range (hex.) UTF-8 octet sequence (binary)
621: * 0000 0000-0000 007F 0xxxxxxx
622: * 0000 0080-0000 07FF 110xxxxx 10xxxxxx
623: * 0000 0800-0000 FFFF 1110xxxx 10xxxxxx 10xxxxxx
624: */
625: if (len == 0) {
626: if (val < 0) len = 0;
1.160 daniel 627: else if (val < 0x80) len = 1;
628: else if (val < 0x800) len = 2;
629: else if (val < 0x10000) len = 3;
630: else if (val < 0x110000) len = 4;
1.152 daniel 631: if (len == 0) {
632: fprintf(stderr, "Internal error, xmlCopyChar 0x%X out of bound\n",
633: val);
634: return(0);
635: }
636: }
637: if (len > 1) {
638: int bits;
639:
640: if (val < 0x80) { *out++= val; bits= -6; }
641: else if (val < 0x800) { *out++= (val >> 6) | 0xC0; bits= 0; }
642: else if (val < 0x10000) { *out++= (val >> 12) | 0xE0; bits= 6; }
643: else { *out++= (val >> 18) | 0xF0; bits= 12; }
644:
645: for ( ; bits >= 0; bits-= 6)
646: *out++= ((val >> bits) & 0x3F) | 0x80 ;
647:
648: return(len);
649: }
650: *out = (xmlChar) val;
651: return(1);
1.155 daniel 652: }
653:
654: /**
655: * xmlSkipBlankChars:
656: * @ctxt: the XML parser context
657: *
658: * skip all blanks character found at that point in the input streams.
659: * It pops up finished entities in the process if allowable at that point.
660: *
661: * Returns the number of space chars skipped
662: */
663:
664: int
665: xmlSkipBlankChars(xmlParserCtxtPtr ctxt) {
666: int cur, res = 0;
667:
668: do {
669: cur = CUR;
670: while (IS_BLANK(cur)) {
671: NEXT;
672: cur = CUR;
673: res++;
674: }
675: while ((cur == 0) && (ctxt->inputNr > 1) &&
676: (ctxt->instate != XML_PARSER_COMMENT)) {
677: xmlPopInput(ctxt);
678: cur = CUR;
679: }
680: if (*ctxt->input->cur == '%') xmlParserHandlePEReference(ctxt);
681: if (*ctxt->input->cur == '&') xmlParserHandleReference(ctxt);
682: } while (IS_BLANK(cur));
683: return(res);
1.152 daniel 684: }
685:
1.97 daniel 686: /************************************************************************
687: * *
688: * Commodity functions to handle entities processing *
689: * *
690: ************************************************************************/
1.40 daniel 691:
1.50 daniel 692: /**
693: * xmlPopInput:
694: * @ctxt: an XML parser context
695: *
1.40 daniel 696: * xmlPopInput: the current input pointed by ctxt->input came to an end
697: * pop it and return the next char.
1.45 daniel 698: *
1.123 daniel 699: * Returns the current xmlChar in the parser context
1.40 daniel 700: */
1.123 daniel 701: xmlChar
1.55 daniel 702: xmlPopInput(xmlParserCtxtPtr ctxt) {
1.40 daniel 703: if (ctxt->inputNr == 1) return(0); /* End of main Input */
1.69 daniel 704: xmlFreeInputStream(inputPop(ctxt));
1.97 daniel 705: if ((*ctxt->input->cur == 0) &&
706: (xmlParserInputGrow(ctxt->input, INPUT_CHUNK) <= 0))
707: return(xmlPopInput(ctxt));
1.40 daniel 708: return(CUR);
709: }
710:
1.50 daniel 711: /**
712: * xmlPushInput:
713: * @ctxt: an XML parser context
714: * @input: an XML parser input fragment (entity, XML fragment ...).
715: *
1.40 daniel 716: * xmlPushInput: switch to a new input stream which is stacked on top
717: * of the previous one(s).
718: */
1.55 daniel 719: void
720: xmlPushInput(xmlParserCtxtPtr ctxt, xmlParserInputPtr input) {
1.40 daniel 721: if (input == NULL) return;
722: inputPush(ctxt, input);
723: }
724:
1.50 daniel 725: /**
1.69 daniel 726: * xmlFreeInputStream:
1.127 daniel 727: * @input: an xmlParserInputPtr
1.69 daniel 728: *
729: * Free up an input stream.
730: */
731: void
732: xmlFreeInputStream(xmlParserInputPtr input) {
733: if (input == NULL) return;
734:
1.119 daniel 735: if (input->filename != NULL) xmlFree((char *) input->filename);
736: if (input->directory != NULL) xmlFree((char *) input->directory);
1.69 daniel 737: if ((input->free != NULL) && (input->base != NULL))
1.123 daniel 738: input->free((xmlChar *) input->base);
1.93 veillard 739: if (input->buf != NULL)
740: xmlFreeParserInputBuffer(input->buf);
1.69 daniel 741: memset(input, -1, sizeof(xmlParserInput));
1.119 daniel 742: xmlFree(input);
1.69 daniel 743: }
744:
745: /**
1.96 daniel 746: * xmlNewInputStream:
747: * @ctxt: an XML parser context
748: *
749: * Create a new input stream structure
750: * Returns the new input stream or NULL
751: */
752: xmlParserInputPtr
753: xmlNewInputStream(xmlParserCtxtPtr ctxt) {
754: xmlParserInputPtr input;
755:
1.119 daniel 756: input = (xmlParserInputPtr) xmlMalloc(sizeof(xmlParserInput));
1.96 daniel 757: if (input == NULL) {
1.123 daniel 758: ctxt->errNo = XML_ERR_NO_MEMORY;
1.96 daniel 759: if ((ctxt->sax != NULL) && (ctxt->sax->error != NULL))
1.122 daniel 760: ctxt->sax->error(ctxt->userData,
761: "malloc: couldn't allocate a new input stream\n");
1.123 daniel 762: ctxt->errNo = XML_ERR_NO_MEMORY;
1.96 daniel 763: return(NULL);
764: }
765: input->filename = NULL;
766: input->directory = NULL;
767: input->base = NULL;
768: input->cur = NULL;
769: input->buf = NULL;
770: input->line = 1;
771: input->col = 1;
772: input->buf = NULL;
773: input->free = NULL;
774: input->consumed = 0;
1.140 daniel 775: input->length = 0;
1.96 daniel 776: return(input);
777: }
778:
779: /**
1.50 daniel 780: * xmlNewEntityInputStream:
781: * @ctxt: an XML parser context
782: * @entity: an Entity pointer
783: *
1.82 daniel 784: * Create a new input stream based on an xmlEntityPtr
1.113 daniel 785: *
786: * Returns the new input stream or NULL
1.45 daniel 787: */
1.50 daniel 788: xmlParserInputPtr
789: xmlNewEntityInputStream(xmlParserCtxtPtr ctxt, xmlEntityPtr entity) {
1.45 daniel 790: xmlParserInputPtr input;
791:
792: if (entity == NULL) {
1.123 daniel 793: ctxt->errNo = XML_ERR_INTERNAL_ERROR;
1.55 daniel 794: if ((ctxt->sax != NULL) && (ctxt->sax->error != NULL))
1.74 daniel 795: ctxt->sax->error(ctxt->userData,
1.45 daniel 796: "internal: xmlNewEntityInputStream entity = NULL\n");
1.123 daniel 797: ctxt->errNo = XML_ERR_INTERNAL_ERROR;
1.50 daniel 798: return(NULL);
1.45 daniel 799: }
800: if (entity->content == NULL) {
1.159 daniel 801: switch (entity->etype) {
1.113 daniel 802: case XML_EXTERNAL_GENERAL_UNPARSED_ENTITY:
1.123 daniel 803: ctxt->errNo = XML_ERR_UNPARSED_ENTITY;
1.113 daniel 804: if ((ctxt->sax != NULL) && (ctxt->sax->error != NULL))
805: ctxt->sax->error(ctxt->userData,
806: "xmlNewEntityInputStream unparsed entity !\n");
807: break;
808: case XML_EXTERNAL_GENERAL_PARSED_ENTITY:
809: case XML_EXTERNAL_PARAMETER_ENTITY:
1.116 daniel 810: return(xmlLoadExternalEntity((char *) entity->SystemID,
1.142 daniel 811: (char *) entity->ExternalID, ctxt));
1.113 daniel 812: case XML_INTERNAL_GENERAL_ENTITY:
813: if ((ctxt->sax != NULL) && (ctxt->sax->error != NULL))
814: ctxt->sax->error(ctxt->userData,
815: "Internal entity %s without content !\n", entity->name);
816: break;
817: case XML_INTERNAL_PARAMETER_ENTITY:
1.123 daniel 818: ctxt->errNo = XML_ERR_INTERNAL_ERROR;
1.113 daniel 819: if ((ctxt->sax != NULL) && (ctxt->sax->error != NULL))
820: ctxt->sax->error(ctxt->userData,
821: "Internal parameter entity %s without content !\n", entity->name);
822: break;
823: case XML_INTERNAL_PREDEFINED_ENTITY:
1.123 daniel 824: ctxt->errNo = XML_ERR_INTERNAL_ERROR;
1.113 daniel 825: if ((ctxt->sax != NULL) && (ctxt->sax->error != NULL))
826: ctxt->sax->error(ctxt->userData,
827: "Predefined entity %s without content !\n", entity->name);
828: break;
829: }
1.50 daniel 830: return(NULL);
1.45 daniel 831: }
1.96 daniel 832: input = xmlNewInputStream(ctxt);
1.45 daniel 833: if (input == NULL) {
1.50 daniel 834: return(NULL);
1.45 daniel 835: }
1.156 daniel 836: input->filename = (char *) entity->SystemID;
1.45 daniel 837: input->base = entity->content;
838: input->cur = entity->content;
1.140 daniel 839: input->length = entity->length;
1.50 daniel 840: return(input);
1.45 daniel 841: }
842:
1.59 daniel 843: /**
844: * xmlNewStringInputStream:
845: * @ctxt: an XML parser context
1.96 daniel 846: * @buffer: an memory buffer
1.59 daniel 847: *
848: * Create a new input stream based on a memory buffer.
1.68 daniel 849: * Returns the new input stream
1.59 daniel 850: */
851: xmlParserInputPtr
1.123 daniel 852: xmlNewStringInputStream(xmlParserCtxtPtr ctxt, const xmlChar *buffer) {
1.59 daniel 853: xmlParserInputPtr input;
854:
1.96 daniel 855: if (buffer == NULL) {
1.123 daniel 856: ctxt->errNo = XML_ERR_INTERNAL_ERROR;
1.59 daniel 857: if ((ctxt->sax != NULL) && (ctxt->sax->error != NULL))
1.74 daniel 858: ctxt->sax->error(ctxt->userData,
1.59 daniel 859: "internal: xmlNewStringInputStream string = NULL\n");
860: return(NULL);
861: }
1.96 daniel 862: input = xmlNewInputStream(ctxt);
1.59 daniel 863: if (input == NULL) {
864: return(NULL);
865: }
1.96 daniel 866: input->base = buffer;
867: input->cur = buffer;
1.140 daniel 868: input->length = xmlStrlen(buffer);
1.59 daniel 869: return(input);
870: }
871:
1.76 daniel 872: /**
873: * xmlNewInputFromFile:
874: * @ctxt: an XML parser context
875: * @filename: the filename to use as entity
876: *
877: * Create a new input stream based on a file.
878: *
879: * Returns the new input stream or NULL in case of error
880: */
881: xmlParserInputPtr
1.79 daniel 882: xmlNewInputFromFile(xmlParserCtxtPtr ctxt, const char *filename) {
1.91 daniel 883: xmlParserInputBufferPtr buf;
1.76 daniel 884: xmlParserInputPtr inputStream;
1.111 daniel 885: char *directory = NULL;
1.76 daniel 886:
1.96 daniel 887: if (ctxt == NULL) return(NULL);
1.91 daniel 888: buf = xmlParserInputBufferCreateFilename(filename, XML_CHAR_ENCODING_NONE);
1.94 daniel 889: if (buf == NULL) {
1.140 daniel 890: char name[XML_PARSER_BIG_BUFFER_SIZE];
1.106 daniel 891:
1.94 daniel 892: if ((ctxt->input != NULL) && (ctxt->input->directory != NULL)) {
893: #ifdef WIN32
894: sprintf(name, "%s\\%s", ctxt->input->directory, filename);
895: #else
896: sprintf(name, "%s/%s", ctxt->input->directory, filename);
897: #endif
898: buf = xmlParserInputBufferCreateFilename(name,
899: XML_CHAR_ENCODING_NONE);
1.106 daniel 900: if (buf != NULL)
1.142 daniel 901: directory = xmlParserGetDirectory(name);
1.106 daniel 902: }
903: if ((buf == NULL) && (ctxt->directory != NULL)) {
904: #ifdef WIN32
905: sprintf(name, "%s\\%s", ctxt->directory, filename);
906: #else
907: sprintf(name, "%s/%s", ctxt->directory, filename);
908: #endif
909: buf = xmlParserInputBufferCreateFilename(name,
910: XML_CHAR_ENCODING_NONE);
911: if (buf != NULL)
1.142 daniel 912: directory = xmlParserGetDirectory(name);
1.106 daniel 913: }
914: if (buf == NULL)
1.94 daniel 915: return(NULL);
916: }
917: if (directory == NULL)
918: directory = xmlParserGetDirectory(filename);
1.76 daniel 919:
1.96 daniel 920: inputStream = xmlNewInputStream(ctxt);
1.76 daniel 921: if (inputStream == NULL) {
1.119 daniel 922: if (directory != NULL) xmlFree((char *) directory);
1.76 daniel 923: return(NULL);
924: }
925:
1.119 daniel 926: inputStream->filename = xmlMemStrdup(filename);
1.94 daniel 927: inputStream->directory = directory;
1.91 daniel 928: inputStream->buf = buf;
1.76 daniel 929:
1.91 daniel 930: inputStream->base = inputStream->buf->buffer->content;
931: inputStream->cur = inputStream->buf->buffer->content;
1.106 daniel 932: if ((ctxt->directory == NULL) && (directory != NULL))
1.134 daniel 933: ctxt->directory = (char *) xmlStrdup((const xmlChar *) directory);
1.76 daniel 934: return(inputStream);
935: }
936:
1.77 daniel 937: /************************************************************************
938: * *
1.97 daniel 939: * Commodity functions to handle parser contexts *
940: * *
941: ************************************************************************/
942:
943: /**
944: * xmlInitParserCtxt:
945: * @ctxt: an XML parser context
946: *
947: * Initialize a parser context
948: */
949:
950: void
951: xmlInitParserCtxt(xmlParserCtxtPtr ctxt)
952: {
953: xmlSAXHandler *sax;
954:
1.119 daniel 955: sax = (xmlSAXHandler *) xmlMalloc(sizeof(xmlSAXHandler));
1.97 daniel 956: if (sax == NULL) {
957: fprintf(stderr, "xmlInitParserCtxt: out of memory\n");
958: }
959:
960: /* Allocate the Input stack */
1.119 daniel 961: ctxt->inputTab = (xmlParserInputPtr *) xmlMalloc(5 * sizeof(xmlParserInputPtr));
1.97 daniel 962: ctxt->inputNr = 0;
963: ctxt->inputMax = 5;
964: ctxt->input = NULL;
965: ctxt->version = NULL;
966: ctxt->encoding = NULL;
967: ctxt->standalone = -1;
1.98 daniel 968: ctxt->hasExternalSubset = 0;
969: ctxt->hasPErefs = 0;
1.97 daniel 970: ctxt->html = 0;
1.98 daniel 971: ctxt->external = 0;
1.140 daniel 972: ctxt->instate = XML_PARSER_START;
1.97 daniel 973: ctxt->token = 0;
1.106 daniel 974: ctxt->directory = NULL;
1.97 daniel 975:
976: /* Allocate the Node stack */
1.119 daniel 977: ctxt->nodeTab = (xmlNodePtr *) xmlMalloc(10 * sizeof(xmlNodePtr));
1.97 daniel 978: ctxt->nodeNr = 0;
979: ctxt->nodeMax = 10;
980: ctxt->node = NULL;
981:
1.140 daniel 982: /* Allocate the Name stack */
983: ctxt->nameTab = (xmlChar **) xmlMalloc(10 * sizeof(xmlChar *));
984: ctxt->nameNr = 0;
985: ctxt->nameMax = 10;
986: ctxt->name = NULL;
987:
1.160 daniel 988: if (sax == NULL) {
989: ctxt->sax = &xmlDefaultSAXHandler;
990: } else {
1.97 daniel 991: ctxt->sax = sax;
992: memcpy(sax, &xmlDefaultSAXHandler, sizeof(xmlSAXHandler));
993: }
994: ctxt->userData = ctxt;
995: ctxt->myDoc = NULL;
996: ctxt->wellFormed = 1;
1.99 daniel 997: ctxt->valid = 1;
1.100 daniel 998: ctxt->validate = xmlDoValidityCheckingDefaultValue;
999: ctxt->vctxt.userData = ctxt;
1.149 daniel 1000: if (ctxt->validate) {
1001: ctxt->vctxt.error = xmlParserValidityError;
1.160 daniel 1002: if (xmlGetWarningsDefaultValue == 0)
1003: ctxt->vctxt.warning = NULL;
1004: else
1005: ctxt->vctxt.warning = xmlParserValidityWarning;
1.149 daniel 1006: } else {
1007: ctxt->vctxt.error = NULL;
1008: ctxt->vctxt.warning = NULL;
1009: }
1.97 daniel 1010: ctxt->replaceEntities = xmlSubstituteEntitiesDefaultValue;
1011: ctxt->record_info = 0;
1.135 daniel 1012: ctxt->nbChars = 0;
1.140 daniel 1013: ctxt->checkIndex = 0;
1014: ctxt->errNo = XML_ERR_OK;
1.97 daniel 1015: xmlInitNodeInfoSeq(&ctxt->node_seq);
1016: }
1017:
1018: /**
1019: * xmlFreeParserCtxt:
1020: * @ctxt: an XML parser context
1021: *
1022: * Free all the memory used by a parser context. However the parsed
1023: * document in ctxt->myDoc is not freed.
1024: */
1025:
1026: void
1027: xmlFreeParserCtxt(xmlParserCtxtPtr ctxt)
1028: {
1029: xmlParserInputPtr input;
1.140 daniel 1030: xmlChar *oldname;
1.97 daniel 1031:
1032: if (ctxt == NULL) return;
1033:
1034: while ((input = inputPop(ctxt)) != NULL) {
1035: xmlFreeInputStream(input);
1036: }
1.140 daniel 1037: while ((oldname = namePop(ctxt)) != NULL) {
1038: xmlFree(oldname);
1039: }
1040: if (ctxt->nameTab != NULL) xmlFree(ctxt->nameTab);
1.119 daniel 1041: if (ctxt->nodeTab != NULL) xmlFree(ctxt->nodeTab);
1042: if (ctxt->inputTab != NULL) xmlFree(ctxt->inputTab);
1043: if (ctxt->version != NULL) xmlFree((char *) ctxt->version);
1044: if (ctxt->encoding != NULL) xmlFree((char *) ctxt->encoding);
1.97 daniel 1045: if ((ctxt->sax != NULL) && (ctxt->sax != &xmlDefaultSAXHandler))
1.119 daniel 1046: xmlFree(ctxt->sax);
1047: if (ctxt->directory != NULL) xmlFree((char *) ctxt->directory);
1048: xmlFree(ctxt);
1.97 daniel 1049: }
1050:
1051: /**
1052: * xmlNewParserCtxt:
1053: *
1054: * Allocate and initialize a new parser context.
1055: *
1056: * Returns the xmlParserCtxtPtr or NULL
1057: */
1058:
1059: xmlParserCtxtPtr
1060: xmlNewParserCtxt()
1061: {
1062: xmlParserCtxtPtr ctxt;
1063:
1.119 daniel 1064: ctxt = (xmlParserCtxtPtr) xmlMalloc(sizeof(xmlParserCtxt));
1.97 daniel 1065: if (ctxt == NULL) {
1066: fprintf(stderr, "xmlNewParserCtxt : cannot allocate context\n");
1067: perror("malloc");
1068: return(NULL);
1069: }
1070: xmlInitParserCtxt(ctxt);
1071: return(ctxt);
1072: }
1073:
1074: /**
1075: * xmlClearParserCtxt:
1076: * @ctxt: an XML parser context
1077: *
1078: * Clear (release owned resources) and reinitialize a parser context
1079: */
1080:
1081: void
1082: xmlClearParserCtxt(xmlParserCtxtPtr ctxt)
1083: {
1084: xmlClearNodeInfoSeq(&ctxt->node_seq);
1085: xmlInitParserCtxt(ctxt);
1086: }
1087:
1088: /************************************************************************
1089: * *
1.77 daniel 1090: * Commodity functions to handle entities *
1091: * *
1092: ************************************************************************/
1093:
1.97 daniel 1094:
1095: /**
1096: * xmlParseCharRef:
1097: * @ctxt: an XML parser context
1098: *
1099: * parse Reference declarations
1100: *
1101: * [66] CharRef ::= '&#' [0-9]+ ';' |
1102: * '&#x' [0-9a-fA-F]+ ';'
1103: *
1.98 daniel 1104: * [ WFC: Legal Character ]
1105: * Characters referred to using character references must match the
1106: * production for Char.
1107: *
1.135 daniel 1108: * Returns the value parsed (as an int), 0 in case of error
1.77 daniel 1109: */
1.97 daniel 1110: int
1111: xmlParseCharRef(xmlParserCtxtPtr ctxt) {
1112: int val = 0;
1113:
1.111 daniel 1114: if (ctxt->token != 0) {
1115: val = ctxt->token;
1116: ctxt->token = 0;
1117: return(val);
1118: }
1.152 daniel 1119: if ((RAW == '&') && (NXT(1) == '#') &&
1.97 daniel 1120: (NXT(2) == 'x')) {
1121: SKIP(3);
1.152 daniel 1122: while (RAW != ';') {
1123: if ((RAW >= '0') && (RAW <= '9'))
1.97 daniel 1124: val = val * 16 + (CUR - '0');
1.152 daniel 1125: else if ((RAW >= 'a') && (RAW <= 'f'))
1.97 daniel 1126: val = val * 16 + (CUR - 'a') + 10;
1.152 daniel 1127: else if ((RAW >= 'A') && (RAW <= 'F'))
1.97 daniel 1128: val = val * 16 + (CUR - 'A') + 10;
1129: else {
1.123 daniel 1130: ctxt->errNo = XML_ERR_INVALID_HEX_CHARREF;
1.97 daniel 1131: if ((ctxt->sax != NULL) && (ctxt->sax->error != NULL))
1132: ctxt->sax->error(ctxt->userData,
1133: "xmlParseCharRef: invalid hexadecimal value\n");
1134: ctxt->wellFormed = 0;
1135: val = 0;
1136: break;
1137: }
1138: NEXT;
1139: }
1.152 daniel 1140: if (RAW == ';')
1.126 daniel 1141: SKIP(1); /* on purpose to avoid reentrancy problems with NEXT */
1.152 daniel 1142: } else if ((RAW == '&') && (NXT(1) == '#')) {
1.97 daniel 1143: SKIP(2);
1.152 daniel 1144: while (RAW != ';') {
1145: if ((RAW >= '0') && (RAW <= '9'))
1.97 daniel 1146: val = val * 10 + (CUR - '0');
1147: else {
1.123 daniel 1148: ctxt->errNo = XML_ERR_INVALID_DEC_CHARREF;
1.97 daniel 1149: if ((ctxt->sax != NULL) && (ctxt->sax->error != NULL))
1150: ctxt->sax->error(ctxt->userData,
1151: "xmlParseCharRef: invalid decimal value\n");
1152: ctxt->wellFormed = 0;
1153: val = 0;
1154: break;
1155: }
1156: NEXT;
1157: }
1.152 daniel 1158: if (RAW == ';')
1.126 daniel 1159: SKIP(1); /* on purpose to avoid reentrancy problems with NEXT */
1.97 daniel 1160: } else {
1.123 daniel 1161: ctxt->errNo = XML_ERR_INVALID_CHARREF;
1.97 daniel 1162: if ((ctxt->sax != NULL) && (ctxt->sax->error != NULL))
1.98 daniel 1163: ctxt->sax->error(ctxt->userData,
1164: "xmlParseCharRef: invalid value\n");
1.97 daniel 1165: ctxt->wellFormed = 0;
1166: }
1.98 daniel 1167:
1.97 daniel 1168: /*
1.98 daniel 1169: * [ WFC: Legal Character ]
1170: * Characters referred to using character references must match the
1171: * production for Char.
1.97 daniel 1172: */
1173: if (IS_CHAR(val)) {
1174: return(val);
1175: } else {
1.123 daniel 1176: ctxt->errNo = XML_ERR_INVALID_CHAR;
1.97 daniel 1177: if ((ctxt->sax != NULL) && (ctxt->sax->error != NULL))
1.123 daniel 1178: ctxt->sax->error(ctxt->userData, "CharRef: invalid xmlChar value %d\n",
1.97 daniel 1179: val);
1180: ctxt->wellFormed = 0;
1181: }
1182: return(0);
1.77 daniel 1183: }
1184:
1.96 daniel 1185: /**
1.135 daniel 1186: * xmlParseStringCharRef:
1187: * @ctxt: an XML parser context
1188: * @str: a pointer to an index in the string
1189: *
1190: * parse Reference declarations, variant parsing from a string rather
1191: * than an an input flow.
1192: *
1193: * [66] CharRef ::= '&#' [0-9]+ ';' |
1194: * '&#x' [0-9a-fA-F]+ ';'
1195: *
1196: * [ WFC: Legal Character ]
1197: * Characters referred to using character references must match the
1198: * production for Char.
1199: *
1200: * Returns the value parsed (as an int), 0 in case of error, str will be
1201: * updated to the current value of the index
1202: */
1203: int
1204: xmlParseStringCharRef(xmlParserCtxtPtr ctxt, const xmlChar **str) {
1205: const xmlChar *ptr;
1206: xmlChar cur;
1207: int val = 0;
1208:
1209: if ((str == NULL) || (*str == NULL)) return(0);
1210: ptr = *str;
1211: cur = *ptr;
1.137 daniel 1212: if ((cur == '&') && (ptr[1] == '#') && (ptr[2] == 'x')) {
1.135 daniel 1213: ptr += 3;
1214: cur = *ptr;
1215: while (cur != ';') {
1216: if ((cur >= '0') && (cur <= '9'))
1217: val = val * 16 + (cur - '0');
1218: else if ((cur >= 'a') && (cur <= 'f'))
1219: val = val * 16 + (cur - 'a') + 10;
1220: else if ((cur >= 'A') && (cur <= 'F'))
1221: val = val * 16 + (cur - 'A') + 10;
1222: else {
1223: ctxt->errNo = XML_ERR_INVALID_HEX_CHARREF;
1224: if ((ctxt->sax != NULL) && (ctxt->sax->error != NULL))
1225: ctxt->sax->error(ctxt->userData,
1226: "xmlParseCharRef: invalid hexadecimal value\n");
1227: ctxt->wellFormed = 0;
1228: val = 0;
1229: break;
1230: }
1231: ptr++;
1232: cur = *ptr;
1233: }
1234: if (cur == ';')
1235: ptr++;
1.145 daniel 1236: } else if ((cur == '&') && (ptr[1] == '#')){
1.135 daniel 1237: ptr += 2;
1238: cur = *ptr;
1239: while (cur != ';') {
1240: if ((cur >= '0') && (cur <= '9'))
1241: val = val * 10 + (cur - '0');
1242: else {
1243: ctxt->errNo = XML_ERR_INVALID_DEC_CHARREF;
1244: if ((ctxt->sax != NULL) && (ctxt->sax->error != NULL))
1245: ctxt->sax->error(ctxt->userData,
1246: "xmlParseCharRef: invalid decimal value\n");
1247: ctxt->wellFormed = 0;
1248: val = 0;
1249: break;
1250: }
1251: ptr++;
1252: cur = *ptr;
1253: }
1254: if (cur == ';')
1255: ptr++;
1256: } else {
1257: ctxt->errNo = XML_ERR_INVALID_CHARREF;
1258: if ((ctxt->sax != NULL) && (ctxt->sax->error != NULL))
1259: ctxt->sax->error(ctxt->userData,
1260: "xmlParseCharRef: invalid value\n");
1261: ctxt->wellFormed = 0;
1262: return(0);
1263: }
1264: *str = ptr;
1265:
1266: /*
1267: * [ WFC: Legal Character ]
1268: * Characters referred to using character references must match the
1269: * production for Char.
1270: */
1271: if (IS_CHAR(val)) {
1272: return(val);
1273: } else {
1274: ctxt->errNo = XML_ERR_INVALID_CHAR;
1275: if ((ctxt->sax != NULL) && (ctxt->sax->error != NULL))
1276: ctxt->sax->error(ctxt->userData,
1277: "CharRef: invalid xmlChar value %d\n", val);
1278: ctxt->wellFormed = 0;
1279: }
1280: return(0);
1281: }
1282:
1283: /**
1.96 daniel 1284: * xmlParserHandleReference:
1285: * @ctxt: the parser context
1286: *
1.97 daniel 1287: * [67] Reference ::= EntityRef | CharRef
1288: *
1.96 daniel 1289: * [68] EntityRef ::= '&' Name ';'
1290: *
1.98 daniel 1291: * [ WFC: Entity Declared ]
1292: * the Name given in the entity reference must match that in an entity
1293: * declaration, except that well-formed documents need not declare any
1294: * of the following entities: amp, lt, gt, apos, quot.
1295: *
1296: * [ WFC: Parsed Entity ]
1297: * An entity reference must not contain the name of an unparsed entity
1298: *
1.97 daniel 1299: * [66] CharRef ::= '&#' [0-9]+ ';' |
1300: * '&#x' [0-9a-fA-F]+ ';'
1301: *
1.96 daniel 1302: * A PEReference may have been detectect in the current input stream
1303: * the handling is done accordingly to
1304: * http://www.w3.org/TR/REC-xml#entproc
1305: */
1306: void
1307: xmlParserHandleReference(xmlParserCtxtPtr ctxt) {
1.97 daniel 1308: xmlParserInputPtr input;
1.123 daniel 1309: xmlChar *name;
1.97 daniel 1310: xmlEntityPtr ent = NULL;
1311:
1.126 daniel 1312: if (ctxt->token != 0) {
1313: return;
1314: }
1.152 daniel 1315: if (RAW != '&') return;
1.97 daniel 1316: GROW;
1.152 daniel 1317: if ((RAW == '&') && (NXT(1) == '#')) {
1.97 daniel 1318: switch(ctxt->instate) {
1.140 daniel 1319: case XML_PARSER_ENTITY_DECL:
1320: case XML_PARSER_PI:
1.109 daniel 1321: case XML_PARSER_CDATA_SECTION:
1.140 daniel 1322: case XML_PARSER_COMMENT:
1323: /* we just ignore it there */
1324: return;
1325: case XML_PARSER_START_TAG:
1.109 daniel 1326: return;
1.140 daniel 1327: case XML_PARSER_END_TAG:
1.97 daniel 1328: return;
1329: case XML_PARSER_EOF:
1.123 daniel 1330: ctxt->errNo = XML_ERR_CHARREF_AT_EOF;
1.97 daniel 1331: if ((ctxt->sax != NULL) && (ctxt->sax->error != NULL))
1332: ctxt->sax->error(ctxt->userData, "CharRef at EOF\n");
1333: ctxt->wellFormed = 0;
1334: return;
1335: case XML_PARSER_PROLOG:
1.140 daniel 1336: case XML_PARSER_START:
1337: case XML_PARSER_MISC:
1.123 daniel 1338: ctxt->errNo = XML_ERR_CHARREF_IN_PROLOG;
1.97 daniel 1339: if ((ctxt->sax != NULL) && (ctxt->sax->error != NULL))
1340: ctxt->sax->error(ctxt->userData, "CharRef in prolog!\n");
1341: ctxt->wellFormed = 0;
1342: return;
1343: case XML_PARSER_EPILOG:
1.123 daniel 1344: ctxt->errNo = XML_ERR_CHARREF_IN_EPILOG;
1.97 daniel 1345: if ((ctxt->sax != NULL) && (ctxt->sax->error != NULL))
1346: ctxt->sax->error(ctxt->userData, "CharRef in epilog!\n");
1347: ctxt->wellFormed = 0;
1348: return;
1349: case XML_PARSER_DTD:
1.123 daniel 1350: ctxt->errNo = XML_ERR_CHARREF_IN_DTD;
1.97 daniel 1351: if ((ctxt->sax != NULL) && (ctxt->sax->error != NULL))
1352: ctxt->sax->error(ctxt->userData,
1353: "CharRef are forbiden in DTDs!\n");
1354: ctxt->wellFormed = 0;
1355: return;
1356: case XML_PARSER_ENTITY_VALUE:
1357: /*
1358: * NOTE: in the case of entity values, we don't do the
1.127 daniel 1359: * substitution here since we need the literal
1.97 daniel 1360: * entity value to be able to save the internal
1361: * subset of the document.
1362: * This will be handled by xmlDecodeEntities
1363: */
1364: return;
1365: case XML_PARSER_CONTENT:
1366: case XML_PARSER_ATTRIBUTE_VALUE:
1367: ctxt->token = xmlParseCharRef(ctxt);
1368: return;
1369: }
1370: return;
1371: }
1372:
1373: switch(ctxt->instate) {
1.109 daniel 1374: case XML_PARSER_CDATA_SECTION:
1375: return;
1.140 daniel 1376: case XML_PARSER_PI:
1.97 daniel 1377: case XML_PARSER_COMMENT:
1378: return;
1.140 daniel 1379: case XML_PARSER_START_TAG:
1380: return;
1381: case XML_PARSER_END_TAG:
1382: return;
1.97 daniel 1383: case XML_PARSER_EOF:
1.123 daniel 1384: ctxt->errNo = XML_ERR_ENTITYREF_AT_EOF;
1.97 daniel 1385: if ((ctxt->sax != NULL) && (ctxt->sax->error != NULL))
1386: ctxt->sax->error(ctxt->userData, "Reference at EOF\n");
1387: ctxt->wellFormed = 0;
1388: return;
1389: case XML_PARSER_PROLOG:
1.140 daniel 1390: case XML_PARSER_START:
1391: case XML_PARSER_MISC:
1.123 daniel 1392: ctxt->errNo = XML_ERR_ENTITYREF_IN_PROLOG;
1.97 daniel 1393: if ((ctxt->sax != NULL) && (ctxt->sax->error != NULL))
1394: ctxt->sax->error(ctxt->userData, "Reference in prolog!\n");
1395: ctxt->wellFormed = 0;
1396: return;
1397: case XML_PARSER_EPILOG:
1.123 daniel 1398: ctxt->errNo = XML_ERR_ENTITYREF_IN_EPILOG;
1.97 daniel 1399: if ((ctxt->sax != NULL) && (ctxt->sax->error != NULL))
1400: ctxt->sax->error(ctxt->userData, "Reference in epilog!\n");
1401: ctxt->wellFormed = 0;
1402: return;
1403: case XML_PARSER_ENTITY_VALUE:
1404: /*
1405: * NOTE: in the case of entity values, we don't do the
1.127 daniel 1406: * substitution here since we need the literal
1.97 daniel 1407: * entity value to be able to save the internal
1408: * subset of the document.
1409: * This will be handled by xmlDecodeEntities
1410: */
1411: return;
1412: case XML_PARSER_ATTRIBUTE_VALUE:
1413: /*
1414: * NOTE: in the case of attributes values, we don't do the
1415: * substitution here unless we are in a mode where
1416: * the parser is explicitely asked to substitute
1417: * entities. The SAX callback is called with values
1418: * without entity substitution.
1419: * This will then be handled by xmlDecodeEntities
1420: */
1.113 daniel 1421: return;
1.97 daniel 1422: case XML_PARSER_ENTITY_DECL:
1423: /*
1424: * we just ignore it there
1425: * the substitution will be done once the entity is referenced
1426: */
1427: return;
1428: case XML_PARSER_DTD:
1.123 daniel 1429: ctxt->errNo = XML_ERR_ENTITYREF_IN_DTD;
1.97 daniel 1430: if ((ctxt->sax != NULL) && (ctxt->sax->error != NULL))
1431: ctxt->sax->error(ctxt->userData,
1432: "Entity references are forbiden in DTDs!\n");
1433: ctxt->wellFormed = 0;
1434: return;
1435: case XML_PARSER_CONTENT:
1.113 daniel 1436: return;
1.97 daniel 1437: }
1438:
1439: NEXT;
1440: name = xmlScanName(ctxt);
1441: if (name == NULL) {
1.123 daniel 1442: ctxt->errNo = XML_ERR_ENTITYREF_NO_NAME;
1.97 daniel 1443: if ((ctxt->sax != NULL) && (ctxt->sax->error != NULL))
1444: ctxt->sax->error(ctxt->userData, "Entity reference: no name\n");
1445: ctxt->wellFormed = 0;
1446: ctxt->token = '&';
1447: return;
1448: }
1449: if (NXT(xmlStrlen(name)) != ';') {
1.123 daniel 1450: ctxt->errNo = XML_ERR_ENTITYREF_SEMICOL_MISSING;
1.97 daniel 1451: if ((ctxt->sax != NULL) && (ctxt->sax->error != NULL))
1452: ctxt->sax->error(ctxt->userData,
1453: "Entity reference: ';' expected\n");
1454: ctxt->wellFormed = 0;
1455: ctxt->token = '&';
1.119 daniel 1456: xmlFree(name);
1.97 daniel 1457: return;
1458: }
1459: SKIP(xmlStrlen(name) + 1);
1460: if (ctxt->sax != NULL) {
1461: if (ctxt->sax->getEntity != NULL)
1462: ent = ctxt->sax->getEntity(ctxt->userData, name);
1463: }
1.98 daniel 1464:
1465: /*
1466: * [ WFC: Entity Declared ]
1467: * the Name given in the entity reference must match that in an entity
1468: * declaration, except that well-formed documents need not declare any
1469: * of the following entities: amp, lt, gt, apos, quot.
1470: */
1.97 daniel 1471: if (ent == NULL)
1472: ent = xmlGetPredefinedEntity(name);
1473: if (ent == NULL) {
1.123 daniel 1474: ctxt->errNo = XML_ERR_UNDECLARED_ENTITY;
1.97 daniel 1475: if ((ctxt->sax != NULL) && (ctxt->sax->error != NULL))
1476: ctxt->sax->error(ctxt->userData,
1.98 daniel 1477: "Entity reference: entity %s not declared\n",
1478: name);
1.97 daniel 1479: ctxt->wellFormed = 0;
1.119 daniel 1480: xmlFree(name);
1.97 daniel 1481: return;
1482: }
1.98 daniel 1483:
1484: /*
1485: * [ WFC: Parsed Entity ]
1486: * An entity reference must not contain the name of an unparsed entity
1487: */
1.159 daniel 1488: if (ent->etype == XML_EXTERNAL_GENERAL_UNPARSED_ENTITY) {
1.123 daniel 1489: ctxt->errNo = XML_ERR_UNPARSED_ENTITY;
1.98 daniel 1490: if ((ctxt->sax != NULL) && (ctxt->sax->error != NULL))
1491: ctxt->sax->error(ctxt->userData,
1492: "Entity reference to unparsed entity %s\n", name);
1493: ctxt->wellFormed = 0;
1494: }
1495:
1.159 daniel 1496: if (ent->etype == XML_INTERNAL_PREDEFINED_ENTITY) {
1.97 daniel 1497: ctxt->token = ent->content[0];
1.119 daniel 1498: xmlFree(name);
1.97 daniel 1499: return;
1500: }
1501: input = xmlNewEntityInputStream(ctxt, ent);
1502: xmlPushInput(ctxt, input);
1.119 daniel 1503: xmlFree(name);
1.96 daniel 1504: return;
1505: }
1506:
1507: /**
1508: * xmlParserHandlePEReference:
1509: * @ctxt: the parser context
1510: *
1511: * [69] PEReference ::= '%' Name ';'
1512: *
1.98 daniel 1513: * [ WFC: No Recursion ]
1514: * TODO A parsed entity must not contain a recursive
1515: * reference to itself, either directly or indirectly.
1516: *
1517: * [ WFC: Entity Declared ]
1518: * In a document without any DTD, a document with only an internal DTD
1519: * subset which contains no parameter entity references, or a document
1520: * with "standalone='yes'", ... ... The declaration of a parameter
1521: * entity must precede any reference to it...
1522: *
1523: * [ VC: Entity Declared ]
1524: * In a document with an external subset or external parameter entities
1525: * with "standalone='no'", ... ... The declaration of a parameter entity
1526: * must precede any reference to it...
1527: *
1528: * [ WFC: In DTD ]
1529: * Parameter-entity references may only appear in the DTD.
1530: * NOTE: misleading but this is handled.
1531: *
1532: * A PEReference may have been detected in the current input stream
1.96 daniel 1533: * the handling is done accordingly to
1534: * http://www.w3.org/TR/REC-xml#entproc
1535: * i.e.
1536: * - Included in literal in entity values
1537: * - Included as Paraemeter Entity reference within DTDs
1538: */
1539: void
1540: xmlParserHandlePEReference(xmlParserCtxtPtr ctxt) {
1.123 daniel 1541: xmlChar *name;
1.96 daniel 1542: xmlEntityPtr entity = NULL;
1543: xmlParserInputPtr input;
1544:
1.126 daniel 1545: if (ctxt->token != 0) {
1546: return;
1547: }
1.152 daniel 1548: if (RAW != '%') return;
1.96 daniel 1549: switch(ctxt->instate) {
1.109 daniel 1550: case XML_PARSER_CDATA_SECTION:
1551: return;
1.97 daniel 1552: case XML_PARSER_COMMENT:
1553: return;
1.140 daniel 1554: case XML_PARSER_START_TAG:
1555: return;
1556: case XML_PARSER_END_TAG:
1557: return;
1.96 daniel 1558: case XML_PARSER_EOF:
1.123 daniel 1559: ctxt->errNo = XML_ERR_PEREF_AT_EOF;
1.96 daniel 1560: if ((ctxt->sax != NULL) && (ctxt->sax->error != NULL))
1561: ctxt->sax->error(ctxt->userData, "PEReference at EOF\n");
1562: ctxt->wellFormed = 0;
1563: return;
1564: case XML_PARSER_PROLOG:
1.140 daniel 1565: case XML_PARSER_START:
1566: case XML_PARSER_MISC:
1.123 daniel 1567: ctxt->errNo = XML_ERR_PEREF_IN_PROLOG;
1.96 daniel 1568: if ((ctxt->sax != NULL) && (ctxt->sax->error != NULL))
1569: ctxt->sax->error(ctxt->userData, "PEReference in prolog!\n");
1570: ctxt->wellFormed = 0;
1571: return;
1.97 daniel 1572: case XML_PARSER_ENTITY_DECL:
1.96 daniel 1573: case XML_PARSER_CONTENT:
1574: case XML_PARSER_ATTRIBUTE_VALUE:
1.140 daniel 1575: case XML_PARSER_PI:
1.96 daniel 1576: /* we just ignore it there */
1577: return;
1578: case XML_PARSER_EPILOG:
1.123 daniel 1579: ctxt->errNo = XML_ERR_PEREF_IN_EPILOG;
1.96 daniel 1580: if ((ctxt->sax != NULL) && (ctxt->sax->error != NULL))
1.97 daniel 1581: ctxt->sax->error(ctxt->userData, "PEReference in epilog!\n");
1.96 daniel 1582: ctxt->wellFormed = 0;
1583: return;
1.97 daniel 1584: case XML_PARSER_ENTITY_VALUE:
1585: /*
1586: * NOTE: in the case of entity values, we don't do the
1.127 daniel 1587: * substitution here since we need the literal
1.97 daniel 1588: * entity value to be able to save the internal
1589: * subset of the document.
1590: * This will be handled by xmlDecodeEntities
1591: */
1592: return;
1.96 daniel 1593: case XML_PARSER_DTD:
1.98 daniel 1594: /*
1595: * [WFC: Well-Formedness Constraint: PEs in Internal Subset]
1596: * In the internal DTD subset, parameter-entity references
1597: * can occur only where markup declarations can occur, not
1598: * within markup declarations.
1599: * In that case this is handled in xmlParseMarkupDecl
1600: */
1601: if ((ctxt->external == 0) && (ctxt->inputNr == 1))
1602: return;
1.96 daniel 1603: }
1604:
1605: NEXT;
1606: name = xmlParseName(ctxt);
1607: if (name == NULL) {
1.123 daniel 1608: ctxt->errNo = XML_ERR_PEREF_NO_NAME;
1.96 daniel 1609: if ((ctxt->sax != NULL) && (ctxt->sax->error != NULL))
1610: ctxt->sax->error(ctxt->userData, "xmlHandlePEReference: no name\n");
1611: ctxt->wellFormed = 0;
1612: } else {
1.152 daniel 1613: if (RAW == ';') {
1.96 daniel 1614: NEXT;
1.98 daniel 1615: if ((ctxt->sax != NULL) && (ctxt->sax->getParameterEntity != NULL))
1616: entity = ctxt->sax->getParameterEntity(ctxt->userData, name);
1.96 daniel 1617: if (entity == NULL) {
1.98 daniel 1618:
1619: /*
1620: * [ WFC: Entity Declared ]
1621: * In a document without any DTD, a document with only an
1622: * internal DTD subset which contains no parameter entity
1623: * references, or a document with "standalone='yes'", ...
1624: * ... The declaration of a parameter entity must precede
1625: * any reference to it...
1626: */
1627: if ((ctxt->standalone == 1) ||
1628: ((ctxt->hasExternalSubset == 0) &&
1629: (ctxt->hasPErefs == 0))) {
1630: if ((ctxt->sax != NULL) && (ctxt->sax->error != NULL))
1631: ctxt->sax->error(ctxt->userData,
1632: "PEReference: %%%s; not found\n", name);
1633: ctxt->wellFormed = 0;
1634: } else {
1635: /*
1636: * [ VC: Entity Declared ]
1637: * In a document with an external subset or external
1638: * parameter entities with "standalone='no'", ...
1639: * ... The declaration of a parameter entity must precede
1640: * any reference to it...
1641: */
1642: if ((ctxt->sax != NULL) && (ctxt->sax->warning != NULL))
1643: ctxt->sax->warning(ctxt->userData,
1644: "PEReference: %%%s; not found\n", name);
1645: ctxt->valid = 0;
1646: }
1.96 daniel 1647: } else {
1.159 daniel 1648: if ((entity->etype == XML_INTERNAL_PARAMETER_ENTITY) ||
1649: (entity->etype == XML_EXTERNAL_PARAMETER_ENTITY)) {
1.96 daniel 1650: /*
1.156 daniel 1651: * TODO !!! handle the extra spaces added before and after
1.96 daniel 1652: * c.f. http://www.w3.org/TR/REC-xml#as-PE
1.156 daniel 1653: * TODO !!! Avoid quote processing in parameters value
1.96 daniel 1654: * c.f. http://www.w3.org/TR/REC-xml#inliteral
1655: */
1656: input = xmlNewEntityInputStream(ctxt, entity);
1657: xmlPushInput(ctxt, input);
1658: } else {
1659: if ((ctxt->sax != NULL) && (ctxt->sax->error != NULL))
1660: ctxt->sax->error(ctxt->userData,
1661: "xmlHandlePEReference: %s is not a parameter entity\n",
1662: name);
1663: ctxt->wellFormed = 0;
1664: }
1665: }
1666: } else {
1.123 daniel 1667: ctxt->errNo = XML_ERR_PEREF_SEMICOL_MISSING;
1.96 daniel 1668: if ((ctxt->sax != NULL) && (ctxt->sax->error != NULL))
1669: ctxt->sax->error(ctxt->userData,
1670: "xmlHandlePEReference: expecting ';'\n");
1671: ctxt->wellFormed = 0;
1672: }
1.119 daniel 1673: xmlFree(name);
1.97 daniel 1674: }
1675: }
1676:
/*
 * Macro used to grow the current buffer.
 *
 * Doubles buffer##_size and reallocates @buffer accordingly. It expects
 * an int variable named <buffer>_size in scope and must be used in a
 * function returning a pointer: when the reallocation fails the old
 * block is released (it would otherwise be lost, since the enclosing
 * function returns immediately) and NULL is returned from that function.
 */
#define growBuffer(buffer) {						\
    xmlChar *growBuffer_tmp;						\
    buffer##_size *= 2;							\
    growBuffer_tmp = (xmlChar *)					\
		xmlRealloc(buffer, buffer##_size * sizeof(xmlChar));	\
    if (growBuffer_tmp == NULL) {					\
	perror("realloc failed");					\
	xmlFree(buffer);						\
	return(NULL);							\
    }									\
    buffer = growBuffer_tmp;						\
}
1.77 daniel 1689:
1690: /**
1691: * xmlDecodeEntities:
1692: * @ctxt: the parser context
1693: * @what: combination of XML_SUBSTITUTE_REF and XML_SUBSTITUTE_PEREF
1694: * @len: the len to decode (in bytes !), -1 for no size limit
1.123 daniel 1695: * @end: an end marker xmlChar, 0 if none
1696: * @end2: an end marker xmlChar, 0 if none
1697: * @end3: an end marker xmlChar, 0 if none
1.77 daniel 1698: *
1699: * [67] Reference ::= EntityRef | CharRef
1700: *
1701: * [69] PEReference ::= '%' Name ';'
1702: *
1703: * Returns A newly allocated string with the substitution done. The caller
1704: * must deallocate it !
1705: */
1.123 daniel 1706: xmlChar *
1.77 daniel 1707: xmlDecodeEntities(xmlParserCtxtPtr ctxt, int len, int what,
1.123 daniel 1708: xmlChar end, xmlChar end2, xmlChar end3) {
1709: xmlChar *buffer = NULL;
1.78 daniel 1710: int buffer_size = 0;
1.161 ! daniel 1711: int nbchars = 0;
1.78 daniel 1712:
1.123 daniel 1713: xmlChar *current = NULL;
1.77 daniel 1714: xmlEntityPtr ent;
1715: unsigned int max = (unsigned int) len;
1.161 ! daniel 1716: int c,l;
1.77 daniel 1717:
1718: /*
1719: * allocate a translation buffer.
1720: */
1.140 daniel 1721: buffer_size = XML_PARSER_BIG_BUFFER_SIZE;
1.123 daniel 1722: buffer = (xmlChar *) xmlMalloc(buffer_size * sizeof(xmlChar));
1.77 daniel 1723: if (buffer == NULL) {
1724: perror("xmlDecodeEntities: malloc failed");
1725: return(NULL);
1726: }
1727:
1.78 daniel 1728: /*
1729: * Ok loop until we reach one of the ending char or a size limit.
1730: */
1.161 ! daniel 1731: c = CUR_CHAR(l);
! 1732: while ((nbchars < max) && (c != end) &&
! 1733: (c != end2) && (c != end3)) {
1.77 daniel 1734:
1.161 ! daniel 1735: if (c == 0) break;
! 1736: if (((c == '&') && (ctxt->token != '&')) && (NXT(1) == '#')) {
1.98 daniel 1737: int val = xmlParseCharRef(ctxt);
1.161 ! daniel 1738: COPY_BUF(0,buffer,nbchars,val);
! 1739: NEXTL(l);
! 1740: } else if ((c == '&') && (ctxt->token != '&') &&
! 1741: (what & XML_SUBSTITUTE_REF)) {
1.98 daniel 1742: ent = xmlParseEntityRef(ctxt);
1743: if ((ent != NULL) &&
1744: (ctxt->replaceEntities != 0)) {
1745: current = ent->content;
1746: while (*current != 0) {
1.161 ! daniel 1747: buffer[nbchars++] = *current++;
! 1748: if (nbchars > buffer_size - XML_PARSER_BUFFER_SIZE) {
1.98 daniel 1749: growBuffer(buffer);
1.77 daniel 1750: }
1751: }
1.98 daniel 1752: } else if (ent != NULL) {
1.123 daniel 1753: const xmlChar *cur = ent->name;
1.98 daniel 1754:
1.161 ! daniel 1755: buffer[nbchars++] = '&';
! 1756: if (nbchars > buffer_size - XML_PARSER_BUFFER_SIZE) {
1.98 daniel 1757: growBuffer(buffer);
1758: }
1.161 ! daniel 1759: while (*cur != 0) {
! 1760: buffer[nbchars++] = *cur++;
! 1761: }
! 1762: buffer[nbchars++] = ';';
1.77 daniel 1763: }
1.161 ! daniel 1764: } else if (c == '%' && (what & XML_SUBSTITUTE_PEREF)) {
1.97 daniel 1765: /*
1.77 daniel 1766: * a PEReference induce to switch the entity flow,
1767: * we break here to flush the current set of chars
1768: * parsed if any. We will be called back later.
1.97 daniel 1769: */
1.91 daniel 1770: if (nbchars != 0) break;
1.77 daniel 1771:
1772: xmlParsePEReference(ctxt);
1.79 daniel 1773:
1.97 daniel 1774: /*
1.79 daniel 1775: * Pop-up of finished entities.
1.97 daniel 1776: */
1.152 daniel 1777: while ((RAW == 0) && (ctxt->inputNr > 1))
1.79 daniel 1778: xmlPopInput(ctxt);
1779:
1.98 daniel 1780: break;
1.77 daniel 1781: } else {
1.161 ! daniel 1782: COPY_BUF(l,buffer,nbchars,c);
! 1783: NEXTL(l);
! 1784: if (nbchars > buffer_size - XML_PARSER_BUFFER_SIZE) {
1.86 daniel 1785: growBuffer(buffer);
1786: }
1.77 daniel 1787: }
1.161 ! daniel 1788: c = CUR_CHAR(l);
1.77 daniel 1789: }
1.161 ! daniel 1790: buffer[nbchars++] = 0;
1.77 daniel 1791: return(buffer);
1792: }
1793:
1.135 daniel 1794: /**
1795: * xmlStringDecodeEntities:
1796: * @ctxt: the parser context
1797: * @str: the input string
1798: * @what: combination of XML_SUBSTITUTE_REF and XML_SUBSTITUTE_PEREF
1799: * @end: an end marker xmlChar, 0 if none
1800: * @end2: an end marker xmlChar, 0 if none
1801: * @end3: an end marker xmlChar, 0 if none
1802: *
1803: * [67] Reference ::= EntityRef | CharRef
1804: *
1805: * [69] PEReference ::= '%' Name ';'
1806: *
1807: * Returns A newly allocated string with the substitution done. The caller
1808: * must deallocate it !
1809: */
1810: xmlChar *
1811: xmlStringDecodeEntities(xmlParserCtxtPtr ctxt, const xmlChar *str, int what,
1812: xmlChar end, xmlChar end2, xmlChar end3) {
1813: xmlChar *buffer = NULL;
1814: int buffer_size = 0;
1815: xmlChar *out = NULL;
1816:
1817: xmlChar *current = NULL;
1818: xmlEntityPtr ent;
1819: xmlChar cur;
1820:
1821: /*
1822: * allocate a translation buffer.
1823: */
1.140 daniel 1824: buffer_size = XML_PARSER_BIG_BUFFER_SIZE;
1.135 daniel 1825: buffer = (xmlChar *) xmlMalloc(buffer_size * sizeof(xmlChar));
1826: if (buffer == NULL) {
1827: perror("xmlDecodeEntities: malloc failed");
1828: return(NULL);
1829: }
1830: out = buffer;
1831:
1832: /*
1833: * Ok loop until we reach one of the ending char or a size limit.
1834: */
1835: cur = *str;
1836: while ((cur != 0) && (cur != end) &&
1837: (cur != end2) && (cur != end3)) {
1838:
1839: if (cur == 0) break;
1840: if ((cur == '&') && (str[1] == '#')) {
1841: int val = xmlParseStringCharRef(ctxt, &str);
1842: if (val != 0)
1843: *out++ = val;
1844: } else if ((cur == '&') && (what & XML_SUBSTITUTE_REF)) {
1845: ent = xmlParseStringEntityRef(ctxt, &str);
1846: if ((ent != NULL) &&
1847: (ctxt->replaceEntities != 0)) {
1848: current = ent->content;
1849: while (*current != 0) {
1850: *out++ = *current++;
1.140 daniel 1851: if (out - buffer > buffer_size - XML_PARSER_BUFFER_SIZE) {
1.135 daniel 1852: int index = out - buffer;
1853:
1854: growBuffer(buffer);
1855: out = &buffer[index];
1856: }
1857: }
1858: } else if (ent != NULL) {
1859: int i = xmlStrlen(ent->name);
1860: const xmlChar *cur = ent->name;
1861:
1862: *out++ = '&';
1.140 daniel 1863: if (out - buffer > buffer_size - i - XML_PARSER_BUFFER_SIZE) {
1.135 daniel 1864: int index = out - buffer;
1865:
1866: growBuffer(buffer);
1867: out = &buffer[index];
1868: }
1869: for (;i > 0;i--)
1870: *out++ = *cur++;
1871: *out++ = ';';
1872: }
1873: } else if (cur == '%' && (what & XML_SUBSTITUTE_PEREF)) {
1874: ent = xmlParseStringPEReference(ctxt, &str);
1875: if (ent != NULL) {
1876: current = ent->content;
1877: while (*current != 0) {
1878: *out++ = *current++;
1.140 daniel 1879: if (out - buffer > buffer_size - XML_PARSER_BUFFER_SIZE) {
1.135 daniel 1880: int index = out - buffer;
1881:
1882: growBuffer(buffer);
1883: out = &buffer[index];
1884: }
1885: }
1886: }
1887: } else {
1.156 daniel 1888: /* invalid for UTF-8 , use COPY(out); !!! */
1.135 daniel 1889: *out++ = cur;
1.140 daniel 1890: if (out - buffer > buffer_size - XML_PARSER_BUFFER_SIZE) {
1.135 daniel 1891: int index = out - buffer;
1892:
1893: growBuffer(buffer);
1894: out = &buffer[index];
1895: }
1896: str++;
1897: }
1898: cur = *str;
1899: }
1900: *out = 0;
1901: return(buffer);
1902: }
1903:
1.1 veillard 1904:
1.28 daniel 1905: /************************************************************************
1906: * *
1.75 daniel 1907: * Commodity functions to handle encodings *
1908: * *
1909: ************************************************************************/
1910:
1911: /**
1912: * xmlSwitchEncoding:
1913: * @ctxt: the parser context
1.124 daniel 1914: * @enc: the encoding value (number)
1.75 daniel 1915: *
1916: * change the input functions when discovering the character encoding
1917: * of a given entity.
1918: */
1919: void
1920: xmlSwitchEncoding(xmlParserCtxtPtr ctxt, xmlCharEncoding enc)
1921: {
1.156 daniel 1922: xmlCharEncodingHandlerPtr handler;
1923:
1924: handler = xmlGetCharEncodingHandler(enc);
1925: if (handler != NULL) {
1926: if (ctxt->input != NULL) {
1927: if (ctxt->input->buf != NULL) {
1928: if (ctxt->input->buf->encoder != NULL) {
1929: if ((ctxt->sax != NULL) && (ctxt->sax->error != NULL))
1930: ctxt->sax->error(ctxt->userData,
1931: "xmlSwitchEncoding : encoder already regitered\n");
1932: return;
1933: }
1934: ctxt->input->buf->encoder = handler;
1935:
1936: /*
1937: * Is there already some content down the pipe to convert
1938: */
1939: if ((ctxt->input->buf->buffer != NULL) &&
1940: (ctxt->input->buf->buffer->use > 0)) {
1941: xmlChar *buf;
1942: int res, len, size;
1943: int processed;
1944:
1945: /*
1946: * Specific handling of the Byte Order Mark for
1947: * UTF-16
1948: */
1949: if ((enc == XML_CHAR_ENCODING_UTF16LE) &&
1950: (ctxt->input->cur[0] == 0xFF) &&
1951: (ctxt->input->cur[1] == 0xFE)) {
1952: SKIP(2);
1953: }
1954: if ((enc == XML_CHAR_ENCODING_UTF16BE) &&
1955: (ctxt->input->cur[0] == 0xFE) &&
1956: (ctxt->input->cur[1] == 0xFF)) {
1957: SKIP(2);
1958: }
1959:
1960: /*
1961: * convert the non processed part
1962: */
1963: processed = ctxt->input->cur - ctxt->input->base;
1964: len = ctxt->input->buf->buffer->use - processed;
1965:
1966: if (len <= 0) {
1967: return;
1968: }
1969: size = ctxt->input->buf->buffer->use * 4;
1970: if (size < 4000)
1971: size = 4000;
1.160 daniel 1972: buf = (xmlChar *) xmlMalloc(size + 1);
1.156 daniel 1973: if (buf == NULL) {
1974: if ((ctxt->sax != NULL) && (ctxt->sax->error != NULL))
1975: ctxt->sax->error(ctxt->userData,
1976: "xmlSwitchEncoding : out of memory\n");
1977: return;
1978: }
1.160 daniel 1979: /* TODO !!! Handling of buf too small */
1.156 daniel 1980: res = handler->input(buf, size, ctxt->input->cur, &len);
1981: if ((res < 0) ||
1982: (len != ctxt->input->buf->buffer->use - processed)) {
1983: if ((ctxt->sax != NULL) && (ctxt->sax->error != NULL))
1984: ctxt->sax->error(ctxt->userData,
1985: "xmlSwitchEncoding : conversion failed\n");
1986: xmlFree(buf);
1987: return;
1988: }
1989: /*
1990: * Conversion succeeded, get rid of the old buffer
1991: */
1992: xmlFree(ctxt->input->buf->buffer->content);
1993: ctxt->input->buf->buffer->content = buf;
1994: ctxt->input->base = buf;
1995: ctxt->input->cur = buf;
1996: ctxt->input->buf->buffer->size = size;
1997: ctxt->input->buf->buffer->use = res;
1.160 daniel 1998: buf[res] = 0;
1.156 daniel 1999: }
2000: return;
2001: } else {
2002: if (ctxt->input->length == 0) {
2003: /*
2004: * When parsing a static memory array one must know the
2005: * size to be able to convert the buffer.
2006: */
2007: if ((ctxt->sax != NULL) && (ctxt->sax->error != NULL))
2008: ctxt->sax->error(ctxt->userData,
2009: "xmlSwitchEncoding : no input\n");
2010: return;
2011: } else {
2012: xmlChar *buf;
2013: int res, len;
2014: int processed = ctxt->input->cur - ctxt->input->base;
2015:
2016: /*
2017: * convert the non processed part
2018: */
2019: len = ctxt->input->length - processed;
2020: if (len <= 0) {
2021: if ((ctxt->sax != NULL) && (ctxt->sax->error != NULL))
2022: ctxt->sax->error(ctxt->userData,
2023: "xmlSwitchEncoding : input fully consumed?\n");
2024: return;
2025: }
2026: buf = (xmlChar *) xmlMalloc(ctxt->input->length * 4);
2027: if (buf == NULL) {
2028: if ((ctxt->sax != NULL) && (ctxt->sax->error != NULL))
2029: ctxt->sax->error(ctxt->userData,
2030: "xmlSwitchEncoding : out of memory\n");
2031: return;
2032: }
2033: res = handler->input(buf, ctxt->input->length * 4,
2034: ctxt->input->cur, &len);
2035: if ((res < 0) ||
2036: (len != ctxt->input->length - processed)) {
2037: if ((ctxt->sax != NULL) && (ctxt->sax->error != NULL))
2038: ctxt->sax->error(ctxt->userData,
2039: "xmlSwitchEncoding : conversion failed\n");
2040: xmlFree(buf);
2041: return;
2042: }
2043: /*
2044: * Conversion succeeded, get rid of the old buffer
2045: */
2046: if ((ctxt->input->free != NULL) &&
2047: (ctxt->input->base != NULL))
2048: ctxt->input->free((xmlChar *) ctxt->input->base);
2049: ctxt->input->base = ctxt->input->cur = buf;
2050: ctxt->input->length = res;
2051: }
2052: }
2053: } else {
2054: if ((ctxt->sax != NULL) && (ctxt->sax->error != NULL))
2055: ctxt->sax->error(ctxt->userData,
2056: "xmlSwitchEncoding : no input\n");
2057: }
2058: }
2059:
1.75 daniel 2060: switch (enc) {
2061: case XML_CHAR_ENCODING_ERROR:
1.123 daniel 2062: ctxt->errNo = XML_ERR_UNKNOWN_ENCODING;
1.75 daniel 2063: if ((ctxt->sax != NULL) && (ctxt->sax->error != NULL))
2064: ctxt->sax->error(ctxt->userData, "encoding unknown\n");
2065: ctxt->wellFormed = 0;
2066: break;
2067: case XML_CHAR_ENCODING_NONE:
2068: /* let's assume it's UTF-8 without the XML decl */
2069: return;
2070: case XML_CHAR_ENCODING_UTF8:
2071: /* default encoding, no conversion should be needed */
2072: return;
2073: case XML_CHAR_ENCODING_UTF16LE:
1.123 daniel 2074: ctxt->errNo = XML_ERR_UNSUPPORTED_ENCODING;
1.75 daniel 2075: if ((ctxt->sax != NULL) && (ctxt->sax->error != NULL))
2076: ctxt->sax->error(ctxt->userData,
2077: "char encoding UTF16 little endian not supported\n");
2078: break;
2079: case XML_CHAR_ENCODING_UTF16BE:
1.123 daniel 2080: ctxt->errNo = XML_ERR_UNSUPPORTED_ENCODING;
1.75 daniel 2081: if ((ctxt->sax != NULL) && (ctxt->sax->error != NULL))
2082: ctxt->sax->error(ctxt->userData,
2083: "char encoding UTF16 big endian not supported\n");
2084: break;
2085: case XML_CHAR_ENCODING_UCS4LE:
1.123 daniel 2086: ctxt->errNo = XML_ERR_UNSUPPORTED_ENCODING;
1.75 daniel 2087: if ((ctxt->sax != NULL) && (ctxt->sax->error != NULL))
2088: ctxt->sax->error(ctxt->userData,
2089: "char encoding USC4 little endian not supported\n");
2090: break;
2091: case XML_CHAR_ENCODING_UCS4BE:
1.123 daniel 2092: ctxt->errNo = XML_ERR_UNSUPPORTED_ENCODING;
1.75 daniel 2093: if ((ctxt->sax != NULL) && (ctxt->sax->error != NULL))
2094: ctxt->sax->error(ctxt->userData,
2095: "char encoding USC4 big endian not supported\n");
2096: break;
2097: case XML_CHAR_ENCODING_EBCDIC:
1.123 daniel 2098: ctxt->errNo = XML_ERR_UNSUPPORTED_ENCODING;
1.75 daniel 2099: if ((ctxt->sax != NULL) && (ctxt->sax->error != NULL))
2100: ctxt->sax->error(ctxt->userData,
2101: "char encoding EBCDIC not supported\n");
2102: break;
2103: case XML_CHAR_ENCODING_UCS4_2143:
1.123 daniel 2104: ctxt->errNo = XML_ERR_UNSUPPORTED_ENCODING;
1.75 daniel 2105: if ((ctxt->sax != NULL) && (ctxt->sax->error != NULL))
2106: ctxt->sax->error(ctxt->userData,
2107: "char encoding UCS4 2143 not supported\n");
2108: break;
2109: case XML_CHAR_ENCODING_UCS4_3412:
1.123 daniel 2110: ctxt->errNo = XML_ERR_UNSUPPORTED_ENCODING;
1.75 daniel 2111: if ((ctxt->sax != NULL) && (ctxt->sax->error != NULL))
2112: ctxt->sax->error(ctxt->userData,
2113: "char encoding UCS4 3412 not supported\n");
2114: break;
2115: case XML_CHAR_ENCODING_UCS2:
1.123 daniel 2116: ctxt->errNo = XML_ERR_UNSUPPORTED_ENCODING;
1.75 daniel 2117: if ((ctxt->sax != NULL) && (ctxt->sax->error != NULL))
2118: ctxt->sax->error(ctxt->userData,
2119: "char encoding UCS2 not supported\n");
2120: break;
2121: case XML_CHAR_ENCODING_8859_1:
1.123 daniel 2122: ctxt->errNo = XML_ERR_UNSUPPORTED_ENCODING;
1.75 daniel 2123: if ((ctxt->sax != NULL) && (ctxt->sax->error != NULL))
2124: ctxt->sax->error(ctxt->userData,
2125: "char encoding ISO_8859_1 ISO Latin 1 not supported\n");
2126: break;
2127: case XML_CHAR_ENCODING_8859_2:
1.123 daniel 2128: ctxt->errNo = XML_ERR_UNSUPPORTED_ENCODING;
1.75 daniel 2129: if ((ctxt->sax != NULL) && (ctxt->sax->error != NULL))
2130: ctxt->sax->error(ctxt->userData,
2131: "char encoding ISO_8859_2 ISO Latin 2 not supported\n");
2132: break;
2133: case XML_CHAR_ENCODING_8859_3:
1.123 daniel 2134: ctxt->errNo = XML_ERR_UNSUPPORTED_ENCODING;
1.75 daniel 2135: if ((ctxt->sax != NULL) && (ctxt->sax->error != NULL))
2136: ctxt->sax->error(ctxt->userData,
2137: "char encoding ISO_8859_3 not supported\n");
2138: break;
2139: case XML_CHAR_ENCODING_8859_4:
1.123 daniel 2140: ctxt->errNo = XML_ERR_UNSUPPORTED_ENCODING;
1.75 daniel 2141: if ((ctxt->sax != NULL) && (ctxt->sax->error != NULL))
2142: ctxt->sax->error(ctxt->userData,
2143: "char encoding ISO_8859_4 not supported\n");
2144: break;
2145: case XML_CHAR_ENCODING_8859_5:
1.123 daniel 2146: ctxt->errNo = XML_ERR_UNSUPPORTED_ENCODING;
1.75 daniel 2147: if ((ctxt->sax != NULL) && (ctxt->sax->error != NULL))
2148: ctxt->sax->error(ctxt->userData,
2149: "char encoding ISO_8859_5 not supported\n");
2150: break;
2151: case XML_CHAR_ENCODING_8859_6:
1.123 daniel 2152: ctxt->errNo = XML_ERR_UNSUPPORTED_ENCODING;
1.75 daniel 2153: if ((ctxt->sax != NULL) && (ctxt->sax->error != NULL))
2154: ctxt->sax->error(ctxt->userData,
2155: "char encoding ISO_8859_6 not supported\n");
2156: break;
2157: case XML_CHAR_ENCODING_8859_7:
1.123 daniel 2158: ctxt->errNo = XML_ERR_UNSUPPORTED_ENCODING;
1.75 daniel 2159: if ((ctxt->sax != NULL) && (ctxt->sax->error != NULL))
2160: ctxt->sax->error(ctxt->userData,
2161: "char encoding ISO_8859_7 not supported\n");
2162: break;
2163: case XML_CHAR_ENCODING_8859_8:
1.123 daniel 2164: ctxt->errNo = XML_ERR_UNSUPPORTED_ENCODING;
1.75 daniel 2165: if ((ctxt->sax != NULL) && (ctxt->sax->error != NULL))
2166: ctxt->sax->error(ctxt->userData,
2167: "char encoding ISO_8859_8 not supported\n");
2168: break;
2169: case XML_CHAR_ENCODING_8859_9:
1.123 daniel 2170: ctxt->errNo = XML_ERR_UNSUPPORTED_ENCODING;
1.75 daniel 2171: if ((ctxt->sax != NULL) && (ctxt->sax->error != NULL))
2172: ctxt->sax->error(ctxt->userData,
2173: "char encoding ISO_8859_9 not supported\n");
2174: break;
2175: case XML_CHAR_ENCODING_2022_JP:
1.123 daniel 2176: ctxt->errNo = XML_ERR_UNSUPPORTED_ENCODING;
1.75 daniel 2177: if ((ctxt->sax != NULL) && (ctxt->sax->error != NULL))
2178: ctxt->sax->error(ctxt->userData,
2179: "char encoding ISO-2022-JPnot supported\n");
2180: break;
2181: case XML_CHAR_ENCODING_SHIFT_JIS:
1.123 daniel 2182: ctxt->errNo = XML_ERR_UNSUPPORTED_ENCODING;
1.75 daniel 2183: if ((ctxt->sax != NULL) && (ctxt->sax->error != NULL))
2184: ctxt->sax->error(ctxt->userData,
2185: "char encoding Shift_JISnot supported\n");
2186: break;
2187: case XML_CHAR_ENCODING_EUC_JP:
1.123 daniel 2188: ctxt->errNo = XML_ERR_UNSUPPORTED_ENCODING;
1.75 daniel 2189: if ((ctxt->sax != NULL) && (ctxt->sax->error != NULL))
2190: ctxt->sax->error(ctxt->userData,
2191: "char encoding EUC-JPnot supported\n");
2192: break;
2193: }
2194: }
2195:
2196: /************************************************************************
2197: * *
1.123 daniel 2198: * Commodity functions to handle xmlChars *
1.28 daniel 2199: * *
2200: ************************************************************************/
2201:
1.50 daniel 2202: /**
2203: * xmlStrndup:
1.123 daniel 2204: * @cur: the input xmlChar *
1.50 daniel 2205: * @len: the len of @cur
2206: *
1.123 daniel 2207: * a strndup for array of xmlChar's
1.68 daniel 2208: *
1.123 daniel 2209: * Returns a new xmlChar * or NULL
1.1 veillard 2210: */
1.123 daniel 2211: xmlChar *
2212: xmlStrndup(const xmlChar *cur, int len) {
1.135 daniel 2213: xmlChar *ret;
2214:
2215: if ((cur == NULL) || (len < 0)) return(NULL);
2216: ret = xmlMalloc((len + 1) * sizeof(xmlChar));
1.1 veillard 2217: if (ret == NULL) {
1.86 daniel 2218: fprintf(stderr, "malloc of %ld byte failed\n",
1.123 daniel 2219: (len + 1) * (long)sizeof(xmlChar));
1.1 veillard 2220: return(NULL);
2221: }
1.123 daniel 2222: memcpy(ret, cur, len * sizeof(xmlChar));
1.1 veillard 2223: ret[len] = 0;
2224: return(ret);
2225: }
2226:
1.50 daniel 2227: /**
2228: * xmlStrdup:
1.123 daniel 2229: * @cur: the input xmlChar *
1.50 daniel 2230: *
1.152 daniel 2231: * a strdup for array of xmlChar's. Since they are supposed to be
2232: * encoded in UTF-8 or an encoding with 8bit based chars, we assume
2233: * a termination mark of '0'.
1.68 daniel 2234: *
1.123 daniel 2235: * Returns a new xmlChar * or NULL
1.1 veillard 2236: */
1.123 daniel 2237: xmlChar *
2238: xmlStrdup(const xmlChar *cur) {
2239: const xmlChar *p = cur;
1.1 veillard 2240:
1.135 daniel 2241: if (cur == NULL) return(NULL);
1.152 daniel 2242: while (*p != 0) p++;
1.1 veillard 2243: return(xmlStrndup(cur, p - cur));
2244: }
2245:
1.50 daniel 2246: /**
2247: * xmlCharStrndup:
2248: * @cur: the input char *
2249: * @len: the len of @cur
2250: *
1.123 daniel 2251: * a strndup for char's to xmlChar's
1.68 daniel 2252: *
1.123 daniel 2253: * Returns a new xmlChar * or NULL
1.45 daniel 2254: */
2255:
1.123 daniel 2256: xmlChar *
1.55 daniel 2257: xmlCharStrndup(const char *cur, int len) {
1.45 daniel 2258: int i;
1.135 daniel 2259: xmlChar *ret;
2260:
2261: if ((cur == NULL) || (len < 0)) return(NULL);
2262: ret = xmlMalloc((len + 1) * sizeof(xmlChar));
1.45 daniel 2263: if (ret == NULL) {
1.86 daniel 2264: fprintf(stderr, "malloc of %ld byte failed\n",
1.123 daniel 2265: (len + 1) * (long)sizeof(xmlChar));
1.45 daniel 2266: return(NULL);
2267: }
2268: for (i = 0;i < len;i++)
1.123 daniel 2269: ret[i] = (xmlChar) cur[i];
1.45 daniel 2270: ret[len] = 0;
2271: return(ret);
2272: }
2273:
1.50 daniel 2274: /**
2275: * xmlCharStrdup:
2276: * @cur: the input char *
2277: * @len: the len of @cur
2278: *
1.123 daniel 2279: * a strdup for char's to xmlChar's
1.68 daniel 2280: *
1.123 daniel 2281: * Returns a new xmlChar * or NULL
1.45 daniel 2282: */
2283:
1.123 daniel 2284: xmlChar *
1.55 daniel 2285: xmlCharStrdup(const char *cur) {
1.45 daniel 2286: const char *p = cur;
2287:
1.135 daniel 2288: if (cur == NULL) return(NULL);
1.45 daniel 2289: while (*p != '\0') p++;
2290: return(xmlCharStrndup(cur, p - cur));
2291: }
2292:
1.50 daniel 2293: /**
2294: * xmlStrcmp:
1.123 daniel 2295: * @str1: the first xmlChar *
2296: * @str2: the second xmlChar *
1.50 daniel 2297: *
1.123 daniel 2298: * a strcmp for xmlChar's
1.68 daniel 2299: *
2300: * Returns the integer result of the comparison
1.14 veillard 2301: */
2302:
1.55 daniel 2303: int
1.123 daniel 2304: xmlStrcmp(const xmlChar *str1, const xmlChar *str2) {
1.14 veillard 2305: register int tmp;
2306:
1.135 daniel 2307: if ((str1 == NULL) && (str2 == NULL)) return(0);
2308: if (str1 == NULL) return(-1);
2309: if (str2 == NULL) return(1);
1.14 veillard 2310: do {
2311: tmp = *str1++ - *str2++;
2312: if (tmp != 0) return(tmp);
2313: } while ((*str1 != 0) && (*str2 != 0));
2314: return (*str1 - *str2);
2315: }
2316:
1.50 daniel 2317: /**
2318: * xmlStrncmp:
1.123 daniel 2319: * @str1: the first xmlChar *
2320: * @str2: the second xmlChar *
1.50 daniel 2321: * @len: the max comparison length
2322: *
1.123 daniel 2323: * a strncmp for xmlChar's
1.68 daniel 2324: *
2325: * Returns the integer result of the comparison
1.14 veillard 2326: */
2327:
1.55 daniel 2328: int
1.123 daniel 2329: xmlStrncmp(const xmlChar *str1, const xmlChar *str2, int len) {
1.14 veillard 2330: register int tmp;
2331:
2332: if (len <= 0) return(0);
1.135 daniel 2333: if ((str1 == NULL) && (str2 == NULL)) return(0);
2334: if (str1 == NULL) return(-1);
2335: if (str2 == NULL) return(1);
1.14 veillard 2336: do {
2337: tmp = *str1++ - *str2++;
2338: if (tmp != 0) return(tmp);
2339: len--;
2340: if (len <= 0) return(0);
2341: } while ((*str1 != 0) && (*str2 != 0));
2342: return (*str1 - *str2);
2343: }
2344:
1.50 daniel 2345: /**
2346: * xmlStrchr:
1.123 daniel 2347: * @str: the xmlChar * array
2348: * @val: the xmlChar to search
1.50 daniel 2349: *
1.123 daniel 2350: * a strchr for xmlChar's
1.68 daniel 2351: *
1.123 daniel 2352: * Returns the xmlChar * for the first occurence or NULL.
1.14 veillard 2353: */
2354:
1.123 daniel 2355: const xmlChar *
2356: xmlStrchr(const xmlChar *str, xmlChar val) {
1.135 daniel 2357: if (str == NULL) return(NULL);
1.14 veillard 2358: while (*str != 0) {
1.123 daniel 2359: if (*str == val) return((xmlChar *) str);
1.14 veillard 2360: str++;
2361: }
2362: return(NULL);
1.89 daniel 2363: }
2364:
2365: /**
2366: * xmlStrstr:
1.123 daniel 2367: * @str: the xmlChar * array (haystack)
2368: * @val: the xmlChar to search (needle)
1.89 daniel 2369: *
1.123 daniel 2370: * a strstr for xmlChar's
1.89 daniel 2371: *
1.123 daniel 2372: * Returns the xmlChar * for the first occurence or NULL.
1.89 daniel 2373: */
2374:
1.123 daniel 2375: const xmlChar *
2376: xmlStrstr(const xmlChar *str, xmlChar *val) {
1.89 daniel 2377: int n;
2378:
2379: if (str == NULL) return(NULL);
2380: if (val == NULL) return(NULL);
2381: n = xmlStrlen(val);
2382:
2383: if (n == 0) return(str);
2384: while (*str != 0) {
2385: if (*str == *val) {
1.123 daniel 2386: if (!xmlStrncmp(str, val, n)) return((const xmlChar *) str);
1.89 daniel 2387: }
2388: str++;
2389: }
2390: return(NULL);
2391: }
2392:
2393: /**
2394: * xmlStrsub:
1.123 daniel 2395: * @str: the xmlChar * array (haystack)
1.89 daniel 2396: * @start: the index of the first char (zero based)
2397: * @len: the length of the substring
2398: *
2399: * Extract a substring of a given string
2400: *
1.123 daniel 2401: * Returns the xmlChar * for the first occurence or NULL.
1.89 daniel 2402: */
2403:
1.123 daniel 2404: xmlChar *
2405: xmlStrsub(const xmlChar *str, int start, int len) {
1.89 daniel 2406: int i;
2407:
2408: if (str == NULL) return(NULL);
2409: if (start < 0) return(NULL);
1.90 daniel 2410: if (len < 0) return(NULL);
1.89 daniel 2411:
2412: for (i = 0;i < start;i++) {
2413: if (*str == 0) return(NULL);
2414: str++;
2415: }
2416: if (*str == 0) return(NULL);
2417: return(xmlStrndup(str, len));
1.14 veillard 2418: }
1.28 daniel 2419:
1.50 daniel 2420: /**
2421: * xmlStrlen:
1.123 daniel 2422: * @str: the xmlChar * array
1.50 daniel 2423: *
1.127 daniel 2424: * length of a xmlChar's string
1.68 daniel 2425: *
1.123 daniel 2426: * Returns the number of xmlChar contained in the ARRAY.
1.45 daniel 2427: */
2428:
1.55 daniel 2429: int
1.123 daniel 2430: xmlStrlen(const xmlChar *str) {
1.45 daniel 2431: int len = 0;
2432:
2433: if (str == NULL) return(0);
2434: while (*str != 0) {
2435: str++;
2436: len++;
2437: }
2438: return(len);
2439: }
2440:
1.50 daniel 2441: /**
2442: * xmlStrncat:
1.123 daniel 2443: * @cur: the original xmlChar * array
2444: * @add: the xmlChar * array added
1.50 daniel 2445: * @len: the length of @add
2446: *
1.123 daniel 2447: * a strncat for array of xmlChar's
1.68 daniel 2448: *
1.123 daniel 2449: * Returns a new xmlChar * containing the concatenated string.
1.45 daniel 2450: */
2451:
1.123 daniel 2452: xmlChar *
2453: xmlStrncat(xmlChar *cur, const xmlChar *add, int len) {
1.45 daniel 2454: int size;
1.123 daniel 2455: xmlChar *ret;
1.45 daniel 2456:
2457: if ((add == NULL) || (len == 0))
2458: return(cur);
2459: if (cur == NULL)
2460: return(xmlStrndup(add, len));
2461:
2462: size = xmlStrlen(cur);
1.123 daniel 2463: ret = xmlRealloc(cur, (size + len + 1) * sizeof(xmlChar));
1.45 daniel 2464: if (ret == NULL) {
1.86 daniel 2465: fprintf(stderr, "xmlStrncat: realloc of %ld byte failed\n",
1.123 daniel 2466: (size + len + 1) * (long)sizeof(xmlChar));
1.45 daniel 2467: return(cur);
2468: }
1.123 daniel 2469: memcpy(&ret[size], add, len * sizeof(xmlChar));
1.45 daniel 2470: ret[size + len] = 0;
2471: return(ret);
2472: }
2473:
1.50 daniel 2474: /**
2475: * xmlStrcat:
1.123 daniel 2476: * @cur: the original xmlChar * array
2477: * @add: the xmlChar * array added
1.50 daniel 2478: *
1.152 daniel 2479: * a strcat for array of xmlChar's. Since they are supposed to be
2480: * encoded in UTF-8 or an encoding with 8bit based chars, we assume
2481: * a termination mark of '0'.
1.68 daniel 2482: *
1.123 daniel 2483: * Returns a new xmlChar * containing the concatenated string.
1.45 daniel 2484: */
1.123 daniel 2485: xmlChar *
2486: xmlStrcat(xmlChar *cur, const xmlChar *add) {
2487: const xmlChar *p = add;
1.45 daniel 2488:
2489: if (add == NULL) return(cur);
2490: if (cur == NULL)
2491: return(xmlStrdup(add));
2492:
1.152 daniel 2493: while (*p != 0) p++;
1.45 daniel 2494: return(xmlStrncat(cur, add, p - add));
2495: }
2496:
2497: /************************************************************************
2498: * *
2499: * Commodity functions, cleanup needed ? *
2500: * *
2501: ************************************************************************/
2502:
1.50 daniel 2503: /**
2504: * areBlanks:
2505: * @ctxt: an XML parser context
1.123 daniel 2506: * @str: a xmlChar *
1.50 daniel 2507: * @len: the size of @str
2508: *
1.45 daniel 2509: * Is this a sequence of blank chars that one can ignore ?
1.50 daniel 2510: *
1.68 daniel 2511: * Returns 1 if ignorable 0 otherwise.
1.45 daniel 2512: */
2513:
1.123 daniel 2514: static int areBlanks(xmlParserCtxtPtr ctxt, const xmlChar *str, int len) {
1.104 daniel 2515: int i, ret;
1.45 daniel 2516: xmlNodePtr lastChild;
2517:
2518: for (i = 0;i < len;i++)
2519: if (!(IS_BLANK(str[i]))) return(0);
2520:
1.152 daniel 2521: if (RAW != '<') return(0);
1.72 daniel 2522: if (ctxt->node == NULL) return(0);
1.104 daniel 2523: if (ctxt->myDoc != NULL) {
2524: ret = xmlIsMixedElement(ctxt->myDoc, ctxt->node->name);
2525: if (ret == 0) return(1);
2526: if (ret == 1) return(0);
2527: }
2528: /*
2529: * heuristic
2530: */
1.45 daniel 2531: lastChild = xmlGetLastChild(ctxt->node);
2532: if (lastChild == NULL) {
2533: if (ctxt->node->content != NULL) return(0);
2534: } else if (xmlNodeIsText(lastChild))
2535: return(0);
1.157 daniel 2536: else if ((ctxt->node->children != NULL) &&
2537: (xmlNodeIsText(ctxt->node->children)))
1.104 daniel 2538: return(0);
1.45 daniel 2539: return(1);
2540: }
2541:
1.50 daniel 2542: /**
2543: * xmlHandleEntity:
2544: * @ctxt: an XML parser context
2545: * @entity: an XML entity pointer.
2546: *
2547: * Default handling of defined entities, when should we define a new input
1.45 daniel 2548: * stream ? When do we just handle that as a set of chars ?
1.99 daniel 2549: *
2550: * OBSOLETE: to be removed at some point.
1.45 daniel 2551: */
2552:
1.55 daniel 2553: void
2554: xmlHandleEntity(xmlParserCtxtPtr ctxt, xmlEntityPtr entity) {
1.45 daniel 2555: int len;
1.50 daniel 2556: xmlParserInputPtr input;
1.45 daniel 2557:
2558: if (entity->content == NULL) {
1.123 daniel 2559: ctxt->errNo = XML_ERR_INTERNAL_ERROR;
1.55 daniel 2560: if ((ctxt->sax != NULL) && (ctxt->sax->error != NULL))
1.74 daniel 2561: ctxt->sax->error(ctxt->userData, "xmlHandleEntity %s: content == NULL\n",
1.45 daniel 2562: entity->name);
1.59 daniel 2563: ctxt->wellFormed = 0;
1.45 daniel 2564: return;
2565: }
2566: len = xmlStrlen(entity->content);
2567: if (len <= 2) goto handle_as_char;
2568:
2569: /*
2570: * Redefine its content as an input stream.
2571: */
1.50 daniel 2572: input = xmlNewEntityInputStream(ctxt, entity);
2573: xmlPushInput(ctxt, input);
1.45 daniel 2574: return;
2575:
2576: handle_as_char:
2577: /*
2578: * Just handle the content as a set of chars.
2579: */
1.72 daniel 2580: if ((ctxt->sax != NULL) && (ctxt->sax->characters != NULL))
1.74 daniel 2581: ctxt->sax->characters(ctxt->userData, entity->content, len);
1.45 daniel 2582:
2583: }
2584:
/*
 * Forward declarations for recursive behaviour.
 */
1.77 daniel 2588: void xmlParsePEReference(xmlParserCtxtPtr ctxt);
2589: void xmlParseReference(xmlParserCtxtPtr ctxt);
1.45 daniel 2590:
1.28 daniel 2591: /************************************************************************
2592: * *
2593: * Extra stuff for namespace support *
2594: * Relates to http://www.w3.org/TR/WD-xml-names *
2595: * *
2596: ************************************************************************/
2597:
1.50 daniel 2598: /**
2599: * xmlNamespaceParseNCName:
2600: * @ctxt: an XML parser context
2601: *
2602: * parse an XML namespace name.
1.28 daniel 2603: *
2604: * [NS 3] NCName ::= (Letter | '_') (NCNameChar)*
2605: *
2606: * [NS 4] NCNameChar ::= Letter | Digit | '.' | '-' | '_' |
2607: * CombiningChar | Extender
1.68 daniel 2608: *
2609: * Returns the namespace name or NULL
1.28 daniel 2610: */
2611:
1.123 daniel 2612: xmlChar *
1.55 daniel 2613: xmlNamespaceParseNCName(xmlParserCtxtPtr ctxt) {
1.152 daniel 2614: xmlChar buf[XML_MAX_NAMELEN + 5];
2615: int len = 0, l;
2616: int cur = CUR_CHAR(l);
1.28 daniel 2617:
1.156 daniel 2618: /* load first the value of the char !!! */
1.152 daniel 2619: if (!IS_LETTER(cur) && (cur != '_')) return(NULL);
1.28 daniel 2620:
1.152 daniel 2621: while ((IS_LETTER(cur)) || (IS_DIGIT(cur)) ||
2622: (cur == '.') || (cur == '-') ||
2623: (cur == '_') ||
2624: (IS_COMBINING(cur)) ||
2625: (IS_EXTENDER(cur))) {
2626: COPY_BUF(l,buf,len,cur);
2627: NEXTL(l);
2628: cur = CUR_CHAR(l);
1.91 daniel 2629: if (len >= XML_MAX_NAMELEN) {
2630: fprintf(stderr,
2631: "xmlNamespaceParseNCName: reached XML_MAX_NAMELEN limit\n");
1.152 daniel 2632: while ((IS_LETTER(cur)) || (IS_DIGIT(cur)) ||
2633: (cur == '.') || (cur == '-') ||
2634: (cur == '_') ||
2635: (IS_COMBINING(cur)) ||
2636: (IS_EXTENDER(cur))) {
2637: NEXTL(l);
2638: cur = CUR_CHAR(l);
2639: }
1.91 daniel 2640: break;
2641: }
2642: }
2643: return(xmlStrndup(buf, len));
1.28 daniel 2644: }
2645:
1.50 daniel 2646: /**
2647: * xmlNamespaceParseQName:
2648: * @ctxt: an XML parser context
1.123 daniel 2649: * @prefix: a xmlChar **
1.50 daniel 2650: *
2651: * parse an XML qualified name
1.28 daniel 2652: *
2653: * [NS 5] QName ::= (Prefix ':')? LocalPart
2654: *
2655: * [NS 6] Prefix ::= NCName
2656: *
2657: * [NS 7] LocalPart ::= NCName
1.68 daniel 2658: *
1.127 daniel 2659: * Returns the local part, and prefix is updated
1.50 daniel 2660: * to get the Prefix if any.
1.28 daniel 2661: */
2662:
1.123 daniel 2663: xmlChar *
2664: xmlNamespaceParseQName(xmlParserCtxtPtr ctxt, xmlChar **prefix) {
2665: xmlChar *ret = NULL;
1.28 daniel 2666:
2667: *prefix = NULL;
2668: ret = xmlNamespaceParseNCName(ctxt);
1.152 daniel 2669: if (RAW == ':') {
1.28 daniel 2670: *prefix = ret;
1.40 daniel 2671: NEXT;
1.28 daniel 2672: ret = xmlNamespaceParseNCName(ctxt);
2673: }
2674:
2675: return(ret);
2676: }
2677:
1.50 daniel 2678: /**
1.72 daniel 2679: * xmlSplitQName:
2680: * @name: an XML parser context
1.123 daniel 2681: * @prefix: a xmlChar **
1.72 daniel 2682: *
2683: * parse an XML qualified name string
2684: *
2685: * [NS 5] QName ::= (Prefix ':')? LocalPart
2686: *
2687: * [NS 6] Prefix ::= NCName
2688: *
2689: * [NS 7] LocalPart ::= NCName
2690: *
1.127 daniel 2691: * Returns the local part, and prefix is updated
1.72 daniel 2692: * to get the Prefix if any.
2693: */
2694:
1.123 daniel 2695: xmlChar *
2696: xmlSplitQName(const xmlChar *name, xmlChar **prefix) {
2697: xmlChar *ret = NULL;
2698: const xmlChar *q;
2699: const xmlChar *cur = name;
1.72 daniel 2700:
2701: *prefix = NULL;
1.113 daniel 2702:
2703: /* xml: prefix is not really a namespace */
2704: if ((cur[0] == 'x') && (cur[1] == 'm') &&
2705: (cur[2] == 'l') && (cur[3] == ':'))
2706: return(xmlStrdup(name));
2707:
1.72 daniel 2708: if (!IS_LETTER(*cur) && (*cur != '_')) return(NULL);
2709: q = cur++;
2710:
2711: while ((IS_LETTER(*cur)) || (IS_DIGIT(*cur)) ||
2712: (*cur == '.') || (*cur == '-') ||
2713: (*cur == '_') ||
2714: (IS_COMBINING(*cur)) ||
2715: (IS_EXTENDER(*cur)))
2716: cur++;
2717:
2718: ret = xmlStrndup(q, cur - q);
2719:
2720: if (*cur == ':') {
2721: cur++;
2722: if (!IS_LETTER(*cur) && (*cur != '_')) return(ret);
2723: *prefix = ret;
2724:
2725: q = cur++;
2726:
2727: while ((IS_LETTER(*cur)) || (IS_DIGIT(*cur)) ||
2728: (*cur == '.') || (*cur == '-') ||
2729: (*cur == '_') ||
2730: (IS_COMBINING(*cur)) ||
2731: (IS_EXTENDER(*cur)))
2732: cur++;
2733:
2734: ret = xmlStrndup(q, cur - q);
2735: }
2736:
2737: return(ret);
2738: }
2739: /**
1.50 daniel 2740: * xmlNamespaceParseNSDef:
2741: * @ctxt: an XML parser context
2742: *
2743: * parse a namespace prefix declaration
1.28 daniel 2744: *
2745: * [NS 1] NSDef ::= PrefixDef Eq SystemLiteral
2746: *
2747: * [NS 2] PrefixDef ::= 'xmlns' (':' NCName)?
1.68 daniel 2748: *
2749: * Returns the namespace name
1.28 daniel 2750: */
2751:
1.123 daniel 2752: xmlChar *
1.55 daniel 2753: xmlNamespaceParseNSDef(xmlParserCtxtPtr ctxt) {
1.123 daniel 2754: xmlChar *name = NULL;
1.28 daniel 2755:
1.152 daniel 2756: if ((RAW == 'x') && (NXT(1) == 'm') &&
1.40 daniel 2757: (NXT(2) == 'l') && (NXT(3) == 'n') &&
2758: (NXT(4) == 's')) {
2759: SKIP(5);
1.152 daniel 2760: if (RAW == ':') {
1.40 daniel 2761: NEXT;
1.28 daniel 2762: name = xmlNamespaceParseNCName(ctxt);
2763: }
2764: }
1.39 daniel 2765: return(name);
1.28 daniel 2766: }
2767:
1.50 daniel 2768: /**
2769: * xmlParseQuotedString:
2770: * @ctxt: an XML parser context
2771: *
1.45 daniel 2772: * [OLD] Parse and return a string between quotes or doublequotes
1.110 daniel 2773: * To be removed at next drop of binary compatibility
1.68 daniel 2774: *
2775: * Returns the string parser or NULL.
1.45 daniel 2776: */
1.123 daniel 2777: xmlChar *
1.55 daniel 2778: xmlParseQuotedString(xmlParserCtxtPtr ctxt) {
1.135 daniel 2779: xmlChar *buf = NULL;
1.152 daniel 2780: int len = 0,l;
1.140 daniel 2781: int size = XML_PARSER_BUFFER_SIZE;
1.152 daniel 2782: int c;
1.45 daniel 2783:
1.135 daniel 2784: buf = (xmlChar *) xmlMalloc(size * sizeof(xmlChar));
2785: if (buf == NULL) {
2786: fprintf(stderr, "malloc of %d byte failed\n", size);
2787: return(NULL);
2788: }
1.152 daniel 2789: if (RAW == '"') {
1.45 daniel 2790: NEXT;
1.152 daniel 2791: c = CUR_CHAR(l);
1.135 daniel 2792: while (IS_CHAR(c) && (c != '"')) {
1.152 daniel 2793: if (len + 5 >= size) {
1.135 daniel 2794: size *= 2;
2795: buf = xmlRealloc(buf, size * sizeof(xmlChar));
2796: if (buf == NULL) {
2797: fprintf(stderr, "realloc of %d byte failed\n", size);
2798: return(NULL);
2799: }
2800: }
1.152 daniel 2801: COPY_BUF(l,buf,len,c);
2802: NEXTL(l);
2803: c = CUR_CHAR(l);
1.135 daniel 2804: }
2805: if (c != '"') {
1.123 daniel 2806: ctxt->errNo = XML_ERR_STRING_NOT_CLOSED;
1.55 daniel 2807: if ((ctxt->sax != NULL) && (ctxt->sax->error != NULL))
1.135 daniel 2808: ctxt->sax->error(ctxt->userData,
2809: "String not closed \"%.50s\"\n", buf);
1.59 daniel 2810: ctxt->wellFormed = 0;
1.55 daniel 2811: } else {
1.45 daniel 2812: NEXT;
2813: }
1.152 daniel 2814: } else if (RAW == '\''){
1.45 daniel 2815: NEXT;
1.135 daniel 2816: c = CUR;
2817: while (IS_CHAR(c) && (c != '\'')) {
2818: if (len + 1 >= size) {
2819: size *= 2;
2820: buf = xmlRealloc(buf, size * sizeof(xmlChar));
2821: if (buf == NULL) {
2822: fprintf(stderr, "realloc of %d byte failed\n", size);
2823: return(NULL);
2824: }
2825: }
2826: buf[len++] = c;
2827: NEXT;
2828: c = CUR;
2829: }
1.152 daniel 2830: if (RAW != '\'') {
1.123 daniel 2831: ctxt->errNo = XML_ERR_STRING_NOT_CLOSED;
1.55 daniel 2832: if ((ctxt->sax != NULL) && (ctxt->sax->error != NULL))
1.135 daniel 2833: ctxt->sax->error(ctxt->userData,
2834: "String not closed \"%.50s\"\n", buf);
1.59 daniel 2835: ctxt->wellFormed = 0;
1.55 daniel 2836: } else {
1.45 daniel 2837: NEXT;
2838: }
2839: }
1.135 daniel 2840: return(buf);
1.45 daniel 2841: }
2842:
1.50 daniel 2843: /**
2844: * xmlParseNamespace:
2845: * @ctxt: an XML parser context
2846: *
1.45 daniel 2847: * [OLD] xmlParseNamespace: parse specific PI '<?namespace ...' constructs.
2848: *
2849: * This is what the older xml-name Working Draft specified, a bunch of
2850: * other stuff may still rely on it, so support is still here as
1.127 daniel 2851: * if it was declared on the root of the Tree:-(
1.110 daniel 2852: *
2853: * To be removed at next drop of binary compatibility
1.45 daniel 2854: */
2855:
1.55 daniel 2856: void
2857: xmlParseNamespace(xmlParserCtxtPtr ctxt) {
1.123 daniel 2858: xmlChar *href = NULL;
2859: xmlChar *prefix = NULL;
1.45 daniel 2860: int garbage = 0;
2861:
2862: /*
2863: * We just skipped "namespace" or "xml:namespace"
2864: */
2865: SKIP_BLANKS;
2866:
1.153 daniel 2867: while (IS_CHAR(RAW) && (RAW != '>')) {
1.45 daniel 2868: /*
2869: * We can have "ns" or "prefix" attributes
2870: * Old encoding as 'href' or 'AS' attributes is still supported
2871: */
1.152 daniel 2872: if ((RAW == 'n') && (NXT(1) == 's')) {
1.45 daniel 2873: garbage = 0;
2874: SKIP(2);
2875: SKIP_BLANKS;
2876:
1.152 daniel 2877: if (RAW != '=') continue;
1.45 daniel 2878: NEXT;
2879: SKIP_BLANKS;
2880:
2881: href = xmlParseQuotedString(ctxt);
2882: SKIP_BLANKS;
1.152 daniel 2883: } else if ((RAW == 'h') && (NXT(1) == 'r') &&
1.45 daniel 2884: (NXT(2) == 'e') && (NXT(3) == 'f')) {
2885: garbage = 0;
2886: SKIP(4);
2887: SKIP_BLANKS;
2888:
1.152 daniel 2889: if (RAW != '=') continue;
1.45 daniel 2890: NEXT;
2891: SKIP_BLANKS;
2892:
2893: href = xmlParseQuotedString(ctxt);
2894: SKIP_BLANKS;
1.152 daniel 2895: } else if ((RAW == 'p') && (NXT(1) == 'r') &&
1.45 daniel 2896: (NXT(2) == 'e') && (NXT(3) == 'f') &&
2897: (NXT(4) == 'i') && (NXT(5) == 'x')) {
2898: garbage = 0;
2899: SKIP(6);
2900: SKIP_BLANKS;
2901:
1.152 daniel 2902: if (RAW != '=') continue;
1.45 daniel 2903: NEXT;
2904: SKIP_BLANKS;
2905:
2906: prefix = xmlParseQuotedString(ctxt);
2907: SKIP_BLANKS;
1.152 daniel 2908: } else if ((RAW == 'A') && (NXT(1) == 'S')) {
1.45 daniel 2909: garbage = 0;
2910: SKIP(2);
2911: SKIP_BLANKS;
2912:
1.152 daniel 2913: if (RAW != '=') continue;
1.45 daniel 2914: NEXT;
2915: SKIP_BLANKS;
2916:
2917: prefix = xmlParseQuotedString(ctxt);
2918: SKIP_BLANKS;
1.152 daniel 2919: } else if ((RAW == '?') && (NXT(1) == '>')) {
1.45 daniel 2920: garbage = 0;
1.91 daniel 2921: NEXT;
1.45 daniel 2922: } else {
2923: /*
2924: * Found garbage when parsing the namespace
2925: */
1.122 daniel 2926: if (!garbage) {
1.55 daniel 2927: if ((ctxt->sax != NULL) && (ctxt->sax->error != NULL))
1.122 daniel 2928: ctxt->sax->error(ctxt->userData,
2929: "xmlParseNamespace found garbage\n");
2930: }
1.123 daniel 2931: ctxt->errNo = XML_ERR_NS_DECL_ERROR;
1.59 daniel 2932: ctxt->wellFormed = 0;
1.45 daniel 2933: NEXT;
2934: }
2935: }
2936:
2937: MOVETO_ENDTAG(CUR_PTR);
2938: NEXT;
2939:
2940: /*
2941: * Register the DTD.
1.72 daniel 2942: if (href != NULL)
2943: if ((ctxt->sax != NULL) && (ctxt->sax->globalNamespace != NULL))
1.74 daniel 2944: ctxt->sax->globalNamespace(ctxt->userData, href, prefix);
1.45 daniel 2945: */
2946:
1.119 daniel 2947: if (prefix != NULL) xmlFree(prefix);
2948: if (href != NULL) xmlFree(href);
1.45 daniel 2949: }
2950:
1.28 daniel 2951: /************************************************************************
2952: * *
2953: * The parser itself *
2954: * Relates to http://www.w3.org/TR/REC-xml *
2955: * *
2956: ************************************************************************/
1.14 veillard 2957:
1.50 daniel 2958: /**
1.97 daniel 2959: * xmlScanName:
2960: * @ctxt: an XML parser context
2961: *
2962: * Trickery: parse an XML name but without consuming the input flow
2963: * Needed for rollback cases.
2964: *
2965: * [4] NameChar ::= Letter | Digit | '.' | '-' | '_' | ':' |
2966: * CombiningChar | Extender
2967: *
2968: * [5] Name ::= (Letter | '_' | ':') (NameChar)*
2969: *
2970: * [6] Names ::= Name (S Name)*
2971: *
2972: * Returns the Name parsed or NULL
2973: */
2974:
1.123 daniel 2975: xmlChar *
1.97 daniel 2976: xmlScanName(xmlParserCtxtPtr ctxt) {
1.123 daniel 2977: xmlChar buf[XML_MAX_NAMELEN];
1.97 daniel 2978: int len = 0;
2979:
2980: GROW;
1.152 daniel 2981: if (!IS_LETTER(RAW) && (RAW != '_') &&
2982: (RAW != ':')) {
1.97 daniel 2983: return(NULL);
2984: }
2985:
2986: while ((IS_LETTER(NXT(len))) || (IS_DIGIT(NXT(len))) ||
2987: (NXT(len) == '.') || (NXT(len) == '-') ||
2988: (NXT(len) == '_') || (NXT(len) == ':') ||
2989: (IS_COMBINING(NXT(len))) ||
2990: (IS_EXTENDER(NXT(len)))) {
2991: buf[len] = NXT(len);
2992: len++;
2993: if (len >= XML_MAX_NAMELEN) {
2994: fprintf(stderr,
2995: "xmlScanName: reached XML_MAX_NAMELEN limit\n");
2996: while ((IS_LETTER(NXT(len))) || (IS_DIGIT(NXT(len))) ||
2997: (NXT(len) == '.') || (NXT(len) == '-') ||
2998: (NXT(len) == '_') || (NXT(len) == ':') ||
2999: (IS_COMBINING(NXT(len))) ||
3000: (IS_EXTENDER(NXT(len))))
3001: len++;
3002: break;
3003: }
3004: }
3005: return(xmlStrndup(buf, len));
3006: }
3007:
3008: /**
1.50 daniel 3009: * xmlParseName:
3010: * @ctxt: an XML parser context
3011: *
3012: * parse an XML name.
1.22 daniel 3013: *
3014: * [4] NameChar ::= Letter | Digit | '.' | '-' | '_' | ':' |
3015: * CombiningChar | Extender
3016: *
3017: * [5] Name ::= (Letter | '_' | ':') (NameChar)*
3018: *
3019: * [6] Names ::= Name (S Name)*
1.68 daniel 3020: *
3021: * Returns the Name parsed or NULL
1.1 veillard 3022: */
3023:
1.123 daniel 3024: xmlChar *
1.55 daniel 3025: xmlParseName(xmlParserCtxtPtr ctxt) {
1.160 daniel 3026: xmlChar buf[XML_MAX_NAMELEN + 5];
3027: int len = 0, l;
3028: int c;
1.1 veillard 3029:
1.91 daniel 3030: GROW;
1.160 daniel 3031: c = CUR_CHAR(l);
3032: if (!IS_LETTER(c) && (c != '_') &&
3033: (c != ':')) {
1.91 daniel 3034: return(NULL);
3035: }
1.40 daniel 3036:
1.160 daniel 3037: while ((IS_LETTER(c)) || (IS_DIGIT(c)) ||
3038: (c == '.') || (c == '-') ||
3039: (c == '_') || (c == ':') ||
3040: (IS_COMBINING(c)) ||
3041: (IS_EXTENDER(c))) {
3042: COPY_BUF(l,buf,len,c);
3043: NEXTL(l);
3044: c = CUR_CHAR(l);
1.91 daniel 3045: if (len >= XML_MAX_NAMELEN) {
3046: fprintf(stderr,
3047: "xmlParseName: reached XML_MAX_NAMELEN limit\n");
1.160 daniel 3048: while ((IS_LETTER(c)) || (IS_DIGIT(c)) ||
3049: (c == '.') || (c == '-') ||
3050: (c == '_') || (c == ':') ||
3051: (IS_COMBINING(c)) ||
3052: (IS_EXTENDER(c))) {
3053: NEXTL(l);
3054: c = CUR_CHAR(l);
1.97 daniel 3055: }
1.91 daniel 3056: break;
3057: }
3058: }
3059: return(xmlStrndup(buf, len));
1.22 daniel 3060: }
3061:
1.50 daniel 3062: /**
1.135 daniel 3063: * xmlParseStringName:
3064: * @ctxt: an XML parser context
3065: * @str: a pointer to an index in the string
3066: *
3067: * parse an XML name.
3068: *
3069: * [4] NameChar ::= Letter | Digit | '.' | '-' | '_' | ':' |
3070: * CombiningChar | Extender
3071: *
3072: * [5] Name ::= (Letter | '_' | ':') (NameChar)*
3073: *
3074: * [6] Names ::= Name (S Name)*
3075: *
3076: * Returns the Name parsed or NULL. The str pointer
3077: * is updated to the current location in the string.
3078: */
3079:
3080: xmlChar *
3081: xmlParseStringName(xmlParserCtxtPtr ctxt, const xmlChar** str) {
3082: const xmlChar *ptr;
3083: const xmlChar *start;
3084: xmlChar cur;
3085:
3086: if ((str == NULL) || (*str == NULL)) return(NULL);
3087:
3088: start = ptr = *str;
3089: cur = *ptr;
3090: if (!IS_LETTER(cur) && (cur != '_') &&
3091: (cur != ':')) {
3092: return(NULL);
3093: }
3094:
3095: while ((IS_LETTER(cur)) || (IS_DIGIT(cur)) ||
3096: (cur == '.') || (cur == '-') ||
3097: (cur == '_') || (cur == ':') ||
3098: (IS_COMBINING(cur)) ||
3099: (IS_EXTENDER(cur))) {
3100: ptr++;
3101: cur = *ptr;
3102: }
3103: *str = ptr;
3104: return(xmlStrndup(start, ptr - start ));
3105: }
3106:
3107: /**
1.50 daniel 3108: * xmlParseNmtoken:
3109: * @ctxt: an XML parser context
3110: *
3111: * parse an XML Nmtoken.
1.22 daniel 3112: *
3113: * [7] Nmtoken ::= (NameChar)+
3114: *
3115: * [8] Nmtokens ::= Nmtoken (S Nmtoken)*
1.68 daniel 3116: *
3117: * Returns the Nmtoken parsed or NULL
1.22 daniel 3118: */
3119:
1.123 daniel 3120: xmlChar *
1.55 daniel 3121: xmlParseNmtoken(xmlParserCtxtPtr ctxt) {
1.123 daniel 3122: xmlChar buf[XML_MAX_NAMELEN];
1.91 daniel 3123: int len = 0;
1.160 daniel 3124: int c,l;
1.22 daniel 3125:
1.91 daniel 3126: GROW;
1.160 daniel 3127: c = CUR_CHAR(l);
3128: while ((IS_LETTER(c)) || (IS_DIGIT(c)) ||
3129: (c == '.') || (c == '-') ||
3130: (c == '_') || (c == ':') ||
3131: (IS_COMBINING(c)) ||
3132: (IS_EXTENDER(c))) {
3133: COPY_BUF(l,buf,len,c);
3134: NEXTL(l);
3135: c = CUR_CHAR(l);
1.91 daniel 3136: if (len >= XML_MAX_NAMELEN) {
3137: fprintf(stderr,
3138: "xmlParseNmtoken: reached XML_MAX_NAMELEN limit\n");
1.160 daniel 3139: while ((IS_LETTER(c)) || (IS_DIGIT(c)) ||
3140: (c == '.') || (c == '-') ||
3141: (c == '_') || (c == ':') ||
3142: (IS_COMBINING(c)) ||
3143: (IS_EXTENDER(c))) {
3144: NEXTL(l);
3145: c = CUR_CHAR(l);
3146: }
1.91 daniel 3147: break;
3148: }
3149: }
3150: return(xmlStrndup(buf, len));
1.1 veillard 3151: }
3152:
1.50 daniel 3153: /**
3154: * xmlParseEntityValue:
3155: * @ctxt: an XML parser context
1.78 daniel 3156: * @orig: if non-NULL store a copy of the original entity value
1.50 daniel 3157: *
3158: * parse a value for ENTITY decl.
1.24 daniel 3159: *
3160: * [9] EntityValue ::= '"' ([^%&"] | PEReference | Reference)* '"' |
3161: * "'" ([^%&'] | PEReference | Reference)* "'"
1.68 daniel 3162: *
1.78 daniel 3163: * Returns the EntityValue parsed with reference substitued or NULL
1.24 daniel 3164: */
3165:
1.123 daniel 3166: xmlChar *
3167: xmlParseEntityValue(xmlParserCtxtPtr ctxt, xmlChar **orig) {
1.135 daniel 3168: xmlChar *buf = NULL;
3169: int len = 0;
1.140 daniel 3170: int size = XML_PARSER_BUFFER_SIZE;
1.152 daniel 3171: int c, l;
1.135 daniel 3172: xmlChar stop;
1.123 daniel 3173: xmlChar *ret = NULL;
1.98 daniel 3174: xmlParserInputPtr input;
1.24 daniel 3175:
1.152 daniel 3176: if (RAW == '"') stop = '"';
3177: else if (RAW == '\'') stop = '\'';
1.135 daniel 3178: else {
3179: ctxt->errNo = XML_ERR_ENTITY_NOT_STARTED;
3180: if ((ctxt->sax != NULL) && (ctxt->sax->error != NULL))
3181: ctxt->sax->error(ctxt->userData, "EntityValue: \" or ' expected\n");
3182: ctxt->wellFormed = 0;
3183: return(NULL);
3184: }
3185: buf = (xmlChar *) xmlMalloc(size * sizeof(xmlChar));
3186: if (buf == NULL) {
3187: fprintf(stderr, "malloc of %d byte failed\n", size);
3188: return(NULL);
3189: }
1.94 daniel 3190:
1.135 daniel 3191: /*
3192: * The content of the entity definition is copied in a buffer.
3193: */
1.94 daniel 3194:
1.135 daniel 3195: ctxt->instate = XML_PARSER_ENTITY_VALUE;
3196: input = ctxt->input;
3197: GROW;
3198: NEXT;
1.152 daniel 3199: c = CUR_CHAR(l);
1.135 daniel 3200: /*
3201: * NOTE: 4.4.5 Included in Literal
3202: * When a parameter entity reference appears in a literal entity
3203: * value, ... a single or double quote character in the replacement
3204: * text is always treated as a normal data character and will not
3205: * terminate the literal.
3206: * In practice it means we stop the loop only when back at parsing
3207: * the initial entity and the quote is found
3208: */
3209: while (IS_CHAR(c) && ((c != stop) || (ctxt->input != input))) {
1.152 daniel 3210: if (len + 5 >= size) {
1.135 daniel 3211: size *= 2;
3212: buf = xmlRealloc(buf, size * sizeof(xmlChar));
3213: if (buf == NULL) {
3214: fprintf(stderr, "realloc of %d byte failed\n", size);
3215: return(NULL);
1.94 daniel 3216: }
1.79 daniel 3217: }
1.152 daniel 3218: COPY_BUF(l,buf,len,c);
3219: NEXTL(l);
1.98 daniel 3220: /*
1.135 daniel 3221: * Pop-up of finished entities.
1.98 daniel 3222: */
1.152 daniel 3223: while ((RAW == 0) && (ctxt->inputNr > 1))
1.135 daniel 3224: xmlPopInput(ctxt);
1.152 daniel 3225:
3226: c = CUR_CHAR(l);
1.135 daniel 3227: if (c == 0) {
1.94 daniel 3228: GROW;
1.152 daniel 3229: c = CUR_CHAR(l);
1.79 daniel 3230: }
1.135 daniel 3231: }
3232: buf[len] = 0;
3233:
3234: /*
3235: * Then PEReference entities are substituted.
3236: */
3237: if (c != stop) {
3238: ctxt->errNo = XML_ERR_ENTITY_NOT_FINISHED;
1.55 daniel 3239: if ((ctxt->sax != NULL) && (ctxt->sax->error != NULL))
1.135 daniel 3240: ctxt->sax->error(ctxt->userData, "EntityValue: \" expected\n");
1.59 daniel 3241: ctxt->wellFormed = 0;
1.135 daniel 3242: } else {
3243: NEXT;
3244: /*
3245: * NOTE: 4.4.7 Bypassed
3246: * When a general entity reference appears in the EntityValue in
3247: * an entity declaration, it is bypassed and left as is.
3248: * so XML_SUBSTITUTE_REF is not set.
3249: */
3250: ret = xmlStringDecodeEntities(ctxt, buf, XML_SUBSTITUTE_PEREF,
3251: 0, 0, 0);
3252: if (orig != NULL)
3253: *orig = buf;
3254: else
3255: xmlFree(buf);
1.24 daniel 3256: }
3257:
3258: return(ret);
3259: }
3260:
1.50 daniel 3261: /**
3262: * xmlParseAttValue:
3263: * @ctxt: an XML parser context
3264: *
3265: * parse a value for an attribute
1.78 daniel 3266: * Note: the parser won't do substitution of entities here, this
1.113 daniel 3267: * will be handled later in xmlStringGetNodeList
1.29 daniel 3268: *
3269: * [10] AttValue ::= '"' ([^<&"] | Reference)* '"' |
3270: * "'" ([^<&'] | Reference)* "'"
1.68 daniel 3271: *
1.129 daniel 3272: * 3.3.3 Attribute-Value Normalization:
3273: * Before the value of an attribute is passed to the application or
3274: * checked for validity, the XML processor must normalize it as follows:
3275: * - a character reference is processed by appending the referenced
3276: * character to the attribute value
3277: * - an entity reference is processed by recursively processing the
3278: * replacement text of the entity
3279: * - a whitespace character (#x20, #xD, #xA, #x9) is processed by
3280: * appending #x20 to the normalized value, except that only a single
3281: * #x20 is appended for a "#xD#xA" sequence that is part of an external
3282: * parsed entity or the literal entity value of an internal parsed entity
3283: * - other characters are processed by appending them to the normalized value
1.130 daniel 3284: * If the declared value is not CDATA, then the XML processor must further
3285: * process the normalized attribute value by discarding any leading and
3286: * trailing space (#x20) characters, and by replacing sequences of space
3287: * (#x20) characters by a single space (#x20) character.
3288: * All attributes for which no declaration has been read should be treated
3289: * by a non-validating parser as if declared CDATA.
1.129 daniel 3290: *
3291: * Returns the AttValue parsed or NULL. The value has to be freed by the caller.
1.29 daniel 3292: */
3293:
1.123 daniel 3294: xmlChar *
1.55 daniel 3295: xmlParseAttValue(xmlParserCtxtPtr ctxt) {
1.129 daniel 3296: xmlChar limit = 0;
3297: xmlChar *buffer = NULL;
3298: int buffer_size = 0;
3299: xmlChar *out = NULL;
3300:
3301: xmlChar *current = NULL;
3302: xmlEntityPtr ent;
3303: xmlChar cur;
3304:
1.29 daniel 3305:
1.91 daniel 3306: SHRINK;
1.151 daniel 3307: if (NXT(0) == '"') {
1.96 daniel 3308: ctxt->instate = XML_PARSER_ATTRIBUTE_VALUE;
1.129 daniel 3309: limit = '"';
1.40 daniel 3310: NEXT;
1.151 daniel 3311: } else if (NXT(0) == '\'') {
1.129 daniel 3312: limit = '\'';
1.96 daniel 3313: ctxt->instate = XML_PARSER_ATTRIBUTE_VALUE;
1.40 daniel 3314: NEXT;
1.29 daniel 3315: } else {
1.123 daniel 3316: ctxt->errNo = XML_ERR_ATTRIBUTE_NOT_STARTED;
1.55 daniel 3317: if ((ctxt->sax != NULL) && (ctxt->sax->error != NULL))
1.74 daniel 3318: ctxt->sax->error(ctxt->userData, "AttValue: \" or ' expected\n");
1.59 daniel 3319: ctxt->wellFormed = 0;
1.129 daniel 3320: return(NULL);
1.29 daniel 3321: }
3322:
1.129 daniel 3323: /*
3324: * allocate a translation buffer.
3325: */
1.140 daniel 3326: buffer_size = XML_PARSER_BUFFER_SIZE;
1.129 daniel 3327: buffer = (xmlChar *) xmlMalloc(buffer_size * sizeof(xmlChar));
3328: if (buffer == NULL) {
3329: perror("xmlParseAttValue: malloc failed");
3330: return(NULL);
3331: }
3332: out = buffer;
3333:
3334: /*
3335: * Ok loop until we reach one of the ending char or a size limit.
3336: */
3337: cur = CUR;
1.156 daniel 3338: while (((NXT(0) != limit) && (cur != '<')) || (ctxt->token != 0)) {
1.129 daniel 3339: if (cur == 0) break;
3340: if ((cur == '&') && (NXT(1) == '#')) {
3341: int val = xmlParseCharRef(ctxt);
3342: *out++ = val;
3343: } else if (cur == '&') {
3344: ent = xmlParseEntityRef(ctxt);
3345: if ((ent != NULL) &&
3346: (ctxt->replaceEntities != 0)) {
3347: current = ent->content;
3348: while (*current != 0) {
3349: *out++ = *current++;
3350: if (out - buffer > buffer_size - 10) {
3351: int index = out - buffer;
3352:
3353: growBuffer(buffer);
3354: out = &buffer[index];
3355: }
3356: }
3357: } else if (ent != NULL) {
3358: int i = xmlStrlen(ent->name);
3359: const xmlChar *cur = ent->name;
3360:
3361: *out++ = '&';
3362: if (out - buffer > buffer_size - i - 10) {
3363: int index = out - buffer;
3364:
3365: growBuffer(buffer);
3366: out = &buffer[index];
3367: }
3368: for (;i > 0;i--)
3369: *out++ = *cur++;
3370: *out++ = ';';
3371: }
3372: } else {
1.156 daniel 3373: /* invalid for UTF-8 , use COPY(out); !!! */
1.129 daniel 3374: if ((cur == 0x20) || (cur == 0xD) || (cur == 0xA) || (cur == 0x9)) {
1.130 daniel 3375: *out++ = 0x20;
3376: if (out - buffer > buffer_size - 10) {
3377: int index = out - buffer;
3378:
3379: growBuffer(buffer);
3380: out = &buffer[index];
1.129 daniel 3381: }
3382: } else {
3383: *out++ = cur;
3384: if (out - buffer > buffer_size - 10) {
3385: int index = out - buffer;
3386:
3387: growBuffer(buffer);
3388: out = &buffer[index];
3389: }
3390: }
3391: NEXT;
3392: }
3393: cur = CUR;
3394: }
3395: *out++ = 0;
1.152 daniel 3396: if (RAW == '<') {
1.129 daniel 3397: if ((ctxt->sax != NULL) && (ctxt->sax->error != NULL))
3398: ctxt->sax->error(ctxt->userData,
3399: "Unescaped '<' not allowed in attributes values\n");
3400: ctxt->errNo = XML_ERR_LT_IN_ATTRIBUTE;
3401: ctxt->wellFormed = 0;
1.152 daniel 3402: } else if (RAW != limit) {
1.129 daniel 3403: if ((ctxt->sax != NULL) && (ctxt->sax->error != NULL))
3404: ctxt->sax->error(ctxt->userData, "AttValue: ' expected\n");
3405: ctxt->errNo = XML_ERR_ATTRIBUTE_NOT_FINISHED;
3406: ctxt->wellFormed = 0;
3407: } else
3408: NEXT;
3409: return(buffer);
1.29 daniel 3410: }
3411:
1.50 daniel 3412: /**
3413: * xmlParseSystemLiteral:
3414: * @ctxt: an XML parser context
3415: *
3416: * parse an XML Literal
1.21 daniel 3417: *
1.22 daniel 3418: * [11] SystemLiteral ::= ('"' [^"]* '"') | ("'" [^']* "'")
1.68 daniel 3419: *
3420: * Returns the SystemLiteral parsed or NULL
1.21 daniel 3421: */
3422:
1.123 daniel 3423: xmlChar *
1.55 daniel 3424: xmlParseSystemLiteral(xmlParserCtxtPtr ctxt) {
1.135 daniel 3425: xmlChar *buf = NULL;
3426: int len = 0;
1.140 daniel 3427: int size = XML_PARSER_BUFFER_SIZE;
1.152 daniel 3428: int cur, l;
1.135 daniel 3429: xmlChar stop;
1.21 daniel 3430:
1.91 daniel 3431: SHRINK;
1.152 daniel 3432: if (RAW == '"') {
1.40 daniel 3433: NEXT;
1.135 daniel 3434: stop = '"';
1.152 daniel 3435: } else if (RAW == '\'') {
1.40 daniel 3436: NEXT;
1.135 daniel 3437: stop = '\'';
1.21 daniel 3438: } else {
1.55 daniel 3439: if ((ctxt->sax != NULL) && (ctxt->sax->error != NULL))
1.122 daniel 3440: ctxt->sax->error(ctxt->userData,
3441: "SystemLiteral \" or ' expected\n");
1.123 daniel 3442: ctxt->errNo = XML_ERR_LITERAL_NOT_STARTED;
1.59 daniel 3443: ctxt->wellFormed = 0;
1.135 daniel 3444: return(NULL);
1.21 daniel 3445: }
3446:
1.135 daniel 3447: buf = (xmlChar *) xmlMalloc(size * sizeof(xmlChar));
3448: if (buf == NULL) {
3449: fprintf(stderr, "malloc of %d byte failed\n", size);
3450: return(NULL);
3451: }
1.152 daniel 3452: cur = CUR_CHAR(l);
1.135 daniel 3453: while ((IS_CHAR(cur)) && (cur != stop)) {
1.152 daniel 3454: if (len + 5 >= size) {
1.135 daniel 3455: size *= 2;
3456: buf = xmlRealloc(buf, size * sizeof(xmlChar));
3457: if (buf == NULL) {
3458: fprintf(stderr, "realloc of %d byte failed\n", size);
3459: return(NULL);
3460: }
3461: }
1.152 daniel 3462: COPY_BUF(l,buf,len,cur);
3463: NEXTL(l);
3464: cur = CUR_CHAR(l);
1.135 daniel 3465: if (cur == 0) {
3466: GROW;
3467: SHRINK;
1.152 daniel 3468: cur = CUR_CHAR(l);
1.135 daniel 3469: }
3470: }
3471: buf[len] = 0;
3472: if (!IS_CHAR(cur)) {
3473: if ((ctxt->sax != NULL) && (ctxt->sax->error != NULL))
3474: ctxt->sax->error(ctxt->userData, "Unfinished SystemLiteral\n");
3475: ctxt->errNo = XML_ERR_LITERAL_NOT_FINISHED;
3476: ctxt->wellFormed = 0;
3477: } else {
3478: NEXT;
3479: }
3480: return(buf);
1.21 daniel 3481: }
3482:
1.50 daniel 3483: /**
3484: * xmlParsePubidLiteral:
3485: * @ctxt: an XML parser context
1.21 daniel 3486: *
1.50 daniel 3487: * parse an XML public literal
1.68 daniel 3488: *
3489: * [12] PubidLiteral ::= '"' PubidChar* '"' | "'" (PubidChar - "'")* "'"
3490: *
3491: * Returns the PubidLiteral parsed or NULL.
1.21 daniel 3492: */
3493:
1.123 daniel 3494: xmlChar *
1.55 daniel 3495: xmlParsePubidLiteral(xmlParserCtxtPtr ctxt) {
1.135 daniel 3496: xmlChar *buf = NULL;
3497: int len = 0;
1.140 daniel 3498: int size = XML_PARSER_BUFFER_SIZE;
1.135 daniel 3499: xmlChar cur;
3500: xmlChar stop;
1.125 daniel 3501:
1.91 daniel 3502: SHRINK;
1.152 daniel 3503: if (RAW == '"') {
1.40 daniel 3504: NEXT;
1.135 daniel 3505: stop = '"';
1.152 daniel 3506: } else if (RAW == '\'') {
1.40 daniel 3507: NEXT;
1.135 daniel 3508: stop = '\'';
1.21 daniel 3509: } else {
1.55 daniel 3510: if ((ctxt->sax != NULL) && (ctxt->sax->error != NULL))
1.122 daniel 3511: ctxt->sax->error(ctxt->userData,
3512: "SystemLiteral \" or ' expected\n");
1.123 daniel 3513: ctxt->errNo = XML_ERR_LITERAL_NOT_STARTED;
1.59 daniel 3514: ctxt->wellFormed = 0;
1.135 daniel 3515: return(NULL);
3516: }
3517: buf = (xmlChar *) xmlMalloc(size * sizeof(xmlChar));
3518: if (buf == NULL) {
3519: fprintf(stderr, "malloc of %d byte failed\n", size);
3520: return(NULL);
3521: }
3522: cur = CUR;
3523: while ((IS_PUBIDCHAR(cur)) && (cur != stop)) {
3524: if (len + 1 >= size) {
3525: size *= 2;
3526: buf = xmlRealloc(buf, size * sizeof(xmlChar));
3527: if (buf == NULL) {
3528: fprintf(stderr, "realloc of %d byte failed\n", size);
3529: return(NULL);
3530: }
3531: }
3532: buf[len++] = cur;
3533: NEXT;
3534: cur = CUR;
3535: if (cur == 0) {
3536: GROW;
3537: SHRINK;
3538: cur = CUR;
3539: }
3540: }
3541: buf[len] = 0;
3542: if (cur != stop) {
3543: if ((ctxt->sax != NULL) && (ctxt->sax->error != NULL))
3544: ctxt->sax->error(ctxt->userData, "Unfinished PubidLiteral\n");
3545: ctxt->errNo = XML_ERR_LITERAL_NOT_FINISHED;
3546: ctxt->wellFormed = 0;
3547: } else {
3548: NEXT;
1.21 daniel 3549: }
1.135 daniel 3550: return(buf);
1.21 daniel 3551: }
3552:
1.50 daniel 3553: /**
3554: * xmlParseCharData:
3555: * @ctxt: an XML parser context
3556: * @cdata: int indicating whether we are within a CDATA section
3557: *
3558: * parse a CharData section.
3559: * if we are within a CDATA section ']]>' marks an end of section.
1.27 daniel 3560: *
1.151 daniel 3561: * The right angle bracket (>) may be represented using the string ">",
3562: * and must, for compatibility, be escaped using ">" or a character
3563: * reference when it appears in the string "]]>" in content, when that
3564: * string is not marking the end of a CDATA section.
3565: *
1.27 daniel 3566: * [14] CharData ::= [^<&]* - ([^<&]* ']]>' [^<&]*)
3567: */
3568:
1.55 daniel 3569: void
3570: xmlParseCharData(xmlParserCtxtPtr ctxt, int cdata) {
1.152 daniel 3571: xmlChar buf[XML_PARSER_BIG_BUFFER_SIZE + 5];
1.91 daniel 3572: int nbchar = 0;
1.152 daniel 3573: int cur, l;
1.27 daniel 3574:
1.91 daniel 3575: SHRINK;
1.152 daniel 3576: cur = CUR_CHAR(l);
1.160 daniel 3577: while ((IS_CHAR(cur)) && ((cur != '<') || (ctxt->token == '<')) &&
1.153 daniel 3578: ((cur != '&') || (ctxt->token == '&'))) {
1.97 daniel 3579: if ((cur == ']') && (NXT(1) == ']') &&
1.59 daniel 3580: (NXT(2) == '>')) {
3581: if (cdata) break;
3582: else {
3583: if ((ctxt->sax != NULL) && (ctxt->sax->error != NULL))
1.151 daniel 3584: ctxt->sax->error(ctxt->userData,
1.59 daniel 3585: "Sequence ']]>' not allowed in content\n");
1.123 daniel 3586: ctxt->errNo = XML_ERR_MISPLACED_CDATA_END;
1.151 daniel 3587: /* Should this be relaxed ??? I see a "must here */
3588: ctxt->wellFormed = 0;
1.59 daniel 3589: }
3590: }
1.152 daniel 3591: COPY_BUF(l,buf,nbchar,cur);
3592: if (nbchar >= XML_PARSER_BIG_BUFFER_SIZE) {
1.91 daniel 3593: /*
3594: * Ok the segment is to be consumed as chars.
3595: */
3596: if (ctxt->sax != NULL) {
3597: if (areBlanks(ctxt, buf, nbchar)) {
3598: if (ctxt->sax->ignorableWhitespace != NULL)
3599: ctxt->sax->ignorableWhitespace(ctxt->userData,
3600: buf, nbchar);
3601: } else {
3602: if (ctxt->sax->characters != NULL)
3603: ctxt->sax->characters(ctxt->userData, buf, nbchar);
3604: }
3605: }
3606: nbchar = 0;
3607: }
1.152 daniel 3608: NEXTL(l);
3609: cur = CUR_CHAR(l);
1.27 daniel 3610: }
1.91 daniel 3611: if (nbchar != 0) {
3612: /*
3613: * Ok the segment is to be consumed as chars.
3614: */
3615: if (ctxt->sax != NULL) {
3616: if (areBlanks(ctxt, buf, nbchar)) {
3617: if (ctxt->sax->ignorableWhitespace != NULL)
3618: ctxt->sax->ignorableWhitespace(ctxt->userData, buf, nbchar);
3619: } else {
3620: if (ctxt->sax->characters != NULL)
3621: ctxt->sax->characters(ctxt->userData, buf, nbchar);
3622: }
3623: }
1.45 daniel 3624: }
1.27 daniel 3625: }
3626:
1.50 daniel 3627: /**
3628: * xmlParseExternalID:
3629: * @ctxt: an XML parser context
1.123 daniel 3630: * @publicID: a xmlChar** receiving PubidLiteral
1.67 daniel 3631: * @strict: indicate whether we should restrict parsing to only
3632: * production [75], see NOTE below
1.50 daniel 3633: *
1.67 daniel 3634: * Parse an External ID or a Public ID
3635: *
3636: * NOTE: Productions [75] and [83] interract badly since [75] can generate
3637: * 'PUBLIC' S PubidLiteral S SystemLiteral
1.22 daniel 3638: *
3639: * [75] ExternalID ::= 'SYSTEM' S SystemLiteral
3640: * | 'PUBLIC' S PubidLiteral S SystemLiteral
1.67 daniel 3641: *
3642: * [83] PublicID ::= 'PUBLIC' S PubidLiteral
3643: *
1.68 daniel 3644: * Returns the function returns SystemLiteral and in the second
1.67 daniel 3645: * case publicID receives PubidLiteral, is strict is off
3646: * it is possible to return NULL and have publicID set.
1.22 daniel 3647: */
3648:
1.123 daniel 3649: xmlChar *
3650: xmlParseExternalID(xmlParserCtxtPtr ctxt, xmlChar **publicID, int strict) {
3651: xmlChar *URI = NULL;
1.22 daniel 3652:
1.91 daniel 3653: SHRINK;
1.152 daniel 3654: if ((RAW == 'S') && (NXT(1) == 'Y') &&
1.40 daniel 3655: (NXT(2) == 'S') && (NXT(3) == 'T') &&
3656: (NXT(4) == 'E') && (NXT(5) == 'M')) {
3657: SKIP(6);
1.59 daniel 3658: if (!IS_BLANK(CUR)) {
3659: if ((ctxt->sax != NULL) && (ctxt->sax->error != NULL))
1.74 daniel 3660: ctxt->sax->error(ctxt->userData,
1.59 daniel 3661: "Space required after 'SYSTEM'\n");
1.123 daniel 3662: ctxt->errNo = XML_ERR_SPACE_REQUIRED;
1.59 daniel 3663: ctxt->wellFormed = 0;
3664: }
1.42 daniel 3665: SKIP_BLANKS;
1.39 daniel 3666: URI = xmlParseSystemLiteral(ctxt);
1.59 daniel 3667: if (URI == NULL) {
1.55 daniel 3668: if ((ctxt->sax != NULL) && (ctxt->sax->error != NULL))
1.74 daniel 3669: ctxt->sax->error(ctxt->userData,
1.39 daniel 3670: "xmlParseExternalID: SYSTEM, no URI\n");
1.123 daniel 3671: ctxt->errNo = XML_ERR_URI_REQUIRED;
1.59 daniel 3672: ctxt->wellFormed = 0;
3673: }
1.152 daniel 3674: } else if ((RAW == 'P') && (NXT(1) == 'U') &&
1.40 daniel 3675: (NXT(2) == 'B') && (NXT(3) == 'L') &&
3676: (NXT(4) == 'I') && (NXT(5) == 'C')) {
3677: SKIP(6);
1.59 daniel 3678: if (!IS_BLANK(CUR)) {
3679: if ((ctxt->sax != NULL) && (ctxt->sax->error != NULL))
1.74 daniel 3680: ctxt->sax->error(ctxt->userData,
1.59 daniel 3681: "Space required after 'PUBLIC'\n");
1.123 daniel 3682: ctxt->errNo = XML_ERR_SPACE_REQUIRED;
1.59 daniel 3683: ctxt->wellFormed = 0;
3684: }
1.42 daniel 3685: SKIP_BLANKS;
1.39 daniel 3686: *publicID = xmlParsePubidLiteral(ctxt);
1.59 daniel 3687: if (*publicID == NULL) {
1.55 daniel 3688: if ((ctxt->sax != NULL) && (ctxt->sax->error != NULL))
1.74 daniel 3689: ctxt->sax->error(ctxt->userData,
1.39 daniel 3690: "xmlParseExternalID: PUBLIC, no Public Identifier\n");
1.123 daniel 3691: ctxt->errNo = XML_ERR_PUBID_REQUIRED;
1.59 daniel 3692: ctxt->wellFormed = 0;
3693: }
1.67 daniel 3694: if (strict) {
3695: /*
3696: * We don't handle [83] so "S SystemLiteral" is required.
3697: */
3698: if (!IS_BLANK(CUR)) {
3699: if ((ctxt->sax != NULL) && (ctxt->sax->error != NULL))
1.74 daniel 3700: ctxt->sax->error(ctxt->userData,
1.67 daniel 3701: "Space required after the Public Identifier\n");
1.123 daniel 3702: ctxt->errNo = XML_ERR_SPACE_REQUIRED;
1.67 daniel 3703: ctxt->wellFormed = 0;
3704: }
3705: } else {
3706: /*
3707: * We handle [83] so we return immediately, if
3708: * "S SystemLiteral" is not detected. From a purely parsing
3709: * point of view that's a nice mess.
3710: */
1.135 daniel 3711: const xmlChar *ptr;
3712: GROW;
3713:
3714: ptr = CUR_PTR;
1.67 daniel 3715: if (!IS_BLANK(*ptr)) return(NULL);
3716:
3717: while (IS_BLANK(*ptr)) ptr++;
3718: if ((*ptr != '\'') || (*ptr != '"')) return(NULL);
1.59 daniel 3719: }
1.42 daniel 3720: SKIP_BLANKS;
1.39 daniel 3721: URI = xmlParseSystemLiteral(ctxt);
1.59 daniel 3722: if (URI == NULL) {
1.55 daniel 3723: if ((ctxt->sax != NULL) && (ctxt->sax->error != NULL))
1.74 daniel 3724: ctxt->sax->error(ctxt->userData,
1.39 daniel 3725: "xmlParseExternalID: PUBLIC, no URI\n");
1.123 daniel 3726: ctxt->errNo = XML_ERR_URI_REQUIRED;
1.59 daniel 3727: ctxt->wellFormed = 0;
3728: }
1.22 daniel 3729: }
1.39 daniel 3730: return(URI);
1.22 daniel 3731: }
3732:
1.50 daniel 3733: /**
3734: * xmlParseComment:
1.69 daniel 3735: * @ctxt: an XML parser context
1.50 daniel 3736: *
1.3 veillard 3737: * Skip an XML (SGML) comment <!-- .... -->
1.38 daniel 3738: * The spec says that "For compatibility, the string "--" (double-hyphen)
3739: * must not occur within comments. "
1.22 daniel 3740: *
3741: * [15] Comment ::= '<!--' ((Char - '-') | ('-' (Char - '-')))* '-->'
1.3 veillard 3742: */
1.72 daniel 3743: void
1.114 daniel 3744: xmlParseComment(xmlParserCtxtPtr ctxt) {
1.135 daniel 3745: xmlChar *buf = NULL;
3746: int len = 0;
1.140 daniel 3747: int size = XML_PARSER_BUFFER_SIZE;
1.152 daniel 3748: int q, ql;
3749: int r, rl;
3750: int cur, l;
1.140 daniel 3751: xmlParserInputState state;
1.3 veillard 3752:
3753: /*
1.22 daniel 3754: * Check that there is a comment right here.
1.3 veillard 3755: */
1.152 daniel 3756: if ((RAW != '<') || (NXT(1) != '!') ||
1.72 daniel 3757: (NXT(2) != '-') || (NXT(3) != '-')) return;
1.3 veillard 3758:
1.140 daniel 3759: state = ctxt->instate;
1.97 daniel 3760: ctxt->instate = XML_PARSER_COMMENT;
1.91 daniel 3761: SHRINK;
1.40 daniel 3762: SKIP(4);
1.135 daniel 3763: buf = (xmlChar *) xmlMalloc(size * sizeof(xmlChar));
3764: if (buf == NULL) {
3765: fprintf(stderr, "malloc of %d byte failed\n", size);
1.140 daniel 3766: ctxt->instate = state;
1.135 daniel 3767: return;
3768: }
1.152 daniel 3769: q = CUR_CHAR(ql);
3770: NEXTL(ql);
3771: r = CUR_CHAR(rl);
3772: NEXTL(rl);
3773: cur = CUR_CHAR(l);
1.135 daniel 3774: while (IS_CHAR(cur) &&
3775: ((cur != '>') ||
3776: (r != '-') || (q != '-'))) {
3777: if ((r == '-') && (q == '-')) {
1.55 daniel 3778: if ((ctxt->sax != NULL) && (ctxt->sax->error != NULL))
1.74 daniel 3779: ctxt->sax->error(ctxt->userData,
1.38 daniel 3780: "Comment must not contain '--' (double-hyphen)`\n");
1.123 daniel 3781: ctxt->errNo = XML_ERR_HYPHEN_IN_COMMENT;
1.59 daniel 3782: ctxt->wellFormed = 0;
3783: }
1.152 daniel 3784: if (len + 5 >= size) {
1.135 daniel 3785: size *= 2;
3786: buf = xmlRealloc(buf, size * sizeof(xmlChar));
3787: if (buf == NULL) {
3788: fprintf(stderr, "realloc of %d byte failed\n", size);
1.140 daniel 3789: ctxt->instate = state;
1.135 daniel 3790: return;
3791: }
3792: }
1.152 daniel 3793: COPY_BUF(ql,buf,len,q);
1.135 daniel 3794: q = r;
1.152 daniel 3795: ql = rl;
1.135 daniel 3796: r = cur;
1.152 daniel 3797: rl = l;
3798: NEXTL(l);
3799: cur = CUR_CHAR(l);
1.135 daniel 3800: if (cur == 0) {
3801: SHRINK;
3802: GROW;
1.152 daniel 3803: cur = CUR_CHAR(l);
1.135 daniel 3804: }
1.3 veillard 3805: }
1.135 daniel 3806: buf[len] = 0;
3807: if (!IS_CHAR(cur)) {
1.55 daniel 3808: if ((ctxt->sax != NULL) && (ctxt->sax->error != NULL))
1.122 daniel 3809: ctxt->sax->error(ctxt->userData,
1.135 daniel 3810: "Comment not terminated \n<!--%.50s\n", buf);
1.123 daniel 3811: ctxt->errNo = XML_ERR_COMMENT_NOT_FINISHED;
1.59 daniel 3812: ctxt->wellFormed = 0;
1.3 veillard 3813: } else {
1.40 daniel 3814: NEXT;
1.114 daniel 3815: if ((ctxt->sax != NULL) && (ctxt->sax->comment != NULL))
1.135 daniel 3816: ctxt->sax->comment(ctxt->userData, buf);
3817: xmlFree(buf);
1.3 veillard 3818: }
1.140 daniel 3819: ctxt->instate = state;
1.3 veillard 3820: }
3821:
1.50 daniel 3822: /**
3823: * xmlParsePITarget:
3824: * @ctxt: an XML parser context
3825: *
3826: * parse the name of a PI
1.22 daniel 3827: *
3828: * [17] PITarget ::= Name - (('X' | 'x') ('M' | 'm') ('L' | 'l'))
1.68 daniel 3829: *
3830: * Returns the PITarget name or NULL
1.22 daniel 3831: */
3832:
1.123 daniel 3833: xmlChar *
1.55 daniel 3834: xmlParsePITarget(xmlParserCtxtPtr ctxt) {
1.123 daniel 3835: xmlChar *name;
1.22 daniel 3836:
3837: name = xmlParseName(ctxt);
1.139 daniel 3838: if ((name != NULL) &&
1.22 daniel 3839: ((name[0] == 'x') || (name[0] == 'X')) &&
1.31 daniel 3840: ((name[1] == 'm') || (name[1] == 'M')) &&
3841: ((name[2] == 'l') || (name[2] == 'L'))) {
1.139 daniel 3842: int i;
1.151 daniel 3843: if ((name[0] = 'x') && (name[1] == 'm') &&
3844: (name[2] = 'l') && (name[3] == 0)) {
3845: if ((ctxt->sax != NULL) && (ctxt->sax->error != NULL))
3846: ctxt->sax->error(ctxt->userData,
3847: "XML declaration allowed only at the start of the document\n");
3848: ctxt->errNo = XML_ERR_RESERVED_XML_NAME;
3849: ctxt->wellFormed = 0;
3850: return(name);
3851: } else if (name[3] == 0) {
3852: if ((ctxt->sax != NULL) && (ctxt->sax->error != NULL))
3853: ctxt->sax->error(ctxt->userData, "Invalid PI name\n");
3854: ctxt->errNo = XML_ERR_RESERVED_XML_NAME;
3855: ctxt->wellFormed = 0;
3856: return(name);
3857: }
1.139 daniel 3858: for (i = 0;;i++) {
3859: if (xmlW3CPIs[i] == NULL) break;
3860: if (!xmlStrcmp(name, (const xmlChar *)xmlW3CPIs[i]))
3861: return(name);
3862: }
3863: if ((ctxt->sax != NULL) && (ctxt->sax->warning != NULL)) {
3864: ctxt->sax->warning(ctxt->userData,
1.122 daniel 3865: "xmlParsePItarget: invalid name prefix 'xml'\n");
1.123 daniel 3866: ctxt->errNo = XML_ERR_RESERVED_XML_NAME;
1.122 daniel 3867: }
1.22 daniel 3868: }
3869: return(name);
3870: }
3871:
1.50 daniel 3872: /**
3873: * xmlParsePI:
3874: * @ctxt: an XML parser context
3875: *
3876: * parse an XML Processing Instruction.
1.22 daniel 3877: *
3878: * [16] PI ::= '<?' PITarget (S (Char* - (Char* '?>' Char*)))? '?>'
1.68 daniel 3879: *
1.69 daniel 3880: * The processing is transfered to SAX once parsed.
1.3 veillard 3881: */
3882:
1.55 daniel 3883: void
3884: xmlParsePI(xmlParserCtxtPtr ctxt) {
1.135 daniel 3885: xmlChar *buf = NULL;
3886: int len = 0;
1.140 daniel 3887: int size = XML_PARSER_BUFFER_SIZE;
1.152 daniel 3888: int cur, l;
1.123 daniel 3889: xmlChar *target;
1.140 daniel 3890: xmlParserInputState state;
1.22 daniel 3891:
1.152 daniel 3892: if ((RAW == '<') && (NXT(1) == '?')) {
1.140 daniel 3893: state = ctxt->instate;
3894: ctxt->instate = XML_PARSER_PI;
1.3 veillard 3895: /*
3896: * this is a Processing Instruction.
3897: */
1.40 daniel 3898: SKIP(2);
1.91 daniel 3899: SHRINK;
1.3 veillard 3900:
3901: /*
1.22 daniel 3902: * Parse the target name and check for special support like
3903: * namespace.
1.3 veillard 3904: */
1.22 daniel 3905: target = xmlParsePITarget(ctxt);
3906: if (target != NULL) {
1.156 daniel 3907: if ((RAW == '?') && (NXT(1) == '>')) {
3908: SKIP(2);
3909:
3910: /*
3911: * SAX: PI detected.
3912: */
3913: if ((ctxt->sax) &&
3914: (ctxt->sax->processingInstruction != NULL))
3915: ctxt->sax->processingInstruction(ctxt->userData,
3916: target, NULL);
3917: ctxt->instate = state;
3918: return;
3919: }
1.135 daniel 3920: buf = (xmlChar *) xmlMalloc(size * sizeof(xmlChar));
3921: if (buf == NULL) {
3922: fprintf(stderr, "malloc of %d byte failed\n", size);
1.140 daniel 3923: ctxt->instate = state;
1.135 daniel 3924: return;
3925: }
3926: cur = CUR;
3927: if (!IS_BLANK(cur)) {
1.114 daniel 3928: if ((ctxt->sax != NULL) && (ctxt->sax->error != NULL))
3929: ctxt->sax->error(ctxt->userData,
3930: "xmlParsePI: PI %s space expected\n", target);
1.123 daniel 3931: ctxt->errNo = XML_ERR_SPACE_REQUIRED;
1.114 daniel 3932: ctxt->wellFormed = 0;
3933: }
3934: SKIP_BLANKS;
1.152 daniel 3935: cur = CUR_CHAR(l);
1.135 daniel 3936: while (IS_CHAR(cur) &&
3937: ((cur != '?') || (NXT(1) != '>'))) {
1.152 daniel 3938: if (len + 5 >= size) {
1.135 daniel 3939: size *= 2;
3940: buf = xmlRealloc(buf, size * sizeof(xmlChar));
3941: if (buf == NULL) {
3942: fprintf(stderr, "realloc of %d byte failed\n", size);
1.140 daniel 3943: ctxt->instate = state;
1.135 daniel 3944: return;
3945: }
3946: }
1.152 daniel 3947: COPY_BUF(l,buf,len,cur);
3948: NEXTL(l);
3949: cur = CUR_CHAR(l);
1.135 daniel 3950: if (cur == 0) {
3951: SHRINK;
3952: GROW;
1.152 daniel 3953: cur = CUR_CHAR(l);
1.135 daniel 3954: }
3955: }
3956: buf[len] = 0;
1.152 daniel 3957: if (cur != '?') {
1.72 daniel 3958: if ((ctxt->sax != NULL) && (ctxt->sax->error != NULL))
1.74 daniel 3959: ctxt->sax->error(ctxt->userData,
1.72 daniel 3960: "xmlParsePI: PI %s never end ...\n", target);
1.123 daniel 3961: ctxt->errNo = XML_ERR_PI_NOT_FINISHED;
1.72 daniel 3962: ctxt->wellFormed = 0;
1.22 daniel 3963: } else {
1.72 daniel 3964: SKIP(2);
1.44 daniel 3965:
1.72 daniel 3966: /*
3967: * SAX: PI detected.
3968: */
3969: if ((ctxt->sax) &&
3970: (ctxt->sax->processingInstruction != NULL))
1.99 daniel 3971: ctxt->sax->processingInstruction(ctxt->userData,
1.135 daniel 3972: target, buf);
1.22 daniel 3973: }
1.135 daniel 3974: xmlFree(buf);
1.119 daniel 3975: xmlFree(target);
1.3 veillard 3976: } else {
1.55 daniel 3977: if ((ctxt->sax != NULL) && (ctxt->sax->error != NULL))
1.99 daniel 3978: ctxt->sax->error(ctxt->userData,
3979: "xmlParsePI : no target name\n");
1.123 daniel 3980: ctxt->errNo = XML_ERR_PI_NOT_STARTED;
1.59 daniel 3981: ctxt->wellFormed = 0;
1.22 daniel 3982: }
1.140 daniel 3983: ctxt->instate = state;
1.22 daniel 3984: }
3985: }
3986:
1.50 daniel 3987: /**
3988: * xmlParseNotationDecl:
3989: * @ctxt: an XML parser context
3990: *
3991: * parse a notation declaration
1.22 daniel 3992: *
3993: * [82] NotationDecl ::= '<!NOTATION' S Name S (ExternalID | PublicID) S? '>'
3994: *
3995: * Hence there is actually 3 choices:
3996: * 'PUBLIC' S PubidLiteral
3997: * 'PUBLIC' S PubidLiteral S SystemLiteral
3998: * and 'SYSTEM' S SystemLiteral
1.50 daniel 3999: *
1.67 daniel 4000: * See the NOTE on xmlParseExternalID().
1.22 daniel 4001: */
4002:
1.55 daniel 4003: void
4004: xmlParseNotationDecl(xmlParserCtxtPtr ctxt) {
1.123 daniel 4005: xmlChar *name;
4006: xmlChar *Pubid;
4007: xmlChar *Systemid;
1.22 daniel 4008:
1.152 daniel 4009: if ((RAW == '<') && (NXT(1) == '!') &&
1.40 daniel 4010: (NXT(2) == 'N') && (NXT(3) == 'O') &&
4011: (NXT(4) == 'T') && (NXT(5) == 'A') &&
4012: (NXT(6) == 'T') && (NXT(7) == 'I') &&
1.67 daniel 4013: (NXT(8) == 'O') && (NXT(9) == 'N')) {
1.91 daniel 4014: SHRINK;
1.40 daniel 4015: SKIP(10);
1.67 daniel 4016: if (!IS_BLANK(CUR)) {
4017: if ((ctxt->sax != NULL) && (ctxt->sax->error != NULL))
1.122 daniel 4018: ctxt->sax->error(ctxt->userData,
4019: "Space required after '<!NOTATION'\n");
1.123 daniel 4020: ctxt->errNo = XML_ERR_SPACE_REQUIRED;
1.67 daniel 4021: ctxt->wellFormed = 0;
4022: return;
4023: }
4024: SKIP_BLANKS;
1.22 daniel 4025:
4026: name = xmlParseName(ctxt);
4027: if (name == NULL) {
1.55 daniel 4028: if ((ctxt->sax != NULL) && (ctxt->sax->error != NULL))
1.122 daniel 4029: ctxt->sax->error(ctxt->userData,
4030: "NOTATION: Name expected here\n");
1.123 daniel 4031: ctxt->errNo = XML_ERR_NOTATION_NOT_STARTED;
1.67 daniel 4032: ctxt->wellFormed = 0;
4033: return;
4034: }
4035: if (!IS_BLANK(CUR)) {
4036: if ((ctxt->sax != NULL) && (ctxt->sax->error != NULL))
1.74 daniel 4037: ctxt->sax->error(ctxt->userData,
1.67 daniel 4038: "Space required after the NOTATION name'\n");
1.123 daniel 4039: ctxt->errNo = XML_ERR_SPACE_REQUIRED;
1.59 daniel 4040: ctxt->wellFormed = 0;
1.22 daniel 4041: return;
4042: }
1.42 daniel 4043: SKIP_BLANKS;
1.67 daniel 4044:
1.22 daniel 4045: /*
1.67 daniel 4046: * Parse the IDs.
1.22 daniel 4047: */
1.160 daniel 4048: Systemid = xmlParseExternalID(ctxt, &Pubid, 0);
1.67 daniel 4049: SKIP_BLANKS;
4050:
1.152 daniel 4051: if (RAW == '>') {
1.40 daniel 4052: NEXT;
1.72 daniel 4053: if ((ctxt->sax != NULL) && (ctxt->sax->notationDecl != NULL))
1.74 daniel 4054: ctxt->sax->notationDecl(ctxt->userData, name, Pubid, Systemid);
1.67 daniel 4055: } else {
4056: if ((ctxt->sax != NULL) && (ctxt->sax->error != NULL))
1.74 daniel 4057: ctxt->sax->error(ctxt->userData,
1.67 daniel 4058: "'>' required to close NOTATION declaration\n");
1.123 daniel 4059: ctxt->errNo = XML_ERR_NOTATION_NOT_FINISHED;
1.67 daniel 4060: ctxt->wellFormed = 0;
4061: }
1.119 daniel 4062: xmlFree(name);
4063: if (Systemid != NULL) xmlFree(Systemid);
4064: if (Pubid != NULL) xmlFree(Pubid);
1.22 daniel 4065: }
4066: }
4067:
1.50 daniel 4068: /**
4069: * xmlParseEntityDecl:
4070: * @ctxt: an XML parser context
4071: *
4072: * parse <!ENTITY declarations
1.22 daniel 4073: *
4074: * [70] EntityDecl ::= GEDecl | PEDecl
4075: *
4076: * [71] GEDecl ::= '<!ENTITY' S Name S EntityDef S? '>'
4077: *
4078: * [72] PEDecl ::= '<!ENTITY' S '%' S Name S PEDef S? '>'
4079: *
4080: * [73] EntityDef ::= EntityValue | (ExternalID NDataDecl?)
4081: *
4082: * [74] PEDef ::= EntityValue | ExternalID
1.24 daniel 4083: *
4084: * [76] NDataDecl ::= S 'NDATA' S Name
1.99 daniel 4085: *
4086: * [ VC: Notation Declared ]
1.116 daniel 4087: * The Name must match the declared name of a notation.
1.22 daniel 4088: */
4089:
1.55 daniel 4090: void
4091: xmlParseEntityDecl(xmlParserCtxtPtr ctxt) {
1.123 daniel 4092: xmlChar *name = NULL;
4093: xmlChar *value = NULL;
4094: xmlChar *URI = NULL, *literal = NULL;
4095: xmlChar *ndata = NULL;
1.39 daniel 4096: int isParameter = 0;
1.123 daniel 4097: xmlChar *orig = NULL;
1.22 daniel 4098:
1.94 daniel 4099: GROW;
1.152 daniel 4100: if ((RAW == '<') && (NXT(1) == '!') &&
1.40 daniel 4101: (NXT(2) == 'E') && (NXT(3) == 'N') &&
4102: (NXT(4) == 'T') && (NXT(5) == 'I') &&
1.59 daniel 4103: (NXT(6) == 'T') && (NXT(7) == 'Y')) {
1.96 daniel 4104: ctxt->instate = XML_PARSER_ENTITY_DECL;
1.91 daniel 4105: SHRINK;
1.40 daniel 4106: SKIP(8);
1.59 daniel 4107: if (!IS_BLANK(CUR)) {
4108: if ((ctxt->sax != NULL) && (ctxt->sax->error != NULL))
1.122 daniel 4109: ctxt->sax->error(ctxt->userData,
4110: "Space required after '<!ENTITY'\n");
1.123 daniel 4111: ctxt->errNo = XML_ERR_SPACE_REQUIRED;
1.59 daniel 4112: ctxt->wellFormed = 0;
4113: }
4114: SKIP_BLANKS;
1.40 daniel 4115:
1.152 daniel 4116: if (RAW == '%') {
1.40 daniel 4117: NEXT;
1.59 daniel 4118: if (!IS_BLANK(CUR)) {
4119: if ((ctxt->sax != NULL) && (ctxt->sax->error != NULL))
1.122 daniel 4120: ctxt->sax->error(ctxt->userData,
4121: "Space required after '%'\n");
1.123 daniel 4122: ctxt->errNo = XML_ERR_SPACE_REQUIRED;
1.59 daniel 4123: ctxt->wellFormed = 0;
4124: }
1.42 daniel 4125: SKIP_BLANKS;
1.39 daniel 4126: isParameter = 1;
1.22 daniel 4127: }
4128:
4129: name = xmlParseName(ctxt);
1.24 daniel 4130: if (name == NULL) {
1.55 daniel 4131: if ((ctxt->sax != NULL) && (ctxt->sax->error != NULL))
1.74 daniel 4132: ctxt->sax->error(ctxt->userData, "xmlParseEntityDecl: no name\n");
1.123 daniel 4133: ctxt->errNo = XML_ERR_NAME_REQUIRED;
1.59 daniel 4134: ctxt->wellFormed = 0;
1.24 daniel 4135: return;
4136: }
1.59 daniel 4137: if (!IS_BLANK(CUR)) {
4138: if ((ctxt->sax != NULL) && (ctxt->sax->error != NULL))
1.74 daniel 4139: ctxt->sax->error(ctxt->userData,
1.59 daniel 4140: "Space required after the entity name\n");
1.123 daniel 4141: ctxt->errNo = XML_ERR_SPACE_REQUIRED;
1.59 daniel 4142: ctxt->wellFormed = 0;
4143: }
1.42 daniel 4144: SKIP_BLANKS;
1.24 daniel 4145:
1.22 daniel 4146: /*
1.68 daniel 4147: * handle the various case of definitions...
1.22 daniel 4148: */
1.39 daniel 4149: if (isParameter) {
1.152 daniel 4150: if ((RAW == '"') || (RAW == '\''))
1.78 daniel 4151: value = xmlParseEntityValue(ctxt, &orig);
1.39 daniel 4152: if (value) {
1.72 daniel 4153: if ((ctxt->sax != NULL) && (ctxt->sax->entityDecl != NULL))
1.74 daniel 4154: ctxt->sax->entityDecl(ctxt->userData, name,
1.39 daniel 4155: XML_INTERNAL_PARAMETER_ENTITY,
4156: NULL, NULL, value);
4157: }
1.24 daniel 4158: else {
1.67 daniel 4159: URI = xmlParseExternalID(ctxt, &literal, 1);
1.39 daniel 4160: if (URI) {
1.72 daniel 4161: if ((ctxt->sax != NULL) && (ctxt->sax->entityDecl != NULL))
1.74 daniel 4162: ctxt->sax->entityDecl(ctxt->userData, name,
1.39 daniel 4163: XML_EXTERNAL_PARAMETER_ENTITY,
4164: literal, URI, NULL);
4165: }
1.24 daniel 4166: }
4167: } else {
1.152 daniel 4168: if ((RAW == '"') || (RAW == '\'')) {
1.78 daniel 4169: value = xmlParseEntityValue(ctxt, &orig);
1.72 daniel 4170: if ((ctxt->sax != NULL) && (ctxt->sax->entityDecl != NULL))
1.74 daniel 4171: ctxt->sax->entityDecl(ctxt->userData, name,
1.39 daniel 4172: XML_INTERNAL_GENERAL_ENTITY,
4173: NULL, NULL, value);
4174: } else {
1.67 daniel 4175: URI = xmlParseExternalID(ctxt, &literal, 1);
1.152 daniel 4176: if ((RAW != '>') && (!IS_BLANK(CUR))) {
1.59 daniel 4177: if ((ctxt->sax != NULL) && (ctxt->sax->error != NULL))
1.74 daniel 4178: ctxt->sax->error(ctxt->userData,
1.59 daniel 4179: "Space required before 'NDATA'\n");
1.123 daniel 4180: ctxt->errNo = XML_ERR_SPACE_REQUIRED;
1.59 daniel 4181: ctxt->wellFormed = 0;
4182: }
1.42 daniel 4183: SKIP_BLANKS;
1.152 daniel 4184: if ((RAW == 'N') && (NXT(1) == 'D') &&
1.40 daniel 4185: (NXT(2) == 'A') && (NXT(3) == 'T') &&
4186: (NXT(4) == 'A')) {
4187: SKIP(5);
1.59 daniel 4188: if (!IS_BLANK(CUR)) {
4189: if ((ctxt->sax != NULL) && (ctxt->sax->error != NULL))
1.74 daniel 4190: ctxt->sax->error(ctxt->userData,
1.59 daniel 4191: "Space required after 'NDATA'\n");
1.123 daniel 4192: ctxt->errNo = XML_ERR_SPACE_REQUIRED;
1.59 daniel 4193: ctxt->wellFormed = 0;
4194: }
1.42 daniel 4195: SKIP_BLANKS;
1.24 daniel 4196: ndata = xmlParseName(ctxt);
1.116 daniel 4197: if ((ctxt->sax != NULL) &&
4198: (ctxt->sax->unparsedEntityDecl != NULL))
4199: ctxt->sax->unparsedEntityDecl(ctxt->userData, name,
1.39 daniel 4200: literal, URI, ndata);
4201: } else {
1.72 daniel 4202: if ((ctxt->sax != NULL) && (ctxt->sax->entityDecl != NULL))
1.74 daniel 4203: ctxt->sax->entityDecl(ctxt->userData, name,
1.39 daniel 4204: XML_EXTERNAL_GENERAL_PARSED_ENTITY,
4205: literal, URI, NULL);
1.24 daniel 4206: }
4207: }
4208: }
1.42 daniel 4209: SKIP_BLANKS;
1.152 daniel 4210: if (RAW != '>') {
1.55 daniel 4211: if ((ctxt->sax != NULL) && (ctxt->sax->error != NULL))
1.74 daniel 4212: ctxt->sax->error(ctxt->userData,
1.31 daniel 4213: "xmlParseEntityDecl: entity %s not terminated\n", name);
1.123 daniel 4214: ctxt->errNo = XML_ERR_ENTITY_NOT_FINISHED;
1.59 daniel 4215: ctxt->wellFormed = 0;
1.24 daniel 4216: } else
1.40 daniel 4217: NEXT;
1.78 daniel 4218: if (orig != NULL) {
4219: /*
1.98 daniel 4220: * Ugly mechanism to save the raw entity value.
1.78 daniel 4221: */
4222: xmlEntityPtr cur = NULL;
4223:
1.98 daniel 4224: if (isParameter) {
4225: if ((ctxt->sax != NULL) &&
4226: (ctxt->sax->getParameterEntity != NULL))
1.120 daniel 4227: cur = ctxt->sax->getParameterEntity(ctxt->userData, name);
1.98 daniel 4228: } else {
4229: if ((ctxt->sax != NULL) &&
4230: (ctxt->sax->getEntity != NULL))
1.120 daniel 4231: cur = ctxt->sax->getEntity(ctxt->userData, name);
1.98 daniel 4232: }
4233: if (cur != NULL) {
4234: if (cur->orig != NULL)
1.119 daniel 4235: xmlFree(orig);
1.98 daniel 4236: else
4237: cur->orig = orig;
4238: } else
1.119 daniel 4239: xmlFree(orig);
1.78 daniel 4240: }
1.119 daniel 4241: if (name != NULL) xmlFree(name);
4242: if (value != NULL) xmlFree(value);
4243: if (URI != NULL) xmlFree(URI);
4244: if (literal != NULL) xmlFree(literal);
4245: if (ndata != NULL) xmlFree(ndata);
1.22 daniel 4246: }
4247: }
4248:
1.50 daniel 4249: /**
1.59 daniel 4250: * xmlParseDefaultDecl:
4251: * @ctxt: an XML parser context
4252: * @value: Receive a possible fixed default value for the attribute
4253: *
4254: * Parse an attribute default declaration
4255: *
4256: * [60] DefaultDecl ::= '#REQUIRED' | '#IMPLIED' | (('#FIXED' S)? AttValue)
4257: *
1.99 daniel 4258: * [ VC: Required Attribute ]
1.117 daniel 4259: * if the default declaration is the keyword #REQUIRED, then the
4260: * attribute must be specified for all elements of the type in the
4261: * attribute-list declaration.
1.99 daniel 4262: *
4263: * [ VC: Attribute Default Legal ]
1.102 daniel 4264: * The declared default value must meet the lexical constraints of
4265: * the declared attribute type c.f. xmlValidateAttributeDecl()
1.99 daniel 4266: *
4267: * [ VC: Fixed Attribute Default ]
1.117 daniel 4268: * if an attribute has a default value declared with the #FIXED
4269: * keyword, instances of that attribute must match the default value.
1.99 daniel 4270: *
4271: * [ WFC: No < in Attribute Values ]
4272: * handled in xmlParseAttValue()
4273: *
1.59 daniel 4274: * returns: XML_ATTRIBUTE_NONE, XML_ATTRIBUTE_REQUIRED, XML_ATTRIBUTE_IMPLIED
4275: * or XML_ATTRIBUTE_FIXED.
4276: */
4277:
4278: int
1.123 daniel 4279: xmlParseDefaultDecl(xmlParserCtxtPtr ctxt, xmlChar **value) {
1.59 daniel 4280: int val;
1.123 daniel 4281: xmlChar *ret;
1.59 daniel 4282:
4283: *value = NULL;
1.152 daniel 4284: if ((RAW == '#') && (NXT(1) == 'R') &&
1.59 daniel 4285: (NXT(2) == 'E') && (NXT(3) == 'Q') &&
4286: (NXT(4) == 'U') && (NXT(5) == 'I') &&
4287: (NXT(6) == 'R') && (NXT(7) == 'E') &&
4288: (NXT(8) == 'D')) {
4289: SKIP(9);
4290: return(XML_ATTRIBUTE_REQUIRED);
4291: }
1.152 daniel 4292: if ((RAW == '#') && (NXT(1) == 'I') &&
1.59 daniel 4293: (NXT(2) == 'M') && (NXT(3) == 'P') &&
4294: (NXT(4) == 'L') && (NXT(5) == 'I') &&
4295: (NXT(6) == 'E') && (NXT(7) == 'D')) {
4296: SKIP(8);
4297: return(XML_ATTRIBUTE_IMPLIED);
4298: }
4299: val = XML_ATTRIBUTE_NONE;
1.152 daniel 4300: if ((RAW == '#') && (NXT(1) == 'F') &&
1.59 daniel 4301: (NXT(2) == 'I') && (NXT(3) == 'X') &&
4302: (NXT(4) == 'E') && (NXT(5) == 'D')) {
4303: SKIP(6);
4304: val = XML_ATTRIBUTE_FIXED;
4305: if (!IS_BLANK(CUR)) {
4306: if ((ctxt->sax != NULL) && (ctxt->sax->error != NULL))
1.122 daniel 4307: ctxt->sax->error(ctxt->userData,
4308: "Space required after '#FIXED'\n");
1.123 daniel 4309: ctxt->errNo = XML_ERR_SPACE_REQUIRED;
1.59 daniel 4310: ctxt->wellFormed = 0;
4311: }
4312: SKIP_BLANKS;
4313: }
4314: ret = xmlParseAttValue(ctxt);
1.96 daniel 4315: ctxt->instate = XML_PARSER_DTD;
1.59 daniel 4316: if (ret == NULL) {
4317: if ((ctxt->sax != NULL) && (ctxt->sax->error != NULL))
1.74 daniel 4318: ctxt->sax->error(ctxt->userData,
1.59 daniel 4319: "Attribute default value declaration error\n");
4320: ctxt->wellFormed = 0;
4321: } else
4322: *value = ret;
4323: return(val);
4324: }
4325:
4326: /**
1.66 daniel 4327: * xmlParseNotationType:
4328: * @ctxt: an XML parser context
4329: *
4330: * parse an Notation attribute type.
4331: *
1.99 daniel 4332: * Note: the leading 'NOTATION' S part has already being parsed...
4333: *
1.66 daniel 4334: * [58] NotationType ::= 'NOTATION' S '(' S? Name (S? '|' S? Name)* S? ')'
4335: *
1.99 daniel 4336: * [ VC: Notation Attributes ]
1.117 daniel 4337: * Values of this type must match one of the notation names included
1.99 daniel 4338: * in the declaration; all notation names in the declaration must be declared.
1.66 daniel 4339: *
4340: * Returns: the notation attribute tree built while parsing
4341: */
4342:
4343: xmlEnumerationPtr
4344: xmlParseNotationType(xmlParserCtxtPtr ctxt) {
1.123 daniel 4345: xmlChar *name;
1.66 daniel 4346: xmlEnumerationPtr ret = NULL, last = NULL, cur;
4347:
1.152 daniel 4348: if (RAW != '(') {
1.66 daniel 4349: if ((ctxt->sax != NULL) && (ctxt->sax->error != NULL))
1.122 daniel 4350: ctxt->sax->error(ctxt->userData,
4351: "'(' required to start 'NOTATION'\n");
1.123 daniel 4352: ctxt->errNo = XML_ERR_NOTATION_NOT_STARTED;
1.66 daniel 4353: ctxt->wellFormed = 0;
4354: return(NULL);
4355: }
1.91 daniel 4356: SHRINK;
1.66 daniel 4357: do {
4358: NEXT;
4359: SKIP_BLANKS;
4360: name = xmlParseName(ctxt);
4361: if (name == NULL) {
4362: if ((ctxt->sax != NULL) && (ctxt->sax->error != NULL))
1.74 daniel 4363: ctxt->sax->error(ctxt->userData,
1.66 daniel 4364: "Name expected in NOTATION declaration\n");
1.123 daniel 4365: ctxt->errNo = XML_ERR_NAME_REQUIRED;
1.66 daniel 4366: ctxt->wellFormed = 0;
4367: return(ret);
4368: }
4369: cur = xmlCreateEnumeration(name);
1.119 daniel 4370: xmlFree(name);
1.66 daniel 4371: if (cur == NULL) return(ret);
4372: if (last == NULL) ret = last = cur;
4373: else {
4374: last->next = cur;
4375: last = cur;
4376: }
4377: SKIP_BLANKS;
1.152 daniel 4378: } while (RAW == '|');
4379: if (RAW != ')') {
1.66 daniel 4380: if ((ctxt->sax != NULL) && (ctxt->sax->error != NULL))
1.74 daniel 4381: ctxt->sax->error(ctxt->userData,
1.66 daniel 4382: "')' required to finish NOTATION declaration\n");
1.123 daniel 4383: ctxt->errNo = XML_ERR_NOTATION_NOT_FINISHED;
1.66 daniel 4384: ctxt->wellFormed = 0;
4385: return(ret);
4386: }
4387: NEXT;
4388: return(ret);
4389: }
4390:
4391: /**
4392: * xmlParseEnumerationType:
4393: * @ctxt: an XML parser context
4394: *
4395: * parse an Enumeration attribute type.
4396: *
4397: * [59] Enumeration ::= '(' S? Nmtoken (S? '|' S? Nmtoken)* S? ')'
4398: *
1.99 daniel 4399: * [ VC: Enumeration ]
1.117 daniel 4400: * Values of this type must match one of the Nmtoken tokens in
1.99 daniel 4401: * the declaration
4402: *
1.66 daniel 4403: * Returns: the enumeration attribute tree built while parsing
4404: */
4405:
4406: xmlEnumerationPtr
4407: xmlParseEnumerationType(xmlParserCtxtPtr ctxt) {
1.123 daniel 4408: xmlChar *name;
1.66 daniel 4409: xmlEnumerationPtr ret = NULL, last = NULL, cur;
4410:
1.152 daniel 4411: if (RAW != '(') {
1.66 daniel 4412: if ((ctxt->sax != NULL) && (ctxt->sax->error != NULL))
1.74 daniel 4413: ctxt->sax->error(ctxt->userData,
1.66 daniel 4414: "'(' required to start ATTLIST enumeration\n");
1.123 daniel 4415: ctxt->errNo = XML_ERR_ATTLIST_NOT_STARTED;
1.66 daniel 4416: ctxt->wellFormed = 0;
4417: return(NULL);
4418: }
1.91 daniel 4419: SHRINK;
1.66 daniel 4420: do {
4421: NEXT;
4422: SKIP_BLANKS;
4423: name = xmlParseNmtoken(ctxt);
4424: if (name == NULL) {
4425: if ((ctxt->sax != NULL) && (ctxt->sax->error != NULL))
1.74 daniel 4426: ctxt->sax->error(ctxt->userData,
1.66 daniel 4427: "NmToken expected in ATTLIST enumeration\n");
1.123 daniel 4428: ctxt->errNo = XML_ERR_NMTOKEN_REQUIRED;
1.66 daniel 4429: ctxt->wellFormed = 0;
4430: return(ret);
4431: }
4432: cur = xmlCreateEnumeration(name);
1.119 daniel 4433: xmlFree(name);
1.66 daniel 4434: if (cur == NULL) return(ret);
4435: if (last == NULL) ret = last = cur;
4436: else {
4437: last->next = cur;
4438: last = cur;
4439: }
4440: SKIP_BLANKS;
1.152 daniel 4441: } while (RAW == '|');
4442: if (RAW != ')') {
1.66 daniel 4443: if ((ctxt->sax != NULL) && (ctxt->sax->error != NULL))
1.74 daniel 4444: ctxt->sax->error(ctxt->userData,
1.66 daniel 4445: "')' required to finish ATTLIST enumeration\n");
1.123 daniel 4446: ctxt->errNo = XML_ERR_ATTLIST_NOT_FINISHED;
1.66 daniel 4447: ctxt->wellFormed = 0;
4448: return(ret);
4449: }
4450: NEXT;
4451: return(ret);
4452: }
4453:
4454: /**
1.50 daniel 4455: * xmlParseEnumeratedType:
4456: * @ctxt: an XML parser context
1.66 daniel 4457: * @tree: the enumeration tree built while parsing
1.50 daniel 4458: *
1.66 daniel 4459: * parse an Enumerated attribute type.
1.22 daniel 4460: *
4461: * [57] EnumeratedType ::= NotationType | Enumeration
4462: *
4463: * [58] NotationType ::= 'NOTATION' S '(' S? Name (S? '|' S? Name)* S? ')'
4464: *
1.50 daniel 4465: *
1.66 daniel 4466: * Returns: XML_ATTRIBUTE_ENUMERATION or XML_ATTRIBUTE_NOTATION
1.22 daniel 4467: */
4468:
1.66 daniel 4469: int
4470: xmlParseEnumeratedType(xmlParserCtxtPtr ctxt, xmlEnumerationPtr *tree) {
1.152 daniel 4471: if ((RAW == 'N') && (NXT(1) == 'O') &&
1.66 daniel 4472: (NXT(2) == 'T') && (NXT(3) == 'A') &&
4473: (NXT(4) == 'T') && (NXT(5) == 'I') &&
4474: (NXT(6) == 'O') && (NXT(7) == 'N')) {
4475: SKIP(8);
4476: if (!IS_BLANK(CUR)) {
4477: if ((ctxt->sax != NULL) && (ctxt->sax->error != NULL))
1.122 daniel 4478: ctxt->sax->error(ctxt->userData,
4479: "Space required after 'NOTATION'\n");
1.123 daniel 4480: ctxt->errNo = XML_ERR_SPACE_REQUIRED;
1.66 daniel 4481: ctxt->wellFormed = 0;
4482: return(0);
4483: }
4484: SKIP_BLANKS;
4485: *tree = xmlParseNotationType(ctxt);
4486: if (*tree == NULL) return(0);
4487: return(XML_ATTRIBUTE_NOTATION);
4488: }
4489: *tree = xmlParseEnumerationType(ctxt);
4490: if (*tree == NULL) return(0);
4491: return(XML_ATTRIBUTE_ENUMERATION);
1.22 daniel 4492: }
4493:
1.50 daniel 4494: /**
4495: * xmlParseAttributeType:
4496: * @ctxt: an XML parser context
1.66 daniel 4497: * @tree: the enumeration tree built while parsing
1.50 daniel 4498: *
1.59 daniel 4499: * parse the Attribute list def for an element
1.22 daniel 4500: *
4501: * [54] AttType ::= StringType | TokenizedType | EnumeratedType
4502: *
4503: * [55] StringType ::= 'CDATA'
4504: *
4505: * [56] TokenizedType ::= 'ID' | 'IDREF' | 'IDREFS' | 'ENTITY' |
4506: * 'ENTITIES' | 'NMTOKEN' | 'NMTOKENS'
1.50 daniel 4507: *
1.102 daniel 4508: * Validity constraints for attribute values syntax are checked in
4509: * xmlValidateAttributeValue()
4510: *
1.99 daniel 4511: * [ VC: ID ]
1.117 daniel 4512: * Values of type ID must match the Name production. A name must not
1.99 daniel 4513: * appear more than once in an XML document as a value of this type;
4514: * i.e., ID values must uniquely identify the elements which bear them.
4515: *
4516: * [ VC: One ID per Element Type ]
1.117 daniel 4517: * No element type may have more than one ID attribute specified.
1.99 daniel 4518: *
4519: * [ VC: ID Attribute Default ]
1.117 daniel 4520: * An ID attribute must have a declared default of #IMPLIED or #REQUIRED.
1.99 daniel 4521: *
4522: * [ VC: IDREF ]
1.102 daniel 4523: * Values of type IDREF must match the Name production, and values
1.140 daniel 4524: * of type IDREFS must match Names; each IDREF Name must match the value
1.117 daniel 4525: * of an ID attribute on some element in the XML document; i.e. IDREF
1.99 daniel 4526: * values must match the value of some ID attribute.
4527: *
4528: * [ VC: Entity Name ]
1.102 daniel 4529: * Values of type ENTITY must match the Name production, values
1.140 daniel 4530: * of type ENTITIES must match Names; each Entity Name must match the
1.117 daniel 4531: * name of an unparsed entity declared in the DTD.
1.99 daniel 4532: *
4533: * [ VC: Name Token ]
1.102 daniel 4534: * Values of type NMTOKEN must match the Nmtoken production; values
1.99 daniel 4535: * of type NMTOKENS must match Nmtokens.
4536: *
1.69 daniel 4537: * Returns the attribute type
1.22 daniel 4538: */
1.59 daniel 4539: int
1.66 daniel 4540: xmlParseAttributeType(xmlParserCtxtPtr ctxt, xmlEnumerationPtr *tree) {
1.91 daniel 4541: SHRINK;
1.152 daniel 4542: if ((RAW == 'C') && (NXT(1) == 'D') &&
1.40 daniel 4543: (NXT(2) == 'A') && (NXT(3) == 'T') &&
4544: (NXT(4) == 'A')) {
4545: SKIP(5);
1.66 daniel 4546: return(XML_ATTRIBUTE_CDATA);
1.152 daniel 4547: } else if ((RAW == 'I') && (NXT(1) == 'D') &&
1.40 daniel 4548: (NXT(2) == 'R') && (NXT(3) == 'E') &&
1.97 daniel 4549: (NXT(4) == 'F') && (NXT(5) == 'S')) {
4550: SKIP(6);
4551: return(XML_ATTRIBUTE_IDREFS);
1.152 daniel 4552: } else if ((RAW == 'I') && (NXT(1) == 'D') &&
1.97 daniel 4553: (NXT(2) == 'R') && (NXT(3) == 'E') &&
1.40 daniel 4554: (NXT(4) == 'F')) {
4555: SKIP(5);
1.59 daniel 4556: return(XML_ATTRIBUTE_IDREF);
1.152 daniel 4557: } else if ((RAW == 'I') && (NXT(1) == 'D')) {
1.66 daniel 4558: SKIP(2);
4559: return(XML_ATTRIBUTE_ID);
1.152 daniel 4560: } else if ((RAW == 'E') && (NXT(1) == 'N') &&
1.40 daniel 4561: (NXT(2) == 'T') && (NXT(3) == 'I') &&
4562: (NXT(4) == 'T') && (NXT(5) == 'Y')) {
4563: SKIP(6);
1.59 daniel 4564: return(XML_ATTRIBUTE_ENTITY);
1.152 daniel 4565: } else if ((RAW == 'E') && (NXT(1) == 'N') &&
1.40 daniel 4566: (NXT(2) == 'T') && (NXT(3) == 'I') &&
4567: (NXT(4) == 'T') && (NXT(5) == 'I') &&
4568: (NXT(6) == 'E') && (NXT(7) == 'S')) {
4569: SKIP(8);
1.59 daniel 4570: return(XML_ATTRIBUTE_ENTITIES);
1.152 daniel 4571: } else if ((RAW == 'N') && (NXT(1) == 'M') &&
1.40 daniel 4572: (NXT(2) == 'T') && (NXT(3) == 'O') &&
4573: (NXT(4) == 'K') && (NXT(5) == 'E') &&
1.66 daniel 4574: (NXT(6) == 'N') && (NXT(7) == 'S')) {
4575: SKIP(8);
4576: return(XML_ATTRIBUTE_NMTOKENS);
1.152 daniel 4577: } else if ((RAW == 'N') && (NXT(1) == 'M') &&
1.66 daniel 4578: (NXT(2) == 'T') && (NXT(3) == 'O') &&
4579: (NXT(4) == 'K') && (NXT(5) == 'E') &&
1.40 daniel 4580: (NXT(6) == 'N')) {
4581: SKIP(7);
1.59 daniel 4582: return(XML_ATTRIBUTE_NMTOKEN);
1.22 daniel 4583: }
1.66 daniel 4584: return(xmlParseEnumeratedType(ctxt, tree));
1.22 daniel 4585: }
4586:
1.50 daniel 4587: /**
4588: * xmlParseAttributeListDecl:
4589: * @ctxt: an XML parser context
4590: *
4591: * : parse the Attribute list def for an element
1.22 daniel 4592: *
4593: * [52] AttlistDecl ::= '<!ATTLIST' S Name AttDef* S? '>'
4594: *
4595: * [53] AttDef ::= S Name S AttType S DefaultDecl
1.50 daniel 4596: *
1.22 daniel 4597: */
1.55 daniel 4598: void
4599: xmlParseAttributeListDecl(xmlParserCtxtPtr ctxt) {
1.123 daniel 4600: xmlChar *elemName;
4601: xmlChar *attrName;
1.103 daniel 4602: xmlEnumerationPtr tree;
1.22 daniel 4603:
1.152 daniel 4604: if ((RAW == '<') && (NXT(1) == '!') &&
1.40 daniel 4605: (NXT(2) == 'A') && (NXT(3) == 'T') &&
4606: (NXT(4) == 'T') && (NXT(5) == 'L') &&
4607: (NXT(6) == 'I') && (NXT(7) == 'S') &&
1.59 daniel 4608: (NXT(8) == 'T')) {
1.40 daniel 4609: SKIP(9);
1.59 daniel 4610: if (!IS_BLANK(CUR)) {
4611: if ((ctxt->sax != NULL) && (ctxt->sax->error != NULL))
1.122 daniel 4612: ctxt->sax->error(ctxt->userData,
4613: "Space required after '<!ATTLIST'\n");
1.123 daniel 4614: ctxt->errNo = XML_ERR_SPACE_REQUIRED;
1.59 daniel 4615: ctxt->wellFormed = 0;
4616: }
1.42 daniel 4617: SKIP_BLANKS;
1.59 daniel 4618: elemName = xmlParseName(ctxt);
4619: if (elemName == NULL) {
1.55 daniel 4620: if ((ctxt->sax != NULL) && (ctxt->sax->error != NULL))
1.122 daniel 4621: ctxt->sax->error(ctxt->userData,
4622: "ATTLIST: no name for Element\n");
1.123 daniel 4623: ctxt->errNo = XML_ERR_NAME_REQUIRED;
1.59 daniel 4624: ctxt->wellFormed = 0;
1.22 daniel 4625: return;
4626: }
1.42 daniel 4627: SKIP_BLANKS;
1.152 daniel 4628: while (RAW != '>') {
1.123 daniel 4629: const xmlChar *check = CUR_PTR;
1.59 daniel 4630: int type;
4631: int def;
1.123 daniel 4632: xmlChar *defaultValue = NULL;
1.59 daniel 4633:
1.103 daniel 4634: tree = NULL;
1.59 daniel 4635: attrName = xmlParseName(ctxt);
4636: if (attrName == NULL) {
4637: if ((ctxt->sax != NULL) && (ctxt->sax->error != NULL))
1.122 daniel 4638: ctxt->sax->error(ctxt->userData,
4639: "ATTLIST: no name for Attribute\n");
1.123 daniel 4640: ctxt->errNo = XML_ERR_NAME_REQUIRED;
1.59 daniel 4641: ctxt->wellFormed = 0;
4642: break;
4643: }
1.97 daniel 4644: GROW;
1.59 daniel 4645: if (!IS_BLANK(CUR)) {
4646: if ((ctxt->sax != NULL) && (ctxt->sax->error != NULL))
1.74 daniel 4647: ctxt->sax->error(ctxt->userData,
1.59 daniel 4648: "Space required after the attribute name\n");
1.123 daniel 4649: ctxt->errNo = XML_ERR_SPACE_REQUIRED;
1.59 daniel 4650: ctxt->wellFormed = 0;
4651: break;
4652: }
4653: SKIP_BLANKS;
4654:
1.66 daniel 4655: type = xmlParseAttributeType(ctxt, &tree);
1.59 daniel 4656: if (type <= 0) break;
1.22 daniel 4657:
1.97 daniel 4658: GROW;
1.59 daniel 4659: if (!IS_BLANK(CUR)) {
4660: if ((ctxt->sax != NULL) && (ctxt->sax->error != NULL))
1.74 daniel 4661: ctxt->sax->error(ctxt->userData,
1.59 daniel 4662: "Space required after the attribute type\n");
1.123 daniel 4663: ctxt->errNo = XML_ERR_SPACE_REQUIRED;
1.59 daniel 4664: ctxt->wellFormed = 0;
4665: break;
4666: }
1.42 daniel 4667: SKIP_BLANKS;
1.59 daniel 4668:
4669: def = xmlParseDefaultDecl(ctxt, &defaultValue);
4670: if (def <= 0) break;
4671:
1.97 daniel 4672: GROW;
1.152 daniel 4673: if (RAW != '>') {
1.59 daniel 4674: if (!IS_BLANK(CUR)) {
4675: if ((ctxt->sax != NULL) && (ctxt->sax->error != NULL))
1.74 daniel 4676: ctxt->sax->error(ctxt->userData,
1.59 daniel 4677: "Space required after the attribute default value\n");
1.123 daniel 4678: ctxt->errNo = XML_ERR_SPACE_REQUIRED;
1.59 daniel 4679: ctxt->wellFormed = 0;
4680: break;
4681: }
4682: SKIP_BLANKS;
4683: }
1.40 daniel 4684: if (check == CUR_PTR) {
1.55 daniel 4685: if ((ctxt->sax != NULL) && (ctxt->sax->error != NULL))
1.74 daniel 4686: ctxt->sax->error(ctxt->userData,
1.59 daniel 4687: "xmlParseAttributeListDecl: detected internal error\n");
1.123 daniel 4688: ctxt->errNo = XML_ERR_INTERNAL_ERROR;
1.22 daniel 4689: break;
4690: }
1.72 daniel 4691: if ((ctxt->sax != NULL) && (ctxt->sax->attributeDecl != NULL))
1.74 daniel 4692: ctxt->sax->attributeDecl(ctxt->userData, elemName, attrName,
1.66 daniel 4693: type, def, defaultValue, tree);
1.59 daniel 4694: if (attrName != NULL)
1.119 daniel 4695: xmlFree(attrName);
1.59 daniel 4696: if (defaultValue != NULL)
1.119 daniel 4697: xmlFree(defaultValue);
1.97 daniel 4698: GROW;
1.22 daniel 4699: }
1.152 daniel 4700: if (RAW == '>')
1.40 daniel 4701: NEXT;
1.22 daniel 4702:
1.119 daniel 4703: xmlFree(elemName);
1.22 daniel 4704: }
4705: }
4706:
1.50 daniel 4707: /**
1.61 daniel 4708: * xmlParseElementMixedContentDecl:
4709: * @ctxt: an XML parser context
4710: *
4711: * parse the declaration for a Mixed Element content
4712: * The leading '(' and spaces have been skipped in xmlParseElementContentDecl
4713: *
4714: * [51] Mixed ::= '(' S? '#PCDATA' (S? '|' S? Name)* S? ')*' |
4715: * '(' S? '#PCDATA' S? ')'
4716: *
1.99 daniel 4717: * [ VC: Proper Group/PE Nesting ] applies to [51] too (see [49])
4718: *
4719: * [ VC: No Duplicate Types ]
1.117 daniel 4720: * The same name must not appear more than once in a single
4721: * mixed-content declaration.
1.99 daniel 4722: *
1.61 daniel 4723: * returns: the list of the xmlElementContentPtr describing the element choices
4724: */
4725: xmlElementContentPtr
1.62 daniel 4726: xmlParseElementMixedContentDecl(xmlParserCtxtPtr ctxt) {
1.64 daniel 4727: xmlElementContentPtr ret = NULL, cur = NULL, n;
1.123 daniel 4728: xmlChar *elem = NULL;
1.61 daniel 4729:
1.97 daniel 4730: GROW;
1.152 daniel 4731: if ((RAW == '#') && (NXT(1) == 'P') &&
1.61 daniel 4732: (NXT(2) == 'C') && (NXT(3) == 'D') &&
4733: (NXT(4) == 'A') && (NXT(5) == 'T') &&
4734: (NXT(6) == 'A')) {
4735: SKIP(7);
4736: SKIP_BLANKS;
1.91 daniel 4737: SHRINK;
1.152 daniel 4738: if (RAW == ')') {
1.63 daniel 4739: NEXT;
4740: ret = xmlNewElementContent(NULL, XML_ELEMENT_CONTENT_PCDATA);
1.152 daniel 4741: if (RAW == '*') {
1.136 daniel 4742: ret->ocur = XML_ELEMENT_CONTENT_MULT;
4743: NEXT;
4744: }
1.63 daniel 4745: return(ret);
4746: }
1.152 daniel 4747: if ((RAW == '(') || (RAW == '|')) {
1.61 daniel 4748: ret = cur = xmlNewElementContent(NULL, XML_ELEMENT_CONTENT_PCDATA);
4749: if (ret == NULL) return(NULL);
1.99 daniel 4750: }
1.152 daniel 4751: while (RAW == '|') {
1.64 daniel 4752: NEXT;
1.61 daniel 4753: if (elem == NULL) {
4754: ret = xmlNewElementContent(NULL, XML_ELEMENT_CONTENT_OR);
4755: if (ret == NULL) return(NULL);
4756: ret->c1 = cur;
1.64 daniel 4757: cur = ret;
1.61 daniel 4758: } else {
1.64 daniel 4759: n = xmlNewElementContent(NULL, XML_ELEMENT_CONTENT_OR);
4760: if (n == NULL) return(NULL);
4761: n->c1 = xmlNewElementContent(elem, XML_ELEMENT_CONTENT_ELEMENT);
4762: cur->c2 = n;
4763: cur = n;
1.119 daniel 4764: xmlFree(elem);
1.61 daniel 4765: }
4766: SKIP_BLANKS;
4767: elem = xmlParseName(ctxt);
4768: if (elem == NULL) {
4769: if ((ctxt->sax != NULL) && (ctxt->sax->error != NULL))
1.74 daniel 4770: ctxt->sax->error(ctxt->userData,
1.61 daniel 4771: "xmlParseElementMixedContentDecl : Name expected\n");
1.123 daniel 4772: ctxt->errNo = XML_ERR_NAME_REQUIRED;
1.61 daniel 4773: ctxt->wellFormed = 0;
4774: xmlFreeElementContent(cur);
4775: return(NULL);
4776: }
4777: SKIP_BLANKS;
1.97 daniel 4778: GROW;
1.61 daniel 4779: }
1.152 daniel 4780: if ((RAW == ')') && (NXT(1) == '*')) {
1.66 daniel 4781: if (elem != NULL) {
1.61 daniel 4782: cur->c2 = xmlNewElementContent(elem,
4783: XML_ELEMENT_CONTENT_ELEMENT);
1.119 daniel 4784: xmlFree(elem);
1.66 daniel 4785: }
1.65 daniel 4786: ret->ocur = XML_ELEMENT_CONTENT_MULT;
1.64 daniel 4787: SKIP(2);
1.61 daniel 4788: } else {
1.119 daniel 4789: if (elem != NULL) xmlFree(elem);
1.61 daniel 4790: if ((ctxt->sax != NULL) && (ctxt->sax->error != NULL))
1.74 daniel 4791: ctxt->sax->error(ctxt->userData,
1.63 daniel 4792: "xmlParseElementMixedContentDecl : '|' or ')*' expected\n");
1.123 daniel 4793: ctxt->errNo = XML_ERR_MIXED_NOT_STARTED;
1.61 daniel 4794: ctxt->wellFormed = 0;
4795: xmlFreeElementContent(ret);
4796: return(NULL);
4797: }
4798:
4799: } else {
4800: if ((ctxt->sax != NULL) && (ctxt->sax->error != NULL))
1.74 daniel 4801: ctxt->sax->error(ctxt->userData,
1.61 daniel 4802: "xmlParseElementMixedContentDecl : '#PCDATA' expected\n");
1.123 daniel 4803: ctxt->errNo = XML_ERR_PCDATA_REQUIRED;
1.61 daniel 4804: ctxt->wellFormed = 0;
4805: }
4806: return(ret);
4807: }
4808:
4809: /**
4810: * xmlParseElementChildrenContentDecl:
1.50 daniel 4811: * @ctxt: an XML parser context
4812: *
1.61 daniel 4813: * parse the declaration for a Mixed Element content
4814: * The leading '(' and spaces have been skipped in xmlParseElementContentDecl
1.22 daniel 4815: *
1.61 daniel 4816: *
1.22 daniel 4817: * [47] children ::= (choice | seq) ('?' | '*' | '+')?
4818: *
4819: * [48] cp ::= (Name | choice | seq) ('?' | '*' | '+')?
4820: *
4821: * [49] choice ::= '(' S? cp ( S? '|' S? cp )* S? ')'
4822: *
4823: * [50] seq ::= '(' S? cp ( S? ',' S? cp )* S? ')'
4824: *
1.99 daniel 4825: * [ VC: Proper Group/PE Nesting ] applies to [49] and [50]
4826: * TODO Parameter-entity replacement text must be properly nested
4827: * with parenthetized groups. That is to say, if either of the
4828: * opening or closing parentheses in a choice, seq, or Mixed
4829: * construct is contained in the replacement text for a parameter
4830: * entity, both must be contained in the same replacement text. For
4831: * interoperability, if a parameter-entity reference appears in a
4832: * choice, seq, or Mixed construct, its replacement text should not
4833: * be empty, and neither the first nor last non-blank character of
4834: * the replacement text should be a connector (| or ,).
4835: *
1.62 daniel 4836: * returns: the tree of xmlElementContentPtr describing the element
1.61 daniel 4837: * hierarchy.
4838: */
4839: xmlElementContentPtr
1.62 daniel 4840: xmlParseElementChildrenContentDecl(xmlParserCtxtPtr ctxt) {
1.63 daniel 4841: xmlElementContentPtr ret = NULL, cur = NULL, last = NULL, op = NULL;
1.123 daniel 4842: xmlChar *elem;
4843: xmlChar type = 0;
1.62 daniel 4844:
4845: SKIP_BLANKS;
1.94 daniel 4846: GROW;
1.152 daniel 4847: if (RAW == '(') {
1.63 daniel 4848: /* Recurse on first child */
1.62 daniel 4849: NEXT;
4850: SKIP_BLANKS;
4851: cur = ret = xmlParseElementChildrenContentDecl(ctxt);
4852: SKIP_BLANKS;
1.101 daniel 4853: GROW;
1.62 daniel 4854: } else {
4855: elem = xmlParseName(ctxt);
4856: if (elem == NULL) {
4857: if ((ctxt->sax != NULL) && (ctxt->sax->error != NULL))
1.74 daniel 4858: ctxt->sax->error(ctxt->userData,
1.62 daniel 4859: "xmlParseElementChildrenContentDecl : Name or '(' expected\n");
1.123 daniel 4860: ctxt->errNo = XML_ERR_ELEMCONTENT_NOT_STARTED;
1.62 daniel 4861: ctxt->wellFormed = 0;
4862: return(NULL);
4863: }
4864: cur = ret = xmlNewElementContent(elem, XML_ELEMENT_CONTENT_ELEMENT);
1.101 daniel 4865: GROW;
1.152 daniel 4866: if (RAW == '?') {
1.104 daniel 4867: cur->ocur = XML_ELEMENT_CONTENT_OPT;
1.62 daniel 4868: NEXT;
1.152 daniel 4869: } else if (RAW == '*') {
1.104 daniel 4870: cur->ocur = XML_ELEMENT_CONTENT_MULT;
1.62 daniel 4871: NEXT;
1.152 daniel 4872: } else if (RAW == '+') {
1.104 daniel 4873: cur->ocur = XML_ELEMENT_CONTENT_PLUS;
1.62 daniel 4874: NEXT;
4875: } else {
1.104 daniel 4876: cur->ocur = XML_ELEMENT_CONTENT_ONCE;
1.62 daniel 4877: }
1.119 daniel 4878: xmlFree(elem);
1.101 daniel 4879: GROW;
1.62 daniel 4880: }
4881: SKIP_BLANKS;
1.91 daniel 4882: SHRINK;
1.152 daniel 4883: while (RAW != ')') {
1.63 daniel 4884: /*
4885: * Each loop we parse one separator and one element.
4886: */
1.152 daniel 4887: if (RAW == ',') {
1.62 daniel 4888: if (type == 0) type = CUR;
4889:
4890: /*
4891: * Detect "Name | Name , Name" error
4892: */
4893: else if (type != CUR) {
4894: if ((ctxt->sax != NULL) && (ctxt->sax->error != NULL))
1.74 daniel 4895: ctxt->sax->error(ctxt->userData,
1.62 daniel 4896: "xmlParseElementChildrenContentDecl : '%c' expected\n",
4897: type);
1.123 daniel 4898: ctxt->errNo = XML_ERR_SEPARATOR_REQUIRED;
1.62 daniel 4899: ctxt->wellFormed = 0;
4900: xmlFreeElementContent(ret);
4901: return(NULL);
4902: }
1.64 daniel 4903: NEXT;
1.62 daniel 4904:
1.63 daniel 4905: op = xmlNewElementContent(NULL, XML_ELEMENT_CONTENT_SEQ);
4906: if (op == NULL) {
4907: xmlFreeElementContent(ret);
4908: return(NULL);
4909: }
4910: if (last == NULL) {
4911: op->c1 = ret;
1.65 daniel 4912: ret = cur = op;
1.63 daniel 4913: } else {
4914: cur->c2 = op;
4915: op->c1 = last;
4916: cur =op;
1.65 daniel 4917: last = NULL;
1.63 daniel 4918: }
1.152 daniel 4919: } else if (RAW == '|') {
1.62 daniel 4920: if (type == 0) type = CUR;
4921:
4922: /*
1.63 daniel 4923: * Detect "Name , Name | Name" error
1.62 daniel 4924: */
4925: else if (type != CUR) {
4926: if ((ctxt->sax != NULL) && (ctxt->sax->error != NULL))
1.74 daniel 4927: ctxt->sax->error(ctxt->userData,
1.62 daniel 4928: "xmlParseElementChildrenContentDecl : '%c' expected\n",
4929: type);
1.123 daniel 4930: ctxt->errNo = XML_ERR_SEPARATOR_REQUIRED;
1.62 daniel 4931: ctxt->wellFormed = 0;
4932: xmlFreeElementContent(ret);
4933: return(NULL);
4934: }
1.64 daniel 4935: NEXT;
1.62 daniel 4936:
1.63 daniel 4937: op = xmlNewElementContent(NULL, XML_ELEMENT_CONTENT_OR);
4938: if (op == NULL) {
4939: xmlFreeElementContent(ret);
4940: return(NULL);
4941: }
4942: if (last == NULL) {
4943: op->c1 = ret;
1.65 daniel 4944: ret = cur = op;
1.63 daniel 4945: } else {
4946: cur->c2 = op;
4947: op->c1 = last;
4948: cur =op;
1.65 daniel 4949: last = NULL;
1.63 daniel 4950: }
1.62 daniel 4951: } else {
4952: if ((ctxt->sax != NULL) && (ctxt->sax->error != NULL))
1.74 daniel 4953: ctxt->sax->error(ctxt->userData,
1.62 daniel 4954: "xmlParseElementChildrenContentDecl : ',' '|' or ')' expected\n");
4955: ctxt->wellFormed = 0;
1.123 daniel 4956: ctxt->errNo = XML_ERR_ELEMCONTENT_NOT_FINISHED;
1.62 daniel 4957: xmlFreeElementContent(ret);
4958: return(NULL);
4959: }
1.101 daniel 4960: GROW;
1.62 daniel 4961: SKIP_BLANKS;
1.101 daniel 4962: GROW;
1.152 daniel 4963: if (RAW == '(') {
1.63 daniel 4964: /* Recurse on second child */
1.62 daniel 4965: NEXT;
4966: SKIP_BLANKS;
1.65 daniel 4967: last = xmlParseElementChildrenContentDecl(ctxt);
1.62 daniel 4968: SKIP_BLANKS;
4969: } else {
4970: elem = xmlParseName(ctxt);
4971: if (elem == NULL) {
4972: if ((ctxt->sax != NULL) && (ctxt->sax->error != NULL))
1.74 daniel 4973: ctxt->sax->error(ctxt->userData,
1.122 daniel 4974: "xmlParseElementChildrenContentDecl : Name or '(' expected\n");
1.123 daniel 4975: ctxt->errNo = XML_ERR_ELEMCONTENT_NOT_STARTED;
1.62 daniel 4976: ctxt->wellFormed = 0;
4977: return(NULL);
4978: }
1.65 daniel 4979: last = xmlNewElementContent(elem, XML_ELEMENT_CONTENT_ELEMENT);
1.119 daniel 4980: xmlFree(elem);
1.152 daniel 4981: if (RAW == '?') {
1.105 daniel 4982: last->ocur = XML_ELEMENT_CONTENT_OPT;
4983: NEXT;
1.152 daniel 4984: } else if (RAW == '*') {
1.105 daniel 4985: last->ocur = XML_ELEMENT_CONTENT_MULT;
4986: NEXT;
1.152 daniel 4987: } else if (RAW == '+') {
1.105 daniel 4988: last->ocur = XML_ELEMENT_CONTENT_PLUS;
4989: NEXT;
4990: } else {
4991: last->ocur = XML_ELEMENT_CONTENT_ONCE;
4992: }
1.63 daniel 4993: }
4994: SKIP_BLANKS;
1.97 daniel 4995: GROW;
1.64 daniel 4996: }
1.65 daniel 4997: if ((cur != NULL) && (last != NULL)) {
4998: cur->c2 = last;
1.62 daniel 4999: }
5000: NEXT;
1.152 daniel 5001: if (RAW == '?') {
1.62 daniel 5002: ret->ocur = XML_ELEMENT_CONTENT_OPT;
5003: NEXT;
1.152 daniel 5004: } else if (RAW == '*') {
1.62 daniel 5005: ret->ocur = XML_ELEMENT_CONTENT_MULT;
5006: NEXT;
1.152 daniel 5007: } else if (RAW == '+') {
1.62 daniel 5008: ret->ocur = XML_ELEMENT_CONTENT_PLUS;
5009: NEXT;
5010: }
5011: return(ret);
1.61 daniel 5012: }
5013:
5014: /**
5015: * xmlParseElementContentDecl:
5016: * @ctxt: an XML parser context
5017: * @name: the name of the element being defined.
5018: * @result: the Element Content pointer will be stored here if any
1.22 daniel 5019: *
1.61 daniel 5020: * parse the declaration for an Element content either Mixed or Children,
5021: * the cases EMPTY and ANY are handled directly in xmlParseElementDecl
5022: *
5023: * [46] contentspec ::= 'EMPTY' | 'ANY' | Mixed | children
1.50 daniel 5024: *
1.61 daniel 5025: * returns: the type of element content XML_ELEMENT_TYPE_xxx
1.22 daniel 5026: */
5027:
1.61 daniel 5028: int
1.123 daniel 5029: xmlParseElementContentDecl(xmlParserCtxtPtr ctxt, xmlChar *name,
1.61 daniel 5030: xmlElementContentPtr *result) {
5031:
5032: xmlElementContentPtr tree = NULL;
5033: int res;
5034:
5035: *result = NULL;
5036:
1.152 daniel 5037: if (RAW != '(') {
1.61 daniel 5038: if ((ctxt->sax != NULL) && (ctxt->sax->error != NULL))
1.74 daniel 5039: ctxt->sax->error(ctxt->userData,
1.61 daniel 5040: "xmlParseElementContentDecl : '(' expected\n");
1.123 daniel 5041: ctxt->errNo = XML_ERR_ELEMCONTENT_NOT_STARTED;
1.61 daniel 5042: ctxt->wellFormed = 0;
5043: return(-1);
5044: }
5045: NEXT;
1.97 daniel 5046: GROW;
1.61 daniel 5047: SKIP_BLANKS;
1.152 daniel 5048: if ((RAW == '#') && (NXT(1) == 'P') &&
1.61 daniel 5049: (NXT(2) == 'C') && (NXT(3) == 'D') &&
5050: (NXT(4) == 'A') && (NXT(5) == 'T') &&
5051: (NXT(6) == 'A')) {
1.62 daniel 5052: tree = xmlParseElementMixedContentDecl(ctxt);
1.61 daniel 5053: res = XML_ELEMENT_TYPE_MIXED;
5054: } else {
1.62 daniel 5055: tree = xmlParseElementChildrenContentDecl(ctxt);
1.61 daniel 5056: res = XML_ELEMENT_TYPE_ELEMENT;
5057: }
5058: SKIP_BLANKS;
1.63 daniel 5059: /****************************
1.152 daniel 5060: if (RAW != ')') {
1.61 daniel 5061: if ((ctxt->sax != NULL) && (ctxt->sax->error != NULL))
1.74 daniel 5062: ctxt->sax->error(ctxt->userData,
1.61 daniel 5063: "xmlParseElementContentDecl : ')' expected\n");
5064: ctxt->wellFormed = 0;
5065: return(-1);
5066: }
1.63 daniel 5067: ****************************/
5068: *result = tree;
1.61 daniel 5069: return(res);
1.22 daniel 5070: }
5071:
1.50 daniel 5072: /**
5073: * xmlParseElementDecl:
5074: * @ctxt: an XML parser context
5075: *
5076: * parse an Element declaration.
1.22 daniel 5077: *
5078: * [45] elementdecl ::= '<!ELEMENT' S Name S contentspec S? '>'
5079: *
1.99 daniel 5080: * [ VC: Unique Element Type Declaration ]
1.117 daniel 5081: * No element type may be declared more than once
1.69 daniel 5082: *
5083: * Returns the type of the element, or -1 in case of error
1.22 daniel 5084: */
1.59 daniel 5085: int
1.55 daniel 5086: xmlParseElementDecl(xmlParserCtxtPtr ctxt) {
1.123 daniel 5087: xmlChar *name;
1.59 daniel 5088: int ret = -1;
1.61 daniel 5089: xmlElementContentPtr content = NULL;
1.22 daniel 5090:
1.97 daniel 5091: GROW;
1.152 daniel 5092: if ((RAW == '<') && (NXT(1) == '!') &&
1.40 daniel 5093: (NXT(2) == 'E') && (NXT(3) == 'L') &&
5094: (NXT(4) == 'E') && (NXT(5) == 'M') &&
5095: (NXT(6) == 'E') && (NXT(7) == 'N') &&
1.59 daniel 5096: (NXT(8) == 'T')) {
1.40 daniel 5097: SKIP(9);
1.59 daniel 5098: if (!IS_BLANK(CUR)) {
5099: if ((ctxt->sax != NULL) && (ctxt->sax->error != NULL))
1.74 daniel 5100: ctxt->sax->error(ctxt->userData,
1.59 daniel 5101: "Space required after 'ELEMENT'\n");
1.123 daniel 5102: ctxt->errNo = XML_ERR_SPACE_REQUIRED;
1.59 daniel 5103: ctxt->wellFormed = 0;
5104: }
1.42 daniel 5105: SKIP_BLANKS;
1.22 daniel 5106: name = xmlParseName(ctxt);
5107: if (name == NULL) {
1.55 daniel 5108: if ((ctxt->sax != NULL) && (ctxt->sax->error != NULL))
1.74 daniel 5109: ctxt->sax->error(ctxt->userData,
1.59 daniel 5110: "xmlParseElementDecl: no name for Element\n");
1.123 daniel 5111: ctxt->errNo = XML_ERR_NAME_REQUIRED;
1.59 daniel 5112: ctxt->wellFormed = 0;
5113: return(-1);
5114: }
5115: if (!IS_BLANK(CUR)) {
5116: if ((ctxt->sax != NULL) && (ctxt->sax->error != NULL))
1.74 daniel 5117: ctxt->sax->error(ctxt->userData,
1.59 daniel 5118: "Space required after the element name\n");
1.123 daniel 5119: ctxt->errNo = XML_ERR_SPACE_REQUIRED;
1.59 daniel 5120: ctxt->wellFormed = 0;
1.22 daniel 5121: }
1.42 daniel 5122: SKIP_BLANKS;
1.152 daniel 5123: if ((RAW == 'E') && (NXT(1) == 'M') &&
1.40 daniel 5124: (NXT(2) == 'P') && (NXT(3) == 'T') &&
5125: (NXT(4) == 'Y')) {
5126: SKIP(5);
1.22 daniel 5127: /*
5128: * Element must always be empty.
5129: */
1.59 daniel 5130: ret = XML_ELEMENT_TYPE_EMPTY;
1.152 daniel 5131: } else if ((RAW == 'A') && (NXT(1) == 'N') &&
1.40 daniel 5132: (NXT(2) == 'Y')) {
5133: SKIP(3);
1.22 daniel 5134: /*
5135: * Element is a generic container.
5136: */
1.59 daniel 5137: ret = XML_ELEMENT_TYPE_ANY;
1.152 daniel 5138: } else if (RAW == '(') {
1.61 daniel 5139: ret = xmlParseElementContentDecl(ctxt, name, &content);
1.22 daniel 5140: } else {
1.98 daniel 5141: /*
5142: * [ WFC: PEs in Internal Subset ] error handling.
5143: */
1.152 daniel 5144: if ((RAW == '%') && (ctxt->external == 0) &&
1.98 daniel 5145: (ctxt->inputNr == 1)) {
5146: if ((ctxt->sax != NULL) && (ctxt->sax->error != NULL))
5147: ctxt->sax->error(ctxt->userData,
5148: "PEReference: forbidden within markup decl in internal subset\n");
1.123 daniel 5149: ctxt->errNo = XML_ERR_PEREF_IN_INT_SUBSET;
1.98 daniel 5150: } else {
5151: if ((ctxt->sax != NULL) && (ctxt->sax->error != NULL))
5152: ctxt->sax->error(ctxt->userData,
5153: "xmlParseElementDecl: 'EMPTY', 'ANY' or '(' expected\n");
1.123 daniel 5154: ctxt->errNo = XML_ERR_ELEMCONTENT_NOT_STARTED;
1.98 daniel 5155: }
1.61 daniel 5156: ctxt->wellFormed = 0;
1.119 daniel 5157: if (name != NULL) xmlFree(name);
1.61 daniel 5158: return(-1);
1.22 daniel 5159: }
1.142 daniel 5160:
5161: SKIP_BLANKS;
5162: /*
5163: * Pop-up of finished entities.
5164: */
1.152 daniel 5165: while ((RAW == 0) && (ctxt->inputNr > 1))
1.142 daniel 5166: xmlPopInput(ctxt);
1.42 daniel 5167: SKIP_BLANKS;
1.142 daniel 5168:
1.152 daniel 5169: if (RAW != '>') {
1.55 daniel 5170: if ((ctxt->sax != NULL) && (ctxt->sax->error != NULL))
1.74 daniel 5171: ctxt->sax->error(ctxt->userData,
1.31 daniel 5172: "xmlParseElementDecl: expected '>' at the end\n");
1.123 daniel 5173: ctxt->errNo = XML_ERR_GT_REQUIRED;
1.59 daniel 5174: ctxt->wellFormed = 0;
1.61 daniel 5175: } else {
1.40 daniel 5176: NEXT;
1.72 daniel 5177: if ((ctxt->sax != NULL) && (ctxt->sax->elementDecl != NULL))
1.76 daniel 5178: ctxt->sax->elementDecl(ctxt->userData, name, ret,
5179: content);
1.61 daniel 5180: }
1.84 daniel 5181: if (content != NULL) {
5182: xmlFreeElementContent(content);
5183: }
1.61 daniel 5184: if (name != NULL) {
1.119 daniel 5185: xmlFree(name);
1.61 daniel 5186: }
1.22 daniel 5187: }
1.59 daniel 5188: return(ret);
1.22 daniel 5189: }
5190:
1.50 daniel 5191: /**
5192: * xmlParseMarkupDecl:
5193: * @ctxt: an XML parser context
5194: *
5195: * parse Markup declarations
1.22 daniel 5196: *
5197: * [29] markupdecl ::= elementdecl | AttlistDecl | EntityDecl |
5198: * NotationDecl | PI | Comment
5199: *
1.98 daniel 5200: * [ VC: Proper Declaration/PE Nesting ]
5201: * TODO Parameter-entity replacement text must be properly nested with
5202: * markup declarations. That is to say, if either the first character
5203: * or the last character of a markup declaration (markupdecl above) is
5204: * contained in the replacement text for a parameter-entity reference,
5205: * both must be contained in the same replacement text.
5206: *
5207: * [ WFC: PEs in Internal Subset ]
5208: * In the internal DTD subset, parameter-entity references can occur
5209: * only where markup declarations can occur, not within markup declarations.
5210: * (This does not apply to references that occur in external parameter
5211: * entities or to the external subset.)
1.22 daniel 5212: */
1.55 daniel 5213: void
5214: xmlParseMarkupDecl(xmlParserCtxtPtr ctxt) {
1.94 daniel 5215: GROW;
1.22 daniel 5216: xmlParseElementDecl(ctxt);
5217: xmlParseAttributeListDecl(ctxt);
5218: xmlParseEntityDecl(ctxt);
5219: xmlParseNotationDecl(ctxt);
5220: xmlParsePI(ctxt);
1.114 daniel 5221: xmlParseComment(ctxt);
1.98 daniel 5222: /*
5223: * This is only for internal subset. On external entities,
5224: * the replacement is done before parsing stage
5225: */
5226: if ((ctxt->external == 0) && (ctxt->inputNr == 1))
5227: xmlParsePEReference(ctxt);
1.97 daniel 5228: ctxt->instate = XML_PARSER_DTD;
1.22 daniel 5229: }
5230:
1.50 daniel 5231: /**
1.76 daniel 5232: * xmlParseTextDecl:
5233: * @ctxt: an XML parser context
5234: *
5235: * parse an XML declaration header for external entities
5236: *
5237: * [77] TextDecl ::= '<?xml' VersionInfo? EncodingDecl S? '?>'
5238: *
5239: * Returns the only valuable info for an external parsed entity, the encoding
5240: */
5241:
1.123 daniel 5242: xmlChar *
1.76 daniel 5243: xmlParseTextDecl(xmlParserCtxtPtr ctxt) {
1.123 daniel 5244: xmlChar *version;
5245: xmlChar *encoding = NULL;
1.76 daniel 5246:
5247: /*
5248: * We know that '<?xml' is here.
5249: */
5250: SKIP(5);
5251:
5252: if (!IS_BLANK(CUR)) {
5253: if ((ctxt->sax != NULL) && (ctxt->sax->error != NULL))
1.122 daniel 5254: ctxt->sax->error(ctxt->userData,
5255: "Space needed after '<?xml'\n");
1.123 daniel 5256: ctxt->errNo = XML_ERR_SPACE_REQUIRED;
1.76 daniel 5257: ctxt->wellFormed = 0;
5258: }
5259: SKIP_BLANKS;
5260:
5261: /*
5262: * We may have the VersionInfo here.
5263: */
5264: version = xmlParseVersionInfo(ctxt);
5265: if (version == NULL)
5266: version = xmlCharStrdup(XML_DEFAULT_VERSION);
5267: ctxt->version = xmlStrdup(version);
1.119 daniel 5268: xmlFree(version);
1.76 daniel 5269:
5270: /*
5271: * We must have the encoding declaration
5272: */
5273: if (!IS_BLANK(CUR)) {
5274: if ((ctxt->sax != NULL) && (ctxt->sax->error != NULL))
1.122 daniel 5275: ctxt->sax->error(ctxt->userData, "Space needed here\n");
1.123 daniel 5276: ctxt->errNo = XML_ERR_SPACE_REQUIRED;
1.76 daniel 5277: ctxt->wellFormed = 0;
5278: }
5279: encoding = xmlParseEncodingDecl(ctxt);
5280:
5281: SKIP_BLANKS;
1.152 daniel 5282: if ((RAW == '?') && (NXT(1) == '>')) {
1.76 daniel 5283: SKIP(2);
1.152 daniel 5284: } else if (RAW == '>') {
1.76 daniel 5285: /* Deprecated old WD ... */
5286: if ((ctxt->sax != NULL) && (ctxt->sax->error != NULL))
1.122 daniel 5287: ctxt->sax->error(ctxt->userData,
5288: "XML declaration must end-up with '?>'\n");
1.123 daniel 5289: ctxt->errNo = XML_ERR_XMLDECL_NOT_FINISHED;
1.76 daniel 5290: ctxt->wellFormed = 0;
5291: NEXT;
5292: } else {
5293: if ((ctxt->sax != NULL) && (ctxt->sax->error != NULL))
1.122 daniel 5294: ctxt->sax->error(ctxt->userData,
5295: "parsing XML declaration: '?>' expected\n");
1.123 daniel 5296: ctxt->errNo = XML_ERR_XMLDECL_NOT_FINISHED;
1.76 daniel 5297: ctxt->wellFormed = 0;
5298: MOVETO_ENDTAG(CUR_PTR);
5299: NEXT;
5300: }
5301: return(encoding);
5302: }
5303:
5304: /*
5305: * xmlParseConditionalSections
5306: * @ctxt: an XML parser context
5307: *
5308: * TODO : Conditionnal section are not yet supported !
5309: *
5310: * [61] conditionalSect ::= includeSect | ignoreSect
5311: * [62] includeSect ::= '<![' S? 'INCLUDE' S? '[' extSubsetDecl ']]>'
5312: * [63] ignoreSect ::= '<![' S? 'IGNORE' S? '[' ignoreSectContents* ']]>'
5313: * [64] ignoreSectContents ::= Ignore ('<![' ignoreSectContents ']]>' Ignore)*
5314: * [65] Ignore ::= Char* - (Char* ('<![' | ']]>') Char*)
5315: */
5316:
5317: void
5318: xmlParseConditionalSections(xmlParserCtxtPtr ctxt) {
5319: if ((ctxt->sax != NULL) && (ctxt->sax->warning != NULL))
5320: ctxt->sax->warning(ctxt->userData,
5321: "XML conditional section not supported\n");
5322: /*
5323: * Skip up to the end of the conditionnal section.
5324: */
1.152 daniel 5325: while ((RAW != 0) && ((RAW != ']') || (NXT(1) != ']') || (NXT(2) != '>'))) {
1.76 daniel 5326: NEXT;
1.143 daniel 5327: /*
5328: * Pop-up of finished entities.
5329: */
1.152 daniel 5330: while ((RAW == 0) && (ctxt->inputNr > 1))
1.143 daniel 5331: xmlPopInput(ctxt);
5332:
1.152 daniel 5333: if (RAW == 0)
1.143 daniel 5334: GROW;
5335: }
5336:
1.152 daniel 5337: if (RAW == 0)
1.143 daniel 5338: SHRINK;
5339:
1.152 daniel 5340: if (RAW == 0) {
1.76 daniel 5341: if ((ctxt->sax != NULL) && (ctxt->sax->error != NULL))
5342: ctxt->sax->error(ctxt->userData,
5343: "XML conditional section not closed\n");
1.123 daniel 5344: ctxt->errNo = XML_ERR_CONDSEC_NOT_FINISHED;
1.76 daniel 5345: ctxt->wellFormed = 0;
1.143 daniel 5346: } else {
5347: SKIP(3);
1.76 daniel 5348: }
5349: }
5350:
5351: /**
1.124 daniel 5352: * xmlParseExternalSubset:
1.76 daniel 5353: * @ctxt: an XML parser context
1.124 daniel 5354: * @ExternalID: the external identifier
5355: * @SystemID: the system identifier (or URL)
1.76 daniel 5356: *
5357: * parse Markup declarations from an external subset
5358: *
5359: * [30] extSubset ::= textDecl? extSubsetDecl
5360: *
5361: * [31] extSubsetDecl ::= (markupdecl | conditionalSect | PEReference | S) *
5362: */
5363: void
1.123 daniel 5364: xmlParseExternalSubset(xmlParserCtxtPtr ctxt, const xmlChar *ExternalID,
5365: const xmlChar *SystemID) {
1.132 daniel 5366: GROW;
1.152 daniel 5367: if ((RAW == '<') && (NXT(1) == '?') &&
1.76 daniel 5368: (NXT(2) == 'x') && (NXT(3) == 'm') &&
5369: (NXT(4) == 'l')) {
1.134 daniel 5370: xmlChar *decl;
5371:
5372: decl = xmlParseTextDecl(ctxt);
5373: if (decl != NULL)
5374: xmlFree(decl);
1.76 daniel 5375: }
1.79 daniel 5376: if (ctxt->myDoc == NULL) {
1.116 daniel 5377: ctxt->myDoc = xmlNewDoc(BAD_CAST "1.0");
1.79 daniel 5378: }
5379: if ((ctxt->myDoc != NULL) && (ctxt->myDoc->intSubset == NULL))
5380: xmlCreateIntSubset(ctxt->myDoc, NULL, ExternalID, SystemID);
5381:
1.96 daniel 5382: ctxt->instate = XML_PARSER_DTD;
1.101 daniel 5383: ctxt->external = 1;
1.152 daniel 5384: while (((RAW == '<') && (NXT(1) == '?')) ||
5385: ((RAW == '<') && (NXT(1) == '!')) ||
1.153 daniel 5386: IS_BLANK(RAW)) {
1.123 daniel 5387: const xmlChar *check = CUR_PTR;
1.115 daniel 5388: int cons = ctxt->input->consumed;
5389:
1.152 daniel 5390: if ((RAW == '<') && (NXT(1) == '!') && (NXT(2) == '[')) {
1.76 daniel 5391: xmlParseConditionalSections(ctxt);
5392: } else if (IS_BLANK(CUR)) {
5393: NEXT;
1.152 daniel 5394: } else if (RAW == '%') {
1.76 daniel 5395: xmlParsePEReference(ctxt);
5396: } else
5397: xmlParseMarkupDecl(ctxt);
1.77 daniel 5398:
5399: /*
5400: * Pop-up of finished entities.
5401: */
1.152 daniel 5402: while ((RAW == 0) && (ctxt->inputNr > 1))
1.77 daniel 5403: xmlPopInput(ctxt);
5404:
1.115 daniel 5405: if ((CUR_PTR == check) && (cons == ctxt->input->consumed)) {
5406: if ((ctxt->sax != NULL) && (ctxt->sax->error != NULL))
5407: ctxt->sax->error(ctxt->userData,
5408: "Content error in the external subset\n");
5409: ctxt->wellFormed = 0;
1.123 daniel 5410: ctxt->errNo = XML_ERR_EXT_SUBSET_NOT_FINISHED;
1.115 daniel 5411: break;
5412: }
1.76 daniel 5413: }
5414:
1.152 daniel 5415: if (RAW != 0) {
1.76 daniel 5416: if ((ctxt->sax != NULL) && (ctxt->sax->error != NULL))
5417: ctxt->sax->error(ctxt->userData,
5418: "Extra content at the end of the document\n");
1.123 daniel 5419: ctxt->errNo = XML_ERR_EXT_SUBSET_NOT_FINISHED;
1.76 daniel 5420: ctxt->wellFormed = 0;
5421: }
5422:
5423: }
5424:
5425: /**
1.77 daniel 5426: * xmlParseReference:
5427: * @ctxt: an XML parser context
5428: *
5429: * parse and handle entity references in content, depending on the SAX
5430: * interface, this may end-up in a call to character() if this is a
1.79 daniel 5431: * CharRef, a predefined entity, if there is no reference() callback.
5432: * or if the parser was asked to switch to that mode.
1.77 daniel 5433: *
5434: * [67] Reference ::= EntityRef | CharRef
5435: */
5436: void
5437: xmlParseReference(xmlParserCtxtPtr ctxt) {
5438: xmlEntityPtr ent;
1.123 daniel 5439: xmlChar *val;
1.152 daniel 5440: if (RAW != '&') return;
1.77 daniel 5441:
1.113 daniel 5442: if (ctxt->inputNr > 1) {
1.123 daniel 5443: xmlChar cur[2] = { '&' , 0 } ;
1.113 daniel 5444:
5445: if ((ctxt->sax != NULL) && (ctxt->sax->characters != NULL))
5446: ctxt->sax->characters(ctxt->userData, cur, 1);
5447: if (ctxt->token == '&')
5448: ctxt->token = 0;
5449: else {
5450: SKIP(1);
5451: }
5452: return;
5453: }
1.77 daniel 5454: if (NXT(1) == '#') {
1.152 daniel 5455: int i = 0;
1.153 daniel 5456: xmlChar out[10];
5457: int hex = NXT(2);
1.77 daniel 5458: int val = xmlParseCharRef(ctxt);
1.152 daniel 5459:
1.153 daniel 5460: if (ctxt->encoding != NULL) {
5461: /*
5462: * So we are using non-UTF-8 buffers
5463: * Check that the char fit on 8bits, if not
5464: * generate a CharRef.
5465: */
5466: if (val <= 0xFF) {
5467: out[0] = val;
5468: out[1] = 0;
5469: if ((ctxt->sax != NULL) && (ctxt->sax->characters != NULL))
5470: ctxt->sax->characters(ctxt->userData, out, 1);
5471: } else {
5472: if ((hex == 'x') || (hex == 'X'))
5473: sprintf((char *)out, "#x%X", val);
5474: else
5475: sprintf((char *)out, "#%d", val);
5476: if ((ctxt->sax != NULL) && (ctxt->sax->reference != NULL))
5477: ctxt->sax->reference(ctxt->userData, out);
5478: }
5479: } else {
5480: /*
5481: * Just encode the value in UTF-8
5482: */
5483: COPY_BUF(0 ,out, i, val);
5484: out[i] = 0;
5485: if ((ctxt->sax != NULL) && (ctxt->sax->characters != NULL))
5486: ctxt->sax->characters(ctxt->userData, out, i);
5487: }
1.77 daniel 5488: } else {
5489: ent = xmlParseEntityRef(ctxt);
5490: if (ent == NULL) return;
5491: if ((ent->name != NULL) &&
1.159 daniel 5492: (ent->etype != XML_INTERNAL_PREDEFINED_ENTITY)) {
1.113 daniel 5493: if ((ctxt->sax != NULL) && (ctxt->sax->reference != NULL) &&
5494: (ctxt->replaceEntities == 0)) {
5495: /*
5496: * Create a node.
5497: */
5498: ctxt->sax->reference(ctxt->userData, ent->name);
5499: return;
5500: } else if (ctxt->replaceEntities) {
5501: xmlParserInputPtr input;
1.79 daniel 5502:
1.113 daniel 5503: input = xmlNewEntityInputStream(ctxt, ent);
5504: xmlPushInput(ctxt, input);
5505: return;
5506: }
1.77 daniel 5507: }
5508: val = ent->content;
5509: if (val == NULL) return;
5510: /*
5511: * inline the entity.
5512: */
5513: if ((ctxt->sax != NULL) && (ctxt->sax->characters != NULL))
5514: ctxt->sax->characters(ctxt->userData, val, xmlStrlen(val));
5515: }
1.24 daniel 5516: }
5517:
1.50 daniel 5518: /**
5519: * xmlParseEntityRef:
5520: * @ctxt: an XML parser context
5521: *
5522: * parse ENTITY references declarations
1.24 daniel 5523: *
5524: * [68] EntityRef ::= '&' Name ';'
1.68 daniel 5525: *
1.98 daniel 5526: * [ WFC: Entity Declared ]
5527: * In a document without any DTD, a document with only an internal DTD
5528: * subset which contains no parameter entity references, or a document
5529: * with "standalone='yes'", the Name given in the entity reference
5530: * must match that in an entity declaration, except that well-formed
5531: * documents need not declare any of the following entities: amp, lt,
5532: * gt, apos, quot. The declaration of a parameter entity must precede
5533: * any reference to it. Similarly, the declaration of a general entity
5534: * must precede any reference to it which appears in a default value in an
5535: * attribute-list declaration. Note that if entities are declared in the
5536: * external subset or in external parameter entities, a non-validating
5537: * processor is not obligated to read and process their declarations;
5538: * for such documents, the rule that an entity must be declared is a
5539: * well-formedness constraint only if standalone='yes'.
5540: *
5541: * [ WFC: Parsed Entity ]
5542: * An entity reference must not contain the name of an unparsed entity
5543: *
1.77 daniel 5544: * Returns the xmlEntityPtr if found, or NULL otherwise.
1.24 daniel 5545: */
1.77 daniel 5546: xmlEntityPtr
1.55 daniel 5547: xmlParseEntityRef(xmlParserCtxtPtr ctxt) {
1.123 daniel 5548: xmlChar *name;
1.72 daniel 5549: xmlEntityPtr ent = NULL;
1.24 daniel 5550:
1.91 daniel 5551: GROW;
1.111 daniel 5552:
1.152 daniel 5553: if (RAW == '&') {
1.40 daniel 5554: NEXT;
1.24 daniel 5555: name = xmlParseName(ctxt);
5556: if (name == NULL) {
1.55 daniel 5557: if ((ctxt->sax != NULL) && (ctxt->sax->error != NULL))
1.98 daniel 5558: ctxt->sax->error(ctxt->userData,
5559: "xmlParseEntityRef: no name\n");
1.123 daniel 5560: ctxt->errNo = XML_ERR_NAME_REQUIRED;
1.59 daniel 5561: ctxt->wellFormed = 0;
1.24 daniel 5562: } else {
1.152 daniel 5563: if (RAW == ';') {
1.40 daniel 5564: NEXT;
1.24 daniel 5565: /*
1.77 daniel 5566: * Ask first SAX for entity resolution, otherwise try the
5567: * predefined set.
5568: */
5569: if (ctxt->sax != NULL) {
5570: if (ctxt->sax->getEntity != NULL)
5571: ent = ctxt->sax->getEntity(ctxt->userData, name);
5572: if (ent == NULL)
5573: ent = xmlGetPredefinedEntity(name);
5574: }
5575: /*
1.98 daniel 5576: * [ WFC: Entity Declared ]
5577: * In a document without any DTD, a document with only an
5578: * internal DTD subset which contains no parameter entity
5579: * references, or a document with "standalone='yes'", the
5580: * Name given in the entity reference must match that in an
5581: * entity declaration, except that well-formed documents
5582: * need not declare any of the following entities: amp, lt,
5583: * gt, apos, quot.
5584: * The declaration of a parameter entity must precede any
5585: * reference to it.
5586: * Similarly, the declaration of a general entity must
5587: * precede any reference to it which appears in a default
5588: * value in an attribute-list declaration. Note that if
5589: * entities are declared in the external subset or in
5590: * external parameter entities, a non-validating processor
5591: * is not obligated to read and process their declarations;
5592: * for such documents, the rule that an entity must be
5593: * declared is a well-formedness constraint only if
5594: * standalone='yes'.
1.59 daniel 5595: */
1.77 daniel 5596: if (ent == NULL) {
1.98 daniel 5597: if ((ctxt->standalone == 1) ||
5598: ((ctxt->hasExternalSubset == 0) &&
5599: (ctxt->hasPErefs == 0))) {
5600: if ((ctxt->sax != NULL) && (ctxt->sax->error != NULL))
1.77 daniel 5601: ctxt->sax->error(ctxt->userData,
5602: "Entity '%s' not defined\n", name);
1.123 daniel 5603: ctxt->errNo = XML_ERR_UNDECLARED_ENTITY;
1.77 daniel 5604: ctxt->wellFormed = 0;
5605: } else {
1.98 daniel 5606: if ((ctxt->sax != NULL) && (ctxt->sax->warning != NULL))
5607: ctxt->sax->warning(ctxt->userData,
5608: "Entity '%s' not defined\n", name);
1.123 daniel 5609: ctxt->errNo = XML_WAR_UNDECLARED_ENTITY;
1.59 daniel 5610: }
1.77 daniel 5611: }
1.59 daniel 5612:
5613: /*
1.98 daniel 5614: * [ WFC: Parsed Entity ]
5615: * An entity reference must not contain the name of an
5616: * unparsed entity
5617: */
1.159 daniel 5618: else if (ent->etype == XML_EXTERNAL_GENERAL_UNPARSED_ENTITY) {
1.98 daniel 5619: if ((ctxt->sax != NULL) && (ctxt->sax->error != NULL))
5620: ctxt->sax->error(ctxt->userData,
5621: "Entity reference to unparsed entity %s\n", name);
1.123 daniel 5622: ctxt->errNo = XML_ERR_UNPARSED_ENTITY;
1.98 daniel 5623: ctxt->wellFormed = 0;
5624: }
5625:
5626: /*
5627: * [ WFC: No External Entity References ]
5628: * Attribute values cannot contain direct or indirect
5629: * entity references to external entities.
5630: */
5631: else if ((ctxt->instate == XML_PARSER_ATTRIBUTE_VALUE) &&
1.159 daniel 5632: (ent->etype == XML_EXTERNAL_GENERAL_PARSED_ENTITY)) {
1.98 daniel 5633: if ((ctxt->sax != NULL) && (ctxt->sax->error != NULL))
5634: ctxt->sax->error(ctxt->userData,
5635: "Attribute references external entity '%s'\n", name);
1.123 daniel 5636: ctxt->errNo = XML_ERR_ENTITY_IS_EXTERNAL;
1.98 daniel 5637: ctxt->wellFormed = 0;
5638: }
5639: /*
5640: * [ WFC: No < in Attribute Values ]
5641: * The replacement text of any entity referred to directly or
5642: * indirectly in an attribute value (other than "<") must
5643: * not contain a <.
1.59 daniel 5644: */
1.98 daniel 5645: else if ((ctxt->instate == XML_PARSER_ATTRIBUTE_VALUE) &&
1.116 daniel 5646: (ent != NULL) &&
5647: (xmlStrcmp(ent->name, BAD_CAST "lt")) &&
1.98 daniel 5648: (ent->content != NULL) &&
5649: (xmlStrchr(ent->content, '<'))) {
5650: if ((ctxt->sax != NULL) && (ctxt->sax->error != NULL))
5651: ctxt->sax->error(ctxt->userData,
5652: "'<' in entity '%s' is not allowed in attributes values\n", name);
1.123 daniel 5653: ctxt->errNo = XML_ERR_LT_IN_ATTRIBUTE;
1.98 daniel 5654: ctxt->wellFormed = 0;
5655: }
5656:
5657: /*
5658: * Internal check, no parameter entities here ...
5659: */
5660: else {
1.159 daniel 5661: switch (ent->etype) {
1.59 daniel 5662: case XML_INTERNAL_PARAMETER_ENTITY:
5663: case XML_EXTERNAL_PARAMETER_ENTITY:
5664: if ((ctxt->sax != NULL) && (ctxt->sax->error != NULL))
1.74 daniel 5665: ctxt->sax->error(ctxt->userData,
1.59 daniel 5666: "Attempt to reference the parameter entity '%s'\n", name);
1.123 daniel 5667: ctxt->errNo = XML_ERR_ENTITY_IS_PARAMETER;
1.59 daniel 5668: ctxt->wellFormed = 0;
5669: break;
5670: }
5671: }
5672:
5673: /*
1.98 daniel 5674: * [ WFC: No Recursion ]
1.117 daniel 5675: * TODO A parsed entity must not contain a recursive reference
5676: * to itself, either directly or indirectly.
1.59 daniel 5677: */
1.77 daniel 5678:
1.24 daniel 5679: } else {
1.55 daniel 5680: if ((ctxt->sax != NULL) && (ctxt->sax->error != NULL))
1.74 daniel 5681: ctxt->sax->error(ctxt->userData,
1.59 daniel 5682: "xmlParseEntityRef: expecting ';'\n");
1.123 daniel 5683: ctxt->errNo = XML_ERR_ENTITYREF_SEMICOL_MISSING;
1.59 daniel 5684: ctxt->wellFormed = 0;
1.24 daniel 5685: }
1.119 daniel 5686: xmlFree(name);
1.24 daniel 5687: }
5688: }
1.77 daniel 5689: return(ent);
1.24 daniel 5690: }
1.135 daniel 5691: /**
5692: * xmlParseStringEntityRef:
5693: * @ctxt: an XML parser context
5694: * @str: a pointer to an index in the string
5695: *
5696: * parse ENTITY references declarations, but this version parses it from
5697: * a string value.
5698: *
5699: * [68] EntityRef ::= '&' Name ';'
5700: *
5701: * [ WFC: Entity Declared ]
5702: * In a document without any DTD, a document with only an internal DTD
5703: * subset which contains no parameter entity references, or a document
5704: * with "standalone='yes'", the Name given in the entity reference
5705: * must match that in an entity declaration, except that well-formed
5706: * documents need not declare any of the following entities: amp, lt,
5707: * gt, apos, quot. The declaration of a parameter entity must precede
5708: * any reference to it. Similarly, the declaration of a general entity
5709: * must precede any reference to it which appears in a default value in an
5710: * attribute-list declaration. Note that if entities are declared in the
5711: * external subset or in external parameter entities, a non-validating
5712: * processor is not obligated to read and process their declarations;
5713: * for such documents, the rule that an entity must be declared is a
5714: * well-formedness constraint only if standalone='yes'.
5715: *
5716: * [ WFC: Parsed Entity ]
5717: * An entity reference must not contain the name of an unparsed entity
5718: *
5719: * Returns the xmlEntityPtr if found, or NULL otherwise. The str pointer
5720: * is updated to the current location in the string.
5721: */
5722: xmlEntityPtr
5723: xmlParseStringEntityRef(xmlParserCtxtPtr ctxt, const xmlChar ** str) {
5724: xmlChar *name;
5725: const xmlChar *ptr;
5726: xmlChar cur;
5727: xmlEntityPtr ent = NULL;
5728:
5729: GROW;
5730:
1.156 daniel 5731: if ((str == NULL) || (*str == NULL))
5732: return(NULL);
1.135 daniel 5733: ptr = *str;
5734: cur = *ptr;
5735: if (cur == '&') {
5736: ptr++;
5737: cur = *ptr;
5738: name = xmlParseStringName(ctxt, &ptr);
5739: if (name == NULL) {
5740: if ((ctxt->sax != NULL) && (ctxt->sax->error != NULL))
5741: ctxt->sax->error(ctxt->userData,
5742: "xmlParseEntityRef: no name\n");
5743: ctxt->errNo = XML_ERR_NAME_REQUIRED;
5744: ctxt->wellFormed = 0;
5745: } else {
1.152 daniel 5746: if (RAW == ';') {
1.135 daniel 5747: NEXT;
5748: /*
5749: * Ask first SAX for entity resolution, otherwise try the
5750: * predefined set.
5751: */
5752: if (ctxt->sax != NULL) {
5753: if (ctxt->sax->getEntity != NULL)
5754: ent = ctxt->sax->getEntity(ctxt->userData, name);
5755: if (ent == NULL)
5756: ent = xmlGetPredefinedEntity(name);
5757: }
5758: /*
5759: * [ WFC: Entity Declared ]
5760: * In a document without any DTD, a document with only an
5761: * internal DTD subset which contains no parameter entity
5762: * references, or a document with "standalone='yes'", the
5763: * Name given in the entity reference must match that in an
5764: * entity declaration, except that well-formed documents
5765: * need not declare any of the following entities: amp, lt,
5766: * gt, apos, quot.
5767: * The declaration of a parameter entity must precede any
5768: * reference to it.
5769: * Similarly, the declaration of a general entity must
5770: * precede any reference to it which appears in a default
5771: * value in an attribute-list declaration. Note that if
5772: * entities are declared in the external subset or in
5773: * external parameter entities, a non-validating processor
5774: * is not obligated to read and process their declarations;
5775: * for such documents, the rule that an entity must be
5776: * declared is a well-formedness constraint only if
5777: * standalone='yes'.
5778: */
5779: if (ent == NULL) {
5780: if ((ctxt->standalone == 1) ||
5781: ((ctxt->hasExternalSubset == 0) &&
5782: (ctxt->hasPErefs == 0))) {
5783: if ((ctxt->sax != NULL) && (ctxt->sax->error != NULL))
5784: ctxt->sax->error(ctxt->userData,
5785: "Entity '%s' not defined\n", name);
5786: ctxt->errNo = XML_ERR_UNDECLARED_ENTITY;
5787: ctxt->wellFormed = 0;
5788: } else {
5789: if ((ctxt->sax != NULL) && (ctxt->sax->warning != NULL))
5790: ctxt->sax->warning(ctxt->userData,
5791: "Entity '%s' not defined\n", name);
5792: ctxt->errNo = XML_WAR_UNDECLARED_ENTITY;
5793: }
5794: }
5795:
5796: /*
5797: * [ WFC: Parsed Entity ]
5798: * An entity reference must not contain the name of an
5799: * unparsed entity
5800: */
1.159 daniel 5801: else if (ent->etype == XML_EXTERNAL_GENERAL_UNPARSED_ENTITY) {
1.135 daniel 5802: if ((ctxt->sax != NULL) && (ctxt->sax->error != NULL))
5803: ctxt->sax->error(ctxt->userData,
5804: "Entity reference to unparsed entity %s\n", name);
5805: ctxt->errNo = XML_ERR_UNPARSED_ENTITY;
5806: ctxt->wellFormed = 0;
5807: }
5808:
5809: /*
5810: * [ WFC: No External Entity References ]
5811: * Attribute values cannot contain direct or indirect
5812: * entity references to external entities.
5813: */
5814: else if ((ctxt->instate == XML_PARSER_ATTRIBUTE_VALUE) &&
1.159 daniel 5815: (ent->etype == XML_EXTERNAL_GENERAL_PARSED_ENTITY)) {
1.135 daniel 5816: if ((ctxt->sax != NULL) && (ctxt->sax->error != NULL))
5817: ctxt->sax->error(ctxt->userData,
5818: "Attribute references external entity '%s'\n", name);
5819: ctxt->errNo = XML_ERR_ENTITY_IS_EXTERNAL;
5820: ctxt->wellFormed = 0;
5821: }
5822: /*
5823: * [ WFC: No < in Attribute Values ]
5824: * The replacement text of any entity referred to directly or
5825: * indirectly in an attribute value (other than "<") must
5826: * not contain a <.
5827: */
5828: else if ((ctxt->instate == XML_PARSER_ATTRIBUTE_VALUE) &&
5829: (ent != NULL) &&
5830: (xmlStrcmp(ent->name, BAD_CAST "lt")) &&
5831: (ent->content != NULL) &&
5832: (xmlStrchr(ent->content, '<'))) {
5833: if ((ctxt->sax != NULL) && (ctxt->sax->error != NULL))
5834: ctxt->sax->error(ctxt->userData,
5835: "'<' in entity '%s' is not allowed in attributes values\n", name);
5836: ctxt->errNo = XML_ERR_LT_IN_ATTRIBUTE;
5837: ctxt->wellFormed = 0;
5838: }
5839:
5840: /*
5841: * Internal check, no parameter entities here ...
5842: */
5843: else {
1.159 daniel 5844: switch (ent->etype) {
1.135 daniel 5845: case XML_INTERNAL_PARAMETER_ENTITY:
5846: case XML_EXTERNAL_PARAMETER_ENTITY:
5847: if ((ctxt->sax != NULL) && (ctxt->sax->error != NULL))
5848: ctxt->sax->error(ctxt->userData,
5849: "Attempt to reference the parameter entity '%s'\n", name);
5850: ctxt->errNo = XML_ERR_ENTITY_IS_PARAMETER;
5851: ctxt->wellFormed = 0;
5852: break;
5853: }
5854: }
5855:
5856: /*
5857: * [ WFC: No Recursion ]
5858: * TODO A parsed entity must not contain a recursive reference
5859: * to itself, either directly or indirectly.
5860: */
5861:
5862: } else {
5863: if ((ctxt->sax != NULL) && (ctxt->sax->error != NULL))
5864: ctxt->sax->error(ctxt->userData,
5865: "xmlParseEntityRef: expecting ';'\n");
5866: ctxt->errNo = XML_ERR_ENTITYREF_SEMICOL_MISSING;
5867: ctxt->wellFormed = 0;
5868: }
5869: xmlFree(name);
5870: }
5871: }
5872: return(ent);
5873: }
1.24 daniel 5874:
1.50 daniel 5875: /**
5876: * xmlParsePEReference:
5877: * @ctxt: an XML parser context
5878: *
5879: * parse PEReference declarations
1.77 daniel 5880: * The entity content is handled directly by pushing it's content as
5881: * a new input stream.
1.22 daniel 5882: *
5883: * [69] PEReference ::= '%' Name ';'
1.68 daniel 5884: *
1.98 daniel 5885: * [ WFC: No Recursion ]
5886: * TODO A parsed entity must not contain a recursive
5887: * reference to itself, either directly or indirectly.
5888: *
5889: * [ WFC: Entity Declared ]
5890: * In a document without any DTD, a document with only an internal DTD
5891: * subset which contains no parameter entity references, or a document
5892: * with "standalone='yes'", ... ... The declaration of a parameter
5893: * entity must precede any reference to it...
5894: *
5895: * [ VC: Entity Declared ]
5896: * In a document with an external subset or external parameter entities
5897: * with "standalone='no'", ... ... The declaration of a parameter entity
5898: * must precede any reference to it...
5899: *
5900: * [ WFC: In DTD ]
5901: * Parameter-entity references may only appear in the DTD.
5902: * NOTE: misleading but this is handled.
1.22 daniel 5903: */
1.77 daniel 5904: void
1.55 daniel 5905: xmlParsePEReference(xmlParserCtxtPtr ctxt) {
1.123 daniel 5906: xmlChar *name;
1.72 daniel 5907: xmlEntityPtr entity = NULL;
1.50 daniel 5908: xmlParserInputPtr input;
1.22 daniel 5909:
1.152 daniel 5910: if (RAW == '%') {
1.40 daniel 5911: NEXT;
1.22 daniel 5912: name = xmlParseName(ctxt);
5913: if (name == NULL) {
1.55 daniel 5914: if ((ctxt->sax != NULL) && (ctxt->sax->error != NULL))
1.122 daniel 5915: ctxt->sax->error(ctxt->userData,
5916: "xmlParsePEReference: no name\n");
1.123 daniel 5917: ctxt->errNo = XML_ERR_NAME_REQUIRED;
1.59 daniel 5918: ctxt->wellFormed = 0;
1.22 daniel 5919: } else {
1.152 daniel 5920: if (RAW == ';') {
1.40 daniel 5921: NEXT;
1.98 daniel 5922: if ((ctxt->sax != NULL) &&
5923: (ctxt->sax->getParameterEntity != NULL))
5924: entity = ctxt->sax->getParameterEntity(ctxt->userData,
5925: name);
1.45 daniel 5926: if (entity == NULL) {
1.98 daniel 5927: /*
5928: * [ WFC: Entity Declared ]
5929: * In a document without any DTD, a document with only an
5930: * internal DTD subset which contains no parameter entity
5931: * references, or a document with "standalone='yes'", ...
5932: * ... The declaration of a parameter entity must precede
5933: * any reference to it...
5934: */
5935: if ((ctxt->standalone == 1) ||
5936: ((ctxt->hasExternalSubset == 0) &&
5937: (ctxt->hasPErefs == 0))) {
5938: if ((ctxt->sax != NULL) && (ctxt->sax->error != NULL))
5939: ctxt->sax->error(ctxt->userData,
5940: "PEReference: %%%s; not found\n", name);
1.123 daniel 5941: ctxt->errNo = XML_ERR_UNDECLARED_ENTITY;
1.98 daniel 5942: ctxt->wellFormed = 0;
5943: } else {
5944: /*
5945: * [ VC: Entity Declared ]
5946: * In a document with an external subset or external
5947: * parameter entities with "standalone='no'", ...
5948: * ... The declaration of a parameter entity must precede
5949: * any reference to it...
5950: */
5951: if ((ctxt->sax != NULL) && (ctxt->sax->warning != NULL))
5952: ctxt->sax->warning(ctxt->userData,
5953: "PEReference: %%%s; not found\n", name);
5954: ctxt->valid = 0;
5955: }
1.50 daniel 5956: } else {
1.98 daniel 5957: /*
5958: * Internal checking in case the entity quest barfed
5959: */
1.159 daniel 5960: if ((entity->etype != XML_INTERNAL_PARAMETER_ENTITY) &&
5961: (entity->etype != XML_EXTERNAL_PARAMETER_ENTITY)) {
1.98 daniel 5962: if ((ctxt->sax != NULL) && (ctxt->sax->warning != NULL))
5963: ctxt->sax->warning(ctxt->userData,
5964: "Internal: %%%s; is not a parameter entity\n", name);
5965: } else {
5966: input = xmlNewEntityInputStream(ctxt, entity);
5967: xmlPushInput(ctxt, input);
5968: }
1.45 daniel 5969: }
1.98 daniel 5970: ctxt->hasPErefs = 1;
1.22 daniel 5971: } else {
1.55 daniel 5972: if ((ctxt->sax != NULL) && (ctxt->sax->error != NULL))
1.74 daniel 5973: ctxt->sax->error(ctxt->userData,
1.59 daniel 5974: "xmlParsePEReference: expecting ';'\n");
1.123 daniel 5975: ctxt->errNo = XML_ERR_ENTITYREF_SEMICOL_MISSING;
1.59 daniel 5976: ctxt->wellFormed = 0;
1.22 daniel 5977: }
1.119 daniel 5978: xmlFree(name);
1.3 veillard 5979: }
5980: }
5981: }
5982:
1.50 daniel 5983: /**
1.135 daniel 5984: * xmlParseStringPEReference:
5985: * @ctxt: an XML parser context
5986: * @str: a pointer to an index in the string
5987: *
5988: * parse PEReference declarations
5989: *
5990: * [69] PEReference ::= '%' Name ';'
5991: *
5992: * [ WFC: No Recursion ]
5993: * TODO A parsed entity must not contain a recursive
5994: * reference to itself, either directly or indirectly.
5995: *
5996: * [ WFC: Entity Declared ]
5997: * In a document without any DTD, a document with only an internal DTD
5998: * subset which contains no parameter entity references, or a document
5999: * with "standalone='yes'", ... ... The declaration of a parameter
6000: * entity must precede any reference to it...
6001: *
6002: * [ VC: Entity Declared ]
6003: * In a document with an external subset or external parameter entities
6004: * with "standalone='no'", ... ... The declaration of a parameter entity
6005: * must precede any reference to it...
6006: *
6007: * [ WFC: In DTD ]
6008: * Parameter-entity references may only appear in the DTD.
6009: * NOTE: misleading but this is handled.
6010: *
6011: * Returns the string of the entity content.
6012: * str is updated to the current value of the index
6013: */
6014: xmlEntityPtr
6015: xmlParseStringPEReference(xmlParserCtxtPtr ctxt, const xmlChar **str) {
6016: const xmlChar *ptr;
6017: xmlChar cur;
6018: xmlChar *name;
6019: xmlEntityPtr entity = NULL;
6020:
6021: if ((str == NULL) || (*str == NULL)) return(NULL);
6022: ptr = *str;
6023: cur = *ptr;
6024: if (cur == '%') {
6025: ptr++;
6026: cur = *ptr;
6027: name = xmlParseStringName(ctxt, &ptr);
6028: if (name == NULL) {
6029: if ((ctxt->sax != NULL) && (ctxt->sax->error != NULL))
6030: ctxt->sax->error(ctxt->userData,
6031: "xmlParseStringPEReference: no name\n");
6032: ctxt->errNo = XML_ERR_NAME_REQUIRED;
6033: ctxt->wellFormed = 0;
6034: } else {
6035: cur = *ptr;
6036: if (cur == ';') {
6037: ptr++;
6038: cur = *ptr;
6039: if ((ctxt->sax != NULL) &&
6040: (ctxt->sax->getParameterEntity != NULL))
6041: entity = ctxt->sax->getParameterEntity(ctxt->userData,
6042: name);
6043: if (entity == NULL) {
6044: /*
6045: * [ WFC: Entity Declared ]
6046: * In a document without any DTD, a document with only an
6047: * internal DTD subset which contains no parameter entity
6048: * references, or a document with "standalone='yes'", ...
6049: * ... The declaration of a parameter entity must precede
6050: * any reference to it...
6051: */
6052: if ((ctxt->standalone == 1) ||
6053: ((ctxt->hasExternalSubset == 0) &&
6054: (ctxt->hasPErefs == 0))) {
6055: if ((ctxt->sax != NULL) && (ctxt->sax->error != NULL))
6056: ctxt->sax->error(ctxt->userData,
6057: "PEReference: %%%s; not found\n", name);
6058: ctxt->errNo = XML_ERR_UNDECLARED_ENTITY;
6059: ctxt->wellFormed = 0;
6060: } else {
6061: /*
6062: * [ VC: Entity Declared ]
6063: * In a document with an external subset or external
6064: * parameter entities with "standalone='no'", ...
6065: * ... The declaration of a parameter entity must
6066: * precede any reference to it...
6067: */
6068: if ((ctxt->sax != NULL) && (ctxt->sax->warning != NULL))
6069: ctxt->sax->warning(ctxt->userData,
6070: "PEReference: %%%s; not found\n", name);
6071: ctxt->valid = 0;
6072: }
6073: } else {
6074: /*
6075: * Internal checking in case the entity quest barfed
6076: */
1.159 daniel 6077: if ((entity->etype != XML_INTERNAL_PARAMETER_ENTITY) &&
6078: (entity->etype != XML_EXTERNAL_PARAMETER_ENTITY)) {
1.135 daniel 6079: if ((ctxt->sax != NULL) && (ctxt->sax->warning != NULL))
6080: ctxt->sax->warning(ctxt->userData,
6081: "Internal: %%%s; is not a parameter entity\n", name);
6082: }
6083: }
6084: ctxt->hasPErefs = 1;
6085: } else {
6086: if ((ctxt->sax != NULL) && (ctxt->sax->error != NULL))
6087: ctxt->sax->error(ctxt->userData,
6088: "xmlParseStringPEReference: expecting ';'\n");
6089: ctxt->errNo = XML_ERR_ENTITYREF_SEMICOL_MISSING;
6090: ctxt->wellFormed = 0;
6091: }
6092: xmlFree(name);
6093: }
6094: }
6095: *str = ptr;
6096: return(entity);
6097: }
6098:
6099: /**
1.50 daniel 6100: * xmlParseDocTypeDecl :
6101: * @ctxt: an XML parser context
6102: *
6103: * parse a DOCTYPE declaration
1.21 daniel 6104: *
1.22 daniel 6105: * [28] doctypedecl ::= '<!DOCTYPE' S Name (S ExternalID)? S?
6106: * ('[' (markupdecl | PEReference | S)* ']' S?)? '>'
1.98 daniel 6107: *
6108: * [ VC: Root Element Type ]
1.99 daniel 6109: * The Name in the document type declaration must match the element
1.98 daniel 6110: * type of the root element.
1.21 daniel 6111: */
6112:
1.55 daniel 6113: void
6114: xmlParseDocTypeDecl(xmlParserCtxtPtr ctxt) {
1.123 daniel 6115: xmlChar *name;
6116: xmlChar *ExternalID = NULL;
6117: xmlChar *URI = NULL;
1.21 daniel 6118:
6119: /*
6120: * We know that '<!DOCTYPE' has been detected.
6121: */
1.40 daniel 6122: SKIP(9);
1.21 daniel 6123:
1.42 daniel 6124: SKIP_BLANKS;
1.21 daniel 6125:
6126: /*
6127: * Parse the DOCTYPE name.
6128: */
6129: name = xmlParseName(ctxt);
6130: if (name == NULL) {
1.55 daniel 6131: if ((ctxt->sax != NULL) && (ctxt->sax->error != NULL))
1.122 daniel 6132: ctxt->sax->error(ctxt->userData,
6133: "xmlParseDocTypeDecl : no DOCTYPE name !\n");
1.59 daniel 6134: ctxt->wellFormed = 0;
1.123 daniel 6135: ctxt->errNo = XML_ERR_NAME_REQUIRED;
1.21 daniel 6136: }
6137:
1.42 daniel 6138: SKIP_BLANKS;
1.21 daniel 6139:
6140: /*
1.22 daniel 6141: * Check for SystemID and ExternalID
6142: */
1.67 daniel 6143: URI = xmlParseExternalID(ctxt, &ExternalID, 1);
1.98 daniel 6144:
6145: if ((URI != NULL) || (ExternalID != NULL)) {
6146: ctxt->hasExternalSubset = 1;
6147: }
6148:
1.42 daniel 6149: SKIP_BLANKS;
1.36 daniel 6150:
1.76 daniel 6151: /*
6152: * NOTE: the SAX callback may try to fetch the external subset
6153: * entity and fill it up !
6154: */
1.72 daniel 6155: if ((ctxt->sax != NULL) && (ctxt->sax->internalSubset != NULL))
1.74 daniel 6156: ctxt->sax->internalSubset(ctxt->userData, name, ExternalID, URI);
1.22 daniel 6157:
6158: /*
1.140 daniel 6159: * Cleanup
6160: */
6161: if (URI != NULL) xmlFree(URI);
6162: if (ExternalID != NULL) xmlFree(ExternalID);
6163: if (name != NULL) xmlFree(name);
6164:
6165: /*
6166: * Is there any internal subset declarations ?
6167: * they are handled separately in xmlParseInternalSubset()
6168: */
1.152 daniel 6169: if (RAW == '[')
1.140 daniel 6170: return;
6171:
6172: /*
6173: * We should be at the end of the DOCTYPE declaration.
6174: */
1.152 daniel 6175: if (RAW != '>') {
1.140 daniel 6176: if ((ctxt->sax != NULL) && (ctxt->sax->error != NULL))
6177: ctxt->sax->error(ctxt->userData, "DOCTYPE unproperly terminated\n");
6178: ctxt->wellFormed = 0;
6179: ctxt->errNo = XML_ERR_DOCTYPE_NOT_FINISHED;
6180: }
6181: NEXT;
6182: }
6183:
6184: /**
6185: * xmlParseInternalsubset :
6186: * @ctxt: an XML parser context
6187: *
6188: * parse the internal subset declaration
6189: *
6190: * [28 end] ('[' (markupdecl | PEReference | S)* ']' S?)? '>'
6191: */
6192:
6193: void
6194: xmlParseInternalSubset(xmlParserCtxtPtr ctxt) {
6195: /*
1.22 daniel 6196: * Is there any DTD definition ?
6197: */
1.152 daniel 6198: if (RAW == '[') {
1.96 daniel 6199: ctxt->instate = XML_PARSER_DTD;
1.40 daniel 6200: NEXT;
1.22 daniel 6201: /*
6202: * Parse the succession of Markup declarations and
6203: * PEReferences.
6204: * Subsequence (markupdecl | PEReference | S)*
6205: */
1.152 daniel 6206: while (RAW != ']') {
1.123 daniel 6207: const xmlChar *check = CUR_PTR;
1.115 daniel 6208: int cons = ctxt->input->consumed;
1.22 daniel 6209:
1.42 daniel 6210: SKIP_BLANKS;
1.22 daniel 6211: xmlParseMarkupDecl(ctxt);
1.50 daniel 6212: xmlParsePEReference(ctxt);
1.22 daniel 6213:
1.115 daniel 6214: /*
6215: * Pop-up of finished entities.
6216: */
1.152 daniel 6217: while ((RAW == 0) && (ctxt->inputNr > 1))
1.115 daniel 6218: xmlPopInput(ctxt);
6219:
1.118 daniel 6220: if ((CUR_PTR == check) && (cons == ctxt->input->consumed)) {
1.55 daniel 6221: if ((ctxt->sax != NULL) && (ctxt->sax->error != NULL))
1.74 daniel 6222: ctxt->sax->error(ctxt->userData,
1.140 daniel 6223: "xmlParseInternalSubset: error detected in Markup declaration\n");
1.59 daniel 6224: ctxt->wellFormed = 0;
1.123 daniel 6225: ctxt->errNo = XML_ERR_INTERNAL_ERROR;
1.22 daniel 6226: break;
6227: }
6228: }
1.152 daniel 6229: if (RAW == ']') NEXT;
1.22 daniel 6230: }
6231:
6232: /*
6233: * We should be at the end of the DOCTYPE declaration.
1.21 daniel 6234: */
1.152 daniel 6235: if (RAW != '>') {
1.55 daniel 6236: if ((ctxt->sax != NULL) && (ctxt->sax->error != NULL))
1.74 daniel 6237: ctxt->sax->error(ctxt->userData, "DOCTYPE unproperly terminated\n");
1.59 daniel 6238: ctxt->wellFormed = 0;
1.123 daniel 6239: ctxt->errNo = XML_ERR_DOCTYPE_NOT_FINISHED;
1.21 daniel 6240: }
1.40 daniel 6241: NEXT;
1.21 daniel 6242: }
6243:
1.50 daniel 6244: /**
6245: * xmlParseAttribute:
6246: * @ctxt: an XML parser context
1.123 daniel 6247: * @value: a xmlChar ** used to store the value of the attribute
1.50 daniel 6248: *
6249: * parse an attribute
1.3 veillard 6250: *
1.22 daniel 6251: * [41] Attribute ::= Name Eq AttValue
6252: *
1.98 daniel 6253: * [ WFC: No External Entity References ]
6254: * Attribute values cannot contain direct or indirect entity references
6255: * to external entities.
6256: *
6257: * [ WFC: No < in Attribute Values ]
6258: * The replacement text of any entity referred to directly or indirectly in
6259: * an attribute value (other than "<") must not contain a <.
6260: *
6261: * [ VC: Attribute Value Type ]
1.117 daniel 6262: * The attribute must have been declared; the value must be of the type
1.99 daniel 6263: * declared for it.
1.98 daniel 6264: *
1.22 daniel 6265: * [25] Eq ::= S? '=' S?
6266: *
1.29 daniel 6267: * With namespace:
6268: *
6269: * [NS 11] Attribute ::= QName Eq AttValue
1.43 daniel 6270: *
6271: * Also the case QName == xmlns:??? is handled independently as a namespace
6272: * definition.
1.69 daniel 6273: *
1.72 daniel 6274: * Returns the attribute name, and the value in *value.
1.3 veillard 6275: */
6276:
1.123 daniel 6277: xmlChar *
6278: xmlParseAttribute(xmlParserCtxtPtr ctxt, xmlChar **value) {
6279: xmlChar *name, *val;
1.3 veillard 6280:
1.72 daniel 6281: *value = NULL;
6282: name = xmlParseName(ctxt);
1.22 daniel 6283: if (name == NULL) {
1.55 daniel 6284: if ((ctxt->sax != NULL) && (ctxt->sax->error != NULL))
1.74 daniel 6285: ctxt->sax->error(ctxt->userData, "error parsing attribute name\n");
1.59 daniel 6286: ctxt->wellFormed = 0;
1.123 daniel 6287: ctxt->errNo = XML_ERR_NAME_REQUIRED;
1.52 daniel 6288: return(NULL);
1.3 veillard 6289: }
6290:
6291: /*
1.29 daniel 6292: * read the value
1.3 veillard 6293: */
1.42 daniel 6294: SKIP_BLANKS;
1.152 daniel 6295: if (RAW == '=') {
1.40 daniel 6296: NEXT;
1.42 daniel 6297: SKIP_BLANKS;
1.72 daniel 6298: val = xmlParseAttValue(ctxt);
1.96 daniel 6299: ctxt->instate = XML_PARSER_CONTENT;
1.29 daniel 6300: } else {
1.55 daniel 6301: if ((ctxt->sax != NULL) && (ctxt->sax->error != NULL))
1.74 daniel 6302: ctxt->sax->error(ctxt->userData,
1.59 daniel 6303: "Specification mandate value for attribute %s\n", name);
1.123 daniel 6304: ctxt->errNo = XML_ERR_ATTRIBUTE_WITHOUT_VALUE;
1.59 daniel 6305: ctxt->wellFormed = 0;
1.52 daniel 6306: return(NULL);
1.43 daniel 6307: }
6308:
1.72 daniel 6309: *value = val;
6310: return(name);
1.3 veillard 6311: }
6312:
1.50 daniel 6313: /**
6314: * xmlParseStartTag:
6315: * @ctxt: an XML parser context
6316: *
6317: * parse a start of tag either for rule element or
6318: * EmptyElement. In both case we don't parse the tag closing chars.
1.27 daniel 6319: *
6320: * [40] STag ::= '<' Name (S Attribute)* S? '>'
6321: *
1.98 daniel 6322: * [ WFC: Unique Att Spec ]
6323: * No attribute name may appear more than once in the same start-tag or
6324: * empty-element tag.
6325: *
1.29 daniel 6326: * [44] EmptyElemTag ::= '<' Name (S Attribute)* S? '/>'
6327: *
1.98 daniel 6328: * [ WFC: Unique Att Spec ]
6329: * No attribute name may appear more than once in the same start-tag or
6330: * empty-element tag.
6331: *
1.29 daniel 6332: * With namespace:
6333: *
6334: * [NS 8] STag ::= '<' QName (S Attribute)* S? '>'
6335: *
6336: * [NS 10] EmptyElement ::= '<' QName (S Attribute)* S? '/>'
1.83 daniel 6337: *
1.129 daniel 6338: * Returne the element name parsed
1.2 veillard 6339: */
6340:
1.123 daniel 6341: xmlChar *
1.69 daniel 6342: xmlParseStartTag(xmlParserCtxtPtr ctxt) {
1.123 daniel 6343: xmlChar *name;
6344: xmlChar *attname;
6345: xmlChar *attvalue;
6346: const xmlChar **atts = NULL;
1.72 daniel 6347: int nbatts = 0;
6348: int maxatts = 0;
6349: int i;
1.2 veillard 6350:
1.152 daniel 6351: if (RAW != '<') return(NULL);
1.40 daniel 6352: NEXT;
1.3 veillard 6353:
1.72 daniel 6354: name = xmlParseName(ctxt);
1.59 daniel 6355: if (name == NULL) {
6356: if ((ctxt->sax != NULL) && (ctxt->sax->error != NULL))
1.74 daniel 6357: ctxt->sax->error(ctxt->userData,
1.59 daniel 6358: "xmlParseStartTag: invalid element name\n");
1.123 daniel 6359: ctxt->errNo = XML_ERR_NAME_REQUIRED;
1.59 daniel 6360: ctxt->wellFormed = 0;
1.83 daniel 6361: return(NULL);
1.50 daniel 6362: }
6363:
6364: /*
1.3 veillard 6365: * Now parse the attributes, it ends up with the ending
6366: *
6367: * (S Attribute)* S?
6368: */
1.42 daniel 6369: SKIP_BLANKS;
1.91 daniel 6370: GROW;
1.153 daniel 6371: while ((IS_CHAR(RAW)) &&
1.152 daniel 6372: (RAW != '>') &&
6373: ((RAW != '/') || (NXT(1) != '>'))) {
1.123 daniel 6374: const xmlChar *q = CUR_PTR;
1.91 daniel 6375: int cons = ctxt->input->consumed;
1.29 daniel 6376:
1.72 daniel 6377: attname = xmlParseAttribute(ctxt, &attvalue);
6378: if ((attname != NULL) && (attvalue != NULL)) {
6379: /*
1.98 daniel 6380: * [ WFC: Unique Att Spec ]
6381: * No attribute name may appear more than once in the same
6382: * start-tag or empty-element tag.
1.72 daniel 6383: */
6384: for (i = 0; i < nbatts;i += 2) {
6385: if (!xmlStrcmp(atts[i], attname)) {
6386: if ((ctxt->sax != NULL) && (ctxt->sax->error != NULL))
1.98 daniel 6387: ctxt->sax->error(ctxt->userData,
6388: "Attribute %s redefined\n",
6389: attname);
1.72 daniel 6390: ctxt->wellFormed = 0;
1.123 daniel 6391: ctxt->errNo = XML_ERR_ATTRIBUTE_REDEFINED;
1.119 daniel 6392: xmlFree(attname);
6393: xmlFree(attvalue);
1.98 daniel 6394: goto failed;
1.72 daniel 6395: }
6396: }
6397:
6398: /*
6399: * Add the pair to atts
6400: */
6401: if (atts == NULL) {
6402: maxatts = 10;
1.123 daniel 6403: atts = (const xmlChar **) xmlMalloc(maxatts * sizeof(xmlChar *));
1.72 daniel 6404: if (atts == NULL) {
1.86 daniel 6405: fprintf(stderr, "malloc of %ld byte failed\n",
1.123 daniel 6406: maxatts * (long)sizeof(xmlChar *));
1.83 daniel 6407: return(NULL);
1.72 daniel 6408: }
1.127 daniel 6409: } else if (nbatts + 4 > maxatts) {
1.72 daniel 6410: maxatts *= 2;
1.123 daniel 6411: atts = (const xmlChar **) xmlRealloc(atts,
6412: maxatts * sizeof(xmlChar *));
1.72 daniel 6413: if (atts == NULL) {
1.86 daniel 6414: fprintf(stderr, "realloc of %ld byte failed\n",
1.123 daniel 6415: maxatts * (long)sizeof(xmlChar *));
1.83 daniel 6416: return(NULL);
1.72 daniel 6417: }
6418: }
6419: atts[nbatts++] = attname;
6420: atts[nbatts++] = attvalue;
6421: atts[nbatts] = NULL;
6422: atts[nbatts + 1] = NULL;
6423: }
6424:
1.116 daniel 6425: failed:
1.42 daniel 6426: SKIP_BLANKS;
1.91 daniel 6427: if ((cons == ctxt->input->consumed) && (q == CUR_PTR)) {
1.55 daniel 6428: if ((ctxt->sax != NULL) && (ctxt->sax->error != NULL))
1.74 daniel 6429: ctxt->sax->error(ctxt->userData,
1.31 daniel 6430: "xmlParseStartTag: problem parsing attributes\n");
1.123 daniel 6431: ctxt->errNo = XML_ERR_INTERNAL_ERROR;
1.59 daniel 6432: ctxt->wellFormed = 0;
1.29 daniel 6433: break;
1.3 veillard 6434: }
1.91 daniel 6435: GROW;
1.3 veillard 6436: }
6437:
1.43 daniel 6438: /*
1.72 daniel 6439: * SAX: Start of Element !
1.43 daniel 6440: */
1.72 daniel 6441: if ((ctxt->sax != NULL) && (ctxt->sax->startElement != NULL))
1.74 daniel 6442: ctxt->sax->startElement(ctxt->userData, name, atts);
1.43 daniel 6443:
1.72 daniel 6444: if (atts != NULL) {
1.123 daniel 6445: for (i = 0;i < nbatts;i++) xmlFree((xmlChar *) atts[i]);
1.119 daniel 6446: xmlFree(atts);
1.72 daniel 6447: }
1.83 daniel 6448: return(name);
1.3 veillard 6449: }
6450:
1.50 daniel 6451: /**
6452: * xmlParseEndTag:
6453: * @ctxt: an XML parser context
6454: *
6455: * parse an end of tag
1.27 daniel 6456: *
6457: * [42] ETag ::= '</' Name S? '>'
1.29 daniel 6458: *
6459: * With namespace
6460: *
1.72 daniel 6461: * [NS 9] ETag ::= '</' QName S? '>'
1.7 veillard 6462: */
6463:
1.55 daniel 6464: void
1.140 daniel 6465: xmlParseEndTag(xmlParserCtxtPtr ctxt) {
1.123 daniel 6466: xmlChar *name;
1.140 daniel 6467: xmlChar *oldname;
1.7 veillard 6468:
1.91 daniel 6469: GROW;
1.152 daniel 6470: if ((RAW != '<') || (NXT(1) != '/')) {
1.55 daniel 6471: if ((ctxt->sax != NULL) && (ctxt->sax->error != NULL))
1.74 daniel 6472: ctxt->sax->error(ctxt->userData, "xmlParseEndTag: '</' not found\n");
1.59 daniel 6473: ctxt->wellFormed = 0;
1.123 daniel 6474: ctxt->errNo = XML_ERR_LTSLASH_REQUIRED;
1.27 daniel 6475: return;
6476: }
1.40 daniel 6477: SKIP(2);
1.7 veillard 6478:
1.72 daniel 6479: name = xmlParseName(ctxt);
1.7 veillard 6480:
6481: /*
6482: * We should definitely be at the ending "S? '>'" part
6483: */
1.91 daniel 6484: GROW;
1.42 daniel 6485: SKIP_BLANKS;
1.153 daniel 6486: if ((!IS_CHAR(RAW)) || (RAW != '>')) {
1.55 daniel 6487: if ((ctxt->sax != NULL) && (ctxt->sax->error != NULL))
1.74 daniel 6488: ctxt->sax->error(ctxt->userData, "End tag : expected '>'\n");
1.123 daniel 6489: ctxt->errNo = XML_ERR_GT_REQUIRED;
1.59 daniel 6490: ctxt->wellFormed = 0;
1.7 veillard 6491: } else
1.40 daniel 6492: NEXT;
1.7 veillard 6493:
1.72 daniel 6494: /*
1.98 daniel 6495: * [ WFC: Element Type Match ]
6496: * The Name in an element's end-tag must match the element type in the
6497: * start-tag.
6498: *
1.83 daniel 6499: */
1.147 daniel 6500: if ((name == NULL) || (ctxt->name == NULL) ||
6501: (xmlStrcmp(name, ctxt->name))) {
6502: if ((ctxt->sax != NULL) && (ctxt->sax->error != NULL)) {
6503: if ((name != NULL) && (ctxt->name != NULL)) {
6504: ctxt->sax->error(ctxt->userData,
6505: "Opening and ending tag mismatch: %s and %s\n",
6506: ctxt->name, name);
6507: } else if (ctxt->name != NULL) {
6508: ctxt->sax->error(ctxt->userData,
6509: "Ending tag eror for: %s\n", ctxt->name);
6510: } else {
6511: ctxt->sax->error(ctxt->userData,
6512: "Ending tag error: internal error ???\n");
6513: }
1.122 daniel 6514:
1.147 daniel 6515: }
1.123 daniel 6516: ctxt->errNo = XML_ERR_TAG_NAME_MISMATCH;
1.83 daniel 6517: ctxt->wellFormed = 0;
6518: }
6519:
6520: /*
1.72 daniel 6521: * SAX: End of Tag
6522: */
6523: if ((ctxt->sax != NULL) && (ctxt->sax->endElement != NULL))
1.74 daniel 6524: ctxt->sax->endElement(ctxt->userData, name);
1.72 daniel 6525:
6526: if (name != NULL)
1.119 daniel 6527: xmlFree(name);
1.140 daniel 6528: oldname = namePop(ctxt);
6529: if (oldname != NULL) {
6530: #ifdef DEBUG_STACK
6531: fprintf(stderr,"Close: popped %s\n", oldname);
6532: #endif
6533: xmlFree(oldname);
6534: }
1.7 veillard 6535: return;
6536: }
6537:
1.50 daniel 6538: /**
6539: * xmlParseCDSect:
6540: * @ctxt: an XML parser context
6541: *
6542: * Parse escaped pure raw content.
1.29 daniel 6543: *
6544: * [18] CDSect ::= CDStart CData CDEnd
6545: *
6546: * [19] CDStart ::= '<![CDATA['
6547: *
6548: * [20] Data ::= (Char* - (Char* ']]>' Char*))
6549: *
6550: * [21] CDEnd ::= ']]>'
1.3 veillard 6551: */
1.55 daniel 6552: void
6553: xmlParseCDSect(xmlParserCtxtPtr ctxt) {
1.135 daniel 6554: xmlChar *buf = NULL;
6555: int len = 0;
1.140 daniel 6556: int size = XML_PARSER_BUFFER_SIZE;
1.152 daniel 6557: int r, rl;
6558: int s, sl;
6559: int cur, l;
1.3 veillard 6560:
1.106 daniel 6561: if ((NXT(0) == '<') && (NXT(1) == '!') &&
1.40 daniel 6562: (NXT(2) == '[') && (NXT(3) == 'C') &&
6563: (NXT(4) == 'D') && (NXT(5) == 'A') &&
6564: (NXT(6) == 'T') && (NXT(7) == 'A') &&
6565: (NXT(8) == '[')) {
6566: SKIP(9);
1.29 daniel 6567: } else
1.45 daniel 6568: return;
1.109 daniel 6569:
6570: ctxt->instate = XML_PARSER_CDATA_SECTION;
1.152 daniel 6571: r = CUR_CHAR(rl);
6572: if (!IS_CHAR(r)) {
1.55 daniel 6573: if ((ctxt->sax != NULL) && (ctxt->sax->error != NULL))
1.122 daniel 6574: ctxt->sax->error(ctxt->userData,
1.135 daniel 6575: "CData section not finished\n");
1.59 daniel 6576: ctxt->wellFormed = 0;
1.123 daniel 6577: ctxt->errNo = XML_ERR_CDATA_NOT_FINISHED;
1.109 daniel 6578: ctxt->instate = XML_PARSER_CONTENT;
1.45 daniel 6579: return;
1.3 veillard 6580: }
1.152 daniel 6581: NEXTL(rl);
6582: s = CUR_CHAR(sl);
6583: if (!IS_CHAR(s)) {
1.55 daniel 6584: if ((ctxt->sax != NULL) && (ctxt->sax->error != NULL))
1.122 daniel 6585: ctxt->sax->error(ctxt->userData,
1.135 daniel 6586: "CData section not finished\n");
1.123 daniel 6587: ctxt->errNo = XML_ERR_CDATA_NOT_FINISHED;
1.59 daniel 6588: ctxt->wellFormed = 0;
1.109 daniel 6589: ctxt->instate = XML_PARSER_CONTENT;
1.45 daniel 6590: return;
1.3 veillard 6591: }
1.152 daniel 6592: NEXTL(sl);
6593: cur = CUR_CHAR(l);
1.135 daniel 6594: buf = (xmlChar *) xmlMalloc(size * sizeof(xmlChar));
6595: if (buf == NULL) {
6596: fprintf(stderr, "malloc of %d byte failed\n", size);
6597: return;
6598: }
1.108 veillard 6599: while (IS_CHAR(cur) &&
1.110 daniel 6600: ((r != ']') || (s != ']') || (cur != '>'))) {
1.152 daniel 6601: if (len + 5 >= size) {
1.135 daniel 6602: size *= 2;
6603: buf = xmlRealloc(buf, size * sizeof(xmlChar));
6604: if (buf == NULL) {
6605: fprintf(stderr, "realloc of %d byte failed\n", size);
6606: return;
6607: }
6608: }
1.152 daniel 6609: COPY_BUF(rl,buf,len,r);
1.110 daniel 6610: r = s;
1.152 daniel 6611: rl = sl;
1.110 daniel 6612: s = cur;
1.152 daniel 6613: sl = l;
6614: NEXTL(l);
6615: cur = CUR_CHAR(l);
1.3 veillard 6616: }
1.135 daniel 6617: buf[len] = 0;
1.109 daniel 6618: ctxt->instate = XML_PARSER_CONTENT;
1.152 daniel 6619: if (cur != '>') {
1.55 daniel 6620: if ((ctxt->sax != NULL) && (ctxt->sax->error != NULL))
1.122 daniel 6621: ctxt->sax->error(ctxt->userData,
1.135 daniel 6622: "CData section not finished\n%.50s\n", buf);
1.123 daniel 6623: ctxt->errNo = XML_ERR_CDATA_NOT_FINISHED;
1.59 daniel 6624: ctxt->wellFormed = 0;
1.135 daniel 6625: xmlFree(buf);
1.45 daniel 6626: return;
1.3 veillard 6627: }
1.152 daniel 6628: NEXTL(l);
1.16 daniel 6629:
1.45 daniel 6630: /*
1.135 daniel 6631: * Ok the buffer is to be consumed as cdata.
1.45 daniel 6632: */
6633: if (ctxt->sax != NULL) {
1.107 daniel 6634: if (ctxt->sax->cdataBlock != NULL)
1.135 daniel 6635: ctxt->sax->cdataBlock(ctxt->userData, buf, len);
1.45 daniel 6636: }
1.135 daniel 6637: xmlFree(buf);
1.2 veillard 6638: }
6639:
1.50 daniel 6640: /**
6641: * xmlParseContent:
6642: * @ctxt: an XML parser context
6643: *
6644: * Parse a content:
1.2 veillard 6645: *
1.27 daniel 6646: * [43] content ::= (element | CharData | Reference | CDSect | PI | Comment)*
1.2 veillard 6647: */
6648:
1.55 daniel 6649: void
6650: xmlParseContent(xmlParserCtxtPtr ctxt) {
1.97 daniel 6651: GROW;
1.152 daniel 6652: while ((RAW != '<') || (NXT(1) != '/')) {
1.123 daniel 6653: const xmlChar *test = CUR_PTR;
1.91 daniel 6654: int cons = ctxt->input->consumed;
1.123 daniel 6655: xmlChar tok = ctxt->token;
1.27 daniel 6656:
6657: /*
1.152 daniel 6658: * Handle possible processed charrefs.
6659: */
6660: if (ctxt->token != 0) {
6661: xmlParseCharData(ctxt, 0);
6662: }
6663: /*
1.27 daniel 6664: * First case : a Processing Instruction.
6665: */
1.152 daniel 6666: else if ((RAW == '<') && (NXT(1) == '?')) {
1.27 daniel 6667: xmlParsePI(ctxt);
6668: }
1.72 daniel 6669:
1.27 daniel 6670: /*
6671: * Second case : a CDSection
6672: */
1.152 daniel 6673: else if ((RAW == '<') && (NXT(1) == '!') &&
1.40 daniel 6674: (NXT(2) == '[') && (NXT(3) == 'C') &&
6675: (NXT(4) == 'D') && (NXT(5) == 'A') &&
6676: (NXT(6) == 'T') && (NXT(7) == 'A') &&
6677: (NXT(8) == '[')) {
1.45 daniel 6678: xmlParseCDSect(ctxt);
1.27 daniel 6679: }
1.72 daniel 6680:
1.27 daniel 6681: /*
6682: * Third case : a comment
6683: */
1.152 daniel 6684: else if ((RAW == '<') && (NXT(1) == '!') &&
1.40 daniel 6685: (NXT(2) == '-') && (NXT(3) == '-')) {
1.114 daniel 6686: xmlParseComment(ctxt);
1.97 daniel 6687: ctxt->instate = XML_PARSER_CONTENT;
1.27 daniel 6688: }
1.72 daniel 6689:
1.27 daniel 6690: /*
6691: * Fourth case : a sub-element.
6692: */
1.152 daniel 6693: else if (RAW == '<') {
1.72 daniel 6694: xmlParseElement(ctxt);
1.45 daniel 6695: }
1.72 daniel 6696:
1.45 daniel 6697: /*
1.50 daniel 6698: * Fifth case : a reference. If if has not been resolved,
6699: * parsing returns it's Name, create the node
1.45 daniel 6700: */
1.97 daniel 6701:
1.152 daniel 6702: else if (RAW == '&') {
1.77 daniel 6703: xmlParseReference(ctxt);
1.27 daniel 6704: }
1.72 daniel 6705:
1.27 daniel 6706: /*
6707: * Last case, text. Note that References are handled directly.
6708: */
6709: else {
1.45 daniel 6710: xmlParseCharData(ctxt, 0);
1.3 veillard 6711: }
1.14 veillard 6712:
1.91 daniel 6713: GROW;
1.14 veillard 6714: /*
1.45 daniel 6715: * Pop-up of finished entities.
1.14 veillard 6716: */
1.152 daniel 6717: while ((RAW == 0) && (ctxt->inputNr > 1))
1.69 daniel 6718: xmlPopInput(ctxt);
1.135 daniel 6719: SHRINK;
1.45 daniel 6720:
1.113 daniel 6721: if ((cons == ctxt->input->consumed) && (test == CUR_PTR) &&
6722: (tok == ctxt->token)) {
1.55 daniel 6723: if ((ctxt->sax != NULL) && (ctxt->sax->error != NULL))
1.74 daniel 6724: ctxt->sax->error(ctxt->userData,
1.59 daniel 6725: "detected an error in element content\n");
1.123 daniel 6726: ctxt->errNo = XML_ERR_INTERNAL_ERROR;
1.59 daniel 6727: ctxt->wellFormed = 0;
1.29 daniel 6728: break;
6729: }
1.3 veillard 6730: }
1.2 veillard 6731: }
6732:
1.50 daniel 6733: /**
6734: * xmlParseElement:
6735: * @ctxt: an XML parser context
6736: *
6737: * parse an XML element, this is highly recursive
1.26 daniel 6738: *
6739: * [39] element ::= EmptyElemTag | STag content ETag
6740: *
1.98 daniel 6741: * [ WFC: Element Type Match ]
6742: * The Name in an element's end-tag must match the element type in the
6743: * start-tag.
6744: *
6745: * [ VC: Element Valid ]
1.117 daniel 6746: * An element is valid if there is a declaration matching elementdecl
1.99 daniel 6747: * where the Name matches the element type and one of the following holds:
6748: * - The declaration matches EMPTY and the element has no content.
6749: * - The declaration matches children and the sequence of child elements
6750: * belongs to the language generated by the regular expression in the
6751: * content model, with optional white space (characters matching the
6752: * nonterminal S) between each pair of child elements.
6753: * - The declaration matches Mixed and the content consists of character
6754: * data and child elements whose types match names in the content model.
6755: * - The declaration matches ANY, and the types of any child elements have
6756: * been declared.
1.2 veillard 6757: */
1.26 daniel 6758:
1.72 daniel 6759: void
1.69 daniel 6760: xmlParseElement(xmlParserCtxtPtr ctxt) {
1.123 daniel 6761: const xmlChar *openTag = CUR_PTR;
6762: xmlChar *name;
1.140 daniel 6763: xmlChar *oldname;
1.32 daniel 6764: xmlParserNodeInfo node_info;
1.118 daniel 6765: xmlNodePtr ret;
1.2 veillard 6766:
1.32 daniel 6767: /* Capture start position */
1.118 daniel 6768: if (ctxt->record_info) {
6769: node_info.begin_pos = ctxt->input->consumed +
6770: (CUR_PTR - ctxt->input->base);
6771: node_info.begin_line = ctxt->input->line;
6772: }
1.32 daniel 6773:
1.83 daniel 6774: name = xmlParseStartTag(ctxt);
6775: if (name == NULL) {
6776: return;
6777: }
1.140 daniel 6778: namePush(ctxt, name);
1.118 daniel 6779: ret = ctxt->node;
1.2 veillard 6780:
6781: /*
1.99 daniel 6782: * [ VC: Root Element Type ]
6783: * The Name in the document type declaration must match the element
6784: * type of the root element.
6785: */
1.105 daniel 6786: if (ctxt->validate && ctxt->wellFormed && ctxt->myDoc &&
1.157 daniel 6787: ctxt->node && (ctxt->node == ctxt->myDoc->children))
1.102 daniel 6788: ctxt->valid &= xmlValidateRoot(&ctxt->vctxt, ctxt->myDoc);
1.99 daniel 6789:
6790: /*
1.2 veillard 6791: * Check for an Empty Element.
6792: */
1.152 daniel 6793: if ((RAW == '/') && (NXT(1) == '>')) {
1.40 daniel 6794: SKIP(2);
1.72 daniel 6795: if ((ctxt->sax != NULL) && (ctxt->sax->endElement != NULL))
1.83 daniel 6796: ctxt->sax->endElement(ctxt->userData, name);
1.140 daniel 6797: oldname = namePop(ctxt);
6798: if (oldname != NULL) {
6799: #ifdef DEBUG_STACK
6800: fprintf(stderr,"Close: popped %s\n", oldname);
6801: #endif
6802: xmlFree(oldname);
6803: }
1.72 daniel 6804: return;
1.2 veillard 6805: }
1.152 daniel 6806: if (RAW == '>') {
1.91 daniel 6807: NEXT;
6808: } else {
1.55 daniel 6809: if ((ctxt->sax != NULL) && (ctxt->sax->error != NULL))
1.122 daniel 6810: ctxt->sax->error(ctxt->userData,
6811: "Couldn't find end of Start Tag\n%.30s\n",
1.57 daniel 6812: openTag);
1.59 daniel 6813: ctxt->wellFormed = 0;
1.123 daniel 6814: ctxt->errNo = XML_ERR_GT_REQUIRED;
1.45 daniel 6815:
6816: /*
6817: * end of parsing of this node.
6818: */
6819: nodePop(ctxt);
1.140 daniel 6820: oldname = namePop(ctxt);
6821: if (oldname != NULL) {
6822: #ifdef DEBUG_STACK
6823: fprintf(stderr,"Close: popped %s\n", oldname);
6824: #endif
6825: xmlFree(oldname);
6826: }
1.118 daniel 6827:
6828: /*
6829: * Capture end position and add node
6830: */
6831: if ( ret != NULL && ctxt->record_info ) {
6832: node_info.end_pos = ctxt->input->consumed +
6833: (CUR_PTR - ctxt->input->base);
6834: node_info.end_line = ctxt->input->line;
6835: node_info.node = ret;
6836: xmlParserAddNodeInfo(ctxt, &node_info);
6837: }
1.72 daniel 6838: return;
1.2 veillard 6839: }
6840:
6841: /*
6842: * Parse the content of the element:
6843: */
1.45 daniel 6844: xmlParseContent(ctxt);
1.153 daniel 6845: if (!IS_CHAR(RAW)) {
1.55 daniel 6846: if ((ctxt->sax != NULL) && (ctxt->sax->error != NULL))
1.74 daniel 6847: ctxt->sax->error(ctxt->userData,
1.57 daniel 6848: "Premature end of data in tag %.30s\n", openTag);
1.59 daniel 6849: ctxt->wellFormed = 0;
1.123 daniel 6850: ctxt->errNo = XML_ERR_TAG_NOT_FINISED;
1.45 daniel 6851:
6852: /*
6853: * end of parsing of this node.
6854: */
6855: nodePop(ctxt);
1.140 daniel 6856: oldname = namePop(ctxt);
6857: if (oldname != NULL) {
6858: #ifdef DEBUG_STACK
6859: fprintf(stderr,"Close: popped %s\n", oldname);
6860: #endif
6861: xmlFree(oldname);
6862: }
1.72 daniel 6863: return;
1.2 veillard 6864: }
6865:
6866: /*
1.27 daniel 6867: * parse the end of tag: '</' should be here.
1.2 veillard 6868: */
1.140 daniel 6869: xmlParseEndTag(ctxt);
1.118 daniel 6870:
6871: /*
6872: * Capture end position and add node
6873: */
6874: if ( ret != NULL && ctxt->record_info ) {
6875: node_info.end_pos = ctxt->input->consumed +
6876: (CUR_PTR - ctxt->input->base);
6877: node_info.end_line = ctxt->input->line;
6878: node_info.node = ret;
6879: xmlParserAddNodeInfo(ctxt, &node_info);
6880: }
1.2 veillard 6881: }
6882:
1.50 daniel 6883: /**
6884: * xmlParseVersionNum:
6885: * @ctxt: an XML parser context
6886: *
6887: * parse the XML version value.
1.29 daniel 6888: *
6889: * [26] VersionNum ::= ([a-zA-Z0-9_.:] | '-')+
1.68 daniel 6890: *
6891: * Returns the string giving the XML version number, or NULL
1.29 daniel 6892: */
1.123 daniel 6893: xmlChar *
1.55 daniel 6894: xmlParseVersionNum(xmlParserCtxtPtr ctxt) {
1.135 daniel 6895: xmlChar *buf = NULL;
6896: int len = 0;
6897: int size = 10;
6898: xmlChar cur;
1.29 daniel 6899:
1.135 daniel 6900: buf = (xmlChar *) xmlMalloc(size * sizeof(xmlChar));
6901: if (buf == NULL) {
6902: fprintf(stderr, "malloc of %d byte failed\n", size);
6903: return(NULL);
6904: }
6905: cur = CUR;
1.152 daniel 6906: while (((cur >= 'a') && (cur <= 'z')) ||
6907: ((cur >= 'A') && (cur <= 'Z')) ||
6908: ((cur >= '0') && (cur <= '9')) ||
6909: (cur == '_') || (cur == '.') ||
6910: (cur == ':') || (cur == '-')) {
1.135 daniel 6911: if (len + 1 >= size) {
6912: size *= 2;
6913: buf = xmlRealloc(buf, size * sizeof(xmlChar));
6914: if (buf == NULL) {
6915: fprintf(stderr, "realloc of %d byte failed\n", size);
6916: return(NULL);
6917: }
6918: }
6919: buf[len++] = cur;
6920: NEXT;
6921: cur=CUR;
6922: }
6923: buf[len] = 0;
6924: return(buf);
1.29 daniel 6925: }
6926:
1.50 daniel 6927: /**
6928: * xmlParseVersionInfo:
6929: * @ctxt: an XML parser context
6930: *
6931: * parse the XML version.
1.29 daniel 6932: *
6933: * [24] VersionInfo ::= S 'version' Eq (' VersionNum ' | " VersionNum ")
6934: *
6935: * [25] Eq ::= S? '=' S?
1.50 daniel 6936: *
1.68 daniel 6937: * Returns the version string, e.g. "1.0"
1.29 daniel 6938: */
6939:
1.123 daniel 6940: xmlChar *
1.55 daniel 6941: xmlParseVersionInfo(xmlParserCtxtPtr ctxt) {
1.123 daniel 6942: xmlChar *version = NULL;
6943: const xmlChar *q;
1.29 daniel 6944:
1.152 daniel 6945: if ((RAW == 'v') && (NXT(1) == 'e') &&
1.40 daniel 6946: (NXT(2) == 'r') && (NXT(3) == 's') &&
6947: (NXT(4) == 'i') && (NXT(5) == 'o') &&
6948: (NXT(6) == 'n')) {
6949: SKIP(7);
1.42 daniel 6950: SKIP_BLANKS;
1.152 daniel 6951: if (RAW != '=') {
1.55 daniel 6952: if ((ctxt->sax != NULL) && (ctxt->sax->error != NULL))
1.122 daniel 6953: ctxt->sax->error(ctxt->userData,
6954: "xmlParseVersionInfo : expected '='\n");
1.59 daniel 6955: ctxt->wellFormed = 0;
1.123 daniel 6956: ctxt->errNo = XML_ERR_EQUAL_REQUIRED;
1.31 daniel 6957: return(NULL);
6958: }
1.40 daniel 6959: NEXT;
1.42 daniel 6960: SKIP_BLANKS;
1.152 daniel 6961: if (RAW == '"') {
1.40 daniel 6962: NEXT;
6963: q = CUR_PTR;
1.29 daniel 6964: version = xmlParseVersionNum(ctxt);
1.152 daniel 6965: if (RAW != '"') {
1.55 daniel 6966: if ((ctxt->sax != NULL) && (ctxt->sax->error != NULL))
1.122 daniel 6967: ctxt->sax->error(ctxt->userData,
6968: "String not closed\n%.50s\n", q);
1.59 daniel 6969: ctxt->wellFormed = 0;
1.123 daniel 6970: ctxt->errNo = XML_ERR_STRING_NOT_CLOSED;
1.55 daniel 6971: } else
1.40 daniel 6972: NEXT;
1.152 daniel 6973: } else if (RAW == '\''){
1.40 daniel 6974: NEXT;
6975: q = CUR_PTR;
1.29 daniel 6976: version = xmlParseVersionNum(ctxt);
1.152 daniel 6977: if (RAW != '\'') {
1.55 daniel 6978: if ((ctxt->sax != NULL) && (ctxt->sax->error != NULL))
1.122 daniel 6979: ctxt->sax->error(ctxt->userData,
6980: "String not closed\n%.50s\n", q);
1.123 daniel 6981: ctxt->errNo = XML_ERR_STRING_NOT_CLOSED;
1.59 daniel 6982: ctxt->wellFormed = 0;
1.55 daniel 6983: } else
1.40 daniel 6984: NEXT;
1.31 daniel 6985: } else {
1.55 daniel 6986: if ((ctxt->sax != NULL) && (ctxt->sax->error != NULL))
1.74 daniel 6987: ctxt->sax->error(ctxt->userData,
1.59 daniel 6988: "xmlParseVersionInfo : expected ' or \"\n");
1.122 daniel 6989: ctxt->wellFormed = 0;
1.123 daniel 6990: ctxt->errNo = XML_ERR_STRING_NOT_STARTED;
1.29 daniel 6991: }
6992: }
6993: return(version);
6994: }
6995:
1.50 daniel 6996: /**
6997: * xmlParseEncName:
6998: * @ctxt: an XML parser context
6999: *
7000: * parse the XML encoding name
1.29 daniel 7001: *
7002: * [81] EncName ::= [A-Za-z] ([A-Za-z0-9._] | '-')*
1.50 daniel 7003: *
1.68 daniel 7004: * Returns the encoding name value or NULL
1.29 daniel 7005: */
1.123 daniel 7006: xmlChar *
1.55 daniel 7007: xmlParseEncName(xmlParserCtxtPtr ctxt) {
1.135 daniel 7008: xmlChar *buf = NULL;
7009: int len = 0;
7010: int size = 10;
7011: xmlChar cur;
1.29 daniel 7012:
1.135 daniel 7013: cur = CUR;
7014: if (((cur >= 'a') && (cur <= 'z')) ||
7015: ((cur >= 'A') && (cur <= 'Z'))) {
7016: buf = (xmlChar *) xmlMalloc(size * sizeof(xmlChar));
7017: if (buf == NULL) {
7018: fprintf(stderr, "malloc of %d byte failed\n", size);
7019: return(NULL);
7020: }
7021:
7022: buf[len++] = cur;
1.40 daniel 7023: NEXT;
1.135 daniel 7024: cur = CUR;
1.152 daniel 7025: while (((cur >= 'a') && (cur <= 'z')) ||
7026: ((cur >= 'A') && (cur <= 'Z')) ||
7027: ((cur >= '0') && (cur <= '9')) ||
7028: (cur == '.') || (cur == '_') ||
7029: (cur == '-')) {
1.135 daniel 7030: if (len + 1 >= size) {
7031: size *= 2;
7032: buf = xmlRealloc(buf, size * sizeof(xmlChar));
7033: if (buf == NULL) {
7034: fprintf(stderr, "realloc of %d byte failed\n", size);
7035: return(NULL);
7036: }
7037: }
7038: buf[len++] = cur;
7039: NEXT;
7040: cur = CUR;
7041: if (cur == 0) {
7042: SHRINK;
7043: GROW;
7044: cur = CUR;
7045: }
7046: }
7047: buf[len] = 0;
1.29 daniel 7048: } else {
1.55 daniel 7049: if ((ctxt->sax != NULL) && (ctxt->sax->error != NULL))
1.74 daniel 7050: ctxt->sax->error(ctxt->userData, "Invalid XML encoding name\n");
1.59 daniel 7051: ctxt->wellFormed = 0;
1.123 daniel 7052: ctxt->errNo = XML_ERR_ENCODING_NAME;
1.29 daniel 7053: }
1.135 daniel 7054: return(buf);
1.29 daniel 7055: }
7056:
1.50 daniel 7057: /**
7058: * xmlParseEncodingDecl:
7059: * @ctxt: an XML parser context
7060: *
7061: * parse the XML encoding declaration
1.29 daniel 7062: *
7063: * [80] EncodingDecl ::= S 'encoding' Eq ('"' EncName '"' | "'" EncName "'")
1.50 daniel 7064: *
7065: * TODO: this should setup the conversion filters.
7066: *
1.68 daniel 7067: * Returns the encoding value or NULL
1.29 daniel 7068: */
7069:
1.123 daniel 7070: xmlChar *
1.55 daniel 7071: xmlParseEncodingDecl(xmlParserCtxtPtr ctxt) {
1.123 daniel 7072: xmlChar *encoding = NULL;
7073: const xmlChar *q;
1.29 daniel 7074:
1.42 daniel 7075: SKIP_BLANKS;
1.152 daniel 7076: if ((RAW == 'e') && (NXT(1) == 'n') &&
1.40 daniel 7077: (NXT(2) == 'c') && (NXT(3) == 'o') &&
7078: (NXT(4) == 'd') && (NXT(5) == 'i') &&
7079: (NXT(6) == 'n') && (NXT(7) == 'g')) {
7080: SKIP(8);
1.42 daniel 7081: SKIP_BLANKS;
1.152 daniel 7082: if (RAW != '=') {
1.55 daniel 7083: if ((ctxt->sax != NULL) && (ctxt->sax->error != NULL))
1.122 daniel 7084: ctxt->sax->error(ctxt->userData,
7085: "xmlParseEncodingDecl : expected '='\n");
1.59 daniel 7086: ctxt->wellFormed = 0;
1.123 daniel 7087: ctxt->errNo = XML_ERR_EQUAL_REQUIRED;
1.31 daniel 7088: return(NULL);
7089: }
1.40 daniel 7090: NEXT;
1.42 daniel 7091: SKIP_BLANKS;
1.152 daniel 7092: if (RAW == '"') {
1.40 daniel 7093: NEXT;
7094: q = CUR_PTR;
1.29 daniel 7095: encoding = xmlParseEncName(ctxt);
1.152 daniel 7096: if (RAW != '"') {
1.55 daniel 7097: if ((ctxt->sax != NULL) && (ctxt->sax->error != NULL))
1.122 daniel 7098: ctxt->sax->error(ctxt->userData,
7099: "String not closed\n%.50s\n", q);
1.59 daniel 7100: ctxt->wellFormed = 0;
1.123 daniel 7101: ctxt->errNo = XML_ERR_STRING_NOT_CLOSED;
1.55 daniel 7102: } else
1.40 daniel 7103: NEXT;
1.152 daniel 7104: } else if (RAW == '\''){
1.40 daniel 7105: NEXT;
7106: q = CUR_PTR;
1.29 daniel 7107: encoding = xmlParseEncName(ctxt);
1.152 daniel 7108: if (RAW != '\'') {
1.55 daniel 7109: if ((ctxt->sax != NULL) && (ctxt->sax->error != NULL))
1.122 daniel 7110: ctxt->sax->error(ctxt->userData,
7111: "String not closed\n%.50s\n", q);
1.59 daniel 7112: ctxt->wellFormed = 0;
1.123 daniel 7113: ctxt->errNo = XML_ERR_STRING_NOT_CLOSED;
1.55 daniel 7114: } else
1.40 daniel 7115: NEXT;
1.152 daniel 7116: } else if (RAW == '"'){
1.55 daniel 7117: if ((ctxt->sax != NULL) && (ctxt->sax->error != NULL))
1.74 daniel 7118: ctxt->sax->error(ctxt->userData,
1.59 daniel 7119: "xmlParseEncodingDecl : expected ' or \"\n");
7120: ctxt->wellFormed = 0;
1.123 daniel 7121: ctxt->errNo = XML_ERR_STRING_NOT_STARTED;
1.29 daniel 7122: }
7123: }
7124: return(encoding);
7125: }
7126:
1.50 daniel 7127: /**
7128: * xmlParseSDDecl:
7129: * @ctxt: an XML parser context
7130: *
7131: * parse the XML standalone declaration
1.29 daniel 7132: *
7133: * [32] SDDecl ::= S 'standalone' Eq
7134: * (("'" ('yes' | 'no') "'") | ('"' ('yes' | 'no')'"'))
1.99 daniel 7135: *
7136: * [ VC: Standalone Document Declaration ]
7137: * TODO The standalone document declaration must have the value "no"
7138: * if any external markup declarations contain declarations of:
7139: * - attributes with default values, if elements to which these
7140: * attributes apply appear in the document without specifications
7141: * of values for these attributes, or
7142: * - entities (other than amp, lt, gt, apos, quot), if references
7143: * to those entities appear in the document, or
7144: * - attributes with values subject to normalization, where the
7145: * attribute appears in the document with a value which will change
7146: * as a result of normalization, or
7147: * - element types with element content, if white space occurs directly
7148: * within any instance of those types.
1.68 daniel 7149: *
7150: * Returns 1 if standalone, 0 otherwise
1.29 daniel 7151: */
7152:
1.55 daniel 7153: int
7154: xmlParseSDDecl(xmlParserCtxtPtr ctxt) {
1.29 daniel 7155: int standalone = -1;
7156:
1.42 daniel 7157: SKIP_BLANKS;
1.152 daniel 7158: if ((RAW == 's') && (NXT(1) == 't') &&
1.40 daniel 7159: (NXT(2) == 'a') && (NXT(3) == 'n') &&
7160: (NXT(4) == 'd') && (NXT(5) == 'a') &&
7161: (NXT(6) == 'l') && (NXT(7) == 'o') &&
7162: (NXT(8) == 'n') && (NXT(9) == 'e')) {
7163: SKIP(10);
1.81 daniel 7164: SKIP_BLANKS;
1.152 daniel 7165: if (RAW != '=') {
1.55 daniel 7166: if ((ctxt->sax != NULL) && (ctxt->sax->error != NULL))
1.74 daniel 7167: ctxt->sax->error(ctxt->userData,
1.59 daniel 7168: "XML standalone declaration : expected '='\n");
1.123 daniel 7169: ctxt->errNo = XML_ERR_EQUAL_REQUIRED;
1.59 daniel 7170: ctxt->wellFormed = 0;
1.32 daniel 7171: return(standalone);
7172: }
1.40 daniel 7173: NEXT;
1.42 daniel 7174: SKIP_BLANKS;
1.152 daniel 7175: if (RAW == '\''){
1.40 daniel 7176: NEXT;
1.152 daniel 7177: if ((RAW == 'n') && (NXT(1) == 'o')) {
1.29 daniel 7178: standalone = 0;
1.40 daniel 7179: SKIP(2);
1.152 daniel 7180: } else if ((RAW == 'y') && (NXT(1) == 'e') &&
1.40 daniel 7181: (NXT(2) == 's')) {
1.29 daniel 7182: standalone = 1;
1.40 daniel 7183: SKIP(3);
1.29 daniel 7184: } else {
1.55 daniel 7185: if ((ctxt->sax != NULL) && (ctxt->sax->error != NULL))
1.122 daniel 7186: ctxt->sax->error(ctxt->userData,
7187: "standalone accepts only 'yes' or 'no'\n");
1.123 daniel 7188: ctxt->errNo = XML_ERR_STANDALONE_VALUE;
1.59 daniel 7189: ctxt->wellFormed = 0;
1.29 daniel 7190: }
1.152 daniel 7191: if (RAW != '\'') {
1.55 daniel 7192: if ((ctxt->sax != NULL) && (ctxt->sax->error != NULL))
1.74 daniel 7193: ctxt->sax->error(ctxt->userData, "String not closed\n");
1.123 daniel 7194: ctxt->errNo = XML_ERR_STRING_NOT_CLOSED;
1.59 daniel 7195: ctxt->wellFormed = 0;
1.55 daniel 7196: } else
1.40 daniel 7197: NEXT;
1.152 daniel 7198: } else if (RAW == '"'){
1.40 daniel 7199: NEXT;
1.152 daniel 7200: if ((RAW == 'n') && (NXT(1) == 'o')) {
1.29 daniel 7201: standalone = 0;
1.40 daniel 7202: SKIP(2);
1.152 daniel 7203: } else if ((RAW == 'y') && (NXT(1) == 'e') &&
1.40 daniel 7204: (NXT(2) == 's')) {
1.29 daniel 7205: standalone = 1;
1.40 daniel 7206: SKIP(3);
1.29 daniel 7207: } else {
1.55 daniel 7208: if ((ctxt->sax != NULL) && (ctxt->sax->error != NULL))
1.74 daniel 7209: ctxt->sax->error(ctxt->userData,
1.59 daniel 7210: "standalone accepts only 'yes' or 'no'\n");
1.123 daniel 7211: ctxt->errNo = XML_ERR_STANDALONE_VALUE;
1.59 daniel 7212: ctxt->wellFormed = 0;
1.29 daniel 7213: }
1.152 daniel 7214: if (RAW != '"') {
1.55 daniel 7215: if ((ctxt->sax != NULL) && (ctxt->sax->error != NULL))
1.74 daniel 7216: ctxt->sax->error(ctxt->userData, "String not closed\n");
1.59 daniel 7217: ctxt->wellFormed = 0;
1.123 daniel 7218: ctxt->errNo = XML_ERR_STRING_NOT_CLOSED;
1.55 daniel 7219: } else
1.40 daniel 7220: NEXT;
1.37 daniel 7221: } else {
1.55 daniel 7222: if ((ctxt->sax != NULL) && (ctxt->sax->error != NULL))
1.122 daniel 7223: ctxt->sax->error(ctxt->userData,
7224: "Standalone value not found\n");
1.59 daniel 7225: ctxt->wellFormed = 0;
1.123 daniel 7226: ctxt->errNo = XML_ERR_STRING_NOT_STARTED;
1.37 daniel 7227: }
1.29 daniel 7228: }
7229: return(standalone);
7230: }
7231:
1.50 daniel 7232: /**
7233: * xmlParseXMLDecl:
7234: * @ctxt: an XML parser context
7235: *
7236: * parse an XML declaration header
1.29 daniel 7237: *
7238: * [23] XMLDecl ::= '<?xml' VersionInfo EncodingDecl? SDDecl? S? '?>'
1.1 veillard 7239: */
7240:
1.55 daniel 7241: void
7242: xmlParseXMLDecl(xmlParserCtxtPtr ctxt) {
1.123 daniel 7243: xmlChar *version;
1.1 veillard 7244:
7245: /*
1.19 daniel 7246: * We know that '<?xml' is here.
1.1 veillard 7247: */
1.40 daniel 7248: SKIP(5);
1.1 veillard 7249:
1.153 daniel 7250: if (!IS_BLANK(RAW)) {
1.59 daniel 7251: if ((ctxt->sax != NULL) && (ctxt->sax->error != NULL))
1.74 daniel 7252: ctxt->sax->error(ctxt->userData, "Blank needed after '<?xml'\n");
1.123 daniel 7253: ctxt->errNo = XML_ERR_SPACE_REQUIRED;
1.59 daniel 7254: ctxt->wellFormed = 0;
7255: }
1.42 daniel 7256: SKIP_BLANKS;
1.1 veillard 7257:
7258: /*
1.29 daniel 7259: * We should have the VersionInfo here.
1.1 veillard 7260: */
1.29 daniel 7261: version = xmlParseVersionInfo(ctxt);
7262: if (version == NULL)
1.45 daniel 7263: version = xmlCharStrdup(XML_DEFAULT_VERSION);
1.72 daniel 7264: ctxt->version = xmlStrdup(version);
1.119 daniel 7265: xmlFree(version);
1.29 daniel 7266:
7267: /*
7268: * We may have the encoding declaration
7269: */
1.153 daniel 7270: if (!IS_BLANK(RAW)) {
1.152 daniel 7271: if ((RAW == '?') && (NXT(1) == '>')) {
1.59 daniel 7272: SKIP(2);
7273: return;
7274: }
7275: if ((ctxt->sax != NULL) && (ctxt->sax->error != NULL))
1.74 daniel 7276: ctxt->sax->error(ctxt->userData, "Blank needed here\n");
1.123 daniel 7277: ctxt->errNo = XML_ERR_SPACE_REQUIRED;
1.59 daniel 7278: ctxt->wellFormed = 0;
7279: }
1.72 daniel 7280: ctxt->encoding = xmlParseEncodingDecl(ctxt);
1.1 veillard 7281:
7282: /*
1.29 daniel 7283: * We may have the standalone status.
1.1 veillard 7284: */
1.153 daniel 7285: if ((ctxt->encoding != NULL) && (!IS_BLANK(RAW))) {
1.152 daniel 7286: if ((RAW == '?') && (NXT(1) == '>')) {
1.59 daniel 7287: SKIP(2);
7288: return;
7289: }
7290: if ((ctxt->sax != NULL) && (ctxt->sax->error != NULL))
1.74 daniel 7291: ctxt->sax->error(ctxt->userData, "Blank needed here\n");
1.59 daniel 7292: ctxt->wellFormed = 0;
1.123 daniel 7293: ctxt->errNo = XML_ERR_SPACE_REQUIRED;
1.59 daniel 7294: }
7295: SKIP_BLANKS;
1.72 daniel 7296: ctxt->standalone = xmlParseSDDecl(ctxt);
1.1 veillard 7297:
1.42 daniel 7298: SKIP_BLANKS;
1.152 daniel 7299: if ((RAW == '?') && (NXT(1) == '>')) {
1.40 daniel 7300: SKIP(2);
1.152 daniel 7301: } else if (RAW == '>') {
1.31 daniel 7302: /* Deprecated old WD ... */
1.55 daniel 7303: if ((ctxt->sax != NULL) && (ctxt->sax->error != NULL))
1.122 daniel 7304: ctxt->sax->error(ctxt->userData,
7305: "XML declaration must end-up with '?>'\n");
1.59 daniel 7306: ctxt->wellFormed = 0;
1.123 daniel 7307: ctxt->errNo = XML_ERR_XMLDECL_NOT_FINISHED;
1.40 daniel 7308: NEXT;
1.29 daniel 7309: } else {
1.55 daniel 7310: if ((ctxt->sax != NULL) && (ctxt->sax->error != NULL))
1.122 daniel 7311: ctxt->sax->error(ctxt->userData,
7312: "parsing XML declaration: '?>' expected\n");
1.59 daniel 7313: ctxt->wellFormed = 0;
1.123 daniel 7314: ctxt->errNo = XML_ERR_XMLDECL_NOT_FINISHED;
1.40 daniel 7315: MOVETO_ENDTAG(CUR_PTR);
7316: NEXT;
1.29 daniel 7317: }
1.1 veillard 7318: }
7319:
1.50 daniel 7320: /**
7321: * xmlParseMisc:
7322: * @ctxt: an XML parser context
7323: *
7324: * parse an XML Misc* optionnal field.
1.21 daniel 7325: *
1.22 daniel 7326: * [27] Misc ::= Comment | PI | S
1.1 veillard 7327: */
7328:
1.55 daniel 7329: void
7330: xmlParseMisc(xmlParserCtxtPtr ctxt) {
1.152 daniel 7331: while (((RAW == '<') && (NXT(1) == '?')) ||
7332: ((RAW == '<') && (NXT(1) == '!') &&
1.40 daniel 7333: (NXT(2) == '-') && (NXT(3) == '-')) ||
7334: IS_BLANK(CUR)) {
1.152 daniel 7335: if ((RAW == '<') && (NXT(1) == '?')) {
1.16 daniel 7336: xmlParsePI(ctxt);
1.40 daniel 7337: } else if (IS_BLANK(CUR)) {
7338: NEXT;
1.1 veillard 7339: } else
1.114 daniel 7340: xmlParseComment(ctxt);
1.1 veillard 7341: }
7342: }
7343:
1.50 daniel 7344: /**
7345: * xmlParseDocument :
7346: * @ctxt: an XML parser context
7347: *
7348: * parse an XML document (and build a tree if using the standard SAX
7349: * interface).
1.21 daniel 7350: *
1.22 daniel 7351: * [1] document ::= prolog element Misc*
1.29 daniel 7352: *
7353: * [22] prolog ::= XMLDecl? Misc* (doctypedecl Misc*)?
1.50 daniel 7354: *
1.68 daniel 7355: * Returns 0, -1 in case of error. the parser context is augmented
1.50 daniel 7356: * as a result of the parsing.
1.1 veillard 7357: */
7358:
1.55 daniel 7359: int
7360: xmlParseDocument(xmlParserCtxtPtr ctxt) {
1.156 daniel 7361: xmlChar start[4];
7362: xmlCharEncoding enc;
7363:
1.45 daniel 7364: xmlDefaultSAXHandlerInit();
7365:
1.91 daniel 7366: GROW;
7367:
1.14 veillard 7368: /*
1.44 daniel 7369: * SAX: beginning of the document processing.
7370: */
1.72 daniel 7371: if ((ctxt->sax) && (ctxt->sax->setDocumentLocator))
1.74 daniel 7372: ctxt->sax->setDocumentLocator(ctxt->userData, &xmlDefaultSAXLocator);
1.44 daniel 7373:
1.156 daniel 7374: /*
7375: * Get the 4 first bytes and decode the charset
7376: * if enc != XML_CHAR_ENCODING_NONE
7377: * plug some encoding conversion routines.
7378: */
7379: start[0] = RAW;
7380: start[1] = NXT(1);
7381: start[2] = NXT(2);
7382: start[3] = NXT(3);
7383: enc = xmlDetectCharEncoding(start, 4);
7384: if (enc != XML_CHAR_ENCODING_NONE) {
7385: xmlSwitchEncoding(ctxt, enc);
7386: }
7387:
1.1 veillard 7388:
7389: /*
7390: * Wipe out everything which is before the first '<'
7391: */
1.153 daniel 7392: if (IS_BLANK(RAW)) {
1.59 daniel 7393: if ((ctxt->sax != NULL) && (ctxt->sax->error != NULL))
1.74 daniel 7394: ctxt->sax->error(ctxt->userData,
1.59 daniel 7395: "Extra spaces at the beginning of the document are not allowed\n");
1.123 daniel 7396: ctxt->errNo = XML_ERR_DOCUMENT_START;
1.59 daniel 7397: ctxt->wellFormed = 0;
7398: SKIP_BLANKS;
7399: }
7400:
7401: if (CUR == 0) {
7402: if ((ctxt->sax != NULL) && (ctxt->sax->error != NULL))
1.74 daniel 7403: ctxt->sax->error(ctxt->userData, "Document is empty\n");
1.123 daniel 7404: ctxt->errNo = XML_ERR_DOCUMENT_EMPTY;
1.59 daniel 7405: ctxt->wellFormed = 0;
7406: }
1.1 veillard 7407:
7408: /*
7409: * Check for the XMLDecl in the Prolog.
7410: */
1.91 daniel 7411: GROW;
1.152 daniel 7412: if ((RAW == '<') && (NXT(1) == '?') &&
1.40 daniel 7413: (NXT(2) == 'x') && (NXT(3) == 'm') &&
1.142 daniel 7414: (NXT(4) == 'l') && (IS_BLANK(NXT(5)))) {
1.19 daniel 7415: xmlParseXMLDecl(ctxt);
1.42 daniel 7416: SKIP_BLANKS;
1.151 daniel 7417: #if 0
1.152 daniel 7418: } else if ((RAW == '<') && (NXT(1) == '?') &&
1.40 daniel 7419: (NXT(2) == 'X') && (NXT(3) == 'M') &&
1.142 daniel 7420: (NXT(4) == 'L') && (IS_BLANK(NXT(5)))) {
1.19 daniel 7421: /*
7422: * The first drafts were using <?XML and the final W3C REC
7423: * now use <?xml ...
7424: */
1.16 daniel 7425: xmlParseXMLDecl(ctxt);
1.42 daniel 7426: SKIP_BLANKS;
1.151 daniel 7427: #endif
1.1 veillard 7428: } else {
1.72 daniel 7429: ctxt->version = xmlCharStrdup(XML_DEFAULT_VERSION);
1.1 veillard 7430: }
1.72 daniel 7431: if ((ctxt->sax) && (ctxt->sax->startDocument))
1.74 daniel 7432: ctxt->sax->startDocument(ctxt->userData);
1.1 veillard 7433:
7434: /*
7435: * The Misc part of the Prolog
7436: */
1.91 daniel 7437: GROW;
1.16 daniel 7438: xmlParseMisc(ctxt);
1.1 veillard 7439:
7440: /*
1.29 daniel 7441: * Then possibly doc type declaration(s) and more Misc
1.21 daniel 7442: * (doctypedecl Misc*)?
7443: */
1.91 daniel 7444: GROW;
1.152 daniel 7445: if ((RAW == '<') && (NXT(1) == '!') &&
1.40 daniel 7446: (NXT(2) == 'D') && (NXT(3) == 'O') &&
7447: (NXT(4) == 'C') && (NXT(5) == 'T') &&
7448: (NXT(6) == 'Y') && (NXT(7) == 'P') &&
7449: (NXT(8) == 'E')) {
1.22 daniel 7450: xmlParseDocTypeDecl(ctxt);
1.152 daniel 7451: if (RAW == '[') {
1.140 daniel 7452: ctxt->instate = XML_PARSER_DTD;
7453: xmlParseInternalSubset(ctxt);
7454: }
1.96 daniel 7455: ctxt->instate = XML_PARSER_PROLOG;
1.22 daniel 7456: xmlParseMisc(ctxt);
1.21 daniel 7457: }
7458:
7459: /*
7460: * Time to start parsing the tree itself
1.1 veillard 7461: */
1.91 daniel 7462: GROW;
1.152 daniel 7463: if (RAW != '<') {
1.59 daniel 7464: if ((ctxt->sax != NULL) && (ctxt->sax->error != NULL))
1.74 daniel 7465: ctxt->sax->error(ctxt->userData,
1.151 daniel 7466: "Start tag expected, '<' not found\n");
1.140 daniel 7467: ctxt->errNo = XML_ERR_DOCUMENT_EMPTY;
1.59 daniel 7468: ctxt->wellFormed = 0;
1.140 daniel 7469: ctxt->instate = XML_PARSER_EOF;
7470: } else {
7471: ctxt->instate = XML_PARSER_CONTENT;
7472: xmlParseElement(ctxt);
7473: ctxt->instate = XML_PARSER_EPILOG;
7474:
7475:
7476: /*
7477: * The Misc part at the end
7478: */
7479: xmlParseMisc(ctxt);
7480:
1.152 daniel 7481: if (RAW != 0) {
1.140 daniel 7482: if ((ctxt->sax != NULL) && (ctxt->sax->error != NULL))
7483: ctxt->sax->error(ctxt->userData,
7484: "Extra content at the end of the document\n");
7485: ctxt->wellFormed = 0;
7486: ctxt->errNo = XML_ERR_DOCUMENT_END;
7487: }
7488: ctxt->instate = XML_PARSER_EOF;
1.59 daniel 7489: }
7490:
1.44 daniel 7491: /*
7492: * SAX: end of the document processing.
7493: */
1.72 daniel 7494: if ((ctxt->sax) && (ctxt->sax->endDocument != NULL))
1.74 daniel 7495: ctxt->sax->endDocument(ctxt->userData);
1.151 daniel 7496:
7497: /*
7498: * Grab the encoding if it was added on-the-fly
7499: */
7500: if ((ctxt->encoding != NULL) && (ctxt->myDoc != NULL) &&
7501: (ctxt->myDoc->encoding == NULL)) {
7502: ctxt->myDoc->encoding = ctxt->encoding;
7503: ctxt->encoding = NULL;
7504: }
1.59 daniel 7505: if (! ctxt->wellFormed) return(-1);
1.16 daniel 7506: return(0);
7507: }
7508:
1.98 daniel 7509: /************************************************************************
7510: * *
1.128 daniel 7511: * Progressive parsing interfaces *
7512: * *
7513: ************************************************************************/
7514:
7515: /**
7516: * xmlParseLookupSequence:
7517: * @ctxt: an XML parser context
7518: * @first: the first char to lookup
1.140 daniel 7519: * @next: the next char to lookup or zero
7520: * @third: the next char to lookup or zero
1.128 daniel 7521: *
1.140 daniel 7522: * Try to find if a sequence (first, next, third) or just (first next) or
7523: * (first) is available in the input stream.
7524: * This function has a side effect of (possibly) incrementing ctxt->checkIndex
7525: * to avoid rescanning sequences of bytes, it DOES change the state of the
7526: * parser, do not use liberally.
1.128 daniel 7527: *
1.140 daniel 7528: * Returns the index to the current parsing point if the full sequence
7529: * is available, -1 otherwise.
1.128 daniel 7530: */
7531: int
1.140 daniel 7532: xmlParseLookupSequence(xmlParserCtxtPtr ctxt, xmlChar first,
7533: xmlChar next, xmlChar third) {
7534: int base, len;
7535: xmlParserInputPtr in;
7536: const xmlChar *buf;
7537:
7538: in = ctxt->input;
7539: if (in == NULL) return(-1);
7540: base = in->cur - in->base;
7541: if (base < 0) return(-1);
7542: if (ctxt->checkIndex > base)
7543: base = ctxt->checkIndex;
7544: if (in->buf == NULL) {
7545: buf = in->base;
7546: len = in->length;
7547: } else {
7548: buf = in->buf->buffer->content;
7549: len = in->buf->buffer->use;
7550: }
7551: /* take into account the sequence length */
7552: if (third) len -= 2;
7553: else if (next) len --;
7554: for (;base < len;base++) {
7555: if (buf[base] == first) {
7556: if (third != 0) {
7557: if ((buf[base + 1] != next) ||
7558: (buf[base + 2] != third)) continue;
7559: } else if (next != 0) {
7560: if (buf[base + 1] != next) continue;
7561: }
7562: ctxt->checkIndex = 0;
7563: #ifdef DEBUG_PUSH
7564: if (next == 0)
7565: fprintf(stderr, "PP: lookup '%c' found at %d\n",
7566: first, base);
7567: else if (third == 0)
7568: fprintf(stderr, "PP: lookup '%c%c' found at %d\n",
7569: first, next, base);
7570: else
7571: fprintf(stderr, "PP: lookup '%c%c%c' found at %d\n",
7572: first, next, third, base);
7573: #endif
7574: return(base - (in->cur - in->base));
7575: }
7576: }
7577: ctxt->checkIndex = base;
7578: #ifdef DEBUG_PUSH
7579: if (next == 0)
7580: fprintf(stderr, "PP: lookup '%c' failed\n", first);
7581: else if (third == 0)
7582: fprintf(stderr, "PP: lookup '%c%c' failed\n", first, next);
7583: else
7584: fprintf(stderr, "PP: lookup '%c%c%c' failed\n", first, next, third);
7585: #endif
7586: return(-1);
1.128 daniel 7587: }
7588:
7589: /**
1.143 daniel 7590: * xmlParseTryOrFinish:
1.128 daniel 7591: * @ctxt: an XML parser context
1.143 daniel 7592: * @terminate: last chunk indicator
1.128 daniel 7593: *
7594: * Try to progress on parsing
7595: *
7596: * Returns zero if no parsing was possible
7597: */
7598: int
1.143 daniel 7599: xmlParseTryOrFinish(xmlParserCtxtPtr ctxt, int terminate) {
1.128 daniel 7600: int ret = 0;
1.140 daniel 7601: xmlParserInputPtr in;
7602: int avail;
7603: xmlChar cur, next;
7604:
7605: #ifdef DEBUG_PUSH
7606: switch (ctxt->instate) {
7607: case XML_PARSER_EOF:
7608: fprintf(stderr, "PP: try EOF\n"); break;
7609: case XML_PARSER_START:
7610: fprintf(stderr, "PP: try START\n"); break;
7611: case XML_PARSER_MISC:
7612: fprintf(stderr, "PP: try MISC\n");break;
7613: case XML_PARSER_COMMENT:
7614: fprintf(stderr, "PP: try COMMENT\n");break;
7615: case XML_PARSER_PROLOG:
7616: fprintf(stderr, "PP: try PROLOG\n");break;
7617: case XML_PARSER_START_TAG:
7618: fprintf(stderr, "PP: try START_TAG\n");break;
7619: case XML_PARSER_CONTENT:
7620: fprintf(stderr, "PP: try CONTENT\n");break;
7621: case XML_PARSER_CDATA_SECTION:
7622: fprintf(stderr, "PP: try CDATA_SECTION\n");break;
7623: case XML_PARSER_END_TAG:
7624: fprintf(stderr, "PP: try END_TAG\n");break;
7625: case XML_PARSER_ENTITY_DECL:
7626: fprintf(stderr, "PP: try ENTITY_DECL\n");break;
7627: case XML_PARSER_ENTITY_VALUE:
7628: fprintf(stderr, "PP: try ENTITY_VALUE\n");break;
7629: case XML_PARSER_ATTRIBUTE_VALUE:
7630: fprintf(stderr, "PP: try ATTRIBUTE_VALUE\n");break;
7631: case XML_PARSER_DTD:
7632: fprintf(stderr, "PP: try DTD\n");break;
7633: case XML_PARSER_EPILOG:
7634: fprintf(stderr, "PP: try EPILOG\n");break;
7635: case XML_PARSER_PI:
7636: fprintf(stderr, "PP: try PI\n");break;
7637: }
7638: #endif
1.128 daniel 7639:
7640: while (1) {
1.140 daniel 7641: /*
7642: * Pop-up of finished entities.
7643: */
1.152 daniel 7644: while ((RAW == 0) && (ctxt->inputNr > 1))
1.140 daniel 7645: xmlPopInput(ctxt);
7646:
7647: in = ctxt->input;
7648: if (in == NULL) break;
7649: if (in->buf == NULL)
7650: avail = in->length - (in->cur - in->base);
7651: else
7652: avail = in->buf->buffer->use - (in->cur - in->base);
7653: if (avail < 1)
7654: goto done;
1.128 daniel 7655: switch (ctxt->instate) {
7656: case XML_PARSER_EOF:
1.140 daniel 7657: /*
7658: * Document parsing is done !
7659: */
7660: goto done;
7661: case XML_PARSER_START:
7662: /*
7663: * Very first chars read from the document flow.
7664: */
7665: cur = in->cur[0];
7666: if (IS_BLANK(cur)) {
7667: if ((ctxt->sax) && (ctxt->sax->setDocumentLocator))
7668: ctxt->sax->setDocumentLocator(ctxt->userData,
7669: &xmlDefaultSAXLocator);
7670: if ((ctxt->sax != NULL) && (ctxt->sax->error != NULL))
7671: ctxt->sax->error(ctxt->userData,
7672: "Extra spaces at the beginning of the document are not allowed\n");
7673: ctxt->errNo = XML_ERR_DOCUMENT_START;
7674: ctxt->wellFormed = 0;
7675: SKIP_BLANKS;
7676: ret++;
7677: if (in->buf == NULL)
7678: avail = in->length - (in->cur - in->base);
7679: else
7680: avail = in->buf->buffer->use - (in->cur - in->base);
7681: }
7682: if (avail < 2)
7683: goto done;
7684:
7685: cur = in->cur[0];
7686: next = in->cur[1];
7687: if (cur == 0) {
7688: if ((ctxt->sax) && (ctxt->sax->setDocumentLocator))
7689: ctxt->sax->setDocumentLocator(ctxt->userData,
7690: &xmlDefaultSAXLocator);
7691: if ((ctxt->sax != NULL) && (ctxt->sax->error != NULL))
7692: ctxt->sax->error(ctxt->userData, "Document is empty\n");
7693: ctxt->errNo = XML_ERR_DOCUMENT_EMPTY;
7694: ctxt->wellFormed = 0;
7695: ctxt->instate = XML_PARSER_EOF;
7696: #ifdef DEBUG_PUSH
7697: fprintf(stderr, "PP: entering EOF\n");
7698: #endif
7699: if ((ctxt->sax) && (ctxt->sax->endDocument != NULL))
7700: ctxt->sax->endDocument(ctxt->userData);
7701: goto done;
7702: }
7703: if ((cur == '<') && (next == '?')) {
7704: /* PI or XML decl */
7705: if (avail < 5) return(ret);
1.143 daniel 7706: if ((!terminate) &&
7707: (xmlParseLookupSequence(ctxt, '?', '>', 0) < 0))
1.140 daniel 7708: return(ret);
7709: if ((ctxt->sax) && (ctxt->sax->setDocumentLocator))
7710: ctxt->sax->setDocumentLocator(ctxt->userData,
7711: &xmlDefaultSAXLocator);
7712: if ((in->cur[2] == 'x') &&
7713: (in->cur[3] == 'm') &&
1.142 daniel 7714: (in->cur[4] == 'l') &&
7715: (IS_BLANK(in->cur[5]))) {
1.140 daniel 7716: ret += 5;
7717: #ifdef DEBUG_PUSH
7718: fprintf(stderr, "PP: Parsing XML Decl\n");
7719: #endif
7720: xmlParseXMLDecl(ctxt);
7721: if ((ctxt->sax) && (ctxt->sax->startDocument))
7722: ctxt->sax->startDocument(ctxt->userData);
7723: ctxt->instate = XML_PARSER_MISC;
7724: #ifdef DEBUG_PUSH
7725: fprintf(stderr, "PP: entering MISC\n");
7726: #endif
7727: } else {
7728: ctxt->version = xmlCharStrdup(XML_DEFAULT_VERSION);
7729: if ((ctxt->sax) && (ctxt->sax->startDocument))
7730: ctxt->sax->startDocument(ctxt->userData);
7731: ctxt->instate = XML_PARSER_MISC;
7732: #ifdef DEBUG_PUSH
7733: fprintf(stderr, "PP: entering MISC\n");
7734: #endif
7735: }
7736: } else {
7737: if ((ctxt->sax) && (ctxt->sax->setDocumentLocator))
7738: ctxt->sax->setDocumentLocator(ctxt->userData,
7739: &xmlDefaultSAXLocator);
7740: ctxt->version = xmlCharStrdup(XML_DEFAULT_VERSION);
7741: if ((ctxt->sax) && (ctxt->sax->startDocument))
7742: ctxt->sax->startDocument(ctxt->userData);
7743: ctxt->instate = XML_PARSER_MISC;
7744: #ifdef DEBUG_PUSH
7745: fprintf(stderr, "PP: entering MISC\n");
7746: #endif
7747: }
7748: break;
7749: case XML_PARSER_MISC:
7750: SKIP_BLANKS;
7751: if (in->buf == NULL)
7752: avail = in->length - (in->cur - in->base);
7753: else
7754: avail = in->buf->buffer->use - (in->cur - in->base);
7755: if (avail < 2)
7756: goto done;
7757: cur = in->cur[0];
7758: next = in->cur[1];
7759: if ((cur == '<') && (next == '?')) {
1.143 daniel 7760: if ((!terminate) &&
7761: (xmlParseLookupSequence(ctxt, '?', '>', 0) < 0))
1.140 daniel 7762: goto done;
7763: #ifdef DEBUG_PUSH
7764: fprintf(stderr, "PP: Parsing PI\n");
7765: #endif
7766: xmlParsePI(ctxt);
7767: } else if ((cur == '<') && (next == '!') &&
7768: (in->cur[2] == '-') && (in->cur[3] == '-')) {
1.143 daniel 7769: if ((!terminate) &&
7770: (xmlParseLookupSequence(ctxt, '-', '-', '>') < 0))
1.140 daniel 7771: goto done;
7772: #ifdef DEBUG_PUSH
7773: fprintf(stderr, "PP: Parsing Comment\n");
7774: #endif
7775: xmlParseComment(ctxt);
7776: ctxt->instate = XML_PARSER_MISC;
7777: } else if ((cur == '<') && (next == '!') &&
7778: (in->cur[2] == 'D') && (in->cur[3] == 'O') &&
7779: (in->cur[4] == 'C') && (in->cur[5] == 'T') &&
7780: (in->cur[6] == 'Y') && (in->cur[7] == 'P') &&
7781: (in->cur[8] == 'E')) {
1.143 daniel 7782: if ((!terminate) &&
7783: (xmlParseLookupSequence(ctxt, '>', 0, 0) < 0))
1.140 daniel 7784: goto done;
7785: #ifdef DEBUG_PUSH
7786: fprintf(stderr, "PP: Parsing internal subset\n");
7787: #endif
7788: xmlParseDocTypeDecl(ctxt);
1.152 daniel 7789: if (RAW == '[') {
1.140 daniel 7790: ctxt->instate = XML_PARSER_DTD;
7791: #ifdef DEBUG_PUSH
7792: fprintf(stderr, "PP: entering DTD\n");
7793: #endif
7794: } else {
7795: ctxt->instate = XML_PARSER_PROLOG;
7796: #ifdef DEBUG_PUSH
7797: fprintf(stderr, "PP: entering PROLOG\n");
7798: #endif
7799: }
7800: } else if ((cur == '<') && (next == '!') &&
7801: (avail < 9)) {
7802: goto done;
7803: } else {
7804: ctxt->instate = XML_PARSER_START_TAG;
7805: #ifdef DEBUG_PUSH
7806: fprintf(stderr, "PP: entering START_TAG\n");
7807: #endif
7808: }
7809: break;
1.128 daniel 7810: case XML_PARSER_PROLOG:
1.140 daniel 7811: SKIP_BLANKS;
7812: if (in->buf == NULL)
7813: avail = in->length - (in->cur - in->base);
7814: else
7815: avail = in->buf->buffer->use - (in->cur - in->base);
7816: if (avail < 2)
7817: goto done;
7818: cur = in->cur[0];
7819: next = in->cur[1];
7820: if ((cur == '<') && (next == '?')) {
1.143 daniel 7821: if ((!terminate) &&
7822: (xmlParseLookupSequence(ctxt, '?', '>', 0) < 0))
1.140 daniel 7823: goto done;
7824: #ifdef DEBUG_PUSH
7825: fprintf(stderr, "PP: Parsing PI\n");
7826: #endif
7827: xmlParsePI(ctxt);
7828: } else if ((cur == '<') && (next == '!') &&
7829: (in->cur[2] == '-') && (in->cur[3] == '-')) {
1.143 daniel 7830: if ((!terminate) &&
7831: (xmlParseLookupSequence(ctxt, '-', '-', '>') < 0))
1.140 daniel 7832: goto done;
7833: #ifdef DEBUG_PUSH
7834: fprintf(stderr, "PP: Parsing Comment\n");
7835: #endif
7836: xmlParseComment(ctxt);
7837: ctxt->instate = XML_PARSER_PROLOG;
7838: } else if ((cur == '<') && (next == '!') &&
7839: (avail < 4)) {
7840: goto done;
7841: } else {
7842: ctxt->instate = XML_PARSER_START_TAG;
7843: #ifdef DEBUG_PUSH
7844: fprintf(stderr, "PP: entering START_TAG\n");
7845: #endif
7846: }
7847: break;
7848: case XML_PARSER_EPILOG:
7849: SKIP_BLANKS;
7850: if (in->buf == NULL)
7851: avail = in->length - (in->cur - in->base);
7852: else
7853: avail = in->buf->buffer->use - (in->cur - in->base);
7854: if (avail < 2)
7855: goto done;
7856: cur = in->cur[0];
7857: next = in->cur[1];
7858: if ((cur == '<') && (next == '?')) {
1.143 daniel 7859: if ((!terminate) &&
7860: (xmlParseLookupSequence(ctxt, '?', '>', 0) < 0))
1.140 daniel 7861: goto done;
7862: #ifdef DEBUG_PUSH
7863: fprintf(stderr, "PP: Parsing PI\n");
7864: #endif
7865: xmlParsePI(ctxt);
7866: ctxt->instate = XML_PARSER_EPILOG;
7867: } else if ((cur == '<') && (next == '!') &&
7868: (in->cur[2] == '-') && (in->cur[3] == '-')) {
1.143 daniel 7869: if ((!terminate) &&
7870: (xmlParseLookupSequence(ctxt, '-', '-', '>') < 0))
1.140 daniel 7871: goto done;
7872: #ifdef DEBUG_PUSH
7873: fprintf(stderr, "PP: Parsing Comment\n");
7874: #endif
7875: xmlParseComment(ctxt);
7876: ctxt->instate = XML_PARSER_EPILOG;
7877: } else if ((cur == '<') && (next == '!') &&
7878: (avail < 4)) {
7879: goto done;
7880: } else {
7881: if ((ctxt->sax != NULL) && (ctxt->sax->error != NULL))
7882: ctxt->sax->error(ctxt->userData,
7883: "Extra content at the end of the document\n");
7884: ctxt->wellFormed = 0;
7885: ctxt->errNo = XML_ERR_DOCUMENT_END;
7886: ctxt->instate = XML_PARSER_EOF;
7887: #ifdef DEBUG_PUSH
7888: fprintf(stderr, "PP: entering EOF\n");
7889: #endif
7890: if ((ctxt->sax) && (ctxt->sax->endDocument != NULL))
7891: ctxt->sax->endDocument(ctxt->userData);
7892: goto done;
7893: }
7894: break;
7895: case XML_PARSER_START_TAG: {
7896: xmlChar *name, *oldname;
7897:
7898: if (avail < 2)
7899: goto done;
7900: cur = in->cur[0];
7901: if (cur != '<') {
7902: if ((ctxt->sax != NULL) && (ctxt->sax->error != NULL))
7903: ctxt->sax->error(ctxt->userData,
7904: "Start tag expect, '<' not found\n");
7905: ctxt->errNo = XML_ERR_DOCUMENT_EMPTY;
7906: ctxt->wellFormed = 0;
7907: ctxt->instate = XML_PARSER_EOF;
7908: #ifdef DEBUG_PUSH
7909: fprintf(stderr, "PP: entering EOF\n");
7910: #endif
7911: if ((ctxt->sax) && (ctxt->sax->endDocument != NULL))
7912: ctxt->sax->endDocument(ctxt->userData);
7913: goto done;
7914: }
1.143 daniel 7915: if ((!terminate) &&
7916: (xmlParseLookupSequence(ctxt, '>', 0, 0) < 0))
1.140 daniel 7917: goto done;
7918: name = xmlParseStartTag(ctxt);
7919: if (name == NULL) {
7920: ctxt->instate = XML_PARSER_EOF;
7921: #ifdef DEBUG_PUSH
7922: fprintf(stderr, "PP: entering EOF\n");
7923: #endif
7924: if ((ctxt->sax) && (ctxt->sax->endDocument != NULL))
7925: ctxt->sax->endDocument(ctxt->userData);
7926: goto done;
7927: }
7928: namePush(ctxt, xmlStrdup(name));
7929:
7930: /*
7931: * [ VC: Root Element Type ]
7932: * The Name in the document type declaration must match
7933: * the element type of the root element.
7934: */
7935: if (ctxt->validate && ctxt->wellFormed && ctxt->myDoc &&
1.157 daniel 7936: ctxt->node && (ctxt->node == ctxt->myDoc->children))
1.140 daniel 7937: ctxt->valid &= xmlValidateRoot(&ctxt->vctxt, ctxt->myDoc);
7938:
7939: /*
7940: * Check for an Empty Element.
7941: */
1.152 daniel 7942: if ((RAW == '/') && (NXT(1) == '>')) {
1.140 daniel 7943: SKIP(2);
7944: if ((ctxt->sax != NULL) && (ctxt->sax->endElement != NULL))
7945: ctxt->sax->endElement(ctxt->userData, name);
7946: xmlFree(name);
7947: oldname = namePop(ctxt);
7948: if (oldname != NULL) {
7949: #ifdef DEBUG_STACK
7950: fprintf(stderr,"Close: popped %s\n", oldname);
7951: #endif
7952: xmlFree(oldname);
7953: }
7954: if (ctxt->name == NULL) {
7955: ctxt->instate = XML_PARSER_EPILOG;
7956: #ifdef DEBUG_PUSH
7957: fprintf(stderr, "PP: entering EPILOG\n");
7958: #endif
7959: } else {
7960: ctxt->instate = XML_PARSER_CONTENT;
7961: #ifdef DEBUG_PUSH
7962: fprintf(stderr, "PP: entering CONTENT\n");
7963: #endif
7964: }
7965: break;
7966: }
1.152 daniel 7967: if (RAW == '>') {
1.140 daniel 7968: NEXT;
7969: } else {
7970: if ((ctxt->sax != NULL) && (ctxt->sax->error != NULL))
7971: ctxt->sax->error(ctxt->userData,
7972: "Couldn't find end of Start Tag %s\n",
7973: name);
7974: ctxt->wellFormed = 0;
7975: ctxt->errNo = XML_ERR_GT_REQUIRED;
7976:
7977: /*
7978: * end of parsing of this node.
7979: */
7980: nodePop(ctxt);
7981: oldname = namePop(ctxt);
7982: if (oldname != NULL) {
7983: #ifdef DEBUG_STACK
7984: fprintf(stderr,"Close: popped %s\n", oldname);
7985: #endif
7986: xmlFree(oldname);
7987: }
7988: }
7989: xmlFree(name);
7990: ctxt->instate = XML_PARSER_CONTENT;
7991: #ifdef DEBUG_PUSH
7992: fprintf(stderr, "PP: entering CONTENT\n");
7993: #endif
7994: break;
7995: }
1.128 daniel 7996: case XML_PARSER_CONTENT:
1.140 daniel 7997: /*
7998: * Handle preparsed entities and charRef
7999: */
8000: if (ctxt->token != 0) {
8001: xmlChar cur[2] = { 0 , 0 } ;
8002:
8003: cur[0] = (xmlChar) ctxt->token;
8004: if ((ctxt->sax != NULL) && (ctxt->sax->characters != NULL))
8005: ctxt->sax->characters(ctxt->userData, cur, 1);
8006: ctxt->token = 0;
8007: }
8008: if (avail < 2)
8009: goto done;
8010: cur = in->cur[0];
8011: next = in->cur[1];
8012: if ((cur == '<') && (next == '?')) {
1.143 daniel 8013: if ((!terminate) &&
8014: (xmlParseLookupSequence(ctxt, '?', '>', 0) < 0))
1.140 daniel 8015: goto done;
8016: #ifdef DEBUG_PUSH
8017: fprintf(stderr, "PP: Parsing PI\n");
8018: #endif
8019: xmlParsePI(ctxt);
8020: } else if ((cur == '<') && (next == '!') &&
8021: (in->cur[2] == '-') && (in->cur[3] == '-')) {
1.143 daniel 8022: if ((!terminate) &&
8023: (xmlParseLookupSequence(ctxt, '-', '-', '>') < 0))
1.140 daniel 8024: goto done;
8025: #ifdef DEBUG_PUSH
8026: fprintf(stderr, "PP: Parsing Comment\n");
8027: #endif
8028: xmlParseComment(ctxt);
8029: ctxt->instate = XML_PARSER_CONTENT;
8030: } else if ((cur == '<') && (in->cur[1] == '!') &&
8031: (in->cur[2] == '[') && (NXT(3) == 'C') &&
8032: (in->cur[4] == 'D') && (NXT(5) == 'A') &&
8033: (in->cur[6] == 'T') && (NXT(7) == 'A') &&
8034: (in->cur[8] == '[')) {
8035: SKIP(9);
8036: ctxt->instate = XML_PARSER_CDATA_SECTION;
8037: #ifdef DEBUG_PUSH
8038: fprintf(stderr, "PP: entering CDATA_SECTION\n");
8039: #endif
8040: break;
8041: } else if ((cur == '<') && (next == '!') &&
8042: (avail < 9)) {
8043: goto done;
8044: } else if ((cur == '<') && (next == '/')) {
8045: ctxt->instate = XML_PARSER_END_TAG;
8046: #ifdef DEBUG_PUSH
8047: fprintf(stderr, "PP: entering END_TAG\n");
8048: #endif
8049: break;
8050: } else if (cur == '<') {
8051: ctxt->instate = XML_PARSER_START_TAG;
8052: #ifdef DEBUG_PUSH
8053: fprintf(stderr, "PP: entering START_TAG\n");
8054: #endif
8055: break;
8056: } else if (cur == '&') {
1.143 daniel 8057: if ((!terminate) &&
8058: (xmlParseLookupSequence(ctxt, ';', 0, 0) < 0))
1.140 daniel 8059: goto done;
8060: #ifdef DEBUG_PUSH
8061: fprintf(stderr, "PP: Parsing Reference\n");
8062: #endif
8063: /* TODO: check generation of subtrees if noent !!! */
8064: xmlParseReference(ctxt);
8065: } else {
1.156 daniel 8066: /* TODO Avoid the extra copy, handle directly !!! */
1.140 daniel 8067: /*
8068: * Goal of the following test is :
8069: * - minimize calls to the SAX 'character' callback
8070: * when they are mergeable
8071: * - handle an problem for isBlank when we only parse
8072: * a sequence of blank chars and the next one is
8073: * not available to check against '<' presence.
8074: * - tries to homogenize the differences in SAX
8075: * callbacks beween the push and pull versions
8076: * of the parser.
8077: */
8078: if ((ctxt->inputNr == 1) &&
8079: (avail < XML_PARSER_BIG_BUFFER_SIZE)) {
1.143 daniel 8080: if ((!terminate) &&
8081: (xmlParseLookupSequence(ctxt, '<', 0, 0) < 0))
1.140 daniel 8082: goto done;
8083: }
8084: ctxt->checkIndex = 0;
8085: #ifdef DEBUG_PUSH
8086: fprintf(stderr, "PP: Parsing char data\n");
8087: #endif
8088: xmlParseCharData(ctxt, 0);
8089: }
8090: /*
8091: * Pop-up of finished entities.
8092: */
1.152 daniel 8093: while ((RAW == 0) && (ctxt->inputNr > 1))
1.140 daniel 8094: xmlPopInput(ctxt);
8095: break;
8096: case XML_PARSER_CDATA_SECTION: {
8097: /*
8098: * The Push mode need to have the SAX callback for
8099: * cdataBlock merge back contiguous callbacks.
8100: */
8101: int base;
8102:
8103: in = ctxt->input;
8104: base = xmlParseLookupSequence(ctxt, ']', ']', '>');
8105: if (base < 0) {
8106: if (avail >= XML_PARSER_BIG_BUFFER_SIZE + 2) {
8107: if (ctxt->sax != NULL) {
8108: if (ctxt->sax->cdataBlock != NULL)
8109: ctxt->sax->cdataBlock(ctxt->userData, in->cur,
8110: XML_PARSER_BIG_BUFFER_SIZE);
8111: }
8112: SKIP(XML_PARSER_BIG_BUFFER_SIZE);
8113: ctxt->checkIndex = 0;
8114: }
8115: goto done;
8116: } else {
8117: if ((ctxt->sax != NULL) && (base > 0)) {
8118: if (ctxt->sax->cdataBlock != NULL)
8119: ctxt->sax->cdataBlock(ctxt->userData,
8120: in->cur, base);
8121: }
8122: SKIP(base + 3);
8123: ctxt->checkIndex = 0;
8124: ctxt->instate = XML_PARSER_CONTENT;
8125: #ifdef DEBUG_PUSH
8126: fprintf(stderr, "PP: entering CONTENT\n");
8127: #endif
8128: }
8129: break;
8130: }
1.141 daniel 8131: case XML_PARSER_END_TAG:
1.140 daniel 8132: if (avail < 2)
8133: goto done;
1.143 daniel 8134: if ((!terminate) &&
8135: (xmlParseLookupSequence(ctxt, '>', 0, 0) < 0))
1.140 daniel 8136: goto done;
8137: xmlParseEndTag(ctxt);
8138: if (ctxt->name == NULL) {
8139: ctxt->instate = XML_PARSER_EPILOG;
8140: #ifdef DEBUG_PUSH
8141: fprintf(stderr, "PP: entering EPILOG\n");
8142: #endif
8143: } else {
8144: ctxt->instate = XML_PARSER_CONTENT;
8145: #ifdef DEBUG_PUSH
8146: fprintf(stderr, "PP: entering CONTENT\n");
8147: #endif
8148: }
8149: break;
8150: case XML_PARSER_DTD: {
8151: /*
8152: * Sorry but progressive parsing of the internal subset
8153: * is not expected to be supported. We first check that
8154: * the full content of the internal subset is available and
8155: * the parsing is launched only at that point.
8156: * Internal subset ends up with "']' S? '>'" in an unescaped
8157: * section and not in a ']]>' sequence which are conditional
8158: * sections (whoever argued to keep that crap in XML deserve
8159: * a place in hell !).
8160: */
8161: int base, i;
8162: xmlChar *buf;
8163: xmlChar quote = 0;
8164:
8165: base = in->cur - in->base;
8166: if (base < 0) return(0);
8167: if (ctxt->checkIndex > base)
8168: base = ctxt->checkIndex;
8169: buf = in->buf->buffer->content;
8170: for (;base < in->buf->buffer->use;base++) {
8171: if (quote != 0) {
8172: if (buf[base] == quote)
8173: quote = 0;
8174: continue;
8175: }
8176: if (buf[base] == '"') {
8177: quote = '"';
8178: continue;
8179: }
8180: if (buf[base] == '\'') {
8181: quote = '\'';
8182: continue;
8183: }
8184: if (buf[base] == ']') {
8185: if (base +1 >= in->buf->buffer->use)
8186: break;
8187: if (buf[base + 1] == ']') {
8188: /* conditional crap, skip both ']' ! */
8189: base++;
8190: continue;
8191: }
8192: for (i = 0;base + i < in->buf->buffer->use;i++) {
8193: if (buf[base + i] == '>')
8194: goto found_end_int_subset;
8195: }
8196: break;
8197: }
8198: }
8199: /*
8200: * We didn't found the end of the Internal subset
8201: */
8202: if (quote == 0)
8203: ctxt->checkIndex = base;
8204: #ifdef DEBUG_PUSH
8205: if (next == 0)
8206: fprintf(stderr, "PP: lookup of int subset end filed\n");
8207: #endif
8208: goto done;
8209:
8210: found_end_int_subset:
8211: xmlParseInternalSubset(ctxt);
8212: ctxt->instate = XML_PARSER_PROLOG;
8213: ctxt->checkIndex = 0;
8214: #ifdef DEBUG_PUSH
8215: fprintf(stderr, "PP: entering PROLOG\n");
8216: #endif
8217: break;
8218: }
8219: case XML_PARSER_COMMENT:
8220: fprintf(stderr, "PP: internal error, state == COMMENT\n");
8221: ctxt->instate = XML_PARSER_CONTENT;
8222: #ifdef DEBUG_PUSH
8223: fprintf(stderr, "PP: entering CONTENT\n");
8224: #endif
8225: break;
8226: case XML_PARSER_PI:
8227: fprintf(stderr, "PP: internal error, state == PI\n");
8228: ctxt->instate = XML_PARSER_CONTENT;
8229: #ifdef DEBUG_PUSH
8230: fprintf(stderr, "PP: entering CONTENT\n");
8231: #endif
8232: break;
1.128 daniel 8233: case XML_PARSER_ENTITY_DECL:
1.140 daniel 8234: fprintf(stderr, "PP: internal error, state == ENTITY_DECL\n");
8235: ctxt->instate = XML_PARSER_DTD;
8236: #ifdef DEBUG_PUSH
8237: fprintf(stderr, "PP: entering DTD\n");
8238: #endif
8239: break;
1.128 daniel 8240: case XML_PARSER_ENTITY_VALUE:
1.140 daniel 8241: fprintf(stderr, "PP: internal error, state == ENTITY_VALUE\n");
8242: ctxt->instate = XML_PARSER_CONTENT;
8243: #ifdef DEBUG_PUSH
8244: fprintf(stderr, "PP: entering DTD\n");
8245: #endif
8246: break;
1.128 daniel 8247: case XML_PARSER_ATTRIBUTE_VALUE:
1.140 daniel 8248: fprintf(stderr, "PP: internal error, state == ATTRIBUTE_VALUE\n");
8249: ctxt->instate = XML_PARSER_START_TAG;
8250: #ifdef DEBUG_PUSH
8251: fprintf(stderr, "PP: entering START_TAG\n");
8252: #endif
8253: break;
1.128 daniel 8254: }
8255: }
1.140 daniel 8256: done:
8257: #ifdef DEBUG_PUSH
8258: fprintf(stderr, "PP: done %d\n", ret);
8259: #endif
1.128 daniel 8260: return(ret);
8261: }
8262:
8263: /**
1.143 daniel 8264: * xmlParseTry:
8265: * @ctxt: an XML parser context
8266: *
8267: * Try to progress on parsing
8268: *
8269: * Returns zero if no parsing was possible
8270: */
8271: int
8272: xmlParseTry(xmlParserCtxtPtr ctxt) {
8273: return(xmlParseTryOrFinish(ctxt, 0));
8274: }
8275:
8276: /**
1.128 daniel 8277: * xmlParseChunk:
8278: * @ctxt: an XML parser context
8279: * @chunk: an char array
8280: * @size: the size in byte of the chunk
8281: * @terminate: last chunk indicator
8282: *
8283: * Parse a Chunk of memory
8284: *
8285: * Returns zero if no error, the xmlParserErrors otherwise.
8286: */
1.140 daniel 8287: int
1.128 daniel 8288: xmlParseChunk(xmlParserCtxtPtr ctxt, const char *chunk, int size,
8289: int terminate) {
1.132 daniel 8290: if ((size > 0) && (chunk != NULL) && (ctxt->input != NULL) &&
1.140 daniel 8291: (ctxt->input->buf != NULL) && (ctxt->instate != XML_PARSER_EOF)) {
8292: int base = ctxt->input->base - ctxt->input->buf->buffer->content;
8293: int cur = ctxt->input->cur - ctxt->input->base;
8294:
1.132 daniel 8295: xmlParserInputBufferPush(ctxt->input->buf, size, chunk);
1.140 daniel 8296: ctxt->input->base = ctxt->input->buf->buffer->content + base;
8297: ctxt->input->cur = ctxt->input->base + cur;
8298: #ifdef DEBUG_PUSH
8299: fprintf(stderr, "PP: pushed %d\n", size);
8300: #endif
8301:
1.150 daniel 8302: if ((terminate) || (ctxt->input->buf->buffer->use > 80))
8303: xmlParseTryOrFinish(ctxt, terminate);
1.140 daniel 8304: } else if (ctxt->instate != XML_PARSER_EOF)
1.143 daniel 8305: xmlParseTryOrFinish(ctxt, terminate);
1.140 daniel 8306: if (terminate) {
1.151 daniel 8307: /*
8308: * Grab the encoding if it was added on-the-fly
8309: */
8310: if ((ctxt->encoding != NULL) && (ctxt->myDoc != NULL) &&
8311: (ctxt->myDoc->encoding == NULL)) {
8312: ctxt->myDoc->encoding = ctxt->encoding;
8313: ctxt->encoding = NULL;
8314: }
8315:
8316: /*
8317: * Check for termination
8318: */
1.140 daniel 8319: if ((ctxt->instate != XML_PARSER_EOF) &&
8320: (ctxt->instate != XML_PARSER_EPILOG)) {
8321: if ((ctxt->sax != NULL) && (ctxt->sax->error != NULL))
8322: ctxt->sax->error(ctxt->userData,
8323: "Extra content at the end of the document\n");
8324: ctxt->wellFormed = 0;
8325: ctxt->errNo = XML_ERR_DOCUMENT_END;
8326: }
8327: if (ctxt->instate != XML_PARSER_EOF) {
8328: if ((ctxt->sax) && (ctxt->sax->endDocument != NULL))
8329: ctxt->sax->endDocument(ctxt->userData);
8330: }
8331: ctxt->instate = XML_PARSER_EOF;
1.128 daniel 8332: }
8333: return((xmlParserErrors) ctxt->errNo);
8334: }
8335:
8336: /************************************************************************
8337: * *
1.98 daniel 8338: * I/O front end functions to the parser *
8339: * *
8340: ************************************************************************/
8341:
1.50 daniel 8342: /**
1.140 daniel 8343: * xmlCreatePushParserCtxt :
8344: * @sax: a SAX handler
8345: * @user_data: The user data returned on SAX callbacks
8346: * @chunk: a pointer to an array of chars
8347: * @size: number of chars in the array
8348: * @filename: an optional file name or URI
8349: *
8350: * Create a parser context for using the XML parser in push mode
8351: * To allow content encoding detection, @size should be >= 4
8352: * The value of @filename is used for fetching external entities
8353: * and error/warning reports.
8354: *
8355: * Returns the new parser context or NULL
8356: */
8357: xmlParserCtxtPtr
8358: xmlCreatePushParserCtxt(xmlSAXHandlerPtr sax, void *user_data,
8359: const char *chunk, int size, const char *filename) {
8360: xmlParserCtxtPtr ctxt;
8361: xmlParserInputPtr inputStream;
8362: xmlParserInputBufferPtr buf;
8363: xmlCharEncoding enc = XML_CHAR_ENCODING_NONE;
8364:
8365: /*
1.156 daniel 8366: * plug some encoding conversion routines
1.140 daniel 8367: */
8368: if ((chunk != NULL) && (size >= 4))
1.156 daniel 8369: enc = xmlDetectCharEncoding((const xmlChar *) chunk, size);
1.140 daniel 8370:
8371: buf = xmlAllocParserInputBuffer(enc);
8372: if (buf == NULL) return(NULL);
8373:
8374: ctxt = xmlNewParserCtxt();
8375: if (ctxt == NULL) {
8376: xmlFree(buf);
8377: return(NULL);
8378: }
8379: if (sax != NULL) {
8380: if (ctxt->sax != &xmlDefaultSAXHandler)
8381: xmlFree(ctxt->sax);
8382: ctxt->sax = (xmlSAXHandlerPtr) xmlMalloc(sizeof(xmlSAXHandler));
8383: if (ctxt->sax == NULL) {
8384: xmlFree(buf);
8385: xmlFree(ctxt);
8386: return(NULL);
8387: }
8388: memcpy(ctxt->sax, sax, sizeof(xmlSAXHandler));
8389: if (user_data != NULL)
8390: ctxt->userData = user_data;
8391: }
8392: if (filename == NULL) {
8393: ctxt->directory = NULL;
8394: } else {
8395: ctxt->directory = xmlParserGetDirectory(filename);
8396: }
8397:
8398: inputStream = xmlNewInputStream(ctxt);
8399: if (inputStream == NULL) {
8400: xmlFreeParserCtxt(ctxt);
8401: return(NULL);
8402: }
8403:
8404: if (filename == NULL)
8405: inputStream->filename = NULL;
8406: else
8407: inputStream->filename = xmlMemStrdup(filename);
8408: inputStream->buf = buf;
8409: inputStream->base = inputStream->buf->buffer->content;
8410: inputStream->cur = inputStream->buf->buffer->content;
1.156 daniel 8411: if (enc != XML_CHAR_ENCODING_NONE) {
8412: xmlSwitchEncoding(ctxt, enc);
8413: }
1.140 daniel 8414:
8415: inputPush(ctxt, inputStream);
8416:
8417: if ((size > 0) && (chunk != NULL) && (ctxt->input != NULL) &&
8418: (ctxt->input->buf != NULL)) {
8419: xmlParserInputBufferPush(ctxt->input->buf, size, chunk);
8420: #ifdef DEBUG_PUSH
8421: fprintf(stderr, "PP: pushed %d\n", size);
8422: #endif
8423: }
8424:
8425: return(ctxt);
8426: }
8427:
8428: /**
1.86 daniel 8429: * xmlCreateDocParserCtxt :
1.123 daniel 8430: * @cur: a pointer to an array of xmlChar
1.50 daniel 8431: *
1.69 daniel 8432: * Create a parser context for an XML in-memory document.
8433: *
8434: * Returns the new parser context or NULL
1.16 daniel 8435: */
1.69 daniel 8436: xmlParserCtxtPtr
1.123 daniel 8437: xmlCreateDocParserCtxt(xmlChar *cur) {
1.16 daniel 8438: xmlParserCtxtPtr ctxt;
1.40 daniel 8439: xmlParserInputPtr input;
1.16 daniel 8440:
1.97 daniel 8441: ctxt = xmlNewParserCtxt();
1.16 daniel 8442: if (ctxt == NULL) {
8443: return(NULL);
8444: }
1.96 daniel 8445: input = xmlNewInputStream(ctxt);
1.40 daniel 8446: if (input == NULL) {
1.97 daniel 8447: xmlFreeParserCtxt(ctxt);
1.40 daniel 8448: return(NULL);
8449: }
8450:
8451: input->base = cur;
8452: input->cur = cur;
8453:
8454: inputPush(ctxt, input);
1.69 daniel 8455: return(ctxt);
8456: }
8457:
8458: /**
8459: * xmlSAXParseDoc :
8460: * @sax: the SAX handler block
1.123 daniel 8461: * @cur: a pointer to an array of xmlChar
1.69 daniel 8462: * @recovery: work in recovery mode, i.e. tries to read no Well Formed
8463: * documents
8464: *
8465: * parse an XML in-memory document and build a tree.
8466: * It use the given SAX function block to handle the parsing callback.
8467: * If sax is NULL, fallback to the default DOM tree building routines.
8468: *
8469: * Returns the resulting document tree
8470: */
8471:
8472: xmlDocPtr
1.123 daniel 8473: xmlSAXParseDoc(xmlSAXHandlerPtr sax, xmlChar *cur, int recovery) {
1.69 daniel 8474: xmlDocPtr ret;
8475: xmlParserCtxtPtr ctxt;
8476:
8477: if (cur == NULL) return(NULL);
1.16 daniel 8478:
8479:
1.69 daniel 8480: ctxt = xmlCreateDocParserCtxt(cur);
8481: if (ctxt == NULL) return(NULL);
1.74 daniel 8482: if (sax != NULL) {
8483: ctxt->sax = sax;
8484: ctxt->userData = NULL;
8485: }
1.69 daniel 8486:
1.16 daniel 8487: xmlParseDocument(ctxt);
1.72 daniel 8488: if ((ctxt->wellFormed) || recovery) ret = ctxt->myDoc;
1.59 daniel 8489: else {
8490: ret = NULL;
1.72 daniel 8491: xmlFreeDoc(ctxt->myDoc);
8492: ctxt->myDoc = NULL;
1.59 daniel 8493: }
1.86 daniel 8494: if (sax != NULL)
8495: ctxt->sax = NULL;
1.69 daniel 8496: xmlFreeParserCtxt(ctxt);
1.16 daniel 8497:
1.1 veillard 8498: return(ret);
8499: }
8500:
1.50 daniel 8501: /**
1.55 daniel 8502: * xmlParseDoc :
1.123 daniel 8503: * @cur: a pointer to an array of xmlChar
1.55 daniel 8504: *
8505: * parse an XML in-memory document and build a tree.
8506: *
1.68 daniel 8507: * Returns the resulting document tree
1.55 daniel 8508: */
8509:
1.69 daniel 8510: xmlDocPtr
1.123 daniel 8511: xmlParseDoc(xmlChar *cur) {
1.59 daniel 8512: return(xmlSAXParseDoc(NULL, cur, 0));
1.76 daniel 8513: }
8514:
8515: /**
8516: * xmlSAXParseDTD :
8517: * @sax: the SAX handler block
8518: * @ExternalID: a NAME* containing the External ID of the DTD
8519: * @SystemID: a NAME* containing the URL to the DTD
8520: *
8521: * Load and parse an external subset.
8522: *
8523: * Returns the resulting xmlDtdPtr or NULL in case of error.
8524: */
8525:
8526: xmlDtdPtr
1.123 daniel 8527: xmlSAXParseDTD(xmlSAXHandlerPtr sax, const xmlChar *ExternalID,
8528: const xmlChar *SystemID) {
1.76 daniel 8529: xmlDtdPtr ret = NULL;
8530: xmlParserCtxtPtr ctxt;
1.83 daniel 8531: xmlParserInputPtr input = NULL;
1.76 daniel 8532: xmlCharEncoding enc;
8533:
8534: if ((ExternalID == NULL) && (SystemID == NULL)) return(NULL);
8535:
1.97 daniel 8536: ctxt = xmlNewParserCtxt();
1.76 daniel 8537: if (ctxt == NULL) {
8538: return(NULL);
8539: }
8540:
8541: /*
8542: * Set-up the SAX context
8543: */
8544: if (ctxt == NULL) return(NULL);
8545: if (sax != NULL) {
1.93 veillard 8546: if (ctxt->sax != NULL)
1.119 daniel 8547: xmlFree(ctxt->sax);
1.76 daniel 8548: ctxt->sax = sax;
8549: ctxt->userData = NULL;
8550: }
8551:
8552: /*
8553: * Ask the Entity resolver to load the damn thing
8554: */
8555:
8556: if ((ctxt->sax != NULL) && (ctxt->sax->resolveEntity != NULL))
8557: input = ctxt->sax->resolveEntity(ctxt->userData, ExternalID, SystemID);
8558: if (input == NULL) {
1.86 daniel 8559: if (sax != NULL) ctxt->sax = NULL;
1.76 daniel 8560: xmlFreeParserCtxt(ctxt);
8561: return(NULL);
8562: }
8563:
8564: /*
1.156 daniel 8565: * plug some encoding conversion routines here.
1.76 daniel 8566: */
8567: xmlPushInput(ctxt, input);
1.156 daniel 8568: enc = xmlDetectCharEncoding(ctxt->input->cur, 4);
1.76 daniel 8569: xmlSwitchEncoding(ctxt, enc);
8570:
1.95 veillard 8571: if (input->filename == NULL)
1.156 daniel 8572: input->filename = (char *) xmlStrdup(SystemID);
1.76 daniel 8573: input->line = 1;
8574: input->col = 1;
8575: input->base = ctxt->input->cur;
8576: input->cur = ctxt->input->cur;
8577: input->free = NULL;
8578:
8579: /*
8580: * let's parse that entity knowing it's an external subset.
8581: */
1.79 daniel 8582: xmlParseExternalSubset(ctxt, ExternalID, SystemID);
1.76 daniel 8583:
8584: if (ctxt->myDoc != NULL) {
8585: if (ctxt->wellFormed) {
8586: ret = ctxt->myDoc->intSubset;
8587: ctxt->myDoc->intSubset = NULL;
8588: } else {
8589: ret = NULL;
8590: }
8591: xmlFreeDoc(ctxt->myDoc);
8592: ctxt->myDoc = NULL;
8593: }
1.86 daniel 8594: if (sax != NULL) ctxt->sax = NULL;
1.76 daniel 8595: xmlFreeParserCtxt(ctxt);
8596:
8597: return(ret);
8598: }
8599:
8600: /**
8601: * xmlParseDTD :
8602: * @ExternalID: a NAME* containing the External ID of the DTD
8603: * @SystemID: a NAME* containing the URL to the DTD
8604: *
8605: * Load and parse an external subset.
8606: *
8607: * Returns the resulting xmlDtdPtr or NULL in case of error.
8608: */
8609:
8610: xmlDtdPtr
1.123 daniel 8611: xmlParseDTD(const xmlChar *ExternalID, const xmlChar *SystemID) {
1.76 daniel 8612: return(xmlSAXParseDTD(NULL, ExternalID, SystemID));
1.59 daniel 8613: }
8614:
8615: /**
1.144 daniel 8616: * xmlSAXParseBalancedChunk :
8617: * @ctx: an XML parser context (possibly NULL)
8618: * @sax: the SAX handler bloc (possibly NULL)
8619: * @user_data: The user data returned on SAX callbacks (possibly NULL)
8620: * @input: a parser input stream
8621: * @enc: the encoding
8622: *
8623: * Parse a well-balanced chunk of an XML document
8624: * The user has to provide SAX callback block whose routines will be
8625: * called by the parser
8626: * The allowed sequence for the Well Balanced Chunk is the one defined by
8627: * the content production in the XML grammar:
8628: *
8629: * [43] content ::= (element | CharData | Reference | CDSect | PI | Comment)*
8630: *
8631: * Returns 0 id the chunk is well balanced, -1 in case of args problem and
8632: * the error code otherwise
8633: */
8634:
8635: int
8636: xmlSAXParseBalancedChunk(xmlParserCtxtPtr ctx, xmlSAXHandlerPtr sax,
8637: void *user_data, xmlParserInputPtr input,
8638: xmlCharEncoding enc) {
8639: xmlParserCtxtPtr ctxt;
8640: int ret;
8641:
8642: if (input == NULL) return(-1);
8643:
8644: if (ctx != NULL)
8645: ctxt = ctx;
8646: else {
8647: ctxt = xmlNewParserCtxt();
8648: if (ctxt == NULL)
8649: return(-1);
8650: if (sax == NULL)
8651: ctxt->myDoc = xmlNewDoc(BAD_CAST "1.0");
8652: }
8653:
8654: /*
8655: * Set-up the SAX context
8656: */
8657: if (sax != NULL) {
8658: if (ctxt->sax != NULL)
8659: xmlFree(ctxt->sax);
8660: ctxt->sax = sax;
8661: ctxt->userData = user_data;
8662: }
8663:
8664: /*
8665: * plug some encoding conversion routines here.
8666: */
8667: xmlPushInput(ctxt, input);
8668: if (enc != XML_CHAR_ENCODING_NONE)
8669: xmlSwitchEncoding(ctxt, enc);
8670:
8671: /*
8672: * let's parse that entity knowing it's an external subset.
8673: */
8674: xmlParseContent(ctxt);
8675: ret = ctxt->errNo;
8676:
8677: if (ctx == NULL) {
8678: if (sax != NULL)
8679: ctxt->sax = NULL;
8680: else
8681: xmlFreeDoc(ctxt->myDoc);
8682: xmlFreeParserCtxt(ctxt);
8683: }
8684: return(ret);
8685: }
8686:
8687: /**
8688: * xmlParseBalancedChunk :
8689: * @doc: the document the chunk pertains to
8690: * @node: the node defining the context in which informations will be added
8691: *
8692: * Parse a well-balanced chunk of an XML document present in memory
8693: *
8694: * Returns the resulting list of nodes resulting from the parsing,
8695: * they are not added to @node
8696: */
8697:
8698: xmlNodePtr
8699: xmlParseBalancedChunkMemory(xmlDocPtr doc, xmlNodePtr node) {
1.156 daniel 8700: /* TODO !!! */
8701: return(NULL);
1.144 daniel 8702: }
8703:
8704: /**
8705: * xmlParseBalancedChunkFile :
8706: * @doc: the document the chunk pertains to
8707: *
8708: * Parse a well-balanced chunk of an XML document contained in a file
8709: *
8710: * Returns the resulting list of nodes resulting from the parsing,
8711: * they are not added to @node
8712: */
8713:
8714: xmlNodePtr
8715: xmlParseBalancedChunkFile(xmlDocPtr doc, xmlNodePtr node) {
1.156 daniel 8716: /* TODO !!! */
8717: return(NULL);
1.144 daniel 8718: }
8719:
8720: /**
1.59 daniel 8721: * xmlRecoverDoc :
1.123 daniel 8722: * @cur: a pointer to an array of xmlChar
1.59 daniel 8723: *
8724: * parse an XML in-memory document and build a tree.
8725: * In the case the document is not Well Formed, a tree is built anyway
8726: *
1.68 daniel 8727: * Returns the resulting document tree
1.59 daniel 8728: */
8729:
1.69 daniel 8730: xmlDocPtr
1.123 daniel 8731: xmlRecoverDoc(xmlChar *cur) {
1.59 daniel 8732: return(xmlSAXParseDoc(NULL, cur, 1));
1.55 daniel 8733: }
8734:
8735: /**
1.69 daniel 8736: * xmlCreateFileParserCtxt :
1.50 daniel 8737: * @filename: the filename
8738: *
1.69 daniel 8739: * Create a parser context for a file content.
8740: * Automatic support for ZLIB/Compress compressed document is provided
8741: * by default if found at compile-time.
1.50 daniel 8742: *
1.69 daniel 8743: * Returns the new parser context or NULL
1.9 httpng 8744: */
1.69 daniel 8745: xmlParserCtxtPtr
8746: xmlCreateFileParserCtxt(const char *filename)
8747: {
8748: xmlParserCtxtPtr ctxt;
1.40 daniel 8749: xmlParserInputPtr inputStream;
1.91 daniel 8750: xmlParserInputBufferPtr buf;
1.111 daniel 8751: char *directory = NULL;
1.9 httpng 8752:
1.91 daniel 8753: buf = xmlParserInputBufferCreateFilename(filename, XML_CHAR_ENCODING_NONE);
8754: if (buf == NULL) return(NULL);
1.9 httpng 8755:
1.97 daniel 8756: ctxt = xmlNewParserCtxt();
1.16 daniel 8757: if (ctxt == NULL) {
8758: return(NULL);
8759: }
1.97 daniel 8760:
1.96 daniel 8761: inputStream = xmlNewInputStream(ctxt);
1.40 daniel 8762: if (inputStream == NULL) {
1.97 daniel 8763: xmlFreeParserCtxt(ctxt);
1.40 daniel 8764: return(NULL);
8765: }
8766:
1.119 daniel 8767: inputStream->filename = xmlMemStrdup(filename);
1.91 daniel 8768: inputStream->buf = buf;
8769: inputStream->base = inputStream->buf->buffer->content;
8770: inputStream->cur = inputStream->buf->buffer->content;
1.16 daniel 8771:
1.40 daniel 8772: inputPush(ctxt, inputStream);
1.110 daniel 8773: if ((ctxt->directory == NULL) && (directory == NULL))
1.106 daniel 8774: directory = xmlParserGetDirectory(filename);
8775: if ((ctxt->directory == NULL) && (directory != NULL))
1.110 daniel 8776: ctxt->directory = directory;
1.106 daniel 8777:
1.69 daniel 8778: return(ctxt);
8779: }
8780:
8781: /**
8782: * xmlSAXParseFile :
8783: * @sax: the SAX handler block
8784: * @filename: the filename
8785: * @recovery: work in recovery mode, i.e. tries to read no Well Formed
8786: * documents
8787: *
8788: * parse an XML file and build a tree. Automatic support for ZLIB/Compress
8789: * compressed document is provided by default if found at compile-time.
8790: * It use the given SAX function block to handle the parsing callback.
8791: * If sax is NULL, fallback to the default DOM tree building routines.
8792: *
8793: * Returns the resulting document tree
8794: */
8795:
1.79 daniel 8796: xmlDocPtr
8797: xmlSAXParseFile(xmlSAXHandlerPtr sax, const char *filename,
1.69 daniel 8798: int recovery) {
8799: xmlDocPtr ret;
8800: xmlParserCtxtPtr ctxt;
1.111 daniel 8801: char *directory = NULL;
1.69 daniel 8802:
8803: ctxt = xmlCreateFileParserCtxt(filename);
8804: if (ctxt == NULL) return(NULL);
1.74 daniel 8805: if (sax != NULL) {
1.93 veillard 8806: if (ctxt->sax != NULL)
1.119 daniel 8807: xmlFree(ctxt->sax);
1.74 daniel 8808: ctxt->sax = sax;
8809: ctxt->userData = NULL;
8810: }
1.106 daniel 8811:
1.110 daniel 8812: if ((ctxt->directory == NULL) && (directory == NULL))
1.106 daniel 8813: directory = xmlParserGetDirectory(filename);
8814: if ((ctxt->directory == NULL) && (directory != NULL))
1.156 daniel 8815: ctxt->directory = (char *) xmlStrdup((xmlChar *) directory);
1.16 daniel 8816:
8817: xmlParseDocument(ctxt);
1.40 daniel 8818:
1.72 daniel 8819: if ((ctxt->wellFormed) || recovery) ret = ctxt->myDoc;
1.59 daniel 8820: else {
8821: ret = NULL;
1.72 daniel 8822: xmlFreeDoc(ctxt->myDoc);
8823: ctxt->myDoc = NULL;
1.59 daniel 8824: }
1.86 daniel 8825: if (sax != NULL)
8826: ctxt->sax = NULL;
1.69 daniel 8827: xmlFreeParserCtxt(ctxt);
1.20 daniel 8828:
8829: return(ret);
8830: }
8831:
1.55 daniel 8832: /**
8833: * xmlParseFile :
8834: * @filename: the filename
8835: *
8836: * parse an XML file and build a tree. Automatic support for ZLIB/Compress
8837: * compressed document is provided by default if found at compile-time.
8838: *
1.68 daniel 8839: * Returns the resulting document tree
1.55 daniel 8840: */
8841:
1.79 daniel 8842: xmlDocPtr
8843: xmlParseFile(const char *filename) {
1.59 daniel 8844: return(xmlSAXParseFile(NULL, filename, 0));
8845: }
8846:
8847: /**
8848: * xmlRecoverFile :
8849: * @filename: the filename
8850: *
8851: * parse an XML file and build a tree. Automatic support for ZLIB/Compress
8852: * compressed document is provided by default if found at compile-time.
8853: * In the case the document is not Well Formed, a tree is built anyway
8854: *
1.68 daniel 8855: * Returns the resulting document tree
1.59 daniel 8856: */
8857:
1.79 daniel 8858: xmlDocPtr
8859: xmlRecoverFile(const char *filename) {
1.59 daniel 8860: return(xmlSAXParseFile(NULL, filename, 1));
1.55 daniel 8861: }
1.32 daniel 8862:
1.50 daniel 8863: /**
1.69 daniel 8864: * xmlCreateMemoryParserCtxt :
1.68 daniel 8865: * @buffer: an pointer to a char array
1.127 daniel 8866: * @size: the size of the array
1.50 daniel 8867: *
1.69 daniel 8868: * Create a parser context for an XML in-memory document.
1.50 daniel 8869: *
1.69 daniel 8870: * Returns the new parser context or NULL
1.20 daniel 8871: */
1.69 daniel 8872: xmlParserCtxtPtr
8873: xmlCreateMemoryParserCtxt(char *buffer, int size) {
1.20 daniel 8874: xmlParserCtxtPtr ctxt;
1.40 daniel 8875: xmlParserInputPtr input;
8876:
1.158 daniel 8877: if (buffer[size - 1] != 0)
8878: buffer[size - 1] = '\0';
1.40 daniel 8879:
1.97 daniel 8880: ctxt = xmlNewParserCtxt();
1.20 daniel 8881: if (ctxt == NULL) {
8882: return(NULL);
8883: }
1.97 daniel 8884:
1.96 daniel 8885: input = xmlNewInputStream(ctxt);
1.40 daniel 8886: if (input == NULL) {
1.97 daniel 8887: xmlFreeParserCtxt(ctxt);
1.40 daniel 8888: return(NULL);
8889: }
1.20 daniel 8890:
1.40 daniel 8891: input->filename = NULL;
8892: input->line = 1;
8893: input->col = 1;
1.96 daniel 8894: input->buf = NULL;
1.91 daniel 8895: input->consumed = 0;
1.75 daniel 8896:
1.116 daniel 8897: input->base = BAD_CAST buffer;
8898: input->cur = BAD_CAST buffer;
1.69 daniel 8899: input->free = NULL;
1.20 daniel 8900:
1.40 daniel 8901: inputPush(ctxt, input);
1.69 daniel 8902: return(ctxt);
8903: }
8904:
8905: /**
8906: * xmlSAXParseMemory :
8907: * @sax: the SAX handler block
8908: * @buffer: an pointer to a char array
1.127 daniel 8909: * @size: the size of the array
8910: * @recovery: work in recovery mode, i.e. tries to read not Well Formed
1.69 daniel 8911: * documents
8912: *
8913: * parse an XML in-memory block and use the given SAX function block
8914: * to handle the parsing callback. If sax is NULL, fallback to the default
8915: * DOM tree building routines.
8916: *
8917: * Returns the resulting document tree
8918: */
8919: xmlDocPtr
8920: xmlSAXParseMemory(xmlSAXHandlerPtr sax, char *buffer, int size, int recovery) {
8921: xmlDocPtr ret;
8922: xmlParserCtxtPtr ctxt;
8923:
8924: ctxt = xmlCreateMemoryParserCtxt(buffer, size);
8925: if (ctxt == NULL) return(NULL);
1.74 daniel 8926: if (sax != NULL) {
8927: ctxt->sax = sax;
8928: ctxt->userData = NULL;
8929: }
1.20 daniel 8930:
8931: xmlParseDocument(ctxt);
1.40 daniel 8932:
1.72 daniel 8933: if ((ctxt->wellFormed) || recovery) ret = ctxt->myDoc;
1.59 daniel 8934: else {
8935: ret = NULL;
1.72 daniel 8936: xmlFreeDoc(ctxt->myDoc);
8937: ctxt->myDoc = NULL;
1.59 daniel 8938: }
1.86 daniel 8939: if (sax != NULL)
8940: ctxt->sax = NULL;
1.69 daniel 8941: xmlFreeParserCtxt(ctxt);
1.16 daniel 8942:
1.9 httpng 8943: return(ret);
1.17 daniel 8944: }
8945:
1.55 daniel 8946: /**
8947: * xmlParseMemory :
1.68 daniel 8948: * @buffer: an pointer to a char array
1.55 daniel 8949: * @size: the size of the array
8950: *
8951: * parse an XML in-memory block and build a tree.
8952: *
1.68 daniel 8953: * Returns the resulting document tree
1.55 daniel 8954: */
8955:
8956: xmlDocPtr xmlParseMemory(char *buffer, int size) {
1.59 daniel 8957: return(xmlSAXParseMemory(NULL, buffer, size, 0));
8958: }
8959:
8960: /**
8961: * xmlRecoverMemory :
1.68 daniel 8962: * @buffer: an pointer to a char array
1.59 daniel 8963: * @size: the size of the array
8964: *
8965: * parse an XML in-memory block and build a tree.
8966: * In the case the document is not Well Formed, a tree is built anyway
8967: *
1.68 daniel 8968: * Returns the resulting document tree
1.59 daniel 8969: */
8970:
8971: xmlDocPtr xmlRecoverMemory(char *buffer, int size) {
8972: return(xmlSAXParseMemory(NULL, buffer, size, 1));
1.17 daniel 8973: }
8974:
8975:
1.50 daniel 8976: /**
8977: * xmlSetupParserForBuffer:
8978: * @ctxt: an XML parser context
1.123 daniel 8979: * @buffer: a xmlChar * buffer
1.50 daniel 8980: * @filename: a file name
8981: *
1.19 daniel 8982: * Setup the parser context to parse a new buffer; Clears any prior
8983: * contents from the parser context. The buffer parameter must not be
8984: * NULL, but the filename parameter can be
8985: */
1.55 daniel 8986: void
1.123 daniel 8987: xmlSetupParserForBuffer(xmlParserCtxtPtr ctxt, const xmlChar* buffer,
1.17 daniel 8988: const char* filename)
8989: {
1.96 daniel 8990: xmlParserInputPtr input;
1.40 daniel 8991:
1.96 daniel 8992: input = xmlNewInputStream(ctxt);
8993: if (input == NULL) {
8994: perror("malloc");
1.119 daniel 8995: xmlFree(ctxt);
1.145 daniel 8996: return;
1.96 daniel 8997: }
8998:
8999: xmlClearParserCtxt(ctxt);
9000: if (filename != NULL)
1.119 daniel 9001: input->filename = xmlMemStrdup(filename);
1.96 daniel 9002: input->base = buffer;
9003: input->cur = buffer;
9004: inputPush(ctxt, input);
1.17 daniel 9005: }
9006:
1.123 daniel 9007: /**
9008: * xmlSAXUserParseFile:
9009: * @sax: a SAX handler
9010: * @user_data: The user data returned on SAX callbacks
9011: * @filename: a file name
9012: *
9013: * parse an XML file and call the given SAX handler routines.
9014: * Automatic support for ZLIB/Compress compressed document is provided
9015: *
9016: * Returns 0 in case of success or a error number otherwise
9017: */
1.131 daniel 9018: int
9019: xmlSAXUserParseFile(xmlSAXHandlerPtr sax, void *user_data,
9020: const char *filename) {
1.123 daniel 9021: int ret = 0;
9022: xmlParserCtxtPtr ctxt;
9023:
9024: ctxt = xmlCreateFileParserCtxt(filename);
9025: if (ctxt == NULL) return -1;
1.134 daniel 9026: if (ctxt->sax != &xmlDefaultSAXHandler)
9027: xmlFree(ctxt->sax);
1.123 daniel 9028: ctxt->sax = sax;
1.140 daniel 9029: if (user_data != NULL)
9030: ctxt->userData = user_data;
1.123 daniel 9031:
9032: xmlParseDocument(ctxt);
9033:
9034: if (ctxt->wellFormed)
9035: ret = 0;
9036: else {
9037: if (ctxt->errNo != 0)
9038: ret = ctxt->errNo;
9039: else
9040: ret = -1;
9041: }
9042: if (sax != NULL)
9043: ctxt->sax = NULL;
9044: xmlFreeParserCtxt(ctxt);
9045:
9046: return ret;
9047: }
9048:
9049: /**
9050: * xmlSAXUserParseMemory:
9051: * @sax: a SAX handler
9052: * @user_data: The user data returned on SAX callbacks
9053: * @buffer: an in-memory XML document input
1.127 daniel 9054: * @size: the length of the XML document in bytes
1.123 daniel 9055: *
9056: * A better SAX parsing routine.
9057: * parse an XML in-memory buffer and call the given SAX handler routines.
9058: *
9059: * Returns 0 in case of success or a error number otherwise
9060: */
9061: int xmlSAXUserParseMemory(xmlSAXHandlerPtr sax, void *user_data,
9062: char *buffer, int size) {
9063: int ret = 0;
9064: xmlParserCtxtPtr ctxt;
9065:
9066: ctxt = xmlCreateMemoryParserCtxt(buffer, size);
9067: if (ctxt == NULL) return -1;
9068: ctxt->sax = sax;
9069: ctxt->userData = user_data;
9070:
9071: xmlParseDocument(ctxt);
9072:
9073: if (ctxt->wellFormed)
9074: ret = 0;
9075: else {
9076: if (ctxt->errNo != 0)
9077: ret = ctxt->errNo;
9078: else
9079: ret = -1;
9080: }
9081: if (sax != NULL)
9082: ctxt->sax = NULL;
9083: xmlFreeParserCtxt(ctxt);
9084:
9085: return ret;
9086: }
9087:
1.32 daniel 9088:
1.98 daniel 9089: /************************************************************************
9090: * *
1.127 daniel 9091: * Miscellaneous *
1.98 daniel 9092: * *
9093: ************************************************************************/
9094:
/**
 * xmlCleanupParser:
 *
 * Cleanup function for the XML parser. It tries to reclaim all
 * parsing related global memory allocated for the parser processing.
 * It doesn't deallocate any document related memory. Calling this
 * function should not prevent reusing the parser.
 *
 * Two global tables are released, in this order: the character
 * encoding handlers, then the predefined entities.
 */

void
xmlCleanupParser(void)
{
    /* registered character encoding handlers */
    xmlCleanupCharEncodingHandlers();

    /* predefined entities table */
    xmlCleanupPredefinedEntities();
}
1.98 daniel 9109:
1.50 daniel 9110: /**
9111: * xmlParserFindNodeInfo:
9112: * @ctxt: an XML parser context
9113: * @node: an XML node within the tree
9114: *
9115: * Find the parser node info struct for a given node
9116: *
1.68 daniel 9117: * Returns an xmlParserNodeInfo block pointer or NULL
1.32 daniel 9118: */
9119: const xmlParserNodeInfo* xmlParserFindNodeInfo(const xmlParserCtxt* ctx,
9120: const xmlNode* node)
9121: {
9122: unsigned long pos;
9123:
9124: /* Find position where node should be at */
9125: pos = xmlParserFindNodeInfoIndex(&ctx->node_seq, node);
9126: if ( ctx->node_seq.buffer[pos].node == node )
9127: return &ctx->node_seq.buffer[pos];
9128: else
9129: return NULL;
9130: }
9131:
9132:
1.50 daniel 9133: /**
9134: * xmlInitNodeInfoSeq :
9135: * @seq: a node info sequence pointer
9136: *
9137: * -- Initialize (set to initial state) node info sequence
1.32 daniel 9138: */
1.55 daniel 9139: void
9140: xmlInitNodeInfoSeq(xmlParserNodeInfoSeqPtr seq)
1.32 daniel 9141: {
9142: seq->length = 0;
9143: seq->maximum = 0;
9144: seq->buffer = NULL;
9145: }
9146:
1.50 daniel 9147: /**
9148: * xmlClearNodeInfoSeq :
9149: * @seq: a node info sequence pointer
9150: *
9151: * -- Clear (release memory and reinitialize) node
1.32 daniel 9152: * info sequence
9153: */
1.55 daniel 9154: void
9155: xmlClearNodeInfoSeq(xmlParserNodeInfoSeqPtr seq)
1.32 daniel 9156: {
9157: if ( seq->buffer != NULL )
1.119 daniel 9158: xmlFree(seq->buffer);
1.32 daniel 9159: xmlInitNodeInfoSeq(seq);
9160: }
9161:
9162:
1.50 daniel 9163: /**
9164: * xmlParserFindNodeInfoIndex:
9165: * @seq: a node info sequence pointer
9166: * @node: an XML node pointer
9167: *
9168: *
1.32 daniel 9169: * xmlParserFindNodeInfoIndex : Find the index that the info record for
9170: * the given node is or should be at in a sorted sequence
1.68 daniel 9171: *
9172: * Returns a long indicating the position of the record
1.32 daniel 9173: */
9174: unsigned long xmlParserFindNodeInfoIndex(const xmlParserNodeInfoSeq* seq,
9175: const xmlNode* node)
9176: {
9177: unsigned long upper, lower, middle;
9178: int found = 0;
9179:
9180: /* Do a binary search for the key */
9181: lower = 1;
9182: upper = seq->length;
9183: middle = 0;
9184: while ( lower <= upper && !found) {
9185: middle = lower + (upper - lower) / 2;
9186: if ( node == seq->buffer[middle - 1].node )
9187: found = 1;
9188: else if ( node < seq->buffer[middle - 1].node )
9189: upper = middle - 1;
9190: else
9191: lower = middle + 1;
9192: }
9193:
9194: /* Return position */
9195: if ( middle == 0 || seq->buffer[middle - 1].node < node )
9196: return middle;
9197: else
9198: return middle - 1;
9199: }
9200:
9201:
1.50 daniel 9202: /**
9203: * xmlParserAddNodeInfo:
9204: * @ctxt: an XML parser context
1.68 daniel 9205: * @info: a node info sequence pointer
1.50 daniel 9206: *
9207: * Insert node info record into the sorted sequence
1.32 daniel 9208: */
1.55 daniel 9209: void
9210: xmlParserAddNodeInfo(xmlParserCtxtPtr ctxt,
1.68 daniel 9211: const xmlParserNodeInfo* info)
1.32 daniel 9212: {
9213: unsigned long pos;
9214: static unsigned int block_size = 5;
9215:
9216: /* Find pos and check to see if node is already in the sequence */
1.55 daniel 9217: pos = xmlParserFindNodeInfoIndex(&ctxt->node_seq, info->node);
9218: if ( pos < ctxt->node_seq.length
9219: && ctxt->node_seq.buffer[pos].node == info->node ) {
9220: ctxt->node_seq.buffer[pos] = *info;
1.32 daniel 9221: }
9222:
9223: /* Otherwise, we need to add new node to buffer */
9224: else {
9225: /* Expand buffer by 5 if needed */
1.55 daniel 9226: if ( ctxt->node_seq.length + 1 > ctxt->node_seq.maximum ) {
1.32 daniel 9227: xmlParserNodeInfo* tmp_buffer;
1.55 daniel 9228: unsigned int byte_size = (sizeof(*ctxt->node_seq.buffer)
9229: *(ctxt->node_seq.maximum + block_size));
1.32 daniel 9230:
1.55 daniel 9231: if ( ctxt->node_seq.buffer == NULL )
1.119 daniel 9232: tmp_buffer = (xmlParserNodeInfo*) xmlMalloc(byte_size);
1.32 daniel 9233: else
1.119 daniel 9234: tmp_buffer = (xmlParserNodeInfo*) xmlRealloc(ctxt->node_seq.buffer, byte_size);
1.32 daniel 9235:
9236: if ( tmp_buffer == NULL ) {
1.55 daniel 9237: if ((ctxt->sax != NULL) && (ctxt->sax->error != NULL))
1.74 daniel 9238: ctxt->sax->error(ctxt->userData, "Out of memory\n");
1.123 daniel 9239: ctxt->errNo = XML_ERR_NO_MEMORY;
1.32 daniel 9240: return;
9241: }
1.55 daniel 9242: ctxt->node_seq.buffer = tmp_buffer;
9243: ctxt->node_seq.maximum += block_size;
1.32 daniel 9244: }
9245:
9246: /* If position is not at end, move elements out of the way */
1.55 daniel 9247: if ( pos != ctxt->node_seq.length ) {
1.32 daniel 9248: unsigned long i;
9249:
1.55 daniel 9250: for ( i = ctxt->node_seq.length; i > pos; i-- )
9251: ctxt->node_seq.buffer[i] = ctxt->node_seq.buffer[i - 1];
1.32 daniel 9252: }
9253:
9254: /* Copy element and increase length */
1.55 daniel 9255: ctxt->node_seq.buffer[pos] = *info;
9256: ctxt->node_seq.length++;
1.32 daniel 9257: }
9258: }
1.77 daniel 9259:
1.98 daniel 9260:
9261: /**
9262: * xmlSubstituteEntitiesDefault :
9263: * @val: int 0 or 1
9264: *
9265: * Set and return the previous value for default entity support.
9266: * Initially the parser always keep entity references instead of substituting
9267: * entity values in the output. This function has to be used to change the
9268: * default parser behaviour
9269: * SAX::subtituteEntities() has to be used for changing that on a file by
9270: * file basis.
9271: *
9272: * Returns the last value for 0 for no substitution, 1 for substitution.
9273: */
9274:
9275: int
9276: xmlSubstituteEntitiesDefault(int val) {
9277: int old = xmlSubstituteEntitiesDefaultValue;
9278:
9279: xmlSubstituteEntitiesDefaultValue = val;
9280: return(old);
9281: }
1.77 daniel 9282:
Webmaster